diff --git a/.gitignore b/.gitignore index 74c6dfe..d4daea6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,2 +1,4 @@ .env -.vscode/ \ No newline at end of file +.vscode/ +data/python.txt +__pycache__/ \ No newline at end of file diff --git a/Pipfile b/Pipfile index db72f8d..049e9fe 100644 --- a/Pipfile +++ b/Pipfile @@ -12,6 +12,7 @@ pygithub = "*" autopep8 = "*" textgenrnn = "*" tensorflow = "*" +keras = "*" [requires] -python_version = "3.7" +python_version = "3.6" diff --git a/Pipfile.lock b/Pipfile.lock index 7af81a5..55b91f8 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,11 +1,11 @@ { "_meta": { "hash": { - "sha256": "555205229aa098b23489011420821d562dd733b450e7aed999ecb70613154be8" + "sha256": "ba44b903801716066ee4d9e10775de380a7592631feec2593064bde5053681f7" }, "pipfile-spec": 6, "requires": { - "python_version": "3.7" + "python_version": "3.6" }, "sources": [ { @@ -152,6 +152,7 @@ "sha256:794d0c92c6c4122f1f0fcf3a7bc2f49054c6a54ddbef8d8ffafca62795d760b6", "sha256:90b610a3dbbf6d257b20a079eba3fdf2eed2158f64066a7c6f7227023fd60bc9" ], + "index": "pypi", "version": "==2.2.4" }, "keras-applications": { @@ -308,36 +309,24 @@ }, "scipy": { "hashes": [ - "sha256:014cb900c003b5ac81a53f2403294e8ecf37aedc315b59a6b9370dce0aa7627a", - "sha256:281a34da34a5e0de42d26aed692ab710141cad9d5d218b20643a9cb538ace976", - "sha256:588f9cc4bfab04c45fbd19c1354b5ade377a8124d6151d511c83730a9b6b2338", - "sha256:5a10661accd36b6e2e8855addcf3d675d6222006a15795420a39c040362def66", - "sha256:628f60be272512ca1123524969649a8cb5ae8b31cca349f7c6f8903daf9034d7", - "sha256:6dcc43a88e25b815c2dea1c6fac7339779fc988f5df8396e1de01610604a7c38", - "sha256:70e37cec0ac0fe95c85b74ca4e0620169590fd5d3f44765f3c3a532cedb0e5fd", - "sha256:7274735fb6fb5d67d3789ddec2cd53ed6362539b41aa6cc0d33a06c003aaa390", - "sha256:78e12972e144da47326958ac40c2bd1c1cca908edc8b01c26a36f9ffd3dce466", - "sha256:790cbd3c8d09f3a6d9c47c4558841e25bac34eb7a0864a9def8f26be0b8706af", - "sha256:79792c8fe8e9d06ebc50fe23266522c8c89f20aa94ac8e80472917ecdce1e5ba", - "sha256:865afedf35aaef6df6344bee0de391ee5e99d6e802950a237f9fb9b13e441f91", - "sha256:870fd401ec7b64a895cff8e206ee16569158db00254b2f7157b4c9a5db72c722", - "sha256:963815c226b29b0176d5e3d37fc9de46e2778ce4636a5a7af11a48122ef2577c", - "sha256:9726791484f08e394af0b59eb80489ad94d0a53bbb58ab1837dcad4d58489863", - "sha256:9de84a71bb7979aa8c089c4fb0ea0e2ed3917df3fb2a287a41aaea54bbad7f5d", - "sha256:b2c324ddc5d6dbd3f13680ad16a29425841876a84a1de23a984236d1afff4fa6", - "sha256:b86ae13c597fca087cb8c193870507c8916cefb21e52e1897da320b5a35075e5", - "sha256:ba0488d4dbba2af5bf9596b849873102d612e49a118c512d9d302ceafa36e01a", - "sha256:d78702af4102a3a4e23bb7372cec283e78f32f5573d92091aa6aaba870370fe1", - "sha256:def0e5d681dd3eb562b059d355ae8bebe27f5cc455ab7c2b6655586b63d3a8ea", - "sha256:e085d1babcb419bbe58e2e805ac61924dac4ca45a07c9fa081144739e500aa3c", - "sha256:e2cfcbab37c082a5087aba5ff00209999053260441caadd4f0e8f4c2d6b72088", - "sha256:e742f1f5dcaf222e8471c37ee3d1fd561568a16bb52e031c25674ff1cf9702d5", - "sha256:f06819b028b8ef9010281e74c59cb35483933583043091ed6b261bb1540f11cc", - "sha256:f15f2d60a11c306de7700ee9f65df7e9e463848dbea9c8051e293b704038da60", - "sha256:f31338ee269d201abe76083a990905473987371ff6f3fdb76a3f9073a361cf37", - "sha256:f6b88c8d302c3dac8dff7766955e38d670c82e0d79edfc7eae47d6bb2c186594" - ], - "version": "==1.2.1" + "sha256:03b1e0775edbe6a4c64effb05fff2ce1429b76d29d754aa5ee2d848b60033351", + "sha256:09d008237baabf52a5d4f5a6fcf9b3c03408f3f61a69c404472a16861a73917e", + 
"sha256:10325f0ffac2400b1ec09537b7e403419dcd25d9fee602a44e8a32119af9079e", + "sha256:1db9f964ed9c52dc5bd6127f0dd90ac89791daa690a5665cc01eae185912e1ba", + "sha256:409846be9d6bdcbd78b9e5afe2f64b2da5a923dd7c1cd0615ce589489533fdbb", + "sha256:4907040f62b91c2e170359c3d36c000af783f0fa1516a83d6c1517cde0af5340", + "sha256:6c0543f2fdd38dee631fb023c0f31c284a532d205590b393d72009c14847f5b1", + "sha256:826b9f5fbb7f908a13aa1efd4b7321e36992f5868d5d8311c7b40cf9b11ca0e7", + "sha256:a7695a378c2ce402405ea37b12c7a338a8755e081869bd6b95858893ceb617ae", + "sha256:a84c31e8409b420c3ca57fd30c7589378d6fdc8d155d866a7f8e6e80dec6fd06", + "sha256:adadeeae5500de0da2b9e8dd478520d0a9945b577b2198f2462555e68f58e7ef", + "sha256:b283a76a83fe463c9587a2c88003f800e08c3929dfbeba833b78260f9c209785", + "sha256:c19a7389ab3cd712058a8c3c9ffd8d27a57f3d84b9c91a931f542682bb3d269d", + "sha256:c3bb4bd2aca82fb498247deeac12265921fe231502a6bc6edea3ee7fe6c40a7a", + "sha256:c5ea60ece0c0c1c849025bfc541b60a6751b491b6f11dd9ef37ab5b8c9041921", + "sha256:db61a640ca20f237317d27bc658c1fc54c7581ff7f6502d112922dc285bdabee" + ], + "version": "==1.3.0" }, "six": { "hashes": [ diff --git a/README.md b/README.md index 0615daf..156f5ac 100644 --- a/README.md +++ b/README.md @@ -1,2 +1,12 @@ # PyOctoscraper -A Python based scraper to download Python source code from Github +A Python based scraper to download Python source code from Github and train an RNN to generate source code. I have no hopes that the code generated will be useful, or even valid, but it's a fun experiment nonetheless. + +## Scraper + +I could not find any dataset of source code, so I scraped it myself. The [scraper.py](scraper.py) does the magic. To keep things sane, we're only interested in `keras` code written in `python` that have more than 500 stars. The rationale being that well written code is more likely to be written correctly (not exactly proof, but a close enough approximation). + +## Generation + +A super big shoutout to [Max Woolf](http://minimaxir.com/) for creating the [textgenrnn](https://github.com/minimaxir/textgenrnn) package. It made my life considerably easier and I highly recommend it for quick and dirty projects. 
+
+## Samples
\ No newline at end of file
diff --git a/Scraper/octoscrape.py b/Scraper/octoscrape.py
index c84d14b..a1a6275 100644
--- a/Scraper/octoscrape.py
+++ b/Scraper/octoscrape.py
@@ -15,7 +15,7 @@ def __init__(self, page=0):
 
     def search_repos(self):
         return self.g.search_repositories(
-            query='keras stars:>=500 fork:true language:python').get_page(self.page)
+            query='keras stars:>=1000 fork:true language:python').get_page(self.page)
 
     def get_contents(self, repo, file_extension):
         try:
diff --git a/Trainer/__init__.py b/Trainer/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/Trainer/trainer.py b/Trainer/trainer.py
new file mode 100644
index 0000000..ba6d399
--- /dev/null
+++ b/Trainer/trainer.py
@@ -0,0 +1,81 @@
+from keras.models import Sequential
+from keras.layers import Dropout, LSTM, Dense, Bidirectional, Activation, Embedding
+from keras.callbacks import ModelCheckpoint, EarlyStopping
+
+import numpy as np
+
+
+class Trainer:
+    """Word-level language model over a text corpus: Trainer('data/python.txt').train()"""
+
+    def __init__(self, textfile, seq_length=50, batch_size=32):
+        self.textfile = textfile
+        self.seq_length = seq_length
+        self.BATCH_SIZE = batch_size
+
+        self._process_content()
+        self._build_model()
+        self._setup_checkpoints()
+
+    def _setup_checkpoints(self):
+        model_checkpoint = ModelCheckpoint(
+            'model.hdf5', monitor='acc', save_best_only=True)
+        earlystopping_checkpoint = EarlyStopping(
+            monitor='acc', patience=20)
+        self._checkpoints = [model_checkpoint, earlystopping_checkpoint]
+
+    def _build_model(self):
+        model = Sequential()
+        model.add(Embedding(input_dim=len(self._words), output_dim=1024))
+
+        # The Embedding layer already defines the input shape, so the
+        # Bidirectional wrapper needs no input_shape argument of its own.
+        model.add(Bidirectional(LSTM(128)))
+
+        model.add(Dropout(0.5))
+        model.add(Dense(len(self._words)))
+        model.add(Activation('softmax'))
+        model.compile(loss='sparse_categorical_crossentropy',
+                      optimizer="adam", metrics=['accuracy'])
+
+        self._model = model
+
+    def _process_content(self):
+        with open(self.textfile, 'r') as file:
+            filecontents = file.read()
+        filecontents = filecontents.replace('\n', ' \n ')
+
+        text_in_words = [w for w in filecontents.split(
+            ' ') if w.strip() != '' or w == '\n']
+        # Sort the vocabulary so word indices are stable across runs.
+        self._words = sorted(set(text_in_words))
+
+        self._word_indices = dict((c, i) for i, c in enumerate(self._words))
+        self._indices_word = dict((i, c) for i, c in enumerate(self._words))
+
+        STEP = 1
+        self._codelines = []
+        self._next_words = []
+        for i in range(0, len(text_in_words) - self.seq_length, STEP):
+            self._codelines.append(text_in_words[i: i + self.seq_length])
+            self._next_words.append(text_in_words[i + self.seq_length])
+
+    def _generator(self, sentence_list, next_word_list, batch_size):
+        index = 0
+        while True:
+            x = np.zeros((batch_size, self.seq_length), dtype=np.int32)
+            y = np.zeros((batch_size), dtype=np.int32)
+            for i in range(batch_size):
+                for t, w in enumerate(sentence_list[index % len(sentence_list)]):
+                    x[i, t] = self._word_indices[w]
+                y[i] = self._word_indices[next_word_list[index %
+                                                         len(sentence_list)]]
+                index = index + 1
+            yield x, y
+
+    def train(self):
+        self._model.fit_generator(self._generator(self._codelines, self._next_words, self.BATCH_SIZE),
+                                  steps_per_epoch=int(
+                                      len(self._codelines)/self.BATCH_SIZE) + 1,
+                                  epochs=100,
+                                  callbacks=self._checkpoints)
diff --git a/data/python.txt b/data/python.txt
deleted file mode 100644
index 12c95df..0000000
--- a/data/python.txt
+++ /dev/null
@@ -1,475916 +0,0 @@
-from setuptools import setup
-from setuptools
import find_packages - -long_description = ''' -Keras is a high-level neural networks API, -written in Python and capable of running on top of -TensorFlow, CNTK, or Theano. - -Use Keras if you need a deep learning library that: - -- Allows for easy and fast prototyping - (through user friendliness, modularity, and extensibility). -- Supports both convolutional networks and recurrent networks, - as well as combinations of the two. -- Runs seamlessly on CPU and GPU. - -Read the documentation at: https://keras.io/ - -For a detailed overview of what makes Keras special, see: -https://keras.io/why-use-keras/ - -Keras is compatible with Python 2.7-3.6 -and is distributed under the MIT license. -''' - -setup(name='Keras', - version='2.2.4', - description='Deep Learning for humans', - long_description=long_description, - author='Francois Chollet', - author_email='francois.chollet@gmail.com', - url='https://github.com/keras-team/keras', - download_url='https://github.com/keras-team/keras/tarball/2.2.4', - license='MIT', - install_requires=['numpy>=1.9.1', - 'scipy>=0.14', - 'six>=1.9.0', - 'pyyaml', - 'h5py', - 'keras_applications>=1.0.6', - 'keras_preprocessing>=1.0.5'], - extras_require={ - 'visualize': ['pydot>=1.2.4'], - 'tests': ['pytest', - 'pytest-pep8', - 'pytest-xdist', - 'flaky', - 'pytest-cov', - 'pandas', - 'requests', - 'markdown'], - }, - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Topic :: Software Development :: Libraries', - 'Topic :: Software Development :: Libraries :: Python Modules' - ], - packages=find_packages()) -import pyux -import keras -import json - - -import keras.backend.tensorflow_backend -import keras.backend.theano_backend -import keras.backend.cntk_backend -import keras.backend.numpy_backend -import keras.utils.test_utils - -sign = pyux.sign(keras) - -with open('api.json', 'w') as f: - json.dump(sign, f) -# -*- coding: utf-8 -*- -from __future__ import print_function -from __future__ import unicode_literals - -import re -import inspect -import os -import shutil -import six - -try: - import pathlib -except ImportError: - import pathlib2 as pathlib - -import keras -from keras import backend as K -from keras.backend import numpy_backend - -from docs.structure import EXCLUDE -from docs.structure import PAGES -from docs.structure import template_np_implementation -from docs.structure import template_hidden_np_implementation - -import sys -if sys.version[0] == '2': - reload(sys) - sys.setdefaultencoding('utf8') - -keras_dir = pathlib.Path(__file__).resolve().parents[1] - - -def get_function_signature(function, method=True): - wrapped = getattr(function, '_original_function', None) - if wrapped is None: - signature = inspect.getargspec(function) - else: - signature = inspect.getargspec(wrapped) - defaults = signature.defaults - if method: - args = signature.args[1:] - else: - args = signature.args - if defaults: - kwargs = zip(args[-len(defaults):], defaults) - args = args[:-len(defaults)] - else: - kwargs = [] - st = '%s.%s(' % (clean_module_name(function.__module__), function.__name__) - - for a in args: - st += str(a) + ', ' - for a, v in kwargs: - if isinstance(v, str): - v = '\'' + v + '\'' - st += str(a) + '=' + str(v) + 
', ' - if kwargs or args: - signature = st[:-2] + ')' - else: - signature = st + ')' - return post_process_signature(signature) - - -def get_class_signature(cls): - try: - class_signature = get_function_signature(cls.__init__) - class_signature = class_signature.replace('__init__', cls.__name__) - except (TypeError, AttributeError): - # in case the class inherits from object and does not - # define __init__ - class_signature = "{clean_module_name}.{cls_name}()".format( - clean_module_name=cls.__module__, - cls_name=cls.__name__ - ) - return post_process_signature(class_signature) - - -def post_process_signature(signature): - parts = re.split(r'\.(?!\d)', signature) - if len(parts) >= 4: - if parts[1] == 'layers': - signature = 'keras.layers.' + '.'.join(parts[3:]) - if parts[1] == 'utils': - signature = 'keras.utils.' + '.'.join(parts[3:]) - if parts[1] == 'backend': - signature = 'keras.backend.' + '.'.join(parts[3:]) - return signature - - -def clean_module_name(name): - if name.startswith('keras_applications'): - name = name.replace('keras_applications', 'keras.applications') - if name.startswith('keras_preprocessing'): - name = name.replace('keras_preprocessing', 'keras.preprocessing') - return name - - -def class_to_source_link(cls): - module_name = clean_module_name(cls.__module__) - path = module_name.replace('.', '/') - path += '.py' - line = inspect.getsourcelines(cls)[-1] - link = ('https://github.com/keras-team/' - 'keras/blob/master/' + path + '#L' + str(line)) - return '[[source]](' + link + ')' - - -def code_snippet(snippet): - result = '```python\n' - result += snippet.encode('unicode_escape').decode('utf8') + '\n' - result += '```\n' - return result - - -def count_leading_spaces(s): - ws = re.search(r'\S', s) - if ws: - return ws.start() - else: - return 0 - - -def process_list_block(docstring, starting_point, section_end, - leading_spaces, marker): - ending_point = docstring.find('\n\n', starting_point) - block = docstring[starting_point: - (ending_point - 1 if ending_point > -1 - else section_end)] - # Place marker for later reinjection. - docstring_slice = docstring[ - starting_point:section_end].replace(block, marker) - docstring = (docstring[:starting_point] + - docstring_slice + - docstring[section_end:]) - lines = block.split('\n') - # Remove the computed number of leading white spaces from each line. - lines = [re.sub('^' + ' ' * leading_spaces, '', line) for line in lines] - # Usually lines have at least 4 additional leading spaces. - # These have to be removed, but first the list roots have to be detected. - top_level_regex = r'^ ([^\s\\\(]+):(.*)' - top_level_replacement = r'- __\1__:\2' - lines = [re.sub(top_level_regex, top_level_replacement, line) - for line in lines] - # All the other lines get simply the 4 leading space (if present) removed - lines = [re.sub(r'^ ', '', line) for line in lines] - # Fix text lines after lists - indent = 0 - text_block = False - for i in range(len(lines)): - line = lines[i] - spaces = re.search(r'\S', line) - if spaces: - # If it is a list element - if line[spaces.start()] == '-': - indent = spaces.start() + 1 - if text_block: - text_block = False - lines[i] = '\n' + line - elif spaces.start() < indent: - text_block = True - indent = spaces.start() - lines[i] = '\n' + line - else: - text_block = False - indent = 0 - block = '\n'.join(lines) - return docstring, block - - -def process_docstring(docstring): - # First, extract code blocks and process them. 
- code_blocks = [] - if '```' in docstring: - tmp = docstring[:] - while '```' in tmp: - tmp = tmp[tmp.find('```'):] - index = tmp[3:].find('```') + 6 - snippet = tmp[:index] - # Place marker in docstring for later reinjection. - docstring = docstring.replace( - snippet, '$CODE_BLOCK_%d' % len(code_blocks)) - snippet_lines = snippet.split('\n') - # Remove leading spaces. - num_leading_spaces = snippet_lines[-1].find('`') - snippet_lines = ([snippet_lines[0]] + - [line[num_leading_spaces:] - for line in snippet_lines[1:]]) - # Most code snippets have 3 or 4 more leading spaces - # on inner lines, but not all. Remove them. - inner_lines = snippet_lines[1:-1] - leading_spaces = None - for line in inner_lines: - if not line or line[0] == '\n': - continue - spaces = count_leading_spaces(line) - if leading_spaces is None: - leading_spaces = spaces - if spaces < leading_spaces: - leading_spaces = spaces - if leading_spaces: - snippet_lines = ([snippet_lines[0]] + - [line[leading_spaces:] - for line in snippet_lines[1:-1]] + - [snippet_lines[-1]]) - snippet = '\n'.join(snippet_lines) - code_blocks.append(snippet) - tmp = tmp[index:] - - # Format docstring lists. - section_regex = r'\n( +)# (.*)\n' - section_idx = re.search(section_regex, docstring) - shift = 0 - sections = {} - while section_idx and section_idx.group(2): - anchor = section_idx.group(2) - leading_spaces = len(section_idx.group(1)) - shift += section_idx.end() - next_section_idx = re.search(section_regex, docstring[shift:]) - if next_section_idx is None: - section_end = -1 - else: - section_end = shift + next_section_idx.start() - marker = '$' + anchor.replace(' ', '_') + '$' - docstring, content = process_list_block(docstring, - shift, - section_end, - leading_spaces, - marker) - sections[marker] = content - # `docstring` has changed, so we can't use `next_section_idx` anymore - # we have to recompute it - section_idx = re.search(section_regex, docstring[shift:]) - - # Format docstring section titles. - docstring = re.sub(r'\n(\s+)# (.*)\n', - r'\n\1__\2__\n\n', - docstring) - - # Strip all remaining leading spaces. - lines = docstring.split('\n') - docstring = '\n'.join([line.lstrip(' ') for line in lines]) - - # Reinject list blocks. - for marker, content in sections.items(): - docstring = docstring.replace(marker, content) - - # Reinject code blocks. - for i, code_block in enumerate(code_blocks): - docstring = docstring.replace( - '$CODE_BLOCK_%d' % i, code_block) - return docstring - - -def add_np_implementation(function, docstring): - np_implementation = getattr(numpy_backend, function.__name__) - code = inspect.getsource(np_implementation) - code_lines = code.split('\n') - for i in range(len(code_lines)): - if code_lines[i]: - # if there is something on the line, add 8 spaces. 
- code_lines[i] = ' ' + code_lines[i] - code = '\n'.join(code_lines[:-1]) - - if len(code_lines) < 10: - section = template_np_implementation.replace('{{code}}', code) - else: - section = template_hidden_np_implementation.replace('{{code}}', code) - return docstring.replace('{{np_implementation}}', section) - - -def read_file(path): - with open(path) as f: - return f.read() - - -def collect_class_methods(cls, methods): - if isinstance(methods, (list, tuple)): - return [getattr(cls, m) if isinstance(m, str) else m for m in methods] - methods = [] - for _, method in inspect.getmembers(cls, predicate=inspect.isroutine): - if method.__name__[0] == '_' or method.__name__ in EXCLUDE: - continue - methods.append(method) - return methods - - -def render_function(function, method=True): - subblocks = [] - signature = get_function_signature(function, method=method) - if method: - signature = signature.replace( - clean_module_name(function.__module__) + '.', '') - subblocks.append('### ' + function.__name__ + '\n') - subblocks.append(code_snippet(signature)) - docstring = function.__doc__ - if docstring: - if ('backend' in signature and - '{{np_implementation}}' in docstring): - docstring = add_np_implementation(function, docstring) - subblocks.append(process_docstring(docstring)) - return '\n\n'.join(subblocks) - - -def read_page_data(page_data, type): - assert type in ['classes', 'functions', 'methods'] - data = page_data.get(type, []) - for module in page_data.get('all_module_{}'.format(type), []): - module_data = [] - for name in dir(module): - if name[0] == '_' or name in EXCLUDE: - continue - module_member = getattr(module, name) - if (inspect.isclass(module_member) and type == 'classes' or - inspect.isfunction(module_member) and type == 'functions'): - instance = module_member - if module.__name__ in instance.__module__: - if instance not in module_data: - module_data.append(instance) - module_data.sort(key=lambda x: id(x)) - data += module_data - return data - - -def get_module_docstring(filepath): - """Extract the module docstring. - - Also finds the line at which the docstring ends. - """ - co = compile(open(filepath).read(), filepath, 'exec') - if co.co_consts and isinstance(co.co_consts[0], six.string_types): - docstring = co.co_consts[0] - else: - print('Could not get the docstring from ' + filepath) - docstring = '' - return docstring, co.co_firstlineno - - -def copy_examples(examples_dir, destination_dir): - """Copy the examples directory in the documentation. - - Prettify files by extracting the docstrings written in Markdown. - """ - pathlib.Path(destination_dir).mkdir(exist_ok=True) - for file in os.listdir(examples_dir): - if not file.endswith('.py'): - continue - module_path = os.path.join(examples_dir, file) - docstring, starting_line = get_module_docstring(module_path) - destination_file = os.path.join(destination_dir, file[:-2] + 'md') - with open(destination_file, 'w+') as f_out, \ - open(os.path.join(examples_dir, file), 'r+') as f_in: - - f_out.write(docstring + '\n\n') - - # skip docstring - for _ in range(starting_line): - next(f_in) - - f_out.write('```python\n') - # next line might be empty. - line = next(f_in) - if line != '\n': - f_out.write(line) - - # copy the rest of the file. - for line in f_in: - f_out.write(line) - f_out.write('```') - - -def generate(sources_dir): - """Generates the markdown files for the documentation. - - # Arguments - sources_dir: Where to put the markdown files. 
- """ - template_dir = os.path.join(str(keras_dir), 'docs', 'templates') - - if K.backend() != 'tensorflow': - raise RuntimeError('The documentation must be built ' - 'with the TensorFlow backend because this ' - 'is the only backend with docstrings.') - - print('Cleaning up existing sources directory.') - if os.path.exists(sources_dir): - shutil.rmtree(sources_dir) - - print('Populating sources directory with templates.') - shutil.copytree(template_dir, sources_dir) - - readme = read_file(os.path.join(str(keras_dir), 'README.md')) - index = read_file(os.path.join(template_dir, 'index.md')) - index = index.replace('{{autogenerated}}', readme[readme.find('##'):]) - with open(os.path.join(sources_dir, 'index.md'), 'w') as f: - f.write(index) - - print('Generating docs for Keras %s.' % keras.__version__) - for page_data in PAGES: - classes = read_page_data(page_data, 'classes') - - blocks = [] - for element in classes: - if not isinstance(element, (list, tuple)): - element = (element, []) - cls = element[0] - subblocks = [] - signature = get_class_signature(cls) - subblocks.append('' + - class_to_source_link(cls) + '') - if element[1]: - subblocks.append('## ' + cls.__name__ + ' class\n') - else: - subblocks.append('### ' + cls.__name__ + '\n') - subblocks.append(code_snippet(signature)) - docstring = cls.__doc__ - if docstring: - subblocks.append(process_docstring(docstring)) - methods = collect_class_methods(cls, element[1]) - if methods: - subblocks.append('\n---') - subblocks.append('## ' + cls.__name__ + ' methods\n') - subblocks.append('\n---\n'.join( - [render_function(method, method=True) - for method in methods])) - blocks.append('\n'.join(subblocks)) - - methods = read_page_data(page_data, 'methods') - - for method in methods: - blocks.append(render_function(method, method=True)) - - functions = read_page_data(page_data, 'functions') - - for function in functions: - blocks.append(render_function(function, method=False)) - - if not blocks: - raise RuntimeError('Found no content for page ' + - page_data['page']) - - mkdown = '\n----\n\n'.join(blocks) - # Save module page. - # Either insert content into existing page, - # or create page otherwise. 
- page_name = page_data['page'] - path = os.path.join(sources_dir, page_name) - if os.path.exists(path): - template = read_file(path) - if '{{autogenerated}}' not in template: - raise RuntimeError('Template found for ' + path + - ' but missing {{autogenerated}}' - ' tag.') - mkdown = template.replace('{{autogenerated}}', mkdown) - print('...inserting autogenerated content into template:', path) - else: - print('...creating new page with autogenerated content:', path) - subdir = os.path.dirname(path) - if not os.path.exists(subdir): - os.makedirs(subdir) - with open(path, 'w') as f: - f.write(mkdown) - - shutil.copyfile(os.path.join(str(keras_dir), 'CONTRIBUTING.md'), - os.path.join(str(sources_dir), 'contributing.md')) - copy_examples(os.path.join(str(keras_dir), 'examples'), - os.path.join(str(sources_dir), 'examples')) - - -if __name__ == '__main__': - generate(os.path.join(str(keras_dir), 'docs', 'sources')) -# -*- coding: utf-8 -*- -''' -General documentation architecture: - -Home -Index - -- Getting started - Getting started with the sequential model - Getting started with the functional api - FAQ - -- Models - About Keras models - explain when one should use Sequential or functional API - explain compilation step - explain weight saving, weight loading - explain serialization, deserialization - Sequential - Model (functional API) - -- Layers - About Keras layers - explain common layer functions: get_weights, set_weights, get_config - explain input_shape - explain usage on non-Keras tensors - Core Layers - Convolutional Layers - Pooling Layers - Locally-connected Layers - Recurrent Layers - Embedding Layers - Merge Layers - Advanced Activations Layers - Normalization Layers - Noise Layers - Layer Wrappers - Writing your own Keras layers - -- Preprocessing - Sequence Preprocessing - Text Preprocessing - Image Preprocessing - -Losses -Metrics -Optimizers -Activations -Callbacks -Datasets -Applications -Backend -Initializers -Regularizers -Constraints -Visualization -Scikit-learn API -Utils -Contributing - -''' -from keras import utils -from keras import layers -from keras.layers import advanced_activations -from keras.layers import noise -from keras.layers import wrappers -from keras import initializers -from keras import optimizers -from keras import callbacks -from keras import models -from keras import losses -from keras import metrics -from keras import backend -from keras import constraints -from keras import activations -from keras import preprocessing - - -EXCLUDE = { - 'Optimizer', - 'TFOptimizer', - 'Wrapper', - 'get_session', - 'set_session', - 'CallbackList', - 'serialize', - 'deserialize', - 'get', - 'set_image_dim_ordering', - 'normalize_data_format', - 'image_dim_ordering', - 'get_variable_shape', - 'Constraint' -} - -# For each class to document, it is possible to: -# 1) Document only the class: [classA, classB, ...] -# 2) Document all its methods: [classA, (classB, "*")] -# 3) Choose which methods to document (methods listed as strings): -# [classA, (classB, ["method1", "method2", ...]), ...] -# 4) Choose which methods to document (methods listed as qualified names): -# [classA, (classB, [module.classB.method1, module.classB.method2, ...]), ...] 
-PAGES = [ - { - 'page': 'models/sequential.md', - 'methods': [ - models.Sequential.compile, - models.Sequential.fit, - models.Sequential.evaluate, - models.Sequential.predict, - models.Sequential.train_on_batch, - models.Sequential.test_on_batch, - models.Sequential.predict_on_batch, - models.Sequential.fit_generator, - models.Sequential.evaluate_generator, - models.Sequential.predict_generator, - models.Sequential.get_layer, - ], - }, - { - 'page': 'models/model.md', - 'methods': [ - models.Model.compile, - models.Model.fit, - models.Model.evaluate, - models.Model.predict, - models.Model.train_on_batch, - models.Model.test_on_batch, - models.Model.predict_on_batch, - models.Model.fit_generator, - models.Model.evaluate_generator, - models.Model.predict_generator, - models.Model.get_layer, - ] - }, - { - 'page': 'layers/core.md', - 'classes': [ - layers.Dense, - layers.Activation, - layers.Dropout, - layers.Flatten, - layers.Input, - layers.Reshape, - layers.Permute, - layers.RepeatVector, - layers.Lambda, - layers.ActivityRegularization, - layers.Masking, - layers.SpatialDropout1D, - layers.SpatialDropout2D, - layers.SpatialDropout3D, - ], - }, - { - 'page': 'layers/convolutional.md', - 'classes': [ - layers.Conv1D, - layers.Conv2D, - layers.SeparableConv1D, - layers.SeparableConv2D, - layers.DepthwiseConv2D, - layers.Conv2DTranspose, - layers.Conv3D, - layers.Conv3DTranspose, - layers.Cropping1D, - layers.Cropping2D, - layers.Cropping3D, - layers.UpSampling1D, - layers.UpSampling2D, - layers.UpSampling3D, - layers.ZeroPadding1D, - layers.ZeroPadding2D, - layers.ZeroPadding3D, - ], - }, - { - 'page': 'layers/pooling.md', - 'classes': [ - layers.MaxPooling1D, - layers.MaxPooling2D, - layers.MaxPooling3D, - layers.AveragePooling1D, - layers.AveragePooling2D, - layers.AveragePooling3D, - layers.GlobalMaxPooling1D, - layers.GlobalAveragePooling1D, - layers.GlobalMaxPooling2D, - layers.GlobalAveragePooling2D, - layers.GlobalMaxPooling3D, - layers.GlobalAveragePooling3D, - ], - }, - { - 'page': 'layers/local.md', - 'classes': [ - layers.LocallyConnected1D, - layers.LocallyConnected2D, - ], - }, - { - 'page': 'layers/recurrent.md', - 'classes': [ - layers.RNN, - layers.SimpleRNN, - layers.GRU, - layers.LSTM, - layers.ConvLSTM2D, - layers.ConvLSTM2DCell, - layers.SimpleRNNCell, - layers.GRUCell, - layers.LSTMCell, - layers.CuDNNGRU, - layers.CuDNNLSTM, - ], - }, - { - 'page': 'layers/embeddings.md', - 'classes': [ - layers.Embedding, - ], - }, - { - 'page': 'layers/normalization.md', - 'classes': [ - layers.BatchNormalization, - ], - }, - { - 'page': 'layers/advanced-activations.md', - 'all_module_classes': [advanced_activations], - }, - { - 'page': 'layers/noise.md', - 'all_module_classes': [noise], - }, - { - 'page': 'layers/merge.md', - 'classes': [ - layers.Add, - layers.Subtract, - layers.Multiply, - layers.Average, - layers.Maximum, - layers.Minimum, - layers.Concatenate, - layers.Dot, - ], - 'functions': [ - layers.add, - layers.subtract, - layers.multiply, - layers.average, - layers.maximum, - layers.minimum, - layers.concatenate, - layers.dot, - ] - }, - { - 'page': 'preprocessing/sequence.md', - 'functions': [ - preprocessing.sequence.pad_sequences, - preprocessing.sequence.skipgrams, - preprocessing.sequence.make_sampling_table, - ], - 'classes': [ - preprocessing.sequence.TimeseriesGenerator, - ] - }, - { - 'page': 'preprocessing/image.md', - 'classes': [ - (preprocessing.image.ImageDataGenerator, '*') - ] - }, - { - 'page': 'preprocessing/text.md', - 'functions': [ - 
preprocessing.text.hashing_trick, - preprocessing.text.one_hot, - preprocessing.text.text_to_word_sequence, - ], - 'classes': [ - preprocessing.text.Tokenizer, - ] - }, - { - 'page': 'layers/wrappers.md', - 'all_module_classes': [wrappers], - }, - { - 'page': 'metrics.md', - 'all_module_functions': [metrics], - }, - { - 'page': 'losses.md', - 'all_module_functions': [losses], - }, - { - 'page': 'initializers.md', - 'all_module_functions': [initializers], - 'all_module_classes': [initializers], - }, - { - 'page': 'optimizers.md', - 'all_module_classes': [optimizers], - }, - { - 'page': 'callbacks.md', - 'all_module_classes': [callbacks], - }, - { - 'page': 'activations.md', - 'all_module_functions': [activations], - }, - { - 'page': 'backend.md', - 'all_module_functions': [backend], - }, - { - 'page': 'constraints.md', - 'all_module_classes': [constraints], - }, - { - 'page': 'utils.md', - 'functions': [utils.to_categorical, - utils.normalize, - utils.get_file, - utils.print_summary, - utils.plot_model, - utils.multi_gpu_model], - 'classes': [utils.CustomObjectScope, - utils.HDF5Matrix, - utils.Sequence], - }, -] - -ROOT = 'http://keras.io/' - -template_np_implementation = """# Numpy implementation - - ```python -{{code}} - ``` -""" - -template_hidden_np_implementation = """# Numpy implementation - -
- Show the Numpy implementation - - ```python -{{code}} - ``` - -
-""" -# -*- coding: utf-8 -*- -''' -# An implementation of sequence to sequence learning for performing addition - -Input: "535+61" -Output: "596" -Padding is handled by using a repeated sentinel character (space) - -Input may optionally be reversed, shown to increase performance in many tasks in: -"Learning to Execute" -http://arxiv.org/abs/1410.4615 -and -"Sequence to Sequence Learning with Neural Networks" -http://papers.nips.cc/paper/5346-sequence-to-sequence-learning-with-neural-networks.pdf -Theoretically it introduces shorter term dependencies between source and target. - -Two digits reversed: -+ One layer LSTM (128 HN), 5k training examples = 99% train/test accuracy in 55 epochs - -Three digits reversed: -+ One layer LSTM (128 HN), 50k training examples = 99% train/test accuracy in 100 epochs - -Four digits reversed: -+ One layer LSTM (128 HN), 400k training examples = 99% train/test accuracy in 20 epochs - -Five digits reversed: -+ One layer LSTM (128 HN), 550k training examples = 99% train/test accuracy in 30 epochs -''' # noqa - -from __future__ import print_function -from keras.models import Sequential -from keras import layers -import numpy as np -from six.moves import range - - -class CharacterTable(object): - """Given a set of characters: - + Encode them to a one-hot integer representation - + Decode the one-hot or integer representation to their character output - + Decode a vector of probabilities to their character output - """ - - def __init__(self, chars): - """Initialize character table. - - # Arguments - chars: Characters that can appear in the input. - """ - self.chars = sorted(set(chars)) - self.char_indices = dict((c, i) for i, c in enumerate(self.chars)) - self.indices_char = dict((i, c) for i, c in enumerate(self.chars)) - - def encode(self, C, num_rows): - """One-hot encode given string C. - - # Arguments - C: string, to be encoded. - num_rows: Number of rows in the returned one-hot encoding. This is - used to keep the # of rows for each data the same. - """ - x = np.zeros((num_rows, len(self.chars))) - for i, c in enumerate(C): - x[i, self.char_indices[c]] = 1 - return x - - def decode(self, x, calc_argmax=True): - """Decode the given vector or 2D array to their character output. - - # Arguments - x: A vector or a 2D array of probabilities or one-hot representations; - or a vector of character indices (used with `calc_argmax=False`). - calc_argmax: Whether to find the character index with maximum - probability, defaults to `True`. - """ - if calc_argmax: - x = x.argmax(axis=-1) - return ''.join(self.indices_char[x] for x in x) - - -class colors: - ok = '\033[92m' - fail = '\033[91m' - close = '\033[0m' - - -# Parameters for the model and dataset. -TRAINING_SIZE = 50000 -DIGITS = 3 -REVERSE = True - -# Maximum length of input is 'int + int' (e.g., '345+678'). Maximum length of -# int is DIGITS. -MAXLEN = DIGITS + 1 + DIGITS - -# All the numbers, plus sign and space for padding. -chars = '0123456789+ ' -ctable = CharacterTable(chars) - -questions = [] -expected = [] -seen = set() -print('Generating data...') -while len(questions) < TRAINING_SIZE: - def f(): return int(''.join(np.random.choice(list('0123456789')) - for i in range(np.random.randint(1, DIGITS + 1)))) - a, b = f(), f() - # Skip any addition questions we've already seen - # Also skip any such that x+Y == Y+x (hence the sorting). - key = tuple(sorted((a, b))) - if key in seen: - continue - seen.add(key) - # Pad the data with spaces such that it is always MAXLEN. 
- q = '{}+{}'.format(a, b) - query = q + ' ' * (MAXLEN - len(q)) - ans = str(a + b) - # Answers can be of maximum size DIGITS + 1. - ans += ' ' * (DIGITS + 1 - len(ans)) - if REVERSE: - # Reverse the query, e.g., '12+345 ' becomes ' 543+21'. (Note the - # space used for padding.) - query = query[::-1] - questions.append(query) - expected.append(ans) -print('Total addition questions:', len(questions)) - -print('Vectorization...') -x = np.zeros((len(questions), MAXLEN, len(chars)), dtype=np.bool) -y = np.zeros((len(questions), DIGITS + 1, len(chars)), dtype=np.bool) -for i, sentence in enumerate(questions): - x[i] = ctable.encode(sentence, MAXLEN) -for i, sentence in enumerate(expected): - y[i] = ctable.encode(sentence, DIGITS + 1) - -# Shuffle (x, y) in unison as the later parts of x will almost all be larger -# digits. -indices = np.arange(len(y)) -np.random.shuffle(indices) -x = x[indices] -y = y[indices] - -# Explicitly set apart 10% for validation data that we never train over. -split_at = len(x) - len(x) // 10 -(x_train, x_val) = x[:split_at], x[split_at:] -(y_train, y_val) = y[:split_at], y[split_at:] - -print('Training Data:') -print(x_train.shape) -print(y_train.shape) - -print('Validation Data:') -print(x_val.shape) -print(y_val.shape) - -# Try replacing GRU, or SimpleRNN. -RNN = layers.LSTM -HIDDEN_SIZE = 128 -BATCH_SIZE = 128 -LAYERS = 1 - -print('Build model...') -model = Sequential() -# "Encode" the input sequence using an RNN, producing an output of HIDDEN_SIZE. -# Note: In a situation where your input sequences have a variable length, -# use input_shape=(None, num_feature). -model.add(RNN(HIDDEN_SIZE, input_shape=(MAXLEN, len(chars)))) -# As the decoder RNN's input, repeatedly provide with the last output of -# RNN for each time step. Repeat 'DIGITS + 1' times as that's the maximum -# length of output, e.g., when DIGITS=3, max output is 999+999=1998. -model.add(layers.RepeatVector(DIGITS + 1)) -# The decoder RNN could be multiple layers stacked or a single layer. -for _ in range(LAYERS): - # By setting return_sequences to True, return not only the last output but - # all the outputs so far in the form of (num_samples, timesteps, - # output_dim). This is necessary as TimeDistributed in the below expects - # the first dimension to be the timesteps. - model.add(RNN(HIDDEN_SIZE, return_sequences=True)) - -# Apply a dense layer to the every temporal slice of an input. For each of step -# of the output sequence, decide which character should be chosen. -model.add(layers.TimeDistributed( - layers.Dense(len(chars), activation='softmax'))) -model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) -model.summary() - -# Train the model each generation and show predictions against the validation -# dataset. -for iteration in range(1, 200): - print() - print('-' * 50) - print('Iteration', iteration) - model.fit(x_train, y_train, - batch_size=BATCH_SIZE, - epochs=1, - validation_data=(x_val, y_val)) - # Select 10 samples from the validation set at random so we can visualize - # errors. 
- for i in range(10): - ind = np.random.randint(0, len(x_val)) - rowx, rowy = x_val[np.array([ind])], y_val[np.array([ind])] - preds = model.predict_classes(rowx, verbose=0) - q = ctable.decode(rowx[0]) - correct = ctable.decode(rowy[0]) - guess = ctable.decode(preds[0], calc_argmax=False) - print('Q', q[::-1] if REVERSE else q, end=' ') - print('T', correct, end=' ') - if correct == guess: - print(colors.ok + '☑' + colors.close, end=' ') - else: - print(colors.fail + '☒' + colors.close, end=' ') - print(guess) -''' -#This example demonstrates how to write custom layers for Keras. - -We build a custom activation layer called 'Antirectifier', -which modifies the shape of the tensor that passes through it. -We need to specify two methods: `compute_output_shape` and `call`. - -Note that the same result can also be achieved via a Lambda layer. - -Because our custom layer is written with primitives from the Keras -backend (`K`), our code can run both on TensorFlow and Theano. -''' - -from __future__ import print_function -import keras -from keras.models import Sequential -from keras import layers -from keras.datasets import mnist -from keras import backend as K - - -class Antirectifier(layers.Layer): - '''This is the combination of a sample-wise - L2 normalization with the concatenation of the - positive part of the input with the negative part - of the input. The result is a tensor of samples that are - twice as large as the input samples. - - It can be used in place of a ReLU. - - # Input shape - 2D tensor of shape (samples, n) - - # Output shape - 2D tensor of shape (samples, 2*n) - - # Theoretical justification - When applying ReLU, assuming that the distribution - of the previous output is approximately centered around 0., - you are discarding half of your input. This is inefficient. - - Antirectifier allows to return all-positive outputs like ReLU, - without discarding any data. - - Tests on MNIST show that Antirectifier allows to train networks - with twice less parameters yet with comparable - classification accuracy as an equivalent ReLU-based network. 
- ''' - - def compute_output_shape(self, input_shape): - shape = list(input_shape) - assert len(shape) == 2 # only valid for 2D tensors - shape[-1] *= 2 - return tuple(shape) - - def call(self, inputs): - inputs -= K.mean(inputs, axis=1, keepdims=True) - inputs = K.l2_normalize(inputs, axis=1) - pos = K.relu(inputs) - neg = K.relu(-inputs) - return K.concatenate([pos, neg], axis=1) - - -# global parameters -batch_size = 128 -num_classes = 10 -epochs = 40 - -# the data, split between train and test sets -(x_train, y_train), (x_test, y_test) = mnist.load_data() - -x_train = x_train.reshape(60000, 784) -x_test = x_test.reshape(10000, 784) -x_train = x_train.astype('float32') -x_test = x_test.astype('float32') -x_train /= 255 -x_test /= 255 -print(x_train.shape[0], 'train samples') -print(x_test.shape[0], 'test samples') - -# convert class vectors to binary class matrices -y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) - -# build the model -model = Sequential() -model.add(layers.Dense(256, input_shape=(784,))) -model.add(Antirectifier()) -model.add(layers.Dropout(0.1)) -model.add(layers.Dense(256)) -model.add(Antirectifier()) -model.add(layers.Dropout(0.1)) -model.add(layers.Dense(num_classes)) -model.add(layers.Activation('softmax')) - -# compile the model -model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['accuracy']) - -# train the model -model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - verbose=1, - validation_data=(x_test, y_test)) - -# next, compare with an equivalent network -# with2x bigger Dense layers and ReLU -''' -#Trains a memory network on the bAbI dataset. - -References: - -- Jason Weston, Antoine Bordes, Sumit Chopra, Tomas Mikolov, Alexander M. Rush, - ["Towards AI-Complete Question Answering: - A Set of Prerequisite Toy Tasks"](http://arxiv.org/abs/1502.05698) - -- Sainbayar Sukhbaatar, Arthur Szlam, Jason Weston, Rob Fergus, - ["End-To-End Memory Networks"](http://arxiv.org/abs/1503.08895) - -Reaches 98.6% accuracy on task 'single_supporting_fact_10k' after 120 epochs. -Time per epoch: 3s on CPU (core i7). -''' -from __future__ import print_function - -from keras.models import Sequential, Model -from keras.layers.embeddings import Embedding -from keras.layers import Input, Activation, Dense, Permute, Dropout -from keras.layers import add, dot, concatenate -from keras.layers import LSTM -from keras.utils.data_utils import get_file -from keras.preprocessing.sequence import pad_sequences -from functools import reduce -import tarfile -import numpy as np -import re - - -def tokenize(sent): - '''Return the tokens of a sentence including punctuation. - - >>> tokenize('Bob dropped the apple. Where is the apple?') - ['Bob', 'dropped', 'the', 'apple', '.', 'Where', 'is', 'the', 'apple', '?'] - ''' - return [x.strip() for x in re.split(r'(\W+)?', sent) if x.strip()] - - -def parse_stories(lines, only_supporting=False): - '''Parse stories provided in the bAbi tasks format - - If only_supporting is true, only the sentences - that support the answer are kept. 
- ''' - data = [] - story = [] - for line in lines: - line = line.decode('utf-8').strip() - nid, line = line.split(' ', 1) - nid = int(nid) - if nid == 1: - story = [] - if '\t' in line: - q, a, supporting = line.split('\t') - q = tokenize(q) - if only_supporting: - # Only select the related substory - supporting = map(int, supporting.split()) - substory = [story[i - 1] for i in supporting] - else: - # Provide all the substories - substory = [x for x in story if x] - data.append((substory, q, a)) - story.append('') - else: - sent = tokenize(line) - story.append(sent) - return data - - -def get_stories(f, only_supporting=False, max_length=None): - '''Given a file name, read the file, - retrieve the stories, - and then convert the sentences into a single story. - - If max_length is supplied, - any stories longer than max_length tokens will be discarded. - ''' - data = parse_stories(f.readlines(), only_supporting=only_supporting) - def flatten(data): return reduce(lambda x, y: x + y, data) - data = [(flatten(story), q, answer) for story, q, answer in data - if not max_length or len(flatten(story)) < max_length] - return data - - -def vectorize_stories(data): - inputs, queries, answers = [], [], [] - for story, query, answer in data: - inputs.append([word_idx[w] for w in story]) - queries.append([word_idx[w] for w in query]) - answers.append(word_idx[answer]) - return (pad_sequences(inputs, maxlen=story_maxlen), - pad_sequences(queries, maxlen=query_maxlen), - np.array(answers)) - - -try: - path = get_file('babi-tasks-v1-2.tar.gz', - origin='https://s3.amazonaws.com/text-datasets/' - 'babi_tasks_1-20_v1-2.tar.gz') -except: - print('Error downloading dataset, please download it manually:\n' - '$ wget http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2' - '.tar.gz\n' - '$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz') - raise - - -challenges = { - # QA1 with 10,000 samples - 'single_supporting_fact_10k': 'tasks_1-20_v1-2/en-10k/qa1_' - 'single-supporting-fact_{}.txt', - # QA2 with 10,000 samples - 'two_supporting_facts_10k': 'tasks_1-20_v1-2/en-10k/qa2_' - 'two-supporting-facts_{}.txt', -} -challenge_type = 'single_supporting_fact_10k' -challenge = challenges[challenge_type] - -print('Extracting stories for the challenge:', challenge_type) -with tarfile.open(path) as tar: - train_stories = get_stories(tar.extractfile(challenge.format('train'))) - test_stories = get_stories(tar.extractfile(challenge.format('test'))) - -vocab = set() -for story, q, answer in train_stories + test_stories: - vocab |= set(story + q + [answer]) -vocab = sorted(vocab) - -# Reserve 0 for masking via pad_sequences -vocab_size = len(vocab) + 1 -story_maxlen = max(map(len, (x for x, _, _ in train_stories + test_stories))) -query_maxlen = max(map(len, (x for _, x, _ in train_stories + test_stories))) - -print('-') -print('Vocab size:', vocab_size, 'unique words') -print('Story max length:', story_maxlen, 'words') -print('Query max length:', query_maxlen, 'words') -print('Number of training stories:', len(train_stories)) -print('Number of test stories:', len(test_stories)) -print('-') -print('Here\'s what a "story" tuple looks like (input, query, answer):') -print(train_stories[0]) -print('-') -print('Vectorizing the word sequences...') - -word_idx = dict((c, i + 1) for i, c in enumerate(vocab)) -inputs_train, queries_train, answers_train = vectorize_stories(train_stories) -inputs_test, queries_test, answers_test = vectorize_stories(test_stories) - -print('-') -print('inputs: integer tensor of 
shape (samples, max_length)') -print('inputs_train shape:', inputs_train.shape) -print('inputs_test shape:', inputs_test.shape) -print('-') -print('queries: integer tensor of shape (samples, max_length)') -print('queries_train shape:', queries_train.shape) -print('queries_test shape:', queries_test.shape) -print('-') -print('answers: binary (1 or 0) tensor of shape (samples, vocab_size)') -print('answers_train shape:', answers_train.shape) -print('answers_test shape:', answers_test.shape) -print('-') -print('Compiling...') - -# placeholders -input_sequence = Input((story_maxlen,)) -question = Input((query_maxlen,)) - -# encoders -# embed the input sequence into a sequence of vectors -input_encoder_m = Sequential() -input_encoder_m.add(Embedding(input_dim=vocab_size, - output_dim=64)) -input_encoder_m.add(Dropout(0.3)) -# output: (samples, story_maxlen, embedding_dim) - -# embed the input into a sequence of vectors of size query_maxlen -input_encoder_c = Sequential() -input_encoder_c.add(Embedding(input_dim=vocab_size, - output_dim=query_maxlen)) -input_encoder_c.add(Dropout(0.3)) -# output: (samples, story_maxlen, query_maxlen) - -# embed the question into a sequence of vectors -question_encoder = Sequential() -question_encoder.add(Embedding(input_dim=vocab_size, - output_dim=64, - input_length=query_maxlen)) -question_encoder.add(Dropout(0.3)) -# output: (samples, query_maxlen, embedding_dim) - -# encode input sequence and questions (which are indices) -# to sequences of dense vectors -input_encoded_m = input_encoder_m(input_sequence) -input_encoded_c = input_encoder_c(input_sequence) -question_encoded = question_encoder(question) - -# compute a 'match' between the first input vector sequence -# and the question vector sequence -# shape: `(samples, story_maxlen, query_maxlen)` -match = dot([input_encoded_m, question_encoded], axes=(2, 2)) -match = Activation('softmax')(match) - -# add the match matrix with the second input vector sequence -# (samples, story_maxlen, query_maxlen) -response = add([match, input_encoded_c]) -response = Permute((2, 1))(response) # (samples, query_maxlen, story_maxlen) - -# concatenate the match matrix with the question vector sequence -answer = concatenate([response, question_encoded]) - -# the original paper uses a matrix multiplication for this reduction step. -# we choose to use a RNN instead. -answer = LSTM(32)(answer) # (samples, 32) - -# one regularization layer -- more would probably be needed. -answer = Dropout(0.3)(answer) -answer = Dense(vocab_size)(answer) # (samples, vocab_size) -# we output a probability distribution over the vocabulary -answer = Activation('softmax')(answer) - -# build the final model -model = Model([input_sequence, question], answer) -model.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - -# train -model.fit([inputs_train, queries_train], answers_train, - batch_size=32, - epochs=120, - validation_data=([inputs_test, queries_test], answers_test)) -''' -# Trains two recurrent neural networks based upon a story and a question. - -The resulting merged vector is then queried to answer a range of bAbI tasks. 
- -The results are comparable to those for an LSTM model provided in Weston et al.: -"Towards AI-Complete Question Answering: A Set of Prerequisite Toy Tasks" -http://arxiv.org/abs/1502.05698 - -Task Number | FB LSTM Baseline | Keras QA ---- | --- | --- -QA1 - Single Supporting Fact | 50 | 52.1 -QA2 - Two Supporting Facts | 20 | 37.0 -QA3 - Three Supporting Facts | 20 | 20.5 -QA4 - Two Arg. Relations | 61 | 62.9 -QA5 - Three Arg. Relations | 70 | 61.9 -QA6 - yes/No Questions | 48 | 50.7 -QA7 - Counting | 49 | 78.9 -QA8 - Lists/Sets | 45 | 77.2 -QA9 - Simple Negation | 64 | 64.0 -QA10 - Indefinite Knowledge | 44 | 47.7 -QA11 - Basic Coreference | 72 | 74.9 -QA12 - Conjunction | 74 | 76.4 -QA13 - Compound Coreference | 94 | 94.4 -QA14 - Time Reasoning | 27 | 34.8 -QA15 - Basic Deduction | 21 | 32.4 -QA16 - Basic Induction | 23 | 50.6 -QA17 - Positional Reasoning | 51 | 49.1 -QA18 - Size Reasoning | 52 | 90.8 -QA19 - Path Finding | 8 | 9.0 -QA20 - Agent's Motivations | 91 | 90.7 - -For the resources related to the bAbI project, refer to: -https://research.facebook.com/researchers/1543934539189348 - -### Notes - -- With default word, sentence, and query vector sizes, the GRU model achieves: - - 52.1% test accuracy on QA1 in 20 epochs (2 seconds per epoch on CPU) - - 37.0% test accuracy on QA2 in 20 epochs (16 seconds per epoch on CPU) -In comparison, the Facebook paper achieves 50% and 20% for the LSTM baseline. - -- The task does not traditionally parse the question separately. This likely -improves accuracy and is a good example of merging two RNNs. - -- The word vector embeddings are not shared between the story and question RNNs. - -- See how the accuracy changes given 10,000 training samples (en-10k) instead -of only 1000. 1000 was used in order to be comparable to the original paper. - -- Experiment with GRU, LSTM, and JZS1-3 as they give subtly different results. - -- The length and noise (i.e. 'useless' story components) impact the ability of -LSTMs / GRUs to provide the correct answer. Given only the supporting facts, -these RNNs can achieve 100% accuracy on many tasks. Memory networks and neural -networks that use attentional processes can efficiently search through this -noise to find the relevant statements, improving performance substantially. -This becomes especially obvious on QA2 and QA3, both far longer than QA1. -''' - -from __future__ import print_function -from functools import reduce -import re -import tarfile - -import numpy as np - -from keras.utils.data_utils import get_file -from keras.layers.embeddings import Embedding -from keras import layers -from keras.layers import recurrent -from keras.models import Model -from keras.preprocessing.sequence import pad_sequences - - -def tokenize(sent): - '''Return the tokens of a sentence including punctuation. - - >>> tokenize('Bob dropped the apple. Where is the apple?') - ['Bob', 'dropped', 'the', 'apple', '.', 'Where', 'is', 'the', 'apple', '?'] - ''' - return [x.strip() for x in re.split(r'(\W+)?', sent) if x.strip()] - - -def parse_stories(lines, only_supporting=False): - '''Parse stories provided in the bAbi tasks format - - If only_supporting is true, - only the sentences that support the answer are kept. 
- ''' - data = [] - story = [] - for line in lines: - line = line.decode('utf-8').strip() - nid, line = line.split(' ', 1) - nid = int(nid) - if nid == 1: - story = [] - if '\t' in line: - q, a, supporting = line.split('\t') - q = tokenize(q) - if only_supporting: - # Only select the related substory - supporting = map(int, supporting.split()) - substory = [story[i - 1] for i in supporting] - else: - # Provide all the substories - substory = [x for x in story if x] - data.append((substory, q, a)) - story.append('') - else: - sent = tokenize(line) - story.append(sent) - return data - - -def get_stories(f, only_supporting=False, max_length=None): - '''Given a file name, read the file, retrieve the stories, - and then convert the sentences into a single story. - - If max_length is supplied, - any stories longer than max_length tokens will be discarded. - ''' - data = parse_stories(f.readlines(), only_supporting=only_supporting) - def flatten(data): return reduce(lambda x, y: x + y, data) - data = [(flatten(story), q, answer) for story, q, answer in data - if not max_length or len(flatten(story)) < max_length] - return data - - -def vectorize_stories(data, word_idx, story_maxlen, query_maxlen): - xs = [] - xqs = [] - ys = [] - for story, query, answer in data: - x = [word_idx[w] for w in story] - xq = [word_idx[w] for w in query] - # let's not forget that index 0 is reserved - y = np.zeros(len(word_idx) + 1) - y[word_idx[answer]] = 1 - xs.append(x) - xqs.append(xq) - ys.append(y) - return (pad_sequences(xs, maxlen=story_maxlen), - pad_sequences(xqs, maxlen=query_maxlen), np.array(ys)) - - -RNN = recurrent.LSTM -EMBED_HIDDEN_SIZE = 50 -SENT_HIDDEN_SIZE = 100 -QUERY_HIDDEN_SIZE = 100 -BATCH_SIZE = 32 -EPOCHS = 20 -print('RNN / Embed / Sent / Query = {}, {}, {}, {}'.format(RNN, - EMBED_HIDDEN_SIZE, - SENT_HIDDEN_SIZE, - QUERY_HIDDEN_SIZE)) - -try: - path = get_file('babi-tasks-v1-2.tar.gz', - origin='https://s3.amazonaws.com/text-datasets/' - 'babi_tasks_1-20_v1-2.tar.gz') -except: - print('Error downloading dataset, please download it manually:\n' - '$ wget http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2' - '.tar.gz\n' - '$ mv tasks_1-20_v1-2.tar.gz ~/.keras/datasets/babi-tasks-v1-2.tar.gz') - raise - -# Default QA1 with 1000 samples -# challenge = 'tasks_1-20_v1-2/en/qa1_single-supporting-fact_{}.txt' -# QA1 with 10,000 samples -# challenge = 'tasks_1-20_v1-2/en-10k/qa1_single-supporting-fact_{}.txt' -# QA2 with 1000 samples -challenge = 'tasks_1-20_v1-2/en/qa2_two-supporting-facts_{}.txt' -# QA2 with 10,000 samples -# challenge = 'tasks_1-20_v1-2/en-10k/qa2_two-supporting-facts_{}.txt' -with tarfile.open(path) as tar: - train = get_stories(tar.extractfile(challenge.format('train'))) - test = get_stories(tar.extractfile(challenge.format('test'))) - -vocab = set() -for story, q, answer in train + test: - vocab |= set(story + q + [answer]) -vocab = sorted(vocab) - -# Reserve 0 for masking via pad_sequences -vocab_size = len(vocab) + 1 -word_idx = dict((c, i + 1) for i, c in enumerate(vocab)) -story_maxlen = max(map(len, (x for x, _, _ in train + test))) -query_maxlen = max(map(len, (x for _, x, _ in train + test))) - -x, xq, y = vectorize_stories(train, word_idx, story_maxlen, query_maxlen) -tx, txq, ty = vectorize_stories(test, word_idx, story_maxlen, query_maxlen) - -print('vocab = {}'.format(vocab)) -print('x.shape = {}'.format(x.shape)) -print('xq.shape = {}'.format(xq.shape)) -print('y.shape = {}'.format(y.shape)) -print('story_maxlen, query_maxlen = {}, {}'.format(story_maxlen, 
query_maxlen)) - -print('Build model...') - -sentence = layers.Input(shape=(story_maxlen,), dtype='int32') -encoded_sentence = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(sentence) -encoded_sentence = RNN(SENT_HIDDEN_SIZE)(encoded_sentence) - -question = layers.Input(shape=(query_maxlen,), dtype='int32') -encoded_question = layers.Embedding(vocab_size, EMBED_HIDDEN_SIZE)(question) -encoded_question = RNN(QUERY_HIDDEN_SIZE)(encoded_question) - -merged = layers.concatenate([encoded_sentence, encoded_question]) -preds = layers.Dense(vocab_size, activation='softmax')(merged) - -model = Model([sentence, question], preds) -model.compile(optimizer='adam', - loss='categorical_crossentropy', - metrics=['accuracy']) - -print('Training') -model.fit([x, xq], y, - batch_size=BATCH_SIZE, - epochs=EPOCHS, - validation_split=0.05) - -print('Evaluation') -loss, acc = model.evaluate([tx, txq], ty, - batch_size=BATCH_SIZE) -print('Test loss / test accuracy = {:.4f} / {:.4f}'.format(loss, acc)) -''' -#Train a simple deep CNN on the CIFAR10 small images dataset. - -It gets to 75% validation accuracy in 25 epochs, and 79% after 50 epochs. -(it's still underfitting at that point, though). -''' - -from __future__ import print_function -import keras -from keras.datasets import cifar10 -from keras.preprocessing.image import ImageDataGenerator -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation, Flatten -from keras.layers import Conv2D, MaxPooling2D -import os - -batch_size = 32 -num_classes = 10 -epochs = 100 -data_augmentation = True -num_predictions = 20 -save_dir = os.path.join(os.getcwd(), 'saved_models') -model_name = 'keras_cifar10_trained_model.h5' - -# The data, split between train and test sets: -(x_train, y_train), (x_test, y_test) = cifar10.load_data() -print('x_train shape:', x_train.shape) -print(x_train.shape[0], 'train samples') -print(x_test.shape[0], 'test samples') - -# Convert class vectors to binary class matrices. 
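-# Editor's sketch (not part of the original example): to_categorical maps
-# each integer label to a one-hot row, the target format that
-# categorical_crossentropy expects.
-_demo = keras.utils.to_categorical([0, 2, 1], num_classes=3)
-assert _demo.tolist() == [[1.0, 0.0, 0.0], [0.0, 0.0, 1.0], [0.0, 1.0, 0.0]]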
-y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) - -model = Sequential() -model.add(Conv2D(32, (3, 3), padding='same', - input_shape=x_train.shape[1:])) -model.add(Activation('relu')) -model.add(Conv2D(32, (3, 3))) -model.add(Activation('relu')) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) - -model.add(Conv2D(64, (3, 3), padding='same')) -model.add(Activation('relu')) -model.add(Conv2D(64, (3, 3))) -model.add(Activation('relu')) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) - -model.add(Flatten()) -model.add(Dense(512)) -model.add(Activation('relu')) -model.add(Dropout(0.5)) -model.add(Dense(num_classes)) -model.add(Activation('softmax')) - -# initiate RMSprop optimizer -opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6) - -# Let's train the model using RMSprop -model.compile(loss='categorical_crossentropy', - optimizer=opt, - metrics=['accuracy']) - -x_train = x_train.astype('float32') -x_test = x_test.astype('float32') -x_train /= 255 -x_test /= 255 - -if not data_augmentation: - print('Not using data augmentation.') - model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test), - shuffle=True) -else: - print('Using real-time data augmentation.') - # This will do preprocessing and realtime data augmentation: - datagen = ImageDataGenerator( - featurewise_center=False, # set input mean to 0 over the dataset - samplewise_center=False, # set each sample mean to 0 - featurewise_std_normalization=False, # divide inputs by std of the dataset - samplewise_std_normalization=False, # divide each input by its std - zca_whitening=False, # apply ZCA whitening - zca_epsilon=1e-06, # epsilon for ZCA whitening - # randomly rotate images in the range (degrees, 0 to 180) - rotation_range=0, - # randomly shift images horizontally (fraction of total width) - width_shift_range=0.1, - # randomly shift images vertically (fraction of total height) - height_shift_range=0.1, - shear_range=0., # set range for random shear - zoom_range=0., # set range for random zoom - channel_shift_range=0., # set range for random channel shifts - # set mode for filling points outside the input boundaries - fill_mode='nearest', - cval=0., # value used for fill_mode = "constant" - horizontal_flip=True, # randomly flip images - vertical_flip=False, # randomly flip images - # set rescaling factor (applied before any other transformation) - rescale=None, - # set function that will be applied on each input - preprocessing_function=None, - # image data format, either "channels_first" or "channels_last" - data_format=None, - # fraction of images reserved for validation (strictly between 0 and 1) - validation_split=0.0) - - # Compute quantities required for feature-wise normalization - # (std, mean, and principal components if ZCA whitening is applied). - datagen.fit(x_train) - - # Fit the model on the batches generated by datagen.flow(). - model.fit_generator(datagen.flow(x_train, y_train, - batch_size=batch_size), - epochs=epochs, - validation_data=(x_test, y_test), - workers=4) - -# Save model and weights -if not os.path.isdir(save_dir): - os.makedirs(save_dir) -model_path = os.path.join(save_dir, model_name) -model.save(model_path) -print('Saved trained model at %s ' % model_path) - -# Score trained model. 
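-# Editor's note (not in the original example): evaluate returns one scalar
-# per entry in model.metrics_names, here ['loss', 'acc'], which is why
-# scores is indexed by position below.
-print('metric order:', model.metrics_names)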
-scores = model.evaluate(x_test, y_test, verbose=1) -print('Test loss:', scores[0]) -print('Test accuracy:', scores[1]) -""" -This example trains a simple CNN-Capsule Network on the CIFAR10 data set. - -Without Data Augmentation: -It gets to 75% validation accuracy in 10 epochs, 79% after 15 epochs, -and overfitting after 20 epochs - -With Data Augmentation: -It gets to 75% validation accuracy in 10 epochs, 79% after 15 epochs, -and 83% after 30 epochs. - -The highest achieved validation accuracy is 83.79% after 50 epochs. -This is a fast implementation that takes just 20s/epoch on a GTX 1070 GPU. - -The paper "Dynamic Routing Between Capsules": https://arxiv.org/abs/1710.09829 -""" -from __future__ import print_function - -from keras import activations -from keras import backend as K -from keras import layers -from keras import utils -from keras.datasets import cifar10 -from keras.models import Model -from keras.preprocessing.image import ImageDataGenerator - - -def squash(x, axis=-1): - """The Squashing Function. - The nonlinear activation function used in Capsule Network - # Arguments - x: Input Tensor. - axis: Integer axis along which the squashing function is to be applied. - - # Returns - Tensor with scaled value of the input tensor - """ - s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon() - scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm) - return scale * x - - -def margin_loss(y_true, y_pred): - """Margin loss - - # Arguments - y_true: tensor of true targets. - y_pred: tensor of predicted targets. - - # Returns - Tensor with one scalar loss entry per sample. - """ - lamb, margin = 0.5, 0.1 - return K.sum(y_true * K.square(K.relu(1 - margin - y_pred)) + lamb * ( - 1 - y_true) * K.square(K.relu(y_pred - margin)), axis=-1) - - -class Capsule(layers.Layer): - """Capsule Network - - A Capsule Network Layer implementation in Keras - There are two versions of Capsule Networks. - One is similar to dense layer (for the fixed-shape input), - and the other is similar to time distributed dense layer - (for inputs of varied length). - - The input shape of Capsule must be (batch_size, - input_num_capsule, - input_dim_capsule - ) - and the output shape is (batch_size, - num_capsule, - dim_capsule - ) - The Capsule implementation is from https://github.com/bojone/Capsule/ - - - # Arguments - num_capsule: An integer, the number of capsules. - dim_capsule: An integer, the dimensions of the capsule. - routings: An integer, the number of routings. - share_weights: A boolean, sets weight sharing between layers. - activation: A string, the activation function to be applied. 
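-
-    # Example (an editor's sketch, not part of the original docstring;
-    # `conv_features` stands for any conv feature map to be grouped
-    # into 128-dimensional input capsules)
-        x = layers.Reshape((-1, 128))(conv_features)
-        capsule = Capsule(num_capsule=10, dim_capsule=16, routings=3)(x)
-        # capsule has shape (batch_size, 10, 16)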
-    """
-
-    def __init__(self,
-                 num_capsule,
-                 dim_capsule,
-                 routings=3,
-                 share_weights=True,
-                 activation='squash',
-                 **kwargs):
-        super(Capsule, self).__init__(**kwargs)
-        self.num_capsule = num_capsule
-        self.dim_capsule = dim_capsule
-        self.routings = routings
-        self.share_weights = share_weights
-        if activation == 'squash':
-            self.activation = squash
-        else:
-            self.activation = activations.get(activation)
-
-    def build(self, input_shape):
-        input_dim_capsule = input_shape[-1]
-        if self.share_weights:
-            self.kernel = self.add_weight(
-                name='capsule_kernel',
-                shape=(1, input_dim_capsule,
-                       self.num_capsule * self.dim_capsule),
-                initializer='glorot_uniform',
-                trainable=True)
-        else:
-            input_num_capsule = input_shape[-2]
-            self.kernel = self.add_weight(
-                name='capsule_kernel',
-                shape=(input_num_capsule, input_dim_capsule,
-                       self.num_capsule * self.dim_capsule),
-                initializer='glorot_uniform',
-                trainable=True)
-
-    def call(self, inputs, **kwargs):
-        """Following the routing algorithm from Hinton's paper,
-        but replace b = b + <u,v> with b = <u,v>.
-
-        This change can improve the feature representation of the capsule.
-
-        However, you can replace
-            b = K.batch_dot(outputs, hat_inputs, [2, 3])
-        with
-            b += K.batch_dot(outputs, hat_inputs, [2, 3])
-        to get standard routing.
-        """
-
-        if self.share_weights:
-            hat_inputs = K.conv1d(inputs, self.kernel)
-        else:
-            hat_inputs = K.local_conv1d(inputs, self.kernel, [1], [1])
-
-        batch_size = K.shape(inputs)[0]
-        input_num_capsule = K.shape(inputs)[1]
-        hat_inputs = K.reshape(hat_inputs,
-                               (batch_size, input_num_capsule,
-                                self.num_capsule, self.dim_capsule))
-        hat_inputs = K.permute_dimensions(hat_inputs, (0, 2, 1, 3))
-
-        b = K.zeros_like(hat_inputs[:, :, :, 0])
-        for i in range(self.routings):
-            c = K.softmax(b, 1)
-            o = self.activation(K.batch_dot(c, hat_inputs, [2, 2]))
-            if i < self.routings - 1:
-                b = K.batch_dot(o, hat_inputs, [2, 3])
-                if K.backend() == 'theano':
-                    o = K.sum(o, axis=1)
-        return o
-
-    def compute_output_shape(self, input_shape):
-        return None, self.num_capsule, self.dim_capsule
-
-
-batch_size = 128
-num_classes = 10
-epochs = 100
-(x_train, y_train), (x_test, y_test) = cifar10.load_data()
-
-x_train = x_train.astype('float32')
-x_test = x_test.astype('float32')
-x_train /= 255
-x_test /= 255
-y_train = utils.to_categorical(y_train, num_classes)
-y_test = utils.to_categorical(y_test, num_classes)
-
-# A simple Conv2D model
-input_image = layers.Input(shape=(None, None, 3))
-x = layers.Conv2D(64, (3, 3), activation='relu')(input_image)
-x = layers.Conv2D(64, (3, 3), activation='relu')(x)
-x = layers.AveragePooling2D((2, 2))(x)
-x = layers.Conv2D(128, (3, 3), activation='relu')(x)
-x = layers.Conv2D(128, (3, 3), activation='relu')(x)
-
-# Now, we reshape it to (batch_size, input_num_capsule, input_dim_capsule),
-# then connect a capsule layer.
-# The output of the final model is the lengths of 10 capsules, each of which
-# has 16 dimensions.
-# The length of a capsule's output vector expresses the probability that the
-# entity it represents exists, so the problem becomes ten parallel binary
-# classifications.
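-# Editor's sketch (not in the original file): squash keeps every capsule's
-# length inside [0, 1), which is what lets the Lambda layer below read
-# vector lengths as class probabilities.
-_v = K.constant([[3., 4.]])  # length 5 before squashing
-_len = float(K.eval(K.sqrt(K.sum(K.square(squash(_v)), axis=-1)))[0])
-assert 0. < _len < 1.  # 25 / 25.5, roughly 0.98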
- -x = layers.Reshape((-1, 128))(x) -capsule = Capsule(10, 16, 3, True)(x) -output = layers.Lambda(lambda z: K.sqrt(K.sum(K.square(z), 2)))(capsule) -model = Model(inputs=input_image, outputs=output) - -# Margin loss is used -model.compile(loss=margin_loss, optimizer='adam', metrics=['accuracy']) -model.summary() - -# Compare the performance with and without data augmentation -data_augmentation = True - -if not data_augmentation: - print('Not using data augmentation.') - model.fit( - x_train, - y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test), - shuffle=True) -else: - print('Using real-time data augmentation.') - # This will do preprocessing and real-time data augmentation: - datagen = ImageDataGenerator( - featurewise_center=False, # set input mean to 0 over the dataset - samplewise_center=False, # set each sample mean to 0 - featurewise_std_normalization=False, # divide inputs by dataset std - samplewise_std_normalization=False, # divide each input by its std - zca_whitening=False, # apply ZCA whitening - zca_epsilon=1e-06, # epsilon for ZCA whitening - rotation_range=0, # randomly rotate images in 0 to 180 degrees - width_shift_range=0.1, # randomly shift images horizontally - height_shift_range=0.1, # randomly shift images vertically - shear_range=0., # set range for random shear - zoom_range=0., # set range for random zoom - channel_shift_range=0., # set range for random channel shifts - # set mode for filling points outside the input boundaries - fill_mode='nearest', - cval=0., # value used for fill_mode = "constant" - horizontal_flip=True, # randomly flip images - vertical_flip=False, # randomly flip images - # set rescaling factor (applied before any other transformation) - rescale=None, - # set function that will be applied on each input - preprocessing_function=None, - # image data format, either "channels_first" or "channels_last" - data_format=None, - # fraction of images reserved for validation (strictly between 0 and 1) - validation_split=0.0) - - # Compute quantities required for feature-wise normalization - # (std, mean, and principal components if ZCA whitening is applied). - datagen.fit(x_train) - - # Fit the model on the batches generated by datagen.flow(). - model.fit_generator( - datagen.flow(x_train, y_train, batch_size=batch_size), - epochs=epochs, - validation_data=(x_test, y_test), - workers=4) -''' -#Train a simple deep CNN on the CIFAR10 small images dataset using augmentation. - -Using TensorFlow internal augmentation APIs by replacing ImageGenerator with -an embedded AugmentLayer using LambdaLayer, which is faster on GPU. - -** Benchmark of `ImageGenerator`(IG) vs `AugmentLayer`(AL) both using augmentation -2D:** - -(backend = Tensorflow-GPU, Nvidia Tesla P100-SXM2) - -Epoch no. | IG %Accuracy | IG Performance | AL %Accuracy | AL Performance ----------:|---------------:|---------------:|--------------:|--------------: -1 | 44.84 | 15 ms/step | 45.54 | 358 us/step -2 | 52.34 | 8 ms/step | 50.55 | 285 us/step -8 | 65.45 | 8 ms/step | 65.59 | 281 us/step -25 | 76.74 | 8 ms/step | 76.17 | 280 us/step -100 | 78.81 | 8 ms/step | 78.70 | 285 us/step - -Settings: horizontal_flip = True - - -Epoch no. 
| IG %Accuracy | IG Performance | AL %Accuracy | AL Performance ----------:|---------------:|---------------:|--------------:|--------------: -1 | 43.46 | 15 ms/step | 42.21 | 334 us/step -2 | 48.95 | 11 ms/step | 48.06 | 282 us/step -8 | 63.59 | 11 ms/step | 61.35 | 290 us/step -25 | 72.25 | 12 ms/step | 71.08 | 287 us/step -100 | 76.35 | 11 ms/step | 74.62 | 286 us/step - -Settings: rotation = 30.0 - - -(Corner process and rotation precision by `ImageGenerator` and `AugmentLayer` -are slightly different.) -''' - -from __future__ import print_function -import tensorflow as tf -import keras -from keras.datasets import cifar10 -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation, Flatten -from keras.layers import Conv2D, Lambda, MaxPooling2D -from keras import backend as K -import os - -if K.backend() != 'tensorflow': - raise RuntimeError('This example can only run with the ' - 'TensorFlow backend, ' - 'because it requires TF-native augmentation APIs') - - -def augment_2d(inputs, rotation=0, horizontal_flip=False, vertical_flip=False): - """Apply additive augmentation on 2D data. - - # Arguments - rotation: A float, the degree range for rotation (0 <= rotation < 180), - e.g. 3 for random image rotation between (-3.0, 3.0). - horizontal_flip: A boolean, whether to allow random horizontal flip, - e.g. true for 50% possibility to flip image horizontally. - vertical_flip: A boolean, whether to allow random vertical flip, - e.g. true for 50% possibility to flip image vertically. - - # Returns - input data after augmentation, whose shape is the same as its original. - """ - if inputs.dtype != tf.float32: - inputs = tf.image.convert_image_dtype(inputs, dtype=tf.float32) - - with tf.name_scope('augmentation'): - shp = tf.shape(inputs) - batch_size, height, width = shp[0], shp[1], shp[2] - width = tf.cast(width, tf.float32) - height = tf.cast(height, tf.float32) - - transforms = [] - identity = tf.constant([1, 0, 0, 0, 1, 0, 0, 0], dtype=tf.float32) - - if rotation > 0: - angle_rad = rotation * 3.141592653589793 / 180.0 - angles = tf.random_uniform([batch_size], -angle_rad, angle_rad) - f = tf.contrib.image.angles_to_projective_transforms(angles, - height, width) - transforms.append(f) - - if horizontal_flip: - coin = tf.less(tf.random_uniform([batch_size], 0, 1.0), 0.5) - shape = [-1., 0., width, 0., 1., 0., 0., 0.] - flip_transform = tf.convert_to_tensor(shape, dtype=tf.float32) - flip = tf.tile(tf.expand_dims(flip_transform, 0), [batch_size, 1]) - noflip = tf.tile(tf.expand_dims(identity, 0), [batch_size, 1]) - transforms.append(tf.where(coin, flip, noflip)) - - if vertical_flip: - coin = tf.less(tf.random_uniform([batch_size], 0, 1.0), 0.5) - shape = [1., 0., 0., 0., -1., height, 0., 0.] 
- flip_transform = tf.convert_to_tensor(shape, dtype=tf.float32) - flip = tf.tile(tf.expand_dims(flip_transform, 0), [batch_size, 1]) - noflip = tf.tile(tf.expand_dims(identity, 0), [batch_size, 1]) - transforms.append(tf.where(coin, flip, noflip)) - - if transforms: - f = tf.contrib.image.compose_transforms(*transforms) - inputs = tf.contrib.image.transform( - inputs, f, interpolation='BILINEAR') - return inputs - - -batch_size = 32 -num_classes = 10 -epochs = 100 -num_predictions = 20 -save_dir = '/tmp/saved_models' -model_name = 'keras_cifar10_trained_model.h5' - -# The data, split between train and test sets: -(x_train, y_train), (x_test, y_test) = cifar10.load_data() -print('x_train shape:', x_train.shape) -print(x_train.shape[0], 'train samples') -print(x_test.shape[0], 'test samples') - -# Convert class vectors to binary class matrices. -y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) - -model = Sequential() -model.add(Lambda(augment_2d, - input_shape=x_train.shape[1:], - arguments={'rotation': 8.0, 'horizontal_flip': True})) -model.add(Conv2D(32, (3, 3), padding='same')) -model.add(Activation('relu')) -model.add(Conv2D(32, (3, 3))) -model.add(Activation('relu')) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) - -model.add(Conv2D(64, (3, 3), padding='same')) -model.add(Activation('relu')) -model.add(Conv2D(64, (3, 3))) -model.add(Activation('relu')) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) - -model.add(Flatten()) -model.add(Dense(512)) -model.add(Activation('relu')) -model.add(Dropout(0.5)) -model.add(Dense(num_classes)) -model.add(Activation('softmax')) - -# initiate RMSprop optimizer -opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6) - -# Let's train the model using RMSprop -model.compile(loss='categorical_crossentropy', - optimizer=opt, - metrics=['accuracy']) - -x_train = x_train.astype('float32') -x_test = x_test.astype('float32') -x_train /= 255 -x_test /= 255 - -model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test), - shuffle=True) - -# Save model and weights -if not os.path.isdir(save_dir): - os.makedirs(save_dir) -model_path = os.path.join(save_dir, model_name) -model.save(model_path) -print('Saved trained model at %s ' % model_path) - -# Score trained model. -scores = model.evaluate(x_test, y_test, verbose=1) -print('Test loss:', scores[0]) -print('Test accuracy:', scores[1]) -""" -#Trains a ResNet on the CIFAR10 dataset. 
- -ResNet v1: -[Deep Residual Learning for Image Recognition -](https://arxiv.org/pdf/1512.03385.pdf) - -ResNet v2: -[Identity Mappings in Deep Residual Networks -](https://arxiv.org/pdf/1603.05027.pdf) - - -Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti -:------------|--:|-------:|-----------------------:|---: -ResNet20 v1| 3| 92.16 %| 91.25 %|35 -ResNet32 v1| 5| 92.46 %| 92.49 %|50 -ResNet44 v1| 7| 92.50 %| 92.83 %|70 -ResNet56 v1| 9| 92.71 %| 93.03 %|90 -ResNet110 v1| 18| 92.65 %| 93.39+-.16 %|165 -ResNet164 v1| 27| - %| 94.07 %| - -ResNet1001 v1|N/A| - %| 92.39 %| - - -  - -Model|n|200-epoch accuracy|Original paper accuracy |sec/epoch GTX1080Ti -:------------|--:|-------:|-----------------------:|---: -ResNet20 v2| 2| - %| - %|--- -ResNet32 v2|N/A| NA %| NA %| NA -ResNet44 v2|N/A| NA %| NA %| NA -ResNet56 v2| 6| 93.01 %| NA %|100 -ResNet110 v2| 12| 93.15 %| 93.63 %|180 -ResNet164 v2| 18| - %| 94.54 %| - -ResNet1001 v2|111| - %| 95.08+-.14 %| - -""" - -from __future__ import print_function -import keras -from keras.layers import Dense, Conv2D, BatchNormalization, Activation -from keras.layers import AveragePooling2D, Input, Flatten -from keras.optimizers import Adam -from keras.callbacks import ModelCheckpoint, LearningRateScheduler -from keras.callbacks import ReduceLROnPlateau -from keras.preprocessing.image import ImageDataGenerator -from keras.regularizers import l2 -from keras import backend as K -from keras.models import Model -from keras.datasets import cifar10 -import numpy as np -import os - -# Training parameters -batch_size = 32 # orig paper trained all networks with batch_size=128 -epochs = 200 -data_augmentation = True -num_classes = 10 - -# Subtracting pixel mean improves accuracy -subtract_pixel_mean = True - -# Model parameter -# ---------------------------------------------------------------------------- -# | | 200-epoch | Orig Paper| 200-epoch | Orig Paper| sec/epoch -# Model | n | ResNet v1 | ResNet v1 | ResNet v2 | ResNet v2 | GTX1080Ti -# |v1(v2)| %Accuracy | %Accuracy | %Accuracy | %Accuracy | v1 (v2) -# ---------------------------------------------------------------------------- -# ResNet20 | 3 (2)| 92.16 | 91.25 | ----- | ----- | 35 (---) -# ResNet32 | 5(NA)| 92.46 | 92.49 | NA | NA | 50 ( NA) -# ResNet44 | 7(NA)| 92.50 | 92.83 | NA | NA | 70 ( NA) -# ResNet56 | 9 (6)| 92.71 | 93.03 | 93.01 | NA | 90 (100) -# ResNet110 |18(12)| 92.65 | 93.39+-.16| 93.15 | 93.63 | 165(180) -# ResNet164 |27(18)| ----- | 94.07 | ----- | 94.54 | ---(---) -# ResNet1001| (111)| ----- | 92.39 | ----- | 95.08+-.14| ---(---) -# --------------------------------------------------------------------------- -n = 3 - -# Model version -# Orig paper: version = 1 (ResNet v1), Improved ResNet: version = 2 (ResNet v2) -version = 1 - -# Computed depth from supplied model parameter n -if version == 1: - depth = n * 6 + 2 -elif version == 2: - depth = n * 9 + 2 - -# Model name, depth and version -model_type = 'ResNet%dv%d' % (depth, version) - -# Load the CIFAR10 data. -(x_train, y_train), (x_test, y_test) = cifar10.load_data() - -# Input image dimensions. -input_shape = x_train.shape[1:] - -# Normalize data. 
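-# Editor's sketch (not part of the original example): dividing by 255 and
-# subtracting the per-pixel training-set mean roughly centers the inputs,
-# e.g. for a toy 2-sample batch:
-_toy = np.array([[0., 128.], [255., 128.]]) / 255
-assert np.allclose(_toy - _toy.mean(axis=0), [[-0.5, 0.], [0.5, 0.]])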
-x_train = x_train.astype('float32') / 255
-x_test = x_test.astype('float32') / 255
-
-# If subtract pixel mean is enabled
-if subtract_pixel_mean:
-    x_train_mean = np.mean(x_train, axis=0)
-    x_train -= x_train_mean
-    x_test -= x_train_mean
-
-print('x_train shape:', x_train.shape)
-print(x_train.shape[0], 'train samples')
-print(x_test.shape[0], 'test samples')
-print('y_train shape:', y_train.shape)
-
-# Convert class vectors to binary class matrices.
-y_train = keras.utils.to_categorical(y_train, num_classes)
-y_test = keras.utils.to_categorical(y_test, num_classes)
-
-
-def lr_schedule(epoch):
-    """Learning Rate Schedule
-
-    Learning rate is scheduled to be reduced after 80, 120, 160, 180 epochs.
-    Called automatically every epoch as part of callbacks during training.
-
-    # Arguments
-        epoch (int): the current (0-indexed) epoch
-
-    # Returns
-        lr (float32): learning rate
-    """
-    lr = 1e-3
-    if epoch > 180:
-        lr *= 0.5e-3
-    elif epoch > 160:
-        lr *= 1e-3
-    elif epoch > 120:
-        lr *= 1e-2
-    elif epoch > 80:
-        lr *= 1e-1
-    print('Learning rate: ', lr)
-    return lr
-
-
-def resnet_layer(inputs,
-                 num_filters=16,
-                 kernel_size=3,
-                 strides=1,
-                 activation='relu',
-                 batch_normalization=True,
-                 conv_first=True):
-    """2D Convolution-Batch Normalization-Activation stack builder
-
-    # Arguments
-        inputs (tensor): input tensor from input image or previous layer
-        num_filters (int): Conv2D number of filters
-        kernel_size (int): Conv2D square kernel dimensions
-        strides (int): Conv2D square stride dimensions
-        activation (string): activation name
-        batch_normalization (bool): whether to include batch normalization
-        conv_first (bool): conv-bn-activation (True) or
-            bn-activation-conv (False)
-
-    # Returns
-        x (tensor): tensor as input to the next layer
-    """
-    conv = Conv2D(num_filters,
-                  kernel_size=kernel_size,
-                  strides=strides,
-                  padding='same',
-                  kernel_initializer='he_normal',
-                  kernel_regularizer=l2(1e-4))
-
-    x = inputs
-    if conv_first:
-        x = conv(x)
-        if batch_normalization:
-            x = BatchNormalization()(x)
-        if activation is not None:
-            x = Activation(activation)(x)
-    else:
-        if batch_normalization:
-            x = BatchNormalization()(x)
-        if activation is not None:
-            x = Activation(activation)(x)
-        x = conv(x)
-    return x
-
-
-def resnet_v1(input_shape, depth, num_classes=10):
-    """ResNet Version 1 Model builder [a]
-
-    Stacks of 2 x (3 x 3) Conv2D-BN-ReLU
-    Last ReLU is after the shortcut connection.
-    At the beginning of each stage, the feature map size is halved (downsampled)
-    by a convolutional layer with strides=2, while the number of filters is
-    doubled. Within each stage, the layers have the same number of filters and
-    the same filter map sizes.
-    Feature map sizes:
-    stage 0: 32x32, 16
-    stage 1: 16x16, 32
-    stage 2: 8x8, 64
-    The number of parameters is approx. the same as in Table 6 of [a]:
-    ResNet20 0.27M
-    ResNet32 0.46M
-    ResNet44 0.66M
-    ResNet56 0.85M
-    ResNet110 1.7M
-
-    # Arguments
-        input_shape (tensor): shape of input image tensor
-        depth (int): number of core convolutional layers
-        num_classes (int): number of classes (CIFAR10 has 10)
-
-    # Returns
-        model (Model): Keras model instance
-    """
-    if (depth - 2) % 6 != 0:
-        raise ValueError('depth should be 6n+2 (eg 20, 32, 44 in [a])')
-    # Start model definition.
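-    # Editor's worked example (not in the original file): with the default
-    # n = 3 above, depth = 6 * 3 + 2 = 20, so num_res_blocks = (20 - 2) / 6 = 3;
-    # 3 stages * 3 blocks * 2 conv layers + the input conv + the dense
-    # classifier give the 20 weighted layers of ResNet20.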
- num_filters = 16 - num_res_blocks = int((depth - 2) / 6) - - inputs = Input(shape=input_shape) - x = resnet_layer(inputs=inputs) - # Instantiate the stack of residual units - for stack in range(3): - for res_block in range(num_res_blocks): - strides = 1 - if stack > 0 and res_block == 0: # first layer but not first stack - strides = 2 # downsample - y = resnet_layer(inputs=x, - num_filters=num_filters, - strides=strides) - y = resnet_layer(inputs=y, - num_filters=num_filters, - activation=None) - if stack > 0 and res_block == 0: # first layer but not first stack - # linear projection residual shortcut connection to match - # changed dims - x = resnet_layer(inputs=x, - num_filters=num_filters, - kernel_size=1, - strides=strides, - activation=None, - batch_normalization=False) - x = keras.layers.add([x, y]) - x = Activation('relu')(x) - num_filters *= 2 - - # Add classifier on top. - # v1 does not use BN after last shortcut connection-ReLU - x = AveragePooling2D(pool_size=8)(x) - y = Flatten()(x) - outputs = Dense(num_classes, - activation='softmax', - kernel_initializer='he_normal')(y) - - # Instantiate model. - model = Model(inputs=inputs, outputs=outputs) - return model - - -def resnet_v2(input_shape, depth, num_classes=10): - """ResNet Version 2 Model builder [b] - - Stacks of (1 x 1)-(3 x 3)-(1 x 1) BN-ReLU-Conv2D or also known as - bottleneck layer - First shortcut connection per layer is 1 x 1 Conv2D. - Second and onwards shortcut connection is identity. - At the beginning of each stage, the feature map size is halved (downsampled) - by a convolutional layer with strides=2, while the number of filter maps is - doubled. Within each stage, the layers have the same number filters and the - same filter map sizes. - Features maps sizes: - conv1 : 32x32, 16 - stage 0: 32x32, 64 - stage 1: 16x16, 128 - stage 2: 8x8, 256 - - # Arguments - input_shape (tensor): shape of input image tensor - depth (int): number of core convolutional layers - num_classes (int): number of classes (CIFAR10 has 10) - - # Returns - model (Model): Keras model instance - """ - if (depth - 2) % 9 != 0: - raise ValueError('depth should be 9n+2 (eg 56 or 110 in [b])') - # Start model definition. 
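-    # Editor's worked example (not in the original file): for ResNet56 v2,
-    # num_res_blocks = (56 - 2) / 9 = 6; each bottleneck block holds 3 conv
-    # layers, so 3 stages * 6 blocks * 3 layers + conv1 + the dense
-    # classifier give 56 weighted layers.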
-    num_filters_in = 16
-    num_res_blocks = int((depth - 2) / 9)
-
-    inputs = Input(shape=input_shape)
-    # v2 performs Conv2D with BN-ReLU on input before splitting into 2 paths
-    x = resnet_layer(inputs=inputs,
-                     num_filters=num_filters_in,
-                     conv_first=True)
-
-    # Instantiate the stack of residual units
-    for stage in range(3):
-        for res_block in range(num_res_blocks):
-            activation = 'relu'
-            batch_normalization = True
-            strides = 1
-            if stage == 0:
-                num_filters_out = num_filters_in * 4
-                if res_block == 0:  # first layer and first stage
-                    activation = None
-                    batch_normalization = False
-            else:
-                num_filters_out = num_filters_in * 2
-                if res_block == 0:  # first layer but not first stage
-                    strides = 2  # downsample
-
-            # bottleneck residual unit
-            y = resnet_layer(inputs=x,
-                             num_filters=num_filters_in,
-                             kernel_size=1,
-                             strides=strides,
-                             activation=activation,
-                             batch_normalization=batch_normalization,
-                             conv_first=False)
-            y = resnet_layer(inputs=y,
-                             num_filters=num_filters_in,
-                             conv_first=False)
-            y = resnet_layer(inputs=y,
-                             num_filters=num_filters_out,
-                             kernel_size=1,
-                             conv_first=False)
-            if res_block == 0:
-                # linear projection residual shortcut connection to match
-                # changed dims
-                x = resnet_layer(inputs=x,
-                                 num_filters=num_filters_out,
-                                 kernel_size=1,
-                                 strides=strides,
-                                 activation=None,
-                                 batch_normalization=False)
-            x = keras.layers.add([x, y])
-
-        num_filters_in = num_filters_out
-
-    # Add classifier on top.
-    # v2 has BN-ReLU before Pooling
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-    x = AveragePooling2D(pool_size=8)(x)
-    y = Flatten()(x)
-    outputs = Dense(num_classes,
-                    activation='softmax',
-                    kernel_initializer='he_normal')(y)
-
-    # Instantiate model.
-    model = Model(inputs=inputs, outputs=outputs)
-    return model
-
-
-if version == 2:
-    model = resnet_v2(input_shape=input_shape, depth=depth)
-else:
-    model = resnet_v1(input_shape=input_shape, depth=depth)
-
-model.compile(loss='categorical_crossentropy',
-              optimizer=Adam(lr=lr_schedule(0)),
-              metrics=['accuracy'])
-model.summary()
-print(model_type)
-
-# Prepare model saving directory.
-save_dir = os.path.join(os.getcwd(), 'saved_models')
-model_name = 'cifar10_%s_model.{epoch:03d}.h5' % model_type
-if not os.path.isdir(save_dir):
-    os.makedirs(save_dir)
-filepath = os.path.join(save_dir, model_name)
-
-# Prepare callbacks for model saving and for learning rate adjustment.
-checkpoint = ModelCheckpoint(filepath=filepath,
-                             monitor='val_acc',
-                             verbose=1,
-                             save_best_only=True)
-
-lr_scheduler = LearningRateScheduler(lr_schedule)
-
-lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1),
-                               cooldown=0,
-                               patience=5,
-                               min_lr=0.5e-6)
-
-callbacks = [checkpoint, lr_reducer, lr_scheduler]
-
-# Run training, with or without data augmentation.
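-# Editor's sketch (not part of the original example): a quick check of the
-# schedule wired into lr_scheduler above, stepping down at epochs
-# 80/120/160/180.
-assert np.isclose(lr_schedule(0), 1e-3) and np.isclose(lr_schedule(81), 1e-4)
-assert np.isclose(lr_schedule(121), 1e-5) and np.isclose(lr_schedule(181), 5e-7)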
-if not data_augmentation: - print('Not using data augmentation.') - model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test), - shuffle=True, - callbacks=callbacks) -else: - print('Using real-time data augmentation.') - # This will do preprocessing and realtime data augmentation: - datagen = ImageDataGenerator( - # set input mean to 0 over the dataset - featurewise_center=False, - # set each sample mean to 0 - samplewise_center=False, - # divide inputs by std of dataset - featurewise_std_normalization=False, - # divide each input by its std - samplewise_std_normalization=False, - # apply ZCA whitening - zca_whitening=False, - # epsilon for ZCA whitening - zca_epsilon=1e-06, - # randomly rotate images in the range (deg 0 to 180) - rotation_range=0, - # randomly shift images horizontally - width_shift_range=0.1, - # randomly shift images vertically - height_shift_range=0.1, - # set range for random shear - shear_range=0., - # set range for random zoom - zoom_range=0., - # set range for random channel shifts - channel_shift_range=0., - # set mode for filling points outside the input boundaries - fill_mode='nearest', - # value used for fill_mode = "constant" - cval=0., - # randomly flip images - horizontal_flip=True, - # randomly flip images - vertical_flip=False, - # set rescaling factor (applied before any other transformation) - rescale=None, - # set function that will be applied on each input - preprocessing_function=None, - # image data format, either "channels_first" or "channels_last" - data_format=None, - # fraction of images reserved for validation (strictly between 0 and 1) - validation_split=0.0) - - # Compute quantities required for featurewise normalization - # (std, mean, and principal components if ZCA whitening is applied). - datagen.fit(x_train) - - # Fit the model on the batches generated by datagen.flow(). - model.fit_generator(datagen.flow(x_train, y_train, batch_size=batch_size), - validation_data=(x_test, y_test), - epochs=epochs, verbose=1, workers=4, - callbacks=callbacks) - -# Score trained model. -scores = model.evaluate(x_test, y_test, verbose=1) -print('Test loss:', scores[0]) -print('Test accuracy:', scores[1]) -# -*- coding: utf-8 -*- - -import numpy as np -import cv2 -import matplotlib.pyplot as plt - -from keras.models import Model - -import keras.applications.resnet50 as resnet -from keras.layers import UpSampling2D, Conv2D - - -# Please set an appropriate image file -INPUT_IMG_FILE = "dog.jpg" - -################################################################ -# The following parameters can be changed to other models -# that use global average pooling. -# e.g.) 
InceptionResnetV2 / NASNetLarge -NETWORK_INPUT_SIZE = 224 -MODEL_CLASS = resnet.ResNet50 -PREPROCESS_FN = resnet.preprocess_input -LAST_CONV_LAYER = "activation_49" -PRED_LAYER = "fc1000" -################################################################ - -# number of imagenet classes -N_CLASSES = 1000 - - -def load_img(fname, input_size, preprocess_fn): - original_img = cv2.imread(fname)[:, :, ::-1] - original_size = (original_img.shape[1], original_img.shape[0]) - img = cv2.resize(original_img, (input_size, input_size)) - imgs = np.expand_dims(preprocess_fn(img), axis=0) - return imgs, original_img, original_size - - -def get_cam_model(model_class, - input_size=224, - last_conv_layer="activation_49", - pred_layer="fc1000"): - model = model_class(input_shape=(input_size, input_size, 3)) - - final_params = model.get_layer(pred_layer).get_weights() - final_params = (final_params[0].reshape( - 1, 1, -1, N_CLASSES), final_params[1]) - - last_conv_output = model.get_layer(last_conv_layer).output - x = UpSampling2D(size=(32, 32), interpolation="bilinear")( - last_conv_output) - x = Conv2D(filters=N_CLASSES, kernel_size=( - 1, 1), name="predictions_2")(x) - - cam_model = Model(inputs=model.input, - outputs=[model.output, x]) - cam_model.get_layer("predictions_2").set_weights(final_params) - return cam_model - - -def postprocess(preds, cams, top_k=1): - idxes = np.argsort(preds[0])[-top_k:] - class_activation_map = np.zeros_like(cams[0, :, :, 0]) - for i in idxes: - class_activation_map += cams[0, :, :, i] - return class_activation_map - - -# 1. load image -imgs, original_img, original_size = load_img(INPUT_IMG_FILE, - input_size=NETWORK_INPUT_SIZE, - preprocess_fn=resnet.preprocess_input) - -# 2. prediction -model = get_cam_model(resnet.ResNet50, - NETWORK_INPUT_SIZE, - LAST_CONV_LAYER, - PRED_LAYER) -preds, cams = model.predict(imgs) - -# 4. post processing -class_activation_map = postprocess(preds, cams) - -# 5. plot image+cam to original size -plt.imshow(original_img, alpha=0.5) -plt.imshow(cv2.resize(class_activation_map, - original_size), cmap='jet', alpha=0.5) -plt.show() -""" -#Visualization of the filters of VGG16, via gradient ascent in input space. - -This script can run on CPU in a few minutes. - -Results example: ![Visualization](http://i.imgur.com/4nj4KjN.jpg) -""" -from __future__ import print_function - -import time -import numpy as np -from PIL import Image as pil_image -from keras.preprocessing.image import save_img -from keras import layers -from keras.applications import vgg16 -from keras import backend as K - - -def normalize(x): - """utility function to normalize a tensor. - - # Arguments - x: An input tensor. - - # Returns - The normalized input tensor. - """ - return x / (K.sqrt(K.mean(K.square(x))) + K.epsilon()) - - -def deprocess_image(x): - """utility function to convert a float array into a valid uint8 image. - - # Arguments - x: A numpy-array representing the generated image. - - # Returns - A processed numpy-array, which could be used in e.g. imshow. - """ - # normalize tensor: center on 0., ensure std is 0.25 - x -= x.mean() - x /= (x.std() + K.epsilon()) - x *= 0.25 - - # clip to [0, 1] - x += 0.5 - x = np.clip(x, 0, 1) - - # convert to RGB array - x *= 255 - if K.image_data_format() == 'channels_first': - x = x.transpose((1, 2, 0)) - x = np.clip(x, 0, 255).astype('uint8') - return x - - -def process_image(x, former): - """utility function to convert a valid uint8 image back into a float array. - Reverses `deprocess_image`. 
- - # Arguments - x: A numpy-array, which could be used in e.g. imshow. - former: The former numpy-array. - Need to determine the former mean and variance. - - # Returns - A processed numpy-array representing the generated image. - """ - if K.image_data_format() == 'channels_first': - x = x.transpose((2, 0, 1)) - return (x / 255 - 0.5) * 4 * former.std() + former.mean() - - -def visualize_layer(model, - layer_name, - step=1., - epochs=15, - upscaling_steps=9, - upscaling_factor=1.2, - output_dim=(412, 412), - filter_range=(0, None)): - """Visualizes the most relevant filters of one conv-layer in a certain model. - - # Arguments - model: The model containing layer_name. - layer_name: The name of the layer to be visualized. - Has to be a part of model. - step: step size for gradient ascent. - epochs: Number of iterations for gradient ascent. - upscaling_steps: Number of upscaling steps. - Starting image is in this case (80, 80). - upscaling_factor: Factor to which to slowly upgrade - the image towards output_dim. - output_dim: [img_width, img_height] The output image dimensions. - filter_range: Tupel[lower, upper] - Determines the to be computed filter numbers. - If the second value is `None`, - the last filter will be inferred as the upper boundary. - """ - - def _generate_filter_image(input_img, - layer_output, - filter_index): - """Generates image for one particular filter. - - # Arguments - input_img: The input-image Tensor. - layer_output: The output-image Tensor. - filter_index: The to be processed filter number. - Assumed to be valid. - - #Returns - Either None if no image could be generated. - or a tuple of the image (array) itself and the last loss. - """ - s_time = time.time() - - # we build a loss function that maximizes the activation - # of the nth filter of the layer considered - if K.image_data_format() == 'channels_first': - loss = K.mean(layer_output[:, filter_index, :, :]) - else: - loss = K.mean(layer_output[:, :, :, filter_index]) - - # we compute the gradient of the input picture wrt this loss - grads = K.gradients(loss, input_img)[0] - - # normalization trick: we normalize the gradient - grads = normalize(grads) - - # this function returns the loss and grads given the input picture - iterate = K.function([input_img], [loss, grads]) - - # we start from a gray image with some random noise - intermediate_dim = tuple( - int(x / (upscaling_factor ** upscaling_steps)) for x in output_dim) - if K.image_data_format() == 'channels_first': - input_img_data = np.random.random( - (1, 3, intermediate_dim[0], intermediate_dim[1])) - else: - input_img_data = np.random.random( - (1, intermediate_dim[0], intermediate_dim[1], 3)) - input_img_data = (input_img_data - 0.5) * 20 + 128 - - # Slowly upscaling towards the original size prevents - # a dominating high-frequency of the to visualized structure - # as it would occur if we directly compute the 412d-image. - # Behaves as a better starting point for each following dimension - # and therefore avoids poor local minima - for up in reversed(range(upscaling_steps)): - # we run gradient ascent for e.g. 
20 steps - for _ in range(epochs): - loss_value, grads_value = iterate([input_img_data]) - input_img_data += grads_value * step - - # some filters get stuck to 0, we can skip them - if loss_value <= K.epsilon(): - return None - - # Calulate upscaled dimension - intermediate_dim = tuple( - int(x / (upscaling_factor ** up)) for x in output_dim) - # Upscale - img = deprocess_image(input_img_data[0]) - img = np.array(pil_image.fromarray(img).resize(intermediate_dim, - pil_image.BICUBIC)) - input_img_data = [process_image(img, input_img_data[0])] - - # decode the resulting input image - img = deprocess_image(input_img_data[0]) - e_time = time.time() - print('Costs of filter {:3}: {:5.0f} ( {:4.2f}s )'.format(filter_index, - loss_value, - e_time - s_time)) - return img, loss_value - - def _draw_filters(filters, n=None): - """Draw the best filters in a nxn grid. - - # Arguments - filters: A List of generated images and their corresponding losses - for each processed filter. - n: dimension of the grid. - If none, the largest possible square will be used - """ - if n is None: - n = int(np.floor(np.sqrt(len(filters)))) - - # the filters that have the highest loss are assumed to be better-looking. - # we will only keep the top n*n filters. - filters.sort(key=lambda x: x[1], reverse=True) - filters = filters[:n * n] - - # build a black picture with enough space for - # e.g. our 8 x 8 filters of size 412 x 412, with a 5px margin in between - MARGIN = 5 - width = n * output_dim[0] + (n - 1) * MARGIN - height = n * output_dim[1] + (n - 1) * MARGIN - stitched_filters = np.zeros((width, height, 3), dtype='uint8') - - # fill the picture with our saved filters - for i in range(n): - for j in range(n): - img, _ = filters[i * n + j] - width_margin = (output_dim[0] + MARGIN) * i - height_margin = (output_dim[1] + MARGIN) * j - stitched_filters[ - width_margin: width_margin + output_dim[0], - height_margin: height_margin + output_dim[1], :] = img - - # save the result to disk - save_img('vgg_{0:}_{1:}x{1:}.png'.format( - layer_name, n), stitched_filters) - - # this is the placeholder for the input images - assert len(model.inputs) == 1 - input_img = model.inputs[0] - - # get the symbolic outputs of each "key" layer (we gave them unique names). 
- layer_dict = dict([(layer.name, layer) for layer in model.layers[1:]]) - - output_layer = layer_dict[layer_name] - assert isinstance(output_layer, layers.Conv2D) - - # Compute to be processed filter range - filter_lower = filter_range[0] - filter_upper = (filter_range[1] - if filter_range[1] is not None - else len(output_layer.get_weights()[1])) - assert(filter_lower >= 0 - and filter_upper <= len(output_layer.get_weights()[1]) - and filter_upper > filter_lower) - print('Compute filters {:} to {:}'.format(filter_lower, filter_upper)) - - # iterate through each filter and generate its corresponding image - processed_filters = [] - for f in range(filter_lower, filter_upper): - img_loss = _generate_filter_image(input_img, output_layer.output, f) - - if img_loss is not None: - processed_filters.append(img_loss) - - print('{} filter processed.'.format(len(processed_filters))) - # Finally draw and store the best filters to disk - _draw_filters(processed_filters) - - -if __name__ == '__main__': - # the name of the layer we want to visualize - # (see model definition at keras/applications/vgg16.py) - LAYER_NAME = 'block5_conv1' - - # build the VGG16 network with ImageNet weights - vgg = vgg16.VGG16(weights='imagenet', include_top=False) - print('Model loaded.') - vgg.summary() - - # example function call - visualize_layer(vgg, LAYER_NAME) -""" -#This script demonstrates the use of a convolutional LSTM network. - -This network is used to predict the next frame of an artificially -generated movie which contains moving squares. -""" -from keras.models import Sequential -from keras.layers.convolutional import Conv3D -from keras.layers.convolutional_recurrent import ConvLSTM2D -from keras.layers.normalization import BatchNormalization -import numpy as np -import pylab as plt - -# We create a layer which take as input movies of shape -# (n_frames, width, height, channels) and returns a movie -# of identical shape. - -seq = Sequential() -seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3), - input_shape=(None, 40, 40, 1), - padding='same', return_sequences=True)) -seq.add(BatchNormalization()) - -seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3), - padding='same', return_sequences=True)) -seq.add(BatchNormalization()) - -seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3), - padding='same', return_sequences=True)) -seq.add(BatchNormalization()) - -seq.add(ConvLSTM2D(filters=40, kernel_size=(3, 3), - padding='same', return_sequences=True)) -seq.add(BatchNormalization()) - -seq.add(Conv3D(filters=1, kernel_size=(3, 3, 3), - activation='sigmoid', - padding='same', data_format='channels_last')) -seq.compile(loss='binary_crossentropy', optimizer='adadelta') - - -# Artificial data generation: -# Generate movies with 3 to 7 moving squares inside. -# The squares are of shape 1x1 or 2x2 pixels, -# which move linearly over time. -# For convenience we first create movies with bigger width and height (80x80) -# and at the end we select a 40x40 window. 
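-# Editor's sketch (not part of the original example): the 80x80 -> 40x40
-# center crop keeps squares whose trajectories stay near the middle, e.g.
-_frame = np.zeros((80, 80))
-_frame[25:29, 55:59] = 1  # a 4x4 square near the crop edge
-assert _frame[20:60, 20:60].sum() == 16  # still fully visible after the crop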
- -def generate_movies(n_samples=1200, n_frames=15): - row = 80 - col = 80 - noisy_movies = np.zeros((n_samples, n_frames, row, col, 1), dtype=np.float) - shifted_movies = np.zeros((n_samples, n_frames, row, col, 1), - dtype=np.float) - - for i in range(n_samples): - # Add 3 to 7 moving squares - n = np.random.randint(3, 8) - - for j in range(n): - # Initial position - xstart = np.random.randint(20, 60) - ystart = np.random.randint(20, 60) - # Direction of motion - directionx = np.random.randint(0, 3) - 1 - directiony = np.random.randint(0, 3) - 1 - - # Size of the square - w = np.random.randint(2, 4) - - for t in range(n_frames): - x_shift = xstart + directionx * t - y_shift = ystart + directiony * t - noisy_movies[i, t, x_shift - w: x_shift + w, - y_shift - w: y_shift + w, 0] += 1 - - # Make it more robust by adding noise. - # The idea is that if during inference, - # the value of the pixel is not exactly one, - # we need to train the network to be robust and still - # consider it as a pixel belonging to a square. - if np.random.randint(0, 2): - noise_f = (-1)**np.random.randint(0, 2) - noisy_movies[i, t, - x_shift - w - 1: x_shift + w + 1, - y_shift - w - 1: y_shift + w + 1, - 0] += noise_f * 0.1 - - # Shift the ground truth by 1 - x_shift = xstart + directionx * (t + 1) - y_shift = ystart + directiony * (t + 1) - shifted_movies[i, t, x_shift - w: x_shift + w, - y_shift - w: y_shift + w, 0] += 1 - - # Cut to a 40x40 window - noisy_movies = noisy_movies[::, ::, 20:60, 20:60, ::] - shifted_movies = shifted_movies[::, ::, 20:60, 20:60, ::] - noisy_movies[noisy_movies >= 1] = 1 - shifted_movies[shifted_movies >= 1] = 1 - return noisy_movies, shifted_movies - - -# Train the network -noisy_movies, shifted_movies = generate_movies(n_samples=1200) -seq.fit(noisy_movies[:1000], shifted_movies[:1000], batch_size=10, - epochs=300, validation_split=0.05) - -# Testing the network on one movie -# feed it with the first 7 positions and then -# predict the new positions -which = 1004 -track = noisy_movies[which][:7, ::, ::, ::] - -for j in range(16): - new_pos = seq.predict(track[np.newaxis, ::, ::, ::, ::]) - new = new_pos[::, -1, ::, ::, ::] - track = np.concatenate((track, new), axis=0) - - -# And then compare the predictions -# to the ground truth -track2 = noisy_movies[which][::, ::, ::, ::] -for i in range(15): - fig = plt.figure(figsize=(10, 5)) - - ax = fig.add_subplot(121) - - if i >= 7: - ax.text(1, 3, 'Predictions !', fontsize=20, color='w') - else: - ax.text(1, 3, 'Initial trajectory', fontsize=20) - - toplot = track[i, ::, ::, 0] - - plt.imshow(toplot) - ax = fig.add_subplot(122) - plt.text(1, 3, 'Ground truth', fontsize=20) - - toplot = track2[i, ::, ::, 0] - if i >= 2: - toplot = shifted_movies[which][i - 1, ::, ::, 0] - - plt.imshow(toplot) - plt.savefig('%i_animate.png' % (i + 1)) -''' -#Deep Dreaming in Keras. 
- -Run the script with: -```python -python deep_dream.py path_to_your_base_image.jpg prefix_for_results -``` -e.g.: -```python -python deep_dream.py img/mypic.jpg results/dream -``` -''' -from __future__ import print_function - -from keras.preprocessing.image import load_img, save_img, img_to_array -import numpy as np -import scipy -import argparse - -from keras.applications import inception_v3 -from keras import backend as K - -parser = argparse.ArgumentParser(description='Deep Dreams with Keras.') -parser.add_argument('base_image_path', metavar='base', type=str, - help='Path to the image to transform.') -parser.add_argument('result_prefix', metavar='res_prefix', type=str, - help='Prefix for the saved results.') - -args = parser.parse_args() -base_image_path = args.base_image_path -result_prefix = args.result_prefix - -# These are the names of the layers -# for which we try to maximize activation, -# as well as their weight in the final loss -# we try to maximize. -# You can tweak these setting to obtain new visual effects. -settings = { - 'features': { - 'mixed2': 0.2, - 'mixed3': 0.5, - 'mixed4': 2., - 'mixed5': 1.5, - }, -} - - -def preprocess_image(image_path): - # Util function to open, resize and format pictures - # into appropriate tensors. - img = load_img(image_path) - img = img_to_array(img) - img = np.expand_dims(img, axis=0) - img = inception_v3.preprocess_input(img) - return img - - -def deprocess_image(x): - # Util function to convert a tensor into a valid image. - if K.image_data_format() == 'channels_first': - x = x.reshape((3, x.shape[2], x.shape[3])) - x = x.transpose((1, 2, 0)) - else: - x = x.reshape((x.shape[1], x.shape[2], 3)) - x /= 2. - x += 0.5 - x *= 255. - x = np.clip(x, 0, 255).astype('uint8') - return x - - -K.set_learning_phase(0) - -# Build the InceptionV3 network with our placeholder. -# The model will be loaded with pre-trained ImageNet weights. -model = inception_v3.InceptionV3(weights='imagenet', - include_top=False) -dream = model.input -print('Model loaded.') - -# Get the symbolic outputs of each "key" layer (we gave them unique names). -layer_dict = dict([(layer.name, layer) for layer in model.layers]) - -# Define the loss. -loss = K.variable(0.) -for layer_name in settings['features']: - # Add the L2 norm of the features of a layer to the loss. - if layer_name not in layer_dict: - raise ValueError('Layer ' + layer_name + ' not found in model.') - coeff = settings['features'][layer_name] - x = layer_dict[layer_name].output - # We avoid border artifacts by only involving non-border pixels in the loss. - scaling = K.prod(K.cast(K.shape(x), 'float32')) - if K.image_data_format() == 'channels_first': - loss += coeff * K.sum(K.square(x[:, :, 2: -2, 2: -2])) / scaling - else: - loss += coeff * K.sum(K.square(x[:, 2: -2, 2: -2, :])) / scaling - -# Compute the gradients of the dream wrt the loss. -grads = K.gradients(loss, dream)[0] -# Normalize gradients. -grads /= K.maximum(K.mean(K.abs(grads)), K.epsilon()) - -# Set up function to retrieve the value -# of the loss and gradients given an input image. 
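-# Editor's note (not in the original script): K.function compiles a callable
-# from graph inputs to outputs; a tiny standalone analogue:
-_inp = K.placeholder(shape=(None,))
-_double = K.function([_inp], [2 * _inp])
-assert _double([np.array([1., 2.])])[0].tolist() == [2.0, 4.0]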
-outputs = [loss, grads] -fetch_loss_and_grads = K.function([dream], outputs) - - -def eval_loss_and_grads(x): - outs = fetch_loss_and_grads([x]) - loss_value = outs[0] - grad_values = outs[1] - return loss_value, grad_values - - -def resize_img(img, size): - img = np.copy(img) - if K.image_data_format() == 'channels_first': - factors = (1, 1, - float(size[0]) / img.shape[2], - float(size[1]) / img.shape[3]) - else: - factors = (1, - float(size[0]) / img.shape[1], - float(size[1]) / img.shape[2], - 1) - return scipy.ndimage.zoom(img, factors, order=1) - - -def gradient_ascent(x, iterations, step, max_loss=None): - for i in range(iterations): - loss_value, grad_values = eval_loss_and_grads(x) - if max_loss is not None and loss_value > max_loss: - break - print('..Loss value at', i, ':', loss_value) - x += step * grad_values - return x - - -"""Process: - -- Load the original image. -- Define a number of processing scales (i.e. image shapes), - from smallest to largest. -- Resize the original image to the smallest scale. -- For every scale, starting with the smallest (i.e. current one): - - Run gradient ascent - - Upscale image to the next scale - - Reinject the detail that was lost at upscaling time -- Stop when we are back to the original size. - -To obtain the detail lost during upscaling, we simply -take the original image, shrink it down, upscale it, -and compare the result to the (resized) original image. -""" - - -# Playing with these hyperparameters will also allow you to achieve new effects -step = 0.01 # Gradient ascent step size -num_octave = 3 # Number of scales at which to run gradient ascent -octave_scale = 1.4 # Size ratio between scales -iterations = 20 # Number of ascent steps per scale -max_loss = 10. - -img = preprocess_image(base_image_path) -if K.image_data_format() == 'channels_first': - original_shape = img.shape[2:] -else: - original_shape = img.shape[1:3] -successive_shapes = [original_shape] -for i in range(1, num_octave): - shape = tuple([int(dim / (octave_scale ** i)) for dim in original_shape]) - successive_shapes.append(shape) -successive_shapes = successive_shapes[::-1] -original_img = np.copy(img) -shrunk_original_img = resize_img(img, successive_shapes[0]) - -for shape in successive_shapes: - print('Processing image shape', shape) - img = resize_img(img, shape) - img = gradient_ascent(img, - iterations=iterations, - step=step, - max_loss=max_loss) - upscaled_shrunk_original_img = resize_img(shrunk_original_img, shape) - same_size_original = resize_img(original_img, shape) - lost_detail = same_size_original - upscaled_shrunk_original_img - - img += lost_detail - shrunk_original_img = resize_img(original_img, shape) - -save_img(result_prefix + '.png', deprocess_image(np.copy(img))) -# -*- coding: utf-8 -*- -''' -# Optical character recognition -This example uses a convolutional stack followed by a recurrent stack -and a CTC logloss function to perform optical character recognition -of generated text images. I have no evidence of whether it actually -learns general shapes of text, or just is able to recognize all -the different fonts thrown at it...the purpose is more to demonstrate CTC -inside of Keras. Note that the font list may need to be updated -for the particular OS in use. - -This starts off with 4 letter words. For the first 12 epochs, the -difficulty is gradually increased using the TextImageGenerator class -which is both a generator class for test/train data and a Keras -callback class. 
After 20 epochs, longer sequences are thrown at it -by recompiling the model to handle a wider image and rebuilding -the word list to include two words separated by a space. - -The table below shows normalized edit distance values. Theano uses -a slightly different CTC implementation, hence the different results. - -Epoch | TF | TH ------:|-------:|-------: - 10| 0.027 | 0.064 - 15| 0.038 | 0.035 - 20| 0.043 | 0.045 - 25| 0.014 | 0.019 - -This requires ```cairo``` and ```editdistance``` packages: -```python -pip install cairocffi -pip install editdistance -``` - -Created by Mike Henry -https://github.com/mbhenry/ -''' -import os -import itertools -import codecs -import re -import datetime -import cairocffi as cairo -import editdistance -import numpy as np -from scipy import ndimage -import pylab -from keras import backend as K -from keras.layers.convolutional import Conv2D, MaxPooling2D -from keras.layers import Input, Dense, Activation -from keras.layers import Reshape, Lambda -from keras.layers.merge import add, concatenate -from keras.models import Model -from keras.layers.recurrent import GRU -from keras.optimizers import SGD -from keras.utils.data_utils import get_file -from keras.preprocessing import image -import keras.callbacks - - -OUTPUT_DIR = 'image_ocr' - -# character classes and matching regex filter -regex = r'^[a-z ]+$' -alphabet = u'abcdefghijklmnopqrstuvwxyz ' - -np.random.seed(55) - - -# this creates larger "blotches" of noise which look -# more realistic than just adding gaussian noise -# assumes greyscale with pixels ranging from 0 to 1 - -def speckle(img): - severity = np.random.uniform(0, 0.6) - blur = ndimage.gaussian_filter(np.random.randn(*img.shape) * severity, 1) - img_speck = (img + blur) - img_speck[img_speck > 1] = 1 - img_speck[img_speck <= 0] = 0 - return img_speck - - -# paints the string in a random location the bounding box -# also uses a random font, a slight random rotation, -# and a random amount of speckle noise - -def paint_text(text, w, h, rotate=False, ud=False, multi_fonts=False): - surface = cairo.ImageSurface(cairo.FORMAT_RGB24, w, h) - with cairo.Context(surface) as context: - context.set_source_rgb(1, 1, 1) # White - context.paint() - # this font list works in CentOS 7 - if multi_fonts: - fonts = [ - 'Century Schoolbook', 'Courier', 'STIX', - 'URW Chancery L', 'FreeMono'] - context.select_font_face( - np.random.choice(fonts), - cairo.FONT_SLANT_NORMAL, - np.random.choice([cairo.FONT_WEIGHT_BOLD, cairo.FONT_WEIGHT_NORMAL])) - else: - context.select_font_face('Courier', - cairo.FONT_SLANT_NORMAL, - cairo.FONT_WEIGHT_BOLD) - context.set_font_size(25) - box = context.text_extents(text) - border_w_h = (4, 4) - if box[2] > (w - 2 * border_w_h[1]) or box[3] > (h - 2 * border_w_h[0]): - raise IOError(('Could not fit string into image.' 
-                           ' Max char count is too large for given image width.'))
-
-        # teach the RNN translational invariance by
-        # fitting text box randomly on canvas, with some room to rotate
-        max_shift_x = w - box[2] - border_w_h[0]
-        max_shift_y = h - box[3] - border_w_h[1]
-        top_left_x = np.random.randint(0, int(max_shift_x))
-        if ud:
-            top_left_y = np.random.randint(0, int(max_shift_y))
-        else:
-            top_left_y = h // 2
-        context.move_to(top_left_x - int(box[0]), top_left_y - int(box[1]))
-        context.set_source_rgb(0, 0, 0)
-        context.show_text(text)
-
-    buf = surface.get_data()
-    a = np.frombuffer(buf, np.uint8)
-    a.shape = (h, w, 4)
-    a = a[:, :, 0]  # grab single channel
-    a = a.astype(np.float32) / 255
-    a = np.expand_dims(a, 0)
-    if rotate:
-        a = image.random_rotation(a, 3 * (w - top_left_x) / w + 1)
-    a = speckle(a)
-
-    return a
-
-
-def shuffle_mats_or_lists(matrix_list, stop_ind=None):
-    ret = []
-    assert all([len(i) == len(matrix_list[0]) for i in matrix_list])
-    len_val = len(matrix_list[0])
-    if stop_ind is None:
-        stop_ind = len_val
-    assert stop_ind <= len_val
-
-    a = list(range(stop_ind))
-    np.random.shuffle(a)
-    a += list(range(stop_ind, len_val))
-    for mat in matrix_list:
-        if isinstance(mat, np.ndarray):
-            ret.append(mat[a])
-        elif isinstance(mat, list):
-            ret.append([mat[i] for i in a])
-        else:
-            raise TypeError('`shuffle_mats_or_lists` only supports '
-                            'numpy.array and list objects.')
-    return ret
-
-
-# Translation of characters to unique integer values
-def text_to_labels(text):
-    ret = []
-    for char in text:
-        ret.append(alphabet.find(char))
-    return ret
-
-
-# Reverse translation of numerical classes back to characters
-def labels_to_text(labels):
-    ret = []
-    for c in labels:
-        if c == len(alphabet):  # CTC Blank
-            ret.append("")
-        else:
-            ret.append(alphabet[c])
-    return "".join(ret)
-
-
-# only a-z and space... probably not too difficult
-# to expand to uppercase and symbols
-
-def is_valid_str(in_str):
-    search = re.compile(regex, re.UNICODE).search
-    return bool(search(in_str))
-
-
-# Uses generator functions to supply train/test with
-# data. 
Image renderings and text are created on the fly
-# each time with random perturbations
-
-class TextImageGenerator(keras.callbacks.Callback):
-
-    def __init__(self, monogram_file, bigram_file, minibatch_size,
-                 img_w, img_h, downsample_factor, val_split,
-                 absolute_max_string_len=16):
-
-        self.minibatch_size = minibatch_size
-        self.img_w = img_w
-        self.img_h = img_h
-        self.monogram_file = monogram_file
-        self.bigram_file = bigram_file
-        self.downsample_factor = downsample_factor
-        self.val_split = val_split
-        self.blank_label = self.get_output_size() - 1
-        self.absolute_max_string_len = absolute_max_string_len
-
-    def get_output_size(self):
-        return len(alphabet) + 1
-
-    # num_words can be independent of the epoch size due to the use of generators
-    # as max_string_len grows, num_words can grow
-    def build_word_list(self, num_words, max_string_len=None, mono_fraction=0.5):
-        assert max_string_len <= self.absolute_max_string_len
-        assert num_words % self.minibatch_size == 0
-        assert (self.val_split * num_words) % self.minibatch_size == 0
-        self.num_words = num_words
-        self.string_list = [''] * self.num_words
-        tmp_string_list = []
-        self.max_string_len = max_string_len
-        self.Y_data = np.ones(
-            [self.num_words, self.absolute_max_string_len]) * -1
-        self.X_text = []
-        self.Y_len = [0] * self.num_words
-
-        def _is_length_of_word_valid(word):
-            return (max_string_len == -1 or
-                    max_string_len is None or
-                    len(word) <= max_string_len)
-
-        # monogram file is sorted by frequency in English speech
-        with codecs.open(self.monogram_file, mode='r', encoding='utf-8') as f:
-            for line in f:
-                if len(tmp_string_list) == int(self.num_words * mono_fraction):
-                    break
-                word = line.rstrip()
-                if _is_length_of_word_valid(word):
-                    tmp_string_list.append(word)
-
-        # bigram file contains common word pairings in English speech
-        with codecs.open(self.bigram_file, mode='r', encoding='utf-8') as f:
-            lines = f.readlines()
-            for line in lines:
-                if len(tmp_string_list) == self.num_words:
-                    break
-                columns = line.lower().split()
-                word = columns[0] + ' ' + columns[1]
-                if is_valid_str(word) and _is_length_of_word_valid(word):
-                    tmp_string_list.append(word)
-        if len(tmp_string_list) != self.num_words:
-            raise IOError('Could not pull enough words'
-                          ' from supplied monogram and bigram files.')
-        # interlace to mix up the easy and hard words
-        self.string_list[::2] = tmp_string_list[:self.num_words // 2]
-        self.string_list[1::2] = tmp_string_list[self.num_words // 2:]
-
-        for i, word in enumerate(self.string_list):
-            self.Y_len[i] = len(word)
-            self.Y_data[i, 0:len(word)] = text_to_labels(word)
-            self.X_text.append(word)
-        self.Y_len = np.expand_dims(np.array(self.Y_len), 1)
-
-        self.cur_val_index = self.val_split
-        self.cur_train_index = 0
-
-    # each time an image is requested from train/val/test, a new random
-    # painting of the text is performed
-    def get_batch(self, index, size, train):
-        # width and height are backwards from typical Keras convention
-        # because width is the time dimension when it gets fed into the RNN
-        if K.image_data_format() == 'channels_first':
-            X_data = np.ones([size, 1, self.img_w, self.img_h])
-        else:
-            X_data = np.ones([size, self.img_w, self.img_h, 1])
-
-        labels = np.ones([size, self.absolute_max_string_len])
-        input_length = np.zeros([size, 1])
-        label_length = np.zeros([size, 1])
-        source_str = []
-        for i in range(size):
-            # Mix in some blank inputs. 
This seems to be important for - # achieving translational invariance - if train and i > size - 4: - if K.image_data_format() == 'channels_first': - X_data[i, 0, 0:self.img_w, :] = self.paint_func('')[ - 0, :, :].T - else: - X_data[i, 0:self.img_w, :, 0] = self.paint_func('',)[ - 0, :, :].T - labels[i, 0] = self.blank_label - input_length[i] = self.img_w // self.downsample_factor - 2 - label_length[i] = 1 - source_str.append('') - else: - if K.image_data_format() == 'channels_first': - X_data[i, 0, 0:self.img_w, :] = ( - self.paint_func(self.X_text[index + i])[0, :, :].T) - else: - X_data[i, 0:self.img_w, :, 0] = ( - self.paint_func(self.X_text[index + i])[0, :, :].T) - labels[i, :] = self.Y_data[index + i] - input_length[i] = self.img_w // self.downsample_factor - 2 - label_length[i] = self.Y_len[index + i] - source_str.append(self.X_text[index + i]) - inputs = {'the_input': X_data, - 'the_labels': labels, - 'input_length': input_length, - 'label_length': label_length, - 'source_str': source_str # used for visualization only - } - # dummy data for dummy loss function - outputs = {'ctc': np.zeros([size])} - return (inputs, outputs) - - def next_train(self): - while 1: - ret = self.get_batch(self.cur_train_index, - self.minibatch_size, train=True) - self.cur_train_index += self.minibatch_size - if self.cur_train_index >= self.val_split: - self.cur_train_index = self.cur_train_index % 32 - (self.X_text, self.Y_data, self.Y_len) = shuffle_mats_or_lists( - [self.X_text, self.Y_data, self.Y_len], self.val_split) - yield ret - - def next_val(self): - while 1: - ret = self.get_batch(self.cur_val_index, - self.minibatch_size, train=False) - self.cur_val_index += self.minibatch_size - if self.cur_val_index >= self.num_words: - self.cur_val_index = self.val_split + self.cur_val_index % 32 - yield ret - - def on_train_begin(self, logs={}): - self.build_word_list(16000, 4, 1) - self.paint_func = lambda text: paint_text( - text, self.img_w, self.img_h, - rotate=False, ud=False, multi_fonts=False) - - def on_epoch_begin(self, epoch, logs={}): - # rebind the paint function to implement curriculum learning - if 3 <= epoch < 6: - self.paint_func = lambda text: paint_text( - text, self.img_w, self.img_h, - rotate=False, ud=True, multi_fonts=False) - elif 6 <= epoch < 9: - self.paint_func = lambda text: paint_text( - text, self.img_w, self.img_h, - rotate=False, ud=True, multi_fonts=True) - elif epoch >= 9: - self.paint_func = lambda text: paint_text( - text, self.img_w, self.img_h, - rotate=True, ud=True, multi_fonts=True) - if epoch >= 21 and self.max_string_len < 12: - self.build_word_list(32000, 12, 0.5) - - -# the actual loss calc occurs here despite it not being -# an internal Keras loss function - -def ctc_lambda_func(args): - y_pred, labels, input_length, label_length = args - # the 2 is critical here since the first couple outputs of the RNN - # tend to be garbage: - y_pred = y_pred[:, 2:, :] - return K.ctc_batch_cost(labels, y_pred, input_length, label_length) - - -# For a real OCR application, this should be beam search with a dictionary -# and language model. For this example, best path is sufficient. 
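-# As a toy illustration of the best-path rule implemented below (made-up
-# argmax indices; len(alphabet) == 27 acts as the CTC blank):
-#   per-timestep argmax: [1, 1, 27, 14, 14, 27, 4]
-#   itertools.groupby collapses repeats: [1, 27, 14, 27, 4]
-#   labels_to_text then drops the blanks: [1, 14, 4] -> 'boe'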
- -def decode_batch(test_func, word_batch): - out = test_func([word_batch])[0] - ret = [] - for j in range(out.shape[0]): - out_best = list(np.argmax(out[j, 2:], 1)) - out_best = [k for k, g in itertools.groupby(out_best)] - outstr = labels_to_text(out_best) - ret.append(outstr) - return ret - - -class VizCallback(keras.callbacks.Callback): - - def __init__(self, run_name, test_func, text_img_gen, num_display_words=6): - self.test_func = test_func - self.output_dir = os.path.join( - OUTPUT_DIR, run_name) - self.text_img_gen = text_img_gen - self.num_display_words = num_display_words - if not os.path.exists(self.output_dir): - os.makedirs(self.output_dir) - - def show_edit_distance(self, num): - num_left = num - mean_norm_ed = 0.0 - mean_ed = 0.0 - while num_left > 0: - word_batch = next(self.text_img_gen)[0] - num_proc = min(word_batch['the_input'].shape[0], num_left) - decoded_res = decode_batch(self.test_func, - word_batch['the_input'][0:num_proc]) - for j in range(num_proc): - edit_dist = editdistance.eval(decoded_res[j], - word_batch['source_str'][j]) - mean_ed += float(edit_dist) - mean_norm_ed += float(edit_dist) / \ - len(word_batch['source_str'][j]) - num_left -= num_proc - mean_norm_ed = mean_norm_ed / num - mean_ed = mean_ed / num - print('\nOut of %d samples: Mean edit distance:' - '%.3f Mean normalized edit distance: %0.3f' - % (num, mean_ed, mean_norm_ed)) - - def on_epoch_end(self, epoch, logs={}): - self.model.save_weights( - os.path.join(self.output_dir, 'weights%02d.h5' % (epoch))) - self.show_edit_distance(256) - word_batch = next(self.text_img_gen)[0] - res = decode_batch(self.test_func, - word_batch['the_input'][0:self.num_display_words]) - if word_batch['the_input'][0].shape[0] < 256: - cols = 2 - else: - cols = 1 - for i in range(self.num_display_words): - pylab.subplot(self.num_display_words // cols, cols, i + 1) - if K.image_data_format() == 'channels_first': - the_input = word_batch['the_input'][i, 0, :, :] - else: - the_input = word_batch['the_input'][i, :, :, 0] - pylab.imshow(the_input.T, cmap='Greys_r') - pylab.xlabel( - 'Truth = \'%s\'\nDecoded = \'%s\'' % - (word_batch['source_str'][i], res[i])) - fig = pylab.gcf() - fig.set_size_inches(10, 13) - pylab.savefig(os.path.join(self.output_dir, 'e%02d.png' % (epoch))) - pylab.close() - - -def train(run_name, start_epoch, stop_epoch, img_w): - # Input Parameters - img_h = 64 - words_per_epoch = 16000 - val_split = 0.2 - val_words = int(words_per_epoch * (val_split)) - - # Network parameters - conv_filters = 16 - kernel_size = (3, 3) - pool_size = 2 - time_dense_size = 32 - rnn_size = 512 - minibatch_size = 32 - - if K.image_data_format() == 'channels_first': - input_shape = (1, img_w, img_h) - else: - input_shape = (img_w, img_h, 1) - - fdir = os.path.dirname( - get_file('wordlists.tgz', - origin='http://www.mythic-ai.com/datasets/wordlists.tgz', - untar=True)) - - img_gen = TextImageGenerator( - monogram_file=os.path.join(fdir, 'wordlist_mono_clean.txt'), - bigram_file=os.path.join(fdir, 'wordlist_bi_clean.txt'), - minibatch_size=minibatch_size, - img_w=img_w, - img_h=img_h, - downsample_factor=(pool_size ** 2), - val_split=words_per_epoch - val_words) - act = 'relu' - input_data = Input(name='the_input', shape=input_shape, dtype='float32') - inner = Conv2D(conv_filters, kernel_size, padding='same', - activation=act, kernel_initializer='he_normal', - name='conv1')(input_data) - inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max1')(inner) - inner = Conv2D(conv_filters, kernel_size, padding='same', - 
activation=act, kernel_initializer='he_normal',
-                   name='conv2')(inner)
-    inner = MaxPooling2D(pool_size=(pool_size, pool_size), name='max2')(inner)
-
-    conv_to_rnn_dims = (img_w // (pool_size ** 2),
-                        (img_h // (pool_size ** 2)) * conv_filters)
-    inner = Reshape(target_shape=conv_to_rnn_dims, name='reshape')(inner)
-
-    # cuts down input size going into RNN:
-    inner = Dense(time_dense_size, activation=act, name='dense1')(inner)
-
-    # Two layers of bidirectional GRUs
-    # GRU seems to work as well, if not better than LSTM:
-    gru_1 = GRU(rnn_size, return_sequences=True,
-                kernel_initializer='he_normal', name='gru1')(inner)
-    gru_1b = GRU(rnn_size, return_sequences=True,
-                 go_backwards=True, kernel_initializer='he_normal',
-                 name='gru1_b')(inner)
-    gru1_merged = add([gru_1, gru_1b])
-    gru_2 = GRU(rnn_size, return_sequences=True,
-                kernel_initializer='he_normal', name='gru2')(gru1_merged)
-    gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
-                 kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
-
-    # transforms RNN output to character activations:
-    inner = Dense(img_gen.get_output_size(), kernel_initializer='he_normal',
-                  name='dense2')(concatenate([gru_2, gru_2b]))
-    y_pred = Activation('softmax', name='softmax')(inner)
-    Model(inputs=input_data, outputs=y_pred).summary()
-
-    labels = Input(name='the_labels',
-                   shape=[img_gen.absolute_max_string_len], dtype='float32')
-    input_length = Input(name='input_length', shape=[1], dtype='int64')
-    label_length = Input(name='label_length', shape=[1], dtype='int64')
-    # Keras doesn't currently support loss funcs with extra parameters
-    # so CTC loss is implemented in a lambda layer
-    loss_out = Lambda(
-        ctc_lambda_func, output_shape=(1,),
-        name='ctc')([y_pred, labels, input_length, label_length])
-
-    # clipnorm seems to speed up convergence
-    sgd = SGD(lr=0.02, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
-
-    model = Model(inputs=[input_data, labels, input_length, label_length],
-                  outputs=loss_out)
-
-    # the loss calc occurs elsewhere, so use a dummy lambda func for the loss
-    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
-    if start_epoch > 0:
-        weight_file = os.path.join(
-            OUTPUT_DIR,
-            os.path.join(run_name, 'weights%02d.h5' % (start_epoch - 1)))
-        model.load_weights(weight_file)
-    # captures output of softmax so we can decode the output during visualization
-    test_func = K.function([input_data], [y_pred])
-
-    viz_cb = VizCallback(run_name, test_func, img_gen.next_val())
-
-    model.fit_generator(
-        generator=img_gen.next_train(),
-        steps_per_epoch=(words_per_epoch - val_words) // minibatch_size,
-        epochs=stop_epoch,
-        validation_data=img_gen.next_val(),
-        validation_steps=val_words // minibatch_size,
-        callbacks=[viz_cb, img_gen],
-        initial_epoch=start_epoch)
-
-
-if __name__ == '__main__':
-    run_name = datetime.datetime.now().strftime('%Y:%m:%d:%H:%M:%S')
-    train(run_name, 0, 20, 128)
-    # increase to wider images and start at epoch 20.
-    # The learned weights are reloaded
-    train(run_name, 20, 25, 512)
-'''
-#Trains a Bidirectional LSTM on the IMDB sentiment classification task.
-
-Output after 4 epochs on CPU: ~0.8146
-Time per epoch on CPU (Core i7): ~150s. 
-''' - -from __future__ import print_function -import numpy as np - -from keras.preprocessing import sequence -from keras.models import Sequential -from keras.layers import Dense, Dropout, Embedding, LSTM, Bidirectional -from keras.datasets import imdb - - -max_features = 20000 -# cut texts after this number of words -# (among top max_features most common words) -maxlen = 100 -batch_size = 32 - -print('Loading data...') -(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) -print(len(x_train), 'train sequences') -print(len(x_test), 'test sequences') - -print('Pad sequences (samples x time)') -x_train = sequence.pad_sequences(x_train, maxlen=maxlen) -x_test = sequence.pad_sequences(x_test, maxlen=maxlen) -print('x_train shape:', x_train.shape) -print('x_test shape:', x_test.shape) -y_train = np.array(y_train) -y_test = np.array(y_test) - -model = Sequential() -model.add(Embedding(max_features, 128, input_length=maxlen)) -model.add(Bidirectional(LSTM(64))) -model.add(Dropout(0.5)) -model.add(Dense(1, activation='sigmoid')) - -# try using different optimizers and different optimizer configs -model.compile('adam', 'binary_crossentropy', metrics=['accuracy']) - -print('Train...') -model.fit(x_train, y_train, - batch_size=batch_size, - epochs=4, - validation_data=[x_test, y_test]) -''' -#This example demonstrates the use of Convolution1D for text classification. - -Gets to 0.89 test accuracy after 2 epochs.
-90s/epoch on Intel i5 2.4 GHz CPU. 
-10s/epoch on Tesla K40 GPU. -''' -from __future__ import print_function - -from keras.preprocessing import sequence -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation -from keras.layers import Embedding -from keras.layers import Conv1D, GlobalMaxPooling1D -from keras.datasets import imdb - -# set parameters: -max_features = 5000 -maxlen = 400 -batch_size = 32 -embedding_dims = 50 -filters = 250 -kernel_size = 3 -hidden_dims = 250 -epochs = 2 - -print('Loading data...') -(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) -print(len(x_train), 'train sequences') -print(len(x_test), 'test sequences') - -print('Pad sequences (samples x time)') -x_train = sequence.pad_sequences(x_train, maxlen=maxlen) -x_test = sequence.pad_sequences(x_test, maxlen=maxlen) -print('x_train shape:', x_train.shape) -print('x_test shape:', x_test.shape) - -print('Build model...') -model = Sequential() - -# we start off with an efficient embedding layer which maps -# our vocab indices into embedding_dims dimensions -model.add(Embedding(max_features, - embedding_dims, - input_length=maxlen)) -model.add(Dropout(0.2)) - -# we add a Convolution1D, which will learn filters -# word group filters of size filter_length: -model.add(Conv1D(filters, - kernel_size, - padding='valid', - activation='relu', - strides=1)) -# we use max pooling: -model.add(GlobalMaxPooling1D()) - -# We add a vanilla hidden layer: -model.add(Dense(hidden_dims)) -model.add(Dropout(0.2)) -model.add(Activation('relu')) - -# We project onto a single unit output layer, and squash it with a sigmoid: -model.add(Dense(1)) -model.add(Activation('sigmoid')) - -model.compile(loss='binary_crossentropy', - optimizer='adam', - metrics=['accuracy']) -model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test)) -''' -#Train a recurrent convolutional network on the IMDB sentiment classification task. - -Gets to 0.8498 test accuracy after 2 epochs. 41 s/epoch on K520 GPU. -''' -from __future__ import print_function - -from keras.preprocessing import sequence -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation -from keras.layers import Embedding -from keras.layers import LSTM -from keras.layers import Conv1D, MaxPooling1D -from keras.datasets import imdb - -# Embedding -max_features = 20000 -maxlen = 100 -embedding_size = 128 - -# Convolution -kernel_size = 5 -filters = 64 -pool_size = 4 - -# LSTM -lstm_output_size = 70 - -# Training -batch_size = 30 -epochs = 2 - -''' -Note: -batch_size is highly sensitive. -Only 2 epochs are needed as the dataset is very small. 
-'''
-
-print('Loading data...')
-(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features)
-print(len(x_train), 'train sequences')
-print(len(x_test), 'test sequences')
-
-print('Pad sequences (samples x time)')
-x_train = sequence.pad_sequences(x_train, maxlen=maxlen)
-x_test = sequence.pad_sequences(x_test, maxlen=maxlen)
-print('x_train shape:', x_train.shape)
-print('x_test shape:', x_test.shape)
-
-print('Build model...')
-
-model = Sequential()
-model.add(Embedding(max_features, embedding_size, input_length=maxlen))
-model.add(Dropout(0.25))
-model.add(Conv1D(filters,
-                 kernel_size,
-                 padding='valid',
-                 activation='relu',
-                 strides=1))
-model.add(MaxPooling1D(pool_size=pool_size))
-model.add(LSTM(lstm_output_size))
-model.add(Dense(1))
-model.add(Activation('sigmoid'))
-
-model.compile(loss='binary_crossentropy',
-              optimizer='adam',
-              metrics=['accuracy'])
-
-print('Train...')
-model.fit(x_train, y_train,
-          batch_size=batch_size,
-          epochs=epochs,
-          validation_data=(x_test, y_test))
-score, acc = model.evaluate(x_test, y_test, batch_size=batch_size)
-print('Test score:', score)
-print('Test accuracy:', acc)
-'''
-#This example demonstrates the use of fasttext for text classification
-
-Based on Joulin et al.'s paper:
-
-[Bags of Tricks for Efficient Text Classification
-](https://arxiv.org/abs/1607.01759)
-
-Results on IMDB datasets with uni and bi-gram embeddings:
-
-Embedding|Accuracy, 5 epochs|Speed (s/epoch)|Hardware
-:--------|-----------------:|----:|:-------
-Uni-gram | 0.8813| 8|i7 CPU
-Bi-gram | 0.9056| 2|GTX 980M GPU
-
-'''
-
-from __future__ import print_function
-import numpy as np
-
-from keras.preprocessing import sequence
-from keras.models import Sequential
-from keras.layers import Dense
-from keras.layers import Embedding
-from keras.layers import GlobalAveragePooling1D
-from keras.datasets import imdb
-
-
-def create_ngram_set(input_list, ngram_value=2):
-    """
-    Extract a set of n-grams from a list of integers.
-
-    >>> create_ngram_set([1, 4, 9, 4, 1, 4], ngram_value=2)
-    {(4, 9), (4, 1), (1, 4), (9, 4)}
-
-    >>> create_ngram_set([1, 4, 9, 4, 1, 4], ngram_value=3)
-    {(1, 4, 9), (4, 9, 4), (9, 4, 1), (4, 1, 4)}
-    """
-    return set(zip(*[input_list[i:] for i in range(ngram_value)]))
-
-
-def add_ngram(sequences, token_indice, ngram_range=2):
-    """
-    Augment the input list of lists (sequences) by appending n-gram values. 
- - Example: adding bi-gram - >>> sequences = [[1, 3, 4, 5], [1, 3, 7, 9, 2]] - >>> token_indice = {(1, 3): 1337, (9, 2): 42, (4, 5): 2017} - >>> add_ngram(sequences, token_indice, ngram_range=2) - [[1, 3, 4, 5, 1337, 2017], [1, 3, 7, 9, 2, 1337, 42]] - - Example: adding tri-gram - >>> sequences = [[1, 3, 4, 5], [1, 3, 7, 9, 2]] - >>> token_indice = {(1, 3): 1337, (9, 2): 42, (4, 5): 2017, (7, 9, 2): 2018} - >>> add_ngram(sequences, token_indice, ngram_range=3) - [[1, 3, 4, 5, 1337, 2017], [1, 3, 7, 9, 2, 1337, 42, 2018]] - """ - new_sequences = [] - for input_list in sequences: - new_list = input_list[:] - for ngram_value in range(2, ngram_range + 1): - for i in range(len(new_list) - ngram_value + 1): - ngram = tuple(new_list[i:i + ngram_value]) - if ngram in token_indice: - new_list.append(token_indice[ngram]) - new_sequences.append(new_list) - - return new_sequences - - -# Set parameters: -# ngram_range = 2 will add bi-grams features -ngram_range = 1 -max_features = 20000 -maxlen = 400 -batch_size = 32 -embedding_dims = 50 -epochs = 5 - -print('Loading data...') -(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) -print(len(x_train), 'train sequences') -print(len(x_test), 'test sequences') -print('Average train sequence length: {}'.format( - np.mean(list(map(len, x_train)), dtype=int))) -print('Average test sequence length: {}'.format( - np.mean(list(map(len, x_test)), dtype=int))) - -if ngram_range > 1: - print('Adding {}-gram features'.format(ngram_range)) - # Create set of unique n-gram from the training set. - ngram_set = set() - for input_list in x_train: - for i in range(2, ngram_range + 1): - set_of_ngram = create_ngram_set(input_list, ngram_value=i) - ngram_set.update(set_of_ngram) - - # Dictionary mapping n-gram token to a unique integer. - # Integer values are greater than max_features in order - # to avoid collision with existing features. - start_index = max_features + 1 - token_indice = {v: k + start_index for k, v in enumerate(ngram_set)} - indice_token = {token_indice[k]: k for k in token_indice} - - # max_features is the highest integer that could be found in the dataset. 
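-    # (e.g. with max_features = 20000 and five new n-gram tokens, the ids
-    # run up to 20005, so max_features becomes 20006; numbers hypothetical)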
- max_features = np.max(list(indice_token.keys())) + 1 - - # Augmenting x_train and x_test with n-grams features - x_train = add_ngram(x_train, token_indice, ngram_range) - x_test = add_ngram(x_test, token_indice, ngram_range) - print('Average train sequence length: {}'.format( - np.mean(list(map(len, x_train)), dtype=int))) - print('Average test sequence length: {}'.format( - np.mean(list(map(len, x_test)), dtype=int))) - -print('Pad sequences (samples x time)') -x_train = sequence.pad_sequences(x_train, maxlen=maxlen) -x_test = sequence.pad_sequences(x_test, maxlen=maxlen) -print('x_train shape:', x_train.shape) -print('x_test shape:', x_test.shape) - -print('Build model...') -model = Sequential() - -# we start off with an efficient embedding layer which maps -# our vocab indices into embedding_dims dimensions -model.add(Embedding(max_features, - embedding_dims, - input_length=maxlen)) - -# we add a GlobalAveragePooling1D, which will average the embeddings -# of all words in the document -model.add(GlobalAveragePooling1D()) - -# We project onto a single unit output layer, and squash it with a sigmoid: -model.add(Dense(1, activation='sigmoid')) - -model.compile(loss='binary_crossentropy', - optimizer='adam', - metrics=['accuracy']) - -model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test)) -''' -#Trains an LSTM model on the IMDB sentiment classification task. - -The dataset is actually too small for LSTM to be of any advantage -compared to simpler, much faster methods such as TF-IDF + LogReg. - -**Notes** - -- RNNs are tricky. Choice of batch size is important, -choice of loss and optimizer is critical, etc. -Some configurations won't converge. - -- LSTM loss decrease patterns during training can be quite different -from what you see with CNNs/MLPs/etc. - -''' -from __future__ import print_function - -from keras.preprocessing import sequence -from keras.models import Sequential -from keras.layers import Dense, Embedding -from keras.layers import LSTM -from keras.datasets import imdb - -max_features = 20000 -# cut texts after this number of words (among top max_features most common words) -maxlen = 80 -batch_size = 32 - -print('Loading data...') -(x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_features) -print(len(x_train), 'train sequences') -print(len(x_test), 'test sequences') - -print('Pad sequences (samples x time)') -x_train = sequence.pad_sequences(x_train, maxlen=maxlen) -x_test = sequence.pad_sequences(x_test, maxlen=maxlen) -print('x_train shape:', x_train.shape) -print('x_test shape:', x_test.shape) - -print('Build model...') -model = Sequential() -model.add(Embedding(max_features, 128)) -model.add(LSTM(128, dropout=0.2, recurrent_dropout=0.2)) -model.add(Dense(1, activation='sigmoid')) - -# try using different optimizers and different optimizer configs -model.compile(loss='binary_crossentropy', - optimizer='adam', - metrics=['accuracy']) - -print('Train...') -model.fit(x_train, y_train, - batch_size=batch_size, - epochs=15, - validation_data=(x_test, y_test)) -score, acc = model.evaluate(x_test, y_test, - batch_size=batch_size) -print('Test score:', score) -print('Test accuracy:', acc) -''' -#Sequence to sequence example in Keras (character-level). - -This script demonstrates how to implement a basic character-level -sequence-to-sequence model. We apply it to translating -short English sentences into short French sentences, -character-by-character. 
Note that it is fairly unusual to -do character-level machine translation, as word-level -models are more common in this domain. - -**Summary of the algorithm** - -- We start with input sequences from a domain (e.g. English sentences) - and corresponding target sequences from another domain - (e.g. French sentences). -- An encoder LSTM turns input sequences to 2 state vectors - (we keep the last LSTM state and discard the outputs). -- A decoder LSTM is trained to turn the target sequences into - the same sequence but offset by one timestep in the future, - a training process called "teacher forcing" in this context. - It uses as initial state the state vectors from the encoder. - Effectively, the decoder learns to generate `targets[t+1...]` - given `targets[...t]`, conditioned on the input sequence. -- In inference mode, when we want to decode unknown input sequences, we: - - Encode the input sequence into state vectors - - Start with a target sequence of size 1 - (just the start-of-sequence character) - - Feed the state vectors and 1-char target sequence - to the decoder to produce predictions for the next character - - Sample the next character using these predictions - (we simply use argmax). - - Append the sampled character to the target sequence - - Repeat until we generate the end-of-sequence character or we - hit the character limit. - -**Data download** - -[English to French sentence pairs. -](http://www.manythings.org/anki/fra-eng.zip) - -[Lots of neat sentence pairs datasets. -](http://www.manythings.org/anki/) - -**References** - -- [Sequence to Sequence Learning with Neural Networks - ](https://arxiv.org/abs/1409.3215) -- [Learning Phrase Representations using - RNN Encoder-Decoder for Statistical Machine Translation - ](https://arxiv.org/abs/1406.1078) -''' -from __future__ import print_function - -from keras.models import Model -from keras.layers import Input, LSTM, Dense -import numpy as np - -batch_size = 64 # Batch size for training. -epochs = 100 # Number of epochs to train for. -latent_dim = 256 # Latent dimensionality of the encoding space. -num_samples = 10000 # Number of samples to train on. -# Path to the data txt file on disk. -data_path = 'fra-eng/fra.txt' - -# Vectorize the data. -input_texts = [] -target_texts = [] -input_characters = set() -target_characters = set() -with open(data_path, 'r', encoding='utf-8') as f: - lines = f.read().split('\n') -for line in lines[: min(num_samples, len(lines) - 1)]: - input_text, target_text = line.split('\t') - # We use "tab" as the "start sequence" character - # for the targets, and "\n" as "end sequence" character. 
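-    # e.g. a (hypothetical) pair "Go." / "Va !" becomes
-    # input "Go." and target "\tVa !\n"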
- target_text = '\t' + target_text + '\n' - input_texts.append(input_text) - target_texts.append(target_text) - for char in input_text: - if char not in input_characters: - input_characters.add(char) - for char in target_text: - if char not in target_characters: - target_characters.add(char) - -input_characters = sorted(list(input_characters)) -target_characters = sorted(list(target_characters)) -num_encoder_tokens = len(input_characters) -num_decoder_tokens = len(target_characters) -max_encoder_seq_length = max([len(txt) for txt in input_texts]) -max_decoder_seq_length = max([len(txt) for txt in target_texts]) - -print('Number of samples:', len(input_texts)) -print('Number of unique input tokens:', num_encoder_tokens) -print('Number of unique output tokens:', num_decoder_tokens) -print('Max sequence length for inputs:', max_encoder_seq_length) -print('Max sequence length for outputs:', max_decoder_seq_length) - -input_token_index = dict( - [(char, i) for i, char in enumerate(input_characters)]) -target_token_index = dict( - [(char, i) for i, char in enumerate(target_characters)]) - -encoder_input_data = np.zeros( - (len(input_texts), max_encoder_seq_length, num_encoder_tokens), - dtype='float32') -decoder_input_data = np.zeros( - (len(input_texts), max_decoder_seq_length, num_decoder_tokens), - dtype='float32') -decoder_target_data = np.zeros( - (len(input_texts), max_decoder_seq_length, num_decoder_tokens), - dtype='float32') - -for i, (input_text, target_text) in enumerate(zip(input_texts, target_texts)): - for t, char in enumerate(input_text): - encoder_input_data[i, t, input_token_index[char]] = 1. - for t, char in enumerate(target_text): - # decoder_target_data is ahead of decoder_input_data by one timestep - decoder_input_data[i, t, target_token_index[char]] = 1. - if t > 0: - # decoder_target_data will be ahead by one timestep - # and will not include the start character. - decoder_target_data[i, t - 1, target_token_index[char]] = 1. - -# Define an input sequence and process it. -encoder_inputs = Input(shape=(None, num_encoder_tokens)) -encoder = LSTM(latent_dim, return_state=True) -encoder_outputs, state_h, state_c = encoder(encoder_inputs) -# We discard `encoder_outputs` and only keep the states. -encoder_states = [state_h, state_c] - -# Set up the decoder, using `encoder_states` as initial state. -decoder_inputs = Input(shape=(None, num_decoder_tokens)) -# We set up our decoder to return full output sequences, -# and to return internal states as well. We don't use the -# return states in the training model, but we will use them in inference. -decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True) -decoder_outputs, _, _ = decoder_lstm(decoder_inputs, - initial_state=encoder_states) -decoder_dense = Dense(num_decoder_tokens, activation='softmax') -decoder_outputs = decoder_dense(decoder_outputs) - -# Define the model that will turn -# `encoder_input_data` & `decoder_input_data` into `decoder_target_data` -model = Model([encoder_inputs, decoder_inputs], decoder_outputs) - -# Run training -model.compile(optimizer='rmsprop', loss='categorical_crossentropy') -model.fit([encoder_input_data, decoder_input_data], decoder_target_data, - batch_size=batch_size, - epochs=epochs, - validation_split=0.2) -# Save model -model.save('s2s.h5') - -# Next: inference mode (sampling). -# Here's the drill: -# 1) encode input and retrieve initial decoder state -# 2) run one step of decoder with this initial state -# and a "start of sequence" token as target. 
-# Output will be the next target token
-# 3) Repeat with the current target token and current states
-
-# Define sampling models
-encoder_model = Model(encoder_inputs, encoder_states)
-
-decoder_state_input_h = Input(shape=(latent_dim,))
-decoder_state_input_c = Input(shape=(latent_dim,))
-decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c]
-decoder_outputs, state_h, state_c = decoder_lstm(
-    decoder_inputs, initial_state=decoder_states_inputs)
-decoder_states = [state_h, state_c]
-decoder_outputs = decoder_dense(decoder_outputs)
-decoder_model = Model(
-    [decoder_inputs] + decoder_states_inputs,
-    [decoder_outputs] + decoder_states)
-
-# Reverse-lookup token index to decode sequences back to
-# something readable.
-reverse_input_char_index = dict(
-    (i, char) for char, i in input_token_index.items())
-reverse_target_char_index = dict(
-    (i, char) for char, i in target_token_index.items())
-
-
-def decode_sequence(input_seq):
-    # Encode the input as state vectors.
-    states_value = encoder_model.predict(input_seq)
-
-    # Generate empty target sequence of length 1.
-    target_seq = np.zeros((1, 1, num_decoder_tokens))
-    # Populate the first character of target sequence with the start character.
-    target_seq[0, 0, target_token_index['\t']] = 1.
-
-    # Sampling loop for a batch of sequences
-    # (to simplify, here we assume a batch of size 1).
-    stop_condition = False
-    decoded_sentence = ''
-    while not stop_condition:
-        output_tokens, h, c = decoder_model.predict(
-            [target_seq] + states_value)
-
-        # Sample a token
-        sampled_token_index = np.argmax(output_tokens[0, -1, :])
-        sampled_char = reverse_target_char_index[sampled_token_index]
-        decoded_sentence += sampled_char
-
-        # Exit condition: either hit max length
-        # or find stop character.
-        if (sampled_char == '\n' or
-                len(decoded_sentence) > max_decoder_seq_length):
-            stop_condition = True
-
-        # Update the target sequence (of length 1).
-        target_seq = np.zeros((1, 1, num_decoder_tokens))
-        target_seq[0, 0, sampled_token_index] = 1.
-
-        # Update states
-        states_value = [h, c]
-
-    return decoded_sentence
-
-
-for seq_index in range(100):
-    # Take one sequence (part of the training set)
-    # for trying out decoding.
-    input_seq = encoder_input_data[seq_index: seq_index + 1]
-    decoded_sentence = decode_sequence(input_seq)
-    print('-')
-    print('Input sentence:', input_texts[seq_index])
-    print('Decoded sentence:', decoded_sentence)
-'''
-#Restore a character-level sequence to sequence model from disk to generate predictions.
-
-This script loads the ```s2s.h5``` model saved by [lstm_seq2seq.py
-](/examples/lstm_seq2seq/) and generates sequences from it. It assumes
-that no changes have been made (for example: ```latent_dim``` is unchanged,
-and the input data and model architecture are unchanged).
-
-See [lstm_seq2seq.py](/examples/lstm_seq2seq/) for more details on the
-model architecture and how it is trained.
-'''
-from __future__ import print_function
-
-from keras.models import Model, load_model
-from keras.layers import Input
-import numpy as np
-
-batch_size = 64  # Batch size for training.
-epochs = 100  # Number of epochs to train for.
-latent_dim = 256  # Latent dimensionality of the encoding space.
-num_samples = 10000  # Number of samples to train on.
-# Path to the data txt file on disk.
-data_path = 'fra-eng/fra.txt'
-
-# Vectorize the data. We use the same approach as the training script.
-# NOTE: the data must be identical, in order for the character -> integer
-# mappings to be consistent. 
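-# (both scripts derive the indices from the sorted character sets, so adding
-# or removing even one character would shift every one-hot position)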
-# We omit encoding target_texts since they are not needed. -input_texts = [] -target_texts = [] -input_characters = set() -target_characters = set() -with open(data_path, 'r', encoding='utf-8') as f: - lines = f.read().split('\n') -for line in lines[: min(num_samples, len(lines) - 1)]: - input_text, target_text = line.split('\t') - # We use "tab" as the "start sequence" character - # for the targets, and "\n" as "end sequence" character. - target_text = '\t' + target_text + '\n' - input_texts.append(input_text) - target_texts.append(target_text) - for char in input_text: - if char not in input_characters: - input_characters.add(char) - for char in target_text: - if char not in target_characters: - target_characters.add(char) - -input_characters = sorted(list(input_characters)) -target_characters = sorted(list(target_characters)) -num_encoder_tokens = len(input_characters) -num_decoder_tokens = len(target_characters) -max_encoder_seq_length = max([len(txt) for txt in input_texts]) -max_decoder_seq_length = max([len(txt) for txt in target_texts]) - -print('Number of samples:', len(input_texts)) -print('Number of unique input tokens:', num_encoder_tokens) -print('Number of unique output tokens:', num_decoder_tokens) -print('Max sequence length for inputs:', max_encoder_seq_length) -print('Max sequence length for outputs:', max_decoder_seq_length) - -input_token_index = dict( - [(char, i) for i, char in enumerate(input_characters)]) -target_token_index = dict( - [(char, i) for i, char in enumerate(target_characters)]) - -encoder_input_data = np.zeros( - (len(input_texts), max_encoder_seq_length, num_encoder_tokens), - dtype='float32') - -for i, input_text in enumerate(input_texts): - for t, char in enumerate(input_text): - encoder_input_data[i, t, input_token_index[char]] = 1. - -# Restore the model and construct the encoder and decoder. -model = load_model('s2s.h5') - -encoder_inputs = model.input[0] # input_1 -encoder_outputs, state_h_enc, state_c_enc = model.layers[2].output # lstm_1 -encoder_states = [state_h_enc, state_c_enc] -encoder_model = Model(encoder_inputs, encoder_states) - -decoder_inputs = model.input[1] # input_2 -decoder_state_input_h = Input(shape=(latent_dim,), name='input_3') -decoder_state_input_c = Input(shape=(latent_dim,), name='input_4') -decoder_states_inputs = [decoder_state_input_h, decoder_state_input_c] -decoder_lstm = model.layers[3] -decoder_outputs, state_h_dec, state_c_dec = decoder_lstm( - decoder_inputs, initial_state=decoder_states_inputs) -decoder_states = [state_h_dec, state_c_dec] -decoder_dense = model.layers[4] -decoder_outputs = decoder_dense(decoder_outputs) -decoder_model = Model( - [decoder_inputs] + decoder_states_inputs, - [decoder_outputs] + decoder_states) - -# Reverse-lookup token index to decode sequences back to -# something readable. -reverse_input_char_index = dict( - (i, char) for char, i in input_token_index.items()) -reverse_target_char_index = dict( - (i, char) for char, i in target_token_index.items()) - - -# Decodes an input sequence. Future work should support beam search. -def decode_sequence(input_seq): - # Encode the input as state vectors. - states_value = encoder_model.predict(input_seq) - - # Generate empty target sequence of length 1. - target_seq = np.zeros((1, 1, num_decoder_tokens)) - # Populate the first character of target sequence with the start character. - target_seq[0, 0, target_token_index['\t']] = 1. - - # Sampling loop for a batch of sequences - # (to simplify, here we assume a batch of size 1). 
-    stop_condition = False
-    decoded_sentence = ''
-    while not stop_condition:
-        output_tokens, h, c = decoder_model.predict(
-            [target_seq] + states_value)
-
-        # Sample a token
-        sampled_token_index = np.argmax(output_tokens[0, -1, :])
-        sampled_char = reverse_target_char_index[sampled_token_index]
-        decoded_sentence += sampled_char
-
-        # Exit condition: either hit max length
-        # or find stop character.
-        if (sampled_char == '\n' or
-                len(decoded_sentence) > max_decoder_seq_length):
-            stop_condition = True
-
-        # Update the target sequence (of length 1).
-        target_seq = np.zeros((1, 1, num_decoder_tokens))
-        target_seq[0, 0, sampled_token_index] = 1.
-
-        # Update states
-        states_value = [h, c]
-
-    return decoded_sentence
-
-
-for seq_index in range(100):
-    # Take one sequence (part of the training set)
-    # for trying out decoding.
-    input_seq = encoder_input_data[seq_index: seq_index + 1]
-    decoded_sentence = decode_sequence(input_seq)
-    print('-')
-    print('Input sentence:', input_texts[seq_index])
-    print('Decoded sentence:', decoded_sentence)
-'''
-#How to use a stateful LSTM model, stateful vs stateless LSTM performance comparison
-
-[More documentation about the Keras LSTM model](/layers/recurrent/#lstm)
-
-The models are trained on an input/output pair, where
-the input is a generated uniformly distributed
-random sequence of length = `input_len`,
-and the output is a moving average of the input with window length = `tsteps`.
-Both `input_len` and `tsteps` are defined in the "editable parameters"
-section.
-
-A larger `tsteps` value means that the LSTM will need more memory
-to figure out the input-output relationship.
-This memory length is controlled by the `lahead` variable (more details below).
-
-The rest of the parameters are:
-
-- `input_len`: the length of the generated input sequence
-- `lahead`: the input sequence length that the LSTM
-  is trained on for each output point
-- `batch_size`, `epochs`: same parameters as in the `model.fit(...)`
-  function
-
-When `lahead > 1`, the model input is preprocessed to a "rolling window view"
-of the data, with the window length = `lahead`.
-This is similar to scikit-image's `view_as_windows`
-with `window_shape` [being a single number.](
-http://scikit-image.org/docs/0.10.x/api/skimage.util.html#view-as-windows)
-
-When `lahead < tsteps`, only the stateful LSTM converges because its
-statefulness allows it to see beyond the capability that lahead
-gave it to fit the n-point average. The stateless LSTM does not have
-this capability, and hence is limited by its `lahead` parameter,
-which is not sufficient to see the n-point average.
-
-When `lahead >= tsteps`, both the stateful and stateless LSTM converge.
-'''
-from __future__ import print_function
-import numpy as np
-import matplotlib.pyplot as plt
-import pandas as pd
-from keras.models import Sequential
-from keras.layers import Dense, LSTM
-
-# ----------------------------------------------------------
-# EDITABLE PARAMETERS
-# Read the documentation in the script head for more details
-# ----------------------------------------------------------
-
-# length of input
-input_len = 1000
-
-# The window length of the moving average used to generate
-# the output from the input in the input/output pair used
-# to train the LSTM
-# e.g. 
if tsteps=2 and input=[1, 2, 3, 4, 5], -# then output=[1.5, 2.5, 3.5, 4.5] -tsteps = 2 - -# The input sequence length that the LSTM is trained on for each output point -lahead = 1 - -# training parameters passed to "model.fit(...)" -batch_size = 1 -epochs = 10 - -# ------------ -# MAIN PROGRAM -# ------------ - -print("*" * 33) -if lahead >= tsteps: - print("STATELESS LSTM WILL ALSO CONVERGE") -else: - print("STATELESS LSTM WILL NOT CONVERGE") -print("*" * 33) - -np.random.seed(1986) - -print('Generating Data...') - - -def gen_uniform_amp(amp=1, xn=10000): - """Generates uniform random data between - -amp and +amp - and of length xn - - # Arguments - amp: maximum/minimum range of uniform data - xn: length of series - """ - data_input = np.random.uniform(-1 * amp, +1 * amp, xn) - data_input = pd.DataFrame(data_input) - return data_input - - -# Since the output is a moving average of the input, -# the first few points of output will be NaN -# and will be dropped from the generated data -# before training the LSTM. -# Also, when lahead > 1, -# the preprocessing step later of "rolling window view" -# will also cause some points to be lost. -# For aesthetic reasons, -# in order to maintain generated data length = input_len after pre-processing, -# add a few points to account for the values that will be lost. -to_drop = max(tsteps - 1, lahead - 1) -data_input = gen_uniform_amp(amp=0.1, xn=input_len + to_drop) - -# set the target to be a N-point average of the input -expected_output = data_input.rolling(window=tsteps, center=False).mean() - -# when lahead > 1, need to convert the input to "rolling window view" -# https://docs.scipy.org/doc/numpy/reference/generated/numpy.repeat.html -if lahead > 1: - data_input = np.repeat(data_input.values, repeats=lahead, axis=1) - data_input = pd.DataFrame(data_input) - for i, c in enumerate(data_input.columns): - data_input[c] = data_input[c].shift(i) - -# drop the nan -expected_output = expected_output[to_drop:] -data_input = data_input[to_drop:] - -print('Input shape:', data_input.shape) -print('Output shape:', expected_output.shape) -print('Input head: ') -print(data_input.head()) -print('Output head: ') -print(expected_output.head()) -print('Input tail: ') -print(data_input.tail()) -print('Output tail: ') -print(expected_output.tail()) - -print('Plotting input and expected output') -plt.plot(data_input[0][:10], '.') -plt.plot(expected_output[0][:10], '-') -plt.legend(['Input', 'Expected output']) -plt.title('Input') -plt.show() - - -def create_model(stateful): - model = Sequential() - model.add(LSTM(20, - input_shape=(lahead, 1), - batch_size=batch_size, - stateful=stateful)) - model.add(Dense(1)) - model.compile(loss='mse', optimizer='adam') - return model - - -print('Creating Stateful Model...') -model_stateful = create_model(stateful=True) - - -# split train/test data -def split_data(x, y, ratio=0.8): - to_train = int(input_len * ratio) - # tweak to match with batch_size - to_train -= to_train % batch_size - - x_train = x[:to_train] - y_train = y[:to_train] - x_test = x[to_train:] - y_test = y[to_train:] - - # tweak to match with batch_size - to_drop = x.shape[0] % batch_size - if to_drop > 0: - x_test = x_test[:-1 * to_drop] - y_test = y_test[:-1 * to_drop] - - # some reshaping - def reshape_3(x): return x.values.reshape((x.shape[0], x.shape[1], 1)) - x_train = reshape_3(x_train) - x_test = reshape_3(x_test) - - def reshape_2(x): return x.values.reshape((x.shape[0], 1)) - y_train = reshape_2(y_train) - y_test = reshape_2(y_test) - - return (x_train, 
y_train), (x_test, y_test) - - -(x_train, y_train), (x_test, y_test) = split_data(data_input, expected_output) -print('x_train.shape: ', x_train.shape) -print('y_train.shape: ', y_train.shape) -print('x_test.shape: ', x_test.shape) -print('y_test.shape: ', y_test.shape) - -print('Training') -for i in range(epochs): - print('Epoch', i + 1, '/', epochs) - # Note that the last state for sample i in a batch will - # be used as initial state for sample i in the next batch. - # Thus we are simultaneously training on batch_size series with - # lower resolution than the original series contained in data_input. - # Each of these series are offset by one step and can be - # extracted with data_input[i::batch_size]. - model_stateful.fit(x_train, - y_train, - batch_size=batch_size, - epochs=1, - verbose=1, - validation_data=(x_test, y_test), - shuffle=False) - model_stateful.reset_states() - -print('Predicting') -predicted_stateful = model_stateful.predict(x_test, batch_size=batch_size) - -print('Creating Stateless Model...') -model_stateless = create_model(stateful=False) - -print('Training') -model_stateless.fit(x_train, - y_train, - batch_size=batch_size, - epochs=epochs, - verbose=1, - validation_data=(x_test, y_test), - shuffle=False) - -print('Predicting') -predicted_stateless = model_stateless.predict(x_test, batch_size=batch_size) - -# ---------------------------- - -print('Plotting Results') -plt.subplot(3, 1, 1) -plt.plot(y_test) -plt.title('Expected') -plt.subplot(3, 1, 2) -# drop the first "tsteps-1" because it is not possible to predict them -# since the "previous" timesteps to use do not exist -plt.plot((y_test - predicted_stateful).flatten()[tsteps - 1:]) -plt.title('Stateful: Expected - Predicted') -plt.subplot(3, 1, 3) -plt.plot((y_test - predicted_stateless).flatten()) -plt.title('Stateless: Expected - Predicted') -plt.show() -''' -#Example script to generate text from Nietzsche's writings. - -At least 20 epochs are required before the generated text -starts sounding coherent. - -It is recommended to run this script on GPU, as recurrent -networks are quite computationally intensive. - -If you try this script on new data, make sure your corpus -has at least ~100k characters. ~1M is better. 
-''' - -from __future__ import print_function -from keras.callbacks import LambdaCallback -from keras.models import Sequential -from keras.layers import Dense -from keras.layers import LSTM -from keras.optimizers import RMSprop -from keras.utils.data_utils import get_file -import numpy as np -import random -import sys -import io - -path = get_file( - 'nietzsche.txt', - origin='https://s3.amazonaws.com/text-datasets/nietzsche.txt') -with io.open(path, encoding='utf-8') as f: - text = f.read().lower() -print('corpus length:', len(text)) - -chars = sorted(list(set(text))) -print('total chars:', len(chars)) -char_indices = dict((c, i) for i, c in enumerate(chars)) -indices_char = dict((i, c) for i, c in enumerate(chars)) - -# cut the text in semi-redundant sequences of maxlen characters -maxlen = 40 -step = 3 -sentences = [] -next_chars = [] -for i in range(0, len(text) - maxlen, step): - sentences.append(text[i: i + maxlen]) - next_chars.append(text[i + maxlen]) -print('nb sequences:', len(sentences)) - -print('Vectorization...') -x = np.zeros((len(sentences), maxlen, len(chars)), dtype=np.bool) -y = np.zeros((len(sentences), len(chars)), dtype=np.bool) -for i, sentence in enumerate(sentences): - for t, char in enumerate(sentence): - x[i, t, char_indices[char]] = 1 - y[i, char_indices[next_chars[i]]] = 1 - - -# build the model: a single LSTM -print('Build model...') -model = Sequential() -model.add(LSTM(128, input_shape=(maxlen, len(chars)))) -model.add(Dense(len(chars), activation='softmax')) - -optimizer = RMSprop(lr=0.01) -model.compile(loss='categorical_crossentropy', optimizer=optimizer) - - -def sample(preds, temperature=1.0): - # helper function to sample an index from a probability array - preds = np.asarray(preds).astype('float64') - preds = np.log(preds) / temperature - exp_preds = np.exp(preds) - preds = exp_preds / np.sum(exp_preds) - probas = np.random.multinomial(1, preds, 1) - return np.argmax(probas) - - -def on_epoch_end(epoch, _): - # Function invoked at end of each epoch. Prints generated text. - print() - print('----- Generating text after Epoch: %d' % epoch) - - start_index = random.randint(0, len(text) - maxlen - 1) - for diversity in [0.2, 0.5, 1.0, 1.2]: - print('----- diversity:', diversity) - - generated = '' - sentence = text[start_index: start_index + maxlen] - generated += sentence - print('----- Generating with seed: "' + sentence + '"') - sys.stdout.write(generated) - - for i in range(400): - x_pred = np.zeros((1, maxlen, len(chars))) - for t, char in enumerate(sentence): - x_pred[0, t, char_indices[char]] = 1. - - preds = model.predict(x_pred, verbose=0)[0] - next_index = sample(preds, diversity) - next_char = indices_char[next_index] - - sentence = sentence[1:] + next_char - - sys.stdout.write(next_char) - sys.stdout.flush() - print() - - -print_callback = LambdaCallback(on_epoch_end=on_epoch_end) - -model.fit(x, y, - batch_size=128, - epochs=60, - callbacks=[print_callback]) -# -*- coding: utf-8 -*- -""" -#Train an Auxiliary Classifier GAN (ACGAN) on the MNIST dataset. - -[More details on Auxiliary Classifier GANs.](https://arxiv.org/abs/1610.09585) - -You should start to see reasonable images after ~5 epochs, and good images -by ~15 epochs. You should use a GPU, as the convolution-heavy operations are -very slow on the CPU. Prefer the TensorFlow backend if you plan on iterating, -as the compilation time can be a blocker using Theano. 
- -Timings: - -Hardware | Backend | Time / Epoch -:------------------|:--------|------------: - CPU | TF | 3 hrs - Titan X (maxwell) | TF | 4 min - Titan X (maxwell) | TH | 7 min - -Consult [Auxiliary Classifier Generative Adversarial Networks in Keras -](https://github.com/lukedeo/keras-acgan) for more information and example output. -""" -from __future__ import print_function - -from collections import defaultdict -try: - import cPickle as pickle -except ImportError: - import pickle -from PIL import Image - -from six.moves import range - -from keras.datasets import mnist -from keras import layers -from keras.layers import Input, Dense, Reshape, Flatten, Embedding, Dropout -from keras.layers import BatchNormalization -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import Conv2DTranspose, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam -from keras.utils.generic_utils import Progbar -import numpy as np - -np.random.seed(1337) -num_classes = 10 - - -def build_generator(latent_size): - # we will map a pair of (z, L), where z is a latent vector and L is a - # label drawn from P_c, to image space (..., 28, 28, 1) - cnn = Sequential() - - cnn.add(Dense(3 * 3 * 384, input_dim=latent_size, activation='relu')) - cnn.add(Reshape((3, 3, 384))) - - # upsample to (7, 7, ...) - cnn.add(Conv2DTranspose(192, 5, strides=1, padding='valid', - activation='relu', - kernel_initializer='glorot_normal')) - cnn.add(BatchNormalization()) - - # upsample to (14, 14, ...) - cnn.add(Conv2DTranspose(96, 5, strides=2, padding='same', - activation='relu', - kernel_initializer='glorot_normal')) - cnn.add(BatchNormalization()) - - # upsample to (28, 28, ...) - cnn.add(Conv2DTranspose(1, 5, strides=2, padding='same', - activation='tanh', - kernel_initializer='glorot_normal')) - - # this is the z space commonly referred to in GAN papers - latent = Input(shape=(latent_size, )) - - # this will be our label - image_class = Input(shape=(1,), dtype='int32') - - cls = Embedding(num_classes, latent_size, - embeddings_initializer='glorot_normal')(image_class) - - # hadamard product between z-space and a class conditional embedding - h = layers.multiply([latent, cls]) - - fake_image = cnn(h) - - return Model([latent, image_class], fake_image) - - -def build_discriminator(): - # build a relatively standard conv net, with LeakyReLUs as suggested in - # the reference paper - cnn = Sequential() - - cnn.add(Conv2D(32, 3, padding='same', strides=2, - input_shape=(28, 28, 1))) - cnn.add(LeakyReLU(0.2)) - cnn.add(Dropout(0.3)) - - cnn.add(Conv2D(64, 3, padding='same', strides=1)) - cnn.add(LeakyReLU(0.2)) - cnn.add(Dropout(0.3)) - - cnn.add(Conv2D(128, 3, padding='same', strides=2)) - cnn.add(LeakyReLU(0.2)) - cnn.add(Dropout(0.3)) - - cnn.add(Conv2D(256, 3, padding='same', strides=1)) - cnn.add(LeakyReLU(0.2)) - cnn.add(Dropout(0.3)) - - cnn.add(Flatten()) - - image = Input(shape=(28, 28, 1)) - - features = cnn(image) - - # first output (name=generation) is whether or not the discriminator - # thinks the image that is being shown is fake, and the second output - # (name=auxiliary) is the class that the discriminator thinks the image - # belongs to. 
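-    # (so fake has shape (batch, 1) from the sigmoid and aux has shape
-    # (batch, num_classes) from the softmax)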
- fake = Dense(1, activation='sigmoid', name='generation')(features) - aux = Dense(num_classes, activation='softmax', name='auxiliary')(features) - - return Model(image, [fake, aux]) - - -if __name__ == '__main__': - - # batch and latent size taken from the paper - epochs = 100 - batch_size = 100 - latent_size = 100 - - # Adam parameters suggested in https://arxiv.org/abs/1511.06434 - adam_lr = 0.0002 - adam_beta_1 = 0.5 - - # build the discriminator - print('Discriminator model:') - discriminator = build_discriminator() - discriminator.compile( - optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1), - loss=['binary_crossentropy', 'sparse_categorical_crossentropy'] - ) - discriminator.summary() - - # build the generator - generator = build_generator(latent_size) - - latent = Input(shape=(latent_size, )) - image_class = Input(shape=(1,), dtype='int32') - - # get a fake image - fake = generator([latent, image_class]) - - # we only want to be able to train generation for the combined model - discriminator.trainable = False - fake, aux = discriminator(fake) - combined = Model([latent, image_class], [fake, aux]) - - print('Combined model:') - combined.compile( - optimizer=Adam(lr=adam_lr, beta_1=adam_beta_1), - loss=['binary_crossentropy', 'sparse_categorical_crossentropy'] - ) - combined.summary() - - # get our mnist data, and force it to be of shape (..., 28, 28, 1) with - # range [-1, 1] - (x_train, y_train), (x_test, y_test) = mnist.load_data() - x_train = (x_train.astype(np.float32) - 127.5) / 127.5 - x_train = np.expand_dims(x_train, axis=-1) - - x_test = (x_test.astype(np.float32) - 127.5) / 127.5 - x_test = np.expand_dims(x_test, axis=-1) - - num_train, num_test = x_train.shape[0], x_test.shape[0] - - train_history = defaultdict(list) - test_history = defaultdict(list) - - for epoch in range(1, epochs + 1): - print('Epoch {}/{}'.format(epoch, epochs)) - - num_batches = int(np.ceil(x_train.shape[0] / float(batch_size))) - progress_bar = Progbar(target=num_batches) - - epoch_gen_loss = [] - epoch_disc_loss = [] - - for index in range(num_batches): - # get a batch of real images - image_batch = x_train[index * batch_size:(index + 1) * batch_size] - label_batch = y_train[index * batch_size:(index + 1) * batch_size] - - # generate a new batch of noise - noise = np.random.uniform(-1, 1, (len(image_batch), latent_size)) - - # sample some labels from p_c - sampled_labels = np.random.randint( - 0, num_classes, len(image_batch)) - - # generate a batch of fake images, using the generated labels as a - # conditioner. We reshape the sampled labels to be - # (len(image_batch), 1) so that we can feed them into the embedding - # layer as a length one sequence - generated_images = generator.predict( - [noise, sampled_labels.reshape((-1, 1))], verbose=0) - - x = np.concatenate((image_batch, generated_images)) - - # use one-sided soft real/fake labels - # Salimans et al., 2016 - # https://arxiv.org/pdf/1606.03498.pdf (Section 3.4) - soft_zero, soft_one = 0, 0.95 - y = np.array( - [soft_one] * len(image_batch) + [soft_zero] * len(image_batch)) - aux_y = np.concatenate((label_batch, sampled_labels), axis=0) - - # we don't want the discriminator to also maximize the classification - # accuracy of the auxiliary classifier on generated images, so we - # don't train discriminator to produce class labels for generated - # images (see https://openreview.net/forum?id=rJXTf9Bxg). - # To preserve sum of sample weights for the auxiliary classifier, - # we assign sample weight of 2 to the real images. 
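-            # (e.g. with batch_size = 100: 100 real images at weight 2 plus
-            # 100 fakes at weight 0 keeps the total weight at 200, matching a
-            # fully-weighted batch)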
- disc_sample_weight = [np.ones(2 * len(image_batch)), - np.concatenate((np.ones(len(image_batch)) * 2, - np.zeros(len(image_batch))))] - - # see if the discriminator can figure itself out... - epoch_disc_loss.append(discriminator.train_on_batch( - x, [y, aux_y], sample_weight=disc_sample_weight)) - - # make new noise. we generate 2 * batch size here such that we have - # the generator optimize over an identical number of images as the - # discriminator - noise = np.random.uniform(-1, 1, - (2 * len(image_batch), latent_size)) - sampled_labels = np.random.randint( - 0, num_classes, 2 * len(image_batch)) - - # we want to train the generator to trick the discriminator - # For the generator, we want all the {fake, not-fake} labels to say - # not-fake - trick = np.ones(2 * len(image_batch)) * soft_one - - epoch_gen_loss.append(combined.train_on_batch( - [noise, sampled_labels.reshape((-1, 1))], - [trick, sampled_labels])) - - progress_bar.update(index + 1) - - print('Testing for epoch {}:'.format(epoch)) - - # evaluate the testing loss here - - # generate a new batch of noise - noise = np.random.uniform(-1, 1, (num_test, latent_size)) - - # sample some labels from p_c and generate images from them - sampled_labels = np.random.randint(0, num_classes, num_test) - generated_images = generator.predict( - [noise, sampled_labels.reshape((-1, 1))], verbose=False) - - x = np.concatenate((x_test, generated_images)) - y = np.array([1] * num_test + [0] * num_test) - aux_y = np.concatenate((y_test, sampled_labels), axis=0) - - # see if the discriminator can figure itself out... - discriminator_test_loss = discriminator.evaluate( - x, [y, aux_y], verbose=False) - - discriminator_train_loss = np.mean(np.array(epoch_disc_loss), axis=0) - - # make new noise - noise = np.random.uniform(-1, 1, (2 * num_test, latent_size)) - sampled_labels = np.random.randint(0, num_classes, 2 * num_test) - - trick = np.ones(2 * num_test) - - generator_test_loss = combined.evaluate( - [noise, sampled_labels.reshape((-1, 1))], - [trick, sampled_labels], verbose=False) - - generator_train_loss = np.mean(np.array(epoch_gen_loss), axis=0) - - # generate an epoch report on performance - train_history['generator'].append(generator_train_loss) - train_history['discriminator'].append(discriminator_train_loss) - - test_history['generator'].append(generator_test_loss) - test_history['discriminator'].append(discriminator_test_loss) - - print('{0:<22s} | {1:4s} | {2:15s} | {3:5s}'.format( - 'component', *discriminator.metrics_names)) - print('-' * 65) - - ROW_FMT = '{0:<22s} | {1:<4.2f} | {2:<15.4f} | {3:<5.4f}' - print(ROW_FMT.format('generator (train)', - *train_history['generator'][-1])) - print(ROW_FMT.format('generator (test)', - *test_history['generator'][-1])) - print(ROW_FMT.format('discriminator (train)', - *train_history['discriminator'][-1])) - print(ROW_FMT.format('discriminator (test)', - *test_history['discriminator'][-1])) - - # save weights every epoch - generator.save_weights( - 'params_generator_epoch_{0:03d}.hdf5'.format(epoch), True) - discriminator.save_weights( - 'params_discriminator_epoch_{0:03d}.hdf5'.format(epoch), True) - - # generate some digits to display - num_rows = 40 - noise = np.tile(np.random.uniform(-1, 1, (num_rows, latent_size)), - (num_classes, 1)) - - sampled_labels = np.array([ - [i] * num_rows for i in range(num_classes) - ]).reshape(-1, 1) - - # get a batch to display - generated_images = generator.predict( - [noise, sampled_labels], verbose=0) - - # prepare real images sorted by class label - 
real_labels = y_train[(epoch - 1) * num_rows * num_classes: - epoch * num_rows * num_classes] - indices = np.argsort(real_labels, axis=0) - real_images = x_train[(epoch - 1) * num_rows * num_classes: - epoch * num_rows * num_classes][indices] - - # display generated images, white separator, real images - img = np.concatenate( - (generated_images, - np.repeat(np.ones_like(x_train[:1]), num_rows, axis=0), - real_images)) - - # arrange them into a grid - img = (np.concatenate([r.reshape(-1, 28) - for r in np.split(img, 2 * num_classes + 1) - ], axis=-1) * 127.5 + 127.5).astype(np.uint8) - - Image.fromarray(img).save( - 'plot_epoch_{0:03d}_generated.png'.format(epoch)) - - with open('acgan-history.pkl', 'wb') as f: - pickle.dump({'train': train_history, 'test': test_history}, f) -'''Trains a simple convnet on the MNIST dataset. - -Gets to 99.25% test accuracy after 12 epochs -(there is still a lot of margin for parameter tuning). -16 seconds per epoch on a GRID K520 GPU. -''' - -from __future__ import print_function -import keras -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers import Dense, Dropout, Flatten -from keras.layers import Conv2D, MaxPooling2D -from keras import backend as K - -batch_size = 128 -num_classes = 10 -epochs = 12 - -# input image dimensions -img_rows, img_cols = 28, 28 - -# the data, split between train and test sets -(x_train, y_train), (x_test, y_test) = mnist.load_data() - -if K.image_data_format() == 'channels_first': - x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) - x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) - input_shape = (1, img_rows, img_cols) -else: - x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) - x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) - input_shape = (img_rows, img_cols, 1) - -x_train = x_train.astype('float32') -x_test = x_test.astype('float32') -x_train /= 255 -x_test /= 255 -print('x_train shape:', x_train.shape) -print(x_train.shape[0], 'train samples') -print(x_test.shape[0], 'test samples') - -# convert class vectors to binary class matrices -y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) - -model = Sequential() -model.add(Conv2D(32, kernel_size=(3, 3), - activation='relu', - input_shape=input_shape)) -model.add(Conv2D(64, (3, 3), activation='relu')) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) -model.add(Flatten()) -model.add(Dense(128, activation='relu')) -model.add(Dropout(0.5)) -model.add(Dense(num_classes, activation='softmax')) - -model.compile(loss=keras.losses.categorical_crossentropy, - optimizer=keras.optimizers.Adadelta(), - metrics=['accuracy']) - -model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - verbose=1, - validation_data=(x_test, y_test)) -score = model.evaluate(x_test, y_test, verbose=0) -print('Test loss:', score[0]) -print('Test accuracy:', score[1]) -'''MNIST classification with TensorFlow's Dataset API. - -Introduced in TensorFlow 1.3, the Dataset API is now the -standard method for loading data into TensorFlow models. -A Dataset is a sequence of elements, which are themselves -composed of tf.Tensor components. For more details, see: -https://www.tensorflow.org/programmers_guide/datasets - -To use this with Keras, we make a dataset out of elements -of the form (input batch, output batch). From there, we -create a one-shot iterator and a graph node corresponding -to its get_next() method. 
Its components are then provided
-to the network's Input layer and the Model.compile() method,
-respectively.
-
-This example is intended to closely follow the
-mnist_tfrecord.py example.
-'''
-import numpy as np
-import os
-import tempfile
-
-import keras
-from keras import backend as K
-from keras import layers
-from keras.datasets import mnist
-
-import tensorflow as tf
-
-
-if K.backend() != 'tensorflow':
-    raise RuntimeError('This example can only run with the TensorFlow backend,'
-                       ' because it requires the Dataset API, which is not'
-                       ' supported on other platforms.')
-
-
-def cnn_layers(inputs):
-    x = layers.Conv2D(32, (3, 3),
-                      activation='relu', padding='valid')(inputs)
-    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
-    x = layers.Conv2D(64, (3, 3), activation='relu')(x)
-    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
-    x = layers.Flatten()(x)
-    x = layers.Dense(512, activation='relu')(x)
-    x = layers.Dropout(0.5)(x)
-    predictions = layers.Dense(num_classes,
-                               activation='softmax',
-                               name='x_train_out')(x)
-    return predictions
-
-
-batch_size = 128
-buffer_size = 10000
-steps_per_epoch = int(np.ceil(60000 / float(batch_size)))  # = 469
-epochs = 5
-num_classes = 10
-
-(x_train, y_train), (x_test, y_test) = mnist.load_data()
-x_train = x_train.astype(np.float32) / 255
-x_train = np.expand_dims(x_train, -1)
-y_train = tf.one_hot(y_train, num_classes)
-
-# Create the dataset and its associated one-shot iterator.
-dataset = tf.data.Dataset.from_tensor_slices((x_train, y_train))
-dataset = dataset.repeat()
-dataset = dataset.shuffle(buffer_size)
-dataset = dataset.batch(batch_size)
-iterator = dataset.make_one_shot_iterator()
-
-# Model creation using tensors from the get_next() graph node.
-inputs, targets = iterator.get_next()
-model_input = layers.Input(tensor=inputs)
-model_output = cnn_layers(model_input)
-train_model = keras.models.Model(inputs=model_input, outputs=model_output)
-
-train_model.compile(optimizer=keras.optimizers.RMSprop(lr=2e-3, decay=1e-5),
-                    loss='categorical_crossentropy',
-                    metrics=['accuracy'],
-                    target_tensors=[targets])
-train_model.summary()
-
-train_model.fit(epochs=epochs,
-                steps_per_epoch=steps_per_epoch)
-
-# Save the model weights.
-weight_path = os.path.join(tempfile.gettempdir(), 'saved_wt.h5')
-train_model.save_weights(weight_path)
-
-# Clean up the TF session.
-K.clear_session()
-
-# Second session to test loading trained model without tensors.
-x_test = x_test.astype(np.float32)
-x_test = np.expand_dims(x_test, -1)
-
-x_test_inp = layers.Input(shape=x_test.shape[1:])
-test_out = cnn_layers(x_test_inp)
-test_model = keras.models.Model(inputs=x_test_inp, outputs=test_out)
-
-test_model.load_weights(weight_path)
-test_model.compile(optimizer='rmsprop',
-                   loss='sparse_categorical_crossentropy',
-                   metrics=['accuracy'])
-test_model.summary()
-
-loss, acc = test_model.evaluate(x_test, y_test, batch_size=batch_size)
-print('\nTest accuracy: {0}'.format(acc))
-'''Trains a denoising autoencoder on MNIST dataset.
-
-Denoising is one of the classic applications of autoencoders.
-The denoising process removes unwanted noise that corrupted the
-true signal.
-
-Noise + Data ---> Denoising Autoencoder ---> Data
-
-Given a training dataset of corrupted data as input and
-true signal as output, a denoising autoencoder can recover the
-hidden structure to generate clean data.
-
-This example has a modular design. The encoder, decoder and autoencoder
-are 3 models that share weights. 
For example, after training the -autoencoder, the encoder can be used to generate latent vectors -of input data for low-dim visualization like PCA or TSNE. -''' - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import keras -from keras.layers import Activation, Dense, Input -from keras.layers import Conv2D, Flatten -from keras.layers import Reshape, Conv2DTranspose -from keras.models import Model -from keras import backend as K -from keras.datasets import mnist -import numpy as np -import matplotlib.pyplot as plt -from PIL import Image - -np.random.seed(1337) - -# MNIST dataset -(x_train, _), (x_test, _) = mnist.load_data() - -image_size = x_train.shape[1] -x_train = np.reshape(x_train, [-1, image_size, image_size, 1]) -x_test = np.reshape(x_test, [-1, image_size, image_size, 1]) -x_train = x_train.astype('float32') / 255 -x_test = x_test.astype('float32') / 255 - -# Generate corrupted MNIST images by adding noise with normal dist -# centered at 0.5 and std=0.5 -noise = np.random.normal(loc=0.5, scale=0.5, size=x_train.shape) -x_train_noisy = x_train + noise -noise = np.random.normal(loc=0.5, scale=0.5, size=x_test.shape) -x_test_noisy = x_test + noise - -x_train_noisy = np.clip(x_train_noisy, 0., 1.) -x_test_noisy = np.clip(x_test_noisy, 0., 1.) - -# Network parameters -input_shape = (image_size, image_size, 1) -batch_size = 128 -kernel_size = 3 -latent_dim = 16 -# Encoder/Decoder number of CNN layers and filters per layer -layer_filters = [32, 64] - -# Build the Autoencoder Model -# First build the Encoder Model -inputs = Input(shape=input_shape, name='encoder_input') -x = inputs -# Stack of Conv2D blocks -# Notes: -# 1) Use Batch Normalization before ReLU on deep networks -# 2) Use MaxPooling2D as alternative to strides>1 -# - faster but not as good as strides>1 -for filters in layer_filters: - x = Conv2D(filters=filters, - kernel_size=kernel_size, - strides=2, - activation='relu', - padding='same')(x) - -# Shape info needed to build Decoder Model -shape = K.int_shape(x) - -# Generate the latent vector -x = Flatten()(x) -latent = Dense(latent_dim, name='latent_vector')(x) - -# Instantiate Encoder Model -encoder = Model(inputs, latent, name='encoder') -encoder.summary() - -# Build the Decoder Model -latent_inputs = Input(shape=(latent_dim,), name='decoder_input') -x = Dense(shape[1] * shape[2] * shape[3])(latent_inputs) -x = Reshape((shape[1], shape[2], shape[3]))(x) - -# Stack of Transposed Conv2D blocks -# Notes: -# 1) Use Batch Normalization before ReLU on deep networks -# 2) Use UpSampling2D as alternative to strides>1 -# - faster but not as good as strides>1 -for filters in layer_filters[::-1]: - x = Conv2DTranspose(filters=filters, - kernel_size=kernel_size, - strides=2, - activation='relu', - padding='same')(x) - -x = Conv2DTranspose(filters=1, - kernel_size=kernel_size, - padding='same')(x) - -outputs = Activation('sigmoid', name='decoder_output')(x) - -# Instantiate Decoder Model -decoder = Model(latent_inputs, outputs, name='decoder') -decoder.summary() - -# Autoencoder = Encoder + Decoder -# Instantiate Autoencoder Model -autoencoder = Model(inputs, decoder(encoder(inputs)), name='autoencoder') -autoencoder.summary() - -autoencoder.compile(loss='mse', optimizer='adam') - -# Train the autoencoder -autoencoder.fit(x_train_noisy, - x_train, - validation_data=(x_test_noisy, x_test), - epochs=30, - batch_size=batch_size) - -# Predict the Autoencoder output from corrupted test images -x_decoded = 
autoencoder.predict(x_test_noisy)
-
-# Display the first 300 (rows * cols) corrupted and denoised test images
-rows, cols = 10, 30
-num = rows * cols
-imgs = np.concatenate([x_test[:num], x_test_noisy[:num], x_decoded[:num]])
-imgs = imgs.reshape((rows * 3, cols, image_size, image_size))
-imgs = np.vstack(np.split(imgs, rows, axis=1))
-imgs = imgs.reshape((rows * 3, -1, image_size, image_size))
-imgs = np.vstack([np.hstack(i) for i in imgs])
-imgs = (imgs * 255).astype(np.uint8)
-plt.figure()
-plt.axis('off')
-plt.title('Original images: top rows, '
-          'Corrupted input: middle rows, '
-          'Denoised output: bottom rows')
-plt.imshow(imgs, interpolation='none', cmap='gray')
-Image.fromarray(imgs).save('corrupted_and_denoised.png')
-plt.show()
-"""Example of using Hierarchical RNN (HRNN) to classify MNIST digits.
-
-HRNNs can learn across multiple levels
-of temporal hierarchy over a complex sequence.
-Usually, the first recurrent layer of an HRNN
-encodes a sentence (e.g. of word vectors)
-into a sentence vector.
-The second recurrent layer then encodes a sequence of
-such vectors (encoded by the first layer) into a document vector.
-This document vector is considered to preserve both
-the word-level and sentence-level structure of the context.
-
-# References
-
-- [A Hierarchical Neural Autoencoder for Paragraphs and Documents]
-  (https://arxiv.org/abs/1506.01057)
-  Encodes paragraphs and documents with HRNN.
-  Results have shown that HRNN outperforms standard
-  RNNs and may play some role in more sophisticated generation tasks like
-  summarization or question answering.
-- [Hierarchical recurrent neural network for skeleton based action recognition]
-  (http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=7298714)
-  Achieved state-of-the-art results on
-  skeleton based action recognition with 3 levels
-  of bidirectional HRNN combined with fully connected layers.
-
-In the MNIST example below, the first LSTM layer encodes every
-column of pixels of shape (28, 1) to a column vector of shape (128,).
-The second LSTM layer then encodes these 28 column vectors of shape (28, 128)
-to an image vector representing the whole image.
-A final Dense layer is added for prediction.
-
-After 5 epochs: train acc: 0.9858, val acc: 0.9864
-"""
-from __future__ import print_function
-
-import keras
-from keras.datasets import mnist
-from keras.models import Model
-from keras.layers import Input, Dense, TimeDistributed
-from keras.layers import LSTM
-
-# Training parameters.
-batch_size = 32
-num_classes = 10
-epochs = 5
-
-# Embedding dimensions.
-row_hidden = 128
-col_hidden = 128
-
-# The data, split between train and test sets.
-(x_train, y_train), (x_test, y_test) = mnist.load_data()
-
-# Reshapes data to 4D for Hierarchical RNN.
-x_train = x_train.reshape(x_train.shape[0], 28, 28, 1)
-x_test = x_test.reshape(x_test.shape[0], 28, 28, 1)
-x_train = x_train.astype('float32')
-x_test = x_test.astype('float32')
-x_train /= 255
-x_test /= 255
-print('x_train shape:', x_train.shape)
-print(x_train.shape[0], 'train samples')
-print(x_test.shape[0], 'test samples')
-
-# Converts class vectors to binary class matrices.
-y_train = keras.utils.to_categorical(y_train, num_classes)
-y_test = keras.utils.to_categorical(y_test, num_classes)
-
-row, col, pixel = x_train.shape[1:]
-
-# 4D input.
-x = Input(shape=(row, col, pixel))
-
-# Encodes a row of pixels using TimeDistributed Wrapper.
-encoded_rows = TimeDistributed(LSTM(row_hidden))(x)
-
-# Encodes columns of encoded rows. 
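-# Shape walk-through: x is (28, 28, 1); TimeDistributed(LSTM(row_hidden))
-# turned each row -- a (28, 1) pixel sequence -- into a 128-d vector, so
-# encoded_rows is (28, 128); the LSTM below reads those 28 vectors and
-# returns a single 128-d encoding of the whole image.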
-encoded_columns = LSTM(col_hidden)(encoded_rows) - -# Final predictions and model. -prediction = Dense(num_classes, activation='softmax')(encoded_columns) -model = Model(x, prediction) -model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['accuracy']) - -# Training. -model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - verbose=1, - validation_data=(x_test, y_test)) - -# Evaluation. -scores = model.evaluate(x_test, y_test, verbose=0) -print('Test loss:', scores[0]) -print('Test accuracy:', scores[1]) -'''This is a reproduction of the IRNN experiment -with pixel-by-pixel sequential MNIST in -"A Simple Way to Initialize Recurrent Networks of Rectified Linear Units" -by Quoc V. Le, Navdeep Jaitly, Geoffrey E. Hinton - -arxiv:1504.00941v2 [cs.NE] 7 Apr 2015 -http://arxiv.org/pdf/1504.00941v2.pdf - -Optimizer is replaced with RMSprop which yields more stable and steady -improvement. - -Reaches 0.93 train/test accuracy after 900 epochs -(which roughly corresponds to 1687500 steps in the original paper.) -''' - -from __future__ import print_function - -import keras -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers import Dense, Activation -from keras.layers import SimpleRNN -from keras import initializers -from keras.optimizers import RMSprop - -batch_size = 32 -num_classes = 10 -epochs = 200 -hidden_units = 100 - -learning_rate = 1e-6 -clip_norm = 1.0 - -# the data, split between train and test sets -(x_train, y_train), (x_test, y_test) = mnist.load_data() - -x_train = x_train.reshape(x_train.shape[0], -1, 1) -x_test = x_test.reshape(x_test.shape[0], -1, 1) -x_train = x_train.astype('float32') -x_test = x_test.astype('float32') -x_train /= 255 -x_test /= 255 -print('x_train shape:', x_train.shape) -print(x_train.shape[0], 'train samples') -print(x_test.shape[0], 'test samples') - -# convert class vectors to binary class matrices -y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) - -print('Evaluate IRNN...') -model = Sequential() -model.add(SimpleRNN(hidden_units, - kernel_initializer=initializers.RandomNormal(stddev=0.001), - recurrent_initializer=initializers.Identity(gain=1.0), - activation='relu', - input_shape=x_train.shape[1:])) -model.add(Dense(num_classes)) -model.add(Activation('softmax')) -rmsprop = RMSprop(lr=learning_rate) -model.compile(loss='categorical_crossentropy', - optimizer=rmsprop, - metrics=['accuracy']) - -model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - verbose=1, - validation_data=(x_test, y_test)) - -scores = model.evaluate(x_test, y_test, verbose=0) -print('IRNN test score:', scores[0]) -print('IRNN test accuracy:', scores[1]) -'''Trains a simple deep NN on the MNIST dataset. - -Gets to 98.40% test accuracy after 20 epochs -(there is *a lot* of margin for parameter tuning). -2 seconds per epoch on a K520 GPU. 
-'''
-
-from __future__ import print_function
-
-import keras
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers import Dense, Dropout
-from keras.optimizers import RMSprop
-
-batch_size = 128
-num_classes = 10
-epochs = 20
-
-# the data, split between train and test sets
-(x_train, y_train), (x_test, y_test) = mnist.load_data()
-
-x_train = x_train.reshape(60000, 784)
-x_test = x_test.reshape(10000, 784)
-x_train = x_train.astype('float32')
-x_test = x_test.astype('float32')
-x_train /= 255
-x_test /= 255
-print(x_train.shape[0], 'train samples')
-print(x_test.shape[0], 'test samples')
-
-# convert class vectors to binary class matrices
-y_train = keras.utils.to_categorical(y_train, num_classes)
-y_test = keras.utils.to_categorical(y_test, num_classes)
-
-model = Sequential()
-model.add(Dense(512, activation='relu', input_shape=(784,)))
-model.add(Dropout(0.2))
-model.add(Dense(512, activation='relu'))
-model.add(Dropout(0.2))
-model.add(Dense(num_classes, activation='softmax'))
-
-model.summary()
-
-model.compile(loss='categorical_crossentropy',
-              optimizer=RMSprop(),
-              metrics=['accuracy'])
-
-history = model.fit(x_train, y_train,
-                    batch_size=batch_size,
-                    epochs=epochs,
-                    verbose=1,
-                    validation_data=(x_test, y_test))
-score = model.evaluate(x_test, y_test, verbose=0)
-print('Test loss:', score[0])
-print('Test accuracy:', score[1])
-'''This is an implementation of Net2Net experiment with MNIST in
-'Net2Net: Accelerating Learning via Knowledge Transfer'
-by Tianqi Chen, Ian Goodfellow, and Jonathon Shlens
-
-arXiv:1511.05641v4 [cs.LG] 23 Apr 2016
-http://arxiv.org/abs/1511.05641
-
-# Notes
-
-- What:
-  + Net2Net is a group of methods to transfer knowledge from a teacher neural
-    net to a student net, so that the student net can be trained faster than
-    from scratch.
-  + The paper discussed two specific methods of Net2Net, i.e. Net2WiderNet
-    and Net2DeeperNet.
-  + Net2WiderNet replaces a model with an equivalent wider model that has
-    more units in each hidden layer.
-  + Net2DeeperNet replaces a model with an equivalent deeper model.
-  + Both are based on the idea of 'function-preserving transformations of
-    neural nets'.
-- Why:
-  + Enable fast exploration of multiple neural nets in experimentation and
-    design process, by creating a series of wider and deeper models with
-    transferable knowledge.
-  + Enable 'lifelong learning system' by gradually adjusting model complexity
-    to data availability, and reusing transferable knowledge.
-
-# Experiments
-
-- Teacher model: a basic CNN model trained on MNIST for 3 epochs.
-- Net2WiderNet experiment:
-  + Student model has a wider Conv2D layer and a wider FC layer.
-  + Comparison of 'random-padding' vs 'net2wider' weight initialization.
-  + With both methods, after 1 epoch, student model should perform as well as
-    teacher model, but 'net2wider' is slightly better.
-- Net2DeeperNet experiment:
-  + Student model has an extra Conv2D layer and an extra FC layer.
-  + Comparison of 'random-init' vs 'net2deeper' weight initialization.
-  + After 1 epoch, performance of 'net2deeper' is better than 'random-init'.
-- Hyper-parameters:
-  + SGD with momentum=0.9 is used for training teacher and student models.
-  + Learning rate adjustment: it's suggested to reduce learning rate
-    to 1/10 for student model.
-  + Addition of noise in 'net2wider' is used to break weight symmetry
-    and thus enable full capacity of student models. It is optional
-    when a Dropout layer is used. 
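-
-A toy illustration of the 'function-preserving' idea (illustrative only;
-the real logic lives in wider2net_conv2d / wider2net_fc below): if hidden
-unit j is duplicated once when widening, each copy keeps unit j's incoming
-weights and bias, and every outgoing weight of j is divided by the copy
-count, so the widened net computes the same function as before:
-
-    before:  y = ... + w2[j] * h[j] + ...
-    after:   y = ... + (w2[j] / 2) * h[j] + (w2[j] / 2) * h[j] + ...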
-
-# Results
-
-- Tested with TF backend and 'channels_last' image_data_format.
-- Running on GPU GeForce GTX Titan X Maxwell
-- Performance Comparisons - validation loss values during first 3 epochs:
-
-Teacher model ...
-(0) teacher_model: 0.0537 0.0354 0.0356
-
-Experiment of Net2WiderNet ...
-(1) wider_random_pad: 0.0320 0.0317 0.0289
-(2) wider_net2wider: 0.0271 0.0274 0.0270
-
-Experiment of Net2DeeperNet ...
-(3) deeper_random_init: 0.0682 0.0506 0.0468
-(4) deeper_net2deeper: 0.0292 0.0294 0.0286
-'''
-
-from __future__ import print_function
-import numpy as np
-import keras
-from keras import backend as K
-from keras.models import Sequential
-from keras.layers import Conv2D, MaxPooling2D, Dense, Flatten
-from keras.optimizers import SGD
-from keras.datasets import mnist
-
-if K.image_data_format() == 'channels_first':
-    input_shape = (1, 28, 28)  # image shape
-else:
-    input_shape = (28, 28, 1)  # image shape
-num_classes = 10  # number of classes
-epochs = 3
-
-
-# load and pre-process data
-def preprocess_input(x):
-    return x.astype('float32').reshape((-1,) + input_shape) / 255
-
-
-def preprocess_output(y):
-    return keras.utils.to_categorical(y)
-
-
-(x_train, y_train), (x_test, y_test) = mnist.load_data()
-x_train, x_test = map(preprocess_input, [x_train, x_test])
-y_train, y_test = map(preprocess_output, [y_train, y_test])
-print('Loading MNIST data...')
-print('x_train shape:', x_train.shape, 'y_train shape:', y_train.shape)
-print('x_test shape:', x_test.shape, 'y_test shape', y_test.shape)
-
-
-# knowledge transfer algorithms
-def wider2net_conv2d(teacher_w1, teacher_b1, teacher_w2, new_width, init):
-    '''Get initial weights for a wider conv2d layer with more filters,
-    by 'random-padding' or 'net2wider'.
-
-    # Arguments
-        teacher_w1: `weight` of conv2d layer to become wider,
-          of shape (kh1, kw1, num_channel1, filters1)
-        teacher_b1: `bias` of conv2d layer to become wider,
-          of shape (filters1, )
-        teacher_w2: `weight` of next connected conv2d layer,
-          of shape (kh2, kw2, num_channel2, filters2)
-        new_width: new `filters` for the wider conv2d layer
-        init: initialization algorithm for new weights,
-          either 'random-pad' or 'net2wider'
-    '''
-    assert teacher_w1.shape[3] == teacher_w2.shape[2], (
-        'successive layers from teacher model should have compatible shapes')
-    assert teacher_w1.shape[3] == teacher_b1.shape[0], (
-        'weight and bias from same layer should have compatible shapes')
-    assert new_width > teacher_w1.shape[3], (
-        'new width (filters) should be bigger than the existing one')
-
-    n = new_width - teacher_w1.shape[3]
-    if init == 'random-pad':
-        new_w1 = np.random.normal(0, 0.1, size=teacher_w1.shape[:3] + (n,))
-        new_b1 = np.ones(n) * 0.1
-        new_w2 = np.random.normal(
-            0, 0.1,
-            size=teacher_w2.shape[:2] + (n, teacher_w2.shape[3]))
-    elif init == 'net2wider':
-        index = np.random.randint(teacher_w1.shape[3], size=n)
-        factors = np.bincount(index)[index] + 1. 
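-        # `index` lists which existing filters were sampled for copying;
-        # np.bincount(index)[index] + 1 gives, for each sampled filter, its
-        # total copy count in the widened layer (original + duplicates),
-        # i.e. the divisor that keeps the next layer's output unchanged.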
-        new_w1 = teacher_w1[:, :, :, index]
-        new_b1 = teacher_b1[index]
-        new_w2 = teacher_w2[:, :, index, :] / factors.reshape((1, 1, -1, 1))
-    else:
-        raise ValueError('Unsupported weight initializer: %s' % init)
-
-    student_w1 = np.concatenate((teacher_w1, new_w1), axis=3)
-    if init == 'random-pad':
-        student_w2 = np.concatenate((teacher_w2, new_w2), axis=2)
-    elif init == 'net2wider':
-        # add small noise to break symmetry, so that student model will have
-        # full capacity later
-        noise = np.random.normal(0, 5e-2 * new_w2.std(), size=new_w2.shape)
-        student_w2 = np.concatenate((teacher_w2, new_w2 + noise), axis=2)
-        student_w2[:, :, index, :] = new_w2
-    student_b1 = np.concatenate((teacher_b1, new_b1), axis=0)
-
-    return student_w1, student_b1, student_w2
-
-
-def wider2net_fc(teacher_w1, teacher_b1, teacher_w2, new_width, init):
-    '''Get initial weights for a wider fully connected (dense) layer
-    with a bigger `nout`, by 'random-padding' or 'net2wider'.
-
-    # Arguments
-        teacher_w1: `weight` of fc layer to become wider,
-          of shape (nin1, nout1)
-        teacher_b1: `bias` of fc layer to become wider,
-          of shape (nout1, )
-        teacher_w2: `weight` of next connected fc layer,
-          of shape (nin2, nout2)
-        new_width: new `nout` for the wider fc layer
-        init: initialization algorithm for new weights,
-          either 'random-pad' or 'net2wider'
-    '''
-    assert teacher_w1.shape[1] == teacher_w2.shape[0], (
-        'successive layers from teacher model should have compatible shapes')
-    assert teacher_w1.shape[1] == teacher_b1.shape[0], (
-        'weight and bias from same layer should have compatible shapes')
-    assert new_width > teacher_w1.shape[1], (
-        'new width (nout) should be bigger than the existing one')
-
-    n = new_width - teacher_w1.shape[1]
-    if init == 'random-pad':
-        new_w1 = np.random.normal(0, 0.1, size=(teacher_w1.shape[0], n))
-        new_b1 = np.ones(n) * 0.1
-        new_w2 = np.random.normal(0, 0.1, size=(n, teacher_w2.shape[1]))
-    elif init == 'net2wider':
-        index = np.random.randint(teacher_w1.shape[1], size=n)
-        factors = np.bincount(index)[index] + 1.
-        new_w1 = teacher_w1[:, index]
-        new_b1 = teacher_b1[index]
-        new_w2 = teacher_w2[index, :] / factors[:, np.newaxis]
-    else:
-        raise ValueError('Unsupported weight initializer: %s' % init)
-
-    student_w1 = np.concatenate((teacher_w1, new_w1), axis=1)
-    if init == 'random-pad':
-        student_w2 = np.concatenate((teacher_w2, new_w2), axis=0)
-    elif init == 'net2wider':
-        # add small noise to break symmetry, so that student model will have
-        # full capacity later
-        noise = np.random.normal(0, 5e-2 * new_w2.std(), size=new_w2.shape)
-        student_w2 = np.concatenate((teacher_w2, new_w2 + noise), axis=0)
-        student_w2[index, :] = new_w2
-    student_b1 = np.concatenate((teacher_b1, new_b1), axis=0)
-
-    return student_w1, student_b1, student_w2
-
-
-def deeper2net_conv2d(teacher_w):
-    '''Get initial weights for a deeper conv2d layer by 'net2deeper'.
-
-    # Arguments
-        teacher_w: `weight` of previous conv2d layer,
-          of shape (kh, kw, num_channel, filters)
-    '''
-    kh, kw, num_channel, filters = teacher_w.shape
-    student_w = np.zeros_like(teacher_w)
-    for i in range(filters):
-        student_w[(kh - 1) // 2, (kw - 1) // 2, i, i] = 1. 
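-        # student_w is an identity kernel: a single 1 at the spatial centre
-        # connecting channel i to filter i, so with zero bias and 'same'
-        # padding the inserted layer initially passes its input through
-        # unchanged (assumes num_channel == filters, as used below).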
- student_b = np.zeros(filters) - return student_w, student_b - - -def copy_weights(teacher_model, student_model, layer_names): - '''Copy weights from teacher_model to student_model, - for layers with names listed in layer_names - ''' - for name in layer_names: - weights = teacher_model.get_layer(name=name).get_weights() - student_model.get_layer(name=name).set_weights(weights) - - -# methods to construct teacher_model and student_models -def make_teacher_model(x_train, y_train, - x_test, y_test, - epochs): - '''Train and benchmark performance of a simple CNN. - (0) Teacher model - ''' - model = Sequential() - model.add(Conv2D(64, 3, input_shape=input_shape, - padding='same', name='conv1')) - model.add(MaxPooling2D(2, name='pool1')) - model.add(Conv2D(64, 3, padding='same', name='conv2')) - model.add(MaxPooling2D(2, name='pool2')) - model.add(Flatten(name='flatten')) - model.add(Dense(64, activation='relu', name='fc1')) - model.add(Dense(num_classes, activation='softmax', name='fc2')) - model.compile(loss='categorical_crossentropy', - optimizer=SGD(lr=0.01, momentum=0.9), - metrics=['accuracy']) - - model.fit(x_train, y_train, - epochs=epochs, - validation_data=(x_test, y_test)) - return model - - -def make_wider_student_model(teacher_model, - x_train, y_train, - x_test, y_test, - init, epochs): - '''Train a wider student model based on teacher_model, - with either 'random-pad' (baseline) or 'net2wider' - ''' - new_conv1_width = 128 - new_fc1_width = 128 - - model = Sequential() - # a wider conv1 compared to teacher_model - model.add(Conv2D(new_conv1_width, 3, input_shape=input_shape, - padding='same', name='conv1')) - model.add(MaxPooling2D(2, name='pool1')) - model.add(Conv2D(64, 3, padding='same', name='conv2')) - model.add(MaxPooling2D(2, name='pool2')) - model.add(Flatten(name='flatten')) - # a wider fc1 compared to teacher model - model.add(Dense(new_fc1_width, activation='relu', name='fc1')) - model.add(Dense(num_classes, activation='softmax', name='fc2')) - - # The weights for other layers need to be copied from teacher_model - # to student_model, except for widened layers - # and their immediate downstreams, which will be initialized separately. - # For this example there are no other layers that need to be copied. 
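-    # What follows initializes the widened layers themselves: conv1/fc1
-    # receive replicated (or random-padded) incoming weights, while
-    # conv2/fc2 receive the compensating weights for their enlarged
-    # input dimension.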
- - w_conv1, b_conv1 = teacher_model.get_layer('conv1').get_weights() - w_conv2, b_conv2 = teacher_model.get_layer('conv2').get_weights() - new_w_conv1, new_b_conv1, new_w_conv2 = wider2net_conv2d( - w_conv1, b_conv1, w_conv2, new_conv1_width, init) - model.get_layer('conv1').set_weights([new_w_conv1, new_b_conv1]) - model.get_layer('conv2').set_weights([new_w_conv2, b_conv2]) - - w_fc1, b_fc1 = teacher_model.get_layer('fc1').get_weights() - w_fc2, b_fc2 = teacher_model.get_layer('fc2').get_weights() - new_w_fc1, new_b_fc1, new_w_fc2 = wider2net_fc( - w_fc1, b_fc1, w_fc2, new_fc1_width, init) - model.get_layer('fc1').set_weights([new_w_fc1, new_b_fc1]) - model.get_layer('fc2').set_weights([new_w_fc2, b_fc2]) - - model.compile(loss='categorical_crossentropy', - optimizer=SGD(lr=0.001, momentum=0.9), - metrics=['accuracy']) - - model.fit(x_train, y_train, - epochs=epochs, - validation_data=(x_test, y_test)) - - -def make_deeper_student_model(teacher_model, - x_train, y_train, - x_test, y_test, - init, epochs): - '''Train a deeper student model based on teacher_model, - with either 'random-init' (baseline) or 'net2deeper' - ''' - model = Sequential() - model.add(Conv2D(64, 3, input_shape=input_shape, - padding='same', name='conv1')) - model.add(MaxPooling2D(2, name='pool1')) - model.add(Conv2D(64, 3, padding='same', name='conv2')) - # add another conv2d layer to make original conv2 deeper - if init == 'net2deeper': - prev_w, _ = model.get_layer('conv2').get_weights() - new_weights = deeper2net_conv2d(prev_w) - model.add(Conv2D(64, 3, padding='same', - name='conv2-deeper', weights=new_weights)) - elif init == 'random-init': - model.add(Conv2D(64, 3, padding='same', name='conv2-deeper')) - else: - raise ValueError('Unsupported weight initializer: %s' % init) - model.add(MaxPooling2D(2, name='pool2')) - model.add(Flatten(name='flatten')) - model.add(Dense(64, activation='relu', name='fc1')) - # add another fc layer to make original fc1 deeper - if init == 'net2deeper': - # net2deeper for fc layer with relu, is just an identity initializer - model.add(Dense(64, kernel_initializer='identity', - activation='relu', name='fc1-deeper')) - elif init == 'random-init': - model.add(Dense(64, activation='relu', name='fc1-deeper')) - else: - raise ValueError('Unsupported weight initializer: %s' % init) - model.add(Dense(num_classes, activation='softmax', name='fc2')) - - # copy weights for other layers - copy_weights(teacher_model, model, layer_names=[ - 'conv1', 'conv2', 'fc1', 'fc2']) - - model.compile(loss='categorical_crossentropy', - optimizer=SGD(lr=0.001, momentum=0.9), - metrics=['accuracy']) - - model.fit(x_train, y_train, - epochs=epochs, - validation_data=(x_test, y_test)) - - -# experiments setup -def net2wider_experiment(): - '''Benchmark performances of - (1) a wider student model with `random_pad` initializer - (2) a wider student model with `Net2WiderNet` initializer - ''' - print('\nExperiment of Net2WiderNet ...') - - print('\n(1) building wider student model by random padding ...') - make_wider_student_model(teacher_model, - x_train, y_train, - x_test, y_test, - init='random-pad', - epochs=epochs) - print('\n(2) building wider student model by net2wider ...') - make_wider_student_model(teacher_model, - x_train, y_train, - x_test, y_test, - init='net2wider', - epochs=epochs) - - -def net2deeper_experiment(): - '''Benchmark performances of - (3) a deeper student model with `random_init` initializer - (4) a deeper student model with `Net2DeeperNet` initializer - ''' - print('\nExperiment of 
Net2DeeperNet ...') - - print('\n(3) building deeper student model by random init ...') - make_deeper_student_model(teacher_model, - x_train, y_train, - x_test, y_test, - init='random-init', - epochs=epochs) - print('\n(4) building deeper student model by net2deeper ...') - make_deeper_student_model(teacher_model, - x_train, y_train, - x_test, y_test, - init='net2deeper', - epochs=epochs) - - -print('\n(0) building teacher model ...') -teacher_model = make_teacher_model(x_train, y_train, - x_test, y_test, - epochs=epochs) - -# run the experiments -net2wider_experiment() -net2deeper_experiment() -'''Trains a Siamese MLP on pairs of digits from the MNIST dataset. - -It follows Hadsell-et-al.'06 [1] by computing the Euclidean distance on the -output of the shared network and by optimizing the contrastive loss (see paper -for more details). - -# References - -- Dimensionality Reduction by Learning an Invariant Mapping - http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf - -Gets to 97.2% test accuracy after 20 epochs. -2 seconds per epoch on a Titan X Maxwell GPU -''' -from __future__ import absolute_import -from __future__ import print_function -import numpy as np - -import random -from keras.datasets import mnist -from keras.models import Model -from keras.layers import Input, Flatten, Dense, Dropout, Lambda -from keras.optimizers import RMSprop -from keras import backend as K - -num_classes = 10 -epochs = 20 - - -def euclidean_distance(vects): - x, y = vects - sum_square = K.sum(K.square(x - y), axis=1, keepdims=True) - return K.sqrt(K.maximum(sum_square, K.epsilon())) - - -def eucl_dist_output_shape(shapes): - shape1, shape2 = shapes - return (shape1[0], 1) - - -def contrastive_loss(y_true, y_pred): - '''Contrastive loss from Hadsell-et-al.'06 - http://yann.lecun.com/exdb/publis/pdf/hadsell-chopra-lecun-06.pdf - ''' - margin = 1 - square_pred = K.square(y_pred) - margin_square = K.square(K.maximum(margin - y_pred, 0)) - return K.mean(y_true * square_pred + (1 - y_true) * margin_square) - - -def create_pairs(x, digit_indices): - '''Positive and negative pair creation. - Alternates between positive and negative pairs. - ''' - pairs = [] - labels = [] - n = min([len(digit_indices[d]) for d in range(num_classes)]) - 1 - for d in range(num_classes): - for i in range(n): - z1, z2 = digit_indices[d][i], digit_indices[d][i + 1] - pairs += [[x[z1], x[z2]]] - inc = random.randrange(1, num_classes) - dn = (d + inc) % num_classes - z1, z2 = digit_indices[d][i], digit_indices[dn][i] - pairs += [[x[z1], x[z2]]] - labels += [1, 0] - return np.array(pairs), np.array(labels) - - -def create_base_network(input_shape): - '''Base network to be shared (eq. to feature extraction). - ''' - input = Input(shape=input_shape) - x = Flatten()(input) - x = Dense(128, activation='relu')(x) - x = Dropout(0.1)(x) - x = Dense(128, activation='relu')(x) - x = Dropout(0.1)(x) - x = Dense(128, activation='relu')(x) - return Model(input, x) - - -def compute_accuracy(y_true, y_pred): - '''Compute classification accuracy with a fixed threshold on distances. - ''' - pred = y_pred.ravel() < 0.5 - return np.mean(pred == y_true) - - -def accuracy(y_true, y_pred): - '''Compute classification accuracy with a fixed threshold on distances. 
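-    Distances below 0.5 are counted as positive (same-class) predictions;
-    this is the backend-ops twin of compute_accuracy above, so it can run
-    as a Keras metric during fit().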
- ''' - return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype))) - - -# the data, split between train and test sets -(x_train, y_train), (x_test, y_test) = mnist.load_data() -x_train = x_train.astype('float32') -x_test = x_test.astype('float32') -x_train /= 255 -x_test /= 255 -input_shape = x_train.shape[1:] - -# create training+test positive and negative pairs -digit_indices = [np.where(y_train == i)[0] for i in range(num_classes)] -tr_pairs, tr_y = create_pairs(x_train, digit_indices) - -digit_indices = [np.where(y_test == i)[0] for i in range(num_classes)] -te_pairs, te_y = create_pairs(x_test, digit_indices) - -# network definition -base_network = create_base_network(input_shape) - -input_a = Input(shape=input_shape) -input_b = Input(shape=input_shape) - -# because we re-use the same instance `base_network`, -# the weights of the network -# will be shared across the two branches -processed_a = base_network(input_a) -processed_b = base_network(input_b) - -distance = Lambda(euclidean_distance, - output_shape=eucl_dist_output_shape)([processed_a, processed_b]) - -model = Model([input_a, input_b], distance) - -# train -rms = RMSprop() -model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy]) -model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y, - batch_size=128, - epochs=epochs, - validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)) - -# compute final accuracy on training and test sets -y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]]) -tr_acc = compute_accuracy(tr_y, y_pred) -y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]]) -te_acc = compute_accuracy(te_y, y_pred) - -print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc)) -print('* Accuracy on test set: %0.2f%%' % (100 * te_acc)) -'''Example of how to use sklearn wrapper - -Builds simple CNN models on MNIST and uses sklearn's GridSearchCV to find best model -''' - -from __future__ import print_function - -import keras -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation, Flatten -from keras.layers import Conv2D, MaxPooling2D -from keras.wrappers.scikit_learn import KerasClassifier -from keras import backend as K -from sklearn.model_selection import GridSearchCV - - -num_classes = 10 - -# input image dimensions -img_rows, img_cols = 28, 28 - -# load training data and do basic data normalization -(x_train, y_train), (x_test, y_test) = mnist.load_data() - -if K.image_data_format() == 'channels_first': - x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) - x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) - input_shape = (1, img_rows, img_cols) -else: - x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) - x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) - input_shape = (img_rows, img_cols, 1) - -x_train = x_train.astype('float32') -x_test = x_test.astype('float32') -x_train /= 255 -x_test /= 255 - -# convert class vectors to binary class matrices -y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) - - -def make_model(dense_layer_sizes, filters, kernel_size, pool_size): - '''Creates model comprised of 2 convolutional layers followed by dense layers - - dense_layer_sizes: List of layer sizes. 
This list has one number for each layer
-    filters: Number of convolutional filters in each convolutional layer
-    kernel_size: Convolutional kernel size
-    pool_size: Size of pooling area for max pooling
-    '''
-
-    model = Sequential()
-    model.add(Conv2D(filters, kernel_size,
-                     padding='valid',
-                     input_shape=input_shape))
-    model.add(Activation('relu'))
-    model.add(Conv2D(filters, kernel_size))
-    model.add(Activation('relu'))
-    model.add(MaxPooling2D(pool_size=pool_size))
-    model.add(Dropout(0.25))
-
-    model.add(Flatten())
-    for layer_size in dense_layer_sizes:
-        model.add(Dense(layer_size))
-        model.add(Activation('relu'))
-    model.add(Dropout(0.5))
-    model.add(Dense(num_classes))
-    model.add(Activation('softmax'))
-
-    model.compile(loss='categorical_crossentropy',
-                  optimizer='adadelta',
-                  metrics=['accuracy'])
-
-    return model
-
-
-dense_size_candidates = [[32], [64], [32, 32], [64, 64]]
-my_classifier = KerasClassifier(make_model, batch_size=32)
-validator = GridSearchCV(my_classifier,
-                         param_grid={'dense_layer_sizes': dense_size_candidates,
-                                     # epochs is available for tuning even when
-                                     # not an argument to model building function
-                                     'epochs': [3, 6],
-                                     'filters': [8],
-                                     'kernel_size': [3],
-                                     'pool_size': [2]},
-                         scoring='neg_log_loss',
-                         n_jobs=1)
-validator.fit(x_train, y_train)
-
-print('The parameters of the best model are: ')
-print(validator.best_params_)
-
-# validator.best_estimator_ returns sklearn-wrapped version of best model.
-# validator.best_estimator_.model returns the (unwrapped) keras model
-best_model = validator.best_estimator_.model
-metric_names = best_model.metrics_names
-metric_values = best_model.evaluate(x_test, y_test)
-for metric, value in zip(metric_names, metric_values):
-    print(metric, ': ', value)
-'''Trains a stacked what-where autoencoder built on residual blocks on the
-MNIST dataset. It exemplifies two influential methods that have been developed
-in the past few years.
-
-The first is the idea of properly 'unpooling.' During any max pool, the
-exact location (the 'where') of the maximal value in a pooled receptive field
-is lost, however it can be very useful in the overall reconstruction of an
-input image. Therefore, if the 'where' is handed from the encoder
-to the corresponding decoder layer, features being decoded can be 'placed' in
-the right location, allowing for reconstructions of much higher fidelity.
-
-# References
-
-- Visualizing and Understanding Convolutional Networks
-  Matthew D Zeiler, Rob Fergus
-  https://arxiv.org/abs/1311.2901v3
-- Stacked What-Where Auto-encoders
-  Junbo Zhao, Michael Mathieu, Ross Goroshin, Yann LeCun
-  https://arxiv.org/abs/1506.02351v8
-
-The second idea exploited here is that of residual learning. Residual blocks
-ease the training process by allowing skip connections that give the network
-the ability to be as linear (or non-linear) as the data sees fit. This allows
-for much deeper networks to be easily trained. The residual element seems to
-be advantageous in the context of this example as it allows a nice symmetry
-between the encoder and decoder. Normally, in the decoder, the final
-projection to the space where the image is reconstructed is linear, however
-this does not have to be the case for a residual block as the degree to which
-its output is linear or non-linear is determined by the data it is fed.
-However, in order to cap the reconstruction in this example, a hard sigmoid is
-applied as a bias because we know the MNIST digits are mapped to [0, 1]. 
-
-# References
-- Deep Residual Learning for Image Recognition
-  Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
-  https://arxiv.org/abs/1512.03385v1
-- Identity Mappings in Deep Residual Networks
-  Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun
-  https://arxiv.org/abs/1603.05027v3
-'''
-from __future__ import print_function
-import numpy as np
-
-from keras.datasets import mnist
-from keras.models import Model
-from keras.layers import Activation
-from keras.layers import UpSampling2D, Conv2D, MaxPooling2D
-from keras.layers import Input, BatchNormalization, ELU
-import matplotlib.pyplot as plt
-import keras.backend as K
-from keras import layers
-
-
-def convresblock(x, nfeats=8, ksize=3, nskipped=2, elu=True):
-    """The proposed residual block from [4].
-
-    Running with elu=True will use ELU nonlinearity and running with
-    elu=False will use BatchNorm + RELU nonlinearity. While ELUs are fast
-    due to the fact they do not suffer from BatchNorm overhead, they may
-    overfit because they do not offer the stochastic element of the batch
-    formation process of BatchNorm, which acts as a good regularizer.
-
-    # Arguments
-        x: 4D tensor, the tensor to feed through the block
-        nfeats: Integer, number of feature maps for conv layers.
-        ksize: Integer, width and height of conv kernels in first convolution.
-        nskipped: Integer, number of conv layers for the residual function.
-        elu: Boolean, whether to use ELU or BN+RELU.
-
-    # Input shape
-        4D tensor with shape:
-        `(batch, channels, rows, cols)`
-
-    # Output shape
-        4D tensor with shape:
-        `(batch, filters, rows, cols)`
-    """
-    y0 = Conv2D(nfeats, ksize, padding='same')(x)
-    y = y0
-    for i in range(nskipped):
-        if elu:
-            y = ELU()(y)
-        else:
-            y = BatchNormalization(axis=1)(y)
-            y = Activation('relu')(y)
-        y = Conv2D(nfeats, 1, padding='same')(y)
-    return layers.add([y0, y])
-
-
-def getwhere(x):
-    '''Calculate the 'where' mask that contains switches indicating which
-    index contained the max value when MaxPool2D was applied. Using the
-    gradient of the sum is a nice trick to keep everything high level.'''
-    y_prepool, y_postpool = x
-    return K.gradients(K.sum(y_postpool), y_prepool)
-
-
-if K.backend() == 'tensorflow':
-    raise RuntimeError('This example can only run with the '
-                       'Theano backend for the time being, '
-                       'because it requires taking the gradient '
-                       'of a gradient, which isn\'t '
-                       'supported for all TensorFlow ops.')
-
-# This example assumes 'channels_first' data format. 
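-# (How the 'where' masks work: getwhere() above returns the gradient of
-# sum(maxpool(x)) with respect to x, which is 1 exactly at each pooling
-# window's argmax and 0 elsewhere -- precisely the binary unpooling
-# switches the decoder multiplies back in.)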
-K.set_image_data_format('channels_first')
-
-# input image dimensions
-img_rows, img_cols = 28, 28
-
-# the data, split between train and test sets
-(x_train, _), (x_test, _) = mnist.load_data()
-
-x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols)
-x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols)
-x_train = x_train.astype('float32')
-x_test = x_test.astype('float32')
-x_train /= 255
-x_test /= 255
-print('x_train shape:', x_train.shape)
-print(x_train.shape[0], 'train samples')
-print(x_test.shape[0], 'test samples')
-
-# The size of the kernel used for the MaxPooling2D
-pool_size = 2
-# The total number of feature maps at each layer
-nfeats = [8, 16, 32, 64, 128]
-# The sizes of the pooling kernel at each layer
-pool_sizes = np.array([1, 1, 1, 1, 1]) * pool_size
-# The convolution kernel size
-ksize = 3
-# Number of epochs to train for
-epochs = 5
-# Batch size during training
-batch_size = 128
-
-if pool_size == 2:
-    # if using a 5 layer net of pool_size = 2
-    x_train = np.pad(x_train, [[0, 0], [0, 0], [2, 2], [2, 2]],
-                     mode='constant')
-    x_test = np.pad(x_test, [[0, 0], [0, 0], [2, 2], [2, 2]], mode='constant')
-    nlayers = 5
-elif pool_size == 3:
-    # if using a 3 layer net of pool_size = 3
-    x_train = x_train[:, :, :-1, :-1]
-    x_test = x_test[:, :, :-1, :-1]
-    nlayers = 3
-else:
-    import sys
-    sys.exit('Script supports pool_size of 2 and 3.')
-
-# Shape of input to train on (note that model is fully convolutional however)
-input_shape = x_train.shape[1:]
-# The final list of the size of axis=1 for all layers, including input
-nfeats_all = [input_shape[0]] + nfeats
-
-# First build the encoder, all the while keeping track of the 'where' masks
-img_input = Input(shape=input_shape)
-
-# We push the 'where' masks to the following list
-wheres = [None] * nlayers
-y = img_input
-for i in range(nlayers):
-    y_prepool = convresblock(y, nfeats=nfeats_all[i + 1], ksize=ksize)
-    y = MaxPooling2D(pool_size=(pool_sizes[i], pool_sizes[i]))(y_prepool)
-    wheres[i] = layers.Lambda(
-        getwhere, output_shape=lambda x: x[0])([y_prepool, y])
-
-# Now build the decoder, and use the stored 'where' masks to place the features
-for i in range(nlayers):
-    ind = nlayers - 1 - i
-    y = UpSampling2D(size=(pool_sizes[ind], pool_sizes[ind]))(y)
-    y = layers.multiply([y, wheres[ind]])
-    y = convresblock(y, nfeats=nfeats_all[ind], ksize=ksize)
-
-# Use hard_sigmoid to clip the range of the reconstruction
-y = Activation('hard_sigmoid')(y)
-
-# Define the model and its mean squared error loss, and compile it with Adam
-model = Model(img_input, y)
-model.compile('adam', 'mse')
-
-# Fit the model
-model.fit(x_train, x_train,
-          batch_size=batch_size,
-          epochs=epochs,
-          validation_data=(x_test, x_test))
-
-# Plot
-x_recon = model.predict(x_test[:25])
-x_plot = np.concatenate((x_test[:25], x_recon), axis=1)
-x_plot = x_plot.reshape((5, 10, input_shape[-2], input_shape[-1]))
-x_plot = np.vstack([np.hstack(x) for x in x_plot])
-plt.figure()
-plt.axis('off')
-plt.title('Test Samples: Originals/Reconstructions')
-plt.imshow(x_plot, interpolation='none', cmap='gray')
-plt.savefig('reconstructions.png')
-'''MNIST dataset with TFRecords, the standard TensorFlow data format.
-
-TFRecord is a data format supported throughout TensorFlow.
-This example demonstrates how to load TFRecord data using
-Input Tensors. 
Input Tensors differ from the normal Keras
-workflow because instead of fitting to data loaded into a
-numpy array, data is supplied via a special tensor that
-reads data from nodes that are wired directly into model
-graph with the `Input(tensor=input_tensor)` parameter.
-
-There are several advantages to using Input Tensors.
-First, if a dataset is already in TFRecord format you
-can load and train on that data directly in Keras.
-Second, extended backend API capabilities such as TensorFlow
-data augmentation are easy to integrate directly into your
-Keras training scripts via input tensors.
-Third, TensorFlow implements several data APIs for
-TFRecords, some of which provide significantly faster
-training performance than numpy arrays can provide because
-they run via the C++ backend. Please note that this
-example is tailored for brevity and clarity and not
-to demonstrate performance or augmentation capabilities.
-
-Input Tensors also have important disadvantages. In
-particular, Input Tensors are fixed at model construction
-because rewiring networks is not yet supported.
-For this reason, changing the data input source means
-model weights must be saved and the model rebuilt
-from scratch to connect the new input data.
-Validation cannot currently be performed as training
-progresses, and must be performed after training completes.
-This example demonstrates how to train with input
-tensors, save the model weights, and then evaluate the
-model using the numpy based Keras API.
-
-Gets to ~99.1% test accuracy after 5 epochs
-(high variance from run to run: 98.9-99.3).
-'''
-import numpy as np
-import os
-import tensorflow as tf
-import keras
-from keras import backend as K
-from keras import layers
-from keras.callbacks import Callback
-
-from tensorflow.contrib.learn.python.learn.datasets import mnist
-
-if K.backend() != 'tensorflow':
-    raise RuntimeError('This example can only run with the '
-                       'TensorFlow backend, '
-                       'because it requires TFRecords, which '
-                       'are not supported on other platforms.')
-
-
-class EvaluateInputTensor(Callback):
-    """Validate a model which does not expect external numpy data during training.
-
-    Keras does not expect external numpy data at training time, and thus cannot
-    accept numpy arrays for validation when all of a Keras Model's
-    `Input(input_tensor)` layers are provided an `input_tensor` parameter,
-    and the call to `Model.compile(target_tensors)` defines all `target_tensors`.
-    Instead, create a second model for validation which is also configured
-    with input tensors and add it to the `EvaluateInputTensor` callback
-    to perform validation.
-
-    It is recommended that this callback be the first in the list of callbacks
-    because it defines the validation variables required by many other callbacks,
-    and callbacks are called in order.
-
-    # Arguments
-        model: Keras model on which to call model.evaluate().
-        steps: Integer or `None`.
-            Total number of steps (batches of samples)
-            before declaring the evaluation round finished.
-            Ignored with the default value of `None`. 
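-
-    # Example (a sketch mirroring the usage further down in this script)
-
-        test_model.compile(optimizer='rmsprop',
-                           loss='categorical_crossentropy',
-                           metrics=['accuracy'],
-                           target_tensors=[y_test_batch])
-        train_model.fit(epochs=epochs,
-                        steps_per_epoch=steps_per_epoch,
-                        callbacks=[EvaluateInputTensor(test_model, steps=100)])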
-    """
-
-    def __init__(self, model, steps, metrics_prefix='val', verbose=1):
-        # parameter of callbacks passed during initialization
-        # pass evaluation mode directly
-        super(EvaluateInputTensor, self).__init__()
-        self.val_model = model
-        self.num_steps = steps
-        self.verbose = verbose
-        self.metrics_prefix = metrics_prefix
-
-    def on_epoch_end(self, epoch, logs={}):
-        self.val_model.set_weights(self.model.get_weights())
-        results = self.val_model.evaluate(None, None, steps=int(self.num_steps),
-                                          verbose=self.verbose)
-        metrics_str = '\n'
-        for result, name in zip(results, self.val_model.metrics_names):
-            metric_name = self.metrics_prefix + '_' + name
-            logs[metric_name] = result
-            if self.verbose > 0:
-                metrics_str = metrics_str + \
-                    metric_name + ': ' + str(result) + ' '
-
-        if self.verbose > 0:
-            print(metrics_str)
-
-
-def cnn_layers(x_train_input):
-    x = layers.Conv2D(32, (3, 3),
-                      activation='relu', padding='valid')(x_train_input)
-    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
-    x = layers.Conv2D(64, (3, 3), activation='relu')(x)
-    x = layers.MaxPooling2D(pool_size=(2, 2))(x)
-    x = layers.Flatten()(x)
-    x = layers.Dense(512, activation='relu')(x)
-    x = layers.Dropout(0.5)(x)
-    x_train_out = layers.Dense(num_classes,
-                               activation='softmax',
-                               name='x_train_out')(x)
-    return x_train_out
-
-
-sess = K.get_session()
-
-batch_size = 100
-batch_shape = (batch_size, 28, 28, 1)
-epochs = 5
-num_classes = 10
-
-# The capacity variable controls the maximum queue size
-# allowed when prefetching data for training.
-capacity = 10000
-
-# min_after_dequeue is the minimum number of elements in the queue
-# after a dequeue, which ensures sufficient mixing of elements.
-min_after_dequeue = 3000
-
-# If `enqueue_many` is `False`, `tensors` is assumed to represent a
-# single example. An input tensor with shape `[x, y, z]` will be output
-# as a tensor with shape `[batch_size, x, y, z]`.
-#
-# If `enqueue_many` is `True`, `tensors` is assumed to represent a
-# batch of examples, where the first dimension is indexed by example,
-# and all members of `tensors` should have the same size in the
-# first dimension. If an input tensor has shape `[*, x, y, z]`, the
-# output will have shape `[batch_size, x, y, z]`. 
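-#
-# In this script `data.train.images` is already a batch of flattened
-# examples with shape [num_examples, 784], hence `enqueue_many = True`
-# below: shuffle_batch emits [batch_size, 784] tensors, which are then
-# reshaped to (batch_size, 28, 28, 1).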
-enqueue_many = True
-
-cache_dir = os.path.expanduser(
- os.path.join('~', '.keras', 'datasets', 'MNIST-data'))
-data = mnist.read_data_sets(cache_dir, validation_size=0)
-
-x_train_batch, y_train_batch = tf.train.shuffle_batch(
- tensors=[data.train.images, data.train.labels.astype(np.int32)],
- batch_size=batch_size,
- capacity=capacity,
- min_after_dequeue=min_after_dequeue,
- enqueue_many=enqueue_many,
- num_threads=8)
-
-x_train_batch = tf.cast(x_train_batch, tf.float32)
-x_train_batch = tf.reshape(x_train_batch, shape=batch_shape)
-
-y_train_batch = tf.cast(y_train_batch, tf.int32)
-y_train_batch = tf.one_hot(y_train_batch, num_classes)
-
-x_batch_shape = x_train_batch.get_shape().as_list()
-y_batch_shape = y_train_batch.get_shape().as_list()
-
-model_input = layers.Input(tensor=x_train_batch)
-model_output = cnn_layers(model_input)
-train_model = keras.models.Model(inputs=model_input, outputs=model_output)
-
-# Pass the target tensor `y_train_batch` to `compile`
-# via the `target_tensors` keyword argument:
-train_model.compile(optimizer=keras.optimizers.RMSprop(lr=2e-3, decay=1e-5),
- loss='categorical_crossentropy',
- metrics=['accuracy'],
- target_tensors=[y_train_batch])
-train_model.summary()
-
-x_test_batch, y_test_batch = tf.train.batch(
- tensors=[data.test.images, data.test.labels.astype(np.int32)],
- batch_size=batch_size,
- capacity=capacity,
- enqueue_many=enqueue_many,
- num_threads=8)
-
-# Create a separate test model
-# to perform validation during training
-x_test_batch = tf.cast(x_test_batch, tf.float32)
-x_test_batch = tf.reshape(x_test_batch, shape=batch_shape)
-
-y_test_batch = tf.cast(y_test_batch, tf.int32)
-y_test_batch = tf.one_hot(y_test_batch, num_classes)
-
-x_test_batch_shape = x_test_batch.get_shape().as_list()
-y_test_batch_shape = y_test_batch.get_shape().as_list()
-
-test_model_input = layers.Input(tensor=x_test_batch)
-test_model_output = cnn_layers(test_model_input)
-test_model = keras.models.Model(
- inputs=test_model_input, outputs=test_model_output)
-
-# Pass the target tensor `y_test_batch` to `compile`
-# via the `target_tensors` keyword argument:
-test_model.compile(optimizer=keras.optimizers.RMSprop(lr=2e-3, decay=1e-5),
- loss='categorical_crossentropy',
- metrics=['accuracy'],
- target_tensors=[y_test_batch])
-
-# Fit the model using data from the TFRecord data tensors.
-coord = tf.train.Coordinator()
-threads = tf.train.start_queue_runners(sess, coord)
-
-train_model.fit(
- epochs=epochs,
- steps_per_epoch=int(np.ceil(data.train.num_examples / float(batch_size))),
- callbacks=[EvaluateInputTensor(test_model, steps=100)])
-
-# Save the model weights.
-train_model.save_weights('saved_wt.h5')
-
-# Clean up the TF session.
-coord.request_stop()
-coord.join(threads)
-K.clear_session()
-
-# Second session to test loading the trained model without tensors
-x_test = np.reshape(data.test.images, (data.test.images.shape[0], 28, 28, 1))
-y_test = data.test.labels
-x_test_inp = layers.Input(shape=(x_test.shape[1:]))
-test_out = cnn_layers(x_test_inp)
-test_model = keras.models.Model(inputs=x_test_inp, outputs=test_out)
-
-test_model.load_weights('saved_wt.h5')
-test_model.compile(optimizer='rmsprop',
- loss='categorical_crossentropy',
- metrics=['accuracy'])
-test_model.summary()
-
-loss, acc = test_model.evaluate(x_test,
- keras.utils.to_categorical(y_test),
- batch_size=batch_size)
-print('\nTest accuracy: {0}'.format(acc))
-'''Transfer learning toy example.
-
-1 - Train a simple convnet on the first 5 digits [0..4] of the MNIST dataset.
-2 - Freeze convolutional layers and fine-tune dense layers - for the classification of digits [5..9]. - -Get to 99.8% test accuracy after 5 epochs -for the first five digits classifier -and 99.2% for the last five digits after transfer + fine-tuning. -''' - -from __future__ import print_function - -import datetime -import keras -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation, Flatten -from keras.layers import Conv2D, MaxPooling2D -from keras import backend as K - -now = datetime.datetime.now - -batch_size = 128 -num_classes = 5 -epochs = 5 - -# input image dimensions -img_rows, img_cols = 28, 28 -# number of convolutional filters to use -filters = 32 -# size of pooling area for max pooling -pool_size = 2 -# convolution kernel size -kernel_size = 3 - -if K.image_data_format() == 'channels_first': - input_shape = (1, img_rows, img_cols) -else: - input_shape = (img_rows, img_cols, 1) - - -def train_model(model, train, test, num_classes): - x_train = train[0].reshape((train[0].shape[0],) + input_shape) - x_test = test[0].reshape((test[0].shape[0],) + input_shape) - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - x_train /= 255 - x_test /= 255 - print('x_train shape:', x_train.shape) - print(x_train.shape[0], 'train samples') - print(x_test.shape[0], 'test samples') - - # convert class vectors to binary class matrices - y_train = keras.utils.to_categorical(train[1], num_classes) - y_test = keras.utils.to_categorical(test[1], num_classes) - - model.compile(loss='categorical_crossentropy', - optimizer='adadelta', - metrics=['accuracy']) - - t = now() - model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - verbose=1, - validation_data=(x_test, y_test)) - print('Training time: %s' % (now() - t)) - score = model.evaluate(x_test, y_test, verbose=0) - print('Test score:', score[0]) - print('Test accuracy:', score[1]) - - -# the data, split between train and test sets -(x_train, y_train), (x_test, y_test) = mnist.load_data() - -# create two datasets one with digits below 5 and one with 5 and above -x_train_lt5 = x_train[y_train < 5] -y_train_lt5 = y_train[y_train < 5] -x_test_lt5 = x_test[y_test < 5] -y_test_lt5 = y_test[y_test < 5] - -x_train_gte5 = x_train[y_train >= 5] -y_train_gte5 = y_train[y_train >= 5] - 5 -x_test_gte5 = x_test[y_test >= 5] -y_test_gte5 = y_test[y_test >= 5] - 5 - -# define two groups of layers: feature (convolutions) and classification (dense) -feature_layers = [ - Conv2D(filters, kernel_size, - padding='valid', - input_shape=input_shape), - Activation('relu'), - Conv2D(filters, kernel_size), - Activation('relu'), - MaxPooling2D(pool_size=pool_size), - Dropout(0.25), - Flatten(), -] - -classification_layers = [ - Dense(128), - Activation('relu'), - Dropout(0.5), - Dense(num_classes), - Activation('softmax') -] - -# create complete model -model = Sequential(feature_layers + classification_layers) - -# train model for 5-digit classification [0..4] -train_model(model, - (x_train_lt5, y_train_lt5), - (x_test_lt5, y_test_lt5), num_classes) - -# freeze feature layers and rebuild model -for l in feature_layers: - l.trainable = False - -# transfer: train dense layers for new classification task [5..9] -train_model(model, - (x_train_gte5, y_train_gte5), - (x_test_gte5, y_test_gte5), num_classes) -'''Neural doodle with Keras - -# Script Usage - -## Arguments -``` ---nlabels: # of regions (colors) in mask images ---style-image: image to learn style from ---style-mask: 
semantic labels for style image
---target-mask: semantic labels for target image (your doodle)
---content-image: optional image to learn content from
---target-image-prefix: path prefix for generated target images
-```
-
-## Example 1: doodle using a style image, style mask
-and target mask.
-```
-python neural_doodle.py --nlabels 4 --style-image Monet/style.png \
---style-mask Monet/style_mask.png --target-mask Monet/target_mask.png \
---target-image-prefix generated/monet
-```
-
-## Example 2: doodle using a style image, style mask,
-target mask and an optional content image.
-```
-python neural_doodle.py --nlabels 4 --style-image Renoir/style.png \
---style-mask Renoir/style_mask.png --target-mask Renoir/target_mask.png \
---content-image Renoir/creek.jpg \
---target-image-prefix generated/renoir
-```
-
-# References
-
-- [Dmitry Ulyanov's blog on fast-neural-doodle]
- (http://dmitryulyanov.github.io/feed-forward-neural-doodle/)
-- [Torch code for fast-neural-doodle]
- (https://github.com/DmitryUlyanov/fast-neural-doodle)
-- [Torch code for online-neural-doodle]
- (https://github.com/DmitryUlyanov/online-neural-doodle)
-- [Paper Texture Networks: Feed-forward Synthesis of Textures and Stylized Images]
- (http://arxiv.org/abs/1603.03417)
-- [Discussion on parameter tuning]
- (https://github.com/keras-team/keras/issues/3705)
-
-# Resources
-
-Example images can be downloaded from
-https://github.com/DmitryUlyanov/fast-neural-doodle/tree/master/data
-'''
-from __future__ import print_function
-import time
-import argparse
-import numpy as np
-from scipy.optimize import fmin_l_bfgs_b
-
-from keras import backend as K
-from keras.layers import Input, AveragePooling2D
-from keras.models import Model
-from keras.preprocessing.image import load_img, save_img, img_to_array
-from keras.applications import vgg19
-
-# Command line arguments
-parser = argparse.ArgumentParser(description='Keras neural doodle example')
-parser.add_argument('--nlabels', type=int,
- help='number of semantic labels'
- ' (regions in different colors)'
- ' in style_mask/target_mask')
-parser.add_argument('--style-image', type=str,
- help='path to image to learn style from')
-parser.add_argument('--style-mask', type=str,
- help='path to semantic mask of style image')
-parser.add_argument('--target-mask', type=str,
- help='path to semantic mask of target image')
-parser.add_argument('--content-image', type=str, default=None,
- help='path to optional content image')
-parser.add_argument('--target-image-prefix', type=str,
- help='path prefix for generated results')
-args = parser.parse_args()
-
-style_img_path = args.style_image
-style_mask_path = args.style_mask
-target_mask_path = args.target_mask
-content_img_path = args.content_image
-target_img_prefix = args.target_image_prefix
-use_content_img = content_img_path is not None
-
-num_labels = args.nlabels
-num_colors = 3 # RGB
-# determine image sizes based on target_mask
-ref_img = img_to_array(load_img(target_mask_path))
-img_nrows, img_ncols = ref_img.shape[:2]
-
-total_variation_weight = 50.
-style_weight = 1.
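# [Editor's sketch, not part of the original file] The weights above and the
# content_weight set just below combine into the scalar objective the script
# minimizes later:
#
#   loss = total_variation_weight * tv_loss
#        + (style_weight / len(style_feature_layers)) * sum(per_region_style_losses)
#        + content_weight * content_loss
#
# so when no --content-image is given (content_weight = 0), the doodle is
# driven purely by the masked style statistics plus local smoothness.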
-content_weight = 0.1 if use_content_img else 0 - -content_feature_layers = ['block5_conv2'] -# To get better generation qualities, use more conv layers for style features -style_feature_layers = ['block1_conv1', 'block2_conv1', 'block3_conv1', - 'block4_conv1', 'block5_conv1'] - - -# helper functions for reading/processing images -def preprocess_image(image_path): - img = load_img(image_path, target_size=(img_nrows, img_ncols)) - img = img_to_array(img) - img = np.expand_dims(img, axis=0) - img = vgg19.preprocess_input(img) - return img - - -def deprocess_image(x): - if K.image_data_format() == 'channels_first': - x = x.reshape((3, img_nrows, img_ncols)) - x = x.transpose((1, 2, 0)) - else: - x = x.reshape((img_nrows, img_ncols, 3)) - # Remove zero-center by mean pixel - x[:, :, 0] += 103.939 - x[:, :, 1] += 116.779 - x[:, :, 2] += 123.68 - # 'BGR'->'RGB' - x = x[:, :, ::-1] - x = np.clip(x, 0, 255).astype('uint8') - return x - - -def kmeans(xs, k): - assert xs.ndim == 2 - try: - from sklearn.cluster import k_means - _, labels, _ = k_means(xs.astype('float64'), k) - except ImportError: - from scipy.cluster.vq import kmeans2 - _, labels = kmeans2(xs, k, missing='raise') - return labels - - -def load_mask_labels(): - '''Load both target and style masks. - A mask image (nr x nc) with m labels/colors will be loaded - as a 4D boolean tensor: - (1, m, nr, nc) for 'channels_first' or (1, nr, nc, m) for 'channels_last' - ''' - target_mask_img = load_img(target_mask_path, - target_size=(img_nrows, img_ncols)) - target_mask_img = img_to_array(target_mask_img) - style_mask_img = load_img(style_mask_path, - target_size=(img_nrows, img_ncols)) - style_mask_img = img_to_array(style_mask_img) - if K.image_data_format() == 'channels_first': - mask_vecs = np.vstack([style_mask_img.reshape((3, -1)).T, - target_mask_img.reshape((3, -1)).T]) - else: - mask_vecs = np.vstack([style_mask_img.reshape((-1, 3)), - target_mask_img.reshape((-1, 3))]) - - labels = kmeans(mask_vecs, num_labels) - style_mask_label = labels[:img_nrows * - img_ncols].reshape((img_nrows, img_ncols)) - target_mask_label = labels[img_nrows * - img_ncols:].reshape((img_nrows, img_ncols)) - - stack_axis = 0 if K.image_data_format() == 'channels_first' else -1 - style_mask = np.stack([style_mask_label == r for r in range(num_labels)], - axis=stack_axis) - target_mask = np.stack([target_mask_label == r for r in range(num_labels)], - axis=stack_axis) - - return (np.expand_dims(style_mask, axis=0), - np.expand_dims(target_mask, axis=0)) - - -# Create tensor variables for images -if K.image_data_format() == 'channels_first': - shape = (1, num_colors, img_nrows, img_ncols) -else: - shape = (1, img_nrows, img_ncols, num_colors) - -style_image = K.variable(preprocess_image(style_img_path)) -target_image = K.placeholder(shape=shape) -if use_content_img: - content_image = K.variable(preprocess_image(content_img_path)) -else: - content_image = K.zeros(shape=shape) - -images = K.concatenate([style_image, target_image, content_image], axis=0) - -# Create tensor variables for masks -raw_style_mask, raw_target_mask = load_mask_labels() -style_mask = K.variable(raw_style_mask.astype('float32')) -target_mask = K.variable(raw_target_mask.astype('float32')) -masks = K.concatenate([style_mask, target_mask], axis=0) - -# index constants for images and tasks variables -STYLE, TARGET, CONTENT = 0, 1, 2 - -# Build image model, mask model and use layer outputs as features -# image model as VGG19 -image_model = vgg19.VGG19(include_top=False, input_tensor=images) - -# 
mask model as a series of pooling -mask_input = Input(tensor=masks, shape=(None, None, None), name='mask_input') -x = mask_input -for layer in image_model.layers[1:]: - name = 'mask_%s' % layer.name - if 'conv' in layer.name: - x = AveragePooling2D((3, 3), padding='same', strides=( - 1, 1), name=name)(x) - elif 'pool' in layer.name: - x = AveragePooling2D((2, 2), name=name)(x) -mask_model = Model(mask_input, x) - -# Collect features from image_model and task_model -image_features = {} -mask_features = {} -for img_layer, mask_layer in zip(image_model.layers, mask_model.layers): - if 'conv' in img_layer.name: - assert 'mask_' + img_layer.name == mask_layer.name - layer_name = img_layer.name - img_feat, mask_feat = img_layer.output, mask_layer.output - image_features[layer_name] = img_feat - mask_features[layer_name] = mask_feat - - -# Define loss functions -def gram_matrix(x): - assert K.ndim(x) == 3 - features = K.batch_flatten(x) - gram = K.dot(features, K.transpose(features)) - return gram - - -def region_style_loss(style_image, target_image, style_mask, target_mask): - '''Calculate style loss between style_image and target_image, - for one common region specified by their (boolean) masks - ''' - assert 3 == K.ndim(style_image) == K.ndim(target_image) - assert 2 == K.ndim(style_mask) == K.ndim(target_mask) - if K.image_data_format() == 'channels_first': - masked_style = style_image * style_mask - masked_target = target_image * target_mask - num_channels = K.shape(style_image)[0] - else: - masked_style = K.permute_dimensions( - style_image, (2, 0, 1)) * style_mask - masked_target = K.permute_dimensions( - target_image, (2, 0, 1)) * target_mask - num_channels = K.shape(style_image)[-1] - num_channels = K.cast(num_channels, dtype='float32') - s = gram_matrix(masked_style) / K.mean(style_mask) / num_channels - c = gram_matrix(masked_target) / K.mean(target_mask) / num_channels - return K.mean(K.square(s - c)) - - -def style_loss(style_image, target_image, style_masks, target_masks): - '''Calculate style loss between style_image and target_image, - in all regions. - ''' - assert 3 == K.ndim(style_image) == K.ndim(target_image) - assert 3 == K.ndim(style_masks) == K.ndim(target_masks) - loss = K.variable(0) - for i in range(num_labels): - if K.image_data_format() == 'channels_first': - style_mask = style_masks[i, :, :] - target_mask = target_masks[i, :, :] - else: - style_mask = style_masks[:, :, i] - target_mask = target_masks[:, :, i] - loss += region_style_loss(style_image, - target_image, style_mask, target_mask) - return loss - - -def content_loss(content_image, target_image): - return K.sum(K.square(target_image - content_image)) - - -def total_variation_loss(x): - assert 4 == K.ndim(x) - if K.image_data_format() == 'channels_first': - a = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] - - x[:, :, 1:, :img_ncols - 1]) - b = K.square(x[:, :, :img_nrows - 1, :img_ncols - 1] - - x[:, :, :img_nrows - 1, 1:]) - else: - a = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] - - x[:, 1:, :img_ncols - 1, :]) - b = K.square(x[:, :img_nrows - 1, :img_ncols - 1, :] - - x[:, :img_nrows - 1, 1:, :]) - return K.sum(K.pow(a + b, 1.25)) - - -# Overall loss is the weighted sum of content_loss, style_loss and tv_loss -# Each individual loss uses features from image/mask models. 
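# [Editor's sketch, not part of the original file] What gram_matrix (defined
# above) computes, checked with plain NumPy: flatten each of C feature maps
# to a row, then take all channel-by-channel inner products, giving a C x C
# matrix that records which channels co-activate regardless of where in the
# image they fire:
#
#   feats = np.arange(6.).reshape(2, 3)   # 2 channels, 3 spatial positions
#   gram = feats.dot(feats.T)             # shape (2, 2)
#   # gram[0, 1] == 0*3 + 1*4 + 2*5 == 14, the channel-0/channel-1 overlap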
-loss = K.variable(0) -for layer in content_feature_layers: - content_feat = image_features[layer][CONTENT, :, :, :] - target_feat = image_features[layer][TARGET, :, :, :] - loss += content_weight * content_loss(content_feat, target_feat) - -for layer in style_feature_layers: - style_feat = image_features[layer][STYLE, :, :, :] - target_feat = image_features[layer][TARGET, :, :, :] - style_masks = mask_features[layer][STYLE, :, :, :] - target_masks = mask_features[layer][TARGET, :, :, :] - sl = style_loss(style_feat, target_feat, style_masks, target_masks) - loss += (style_weight / len(style_feature_layers)) * sl - -loss += total_variation_weight * total_variation_loss(target_image) -loss_grads = K.gradients(loss, target_image) - -# Evaluator class for computing efficiency -outputs = [loss] -if isinstance(loss_grads, (list, tuple)): - outputs += loss_grads -else: - outputs.append(loss_grads) - -f_outputs = K.function([target_image], outputs) - - -def eval_loss_and_grads(x): - if K.image_data_format() == 'channels_first': - x = x.reshape((1, 3, img_nrows, img_ncols)) - else: - x = x.reshape((1, img_nrows, img_ncols, 3)) - outs = f_outputs([x]) - loss_value = outs[0] - if len(outs[1:]) == 1: - grad_values = outs[1].flatten().astype('float64') - else: - grad_values = np.array(outs[1:]).flatten().astype('float64') - return loss_value, grad_values - - -class Evaluator(object): - - def __init__(self): - self.loss_value = None - self.grads_values = None - - def loss(self, x): - assert self.loss_value is None - loss_value, grad_values = eval_loss_and_grads(x) - self.loss_value = loss_value - self.grad_values = grad_values - return self.loss_value - - def grads(self, x): - assert self.loss_value is not None - grad_values = np.copy(self.grad_values) - self.loss_value = None - self.grad_values = None - return grad_values - - -evaluator = Evaluator() - -# Generate images by iterative optimization -if K.image_data_format() == 'channels_first': - x = np.random.uniform(0, 255, (1, 3, img_nrows, img_ncols)) - 128. -else: - x = np.random.uniform(0, 255, (1, img_nrows, img_ncols, 3)) - 128. - -for i in range(50): - print('Start of iteration', i) - start_time = time.time() - x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(), - fprime=evaluator.grads, maxfun=20) - print('Current loss value:', min_val) - # save current generated image - img = deprocess_image(x.copy()) - fname = target_img_prefix + '_at_iteration_%d.png' % i - save_img(fname, img) - end_time = time.time() - print('Image saved as', fname) - print('Iteration %d completed in %ds' % (i, end_time - start_time)) -'''Neural style transfer with Keras. - -Run the script with: -``` -python neural_style_transfer.py path_to_your_base_image.jpg \ - path_to_your_reference.jpg prefix_for_results -``` -e.g.: -``` -python neural_style_transfer.py img/tuebingen.jpg \ - img/starry_night.jpg results/my_result -``` -Optional parameters: -``` ---iter, To specify the number of iterations \ - the style transfer takes place (Default is 10) ---content_weight, The weight given to the content loss (Default is 0.025) ---style_weight, The weight given to the style loss (Default is 1.0) ---tv_weight, The weight given to the total variation loss (Default is 1.0) -``` - -It is preferable to run this script on GPU, for speed. - -Example result: https://twitter.com/fchollet/status/686631033085677568 - -# Details - -Style transfer consists in generating an image -with the same "content" as a base image, but with the -"style" of a different picture (typically artistic). 
-
-This is achieved through the optimization of a loss function
-that has 3 components: "style loss", "content loss",
-and "total variation loss":
-
-- The total variation loss imposes local spatial continuity between
-the pixels of the combination image, giving it visual coherence.
-
-- The style loss is where the deep learning comes in --that one is defined
-using a deep convolutional neural network. Precisely, it consists in a sum of
-L2 distances between the Gram matrices of the representations of
-the base image and the style reference image, extracted from
-different layers of a convnet (trained on ImageNet). The general idea
-is to capture color/texture information at different spatial
-scales (fairly large scales --defined by the depth of the layer considered).
-
- - The content loss is an L2 distance between the features of the base
-image (extracted from a deep layer) and the features of the combination image,
-keeping the generated image close enough to the original one.
-
-# References
- - [A Neural Algorithm of Artistic Style](http://arxiv.org/abs/1508.06576)
-'''
-
-from __future__ import print_function
-from keras.preprocessing.image import load_img, save_img, img_to_array
-import numpy as np
-from scipy.optimize import fmin_l_bfgs_b
-import time
-import argparse
-
-from keras.applications import vgg19
-from keras import backend as K
-
-parser = argparse.ArgumentParser(
- description='Neural style transfer with Keras.')
-parser.add_argument('base_image_path', metavar='base', type=str,
- help='Path to the image to transform.')
-parser.add_argument('style_reference_image_path', metavar='ref', type=str,
- help='Path to the style reference image.')
-parser.add_argument('result_prefix', metavar='res_prefix', type=str,
- help='Prefix for the saved results.')
-parser.add_argument('--iter', type=int, default=10, required=False,
- help='Number of iterations to run.')
-parser.add_argument('--content_weight', type=float, default=0.025, required=False,
- help='Content weight.')
-parser.add_argument('--style_weight', type=float, default=1.0, required=False,
- help='Style weight.')
-parser.add_argument('--tv_weight', type=float, default=1.0, required=False,
- help='Total Variation weight.')
-
-args = parser.parse_args()
-base_image_path = args.base_image_path
-style_reference_image_path = args.style_reference_image_path
-result_prefix = args.result_prefix
-iterations = args.iter
-
-# these are the weights of the different loss components
-total_variation_weight = args.tv_weight
-style_weight = args.style_weight
-content_weight = args.content_weight
-
-# dimensions of the generated picture.
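# [Editor's note, not part of the original file] The generated image is fixed
# at img_nrows = 400 and img_ncols is scaled to keep the base image's aspect
# ratio. Worked example for a 1024x768 base image:
#
#   img_ncols = int(1024 * 400 / 768)   # = 533
#
# so base, style reference and combination tensors all share one
# 400x533x3 shape.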
-width, height = load_img(base_image_path).size -img_nrows = 400 -img_ncols = int(width * img_nrows / height) - -# util function to open, resize and format pictures into appropriate tensors - - -def preprocess_image(image_path): - img = load_img(image_path, target_size=(img_nrows, img_ncols)) - img = img_to_array(img) - img = np.expand_dims(img, axis=0) - img = vgg19.preprocess_input(img) - return img - -# util function to convert a tensor into a valid image - - -def deprocess_image(x): - if K.image_data_format() == 'channels_first': - x = x.reshape((3, img_nrows, img_ncols)) - x = x.transpose((1, 2, 0)) - else: - x = x.reshape((img_nrows, img_ncols, 3)) - # Remove zero-center by mean pixel - x[:, :, 0] += 103.939 - x[:, :, 1] += 116.779 - x[:, :, 2] += 123.68 - # 'BGR'->'RGB' - x = x[:, :, ::-1] - x = np.clip(x, 0, 255).astype('uint8') - return x - - -# get tensor representations of our images -base_image = K.variable(preprocess_image(base_image_path)) -style_reference_image = K.variable( - preprocess_image(style_reference_image_path)) - -# this will contain our generated image -if K.image_data_format() == 'channels_first': - combination_image = K.placeholder((1, 3, img_nrows, img_ncols)) -else: - combination_image = K.placeholder((1, img_nrows, img_ncols, 3)) - -# combine the 3 images into a single Keras tensor -input_tensor = K.concatenate([base_image, - style_reference_image, - combination_image], axis=0) - -# build the VGG19 network with our 3 images as input -# the model will be loaded with pre-trained ImageNet weights -model = vgg19.VGG19(input_tensor=input_tensor, - weights='imagenet', include_top=False) -print('Model loaded.') - -# get the symbolic outputs of each "key" layer (we gave them unique names). -outputs_dict = dict([(layer.name, layer.output) for layer in model.layers]) - -# compute the neural style loss -# first we need to define 4 util functions - -# the gram matrix of an image tensor (feature-wise outer product) - - -def gram_matrix(x): - assert K.ndim(x) == 3 - if K.image_data_format() == 'channels_first': - features = K.batch_flatten(x) - else: - features = K.batch_flatten(K.permute_dimensions(x, (2, 0, 1))) - gram = K.dot(features, K.transpose(features)) - return gram - -# the "style loss" is designed to maintain -# the style of the reference image in the generated image. 
-# It is based on the gram matrices (which capture style) of -# feature maps from the style reference image -# and from the generated image - - -def style_loss(style, combination): - assert K.ndim(style) == 3 - assert K.ndim(combination) == 3 - S = gram_matrix(style) - C = gram_matrix(combination) - channels = 3 - size = img_nrows * img_ncols - return K.sum(K.square(S - C)) / (4.0 * (channels ** 2) * (size ** 2)) - -# an auxiliary loss function -# designed to maintain the "content" of the -# base image in the generated image - - -def content_loss(base, combination): - return K.sum(K.square(combination - base)) - -# the 3rd loss function, total variation loss, -# designed to keep the generated image locally coherent - - -def total_variation_loss(x): - assert K.ndim(x) == 4 - if K.image_data_format() == 'channels_first': - a = K.square( - x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, 1:, :img_ncols - 1]) - b = K.square( - x[:, :, :img_nrows - 1, :img_ncols - 1] - x[:, :, :img_nrows - 1, 1:]) - else: - a = K.square( - x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, 1:, :img_ncols - 1, :]) - b = K.square( - x[:, :img_nrows - 1, :img_ncols - 1, :] - x[:, :img_nrows - 1, 1:, :]) - return K.sum(K.pow(a + b, 1.25)) - - -# combine these loss functions into a single scalar -loss = K.variable(0.0) -layer_features = outputs_dict['block5_conv2'] -base_image_features = layer_features[0, :, :, :] -combination_features = layer_features[2, :, :, :] -loss += content_weight * content_loss(base_image_features, - combination_features) - -feature_layers = ['block1_conv1', 'block2_conv1', - 'block3_conv1', 'block4_conv1', - 'block5_conv1'] -for layer_name in feature_layers: - layer_features = outputs_dict[layer_name] - style_reference_features = layer_features[1, :, :, :] - combination_features = layer_features[2, :, :, :] - sl = style_loss(style_reference_features, combination_features) - loss += (style_weight / len(feature_layers)) * sl -loss += total_variation_weight * total_variation_loss(combination_image) - -# get the gradients of the generated image wrt the loss -grads = K.gradients(loss, combination_image) - -outputs = [loss] -if isinstance(grads, (list, tuple)): - outputs += grads -else: - outputs.append(grads) - -f_outputs = K.function([combination_image], outputs) - - -def eval_loss_and_grads(x): - if K.image_data_format() == 'channels_first': - x = x.reshape((1, 3, img_nrows, img_ncols)) - else: - x = x.reshape((1, img_nrows, img_ncols, 3)) - outs = f_outputs([x]) - loss_value = outs[0] - if len(outs[1:]) == 1: - grad_values = outs[1].flatten().astype('float64') - else: - grad_values = np.array(outs[1:]).flatten().astype('float64') - return loss_value, grad_values - -# this Evaluator class makes it possible -# to compute loss and gradients in one pass -# while retrieving them via two separate functions, -# "loss" and "grads". This is done because scipy.optimize -# requires separate functions for loss and gradients, -# but computing them separately would be inefficient. 
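# [Editor's sketch, not part of the original file] The resulting call pattern:
# fmin_l_bfgs_b invokes the loss and gradient callbacks separately, so the
# Evaluator below computes both in a single f_outputs run and caches the
# gradient for the immediately following grads() call:
#
#   ev = Evaluator()
#   l = ev.loss(x0)    # one combined loss/gradient pass, gradient cached
#   g = ev.grads(x0)   # returns the cached gradient, then clears the cache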
- - -class Evaluator(object): - - def __init__(self): - self.loss_value = None - self.grads_values = None - - def loss(self, x): - assert self.loss_value is None - loss_value, grad_values = eval_loss_and_grads(x) - self.loss_value = loss_value - self.grad_values = grad_values - return self.loss_value - - def grads(self, x): - assert self.loss_value is not None - grad_values = np.copy(self.grad_values) - self.loss_value = None - self.grad_values = None - return grad_values - - -evaluator = Evaluator() - -# run scipy-based optimization (L-BFGS) over the pixels of the generated image -# so as to minimize the neural style loss -x = preprocess_image(base_image_path) - -for i in range(iterations): - print('Start of iteration', i) - start_time = time.time() - x, min_val, info = fmin_l_bfgs_b(evaluator.loss, x.flatten(), - fprime=evaluator.grads, maxfun=20) - print('Current loss value:', min_val) - # save current generated image - img = deprocess_image(x.copy()) - fname = result_prefix + '_at_iteration_%d.png' % i - save_img(fname, img) - end_time = time.time() - print('Image saved as', fname) - print('Iteration %d completed in %ds' % (i, end_time - start_time)) -'''This script loads pre-trained word embeddings (GloVe embeddings) -into a frozen Keras Embedding layer, and uses it to -train a text classification model on the 20 Newsgroup dataset -(classification of newsgroup messages into 20 different categories). - -GloVe embedding data can be found at: -http://nlp.stanford.edu/data/glove.6B.zip -(source page: http://nlp.stanford.edu/projects/glove/) - -20 Newsgroup data can be found at: -http://www.cs.cmu.edu/afs/cs.cmu.edu/project/theo-20/www/data/news20.html -''' - -from __future__ import print_function - -import os -import sys -import numpy as np -from keras.preprocessing.text import Tokenizer -from keras.preprocessing.sequence import pad_sequences -from keras.utils import to_categorical -from keras.layers import Dense, Input, GlobalMaxPooling1D -from keras.layers import Conv1D, MaxPooling1D, Embedding -from keras.models import Model -from keras.initializers import Constant - - -BASE_DIR = '' -GLOVE_DIR = os.path.join(BASE_DIR, 'glove.6B') -TEXT_DATA_DIR = os.path.join(BASE_DIR, '20_newsgroup') -MAX_SEQUENCE_LENGTH = 1000 -MAX_NUM_WORDS = 20000 -EMBEDDING_DIM = 100 -VALIDATION_SPLIT = 0.2 - -# first, build index mapping words in the embeddings set -# to their embedding vector - -print('Indexing word vectors.') - -embeddings_index = {} -with open(os.path.join(GLOVE_DIR, 'glove.6B.100d.txt')) as f: - for line in f: - word, coefs = line.split(maxsplit=1) - coefs = np.fromstring(coefs, 'f', sep=' ') - embeddings_index[word] = coefs - -print('Found %s word vectors.' % len(embeddings_index)) - -# second, prepare text samples and their labels -print('Processing text dataset') - -texts = [] # list of text samples -labels_index = {} # dictionary mapping label name to numeric id -labels = [] # list of label ids -for name in sorted(os.listdir(TEXT_DATA_DIR)): - path = os.path.join(TEXT_DATA_DIR, name) - if os.path.isdir(path): - label_id = len(labels_index) - labels_index[name] = label_id - for fname in sorted(os.listdir(path)): - if fname.isdigit(): - fpath = os.path.join(path, fname) - args = {} if sys.version_info < ( - 3,) else {'encoding': 'latin-1'} - with open(fpath, **args) as f: - t = f.read() - i = t.find('\n\n') # skip header - if 0 < i: - t = t[i:] - texts.append(t) - labels.append(label_id) - -print('Found %s texts.' 
% len(texts))
-
-# finally, vectorize the text samples into a 2D integer tensor
-tokenizer = Tokenizer(num_words=MAX_NUM_WORDS)
-tokenizer.fit_on_texts(texts)
-sequences = tokenizer.texts_to_sequences(texts)
-
-word_index = tokenizer.word_index
-print('Found %s unique tokens.' % len(word_index))
-
-data = pad_sequences(sequences, maxlen=MAX_SEQUENCE_LENGTH)
-
-labels = to_categorical(np.asarray(labels))
-print('Shape of data tensor:', data.shape)
-print('Shape of label tensor:', labels.shape)
-
-# split the data into a training set and a validation set
-indices = np.arange(data.shape[0])
-np.random.shuffle(indices)
-data = data[indices]
-labels = labels[indices]
-num_validation_samples = int(VALIDATION_SPLIT * data.shape[0])
-
-x_train = data[:-num_validation_samples]
-y_train = labels[:-num_validation_samples]
-x_val = data[-num_validation_samples:]
-y_val = labels[-num_validation_samples:]
-
-print('Preparing embedding matrix.')
-
-# prepare embedding matrix
-num_words = min(MAX_NUM_WORDS, len(word_index)) + 1
-embedding_matrix = np.zeros((num_words, EMBEDDING_DIM))
-for word, i in word_index.items():
- if i > MAX_NUM_WORDS:
- continue
- embedding_vector = embeddings_index.get(word)
- if embedding_vector is not None:
- # words not found in embedding index will be all-zeros.
- embedding_matrix[i] = embedding_vector
-
-# load pre-trained word embeddings into an Embedding layer
-# note that we set trainable = False so as to keep the embeddings fixed
-embedding_layer = Embedding(num_words,
- EMBEDDING_DIM,
- embeddings_initializer=Constant(embedding_matrix),
- input_length=MAX_SEQUENCE_LENGTH,
- trainable=False)
-
-print('Training model.')
-
-# train a 1D convnet with global maxpooling
-sequence_input = Input(shape=(MAX_SEQUENCE_LENGTH,), dtype='int32')
-embedded_sequences = embedding_layer(sequence_input)
-x = Conv1D(128, 5, activation='relu')(embedded_sequences)
-x = MaxPooling1D(5)(x)
-x = Conv1D(128, 5, activation='relu')(x)
-x = MaxPooling1D(5)(x)
-x = Conv1D(128, 5, activation='relu')(x)
-x = GlobalMaxPooling1D()(x)
-x = Dense(128, activation='relu')(x)
-preds = Dense(len(labels_index), activation='softmax')(x)
-
-model = Model(sequence_input, preds)
-model.compile(loss='categorical_crossentropy',
- optimizer='rmsprop',
- metrics=['acc'])
-
-model.fit(x_train, y_train,
- batch_size=128,
- epochs=10,
- validation_data=(x_val, y_val))
-'''Trains and evaluates a simple MLP
-on the Reuters newswire topic classification task.
-''' -from __future__ import print_function - -import numpy as np -import keras -from keras.datasets import reuters -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation -from keras.preprocessing.text import Tokenizer - -max_words = 1000 -batch_size = 32 -epochs = 5 - -print('Loading data...') -(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words, - test_split=0.2) -print(len(x_train), 'train sequences') -print(len(x_test), 'test sequences') - -num_classes = np.max(y_train) + 1 -print(num_classes, 'classes') - -print('Vectorizing sequence data...') -tokenizer = Tokenizer(num_words=max_words) -x_train = tokenizer.sequences_to_matrix(x_train, mode='binary') -x_test = tokenizer.sequences_to_matrix(x_test, mode='binary') -print('x_train shape:', x_train.shape) -print('x_test shape:', x_test.shape) - -print('Convert class vector to binary class matrix ' - '(for use with categorical_crossentropy)') -y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) -print('y_train shape:', y_train.shape) -print('y_test shape:', y_test.shape) - -print('Building model...') -model = Sequential() -model.add(Dense(512, input_shape=(max_words,))) -model.add(Activation('relu')) -model.add(Dropout(0.5)) -model.add(Dense(num_classes)) -model.add(Activation('softmax')) - -model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - -history = model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - verbose=1, - validation_split=0.1) -score = model.evaluate(x_test, y_test, - batch_size=batch_size, verbose=1) -print('Test score:', score[0]) -print('Test accuracy:', score[1]) -'''Compares self-normalizing MLPs with regular MLPs. - -Compares the performance of a simple MLP using two -different activation functions: RELU and SELU -on the Reuters newswire topic classification task. - -# Reference - -- Klambauer, G., Unterthiner, T., Mayr, A., & Hochreiter, S. (2017). - Self-Normalizing Neural Networks. arXiv preprint arXiv:1706.02515. - https://arxiv.org/abs/1706.02515 -''' -from __future__ import print_function - -import numpy as np -import matplotlib.pyplot as plt -import keras -from keras.datasets import reuters -from keras.models import Sequential -from keras.layers import Dense, Activation, Dropout -from keras.layers.noise import AlphaDropout -from keras.preprocessing.text import Tokenizer - -max_words = 1000 -batch_size = 16 -epochs = 40 -plot = True - - -def create_network(n_dense=6, - dense_units=16, - activation='selu', - dropout=AlphaDropout, - dropout_rate=0.1, - kernel_initializer='lecun_normal', - optimizer='adam', - num_classes=1, - max_words=max_words): - """Generic function to create a fully-connected neural network. - - # Arguments - n_dense: int > 0. Number of dense layers. - dense_units: int > 0. Number of dense units per layer. - dropout: keras.layers.Layer. A dropout layer to apply. - dropout_rate: 0 <= float <= 1. The rate of dropout. - kernel_initializer: str. The initializer for the weights. - optimizer: str/keras.optimizers.Optimizer. The optimizer to use. - num_classes: int > 0. The number of classes to predict. - max_words: int > 0. The maximum number of words per data point. - - # Returns - A Keras model instance (compiled). 
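# Example
    (editor's illustration, not in the original docstring; 46 is the number
    of Reuters topic classes computed later in this script as
    np.max(y_train) + 1)
    model = create_network(num_classes=46, **network2)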
- """ - model = Sequential() - model.add(Dense(dense_units, input_shape=(max_words,), - kernel_initializer=kernel_initializer)) - model.add(Activation(activation)) - model.add(dropout(dropout_rate)) - - for i in range(n_dense - 1): - model.add(Dense(dense_units, kernel_initializer=kernel_initializer)) - model.add(Activation(activation)) - model.add(dropout(dropout_rate)) - - model.add(Dense(num_classes)) - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', - optimizer=optimizer, - metrics=['accuracy']) - return model - - -network1 = { - 'n_dense': 6, - 'dense_units': 16, - 'activation': 'relu', - 'dropout': Dropout, - 'dropout_rate': 0.5, - 'kernel_initializer': 'glorot_uniform', - 'optimizer': 'sgd' -} - -network2 = { - 'n_dense': 6, - 'dense_units': 16, - 'activation': 'selu', - 'dropout': AlphaDropout, - 'dropout_rate': 0.1, - 'kernel_initializer': 'lecun_normal', - 'optimizer': 'sgd' -} - -print('Loading data...') -(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=max_words, - test_split=0.2) -print(len(x_train), 'train sequences') -print(len(x_test), 'test sequences') - -num_classes = np.max(y_train) + 1 -print(num_classes, 'classes') - -print('Vectorizing sequence data...') -tokenizer = Tokenizer(num_words=max_words) -x_train = tokenizer.sequences_to_matrix(x_train, mode='binary') -x_test = tokenizer.sequences_to_matrix(x_test, mode='binary') -print('x_train shape:', x_train.shape) -print('x_test shape:', x_test.shape) - -print('Convert class vector to binary class matrix ' - '(for use with categorical_crossentropy)') -y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) -print('y_train shape:', y_train.shape) -print('y_test shape:', y_test.shape) - -print('\nBuilding network 1...') - -model1 = create_network(num_classes=num_classes, **network1) -history_model1 = model1.fit(x_train, - y_train, - batch_size=batch_size, - epochs=epochs, - verbose=1, - validation_split=0.1) - -score_model1 = model1.evaluate(x_test, - y_test, - batch_size=batch_size, - verbose=1) - - -print('\nBuilding network 2...') -model2 = create_network(num_classes=num_classes, **network2) - -history_model2 = model2.fit(x_train, - y_train, - batch_size=batch_size, - epochs=epochs, - verbose=1, - validation_split=0.1) - -score_model2 = model2.evaluate(x_test, - y_test, - batch_size=batch_size, - verbose=1) - -print('\nNetwork 1 results') -print('Hyperparameters:', network1) -print('Test score:', score_model1[0]) -print('Test accuracy:', score_model1[1]) -print('Network 2 results') -print('Hyperparameters:', network2) -print('Test score:', score_model2[0]) -print('Test accuracy:', score_model2[1]) - -plt.plot(range(epochs), - history_model1.history['val_loss'], - 'g-', - label='Network 1 Val Loss') -plt.plot(range(epochs), - history_model2.history['val_loss'], - 'r-', - label='Network 2 Val Loss') -plt.plot(range(epochs), - history_model1.history['loss'], - 'g--', - label='Network 1 Loss') -plt.plot(range(epochs), - history_model2.history['loss'], - 'r--', - label='Network 2 Loss') -plt.xlabel('Epochs') -plt.ylabel('Loss') -plt.legend() -plt.savefig('comparison_of_networks.png') -'''Trains a simple convnet on the MNIST dataset and embeds test data. - -The test data is embedded using the weights of the final dense layer, just -before the classification head. This embedding can then be visualized using -TensorBoard's Embedding Projector. 
-''' - -from __future__ import print_function - -from os import makedirs -from os.path import exists, join - -import keras -from keras.callbacks import TensorBoard -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers import Dense, Dropout, Flatten -from keras.layers import Conv2D, MaxPooling2D -from keras import backend as K - -import numpy as np - -batch_size = 128 -num_classes = 10 -epochs = 12 -log_dir = './logs' - -if not exists(log_dir): - makedirs(log_dir) - -# input image dimensions -img_rows, img_cols = 28, 28 - -# the data, split between train and test sets -(x_train, y_train), (x_test, y_test) = mnist.load_data() - -if K.image_data_format() == 'channels_first': - x_train = x_train.reshape(x_train.shape[0], 1, img_rows, img_cols) - x_test = x_test.reshape(x_test.shape[0], 1, img_rows, img_cols) - input_shape = (1, img_rows, img_cols) -else: - x_train = x_train.reshape(x_train.shape[0], img_rows, img_cols, 1) - x_test = x_test.reshape(x_test.shape[0], img_rows, img_cols, 1) - input_shape = (img_rows, img_cols, 1) - -x_train = x_train.astype('float32') -x_test = x_test.astype('float32') -x_train /= 255 -x_test /= 255 -print('x_train shape:', x_train.shape) -print(x_train.shape[0], 'train samples') -print(x_test.shape[0], 'test samples') - -# save class labels to disk to color data points in TensorBoard accordingly -with open(join(log_dir, 'metadata.tsv'), 'w') as f: - np.savetxt(f, y_test) - -# convert class vectors to binary class matrices -y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) - -tensorboard = TensorBoard(batch_size=batch_size, - embeddings_freq=1, - embeddings_layer_names=['features'], - embeddings_metadata='metadata.tsv', - embeddings_data=x_test) - -model = Sequential() -model.add(Conv2D(32, kernel_size=(3, 3), - activation='relu', - input_shape=input_shape)) -model.add(Conv2D(64, (3, 3), activation='relu')) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) -model.add(Flatten()) -model.add(Dense(128, activation='relu', name='features')) -model.add(Dropout(0.5)) -model.add(Dense(num_classes, activation='softmax')) - -model.compile(loss=keras.losses.categorical_crossentropy, - optimizer=keras.optimizers.Adadelta(), - metrics=['accuracy']) - -model.fit(x_train, y_train, - batch_size=batch_size, - callbacks=[tensorboard], - epochs=epochs, - verbose=1, - validation_data=(x_test, y_test)) -score = model.evaluate(x_test, y_test, verbose=0) -print('Test loss:', score[0]) -print('Test accuracy:', score[1]) - -# You can now launch tensorboard with `tensorboard --logdir=./logs` on your -# command line and then go to http://localhost:6006/#projector to view the -# embeddings -'''Example of VAE on MNIST dataset using MLP - -The VAE has a modular design. The encoder, decoder and VAE -are 3 models that share weights. After training the VAE model, -the encoder can be used to generate latent vectors. -The decoder can be used to generate MNIST digits by sampling the -latent vector from a Gaussian distribution with mean = 0 and std = 1. - -# Reference - -[1] Kingma, Diederik P., and Max Welling. -"Auto-Encoding Variational Bayes." 
-https://arxiv.org/abs/1312.6114 -''' - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras.layers import Lambda, Input, Dense -from keras.models import Model -from keras.datasets import mnist -from keras.losses import mse, binary_crossentropy -from keras.utils import plot_model -from keras import backend as K - -import numpy as np -import matplotlib.pyplot as plt -import argparse -import os - - -# reparameterization trick -# instead of sampling from Q(z|X), sample epsilon = N(0,I) -# z = z_mean + sqrt(var) * epsilon -def sampling(args): - """Reparameterization trick by sampling from an isotropic unit Gaussian. - - # Arguments - args (tensor): mean and log of variance of Q(z|X) - - # Returns - z (tensor): sampled latent vector - """ - - z_mean, z_log_var = args - batch = K.shape(z_mean)[0] - dim = K.int_shape(z_mean)[1] - # by default, random_normal has mean = 0 and std = 1.0 - epsilon = K.random_normal(shape=(batch, dim)) - return z_mean + K.exp(0.5 * z_log_var) * epsilon - - -def plot_results(models, - data, - batch_size=128, - model_name="vae_mnist"): - """Plots labels and MNIST digits as a function of the 2D latent vector - - # Arguments - models (tuple): encoder and decoder models - data (tuple): test data and label - batch_size (int): prediction batch size - model_name (string): which model is using this function - """ - - encoder, decoder = models - x_test, y_test = data - os.makedirs(model_name, exist_ok=True) - - filename = os.path.join(model_name, "vae_mean.png") - # display a 2D plot of the digit classes in the latent space - z_mean, _, _ = encoder.predict(x_test, - batch_size=batch_size) - plt.figure(figsize=(12, 10)) - plt.scatter(z_mean[:, 0], z_mean[:, 1], c=y_test) - plt.colorbar() - plt.xlabel("z[0]") - plt.ylabel("z[1]") - plt.savefig(filename) - plt.show() - - filename = os.path.join(model_name, "digits_over_latent.png") - # display a 30x30 2D manifold of digits - n = 30 - digit_size = 28 - figure = np.zeros((digit_size * n, digit_size * n)) - # linearly spaced coordinates corresponding to the 2D plot - # of digit classes in the latent space - grid_x = np.linspace(-4, 4, n) - grid_y = np.linspace(-4, 4, n)[::-1] - - for i, yi in enumerate(grid_y): - for j, xi in enumerate(grid_x): - z_sample = np.array([[xi, yi]]) - x_decoded = decoder.predict(z_sample) - digit = x_decoded[0].reshape(digit_size, digit_size) - figure[i * digit_size: (i + 1) * digit_size, - j * digit_size: (j + 1) * digit_size] = digit - - plt.figure(figsize=(10, 10)) - start_range = digit_size // 2 - end_range = (n - 1) * digit_size + start_range + 1 - pixel_range = np.arange(start_range, end_range, digit_size) - sample_range_x = np.round(grid_x, 1) - sample_range_y = np.round(grid_y, 1) - plt.xticks(pixel_range, sample_range_x) - plt.yticks(pixel_range, sample_range_y) - plt.xlabel("z[0]") - plt.ylabel("z[1]") - plt.imshow(figure, cmap='Greys_r') - plt.savefig(filename) - plt.show() - - -# MNIST dataset -(x_train, y_train), (x_test, y_test) = mnist.load_data() - -image_size = x_train.shape[1] -original_dim = image_size * image_size -x_train = np.reshape(x_train, [-1, original_dim]) -x_test = np.reshape(x_test, [-1, original_dim]) -x_train = x_train.astype('float32') / 255 -x_test = x_test.astype('float32') / 255 - -# network parameters -input_shape = (original_dim, ) -intermediate_dim = 512 -batch_size = 128 -latent_dim = 2 -epochs = 50 - -# VAE model = encoder + decoder -# build encoder model -inputs = Input(shape=input_shape, 
name='encoder_input')
-x = Dense(intermediate_dim, activation='relu')(inputs)
-z_mean = Dense(latent_dim, name='z_mean')(x)
-z_log_var = Dense(latent_dim, name='z_log_var')(x)
-
-# use reparameterization trick to push the sampling out as input
-# note that "output_shape" isn't necessary with the TensorFlow backend
-z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var])
-
-# instantiate encoder model
-encoder = Model(inputs, [z_mean, z_log_var, z], name='encoder')
-encoder.summary()
-plot_model(encoder, to_file='vae_mlp_encoder.png', show_shapes=True)
-
-# build decoder model
-latent_inputs = Input(shape=(latent_dim,), name='z_sampling')
-x = Dense(intermediate_dim, activation='relu')(latent_inputs)
-outputs = Dense(original_dim, activation='sigmoid')(x)
-
-# instantiate decoder model
-decoder = Model(latent_inputs, outputs, name='decoder')
-decoder.summary()
-plot_model(decoder, to_file='vae_mlp_decoder.png', show_shapes=True)
-
-# instantiate VAE model
-outputs = decoder(encoder(inputs)[2])
-vae = Model(inputs, outputs, name='vae_mlp')
-
-if __name__ == '__main__':
- parser = argparse.ArgumentParser()
- help_ = "Load h5 model trained weights"
- parser.add_argument("-w", "--weights", help=help_)
- help_ = "Use mse loss instead of binary cross entropy (default)"
- parser.add_argument("-m",
- "--mse",
- help=help_, action='store_true')
- args = parser.parse_args()
- models = (encoder, decoder)
- data = (x_test, y_test)
-
- # VAE loss = mse_loss or xent_loss + kl_loss
- if args.mse:
- reconstruction_loss = mse(inputs, outputs)
- else:
- reconstruction_loss = binary_crossentropy(inputs,
- outputs)
-
- reconstruction_loss *= original_dim
- kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var)
- kl_loss = K.sum(kl_loss, axis=-1)
- kl_loss *= -0.5
- vae_loss = K.mean(reconstruction_loss + kl_loss)
- vae.add_loss(vae_loss)
- vae.compile(optimizer='adam')
- vae.summary()
- plot_model(vae,
- to_file='vae_mlp.png',
- show_shapes=True)
-
- if args.weights:
- vae.load_weights(args.weights)
- else:
- # train the autoencoder
- vae.fit(x_train,
- epochs=epochs,
- batch_size=batch_size,
- validation_data=(x_test, None))
- vae.save_weights('vae_mlp_mnist.h5')
-
- plot_results(models,
- data,
- batch_size=batch_size,
- model_name="vae_mlp")
-'''Example of VAE on MNIST dataset using CNN
-
-The VAE has a modular design. The encoder, decoder and VAE
-are 3 models that share weights. After training the VAE model,
-the encoder can be used to generate latent vectors.
-The decoder can be used to generate MNIST digits by sampling the
-latent vector from a Gaussian distribution with mean=0 and std=1.
-
-# Reference
-
-[1] Kingma, Diederik P., and Max Welling.
-"Auto-encoding variational bayes."
-https://arxiv.org/abs/1312.6114
-'''
-
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from keras.layers import Dense, Input
-from keras.layers import Conv2D, Flatten, Lambda
-from keras.layers import Reshape, Conv2DTranspose
-from keras.models import Model
-from keras.datasets import mnist
-from keras.losses import mse, binary_crossentropy
-from keras.utils import plot_model
-from keras import backend as K
-
-import numpy as np
-import matplotlib.pyplot as plt
-import argparse
-import os
-
-
-# reparameterization trick
-# instead of sampling from Q(z|X), sample eps = N(0,I)
-# then z = z_mean + sqrt(var)*eps
-def sampling(args):
- """Reparameterization trick by sampling from an isotropic unit Gaussian.
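    (Editor's addition, not in the original docstring.) Because the encoder
    outputs a log-variance, the standard deviation is exp(0.5 * z_log_var),
    and the sample

        z = z_mean + exp(0.5 * z_log_var) * epsilon,    epsilon ~ N(0, I)

    is differentiable with respect to z_mean and z_log_var, which is what
    lets gradients flow through the sampling step.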
- - # Arguments - args (tensor): mean and log of variance of Q(z|X) - - # Returns - z (tensor): sampled latent vector - """ - - z_mean, z_log_var = args - batch = K.shape(z_mean)[0] - dim = K.int_shape(z_mean)[1] - # by default, random_normal has mean=0 and std=1.0 - epsilon = K.random_normal(shape=(batch, dim)) - return z_mean + K.exp(0.5 * z_log_var) * epsilon - - -def plot_results(models, - data, - batch_size=128, - model_name="vae_mnist"): - """Plots labels and MNIST digits as function of 2-dim latent vector - - # Arguments - models (tuple): encoder and decoder models - data (tuple): test data and label - batch_size (int): prediction batch size - model_name (string): which model is using this function - """ - - encoder, decoder = models - x_test, y_test = data - os.makedirs(model_name, exist_ok=True) - - filename = os.path.join(model_name, "vae_mean.png") - # display a 2D plot of the digit classes in the latent space - z_mean, _, _ = encoder.predict(x_test, - batch_size=batch_size) - plt.figure(figsize=(12, 10)) - plt.scatter(z_mean[:, 0], z_mean[:, 1], c=y_test) - plt.colorbar() - plt.xlabel("z[0]") - plt.ylabel("z[1]") - plt.savefig(filename) - plt.show() - - filename = os.path.join(model_name, "digits_over_latent.png") - # display a 30x30 2D manifold of digits - n = 30 - digit_size = 28 - figure = np.zeros((digit_size * n, digit_size * n)) - # linearly spaced coordinates corresponding to the 2D plot - # of digit classes in the latent space - grid_x = np.linspace(-4, 4, n) - grid_y = np.linspace(-4, 4, n)[::-1] - - for i, yi in enumerate(grid_y): - for j, xi in enumerate(grid_x): - z_sample = np.array([[xi, yi]]) - x_decoded = decoder.predict(z_sample) - digit = x_decoded[0].reshape(digit_size, digit_size) - figure[i * digit_size: (i + 1) * digit_size, - j * digit_size: (j + 1) * digit_size] = digit - - plt.figure(figsize=(10, 10)) - start_range = digit_size // 2 - end_range = n * digit_size + start_range + 1 - pixel_range = np.arange(start_range, end_range, digit_size) - sample_range_x = np.round(grid_x, 1) - sample_range_y = np.round(grid_y, 1) - plt.xticks(pixel_range, sample_range_x) - plt.yticks(pixel_range, sample_range_y) - plt.xlabel("z[0]") - plt.ylabel("z[1]") - plt.imshow(figure, cmap='Greys_r') - plt.savefig(filename) - plt.show() - - -# MNIST dataset -(x_train, y_train), (x_test, y_test) = mnist.load_data() - -image_size = x_train.shape[1] -x_train = np.reshape(x_train, [-1, image_size, image_size, 1]) -x_test = np.reshape(x_test, [-1, image_size, image_size, 1]) -x_train = x_train.astype('float32') / 255 -x_test = x_test.astype('float32') / 255 - -# network parameters -input_shape = (image_size, image_size, 1) -batch_size = 128 -kernel_size = 3 -filters = 16 -latent_dim = 2 -epochs = 30 - -# VAE model = encoder + decoder -# build encoder model -inputs = Input(shape=input_shape, name='encoder_input') -x = inputs -for i in range(2): - filters *= 2 - x = Conv2D(filters=filters, - kernel_size=kernel_size, - activation='relu', - strides=2, - padding='same')(x) - -# shape info needed to build decoder model -shape = K.int_shape(x) - -# generate latent vector Q(z|X) -x = Flatten()(x) -x = Dense(16, activation='relu')(x) -z_mean = Dense(latent_dim, name='z_mean')(x) -z_log_var = Dense(latent_dim, name='z_log_var')(x) - -# use reparameterization trick to push the sampling out as input -# note that "output_shape" isn't necessary with the TensorFlow backend -z = Lambda(sampling, output_shape=(latent_dim,), name='z')([z_mean, z_log_var]) - -# instantiate encoder model -encoder = 
Model(inputs, [z_mean, z_log_var, z], name='encoder') -encoder.summary() -plot_model(encoder, to_file='vae_cnn_encoder.png', show_shapes=True) - -# build decoder model -latent_inputs = Input(shape=(latent_dim,), name='z_sampling') -x = Dense(shape[1] * shape[2] * shape[3], activation='relu')(latent_inputs) -x = Reshape((shape[1], shape[2], shape[3]))(x) - -for i in range(2): - x = Conv2DTranspose(filters=filters, - kernel_size=kernel_size, - activation='relu', - strides=2, - padding='same')(x) - filters //= 2 - -outputs = Conv2DTranspose(filters=1, - kernel_size=kernel_size, - activation='sigmoid', - padding='same', - name='decoder_output')(x) - -# instantiate decoder model -decoder = Model(latent_inputs, outputs, name='decoder') -decoder.summary() -plot_model(decoder, to_file='vae_cnn_decoder.png', show_shapes=True) - -# instantiate VAE model -outputs = decoder(encoder(inputs)[2]) -vae = Model(inputs, outputs, name='vae') - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - help_ = "Load h5 model trained weights" - parser.add_argument("-w", "--weights", help=help_) - help_ = "Use mse loss instead of binary cross entropy (default)" - parser.add_argument("-m", "--mse", help=help_, action='store_true') - args = parser.parse_args() - models = (encoder, decoder) - data = (x_test, y_test) - - # VAE loss = mse_loss or xent_loss + kl_loss - if args.mse: - reconstruction_loss = mse(K.flatten(inputs), K.flatten(outputs)) - else: - reconstruction_loss = binary_crossentropy(K.flatten(inputs), - K.flatten(outputs)) - - reconstruction_loss *= image_size * image_size - kl_loss = 1 + z_log_var - K.square(z_mean) - K.exp(z_log_var) - kl_loss = K.sum(kl_loss, axis=-1) - kl_loss *= -0.5 - vae_loss = K.mean(reconstruction_loss + kl_loss) - vae.add_loss(vae_loss) - vae.compile(optimizer='rmsprop') - vae.summary() - plot_model(vae, to_file='vae_cnn.png', show_shapes=True) - - if args.weights: - vae.load_weights(args.weights) - else: - # train the autoencoder - vae.fit(x_train, - epochs=epochs, - batch_size=batch_size, - validation_data=(x_test, None)) - vae.save_weights('vae_cnn_mnist.h5') - - plot_results(models, data, batch_size=batch_size, model_name="vae_cnn") -from __future__ import absolute_import - -from . import utils -from . import activations -from . import applications -from . import backend -from . import datasets -from . import engine -from . import layers -from . import preprocessing -from . import wrappers -from . import callbacks -from . import constraints -from . import initializers -from . import metrics -from . import models -from . import losses -from . import optimizers -from . import regularizers - -# Also importable from root -from .layers import Input -from .models import Model -from .models import Sequential - -__version__ = '2.2.4' -"""Built-in activation functions. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import warnings -from . import backend as K -from .utils.generic_utils import deserialize_keras_object -from .engine import Layer - - -def softmax(x, axis=-1): - """Softmax activation function. - - # Arguments - x: Input tensor. - axis: Integer, axis along which the softmax normalization is applied. - - # Returns - Tensor, output of softmax transformation. - - # Raises - ValueError: In case `dim(x) == 1`. 
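# Numerical note
    (editor's addition, not in the original docstring) For ndim > 2 the
    implementation subtracts max(x) along `axis` before exponentiating;
    exp(x - max) cannot overflow and the shift cancels in the ratio e / s,
    so the result is mathematically unchanged.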
- """ - ndim = K.ndim(x) - if ndim == 2: - return K.softmax(x) - elif ndim > 2: - e = K.exp(x - K.max(x, axis=axis, keepdims=True)) - s = K.sum(e, axis=axis, keepdims=True) - return e / s - else: - raise ValueError('Cannot apply softmax to a tensor that is 1D. ' - 'Received input: %s' % x) - - -def elu(x, alpha=1.0): - """Exponential linear unit. - - # Arguments - x: Input tensor. - alpha: A scalar, slope of negative section. - - # Returns - The exponential linear activation: `x` if `x > 0` and - `alpha * (exp(x)-1)` if `x < 0`. - - # References - - [Fast and Accurate Deep Network Learning by Exponential - Linear Units (ELUs)](https://arxiv.org/abs/1511.07289) - """ - return K.elu(x, alpha) - - -def selu(x): - """Scaled Exponential Linear Unit (SELU). - - SELU is equal to: `scale * elu(x, alpha)`, where alpha and scale - are predefined constants. The values of `alpha` and `scale` are - chosen so that the mean and variance of the inputs are preserved - between two consecutive layers as long as the weights are initialized - correctly (see `lecun_normal` initialization) and the number of inputs - is "large enough" (see references for more information). - - # Arguments - x: A tensor or variable to compute the activation function for. - - # Returns - The scaled exponential unit activation: `scale * elu(x, alpha)`. - - # Note - - To be used together with the initialization "lecun_normal". - - To be used together with the dropout variant "AlphaDropout". - - # References - - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) - """ - alpha = 1.6732632423543772848170429916717 - scale = 1.0507009873554804934193349852946 - return scale * K.elu(x, alpha) - - -def softplus(x): - """Softplus activation function. - - # Arguments - x: Input tensor. - - # Returns - The softplus activation: `log(exp(x) + 1)`. - """ - return K.softplus(x) - - -def softsign(x): - """Softsign activation function. - - # Arguments - x: Input tensor. - - # Returns - The softsign activation: `x / (abs(x) + 1)`. - """ - return K.softsign(x) - - -def relu(x, alpha=0., max_value=None, threshold=0.): - """Rectified Linear Unit. - - With default values, it returns element-wise `max(x, 0)`. - - Otherwise, it follows: - `f(x) = max_value` for `x >= max_value`, - `f(x) = x` for `threshold <= x < max_value`, - `f(x) = alpha * (x - threshold)` otherwise. - - # Arguments - x: Input tensor. - alpha: float. Slope of the negative part. Defaults to zero. - max_value: float. Saturation threshold. - threshold: float. Threshold value for thresholded activation. - - # Returns - A tensor. - """ - return K.relu(x, alpha=alpha, max_value=max_value, threshold=threshold) - - -def tanh(x): - """Hyperbolic tangent activation function. - - # Arguments - x: Input tensor. - - # Returns - The hyperbolic activation: - `tanh(x) = (exp(x) - exp(-x)) / (exp(x) + exp(-x))` - - """ - return K.tanh(x) - - -def sigmoid(x): - """Sigmoid activation function. - - # Arguments - x: Input tensor. - - # Returns - The sigmoid activation: `1 / (1 + exp(-x))`. - """ - return K.sigmoid(x) - - -def hard_sigmoid(x): - """Hard sigmoid activation function. - - Faster to compute than sigmoid activation. - - # Arguments - x: Input tensor. - - # Returns - Hard sigmoid activation: - - - `0` if `x < -2.5` - - `1` if `x > 2.5` - - `0.2 * x + 0.5` if `-2.5 <= x <= 2.5`. - """ - return K.hard_sigmoid(x) - - -def exponential(x): - """Exponential (base e) activation function. - - # Arguments - x: Input tensor. - - # Returns - Exponential activation: `exp(x)`. 
- """ - return K.exp(x) - - -def linear(x): - """Linear (i.e. identity) activation function. - - # Arguments - x: Input tensor. - - # Returns - Input tensor, unchanged. - """ - return x - - -def serialize(activation): - return activation.__name__ - - -def deserialize(name, custom_objects=None): - return deserialize_keras_object( - name, - module_objects=globals(), - custom_objects=custom_objects, - printable_module_name='activation function') - - -def get(identifier): - """Get the `identifier` activation function. - - # Arguments - identifier: None or str, name of the function. - - # Returns - The activation function, `linear` if `identifier` is None. - - # Raises - ValueError if unknown identifier - """ - if identifier is None: - return linear - if isinstance(identifier, six.string_types): - identifier = str(identifier) - return deserialize(identifier) - elif callable(identifier): - if isinstance(identifier, Layer): - warnings.warn( - 'Do not pass a layer instance (such as {identifier}) as the ' - 'activation argument of another layer. Instead, advanced ' - 'activation layers should be used just like any other ' - 'layer in a model.'.format( - identifier=identifier.__class__.__name__)) - return identifier - else: - raise ValueError('Could not interpret ' - 'activation function identifier:', identifier) -"""Callbacks: utilities called at certain points during model training. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import csv -import six - -import numpy as np -import time -import json -import warnings -import io - -from collections import deque -from collections import OrderedDict -from collections import Iterable -from collections import defaultdict -from .utils.generic_utils import Progbar -from . import backend as K -from .engine.training_utils import standardize_input_data - -try: - import requests -except ImportError: - requests = None - - -_TRAIN = 'train' -_TEST = 'test' -_PREDICT = 'predict' - - -class CallbackList(object): - """Container abstracting a list of callbacks. - - # Arguments - callbacks: List of `Callback` instances. - queue_length: Queue length for keeping - running statistics over callback execution time. - """ - - def __init__(self, callbacks=None, queue_length=10): - callbacks = callbacks or [] - self.callbacks = [c for c in callbacks] - self.queue_length = queue_length - self.params = {} - self.model = None - self._reset_batch_timing() - - def _reset_batch_timing(self): - self._delta_t_batch = 0. 
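-        # One timing deque per batch hook; used below to warn when
-        # callbacks are slow relative to the batch update itself.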
- self._delta_ts = defaultdict( - lambda: deque([], maxlen=self.queue_length)) - - def append(self, callback): - self.callbacks.append(callback) - - def set_params(self, params): - self.params = params - for callback in self.callbacks: - callback.set_params(params) - - def set_model(self, model): - self.model = model - for callback in self.callbacks: - callback.set_model(model) - - def _call_batch_hook(self, mode, hook, batch, logs=None): - """Helper function for all batch_{begin | end} methods.""" - if not self.callbacks: - return - hook_name = 'on_{mode}_batch_{hook}'.format(mode=mode, hook=hook) - if hook == 'end': - if not hasattr(self, '_t_enter_batch'): - self._t_enter_batch = time.time() - # Batch is ending, calculate batch time - self._delta_t_batch = time.time() - self._t_enter_batch - - logs = logs or {} - t_before_callbacks = time.time() - for callback in self.callbacks: - batch_hook = getattr(callback, hook_name) - batch_hook(batch, logs) - self._delta_ts[hook_name].append(time.time() - t_before_callbacks) - - delta_t_median = np.median(self._delta_ts[hook_name]) - if (self._delta_t_batch > 0. and - delta_t_median > 0.95 * self._delta_t_batch and - delta_t_median > 0.1): - warnings.warn( - 'Method (%s) is slow compared ' - 'to the batch update (%f). Check your callbacks.' - % (hook_name, delta_t_median), RuntimeWarning) - - if hook == 'begin': - self._t_enter_batch = time.time() - - def _call_begin_hook(self, mode): - """Helper function for on_{train|test|predict}_begin methods.""" - if mode == _TRAIN: - self.on_train_begin() - elif mode == _TEST: - self.on_test_begin() - else: - self.on_predict_begin() - - def _call_end_hook(self, mode): - """Helper function for on_{train|test|predict}_end methods.""" - if mode == _TRAIN: - self.on_train_end() - elif mode == _TEST: - self.on_test_end() - else: - self.on_predict_end() - - def on_batch_begin(self, batch, logs=None): - self._call_batch_hook(_TRAIN, 'begin', batch, logs=logs) - - def on_batch_end(self, batch, logs=None): - self._call_batch_hook(_TRAIN, 'end', batch, logs=logs) - - def on_epoch_begin(self, epoch, logs=None): - """Calls the `on_epoch_begin` methods of its callbacks. - - This function should only be called during train mode. - - # Arguments - epoch: integer, index of epoch. - logs: dict, Currently no data is passed to this argument for this method - but that may change in the future. - """ - logs = logs or {} - for callback in self.callbacks: - callback.on_epoch_begin(epoch, logs) - self._reset_batch_timing() - - def on_epoch_end(self, epoch, logs=None): - """Calls the `on_epoch_end` methods of its callbacks. - - This function should only be called during train mode. - - # Arguments - epoch: integer, index of epoch. - logs: dict, metric results for this training epoch, and for the - validation epoch if validation is performed. Validation result keys - are prefixed with `val_`. - """ - logs = logs or {} - for callback in self.callbacks: - callback.on_epoch_end(epoch, logs) - - def on_train_batch_begin(self, batch, logs=None): - """Calls the `on_train_batch_begin` methods of its callbacks. - - # Arguments - batch: integer, index of batch within the current epoch. - logs: dict, has keys `batch` and `size` representing the current - batch number and the size of the batch. - """ - self._call_batch_hook(_TRAIN, 'begin', batch, logs=logs) - - def on_train_batch_end(self, batch, logs=None): - """Calls the `on_train_batch_end` methods of its callbacks. - - # Arguments - batch: integer, index of batch within the current epoch. 
- logs: dict, metric results for this batch. - """ - self._call_batch_hook(_TRAIN, 'end', batch, logs=logs) - - def on_test_batch_begin(self, batch, logs=None): - """Calls the `on_test_batch_begin` methods of its callbacks. - - # Arguments - batch: integer, index of batch within the current epoch. - logs: dict, has keys `batch` and `size` representing the current - batch number and the size of the batch. - """ - self._call_batch_hook(_TEST, 'begin', batch, logs=logs) - - def on_test_batch_end(self, batch, logs=None): - """Calls the `on_test_batch_end` methods of its callbacks. - - # Arguments - batch: integer, index of batch within the current epoch. - logs: dict, metric results for this batch. - """ - self._call_batch_hook(_TEST, 'end', batch, logs=logs) - - def on_predict_batch_begin(self, batch, logs=None): - """Calls the `on_predict_batch_begin` methods of its callbacks. - - # Arguments - batch: integer, index of batch within the current epoch. - logs: dict, has keys `batch` and `size` representing the current - batch number and the size of the batch. - """ - self._call_batch_hook(_PREDICT, 'begin', batch, logs=logs) - - def on_predict_batch_end(self, batch, logs=None): - """Calls the `on_predict_batch_end` methods of its callbacks. - - # Argument - batch: integer, index of batch within the current epoch. - logs: dict, metric results for this batch. - """ - self._call_batch_hook(_PREDICT, 'end', batch, logs=logs) - - def on_train_begin(self, logs=None): - """Calls the `on_train_begin` methods of its callbacks. - - # Arguments - logs: dict, currently no data is passed to this argument for this method - but that may change in the future. - """ - for callback in self.callbacks: - callback.on_train_begin(logs) - - def on_train_end(self, logs=None): - """Calls the `on_train_end` methods of its callbacks. - - # Arguments - logs: dict, currently no data is passed to this argument for this method - but that may change in the future. - """ - for callback in self.callbacks: - callback.on_train_end(logs) - - def on_test_begin(self, logs=None): - """Calls the `on_test_begin` methods of its callbacks. - - # Arguments - logs: dict, currently no data is passed to this argument for this method - but that may change in the future. - """ - for callback in self.callbacks: - callback.on_test_begin(logs) - - def on_test_end(self, logs=None): - """Calls the `on_test_end` methods of its callbacks. - - # Arguments - logs: dict, currently no data is passed to this argument for this method - but that may change in the future. - """ - for callback in self.callbacks: - callback.on_test_end(logs) - - def on_predict_begin(self, logs=None): - """Calls the `on_predict_begin` methods of its callbacks. - - # Arguments - logs: dict, currently no data is passed to this argument for this method - but that may change in the future. - """ - for callback in self.callbacks: - callback.on_predict_begin(logs) - - def on_predict_end(self, logs=None): - """Calls the `on_predict_end` methods of its callbacks. - - # Arguments - logs: dict, currently no data is passed to this argument for this method - but that may change in the future. - """ - for callback in self.callbacks: - callback.on_predict_end(logs) - - def __iter__(self): - return iter(self.callbacks) - - -class Callback(object): - """Abstract base class used to build new callbacks. - - # Properties - params: dict. Training parameters - (eg. verbosity, batch size, number of epochs...). - model: instance of `keras.models.Model`. - Reference of the model being trained. 
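-
-    # Example
-
-    A minimal sketch of a custom callback that records the loss reported
-    after every training batch:
-
-    ```python
-    class LossHistory(Callback):
-        def on_train_begin(self, logs=None):
-            self.losses = []
-
-        def on_batch_end(self, batch, logs=None):
-            self.losses.append((logs or {}).get('loss'))
-    ```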
- - The `logs` dictionary that callback methods - take as argument will contain keys for quantities relevant to - the current batch or epoch. - - Currently, the `.fit()` method of the `Sequential` model class - will include the following quantities in the `logs` that - it passes to its callbacks: - - on_epoch_end: logs include `acc` and `loss`, and - optionally include `val_loss` - (if validation is enabled in `fit`), and `val_acc` - (if validation and accuracy monitoring are enabled). - on_batch_begin: logs include `size`, - the number of samples in the current batch. - on_batch_end: logs include `loss`, and optionally `acc` - (if accuracy monitoring is enabled). - """ - - def __init__(self): - self.validation_data = None - self.model = None - - def set_params(self, params): - self.params = params - - def set_model(self, model): - self.model = model - - def on_batch_begin(self, batch, logs=None): - """A backwards compatibility alias for `on_train_batch_begin`.""" - - def on_batch_end(self, batch, logs=None): - """A backwards compatibility alias for `on_train_batch_end`.""" - - def on_epoch_begin(self, epoch, logs=None): - """Called at the start of an epoch. - - Subclasses should override for any actions to run. This function should only - be called during train mode. - - # Arguments - epoch: integer, index of epoch. - logs: dict, currently no data is passed to this argument for this method - but that may change in the future. - """ - - def on_epoch_end(self, epoch, logs=None): - """Called at the end of an epoch. - - Subclasses should override for any actions to run. This function should only - be called during train mode. - - # Arguments - epoch: integer, index of epoch. - logs: dict, metric results for this training epoch, and for the - validation epoch if validation is performed. Validation result keys - are prefixed with `val_`. - """ - - def on_train_batch_begin(self, batch, logs=None): - """Called at the beginning of a training batch in `fit` methods. - - Subclasses should override for any actions to run. - - # Arguments - batch: integer, index of batch within the current epoch. - logs: dict, has keys `batch` and `size` representing the current - batch number and the size of the batch. - """ - # For backwards compatibility - self.on_batch_begin(batch, logs=logs) - - def on_train_batch_end(self, batch, logs=None): - """Called at the end of a training batch in `fit` methods. - - Subclasses should override for any actions to run. - - # Arguments - batch: integer, index of batch within the current epoch. - logs: dict, metric results for this batch. - """ - # For backwards compatibility - self.on_batch_end(batch, logs=logs) - - def on_test_batch_begin(self, batch, logs=None): - """Called at the beginning of a batch in `evaluate` methods. - - Also called at the beginning of a validation batch in the `fit` methods, - if validation data is provided. - - Subclasses should override for any actions to run. - - # Arguments - batch: integer, index of batch within the current epoch. - logs: dict, has keys `batch` and `size` representing the current - batch number and the size of the batch. - """ - - def on_test_batch_end(self, batch, logs=None): - """Called at the end of a batch in `evaluate` methods. - - Also called at the end of a validation batch in the `fit` methods, - if validation data is provided. - - Subclasses should override for any actions to run. - - # Arguments - batch: integer, index of batch within the current epoch. - logs: dict, metric results for this batch. 
- """ - - def on_predict_batch_begin(self, batch, logs=None): - """Called at the beginning of a batch in `predict` methods. - - Subclasses should override for any actions to run. - - # Arguments - batch: integer, index of batch within the current epoch. - logs: dict, has keys `batch` and `size` representing the current - batch number and the size of the batch. - """ - - def on_predict_batch_end(self, batch, logs=None): - """Called at the end of a batch in `predict` methods. - - Subclasses should override for any actions to run. - - # Arguments - batch: integer, index of batch within the current epoch. - logs: dict, metric results for this batch. - """ - - def on_train_begin(self, logs=None): - """Called at the beginning of training. - - Subclasses should override for any actions to run. - - # Arguments - logs: dict, currently no data is passed to this argument for this method - but that may change in the future. - """ - - def on_train_end(self, logs=None): - """Called at the end of training. - - Subclasses should override for any actions to run. - - # Arguments - logs: dict, currently no data is passed to this argument for this method - but that may change in the future. - """ - - def on_test_begin(self, logs=None): - """Called at the beginning of evaluation or validation. - - Subclasses should override for any actions to run. - - # Arguments - logs: dict, currently no data is passed to this argument for this method - but that may change in the future. - """ - - def on_test_end(self, logs=None): - """Called at the end of evaluation or validation. - - Subclasses should override for any actions to run. - - # Arguments - logs: dict, currently no data is passed to this argument for this method - but that may change in the future. - """ - - def on_predict_begin(self, logs=None): - """Called at the beginning of prediction. - - Subclasses should override for any actions to run. - - # Arguments - logs: dict, currently no data is passed to this argument for this method - but that may change in the future. - """ - - def on_predict_end(self, logs=None): - """Called at the end of prediction. - - Subclasses should override for any actions to run. - - # Arguments - logs: dict, currently no data is passed to this argument for this method - but that may change in the future. - """ - - -class BaseLogger(Callback): - """Callback that accumulates epoch averages of metrics. - - This callback is automatically applied to every Keras model. - - # Arguments - stateful_metrics: Iterable of string names of metrics that - should *not* be averaged over an epoch. - Metrics in this list will be logged as-is in `on_epoch_end`. - All others will be averaged in `on_epoch_end`. - """ - - def __init__(self, stateful_metrics=None): - if stateful_metrics: - self.stateful_metrics = set(stateful_metrics) - else: - self.stateful_metrics = set() - - def on_epoch_begin(self, epoch, logs=None): - self.seen = 0 - self.totals = {} - - def on_batch_end(self, batch, logs=None): - logs = logs or {} - batch_size = logs.get('size', 0) - self.seen += batch_size - - for k, v in logs.items(): - if k in self.stateful_metrics: - self.totals[k] = v - else: - if k in self.totals: - self.totals[k] += v * batch_size - else: - self.totals[k] = v * batch_size - - def on_epoch_end(self, epoch, logs=None): - if logs is not None: - for k in self.params['metrics']: - if k in self.totals: - # Make value available to next callbacks. 
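-                    # Stateful metrics were stored as-is at each batch end;
-                    # all other metrics were accumulated as sums weighted by
-                    # batch size, so convert those back to per-sample means.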
- if k in self.stateful_metrics: - logs[k] = self.totals[k] - else: - logs[k] = self.totals[k] / self.seen - - -class TerminateOnNaN(Callback): - """Callback that terminates training when a NaN loss is encountered. - """ - - def on_batch_end(self, batch, logs=None): - logs = logs or {} - loss = logs.get('loss') - if loss is not None: - if np.isnan(loss) or np.isinf(loss): - print('Batch %d: Invalid loss, terminating training' % (batch)) - self.model.stop_training = True - - -class ProgbarLogger(Callback): - """Callback that prints metrics to stdout. - - # Arguments - count_mode: One of "steps" or "samples". - Whether the progress bar should - count samples seen or steps (batches) seen. - stateful_metrics: Iterable of string names of metrics that - should *not* be averaged over an epoch. - Metrics in this list will be logged as-is. - All others will be averaged over time (e.g. loss, etc). - - # Raises - ValueError: In case of invalid `count_mode`. - """ - - def __init__(self, count_mode='samples', - stateful_metrics=None): - super(ProgbarLogger, self).__init__() - if count_mode == 'samples': - self.use_steps = False - elif count_mode == 'steps': - self.use_steps = True - else: - raise ValueError('Unknown `count_mode`: ' + str(count_mode)) - if stateful_metrics: - self.stateful_metrics = set(stateful_metrics) - else: - self.stateful_metrics = set() - - def on_train_begin(self, logs=None): - self.verbose = self.params['verbose'] - self.epochs = self.params['epochs'] - - def on_epoch_begin(self, epoch, logs=None): - if self.verbose: - print('Epoch %d/%d' % (epoch + 1, self.epochs)) - if self.use_steps: - target = self.params['steps'] - else: - target = self.params['samples'] - self.target = target - self.progbar = Progbar(target=self.target, - verbose=self.verbose, - stateful_metrics=self.stateful_metrics) - self.seen = 0 - - def on_batch_begin(self, batch, logs=None): - if self.seen < self.target: - self.log_values = [] - - def on_batch_end(self, batch, logs=None): - logs = logs or {} - batch_size = logs.get('size', 0) - if self.use_steps: - self.seen += 1 - else: - self.seen += batch_size - - for k in self.params['metrics']: - if k in logs: - self.log_values.append((k, logs[k])) - - # Skip progbar update for the last batch; - # will be handled by on_epoch_end. - if self.verbose and self.seen < self.target: - self.progbar.update(self.seen, self.log_values) - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - for k in self.params['metrics']: - if k in logs: - self.log_values.append((k, logs[k])) - if self.verbose: - self.progbar.update(self.seen, self.log_values) - - -class History(Callback): - """Callback that records events into a `History` object. - - This callback is automatically applied to - every Keras model. The `History` object - gets returned by the `fit` method of models. - """ - - def on_train_begin(self, logs=None): - self.epoch = [] - self.history = {} - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - self.epoch.append(epoch) - for k, v in logs.items(): - self.history.setdefault(k, []).append(v) - - -class ModelCheckpoint(Callback): - """Save the model after every epoch. - - `filepath` can contain named formatting options, - which will be filled with the values of `epoch` and - keys in `logs` (passed in `on_epoch_end`). - - For example: if `filepath` is `weights.{epoch:02d}-{val_loss:.2f}.hdf5`, - then the model checkpoints will be saved with the epoch number and - the validation loss in the filename. 
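-
-    # Example
-
-    A minimal sketch; `model`, `x_train` and `y_train` are assumed to be
-    defined elsewhere:
-
-    ```python
-    # Keep only the best weights seen so far, judged by validation loss.
-    ckpt = ModelCheckpoint('weights.{epoch:02d}-{val_loss:.2f}.hdf5',
-                           monitor='val_loss', save_best_only=True)
-    model.fit(x_train, y_train, validation_split=0.1, callbacks=[ckpt])
-    ```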
- - # Arguments - filepath: string, path to save the model file. - monitor: quantity to monitor. - verbose: verbosity mode, 0 or 1. - save_best_only: if `save_best_only=True`, - the latest best model according to - the quantity monitored will not be overwritten. - save_weights_only: if True, then only the model's weights will be - saved (`model.save_weights(filepath)`), else the full model - is saved (`model.save(filepath)`). - mode: one of {auto, min, max}. - If `save_best_only=True`, the decision - to overwrite the current save file is made - based on either the maximization or the - minimization of the monitored quantity. For `val_acc`, - this should be `max`, for `val_loss` this should - be `min`, etc. In `auto` mode, the direction is - automatically inferred from the name of the monitored quantity. - period: Interval (number of epochs) between checkpoints. - """ - - def __init__(self, filepath, monitor='val_loss', verbose=0, - save_best_only=False, save_weights_only=False, - mode='auto', period=1): - super(ModelCheckpoint, self).__init__() - self.monitor = monitor - self.verbose = verbose - self.filepath = filepath - self.save_best_only = save_best_only - self.save_weights_only = save_weights_only - self.period = period - self.epochs_since_last_save = 0 - - if mode not in ['auto', 'min', 'max']: - warnings.warn('ModelCheckpoint mode %s is unknown, ' - 'fallback to auto mode.' % (mode), - RuntimeWarning) - mode = 'auto' - - if mode == 'min': - self.monitor_op = np.less - self.best = np.Inf - elif mode == 'max': - self.monitor_op = np.greater - self.best = -np.Inf - else: - if 'acc' in self.monitor or self.monitor.startswith('fmeasure'): - self.monitor_op = np.greater - self.best = -np.Inf - else: - self.monitor_op = np.less - self.best = np.Inf - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - self.epochs_since_last_save += 1 - if self.epochs_since_last_save >= self.period: - self.epochs_since_last_save = 0 - filepath = self.filepath.format(epoch=epoch + 1, **logs) - if self.save_best_only: - current = logs.get(self.monitor) - if current is None: - warnings.warn('Can save best model only with %s available, ' - 'skipping.' % (self.monitor), RuntimeWarning) - else: - if self.monitor_op(current, self.best): - if self.verbose > 0: - print('\nEpoch %05d: %s improved from %0.5f to %0.5f,' - ' saving model to %s' - % (epoch + 1, self.monitor, self.best, - current, filepath)) - self.best = current - if self.save_weights_only: - self.model.save_weights(filepath, overwrite=True) - else: - self.model.save(filepath, overwrite=True) - else: - if self.verbose > 0: - print('\nEpoch %05d: %s did not improve from %0.5f' % - (epoch + 1, self.monitor, self.best)) - else: - if self.verbose > 0: - print('\nEpoch %05d: saving model to %s' % - (epoch + 1, filepath)) - if self.save_weights_only: - self.model.save_weights(filepath, overwrite=True) - else: - self.model.save(filepath, overwrite=True) - - -class EarlyStopping(Callback): - """Stop training when a monitored quantity has stopped improving. - - # Arguments - monitor: quantity to be monitored. - min_delta: minimum change in the monitored quantity - to qualify as an improvement, i.e. an absolute - change of less than min_delta, will count as no - improvement. - patience: number of epochs with no improvement - after which training will be stopped. - verbose: verbosity mode. - mode: one of {auto, min, max}. 
In `min` mode,
-            training will stop when the quantity
-            monitored has stopped decreasing; in `max`
-            mode it will stop when the quantity
-            monitored has stopped increasing; in `auto`
-            mode, the direction is automatically inferred
-            from the name of the monitored quantity.
-        baseline: Baseline value for the monitored quantity to reach.
-            Training will stop if the model doesn't show improvement
-            over the baseline.
-        restore_best_weights: whether to restore model weights from
-            the epoch with the best value of the monitored quantity.
-            If False, the model weights obtained at the last step of
-            training are used.
-    """
-
-    def __init__(self,
-                 monitor='val_loss',
-                 min_delta=0,
-                 patience=0,
-                 verbose=0,
-                 mode='auto',
-                 baseline=None,
-                 restore_best_weights=False):
-        super(EarlyStopping, self).__init__()
-
-        self.monitor = monitor
-        self.baseline = baseline
-        self.patience = patience
-        self.verbose = verbose
-        self.min_delta = min_delta
-        self.wait = 0
-        self.stopped_epoch = 0
-        self.restore_best_weights = restore_best_weights
-        self.best_weights = None
-
-        if mode not in ['auto', 'min', 'max']:
-            warnings.warn('EarlyStopping mode %s is unknown, '
-                          'fallback to auto mode.' % mode,
-                          RuntimeWarning)
-            mode = 'auto'
-
-        if mode == 'min':
-            self.monitor_op = np.less
-        elif mode == 'max':
-            self.monitor_op = np.greater
-        else:
-            if 'acc' in self.monitor:
-                self.monitor_op = np.greater
-            else:
-                self.monitor_op = np.less
-
-        if self.monitor_op == np.greater:
-            self.min_delta *= 1
-        else:
-            self.min_delta *= -1
-
-    def on_train_begin(self, logs=None):
-        # Allow instances to be re-used
-        self.wait = 0
-        self.stopped_epoch = 0
-        if self.baseline is not None:
-            self.best = self.baseline
-        else:
-            self.best = np.Inf if self.monitor_op == np.less else -np.Inf
-
-    def on_epoch_end(self, epoch, logs=None):
-        current = self.get_monitor_value(logs)
-        if current is None:
-            return
-
-        if self.monitor_op(current - self.min_delta, self.best):
-            self.best = current
-            self.wait = 0
-            if self.restore_best_weights:
-                self.best_weights = self.model.get_weights()
-        else:
-            self.wait += 1
-            if self.wait >= self.patience:
-                self.stopped_epoch = epoch
-                self.model.stop_training = True
-                if self.restore_best_weights:
-                    if self.verbose > 0:
-                        print('Restoring model weights from the end of '
-                              'the best epoch')
-                    self.model.set_weights(self.best_weights)
-
-    def on_train_end(self, logs=None):
-        if self.stopped_epoch > 0 and self.verbose > 0:
-            print('Epoch %05d: early stopping' % (self.stopped_epoch + 1))
-
-    def get_monitor_value(self, logs):
-        monitor_value = logs.get(self.monitor)
-        if monitor_value is None:
-            warnings.warn(
-                'Early stopping conditioned on metric `%s` '
-                'which is not available. Available metrics are: %s' %
-                (self.monitor, ','.join(list(logs.keys()))), RuntimeWarning
-            )
-        return monitor_value
-
-
-class RemoteMonitor(Callback):
-    """Callback used to stream events to a server.
-
-    Requires the `requests` library.
-    Events are sent to `root + '/publish/epoch/end/'` by default. Calls are
-    HTTP POST, with a `data` argument which is a
-    JSON-encoded dictionary of event data.
-    If send_as_json is set to True, the content type of the request will be
-    application/json. Otherwise the serialized JSON will be sent within a form.
-
-    # Arguments
-        root: String; root url of the target server.
-        path: String; path relative to `root` to which the events will be sent.
-        field: String; JSON field under which the data will be stored.
-            The field is used only if the payload is sent within a form
-            (i.e.
send_as_json is set to False). - headers: Dictionary; optional custom HTTP headers. - send_as_json: Boolean; whether the request should be send as - application/json. - """ - - def __init__(self, - root='http://localhost:9000', - path='/publish/epoch/end/', - field='data', - headers=None, - send_as_json=False): - super(RemoteMonitor, self).__init__() - - self.root = root - self.path = path - self.field = field - self.headers = headers - self.send_as_json = send_as_json - - def on_epoch_end(self, epoch, logs=None): - if requests is None: - raise ImportError('RemoteMonitor requires ' - 'the `requests` library.') - logs = logs or {} - send = {} - send['epoch'] = epoch - for k, v in logs.items(): - if isinstance(v, (np.ndarray, np.generic)): - send[k] = v.item() - else: - send[k] = v - try: - if self.send_as_json: - requests.post(self.root + self.path, - json=send, headers=self.headers) - else: - requests.post(self.root + self.path, - {self.field: json.dumps(send)}, - headers=self.headers) - except requests.exceptions.RequestException: - warnings.warn('Warning: could not reach RemoteMonitor ' - 'root server at ' + str(self.root)) - - -class LearningRateScheduler(Callback): - """Learning rate scheduler. - - # Arguments - schedule: a function that takes an epoch index as input - (integer, indexed from 0) and current learning rate - and returns a new learning rate as output (float). - verbose: int. 0: quiet, 1: update messages. - """ - - def __init__(self, schedule, verbose=0): - super(LearningRateScheduler, self).__init__() - self.schedule = schedule - self.verbose = verbose - - def on_epoch_begin(self, epoch, logs=None): - if not hasattr(self.model.optimizer, 'lr'): - raise ValueError('Optimizer must have a "lr" attribute.') - lr = float(K.get_value(self.model.optimizer.lr)) - try: # new API - lr = self.schedule(epoch, lr) - except TypeError: # old API for backward compatibility - lr = self.schedule(epoch) - if not isinstance(lr, (float, np.float32, np.float64)): - raise ValueError('The output of the "schedule" function ' - 'should be float.') - K.set_value(self.model.optimizer.lr, lr) - if self.verbose > 0: - print('\nEpoch %05d: LearningRateScheduler setting learning ' - 'rate to %s.' % (epoch + 1, lr)) - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - logs['lr'] = K.get_value(self.model.optimizer.lr) - - -class TensorBoard(Callback): - """TensorBoard basic visualizations. - - [TensorBoard](https://www.tensorflow.org/guide/summaries_and_tensorboard) - is a visualization tool provided with TensorFlow. - - This callback writes a log for TensorBoard, which allows - you to visualize dynamic graphs of your training and test - metrics, as well as activation histograms for the different - layers in your model. - - If you have installed TensorFlow with pip, you should be able - to launch TensorBoard from the command line: - ```sh - tensorboard --logdir=/full_path_to_your_logs - ``` - - When using a backend other than TensorFlow, TensorBoard will still work - (if you have TensorFlow installed), but the only feature available will - be the display of the losses and metrics plots. - - # Arguments - log_dir: the path of the directory where to save the log - files to be parsed by TensorBoard. - histogram_freq: frequency (in epochs) at which to compute activation - and weight histograms for the layers of the model. If set to 0, - histograms won't be computed. Validation data (or split) must be - specified for histogram visualizations. 
-        batch_size: size of batch of inputs to feed to the network
-            for histograms computation.
-        write_graph: whether to visualize the graph in TensorBoard.
-            The log file can become quite large when
-            write_graph is set to True.
-        write_grads: whether to visualize gradient histograms in TensorBoard.
-            `histogram_freq` must be greater than 0.
-        write_images: whether to write model weights to visualize as
-            image in TensorBoard.
-        embeddings_freq: frequency (in epochs) at which selected embedding
-            layers will be saved. If set to 0, embeddings won't be computed.
-            Data to be visualized in TensorBoard's Embedding tab must be passed
-            as `embeddings_data`.
-        embeddings_layer_names: a list of names of layers to keep an eye on.
-            If None or an empty list, all the embedding layers will be watched.
-        embeddings_metadata: a dictionary which maps layer name to a file name
-            in which metadata for this embedding layer is saved. See the
-            [details](https://www.tensorflow.org/guide/embedding#metadata)
-            about the metadata file format. If the same metadata file is
-            used for all embedding layers, a single string can be passed.
-        embeddings_data: data to be embedded at layers specified in
-            `embeddings_layer_names`. Numpy array (if the model has a single
-            input) or list of Numpy arrays (if the model has multiple inputs).
-            Learn [more about embeddings](
-            https://www.tensorflow.org/guide/embedding).
-        update_freq: `'batch'` or `'epoch'` or integer. When using `'batch'`, writes
-            the losses and metrics to TensorBoard after each batch. The same
-            applies for `'epoch'`. If using an integer, let's say `10000`,
-            the callback will write the metrics and losses to TensorBoard every
-            10000 samples. Note that writing too frequently to TensorBoard
-            can slow down your training.
-    """
-
-    def __init__(self, log_dir='./logs',
-                 histogram_freq=0,
-                 batch_size=32,
-                 write_graph=True,
-                 write_grads=False,
-                 write_images=False,
-                 embeddings_freq=0,
-                 embeddings_layer_names=None,
-                 embeddings_metadata=None,
-                 embeddings_data=None,
-                 update_freq='epoch'):
-        super(TensorBoard, self).__init__()
-        global tf, projector
-        try:
-            import tensorflow as tf
-            from tensorflow.contrib.tensorboard.plugins import projector
-        except ImportError:
-            raise ImportError('You need the TensorFlow module installed to '
-                              'use TensorBoard.')
-
-        if K.backend() != 'tensorflow':
-            if histogram_freq != 0:
-                warnings.warn('You are not using the TensorFlow backend. '
-                              'histogram_freq was set to 0')
-                histogram_freq = 0
-            if write_graph:
-                warnings.warn('You are not using the TensorFlow backend. '
-                              'write_graph was set to False')
-                write_graph = False
-            if write_images:
-                warnings.warn('You are not using the TensorFlow backend. '
-                              'write_images was set to False')
-                write_images = False
-            if embeddings_freq != 0:
-                warnings.warn('You are not using the TensorFlow backend. '
-                              'embeddings_freq was set to 0')
-                embeddings_freq = 0
-
-        self.log_dir = log_dir
-        self.histogram_freq = histogram_freq
-        self.merged = None
-        self.write_graph = write_graph
-        self.write_grads = write_grads
-        self.write_images = write_images
-        self.embeddings_freq = embeddings_freq
-        self.embeddings_layer_names = embeddings_layer_names
-        self.embeddings_metadata = embeddings_metadata or {}
-        self.batch_size = batch_size
-        self.embeddings_data = embeddings_data
-        if update_freq == 'batch':
-            # It is the same as writing as frequently as possible.
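-            # `samples_seen` is compared against this threshold in
-            # `on_batch_end`, so a value of 1 writes after every batch.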
- self.update_freq = 1 - else: - self.update_freq = update_freq - self.samples_seen = 0 - self.samples_seen_at_last_write = 0 - - def set_model(self, model): - self.model = model - if K.backend() == 'tensorflow': - self.sess = K.get_session() - if self.histogram_freq and self.merged is None: - for layer in self.model.layers: - for weight in layer.weights: - mapped_weight_name = weight.name.replace(':', '_') - tf.summary.histogram(mapped_weight_name, weight) - if self.write_grads and weight in layer.trainable_weights: - grads = model.optimizer.get_gradients(model.total_loss, - weight) - - def is_indexed_slices(grad): - return type(grad).__name__ == 'IndexedSlices' - grads = [ - grad.values if is_indexed_slices(grad) else grad - for grad in grads] - tf.summary.histogram('{}_grad'.format(mapped_weight_name), - grads) - if self.write_images: - w_img = tf.squeeze(weight) - shape = K.int_shape(w_img) - if len(shape) == 2: # dense layer kernel case - if shape[0] > shape[1]: - w_img = tf.transpose(w_img) - shape = K.int_shape(w_img) - w_img = tf.reshape(w_img, [1, - shape[0], - shape[1], - 1]) - elif len(shape) == 3: # convnet case - if K.image_data_format() == 'channels_last': - # switch to channels_first to display - # every kernel as a separate image - w_img = tf.transpose(w_img, perm=[2, 0, 1]) - shape = K.int_shape(w_img) - w_img = tf.reshape(w_img, [shape[0], - shape[1], - shape[2], - 1]) - elif len(shape) == 1: # bias case - w_img = tf.reshape(w_img, [1, - shape[0], - 1, - 1]) - else: - # not possible to handle 3D convnets etc. - continue - - shape = K.int_shape(w_img) - assert len(shape) == 4 and shape[-1] in [1, 3, 4] - tf.summary.image(mapped_weight_name, w_img) - - if hasattr(layer, 'output'): - if isinstance(layer.output, list): - for i, output in enumerate(layer.output): - tf.summary.histogram('{}_out_{}'.format(layer.name, i), - output) - else: - tf.summary.histogram('{}_out'.format(layer.name), - layer.output) - self.merged = tf.summary.merge_all() - - if self.write_graph: - self.writer = tf.summary.FileWriter(self.log_dir, - self.sess.graph) - else: - self.writer = tf.summary.FileWriter(self.log_dir) - - if self.embeddings_freq and self.embeddings_data is not None: - self.embeddings_data = standardize_input_data(self.embeddings_data, - model.input_names) - - embeddings_layer_names = self.embeddings_layer_names - - if not embeddings_layer_names: - embeddings_layer_names = [layer.name for layer in self.model.layers - if type(layer).__name__ == 'Embedding'] - self.assign_embeddings = [] - embeddings_vars = {} - - self.batch_id = batch_id = tf.placeholder(tf.int32) - self.step = step = tf.placeholder(tf.int32) - - for layer in self.model.layers: - if layer.name in embeddings_layer_names: - embedding_input = self.model.get_layer(layer.name).output - embedding_size = np.prod(embedding_input.shape[1:]) - embedding_input = tf.reshape(embedding_input, - (step, int(embedding_size))) - shape = (self.embeddings_data[0].shape[0], int( - embedding_size)) - embedding = tf.Variable(tf.zeros(shape), - name=layer.name + '_embedding') - embeddings_vars[layer.name] = embedding - batch = tf.assign(embedding[batch_id:batch_id + step], - embedding_input) - self.assign_embeddings.append(batch) - - self.saver = tf.train.Saver(list(embeddings_vars.values())) - - if not isinstance(self.embeddings_metadata, str): - embeddings_metadata = self.embeddings_metadata - else: - embeddings_metadata = {layer_name: self.embeddings_metadata - for layer_name in embeddings_vars.keys()} - - config = 
projector.ProjectorConfig() - - for layer_name, tensor in embeddings_vars.items(): - embedding = config.embeddings.add() - embedding.tensor_name = tensor.name - - if layer_name in embeddings_metadata: - embedding.metadata_path = embeddings_metadata[layer_name] - - projector.visualize_embeddings(self.writer, config) - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - - if not self.validation_data and self.histogram_freq: - raise ValueError("If printing histograms, validation_data must be " - "provided, and cannot be a generator.") - if self.embeddings_data is None and self.embeddings_freq: - raise ValueError("To visualize embeddings, embeddings_data must " - "be provided.") - if self.validation_data and self.histogram_freq: - if epoch % self.histogram_freq == 0: - - val_data = self.validation_data - tensors = (self.model.inputs + - self.model.targets + - self.model.sample_weights) - - if self.model.uses_learning_phase: - tensors += [K.learning_phase()] - - assert len(val_data) == len(tensors) - val_size = val_data[0].shape[0] - i = 0 - while i < val_size: - step = min(self.batch_size, val_size - i) - if self.model.uses_learning_phase: - # do not slice the learning phase - batch_val = [x[i:i + step] for x in val_data[:-1]] - batch_val.append(val_data[-1]) - else: - batch_val = [x[i:i + step] for x in val_data] - assert len(batch_val) == len(tensors) - feed_dict = dict(zip(tensors, batch_val)) - result = self.sess.run([self.merged], feed_dict=feed_dict) - summary_str = result[0] - self.writer.add_summary(summary_str, epoch) - i += self.batch_size - - if self.embeddings_freq and self.embeddings_data is not None: - if epoch % self.embeddings_freq == 0: - # We need a second forward-pass here because we're passing - # the `embeddings_data` explicitly. This design allows to pass - # arbitrary data as `embeddings_data` and results from the fact - # that we need to know the size of the `tf.Variable`s which - # hold the embeddings in `set_model`. At this point, however, - # the `validation_data` is not yet set. 
- - # More details in this discussion: - # https://github.com/keras-team/keras/pull/7766#issuecomment-329195622 - - embeddings_data = self.embeddings_data - n_samples = embeddings_data[0].shape[0] - - i = 0 - while i < n_samples: - step = min(self.batch_size, n_samples - i) - batch = slice(i, i + step) - - if type(self.model.input) == list: - feed_dict = {_input: embeddings_data[idx][batch] - for idx, _input in enumerate(self.model.input)} - else: - feed_dict = { - self.model.input: embeddings_data[0][batch]} - - feed_dict.update({self.batch_id: i, self.step: step}) - - if self.model.uses_learning_phase: - feed_dict[K.learning_phase()] = False - - self.sess.run(self.assign_embeddings, feed_dict=feed_dict) - self.saver.save(self.sess, - os.path.join(self.log_dir, - 'keras_embedding.ckpt'), - epoch) - - i += self.batch_size - - if self.update_freq == 'epoch': - index = epoch - else: - index = self.samples_seen - self._write_logs(logs, index) - - def _write_logs(self, logs, index): - for name, value in logs.items(): - if name in ['batch', 'size']: - continue - summary = tf.Summary() - summary_value = summary.value.add() - if isinstance(value, np.ndarray): - summary_value.simple_value = value.item() - else: - summary_value.simple_value = value - summary_value.tag = name - self.writer.add_summary(summary, index) - self.writer.flush() - - def on_train_end(self, _): - self.writer.close() - - def on_batch_end(self, batch, logs=None): - if self.update_freq != 'epoch': - self.samples_seen += logs['size'] - samples_seen_since = self.samples_seen - self.samples_seen_at_last_write - if samples_seen_since >= self.update_freq: - self._write_logs(logs, self.samples_seen) - self.samples_seen_at_last_write = self.samples_seen - - -class ReduceLROnPlateau(Callback): - """Reduce learning rate when a metric has stopped improving. - - Models often benefit from reducing the learning rate by a factor - of 2-10 once learning stagnates. This callback monitors a - quantity and if no improvement is seen for a 'patience' number - of epochs, the learning rate is reduced. - - # Example - - ```python - reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.2, - patience=5, min_lr=0.001) - model.fit(X_train, Y_train, callbacks=[reduce_lr]) - ``` - - # Arguments - monitor: quantity to be monitored. - factor: factor by which the learning rate will - be reduced. new_lr = lr * factor - patience: number of epochs with no improvement - after which learning rate will be reduced. - verbose: int. 0: quiet, 1: update messages. - mode: one of {auto, min, max}. In `min` mode, - lr will be reduced when the quantity - monitored has stopped decreasing; in `max` - mode it will be reduced when the quantity - monitored has stopped increasing; in `auto` - mode, the direction is automatically inferred - from the name of the monitored quantity. - min_delta: threshold for measuring the new optimum, - to only focus on significant changes. - cooldown: number of epochs to wait before resuming - normal operation after lr has been reduced. - min_lr: lower bound on the learning rate. 
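-
-    For example, with `factor=0.5` and `patience=2`, two consecutive epochs
-    without improvement halve the learning rate (`new_lr = lr * factor`),
-    never going below `min_lr`.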
- """ - - def __init__(self, monitor='val_loss', factor=0.1, patience=10, - verbose=0, mode='auto', min_delta=1e-4, cooldown=0, min_lr=0, - **kwargs): - super(ReduceLROnPlateau, self).__init__() - - self.monitor = monitor - if factor >= 1.0: - raise ValueError('ReduceLROnPlateau ' - 'does not support a factor >= 1.0.') - if 'epsilon' in kwargs: - min_delta = kwargs.pop('epsilon') - warnings.warn('`epsilon` argument is deprecated and ' - 'will be removed, use `min_delta` instead.') - self.factor = factor - self.min_lr = min_lr - self.min_delta = min_delta - self.patience = patience - self.verbose = verbose - self.cooldown = cooldown - self.cooldown_counter = 0 # Cooldown counter. - self.wait = 0 - self.best = 0 - self.mode = mode - self.monitor_op = None - self._reset() - - def _reset(self): - """Resets wait counter and cooldown counter. - """ - if self.mode not in ['auto', 'min', 'max']: - warnings.warn('Learning Rate Plateau Reducing mode %s is unknown, ' - 'fallback to auto mode.' % (self.mode), - RuntimeWarning) - self.mode = 'auto' - if (self.mode == 'min' or - (self.mode == 'auto' and 'acc' not in self.monitor)): - self.monitor_op = lambda a, b: np.less(a, b - self.min_delta) - self.best = np.Inf - else: - self.monitor_op = lambda a, b: np.greater(a, b + self.min_delta) - self.best = -np.Inf - self.cooldown_counter = 0 - self.wait = 0 - - def on_train_begin(self, logs=None): - self._reset() - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - logs['lr'] = K.get_value(self.model.optimizer.lr) - current = logs.get(self.monitor) - if current is None: - warnings.warn( - 'Reduce LR on plateau conditioned on metric `%s` ' - 'which is not available. Available metrics are: %s' % - (self.monitor, ','.join(list(logs.keys()))), RuntimeWarning - ) - - else: - if self.in_cooldown(): - self.cooldown_counter -= 1 - self.wait = 0 - - if self.monitor_op(current, self.best): - self.best = current - self.wait = 0 - elif not self.in_cooldown(): - self.wait += 1 - if self.wait >= self.patience: - old_lr = float(K.get_value(self.model.optimizer.lr)) - if old_lr > self.min_lr: - new_lr = old_lr * self.factor - new_lr = max(new_lr, self.min_lr) - K.set_value(self.model.optimizer.lr, new_lr) - if self.verbose > 0: - print('\nEpoch %05d: ReduceLROnPlateau reducing ' - 'learning rate to %s.' % (epoch + 1, new_lr)) - self.cooldown_counter = self.cooldown - self.wait = 0 - - def in_cooldown(self): - return self.cooldown_counter > 0 - - -class CSVLogger(Callback): - """Callback that streams epoch results to a csv file. - - Supports all values that can be represented as a string, - including 1D iterables such as np.ndarray. - - # Example - - ```python - csv_logger = CSVLogger('training.log') - model.fit(X_train, Y_train, callbacks=[csv_logger]) - ``` - - # Arguments - filename: filename of the csv file, e.g. 'run/log.csv'. - separator: string used to separate elements in the csv file. - append: True: append if file exists (useful for continuing - training). 
False: overwrite existing file, - """ - - def __init__(self, filename, separator=',', append=False): - self.sep = separator - self.filename = filename - self.append = append - self.writer = None - self.keys = None - self.append_header = True - if six.PY2: - self.file_flags = 'b' - self._open_args = {} - else: - self.file_flags = '' - self._open_args = {'newline': '\n'} - super(CSVLogger, self).__init__() - - def on_train_begin(self, logs=None): - if self.append: - if os.path.exists(self.filename): - with open(self.filename, 'r' + self.file_flags) as f: - self.append_header = not bool(len(f.readline())) - mode = 'a' - else: - mode = 'w' - self.csv_file = io.open(self.filename, - mode + self.file_flags, - **self._open_args) - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - - def handle_value(k): - is_zero_dim_ndarray = isinstance(k, np.ndarray) and k.ndim == 0 - if isinstance(k, six.string_types): - return k - elif isinstance(k, Iterable) and not is_zero_dim_ndarray: - return '"[%s]"' % (', '.join(map(str, k))) - else: - return k - - if self.keys is None: - self.keys = sorted(logs.keys()) - - if self.model.stop_training: - # We set NA so that csv parsers do not fail for this last epoch. - logs = dict([(k, logs[k] if k in logs else 'NA') - for k in self.keys]) - - if not self.writer: - class CustomDialect(csv.excel): - delimiter = self.sep - fieldnames = ['epoch'] + self.keys - if six.PY2: - fieldnames = [unicode(x) for x in fieldnames] - self.writer = csv.DictWriter(self.csv_file, - fieldnames=fieldnames, - dialect=CustomDialect) - if self.append_header: - self.writer.writeheader() - - row_dict = OrderedDict({'epoch': epoch}) - row_dict.update((key, handle_value(logs[key])) for key in self.keys) - self.writer.writerow(row_dict) - self.csv_file.flush() - - def on_train_end(self, logs=None): - self.csv_file.close() - self.writer = None - - -class LambdaCallback(Callback): - r"""Callback for creating simple, custom callbacks on-the-fly. - - This callback is constructed with anonymous functions that will be called - at the appropriate time. Note that the callbacks expects positional - arguments, as: - - - `on_epoch_begin` and `on_epoch_end` expect two positional arguments: - `epoch`, `logs` - - `on_batch_begin` and `on_batch_end` expect two positional arguments: - `batch`, `logs` - - `on_train_begin` and `on_train_end` expect one positional argument: - `logs` - - # Arguments - on_epoch_begin: called at the beginning of every epoch. - on_epoch_end: called at the end of every epoch. - on_batch_begin: called at the beginning of every batch. - on_batch_end: called at the end of every batch. - on_train_begin: called at the beginning of model training. - on_train_end: called at the end of model training. - - # Example - - ```python - # Print the batch number at the beginning of every batch. - batch_print_callback = LambdaCallback( - on_batch_begin=lambda batch,logs: print(batch)) - - # Stream the epoch loss to a file in JSON format. The file content - # is not well-formed JSON but rather has a JSON object per line. - import json - json_log = open('loss_log.json', mode='wt', buffering=1) - json_logging_callback = LambdaCallback( - on_epoch_end=lambda epoch, logs: json_log.write( - json.dumps({'epoch': epoch, 'loss': logs['loss']}) + '\n'), - on_train_end=lambda logs: json_log.close() - ) - - # Terminate some processes after having finished model training. - processes = ... 
- cleanup_callback = LambdaCallback( - on_train_end=lambda logs: [ - p.terminate() for p in processes if p.is_alive()]) - - model.fit(..., - callbacks=[batch_print_callback, - json_logging_callback, - cleanup_callback]) - ``` - """ - - def __init__(self, - on_epoch_begin=None, - on_epoch_end=None, - on_batch_begin=None, - on_batch_end=None, - on_train_begin=None, - on_train_end=None, - **kwargs): - super(LambdaCallback, self).__init__() - self.__dict__.update(kwargs) - if on_epoch_begin is not None: - self.on_epoch_begin = on_epoch_begin - else: - self.on_epoch_begin = lambda epoch, logs: None - if on_epoch_end is not None: - self.on_epoch_end = on_epoch_end - else: - self.on_epoch_end = lambda epoch, logs: None - if on_batch_begin is not None: - self.on_batch_begin = on_batch_begin - else: - self.on_batch_begin = lambda batch, logs: None - if on_batch_end is not None: - self.on_batch_end = on_batch_end - else: - self.on_batch_end = lambda batch, logs: None - if on_train_begin is not None: - self.on_train_begin = on_train_begin - else: - self.on_train_begin = lambda logs: None - if on_train_end is not None: - self.on_train_end = on_train_end - else: - self.on_train_end = lambda logs: None -"""Constraints: functions that impose constraints on weight values. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -from . import backend as K -from .utils.generic_utils import serialize_keras_object -from .utils.generic_utils import deserialize_keras_object - - -class Constraint(object): - - def __call__(self, w): - return w - - def get_config(self): - return {} - - -class MaxNorm(Constraint): - """MaxNorm weight constraint. - - Constrains the weights incident to each hidden unit - to have a norm less than or equal to a desired value. - - # Arguments - max_value: the maximum norm for the incoming weights. - axis: integer, axis along which to calculate weight norms. - For instance, in a `Dense` layer the weight matrix - has shape `(input_dim, output_dim)`, - set `axis` to `0` to constrain each weight vector - of length `(input_dim,)`. - In a `Conv2D` layer with `data_format="channels_last"`, - the weight tensor has shape - `(rows, cols, input_depth, output_depth)`, - set `axis` to `[0, 1, 2]` - to constrain the weights of each filter tensor of size - `(rows, cols, input_depth)`. - - # References - - [Dropout: A Simple Way to Prevent Neural Networks from Overfitting]( - http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf) - """ - - def __init__(self, max_value=2, axis=0): - self.max_value = max_value - self.axis = axis - - def __call__(self, w): - norms = K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True)) - desired = K.clip(norms, 0, self.max_value) - w *= (desired / (K.epsilon() + norms)) - return w - - def get_config(self): - return {'max_value': self.max_value, - 'axis': self.axis} - - -class NonNeg(Constraint): - """Constrains the weights to be non-negative. - """ - - def __call__(self, w): - w *= K.cast(K.greater_equal(w, 0.), K.floatx()) - return w - - -class UnitNorm(Constraint): - """Constrains the weights incident to each hidden unit to have unit norm. - - # Arguments - axis: integer, axis along which to calculate weight norms. - For instance, in a `Dense` layer the weight matrix - has shape `(input_dim, output_dim)`, - set `axis` to `0` to constrain each weight vector - of length `(input_dim,)`. 
- In a `Conv2D` layer with `data_format="channels_last"`, - the weight tensor has shape - `(rows, cols, input_depth, output_depth)`, - set `axis` to `[0, 1, 2]` - to constrain the weights of each filter tensor of size - `(rows, cols, input_depth)`. - """ - - def __init__(self, axis=0): - self.axis = axis - - def __call__(self, w): - return w / (K.epsilon() + K.sqrt(K.sum(K.square(w), - axis=self.axis, - keepdims=True))) - - def get_config(self): - return {'axis': self.axis} - - -class MinMaxNorm(Constraint): - """MinMaxNorm weight constraint. - - Constrains the weights incident to each hidden unit - to have the norm between a lower bound and an upper bound. - - # Arguments - min_value: the minimum norm for the incoming weights. - max_value: the maximum norm for the incoming weights. - rate: rate for enforcing the constraint: weights will be - rescaled to yield - `(1 - rate) * norm + rate * norm.clip(min_value, max_value)`. - Effectively, this means that rate=1.0 stands for strict - enforcement of the constraint, while rate<1.0 means that - weights will be rescaled at each step to slowly move - towards a value inside the desired interval. - axis: integer, axis along which to calculate weight norms. - For instance, in a `Dense` layer the weight matrix - has shape `(input_dim, output_dim)`, - set `axis` to `0` to constrain each weight vector - of length `(input_dim,)`. - In a `Conv2D` layer with `data_format="channels_last"`, - the weight tensor has shape - `(rows, cols, input_depth, output_depth)`, - set `axis` to `[0, 1, 2]` - to constrain the weights of each filter tensor of size - `(rows, cols, input_depth)`. - """ - - def __init__(self, min_value=0.0, max_value=1.0, rate=1.0, axis=0): - self.min_value = min_value - self.max_value = max_value - self.rate = rate - self.axis = axis - - def __call__(self, w): - norms = K.sqrt(K.sum(K.square(w), axis=self.axis, keepdims=True)) - desired = (self.rate * K.clip(norms, self.min_value, self.max_value) + - (1 - self.rate) * norms) - w *= (desired / (K.epsilon() + norms)) - return w - - def get_config(self): - return {'min_value': self.min_value, - 'max_value': self.max_value, - 'rate': self.rate, - 'axis': self.axis} - - -# Aliases. - -max_norm = MaxNorm -non_neg = NonNeg -unit_norm = UnitNorm -min_max_norm = MinMaxNorm - - -# Legacy aliases. -maxnorm = max_norm -nonneg = non_neg -unitnorm = unit_norm - - -def serialize(constraint): - return serialize_keras_object(constraint) - - -def deserialize(config, custom_objects=None): - return deserialize_keras_object(config, - module_objects=globals(), - custom_objects=custom_objects, - printable_module_name='constraint') - - -def get(identifier): - if identifier is None: - return None - if isinstance(identifier, dict): - return deserialize(identifier) - elif isinstance(identifier, six.string_types): - config = {'class_name': str(identifier), 'config': {}} - return deserialize(config) - elif callable(identifier): - return identifier - else: - raise ValueError('Could not interpret constraint identifier: ' + - str(identifier)) -"""Built-in weight initializers. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import six -from . import backend as K -from .utils.generic_utils import serialize_keras_object -from .utils.generic_utils import deserialize_keras_object - - -class Initializer(object): - """Initializer base class: all initializers inherit from this class. 
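-
-    # Example
-
-    A minimal sketch of the contract subclasses implement: `__call__`
-    receives a shape and returns a tensor of that shape.
-
-    ```python
-    class Halves(Initializer):
-        def __call__(self, shape, dtype=None):
-            return K.constant(0.5, shape=shape, dtype=dtype)
-    ```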
- """ - - def __call__(self, shape, dtype=None): - raise NotImplementedError - - def get_config(self): - return {} - - @classmethod - def from_config(cls, config): - if 'dtype' in config: - # Initializers saved from `tf.keras` - # may contain an unused `dtype` argument. - config.pop('dtype') - return cls(**config) - - -class Zeros(Initializer): - """Initializer that generates tensors initialized to 0. - """ - - def __call__(self, shape, dtype=None): - return K.constant(0, shape=shape, dtype=dtype) - - -class Ones(Initializer): - """Initializer that generates tensors initialized to 1. - """ - - def __call__(self, shape, dtype=None): - return K.constant(1, shape=shape, dtype=dtype) - - -class Constant(Initializer): - """Initializer that generates tensors initialized to a constant value. - - # Arguments - value: float; the value of the generator tensors. - """ - - def __init__(self, value=0): - self.value = value - - def __call__(self, shape, dtype=None): - return K.constant(self.value, shape=shape, dtype=dtype) - - def get_config(self): - return {'value': self.value} - - -class RandomNormal(Initializer): - """Initializer that generates tensors with a normal distribution. - - # Arguments - mean: a python scalar or a scalar tensor. Mean of the random values - to generate. - stddev: a python scalar or a scalar tensor. Standard deviation of the - random values to generate. - seed: A Python integer. Used to seed the random generator. - """ - - def __init__(self, mean=0., stddev=0.05, seed=None): - self.mean = mean - self.stddev = stddev - self.seed = seed - - def __call__(self, shape, dtype=None): - return K.random_normal(shape, self.mean, self.stddev, - dtype=dtype, seed=self.seed) - - def get_config(self): - return { - 'mean': self.mean, - 'stddev': self.stddev, - 'seed': self.seed - } - - -class RandomUniform(Initializer): - """Initializer that generates tensors with a uniform distribution. - - # Arguments - minval: A python scalar or a scalar tensor. Lower bound of the range - of random values to generate. - maxval: A python scalar or a scalar tensor. Upper bound of the range - of random values to generate. Defaults to 1 for float types. - seed: A Python integer. Used to seed the random generator. - """ - - def __init__(self, minval=-0.05, maxval=0.05, seed=None): - self.minval = minval - self.maxval = maxval - self.seed = seed - - def __call__(self, shape, dtype=None): - return K.random_uniform(shape, self.minval, self.maxval, - dtype=dtype, seed=self.seed) - - def get_config(self): - return { - 'minval': self.minval, - 'maxval': self.maxval, - 'seed': self.seed, - } - - -class TruncatedNormal(Initializer): - """Initializer that generates a truncated normal distribution. - - These values are similar to values from a `RandomNormal` - except that values more than two standard deviations from the mean - are discarded and redrawn. This is the recommended initializer for - neural network weights and filters. - - # Arguments - mean: a python scalar or a scalar tensor. Mean of the random values - to generate. - stddev: a python scalar or a scalar tensor. Standard deviation of the - random values to generate. - seed: A Python integer. Used to seed the random generator. 
- """ - - def __init__(self, mean=0., stddev=0.05, seed=None): - self.mean = mean - self.stddev = stddev - self.seed = seed - - def __call__(self, shape, dtype=None): - return K.truncated_normal(shape, self.mean, self.stddev, - dtype=dtype, seed=self.seed) - - def get_config(self): - return { - 'mean': self.mean, - 'stddev': self.stddev, - 'seed': self.seed - } - - -class VarianceScaling(Initializer): - """Initializer capable of adapting its scale to the shape of weights. - - With `distribution="normal"`, samples are drawn from a truncated normal - distribution centered on zero, with `stddev = sqrt(scale / n)` where n is: - - - number of input units in the weight tensor, if mode = "fan_in" - - number of output units, if mode = "fan_out" - - average of the numbers of input and output units, if mode = "fan_avg" - - With `distribution="uniform"`, - samples are drawn from a uniform distribution - within [-limit, limit], with `limit = sqrt(3 * scale / n)`. - - # Arguments - scale: Scaling factor (positive float). - mode: One of "fan_in", "fan_out", "fan_avg". - distribution: Random distribution to use. One of "normal", "uniform". - seed: A Python integer. Used to seed the random generator. - - # Raises - ValueError: In case of an invalid value for the "scale", mode" or - "distribution" arguments. - """ - - def __init__(self, scale=1.0, - mode='fan_in', - distribution='normal', - seed=None): - if scale <= 0.: - raise ValueError('`scale` must be a positive float. Got:', scale) - mode = mode.lower() - if mode not in {'fan_in', 'fan_out', 'fan_avg'}: - raise ValueError('Invalid `mode` argument: ' - 'expected on of {"fan_in", "fan_out", "fan_avg"} ' - 'but got', mode) - distribution = distribution.lower() - if distribution not in {'normal', 'uniform'}: - raise ValueError('Invalid `distribution` argument: ' - 'expected one of {"normal", "uniform"} ' - 'but got', distribution) - self.scale = scale - self.mode = mode - self.distribution = distribution - self.seed = seed - - def __call__(self, shape, dtype=None): - fan_in, fan_out = _compute_fans(shape) - scale = self.scale - if self.mode == 'fan_in': - scale /= max(1., fan_in) - elif self.mode == 'fan_out': - scale /= max(1., fan_out) - else: - scale /= max(1., float(fan_in + fan_out) / 2) - if self.distribution == 'normal': - # 0.879... = scipy.stats.truncnorm.std(a=-2, b=2, loc=0., scale=1.) - stddev = np.sqrt(scale) / .87962566103423978 - return K.truncated_normal(shape, 0., stddev, - dtype=dtype, seed=self.seed) - else: - limit = np.sqrt(3. * scale) - return K.random_uniform(shape, -limit, limit, - dtype=dtype, seed=self.seed) - - def get_config(self): - return { - 'scale': self.scale, - 'mode': self.mode, - 'distribution': self.distribution, - 'seed': self.seed - } - - -class Orthogonal(Initializer): - """Initializer that generates a random orthogonal matrix. - - # Arguments - gain: Multiplicative factor to apply to the orthogonal matrix. - seed: A Python integer. Used to seed the random generator. 
- - # References - - [Exact solutions to the nonlinear dynamics of learning in deep - linear neural networks](http://arxiv.org/abs/1312.6120) - """ - - def __init__(self, gain=1., seed=None): - self.gain = gain - self.seed = seed - - def __call__(self, shape, dtype=None): - num_rows = 1 - for dim in shape[:-1]: - num_rows *= dim - num_cols = shape[-1] - flat_shape = (num_rows, num_cols) - rng = np.random - if self.seed is not None: - rng = np.random.RandomState(self.seed) - a = rng.normal(0.0, 1.0, flat_shape) - u, _, v = np.linalg.svd(a, full_matrices=False) - # Pick the one with the correct shape. - q = u if u.shape == flat_shape else v - q = q.reshape(shape) - return self.gain * q[:shape[0], :shape[1]] - - def get_config(self): - return { - 'gain': self.gain, - 'seed': self.seed - } - - -class Identity(Initializer): - """Initializer that generates the identity matrix. - - Only use for 2D matrices. - If the desired matrix is not square, it pads with zeros on the - additional rows/columns - - # Arguments - gain: Multiplicative factor to apply to the identity matrix. - """ - - def __init__(self, gain=1.): - self.gain = gain - - def __call__(self, shape, dtype=None): - if len(shape) != 2: - raise ValueError( - 'Identity matrix initializer can only be used for 2D matrices.') - - return self.gain * K.eye((shape[0], shape[1]), dtype=dtype) - - def get_config(self): - return { - 'gain': self.gain - } - - -def lecun_uniform(seed=None): - """LeCun uniform initializer. - - It draws samples from a uniform distribution within [-limit, limit] - where `limit` is `sqrt(3 / fan_in)` - where `fan_in` is the number of input units in the weight tensor. - - # Arguments - seed: A Python integer. Used to seed the random generator. - - # Returns - An initializer. - - # References - - [Efficient BackProp](http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf) - """ - return VarianceScaling(scale=1., - mode='fan_in', - distribution='uniform', - seed=seed) - - -def glorot_normal(seed=None): - """Glorot normal initializer, also called Xavier normal initializer. - - It draws samples from a truncated normal distribution centered on 0 - with `stddev = sqrt(2 / (fan_in + fan_out))` - where `fan_in` is the number of input units in the weight tensor - and `fan_out` is the number of output units in the weight tensor. - - # Arguments - seed: A Python integer. Used to seed the random generator. - - # Returns - An initializer. - - # References - - [Understanding the difficulty of training deep feedforward neural - networks](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf) - """ - return VarianceScaling(scale=1., - mode='fan_avg', - distribution='normal', - seed=seed) - - -def glorot_uniform(seed=None): - """Glorot uniform initializer, also called Xavier uniform initializer. - - It draws samples from a uniform distribution within [-limit, limit] - where `limit` is `sqrt(6 / (fan_in + fan_out))` - where `fan_in` is the number of input units in the weight tensor - and `fan_out` is the number of output units in the weight tensor. - - # Arguments - seed: A Python integer. Used to seed the random generator. - - # Returns - An initializer. - - # References - - [Understanding the difficulty of training deep feedforward neural - networks](http://jmlr.org/proceedings/papers/v9/glorot10a/glorot10a.pdf) - """ - return VarianceScaling(scale=1., - mode='fan_avg', - distribution='uniform', - seed=seed) - - -def he_normal(seed=None): - """He normal initializer. 
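It may help to see why the `glorot_uniform` initializer above reduces to the familiar `sqrt(6 / (fan_in + fan_out))` bound: with `scale=1` and `mode='fan_avg'`, `VarianceScaling` draws from `[-limit, limit]` with `limit = sqrt(3 * scale / n)` and `n = (fan_in + fan_out) / 2`. A small numeric check (fan sizes are illustrative):

```python
import numpy as np

fan_in, fan_out = 256, 128
n = (fan_in + fan_out) / 2.
limit = np.sqrt(3. * 1. / n)  # VarianceScaling's uniform limit
assert np.isclose(limit, np.sqrt(6. / (fan_in + fan_out)))
```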
- - It draws samples from a truncated normal distribution centered on 0 - with `stddev = sqrt(2 / fan_in)` - where `fan_in` is the number of input units in the weight tensor. - - # Arguments - seed: A Python integer. Used to seed the random generator. - - # Returns - An initializer. - - # References - - [Delving Deep into Rectifiers: Surpassing Human-Level Performance on - ImageNet Classification](http://arxiv.org/abs/1502.01852) - """ - return VarianceScaling(scale=2., - mode='fan_in', - distribution='normal', - seed=seed) - - -def lecun_normal(seed=None): - """LeCun normal initializer. - - It draws samples from a truncated normal distribution centered on 0 - with `stddev = sqrt(1 / fan_in)` - where `fan_in` is the number of input units in the weight tensor. - - # Arguments - seed: A Python integer. Used to seed the random generator. - - # Returns - An initializer. - - # References - - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) - - [Efficient Backprop](http://yann.lecun.com/exdb/publis/pdf/lecun-98b.pdf) - """ - return VarianceScaling(scale=1., - mode='fan_in', - distribution='normal', - seed=seed) - - -def he_uniform(seed=None): - """He uniform variance scaling initializer. - - It draws samples from a uniform distribution within [-limit, limit] - where `limit` is `sqrt(6 / fan_in)` - where `fan_in` is the number of input units in the weight tensor. - - # Arguments - seed: A Python integer. Used to seed the random generator. - - # Returns - An initializer. - - # References - - [Delving Deep into Rectifiers: Surpassing Human-Level Performance on - ImageNet Classification](http://arxiv.org/abs/1502.01852) - """ - return VarianceScaling(scale=2., - mode='fan_in', - distribution='uniform', - seed=seed) - - -# Compatibility aliases - -zero = zeros = Zeros -one = ones = Ones -constant = Constant -uniform = random_uniform = RandomUniform -normal = random_normal = RandomNormal -truncated_normal = TruncatedNormal -identity = Identity -orthogonal = Orthogonal - -# Utility functions - - -def _compute_fans(shape, data_format='channels_last'): - """Computes the number of input and output units for a weight shape. - - # Arguments - shape: Integer shape tuple. - data_format: Image data format to use for convolution kernels. - Note that all kernels in Keras are standardized on the - `channels_last` ordering (even when inputs are set - to `channels_first`). - - # Returns - A tuple of scalars, `(fan_in, fan_out)`. - - # Raises - ValueError: in case of invalid `data_format` argument. - """ - if len(shape) == 2: - fan_in = shape[0] - fan_out = shape[1] - elif len(shape) in {3, 4, 5}: - # Assuming convolution kernels (1D, 2D or 3D). - # TH kernel shape: (depth, input_depth, ...) - # TF kernel shape: (..., input_depth, depth) - if data_format == 'channels_first': - receptive_field_size = np.prod(shape[2:]) - fan_in = shape[1] * receptive_field_size - fan_out = shape[0] * receptive_field_size - elif data_format == 'channels_last': - receptive_field_size = np.prod(shape[:-2]) - fan_in = shape[-2] * receptive_field_size - fan_out = shape[-1] * receptive_field_size - else: - raise ValueError('Invalid data_format: ' + data_format) - else: - # No specific assumptions. 
- fan_in = np.sqrt(np.prod(shape)) - fan_out = np.sqrt(np.prod(shape)) - return fan_in, fan_out - - -def serialize(initializer): - return serialize_keras_object(initializer) - - -def deserialize(config, custom_objects=None): - return deserialize_keras_object(config, - module_objects=globals(), - custom_objects=custom_objects, - printable_module_name='initializer') - - -def get(identifier): - if isinstance(identifier, dict): - return deserialize(identifier) - elif isinstance(identifier, six.string_types): - config = {'class_name': str(identifier), 'config': {}} - return deserialize(config) - elif callable(identifier): - return identifier - else: - raise ValueError('Could not interpret initializer identifier: ' + - str(identifier)) -"""Built-in loss functions. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -from . import backend as K -from .utils.generic_utils import deserialize_keras_object -from .utils.generic_utils import serialize_keras_object - - -def mean_squared_error(y_true, y_pred): - return K.mean(K.square(y_pred - y_true), axis=-1) - - -def mean_absolute_error(y_true, y_pred): - return K.mean(K.abs(y_pred - y_true), axis=-1) - - -def mean_absolute_percentage_error(y_true, y_pred): - diff = K.abs((y_true - y_pred) / K.clip(K.abs(y_true), - K.epsilon(), - None)) - return 100. * K.mean(diff, axis=-1) - - -def mean_squared_logarithmic_error(y_true, y_pred): - first_log = K.log(K.clip(y_pred, K.epsilon(), None) + 1.) - second_log = K.log(K.clip(y_true, K.epsilon(), None) + 1.) - return K.mean(K.square(first_log - second_log), axis=-1) - - -def squared_hinge(y_true, y_pred): - return K.mean(K.square(K.maximum(1. - y_true * y_pred, 0.)), axis=-1) - - -def hinge(y_true, y_pred): - return K.mean(K.maximum(1. - y_true * y_pred, 0.), axis=-1) - - -def categorical_hinge(y_true, y_pred): - pos = K.sum(y_true * y_pred, axis=-1) - neg = K.max((1. - y_true) * y_pred, axis=-1) - return K.maximum(0., neg - pos + 1.) - - -def logcosh(y_true, y_pred): - """Logarithm of the hyperbolic cosine of the prediction error. - - `log(cosh(x))` is approximately equal to `(x ** 2) / 2` for small `x` and - to `abs(x) - log(2)` for large `x`. This means that 'logcosh' works mostly - like the mean squared error, but will not be so strongly affected by the - occasional wildly incorrect prediction. - - # Arguments - y_true: tensor of true targets. - y_pred: tensor of predicted targets. - - # Returns - Tensor with one scalar loss entry per sample. - """ - def _logcosh(x): - return x + K.softplus(-2. * x) - K.log(2.) - return K.mean(_logcosh(y_pred - y_true), axis=-1) - - -def categorical_crossentropy(y_true, y_pred): - return K.categorical_crossentropy(y_true, y_pred) - - -def sparse_categorical_crossentropy(y_true, y_pred): - return K.sparse_categorical_crossentropy(y_true, y_pred) - - -def binary_crossentropy(y_true, y_pred): - return K.mean(K.binary_crossentropy(y_true, y_pred), axis=-1) - - -def kullback_leibler_divergence(y_true, y_pred): - y_true = K.clip(y_true, K.epsilon(), 1) - y_pred = K.clip(y_pred, K.epsilon(), 1) - return K.sum(y_true * K.log(y_true / y_pred), axis=-1) - - -def poisson(y_true, y_pred): - return K.mean(y_pred - y_true * K.log(y_pred + K.epsilon()), axis=-1) - - -def cosine_proximity(y_true, y_pred): - y_true = K.l2_normalize(y_true, axis=-1) - y_pred = K.l2_normalize(y_pred, axis=-1) - return -K.sum(y_true * y_pred, axis=-1) - - -# Aliases. 
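The `_logcosh` helper in the loss definitions above relies on the exact identity `log(cosh(x)) = x + softplus(-2x) - log(2)`, which stays finite where a naive `cosh` would overflow for large `|x|`. A NumPy sketch verifying the identity on a modest range:

```python
import numpy as np

def softplus(x):
    # log(1 + exp(x)); fine for the small magnitudes used here.
    return np.log1p(np.exp(x))

x = np.linspace(-5., 5., 11)
assert np.allclose(np.log(np.cosh(x)), x + softplus(-2. * x) - np.log(2.))
```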
- -mse = MSE = mean_squared_error -mae = MAE = mean_absolute_error -mape = MAPE = mean_absolute_percentage_error -msle = MSLE = mean_squared_logarithmic_error -kld = KLD = kullback_leibler_divergence -cosine = cosine_proximity - - -def serialize(loss): - return serialize_keras_object(loss) - - -def deserialize(name, custom_objects=None): - return deserialize_keras_object(name, - module_objects=globals(), - custom_objects=custom_objects, - printable_module_name='loss function') - - -def get(identifier): - """Get the `identifier` loss function. - - # Arguments - identifier: None or str, name of the function. - - # Returns - The loss function or None if `identifier` is None. - - # Raises - ValueError if unknown identifier. - """ - if identifier is None: - return None - if isinstance(identifier, six.string_types): - identifier = str(identifier) - return deserialize(identifier) - if isinstance(identifier, dict): - return deserialize(identifier) - elif callable(identifier): - return identifier - else: - raise ValueError('Could not interpret ' - 'loss function identifier:', identifier) -"""Built-in metrics. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -from . import backend as K -from .losses import mean_squared_error -from .losses import mean_absolute_error -from .losses import mean_absolute_percentage_error -from .losses import mean_squared_logarithmic_error -from .losses import hinge -from .losses import logcosh -from .losses import squared_hinge -from .losses import categorical_crossentropy -from .losses import sparse_categorical_crossentropy -from .losses import binary_crossentropy -from .losses import kullback_leibler_divergence -from .losses import poisson -from .losses import cosine_proximity -from .utils.generic_utils import deserialize_keras_object -from .utils.generic_utils import serialize_keras_object - - -def binary_accuracy(y_true, y_pred): - return K.mean(K.equal(y_true, K.round(y_pred)), axis=-1) - - -def categorical_accuracy(y_true, y_pred): - return K.cast(K.equal(K.argmax(y_true, axis=-1), - K.argmax(y_pred, axis=-1)), - K.floatx()) - - -def sparse_categorical_accuracy(y_true, y_pred): - # reshape in case it's in shape (num_samples, 1) instead of (num_samples,) - if K.ndim(y_true) == K.ndim(y_pred): - y_true = K.squeeze(y_true, -1) - # convert dense predictions to labels - y_pred_labels = K.argmax(y_pred, axis=-1) - y_pred_labels = K.cast(y_pred_labels, K.floatx()) - return K.cast(K.equal(y_true, y_pred_labels), K.floatx()) - - -def top_k_categorical_accuracy(y_true, y_pred, k=5): - return K.cast(K.in_top_k(y_pred, K.argmax(y_true, axis=-1), k), K.floatx()) - - -def sparse_top_k_categorical_accuracy(y_true, y_pred, k=5): - # If the shape of y_true is (num_samples, 1), flatten to (num_samples,) - return K.cast(K.in_top_k(y_pred, K.cast(K.flatten(y_true), 'int32'), k), - K.floatx()) - - -# Aliases - -mse = MSE = mean_squared_error -mae = MAE = mean_absolute_error -mape = MAPE = mean_absolute_percentage_error -msle = MSLE = mean_squared_logarithmic_error -cosine = cosine_proximity - - -def serialize(metric): - return serialize_keras_object(metric) - - -def deserialize(config, custom_objects=None): - return deserialize_keras_object(config, - module_objects=globals(), - custom_objects=custom_objects, - printable_module_name='metric function') - - -def get(identifier): - if isinstance(identifier, dict): - config = {'class_name': str(identifier), 'config': {}} - return deserialize(config) - elif 
isinstance(identifier, six.string_types): - return deserialize(str(identifier)) - elif callable(identifier): - return identifier - else: - raise ValueError('Could not interpret ' - 'metric function identifier:', identifier) -"""Model-related utilities. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from . import backend as K -from .utils.generic_utils import has_arg -from .utils.generic_utils import to_list -from .engine.input_layer import Input -from .engine.input_layer import InputLayer -from .engine.training import Model -from .engine.sequential import Sequential -from .engine.saving import save_model -from .engine.saving import load_model -from .engine.saving import model_from_config -from .engine.saving import model_from_yaml -from .engine.saving import model_from_json - -try: - import h5py -except ImportError: - h5py = None - - -def _clone_functional_model(model, input_tensors=None): - """Clone a functional `Model` instance. - - Model cloning is similar to calling a model on new inputs, - except that it creates new layers (and thus new weights) instead - of sharing the weights of the existing layers. - - # Arguments - model: Instance of `Model`. - input_tensors: optional list of input tensors - to build the model upon. If not provided, - placeholders will be created. - - # Returns - An instance of `Model` reproducing the behavior - of the original model, on top of new inputs tensors, - using newly instantiated weights. - - # Raises - ValueError: in case of invalid `model` argument value. - """ - if not isinstance(model, Model): - raise ValueError('Expected `model` argument ' - 'to be a `Model` instance, got ', model) - if isinstance(model, Sequential): - raise ValueError('Expected `model` argument ' - 'to be a functional `Model` instance, ' - 'got a `Sequential` instance instead:', model) - - layer_map = {} # Cache for created layers. - tensor_map = {} # Map {reference_tensor: (corresponding_tensor, mask)} - if input_tensors is None: - # Create placeholders to build the model on top of. - input_layers = [] - input_tensors = [] - for layer in model._input_layers: - input_tensor = Input(batch_shape=layer.batch_input_shape, - dtype=layer.dtype, - sparse=layer.sparse, - name=layer.name) - input_tensors.append(input_tensor) - # Cache newly created input layer. - newly_created_input_layer = input_tensor._keras_history[0] - layer_map[layer] = newly_created_input_layer - for _original, _cloned in zip(model._input_layers, input_layers): - layer_map[_original] = _cloned - else: - # Make sure that all input tensors come from a Keras layer. - # If tensor comes from an input layer: cache the input layer. - input_tensors = to_list(input_tensors) - _input_tensors = [] - for i, x in enumerate(input_tensors): - if not K.is_keras_tensor(x): - name = model._input_layers[i].name - input_tensor = Input(tensor=x, - name='input_wrapper_for_' + name) - _input_tensors.append(input_tensor) - # Cache newly created input layer. - original_input_layer = x._keras_history[0] - newly_created_input_layer = input_tensor._keras_history[0] - layer_map[original_input_layer] = newly_created_input_layer - else: - _input_tensors.append(x) - input_tensors = _input_tensors - - for x, y in zip(model.inputs, input_tensors): - tensor_map[x] = (y, None) # tensor, mask - - # Iterated over every node in the reference model, in depth order. 
- depth_keys = list(model._nodes_by_depth.keys()) - depth_keys.sort(reverse=True) - for depth in depth_keys: - nodes = model._nodes_by_depth[depth] - for node in nodes: - # Recover the corresponding layer. - layer = node.outbound_layer - - # Get or create layer. - if layer not in layer_map: - # Clone layer. - new_layer = layer.__class__.from_config(layer.get_config()) - layer_map[layer] = new_layer - layer = new_layer - else: - # Reuse previously cloned layer. - layer = layer_map[layer] - # Don't call InputLayer multiple times. - if isinstance(layer, InputLayer): - continue - - # Gather inputs to call the new layer. - reference_input_tensors = node.input_tensors - reference_output_tensors = node.output_tensors - - # If all previous input tensors are available in tensor_map, - # then call node.inbound_layer on them. - computed_data = [] # List of tuples (input, mask). - for x in reference_input_tensors: - if x in tensor_map: - computed_data.append(tensor_map[x]) - - if len(computed_data) == len(reference_input_tensors): - # Call layer. - if node.arguments: - kwargs = node.arguments - else: - kwargs = {} - if len(computed_data) == 1: - computed_tensor, computed_mask = computed_data[0] - if has_arg(layer.call, 'mask'): - if 'mask' not in kwargs: - kwargs['mask'] = computed_mask - output_tensors = to_list( - layer(computed_tensor, **kwargs)) - if layer.supports_masking: - output_masks = to_list( - layer.compute_mask(computed_tensor, - computed_mask)) - else: - output_masks = [None] * len(output_tensors) - computed_tensors = [computed_tensor] - computed_masks = [computed_mask] - else: - computed_tensors = [x[0] for x in computed_data] - computed_masks = [x[1] for x in computed_data] - if has_arg(layer.call, 'mask'): - if 'mask' not in kwargs: - kwargs['mask'] = computed_masks - output_tensors = to_list( - layer(computed_tensors, **kwargs)) - if layer.supports_masking: - output_masks = to_list( - layer.compute_mask(computed_tensors, - computed_masks)) - else: - output_masks = [None] * len(output_tensors) - # Update tensor_map. - for x, y, mask in zip(reference_output_tensors, - output_tensors, - output_masks): - tensor_map[x] = (y, mask) - - # Check that we did compute the model outputs, - # then instantiate a new model from inputs and outputs. - output_tensors = [] - for x in model.outputs: - assert x in tensor_map, 'Could not compute output ' + str(x) - tensor, _ = tensor_map[x] - output_tensors.append(tensor) - return Model(input_tensors, output_tensors, name=model.name) - - -def _clone_sequential_model(model, input_tensors=None): - """Clone a `Sequential` model instance. - - Model cloning is similar to calling a model on new inputs, - except that it creates new layers (and thus new weights) instead - of sharing the weights of the existing layers. - - # Arguments - model: Instance of `Sequential`. - input_tensors: optional list of input tensors - to build the model upon. If not provided, - placeholders will be created. - - # Returns - An instance of `Sequential` reproducing the behavior - of the original model, on top of new inputs tensors, - using newly instantiated weights. - - # Raises - ValueError: in case of invalid `model` argument value. 
- """ - if not isinstance(model, Sequential): - raise ValueError('Expected `model` argument ' - 'to be a `Sequential` model instance, ' - 'but got:', model) - - def clone(layer): - return layer.__class__.from_config(layer.get_config()) - - layers = [clone(layer) for layer in model.layers] - if input_tensors is None: - return Sequential(layers=layers, name=model.name) - else: - if len(to_list(input_tensors)) != 1: - raise ValueError('To clone a `Sequential` model, we expect ' - ' at most one tensor ' - 'as part of `input_tensors`.') - x = to_list(input_tensors)[0] - if K.is_keras_tensor(x): - origin_layer = x._keras_history[0] - if isinstance(origin_layer, InputLayer): - return Sequential(layers=[origin_layer] + layers, - name=model.name) - else: - raise ValueError('Cannot clone a `Sequential` model on top ' - 'of a tensor that comes from a Keras layer ' - 'other than an `InputLayer`. ' - 'Use the functional API instead.') - input_tensor = Input(tensor=x, - name='input_wrapper_for_' + str(x.name)) - input_layer = input_tensor._keras_history[0] - return Sequential(layers=[input_layer] + layers, name=model.name) - - -def clone_model(model, input_tensors=None): - """Clone any `Model` instance. - - Model cloning is similar to calling a model on new inputs, - except that it creates new layers (and thus new weights) instead - of sharing the weights of the existing layers. - - # Arguments - model: Instance of `Model` - (could be a functional model or a Sequential model). - input_tensors: optional list of input tensors - to build the model upon. If not provided, - placeholders will be created. - - # Returns - An instance of `Model` reproducing the behavior - of the original model, on top of new inputs tensors, - using newly instantiated weights. - - # Raises - ValueError: in case of invalid `model` argument value. - """ - if isinstance(model, Sequential): - return _clone_sequential_model(model, input_tensors=input_tensors) - else: - return _clone_functional_model(model, input_tensors=input_tensors) -"""Legacy objectives module. - -Only kept for backwards API compatibility. -""" -from __future__ import absolute_import -from .losses import * -"""Built-in optimizer classes. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import copy -from six.moves import zip - -from . import backend as K -from .utils.generic_utils import serialize_keras_object -from .utils.generic_utils import deserialize_keras_object -from .legacy import interfaces - -if K.backend() == 'tensorflow': - import tensorflow as tf - - -def clip_norm(g, c, n): - """Clip the gradient `g` if the L2 norm `n` exceeds `c`. - - # Arguments - g: Tensor, the gradient tensor - c: float >= 0. Gradients will be clipped - when their L2 norm exceeds this value. - n: Tensor, actual norm of `g`. - - # Returns - Tensor, the gradient clipped if required. 
- """ - if c <= 0: # if clipnorm == 0 no need to add ops to the graph - return g - - # tf require using a special op to multiply IndexedSliced by scalar - if K.backend() == 'tensorflow': - condition = n >= c - then_expression = tf.scalar_mul(c / n, g) - else_expression = g - - # saving the shape to avoid converting sparse tensor to dense - if isinstance(then_expression, tf.Tensor): - g_shape = copy.copy(then_expression.get_shape()) - elif isinstance(then_expression, tf.IndexedSlices): - g_shape = copy.copy(then_expression.dense_shape) - if condition.dtype != tf.bool: - condition = tf.cast(condition, 'bool') - g = tf.cond(condition, - lambda: then_expression, - lambda: else_expression) - if isinstance(then_expression, tf.Tensor): - g.set_shape(g_shape) - elif isinstance(then_expression, tf.IndexedSlices): - g._dense_shape = g_shape - else: - g = K.switch(K.greater_equal(n, c), g * c / n, g) - return g - - -class Optimizer(object): - """Abstract optimizer base class. - - Note: this is the parent class of all optimizers, not an actual optimizer - that can be used for training models. - - All Keras optimizers support the following keyword arguments: - - clipnorm: float >= 0. Gradients will be clipped - when their L2 norm exceeds this value. - clipvalue: float >= 0. Gradients will be clipped - when their absolute value exceeds this value. - """ - - def __init__(self, **kwargs): - allowed_kwargs = {'clipnorm', 'clipvalue'} - for k in kwargs: - if k not in allowed_kwargs: - raise TypeError('Unexpected keyword argument ' - 'passed to optimizer: ' + str(k)) - self.__dict__.update(kwargs) - self.updates = [] - self.weights = [] - - @interfaces.legacy_get_updates_support - def get_updates(self, loss, params): - raise NotImplementedError - - def get_gradients(self, loss, params): - grads = K.gradients(loss, params) - if None in grads: - raise ValueError('An operation has `None` for gradient. ' - 'Please make sure that all of your ops have a ' - 'gradient defined (i.e. are differentiable). ' - 'Common ops without gradient: ' - 'K.argmax, K.round, K.eval.') - if hasattr(self, 'clipnorm') and self.clipnorm > 0: - norm = K.sqrt(sum([K.sum(K.square(g)) for g in grads])) - grads = [clip_norm(g, self.clipnorm, norm) for g in grads] - if hasattr(self, 'clipvalue') and self.clipvalue > 0: - grads = [K.clip(g, -self.clipvalue, self.clipvalue) for g in grads] - return grads - - def set_weights(self, weights): - """Sets the weights of the optimizer, from Numpy arrays. - - Should only be called after computing the gradients - (otherwise the optimizer has no weights). - - # Arguments - weights: a list of Numpy arrays. The number - of arrays and their shape must match - number of the dimensions of the weights - of the optimizer (i.e. it should match the - output of `get_weights`). - - # Raises - ValueError: in case of incompatible weight shapes. 
- """ - params = self.weights - if len(params) != len(weights): - raise ValueError('Length of the specified weight list (' + - str(len(weights)) + - ') does not match the number of weights ' + - 'of the optimizer (' + str(len(params)) + ')') - weight_value_tuples = [] - param_values = K.batch_get_value(params) - for pv, p, w in zip(param_values, params, weights): - if pv.shape != w.shape: - raise ValueError('Optimizer weight shape ' + - str(pv.shape) + - ' not compatible with ' - 'provided weight shape ' + str(w.shape)) - weight_value_tuples.append((p, w)) - K.batch_set_value(weight_value_tuples) - - def get_weights(self): - """Returns the current value of the weights of the optimizer. - - # Returns - A list of numpy arrays. - """ - return K.batch_get_value(self.weights) - - def get_config(self): - config = {} - if hasattr(self, 'clipnorm'): - config['clipnorm'] = self.clipnorm - if hasattr(self, 'clipvalue'): - config['clipvalue'] = self.clipvalue - return config - - @classmethod - def from_config(cls, config): - return cls(**config) - - -class SGD(Optimizer): - """Stochastic gradient descent optimizer. - - Includes support for momentum, - learning rate decay, and Nesterov momentum. - - # Arguments - lr: float >= 0. Learning rate. - momentum: float >= 0. Parameter that accelerates SGD - in the relevant direction and dampens oscillations. - decay: float >= 0. Learning rate decay over each update. - nesterov: boolean. Whether to apply Nesterov momentum. - """ - - def __init__(self, lr=0.01, momentum=0., decay=0., - nesterov=False, **kwargs): - super(SGD, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.iterations = K.variable(0, dtype='int64', name='iterations') - self.lr = K.variable(lr, name='lr') - self.momentum = K.variable(momentum, name='momentum') - self.decay = K.variable(decay, name='decay') - self.initial_decay = decay - self.nesterov = nesterov - - @interfaces.legacy_get_updates_support - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - # momentum - shapes = [K.int_shape(p) for p in params] - moments = [K.zeros(shape) for shape in shapes] - self.weights = [self.iterations] + moments - for p, g, m in zip(params, grads, moments): - v = self.momentum * m - lr * g # velocity - self.updates.append(K.update(m, v)) - - if self.nesterov: - new_p = p + self.momentum * v - lr * g - else: - new_p = p + v - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'momentum': float(K.get_value(self.momentum)), - 'decay': float(K.get_value(self.decay)), - 'nesterov': self.nesterov} - base_config = super(SGD, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class RMSprop(Optimizer): - """RMSProp optimizer. - - It is recommended to leave the parameters of this optimizer - at their default values - (except the learning rate, which can be freely tuned). - - # Arguments - lr: float >= 0. Learning rate. - rho: float >= 0. - epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. - decay: float >= 0. Learning rate decay over each update. 
- - # References - - [rmsprop: Divide the gradient by a running average of its recent magnitude - ](http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf) - """ - - def __init__(self, lr=0.001, rho=0.9, epsilon=None, decay=0., - **kwargs): - super(RMSprop, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.lr = K.variable(lr, name='lr') - self.rho = K.variable(rho, name='rho') - self.decay = K.variable(decay, name='decay') - self.iterations = K.variable(0, dtype='int64', name='iterations') - if epsilon is None: - epsilon = K.epsilon() - self.epsilon = epsilon - self.initial_decay = decay - - @interfaces.legacy_get_updates_support - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - accumulators = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) - for p in params] - self.weights = accumulators - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - for p, g, a in zip(params, grads, accumulators): - # update accumulator - new_a = self.rho * a + (1. - self.rho) * K.square(g) - self.updates.append(K.update(a, new_a)) - new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon) - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'rho': float(K.get_value(self.rho)), - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon} - base_config = super(RMSprop, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class Adagrad(Optimizer): - """Adagrad optimizer. - - Adagrad is an optimizer with parameter-specific learning rates, - which are adapted relative to how frequently a parameter gets - updated during training. The more updates a parameter receives, - the smaller the learning rate. - - It is recommended to leave the parameters of this optimizer - at their default values. - - # Arguments - lr: float >= 0. Initial learning rate. - epsilon: float >= 0. If `None`, defaults to `K.epsilon()`. - decay: float >= 0. Learning rate decay over each update. - - # References - - [Adaptive Subgradient Methods for Online Learning and Stochastic - Optimization](http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf) - """ - - def __init__(self, lr=0.01, epsilon=None, decay=0., **kwargs): - super(Adagrad, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.lr = K.variable(lr, name='lr') - self.decay = K.variable(decay, name='decay') - self.iterations = K.variable(0, dtype='int64', name='iterations') - if epsilon is None: - epsilon = K.epsilon() - self.epsilon = epsilon - self.initial_decay = decay - - @interfaces.legacy_get_updates_support - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - shapes = [K.int_shape(p) for p in params] - accumulators = [K.zeros(shape) for shape in shapes] - self.weights = accumulators - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - for p, g, a in zip(params, grads, accumulators): - new_a = a + K.square(g) # update accumulator - self.updates.append(K.update(a, new_a)) - new_p = p - lr * g / (K.sqrt(new_a) + self.epsilon) - - # Apply constraints. 
- if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon} - base_config = super(Adagrad, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class Adadelta(Optimizer): - """Adadelta optimizer. - - Adadelta is a more robust extension of Adagrad - that adapts learning rates based on a moving window of gradient updates, - instead of accumulating all past gradients. This way, Adadelta continues - learning even when many updates have been done. Compared to Adagrad, in the - original version of Adadelta you don't have to set an initial learning - rate. In this version, initial learning rate and decay factor can - be set, as in most other Keras optimizers. - - It is recommended to leave the parameters of this optimizer - at their default values. - - # Arguments - lr: float >= 0. Initial learning rate, defaults to 1. - It is recommended to leave it at the default value. - rho: float >= 0. Adadelta decay factor, corresponding to fraction of - gradient to keep at each time step. - epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. - decay: float >= 0. Initial learning rate decay. - - # References - - [Adadelta - an adaptive learning rate method]( - https://arxiv.org/abs/1212.5701) - """ - - def __init__(self, lr=1.0, rho=0.95, epsilon=None, decay=0., - **kwargs): - super(Adadelta, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.lr = K.variable(lr, name='lr') - self.decay = K.variable(decay, name='decay') - self.iterations = K.variable(0, dtype='int64', name='iterations') - if epsilon is None: - epsilon = K.epsilon() - self.rho = rho - self.epsilon = epsilon - self.initial_decay = decay - - @interfaces.legacy_get_updates_support - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - shapes = [K.int_shape(p) for p in params] - accumulators = [K.zeros(shape) for shape in shapes] - delta_accumulators = [K.zeros(shape) for shape in shapes] - self.weights = accumulators + delta_accumulators - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - for p, g, a, d_a in zip(params, grads, accumulators, delta_accumulators): - # update accumulator - new_a = self.rho * a + (1. - self.rho) * K.square(g) - self.updates.append(K.update(a, new_a)) - - # use the new accumulator and the *old* delta_accumulator - update = g * K.sqrt(d_a + self.epsilon) / \ - K.sqrt(new_a + self.epsilon) - new_p = p - lr * update - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - - # update delta_accumulator - new_d_a = self.rho * d_a + (1 - self.rho) * K.square(update) - self.updates.append(K.update(d_a, new_d_a)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'rho': self.rho, - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon} - base_config = super(Adadelta, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class Adam(Optimizer): - """Adam optimizer. - - Default parameters follow those provided in the original paper. - - # Arguments - lr: float >= 0. Learning rate. 
- beta_1: float, 0 < beta < 1. Generally close to 1. - beta_2: float, 0 < beta < 1. Generally close to 1. - epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. - decay: float >= 0. Learning rate decay over each update. - amsgrad: boolean. Whether to apply the AMSGrad variant of this - algorithm from the paper "On the Convergence of Adam and - Beyond". - - # References - - [Adam - A Method for Stochastic Optimization]( - https://arxiv.org/abs/1412.6980v8) - - [On the Convergence of Adam and Beyond]( - https://openreview.net/forum?id=ryQu7f-RZ) - """ - - def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, - epsilon=None, decay=0., amsgrad=False, **kwargs): - super(Adam, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.iterations = K.variable(0, dtype='int64', name='iterations') - self.lr = K.variable(lr, name='lr') - self.beta_1 = K.variable(beta_1, name='beta_1') - self.beta_2 = K.variable(beta_2, name='beta_2') - self.decay = K.variable(decay, name='decay') - if epsilon is None: - epsilon = K.epsilon() - self.epsilon = epsilon - self.initial_decay = decay - self.amsgrad = amsgrad - - @interfaces.legacy_get_updates_support - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - t = K.cast(self.iterations, K.floatx()) + 1 - lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / - (1. - K.pow(self.beta_1, t))) - - ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - if self.amsgrad: - vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - else: - vhats = [K.zeros(1) for _ in params] - self.weights = [self.iterations] + ms + vs + vhats - - for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): - m_t = (self.beta_1 * m) + (1. - self.beta_1) * g - v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g) - if self.amsgrad: - vhat_t = K.maximum(vhat, v_t) - p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon) - self.updates.append(K.update(vhat, vhat_t)) - else: - p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) - - self.updates.append(K.update(m, m_t)) - self.updates.append(K.update(v, v_t)) - new_p = p_t - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'beta_1': float(K.get_value(self.beta_1)), - 'beta_2': float(K.get_value(self.beta_2)), - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon, - 'amsgrad': self.amsgrad} - base_config = super(Adam, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class Adamax(Optimizer): - """Adamax optimizer from Adam paper's Section 7. - - It is a variant of Adam based on the infinity norm. - Default parameters follow those provided in the paper. - - # Arguments - lr: float >= 0. Learning rate. - beta_1: float, 0 < beta < 1. Generally close to 1. - beta_2: float, 0 < beta < 1. Generally close to 1. - epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. - decay: float >= 0. Learning rate decay over each update. 
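The bias-corrected step size `lr_t` computed in `Adam.get_updates` above starts well below the nominal `lr` and approaches it as `t` grows and the bias corrections vanish. A small numeric illustration:

```python
import numpy as np

lr, beta_1, beta_2 = 0.001, 0.9, 0.999
for t in (1, 10, 100, 1000):
    lr_t = lr * np.sqrt(1. - beta_2 ** t) / (1. - beta_1 ** t)
    print(t, lr_t)  # tends to lr for large t
```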
- - # References - - [Adam - A Method for Stochastic Optimization]( - https://arxiv.org/abs/1412.6980v8) - """ - - def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999, - epsilon=None, decay=0., **kwargs): - super(Adamax, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.iterations = K.variable(0, dtype='int64', name='iterations') - self.lr = K.variable(lr, name='lr') - self.beta_1 = K.variable(beta_1, name='beta_1') - self.beta_2 = K.variable(beta_2, name='beta_2') - self.decay = K.variable(decay, name='decay') - if epsilon is None: - epsilon = K.epsilon() - self.epsilon = epsilon - self.initial_decay = decay - - @interfaces.legacy_get_updates_support - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - t = K.cast(self.iterations, K.floatx()) + 1 - lr_t = lr / (1. - K.pow(self.beta_1, t)) - - shapes = [K.int_shape(p) for p in params] - # zero init of 1st moment - ms = [K.zeros(shape) for shape in shapes] - # zero init of exponentially weighted infinity norm - us = [K.zeros(shape) for shape in shapes] - self.weights = [self.iterations] + ms + us - - for p, g, m, u in zip(params, grads, ms, us): - - m_t = (self.beta_1 * m) + (1. - self.beta_1) * g - u_t = K.maximum(self.beta_2 * u, K.abs(g)) - p_t = p - lr_t * m_t / (u_t + self.epsilon) - - self.updates.append(K.update(m, m_t)) - self.updates.append(K.update(u, u_t)) - new_p = p_t - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'beta_1': float(K.get_value(self.beta_1)), - 'beta_2': float(K.get_value(self.beta_2)), - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon} - base_config = super(Adamax, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class Nadam(Optimizer): - """Nesterov Adam optimizer. - - Much like Adam is essentially RMSprop with momentum, - Nadam is RMSprop with Nesterov momentum. - - Default parameters follow those provided in the paper. - It is recommended to leave the parameters of this optimizer - at their default values. - - # Arguments - lr: float >= 0. Learning rate. - beta_1: float, 0 < beta < 1. Generally close to 1. - beta_2: float, 0 < beta < 1. Generally close to 1. - epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. - schedule_decay: float, 0 < schedule_decay < 1. 
- - # References - - [Nadam report](http://cs229.stanford.edu/proj2015/054_report.pdf) - - [On the importance of initialization and momentum in deep learning]( - http://www.cs.toronto.edu/~fritz/absps/momentum.pdf) - """ - - def __init__(self, lr=0.002, beta_1=0.9, beta_2=0.999, - epsilon=None, schedule_decay=0.004, **kwargs): - super(Nadam, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.iterations = K.variable(0, dtype='int64', name='iterations') - self.m_schedule = K.variable(1., name='m_schedule') - self.lr = K.variable(lr, name='lr') - self.beta_1 = K.variable(beta_1, name='beta_1') - self.beta_2 = K.variable(beta_2, name='beta_2') - if epsilon is None: - epsilon = K.epsilon() - self.epsilon = epsilon - self.schedule_decay = schedule_decay - - @interfaces.legacy_get_updates_support - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] - - t = K.cast(self.iterations, K.floatx()) + 1 - - # Due to the recommendations in [2], i.e. warming momentum schedule - momentum_cache_t = self.beta_1 * (1. - 0.5 * ( - K.pow(K.cast_to_floatx(0.96), t * self.schedule_decay))) - momentum_cache_t_1 = self.beta_1 * (1. - 0.5 * ( - K.pow(K.cast_to_floatx(0.96), (t + 1) * self.schedule_decay))) - m_schedule_new = self.m_schedule * momentum_cache_t - m_schedule_next = self.m_schedule * momentum_cache_t * momentum_cache_t_1 - self.updates.append((self.m_schedule, m_schedule_new)) - - shapes = [K.int_shape(p) for p in params] - ms = [K.zeros(shape) for shape in shapes] - vs = [K.zeros(shape) for shape in shapes] - - self.weights = [self.iterations] + ms + vs - - for p, g, m, v in zip(params, grads, ms, vs): - # the following equations given in [1] - g_prime = g / (1. - m_schedule_new) - m_t = self.beta_1 * m + (1. - self.beta_1) * g - m_t_prime = m_t / (1. - m_schedule_next) - v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g) - v_t_prime = v_t / (1. - K.pow(self.beta_2, t)) - m_t_bar = (1. - momentum_cache_t) * g_prime + ( - momentum_cache_t_1 * m_t_prime) - - self.updates.append(K.update(m, m_t)) - self.updates.append(K.update(v, v_t)) - - p_t = p - self.lr * m_t_bar / (K.sqrt(v_t_prime) + self.epsilon) - new_p = p_t - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'beta_1': float(K.get_value(self.beta_1)), - 'beta_2': float(K.get_value(self.beta_2)), - 'epsilon': self.epsilon, - 'schedule_decay': self.schedule_decay} - base_config = super(Nadam, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class TFOptimizer(Optimizer): - """Wrapper class for native TensorFlow optimizers. 
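The `TFOptimizer` wrapper below makes a native TensorFlow optimizer usable where a Keras optimizer is expected; `get()` further below performs this wrapping automatically. A hedged sketch, assuming the TF 1.x `tf.train` API that matches this vintage of Keras:

```python
import tensorflow as tf
from keras.optimizers import TFOptimizer

# Wrap a raw tf.train optimizer so model.compile(optimizer=opt) accepts it.
opt = TFOptimizer(tf.train.AdamOptimizer(learning_rate=1e-3))
```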
- - # Arguments - optimizer: Selected optimizer - """ - - def __init__(self, optimizer): - self.optimizer = optimizer - with K.name_scope(self.__class__.__name__): - self.iterations = K.variable(0, dtype='int64', name='iterations') - - @interfaces.legacy_get_updates_support - def get_updates(self, loss, params): - grads = self.optimizer.compute_gradients(loss, var_list=params) - self.updates = [K.update_add(self.iterations, 1)] - opt_update = self.optimizer.apply_gradients( - grads, global_step=self.iterations) - self.updates.append(opt_update) - return self.updates - - @property - def weights(self): - raise NotImplementedError - - def get_config(self): - raise NotImplementedError - - def from_config(self, config): - raise NotImplementedError - - -# Aliases. - -sgd = SGD -rmsprop = RMSprop -adagrad = Adagrad -adadelta = Adadelta -adam = Adam -adamax = Adamax -nadam = Nadam - - -def serialize(optimizer): - return serialize_keras_object(optimizer) - - -def deserialize(config, custom_objects=None): - """Inverse of the `serialize` function. - - # Arguments - config: Optimizer configuration dictionary. - custom_objects: Optional dictionary mapping - names (strings) to custom objects - (classes and functions) - to be considered during deserialization. - - # Returns - A Keras Optimizer instance. - """ - all_classes = { - 'sgd': SGD, - 'rmsprop': RMSprop, - 'adagrad': Adagrad, - 'adadelta': Adadelta, - 'adam': Adam, - 'adamax': Adamax, - 'nadam': Nadam, - 'tfoptimizer': TFOptimizer, - } - # Make deserialization case-insensitive for built-in optimizers. - if config['class_name'].lower() in all_classes: - config['class_name'] = config['class_name'].lower() - return deserialize_keras_object(config, - module_objects=all_classes, - custom_objects=custom_objects, - printable_module_name='optimizer') - - -def get(identifier): - """Retrieves a Keras Optimizer instance. - - # Arguments - identifier: Optimizer identifier, one of - - String: name of an optimizer - - Dictionary: configuration dictionary. - - Keras Optimizer instance (it will be returned unchanged). - - TensorFlow Optimizer instance - (it will be wrapped as a Keras Optimizer). - - # Returns - A Keras Optimizer instance. - - # Raises - ValueError: If `identifier` cannot be interpreted. - """ - if K.backend() == 'tensorflow': - # Wrap TF optimizer instances - if isinstance(identifier, tf.train.Optimizer): - return TFOptimizer(identifier) - if isinstance(identifier, dict): - return deserialize(identifier) - elif isinstance(identifier, six.string_types): - config = {'class_name': str(identifier), 'config': {}} - return deserialize(config) - if isinstance(identifier, Optimizer): - return identifier - else: - raise ValueError('Could not interpret optimizer identifier: ' + - str(identifier)) -"""Built-in regularizers. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -from . import backend as K -from .utils.generic_utils import serialize_keras_object -from .utils.generic_utils import deserialize_keras_object - - -class Regularizer(object): - """Regularizer base class. - """ - - def __call__(self, x): - return 0. - - @classmethod - def from_config(cls, config): - return cls(**config) - - -class L1L2(Regularizer): - """Regularizer for L1 and L2 regularization. - - # Arguments - l1: Float; L1 regularization factor. - l2: Float; L2 regularization factor. 
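As with constraints and initializers, regularizers are attached to layers, and the penalty returned by `__call__` is added to the model loss; the `l1_l2` factory used here is defined just below. A minimal sketch with illustrative factors:

```python
from keras.layers import Dense
from keras import regularizers

# Adds 0.01 * sum|w| + 0.01 * sum(w^2) over the kernel to the training loss.
layer = Dense(64, kernel_regularizer=regularizers.l1_l2(l1=0.01, l2=0.01))
```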
- """ - - def __init__(self, l1=0., l2=0.): - self.l1 = K.cast_to_floatx(l1) - self.l2 = K.cast_to_floatx(l2) - - def __call__(self, x): - regularization = 0. - if self.l1: - regularization += K.sum(self.l1 * K.abs(x)) - if self.l2: - regularization += K.sum(self.l2 * K.square(x)) - return regularization - - def get_config(self): - return {'l1': float(self.l1), - 'l2': float(self.l2)} - - -# Aliases. - - -def l1(l=0.01): - return L1L2(l1=l) - - -def l2(l=0.01): - return L1L2(l2=l) - - -def l1_l2(l1=0.01, l2=0.01): - return L1L2(l1=l1, l2=l2) - - -def serialize(regularizer): - return serialize_keras_object(regularizer) - - -def deserialize(config, custom_objects=None): - return deserialize_keras_object(config, - module_objects=globals(), - custom_objects=custom_objects, - printable_module_name='regularizer') - - -def get(identifier): - if identifier is None: - return None - if isinstance(identifier, dict): - return deserialize(identifier) - elif isinstance(identifier, six.string_types): - config = {'class_name': str(identifier), 'config': {}} - return deserialize(config) - elif callable(identifier): - return identifier - else: - raise ValueError('Could not interpret regularizer identifier: ' + - str(identifier)) -import pytest -from keras import backend as K - - -@pytest.fixture(autouse=True) -def clear_session_after_test(): - """Test wrapper to clean up after TensorFlow and CNTK tests. - - This wrapper runs for all the tests in the keras test suite. - """ - yield - if K.backend() == 'tensorflow' or K.backend() == 'cntk': - K.clear_session() -import pytest -import pyux -import keras -import json -import os - -import keras.backend.tensorflow_backend -import keras.backend.theano_backend -import keras.backend.cntk_backend -import keras.backend.numpy_backend -import keras.utils.test_utils - - -def test_api(): - api_file = os.path.join(os.getcwd(), 'api.json') - with open(api_file, 'r') as f: - previous_api = json.load(f) - current_api = pyux.sign(keras) - diff = pyux.diff(current_api, previous_api) - - exceptions = [ - pyux.ADDED_ARG_WITH_DEFAULT_IN_METHOD, - pyux.ADDED_DEFAULT_IN_METHOD - ] - - diff = list(filter(lambda c: c[0] not in exceptions, diff)) - if diff: - raise pyux.APIChangedException(diff) - - -if __name__ == '__main__': - test_api() -from __future__ import absolute_import -from __future__ import print_function -import pytest - -from keras.models import Model, Sequential -from keras.layers import Dense, Input - - -def test_layer_trainability_switch(): - # with constructor argument, in Sequential - model = Sequential() - model.add(Dense(2, trainable=False, input_dim=1)) - assert model.trainable_weights == [] - - # by setting the `trainable` argument, in Sequential - model = Sequential() - layer = Dense(2, input_dim=1) - model.add(layer) - assert model.trainable_weights == layer.trainable_weights - layer.trainable = False - assert model.trainable_weights == [] - - # with constructor argument, in Model - x = Input(shape=(1,)) - y = Dense(2, trainable=False)(x) - model = Model(x, y) - assert model.trainable_weights == [] - - # by setting the `trainable` argument, in Model - x = Input(shape=(1,)) - layer = Dense(2) - y = layer(x) - model = Model(x, y) - assert model.trainable_weights == layer.trainable_weights - layer.trainable = False - assert model.trainable_weights == [] - - -def test_model_trainability_switch(): - # a non-trainable model has no trainable weights - x = Input(shape=(1,)) - y = Dense(2)(x) - model = Model(x, y) - model.trainable = False - assert 
model.trainable_weights == [] - - # same for Sequential - model = Sequential() - model.add(Dense(2, input_dim=1)) - model.trainable = False - assert model.trainable_weights == [] - - -def test_nested_model_trainability(): - # a Sequential inside a Model - inner_model = Sequential() - inner_model.add(Dense(2, input_dim=1)) - - x = Input(shape=(1,)) - y = inner_model(x) - outer_model = Model(x, y) - assert outer_model.trainable_weights == inner_model.trainable_weights - inner_model.trainable = False - assert outer_model.trainable_weights == [] - inner_model.trainable = True - inner_model.layers[-1].trainable = False - assert outer_model.trainable_weights == [] - - # a Sequential inside a Sequential - inner_model = Sequential() - inner_model.add(Dense(2, input_dim=1)) - outer_model = Sequential() - outer_model.add(inner_model) - assert outer_model.trainable_weights == inner_model.trainable_weights - inner_model.trainable = False - assert outer_model.trainable_weights == [] - inner_model.trainable = True - inner_model.layers[-1].trainable = False - assert outer_model.trainable_weights == [] - - # a Model inside a Model - x = Input(shape=(1,)) - y = Dense(2)(x) - inner_model = Model(x, y) - x = Input(shape=(1,)) - y = inner_model(x) - outer_model = Model(x, y) - assert outer_model.trainable_weights == inner_model.trainable_weights - inner_model.trainable = False - assert outer_model.trainable_weights == [] - inner_model.trainable = True - inner_model.layers[-1].trainable = False - assert outer_model.trainable_weights == [] - - # a Model inside a Sequential - x = Input(shape=(1,)) - y = Dense(2)(x) - inner_model = Model(x, y) - outer_model = Sequential() - outer_model.add(inner_model) - assert outer_model.trainable_weights == inner_model.trainable_weights - inner_model.trainable = False - assert outer_model.trainable_weights == [] - inner_model.trainable = True - inner_model.layers[-1].trainable = False - assert outer_model.trainable_weights == [] - - -if __name__ == '__main__': - pytest.main([__file__]) -import numpy as np -import pytest - -from keras.models import Sequential -from keras.engine.training_utils import weighted_masked_objective -from keras.layers import TimeDistributed, Masking, Dense -from keras import losses -from keras import backend as K - - -def create_masking_model(): - model = Sequential() - model.add(Masking(mask_value=0, input_shape=(None, 1))) - model.add(TimeDistributed(Dense(1, kernel_initializer='one'))) - model.compile(loss='mse', optimizer='sgd') - return model - - -def test_masking(): - np.random.seed(1337) - x = np.array([[[1], [1]], - [[0], [0]]]) - model = create_masking_model() - y = np.array([[[1], [1]], - [[1], [1]]]) - loss = model.train_on_batch(x, y) - assert loss == 0 - - -def test_masking_is_all_zeros(): - x = y = np.array([[[0], [0]]]) - model = create_masking_model() - loss = model.train_on_batch(x, y) - assert loss == 0 - - -def test_loss_masking(): - weighted_loss = weighted_masked_objective(losses.get('mae')) - shape = (3, 4, 2) - x = np.arange(24).reshape(shape) - y = 2 * x - - # Normally the trailing 1 is added by standardize_weights - weights = np.ones((3,)) - mask = np.ones((3, 4)) - mask[1, 0] = 0 - - out = K.eval(weighted_loss(K.variable(x), - K.variable(y), - K.variable(weights), - K.variable(mask))) - - -if __name__ == '__main__': - pytest.main([__file__]) -from __future__ import absolute_import -from __future__ import print_function -import pytest -import numpy as np - -from keras import backend as K -from keras.utils.test_utils import 
get_test_data -from keras.models import Sequential, Model -from keras.layers import Dense, Activation, GRU, TimeDistributed, Input -from keras.utils import np_utils -from numpy.testing import assert_almost_equal, assert_array_almost_equal - -num_classes = 10 -batch_size = 128 -epochs = 15 -weighted_class = 5 -high_weight = 10 -train_samples = 5000 -test_samples = 1000 -timesteps = 3 -input_dim = 10 -loss = 'mse' -loss_full_name = 'mean_squared_error' -standard_weight = 1 -standard_score_sequential = 0.5 - -decimal_precision = { - 'cntk': 2, - 'theano': 6, - 'tensorflow': 6 -} - - -def _get_test_data(): - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = get_test_data(num_train=train_samples, - num_test=test_samples, - input_shape=( - input_dim,), - classification=True, - num_classes=num_classes) - int_y_test = y_test.copy() - int_y_train = y_train.copy() - # convert class vectors to binary class matrices - y_train = np_utils.to_categorical(y_train, num_classes) - y_test = np_utils.to_categorical(y_test, num_classes) - test_ids = np.where(int_y_test == np.array(weighted_class))[0] - - class_weight = dict([(i, standard_weight) for i in range(num_classes)]) - class_weight[weighted_class] = high_weight - - sample_weight = np.ones((y_train.shape[0])) * standard_weight - sample_weight[int_y_train == weighted_class] = high_weight - - return ((x_train, y_train), (x_test, y_test), - (sample_weight, class_weight, test_ids)) - - -def create_sequential_model(): - model = Sequential() - model.add(Dense(32, input_shape=(input_dim,))) - model.add(Activation('relu')) - model.add(Dense(num_classes)) - model.add(Activation('softmax')) - return model - - -def create_temporal_sequential_model(): - model = Sequential() - model.add(GRU(32, input_shape=(timesteps, input_dim), return_sequences=True)) - model.add(TimeDistributed(Dense(num_classes))) - model.add(Activation('softmax')) - return model - - -def test_sequential_class_weights(): - model = create_sequential_model() - model.compile(loss=loss, optimizer='rmsprop') - - ((x_train, y_train), (x_test, y_test), - (sample_weight, class_weight, test_ids)) = _get_test_data() - - model.fit(x_train, y_train, batch_size=batch_size, - epochs=epochs // 3, verbose=0, - class_weight=class_weight, - validation_data=(x_train, y_train, sample_weight)) - model.fit(x_train, y_train, batch_size=batch_size, - epochs=epochs // 2, verbose=0, - class_weight=class_weight) - model.fit(x_train, y_train, batch_size=batch_size, - epochs=epochs // 2, verbose=0, - class_weight=class_weight, - validation_split=0.1) - - model.train_on_batch(x_train[:32], y_train[:32], - class_weight=class_weight) - score = model.evaluate(x_test[test_ids, :], y_test[test_ids, :], verbose=0) - assert(score < standard_score_sequential) - - -def test_sequential_sample_weights(): - model = create_sequential_model() - model.compile(loss=loss, optimizer='rmsprop') - - ((x_train, y_train), (x_test, y_test), - (sample_weight, class_weight, test_ids)) = _get_test_data() - - model.fit(x_train, y_train, batch_size=batch_size, - epochs=epochs // 3, verbose=0, - sample_weight=sample_weight) - model.fit(x_train, y_train, batch_size=batch_size, - epochs=epochs // 3, verbose=0, - sample_weight=sample_weight, - validation_split=0.1) - - model.train_on_batch(x_train[:32], y_train[:32], - sample_weight=sample_weight[:32]) - model.test_on_batch(x_train[:32], y_train[:32], - sample_weight=sample_weight[:32]) - score = model.evaluate(x_test[test_ids, :], y_test[test_ids, :], verbose=0) - assert(score < 
standard_score_sequential) - - -def test_sequential_temporal_sample_weights(): - ((x_train, y_train), (x_test, y_test), - (sample_weight, class_weight, test_ids)) = _get_test_data() - - temporal_x_train = np.reshape(x_train, (len(x_train), 1, x_train.shape[1])) - temporal_x_train = np.repeat(temporal_x_train, timesteps, axis=1) - temporal_x_test = np.reshape(x_test, (len(x_test), 1, x_test.shape[1])) - temporal_x_test = np.repeat(temporal_x_test, timesteps, axis=1) - - temporal_y_train = np.reshape(y_train, (len(y_train), 1, y_train.shape[1])) - temporal_y_train = np.repeat(temporal_y_train, timesteps, axis=1) - temporal_y_test = np.reshape(y_test, (len(y_test), 1, y_test.shape[1])) - temporal_y_test = np.repeat(temporal_y_test, timesteps, axis=1) - - temporal_sample_weight = np.reshape(sample_weight, (len(sample_weight), 1)) - temporal_sample_weight = np.repeat( - temporal_sample_weight, timesteps, axis=1) - - model = create_temporal_sequential_model() - model.compile(loss=loss, optimizer='rmsprop', - sample_weight_mode='temporal') - - model.fit(temporal_x_train, temporal_y_train, batch_size=batch_size, - epochs=epochs // 3, verbose=0, - sample_weight=temporal_sample_weight) - model.fit(temporal_x_train, temporal_y_train, batch_size=batch_size, - epochs=epochs // 3, verbose=0, - sample_weight=temporal_sample_weight, - validation_split=0.1) - - model.train_on_batch(temporal_x_train[:32], temporal_y_train[:32], - sample_weight=temporal_sample_weight[:32]) - model.test_on_batch(temporal_x_train[:32], temporal_y_train[:32], - sample_weight=temporal_sample_weight[:32]) - score = model.evaluate(temporal_x_test[test_ids], temporal_y_test[test_ids], - verbose=0) - assert(score < standard_score_sequential) - - -def test_weighted_metrics_with_sample_weight(): - decimal = decimal_precision[K.backend()] - - model = create_sequential_model() - model.compile(loss=loss, optimizer='rmsprop', - metrics=[loss], weighted_metrics=[loss]) - - ((x_train, y_train), (x_test, y_test), - (sample_weight, class_weight, test_ids)) = _get_test_data() - - history = model.fit(x_train, y_train, batch_size=batch_size, - epochs=epochs // 3, verbose=0, - sample_weight=sample_weight) - - h = history.history - assert_array_almost_equal(h['loss'], h['weighted_' + loss_full_name], - decimal=decimal) - - history = model.fit(x_train, y_train, batch_size=batch_size, - epochs=epochs // 3, verbose=0, - sample_weight=sample_weight, - validation_split=0.1) - - h = history.history - assert_almost_equal(h['val_loss'], h['val_weighted_' + loss_full_name], - decimal=decimal) - - model.train_on_batch(x_train[:32], y_train[:32], - sample_weight=sample_weight[:32]) - model.test_on_batch(x_train[:32], y_train[:32], - sample_weight=sample_weight[:32]) - - test_sample_weight = np.ones((y_test.shape[0])) * standard_weight - test_sample_weight[test_ids] = high_weight - - scores = model.evaluate(x_test, y_test, verbose=0, - sample_weight=test_sample_weight) - loss_score, metric_score, weighted_metric_score = scores - - assert loss_score < standard_score_sequential - assert loss_score != metric_score - assert_almost_equal(loss_score, weighted_metric_score, decimal=decimal) - - -def test_weighted_metrics_with_no_sample_weight(): - decimal = decimal_precision[K.backend()] - - model = create_sequential_model() - model.compile(loss=loss, optimizer='rmsprop', - metrics=[loss], weighted_metrics=[loss]) - - (x_train, y_train), (x_test, y_test), _ = _get_test_data() - - history = model.fit(x_train, y_train, batch_size=batch_size, - epochs=epochs // 3, 
verbose=0) - - h = history.history - assert_array_almost_equal(h['loss'], h[loss_full_name], decimal=decimal) - assert_array_almost_equal(h['loss'], h['weighted_' + loss_full_name], - decimal=decimal) - - history = model.fit(x_train, y_train, batch_size=batch_size, - epochs=epochs // 3, verbose=0, validation_split=0.1) - - h = history.history - assert_array_almost_equal(h['val_loss'], h['val_' + loss_full_name], - decimal=decimal) - assert_array_almost_equal(h['val_loss'], h['val_weighted_' + loss_full_name], - decimal=decimal) - - model.train_on_batch(x_train[:32], y_train[:32]) - model.test_on_batch(x_train[:32], y_train[:32]) - - scores = model.evaluate(x_test, y_test, verbose=0) - loss_score, metric_score, weighted_metric_score = scores - - assert_almost_equal(loss_score, metric_score, decimal=decimal) - assert_almost_equal(loss_score, weighted_metric_score, decimal=decimal) - - -def test_weighted_metrics_with_weighted_accuracy_metric(): - model = create_sequential_model() - model.compile(loss=loss, optimizer='rmsprop', - metrics=['acc'], weighted_metrics=['acc']) - - (x_train, y_train), _, (sample_weight, _, _) = _get_test_data() - - history = model.fit(x_train, y_train, batch_size=batch_size, - epochs=epochs // 3, verbose=0, - sample_weight=sample_weight) - - assert history.history['acc'] != history.history['weighted_acc'] - - -def test_weighted_metrics_with_multiple_outputs(): - decimal = decimal_precision[K.backend()] - - inputs = Input(shape=(5,)) - x = Dense(5)(inputs) - output1 = Dense(1, name='output1')(x) - output2 = Dense(1, name='output2')(x) - - model = Model(inputs=inputs, outputs=[output1, output2]) - - metrics = {'output1': [loss], 'output2': [loss]} - weighted_metrics = {'output2': [loss]} - loss_map = {'output1': loss, 'output2': loss} - - model.compile(loss=loss_map, optimizer='sgd', - metrics=metrics, weighted_metrics=weighted_metrics) - - x = np.array([[1, 1, 1, 1, 1]]) - y = {'output1': np.array([0]), 'output2': np.array([1])} - weight = 5 - - history = model.fit(x, y, sample_weight={'output2': np.array([weight])}) - - unweighted_metric = history.history['output2_' + loss_full_name][0] - weighted_metric = history.history['output2_weighted_' + loss_full_name][0] - - assert_almost_equal(unweighted_metric * weight, - weighted_metric, decimal=decimal) - - -def test_class_weight_wrong_classes(): - model = create_sequential_model() - model.compile(loss=loss, optimizer='rmsprop') - - ((x_train, y_train), (x_test, y_test), - (sample_weight, class_weight, test_ids)) = _get_test_data() - - del class_weight[1] - with pytest.raises(ValueError): - model.fit(x_train, y_train, - epochs=0, verbose=0, class_weight=class_weight) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import sys -import numpy as np -from numpy.testing import assert_allclose - -import keras -from keras import layers -from keras import optimizers -from keras import losses -from keras import metrics - -if sys.version_info[0] == 3: - import pickle -else: - import cPickle as pickle - - -def test_sequential_model_pickling(): - model = keras.Sequential() - model.add(layers.Dense(2, input_shape=(3,))) - model.add(layers.RepeatVector(3)) - model.add(layers.TimeDistributed(layers.Dense(3))) - model.compile(loss=losses.MSE, - optimizer=optimizers.RMSprop(lr=0.0001), - metrics=[metrics.categorical_accuracy], - sample_weight_mode='temporal') - x = np.random.random((1, 3)) - y = np.random.random((1, 3, 3)) - model.train_on_batch(x, y) - - out = model.predict(x) - - state = pickle.dumps(model) - - 
new_model = pickle.loads(state) - - out2 = new_model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - # test that new updates are the same with both models - x = np.random.random((1, 3)) - y = np.random.random((1, 3, 3)) - model.train_on_batch(x, y) - new_model.train_on_batch(x, y) - out = model.predict(x) - out2 = new_model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - -def test_sequential_model_pickling_custom_objects(): - # test with custom optimizer, loss - class CustomSGD(optimizers.SGD): - pass - - def custom_mse(*args, **kwargs): - return losses.mse(*args, **kwargs) - - model = keras.Sequential() - model.add(layers.Dense(2, input_shape=(3,))) - model.add(layers.Dense(3)) - model.compile(loss=custom_mse, optimizer=CustomSGD(), metrics=['acc']) - - x = np.random.random((1, 3)) - y = np.random.random((1, 3)) - model.train_on_batch(x, y) - - out = model.predict(x) - - state = pickle.dumps(model) - - with keras.utils.CustomObjectScope( - {'CustomSGD': CustomSGD, 'custom_mse': custom_mse}): - model = pickle.loads(state) - - out2 = model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - -def test_functional_model_pickling(): - inputs = keras.Input(shape=(3,)) - x = layers.Dense(2)(inputs) - outputs = layers.Dense(3)(x) - - model = keras.Model(inputs, outputs) - model.compile(loss=losses.MSE, - optimizer=optimizers.Adam(), - metrics=[metrics.categorical_accuracy]) - x = np.random.random((1, 3)) - y = np.random.random((1, 3)) - model.train_on_batch(x, y) - - out = model.predict(x) - state = pickle.dumps(model) - - model = pickle.loads(state) - - out2 = model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - -def test_pickling_multiple_metrics_outputs(): - inputs = keras.Input(shape=(5,)) - x = layers.Dense(5)(inputs) - output1 = layers.Dense(1, name='output1')(x) - output2 = layers.Dense(1, name='output2')(x) - - model = keras.Model(inputs=inputs, outputs=[output1, output2]) - - metrics = {'output1': ['mse', 'binary_accuracy'], - 'output2': ['mse', 'binary_accuracy'] - } - loss = {'output1': 'mse', 'output2': 'mse'} - - model.compile(loss=loss, optimizer='sgd', metrics=metrics) - - # assure that model is working - x = np.array([[1, 1, 1, 1, 1]]) - out = model.predict(x) - - model = pickle.loads(pickle.dumps(model)) - - out2 = model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - -def test_pickling_without_compilation(): - """Test pickling model without compiling. 
- """ - model = keras.Sequential() - model.add(layers.Dense(2, input_shape=(3,))) - model.add(layers.Dense(3)) - - model = pickle.loads(pickle.dumps(model)) - - -def test_pickling_right_after_compilation(): - model = keras.Sequential() - model.add(layers.Dense(2, input_shape=(3,))) - model.add(layers.Dense(3)) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - model._make_train_function() - - model = pickle.loads(pickle.dumps(model)) - - -if __name__ == '__main__': - pytest.main([__file__]) -import io -import pytest -import os -import h5py -import tempfile -from contextlib import contextmanager -import numpy as np -from numpy.testing import assert_allclose -from numpy.testing import assert_raises - -from keras import backend as K -from keras.engine.saving import preprocess_weights_for_loading -from keras.models import Model, Sequential -from keras.layers import Dense, Lambda, RepeatVector, TimeDistributed -from keras.layers import Bidirectional, GRU, LSTM, CuDNNGRU, CuDNNLSTM -from keras.layers import Conv2D, Flatten -from keras.layers import Input, InputLayer -from keras.initializers import Constant -from keras import optimizers -from keras import losses -from keras import metrics -from keras.models import save_model, load_model -from keras.utils.test_utils import tf_file_io_proxy -try: - from unittest.mock import patch -except: - from mock import patch - - -skipif_no_tf_gpu = pytest.mark.skipif( - (K.backend() != 'tensorflow' or - not K.tensorflow_backend._get_available_gpus()), - reason='Requires TensorFlow backend and a GPU') - - -def test_sequential_model_saving(): - model = Sequential() - model.add(Dense(2, input_shape=(3,))) - model.add(RepeatVector(3)) - model.add(TimeDistributed(Dense(3))) - model.compile(loss=losses.MSE, - optimizer=optimizers.RMSprop(lr=0.0001), - metrics=[metrics.categorical_accuracy], - sample_weight_mode='temporal') - x = np.random.random((1, 3)) - y = np.random.random((1, 3, 3)) - model.train_on_batch(x, y) - - out = model.predict(x) - - _, fname = tempfile.mkstemp('.h5') - save_model(model, fname) - new_model_disk = load_model(fname) - os.remove(fname) - - with tf_file_io_proxy('keras.engine.saving.tf_file_io') as file_io_proxy: - gcs_filepath = file_io_proxy.get_filepath(filename=fname) - save_model(model, gcs_filepath) - file_io_proxy.assert_exists(gcs_filepath) - new_model_gcs = load_model(gcs_filepath) - file_io_proxy.delete_file(gcs_filepath) # cleanup - - x2 = np.random.random((1, 3)) - y2 = np.random.random((1, 3, 3)) - model.train_on_batch(x2, y2) - out_2 = model.predict(x2) - - for new_model in [new_model_disk, new_model_gcs]: - new_out = new_model.predict(x) - assert_allclose(out, new_out, atol=1e-05) - # test that new updates are the same with both models - new_model.train_on_batch(x2, y2) - new_out_2 = new_model.predict(x2) - assert_allclose(out_2, new_out_2, atol=1e-05) - - -def test_sequential_model_saving_2(): - # test with custom optimizer, loss - custom_opt = optimizers.rmsprop - custom_loss = losses.mse - model = Sequential() - model.add(Dense(2, input_shape=(3,))) - model.add(Dense(3)) - model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc']) - - x = np.random.random((1, 3)) - y = np.random.random((1, 3)) - model.train_on_batch(x, y) - out = model.predict(x) - - load_kwargs = {'custom_objects': {'custom_opt': custom_opt, - 'custom_loss': custom_loss}} - _, fname = tempfile.mkstemp('.h5') - save_model(model, fname) - new_model_disk = load_model(fname, **load_kwargs) - os.remove(fname) - - with 
tf_file_io_proxy('keras.engine.saving.tf_file_io') as file_io_proxy: - gcs_filepath = file_io_proxy.get_filepath(filename=fname) - save_model(model, gcs_filepath) - file_io_proxy.assert_exists(gcs_filepath) - new_model_gcs = load_model(gcs_filepath, **load_kwargs) - file_io_proxy.delete_file(gcs_filepath) # cleanup - - for new_model in [new_model_disk, new_model_gcs]: - new_out = new_model.predict(x) - assert_allclose(out, new_out, atol=1e-05) - - -def _get_sample_model_and_input(): - inputs = Input(shape=(3,)) - x = Dense(2)(inputs) - outputs = Dense(3)(x) - - model = Model(inputs, outputs) - model.compile(loss=losses.MSE, - optimizer=optimizers.Adam(), - metrics=[metrics.categorical_accuracy]) - x = np.random.random((1, 3)) - y = np.random.random((1, 3)) - model.train_on_batch(x, y) - - return model, x - - -def test_functional_model_saving(): - model, x = _get_sample_model_and_input() - out = model.predict(x) - _, fname = tempfile.mkstemp('.h5') - save_model(model, fname) - new_model_disk = load_model(fname) - os.remove(fname) - - with tf_file_io_proxy('keras.engine.saving.tf_file_io') as file_io_proxy: - gcs_filepath = file_io_proxy.get_filepath(filename=fname) - save_model(model, gcs_filepath) - file_io_proxy.assert_exists(gcs_filepath) - new_model_gcs = load_model(gcs_filepath) - file_io_proxy.delete_file(gcs_filepath) # cleanup - - for new_model in [new_model_disk, new_model_gcs]: - new_out = new_model.predict(x) - assert_allclose(out, new_out, atol=1e-05) - - -def test_model_saving_to_pre_created_h5py_file(): - model, x = _get_sample_model_and_input() - - out = model.predict(x) - _, fname = tempfile.mkstemp('.h5') - with h5py.File(fname, mode='r+') as h5file: - save_model(model, h5file) - loaded_model = load_model(h5file) - out2 = loaded_model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - # test non-default options in h5 - with h5py.File('does not matter', driver='core', - backing_store=False) as h5file: - save_model(model, h5file) - loaded_model = load_model(h5file) - out2 = loaded_model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - with h5py.File(fname, mode='r+') as h5file: - g = h5file.create_group('model') - save_model(model, g) - loaded_model = load_model(g) - out2 = loaded_model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - -@contextmanager -def temp_filename(filename): - """Context that returns a temporary filename and deletes the file on exit if - it still exists (so that this is not forgotten). - """ - _, temp_fname = tempfile.mkstemp(filename) - yield temp_fname - if os.path.exists(temp_fname): - os.remove(temp_fname) - - -def test_model_saving_to_binary_stream(): - model, x = _get_sample_model_and_input() - out = model.predict(x) - - with temp_filename('h5') as fname: - # save directly to binary file - with open(fname, 'wb') as raw_file: - save_model(model, raw_file) - # Load the data the usual way, and make sure the model is intact. - with h5py.File(fname, mode='r') as h5file: - loaded_model = load_model(h5file) - out2 = loaded_model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - -def test_model_loading_from_binary_stream(): - model, x = _get_sample_model_and_input() - out = model.predict(x) - - with temp_filename('h5') as fname: - # save the model the usual way - with h5py.File(fname, mode='w') as h5file: - save_model(model, h5file) - # Load the data binary, and make sure the model is intact. 
- with open(fname, 'rb') as raw_file: - loaded_model = load_model(raw_file) - out2 = loaded_model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - -def test_model_save_load_binary_in_memory(): - model, x = _get_sample_model_and_input() - out = model.predict(x) - - stream = io.BytesIO() - save_model(model, stream) - stream.seek(0) - loaded_model = load_model(stream) - out2 = loaded_model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - -def test_saving_multiple_metrics_outputs(): - inputs = Input(shape=(5,)) - x = Dense(5)(inputs) - output1 = Dense(1, name='output1')(x) - output2 = Dense(1, name='output2')(x) - - model = Model(inputs=inputs, outputs=[output1, output2]) - - metrics = {'output1': ['mse', 'binary_accuracy'], - 'output2': ['mse', 'binary_accuracy'] - } - loss = {'output1': 'mse', 'output2': 'mse'} - - model.compile(loss=loss, optimizer='sgd', metrics=metrics) - - # assure that model is working - x = np.array([[1, 1, 1, 1, 1]]) - out = model.predict(x) - _, fname = tempfile.mkstemp('.h5') - save_model(model, fname) - - model = load_model(fname) - os.remove(fname) - - out2 = model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - -def test_saving_without_compilation(): - """Test saving model without compiling. - """ - model = Sequential() - model.add(Dense(2, input_shape=(3,))) - model.add(Dense(3)) - - _, fname = tempfile.mkstemp('.h5') - save_model(model, fname) - model = load_model(fname) - os.remove(fname) - - -def test_saving_right_after_compilation(): - model = Sequential() - model.add(Dense(2, input_shape=(3,))) - model.add(Dense(3)) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - model._make_train_function() - - _, fname = tempfile.mkstemp('.h5') - save_model(model, fname) - model = load_model(fname) - os.remove(fname) - - -def test_saving_unused_layers_is_ok(): - a = Input(shape=(256, 512, 6)) - b = Input(shape=(256, 512, 1)) - c = Lambda(lambda x: x[:, :, :, :1])(a) - - model = Model(inputs=[a, b], outputs=c) - - _, fname = tempfile.mkstemp('.h5') - save_model(model, fname) - load_model(fname) - os.remove(fname) - - -def test_loading_weights_by_name_and_reshape(): - """ - test loading model weights by name on: - - sequential model - """ - - # test with custom optimizer, loss - custom_opt = optimizers.rmsprop - custom_loss = losses.mse - - # sequential model - model = Sequential() - model.add(Conv2D(2, (1, 1), input_shape=(1, 1, 1), name='rick')) - model.add(Flatten()) - model.add(Dense(3, name='morty')) - model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc']) - - x = np.random.random((1, 1, 1, 1)) - y = np.random.random((1, 3)) - model.train_on_batch(x, y) - - out = model.predict(x) - old_weights = [layer.get_weights() for layer in model.layers] - _, fname = tempfile.mkstemp('.h5') - - model.save_weights(fname) - - # delete and recreate model - del(model) - model = Sequential() - model.add(Conv2D(2, (1, 1), input_shape=(1, 1, 1), name='rick')) - model.add(Conv2D(3, (1, 1), name='morty')) - model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc']) - - # load weights from first model - with pytest.raises(ValueError): - model.load_weights(fname, by_name=True, reshape=False) - with pytest.raises(ValueError): - model.load_weights(fname, by_name=False, reshape=False) - model.load_weights(fname, by_name=False, reshape=True) - model.load_weights(fname, by_name=True, reshape=True) - - out2 = model.predict(x) - assert_allclose(np.squeeze(out), np.squeeze(out2), atol=1e-05) - for i in range(len(model.layers)): - 
new_weights = model.layers[i].get_weights() - for j in range(len(new_weights)): - # only compare layers that have weights, skipping Flatten() - if old_weights[i]: - assert_allclose(old_weights[i][j], new_weights[j], atol=1e-05) - - # delete and recreate model with `use_bias=False` - del(model) - model = Sequential() - model.add(Conv2D(2, (1, 1), input_shape=( - 1, 1, 1), use_bias=False, name='rick')) - model.add(Flatten()) - model.add(Dense(3, name='morty')) - with pytest.raises(ValueError, - match=r'.* expects [0-9]+ .* but the saved .* [0-9]+ .*'): - model.load_weights(fname) - with pytest.raises(ValueError, - match=r'.* expects [0-9]+ .* but the saved .* [0-9]+ .*'): - model.load_weights(fname, by_name=True) - with pytest.warns(UserWarning, - match=r'Skipping loading .* due to mismatch .*'): - model.load_weights(fname, by_name=True, skip_mismatch=True) - - # delete and recreate model with `filters=10` - del(model) - model = Sequential() - model.add(Conv2D(10, (1, 1), input_shape=(1, 1, 1), name='rick')) - with pytest.raises(ValueError, - match=r'.* has shape .* but the saved .* shape .*'): - model.load_weights(fname, by_name=True) - with pytest.raises(ValueError, - match=r'.* load .* [0-9]+ layers into .* [0-9]+ layers.'): - model.load_weights(fname) - - os.remove(fname) - - -def test_loading_weights_by_name_2(): - """ - test loading model weights by name on: - - both sequential and functional api models - - different architecture with shared names - """ - - # test with custom optimizer, loss - custom_opt = optimizers.rmsprop - custom_loss = losses.mse - - # sequential model - model = Sequential() - model.add(Dense(2, input_shape=(3,), name='rick')) - model.add(Dense(3, name='morty')) - model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc']) - - x = np.random.random((1, 3)) - y = np.random.random((1, 3)) - model.train_on_batch(x, y) - - out = model.predict(x) - old_weights = [layer.get_weights() for layer in model.layers] - _, fname = tempfile.mkstemp('.h5') - - model.save_weights(fname) - - # delete and recreate model using Functional API - del(model) - data = Input(shape=(3,)) - rick = Dense(2, name='rick')(data) - jerry = Dense(3, name='jerry')(rick) # add 2 layers (but maintain shapes) - jessica = Dense(2, name='jessica')(jerry) - morty = Dense(3, name='morty')(jessica) - - model = Model(inputs=[data], outputs=[morty]) - model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc']) - - # load weights from first model - model.load_weights(fname, by_name=True) - os.remove(fname) - - out2 = model.predict(x) - assert np.max(np.abs(out - out2)) > 1e-05 - - rick = model.layers[1].get_weights() - jerry = model.layers[2].get_weights() - jessica = model.layers[3].get_weights() - morty = model.layers[4].get_weights() - - assert_allclose(old_weights[0][0], rick[0], atol=1e-05) - assert_allclose(old_weights[0][1], rick[1], atol=1e-05) - assert_allclose(old_weights[1][0], morty[0], atol=1e-05) - assert_allclose(old_weights[1][1], morty[1], atol=1e-05) - assert_allclose(np.zeros_like(jerry[1]), jerry[1]) # biases init to 0 - assert_allclose(np.zeros_like(jessica[1]), jessica[1]) # biases init to 0 - - -def test_loading_weights_by_name_skip_mismatch(): - """ - test skipping layers while loading model weights by name on: - - sequential model - """ - - # test with custom optimizer, loss - custom_opt = optimizers.rmsprop - custom_loss = losses.mse - - # sequential model - model = Sequential() - model.add(Dense(2, input_shape=(3,), name='rick')) - model.add(Dense(3, 
name='morty'))
-    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])
-
-    x = np.random.random((1, 3))
-    y = np.random.random((1, 3))
-    model.train_on_batch(x, y)
-
-    out = model.predict(x)
-    old_weights = [layer.get_weights() for layer in model.layers]
-    _, fname = tempfile.mkstemp('.h5')
-
-    model.save_weights(fname)
-
-    # delete and recreate model
-    del(model)
-    model = Sequential()
-    model.add(Dense(2, input_shape=(3,), name='rick'))
-    model.add(Dense(4, name='morty'))  # different shape w.r.t. previous model
-    model.compile(loss=custom_loss, optimizer=custom_opt(), metrics=['acc'])
-
-    # load weights from first model
-    with pytest.warns(UserWarning):  # expect UserWarning for skipping weights
-        model.load_weights(fname, by_name=True, skip_mismatch=True)
-    os.remove(fname)
-
-    # assert layers 'rick' are equal
-    for old, new in zip(old_weights[0], model.layers[0].get_weights()):
-        assert_allclose(old, new, atol=1e-05)
-
-    # assert layers 'morty' are not equal, since we skipped loading this layer
-    for old, new in zip(old_weights[1], model.layers[1].get_weights()):
-        assert_raises(AssertionError, assert_allclose, old, new, atol=1e-05)
-
-
-# a function to be called from the Lambda layer
-def square_fn(x):
-    return x * x
-
-
-def test_saving_lambda_custom_objects():
-    inputs = Input(shape=(3,))
-    x = Lambda(lambda x: square_fn(x), output_shape=(3,))(inputs)
-    outputs = Dense(3)(x)
-
-    model = Model(inputs, outputs)
-    model.compile(loss=losses.MSE,
-                  optimizer=optimizers.RMSprop(lr=0.0001),
-                  metrics=[metrics.categorical_accuracy])
-    x = np.random.random((1, 3))
-    y = np.random.random((1, 3))
-    model.train_on_batch(x, y)
-
-    out = model.predict(x)
-    _, fname = tempfile.mkstemp('.h5')
-    save_model(model, fname)
-
-    model = load_model(fname, custom_objects={'square_fn': square_fn})
-    os.remove(fname)
-
-    out2 = model.predict(x)
-    assert_allclose(out, out2, atol=1e-05)
-
-
-def test_saving_lambda_numpy_array_arguments():
-    mean = np.random.random((4, 2, 3))
-    std = np.abs(np.random.random((4, 2, 3))) + 1e-5
-    inputs = Input(shape=(4, 2, 3))
-    outputs = Lambda(lambda image, mu, std: (image - mu) / std,
-                     arguments={'mu': mean, 'std': std})(inputs)
-    model = Model(inputs, outputs)
-    model.compile(loss='mse', optimizer='sgd', metrics=['acc'])
-
-    _, fname = tempfile.mkstemp('.h5')
-    save_model(model, fname)
-
-    model = load_model(fname)
-    os.remove(fname)
-
-    assert_allclose(mean, model.layers[1].arguments['mu'])
-    assert_allclose(std, model.layers[1].arguments['std'])
-
-
-def test_saving_custom_activation_function():
-    x = Input(shape=(3,))
-    output = Dense(3, activation=K.cos)(x)
-
-    model = Model(x, output)
-    model.compile(loss=losses.MSE,
-                  optimizer=optimizers.RMSprop(lr=0.0001),
-                  metrics=[metrics.categorical_accuracy])
-    x = np.random.random((1, 3))
-    y = np.random.random((1, 3))
-    model.train_on_batch(x, y)
-
-    out = model.predict(x)
-    _, fname = tempfile.mkstemp('.h5')
-    save_model(model, fname)
-
-    model = load_model(fname, custom_objects={'cos': K.cos})
-    os.remove(fname)
-
-    out2 = model.predict(x)
-    assert_allclose(out, out2, atol=1e-05)
-
-
-def test_saving_model_with_long_layer_names():
-    # This layer name will make the `layers_name` HDF5 attribute blow
-    # out of proportion. Note that it fits into the internal HDF5
-    # attribute memory limit on its own but because h5py converts
-    # the list of layer names into numpy array, which uses the same
-    # amount of memory for every item, it increases the memory
-    # requirements substantially.
-    x = Input(shape=(2,), name='input_' + ('x' * (2**15)))
-    f = x
-    for i in range(4):
-        f = Dense(2, name='dense_%d' % (i,))(f)
-
-    model = Model(inputs=[x], outputs=[f])
-
-    model.compile(loss='mse', optimizer='adam', metrics=['acc'])
-
-    x = np.random.random((1, 2))
-    y = np.random.random((1, 2))
-    model.train_on_batch(x, y)
-
-    out = model.predict(x)
-
-    _, fname = tempfile.mkstemp('.h5')
-    save_model(model, fname)
-
-    model = load_model(fname)
-
-    # Check that the HDF5 file contains a chunked array
-    # of layer names.
-    with h5py.File(fname, 'r') as h5file:
-        n_layer_names_arrays = len([attr for attr in h5file['model_weights'].attrs
-                                    if attr.startswith('layer_names')])
-
-    os.remove(fname)
-
-    # The chunking of the layer names array should have happened.
-    assert n_layer_names_arrays > 0
-
-    out2 = model.predict(x)
-    assert_allclose(out, out2, atol=1e-05)
-
-
-def test_saving_model_with_long_weights_names():
-    x = Input(shape=(2,), name='nested_model_input')
-    f = x
-    for i in range(4):
-        f = Dense(2, name='nested_model_dense_%d' % (i,))(f)
-    f = Dense(2, name='nested_model_dense_4', trainable=False)(f)
-    # This layer name will make the `weights_name`
-    # HDF5 attribute blow out of proportion.
-    f = Dense(2, name='nested_model_output' + ('x' * (2**15)))(f)
-    nested_model = Model(inputs=[x], outputs=[f], name='nested_model')
-
-    x = Input(shape=(2,), name='outer_model_input')
-    f = nested_model(x)
-    f = Dense(2, name='outer_model_output')(f)
-
-    model = Model(inputs=[x], outputs=[f])
-
-    model.compile(loss='mse', optimizer='adam', metrics=['acc'])
-
-    x = np.random.random((1, 2))
-    y = np.random.random((1, 2))
-    model.train_on_batch(x, y)
-
-    out = model.predict(x)
-
-    _, fname = tempfile.mkstemp('.h5')
-    save_model(model, fname)
-
-    model = load_model(fname)
-
-    # Check that the HDF5 file contains a chunked array
-    # of weight names.
-    with h5py.File(fname, 'r') as h5file:
-        attrs = [attr for attr in h5file['model_weights']['nested_model'].attrs
-                 if attr.startswith('weight_names')]
-        n_weight_names_arrays = len(attrs)
-
-    os.remove(fname)
-
-    # The chunking of the weight names array should have happened.
- assert n_weight_names_arrays > 0 - - out2 = model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - -def test_saving_recurrent_layer_with_init_state(): - vector_size = 8 - input_length = 20 - - input_initial_state = Input(shape=(vector_size,)) - input_x = Input(shape=(input_length, vector_size)) - - lstm = LSTM(vector_size, return_sequences=True)( - input_x, initial_state=[input_initial_state, input_initial_state]) - - model = Model(inputs=[input_x, input_initial_state], outputs=[lstm]) - - _, fname = tempfile.mkstemp('.h5') - model.save(fname) - - loaded_model = load_model(fname) - os.remove(fname) - - -def test_saving_recurrent_layer_without_bias(): - vector_size = 8 - input_length = 20 - - input_x = Input(shape=(input_length, vector_size)) - lstm = LSTM(vector_size, use_bias=False)(input_x) - model = Model(inputs=[input_x], outputs=[lstm]) - - _, fname = tempfile.mkstemp('.h5') - model.save(fname) - - loaded_model = load_model(fname) - os.remove(fname) - - -def test_loop_model_saving(): - model = Sequential() - model.add(Dense(2, input_shape=(3,))) - model.compile(loss=losses.MSE, - optimizer=optimizers.RMSprop(lr=0.0001), - metrics=[metrics.categorical_accuracy]) - - x = np.random.random((1, 3)) - y = np.random.random((1, 2)) - _, fname = tempfile.mkstemp('.h5') - - for _ in range(3): - model.train_on_batch(x, y) - save_model(model, fname, overwrite=True) - out = model.predict(x) - - new_model = load_model(fname) - os.remove(fname) - - out2 = new_model.predict(x) - assert_allclose(out, out2, atol=1e-05) - - -def test_saving_constant_initializer_with_numpy(): - """Test saving and loading model of constant initializer with numpy inputs. - """ - model = Sequential() - model.add(Dense(2, input_shape=(3,), - kernel_initializer=Constant(np.ones((3, 2))))) - model.add(Dense(3)) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - - _, fname = tempfile.mkstemp('.h5') - save_model(model, fname) - model = load_model(fname) - os.remove(fname) - - -def test_save_load_weights_gcs(): - model = Sequential() - model.add(Dense(2, input_shape=(3,))) - org_weights = model.get_weights() - - with tf_file_io_proxy('keras.engine.saving.tf_file_io') as file_io_proxy: - gcs_filepath = file_io_proxy.get_filepath( - filename='test_save_load_weights_gcs.h5') - # we should not use same filename in several tests to allow for parallel - # execution - model.save_weights(gcs_filepath) - model.set_weights([np.random.random(w.shape) for w in org_weights]) - for w, org_w in zip(model.get_weights(), org_weights): - assert not (w == org_w).all() - model.load_weights(gcs_filepath) - for w, org_w in zip(model.get_weights(), org_weights): - assert_allclose(w, org_w) - - file_io_proxy.delete_file(gcs_filepath) # cleanup - - -def test_saving_overwrite_option(): - model = Sequential() - model.add(Dense(2, input_shape=(3,))) - org_weights = model.get_weights() - new_weights = [np.random.random(w.shape) for w in org_weights] - - _, fname = tempfile.mkstemp('.h5') - save_model(model, fname) - model.set_weights(new_weights) - - with patch('keras.engine.saving.ask_to_proceed_with_overwrite') as ask: - ask.return_value = False - save_model(model, fname, overwrite=False) - ask.assert_called_once() - new_model = load_model(fname) - for w, org_w in zip(new_model.get_weights(), org_weights): - assert_allclose(w, org_w) - - ask.return_value = True - save_model(model, fname, overwrite=False) - assert ask.call_count == 2 - new_model = load_model(fname) - for w, new_w in zip(new_model.get_weights(), new_weights): - 
assert_allclose(w, new_w) - - os.remove(fname) - - -def test_saving_overwrite_option_gcs(): - model = Sequential() - model.add(Dense(2, input_shape=(3,))) - org_weights = model.get_weights() - new_weights = [np.random.random(w.shape) for w in org_weights] - - with tf_file_io_proxy('keras.engine.saving.tf_file_io') as file_io_proxy: - gcs_filepath = file_io_proxy.get_filepath( - filename='test_saving_overwrite_option_gcs.h5') - # we should not use same filename in several tests to allow for parallel - # execution - save_model(model, gcs_filepath) - model.set_weights(new_weights) - - with patch('keras.engine.saving.ask_to_proceed_with_overwrite') as ask: - ask.return_value = False - save_model(model, gcs_filepath, overwrite=False) - ask.assert_called_once() - new_model = load_model(gcs_filepath) - for w, org_w in zip(new_model.get_weights(), org_weights): - assert_allclose(w, org_w) - - ask.return_value = True - save_model(model, gcs_filepath, overwrite=False) - assert ask.call_count == 2 - new_model = load_model(gcs_filepath) - for w, new_w in zip(new_model.get_weights(), new_weights): - assert_allclose(w, new_w) - - file_io_proxy.delete_file(gcs_filepath) # cleanup - - -@pytest.mark.parametrize('implementation', [1, 2], ids=['impl1', 'impl2']) -@pytest.mark.parametrize('bidirectional', - [False, True], - ids=['single', 'bidirectional']) -@pytest.mark.parametrize('to_cudnn', [False, True], ids=['from_cudnn', 'to_cudnn']) -@pytest.mark.parametrize('rnn_type', ['LSTM', 'GRU'], ids=['LSTM', 'GRU']) -@pytest.mark.parametrize('model_nest_level', - [1, 2], - ids=['model_plain', 'model_nested']) -@pytest.mark.parametrize('model_type', - ['func', 'seq'], - ids=['model_func', 'model_seq']) -@skipif_no_tf_gpu -def test_load_weights_between_noncudnn_rnn(rnn_type, to_cudnn, bidirectional, - implementation, model_nest_level, - model_type): - input_size = 10 - timesteps = 6 - input_shape = (timesteps, input_size) - units = 2 - num_samples = 32 - inputs = np.random.random((num_samples, timesteps, input_size)) - - rnn_layer_kwargs = { - 'recurrent_activation': 'sigmoid', - # ensure biases are non-zero and properly converted - 'bias_initializer': 'random_uniform', - 'implementation': implementation - } - if rnn_type == 'LSTM': - rnn_layer_class = LSTM - cudnn_rnn_layer_class = CuDNNLSTM - else: - rnn_layer_class = GRU - cudnn_rnn_layer_class = CuDNNGRU - rnn_layer_kwargs['reset_after'] = True - - layer = rnn_layer_class(units, **rnn_layer_kwargs) - if bidirectional: - layer = Bidirectional(layer) - - cudnn_layer = cudnn_rnn_layer_class(units) - if bidirectional: - cudnn_layer = Bidirectional(cudnn_layer) - - model = _make_nested_model( - input_shape, layer, model_nest_level, model_type) - cudnn_model = _make_nested_model(input_shape, cudnn_layer, - model_nest_level, model_type) - - if to_cudnn: - _convert_model_weights(model, cudnn_model) - else: - _convert_model_weights(cudnn_model, model) - - assert_allclose(model.predict(inputs), - cudnn_model.predict(inputs), atol=1e-4) - - -def _make_nested_model(input_shape, layer, level=1, model_type='func'): - # example: make_nested_seq_model((1,), Dense(10), level=2).summary() - def make_nested_seq_model(input_shape, layer, level=1): - model = layer - for i in range(1, level + 1): - layers = [InputLayer(input_shape), model] if (i == 1) else [model] - model = Sequential(layers) - return model - - # example: make_nested_func_model((1,), Dense(10), level=2).summary() - def make_nested_func_model(input_shape, layer, level=1): - input = Input(input_shape) - model = 
layer - for i in range(level): - model = Model(input, model(input)) - return model - - if model_type == 'func': - return make_nested_func_model(input_shape, layer, level) - elif model_type == 'seq': - return make_nested_seq_model(input_shape, layer, level) - - -def _convert_model_weights(source_model, target_model): - _, fname = tempfile.mkstemp('.h5') - source_model.save_weights(fname) - target_model.load_weights(fname) - os.remove(fname) - - -@pytest.mark.parametrize('to_cudnn', [False, True], ids=['from_cudnn', 'to_cudnn']) -@pytest.mark.parametrize('rnn_type', ['LSTM', 'GRU'], ids=['LSTM', 'GRU']) -@skipif_no_tf_gpu -def test_load_weights_between_noncudnn_rnn_time_distributed(rnn_type, to_cudnn): - """ - Similar test as test_load_weights_between_noncudnn_rnn() but has different - rank of input due to usage of TimeDistributed. Issue: #10356. - """ - input_size = 10 - steps = 6 - timesteps = 6 - input_shape = (timesteps, steps, input_size) - units = 2 - num_samples = 32 - inputs = np.random.random((num_samples,) + input_shape) - - rnn_layer_kwargs = { - 'recurrent_activation': 'sigmoid', - # ensure biases are non-zero and properly converted - 'bias_initializer': 'random_uniform', - } - if rnn_type == 'LSTM': - rnn_layer_class = LSTM - cudnn_rnn_layer_class = CuDNNLSTM - else: - rnn_layer_class = GRU - cudnn_rnn_layer_class = CuDNNGRU - rnn_layer_kwargs['reset_after'] = True - - layer = rnn_layer_class(units, **rnn_layer_kwargs) - layer = TimeDistributed(layer) - - cudnn_layer = cudnn_rnn_layer_class(units) - cudnn_layer = TimeDistributed(cudnn_layer) - - model = _make_nested_model(input_shape, layer) - cudnn_model = _make_nested_model(input_shape, cudnn_layer) - - if to_cudnn: - _convert_model_weights(model, cudnn_model) - else: - _convert_model_weights(cudnn_model, model) - - assert_allclose(model.predict(inputs), - cudnn_model.predict(inputs), atol=1e-4) - - -@skipif_no_tf_gpu -def test_preprocess_weights_for_loading_gru_incompatible(): - """ - Loading weights between incompatible layers should fail fast with an exception. - """ - def gru(cudnn=False, **kwargs): - layer_class = CuDNNGRU if cudnn else GRU - return layer_class(2, input_shape=[3, 5], **kwargs) - - def initialize_weights(layer): - # A model is needed to initialize weights. 
-        _ = Sequential([layer])
-        return layer
-
-    def assert_not_compatible(src, dest, message):
-        with pytest.raises(ValueError) as ex:
-            preprocess_weights_for_loading(dest,
-                                           initialize_weights(src).get_weights())
-        assert message in str(ex.value)
-
-    assert_not_compatible(gru(), gru(cudnn=True),
-                          'GRU(reset_after=False) is not compatible with CuDNNGRU')
-    assert_not_compatible(gru(cudnn=True), gru(),
-                          'CuDNNGRU is not compatible with GRU(reset_after=False)')
-    assert_not_compatible(gru(), gru(reset_after=True),
-                          'GRU(reset_after=False) is not compatible with '
-                          'GRU(reset_after=True)')
-    assert_not_compatible(gru(reset_after=True), gru(),
-                          'GRU(reset_after=True) is not compatible with '
-                          'GRU(reset_after=False)')
-
-
-if __name__ == '__main__':
-    pytest.main([__file__])
-from __future__ import print_function
-
-import multiprocessing as mp
-import os
-import sys
-import threading
-import pytest
-import numpy as np
-import six
-
-from keras.models import Sequential
-from keras.layers.core import Dense
-from keras.utils import Sequence
-from keras import backend as K
-
-pytestmark = pytest.mark.skipif(
-    six.PY2 and 'TRAVIS_PYTHON_VERSION' in os.environ,
-    reason='Temporarily disabled until the use_multiprocessing problem is solved')
-
-skip_generators = pytest.mark.skipif(K.backend() in {'tensorflow', 'cntk'} and
-                                     'TRAVIS_PYTHON_VERSION' in os.environ,
-                                     reason='Generators do not work with `spawn`.')
-
-
-def use_spawn(func):
-    """Decorator which uses `spawn` when possible.
-    This is useful on Travis to avoid memory issues.
-    """
-
-    @six.wraps(func)
-    def wrapper(*args, **kwargs):
-        if sys.version_info > (3, 4) and os.name != 'nt':
-            mp.set_start_method('spawn', force=True)
-            out = func(*args, **kwargs)
-            mp.set_start_method('fork', force=True)
-        else:
-            out = func(*args, **kwargs)
-        return out
-
-    return wrapper
-
-
-STEPS_PER_EPOCH = 100
-STEPS = 100
-WORKERS = 4 if K.backend() != 'tensorflow' else 2
-
-
-class DummySequence(Sequence):
-    def __getitem__(self, idx):
-        return np.zeros([10, 2]), np.ones([10])
-
-    def __len__(self):
-        return 10
-
-
-class threadsafe_iter:
-    """Takes an iterator/generator and makes it thread-safe by
-    serializing calls to the `next` method of the given iterator/generator.
-    """
-
-    def __init__(self, it):
-        self.it = it
-        self.lock = threading.Lock()
-
-    def __iter__(self):
-        return self
-
-    def __next__(self):
-        return self.next()
-
-    def next(self):
-        with self.lock:
-            return next(self.it)
-
-
-def threadsafe_generator(f):
-    """A decorator that takes a generator function and makes it thread-safe.
-    """
-
-    def g(*a, **kw):
-        return threadsafe_iter(f(*a, **kw))
-
-    return g
-
-
-@pytest.fixture
-def in_tmpdir(tmpdir):
-    """Runs a function in a temporary directory.
-
-    Checks that the directory is empty afterwards.
- """ - with tmpdir.as_cwd(): - yield None - assert not tmpdir.listdir() - - -@skip_generators -def test_multiprocessing_training(): - arr_data = np.random.randint(0, 256, (50, 2)) - arr_labels = np.random.randint(0, 2, 50) - arr_weights = np.random.random(50) - - @threadsafe_generator - def custom_generator(use_weights=False): - batch_size = 10 - n_samples = 50 - - while True: - batch_index = np.random.randint(0, n_samples - batch_size) - start = batch_index - end = start + batch_size - X = arr_data[start: end] - y = arr_labels[start: end] - if use_weights: - w = arr_weights[start: end] - yield X, y, w - else: - yield X, y - - # Build a NN - model = Sequential() - model.add(Dense(1, input_shape=(2,))) - model.compile(loss='mse', optimizer='adadelta') - - # - Produce data on 4 worker processes, consume on main process: - # - Each worker process runs OWN copy of generator - # - BUT on Windows, `multiprocessing` won't marshall generators across - # process boundaries -> make sure `fit_generator()` raises ValueError - # exception and does not attempt to run the generator. - if os.name == 'nt': - with pytest.raises(ValueError): - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - epochs=1, - verbose=1, - validation_steps=None, - max_queue_size=10, - workers=WORKERS, - use_multiprocessing=True) - else: - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - epochs=1, - verbose=1, - validation_steps=None, - max_queue_size=10, - workers=WORKERS, - use_multiprocessing=True) - - # - Produce data on 4 worker threads, consume on main thread: - # - All worker threads share the SAME generator - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - epochs=1, - verbose=1, - validation_steps=None, - max_queue_size=10, - workers=WORKERS, - use_multiprocessing=False) - - # - Produce data on 1 worker process, consume on main process: - # - Worker process runs generator - # - BUT on Windows, `multiprocessing` won't marshall generators across - # process boundaries -> make sure `fit_generator()` raises ValueError - # exception and does not attempt to run the generator. - if os.name == 'nt': - with pytest.raises(ValueError): - model.fit_generator(custom_generator(True), - steps_per_epoch=STEPS_PER_EPOCH, - validation_data=(arr_data[:10], - arr_labels[:10], - arr_weights[:10]), - validation_steps=1, - max_queue_size=10, - workers=1, - use_multiprocessing=True) - else: - model.fit_generator(custom_generator(True), - steps_per_epoch=STEPS_PER_EPOCH, - validation_data=(arr_data[:10], - arr_labels[:10], - arr_weights[:10]), - validation_steps=1, - max_queue_size=10, - workers=1, - use_multiprocessing=True) - - # - Produce data on 1 worker thread, consume on main thread: - # - Worker thread is the only thread running the generator - model.fit_generator(custom_generator(True), - steps_per_epoch=STEPS_PER_EPOCH, - validation_data=(arr_data[:10], - arr_labels[:10], - arr_weights[:10]), - validation_steps=1, - max_queue_size=10, - workers=1, - use_multiprocessing=False) - - # - Produce data on 1 worker process, consume on main process: - # - Worker process runs generator - # - BUT on Windows, `multiprocessing` won't marshall generators across - # process boundaries -> make sure `fit_generator()` raises ValueError - # exception and does not attempt to run the generator. 
- if os.name == 'nt': - with pytest.raises(ValueError): - model.fit_generator(custom_generator(True), - steps_per_epoch=STEPS_PER_EPOCH, - validation_data=custom_generator(True), - validation_steps=1, - max_queue_size=10, - workers=1, - use_multiprocessing=True) - else: - model.fit_generator(custom_generator(True), - steps_per_epoch=STEPS_PER_EPOCH, - validation_data=custom_generator(True), - validation_steps=1, - max_queue_size=10, - workers=1, - use_multiprocessing=True) - - # - Produce data on 1 worker thread AT A TIME, consume on main thread: - # - Worker threads for training and validation run generator SEQUENTIALLY - model.fit_generator(custom_generator(True), - steps_per_epoch=STEPS_PER_EPOCH, - validation_data=custom_generator(True), - validation_steps=1, - max_queue_size=10, - workers=1, - use_multiprocessing=False) - - # - Produce and consume data without a queue on main thread - # - Make sure the value of `use_multiprocessing` is ignored - model.fit_generator(custom_generator(True), - steps_per_epoch=STEPS_PER_EPOCH, - validation_data=custom_generator(True), - validation_steps=1, - max_queue_size=10, - workers=0, - use_multiprocessing=True) - model.fit_generator(custom_generator(True), - steps_per_epoch=STEPS_PER_EPOCH, - validation_data=custom_generator(True), - validation_steps=1, - max_queue_size=10, - workers=0, - use_multiprocessing=False) - - # Test invalid use cases - @threadsafe_generator - def invalid_generator(): - while True: - yield arr_data[:10], arr_data[:10], arr_labels[:10], arr_labels[:10] - - # not specified `validation_steps` - with pytest.raises(ValueError): - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - validation_data=custom_generator(), - validation_steps=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False) - - # validation data is neither a tuple nor a triple. - with pytest.raises(ValueError): - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - validation_data=(arr_data[:10], - arr_data[:10], - arr_labels[:10], - arr_weights[:10]), - validation_steps=1, - max_queue_size=10, - workers=1, - use_multiprocessing=False) - - # validation generator is neither a tuple nor a triple. 
- with pytest.raises(ValueError): - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - validation_data=invalid_generator(), - validation_steps=1, - max_queue_size=10, - workers=1, - use_multiprocessing=False) - - # - For Sequence - model.fit_generator(DummySequence(), - steps_per_epoch=STEPS_PER_EPOCH, - validation_data=DummySequence(), - validation_steps=1, - max_queue_size=10, - workers=0, - use_multiprocessing=True) - model.fit_generator(DummySequence(), - steps_per_epoch=STEPS_PER_EPOCH, - validation_data=DummySequence(), - validation_steps=1, - max_queue_size=10, - workers=0, - use_multiprocessing=False) - - -@skip_generators -def test_multiprocessing_training_from_file(in_tmpdir): - arr_data = np.random.randint(0, 256, (50, 2)) - arr_labels = np.random.randint(0, 2, 50) - np.savez('data.npz', **{'data': arr_data, 'labels': arr_labels}) - - @threadsafe_generator - def custom_generator(): - - batch_size = 10 - n_samples = 50 - - with np.load('data.npz') as arr: - while True: - batch_index = np.random.randint(0, n_samples - batch_size) - start = batch_index - end = start + batch_size - X = arr['data'][start: end] - y = arr['labels'][start: end] - yield X, y - - # Build a NN - model = Sequential() - model.add(Dense(1, input_shape=(2,))) - model.compile(loss='mse', optimizer='adadelta') - - # - Produce data on 4 worker processes, consume on main process: - # - Each worker process runs OWN copy of generator - # - BUT on Windows, `multiprocessing` won't marshall generators across - # process boundaries -> make sure `fit_generator()` raises ValueError - # exception and does not attempt to run the generator. - if os.name == 'nt': - with pytest.raises(ValueError): - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - epochs=1, - verbose=1, - validation_steps=None, - max_queue_size=10, - workers=WORKERS, - use_multiprocessing=True) - else: - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - epochs=1, - verbose=1, - validation_steps=None, - max_queue_size=10, - workers=WORKERS, - use_multiprocessing=True) - - # - Produce data on 1 worker process, consume on main process: - # - Worker process runs generator - # - BUT on Windows, `multiprocessing` won't marshall generators across - # process boundaries -> make sure `fit_generator()` raises ValueError - # exception and does not attempt to run the generator. 
- if os.name == 'nt': - with pytest.raises(ValueError): - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - epochs=1, - verbose=1, - validation_steps=None, - max_queue_size=10, - workers=1, - use_multiprocessing=True) - else: - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - epochs=1, - verbose=1, - validation_steps=None, - max_queue_size=10, - workers=1, - use_multiprocessing=True) - - # - Produce data on 1 worker thread, consume on main thread: - # - Worker thread is the only thread running the generator - - # - Produce and consume data without a queue on main thread - # - Make sure the value of `use_multiprocessing` is ignored - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - epochs=1, - verbose=1, - validation_steps=None, - max_queue_size=10, - workers=0, - use_multiprocessing=True) - - os.remove('data.npz') - - -def test_multithreading_from_file(): - arr_data = np.random.randint(0, 256, (50, 2)) - arr_labels = np.random.randint(0, 2, 50) - np.savez('data_threads.npz', **{'data': arr_data, 'labels': arr_labels}) - - @threadsafe_generator - def custom_generator(): - batch_size = 10 - n_samples = 50 - - with np.load('data_threads.npz') as arr: - while True: - batch_index = np.random.randint(0, n_samples - batch_size) - start = batch_index - end = start + batch_size - X = arr['data'][start: end] - y = arr['labels'][start: end] - yield X, y - - # Build a NN - model = Sequential() - model.add(Dense(1, input_shape=(2,))) - model.compile(loss='mse', optimizer='adadelta') - - # - Produce data on 4 worker threads, consume on main thread: - # - All worker threads share the SAME generator - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - epochs=1, - verbose=1, - validation_steps=None, - max_queue_size=10, - workers=WORKERS, - use_multiprocessing=False) - - # - Produce data on 1 worker thread, consume on main thread: - # - Worker thread is the only thread running the generator - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - epochs=1, - verbose=1, - validation_steps=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False) - - # - Produce and consume data without a queue on main thread - # - Make sure the value of `use_multiprocessing` is ignored - model.fit_generator(custom_generator(), - steps_per_epoch=STEPS_PER_EPOCH, - epochs=1, - verbose=1, - validation_steps=None, - max_queue_size=10, - workers=0, - use_multiprocessing=False) - - os.remove('data_threads.npz') - - -@skip_generators -def test_multiprocessing_predicting(): - arr_data = np.random.randint(0, 256, (50, 2)) - - @threadsafe_generator - def custom_generator(): - batch_size = 10 - n_samples = 50 - - while True: - batch_index = np.random.randint(0, n_samples - batch_size) - start = batch_index - end = start + batch_size - X = arr_data[start: end] - yield X - - # Build a NN - model = Sequential() - model.add(Dense(1, input_shape=(2,))) - model.compile(loss='mse', optimizer='adadelta') - - # - Produce data on 4 worker processes, consume on main process: - # - Each worker process runs OWN copy of generator - # - BUT on Windows, `multiprocessing` won't marshall generators across - # process boundaries -> make sure `predict_generator()` raises ValueError - # exception and does not attempt to run the generator. 
-    if os.name == 'nt':
-        with pytest.raises(ValueError):
-            model.predict_generator(custom_generator(),
-                                    steps=STEPS,
-                                    max_queue_size=10,
-                                    workers=WORKERS,
-                                    use_multiprocessing=True)
-    else:
-        model.predict_generator(custom_generator(),
-                                steps=STEPS,
-                                max_queue_size=10,
-                                workers=WORKERS,
-                                use_multiprocessing=True)
-
-    # - Produce data on 1 worker process, consume on main process:
-    #   - Worker process runs generator
-    #   - BUT on Windows, `multiprocessing` won't marshall generators across
-    #     process boundaries -> make sure `predict_generator()` raises ValueError
-    #     exception and does not attempt to run the generator.
-    if os.name == 'nt':
-        with pytest.raises(ValueError):
-            model.predict_generator(custom_generator(),
-                                    steps=STEPS,
-                                    max_queue_size=10,
-                                    workers=1,
-                                    use_multiprocessing=True)
-    else:
-        model.predict_generator(custom_generator(),
-                                steps=STEPS,
-                                max_queue_size=10,
-                                workers=1,
-                                use_multiprocessing=True)
-
-    # - Main thread runs the generator without a queue
-    #   - Make sure the value of `use_multiprocessing` is ignored
-    model.predict_generator(custom_generator(),
-                            steps=STEPS,
-                            max_queue_size=10,
-                            workers=0,
-                            use_multiprocessing=True)
-
-
-def test_multithreading_predicting():
-    arr_data = np.random.randint(0, 256, (50, 2))
-
-    @threadsafe_generator
-    def custom_generator():
-        batch_size = 10
-        n_samples = 50
-
-        while True:
-            batch_index = np.random.randint(0, n_samples - batch_size)
-            start = batch_index
-            end = start + batch_size
-            X = arr_data[start: end]
-            yield X
-
-    # Build a NN
-    model = Sequential()
-    model.add(Dense(1, input_shape=(2,)))
-    model.compile(loss='mse', optimizer='adadelta')
-
-    # - Produce data on 4 worker threads, consume on main thread:
-    #   - All worker threads share the SAME generator
-    model.predict_generator(custom_generator(),
-                            steps=STEPS,
-                            max_queue_size=10,
-                            workers=WORKERS,
-                            use_multiprocessing=False)
-
-    # - Produce data on 1 worker thread, consume on main thread:
-    #   - Worker thread is the only thread running the generator
-    model.predict_generator(custom_generator(),
-                            steps=STEPS,
-                            max_queue_size=10,
-                            workers=1,
-                            use_multiprocessing=False)
-
-    # - Main thread runs the generator without a queue
-    #   - Make sure the value of `use_multiprocessing` is ignored
-    model.predict_generator(custom_generator(),
-                            steps=STEPS,
-                            max_queue_size=10,
-                            workers=0,
-                            use_multiprocessing=False)
-
-
-@skip_generators
-def test_multiprocessing_evaluating():
-    arr_data = np.random.randint(0, 256, (50, 2))
-    arr_labels = np.random.randint(0, 2, 50)
-
-    @threadsafe_generator
-    def custom_generator():
-        batch_size = 10
-        n_samples = 50
-
-        while True:
-            batch_index = np.random.randint(0, n_samples - batch_size)
-            start = batch_index
-            end = start + batch_size
-            X = arr_data[start: end]
-            y = arr_labels[start: end]
-            yield X, y
-
-    # Build a NN
-    model = Sequential()
-    model.add(Dense(1, input_shape=(2,)))
-    model.compile(loss='mse', optimizer='adadelta')
-
-    # - Produce data on 4 worker processes, consume on main process:
-    #   - Each worker process runs OWN copy of generator
-    #   - BUT on Windows, `multiprocessing` won't marshall generators across
-    #     process boundaries
-    #     -> make sure `evaluate_generator()` raises ValueError
-    #     exception and does not attempt to run the generator.
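-    # [Editor's aside, not part of the original file] "OWN copy of generator"
-    # has a data consequence as well as a plumbing one: every worker process
-    # replays the generator from its first yield, so the same batches can be
-    # produced once per process. Sketch of the replay effect:
-    #
-    #     def gen():
-    #         for i in range(3):
-    #             yield i
-    #
-    #     # with 4 worker processes, each re-runs gen() from scratch, so the
-    #     # values 0, 1, 2 may each be consumed up to 4 times in total; the
-    #     # random sampling in custom_generator() above hides this here.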
- if os.name == 'nt': - with pytest.raises(ValueError): - model.evaluate_generator(custom_generator(), - steps=STEPS, - max_queue_size=10, - workers=WORKERS, - use_multiprocessing=True) - else: - model.evaluate_generator(custom_generator(), - steps=STEPS, - max_queue_size=10, - workers=WORKERS, - use_multiprocessing=True) - - # - Produce data on 1 worker process, consume on main process: - # - Worker process runs generator - # - BUT on Windows, `multiprocessing` won't marshall generators across - # process boundaries -> make sure `evaluate_generator()` raises ValueError - # exception and does not attempt to run the generator. - if os.name == 'nt': - with pytest.raises(ValueError): - model.evaluate_generator(custom_generator(), - steps=STEPS, - max_queue_size=10, - workers=1, - use_multiprocessing=True) - else: - model.evaluate_generator(custom_generator(), - steps=STEPS, - max_queue_size=10, - workers=1, - use_multiprocessing=True) - - # - Produce and consume data without a queue on main thread - # - Make sure the value of `use_multiprocessing` is ignored - model.evaluate_generator(custom_generator(), - steps=STEPS, - max_queue_size=10, - workers=0, - use_multiprocessing=True) - - -def test_multithreading_evaluating(): - arr_data = np.random.randint(0, 256, (50, 2)) - arr_labels = np.random.randint(0, 2, 50) - - @threadsafe_generator - def custom_generator(): - batch_size = 10 - n_samples = 50 - - while True: - batch_index = np.random.randint(0, n_samples - batch_size) - start = batch_index - end = start + batch_size - X = arr_data[start: end] - y = arr_labels[start: end] - yield X, y - - # Build a NN - model = Sequential() - model.add(Dense(1, input_shape=(2,))) - model.compile(loss='mse', optimizer='adadelta') - - # - Produce data on 4 worker threads, consume on main thread: - # - All worker threads share the SAME generator - model.evaluate_generator(custom_generator(), - steps=STEPS, - max_queue_size=10, - workers=WORKERS, - use_multiprocessing=False) - - # - Produce data on 1 worker thread, consume on main thread: - # - Worker thread is the only thread running the generator - model.evaluate_generator(custom_generator(), - steps=STEPS, - max_queue_size=10, - workers=1, - use_multiprocessing=False) - - # - Produce and consume data without a queue on main thread - # - Make sure the value of `use_multiprocessing` is ignored - model.evaluate_generator(custom_generator(), - steps=STEPS, - max_queue_size=10, - workers=0, - use_multiprocessing=False) - - -@skip_generators -def test_multiprocessing_fit_error(): - arr_data = np.random.randint(0, 256, (50, 2)) - arr_labels = np.random.randint(0, 2, 50) - batch_size = 10 - n_samples = 50 - good_batches = 3 - - @threadsafe_generator - def custom_generator(use_weights=False): - """Raises an exception after a few good batches""" - for i in range(good_batches): - batch_index = np.random.randint(0, n_samples - batch_size) - start = batch_index - end = start + batch_size - X = arr_data[start: end] - y = arr_labels[start: end] - yield X, y - raise RuntimeError - - model = Sequential() - model.add(Dense(1, input_shape=(2,))) - model.compile(loss='mse', optimizer='adadelta') - - samples = batch_size * (good_batches + 1) - - # - Produce data on 4 worker processes, consume on main process: - # - Each worker process runs OWN copy of generator - # - BUT on Windows, `multiprocessing` won't marshall generators across - # process boundaries -> make sure `fit_generator()` raises ValueError - # exception and does not attempt to run the generator. 
-    # - On other platforms, make sure `RuntimeError` exception bubbles up
-    if os.name == 'nt':
-        with pytest.raises(ValueError):
-            model.fit_generator(custom_generator(),
-                                steps_per_epoch=samples,
-                                validation_steps=None,
-                                max_queue_size=10,
-                                workers=WORKERS,
-                                use_multiprocessing=True)
-    else:
-        with pytest.raises(RuntimeError):
-            model.fit_generator(custom_generator(),
-                                steps_per_epoch=samples,
-                                validation_steps=None,
-                                max_queue_size=10,
-                                workers=WORKERS,
-                                use_multiprocessing=True)
-
-    # - Produce data on 1 worker process, consume on main process:
-    #   - Worker process runs generator
-    #   - BUT on Windows, `multiprocessing` won't marshall generators across
-    #     process boundaries -> make sure `fit_generator()` raises ValueError
-    #     exception and does not attempt to run the generator.
-    # - On other platforms, make sure `RuntimeError` exception bubbles up
-    if os.name == 'nt':
-        with pytest.raises(ValueError):
-            model.fit_generator(custom_generator(),
-                                steps_per_epoch=samples,
-                                validation_steps=None,
-                                max_queue_size=10,
-                                workers=1,
-                                use_multiprocessing=True)
-    else:
-        with pytest.raises(RuntimeError):
-            model.fit_generator(custom_generator(),
-                                steps_per_epoch=samples,
-                                validation_steps=None,
-                                max_queue_size=10,
-                                workers=1,
-                                use_multiprocessing=True)
-
-    # - Make sure the value of `use_multiprocessing` is ignored
-    # - Make sure `RuntimeError` exception bubbles up
-    with pytest.raises(RuntimeError):
-        model.fit_generator(custom_generator(),
-                            steps_per_epoch=samples,
-                            validation_steps=None,
-                            max_queue_size=10,
-                            workers=0,
-                            use_multiprocessing=True)
-
-
-def test_multithreading_fit_error():
-    arr_data = np.random.randint(0, 256, (50, 2))
-    arr_labels = np.random.randint(0, 2, 50)
-    batch_size = 10
-    n_samples = 50
-    good_batches = 3
-
-    @threadsafe_generator
-    def custom_generator():
-        """Raises an exception after a few good batches"""
-        for i in range(good_batches):
-            batch_index = np.random.randint(0, n_samples - batch_size)
-            start = batch_index
-            end = start + batch_size
-            X = arr_data[start: end]
-            y = arr_labels[start: end]
-            yield X, y
-        raise RuntimeError
-
-    model = Sequential()
-    model.add(Dense(1, input_shape=(2,)))
-    model.compile(loss='mse', optimizer='adadelta')
-
-    samples = batch_size * (good_batches + 1)
-
-    # - Produce data on 4 worker threads, consume on main thread:
-    #   - All worker threads share the SAME generator
-    #   - Make sure `RuntimeError` exception bubbles up
-    with pytest.raises(RuntimeError):
-        model.fit_generator(custom_generator(),
-                            steps_per_epoch=samples,
-                            validation_steps=None,
-                            max_queue_size=10,
-                            workers=WORKERS,
-                            use_multiprocessing=False)
-
-    # - Produce data on 1 worker thread, consume on main thread:
-    #   - Worker thread is the only thread running the generator
-    #   - Make sure `RuntimeError` exception bubbles up
-    with pytest.raises(RuntimeError):
-        model.fit_generator(custom_generator(),
-                            steps_per_epoch=samples,
-                            validation_steps=None,
-                            max_queue_size=10,
-                            workers=1,
-                            use_multiprocessing=False)
-
-    # - Produce and consume data without a queue on main thread
-    #   - Make sure the value of `use_multiprocessing` is ignored
-    #   - Make sure `RuntimeError` exception bubbles up
-    with pytest.raises(RuntimeError):
-        model.fit_generator(custom_generator(),
-                            steps_per_epoch=samples,
-                            validation_steps=None,
-                            max_queue_size=10,
-                            workers=0,
-                            use_multiprocessing=False)
-
-
-@skip_generators
-def test_multiprocessing_evaluate_error():
-    arr_data = np.random.randint(0, 256, (50, 2))
-    arr_labels = np.random.randint(0, 2, 50)
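-    # [Editor's aside, not part of the original file] The *_error tests from
-    # here on share a single pattern: the generator yields `good_batches`
-    # valid batches and then raises, and the assertion is that the exception
-    # reaches the caller (or that Windows + multiprocessing fails fast with a
-    # ValueError before the generator ever runs). Schematically:
-    #
-    #     def failing_generator():
-    #         for _ in range(good_batches):
-    #             yield X, y          # valid data
-    #         raise RuntimeError      # must bubble up to the caller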
-    batch_size = 10
-    n_samples = 50
-    good_batches = 3
-
-    @threadsafe_generator
-    def custom_generator():
-        """Raises an exception after a few good batches"""
-        for i in range(good_batches):
-            batch_index = np.random.randint(0, n_samples - batch_size)
-            start = batch_index
-            end = start + batch_size
-            X = arr_data[start: end]
-            y = arr_labels[start: end]
-            yield X, y
-        raise RuntimeError
-
-    model = Sequential()
-    model.add(Dense(1, input_shape=(2,)))
-    model.compile(loss='mse', optimizer='adadelta')
-
-    # - Produce data on 4 worker processes, consume on main process:
-    #   - Each worker process runs OWN copy of generator
-    #   - BUT on Windows, `multiprocessing` won't marshall generators across
-    #     process boundaries -> make sure `evaluate_generator()` raises ValueError
-    #     exception and does not attempt to run the generator.
-    # - On other platforms, make sure `RuntimeError` exception bubbles up
-    if os.name == 'nt':
-        with pytest.raises(ValueError):
-            model.evaluate_generator(custom_generator(),
-                                     steps=good_batches * WORKERS + 1,
-                                     max_queue_size=10,
-                                     workers=WORKERS,
-                                     use_multiprocessing=True)
-    else:
-        with pytest.raises(RuntimeError):
-            model.evaluate_generator(custom_generator(),
-                                     steps=good_batches * WORKERS + 1,
-                                     max_queue_size=10,
-                                     workers=WORKERS,
-                                     use_multiprocessing=True)
-
-    # - Produce data on 1 worker process, consume on main process:
-    #   - Worker process runs generator
-    #   - BUT on Windows, `multiprocessing` won't marshall generators across
-    #     process boundaries -> make sure `evaluate_generator()` raises ValueError
-    #     exception and does not attempt to run the generator.
-    # - On other platforms, make sure `RuntimeError` exception bubbles up
-    if os.name == 'nt':
-        with pytest.raises(ValueError):
-            model.evaluate_generator(custom_generator(),
-                                     steps=good_batches + 1,
-                                     max_queue_size=10,
-                                     workers=1,
-                                     use_multiprocessing=True)
-    else:
-        with pytest.raises(RuntimeError):
-            model.evaluate_generator(custom_generator(),
-                                     steps=good_batches + 1,
-                                     max_queue_size=10,
-                                     workers=1,
-                                     use_multiprocessing=True)
-
-    # - Produce and consume data without a queue on main thread
-    #   - Make sure the value of `use_multiprocessing` is ignored
-    #   - Make sure `RuntimeError` exception bubbles up
-    with pytest.raises(RuntimeError):
-        model.evaluate_generator(custom_generator(),
-                                 steps=good_batches + 1,
-                                 max_queue_size=10,
-                                 workers=0,
-                                 use_multiprocessing=True)
-
-
-def test_multithreading_evaluate_error():
-    arr_data = np.random.randint(0, 256, (50, 2))
-    arr_labels = np.random.randint(0, 2, 50)
-    batch_size = 10
-    n_samples = 50
-    good_batches = 3
-
-    @threadsafe_generator
-    def custom_generator():
-        """Raises an exception after a few good batches"""
-        for i in range(good_batches):
-            batch_index = np.random.randint(0, n_samples - batch_size)
-            start = batch_index
-            end = start + batch_size
-            X = arr_data[start: end]
-            y = arr_labels[start: end]
-            yield X, y
-        raise RuntimeError
-
-    model = Sequential()
-    model.add(Dense(1, input_shape=(2,)))
-    model.compile(loss='mse', optimizer='adadelta')
-
-    # - Produce data on 4 worker threads, consume on main thread:
-    #   - All worker threads share the SAME generator
-    #   - Make sure `RuntimeError` exception bubbles up
-    with pytest.raises(RuntimeError):
-        model.evaluate_generator(custom_generator(),
-                                 steps=good_batches * WORKERS + 1,
-                                 max_queue_size=10,
-                                 workers=WORKERS,
-                                 use_multiprocessing=False)
-
-    # - Produce data on 1 worker thread, consume on main thread:
-    #   - Worker thread is the only thread running the generator
-    #   - Make sure `RuntimeError` exception bubbles up
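-    # [Editor's aside, not part of the original file] Threads, unlike worker
-    # processes, share the one generator instance, and plain Python
-    # generators are not safe to advance concurrently (a second next() while
-    # one is still running raises "generator already executing"). That is
-    # what the @threadsafe_generator decorator used throughout this file
-    # presumably guards against, e.g. by serialising next() behind a lock.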
-    with pytest.raises(RuntimeError):
-        model.evaluate_generator(custom_generator(),
-                                 steps=good_batches + 1,
-                                 max_queue_size=10,
-                                 workers=1,
-                                 use_multiprocessing=False)
-
-    # - Produce and consume data without a queue on main thread
-    #   - Make sure the value of `use_multiprocessing` is ignored
-    #   - Make sure `RuntimeError` exception bubbles up
-    with pytest.raises(RuntimeError):
-        model.evaluate_generator(custom_generator(),
-                                 steps=good_batches + 1,
-                                 max_queue_size=10,
-                                 workers=0,
-                                 use_multiprocessing=False)
-
-
-@skip_generators
-def test_multiprocessing_predict_error():
-    arr_data = np.random.randint(0, 256, (50, 2))
-    good_batches = 3
-
-    @threadsafe_generator
-    def custom_generator():
-        """Raises an exception after a few good batches"""
-        batch_size = 10
-        n_samples = 50
-
-        for i in range(good_batches):
-            batch_index = np.random.randint(0, n_samples - batch_size)
-            start = batch_index
-            end = start + batch_size
-            X = arr_data[start: end]
-            yield X
-        raise RuntimeError
-
-    model = Sequential()
-    model.add(Dense(1, input_shape=(2,)))
-    model.compile(loss='mse', optimizer='adadelta')
-
-    # - Produce data on 4 worker processes, consume on main process:
-    #   - Each worker process runs OWN copy of generator
-    #   - BUT on Windows, `multiprocessing` won't marshall generators across
-    #     process boundaries -> make sure `predict_generator()` raises ValueError
-    #     exception and does not attempt to run the generator.
-    # - On other platforms, make sure `RuntimeError` exception bubbles up
-    if os.name == 'nt':
-        with pytest.raises(ValueError):
-            model.predict_generator(custom_generator(),
-                                    steps=good_batches * WORKERS + 1,
-                                    max_queue_size=10,
-                                    workers=WORKERS,
-                                    use_multiprocessing=True)
-    else:
-        with pytest.raises(RuntimeError):
-            model.predict_generator(custom_generator(),
-                                    steps=good_batches * WORKERS + 1,
-                                    max_queue_size=10,
-                                    workers=WORKERS,
-                                    use_multiprocessing=True)
-
-    # - Produce data on 1 worker process, consume on main process:
-    #   - Worker process runs generator
-    #   - BUT on Windows, `multiprocessing` won't marshall generators across
-    #     process boundaries -> make sure `predict_generator()` raises ValueError
-    #     exception and does not attempt to run the generator.
-    # - On other platforms, make sure `RuntimeError` exception bubbles up
-    if os.name == 'nt':
-        with pytest.raises(ValueError):
-            model.predict_generator(custom_generator(),
-                                    steps=good_batches + 1,
-                                    max_queue_size=10,
-                                    workers=1,
-                                    use_multiprocessing=True)
-    else:
-        with pytest.raises(RuntimeError):
-            model.predict_generator(custom_generator(),
-                                    steps=good_batches + 1,
-                                    max_queue_size=10,
-                                    workers=1,
-                                    use_multiprocessing=True)
-
-    # - Produce and consume data without a queue on main thread
-    #   - Make sure the value of `use_multiprocessing` is ignored
-    #   - Make sure `RuntimeError` exception bubbles up
-    with pytest.raises(RuntimeError):
-        model.predict_generator(custom_generator(),
-                                steps=good_batches + 1,
-                                max_queue_size=10,
-                                workers=0,
-                                use_multiprocessing=True)
-
-
-def test_multithreading_predict_error():
-    arr_data = np.random.randint(0, 256, (50, 2))
-    good_batches = 3
-
-    @threadsafe_generator
-    def custom_generator():
-        """Raises an exception after a few good batches"""
-        batch_size = 10
-        n_samples = 50
-
-        for i in range(good_batches):
-            batch_index = np.random.randint(0, n_samples - batch_size)
-            start = batch_index
-            end = start + batch_size
-            X = arr_data[start: end]
-            yield X
-        raise RuntimeError
-
-    model = Sequential()
-    model.add(Dense(1, input_shape=(2,)))
-    model.compile(loss='mse', optimizer='adadelta')
-
-    # - Produce data on 4 worker threads, consume on main thread:
-    #   - All worker threads share the SAME generator
-    #   - Make sure `RuntimeError` exception bubbles up
-    with pytest.raises(RuntimeError):
-        model.predict_generator(custom_generator(),
-                                steps=good_batches * WORKERS + 1,
-                                max_queue_size=10,
-                                workers=WORKERS,
-                                use_multiprocessing=False)
-
-    # - Produce data on 1 worker thread, consume on main thread:
-    #   - Worker thread is the only thread running the generator
-    #   - Make sure `RuntimeError` exception bubbles up
-    with pytest.raises(RuntimeError):
-        model.predict_generator(custom_generator(),
-                                steps=good_batches + 1,
-                                max_queue_size=10,
-                                workers=1,
-                                use_multiprocessing=False)
-
-    # - Produce and consume data without a queue on main thread
-    #   - Make sure the value of `use_multiprocessing` is ignored
-    #   - Make sure `RuntimeError` exception bubbles up
-    with pytest.raises(RuntimeError):
-        model.predict_generator(custom_generator(),
-                                steps=good_batches + 1,
-                                max_queue_size=10,
-                                workers=0,
-                                use_multiprocessing=False)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__])
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from .resnext import ResNeXt50, ResNeXt101
-from .resnet_v2 import ResNet50V2, ResNet101V2, ResNet152V2
-from .resnet import ResNet101, ResNet152
-from .nasnet import NASNetMobile, NASNetLarge
-from .densenet import DenseNet121, DenseNet169, DenseNet201
-from .mobilenet_v2 import MobileNetV2
-from .mobilenet import MobileNet
-from .xception import Xception
-from .inception_resnet_v2 import InceptionResNetV2
-from .inception_v3 import InceptionV3
-from .resnet50 import ResNet50
-from .vgg19 import VGG19
-from .vgg16 import VGG16
-from .. import backend
-from .. import layers
-from .. import models
-from ..
import utils - -import keras_applications - -if not hasattr(keras_applications, 'get_submodules_from_kwargs'): - keras_applications.set_keras_submodules( - backend=backend, - layers=layers, - models=models, - utils=utils) - - -def keras_modules_injection(base_fun): - - def wrapper(*args, **kwargs): - if hasattr(keras_applications, 'get_submodules_from_kwargs'): - kwargs['backend'] = backend - kwargs['layers'] = layers - kwargs['models'] = models - kwargs['utils'] = utils - return base_fun(*args, **kwargs) - - return wrapper -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras_applications import densenet -from . import keras_modules_injection - - -@keras_modules_injection -def DenseNet121(*args, **kwargs): - return densenet.DenseNet121(*args, **kwargs) - - -@keras_modules_injection -def DenseNet169(*args, **kwargs): - return densenet.DenseNet169(*args, **kwargs) - - -@keras_modules_injection -def DenseNet201(*args, **kwargs): - return densenet.DenseNet201(*args, **kwargs) - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return densenet.decode_predictions(*args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return densenet.preprocess_input(*args, **kwargs) -"""Utilities for ImageNet data preprocessing & prediction decoding. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras_applications import imagenet_utils -from . import keras_modules_injection - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return imagenet_utils.decode_predictions( - *args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return imagenet_utils.preprocess_input(*args, **kwargs) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras_applications import inception_resnet_v2 -from . import keras_modules_injection - - -@keras_modules_injection -def InceptionResNetV2(*args, **kwargs): - return inception_resnet_v2.InceptionResNetV2(*args, **kwargs) - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return inception_resnet_v2.decode_predictions(*args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return inception_resnet_v2.preprocess_input(*args, **kwargs) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras_applications import inception_v3 -from . import keras_modules_injection - - -@keras_modules_injection -def InceptionV3(*args, **kwargs): - return inception_v3.InceptionV3(*args, **kwargs) - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return inception_v3.decode_predictions(*args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return inception_v3.preprocess_input(*args, **kwargs) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras_applications import mobilenet -from . 
import keras_modules_injection - - -@keras_modules_injection -def MobileNet(*args, **kwargs): - return mobilenet.MobileNet(*args, **kwargs) - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return mobilenet.decode_predictions(*args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return mobilenet.preprocess_input(*args, **kwargs) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras_applications import mobilenet_v2 -from . import keras_modules_injection - - -@keras_modules_injection -def MobileNetV2(*args, **kwargs): - return mobilenet_v2.MobileNetV2(*args, **kwargs) - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return mobilenet_v2.decode_predictions(*args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return mobilenet_v2.preprocess_input(*args, **kwargs) -# Only for backwards compatibility. -from .mobilenet_v2 import * -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras_applications import nasnet -from . import keras_modules_injection - - -@keras_modules_injection -def NASNetMobile(*args, **kwargs): - return nasnet.NASNetMobile(*args, **kwargs) - - -@keras_modules_injection -def NASNetLarge(*args, **kwargs): - return nasnet.NASNetLarge(*args, **kwargs) - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return nasnet.decode_predictions(*args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return nasnet.preprocess_input(*args, **kwargs) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -try: - from keras_applications import resnet -except: - resnet = None -from . import keras_modules_injection - - -@keras_modules_injection -def ResNet50(*args, **kwargs): - return resnet.ResNet50(*args, **kwargs) - - -@keras_modules_injection -def ResNet101(*args, **kwargs): - return resnet.ResNet101(*args, **kwargs) - - -@keras_modules_injection -def ResNet152(*args, **kwargs): - return resnet.ResNet152(*args, **kwargs) - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return resnet.decode_predictions(*args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return resnet.preprocess_input(*args, **kwargs) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras_applications import resnet50 -from . import keras_modules_injection - - -@keras_modules_injection -def ResNet50(*args, **kwargs): - return resnet50.ResNet50(*args, **kwargs) - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return resnet50.decode_predictions(*args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return resnet50.preprocess_input(*args, **kwargs) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -try: - from keras_applications import resnet_v2 -except: - resnet_v2 = None -from . 
import keras_modules_injection - - -@keras_modules_injection -def ResNet50V2(*args, **kwargs): - return resnet_v2.ResNet50V2(*args, **kwargs) - - -@keras_modules_injection -def ResNet101V2(*args, **kwargs): - return resnet_v2.ResNet101V2(*args, **kwargs) - - -@keras_modules_injection -def ResNet152V2(*args, **kwargs): - return resnet_v2.ResNet152V2(*args, **kwargs) - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return resnet_v2.decode_predictions(*args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return resnet_v2.preprocess_input(*args, **kwargs) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -try: - from keras_applications import resnext -except: - resnext = None -from . import keras_modules_injection - - -@keras_modules_injection -def ResNeXt50(*args, **kwargs): - return resnext.ResNeXt50(*args, **kwargs) - - -@keras_modules_injection -def ResNeXt101(*args, **kwargs): - return resnext.ResNeXt101(*args, **kwargs) - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return resnext.decode_predictions(*args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return resnext.preprocess_input(*args, **kwargs) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras_applications import vgg16 -from . import keras_modules_injection - - -@keras_modules_injection -def VGG16(*args, **kwargs): - return vgg16.VGG16(*args, **kwargs) - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return vgg16.decode_predictions(*args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return vgg16.preprocess_input(*args, **kwargs) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras_applications import vgg19 -from . import keras_modules_injection - - -@keras_modules_injection -def VGG19(*args, **kwargs): - return vgg19.VGG19(*args, **kwargs) - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return vgg19.decode_predictions(*args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return vgg19.preprocess_input(*args, **kwargs) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras_applications import xception -from . 
import keras_modules_injection - - -@keras_modules_injection -def Xception(*args, **kwargs): - return xception.Xception(*args, **kwargs) - - -@keras_modules_injection -def decode_predictions(*args, **kwargs): - return xception.decode_predictions(*args, **kwargs) - - -@keras_modules_injection -def preprocess_input(*args, **kwargs): - return xception.preprocess_input(*args, **kwargs) -from .load_backend import epsilon -from .load_backend import set_epsilon -from .load_backend import floatx -from .load_backend import set_floatx -from .load_backend import cast_to_floatx -from .load_backend import image_data_format -from .load_backend import set_image_data_format -from .load_backend import reset_uids -from .load_backend import get_uid -from .load_backend import learning_phase -from .load_backend import set_learning_phase -from .load_backend import is_sparse -from .load_backend import to_dense -from .load_backend import variable -from .load_backend import constant -from .load_backend import is_keras_tensor -from .load_backend import is_tensor -from .load_backend import placeholder -from .load_backend import is_placeholder -from .load_backend import shape -from .load_backend import int_shape -from .load_backend import ndim -from .load_backend import dtype -from .load_backend import eval -from .load_backend import zeros -from .load_backend import ones -from .load_backend import eye -from .load_backend import zeros_like -from .load_backend import ones_like -from .load_backend import identity -from .load_backend import random_uniform_variable -from .load_backend import random_normal_variable -from .load_backend import count_params -from .load_backend import cast -from .load_backend import update -from .load_backend import update_add -from .load_backend import update_sub -from .load_backend import moving_average_update -from .load_backend import dot -from .load_backend import batch_dot -from .load_backend import transpose -from .load_backend import gather -from .load_backend import max -from .load_backend import min -from .load_backend import sum -from .load_backend import prod -from .load_backend import cumsum -from .load_backend import cumprod -from .load_backend import var -from .load_backend import std -from .load_backend import mean -from .load_backend import any -from .load_backend import all -from .load_backend import argmax -from .load_backend import argmin -from .load_backend import square -from .load_backend import abs -from .load_backend import sqrt -from .load_backend import exp -from .load_backend import log -from .load_backend import logsumexp -from .load_backend import round -from .load_backend import sign -from .load_backend import pow -from .load_backend import clip -from .load_backend import equal -from .load_backend import not_equal -from .load_backend import greater -from .load_backend import greater_equal -from .load_backend import less -from .load_backend import less_equal -from .load_backend import maximum -from .load_backend import minimum -from .load_backend import sin -from .load_backend import cos -from .load_backend import normalize_batch_in_training -from .load_backend import batch_normalization -from .load_backend import concatenate -from .load_backend import reshape -from .load_backend import permute_dimensions -from .load_backend import resize_images -from .load_backend import resize_volumes -from .load_backend import repeat_elements -from .load_backend import repeat -from .load_backend import arange -from .load_backend import tile -from .load_backend import flatten 
-from .load_backend import batch_flatten -from .load_backend import expand_dims -from .load_backend import squeeze -from .load_backend import temporal_padding -from .load_backend import spatial_2d_padding -from .load_backend import spatial_3d_padding -from .load_backend import stack -from .load_backend import one_hot -from .load_backend import reverse -from .load_backend import slice -from .load_backend import get_value -from .load_backend import batch_get_value -from .load_backend import set_value -from .load_backend import batch_set_value -from .load_backend import print_tensor -from .load_backend import function -from .load_backend import gradients -from .load_backend import stop_gradient -from .load_backend import rnn -from .load_backend import switch -from .load_backend import in_train_phase -from .load_backend import in_test_phase -from .load_backend import relu -from .load_backend import elu -from .load_backend import softmax -from .load_backend import softplus -from .load_backend import softsign -from .load_backend import categorical_crossentropy -from .load_backend import sparse_categorical_crossentropy -from .load_backend import binary_crossentropy -from .load_backend import sigmoid -from .load_backend import hard_sigmoid -from .load_backend import tanh -from .load_backend import dropout -from .load_backend import l2_normalize -from .load_backend import in_top_k -from .load_backend import conv1d -from .load_backend import separable_conv1d -from .load_backend import conv2d -from .load_backend import separable_conv2d -from .load_backend import conv2d_transpose -from .load_backend import depthwise_conv2d -from .load_backend import conv3d -from .load_backend import conv3d_transpose -from .load_backend import pool2d -from .load_backend import pool3d -from .load_backend import bias_add -from .load_backend import random_normal -from .load_backend import random_uniform -from .load_backend import random_binomial -from .load_backend import truncated_normal -from .load_backend import ctc_label_dense_to_sparse -from .load_backend import ctc_batch_cost -from .load_backend import ctc_decode -from .load_backend import map_fn -from .load_backend import foldl -from .load_backend import foldr -from .load_backend import local_conv1d -from .load_backend import local_conv2d -from .load_backend import backend -from .load_backend import normalize_data_format -from .load_backend import name_scope - -if backend() == 'theano': - from .load_backend import pattern_broadcast -elif backend() == 'tensorflow': - from .load_backend import clear_session - from .load_backend import manual_variable_initialization - from .load_backend import get_session - from .load_backend import set_session -elif backend() == 'cntk': - from .load_backend import clear_session -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import cntk as C -import numpy as np -from .common import floatx -from .common import epsilon -from .common import image_data_format -from .common import normalize_data_format -from ..utils.generic_utils import transpose_shape -from collections import defaultdict -from contextlib import contextmanager -import warnings - - -C.set_global_option('align_axis', 1) - -b_any = any -py_slice = slice - - -dev = C.device.use_default_device() -if dev.type() == 0: - warnings.warn( - 'CNTK backend warning: GPU is not detected. 
' - 'CNTK\'s CPU version is not fully optimized,' - 'please run with GPU to get better performance.') - -# A learning phase is a bool tensor used to run Keras models in -# either train mode (learning_phase == 1) or test mode (learning_phase == 0). -# LEARNING_PHASE_PLACEHOLDER is the placeholder for dynamic learning phase -_LEARNING_PHASE_PLACEHOLDER = C.constant( - shape=(), dtype=np.float32, - value=1.0, - name='_keras_learning_phase') -# static learning phase flag, if it is not 0 or 1, we will go with dynamic -# learning phase tensor. -_LEARNING_PHASE = -1 -_UID_PREFIXES = defaultdict(int) - -# cntk doesn't support gradient as symbolic op, to hook up with keras model, -# we will create gradient as a constant placeholder, here use this global -# map to keep the mapping from grad placeholder to parameter -grad_parameter_dict = {} - -NAME_SCOPE_STACK = [] - - -@contextmanager -def name_scope(name): - global NAME_SCOPE_STACK - NAME_SCOPE_STACK.append(name) - yield - NAME_SCOPE_STACK.pop() - - -def get_uid(prefix=''): - _UID_PREFIXES[prefix] += 1 - return _UID_PREFIXES[prefix] - - -def learning_phase(): - # If _LEARNING_PHASE is not 0 or 1, return dynamic learning phase tensor - if _LEARNING_PHASE in {0, 1}: - return _LEARNING_PHASE - else: - return _LEARNING_PHASE_PLACEHOLDER - - -def set_learning_phase(value): - global _LEARNING_PHASE - if value not in {0, 1}: - raise ValueError('CNTK Backend: Set learning phase ' - 'with value %s is not supported, ' - 'expected 0 or 1.' % value) - _LEARNING_PHASE = value - - -def clear_session(): - """Reset learning phase flag for cntk backend. - """ - global _LEARNING_PHASE - global _LEARNING_PHASE_PLACEHOLDER - _LEARNING_PHASE = -1 - _LEARNING_PHASE_PLACEHOLDER.value = np.asarray(1.0) - - -def in_train_phase(x, alt, training=None): - global _LEARNING_PHASE - if training is None: - training = learning_phase() - uses_learning_phase = True - else: - uses_learning_phase = False - - # CNTK currently don't support cond op, so here we use - # element_select approach as workaround. It may have - # perf issue, will resolve it later with cntk cond op. - if callable(x) and isinstance(x, C.cntk_py.Function) is False: - x = x() - if callable(alt) and isinstance(alt, C.cntk_py.Function) is False: - alt = alt() - - if training is True: - x._uses_learning_phase = uses_learning_phase - return x - else: - # if _LEARNING_PHASE is static - if isinstance(training, int) or isinstance(training, bool): - result = x if training == 1 or training is True else alt - else: - result = C.element_select(training, x, alt) - result._uses_learning_phase = uses_learning_phase - return result - - -def in_test_phase(x, alt, training=None): - return in_train_phase(alt, x, training=training) - - -def _convert_string_dtype(dtype): - if dtype == 'float32': - return np.float32 - elif dtype == 'float64': - return np.float64 - elif dtype == 'float16': - return np.float16 - else: - # cntk only running with float, - # try to cast to float to run the model - return np.float32 - - -def _convert_dtype_string(dtype): - if dtype == np.float32: - return 'float32' - elif dtype == np.float64: - return 'float64' - elif dtype == np.float16: - return 'float16' - else: - raise ValueError('CNTK Backend: Unsupported dtype: %s. ' - 'CNTK only supports float32, float64, and ' - 'float16.' % dtype) - - -def variable(value, dtype=None, name=None, constraint=None): - """Instantiates a variable and returns it. - - # Arguments - value: Numpy array, initial value of the tensor. - dtype: Tensor type. 
- name: Optional name string for the tensor. - constraint: Optional projection function to be - applied to the variable after an optimizer update. - - # Returns - A variable instance (with Keras metadata included). - """ - if dtype is None: - dtype = floatx() - - if name is None: - name = '' - - if isinstance( - value, - C.variables.Constant) or isinstance( - value, - C.variables.Parameter): - value = value.value - - # we don't support init parameter with symbolic op, so eval it first as - # workaround - if isinstance(value, C.cntk_py.Function): - value = eval(value) - - shape = value.shape if hasattr(value, 'shape') else () - if hasattr(value, 'dtype') and value.dtype != dtype and len(shape) > 0: - value = value.astype(dtype) - - # TODO: remove the conversion when cntk supports int32, int64 - # https://www.cntk.ai/pythondocs/cntk.variables.html#cntk.variables.Parameter - dtype = 'float32' if 'int' in str(dtype) else dtype - - v = C.parameter(shape=shape, - init=value, - dtype=dtype, - name=_prepare_name(name, 'variable')) - v._keras_shape = v.shape - v._uses_learning_phase = False - v.constraint = constraint - return v - - -def bias_add(x, bias, data_format=None): - data_format = normalize_data_format(data_format) - - dims = len(x.shape) - if dims > 0 and x.shape[0] == C.InferredDimension: - dims -= 1 - - bias_dims = len(bias.shape) - if bias_dims != 1 and bias_dims != dims: - raise ValueError('Unexpected bias dimensions %d, ' - 'expected 1 or %d dimensions' % (bias_dims, dims)) - - if dims == 4: - if data_format == 'channels_first': - if bias_dims == 1: - shape = (bias.shape[0], 1, 1, 1) - else: - shape = (bias.shape[3],) + bias.shape[:3] - elif data_format == 'channels_last': - if bias_dims == 1: - shape = (1, 1, 1, bias.shape[0]) - else: - shape = bias.shape - elif dims == 3: - if data_format == 'channels_first': - if bias_dims == 1: - shape = (bias.shape[0], 1, 1) - else: - shape = (bias.shape[2],) + bias.shape[:2] - elif data_format == 'channels_last': - if bias_dims == 1: - shape = (1, 1, bias.shape[0]) - else: - shape = bias.shape - elif dims == 2: - if data_format == 'channels_first': - if bias_dims == 1: - shape = (bias.shape[0], 1) - else: - shape = (bias.shape[1],) + bias.shape[:1] - elif data_format == 'channels_last': - if bias_dims == 1: - shape = (1, bias.shape[0]) - else: - shape = bias.shape - else: - shape = bias.shape - return x + reshape(bias, shape) - - -def eval(x): - if isinstance(x, C.cntk_py.Function): - return x.eval() - elif (isinstance(x, C.variables.Constant) or isinstance( - x, C.variables.Parameter)): - return x.value - else: - raise ValueError('CNTK Backend: `eval` method on ' - '`%s` type is not supported. ' - 'CNTK only supports `eval` with ' - '`Function`, `Constant` or ' - '`Parameter`.' % type(x)) - - -def placeholder( - shape=None, - ndim=None, - dtype=None, - sparse=False, - name=None, - dynamic_axis_num=1): - if dtype is None: - dtype = floatx() - if not shape: - if ndim: - shape = tuple([None for _ in range(ndim)]) - - if _get_cntk_version() >= 2.2: - dynamic_dimension = C.FreeDimension - else: - dynamic_dimension = C.InferredDimension - - cntk_shape = [dynamic_dimension if s is None else s for s in shape] - cntk_shape = tuple(cntk_shape) - - if dynamic_axis_num > len(cntk_shape): - raise ValueError('CNTK backend: creating placeholder with ' - '%d dimension is not supported, at least ' - '%d dimensions are needed.' 
- % (len(cntk_shape), dynamic_axis_num)) - - if name is None: - name = '' - - cntk_shape = cntk_shape[dynamic_axis_num:] - - x = C.input( - shape=cntk_shape, - dtype=_convert_string_dtype(dtype), - is_sparse=sparse, - name=name) - x._keras_shape = shape - x._uses_learning_phase = False - x._cntk_placeholder = True - return x - - -def is_placeholder(x): - """Returns whether `x` is a placeholder. - - # Arguments - x: A candidate placeholder. - - # Returns - Boolean. - """ - return hasattr(x, '_cntk_placeholder') and x._cntk_placeholder - - -def is_keras_tensor(x): - if not is_tensor(x): - raise ValueError('Unexpectedly found an instance of type `' + - str(type(x)) + '`. ' - 'Expected a symbolic tensor instance.') - return hasattr(x, '_keras_history') - - -def is_tensor(x): - return isinstance(x, (C.variables.Constant, - C.variables.Variable, - C.variables.Parameter, - C.ops.functions.Function)) - - -def shape(x): - shape = list(int_shape(x)) - num_dynamic = _get_dynamic_axis_num(x) - non_dyn_shape = [] - for i in range(len(x.shape)): - if shape[i + num_dynamic] is None: - non_dyn_shape.append(x.shape[i]) - else: - non_dyn_shape.append(shape[i + num_dynamic]) - return shape[:num_dynamic] + non_dyn_shape - - -def is_sparse(tensor): - return tensor.is_sparse - - -def int_shape(x): - if hasattr(x, '_keras_shape'): - return x._keras_shape - - shape = x.shape - if hasattr(x, 'dynamic_axes'): - dynamic_shape = [None for a in x.dynamic_axes] - shape = tuple(dynamic_shape) + shape - return shape - - -def ndim(x): - shape = int_shape(x) - return len(shape) - - -def _prepare_name(name, default): - prefix = '_'.join(NAME_SCOPE_STACK) - if name is None or name == '': - return prefix + '/' + default - return prefix + '/' + name - - -def constant(value, dtype=None, shape=None, name=None): - if dtype is None: - dtype = floatx() - if shape is None: - shape = () - np_value = value * np.ones(shape) - const = C.constant(np_value, - dtype=dtype, - name=_prepare_name(name, 'constant')) - const._keras_shape = const.shape - const._uses_learning_phase = False - return const - - -def random_binomial(shape, p=0.0, dtype=None, seed=None): - if seed is None: - # ensure that randomness is conditioned by the Numpy RNG - seed = np.random.randint(10e7) - if dtype is None: - dtype = floatx() - else: - dtype = _convert_string_dtype(dtype) - - for _ in shape: - if _ is None: - raise ValueError('CNTK Backend: randomness op with ' - 'dynamic shape is not supported now. ' - 'Please provide fixed dimension ' - 'instead of `None`.') - return C.random.bernoulli(shape=shape, dtype=dtype, mean=p, seed=seed) - - -def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None): - for _ in shape: - if _ is None: - raise ValueError('CNTK Backend: randomness op with ' - 'dynamic shape is not supported now. 
' - 'Please provide fixed dimension ' - 'instead of `None`.') - - if seed is None: - # ensure that randomness is conditioned by the Numpy RNG - seed = np.random.randint(10e3) - return C.random.uniform( - shape=shape, - dtype=dtype, - low=minval, - high=maxval, - seed=seed) - - -def random_uniform_variable(shape, low, high, - dtype=None, name=None, seed=None): - if seed is None: - # ensure that randomness is conditioned by the Numpy RNG - seed = np.random.randint(10e3) - - if dtype is None: - dtype = floatx() - else: - dtype = _convert_string_dtype(dtype) - - if name is None: - name = '' - - scale = (high - low) / 2 - p = C.parameter( - shape, - init=C.initializer.uniform( - scale, - seed=seed), - dtype=dtype, - name=name) - return variable(value=p.value + low + scale) - - -def random_normal_variable( - shape, - mean, - scale, - dtype=None, - name=None, - seed=None): - if seed is None: - # ensure that randomness is conditioned by the Numpy RNG - seed = np.random.randint(10e7) - if dtype is None: - dtype = floatx() - else: - dtype = _convert_string_dtype(dtype) - - if name is None: - name = '' - - p = C.parameter( - shape=shape, - init=C.initializer.normal( - scale=scale, - seed=seed), - dtype=dtype, - name=name) - return variable(value=p.value + mean) - - -def random_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): - if dtype is None: - dtype = floatx() - for _ in shape: - if _ is None: - raise ValueError('CNTK Backend: randomness op with ' - 'dynamic shape is not supported now. ' - 'Please provide fixed dimension ' - 'instead of `None`.') - if seed is None: - # ensure that randomness is conditioned by the Numpy RNG - seed = np.random.randint(10e3) - return C.random.normal( - shape=shape, mean=mean, - scale=stddev, seed=seed, - dtype=dtype) - - -def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): - if seed is None: - seed = np.random.randint(1, 10e6) - if dtype is None: - dtype = floatx() - else: - dtype = _convert_string_dtype(dtype) - - return C.parameter( - shape, init=C.initializer.truncated_normal( - stddev, seed=seed), dtype=dtype) - - -def dtype(x): - return _convert_dtype_string(x.dtype) - - -def zeros(shape, dtype=None, name=None): - if dtype is None: - dtype = floatx() - ctype = _convert_string_dtype(dtype) - return variable(value=np.zeros(shape, ctype), dtype=dtype, name=name) - - -def ones(shape, dtype=None, name=None): - if dtype is None: - dtype = floatx() - ctype = _convert_string_dtype(dtype) - return variable(value=np.ones(shape, ctype), dtype=dtype, name=name) - - -def eye(size, dtype=None, name=None): - if dtype is None: - dtype = floatx() - if isinstance(size, (list, tuple)): - n, m = size - else: - n, m = size, size - return variable(np.eye(n, m), dtype, name) - - -def zeros_like(x, dtype=None, name=None): - name = name or '' - if dtype is None: - dtype = floatx() - return C.cast(C.zeros_like(x, name), dtype) - - -def ones_like(x, dtype=None, name=None): - name = name or '' - if dtype is None: - dtype = floatx() - return C.cast(C.ones_like(x, name), dtype) - - -def count_params(x): - for _ in x.shape: - if _ == C.InferredDimension or _ == C.FreeDimension: - raise ValueError('CNTK backend: `count_params` with dynamic ' - 'shape is not supported. 
Please provide ' - 'fixed dimension instead of `None`.') - - return np.prod(int_shape(x)) - - -def cast(x, dtype): - # cntk calculate everything in float, so don't need case from bool / int - return x - - -def dot(x, y): - if len(x.shape) > 2 or len(y.shape) > 2: - y_shape = int_shape(y) - if len(y_shape) > 2: - permutation = [len(y_shape) - 2] - permutation += list(range(len(y_shape) - 2)) - permutation += [len(y_shape) - 1] - y = C.transpose(y, perm=permutation) - return C.times(x, y, len(y_shape) - 1) - else: - return C.times(x, y) - - -def batch_dot(x, y, axes=None): - x_shape = int_shape(x) - y_shape = int_shape(y) - - x_ndim = len(x_shape) - y_ndim = len(y_shape) - - if x_ndim < 2 or y_ndim < 2: - raise ValueError('Can not do batch_dot on inputs ' - 'with rank < 2. ' - 'Received inputs with shapes ' + - str(x_shape) + ' and ' + - str(y_shape) + '.') - - x_batch_size = x_shape[0] - y_batch_size = y_shape[0] - - if x_batch_size is not None and y_batch_size is not None: - if x_batch_size != y_batch_size: - raise ValueError('Can not do batch_dot on inputs ' - 'with different batch sizes. ' - 'Received inputs with shapes ' + - str(x_shape) + ' and ' + - str(y_shape) + '.') - - if isinstance(axes, int): - axes = [axes, axes] - - if axes is None: - if y_ndim == 2: - axes = [x_ndim - 1, y_ndim - 1] - else: - axes = [x_ndim - 1, y_ndim - 2] - - if b_any([isinstance(a, (list, tuple)) for a in axes]): - raise ValueError('Multiple target dimensions are not supported. ' + - 'Expected: None, int, (int, int), ' + - 'Provided: ' + str(axes)) - - # if tuple, convert to list - axes = list(axes) - - # convert negative indices - if axes[0] < 0: - axes[0] += x_ndim - if axes[1] < 0: - axes[1] += y_ndim - - if 0 in axes: - raise ValueError('Can not perform batch_dot over axis 0.' - ' If your inputs are not batched,' - ' add a dummy batch dimension to your ' - 'inputs using K.expand_dims(x, 0)') - d1 = x_shape[axes[0]] - d2 = y_shape[axes[1]] - - if d1 is not None and d2 is not None and d1 != d2: - raise ValueError('Can not do batch_dot on inputs with shapes ' + - str(x_shape) + ' and ' + str(y_shape) + - ' with axes=' + str(axes) + '. x.shape[%d] != ' - 'y.shape[%d] (%d != %d).' % (axes[0], axes[1], d1, d2)) - - # Input shapes: - # x: (b_size, x1, ..., d, ..., xn) - # y: (b_size, y1, ..., d, ..., yn) - # where d is the dimension to reduce. - - # Bring d to the last dimension in x - # x: (b_size, ..., d) - - permute_pattern = list(range(x_ndim)) - for i in range(axes[0], x_ndim - 1): - permute_pattern[i] = permute_pattern[i + 1] - permute_pattern[-1] = axes[0] - - x = permute_dimensions(x, permute_pattern) - - # Bring d to the second dimension in y - # y: (b_size, d, ...) 
-    permute_pattern = list(range(y_ndim))
-
-    for i in range(axes[1], 1, -1):
-        permute_pattern[i] = permute_pattern[i - 1]
-    permute_pattern[1] = axes[1]
-    y = permute_dimensions(y, permute_pattern)
-
-    # Expand to rank 3 if needed
-    if x_ndim == 2:
-        x = expand_dims(x, 1)
-        x_expanded = True
-    else:
-        x_expanded = False
-
-    if y_ndim == 2:
-        y = expand_dims(y, -1)
-        y_expanded = True
-    else:
-        y_expanded = False
-
-    x_shape = int_shape(x)
-    y_shape = int_shape(y)
-
-    # batch size might be lost at this point
-    x_batch_size = x_shape[0]
-    y_batch_size = y_shape[0]
-
-    if x_batch_size is None and y_batch_size is None:
-        dynamic_batch_size = True
-    elif x_batch_size is not None and y_batch_size is not None:
-        dynamic_batch_size = False
-    else:
-        raise ValueError('Can not perform batch_dot on inputs' +
-                         ' with both static and dynamic batch sizes. ' +
-                         'You probably attempted to perform the ' +
-                         'operation on a placeholder and a variable, ' +
-                         'which is not yet supported on the CNTK backend.')
-
-    if dynamic_batch_size:
-        result = C.times(x, y, output_rank=y_ndim - 2 + int(y_expanded))
-    else:
-        result = []
-
-        for i in range(x_batch_size):
-            xi = x[i]
-            yi = y[i]
-            if ndim(xi) == ndim(x):  # for older versions of CNTK
-                xi = squeeze(xi, 0)
-                yi = squeeze(yi, 0)
-            result.append(
-                C.times(xi, yi, output_rank=y_ndim - 2 + int(y_expanded)))
-        result = stack(result, 0)
-
-    if x_expanded:
-        result = squeeze(result, 1)
-
-    if y_expanded:
-        result = squeeze(result, -1)
-
-    if ndim(result) == 1:
-        return expand_dims(result)
-    return result
-
-
-def transpose(x):
-    return C.swapaxes(x, 0, 1)
-
-
-def gather(reference, indices):
-    # There is a bug in the cntk gather op which may cause a crash.
-    # We have made a fix, but it did not make it into the CNTK 2.1 release.
-    # We will switch to the native gather op in the next release.
-    if _get_cntk_version() >= 2.2:
-        return C.ops.gather(reference, indices)
-    else:
-        num_classes = reference.shape[0]
-        one_hot_matrix = C.ops.one_hot(indices, num_classes)
-        return C.times(
-            one_hot_matrix, reference,
-            output_rank=len(reference.shape) - 1)
-
-
-def _remove_dims(x, axis, keepdims=False):
-    if keepdims is False and isinstance(axis, list):
-        # the sequence axis is removed by default, so no reshape is needed on it
-        reduce_axes = []
-        for a in axis:
-            if isinstance(a, C.Axis) is False:
-                reduce_axes.append(a)
-        return _reshape_dummy_dim(x, reduce_axes)
-    else:
-        if isinstance(axis, list):
-            has_seq = False
-            for a in axis:
-                if isinstance(a, C.Axis):
-                    has_seq = True
-                    break
-            if has_seq:
-                nones = _get_dynamic_axis_num(x)
-                x = expand_dims(x, nones)
-        return x
-
-
-def max(x, axis=None, keepdims=False):
-    axis = _normalize_axis(axis, x)
-    output = _reduce_on_axis(x, axis, 'reduce_max')
-
-    return _remove_dims(output, axis, keepdims)
-
-
-def min(x, axis=None, keepdims=False):
-    axis = _normalize_axis(axis, x)
-    output = _reduce_on_axis(x, axis, 'reduce_min')
-
-    return _remove_dims(output, axis, keepdims)
-
-
-def sum(x, axis=None, keepdims=False):
-    axis = _normalize_axis(axis, x)
-    output = _reduce_on_axis(x, axis, 'reduce_sum')
-
-    return _remove_dims(output, axis, keepdims)
-
-
-def prod(x, axis=None, keepdims=False):
-    axis = _normalize_axis(axis, x)
-    output = _reduce_on_axis(x, axis, 'reduce_prod')
-
-    return _remove_dims(output, axis, keepdims)
-
-
-def logsumexp(x, axis=None, keepdims=False):
-    return log(sum(exp(x), axis=axis, keepdims=keepdims))
-
-
-def var(x, axis=None, keepdims=False):
-    m = mean(x, axis, keepdims=True)
-    devs_squared = C.square(x - m)
-    return mean(devs_squared,
axis=axis, keepdims=keepdims) - - -def std(x, axis=None, keepdims=False): - return C.sqrt(var(x, axis=axis, keepdims=keepdims)) - - -def expand_dims(x, axis=-1): - shape = list(int_shape(x)) - nones = _get_dynamic_axis_num(x) - index = axis if axis >= 0 else len(shape) + 1 - shape.insert(index, 1) - new_shape = shape[nones:] - new_shape = tuple( - [C.InferredDimension if _ is None else _ for _ in new_shape]) - result = C.reshape(x, new_shape) - if index < nones: - result._keras_shape = shape - return result - - -def squeeze(x, axis): - if isinstance(axis, tuple): - axis = list(axis) - if not isinstance(axis, list): - axis = [axis] - - shape = list(int_shape(x)) - - _axis = [] - for _ in axis: - if isinstance(_, int): - _axis.append(_ if _ >= 0 else _ + len(shape)) - - if len(_axis) == 0: - return x - - nones = _get_dynamic_axis_num(x) - for _ in sorted(_axis, reverse=True): - del shape[_] - - new_shape = shape[nones:] - - new_shape_temp = [] - for _ in new_shape: - if _ == C.FreeDimension: - new_shape_temp.append(C.InferredDimension) - else: - new_shape_temp.append(_) - - new_shape = tuple(new_shape_temp) - - return C.reshape(x, new_shape) - - -def tile(x, n): - if isinstance(n, int): - n = (n,) - elif isinstance(n, list): - n = tuple(n) - - shape = int_shape(x) - num_dynamic_axis = _get_dynamic_axis_num(x) - if len(n) < len(shape): # Padding the axis - n = tuple([1 for _ in range(len(shape) - len(n))]) + n - elif len(n) != len(shape): - raise NotImplementedError - - i = num_dynamic_axis - for i, rep in enumerate(n): - if i >= num_dynamic_axis and shape[i] is not None: - tmp = [x] * rep - x = C.splice(*tmp, axis=i - num_dynamic_axis) - i += 1 - - return x - - -def _normalize_axis(axis, x): - shape = int_shape(x) - ndim = len(shape) - - nones = _get_dynamic_axis_num(x) - - if nones > ndim: - raise ValueError( - 'CNTK Backend: tensor with keras shape: `%s` has ' - '%d cntk dynamic axis, this is not expected, please ' - 'double check the keras shape history.' - % (str(shape), nones)) - - # Current cntk does not support shape like (1, batch). so using the workaround - # here to mapping the correct axis. 
Will remove this tricky after we add support - # in native cntk op - cntk_axis = [] - dynamic_axis_index = 0 - for i in range(ndim): - if shape[i] is None and dynamic_axis_index < nones: - cntk_axis.append(x.dynamic_axes[dynamic_axis_index]) - dynamic_axis_index += 1 - else: - cntk_axis.append(i - dynamic_axis_index) - - if dynamic_axis_index < nones: - i = 0 - while dynamic_axis_index < nones: - cntk_axis[i] = x.dynamic_axes[dynamic_axis_index] - i += 1 - dynamic_axis_index += 1 - - while i < len(cntk_axis): - cntk_axis[i] -= nones - i += 1 - - if isinstance(axis, tuple): - _axis = list(axis) - elif isinstance(axis, int): - _axis = [axis] - elif isinstance(axis, list): - _axis = list(axis) - else: - _axis = axis - - if isinstance(_axis, list): - for i, a in enumerate(_axis): - if a is not None and a < 0: - _axis[i] = (a % ndim) - if _axis[i] is not None: - _axis[i] = cntk_axis[_axis[i]] - else: - if _axis is None: - _axis = C.Axis.all_axes() - - return _axis - - -def _reshape_dummy_dim(x, axis): - shape = list(x.shape) - - _axis = [_ + len(shape) if _ < 0 else _ for _ in axis] - - if shape.count(C.InferredDimension) > 1 or shape.count(C.FreeDimension) > 1: - result = x - for index in sorted(_axis, reverse=True): - result = C.reshape(result, - shape=(), - begin_axis=index, - end_axis=index + 1) - return result - else: - for index in sorted(_axis, reverse=True): - del shape[index] - - shape = [C.InferredDimension if _ == - C.FreeDimension else _ for _ in shape] - return C.reshape(x, shape) - - -def mean(x, axis=None, keepdims=False): - axis = _normalize_axis(axis, x) - output = _reduce_on_axis(x, axis, 'reduce_mean') - - return _remove_dims(output, axis, keepdims) - - -def any(x, axis=None, keepdims=False): - reduce_result = sum(x, axis, keepdims=keepdims) - any_matrix = C.element_select( - reduce_result, - ones_like(reduce_result), - zeros_like(reduce_result)) - if len(reduce_result.shape) == 0 and _get_dynamic_axis_num(x) == 0: - return C.reduce_sum(any_matrix) - else: - return any_matrix - - -def all(x, axis=None, keepdims=False): - reduce_result = prod(x, axis, keepdims=keepdims) - all_matrix = C.element_select( - reduce_result, - ones_like(reduce_result), - zeros_like(reduce_result)) - if len(reduce_result.shape) == 0 and _get_dynamic_axis_num(x) == 0: - return C.reduce_sum(all_matrix) - else: - return all_matrix - - -def classification_error(target, output, axis=-1): - return C.ops.reduce_mean( - C.equal( - argmax( - output, - axis=-1), - argmax( - target, - axis=-1)), - axis=C.Axis.all_axes()) - - -def argmax(x, axis=-1): - axis = [axis] - axis = _normalize_axis(axis, x) - output = C.ops.argmax(x, axis=axis[0]) - return _reshape_dummy_dim(output, axis) - - -def argmin(x, axis=-1): - axis = [axis] - axis = _normalize_axis(axis, x) - output = C.ops.argmin(x, axis=axis[0]) - return _reshape_dummy_dim(output, axis) - - -def square(x): - return C.square(x) - - -def abs(x): - return C.abs(x) - - -def sqrt(x): - return C.sqrt(x) - - -def exp(x): - return C.exp(x) - - -def log(x): - return C.log(x) - - -def round(x): - return C.round(x) - - -def sigmoid(x): - return C.sigmoid(x) - - -def sign(x): - return x / C.abs(x) - - -def pow(x, a): - return C.pow(x, a) - - -def clip(x, min_value, max_value): - if (isinstance(min_value, (int, float)) and - isinstance(max_value, (int, float))): - if max_value < min_value: - max_value = min_value - if min_value is None: - min_value = -np.inf - if max_value is None: - max_value = np.inf - return C.clip(x, min_value, max_value) - - -def 
binary_crossentropy(target, output, from_logits=False): - if from_logits: - output = C.sigmoid(output) - output = C.clip(output, epsilon(), 1.0 - epsilon()) - output = -target * C.log(output) - (1.0 - target) * C.log(1.0 - output) - return output - - -def get_variable_shape(x): - return int_shape(x) - - -def update(x, new_x): - return C.assign(x, new_x) - - -def moving_average_update(variable, value, momentum): - return C.assign(variable, variable * momentum + value * (1. - momentum)) - - -def update_add(x, increment): - result = x + increment - return C.assign(x, result) - - -def update_sub(x, decrement): - result = x - decrement - return C.assign(x, result) - - -def gradients(loss, variables): - # cntk does not support gradients as symbolic op, - # to hook up with keras model - # we will return a constant as place holder, the cntk learner will apply - # the gradient during training. - global grad_parameter_dict - if isinstance(variables, list) is False: - variables = [variables] - grads = [] - for v in variables: - g = C.constant(0, shape=v.shape, name='keras_grad_placeholder') - grads.append(g) - grad_parameter_dict[g] = v - return grads - - -def equal(x, y): - return C.equal(x, y) - - -def not_equal(x, y): - return C.not_equal(x, y) - - -def greater(x, y): - return C.greater(x, y) - - -def greater_equal(x, y): - return C.greater_equal(x, y) - - -def less(x, y): - return C.less(x, y) - - -def less_equal(x, y): - return C.less_equal(x, y) - - -def maximum(x, y): - return C.element_max(x, y) - - -def minimum(x, y): - return C.element_min(x, y) - - -def sin(x): - return C.sin(x) - - -def cos(x): - return C.cos(x) - - -def normalize_batch_in_training(x, gamma, beta, - reduction_axes, epsilon=1e-3): - if gamma is None: - if beta is None: - gamma = ones_like(x) - else: - gamma = ones_like(beta) - if beta is None: - if gamma is None: - beta = zeros_like(x) - else: - beta = zeros_like(gamma) - - mean, variant = _moments(x, _normalize_axis(reduction_axes, x)) - - if sorted(reduction_axes) == list(range(ndim(x)))[:-1]: - normalized = batch_normalization( - x, mean, variant, beta, gamma, epsilon) - else: - # need broadcasting - target_shape = [] - x_shape = int_shape(x) - # skip the batch axis - for axis in range(1, ndim(x)): - if axis in reduction_axes: - target_shape.append(1) - if ndim(gamma) > axis: - gamma = C.reduce_mean(gamma, axis - 1) - beta = C.reduce_mean(beta, axis - 1) - else: - target_shape.append(x_shape[axis]) - - broadcast_mean = C.reshape(mean, target_shape) - broadcast_var = C.reshape(variant, target_shape) - broadcast_gamma = C.reshape(gamma, target_shape) - broadcast_beta = C.reshape(beta, target_shape) - normalized = batch_normalization( - x, - broadcast_mean, - broadcast_var, - broadcast_beta, - broadcast_gamma, - epsilon) - - return normalized, mean, variant - - -def _moments(x, axes=None, shift=None, keep_dims=False): - _axes = tuple(axes) - if shift is None: - shift = x - # Compute true mean while keeping the dims for proper broadcasting. 
- for axis in _axes: - shift = C.reduce_mean(shift, axis=axis) - - shift = C.stop_gradient(shift) - shifted_mean = C.minus(x, shift) - for axis in _axes: - shifted_mean = C.reduce_mean(shifted_mean, axis=axis) - - variance_mean = C.square(C.minus(x, shift)) - for axis in _axes: - variance_mean = C.reduce_mean(variance_mean, axis=axis) - - variance = C.minus(variance_mean, C.square(shifted_mean)) - mean = C.plus(shifted_mean, shift) - - if not keep_dims: - mean = squeeze(mean, _axes) - variance = squeeze(variance, _axes) - - return mean, variance - - -def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3): - # The mean / var / beta / gamma may be processed by broadcast - # so it may have an extra batch axis with 1, it is not needed - # in cntk, need to remove those dummy axis. - if ndim(mean) == ndim(x) and shape(mean)[0] == 1: - mean = _reshape_dummy_dim(mean, [0]) - if ndim(var) == ndim(x) and shape(var)[0] == 1: - var = _reshape_dummy_dim(var, [0]) - - if gamma is None: - gamma = ones_like(var) - elif ndim(gamma) == ndim(x) and shape(gamma)[0] == 1: - gamma = _reshape_dummy_dim(gamma, [0]) - - if beta is None: - beta = zeros_like(mean) - elif ndim(beta) == ndim(x) and shape(beta)[0] == 1: - beta = _reshape_dummy_dim(beta, [0]) - - return (x - mean) / C.sqrt(var + epsilon) * gamma + beta - - -def concatenate(tensors, axis=-1): - if len(tensors) == 0: - return None - - axis = [axis] - axis = _normalize_axis(axis, tensors[0]) - return C.splice(*tensors, axis=axis[0]) - - -def stack(x, axis=0): - x = [expand_dims(t, axis) for t in x] - return concatenate(x, axis) - - -def flatten(x): - return reshape(x, (-1,)) - - -def reshape(x, shape): - shape_temp = [] - for _ in shape: - if _ == C.FreeDimension: - shape_temp.append(C.InferredDimension) - else: - shape_temp.append(_) - - shape = tuple(shape_temp) - - if isinstance(x, C.variables.Parameter): - return C.reshape(x, shape) - else: - num_dynamic_axis = _get_dynamic_axis_num(x) - - if num_dynamic_axis == 1 and len(shape) > 0 and shape[0] == -1: - # collapse axis with batch axis - if b_any(_ == C.InferredDimension for _ in x.shape) or b_any( - _ == C.FreeDimension for _ in x.shape): - warnings.warn( - 'Warning: CNTK backend does not support ' - 'collapse of batch axis with inferred dimension. ' - 'The reshape did not take place.') - return x - return _reshape_batch(x, shape) - else: - # no collapse, then first need to padding the shape - if num_dynamic_axis >= len(shape): - i = 0 - while i < len(shape): - if shape[i] is None or shape[i] == -1: - i += 1 - else: - break - shape = tuple( - [-1 for _ in range(num_dynamic_axis - i)]) + shape - - new_shape = list(shape) - new_shape = new_shape[num_dynamic_axis:] - new_shape = [ - C.InferredDimension if _ is None else _ for _ in new_shape] - return C.reshape(x, new_shape) - - -def permute_dimensions(x, pattern): - dims = len(int_shape(x)) - num_dynamic_axis = _get_dynamic_axis_num(x) - if isinstance(pattern, list): - current_layout = [i for i in range(dims)] - else: - current_layout = tuple([i for i in range(dims)]) - - if (num_dynamic_axis > 0 and - pattern[:num_dynamic_axis] != current_layout[:num_dynamic_axis]): - raise ValueError('CNTK backend: the permute pattern %s ' - 'requested permute on dynamic axis, ' - 'which is not supported. Please do permute ' - 'on static axis.' 
% pattern) - - axis = list(pattern) - axis = axis[num_dynamic_axis:] - axis = _normalize_axis(axis, x) - return C.transpose(x, axis) - - -def resize_images(x, height_factor, width_factor, data_format, - interpolation='nearest'): - if interpolation == 'nearest': - if data_format == 'channels_first': - output = repeat_elements(x, height_factor, axis=2) - output = repeat_elements(output, width_factor, axis=3) - return output - elif data_format == 'channels_last': - output = repeat_elements(x, height_factor, axis=1) - output = repeat_elements(output, width_factor, axis=2) - return output - else: - raise ValueError( - 'CNTK Backend: Invalid data_format: %s' % data_format) - else: - raise NotImplementedError( - 'CNTK only supports `nearest` interpolation.') - - -def resize_volumes(x, depth_factor, height_factor, width_factor, data_format): - if data_format == 'channels_first': - output = repeat_elements(x, depth_factor, axis=2) - output = repeat_elements(output, height_factor, axis=3) - output = repeat_elements(output, width_factor, axis=4) - return output - elif data_format == 'channels_last': - output = repeat_elements(x, depth_factor, axis=1) - output = repeat_elements(output, height_factor, axis=2) - output = repeat_elements(output, width_factor, axis=3) - return output - else: - raise ValueError('CNTK Backend: Invalid data_format: %s' % data_format) - - -def repeat_elements(x, rep, axis): - axis = _normalize_axis(axis, x) - axis = axis[0] - slices = [] - shape = x.shape - i = 0 - while i < shape[axis]: - tmp = C.ops.slice(x, axis, i, i + 1) - for _ in range(rep): - slices.append(tmp) - i += 1 - return C.splice(*slices, axis=axis) - - -def repeat(x, n): - # this is a workaround for recurrent layer - # if n is inferred dimension, - # we can't figure out how to repeat it in cntk now - # return the same x to take cntk broadcast feature - # to make the recurrent layer work. - # need to be fixed in GA. - if n is C.InferredDimension or n is C.FreeDimension: - return x - index = 1 - _get_dynamic_axis_num(x) - if index < 0 or index > 1: - raise NotImplementedError - - new_shape = list(x.shape) - new_shape.insert(index, 1) - new_shape = tuple(new_shape) - x = C.reshape(x, new_shape) - temp = [x] * n - return C.splice(*temp, axis=index) - - -def tanh(x): - return C.tanh(x) - - -def _static_rnn(step_function, inputs, initial_states, - go_backwards=False, mask=None, constants=None, - unroll=False, input_length=None): - - shape = int_shape(inputs) - dims = len(shape) - - uses_learning_phase = False - - if dims < 3: - raise ValueError('Input should be at least 3D.') - - # if the second axis is static axis, CNTK will do unroll by default - if shape[1] is None: - raise ValueError('CNTK Backend: the input of static rnn ' - 'has shape `%s`, the second axis ' - 'is not static. If you want to run ' - 'rnn with non-static axis, please try ' - 'dynamic rnn with sequence axis.' 
% shape) - if constants is None: - constants = [] - - if mask is not None: - mask_shape = int_shape(mask) - if len(mask_shape) == dims - 1: - mask = expand_dims(mask) - - nones = _get_dynamic_axis_num(inputs) - - states = tuple(initial_states) - - outputs = [] - - time_axis = 1 - nones if nones > 0 else 1 - - if go_backwards: - i = shape[1] - 1 - while i >= 0: - current = C.ops.slice(inputs, time_axis, i, i + 1) - # remove dummy dimension - current = squeeze(current, time_axis) - - output, new_states = step_function( - current, tuple(states) + tuple(constants)) - if getattr(output, '_uses_learning_phase', False): - uses_learning_phase = True - - if mask is not None: - mask_slice = C.ops.slice(mask, time_axis, i, i + 1) - mask_slice = squeeze(mask_slice, time_axis) - if len(outputs) == 0: - prev_output = zeros_like(output) - else: - prev_output = outputs[-1] - output = C.ops.element_select(mask_slice, output, prev_output) - - return_states = [] - for s, n_s in zip(states, new_states): - return_states.append( - C.ops.element_select( - mask_slice, n_s, s)) - new_states = return_states - outputs.append(output) - states = new_states - i -= 1 - else: - i = 0 - while i < shape[1]: - current = C.ops.slice(inputs, time_axis, i, i + 1) - # remove dummy dimension - current = squeeze(current, 1) - - output, new_states = step_function( - current, tuple(states) + tuple(constants)) - if getattr(output, '_uses_learning_phase', False): - uses_learning_phase = True - - if mask is not None: - mask_slice = C.ops.slice(mask, time_axis, i, i + 1) - mask_slice = squeeze(mask_slice, 1) - if len(outputs) == 0: - prev_output = zeros_like(output) - else: - prev_output = outputs[-1] - output = C.ops.element_select(mask_slice, output, prev_output) - - return_states = [] - for s, n_s in zip(states, new_states): - return_states.append( - C.ops.element_select( - mask_slice, n_s, s)) - new_states = return_states - outputs.append(output) - states = new_states[:len(states)] - i += 1 - - i = 1 - # add the time_step axis back - final_output = expand_dims(outputs[0], 1) - last_output = outputs[0] - while i < len(outputs): - # add the time_step axis back - output_slice = expand_dims(outputs[i], 1) - final_output = C.splice(final_output, output_slice, axis=time_axis) - last_output = outputs[i] - i += 1 - - last_output._uses_learning_phase = uses_learning_phase - return last_output, final_output, states - - -def rnn(step_function, inputs, initial_states, - go_backwards=False, mask=None, constants=None, - unroll=False, input_length=None): - - if not unroll and mask is not None: - warnings.warn( - 'CNTK Backend only supports accurate masking if ' - '`output == new_states[0]` for ' - '`output, new_states = step_function(inputs, states)`') - - shape = int_shape(inputs) - dims = len(shape) - - global uses_learning_phase - uses_learning_phase = False - - if dims < 3: - raise ValueError('CNTK Backend: the input of rnn has only rank %d ' - 'Need at least rank 3 to run RNN.' 
% dims) - - if _get_dynamic_axis_num(inputs) == 0 or unroll: - return _static_rnn( - step_function, - inputs, - initial_states, - go_backwards, - mask, - constants, - unroll, - input_length) - - if constants is None: - constants = [] - - num_time_step = shape[1] - if num_time_step is None and not has_seq_axis(inputs): - num_time_step = inputs.shape[0] - - initial = [] - for s in initial_states: - if _get_dynamic_axis_num(s) == 0: - if hasattr(C, 'to_batch'): - initial.append(C.to_batch(s)) - else: - initial.append(C.user_function(ConvertToBatch(s))) - else: - initial.append(s) - - need_convert = not has_seq_axis(inputs) - if go_backwards and need_convert is False: - raise NotImplementedError( - 'CNTK Backend: `go_backwards` is not supported with ' - 'variable-length sequences. Please specify a ' - 'static length for your sequences.') - - rnn_inputs = inputs - if need_convert: - if go_backwards: - rnn_inputs = reverse(rnn_inputs, 1) - - rnn_inputs = C.to_sequence(rnn_inputs) - - rnn_constants = [] - for constant in constants: - if isinstance(constant, list): - new_c = [] - for c in constant: - if _get_dynamic_axis_num(c) == 1: - new_c.append(C.sequence.broadcast_as(c, rnn_inputs)) - else: - new_c.append(c) - rnn_constants.append(new_c) - else: - if _get_dynamic_axis_num(constant) == 1: - rnn_constants.append(C.sequence.broadcast_as( - constant, - rnn_inputs)) - else: - rnn_constants.append(constant) - else: - rnn_constants = constants - - if mask is not None and not has_seq_axis(mask): - if go_backwards: - mask = reverse(mask, 1) - if len(int_shape(mask)) == 2: - mask = expand_dims(mask) - mask = C.to_sequence_like(mask, rnn_inputs) - - states = tuple(initial) - - with C.default_options(axis_offset=1): - def _recurrence(x, states, m): - # create place holder - place_holders = [C.placeholder( - dynamic_axes=x.dynamic_axes) for _ in states] - past_values = [] - for s, p in zip(states, place_holders): - past_values.append(C.sequence.past_value(p, s)) - new_output, new_states = step_function( - x, tuple(past_values) + tuple(rnn_constants)) - - if getattr(new_output, '_uses_learning_phase', False): - global uses_learning_phase - uses_learning_phase = True - - if m is not None: - new_states_temp = [] - for n, s in zip(new_states, past_values): - new_states_temp.append(C.element_select(m, n, s)) - - new_states = new_states_temp - - n_s = [] - for o, p in zip(new_states, place_holders): - n_s.append(o.replace_placeholders({p: o.output})) - if len(n_s) > 0: - new_output = n_s[-1] - return new_output, n_s - - final_output, final_states = _recurrence(rnn_inputs, states, mask) - last_output = C.sequence.last(final_output) - last_states = [C.sequence.last(s) for s in final_states] - - if need_convert: - final_output = C.sequence.unpack(final_output, 0, no_mask_output=True) - if num_time_step is not None and num_time_step is not C.FreeDimension: - final_output = _reshape_sequence(final_output, num_time_step) - - f_stats = [] - for l_s, i_s in zip(last_states, initial_states): - if _get_dynamic_axis_num(i_s) == 0 and _get_dynamic_axis_num(l_s) == 1: - if hasattr(C, 'unpack_batch'): - f_stats.append(C.unpack_batch(l_s)) - else: - f_stats.append( - C.user_function(ConvertToStatic(l_s, batch_size=i_s.shape[0]))) - else: - f_stats.append(l_s) - - last_output._uses_learning_phase = uses_learning_phase - return last_output, final_output, f_stats - - -def has_seq_axis(x): - return hasattr(x, 'dynamic_axes') and len(x.dynamic_axes) > 1 - - -def l2_normalize(x, axis=None): - axis = [axis] - axis = 
_normalize_axis(axis, x) - norm = C.sqrt(C.reduce_sum(C.square(x), axis=axis[0])) - return x / norm - - -def hard_sigmoid(x): - x = (0.2 * x) + 0.5 - x = C.clip(x, 0.0, 1.0) - return x - - -def conv1d(x, kernel, strides=1, padding='valid', - data_format=None, dilation_rate=1): - data_format = normalize_data_format(data_format) - - if padding == 'causal': - # causal (dilated) convolution: - left_pad = dilation_rate * (kernel.shape[0] - 1) - x = temporal_padding(x, (left_pad, 0)) - padding = 'valid' - - if data_format == 'channels_last': - x = C.swapaxes(x, 0, 1) - - # As of Keras 2.0.0, all kernels are normalized - # on the format `(steps, input_depth, depth)`, - # independently of `data_format`. - # CNTK expects `(depth, input_depth, steps)`. - kernel = C.swapaxes(kernel, 0, 2) - - padding = _preprocess_border_mode(padding) - - if dev.type() == 0 and dilation_rate != 1: - raise ValueError( - 'Dilated convolution on CPU is not supported by CNTK backend. ' - 'Please set `dilation_rate` to 1. You passed: %s' % (dilation_rate,)) - - dilation_rate = (1, dilation_rate) - - x = C.convolution( - kernel, - x, - strides=strides, - auto_padding=[False, padding], - dilation=dilation_rate) - - if data_format == 'channels_last': - x = C.swapaxes(x, 0, 1) - return x - - -def conv2d(x, kernel, strides=(1, 1), padding='valid', - data_format=None, dilation_rate=(1, 1)): - data_format = normalize_data_format(data_format) - - x = _preprocess_conv2d_input(x, data_format) - kernel = _preprocess_conv2d_kernel(kernel, data_format) - padding = _preprocess_border_mode(padding) - - if dev.type() == 0 and dilation_rate != (1, 1): - raise ValueError( - 'Dilated convolution on CPU is not supported by CNTK backend. ' - 'Please set `dilation_rate` to (1, 1). ' - 'You passed: %s' % (dilation_rate,)) - - dilation_rate = (1,) + dilation_rate - - x = C.convolution(kernel, - x, - strides, - auto_padding=[False, padding, padding], - dilation=dilation_rate) - - return _postprocess_conv2d_output(x, data_format) - - -def separable_conv1d(x, depthwise_kernel, pointwise_kernel, strides=1, - padding='valid', data_format=None, dilation_rate=1): - data_format = normalize_data_format(data_format) - if isinstance(strides, int): - strides = (strides,) - if isinstance(dilation_rate, int): - dilation_rate = (dilation_rate,) - - if dilation_rate != (1,): - raise ValueError( - 'Dilated separable 1D convolution is currently not supported ' - 'by CNTK backend. Please set `dilation_rate` to 1. 
' - 'You passed: %s' % (dilation_rate,)) - - if data_format == 'channels_last': - spatial_start_dim = 2 - else: - spatial_start_dim = 3 - x = expand_dims(x, spatial_start_dim) - depthwise_kernel = expand_dims(depthwise_kernel, 1) - pointwise_kernel = expand_dims(pointwise_kernel, 1) - strides = (1,) + strides + (1,) - dilation_rate = (1,) + dilation_rate - - x = _preprocess_conv2d_input(x, data_format) - depthwise_kernel = _preprocess_conv2d_kernel(depthwise_kernel, data_format) - depthwise_kernel = C.reshape(C.transpose(depthwise_kernel, (1, 0, 2, 3)), - (-1, 1) + depthwise_kernel.shape[2:]) - pointwise_kernel = _preprocess_conv2d_kernel(pointwise_kernel, data_format) - padding = _preprocess_border_mode(padding) - - x = C.convolution(depthwise_kernel, x, - strides=strides, - auto_padding=[False, padding, padding], - groups=x.shape[0]) - x = C.convolution(pointwise_kernel, x, - strides=(1, 1, 1), - auto_padding=[False]) - - x = _postprocess_conv2d_output(x, data_format) - return squeeze(x, spatial_start_dim) - - -def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1), - padding='valid', data_format=None, dilation_rate=(1, 1)): - data_format = normalize_data_format(data_format) - - x = _preprocess_conv2d_input(x, data_format) - depthwise_kernel = _preprocess_conv2d_kernel(depthwise_kernel, data_format) - depthwise_kernel = C.reshape(C.transpose(depthwise_kernel, (1, 0, 2, 3)), - (-1, 1) + depthwise_kernel.shape[2:]) - pointwise_kernel = _preprocess_conv2d_kernel(pointwise_kernel, data_format) - padding = _preprocess_border_mode(padding) - - if dilation_rate == (1, 1): - strides = (1,) + strides - x = C.convolution(depthwise_kernel, x, - strides=strides, - auto_padding=[False, padding, padding], - groups=x.shape[0]) - x = C.convolution(pointwise_kernel, x, - strides=(1, 1, 1), - auto_padding=[False]) - else: - if dilation_rate[0] != dilation_rate[1]: - raise ValueError('CNTK Backend: non-square dilation_rate is ' - 'not supported.') - if strides != (1, 1): - raise ValueError('Invalid strides for dilated convolution') - x = C.convolution(depthwise_kernel, x, - strides=dilation_rate[0], - auto_padding=[False, padding, padding]) - x = C.convolution(pointwise_kernel, x, - strides=(1, 1, 1), - auto_padding=[False]) - return _postprocess_conv2d_output(x, data_format) - - -def depthwise_conv2d(x, depthwise_kernel, strides=(1, 1), padding='valid', - data_format=None, dilation_rate=(1, 1)): - data_format = normalize_data_format(data_format) - - x = _preprocess_conv2d_input(x, data_format) - depthwise_kernel = _preprocess_conv2d_kernel(depthwise_kernel, data_format) - depthwise_kernel = C.reshape(C.transpose(depthwise_kernel, (1, 0, 2, 3)), - (-1, 1) + depthwise_kernel.shape[2:]) - padding = _preprocess_border_mode(padding) - if dilation_rate == (1, 1): - strides = (1,) + strides - x = C.convolution(depthwise_kernel, x, - strides=strides, - auto_padding=[False, padding, padding], - groups=x.shape[0]) - else: - if dilation_rate[0] != dilation_rate[1]: - raise ValueError('CNTK Backend: non-square dilation_rate is ' - 'not supported.') - if strides != (1, 1): - raise ValueError('Invalid strides for dilated convolution') - x = C.convolution(depthwise_kernel, x, - strides=dilation_rate[0], - auto_padding=[False, padding, padding], - groups=x.shape[0]) - return _postprocess_conv2d_output(x, data_format) - - -def conv3d(x, kernel, strides=(1, 1, 1), padding='valid', - data_format=None, dilation_rate=(1, 1, 1)): - data_format = normalize_data_format(data_format) - - x = 
_preprocess_conv3d_input(x, data_format) - kernel = _preprocess_conv3d_kernel(kernel, data_format) - padding = _preprocess_border_mode(padding) - - if dev.type() == 0 and dilation_rate != (1, 1, 1): - raise ValueError( - 'Dilated convolution on CPU is not supported by CNTK backend. ' - 'Please set `dilation_rate` to (1, 1, 1). ' - 'You passed: %s' % (dilation_rate,)) - - dilation_rate = (1,) + dilation_rate - - x = C.convolution( - kernel, - x, - strides, - auto_padding=[False, padding, padding, padding], - dilation=dilation_rate) - - return _postprocess_conv3d_output(x, data_format) - - -def conv3d_transpose(x, kernel, output_shape, strides=(1, 1, 1), - padding='valid', data_format=None): - data_format = normalize_data_format(data_format) - - x = _preprocess_conv3d_input(x, data_format) - kernel = _preprocess_conv3d_kernel(kernel, data_format) - padding = _preprocess_border_mode(padding) - strides = (1,) + strides - # cntk output_shape does not include batch axis - output_shape = output_shape[1:] - # in keras2, need handle output shape in different format - if data_format == 'channels_last': - output_shape = transpose_shape(output_shape, 'channels_first', - spatial_axes=(0, 1, 2)) - - x = C.convolution_transpose( - kernel, - x, - strides, - auto_padding=[ - False, - padding, - padding, - padding], - output_shape=output_shape) - return _postprocess_conv3d_output(x, data_format) - - -def pool2d(x, pool_size, strides=(1, 1), - padding='valid', data_format=None, - pool_mode='max'): - data_format = normalize_data_format(data_format) - - padding = _preprocess_border_mode(padding) - x = _preprocess_conv2d_input(x, data_format) - if pool_mode == 'max': - x = C.pooling( - x, - C.MAX_POOLING, - pool_size, - strides, - auto_padding=[padding]) - elif pool_mode == 'avg': - x = C.pooling( - x, - C.AVG_POOLING, - pool_size, - strides, - auto_padding=[padding]) - else: - raise ValueError('Invalid pooling mode: ' + str(pool_mode)) - return _postprocess_conv2d_output(x, data_format) - - -def pool3d(x, pool_size, strides=(1, 1, 1), padding='valid', - data_format=None, pool_mode='max'): - data_format = normalize_data_format(data_format) - - padding = _preprocess_border_mode(padding) - - x = _preprocess_conv3d_input(x, data_format) - - if pool_mode == 'max': - x = C.pooling( - x, - C.MAX_POOLING, - pool_size, - strides, - auto_padding=[padding]) - elif pool_mode == 'avg': - x = C.pooling( - x, - C.AVG_POOLING, - pool_size, - strides, - auto_padding=[padding]) - else: - raise ValueError('Invalid pooling mode: ' + str(pool_mode)) - - return _postprocess_conv3d_output(x, data_format) - - -def relu(x, alpha=0., max_value=None, threshold=0.): - - if alpha != 0.: - if threshold != 0.: - negative_part = C.relu(-x + threshold) - else: - negative_part = C.relu(-x) - - if threshold != 0.: - x = x * C.greater(x, threshold) - else: - x = C.relu(x) - - if max_value is not None: - x = C.clip(x, 0.0, max_value) - - if alpha != 0.: - x -= alpha * negative_part - - return x - - -def dropout(x, level, noise_shape=None, seed=None): - if level < 0. or level >= 1: - raise ValueError('CNTK Backend: Invalid dropout level %s, ' - 'must be in interval [0, 1].' 
% level) - return C.dropout(x, level) - - -def batch_flatten(x): - # cntk's batch axis is not in shape, - # so just flatten all the dim in x.shape - dim = np.prod(x.shape) - x = C.reshape(x, (-1,)) - x._keras_shape = (None, dim) - return x - - -def softmax(x, axis=-1): - return C.softmax(x, axis=axis) - - -def softplus(x): - return C.softplus(x) - - -def softsign(x): - return x / (1 + C.abs(x)) - - -def categorical_crossentropy(target, output, from_logits=False, axis=-1): - # Here, unlike other backends, the tensors lack a batch dimension: - axis_without_batch = -1 if axis == -1 else axis - 1 - output_dimensions = list(range(len(output.shape))) - if axis_without_batch != -1 and axis_without_batch not in output_dimensions: - raise ValueError( - '{}{}{}'.format( - 'Unexpected channels axis {}. '.format(axis_without_batch), - 'Expected to be -1 or one of the axes of `output`, ', - 'which has {} dimensions.'.format(len(output.shape)))) - # If the channels are not in the last axis, move them to be there: - if axis_without_batch != -1 and axis_without_batch != output_dimensions[-1]: - permutation = output_dimensions[:axis_without_batch] - permutation += output_dimensions[axis_without_batch + 1:] - permutation += [axis_without_batch] - output = C.transpose(output, permutation) - target = C.transpose(target, permutation) - if from_logits: - result = C.cross_entropy_with_softmax(output, target) - # cntk's result shape is (batch, 1), while keras expect (batch, ) - return C.reshape(result, ()) - else: - # scale preds so that the class probas of each sample sum to 1 - output /= C.reduce_sum(output, axis=-1) - # avoid numerical instability with epsilon clipping - output = C.clip(output, epsilon(), 1.0 - epsilon()) - return -sum(target * C.log(output), axis=-1) - - -def sparse_categorical_crossentropy(target, output, from_logits=False, axis=-1): - # Here, unlike other backends, the tensors lack a batch dimension: - axis_without_batch = -1 if axis == -1 else axis - 1 - output_dimensions = list(range(len(output.shape))) - if axis_without_batch != -1 and axis_without_batch not in output_dimensions: - raise ValueError( - '{}{}{}'.format( - 'Unexpected channels axis {}. 
'.format(axis_without_batch), - 'Expected to be -1 or one of the axes of `output`, ', - 'which has {} dimensions.'.format(len(output.shape)))) - target = C.one_hot(target, output.shape[axis_without_batch], - axis=axis_without_batch) - target = C.reshape(target, output.shape) - return categorical_crossentropy(target, output, from_logits, axis=axis) - - -class Function(object): - - def __init__(self, inputs, outputs, updates=[], **kwargs): - self.placeholders = inputs - self.trainer = None - self.unrelated_updates = None - self.updates = updates - if len(updates) > 0: - assert len(outputs) > 0 - self.loss = outputs[0] - # need group update by gradient place holder - u_ops = [] - unrelated_updates = [] - for update in updates: - if isinstance(update, tuple): - if len(update) != 2: - raise NotImplementedError - else: - u = C.assign(update[0], update[1]) - else: - u = update - - if len(u.arguments) == 0: - u_ops.append(u) - else: - unrelated_updates.append(u) - - update_func = C.combine([u.output for u in u_ops]) - - grads = update_func.find_all_with_name('keras_grad_placeholder') - - u_list = [] - p_list = [] - for g in grads: - if g in grad_parameter_dict: - p_list.append(grad_parameter_dict[g]) - u_list.append(g) - else: - raise ValueError( - 'CNTK backend: when constructing trainer, ' - 'found gradient node `%s` which is not ' - 'related to any parameters in the model. ' - 'Please double check how the gradient node ' - 'is constructed.' % g) - - if len(u_list) > 0: - learner = C.cntk_py.universal_learner( - p_list, u_list, update_func) - - criterion = ( - outputs[0], - outputs[1]) if len(outputs) > 1 else ( - outputs[0], - ) - self.trainer = C.trainer.Trainer( - outputs[0], criterion, [learner]) - self.trainer_output = tuple([f.output for f in criterion]) - elif len(u_ops) > 0: - unrelated_updates.extend(u_ops) - - if len(unrelated_updates) > 0: - self.unrelated_updates = C.combine( - [_.output for _ in unrelated_updates]) - - if self.trainer is None: - self.metrics_outputs = [f.output for f in outputs] - self.metrics_func = C.combine(self.metrics_outputs) - # cntk only could handle loss and 1 metric in trainer, for metrics more - # than 2, need manual eval - elif len(outputs) > 2: - self.metrics_outputs = [f.output for f in outputs[2:]] - self.metrics_func = C.combine(self.metrics_outputs) - else: - self.metrics_func = None - - @staticmethod - def _is_input_shape_compatible(input, placeholder): - if hasattr(input, 'shape') and hasattr(placeholder, 'shape'): - num_dynamic = get_num_dynamic_axis(placeholder) - input_shape = input.shape[num_dynamic:] - placeholder_shape = placeholder.shape - for i, p in zip(input_shape, placeholder_shape): - if i != p and p != C.InferredDimension and p != C.FreeDimension: - return False - return True - - def __call__(self, inputs): - global _LEARNING_PHASE_PLACEHOLDER - global _LEARNING_PHASE - assert isinstance(inputs, (list, tuple)) - feed_dict = {} - for tensor, value in zip(self.placeholders, inputs): - # cntk only support calculate on float, do auto cast here - if (hasattr(value, 'dtype') and - value.dtype != np.float32 and - value.dtype != np.float64): - value = value.astype(np.float32) - - if tensor == _LEARNING_PHASE_PLACEHOLDER: - _LEARNING_PHASE_PLACEHOLDER.value = np.asarray(value) - else: - # in current version cntk can't support input with variable - # length. Will support it in next release. 
- if not self._is_input_shape_compatible(value, tensor): - raise ValueError( - 'CNTK backend: The placeholder has been resolved ' - 'to shape `%s`, but input shape is `%s`. Currently ' - 'CNTK can not take variable length inputs. Please ' - 'pass inputs that have a static shape.' - % (str(tensor.shape), str(value.shape))) - feed_dict[tensor] = value - - updated = [] - if self.trainer is not None: - input_dict = {} - for argument in self.loss.arguments: - if argument in feed_dict: - input_dict[argument] = feed_dict[argument] - else: - raise ValueError( - 'CNTK backend: argument %s is not found in inputs. ' - 'Please double check the model and inputs in ' - '`train_function`.' % argument.name) - - result = self.trainer.train_minibatch( - input_dict, self.trainer_output) - - assert(len(result) == 2) - outputs = result[1] - for o in self.trainer_output: - updated.append(outputs[o]) - - if self.metrics_func is not None: - input_dict = {} - for argument in self.metrics_func.arguments: - if argument in feed_dict: - input_dict[argument] = feed_dict[argument] - else: - raise ValueError('CNTK backend: metrics argument %s ' - 'is not found in inputs. Please double ' - 'check the model and inputs.' % argument.name) - # Some ops (like dropout) won't be applied during "eval" in cntk. - # They only evaluated in training phase. To make it work, call - # "forward" method to let cntk know we want to evaluate them.from - # But the assign ops won't be executed under this mode, that's why - # we need this check. - if (self.unrelated_updates is None and - (_LEARNING_PHASE_PLACEHOLDER.value == 1.0 or - _LEARNING_PHASE == 1)): - _, output_values = self.metrics_func.forward( - input_dict, - self.metrics_func.outputs, - (self.metrics_func.outputs[0],), - as_numpy=False) - else: - output_values = self.metrics_func.eval( - input_dict, as_numpy=False) - if isinstance(output_values, dict): - for o in self.metrics_outputs: - value = output_values[o] - v = value.asarray() - updated.append(v) - else: - v = output_values.asarray() - for o in self.metrics_outputs: - updated.append(v) - - if self.unrelated_updates is not None: - input_dict = {} - for argument in self.unrelated_updates.arguments: - if argument in feed_dict: - input_dict[argument] = feed_dict[argument] - else: - raise ValueError( - 'CNTK backend: assign ops argument %s ' - 'is not found in inputs. Please double ' - 'check the model and inputs.' % argument.name) - self.unrelated_updates.eval(input_dict, as_numpy=False) - return updated - - -def function(inputs, outputs, updates=[], **kwargs): - return Function(inputs, outputs, updates=updates, **kwargs) - - -def temporal_padding(x, padding=(1, 1)): - assert len(padding) == 2 - num_dynamic_axis = _get_dynamic_axis_num(x) - assert len(x.shape) == 3 - (1 if num_dynamic_axis > 0 else 0) - return pad(x, [padding], 'channels_last', num_dynamic_axis) - - -def _padding(x, pattern, axis): # pragma: no cover - base_shape = x.shape - if b_any([dim < 0 for dim in base_shape]): - raise ValueError('CNTK Backend: padding input tensor with ' - 'shape `%s` contains non-specified dimension, ' - 'which is not supported. Please give fixed ' - 'dimension to enable padding.' 
% base_shape) - if pattern[0] > 0: - prefix_shape = list(base_shape) - prefix_shape[axis] = pattern[0] - prefix_shape = tuple(prefix_shape) - x = C.splice(C.constant(value=0, shape=prefix_shape), x, axis=axis) - base_shape = x.shape - if pattern[1] > 0: - postfix_shape = list(base_shape) - postfix_shape[axis] = pattern[1] - postfix_shape = tuple(postfix_shape) - x = C.splice(x, C.constant(value=0, shape=postfix_shape), axis=axis) - return x - - -def pad(x, pad_info, data_format, num_dynamic_axis): - if hasattr(C, 'pad'): - pattern = [list(p) for p in pad_info] - if data_format == 'channels_first': - pattern = [[0, 0]] + pattern - else: - pattern = pattern + [[0, 0]] - if num_dynamic_axis == 0: - pattern = [[0, 0]] + pattern - return C.pad(x, pattern=pattern) - else: # pragma: no cover - for (a, p) in enumerate(pad_info): - x = _padding(x, p, - a + (1 if num_dynamic_axis == 0 else 0) + - (1 if data_format == 'channels_first' else 0)) - return x - - -def spatial_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None): - assert len(padding) == 2 - assert len(padding[0]) == 2 - assert len(padding[1]) == 2 - data_format = normalize_data_format(data_format) - - num_dynamic_axis = _get_dynamic_axis_num(x) - assert len(x.shape) == 4 - (1 if num_dynamic_axis > 0 else 0) - return pad(x, padding, data_format, num_dynamic_axis) - - -def spatial_3d_padding(x, padding=((1, 1), (1, 1), (1, 1)), data_format=None): - assert len(padding) == 3 - assert len(padding[0]) == 2 - assert len(padding[1]) == 2 - assert len(padding[2]) == 2 - data_format = normalize_data_format(data_format) - - num_dynamic_axis = _get_dynamic_axis_num(x) - assert len(x.shape) == 5 - (1 if num_dynamic_axis > 0 else 0) - return pad(x, padding, data_format, num_dynamic_axis) - - -def one_hot(indices, num_classes): - return C.one_hot(indices, num_classes) - - -def get_value(x): - if isinstance( - x, - (C.variables.Parameter, C.variables.Constant)): - return x.value - else: - return eval(x) - - -def batch_get_value(xs): - result = [get_value(x) for x in xs] - - return result - - -def set_value(x, value): - if (isinstance(x, C.variables.Parameter) or - isinstance(x, C.variables.Constant)): - if isinstance(value, (float, int)): - value = np.full(x.shape, value, dtype=floatx()) - x.value = value - else: - raise NotImplementedError - - -def print_tensor(x, message=''): - return C.user_function( - LambdaFunc(x, - when=lambda x: True, - execute=lambda x: print(message))) - - -def batch_set_value(tuples): - for t in tuples: - x = t[0] - value = t[1] - if isinstance(value, np.ndarray) is False: - value = np.asarray(value) - if isinstance(x, C.variables.Parameter): - x.value = value - else: - raise NotImplementedError - - -def stop_gradient(variables): - if isinstance(variables, (list, tuple)): - return map(C.stop_gradient, variables) - else: - return C.stop_gradient(variables) - - -def switch(condition, then_expression, else_expression): - ndim_cond = ndim(condition) - ndim_expr = ndim(then_expression) - if ndim_cond > ndim_expr: - raise ValueError('Rank of condition should be less' - ' than or equal to rank of then and' - ' else expressions. 
ndim(condition)=' + - str(ndim_cond) + ', ndim(then_expression)' - '=' + str(ndim_expr)) - elif ndim_cond < ndim_expr: - shape_expr = int_shape(then_expression) - ndim_diff = ndim_expr - ndim_cond - for i in range(ndim_diff): - condition = expand_dims(condition) - condition = tile(condition, shape_expr[ndim_cond + i]) - return C.element_select(condition, - then_expression, - else_expression) - - -def elu(x, alpha=1.): - res = C.elu(x) - if alpha == 1: - return res - else: - return C.element_select(C.greater(x, 0), res, alpha * res) - - -def in_top_k(predictions, targets, k): - _targets = C.one_hot(targets, predictions.shape[-1]) - result = [C.classification_error(predictions[i], _targets[i], topN=k) - for i in range(predictions.shape[0])] - result = concatenate(result, axis=-1) - return 1 - C.reshape(result, shape=(-1,)) - - -def conv2d_transpose(x, kernel, output_shape, strides=(1, 1), - padding='valid', data_format=None, dilation_rate=(1, 1)): - data_format = normalize_data_format(data_format) - - x = _preprocess_conv2d_input(x, data_format) - kernel = _preprocess_conv2d_kernel(kernel, data_format) - padding = _preprocess_border_mode(padding) - strides = (1,) + strides - # cntk output_shape does not include batch axis - output_shape = output_shape[1:] - # in keras2, need handle output shape in different format - if data_format == 'channels_last': - output_shape = transpose_shape(output_shape, 'channels_first', - spatial_axes=(0, 1)) - - dilation_rate = (1,) + dilation_rate - - x = C.convolution_transpose( - kernel, - x, - strides, - auto_padding=[ - False, - padding, - padding], - output_shape=output_shape, - dilation=dilation_rate) - return _postprocess_conv2d_output(x, data_format) - - -def identity(x, name=None): - if name is None: - name = '%s_alias' % x.name - return C.alias(x, name=name) - - -def _preprocess_conv2d_input(x, data_format): - if data_format == 'channels_last': - # TF uses the last dimension as channel dimension, - # instead of the 2nd one. - # TH input shape: (samples, input_depth, rows, cols) - # TF input shape: (samples, rows, cols, input_depth) - x = C.transpose(x, (2, 0, 1)) - return x - - -def _preprocess_conv2d_kernel(kernel, data_format): - # As of Keras 2.0.0, all kernels are normalized - # on the format `(rows, cols, input_depth, depth)`, - # independently of `data_format`. - # CNTK expects `(depth, input_depth, rows, cols)`. - kernel = C.transpose(kernel, (3, 2, 0, 1)) - return kernel - - -def _preprocess_border_mode(padding): - if padding == 'same': - padding = True - elif padding == 'valid': - padding = False - else: - raise ValueError('Invalid border mode: ' + str(padding)) - return padding - - -def _postprocess_conv2d_output(x, data_format): - if data_format == 'channels_last': - x = C.transpose(x, (1, 2, 0)) - return x - - -def _preprocess_conv3d_input(x, data_format): - if data_format == 'channels_last': - # TF uses the last dimension as channel dimension, - # instead of the 2nd one. 
- # TH input shape: (samples, input_depth, conv_dim1, conv_dim2, conv_dim3) - # TF input shape: (samples, conv_dim1, conv_dim2, conv_dim3, - # input_depth) - x = C.transpose(x, (3, 0, 1, 2)) - return x - - -def _preprocess_conv3d_kernel(kernel, dim_ordering): - kernel = C.transpose(kernel, (4, 3, 0, 1, 2)) - return kernel - - -def _postprocess_conv3d_output(x, dim_ordering): - if dim_ordering == 'channels_last': - x = C.transpose(x, (1, 2, 3, 0)) - return x - - -def _get_dynamic_axis_num(x): - if hasattr(x, 'dynamic_axes'): - return len(x.dynamic_axes) - else: - return 0 - - -def _contain_seqence_axis(x): - if _get_dynamic_axis_num(x) > 1: - return x.dynamic_axes[1] == C.Axis.default_dynamic_axis() - else: - return False - - -def get_num_dynamic_axis(x): - return _get_dynamic_axis_num(x) - - -def _reduce_on_axis(x, axis, reduce_fun_name): - if isinstance(axis, list): - for a in axis: - if isinstance(a, C.Axis) \ - and a != C.Axis.default_batch_axis() \ - and hasattr(C.sequence, reduce_fun_name): - x = getattr(C.sequence, reduce_fun_name)(x, a) - else: - x = getattr(C, reduce_fun_name)(x, a) - else: - x = getattr(C, reduce_fun_name)(x, axis) - return x - - -def _reshape_sequence(x, time_step): - tmp_shape = list(int_shape(x)) - tmp_shape[1] = time_step - return reshape(x, tmp_shape) - - -def local_conv1d(inputs, kernel, kernel_size, strides, data_format=None): - data_format = normalize_data_format(data_format) - - stride = strides[0] - kernel_shape = int_shape(kernel) - output_length, feature_dim, filters = kernel_shape - - xs = [] - for i in range(output_length): - slice_length = py_slice(i * stride, - i * stride + kernel_size[0]) - xs.append(reshape(inputs[:, slice_length, :], - (-1, 1, feature_dim))) - x_aggregate = concatenate(xs, axis=1) - # transpose kernel to output_filters first, to apply broadcast - weight = permute_dimensions(kernel, (2, 0, 1)) - # Shape: (batch, filters, output_length, input_length * kernel_size) - output = x_aggregate * weight - # Shape: (batch, filters, output_length) - output = sum(output, axis=3) - # Shape: (batch, output_length, filters) - return permute_dimensions(output, (0, 2, 1)) - - -def local_conv2d(inputs, - kernel, - kernel_size, - strides, - output_shape, - data_format=None): - data_format = normalize_data_format(data_format) - - stride_row, stride_col = strides - output_row, output_col = output_shape - kernel_shape = int_shape(kernel) - _, feature_dim, filters = kernel_shape - xs = [] - - for i in range(output_row): - for j in range(output_col): - slice_row = py_slice(i * stride_row, - i * stride_row + kernel_size[0]) - slice_col = py_slice(j * stride_col, - j * stride_col + kernel_size[1]) - if data_format == 'channels_first': - xs.append(reshape(inputs[:, :, slice_row, slice_col], - (-1, 1, feature_dim))) - else: - xs.append(reshape(inputs[:, slice_row, slice_col, :], - (-1, 1, feature_dim))) - x_aggregate = concatenate(xs, axis=1) - # transpose kernel to put filters first - weight = permute_dimensions(kernel, (2, 0, 1)) - # shape: batch, filters, output_length, input_length * kernel_size - output = x_aggregate * weight - # shape: batch, filters, output_length - output = sum(output, axis=3) - # shape: batch, filters, row, col - output = reshape(output, - (-1, filters, output_row, output_col)) - - if data_format == 'channels_last': - # shape: batch, row, col, filters - output = permute_dimensions(output, (0, 2, 3, 1)) - - return output - - -def reverse(x, axes): - if isinstance(axes, int): - axes = [axes] - cntk_axes = _normalize_axis(axes, x) - 
begin_index = [0 for _ in cntk_axes] - end_index = [0 for _ in cntk_axes] - strides = [-1 for _ in cntk_axes] - return C.slice(x, cntk_axes, begin_index, end_index, strides) - - -def slice(x, start, size): - if not (len(int_shape(x)) == len(start) == len(size)): - raise ValueError('The dimension and the size of indices should match.') - out = x[tuple([py_slice(i, i + j) for (i, j) in zip(start, size)])] - out._keras_shape = tuple(size) - return out - - -def _reshape_batch(x, shape): - # there is a bug in cntk 2.1's unpack_batch implementation - if hasattr(C, 'unpack_batch') and _get_cntk_version() >= 2.2: - const_a = C.unpack_batch(x) - const_a = C.reshape(const_a, shape) - return C.to_batch(const_a) - else: - return C.user_function(ReshapeBatch(x, shape[1:])) - - -def _get_cntk_version(): - version = C.__version__ - if version.endswith('+'): - version = version[:-1] - # for hot fix, ignore all the . except the first one. - if len(version) > 2 and version[1] == '.': - version = version[:2] + version[2:].replace('.', '') - try: - return float(version) - except: - warnings.warn( - 'CNTK backend warning: CNTK version not detected. ' - 'Will using CNTK 2.0 GA as default.') - return float(2.0) - - -class ReshapeBatch(C.ops.functions.UserFunction): - def __init__(self, input, shape, name='reshape_with_batch'): - super(ReshapeBatch, self).__init__([input], as_numpy=False, name=name) - self.from_shape = input.shape - self.target_shape = shape - - def infer_outputs(self): - batch_axis = C.Axis.default_batch_axis() - return [ - C.output_variable( - self.target_shape, - self.inputs[0].dtype, - [batch_axis])] - - def forward(self, arguments, device=None, outputs_to_retain=None): - num_element = arguments.shape()[0] * \ - np.prod(np.asarray(self.from_shape)) - num_static_element = np.prod(np.asarray(self.target_shape)) - num_batch = int(num_element / num_static_element) - result = arguments.data().as_shape((num_batch,) + self.target_shape) - return None, C.cntk_py.Value(result) - - def backward(self, state, root_gradients): - grad_array_view = root_gradients.data() - num_element = root_gradients.shape()[0] * np.prod( - np.asarray(self.target_shape)) - num_static_element = np.prod(np.asarray(self.from_shape)) - num_old_batch = int(num_element / num_static_element) - return C.cntk_py.Value( - grad_array_view.as_shape( - (num_old_batch,) + self.from_shape)) - - -class ConvertToBatch(C.ops.functions.UserFunction): - """Converts input first axis to CNTK batch axis. - - We may introduce this operation in CNTK native - implementation later. - - # Arguments - inputs: a cntk variable (parameter/constant) - name: name of this node - """ - - def __init__(self, input, name='convert_to_batch'): - super(ConvertToBatch, self).__init__( - [input], as_numpy=False, name=name) - - def infer_outputs(self): - batch_axis = C.Axis.default_batch_axis() - return [ - C.output_variable( - self.inputs[0].shape[1:], - self.inputs[0].dtype, - [batch_axis])] - - def forward(self, arguments, device=None, outputs_to_retain=None): - return None, C.cntk_py.Value(arguments.data()) - - def backward(self, state, root_gradients): - return C.cntk_py.Value(root_gradients.data()) - - -class ConvertToStatic(C.ops.functions.UserFunction): - """Converts input first axis to CNTK static axis. - - We may introduce this operation in CNTK native - implementation later. - - # Arguments - inputs: a cntk tensor which has batch axis - batch_size: size of batch axis. - name: name of this node. 
- """ - - def __init__(self, input, batch_size, name='convert_to_static'): - super(ConvertToStatic, self).__init__( - [input], as_numpy=False, name=name) - self.target_shape = (batch_size,) + input.shape - - def infer_outputs(self): - return [ - C.output_variable( - self.target_shape, - self.inputs[0].dtype, - [])] - - def forward(self, arguments, device=None, outputs_to_retain=None): - return None, C.cntk_py.Value(arguments.data()) - - def backward(self, state, root_gradients): - return C.cntk_py.Value(root_gradients.data()) - - -class LambdaFunc(C.ops.functions.UserFunction): - def __init__(self, - arg, - when=lambda arg: True, - execute=lambda arg: print(arg), - name=''): - self.when = when - self.execute = execute - - super(LambdaFunc, self).__init__([arg], name=name) - - def infer_outputs(self): - return [ - C.output_variable( - self.inputs[0].shape, - self.inputs[0].dtype, - self.inputs[0].dynamic_axes)] - - def forward(self, argument, device=None, outputs_to_retain=None): - if self.when(argument): - self.execute(argument) - - return None, argument - - def backward(self, state, root_gradients): - return root_gradients - - -def reset_uids(): - global _UID_PREFIXES - _UID_PREFIXES = defaultdict(int) - - -def to_dense(tensor): - raise NotImplementedError - - -def cumsum(x, axis=0): - dim = x.shape[axis] - U = C.constant(np.triu(np.ones((dim, dim))).astype(x.dtype)) - if axis != -1: - x = C.swapaxes(x, -1, axis) - out = C.times(x, U) - if axis != -1: - out = C.swapaxes(out, -1, axis) - return out - - -def cumprod(x, axis=0): - shape = x.shape - out = x - for rep in range(shape[axis] - 1): - sliced_shape = list(shape) - sliced_shape[axis] = rep + 1 - if axis == 0: - _x = x[rep:(rep + 1)] - elif axis == 1: - _x = x[:, rep:(rep + 1)] - elif axis == 2: - _x = x[:, :, rep:(rep + 1)] - y = concatenate([ones(sliced_shape, dtype=x.dtype), - repeat_elements(_x, rep=shape[axis] - 1 - rep, axis=axis)], - axis=axis) - out = C.element_times(out, y) - return out - - -def arange(start, stop=None, step=1, dtype='int32'): - raise NotImplementedError - - -def ctc_label_dense_to_sparse(labels, label_lengths): - raise NotImplementedError - - -def ctc_batch_cost(y_true, y_pred, input_length, label_length): - raise NotImplementedError - - -def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1, - merge_repeated=False): - raise NotImplementedError - - -def map_fn(fn, elems, name=None, dtype=None): - raise NotImplementedError - - -def foldl(fn, elems, initializer=None, name=None): - """Reduce `elems` by `fn` combined them from left to right on dimension 0. - - # Arguments - fn: Callable that will be called upon each element in `elems` - (and on the optional `initializer`) passed as a second argument. - The first argument passed to `fn` is the accumulator which is the - accumulated value calculated from the preceding invocation of `fn`. - Example For `fn`: - ```python - lambda acc, x: acc + x - ``` - elems: Tensor - initializer: (optional) Tensor, the initial value for the accumulator. - In case of None value is provided during the call - the first value is used (`elems[0]`) as `initializer` from `elems` - name: (optional) String, name for the foldl node in the graph. - - # Returns - Same type and shape as `initializer` - - # Raises: - TypeError: if `fn` is not callable. - TypeError: if `initializer` is neither a tensor nor None value. - TypeError: if `elems` is not a tensor. 
- """ - if not callable(fn): - raise TypeError("`fn` must be callable.") - if initializer is not None and not is_tensor(initializer): - raise TypeError("`initializer` must be a tensor or None") - if not is_tensor(elems): - raise TypeError('`elems` must be a tensor') - - if initializer is None and shape(elems)[0] > 1: - initializer = elems[0] - elems = elems[1:] - elif initializer is None: - initializer = elems[0] - elems = None - - accumulator = initializer - if elems is not None: - for i in range(shape(elems)[0]): - accumulator = fn(accumulator, elems[i]) - - if name is not None: - accumulator.name = str(name) - - return reshape(accumulator, shape(initializer)[1:]) - - -def foldr(fn, elems, initializer=None, name=None): - """Reduce `elems` by `fn` combined them from right to left on dimension 0. - - # Arguments - fn: Callable that will be called upon each element in `elems` - (and on the optional `initializer`) passed as a second argument. - The first argument passed to `fn` is the accumulator which is the - accumulated value calculated from the preceding invocation of `fn`. - Example For `fn`: - ```python - lambda acc, x: acc + x - ``` - elems: Tensor - initializer: (optional) Tensor, the initial value for the accumulator. - In case of None value is provided during the call - the last value is used (`elems[-1]`) as `initializer` from `elems` - name: (optional) String, name for the foldr node in the graph. - - # Returns - Same type and shape as `initializer` - - # Raises: - TypeError: if `fn` is not callable. - TypeError: if `initializer` is neither a tensor nor None value. - TypeError: if `elems` is not a tensor. - """ - if not callable(fn): - raise TypeError("`fn` must be callable.") - if initializer is not None and not is_tensor(initializer): - raise TypeError("`initializer` must be a tensor or None") - if not is_tensor(elems): - raise TypeError('`elems` must be a tensor') - - if initializer is None and shape(elems)[0] > 1: - initializer = elems[-1] - elems = elems[:-1] - elif initializer is None: - initializer = elems[0] - elems = None - - accumulator = initializer - if elems is not None: - for i in range(shape(elems)[0]): - accumulator = fn(accumulator, elems[-i]) - - if name is not None: - accumulator.name = str(name) - - return reshape(accumulator, shape(initializer)[1:]) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -# the type of float to use throughout the session. -_FLOATX = 'float32' -_EPSILON = 1e-7 -_IMAGE_DATA_FORMAT = 'channels_last' - - -def epsilon(): - """Returns the value of the fuzz factor used in numeric expressions. - - # Returns - A float. - - # Example - ```python - >>> keras.backend.epsilon() - 1e-07 - ``` - """ - return _EPSILON - - -def set_epsilon(e): - """Sets the value of the fuzz factor used in numeric expressions. - - # Arguments - e: float. New value of epsilon. - - # Example - ```python - >>> from keras import backend as K - >>> K.epsilon() - 1e-07 - >>> K.set_epsilon(1e-05) - >>> K.epsilon() - 1e-05 - ``` - """ - - global _EPSILON - _EPSILON = float(e) - - -def floatx(): - """Returns the default float type, as a string. - (e.g. 'float16', 'float32', 'float64'). - - # Returns - String, the current default float type. - - # Example - ```python - >>> keras.backend.floatx() - 'float32' - ``` - """ - return _FLOATX - - -def set_floatx(floatx): - """Sets the default float type. - - # Arguments - floatx: String, 'float16', 'float32', or 'float64'. 
- - # Example - ```python - >>> from keras import backend as K - >>> K.floatx() - 'float32' - >>> K.set_floatx('float16') - >>> K.floatx() - 'float16' - ``` - """ - global _FLOATX - if floatx not in {'float16', 'float32', 'float64'}: - raise ValueError('Unknown floatx type: ' + str(floatx)) - _FLOATX = str(floatx) - - -def cast_to_floatx(x): - """Cast a Numpy array to the default Keras float type. - - # Arguments - x: Numpy array. - - # Returns - The same Numpy array, cast to its new type. - - # Example - ```python - >>> from keras import backend as K - >>> K.floatx() - 'float32' - >>> arr = numpy.array([1.0, 2.0], dtype='float64') - >>> arr.dtype - dtype('float64') - >>> new_arr = K.cast_to_floatx(arr) - >>> new_arr - array([ 1., 2.], dtype=float32) - >>> new_arr.dtype - dtype('float32') - ``` - """ - return np.asarray(x, dtype=_FLOATX) - - -def image_data_format(): - """Returns the default image data format convention. - - # Returns - A string, either `'channels_first'` or `'channels_last'` - - # Example - ```python - >>> keras.backend.image_data_format() - 'channels_first' - ``` - """ - return _IMAGE_DATA_FORMAT - - -def set_image_data_format(data_format): - """Sets the value of the data format convention. - - # Arguments - data_format: string. `'channels_first'` or `'channels_last'`. - - # Example - ```python - >>> from keras import backend as K - >>> K.image_data_format() - 'channels_first' - >>> K.set_image_data_format('channels_last') - >>> K.image_data_format() - 'channels_last' - ``` - """ - global _IMAGE_DATA_FORMAT - if data_format not in {'channels_last', 'channels_first'}: - raise ValueError('Unknown data_format:', data_format) - _IMAGE_DATA_FORMAT = str(data_format) - - -def normalize_data_format(value): - """Checks that the value correspond to a valid data format. - - # Arguments - value: String or None. `'channels_first'` or `'channels_last'`. - - # Returns - A string, either `'channels_first'` or `'channels_last'` - - # Example - ```python - >>> from keras import backend as K - >>> K.normalize_data_format(None) - 'channels_first' - >>> K.normalize_data_format('channels_last') - 'channels_last' - ``` - - # Raises - ValueError: if `value` or the global `data_format` invalid. - """ - if value is None: - value = image_data_format() - data_format = value.lower() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('The `data_format` argument must be one of ' - '"channels_first", "channels_last". Received: ' + - str(value)) - return data_format - - -# Legacy methods - -def set_image_dim_ordering(dim_ordering): - """Legacy setter for `image_data_format`. - - # Arguments - dim_ordering: string. `tf` or `th`. - - # Example - ```python - >>> from keras import backend as K - >>> K.image_data_format() - 'channels_first' - >>> K.set_image_data_format('channels_last') - >>> K.image_data_format() - 'channels_last' - ``` - - # Raises - ValueError: if `dim_ordering` is invalid. - """ - global _IMAGE_DATA_FORMAT - if dim_ordering not in {'tf', 'th'}: - raise ValueError('Unknown dim_ordering:', dim_ordering) - if dim_ordering == 'th': - data_format = 'channels_first' - else: - data_format = 'channels_last' - _IMAGE_DATA_FORMAT = data_format - - -def image_dim_ordering(): - """Legacy getter for `image_data_format`. 
- - # Returns - string, one of `'th'`, `'tf'` - """ - if _IMAGE_DATA_FORMAT == 'channels_first': - return 'th' - else: - return 'tf' -from __future__ import absolute_import -from __future__ import print_function -import os -import json -import sys -import importlib -from .common import epsilon -from .common import floatx -from .common import set_epsilon -from .common import set_floatx -from .common import cast_to_floatx -from .common import image_data_format -from .common import set_image_data_format -from .common import normalize_data_format - -# Set Keras base dir path given KERAS_HOME env variable, if applicable. -# Otherwise either ~/.keras or /tmp. -if 'KERAS_HOME' in os.environ: - _keras_dir = os.environ.get('KERAS_HOME') -else: - _keras_base_dir = os.path.expanduser('~') - if not os.access(_keras_base_dir, os.W_OK): - _keras_base_dir = '/tmp' - _keras_dir = os.path.join(_keras_base_dir, '.keras') - -# Default backend: TensorFlow. -_BACKEND = 'tensorflow' - -# Attempt to read Keras config file. -_config_path = os.path.expanduser(os.path.join(_keras_dir, 'keras.json')) -if os.path.exists(_config_path): - try: - with open(_config_path) as f: - _config = json.load(f) - except ValueError: - _config = {} - _floatx = _config.get('floatx', floatx()) - assert _floatx in {'float16', 'float32', 'float64'} - _epsilon = _config.get('epsilon', epsilon()) - assert isinstance(_epsilon, float) - _backend = _config.get('backend', _BACKEND) - _image_data_format = _config.get('image_data_format', - image_data_format()) - assert _image_data_format in {'channels_last', 'channels_first'} - - set_floatx(_floatx) - set_epsilon(_epsilon) - set_image_data_format(_image_data_format) - _BACKEND = _backend - -# Save config file, if possible. -if not os.path.exists(_keras_dir): - try: - os.makedirs(_keras_dir) - except OSError: - # Except permission denied and potential race conditions - # in multi-threaded environments. - pass - -if not os.path.exists(_config_path): - _config = { - 'floatx': floatx(), - 'epsilon': epsilon(), - 'backend': _BACKEND, - 'image_data_format': image_data_format() - } - try: - with open(_config_path, 'w') as f: - f.write(json.dumps(_config, indent=4)) - except IOError: - # Except permission denied. - pass - -# Set backend based on KERAS_BACKEND flag, if applicable. -if 'KERAS_BACKEND' in os.environ: - _backend = os.environ['KERAS_BACKEND'] - if _backend: - _BACKEND = _backend - -# Import backend functions. -if _BACKEND == 'cntk': - sys.stderr.write('Using CNTK backend\n') - from .cntk_backend import * -elif _BACKEND == 'theano': - sys.stderr.write('Using Theano backend.\n') - from .theano_backend import * -elif _BACKEND == 'tensorflow': - sys.stderr.write('Using TensorFlow backend.\n') - from .tensorflow_backend import * -else: - # Try and load external backend. - try: - backend_module = importlib.import_module(_BACKEND) - entries = backend_module.__dict__ - # Check if valid backend. - # Module is a valid backend if it has the required entries. - required_entries = ['placeholder', 'variable', 'function'] - for e in required_entries: - if e not in entries: - raise ValueError( - 'Invalid backend. Missing required entry : ' + e) - namespace = globals() - for k, v in entries.items(): - # Make sure we don't override any entries from common, such as epsilon. 
- if k not in namespace: - namespace[k] = v - sys.stderr.write('Using ' + _BACKEND + ' backend.\n') - except ImportError: - raise ValueError('Unable to import backend : ' + str(_BACKEND)) - - -def backend(): - """Publicly accessible method - for determining the current backend. - - # Returns - String, the name of the backend Keras is currently using. - - # Example - ```python - >>> keras.backend.backend() - 'tensorflow' - ``` - """ - return _BACKEND -"""Utilities for backend functionality checks.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import scipy.signal as signal -import scipy as sp -from .common import floatx -from keras.utils.generic_utils import transpose_shape -from keras.utils import to_categorical - - -def normalize_conv(func): - def wrapper(*args, **kwargs): - x = args[0] - w = args[1] - if x.ndim == 3: - w = np.flipud(w) - w = np.transpose(w, (1, 2, 0)) - if kwargs['data_format'] == 'channels_last': - x = np.transpose(x, (0, 2, 1)) - elif x.ndim == 4: - w = np.fliplr(np.flipud(w)) - w = np.transpose(w, (2, 3, 0, 1)) - if kwargs['data_format'] == 'channels_last': - x = np.transpose(x, (0, 3, 1, 2)) - else: - w = np.flip(np.fliplr(np.flipud(w)), axis=2) - w = np.transpose(w, (3, 4, 0, 1, 2)) - if kwargs['data_format'] == 'channels_last': - x = np.transpose(x, (0, 4, 1, 2, 3)) - - dilation_rate = kwargs.pop('dilation_rate', 1) - if isinstance(dilation_rate, int): - dilation_rate = (dilation_rate,) * (x.ndim - 2) - for (i, d) in enumerate(dilation_rate): - if d > 1: - for j in range(w.shape[2 + i] - 1): - w = np.insert(w, 2 * j + 1, 0, axis=2 + i) - - y = func(x, w, **kwargs) - - if kwargs['data_format'] == 'channels_last': - if y.ndim == 3: - y = np.transpose(y, (0, 2, 1)) - elif y.ndim == 4: - y = np.transpose(y, (0, 2, 3, 1)) - else: - y = np.transpose(y, (0, 2, 3, 4, 1)) - - return y - - return wrapper - - -@normalize_conv -def conv(x, w, padding, data_format): - y = [] - for i in range(x.shape[0]): - _y = [] - for j in range(w.shape[1]): - __y = [] - for k in range(w.shape[0]): - __y.append(signal.convolve(x[i, k], w[k, j], mode=padding)) - _y.append(np.sum(np.stack(__y, axis=-1), axis=-1)) - y.append(_y) - y = np.array(y) - return y - - -@normalize_conv -def depthwise_conv(x, w, padding, data_format): - y = [] - for i in range(x.shape[0]): - _y = [] - for j in range(w.shape[0]): - __y = [] - for k in range(w.shape[1]): - __y.append(signal.convolve(x[i, j], w[j, k], mode=padding)) - _y.append(np.stack(__y, axis=0)) - y.append(np.concatenate(_y, axis=0)) - y = np.array(y) - return y - - -def separable_conv(x, w1, w2, padding, data_format): - x2 = depthwise_conv(x, w1, padding=padding, data_format=data_format) - return conv(x2, w2, padding=padding, data_format=data_format) - - -def conv_transpose(x, w, output_shape, padding, data_format, dilation_rate=1): - if x.ndim == 4: - w = np.fliplr(np.flipud(w)) - w = np.transpose(w, (0, 1, 3, 2)) - else: - w = np.flip(np.fliplr(np.flipud(w)), axis=2) - w = np.transpose(w, (0, 1, 2, 4, 3)) - - if isinstance(dilation_rate, int): - dilation_rate = (dilation_rate,) * (x.ndim - 2) - for (i, d) in enumerate(dilation_rate): - if d > 1: - for j in range(w.shape[i] - 1): - w = np.insert(w, 2 * j + 1, 0, axis=i) - - return conv(x, w, padding=padding, data_format=data_format) - - -conv1d = conv -conv2d = conv -conv3d = conv -depthwise_conv2d = depthwise_conv -separable_conv1d = separable_conv -separable_conv2d = separable_conv -conv2d_transpose = 
conv_transpose -conv3d_transpose = conv_transpose - - -def pool(x, pool_size, strides, padding, data_format, pool_mode): - if data_format == 'channels_last': - if x.ndim == 3: - x = np.transpose(x, (0, 2, 1)) - elif x.ndim == 4: - x = np.transpose(x, (0, 3, 1, 2)) - else: - x = np.transpose(x, (0, 4, 1, 2, 3)) - - if padding == 'same': - pad = [(0, 0), (0, 0)] + [(s // 2, s // 2) for s in pool_size] - x = np.pad(x, pad, 'constant', constant_values=-np.inf) - - # indexing trick - x = np.pad(x, [(0, 0), (0, 0)] + [(0, 1) for _ in pool_size], - 'constant', constant_values=0) - - if x.ndim == 3: - y = [x[:, :, k:k1:strides[0]] - for (k, k1) in zip(range(pool_size[0]), range(-pool_size[0], 0))] - elif x.ndim == 4: - y = [] - for (k, k1) in zip(range(pool_size[0]), range(-pool_size[0], 0)): - for (l, l1) in zip(range(pool_size[1]), range(-pool_size[1], 0)): - y.append(x[:, :, k:k1:strides[0], l:l1:strides[1]]) - else: - y = [] - for (k, k1) in zip(range(pool_size[0]), range(-pool_size[0], 0)): - for (l, l1) in zip(range(pool_size[1]), range(-pool_size[1], 0)): - for (m, m1) in zip(range(pool_size[2]), range(-pool_size[2], 0)): - y.append(x[:, - :, - k:k1:strides[0], - l:l1:strides[1], - m:m1:strides[2]]) - y = np.stack(y, axis=-1) - if pool_mode == 'avg': - y = np.mean(np.ma.masked_invalid(y), axis=-1).data - elif pool_mode == 'max': - y = np.max(y, axis=-1) - - if data_format == 'channels_last': - if y.ndim == 3: - y = np.transpose(y, (0, 2, 1)) - elif y.ndim == 4: - y = np.transpose(y, (0, 2, 3, 1)) - else: - y = np.transpose(y, (0, 2, 3, 4, 1)) - - return y - - -pool2d = pool -pool3d = pool - - -def bias_add(x, y, data_format): - if data_format == 'channels_first': - if y.ndim > 1: - y = np.reshape(y, y.shape[::-1]) - for _ in range(x.ndim - y.ndim - 1): - y = np.expand_dims(y, -1) - else: - for _ in range(x.ndim - y.ndim - 1): - y = np.expand_dims(y, 0) - return x + y - - -def rnn(step_function, inputs, initial_states, - go_backwards=False, mask=None, constants=None, - unroll=False, input_length=None): - - if constants is None: - constants = [] - - output_sample, _ = step_function(inputs[:, 0], initial_states + constants) - if mask is not None: - if mask.dtype != np.bool: - mask = mask.astype(np.bool) - if mask.shape != inputs.shape[:2]: - raise ValueError( - 'mask should have `shape=(samples, time)`, ' - 'got {}'.format(mask.shape)) - - def expand_mask(mask_, x): - # expand mask so that `mask[:, t].ndim == x.ndim` - while mask_.ndim < x.ndim + 1: - mask_ = np.expand_dims(mask_, axis=-1) - return mask_ - output_mask = expand_mask(mask, output_sample) - states_masks = [expand_mask(mask, state) for state in initial_states] - - if input_length is None: - input_length = inputs.shape[1] - assert input_length == inputs.shape[1] - time_index = range(input_length) - if go_backwards: - time_index = time_index[::-1] - - outputs = [] - states_tm1 = initial_states # tm1 means "t minus one" as in "previous timestep" - output_tm1 = np.zeros(output_sample.shape) - for t in time_index: - output_t, states_t = step_function( - inputs[:, t], states_tm1 + constants) - if mask is not None: - output_t = np.where(output_mask[:, t], output_t, output_tm1) - states_t = [np.where(state_mask[:, t], state_t, state_tm1) - for state_mask, state_t, state_tm1 - in zip(states_masks, states_t, states_tm1)] - outputs.append(output_t) - states_tm1 = states_t - output_tm1 = output_t - - return outputs[-1], np.stack(outputs, axis=1), states_tm1 - - -_LEARNING_PHASE = True - - -def learning_phase(): - return _LEARNING_PHASE - - 
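To make the switch above concrete: `_LEARNING_PHASE` is module-level state that functions such as `dropout` (defined further down in this file) consult at call time. Below is a minimal, self-contained sketch of that contract, with the backend helpers inlined rather than imported; the wiring is illustrative, not this file's exact API surface.

```python
import numpy as np

# Module-level flag, mirroring `_LEARNING_PHASE` above: truthy while training.
_LEARNING_PHASE = True

def learning_phase():
    return _LEARNING_PHASE

def set_learning_phase(value):
    global _LEARNING_PHASE
    _LEARNING_PHASE = value

def dropout(x, level):
    """Trimmed copy of the NumPy-backend dropout defined later in this file."""
    if learning_phase():
        # Zero units with probability `level`; rescale survivors so the
        # expected activation is unchanged.
        noise = np.random.choice([0, 1], x.shape, replace=True,
                                 p=[level, 1 - level])
        return x * noise / (1 - level)
    return x  # test time: identity

x = np.ones((2, 4))
set_learning_phase(1)
print(dropout(x, 0.5))  # roughly half the entries zeroed, the rest scaled to 2.0
set_learning_phase(0)
print(dropout(x, 0.5))  # returned unchanged
```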
-def set_learning_phase(value): - global _LEARNING_PHASE - _LEARNING_PHASE = value - - -def in_train_phase(x, alt, training=None): - if training is None: - training = learning_phase() - - if training is 1 or training is True: - if callable(x): - return x() - else: - return x - else: - if callable(alt): - return alt() - else: - return alt - - -def in_test_phase(x, alt, training=None): - return in_train_phase(alt, x, training=training) - - -def relu(x, alpha=0., max_value=None, threshold=0.): - if max_value is None: - max_value = np.inf - above_threshold = x * (x >= threshold) - above_threshold = np.clip(above_threshold, 0.0, max_value) - below_threshold = alpha * (x - threshold) * (x < threshold) - return below_threshold + above_threshold - - -def switch(condition, then_expression, else_expression): - cond_float = condition.astype(floatx()) - while cond_float.ndim < then_expression.ndim: - cond_float = cond_float[..., np.newaxis] - return cond_float * then_expression + (1 - cond_float) * else_expression - - -def softplus(x): - return np.log(1. + np.exp(x)) - - -def softsign(x): - return x / (1 + np.abs(x)) - - -def elu(x, alpha=1.): - return x * (x > 0) + alpha * (np.exp(x) - 1.) * (x < 0) - - -def sigmoid(x): - return 1. / (1. + np.exp(-x)) - - -def hard_sigmoid(x): - y = 0.2 * x + 0.5 - return np.clip(y, 0, 1) - - -def tanh(x): - return np.tanh(x) - - -def softmax(x, axis=-1): - y = np.exp(x - np.max(x, axis, keepdims=True)) - return y / np.sum(y, axis, keepdims=True) - - -def l2_normalize(x, axis=-1): - y = np.max(np.sum(x ** 2, axis, keepdims=True), axis, keepdims=True) - return x / np.sqrt(y) - - -def in_top_k(predictions, targets, k): - top_k = np.argsort(-predictions)[:, :k] - targets = targets.reshape(-1, 1) - return np.any(targets == top_k, axis=-1) - - -def binary_crossentropy(target, output, from_logits=False): - if not from_logits: - output = np.clip(output, 1e-7, 1 - 1e-7) - output = np.log(output / (1 - output)) - return (target * -np.log(sigmoid(output)) + - (1 - target) * -np.log(1 - sigmoid(output))) - - -def categorical_crossentropy(target, output, from_logits=False): - if from_logits: - output = softmax(output) - else: - output /= output.sum(axis=-1, keepdims=True) - output = np.clip(output, 1e-7, 1 - 1e-7) - return np.sum(target * -np.log(output), axis=-1, keepdims=False) - - -def max(x, axis=None, keepdims=False): - if isinstance(axis, list): - axis = tuple(axis) - return np.max(x, axis=axis, keepdims=keepdims) - - -def min(x, axis=None, keepdims=False): - if isinstance(axis, list): - axis = tuple(axis) - return np.min(x, axis=axis, keepdims=keepdims) - - -def mean(x, axis=None, keepdims=False): - if isinstance(axis, list): - axis = tuple(axis) - return np.mean(x, axis=axis, keepdims=keepdims) - - -def var(x, axis=None, keepdims=False): - if isinstance(axis, list): - axis = tuple(axis) - return np.var(x, axis=axis, keepdims=keepdims) - - -def std(x, axis=None, keepdims=False): - if isinstance(axis, list): - axis = tuple(axis) - return np.std(x, axis=axis, keepdims=keepdims) - - -def logsumexp(x, axis=None, keepdims=False): - if isinstance(axis, list): - axis = tuple(axis) - return sp.special.logsumexp(x, axis=axis, keepdims=keepdims) - - -def sum(x, axis=None, keepdims=False): - if isinstance(axis, list): - axis = tuple(axis) - return np.sum(x, axis=axis, keepdims=keepdims) - - -def prod(x, axis=None, keepdims=False): - if isinstance(axis, list): - axis = tuple(axis) - return np.prod(x, axis=axis, keepdims=keepdims) - - -def cumsum(x, axis=0): - return np.cumsum(x, 
axis=axis) - - -def cumprod(x, axis=0): - return np.cumprod(x, axis=axis) - - -def any(x, axis=None, keepdims=False): - if isinstance(axis, list): - axis = tuple(axis) - return np.any(x, axis=axis, keepdims=keepdims) - - -def all(x, axis=None, keepdims=False): - if isinstance(axis, list): - axis = tuple(axis) - return np.all(x, axis=axis, keepdims=keepdims) - - -def argmax(x, axis=-1): - return np.argmax(x, axis=axis) - - -def argmin(x, axis=-1): - return np.argmin(x, axis=axis) - - -def sqrt(x): - y = np.sqrt(x) - y[np.isnan(y)] = 0. - return y - - -def pow(x, a=1.): - return np.power(x, a) - - -def clip(x, min_value, max_value): - return np.clip(x, min_value, max_value) - - -def concatenate(tensors, axis=-1): - return np.concatenate(tensors, axis) - - -def permute_dimensions(x, pattern): - return np.transpose(x, pattern) - - -def reshape(x, shape): - return np.reshape(x, shape) - - -def repeat_elements(x, rep, axis): - return np.repeat(x, rep, axis=axis) - - -def repeat(x, n): - y = np.expand_dims(x, 1) - y = np.repeat(y, n, axis=1) - return y - - -def temporal_padding(x, padding=(1, 1)): - return np.pad(x, [(0, 0), padding, (0, 0)], mode='constant') - - -def spatial_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None): - all_dims_padding = ((0, 0),) + padding + ((0, 0),) - all_dims_padding = transpose_shape(all_dims_padding, data_format, - spatial_axes=(1, 2)) - return np.pad(x, all_dims_padding, mode='constant') - - -def spatial_3d_padding(x, padding=((1, 1), (1, 1), (1, 1)), data_format=None): - all_dims_padding = ((0, 0),) + padding + ((0, 0),) - all_dims_padding = transpose_shape(all_dims_padding, data_format, - spatial_axes=(1, 2, 3)) - return np.pad(x, all_dims_padding, mode='constant') - - -def tile(x, n): - return np.tile(x, n) - - -def arange(start, stop=None, step=1, dtype='int32'): - return np.arange(start, stop, step, dtype) - - -def flatten(x): - return np.reshape(x, (-1,)) - - -def batch_flatten(x): - return np.reshape(x, (x.shape[0], -1)) - - -def gather(reference, indices): - return reference[indices] - - -def eval(x): - return x - - -def get_value(x): - return x - - -def count_params(x): - return x.size - - -def int_shape(x): - return x.shape - - -def get_variable_shape(x): - return int_shape(x) - - -def dtype(x): - return x.dtype.name - - -def constant(value, dtype=None, shape=None, name=None): - if dtype is None: - dtype = floatx() - if shape is None: - shape = () - np_value = value * np.ones(shape) - np_value.astype(dtype) - return np_value - - -def print_tensor(x, message=''): - print(x, message) - return x - - -def dot(x, y): - return np.dot(x, y) - - -def batch_dot(x, y, axes=None): - if x.ndim < 2 or y.ndim < 2: - raise ValueError('Batch dot requires inputs of rank 2 or more.') - - if isinstance(axes, int): - axes = [axes, axes] - elif isinstance(axes, tuple): - axes = list(axes) - - if axes is None: - if y.ndim == 2: - axes = [x.ndim - 1, y.ndim - 1] - else: - axes = [x.ndim - 1, y.ndim - 2] - - if any([isinstance(a, (list, tuple)) for a in axes]): - raise ValueError('Multiple target dimensions are not supported. 
' + - 'Expected: None, int, (int, int), ' + - 'Provided: ' + str(axes)) - - # Handle negative axes - if axes[0] < 0: - axes[0] += x.ndim - if axes[1] < 0: - axes[1] += y.ndim - - if 0 in axes: - raise ValueError('Can not perform batch dot over axis 0.') - - if x.shape[0] != y.shape[0]: - raise ValueError('Can not perform batch dot on inputs' - ' with different batch sizes.') - - d1 = x.shape[axes[0]] - d2 = y.shape[axes[1]] - if d1 != d2: - raise ValueError('Can not do batch_dot on inputs with shapes ' + - str(x.shape) + ' and ' + str(y.shape) + - ' with axes=' + str(axes) + '. x.shape[%d] != ' - 'y.shape[%d] (%d != %d).' % (axes[0], axes[1], d1, d2)) - - result = [] - axes = [axes[0] - 1, axes[1] - 1] # ignore batch dimension - for xi, yi in zip(x, y): - result.append(np.tensordot(xi, yi, axes)) - result = np.array(result) - - if result.ndim == 1: - result = np.expand_dims(result, -1) - - return result - - -def transpose(x): - return np.transpose(x) - - -def reverse(x, axes): - if isinstance(axes, list): - axes = tuple(axes) - return np.flip(x, axes) - - -py_slice = slice - - -def slice(x, start, size): - slices = [py_slice(i, i + j) for i, j in zip(start, size)] - return x[tuple(slices)] - - -def variable(value, dtype=None, name=None, constraint=None): - if constraint is not None: - raise TypeError("Constraint must be None when " - "using the NumPy backend.") - return np.array(value, dtype) - - -def dropout(x, level, noise_shape=None, seed=None): - if noise_shape is None: - noise_shape = x.shape - if learning_phase(): - noise = np.random.choice([0, 1], - noise_shape, - replace=True, - p=[level, 1 - level]) - return x * noise / (1 - level) - else: - return x - - -def equal(x, y): - return x == y - - -def not_equal(x, y): - return x != y - - -def greater(x, y): - return x > y - - -def greater_equal(x, y): - return x >= y - - -def less(x, y): - return x < y - - -def less_equal(x, y): - return x <= y - - -def maximum(x, y): - return np.maximum(x, y) - - -def minimum(x, y): - return np.minimum(x, y) - - -def ndim(x): - return x.ndim - - -def random_uniform_variable(shape, low, high, dtype=None, name=None, seed=None): - return (high - low) * np.random.random(shape).astype(dtype) + low - - -def random_normal_variable(shape, mean, scale, dtype=None, name=None, seed=None): - return scale * np.random.randn(*shape).astype(dtype) + mean - - -def zeros(shape, dtype=floatx(), name=None): - return np.zeros(shape, dtype=dtype) - - -def zeros_like(x, dtype=floatx(), name=None): - return np.zeros_like(x, dtype=dtype) - - -def ones(shape, dtype=floatx(), name=None): - return np.ones(shape, dtype=dtype) - - -def ones_like(x, dtype=floatx(), name=None): - return np.ones_like(x, dtype=dtype) - - -def eye(size, dtype=None, name=None): - if isinstance(size, (list, tuple)): - n, m = size - else: - n, m = size, size - return np.eye(n, m, dtype=dtype) - - -def resize_images(x, height_factor, width_factor, data_format): - if data_format == 'channels_first': - x = repeat_elements(x, height_factor, axis=2) - x = repeat_elements(x, width_factor, axis=3) - elif data_format == 'channels_last': - x = repeat_elements(x, height_factor, axis=1) - x = repeat_elements(x, width_factor, axis=2) - return x - - -def resize_volumes(x, depth_factor, height_factor, width_factor, data_format): - if data_format == 'channels_first': - x = repeat_elements(x, depth_factor, axis=2) - x = repeat_elements(x, height_factor, axis=3) - x = repeat_elements(x, width_factor, axis=4) - elif data_format == 'channels_last': - x = repeat_elements(x, 
depth_factor, axis=1) - x = repeat_elements(x, height_factor, axis=2) - x = repeat_elements(x, width_factor, axis=3) - return x - - -def one_hot(indices, num_classes): - return to_categorical(indices, num_classes) - - -def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1, - merge_repeated=False): - num_samples = y_pred.shape[0] - num_classes = y_pred.shape[-1] - log_prob = np.zeros((num_samples, 1)) - decoded_dense = -np.ones_like(y_pred[..., 0]) - decoded_length = np.zeros((num_samples,), dtype=np.int) - if greedy: - for i in range(num_samples): - prob = y_pred[i] - length = input_length[i] - decoded = np.argmax(prob[:length], axis=-1) - log_prob[i] = -np.sum(np.log(prob[np.arange(length), decoded])) - decoded = _remove_repeats(decoded) - decoded = _remove_blanks(decoded, num_classes) - decoded_length[i] = len(decoded) - decoded_dense[i, :len(decoded)] = decoded - return decoded_dense[:, :np.max(decoded_length)], log_prob - else: - raise NotImplementedError - - -def _remove_repeats(inds): - is_not_repeat = np.insert(np.diff(inds).astype(np.bool), 0, True) - return inds[is_not_repeat] - - -def _remove_blanks(inds, num_classes): - return inds[inds < (num_classes - 1)] - - -def stack(x, axis=0): - return np.stack(x, axis=axis) - - -square = np.square -abs = np.abs -exp = np.exp -log = np.log -round = np.round -sign = np.sign -expand_dims = np.expand_dims -squeeze = np.squeeze -cos = np.cos -sin = np.sin -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -from tensorflow.python.framework import ops as tf_ops -from tensorflow.python.training import moving_averages -from tensorflow.python.ops import tensor_array_ops -from tensorflow.python.ops import control_flow_ops -from tensorflow.python.ops import functional_ops -from tensorflow.python.ops import ctc_ops as ctc -from tensorflow.python.client import device_lib -from tensorflow.core.protobuf import config_pb2 - -from collections import defaultdict - -import numpy as np -from distutils.version import StrictVersion -import os - -from .common import floatx -from .common import epsilon -from .common import normalize_data_format -from ..utils.generic_utils import transpose_shape -from ..utils.generic_utils import has_arg - -# Legacy functions -from .common import set_image_dim_ordering -from .common import image_dim_ordering - -py_all = all -py_any = any -py_sum = sum -py_slice = slice - -# INTERNAL UTILS - -# This is the default internal TF session used by Keras. -# It can be set manually via `set_session(sess)`. -_SESSION = None - -# This dictionary holds a mapping {graph: learning_phase}. -# A learning phase is a bool tensor used to run Keras models in -# either train mode (learning_phase == 1) or test mode (learning_phase == 0). -_GRAPH_LEARNING_PHASES = {} - -# This dictionary holds a mapping {graph: UID_DICT}. -# each UID_DICT is a dictionary mapping name prefixes to a current index, -# used for generating graph-specific string UIDs -# for various names (e.g. layer names). -_GRAPH_UID_DICTS = {} - -# This boolean flag can be set to True to leave variable initialization -# up to the user. -# Change its value via `manual_variable_initialization(value)`. -_MANUAL_VAR_INIT = False - -# This list holds the available devices. -# It is populated when `_get_available_gpus()` is called for the first time. -# We assume our devices don't change during our lifetime. 
-_LOCAL_DEVICES = None - - -def get_uid(prefix=''): - """Get the uid for the default graph. - - # Arguments - prefix: An optional prefix of the graph. - - # Returns - A unique identifier for the graph. - """ - global _GRAPH_UID_DICTS - graph = tf.get_default_graph() - if graph not in _GRAPH_UID_DICTS: - _GRAPH_UID_DICTS[graph] = defaultdict(int) - _GRAPH_UID_DICTS[graph][prefix] += 1 - return _GRAPH_UID_DICTS[graph][prefix] - - -def reset_uids(): - """Resets graph identifiers. - """ - global _GRAPH_UID_DICTS - _GRAPH_UID_DICTS = {} - - -def clear_session(): - """Destroys the current TF graph and creates a new one. - - Useful to avoid clutter from old models / layers. - """ - global _SESSION - global _GRAPH_LEARNING_PHASES - tf.reset_default_graph() - reset_uids() - _SESSION = None - with tf.name_scope(''): - phase = tf.placeholder_with_default( - False, - shape=(), - name='keras_learning_phase') - _GRAPH_LEARNING_PHASES = {} - _GRAPH_LEARNING_PHASES[tf.get_default_graph()] = phase - - -def manual_variable_initialization(value): - """Sets the manual variable initialization flag. - - This boolean flag determines whether - variables should be initialized - as they are instantiated (default), or if - the user should handle the initialization - (e.g. via `tf.initialize_all_variables()`). - - # Arguments - value: Python boolean. - """ - global _MANUAL_VAR_INIT - _MANUAL_VAR_INIT = value - - -def learning_phase(): - """Returns the learning phase flag. - - The learning phase flag is a bool tensor (0 = test, 1 = train) - to be passed as input to any Keras function - that uses a different behavior at train time and test time. - - # Returns - Learning phase (scalar integer tensor or Python integer). - """ - graph = tf.get_default_graph() - if graph not in _GRAPH_LEARNING_PHASES: - with tf.name_scope(''): - phase = tf.placeholder_with_default( - False, - shape=(), - name='keras_learning_phase') - _GRAPH_LEARNING_PHASES[graph] = phase - return _GRAPH_LEARNING_PHASES[graph] - - -def set_learning_phase(value): - """Sets the learning phase to a fixed value. - - # Arguments - value: Learning phase value, either 0 or 1 (integers). - - # Raises - ValueError: if `value` is neither `0` nor `1`. - """ - global _GRAPH_LEARNING_PHASES - if value not in {0, 1}: - raise ValueError('Expected learning phase to be ' - '0 or 1.') - _GRAPH_LEARNING_PHASES[tf.get_default_graph()] = value - - -def get_session(): - """Returns the TF session to be used by the backend. - - If a default TensorFlow session is available, we will return it. - - Else, we will return the global Keras session. - - If no global Keras session exists at this point: - we will create a new global session. - - Note that you can manually set the global session - via `K.set_session(sess)`. - - # Returns - A TensorFlow session. 
- """ - global _SESSION - - default_session = tf.get_default_session() - - if default_session is not None: - session = default_session - else: - if _SESSION is None: - if not os.environ.get('OMP_NUM_THREADS'): - config = tf.ConfigProto(allow_soft_placement=True) - else: - num_thread = int(os.environ.get('OMP_NUM_THREADS')) - config = tf.ConfigProto(intra_op_parallelism_threads=num_thread, - inter_op_parallelism_threads=num_thread, - allow_soft_placement=True) - _SESSION = tf.Session(config=config) - session = _SESSION - if not _MANUAL_VAR_INIT: - with session.graph.as_default(): - variables = tf.global_variables() - candidate_vars = [] - for v in variables: - if not getattr(v, '_keras_initialized', False): - candidate_vars.append(v) - if candidate_vars: - # This step is expensive, so we only run it on variables - # not already marked as initialized. - is_initialized = session.run( - [tf.is_variable_initialized(v) for v in candidate_vars]) - uninitialized_vars = [] - for flag, v in zip(is_initialized, candidate_vars): - if not flag: - uninitialized_vars.append(v) - v._keras_initialized = True - if uninitialized_vars: - session.run(tf.variables_initializer(uninitialized_vars)) - # hack for list_devices() function. - # list_devices() function is not available under tensorflow r1.3. - if not hasattr(session, 'list_devices'): - session.list_devices = lambda: device_lib.list_local_devices() - return session - - -def set_session(session): - """Sets the global TensorFlow session. - - # Arguments - session: A TF Session. - """ - global _SESSION - _SESSION = session - - -# DEVICE MANIPULATION AND PROBING - -class _TfDeviceCaptureOp(object): - """Class for capturing the TF device scope.""" - - def __init__(self): - self.device = None - - def _set_device(self, device): - """This method captures TF's explicit device scope setting.""" - self.device = device - - -def _get_current_tf_device(): - """Return explicit device of current context, otherwise returns `None`. - - # Returns - If the current device scope is explicitly set, it returns a string with - the device (`CPU` or `GPU`). If the scope is not explicitly set, it will - return `None`. - """ - g = tf.get_default_graph() - op = _TfDeviceCaptureOp() - g._apply_device_functions(op) - return op.device - - -def _is_current_explicit_device(device_type): - """Check if the current device is explicitly set on the device type specified. - - # Arguments - device_type: A string containing `GPU` or `CPU` (case-insensitive). - - # Returns - A boolean indicating if the current device - scope is explicitly set on the device type. - - # Raises - ValueError: If the `device_type` string indicates an unsupported device. - """ - device_type = device_type.upper() - if device_type not in ['CPU', 'GPU']: - raise ValueError('`device_type` should be either "CPU" or "GPU".') - device = _get_current_tf_device() - return (device is not None and device.device_type == device_type.upper()) - - -def _get_available_gpus(): - """Get a list of available gpu devices (formatted as strings). - - # Returns - A list of available GPU devices. - """ - global _LOCAL_DEVICES - if _LOCAL_DEVICES is None: - _LOCAL_DEVICES = get_session().list_devices() - return [x.name for x in _LOCAL_DEVICES if x.device_type == 'GPU'] - - -def _has_nchw_support(): - """Check whether the current scope supports NCHW ops. - - TensorFlow does not support NCHW on CPU. - Therefore we check if we are not explicitly put on - CPU, and have GPUs available. - In this case there will be soft-placing on the GPU device. 
- - # Returns - bool: if the current scope device placement would support nchw - """ - explicitly_on_cpu = _is_current_explicit_device('CPU') - gpus_available = len(_get_available_gpus()) > 0 - return (not explicitly_on_cpu and gpus_available) - - -# VARIABLE MANIPULATION - -def _to_tensor(x, dtype): - """Convert the input `x` to a tensor of type `dtype`. - - # Arguments - x: An object to be converted (numpy array, list, tensors). - dtype: The destination type. - - # Returns - A tensor. - """ - return tf.convert_to_tensor(x, dtype=dtype) - - -def is_sparse(tensor): - """Returns whether a tensor is a sparse tensor. - - # Arguments - tensor: A tensor instance. - - # Returns - A boolean. - - # Example - ```python - >>> from keras import backend as K - >>> a = K.placeholder((2, 2), sparse=False) - >>> print(K.is_sparse(a)) - False - >>> b = K.placeholder((2, 2), sparse=True) - >>> print(K.is_sparse(b)) - True - ``` - """ - return isinstance(tensor, tf.SparseTensor) - - -def to_dense(tensor): - """Converts a sparse tensor into a dense tensor and returns it. - - # Arguments - tensor: A tensor instance (potentially sparse). - - # Returns - A dense tensor. - - # Examples - ```python - >>> from keras import backend as K - >>> b = K.placeholder((2, 2), sparse=True) - >>> print(K.is_sparse(b)) - True - >>> c = K.to_dense(b) - >>> print(K.is_sparse(c)) - False - ``` - """ - if is_sparse(tensor): - return tf.sparse_tensor_to_dense(tensor) - else: - return tensor - - -name_scope = tf.name_scope - - -def variable(value, dtype=None, name=None, constraint=None): - """Instantiates a variable and returns it. - - # Arguments - value: Numpy array, initial value of the tensor. - dtype: Tensor type. - name: Optional name string for the tensor. - constraint: Optional projection function to be - applied to the variable after an optimizer update. - - # Returns - A variable instance (with Keras metadata included). - - # Examples - ```python - >>> from keras import backend as K - >>> val = np.array([[1, 2], [3, 4]]) - >>> kvar = K.variable(value=val, dtype='float64', name='example_var') - >>> K.dtype(kvar) - 'float64' - >>> print(kvar) - example_var - >>> K.eval(kvar) - array([[ 1., 2.], - [ 3., 4.]]) - ``` - """ - if dtype is None: - dtype = floatx() - if hasattr(value, 'tocoo'): - sparse_coo = value.tocoo() - indices = np.concatenate((np.expand_dims(sparse_coo.row, 1), - np.expand_dims(sparse_coo.col, 1)), 1) - v = tf.SparseTensor(indices=indices, - values=sparse_coo.data, - dense_shape=sparse_coo.shape) - v._keras_shape = sparse_coo.shape - v._uses_learning_phase = False - return v - v = tf.Variable(value, dtype=tf.as_dtype(dtype), name=name) - if isinstance(value, np.ndarray): - v._keras_shape = value.shape - elif hasattr(value, 'get_shape'): - v._keras_shape = int_shape(value) - v._uses_learning_phase = False - # TODO: move to Variable constructor when supported in public release. - try: - v.constraint = constraint - except AttributeError: - v._constraint = constraint - return v - - -def constant(value, dtype=None, shape=None, name=None): - """Creates a constant tensor. - - # Arguments - value: A constant value (or list) - dtype: The type of the elements of the resulting tensor. - shape: Optional dimensions of resulting tensor. - name: Optional name for the tensor. - - # Returns - A Constant Tensor. - """ - if dtype is None: - dtype = floatx() - return tf.constant(value, dtype=dtype, shape=shape, name=name) - - -def is_keras_tensor(x): - """Returns whether `x` is a Keras tensor. 
- - A "Keras tensor" is a tensor that was returned by a Keras layer, - (`Layer` class) or by `Input`. - - # Arguments - x: A candidate tensor. - - # Returns - A boolean: Whether the argument is a Keras tensor. - - # Raises - ValueError: In case `x` is not a symbolic tensor. - - # Examples - ```python - >>> from keras import backend as K - >>> from keras.layers import Input, Dense - >>> np_var = numpy.array([1, 2]) - >>> K.is_keras_tensor(np_var) # A numpy array is not a symbolic tensor. - ValueError - >>> k_var = tf.placeholder('float32', shape=(1,1)) - >>> # A variable indirectly created outside of keras is not a Keras tensor. - >>> K.is_keras_tensor(k_var) - False - >>> keras_var = K.variable(np_var) - >>> # A variable created with the keras backend is not a Keras tensor. - >>> K.is_keras_tensor(keras_var) - False - >>> keras_placeholder = K.placeholder(shape=(2, 4, 5)) - >>> # A placeholder is not a Keras tensor. - >>> K.is_keras_tensor(keras_placeholder) - False - >>> keras_input = Input([10]) - >>> K.is_keras_tensor(keras_input) # An Input is a Keras tensor. - True - >>> keras_layer_output = Dense(10)(keras_input) - >>> # Any Keras layer output is a Keras tensor. - >>> K.is_keras_tensor(keras_layer_output) - True - ``` - """ - if not is_tensor(x): - raise ValueError('Unexpectedly found an instance of type `' + - str(type(x)) + '`. ' - 'Expected a symbolic tensor instance.') - return hasattr(x, '_keras_history') - - -def is_tensor(x): - return isinstance(x, tf_ops._TensorLike) or tf_ops.is_dense_tensor_like(x) - - -def placeholder(shape=None, ndim=None, dtype=None, sparse=False, name=None): - """Instantiates a placeholder tensor and returns it. - - # Arguments - shape: Shape of the placeholder - (integer tuple, may include `None` entries). - ndim: Number of axes of the tensor. - At least one of {`shape`, `ndim`} must be specified. - If both are specified, `shape` is used. - dtype: Placeholder type. - sparse: Boolean, whether the placeholder should have a sparse type. - name: Optional name string for the placeholder. - - # Returns - Tensor instance (with Keras metadata included). - - # Examples - ```python - >>> from keras import backend as K - >>> input_ph = K.placeholder(shape=(2, 4, 5)) - >>> input_ph._keras_shape - (2, 4, 5) - >>> input_ph - - ``` - """ - if dtype is None: - dtype = floatx() - if not shape: - if ndim: - shape = tuple([None for _ in range(ndim)]) - if sparse: - x = tf.sparse_placeholder(dtype, shape=shape, name=name) - else: - x = tf.placeholder(dtype, shape=shape, name=name) - x._keras_shape = shape - x._uses_learning_phase = False - return x - - -def is_placeholder(x): - """Returns whether `x` is a placeholder. - - # Arguments - x: A candidate placeholder. - - # Returns - Boolean. - """ - try: - return x.op.type == 'Placeholder' - except AttributeError: - return False - - -def shape(x): - """Returns the symbolic shape of a tensor or variable. - - # Arguments - x: A tensor or variable. - - # Returns - A symbolic shape (which is itself a tensor). 
- - # Examples - ```python - # TensorFlow example - >>> from keras import backend as K - >>> tf_session = K.get_session() - >>> val = np.array([[1, 2], [3, 4]]) - >>> kvar = K.variable(value=val) - >>> inputs = keras.backend.placeholder(shape=(2, 4, 5)) - >>> K.shape(kvar) - - >>> K.shape(inputs) - - # To get integer shape (Instead, you can use K.int_shape(x)) - >>> K.shape(kvar).eval(session=tf_session) - array([2, 2], dtype=int32) - >>> K.shape(inputs).eval(session=tf_session) - array([2, 4, 5], dtype=int32) - ``` - """ - return tf.shape(x) - - -def int_shape(x): - """Returns the shape of tensor or variable as a tuple of int or None entries. - - # Arguments - x: Tensor or variable. - - # Returns - A tuple of integers (or None entries). - - # Examples - ```python - >>> from keras import backend as K - >>> inputs = K.placeholder(shape=(2, 4, 5)) - >>> K.int_shape(inputs) - (2, 4, 5) - >>> val = np.array([[1, 2], [3, 4]]) - >>> kvar = K.variable(value=val) - >>> K.int_shape(kvar) - (2, 2) - ``` - - {{np_implementation}} - """ - if hasattr(x, '_keras_shape'): - return x._keras_shape - try: - return tuple(x.get_shape().as_list()) - except ValueError: - return None - - -def ndim(x): - """Returns the number of axes in a tensor, as an integer. - - # Arguments - x: Tensor or variable. - - # Returns - Integer (scalar), number of axes. - - # Examples - ```python - >>> from keras import backend as K - >>> inputs = K.placeholder(shape=(2, 4, 5)) - >>> val = np.array([[1, 2], [3, 4]]) - >>> kvar = K.variable(value=val) - >>> K.ndim(inputs) - 3 - >>> K.ndim(kvar) - 2 - ``` - - {{np_implementation}} - """ - dims = x.get_shape()._dims - if dims is not None: - return len(dims) - return None - - -def dtype(x): - """Returns the dtype of a Keras tensor or variable, as a string. - - # Arguments - x: Tensor or variable. - - # Returns - String, dtype of `x`. - - # Examples - ```python - >>> from keras import backend as K - >>> K.dtype(K.placeholder(shape=(2,4,5))) - 'float32' - >>> K.dtype(K.placeholder(shape=(2,4,5), dtype='float32')) - 'float32' - >>> K.dtype(K.placeholder(shape=(2,4,5), dtype='float64')) - 'float64' - # Keras variable - >>> kvar = K.variable(np.array([[1, 2], [3, 4]])) - >>> K.dtype(kvar) - 'float32_ref' - >>> kvar = K.variable(np.array([[1, 2], [3, 4]]), dtype='float32') - >>> K.dtype(kvar) - 'float32_ref' - ``` - {{np_implementation}} - """ - return x.dtype.base_dtype.name - - -def eval(x): - """Evaluates the value of a variable. - - # Arguments - x: A variable. - - # Returns - A Numpy array. - - # Examples - ```python - >>> from keras import backend as K - >>> kvar = K.variable(np.array([[1, 2], [3, 4]]), dtype='float32') - >>> K.eval(kvar) - array([[ 1., 2.], - [ 3., 4.]], dtype=float32) - ``` - {{np_implementation}} - """ - return to_dense(x).eval(session=get_session()) - - -def zeros(shape, dtype=None, name=None): - """Instantiates an all-zeros variable and returns it. - - # Arguments - shape: Tuple of integers, shape of returned Keras variable - dtype: String, data type of returned Keras variable - name: String, name of returned Keras variable - - # Returns - A variable (including Keras metadata), filled with `0.0`. - Note that if `shape` was symbolic, we cannot return a variable, - and will return a dynamically-shaped tensor instead. 
- - # Example - ```python - >>> from keras import backend as K - >>> kvar = K.zeros((3,4)) - >>> K.eval(kvar) - array([[ 0., 0., 0., 0.], - [ 0., 0., 0., 0.], - [ 0., 0., 0., 0.]], dtype=float32) - ``` - {{np_implementation}} - """ - if dtype is None: - dtype = floatx() - tf_dtype = tf.as_dtype(dtype) - v = tf.zeros(shape=shape, dtype=tf_dtype, name=name) - if py_all(v.get_shape().as_list()): - return variable(v, dtype=dtype, name=name) - return v - - -def ones(shape, dtype=None, name=None): - """Instantiates an all-ones variable and returns it. - - # Arguments - shape: Tuple of integers, shape of returned Keras variable. - dtype: String, data type of returned Keras variable. - name: String, name of returned Keras variable. - - # Returns - A Keras variable, filled with `1.0`. - Note that if `shape` was symbolic, we cannot return a variable, - and will return a dynamically-shaped tensor instead. - - # Example - ```python - >>> from keras import backend as K - >>> kvar = K.ones((3,4)) - >>> K.eval(kvar) - array([[ 1., 1., 1., 1.], - [ 1., 1., 1., 1.], - [ 1., 1., 1., 1.]], dtype=float32) - ``` - {{np_implementation}} - """ - if dtype is None: - dtype = floatx() - tf_dtype = tf.as_dtype(dtype) - v = tf.ones(shape=shape, dtype=tf_dtype, name=name) - if py_all(v.get_shape().as_list()): - return variable(v, dtype=dtype, name=name) - return v - - -def eye(size, dtype=None, name=None): - """Instantiate an identity matrix and returns it. - - # Arguments - size: Tuple, number of rows and columns. If Integer, number of rows. - dtype: String, data type of returned Keras variable. - name: String, name of returned Keras variable. - - # Returns - A Keras variable, an identity matrix. - - # Example - ```python - >>> from keras import backend as K - >>> K.eval(K.eye(3)) - array([[ 1., 0., 0.], - [ 0., 1., 0.], - [ 0., 0., 1.]], dtype=float32) - >>> K.eval(K.eye((2, 3))) - array([[1., 0., 0.], - [0., 1., 0.]], dtype=float32) - ``` - {{np_implementation}} - """ - if dtype is None: - dtype = floatx() - tf_dtype = tf.as_dtype(dtype) - if isinstance(size, (list, tuple)): - n, m = size - else: - n, m = size, size - return variable(tf.eye(n, m, dtype=tf_dtype), dtype, name) - - -def zeros_like(x, dtype=None, name=None): - """Instantiates an all-zeros variable of the same shape as another tensor. - - # Arguments - x: Keras variable or Keras tensor. - dtype: String, dtype of returned Keras variable. - None uses the dtype of x. - name: String, name for the variable to create. - - # Returns - A Keras variable with the shape of x filled with zeros. - - # Example - ```python - >>> from keras import backend as K - >>> kvar = K.variable(np.random.random((2,3))) - >>> kvar_zeros = K.zeros_like(kvar) - >>> K.eval(kvar_zeros) - array([[ 0., 0., 0.], - [ 0., 0., 0.]], dtype=float32) - ``` - {{np_implementation}} - """ - if dtype is None: - dtype = floatx() - return tf.zeros_like(x, dtype=dtype, name=name) - - -def ones_like(x, dtype=None, name=None): - """Instantiates an all-ones variable of the same shape as another tensor. - - # Arguments - x: Keras variable or tensor. - dtype: String, dtype of returned Keras variable. - None uses the dtype of x. - name: String, name for the variable to create. - - # Returns - A Keras variable with the shape of x filled with ones. 
- - # Example - ```python - >>> from keras import backend as K - >>> kvar = K.variable(np.random.random((2,3))) - >>> kvar_ones = K.ones_like(kvar) - >>> K.eval(kvar_ones) - array([[ 1., 1., 1.], - [ 1., 1., 1.]], dtype=float32) - ``` - {{np_implementation}} - """ - if dtype is None: - dtype = floatx() - return tf.ones_like(x, dtype=dtype, name=name) - - -def identity(x, name=None): - """Returns a tensor with the same content as the input tensor. - - # Arguments - x: The input tensor. - name: String, name for the variable to create. - - # Returns - A tensor of the same shape, type and content. - """ - return tf.identity(x, name) - - -def random_uniform_variable(shape, low, high, dtype=None, - name=None, seed=None): - """Instantiates a variable with values drawn from a uniform distribution. - - # Arguments - shape: Tuple of integers, shape of returned Keras variable. - low: Float, lower boundary of the output interval. - high: Float, upper boundary of the output interval. - dtype: String, dtype of returned Keras variable. - name: String, name of returned Keras variable. - seed: Integer, random seed. - - # Returns - A Keras variable, filled with drawn samples. - - # Example - ```python - # TensorFlow example - >>> kvar = K.random_uniform_variable((2,3), 0, 1) - >>> kvar - - >>> K.eval(kvar) - array([[ 0.10940075, 0.10047495, 0.476143 ], - [ 0.66137183, 0.00869417, 0.89220798]], dtype=float32) - ``` - {{np_implementation}} - """ - if dtype is None: - dtype = floatx() - tf_dtype = tf.as_dtype(dtype) - if seed is None: - # ensure that randomness is conditioned by the Numpy RNG - seed = np.random.randint(10e8) - value = tf.random_uniform_initializer( - low, high, dtype=tf_dtype, seed=seed)(shape) - return variable(value, dtype=dtype, name=name) - - -def random_normal_variable(shape, mean, scale, dtype=None, - name=None, seed=None): - """Instantiates a variable with values drawn from a normal distribution. - - # Arguments - shape: Tuple of integers, shape of returned Keras variable. - mean: Float, mean of the normal distribution. - scale: Float, standard deviation of the normal distribution. - dtype: String, dtype of returned Keras variable. - name: String, name of returned Keras variable. - seed: Integer, random seed. - - # Returns - A Keras variable, filled with drawn samples. - - # Example - ```python - # TensorFlow example - >>> kvar = K.random_normal_variable((2,3), 0, 1) - >>> kvar - - >>> K.eval(kvar) - array([[ 1.19591331, 0.68685907, -0.63814116], - [ 0.92629528, 0.28055015, 1.70484698]], dtype=float32) - ``` - {{np_implementation}} - """ - if dtype is None: - dtype = floatx() - tf_dtype = tf.as_dtype(dtype) - if seed is None: - # ensure that randomness is conditioned by the Numpy RNG - seed = np.random.randint(10e8) - value = tf.random_normal_initializer( - mean, scale, dtype=tf_dtype, seed=seed)(shape) - return variable(value, dtype=dtype, name=name) - - -def count_params(x): - """Returns the static number of elements in a Keras variable or tensor. - - # Arguments - x: Keras variable or tensor. - - # Returns - Integer, the number of elements in `x`, i.e., the product of the - array's static dimensions. - - # Example - ```python - >>> kvar = K.zeros((2,3)) - >>> K.count_params(kvar) - 6 - >>> K.eval(kvar) - array([[ 0., 0., 0.], - [ 0., 0., 0.]], dtype=float32) - ``` - {{np_implementation}} - """ - return np.prod(int_shape(x)) - - -def cast(x, dtype): - """Casts a tensor to a different dtype and returns it. - - You can cast a Keras variable but it still returns a Keras tensor. 
- - # Arguments - x: Keras tensor (or variable). - dtype: String, either (`'float16'`, `'float32'`, or `'float64'`). - - # Returns - Keras tensor with dtype `dtype`. - - # Example - ```python - >>> from keras import backend as K - >>> input = K.placeholder((2, 3), dtype='float32') - >>> input - - # It doesn't work in-place as below. - >>> K.cast(input, dtype='float16') - - >>> input - - # you need to assign it. - >>> input = K.cast(input, dtype='float16') - >>> input - - ``` - """ - return tf.cast(x, dtype) - - -# UPDATES OPS - - -def update(x, new_x): - """Update the value of `x` to `new_x`. - - # Arguments - x: A `Variable`. - new_x: A tensor of same shape as `x`. - - # Returns - The variable `x` updated. - """ - return tf.assign(x, new_x) - - -def update_add(x, increment): - """Update the value of `x` by adding `increment`. - - # Arguments - x: A `Variable`. - increment: A tensor of same shape as `x`. - - # Returns - The variable `x` updated. - """ - return tf.assign_add(x, increment) - - -def update_sub(x, decrement): - """Update the value of `x` by subtracting `decrement`. - - # Arguments - x: A `Variable`. - decrement: A tensor of same shape as `x`. - - # Returns - The variable `x` updated. - """ - return tf.assign_sub(x, decrement) - - -def moving_average_update(x, value, momentum): - """Compute the moving average of a variable. - - # Arguments - x: A `Variable`. - value: A tensor with the same shape as `x`. - momentum: The moving average momentum. - - # Returns - An operation to update the variable. - """ - if value.dtype != x.dtype: - value = tf.cast(value, x.dtype) - return moving_averages.assign_moving_average( - x, value, momentum, zero_debias=True) - - -# LINEAR ALGEBRA - -def dot(x, y): - """Multiplies 2 tensors (and/or variables) and returns a *tensor*. - - When attempting to multiply a nD tensor - with a nD tensor, it reproduces the Theano behavior. - (e.g. `(2, 3) * (4, 3, 5) -> (2, 4, 5)`) - - # Arguments - x: Tensor or variable. - y: Tensor or variable. - - # Returns - A tensor, dot product of `x` and `y`. - - # Examples - ```python - # dot product between tensors - >>> x = K.placeholder(shape=(2, 3)) - >>> y = K.placeholder(shape=(3, 4)) - >>> xy = K.dot(x, y) - >>> xy - - ``` - - ```python - # dot product between tensors - >>> x = K.placeholder(shape=(32, 28, 3)) - >>> y = K.placeholder(shape=(3, 4)) - >>> xy = K.dot(x, y) - >>> xy - - ``` - - ```python - # Theano-like behavior example - >>> x = K.random_uniform_variable(shape=(2, 3), low=0, high=1) - >>> y = K.ones((4, 3, 5)) - >>> xy = K.dot(x, y) - >>> K.int_shape(xy) - (2, 4, 5) - ``` - {{np_implementation}} - """ - if ndim(x) is not None and (ndim(x) > 2 or ndim(y) > 2): - x_shape = [] - for i, s in zip(int_shape(x), tf.unstack(tf.shape(x))): - if i is not None: - x_shape.append(i) - else: - x_shape.append(s) - x_shape = tuple(x_shape) - y_shape = [] - for i, s in zip(int_shape(y), tf.unstack(tf.shape(y))): - if i is not None: - y_shape.append(i) - else: - y_shape.append(s) - y_shape = tuple(y_shape) - y_permute_dim = list(range(ndim(y))) - y_permute_dim = [y_permute_dim.pop(-2)] + y_permute_dim - xt = tf.reshape(x, [-1, x_shape[-1]]) - yt = tf.reshape(tf.transpose(y, perm=y_permute_dim), [y_shape[-2], -1]) - return tf.reshape(tf.matmul(xt, yt), - x_shape[:-1] + y_shape[:-2] + y_shape[-1:]) - if is_sparse(x): - out = tf.sparse_tensor_dense_matmul(x, y) - else: - out = tf.matmul(x, y) - return out - - -def batch_dot(x, y, axes=None): - """Batchwise dot product. 
- - `batch_dot` is used to compute dot product of `x` and `y` when - `x` and `y` are data in batches, i.e. in a shape of - `(batch_size, :)`. - `batch_dot` results in a tensor or variable with less dimensions - than the input. If the number of dimensions is reduced to 1, - we use `expand_dims` to make sure that ndim is at least 2. - - # Arguments - x: Keras tensor or variable with `ndim >= 2`. - y: Keras tensor or variable with `ndim >= 2`. - axes: int or tuple(int, int). Target dimensions to be reduced. - - # Returns - A tensor with shape equal to the concatenation of `x`'s shape - (less the dimension that was summed over) and `y`'s shape - (less the batch dimension and the dimension that was summed over). - If the final rank is 1, we reshape it to `(batch_size, 1)`. - - # Examples - Assume `x = [[1, 2], [3, 4]]` and `y = [[5, 6], [7, 8]]` - `batch_dot(x, y, axes=1) = [[17], [53]]` which is the main diagonal - of `x.dot(y.T)`, although we never have to calculate the off-diagonal - elements. - - Pseudocode: - ``` - inner_products = [] - for xi, yi in zip(x, y): - inner_products.append(xi.dot(yi)) - result = stack(inner_products) - ``` - - Shape inference: - Let `x`'s shape be `(100, 20)` and `y`'s shape be `(100, 30, 20)`. - If `axes` is (1, 2), to find the output shape of resultant tensor, - loop through each dimension in `x`'s shape and `y`'s shape: - - * `x.shape[0]` : 100 : append to output shape - * `x.shape[1]` : 20 : do not append to output shape, - dimension 1 of `x` has been summed over. (`dot_axes[0]` = 1) - * `y.shape[0]` : 100 : do not append to output shape, - always ignore first dimension of `y` - * `y.shape[1]` : 30 : append to output shape - * `y.shape[2]` : 20 : do not append to output shape, - dimension 2 of `y` has been summed over. (`dot_axes[1]` = 2) - `output_shape` = `(100, 30)` - - ```python - >>> x_batch = K.ones(shape=(32, 20, 1)) - >>> y_batch = K.ones(shape=(32, 30, 20)) - >>> xy_batch_dot = K.batch_dot(x_batch, y_batch, axes=(1, 2)) - >>> K.int_shape(xy_batch_dot) - (32, 1, 30) - ``` - - {{np_implementation}} - """ - x_shape = int_shape(x) - y_shape = int_shape(y) - - x_ndim = len(x_shape) - y_ndim = len(y_shape) - - if x_ndim < 2 or y_ndim < 2: - raise ValueError('Can not do batch_dot on inputs ' - 'with rank < 2. ' - 'Received inputs with shapes ' + - str(x_shape) + ' and ' + - str(y_shape) + '.') - - x_batch_size = x_shape[0] - y_batch_size = y_shape[0] - - if x_batch_size is not None and y_batch_size is not None: - if x_batch_size != y_batch_size: - raise ValueError('Can not do batch_dot on inputs ' - 'with different batch sizes. ' - 'Received inputs with shapes ' + - str(x_shape) + ' and ' + - str(y_shape) + '.') - - if isinstance(axes, int): - axes = [axes, axes] - - if axes is None: - if y_ndim == 2: - axes = [x_ndim - 1, y_ndim - 1] - else: - axes = [x_ndim - 1, y_ndim - 2] - - if py_any([isinstance(a, (list, tuple)) for a in axes]): - raise ValueError('Multiple target dimensions are not supported. ' + - 'Expected: None, int, (int, int), ' + - 'Provided: ' + str(axes)) - - # if tuple, convert to list. - axes = list(axes) - - # convert negative indices. - if axes[0] < 0: - axes[0] += x_ndim - if axes[1] < 0: - axes[1] += y_ndim - - # sanity checks - if 0 in axes: - raise ValueError('Can not perform batch_dot over axis 0.' 
- 'If your inputs are not batched,' - ' add a dummy batch dimension to your ' - 'inputs using K.expand_dims(x, 0)') - - a0, a1 = axes - d1 = x_shape[a0] - d2 = y_shape[a1] - - if d1 is not None and d2 is not None and d1 != d2: - raise ValueError('Can not do batch_dot on inputs with shapes ' + - str(x_shape) + ' and ' + str(y_shape) + - ' with axes=' + str(axes) + '. x.shape[%d] != ' - 'y.shape[%d] (%d != %d).' % (axes[0], axes[1], d1, d2)) - - # backup ndims. Need them later. - orig_x_ndim = x_ndim - orig_y_ndim = y_ndim - - # if rank is 2, expand to 3. - if x_ndim == 2: - x = tf.expand_dims(x, 1) - a0 += 1 - x_ndim += 1 - if y_ndim == 2: - y = tf.expand_dims(y, 2) - y_ndim += 1 - - # bring x's dimension to be reduced to last axis. - if a0 != x_ndim - 1: - pattern = list(range(x_ndim)) - for i in range(a0, x_ndim - 1): - pattern[i] = pattern[i + 1] - pattern[-1] = a0 - x = tf.transpose(x, pattern) - - # bring y's dimension to be reduced to axis 1. - if a1 != 1: - pattern = list(range(y_ndim)) - for i in range(a1, 1, -1): - pattern[i] = pattern[i - 1] - pattern[1] = a1 - y = tf.transpose(y, pattern) - - # normalize both inputs to rank 3. - if x_ndim > 3: - # squash middle dimensions of x. - x_shape = shape(x) - x_mid_dims = x_shape[1:-1] - x_squashed_dim = tf.reduce_prod(x_mid_dims) - x_squashed_shape = tf.stack([x_shape[0], x_squashed_dim, x_shape[-1]]) - x = tf.reshape(x, x_squashed_shape) - x_squashed = True - else: - x_squashed = False - - if y_ndim > 3: - # squash trailing dimensions of y. - y_shape = shape(y) - y_trail_dims = y_shape[2:] - y_squashed_dim = tf.reduce_prod(y_trail_dims) - y_squashed_shape = tf.stack([y_shape[0], y_shape[1], y_squashed_dim]) - y = tf.reshape(y, y_squashed_shape) - y_squashed = True - else: - y_squashed = False - - result = tf.matmul(x, y) - - # if inputs were squashed, we have to reshape the matmul output. - output_shape = tf.shape(result) - do_reshape = False - - if x_squashed: - output_shape = tf.concat([output_shape[:1], - x_mid_dims, - output_shape[-1:]], 0) - do_reshape = True - - if y_squashed: - output_shape = tf.concat([output_shape[:-1], y_trail_dims], 0) - do_reshape = True - - if do_reshape: - result = tf.reshape(result, output_shape) - - # if the inputs were originally rank 2, we remove the added 1 dim. - if orig_x_ndim == 2: - result = tf.squeeze(result, 1) - elif orig_y_ndim == 2: - result = tf.squeeze(result, -1) - - return result - - -def transpose(x): - """Transposes a tensor and returns it. - - # Arguments - x: Tensor or variable. - - # Returns - A tensor. - - # Examples - ```python - >>> var = K.variable([[1, 2, 3], [4, 5, 6]]) - >>> K.eval(var) - array([[ 1., 2., 3.], - [ 4., 5., 6.]], dtype=float32) - >>> var_transposed = K.transpose(var) - >>> K.eval(var_transposed) - array([[ 1., 4.], - [ 2., 5.], - [ 3., 6.]], dtype=float32) - ``` - - ```python - >>> inputs = K.placeholder((2, 3)) - >>> inputs - - >>> input_transposed = K.transpose(inputs) - >>> input_transposed - - - ``` - {{np_implementation}} - """ - return tf.transpose(x) - - -def gather(reference, indices): - """Retrieves the elements of indices `indices` in the tensor `reference`. - - # Arguments - reference: A tensor. - indices: An integer tensor of indices. - - # Returns - A tensor of same type as `reference`. - - {{np_implementation}} - """ - return tf.nn.embedding_lookup(reference, indices) - - -# ELEMENT-WISE OPERATIONS - - -def max(x, axis=None, keepdims=False): - """Maximum value in a tensor. - - # Arguments - x: A tensor or variable. 
- axis: An integer or list of integers in [-rank(x), rank(x)),
- the axes to find maximum values. If `None` (default), finds the
- maximum over all dimensions.
- keepdims: A boolean, whether to keep the dimensions or not.
- If `keepdims` is `False`, the rank of the tensor is reduced
- by 1. If `keepdims` is `True`,
- the reduced dimension is retained with length 1.
-
- # Returns
- A tensor with maximum values of `x`.
-
- {{np_implementation}}
- """
- return tf.reduce_max(x, axis, keepdims)
-
-
-def min(x, axis=None, keepdims=False):
- """Minimum value in a tensor.
-
- # Arguments
- x: A tensor or variable.
- axis: An integer or list of integers in [-rank(x), rank(x)),
- the axes to find minimum values. If `None` (default), finds the
- minimum over all dimensions.
- keepdims: A boolean, whether to keep the dimensions or not.
- If `keepdims` is `False`, the rank of the tensor is reduced
- by 1. If `keepdims` is `True`,
- the reduced dimension is retained with length 1.
-
- # Returns
- A tensor with minimum values of `x`.
-
- {{np_implementation}}
- """
- return tf.reduce_min(x, axis, keepdims)
-
-
-def sum(x, axis=None, keepdims=False):
- """Sum of the values in a tensor, alongside the specified axis.
-
- # Arguments
- x: A tensor or variable.
- axis: An integer or list of integers in [-rank(x), rank(x)),
- the axes to sum over. If `None` (default), sums over all
- dimensions.
- keepdims: A boolean, whether to keep the dimensions or not.
- If `keepdims` is `False`, the rank of the tensor is reduced
- by 1. If `keepdims` is `True`,
- the reduced dimension is retained with length 1.
-
- # Returns
- A tensor with sum of `x`.
-
- {{np_implementation}}
- """
- return tf.reduce_sum(x, axis, keepdims)
-
-
-def prod(x, axis=None, keepdims=False):
- """Multiplies the values in a tensor, alongside the specified axis.
-
- # Arguments
- x: A tensor or variable.
- axis: An integer or list of integers in [-rank(x), rank(x)),
- the axes to compute the product. If `None` (default), computes
- the product over all dimensions.
- keepdims: A boolean, whether to keep the dimensions or not.
- If `keepdims` is `False`, the rank of the tensor is reduced
- by 1. If `keepdims` is `True`,
- the reduced dimension is retained with length 1.
-
- # Returns
- A tensor with the product of elements of `x`.
-
- {{np_implementation}}
- """
- return tf.reduce_prod(x, axis, keepdims)
-
-
-def cumsum(x, axis=0):
- """Cumulative sum of the values in a tensor, alongside the specified axis.
-
- # Arguments
- x: A tensor or variable.
- axis: An integer, the axis to compute the sum.
-
- # Returns
- A tensor of the cumulative sum of values of `x` along `axis`.
- {{np_implementation}}
- """
- return tf.cumsum(x, axis=axis)
-
-
-def cumprod(x, axis=0):
- """Cumulative product of the values in a tensor, alongside the specified axis.
-
- # Arguments
- x: A tensor or variable.
- axis: An integer, the axis to compute the product.
-
- # Returns
- A tensor of the cumulative product of values of `x` along `axis`.
- {{np_implementation}}
- """
- return tf.cumprod(x, axis=axis)
-
-
-def var(x, axis=None, keepdims=False):
- """Variance of a tensor, alongside the specified axis.
-
- # Arguments
- x: A tensor or variable.
- axis: An integer or list of integers in [-rank(x), rank(x)),
- the axes to compute the variance. If `None` (default), computes
- the variance over all dimensions.
- keepdims: A boolean, whether to keep the dimensions or not.
- If `keepdims` is `False`, the rank of the tensor is reduced
- by 1.
If `keepdims` is `True`,
- the reduced dimension is retained with length 1.
-
- # Returns
- A tensor with the variance of elements of `x`.
- {{np_implementation}}
- """
- if x.dtype.base_dtype == tf.bool:
- x = tf.cast(x, floatx())
- m = tf.reduce_mean(x, axis, True)
- devs_squared = tf.square(x - m)
- return tf.reduce_mean(devs_squared,
- axis,
- keepdims)
-
-
-def std(x, axis=None, keepdims=False):
- """Standard deviation of a tensor, alongside the specified axis.
-
- # Arguments
- x: A tensor or variable.
- axis: An integer or list of integers in [-rank(x), rank(x)),
- the axes to compute the standard deviation. If `None` (default),
- computes the standard deviation over all dimensions.
- keepdims: A boolean, whether to keep the dimensions or not.
- If `keepdims` is `False`, the rank of the tensor is reduced
- by 1. If `keepdims` is `True`,
- the reduced dimension is retained with length 1.
-
- # Returns
- A tensor with the standard deviation of elements of `x`.
- {{np_implementation}}
- """
- return tf.sqrt(var(x, axis=axis, keepdims=keepdims))
-
-
-def mean(x, axis=None, keepdims=False):
- """Mean of a tensor, alongside the specified axis.
-
- # Arguments
- x: A tensor or variable.
- axis: An integer or list of integers in [-rank(x), rank(x)),
- the axes to compute the mean. If `None` (default), computes
- the mean over all dimensions.
- keepdims: A boolean, whether to keep the dimensions or not.
- If `keepdims` is `False`, the rank of the tensor is reduced
- by 1 for each entry in `axis`. If `keepdims` is `True`,
- the reduced dimensions are retained with length 1.
-
- # Returns
- A tensor with the mean of elements of `x`.
- {{np_implementation}}
- """
- if x.dtype.base_dtype == tf.bool:
- x = tf.cast(x, floatx())
- return tf.reduce_mean(x, axis, keepdims)
-
-
-def any(x, axis=None, keepdims=False):
- """Bitwise reduction (logical OR).
-
- # Arguments
- x: Tensor or variable.
- axis: An integer or list of integers in [-rank(x), rank(x)),
- the axes to compute the logical or. If `None` (default), computes
- the logical or over all dimensions.
- keepdims: whether to drop or broadcast the reduction axes.
-
- # Returns
- A bool tensor.
- {{np_implementation}}
- """
- x = tf.cast(x, tf.bool)
- return tf.reduce_any(x, axis, keepdims)
-
-
-def all(x, axis=None, keepdims=False):
- """Bitwise reduction (logical AND).
-
- # Arguments
- x: Tensor or variable.
- axis: An integer or list of integers in [-rank(x), rank(x)),
- the axes to compute the logical and. If `None` (default), computes
- the logical and over all dimensions.
- keepdims: whether to drop or broadcast the reduction axes.
-
- # Returns
- A bool tensor.
- {{np_implementation}}
- """
- x = tf.cast(x, tf.bool)
- return tf.reduce_all(x, axis, keepdims)
-
-
-def argmax(x, axis=-1):
- """Returns the index of the maximum value along an axis.
-
- # Arguments
- x: Tensor or variable.
- axis: axis along which to perform the reduction.
-
- # Returns
- A tensor.
- {{np_implementation}}
- """
- return tf.argmax(x, axis)
-
-
-def argmin(x, axis=-1):
- """Returns the index of the minimum value along an axis.
-
- # Arguments
- x: Tensor or variable.
- axis: axis along which to perform the reduction.
-
- # Returns
- A tensor.
- {{np_implementation}}
- """
- return tf.argmin(x, axis)
-
-
-def square(x):
- """Element-wise square.
-
- # Arguments
- x: Tensor or variable.
-
- # Returns
- A tensor.
- """
- return tf.square(x)
-
-
-def abs(x):
- """Element-wise absolute value.
-
- # Arguments
- x: Tensor or variable.
-
- # Returns
- A tensor.
- """
- return tf.abs(x)
-
-
-def sqrt(x):
- """Element-wise square root.
-
- # Arguments
- x: Tensor or variable.
-
- # Returns
- A tensor.
- {{np_implementation}}
- """
- zero = _to_tensor(0., x.dtype.base_dtype)
- inf = _to_tensor(np.inf, x.dtype.base_dtype)
- x = tf.clip_by_value(x, zero, inf)
- return tf.sqrt(x)
-
-
-def exp(x):
- """Element-wise exponential.
-
- # Arguments
- x: Tensor or variable.
-
- # Returns
- A tensor.
- """
- return tf.exp(x)
-
-
-def log(x):
- """Element-wise log.
-
- # Arguments
- x: Tensor or variable.
-
- # Returns
- A tensor.
- """
- return tf.log(x)
-
-
-def logsumexp(x, axis=None, keepdims=False):
- """Computes log(sum(exp(elements across dimensions of a tensor))).
-
- This function is more numerically stable than log(sum(exp(x))).
- It avoids overflows caused by taking the exp of large inputs and
- underflows caused by taking the log of small inputs.
-
- # Arguments
- x: A tensor or variable.
- axis: An integer or list of integers in [-rank(x), rank(x)),
- the axes to compute the logsumexp. If `None` (default), computes
- the logsumexp over all dimensions.
- keepdims: A boolean, whether to keep the dimensions or not.
- If `keepdims` is `False`, the rank of the tensor is reduced
- by 1. If `keepdims` is `True`, the reduced dimension is
- retained with length 1.
-
- # Returns
- The reduced tensor.
- {{np_implementation}}
- """
- return tf.reduce_logsumexp(x, axis, keepdims)
-
-
-def round(x):
- """Element-wise rounding to the closest integer.
-
- In case of tie, the rounding mode used is "half to even".
-
- # Arguments
- x: Tensor or variable.
-
- # Returns
- A tensor.
- """
- return tf.round(x)
-
-
-def sign(x):
- """Element-wise sign.
-
- # Arguments
- x: Tensor or variable.
-
- # Returns
- A tensor.
- """
- return tf.sign(x)
-
-
-def pow(x, a):
- """Element-wise exponentiation.
-
- # Arguments
- x: Tensor or variable.
- a: Python integer.
-
- # Returns
- A tensor.
- {{np_implementation}}
- """
- return tf.pow(x, a)
-
-
-def clip(x, min_value, max_value):
- """Element-wise value clipping.
-
- # Arguments
- x: Tensor or variable.
- min_value: Python float, integer or tensor.
- max_value: Python float, integer or tensor.
-
- # Returns
- A tensor.
- {{np_implementation}}
- """
- if (isinstance(min_value, (int, float)) and
- isinstance(max_value, (int, float))):
- if max_value < min_value:
- max_value = min_value
- if min_value is None:
- min_value = -np.inf
- if max_value is None:
- max_value = np.inf
- return tf.clip_by_value(x, min_value, max_value)
-
-
-def equal(x, y):
- """Element-wise equality between two tensors.
-
- # Arguments
- x: Tensor or variable.
- y: Tensor or variable.
-
- # Returns
- A bool tensor.
-
- {{np_implementation}}
- """
- return tf.equal(x, y)
-
-
-def not_equal(x, y):
- """Element-wise inequality between two tensors.
-
- # Arguments
- x: Tensor or variable.
- y: Tensor or variable.
-
- # Returns
- A bool tensor.
-
- {{np_implementation}}
- """
- return tf.not_equal(x, y)
-
-
-def greater(x, y):
- """Element-wise truth value of (x > y).
-
- # Arguments
- x: Tensor or variable.
- y: Tensor or variable.
-
- # Returns
- A bool tensor.
-
- {{np_implementation}}
- """
- return tf.greater(x, y)
-
-
-def greater_equal(x, y):
- """Element-wise truth value of (x >= y).
-
- # Arguments
- x: Tensor or variable.
- y: Tensor or variable.
-
- # Returns
- A bool tensor.
-
- {{np_implementation}}
- """
- return tf.greater_equal(x, y)
-
-
-def less(x, y):
- """Element-wise truth value of (x < y).
- - # Arguments - x: Tensor or variable. - y: Tensor or variable. - - # Returns - A bool tensor. - - {{np_implementation}} - """ - return tf.less(x, y) - - -def less_equal(x, y): - """Element-wise truth value of (x <= y). - - # Arguments - x: Tensor or variable. - y: Tensor or variable. - - # Returns - A bool tensor. - - {{np_implementation}} - """ - return tf.less_equal(x, y) - - -def maximum(x, y): - """Element-wise maximum of two tensors. - - # Arguments - x: Tensor or variable. - y: Tensor or variable. - - # Returns - A tensor. - - {{np_implementation}} - """ - return tf.maximum(x, y) - - -def minimum(x, y): - """Element-wise minimum of two tensors. - - # Arguments - x: Tensor or variable. - y: Tensor or variable. - - # Returns - A tensor. - - {{np_implementation}} - """ - return tf.minimum(x, y) - - -def sin(x): - """Computes sin of x element-wise. - - # Arguments - x: Tensor or variable. - - # Returns - A tensor. - """ - return tf.sin(x) - - -def cos(x): - """Computes cos of x element-wise. - - # Arguments - x: Tensor or variable. - - # Returns - A tensor. - """ - return tf.cos(x) - - -def _regular_normalize_batch_in_training(x, gamma, beta, - reduction_axes, epsilon=1e-3): - """Non-fused version of `normalize_batch_in_training`. - - # Arguments - x: Input tensor or variable. - gamma: Tensor by which to scale the input. - beta: Tensor with which to center the input. - reduction_axes: iterable of integers, - axes over which to normalize. - epsilon: Fuzz factor. - - # Returns - A tuple length of 3, `(normalized_tensor, mean, variance)`. - """ - mean, var = tf.nn.moments(x, reduction_axes, - None, None, False) - normed = tf.nn.batch_normalization(x, mean, var, - beta, gamma, - epsilon) - return normed, mean, var - - -def _broadcast_normalize_batch_in_training(x, gamma, beta, - reduction_axes, epsilon=1e-3): - """Non-fused, broadcast version of `normalize_batch_in_training`. - - # Arguments - x: Input tensor or variable. - gamma: Tensor by which to scale the input. - beta: Tensor with which to center the input. - reduction_axes: iterable of integers, - axes over which to normalize. - epsilon: Fuzz factor. - - # Returns - A tuple length of 3, `(normalized_tensor, mean, variance)`. - """ - mean, var = tf.nn.moments(x, reduction_axes, - None, None, False) - target_shape = [] - for axis in range(ndim(x)): - if axis in reduction_axes: - target_shape.append(1) - else: - target_shape.append(tf.shape(x)[axis]) - target_shape = tf.stack(target_shape) - - broadcast_mean = tf.reshape(mean, target_shape) - broadcast_var = tf.reshape(var, target_shape) - if gamma is None: - broadcast_gamma = None - else: - broadcast_gamma = tf.reshape(gamma, target_shape) - if beta is None: - broadcast_beta = None - else: - broadcast_beta = tf.reshape(beta, target_shape) - - normed = tf.nn.batch_normalization( - x, - broadcast_mean, - broadcast_var, - broadcast_beta, - broadcast_gamma, - epsilon) - return normed, mean, var - - -def _fused_normalize_batch_in_training(x, gamma, beta, reduction_axes, - epsilon=1e-3): - """Fused version of `normalize_batch_in_training`. - - # Arguments - x: Input tensor or variable. - gamma: Tensor by which to scale the input. - beta: Tensor with which to center the input. - reduction_axes: iterable of integers, - axes over which to normalize. - epsilon: Fuzz factor. - - # Returns - A tuple length of 3, `(normalized_tensor, mean, variance)`. 
- """ - if list(reduction_axes) == [0, 1, 2]: - normalization_axis = 3 - tf_data_format = 'NHWC' - else: - normalization_axis = 1 - tf_data_format = 'NCHW' - - if gamma is None: - gamma = tf.constant(1.0, - dtype=x.dtype, - shape=[x.get_shape()[normalization_axis]]) - if beta is None: - beta = tf.constant(0.0, - dtype=x.dtype, - shape=[x.get_shape()[normalization_axis]]) - - if gamma.dtype != tf.float32: - gamma = tf.cast(gamma, tf.float32) - if beta.dtype != tf.float32: - beta = tf.cast(beta, tf.float32) - - return tf.nn.fused_batch_norm( - x, - gamma, - beta, - epsilon=epsilon, - data_format=tf_data_format) - - -def normalize_batch_in_training(x, gamma, beta, - reduction_axes, epsilon=1e-3): - """Computes mean and std for batch then apply batch_normalization on batch. - - # Arguments - x: Input tensor or variable. - gamma: Tensor by which to scale the input. - beta: Tensor with which to center the input. - reduction_axes: iterable of integers, - axes over which to normalize. - epsilon: Fuzz factor. - - # Returns - A tuple length of 3, `(normalized_tensor, mean, variance)`. - """ - if ndim(x) == 4 and list(reduction_axes) in [[0, 1, 2], [0, 2, 3]]: - if not _has_nchw_support() and list(reduction_axes) == [0, 2, 3]: - return _broadcast_normalize_batch_in_training(x, gamma, beta, - reduction_axes, - epsilon=epsilon) - return _fused_normalize_batch_in_training( - x, gamma, beta, reduction_axes, - epsilon=epsilon) - else: - if sorted(reduction_axes) == list(range(ndim(x)))[:-1]: - return _regular_normalize_batch_in_training(x, gamma, beta, - reduction_axes, - epsilon=epsilon) - else: - return _broadcast_normalize_batch_in_training(x, gamma, beta, - reduction_axes, - epsilon=epsilon) - - -def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3): - """Applies batch normalization on x given mean, var, beta and gamma. - - I.e. returns: - `output = (x - mean) / sqrt(var + epsilon) * gamma + beta` - - # Arguments - x: Input tensor or variable. - mean: Mean of batch. - var: Variance of batch. - beta: Tensor with which to center the input. - gamma: Tensor by which to scale the input. - axis: Integer, the axis that should be normalized. - (typically the features axis). - epsilon: Fuzz factor. - - # Returns - A tensor. 
- """ - if ndim(x) == 4: - # The CPU implementation of FusedBatchNorm only support NHWC - if axis == 1 or axis == -3: - tf_data_format = 'NCHW' - elif axis == 3 or axis == -1: - tf_data_format = 'NHWC' - else: - tf_data_format = None - - if (tf_data_format == 'NHWC' - or tf_data_format == 'NCHW' - and _has_nchw_support()): - # The mean / var / beta / gamma may be processed by broadcast - # so it may have extra axes with 1, - # it is not needed and should be removed - if ndim(mean) > 1: - mean = tf.reshape(mean, [-1]) - if ndim(var) > 1: - var = tf.reshape(var, [-1]) - if beta is None: - beta = zeros_like(mean) - elif ndim(beta) > 1: - beta = tf.reshape(beta, [-1]) - if gamma is None: - gamma = ones_like(mean) - elif ndim(gamma) > 1: - gamma = tf.reshape(gamma, [-1]) - - if gamma.dtype != tf.float32: - gamma = tf.cast(gamma, tf.float32) - if beta.dtype != tf.float32: - beta = tf.cast(beta, tf.float32) - if mean.dtype != tf.float32: - mean = tf.cast(mean, tf.float32) - if var.dtype != tf.float32: - var = tf.cast(var, tf.float32) - - y, _, _ = tf.nn.fused_batch_norm( - x, - gamma, - beta, - epsilon=epsilon, - mean=mean, - variance=var, - data_format=tf_data_format, - is_training=False - ) - return y - # default - return tf.nn.batch_normalization(x, mean, var, beta, gamma, epsilon) - - -# SHAPE OPERATIONS - -def concatenate(tensors, axis=-1): - """Concatenates a list of tensors alongside the specified axis. - - # Arguments - tensors: list of tensors to concatenate. - axis: concatenation axis. - - # Returns - A tensor. - """ - if axis < 0: - rank = ndim(tensors[0]) - if rank: - axis %= rank - else: - axis = 0 - - if py_all([is_sparse(x) for x in tensors]): - return tf.sparse_concat(axis, tensors) - else: - return tf.concat([to_dense(x) for x in tensors], axis) - - -def reshape(x, shape): - """Reshapes a tensor to the specified shape. - - # Arguments - x: Tensor or variable. - shape: Target shape tuple. - - # Returns - A tensor. - """ - return tf.reshape(x, shape) - - -def permute_dimensions(x, pattern): - """Permutes axes in a tensor. - - # Arguments - x: Tensor or variable. - pattern: A tuple of - dimension indices, e.g. `(0, 2, 1)`. - - # Returns - A tensor. - """ - return tf.transpose(x, perm=pattern) - - -def resize_images(x, - height_factor, - width_factor, - data_format, - interpolation='nearest'): - """Resizes the images contained in a 4D tensor. - - # Arguments - x: Tensor or variable to resize. - height_factor: Positive integer. - width_factor: Positive integer. - data_format: string, `"channels_last"` or `"channels_first"`. - interpolation: A string, one of `nearest` or `bilinear`. - - # Returns - A tensor. - - # Raises - ValueError: if `data_format` is - neither `"channels_last"` or `"channels_first"`. 
- """ - if data_format == 'channels_first': - rows, cols = 2, 3 - else: - rows, cols = 1, 2 - - original_shape = int_shape(x) - new_shape = tf.shape(x)[rows:cols + 1] - new_shape *= tf.constant(np.array([height_factor, - width_factor], dtype='int32')) - - if data_format == 'channels_first': - x = permute_dimensions(x, [0, 2, 3, 1]) - if interpolation == 'nearest': - x = tf.image.resize_nearest_neighbor(x, new_shape) - elif interpolation == 'bilinear': - x = tf.image.resize_bilinear(x, new_shape) - else: - raise ValueError('interpolation should be one ' - 'of "nearest" or "bilinear".') - if data_format == 'channels_first': - x = permute_dimensions(x, [0, 3, 1, 2]) - - if original_shape[rows] is None: - new_height = None - else: - new_height = original_shape[rows] * height_factor - - if original_shape[cols] is None: - new_width = None - else: - new_width = original_shape[cols] * width_factor - - output_shape = (None, new_height, new_width, None) - x.set_shape(transpose_shape( - output_shape, data_format, spatial_axes=(1, 2))) - return x - - -def resize_volumes(x, depth_factor, height_factor, width_factor, data_format): - """Resizes the volume contained in a 5D tensor. - - # Arguments - x: Tensor or variable to resize. - depth_factor: Positive integer. - height_factor: Positive integer. - width_factor: Positive integer. - data_format: string, `"channels_last"` or `"channels_first"`. - - # Returns - A tensor. - - # Raises - ValueError: if `data_format` is - neither `"channels_last"` or `"channels_first"`. - """ - if data_format == 'channels_first': - output = repeat_elements(x, depth_factor, axis=2) - output = repeat_elements(output, height_factor, axis=3) - output = repeat_elements(output, width_factor, axis=4) - return output - elif data_format == 'channels_last': - output = repeat_elements(x, depth_factor, axis=1) - output = repeat_elements(output, height_factor, axis=2) - output = repeat_elements(output, width_factor, axis=3) - return output - else: - raise ValueError('Unknown data_format: ' + str(data_format)) - - -def repeat_elements(x, rep, axis): - """Repeats the elements of a tensor along an axis, like `np.repeat`. - - If `x` has shape `(s1, s2, s3)` and `axis` is `1`, the output - will have shape `(s1, s2 * rep, s3)`. - - # Arguments - x: Tensor or variable. - rep: Python integer, number of times to repeat. - axis: Axis along which to repeat. - - # Returns - A tensor. - """ - x_shape = x.get_shape().as_list() - # For static axis - if x_shape[axis] is not None: - # slices along the repeat axis - splits = tf.split(value=x, num_or_size_splits=x_shape[axis], axis=axis) - # repeat each slice the given number of reps - x_rep = [s for s in splits for _ in range(rep)] - return concatenate(x_rep, axis) - - # Here we use tf.tile to mimic behavior of np.repeat so that - # we can handle dynamic shapes (that include None). - # To do that, we need an auxiliary axis to repeat elements along - # it and then merge them along the desired axis. 
- - # Repeating - auxiliary_axis = axis + 1 - x_shape = tf.shape(x) - x_rep = tf.expand_dims(x, axis=auxiliary_axis) - reps = np.ones(len(x.get_shape()) + 1) - reps[auxiliary_axis] = rep - x_rep = tf.tile(x_rep, reps) - - # Merging - reps = np.delete(reps, auxiliary_axis) - reps[axis] = rep - reps = tf.constant(reps, dtype='int32') - x_shape = x_shape * reps - x_rep = tf.reshape(x_rep, x_shape) - - # Fix shape representation - x_shape = x.get_shape().as_list() - x_rep.set_shape(x_shape) - x_rep._keras_shape = tuple(x_shape) - return x_rep - - -def repeat(x, n): - """Repeats a 2D tensor. - - if `x` has shape (samples, dim) and `n` is `2`, - the output will have shape `(samples, 2, dim)`. - - # Arguments - x: Tensor or variable. - n: Python integer, number of times to repeat. - - # Returns - A tensor. - """ - assert ndim(x) == 2 - x = tf.expand_dims(x, 1) - pattern = tf.stack([1, n, 1]) - return tf.tile(x, pattern) - - -def arange(start, stop=None, step=1, dtype='int32'): - """Creates a 1D tensor containing a sequence of integers. - - The function arguments use the same convention as - Theano's arange: if only one argument is provided, - it is in fact the "stop" argument and "start" is 0. - - The default type of the returned tensor is `'int32'` to - match TensorFlow's default. - - # Arguments - start: Start value. - stop: Stop value. - step: Difference between two successive values. - dtype: Integer dtype to use. - - # Returns - An integer tensor. - - """ - # Match the behavior of numpy and Theano by returning an empty sequence. - if stop is None: - try: - if start < 0: - start = 0 - except TypeError: - # Handle case where start is a tensor - start = tf.cond(start < 0, - true_fn=lambda: tf.constant(0, dtype=start.dtype), - false_fn=lambda: start) - - result = tf.range(start, limit=stop, delta=step, name='arange') - if dtype != 'int32': - result = cast(result, dtype) - return result - - -def tile(x, n): - """Creates a tensor by tiling `x` by `n`. - - # Arguments - x: A tensor or variable - n: A list of integer. The length must be the same as the number of - dimensions in `x`. - - # Returns - A tiled tensor. - - # Example - ```python - >>> from keras import backend as K - >>> kvar = K.variable(np.random.random((2, 3))) - >>> kvar_tile = K.tile(K.eye(2), (2, 3)) - >>> K.eval(kvar_tile) - array([[1., 0., 1., 0., 1., 0.], - [0., 1., 0., 1., 0., 1.], - [1., 0., 1., 0., 1., 0.], - [0., 1., 0., 1., 0., 1.]], dtype=float32) - ``` - {{np_implementation}} - """ - if isinstance(n, int): - n = (n,) - elif isinstance(n, list): - n = tuple(n) - - shape = int_shape(x) - if len(n) < len(shape): # Padding the axis - n = tuple([1 for _ in range(len(shape) - len(n))]) + n - elif len(n) != len(shape): - raise NotImplementedError - - return tf.tile(x, n) - - -def flatten(x): - """Flatten a tensor. - - # Arguments - x: A tensor or variable. - - # Returns - A tensor, reshaped into 1-D - """ - return tf.reshape(x, [-1]) - - -def batch_flatten(x): - """Turn a nD tensor into a 2D tensor with same 0th dimension. - - In other words, it flattens each data samples of a batch. - - # Arguments - x: A tensor or variable. - - # Returns - A tensor. - """ - x = tf.reshape(x, tf.stack([-1, prod(shape(x)[1:])])) - return x - - -def expand_dims(x, axis=-1): - """Adds a 1-sized dimension at index "axis". - - # Arguments - x: A tensor or variable. - axis: Position where to add a new axis. - - # Returns - A tensor with expanded dimensions. 
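-
- # Example
- A small sketch (shape assumed for illustration):
- ```python
- >>> x = K.ones((2, 3))
- >>> K.int_shape(K.expand_dims(x, axis=1))
- (2, 1, 3)
- ```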
- """ - return tf.expand_dims(x, axis) - - -def squeeze(x, axis): - """Removes a 1-dimension from the tensor at index "axis". - - # Arguments - x: A tensor or variable. - axis: Axis to drop. - - # Returns - A tensor with the same data as `x` but reduced dimensions. - """ - return tf.squeeze(x, [axis]) - - -def temporal_padding(x, padding=(1, 1)): - """Pads the middle dimension of a 3D tensor. - - # Arguments - x: Tensor or variable. - padding: Tuple of 2 integers, how many zeros to - add at the start and end of dim 1. - - # Returns - A padded 3D tensor. - """ - assert len(padding) == 2 - pattern = [[0, 0], [padding[0], padding[1]], [0, 0]] - return tf.pad(x, pattern) - - -def spatial_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None): - """Pads the 2nd and 3rd dimensions of a 4D tensor. - - # Arguments - x: Tensor or variable. - padding: Tuple of 2 tuples, padding pattern. - data_format: string, `"channels_last"` or `"channels_first"`. - - # Returns - A padded 4D tensor. - - # Raises - ValueError: if `data_format` is - neither `"channels_last"` or `"channels_first"`. - """ - assert len(padding) == 2 - assert len(padding[0]) == 2 - assert len(padding[1]) == 2 - data_format = normalize_data_format(data_format) - - pattern = [[0, 0], - list(padding[0]), - list(padding[1]), - [0, 0]] - pattern = transpose_shape(pattern, data_format, spatial_axes=(1, 2)) - return tf.pad(x, pattern) - - -def spatial_3d_padding(x, padding=((1, 1), (1, 1), (1, 1)), data_format=None): - """Pads 5D tensor with zeros along the depth, height, width dimensions. - - Pads these dimensions with respectively - "padding[0]", "padding[1]" and "padding[2]" zeros left and right. - - For 'channels_last' data_format, - the 2nd, 3rd and 4th dimension will be padded. - For 'channels_first' data_format, - the 3rd, 4th and 5th dimension will be padded. - - # Arguments - x: Tensor or variable. - padding: Tuple of 3 tuples, padding pattern. - data_format: string, `"channels_last"` or `"channels_first"`. - - # Returns - A padded 5D tensor. - - # Raises - ValueError: if `data_format` is - neither `"channels_last"` or `"channels_first"`. - - """ - assert len(padding) == 3 - assert len(padding[0]) == 2 - assert len(padding[1]) == 2 - assert len(padding[2]) == 2 - data_format = normalize_data_format(data_format) - - pattern = [ - [0, 0], - [padding[0][0], padding[0][1]], - [padding[1][0], padding[1][1]], - [padding[2][0], padding[2][1]], - [0, 0] - ] - pattern = transpose_shape(pattern, data_format, spatial_axes=(1, 2, 3)) - - return tf.pad(x, pattern) - - -def stack(x, axis=0): - """Stacks a list of rank `R` tensors into a rank `R+1` tensor. - - # Arguments - x: List of tensors. - axis: Axis along which to perform stacking. - - # Returns - A tensor. - - {{np_implementation}} - """ - return tf.stack(x, axis=axis) - - -def one_hot(indices, num_classes): - """Computes the one-hot representation of an integer tensor. - - # Arguments - indices: nD integer tensor of shape - `(batch_size, dim1, dim2, ... dim(n-1))` - num_classes: Integer, number of classes to consider. - - # Returns - (n + 1)D one hot representation of the input - with shape `(batch_size, dim1, dim2, ... dim(n-1), num_classes)` - """ - return tf.one_hot(indices, depth=num_classes, axis=-1) - - -def reverse(x, axes): - """Reverses a tensor along the specified axes. - - # Arguments - x: Tensor to reverse. - axes: Integer or iterable of integers. - Axes to reverse. - - # Returns - A tensor. 
-
- {{np_implementation}}
- """
- if isinstance(axes, int):
- axes = [axes]
- return tf.reverse(x, axes)
-
-
-def slice(x, start, size):
- """Extracts a slice from a tensor.
-
- # Arguments
- x: Input tensor.
- start: Integer list/tuple or tensor
- indicating the start indices of the slice
- along each axis.
- size: Integer list/tuple or tensor
- indicating the size of the slice
- along each axis.
-
- # Returns
- A sliced tensor:
- ```python
- new_x = x[start[0]: start[0] + size[0], ..., start[-1]: start[-1] + size[-1]]
- ```
-
- # Raises
- ValueError: if the dimension and the size of indices mismatch.
-
- {{np_implementation}}
- """
- x_shape = int_shape(x)
- if (x_shape is not None) and (x_shape[0] is not None):
- len_start = int_shape(start)[0] if is_tensor(start) else len(start)
- len_size = int_shape(size)[0] if is_tensor(size) else len(size)
- if not (len(int_shape(x)) == len_start == len_size):
- raise ValueError(
- 'The dimension and the size of indices should match.')
- return tf.slice(x, start, size)
-
-
-# VALUE MANIPULATION
-
-
-def get_value(x):
- """Returns the value of a variable.
-
- # Arguments
- x: input variable.
-
- # Returns
- A Numpy array.
- """
- return x.eval(session=get_session())
-
-
-def batch_get_value(ops):
- """Returns the value of more than one tensor variable.
-
- # Arguments
- ops: list of ops to run.
-
- # Returns
- A list of Numpy arrays.
- """
- if ops:
- return get_session().run(ops)
- else:
- return []
-
-
-def set_value(x, value):
- """Sets the value of a variable, from a Numpy array.
-
- # Arguments
- x: Tensor to set to a new value.
- value: Value to set the tensor to, as a Numpy array
- (of the same shape).
- """
- value = np.asarray(value, dtype=dtype(x))
- tf_dtype = tf.as_dtype(x.dtype.name.split('_')[0])
- if hasattr(x, '_assign_placeholder'):
- assign_placeholder = x._assign_placeholder
- assign_op = x._assign_op
- else:
- assign_placeholder = tf.placeholder(tf_dtype, shape=value.shape)
- assign_op = x.assign(assign_placeholder)
- x._assign_placeholder = assign_placeholder
- x._assign_op = assign_op
- get_session().run(assign_op, feed_dict={assign_placeholder: value})
-
-
-def batch_set_value(tuples):
- """Sets the values of many tensor variables at once.
-
- # Arguments
- tuples: a list of tuples `(tensor, value)`.
- `value` should be a Numpy array.
- """
- if tuples:
- assign_ops = []
- feed_dict = {}
- for x, value in tuples:
- value = np.asarray(value, dtype=dtype(x))
- tf_dtype = tf.as_dtype(x.dtype.name.split('_')[0])
- if hasattr(x, '_assign_placeholder'):
- assign_placeholder = x._assign_placeholder
- assign_op = x._assign_op
- else:
- assign_placeholder = tf.placeholder(tf_dtype,
- shape=value.shape)
- assign_op = x.assign(assign_placeholder)
- x._assign_placeholder = assign_placeholder
- x._assign_op = assign_op
- assign_ops.append(assign_op)
- feed_dict[assign_placeholder] = value
- get_session().run(assign_ops, feed_dict=feed_dict)
-
-
-def get_variable_shape(x):
- """Returns the shape of a variable.
-
- # Arguments
- x: A variable.
-
- # Returns
- A tuple of integers.
- """
- return int_shape(x)
-
-
-def print_tensor(x, message=''):
- """Prints `message` and the tensor value when evaluated.
-
- Note that `print_tensor` returns a new tensor identical to `x`
- which should be used in the following code. Otherwise the
- print operation is not taken into account during evaluation.
-
- # Example
- ```python
- >>> x = K.print_tensor(x, message="x is: ")
- ```
-
- # Arguments
- x: Tensor to print.
- message: Message to print jointly with the tensor.
-
- # Returns
- The same tensor `x`, unchanged.
- """
- return tf.Print(x, [x], message)
-
-
-# GRAPH MANIPULATION
-
-class Function(object):
- """Runs a computation graph.
-
- It's possible to pass arguments to `tf.Session.run()` via `session_kwargs`.
- In particular, additional operations can be passed via the `fetches`
- argument, and additional tensor substitutions via the `feed_dict` argument.
- Note that given substitutions are merged with substitutions from `inputs`.
- Even though `feed_dict` is passed once in the constructor (called in
- `model.compile()`), we can modify the values in the dictionary. Through
- this `feed_dict` we can provide additional substitutions besides Keras
- inputs.
-
- # Arguments
- inputs: Feed placeholders to the computation graph.
- outputs: Output tensors to fetch.
- updates: Additional update ops to be run at function call.
- name: a name to help users identify what this function does.
- session_kwargs: arguments to `tf.Session.run()`:
- `fetches`, `feed_dict`,
- `options`, `run_metadata`
- """
-
- def __init__(self, inputs, outputs,
- updates=None,
- name=None,
- **session_kwargs):
- updates = updates or []
- if not isinstance(inputs, (list, tuple)):
- raise TypeError('`inputs` to a TensorFlow backend function '
- 'should be a list or tuple.')
- if not isinstance(outputs, (list, tuple)):
- raise TypeError('`outputs` of a TensorFlow backend function '
- 'should be a list or tuple.')
- if not isinstance(updates, (list, tuple)):
- raise TypeError('`updates` in a TensorFlow backend function '
- 'should be a list or tuple.')
- self.inputs = list(inputs)
- self.outputs = list(outputs)
- with tf.control_dependencies(self.outputs):
- updates_ops = []
- for update in updates:
- if isinstance(update, tuple):
- p, new_p = update
- updates_ops.append(tf.assign(p, new_p))
- else:
- # assumed already an op
- updates_ops.append(update)
- self.updates_op = tf.group(*updates_ops)
- self.name = name
- # additional tensor substitutions
- self.feed_dict = session_kwargs.pop('feed_dict', {})
- # additional operations
- self.fetches = session_kwargs.pop('fetches', [])
- if not isinstance(self.fetches, list):
- self.fetches = [self.fetches]
- # The main use case of `fetches` being passed to a model is the ability
- # to run custom updates
- # (since the outputs of fetches are never returned).
- # This requires us to wrap fetches in `identity` ops.
- self.fetches = [tf.identity(x) for x in self.fetches]
- # self.session_kwargs is used for _legacy_call
- self.session_kwargs = session_kwargs.copy()
- self.run_options = session_kwargs.pop('options', None)
- self.run_metadata = session_kwargs.pop('run_metadata', None)
- if session_kwargs:
- raise ValueError('Some keys in session_kwargs are not '
- 'supported at this '
- 'time: %s' % str(session_kwargs.keys()))
- self._callable_fn = None
- self._feed_arrays = None
- self._feed_symbols = None
- self._symbol_vals = None
- self._session = None
-
- def _make_callable(self, feed_arrays, feed_symbols, symbol_vals, session):
- """Generates a callable that runs the graph.
-
- # Arguments
- feed_arrays: List of input tensors to be fed
- Numpy arrays at runtime.
- feed_symbols: List of input tensors to be fed
- symbolic tensors at runtime.
- symbol_vals: List of symbolic tensors to be fed to `feed_symbols`.
- session: Session to use to generate the callable.
-
- # Returns
- Function that runs the graph according to the above options.
- """
- # Prepare callable options.
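- # (A session callable pre-registers its feeds, fetches and targets,
- # so repeated calls avoid the per-call setup overhead of Session.run.)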
- callable_opts = config_pb2.CallableOptions() - # Handle external-data feed. - for x in feed_arrays: - callable_opts.feed.append(x.name) - if self.feed_dict: - for key in sorted(self.feed_dict.keys()): - callable_opts.feed.append(key.name) - # Handle symbolic feed. - for x, y in zip(feed_symbols, symbol_vals): - connection = callable_opts.tensor_connection.add() - if x.dtype != y.dtype: - y = tf.cast(y, dtype=x.dtype) - from_tensor = tf_ops._as_graph_element(y) - if from_tensor is None: - from_tensor = y - connection.from_tensor = from_tensor.name # Data tensor - connection.to_tensor = x.name # Placeholder - # Handle fetches. - for x in self.outputs + self.fetches: - callable_opts.fetch.append(x.name) - # Handle updates. - callable_opts.target.append(self.updates_op.name) - # Handle run_options. - if self.run_options: - callable_opts.run_options.CopyFrom(self.run_options) - # Create callable. - callable_fn = session._make_callable_from_options(callable_opts) - # Cache parameters corresponding to the generated callable, so that - # we can detect future mismatches and refresh the callable. - self._callable_fn = callable_fn - self._feed_arrays = feed_arrays - self._feed_symbols = feed_symbols - self._symbol_vals = symbol_vals - self._session = session - - def _call(self, inputs): - if not isinstance(inputs, (list, tuple)): - raise TypeError('`inputs` should be a list or tuple.') - - session = get_session() - feed_arrays = [] - array_vals = [] - feed_symbols = [] - symbol_vals = [] - for tensor, value in zip(self.inputs, inputs): - if value is None: - continue - if is_tensor(value): - # Case: feeding symbolic tensor. - feed_symbols.append(tensor) - symbol_vals.append(value) - else: - feed_arrays.append(tensor) - # We need to do array conversion and type casting - # at this level, since - # `callable_fn` only supports exact matches. - array_vals.append( - np.asarray(value, - dtype=tf.as_dtype(tensor.dtype).as_numpy_dtype)) - if self.feed_dict: - for key in sorted(self.feed_dict.keys()): - array_vals.append( - np.asarray(self.feed_dict[key], - dtype=tf.as_dtype(key.dtype).as_numpy_dtype)) - - # Refresh callable if anything has changed. 
- if (self._callable_fn is None or - feed_arrays != self._feed_arrays or - symbol_vals != self._symbol_vals or - feed_symbols != self._feed_symbols or - session != self._session): - self._make_callable(feed_arrays, - feed_symbols, - symbol_vals, - session) - if self.run_metadata: - fetched = self._callable_fn( - *array_vals, run_metadata=self.run_metadata) - else: - fetched = self._callable_fn(*array_vals) - return fetched[:len(self.outputs)] - - def _legacy_call(self, inputs): - if not isinstance(inputs, (list, tuple)): - raise TypeError('`inputs` should be a list or tuple.') - feed_dict = self.feed_dict.copy() - for tensor, value in zip(self.inputs, inputs): - if is_sparse(tensor): - sparse_coo = value.tocoo() - indices = np.concatenate( - (np.expand_dims(sparse_coo.row, 1), - np.expand_dims(sparse_coo.col, 1)), 1) - value = (indices, sparse_coo.data, sparse_coo.shape) - feed_dict[tensor] = value - fetches = self.outputs + [self.updates_op] + self.fetches - session = get_session() - updated = session.run(fetches=fetches, feed_dict=feed_dict, - **self.session_kwargs) - return updated[:len(self.outputs)] - - def __call__(self, inputs): - if hasattr(get_session(), '_make_callable_from_options'): - if py_any(is_sparse(x) for x in self.inputs): - if py_any(is_tensor(x) for x in inputs): - raise ValueError( - 'Feeding from symbolic tensors is not ' - 'supported with sparse inputs.') - return self._legacy_call(inputs) - - # callable generated by Session._make_callable_from_options accepts - # `run_metadata` keyword argument since TF 1.10 - if self.run_metadata: - current_version = StrictVersion(tf.__version__.split('-')[0]) - if current_version < StrictVersion('1.10.0'): - if py_any(is_tensor(x) for x in inputs): - raise ValueError( - 'In order to feed symbolic tensors ' - 'to a Keras model and set ' - '`run_metadata`, you need tensorflow 1.10 or higher.') - return self._legacy_call(inputs) - - return self._call(inputs) - else: - if py_any(is_tensor(x) for x in inputs): - raise ValueError( - 'In order to feed symbolic tensors to a Keras model ' - 'in TensorFlow, you need tensorflow 1.8 or higher.') - return self._legacy_call(inputs) - - -def function(inputs, outputs, updates=None, **kwargs): - """Instantiates a Keras function. - - # Arguments - inputs: List of placeholder tensors. - outputs: List of output tensors. - updates: List of update ops. - **kwargs: Passed to `tf.Session.run`. - - # Returns - Output values as Numpy arrays. - - # Raises - ValueError: if invalid kwargs are passed in. - """ - if kwargs: - for key in kwargs: - session_has_key = has_arg(tf.Session.run, key, True) - function_has_key = has_arg(Function.__init__, key, True) - if not (session_has_key or function_has_key): - raise ValueError('Invalid argument "%s" passed to K.function ' - 'with TensorFlow backend' % key) - return Function(inputs, outputs, updates=updates, **kwargs) - - -def gradients(loss, variables): - """Returns the gradients of `loss` w.r.t. `variables`. - - # Arguments - loss: Scalar tensor to minimize. - variables: List of variables. - - # Returns - A gradients tensor. - """ - return tf.gradients(loss, variables, colocate_gradients_with_ops=True) - - -def stop_gradient(variables): - """Returns `variables` but with zero gradient w.r.t. every other variable. - - # Arguments - variables: tensor or list of tensors to consider constant with respect - to any other variable. 
-
- # Returns
- A single tensor or a list of tensors (depending on the passed argument)
- that has constant gradient with respect to any other variable.
- """
- if isinstance(variables, (list, tuple)):
- return list(map(tf.stop_gradient, variables))
- else:
- return tf.stop_gradient(variables)
-
-
-# CONTROL FLOW
-
-def rnn(step_function, inputs, initial_states,
- go_backwards=False, mask=None, constants=None,
- unroll=False, input_length=None):
- """Iterates over the time dimension of a tensor.
-
- # Arguments
- step_function:
- Parameters:
- inputs: Tensor with shape (samples, ...) (no time dimension),
- representing input for the batch of samples at a certain
- time step.
- states: List of tensors.
- Returns:
- outputs: Tensor with shape (samples, ...) (no time dimension),
- new_states: List of tensors, same length and shapes
- as 'states'.
- inputs: Tensor of temporal data of shape (samples, time, ...)
- (at least 3D).
- initial_states: Tensor with shape (samples, ...) (no time dimension),
- containing the initial values for the states used in
- the step function.
- go_backwards: Boolean. If True, do the iteration over the time
- dimension in reverse order and return the reversed sequence.
- mask: Binary tensor with shape (samples, time),
- with a zero for every element that is masked.
- constants: A list of constant values passed at each step.
- unroll: Whether to unroll the RNN or to use a symbolic loop
- (`while_loop` or `scan` depending on backend).
- input_length: Static number of timesteps in the input.
-
- # Returns
- A tuple, `(last_output, outputs, new_states)`.
-
- last_output: The latest output of the rnn, of shape `(samples, ...)`
- outputs: Tensor with shape `(samples, time, ...)` where each
- entry `outputs[s, t]` is the output of the step function
- at time `t` for sample `s`.
- new_states: List of tensors, latest states returned by
- the step function, of shape `(samples, ...)`.
-
- # Raises
- ValueError: If input dimension is less than 3.
- ValueError: If `unroll` is `True`
- but input timestep is not a fixed number.
- ValueError: If `mask` is provided (not `None`)
- but states is not provided (`len(states)` == 0).
-
- {{np_implementation}}
- """
- ndim = len(inputs.shape)
- if ndim < 3:
- raise ValueError('Input should be at least 3D.')
-
- # Transpose to time-major, i.e.
- # from (batch, time, ...) to (time, batch, ...)
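- # (the TensorArray/while_loop machinery below reads the input one
- # step at a time along the leading axis, so time must come first)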
- axes = [1, 0] + list(range(2, ndim)) - inputs = tf.transpose(inputs, (axes)) - - if mask is not None: - if mask.dtype != tf.bool: - mask = tf.cast(mask, tf.bool) - if len(mask.shape) != 2: - raise ValueError( - 'mask should have `shape=(samples, time)`, ' - 'got {}'.format(mask.shape)) - mask = tf.transpose(mask, [1, 0]) - - def get_matching_mask(mask_t, ref_tensor_t): - # tf.where needs its condition tensor - # to be the same shape as its two - # result tensors - ndim = len(ref_tensor_t.shape) - for _ in range(ndim - 1): - mask_t = expand_dims(mask_t) - add_shape = tf.shape(ref_tensor_t)[1:] - multiple = tf.concat([[1], add_shape], 0) - return tf.tile(mask_t, multiple) - - if constants is None: - constants = [] - - uses_learning_phase = [False] - - if unroll: - if not inputs.shape[0]: - raise ValueError('Unrolling requires a ' - 'fixed number of timesteps.') - states = initial_states - successive_states = [] - successive_outputs = [] - - input_list = tf.unstack(inputs) - if go_backwards: - input_list.reverse() - - if mask is not None: - mask_list = tf.unstack(mask) - if go_backwards: - mask_list.reverse() - - for inp, mask_t in zip(input_list, mask_list): - output, new_states = step_function(inp, states + constants) - if getattr(output, '_uses_learning_phase', False): - uses_learning_phase[0] = True - - if not successive_outputs: - prev_output = zeros_like(output) - else: - prev_output = successive_outputs[-1] - - output_mask_t = get_matching_mask(mask_t, output) - output = tf.where(output_mask_t, output, prev_output) - - return_states = [] - for state, new_state in zip(states, new_states): - state_mask_t = get_matching_mask(mask_t, new_state) - return_states.append(tf.where(state_mask_t, - new_state, - state)) - states = return_states - successive_outputs.append(output) - successive_states.append(states) - last_output = successive_outputs[-1] - new_states = successive_states[-1] - outputs = tf.stack(successive_outputs) - else: - for inp in input_list: - output, states = step_function(inp, states + constants) - if getattr(output, '_uses_learning_phase', False): - uses_learning_phase[0] = True - successive_outputs.append(output) - successive_states.append(states) - last_output = successive_outputs[-1] - new_states = successive_states[-1] - outputs = tf.stack(successive_outputs) - - else: - if go_backwards: - inputs = reverse(inputs, 0) - - states = tuple(initial_states) - - time_steps = tf.shape(inputs)[0] - output, _ = step_function(inputs[0], initial_states + constants) - output_ta = tensor_array_ops.TensorArray( - dtype=output.dtype, - size=time_steps, - tensor_array_name='output_ta') - initial_output = zeros_like(output) - input_ta = tensor_array_ops.TensorArray( - dtype=inputs.dtype, - size=time_steps, - tensor_array_name='input_ta') - input_ta = input_ta.unstack(inputs) - time = tf.constant(0, dtype='int32', name='time') - while_loop_kwargs = { - 'cond': lambda time, *_: time < time_steps, - 'parallel_iterations': 32, - 'swap_memory': True, - 'maximum_iterations': input_length} - - if mask is not None: - if go_backwards: - mask = reverse(mask, 0) - - mask_ta = tensor_array_ops.TensorArray( - dtype=tf.bool, - size=time_steps, - tensor_array_name='mask_ta') - mask_ta = mask_ta.unstack(mask) - - def _step(time, output_ta_t, output_tm1, *states): - """RNN step function. - - # Arguments - time: Current timestep value. - output_ta_t: TensorArray. - output_tm1: output Tensor from previous timestep - *states: List of states. 
- - # Returns - Tuple: `(time + 1,output_ta_t) + tuple(new_states)` - """ - current_input = input_ta.read(time) - mask_t = mask_ta.read(time) - output, new_states = step_function(current_input, - tuple(states) + - tuple(constants)) - if getattr(output, '_uses_learning_phase', False): - uses_learning_phase[0] = True - for state, new_state in zip(states, new_states): - new_state.set_shape(state.shape) - - output_mask_t = get_matching_mask(mask_t, output) - output = tf.where(output_mask_t, output, output_tm1) - - new_states = [tf.where(get_matching_mask(mask_t, new_states[i]), - new_states[i], - states[i]) for i in range(len(states))] - - output_ta_t = output_ta_t.write(time, output) - return (time + 1, output_ta_t, output) + tuple(new_states) - - final_outputs = control_flow_ops.while_loop( - body=_step, - loop_vars=(time, output_ta, initial_output) + states, - **while_loop_kwargs) - new_states = final_outputs[3:] # skip output_tm1 - else: - def _step(time, output_ta_t, *states): - """RNN step function. - - # Arguments - time: Current timestep value. - output_ta_t: TensorArray. - *states: List of states. - - # Returns - Tuple: `(time + 1,output_ta_t) + tuple(new_states)` - """ - current_input = input_ta.read(time) - output, new_states = step_function(current_input, - tuple(states) + - tuple(constants)) - if getattr(output, '_uses_learning_phase', False): - uses_learning_phase[0] = True - for state, new_state in zip(states, new_states): - new_state.set_shape(state.shape) - output_ta_t = output_ta_t.write(time, output) - return (time + 1, output_ta_t) + tuple(new_states) - - final_outputs = control_flow_ops.while_loop( - body=_step, - loop_vars=(time, output_ta) + states, - **while_loop_kwargs) - new_states = final_outputs[2:] - - last_time = final_outputs[0] - output_ta = final_outputs[1] - outputs = output_ta.stack() - last_output = output_ta.read(last_time - 1) - - axes = [1, 0] + list(range(2, len(outputs.shape))) - outputs = tf.transpose(outputs, axes) - last_output._uses_learning_phase = uses_learning_phase[0] - return last_output, outputs, new_states - - -def switch(condition, then_expression, else_expression): - """Switches between two operations depending on a scalar value. - - Note that both `then_expression` and `else_expression` - should be symbolic tensors of the *same shape*. - - # Arguments - condition: tensor (`int` or `bool`). - then_expression: either a tensor, or a callable that returns a tensor. - else_expression: either a tensor, or a callable that returns a tensor. - - # Returns - The selected tensor. - - # Raises - ValueError: If rank of `condition` is greater than rank of expressions. 
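-
- # Example
- A small sketch (values assumed for illustration):
- ```python
- >>> cond = K.greater(K.variable(2.), K.variable(1.))
- >>> K.eval(K.switch(cond, K.ones((2,)), K.zeros((2,))))
- array([ 1., 1.], dtype=float32)
- ```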
-
- {{np_implementation}}
- """
- if condition.dtype != tf.bool:
- condition = tf.cast(condition, 'bool')
- cond_ndim = ndim(condition)
- if not cond_ndim:
- if not callable(then_expression):
- def then_expression_fn():
- return then_expression
- else:
- then_expression_fn = then_expression
- if not callable(else_expression):
- def else_expression_fn():
- return else_expression
- else:
- else_expression_fn = else_expression
- x = tf.cond(condition,
- then_expression_fn,
- else_expression_fn)
- else:
- # tf.where needs its condition tensor
- # to be the same shape as its two
- # result tensors
- if callable(then_expression):
- then_expression = then_expression()
- if callable(else_expression):
- else_expression = else_expression()
- expr_ndim = ndim(then_expression)
- if cond_ndim > expr_ndim:
- raise ValueError('Rank of `condition` should be less than or'
- ' equal to rank of `then_expression` and '
- '`else_expression`. ndim(condition)=' +
- str(cond_ndim) + ', ndim(then_expression)'
- '=' + str(expr_ndim))
- if cond_ndim > 1:
- ndim_diff = expr_ndim - cond_ndim
- cond_shape = tf.concat(
- [tf.shape(condition), [1] * ndim_diff], axis=0)
- condition = tf.reshape(condition, cond_shape)
- expr_shape = tf.shape(then_expression)
- shape_diff = expr_shape - cond_shape
- zero_expr_shape = tf.ones_like(expr_shape)
- tile_shape = tf.where(shape_diff > 0, expr_shape, zero_expr_shape)
- condition = tf.tile(condition, tile_shape)
- x = tf.where(condition, then_expression, else_expression)
- return x
-
-
-def in_train_phase(x, alt, training=None):
- """Selects `x` in train phase, and `alt` otherwise.
-
- Note that `alt` should have the *same shape* as `x`.
-
- # Arguments
- x: What to return in train phase
- (tensor or callable that returns a tensor).
- alt: What to return otherwise
- (tensor or callable that returns a tensor).
- training: Optional scalar tensor
- (or Python boolean, or Python integer)
- specifying the learning phase.
-
- # Returns
- Either `x` or `alt` based on the `training` flag.
- The `training` flag defaults to `K.learning_phase()`.
- """
- if training is None:
- training = learning_phase()
- uses_learning_phase = True
- else:
- uses_learning_phase = False
-
- if training == 1 or training is True:
- if callable(x):
- return x()
- else:
- return x
-
- elif training == 0 or training is False:
- if callable(alt):
- return alt()
- else:
- return alt
-
- # else: assume learning phase is a placeholder tensor.
- x = switch(training, x, alt)
- if uses_learning_phase:
- x._uses_learning_phase = True
- return x
-
-
-def in_test_phase(x, alt, training=None):
- """Selects `x` in test phase, and `alt` otherwise.
-
- Note that `alt` should have the *same shape* as `x`.
-
- # Arguments
- x: What to return in test phase
- (tensor or callable that returns a tensor).
- alt: What to return otherwise
- (tensor or callable that returns a tensor).
- training: Optional scalar tensor
- (or Python boolean, or Python integer)
- specifying the learning phase.
-
- # Returns
- Either `x` or `alt` based on `K.learning_phase`.
- """
- return in_train_phase(alt, x, training=training)
-
-
-# NN OPERATIONS
-
-def relu(x, alpha=0., max_value=None, threshold=0.):
- """Rectified linear unit.
-
- With default values, it returns element-wise `max(x, 0)`.
-
- Otherwise, it follows:
- `f(x) = max_value` for `x >= max_value`,
- `f(x) = x` for `threshold <= x < max_value`,
- `f(x) = alpha * (x - threshold)` otherwise.
-
- # Arguments
- x: A tensor or variable.
- alpha: A scalar, slope of negative section (default=`0.`).
- max_value: float. Saturation threshold. - threshold: float. Threshold value for thresholded activation. - - # Returns - A tensor. - - {{np_implementation}} - """ - - if alpha != 0.: - if max_value is None and threshold == 0.: - return tf.nn.leaky_relu(x, alpha=alpha) - - if threshold != 0.: - negative_part = tf.nn.relu(-x + threshold) - else: - negative_part = tf.nn.relu(-x) - - clip_max = max_value is not None - - if threshold != 0: - # computes x for x > threshold else 0 - x = x * tf.cast(tf.greater(x, threshold), floatx()) - elif max_value == 6: - # if no threshold, then can use nn.relu6 native TF op for performance - x = tf.nn.relu6(x) - clip_max = False - else: - x = tf.nn.relu(x) - - if clip_max: - max_value = _to_tensor(max_value, x.dtype.base_dtype) - zero = _to_tensor(0., x.dtype.base_dtype) - x = tf.clip_by_value(x, zero, max_value) - - if alpha != 0: - alpha = _to_tensor(alpha, x.dtype.base_dtype) - x -= alpha * negative_part - return x - - -def elu(x, alpha=1.): - """Exponential linear unit. - - # Arguments - x: A tensor or variable to compute the activation function for. - alpha: A scalar, slope of negative section. - - # Returns - A tensor. - - {{np_implementation}} - """ - res = tf.nn.elu(x) - if alpha == 1: - return res - else: - return tf.where(x > 0, res, alpha * res) - - -def softmax(x, axis=-1): - """Softmax of a tensor. - - # Arguments - x: A tensor or variable. - axis: The dimension softmax would be performed on. - The default is -1 which indicates the last dimension. - - # Returns - A tensor. - - {{np_implementation}} - """ - return tf.nn.softmax(x, axis=axis) - - -def softplus(x): - """Softplus of a tensor. - - # Arguments - x: A tensor or variable. - - # Returns - A tensor. - - {{np_implementation}} - """ - return tf.nn.softplus(x) - - -def softsign(x): - """Softsign of a tensor. - - # Arguments - x: A tensor or variable. - - # Returns - A tensor. - - {{np_implementation}} - """ - return tf.nn.softsign(x) - - -def categorical_crossentropy(target, output, from_logits=False, axis=-1): - """Categorical crossentropy between an output tensor and a target tensor. - - # Arguments - target: A tensor of the same shape as `output`. - output: A tensor resulting from a softmax - (unless `from_logits` is True, in which - case `output` is expected to be the logits). - from_logits: Boolean, whether `output` is the - result of a softmax, or is a tensor of logits. - axis: Int specifying the channels axis. `axis=-1` - corresponds to data format `channels_last`, - and `axis=1` corresponds to data format - `channels_first`. - - # Returns - Output tensor. - - # Raises - ValueError: if `axis` is neither -1 nor one of - the axes of `output`. - """ - output_dimensions = list(range(len(output.get_shape()))) - if axis != -1 and axis not in output_dimensions: - raise ValueError( - '{}{}{}'.format( - 'Unexpected channels axis {}. '.format(axis), - 'Expected to be -1 or one of the axes of `output`, ', - 'which has {} dimensions.'.format(len(output.get_shape())))) - # Note: tf.nn.softmax_cross_entropy_with_logits - # expects logits, Keras expects probabilities. - if not from_logits: - # scale preds so that the class probas of each sample sum to 1 - output /= tf.reduce_sum(output, axis, True) - # manual computation of crossentropy - _epsilon = _to_tensor(epsilon(), output.dtype.base_dtype) - output = tf.clip_by_value(output, _epsilon, 1. 
- _epsilon) - return - tf.reduce_sum(target * tf.log(output), axis) - else: - return tf.nn.softmax_cross_entropy_with_logits(labels=target, - logits=output) - - -def sparse_categorical_crossentropy(target, output, from_logits=False, axis=-1): - """Categorical crossentropy with integer targets. - - # Arguments - target: An integer tensor. - output: A tensor resulting from a softmax - (unless `from_logits` is True, in which - case `output` is expected to be the logits). - from_logits: Boolean, whether `output` is the - result of a softmax, or is a tensor of logits. - axis: Int specifying the channels axis. `axis=-1` - corresponds to data format `channels_last`, - and `axis=1` corresponds to data format - `channels_first`. - - # Returns - Output tensor. - - # Raises - ValueError: if `axis` is neither -1 nor one of - the axes of `output`. - """ - output_dimensions = list(range(len(output.get_shape()))) - if axis != -1 and axis not in output_dimensions: - raise ValueError( - '{}{}{}'.format( - 'Unexpected channels axis {}. '.format(axis), - 'Expected to be -1 or one of the axes of `output`, ', - 'which has {} dimensions.'.format(len(output.get_shape())))) - # If the channels are not in the last axis, move them to be there: - if axis != -1 and axis != output_dimensions[-1]: - permutation = output_dimensions[:axis] + output_dimensions[axis + 1:] - permutation += [axis] - output = tf.transpose(output, perm=permutation) - - # Note: tf.nn.sparse_softmax_cross_entropy_with_logits - # expects logits, Keras expects probabilities. - if not from_logits: - _epsilon = _to_tensor(epsilon(), output.dtype.base_dtype) - output = tf.clip_by_value(output, _epsilon, 1 - _epsilon) - output = tf.log(output) - - output_shape = output.get_shape() - targets = cast(flatten(target), 'int64') - logits = tf.reshape(output, [-1, int(output_shape[-1])]) - res = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=targets, - logits=logits) - if len(output_shape) >= 3: - # if our output includes timestep dimension - # or spatial dimensions we need to reshape - return tf.reshape(res, tf.shape(output)[:-1]) - else: - return res - - -def binary_crossentropy(target, output, from_logits=False): - """Binary crossentropy between an output tensor and a target tensor. - - # Arguments - target: A tensor with the same shape as `output`. - output: A tensor. - from_logits: Whether `output` is expected to be a logits tensor. - By default, we consider that `output` - encodes a probability distribution. - - # Returns - A tensor. - """ - # Note: tf.nn.sigmoid_cross_entropy_with_logits - # expects logits, Keras expects probabilities. - if not from_logits: - # transform back to logits - _epsilon = _to_tensor(epsilon(), output.dtype.base_dtype) - output = tf.clip_by_value(output, _epsilon, 1 - _epsilon) - output = tf.log(output / (1 - output)) - - return tf.nn.sigmoid_cross_entropy_with_logits(labels=target, - logits=output) - - -def sigmoid(x): - """Element-wise sigmoid. - - # Arguments - x: A tensor or variable. - - # Returns - A tensor. - - {{np_implementation}} - """ - return tf.nn.sigmoid(x) - - -def hard_sigmoid(x): - """Segment-wise linear approximation of sigmoid. - - Faster than sigmoid. - Returns `0.` if `x < -2.5`, `1.` if `x > 2.5`. - In `-2.5 <= x <= 2.5`, returns `0.2 * x + 0.5`. - - # Arguments - x: A tensor or variable. - - # Returns - A tensor. 
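The segment-wise definition of `hard_sigmoid` above maps directly onto NumPy. A minimal sketch of what the `{{np_implementation}}` placeholder stands for (an assumption for illustration, not the template's actual text):

```python
import numpy as np

def np_hard_sigmoid(x):
    # clip(0.2 * x + 0.5, 0, 1): linear in [-2.5, 2.5], saturated outside
    return np.clip(0.2 * np.asarray(x) + 0.5, 0.0, 1.0)

assert np_hard_sigmoid(-3.0) == 0.0   # x < -2.5 -> 0
assert np_hard_sigmoid(0.0) == 0.5    # midpoint
assert np_hard_sigmoid(3.0) == 1.0    # x > 2.5 -> 1
```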
- - {{np_implementation}} - """ - x = (0.2 * x) + 0.5 - zero = _to_tensor(0., x.dtype.base_dtype) - one = _to_tensor(1., x.dtype.base_dtype) - x = tf.clip_by_value(x, zero, one) - return x - - -def tanh(x): - """Element-wise tanh. - - # Arguments - x: A tensor or variable. - - # Returns - A tensor. - - {{np_implementation}} - """ - return tf.nn.tanh(x) - - -def dropout(x, level, noise_shape=None, seed=None): - """Sets entries in `x` to zero at random, while scaling the entire tensor. - - # Arguments - x: tensor - level: fraction of the entries in the tensor - that will be set to 0. - noise_shape: shape for randomly generated keep/drop flags, - must be broadcastable to the shape of `x` - seed: random seed to ensure determinism. - - # Returns - A tensor. - {{np_implementation}} - """ - retain_prob = 1. - level - if seed is None: - seed = np.random.randint(10e6) - # the dummy 1. works around a TF bug - # (float32_ref vs. float32 incompatibility) - return tf.nn.dropout(x * 1., retain_prob, noise_shape, seed=seed) - - -def l2_normalize(x, axis=None): - """Normalizes a tensor wrt the L2 norm alongside the specified axis. - - # Arguments - x: Tensor or variable. - axis: axis along which to perform normalization. - - # Returns - A tensor. - - {{np_implementation}} - """ - return tf.nn.l2_normalize(x, axis=axis) - - -def in_top_k(predictions, targets, k): - """Returns whether the `targets` are in the top `k` `predictions`. - - # Arguments - predictions: A tensor of shape `(batch_size, classes)` and type `float32`. - targets: A 1D tensor of length `batch_size` and type `int32` or `int64`. - k: An `int`, number of top elements to consider. - - # Returns - A 1D tensor of length `batch_size` and type `bool`. - `output[i]` is `True` if `predictions[i, targets[i]]` is within top-`k` - values of `predictions[i]`. - """ - return tf.nn.in_top_k(predictions, targets, k) - - -# CONVOLUTIONS - - -def _preprocess_conv1d_input(x, data_format): - """Transpose and cast the input before the conv1d. - - # Arguments - x: input tensor. - data_format: string, `"channels_last"` or `"channels_first"`. - - # Returns - A tensor. - """ - # tensorflow doesn't support float64 for conv layer before 1.8.0 - if (dtype(x) == 'float64' and - StrictVersion(tf.__version__.split('-')[0]) < StrictVersion('1.8.0')): - x = tf.cast(x, 'float32') - tf_data_format = 'NWC' # to pass TF Conv2dNative operations - if data_format == 'channels_first': - if not _has_nchw_support(): - x = tf.transpose(x, (0, 2, 1)) # NCW -> NWC - else: - tf_data_format = 'NCW' - return x, tf_data_format - - -def _preprocess_conv2d_input(x, data_format, force_transpose=False): - """Transpose and cast the input before the conv2d. - - # Arguments - x: input tensor. - data_format: string, `"channels_last"` or `"channels_first"`. - force_transpose: boolean, whether force to transpose input from NCHW to NHWC - if the `data_format` is `"channels_first"`. - - # Returns - A tensor. - """ - # tensorflow doesn't support float64 for conv layer before 1.8.0 - if (dtype(x) == 'float64' and - StrictVersion(tf.__version__.split('-')[0]) < StrictVersion('1.8.0')): - x = tf.cast(x, 'float32') - tf_data_format = 'NHWC' - if data_format == 'channels_first': - if not _has_nchw_support() or force_transpose: - x = tf.transpose(x, (0, 2, 3, 1)) # NCHW -> NHWC - else: - tf_data_format = 'NCHW' - return x, tf_data_format - - -def _preprocess_conv3d_input(x, data_format): - """Transpose and cast the input before the conv3d. - - # Arguments - x: input tensor. 
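`dropout` keeps each entry with probability `1 - level` and rescales the survivors by `1 / (1 - level)`, so the expected value of the tensor is unchanged. A quick check, assuming the same `K` backend import:

```python
import numpy as np
from keras import backend as K

x = K.variable(np.ones((1000,)))
out = K.eval(K.dropout(x, level=0.5, seed=42))
# survivors are scaled by 1 / (1 - 0.5) == 2.0 and zeros fill the rest,
# so the mean stays near 1.0
print(np.unique(out))   # [0. 2.]
print(out.mean())       # approximately 1.0
```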
- data_format: string, `"channels_last"` or `"channels_first"`. - - # Returns - A tensor. - """ - # tensorflow doesn't support float64 for conv layer before 1.8.0 - if (dtype(x) == 'float64' and - StrictVersion(tf.__version__.split('-')[0]) < StrictVersion('1.8.0')): - x = tf.cast(x, 'float32') - tf_data_format = 'NDHWC' - if data_format == 'channels_first': - if not _has_nchw_support(): - x = tf.transpose(x, (0, 2, 3, 4, 1)) - else: - tf_data_format = 'NCDHW' - return x, tf_data_format - - -def _preprocess_padding(padding): - """Convert keras' padding to tensorflow's padding. - - # Arguments - padding: string, `"same"` or `"valid"`. - - # Returns - a string, `"SAME"` or `"VALID"`. - - # Raises - ValueError: if `padding` is invalid. - """ - if padding == 'same': - padding = 'SAME' - elif padding == 'valid': - padding = 'VALID' - else: - raise ValueError('Invalid padding: ' + str(padding)) - return padding - - -def conv1d(x, kernel, strides=1, padding='valid', - data_format=None, dilation_rate=1): - """1D convolution. - - # Arguments - x: Tensor or variable. - kernel: kernel tensor. - strides: stride integer. - padding: string, `"same"`, `"causal"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: integer dilate rate. - - # Returns - A tensor, result of 1D convolution. - - # Raises - ValueError: If `data_format` is neither - `"channels_last"` nor `"channels_first"`. - """ - data_format = normalize_data_format(data_format) - - kernel_shape = kernel.get_shape().as_list() - if padding == 'causal': - if data_format != 'channels_last': - raise ValueError('When using causal padding in `conv1d`, ' - '`data_format` must be "channels_last" ' - '(temporal data).') - # causal (dilated) convolution: - left_pad = dilation_rate * (kernel_shape[0] - 1) - x = temporal_padding(x, (left_pad, 0)) - padding = 'valid' - padding = _preprocess_padding(padding) - x, tf_data_format = _preprocess_conv1d_input(x, data_format) - x = tf.nn.convolution( - input=x, - filter=kernel, - dilation_rate=(dilation_rate,), - strides=(strides,), - padding=padding, - data_format=tf_data_format) - - if data_format == 'channels_first' and tf_data_format == 'NWC': - x = tf.transpose(x, (0, 2, 1)) # NWC -> NCW - return x - - -def conv2d(x, kernel, strides=(1, 1), padding='valid', - data_format=None, dilation_rate=(1, 1)): - """2D convolution. - - # Arguments - x: Tensor or variable. - kernel: kernel tensor. - strides: strides tuple. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - Whether to use Theano or TensorFlow/CNTK data format - for inputs/kernels/outputs. - dilation_rate: tuple of 2 integers. - - # Returns - A tensor, result of 2D convolution. - - # Raises - ValueError: If `data_format` is neither - `"channels_last"` nor `"channels_first"`. - """ - data_format = normalize_data_format(data_format) - - x, tf_data_format = _preprocess_conv2d_input(x, data_format) - - padding = _preprocess_padding(padding) - x = tf.nn.convolution( - input=x, - filter=kernel, - dilation_rate=dilation_rate, - strides=strides, - padding=padding, - data_format=tf_data_format) - - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = tf.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -def conv2d_transpose(x, kernel, output_shape, strides=(1, 1), - padding='valid', data_format=None, dilation_rate=(1, 1)): - """2D deconvolution (i.e. transposed convolution). - - # Arguments - x: Tensor or variable. - kernel: kernel tensor. 
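The causal branch of `conv1d` above reduces causal convolution to plain left padding, `dilation_rate * (kernel_size - 1)` zeros before the sequence, followed by a `'valid'` convolution. A NumPy sketch of just the padding arithmetic:

```python
import numpy as np

kernel_size, dilation_rate, steps = 3, 2, 5
left_pad = dilation_rate * (kernel_size - 1)          # 4 zeros, left side only
x = np.ones((1, steps, 1))                            # (batch, steps, channels)
x_padded = np.pad(x, ((0, 0), (left_pad, 0), (0, 0)), mode='constant')
print(x_padded.shape)  # (1, 9, 1): a 'valid' dilated conv now yields 5 outputs
# output_length = steps + left_pad - dilation_rate * (kernel_size - 1) == steps,
# and no output position ever sees a future timestep
```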
- output_shape: 1D int tensor for the output shape. - strides: strides tuple. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - Whether to use Theano or TensorFlow/CNTK data format - for inputs/kernels/outputs. - dilation_rate: tuple of 2 integers. - - # Returns - A tensor, result of transposed 2D convolution. - - # Raises - ValueError: If `data_format` is neither - `"channels_last"` nor `"channels_first"`. - """ - data_format = normalize_data_format(data_format) - if isinstance(output_shape, (tuple, list)): - output_shape = tf.stack(output_shape) - - # tf.nn.atrous_conv2d_transpose input only supports NHWC format - if data_format == 'channels_first' and dilation_rate != (1, 1): - force_transpose = True - else: - force_transpose = False - - x, tf_data_format = _preprocess_conv2d_input( - x, data_format, force_transpose) - - if data_format == 'channels_first' and tf_data_format == 'NHWC': - output_shape = (output_shape[0], - output_shape[2], - output_shape[3], - output_shape[1]) - if output_shape[0] is None: - output_shape = (tf.shape(x)[0],) + tuple(output_shape[1:]) - output_shape = tf.stack(list(output_shape)) - - padding = _preprocess_padding(padding) - if tf_data_format == 'NHWC': - strides = (1,) + strides + (1,) - else: - strides = (1, 1) + strides - - if dilation_rate == (1, 1): - x = tf.nn.conv2d_transpose(x, kernel, output_shape, strides, - padding=padding, - data_format=tf_data_format) - else: - assert dilation_rate[0] == dilation_rate[1] - x = tf.nn.atrous_conv2d_transpose( - x, kernel, output_shape, dilation_rate[0], padding) - - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = tf.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -def separable_conv1d(x, depthwise_kernel, pointwise_kernel, strides=1, - padding='valid', data_format=None, dilation_rate=1): - """1D convolution with separable filters. - - # Arguments - x: input tensor - depthwise_kernel: convolution kernel for the depthwise convolution. - pointwise_kernel: kernel for the 1x1 convolution. - strides: stride integer. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: integer dilation rate. - - # Returns - Output tensor. - - # Raises - ValueError: If `data_format` is neither - `"channels_last"` nor `"channels_first"`. 
- """ - data_format = normalize_data_format(data_format) - if isinstance(strides, int): - strides = (strides,) - if isinstance(dilation_rate, int): - dilation_rate = (dilation_rate,) - - x, tf_data_format = _preprocess_conv1d_input(x, data_format) - if tf_data_format == 'NWC': - tf_data_format = 'NHWC' - else: - tf_data_format = 'NCHW' - padding = _preprocess_padding(padding) - if tf_data_format == 'NHWC': - spatial_start_dim = 1 - strides = (1,) + strides * 2 + (1,) - else: - spatial_start_dim = 2 - strides = (1, 1) + strides * 2 - x = tf.expand_dims(x, spatial_start_dim) - depthwise_kernel = tf.expand_dims(depthwise_kernel, 0) - pointwise_kernel = tf.expand_dims(pointwise_kernel, 0) - dilation_rate = (1,) + dilation_rate - - x = tf.nn.separable_conv2d(x, depthwise_kernel, pointwise_kernel, - strides=strides, - padding=padding, - rate=dilation_rate, - data_format=tf_data_format) - - x = tf.squeeze(x, [spatial_start_dim]) - - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = tf.transpose(x, (0, 2, 1)) # NWC -> NCW - - return x - - -def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1), - padding='valid', data_format=None, dilation_rate=(1, 1)): - """2D convolution with separable filters. - - # Arguments - x: input tensor - depthwise_kernel: convolution kernel for the depthwise convolution. - pointwise_kernel: kernel for the 1x1 convolution. - strides: strides tuple (length 2). - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: tuple of integers, - dilation rates for the separable convolution. - - # Returns - Output tensor. - - # Raises - ValueError: If `data_format` is neither - `"channels_last"` nor `"channels_first"`. - """ - data_format = normalize_data_format(data_format) - - x, tf_data_format = _preprocess_conv2d_input(x, data_format) - padding = _preprocess_padding(padding) - if tf_data_format == 'NHWC': - strides = (1,) + strides + (1,) - else: - strides = (1, 1) + strides - - x = tf.nn.separable_conv2d(x, depthwise_kernel, pointwise_kernel, - strides=strides, - padding=padding, - rate=dilation_rate, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = tf.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -def depthwise_conv2d(x, depthwise_kernel, strides=(1, 1), padding='valid', - data_format=None, dilation_rate=(1, 1)): - """2D convolution with separable filters. - - # Arguments - x: input tensor - depthwise_kernel: convolution kernel for the depthwise convolution. - strides: strides tuple (length 2). - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: tuple of integers, - dilation rates for the separable convolution. - - # Returns - Output tensor. - - # Raises - ValueError: If `data_format` is neither - `"channels_last"` nor `"channels_first"`. 
- """ - data_format = normalize_data_format(data_format) - - x, tf_data_format = _preprocess_conv2d_input(x, data_format) - padding = _preprocess_padding(padding) - if tf_data_format == 'NHWC': - strides = (1,) + strides + (1,) - else: - strides = (1, 1) + strides - - x = tf.nn.depthwise_conv2d(x, depthwise_kernel, - strides=strides, - padding=padding, - rate=dilation_rate, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = tf.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -def conv3d(x, kernel, strides=(1, 1, 1), padding='valid', - data_format=None, dilation_rate=(1, 1, 1)): - """3D convolution. - - # Arguments - x: Tensor or variable. - kernel: kernel tensor. - strides: strides tuple. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - Whether to use Theano or TensorFlow/CNTK data format - for inputs/kernels/outputs. - dilation_rate: tuple of 3 integers. - - # Returns - A tensor, result of 3D convolution. - - # Raises - ValueError: If `data_format` is neither - `"channels_last"` nor `"channels_first"`. - """ - data_format = normalize_data_format(data_format) - - x, tf_data_format = _preprocess_conv3d_input(x, data_format) - padding = _preprocess_padding(padding) - x = tf.nn.convolution( - input=x, - filter=kernel, - dilation_rate=dilation_rate, - strides=strides, - padding=padding, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NDHWC': - x = tf.transpose(x, (0, 4, 1, 2, 3)) - return x - - -def conv3d_transpose(x, kernel, output_shape, strides=(1, 1, 1), - padding='valid', data_format=None): - """3D deconvolution (i.e. transposed convolution). - - # Arguments - x: input tensor. - kernel: kernel tensor. - output_shape: 1D int tensor for the output shape. - strides: strides tuple. - padding: string, "same" or "valid". - data_format: string, `"channels_last"` or `"channels_first"`. - Whether to use Theano or TensorFlow/CNTK data format - for inputs/kernels/outputs. - - # Returns - A tensor, result of transposed 3D convolution. - - # Raises - ValueError: If `data_format` is neither - `"channels_last"` nor `"channels_first"`. - """ - data_format = normalize_data_format(data_format) - if isinstance(output_shape, (tuple, list)): - output_shape = tf.stack(output_shape) - - x, tf_data_format = _preprocess_conv3d_input(x, data_format) - - if data_format == 'channels_first' and tf_data_format == 'NDHWC': - output_shape = (output_shape[0], - output_shape[2], - output_shape[3], - output_shape[4], - output_shape[1]) - if output_shape[0] is None: - output_shape = (tf.shape(x)[0],) + tuple(output_shape[1:]) - output_shape = tf.stack(list(output_shape)) - - padding = _preprocess_padding(padding) - if tf_data_format == 'NDHWC': - strides = (1,) + strides + (1,) - else: - strides = (1, 1) + strides - - x = tf.nn.conv3d_transpose(x, kernel, output_shape, strides, - padding=padding, - data_format=tf_data_format) - if data_format == 'channels_first' and tf_data_format == 'NDHWC': - x = tf.transpose(x, (0, 4, 1, 2, 3)) - return x - - -def pool2d(x, pool_size, strides=(1, 1), - padding='valid', data_format=None, - pool_mode='max'): - """2D Pooling. - - # Arguments - x: Tensor or variable. - pool_size: tuple of 2 integers. - strides: tuple of 2 integers. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - pool_mode: string, `"max"` or `"avg"`. - - # Returns - A tensor, result of 2D pooling. 
- - # Raises - ValueError: if `data_format` is - neither `"channels_last"` or `"channels_first"`. - ValueError: if `pool_mode` is neither `"max"` or `"avg"`. - """ - data_format = normalize_data_format(data_format) - - x, tf_data_format = _preprocess_conv2d_input(x, data_format) - padding = _preprocess_padding(padding) - if tf_data_format == 'NHWC': - strides = (1,) + strides + (1,) - pool_size = (1,) + pool_size + (1,) - else: - strides = (1, 1) + strides - pool_size = (1, 1) + pool_size - - if pool_mode == 'max': - x = tf.nn.max_pool(x, pool_size, strides, - padding=padding, - data_format=tf_data_format) - elif pool_mode == 'avg': - x = tf.nn.avg_pool(x, pool_size, strides, - padding=padding, - data_format=tf_data_format) - else: - raise ValueError('Invalid pool_mode: ' + str(pool_mode)) - - if data_format == 'channels_first' and tf_data_format == 'NHWC': - x = tf.transpose(x, (0, 3, 1, 2)) # NHWC -> NCHW - return x - - -def pool3d(x, pool_size, strides=(1, 1, 1), padding='valid', - data_format=None, pool_mode='max'): - """3D Pooling. - - # Arguments - x: Tensor or variable. - pool_size: tuple of 3 integers. - strides: tuple of 3 integers. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - pool_mode: string, `"max"` or `"avg"`. - - # Returns - A tensor, result of 3D pooling. - - # Raises - ValueError: if `data_format` is - neither `"channels_last"` or `"channels_first"`. - ValueError: if `pool_mode` is neither `"max"` or `"avg"`. - """ - data_format = normalize_data_format(data_format) - - x, tf_data_format = _preprocess_conv3d_input(x, data_format) - padding = _preprocess_padding(padding) - if tf_data_format == 'NDHWC': - strides = (1,) + strides + (1,) - pool_size = (1,) + pool_size + (1,) - else: - strides = (1, 1) + strides - pool_size = (1, 1) + pool_size - - if pool_mode == 'max': - x = tf.nn.max_pool3d(x, pool_size, strides, - padding=padding, - data_format=tf_data_format) - elif pool_mode == 'avg': - x = tf.nn.avg_pool3d(x, pool_size, strides, - padding=padding, - data_format=tf_data_format) - else: - raise ValueError('Invalid pool_mode: ' + str(pool_mode)) - - if data_format == 'channels_first' and tf_data_format == 'NDHWC': - x = tf.transpose(x, (0, 4, 1, 2, 3)) - return x - - -def bias_add(x, bias, data_format=None): - """Adds a bias vector to a tensor. - - # Arguments - x: Tensor or variable. - bias: Bias tensor to add. - data_format: string, `"channels_last"` or `"channels_first"`. - - # Returns - Output tensor. - - # Raises - ValueError: In one of the two cases below: - 1. invalid `data_format` argument. - 2. invalid bias shape. 
- the bias should be either a vector or - a tensor with ndim(x) - 1 dimension - {{np_implementation}} - """ - data_format = normalize_data_format(data_format) - bias_shape = int_shape(bias) - if len(bias_shape) != 1 and len(bias_shape) != ndim(x) - 1: - raise ValueError('Unexpected bias dimensions %d, ' - 'expect to be 1 or %d dimensions' - % (len(bias_shape), ndim(x))) - if ndim(x) == 5: - if len(bias_shape) == 1: - new_shape = (1, 1, 1, 1, bias_shape[0]) - else: - new_shape = (1,) + bias_shape - new_shape = transpose_shape( - new_shape, data_format, spatial_axes=(1, 2, 3)) - x += reshape(bias, new_shape) - elif ndim(x) == 4: - if data_format == 'channels_first': - if len(bias_shape) == 1: - if _has_nchw_support(): - x = tf.nn.bias_add(x, bias, - data_format='NCHW') - else: - x += reshape(bias, (1, bias_shape[0], 1, 1)) - else: - x += reshape(bias, (1, bias_shape[2]) + bias_shape[:2]) - elif data_format == 'channels_last': - if len(bias_shape) == 1: - x = tf.nn.bias_add(x, bias, - data_format='NHWC') - else: - x += reshape(bias, (1,) + bias_shape) - elif ndim(x) == 3: - if len(bias_shape) == 1: - new_shape = (1, 1, bias_shape[0]) - else: - new_shape = (1,) + bias_shape - new_shape = transpose_shape(new_shape, data_format, spatial_axes=(1,)) - x += reshape(bias, new_shape) - else: - x = tf.nn.bias_add(x, bias) - return x - - -# RANDOMNESS - -def random_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): - """Returns a tensor with normal distribution of values. - - # Arguments - shape: A tuple of integers, the shape of tensor to create. - mean: A float, mean of the normal distribution to draw samples. - stddev: A float, standard deviation of the normal distribution - to draw samples. - dtype: String, dtype of returned tensor. - seed: Integer, random seed. - - # Returns - A tensor. - """ - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(10e6) - return tf.random_normal(shape, mean=mean, stddev=stddev, - dtype=dtype, seed=seed) - - -def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None): - """Returns a tensor with uniform distribution of values. - - # Arguments - shape: A tuple of integers, the shape of tensor to create. - minval: A float, lower boundary of the uniform distribution - to draw samples. - maxval: A float, upper boundary of the uniform distribution - to draw samples. - dtype: String, dtype of returned tensor. - seed: Integer, random seed. - - # Returns - A tensor. - """ - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(10e6) - return tf.random_uniform(shape, minval=minval, maxval=maxval, - dtype=dtype, seed=seed) - - -def random_binomial(shape, p=0.0, dtype=None, seed=None): - """Returns a tensor with random binomial distribution of values. - - # Arguments - shape: A tuple of integers, the shape of tensor to create. - p: A float, `0. <= p <= 1`, probability of binomial distribution. - dtype: String, dtype of returned tensor. - seed: Integer, random seed. - - # Returns - A tensor. - """ - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(10e6) - return tf.where(tf.random_uniform(shape, dtype=dtype, seed=seed) <= p, - tf.ones(shape, dtype=dtype), - tf.zeros(shape, dtype=dtype)) - - -def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): - """Returns a tensor with truncated random normal distribution of values. 
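`random_binomial` above samples Bernoulli(p) by thresholding a uniform draw instead of calling a dedicated op. The same construction in NumPy (a sketch; the helper name is mine):

```python
import numpy as np

def np_random_binomial(shape, p=0.5, seed=None):
    # mirrors tf.where(uniform(shape) <= p, ones, zeros)
    rng = np.random.RandomState(seed)
    return (rng.uniform(size=shape) <= p).astype('float32')

sample = np_random_binomial((10000,), p=0.3, seed=1)
print(sample.mean())  # approximately 0.3
```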
- - The generated values follow a normal distribution - with specified mean and standard deviation, - except that values whose magnitude is more than - two standard deviations from the mean are dropped and re-picked. - - # Arguments - shape: A tuple of integers, the shape of tensor to create. - mean: Mean of the values. - stddev: Standard deviation of the values. - dtype: String, dtype of returned tensor. - seed: Integer, random seed. - - # Returns - A tensor. - """ - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(10e6) - return tf.truncated_normal(shape, mean, stddev, dtype=dtype, seed=seed) - - -# CTC -# TensorFlow has a native implementation, but it uses sparse tensors -# and therefore requires a wrapper for Keras. The functions below convert -# dense to sparse tensors and also wraps up the beam search code that is -# in TensorFlow's CTC implementation - - -def ctc_label_dense_to_sparse(labels, label_lengths): - """Converts CTC labels from dense to sparse. - - # Arguments - labels: dense CTC labels. - label_lengths: length of the labels. - - # Returns - A sparse tensor representation of the labels. - """ - label_shape = tf.shape(labels) - num_batches_tns = tf.stack([label_shape[0]]) - max_num_labels_tns = tf.stack([label_shape[1]]) - - def range_less_than(_, current_input): - return tf.expand_dims(tf.range(label_shape[1]), 0) < tf.fill( - max_num_labels_tns, current_input) - - init = tf.cast(tf.fill([1, label_shape[1]], 0), tf.bool) - dense_mask = functional_ops.scan(range_less_than, label_lengths, - initializer=init, parallel_iterations=1) - dense_mask = dense_mask[:, 0, :] - - label_array = tf.reshape(tf.tile(tf.range(label_shape[1]), num_batches_tns), - label_shape) - label_ind = tf.boolean_mask(label_array, dense_mask) - - tmp = tf.tile(tf.range(label_shape[0]), max_num_labels_tns) - batch_array = tf.transpose(tf.reshape(tmp, reverse(label_shape, 0))) - batch_ind = tf.boolean_mask(batch_array, dense_mask) - - indices = concatenate([batch_ind, label_ind], axis=0) - indices = tf.transpose(tf.reshape(indices, [2, -1])) - - vals_sparse = tf.gather_nd(labels, indices) - - indices = tf.cast(indices, tf.int64) - label_shape = tf.cast(label_shape, tf.int64) - return tf.SparseTensor(indices, vals_sparse, label_shape) - - -def ctc_batch_cost(y_true, y_pred, input_length, label_length): - """Runs CTC loss algorithm on each batch element. - - # Arguments - y_true: tensor `(samples, max_string_length)` - containing the truth labels. - y_pred: tensor `(samples, time_steps, num_categories)` - containing the prediction, or output of the softmax. - input_length: tensor `(samples, 1)` containing the sequence length for - each batch item in `y_pred`. - label_length: tensor `(samples, 1)` containing the sequence length for - each batch item in `y_true`. - - # Returns - Tensor with shape (samples,1) containing the - CTC loss of each element. - """ - label_length = tf.cast(tf.squeeze(label_length, axis=-1), tf.int32) - input_length = tf.cast(tf.squeeze(input_length, axis=-1), tf.int32) - sparse_labels = tf.cast( - ctc_label_dense_to_sparse(y_true, label_length), tf.int32) - y_pred = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + epsilon()) - return tf.expand_dims(ctc.ctc_loss(inputs=y_pred, - labels=sparse_labels, - sequence_length=input_length), 1) - - -def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, - top_paths=1, merge_repeated=False): - """Decodes the output of a softmax. 
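Tying `ctc_label_dense_to_sparse` and `ctc_batch_cost` together: the loss takes dense labels plus explicit per-sample lengths and yields one loss value per batch element. A symbolic sketch, assuming the TensorFlow backend of this era and purely illustrative shapes:

```python
from keras import backend as K

samples, time_steps, num_categories, max_string_length = 32, 50, 28, 10

y_pred = K.placeholder((samples, time_steps, num_categories))  # softmax output
y_true = K.placeholder((samples, max_string_length))           # dense labels
input_length = K.placeholder((samples, 1))                     # length of each y_pred
label_length = K.placeholder((samples, 1))                     # length of each y_true

# one CTC loss per batch element, shape (samples, 1)
loss = K.ctc_batch_cost(y_true, y_pred, input_length, label_length)
```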
- - Can use either greedy search (also known as best path) - or a constrained dictionary search. - - # Arguments - y_pred: tensor `(samples, time_steps, num_categories)` - containing the prediction, or output of the softmax. - input_length: tensor `(samples, )` containing the sequence length for - each batch item in `y_pred`. - greedy: perform much faster best-path search if `True`. - This does not use a dictionary. - beam_width: if `greedy` is `False`: a beam search decoder will be used - with a beam of this width. - top_paths: if `greedy` is `False`, - how many of the most probable paths will be returned. - merge_repeated: if `greedy` is `False`, - merge repeated classes in the output beams. - - # Returns - Tuple: - List: if `greedy` is `True`, returns a list of one element that - contains the decoded sequence. - If `False`, returns the `top_paths` most probable - decoded sequences. - Important: blank labels are returned as `-1`. - Tensor `(top_paths, )` that contains - the log probability of each decoded sequence. - """ - y_pred = tf.log(tf.transpose(y_pred, perm=[1, 0, 2]) + epsilon()) - input_length = tf.cast(input_length, tf.int32) - - if greedy: - (decoded, log_prob) = ctc.ctc_greedy_decoder( - inputs=y_pred, - sequence_length=input_length) - else: - (decoded, log_prob) = ctc.ctc_beam_search_decoder( - inputs=y_pred, - sequence_length=input_length, beam_width=beam_width, - top_paths=top_paths, merge_repeated=merge_repeated) - - decoded_dense = [] - for st in decoded: - dense_tensor = tf.sparse.to_dense(st, default_value=-1) - decoded_dense.append(dense_tensor) - return (decoded_dense, log_prob) - - -# HIGH ORDER FUNCTIONS - -def map_fn(fn, elems, name=None, dtype=None): - """Map the function fn over the elements elems and return the outputs. - - # Arguments - fn: Callable that will be called upon each element in elems - elems: tensor - name: A string name for the map node in the graph - dtype: Output data type. - - # Returns - Tensor with dtype `dtype`. - """ - return tf.map_fn(fn, elems, name=name, dtype=dtype) - - -def foldl(fn, elems, initializer=None, name=None): - """Reduce elems using fn to combine them from left to right. - - # Arguments - fn: Callable that will be called upon each element in elems and an - accumulator, for instance `lambda acc, x: acc + x` - elems: tensor - initializer: The first value used (`elems[0]` in case of None) - name: A string name for the foldl node in the graph - - # Returns - Tensor with same type and shape as `initializer`. - """ - return tf.foldl(fn, elems, initializer=initializer, name=name) - - -def foldr(fn, elems, initializer=None, name=None): - """Reduce elems using fn to combine them from right to left. - - # Arguments - fn: Callable that will be called upon each element in elems and an - accumulator, for instance `lambda acc, x: acc + x` - elems: tensor - initializer: The first value used (`elems[-1]` in case of None) - name: A string name for the foldr node in the graph - - # Returns - Tensor with same type and shape as `initializer`. - """ - return tf.foldr(fn, elems, initializer=initializer, name=name) - - -def local_conv1d(inputs, kernel, kernel_size, strides, data_format=None): - """Apply 1D conv with un-shared weights. 
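`foldl` and `foldr` are thin passthroughs to `tf.foldl`/`tf.foldr`; when `initializer` is omitted the first (or last) element seeds the accumulator. A small sketch with the same `K` import:

```python
import numpy as np
from keras import backend as K

elems = K.variable(np.array([1., 2., 3., 4.]))
total = K.foldl(lambda acc, x: acc + x, elems)   # ((1 + 2) + 3) + 4
print(K.eval(total))  # 10.0
```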
- - # Arguments - inputs: 3D tensor with shape: (batch_size, steps, input_dim) - kernel: the unshared weight for convolution, - with shape (output_length, feature_dim, filters) - kernel_size: a tuple of a single integer, - specifying the length of the 1D convolution window - strides: a tuple of a single integer, - specifying the stride length of the convolution - data_format: the data format, channels_first or channels_last - - # Returns - the tensor after 1d conv with un-shared weights, - with shape (batch_size, output_length, filters) - - # Raises - ValueError: If `data_format` is neither - `"channels_last"` nor `"channels_first"`. - """ - data_format = normalize_data_format(data_format) - - stride = strides[0] - kernel_shape = int_shape(kernel) - output_length, feature_dim, filters = kernel_shape - - xs = [] - for i in range(output_length): - slice_length = py_slice(i * stride, - i * stride + kernel_size[0]) - xs.append(reshape(inputs[:, slice_length, :], - (1, -1, feature_dim))) - x_aggregate = concatenate(xs, axis=0) - # Shape: `(output_length, batch_size, filters)`. - output = batch_dot(x_aggregate, kernel) - return permute_dimensions(output, (1, 0, 2)) - - -def local_conv2d(inputs, - kernel, - kernel_size, - strides, - output_shape, - data_format=None): - """Apply 2D conv with un-shared weights. - - # Arguments - inputs: 4D tensor with shape: - (batch_size, filters, new_rows, new_cols) - if data_format='channels_first' - or 4D tensor with shape: - (batch_size, new_rows, new_cols, filters) - if data_format='channels_last'. - kernel: the unshared weight for convolution, - with shape (output_items, feature_dim, filters) - kernel_size: a tuple of 2 integers, specifying the - width and height of the 2D convolution window. - strides: a tuple of 2 integers, specifying the strides - of the convolution along the width and height. - output_shape: a tuple with (output_row, output_col) - data_format: the data format, channels_first or channels_last - - # Returns - A 4d tensor with shape: - (batch_size, filters, new_rows, new_cols) - if data_format='channels_first' - or 4D tensor with shape: - (batch_size, new_rows, new_cols, filters) - if data_format='channels_last'. - - # Raises - ValueError: if `data_format` is neither - `channels_last` or `channels_first`. 
- """ - data_format = normalize_data_format(data_format) - - stride_row, stride_col = strides - output_row, output_col = output_shape - kernel_shape = int_shape(kernel) - _, feature_dim, filters = kernel_shape - - xs = [] - for i in range(output_row): - for j in range(output_col): - slice_row = py_slice(i * stride_row, - i * stride_row + kernel_size[0]) - slice_col = py_slice(j * stride_col, - j * stride_col + kernel_size[1]) - if data_format == 'channels_first': - xs.append(reshape(inputs[:, :, slice_row, slice_col], - (1, -1, feature_dim))) - else: - xs.append(reshape(inputs[:, slice_row, slice_col, :], - (1, -1, feature_dim))) - - x_aggregate = concatenate(xs, axis=0) - output = batch_dot(x_aggregate, kernel) - output = reshape(output, - (output_row, output_col, -1, filters)) - - if data_format == 'channels_first': - output = permute_dimensions(output, (2, 3, 0, 1)) - else: - output = permute_dimensions(output, (2, 0, 1, 3)) - return output -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import defaultdict -from contextlib import contextmanager -import theano -from theano import tensor as T -from theano.sandbox.rng_mrg import MRG_RandomStreams as RandomStreams -from theano.tensor.signal import pool -from theano.printing import Print -from theano.ifelse import ifelse -try: - import theano.sparse as th_sparse_module -except ImportError: - th_sparse_module = None -try: - from theano.tensor.nnet.nnet import softsign as T_softsign -except ImportError: - from theano.sandbox.softsign import softsign as T_softsign - -import numpy as np -from .common import floatx -from .common import epsilon -from .common import normalize_data_format -from ..utils.generic_utils import transpose_shape -from ..utils.generic_utils import has_arg -# Legacy functions -from .common import set_image_dim_ordering, image_dim_ordering - -py_all = all -py_any = any -py_sum = sum -py_slice = slice - - -# INTERNAL UTILS -theano.config.floatX = floatx() -# 0 = test, 1 = train -_LEARNING_PHASE = T.scalar(dtype='uint8', name='keras_learning_phase') -_UID_PREFIXES = defaultdict(int) - - -def learning_phase(): - # False = test, True = train - return _LEARNING_PHASE - - -def set_learning_phase(value): - global _LEARNING_PHASE - if value not in {0, 1}: - raise ValueError('Expected learning phase to be ' - '0 or 1.') - _LEARNING_PHASE = value - - -def get_uid(prefix=''): - """Provides a unique UID given a string prefix. - - # Arguments - prefix: string. - - # Returns - An integer. 
- - # Example - ```python - >>> keras.backend.get_uid('dense') - 1 - >>> keras.backend.get_uid('dense') - 2 - ``` - - """ - _UID_PREFIXES[prefix] += 1 - return _UID_PREFIXES[prefix] - - -def reset_uids(): - global _UID_PREFIXES - _UID_PREFIXES = defaultdict(int) - - -# VARIABLE MANIPULATION - - -def _assert_sparse_module(): - if not th_sparse_module: - raise ImportError("Failed to import theano.sparse\n" - "You probably need to pip install nose-parameterized") - - -def is_sparse(tensor): - return th_sparse_module and isinstance(tensor.type, th_sparse_module.SparseType) - - -def to_dense(tensor): - if is_sparse(tensor): - return th_sparse_module.dense_from_sparse(tensor) - else: - return tensor - - -NAME_SCOPE_STACK = [] - - -@contextmanager -def name_scope(name): - global NAME_SCOPE_STACK - NAME_SCOPE_STACK.append(name) - yield - NAME_SCOPE_STACK.pop() - - -def _prepare_name(name, default): - prefix = '/'.join(NAME_SCOPE_STACK) - if name is None: - return prefix + '/' + default - return prefix + '/' + name - - -def variable(value, dtype=None, name=None, constraint=None): - """Instantiates a variable and returns it. - - # Arguments - value: Numpy array, initial value of the tensor. - dtype: Tensor type. - name: Optional name string for the tensor. - constraint: Optional projection function to be - applied to the variable after an optimizer update. - - # Returns - A variable instance (with Keras metadata included). - """ - if dtype is None: - dtype = floatx() - if hasattr(value, 'tocoo'): - _assert_sparse_module() - variable = th_sparse_module.as_sparse_variable( - value, name=_prepare_name(name, 'variable')) - else: - if isinstance(value, (theano.tensor.TensorVariable, - theano.tensor.sharedvar.TensorSharedVariable, - theano.tensor.TensorConstant)): - # Support for RandomStreams().normal(), .uniform(). - value = value.eval() - value = np.asarray(value, dtype=dtype) - variable = theano.shared(value=value, - name=_prepare_name(name, 'variable'), - strict=False) - variable._keras_shape = value.shape - variable._uses_learning_phase = False - variable.constraint = constraint - return variable - - -def constant(value, dtype=None, shape=None, name=None): - if dtype is None: - dtype = floatx() - if shape is None: - shape = () - np_value = value * np.ones(shape) - const = T.constant(np_value, - dtype=dtype, - name=_prepare_name(name, 'constant')) - const._keras_shape = shape - const._uses_learning_phase = False - return const - - -def is_keras_tensor(x): - """Returns whether `x` is a Keras tensor. - - A "Keras tensor" is a tensor that was returned by a Keras layer, - (`Layer` class) or by `Input`. - - # Arguments - x: A candidate tensor. - - # Returns - A boolean: Whether the argument is a Keras tensor. - - # Raises - ValueError: In case `x` is not a symbolic tensor. - - # Examples - ```python - >>> from keras import backend as K - >>> from keras.layers import Input, Dense - >>> np_var = numpy.array([1, 2]) - >>> K.is_keras_tensor(np_var) # A numpy array is not a symbolic tensor. - ValueError - >>> k_var = tf.placeholder('float32', shape=(1,1)) - >>> # A variable indirectly created outside of keras is not a Keras tensor. - >>> K.is_keras_tensor(k_var) - False - >>> keras_var = K.variable(np_var) - >>> # A variable created with the keras backend is not a Keras tensor. - >>> K.is_keras_tensor(keras_var) - False - >>> keras_placeholder = K.placeholder(shape=(2, 4, 5)) - >>> # A placeholder is not a Keras tensor. 
- >>> K.is_keras_tensor(keras_placeholder) - False - >>> keras_input = Input([10]) - >>> K.is_keras_tensor(keras_input) # An Input is a Keras tensor. - True - >>> keras_layer_output = Dense(10)(keras_input) - >>> # Any Keras layer output is a Keras tensor. - >>> K.is_keras_tensor(keras_layer_output) - True - ``` - """ - if not is_tensor(x): - raise ValueError('Unexpectedly found an instance of type `' + - str(type(x)) + '`. ' - 'Expected a symbolic tensor instance.') - return hasattr(x, '_keras_history') - - -def is_tensor(x): - return isinstance(x, (T.TensorVariable, - T.sharedvar.TensorSharedVariable)) - - -def placeholder(shape=None, ndim=None, dtype=None, sparse=False, name=None): - """Instantiate an input data placeholder variable. - """ - if dtype is None: - dtype = floatx() - if shape is None and ndim is None: - raise ValueError('Specify either a shape or ndim value.') - if shape is not None: - ndim = len(shape) - else: - shape = tuple([None for _ in range(ndim)]) - - name = _prepare_name(name, 'placeholder') - broadcast = (False,) * ndim - if sparse: - _assert_sparse_module() - x = th_sparse_module.csr_matrix(name=name, dtype=dtype) - else: - x = T.TensorType(dtype, broadcast)(name) - x._keras_shape = shape - x._uses_learning_phase = False - x._theano_placeholder = True - return x - - -def is_placeholder(x): - """Returns whether `x` is a placeholder. - - # Arguments - x: A candidate placeholder. - - # Returns - Boolean. - """ - return hasattr(x, '_theano_placeholder') and x._theano_placeholder - - -def shape(x): - """Returns the shape of a tensor. - - Warning: type returned will be different for - Theano backend (Theano tensor type) and TF backend (TF TensorShape). - """ - return x.shape - - -def int_shape(x): - """Returns the shape of a Keras tensor or a Keras variable as a tuple of - integers or None entries. - - # Arguments - x: Tensor or variable. - - # Returns - A tuple of integers (or None entries). - """ - if hasattr(x, '_keras_shape'): - return x._keras_shape - else: - return None - - -def ndim(x): - return x.ndim - - -def dtype(x): - return x.dtype - - -def eval(x): - """Returns the value of a tensor. - """ - return to_dense(x).eval() - - -def zeros(shape, dtype=None, name=None): - """Instantiates an all-zeros variable. - """ - if dtype is None: - dtype = floatx() - return variable(np.zeros(shape), dtype, name) - - -def ones(shape, dtype=None, name=None): - """Instantiates an all-ones variable. - """ - if dtype is None: - dtype = floatx() - return variable(np.ones(shape), dtype, name) - - -def eye(size, dtype=None, name=None): - """Instantiates an identity matrix. - """ - if dtype is None: - dtype = floatx() - if isinstance(size, (list, tuple)): - n, m = size - else: - n, m = size, size - return variable(np.eye(n, m), dtype, name) - - -def ones_like(x, dtype=None, name=None): - if dtype is None: - dtype = floatx() - return T.ones_like(x, dtype=dtype) - - -def zeros_like(x, dtype=None, name=None): - if dtype is None: - dtype = floatx() - return T.zeros_like(x, dtype=dtype) - - -def identity(x, name=None): - """Returns a tensor with the same content as the input tensor. - - # Arguments - x: The input tensor. - name: String, name for the variable to create. - - # Returns - A tensor of the same shape, type and content. 
- """ - return x.copy(name=name) - - -def random_uniform_variable(shape, low, high, dtype=None, name=None): - return variable(np.random.uniform(low=low, high=high, size=shape), - dtype=dtype, name=name) - - -def random_normal_variable(shape, mean, scale, dtype=None, name=None): - return variable(np.random.normal(loc=0.0, scale=scale, size=shape), - dtype=dtype, name=name) - - -def count_params(x): - """Returns the number of scalars in a tensor. - - Return: numpy integer. - """ - # We don't want those compilation to show up in Theano profiler. - f = theano.function([], x.shape, profile=False) - return np.prod(f()) - - -def cast(x, dtype): - return T.cast(x, dtype) - - -# UPDATES OPS - - -def update(x, new_x): - return (x, new_x) - - -def update_add(x, increment): - return (x, x + increment) - - -def update_sub(x, decrement): - return (x, x - decrement) - - -def moving_average_update(variable, value, momentum): - return (variable, variable * momentum + value * (1. - momentum)) - - -# LINEAR ALGEBRA - -""" -Assumed overridden: -+, -, /, *, +=, -=, *=, /= -""" - - -def dot(x, y): - if is_sparse(x): - out = th_sparse_module.basic.structured_dot(x, y) - else: - out = T.dot(x, y) - if hasattr(x, '_keras_shape') and hasattr(y, '_keras_shape'): - x_shape = list(x._keras_shape) - y_shape = list(y._keras_shape) - if len(x_shape) > 0: - x_shape.pop() - if len(y_shape) == 1: - y_shape.pop() - elif len(y_shape) > 1: - y_shape.pop(-2) - out._keras_shape = tuple(x_shape + y_shape) - return out - - -def batch_dot(x, y, axes=None): - """Batchwise dot product. - - batch_dot results in a tensor with less dimensions than the input. - If the number of dimensions is reduced to 1, we use `expand_dims` to - make sure that ndim is at least 2. - - # Arguments - x, y: tensors with ndim >= 2 - axes: list (or single) int with target dimensions - - # Returns - A tensor with shape equal to the concatenation of x's shape - (less the dimension that was summed over) and y's shape - (less the batch dimension and the dimension that was summed over). - If the final rank is 1, we reshape it to (batch_size, 1). - - # Examples - Assume x = [[1, 2], [3, 4]] and y = [[5, 6], [7, 8]] - batch_dot(x, y, axes=1) = [[17, 53]] which is the main diagonal - of x.dot(y.T), although we never have to calculate the off-diagonal - elements. - - Shape inference: - Let x's shape be (100, 20) and y's shape be (100, 30, 20). - If dot_axes is (1, 2), to find the output shape of resultant tensor, - loop through each dimension in x's shape and y's shape: - x.shape[0] : 100 : append to output shape - x.shape[1] : 20 : do not append to output shape, - dimension 1 of x has been summed over. (dot_axes[0] = 1) - y.shape[0] : 100 : do not append to output shape, - always ignore first dimension of y - y.shape[1] : 30 : append to output shape - y.shape[2] : 20 : do not append to output shape, - dimension 2 of y has been summed over. (dot_axes[1] = 2) - - output_shape = (100, 30) - """ - if isinstance(axes, int): - axes = (axes, axes) - if axes is None: - # behaves like tf.batch_matmul as default - if y.ndim == 2: - axes = [x.ndim - 1, y.ndim - 1] - else: - axes = [x.ndim - 1, y.ndim - 2] - if py_any([isinstance(a, (list, tuple)) for a in axes]): - raise ValueError('Multiple target dimensions are not supported. ' + - 'Expected: None, int, (int, int), ' + - 'Provided: ' + str(axes)) - if isinstance(axes, tuple): - axes = list(axes) - - if 0 in axes: - raise ValueError('Can not perform batch_dot over axis 0.' 
- 'If your inputs are not batched,' - ' add a dummy batch dimension to your ' - 'inputs using K.expand_dims(x, 0)') - - out = T.batched_tensordot(x, y, axes=axes) - if ndim(out) == 1: - out = expand_dims(out, 1) - - if hasattr(x, '_keras_shape') and hasattr(y, '_keras_shape'): - shape = [] - for axis in range(len(x._keras_shape)): - if axis != axes[0]: - shape.append(x._keras_shape[axis]) - for axis in range(1, len(y._keras_shape)): - if axis != axes[1]: - shape.append(y._keras_shape[axis]) - if len(shape) == 1: - shape.append(1) # Expand dims if ndim == 1 - out._keras_shape = tuple(shape) - return out - - -def transpose(x): - y = T.transpose(x) - if hasattr(x, '_keras_shape'): - y._keras_shape = tuple(reversed(x._keras_shape)) - return y - - -def gather(reference, indices): - """Retrieves the elements of indices `indices` in the tensor `reference`. - - # Arguments - reference: A tensor. - indices: An integer tensor of indices. - - # Returns - A tensor of same type as `reference`. - """ - y = reference[indices] - if hasattr(reference, '_keras_shape') and hasattr(indices, '_keras_shape'): - y._keras_shape = indices._keras_shape + reference._keras_shape[1:] - return y - - -# ELEMENT-WISE OPERATIONS - - -def max(x, axis=None, keepdims=False): - return T.max(x, axis=axis, keepdims=keepdims) - - -def min(x, axis=None, keepdims=False): - return T.min(x, axis=axis, keepdims=keepdims) - - -def sum(x, axis=None, keepdims=False): - """Sum of the values in a tensor, alongside the specified axis. - """ - return T.sum(x, axis=axis, keepdims=keepdims) - - -def prod(x, axis=None, keepdims=False): - """Multiply the values in a tensor, alongside the specified axis. - """ - return T.prod(x, axis=axis, keepdims=keepdims) - - -def cumsum(x, axis=0): - """Cumulative sum of the values in a tensor, alongside the specified axis. - - # Arguments - x: A tensor or variable. - axis: An integer, the axis to compute the sum. - - # Returns - A tensor of the cumulative sum of values of `x` along `axis`. - """ - return T.extra_ops.cumsum(x, axis=axis) - - -def cumprod(x, axis=0): - """Cumulative product of the values in a tensor, alongside the specified axis. - - # Arguments - x: A tensor or variable. - axis: An integer, the axis to compute the product. - - # Returns - A tensor of the cumulative product of values of `x` along `axis`. - """ - return T.extra_ops.cumprod(x, axis=axis) - - -def mean(x, axis=None, keepdims=False): - """Mean of a tensor, alongside the specified axis. - """ - dtype = None - # bool is available since theano v0.9dev - if 'int' in x.dtype or x.dtype == 'bool': - dtype = floatx() - return T.mean(x, axis=axis, keepdims=keepdims, dtype=dtype) - - -def std(x, axis=None, keepdims=False): - return T.std(x, axis=axis, keepdims=keepdims) - - -def var(x, axis=None, keepdims=False): - return T.var(x, axis=axis, keepdims=keepdims) - - -def any(x, axis=None, keepdims=False): - """Bitwise reduction (logical OR). - """ - y = T.any(x, axis=axis, keepdims=keepdims) - y = _set_keras_shape_for_reduction(x, y, axis, keepdims) - return y - - -def all(x, axis=None, keepdims=False): - """Bitwise reduction (logical AND). 
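The shape walk-through in the `batch_dot` docstring above can be checked against `int_shape`, which reads the `_keras_shape` bookkeeping these functions maintain (same hypothetical `K` import as earlier sketches):

```python
import numpy as np
from keras import backend as K

x = K.variable(np.random.random((100, 20)))
y = K.variable(np.random.random((100, 30, 20)))
xy = K.batch_dot(x, y, axes=(1, 2))
print(K.int_shape(xy))  # (100, 30), as in the docstring's shape inference
```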
- """ - y = T.all(x, axis=axis, keepdims=keepdims) - y = _set_keras_shape_for_reduction(x, y, axis, keepdims) - return y - - -def _set_keras_shape_for_reduction(x, y, axis, keepdims): - if hasattr(x, '_keras_shape'): - if axis is None: - y._keras_shape = (1,) * len(x._keras_shape) if keepdims else (1,) - else: - if isinstance(axis, int): - axis_list = [axis] - else: - axis_list = list(set(int(a) for a in axis)) - keras_shape_list = list(x._keras_shape) - if keepdims: - for a in axis_list: - keras_shape_list[a] = 1 - else: - for a in axis_list[::-1]: - keras_shape_list.pop(a) - if not keras_shape_list: - keras_shape_list = (1,) - y._keras_shape = tuple(keras_shape_list) - return y - - -def argmax(x, axis=-1): - return T.argmax(x, axis=axis, keepdims=False) - - -def argmin(x, axis=-1): - return T.argmin(x, axis=axis, keepdims=False) - - -def square(x): - return T.sqr(x) - - -def abs(x): - return T.abs_(x) - - -def sqrt(x): - x = T.clip(x, 0., np.inf) - return T.sqrt(x) - - -def exp(x): - return T.exp(x) - - -def log(x): - return T.log(x) - - -def logsumexp(x, axis=None, keepdims=False): - """Computes log(sum(exp(elements across dimensions of a tensor))). - - This function is more numerically stable than log(sum(exp(x))). - It avoids overflows caused by taking the exp of large inputs and - underflows caused by taking the log of small inputs. - - # Arguments - x: A tensor or variable. - axis: An integer, the axis to reduce over. - keepdims: A boolean, whether to keep the dimensions or not. - If `keepdims` is `False`, the rank of the tensor is reduced - by 1. If `keepdims` is `True`, the reduced dimension is - retained with length 1. - - # Returns - The reduced tensor. - """ - # Theano has a built-in optimization for logsumexp - # (see https://github.com/Theano/Theano/pull/4736) - # so we can just write the expression directly: - return T.log(T.sum(T.exp(x), axis=axis, keepdims=keepdims)) - - -def round(x): - return T.round(x, mode='half_to_even') - - -def sign(x): - return T.sgn(x) - - -def pow(x, a): - return T.pow(x, a) - - -def clip(x, min_value, max_value): - if (isinstance(min_value, (int, float)) and - isinstance(max_value, (int, float))): - if max_value < min_value: - max_value = min_value - if min_value is None: - min_value = -np.inf - if max_value is None: - max_value = np.inf - return T.clip(x, min_value, max_value) - - -def equal(x, y): - return T.eq(x, y) - - -def not_equal(x, y): - z = T.neq(x, y) - if hasattr(x, '_keras_shape'): - z._keras_shape = x._keras_shape - elif hasattr(y, '_keras_shape'): - z._keras_shape = y._keras_shape - return z - - -def greater(x, y): - return T.gt(x, y) - - -def greater_equal(x, y): - return T.ge(x, y) - - -def less(x, y): - return T.lt(x, y) - - -def less_equal(x, y): - return T.le(x, y) - - -def maximum(x, y): - return T.maximum(x, y) - - -def minimum(x, y): - return T.minimum(x, y) - - -def sin(x): - return T.sin(x) - - -def cos(x): - return T.cos(x) - - -def normalize_batch_in_training(x, gamma, beta, - reduction_axes, epsilon=1e-3): - """Computes mean and std for batch then apply batch_normalization on batch. 
- """ - # TODO remove this if statement when Theano without - # T.nnet.bn.batch_normalization_train is deprecated - if not hasattr(T.nnet.bn, 'batch_normalization_train'): - return _old_normalize_batch_in_training( - x, gamma, beta, reduction_axes, epsilon) - - if gamma is None: - if beta is None: - gamma = ones_like(x) - else: - gamma = ones_like(beta) - if beta is None: - if gamma is None: - beta = zeros_like(x) - beta = zeros_like(gamma) - - normed, mean, stdinv = T.nnet.bn.batch_normalization_train( - x, gamma, beta, reduction_axes, epsilon) - - return normed, mean, T.inv(stdinv ** 2) - - -def batch_normalization(x, mean, var, beta, gamma, axis=-1, epsilon=1e-3): - """Apply batch normalization on x given mean, var, beta and gamma. - """ - # TODO remove this if statement when Theano without - # T.nnet.bn.batch_normalization_test is deprecated - if not hasattr(T.nnet.bn, 'batch_normalization_test'): - return _old_batch_normalization(x, mean, var, beta, gamma, epsilon) - - if gamma is None: - gamma = ones_like(var) - if beta is None: - beta = zeros_like(mean) - - if mean.ndim == 1: - # based on TensorFlow's default: normalize along rightmost dimension - reduction_axes = list(range(x.ndim - 1)) - else: - reduction_axes = [i for i in range(x.ndim) if mean.broadcastable[i]] - - return T.nnet.bn.batch_normalization_test( - x, gamma, beta, mean, var, reduction_axes, epsilon) - - -# TODO remove this function when Theano without -# T.nnet.bn.batch_normalization_train is deprecated -def _old_normalize_batch_in_training(x, gamma, beta, reduction_axes, - epsilon=1e-3): # pragma: no cover - """Computes mean and std for batch then apply batch_normalization on batch. - """ - if gamma is None: - gamma = ones_like(x) - if beta is None: - beta = zeros_like(x) - - dev = theano.config.device - use_cudnn = (ndim(x) < 5 and - reduction_axes == [0, 2, 3] and - (dev.startswith('cuda') or dev.startswith('gpu'))) - if use_cudnn: - broadcast_beta = beta.dimshuffle('x', 0, 'x', 'x') - broadcast_gamma = gamma.dimshuffle('x', 0, 'x', 'x') - try: - trained = theano.sandbox.cuda.dnn.dnn_batch_normalization_train( - x, broadcast_gamma, broadcast_beta, 'spatial', epsilon) - normed, mean, stdinv = trained - normed = theano.tensor.as_tensor_variable(normed) - mean = theano.tensor.as_tensor_variable(mean) - stdinv = theano.tensor.as_tensor_variable(stdinv) - var = T.inv(stdinv ** 2) - return normed, T.flatten(mean), T.flatten(var) - except AttributeError: - pass - - var = x.var(reduction_axes) - mean = x.mean(reduction_axes) - - target_shape = [] - for axis in range(ndim(x)): - if axis in reduction_axes: - target_shape.append(1) - else: - target_shape.append(x.shape[axis]) - target_shape = T.stack(*target_shape) - - broadcast_mean = T.reshape(mean, target_shape) - broadcast_var = T.reshape(var, target_shape) - broadcast_beta = T.reshape(beta, target_shape) - broadcast_gamma = T.reshape(gamma, target_shape) - normed = batch_normalization(x, broadcast_mean, broadcast_var, - broadcast_beta, broadcast_gamma, - epsilon) - return normed, mean, var - - -# TODO remove this if statement when Theano without -# T.nnet.bn.batch_normalization_test is deprecated -def _old_batch_normalization(x, mean, var, beta, gamma, - epsilon=1e-3): # pragma: no cover - """Apply batch normalization on x given mean, var, beta and gamma. 
- """ - if gamma is None: - gamma = ones_like(var) - if beta is None: - beta = zeros_like(mean) - - if mean.ndim == 1 and x.ndim > 1: - # in TensorFlow's batch_normalization, if the parameters are vectors - # the batch normalization should be applied along the rightmost axis. - # Theano expects the parameters to always have x.ndim dimensions. - shuffle_pattern = ['x'] * (x.ndim - 1) + [0] - mean = mean.dimshuffle(shuffle_pattern) - var = var.dimshuffle(shuffle_pattern) - beta = beta.dimshuffle(shuffle_pattern) - gamma = gamma.dimshuffle(shuffle_pattern) - - ndim = x.ndim - dev = theano.config.device - use_cudnn = ndim < 5 and (dev.startswith('cuda') or dev.startswith('gpu')) - if use_cudnn: - try: - axis = mean.broadcastable.index(False) - if axis != 1: - shuffle_pattern = list(range(ndim)) - shuffle_pattern[1] = shuffle_pattern[axis] - shuffle_pattern[axis] = 1 - result = theano.sandbox.cuda.dnn.dnn_batch_normalization_test( - x.dimshuffle(shuffle_pattern), - gamma.dimshuffle(shuffle_pattern), - beta.dimshuffle(shuffle_pattern), - mean.dimshuffle(shuffle_pattern), - var.dimshuffle(shuffle_pattern), - 'spatial', epsilon).dimshuffle(shuffle_pattern) - else: - result = theano.sandbox.cuda.dnn.dnn_batch_normalization_test( - x, gamma, beta, mean, var, 'spatial', epsilon) - return theano.tensor.as_tensor_variable(result) - except AttributeError: - pass - except ValueError: - pass - return T.nnet.bn.batch_normalization(x, gamma, beta, mean, sqrt(var + epsilon), - mode='high_mem') - - -# SHAPE OPERATIONS - -def concatenate(tensors, axis=-1): - if py_all([is_sparse(x) for x in tensors]): - axis = axis % ndim(tensors[0]) - if axis == 0: - output = th_sparse_module.basic.vstack(tensors, format='csr') - elif axis == 1: - output = th_sparse_module.basic.hstack(tensors, format='csr') - else: - raise ValueError('Invalid concat axis for sparse matrix:', axis) - else: - output = T.concatenate([to_dense(x) for x in tensors], axis=axis) - - if py_all([hasattr(tensor, '_keras_shape') for tensor in tensors]): - input_shapes = [tensor._keras_shape for tensor in tensors] - output_shape = list(input_shapes[0]) - for shape in input_shapes[1:]: - if output_shape[axis] is None or shape[axis] is None: - output_shape[axis] = None - break - output_shape[axis] += shape[axis] - output._keras_shape = tuple(output_shape) - - return output - - -def reshape(x, shape): - y = T.reshape(x, shape) - shape = tuple(x if isinstance(x, int) and x > 0 else None for x in shape) - y._keras_shape = shape - if hasattr(x, '_uses_learning_phase'): - y._uses_learning_phase = x._uses_learning_phase - else: - y._uses_learning_phase = False - return y - - -def permute_dimensions(x, pattern): - """Transpose dimensions. - - pattern should be a tuple or list of - dimension indices, e.g. [0, 2, 1]. - """ - pattern = tuple(pattern) - y = x.dimshuffle(pattern) - if hasattr(x, '_keras_shape'): - y._keras_shape = tuple(np.asarray(x._keras_shape)[list(pattern)]) - return y - - -def repeat_elements(x, rep, axis): - """Repeat the elements of a tensor along an axis, like np.repeat. - - If x has shape (s1, s2, s3) and axis=1, the output - will have shape (s1, s2 * rep, s3). 
- """ - y = T.repeat(x, rep, axis=axis) - if hasattr(x, '_keras_shape'): - y._keras_shape = list(x._keras_shape) - repeat_dim = x._keras_shape[axis] - if repeat_dim is not None: - y._keras_shape[axis] = repeat_dim * rep - y._keras_shape = tuple(y._keras_shape) - return y - - -def resize_images(x, - height_factor, - width_factor, - data_format, - interpolation='nearest'): - """Resize the images contained in a 4D tensor of shape - - [batch, channels, height, width] (for 'channels_first' data_format) - - [batch, height, width, channels] (for 'channels_last' data_format) - by a factor of (height_factor, width_factor). Both factors should be - positive integers. - """ - if data_format == 'channels_first': - axis_1 = 2 - axis_2 = 3 - elif data_format == 'channels_last': - axis_1 = 1 - axis_2 = 2 - else: - raise ValueError('Invalid data_format:', data_format) - - if interpolation == 'nearest': - output = repeat_elements(x, height_factor, axis=axis_1) - output = repeat_elements(output, width_factor, axis=axis_2) - elif interpolation == 'bilinear': - if not (height_factor == width_factor == 2): - raise NotImplementedError( - 'Bilinear upscaling with factors other than (2, 2)' - 'is not available when using the Theano backend.') - if data_format == 'channels_last': - output = permute_dimensions(x, [0, 3, 1, 2]) - else: - output = x - output = T.nnet.abstract_conv.bilinear_upsampling(output, - ratio=height_factor) - if data_format == 'channels_last': - output = permute_dimensions(output, [0, 2, 3, 1]) - if hasattr(x, '_keras_shape'): - output._keras_shape = list(x._keras_shape) - output._keras_shape[axis_1] *= height_factor - output._keras_shape[axis_2] *= width_factor - output._keras_shape = tuple(output._keras_shape) - else: - raise ValueError( - 'interpolation should be one of "nearest" or "bilinear".') - - return output - - -def resize_volumes(x, depth_factor, height_factor, width_factor, data_format): - """Resize the volume contained in a 5D tensor of shape - - [batch, channels, depth, height, width] (for 'channels_first' data_format) - - [batch, depth, height, width, channels] (for 'channels_last' data_format) - by a factor of (depth_factor, height_factor, width_factor). - Both factors should be positive integers. - """ - if data_format == 'channels_first': - output = repeat_elements(x, depth_factor, axis=2) - output = repeat_elements(output, height_factor, axis=3) - output = repeat_elements(output, width_factor, axis=4) - return output - elif data_format == 'channels_last': - output = repeat_elements(x, depth_factor, axis=1) - output = repeat_elements(output, height_factor, axis=2) - output = repeat_elements(output, width_factor, axis=3) - return output - else: - raise ValueError('Invalid data_format:', data_format) - - -def repeat(x, n): - """Repeat a 2D tensor. - - If x has shape (samples, dim) and n=2, - the output will have shape (samples, 2, dim). - """ - assert x.ndim == 2 - y = x.dimshuffle((0, 'x', 1)) - y = T.extra_ops.repeat(y, n, axis=1) - if hasattr(x, '_keras_shape'): - shape = list(x._keras_shape) - shape.insert(1, n) - y._keras_shape = tuple(shape) - - return y - - -def arange(start, stop=None, step=1, dtype='int32'): - """Creates a 1-D tensor containing a sequence of integers. - - The function arguments use the same convention as - Theano's arange: if only one argument is provided, - it is in fact the "stop" argument. - - The default type of the returned tensor is 'int32' to - match TensorFlow's default. 
- """ - return T.arange(start, stop=stop, step=step, dtype=dtype) - - -def tile(x, n): - if isinstance(n, int): - n = (n,) - elif isinstance(n, list): - n = tuple(n) - - y = T.tile(x, n) - shape = int_shape(x) - if shape is None: - return y - elif len(n) < len(shape): # Padding the axis - n = tuple([1 for _ in range(len(shape) - len(n))]) + n - elif len(n) != len(shape): - raise NotImplementedError - - y._keras_shape = tuple([None if a is None else a * b - for (a, b) in zip(shape, n)]) - return y - - -def flatten(x): - y = T.flatten(x) - if hasattr(x, '_keras_shape'): - if None in x._keras_shape: - y._keras_shape = (None,) - else: - y._keras_shape = (np.prod(x._keras_shape), ) - return y - - -def batch_flatten(x): - """Turn a n-D tensor into a 2D tensor where - the first dimension is conserved. - """ - y = T.reshape(x, (x.shape[0], T.prod(x.shape[1:]))) - if hasattr(x, '_keras_shape'): - if None in x._keras_shape[1:]: - y._keras_shape = (x._keras_shape[0], None) - else: - y._keras_shape = (x._keras_shape[0], np.prod(x._keras_shape[1:])) - return y - - -def expand_dims(x, axis=-1): - """Add a 1-sized dimension at index "dim". - """ - pattern = [i for i in range(x.type.ndim)] - if axis < 0: - if x.type.ndim == 0: - axis = 0 - else: - axis = axis % x.type.ndim + 1 - pattern.insert(axis, 'x') - y = x.dimshuffle(pattern) - if hasattr(x, '_keras_shape'): - shape = list(x._keras_shape) - shape.insert(axis, 1) - y._keras_shape = tuple(shape) - return y - - -def squeeze(x, axis): - """Remove a 1-dimension from the tensor at index "axis". - """ - shape = list(x.shape) - shape.pop(axis) - y = T.reshape(x, tuple(shape)) - if hasattr(x, '_keras_shape'): - kshape = list(x._keras_shape) - kshape.pop(axis) - y._keras_shape = tuple(kshape) - return y - - -def temporal_padding(x, padding=(1, 1)): - """Pad the middle dimension of a 3D tensor - with "padding" zeros left and right. - - Apologies for the inane API, but Theano makes this - really hard. - """ - assert len(padding) == 2 - input_shape = x.shape - output_shape = (input_shape[0], - input_shape[1] + padding[0] + padding[1], - input_shape[2]) - output = T.zeros(output_shape) - result = T.set_subtensor( - output[:, padding[0]:x.shape[1] + padding[0], :], x) - if hasattr(x, '_keras_shape'): - result._keras_shape = (x._keras_shape[0], - x._keras_shape[1] + py_sum(padding), - x._keras_shape[2]) - return result - - -def spatial_2d_padding(x, padding=((1, 1), (1, 1)), data_format=None): - """Pad the 2nd and 3rd dimensions of a 4D tensor - with "padding[0]" and "padding[1]" (resp.) zeros left and right. 
- """ - assert len(padding) == 2 - assert len(padding[0]) == 2 - assert len(padding[1]) == 2 - top_pad, bottom_pad = padding[0] - left_pad, right_pad = padding[1] - data_format = normalize_data_format(data_format) - - input_shape = x.shape - if data_format == 'channels_first': - output_shape = (input_shape[0], - input_shape[1], - input_shape[2] + top_pad + bottom_pad, - input_shape[3] + left_pad + right_pad) - output = T.zeros(output_shape) - indices = (py_slice(None), - py_slice(None), - py_slice(top_pad, input_shape[2] + top_pad), - py_slice(left_pad, input_shape[3] + left_pad)) - - else: - output_shape = (input_shape[0], - input_shape[1] + top_pad + bottom_pad, - input_shape[2] + left_pad + right_pad, - input_shape[3]) - output = T.zeros(output_shape) - indices = (py_slice(None), - py_slice(top_pad, input_shape[1] + top_pad), - py_slice(left_pad, input_shape[2] + left_pad), - py_slice(None)) - y = T.set_subtensor(output[indices], x) - if hasattr(x, '_keras_shape'): - if data_format == 'channels_first': - if x._keras_shape[2] is not None: - h = x._keras_shape[2] + top_pad + bottom_pad - else: - h = None - if x._keras_shape[3] is not None: - w = x._keras_shape[3] + left_pad + right_pad - else: - w = None - output_keras_shape = (x._keras_shape[0], - x._keras_shape[1], - h, - w) - else: - if x._keras_shape[1] is not None: - h = x._keras_shape[1] + top_pad + bottom_pad - else: - h = None - if x._keras_shape[2] is not None: - w = x._keras_shape[2] + left_pad + right_pad - else: - w = None - output_keras_shape = (x._keras_shape[0], - h, - w, - x._keras_shape[3]) - y._keras_shape = output_keras_shape - return y - - -def spatial_3d_padding(x, padding=((1, 1), (1, 1), (1, 1)), data_format=None): - """Pad the 2nd, 3rd and 4th dimensions of a 5D tensor - with "padding[0]", "padding[1]" and "padding[2]" (resp.) zeros left and right. 
- """ - data_format = normalize_data_format(data_format) - - input_shape = x.shape - if data_format == 'channels_first': - output_shape = (input_shape[0], - input_shape[1], - input_shape[2] + padding[0][0] + padding[0][1], - input_shape[3] + padding[1][0] + padding[1][1], - input_shape[4] + padding[2][0] + padding[2][1]) - output = T.zeros(output_shape) - indices = (py_slice(None), - py_slice(None), - py_slice(padding[0][0], input_shape[2] + padding[0][0]), - py_slice(padding[1][0], input_shape[3] + padding[1][0]), - py_slice(padding[2][0], input_shape[4] + padding[2][0])) - - else: - output_shape = (input_shape[0], - input_shape[1] + padding[0][0] + padding[0][1], - input_shape[2] + padding[1][0] + padding[1][1], - input_shape[3] + padding[2][0] + padding[2][1], - input_shape[4]) - output = T.zeros(output_shape) - indices = (py_slice(None), - py_slice(padding[0][0], input_shape[1] + padding[0][0]), - py_slice(padding[1][0], input_shape[2] + padding[1][0]), - py_slice(padding[2][0], input_shape[3] + padding[2][0]), - py_slice(None)) - y = T.set_subtensor(output[indices], x) - if hasattr(x, '_keras_shape'): - if data_format == 'channels_first': - if x._keras_shape[2] is not None: - h = x._keras_shape[2] + padding[0][0] + padding[0][1] - else: - h = None - if x._keras_shape[3] is not None: - w = x._keras_shape[3] + padding[1][0] + padding[1][1] - else: - w = None - if x._keras_shape[4] is not None: - d = x._keras_shape[4] + padding[2][0] + padding[2][1] - else: - d = None - output_keras_shape = (x._keras_shape[0], - x._keras_shape[1], - h, - w, - d) - else: - if x._keras_shape[1] is not None: - h = x._keras_shape[1] + padding[0][0] + padding[0][1] - else: - h = None - if x._keras_shape[2] is not None: - w = x._keras_shape[2] + padding[1][0] + padding[1][1] - else: - w = None - if x._keras_shape[3] is not None: - d = x._keras_shape[3] + padding[2][0] + padding[2][1] - else: - d = None - output_keras_shape = (x._keras_shape[0], - h, - w, - d, - x._keras_shape[4]) - y._keras_shape = output_keras_shape - return y - - -def stack(x, axis=0): - return T.stack(x, axis=axis) - - -def one_hot(indices, num_classes): - """Input: nD integer tensor of shape (batch_size, dim1, dim2, ... dim(n-1)) - Output: (n + 1)D one hot representation of the input - with shape (batch_size, dim1, dim2, ... dim(n-1), num_classes) - """ - input_shape = tuple((indices.shape[i] for i in range(indices.ndim))) - indices = T.flatten(indices) - oh = T.extra_ops.to_one_hot(indices, num_classes) - oh = T.reshape(oh, input_shape + (num_classes,)) - return oh - - -def reverse(x, axes): - """Reverse a tensor along the specified axes - """ - if isinstance(axes, int): - axes = [axes] - elif isinstance(axes, tuple): - axes = list(axes) - for i in range(len(axes)): - if axes[i] == -1: - axes[i] = x.ndim - 1 - slices = [] - for i in range(x.ndim): - if i in axes: - slices.append(py_slice(None, None, -1)) - else: - slices.append(py_slice(None, None, None)) - return x[slices] - - -def slice(x, start, size): - if not (len(int_shape(x)) == len(start) == len(size)): - raise ValueError('The dimension and the size of indices should match.') - out = x[tuple([py_slice(i, i + j) for (i, j) in zip(start, size)])] - out._keras_shape = tuple(size) - return out - - -def pattern_broadcast(x, broadcastable): - return T.patternbroadcast(x, broadcastable) - -# VALUE MANIPULATION - - -def get_value(x): - if not hasattr(x, 'get_value'): - raise TypeError('`get_value` can only be called on a variable. 
' - 'If you have an expression instead, use `eval()`.') - return x.get_value() - - -def batch_get_value(xs): - """Returns the value of more than one tensor variable, - as a list of Numpy arrays. - """ - return [get_value(x) for x in xs] - - -def set_value(x, value): - x.set_value(np.asarray(value, dtype=x.dtype)) - - -def batch_set_value(tuples): - for x, value in tuples: - x.set_value(np.asarray(value, dtype=x.dtype)) - - -def get_variable_shape(x): - return x.get_value(borrow=True, return_internal_type=True).shape - - -def print_tensor(x, message=''): - """Print the message and the tensor when evaluated and return the same - tensor. - """ - p_op = Print(message) - return p_op(x) - - -# GRAPH MANIPULATION - -class Function(object): - - def __init__(self, inputs, outputs, updates=[], name=None, **kwargs): - unique_variables_to_update = {} - for v, nv in updates: - if v not in unique_variables_to_update: - unique_variables_to_update[v] = nv - updates = unique_variables_to_update.items() - self.function = theano.function(inputs, outputs, updates=updates, - allow_input_downcast=True, - on_unused_input='ignore', - name=name, - **kwargs) - self.name = name - - def __call__(self, inputs): - assert isinstance(inputs, (list, tuple)) - return self.function(*inputs) - - -def _raise_invalid_arg(key): - msg = 'Invalid argument "%s" passed to K.function with Theano backend' % key - raise ValueError(msg) - - -def function(inputs, outputs, updates=[], **kwargs): - if len(kwargs) > 0: - for key in kwargs.keys(): - if not has_arg(theano.function, key, True): - _raise_invalid_arg(key) - return Function(inputs, outputs, updates=updates, **kwargs) - - -def gradients(loss, variables): - return T.grad(loss, variables) - - -def stop_gradient(variables): - """Returns `variables` but with zero gradient w.r.t. every other variable. - - # Arguments - variables: tensor or list of tensors to consider constant with respect - to any other variable. - - # Returns - A single tensor or a list of tensors (depending on the passed argument) - that has constant gradient with respect to any other variable. - """ - if isinstance(variables, (list, tuple)): - return map(theano.gradient.disconnected_grad, variables) - else: - return theano.gradient.disconnected_grad(variables) - - -# CONTROL FLOW - -def rnn(step_function, inputs, initial_states, - go_backwards=False, mask=None, constants=None, - unroll=False, input_length=None): - """Iterates over the time dimension of a tensor. - - # Arguments - step_function: - Parameters: - inputs: Tensor with shape (samples, ...) (no time dimension), - representing input for the batch of samples at a certain - time step. - states: List of tensors. - Returns: - outputs: Tensor with shape (samples, ...) (no time dimension), - new_states: List of tensors, same length and shapes - as 'states'. - inputs: Tensor of temporal data of shape (samples, time, ...) - (at least 3D). - initial_states: Tensor with shape (samples, ...) (no time dimension), - containing the initial values for the states used in - the step function. - go_backwards: Boolean. If True, do the iteration over the time - dimension in reverse order and return the reversed sequence. - mask: Binary tensor with shape (samples, time), - with a zero for every element that is masked. - constants: A list of constant values passed at each step. - unroll: Whether to unroll the RNN or to use a symbolic loop - (`while_loop` or `scan` depending on backend). - input_length: Static number of timesteps in the input. 
- Must be specified if using `unroll`. - - # Returns - A tuple (last_output, outputs, new_states). - - last_output: The latest output of the rnn, of shape `(samples, ...)` - outputs: Tensor with shape `(samples, time, ...)` where each - entry `outputs[s, t]` is the output of the step function - at time `t` for sample `s`. - new_states: List of tensors, latest states returned by - the step function, of shape `(samples, ...)`. - """ - ndim = inputs.ndim - assert ndim >= 3, 'Input should be at least 3D.' - - if unroll: - if input_length is None: - raise ValueError('When specifying `unroll=True`, ' - 'an `input_length` ' - 'must be provided to `rnn`.') - - axes = [1, 0] + list(range(2, ndim)) - inputs = inputs.dimshuffle(axes) - - if constants is None: - constants = [] - - global uses_learning_phase - uses_learning_phase = False - - if mask is not None: - if mask.ndim != 2: - raise ValueError( - 'mask should have `shape=(samples, time)`, ' - 'got {}'.format(mask.shape)) - mask = mask.dimshuffle([1, 0]) - - def get_matching_mask(mask_t, ref_tensor_t): - # tf.where needs its condition tensor - # to be the same shape as its two - # result tensors - ndim = ref_tensor_t.ndim - for _ in range(ndim - 1): - mask_t = expand_dims(mask_t) - add_shape = ref_tensor_t.shape[1:] - reps = T.concatenate([[1], add_shape], 0) - return T.tile(mask_t, reps, ndim=ndim) - - if unroll: - indices = list(range(input_length)) - if go_backwards: - indices = indices[::-1] - - successive_outputs = [] - successive_states = [] - states = initial_states - for i in indices: - output, new_states = step_function( - inputs[i], states + constants) - if getattr(output, '_uses_learning_phase', False): - uses_learning_phase = True - - if len(successive_outputs) == 0: - prev_output = zeros_like(output) - else: - prev_output = successive_outputs[-1] - - output_mask = get_matching_mask(mask[i], output) - output = T.switch(output_mask, output, prev_output) - kept_states = [] - for state, new_state in zip(states, new_states): - state_mask = get_matching_mask(mask[i], state) - kept_states.append(T.switch(state_mask, new_state, state)) - states = kept_states - - successive_outputs.append(output) - successive_states.append(states) - - outputs = T.stack(*successive_outputs) - states = [] - for i in range(len(successive_states[-1])): - new_states = [] - for states_at_step in successive_states: - new_states.append(states_at_step[i]) - states.append(T.stack(*new_states)) - else: - # build an all-zero tensor of shape (samples, output_dim) - initial_output = step_function( - inputs[0], initial_states + constants) - initial_output = initial_output[0] * 0 - # Theano gets confused by broadcasting patterns in the scan op - initial_output = T.unbroadcast(initial_output, 0, 1) - if len(initial_states) > 0: - initial_states[0] = T.unbroadcast(initial_states[0], 0, 1) - - def _step(inputs, mask, output_tm1, *states): - outputs, new_states = step_function(inputs, states) - if getattr(outputs, '_uses_learning_phase', False): - global uses_learning_phase - uses_learning_phase = True - # output previous output if masked. 
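-                # (editorial note) get_matching_mask broadcasts the (samples,)
-                # mask slice up to the output's full shape; T.switch below then
-                # keeps the fresh output where the mask is 1 and carries the
-                # previous timestep's value through where it is 0, so padded
-                # steps never disturb outputs or states.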
- output_mask = get_matching_mask(mask, outputs) - outputs = T.switch(output_mask, outputs, output_tm1) - return_states = [] - for state, new_state in zip(states, new_states): - state_mask = get_matching_mask(mask, state) - return_states.append( - T.switch(state_mask, new_state, state)) - return [outputs] + return_states - - results, _ = theano.scan( - _step, - sequences=[inputs, mask], - outputs_info=[initial_output] + initial_states, - non_sequences=constants, - go_backwards=go_backwards) - - # deal with Theano API inconsistency - if isinstance(results, list): - outputs = results[0] - states = results[1:] - else: - outputs = results - states = [] - else: - if unroll: - indices = list(range(input_length)) - if go_backwards: - indices = indices[::-1] - - successive_outputs = [] - successive_states = [] - states = initial_states - for i in indices: - outputs, states = step_function(inputs[i], states + constants) - if getattr(outputs, '_uses_learning_phase', False): - uses_learning_phase = True - successive_outputs.append(outputs) - successive_states.append(states) - outputs = T.stack(*successive_outputs) - states = [] - for i in range(len(successive_states[-1])): - states.append(T.stack( - *[states_at_step[i] for states_at_step in successive_states])) - - else: - def _step(inputs, *states): - outputs, new_states = step_function(inputs, states) - if getattr(outputs, '_uses_learning_phase', False): - global uses_learning_phase - uses_learning_phase = True - return [outputs] + new_states - - # Theano likes to make shape==1 dimensions - # in the initial states (outputs_info) broadcastable - if len(initial_states) > 0: - initial_states[0] = T.unbroadcast(initial_states[0], 0, 1) - - results, _ = theano.scan( - _step, - sequences=inputs, - outputs_info=[None] + initial_states, - non_sequences=constants, - go_backwards=go_backwards) - - # deal with Theano API inconsistency - if isinstance(results, list): - outputs = results[0] - states = results[1:] - else: - outputs = results - states = [] - - outputs = T.squeeze(outputs) - last_output = outputs[-1] - - axes = [1, 0] + list(range(2, outputs.ndim)) - outputs = outputs.dimshuffle(axes) - states = [T.squeeze(state[-1]) for state in states] - last_output._uses_learning_phase = uses_learning_phase - return last_output, outputs, states - - -def switch(condition, then_expression, else_expression): - """Switches between two operations depending on a scalar value. - - Note that both `then_expression` and `else_expression` - should be symbolic tensors of the *same shape*. - - # Arguments - condition: scalar tensor (`int` or `bool`). - then_expression: either a tensor, or a callable that returns a tensor. - else_expression: either a tensor, or a callable that returns a tensor. - - # Returns - The selected tensor. - """ - if callable(then_expression): - then_expression = then_expression() - if callable(else_expression): - else_expression = else_expression() - cond_ndim = ndim(condition) - expr_ndim = ndim(then_expression) - if cond_ndim < expr_ndim: - ndim_diff = expr_ndim - cond_ndim - for _ in range(ndim_diff): - condition = expand_dims(condition) - return T.switch(condition, then_expression, else_expression) - - -def in_train_phase(x, alt, training=None): - """Selects `x` in train phase, and `alt` otherwise. - - Note that `alt` should have the *same shape* as `x`. - - # Returns - Either `x` or `alt` based on the `training` flag. - the `training` flag defaults to `K.learning_phase()`. 
- """ - if training is None: - training = learning_phase() - uses_learning_phase = True - else: - uses_learning_phase = False - - if training is 1 or training is True: - if callable(x): - return x() - else: - return x - - elif training is 0 or training is False: - if callable(alt): - return alt() - else: - return alt - - if callable(x): - x = x() - if callable(alt): - alt = alt() - - # else: assume learning phase is a placeholder tensor. - x = ifelse(training, x, alt) - if uses_learning_phase: - x._uses_learning_phase = True - return x - - -def in_test_phase(x, alt, training=None): - """Selects `x` in test phase, and `alt` otherwise. - Note that `alt` should have the *same shape* as `x`. - - # Returns - Either `x` or `alt` based on `K.learning_phase`. - """ - return in_train_phase(alt, x, training=training) - - -# NN OPERATIONS - -def _assert_has_capability(module, func): - if not hasattr(module, func): - raise EnvironmentError( - 'It looks like like your version of ' - 'Theano is out of date. ' - 'Install the latest version with:\n' - 'pip install git+git://github.com/Theano/Theano.git ' - '--upgrade --no-deps') - - -def elu(x, alpha=1.0): - """ Exponential linear unit - - # Arguments - x: Tensor to compute the activation function for. - alpha: scalar - """ - _assert_has_capability(T.nnet, 'elu') - return T.nnet.elu(x, alpha) - - -def relu(x, alpha=0., max_value=None, threshold=0.): - _assert_has_capability(T.nnet, 'relu') - - if alpha != 0.: - if threshold != 0.: - negative_part = T.nnet.relu(-x + threshold) - else: - negative_part = T.nnet.relu(-x) - - if threshold != 0.: - x = x * T.cast(T.gt(x, threshold), floatx()) - else: - x = T.nnet.relu(x) - - if max_value is not None: - x = T.clip(x, 0.0, max_value) - - if alpha != 0.: - x -= alpha * negative_part - - return x - - -def softmax(x, axis=-1): - if (axis == -1 or axis == x.ndim - 1) and x.ndim == 2: - return T.nnet.softmax(x) - xm = x.max(axis=axis, keepdims=True) - return T.exp(x - xm) / T.exp( - x - xm).sum(axis=axis, keepdims=True) - - -def softplus(x): - return T.nnet.softplus(x) - - -def softsign(x): - return T_softsign(x) - - -def categorical_crossentropy(target, output, from_logits=False, axis=-1): - output_dimensions = list(range(len(int_shape(output)))) - if axis != -1 and axis not in output_dimensions: - raise ValueError( - '{}{}{}'.format( - 'Unexpected channels axis {}. '.format(axis), - 'Expected to be -1 or one of the axes of `output`, ', - 'which has {} dimensions.'.format(len(int_shape(output))))) - # If the channels are not in the last axis, move them to be there: - if axis != -1 and axis != output_dimensions[-1]: - permutation = output_dimensions[:axis] - permutation += output_dimensions[axis + 1:] + [axis] - output = permute_dimensions(output, permutation) - target = permute_dimensions(target, permutation) - if from_logits: - output = T.nnet.softmax(output) - else: - # scale preds so that the class probas of each sample sum to 1 - output /= output.sum(axis=-1, keepdims=True) - # avoid numerical instability with _EPSILON clipping - output = T.clip(output, epsilon(), 1.0 - epsilon()) - return T.nnet.categorical_crossentropy(output, target) - - -def sparse_categorical_crossentropy(target, output, from_logits=False, axis=-1): - output_dimensions = list(range(len(int_shape(output)))) - if axis != -1 and axis not in output_dimensions: - raise ValueError( - '{}{}{}'.format( - 'Unexpected channels axis {}. 
-                'Expected to be -1 or one of the axes of `output`, ',
-                'which has {} dimensions.'.format(len(int_shape(output)))))
-    # If the channels are not in the last axis, move them to be there:
-    if axis != -1 and axis != output_dimensions[-1]:
-        permutation = output_dimensions[:axis]
-        permutation += output_dimensions[axis + 1:] + [axis]
-        output = permute_dimensions(output, permutation)
-        target = permute_dimensions(target, permutation)
-    target = T.cast(T.flatten(target), 'int32')
-    target = T.extra_ops.to_one_hot(target, nb_class=output.shape[-1])
-    target = reshape(target, shape(output))
-    return categorical_crossentropy(target, output, from_logits, axis=-1)
-
-
-def binary_crossentropy(target, output, from_logits=False):
-    if from_logits:
-        output = T.nnet.sigmoid(output)
-    # avoid numerical instability with _EPSILON clipping
-    output = T.clip(output, epsilon(), 1.0 - epsilon())
-    return T.nnet.binary_crossentropy(output, target)
-
-
-def sigmoid(x):
-    return T.nnet.sigmoid(x)
-
-
-def hard_sigmoid(x):
-    return T.nnet.hard_sigmoid(x)
-
-
-def tanh(x):
-    return T.tanh(x)
-
-
-def dropout(x, level, noise_shape=None, seed=None):
-    """Sets entries in `x` to zero at random,
-    while scaling the entire tensor.
-
-    # Arguments
-        x: tensor
-        level: fraction of the entries in the tensor
-            that will be set to 0.
-        noise_shape: shape for randomly generated keep/drop flags,
-            must be broadcastable to the shape of `x`
-        seed: random seed to ensure determinism.
-    """
-    if level < 0. or level >= 1:
-        raise ValueError('Dropout level must be in interval [0, 1[.')
-    if seed is None:
-        seed = np.random.randint(1, 10e6)
-    if isinstance(noise_shape, list):
-        noise_shape = tuple(noise_shape)
-
-    rng = RandomStreams(seed=seed)
-    retain_prob = 1. - level
-
-    if noise_shape is None:
-        random_tensor = rng.binomial(x.shape, p=retain_prob, dtype=x.dtype)
-    else:
-        random_tensor = rng.binomial(noise_shape, p=retain_prob, dtype=x.dtype)
-        random_tensor = T.patternbroadcast(random_tensor,
-                                           [dim == 1 for dim in noise_shape])
-    # "inverted dropout": scale at train time so inference needs no rescaling
-    x *= random_tensor
-    x /= retain_prob
-    return x
-
-
-def l2_normalize(x, axis=None):
-    square_sum = T.sum(T.square(x), axis=axis, keepdims=True)
-    norm = T.sqrt(T.maximum(square_sum, epsilon()))
-    return x / norm
-
-
-def in_top_k(predictions, targets, k):
-    """Returns whether the `targets` are in the top `k` `predictions`.
-
-    # Arguments
-        predictions: A tensor of shape `(batch_size, classes)` and type `float32`.
-        targets: A 1D tensor of length `batch_size` and type `int32` or `int64`.
-        k: An `int`, number of top elements to consider.
-
-    # Returns
-        A 1D tensor of length `batch_size` and type `bool`.
-        `output[i]` is `True` if `predictions[i, targets[i]]` is within top-`k`
-        values of `predictions[i]`.
-    """
-    # handle k < 1 and k >= predictions.shape[1] cases to match TF behavior
-    if k < 1:
-        # dtype='bool' is only available since Theano 0.9.0
-        try:
-            return T.zeros_like(targets, dtype='bool')
-        except TypeError:
-            return T.zeros_like(targets, dtype='int8')
-
-    if k >= int_shape(predictions)[1]:
-        try:
-            return T.ones_like(targets, dtype='bool')
-        except TypeError:
-            return T.ones_like(targets, dtype='int8')
-
-    predictions_k = T.sort(predictions)[:, -k]
-    targets_values = predictions[T.arange(targets.shape[0]), targets]
-    return T.ge(targets_values, predictions_k)
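
The `dropout` above is the "inverted" formulation: masking and rescaling both happen at training time, so inference needs no correction factor. A NumPy sketch of the same idea (editorial illustration; the helper name is invented):

```python
import numpy as np

def dropout_np(x, level, rng=np.random):
    retain_prob = 1.0 - level
    # keep-mask drawn per element; survivors scaled by 1/retain_prob
    mask = rng.binomial(n=1, p=retain_prob, size=x.shape).astype(x.dtype)
    return x * mask / retain_prob

y = dropout_np(np.ones((1000, 10)), level=0.3)
print(y.mean())  # ~1.0: the expected activation is unchanged
```
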
-# CONVOLUTIONS
-
-def _preprocess_conv2d_input(x, data_format):
-    if data_format == 'channels_last':
-        # TF uses the last dimension as channel dimension,
-        # instead of the 2nd one.
-        # TH input shape: (samples, input_depth, rows, cols)
-        # TF input shape: (samples, rows, cols, input_depth)
-        x = x.dimshuffle((0, 3, 1, 2))
-    return x
-
-
-def _preprocess_conv3d_input(x, data_format):
-    if data_format == 'channels_last':
-        # TF uses the last dimension as channel dimension,
-        # instead of the 2nd one.
-        # TH input shape: (samples, input_depth, rows, cols, slices)
-        # TF input shape: (samples, rows, cols, slices, input_depth)
-        x = x.dimshuffle((0, 4, 1, 2, 3))
-    return x
-
-
-def _preprocess_conv2d_kernel(kernel, data_format):
-    # As of Keras 2.0.0, all kernels are normalized
-    # on the format `(rows, cols, input_depth, depth)`,
-    # independently of `data_format`.
-    # Theano expects `(depth, input_depth, rows, cols)`.
-    kernel = kernel.dimshuffle((3, 2, 0, 1))
-    return kernel
-
-
-def _preprocess_conv2d_depthwise_kernel(kernel, kernel_shape, data_format):
-    # As of Keras 2.0.0, all kernels are normalized
-    # on the format `(rows, cols, input_depth, depth)`,
-    # independently of `data_format`.
-    # Theano expects `(input_depth * depth, 1, rows, cols)`
-    # for depthwise convolution.
-    kernel = kernel[::-1, ::-1, :, :]
-    kernel = kernel.dimshuffle((2, 3, 0, 1))
-    kernel = reshape(kernel, kernel_shape)
-    return kernel
-
-
-def _preprocess_conv3d_kernel(kernel, data_format):
-    # As of Keras 2.0.0, all kernels are normalized
-    # on the format `(space, input_depth, depth)`,
-    # independently of `data_format`.
-    # Theano expects `(depth, input_depth, space)`.
-    kernel = kernel.dimshuffle((4, 3, 0, 1, 2))
-    return kernel
-
-
-def _preprocess_padding(padding):
-    if padding == 'same':
-        th_padding = 'half'
-    elif padding == 'valid':
-        th_padding = 'valid'
-    elif padding == 'full':
-        th_padding = 'full'
-    else:
-        raise ValueError('Border mode not supported:', str(padding))
-    return th_padding
-
-
-def _preprocess_conv2d_image_shape(image_shape, data_format):
-    # Theano might not accept long type
-    def int_or_none(value):
-        try:
-            return int(value)
-        except TypeError:
-            return None
-    if data_format == 'channels_last':
-        if image_shape:
-            image_shape = transpose_shape(image_shape, 'channels_first',
-                                          spatial_axes=(1, 2))
-    if image_shape is not None:
-        image_shape = tuple(int_or_none(v) for v in image_shape)
-    return image_shape
-
-
-def _preprocess_conv3d_volume_shape(volume_shape, data_format):
-    # Theano might not accept long type
-    def int_or_none(value):
-        try:
-            return int(value)
-        except TypeError:
-            return None
-    if data_format == 'channels_last':
-        if volume_shape:
-            volume_shape = (volume_shape[0], volume_shape[4],
-                            volume_shape[1], volume_shape[2], volume_shape[3])
-    if volume_shape is not None:
-        volume_shape = tuple(int_or_none(v) for v in volume_shape)
-    return volume_shape
-
-
-def _preprocess_conv2d_filter_shape(filter_shape, data_format):
-    # Theano might not accept long type
-    def int_or_none(value):
-        try:
-            return int(value)
-        except TypeError:
-            return None
-    if filter_shape:
-        filter_shape = (filter_shape[3], filter_shape[2],
-                        filter_shape[0], filter_shape[1])
-    if filter_shape is not None:
-        filter_shape = tuple(int_or_none(v) for v in filter_shape)
-    return filter_shape
-
-
-def _preprocess_conv2d_depthwise_filter_shape(filter_shape, data_format):
-    # Theano might not accept long type
-    def int_or_none(value):
-        try:
-            return int(value)
-        except TypeError:
-            return None
-    if filter_shape:
-        filter_shape = (filter_shape[3] * filter_shape[2], 1,
-                        filter_shape[0], filter_shape[1])
-    if filter_shape is not None:
-        filter_shape = tuple(int_or_none(v) for v in
filter_shape) - return filter_shape - - -def _preprocess_conv3d_filter_shape(filter_shape, data_format): - # Theano might not accept long type - def int_or_none(value): - try: - return int(value) - except TypeError: - return None - if filter_shape: - filter_shape = (filter_shape[4], filter_shape[3], - filter_shape[0], filter_shape[1], filter_shape[2]) - if filter_shape is not None: - filter_shape = tuple(int_or_none(v) for v in filter_shape) - return filter_shape - - -def _postprocess_conv2d_output(conv_out, x, - padding, kernel_shape, - strides, data_format): - if padding == 'same': - if kernel_shape[2] % 2 == 0: - i = (x.shape[2] + strides[0] - 1) // strides[0] - conv_out = conv_out[:, :, :i, :] - if kernel_shape[3] % 2 == 0: - i = (x.shape[3] + strides[1] - 1) // strides[1] - conv_out = conv_out[:, :, :, :i] - if data_format == 'channels_last': - conv_out = conv_out.dimshuffle((0, 2, 3, 1)) - return conv_out - - -def _postprocess_conv3d_output(conv_out, x, - padding, kernel_shape, - strides, data_format): - if padding == 'same': - if kernel_shape[2] % 2 == 0: - i = (x.shape[2] + strides[0] - 1) // strides[0] - conv_out = conv_out[:, :, :i, :, :] - if kernel_shape[3] % 2 == 0: - i = (x.shape[3] + strides[1] - 1) // strides[1] - conv_out = conv_out[:, :, :, :i, :] - if kernel_shape[4] % 2 == 0: - i = (x.shape[4] + strides[2] - 1) // strides[2] - conv_out = conv_out[:, :, :, :, :i] - if data_format == 'channels_last': - conv_out = conv_out.dimshuffle((0, 2, 3, 4, 1)) - return conv_out - - -def conv1d(x, kernel, strides=1, padding='valid', - data_format=None, dilation_rate=1): - """1D convolution. - - # Arguments - kernel: kernel tensor. - strides: stride integer. - padding: string, `"same"`, `"causal"` or `"valid"`. - data_format: string, one of "channels_last", "channels_first" - dilation_rate: integer. - """ - data_format = normalize_data_format(data_format) - - kernel_shape = int_shape(kernel) - if padding == 'causal': - # causal (dilated) convolution: - if not kernel_shape: - raise AttributeError( - 'Causal padding requires kernel._keras_shape set.') - left_pad = dilation_rate * (kernel_shape[0] - 1) - x = temporal_padding(x, (left_pad, 0)) - padding = 'valid' - shape = int_shape(x) - if data_format == 'channels_last': - # original shape: (batch, length, input_dim) - # add dim to x to have (batch, length, 1, input_dim) - x = expand_dims(x, 2) - # update x._keras_shape - if shape is not None: - x._keras_shape = (shape[0], shape[1], 1, shape[2]) - else: - # original shape: (batch, input_dim, length) - # add dim to x to have (batch, input_dim, length, 1) - x = expand_dims(x, 3) - # update x._keras_shape - if shape is not None: - x._keras_shape = (shape[0], shape[1], shape[2], 1) - # update dilation rate, strides - dilation_rate = (dilation_rate, 1) - strides = (strides, 1) - # add dim to kernel (always same format independently of data_format) - # i.e. (rows, 1, input_depth, depth) - kernel = expand_dims(kernel, 1) - output = conv2d(x, kernel, - strides=strides, padding=padding, - data_format=data_format, dilation_rate=dilation_rate) - # remove added dim - if data_format == 'channels_last': - output = squeeze(output, 2) - else: - output = squeeze(output, 3) - return output - - -def conv2d(x, kernel, strides=(1, 1), padding='valid', - data_format=None, dilation_rate=(1, 1)): - """2D convolution. - - # Arguments - kernel: kernel tensor. - strides: strides tuple. - padding: string, "same" or "valid". - data_format: "channels_last" or "channels_first". 
- Whether to use Theano or TensorFlow data format - in inputs/kernels/outputs. - """ - data_format = normalize_data_format(data_format) - - image_shape = _preprocess_conv2d_image_shape(int_shape(x), data_format) - kernel_shape = int_shape(kernel) - if kernel_shape is None: - kernel_shape = kernel.eval().shape # in case of a shared variable - kernel_shape = _preprocess_conv2d_filter_shape(kernel_shape, data_format) - - x = _preprocess_conv2d_input(x, data_format) - kernel = _preprocess_conv2d_kernel(kernel, data_format) - th_padding = _preprocess_padding(padding) - - conv_out = T.nnet.conv2d(x, kernel, - border_mode=th_padding, - subsample=strides, - input_shape=image_shape, - filter_shape=kernel_shape, - filter_dilation=dilation_rate) - conv_out = _postprocess_conv2d_output(conv_out, x, padding, - kernel_shape, strides, data_format) - return conv_out - - -def conv2d_transpose(x, kernel, output_shape, strides=(1, 1), - padding='valid', data_format=None, dilation_rate=(1, 1)): - """2D deconvolution (transposed convolution). - - # Arguments - kernel: kernel tensor. - output_shape: desired dimensions of output. - strides: strides tuple. - padding: string, "same" or "valid". - data_format: "channels_last" or "channels_first". - Whether to use Theano or TensorFlow data format - in inputs/kernels/outputs. - dilation_rate: tuple of 2 integers. - - # Raises - ValueError: if using an even kernel size with padding 'same'. - """ - flip_filters = False - data_format = normalize_data_format(data_format) - - if data_format == 'channels_last': - output_shape = (output_shape[0], - output_shape[3], - output_shape[1], - output_shape[2]) - - kernel_shape = int_shape(kernel) - if kernel_shape is None: - kernel_shape = kernel.eval().shape # in case of a shared variable - - if padding == 'same' and kernel_shape[0] % 2 == 0: - raise ValueError('In `Conv2DTranspose`, with padding mode `same`, ' - 'even kernel sizes are not supported with Theano. ' - 'You can set `kernel_size` to an odd number.') - - kernel_shape = _preprocess_conv2d_filter_shape(kernel_shape, data_format) - - x = _preprocess_conv2d_input(x, data_format) - kernel = _preprocess_conv2d_kernel(kernel, data_format) - - th_padding = _preprocess_padding(padding) - op = T.nnet.abstract_conv.AbstractConv2d_gradInputs( - imshp=None, - kshp=kernel_shape, - subsample=strides, - border_mode=th_padding, - filter_flip=not flip_filters, - filter_dilation=dilation_rate) - conv_out = op(kernel, x, output_shape[2:]) - conv_out = _postprocess_conv2d_output(conv_out, x, padding, - kernel_shape, strides, data_format) - return conv_out - - -def separable_conv1d(x, depthwise_kernel, pointwise_kernel, strides=1, - padding='valid', data_format=None, dilation_rate=1): - """1D convolution with separable filters. - - # Arguments - x: input tensor - depthwise_kernel: convolution kernel for the depthwise convolution. - pointwise_kernel: kernel for the 1x1 convolution. - strides: strides integer. - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: integer dilation rate. - - # Returns - Output tensor. - - # Raises - ValueError: if `data_format` is neither `"channels_last"` or - `"channels_first"`. 
- """ - data_format = normalize_data_format(data_format) - if isinstance(strides, int): - strides = (strides,) - if isinstance(dilation_rate, int): - dilation_rate = (dilation_rate,) - - if data_format == 'channels_last': - spatial_start_dim = 2 - else: - spatial_start_dim = 3 - x = expand_dims(x, spatial_start_dim) - depthwise_kernel = expand_dims(depthwise_kernel, 1) - pointwise_kernel = expand_dims(pointwise_kernel, 1) - strides = strides + (1,) - dilation_rate = dilation_rate + (1,) - - image_shape = _preprocess_conv2d_image_shape(int_shape(x), data_format) - depthwise_kernel_shape = int_shape(depthwise_kernel) - if depthwise_kernel_shape is None: - # in case of a shared variable - depthwise_kernel_shape = depthwise_kernel.eval().shape - depthwise_kernel_shape = _preprocess_conv2d_depthwise_filter_shape( - depthwise_kernel_shape, data_format) - pointwise_kernel_shape = int_shape(pointwise_kernel) - if pointwise_kernel_shape is None: - # in case of a shared variable - pointwise_kernel_shape = pointwise_kernel.eval().shape - pointwise_kernel_shape = _preprocess_conv2d_filter_shape( - pointwise_kernel_shape, data_format) - - x = _preprocess_conv2d_input(x, data_format) - depthwise_kernel = _preprocess_conv2d_depthwise_kernel( - depthwise_kernel, depthwise_kernel_shape, data_format) - pointwise_kernel = _preprocess_conv2d_kernel(pointwise_kernel, data_format) - th_padding = _preprocess_padding(padding) - - conv_out = T.nnet.conv2d(x, depthwise_kernel, - border_mode=th_padding, - subsample=strides, - input_shape=image_shape, - filter_shape=depthwise_kernel_shape, - filter_dilation=dilation_rate, - num_groups=image_shape[1]) - conv_out = T.nnet.conv2d(conv_out, pointwise_kernel, - border_mode=th_padding, - subsample=(1, 1), - input_shape=None, - filter_shape=pointwise_kernel_shape, - filter_dilation=dilation_rate) - conv_out = _postprocess_conv2d_output(conv_out, x, padding, - pointwise_kernel_shape, - strides, data_format) - conv_out = squeeze(conv_out, spatial_start_dim) - return conv_out - - -def separable_conv2d(x, depthwise_kernel, pointwise_kernel, strides=(1, 1), - padding='valid', data_format=None, dilation_rate=(1, 1)): - """2D convolution with separable filters. - - # Arguments - x: input tensor - depthwise_kernel: convolution kernel for the depthwise convolution. - pointwise_kernel: kernel for the 1x1 convolution. - strides: strides tuple (length 2). - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: tuple of integers, - dilation rates for the separable convolution. - - # Returns - Output tensor. - - # Raises - ValueError: if `data_format` is neither `"channels_last"` or - `"channels_first"`. 
- """ - data_format = normalize_data_format(data_format) - - image_shape = _preprocess_conv2d_image_shape(int_shape(x), data_format) - depthwise_kernel_shape = int_shape(depthwise_kernel) - if depthwise_kernel_shape is None: - # in case of a shared variable - depthwise_kernel_shape = depthwise_kernel.eval().shape - depthwise_kernel_shape = _preprocess_conv2d_depthwise_filter_shape( - depthwise_kernel_shape, data_format) - pointwise_kernel_shape = int_shape(pointwise_kernel) - if pointwise_kernel_shape is None: - # in case of a shared variable - pointwise_kernel_shape = pointwise_kernel.eval().shape - pointwise_kernel_shape = _preprocess_conv2d_filter_shape( - pointwise_kernel_shape, data_format) - - x = _preprocess_conv2d_input(x, data_format) - depthwise_kernel = _preprocess_conv2d_depthwise_kernel( - depthwise_kernel, depthwise_kernel_shape, data_format) - pointwise_kernel = _preprocess_conv2d_kernel(pointwise_kernel, data_format) - th_padding = _preprocess_padding(padding) - - conv_out = T.nnet.conv2d(x, depthwise_kernel, - border_mode=th_padding, - subsample=strides, - input_shape=image_shape, - filter_shape=depthwise_kernel_shape, - filter_dilation=dilation_rate, - num_groups=image_shape[1]) - conv_out = T.nnet.conv2d(conv_out, pointwise_kernel, - border_mode=th_padding, - subsample=(1, 1), - input_shape=None, - filter_shape=pointwise_kernel_shape, - filter_dilation=dilation_rate) - conv_out = _postprocess_conv2d_output(conv_out, x, padding, - pointwise_kernel_shape, - strides, data_format) - return conv_out - - -def depthwise_conv2d(x, depthwise_kernel, strides=(1, 1), padding='valid', - data_format=None, dilation_rate=(1, 1)): - """2D convolution with separable filters. - - # Arguments - x: input tensor - depthwise_kernel: convolution kernel for the depthwise convolution. - strides: strides tuple (length 2). - padding: string, `"same"` or `"valid"`. - data_format: string, `"channels_last"` or `"channels_first"`. - dilation_rate: tuple of integers, - dilation rates for the separable convolution. - - # Returns - Output tensor. - - # Raises - ValueError: if `data_format` is neither `"channels_last"` or - `"channels_first"`. - """ - data_format = normalize_data_format(data_format) - - image_shape = _preprocess_conv2d_image_shape(int_shape(x), data_format) - depthwise_kernel_shape = int_shape(depthwise_kernel) - if depthwise_kernel_shape is None: - # in case of a shared variable - depthwise_kernel_shape = depthwise_kernel.eval().shape - depthwise_kernel_shape = _preprocess_conv2d_depthwise_filter_shape( - depthwise_kernel_shape, data_format) - - x = _preprocess_conv2d_input(x, data_format) - depthwise_kernel = _preprocess_conv2d_depthwise_kernel( - depthwise_kernel, depthwise_kernel_shape, data_format) - th_padding = _preprocess_padding(padding) - - conv_out = T.nnet.conv2d(x, depthwise_kernel, - border_mode=th_padding, - subsample=strides, - input_shape=image_shape, - filter_shape=depthwise_kernel_shape, - filter_dilation=dilation_rate, - num_groups=image_shape[1]) - conv_out = _postprocess_conv2d_output( - conv_out, x, padding, depthwise_kernel_shape, strides, data_format) - return conv_out - - -def conv3d(x, kernel, strides=(1, 1, 1), - padding='valid', data_format=None, - dilation_rate=(1, 1, 1)): - """3D convolution. - - # Arguments - kernel: kernel tensor. - strides: strides tuple. - padding: string, "same" or "valid". - data_format: "channels_last" or "channels_first". - Whether to use Theano or TensorFlow data format - in inputs/kernels/outputs. 
- """ - data_format = normalize_data_format(data_format) - - volume_shape = _preprocess_conv3d_volume_shape(int_shape(x), data_format) - kernel_shape = int_shape(kernel) - if kernel_shape is None: - kernel_shape = kernel.eval().shape # in case of a shared variable - kernel_shape = _preprocess_conv3d_filter_shape(kernel_shape, data_format) - - x = _preprocess_conv3d_input(x, data_format) - kernel = _preprocess_conv3d_kernel(kernel, data_format) - th_padding = _preprocess_padding(padding) - - conv_out = T.nnet.conv3d(x, kernel, - border_mode=th_padding, - subsample=strides, - input_shape=volume_shape, - filter_shape=kernel_shape, - filter_dilation=dilation_rate) - conv_out = _postprocess_conv3d_output(conv_out, x, padding, - kernel_shape, strides, data_format) - return conv_out - - -def conv3d_transpose(x, kernel, output_shape, strides=(1, 1, 1), - padding='valid', data_format=None): - """3D deconvolution (transposed convolution). - - # Arguments - kernel: kernel tensor. - output_shape: desired dimensions of output. - strides: strides tuple. - padding: string, "same" or "valid". - data_format: "channels_last" or "channels_first". - Whether to use Theano or TensorFlow data format - in inputs/kernels/outputs. - - # Raises - ValueError: if using an even kernel size with padding 'same'. - """ - flip_filters = False - data_format = normalize_data_format(data_format) - - if data_format == 'channels_last': - output_shape = (output_shape[0], - output_shape[4], - output_shape[1], - output_shape[2], - output_shape[3]) - - kernel_shape = int_shape(kernel) - if kernel_shape is None: - kernel_shape = kernel.eval().shape # in case of a shared variable - - if padding == 'same' and kernel_shape[0] % 2 == 0: - raise ValueError('In `Conv3DTranspose`, with padding mode `same`, ' - 'even kernel sizes are not supported with Theano. 
' - 'You can set `kernel_size` to an odd number.') - - kernel_shape = _preprocess_conv3d_filter_shape(kernel_shape, data_format) - - x = _preprocess_conv3d_input(x, data_format) - kernel = _preprocess_conv3d_kernel(kernel, data_format) - - th_padding = _preprocess_padding(padding) - op = T.nnet.abstract_conv.AbstractConv3d_gradInputs(imshp=None, - kshp=kernel_shape, - subsample=strides, - border_mode=th_padding, - filter_flip=not flip_filters) - conv_out = op(kernel, x, output_shape[2:]) - conv_out = _postprocess_conv3d_output(conv_out, x, padding, - kernel_shape, strides, data_format) - return conv_out - - -def pool2d(x, pool_size, strides=(1, 1), padding='valid', - data_format=None, pool_mode='max'): - data_format = normalize_data_format(data_format) - - assert pool_size[0] >= 1 and pool_size[1] >= 1 - - if padding == 'same': - odd_pad_w = pool_size[0] > 2 and pool_size[0] % 2 == 1 - w_pad = pool_size[0] - 2 if odd_pad_w else pool_size[0] - 1 - odd_pad_h = pool_size[1] > 2 and pool_size[1] % 2 == 1 - h_pad = pool_size[1] - 2 if odd_pad_h else pool_size[1] - 1 - pad = (w_pad, h_pad) - elif padding == 'valid': - pad = (0, 0) - else: - raise ValueError('Invalid border mode:', padding) - - if data_format == 'channels_last': - x = x.dimshuffle((0, 3, 1, 2)) - - if pool_mode == 'max': - pool_out = pool.pool_2d(x, ws=pool_size, stride=strides, - ignore_border=True, - pad=pad, - mode='max') - elif pool_mode == 'avg': - pool_out = pool.pool_2d(x, ws=pool_size, stride=strides, - ignore_border=True, - pad=pad, - mode='average_exc_pad') - else: - raise ValueError('Invalid pooling mode:', pool_mode) - if padding == 'same': - expected_width = (x.shape[2] + strides[0] - 1) // strides[0] - expected_height = (x.shape[3] + strides[1] - 1) // strides[1] - pool_out = pool_out[:, :, - : expected_width, - : expected_height] - - if data_format == 'channels_last': - pool_out = pool_out.dimshuffle((0, 2, 3, 1)) - return pool_out - - -def pool3d(x, pool_size, strides=(1, 1, 1), padding='valid', - data_format=None, pool_mode='max'): - data_format = normalize_data_format(data_format) - - if padding == 'same': - w_pad = pool_size[0] - 2 if pool_size[0] % 2 == 1 else pool_size[0] - 1 - h_pad = pool_size[1] - 2 if pool_size[1] % 2 == 1 else pool_size[1] - 1 - d_pad = pool_size[2] - 2 if pool_size[2] % 2 == 1 else pool_size[2] - 1 - pad = (w_pad, h_pad, d_pad) - elif padding == 'valid': - pad = (0, 0, 0) - else: - raise ValueError('Invalid padding:', padding) - - if data_format == 'channels_last': - x = x.dimshuffle((0, 4, 1, 2, 3)) - - if pool_mode == 'max': - pool_out = pool.pool_3d(x, ws=pool_size, stride=strides, - ignore_border=True, - pad=pad, - mode='max') - elif pool_mode == 'avg': - pool_out = pool.pool_3d(x, ws=pool_size, stride=strides, - ignore_border=True, - pad=pad, - mode='average_exc_pad') - else: - raise ValueError('Invalid pooling mode:', pool_mode) - - if padding == 'same': - expected_width = (x.shape[2] + strides[0] - 1) // strides[0] - expected_height = (x.shape[3] + strides[1] - 1) // strides[1] - expected_depth = (x.shape[4] + strides[2] - 1) // strides[2] - - pool_out = pool_out[:, :, - : expected_width, - : expected_height, - : expected_depth] - - if data_format == 'channels_last': - pool_out = pool_out.dimshuffle((0, 2, 3, 4, 1)) - return pool_out - - -def bias_add(x, bias, data_format=None): - data_format = normalize_data_format(data_format) - if ndim(bias) != 1 and ndim(bias) != ndim(x) - 1: - raise ValueError('Unexpected bias dimensions %d, ' - 'expect to be 1 or %d dimensions' - % 
(ndim(bias), ndim(x) - 1)) - bias_shape = tuple(bias.shape) - if ndim(x) == 5: - if data_format == 'channels_first': - if ndim(bias) == 1: - x += reshape(bias, (1, bias_shape[0], 1, 1, 1)) - else: - x += reshape(bias, (1, bias_shape[3]) + bias_shape[:3]) - elif data_format == 'channels_last': - if ndim(bias) == 1: - x += reshape(bias, (1, 1, 1, 1, bias_shape[0])) - else: - x += reshape(bias, (1,) + bias_shape) - elif ndim(x) == 4: - if data_format == 'channels_first': - if ndim(bias) == 1: - x += reshape(bias, (1, bias_shape[0], 1, 1)) - else: - x += reshape(bias, (1, bias_shape[2]) + bias_shape[:2]) - elif data_format == 'channels_last': - if ndim(bias) == 1: - x += reshape(bias, (1, 1, 1, bias_shape[0])) - else: - x += reshape(bias, (1,) + bias_shape) - elif ndim(x) == 3: - if data_format == 'channels_first': - if ndim(bias) == 1: - x += reshape(bias, (1, bias_shape[0], 1)) - else: - x += reshape(bias, (1, bias_shape[1], bias_shape[0])) - elif data_format == 'channels_last': - if ndim(bias) == 1: - x += reshape(bias, (1, 1, bias_shape[0])) - else: - x += reshape(bias, (1,) + bias_shape) - else: - x += bias - return x - - -# RANDOMNESS - - -def random_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(1, 10e6) - rng = RandomStreams(seed=seed) - return rng.normal(size=shape, avg=mean, std=stddev, dtype=dtype) - - -def random_uniform(shape, minval=0.0, maxval=1.0, dtype=None, seed=None): - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(1, 10e6) - rng = RandomStreams(seed=seed) - return rng.uniform(shape, low=minval, high=maxval, dtype=dtype) - - -def random_binomial(shape, p=0.0, dtype=None, seed=None): - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(1, 10e6) - rng = RandomStreams(seed=seed) - return rng.binomial(shape, p=p, dtype=dtype) - - -def truncated_normal(shape, mean=0.0, stddev=1.0, dtype=None, seed=None): - if dtype is None: - dtype = floatx() - if seed is None: - seed = np.random.randint(1, 10e6) - rng = RandomStreams(seed=seed) - - try: - return rng.normal(size=shape, avg=mean, std=stddev, dtype=dtype, - truncate=True) - except TypeError: - normal_t = rng.normal(size=shape, avg=mean, std=stddev, dtype=dtype) - # Poor man's truncated normal: we literally clip the tensor - return T.clip(normal_t, mean - 2 * stddev, mean + 2 * stddev) - - -# Theano implementation of CTC -# Used with permission from Shawn Tan -# https://github.com/shawntan/ -# Note that TensorFlow's native CTC code is significantly -# faster than this - - -def ctc_interleave_blanks(Y): - Y_ = T.alloc(-1, Y.shape[0] * 2 + 1) - Y_ = T.set_subtensor(Y_[T.arange(Y.shape[0]) * 2 + 1], Y) - return Y_ - - -def ctc_create_skip_idxs(Y): - skip_idxs = T.arange((Y.shape[0] - 3) // 2) * 2 + 1 - non_repeats = T.neq(Y[skip_idxs], Y[skip_idxs + 2]) - return skip_idxs[non_repeats.nonzero()] - - -def ctc_update_log_p(skip_idxs, zeros, active, log_p_curr, log_p_prev): - active_skip_idxs = skip_idxs[(skip_idxs < active).nonzero()] - active_next = T.cast(T.minimum( - T.maximum( - active + 1, - T.max(T.concatenate([active_skip_idxs, [-1]])) + 2 + 1 - ), log_p_curr.shape[0]), 'int32') - - common_factor = T.max(log_p_prev[:active]) - p_prev = T.exp(log_p_prev[:active] - common_factor) - _p_prev = zeros[:active_next] - # copy over - _p_prev = T.set_subtensor(_p_prev[:active], p_prev) - # previous transitions - _p_prev = T.inc_subtensor(_p_prev[1:], _p_prev[:-1]) - # skip 
transitions - _p_prev = T.inc_subtensor( - _p_prev[active_skip_idxs + 2], p_prev[active_skip_idxs]) - updated_log_p_prev = T.log(_p_prev) + common_factor - - log_p_next = T.set_subtensor( - zeros[:active_next], - log_p_curr[:active_next] + updated_log_p_prev - ) - return active_next, log_p_next - - -def ctc_path_probs(predict, Y, alpha=1e-4): - smoothed = (1 - alpha) * predict[:, Y] + \ - alpha * np.float32(1.) / Y.shape[0] - L = T.log(smoothed) - zeros = T.zeros_like(L[0]) - log_first = zeros - - f_skip_idxs = ctc_create_skip_idxs(Y) - # there should be a shortcut to calculating this - b_skip_idxs = ctc_create_skip_idxs(Y[::-1]) - - def step(log_f_curr, log_b_curr, f_active, log_f_prev, b_active, log_b_prev): - f_active_next, log_f_next = ctc_update_log_p( - f_skip_idxs, zeros, f_active, log_f_curr, log_f_prev) - b_active_next, log_b_next = ctc_update_log_p( - b_skip_idxs, zeros, b_active, log_b_curr, log_b_prev) - return f_active_next, log_f_next, b_active_next, log_b_next - - [f_active, log_f_probs, b_active, log_b_probs], _ = theano.scan( - step, - sequences=[L, L[::-1, ::-1]], - outputs_info=[np.int32(1), log_first, np.int32(1), log_first]) - - idxs = T.arange(L.shape[1]).dimshuffle('x', 0) - mask = ((idxs < f_active.dimshuffle(0, 'x')) & - (idxs < b_active.dimshuffle(0, 'x'))[::-1, ::-1]) - log_probs = log_f_probs + log_b_probs[::-1, ::-1] - L - return log_probs, mask - - -def ctc_cost(predict, Y): - log_probs, mask = ctc_path_probs(predict, ctc_interleave_blanks(Y)) - common_factor = T.max(log_probs) - total_log_prob = T.log( - T.sum(T.exp(log_probs - common_factor)[mask.nonzero()])) - total_log_prob = total_log_prob + common_factor - return -total_log_prob - - -# batchifies original CTC code -def ctc_batch_cost(y_true, y_pred, input_length, label_length): - """Runs CTC loss algorithm on each batch element. - - # Arguments - y_true: tensor (samples, max_string_length) containing the truth labels - y_pred: tensor (samples, time_steps, num_categories) containing the - prediction, or output of the softmax - input_length: tensor (samples,1) containing the sequence length for - each batch item in y_pred - label_length: tensor (samples,1) containing the sequence length for - each batch item in y_true - - # Returns - Tensor with shape (samples,1) containing the - CTC loss of each element - """ - - def ctc_step(y_true_step, y_pred_step, input_length_step, label_length_step): - y_pred_step = y_pred_step[0: input_length_step[0]] - y_true_step = y_true_step[0:label_length_step[0]] - return ctc_cost(y_pred_step, y_true_step) - - ret, _ = theano.scan( - fn=ctc_step, - outputs_info=None, - sequences=[y_true, y_pred, input_length, label_length] - ) - - ret = ret.dimshuffle('x', 0) - return ret - - -# HIGH ORDER FUNCTIONS - -def map_fn(fn, elems, name=None, dtype=None): - """Map the function fn over the elements elems and return the outputs. - - # Arguments - fn: Callable that will be called upon each element in elems - elems: tensor, at least 2 dimensional - name: A string name for the map node in the graph - - # Returns - Tensor with first dimension equal to the elems and second depending on - fn - """ - return theano.map(fn, elems, name=name)[0] - - -def foldl(fn, elems, initializer=None, name=None): - """Reduce elems using fn to combine them from left to right. 
- - # Arguments - fn: Callable that will be called upon each element in elems and an - accumulator, for instance lambda acc, x: acc + x - elems: tensor - initializer: The first value used (elems[0] in case of None) - name: A string name for the foldl node in the graph - - # Returns - Same type and shape as initializer - """ - if initializer is None: - initializer = elems[0] - elems = elems[1:] - - # We need to change the order of the arguments because theano accepts x as - # first parameter and accumulator as second - return theano.foldl(lambda x, acc: fn(acc, x), - elems, initializer, name=name)[0] - - -def foldr(fn, elems, initializer=None, name=None): - """Reduce elems using fn to combine them from right to left. - - # Arguments - fn: Callable that will be called upon each element in elems and an - accumulator, for instance lambda acc, x: acc + x - elems: tensor - initializer: The first value used (elems[-1] in case of None) - name: A string name for the foldr node in the graph - - # Returns - Same type and shape as initializer - """ - if initializer is None: - initializer = elems[-1] - elems = elems[:-1] - - # We need to change the order of the arguments because theano accepts x as - # first parameter and accumulator as second - return theano.foldr(lambda x, acc: fn(acc, x), - elems, initializer, name=name)[0] - - -def local_conv1d(inputs, kernel, kernel_size, strides, data_format=None): - data_format = normalize_data_format(data_format) - - stride = strides[0] - kernel_shape = int_shape(kernel) - output_length, feature_dim, filters = kernel_shape - - xs = [] - for i in range(output_length): - slice_length = py_slice(i * stride, - i * stride + kernel_size[0]) - xs.append(reshape(inputs[:, slice_length, :], - (1, -1, feature_dim))) - x_aggregate = concatenate(xs, axis=0) - # Shape: `(output_length, batch_size, filters)`. 
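-     # `x_aggregate` stacks one flattened input patch per output
-     # position, so the `batch_dot` below pairs every position with its
-     # own kernel slice: the weights are unshared (locally connected),
-     # unlike a plain convolution.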
- output = batch_dot(x_aggregate, kernel) - return permute_dimensions(output, (1, 0, 2)) - - -def local_conv2d(inputs, - kernel, - kernel_size, - strides, - output_shape, - data_format=None): - data_format = normalize_data_format(data_format) - - stride_row, stride_col = strides - output_row, output_col = output_shape - kernel_shape = int_shape(kernel) - _, feature_dim, filters = kernel_shape - - if data_format == 'channels_first': - output = [] - for i in range(output_row): - for j in range(output_col): - slice_row = py_slice(i * stride_row, - i * stride_row + kernel_size[0]) - slice_col = py_slice(j * stride_col, - j * stride_col + kernel_size[1]) - x_flatten = reshape(inputs[:, :, slice_row, slice_col], - (1, -1, feature_dim)) - output.append(dot(x_flatten, - kernel[i * output_col + j, :, :])) - output = concatenate(output, axis=0) - output = reshape(output, - (output_row, output_col, -1, filters)) - output = permute_dimensions(output, (2, 3, 0, 1)) - else: - xs = [] - for i in range(output_row): - for j in range(output_col): - slice_row = py_slice(i * stride_row, - i * stride_row + kernel_size[0]) - slice_col = py_slice(j * stride_col, - j * stride_col + kernel_size[1]) - xs.append(reshape(inputs[:, slice_row, slice_col, :], - (1, -1, feature_dim))) - - x_aggregate = concatenate(xs, axis=0) - output = batch_dot(x_aggregate, kernel) - output = reshape(output, - (output_row, output_col, -1, filters)) - output = permute_dimensions(output, (2, 0, 1, 3)) - return output - - -def ctc_label_dense_to_sparse(labels, label_lengths): - raise NotImplementedError - - -def ctc_decode(y_pred, input_length, greedy=True, beam_width=100, top_paths=1, - merge_repeated=False): - raise NotImplementedError -from __future__ import absolute_import - -from . import mnist -from . import imdb -from . import reuters -from . import cifar10 -from . import cifar100 -from . import boston_housing -from . import fashion_mnist -"""Boston housing price regression dataset. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from ..utils.data_utils import get_file -import numpy as np - - -def load_data(path='boston_housing.npz', test_split=0.2, seed=113): - """Loads the Boston Housing dataset. - - # Arguments - path: path where to cache the dataset locally - (relative to ~/.keras/datasets). - test_split: fraction of the data to reserve as test set. - seed: Random seed for shuffling the data - before computing the test split. - - # Returns - Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. - """ - assert 0 <= test_split < 1 - path = get_file( - path, - origin='https://s3.amazonaws.com/keras-datasets/boston_housing.npz', - file_hash='f553886a1f8d56431e820c5b82552d9d95cfcb96d1e678153f8839538947dff5') - with np.load(path, allow_pickle=True) as f: - x = f['x'] - y = f['y'] - - rng = np.random.RandomState(seed) - indices = np.arange(len(x)) - rng.shuffle(indices) - x = x[indices] - y = y[indices] - - x_train = np.array(x[:int(len(x) * (1 - test_split))]) - y_train = np.array(y[:int(len(x) * (1 - test_split))]) - x_test = np.array(x[int(len(x) * (1 - test_split)):]) - y_test = np.array(y[int(len(x) * (1 - test_split)):]) - return (x_train, y_train), (x_test, y_test) -# -*- coding: utf-8 -*- -"""Utilities common to CIFAR10 and CIFAR100 datasets. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import sys -from six.moves import cPickle - - -def load_batch(fpath, label_key='labels'): - """Internal utility for parsing CIFAR data. - - # Arguments - fpath: path the file to parse. - label_key: key for label data in the retrieve - dictionary. - - # Returns - A tuple `(data, labels)`. - """ - with open(fpath, 'rb') as f: - if sys.version_info < (3,): - d = cPickle.load(f) - else: - d = cPickle.load(f, encoding='bytes') - # decode utf8 - d_decoded = {} - for k, v in d.items(): - d_decoded[k.decode('utf8')] = v - d = d_decoded - data = d['data'] - labels = d[label_key] - - data = data.reshape(data.shape[0], 3, 32, 32) - return data, labels -"""CIFAR10 small images classification dataset. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from .cifar import load_batch -from ..utils.data_utils import get_file -from .. import backend as K -import numpy as np -import os - - -def load_data(): - """Loads CIFAR10 dataset. - - # Returns - Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. - """ - dirname = 'cifar-10-batches-py' - origin = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' - path = get_file(dirname, origin=origin, untar=True) - - num_train_samples = 50000 - - x_train = np.empty((num_train_samples, 3, 32, 32), dtype='uint8') - y_train = np.empty((num_train_samples,), dtype='uint8') - - for i in range(1, 6): - fpath = os.path.join(path, 'data_batch_' + str(i)) - (x_train[(i - 1) * 10000: i * 10000, :, :, :], - y_train[(i - 1) * 10000: i * 10000]) = load_batch(fpath) - - fpath = os.path.join(path, 'test_batch') - x_test, y_test = load_batch(fpath) - - y_train = np.reshape(y_train, (len(y_train), 1)) - y_test = np.reshape(y_test, (len(y_test), 1)) - - if K.image_data_format() == 'channels_last': - x_train = x_train.transpose(0, 2, 3, 1) - x_test = x_test.transpose(0, 2, 3, 1) - - return (x_train, y_train), (x_test, y_test) -"""CIFAR100 small images classification dataset. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from .cifar import load_batch -from ..utils.data_utils import get_file -from .. import backend as K -import numpy as np -import os - - -def load_data(label_mode='fine'): - """Loads CIFAR100 dataset. - - # Arguments - label_mode: one of "fine", "coarse". - - # Returns - Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. - - # Raises - ValueError: in case of invalid `label_mode`. - """ - if label_mode not in ['fine', 'coarse']: - raise ValueError('`label_mode` must be one of `"fine"`, `"coarse"`.') - - dirname = 'cifar-100-python' - origin = 'https://www.cs.toronto.edu/~kriz/cifar-100-python.tar.gz' - path = get_file(dirname, origin=origin, untar=True) - - fpath = os.path.join(path, 'train') - x_train, y_train = load_batch(fpath, label_key=label_mode + '_labels') - - fpath = os.path.join(path, 'test') - x_test, y_test = load_batch(fpath, label_key=label_mode + '_labels') - - y_train = np.reshape(y_train, (len(y_train), 1)) - y_test = np.reshape(y_test, (len(y_test), 1)) - - if K.image_data_format() == 'channels_last': - x_train = x_train.transpose(0, 2, 3, 1) - x_test = x_test.transpose(0, 2, 3, 1) - - return (x_train, y_train), (x_test, y_test) -"""Fashion-MNIST dataset. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import gzip -import os - -from ..utils.data_utils import get_file -import numpy as np - - -def load_data(): - """Loads the Fashion-MNIST dataset. - - # Returns - Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. - """ - dirname = os.path.join('datasets', 'fashion-mnist') - base = 'http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/' - files = ['train-labels-idx1-ubyte.gz', 'train-images-idx3-ubyte.gz', - 't10k-labels-idx1-ubyte.gz', 't10k-images-idx3-ubyte.gz'] - - paths = [] - for fname in files: - paths.append(get_file(fname, - origin=base + fname, - cache_subdir=dirname)) - - with gzip.open(paths[0], 'rb') as lbpath: - y_train = np.frombuffer(lbpath.read(), np.uint8, offset=8) - - with gzip.open(paths[1], 'rb') as imgpath: - x_train = np.frombuffer(imgpath.read(), np.uint8, - offset=16).reshape(len(y_train), 28, 28) - - with gzip.open(paths[2], 'rb') as lbpath: - y_test = np.frombuffer(lbpath.read(), np.uint8, offset=8) - - with gzip.open(paths[3], 'rb') as imgpath: - x_test = np.frombuffer(imgpath.read(), np.uint8, - offset=16).reshape(len(y_test), 28, 28) - - return (x_train, y_train), (x_test, y_test) -"""IMDB sentiment classification dataset. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from ..utils.data_utils import get_file -from ..preprocessing.sequence import _remove_long_seq -import numpy as np -import json -import warnings - - -def load_data(path='imdb.npz', num_words=None, skip_top=0, - maxlen=None, seed=113, - start_char=1, oov_char=2, index_from=3, **kwargs): - """Loads the IMDB dataset. - - # Arguments - path: where to cache the data (relative to `~/.keras/dataset`). - num_words: max number of words to include. Words are ranked - by how often they occur (in the training set) and only - the most frequent words are kept - skip_top: skip the top N most frequently occurring words - (which may not be informative). - maxlen: sequences longer than this will be filtered out. - seed: random seed for sample shuffling. - start_char: The start of a sequence will be marked with this character. - Set to 1 because 0 is usually the padding character. - oov_char: words that were cut out because of the `num_words` - or `skip_top` limit will be replaced with this character. - index_from: index actual words with this index and higher. - - # Returns - Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. - - # Raises - ValueError: in case `maxlen` is so low - that no input sequence could be kept. - - Note that the 'out of vocabulary' character is only used for - words that were present in the training set but are not included - because they're not making the `num_words` cut here. - Words that were not seen in the training set but are in the test set - have simply been skipped. 
- """ - # Legacy support - if 'nb_words' in kwargs: - warnings.warn('The `nb_words` argument in `load_data` ' - 'has been renamed `num_words`.') - num_words = kwargs.pop('nb_words') - if kwargs: - raise TypeError('Unrecognized keyword arguments: ' + str(kwargs)) - - path = get_file(path, - origin='https://s3.amazonaws.com/text-datasets/imdb.npz', - file_hash='599dadb1135973df5b59232a0e9a887c') - with np.load(path, allow_pickle=True) as f: - x_train, labels_train = f['x_train'], f['y_train'] - x_test, labels_test = f['x_test'], f['y_test'] - - rng = np.random.RandomState(seed) - indices = np.arange(len(x_train)) - rng.shuffle(indices) - x_train = x_train[indices] - labels_train = labels_train[indices] - - indices = np.arange(len(x_test)) - rng.shuffle(indices) - x_test = x_test[indices] - labels_test = labels_test[indices] - - xs = np.concatenate([x_train, x_test]) - labels = np.concatenate([labels_train, labels_test]) - - if start_char is not None: - xs = [[start_char] + [w + index_from for w in x] for x in xs] - elif index_from: - xs = [[w + index_from for w in x] for x in xs] - - if maxlen: - xs, labels = _remove_long_seq(maxlen, xs, labels) - if not xs: - raise ValueError('After filtering for sequences shorter than maxlen=' + - str(maxlen) + ', no sequence was kept. ' - 'Increase maxlen.') - if not num_words: - num_words = max([max(x) for x in xs]) - - # by convention, use 2 as OOV word - # reserve 'index_from' (=3 by default) characters: - # 0 (padding), 1 (start), 2 (OOV) - if oov_char is not None: - xs = [[w if (skip_top <= w < num_words) else oov_char for w in x] - for x in xs] - else: - xs = [[w for w in x if skip_top <= w < num_words] - for x in xs] - - idx = len(x_train) - x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx]) - x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:]) - - return (x_train, y_train), (x_test, y_test) - - -def get_word_index(path='imdb_word_index.json'): - """Retrieves the dictionary mapping words to word indices. - - # Arguments - path: where to cache the data (relative to `~/.keras/dataset`). - - # Returns - The word index dictionary. - """ - path = get_file( - path, - origin='https://s3.amazonaws.com/text-datasets/imdb_word_index.json', - file_hash='bfafd718b763782e994055a2d397834f') - with open(path) as f: - return json.load(f) -"""MNIST handwritten digits dataset. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from ..utils.data_utils import get_file -import numpy as np - - -def load_data(path='mnist.npz'): - """Loads the MNIST dataset. - - # Arguments - path: path where to cache the dataset locally - (relative to ~/.keras/datasets). - - # Returns - Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. - """ - path = get_file(path, - origin='https://s3.amazonaws.com/img-datasets/mnist.npz', - file_hash='8a61469f7ea1b51cbae51d4f78837e45') - with np.load(path, allow_pickle=True) as f: - x_train, y_train = f['x_train'], f['y_train'] - x_test, y_test = f['x_test'], f['y_test'] - return (x_train, y_train), (x_test, y_test) -# -*- coding: utf-8 -*- -"""Reuters topic classification dataset. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from ..utils.data_utils import get_file -from ..preprocessing.sequence import _remove_long_seq -import numpy as np -import json -import warnings - - -def load_data(path='reuters.npz', num_words=None, skip_top=0, - maxlen=None, test_split=0.2, seed=113, - start_char=1, oov_char=2, index_from=3, **kwargs): - """Loads the Reuters newswire classification dataset. - - # Arguments - path: where to cache the data (relative to `~/.keras/dataset`). - num_words: max number of words to include. Words are ranked - by how often they occur (in the training set) and only - the most frequent words are kept - skip_top: skip the top N most frequently occurring words - (which may not be informative). - maxlen: truncate sequences after this length. - test_split: Fraction of the dataset to be used as test data. - seed: random seed for sample shuffling. - start_char: The start of a sequence will be marked with this character. - Set to 1 because 0 is usually the padding character. - oov_char: words that were cut out because of the `num_words` - or `skip_top` limit will be replaced with this character. - index_from: index actual words with this index and higher. - - # Returns - Tuple of Numpy arrays: `(x_train, y_train), (x_test, y_test)`. - - Note that the 'out of vocabulary' character is only used for - words that were present in the training set but are not included - because they're not making the `num_words` cut here. - Words that were not seen in the training set but are in the test set - have simply been skipped. - """ - # Legacy support - if 'nb_words' in kwargs: - warnings.warn('The `nb_words` argument in `load_data` ' - 'has been renamed `num_words`.') - num_words = kwargs.pop('nb_words') - if kwargs: - raise TypeError('Unrecognized keyword arguments: ' + str(kwargs)) - - path = get_file(path, - origin='https://s3.amazonaws.com/text-datasets/reuters.npz', - file_hash='87aedbeb0cb229e378797a632c1997b6') - with np.load(path, allow_pickle=True) as f: - xs, labels = f['x'], f['y'] - - rng = np.random.RandomState(seed) - indices = np.arange(len(xs)) - rng.shuffle(indices) - xs = xs[indices] - labels = labels[indices] - - if start_char is not None: - xs = [[start_char] + [w + index_from for w in x] for x in xs] - elif index_from: - xs = [[w + index_from for w in x] for x in xs] - - if maxlen: - xs, labels = _remove_long_seq(maxlen, xs, labels) - - if not num_words: - num_words = max([max(x) for x in xs]) - - # by convention, use 2 as OOV word - # reserve 'index_from' (=3 by default) characters: - # 0 (padding), 1 (start), 2 (OOV) - if oov_char is not None: - xs = [[w if skip_top <= w < num_words else oov_char for w in x] - for x in xs] - else: - xs = [[w for w in x if skip_top <= w < num_words] for x in xs] - - idx = int(len(xs) * (1 - test_split)) - x_train, y_train = np.array(xs[:idx]), np.array(labels[:idx]) - x_test, y_test = np.array(xs[idx:]), np.array(labels[idx:]) - - return (x_train, y_train), (x_test, y_test) - - -def get_word_index(path='reuters_word_index.json'): - """Retrieves the dictionary mapping words to word indices. - - # Arguments - path: where to cache the data (relative to `~/.keras/dataset`). - - # Returns - The word index dictionary. 
- """ - path = get_file( - path, - origin='https://s3.amazonaws.com/text-datasets/reuters_word_index.json', - file_hash='4d44cc38712099c9e383dc6e5f11a921') - with open(path) as f: - return json.load(f) -# note: `Node` is an internal class, -# it isn't meant to be used by Keras users. -from .input_layer import Input -from .input_layer import InputLayer -from .base_layer import InputSpec -from .base_layer import Layer -from .network import get_source_inputs -from .training import Model -"""Contains the base Layer class, from which all layers inherit. -""" -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division - -import re -from six.moves import zip - -from .. import backend as K -from .. import initializers -from ..utils.layer_utils import count_params -from ..utils.generic_utils import has_arg -from ..utils.generic_utils import object_list_uid -from ..utils.generic_utils import to_list -from ..utils.generic_utils import unpack_singleton -from ..utils.generic_utils import is_all_none -from ..legacy import interfaces - - -class Layer(object): - """Abstract base layer class. - - # Properties - input, output: Input/output tensor(s). Note that if the layer - is used more than once (shared layer), this is ill-defined - and will raise an exception. In such cases, use - `layer.get_input_at(node_index)`. - input_mask, output_mask: Mask tensors. Same caveats apply as - input, output. - input_shape: Shape tuple. Provided for convenience, but note - that there may be cases in which this attribute is - ill-defined (e.g. a shared layer with multiple input - shapes), in which case requesting `input_shape` will raise - an Exception. Prefer using - `layer.get_input_shape_at(node_index)`. - input_spec: List of InputSpec class instances - each entry describes one required input: - - ndim - - dtype - A layer with `n` input tensors must have - an `input_spec` of length `n`. - name: String, must be unique within a model. - non_trainable_weights: List of variables. - output_shape: Shape tuple. See `input_shape`. - stateful: Boolean indicating whether the layer carries - additional non-weight state. Used in, for instance, RNN - cells to carry information between batches. - supports_masking: Boolean indicator of whether the layer - supports masking, typically for unused timesteps in a - sequence. - trainable: Boolean, whether the layer weights - will be updated during training. - trainable_weights: List of variables. - uses_learning_phase: Whether any operation - of the layer uses `K.in_training_phase()` - or `K.in_test_phase()`. - weights: The concatenation of the lists trainable_weights and - non_trainable_weights (in this order). - - - # Methods - call(x, mask=None): Where the layer's logic lives. - __call__(x, mask=None): Wrapper around the layer logic (`call`). 
- If x is a Keras tensor: - - Connect current layer with last layer from tensor: - `self._add_inbound_node(last_layer)` - - Add layer to tensor history - If layer is not built: - - Build from x._keras_shape - compute_mask(x, mask) - compute_output_shape(input_shape) - count_params() - get_config() - get_input_at(node_index) - get_input_mask_at(node_index) - get_input_shape_at(node_index) - get_output_at(node_index) - get_output_mask_at(node_index) - get_output_shape_at(node_index) - get_weights() - set_weights(weights) - - # Class Methods - from_config(config) - - # Internal methods: - _add_inbound_node(layer, index=0) - assert_input_compatibility() - build(input_shape) - """ - - def __init__(self, **kwargs): - self.input_spec = None - self.supports_masking = False - self.stateful = False - - # These properties will be set upon call of self.build() - self._trainable_weights = [] - self._non_trainable_weights = [] - self._losses = [] - self._updates = [] - self._per_input_losses = {} - self._per_input_updates = {} - self._built = False - - # These lists will be filled via successive calls - # to self._add_inbound_node(). - self._inbound_nodes = [] - self._outbound_nodes = [] - - # These properties should be set by the user via keyword arguments. - # note that 'dtype', 'input_shape' and 'batch_input_shape' - # are only applicable to input layers: do not pass these keywords - # to non-input layers. - allowed_kwargs = {'input_shape', - 'batch_input_shape', - 'batch_size', - 'dtype', - 'name', - 'trainable', - 'weights', - 'input_dtype', # legacy - } - for kwarg in kwargs: - if kwarg not in allowed_kwargs: - raise TypeError('Keyword argument not understood:', kwarg) - name = kwargs.get('name') - if not name: - prefix = self.__class__.__name__ - name = _to_snake_case(prefix) + '_' + str(K.get_uid(prefix)) - self.name = name - - self.trainable = kwargs.get('trainable', True) - if 'input_shape' in kwargs or 'batch_input_shape' in kwargs: - # In this case we will later create an input layer - # to insert before the current layer - if 'batch_input_shape' in kwargs: - batch_input_shape = tuple(kwargs['batch_input_shape']) - elif 'input_shape' in kwargs: - batch_size = kwargs.get('batch_size') - batch_input_shape = ( - batch_size,) + tuple(kwargs['input_shape']) - self.batch_input_shape = batch_input_shape - - # Set dtype. - dtype = kwargs.get('dtype') - if dtype is None: - dtype = kwargs.get('input_dtype') - if dtype is None: - dtype = K.floatx() - self.dtype = dtype - - self._initial_weights = kwargs.get('weights') - - @staticmethod - def _node_key(layer, node_index): - """Converts a layer and its index to a unique (immutable type) name. - - This function is used internally with `self._network_nodes`. - - # Arguments - layer: The layer. - node_index: The layer's position (e.g. via enumerate) in a list of - nodes. - - # Returns - The unique name. 
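-
-     For example, a layer named `dense_1` queried at `node_index=0`
-     yields `'dense_1_ib-0'`.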
- """ - return layer.name + '_ib-' + str(node_index) - - @property - def losses(self): - return self._losses - - @property - def updates(self): - if not self.trainable and not self.stateful: - return [] - return self._updates - - @property - def built(self): - return self._built - - @built.setter - def built(self, value): - self._built = value - - @property - def trainable_weights(self): - trainable = getattr(self, 'trainable', True) - if trainable: - return self._trainable_weights - else: - return [] - - @trainable_weights.setter - def trainable_weights(self, weights): - self._trainable_weights = weights - - @property - def non_trainable_weights(self): - trainable = getattr(self, 'trainable', True) - if not trainable: - return self._trainable_weights + self._non_trainable_weights - else: - return self._non_trainable_weights - - @non_trainable_weights.setter - def non_trainable_weights(self, weights): - self._non_trainable_weights = weights - - @interfaces.legacy_add_weight_support - def add_weight(self, - name, - shape, - dtype=None, - initializer=None, - regularizer=None, - trainable=True, - constraint=None): - """Adds a weight variable to the layer. - - # Arguments - name: String, the name for the weight variable. - shape: The shape tuple of the weight. - dtype: The dtype of the weight. - initializer: An Initializer instance (callable). - regularizer: An optional Regularizer instance. - trainable: A boolean, whether the weight should - be trained via backprop or not (assuming - that the layer itself is also trainable). - constraint: An optional Constraint instance. - - # Returns - The created weight variable. - """ - initializer = initializers.get(initializer) - if dtype is None: - dtype = K.floatx() - weight = K.variable(initializer(shape), - dtype=dtype, - name=name, - constraint=constraint) - if regularizer is not None: - with K.name_scope('weight_regularizer'): - self.add_loss(regularizer(weight)) - if trainable: - self._trainable_weights.append(weight) - else: - self._non_trainable_weights.append(weight) - return weight - - def assert_input_compatibility(self, inputs): - """Checks compatibility between the layer and provided inputs. - - This checks that the tensor(s) `input` - verify the input assumptions of the layer - (if any). If not, exceptions are raised. - - # Arguments - inputs: input tensor or list of input tensors. - - # Raises - ValueError: in case of mismatch between - the provided inputs and the expectations of the layer. - """ - inputs = to_list(inputs) - for x in inputs: - try: - K.is_keras_tensor(x) - except ValueError: - raise ValueError('Layer ' + self.name + ' was called with ' - 'an input that isn\'t a symbolic tensor. ' - 'Received type: ' + - str(type(x)) + '. Full input: ' + - str(inputs) + '. All inputs to the layer ' - 'should be tensors.') - - if not self.input_spec: - return - if not isinstance(self.input_spec, (list, tuple)): - input_spec = to_list(self.input_spec) - else: - input_spec = self.input_spec - if len(inputs) != len(input_spec): - raise ValueError('Layer ' + self.name + ' expects ' + - str(len(input_spec)) + ' inputs, ' - 'but it received ' + str(len(inputs)) + - ' input tensors. Input received: ' + - str(inputs)) - for input_index, (x, spec) in enumerate(zip(inputs, input_spec)): - if spec is None: - continue - - # Check ndim. 
- if spec.ndim is not None: - if K.ndim(x) != spec.ndim: - raise ValueError('Input ' + str(input_index) + - ' is incompatible with layer ' + - self.name + ': expected ndim=' + - str(spec.ndim) + ', found ndim=' + - str(K.ndim(x))) - if spec.max_ndim is not None: - ndim = K.ndim(x) - if ndim is not None and ndim > spec.max_ndim: - raise ValueError('Input ' + str(input_index) + - ' is incompatible with layer ' + - self.name + ': expected max_ndim=' + - str(spec.max_ndim) + ', found ndim=' + - str(K.ndim(x))) - if spec.min_ndim is not None: - ndim = K.ndim(x) - if ndim is not None and ndim < spec.min_ndim: - raise ValueError('Input ' + str(input_index) + - ' is incompatible with layer ' + - self.name + ': expected min_ndim=' + - str(spec.min_ndim) + ', found ndim=' + - str(K.ndim(x))) - # Check dtype. - if spec.dtype is not None: - if K.dtype(x) != spec.dtype: - raise ValueError('Input ' + str(input_index) + - ' is incompatible with layer ' + - self.name + ': expected dtype=' + - str(spec.dtype) + ', found dtype=' + - str(K.dtype(x))) - # Check specific shape axes. - if spec.axes: - try: - x_shape = K.int_shape(x) - except TypeError: - x_shape = None - if x_shape is not None: - for axis, value in spec.axes.items(): - if (value is not None and - x_shape[int(axis)] not in {value, None}): - raise ValueError( - 'Input ' + str(input_index) + - ' is incompatible with layer ' + - self.name + ': expected axis ' + - str(axis) + ' of input shape to have ' - 'value ' + str(value) + - ' but got shape ' + str(x_shape)) - # Check shape. - if spec.shape is not None: - try: - x_shape = K.int_shape(x) - except TypeError: - x_shape = None - if x_shape is not None: - for spec_dim, dim in zip(spec.shape, x_shape): - if spec_dim is not None and dim is not None: - if spec_dim != dim: - raise ValueError( - 'Input ' + str(input_index) + - ' is incompatible with layer ' + - self.name + ': expected shape=' + - str(spec.shape) + ', found shape=' + - str(x_shape)) - - def call(self, inputs, **kwargs): - """This is where the layer's logic lives. - - # Arguments - inputs: Input tensor, or list/tuple of input tensors. - **kwargs: Additional keyword arguments. - - # Returns - A tensor or list/tuple of tensors. - """ - return inputs - - def __call__(self, inputs, **kwargs): - """Wrapper around self.call(), for handling internal references. - - If a Keras tensor is passed: - - We call self._add_inbound_node(). - - If necessary, we `build` the layer to match - the _keras_shape of the input(s). - - We update the _keras_shape of every input tensor with - its new shape (obtained via self.compute_output_shape). - This is done as part of _add_inbound_node(). - - We update the _keras_history of the output tensor(s) - with the current layer. - This is done as part of _add_inbound_node(). - - # Arguments - inputs: Can be a tensor or list/tuple of tensors. - **kwargs: Additional keyword arguments to be passed to `call()`. - - # Returns - Output of the layer's `call` method. - - # Raises - ValueError: in case the layer is missing shape information - for its `build` call. - """ - if isinstance(inputs, list): - inputs = inputs[:] - with K.name_scope(self.name): - # Handle laying building (weight creating, input spec locking). - if not self.built: - # Raise exceptions in case the input is not compatible - # with the input_spec specified in the layer constructor. - self.assert_input_compatibility(inputs) - - # Collect input shapes to build layer. 
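-                 # Prefer the Keras-side static shape (`_keras_shape`)
-                 # when the tensor carries one; otherwise fall back to
-                 # the backend's `int_shape`. Without any shape
-                 # information the layer cannot be built automatically.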
- input_shapes = [] - for x_elem in to_list(inputs): - if hasattr(x_elem, '_keras_shape'): - input_shapes.append(x_elem._keras_shape) - elif hasattr(K, 'int_shape'): - input_shapes.append(K.int_shape(x_elem)) - else: - raise ValueError('You tried to call layer "' + - self.name + - '". This layer has no information' - ' about its expected input shape, ' - 'and thus cannot be built. ' - 'You can build it manually via: ' - '`layer.build(batch_input_shape)`') - self.build(unpack_singleton(input_shapes)) - self.built = True - - # Load weights that were specified at layer instantiation. - if self._initial_weights is not None: - self.set_weights(self._initial_weights) - - # Raise exceptions in case the input is not compatible - # with the input_spec set at build time. - self.assert_input_compatibility(inputs) - - # Handle mask propagation. - previous_mask = _collect_previous_mask(inputs) - user_kwargs = kwargs.copy() - if not is_all_none(previous_mask): - # The previous layer generated a mask. - if has_arg(self.call, 'mask'): - if 'mask' not in kwargs: - # If mask is explicitly passed to __call__, - # we should override the default mask. - kwargs['mask'] = previous_mask - # Handle automatic shape inference (only useful for Theano). - input_shape = _collect_input_shape(inputs) - - # Actually call the layer, - # collecting output(s), mask(s), and shape(s). - output = self.call(inputs, **kwargs) - output_mask = self.compute_mask(inputs, previous_mask) - - # If the layer returns tensors from its inputs, unmodified, - # we copy them to avoid loss of tensor metadata. - output_ls = to_list(output) - inputs_ls = to_list(inputs) - output_ls_copy = [] - for x in output_ls: - if x in inputs_ls: - x = K.identity(x) - output_ls_copy.append(x) - output = unpack_singleton(output_ls_copy) - - # Inferring the output shape is only relevant for Theano. - if all([s is not None - for s in to_list(input_shape)]): - output_shape = self.compute_output_shape(input_shape) - else: - if isinstance(input_shape, list): - output_shape = [None for _ in input_shape] - else: - output_shape = None - - if (not isinstance(output_mask, (list, tuple)) and - len(output_ls) > 1): - # Augment the mask to match the length of the output. - output_mask = [output_mask] * len(output_ls) - - # Add an inbound node to the layer, so that it keeps track - # of the call and of all new variables created during the call. - # This also updates the layer history of the output tensor(s). - # If the input tensor(s) had not previous Keras history, - # this does nothing. - self._add_inbound_node(input_tensors=inputs, - output_tensors=output, - input_masks=previous_mask, - output_masks=output_mask, - input_shapes=input_shape, - output_shapes=output_shape, - arguments=user_kwargs) - - # Apply activity regularizer if any: - if (hasattr(self, 'activity_regularizer') and - self.activity_regularizer is not None): - with K.name_scope('activity_regularizer'): - regularization_losses = [ - self.activity_regularizer(x) - for x in to_list(output)] - self.add_loss(regularization_losses, - inputs=to_list(inputs)) - return output - - def _add_inbound_node(self, input_tensors, output_tensors, - input_masks, output_masks, - input_shapes, output_shapes, arguments=None): - """Internal method to create an inbound node for the layer. - - # Arguments - input_tensors: list of input tensors. - output_tensors: list of output tensors. - input_masks: list of input masks (a mask can be a tensor, or None). - output_masks: list of output masks - (a mask can be a tensor, or None). 
- input_shapes: list of input shape tuples. - output_shapes: list of output shape tuples. - arguments: dictionary of keyword arguments that were passed to the - `call` method of the layer at the call that created the node. - """ - input_tensors = to_list(input_tensors) - output_tensors = to_list(output_tensors) - input_masks = to_list(input_masks) - output_masks = to_list(output_masks) - input_shapes = to_list(input_shapes) - output_shapes = to_list(output_shapes) - - # Collect input tensor(s) coordinates. - inbound_layers = [] - node_indices = [] - tensor_indices = [] - for x in input_tensors: - if hasattr(x, '_keras_history'): - inbound_layer, node_index, tensor_index = x._keras_history - inbound_layers.append(inbound_layer) - node_indices.append(node_index) - tensor_indices.append(tensor_index) - else: - inbound_layers.append(None) - node_indices.append(None) - tensor_indices.append(None) - - # Create node, add it to inbound nodes. - Node( - self, - inbound_layers=inbound_layers, - node_indices=node_indices, - tensor_indices=tensor_indices, - input_tensors=input_tensors, - output_tensors=output_tensors, - input_masks=input_masks, - output_masks=output_masks, - input_shapes=input_shapes, - output_shapes=output_shapes, - arguments=arguments - ) - - # Update tensor history, _keras_shape and _uses_learning_phase. - for i in range(len(output_tensors)): - output_tensors[i]._keras_shape = output_shapes[i] - uses_lp = any( - [getattr(x, '_uses_learning_phase', False) - for x in input_tensors]) - uses_lp = getattr(self, 'uses_learning_phase', False) or uses_lp - output_tensors[i]._uses_learning_phase = getattr( - output_tensors[i], '_uses_learning_phase', False) or uses_lp - output_tensors[i]._keras_history = (self, - len(self._inbound_nodes) - 1, - i) - - def compute_output_shape(self, input_shape): - """Computes the output shape of the layer. - - Assumes that the layer will be built - to match that input shape provided. - - # Arguments - input_shape: Shape tuple (tuple of integers) - or list of shape tuples (one per output tensor of the layer). - Shape tuples can include None for free dimensions, - instead of an integer. - - # Returns - An input shape tuple. - """ - return input_shape - - def compute_mask(self, inputs, mask=None): - """Computes an output mask tensor. - - # Arguments - inputs: Tensor or list of tensors. - mask: Tensor or list of tensors. - - # Returns - None or a tensor (or list of tensors, - one per output tensor of the layer). - """ - if not self.supports_masking: - if mask is not None: - if isinstance(mask, list): - if any(m is not None for m in mask): - raise TypeError('Layer ' + self.name + - ' does not support masking, ' - 'but was passed an input_mask: ' + - str(mask)) - else: - raise TypeError('Layer ' + self.name + - ' does not support masking, ' - 'but was passed an input_mask: ' + - str(mask)) - # masking not explicitly supported: return None as mask - return None - # if masking is explicitly supported, by default - # carry over the input mask - return mask - - def build(self, input_shape): - """Creates the layer weights. - - Must be implemented on all layers that have weights. - - # Arguments - input_shape: Keras tensor (future input to layer) - or list/tuple of Keras tensors to reference - for weight shape computations. - """ - self.built = True - - def _get_node_attribute_at_index(self, node_index, attr, attr_name): - """Retrieves an attribute (e.g. input_tensors) from a node. 
- - This is used to implement the methods: - - get_input_shape_at - - get_output_shape_at - - get_input_at - etc... - - # Arguments - node_index: Integer index of the node from which - to retrieve the attribute. - attr: Exact node attribute name. - attr_name: Human-readable attribute name, for error messages. - - # Returns - The layer's attribute `attr` at the node of index `node_index`. - - # Raises - RuntimeError: If the layer has no inbound nodes. - ValueError: If the index is does not match any node. - """ - if not self._inbound_nodes: - raise RuntimeError('The layer has never been called ' - 'and thus has no defined ' + attr_name + '.') - if not len(self._inbound_nodes) > node_index: - raise ValueError('Asked to get ' + attr_name + - ' at node ' + str(node_index) + - ', but the layer has only ' + - str(len(self._inbound_nodes)) + ' inbound nodes.') - values = getattr(self._inbound_nodes[node_index], attr) - return unpack_singleton(values) - - def get_input_shape_at(self, node_index): - """Retrieves the input shape(s) of a layer at a given node. - - # Arguments - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - # Returns - A shape tuple - (or list of shape tuples if the layer has multiple inputs). - """ - return self._get_node_attribute_at_index(node_index, - 'input_shapes', - 'input shape') - - def get_output_shape_at(self, node_index): - """Retrieves the output shape(s) of a layer at a given node. - - # Arguments - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - # Returns - A shape tuple - (or list of shape tuples if the layer has multiple outputs). - """ - return self._get_node_attribute_at_index(node_index, - 'output_shapes', - 'output shape') - - def get_input_at(self, node_index): - """Retrieves the input tensor(s) of a layer at a given node. - - # Arguments - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - # Returns - A tensor (or list of tensors if the layer has multiple inputs). - """ - return self._get_node_attribute_at_index(node_index, - 'input_tensors', - 'input') - - def get_output_at(self, node_index): - """Retrieves the output tensor(s) of a layer at a given node. - - # Arguments - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - # Returns - A tensor (or list of tensors if the layer has multiple outputs). - """ - return self._get_node_attribute_at_index(node_index, - 'output_tensors', - 'output') - - def get_input_mask_at(self, node_index): - """Retrieves the input mask tensor(s) of a layer at a given node. - - # Arguments - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. `node_index=0` will correspond to the - first time the layer was called. - - # Returns - A mask tensor - (or list of tensors if the layer has multiple inputs). - """ - return self._get_node_attribute_at_index(node_index, - 'input_masks', - 'input mask') - - def get_output_mask_at(self, node_index): - """Retrieves the output mask tensor(s) of a layer at a given node. - - # Arguments - node_index: Integer, index of the node - from which to retrieve the attribute. - E.g. 
`node_index=0` will correspond to the - first time the layer was called. - - # Returns - A mask tensor - (or list of tensors if the layer has multiple outputs). - """ - return self._get_node_attribute_at_index(node_index, - 'output_masks', - 'output mask') - - @property - def input(self): - """Retrieves the input tensor(s) of a layer. - - Only applicable if the layer has exactly one inbound node, - i.e. if it is connected to one incoming layer. - - # Returns - Input tensor or list of input tensors. - - # Raises - AttributeError: if the layer is connected to - more than one incoming layers. - """ - if len(self._inbound_nodes) > 1: - raise AttributeError('Layer ' + self.name + - ' has multiple inbound nodes, ' - 'hence the notion of "layer input" ' - 'is ill-defined. ' - 'Use `get_input_at(node_index)` instead.') - elif not self._inbound_nodes: - raise AttributeError('Layer ' + self.name + - ' is not connected, no input to return.') - return self._get_node_attribute_at_index(0, 'input_tensors', - 'input') - - @property - def output(self): - """Retrieves the output tensor(s) of a layer. - - Only applicable if the layer has exactly one inbound node, - i.e. if it is connected to one incoming layer. - - # Returns - Output tensor or list of output tensors. - - # Raises - AttributeError: if the layer is connected to - more than one incoming layers. - """ - if not self._inbound_nodes: - raise AttributeError('Layer ' + self.name + - ' has no inbound nodes.') - if len(self._inbound_nodes) > 1: - raise AttributeError('Layer ' + self.name + - ' has multiple inbound nodes, ' - 'hence the notion of "layer output" ' - 'is ill-defined. ' - 'Use `get_output_at(node_index)` instead.') - return self._get_node_attribute_at_index(0, 'output_tensors', - 'output') - - @property - def input_mask(self): - """Retrieves the input mask tensor(s) of a layer. - - Only applicable if the layer has exactly one inbound node, - i.e. if it is connected to one incoming layer. - - # Returns - Input mask tensor (potentially None) or list of input - mask tensors. - - # Raises - AttributeError: if the layer is connected to - more than one incoming layers. - """ - if len(self._inbound_nodes) != 1: - raise AttributeError('Layer ' + self.name + - ' has multiple inbound nodes, ' + - 'hence the notion of "layer input mask" ' - 'is ill-defined. ' - 'Use `get_input_mask_at(node_index)` ' - 'instead.') - return self._get_node_attribute_at_index(0, 'input_masks', - 'input mask') - - @property - def output_mask(self): - """Retrieves the output mask tensor(s) of a layer. - - Only applicable if the layer has exactly one inbound node, - i.e. if it is connected to one incoming layer. - - # Returns - Output mask tensor (potentially None) or list of output - mask tensors. - - # Raises - AttributeError: if the layer is connected to - more than one incoming layers. - """ - if len(self._inbound_nodes) != 1: - raise AttributeError('Layer ' + self.name + - ' has multiple inbound nodes, ' - 'hence the notion of "layer output mask" ' - 'is ill-defined. ' - 'Use `get_output_mask_at(node_index)` ' - 'instead.') - return self._get_node_attribute_at_index(0, 'output_masks', - 'output mask') - - @property - def input_shape(self): - """Retrieves the input shape tuple(s) of a layer. - - Only applicable if the layer has exactly one inbound node, - i.e. if it is connected to one incoming layer. - - # Returns - Input shape tuple - (or list of input shape tuples, one tuple per input tensor). 
- - # Raises - AttributeError: if the layer is connected to - more than one incoming layers. - """ - if not self._inbound_nodes: - raise AttributeError('The layer has never been called ' - 'and thus has no defined input shape.') - all_input_shapes = set( - [str(node.input_shapes) for node in self._inbound_nodes]) - if len(all_input_shapes) == 1: - input_shapes = self._inbound_nodes[0].input_shapes - return unpack_singleton(input_shapes) - else: - raise AttributeError('The layer "' + str(self.name) + - ' has multiple inbound nodes, ' - 'with different input shapes. Hence ' - 'the notion of "input shape" is ' - 'ill-defined for the layer. ' - 'Use `get_input_shape_at(node_index)` ' - 'instead.') - - @property - def output_shape(self): - """Retrieves the output shape tuple(s) of a layer. - - Only applicable if the layer has one inbound node, - or if all inbound nodes have the same output shape. - - # Returns - Output shape tuple - (or list of input shape tuples, one tuple per output tensor). - - # Raises - AttributeError: if the layer is connected to - more than one incoming layers. - """ - if not self._inbound_nodes: - raise AttributeError('The layer has never been called ' - 'and thus has no defined output shape.') - all_output_shapes = set( - [str(node.output_shapes) for node in self._inbound_nodes]) - if len(all_output_shapes) == 1: - output_shapes = self._inbound_nodes[0].output_shapes - return unpack_singleton(output_shapes) - else: - raise AttributeError('The layer "' + str(self.name) + - ' has multiple inbound nodes, ' - 'with different output shapes. Hence ' - 'the notion of "output shape" is ' - 'ill-defined for the layer. ' - 'Use `get_output_shape_at(node_index)` ' - 'instead.') - - def add_loss(self, losses, inputs=None): - """Adds losses to the layer. - - The loss may potentially be conditional on some inputs tensors, - for instance activity losses are conditional on the layer's inputs. - - # Arguments - losses: loss tensor or list of loss tensors - to add to the layer. - inputs: input tensor or list of inputs tensors to mark - the losses as conditional on these inputs. - If None is passed, the loss is assumed unconditional - (e.g. L2 weight regularization, which only depends - on the layer's weights variables, not on any inputs tensors). - """ - if losses is None or losses == []: - return - # Update self.losses - losses = to_list(losses) - if hasattr(self, '_losses'): - self._losses += losses - # Update self._per_input_updates - if isinstance(inputs, list) and inputs == []: - inputs = None - if inputs is not None: - inputs_hash = object_list_uid(inputs) - else: - # Updates indexed by None are unconditional - # rather than input-dependent - inputs_hash = None - if inputs_hash not in self._per_input_losses: - self._per_input_losses[inputs_hash] = [] - self._per_input_losses[inputs_hash] += losses - - def add_update(self, updates, inputs=None): - """Adds updates to the layer. - - The updates may potentially be conditional on some inputs tensors, - for instance batch norm updates are conditional on the layer's inputs. - - # Arguments - updates: update op or list of update ops - to add to the layer. - inputs: input tensor or list of inputs tensors to mark - the updates as conditional on these inputs. - If None is passed, the updates are assumed unconditional. 
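-
-     A minimal sketch (hypothetical names, in the style of
-     BatchNormalization's running statistics):
-
-     ```python
-     self.add_update(K.moving_average_update(self.moving_mean,
-                                             mean, 0.99),
-                     inputs=inputs)
-     ```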
- """ - if updates is None or updates == []: - return - # Update self.updates - updates = to_list(updates) - if hasattr(self, '_updates'): - self._updates += updates - # Update self._per_input_updates - if isinstance(inputs, list) and inputs == []: - inputs = None - if inputs is not None: - inputs_hash = object_list_uid(inputs) - else: - # Updates indexed by None are unconditional - # rather than input-dependent - inputs_hash = None - if inputs_hash not in self._per_input_updates: - self._per_input_updates[inputs_hash] = [] - self._per_input_updates[inputs_hash] += updates - - def get_updates_for(self, inputs): - if not self.trainable and not self.stateful: - return [] - if inputs is not None: - inputs_hash = object_list_uid(inputs) - else: - inputs_hash = None - if inputs_hash in self._per_input_updates: - return self._per_input_updates[inputs_hash] - return [] - - def get_losses_for(self, inputs): - if inputs is not None: - inputs_hash = object_list_uid(inputs) - else: - inputs_hash = None - if inputs_hash in self._per_input_losses: - return self._per_input_losses[inputs_hash] - return [] - - @property - def weights(self): - return self.trainable_weights + self.non_trainable_weights - - def set_weights(self, weights): - """Sets the weights of the layer, from Numpy arrays. - - # Arguments - weights: a list of Numpy arrays. The number - of arrays and their shape must match - number of the dimensions of the weights - of the layer (i.e. it should match the - output of `get_weights`). - - # Raises - ValueError: If the provided weights list does not match the - layer's specifications. - """ - params = self.weights - if len(params) != len(weights): - raise ValueError('You called `set_weights(weights)` on layer "' + - self.name + - '" with a weight list of length ' + - str(len(weights)) + - ', but the layer was expecting ' + - str(len(params)) + - ' weights. Provided weights: ' + - str(weights)[:50] + '...') - if not params: - return - weight_value_tuples = [] - param_values = K.batch_get_value(params) - for pv, p, w in zip(param_values, params, weights): - if pv.shape != w.shape: - raise ValueError('Layer weight shape ' + - str(pv.shape) + - ' not compatible with ' - 'provided weight shape ' + str(w.shape)) - weight_value_tuples.append((p, w)) - K.batch_set_value(weight_value_tuples) - - def get_weights(self): - """Returns the current weights of the layer. - - # Returns - Weights values as a list of numpy arrays. - """ - params = self.weights - return K.batch_get_value(params) - - def get_config(self): - """Returns the config of the layer. - - A layer config is a Python dictionary (serializable) - containing the configuration of a layer. - The same layer can be reinstantiated later - (without its trained weights) from this configuration. - - The config of a layer does not include connectivity - information, nor the layer class name. These are handled - by `Network` (one layer of abstraction above). - - # Returns - Python dictionary. - """ - config = {'name': self.name, - 'trainable': self.trainable} - if hasattr(self, 'batch_input_shape'): - config['batch_input_shape'] = self.batch_input_shape - if hasattr(self, 'dtype'): - config['dtype'] = self.dtype - return config - - @classmethod - def from_config(cls, config): - """Creates a layer from its config. - - This method is the reverse of `get_config`, - capable of instantiating the same layer from the config - dictionary. It does not handle layer connectivity - (handled by Network), nor weights (handled by `set_weights`). 
- - # Arguments - config: A Python dictionary, typically the - output of get_config. - - # Returns - A layer instance. - """ - return cls(**config) - - def count_params(self): - """Counts the total number of scalars composing the weights. - - # Returns - An integer count. - - # Raises - RuntimeError: if the layer isn't yet built - (in which case its weights aren't yet defined). - """ - if not self.built: - if self.__class__.__name__ == 'Sequential': - self.build() - else: - raise RuntimeError('You tried to call `count_params` on ' + - self.name + ', but the layer isn\'t built. ' - 'You can build it manually via: `' + - self.name + '.build(batch_input_shape)`.') - return count_params(self.weights) - - -class InputSpec(object): - """Specifies the ndim, dtype and shape of every input to a layer. - - Every layer should expose (if appropriate) an `input_spec` attribute: - a list of instances of InputSpec (one per input tensor). - - A None entry in a shape is compatible with any dimension, - a None shape is compatible with any shape. - - # Arguments - dtype: Expected datatype of the input. - shape: Shape tuple, expected shape of the input - (may include None for unchecked axes). - ndim: Integer, expected rank of the input. - max_ndim: Integer, maximum rank of the input. - min_ndim: Integer, minimum rank of the input. - axes: Dictionary mapping integer axes to - a specific dimension value. - """ - - def __init__(self, dtype=None, - shape=None, - ndim=None, - max_ndim=None, - min_ndim=None, - axes=None): - self.dtype = dtype - self.shape = shape - if shape is not None: - self.ndim = len(shape) - else: - self.ndim = ndim - self.max_ndim = max_ndim - self.min_ndim = min_ndim - self.axes = axes or {} - - def __repr__(self): - spec = [('dtype=' + str(self.dtype)) if self.dtype else '', - ('shape=' + str(self.shape)) if self.shape else '', - ('ndim=' + str(self.ndim)) if self.ndim else '', - ('max_ndim=' + str(self.max_ndim)) if self.max_ndim else '', - ('min_ndim=' + str(self.min_ndim)) if self.min_ndim else '', - ('axes=' + str(self.axes)) if self.axes else ''] - return 'InputSpec(%s)' % ', '.join(x for x in spec if x) - - -class Node(object): - """A `Node` describes the connectivity between two layers. - - Each time a layer is connected to some new input, - a node is added to `layer._inbound_nodes`. - Each time the output of a layer is used by another layer, - a node is added to `layer._outbound_nodes`. - - # Arguments - outbound_layer: the layer that takes - `input_tensors` and turns them into `output_tensors` - (the node gets created when the `call` - method of the layer was called). - inbound_layers: a list of layers, the same length as `input_tensors`, - the layers from where `input_tensors` originate. - node_indices: a list of integers, the same length as `inbound_layers`. - `node_indices[i]` is the origin node of `input_tensors[i]` - (necessary since each inbound layer might have several nodes, - e.g. if the layer is being shared with a different data stream). - tensor_indices: a list of integers, - the same length as `inbound_layers`. - `tensor_indices[i]` is the index of `input_tensors[i]` within the - output of the inbound layer - (necessary since each inbound layer might - have multiple tensor outputs, with each one being - independently manipulable). - input_tensors: list of input tensors. - output_tensors: list of output tensors. - input_masks: list of input masks (a mask can be a tensor, or None). - output_masks: list of output masks (a mask can be a tensor, or None). 
- input_shapes: list of input shape tuples. - output_shapes: list of output shape tuples. - arguments: dictionary of keyword arguments that were passed to the - `call` method of the layer at the call that created the node. - - `node_indices` and `tensor_indices` are basically fine-grained coordinates - describing the origin of the `input_tensors`, verifying the following: - - origin_node = inbound_layers[i]._inbound_nodes[node_indices[i]] - input_tensors[i] == origin_node.output_tensors[tensor_indices[i]] - - A node from layer A to layer B is added to: - A._outbound_nodes - B._inbound_nodes - """ - - def __init__(self, outbound_layer, - inbound_layers, node_indices, tensor_indices, - input_tensors, output_tensors, - input_masks, output_masks, - input_shapes, output_shapes, - arguments=None): - # Layer instance (NOT a list). - # this is the layer that takes a list of input tensors - # and turns them into a list of output tensors. - # the current node will be added to - # the inbound_nodes of outbound_layer. - self.outbound_layer = outbound_layer - - # The following 3 properties describe where - # the input tensors come from: which layers, - # and for each layer, which node and which - # tensor output of each node. - - # List of layer instances. - self.inbound_layers = inbound_layers - # List of integers, 1:1 mapping with inbound_layers. - self.node_indices = node_indices - # List of integers, 1:1 mapping with inbound_layers. - self.tensor_indices = tensor_indices - - # Following 2 properties: - # tensor inputs and outputs of outbound_layer. - - # List of tensors. 1:1 mapping with inbound_layers. - self.input_tensors = input_tensors - # List of tensors, created by outbound_layer.call(). - self.output_tensors = output_tensors - - # Following 2 properties: input and output masks. - # List of tensors, 1:1 mapping with input_tensor. - self.input_masks = input_masks - # List of tensors, created by outbound_layer.compute_mask(). - self.output_masks = output_masks - - # Following 2 properties: input and output shapes. - - # List of shape tuples, shapes of input_tensors. - self.input_shapes = input_shapes - # List of shape tuples, shapes of output_tensors. - self.output_shapes = output_shapes - - # Optional keyword arguments to layer's `call`. - self.arguments = arguments - - # Add nodes to all layers involved. - for layer in inbound_layers: - if layer is not None: - layer._outbound_nodes.append(self) - outbound_layer._inbound_nodes.append(self) - - def get_config(self): - inbound_names = [] - for layer in self.inbound_layers: - if layer: - inbound_names.append(layer.name) - else: - inbound_names.append(None) - if self.outbound_layer: - outbound_layer = self.outbound_layer.name - else: - outbound_layer = None - return {'outbound_layer': outbound_layer, - 'inbound_layers': inbound_names, - 'node_indices': self.node_indices, - 'tensor_indices': self.tensor_indices} - - -def _collect_previous_mask(input_tensors): - """Retrieves the output mask(s) of the previous node. - - # Arguments - input_tensors: A tensor or list of tensors. - - # Returns - A mask tensor or list of mask tensors. 
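-
-     Tensors that carry no `_keras_history` (i.e. tensors not produced
-     by a Keras layer) contribute `None` to the result.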
- """ - input_tensors = to_list(input_tensors) - masks = [] - for x in input_tensors: - if hasattr(x, '_keras_history'): - inbound_layer, node_index, tensor_index = x._keras_history - node = inbound_layer._inbound_nodes[node_index] - mask = node.output_masks[tensor_index] - masks.append(mask) - else: - masks.append(None) - return unpack_singleton(masks) - - -def _to_snake_case(name): - intermediate = re.sub('(.)([A-Z][a-z0-9]+)', r'\1_\2', name) - insecure = re.sub('([a-z])([A-Z])', r'\1_\2', intermediate).lower() - # If the class is private the name starts with "_" which is not secure - # for creating scopes. We prefix the name with "private" in this case. - if insecure[0] != '_': - return insecure - return 'private' + insecure - - -def _collect_input_shape(input_tensors): - """Collects the output shape(s) of a list of Keras tensors. - - # Arguments - input_tensors: list of input tensors (or single input tensor). - - # Returns - List of shape tuples (or single tuple), one tuple per input. - """ - input_tensors = to_list(input_tensors) - shapes = [] - for x in input_tensors: - try: - shapes.append(K.int_shape(x)) - except TypeError: - shapes.append(None) - return unpack_singleton(shapes) -"""Input layer code (`Input` and `InputLayer`). -""" -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division - -from .base_layer import Layer -from .base_layer import Node -from .. import backend as K -from ..legacy import interfaces -from ..utils.generic_utils import unpack_singleton - - -class InputLayer(Layer): - """Layer to be used as an entry point into a model. - - It can either wrap an existing tensor (pass an `input_tensor` argument) - or create its a placeholder tensor (pass arguments `input_shape` - or `batch_input_shape` as well as `dtype`). - - # Arguments - input_shape: Shape tuple, not including the batch axis. - batch_size: Optional input batch size (integer or None). - batch_input_shape: Shape tuple, including the batch axis. - dtype: Datatype of the input. - input_tensor: Optional tensor to use as layer input - instead of creating a placeholder. - sparse: Boolean, whether the placeholder created - is meant to be sparse. - name: Name of the layer (string). - """ - - @interfaces.legacy_input_support - def __init__(self, input_shape=None, batch_size=None, - batch_input_shape=None, - dtype=None, input_tensor=None, sparse=False, name=None): - if not name: - prefix = 'input' - name = prefix + '_' + str(K.get_uid(prefix)) - super(InputLayer, self).__init__(dtype=dtype, name=name) - - self.trainable = False - self.built = True - self.sparse = sparse - self.supports_masking = True - - if input_shape and batch_input_shape: - raise ValueError('Only provide the input_shape OR ' - 'batch_input_shape argument to ' - 'InputLayer, not both at the same time.') - if input_tensor is not None and batch_input_shape is None: - # If input_tensor is set, and batch_input_shape is not set: - # Attempt automatic input shape inference. - try: - batch_input_shape = K.int_shape(input_tensor) - except TypeError: - if not input_shape and not batch_input_shape: - raise ValueError('InputLayer was provided ' - 'an input_tensor argument, ' - 'but its input shape cannot be ' - 'automatically inferred. 
' - 'You should pass an input_shape or ' - 'batch_input_shape argument.') - if not batch_input_shape: - if not input_shape: - raise ValueError('An Input layer should be passed either ' - 'a `batch_input_shape` or an `input_shape`.') - else: - batch_input_shape = (batch_size,) + tuple(input_shape) - else: - batch_input_shape = tuple(batch_input_shape) - - if not dtype: - if input_tensor is None: - dtype = K.floatx() - else: - dtype = K.dtype(input_tensor) - - self.batch_input_shape = batch_input_shape - self.dtype = dtype - - if input_tensor is None: - self.is_placeholder = True - input_tensor = K.placeholder(shape=batch_input_shape, - dtype=dtype, - sparse=self.sparse, - name=self.name) - else: - self.is_placeholder = False - input_tensor._keras_shape = batch_input_shape - # Create an input node to add to self.outbound_node - # and set output_tensors' _keras_history. - input_tensor._uses_learning_phase = False - input_tensor._keras_history = (self, 0, 0) - Node(self, - inbound_layers=[], - node_indices=[], - tensor_indices=[], - input_tensors=[input_tensor], - output_tensors=[input_tensor], - input_masks=[None], - output_masks=[None], - input_shapes=[batch_input_shape], - output_shapes=[batch_input_shape]) - - def get_config(self): - config = {'batch_input_shape': self.batch_input_shape, - 'dtype': self.dtype, - 'sparse': self.sparse, - 'name': self.name} - return config - - -def Input(shape=None, batch_shape=None, - name=None, dtype=None, sparse=False, - tensor=None): - """`Input()` is used to instantiate a Keras tensor. - - A Keras tensor is a tensor object from the underlying backend - (Theano, TensorFlow or CNTK), which we augment with certain - attributes that allow us to build a Keras model - just by knowing the inputs and outputs of the model. - - For instance, if a, b and c are Keras tensors, - it becomes possible to do: - `model = Model(input=[a, b], output=c)` - - The added Keras attributes are: - `_keras_shape`: Integer shape tuple propagated - via Keras-side shape inference. - `_keras_history`: Last layer applied to the tensor. - the entire layer graph is retrievable from that layer, - recursively. - - # Arguments - shape: A shape tuple (integer), not including the batch size. - For instance, `shape=(32,)` indicates that the expected input - will be batches of 32-dimensional vectors. - batch_shape: A shape tuple (integer), including the batch size. - For instance, `batch_shape=(10, 32)` indicates that - the expected input will be batches of 10 32-dimensional vectors. - `batch_shape=(None, 32)` indicates batches of an arbitrary number - of 32-dimensional vectors. - name: An optional name string for the layer. - Should be unique in a model (do not reuse the same name twice). - It will be autogenerated if it isn't provided. - dtype: The data type expected by the input, as a string - (`float32`, `float64`, `int32`...) - sparse: A boolean specifying whether the placeholder - to be created is sparse. - tensor: Optional existing tensor to wrap into the `Input` layer. - If set, the layer will not create a placeholder tensor. - - # Returns - A tensor. - - # Example - - ```python - # this is a logistic regression in Keras - x = Input(shape=(32,)) - y = Dense(16, activation='softmax')(x) - model = Model(x, y) - ``` - """ - if not batch_shape and tensor is None: - assert shape is not None, ('Please provide to Input either a `shape`' - ' or a `batch_shape` argument. 
Note that ' - '`shape` does not include the batch ' - 'dimension.') - if shape is not None and not batch_shape: - batch_shape = (None,) + tuple(shape) - if not dtype: - dtype = K.floatx() - input_layer = InputLayer(batch_input_shape=batch_shape, - name=name, dtype=dtype, - sparse=sparse, - input_tensor=tensor) - # Return tensor including _keras_shape and _keras_history. - # Note that in this case train_output and test_output are the same pointer. - outputs = input_layer._inbound_nodes[0].output_tensors - return unpack_singleton(outputs) -"""A `Network` is a way to compose layers: the topological form of a `Model`. -""" -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division - -import numpy as np -import json -import yaml -import warnings -import copy -import os -from six.moves import zip - -from . import saving -from .base_layer import Layer -from .base_layer import Node -from .input_layer import InputLayer -from .. import backend as K -from ..utils.io_utils import ask_to_proceed_with_overwrite -from ..utils.layer_utils import print_summary as print_layer_summary -from ..utils.layer_utils import get_source_inputs -from ..utils.generic_utils import has_arg -from ..utils.generic_utils import to_list -from ..utils.generic_utils import object_list_uid -from ..utils.generic_utils import unpack_singleton -from ..legacy import interfaces - -try: - import h5py -except ImportError: - h5py = None - - -class Network(Layer): - """A Network is a directed acyclic graph of layers. - - It is the topological form of a "model". A Model - is simply a Network with added training routines. - - # Properties - name - inputs - outputs - layers - input_spec (list of class instances) - each entry describes one required input: - - ndim - - dtype - trainable (boolean) - input_shape - output_shape - weights (list of variables) - trainable_weights (list of variables) - non_trainable_weights (list of variables) - losses - updates - state_updates - stateful - - # Methods - __call__ - summary - get_layer - get_weights - set_weights - get_config - compute_output_shape - save - add_loss - add_update - get_losses_for - get_updates_for - to_json - to_yaml - reset_states - - # Class Methods - from_config - - # Raises - TypeError: if input tensors are not Keras tensors - (tensors returned by `Input`). - """ - - @interfaces.legacy_model_constructor_support - def __init__(self, *args, **kwargs): - # Signature detection - if (len(args) == 2 or - len(args) == 1 and 'outputs' in kwargs or - 'inputs' in kwargs and 'outputs' in kwargs): - # Graph network - self._init_graph_network(*args, **kwargs) - else: - # Subclassed network - self._init_subclassed_network(**kwargs) - - def _base_init(self, name=None): - # The following are implemented as property functions: - # self.trainable_weights - # self.non_trainable_weights - # self.input_spec - # self.losses - # self.updates - - # Handle `name` argument. - if not name: - prefix = self.__class__.__name__.lower() - name = prefix + '_' + str(K.get_uid(prefix)) - self.name = name - - # This acts just like the `trainable` attribute of any layer instance. - # It does not affect users of the underlying layers, only users of the - # Network instance. - self.trainable = True - self._is_compiled = False - self._expects_training_arg = False - self._initial_weights = None - - self.supports_masking = False - if not hasattr(self, 'optimizer'): - # Don't reset optimizer if already set. 
- self.optimizer = None - - # Private attributes to implement compatibility with Layer. - self._updates = [] - self._losses = [] - self._per_input_losses = {} - self._per_input_updates = {} - - # All layers in order of horizontal graph traversal. - # Entries are unique. Includes input and output layers. - self._layers = [] - - # Used only in conjunction with graph-networks - self._outbound_nodes = [] - self._inbound_nodes = [] - - def _init_graph_network(self, inputs, outputs, name=None): - self._uses_inputs_arg = True - # Normalize and set self.inputs, self.outputs. - self.inputs = to_list(inputs, allow_tuple=True) - self.outputs = to_list(outputs, allow_tuple=True) - - # User-provided argument validation. - # Check for redundancy in inputs. - if len(set(self.inputs)) != len(self.inputs): - raise ValueError('The list of inputs passed to the model ' - 'is redundant. ' - 'All inputs should only appear once.' - ' Found: ' + str(self.inputs)) - for x in self.inputs: - # Check that x has appropriate `_keras_history` metadata. - if not hasattr(x, '_keras_history'): - cls_name = self.__class__.__name__ - raise ValueError('Input tensors to a ' + cls_name + ' ' + - 'must come from `keras.layers.Input`. ' - 'Received: ' + str(x) + - ' (missing previous layer metadata).') - # Check that x is an input tensor. - layer, node_index, tensor_index = x._keras_history - if (len(layer._inbound_nodes) > 1 or - (layer._inbound_nodes and - layer._inbound_nodes[0].inbound_layers)): - cls_name = self.__class__.__name__ - warnings.warn(cls_name + ' inputs must come from ' - '`keras.layers.Input` ' - '(thus holding past layer metadata), ' - 'they cannot be the output of ' - 'a previous non-Input layer. ' - 'Here, a tensor specified as ' - 'input to your model ' - 'was not an Input tensor, ' - 'it was generated by layer ' + - layer.name + '.\n' - 'Note that input tensors are ' - 'instantiated via ' - '`tensor = keras.layers.Input(shape)`.\n' - 'The tensor that caused the issue was: ' + - str(x.name)) - for x in self.outputs: - if not hasattr(x, '_keras_history'): - cls_name = self.__class__.__name__ - raise ValueError('Output tensors to a ' + cls_name + - ' must be ' - 'the output of a Keras `Layer` ' - '(thus holding past layer metadata). ' - 'Found: ' + str(x)) - self._base_init(name=name) - self._compute_previous_mask = ( - has_arg(self.call, 'mask') or - hasattr(self, 'compute_mask')) - # A Network does not create weights of its own, - # thus it is already built. - self.built = True - self._is_graph_network = True - - self._input_layers = [] - self._output_layers = [] - self._input_coordinates = [] - self._output_coordinates = [] - - # This is for performance optimization when calling the Network on new - # inputs. Every time the Network is called on a set of input tensors, - # we compute the output tensors, - # output masks and output shapes in one pass, - # then cache them here. When any of these outputs is queried later, we - # retrieve it from there instead of recomputing it. - self._output_mask_cache = {} - self._output_tensor_cache = {} - self._output_shape_cache = {} - - # Build self._output_layers: - for x in self.outputs: - layer, node_index, tensor_index = x._keras_history - self._output_layers.append(layer) - self._output_coordinates.append((layer, node_index, tensor_index)) - - # Build self._input_layers: - for x in self.inputs: - layer, node_index, tensor_index = x._keras_history - # It's supposed to be an input layer, so only one node - # and one tensor output. 
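- # (An InputLayer registers exactly one inbound node with a single - # output tensor at construction time, so `x._keras_history` here is - # always `(input_layer, 0, 0)`.)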
- assert node_index == 0 - assert tensor_index == 0 - self._input_layers.append(layer) - self._input_coordinates.append((layer, node_index, tensor_index)) - - # Keep track of the network's nodes and layers. - nodes, nodes_by_depth, layers, layers_by_depth = _map_graph_network( - self.inputs, self.outputs) - self._network_nodes = nodes - self._nodes_by_depth = nodes_by_depth - self._layers = layers - self._layers_by_depth = layers_by_depth - - # Create the node linking internal inputs to internal outputs. - Node(outbound_layer=self, - inbound_layers=[], - node_indices=[], - tensor_indices=[], - input_tensors=self.inputs, - output_tensors=self.outputs, - # No network-level masking for now. - input_masks=[None for _ in self.inputs], - output_masks=[None for _ in self.outputs], - input_shapes=[x._keras_shape for x in self.inputs], - output_shapes=[x._keras_shape for x in self.outputs]) - - # Fill in the output mask cache. - masks = [] - for x in self.inputs: - layer, node_index, tensor_index = x._keras_history - node = layer._inbound_nodes[node_index] - mask = node.output_masks[tensor_index] - masks.append(mask) - mask_cache_key = object_list_uid(inputs) - mask_cache_key += '_' + object_list_uid(masks) - masks = [] - for x in self.outputs: - layer, node_index, tensor_index = x._keras_history - node = layer._inbound_nodes[node_index] - mask = node.output_masks[tensor_index] - masks.append(mask) - mask = unpack_singleton(masks) - self._output_mask_cache[mask_cache_key] = mask - - # Build self.input_names and self.output_names. - self.input_names = [] - self.output_names = [] - self._feed_input_names = [] - self._feed_inputs = [] - self._feed_input_shapes = [] - for i, layer in enumerate(self._input_layers): - # Check that layer is an InputLayer. - if not isinstance(layer, InputLayer): - raise TypeError( - 'Input layers to a `Model` must be `InputLayer` objects. ' - 'Received inputs: {}. ' - 'Input {} (0-based) originates ' - 'from layer type `{}`.'.format(inputs, - i, - layer.__class__.__name__)) - self.input_names.append(layer.name) - if layer.is_placeholder: - self._feed_inputs.append(layer.input) - self._feed_input_names.append(layer.name) - self._feed_input_shapes.append(self.inputs[i]._keras_shape) - - for layer in self._output_layers: - self.output_names.append(layer.name) - - def _init_subclassed_network(self, name=None): - self._base_init(name=name) - self._is_graph_network = False - self._expects_training_arg = has_arg(self.call, 'training') - self._uses_inputs_arg = has_arg(self.call, 'inputs') - self.outputs = None - self.inputs = None - self.built = False - - def __setattr__(self, name, value): - # Automatically track layers set as Model - # attributes for subclassed Models. - if isinstance(value, (Layer, Network)): - try: - is_graph_network = self._is_graph_network - except AttributeError: - raise RuntimeError( - 'It looks like you are subclassing `Model` and you ' - 'forgot to call `super(YourClass, self).__init__()`.' - ' Always start with this line.') - if not is_graph_network: - if value not in self._layers: - self._layers.append(value) - super(Network, self).__setattr__(name, value) - - @property - def layers(self): - return self._layers - - def get_layer(self, name=None, index=None): - """Retrieves a layer based on either its name (unique) or index. - - If `name` and `index` are both provided, `index` will take precedence. - - Indices are based on order of horizontal graph traversal (bottom-up). - - # Arguments - name: String, name of layer. - index: Integer, index of layer. 
- - # Returns - A layer instance. - - # Raises - ValueError: In case of invalid layer name or index. - """ - # It would be unreliable to build a dictionary - # based on layer names, because names can potentially - # be changed at any point by the user - # without the network being notified of it. - if index is not None: - if len(self.layers) <= index: - raise ValueError('Was asked to retrieve layer at index ' + - str(index) + ' but model only has ' + - str(len(self.layers)) + ' layers.') - else: - return self.layers[index] - else: - if not name: - raise ValueError('Provide either a layer name or layer index.') - - for layer in self.layers: - if layer.name == name: - return layer - - raise ValueError('No such layer: ' + name) - - @property - def updates(self): - """Retrieves the model's updates. - - Will only include updates that are either - unconditional, or conditional on inputs to this model - (e.g. will not include updates that depend on tensors - that aren't inputs to this model). - - # Returns - A list of update ops. - """ - if not self.trainable and not self.stateful: - return [] - updates = [] - for layer in self.layers: - if hasattr(layer, 'updates'): - if self._is_graph_network: - # Collect updates that are dependent on inputs - # that are part of the model. - for node_index, node in enumerate(layer._inbound_nodes): - node_key = self._node_key(layer, node_index) - if node_key in self._network_nodes: - # The model owns this layer node. - inputs = node.input_tensors - updates += layer.get_updates_for(inputs) - # Collect unconditional updates. - updates += layer.get_updates_for(None) - else: - updates += layer.updates - return updates - - @property - def losses(self): - """Retrieves the model's losses. - - Will only include losses that are either - unconditional, or conditional on inputs to this model - (e.g. will not include losses that depend on tensors - that aren't inputs to this model). - - # Returns - A list of loss tensors. - """ - losses = [] - for layer in self.layers: - if hasattr(layer, 'losses'): - if self._is_graph_network: - # Collect losses that are dependent on inputs - # that are part of the model. - for node_index, node in enumerate(layer._inbound_nodes): - node_key = self._node_key(layer, node_index) - if node_key in self._network_nodes: - # The model owns this layer node. - inputs = node.input_tensors - losses += layer.get_losses_for(inputs) - # Collect unconditional losses. - losses += layer.get_losses_for(None) - else: - losses += layer.losses - - # Add any potential unconditional model-level loss. - losses += self.get_losses_for(None) - - unique_tensors = list( - set(x for x in losses if not isinstance(x, (float, int)))) - non_tensors = [x for x in losses if isinstance(x, (float, int))] - return unique_tensors + non_tensors - - @property - def uses_learning_phase(self): - if not self.outputs: - return False - return any([x._uses_learning_phase for x in self.outputs]) - - @property - def stateful(self): - return any([(hasattr(layer, 'stateful') and - layer.stateful) for layer in self.layers]) - - def reset_states(self): - for layer in self.layers: - if hasattr(layer, 'reset_states') and getattr(layer, 'stateful', False): - layer.reset_states() - - @property - def state_updates(self): - """Returns the `updates` from all layers that are stateful. - - This is useful for separating training updates and - state updates, e.g. when we need to update a layer's internal state - during prediction. - - # Returns - A list of update ops. 
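- - # Example - - ```python - # Illustrative sketch: build a predict function that also applies - # the model's internal state updates (e.g. stateful RNN states); - # `model` is assumed to be a built model with stateful layers. - predict_fn = K.function(model.inputs, - model.outputs, - updates=model.state_updates) - ```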
- """ - state_updates = [] - for layer in self.layers: - if layer.stateful: - state_updates += layer.updates - return state_updates - - @property - def trainable_weights(self): - if not self.trainable: - return [] - weights = [] - for layer in self.layers: - weights += layer.trainable_weights - return weights - - @property - def non_trainable_weights(self): - weights = [] - for layer in self.layers: - weights += layer.non_trainable_weights - if not self.trainable: - trainable_weights = [] - for layer in self.layers: - trainable_weights += layer.trainable_weights - return trainable_weights + weights - return weights - - def get_weights(self): - """Retrieves the weights of the model. - - # Returns - A flat list of Numpy arrays. - """ - weights = [] - for layer in self.layers: - weights += layer.weights - return K.batch_get_value(weights) - - def set_weights(self, weights): - """Sets the weights of the model. - - # Arguments - weights: A list of Numpy arrays with shapes and types matching - the output of `model.get_weights()`. - """ - tuples = [] - for layer in self.layers: - num_param = len(layer.weights) - layer_weights = weights[:num_param] - for sw, w in zip(layer.weights, layer_weights): - tuples.append((sw, w)) - weights = weights[num_param:] - K.batch_set_value(tuples) - - @property - def input_spec(self): - """Gets the model's input specs. - - # Returns - A list of `InputSpec` instances (one per input to the model) - or a single instance if the model has only one input. - """ - if not self._is_graph_network: - # TODO: support it in subclassed networks after inputs are set. - return None - - specs = [] - for layer in getattr(self, '_input_layers', []): - if layer.input_spec is None: - specs.append(None) - else: - if not isinstance(layer.input_spec, list): - raise TypeError('Layer ' + layer.name + - ' has an input_spec attribute that ' - 'is not a list. We expect a list. ' - 'Found input_spec = ' + - str(layer.input_spec)) - specs += layer.input_spec - return unpack_singleton(specs) - - def call(self, inputs, mask=None): - """Calls the model on new inputs. - - In this case `call` just reapplies - all ops in the graph to the new inputs - (e.g. build a new computational graph from the provided inputs). - - A model is callable on non-Keras tensors. - - # Arguments - inputs: A tensor or list of tensors. - mask: A mask or list of masks. A mask can be - either a tensor or None (no mask). - - # Returns - A tensor if there is a single output, or - a list of tensors if there are more than one outputs. - """ - inputs = to_list(inputs) - if mask is None: - masks = [None for _ in range(len(inputs))] - else: - masks = to_list(mask) - cache_key = object_list_uid(inputs) - cache_key += '_' + object_list_uid(masks) - if cache_key in self._output_tensor_cache: - return self._output_tensor_cache[cache_key] - else: - output_tensors, _, _ = self.run_internal_graph(inputs, masks) - return output_tensors - - def compute_mask(self, inputs, mask): - if not self._is_graph_network: - return None - - inputs = to_list(inputs) - if mask is None: - masks = [None for _ in range(len(inputs))] - else: - masks = to_list(mask) - cache_key = object_list_uid(inputs) - cache_key += '_' + object_list_uid(masks) - if cache_key in self._output_mask_cache: - return self._output_mask_cache[cache_key] - else: - _, output_masks, _ = self.run_internal_graph(inputs, masks) - return output_masks - - def compute_output_shape(self, input_shape): - if not self._is_graph_network: - # Must be implemented by subclasses. 
- raise NotImplementedError - - input_shapes = to_list(input_shape) - if len(input_shapes) != len(self._input_layers): - raise ValueError('Invalid input_shape argument ' + - str(input_shape) + ': model has ' + - str(len(self._input_layers)) + ' tensor inputs.') - - cache_key = ', '.join([str(x) for x in input_shapes]) - if cache_key in self._output_shape_cache: - output_shapes = self._output_shape_cache[cache_key] - if isinstance(output_shapes, list): - return unpack_singleton(output_shapes) - return output_shapes - else: - # Bad luck, we have to run the graph manually. - layers_to_output_shapes = {} - for i in range(len(input_shapes)): - layer = self._input_layers[i] - input_shape = input_shapes[i] - # It's an input layer: compute_output_shape is identity, - # and there is only one node and one tensor output. - shape_key = layer.name + '_0_0' - layers_to_output_shapes[shape_key] = input_shape - - depth_keys = list(self._nodes_by_depth.keys()) - depth_keys.sort(reverse=True) - # Iterate over nodes, by depth level. - if len(depth_keys) > 1: - for depth in depth_keys: - nodes = self._nodes_by_depth[depth] - for node in nodes: - # This is always a single layer, never a list. - layer = node.outbound_layer - if layer in self._input_layers: - # We've already covered the input layers - # a few lines above. - continue - # Potentially redundant list, - # same size of node.input_tensors. - input_shapes = [] - for j in range(len(node.inbound_layers)): - inbound_layer = node.inbound_layers[j] - node_index = node.node_indices[j] - tensor_index = node.tensor_indices[j] - shape_key = inbound_layer.name - shape_key += '_%s_%s' % (node_index, tensor_index) - input_shape = layers_to_output_shapes[shape_key] - input_shapes.append(input_shape) - - output_shape = layer.compute_output_shape( - unpack_singleton(input_shapes)) - - output_shapes = to_list(output_shape) - node_index = layer._inbound_nodes.index(node) - for j in range(len(output_shapes)): - shape_key = layer.name + '_%s_%s' % (node_index, j) - layers_to_output_shapes[shape_key] = output_shapes[j] - - # Read final output shapes from layers_to_output_shapes. - output_shapes = [] - output_shape_keys = [] - for i in range(len(self._output_layers)): - layer = self._output_layers[i] - node_index = self._output_coordinates[i][1] - tensor_index = self._output_coordinates[i][2] - shape_key = layer.name + '_%s_%s' % (node_index, tensor_index) - output_shape_keys.append(shape_key) - - for i, key in enumerate(output_shape_keys): - assert key in layers_to_output_shapes - output_shapes.append(layers_to_output_shapes[key]) - # Store in cache. - self._output_shape_cache[cache_key] = output_shapes - if isinstance(output_shapes, list): - return unpack_singleton(output_shapes) - return output_shapes - - def run_internal_graph(self, inputs, masks=None): - """Computes output tensors for new inputs. - - # Note: - - Expects `inputs` to be a list (potentially with 1 element). - - Can be run on non-Keras tensors. - - # Arguments - inputs: List of tensors - masks: List of masks (tensors or None). 
- - # Returns - Three lists: output_tensors, output_masks, output_shapes - """ - if masks is None: - masks = [None for _ in range(len(inputs))] - - # Dictionary mapping reference tensors to tuples - # (computed tensor, compute mask) - # we assume a 1:1 mapping from tensor to mask - # TODO: raise exception when a `.compute_mask()` call - # does not return a list the same size as `call` - tensor_map = {} - for x, y, mask in zip(self.inputs, inputs, masks): - tensor_map[str(id(x))] = (y, mask) - - depth_keys = list(self._nodes_by_depth.keys()) - depth_keys.sort(reverse=True) - for depth in depth_keys: - nodes = self._nodes_by_depth[depth] - for node in nodes: - # This is always a single layer, never a list. - layer = node.outbound_layer - reference_input_tensors = node.input_tensors - reference_output_tensors = node.output_tensors - - # If all previous input tensors are available in tensor_map, - # then call node.inbound_layer on them. - computed_data = [] # List of tuples (input, mask). - for x in reference_input_tensors: - if str(id(x)) in tensor_map: - computed_data.append(tensor_map[str(id(x))]) - - if len(computed_data) == len(reference_input_tensors): - # call layer - with K.name_scope(layer.name): - if node.arguments: - kwargs = node.arguments - else: - kwargs = {} - if len(computed_data) == 1: - computed_tensor, computed_mask = computed_data[0] - if has_arg(layer.call, 'mask'): - if 'mask' not in kwargs: - kwargs['mask'] = computed_mask - output_tensors = to_list( - layer.call(computed_tensor, **kwargs)) - output_masks = layer.compute_mask(computed_tensor, - computed_mask) - if output_masks is None: - output_masks = [None for _ in output_tensors] - else: - output_masks = to_list(output_masks) - computed_tensors = [computed_tensor] - - # computed_masks might be used in the future. - computed_masks = [computed_mask] - else: - computed_tensors = [x[0] for x in computed_data] - computed_masks = [x[1] for x in computed_data] - if has_arg(layer.call, 'mask'): - if 'mask' not in kwargs: - kwargs['mask'] = computed_masks - output_tensors = to_list( - layer.call(computed_tensors, **kwargs)) - output_masks = layer.compute_mask(computed_tensors, - computed_masks) - if output_masks is None: - output_masks = [None for _ in output_tensors] - else: - output_masks = to_list(output_masks) - # Apply activity regularizer if any: - if (hasattr(layer, 'activity_regularizer') and - layer.activity_regularizer is not None): - with K.name_scope('activity_regularizer'): - regularization_losses = [ - layer.activity_regularizer(x) - for x in output_tensors] - layer.add_loss(regularization_losses, - inputs=computed_tensors) - - if len(output_masks) != len(output_tensors): - raise Exception( - 'Layers should have equal number of output tensors ' - 'and output masks. Layer ' + - str(layer.name) + ' has' - ' ' + str(len(output_tensors)) + - ' output tensors ' - 'and ' + str(len(output_masks)) + ' output masks.') - # Update model updates and losses: - # Keep track of updates that depend on the inputs - # (e.g. BN updates). - self.add_update(layer.get_updates_for( - computed_tensors), inputs) - # Keep track of unconditional updates (e.g. a counter). - self.add_update(layer.get_updates_for(None), None) - # Keep track of losses that depend on the inputs - # (e.g. activity regularizers). - self.add_loss(layer.get_losses_for( - computed_tensors), inputs) - # Keep track of unconditional losses - # (e.g. weight regularizers). - self.add_loss(layer.get_losses_for(None), None) - - # Update _keras_shape. 
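- # Static shape information is lost once the backend op is created, - # so below we re-attach `_keras_shape` (and the learning-phase - # flag) to every freshly computed output tensor.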
- if all([hasattr(x, '_keras_shape') for x in computed_tensors]): - input_shapes = unpack_singleton( - [x._keras_shape for x in computed_tensors]) - shapes = to_list( - layer.compute_output_shape(input_shapes)) - uses_learning_phase = any( - [x._uses_learning_phase for x in computed_tensors]) - - for x, s in zip(output_tensors, shapes): - x._keras_shape = s - _u = getattr(x, '_uses_learning_phase', False) - x._uses_learning_phase = _u or uses_learning_phase - - # Update tensor_map. - for x, y, mask in zip(reference_output_tensors, - output_tensors, - output_masks): - tensor_map[str(id(x))] = (y, mask) - - output_tensors = [] - output_masks = [] - output_shapes = [] - for x in self.outputs: - assert str( - id(x)) in tensor_map, 'Could not compute output ' + str(x) - tensor, mask = tensor_map[str(id(x))] - if hasattr(tensor, '_keras_shape') and output_shapes is not None: - shape = tensor._keras_shape - output_shapes.append(shape) - else: - output_shapes = None - output_tensors.append(tensor) - output_masks.append(mask) - - # Update cache; - # keys are based on ids on input tensors and inputs masks. - cache_key = object_list_uid(inputs) - cache_key += '_' + object_list_uid(masks) - - output_tensors = unpack_singleton(output_tensors) - self._output_tensor_cache[cache_key] = output_tensors - - output_masks = unpack_singleton(output_masks) - self._output_mask_cache[cache_key] = output_masks - - if output_shapes is not None: - input_shapes = [x._keras_shape for x in inputs] - cache_key = ', '.join([str(x) for x in input_shapes]) - - output_shapes = unpack_singleton(output_shapes) - self._output_shape_cache[cache_key] = output_shapes - return output_tensors, output_masks, output_shapes - - def get_config(self): - if not self._is_graph_network: - # Subclassed networks are not serializable - # (unless serialization is implemented by - # the author of the subclassed network). - raise NotImplementedError - - config = { - 'name': self.name, - } - - # Build a map from a layer unique name (self._node_key) - # to the index of the nodes that are saved in the config. - # Only nodes in network_nodes are saved. - node_conversion_map = {} - for layer in self.layers: - if issubclass(layer.__class__, Network): - # Networks start with a pre-existing node - # linking their input to output. - kept_nodes = 1 - else: - kept_nodes = 0 - for original_node_index, node in enumerate(layer._inbound_nodes): - node_key = self._node_key(layer, original_node_index) - if node_key in self._network_nodes: - # i.e. we mark it to be saved - node_conversion_map[node_key] = kept_nodes - kept_nodes += 1 - - # serialize and save the layers in layer_configs - layer_configs = [] - for layer in self.layers: # From the earliest layers on. - layer_class_name = layer.__class__.__name__ - layer_config = layer.get_config() - filtered_inbound_nodes = [] - for original_node_index, node in enumerate(layer._inbound_nodes): - node_key = self._node_key(layer, original_node_index) - if node_key in self._network_nodes: - # The node is relevant to the model: - # add to filtered_inbound_nodes. - if node.arguments: - try: - json.dumps(node.arguments) - kwargs = node.arguments - except TypeError: - warnings.warn( - 'Layer ' + layer.name + - ' was passed non-serializable ' - 'keyword arguments: ' + - str(node.arguments) + - '. 
They will not be included ' - 'in the serialized model ' - '(and thus will be missing ' - 'at deserialization time).') - kwargs = {} - else: - kwargs = {} - if node.inbound_layers: - node_data = [] - for i in range(len(node.inbound_layers)): - inbound_layer = node.inbound_layers[i] - node_index = node.node_indices[i] - tensor_index = node.tensor_indices[i] - - new_node_index = node_conversion_map.get( - self._node_key(inbound_layer, node_index), 0) - node_data.append([inbound_layer.name, - new_node_index, - tensor_index, - kwargs]) - filtered_inbound_nodes.append(node_data) - layer_configs.append({ - 'name': layer.name, - 'class_name': layer_class_name, - 'config': layer_config, - 'inbound_nodes': filtered_inbound_nodes, - }) - config['layers'] = layer_configs - - # Gather info about inputs and outputs. - model_inputs = [] - for i in range(len(self._input_layers)): - layer = self._input_layers[i] - node_index = self._input_coordinates[i][1] - - node_key = self._node_key(layer, node_index) - if node_key not in self._network_nodes: - continue - new_node_index = node_conversion_map[node_key] - tensor_index = self._input_coordinates[i][2] - model_inputs.append([layer.name, new_node_index, tensor_index]) - config['input_layers'] = model_inputs - model_outputs = [] - for i in range(len(self._output_layers)): - layer = self._output_layers[i] - node_index = self._output_coordinates[i][1] - - node_key = self._node_key(layer, node_index) - if node_key not in self._network_nodes: - continue - new_node_index = node_conversion_map[node_key] - tensor_index = self._output_coordinates[i][2] - model_outputs.append([layer.name, new_node_index, tensor_index]) - config['output_layers'] = model_outputs - return copy.deepcopy(config) - - @classmethod - def from_config(cls, config, custom_objects=None): - """Instantiates a Model from its config (output of `get_config()`). - - # Arguments - config: Model config dictionary. - custom_objects: Optional dictionary mapping names - (strings) to custom classes or functions to be - considered during deserialization. - - # Returns - A model instance. - - # Raises - ValueError: In case of improperly formatted config dict. - """ - # Layer instances created during - # the graph reconstruction process - created_layers = {} - - # Dictionary mapping layer instances to - # node data that specifies a layer call. - # It acts as a queue that maintains any unprocessed - # layer call until it becomes possible to process it - # (i.e. until the input tensors to the call all exist). 
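- # Maps each layer instance to the list of node_data entries - # (layer calls) that could not be replayed yet.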
- unprocessed_nodes = {} - - def add_unprocessed_node(layer, node_data): - """Add node to layer list - - # Arguments - layer: layer object - node_data: Node data specifying layer call - """ - if layer not in unprocessed_nodes: - unprocessed_nodes[layer] = [node_data] - else: - unprocessed_nodes[layer].append(node_data) - - def process_node(layer, node_data): - """Reconstruct node by linking to inbound layers - - # Arguments - layer: Layer to process - node_data: List of layer configs - - # Raises - ValueError: For incorrect layer config - LookupError: If layer required is not found - """ - input_tensors = [] - for input_data in node_data: - inbound_layer_name = input_data[0] - inbound_node_index = input_data[1] - inbound_tensor_index = input_data[2] - if len(input_data) == 3: - kwargs = {} - elif len(input_data) == 4: - kwargs = input_data[3] - else: - raise ValueError('Improperly formatted model config.') - inbound_layer = created_layers[inbound_layer_name] - # Raise an error if the corresponding layer node - # has not yet been created - if len(inbound_layer._inbound_nodes) <= inbound_node_index: - raise LookupError - inbound_node = inbound_layer._inbound_nodes[inbound_node_index] - input_tensors.append( - inbound_node.output_tensors[inbound_tensor_index]) - - # Call layer on its inputs, thus creating the node - # and building the layer if needed. - if input_tensors: - layer(unpack_singleton(input_tensors), **kwargs) - - def process_layer(layer_data): - """Deserializes a layer, then call it on appropriate inputs. - - # Arguments - layer_data: layer config dict. - - # Raises - ValueError: In case of improperly formatted `layer_data` dict. - """ - layer_name = layer_data['name'] - - # Instantiate layer. - from ..layers import deserialize as deserialize_layer - - layer = deserialize_layer(layer_data, - custom_objects=custom_objects) - created_layers[layer_name] = layer - - # Gather layer inputs. - inbound_nodes_data = layer_data['inbound_nodes'] - for node_data in inbound_nodes_data: - # We don't process nodes (i.e. make layer calls) - # on the fly because the inbound node may not yet exist, - # in case of layer shared at different topological depths - # (e.g. a model such as A(B(A(B(x))))) - add_unprocessed_node(layer, node_data) - - # First, we create all layers and enqueue nodes to be processed - for layer_data in config['layers']: - process_layer(layer_data) - - # Then we process nodes in order of layer depth. - # Nodes that cannot yet be processed (if the inbound node - # does not yet exist) are re-enqueued, and the process - # is repeated until all nodes are processed. 
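- # For a well-formed config each sweep over `config['layers']` - # processes at least one pending node, so the loop terminates.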
- while unprocessed_nodes: - for layer_data in config['layers']: - layer = created_layers[layer_data['name']] - - # Process all nodes in layer, if not yet processed - if layer in unprocessed_nodes: - node_data_list = unprocessed_nodes[layer] - - # Process nodes in order - node_index = 0 - while node_index < len(node_data_list): - node_data = node_data_list[node_index] - try: - process_node(layer, node_data) - - # If the node does not have all inbound layers - # available, stop processing and continue later - except LookupError: - break - - node_index += 1 - - # If not all nodes processed then store unprocessed nodes - if node_index < len(node_data_list): - unprocessed_nodes[layer] = node_data_list[node_index:] - # If all nodes processed remove the layer - else: - del unprocessed_nodes[layer] - - # Create lists of input and output tensors and return the new model - name = config.get('name') - input_tensors = [] - output_tensors = [] - for layer_data in config['input_layers']: - layer_name, node_index, tensor_index = layer_data - assert layer_name in created_layers - layer = created_layers[layer_name] - layer_output_tensors = layer._inbound_nodes[node_index].output_tensors - input_tensors.append(layer_output_tensors[tensor_index]) - for layer_data in config['output_layers']: - layer_name, node_index, tensor_index = layer_data - assert layer_name in created_layers - layer = created_layers[layer_name] - layer_output_tensors = layer._inbound_nodes[node_index].output_tensors - output_tensors.append(layer_output_tensors[tensor_index]) - return cls(inputs=input_tensors, outputs=output_tensors, name=name) - - def save(self, filepath, overwrite=True, include_optimizer=True): - """Saves the model to a single HDF5 file. - - The savefile includes: - - The model architecture, allowing you to re-instantiate the model. - - The model weights. - - The state of the optimizer, allowing you to resume training - exactly where you left off. - - This allows you to save the entirety of the state of a model - in a single file. - - Saved models can be reinstantiated via `keras.models.load_model`. - The model returned by `load_model` - is a compiled model ready to be used (unless the saved model - was never compiled in the first place). - - # Arguments - filepath: one of the following: - - string, path to the file to save the model to - - h5py.File or h5py.Group object where to save the model - - any file-like object implementing the method `write` that accepts - `bytes` data (e.g. `io.BytesIO`). - overwrite: Whether to silently overwrite any existing file at the - target location, or provide the user with a manual prompt. - include_optimizer: If True, save optimizer's state together. - - # Example - - ```python - from keras.models import load_model - - model.save('my_model.h5') # creates a HDF5 file 'my_model.h5' - del model # deletes the existing model - - # returns a compiled model - # identical to the previous one - model = load_model('my_model.h5') - ``` - """ - if not self._is_graph_network: - raise NotImplementedError - from ..models import save_model - save_model(self, filepath, overwrite, include_optimizer) - - @saving.allow_write_to_gcs - def save_weights(self, filepath, overwrite=True): - """Dumps all layer weights to a HDF5 file. - - The weight file has: - - `layer_names` (attribute), a list of strings - (ordered names of model layers). 
- - For every layer, a `group` named `layer.name` - - For every such layer group, a group attribute `weight_names`, - a list of strings - (ordered names of the layer's weight tensors). - - For every weight in the layer, a dataset - storing the weight value, named after the weight tensor. - - # Arguments - filepath: String, path to the file to save the weights to. - overwrite: Whether to silently overwrite any existing file at the - target location, or provide the user with a manual prompt. - - # Raises - ImportError: If h5py is not available. - """ - if h5py is None: - raise ImportError('`save_weights` requires h5py.') - # If file exists and should not be overwritten: - if not overwrite and os.path.isfile(filepath): - proceed = ask_to_proceed_with_overwrite(filepath) - if not proceed: - return - with h5py.File(filepath, 'w') as f: - saving.save_weights_to_hdf5_group(f, self.layers) - f.flush() - - @saving.allow_read_from_gcs - def load_weights(self, filepath, by_name=False, - skip_mismatch=False, reshape=False): - """Loads all layer weights from a HDF5 save file. - - If `by_name` is False (default) weights are loaded - based on the network's topology, meaning the architecture - should be the same as when the weights were saved. - Note that layers that don't have weights are not taken - into account in the topological ordering, so adding or - removing layers is fine as long as they don't have weights. - - If `by_name` is True, weights are loaded into layers - only if they share the same name. This is useful - for fine-tuning or transfer-learning models where - some of the layers have changed. - - # Arguments - filepath: String, path to the weights file to load. - by_name: Boolean, whether to load weights by name - or by topological order. - skip_mismatch: Boolean, whether to skip loading of layers - where there is a mismatch in the number of weights, - or a mismatch in the shape of the weight - (only valid when `by_name`=True). - reshape: Reshape weights to fit the layer when the correct number - of weight arrays is present but their shape does not match. - - - # Raises - ImportError: If h5py is not available. - """ - if h5py is None: - raise ImportError('`load_weights` requires h5py.') - with h5py.File(filepath, mode='r') as f: - if 'layer_names' not in f.attrs and 'model_weights' in f: - f = f['model_weights'] - if by_name: - saving.load_weights_from_hdf5_group_by_name( - f, self.layers, skip_mismatch=skip_mismatch, - reshape=reshape) - else: - saving.load_weights_from_hdf5_group( - f, self.layers, reshape=reshape) - - def _updated_config(self): - """Utility shared between different serialization methods. - - # Returns - Model config with Keras version information added. - """ - from .. import __version__ as keras_version - - config = self.get_config() - model_config = { - 'class_name': self.__class__.__name__, - 'config': config, - 'keras_version': keras_version, - 'backend': K.backend() - } - return model_config - - def to_json(self, **kwargs): - """Returns a JSON string containing the network configuration. - - To load a network from a JSON save file, use - `keras.models.model_from_json(json_string, custom_objects={})`. - - # Arguments - **kwargs: Additional keyword arguments - to be passed to `json.dumps()`. - - # Returns - A JSON string. 
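- - # Example - - ```python - from keras.models import model_from_json - - json_string = model.to_json() - # Later, recreate the (uncompiled) architecture: - model = model_from_json(json_string) - ```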
- """ - def get_json_type(obj): - # If obj is any numpy type - if type(obj).__module__ == np.__name__: - if isinstance(obj, np.ndarray): - return obj.tolist() - else: - return obj.item() - - # If obj is a python 'type' - if type(obj).__name__ == type.__name__: - return obj.__name__ - - raise TypeError('Not JSON Serializable:', obj) - - model_config = self._updated_config() - return json.dumps(model_config, default=get_json_type, **kwargs) - - def to_yaml(self, **kwargs): - """Returns a yaml string containing the network configuration. - - To load a network from a yaml save file, use - `keras.models.model_from_yaml(yaml_string, custom_objects={})`. - - `custom_objects` should be a dictionary mapping - the names of custom losses / layers / etc to the corresponding - functions / classes. - - # Arguments - **kwargs: Additional keyword arguments - to be passed to `yaml.dump()`. - - # Returns - A YAML string. - """ - return yaml.dump(self._updated_config(), **kwargs) - - def summary(self, line_length=None, positions=None, print_fn=None): - """Prints a string summary of the network. - - # Arguments - line_length: Total length of printed lines - (e.g. set this to adapt the display to different - terminal window sizes). - positions: Relative or absolute positions of log elements - in each line. If not provided, - defaults to `[.33, .55, .67, 1.]`. - print_fn: Print function to use. - It will be called on each line of the summary. - You can set it to a custom function - in order to capture the string summary. - It defaults to `print` (prints to stdout). - """ - if not self.built: - raise ValueError( - 'This model has not yet been built. ' - 'Build the model first by calling build() ' - 'or calling fit() with some data. ' - 'Or specify input_shape or batch_input_shape ' - 'in the first layer for automatic build. ') - return print_layer_summary(self, - line_length=line_length, - positions=positions, - print_fn=print_fn) - - def __getstate__(self): - return saving.pickle_model(self) - - def __setstate__(self, state): - model = saving.unpickle_model(state) - self.__dict__.update(model.__dict__) - - -def _make_node_key(layer_name, node_index): - return layer_name + '_ib-' + str(node_index) - - -def _map_graph_network(inputs, outputs): - """Validates a network's topology and gather its layers and nodes. - - # Arguments - inputs: List of input tensors. - outputs: List of outputs tensors. - - # Returns - A tuple `(nodes, nodes_by_depth, layers, layers_by_depth)`. - - nodes: list of Node instances. - - nodes_by_depth: dict mapping ints (depth) to lists of node instances. - - layers: list of Layer instances. - - layers_by_depth: dict mapping ints (depth) - to lists of layer instances. - - # Raises - ValueError: In case the network is not valid (e.g. disconnected graph). - """ - # Network_nodes: set of nodes included in the graph of layers - # (not all nodes included in the layers are relevant to the current graph). - network_nodes = set() # ids of all nodes relevant to the Network - nodes_depths = {} # dict {node: depth value} - layers_depths = {} # dict {layer: depth value} - layer_indices = {} # dict {layer: index in traversal} - nodes_in_decreasing_depth = [] - - def build_map(tensor, - finished_nodes, - nodes_in_progress, - layer, - node_index, - tensor_index): - """Builds a map of the graph of layers. - - This recursively updates the map `layer_indices`, - the list `nodes_in_decreasing_depth` and the set `network_nodes`. - - # Arguments - tensor: Some tensor in a graph. 
- finished_nodes: Set of nodes whose subgraphs have been traversed - completely. Useful to prevent duplicated work. - nodes_in_progress: Set of nodes that are currently active on the - recursion stack. Useful to detect cycles. - layer: Layer from which `tensor` originates. If not provided, - will be obtained from `tensor._keras_history`. - node_index: Node index from which `tensor` originates. - tensor_index: Tensor index from which `tensor` originates. - - # Raises - ValueError: if a cycle is detected. - """ - node = layer._inbound_nodes[node_index] - - # Prevent cycles. - if node in nodes_in_progress: - raise ValueError('The tensor ' + str(tensor) + ' at layer "' + - layer.name + '" is part of a cycle.') - - # Don't repeat work for shared subgraphs - if node in finished_nodes: - return - - node_key = _make_node_key(layer.name, node_index) - # Update network_nodes. - network_nodes.add(node_key) - - # Store the traversal order for layer sorting. - if layer not in layer_indices: - layer_indices[layer] = len(layer_indices) - - nodes_in_progress.add(node) - - # Propagate to all previous tensors connected to this node. - for i in range(len(node.inbound_layers)): - x = node.input_tensors[i] - layer = node.inbound_layers[i] - node_index = node.node_indices[i] - tensor_index = node.tensor_indices[i] - build_map(x, finished_nodes, nodes_in_progress, layer, - node_index, tensor_index) - - finished_nodes.add(node) - nodes_in_progress.remove(node) - nodes_in_decreasing_depth.append(node) - - finished_nodes = set() - nodes_in_progress = set() - for x in outputs: - layer, node_index, tensor_index = x._keras_history - build_map(x, finished_nodes, nodes_in_progress, - layer=layer, - node_index=node_index, - tensor_index=tensor_index) - - for node in reversed(nodes_in_decreasing_depth): - # If the depth is not set, the node has no outbound nodes (depth 0). - depth = nodes_depths.setdefault(node, 0) - - # Update the depth of the corresponding layer - previous_depth = layers_depths.get(node.outbound_layer, 0) - # If we've seen this layer before at a higher depth, - # we should use that depth instead of the node depth. - # This is necessary for shared layers that have inputs at different - # depth levels in the graph. - depth = max(depth, previous_depth) - layers_depths[node.outbound_layer] = depth - nodes_depths[node] = depth - - # Update the depth of inbound nodes. - # The "depth" of a node is the max of the depths - # of all layers it is connected to. - for i in range(len(node.inbound_layers)): - inbound_layer = node.inbound_layers[i] - node_index = node.node_indices[i] - inbound_node = inbound_layer._inbound_nodes[node_index] - previous_depth = nodes_depths.get(inbound_node, 0) - nodes_depths[inbound_node] = max(depth + 1, previous_depth) - - # Build a dict {depth: list of nodes with this depth} - nodes_by_depth = {} - for node, depth in nodes_depths.items(): - if depth not in nodes_by_depth: - nodes_by_depth[depth] = [] - nodes_by_depth[depth].append(node) - - # Build a dict {depth: list of layers with this depth} - layers_by_depth = {} - for layer, depth in layers_depths.items(): - if depth not in layers_by_depth: - layers_by_depth[depth] = [] - layers_by_depth[depth].append(layer) - - # Get sorted list of layer depths. - depth_keys = list(layers_by_depth.keys()) - depth_keys.sort(reverse=True) - - # Build the list of layers, ordered by depth. 
- layers = [] - for depth in depth_keys: - layers_for_depth = layers_by_depth[depth] - # Network.layers needs to have a deterministic order: - # here we order them by traversal order. - layers_for_depth.sort(key=lambda x: layer_indices[x]) - layers.extend(layers_for_depth) - - # Get sorted list of node depths. - depth_keys = list(nodes_by_depth.keys()) - depth_keys.sort(reverse=True) - - # Check that all tensors required are computable. - # computable_tensors: all tensors in the graph - # that can be computed from the inputs provided. - computable_tensors = [] - for x in inputs: - computable_tensors.append(x) - - layers_with_complete_input = [] # To provide a better error msg. - for depth in depth_keys: - for node in nodes_by_depth[depth]: - layer = node.outbound_layer - if layer: - for x in node.input_tensors: - if x not in computable_tensors: - raise ValueError('Graph disconnected: ' - 'cannot obtain value for tensor ' + - str(x) + ' at layer "' + - layer.name + '". ' - 'The following previous layers ' - 'were accessed without issue: ' + - str(layers_with_complete_input)) - for x in node.output_tensors: - computable_tensors.append(x) - layers_with_complete_input.append(layer.name) - - # Ensure name uniqueness, which will be crucial for serialization - # (since serialized nodes refer to layers by their name). - all_names = [layer.name for layer in layers] - for name in all_names: - if all_names.count(name) != 1: - raise ValueError('The name "' + name + '" is used ' + - str(all_names.count(name)) + - ' times in the model. ' - 'All layer names should be unique.') - return network_nodes, nodes_by_depth, layers, layers_by_depth -"""Model saving utilities. -""" -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division - -import os -import json -import yaml -import inspect -import warnings -import tempfile -from six.moves import zip -from six import string_types -from functools import wraps - -import numpy as np - -from .. import backend as K -from .. import optimizers -from ..utils.io_utils import H5Dict -from ..utils.io_utils import ask_to_proceed_with_overwrite -from ..utils.io_utils import save_to_binary_h5py -from ..utils.io_utils import load_from_binary_h5py -from ..utils import conv_utils - -try: - import h5py - HDF5_OBJECT_HEADER_LIMIT = 64512 -except ImportError: - h5py = None - -try: - from tensorflow.python.lib.io import file_io as tf_file_io -except ImportError: - tf_file_io = None - -try: - getargspec = inspect.getfullargspec -except AttributeError: # getargspec() is deprecated since Python 3.0 - getargspec = inspect.getargspec - - -def _serialize_model(model, h5dict, include_optimizer=True): - """Model serialization logic. - - This method is used for both writing to HDF5 file/group, - as well as pickling. This is achieved via a - `keras.utils.io_utils.H5Dict` object, which can wrap HDF5 - files, groups and dicts with a common API. - - # Arguments - model: Keras model instance to be serialized. - h5dict: keras.utils.io_utils.H5Dict instance. - include_optimizer: If True, serialize optimizer's state together. - - """ - def get_json_type(obj): - """Serialize any object to a JSON-serializable structure. - - # Arguments - obj: the object to serialize - - # Returns - JSON-serializable structure representing `obj`. - - # Raises - TypeError: if `obj` cannot be serialized. - """ - # if obj is a serializable Keras class instance - # e.g. 
optimizer, layer - if hasattr(obj, 'get_config'): - return {'class_name': obj.__class__.__name__, - 'config': obj.get_config()} - - # if obj is any numpy type - if type(obj).__module__ == np.__name__: - if isinstance(obj, np.ndarray): - return obj.tolist() - else: - return obj.item() - - # misc functions (e.g. loss function) - if callable(obj): - return obj.__name__ - - # if obj is a python 'type' - if type(obj).__name__ == type.__name__: - return obj.__name__ - - raise TypeError('Not JSON Serializable: %s' % (obj,)) - - from .. import __version__ as keras_version - - h5dict['keras_version'] = str(keras_version).encode('utf8') - h5dict['backend'] = K.backend().encode('utf8') - - model_config = {} - model_config['class_name'] = model.__class__.__name__ - model_config['config'] = model.get_config() - model_config = json.dumps(model_config, default=get_json_type) - model_config = model_config.encode('utf-8') - h5dict['model_config'] = model_config - - model_weights_group = h5dict['model_weights'] - model_layers = model.layers - model_weights_group['layer_names'] = [layer.name.encode('utf8') - for layer in model_layers] - model_weights_group['backend'] = K.backend().encode('utf8') - model_weights_group['keras_version'] = str(keras_version).encode('utf8') - for layer in model_layers: - layer_group = model_weights_group[layer.name] - symbolic_weights = layer.weights - weight_values = K.batch_get_value(symbolic_weights) - weight_names = [] - for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)): - if hasattr(w, 'name') and w.name: - name = str(w.name) - else: - name = 'param_' + str(i) - if name in weight_names: - idx = 2 - unique_name = name + '_1' - while unique_name in weight_names: - unique_name = name + '_' + str(idx) - idx += 1 - name = unique_name - weight_names.append(name.encode('utf8')) - layer_group['weight_names'] = weight_names - for name, val in zip(weight_names, weight_values): - layer_group[name] = val - if include_optimizer and model.optimizer: - if isinstance(model.optimizer, optimizers.TFOptimizer): - warnings.warn( - 'TensorFlow optimizers do not ' - 'make it possible to access ' - 'optimizer attributes or optimizer state ' - 'after instantiation. ' - 'As a result, we cannot save the optimizer ' - 'as part of the model save file. ' - 'You will have to compile your model again ' - 'after loading it. 
'
-                'Prefer using a Keras optimizer instead '
-                '(see keras.io/optimizers).')
-        else:
-            h5dict['training_config'] = json.dumps({
-                'optimizer_config': {
-                    'class_name': model.optimizer.__class__.__name__,
-                    'config': model.optimizer.get_config()
-                },
-                'loss': model.loss,
-                'metrics': model.metrics,
-                'weighted_metrics': model.weighted_metrics,
-                'sample_weight_mode': model.sample_weight_mode,
-                'loss_weights': model.loss_weights,
-            }, default=get_json_type).encode('utf8')
-            symbolic_weights = getattr(model.optimizer, 'weights')
-            if symbolic_weights:
-                optimizer_weights_group = h5dict['optimizer_weights']
-                weight_values = K.batch_get_value(symbolic_weights)
-                weight_names = []
-                for i, (w, val) in enumerate(zip(symbolic_weights,
-                                                 weight_values)):
-                    # The default name of a symbolic weight is '/variable'
-                    # for Theano and CNTK.
-                    if K.backend() == 'theano' or K.backend() == 'cntk':
-                        if hasattr(w, 'name'):
-                            if w.name.split('/')[-1] == 'variable':
-                                name = str(w.name) + '_' + str(i)
-                            else:
-                                name = str(w.name)
-                        else:
-                            name = 'param_' + str(i)
-                    else:
-                        if hasattr(w, 'name') and w.name:
-                            name = str(w.name)
-                        else:
-                            name = 'param_' + str(i)
-                    if name in weight_names:
-                        idx = 2
-                        unique_name = name + '_1'
-                        while unique_name in weight_names:
-                            unique_name = name + '_' + str(idx)
-                            idx += 1
-                        name = unique_name
-                    weight_names.append(name.encode('utf8'))
-                optimizer_weights_group['weight_names'] = weight_names
-                for name, val in zip(weight_names, weight_values):
-                    optimizer_weights_group[name] = val
-
-
-def _deserialize_model(h5dict, custom_objects=None, compile=True):
-    """De-serializes a model serialized via _serialize_model.
-
-    # Arguments
-        h5dict: `keras.utils.io_utils.H5Dict` instance.
-        custom_objects: Optional dictionary mapping names
-            (strings) to custom classes or functions to be
-            considered during deserialization.
-        compile: Boolean, whether to compile the model
-            after loading.
-
-    # Returns
-        A Keras model instance. If an optimizer was found
-        as part of the saved model, the model is already
-        compiled. Otherwise, the model is uncompiled and
-        a warning will be displayed. When `compile` is set
-        to False, the compilation is omitted without any
-        warning.
-    """
-    if not custom_objects:
-        custom_objects = {}
-
-    def convert_custom_objects(obj):
-        """Handles custom object lookup.
-
-        # Arguments
-            obj: object, dict, or list.
-
-        # Returns
-            The same structure, where occurrences
-            of a custom object name have been replaced
-            with the custom object.
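For illustration, here is how the lookup above surfaces in the public API: a model saved with a custom loss has to be reloaded with `custom_objects` mapping the serialized name back to the callable. A minimal sketch, assuming a previously saved `model.h5` and a hypothetical loss `scaled_mse` (neither is part of the source):

```python
import keras.backend as K
from keras.models import load_model

def scaled_mse(y_true, y_pred):
    # Hypothetical custom loss; only its *name* is stored in the saved config.
    return 2.0 * K.mean(K.square(y_pred - y_true), axis=-1)

# convert_custom_objects replaces the string 'scaled_mse' found in the saved
# config with the callable supplied here.
model = load_model('model.h5', custom_objects={'scaled_mse': scaled_mse})
```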
- """ - if isinstance(obj, list): - deserialized = [] - for value in obj: - deserialized.append(convert_custom_objects(value)) - return deserialized - if isinstance(obj, dict): - deserialized = {} - for key, value in obj.items(): - deserialized[key] = convert_custom_objects(value) - return deserialized - if obj in custom_objects: - return custom_objects[obj] - return obj - - model_config = h5dict['model_config'] - if model_config is None: - raise ValueError('No model found in config.') - model_config = json.loads(model_config.decode('utf-8')) - model = model_from_config(model_config, custom_objects=custom_objects) - model_weights_group = h5dict['model_weights'] - - if 'keras_version' in model_weights_group: - original_keras_version = model_weights_group['keras_version'].decode( - 'utf8') - else: - original_keras_version = '1' - if 'backend' in model_weights_group: - original_backend = model_weights_group['backend'].decode('utf8') - else: - original_backend = None - - layer_names = model_weights_group['layer_names'] - - layers = model.layers - - filtered_layers = [] - for layer in layers: - weights = layer.weights - if weights: - filtered_layers.append(layer) - - filtered_layer_names = [] - for name in layer_names: - layer_weights = model_weights_group[name] - weight_names = layer_weights['weight_names'] - if len(weight_names) > 0: - filtered_layer_names.append(name) - - layer_names = filtered_layer_names - if len(layer_names) != len(filtered_layers): - raise ValueError('You are trying to load a weight file' - ' containing {} layers into a model with {} layers' - .format(len(layer_names), len(filtered_layers)) - ) - - # We batch weight value assignments in a single backend call - # which provides a speedup in TensorFlow. - weight_value_tuples = [] - for k, name in enumerate(layer_names): - layer_weights = model_weights_group[name] - weight_names = layer_weights['weight_names'] - weight_values = [layer_weights[weight_name] - for weight_name in weight_names] - layer = filtered_layers[k] - symbolic_weights = layer.weights - weight_values = preprocess_weights_for_loading(layer, - weight_values, - original_keras_version, - original_backend, - reshape=False) - if len(weight_values) != len(symbolic_weights): - raise ValueError('Layer #' + str(k) + - ' (named "' + layer.name + - '" in the current model) was found to ' - 'correspond to layer ' + name + - ' in the save file. ' - 'However the new layer ' + layer.name + - ' expects ' + str(len(symbolic_weights)) + - ' weights, but the saved weights have ' + - str(len(weight_values)) + - ' elements.') - weight_value_tuples += zip(symbolic_weights, weight_values) - K.batch_set_value(weight_value_tuples) - - if compile: - training_config = h5dict.get('training_config') - if training_config is None: - warnings.warn('No training configuration found in save file: ' - 'the model was *not* compiled. ' - 'Compile it manually.') - return model - training_config = json.loads(training_config.decode('utf-8')) - optimizer_config = training_config['optimizer_config'] - optimizer = optimizers.deserialize(optimizer_config, - custom_objects=custom_objects) - - # Recover loss functions and metrics. - loss = convert_custom_objects(training_config['loss']) - metrics = convert_custom_objects(training_config['metrics']) - sample_weight_mode = training_config['sample_weight_mode'] - loss_weights = training_config['loss_weights'] - - # Compile model. 
-        model.compile(optimizer=optimizer,
-                      loss=loss,
-                      metrics=metrics,
-                      loss_weights=loss_weights,
-                      sample_weight_mode=sample_weight_mode)
-
-        # Set optimizer weights.
-        if 'optimizer_weights' in h5dict:
-            # Build train function (to get weight updates).
-            model._make_train_function()
-            optimizer_weights_group = h5dict['optimizer_weights']
-            optimizer_weight_names = [
-                n.decode('utf8') for n in
-                optimizer_weights_group['weight_names']]
-            optimizer_weight_values = [optimizer_weights_group[n] for n in
-                                       optimizer_weight_names]
-            try:
-                model.optimizer.set_weights(optimizer_weight_values)
-            except ValueError:
-                warnings.warn('Error in loading the saved optimizer '
-                              'state. As a result, your model is '
-                              'starting with a freshly initialized '
-                              'optimizer.')
-
-    return model
-
-
-def _gcs_copy(source_filepath, target_filepath, overwrite=True):
-    """Copies a file to/from/within Google Cloud Storage (GCS).
-
-    # Arguments
-        source_filepath: String, path to the file on the filesystem or object
-            on GCS to copy from.
-        target_filepath: String, path to the file on the filesystem or object
-            on GCS to copy to.
-        overwrite: Whether we should overwrite an existing file/object at the
-            target location, or instead ask the user with a manual prompt.
-    """
-    if tf_file_io is None:
-        raise ImportError(
-            'Google Cloud Storage file transfer requires TensorFlow.')
-    if not overwrite and tf_file_io.file_exists(target_filepath):
-        proceed = ask_to_proceed_with_overwrite(target_filepath)
-        if not proceed:
-            return
-    with tf_file_io.FileIO(source_filepath, mode='rb') as source_f:
-        with tf_file_io.FileIO(target_filepath, mode='wb') as target_f:
-            target_f.write(source_f.read())
-
-
-def _is_gcs_location(filepath):
-    """Checks if `filepath` is referencing a Google Cloud Storage bucket.
-
-    # Arguments
-        filepath: The location to check.
-    """
-    return isinstance(filepath, string_types) and filepath.startswith('gs://')
-
-
-def allow_write_to_gcs(save_function):
-    """Function decorator to support saving to Google Cloud Storage (GCS).
-
-    This decorator parses the `filepath` argument of the `save_function` and
-    transfers the file to GCS if `filepath` starts with "gs://".
-
-    Note: the file is temporarily written to the local filesystem before
-    being copied to GCS.
-
-    # Arguments
-        save_function: The function to wrap, with requirements:
-            - second positional argument should indicate the location to
-            save to.
-            - third positional argument should be the `overwrite` option
-            indicating whether we should overwrite an existing file/object
-            at the target location, or instead ask the user with a manual
-            prompt.
-    """
-    @wraps(save_function)
-    def save_wrapper(obj, filepath, overwrite=True, *args, **kwargs):
-        if _is_gcs_location(filepath):
-            tmp_filepath = os.path.join(tempfile.gettempdir(),
-                                        os.path.basename(filepath))
-            save_function(obj, tmp_filepath, True, *args, **kwargs)
-            try:
-                _gcs_copy(tmp_filepath, filepath, overwrite)
-            finally:
-                os.remove(tmp_filepath)
-        else:
-            save_function(obj, filepath, overwrite, *args, **kwargs)
-
-    return save_wrapper
-
-
-def allow_read_from_gcs(load_function):
-    """Function decorator to support loading from Google Cloud Storage (GCS).
-
-    This decorator parses the `filepath` argument of the `load_function` and
-    fetches the required object from GCS if `filepath` starts with "gs://".
-
-    Note: the file is temporarily copied from GCS to the local filesystem
-    before being loaded.
- - # Arguments - load_function: The function to wrap, with requirements: - - should have one _named_ argument `filepath` indicating the location to - load from. - """ - def extract_named_arg(f, name, args, kwargs): - if name in kwargs: - arg = kwargs.pop(name) - return arg, args, kwargs - argnames = getargspec(f)[0] - for i, (argname, arg) in enumerate(zip(argnames, args)): - if argname == name: - return arg, args[:i] + args[i + 1:], kwargs - else: - raise ValueError('function {} has no argument {}'.format(f, name)) - - @wraps(load_function) - def load_wrapper(*args, **kwargs): - filepath, _args, _kwargs = extract_named_arg( - load_function, 'filepath', args, kwargs) - if _is_gcs_location(filepath): - tmp_filepath = os.path.join(tempfile.gettempdir(), - os.path.basename(filepath)) - _gcs_copy(filepath, tmp_filepath) - _kwargs['filepath'] = tmp_filepath - try: - res = load_function(*_args, **_kwargs) - finally: - os.remove(tmp_filepath) - return res - return load_function(*args, **kwargs) - - return load_wrapper - - -@allow_write_to_gcs -def save_model(model, filepath, overwrite=True, include_optimizer=True): - """Save a model to a HDF5 file. - - Note: Please also see - [How can I install HDF5 or h5py to save my models in Keras?]( - /getting-started/faq/ - #how-can-i-install-HDF5-or-h5py-to-save-my-models-in-Keras) - in the FAQ for instructions on how to install `h5py`. - - The saved model contains: - - the model's configuration (topology) - - the model's weights - - the model's optimizer's state (if any) - - Thus the saved model can be reinstantiated in - the exact same state, without any of the code - used for model definition or training. - - # Arguments - model: Keras model instance to be saved. - filepath: one of the following: - - string, path to the file to save the model to - - h5py.File or h5py.Group object where to save the model - - any file-like object implementing the method `write` that accepts - `bytes` data (e.g. `io.BytesIO`). - overwrite: Whether we should overwrite any existing - model at the target location, or instead - ask the user with a manual prompt. - include_optimizer: If True, save optimizer's state together. - - # Raises - ImportError: if h5py is not available. - """ - if h5py is None: - raise ImportError('`save_model` requires h5py.') - - if H5Dict.is_supported_type(filepath): - opens_file = not isinstance(filepath, (dict, h5py.Group)) - if opens_file and os.path.isfile(filepath) and not overwrite: - proceed = ask_to_proceed_with_overwrite(filepath) - if not proceed: - return - with H5Dict(filepath, mode='w') as h5dict: - _serialize_model(model, h5dict, include_optimizer) - elif hasattr(filepath, 'write') and callable(filepath.write): - # write as binary stream - def save_function(h5file): - _serialize_model(model, H5Dict(h5file), include_optimizer) - save_to_binary_h5py(save_function, filepath) - else: - raise ValueError( - 'unexpected type {} for `filepath`'.format(type(filepath))) - - -@allow_read_from_gcs -def load_model(filepath, custom_objects=None, compile=True): - """Loads a model saved via `save_model`. - - # Arguments - filepath: one of the following: - - string, path to the saved model - - h5py.File or h5py.Group object from which to load the model - - any file-like object implementing the method `read` that returns - `bytes` data (e.g. `io.BytesIO`) that represents a valid h5py file image. - custom_objects: Optional dictionary mapping names - (strings) to custom classes or functions to be - considered during deserialization. 
-        compile: Boolean, whether to compile the model
-            after loading.
-
-    # Returns
-        A Keras model instance. If an optimizer was found
-        as part of the saved model, the model is already
-        compiled. Otherwise, the model is uncompiled and
-        a warning will be displayed. When `compile` is set
-        to False, the compilation is omitted without any
-        warning.
-
-    # Raises
-        ImportError: if h5py is not available.
-        ValueError: In case of an invalid savefile.
-    """
-    if h5py is None:
-        raise ImportError('`load_model` requires h5py.')
-
-    if H5Dict.is_supported_type(filepath):
-        with H5Dict(filepath, mode='r') as h5dict:
-            model = _deserialize_model(h5dict, custom_objects, compile)
-    elif hasattr(filepath, 'read') and callable(filepath.read):
-        # read as binary stream
-        def load_function(h5file):
-            return _deserialize_model(H5Dict(h5file), custom_objects, compile)
-        model = load_from_binary_h5py(load_function, filepath)
-    else:
-        raise ValueError(
-            'unexpected type {} for `filepath`'.format(type(filepath)))
-
-    return model
-
-
-def pickle_model(model):
-    d = {}
-    h5dict = H5Dict(d)
-    _serialize_model(model, h5dict)
-    return d
-
-
-def unpickle_model(state):
-    h5dict = H5Dict(state, mode='r')
-    return _deserialize_model(h5dict)
-
-
-def model_from_config(config, custom_objects=None):
-    """Instantiates a Keras model from its config.
-
-    # Arguments
-        config: Configuration dictionary.
-        custom_objects: Optional dictionary mapping names
-            (strings) to custom classes or functions to be
-            considered during deserialization.
-
-    # Returns
-        A Keras model instance (uncompiled).
-
-    # Raises
-        TypeError: if `config` is not a dictionary.
-    """
-    if isinstance(config, list):
-        raise TypeError('`model_from_config` expects a dictionary, '
-                        'not a list. Maybe you meant to use '
-                        '`Sequential.from_config(config)`?')
-    from ..layers import deserialize
-    return deserialize(config, custom_objects=custom_objects)
-
-
-def model_from_yaml(yaml_string, custom_objects=None):
-    """Parses a yaml model configuration file and returns a model instance.
-
-    # Arguments
-        yaml_string: YAML string encoding a model configuration.
-        custom_objects: Optional dictionary mapping names
-            (strings) to custom classes or functions to be
-            considered during deserialization.
-
-    # Returns
-        A Keras model instance (uncompiled).
-    """
-    config = yaml.load(yaml_string, Loader=yaml.FullLoader)
-    from ..layers import deserialize
-    return deserialize(config, custom_objects=custom_objects)
-
-
-def model_from_json(json_string, custom_objects=None):
-    """Parses a JSON model configuration file and returns a model instance.
-
-    # Arguments
-        json_string: JSON string encoding a model configuration.
-        custom_objects: Optional dictionary mapping names
-            (strings) to custom classes or functions to be
-            considered during deserialization.
-
-    # Returns
-        A Keras model instance (uncompiled).
-    """
-    config = json.loads(json_string)
-    from ..layers import deserialize
-    return deserialize(config, custom_objects=custom_objects)
-
-
-def save_attributes_to_hdf5_group(group, name, data):
-    """Saves attributes (data) of the specified name into the HDF5 group.
-
-    This method deals with an inherent problem of the HDF5 format, which is
-    not able to store data larger than HDF5_OBJECT_HEADER_LIMIT bytes.
-
-    # Arguments
-        group: A pointer to a HDF5 group.
-        name: A name of the attributes to save.
-        data: Attributes data to store.
-    """
-    # Check that no item in `data` is larger than `HDF5_OBJECT_HEADER_LIMIT`
-    # because in that case even chunking the array would not make the saving
-    # possible.
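A quick round-trip sketch of the two entry points defined above (the file name is arbitrary):

```python
import numpy as np
from keras.models import Sequential, load_model, save_model
from keras.layers import Dense

model = Sequential([Dense(4, input_shape=(8,), activation='relu'), Dense(1)])
model.compile(optimizer='adam', loss='mse')
model.fit(np.random.rand(16, 8), np.random.rand(16, 1), epochs=1, verbose=0)

save_model(model, 'round_trip.h5')      # config + weights + optimizer state
restored = load_model('round_trip.h5')  # comes back already compiled
# A 'gs://bucket/round_trip.h5' path would route through the GCS decorators.
```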
- bad_attributes = [x for x in data if len(x) > HDF5_OBJECT_HEADER_LIMIT] - - # Expecting this to never be true. - if len(bad_attributes) > 0: - raise RuntimeError('The following attributes cannot be saved to HDF5 ' - 'file because they are larger than %d bytes: %s' - % (HDF5_OBJECT_HEADER_LIMIT, - ', '.join([x for x in bad_attributes]))) - - data_npy = np.asarray(data) - - num_chunks = 1 - chunked_data = np.array_split(data_npy, num_chunks) - - # This will never loop forever thanks to the test above. - while any(map(lambda x: x.nbytes > HDF5_OBJECT_HEADER_LIMIT, chunked_data)): - num_chunks += 1 - chunked_data = np.array_split(data_npy, num_chunks) - - if num_chunks > 1: - for chunk_id, chunk_data in enumerate(chunked_data): - group.attrs['%s%d' % (name, chunk_id)] = chunk_data - else: - group.attrs[name] = data - - -def load_attributes_from_hdf5_group(group, name): - """Loads attributes of the specified name from the HDF5 group. - - This method deals with an inherent problem - of HDF5 file which is not able to store - data larger than HDF5_OBJECT_HEADER_LIMIT bytes. - - # Arguments - group: A pointer to a HDF5 group. - name: A name of the attributes to load. - - # Returns - data: Attributes data. - """ - if name in group.attrs: - data = [n.decode('utf8') for n in group.attrs[name]] - else: - data = [] - chunk_id = 0 - while ('%s%d' % (name, chunk_id)) in group.attrs: - data.extend([n.decode('utf8') - for n in group.attrs['%s%d' % (name, chunk_id)]]) - chunk_id += 1 - return data - - -def save_weights_to_hdf5_group(group, layers): - """Saves weights into the HDF5 group. - - # Arguments - group: A pointer to a HDF5 group. - layers: Layers to load. - """ - from .. import __version__ as keras_version - - save_attributes_to_hdf5_group( - group, 'layer_names', [layer.name.encode('utf8') for layer in layers]) - group.attrs['backend'] = K.backend().encode('utf8') - group.attrs['keras_version'] = str(keras_version).encode('utf8') - - for layer in layers: - g = group.create_group(layer.name) - symbolic_weights = layer.weights - weight_values = K.batch_get_value(symbolic_weights) - weight_names = [] - for i, (w, val) in enumerate(zip(symbolic_weights, weight_values)): - if hasattr(w, 'name') and w.name: - name = str(w.name) - else: - name = 'param_' + str(i) - weight_names.append(name.encode('utf8')) - save_attributes_to_hdf5_group(g, 'weight_names', weight_names) - for name, val in zip(weight_names, weight_values): - param_dset = g.create_dataset(name, val.shape, - dtype=val.dtype) - if not val.shape: - # scalar - param_dset[()] = val - else: - param_dset[:] = val - - -def preprocess_weights_for_loading(layer, weights, - original_keras_version=None, - original_backend=None, - reshape=False): - """Converts layers weights from Keras 1 format to Keras 2. - - # Arguments - layer: Layer instance. - weights: List of weights values (Numpy arrays). - original_keras_version: Keras version for the weights, as a string. - original_backend: Keras backend the weights were trained with, - as a string. - reshape: Reshape weights to fit the layer when the correct number - of values are present but the shape does not match. - - # Returns - A list of weights values (Numpy arrays). - """ - def convert_nested_bidirectional(weights): - """Converts layers nested in `Bidirectional` wrapper. - - # Arguments - weights: List of weights values (Numpy arrays). - # Returns - A list of weights values (Numpy arrays). 
- """ - num_weights_per_layer = len(weights) // 2 - forward_weights = preprocess_weights_for_loading( - layer.forward_layer, - weights[:num_weights_per_layer], - original_keras_version, - original_backend) - backward_weights = preprocess_weights_for_loading( - layer.backward_layer, - weights[num_weights_per_layer:], - original_keras_version, - original_backend) - return forward_weights + backward_weights - - def convert_nested_time_distributed(weights): - """Converts layers nested in `TimeDistributed` wrapper. - - # Arguments - weights: List of weights values (Numpy arrays). - # Returns - A list of weights values (Numpy arrays). - """ - return preprocess_weights_for_loading( - layer.layer, weights, original_keras_version, original_backend) - - def convert_nested_model(weights): - """Converts layers nested in `Model` or `Sequential`. - - # Arguments - weights: List of weights values (Numpy arrays). - # Returns - A list of weights values (Numpy arrays). - """ - new_weights = [] - # trainable weights - for sublayer in layer.layers: - num_weights = len(sublayer.trainable_weights) - if num_weights > 0: - new_weights.extend(preprocess_weights_for_loading( - layer=sublayer, - weights=weights[:num_weights], - original_keras_version=original_keras_version, - original_backend=original_backend)) - weights = weights[num_weights:] - - # non-trainable weights - for sublayer in layer.layers: - num_weights = len([l for l in sublayer.weights - if l not in sublayer.trainable_weights]) - if num_weights > 0: - new_weights.extend(preprocess_weights_for_loading( - layer=sublayer, - weights=weights[:num_weights], - original_keras_version=original_keras_version, - original_backend=original_backend)) - weights = weights[num_weights:] - return new_weights - - # Convert layers nested in Bidirectional/TimeDistributed/Model/Sequential. - # Both transformation should be ran for both Keras 1->2 conversion - # and for conversion of CuDNN layers. 
- if layer.__class__.__name__ == 'Bidirectional': - weights = convert_nested_bidirectional(weights) - if layer.__class__.__name__ == 'TimeDistributed': - weights = convert_nested_time_distributed(weights) - elif layer.__class__.__name__ in ['Model', 'Sequential']: - weights = convert_nested_model(weights) - - if original_keras_version == '1': - if layer.__class__.__name__ == 'TimeDistributed': - weights = preprocess_weights_for_loading(layer.layer, - weights, - original_keras_version, - original_backend) - - if layer.__class__.__name__ == 'Conv1D': - shape = weights[0].shape - # Handle Keras 1.1 format - if shape[:2] != (layer.kernel_size[0], 1) or shape[3] != layer.filters: - # Legacy shape: - # (filters, input_dim, filter_length, 1) - assert (shape[0] == layer.filters and - shape[2:] == (layer.kernel_size[0], 1)) - weights[0] = np.transpose(weights[0], (2, 3, 1, 0)) - weights[0] = weights[0][:, 0, :, :] - - if layer.__class__.__name__ == 'Conv2D': - if layer.data_format == 'channels_first': - # old: (filters, stack_size, kernel_rows, kernel_cols) - # new: (kernel_rows, kernel_cols, stack_size, filters) - weights[0] = np.transpose(weights[0], (2, 3, 1, 0)) - - if layer.__class__.__name__ == 'Conv2DTranspose': - if layer.data_format == 'channels_last': - # old: (kernel_rows, kernel_cols, stack_size, filters) - # new: (kernel_rows, kernel_cols, filters, stack_size) - weights[0] = np.transpose(weights[0], (0, 1, 3, 2)) - if layer.data_format == 'channels_first': - # old: (filters, stack_size, kernel_rows, kernel_cols) - # new: (kernel_rows, kernel_cols, filters, stack_size) - weights[0] = np.transpose(weights[0], (2, 3, 0, 1)) - - if layer.__class__.__name__ == 'Conv3D': - if layer.data_format == 'channels_first': - # old: (filters, stack_size, ...) - # new: (..., stack_size, filters) - weights[0] = np.transpose(weights[0], (2, 3, 4, 1, 0)) - - if layer.__class__.__name__ == 'GRU': - if len(weights) == 9: - kernel = np.concatenate([weights[0], - weights[3], - weights[6]], axis=-1) - recurrent_kernel = np.concatenate([weights[1], - weights[4], - weights[7]], axis=-1) - bias = np.concatenate([weights[2], - weights[5], - weights[8]], axis=-1) - weights = [kernel, recurrent_kernel, bias] - - if layer.__class__.__name__ == 'LSTM': - if len(weights) == 12: - # old: i, c, f, o - # new: i, f, c, o - kernel = np.concatenate([weights[0], - weights[6], - weights[3], - weights[9]], axis=-1) - recurrent_kernel = np.concatenate([weights[1], - weights[7], - weights[4], - weights[10]], axis=-1) - bias = np.concatenate([weights[2], - weights[8], - weights[5], - weights[11]], axis=-1) - weights = [kernel, recurrent_kernel, bias] - - if layer.__class__.__name__ == 'ConvLSTM2D': - if len(weights) == 12: - kernel = np.concatenate([weights[0], - weights[6], - weights[3], - weights[9]], axis=-1) - recurrent_kernel = np.concatenate([weights[1], - weights[7], - weights[4], - weights[10]], axis=-1) - bias = np.concatenate([weights[2], - weights[8], - weights[5], - weights[11]], axis=-1) - if layer.data_format == 'channels_first': - # old: (filters, stack_size, kernel_rows, kernel_cols) - # new: (kernel_rows, kernel_cols, stack_size, filters) - kernel = np.transpose(kernel, (2, 3, 1, 0)) - recurrent_kernel = np.transpose(recurrent_kernel, - (2, 3, 1, 0)) - weights = [kernel, recurrent_kernel, bias] - - conv_layers = ['Conv1D', - 'Conv2D', - 'Conv3D', - 'Conv2DTranspose', - 'ConvLSTM2D'] - if layer.__class__.__name__ in conv_layers: - layer_weights_shape = K.int_shape(layer.weights[0]) - if 
_need_convert_kernel(original_backend):
-            weights[0] = conv_utils.convert_kernel(weights[0])
-            if layer.__class__.__name__ == 'ConvLSTM2D':
-                weights[1] = conv_utils.convert_kernel(weights[1])
-        if reshape and layer_weights_shape != weights[0].shape:
-            if weights[0].size != np.prod(layer_weights_shape):
-                raise ValueError('Weights must be of equal size to ' +
-                                 'apply a reshape operation. ' +
-                                 'Layer ' + layer.name +
-                                 '\'s weights have shape ' +
-                                 str(layer_weights_shape) + ' and size ' +
-                                 str(np.prod(layer_weights_shape)) + '. ' +
-                                 'The weights for loading have shape ' +
-                                 str(weights[0].shape) + ' and size ' +
-                                 str(weights[0].size) + '. ')
-            weights[0] = np.reshape(weights[0], layer_weights_shape)
-        elif layer_weights_shape != weights[0].shape:
-            weights[0] = np.transpose(weights[0], (3, 2, 0, 1))
-            if layer.__class__.__name__ == 'ConvLSTM2D':
-                weights[1] = np.transpose(weights[1], (3, 2, 0, 1))
-
-    # convert CuDNN layers
-    weights = _convert_rnn_weights(layer, weights)
-
-    return weights
-
-
-def _convert_rnn_weights(layer, weights):
-    """Converts weights for RNN layers between native and CuDNN format.
-
-    Input kernels for each gate are transposed and converted between Fortran
-    and C layout, recurrent kernels are transposed. For LSTM, biases are
-    summed/split in half; for GRU, biases are reshaped.
-
-    Weights can be converted in both directions between `LSTM` and `CuDNNLSTM`
-    and between `CuDNNGRU` and `GRU(reset_after=True)`. Default `GRU` is not
-    compatible with `CuDNNGRU`.
-
-    For missing biases in `LSTM`/`GRU` (`use_bias=False`),
-    no conversion is made.
-
-    # Arguments
-        layer: Target layer instance.
-        weights: List of source weights values (input kernels, recurrent
-            kernels, [biases]) (Numpy arrays).
-
-    # Returns
-        A list of converted weights values (Numpy arrays).
-
-    # Raises
-        ValueError: for incompatible GRU layer/weights or incompatible biases.
-    """
-
-    def transform_kernels(kernels, func, n_gates):
-        """Transforms kernel for each gate separately using given function.
-
-        # Arguments
-            kernels: Stacked array of kernels for individual gates.
-            func: Function applied to kernel of each gate.
-            n_gates: Number of gates (4 for LSTM, 3 for GRU).
-        # Returns
-            Stacked array of transformed kernels.
-        """
-        return np.hstack([func(k) for k in np.hsplit(kernels, n_gates)])
-
-    def transpose_input(from_cudnn):
-        """Makes a function that transforms input kernels from/to CuDNN format.
-
-        It keeps the shape, but changes between the layout (Fortran/C). E.g.:
-
-        ```
-        Keras                 CuDNN
-        [[0, 1, 2],  <--->  [[0, 2, 4],
-         [3, 4, 5]]          [1, 3, 5]]
-        ```
-
-        It can be passed to `transform_kernels()`.
-
-        # Arguments
-            from_cudnn: `True` if source weights are in CuDNN format, `False`
-                if they're in plain Keras format.
-        # Returns
-            Function that converts input kernel to the other format.
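The layout change illustrated in this docstring is just NumPy's C-order versus Fortran-order view of the same buffer, e.g.:

```python
import numpy as np

flat = np.arange(6)
print(flat.reshape(2, 3, order='C'))  # [[0 1 2]
                                      #  [3 4 5]]  -- Keras layout
print(flat.reshape(2, 3, order='F'))  # [[0 2 4]
                                      #  [1 3 5]]  -- CuDNN layout
```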
- """ - order = 'F' if from_cudnn else 'C' - - def transform(kernel): - return kernel.T.reshape(kernel.shape, order=order) - - return transform - - target_class = layer.__class__.__name__ - - # convert the weights between CuDNNLSTM and LSTM - if target_class in ['LSTM', 'CuDNNLSTM'] and len(weights) == 3: - # determine if we're loading a CuDNNLSTM layer - # from the number of bias weights: - # CuDNNLSTM has (units * 8) weights; while LSTM has (units * 4) - # if there's no bias weight in the file, skip this conversion - units = weights[1].shape[0] - bias_shape = weights[2].shape - n_gates = 4 - - if bias_shape == (2 * units * n_gates,): - source = 'CuDNNLSTM' - elif bias_shape == (units * n_gates,): - source = 'LSTM' - else: - raise ValueError('Invalid bias shape: ' + str(bias_shape)) - - def convert_weights(weights, from_cudnn=True): - # transpose (and reshape) input and recurrent kernels - kernels = transform_kernels(weights[0], - transpose_input(from_cudnn), - n_gates) - recurrent_kernels = transform_kernels( - weights[1], lambda k: k.T, n_gates) - if from_cudnn: - # merge input and recurrent biases into a single set - biases = np.sum(np.split(weights[2], 2, axis=0), axis=0) - else: - # Split single set of biases evenly to two sets. The way of - # splitting doesn't matter as long as the two sets sum is kept. - biases = np.tile(0.5 * weights[2], 2) - return [kernels, recurrent_kernels, biases] - - if source != target_class: - weights = convert_weights( - weights, from_cudnn=source == 'CuDNNLSTM') - - # convert the weights between CuDNNGRU and GRU(reset_after=True) - if target_class in ['GRU', 'CuDNNGRU'] and len(weights) == 3: - # We can determine the source of the weights from the shape of the bias. - # If there is no bias we skip the conversion - # since CuDNNGRU always has biases. - - units = weights[1].shape[0] - bias_shape = weights[2].shape - n_gates = 3 - - def convert_weights(weights, from_cudnn=True): - kernels = transform_kernels(weights[0], - transpose_input(from_cudnn), - n_gates) - recurrent_kernels = transform_kernels( - weights[1], lambda k: k.T, n_gates) - biases = np.array(weights[2]).reshape( - (2, -1) if from_cudnn else -1) - return [kernels, recurrent_kernels, biases] - - if bias_shape == (2 * units * n_gates,): - source = 'CuDNNGRU' - elif bias_shape == (2, units * n_gates): - source = 'GRU(reset_after=True)' - elif bias_shape == (units * n_gates,): - source = 'GRU(reset_after=False)' - else: - raise ValueError('Invalid bias shape: ' + str(bias_shape)) - - if target_class == 'CuDNNGRU': - target = 'CuDNNGRU' - elif layer.reset_after: - target = 'GRU(reset_after=True)' - else: - target = 'GRU(reset_after=False)' - - # only convert between different types - if source != target: - types = (source, target) - if 'GRU(reset_after=False)' in types: - raise ValueError('%s is not compatible with %s' % types) - if source == 'CuDNNGRU': - weights = convert_weights(weights, from_cudnn=True) - elif source == 'GRU(reset_after=True)': - weights = convert_weights(weights, from_cudnn=False) - - return weights - - -def _need_convert_kernel(original_backend): - """Checks if conversion on kernel matrices is required during weight loading. - - The convolution operation is implemented differently in different backends. - While TH implements convolution, TF and CNTK implement the correlation operation. - So the channel axis needs to be flipped when TF weights are loaded on a TH model, - or vice versa. However, there's no conversion required between TF and CNTK. 
- - # Arguments - original_backend: Keras backend the weights were trained with, as a string. - - # Returns - `True` if conversion on kernel matrices is required, otherwise `False`. - """ - if original_backend is None: - # backend information not available - return False - uses_correlation = {'tensorflow': True, - 'theano': False, - 'cntk': True} - if original_backend not in uses_correlation: - # By default, do not convert the kernels if the original backend is unknown - return False - if K.backend() in uses_correlation: - current_uses_correlation = uses_correlation[K.backend()] - else: - # Assume unknown backends use correlation - current_uses_correlation = True - return uses_correlation[original_backend] != current_uses_correlation - - -def load_weights_from_hdf5_group(f, layers, reshape=False): - """Implements topological (order-based) weight loading. - - # Arguments - f: A pointer to a HDF5 group. - layers: a list of target layers. - reshape: Reshape weights to fit the layer when the correct number - of values are present but the shape does not match. - - # Raises - ValueError: in case of mismatch between provided layers - and weights file. - """ - if 'keras_version' in f.attrs: - original_keras_version = f.attrs['keras_version'].decode('utf8') - else: - original_keras_version = '1' - if 'backend' in f.attrs: - original_backend = f.attrs['backend'].decode('utf8') - else: - original_backend = None - - filtered_layers = [] - for layer in layers: - weights = layer.weights - if weights: - filtered_layers.append(layer) - - layer_names = load_attributes_from_hdf5_group(f, 'layer_names') - filtered_layer_names = [] - for name in layer_names: - g = f[name] - weight_names = load_attributes_from_hdf5_group(g, 'weight_names') - if weight_names: - filtered_layer_names.append(name) - layer_names = filtered_layer_names - if len(layer_names) != len(filtered_layers): - raise ValueError('You are trying to load a weight file ' - 'containing ' + str(len(layer_names)) + - ' layers into a model with ' + - str(len(filtered_layers)) + ' layers.') - - # We batch weight value assignments in a single backend call - # which provides a speedup in TensorFlow. - weight_value_tuples = [] - for k, name in enumerate(layer_names): - g = f[name] - weight_names = load_attributes_from_hdf5_group(g, 'weight_names') - weight_values = [np.asarray(g[weight_name]) - for weight_name in weight_names] - layer = filtered_layers[k] - symbolic_weights = layer.weights - weight_values = preprocess_weights_for_loading(layer, - weight_values, - original_keras_version, - original_backend, - reshape=reshape) - if len(weight_values) != len(symbolic_weights): - raise ValueError('Layer #' + str(k) + - ' (named "' + layer.name + - '" in the current model) was found to ' - 'correspond to layer ' + name + - ' in the save file. ' - 'However the new layer ' + layer.name + - ' expects ' + str(len(symbolic_weights)) + - ' weights, but the saved weights have ' + - str(len(weight_values)) + - ' elements.') - weight_value_tuples += zip(symbolic_weights, weight_values) - K.batch_set_value(weight_value_tuples) - - -def load_weights_from_hdf5_group_by_name(f, layers, skip_mismatch=False, - reshape=False): - """Implements name-based weight loading. - - (instead of topological weight loading). - - Layers that have no matching name are skipped. - - # Arguments - f: A pointer to a HDF5 group. - layers: A list of target layers. 
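This name-based path is what `model.load_weights(..., by_name=True)` exercises: only layers whose names match a group in the file receive weights, which makes it useful for transferring weights between related architectures. A hedged sketch (layer names and shapes are invented for illustration):

```python
from keras.models import Sequential
from keras.layers import Dense

src = Sequential([Dense(4, input_shape=(8,), name='shared'),
                  Dense(1, name='head_a')])
dst = Sequential([Dense(4, input_shape=(8,), name='shared'),
                  Dense(2, name='head_b')])
src.save_weights('w.h5')
# Topological loading would fail here (the heads differ); by name, only the
# 'shared' layer is restored and the 'head_a' group is skipped.
dst.load_weights('w.h5', by_name=True)
```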
- skip_mismatch: Boolean, whether to skip loading of layers - where there is a mismatch in the number of weights, - or a mismatch in the shape of the weights. - reshape: Reshape weights to fit the layer when the correct number - of values are present but the shape does not match. - - # Raises - ValueError: in case of mismatch between provided layers - and weights file and skip_mismatch=False. - """ - if 'keras_version' in f.attrs: - original_keras_version = f.attrs['keras_version'].decode('utf8') - else: - original_keras_version = '1' - if 'backend' in f.attrs: - original_backend = f.attrs['backend'].decode('utf8') - else: - original_backend = None - - # New file format. - layer_names = load_attributes_from_hdf5_group(f, 'layer_names') - - # Reverse index of layer name to list of layers with name. - index = {} - for layer in layers: - if layer.name: - index.setdefault(layer.name, []).append(layer) - - # We batch weight value assignments in a single backend call - # which provides a speedup in TensorFlow. - weight_value_tuples = [] - for k, name in enumerate(layer_names): - g = f[name] - weight_names = load_attributes_from_hdf5_group(g, 'weight_names') - weight_values = [np.asarray(g[weight_name]) - for weight_name in weight_names] - - for layer in index.get(name, []): - symbolic_weights = layer.weights - weight_values = preprocess_weights_for_loading( - layer, - weight_values, - original_keras_version, - original_backend, - reshape=reshape) - if len(weight_values) != len(symbolic_weights): - if skip_mismatch: - warnings.warn('Skipping loading of weights for ' - 'layer {}'.format( - layer.name) + ' due to mismatch ' - 'in number of weights ({} vs {}).'.format( - len(symbolic_weights), len(weight_values))) - continue - else: - raise ValueError('Layer #' + str(k) + - ' (named "' + layer.name + - '") expects ' + - str(len(symbolic_weights)) + - ' weight(s), but the saved weights' + - ' have ' + str(len(weight_values)) + - ' element(s).') - # Set values. - for i in range(len(weight_values)): - symbolic_shape = K.int_shape(symbolic_weights[i]) - if symbolic_shape != weight_values[i].shape: - if skip_mismatch: - warnings.warn('Skipping loading of weights for ' - 'layer {}'.format( - layer.name) + ' due to ' - 'mismatch in shape ({} vs {}).'.format( - symbolic_weights[i].shape, - weight_values[i].shape)) - continue - else: - raise ValueError('Layer #' + str(k) + - ' (named "' + layer.name + - '"), weight ' + - str(symbolic_weights[i]) + - ' has shape {}'.format(symbolic_shape) + - ', but the saved weight has shape ' + - str(weight_values[i].shape) + '.') - else: - weight_value_tuples.append((symbolic_weights[i], - weight_values[i])) - - K.batch_set_value(weight_value_tuples) -"""Sequential model class. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import warnings -import copy - -from . import network -from .training import Model -from .base_layer import Layer -from .input_layer import Input -from .input_layer import InputLayer -from .. import backend as K -from .. import layers as layer_module - -try: - import h5py -except ImportError: - h5py = None - - -class Sequential(Model): - """Linear stack of layers. - - # Arguments - layers: list of layers to add to the model. 
- name: Name given to the model - - # Example - - ```python - # Optionally, the first layer can receive an `input_shape` argument: - model = Sequential() - model.add(Dense(32, input_shape=(500,))) - - # Afterwards, we do automatic shape inference: - model.add(Dense(32)) - - # This is identical to the following: - model = Sequential() - model.add(Dense(32, input_dim=500)) - - # And to the following: - model = Sequential() - model.add(Dense(32, batch_input_shape=(None, 500))) - - # Note that you can also omit the `input_shape` argument: - # In that case the model gets built the first time you call `fit` (or other - # training and evaluation methods). - model = Sequential() - model.add(Dense(32)) - model.add(Dense(32)) - model.compile(optimizer=optimizer, loss=loss) - - # This builds the model for the first time: - model.fit(x, y, batch_size=32, epochs=10) - - # Note that when using this delayed-build pattern - # (no input shape specified), - # the model doesn't have any weights until the first call - # to a training/evaluation method (since it isn't yet built): - model = Sequential() - model.add(Dense(32)) - model.add(Dense(32)) - model.weights # returns [] - - # Whereas if you specify the input shape, the model gets built continuously - # as you are adding layers: - model = Sequential() - model.add(Dense(32, input_shape=(500,))) - model.add(Dense(32)) - model.weights # returns list of length 4 - - # When using the delayed-build pattern (no input shape specified), you can - # choose to manually build your model by calling - # `build(batch_input_shape)`: - model = Sequential() - model.add(Dense(32)) - model.add(Dense(32)) - model.build((None, 500)) - model.weights # returns list of length 4 - ``` - """ - - def __init__(self, layers=None, name=None): - super(Sequential, self).__init__(name=name) - self._build_input_shape = None - - # Add to the model any layers passed to the constructor. - if layers: - for layer in layers: - self.add(layer) - - @property - def layers(self): - # Historically, `sequential.layers` only returns layers that were added - # via `add`, and omits the auto-generated `InputLayer` - # that comes at the bottom of the stack. - if self._layers and isinstance(self._layers[0], InputLayer): - return self._layers[1:] - return self._layers - - @property - def model(self): - # Historically, `Sequential` was once - # implemented as a wrapper for `Model` which maintained - # its underlying `Model` as the `model` property. - # We keep it for compatibility reasons. - warnings.warn('`Sequential.model` is deprecated. ' - '`Sequential` is a subclass of `Model`, you can ' - 'just use your `Sequential` instance directly.') - return self - - def add(self, layer): - """Adds a layer instance on top of the layer stack. - - # Arguments - layer: layer instance. - - # Raises - TypeError: If `layer` is not a layer instance. - ValueError: In case the `layer` argument does not - know its input shape. - ValueError: In case the `layer` argument has - multiple output tensors, or is already connected - somewhere else (forbidden in `Sequential` models). - """ - if not isinstance(layer, Layer): - raise TypeError('The added layer must be ' - 'an instance of class Layer. ' - 'Found: ' + str(layer)) - self.built = False - if not self._layers: - set_inputs = False - # First layer in model: check that it is an input layer. - if not isinstance(layer, InputLayer): - # Create an input tensor and call `layer` on the input tensor. - # First, we need to infer the expected input shape and dtype. 
- first_layer = layer - if isinstance(layer, (Model, Sequential)): - # We were passed a model as first layer. - # This requires a specific way to figure out the - # input shape and dtype. - if not layer.layers: - raise ValueError('Cannot add an empty model ' - 'to a `Sequential` model.') - # In case of nested models: recover the first layer - # of the deepest model to infer input shape and dtype. - first_layer = layer.layers[0] - while isinstance(first_layer, (Model, Sequential)): - first_layer = first_layer.layers[0] - - if hasattr(first_layer, 'batch_input_shape'): - batch_shape = first_layer.batch_input_shape - dtype = first_layer.dtype - # Instantiate the input layer. - x = Input( - batch_shape=batch_shape, - dtype=dtype, - name=layer.name + '_input') - # This will build the current layer - # and create the node connecting the current layer - # to the input layer we just created. - layer(x) - set_inputs = True - else: - # Corner case where the user passes an InputLayer via `add`. - assert len(layer._inbound_nodes[-1].output_tensors) == 1 - set_inputs = True - - if set_inputs: - if len(layer._inbound_nodes[-1].output_tensors) != 1: - raise ValueError('All layers in a Sequential model ' - 'should have a single output tensor. ' - 'For multi-output layers, ' - 'use the functional API.') - self.outputs = [layer._inbound_nodes[-1].output_tensors[0]] - self.inputs = network.get_source_inputs(self.outputs[0]) - elif self.outputs: - output_tensor = layer(self.outputs[0]) - if isinstance(output_tensor, list): - raise TypeError('All layers in a Sequential model ' - 'should have a single output tensor. ' - 'For multi-output layers, ' - 'use the functional API.') - self.outputs = [output_tensor] - if self.inputs: - self.build() - else: - self._layers.append(layer) - - def pop(self): - """Removes the last layer in the model. - - # Raises - TypeError: if there are no layers in the model. - """ - if not self.layers: - raise TypeError('There are no layers in the model.') - - self._layers.pop() - self.built = False - if not self.layers: - self.outputs = None - self.inputs = None - elif self.outputs: - self.layers[-1]._outbound_nodes = [] - self.outputs = [self.layers[-1].output] - self.build() - - def build(self, input_shape=None): - if input_shape and not self.inputs: - batch_shape = tuple(input_shape) - dtype = K.floatx() - x = Input(batch_shape=batch_shape, - dtype=dtype, - name=self.name + '_input') - self.inputs = [x] - for layer in self._layers: - x = layer(x) - self.outputs = [x] - self._build_input_shape = input_shape - - if self.inputs: - self._init_graph_network(self.inputs, - self.outputs, - name=self.name) - self.built = True - - def predict_proba(self, x, batch_size=32, verbose=0): - """Generates class probability predictions for the input samples. - - The input samples are processed batch by batch. - - # Arguments - x: input data, as a Numpy array or list of Numpy arrays - (if the model has multiple inputs). - batch_size: integer. - verbose: verbosity mode, 0 or 1. - - # Returns - A Numpy array of probability predictions. - """ - preds = self.predict(x, batch_size, verbose) - if preds.min() < 0. or preds.max() > 1.: - warnings.warn('Network returning invalid probability values. ' - 'The last layer might not normalize predictions ' - 'into probabilities ' - '(like softmax or sigmoid would).') - return preds - - def predict_classes(self, x, batch_size=32, verbose=0): - """Generate class predictions for the input samples. - - The input samples are processed batch by batch. 
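The decision rule coded below is easy to state on its own: argmax over the last axis for a multi-class output, a 0.5 threshold for a single sigmoid unit. For instance:

```python
import numpy as np

proba_multi = np.array([[0.1, 0.7, 0.2]])   # softmax-style output
proba_binary = np.array([[0.8]])            # single sigmoid unit

print(proba_multi.argmax(axis=-1))           # [1]
print((proba_binary > 0.5).astype('int32'))  # [[1]]
```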
- - # Arguments - x: input data, as a Numpy array or list of Numpy arrays - (if the model has multiple inputs). - batch_size: integer. - verbose: verbosity mode, 0 or 1. - - # Returns - A numpy array of class predictions. - """ - proba = self.predict(x, batch_size=batch_size, verbose=verbose) - if proba.shape[-1] > 1: - return proba.argmax(axis=-1) - else: - return (proba > 0.5).astype('int32') - - def get_config(self): - layer_configs = [] - for layer in self.layers: - layer_configs.append({ - 'class_name': layer.__class__.__name__, - 'config': layer.get_config() - }) - config = { - 'name': self.name, - 'layers': copy.deepcopy(layer_configs) - } - if self._build_input_shape: - config['build_input_shape'] = self._build_input_shape - return config - - @classmethod - def from_config(cls, config, custom_objects=None): - if 'name' in config: - name = config['name'] - build_input_shape = config.get('build_input_shape') - layer_configs = config['layers'] - else: # legacy config file - name = build_input_shape = None - layer_configs = config - model = cls(name=name) - for conf in layer_configs: - layer = layer_module.deserialize(conf, - custom_objects=custom_objects) - model.add(layer) - if not model.inputs and build_input_shape: - model.build(build_input_shape) - return model -"""This module is deprecated, but kept around for backwards compatibility. -""" -from .base_layer import Layer, Node, InputSpec -from .input_layer import Input, InputLayer -from .network import Network, get_source_inputs -"""Training-related part of the Keras engine. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import warnings -import copy -import numpy as np - -from .network import Network -from .base_layer import Layer -from .training_utils import collect_metrics -from .training_utils import check_array_length_consistency -from .training_utils import check_loss_and_target_compatibility -from .training_utils import check_generator_arguments -from .training_utils import standardize_class_weights -from .training_utils import standardize_input_data -from .training_utils import standardize_sample_weights -from .training_utils import standardize_weights -from .training_utils import weighted_masked_objective -from .training_utils import get_static_batch_size -from .training_utils import is_generator_or_sequence -from . import training_arrays -from . import training_generator -from .. import backend as K -from .. import optimizers -from .. import losses -from .. import metrics as metrics_module -from ..utils.generic_utils import slice_arrays -from ..utils.generic_utils import to_list -from ..utils.generic_utils import unpack_singleton -from ..legacy import interfaces - - -class Model(Network): - """The `Model` class adds training & evaluation routines to a `Network`. - """ - - def compile(self, optimizer, - loss=None, - metrics=None, - loss_weights=None, - sample_weight_mode=None, - weighted_metrics=None, - target_tensors=None, - **kwargs): - """Configures the model for training. - - # Arguments - optimizer: String (name of optimizer) or optimizer instance. - See [optimizers](/optimizers). - loss: String (name of objective function) or objective function. - See [losses](/losses). - If the model has multiple outputs, you can use a different loss - on each output by passing a dictionary or a list of losses. - The loss value that will be minimized by the model - will then be the sum of all individual losses. 
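A sketch of the multi-output case just described, with a dict loss and per-output loss weights (the layer names are invented):

```python
from keras.layers import Dense, Input
from keras.models import Model

x = Input(shape=(8,))
a = Dense(1, name='a')(x)
b = Dense(3, activation='softmax', name='b')(x)
model = Model(x, [a, b])
model.compile(optimizer='adam',
              loss={'a': 'mse', 'b': 'categorical_crossentropy'},
              loss_weights={'a': 1.0, 'b': 0.5})
# Minimized objective: 1.0 * mse(a) + 0.5 * crossentropy(b)
```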
- metrics: List of metrics to be evaluated by the model - during training and testing. - Typically you will use `metrics=['accuracy']`. - To specify different metrics for different outputs of a - multi-output model, you could also pass a dictionary, - such as `metrics={'output_a': 'accuracy'}`. - loss_weights: Optional list or dictionary specifying scalar - coefficients (Python floats) to weight the loss contributions - of different model outputs. - The loss value that will be minimized by the model - will then be the *weighted sum* of all individual losses, - weighted by the `loss_weights` coefficients. - If a list, it is expected to have a 1:1 mapping - to the model's outputs. If a dict, it is expected to map - output names (strings) to scalar coefficients. - sample_weight_mode: If you need to do timestep-wise - sample weighting (2D weights), set this to `"temporal"`. - `None` defaults to sample-wise weights (1D). - If the model has multiple outputs, you can use a different - `sample_weight_mode` on each output by passing a - dictionary or a list of modes. - weighted_metrics: List of metrics to be evaluated and weighted - by sample_weight or class_weight during training and testing. - target_tensors: By default, Keras will create placeholders for the - model's target, which will be fed with the target data during - training. If instead you would like to use your own - target tensors (in turn, Keras will not expect external - Numpy data for these targets at training time), you - can specify them via the `target_tensors` argument. It can be - a single tensor (for a single-output model), a list of tensors, - or a dict mapping output names to target tensors. - **kwargs: When using the Theano/CNTK backends, these arguments - are passed into `K.function`. - When using the TensorFlow backend, - these arguments are passed into `tf.Session.run`. - - # Raises - ValueError: In case of invalid arguments for - `optimizer`, `loss`, `metrics` or `sample_weight_mode`. - """ - self.optimizer = optimizers.get(optimizer) - self.loss = loss or [] - self.metrics = metrics or [] - self.loss_weights = loss_weights - self.sample_weight_mode = sample_weight_mode - self.weighted_metrics = weighted_metrics - - if not self.built: - # Model is not compilable because - # it does not know its number of inputs - # and outputs, nor their shapes and names. - # We will compile after the first - # time the model gets called on training data. - return - self._is_compiled = True - - # Prepare loss functions. - if isinstance(loss, dict): - for name in loss: - if name not in self.output_names: - raise ValueError('Unknown entry in loss ' - 'dictionary: "' + name + '". ' - 'Only expected the following keys: ' + - str(self.output_names)) - loss_functions = [] - for name in self.output_names: - if name not in loss: - warnings.warn('Output "' + name + - '" missing from loss dictionary. ' - 'We assume this was done on purpose, ' - 'and we will not be expecting ' - 'any data to be passed to "' + name + - '" during training.', stacklevel=2) - loss_functions.append(losses.get(loss.get(name))) - elif isinstance(loss, list): - if len(loss) != len(self.outputs): - raise ValueError('When passing a list as loss, ' - 'it should have one entry per model outputs. 
' - 'The model has ' + str(len(self.outputs)) + - ' outputs, but you passed loss=' + - str(loss)) - loss_functions = [losses.get(l) for l in loss] - else: - loss_function = losses.get(loss) - loss_functions = [loss_function for _ in range(len(self.outputs))] - self.loss_functions = loss_functions - weighted_losses = [ - weighted_masked_objective(fn) for fn in loss_functions] - skip_target_indices = [] - skip_target_weighing_indices = [] - self._feed_outputs = [] - self._feed_output_names = [] - self._feed_output_shapes = [] - self._feed_loss_fns = [] - for i in range(len(weighted_losses)): - if weighted_losses[i] is None: - skip_target_indices.append(i) - skip_target_weighing_indices.append(i) - - # Prepare output masks. - masks = self.compute_mask(self.inputs, mask=None) - if masks is None: - masks = [None for _ in self.outputs] - masks = to_list(masks) - - # Prepare loss weights. - if loss_weights is None: - loss_weights_list = [1. for _ in range(len(self.outputs))] - elif isinstance(loss_weights, dict): - for name in loss_weights: - if name not in self.output_names: - raise ValueError('Unknown entry in loss_weights ' - 'dictionary: "' + name + '". ' - 'Only expected the following keys: ' + - str(self.output_names)) - loss_weights_list = [] - for name in self.output_names: - loss_weights_list.append(loss_weights.get(name, 1.)) - elif isinstance(loss_weights, list): - if len(loss_weights) != len(self.outputs): - raise ValueError('When passing a list as loss_weights, ' - 'it should have one entry per model output. ' - 'The model has ' + str(len(self.outputs)) + - ' outputs, but you passed loss_weights=' + - str(loss_weights)) - loss_weights_list = loss_weights - else: - raise TypeError('Could not interpret loss_weights argument: ' + - str(loss_weights) + - ' - expected a list of dicts.') - - # Prepare targets of model. - self.targets = [] - self._feed_targets = [] - if target_tensors is not None: - if isinstance(target_tensors, list): - if len(target_tensors) != len(self.outputs): - raise ValueError( - 'When passing a list as `target_tensors`, ' - 'it should have one entry per model output. ' - 'The model has ' + str(len(self.outputs)) + - ' outputs, but you passed target_tensors=' + - str(target_tensors)) - elif isinstance(target_tensors, dict): - for name in target_tensors: - if name not in self.output_names: - raise ValueError('Unknown entry in `target_tensors` ' - 'dictionary: "' + name + '". ' - 'Only expected the following keys: ' + - str(self.output_names)) - tmp_target_tensors = [] - for name in self.output_names: - tmp_target_tensors.append(target_tensors.get(name, None)) - target_tensors = tmp_target_tensors - elif K.is_tensor(target_tensors): - if len(self.outputs) != 1: - raise ValueError('The model has ' + str(len(self.outputs)) + - ' outputs, but you passed a single tensor as ' - '`target_tensors`. 
Expected a list or a dict ' - 'of tensors.') - target_tensors = [target_tensors] - else: - raise TypeError('Expected `target_tensors` to be a tensor, ' - 'a list of tensors, or dict of tensors, but got:', - target_tensors) - - for i in range(len(self.outputs)): - if i in skip_target_indices: - self.targets.append(None) - else: - shape = K.int_shape(self.outputs[i]) - name = self.output_names[i] - if target_tensors is not None: - target = target_tensors[i] - else: - target = None - if target is None or K.is_placeholder(target): - if target is None: - target = K.placeholder( - ndim=len(shape), - name=name + '_target', - sparse=K.is_sparse(self.outputs[i]), - dtype=K.dtype(self.outputs[i])) - self._feed_targets.append(target) - self._feed_outputs.append(self.outputs[i]) - self._feed_output_names.append(name) - self._feed_output_shapes.append(shape) - self._feed_loss_fns.append(self.loss_functions[i]) - else: - skip_target_weighing_indices.append(i) - self.targets.append(target) - - # Prepare sample weights. - sample_weights = [] - sample_weight_modes = [] - if isinstance(sample_weight_mode, dict): - for name in sample_weight_mode: - if name not in self.output_names: - raise ValueError('Unknown entry in ' - 'sample_weight_mode dictionary: "' + - name + '". ' - 'Only expected the following keys: ' + - str(self.output_names)) - for i, name in enumerate(self.output_names): - if i in skip_target_weighing_indices: - weight = None - sample_weight_modes.append(None) - else: - if name not in sample_weight_mode: - raise ValueError('Output "' + name + - '" missing from sample_weight_modes ' - 'dictionary') - if sample_weight_mode.get(name) == 'temporal': - weight = K.placeholder(ndim=2, - name=name + '_sample_weights') - sample_weight_modes.append('temporal') - else: - weight = K.placeholder(ndim=1, - name=name + '_sample_weights') - sample_weight_modes.append(None) - sample_weights.append(weight) - elif isinstance(sample_weight_mode, list): - if len(sample_weight_mode) != len(self.outputs): - raise ValueError('When passing a list as sample_weight_mode, ' - 'it should have one entry per model output. ' - 'The model has ' + str(len(self.outputs)) + - ' outputs, but you passed ' - 'sample_weight_mode=' + - str(sample_weight_mode)) - for i in range(len(self.output_names)): - if i in skip_target_weighing_indices: - weight = None - sample_weight_modes.append(None) - else: - mode = sample_weight_mode[i] - name = self.output_names[i] - if mode == 'temporal': - weight = K.placeholder(ndim=2, - name=name + '_sample_weights') - sample_weight_modes.append('temporal') - else: - weight = K.placeholder(ndim=1, - name=name + '_sample_weights') - sample_weight_modes.append(None) - sample_weights.append(weight) - else: - for i, name in enumerate(self.output_names): - if i in skip_target_weighing_indices: - sample_weight_modes.append(None) - sample_weights.append(None) - else: - if sample_weight_mode == 'temporal': - sample_weights.append( - K.placeholder(ndim=2, - name=name + '_sample_weights')) - sample_weight_modes.append('temporal') - else: - sample_weights.append( - K.placeholder(ndim=1, - name=name + '_sample_weights')) - sample_weight_modes.append(None) - self.sample_weight_modes = sample_weight_modes - self._feed_sample_weight_modes = [] - for i in range(len(self.outputs)): - if i not in skip_target_weighing_indices: - self._feed_sample_weight_modes.append( - self.sample_weight_modes[i]) - - # Prepare metrics. - self.metrics_names = ['loss'] - self.metrics_tensors = [] - - # Compute total loss. 
- total_loss = None - with K.name_scope('loss'): - for i in range(len(self.outputs)): - if i in skip_target_indices: - continue - y_true = self.targets[i] - y_pred = self.outputs[i] - weighted_loss = weighted_losses[i] - sample_weight = sample_weights[i] - mask = masks[i] - loss_weight = loss_weights_list[i] - with K.name_scope(self.output_names[i] + '_loss'): - output_loss = weighted_loss(y_true, y_pred, - sample_weight, mask) - if len(self.outputs) > 1: - self.metrics_tensors.append(output_loss) - self.metrics_names.append(self.output_names[i] + '_loss') - if total_loss is None: - total_loss = loss_weight * output_loss - else: - total_loss += loss_weight * output_loss - if total_loss is None: - if not self.losses: - raise ValueError('The model cannot be compiled ' - 'because it has no loss to optimize.') - else: - total_loss = 0. - - # Add regularization penalties - # and other layer-specific losses. - for loss_tensor in self.losses: - total_loss += loss_tensor - - # List of same size as output_names. - # contains tuples (metrics for output, names of metrics). - nested_metrics = collect_metrics(metrics, self.output_names) - nested_weighted_metrics = collect_metrics(weighted_metrics, - self.output_names) - self.metrics_updates = [] - self.stateful_metric_names = [] - self.stateful_metric_functions = [] - - def handle_metrics(metrics, weights=None): - metric_name_prefix = 'weighted_' if weights is not None else '' - - for metric in metrics: - if metric in ('accuracy', 'acc', 'crossentropy', 'ce'): - # custom handling of accuracy/crossentropy - # (because of class mode duality) - output_shape = K.int_shape(self.outputs[i]) - if (output_shape[-1] == 1 or - self.loss_functions[i] == losses.binary_crossentropy): - # case: binary accuracy/crossentropy - if metric in ('accuracy', 'acc'): - metric_fn = metrics_module.binary_accuracy - elif metric in ('crossentropy', 'ce'): - metric_fn = metrics_module.binary_crossentropy - elif (self.loss_functions[i] == - losses.sparse_categorical_crossentropy): - # case: categorical accuracy/crossentropy - # with sparse targets - if metric in ('accuracy', 'acc'): - metric_fn = metrics_module.sparse_categorical_accuracy - elif metric in ('crossentropy', 'ce'): - metric_fn = ( - metrics_module.sparse_categorical_crossentropy) - else: - # case: categorical accuracy/crossentropy - if metric in ('accuracy', 'acc'): - metric_fn = metrics_module.categorical_accuracy - elif metric in ('crossentropy', 'ce'): - metric_fn = metrics_module.categorical_crossentropy - if metric in ('accuracy', 'acc'): - suffix = 'acc' - elif metric in ('crossentropy', 'ce'): - suffix = 'ce' - weighted_metric_fn = weighted_masked_objective(metric_fn) - metric_name = metric_name_prefix + suffix - else: - metric_fn = metrics_module.get(metric) - weighted_metric_fn = weighted_masked_objective(metric_fn) - # Get metric name as string - if hasattr(metric_fn, 'name'): - metric_name = metric_fn.name - else: - metric_name = metric_fn.__name__ - metric_name = metric_name_prefix + metric_name - - with K.name_scope(metric_name): - metric_result = weighted_metric_fn(y_true, y_pred, - weights=weights, - mask=masks[i]) - - # Append to self.metrics_names, self.metric_tensors, - # self.stateful_metric_names - if len(self.output_names) > 1: - metric_name = self.output_names[i] + '_' + metric_name - # Dedupe name - j = 1 - base_metric_name = metric_name - while metric_name in self.metrics_names: - metric_name = base_metric_name + '_' + str(j) - j += 1 - self.metrics_names.append(metric_name) - 
self.metrics_tensors.append(metric_result) - - # Keep track of state updates created by - # stateful metrics (i.e. metrics layers). - if isinstance(metric_fn, Layer) and metric_fn.stateful: - self.stateful_metric_names.append(metric_name) - self.stateful_metric_functions.append(metric_fn) - self.metrics_updates += metric_fn.updates - with K.name_scope('metrics'): - for i in range(len(self.outputs)): - if i in skip_target_indices: - continue - - y_true = self.targets[i] - y_pred = self.outputs[i] - weights = sample_weights[i] - output_metrics = nested_metrics[i] - output_weighted_metrics = nested_weighted_metrics[i] - handle_metrics(output_metrics) - handle_metrics(output_weighted_metrics, weights=weights) - - # Prepare gradient updates and state updates. - self.total_loss = total_loss - self.sample_weights = sample_weights - self._feed_sample_weights = [] - for i in range(len(self.sample_weights)): - if i not in skip_target_weighing_indices: - self._feed_sample_weights.append(sample_weights[i]) - - # Functions for train, test and predict will - # be compiled lazily when required. - # This saves time when the user is not using all functions. - self._function_kwargs = kwargs - - self.train_function = None - self.test_function = None - self.predict_function = None - - # Collected trainable weights, sorted in topological order. - trainable_weights = self.trainable_weights - self._collected_trainable_weights = trainable_weights - - def _check_trainable_weights_consistency(self): - """Check trainable weights count consistency. - - This will raise a warning if `trainable_weights` and - `_collected_trainable_weights` are inconsistent (i.e. have different - number of parameters). - Inconsistency will typically arise when one modifies `model.trainable` - without calling `model.compile` again. - """ - if not hasattr(self, '_collected_trainable_weights'): - return - - if (len(self.trainable_weights) != - len(self._collected_trainable_weights)): - warnings.warn(UserWarning( - 'Discrepancy between trainable weights and collected trainable' - ' weights, did you set `model.trainable` without calling' - ' `model.compile` after ?')) - - def _make_train_function(self): - if not hasattr(self, 'train_function'): - raise RuntimeError('You must compile your model before using it.') - self._check_trainable_weights_consistency() - if self.train_function is None: - inputs = (self._feed_inputs + - self._feed_targets + - self._feed_sample_weights) - if self._uses_dynamic_learning_phase(): - inputs += [K.learning_phase()] - - with K.name_scope('training'): - with K.name_scope(self.optimizer.__class__.__name__): - training_updates = self.optimizer.get_updates( - params=self._collected_trainable_weights, - loss=self.total_loss) - updates = (self.updates + - training_updates + - self.metrics_updates) - # Gets loss and metrics. Updates weights at each call. - self.train_function = K.function( - inputs, - [self.total_loss] + self.metrics_tensors, - updates=updates, - name='train_function', - **self._function_kwargs) - - def _make_test_function(self): - if not hasattr(self, 'test_function'): - raise RuntimeError('You must compile your model before using it.') - if self.test_function is None: - inputs = (self._feed_inputs + - self._feed_targets + - self._feed_sample_weights) - if self._uses_dynamic_learning_phase(): - inputs += [K.learning_phase()] - # Return loss and metrics, no gradient updates. - # Does update the network states. 
- self.test_function = K.function( - inputs, - [self.total_loss] + self.metrics_tensors, - updates=self.state_updates + self.metrics_updates, - name='test_function', - **self._function_kwargs) - - def _make_predict_function(self): - if not hasattr(self, 'predict_function'): - self.predict_function = None - if self.predict_function is None: - if self._uses_dynamic_learning_phase(): - inputs = self._feed_inputs + [K.learning_phase()] - else: - inputs = self._feed_inputs - # Gets network outputs. Does not update weights. - # Does update the network states. - kwargs = getattr(self, '_function_kwargs', {}) - self.predict_function = K.function(inputs, - self.outputs, - updates=self.state_updates, - name='predict_function', - **kwargs) - - def _uses_dynamic_learning_phase(self): - return (self.uses_learning_phase and - not isinstance(K.learning_phase(), int)) - - def _set_inputs(self, inputs, outputs=None, training=None): - """Set model's input and output specs based on the input data received. - - This is to be used for Model subclasses, which do not know at instantiation - time what their inputs look like. - - # Arguments - inputs: Single array, or list of arrays. The arrays could be - placeholders, Numpy arrays, or data tensors. - - if placeholders: the model is built on top of these - placeholders, and we expect Numpy data to be fed for them - when calling `fit`/etc. - - if Numpy data: we create placeholders matching the shape of - the Numpy arrays. We expect Numpy data to be fed for these - placeholders when calling `fit`/etc. - - if data tensors: the model is built on top of these tensors. - We do not expect any Numpy data to be provided when calling - `fit`/etc. - outputs: Optional output tensors (if already computed by running - the model). - training: Boolean or None. Only relevant in symbolic mode. - Specifies whether to build the model's graph in inference - mode (False), training mode (True), or using the Keras - learning phase (None). - """ - if self.__class__.__name__ == 'Sequential': - # Note: we can't test whether the model - # is `Sequential` via `isinstance` - # since `Sequential` depends on `Model`. - if isinstance(inputs, list): - assert len(inputs) == 1 - inputs = inputs[0] - self.build(input_shape=(None,) + inputs.shape[1:]) - return - - if self.inputs: - raise ValueError('Model inputs are already set.') - - # On-the-fly setting of symbolic model inputs - # (either by using the tensor provided, - # or by creating a placeholder if Numpy data was provided). - self.inputs = [] - self.input_names = [] - self._feed_inputs = [] - self._feed_input_names = [] - self._feed_input_shapes = [] - inputs = to_list(inputs, allow_tuple=True) - - for i, v in enumerate(inputs): - name = 'input_%d' % (i + 1) - self.input_names.append(name) - if isinstance(v, list): - v = np.asarray(v) - if v.ndim == 1: - v = np.expand_dims(v, 1) - if isinstance(v, (np.ndarray)): - # We fix the placeholder shape except the batch size. - # This is suboptimal, but it is the best we can do with the info - # we have. The user should call `model._set_inputs(placeholders)` - # to specify custom placeholders if the need arises. - shape = (None,) + v.shape[1:] - placeholder = K.placeholder(shape=shape, name=name) - self.inputs.append(placeholder) - self._feed_inputs.append(placeholder) - self._feed_input_names.append(name) - self._feed_input_shapes.append(shape) - else: - # Assumed tensor - TODO(fchollet) additional type check? 
- self.inputs.append(v) - if K.is_placeholder(v): - self._feed_inputs.append(v) - self._feed_input_names.append(name) - self._feed_input_shapes.append(K.int_shape(v)) - - if outputs is None: - # Obtain symbolic outputs by calling the model. - if self._expects_training_arg: - outputs = self.call(unpack_singleton( - self.inputs), training=training) - else: - outputs = self.call(unpack_singleton(self.inputs)) - outputs = to_list(outputs, allow_tuple=True) - self.outputs = outputs - self.output_names = [ - 'output_%d' % (i + 1) for i in range(len(self.outputs))] - self.built = True - - def _standardize_user_data(self, x, - y=None, - sample_weight=None, - class_weight=None, - check_array_lengths=True, - batch_size=None): - all_inputs = [] - if not self.built: - # We need to use `x` to set the model inputs. - # We type-check that `x` and `y` are either single arrays - # or lists of arrays. - if isinstance(x, (list, tuple)): - if not all(isinstance(v, np.ndarray) or - K.is_tensor(v) for v in x): - raise ValueError('Please provide as model inputs ' - 'either a single ' - 'array or a list of arrays. ' - 'You passed: x=' + str(x)) - all_inputs += list(x) - elif isinstance(x, dict): - raise ValueError('Please do not pass a dictionary ' - 'as model inputs.') - else: - if not isinstance(x, np.ndarray) and not K.is_tensor(x): - raise ValueError('Please provide as model inputs ' - 'either a single ' - 'array or a list of arrays. ' - 'You passed: x=' + str(x)) - all_inputs.append(x) - - # Build the model using the retrieved inputs (value or symbolic). - # If values, then in symbolic-mode placeholders will be created - # to match the value shapes. - if not self.inputs: - self._set_inputs(x) - - if y is not None: - if not self.optimizer: - raise RuntimeError('You must compile a model before ' - 'training/testing. ' - 'Use `model.compile(optimizer, loss)`.') - if not self._is_compiled: - # On-the-fly compilation of the model. - # We need to use `y` to set the model targets. - if isinstance(y, (list, tuple)): - if not all(isinstance(v, np.ndarray) or - K.is_tensor(v) for v in y): - raise ValueError('Please provide as model targets ' - 'either a single ' - 'array or a list of arrays. ' - 'You passed: y=' + str(y)) - elif isinstance(y, dict): - raise ValueError('Please do not pass a dictionary ' - 'as model targets.') - else: - if not isinstance(y, np.ndarray) and not K.is_tensor(y): - raise ValueError('Please provide as model targets ' - 'either a single ' - 'array or a list of arrays. ' - 'You passed: y=' + str(y)) - # Typecheck that all inputs are *either* value *or* symbolic. - if y is not None: - all_inputs += to_list(y, allow_tuple=True) - if any(K.is_tensor(v) for v in all_inputs): - if not all(K.is_tensor(v) for v in all_inputs): - raise ValueError('Do not pass inputs that mix Numpy ' - 'arrays and symbolic tensors. ' - 'You passed: x=' + str(x) + - '; y=' + str(y)) - - # Handle target tensors if any passed. - y = to_list(y, allow_tuple=True) - target_tensors = [v for v in y if K.is_tensor(v)] - if not target_tensors: - target_tensors = None - self.compile(optimizer=self.optimizer, - loss=self.loss, - metrics=self.metrics, - loss_weights=self.loss_weights, - target_tensors=target_tensors) - - # If `x` and `y` were all symbolic, - # then the model should not be fed any inputs and targets. - # Note: in this case, `any` and `all` are equivalent since we disallow - # mixed symbolic/value inputs. 
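The all-symbolic path this check guards looks roughly like the sketch below (assuming a TensorFlow 1.x backend; the tensors and shapes are hypothetical). When both `x` and `y` are tensors, nothing is fed at runtime and `fit` must be driven by `steps_per_epoch`:

```python
import tensorflow as tf
from keras.layers import Dense, Input
from keras.models import Model

# Hypothetical data tensors; in this path no Numpy data is fed at runtime.
x_tensor = tf.random_normal((128, 16))
y_tensor = tf.one_hot(tf.random_uniform((128,), maxval=10, dtype=tf.int32), 10)

inp = Input(tensor=x_tensor)
model = Model(inp, Dense(10, activation='softmax')(inp))
model.compile('rmsprop', 'categorical_crossentropy',
              target_tensors=[y_tensor])

# With no arrays to feed, training is driven purely by `steps_per_epoch`.
model.fit(steps_per_epoch=8, epochs=2)
```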
- if any(K.is_tensor(v) for v in all_inputs): - return [], [], [] - - # What follows is input validation and standardization to list format, - # in the case where all inputs are value arrays. - - if not self._is_graph_network: - # Case: symbolic-mode subclassed network. - # Do not do shape validation. - feed_input_names = self._feed_input_names - feed_input_shapes = None - else: - # Case: symbolic-mode graph network. - # In this case, we run extensive shape validation checks. - feed_input_names = self._feed_input_names - feed_input_shapes = self._feed_input_shapes - - # Standardize the inputs. - x = standardize_input_data( - x, - feed_input_names, - feed_input_shapes, - check_batch_axis=False, # Don't enforce the batch size. - exception_prefix='input') - - if y is not None: - if not self._is_graph_network: - feed_output_names = self._feed_output_names - feed_output_shapes = None - # Sample weighting not supported in this case. - # TODO: consider supporting it. - feed_sample_weight_modes = [None for _ in self.outputs] - else: - feed_output_names = self._feed_output_names - feed_sample_weight_modes = self._feed_sample_weight_modes - feed_output_shapes = [] - for output_shape, loss_fn in zip(self._feed_output_shapes, - self._feed_loss_fns): - if loss_fn is losses.sparse_categorical_crossentropy: - if K.image_data_format() == 'channels_first' and len( - output_shape) in [4, 5]: - feed_output_shapes.append( - (output_shape[0], 1) + output_shape[2:]) - else: - feed_output_shapes.append(output_shape[:-1] + (1,)) - elif (not hasattr(loss_fn, '__name__') or - getattr(losses, loss_fn.__name__, None) is None): - # If `loss_fn` is not a function (e.g. callable class) - # or if it not in the `losses` module, then - # it is a user-defined loss and we make no assumptions - # about it. - feed_output_shapes.append(None) - else: - feed_output_shapes.append(output_shape) - - # Standardize the outputs. - y = standardize_input_data( - y, - feed_output_names, - feed_output_shapes, - check_batch_axis=False, # Don't enforce the batch size. - exception_prefix='target') - - # Generate sample-wise weight values given the `sample_weight` and - # `class_weight` arguments. - sample_weights = standardize_sample_weights( - sample_weight, feed_output_names) - class_weights = standardize_class_weights( - class_weight, feed_output_names) - sample_weights = [ - standardize_weights(ref, sw, cw, mode) - for (ref, sw, cw, mode) in - zip(y, sample_weights, class_weights, - feed_sample_weight_modes) - ] - # Check that all arrays have the same length. - if check_array_lengths: - check_array_length_consistency(x, y, sample_weights) - if self._is_graph_network: - # Additional checks to avoid users mistakenly - # using improper loss fns. - check_loss_and_target_compatibility( - y, self._feed_loss_fns, feed_output_shapes) - else: - y = [] - sample_weights = [] - - if self.stateful and batch_size: - # Check that for stateful networks, number of samples is a multiple - # of the static batch size. - if x[0].shape[0] % batch_size != 0: - raise ValueError('In a stateful network, ' - 'you should only pass inputs with ' - 'a number of samples that can be ' - 'divided by the batch size. 
Found: ' +
-                                 str(x[0].shape[0]) + ' samples')
-        return x, y, sample_weights
-
-    def _get_callback_model(self):
-        """Returns the Callback Model for this Model."""
-        if hasattr(self, 'callback_model') and self.callback_model:
-            return self.callback_model
-        return self
-
-    def _validate_or_infer_batch_size(self, batch_size, steps, x):
-        """Validates that the `batch_size` provided is consistent with InputLayer.
-
-        It's possible that the user specified a static batch size in their
-        InputLayer. If so, this method checks the provided `batch_size` and `x`
-        arguments are consistent with this static batch size. Also, if
-        `batch_size` is `None`, this method will attempt to infer the batch size
-        from the static batch size of the InputLayer. Lastly, ValueError will be
-        raised if `x` is a generator or `Sequence` instance and `batch_size` is
-        specified as we expect users to provide batched datasets.
-
-        # Arguments
-            batch_size: The batch_size provided as an argument to
-                fit/evaluate/predict.
-            steps: The steps provided as an argument to fit/evaluate/predict.
-            x: The data passed as `x` to fit/evaluate/predict.
-
-        # Returns
-            The validated batch_size, auto-inferred from the first layer if
-            not provided.
-
-        # Raises
-            ValueError: if a batch size is specified and a generator/Sequence
-                is passed, or if the specified batch size does not match the
-                expected size defined in the Input Layer.
-        """
-        if batch_size is not None and is_generator_or_sequence(x):
-            raise ValueError('The `batch_size` argument must not be specified when'
-                             ' using a generator or Sequence as an input.')
-
-        # Avoids the override in Sequential.
-        layers = super(Model, self).layers
-        if layers:
-            first_layer = layers[0]
-            static_batch_size = get_static_batch_size(first_layer)
-            if static_batch_size is not None:
-
-                # Check `batch_size` argument is consistent with InputLayer.
-                if batch_size is not None and batch_size != static_batch_size:
-                    raise ValueError('The `batch_size` argument value {} is '
-                                     'incompatible with the specified batch '
-                                     'size of your Input Layer: {}'
-                                     .format(batch_size, static_batch_size))
-
-                # Set inferred batch size from the InputLayer.
-                if steps is None:
-                    batch_size = static_batch_size
-
-        if batch_size is None and steps is None:
-            # Backwards compatibility
-            batch_size = 32
-        return batch_size
-
-    def fit(self,
-            x=None,
-            y=None,
-            batch_size=None,
-            epochs=1,
-            verbose=1,
-            callbacks=None,
-            validation_split=0.,
-            validation_data=None,
-            shuffle=True,
-            class_weight=None,
-            sample_weight=None,
-            initial_epoch=0,
-            steps_per_epoch=None,
-            validation_steps=None,
-            validation_freq=1,
-            max_queue_size=10,
-            workers=1,
-            use_multiprocessing=False,
-            **kwargs):
-        """Trains the model for a fixed number of epochs (iterations on a dataset).
-
-        # Arguments
-            x: Input data. It could be:
-                - A Numpy array (or array-like), or a list of arrays
-                  (in case the model has multiple inputs).
-                - A dict mapping input names to the corresponding
-                  array/tensors, if the model has named inputs.
-                - A generator or `keras.utils.Sequence` returning
-                  `(inputs, targets)` or `(inputs, targets, sample weights)`.
-                - None (default) if feeding from framework-native
-                  tensors (e.g. TensorFlow data tensors).
-            y: Target data. Like the input data `x`,
-                it could be either Numpy array(s), framework-native tensor(s),
-                list of Numpy arrays (if the model has multiple outputs) or
-                None (default) if feeding from framework-native tensors
-                (e.g. TensorFlow data tensors).
-                If output layers in the model are named, you can also pass a
-                dictionary mapping output names to Numpy arrays.
-                If `x` is a generator, or `keras.utils.Sequence` instance,
-                `y` should not be specified (since targets will be obtained
-                from `x`).
-            batch_size: Integer or `None`.
-                Number of samples per gradient update.
-                If unspecified, `batch_size` will default to 32.
-                Do not specify the `batch_size` if your data is in the
-                form of symbolic tensors, generators, or `Sequence` instances
-                (since they generate batches).
-            epochs: Integer. Number of epochs to train the model.
-                An epoch is an iteration over the entire `x` and `y`
-                data provided.
-                Note that in conjunction with `initial_epoch`,
-                `epochs` is to be understood as "final epoch".
-                The model is not trained for a number of iterations
-                given by `epochs`, but merely until the epoch
-                of index `epochs` is reached.
-            verbose: Integer. 0, 1, or 2. Verbosity mode.
-                0 = silent, 1 = progress bar, 2 = one line per epoch.
-            callbacks: List of `keras.callbacks.Callback` instances.
-                List of callbacks to apply during training and validation
-                (if validation data is provided).
-                See [callbacks](/callbacks).
-            validation_split: Float between 0 and 1.
-                Fraction of the training data to be used as validation data.
-                The model will set apart this fraction of the training data,
-                will not train on it, and will evaluate
-                the loss and any model metrics
-                on this data at the end of each epoch.
-                The validation data is selected from the last samples
-                in the `x` and `y` data provided, before shuffling.
-                This argument is not supported when `x` is a generator or
-                `Sequence` instance.
-            validation_data: Data on which to evaluate
-                the loss and any model metrics at the end of each epoch.
-                The model will not be trained on this data.
-                `validation_data` will override `validation_split`.
-                `validation_data` could be:
-                    - tuple `(x_val, y_val)` of Numpy arrays or tensors
-                    - tuple `(x_val, y_val, val_sample_weights)` of Numpy arrays
-                    - dataset or a dataset iterator
-                For the first two cases, `batch_size` must be provided.
-                For the last case, `validation_steps` must be provided.
-            shuffle: Boolean (whether to shuffle the training data
-                before each epoch) or str (for 'batch').
-                'batch' is a special option for dealing with the
-                limitations of HDF5 data; it shuffles in batch-sized chunks.
-                Has no effect when `steps_per_epoch` is not `None`.
-            class_weight: Optional dictionary mapping class indices (integers)
-                to a weight (float) value, used for weighting the loss function
-                (during training only).
-                This can be useful to tell the model to
-                "pay more attention" to samples from
-                an under-represented class.
-            sample_weight: Optional Numpy array of weights for
-                the training samples, used for weighting the loss function
-                (during training only). You can either pass a flat (1D)
-                Numpy array with the same length as the input samples
-                (1:1 mapping between weights and samples),
-                or in the case of temporal data,
-                you can pass a 2D array with shape
-                `(samples, sequence_length)`,
-                to apply a different weight to every timestep of every sample.
-                In this case you should make sure to specify
-                `sample_weight_mode="temporal"` in `compile()`. This argument
-                is not supported when `x` is a generator or `Sequence`
-                instance; instead, provide the sample_weights as the third
-                element of `x`.
-            initial_epoch: Integer.
-                Epoch at which to start training
-                (useful for resuming a previous training run).
-            steps_per_epoch: Integer or `None`.
-                Total number of steps (batches of samples)
-                before declaring one epoch finished and starting the
-                next epoch. When training with input tensors such as
-                TensorFlow data tensors, the default `None` is equal to
-                the number of samples in your dataset divided by
-                the batch size, or 1 if that cannot be determined.
-            validation_steps: Only relevant if `steps_per_epoch` is specified,
-                or if `validation_data` is provided and is a generator.
-                Total number of steps (batches of samples) to draw before
-                stopping when performing validation at the end of every epoch.
-            validation_freq: Only relevant if validation data is provided. Integer
-                or list/tuple/set. If an integer, specifies how many training
-                epochs to run before a new validation run is performed, e.g.
-                `validation_freq=2` runs validation every 2 epochs. If a list,
-                tuple, or set, specifies the epochs on which to run validation,
-                e.g. `validation_freq=[1, 2, 10]` runs validation at the end
-                of the 1st, 2nd, and 10th epochs.
-            max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
-                input only. Maximum size for the generator queue.
-                If unspecified, `max_queue_size` will default to 10.
-            workers: Integer. Used for generator or `keras.utils.Sequence` input
-                only. Maximum number of processes to spin up
-                when using process-based threading. If unspecified, `workers`
-                will default to 1. If 0, will execute the generator on the main
-                thread.
-            use_multiprocessing: Boolean. Used for generator or
-                `keras.utils.Sequence` input only. If `True`, use process-based
-                threading. If unspecified, `use_multiprocessing` will default to
-                `False`. Note that because this implementation relies on
-                multiprocessing, you should not pass non-picklable arguments to
-                the generator as they can't be passed easily to children processes.
-            **kwargs: Used for backwards compatibility.
-
-        # Returns
-            A `History` object. Its `History.history` attribute is
-            a record of training loss values and metrics values
-            at successive epochs, as well as validation loss values
-            and validation metrics values (if applicable).
-
-        # Raises
-            RuntimeError: If the model was never compiled.
-            ValueError: In case of mismatch between the provided input data
-                and what the model expects.
-        """
-        # Legacy support
-        if 'nb_epoch' in kwargs:
-            warnings.warn('The `nb_epoch` argument in `fit` '
-                          'has been renamed `epochs`.', stacklevel=2)
-            epochs = kwargs.pop('nb_epoch')
-        if kwargs:
-            raise TypeError('Unrecognized keyword arguments: ' + str(kwargs))
-
-        if x is None and y is None and steps_per_epoch is None:
-            raise ValueError('If fitting from data tensors, '
-                             'you should specify the `steps_per_epoch` '
-                             'argument.')
-
-        batch_size = self._validate_or_infer_batch_size(
-            batch_size, steps_per_epoch, x)
-
-        # Case 1: generator-like. Input is Python generator,
-        # or Sequence object, or iterator.
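The `_validate_or_infer_batch_size` call just above is also where a static batch size on the InputLayer is enforced; a hedged sketch (the model and arrays are hypothetical):

```python
import numpy as np
from keras.layers import Dense, Input
from keras.models import Model

# Hypothetical model whose InputLayer fixes a static batch size of 32.
inp = Input(batch_shape=(32, 16))
model = Model(inp, Dense(1)(inp))
model.compile('sgd', 'mse')

x = np.random.rand(64, 16)
y = np.random.rand(64, 1)
model.fit(x, y, epochs=1)  # batch_size is inferred as 32 from the InputLayer
# model.fit(x, y, batch_size=16)  # would raise: incompatible with static size 32
```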
- if is_generator_or_sequence(x): - check_generator_arguments( - y, sample_weight, validation_split=validation_split) - return self.fit_generator( - x, - steps_per_epoch=steps_per_epoch, - epochs=epochs, - verbose=verbose, - callbacks=callbacks, - validation_data=validation_data, - validation_steps=validation_steps, - validation_freq=validation_freq, - class_weight=class_weight, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - shuffle=shuffle, - initial_epoch=initial_epoch) - - # Case 2: Symbolic tensors or Numpy array-like. - x, y, sample_weights = self._standardize_user_data( - x, y, - sample_weight=sample_weight, - class_weight=class_weight, - batch_size=batch_size) - - # Prepare validation data. - do_validation = False - if validation_data: - do_validation = True - if len(validation_data) == 2: - val_x, val_y = validation_data - val_sample_weight = None - elif len(validation_data) == 3: - val_x, val_y, val_sample_weight = validation_data - else: - raise ValueError('When passing validation_data, ' - 'it must contain 2 (x_val, y_val) ' - 'or 3 (x_val, y_val, val_sample_weights) ' - 'items, however it contains %d items' % - len(validation_data)) - - val_x, val_y, val_sample_weights = self._standardize_user_data( - val_x, val_y, - sample_weight=val_sample_weight, - batch_size=batch_size) - if self._uses_dynamic_learning_phase(): - val_inputs = val_x + val_y + val_sample_weights + [0.] - else: - val_inputs = val_x + val_y + val_sample_weights - - elif validation_split and 0. < validation_split < 1.: - if any(K.is_tensor(t) for t in x): - raise ValueError( - 'If your data is in the form of symbolic tensors, ' - 'you cannot use `validation_split`.') - do_validation = True - if hasattr(x[0], 'shape'): - split_at = int(int(x[0].shape[0]) * (1. - validation_split)) - else: - split_at = int(len(x[0]) * (1. - validation_split)) - x, val_x = (slice_arrays(x, 0, split_at), - slice_arrays(x, split_at)) - y, val_y = (slice_arrays(y, 0, split_at), - slice_arrays(y, split_at)) - sample_weights, val_sample_weights = ( - slice_arrays(sample_weights, 0, split_at), - slice_arrays(sample_weights, split_at)) - if self._uses_dynamic_learning_phase(): - val_inputs = val_x + val_y + val_sample_weights + [0.] - else: - val_inputs = val_x + val_y + val_sample_weights - - elif validation_steps: - do_validation = True - if self._uses_dynamic_learning_phase(): - val_inputs = [0.] - - # Prepare input arrays and training function. - if self._uses_dynamic_learning_phase(): - fit_inputs = x + y + sample_weights + [1.] - else: - fit_inputs = x + y + sample_weights - self._make_train_function() - fit_function = self.train_function - - # Prepare display labels. - out_labels = self.metrics_names - - if do_validation: - self._make_test_function() - val_function = self.test_function - callback_metrics = copy.copy(out_labels) + [ - 'val_' + n for n in out_labels] - else: - callback_metrics = copy.copy(out_labels) - val_function = None - val_inputs = [] - - # Delegate logic to `fit_loop`. 
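The `validation_split` branch above slices the tail of the arrays before any shuffling, as the docstring promises. A minimal sketch, assuming a compiled single-output `model`:

```python
import numpy as np

# The last 25% of the arrays (taken before any shuffling) becomes the
# validation set; `model` is assumed to be compiled with a single output.
x = np.random.rand(1000, 16)
y = np.random.rand(1000, 1)
history = model.fit(x, y, batch_size=32, epochs=5, validation_split=0.25)
print(history.history['val_loss'])  # one entry per epoch
```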
-        return training_arrays.fit_loop(self, fit_function, fit_inputs,
-                                        out_labels=out_labels,
-                                        batch_size=batch_size,
-                                        epochs=epochs,
-                                        verbose=verbose,
-                                        callbacks=callbacks,
-                                        val_function=val_function,
-                                        val_inputs=val_inputs,
-                                        shuffle=shuffle,
-                                        callback_metrics=callback_metrics,
-                                        initial_epoch=initial_epoch,
-                                        steps_per_epoch=steps_per_epoch,
-                                        validation_steps=validation_steps,
-                                        validation_freq=validation_freq)
-
-    def evaluate(self,
-                 x=None,
-                 y=None,
-                 batch_size=None,
-                 verbose=1,
-                 sample_weight=None,
-                 steps=None,
-                 callbacks=None,
-                 max_queue_size=10,
-                 workers=1,
-                 use_multiprocessing=False):
-        """Returns the loss value & metrics values for the model in test mode.
-
-        Computation is done in batches.
-
-        # Arguments
-            x: Input data. It could be:
-                - A Numpy array (or array-like), or a list of arrays
-                  (in case the model has multiple inputs).
-                - A dict mapping input names to the corresponding
-                  array/tensors, if the model has named inputs.
-                - A generator or `keras.utils.Sequence` returning
-                  `(inputs, targets)` or `(inputs, targets, sample weights)`.
-                - None (default) if feeding from framework-native
-                  tensors (e.g. TensorFlow data tensors).
-            y: Target data. Like the input data `x`,
-                it could be either Numpy array(s), framework-native tensor(s),
-                list of Numpy arrays (if the model has multiple outputs) or
-                None (default) if feeding from framework-native tensors
-                (e.g. TensorFlow data tensors).
-                If output layers in the model are named, you can also pass a
-                dictionary mapping output names to Numpy arrays.
-                If `x` is a generator, or `keras.utils.Sequence` instance,
-                `y` should not be specified (since targets will be obtained
-                from `x`).
-            batch_size: Integer or `None`.
-                Number of samples per gradient update.
-                If unspecified, `batch_size` will default to 32.
-                Do not specify the `batch_size` if your data is in the
-                form of symbolic tensors, generators, or
-                `keras.utils.Sequence` instances (since they generate batches).
-            verbose: 0 or 1. Verbosity mode.
-                0 = silent, 1 = progress bar.
-            sample_weight: Optional Numpy array of weights for
-                the test samples, used for weighting the loss function.
-                You can either pass a flat (1D)
-                Numpy array with the same length as the input samples
-                (1:1 mapping between weights and samples),
-                or in the case of temporal data,
-                you can pass a 2D array with shape
-                `(samples, sequence_length)`,
-                to apply a different weight to every timestep of every sample.
-                In this case you should make sure to specify
-                `sample_weight_mode="temporal"` in `compile()`.
-            steps: Integer or `None`.
-                Total number of steps (batches of samples)
-                before declaring the evaluation round finished.
-                Ignored with the default value of `None`.
-            callbacks: List of `keras.callbacks.Callback` instances.
-                List of callbacks to apply during evaluation.
-                See [callbacks](/callbacks).
-            max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
-                input only. Maximum size for the generator queue.
-                If unspecified, `max_queue_size` will default to 10.
-            workers: Integer. Used for generator or `keras.utils.Sequence` input
-                only. Maximum number of processes to spin up when using
-                process-based threading. If unspecified, `workers` will default
-                to 1. If 0, will execute the generator on the main thread.
-            use_multiprocessing: Boolean. Used for generator or
-                `keras.utils.Sequence` input only. If `True`, use process-based
-                threading. If unspecified, `use_multiprocessing` will default to
-                `False`.
Note that because this implementation relies on
-                multiprocessing, you should not pass non-picklable arguments to
-                the generator as they can't be passed easily to children processes.
-
-        # Raises
-            ValueError: in case of invalid arguments.
-
-        # Returns
-            Scalar test loss (if the model has a single output and no metrics)
-            or list of scalars (if the model has multiple outputs
-            and/or metrics). The attribute `model.metrics_names` will give you
-            the display labels for the scalar outputs.
-        """
-
-        batch_size = self._validate_or_infer_batch_size(batch_size, steps, x)
-
-        # Case 1: generator-like. Input is Python generator, or Sequence object.
-        if is_generator_or_sequence(x):
-            check_generator_arguments(y, sample_weight)
-            return self.evaluate_generator(
-                x,
-                steps=steps,
-                verbose=verbose,
-                callbacks=callbacks,
-                max_queue_size=max_queue_size,
-                workers=workers,
-                use_multiprocessing=use_multiprocessing)
-
-        # Case 2: Symbolic tensors or Numpy array-like.
-        if x is None and y is None and steps is None:
-            raise ValueError('If evaluating from data tensors, '
-                             'you should specify the `steps` '
-                             'argument.')
-        # Validate user data.
-        x, y, sample_weights = self._standardize_user_data(
-            x, y,
-            sample_weight=sample_weight,
-            batch_size=batch_size)
-        # Prepare inputs, delegate logic to `test_loop`.
-        if self._uses_dynamic_learning_phase():
-            ins = x + y + sample_weights + [0.]
-        else:
-            ins = x + y + sample_weights
-        self._make_test_function()
-        f = self.test_function
-        return training_arrays.test_loop(self, f, ins,
-                                         batch_size=batch_size,
-                                         verbose=verbose,
-                                         steps=steps,
-                                         callbacks=callbacks)
-
-    def predict(self, x,
-                batch_size=None,
-                verbose=0,
-                steps=None,
-                callbacks=None,
-                max_queue_size=10,
-                workers=1,
-                use_multiprocessing=False):
-        """Generates output predictions for the input samples.
-
-        Computation is done in batches.
-
-        # Arguments
-            x: Input data. It could be:
-                - A Numpy array (or array-like), or a list of arrays
-                  (in case the model has multiple inputs).
-                - A dict mapping input names to the corresponding
-                  array/tensors, if the model has named inputs.
-                - A generator or `keras.utils.Sequence` returning
-                  `(inputs, targets)` or `(inputs, targets, sample weights)`.
-                - None (default) if feeding from framework-native
-                  tensors (e.g. TensorFlow data tensors).
-            batch_size: Integer or `None`.
-                Number of samples per gradient update.
-                If unspecified, `batch_size` will default to 32.
-                Do not specify the `batch_size` if your data is in the
-                form of symbolic tensors, generators, or
-                `keras.utils.Sequence` instances (since they generate batches).
-            verbose: Verbosity mode, 0 or 1.
-            steps: Total number of steps (batches of samples)
-                before declaring the prediction round finished.
-                Ignored with the default value of `None`.
-            callbacks: List of `keras.callbacks.Callback` instances.
-                List of callbacks to apply during prediction.
-                See [callbacks](/callbacks).
-            max_queue_size: Integer. Used for generator or `keras.utils.Sequence`
-                input only. Maximum size for the generator queue.
-                If unspecified, `max_queue_size` will default to 10.
-            workers: Integer. Used for generator or `keras.utils.Sequence` input
-                only. Maximum number of processes to spin up when using
-                process-based threading. If unspecified, `workers` will default
-                to 1. If 0, will execute the generator on the main thread.
-            use_multiprocessing: Boolean. Used for generator or
-                `keras.utils.Sequence` input only. If `True`, use process-based
-                threading.
If unspecified, `use_multiprocessing` will default to - `False`. Note that because this implementation relies on - multiprocessing, you should not pass non-picklable arguments to - the generator as they can't be passed easily to children processes. - - # Returns - Numpy array(s) of predictions. - - # Raises - ValueError: In case of mismatch between the provided - input data and the model's expectations, - or in case a stateful model receives a number of samples - that is not a multiple of the batch size. - """ - - batch_size = self._validate_or_infer_batch_size(batch_size, steps, x) - - # Case 1: generator-like. Input is Python generator, or Sequence object. - if is_generator_or_sequence(x): - return self.predict_generator( - x, - steps=steps, - verbose=verbose, - callbacks=callbacks, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing) - - if x is None and steps is None: - raise ValueError('If predicting from data tensors, ' - 'you should specify the `steps` ' - 'argument.') - - # Case 2: Symbolic tensors or Numpy array-like. - x, _, _ = self._standardize_user_data(x) - if self.stateful: - if x[0].shape[0] > batch_size and x[0].shape[0] % batch_size != 0: - raise ValueError('In a stateful network, ' - 'you should only pass inputs with ' - 'a number of samples that can be ' - 'divided by the batch size. Found: ' + - str(x[0].shape[0]) + ' samples. ' - 'Batch size: ' + str(batch_size) + '.') - - # Prepare inputs, delegate logic to `predict_loop`. - if self._uses_dynamic_learning_phase(): - ins = x + [0.] - else: - ins = x - self._make_predict_function() - f = self.predict_function - return training_arrays.predict_loop(self, f, ins, - batch_size=batch_size, - verbose=verbose, - steps=steps, - callbacks=callbacks) - - def train_on_batch(self, x, y, - sample_weight=None, - class_weight=None): - """Runs a single gradient update on a single batch of data. - - # Arguments - x: Numpy array of training data, - or list of Numpy arrays if the model has multiple inputs. - If all inputs in the model are named, - you can also pass a dictionary - mapping input names to Numpy arrays. - y: Numpy array of target data, - or list of Numpy arrays if the model has multiple outputs. - If all outputs in the model are named, - you can also pass a dictionary - mapping output names to Numpy arrays. - sample_weight: Optional array of the same length as x, containing - weights to apply to the model's loss for each sample. - In the case of temporal data, you can pass a 2D array - with shape (samples, sequence_length), - to apply a different weight to every timestep of every sample. - In this case you should make sure to specify - sample_weight_mode="temporal" in compile(). - class_weight: Optional dictionary mapping - class indices (integers) to - a weight (float) to apply to the model's loss for the samples - from this class during training. - This can be useful to tell the model to "pay more attention" to - samples from an under-represented class. - - # Returns - Scalar training loss - (if the model has a single output and no metrics) - or list of scalars (if the model has multiple outputs - and/or metrics). The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. - """ - x, y, sample_weights = self._standardize_user_data( - x, y, - sample_weight=sample_weight, - class_weight=class_weight) - if self._uses_dynamic_learning_phase(): - ins = x + y + sample_weights + [1.] 
- else: - ins = x + y + sample_weights - self._make_train_function() - outputs = self.train_function(ins) - return unpack_singleton(outputs) - - def test_on_batch(self, x, y, sample_weight=None): - """Test the model on a single batch of samples. - - # Arguments - x: Numpy array of test data, - or list of Numpy arrays if the model has multiple inputs. - If all inputs in the model are named, - you can also pass a dictionary - mapping input names to Numpy arrays. - y: Numpy array of target data, - or list of Numpy arrays if the model has multiple outputs. - If all outputs in the model are named, - you can also pass a dictionary - mapping output names to Numpy arrays. - sample_weight: Optional array of the same length as x, containing - weights to apply to the model's loss for each sample. - In the case of temporal data, you can pass a 2D array - with shape (samples, sequence_length), - to apply a different weight to every timestep of every sample. - In this case you should make sure to specify - sample_weight_mode="temporal" in compile(). - - # Returns - Scalar test loss (if the model has a single output and no metrics) - or list of scalars (if the model has multiple outputs - and/or metrics). The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. - """ - x, y, sample_weights = self._standardize_user_data( - x, y, - sample_weight=sample_weight) - if self._uses_dynamic_learning_phase(): - ins = x + y + sample_weights + [0.] - else: - ins = x + y + sample_weights - self._make_test_function() - outputs = self.test_function(ins) - return unpack_singleton(outputs) - - def predict_on_batch(self, x): - """Returns predictions for a single batch of samples. - - # Arguments - x: Input samples, as a Numpy array. - - # Returns - Numpy array(s) of predictions. - """ - x, _, _ = self._standardize_user_data(x) - if self._uses_dynamic_learning_phase(): - ins = x + [0.] - else: - ins = x - self._make_predict_function() - outputs = self.predict_function(ins) - return unpack_singleton(outputs) - - @interfaces.legacy_generator_methods_support - def fit_generator(self, generator, - steps_per_epoch=None, - epochs=1, - verbose=1, - callbacks=None, - validation_data=None, - validation_steps=None, - validation_freq=1, - class_weight=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - shuffle=True, - initial_epoch=0): - """Trains the model on data generated batch-by-batch by a Python generator - (or an instance of `Sequence`). - - The generator is run in parallel to the model, for efficiency. - For instance, this allows you to do real-time data augmentation - on images on CPU in parallel to training your model on GPU. - - The use of `keras.utils.Sequence` guarantees the ordering - and guarantees the single use of every input per epoch when - using `use_multiprocessing=True`. - - # Arguments - generator: A generator or an instance of `Sequence` - (`keras.utils.Sequence`) object in order to avoid - duplicate data when using multiprocessing. - The output of the generator must be either - - a tuple `(inputs, targets)` - - a tuple `(inputs, targets, sample_weights)`. - This tuple (a single output of the generator) makes a single - batch. Therefore, all arrays in this tuple must have the same - length (equal to the size of this batch). Different batches may - have different sizes. For example, the last batch of the epoch - is commonly smaller than the others, if the size of the dataset - is not divisible by the batch size. 
- The generator is expected to loop over its data - indefinitely. An epoch finishes when `steps_per_epoch` - batches have been seen by the model. - steps_per_epoch: Integer. - Total number of steps (batches of samples) - to yield from `generator` before declaring one epoch - finished and starting the next epoch. It should typically - be equal to `ceil(num_samples / batch_size)` - Optional for `Sequence`: if unspecified, will use - the `len(generator)` as a number of steps. - epochs: Integer. Number of epochs to train the model. - An epoch is an iteration over the entire data provided, - as defined by `steps_per_epoch`. - Note that in conjunction with `initial_epoch`, - `epochs` is to be understood as "final epoch". - The model is not trained for a number of iterations - given by `epochs`, but merely until the epoch - of index `epochs` is reached. - verbose: Integer. 0, 1, or 2. Verbosity mode. - 0 = silent, 1 = progress bar, 2 = one line per epoch. - callbacks: List of `keras.callbacks.Callback` instances. - List of callbacks to apply during training. - See [callbacks](/callbacks). - validation_data: This can be either - - a generator or a `Sequence` object for the validation data - - tuple `(x_val, y_val)` - - tuple `(x_val, y_val, val_sample_weights)` - on which to evaluate - the loss and any model metrics at the end of each epoch. - The model will not be trained on this data. - validation_steps: Only relevant if `validation_data` - is a generator. Total number of steps (batches of samples) - to yield from `validation_data` generator before stopping - at the end of every epoch. It should typically - be equal to the number of samples of your - validation dataset divided by the batch size. - Optional for `Sequence`: if unspecified, will use - the `len(validation_data)` as a number of steps. - validation_freq: Only relevant if validation data is provided. Integer - or `collections.Container` instance (e.g. list, tuple, etc.). If an - integer, specifies how many training epochs to run before a new - validation run is performed, e.g. `validation_freq=2` runs - validation every 2 epochs. If a Container, specifies the epochs on - which to run validation, e.g. `validation_freq=[1, 2, 10]` runs - validation at the end of the 1st, 2nd, and 10th epochs. - class_weight: Optional dictionary mapping class indices (integers) - to a weight (float) value, used for weighting the loss function - (during training only). This can be useful to tell the model to - "pay more attention" to samples - from an under-represented class. - max_queue_size: Integer. Maximum size for the generator queue. - If unspecified, `max_queue_size` will default to 10. - workers: Integer. Maximum number of processes to spin up - when using process-based threading. - If unspecified, `workers` will default to 1. If 0, will - execute the generator on the main thread. - use_multiprocessing: Boolean. - If `True`, use process-based threading. - If unspecified, `use_multiprocessing` will default to `False`. - Note that because this implementation - relies on multiprocessing, - you should not pass non-picklable arguments to the generator - as they can't be passed easily to children processes. - shuffle: Boolean. Whether to shuffle the order of the batches at - the beginning of each epoch. Only used with instances - of `Sequence` (`keras.utils.Sequence`). - Has no effect when `steps_per_epoch` is not `None`. - initial_epoch: Integer. - Epoch at which to start training - (useful for resuming a previous training run). 
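A `keras.utils.Sequence` satisfies these requirements without the loop-forever contract, since `steps_per_epoch` falls back to `len(generator)`. A hedged sketch (`XBatches` and the compiled `model` are hypothetical):

```python
import numpy as np
from keras.utils import Sequence

class XBatches(Sequence):
    """Hypothetical Sequence serving in-memory arrays batch by batch."""
    def __init__(self, x, y, batch_size=32):
        self.x, self.y, self.batch_size = x, y, batch_size

    def __len__(self):
        # Batches per epoch; used when `steps_per_epoch` is omitted.
        return int(np.ceil(len(self.x) / float(self.batch_size)))

    def __getitem__(self, idx):
        sl = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        return self.x[sl], self.y[sl]

seq = XBatches(np.random.rand(640, 16), np.random.rand(640, 1))
model.fit_generator(seq, epochs=2, workers=2, use_multiprocessing=False)
```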
- - # Returns - A `History` object. Its `History.history` attribute is - a record of training loss values and metrics values - at successive epochs, as well as validation loss values - and validation metrics values (if applicable). - - # Raises - ValueError: In case the generator yields data in an invalid format. - - # Example - - ```python - def generate_arrays_from_file(path): - while True: - with open(path) as f: - for line in f: - # create numpy arrays of input data - # and labels, from each line in the file - x1, x2, y = process_line(line) - yield ({'input_1': x1, 'input_2': x2}, {'output': y}) - - model.fit_generator(generate_arrays_from_file('/my_file.txt'), - steps_per_epoch=10000, epochs=10) - ``` - """ - return training_generator.fit_generator( - self, generator, - steps_per_epoch=steps_per_epoch, - epochs=epochs, - verbose=verbose, - callbacks=callbacks, - validation_data=validation_data, - validation_steps=validation_steps, - validation_freq=validation_freq, - class_weight=class_weight, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - shuffle=shuffle, - initial_epoch=initial_epoch) - - @interfaces.legacy_generator_methods_support - def evaluate_generator(self, generator, - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - verbose=0): - """Evaluates the model on a data generator. - - The generator should return the same kind of data - as accepted by `test_on_batch`. - - # Arguments - generator: Generator yielding tuples (inputs, targets) - or (inputs, targets, sample_weights) - or an instance of Sequence (keras.utils.Sequence) - object in order to avoid duplicate data - when using multiprocessing. - steps: Total number of steps (batches of samples) - to yield from `generator` before stopping. - Optional for `Sequence`: if unspecified, will use - the `len(generator)` as a number of steps. - callbacks: List of `keras.callbacks.Callback` instances. - List of callbacks to apply during training. - See [callbacks](/callbacks). - max_queue_size: maximum size for the generator queue - workers: Integer. Maximum number of processes to spin up - when using process based threading. - If unspecified, `workers` will default to 1. If 0, will - execute the generator on the main thread. - use_multiprocessing: if True, use process based threading. - Note that because - this implementation relies on multiprocessing, - you should not pass - non picklable arguments to the generator - as they can't be passed - easily to children processes. - verbose: verbosity mode, 0 or 1. - - # Returns - Scalar test loss (if the model has a single output and no metrics) - or list of scalars (if the model has multiple outputs - and/or metrics). The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. - - # Raises - ValueError: In case the generator yields - data in an invalid format. - """ - return training_generator.evaluate_generator( - self, generator, - steps=steps, - callbacks=callbacks, - max_queue_size=max_queue_size, - workers=workers, - use_multiprocessing=use_multiprocessing, - verbose=verbose) - - @interfaces.legacy_generator_methods_support - def predict_generator(self, generator, - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - verbose=0): - """Generates predictions for the input samples from a data generator. - - The generator should return the same kind of data as accepted by - `predict_on_batch`. 
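Evaluation and prediction follow the same pattern; reusing the hypothetical `XBatches` sequence from the earlier sketch, `steps` likewise defaults to `len(seq)`:

```python
# Reusing the hypothetical `XBatches` sequence from the earlier sketch;
# `steps` likewise defaults to `len(seq)` for Sequence inputs.
loss = model.evaluate_generator(seq, verbose=0)
preds = model.predict_generator(seq, verbose=0)
```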
-
-        # Arguments
-            generator: Generator yielding batches of input samples
-                    or an instance of Sequence (keras.utils.Sequence)
-                    object in order to avoid duplicate data
-                    when using multiprocessing.
-            steps: Total number of steps (batches of samples)
-                to yield from `generator` before stopping.
-                Optional for `Sequence`: if unspecified, will use
-                the `len(generator)` as a number of steps.
-            callbacks: List of `keras.callbacks.Callback` instances.
-                List of callbacks to apply during training.
-                See [callbacks](/callbacks).
-            max_queue_size: Maximum size for the generator queue.
-            workers: Integer. Maximum number of processes to spin up
-                when using process based threading.
-                If unspecified, `workers` will default to 1. If 0, will
-                execute the generator on the main thread.
-            use_multiprocessing: If `True`, use process based threading.
-                Note that because
-                this implementation relies on multiprocessing,
-                you should not pass
-                non picklable arguments to the generator
-                as they can't be passed
-                easily to children processes.
-            verbose: verbosity mode, 0 or 1.
-
-        # Returns
-            Numpy array(s) of predictions.
-
-        # Raises
-            ValueError: In case the generator yields
-                data in an invalid format.
-        """
-        return training_generator.predict_generator(
-            self, generator,
-            steps=steps,
-            callbacks=callbacks,
-            max_queue_size=max_queue_size,
-            workers=workers,
-            use_multiprocessing=use_multiprocessing,
-            verbose=verbose)
-"""Part of the training engine related to plain array data (e.g. Numpy).
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-from scipy.sparse import issparse
-
-from .training_utils import batch_shuffle
-from .training_utils import check_num_samples
-from .training_utils import make_batches
-from .training_utils import should_run_validation
-from .. import backend as K
-from .. import callbacks as cbks
-from ..utils.generic_utils import Progbar
-from ..utils.generic_utils import slice_arrays
-from ..utils.generic_utils import to_list
-from ..utils.generic_utils import unpack_singleton
-
-
-def fit_loop(model, fit_function, fit_inputs,
-             out_labels=None,
-             batch_size=None,
-             epochs=100,
-             verbose=1,
-             callbacks=None,
-             val_function=None,
-             val_inputs=None,
-             shuffle=True,
-             callback_metrics=None,
-             initial_epoch=0,
-             steps_per_epoch=None,
-             validation_steps=None,
-             validation_freq=1):
-    """Abstract fit function for `fit_function(fit_inputs)`.
-
-    Assumes that fit_function returns a list, labeled by out_labels.
-
-    # Arguments
-        model: Keras model instance.
-        fit_function: Keras function returning a list of tensors
-        fit_inputs: List of tensors to be fed to `fit_function`
-        out_labels: List of strings, display names of
-            the outputs of `fit_function`
-        batch_size: Integer batch size or None if unknown.
-        epochs: Number of times to iterate over the data
-        verbose: Verbosity mode, 0, 1 or 2
-        callbacks: List of callbacks to be called during training and validation
-            (if `val_function` and `val_inputs` are not `None`).
-        val_function: Keras function to call for validation
-        val_inputs: List of tensors to be fed to `val_function`
-        shuffle: Whether to shuffle the data at the beginning of each epoch
-        callback_metrics: List of strings, the display names of the metrics
-            passed to the callbacks. They should be the concatenation of the
-            display names of the outputs of `fit_function` and the display
-            names of the outputs of `val_function`.
-        initial_epoch: Epoch at which to start training
-            (useful for resuming a previous training run)
-        steps_per_epoch: Total number of steps (batches of samples)
-            before declaring one epoch finished and starting the
-            next epoch. Ignored with the default value of `None`.
-        validation_steps: Number of steps to run validation for
-            (only if doing validation from data tensors).
-            Ignored with the default value of `None`.
-        validation_freq: Only relevant if validation data is provided. Integer
-            or list/tuple/set. If an integer, specifies how many training
-            epochs to run before a new validation run is performed, e.g.
-            `validation_freq=2` runs validation every 2 epochs. If a list,
-            tuple, or set, specifies the epochs on which to run validation,
-            e.g. `validation_freq=[1, 2, 10]` runs validation at the end
-            of the 1st, 2nd, and 10th epochs.
-
-    # Returns
-        `History` object.
-    """
-    do_validation = False
-    if val_function and val_inputs:
-        do_validation = True
-        if (verbose and fit_inputs and
-                hasattr(fit_inputs[0], 'shape') and hasattr(val_inputs[0], 'shape')):
-            print('Train on %d samples, validate on %d samples' %
-                  (fit_inputs[0].shape[0], val_inputs[0].shape[0]))
-    if validation_steps:
-        do_validation = True
-        if steps_per_epoch is None:
-            raise ValueError('Can only use `validation_steps` '
-                             'when doing step-wise '
-                             'training, i.e. `steps_per_epoch` '
-                             'must be set.')
-    elif do_validation:
-        if steps_per_epoch:
-            raise ValueError('Must specify `validation_steps` '
-                             'to perform validation '
-                             'when doing step-wise training.')
-
-    num_train_samples = check_num_samples(fit_inputs,
-                                          batch_size=batch_size,
-                                          steps=steps_per_epoch,
-                                          steps_name='steps_per_epoch')
-    if num_train_samples is not None:
-        index_array = np.arange(num_train_samples)
-
-    model.history = cbks.History()
-    _callbacks = [cbks.BaseLogger(
-        stateful_metrics=model.stateful_metric_names)]
-    if verbose:
-        if steps_per_epoch is not None:
-            count_mode = 'steps'
-        else:
-            count_mode = 'samples'
-        _callbacks.append(
-            cbks.ProgbarLogger(
-                count_mode,
-                stateful_metrics=model.stateful_metric_names))
-    _callbacks += (callbacks or []) + [model.history]
-    callbacks = cbks.CallbackList(_callbacks)
-    out_labels = out_labels or []
-
-    # it's possible to callback a different model than itself
-    # (used by Sequential models)
-    callback_model = model._get_callback_model()
-
-    callbacks.set_model(callback_model)
-    callbacks.set_params({
-        'batch_size': batch_size,
-        'epochs': epochs,
-        'steps': steps_per_epoch,
-        'samples': num_train_samples,
-        'verbose': verbose,
-        'do_validation': do_validation,
-        'metrics': callback_metrics or [],
-    })
-    callbacks._call_begin_hook('train')
-    callbacks.model.stop_training = False
-    for cbk in callbacks:
-        cbk.validation_data = val_inputs
-
-    # To prevent a slowdown,
-    # we find beforehand the arrays that need conversion.
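Concretely, the scan below lets a scipy sparse input feed a dense placeholder by densifying one batch at a time instead of all at once. A minimal sketch, assuming a compiled `model` with a dense 16-feature input:

```python
import numpy as np
from scipy.sparse import csr_matrix

# Assuming a compiled `model` with a dense 16-feature input: the sparse
# matrix is detected by the `issparse`/`K.is_sparse` scan below and is
# densified one batch at a time via `.toarray()`, never all at once.
x = csr_matrix(np.random.binomial(1, 0.05, (256, 16)).astype('float32'))
y = np.random.rand(256, 1)
model.fit(x, y, batch_size=32, epochs=1)
```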
- feed = (model._feed_inputs + - model._feed_targets + - model._feed_sample_weights) - indices_for_conversion_to_dense = [] - for i in range(len(feed)): - if issparse(fit_inputs[i]) and not K.is_sparse(feed[i]): - indices_for_conversion_to_dense.append(i) - - for epoch in range(initial_epoch, epochs): - # Reset stateful metrics - for m in model.stateful_metric_functions: - m.reset_states() - callbacks.on_epoch_begin(epoch) - epoch_logs = {} - if steps_per_epoch is not None: - for step_index in range(steps_per_epoch): - batch_logs = {'batch': step_index, 'size': 1} - callbacks._call_batch_hook( - 'train', 'begin', step_index, batch_logs) - outs = fit_function(fit_inputs) - - outs = to_list(outs) - for l, o in zip(out_labels, outs): - batch_logs[l] = o - - callbacks._call_batch_hook( - 'train', 'end', step_index, batch_logs) - if callback_model.stop_training: - break - - if do_validation and should_run_validation(validation_freq, epoch): - val_outs = test_loop(model, val_function, val_inputs, - steps=validation_steps, - callbacks=callbacks, - verbose=0) - val_outs = to_list(val_outs) - # Same labels assumed. - for l, o in zip(out_labels, val_outs): - epoch_logs['val_' + l] = o - else: - if shuffle == 'batch': - index_array = batch_shuffle(index_array, batch_size) - elif shuffle: - np.random.shuffle(index_array) - - batches = make_batches(num_train_samples, batch_size) - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - try: - if isinstance(fit_inputs[-1], float): - # Do not slice the training phase flag. - ins_batch = slice_arrays( - fit_inputs[:-1], batch_ids) + [fit_inputs[-1]] - else: - ins_batch = slice_arrays(fit_inputs, batch_ids) - except TypeError: - raise TypeError('TypeError while preparing batch. ' - 'If using HDF5 input data, ' - 'pass shuffle="batch".') - batch_logs = {'batch': batch_index, 'size': len(batch_ids)} - callbacks._call_batch_hook( - 'train', 'begin', batch_index, batch_logs) - for i in indices_for_conversion_to_dense: - ins_batch[i] = ins_batch[i].toarray() - - outs = fit_function(ins_batch) - outs = to_list(outs) - for l, o in zip(out_labels, outs): - batch_logs[l] = o - - callbacks._call_batch_hook( - 'train', 'end', batch_index, batch_logs) - if callbacks.model.stop_training: - break - - if batch_index == len(batches) - 1: # Last batch. - if do_validation and should_run_validation(validation_freq, epoch): - val_outs = test_loop(model, val_function, val_inputs, - batch_size=batch_size, - callbacks=callbacks, - verbose=0) - val_outs = to_list(val_outs) - # Same labels assumed. - for l, o in zip(out_labels, val_outs): - epoch_logs['val_' + l] = o - - callbacks.on_epoch_end(epoch, epoch_logs) - if callbacks.model.stop_training: - break - callbacks._call_end_hook('train') - return model.history - - -def predict_loop(model, f, ins, - batch_size=32, - verbose=0, - steps=None, - callbacks=None): - """Abstract method to loop over some data in batches. - - # Arguments - model: Keras model instance. - f: Keras function returning a list of tensors. - ins: list of tensors to be fed to `f`. - batch_size: integer batch size. - verbose: verbosity mode. - steps: Total number of steps (batches of samples) - before declaring `predict_loop` finished. - Ignored with the default value of `None`. - callbacks: List of callbacks or an instance of - `keras.callbacks.CallbackList` to be called during prediction. 
- - # Returns - Array of predictions (if the model has a single output) - or list of arrays of predictions - (if the model has multiple outputs). - """ - num_samples = check_num_samples(ins, - batch_size=batch_size, - steps=steps, - steps_name='steps') - - # Check if callbacks have not been already configured - if not isinstance(callbacks, cbks.CallbackList): - callbacks = cbks.CallbackList(callbacks) - callback_model = model._get_callback_model() - callbacks.set_model(callback_model) - callback_params = { - 'batch_size': batch_size, - 'steps': steps, - 'samples': num_samples, - 'verbose': verbose, - } - callbacks.set_params(callback_params) - - if verbose == 1: - if steps is not None: - progbar = Progbar(target=steps) - else: - progbar = Progbar(target=num_samples) - - indices_for_conversion_to_dense = [] - for i in range(len(model._feed_inputs)): - if issparse(ins[i]) and not K.is_sparse(model._feed_inputs[i]): - indices_for_conversion_to_dense.append(i) - - callbacks.model.stop_training = False - callbacks._call_begin_hook('predict') - - if steps is not None: - # Step-based predictions. - # Since we do not know how many samples - # we will see, we cannot pre-allocate - # the returned Numpy arrays. - # Instead, we store one array per batch seen - # and concatenate them upon returning. - unconcatenated_outs = [] - for step in range(steps): - batch_logs = {'batch': step, 'size': 1} - callbacks._call_batch_hook('predict', 'begin', step, batch_logs) - batch_outs = f(ins) - batch_outs = to_list(batch_outs) - if step == 0: - for batch_out in batch_outs: - unconcatenated_outs.append([]) - for i, batch_out in enumerate(batch_outs): - unconcatenated_outs[i].append(batch_out) - - batch_logs['outputs'] = batch_outs - callbacks._call_batch_hook('predict', 'end', step, batch_logs) - if verbose == 1: - progbar.update(step + 1) - callbacks.on_predict_end() - if len(unconcatenated_outs) == 1: - return np.concatenate(unconcatenated_outs[0], axis=0) - return [np.concatenate(unconcatenated_outs[i], axis=0) - for i in range(len(unconcatenated_outs))] - else: - # Sample-based predictions. - outs = [] - batches = make_batches(num_samples, batch_size) - index_array = np.arange(num_samples) - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - if ins and isinstance(ins[-1], float): - # Do not slice the training phase flag. - ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] - else: - ins_batch = slice_arrays(ins, batch_ids) - for i in indices_for_conversion_to_dense: - ins_batch[i] = ins_batch[i].toarray() - - batch_logs = {'batch': batch_index, 'size': len(batch_ids)} - callbacks._call_batch_hook( - 'predict', 'begin', batch_index, batch_logs) - batch_outs = f(ins_batch) - batch_outs = to_list(batch_outs) - if batch_index == 0: - # Pre-allocate the results arrays. - for batch_out in batch_outs: - shape = (num_samples,) + batch_out.shape[1:] - outs.append(np.zeros(shape, dtype=batch_out.dtype)) - for i, batch_out in enumerate(batch_outs): - outs[i][batch_start:batch_end] = batch_out - - batch_logs['outputs'] = batch_outs - callbacks._call_batch_hook( - 'predict', 'end', batch_index, batch_logs) - if verbose == 1: - progbar.update(batch_end) - callbacks._call_end_hook('predict') - return unpack_singleton(outs) - - -def test_loop(model, f, ins, - batch_size=None, - verbose=0, - steps=None, - callbacks=None): - """Abstract method to loop over some data in batches. - - # Arguments - model: Keras model instance. 
- f: Keras function returning a list of tensors. - ins: list of tensors to be fed to `f`. - batch_size: integer batch size or `None`. - verbose: verbosity mode. - steps: Total number of steps (batches of samples) - before declaring predictions finished. - Ignored with the default value of `None`. - callbacks: List of callbacks or an instance of - `keras.callbacks.CallbackList` to be called during evaluation. - - # Returns - Scalar loss (if the model has a single output and no metrics) - or list of scalars (if the model has multiple outputs - and/or metrics). The attribute `model.metrics_names` will give you - the display labels for the scalar outputs. - """ - - if hasattr(model, 'metrics'): - for m in model.stateful_metric_functions: - m.reset_states() - stateful_metric_indices = [ - i for i, name in enumerate(model.metrics_names) - if str(name) in model.stateful_metric_names] - else: - stateful_metric_indices = [] - - num_samples = check_num_samples(ins, - batch_size=batch_size, - steps=steps, - steps_name='steps') - - # Check if callbacks have not been already configured - if not isinstance(callbacks, cbks.CallbackList): - callbacks = cbks.CallbackList(callbacks) - callback_model = model._get_callback_model() - callbacks.set_model(callback_model) - callback_metrics = [] - if hasattr(model, 'metrics_names'): - callback_metrics = list(model.metrics_names) - callback_params = { - 'batch_size': batch_size, - 'steps': steps, - 'samples': num_samples, - 'verbose': verbose, - 'metrics': callback_metrics, - } - callbacks.set_params(callback_params) - - outs = [] - if verbose == 1: - if steps is not None: - progbar = Progbar(target=steps) - else: - progbar = Progbar(target=num_samples) - - # To prevent a slowdown, - # we find beforehand the arrays that need conversion. - feed = (model._feed_inputs + - model._feed_targets + - model._feed_sample_weights) - indices_for_conversion_to_dense = [] - for i in range(len(feed)): - if issparse(ins[i]) and not K.is_sparse(feed[i]): - indices_for_conversion_to_dense.append(i) - - callbacks.model.stop_training = False - callbacks._call_begin_hook('test') - - if steps is not None: - for step in range(steps): - batch_logs = {'batch': step, 'size': 1} - callbacks._call_batch_hook('test', 'begin', step, batch_logs) - batch_outs = f(ins) - if isinstance(batch_outs, list): - if step == 0: - outs.extend([0.] * len(batch_outs)) - for i, batch_out in enumerate(batch_outs): - if i in stateful_metric_indices: - outs[i] = float(batch_out) - else: - outs[i] += batch_out - else: - if step == 0: - outs.append(0.) - outs[0] += batch_outs - - if hasattr(model, 'metrics_names'): - for l, o in zip(model.metrics_names, batch_outs): - batch_logs[l] = o - callbacks._call_batch_hook('test', 'end', step, batch_logs) - - if verbose == 1: - progbar.update(step + 1) - for i in range(len(outs)): - if i not in stateful_metric_indices: - outs[i] /= steps - else: - batches = make_batches(num_samples, batch_size) - index_array = np.arange(num_samples) - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - if isinstance(ins[-1], float): - # Do not slice the training phase flag. 
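# [Editorial note, not part of the scraped file] When a model uses the
# learning phase, Keras appends a plain Python float (1. = train,
# 0. = test) as the last feed element; it is a scalar flag rather than
# a per-sample array, which is why it is carried over unsliced here.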
- ins_batch = slice_arrays(ins[:-1], batch_ids) + [ins[-1]] - else: - ins_batch = slice_arrays(ins, batch_ids) - for i in indices_for_conversion_to_dense: - ins_batch[i] = ins_batch[i].toarray() - - batch_logs = {'batch': batch_index, 'size': len(batch_ids)} - callbacks._call_batch_hook( - 'test', 'begin', batch_index, batch_logs) - batch_outs = f(ins_batch) - if isinstance(batch_outs, list): - if batch_index == 0: - outs.extend([0.] * len(batch_outs)) - for i, batch_out in enumerate(batch_outs): - if i in stateful_metric_indices: - outs[i] = batch_out - else: - outs[i] += batch_out * len(batch_ids) - else: - if batch_index == 0: - outs.append(0.) - outs[0] += batch_outs * len(batch_ids) - - if hasattr(model, 'metrics_names'): - for l, o in zip(model.metrics_names, batch_outs): - batch_logs[l] = o - callbacks._call_batch_hook('test', 'end', batch_index, batch_logs) - - if verbose == 1: - progbar.update(batch_end) - for i in range(len(outs)): - if i not in stateful_metric_indices: - outs[i] /= num_samples - callbacks._call_end_hook('test') - return unpack_singleton(outs) -"""Part of the training engine related to Python generators of array data. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import warnings -import numpy as np - -from .training_utils import is_sequence -from .training_utils import iter_sequence_infinite -from .training_utils import should_run_validation -from .. import backend as K -from ..utils.data_utils import Sequence -from ..utils.data_utils import GeneratorEnqueuer -from ..utils.data_utils import OrderedEnqueuer -from ..utils.generic_utils import Progbar -from ..utils.generic_utils import to_list -from ..utils.generic_utils import unpack_singleton -from .. import callbacks as cbks - - -def fit_generator(model, - generator, - steps_per_epoch=None, - epochs=1, - verbose=1, - callbacks=None, - validation_data=None, - validation_steps=None, - validation_freq=1, - class_weight=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - shuffle=True, - initial_epoch=0): - """See docstring for `Model.fit_generator`.""" - epoch = initial_epoch - - do_validation = bool(validation_data) - model._make_train_function() - if do_validation: - model._make_test_function() - - use_sequence_api = is_sequence(generator) - if not use_sequence_api and use_multiprocessing and workers > 1: - warnings.warn( - UserWarning('Using a generator with `use_multiprocessing=True`' - ' and multiple workers may duplicate your data.' - ' Please consider using the `keras.utils.Sequence' - ' class.')) - if steps_per_epoch is None: - if use_sequence_api: - steps_per_epoch = len(generator) - else: - raise ValueError('`steps_per_epoch=None` is only valid for a' - ' generator based on the ' - '`keras.utils.Sequence`' - ' class. Please specify `steps_per_epoch` ' - 'or use the `keras.utils.Sequence` class.') - - # python 2 has 'next', 3 has '__next__' - # avoid any explicit version checks - val_use_sequence_api = is_sequence(validation_data) - val_gen = (hasattr(validation_data, 'next') or - hasattr(validation_data, '__next__') or - val_use_sequence_api) - if (val_gen and not val_use_sequence_api and - not validation_steps): - raise ValueError('`validation_steps=None` is only valid for a' - ' generator based on the `keras.utils.Sequence`' - ' class. Please specify `validation_steps` or use' - ' the `keras.utils.Sequence` class.') - - # Prepare display labels. 
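For context on the `steps_per_epoch=None` branch above: a plain Python generator has no length, whereas the `keras.utils.Sequence` API exposes `__len__`, which is what lets `fit_generator` infer the step count (and lets the `OrderedEnqueuer` keep batches in order under multiprocessing). A minimal sketch of such a Sequence (editorial, not part of the scraped file; `x_train`/`y_train` are illustrative NumPy arrays):

```python
import numpy as np
from keras.utils import Sequence

class DemoBatches(Sequence):
    """Serves (x, y) mini-batches; len() is the steps per epoch."""

    def __init__(self, x, y, batch_size=32):
        self.x, self.y, self.batch_size = x, y, batch_size

    def __len__(self):
        # Batches per epoch, rounded up -- this is what
        # `steps_per_epoch = len(generator)` above relies on.
        return int(np.ceil(len(self.x) / self.batch_size))

    def __getitem__(self, idx):
        sl = slice(idx * self.batch_size, (idx + 1) * self.batch_size)
        return self.x[sl], self.y[sl]

# e.g. model.fit_generator(DemoBatches(x_train, y_train))  # steps inferred
```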
- out_labels = model.metrics_names - callback_metrics = out_labels + ['val_' + n for n in out_labels] - - # prepare callbacks - model.history = cbks.History() - _callbacks = [cbks.BaseLogger( - stateful_metrics=model.stateful_metric_names)] - if verbose: - _callbacks.append( - cbks.ProgbarLogger( - count_mode='steps', - stateful_metrics=model.stateful_metric_names)) - _callbacks += (callbacks or []) + [model.history] - callbacks = cbks.CallbackList(_callbacks) - - # it's possible to callback a different model than self: - callback_model = model._get_callback_model() - - callbacks.set_model(callback_model) - callbacks.set_params({ - 'epochs': epochs, - 'steps': steps_per_epoch, - 'verbose': verbose, - 'do_validation': do_validation, - 'metrics': callback_metrics, - }) - callbacks._call_begin_hook('train') - - enqueuer = None - val_enqueuer = None - - try: - if do_validation: - if val_gen and workers > 0: - # Create an Enqueuer that can be reused - val_data = validation_data - if is_sequence(val_data): - val_enqueuer = OrderedEnqueuer( - val_data, - use_multiprocessing=use_multiprocessing) - validation_steps = validation_steps or len(val_data) - else: - val_enqueuer = GeneratorEnqueuer( - val_data, - use_multiprocessing=use_multiprocessing) - val_enqueuer.start(workers=workers, - max_queue_size=max_queue_size) - val_enqueuer_gen = val_enqueuer.get() - elif val_gen: - val_data = validation_data - if is_sequence(val_data): - val_enqueuer_gen = iter_sequence_infinite(val_data) - validation_steps = validation_steps or len(val_data) - else: - val_enqueuer_gen = val_data - else: - # Prepare data for validation - if len(validation_data) == 2: - val_x, val_y = validation_data - val_sample_weight = None - elif len(validation_data) == 3: - val_x, val_y, val_sample_weight = validation_data - else: - raise ValueError('`validation_data` should be a tuple ' - '`(val_x, val_y, val_sample_weight)` ' - 'or `(val_x, val_y)`. Found: ' + - str(validation_data)) - val_x, val_y, val_sample_weights = model._standardize_user_data( - val_x, val_y, val_sample_weight) - val_data = val_x + val_y + val_sample_weights - if model.uses_learning_phase and not isinstance(K.learning_phase(), - int): - val_data += [0.] - for cbk in callbacks: - cbk.validation_data = val_data - - if workers > 0: - if use_sequence_api: - enqueuer = OrderedEnqueuer( - generator, - use_multiprocessing=use_multiprocessing, - shuffle=shuffle) - else: - enqueuer = GeneratorEnqueuer( - generator, - use_multiprocessing=use_multiprocessing) - enqueuer.start(workers=workers, max_queue_size=max_queue_size) - output_generator = enqueuer.get() - else: - if use_sequence_api: - output_generator = iter_sequence_infinite(generator) - else: - output_generator = generator - - callbacks.model.stop_training = False - # Construct epoch logs. - epoch_logs = {} - while epoch < epochs: - for m in model.stateful_metric_functions: - m.reset_states() - callbacks.on_epoch_begin(epoch) - steps_done = 0 - batch_index = 0 - while steps_done < steps_per_epoch: - generator_output = next(output_generator) - - if not hasattr(generator_output, '__len__'): - raise ValueError('Output of generator should be ' - 'a tuple `(x, y, sample_weight)` ' - 'or `(x, y)`. Found: ' + - str(generator_output)) - - if len(generator_output) == 2: - x, y = generator_output - sample_weight = None - elif len(generator_output) == 3: - x, y, sample_weight = generator_output - else: - raise ValueError('Output of generator should be ' - 'a tuple `(x, y, sample_weight)` ' - 'or `(x, y)`. 
Found: ' + - str(generator_output)) - if x is None or len(x) == 0: - # Handle data tensors support when no input given - # step-size = 1 for data tensors - batch_size = 1 - elif isinstance(x, list): - batch_size = x[0].shape[0] - elif isinstance(x, dict): - batch_size = list(x.values())[0].shape[0] - else: - batch_size = x.shape[0] - # build batch logs - batch_logs = {'batch': batch_index, 'size': batch_size} - callbacks.on_batch_begin(batch_index, batch_logs) - - outs = model.train_on_batch(x, y, - sample_weight=sample_weight, - class_weight=class_weight) - - outs = to_list(outs) - for l, o in zip(out_labels, outs): - batch_logs[l] = o - - callbacks._call_batch_hook( - 'train', 'end', batch_index, batch_logs) - - batch_index += 1 - steps_done += 1 - - # Epoch finished. - if (steps_done >= steps_per_epoch and - do_validation and - should_run_validation(validation_freq, epoch)): - # Note that `callbacks` here is an instance of - # `keras.callbacks.CallbackList` - if val_gen: - val_outs = model.evaluate_generator( - val_enqueuer_gen, - validation_steps, - callbacks=callbacks, - workers=0) - else: - # No need for try/except because - # data has already been validated. - val_outs = model.evaluate( - val_x, val_y, - batch_size=batch_size, - sample_weight=val_sample_weights, - callbacks=callbacks, - verbose=0) - val_outs = to_list(val_outs) - # Same labels assumed. - for l, o in zip(out_labels, val_outs): - epoch_logs['val_' + l] = o - - if callbacks.model.stop_training: - break - - callbacks.on_epoch_end(epoch, epoch_logs) - epoch += 1 - if callbacks.model.stop_training: - break - - finally: - try: - if enqueuer is not None: - enqueuer.stop() - finally: - if val_enqueuer is not None: - val_enqueuer.stop() - - callbacks._call_end_hook('train') - return model.history - - -def evaluate_generator(model, generator, - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - verbose=0): - """See docstring for `Model.evaluate_generator`.""" - model._make_test_function() - - if hasattr(model, 'metrics'): - for m in model.stateful_metric_functions: - m.reset_states() - stateful_metric_indices = [ - i for i, name in enumerate(model.metrics_names) - if str(name) in model.stateful_metric_names] - else: - stateful_metric_indices = [] - - steps_done = 0 - outs_per_batch = [] - batch_sizes = [] - use_sequence_api = is_sequence(generator) - if not use_sequence_api and use_multiprocessing and workers > 1: - warnings.warn( - UserWarning('Using a generator with `use_multiprocessing=True`' - ' and multiple workers may duplicate your data.' - ' Please consider using the `keras.utils.Sequence' - ' class.')) - if steps is None: - if use_sequence_api: - steps = len(generator) - else: - raise ValueError('`steps=None` is only valid for a generator' - ' based on the `keras.utils.Sequence` class.' 
- ' Please specify `steps` or use the' - ' `keras.utils.Sequence` class.') - enqueuer = None - - # Check if callbacks have not been already configured - if not isinstance(callbacks, cbks.CallbackList): - callbacks = cbks.CallbackList(callbacks) - callback_model = model._get_callback_model() - callbacks.set_model(callback_model) - callback_metrics = [] - if hasattr(model, 'metrics_names'): - callback_metrics = list(model.metrics_names) - callback_params = { - 'steps': steps, - 'verbose': verbose, - 'metrics': callback_metrics, - } - callbacks.set_params(callback_params) - - callbacks.model.stop_training = False - callbacks._call_begin_hook('test') - - try: - if workers > 0: - if use_sequence_api: - enqueuer = OrderedEnqueuer( - generator, - use_multiprocessing=use_multiprocessing) - else: - enqueuer = GeneratorEnqueuer( - generator, - use_multiprocessing=use_multiprocessing) - enqueuer.start(workers=workers, max_queue_size=max_queue_size) - output_generator = enqueuer.get() - else: - if use_sequence_api: - output_generator = iter_sequence_infinite(generator) - else: - output_generator = generator - - if verbose == 1: - progbar = Progbar(target=steps) - - while steps_done < steps: - generator_output = next(output_generator) - if not hasattr(generator_output, '__len__'): - raise ValueError('Output of generator should be a tuple ' - '(x, y, sample_weight) ' - 'or (x, y). Found: ' + - str(generator_output)) - if len(generator_output) == 2: - x, y = generator_output - sample_weight = None - elif len(generator_output) == 3: - x, y, sample_weight = generator_output - else: - raise ValueError('Output of generator should be a tuple ' - '(x, y, sample_weight) ' - 'or (x, y). Found: ' + - str(generator_output)) - - if x is None or len(x) == 0: - # Handle data tensors support when no input given - # step-size = 1 for data tensors - batch_size = 1 - elif isinstance(x, list): - batch_size = x[0].shape[0] - elif isinstance(x, dict): - batch_size = list(x.values())[0].shape[0] - else: - batch_size = x.shape[0] - if batch_size == 0: - raise ValueError('Received an empty batch. 
' - 'Batches should contain ' - 'at least one item.') - - batch_logs = {'batch': steps_done, 'size': batch_size} - callbacks._call_batch_hook('test', 'begin', steps_done, batch_logs) - outs = model.test_on_batch(x, y, sample_weight=sample_weight) - outs = to_list(outs) - outs_per_batch.append(outs) - - if hasattr(model, 'metrics_names'): - for l, o in zip(model.metrics_names, outs): - batch_logs[l] = o - callbacks._call_batch_hook('test', 'end', steps_done, batch_logs) - - steps_done += 1 - batch_sizes.append(batch_size) - - if verbose == 1: - progbar.update(steps_done) - callbacks._call_end_hook('test') - - finally: - if enqueuer is not None: - enqueuer.stop() - - averages = [] - for i in range(len(outs)): - if i not in stateful_metric_indices: - averages.append(np.average([out[i] for out in outs_per_batch], - weights=batch_sizes)) - else: - averages.append(np.float64(outs_per_batch[-1][i])) - return unpack_singleton(averages) - - -def predict_generator(model, generator, - steps=None, - callbacks=None, - max_queue_size=10, - workers=1, - use_multiprocessing=False, - verbose=0): - """See docstring for `Model.predict_generator`.""" - model._make_predict_function() - - steps_done = 0 - all_outs = [] - use_sequence_api = is_sequence(generator) - if not use_sequence_api and use_multiprocessing and workers > 1: - warnings.warn( - UserWarning('Using a generator with `use_multiprocessing=True`' - ' and multiple workers may duplicate your data.' - ' Please consider using the `keras.utils.Sequence' - ' class.')) - if steps is None: - if use_sequence_api: - steps = len(generator) - else: - raise ValueError('`steps=None` is only valid for a generator' - ' based on the `keras.utils.Sequence` class.' - ' Please specify `steps` or use the' - ' `keras.utils.Sequence` class.') - enqueuer = None - - # Check if callbacks have not been already configured - if not isinstance(callbacks, cbks.CallbackList): - callbacks = cbks.CallbackList(callbacks) - callback_model = model._get_callback_model() - callbacks.set_model(callback_model) - callback_params = { - 'steps': steps, - 'verbose': verbose, - } - callbacks.set_params(callback_params) - - callbacks.model.stop_training = False - callbacks._call_begin_hook('predict') - - try: - if workers > 0: - if use_sequence_api: - enqueuer = OrderedEnqueuer( - generator, - use_multiprocessing=use_multiprocessing) - else: - enqueuer = GeneratorEnqueuer( - generator, - use_multiprocessing=use_multiprocessing) - enqueuer.start(workers=workers, max_queue_size=max_queue_size) - output_generator = enqueuer.get() - else: - if use_sequence_api: - output_generator = iter_sequence_infinite(generator) - else: - output_generator = generator - - if verbose == 1: - progbar = Progbar(target=steps) - - while steps_done < steps: - generator_output = next(output_generator) - if isinstance(generator_output, tuple): - # Compatibility with the generators - # used for training. - if len(generator_output) == 2: - x, _ = generator_output - elif len(generator_output) == 3: - x, _, _ = generator_output - else: - raise ValueError('Output of generator should be ' - 'a tuple `(x, y, sample_weight)` ' - 'or `(x, y)`. Found: ' + - str(generator_output)) - else: - # Assumes a generator that only - # yields inputs (not targets and sample weights). 
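# [Editorial note, not part of the scraped file] Net effect of the two
# branches above: a prediction generator may yield either bare input
# batches (`x`) or the same `(x, y)` / `(x, y, sample_weight)` tuples
# used for training -- targets and weights are simply discarded here.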
- x = generator_output - - if x is None or len(x) == 0: - # Handle data tensors support when no input given - # step-size = 1 for data tensors - batch_size = 1 - elif isinstance(x, list): - batch_size = x[0].shape[0] - elif isinstance(x, dict): - batch_size = list(x.values())[0].shape[0] - else: - batch_size = x.shape[0] - if batch_size == 0: - raise ValueError('Received an empty batch. ' - 'Batches should contain ' - 'at least one item.') - - batch_logs = {'batch': steps_done, 'size': batch_size} - callbacks._call_batch_hook( - 'predict', 'begin', steps_done, batch_logs) - - outs = model.predict_on_batch(x) - outs = to_list(outs) - - if not all_outs: - for out in outs: - all_outs.append([]) - - for i, out in enumerate(outs): - all_outs[i].append(out) - - batch_logs['outputs'] = outs - callbacks._call_batch_hook( - 'predict', 'end', steps_done, batch_logs) - - steps_done += 1 - if verbose == 1: - progbar.update(steps_done) - callbacks._call_end_hook('predict') - finally: - if enqueuer is not None: - enqueuer.stop() - - if len(all_outs) == 1: - if steps_done == 1: - return all_outs[0][0] - else: - return np.concatenate(all_outs[0]) - if steps_done == 1: - return [out[0] for out in all_outs] - else: - return [np.concatenate(out) for out in all_outs] -"""Training-related utilities. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import inspect -import collections -import copy -import numpy as np -import warnings - -from .. import backend as K -from .. import losses -from ..utils import Sequence -from ..utils.generic_utils import to_list - - -def standardize_single_array(x): - if x is None: - return None - elif K.is_tensor(x): - shape = K.int_shape(x) - if shape is None or shape[0] is None: - raise ValueError( - 'When feeding symbolic tensors to a model, we expect the ' - 'tensors to have a static batch size. ' - 'Got tensor with shape: %s' % str(shape)) - return x - elif x.ndim == 1: - x = np.expand_dims(x, 1) - return x - - -def standardize_input_data(data, - names, - shapes=None, - check_batch_axis=True, - exception_prefix=''): - """Normalizes inputs and targets provided by users. - - Users may pass data as a list of arrays, dictionary of arrays, - or as a single array. We normalize this to an ordered list of - arrays (same order as `names`), while checking that the provided - arrays have shapes that match the network's expectations. - - # Arguments - data: User-provided input data (polymorphic). - names: List of expected array names. - shapes: Optional list of expected array shapes. - check_batch_axis: Boolean; whether to check that - the batch axis of the arrays matches the expected - value found in `shapes`. - exception_prefix: String prefix used for exception formatting. - - # Returns - List of standardized input arrays (one array per model input). - - # Raises - ValueError: in case of improperly formatted user-provided data. - """ - if not names: - if data is not None and hasattr(data, '__len__') and len(data): - raise ValueError('Error when checking model ' + - exception_prefix + ': ' - 'expected no data, but got:', data) - return [] - if data is None: - return [None for _ in range(len(names))] - - if isinstance(data, dict): - try: - data = [ - data[x].values - if data[x].__class__.__name__ == 'DataFrame' else data[x] - for x in names - ] - except KeyError as e: - raise ValueError('No data provided for "' + e.args[0] + - '". 
Need data ' - 'for each key in: ' + str(names)) - elif isinstance(data, list): - if isinstance(data[0], list): - data = [np.asarray(d) for d in data] - elif len(names) == 1 and isinstance(data[0], (float, int)): - data = [np.asarray(data)] - else: - data = [ - x.values if x.__class__.__name__ == 'DataFrame' - else x for x in data - ] - else: - data = data.values if data.__class__.__name__ == 'DataFrame' else data - data = [data] - data = [standardize_single_array(x) for x in data] - - if len(data) != len(names): - if data and hasattr(data[0], 'shape'): - raise ValueError( - 'Error when checking model ' + exception_prefix + - ': the list of Numpy arrays that you are passing to ' - 'your model is not the size the model expected. ' - 'Expected to see ' + str(len(names)) + ' array(s), ' - 'but instead got the following list of ' + - str(len(data)) + ' arrays: ' + str(data)[:200] + '...') - elif len(names) > 1: - raise ValueError( - 'Error when checking model ' + exception_prefix + - ': you are passing a list as input to your model, ' - 'but the model expects a list of ' + str(len(names)) + - ' Numpy arrays instead. ' - 'The list you passed was: ' + str(data)[:200]) - elif len(data) == 1 and not hasattr(data[0], 'shape'): - raise TypeError('Error when checking model ' + exception_prefix + - ': data should be a Numpy array, or list/dict of ' - 'Numpy arrays. Found: ' + str(data)[:200] + '...') - elif len(names) == 1: - data = [np.asarray(data)] - - # Check shapes compatibility. - if shapes: - for i in range(len(names)): - if shapes[i] is not None and not K.is_tensor(data[i]): - data_shape = data[i].shape - shape = shapes[i] - if data[i].ndim != len(shape): - raise ValueError( - 'Error when checking ' + exception_prefix + - ': expected ' + names[i] + ' to have ' + - str(len(shape)) + ' dimensions, but got array ' - 'with shape ' + str(data_shape)) - if not check_batch_axis: - data_shape = data_shape[1:] - shape = shape[1:] - for dim, ref_dim in zip(data_shape, shape): - if ref_dim != dim and ref_dim: - raise ValueError( - 'Error when checking ' + exception_prefix + - ': expected ' + names[i] + ' to have shape ' + - str(shape) + ' but got array with shape ' + - str(data_shape)) - return data - - -def standardize_sample_or_class_weights(x_weight, - output_names, - weight_type): - """Maps `sample_weight` or `class_weight` to model outputs. - - # Arguments - x_weight: User-provided `sample_weight` or `class_weight` argument. - output_names: List of output names (strings) in the model. - weight_type: A string used purely for exception printing. - - # Returns - A list of `sample_weight` or `class_weight` where there are exactly - one element per model output. - - # Raises - ValueError: In case of invalid user-provided argument. - """ - if x_weight is None or len(x_weight) == 0: - return [None for _ in output_names] - if len(output_names) == 1: - if isinstance(x_weight, list) and len(x_weight) == 1: - return x_weight - if isinstance(x_weight, dict) and output_names[0] in x_weight: - return [x_weight[output_names[0]]] - else: - return [x_weight] - if isinstance(x_weight, list): - if len(x_weight) != len(output_names): - raise ValueError('Provided `' + weight_type + '` was a list of ' + - str(len(x_weight)) + - ' elements, but the model has ' + - str(len(output_names)) + ' outputs. 
' - 'You should provide one `' + weight_type + '`' - 'array per model output.') - return x_weight - if isinstance(x_weight, dict): - x_weights = [] - for name in output_names: - x_weights.append(x_weight.get(name)) - return x_weights - else: - raise TypeError('The model has multiple outputs, so `' + - weight_type + '` ' - 'should be either a list or a dict. ' - 'Provided `' + weight_type + - '` type not understood: ' + - str(x_weight)) - - -def standardize_class_weights(class_weight, output_names): - return standardize_sample_or_class_weights(class_weight, - output_names, - 'class_weight') - - -def standardize_sample_weights(sample_weight, output_names): - return standardize_sample_or_class_weights(sample_weight, - output_names, - 'sample_weight') - - -def check_array_length_consistency(inputs, targets, weights=None): - """Checks if batch axes are the same for Numpy arrays. - - # Arguments - inputs: list of Numpy arrays of inputs. - targets: list of Numpy arrays of targets. - weights: list of Numpy arrays of sample weights. - - # Raises - ValueError: in case of incorrectly formatted data. - """ - def set_of_lengths(x): - # return a set with the variation between - # different shapes, with None => 0 - if x is None: - return {0} - else: - return set([0 if y is None else int(y.shape[0]) for y in x]) - - set_x = set_of_lengths(inputs) - set_y = set_of_lengths(targets) - set_w = set_of_lengths(weights) - if len(set_x) > 1: - raise ValueError('All input arrays (x) should have ' - 'the same number of samples. Got array shapes: ' + - str([x.shape for x in inputs])) - if len(set_y) > 1: - raise ValueError('All target arrays (y) should have ' - 'the same number of samples. Got array shapes: ' + - str([y.shape for y in targets])) - if set_x and set_y and list(set_x)[0] != list(set_y)[0]: - raise ValueError('Input arrays should have ' - 'the same number of samples as target arrays. ' - 'Found ' + str(list(set_x)[0]) + ' input samples ' - 'and ' + str(list(set_y)[0]) + ' target samples.') - if len(set_w) > 1: - raise ValueError('All sample_weight arrays should have ' - 'the same number of samples. Got array shapes: ' + - str([w.shape for w in weights])) - if set_y and set_w and list(set_y)[0] != list(set_w)[0]: - raise ValueError('Sample_weight arrays should have ' - 'the same number of samples as target arrays. Got ' + - str(list(set_y)[0]) + ' input samples and ' + - str(list(set_w)[0]) + ' target samples.') - - -def check_loss_and_target_compatibility(targets, loss_fns, output_shapes): - """Does validation on the compatibility of targets and loss functions. - - This helps prevent users from using loss functions incorrectly. - - # Arguments - targets: list of Numpy arrays of targets. - loss_fns: list of loss functions. - output_shapes: list of shapes of model outputs. - - # Raises - ValueError: if a loss function or target array - is incompatible with an output. - """ - key_losses = {losses.mean_squared_error, - losses.binary_crossentropy, - losses.categorical_crossentropy} - for y, loss, shape in zip(targets, loss_fns, output_shapes): - if y is None or loss is None: - continue - if loss is losses.categorical_crossentropy: - if y.shape[-1] == 1: - raise ValueError( - 'You are passing a target array of shape ' + str(y.shape) + - ' while using as loss `categorical_crossentropy`. ' - '`categorical_crossentropy` expects ' - 'targets to be binary matrices (1s and 0s) ' - 'of shape (samples, classes). 
' - 'If your targets are integer classes, ' - 'you can convert them to the expected format via:\n' - '```\n' - 'from keras.utils import to_categorical\n' - 'y_binary = to_categorical(y_int)\n' - '```\n' - '\n' - 'Alternatively, you can use the loss function ' - '`sparse_categorical_crossentropy` instead, ' - 'which does expect integer targets.') - if loss in key_losses: - for target_dim, out_dim in zip(y.shape[1:], shape[1:]): - if out_dim is not None and target_dim != out_dim: - raise ValueError( - 'A target array with shape ' + str(y.shape) + - ' was passed for an output of shape ' + str(shape) + - ' while using as loss `' + loss.__name__ + '`. ' - 'This loss expects ' - 'targets to have the same shape ' - 'as the output.') - - -def check_generator_arguments(y=None, sample_weight=None, - validation_split=None): - """Validates arguments passed when using a generator.""" - if y is not None: - raise ValueError('`y` argument is not supported when data is' - 'a generator or Sequence instance. Instead pass targets' - ' as the second element of the generator.') - if sample_weight is not None: - raise ValueError('`sample_weight` argument is not supported when data is' - 'a generator or Sequence instance. Instead pass sample' - ' weights as the third element of the generator.') - if validation_split: - raise ValueError('If your data is in the form of a Python generator, ' - 'you cannot use `validation_split`.') - - -def collect_metrics(metrics, output_names): - """Maps metric functions to model outputs. - - # Arguments - metrics: a list or dict of metric functions. - output_names: a list of the names (strings) of model outputs. - - # Returns - A list (one entry per model output) of lists of metric functions. - For instance, if the model has 2 outputs, and for the first output - we want to compute "binary_accuracy" and "binary_crossentropy", - and just "binary_accuracy" for the second output, - the list would look like: - `[[binary_accuracy, binary_crossentropy], [binary_accuracy]]` - - # Raises - TypeError: if an incorrect type is passed for the `metrics` argument. - """ - if not metrics: - return [[] for _ in output_names] - if isinstance(metrics, list): - # we then apply all metrics to all outputs. - return [copy.copy(metrics) for _ in output_names] - elif isinstance(metrics, dict): - nested_metrics = [] - if not set(metrics.keys()).issubset(set(output_names)): - unknown_output_names = list( - set(metrics.keys()) - set(output_names)) - warnings.warn('Invalid layer name for metric computations: ' - '{}. Available names are {}.' - .format(unknown_output_names, output_names)) - for name in output_names: - output_metrics = metrics.get(name, []) - output_metrics = to_list(output_metrics) - nested_metrics.append(output_metrics) - return nested_metrics - else: - raise TypeError('Type of `metrics` argument not understood. ' - 'Expected a list or dictionary, found: ' + - str(metrics)) - - -def batch_shuffle(index_array, batch_size): - """Shuffles an array in a batch-wise fashion. - - Useful for shuffling HDF5 arrays - (where one cannot access arbitrary indices). - - # Arguments - index_array: array of indices to be shuffled. - batch_size: integer. - - # Returns - The `index_array` array, shuffled in a batch-wise fashion. 
- """ - batch_count = int(len(index_array) / batch_size) - # to reshape we need to be cleanly divisible by batch size - # we stash extra items and reappend them after shuffling - last_batch = index_array[batch_count * batch_size:] - index_array = index_array[:batch_count * batch_size] - index_array = index_array.reshape((batch_count, batch_size)) - np.random.shuffle(index_array) - index_array = index_array.flatten() - return np.append(index_array, last_batch) - - -def make_batches(size, batch_size): - """Returns a list of batch indices (tuples of indices). - - # Arguments - size: Integer, total size of the data to slice into batches. - batch_size: Integer, batch size. - - # Returns - A list of tuples of array indices. - """ - num_batches = (size + batch_size - 1) // batch_size # round up - return [(i * batch_size, min(size, (i + 1) * batch_size)) - for i in range(num_batches)] - - -def weighted_masked_objective(fn): - """Adds support for masking and sample-weighting to an objective function. - - It transforms an objective function `fn(y_true, y_pred)` - into a sample-weighted, cost-masked objective function - `fn(y_true, y_pred, weights, mask)`. - - # Arguments - fn: The objective function to wrap, - with signature `fn(y_true, y_pred)`. - - # Returns - A function with signature `fn(y_true, y_pred, weights, mask)`. - """ - if fn is None: - return None - - def weighted(y_true, y_pred, weights, mask=None): - """Wrapper function. - - # Arguments - y_true: `y_true` argument of `fn`. - y_pred: `y_pred` argument of `fn`. - weights: Weights tensor. - mask: Mask tensor. - - # Returns - Scalar tensor. - """ - # score_array has ndim >= 2 - score_array = fn(y_true, y_pred) - if mask is not None: - # Cast the mask to floatX to avoid float64 upcasting in Theano - mask = K.cast(mask, K.floatx()) - # mask should have the same shape as score_array - score_array *= mask - # the loss per batch should be proportional - # to the number of unmasked samples. - score_array /= K.mean(mask) + K.epsilon() - - # apply sample weighting - if weights is not None: - # reduce score_array to same ndim as weight array - ndim = K.ndim(score_array) - weight_ndim = K.ndim(weights) - score_array = K.mean(score_array, - axis=list(range(weight_ndim, ndim))) - score_array *= weights - score_array /= K.mean(K.cast(K.not_equal(weights, 0), K.floatx())) - return K.mean(score_array) - return weighted - - -def standardize_weights(y, - sample_weight=None, - class_weight=None, - sample_weight_mode=None): - """Performs sample weight validation and standardization. - - Everything gets normalized to a single sample-wise (or timestep-wise) - weight array. If both `sample_weights` and `class_weights` are provided, - the weights are multiplied together. - - # Arguments - y: Numpy array of model targets to be weighted. - sample_weight: User-provided `sample_weight` argument. - class_weight: User-provided `class_weight` argument. - sample_weight_mode: One of `None` or `"temporal"`. - `"temporal"` indicated that we expect 2D weight data - that will be applied to the last 2 dimensions of - the targets (i.e. we are weighting timesteps, not samples). - - # Returns - A Numpy array of target weights, one entry per sample to weight. - - # Raises - ValueError: In case of invalid user-provided arguments. - """ - if sample_weight_mode is not None: - if sample_weight_mode != 'temporal': - raise ValueError('"sample_weight_mode ' - 'should be None or "temporal". 
' - 'Found: ' + str(sample_weight_mode)) - if len(y.shape) < 3: - raise ValueError('Found a sample_weight array for ' - 'an input with shape ' + - str(y.shape) + '. ' - 'Timestep-wise sample weighting (use of ' - 'sample_weight_mode="temporal") is restricted to ' - 'outputs that are at least 3D, i.e. that have ' - 'a time dimension.') - if sample_weight is not None and len(sample_weight.shape) != 2: - raise ValueError('Found a sample_weight array with shape ' + - str(sample_weight.shape) + '. ' - 'In order to use timestep-wise sample weighting, ' - 'you should pass a 2D sample_weight array.') - else: - if sample_weight is not None and len(sample_weight.shape) != 1: - raise ValueError('Found a sample_weight array with shape ' + - str(sample_weight.shape) + '. ' - 'In order to use timestep-wise sample weights, ' - 'you should specify ' - 'sample_weight_mode="temporal" ' - 'in compile(). If you just mean to use ' - 'sample-wise weights, make sure your ' - 'sample_weight array is 1D.') - - if sample_weight is not None: - if len(sample_weight.shape) > len(y.shape): - raise ValueError('Found a sample_weight with shape' + - str(sample_weight.shape) + '.' - 'Expected sample_weight with rank ' - 'less than or equal to ' + str(len(y.shape))) - - if y.shape[:sample_weight.ndim] != sample_weight.shape: - raise ValueError('Found a sample_weight array with shape ' + - str(sample_weight.shape) + - ' for an input with shape ' + - str(y.shape) + '. ' - 'sample_weight cannot be broadcast.') - - class_sample_weight = None - if isinstance(class_weight, dict): - if len(y.shape) > 2: - raise ValueError('`class_weight` not supported for ' - '3+ dimensional targets.') - if len(y.shape) == 2: - if y.shape[1] > 1: - y_classes = np.argmax(y, axis=1) - elif y.shape[1] == 1: - y_classes = np.reshape(y, y.shape[0]) - else: - y_classes = y - - class_sample_weight = np.asarray( - [class_weight[cls] for cls in y_classes if cls in class_weight]) - - if len(class_sample_weight) != len(y_classes): - # subtract the sets to pick all missing classes - existing_classes = set(y_classes) - existing_class_weight = set(class_weight.keys()) - raise ValueError('`class_weight` must contain ' - 'all classes in the data.' - ' The classes %s exist in the data but not in ' - '`class_weight`.' - % (existing_classes - existing_class_weight)) - - if sample_weight is not None and class_sample_weight is not None: - return sample_weight * class_sample_weight - if sample_weight is not None: - return sample_weight - if class_sample_weight is not None: - return class_sample_weight - - # Everything has weight 1 by default. - if sample_weight_mode is None: - return np.ones((y.shape[0],), dtype=K.floatx()) - else: - return np.ones((y.shape[0], y.shape[1]), dtype=K.floatx()) - - -def check_num_samples(ins, - batch_size=None, - steps=None, - steps_name='steps'): - """Checks the number of samples provided for training and evaluation. - - The number of samples is not defined when running with `steps`, - in which case the number of samples is set to `None`. - - # Arguments - ins: List of tensors to be fed to the Keras function. - batch_size: Integer batch size or `None` if not defined. - steps: Total number of steps (batches of samples) - before declaring `predict_loop` finished. - Ignored with the default value of `None`. - steps_name: The public API's parameter name for `steps`. - - # Raises - ValueError: when `steps` is `None` and the attribute `ins.shape` - does not exist. 
Also raises ValueError when `steps` is not `None` - and `batch_size` is not `None` because they are mutually - exclusive. - - # Returns - When `steps` is `None`, returns the number of samples to be - processed based on the size of the first dimension of the - first input Numpy array. When `steps` is not `None` and - `batch_size` is `None`, returns `None`. - - # Raises - ValueError: In case of invalid arguments. - """ - if steps is not None and batch_size is not None: - raise ValueError( - 'If ' + steps_name + ' is set, the `batch_size` must be None.') - - if not ins or any(K.is_tensor(x) for x in ins): - if steps is None: - raise ValueError( - 'If your data is in the form of symbolic tensors, ' - 'you should specify the `' + steps_name + '` argument ' - '(instead of the `batch_size` argument, ' - 'because symbolic tensors are expected to produce ' - 'batches of input data).') - return None - - if hasattr(ins[0], 'shape'): - return int(ins[0].shape[0]) - return None # Edge case where ins == [static_learning_phase] - - -def iter_sequence_infinite(seq): - """Iterate indefinitely over a Sequence. - - # Arguments - seq: Sequence object - - # Returns - Generator yielding batches. - """ - while True: - for item in seq: - yield item - - -def is_sequence(seq): - """Determine if an object follows the Sequence API. - - # Arguments - seq: a possible Sequence object - - # Returns - boolean, whether the object follows the Sequence API. - """ - # TODO Dref360: Decide which pattern to follow. First needs a new TF Version. - return (getattr(seq, 'use_sequence_api', False) - or set(dir(Sequence())).issubset(set(dir(seq) + ['use_sequence_api']))) - - -def is_generator_or_sequence(x): - """Check if `x` is a Keras generator type.""" - return inspect.isgenerator(x) or is_sequence(x) - - -def should_run_validation(validation_freq, epoch): - """Checks if validation should be run this epoch. - - # Arguments - validation_freq: Integer or list. If an integer, specifies how many training - epochs to run before a new validation run is performed. If a list, - specifies the epochs on which to run validation. - epoch: Integer, the number of the training epoch just completed. - - # Returns - Bool, True if validation should be run. - - # Raises - ValueError: if `validation_freq` is an Integer and less than 1, or if - it is neither an Integer nor a Sequence. - """ - # `epoch` is 0-indexed internally but 1-indexed in the public API. - one_indexed_epoch = epoch + 1 - - if isinstance(validation_freq, int): - if validation_freq < 1: - raise ValueError('`validation_freq` can not be less than 1.') - return one_indexed_epoch % validation_freq == 0 - - if not isinstance(validation_freq, collections.Container): - raise ValueError('`validation_freq` must be an Integer or ' - '`collections.Container` (e.g. list, tuple, etc.)') - return one_indexed_epoch in validation_freq - - -def get_static_batch_size(layer): - """Gets the static batch size of a Layer. - - # Arguments - layer: a `Layer` instance. - - # Returns - The static batch size of a Layer. - """ - batch_input_shape, _ = get_input_shape_and_dtype(layer) - if batch_input_shape is not None: - return batch_input_shape[0] - return None - - -def get_input_shape_and_dtype(layer): - """Retrieves input shape and input dtype of layer if applicable. - - # Arguments - layer: Layer (or model) instance. - - # Returns - Tuple (input_shape, input_dtype). Both could be None if the layer - does not have a defined input shape. 
- - # Raises - ValueError: in case an empty Sequential or Functional model is passed. - """ - def _is_graph_model(layer): - return ((hasattr(layer, '_is_graph_network') and layer._is_graph_network) or - layer.__class__.__name__ == 'Sequential') - - # In case of nested models: recover the first layer - # of the deepest model to infer input shape and dtype. - # Subclassed Models may not have been built so can't be checked. - while _is_graph_model(layer): - if not layer.layers: - raise ValueError('An empty Model cannot be used as a Layer.') - layer = layer.layers[0] - - if hasattr(layer, '_batch_input_shape'): - return layer._batch_input_shape, layer.dtype - return None, None -from __future__ import absolute_import - -from ..utils.generic_utils import deserialize_keras_object -from ..engine.base_layer import Layer -from ..engine import Input -from ..engine import InputLayer -from ..engine.base_layer import InputSpec - -from .merge import Add -from .merge import Subtract -from .merge import Multiply -from .merge import Average -from .merge import Maximum -from .merge import Minimum -from .merge import Concatenate -from .merge import Dot -from .merge import add -from .merge import subtract -from .merge import multiply -from .merge import average -from .merge import maximum -from .merge import minimum -from .merge import concatenate -from .merge import dot - -from .core import Dense -from .core import Activation -from .core import Dropout -from .core import Flatten -from .core import Reshape -from .core import Permute -from .core import RepeatVector -from .core import Lambda -from .core import ActivityRegularization -from .core import Masking -from .core import SpatialDropout1D -from .core import SpatialDropout2D -from .core import SpatialDropout3D - -from .convolutional import Conv1D -from .convolutional import Conv2D -from .convolutional import SeparableConv1D -from .convolutional import SeparableConv2D -from .convolutional import DepthwiseConv2D -from .convolutional import Conv2DTranspose -from .convolutional import Conv3D -from .convolutional import Conv3DTranspose -from .convolutional import Cropping1D -from .convolutional import Cropping2D -from .convolutional import Cropping3D -from .convolutional import UpSampling1D -from .convolutional import UpSampling2D -from .convolutional import UpSampling3D -from .convolutional import ZeroPadding1D -from .convolutional import ZeroPadding2D -from .convolutional import ZeroPadding3D - -# Aliases (not in the docs) -from .convolutional import Convolution1D -from .convolutional import Convolution2D -from .convolutional import Convolution3D -from .convolutional import Deconvolution2D -from .convolutional import Deconvolution3D - -from .pooling import MaxPooling1D -from .pooling import MaxPooling2D -from .pooling import MaxPooling3D -from .pooling import AveragePooling1D -from .pooling import AveragePooling2D -from .pooling import AveragePooling3D -from .pooling import GlobalMaxPooling1D -from .pooling import GlobalMaxPooling2D -from .pooling import GlobalMaxPooling3D -from .pooling import GlobalAveragePooling2D -from .pooling import GlobalAveragePooling1D -from .pooling import GlobalAveragePooling3D - -# Aliases (not in the docs) -from .pooling import MaxPool1D -from .pooling import MaxPool2D -from .pooling import MaxPool3D -from .pooling import AvgPool1D -from .pooling import AvgPool2D -from .pooling import AvgPool3D -from .pooling import GlobalMaxPool1D -from .pooling import GlobalMaxPool2D -from .pooling import GlobalMaxPool3D -from .pooling import 
GlobalAvgPool1D -from .pooling import GlobalAvgPool2D -from .pooling import GlobalAvgPool3D - -from .local import LocallyConnected1D -from .local import LocallyConnected2D - -from .recurrent import RNN -from .recurrent import SimpleRNN -from .recurrent import GRU -from .recurrent import LSTM -from .recurrent import SimpleRNNCell -from .recurrent import GRUCell -from .recurrent import LSTMCell -from .recurrent import StackedRNNCells - -from .cudnn_recurrent import CuDNNGRU -from .cudnn_recurrent import CuDNNLSTM - -from .normalization import BatchNormalization - -from .embeddings import Embedding - -from .noise import GaussianNoise -from .noise import GaussianDropout -from .noise import AlphaDropout - -from .advanced_activations import LeakyReLU -from .advanced_activations import PReLU -from .advanced_activations import ELU -from .advanced_activations import ThresholdedReLU -from .advanced_activations import Softmax -from .advanced_activations import ReLU - -from .wrappers import Bidirectional -from .wrappers import TimeDistributed - -from .convolutional_recurrent import ConvLSTM2D -from .convolutional_recurrent import ConvLSTM2DCell - -# Legacy imports -from ..legacy.layers import MaxoutDense -from ..legacy.layers import Highway -from ..legacy.layers import AtrousConvolution1D -from ..legacy.layers import AtrousConvolution2D -from ..legacy.layers import Recurrent -from ..legacy.layers import ConvRecurrent2D - - -def serialize(layer): - """Serialize a layer. - - # Arguments - layer: a Layer object. - - # Returns - dictionary with config. - """ - return {'class_name': layer.__class__.__name__, - 'config': layer.get_config()} - - -def deserialize(config, custom_objects=None): - """Instantiate a layer from a config dictionary. - - # Arguments - config: dict of the form {'class_name': str, 'config': dict} - custom_objects: dict mapping class names (or function names) - of custom (non-Keras) objects to class/functions - - # Returns - Layer instance (may be Model, Sequential, Layer...) - """ - from .. import models - globs = globals() # All layers. - globs['Model'] = models.Model - globs['Sequential'] = models.Sequential - return deserialize_keras_object(config, - module_objects=globs, - custom_objects=custom_objects, - printable_module_name='layer') -# -*- coding: utf-8 -*- -"""Layers that act as activation functions. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from .. import activations -from .. import initializers -from .. import regularizers -from .. import constraints -from ..engine.base_layer import Layer -from ..engine.base_layer import InputSpec -from .. import backend as K -from ..legacy import interfaces -from ..utils.generic_utils import to_list - - -class LeakyReLU(Layer): - """Leaky version of a Rectified Linear Unit. - - It allows a small gradient when the unit is not active: - `f(x) = alpha * x for x < 0`, - `f(x) = x for x >= 0`. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as the input. - - # Arguments - alpha: float >= 0. Negative slope coefficient. 
- - # References - - [Rectifier Nonlinearities Improve Neural Network Acoustic Models]( - https://ai.stanford.edu/~amaas/papers/relu_hybrid_icml2013_final.pdf) - """ - - def __init__(self, alpha=0.3, **kwargs): - super(LeakyReLU, self).__init__(**kwargs) - self.supports_masking = True - self.alpha = K.cast_to_floatx(alpha) - - def call(self, inputs): - return K.relu(inputs, alpha=self.alpha) - - def get_config(self): - config = {'alpha': float(self.alpha)} - base_config = super(LeakyReLU, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - - -class PReLU(Layer): - """Parametric Rectified Linear Unit. - - It follows: - `f(x) = alpha * x for x < 0`, - `f(x) = x for x >= 0`, - where `alpha` is a learned array with the same shape as x. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as the input. - - # Arguments - alpha_initializer: initializer function for the weights. - alpha_regularizer: regularizer for the weights. - alpha_constraint: constraint for the weights. - shared_axes: the axes along which to share learnable - parameters for the activation function. - For example, if the incoming feature maps - are from a 2D convolution - with output shape `(batch, height, width, channels)`, - and you wish to share parameters across space - so that each filter only has one set of parameters, - set `shared_axes=[1, 2]`. - - # References - - [Delving Deep into Rectifiers: Surpassing Human-Level Performance on - ImageNet Classification](https://arxiv.org/abs/1502.01852) - """ - - @interfaces.legacy_prelu_support - def __init__(self, alpha_initializer='zeros', - alpha_regularizer=None, - alpha_constraint=None, - shared_axes=None, - **kwargs): - super(PReLU, self).__init__(**kwargs) - self.supports_masking = True - self.alpha_initializer = initializers.get(alpha_initializer) - self.alpha_regularizer = regularizers.get(alpha_regularizer) - self.alpha_constraint = constraints.get(alpha_constraint) - if shared_axes is None: - self.shared_axes = None - else: - self.shared_axes = to_list(shared_axes, allow_tuple=True) - - def build(self, input_shape): - param_shape = list(input_shape[1:]) - self.param_broadcast = [False] * len(param_shape) - if self.shared_axes is not None: - for i in self.shared_axes: - param_shape[i - 1] = 1 - self.param_broadcast[i - 1] = True - self.alpha = self.add_weight(shape=param_shape, - name='alpha', - initializer=self.alpha_initializer, - regularizer=self.alpha_regularizer, - constraint=self.alpha_constraint) - # Set input spec - axes = {} - if self.shared_axes: - for i in range(1, len(input_shape)): - if i not in self.shared_axes: - axes[i] = input_shape[i] - self.input_spec = InputSpec(ndim=len(input_shape), axes=axes) - self.built = True - - def call(self, inputs, mask=None): - pos = K.relu(inputs) - if K.backend() == 'theano': - neg = (K.pattern_broadcast(self.alpha, self.param_broadcast) * - (inputs - K.abs(inputs)) * 0.5) - else: - neg = -self.alpha * K.relu(-inputs) - return pos + neg - - def get_config(self): - config = { - 'alpha_initializer': initializers.serialize(self.alpha_initializer), - 'alpha_regularizer': regularizers.serialize(self.alpha_regularizer), - 'alpha_constraint': constraints.serialize(self.alpha_constraint), - 'shared_axes': self.shared_axes - } - base_config = super(PReLU, 
self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - - -class ELU(Layer): - """Exponential Linear Unit. - - It follows: - `f(x) = alpha * (exp(x) - 1.) for x < 0`, - `f(x) = x for x >= 0`. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as the input. - - # Arguments - alpha: scale for the negative factor. - - # References - - [Fast and Accurate Deep Network Learning by Exponential Linear Units - (ELUs)](https://arxiv.org/abs/1511.07289v1) - """ - - def __init__(self, alpha=1.0, **kwargs): - super(ELU, self).__init__(**kwargs) - self.supports_masking = True - self.alpha = K.cast_to_floatx(alpha) - - def call(self, inputs): - return K.elu(inputs, self.alpha) - - def get_config(self): - config = {'alpha': float(self.alpha)} - base_config = super(ELU, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - - -class ThresholdedReLU(Layer): - """Thresholded Rectified Linear Unit. - - It follows: - `f(x) = x for x > theta`, - `f(x) = 0 otherwise`. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as the input. - - # Arguments - theta: float >= 0. Threshold location of activation. - - # References - - [Zero-Bias Autoencoders and the Benefits of Co-Adapting Features]( - https://arxiv.org/abs/1402.3337) - """ - - def __init__(self, theta=1.0, **kwargs): - super(ThresholdedReLU, self).__init__(**kwargs) - self.supports_masking = True - self.theta = K.cast_to_floatx(theta) - - def call(self, inputs, mask=None): - return inputs * K.cast(K.greater(inputs, self.theta), K.floatx()) - - def get_config(self): - config = {'theta': float(self.theta)} - base_config = super(ThresholdedReLU, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - - -class Softmax(Layer): - """Softmax activation function. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as the input. - - # Arguments - axis: Integer, axis along which the softmax normalization is applied. - """ - - def __init__(self, axis=-1, **kwargs): - super(Softmax, self).__init__(**kwargs) - self.supports_masking = True - self.axis = axis - - def call(self, inputs): - return activations.softmax(inputs, axis=self.axis) - - def get_config(self): - config = {'axis': self.axis} - base_config = super(Softmax, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - - -class ReLU(Layer): - """Rectified Linear Unit activation function. - - With default values, it returns element-wise `max(x, 0)`. - - Otherwise, it follows: - `f(x) = max_value` for `x >= max_value`, - `f(x) = x` for `threshold <= x < max_value`, - `f(x) = negative_slope * (x - threshold)` otherwise. - - # Input shape - Arbitrary. 
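# A minimal NumPy sketch of the three-branch ReLU rule quoted above
# (max_value / negative_slope / threshold); again this mirrors the stated
# math, not the actual K.relu backend code:
import numpy as np

def relu(x, max_value=None, negative_slope=0.0, threshold=0.0):
    # identity above the threshold, scaled slope below it
    out = np.where(x >= threshold, x, negative_slope * (x - threshold))
    if max_value is not None:
        out = np.minimum(out, max_value)  # clip at the activation ceiling
    return out

# With threshold=1, negative_slope=0.5, max_value=3:
# relu(0) = 0.5 * (0 - 1) = -0.5, relu(2) = 2, relu(5) = 3
print(relu(np.array([0.0, 2.0, 5.0]), max_value=3.0,
           negative_slope=0.5, threshold=1.0))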
Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as the input. - - # Arguments - max_value: float >= 0. Maximum activation value. - negative_slope: float >= 0. Negative slope coefficient. - threshold: float. Threshold value for thresholded activation. - """ - - def __init__(self, max_value=None, negative_slope=0., - threshold=0., **kwargs): - super(ReLU, self).__init__(**kwargs) - if max_value is not None and max_value < 0.: - raise ValueError('max_value of ReLU layer ' - 'cannot be negative value: %s' % str(max_value)) - if negative_slope < 0.: - raise ValueError('negative_slope of ReLU layer cannot be ' - 'negative value: %s' % str(negative_slope)) - self.supports_masking = True - if max_value is not None: - max_value = K.cast_to_floatx(max_value) - self.max_value = max_value - self.negative_slope = K.cast_to_floatx(negative_slope) - self.threshold = K.cast_to_floatx(threshold) - - def call(self, inputs): - return K.relu(inputs, - alpha=self.negative_slope, - max_value=self.max_value, - threshold=self.threshold) - - def get_config(self): - config = { - 'max_value': self.max_value, - 'negative_slope': self.negative_slope, - 'threshold': self.threshold - } - base_config = super(ReLU, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape -# -*- coding: utf-8 -*- -"""Convolutional layers. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from .. import backend as K -from .. import activations -from .. import initializers -from .. import regularizers -from .. import constraints -from ..engine.base_layer import Layer -from ..engine.base_layer import InputSpec -from ..utils import conv_utils -from ..utils.generic_utils import transpose_shape -from ..legacy import interfaces - -# imports for backwards namespace compatibility -from .pooling import AveragePooling1D -from .pooling import AveragePooling2D -from .pooling import AveragePooling3D -from .pooling import MaxPooling1D -from .pooling import MaxPooling2D -from .pooling import MaxPooling3D - -from ..legacy.layers import AtrousConvolution1D -from ..legacy.layers import AtrousConvolution2D - - -class _Conv(Layer): - """Abstract nD convolution layer (private, used as implementation base). - - This layer creates a convolution kernel that is convolved - with the layer input to produce a tensor of outputs. - If `use_bias` is True, a bias vector is created and added to the outputs. - Finally, if `activation` is not `None`, - it is applied to the outputs as well. - - # Arguments - rank: An integer, the rank of the convolution, - e.g. "2" for 2D convolution. - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of n integers, specifying the - dimensions of the convolution window. - strides: An integer or tuple/list of n integers, - specifying the strides of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. 
- `"channels_last"` corresponds to inputs with shape - `(batch, ..., channels)` while `"channels_first"` corresponds to - inputs with shape `(batch, channels, ...)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - dilation_rate: An integer or tuple/list of n integers, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to the kernel matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - """ - - def __init__(self, rank, - filters, - kernel_size, - strides=1, - padding='valid', - data_format=None, - dilation_rate=1, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super(_Conv, self).__init__(**kwargs) - self.rank = rank - self.filters = filters - self.kernel_size = conv_utils.normalize_tuple(kernel_size, rank, - 'kernel_size') - self.strides = conv_utils.normalize_tuple(strides, rank, 'strides') - self.padding = conv_utils.normalize_padding(padding) - self.data_format = K.normalize_data_format(data_format) - self.dilation_rate = conv_utils.normalize_tuple(dilation_rate, rank, - 'dilation_rate') - self.activation = activations.get(activation) - self.use_bias = use_bias - self.kernel_initializer = initializers.get(kernel_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.kernel_constraint = constraints.get(kernel_constraint) - self.bias_constraint = constraints.get(bias_constraint) - self.input_spec = InputSpec(ndim=self.rank + 2) - - def build(self, input_shape): - if self.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = -1 - if input_shape[channel_axis] is None: - raise ValueError('The channel dimension of the inputs ' - 'should be defined. 
Found `None`.') - input_dim = input_shape[channel_axis] - kernel_shape = self.kernel_size + (input_dim, self.filters) - - self.kernel = self.add_weight(shape=kernel_shape, - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - if self.use_bias: - self.bias = self.add_weight(shape=(self.filters,), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - # Set input spec. - self.input_spec = InputSpec(ndim=self.rank + 2, - axes={channel_axis: input_dim}) - self.built = True - - def call(self, inputs): - if self.rank == 1: - outputs = K.conv1d( - inputs, - self.kernel, - strides=self.strides[0], - padding=self.padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate[0]) - if self.rank == 2: - outputs = K.conv2d( - inputs, - self.kernel, - strides=self.strides, - padding=self.padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate) - if self.rank == 3: - outputs = K.conv3d( - inputs, - self.kernel, - strides=self.strides, - padding=self.padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate) - - if self.use_bias: - outputs = K.bias_add( - outputs, - self.bias, - data_format=self.data_format) - - if self.activation is not None: - return self.activation(outputs) - return outputs - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_last': - space = input_shape[1:-1] - elif self.data_format == 'channels_first': - space = input_shape[2:] - new_space = [] - for i in range(len(space)): - new_dim = conv_utils.conv_output_length( - space[i], - self.kernel_size[i], - padding=self.padding, - stride=self.strides[i], - dilation=self.dilation_rate[i]) - new_space.append(new_dim) - if self.data_format == 'channels_last': - return (input_shape[0],) + tuple(new_space) + (self.filters,) - elif self.data_format == 'channels_first': - return (input_shape[0], self.filters) + tuple(new_space) - - def get_config(self): - config = { - 'rank': self.rank, - 'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'data_format': self.data_format, - 'dilation_rate': self.dilation_rate, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint) - } - base_config = super(_Conv, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class Conv1D(_Conv): - """1D convolution layer (e.g. temporal convolution). - - This layer creates a convolution kernel that is convolved - with the layer input over a single spatial (or temporal) dimension - to produce a tensor of outputs. - If `use_bias` is True, a bias vector is created and added to the outputs. - Finally, if `activation` is not `None`, - it is applied to the outputs as well. 
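# The compute_output_shape above defers to conv_utils.conv_output_length;
# roughly (ignoring the 'causal' and 'full' cases), the rule it applies
# per spatial axis is the following sketch:
def conv_output_length(input_length, kernel_size, padding, stride, dilation=1):
    dilated = kernel_size + (kernel_size - 1) * (dilation - 1)
    if padding == 'same':
        length = input_length
    else:  # 'valid'
        length = input_length - dilated + 1
    return (length + stride - 1) // stride  # ceil(length / stride)

assert conv_output_length(10, 3, 'valid', 2) == 4  # floor((10 - 3) / 2) + 1
assert conv_output_length(10, 3, 'same', 2) == 5   # ceil(10 / 2)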
- - When using this layer as the first layer in a model, - provide an `input_shape` argument (tuple of integers or `None`, does not - include the batch axis), e.g. `input_shape=(10, 128)` for time series - sequences of 10 time steps with 128 features per step in - `data_format="channels_last"`, or `(None, 128)` for variable-length - sequences with 128 features per step. - - # Arguments - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of a single integer, - specifying the length of the 1D convolution window. - strides: An integer or tuple/list of a single integer, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"`, `"causal"` or `"same"` (case-insensitive). - `"valid"` means "no padding". - `"same"` results in padding the input such that - the output has the same length as the original input. - `"causal"` results in causal (dilated) convolutions, - e.g. `output[t]` does not depend on `input[t + 1:]`. - A zero padding is used such that - the output has the same length as the original input. - Useful when modeling temporal data where the model - should not violate the temporal order. See - [WaveNet: A Generative Model for Raw Audio, section 2.1]( - https://arxiv.org/abs/1609.03499). - data_format: A string, - one of `"channels_last"` (default) or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, steps, channels)` - (default format for temporal data in Keras) - while `"channels_first"` corresponds to inputs - with shape `(batch, channels, steps)`. - dilation_rate: an integer or tuple/list of a single integer, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to the kernel matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - - # Input shape - 3D tensor with shape: `(batch, steps, channels)` - - # Output shape - 3D tensor with shape: `(batch, new_steps, filters)` - `steps` value might have changed due to padding or strides. 
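# Example usage (illustrative), matching the 10-timestep, 128-feature
# case in the Conv1D docstring above; causal padding preserves the
# sequence length:
from keras.models import Sequential
from keras.layers import Conv1D

model = Sequential()
model.add(Conv1D(64, 3, padding='causal', input_shape=(10, 128)))
print(model.output_shape)  # (None, 10, 64)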
- """ - - @interfaces.legacy_conv1d_support - def __init__(self, filters, - kernel_size, - strides=1, - padding='valid', - data_format='channels_last', - dilation_rate=1, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - if padding == 'causal': - if data_format != 'channels_last': - raise ValueError('When using causal padding in `Conv1D`, ' - '`data_format` must be "channels_last" ' - '(temporal data).') - super(Conv1D, self).__init__( - rank=1, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - **kwargs) - - def get_config(self): - config = super(Conv1D, self).get_config() - config.pop('rank') - return config - - -class Conv2D(_Conv): - """2D convolution layer (e.g. spatial convolution over images). - - This layer creates a convolution kernel that is convolved - with the layer input to produce a tensor of - outputs. If `use_bias` is True, - a bias vector is created and added to the outputs. Finally, if - `activation` is not `None`, it is applied to the outputs as well. - - When using this layer as the first layer in a model, - provide the keyword argument `input_shape` - (tuple of integers, does not include the batch axis), - e.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures - in `data_format="channels_last"`. - - # Arguments - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the - height and width of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution - along the height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: one of `"valid"` or `"same"` (case-insensitive). - Note that `"same"` is slightly inconsistent across backends with - `strides` != 1, as described - [here](https://github.com/keras-team/keras/pull/9473#issuecomment-372166860) - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, height, width, channels)` while `"channels_first"` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - dilation_rate: an integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. 
- activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to the kernel matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - - # Input shape - 4D tensor with shape: - `(batch, channels, rows, cols)` - if `data_format` is `"channels_first"` - or 4D tensor with shape: - `(batch, rows, cols, channels)` - if `data_format` is `"channels_last"`. - - # Output shape - 4D tensor with shape: - `(batch, filters, new_rows, new_cols)` - if `data_format` is `"channels_first"` - or 4D tensor with shape: - `(batch, new_rows, new_cols, filters)` - if `data_format` is `"channels_last"`. - `rows` and `cols` values might have changed due to padding. - """ - - @interfaces.legacy_conv2d_support - def __init__(self, filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1), - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super(Conv2D, self).__init__( - rank=2, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - **kwargs) - - def get_config(self): - config = super(Conv2D, self).get_config() - config.pop('rank') - return config - - -class Conv3D(_Conv): - """3D convolution layer (e.g. spatial convolution over volumes). - - This layer creates a convolution kernel that is convolved - with the layer input to produce a tensor of - outputs. If `use_bias` is True, - a bias vector is created and added to the outputs. Finally, if - `activation` is not `None`, it is applied to the outputs as well. - - When using this layer as the first layer in a model, - provide the keyword argument `input_shape` - (tuple of integers, does not include the batch axis), - e.g. `input_shape=(128, 128, 128, 1)` for 128x128x128 volumes - with a single channel, - in `data_format="channels_last"`. - - # Arguments - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of 3 integers, specifying the - depth, height and width of the 3D convolution window. 
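# Example usage (illustrative), matching the 128x128 RGB case in the
# Conv2D docstring above; 'same' padding preserves the spatial size:
from keras.models import Sequential
from keras.layers import Conv2D

model = Sequential()
model.add(Conv2D(32, (3, 3), padding='same', input_shape=(128, 128, 3)))
print(model.output_shape)  # (None, 128, 128, 32)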
- Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 3 integers, - specifying the strides of the convolution along each spatial dimension. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: one of `"valid"` or `"same"` (case-insensitive). - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - while `"channels_first"` corresponds to inputs with shape - `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - dilation_rate: an integer or tuple/list of 3 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to the kernel matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - - # Input shape - 5D tensor with shape: - `(batch, channels, conv_dim1, conv_dim2, conv_dim3)` - if `data_format` is `"channels_first"` - or 5D tensor with shape: - `(batch, conv_dim1, conv_dim2, conv_dim3, channels)` - if `data_format` is `"channels_last"`. - - # Output shape - 5D tensor with shape: - `(batch, filters, new_conv_dim1, new_conv_dim2, new_conv_dim3)` - if `data_format` is `"channels_first"` - or 5D tensor with shape: - `(batch, new_conv_dim1, new_conv_dim2, new_conv_dim3, filters)` - if `data_format` is `"channels_last"`. - `new_conv_dim1`, `new_conv_dim2` and `new_conv_dim3` values might have - changed due to padding. 
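# Example usage (illustrative), matching the single-channel 128x128x128
# volume from the Conv3D docstring above; 'valid' padding trims each
# spatial dimension by kernel_size - 1:
from keras.models import Sequential
from keras.layers import Conv3D

model = Sequential()
model.add(Conv3D(16, (3, 3, 3), input_shape=(128, 128, 128, 1)))
print(model.output_shape)  # (None, 126, 126, 126, 16)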
- """ - - @interfaces.legacy_conv3d_support - def __init__(self, filters, - kernel_size, - strides=(1, 1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1, 1), - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super(Conv3D, self).__init__( - rank=3, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - **kwargs) - - def get_config(self): - config = super(Conv3D, self).get_config() - config.pop('rank') - return config - - -class Conv2DTranspose(Conv2D): - """Transposed convolution layer (sometimes called Deconvolution). - - The need for transposed convolutions generally arises - from the desire to use a transformation going in the opposite direction - of a normal convolution, i.e., from something that has the shape of the - output of some convolution to something that has the shape of its input - while maintaining a connectivity pattern that is compatible with - said convolution. - - When using this layer as the first layer in a model, - provide the keyword argument `input_shape` - (tuple of integers, does not include the batch axis), - e.g. `input_shape=(128, 128, 3)` for 128x128 RGB pictures - in `data_format="channels_last"`. - - # Arguments - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the - height and width of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution - along the height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: one of `"valid"` or `"same"` (case-insensitive). - output_padding: An integer or tuple/list of 2 integers, - specifying the amount of padding along the height and width - of the output tensor. - Can be a single integer to specify the same value for all - spatial dimensions. - The amount of output padding along a given dimension must be - lower than the stride along that same dimension. - If set to `None` (default), the output shape is inferred. - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, height, width, channels)` while `"channels_first"` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - dilation_rate: an integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. 
- Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to the kernel matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - - # Input shape - 4D tensor with shape: - `(batch, channels, rows, cols)` - if `data_format` is `"channels_first"` - or 4D tensor with shape: - `(batch, rows, cols, channels)` - if `data_format` is `"channels_last"`. - - # Output shape - 4D tensor with shape: - `(batch, filters, new_rows, new_cols)` - if `data_format` is `"channels_first"` - or 4D tensor with shape: - `(batch, new_rows, new_cols, filters)` - if `data_format` is `"channels_last"`. - `rows` and `cols` values might have changed due to padding. - If `output_padding` is specified: - - ``` - new_rows = ((rows - 1) * strides[0] + kernel_size[0] - - 2 * padding[0] + output_padding[0]) - new_cols = ((cols - 1) * strides[1] + kernel_size[1] - - 2 * padding[1] + output_padding[1]) - ``` - - # References - - [A guide to convolution arithmetic for deep learning]( - https://arxiv.org/abs/1603.07285v1) - - [Deconvolutional Networks]( - https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) - """ - - @interfaces.legacy_deconv2d_support - def __init__(self, filters, - kernel_size, - strides=(1, 1), - padding='valid', - output_padding=None, - data_format=None, - dilation_rate=(1, 1), - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super(Conv2DTranspose, self).__init__( - filters, - kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - **kwargs) - - self.output_padding = output_padding - if self.output_padding is not None: - self.output_padding = conv_utils.normalize_tuple( - self.output_padding, 2, 'output_padding') - for stride, out_pad in zip(self.strides, self.output_padding): - if out_pad >= stride: - raise ValueError('Stride ' + str(self.strides) + ' must be ' - 'greater than output padding ' + - str(self.output_padding)) - - def build(self, input_shape): - if len(input_shape) != 4: - raise ValueError('Inputs 
should have rank ' + - str(4) + - '; Received input shape:', str(input_shape)) - if self.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = -1 - if input_shape[channel_axis] is None: - raise ValueError('The channel dimension of the inputs ' - 'should be defined. Found `None`.') - input_dim = input_shape[channel_axis] - kernel_shape = self.kernel_size + (self.filters, input_dim) - - self.kernel = self.add_weight(shape=kernel_shape, - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - if self.use_bias: - self.bias = self.add_weight(shape=(self.filters,), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - # Set input spec. - self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) - self.built = True - - def call(self, inputs): - input_shape = K.shape(inputs) - batch_size = input_shape[0] - if self.data_format == 'channels_first': - h_axis, w_axis = 2, 3 - else: - h_axis, w_axis = 1, 2 - - height, width = input_shape[h_axis], input_shape[w_axis] - kernel_h, kernel_w = self.kernel_size - stride_h, stride_w = self.strides - if self.output_padding is None: - out_pad_h = out_pad_w = None - else: - out_pad_h, out_pad_w = self.output_padding - - # Infer the dynamic output shape: - out_height = conv_utils.deconv_length(height, - stride_h, kernel_h, - self.padding, - out_pad_h, - self.dilation_rate[0]) - out_width = conv_utils.deconv_length(width, - stride_w, kernel_w, - self.padding, - out_pad_w, - self.dilation_rate[1]) - if self.data_format == 'channels_first': - output_shape = (batch_size, self.filters, out_height, out_width) - else: - output_shape = (batch_size, out_height, out_width, self.filters) - - outputs = K.conv2d_transpose( - inputs, - self.kernel, - output_shape, - self.strides, - padding=self.padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate) - - if self.use_bias: - outputs = K.bias_add( - outputs, - self.bias, - data_format=self.data_format) - - if self.activation is not None: - return self.activation(outputs) - return outputs - - def compute_output_shape(self, input_shape): - output_shape = list(input_shape) - if self.data_format == 'channels_first': - c_axis, h_axis, w_axis = 1, 2, 3 - else: - c_axis, h_axis, w_axis = 3, 1, 2 - - kernel_h, kernel_w = self.kernel_size - stride_h, stride_w = self.strides - if self.output_padding is None: - out_pad_h = out_pad_w = None - else: - out_pad_h, out_pad_w = self.output_padding - - output_shape[c_axis] = self.filters - output_shape[h_axis] = conv_utils.deconv_length(output_shape[h_axis], - stride_h, - kernel_h, - self.padding, - out_pad_h, - self.dilation_rate[0]) - output_shape[w_axis] = conv_utils.deconv_length(output_shape[w_axis], - stride_w, - kernel_w, - self.padding, - out_pad_w, - self.dilation_rate[1]) - return tuple(output_shape) - - def get_config(self): - config = super(Conv2DTranspose, self).get_config() - config['output_padding'] = self.output_padding - return config - - -class Conv3DTranspose(Conv3D): - """Transposed convolution layer (sometimes called Deconvolution). 
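# Worked example of the output-shape formula quoted in the Conv2DTranspose
# docstring above, for a 7x7 input with a 3x3 kernel, stride 2, 'valid'
# padding (so padding[i] = 0) and output_padding 1:
#   new_rows = (7 - 1) * 2 + 3 - 2 * 0 + 1 = 16
# i.e. a 7x7 feature map is upsampled to 16x16. Note that the constructor
# above rejects any output_padding >= the stride on the same axis.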
- - The need for transposed convolutions generally arises - from the desire to use a transformation going in the opposite direction - of a normal convolution, i.e., from something that has the shape of the - output of some convolution to something that has the shape of its input - while maintaining a connectivity pattern that is compatible with - said convolution. - - When using this layer as the first layer in a model, - provide the keyword argument `input_shape` - (tuple of integers, does not include the batch axis), - e.g. `input_shape=(128, 128, 128, 3)` for a 128x128x128 volume with 3 channels - if `data_format="channels_last"`. - - # Arguments - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of 3 integers, specifying the - depth, height and width of the 3D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 3 integers, - specifying the strides of the convolution - along the depth, height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: one of `"valid"` or `"same"` (case-insensitive). - output_padding: An integer or tuple/list of 3 integers, - specifying the amount of padding along the depth, height, and - width. - Can be a single integer to specify the same value for all - spatial dimensions. - The amount of output padding along a given dimension must be - lower than the stride along that same dimension. - If set to `None` (default), the output shape is inferred. - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, depth, height, width, channels)` while `"channels_first"` - corresponds to inputs with shape - `(batch, channels, depth, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - dilation_rate: an integer or tuple/list of 3 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to the kernel matrix - (see [constraints](../constraints.md)). 
- bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - - # Input shape - 5D tensor with shape: - `(batch, channels, depth, rows, cols)` - if `data_format` is `"channels_first"` - or 5D tensor with shape: - `(batch, depth, rows, cols, channels)` - if `data_format` is `"channels_last"`. - - # Output shape - 5D tensor with shape: - `(batch, filters, new_depth, new_rows, new_cols)` - if `data_format` is `"channels_first"` - or 5D tensor with shape: - `(batch, new_depth, new_rows, new_cols, filters)` - if `data_format` is `"channels_last"`. - `depth` and `rows` and `cols` values might have changed due to padding. - If `output_padding` is specified:: - - ``` - new_depth = ((depth - 1) * strides[0] + kernel_size[0] - - 2 * padding[0] + output_padding[0]) - new_rows = ((rows - 1) * strides[1] + kernel_size[1] - - 2 * padding[1] + output_padding[1]) - new_cols = ((cols - 1) * strides[2] + kernel_size[2] - - 2 * padding[2] + output_padding[2]) - ``` - - # References - - [A guide to convolution arithmetic for deep learning]( - https://arxiv.org/abs/1603.07285v1) - - [Deconvolutional Networks]( - https://www.matthewzeiler.com/mattzeiler/deconvolutionalnetworks.pdf) - """ - - def __init__(self, filters, - kernel_size, - strides=(1, 1, 1), - padding='valid', - output_padding=None, - data_format=None, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super(Conv3DTranspose, self).__init__( - filters, - kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - **kwargs) - - self.output_padding = output_padding - if self.output_padding is not None: - self.output_padding = conv_utils.normalize_tuple( - self.output_padding, 3, 'output_padding') - for stride, out_pad in zip(self.strides, self.output_padding): - if out_pad >= stride: - raise ValueError('Stride ' + str(self.strides) + ' must be ' - 'greater than output padding ' + - str(self.output_padding)) - - def build(self, input_shape): - if len(input_shape) != 5: - raise ValueError('Inputs should have rank ' + - str(5) + - '; Received input shape:', str(input_shape)) - if self.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = -1 - if input_shape[channel_axis] is None: - raise ValueError('The channel dimension of the inputs ' - 'should be defined. Found `None`.') - input_dim = input_shape[channel_axis] - kernel_shape = self.kernel_size + (self.filters, input_dim) - - self.kernel = self.add_weight(shape=kernel_shape, - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - if self.use_bias: - self.bias = self.add_weight(shape=(self.filters,), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - # Set input spec. 
- self.input_spec = InputSpec(ndim=5, axes={channel_axis: input_dim}) - self.built = True - - def call(self, inputs): - input_shape = K.shape(inputs) - batch_size = input_shape[0] - if self.data_format == 'channels_first': - d_axis, h_axis, w_axis = 2, 3, 4 - else: - d_axis, h_axis, w_axis = 1, 2, 3 - - depth = input_shape[d_axis] - height = input_shape[h_axis] - width = input_shape[w_axis] - - kernel_d, kernel_h, kernel_w = self.kernel_size - stride_d, stride_h, stride_w = self.strides - if self.output_padding is None: - out_pad_d = out_pad_h = out_pad_w = None - else: - out_pad_d, out_pad_h, out_pad_w = self.output_padding - - # Infer the dynamic output shape: - out_depth = conv_utils.deconv_length(depth, - stride_d, kernel_d, - self.padding, - out_pad_d) - out_height = conv_utils.deconv_length(height, - stride_h, kernel_h, - self.padding, - out_pad_h) - out_width = conv_utils.deconv_length(width, - stride_w, kernel_w, - self.padding, - out_pad_w) - - if self.data_format == 'channels_first': - output_shape = (batch_size, self.filters, - out_depth, out_height, out_width) - else: - output_shape = (batch_size, out_depth, - out_height, out_width, self.filters) - - outputs = K.conv3d_transpose(inputs, - self.kernel, - output_shape, - self.strides, - padding=self.padding, - data_format=self.data_format) - - if self.use_bias: - outputs = K.bias_add( - outputs, - self.bias, - data_format=self.data_format) - - if self.activation is not None: - return self.activation(outputs) - return outputs - - def compute_output_shape(self, input_shape): - output_shape = list(input_shape) - if self.data_format == 'channels_first': - c_axis, d_axis, h_axis, w_axis = 1, 2, 3, 4 - else: - c_axis, d_axis, h_axis, w_axis = 4, 1, 2, 3 - - kernel_d, kernel_h, kernel_w = self.kernel_size - stride_d, stride_h, stride_w = self.strides - if self.output_padding is None: - out_pad_d = out_pad_h = out_pad_w = None - else: - out_pad_d, out_pad_h, out_pad_w = self.output_padding - - output_shape[c_axis] = self.filters - output_shape[d_axis] = conv_utils.deconv_length(output_shape[d_axis], - stride_d, - kernel_d, - self.padding, - out_pad_d) - output_shape[h_axis] = conv_utils.deconv_length(output_shape[h_axis], - stride_h, - kernel_h, - self.padding, - out_pad_h) - output_shape[w_axis] = conv_utils.deconv_length(output_shape[w_axis], - stride_w, - kernel_w, - self.padding, - out_pad_w) - - return tuple(output_shape) - - def get_config(self): - config = super(Conv3DTranspose, self).get_config() - config.pop('dilation_rate') - config['output_padding'] = self.output_padding - return config - - -class _SeparableConv(_Conv): - """Abstract nD depthwise separable convolution layer (private). - - Separable convolutions consist in first performing - a depthwise spatial convolution - (which acts on each input channel separately) - followed by a pointwise convolution which mixes together the resulting - output channels. The `depth_multiplier` argument controls how many - output channels are generated per input channel in the depthwise step. - - Intuitively, separable convolutions can be understood as - a way to factorize a convolution kernel into two smaller kernels, - or as an extreme version of an Inception block. - - # Arguments - rank: An integer, the rank of the convolution, - e.g. "2" for 2D convolution. - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). 
- kernel_size: An integer or tuple/list of 2 integers, specifying the - height and width of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution - along the height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: one of `"valid"` or `"same"` (case-insensitive). - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, height, width, channels)` while `"channels_first"` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - dilation_rate: an integer or tuple/list of n integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. - depth_multiplier: The number of depthwise convolution output channels - for each input channel. - The total number of depthwise convolution output - channels will be equal to `filters_in * depth_multiplier`. - activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - depthwise_initializer: Initializer for the depthwise kernel matrix - (see [initializers](../initializers.md)). - pointwise_initializer: Initializer for the pointwise kernel matrix - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - depthwise_regularizer: Regularizer function applied to - the depthwise kernel matrix - (see [regularizer](../regularizers.md)). - pointwise_regularizer: Regularizer function applied to - the pointwise kernel matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - depthwise_constraint: Constraint function applied to - the depthwise kernel matrix - (see [constraints](../constraints.md)). - pointwise_constraint: Constraint function applied to - the pointwise kernel matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - - # Input shape - 4D tensor with shape: - `(batch, channels, rows, cols)` - if `data_format` is `"channels_first"` - or 4D tensor with shape: - `(batch, rows, cols, channels)` - if `data_format` is `"channels_last"`. - - # Output shape - 4D tensor with shape: - `(batch, filters, new_rows, new_cols)` - if `data_format` is `"channels_first"` - or 4D tensor with shape: - `(batch, new_rows, new_cols, filters)` - if `data_format` is `"channels_last"`. - `rows` and `cols` values might have changed due to padding. 
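# Back-of-the-envelope parameter count for the depthwise/pointwise
# factorization described above (depth_multiplier=1, biases ignored;
# the sizes are illustrative, not from the scraped source):
k, c_in, c_out = 3, 64, 128
full_conv = k * k * c_in * c_out         # standard conv: 73,728 weights
separable = k * k * c_in + c_in * c_out  # 576 depthwise + 8,192 pointwise
print(full_conv / separable)             # ~8.4x fewer parameters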
- """ - - def __init__(self, rank, - filters, - kernel_size, - strides=1, - padding='valid', - data_format=None, - dilation_rate=1, - depth_multiplier=1, - activation=None, - use_bias=True, - depthwise_initializer='glorot_uniform', - pointwise_initializer='glorot_uniform', - bias_initializer='zeros', - depthwise_regularizer=None, - pointwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - pointwise_constraint=None, - bias_constraint=None, - **kwargs): - super(_SeparableConv, self).__init__( - rank=rank, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - bias_initializer=bias_initializer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - bias_constraint=bias_constraint, - **kwargs) - self.depth_multiplier = depth_multiplier - self.depthwise_initializer = initializers.get(depthwise_initializer) - self.pointwise_initializer = initializers.get(pointwise_initializer) - self.depthwise_regularizer = regularizers.get(depthwise_regularizer) - self.pointwise_regularizer = regularizers.get(pointwise_regularizer) - self.depthwise_constraint = constraints.get(depthwise_constraint) - self.pointwise_constraint = constraints.get(pointwise_constraint) - - def build(self, input_shape): - if len(input_shape) < self.rank + 2: - raise ValueError('Inputs to `SeparableConv' + str(self.rank) + 'D` ' - 'should have rank ' + str(self.rank + 2) + '. ' - 'Received input shape:', str(input_shape)) - channel_axis = 1 if self.data_format == 'channels_first' else -1 - if input_shape[channel_axis] is None: - raise ValueError('The channel dimension of the inputs ' - 'should be defined. Found `None`.') - input_dim = int(input_shape[channel_axis]) - depthwise_kernel_shape = (input_dim, self.depth_multiplier) - depthwise_kernel_shape = self.kernel_size + depthwise_kernel_shape - pointwise_kernel_shape = ( - self.depth_multiplier * input_dim, self.filters) - pointwise_kernel_shape = (1,) * self.rank + pointwise_kernel_shape - - self.depthwise_kernel = self.add_weight( - shape=depthwise_kernel_shape, - initializer=self.depthwise_initializer, - name='depthwise_kernel', - regularizer=self.depthwise_regularizer, - constraint=self.depthwise_constraint) - self.pointwise_kernel = self.add_weight( - shape=pointwise_kernel_shape, - initializer=self.pointwise_initializer, - name='pointwise_kernel', - regularizer=self.pointwise_regularizer, - constraint=self.pointwise_constraint) - - if self.use_bias: - self.bias = self.add_weight(shape=(self.filters,), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - # Set input spec. 
- self.input_spec = InputSpec(ndim=self.rank + 2, - axes={channel_axis: input_dim}) - self.built = True - - def call(self, inputs): - if self.rank == 1: - outputs = K.separable_conv1d( - inputs, - self.depthwise_kernel, - self.pointwise_kernel, - data_format=self.data_format, - strides=self.strides, - padding=self.padding, - dilation_rate=self.dilation_rate) - if self.rank == 2: - outputs = K.separable_conv2d( - inputs, - self.depthwise_kernel, - self.pointwise_kernel, - data_format=self.data_format, - strides=self.strides, - padding=self.padding, - dilation_rate=self.dilation_rate) - - if self.use_bias: - outputs = K.bias_add( - outputs, - self.bias, - data_format=self.data_format) - - if self.activation is not None: - return self.activation(outputs) - return outputs - - def get_config(self): - config = super(_SeparableConv, self).get_config() - config.pop('rank') - config.pop('kernel_initializer') - config.pop('kernel_regularizer') - config.pop('kernel_constraint') - config['depth_multiplier'] = self.depth_multiplier - config['depthwise_initializer'] = ( - initializers.serialize(self.depthwise_initializer)) - config['pointwise_initializer'] = ( - initializers.serialize(self.pointwise_initializer)) - config['depthwise_regularizer'] = ( - regularizers.serialize(self.depthwise_regularizer)) - config['pointwise_regularizer'] = ( - regularizers.serialize(self.pointwise_regularizer)) - config['depthwise_constraint'] = ( - constraints.serialize(self.depthwise_constraint)) - config['pointwise_constraint'] = ( - constraints.serialize(self.pointwise_constraint)) - return config - - -class SeparableConv1D(_SeparableConv): - """Depthwise separable 1D convolution. - - Separable convolutions consist in first performing - a depthwise spatial convolution - (which acts on each input channel separately) - followed by a pointwise convolution which mixes together the resulting - output channels. The `depth_multiplier` argument controls how many - output channels are generated per input channel in the depthwise step. - - Intuitively, separable convolutions can be understood as - a way to factorize a convolution kernel into two smaller kernels, - or as an extreme version of an Inception block. - - # Arguments - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of single integer, - specifying the length of the 1D convolution window. - strides: An integer or tuple/list of single integer, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: one of `"valid"` or `"same"` (case-insensitive). - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, steps, channels)` while `"channels_first"` - corresponds to inputs with shape - `(batch, channels, steps)`. - dilation_rate: An integer or tuple/list of a single integer, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - depth_multiplier: The number of depthwise convolution output channels - for each input channel. - The total number of depthwise convolution output - channels will be equal to `filters_in * depth_multiplier`. - activation: Activation function to use - (see [activations](../activations.md)). 
- If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - depthwise_initializer: Initializer for the depthwise kernel matrix - (see [initializers](../initializers.md)). - pointwise_initializer: Initializer for the pointwise kernel matrix - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - depthwise_regularizer: Regularizer function applied to - the depthwise kernel matrix - (see [regularizer](../regularizers.md)). - pointwise_regularizer: Regularizer function applied to - the pointwise kernel matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - depthwise_constraint: Constraint function applied to - the depthwise kernel matrix - (see [constraints](../constraints.md)). - pointwise_constraint: Constraint function applied to - the pointwise kernel matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - - # Input shape - 3D tensor with shape: - `(batch, channels, steps)` - if `data_format` is `"channels_first"` - or 3D tensor with shape: - `(batch, steps, channels)` - if `data_format` is `"channels_last"`. - - # Output shape - 3D tensor with shape: - `(batch, filters, new_steps)` - if `data_format` is `"channels_first"` - or 3D tensor with shape: - `(batch, new_steps, filters)` - if `data_format` is `"channels_last"`. - `new_steps` values might have changed due to padding or strides. - """ - - def __init__(self, filters, - kernel_size, - strides=1, - padding='valid', - data_format='channels_last', - dilation_rate=1, - depth_multiplier=1, - activation=None, - use_bias=True, - depthwise_initializer='glorot_uniform', - pointwise_initializer='glorot_uniform', - bias_initializer='zeros', - depthwise_regularizer=None, - pointwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - pointwise_constraint=None, - bias_constraint=None, - **kwargs): - super(SeparableConv1D, self).__init__( - rank=1, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - depth_multiplier=depth_multiplier, - activation=activation, - use_bias=use_bias, - depthwise_initializer=depthwise_initializer, - pointwise_initializer=pointwise_initializer, - bias_initializer=bias_initializer, - depthwise_regularizer=depthwise_regularizer, - pointwise_regularizer=pointwise_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - depthwise_constraint=depthwise_constraint, - pointwise_constraint=pointwise_constraint, - bias_constraint=bias_constraint, - **kwargs) - - -class SeparableConv2D(_SeparableConv): - """Depthwise separable 2D convolution. - - Separable convolutions consist in first performing - a depthwise spatial convolution - (which acts on each input channel separately) - followed by a pointwise convolution which mixes together the resulting - output channels. The `depth_multiplier` argument controls how many - output channels are generated per input channel in the depthwise step. 
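To make the depthwise-then-pointwise factorization described above concrete, here is a minimal sketch of the parameter savings (assuming a Keras 2.2-era install; the layer names match the classes defined in this file, and the arithmetic is spelled out in the comments):

```python
# Compare parameter counts: regular vs. depthwise separable convolution.
# Regular Conv2D: 3*3*3*64 weights + 64 biases = 1792 parameters.
# SeparableConv2D: depthwise 3*3*3*1 = 27, pointwise 1*1*3*64 = 192,
# plus 64 biases = 283 parameters, roughly a 6x reduction here.
from keras.models import Sequential
from keras.layers import Conv2D, SeparableConv2D

regular = Sequential([Conv2D(64, (3, 3), input_shape=(32, 32, 3))])
separable = Sequential([SeparableConv2D(64, (3, 3), input_shape=(32, 32, 3))])
print(regular.count_params(), separable.count_params())  # 1792 283
```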
- - Intuitively, separable convolutions can be understood as - a way to factorize a convolution kernel into two smaller kernels, - or as an extreme version of an Inception block. - - # Arguments - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the - height and width of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution - along the height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: one of `"valid"` or `"same"` (case-insensitive). - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, height, width, channels)` while `"channels_first"` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - dilation_rate: An integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - depth_multiplier: The number of depthwise convolution output channels - for each input channel. - The total number of depthwise convolution output - channels will be equal to `filters_in * depth_multiplier`. - activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - depthwise_initializer: Initializer for the depthwise kernel matrix - (see [initializers](../initializers.md)). - pointwise_initializer: Initializer for the pointwise kernel matrix - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - depthwise_regularizer: Regularizer function applied to - the depthwise kernel matrix - (see [regularizer](../regularizers.md)). - pointwise_regularizer: Regularizer function applied to - the pointwise kernel matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - depthwise_constraint: Constraint function applied to - the depthwise kernel matrix - (see [constraints](../constraints.md)). - pointwise_constraint: Constraint function applied to - the pointwise kernel matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - - # Input shape - 4D tensor with shape: - `(batch, channels, rows, cols)` - if `data_format` is `"channels_first"` - or 4D tensor with shape: - `(batch, rows, cols, channels)` - if `data_format` is `"channels_last"`. 
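As a quick check of the 1D shape arithmetic (a sketch under the same Keras assumption as above): with `"valid"` padding, stride 1 and no dilation, `new_steps = steps - kernel_size + 1`.

```python
# SeparableConv1D output length with "valid" padding: 100 - 3 + 1 = 98.
from keras.models import Sequential
from keras.layers import SeparableConv1D

model = Sequential([SeparableConv1D(16, 3, input_shape=(100, 8))])
print(model.output_shape)  # (None, 98, 16)
```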
- - # Output shape - 4D tensor with shape: - `(batch, filters, new_rows, new_cols)` - if `data_format` is `"channels_first"` - or 4D tensor with shape: - `(batch, new_rows, new_cols, filters)` - if `data_format` is `"channels_last"`. - `rows` and `cols` values might have changed due to padding. - """ - - @interfaces.legacy_separable_conv2d_support - def __init__(self, filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1), - depth_multiplier=1, - activation=None, - use_bias=True, - depthwise_initializer='glorot_uniform', - pointwise_initializer='glorot_uniform', - bias_initializer='zeros', - depthwise_regularizer=None, - pointwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - pointwise_constraint=None, - bias_constraint=None, - **kwargs): - super(SeparableConv2D, self).__init__( - rank=2, - filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - depth_multiplier=depth_multiplier, - activation=activation, - use_bias=use_bias, - depthwise_initializer=depthwise_initializer, - pointwise_initializer=pointwise_initializer, - bias_initializer=bias_initializer, - depthwise_regularizer=depthwise_regularizer, - pointwise_regularizer=pointwise_regularizer, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - depthwise_constraint=depthwise_constraint, - pointwise_constraint=pointwise_constraint, - bias_constraint=bias_constraint, - **kwargs) - - -class DepthwiseConv2D(Conv2D): - """Depthwise separable 2D convolution. - - Depthwise Separable convolutions consists in performing - just the first step in a depthwise spatial convolution - (which acts on each input channel separately). - The `depth_multiplier` argument controls how many - output channels are generated per input channel in the depthwise step. - - # Arguments - kernel_size: An integer or tuple/list of 2 integers, specifying the - height and width of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution - along the height and width. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: one of `"valid"` or `"same"` (case-insensitive). - depth_multiplier: The number of depthwise convolution output channels - for each input channel. - The total number of depthwise convolution output - channels will be equal to `filters_in * depth_multiplier`. - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, height, width, channels)` while `"channels_first"` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be 'channels_last'. - dilation_rate: an integer or tuple/list of 2 integers, specifying - the dilation rate to use for dilated convolution. - Can be a single integer to specify the same value for - all spatial dimensions. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any stride value != 1. 
- activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. 'linear' activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - depthwise_initializer: Initializer for the depthwise kernel matrix - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - depthwise_regularizer: Regularizer function applied to - the depthwise kernel matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its 'activation'). - (see [regularizer](../regularizers.md)). - depthwise_constraint: Constraint function applied to - the depthwise kernel matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - - # Input shape - 4D tensor with shape: - `(batch, channels, rows, cols)` - if `data_format` is `"channels_first"` - or 4D tensor with shape: - `(batch, rows, cols, channels)` - if `data_format` is `"channels_last"`. - - # Output shape - 4D tensor with shape: - `(batch, filters, new_rows, new_cols)` - if `data_format` is `"channels_first"` - or 4D tensor with shape: - `(batch, new_rows, new_cols, filters)` - if `data_format` is `"channels_last"`. - `rows` and `cols` values might have changed due to padding. - """ - - def __init__(self, - kernel_size, - strides=(1, 1), - padding='valid', - depth_multiplier=1, - data_format=None, - dilation_rate=(1, 1), - activation=None, - use_bias=True, - depthwise_initializer='glorot_uniform', - bias_initializer='zeros', - depthwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - bias_constraint=None, - **kwargs): - super(DepthwiseConv2D, self).__init__( - filters=None, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - bias_constraint=bias_constraint, - **kwargs) - self.depth_multiplier = depth_multiplier - self.depthwise_initializer = initializers.get(depthwise_initializer) - self.depthwise_regularizer = regularizers.get(depthwise_regularizer) - self.depthwise_constraint = constraints.get(depthwise_constraint) - self.bias_initializer = initializers.get(bias_initializer) - - def build(self, input_shape): - if len(input_shape) < 4: - raise ValueError('Inputs to `DepthwiseConv2D` should have rank 4. ' - 'Received input shape:', str(input_shape)) - if self.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = 3 - if input_shape[channel_axis] is None: - raise ValueError('The channel dimension of the inputs to ' - '`DepthwiseConv2D` ' - 'should be defined. 
Found `None`.') - input_dim = int(input_shape[channel_axis]) - depthwise_kernel_shape = (self.kernel_size[0], - self.kernel_size[1], - input_dim, - self.depth_multiplier) - - self.depthwise_kernel = self.add_weight( - shape=depthwise_kernel_shape, - initializer=self.depthwise_initializer, - name='depthwise_kernel', - regularizer=self.depthwise_regularizer, - constraint=self.depthwise_constraint) - - if self.use_bias: - self.bias = self.add_weight(shape=(input_dim * self.depth_multiplier,), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - # Set input spec. - self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) - self.built = True - - def call(self, inputs, training=None): - outputs = K.depthwise_conv2d( - inputs, - self.depthwise_kernel, - strides=self.strides, - padding=self.padding, - dilation_rate=self.dilation_rate, - data_format=self.data_format) - - if self.use_bias: - outputs = K.bias_add( - outputs, - self.bias, - data_format=self.data_format) - - if self.activation is not None: - return self.activation(outputs) - - return outputs - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_last': - space = input_shape[1:-1] - out_filters = input_shape[3] * self.depth_multiplier - elif self.data_format == 'channels_first': - space = input_shape[2:] - out_filters = input_shape[1] * self.depth_multiplier - new_space = [] - for i in range(len(space)): - new_dim = conv_utils.conv_output_length( - space[i], - self.kernel_size[i], - padding=self.padding, - stride=self.strides[i], - dilation=self.dilation_rate[i]) - new_space.append(new_dim) - if self.data_format == 'channels_last': - return (input_shape[0], new_space[0], new_space[1], out_filters) - elif self.data_format == 'channels_first': - return (input_shape[0], out_filters, new_space[0], new_space[1]) - - def get_config(self): - config = super(DepthwiseConv2D, self).get_config() - config.pop('filters') - config.pop('kernel_initializer') - config.pop('kernel_regularizer') - config.pop('kernel_constraint') - config['depth_multiplier'] = self.depth_multiplier - config['depthwise_initializer'] = ( - initializers.serialize(self.depthwise_initializer)) - config['depthwise_regularizer'] = ( - regularizers.serialize(self.depthwise_regularizer)) - config['depthwise_constraint'] = ( - constraints.serialize(self.depthwise_constraint)) - return config - - -class _UpSampling(Layer): - """Abstract nD UpSampling layer (private, used as implementation base). - - # Arguments - size: Tuple of ints. - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, ..., channels)` while `"channels_first"` corresponds to - inputs with shape `(batch, channels, ...)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - """ - - def __init__(self, size, data_format=None, **kwargs): - # self.rank is 1 for UpSampling1D, 2 for UpSampling2D. 
- self.rank = len(size) - self.size = size - self.data_format = K.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=self.rank + 2) - super(_UpSampling, self).__init__(**kwargs) - - def call(self, inputs): - raise NotImplementedError - - def compute_output_shape(self, input_shape): - size_all_dims = (1,) + self.size + (1,) - spatial_axes = list(range(1, 1 + self.rank)) - size_all_dims = transpose_shape(size_all_dims, - self.data_format, - spatial_axes) - output_shape = list(input_shape) - for dim in range(len(output_shape)): - if output_shape[dim] is not None: - output_shape[dim] *= size_all_dims[dim] - return tuple(output_shape) - - def get_config(self): - config = {'size': self.size, - 'data_format': self.data_format} - base_config = super(_UpSampling, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class UpSampling1D(_UpSampling): - """Upsampling layer for 1D inputs. - - Repeats each temporal step `size` times along the time axis. - - # Arguments - size: integer. Upsampling factor. - - # Input shape - 3D tensor with shape: `(batch, steps, features)`. - - # Output shape - 3D tensor with shape: `(batch, upsampled_steps, features)`. - """ - - @interfaces.legacy_upsampling1d_support - def __init__(self, size=2, **kwargs): - super(UpSampling1D, self).__init__( - (int(size),), 'channels_last', **kwargs) - - def call(self, inputs): - output = K.repeat_elements(inputs, self.size[0], axis=1) - return output - - def get_config(self): - config = super(UpSampling1D, self).get_config() - config['size'] = self.size[0] - config.pop('data_format') - return config - - -class UpSampling2D(_UpSampling): - """Upsampling layer for 2D inputs. - - Repeats the rows and columns of the data - by size[0] and size[1] respectively. - - # Arguments - size: int, or tuple of 2 integers. - The upsampling factors for rows and columns. - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, height, width, channels)` while `"channels_first"` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - interpolation: A string, one of `nearest` or `bilinear`. - Note that CNTK does not support yet the `bilinear` upscaling - and that with Theano, only `size=(2, 2)` is possible. 
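A small sketch of the repetition behaviour described above (nearest-neighbour interpolation, `channels_last` data; assumes a Keras 2.2-era install):

```python
# UpSampling2D with size=(2, 2) repeats every value in a 2x2 block,
# so each spatial dimension is multiplied by its upsampling factor.
import numpy as np
from keras.models import Sequential
from keras.layers import UpSampling2D

model = Sequential([UpSampling2D(size=(2, 2), input_shape=(2, 2, 1))])
x = np.arange(4, dtype='float32').reshape(1, 2, 2, 1)
print(model.predict(x).shape)  # (1, 4, 4, 1)
```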
- - # Input shape - 4D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch, rows, cols, channels)` - - If `data_format` is `"channels_first"`: - `(batch, channels, rows, cols)` - - # Output shape - 4D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch, upsampled_rows, upsampled_cols, channels)` - - If `data_format` is `"channels_first"`: - `(batch, channels, upsampled_rows, upsampled_cols)` - """ - - @interfaces.legacy_upsampling2d_support - def __init__(self, size=(2, 2), data_format=None, interpolation='nearest', - **kwargs): - normalized_size = conv_utils.normalize_tuple(size, 2, 'size') - super(UpSampling2D, self).__init__( - normalized_size, data_format, **kwargs) - if interpolation not in ['nearest', 'bilinear']: - raise ValueError('interpolation should be one ' - 'of "nearest" or "bilinear".') - self.interpolation = interpolation - - def call(self, inputs): - return K.resize_images(inputs, self.size[0], self.size[1], - self.data_format, self.interpolation) - - def get_config(self): - config = super(UpSampling2D, self).get_config() - config['interpolation'] = self.interpolation - return config - - -class UpSampling3D(_UpSampling): - """Upsampling layer for 3D inputs. - - Repeats the 1st, 2nd and 3rd dimensions - of the data by size[0], size[1] and size[2] respectively. - - # Arguments - size: int, or tuple of 3 integers. - The upsampling factors for dim1, dim2 and dim3. - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - while `"channels_first"` corresponds to inputs with shape - `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - # Input shape - 5D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch, dim1, dim2, dim3, channels)` - - If `data_format` is `"channels_first"`: - `(batch, channels, dim1, dim2, dim3)` - - # Output shape - 5D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch, upsampled_dim1, upsampled_dim2, upsampled_dim3, channels)` - - If `data_format` is `"channels_first"`: - `(batch, channels, upsampled_dim1, upsampled_dim2, upsampled_dim3)` - """ - - @interfaces.legacy_upsampling3d_support - def __init__(self, size=(2, 2, 2), data_format=None, **kwargs): - normalized_size = conv_utils.normalize_tuple(size, 3, 'size') - super(UpSampling3D, self).__init__( - normalized_size, data_format, **kwargs) - - def call(self, inputs): - return K.resize_volumes(inputs, - self.size[0], self.size[1], self.size[2], - self.data_format) - - -class _ZeroPadding(Layer): - """Abstract nD ZeroPadding layer (private, used as implementation base). - - # Arguments - padding: Tuple of tuples of two ints. Can be a tuple of ints when - rank is 1. - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, ..., channels)` while `"channels_first"` corresponds to - inputs with shape `(batch, channels, ...)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". 
- """ - - def __init__(self, padding, data_format=None, **kwargs): - # self.rank is 1 for ZeroPadding1D, 2 for ZeroPadding2D. - self.rank = len(padding) - self.padding = padding - self.data_format = K.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=self.rank + 2) - super(_ZeroPadding, self).__init__(**kwargs) - - def call(self, inputs): - raise NotImplementedError - - def compute_output_shape(self, input_shape): - padding_all_dims = ((0, 0),) + self.padding + ((0, 0),) - spatial_axes = list(range(1, 1 + self.rank)) - padding_all_dims = transpose_shape(padding_all_dims, - self.data_format, - spatial_axes) - output_shape = list(input_shape) - for dim in range(len(output_shape)): - if output_shape[dim] is not None: - output_shape[dim] += sum(padding_all_dims[dim]) - return tuple(output_shape) - - def get_config(self): - config = {'padding': self.padding, - 'data_format': self.data_format} - base_config = super(_ZeroPadding, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class ZeroPadding1D(_ZeroPadding): - """Zero-padding layer for 1D input (e.g. temporal sequence). - - # Arguments - padding: int, or tuple of int (length 2), or dictionary. - - If int: - How many zeros to add at the beginning and end of - the padding dimension (axis 1). - - If tuple of int (length 2): - How many zeros to add at the beginning and at the end of - the padding dimension (`(left_pad, right_pad)`). - - # Input shape - 3D tensor with shape `(batch, axis_to_pad, features)` - - # Output shape - 3D tensor with shape `(batch, padded_axis, features)` - """ - - def __init__(self, padding=1, **kwargs): - normalized_padding = ( - conv_utils.normalize_tuple(padding, 2, 'padding'),) - super(ZeroPadding1D, self).__init__(normalized_padding, - 'channels_last', - **kwargs) - - def call(self, inputs): - return K.temporal_padding(inputs, padding=self.padding[0]) - - def get_config(self): - config = super(ZeroPadding1D, self).get_config() - config['padding'] = config['padding'][0] - config.pop('data_format') - return config - - -class ZeroPadding2D(_ZeroPadding): - """Zero-padding layer for 2D input (e.g. picture). - - This layer can add rows and columns of zeros - at the top, bottom, left and right side of an image tensor. - - # Arguments - padding: int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints. - - If int: the same symmetric padding - is applied to height and width. - - If tuple of 2 ints: - interpreted as two different - symmetric padding values for height and width: - `(symmetric_height_pad, symmetric_width_pad)`. - - If tuple of 2 tuples of 2 ints: - interpreted as - `((top_pad, bottom_pad), (left_pad, right_pad))` - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, height, width, channels)` while `"channels_first"` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". 
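The padding tuple semantics are easiest to see on a concrete shape (a sketch, same Keras assumption as above): each spatial dimension grows by the sum of its two pad amounts.

```python
# Asymmetric padding: rows grow by 1+2, cols by 3+4.
from keras.models import Sequential
from keras.layers import ZeroPadding2D

model = Sequential([ZeroPadding2D(padding=((1, 2), (3, 4)),
                                  input_shape=(5, 5, 1))])
print(model.output_shape)  # (None, 8, 12, 1)
```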
-
-    # Input shape
-        4D tensor with shape:
-        - If `data_format` is `"channels_last"`:
-            `(batch, rows, cols, channels)`
-        - If `data_format` is `"channels_first"`:
-            `(batch, channels, rows, cols)`
-
-    # Output shape
-        4D tensor with shape:
-        - If `data_format` is `"channels_last"`:
-            `(batch, padded_rows, padded_cols, channels)`
-        - If `data_format` is `"channels_first"`:
-            `(batch, channels, padded_rows, padded_cols)`
-    """
-
-    @interfaces.legacy_zeropadding2d_support
-    def __init__(self,
-                 padding=(1, 1),
-                 data_format=None,
-                 **kwargs):
-        if isinstance(padding, int):
-            normalized_padding = ((padding, padding), (padding, padding))
-        elif hasattr(padding, '__len__'):
-            if len(padding) != 2:
-                raise ValueError('`padding` should have two elements. '
-                                 'Found: ' + str(padding))
-            height_padding = conv_utils.normalize_tuple(padding[0], 2,
-                                                        '1st entry of padding')
-            width_padding = conv_utils.normalize_tuple(padding[1], 2,
-                                                       '2nd entry of padding')
-            normalized_padding = (height_padding, width_padding)
-        else:
-            raise ValueError('`padding` should be either an int, '
-                             'a tuple of 2 ints '
-                             '(symmetric_height_pad, symmetric_width_pad), '
-                             'or a tuple of 2 tuples of 2 ints '
-                             '((top_pad, bottom_pad), (left_pad, right_pad)). '
-                             'Found: ' + str(padding))
-        super(ZeroPadding2D, self).__init__(normalized_padding,
-                                            data_format,
-                                            **kwargs)
-
-    def call(self, inputs):
-        return K.spatial_2d_padding(inputs,
-                                    padding=self.padding,
-                                    data_format=self.data_format)
-
-
-class ZeroPadding3D(_ZeroPadding):
-    """Zero-padding layer for 3D data (spatial or spatio-temporal).
-
-    # Arguments
-        padding: int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints.
-            - If int: the same symmetric padding
-                is applied to depth, height, and width.
-            - If tuple of 3 ints:
-                interpreted as three different
-                symmetric padding values for depth, height, and width:
-                `(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad)`.
-            - If tuple of 3 tuples of 2 ints:
-                interpreted as
-                `((left_dim1_pad, right_dim1_pad),
-                  (left_dim2_pad, right_dim2_pad),
-                  (left_dim3_pad, right_dim3_pad))`
-        data_format: A string,
-            one of `"channels_last"` or `"channels_first"`.
-            The ordering of the dimensions in the inputs.
-            `"channels_last"` corresponds to inputs with shape
-            `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
-            while `"channels_first"` corresponds to inputs with shape
-            `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
-            It defaults to the `image_data_format` value found in your
-            Keras config file at `~/.keras/keras.json`.
-            If you never set it, then it will be "channels_last".
-
-    # Input shape
-        5D tensor with shape:
-        - If `data_format` is `"channels_last"`:
-            `(batch, first_axis_to_pad, second_axis_to_pad, third_axis_to_pad,
-              depth)`
-        - If `data_format` is `"channels_first"`:
-            `(batch, depth,
-              first_axis_to_pad, second_axis_to_pad, third_axis_to_pad)`
-
-    # Output shape
-        5D tensor with shape:
-        - If `data_format` is `"channels_last"`:
-            `(batch, first_padded_axis, second_padded_axis, third_padded_axis,
-              depth)`
-        - If `data_format` is `"channels_first"`:
-            `(batch, depth,
-              first_padded_axis, second_padded_axis, third_padded_axis)`
-    """
-
-    @interfaces.legacy_zeropadding3d_support
-    def __init__(self, padding=(1, 1, 1), data_format=None, **kwargs):
-        if isinstance(padding, int):
-            normalized_padding = 3 * ((padding, padding),)
-        elif hasattr(padding, '__len__'):
-            if len(padding) != 3:
-                raise ValueError('`padding` should have 3 elements. '
-                                 'Found: ' + str(padding))
-            dim1_padding = conv_utils.normalize_tuple(padding[0], 2,
-                                                      '1st entry of padding')
-            dim2_padding = conv_utils.normalize_tuple(padding[1], 2,
-                                                      '2nd entry of padding')
-            dim3_padding = conv_utils.normalize_tuple(padding[2], 2,
-                                                      '3rd entry of padding')
-            normalized_padding = (dim1_padding, dim2_padding, dim3_padding)
-        else:
-            raise ValueError(
-                '`padding` should be either an int, a tuple of 3 ints '
-                '(symmetric_dim1_pad, symmetric_dim2_pad, symmetric_dim3_pad), '
-                'or a tuple of 3 tuples of 2 ints '
-                '((left_dim1_pad, right_dim1_pad),'
-                ' (left_dim2_pad, right_dim2_pad),'
-                ' (left_dim3_pad, right_dim3_pad)). '
-                'Found: ' + str(padding))
-        super(ZeroPadding3D, self).__init__(normalized_padding,
-                                            data_format,
-                                            **kwargs)
-
-    def call(self, inputs):
-        return K.spatial_3d_padding(inputs,
-                                    padding=self.padding,
-                                    data_format=self.data_format)
-
-
-class _Cropping(Layer):
-    """Abstract nD cropping layer (private, used as implementation base).
-
-    # Arguments
-        cropping: A tuple of tuples of 2 ints.
-        data_format: A string,
-            one of `"channels_last"` or `"channels_first"`.
-            The ordering of the dimensions in the inputs.
-            `"channels_last"` corresponds to inputs with shape
-            `(batch, ..., channels)` while `"channels_first"` corresponds to
-            inputs with shape `(batch, channels, ...)`.
-            It defaults to the `image_data_format` value found in your
-            Keras config file at `~/.keras/keras.json`.
-            If you never set it, then it will be "channels_last".
-            For Cropping1D, the data format is always `"channels_last"`.
-    """
-
-    def __init__(self, cropping,
-                 data_format=None,
-                 **kwargs):
-        super(_Cropping, self).__init__(**kwargs)
-        # self.rank is 1 for Cropping1D, 2 for Cropping2D...
-        self.rank = len(cropping)
-        self.cropping = cropping
-        self.data_format = K.normalize_data_format(data_format)
-        self.input_spec = InputSpec(ndim=2 + self.rank)
-
-    def call(self, inputs):
-        slices_dims = []
-        for start, end in self.cropping:
-            if end == 0:
-                end = None
-            else:
-                end = -end
-            slices_dims.append(slice(start, end))
-
-        slices = [slice(None)] + slices_dims + [slice(None)]
-        slices = tuple(slices)
-        spatial_axes = list(range(1, 1 + self.rank))
-        slices = transpose_shape(slices, self.data_format, spatial_axes)
-        return inputs[slices]
-
-    def compute_output_shape(self, input_shape):
-        cropping_all_dims = ((0, 0),) + self.cropping + ((0, 0),)
-        spatial_axes = list(range(1, 1 + self.rank))
-        cropping_all_dims = transpose_shape(cropping_all_dims,
-                                            self.data_format,
-                                            spatial_axes)
-        output_shape = list(input_shape)
-        for dim in range(len(output_shape)):
-            if output_shape[dim] is not None:
-                output_shape[dim] -= sum(cropping_all_dims[dim])
-        return tuple(output_shape)
-
-    def get_config(self):
-        config = {'cropping': self.cropping,
-                  'data_format': self.data_format}
-        base_config = super(_Cropping, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-
-class Cropping1D(_Cropping):
-    """Cropping layer for 1D input (e.g. temporal sequence).
-
-    It crops along the time dimension (axis 1).
-
-    # Arguments
-        cropping: int or tuple of int (length 2)
-            How many units should be trimmed off at the beginning and end of
-            the cropping dimension (axis 1).
-            If a single int is provided,
-            the same value will be used for both.
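A sketch of the cropping arithmetic just described (assuming a Keras 2.2-era install): the layer slices `cropping[0]` steps off the front and `cropping[1]` steps off the end of axis 1.

```python
# Cropping1D((1, 2)) keeps steps 1..3 of a 6-step sequence.
import numpy as np
from keras.models import Sequential
from keras.layers import Cropping1D

model = Sequential([Cropping1D(cropping=(1, 2), input_shape=(6, 1))])
x = np.arange(6, dtype='float32').reshape(1, 6, 1)
print(model.predict(x).ravel())  # [1. 2. 3.]
```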
- - # Input shape - 3D tensor with shape `(batch, axis_to_crop, features)` - - # Output shape - 3D tensor with shape `(batch, cropped_axis, features)` - """ - - def __init__(self, cropping=(1, 1), **kwargs): - normalized_cropping = ( - conv_utils.normalize_tuple(cropping, 2, 'cropping'),) - super(Cropping1D, self).__init__(normalized_cropping, - 'channels_last', - **kwargs) - - def get_config(self): - base_config = super(Cropping1D, self).get_config() - base_config.pop('data_format') - base_config['cropping'] = base_config['cropping'][0] - return base_config - - -class Cropping2D(_Cropping): - """Cropping layer for 2D input (e.g. picture). - - It crops along spatial dimensions, i.e. height and width. - - # Arguments - cropping: int, or tuple of 2 ints, or tuple of 2 tuples of 2 ints. - - If int: the same symmetric cropping - is applied to height and width. - - If tuple of 2 ints: - interpreted as two different - symmetric cropping values for height and width: - `(symmetric_height_crop, symmetric_width_crop)`. - - If tuple of 2 tuples of 2 ints: - interpreted as - `((top_crop, bottom_crop), (left_crop, right_crop))` - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, height, width, channels)` while `"channels_first"` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - # Input shape - 4D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch, rows, cols, channels)` - - If `data_format` is `"channels_first"`: - `(batch, channels, rows, cols)` - - # Output shape - 4D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch, cropped_rows, cropped_cols, channels)` - - If `data_format` is `"channels_first"`: - `(batch, channels, cropped_rows, cropped_cols)` - - # Examples - - ```python - # Crop the input 2D images or feature maps - model = Sequential() - model.add(Cropping2D(cropping=((2, 2), (4, 4)), - input_shape=(28, 28, 3))) - # now model.output_shape == (None, 24, 20, 3) - model.add(Conv2D(64, (3, 3), padding='same')) - model.add(Cropping2D(cropping=((2, 2), (2, 2)))) - # now model.output_shape == (None, 20, 16, 64) - ``` - """ - - @interfaces.legacy_cropping2d_support - def __init__(self, cropping=((0, 0), (0, 0)), - data_format=None, **kwargs): - if isinstance(cropping, int): - normalized_cropping = ((cropping, cropping), (cropping, cropping)) - elif hasattr(cropping, '__len__'): - if len(cropping) != 2: - raise ValueError('`cropping` should have two elements. ' - 'Found: ' + str(cropping)) - height_cropping = conv_utils.normalize_tuple( - cropping[0], 2, - '1st entry of cropping') - width_cropping = conv_utils.normalize_tuple( - cropping[1], 2, - '2nd entry of cropping') - normalized_cropping = (height_cropping, width_cropping) - else: - raise ValueError('`cropping` should be either an int, ' - 'a tuple of 2 ints ' - '(symmetric_height_crop, symmetric_width_crop), ' - 'or a tuple of 2 tuples of 2 ints ' - '((top_crop, bottom_crop), (left_crop, right_crop)). ' - 'Found: ' + str(cropping)) - super(Cropping2D, self).__init__(normalized_cropping, - data_format, - **kwargs) - - -class Cropping3D(_Cropping): - """Cropping layer for 3D data (e.g. spatial or spatio-temporal). 
-
-    # Arguments
-        cropping: int, or tuple of 3 ints, or tuple of 3 tuples of 2 ints.
-            - If int: the same symmetric cropping
-                is applied to depth, height, and width.
-            - If tuple of 3 ints:
-                interpreted as three different
-                symmetric cropping values for depth, height, and width:
-                `(symmetric_dim1_crop, symmetric_dim2_crop, symmetric_dim3_crop)`.
-            - If tuple of 3 tuples of 2 ints:
-                interpreted as
-                `((left_dim1_crop, right_dim1_crop),
-                  (left_dim2_crop, right_dim2_crop),
-                  (left_dim3_crop, right_dim3_crop))`
-        data_format: A string,
-            one of `"channels_last"` or `"channels_first"`.
-            The ordering of the dimensions in the inputs.
-            `"channels_last"` corresponds to inputs with shape
-            `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)`
-            while `"channels_first"` corresponds to inputs with shape
-            `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`.
-            It defaults to the `image_data_format` value found in your
-            Keras config file at `~/.keras/keras.json`.
-            If you never set it, then it will be "channels_last".
-
-    # Input shape
-        5D tensor with shape:
-        - If `data_format` is `"channels_last"`:
-            `(batch, first_axis_to_crop, second_axis_to_crop, third_axis_to_crop,
-              depth)`
-        - If `data_format` is `"channels_first"`:
-            `(batch, depth,
-              first_axis_to_crop, second_axis_to_crop, third_axis_to_crop)`
-
-    # Output shape
-        5D tensor with shape:
-        - If `data_format` is `"channels_last"`:
-            `(batch, first_cropped_axis, second_cropped_axis, third_cropped_axis,
-              depth)`
-        - If `data_format` is `"channels_first"`:
-            `(batch, depth,
-              first_cropped_axis, second_cropped_axis, third_cropped_axis)`
-    """
-
-    @interfaces.legacy_cropping3d_support
-    def __init__(self, cropping=((1, 1), (1, 1), (1, 1)),
-                 data_format=None, **kwargs):
-        self.data_format = K.normalize_data_format(data_format)
-        if isinstance(cropping, int):
-            normalized_cropping = ((cropping, cropping),
-                                   (cropping, cropping),
-                                   (cropping, cropping))
-        elif hasattr(cropping, '__len__'):
-            if len(cropping) != 3:
-                raise ValueError('`cropping` should have 3 elements. '
-                                 'Found: ' + str(cropping))
-            dim1_cropping = conv_utils.normalize_tuple(cropping[0], 2,
-                                                       '1st entry of cropping')
-            dim2_cropping = conv_utils.normalize_tuple(cropping[1], 2,
-                                                       '2nd entry of cropping')
-            dim3_cropping = conv_utils.normalize_tuple(cropping[2], 2,
-                                                       '3rd entry of cropping')
-            normalized_cropping = (dim1_cropping, dim2_cropping, dim3_cropping)
-        else:
-            raise ValueError(
-                '`cropping` should be either an int, a tuple of 3 ints '
-                '(symmetric_dim1_crop, symmetric_dim2_crop, symmetric_dim3_crop), '
-                'or a tuple of 3 tuples of 2 ints '
-                '((left_dim1_crop, right_dim1_crop),'
-                ' (left_dim2_crop, right_dim2_crop),'
-                ' (left_dim3_crop, right_dim3_crop)). '
-                'Found: ' + str(cropping))
-        super(Cropping3D, self).__init__(normalized_cropping,
-                                         data_format,
-                                         **kwargs)
-
-
-# Aliases
-
-Convolution1D = Conv1D
-Convolution2D = Conv2D
-Convolution3D = Conv3D
-SeparableConvolution1D = SeparableConv1D
-SeparableConvolution2D = SeparableConv2D
-Convolution2DTranspose = Conv2DTranspose
-Deconvolution2D = Deconv2D = Conv2DTranspose
-Deconvolution3D = Deconv3D = Conv3DTranspose
-
-# Legacy aliases
-AtrousConv1D = AtrousConvolution1D
-AtrousConv2D = AtrousConvolution2D
-# -*- coding: utf-8 -*-
-"""Convolutional-recurrent layers.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from .. import backend as K
-from .. import activations
-from .. import initializers
-from .. import regularizers
-from .. import constraints
-from .recurrent import _generate_dropout_mask
-from .recurrent import _standardize_args
-
-import numpy as np
-import warnings
-from ..engine.base_layer import InputSpec, Layer
-from ..utils import conv_utils
-from ..legacy import interfaces
-from ..legacy.layers import Recurrent, ConvRecurrent2D
-from .recurrent import RNN
-from ..utils.generic_utils import has_arg
-from ..utils.generic_utils import to_list
-from ..utils.generic_utils import transpose_shape
-
-
-class ConvRNN2D(RNN):
-    """Base class for convolutional-recurrent layers.
-
-    # Arguments
-        cell: A RNN cell instance. A RNN cell is a class that has:
-            - a `call(input_at_t, states_at_t)` method, returning
-                `(output_at_t, states_at_t_plus_1)`. The call method of the
-                cell can also take the optional argument `constants`, see
-                section "Note on passing external constants" below.
-            - a `state_size` attribute. This can be a single integer
-                (single state) in which case it is the number of channels
-                of the recurrent state (which should be the same as the
-                number of channels of the cell output). This can also be
-                a list/tuple of integers (one size per state). In this case,
-                the first entry (`state_size[0]`) should be the same as
-                the size of the cell output.
-        return_sequences: Boolean. Whether to return the last output
-            in the output sequence, or the full sequence.
-        return_state: Boolean. Whether to return the last state
-            in addition to the output.
-        go_backwards: Boolean (default False).
-            If True, process the input sequence backwards and return the
-            reversed sequence.
-        stateful: Boolean (default False). If True, the last state
-            for each sample at index i in a batch will be used as initial
-            state for the sample of index i in the following batch.
-        input_shape: Use this argument to specify the shape of the
-            input when this layer is the first one in a model.
-
-    # Input shape
-        5D tensor with shape:
-        `(samples, timesteps, channels, rows, cols)`
-        if data_format='channels_first'
-        or 5D tensor with shape:
-        `(samples, timesteps, rows, cols, channels)`
-        if data_format='channels_last'.
-
-    # Output shape
-        - if `return_state`: a list of tensors. The first tensor is
-            the output. The remaining tensors are the last states,
-            each 4D tensor with shape:
-            `(samples, filters, new_rows, new_cols)`
-            if data_format='channels_first'
-            or 4D tensor with shape:
-            `(samples, new_rows, new_cols, filters)`
-            if data_format='channels_last'.
-            `rows` and `cols` values might have changed due to padding.
-        - if `return_sequences`: 5D tensor with shape:
-            `(samples, timesteps,
-            filters, new_rows, new_cols)` if data_format='channels_first'
-            or 5D tensor with shape:
-            `(samples, timesteps,
-            new_rows, new_cols, filters)` if data_format='channels_last'.
-        - else, 4D tensor with shape:
-            `(samples, filters, new_rows, new_cols)`
-            if data_format='channels_first'
-            or 4D tensor with shape:
-            `(samples, new_rows, new_cols, filters)`
-            if data_format='channels_last'.
-
-    # Masking
-        This layer supports masking for input data with a variable number
-        of timesteps. To introduce masks to your data,
-        use an [Embedding](embeddings.md) layer with the `mask_zero` parameter
-        set to `True`.
-
-    # Note on using statefulness in RNNs
-        You can set RNN layers to be 'stateful', which means that the states
-        computed for the samples in one batch will be reused as initial states
-        for the samples in the next batch. This assumes a one-to-one mapping
-        between samples in different successive batches.
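The output-shape rules above are worth seeing once on a real layer. A sketch using `ConvLSTM2D`, the ConvRNN2D subclass shipped with Keras (same Keras 2.2-era assumption as the earlier examples):

```python
# return_sequences=True keeps the time axis; False would drop it,
# giving (None, 32, 32, 8) instead.
from keras.models import Sequential
from keras.layers import ConvLSTM2D

model = Sequential([ConvLSTM2D(filters=8, kernel_size=(3, 3), padding='same',
                               return_sequences=True,
                               input_shape=(10, 32, 32, 1))])
print(model.output_shape)  # (None, 10, 32, 32, 8)
```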
-
-        To enable statefulness:
-            - specify `stateful=True` in the layer constructor.
-            - specify a fixed batch size for your model, by passing
-                - if sequential model:
-                    `batch_input_shape=(...)` to the first layer in your model.
-                - if functional model with 1 or more Input layers:
-                    `batch_shape=(...)` to all the first layers in your model.
-                    This is the expected shape of your inputs
-                    *including the batch size*.
-                    It should be a tuple of integers,
-                    e.g. `(32, 10, 100, 100, 32)`.
-                    Note that the number of rows and columns should be
-                    specified too.
-            - specify `shuffle=False` when calling fit().
-
-        To reset the states of your model, call `.reset_states()` on either
-        a specific layer, or on your entire model.
-
-    # Note on specifying the initial state of RNNs
-        You can specify the initial state of RNN layers symbolically by
-        calling them with the keyword argument `initial_state`. The value of
-        `initial_state` should be a tensor or list of tensors representing
-        the initial state of the RNN layer.
-
-        You can specify the initial state of RNN layers numerically by
-        calling `reset_states` with the keyword argument `states`. The value of
-        `states` should be a numpy array or list of numpy arrays representing
-        the initial state of the RNN layer.
-
-    # Note on passing external constants to RNNs
-        You can pass "external" constants to the cell using the `constants`
-        keyword argument of `RNN.__call__` (as well as `RNN.call`) method.
-        This requires that the `cell.call` method accepts the same keyword
-        argument `constants`. Such constants can be used to condition the
-        cell transformation on additional static inputs (not changing over
-        time), a.k.a. an attention mechanism.
-    """
-
-    def __init__(self, cell,
-                 return_sequences=False,
-                 return_state=False,
-                 go_backwards=False,
-                 stateful=False,
-                 unroll=False,
-                 **kwargs):
-        if unroll:
-            raise TypeError('Unrolling isn\'t possible with '
-                            'convolutional RNNs.')
-        if isinstance(cell, (list, tuple)):
-            # The StackedConvRNN2DCells isn't implemented yet.
-            raise TypeError('It is not possible at the moment to '
-                            'stack convolutional cells.')
-        super(ConvRNN2D, self).__init__(cell,
-                                        return_sequences,
-                                        return_state,
-                                        go_backwards,
-                                        stateful,
-                                        unroll,
-                                        **kwargs)
-        self.input_spec = [InputSpec(ndim=5)]
-
-    def compute_output_shape(self, input_shape):
-        if isinstance(input_shape, list):
-            input_shape = input_shape[0]
-
-        cell = self.cell
-        if cell.data_format == 'channels_first':
-            rows = input_shape[3]
-            cols = input_shape[4]
-        elif cell.data_format == 'channels_last':
-            rows = input_shape[2]
-            cols = input_shape[3]
-        rows = conv_utils.conv_output_length(rows,
-                                             cell.kernel_size[0],
-                                             padding=cell.padding,
-                                             stride=cell.strides[0],
-                                             dilation=cell.dilation_rate[0])
-        cols = conv_utils.conv_output_length(cols,
-                                             cell.kernel_size[1],
-                                             padding=cell.padding,
-                                             stride=cell.strides[1],
-                                             dilation=cell.dilation_rate[1])
-
-        output_shape = input_shape[:2] + (rows, cols, cell.filters)
-        output_shape = transpose_shape(output_shape, cell.data_format,
-                                       spatial_axes=(2, 3))
-
-        if not self.return_sequences:
-            output_shape = output_shape[:1] + output_shape[2:]
-
-        if self.return_state:
-            output_shape = [output_shape]
-            base = (input_shape[0], rows, cols, cell.filters)
-            base = transpose_shape(base, cell.data_format, spatial_axes=(1, 2))
-            output_shape += [base[:] for _ in range(2)]
-        return output_shape
-
-    def build(self, input_shape):
-        # Note input_shape will be list of shapes of initial states and
-        # constants if these are passed in __call__.
- if self._num_constants is not None: - constants_shape = input_shape[-self._num_constants:] - else: - constants_shape = None - - if isinstance(input_shape, list): - input_shape = input_shape[0] - - batch_size = input_shape[0] if self.stateful else None - self.input_spec[0] = InputSpec( - shape=(batch_size, None) + input_shape[2:5]) - - # allow cell (if layer) to build before we set or validate state_spec - if isinstance(self.cell, Layer): - step_input_shape = (input_shape[0],) + input_shape[2:] - if constants_shape is not None: - self.cell.build([step_input_shape] + constants_shape) - else: - self.cell.build(step_input_shape) - - # set or validate state_spec - if hasattr(self.cell.state_size, '__len__'): - state_size = list(self.cell.state_size) - else: - state_size = [self.cell.state_size] - - if self.state_spec is not None: - # initial_state was passed in call, check compatibility - if self.cell.data_format == 'channels_first': - ch_dim = 1 - elif self.cell.data_format == 'channels_last': - ch_dim = 3 - if not [spec.shape[ch_dim] for spec in self.state_spec] == state_size: - raise ValueError( - 'An initial_state was passed that is not compatible with ' - '`cell.state_size`. Received `state_spec`={}; ' - 'However `cell.state_size` is ' - '{}'.format([spec.shape for spec in self.state_spec], - self.cell.state_size)) - else: - if self.cell.data_format == 'channels_first': - self.state_spec = [InputSpec(shape=(None, dim, None, None)) - for dim in state_size] - elif self.cell.data_format == 'channels_last': - self.state_spec = [InputSpec(shape=(None, None, None, dim)) - for dim in state_size] - if self.stateful: - self.reset_states() - self.built = True - - def get_initial_state(self, inputs): - # (samples, timesteps, rows, cols, filters) - initial_state = K.zeros_like(inputs) - # (samples, rows, cols, filters) - initial_state = K.sum(initial_state, axis=1) - shape = list(self.cell.kernel_shape) - shape[-1] = self.cell.filters - initial_state = self.cell.input_conv(initial_state, - K.zeros(tuple(shape)), - padding=self.cell.padding) - # Fix for Theano because it needs - # K.int_shape to work in call() with initial_state. - keras_shape = list(K.int_shape(inputs)) - keras_shape.pop(1) - if K.image_data_format() == 'channels_first': - indices = 2, 3 - else: - indices = 1, 2 - for i, j in enumerate(indices): - keras_shape[j] = conv_utils.conv_output_length( - keras_shape[j], - shape[i], - padding=self.cell.padding, - stride=self.cell.strides[i], - dilation=self.cell.dilation_rate[i]) - initial_state._keras_shape = keras_shape - - if hasattr(self.cell.state_size, '__len__'): - return [initial_state for _ in self.cell.state_size] - else: - return [initial_state] - - def __call__(self, inputs, initial_state=None, constants=None, **kwargs): - inputs, initial_state, constants = _standardize_args( - inputs, initial_state, constants, self._num_constants) - - if initial_state is None and constants is None: - return super(ConvRNN2D, self).__call__(inputs, **kwargs) - - # If any of `initial_state` or `constants` are specified and are Keras - # tensors, then add them to the inputs and temporarily modify the - # input_spec to include them. 
- - additional_inputs = [] - additional_specs = [] - if initial_state is not None: - kwargs['initial_state'] = initial_state - additional_inputs += initial_state - self.state_spec = [] - for state in initial_state: - try: - shape = K.int_shape(state) - # Fix for Theano - except TypeError: - shape = tuple(None for _ in range(K.ndim(state))) - self.state_spec.append(InputSpec(shape=shape)) - - additional_specs += self.state_spec - if constants is not None: - kwargs['constants'] = constants - additional_inputs += constants - self.constants_spec = [InputSpec(shape=K.int_shape(constant)) - for constant in constants] - self._num_constants = len(constants) - additional_specs += self.constants_spec - # at this point additional_inputs cannot be empty - for tensor in additional_inputs: - if K.is_keras_tensor(tensor) != K.is_keras_tensor(additional_inputs[0]): - raise ValueError('The initial state or constants of an RNN' - ' layer cannot be specified with a mix of' - ' Keras tensors and non-Keras tensors') - - if K.is_keras_tensor(additional_inputs[0]): - # Compute the full input spec, including state and constants - full_input = [inputs] + additional_inputs - full_input_spec = self.input_spec + additional_specs - # Perform the call with temporarily replaced input_spec - original_input_spec = self.input_spec - self.input_spec = full_input_spec - output = super(ConvRNN2D, self).__call__(full_input, **kwargs) - self.input_spec = original_input_spec - return output - else: - return super(ConvRNN2D, self).__call__(inputs, **kwargs) - - def call(self, - inputs, - mask=None, - training=None, - initial_state=None, - constants=None): - # note that the .build() method of subclasses MUST define - # self.input_spec and self.state_spec with complete input shapes. - if isinstance(inputs, list): - inputs = inputs[0] - if initial_state is not None: - pass - elif self.stateful: - initial_state = self.states - else: - initial_state = self.get_initial_state(inputs) - - if isinstance(mask, list): - mask = mask[0] - - if len(initial_state) != len(self.states): - raise ValueError('Layer has ' + str(len(self.states)) + - ' states but was passed ' + - str(len(initial_state)) + - ' initial states.') - timesteps = K.int_shape(inputs)[1] - - kwargs = {} - if has_arg(self.cell.call, 'training'): - kwargs['training'] = training - - if constants: - if not has_arg(self.cell.call, 'constants'): - raise ValueError('RNN cell does not support constants') - - def step(inputs, states): - constants = states[-self._num_constants:] - states = states[:-self._num_constants] - return self.cell.call(inputs, states, constants=constants, - **kwargs) - else: - def step(inputs, states): - return self.cell.call(inputs, states, **kwargs) - - last_output, outputs, states = K.rnn(step, - inputs, - initial_state, - constants=constants, - go_backwards=self.go_backwards, - mask=mask, - input_length=timesteps) - if self.stateful: - updates = [] - for i in range(len(states)): - updates.append((self.states[i], states[i])) - self.add_update(updates, inputs) - - if self.return_sequences: - output = outputs - else: - output = last_output - - # Properly set learning phase - if getattr(last_output, '_uses_learning_phase', False): - output._uses_learning_phase = True - - if self.return_state: - states = to_list(states, allow_tuple=True) - return [output] + states - else: - return output - - def reset_states(self, states=None): - if not self.stateful: - raise AttributeError('Layer must be stateful.') - input_shape = self.input_spec[0].shape - state_shape = 
self.compute_output_shape(input_shape) - if self.return_state: - state_shape = state_shape[0] - if self.return_sequences: - state_shape = state_shape[:1] + state_shape[2:] - if None in state_shape: - raise ValueError('If a RNN is stateful, it needs to know ' - 'its batch size. Specify the batch size ' - 'of your input tensors: \n' - '- If using a Sequential model, ' - 'specify the batch size by passing ' - 'a `batch_input_shape` ' - 'argument to your first layer.\n' - '- If using the functional API, specify ' - 'the time dimension by passing a ' - '`batch_shape` argument to your Input layer.\n' - 'The same thing goes for the number of rows ' - 'and columns.') - - # helper function - def get_tuple_shape(nb_channels): - result = list(state_shape) - if self.cell.data_format == 'channels_first': - result[1] = nb_channels - elif self.cell.data_format == 'channels_last': - result[3] = nb_channels - else: - raise KeyError - return tuple(result) - - # initialize state if None - if self.states[0] is None: - if hasattr(self.cell.state_size, '__len__'): - self.states = [K.zeros(get_tuple_shape(dim)) - for dim in self.cell.state_size] - else: - self.states = [K.zeros(get_tuple_shape(self.cell.state_size))] - elif states is None: - if hasattr(self.cell.state_size, '__len__'): - for state, dim in zip(self.states, self.cell.state_size): - K.set_value(state, np.zeros(get_tuple_shape(dim))) - else: - K.set_value(self.states[0], - np.zeros(get_tuple_shape(self.cell.state_size))) - else: - states = to_list(states, allow_tuple=True) - if len(states) != len(self.states): - raise ValueError('Layer ' + self.name + ' expects ' + - str(len(self.states)) + ' states, ' - 'but it received ' + str(len(states)) + - ' state values. Input received: ' + - str(states)) - for index, (value, state) in enumerate(zip(states, self.states)): - if hasattr(self.cell.state_size, '__len__'): - dim = self.cell.state_size[index] - else: - dim = self.cell.state_size - if value.shape != get_tuple_shape(dim): - raise ValueError('State ' + str(index) + - ' is incompatible with layer ' + - self.name + ': expected shape=' + - str(get_tuple_shape(dim)) + - ', found shape=' + str(value.shape)) - # TODO: consider batch calls to `set_value`. - K.set_value(state, value) - - -class ConvLSTM2DCell(Layer): - """Cell class for the ConvLSTM2D layer. - - # Arguments - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of n integers, specifying the - dimensions of the convolution window. - strides: An integer or tuple/list of n integers, - specifying the strides of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, - one of `"channels_last"` (default) or `"channels_first"`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be `"channels_last"`. - dilation_rate: An integer or tuple/list of n integers, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). 
- recurrent_activation: Activation function to use - for the recurrent step - (see [activations](../activations.md)). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - (see [initializers](../initializers.md)). - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state. - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - unit_forget_bias: Boolean. - If True, add 1 to the bias of the forget gate at initialization. - Use in combination with `bias_initializer="zeros"`. - This is recommended in [Jozefowicz et al. (2015)]( - http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. 
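-
-    # Example
-
-    (Editor's sketch, not part of the original file.) The cell is normally
-    wrapped by `ConvLSTM2D` rather than called directly; a minimal,
-    hypothetical construction with 32 output filters and 3x3 kernels:
-
-    ```python
-    cell = ConvLSTM2DCell(filters=32, kernel_size=(3, 3), padding='same')
-    # state_size == (32, 32): one hidden map and one carry map per step
-    ```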
- """ - - def __init__(self, filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1), - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - **kwargs): - super(ConvLSTM2DCell, self).__init__(**kwargs) - self.filters = filters - self.kernel_size = conv_utils.normalize_tuple( - kernel_size, 2, 'kernel_size') - self.strides = conv_utils.normalize_tuple(strides, 2, 'strides') - self.padding = conv_utils.normalize_padding(padding) - self.data_format = K.normalize_data_format(data_format) - self.dilation_rate = conv_utils.normalize_tuple(dilation_rate, 2, - 'dilation_rate') - self.activation = activations.get(activation) - self.recurrent_activation = activations.get(recurrent_activation) - self.use_bias = use_bias - - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.unit_forget_bias = unit_forget_bias - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - if K.backend() == 'theano' and (dropout or recurrent_dropout): - warnings.warn( - 'RNN dropout is no longer supported with the Theano backend ' - 'due to technical limitations. ' - 'You can either set `dropout` and `recurrent_dropout` to 0, ' - 'or use the TensorFlow backend.') - dropout = 0. - recurrent_dropout = 0. - self.dropout = min(1., max(0., dropout)) - self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.state_size = (self.filters, self.filters) - self._dropout_mask = None - self._recurrent_dropout_mask = None - - def build(self, input_shape): - - if self.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = -1 - if input_shape[channel_axis] is None: - raise ValueError('The channel dimension of the inputs ' - 'should be defined. 
Found `None`.') - input_dim = input_shape[channel_axis] - kernel_shape = self.kernel_size + (input_dim, self.filters * 4) - self.kernel_shape = kernel_shape - recurrent_kernel_shape = self.kernel_size + \ - (self.filters, self.filters * 4) - - self.kernel = self.add_weight(shape=kernel_shape, - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - self.recurrent_kernel = self.add_weight( - shape=recurrent_kernel_shape, - initializer=self.recurrent_initializer, - name='recurrent_kernel', - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - if self.use_bias: - if self.unit_forget_bias: - def bias_initializer(_, *args, **kwargs): - return K.concatenate([ - self.bias_initializer( - (self.filters,), *args, **kwargs), - initializers.Ones()((self.filters,), *args, **kwargs), - self.bias_initializer( - (self.filters * 2,), *args, **kwargs), - ]) - else: - bias_initializer = self.bias_initializer - self.bias = self.add_weight(shape=(self.filters * 4,), - name='bias', - initializer=bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - - self.kernel_i = self.kernel[:, :, :, :self.filters] - self.recurrent_kernel_i = self.recurrent_kernel[:, :, :, :self.filters] - self.kernel_f = self.kernel[:, :, :, self.filters: self.filters * 2] - self.recurrent_kernel_f = ( - self.recurrent_kernel[:, :, :, self.filters: self.filters * 2]) - self.kernel_c = self.kernel[:, :, :, - self.filters * 2: self.filters * 3] - self.recurrent_kernel_c = ( - self.recurrent_kernel[:, :, :, self.filters * 2: self.filters * 3]) - self.kernel_o = self.kernel[:, :, :, self.filters * 3:] - self.recurrent_kernel_o = self.recurrent_kernel[:, - :, :, self.filters * 3:] - - if self.use_bias: - self.bias_i = self.bias[:self.filters] - self.bias_f = self.bias[self.filters: self.filters * 2] - self.bias_c = self.bias[self.filters * 2: self.filters * 3] - self.bias_o = self.bias[self.filters * 3:] - else: - self.bias_i = None - self.bias_f = None - self.bias_c = None - self.bias_o = None - self.built = True - - def call(self, inputs, states, training=None): - if 0 < self.dropout < 1 and self._dropout_mask is None: - self._dropout_mask = _generate_dropout_mask( - K.ones_like(inputs), - self.dropout, - training=training, - count=4) - if (0 < self.recurrent_dropout < 1 and - self._recurrent_dropout_mask is None): - self._recurrent_dropout_mask = _generate_dropout_mask( - K.ones_like(states[1]), - self.recurrent_dropout, - training=training, - count=4) - - # dropout matrices for input units - dp_mask = self._dropout_mask - # dropout matrices for recurrent units - rec_dp_mask = self._recurrent_dropout_mask - - h_tm1 = states[0] # previous memory state - c_tm1 = states[1] # previous carry state - - if 0 < self.dropout < 1.: - inputs_i = inputs * dp_mask[0] - inputs_f = inputs * dp_mask[1] - inputs_c = inputs * dp_mask[2] - inputs_o = inputs * dp_mask[3] - else: - inputs_i = inputs - inputs_f = inputs - inputs_c = inputs - inputs_o = inputs - - if 0 < self.recurrent_dropout < 1.: - h_tm1_i = h_tm1 * rec_dp_mask[0] - h_tm1_f = h_tm1 * rec_dp_mask[1] - h_tm1_c = h_tm1 * rec_dp_mask[2] - h_tm1_o = h_tm1 * rec_dp_mask[3] - else: - h_tm1_i = h_tm1 - h_tm1_f = h_tm1 - h_tm1_c = h_tm1 - h_tm1_o = h_tm1 - - x_i = self.input_conv(inputs_i, self.kernel_i, self.bias_i, - padding=self.padding) - x_f = self.input_conv(inputs_f, self.kernel_f, self.bias_f, - padding=self.padding) - x_c = 
self.input_conv(inputs_c, self.kernel_c, self.bias_c, - padding=self.padding) - x_o = self.input_conv(inputs_o, self.kernel_o, self.bias_o, - padding=self.padding) - h_i = self.recurrent_conv(h_tm1_i, - self.recurrent_kernel_i) - h_f = self.recurrent_conv(h_tm1_f, - self.recurrent_kernel_f) - h_c = self.recurrent_conv(h_tm1_c, - self.recurrent_kernel_c) - h_o = self.recurrent_conv(h_tm1_o, - self.recurrent_kernel_o) - - i = self.recurrent_activation(x_i + h_i) - f = self.recurrent_activation(x_f + h_f) - c = f * c_tm1 + i * self.activation(x_c + h_c) - o = self.recurrent_activation(x_o + h_o) - h = o * self.activation(c) - - if 0 < self.dropout + self.recurrent_dropout: - if training is None: - h._uses_learning_phase = True - - return h, [h, c] - - def input_conv(self, x, w, b=None, padding='valid'): - conv_out = K.conv2d(x, w, strides=self.strides, - padding=padding, - data_format=self.data_format, - dilation_rate=self.dilation_rate) - if b is not None: - conv_out = K.bias_add(conv_out, b, - data_format=self.data_format) - return conv_out - - def recurrent_conv(self, x, w): - conv_out = K.conv2d(x, w, strides=(1, 1), - padding='same', - data_format=self.data_format) - return conv_out - - def get_config(self): - config = {'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'data_format': self.data_format, - 'dilation_rate': self.dilation_rate, - 'activation': activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'unit_forget_bias': self.unit_forget_bias, - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'dropout': self.dropout, - 'recurrent_dropout': self.recurrent_dropout} - base_config = super(ConvLSTM2DCell, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class ConvLSTM2D(ConvRNN2D): - """Convolutional LSTM. - - It is similar to an LSTM layer, but the input transformations - and recurrent transformations are both convolutional. - - # Arguments - filters: Integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution). - kernel_size: An integer or tuple/list of n integers, specifying the - dimensions of the convolution window. - strides: An integer or tuple/list of n integers, - specifying the strides of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, - one of `"channels_last"` (default) or `"channels_first"`. - The ordering of the dimensions in the inputs. - `"channels_last"` corresponds to inputs with shape - `(batch, time, ..., channels)` - while `"channels_first"` corresponds to - inputs with shape `(batch, time, channels, ...)`. 
- It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be `"channels_last"`. - dilation_rate: An integer or tuple/list of n integers, specifying - the dilation rate to use for dilated convolution. - Currently, specifying any `dilation_rate` value != 1 is - incompatible with specifying any `strides` value != 1. - activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use - for the recurrent step - (see [activations](../activations.md)). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - (see [initializers](../initializers.md)). - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state. - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - unit_forget_bias: Boolean. - If True, add 1 to the bias of the forget gate at initialization. - Use in combination with `bias_initializer="zeros"`. - This is recommended in [Jozefowicz et al. (2015)]( - http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - go_backwards: Boolean (default False). - If True, process the input sequence backwards. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. 
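-
-    (Editor's sketch, not part of the original file.) A hedged, minimal use
-    of the layer on sequences of 40x40 single-channel frames:
-
-    ```python
-    model = Sequential()
-    model.add(ConvLSTM2D(filters=40, kernel_size=(3, 3),
-                         input_shape=(None, 40, 40, 1),
-                         padding='same', return_sequences=True))
-    ```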
- - # Input shape - - if data_format='channels_first' - 5D tensor with shape: - `(samples, time, channels, rows, cols)` - - if data_format='channels_last' - 5D tensor with shape: - `(samples, time, rows, cols, channels)` - - # Output shape - - if `return_sequences` - - if data_format='channels_first' - 5D tensor with shape: - `(samples, time, filters, output_row, output_col)` - - if data_format='channels_last' - 5D tensor with shape: - `(samples, time, output_row, output_col, filters)` - - else - - if data_format='channels_first' - 4D tensor with shape: - `(samples, filters, output_row, output_col)` - - if data_format='channels_last' - 4D tensor with shape: - `(samples, output_row, output_col, filters)` - where o_row and o_col depend on the shape of the filter and - the padding - - # Raises - ValueError: in case of invalid constructor arguments. - - # References - - [Convolutional LSTM Network: A Machine Learning Approach for - Precipitation Nowcasting](http://arxiv.org/abs/1506.04214v1) - The current implementation does not include the feedback loop on the - cells output - """ - - @interfaces.legacy_convlstm2d_support - def __init__(self, filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1), - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - return_sequences=False, - go_backwards=False, - stateful=False, - dropout=0., - recurrent_dropout=0., - **kwargs): - cell = ConvLSTM2DCell(filters=filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - dilation_rate=dilation_rate, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - unit_forget_bias=unit_forget_bias, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout) - super(ConvLSTM2D, self).__init__(cell, - return_sequences=return_sequences, - go_backwards=go_backwards, - stateful=stateful, - **kwargs) - self.activity_regularizer = regularizers.get(activity_regularizer) - - def call(self, inputs, mask=None, training=None, initial_state=None): - return super(ConvLSTM2D, self).call(inputs, - mask=mask, - training=training, - initial_state=initial_state) - - @property - def filters(self): - return self.cell.filters - - @property - def kernel_size(self): - return self.cell.kernel_size - - @property - def strides(self): - return self.cell.strides - - @property - def padding(self): - return self.cell.padding - - @property - def data_format(self): - return self.cell.data_format - - @property - def dilation_rate(self): - return self.cell.dilation_rate - - @property - def activation(self): - return self.cell.activation - - @property - def recurrent_activation(self): - return self.cell.recurrent_activation - - @property - def use_bias(self): - return self.cell.use_bias - - @property - def 
kernel_initializer(self): - return self.cell.kernel_initializer - - @property - def recurrent_initializer(self): - return self.cell.recurrent_initializer - - @property - def bias_initializer(self): - return self.cell.bias_initializer - - @property - def unit_forget_bias(self): - return self.cell.unit_forget_bias - - @property - def kernel_regularizer(self): - return self.cell.kernel_regularizer - - @property - def recurrent_regularizer(self): - return self.cell.recurrent_regularizer - - @property - def bias_regularizer(self): - return self.cell.bias_regularizer - - @property - def kernel_constraint(self): - return self.cell.kernel_constraint - - @property - def recurrent_constraint(self): - return self.cell.recurrent_constraint - - @property - def bias_constraint(self): - return self.cell.bias_constraint - - @property - def dropout(self): - return self.cell.dropout - - @property - def recurrent_dropout(self): - return self.cell.recurrent_dropout - - def get_config(self): - config = {'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'data_format': self.data_format, - 'dilation_rate': self.dilation_rate, - 'activation': activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'unit_forget_bias': self.unit_forget_bias, - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': - constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'dropout': self.dropout, - 'recurrent_dropout': self.recurrent_dropout} - base_config = super(ConvLSTM2D, self).get_config() - del base_config['cell'] - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - return cls(**config) -# -*- coding: utf-8 -*- -"""Core Keras layers. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -import copy -import types as python_types -import warnings - -from .. import backend as K -from .. import activations -from .. import initializers -from .. import regularizers -from .. import constraints -from ..engine.base_layer import InputSpec -from ..engine.base_layer import Layer -from ..utils.generic_utils import func_dump -from ..utils.generic_utils import func_load -from ..utils.generic_utils import deserialize_keras_object -from ..utils.generic_utils import has_arg -from ..legacy import interfaces - - -class Masking(Layer): - """Masks a sequence by using a mask value to skip timesteps. - - If all features for a given sample timestep are equal to `mask_value`, - then the sample timestep will be masked (skipped) in all downstream layers - (as long as they support masking). - - If any downstream layer does not support masking yet receives such - an input mask, an exception will be raised. 
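-
-    (Editor's note, hedged: the skip is driven by `compute_mask` below,
-    which keeps a timestep only when *any* of its features differs from
-    `mask_value`; downstream layers must set `supports_masking = True`
-    to consume the resulting mask.)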
- - # Example - - Consider a Numpy data array `x` of shape `(samples, timesteps, features)`, - to be fed to an LSTM layer. - You want to mask sample #0 at timestep #3, and sample #2 at timestep #5, - because you lack features for these sample timesteps. You can do: - - - set `x[0, 3, :] = 0.` and `x[2, 5, :] = 0.` - - insert a `Masking` layer with `mask_value=0.` before the LSTM layer: - - ```python - model = Sequential() - model.add(Masking(mask_value=0., input_shape=(timesteps, features))) - model.add(LSTM(32)) - ``` - - # Arguments - mask_value: Either None or mask value to skip - """ - - def __init__(self, mask_value=0., **kwargs): - super(Masking, self).__init__(**kwargs) - self.supports_masking = True - self.mask_value = mask_value - - def compute_mask(self, inputs, mask=None): - output_mask = K.any(K.not_equal(inputs, self.mask_value), axis=-1) - return output_mask - - def call(self, inputs): - boolean_mask = K.any(K.not_equal(inputs, self.mask_value), - axis=-1, keepdims=True) - return inputs * K.cast(boolean_mask, K.dtype(inputs)) - - def get_config(self): - config = {'mask_value': self.mask_value} - base_config = super(Masking, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - - -class Dropout(Layer): - """Applies Dropout to the input. - - Dropout consists in randomly setting - a fraction `rate` of input units to 0 at each update during training time, - which helps prevent overfitting. - - # Arguments - rate: float between 0 and 1. Fraction of the input units to drop. - noise_shape: 1D integer tensor representing the shape of the - binary dropout mask that will be multiplied with the input. - For instance, if your inputs have shape - `(batch_size, timesteps, features)` and - you want the dropout mask to be the same for all timesteps, - you can use `noise_shape=(batch_size, 1, features)`. - seed: A Python integer to use as random seed. - - # References - - [Dropout: A Simple Way to Prevent Neural Networks from Overfitting]( - http://www.jmlr.org/papers/volume15/srivastava14a/srivastava14a.pdf) - """ - @interfaces.legacy_dropout_support - def __init__(self, rate, noise_shape=None, seed=None, **kwargs): - super(Dropout, self).__init__(**kwargs) - self.rate = min(1., max(0., rate)) - self.noise_shape = noise_shape - self.seed = seed - self.supports_masking = True - - def _get_noise_shape(self, inputs): - if self.noise_shape is None: - return self.noise_shape - - symbolic_shape = K.shape(inputs) - noise_shape = [symbolic_shape[axis] if shape is None else shape - for axis, shape in enumerate(self.noise_shape)] - return tuple(noise_shape) - - def call(self, inputs, training=None): - if 0. < self.rate < 1.: - noise_shape = self._get_noise_shape(inputs) - - def dropped_inputs(): - return K.dropout(inputs, self.rate, noise_shape, - seed=self.seed) - return K.in_train_phase(dropped_inputs, inputs, - training=training) - return inputs - - def get_config(self): - config = {'rate': self.rate, - 'noise_shape': self.noise_shape, - 'seed': self.seed} - base_config = super(Dropout, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - - -class SpatialDropout1D(Dropout): - """Spatial 1D version of Dropout. - - This version performs the same function as Dropout, however it drops - entire 1D feature maps instead of individual elements. 
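-    (Editor's note: concretely, `_get_noise_shape` below returns
-    `(batch, 1, channels)`, so a single dropout mask is drawn per sample
-    and broadcast across all timesteps.)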
If adjacent frames - within feature maps are strongly correlated (as is normally the case in - early convolution layers) then regular dropout will not regularize the - activations and will otherwise just result in an effective learning rate - decrease. In this case, SpatialDropout1D will help promote independence - between feature maps and should be used instead. - - # Arguments - rate: float between 0 and 1. Fraction of the input units to drop. - - # Input shape - 3D tensor with shape: - `(samples, timesteps, channels)` - - # Output shape - Same as input - - # References - - [Efficient Object Localization Using Convolutional Networks]( - https://arxiv.org/abs/1411.4280) - """ - - @interfaces.legacy_spatialdropout1d_support - def __init__(self, rate, **kwargs): - super(SpatialDropout1D, self).__init__(rate, **kwargs) - self.input_spec = InputSpec(ndim=3) - - def _get_noise_shape(self, inputs): - input_shape = K.shape(inputs) - noise_shape = (input_shape[0], 1, input_shape[2]) - return noise_shape - - -class SpatialDropout2D(Dropout): - """Spatial 2D version of Dropout. - - This version performs the same function as Dropout, however it drops - entire 2D feature maps instead of individual elements. If adjacent pixels - within feature maps are strongly correlated (as is normally the case in - early convolution layers) then regular dropout will not regularize the - activations and will otherwise just result in an effective learning rate - decrease. In this case, SpatialDropout2D will help promote independence - between feature maps and should be used instead. - - # Arguments - rate: float between 0 and 1. Fraction of the input units to drop. - data_format: 'channels_first' or 'channels_last'. - In 'channels_first' mode, the channels dimension - (the depth) is at index 1, - in 'channels_last' mode is it at index 3. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - # Input shape - 4D tensor with shape: - `(samples, channels, rows, cols)` if data_format='channels_first' - or 4D tensor with shape: - `(samples, rows, cols, channels)` if data_format='channels_last'. - - # Output shape - Same as input - - # References - - [Efficient Object Localization Using Convolutional Networks]( - https://arxiv.org/abs/1411.4280) - """ - - @interfaces.legacy_spatialdropoutNd_support - def __init__(self, rate, data_format=None, **kwargs): - super(SpatialDropout2D, self).__init__(rate, **kwargs) - self.data_format = K.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=4) - - def _get_noise_shape(self, inputs): - input_shape = K.shape(inputs) - if self.data_format == 'channels_first': - noise_shape = (input_shape[0], input_shape[1], 1, 1) - else: - noise_shape = (input_shape[0], 1, 1, input_shape[3]) - return noise_shape - - -class SpatialDropout3D(Dropout): - """Spatial 3D version of Dropout. - - This version performs the same function as Dropout, however it drops - entire 3D feature maps instead of individual elements. If adjacent voxels - within feature maps are strongly correlated (as is normally the case in - early convolution layers) then regular dropout will not regularize the - activations and will otherwise just result in an effective learning rate - decrease. In this case, SpatialDropout3D will help promote independence - between feature maps and should be used instead. - - # Arguments - rate: float between 0 and 1. Fraction of the input units to drop. 
- data_format: 'channels_first' or 'channels_last'. - In 'channels_first' mode, the channels dimension (the depth) - is at index 1, in 'channels_last' mode is it at index 4. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - # Input shape - 5D tensor with shape: - `(samples, channels, dim1, dim2, dim3)` if data_format='channels_first' - or 5D tensor with shape: - `(samples, dim1, dim2, dim3, channels)` if data_format='channels_last'. - - # Output shape - Same as input - - # References - - [Efficient Object Localization Using Convolutional Networks]( - https://arxiv.org/abs/1411.4280) - """ - - @interfaces.legacy_spatialdropoutNd_support - def __init__(self, rate, data_format=None, **kwargs): - super(SpatialDropout3D, self).__init__(rate, **kwargs) - self.data_format = K.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=5) - - def _get_noise_shape(self, inputs): - input_shape = K.shape(inputs) - if self.data_format == 'channels_first': - noise_shape = (input_shape[0], input_shape[1], 1, 1, 1) - else: - noise_shape = (input_shape[0], 1, 1, 1, input_shape[4]) - return noise_shape - - -class Activation(Layer): - """Applies an activation function to an output. - - # Arguments - activation: name of activation function to use - (see: [activations](../activations.md)), - or alternatively, a Theano or TensorFlow operation. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as input. - """ - - def __init__(self, activation, **kwargs): - super(Activation, self).__init__(**kwargs) - self.supports_masking = True - self.activation = activations.get(activation) - - def call(self, inputs): - return self.activation(inputs) - - def get_config(self): - config = {'activation': activations.serialize(self.activation)} - base_config = super(Activation, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - - -class Reshape(Layer): - """Reshapes an output to a certain shape. - - # Arguments - target_shape: target shape. Tuple of integers. - Does not include the batch axis. - - # Input shape - Arbitrary, although all dimensions in the input shaped must be fixed. - Use the keyword argument `input_shape` - (tuple of integers, does not include the batch axis) - when using this layer as the first layer in a model. - - # Output shape - `(batch_size,) + target_shape` - - # Example - - ```python - # as first layer in a Sequential model - model = Sequential() - model.add(Reshape((3, 4), input_shape=(12,))) - # now: model.output_shape == (None, 3, 4) - # note: `None` is the batch dimension - - # as intermediate layer in a Sequential model - model.add(Reshape((6, 2))) - # now: model.output_shape == (None, 6, 2) - - # also supports shape inference using `-1` as dimension - model.add(Reshape((-1, 2, 2))) - # now: model.output_shape == (None, 3, 2, 2) - ``` - """ - - def __init__(self, target_shape, **kwargs): - super(Reshape, self).__init__(**kwargs) - self.target_shape = tuple(target_shape) - - def _fix_unknown_dimension(self, input_shape, output_shape): - """Finds and replaces a missing dimension in an output shape. 
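-
-        For instance (editor's worked example): reshaping an input of
-        shape `(12,)` to `(-1, 2, 2)` gives `known = 2 * 2 = 4`, so the
-        `-1` resolves to `12 // 4 = 3` and the result is `(3, 2, 2)`.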
- - This is a near direct port of the internal Numpy function - `_fix_unknown_dimension` in `numpy/core/src/multiarray/shape.c` - - # Arguments - input_shape: original shape of array being reshaped - output_shape: target shape of the array, with at most - a single -1 which indicates a dimension that should be - derived from the input shape. - - # Returns - The new output shape with a `-1` replaced with its computed value. - - # Raises - ValueError: if `input_shape` and `output_shape` do not match. - """ - output_shape = list(output_shape) - msg = 'total size of new array must be unchanged' - - known, unknown = 1, None - for index, dim in enumerate(output_shape): - if dim < 0: - if unknown is None: - unknown = index - else: - raise ValueError('Can only specify one unknown dimension.') - else: - known *= dim - - original = np.prod(input_shape, dtype=int) - if unknown is not None: - if known == 0 or original % known != 0: - raise ValueError(msg) - output_shape[unknown] = original // known - elif original != known: - raise ValueError(msg) - - return tuple(output_shape) - - def compute_output_shape(self, input_shape): - if None in input_shape[1:]: - # input shape (partially) unknown? replace -1's with None's - return ((input_shape[0],) + - tuple(s if s != -1 else None for s in self.target_shape)) - else: - # input shape known? then we can compute the output shape - return (input_shape[0],) + self._fix_unknown_dimension( - input_shape[1:], self.target_shape) - - def call(self, inputs): - return K.reshape(inputs, (K.shape(inputs)[0],) + self.target_shape) - - def get_config(self): - config = {'target_shape': self.target_shape} - base_config = super(Reshape, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class Permute(Layer): - """Permutes the dimensions of the input according to a given pattern. - - Useful for e.g. connecting RNNs and convnets together. - - # Example - - ```python - model = Sequential() - model.add(Permute((2, 1), input_shape=(10, 64))) - # now: model.output_shape == (None, 64, 10) - # note: `None` is the batch dimension - ``` - - # Arguments - dims: Tuple of integers. Permutation pattern, does not include the - samples dimension. Indexing starts at 1. - For instance, `(2, 1)` permutes the first and second dimension - of the input. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same as the input shape, but with the dimensions re-ordered according - to the specified pattern. - """ - - def __init__(self, dims, **kwargs): - super(Permute, self).__init__(**kwargs) - self.dims = tuple(dims) - self.input_spec = InputSpec(ndim=len(self.dims) + 1) - - def compute_output_shape(self, input_shape): - input_shape = list(input_shape) - output_shape = copy.copy(input_shape) - for i, dim in enumerate(self.dims): - target_dim = input_shape[dim] - output_shape[i + 1] = target_dim - return tuple(output_shape) - - def call(self, inputs): - return K.permute_dimensions(inputs, (0,) + self.dims) - - def get_config(self): - config = {'dims': self.dims} - base_config = super(Permute, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class Flatten(Layer): - """Flattens the input. Does not affect the batch size. - - # Arguments - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. 
- The purpose of this argument is to preserve weight - ordering when switching a model from one data format - to another. - `channels_last` corresponds to inputs with shape - `(batch, ..., channels)` while `channels_first` corresponds to - inputs with shape `(batch, channels, ...)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - # Example - - ```python - model = Sequential() - model.add(Conv2D(64, (3, 3), - input_shape=(3, 32, 32), padding='same',)) - # now: model.output_shape == (None, 64, 32, 32) - - model.add(Flatten()) - # now: model.output_shape == (None, 65536) - ``` - """ - - def __init__(self, data_format=None, **kwargs): - super(Flatten, self).__init__(**kwargs) - self.input_spec = InputSpec(min_ndim=3) - self.data_format = K.normalize_data_format(data_format) - - def compute_output_shape(self, input_shape): - if not all(input_shape[1:]): - raise ValueError('The shape of the input to "Flatten" ' - 'is not fully defined ' - '(got ' + str(input_shape[1:]) + '). ' - 'Make sure to pass a complete "input_shape" ' - 'or "batch_input_shape" argument to the first ' - 'layer in your model.') - return (input_shape[0], np.prod(input_shape[1:])) - - def call(self, inputs): - if self.data_format == 'channels_first': - # Ensure works for any dim - permutation = [0] - permutation.extend([i for i in - range(2, K.ndim(inputs))]) - permutation.append(1) - inputs = K.permute_dimensions(inputs, permutation) - - return K.batch_flatten(inputs) - - def get_config(self): - config = {'data_format': self.data_format} - base_config = super(Flatten, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class RepeatVector(Layer): - """Repeats the input n times. - - # Example - - ```python - model = Sequential() - model.add(Dense(32, input_dim=32)) - # now: model.output_shape == (None, 32) - # note: `None` is the batch dimension - - model.add(RepeatVector(3)) - # now: model.output_shape == (None, 3, 32) - ``` - - # Arguments - n: integer, repetition factor. - - # Input shape - 2D tensor of shape `(num_samples, features)`. - - # Output shape - 3D tensor of shape `(num_samples, n, features)`. - """ - - def __init__(self, n, **kwargs): - super(RepeatVector, self).__init__(**kwargs) - self.n = n - self.input_spec = InputSpec(ndim=2) - - def compute_output_shape(self, input_shape): - return (input_shape[0], self.n, input_shape[1]) - - def call(self, inputs): - return K.repeat(inputs, self.n) - - def get_config(self): - config = {'n': self.n} - base_config = super(RepeatVector, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class Lambda(Layer): - """Wraps arbitrary expression as a `Layer` object. 
- - # Examples - - ```python - # add a x -> x^2 layer - model.add(Lambda(lambda x: x ** 2)) - ``` - ```python - # add a layer that returns the concatenation - # of the positive part of the input and - # the opposite of the negative part - - def antirectifier(x): - x -= K.mean(x, axis=1, keepdims=True) - x = K.l2_normalize(x, axis=1) - pos = K.relu(x) - neg = K.relu(-x) - return K.concatenate([pos, neg], axis=1) - - def antirectifier_output_shape(input_shape): - shape = list(input_shape) - assert len(shape) == 2 # only valid for 2D tensors - shape[-1] *= 2 - return tuple(shape) - - model.add(Lambda(antirectifier, - output_shape=antirectifier_output_shape)) - ``` - ```python - # add a layer that returns the hadamard product - # and sum of it from two input tensors - - def hadamard_product_sum(tensors): - out1 = tensors[0] * tensors[1] - out2 = K.sum(out1, axis=-1) - return [out1, out2] - - def hadamard_product_sum_output_shape(input_shapes): - shape1 = list(input_shapes[0]) - shape2 = list(input_shapes[1]) - assert shape1 == shape2 # else hadamard product isn't possible - return [tuple(shape1), tuple(shape2[:-1])] - - x1 = Dense(32)(input_1) - x2 = Dense(32)(input_2) - layer = Lambda(hadamard_product_sum, hadamard_product_sum_output_shape) - x_hadamard, x_sum = layer([x1, x2]) - ``` - - # Arguments - function: The function to be evaluated. - Takes input tensor or list of tensors as first argument. - output_shape: Expected output shape from function. - Only relevant when using Theano. - Can be a tuple or function. - If a tuple, it only specifies the first dimension onward; - sample dimension is assumed either the same as the input: - `output_shape = (input_shape[0], ) + output_shape` - or, the input is `None` and - the sample dimension is also `None`: - `output_shape = (None, ) + output_shape` - If a function, it specifies the entire shape as a function of the - input shape: `output_shape = f(input_shape)` - mask: Either None (indicating no masking) or a Tensor indicating the - input mask for Embedding. - arguments: optional dictionary of keyword arguments to be passed - to the function. - - # Input shape - Arbitrary. Use the keyword argument input_shape - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Specified by `output_shape` argument - (or auto-inferred when using TensorFlow or CNTK). 
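-
-    (Editor's sketch, not part of the original file.) The `arguments`
-    dictionary is forwarded to `function` as keyword arguments, so a
-    parameterized expression needs no closure; `scale` is a hypothetical
-    name here:
-
-    ```python
-    model.add(Lambda(lambda x, scale: x * scale,
-                     arguments={'scale': 0.5}))
-    ```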
- """ - - @interfaces.legacy_lambda_support - def __init__(self, function, output_shape=None, - mask=None, arguments=None, **kwargs): - super(Lambda, self).__init__(**kwargs) - self.function = function - self._input_dtypes = None - self.arguments = arguments if arguments else {} - if mask is not None: - self.supports_masking = True - self.mask = mask - - if output_shape is None: - self._output_shape = None - elif isinstance(output_shape, (tuple, list)): - self._output_shape = tuple(output_shape) - else: - if not callable(output_shape): - raise TypeError('In Lambda, `output_shape` ' - 'must be a list, a tuple, or a function.') - self._output_shape = output_shape - - def compute_output_shape(self, input_shape): - if self._output_shape is None: - # With TensorFlow or CNTK, we can infer the output shape directly: - if K.backend() in ('tensorflow', 'cntk'): - if isinstance(input_shape, list): - xs = [K.placeholder(shape=shape, dtype=dtype) - for shape, dtype in zip(input_shape, self._input_dtypes)] - x = self.call(xs) - else: - x = K.placeholder(shape=input_shape, - dtype=self._input_dtypes) - x = self.call(x) - if isinstance(x, list): - return [K.int_shape(x_elem) for x_elem in x] - else: - return K.int_shape(x) - # Otherwise, we default to the input shape. - warnings.warn('`output_shape` argument not specified for layer {} ' - 'and cannot be automatically inferred ' - 'with the Theano backend. ' - 'Defaulting to output shape `{}` ' - '(same as input shape). ' - 'If the expected output shape is different, ' - 'specify it via the `output_shape` argument.' - .format(self.name, input_shape)) - return input_shape - elif isinstance(self._output_shape, (tuple, list)): - if isinstance(input_shape, list): - num_samples = input_shape[0][0] - else: - num_samples = input_shape[0] if input_shape else None - return (num_samples,) + tuple(self._output_shape) - else: - shape = self._output_shape(input_shape) - if not isinstance(shape, (list, tuple)): - raise ValueError('`output_shape` function must return a tuple or ' - 'a list of tuples.') - if isinstance(shape, list): - if isinstance(shape[0], int) or shape[0] is None: - shape = tuple(shape) - return shape - - def call(self, inputs, mask=None): - arguments = self.arguments - if has_arg(self.function, 'mask'): - arguments['mask'] = mask - if isinstance(inputs, list): - self._input_dtypes = [K.dtype(x) for x in inputs] - else: - self._input_dtypes = K.dtype(inputs) - return self.function(inputs, **arguments) - - def compute_mask(self, inputs, mask=None): - if callable(self.mask): - return self.mask(inputs, mask) - return self.mask - - def get_config(self): - if isinstance(self.function, python_types.LambdaType): - function = func_dump(self.function) - function_type = 'lambda' - else: - function = self.function.__name__ - function_type = 'function' - - if isinstance(self._output_shape, python_types.LambdaType): - output_shape = func_dump(self._output_shape) - output_shape_type = 'lambda' - elif callable(self._output_shape): - output_shape = self._output_shape.__name__ - output_shape_type = 'function' - else: - output_shape = self._output_shape - output_shape_type = 'raw' - - config = {'function': function, - 'function_type': function_type, - 'output_shape': output_shape, - 'output_shape_type': output_shape_type, - 'arguments': self.arguments} - base_config = super(Lambda, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - config = config.copy() - globs = 
globals() - if custom_objects: - globs = dict(list(globs.items()) + list(custom_objects.items())) - function_type = config.pop('function_type') - if function_type == 'function': - # Simple lookup in custom objects - function = deserialize_keras_object( - config['function'], - custom_objects=custom_objects, - printable_module_name='function in Lambda layer') - elif function_type == 'lambda': - # Unsafe deserialization from bytecode - function = func_load(config['function'], globs=globs) - else: - raise TypeError('Unknown function type:', function_type) - - output_shape_type = config.pop('output_shape_type') - if output_shape_type == 'function': - # Simple lookup in custom objects - output_shape = deserialize_keras_object( - config['output_shape'], - custom_objects=custom_objects, - printable_module_name='output_shape function in Lambda layer') - elif output_shape_type == 'lambda': - # Unsafe deserialization from bytecode - output_shape = func_load(config['output_shape'], globs=globs) - else: - output_shape = config['output_shape'] - - # If arguments were numpy array, they have been saved as - # list. We need to recover the ndarray - if 'arguments' in config: - for key in config['arguments']: - if isinstance(config['arguments'][key], dict): - arg_dict = config['arguments'][key] - if 'type' in arg_dict and arg_dict['type'] == 'ndarray': - # Overwrite the argument with its numpy translation - config['arguments'][key] = np.array(arg_dict['value']) - - config['function'] = function - config['output_shape'] = output_shape - return cls(**config) - - -class Dense(Layer): - """Just your regular densely-connected NN layer. - - `Dense` implements the operation: - `output = activation(dot(input, kernel) + bias)` - where `activation` is the element-wise activation function - passed as the `activation` argument, `kernel` is a weights matrix - created by the layer, and `bias` is a bias vector created by the layer - (only applicable if `use_bias` is `True`). - - Note: if the input to the layer has a rank greater than 2, then - it is flattened prior to the initial dot product with `kernel`. - - # Example - - ```python - # as first layer in a sequential model: - model = Sequential() - model.add(Dense(32, input_shape=(16,))) - # now the model will take as input arrays of shape (*, 16) - # and output arrays of shape (*, 32) - - # after the first layer, you don't need to specify - # the size of the input anymore: - model.add(Dense(32)) - ``` - - # Arguments - units: Positive integer, dimensionality of the output space. - activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). 
- bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - - # Input shape - nD tensor with shape: `(batch_size, ..., input_dim)`. - The most common situation would be - a 2D input with shape `(batch_size, input_dim)`. - - # Output shape - nD tensor with shape: `(batch_size, ..., units)`. - For instance, for a 2D input with shape `(batch_size, input_dim)`, - the output would have shape `(batch_size, units)`. - """ - - @interfaces.legacy_dense_support - def __init__(self, units, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - if 'input_shape' not in kwargs and 'input_dim' in kwargs: - kwargs['input_shape'] = (kwargs.pop('input_dim'),) - super(Dense, self).__init__(**kwargs) - self.units = units - self.activation = activations.get(activation) - self.use_bias = use_bias - self.kernel_initializer = initializers.get(kernel_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.kernel_constraint = constraints.get(kernel_constraint) - self.bias_constraint = constraints.get(bias_constraint) - self.input_spec = InputSpec(min_ndim=2) - self.supports_masking = True - - def build(self, input_shape): - assert len(input_shape) >= 2 - input_dim = input_shape[-1] - - self.kernel = self.add_weight(shape=(input_dim, self.units), - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - if self.use_bias: - self.bias = self.add_weight(shape=(self.units,), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - self.input_spec = InputSpec(min_ndim=2, axes={-1: input_dim}) - self.built = True - - def call(self, inputs): - output = K.dot(inputs, self.kernel) - if self.use_bias: - output = K.bias_add(output, self.bias, data_format='channels_last') - if self.activation is not None: - output = self.activation(output) - return output - - def compute_output_shape(self, input_shape): - assert input_shape and len(input_shape) >= 2 - assert input_shape[-1] - output_shape = list(input_shape) - output_shape[-1] = self.units - return tuple(output_shape) - - def get_config(self): - config = { - 'units': self.units, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint) - } - base_config = super(Dense, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class ActivityRegularization(Layer): - """Layer that applies an update to the cost function based input activity. - - # Arguments - l1: L1 regularization factor (positive float). 
- l2: L2 regularization factor (positive float). - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as input. - """ - - def __init__(self, l1=0., l2=0., **kwargs): - super(ActivityRegularization, self).__init__(**kwargs) - self.supports_masking = True - self.l1 = l1 - self.l2 = l2 - self.activity_regularizer = regularizers.L1L2(l1=l1, l2=l2) - - def get_config(self): - config = {'l1': self.l1, - 'l2': self.l2} - base_config = super(ActivityRegularization, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape -"""Recurrent layers backed by cuDNN. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from .. import backend as K -from .. import initializers -from .. import regularizers -from .. import constraints -from .recurrent import RNN -from ..layers import InputSpec - -from collections import namedtuple - - -class _CuDNNRNN(RNN): - """Private base class for CuDNNGRU and CuDNNLSTM. - - # Arguments - return_sequences: Boolean. Whether to return the last output. - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - """ - - def __init__(self, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - **kwargs): - if K.backend() != 'tensorflow': - raise RuntimeError('CuDNN RNNs are only available ' - 'with the TensorFlow backend.') - super(RNN, self).__init__(**kwargs) - self.return_sequences = return_sequences - self.return_state = return_state - self.go_backwards = go_backwards - self.stateful = stateful - self.supports_masking = False - self.input_spec = [InputSpec(ndim=3)] - if hasattr(self.cell.state_size, '__len__'): - state_size = self.cell.state_size - else: - state_size = [self.cell.state_size] - self.state_spec = [InputSpec(shape=(None, dim)) - for dim in state_size] - self.constants_spec = None - self._states = None - self._num_constants = None - - def _canonical_to_params(self, weights, biases): - import tensorflow as tf - weights = [tf.reshape(x, (-1,)) for x in weights] - biases = [tf.reshape(x, (-1,)) for x in biases] - return tf.concat(weights + biases, 0) - - def call(self, inputs, mask=None, training=None, initial_state=None): - if isinstance(mask, list): - mask = mask[0] - if mask is not None: - raise ValueError('Masking is not supported for CuDNN RNNs.') - - # input shape: `(samples, time (padded with zeros), input_dim)` - # note that the .build() method of subclasses MUST define - # self.input_spec and self.state_spec with complete input shapes. - if isinstance(inputs, list): - initial_state = inputs[1:] - inputs = inputs[0] - elif initial_state is not None: - pass - elif self.stateful: - initial_state = self.states - else: - initial_state = self.get_initial_state(inputs) - - if len(initial_state) != len(self.states): - raise ValueError('Layer has ' + str(len(self.states)) + - ' states but was passed ' + - str(len(initial_state)) + - ' initial states.') - - if self.go_backwards: - # Reverse time axis. 
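-            # (editor's note) cuDNN kernels only scan forward in time,
-            # so `go_backwards` is emulated by flipping the time axis
-            # (axis 1) before the batch reaches the cuDNN op.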
- inputs = K.reverse(inputs, 1) - output, states = self._process_batch(inputs, initial_state) - - if self.stateful: - updates = [] - for i in range(len(states)): - updates.append((self.states[i], states[i])) - self.add_update(updates, inputs) - - if self.return_state: - return [output] + states - else: - return output - - def get_config(self): - config = {'return_sequences': self.return_sequences, - 'return_state': self.return_state, - 'go_backwards': self.go_backwards, - 'stateful': self.stateful} - base_config = super(RNN, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - return cls(**config) - - @property - def trainable_weights(self): - if self.trainable and self.built: - return [self.kernel, self.recurrent_kernel, self.bias] - return [] - - @property - def non_trainable_weights(self): - if not self.trainable and self.built: - return [self.kernel, self.recurrent_kernel, self.bias] - return [] - - @property - def losses(self): - return super(RNN, self).losses - - def get_losses_for(self, inputs=None): - return super(RNN, self).get_losses_for(inputs=inputs) - - -class CuDNNGRU(_CuDNNRNN): - """Fast GRU implementation backed by [CuDNN](https://developer.nvidia.com/cudnn). - - Can only be run on GPU, with the TensorFlow backend. - - # Arguments - units: Positive integer, dimensionality of the output space. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - (see [initializers](../initializers.md)). - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state. - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - return_sequences: Boolean. Whether to return the last output. - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. 
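-
-    # Example
-
-    (Editor's sketch, not part of the original file; `timesteps` and
-    `features` are placeholder names.) Requires a GPU and the TensorFlow
-    backend:
-
-    ```python
-    model = Sequential()
-    model.add(CuDNNGRU(64, input_shape=(timesteps, features)))
-    model.add(Dense(10, activation='softmax'))
-    ```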
- """ - - def __init__(self, units, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - return_sequences=False, - return_state=False, - stateful=False, - **kwargs): - self.units = units - super(CuDNNGRU, self).__init__( - return_sequences=return_sequences, - return_state=return_state, - stateful=stateful, - **kwargs) - - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - @property - def cell(self): - Cell = namedtuple('cell', 'state_size') - cell = Cell(state_size=self.units) - return cell - - def build(self, input_shape): - super(CuDNNGRU, self).build(input_shape) - if isinstance(input_shape, list): - input_shape = input_shape[0] - input_dim = input_shape[-1] - - from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops - self._cudnn_gru = cudnn_rnn_ops.CudnnGRU( - num_layers=1, - num_units=self.units, - input_size=input_dim, - input_mode='linear_input') - - self.kernel = self.add_weight(shape=(input_dim, self.units * 3), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units * 3), - name='recurrent_kernel', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - - self.bias = self.add_weight(shape=(self.units * 6,), - name='bias', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - - self.kernel_z = self.kernel[:, :self.units] - self.recurrent_kernel_z = self.recurrent_kernel[:, :self.units] - self.kernel_r = self.kernel[:, self.units: self.units * 2] - self.recurrent_kernel_r = self.recurrent_kernel[:, - self.units: - self.units * 2] - self.kernel_h = self.kernel[:, self.units * 2:] - self.recurrent_kernel_h = self.recurrent_kernel[:, self.units * 2:] - - self.bias_z_i = self.bias[:self.units] - self.bias_r_i = self.bias[self.units: self.units * 2] - self.bias_h_i = self.bias[self.units * 2: self.units * 3] - self.bias_z = self.bias[self.units * 3: self.units * 4] - self.bias_r = self.bias[self.units * 4: self.units * 5] - self.bias_h = self.bias[self.units * 5:] - - self.built = True - - def _process_batch(self, inputs, initial_state): - import tensorflow as tf - inputs = tf.transpose(inputs, (1, 0, 2)) - input_h = initial_state[0] - input_h = tf.expand_dims(input_h, axis=0) - - params = self._canonical_to_params( - weights=[ - self.kernel_r, - self.kernel_z, - self.kernel_h, - self.recurrent_kernel_r, - self.recurrent_kernel_z, - self.recurrent_kernel_h, - ], - biases=[ - self.bias_r_i, - self.bias_z_i, - self.bias_h_i, - self.bias_r, - self.bias_z, - self.bias_h, - 
],
-        )
-        outputs, h = self._cudnn_gru(
-            inputs,
-            input_h=input_h,
-            params=params,
-            is_training=True)
-
-        if self.stateful or self.return_state:
-            h = h[0]
-        if self.return_sequences:
-            output = tf.transpose(outputs, (1, 0, 2))
-        else:
-            output = outputs[-1]
-        return output, [h]
-
-    def get_config(self):
-        config = {
-            'units': self.units,
-            'kernel_initializer': initializers.serialize(self.kernel_initializer),
-            'recurrent_initializer':
-                initializers.serialize(self.recurrent_initializer),
-            'bias_initializer': initializers.serialize(self.bias_initializer),
-            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
-            'recurrent_regularizer':
-                regularizers.serialize(self.recurrent_regularizer),
-            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
-            'activity_regularizer':
-                regularizers.serialize(self.activity_regularizer),
-            'kernel_constraint': constraints.serialize(self.kernel_constraint),
-            'recurrent_constraint':
-                constraints.serialize(self.recurrent_constraint),
-            'bias_constraint': constraints.serialize(self.bias_constraint)}
-        base_config = super(CuDNNGRU, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-
-class CuDNNLSTM(_CuDNNRNN):
-    """Fast LSTM implementation with [CuDNN](https://developer.nvidia.com/cudnn).
-
-    Can only be run on GPU, with the TensorFlow backend.
-
-    # Arguments
-        units: Positive integer, dimensionality of the output space.
-        kernel_initializer: Initializer for the `kernel` weights matrix,
-            used for the linear transformation of the inputs.
-            (see [initializers](../initializers.md)).
-        recurrent_initializer: Initializer for the `recurrent_kernel`
-            weights matrix,
-            used for the linear transformation of the recurrent state.
-            (see [initializers](../initializers.md)).
-        bias_initializer: Initializer for the bias vector
-            (see [initializers](../initializers.md)).
-        unit_forget_bias: Boolean.
-            If True, add 1 to the bias of the forget gate at initialization.
-            Setting it to True will also force `bias_initializer="zeros"`.
-            This is recommended in [Jozefowicz et al. (2015)](
-            http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf).
-        kernel_regularizer: Regularizer function applied to
-            the `kernel` weights matrix
-            (see [regularizer](../regularizers.md)).
-        recurrent_regularizer: Regularizer function applied to
-            the `recurrent_kernel` weights matrix
-            (see [regularizer](../regularizers.md)).
-        bias_regularizer: Regularizer function applied to the bias vector
-            (see [regularizer](../regularizers.md)).
-        activity_regularizer: Regularizer function applied to
-            the output of the layer (its "activation").
-            (see [regularizer](../regularizers.md)).
-        kernel_constraint: Constraint function applied to
-            the `kernel` weights matrix
-            (see [constraints](../constraints.md)).
-        recurrent_constraint: Constraint function applied to
-            the `recurrent_kernel` weights matrix
-            (see [constraints](../constraints.md)).
-        bias_constraint: Constraint function applied to the bias vector
-            (see [constraints](../constraints.md)).
-        return_sequences: Boolean. Whether to return the last output
-            in the output sequence, or the full sequence.
-        return_state: Boolean. Whether to return the last state
-            in addition to the output.
-        stateful: Boolean (default False). If True, the last state
-            for each sample at index i in a batch will be used as initial
-            state for the sample of index i in the following batch.
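-
-    # Example
-
-    A minimal usage sketch (sizes and shapes below are illustrative
-    assumptions, not taken from this file; requires the TensorFlow
-    backend and a CUDA-enabled GPU):
-
-    ```python
-    from keras.models import Sequential
-    from keras.layers import CuDNNLSTM, Dense
-
-    model = Sequential()
-    # 10 timesteps with 32 features each; 64 LSTM units
-    model.add(CuDNNLSTM(64, input_shape=(10, 32)))
-    model.add(Dense(1))
-    model.compile(optimizer='adam', loss='mse')
-    ```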
- """ - - def __init__(self, units, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - return_sequences=False, - return_state=False, - stateful=False, - **kwargs): - self.units = units - super(CuDNNLSTM, self).__init__( - return_sequences=return_sequences, - return_state=return_state, - stateful=stateful, - **kwargs) - - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.unit_forget_bias = unit_forget_bias - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - @property - def cell(self): - Cell = namedtuple('cell', 'state_size') - cell = Cell(state_size=(self.units, self.units)) - return cell - - def build(self, input_shape): - super(CuDNNLSTM, self).build(input_shape) - if isinstance(input_shape, list): - input_shape = input_shape[0] - input_dim = input_shape[-1] - - from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops - self._cudnn_lstm = cudnn_rnn_ops.CudnnLSTM( - num_layers=1, - num_units=self.units, - input_size=input_dim, - input_mode='linear_input') - - self.kernel = self.add_weight(shape=(input_dim, self.units * 4), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units * 4), - name='recurrent_kernel', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - - if self.unit_forget_bias: - def bias_initializer(shape, *args, **kwargs): - return K.concatenate([ - self.bias_initializer((self.units * 5,), *args, **kwargs), - initializers.Ones()((self.units,), *args, **kwargs), - self.bias_initializer((self.units * 2,), *args, **kwargs), - ]) - else: - bias_initializer = self.bias_initializer - self.bias = self.add_weight(shape=(self.units * 8,), - name='bias', - initializer=bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - - self.kernel_i = self.kernel[:, :self.units] - self.kernel_f = self.kernel[:, self.units: self.units * 2] - self.kernel_c = self.kernel[:, self.units * 2: self.units * 3] - self.kernel_o = self.kernel[:, self.units * 3:] - - self.recurrent_kernel_i = self.recurrent_kernel[:, :self.units] - self.recurrent_kernel_f = ( - self.recurrent_kernel[:, self.units: self.units * 2]) - self.recurrent_kernel_c = ( - self.recurrent_kernel[:, self.units * 2: self.units * 3]) - self.recurrent_kernel_o = self.recurrent_kernel[:, self.units * 3:] - - self.bias_i_i = self.bias[:self.units] - self.bias_f_i = self.bias[self.units: self.units * 2] - self.bias_c_i = self.bias[self.units * 2: self.units * 3] - self.bias_o_i = self.bias[self.units * 3: self.units * 4] - self.bias_i = self.bias[self.units * 4: 
self.units * 5] - self.bias_f = self.bias[self.units * 5: self.units * 6] - self.bias_c = self.bias[self.units * 6: self.units * 7] - self.bias_o = self.bias[self.units * 7:] - - self.built = True - - def _process_batch(self, inputs, initial_state): - import tensorflow as tf - inputs = tf.transpose(inputs, (1, 0, 2)) - input_h = initial_state[0] - input_c = initial_state[1] - input_h = tf.expand_dims(input_h, axis=0) - input_c = tf.expand_dims(input_c, axis=0) - - params = self._canonical_to_params( - weights=[ - self.kernel_i, - self.kernel_f, - self.kernel_c, - self.kernel_o, - self.recurrent_kernel_i, - self.recurrent_kernel_f, - self.recurrent_kernel_c, - self.recurrent_kernel_o, - ], - biases=[ - self.bias_i_i, - self.bias_f_i, - self.bias_c_i, - self.bias_o_i, - self.bias_i, - self.bias_f, - self.bias_c, - self.bias_o, - ], - ) - outputs, h, c = self._cudnn_lstm( - inputs, - input_h=input_h, - input_c=input_c, - params=params, - is_training=True) - - if self.stateful or self.return_state: - h = h[0] - c = c[0] - if self.return_sequences: - output = tf.transpose(outputs, (1, 0, 2)) - else: - output = outputs[-1] - return output, [h, c] - - def get_config(self): - config = { - 'units': self.units, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'unit_forget_bias': self.unit_forget_bias, - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint)} - base_config = super(CuDNNLSTM, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -"""Embedding layer. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from .. import backend as K -from .. import initializers -from .. import regularizers -from .. import constraints -from ..engine.base_layer import Layer -from ..legacy import interfaces -from ..utils.generic_utils import to_list - - -class Embedding(Layer): - """Turns positive integers (indexes) into dense vectors of fixed size. - eg. [[4], [20]] -> [[0.25, 0.1], [0.6, -0.2]] - - This layer can only be used as the first layer in a model. - - # Example - - ```python - model = Sequential() - model.add(Embedding(1000, 64, input_length=10)) - # the model will take as input an integer matrix of size (batch, input_length). - # the largest integer (i.e. word index) in the input should be - # no larger than 999 (vocabulary size). - # now model.output_shape == (None, 10, 64), where None is the batch dimension. - - input_array = np.random.randint(1000, size=(32, 10)) - - model.compile('rmsprop', 'mse') - output_array = model.predict(input_array) - assert output_array.shape == (32, 10, 64) - ``` - - # Arguments - input_dim: int > 0. Size of the vocabulary, - i.e. maximum integer index + 1. - output_dim: int >= 0. Dimension of the dense embedding. - embeddings_initializer: Initializer for the `embeddings` matrix - (see [initializers](../initializers.md)). 
- embeddings_regularizer: Regularizer function applied to - the `embeddings` matrix - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - embeddings_constraint: Constraint function applied to - the `embeddings` matrix - (see [constraints](../constraints.md)). - mask_zero: Whether or not the input value 0 is a special "padding" - value that should be masked out. - This is useful when using [recurrent layers](recurrent.md) - which may take variable length input. - If this is `True` then all subsequent layers - in the model need to support masking or an exception will be raised. - If mask_zero is set to True, as a consequence, index 0 cannot be - used in the vocabulary (input_dim should equal size of - vocabulary + 1). - input_length: Length of input sequences, when it is constant. - This argument is required if you are going to connect - `Flatten` then `Dense` layers upstream - (without it, the shape of the dense outputs cannot be computed). - - # Input shape - 2D tensor with shape: `(batch_size, sequence_length)`. - - # Output shape - 3D tensor with shape: `(batch_size, sequence_length, output_dim)`. - - # References - - [A Theoretically Grounded Application of Dropout in - Recurrent Neural Networks](http://arxiv.org/abs/1512.05287) - """ - - @interfaces.legacy_embedding_support - def __init__(self, input_dim, output_dim, - embeddings_initializer='uniform', - embeddings_regularizer=None, - activity_regularizer=None, - embeddings_constraint=None, - mask_zero=False, - input_length=None, - **kwargs): - if 'input_shape' not in kwargs: - if input_length: - kwargs['input_shape'] = (input_length,) - else: - kwargs['input_shape'] = (None,) - super(Embedding, self).__init__(**kwargs) - - self.input_dim = input_dim - self.output_dim = output_dim - self.embeddings_initializer = initializers.get(embeddings_initializer) - self.embeddings_regularizer = regularizers.get(embeddings_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.embeddings_constraint = constraints.get(embeddings_constraint) - self.mask_zero = mask_zero - self.supports_masking = mask_zero - self.input_length = input_length - - def build(self, input_shape): - self.embeddings = self.add_weight( - shape=(self.input_dim, self.output_dim), - initializer=self.embeddings_initializer, - name='embeddings', - regularizer=self.embeddings_regularizer, - constraint=self.embeddings_constraint, - dtype=self.dtype) - self.built = True - - def compute_mask(self, inputs, mask=None): - if not self.mask_zero: - return None - output_mask = K.not_equal(inputs, 0) - return output_mask - - def compute_output_shape(self, input_shape): - if self.input_length is None: - return input_shape + (self.output_dim,) - else: - # input_length can be tuple if input is 3D or higher - in_lens = to_list(self.input_length, allow_tuple=True) - if len(in_lens) != len(input_shape) - 1: - raise ValueError( - '"input_length" is %s, but received input has shape %s' % - (str(self.input_length), str(input_shape))) - else: - for i, (s1, s2) in enumerate(zip(in_lens, input_shape[1:])): - if s1 is not None and s2 is not None and s1 != s2: - raise ValueError( - '"input_length" is %s, but received input has shape %s' % - (str(self.input_length), str(input_shape))) - elif s1 is None: - in_lens[i] = s2 - return (input_shape[0],) + tuple(in_lens) + (self.output_dim,) - - def call(self, inputs): - if K.dtype(inputs) != 
'int32': - inputs = K.cast(inputs, 'int32') - out = K.gather(self.embeddings, inputs) - return out - - def get_config(self): - config = {'input_dim': self.input_dim, - 'output_dim': self.output_dim, - 'embeddings_initializer': - initializers.serialize(self.embeddings_initializer), - 'embeddings_regularizer': - regularizers.serialize(self.embeddings_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'embeddings_constraint': - constraints.serialize(self.embeddings_constraint), - 'mask_zero': self.mask_zero, - 'input_length': self.input_length} - base_config = super(Embedding, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -# -*- coding: utf-8 -*- -"""Locally-connected layers. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from .. import backend as K -from .. import activations -from .. import initializers -from .. import regularizers -from .. import constraints -from ..engine.base_layer import Layer -from ..engine.base_layer import InputSpec -from ..utils import conv_utils -from ..legacy import interfaces - - -class LocallyConnected1D(Layer): - """Locally-connected layer for 1D inputs. - - The `LocallyConnected1D` layer works similarly to - the `Conv1D` layer, except that weights are unshared, - that is, a different set of filters is applied at each different patch - of the input. - - # Example - ```python - # apply a unshared weight convolution 1d of length 3 to a sequence with - # 10 timesteps, with 64 output filters - model = Sequential() - model.add(LocallyConnected1D(64, 3, input_shape=(10, 32))) - # now model.output_shape == (None, 8, 64) - # add a new conv1d on top - model.add(LocallyConnected1D(32, 3)) - # now model.output_shape == (None, 6, 32) - ``` - - # Arguments - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of a single integer, - specifying the length of the 1D convolution window. - strides: An integer or tuple/list of a single integer, - specifying the stride length of the convolution. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - padding: Currently only supports `"valid"` (case-insensitive). - `"same"` may be supported in the future. - data_format: String, one of `channels_first`, `channels_last`. - activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to the kernel matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). 
- - # Input shape - 3D tensor with shape: `(batch_size, steps, input_dim)` - - # Output shape - 3D tensor with shape: `(batch_size, new_steps, filters)` - `steps` value might have changed due to padding or strides. - """ - - @interfaces.legacy_conv1d_support - def __init__(self, filters, - kernel_size, - strides=1, - padding='valid', - data_format=None, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super(LocallyConnected1D, self).__init__(**kwargs) - self.filters = filters - self.kernel_size = conv_utils.normalize_tuple( - kernel_size, 1, 'kernel_size') - self.strides = conv_utils.normalize_tuple(strides, 1, 'strides') - self.padding = conv_utils.normalize_padding(padding) - if self.padding != 'valid': - raise ValueError('Invalid border mode for LocallyConnected1D ' - '(only "valid" is supported): ' + padding) - self.data_format = K.normalize_data_format(data_format) - self.activation = activations.get(activation) - self.use_bias = use_bias - self.kernel_initializer = initializers.get(kernel_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.kernel_constraint = constraints.get(kernel_constraint) - self.bias_constraint = constraints.get(bias_constraint) - self.input_spec = InputSpec(ndim=3) - - def build(self, input_shape): - input_dim = input_shape[2] - if input_dim is None: - raise ValueError('Axis 2 of input should be fully-defined. 
' - 'Found shape:', input_shape) - output_length = conv_utils.conv_output_length(input_shape[1], - self.kernel_size[0], - self.padding, - self.strides[0]) - self.kernel_shape = (output_length, - self.kernel_size[0] * input_dim, - self.filters) - self.kernel = self.add_weight( - shape=self.kernel_shape, - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - if self.use_bias: - self.bias = self.add_weight( - shape=(output_length, self.filters), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - self.input_spec = InputSpec(ndim=3, axes={2: input_dim}) - self.built = True - - def compute_output_shape(self, input_shape): - length = conv_utils.conv_output_length(input_shape[1], - self.kernel_size[0], - self.padding, - self.strides[0]) - return (input_shape[0], length, self.filters) - - def call(self, inputs): - output = K.local_conv1d(inputs, self.kernel, - self.kernel_size, self.strides) - if self.use_bias: - output = K.bias_add(output, self.bias) - if self.activation is not None: - output = self.activation(output) - return output - - def get_config(self): - config = { - 'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint) - } - base_config = super(LocallyConnected1D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class LocallyConnected2D(Layer): - """Locally-connected layer for 2D inputs. - - The `LocallyConnected2D` layer works similarly - to the `Conv2D` layer, except that weights are unshared, - that is, a different set of filters is applied at each - different patch of the input. - - # Examples - ```python - # apply a 3x3 unshared weights convolution with 64 output filters - # on a 32x32 image with `data_format="channels_last"`: - model = Sequential() - model.add(LocallyConnected2D(64, (3, 3), input_shape=(32, 32, 3))) - # now model.output_shape == (None, 30, 30, 64) - # notice that this layer will consume (30*30)*(3*3*3*64) - # + (30*30)*64 parameters - - # add a 3x3 unshared weights convolution on top, with 32 output filters: - model.add(LocallyConnected2D(32, (3, 3))) - # now model.output_shape == (None, 28, 28, 32) - ``` - - # Arguments - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - kernel_size: An integer or tuple/list of 2 integers, specifying the - width and height of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution along the width and height. - Can be a single integer to specify the same value for - all spatial dimensions. - padding: Currently only support `"valid"` (case-insensitive). 
- `"same"` will be supported in future. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - activation: Activation function to use - (see [activations](../activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to the kernel matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - - # Input shape - 4D tensor with shape: - `(samples, channels, rows, cols)` if data_format='channels_first' - or 4D tensor with shape: - `(samples, rows, cols, channels)` if data_format='channels_last'. - - # Output shape - 4D tensor with shape: - `(samples, filters, new_rows, new_cols)` if data_format='channels_first' - or 4D tensor with shape: - `(samples, new_rows, new_cols, filters)` if data_format='channels_last'. - `rows` and `cols` values might have changed due to padding. 
- """ - - @interfaces.legacy_conv2d_support - def __init__(self, filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format=None, - activation=None, - use_bias=True, - kernel_initializer='glorot_uniform', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - super(LocallyConnected2D, self).__init__(**kwargs) - self.filters = filters - self.kernel_size = conv_utils.normalize_tuple( - kernel_size, 2, 'kernel_size') - self.strides = conv_utils.normalize_tuple(strides, 2, 'strides') - self.padding = conv_utils.normalize_padding(padding) - if self.padding != 'valid': - raise ValueError('Invalid border mode for LocallyConnected2D ' - '(only "valid" is supported): ' + padding) - self.data_format = K.normalize_data_format(data_format) - self.activation = activations.get(activation) - self.use_bias = use_bias - self.kernel_initializer = initializers.get(kernel_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.kernel_constraint = constraints.get(kernel_constraint) - self.bias_constraint = constraints.get(bias_constraint) - self.input_spec = InputSpec(ndim=4) - - def build(self, input_shape): - if self.data_format == 'channels_last': - input_row, input_col = input_shape[1:-1] - input_filter = input_shape[3] - else: - input_row, input_col = input_shape[2:] - input_filter = input_shape[1] - if input_row is None or input_col is None: - raise ValueError('The spatial dimensions of the inputs to ' - ' a LocallyConnected2D layer ' - 'should be fully-defined, but layer received ' - 'the inputs shape ' + str(input_shape)) - output_row = conv_utils.conv_output_length(input_row, self.kernel_size[0], - self.padding, self.strides[0]) - output_col = conv_utils.conv_output_length(input_col, self.kernel_size[1], - self.padding, self.strides[1]) - self.output_row = output_row - self.output_col = output_col - self.kernel_shape = ( - output_row * output_col, - self.kernel_size[0] * self.kernel_size[1] * input_filter, - self.filters) - self.kernel = self.add_weight(shape=self.kernel_shape, - initializer=self.kernel_initializer, - name='kernel', - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - if self.use_bias: - self.bias = self.add_weight(shape=(output_row, output_col, self.filters), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - if self.data_format == 'channels_first': - self.input_spec = InputSpec(ndim=4, axes={1: input_filter}) - else: - self.input_spec = InputSpec(ndim=4, axes={-1: input_filter}) - self.built = True - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - rows = input_shape[2] - cols = input_shape[3] - elif self.data_format == 'channels_last': - rows = input_shape[1] - cols = input_shape[2] - - rows = conv_utils.conv_output_length(rows, self.kernel_size[0], - self.padding, self.strides[0]) - cols = conv_utils.conv_output_length(cols, self.kernel_size[1], - self.padding, self.strides[1]) - - if self.data_format == 'channels_first': - return (input_shape[0], self.filters, rows, cols) - elif self.data_format == 'channels_last': - return (input_shape[0], rows, cols, 
self.filters) - - def call(self, inputs): - output = K.local_conv2d(inputs, - self.kernel, - self.kernel_size, - self.strides, - (self.output_row, self.output_col), - self.data_format) - - if self.use_bias: - output = K.bias_add(output, self.bias, - data_format=self.data_format) - - output = self.activation(output) - return output - - def get_config(self): - config = { - 'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'data_format': self.data_format, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint) - } - base_config = super(LocallyConnected2D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -"""Layers that can merge several inputs into one. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from ..engine.base_layer import Layer -from .. import backend as K - - -class _Merge(Layer): - """Generic merge layer for elementwise merge functions. - - Used to implement `Sum`, `Average`, etc. - - # Arguments - **kwargs: standard layer keyword arguments. - """ - - def __init__(self, **kwargs): - super(_Merge, self).__init__(**kwargs) - self.supports_masking = True - - def _merge_function(self, inputs): - raise NotImplementedError - - def _compute_elemwise_op_output_shape(self, shape1, shape2): - """Computes the shape of the resultant of an elementwise operation. - - # Arguments - shape1: tuple or None. Shape of the first tensor - shape2: tuple or None. Shape of the second tensor - - # Returns - expected output shape when an element-wise operation is - carried out on 2 tensors with shapes shape1 and shape2. - tuple or None. - - # Raises - ValueError: if shape1 and shape2 are not compatible for - element-wise operations. - """ - if None in [shape1, shape2]: - return None - elif len(shape1) < len(shape2): - return self._compute_elemwise_op_output_shape(shape2, shape1) - elif not shape2: - return shape1 - output_shape = list(shape1[:-len(shape2)]) - for i, j in zip(shape1[-len(shape2):], shape2): - if i is None or j is None: - output_shape.append(None) - elif i == 1: - output_shape.append(j) - elif j == 1: - output_shape.append(i) - else: - if i != j: - raise ValueError('Operands could not be broadcast ' - 'together with shapes ' + - str(shape1) + ' ' + str(shape2)) - output_shape.append(i) - return tuple(output_shape) - - def build(self, input_shape): - # Used purely for shape validation. - if not isinstance(input_shape, list): - raise ValueError('A merge layer should be called ' - 'on a list of inputs.') - if len(input_shape) < 2: - raise ValueError('A merge layer should be called ' - 'on a list of at least 2 inputs. ' - 'Got ' + str(len(input_shape)) + ' inputs.') - batch_sizes = [s[0] for s in input_shape if s is not None] - batch_sizes = set(batch_sizes) - batch_sizes -= set([None]) - if len(batch_sizes) > 1: - raise ValueError('Can not merge tensors with different ' - 'batch sizes. 
Got tensors with shapes : ' + - str(input_shape)) - if input_shape[0] is None: - output_shape = None - else: - output_shape = input_shape[0][1:] - for i in range(1, len(input_shape)): - if input_shape[i] is None: - shape = None - else: - shape = input_shape[i][1:] - output_shape = self._compute_elemwise_op_output_shape(output_shape, - shape) - # If the inputs have different ranks, we have to reshape them - # to make them broadcastable. - if None not in input_shape and len(set(map(len, input_shape))) == 1: - self._reshape_required = False - else: - self._reshape_required = True - - def call(self, inputs): - if not isinstance(inputs, list): - raise ValueError('A merge layer should be called ' - 'on a list of inputs.') - if self._reshape_required: - reshaped_inputs = [] - input_ndims = list(map(K.ndim, inputs)) - if None not in input_ndims: - # If ranks of all inputs are available, - # we simply expand each of them at axis=1 - # until all of them have the same rank. - max_ndim = max(input_ndims) - for x in inputs: - x_ndim = K.ndim(x) - for _ in range(max_ndim - x_ndim): - x = K.expand_dims(x, 1) - reshaped_inputs.append(x) - return self._merge_function(reshaped_inputs) - else: - # Transpose all inputs so that batch size is the last dimension. - # (batch_size, dim1, dim2, ... ) -> (dim1, dim2, ... , batch_size) - transposed = False - for x in inputs: - x_ndim = K.ndim(x) - if x_ndim is None: - x_shape = K.shape(x) - batch_size = x_shape[0] - new_shape = K.concatenate([x_shape[1:], - K.expand_dims(batch_size)]) - x_transposed = K.reshape(x, K.stack([batch_size, - K.prod(x_shape[1:])])) - x_transposed = K.permute_dimensions( - x_transposed, (1, 0)) - x_transposed = K.reshape(x_transposed, new_shape) - reshaped_inputs.append(x_transposed) - transposed = True - elif x_ndim > 1: - dims = list(range(1, x_ndim)) + [0] - reshaped_inputs.append(K.permute_dimensions(x, dims)) - transposed = True - else: - # We don't transpose inputs if they are - # 1D vectors or scalars. - reshaped_inputs.append(x) - y = self._merge_function(reshaped_inputs) - y_ndim = K.ndim(y) - if transposed: - # If inputs have been transposed, - # we have to transpose the output too. 
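-                    # `_merge_function` saw batch-last tensors, so `y` still
-                    # has its batch dimension last; the branches below move it
-                    # back to the front, via dynamic shape ops when the rank
-                    # is unknown and a static permutation otherwise.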
- if y_ndim is None: - y_shape = K.shape(y) - y_ndim = K.shape(y_shape)[0] - batch_size = y_shape[y_ndim - 1] - new_shape = K.concatenate([K.expand_dims(batch_size), - y_shape[:y_ndim - 1]]) - y = K.reshape(y, (-1, batch_size)) - y = K.permute_dimensions(y, (1, 0)) - y = K.reshape(y, new_shape) - elif y_ndim > 1: - dims = [y_ndim - 1] + list(range(y_ndim - 1)) - y = K.permute_dimensions(y, dims) - return y - else: - return self._merge_function(inputs) - - def compute_output_shape(self, input_shape): - if input_shape[0] is None: - output_shape = None - else: - output_shape = input_shape[0][1:] - for i in range(1, len(input_shape)): - if input_shape[i] is None: - shape = None - else: - shape = input_shape[i][1:] - output_shape = self._compute_elemwise_op_output_shape(output_shape, - shape) - batch_sizes = [s[0] for s in input_shape if s is not None] - batch_sizes = set(batch_sizes) - batch_sizes -= set([None]) - if len(batch_sizes) == 1: - output_shape = (list(batch_sizes)[0],) + output_shape - else: - output_shape = (None,) + output_shape - return output_shape - - def compute_mask(self, inputs, mask=None): - if mask is None: - return None - if not isinstance(mask, list): - raise ValueError('`mask` should be a list.') - if not isinstance(inputs, list): - raise ValueError('`inputs` should be a list.') - if len(mask) != len(inputs): - raise ValueError('The lists `inputs` and `mask` ' - 'should have the same length.') - if all([m is None for m in mask]): - return None - masks = [K.expand_dims(m, 0) for m in mask if m is not None] - return K.all(K.concatenate(masks, axis=0), axis=0, keepdims=False) - - -class Add(_Merge): - """Layer that adds a list of inputs. - - It takes as input a list of tensors, - all of the same shape, and returns - a single tensor (also of the same shape). - - # Examples - - ```python - import keras - - input1 = keras.layers.Input(shape=(16,)) - x1 = keras.layers.Dense(8, activation='relu')(input1) - input2 = keras.layers.Input(shape=(32,)) - x2 = keras.layers.Dense(8, activation='relu')(input2) - # equivalent to added = keras.layers.add([x1, x2]) - added = keras.layers.Add()([x1, x2]) - - out = keras.layers.Dense(4)(added) - model = keras.models.Model(inputs=[input1, input2], outputs=out) - ``` - """ - - def _merge_function(self, inputs): - output = inputs[0] - for i in range(1, len(inputs)): - output += inputs[i] - return output - - -class Subtract(_Merge): - """Layer that subtracts two inputs. - - It takes as input a list of tensors of size 2, - both of the same shape, and returns a single tensor, (inputs[0] - inputs[1]), - also of the same shape. - - # Examples - - ```python - import keras - - input1 = keras.layers.Input(shape=(16,)) - x1 = keras.layers.Dense(8, activation='relu')(input1) - input2 = keras.layers.Input(shape=(32,)) - x2 = keras.layers.Dense(8, activation='relu')(input2) - # Equivalent to subtracted = keras.layers.subtract([x1, x2]) - subtracted = keras.layers.Subtract()([x1, x2]) - - out = keras.layers.Dense(4)(subtracted) - model = keras.models.Model(inputs=[input1, input2], outputs=out) - ``` - """ - - def build(self, input_shape): - super(Subtract, self).build(input_shape) - if len(input_shape) != 2: - raise ValueError('A `Subtract` layer should be called ' - 'on exactly 2 inputs') - - def _merge_function(self, inputs): - if len(inputs) != 2: - raise ValueError('A `Subtract` layer should be called ' - 'on exactly 2 inputs') - return inputs[0] - inputs[1] - - -class Multiply(_Merge): - """Layer that multiplies (element-wise) a list of inputs. 
-
-    It takes as input a list of tensors,
-    all of the same shape, and returns
-    a single tensor (also of the same shape).
-    """
-
-    def _merge_function(self, inputs):
-        output = inputs[0]
-        for i in range(1, len(inputs)):
-            output *= inputs[i]
-        return output
-
-
-class Average(_Merge):
-    """Layer that averages a list of inputs.
-
-    It takes as input a list of tensors,
-    all of the same shape, and returns
-    a single tensor (also of the same shape).
-    """
-
-    def _merge_function(self, inputs):
-        output = inputs[0]
-        for i in range(1, len(inputs)):
-            output += inputs[i]
-        return output / len(inputs)
-
-
-class Maximum(_Merge):
-    """Layer that computes the element-wise maximum of a list of inputs.
-
-    It takes as input a list of tensors,
-    all of the same shape, and returns
-    a single tensor (also of the same shape).
-    """
-
-    def _merge_function(self, inputs):
-        output = inputs[0]
-        for i in range(1, len(inputs)):
-            output = K.maximum(output, inputs[i])
-        return output
-
-
-class Minimum(_Merge):
-    """Layer that computes the element-wise minimum of a list of inputs.
-
-    It takes as input a list of tensors,
-    all of the same shape, and returns
-    a single tensor (also of the same shape).
-    """
-
-    def _merge_function(self, inputs):
-        output = inputs[0]
-        for i in range(1, len(inputs)):
-            output = K.minimum(output, inputs[i])
-        return output
-
-
-class Concatenate(_Merge):
-    """Layer that concatenates a list of inputs.
-
-    It takes as input a list of tensors,
-    all of the same shape except for the concatenation axis,
-    and returns a single tensor, the concatenation of all inputs.
-
-    # Arguments
-        axis: Axis along which to concatenate.
-        **kwargs: standard layer keyword arguments.
-    """
-
-    def __init__(self, axis=-1, **kwargs):
-        super(Concatenate, self).__init__(**kwargs)
-        self.axis = axis
-        self.supports_masking = True
-        self._reshape_required = False
-
-    def build(self, input_shape):
-        # Used purely for shape validation.
-        if not isinstance(input_shape, list) or len(input_shape) < 2:
-            raise ValueError('A `Concatenate` layer should be called '
-                             'on a list of at least 2 inputs')
-        if all([shape is None for shape in input_shape]):
-            return
-        reduced_inputs_shapes = [list(shape) for shape in input_shape]
-        shape_set = set()
-        for i in range(len(reduced_inputs_shapes)):
-            del reduced_inputs_shapes[i][self.axis]
-            shape_set.add(tuple(reduced_inputs_shapes[i]))
-        if len(shape_set) > 1:
-            raise ValueError('A `Concatenate` layer requires '
-                             'inputs with matching shapes '
-                             'except for the concat axis. 
' - 'Got inputs shapes: %s' % (input_shape)) - - def _merge_function(self, inputs): - return K.concatenate(inputs, axis=self.axis) - - def compute_output_shape(self, input_shape): - if not isinstance(input_shape, list): - raise ValueError('A `Concatenate` layer should be called ' - 'on a list of inputs.') - input_shapes = input_shape - output_shape = list(input_shapes[0]) - for shape in input_shapes[1:]: - if output_shape[self.axis] is None or shape[self.axis] is None: - output_shape[self.axis] = None - break - output_shape[self.axis] += shape[self.axis] - return tuple(output_shape) - - def compute_mask(self, inputs, mask=None): - if mask is None: - return None - if not isinstance(mask, list): - raise ValueError('`mask` should be a list.') - if not isinstance(inputs, list): - raise ValueError('`inputs` should be a list.') - if len(mask) != len(inputs): - raise ValueError('The lists `inputs` and `mask` ' - 'should have the same length.') - if all([m is None for m in mask]): - return None - # Make a list of masks while making sure - # the dimensionality of each mask - # is the same as the corresponding input. - masks = [] - for input_i, mask_i in zip(inputs, mask): - if mask_i is None: - # Input is unmasked. Append all 1s to masks, - masks.append(K.ones_like(input_i, dtype='bool')) - elif K.ndim(mask_i) < K.ndim(input_i): - # Mask is smaller than the input, expand it - masks.append(K.expand_dims(mask_i)) - else: - masks.append(mask_i) - concatenated = K.concatenate(masks, axis=self.axis) - return K.all(concatenated, axis=-1, keepdims=False) - - def get_config(self): - config = { - 'axis': self.axis, - } - base_config = super(Concatenate, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class Dot(_Merge): - """Layer that computes a dot product between samples in two tensors. - - E.g. if applied to a list of two tensors `a` and `b` of shape `(batch_size, n)`, - the output will be a tensor of shape `(batch_size, 1)` - where each entry `i` will be the dot product between - `a[i]` and `b[i]`. - - # Arguments - axes: Integer or tuple of integers, - axis or axes along which to take the dot product. - normalize: Whether to L2-normalize samples along the - dot product axis before taking the dot product. - If set to True, then the output of the dot product - is the cosine proximity between the two samples. - **kwargs: Standard layer keyword arguments. - """ - - def __init__(self, axes, normalize=False, **kwargs): - super(Dot, self).__init__(**kwargs) - if not isinstance(axes, int): - if not isinstance(axes, (list, tuple)): - raise TypeError('Invalid type for `axes` - ' - 'should be a list or an int.') - if len(axes) != 2: - raise ValueError('Invalid format for `axes` - ' - 'should contain two elements.') - if not isinstance(axes[0], int) or not isinstance(axes[1], int): - raise ValueError('Invalid format for `axes` - ' - 'list elements should be "int".') - self.axes = axes - self.normalize = normalize - self.supports_masking = True - self._reshape_required = False - - def build(self, input_shape): - # Used purely for shape validation. 
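-        # `Dot` creates no weights; build() only checks that exactly two
-        # inputs were passed and that their sizes agree along the dot axes.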
- if not isinstance(input_shape, list) or len(input_shape) != 2: - raise ValueError('A `Dot` layer should be called ' - 'on a list of 2 inputs.') - shape1 = input_shape[0] - shape2 = input_shape[1] - if shape1 is None or shape2 is None: - return - if isinstance(self.axes, int): - if self.axes < 0: - axes = [self.axes % len(shape1), self.axes % len(shape2)] - else: - axes = [self.axes] * 2 - else: - axes = self.axes - if shape1[axes[0]] != shape2[axes[1]]: - raise ValueError( - 'Dimension incompatibility ' - '%s != %s. ' % (shape1[axes[0]], shape2[axes[1]]) + - 'Layer shapes: %s, %s' % (shape1, shape2)) - - def _merge_function(self, inputs): - if len(inputs) != 2: - raise ValueError('A `Dot` layer should be called ' - 'on exactly 2 inputs') - x1 = inputs[0] - x2 = inputs[1] - if isinstance(self.axes, int): - if self.axes < 0: - axes = [self.axes % K.ndim(x1), self.axes % K.ndim(x2)] - else: - axes = [self.axes] * 2 - else: - axes = [] - for i in range(len(self.axes)): - if self.axes[i] < 0: - axes.append(self.axes[i] % K.ndim(inputs[i])) - else: - axes.append(self.axes[i]) - if self.normalize: - x1 = K.l2_normalize(x1, axis=axes[0]) - x2 = K.l2_normalize(x2, axis=axes[1]) - output = K.batch_dot(x1, x2, axes) - return output - - def compute_output_shape(self, input_shape): - if not isinstance(input_shape, list) or len(input_shape) != 2: - raise ValueError('A `Dot` layer should be called ' - 'on a list of 2 inputs.') - shape1 = list(input_shape[0]) - shape2 = list(input_shape[1]) - if isinstance(self.axes, int): - if self.axes < 0: - axes = [self.axes % len(shape1), self.axes % len(shape2)] - else: - axes = [self.axes] * 2 - else: - axes = self.axes - shape1.pop(axes[0]) - shape2.pop(axes[1]) - shape2.pop(0) - output_shape = shape1 + shape2 - if len(output_shape) == 1: - output_shape += [1] - return tuple(output_shape) - - def compute_mask(self, inputs, mask=None): - return None - - def get_config(self): - config = { - 'axes': self.axes, - 'normalize': self.normalize, - } - base_config = super(Dot, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -def add(inputs, **kwargs): - """Functional interface to the `Add` layer. - - # Arguments - inputs: A list of input tensors (at least 2). - **kwargs: Standard layer keyword arguments. - - # Returns - A tensor, the sum of the inputs. - - # Examples - - ```python - import keras - - input1 = keras.layers.Input(shape=(16,)) - x1 = keras.layers.Dense(8, activation='relu')(input1) - input2 = keras.layers.Input(shape=(32,)) - x2 = keras.layers.Dense(8, activation='relu')(input2) - added = keras.layers.add([x1, x2]) - - out = keras.layers.Dense(4)(added) - model = keras.models.Model(inputs=[input1, input2], outputs=out) - ``` - """ - return Add(**kwargs)(inputs) - - -def subtract(inputs, **kwargs): - """Functional interface to the `Subtract` layer. - - # Arguments - inputs: A list of input tensors (exactly 2). - **kwargs: Standard layer keyword arguments. - - # Returns - A tensor, the difference of the inputs. 
- - # Examples - - ```python - import keras - - input1 = keras.layers.Input(shape=(16,)) - x1 = keras.layers.Dense(8, activation='relu')(input1) - input2 = keras.layers.Input(shape=(32,)) - x2 = keras.layers.Dense(8, activation='relu')(input2) - subtracted = keras.layers.subtract([x1, x2]) - - out = keras.layers.Dense(4)(subtracted) - model = keras.models.Model(inputs=[input1, input2], outputs=out) - ``` - """ - return Subtract(**kwargs)(inputs) - - -def multiply(inputs, **kwargs): - """Functional interface to the `Multiply` layer. - - # Arguments - inputs: A list of input tensors (at least 2). - **kwargs: Standard layer keyword arguments. - - # Returns - A tensor, the element-wise product of the inputs. - """ - return Multiply(**kwargs)(inputs) - - -def average(inputs, **kwargs): - """Functional interface to the `Average` layer. - - # Arguments - inputs: A list of input tensors (at least 2). - **kwargs: Standard layer keyword arguments. - - # Returns - A tensor, the average of the inputs. - """ - return Average(**kwargs)(inputs) - - -def maximum(inputs, **kwargs): - """Functional interface to the `Maximum` layer. - - # Arguments - inputs: A list of input tensors (at least 2). - **kwargs: Standard layer keyword arguments. - - # Returns - A tensor, the element-wise maximum of the inputs. - """ - return Maximum(**kwargs)(inputs) - - -def minimum(inputs, **kwargs): - """Functional interface to the `Minimum` layer. - - # Arguments - inputs: A list of input tensors (at least 2). - **kwargs: Standard layer keyword arguments. - - # Returns - A tensor, the element-wise minimum of the inputs. - """ - return Minimum(**kwargs)(inputs) - - -def concatenate(inputs, axis=-1, **kwargs): - """Functional interface to the `Concatenate` layer. - - # Arguments - inputs: A list of input tensors (at least 2). - axis: Concatenation axis. - **kwargs: Standard layer keyword arguments. - - # Returns - A tensor, the concatenation of the inputs alongside axis `axis`. - """ - return Concatenate(axis=axis, **kwargs)(inputs) - - -def dot(inputs, axes, normalize=False, **kwargs): - """Functional interface to the `Dot` layer. - - # Arguments - inputs: A list of input tensors (at least 2). - axes: Integer or tuple of integers, - axis or axes along which to take the dot product. - normalize: Whether to L2-normalize samples along the - dot product axis before taking the dot product. - If set to True, then the output of the dot product - is the cosine proximity between the two samples. - **kwargs: Standard layer keyword arguments. - - # Returns - A tensor, the dot product of the samples from the inputs. - """ - return Dot(axes=axes, normalize=normalize, **kwargs)(inputs) -# -*- coding: utf-8 -*- -"""Layers that operate regularization via the addition of noise. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from ..engine.base_layer import Layer -from .. import backend as K -import numpy as np -from ..legacy import interfaces - - -class GaussianNoise(Layer): - """Apply additive zero-centered Gaussian noise. - - This is useful to mitigate overfitting - (you could see it as a form of random data augmentation). - Gaussian Noise (GS) is a natural choice as corruption process - for real valued inputs. - - As it is a regularization layer, it is only active at training time. - - # Arguments - stddev: float, standard deviation of the noise distribution. - - # Input shape - Arbitrary. 
Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as input. - """ - - @interfaces.legacy_gaussiannoise_support - def __init__(self, stddev, **kwargs): - super(GaussianNoise, self).__init__(**kwargs) - self.supports_masking = True - self.stddev = stddev - - def call(self, inputs, training=None): - def noised(): - return inputs + K.random_normal(shape=K.shape(inputs), - mean=0., - stddev=self.stddev) - return K.in_train_phase(noised, inputs, training=training) - - def get_config(self): - config = {'stddev': self.stddev} - base_config = super(GaussianNoise, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - - -class GaussianDropout(Layer): - """Apply multiplicative 1-centered Gaussian noise. - - As it is a regularization layer, it is only active at training time. - - # Arguments - rate: float, drop probability (as with `Dropout`). - The multiplicative noise will have - standard deviation `sqrt(rate / (1 - rate))`. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as input. - - # References - - [Dropout: A Simple Way to Prevent Neural Networks from Overfitting]( - http://www.cs.toronto.edu/~rsalakhu/papers/srivastava14a.pdf) - """ - - @interfaces.legacy_gaussiandropout_support - def __init__(self, rate, **kwargs): - super(GaussianDropout, self).__init__(**kwargs) - self.supports_masking = True - self.rate = rate - - def call(self, inputs, training=None): - if 0 < self.rate < 1: - def noised(): - stddev = np.sqrt(self.rate / (1.0 - self.rate)) - return inputs * K.random_normal(shape=K.shape(inputs), - mean=1.0, - stddev=stddev) - return K.in_train_phase(noised, inputs, training=training) - return inputs - - def get_config(self): - config = {'rate': self.rate} - base_config = super(GaussianDropout, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape - - -class AlphaDropout(Layer): - """Applies Alpha Dropout to the input. - - Alpha Dropout is a `Dropout` that keeps mean and variance of inputs - to their original values, in order to ensure the self-normalizing property - even after this dropout. - Alpha Dropout fits well to Scaled Exponential Linear Units - by randomly setting activations to the negative saturation value. - - # Arguments - rate: float, drop probability (as with `Dropout`). - The multiplicative noise will have - standard deviation `sqrt(rate / (1 - rate))`. - noise_shape: A 1-D `Tensor` of type `int32`, representing the - shape for randomly generated keep/drop flags. - seed: A Python integer to use as random seed. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as input. 
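-
-    # Example
-
-    A minimal usage sketch (layer sizes below are illustrative
-    assumptions, not taken from this file); `AlphaDropout` is intended
-    to follow `selu` activations so the self-normalizing property is
-    preserved:
-
-    ```python
-    from keras.models import Sequential
-    from keras.layers import Dense, AlphaDropout
-
-    model = Sequential()
-    model.add(Dense(64, activation='selu', input_shape=(20,)))
-    model.add(AlphaDropout(0.1))  # drop probability, as with Dropout
-    model.add(Dense(1))
-    ```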
- - # References - - [Self-Normalizing Neural Networks](https://arxiv.org/abs/1706.02515) - """ - - def __init__(self, rate, noise_shape=None, seed=None, **kwargs): - super(AlphaDropout, self).__init__(**kwargs) - self.rate = rate - self.noise_shape = noise_shape - self.seed = seed - self.supports_masking = True - - def _get_noise_shape(self, inputs): - return self.noise_shape if self.noise_shape else K.shape(inputs) - - def call(self, inputs, training=None): - if 0. < self.rate < 1.: - noise_shape = self._get_noise_shape(inputs) - - def dropped_inputs(inputs=inputs, rate=self.rate, seed=self.seed): - alpha = 1.6732632423543772848170429916717 - scale = 1.0507009873554804934193349852946 - alpha_p = -alpha * scale - - kept_idx = K.greater_equal(K.random_uniform(noise_shape, - seed=seed), rate) - kept_idx = K.cast(kept_idx, K.floatx()) - - # Get affine transformation params - a = ((1 - rate) * (1 + rate * alpha_p ** 2)) ** -0.5 - b = -a * alpha_p * rate - - # Apply mask - x = inputs * kept_idx + alpha_p * (1 - kept_idx) - - # Do affine transformation - return a * x + b - - return K.in_train_phase(dropped_inputs, inputs, training=training) - return inputs - - def get_config(self): - config = {'rate': self.rate} - base_config = super(AlphaDropout, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape -# -*- coding: utf-8 -*- -"""Normalization layers. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from ..engine.base_layer import Layer, InputSpec -from .. import initializers -from .. import regularizers -from .. import constraints -from .. import backend as K -from ..legacy import interfaces - - -class BatchNormalization(Layer): - """Batch normalization layer (Ioffe and Szegedy, 2014). - - Normalize the activations of the previous layer at each batch, - i.e. applies a transformation that maintains the mean activation - close to 0 and the activation standard deviation close to 1. - - # Arguments - axis: Integer, the axis that should be normalized - (typically the features axis). - For instance, after a `Conv2D` layer with - `data_format="channels_first"`, - set `axis=1` in `BatchNormalization`. - momentum: Momentum for the moving mean and the moving variance. - epsilon: Small float added to variance to avoid dividing by zero. - center: If True, add offset of `beta` to normalized tensor. - If False, `beta` is ignored. - scale: If True, multiply by `gamma`. - If False, `gamma` is not used. - When the next layer is linear (also e.g. `nn.relu`), - this can be disabled since the scaling - will be done by the next layer. - beta_initializer: Initializer for the beta weight. - gamma_initializer: Initializer for the gamma weight. - moving_mean_initializer: Initializer for the moving mean. - moving_variance_initializer: Initializer for the moving variance. - beta_regularizer: Optional regularizer for the beta weight. - gamma_regularizer: Optional regularizer for the gamma weight. - beta_constraint: Optional constraint for the beta weight. - gamma_constraint: Optional constraint for the gamma weight. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as input. 
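-
-    # Example
-
-    A minimal usage sketch (filter counts and input shape are
-    illustrative assumptions, not taken from this file):
-
-    ```python
-    from keras.models import Sequential
-    from keras.layers import Conv2D, BatchNormalization, Activation
-
-    model = Sequential()
-    model.add(Conv2D(32, (3, 3), input_shape=(28, 28, 3)))
-    # with data_format="channels_last", the default axis=-1
-    # normalizes the channels axis
-    model.add(BatchNormalization())
-    model.add(Activation('relu'))
-    ```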
- - # References - - [Batch Normalization: Accelerating Deep Network Training by - Reducing Internal Covariate Shift](https://arxiv.org/abs/1502.03167) - """ - - @interfaces.legacy_batchnorm_support - def __init__(self, - axis=-1, - momentum=0.99, - epsilon=1e-3, - center=True, - scale=True, - beta_initializer='zeros', - gamma_initializer='ones', - moving_mean_initializer='zeros', - moving_variance_initializer='ones', - beta_regularizer=None, - gamma_regularizer=None, - beta_constraint=None, - gamma_constraint=None, - **kwargs): - super(BatchNormalization, self).__init__(**kwargs) - self.supports_masking = True - self.axis = axis - self.momentum = momentum - self.epsilon = epsilon - self.center = center - self.scale = scale - self.beta_initializer = initializers.get(beta_initializer) - self.gamma_initializer = initializers.get(gamma_initializer) - self.moving_mean_initializer = initializers.get( - moving_mean_initializer) - self.moving_variance_initializer = ( - initializers.get(moving_variance_initializer)) - self.beta_regularizer = regularizers.get(beta_regularizer) - self.gamma_regularizer = regularizers.get(gamma_regularizer) - self.beta_constraint = constraints.get(beta_constraint) - self.gamma_constraint = constraints.get(gamma_constraint) - - def build(self, input_shape): - dim = input_shape[self.axis] - if dim is None: - raise ValueError('Axis ' + str(self.axis) + ' of ' - 'input tensor should have a defined dimension ' - 'but the layer received an input with shape ' + - str(input_shape) + '.') - self.input_spec = InputSpec(ndim=len(input_shape), - axes={self.axis: dim}) - shape = (dim,) - - if self.scale: - self.gamma = self.add_weight(shape=shape, - name='gamma', - initializer=self.gamma_initializer, - regularizer=self.gamma_regularizer, - constraint=self.gamma_constraint) - else: - self.gamma = None - if self.center: - self.beta = self.add_weight(shape=shape, - name='beta', - initializer=self.beta_initializer, - regularizer=self.beta_regularizer, - constraint=self.beta_constraint) - else: - self.beta = None - self.moving_mean = self.add_weight( - shape=shape, - name='moving_mean', - initializer=self.moving_mean_initializer, - trainable=False) - self.moving_variance = self.add_weight( - shape=shape, - name='moving_variance', - initializer=self.moving_variance_initializer, - trainable=False) - self.built = True - - def call(self, inputs, training=None): - input_shape = K.int_shape(inputs) - # Prepare broadcasting shape. - ndim = len(input_shape) - reduction_axes = list(range(len(input_shape))) - del reduction_axes[self.axis] - broadcast_shape = [1] * len(input_shape) - broadcast_shape[self.axis] = input_shape[self.axis] - - # Determines whether broadcasting is needed. - needs_broadcasting = (sorted(reduction_axes) != list(range(ndim))[:-1]) - - def normalize_inference(): - if needs_broadcasting: - # In this case we must explicitly broadcast all parameters. 
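-                # e.g. for `axis=1` on 4D inputs, moving statistics of
-                # shape (C,) are reshaped to (1, C, 1, 1) so that they
-                # broadcast against inputs of shape (N, C, H, W).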
- broadcast_moving_mean = K.reshape(self.moving_mean, - broadcast_shape) - broadcast_moving_variance = K.reshape(self.moving_variance, - broadcast_shape) - if self.center: - broadcast_beta = K.reshape(self.beta, broadcast_shape) - else: - broadcast_beta = None - if self.scale: - broadcast_gamma = K.reshape(self.gamma, - broadcast_shape) - else: - broadcast_gamma = None - return K.batch_normalization( - inputs, - broadcast_moving_mean, - broadcast_moving_variance, - broadcast_beta, - broadcast_gamma, - axis=self.axis, - epsilon=self.epsilon) - else: - return K.batch_normalization( - inputs, - self.moving_mean, - self.moving_variance, - self.beta, - self.gamma, - axis=self.axis, - epsilon=self.epsilon) - - # If the learning phase is *static* and set to inference: - if training in {0, False}: - return normalize_inference() - - # If the learning is either dynamic, or set to training: - normed_training, mean, variance = K.normalize_batch_in_training( - inputs, self.gamma, self.beta, reduction_axes, - epsilon=self.epsilon) - - if K.backend() != 'cntk': - sample_size = K.prod([K.shape(inputs)[axis] - for axis in reduction_axes]) - sample_size = K.cast(sample_size, dtype=K.dtype(inputs)) - if K.backend() == 'tensorflow' and sample_size.dtype != 'float32': - sample_size = K.cast(sample_size, dtype='float32') - - # sample variance - unbiased estimator of population variance - variance *= sample_size / (sample_size - (1.0 + self.epsilon)) - - self.add_update([K.moving_average_update(self.moving_mean, - mean, - self.momentum), - K.moving_average_update(self.moving_variance, - variance, - self.momentum)], - inputs) - - # Pick the normalized form corresponding to the training phase. - return K.in_train_phase(normed_training, - normalize_inference, - training=training) - - def get_config(self): - config = { - 'axis': self.axis, - 'momentum': self.momentum, - 'epsilon': self.epsilon, - 'center': self.center, - 'scale': self.scale, - 'beta_initializer': initializers.serialize(self.beta_initializer), - 'gamma_initializer': initializers.serialize(self.gamma_initializer), - 'moving_mean_initializer': - initializers.serialize(self.moving_mean_initializer), - 'moving_variance_initializer': - initializers.serialize(self.moving_variance_initializer), - 'beta_regularizer': regularizers.serialize(self.beta_regularizer), - 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), - 'beta_constraint': constraints.serialize(self.beta_constraint), - 'gamma_constraint': constraints.serialize(self.gamma_constraint) - } - base_config = super(BatchNormalization, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape -# -*- coding: utf-8 -*- -"""Pooling layers. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from .. import backend as K -from ..engine.base_layer import Layer -from ..engine.base_layer import InputSpec -from ..utils import conv_utils -from ..legacy import interfaces - - -class _Pooling1D(Layer): - """Abstract class for different pooling 1D layers. 
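-
-    1D pooling is implemented by inserting a dummy spatial axis and
-    reusing the backend 2D pooling operation (see `call` below).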
- """ - - def __init__(self, pool_size=2, strides=None, - padding='valid', data_format='channels_last', **kwargs): - super(_Pooling1D, self).__init__(**kwargs) - if strides is None: - strides = pool_size - self.pool_size = conv_utils.normalize_tuple(pool_size, 1, 'pool_size') - self.strides = conv_utils.normalize_tuple(strides, 1, 'strides') - self.padding = conv_utils.normalize_padding(padding) - self.data_format = K.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=3) - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - steps = input_shape[2] - features = input_shape[1] - else: - steps = input_shape[1] - features = input_shape[2] - length = conv_utils.conv_output_length(steps, - self.pool_size[0], - self.padding, - self.strides[0]) - if self.data_format == 'channels_first': - return (input_shape[0], features, length) - else: - return (input_shape[0], length, features) - - def _pooling_function(self, inputs, pool_size, strides, - padding, data_format): - raise NotImplementedError - - def call(self, inputs): - dummy_axis = 2 if self.data_format == 'channels_last' else 3 - inputs = K.expand_dims(inputs, dummy_axis) # add dummy last dimension - output = self._pooling_function(inputs=inputs, - pool_size=self.pool_size + (1,), - strides=self.strides + (1,), - padding=self.padding, - data_format=self.data_format) - return K.squeeze(output, dummy_axis) # remove dummy last dimension - - def get_config(self): - config = {'strides': self.strides, - 'pool_size': self.pool_size, - 'padding': self.padding, - 'data_format': self.data_format} - base_config = super(_Pooling1D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class MaxPooling1D(_Pooling1D): - """Max pooling operation for temporal data. - - # Arguments - pool_size: Integer, size of the max pooling windows. - strides: Integer, or None. Factor by which to downscale. - E.g. 2 will halve the input. - If None, it will default to `pool_size`. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, steps, features)` while `channels_first` - corresponds to inputs with shape - `(batch, features, steps)`. - - # Input shape - - If `data_format='channels_last'`: - 3D tensor with shape: - `(batch_size, steps, features)` - - If `data_format='channels_first'`: - 3D tensor with shape: - `(batch_size, features, steps)` - - # Output shape - - If `data_format='channels_last'`: - 3D tensor with shape: - `(batch_size, downsampled_steps, features)` - - If `data_format='channels_first'`: - 3D tensor with shape: - `(batch_size, features, downsampled_steps)` - """ - - @interfaces.legacy_pooling1d_support - def __init__(self, pool_size=2, strides=None, - padding='valid', data_format='channels_last', **kwargs): - super(MaxPooling1D, self).__init__(pool_size, strides, - padding, data_format, - **kwargs) - - def _pooling_function(self, inputs, pool_size, strides, - padding, data_format): - output = K.pool2d(inputs, pool_size, strides, - padding, data_format, pool_mode='max') - return output - - -class AveragePooling1D(_Pooling1D): - """Average pooling for temporal data. - - # Arguments - pool_size: Integer, size of the average pooling windows. - strides: Integer, or None. Factor by which to downscale. - E.g. 2 will halve the input. - If None, it will default to `pool_size`. 
- padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, steps, features)` while `channels_first` - corresponds to inputs with shape - `(batch, features, steps)`. - - # Input shape - - If `data_format='channels_last'`: - 3D tensor with shape: - `(batch_size, steps, features)` - - If `data_format='channels_first'`: - 3D tensor with shape: - `(batch_size, features, steps)` - - # Output shape - - If `data_format='channels_last'`: - 3D tensor with shape: - `(batch_size, downsampled_steps, features)` - - If `data_format='channels_first'`: - 3D tensor with shape: - `(batch_size, features, downsampled_steps)` - """ - - @interfaces.legacy_pooling1d_support - def __init__(self, pool_size=2, strides=None, - padding='valid', data_format='channels_last', **kwargs): - super(AveragePooling1D, self).__init__(pool_size, strides, - padding, data_format, - **kwargs) - - def _pooling_function(self, inputs, pool_size, strides, - padding, data_format): - output = K.pool2d(inputs, pool_size, strides, - padding, data_format, pool_mode='avg') - return output - - -class _Pooling2D(Layer): - """Abstract class for different pooling 2D layers. - """ - - def __init__(self, pool_size=(2, 2), strides=None, padding='valid', - data_format=None, **kwargs): - super(_Pooling2D, self).__init__(**kwargs) - if strides is None: - strides = pool_size - self.pool_size = conv_utils.normalize_tuple(pool_size, 2, 'pool_size') - self.strides = conv_utils.normalize_tuple(strides, 2, 'strides') - self.padding = conv_utils.normalize_padding(padding) - self.data_format = K.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=4) - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - rows = input_shape[2] - cols = input_shape[3] - elif self.data_format == 'channels_last': - rows = input_shape[1] - cols = input_shape[2] - rows = conv_utils.conv_output_length(rows, self.pool_size[0], - self.padding, self.strides[0]) - cols = conv_utils.conv_output_length(cols, self.pool_size[1], - self.padding, self.strides[1]) - if self.data_format == 'channels_first': - return (input_shape[0], input_shape[1], rows, cols) - elif self.data_format == 'channels_last': - return (input_shape[0], rows, cols, input_shape[3]) - - def _pooling_function(self, inputs, pool_size, strides, - padding, data_format): - raise NotImplementedError - - def call(self, inputs): - output = self._pooling_function(inputs=inputs, - pool_size=self.pool_size, - strides=self.strides, - padding=self.padding, - data_format=self.data_format) - return output - - def get_config(self): - config = {'pool_size': self.pool_size, - 'padding': self.padding, - 'strides': self.strides, - 'data_format': self.data_format} - base_config = super(_Pooling2D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class MaxPooling2D(_Pooling2D): - """Max pooling operation for spatial data. - - # Arguments - pool_size: integer or tuple of 2 integers, - factors by which to downscale (vertical, horizontal). - (2, 2) will halve the input in both spatial dimension. - If only one integer is specified, the same window length - will be used for both dimensions. - strides: Integer, tuple of 2 integers, or None. - Strides values. - If None, it will default to `pool_size`. - padding: One of `"valid"` or `"same"` (case-insensitive). 
- data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - # Input shape - - If `data_format='channels_last'`: - 4D tensor with shape: - `(batch_size, rows, cols, channels)` - - If `data_format='channels_first'`: - 4D tensor with shape: - `(batch_size, channels, rows, cols)` - - # Output shape - - If `data_format='channels_last'`: - 4D tensor with shape: - `(batch_size, pooled_rows, pooled_cols, channels)` - - If `data_format='channels_first'`: - 4D tensor with shape: - `(batch_size, channels, pooled_rows, pooled_cols)` - """ - - @interfaces.legacy_pooling2d_support - def __init__(self, pool_size=(2, 2), strides=None, padding='valid', - data_format=None, **kwargs): - super(MaxPooling2D, self).__init__(pool_size, strides, padding, - data_format, **kwargs) - - def _pooling_function(self, inputs, pool_size, strides, - padding, data_format): - output = K.pool2d(inputs, pool_size, strides, - padding, data_format, - pool_mode='max') - return output - - -class AveragePooling2D(_Pooling2D): - """Average pooling operation for spatial data. - - # Arguments - pool_size: integer or tuple of 2 integers, - factors by which to downscale (vertical, horizontal). - (2, 2) will halve the input in both spatial dimension. - If only one integer is specified, the same window length - will be used for both dimensions. - strides: Integer, tuple of 2 integers, or None. - Strides values. - If None, it will default to `pool_size`. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - # Input shape - - If `data_format='channels_last'`: - 4D tensor with shape: - `(batch_size, rows, cols, channels)` - - If `data_format='channels_first'`: - 4D tensor with shape: - `(batch_size, channels, rows, cols)` - - # Output shape - - If `data_format='channels_last'`: - 4D tensor with shape: - `(batch_size, pooled_rows, pooled_cols, channels)` - - If `data_format='channels_first'`: - 4D tensor with shape: - `(batch_size, channels, pooled_rows, pooled_cols)` - """ - - @interfaces.legacy_pooling2d_support - def __init__(self, pool_size=(2, 2), strides=None, padding='valid', - data_format=None, **kwargs): - super(AveragePooling2D, self).__init__(pool_size, strides, padding, - data_format, **kwargs) - - def _pooling_function(self, inputs, pool_size, strides, - padding, data_format): - output = K.pool2d(inputs, pool_size, strides, - padding, data_format, pool_mode='avg') - return output - - -class _Pooling3D(Layer): - """Abstract class for different pooling 3D layers. 
- """ - - def __init__(self, pool_size=(2, 2, 2), strides=None, padding='valid', - data_format=None, **kwargs): - super(_Pooling3D, self).__init__(**kwargs) - if strides is None: - strides = pool_size - self.pool_size = conv_utils.normalize_tuple(pool_size, 3, 'pool_size') - self.strides = conv_utils.normalize_tuple(strides, 3, 'strides') - self.padding = conv_utils.normalize_padding(padding) - self.data_format = K.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=5) - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - len_dim1 = input_shape[2] - len_dim2 = input_shape[3] - len_dim3 = input_shape[4] - elif self.data_format == 'channels_last': - len_dim1 = input_shape[1] - len_dim2 = input_shape[2] - len_dim3 = input_shape[3] - len_dim1 = conv_utils.conv_output_length(len_dim1, self.pool_size[0], - self.padding, self.strides[0]) - len_dim2 = conv_utils.conv_output_length(len_dim2, self.pool_size[1], - self.padding, self.strides[1]) - len_dim3 = conv_utils.conv_output_length(len_dim3, self.pool_size[2], - self.padding, self.strides[2]) - if self.data_format == 'channels_first': - return (input_shape[0], - input_shape[1], - len_dim1, len_dim2, len_dim3) - elif self.data_format == 'channels_last': - return (input_shape[0], - len_dim1, len_dim2, len_dim3, - input_shape[4]) - - def _pooling_function(self, inputs, pool_size, strides, - padding, data_format): - raise NotImplementedError - - def call(self, inputs): - output = self._pooling_function(inputs=inputs, - pool_size=self.pool_size, - strides=self.strides, - padding=self.padding, - data_format=self.data_format) - return output - - def get_config(self): - config = {'pool_size': self.pool_size, - 'padding': self.padding, - 'strides': self.strides, - 'data_format': self.data_format} - base_config = super(_Pooling3D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class MaxPooling3D(_Pooling3D): - """Max pooling operation for 3D data (spatial or spatio-temporal). - - # Arguments - pool_size: tuple of 3 integers, - factors by which to downscale (dim1, dim2, dim3). - (2, 2, 2) will halve the size of the 3D input in each dimension. - strides: tuple of 3 integers, or None. Strides values. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - while `channels_first` corresponds to inputs with shape - `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". 
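-
-    # Examples
-
-    A minimal sketch (shapes are illustrative): halving each of the three
-    spatial dimensions of a channels-last 5D input.
-
-    ```python
-        model = keras.models.Sequential()
-        model.add(keras.layers.MaxPooling3D(pool_size=(2, 2, 2),
-                                            input_shape=(16, 64, 64, 1)))
-    ```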
- - # Input shape - - If `data_format='channels_last'`: - 5D tensor with shape: - `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - - If `data_format='channels_first'`: - 5D tensor with shape: - `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` - - # Output shape - - If `data_format='channels_last'`: - 5D tensor with shape: - `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` - - If `data_format='channels_first'`: - 5D tensor with shape: - `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` - """ - - @interfaces.legacy_pooling3d_support - def __init__(self, pool_size=(2, 2, 2), strides=None, padding='valid', - data_format=None, **kwargs): - super(MaxPooling3D, self).__init__(pool_size, strides, padding, - data_format, **kwargs) - - def _pooling_function(self, inputs, pool_size, strides, - padding, data_format): - output = K.pool3d(inputs, pool_size, strides, - padding, data_format, pool_mode='max') - return output - - -class AveragePooling3D(_Pooling3D): - """Average pooling operation for 3D data (spatial or spatio-temporal). - - # Arguments - pool_size: tuple of 3 integers, - factors by which to downscale (dim1, dim2, dim3). - (2, 2, 2) will halve the size of the 3D input in each dimension. - strides: tuple of 3 integers, or None. Strides values. - padding: One of `"valid"` or `"same"` (case-insensitive). - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - while `channels_first` corresponds to inputs with shape - `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - # Input shape - - If `data_format='channels_last'`: - 5D tensor with shape: - `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - - If `data_format='channels_first'`: - 5D tensor with shape: - `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` - - # Output shape - - If `data_format='channels_last'`: - 5D tensor with shape: - `(batch_size, pooled_dim1, pooled_dim2, pooled_dim3, channels)` - - If `data_format='channels_first'`: - 5D tensor with shape: - `(batch_size, channels, pooled_dim1, pooled_dim2, pooled_dim3)` - """ - - @interfaces.legacy_pooling3d_support - def __init__(self, pool_size=(2, 2, 2), strides=None, padding='valid', - data_format=None, **kwargs): - super(AveragePooling3D, self).__init__(pool_size, strides, padding, - data_format, **kwargs) - - def _pooling_function(self, inputs, pool_size, strides, - padding, data_format): - output = K.pool3d(inputs, pool_size, strides, - padding, data_format, - pool_mode='avg') - return output - - -class _GlobalPooling1D(Layer): - """Abstract class for different global pooling 1D layers. 
- """ - - def __init__(self, data_format='channels_last', **kwargs): - super(_GlobalPooling1D, self).__init__(**kwargs) - self.input_spec = InputSpec(ndim=3) - self.data_format = K.normalize_data_format(data_format) - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - return (input_shape[0], input_shape[1]) - else: - return (input_shape[0], input_shape[2]) - - def call(self, inputs): - raise NotImplementedError - - def get_config(self): - config = {'data_format': self.data_format} - base_config = super(_GlobalPooling1D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class GlobalAveragePooling1D(_GlobalPooling1D): - """Global average pooling operation for temporal data. - - # Arguments - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, steps, features)` while `channels_first` - corresponds to inputs with shape - `(batch, features, steps)`. - - # Input shape - - If `data_format='channels_last'`: - 3D tensor with shape: - `(batch_size, steps, features)` - - If `data_format='channels_first'`: - 3D tensor with shape: - `(batch_size, features, steps)` - - # Output shape - 2D tensor with shape: - `(batch_size, features)` - """ - - def __init__(self, data_format='channels_last', **kwargs): - super(GlobalAveragePooling1D, self).__init__(data_format, - **kwargs) - self.supports_masking = True - - def call(self, inputs, mask=None): - steps_axis = 1 if self.data_format == 'channels_last' else 2 - if mask is not None: - mask = K.cast(mask, K.floatx()) - input_shape = K.int_shape(inputs) - broadcast_shape = [-1, input_shape[steps_axis], 1] - mask = K.reshape(mask, broadcast_shape) - inputs *= mask - return K.sum(inputs, axis=steps_axis) / K.sum(mask, axis=steps_axis) - else: - return K.mean(inputs, axis=steps_axis) - - def compute_mask(self, inputs, mask=None): - return None - - -class GlobalMaxPooling1D(_GlobalPooling1D): - """Global max pooling operation for temporal data. - - # Arguments - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, steps, features)` while `channels_first` - corresponds to inputs with shape - `(batch, features, steps)`. - - # Input shape - - If `data_format='channels_last'`: - 3D tensor with shape: - `(batch_size, steps, features)` - - If `data_format='channels_first'`: - 3D tensor with shape: - `(batch_size, features, steps)` - - # Output shape - 2D tensor with shape: - `(batch_size, features)` - """ - - def call(self, inputs): - steps_axis = 1 if self.data_format == 'channels_last' else 2 - return K.max(inputs, axis=steps_axis) - - -class _GlobalPooling2D(Layer): - """Abstract class for different global pooling 2D layers. 
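-
-    Subclasses reduce over the two spatial axes and return a 2D tensor
-    of shape `(batch_size, channels)`.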
- """ - - @interfaces.legacy_global_pooling_support - def __init__(self, data_format=None, **kwargs): - super(_GlobalPooling2D, self).__init__(**kwargs) - self.data_format = K.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=4) - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_last': - return (input_shape[0], input_shape[3]) - else: - return (input_shape[0], input_shape[1]) - - def call(self, inputs): - raise NotImplementedError - - def get_config(self): - config = {'data_format': self.data_format} - base_config = super(_GlobalPooling2D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class GlobalAveragePooling2D(_GlobalPooling2D): - """Global average pooling operation for spatial data. - - # Arguments - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - # Input shape - - If `data_format='channels_last'`: - 4D tensor with shape: - `(batch_size, rows, cols, channels)` - - If `data_format='channels_first'`: - 4D tensor with shape: - `(batch_size, channels, rows, cols)` - - # Output shape - 2D tensor with shape: - `(batch_size, channels)` - """ - - def call(self, inputs): - if self.data_format == 'channels_last': - return K.mean(inputs, axis=[1, 2]) - else: - return K.mean(inputs, axis=[2, 3]) - - -class GlobalMaxPooling2D(_GlobalPooling2D): - """Global max pooling operation for spatial data. - - # Arguments - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - # Input shape - - If `data_format='channels_last'`: - 4D tensor with shape: - `(batch_size, rows, cols, channels)` - - If `data_format='channels_first'`: - 4D tensor with shape: - `(batch_size, channels, rows, cols)` - - # Output shape - 2D tensor with shape: - `(batch_size, channels)` - """ - - def call(self, inputs): - if self.data_format == 'channels_last': - return K.max(inputs, axis=[1, 2]) - else: - return K.max(inputs, axis=[2, 3]) - - -class _GlobalPooling3D(Layer): - """Abstract class for different global pooling 3D layers. 
- """ - - @interfaces.legacy_global_pooling_support - def __init__(self, data_format=None, **kwargs): - super(_GlobalPooling3D, self).__init__(**kwargs) - self.data_format = K.normalize_data_format(data_format) - self.input_spec = InputSpec(ndim=5) - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_last': - return (input_shape[0], input_shape[4]) - else: - return (input_shape[0], input_shape[1]) - - def call(self, inputs): - raise NotImplementedError - - def get_config(self): - config = {'data_format': self.data_format} - base_config = super(_GlobalPooling3D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class GlobalAveragePooling3D(_GlobalPooling3D): - """Global Average pooling operation for 3D data. - - # Arguments - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - while `channels_first` corresponds to inputs with shape - `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - # Input shape - - If `data_format='channels_last'`: - 5D tensor with shape: - `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - - If `data_format='channels_first'`: - 5D tensor with shape: - `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` - - # Output shape - 2D tensor with shape: - `(batch_size, channels)` - """ - - def call(self, inputs): - if self.data_format == 'channels_last': - return K.mean(inputs, axis=[1, 2, 3]) - else: - return K.mean(inputs, axis=[2, 3, 4]) - - -class GlobalMaxPooling3D(_GlobalPooling3D): - """Global Max pooling operation for 3D data. - - # Arguments - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - while `channels_first` corresponds to inputs with shape - `(batch, channels, spatial_dim1, spatial_dim2, spatial_dim3)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - - # Input shape - - If `data_format='channels_last'`: - 5D tensor with shape: - `(batch_size, spatial_dim1, spatial_dim2, spatial_dim3, channels)` - - If `data_format='channels_first'`: - 5D tensor with shape: - `(batch_size, channels, spatial_dim1, spatial_dim2, spatial_dim3)` - - # Output shape - 2D tensor with shape: - `(batch_size, channels)` - """ - - def call(self, inputs): - if self.data_format == 'channels_last': - return K.max(inputs, axis=[1, 2, 3]) - else: - return K.max(inputs, axis=[2, 3, 4]) - - -# Aliases - -AvgPool1D = AveragePooling1D -MaxPool1D = MaxPooling1D -AvgPool2D = AveragePooling2D -MaxPool2D = MaxPooling2D -AvgPool3D = AveragePooling3D -MaxPool3D = MaxPooling3D -GlobalMaxPool1D = GlobalMaxPooling1D -GlobalMaxPool2D = GlobalMaxPooling2D -GlobalMaxPool3D = GlobalMaxPooling3D -GlobalAvgPool1D = GlobalAveragePooling1D -GlobalAvgPool2D = GlobalAveragePooling2D -GlobalAvgPool3D = GlobalAveragePooling3D -# -*- coding: utf-8 -*- -"""Recurrent layers and their base classes. 
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import numpy as np
-import warnings
-
-from .. import backend as K
-from .. import activations
-from .. import initializers
-from .. import regularizers
-from .. import constraints
-from ..engine.base_layer import Layer
-from ..engine.base_layer import InputSpec
-from ..utils.generic_utils import has_arg
-from ..utils.generic_utils import to_list
-
-# Legacy support.
-from ..legacy.layers import Recurrent
-from ..legacy import interfaces
-
-
-class StackedRNNCells(Layer):
-    """Wrapper allowing a stack of RNN cells to behave as a single cell.
-
-    Used to implement efficient stacked RNNs.
-
-    # Arguments
-        cells: List of RNN cell instances.
-
-    # Examples
-
-    ```python
-        cells = [
-            keras.layers.LSTMCell(output_dim),
-            keras.layers.LSTMCell(output_dim),
-            keras.layers.LSTMCell(output_dim),
-        ]
-
-        inputs = keras.Input((timesteps, input_dim))
-        x = keras.layers.RNN(cells)(inputs)
-    ```
-    """
-
-    def __init__(self, cells, **kwargs):
-        for cell in cells:
-            if not hasattr(cell, 'call'):
-                raise ValueError('All cells must have a `call` method. '
-                                 'Received cells:', cells)
-            if not hasattr(cell, 'state_size'):
-                raise ValueError('All cells must have a '
-                                 '`state_size` attribute. '
-                                 'Received cells:', cells)
-        self.cells = cells
-        # reverse_state_order determines whether the state size will be in a
-        # reverse order of the cells' states. The user might want to set this
-        # to True to keep the existing behavior. This is only useful when
-        # using `RNN(return_state=True)`, since the state will be returned in
-        # the same order as state_size.
-        self.reverse_state_order = kwargs.pop('reverse_state_order', False)
-        if self.reverse_state_order:
-            warnings.warn('`reverse_state_order=True` in `StackedRNNCells` '
-                          'will soon be deprecated. Please update the code to '
-                          'work with the natural order of states if you '
-                          'rely on the RNN states, '
-                          'e.g. `RNN(return_state=True)`.')
-        super(StackedRNNCells, self).__init__(**kwargs)
-
-    @property
-    def state_size(self):
-        # States are a flat list of the individual cell state sizes,
-        # e.g. the states of a 2-layer LSTM would be `[h1, c1, h2, c2]`
-        # (assuming one LSTM has states [h, c]).
-        # In the case of reverse_state_order=True, the state_size will be
-        # `[h2, c2, h1, c1]`.
-        state_size = []
-        for cell in self.cells[::-1] if self.reverse_state_order else self.cells:
-            if hasattr(cell.state_size, '__len__'):
-                state_size += list(cell.state_size)
-            else:
-                state_size.append(cell.state_size)
-        return tuple(state_size)
-
-    @property
-    def output_size(self):
-        if getattr(self.cells[-1], 'output_size', None) is not None:
-            return self.cells[-1].output_size
-        if hasattr(self.cells[-1].state_size, '__len__'):
-            return self.cells[-1].state_size[0]
-        else:
-            return self.cells[-1].state_size
-
-    def call(self, inputs, states, constants=None, **kwargs):
-        # Recover per-cell states.
-        nested_states = []
-        for cell in self.cells[::-1] if self.reverse_state_order else self.cells:
-            if hasattr(cell.state_size, '__len__'):
-                nested_states.append(states[:len(cell.state_size)])
-                states = states[len(cell.state_size):]
-            else:
-                nested_states.append([states[0]])
-                states = states[1:]
-        if self.reverse_state_order:
-            nested_states = nested_states[::-1]
-
-        # Call the cells in order and store the returned states.
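-        # Each cell consumes the previous cell's output as its input,
-        # so the stack behaves like a single cell whose state is the
-        # flat list of all per-cell states.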
-        new_nested_states = []
-        for cell, states in zip(self.cells, nested_states):
-            if has_arg(cell.call, 'constants'):
-                inputs, states = cell.call(inputs, states,
-                                           constants=constants,
-                                           **kwargs)
-            else:
-                inputs, states = cell.call(inputs, states, **kwargs)
-            new_nested_states.append(states)
-
-        # Format the new states as a flat list,
-        # reversing the cell order if requested.
-        new_states = []
-        if self.reverse_state_order:
-            new_nested_states = new_nested_states[::-1]
-        for cell_states in new_nested_states:
-            new_states += cell_states
-        return inputs, new_states
-
-    def build(self, input_shape):
-        if isinstance(input_shape, list):
-            constants_shape = input_shape[1:]
-            input_shape = input_shape[0]
-        for cell in self.cells:
-            if isinstance(cell, Layer):
-                if has_arg(cell.call, 'constants'):
-                    cell.build([input_shape] + constants_shape)
-                else:
-                    cell.build(input_shape)
-            if getattr(cell, 'output_size', None) is not None:
-                output_dim = cell.output_size
-            elif hasattr(cell.state_size, '__len__'):
-                output_dim = cell.state_size[0]
-            else:
-                output_dim = cell.state_size
-            input_shape = (input_shape[0], output_dim)
-        self.built = True
-
-    def get_config(self):
-        cells = []
-        for cell in self.cells:
-            cells.append({'class_name': cell.__class__.__name__,
-                          'config': cell.get_config()})
-        config = {'cells': cells}
-        base_config = super(StackedRNNCells, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-    @classmethod
-    def from_config(cls, config, custom_objects=None):
-        from . import deserialize as deserialize_layer
-        cells = []
-        for cell_config in config.pop('cells'):
-            cells.append(deserialize_layer(cell_config,
-                                           custom_objects=custom_objects))
-        return cls(cells, **config)
-
-    @property
-    def trainable_weights(self):
-        if not self.trainable:
-            return []
-        weights = []
-        for cell in self.cells:
-            if isinstance(cell, Layer):
-                weights += cell.trainable_weights
-        return weights
-
-    @property
-    def non_trainable_weights(self):
-        weights = []
-        for cell in self.cells:
-            if isinstance(cell, Layer):
-                weights += cell.non_trainable_weights
-        if not self.trainable:
-            trainable_weights = []
-            for cell in self.cells:
-                if isinstance(cell, Layer):
-                    trainable_weights += cell.trainable_weights
-            return trainable_weights + weights
-        return weights
-
-    def get_weights(self):
-        """Retrieves the weights of the model.
-
-        # Returns
-            A flat list of Numpy arrays.
-        """
-        weights = []
-        for cell in self.cells:
-            if isinstance(cell, Layer):
-                weights += cell.weights
-        return K.batch_get_value(weights)
-
-    def set_weights(self, weights):
-        """Sets the weights of the model.
-
-        # Arguments
-            weights: A list of Numpy arrays with shapes and types matching
-                the output of `model.get_weights()`.
-        """
-        tuples = []
-        for cell in self.cells:
-            if isinstance(cell, Layer):
-                num_param = len(cell.weights)
-                # Consume this cell's weights, then advance past them.
-                # (Reassigning `weights` to the head slice here would leave
-                # every subsequent cell with an empty weight list.)
-                cell_weights = weights[:num_param]
-                for sw, w in zip(cell.weights, cell_weights):
-                    tuples.append((sw, w))
-                weights = weights[num_param:]
-        K.batch_set_value(tuples)
-
-    @property
-    def losses(self):
-        losses = []
-        for cell in self.cells:
-            if isinstance(cell, Layer):
-                cell_losses = cell.losses
-                losses += cell_losses
-        return losses
-
-    def get_losses_for(self, inputs=None):
-        losses = []
-        for cell in self.cells:
-            if isinstance(cell, Layer):
-                cell_losses = cell.get_losses_for(inputs)
-                losses += cell_losses
-        return losses
-
-
-class RNN(Layer):
-    """Base class for recurrent layers.
-
-    # Arguments
-        cell: A RNN cell instance. 
A RNN cell is a class that has:
-            - a `call(input_at_t, states_at_t)` method, returning
-                `(output_at_t, states_at_t_plus_1)`. The call method of the
-                cell can also take the optional argument `constants`, see
-                section "Note on passing external constants" below.
-            - a `state_size` attribute. This can be a single integer
-                (single state) in which case it is
-                the size of the recurrent state
-                (which should be the same as the size of the cell output).
-                This can also be a list/tuple of integers
-                (one size per state).
-            - an `output_size` attribute. This can be a single integer or a
-                TensorShape, which represents the shape of the output. For
-                backward compatibility, if this attribute is not available
-                for the cell, the value will be inferred from the first
-                element of the `state_size`.
-            It is also possible for `cell` to be a list of RNN cell instances,
-            in which case the cells get stacked one after the other in the
-            RNN, implementing an efficient stacked RNN.
-        return_sequences: Boolean. Whether to return the last output
-            in the output sequence, or the full sequence.
-        return_state: Boolean. Whether to return the last state
-            in addition to the output.
-        go_backwards: Boolean (default False).
-            If True, process the input sequence backwards and return the
-            reversed sequence.
-        stateful: Boolean (default False). If True, the last state
-            for each sample at index i in a batch will be used as the initial
-            state for the sample of index i in the following batch.
-        unroll: Boolean (default False).
-            If True, the network will be unrolled,
-            else a symbolic loop will be used.
-            Unrolling can speed up a RNN,
-            although it tends to be more memory-intensive.
-            Unrolling is only suitable for short sequences.
-        input_dim: dimensionality of the input (integer).
-            This argument (or alternatively,
-            the keyword argument `input_shape`)
-            is required when using this layer as the first layer in a model.
-        input_length: Length of input sequences, to be specified
-            when it is constant.
-            This argument is required if you are going to connect
-            `Flatten` then `Dense` layers upstream
-            (without it, the shape of the dense outputs cannot be computed).
-            Note that if the recurrent layer is not the first layer
-            in your model, you would need to specify the input length
-            at the level of the first layer
-            (e.g. via the `input_shape` argument).
-
-    # Input shape
-        3D tensor with shape `(batch_size, timesteps, input_dim)`.
-
-    # Output shape
-        - if `return_state`: a list of tensors. The first tensor is
-            the output. The remaining tensors are the last states,
-            each with shape `(batch_size, units)`. For example, the number of
-            state tensors is 1 (for RNN and GRU) or 2 (for LSTM).
-        - if `return_sequences`: 3D tensor with shape
-            `(batch_size, timesteps, units)`.
-        - else, 2D tensor with shape `(batch_size, units)`.
-
-    # Masking
-        This layer supports masking for input data with a variable number
-        of timesteps. To introduce masks to your data,
-        use an [Embedding](embeddings.md) layer with the `mask_zero` parameter
-        set to `True`.
-
-    # Note on using statefulness in RNNs
-        You can set RNN layers to be 'stateful', which means that the states
-        computed for the samples in one batch will be reused as initial states
-        for the samples in the next batch. This assumes a one-to-one mapping
-        between samples in different successive batches.
-
-        To enable statefulness:
-            - specify `stateful=True` in the layer constructor.
-            - specify a fixed batch size for your model:
-                for a Sequential model, pass
-                `batch_input_shape=(...)` to the first layer in your model;
-                for a functional model with 1 or more Input layers, pass
-                `batch_shape=(...)` to all the first layers in your model.
-                This is the expected shape of your inputs
-                *including the batch size*.
-                It should be a tuple of integers, e.g. `(32, 10, 100)`.
-            - specify `shuffle=False` when calling fit().
-
-        To reset the states of your model, call `.reset_states()` on either
-        a specific layer, or on your entire model.
-
-    # Note on specifying the initial state of RNNs
-        You can specify the initial state of RNN layers symbolically by
-        calling them with the keyword argument `initial_state`. The value of
-        `initial_state` should be a tensor or list of tensors representing
-        the initial state of the RNN layer.
-
-        You can specify the initial state of RNN layers numerically by
-        calling `reset_states` with the keyword argument `states`. The value of
-        `states` should be a numpy array or list of numpy arrays representing
-        the initial state of the RNN layer.
-
-    # Note on passing external constants to RNNs
-        You can pass "external" constants to the cell using the `constants`
-        keyword argument of the `RNN.__call__` (as well as `RNN.call`) method.
-        This requires that the `cell.call` method accepts the same keyword
-        argument `constants`. Such constants can be used to condition the cell
-        transformation on additional static inputs (not changing over time),
-        a.k.a. an attention mechanism.
-
-    # Examples
-
-    ```python
-        # First, let's define a RNN Cell, as a layer subclass.
-
-        class MinimalRNNCell(keras.layers.Layer):
-
-            def __init__(self, units, **kwargs):
-                self.units = units
-                self.state_size = units
-                super(MinimalRNNCell, self).__init__(**kwargs)
-
-            def build(self, input_shape):
-                self.kernel = self.add_weight(shape=(input_shape[-1], self.units),
-                                              initializer='uniform',
-                                              name='kernel')
-                self.recurrent_kernel = self.add_weight(
-                    shape=(self.units, self.units),
-                    initializer='uniform',
-                    name='recurrent_kernel')
-                self.built = True
-
-            def call(self, inputs, states):
-                prev_output = states[0]
-                h = K.dot(inputs, self.kernel)
-                output = h + K.dot(prev_output, self.recurrent_kernel)
-                return output, [output]
-
-        # Let's use this cell in a RNN layer:
-
-        cell = MinimalRNNCell(32)
-        x = keras.Input((None, 5))
-        layer = RNN(cell)
-        y = layer(x)
-
-        # Here's how to use the cell to build a stacked RNN:
-
-        cells = [MinimalRNNCell(32), MinimalRNNCell(64)]
-        x = keras.Input((None, 5))
-        layer = RNN(cells)
-        y = layer(x)
-    ```
-    """
-
-    def __init__(self, cell,
-                 return_sequences=False,
-                 return_state=False,
-                 go_backwards=False,
-                 stateful=False,
-                 unroll=False,
-                 **kwargs):
-        if isinstance(cell, (list, tuple)):
-            cell = StackedRNNCells(cell)
-        if not hasattr(cell, 'call'):
-            raise ValueError('`cell` should have a `call` method. 
' - 'The RNN was passed:', cell) - if not hasattr(cell, 'state_size'): - raise ValueError('The RNN cell should have ' - 'an attribute `state_size` ' - '(tuple of integers, ' - 'one integer per RNN state).') - super(RNN, self).__init__(**kwargs) - self.cell = cell - self.return_sequences = return_sequences - self.return_state = return_state - self.go_backwards = go_backwards - self.stateful = stateful - self.unroll = unroll - - self.supports_masking = True - self.input_spec = [InputSpec(ndim=3)] - self.state_spec = None - self._states = None - self.constants_spec = None - self._num_constants = None - - @property - def states(self): - if self._states is None: - if isinstance(self.cell.state_size, int): - num_states = 1 - else: - num_states = len(self.cell.state_size) - return [None for _ in range(num_states)] - return self._states - - @states.setter - def states(self, states): - self._states = states - - def compute_output_shape(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - - if hasattr(self.cell.state_size, '__len__'): - state_size = self.cell.state_size - else: - state_size = [self.cell.state_size] - - if getattr(self.cell, 'output_size', None) is not None: - output_dim = self.cell.output_size - else: - output_dim = state_size[0] - - if self.return_sequences: - output_shape = (input_shape[0], input_shape[1], output_dim) - else: - output_shape = (input_shape[0], output_dim) - - if self.return_state: - state_shape = [(input_shape[0], dim) for dim in state_size] - return [output_shape] + state_shape - else: - return output_shape - - def compute_mask(self, inputs, mask): - if isinstance(mask, list): - mask = mask[0] - output_mask = mask if self.return_sequences else None - if self.return_state: - state_mask = [None for _ in self.states] - return [output_mask] + state_mask - else: - return output_mask - - def build(self, input_shape): - # Note input_shape will be list of shapes of initial states and - # constants if these are passed in __call__. - if self._num_constants is not None: - constants_shape = input_shape[-self._num_constants:] - else: - constants_shape = None - - if isinstance(input_shape, list): - input_shape = input_shape[0] - - batch_size = input_shape[0] if self.stateful else None - input_dim = input_shape[-1] - self.input_spec[0] = InputSpec(shape=(batch_size, None, input_dim)) - - # allow cell (if layer) to build before we set or validate state_spec - if isinstance(self.cell, Layer): - step_input_shape = (input_shape[0],) + input_shape[2:] - if constants_shape is not None: - self.cell.build([step_input_shape] + constants_shape) - else: - self.cell.build(step_input_shape) - - # set or validate state_spec - if hasattr(self.cell.state_size, '__len__'): - state_size = list(self.cell.state_size) - else: - state_size = [self.cell.state_size] - - if self.state_spec is not None: - # initial_state was passed in call, check compatibility - if [spec.shape[-1] for spec in self.state_spec] != state_size: - raise ValueError( - 'An `initial_state` was passed that is not compatible with ' - '`cell.state_size`. 
Received `state_spec`={}; ' - 'however `cell.state_size` is ' - '{}'.format(self.state_spec, self.cell.state_size)) - else: - self.state_spec = [InputSpec(shape=(None, dim)) - for dim in state_size] - if self.stateful: - self.reset_states() - self.built = True - - def get_initial_state(self, inputs): - # build an all-zero tensor of shape (samples, output_dim) - initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) - initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) - initial_state = K.expand_dims(initial_state) # (samples, 1) - if hasattr(self.cell.state_size, '__len__'): - return [K.tile(initial_state, [1, dim]) - for dim in self.cell.state_size] - else: - return [K.tile(initial_state, [1, self.cell.state_size])] - - def __call__(self, inputs, initial_state=None, constants=None, **kwargs): - inputs, initial_state, constants = _standardize_args( - inputs, initial_state, constants, self._num_constants) - - if initial_state is None and constants is None: - return super(RNN, self).__call__(inputs, **kwargs) - - # If any of `initial_state` or `constants` are specified and are Keras - # tensors, then add them to the inputs and temporarily modify the - # input_spec to include them. - - additional_inputs = [] - additional_specs = [] - if initial_state is not None: - kwargs['initial_state'] = initial_state - additional_inputs += initial_state - self.state_spec = [InputSpec(shape=K.int_shape(state)) - for state in initial_state] - additional_specs += self.state_spec - if constants is not None: - kwargs['constants'] = constants - additional_inputs += constants - self.constants_spec = [InputSpec(shape=K.int_shape(constant)) - for constant in constants] - self._num_constants = len(constants) - additional_specs += self.constants_spec - # at this point additional_inputs cannot be empty - is_keras_tensor = K.is_keras_tensor(additional_inputs[0]) - for tensor in additional_inputs: - if K.is_keras_tensor(tensor) != is_keras_tensor: - raise ValueError('The initial state or constants of an RNN' - ' layer cannot be specified with a mix of' - ' Keras tensors and non-Keras tensors' - ' (a "Keras tensor" is a tensor that was' - ' returned by a Keras layer, or by `Input`)') - - if is_keras_tensor: - # Compute the full input spec, including state and constants - full_input = [inputs] + additional_inputs - full_input_spec = self.input_spec + additional_specs - # Perform the call with temporarily replaced input_spec - original_input_spec = self.input_spec - self.input_spec = full_input_spec - output = super(RNN, self).__call__(full_input, **kwargs) - self.input_spec = original_input_spec - return output - else: - return super(RNN, self).__call__(inputs, **kwargs) - - def call(self, - inputs, - mask=None, - training=None, - initial_state=None, - constants=None): - # input shape: `(samples, time (padded with zeros), input_dim)` - # note that the .build() method of subclasses MUST define - # self.input_spec and self.state_spec with complete input shapes. - if isinstance(inputs, list): - # get initial_state from full input spec - # as they could be copied to multiple GPU. 
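-            # `__call__` appended any Keras-tensor `initial_state` and
-            # `constants` to `inputs`, so they are sliced back out here:
-            # entries after the first are states, minus any trailing
-            # constants.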
- if self._num_constants is None: - initial_state = inputs[1:] - else: - initial_state = inputs[1:-self._num_constants] - if len(initial_state) == 0: - initial_state = None - inputs = inputs[0] - if initial_state is not None: - pass - elif self.stateful: - initial_state = self.states - else: - initial_state = self.get_initial_state(inputs) - - if isinstance(mask, list): - mask = mask[0] - - if len(initial_state) != len(self.states): - raise ValueError('Layer has ' + str(len(self.states)) + - ' states but was passed ' + - str(len(initial_state)) + - ' initial states.') - input_shape = K.int_shape(inputs) - timesteps = input_shape[1] - if self.unroll and timesteps in [None, 1]: - raise ValueError('Cannot unroll a RNN if the ' - 'time dimension is undefined or equal to 1. \n' - '- If using a Sequential model, ' - 'specify the time dimension by passing ' - 'an `input_shape` or `batch_input_shape` ' - 'argument to your first layer. If your ' - 'first layer is an Embedding, you can ' - 'also use the `input_length` argument.\n' - '- If using the functional API, specify ' - 'the time dimension by passing a `shape` ' - 'or `batch_shape` argument to your Input layer.') - - kwargs = {} - if has_arg(self.cell.call, 'training'): - kwargs['training'] = training - - if constants: - if not has_arg(self.cell.call, 'constants'): - raise ValueError('RNN cell does not support constants') - - def step(inputs, states): - constants = states[-self._num_constants:] - states = states[:-self._num_constants] - return self.cell.call(inputs, states, constants=constants, - **kwargs) - else: - def step(inputs, states): - return self.cell.call(inputs, states, **kwargs) - - last_output, outputs, states = K.rnn(step, - inputs, - initial_state, - constants=constants, - go_backwards=self.go_backwards, - mask=mask, - unroll=self.unroll, - input_length=timesteps) - if self.stateful: - updates = [] - for i in range(len(states)): - updates.append((self.states[i], states[i])) - self.add_update(updates, inputs) - - if self.return_sequences: - output = outputs - else: - output = last_output - - # Properly set learning phase - if getattr(last_output, '_uses_learning_phase', False): - output._uses_learning_phase = True - for state in states: - state._uses_learning_phase = True - - if self.return_state: - states = to_list(states, allow_tuple=True) - return [output] + states - else: - return output - - def reset_states(self, states=None): - if not self.stateful: - raise AttributeError('Layer must be stateful.') - batch_size = self.input_spec[0].shape[0] - if not batch_size: - raise ValueError('If a RNN is stateful, it needs to know ' - 'its batch size. 
Specify the batch size ' - 'of your input tensors: \n' - '- If using a Sequential model, ' - 'specify the batch size by passing ' - 'a `batch_input_shape` ' - 'argument to your first layer.\n' - '- If using the functional API, specify ' - 'the batch size by passing a ' - '`batch_shape` argument to your Input layer.') - # initialize state if None - if self.states[0] is None: - if hasattr(self.cell.state_size, '__len__'): - self.states = [K.zeros((batch_size, dim)) - for dim in self.cell.state_size] - else: - self.states = [K.zeros((batch_size, self.cell.state_size))] - elif states is None: - if hasattr(self.cell.state_size, '__len__'): - for state, dim in zip(self.states, self.cell.state_size): - K.set_value(state, np.zeros((batch_size, dim))) - else: - K.set_value(self.states[0], - np.zeros((batch_size, self.cell.state_size))) - else: - states = to_list(states, allow_tuple=True) - if len(states) != len(self.states): - raise ValueError('Layer ' + self.name + ' expects ' + - str(len(self.states)) + ' states, ' - 'but it received ' + str(len(states)) + - ' state values. Input received: ' + - str(states)) - for index, (value, state) in enumerate(zip(states, self.states)): - if hasattr(self.cell.state_size, '__len__'): - dim = self.cell.state_size[index] - else: - dim = self.cell.state_size - if value.shape != (batch_size, dim): - raise ValueError('State ' + str(index) + - ' is incompatible with layer ' + - self.name + ': expected shape=' + - str((batch_size, dim)) + - ', found shape=' + str(value.shape)) - # TODO: consider batch calls to `set_value`. - K.set_value(state, value) - - def get_config(self): - config = {'return_sequences': self.return_sequences, - 'return_state': self.return_state, - 'go_backwards': self.go_backwards, - 'stateful': self.stateful, - 'unroll': self.unroll} - if self._num_constants is not None: - config['num_constants'] = self._num_constants - - cell_config = self.cell.get_config() - config['cell'] = {'class_name': self.cell.__class__.__name__, - 'config': cell_config} - base_config = super(RNN, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - from . import deserialize as deserialize_layer - cell = deserialize_layer(config.pop('cell'), - custom_objects=custom_objects) - num_constants = config.pop('num_constants', None) - layer = cls(cell, **config) - layer._num_constants = num_constants - return layer - - @property - def trainable_weights(self): - if not self.trainable: - return [] - if isinstance(self.cell, Layer): - return self.cell.trainable_weights - return [] - - @property - def non_trainable_weights(self): - if isinstance(self.cell, Layer): - if not self.trainable: - return self.cell.weights - return self.cell.non_trainable_weights - return [] - - @property - def losses(self): - layer_losses = super(RNN, self).losses - if isinstance(self.cell, Layer): - return self.cell.losses + layer_losses - return layer_losses - - def get_losses_for(self, inputs=None): - if isinstance(self.cell, Layer): - cell_losses = self.cell.get_losses_for(inputs) - return cell_losses + super(RNN, self).get_losses_for(inputs) - return super(RNN, self).get_losses_for(inputs) - - -class SimpleRNNCell(Layer): - """Cell class for SimpleRNN. - - # Arguments - units: Positive integer, dimensionality of the output space. - activation: Activation function to use - (see [activations](../activations.md)). - Default: hyperbolic tangent (`tanh`). 
- If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs - (see [initializers](../initializers.md)). - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. - """ - - def __init__(self, units, - activation='tanh', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - **kwargs): - super(SimpleRNNCell, self).__init__(**kwargs) - self.units = units - self.activation = activations.get(activation) - self.use_bias = use_bias - - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - self.dropout = min(1., max(0., dropout)) - self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.state_size = self.units - self.output_size = self.units - self._dropout_mask = None - self._recurrent_dropout_mask = None - - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - name='recurrent_kernel', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - if self.use_bias: - self.bias = self.add_weight(shape=(self.units,), - name='bias', - initializer=self.bias_initializer, - 
-                                    regularizer=self.bias_regularizer,
-                                    constraint=self.bias_constraint)
-        else:
-            self.bias = None
-        self.built = True
-
-    def call(self, inputs, states, training=None):
-        prev_output = states[0]
-        if 0 < self.dropout < 1 and self._dropout_mask is None:
-            self._dropout_mask = _generate_dropout_mask(
-                K.ones_like(inputs),
-                self.dropout,
-                training=training)
-        if (0 < self.recurrent_dropout < 1 and
-                self._recurrent_dropout_mask is None):
-            self._recurrent_dropout_mask = _generate_dropout_mask(
-                K.ones_like(prev_output),
-                self.recurrent_dropout,
-                training=training)
-
-        dp_mask = self._dropout_mask
-        rec_dp_mask = self._recurrent_dropout_mask
-
-        if dp_mask is not None:
-            h = K.dot(inputs * dp_mask, self.kernel)
-        else:
-            h = K.dot(inputs, self.kernel)
-        if self.bias is not None:
-            h = K.bias_add(h, self.bias)
-
-        if rec_dp_mask is not None:
-            prev_output *= rec_dp_mask
-        output = h + K.dot(prev_output, self.recurrent_kernel)
-        if self.activation is not None:
-            output = self.activation(output)
-
-        # Properly set learning phase on output tensor.
-        if 0 < self.dropout + self.recurrent_dropout:
-            if training is None:
-                output._uses_learning_phase = True
-        return output, [output]
-
-    def get_config(self):
-        config = {'units': self.units,
-                  'activation': activations.serialize(self.activation),
-                  'use_bias': self.use_bias,
-                  'kernel_initializer':
-                      initializers.serialize(self.kernel_initializer),
-                  'recurrent_initializer':
-                      initializers.serialize(self.recurrent_initializer),
-                  'bias_initializer': initializers.serialize(self.bias_initializer),
-                  'kernel_regularizer':
-                      regularizers.serialize(self.kernel_regularizer),
-                  'recurrent_regularizer':
-                      regularizers.serialize(self.recurrent_regularizer),
-                  'bias_regularizer': regularizers.serialize(self.bias_regularizer),
-                  'kernel_constraint': constraints.serialize(self.kernel_constraint),
-                  'recurrent_constraint':
-                      constraints.serialize(self.recurrent_constraint),
-                  'bias_constraint': constraints.serialize(self.bias_constraint),
-                  'dropout': self.dropout,
-                  'recurrent_dropout': self.recurrent_dropout}
-        base_config = super(SimpleRNNCell, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-
-class SimpleRNN(RNN):
-    """Fully-connected RNN where the output is to be fed back to input.
-
-    # Arguments
-        units: Positive integer, dimensionality of the output space.
-        activation: Activation function to use
-            (see [activations](../activations.md)).
-            Default: hyperbolic tangent (`tanh`).
-            If you pass `None`, no activation is applied
-            (ie. "linear" activation: `a(x) = x`).
-        use_bias: Boolean, whether the layer uses a bias vector.
-        kernel_initializer: Initializer for the `kernel` weights matrix,
-            used for the linear transformation of the inputs
-            (see [initializers](../initializers.md)).
-        recurrent_initializer: Initializer for the `recurrent_kernel`
-            weights matrix,
-            used for the linear transformation of the recurrent state
-            (see [initializers](../initializers.md)).
-        bias_initializer: Initializer for the bias vector
-            (see [initializers](../initializers.md)).
-        kernel_regularizer: Regularizer function applied to
-            the `kernel` weights matrix
-            (see [regularizer](../regularizers.md)).
-        recurrent_regularizer: Regularizer function applied to
-            the `recurrent_kernel` weights matrix
-            (see [regularizer](../regularizers.md)).
-        bias_regularizer: Regularizer function applied to the bias vector
-            (see [regularizer](../regularizers.md)).
- activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - """ - - @interfaces.legacy_recurrent_support - def __init__(self, units, - activation='tanh', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - **kwargs): - if 'implementation' in kwargs: - kwargs.pop('implementation') - warnings.warn('The `implementation` argument ' - 'in `SimpleRNN` has been deprecated. ' - 'Please remove it from your layer call.') - if K.backend() == 'theano' and (dropout or recurrent_dropout): - warnings.warn( - 'RNN dropout is no longer supported with the Theano backend ' - 'due to technical limitations. ' - 'You can either set `dropout` and `recurrent_dropout` to 0, ' - 'or use the TensorFlow backend.') - dropout = 0. - recurrent_dropout = 0. 
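The Theano fallback above silently disables both kinds of dropout, and `stateful` is the reason the batch size has to be pinned down. A minimal sketch of how these options fit together, assuming the Keras 2.x `Sequential` API (all layer sizes and shapes here are arbitrary):

```python
from keras.models import Sequential
from keras.layers import SimpleRNN, Dense

# A stateful layer needs a fixed batch size (8 here); on the Theano
# backend the dropout settings are reset to 0, as the warning above says.
model = Sequential()
model.add(SimpleRNN(32, dropout=0.2, recurrent_dropout=0.2,
                    stateful=True, batch_input_shape=(8, 20, 16)))
model.add(Dense(1))
model.compile(optimizer='adam', loss='mse')

# After a pass over one long sequence, drop the carried-over state:
model.reset_states()
```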
- - cell = SimpleRNNCell(units, - activation=activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout) - super(SimpleRNN, self).__init__(cell, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - unroll=unroll, - **kwargs) - self.activity_regularizer = regularizers.get(activity_regularizer) - - def call(self, inputs, mask=None, training=None, initial_state=None): - self.cell._dropout_mask = None - self.cell._recurrent_dropout_mask = None - return super(SimpleRNN, self).call(inputs, - mask=mask, - training=training, - initial_state=initial_state) - - @property - def units(self): - return self.cell.units - - @property - def activation(self): - return self.cell.activation - - @property - def use_bias(self): - return self.cell.use_bias - - @property - def kernel_initializer(self): - return self.cell.kernel_initializer - - @property - def recurrent_initializer(self): - return self.cell.recurrent_initializer - - @property - def bias_initializer(self): - return self.cell.bias_initializer - - @property - def kernel_regularizer(self): - return self.cell.kernel_regularizer - - @property - def recurrent_regularizer(self): - return self.cell.recurrent_regularizer - - @property - def bias_regularizer(self): - return self.cell.bias_regularizer - - @property - def kernel_constraint(self): - return self.cell.kernel_constraint - - @property - def recurrent_constraint(self): - return self.cell.recurrent_constraint - - @property - def bias_constraint(self): - return self.cell.bias_constraint - - @property - def dropout(self): - return self.cell.dropout - - @property - def recurrent_dropout(self): - return self.cell.recurrent_dropout - - def get_config(self): - config = {'units': self.units, - 'activation': activations.serialize(self.activation), - 'use_bias': self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'dropout': self.dropout, - 'recurrent_dropout': self.recurrent_dropout} - base_config = super(SimpleRNN, self).get_config() - del base_config['cell'] - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config): - if 'implementation' in config: - config.pop('implementation') - return cls(**config) - - -class GRUCell(Layer): - """Cell class for the GRU layer. - - # Arguments - units: Positive integer, dimensionality of the output space. 
- activation: Activation function to use - (see [activations](../activations.md)). - Default: hyperbolic tangent (`tanh`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use - for the recurrent step - (see [activations](../activations.md)). - Default: hard sigmoid (`hard_sigmoid`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs - (see [initializers](../initializers.md)). - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. - implementation: Implementation mode, either 1 or 2. - Mode 1 will structure its operations as a larger number of - smaller dot products and additions, whereas mode 2 will - batch them into fewer, larger operations. These modes will - have different performance profiles on different hardware and - for different applications. - reset_after: GRU convention (whether to apply reset gate after or - before matrix multiplication). False = "before" (default), - True = "after" (CuDNN compatible). 
- """ - - def __init__(self, units, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - implementation=1, - reset_after=False, - **kwargs): - super(GRUCell, self).__init__(**kwargs) - self.units = units - self.activation = activations.get(activation) - self.recurrent_activation = activations.get(recurrent_activation) - self.use_bias = use_bias - - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - self.dropout = min(1., max(0., dropout)) - self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.implementation = implementation - self.reset_after = reset_after - self.state_size = self.units - self.output_size = self.units - self._dropout_mask = None - self._recurrent_dropout_mask = None - - def build(self, input_shape): - input_dim = input_shape[-1] - - if isinstance(self.recurrent_initializer, initializers.Identity): - def recurrent_identity(shape, gain=1.): - return gain * np.concatenate( - [np.identity(shape[0])] * (shape[1] // shape[0]), axis=1) - - self.recurrent_initializer = recurrent_identity - - self.kernel = self.add_weight(shape=(input_dim, self.units * 3), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units * 3), - name='recurrent_kernel', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - - if self.use_bias: - if not self.reset_after: - bias_shape = (3 * self.units,) - else: - # separate biases for input and recurrent kernels - # Note: the shape is intentionally different from CuDNNGRU biases - # `(2 * 3 * self.units,)`, so that we can distinguish the classes - # when loading and converting saved weights. 
-                bias_shape = (2, 3 * self.units)
-            self.bias = self.add_weight(shape=bias_shape,
-                                        name='bias',
-                                        initializer=self.bias_initializer,
-                                        regularizer=self.bias_regularizer,
-                                        constraint=self.bias_constraint)
-            if not self.reset_after:
-                self.input_bias, self.recurrent_bias = self.bias, None
-            else:
-                # NOTE: need to flatten, since slicing in CNTK gives 2D array
-                self.input_bias = K.flatten(self.bias[0])
-                self.recurrent_bias = K.flatten(self.bias[1])
-        else:
-            self.bias = None
-
-        # update gate
-        self.kernel_z = self.kernel[:, :self.units]
-        self.recurrent_kernel_z = self.recurrent_kernel[:, :self.units]
-        # reset gate
-        self.kernel_r = self.kernel[:, self.units: self.units * 2]
-        self.recurrent_kernel_r = self.recurrent_kernel[:,
-                                                        self.units:
-                                                        self.units * 2]
-        # new gate
-        self.kernel_h = self.kernel[:, self.units * 2:]
-        self.recurrent_kernel_h = self.recurrent_kernel[:, self.units * 2:]
-
-        if self.use_bias:
-            # bias for inputs
-            self.input_bias_z = self.input_bias[:self.units]
-            self.input_bias_r = self.input_bias[self.units: self.units * 2]
-            self.input_bias_h = self.input_bias[self.units * 2:]
-            # bias for hidden state - just for compatibility with CuDNN
-            if self.reset_after:
-                self.recurrent_bias_z = self.recurrent_bias[:self.units]
-                self.recurrent_bias_r = (
-                    self.recurrent_bias[self.units: self.units * 2])
-                self.recurrent_bias_h = self.recurrent_bias[self.units * 2:]
-        else:
-            self.input_bias_z = None
-            self.input_bias_r = None
-            self.input_bias_h = None
-            if self.reset_after:
-                self.recurrent_bias_z = None
-                self.recurrent_bias_r = None
-                self.recurrent_bias_h = None
-        self.built = True
-
-    def call(self, inputs, states, training=None):
-        h_tm1 = states[0]  # previous memory
-
-        if 0 < self.dropout < 1 and self._dropout_mask is None:
-            self._dropout_mask = _generate_dropout_mask(
-                K.ones_like(inputs),
-                self.dropout,
-                training=training,
-                count=3)
-        if (0 < self.recurrent_dropout < 1 and
-                self._recurrent_dropout_mask is None):
-            self._recurrent_dropout_mask = _generate_dropout_mask(
-                K.ones_like(h_tm1),
-                self.recurrent_dropout,
-                training=training,
-                count=3)
-
-        # dropout matrices for input units
-        dp_mask = self._dropout_mask
-        # dropout matrices for recurrent units
-        rec_dp_mask = self._recurrent_dropout_mask
-
-        if self.implementation == 1:
-            if 0. < self.dropout < 1.:
-                inputs_z = inputs * dp_mask[0]
-                inputs_r = inputs * dp_mask[1]
-                inputs_h = inputs * dp_mask[2]
-            else:
-                inputs_z = inputs
-                inputs_r = inputs
-                inputs_h = inputs
-
-            x_z = K.dot(inputs_z, self.kernel_z)
-            x_r = K.dot(inputs_r, self.kernel_r)
-            x_h = K.dot(inputs_h, self.kernel_h)
-            if self.use_bias:
-                x_z = K.bias_add(x_z, self.input_bias_z)
-                x_r = K.bias_add(x_r, self.input_bias_r)
-                x_h = K.bias_add(x_h, self.input_bias_h)
-
-            if 0. < self.recurrent_dropout < 1.:
-                h_tm1_z = h_tm1 * rec_dp_mask[0]
-                h_tm1_r = h_tm1 * rec_dp_mask[1]
-                h_tm1_h = h_tm1 * rec_dp_mask[2]
-            else:
-                h_tm1_z = h_tm1
-                h_tm1_r = h_tm1
-                h_tm1_h = h_tm1
-
-            recurrent_z = K.dot(h_tm1_z, self.recurrent_kernel_z)
-            recurrent_r = K.dot(h_tm1_r, self.recurrent_kernel_r)
-            if self.reset_after and self.use_bias:
-                recurrent_z = K.bias_add(recurrent_z, self.recurrent_bias_z)
-                recurrent_r = K.bias_add(recurrent_r, self.recurrent_bias_r)
-
-            z = self.recurrent_activation(x_z + recurrent_z)
-            r = self.recurrent_activation(x_r + recurrent_r)
-
-            # reset gate applied after/before matrix multiplication
-            if self.reset_after:
-                recurrent_h = K.dot(h_tm1_h, self.recurrent_kernel_h)
-                if self.use_bias:
-                    recurrent_h = K.bias_add(
-                        recurrent_h, self.recurrent_bias_h)
-                recurrent_h = r * recurrent_h
-            else:
-                recurrent_h = K.dot(r * h_tm1_h, self.recurrent_kernel_h)
-
-            hh = self.activation(x_h + recurrent_h)
-        else:
-            if 0. < self.dropout < 1.:
-                inputs *= dp_mask[0]
-
-            # inputs projected by all gate matrices at once
-            matrix_x = K.dot(inputs, self.kernel)
-            if self.use_bias:
-                # biases: bias_z_i, bias_r_i, bias_h_i
-                matrix_x = K.bias_add(matrix_x, self.input_bias)
-            x_z = matrix_x[:, :self.units]
-            x_r = matrix_x[:, self.units: 2 * self.units]
-            x_h = matrix_x[:, 2 * self.units:]
-
-            if 0. < self.recurrent_dropout < 1.:
-                h_tm1 *= rec_dp_mask[0]
-
-            if self.reset_after:
-                # hidden state projected by all gate matrices at once
-                matrix_inner = K.dot(h_tm1, self.recurrent_kernel)
-                if self.use_bias:
-                    matrix_inner = K.bias_add(
-                        matrix_inner, self.recurrent_bias)
-            else:
-                # hidden state projected separately for update/reset and new
-                matrix_inner = K.dot(h_tm1,
-                                     self.recurrent_kernel[:, :2 * self.units])
-
-            recurrent_z = matrix_inner[:, :self.units]
-            recurrent_r = matrix_inner[:, self.units: 2 * self.units]
-
-            z = self.recurrent_activation(x_z + recurrent_z)
-            r = self.recurrent_activation(x_r + recurrent_r)
-
-            if self.reset_after:
-                recurrent_h = r * matrix_inner[:, 2 * self.units:]
-            else:
-                recurrent_h = K.dot(r * h_tm1,
-                                    self.recurrent_kernel[:, 2 * self.units:])
-
-            hh = self.activation(x_h + recurrent_h)
-
-        # previous and candidate state mixed by update gate
-        h = z * h_tm1 + (1 - z) * hh
-
-        if 0 < self.dropout + self.recurrent_dropout:
-            if training is None:
-                h._uses_learning_phase = True
-
-        return h, [h]
-
-    def get_config(self):
-        config = {'units': self.units,
-                  'activation': activations.serialize(self.activation),
-                  'recurrent_activation':
-                      activations.serialize(self.recurrent_activation),
-                  'use_bias': self.use_bias,
-                  'kernel_initializer':
-                      initializers.serialize(self.kernel_initializer),
-                  'recurrent_initializer':
-                      initializers.serialize(self.recurrent_initializer),
-                  'bias_initializer': initializers.serialize(self.bias_initializer),
-                  'kernel_regularizer':
-                      regularizers.serialize(self.kernel_regularizer),
-                  'recurrent_regularizer':
-                      regularizers.serialize(self.recurrent_regularizer),
-                  'bias_regularizer': regularizers.serialize(self.bias_regularizer),
-                  'kernel_constraint': constraints.serialize(self.kernel_constraint),
-                  'recurrent_constraint':
-                      constraints.serialize(self.recurrent_constraint),
-                  'bias_constraint': constraints.serialize(self.bias_constraint),
-                  'dropout': self.dropout,
-                  'recurrent_dropout': self.recurrent_dropout,
-                  'implementation': self.implementation,
-                  'reset_after': self.reset_after}
-        base_config = super(GRUCell, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-
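`GRUCell` only computes a single timestep, so it is normally driven by the generic `RNN` wrapper defined earlier (the `GRU` layer below does exactly this internally). A short sketch, assuming the Keras 2.x API (all sizes here are arbitrary):

```python
from keras.models import Sequential
from keras.layers import RNN, GRUCell, Dense

# The cell computes one step; RNN iterates it over the time dimension.
model = Sequential()
model.add(RNN(GRUCell(64), input_shape=(None, 32)))
model.add(Dense(10, activation='softmax'))

# Per the docstring above, CuDNNGRU-compatible weights require the
# "after" gate convention: GRU(64, reset_after=True,
#                              recurrent_activation='sigmoid')
```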
-class GRU(RNN): - """Gated Recurrent Unit - Cho et al. 2014. - - There are two variants. The default one is based on 1406.1078v3 and - has reset gate applied to hidden state before matrix multiplication. The - other one is based on original 1406.1078v1 and has the order reversed. - - The second variant is compatible with CuDNNGRU (GPU-only) and allows - inference on CPU. Thus it has separate biases for `kernel` and - `recurrent_kernel`. Use `'reset_after'=True` and - `recurrent_activation='sigmoid'`. - - # Arguments - units: Positive integer, dimensionality of the output space. - activation: Activation function to use - (see [activations](../activations.md)). - Default: hyperbolic tangent (`tanh`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - recurrent_activation: Activation function to use - for the recurrent step - (see [activations](../activations.md)). - Default: hard sigmoid (`hard_sigmoid`). - If you pass `None`, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs - (see [initializers](../initializers.md)). - recurrent_initializer: Initializer for the `recurrent_kernel` - weights matrix, - used for the linear transformation of the recurrent state - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - recurrent_regularizer: Regularizer function applied to - the `recurrent_kernel` weights matrix - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. - implementation: Implementation mode, either 1 or 2. - Mode 1 will structure its operations as a larger number of - smaller dot products and additions, whereas mode 2 will - batch them into fewer, larger operations. These modes will - have different performance profiles on different hardware and - for different applications. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). 
- If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - reset_after: GRU convention (whether to apply reset gate after or - before matrix multiplication). False = "before" (default), - True = "after" (CuDNN compatible). - - # References - - [Learning Phrase Representations using RNN Encoder-Decoder for - Statistical Machine Translation](https://arxiv.org/abs/1406.1078) - - [On the Properties of Neural Machine Translation: - Encoder-Decoder Approaches](https://arxiv.org/abs/1409.1259) - - [Empirical Evaluation of Gated Recurrent Neural Networks on - Sequence Modeling](https://arxiv.org/abs/1412.3555v1) - - [A Theoretically Grounded Application of Dropout in - Recurrent Neural Networks](https://arxiv.org/abs/1512.05287) - """ - - @interfaces.legacy_recurrent_support - def __init__(self, units, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - implementation=1, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - reset_after=False, - **kwargs): - if implementation == 0: - warnings.warn('`implementation=0` has been deprecated, ' - 'and now defaults to `implementation=1`.' - 'Please update your layer call.') - if K.backend() == 'theano' and (dropout or recurrent_dropout): - warnings.warn( - 'RNN dropout is no longer supported with the Theano backend ' - 'due to technical limitations. ' - 'You can either set `dropout` and `recurrent_dropout` to 0, ' - 'or use the TensorFlow backend.') - dropout = 0. - recurrent_dropout = 0. 
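The `return_sequences` and `return_state` flags documented above change what a layer call returns. A sketch with the Keras 2.x functional API (shapes arbitrary):

```python
from keras.layers import Input, GRU
from keras.models import Model

inputs = Input(shape=(10, 8))
# return_state appends the final hidden state to the output
outputs, state_h = GRU(32, return_sequences=True, return_state=True)(inputs)
model = Model(inputs, [outputs, state_h])
# outputs has shape (batch, 10, 32); state_h has shape (batch, 32)
```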
- - cell = GRUCell(units, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - implementation=implementation, - reset_after=reset_after) - super(GRU, self).__init__(cell, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - unroll=unroll, - **kwargs) - self.activity_regularizer = regularizers.get(activity_regularizer) - - def call(self, inputs, mask=None, training=None, initial_state=None): - self.cell._dropout_mask = None - self.cell._recurrent_dropout_mask = None - return super(GRU, self).call(inputs, - mask=mask, - training=training, - initial_state=initial_state) - - @property - def units(self): - return self.cell.units - - @property - def activation(self): - return self.cell.activation - - @property - def recurrent_activation(self): - return self.cell.recurrent_activation - - @property - def use_bias(self): - return self.cell.use_bias - - @property - def kernel_initializer(self): - return self.cell.kernel_initializer - - @property - def recurrent_initializer(self): - return self.cell.recurrent_initializer - - @property - def bias_initializer(self): - return self.cell.bias_initializer - - @property - def kernel_regularizer(self): - return self.cell.kernel_regularizer - - @property - def recurrent_regularizer(self): - return self.cell.recurrent_regularizer - - @property - def bias_regularizer(self): - return self.cell.bias_regularizer - - @property - def kernel_constraint(self): - return self.cell.kernel_constraint - - @property - def recurrent_constraint(self): - return self.cell.recurrent_constraint - - @property - def bias_constraint(self): - return self.cell.bias_constraint - - @property - def dropout(self): - return self.cell.dropout - - @property - def recurrent_dropout(self): - return self.cell.recurrent_dropout - - @property - def implementation(self): - return self.cell.implementation - - @property - def reset_after(self): - return self.cell.reset_after - - def get_config(self): - config = {'units': self.units, - 'activation': activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'dropout': self.dropout, - 'recurrent_dropout': self.recurrent_dropout, - 'implementation': self.implementation, - 'reset_after': self.reset_after} - 
-        base_config = super(GRU, self).get_config()
-        del base_config['cell']
-        return dict(list(base_config.items()) + list(config.items()))
-
-    @classmethod
-    def from_config(cls, config):
-        if 'implementation' in config and config['implementation'] == 0:
-            config['implementation'] = 1
-        return cls(**config)
-
-
-class LSTMCell(Layer):
-    """Cell class for the LSTM layer.
-
-    # Arguments
-        units: Positive integer, dimensionality of the output space.
-        activation: Activation function to use
-            (see [activations](../activations.md)).
-            Default: hyperbolic tangent (`tanh`).
-            If you pass `None`, no activation is applied
-            (ie. "linear" activation: `a(x) = x`).
-        recurrent_activation: Activation function to use
-            for the recurrent step
-            (see [activations](../activations.md)).
-            Default: hard sigmoid (`hard_sigmoid`).
-            If you pass `None`, no activation is applied
-            (ie. "linear" activation: `a(x) = x`).
-        use_bias: Boolean, whether the layer uses a bias vector.
-        kernel_initializer: Initializer for the `kernel` weights matrix,
-            used for the linear transformation of the inputs
-            (see [initializers](../initializers.md)).
-        recurrent_initializer: Initializer for the `recurrent_kernel`
-            weights matrix,
-            used for the linear transformation of the recurrent state
-            (see [initializers](../initializers.md)).
-        bias_initializer: Initializer for the bias vector
-            (see [initializers](../initializers.md)).
-        unit_forget_bias: Boolean.
-            If True, add 1 to the bias of the forget gate at initialization.
-            Setting it to true will also force `bias_initializer="zeros"`.
-            This is recommended in [Jozefowicz et al. (2015)](
-            http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf).
-        kernel_regularizer: Regularizer function applied to
-            the `kernel` weights matrix
-            (see [regularizer](../regularizers.md)).
-        recurrent_regularizer: Regularizer function applied to
-            the `recurrent_kernel` weights matrix
-            (see [regularizer](../regularizers.md)).
-        bias_regularizer: Regularizer function applied to the bias vector
-            (see [regularizer](../regularizers.md)).
-        kernel_constraint: Constraint function applied to
-            the `kernel` weights matrix
-            (see [constraints](../constraints.md)).
-        recurrent_constraint: Constraint function applied to
-            the `recurrent_kernel` weights matrix
-            (see [constraints](../constraints.md)).
-        bias_constraint: Constraint function applied to the bias vector
-            (see [constraints](../constraints.md)).
-        dropout: Float between 0 and 1.
-            Fraction of the units to drop for
-            the linear transformation of the inputs.
-        recurrent_dropout: Float between 0 and 1.
-            Fraction of the units to drop for
-            the linear transformation of the recurrent state.
-        implementation: Implementation mode, either 1 or 2.
-            Mode 1 will structure its operations as a larger number of
-            smaller dot products and additions, whereas mode 2 will
-            batch them into fewer, larger operations. These modes will
-            have different performance profiles on different hardware and
-            for different applications.
- """ - - def __init__(self, units, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - implementation=1, - **kwargs): - super(LSTMCell, self).__init__(**kwargs) - self.units = units - self.activation = activations.get(activation) - self.recurrent_activation = activations.get(recurrent_activation) - self.use_bias = use_bias - - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.unit_forget_bias = unit_forget_bias - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - self.dropout = min(1., max(0., dropout)) - self.recurrent_dropout = min(1., max(0., recurrent_dropout)) - self.implementation = implementation - self.state_size = (self.units, self.units) - self.output_size = self.units - self._dropout_mask = None - self._recurrent_dropout_mask = None - - def build(self, input_shape): - input_dim = input_shape[-1] - - if type(self.recurrent_initializer).__name__ == 'Identity': - def recurrent_identity(shape, gain=1.): - return gain * np.concatenate( - [np.identity(shape[0])] * (shape[1] // shape[0]), axis=1) - - self.recurrent_initializer = recurrent_identity - - self.kernel = self.add_weight(shape=(input_dim, self.units * 4), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units * 4), - name='recurrent_kernel', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - - if self.use_bias: - if self.unit_forget_bias: - def bias_initializer(_, *args, **kwargs): - return K.concatenate([ - self.bias_initializer((self.units,), *args, **kwargs), - initializers.Ones()((self.units,), *args, **kwargs), - self.bias_initializer( - (self.units * 2,), *args, **kwargs), - ]) - else: - bias_initializer = self.bias_initializer - self.bias = self.add_weight(shape=(self.units * 4,), - name='bias', - initializer=bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - - self.kernel_i = self.kernel[:, :self.units] - self.kernel_f = self.kernel[:, self.units: self.units * 2] - self.kernel_c = self.kernel[:, self.units * 2: self.units * 3] - self.kernel_o = self.kernel[:, self.units * 3:] - - self.recurrent_kernel_i = self.recurrent_kernel[:, :self.units] - self.recurrent_kernel_f = ( - self.recurrent_kernel[:, self.units: self.units * 2]) - self.recurrent_kernel_c = ( - self.recurrent_kernel[:, self.units * 2: self.units * 3]) - self.recurrent_kernel_o = self.recurrent_kernel[:, self.units * 3:] - - if self.use_bias: - self.bias_i = self.bias[:self.units] - self.bias_f = self.bias[self.units: self.units * 2] 
-            self.bias_c = self.bias[self.units * 2: self.units * 3]
-            self.bias_o = self.bias[self.units * 3:]
-        else:
-            self.bias_i = None
-            self.bias_f = None
-            self.bias_c = None
-            self.bias_o = None
-        self.built = True
-
-    def call(self, inputs, states, training=None):
-        if 0 < self.dropout < 1 and self._dropout_mask is None:
-            self._dropout_mask = _generate_dropout_mask(
-                K.ones_like(inputs),
-                self.dropout,
-                training=training,
-                count=4)
-        if (0 < self.recurrent_dropout < 1 and
-                self._recurrent_dropout_mask is None):
-            self._recurrent_dropout_mask = _generate_dropout_mask(
-                K.ones_like(states[0]),
-                self.recurrent_dropout,
-                training=training,
-                count=4)
-
-        # dropout matrices for input units
-        dp_mask = self._dropout_mask
-        # dropout matrices for recurrent units
-        rec_dp_mask = self._recurrent_dropout_mask
-
-        h_tm1 = states[0]  # previous memory state
-        c_tm1 = states[1]  # previous carry state
-
-        if self.implementation == 1:
-            if 0 < self.dropout < 1.:
-                inputs_i = inputs * dp_mask[0]
-                inputs_f = inputs * dp_mask[1]
-                inputs_c = inputs * dp_mask[2]
-                inputs_o = inputs * dp_mask[3]
-            else:
-                inputs_i = inputs
-                inputs_f = inputs
-                inputs_c = inputs
-                inputs_o = inputs
-            x_i = K.dot(inputs_i, self.kernel_i)
-            x_f = K.dot(inputs_f, self.kernel_f)
-            x_c = K.dot(inputs_c, self.kernel_c)
-            x_o = K.dot(inputs_o, self.kernel_o)
-            if self.use_bias:
-                x_i = K.bias_add(x_i, self.bias_i)
-                x_f = K.bias_add(x_f, self.bias_f)
-                x_c = K.bias_add(x_c, self.bias_c)
-                x_o = K.bias_add(x_o, self.bias_o)
-
-            if 0 < self.recurrent_dropout < 1.:
-                h_tm1_i = h_tm1 * rec_dp_mask[0]
-                h_tm1_f = h_tm1 * rec_dp_mask[1]
-                h_tm1_c = h_tm1 * rec_dp_mask[2]
-                h_tm1_o = h_tm1 * rec_dp_mask[3]
-            else:
-                h_tm1_i = h_tm1
-                h_tm1_f = h_tm1
-                h_tm1_c = h_tm1
-                h_tm1_o = h_tm1
-            i = self.recurrent_activation(x_i + K.dot(h_tm1_i,
-                                                      self.recurrent_kernel_i))
-            f = self.recurrent_activation(x_f + K.dot(h_tm1_f,
-                                                      self.recurrent_kernel_f))
-            c = f * c_tm1 + i * self.activation(x_c + K.dot(h_tm1_c,
-                                                            self.recurrent_kernel_c))
-            o = self.recurrent_activation(x_o + K.dot(h_tm1_o,
-                                                      self.recurrent_kernel_o))
-        else:
-            if 0. < self.dropout < 1.:
-                inputs *= dp_mask[0]
-            z = K.dot(inputs, self.kernel)
-            if 0. < self.recurrent_dropout < 1.:
-                h_tm1 *= rec_dp_mask[0]
-            z += K.dot(h_tm1, self.recurrent_kernel)
-            if self.use_bias:
-                z = K.bias_add(z, self.bias)
-
-            z0 = z[:, :self.units]
-            z1 = z[:, self.units: 2 * self.units]
-            z2 = z[:, 2 * self.units: 3 * self.units]
-            z3 = z[:, 3 * self.units:]
-
-            i = self.recurrent_activation(z0)
-            f = self.recurrent_activation(z1)
-            c = f * c_tm1 + i * self.activation(z2)
-            o = self.recurrent_activation(z3)
-
-        h = o * self.activation(c)
-        if 0 < self.dropout + self.recurrent_dropout:
-            if training is None:
-                h._uses_learning_phase = True
-        return h, [h, c]
-
-    def get_config(self):
-        config = {'units': self.units,
-                  'activation': activations.serialize(self.activation),
-                  'recurrent_activation':
-                      activations.serialize(self.recurrent_activation),
-                  'use_bias': self.use_bias,
-                  'kernel_initializer':
-                      initializers.serialize(self.kernel_initializer),
-                  'recurrent_initializer':
-                      initializers.serialize(self.recurrent_initializer),
-                  'bias_initializer': initializers.serialize(self.bias_initializer),
-                  'unit_forget_bias': self.unit_forget_bias,
-                  'kernel_regularizer':
-                      regularizers.serialize(self.kernel_regularizer),
-                  'recurrent_regularizer':
-                      regularizers.serialize(self.recurrent_regularizer),
-                  'bias_regularizer': regularizers.serialize(self.bias_regularizer),
-                  'kernel_constraint': constraints.serialize(self.kernel_constraint),
-                  'recurrent_constraint':
-                      constraints.serialize(self.recurrent_constraint),
-                  'bias_constraint': constraints.serialize(self.bias_constraint),
-                  'dropout': self.dropout,
-                  'recurrent_dropout': self.recurrent_dropout,
-                  'implementation': self.implementation}
-        base_config = super(LSTMCell, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-
-class LSTM(RNN):
-    """Long Short-Term Memory layer - Hochreiter 1997.
-
-    # Arguments
-        units: Positive integer, dimensionality of the output space.
-        activation: Activation function to use
-            (see [activations](../activations.md)).
-            Default: hyperbolic tangent (`tanh`).
-            If you pass `None`, no activation is applied
-            (ie. "linear" activation: `a(x) = x`).
-        recurrent_activation: Activation function to use
-            for the recurrent step
-            (see [activations](../activations.md)).
-            Default: hard sigmoid (`hard_sigmoid`).
-            If you pass `None`, no activation is applied
-            (ie. "linear" activation: `a(x) = x`).
-        use_bias: Boolean, whether the layer uses a bias vector.
-        kernel_initializer: Initializer for the `kernel` weights matrix,
-            used for the linear transformation of the inputs.
-            (see [initializers](../initializers.md)).
-        recurrent_initializer: Initializer for the `recurrent_kernel`
-            weights matrix,
-            used for the linear transformation of the recurrent state.
-            (see [initializers](../initializers.md)).
-        bias_initializer: Initializer for the bias vector
-            (see [initializers](../initializers.md)).
-        unit_forget_bias: Boolean.
-            If True, add 1 to the bias of the forget gate at initialization.
-            Setting it to true will also force `bias_initializer="zeros"`.
-            This is recommended in [Jozefowicz et al. (2015)](
-            http://www.jmlr.org/proceedings/papers/v37/jozefowicz15.pdf).
-        kernel_regularizer: Regularizer function applied to
-            the `kernel` weights matrix
-            (see [regularizer](../regularizers.md)).
-        recurrent_regularizer: Regularizer function applied to
-            the `recurrent_kernel` weights matrix
-            (see [regularizer](../regularizers.md)).
-        bias_regularizer: Regularizer function applied to the bias vector
-            (see [regularizer](../regularizers.md)).
- activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - recurrent_constraint: Constraint function applied to - the `recurrent_kernel` weights matrix - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the inputs. - recurrent_dropout: Float between 0 and 1. - Fraction of the units to drop for - the linear transformation of the recurrent state. - implementation: Implementation mode, either 1 or 2. - Mode 1 will structure its operations as a larger number of - smaller dot products and additions, whereas mode 2 will - batch them into fewer, larger operations. These modes will - have different performance profiles on different hardware and - for different applications. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. The returned elements of the - states list are the hidden state and the cell state, respectively. - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - - # References - - [Long short-term memory]( - http://www.bioinf.jku.at/publications/older/2604.pdf) - - [Learning to forget: Continual prediction with LSTM]( - http://www.mitpressjournals.org/doi/pdf/10.1162/089976600300015015) - - [Supervised sequence labeling with recurrent neural networks]( - http://www.cs.toronto.edu/~graves/preprint.pdf) - - [A Theoretically Grounded Application of Dropout in - Recurrent Neural Networks](https://arxiv.org/abs/1512.05287) - """ - - @interfaces.legacy_recurrent_support - def __init__(self, units, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - unit_forget_bias=True, - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - dropout=0., - recurrent_dropout=0., - implementation=1, - return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - **kwargs): - if implementation == 0: - warnings.warn('`implementation=0` has been deprecated, ' - 'and now defaults to `implementation=1`.' - 'Please update your layer call.') - if K.backend() == 'theano' and (dropout or recurrent_dropout): - warnings.warn( - 'RNN dropout is no longer supported with the Theano backend ' - 'due to technical limitations. ' - 'You can either set `dropout` and `recurrent_dropout` to 0, ' - 'or use the TensorFlow backend.') - dropout = 0. - recurrent_dropout = 0. 
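For `LSTM`, the docstring above notes that `return_state` yields both the hidden state and the cell state, which is what makes the common encoder-decoder pattern work. A sketch, assuming the Keras 2.x functional API (all sizes here are arbitrary):

```python
from keras.layers import Input, LSTM
from keras.models import Model

encoder_in = Input(shape=(None, 16))
decoder_in = Input(shape=(None, 16))
# return_state on an LSTM yields the hidden state and the cell state
encoder_out, state_h, state_c = LSTM(64, return_state=True)(encoder_in)
# The encoder states seed the decoder via initial_state
decoder_out = LSTM(64, return_sequences=True)(
    decoder_in, initial_state=[state_h, state_c])
model = Model([encoder_in, decoder_in], decoder_out)
```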
- - cell = LSTMCell(units, - activation=activation, - recurrent_activation=recurrent_activation, - use_bias=use_bias, - kernel_initializer=kernel_initializer, - recurrent_initializer=recurrent_initializer, - unit_forget_bias=unit_forget_bias, - bias_initializer=bias_initializer, - kernel_regularizer=kernel_regularizer, - recurrent_regularizer=recurrent_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - recurrent_constraint=recurrent_constraint, - bias_constraint=bias_constraint, - dropout=dropout, - recurrent_dropout=recurrent_dropout, - implementation=implementation) - super(LSTM, self).__init__(cell, - return_sequences=return_sequences, - return_state=return_state, - go_backwards=go_backwards, - stateful=stateful, - unroll=unroll, - **kwargs) - self.activity_regularizer = regularizers.get(activity_regularizer) - - def call(self, inputs, mask=None, training=None, initial_state=None): - self.cell._dropout_mask = None - self.cell._recurrent_dropout_mask = None - return super(LSTM, self).call(inputs, - mask=mask, - training=training, - initial_state=initial_state) - - @property - def units(self): - return self.cell.units - - @property - def activation(self): - return self.cell.activation - - @property - def recurrent_activation(self): - return self.cell.recurrent_activation - - @property - def use_bias(self): - return self.cell.use_bias - - @property - def kernel_initializer(self): - return self.cell.kernel_initializer - - @property - def recurrent_initializer(self): - return self.cell.recurrent_initializer - - @property - def bias_initializer(self): - return self.cell.bias_initializer - - @property - def unit_forget_bias(self): - return self.cell.unit_forget_bias - - @property - def kernel_regularizer(self): - return self.cell.kernel_regularizer - - @property - def recurrent_regularizer(self): - return self.cell.recurrent_regularizer - - @property - def bias_regularizer(self): - return self.cell.bias_regularizer - - @property - def kernel_constraint(self): - return self.cell.kernel_constraint - - @property - def recurrent_constraint(self): - return self.cell.recurrent_constraint - - @property - def bias_constraint(self): - return self.cell.bias_constraint - - @property - def dropout(self): - return self.cell.dropout - - @property - def recurrent_dropout(self): - return self.cell.recurrent_dropout - - @property - def implementation(self): - return self.cell.implementation - - def get_config(self): - config = {'units': self.units, - 'activation': activations.serialize(self.activation), - 'recurrent_activation': - activations.serialize(self.recurrent_activation), - 'use_bias': self.use_bias, - 'kernel_initializer': - initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': - initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'unit_forget_bias': self.unit_forget_bias, - 'kernel_regularizer': - regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': - regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': - constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'dropout': self.dropout, - 'recurrent_dropout': self.recurrent_dropout, - 
-                  'implementation': self.implementation}
-        base_config = super(LSTM, self).get_config()
-        del base_config['cell']
-        return dict(list(base_config.items()) + list(config.items()))
-
-    @classmethod
-    def from_config(cls, config):
-        if 'implementation' in config and config['implementation'] == 0:
-            config['implementation'] = 1
-        return cls(**config)
-
-
-def _generate_dropout_mask(ones, rate, training=None, count=1):
-    def dropped_inputs():
-        return K.dropout(ones, rate)
-
-    if count > 1:
-        return [K.in_train_phase(
-            dropped_inputs,
-            ones,
-            training=training) for _ in range(count)]
-    return K.in_train_phase(
-        dropped_inputs,
-        ones,
-        training=training)
-
-
-def _standardize_args(inputs, initial_state, constants, num_constants):
-    """Standardize `__call__` to a single list of tensor inputs.
-
-    When running a model loaded from file, the input tensors
-    `initial_state` and `constants` can be passed to `RNN.__call__` as part
-    of `inputs` instead of by the dedicated keyword arguments. This method
-    makes sure the arguments are separated and that `initial_state` and
-    `constants` are lists of tensors (or None).
-
-    # Arguments
-        inputs: tensor or list/tuple of tensors
-        initial_state: tensor or list of tensors or None
-        constants: tensor or list of tensors or None
-
-    # Returns
-        inputs: tensor
-        initial_state: list of tensors or None
-        constants: list of tensors or None
-    """
-    if isinstance(inputs, list):
-        assert initial_state is None and constants is None
-        if num_constants is not None:
-            constants = inputs[-num_constants:]
-            inputs = inputs[:-num_constants]
-        if len(inputs) > 1:
-            initial_state = inputs[1:]
-        inputs = inputs[0]
-
-    def to_list_or_none(x):
-        if x is None or isinstance(x, list):
-            return x
-        if isinstance(x, tuple):
-            return list(x)
-        return [x]
-
-    initial_state = to_list_or_none(initial_state)
-    constants = to_list_or_none(constants)
-
-    return inputs, initial_state, constants
-# -*- coding: utf-8 -*-
-"""Layers that augment the functionality of a base layer.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import copy
-from ..engine.base_layer import Layer
-from ..engine.base_layer import InputSpec
-from ..utils.generic_utils import has_arg
-from ..utils.generic_utils import object_list_uid
-from .. import backend as K
-
-from . import recurrent
-
-
-class Wrapper(Layer):
-    """Abstract wrapper base class.
-
-    Wrappers take another layer and augment it in various ways.
-    Do not use this class as a layer, it is only an abstract base class.
-    Two usable wrappers are the `TimeDistributed` and `Bidirectional` wrappers.
-
-    # Arguments
-        layer: The layer to be wrapped.
-    """
-
-    def __init__(self, layer, **kwargs):
-        self.layer = layer
-        # Tracks mapping of Wrapper inputs to inner layer inputs. Useful when
-        # the inner layer has update ops that depend on its inputs (as opposed
-        # to the inputs to the Wrapper layer).
- self._input_map = {} - super(Wrapper, self).__init__(**kwargs) - - def build(self, input_shape=None): - self.built = True - - @property - def activity_regularizer(self): - if hasattr(self.layer, 'activity_regularizer'): - return self.layer.activity_regularizer - else: - return None - - @property - def trainable(self): - return self.layer.trainable - - @trainable.setter - def trainable(self, value): - self.layer.trainable = value - - @property - def trainable_weights(self): - return self.layer.trainable_weights - - @property - def non_trainable_weights(self): - return self.layer.non_trainable_weights - - @property - def updates(self): - if hasattr(self.layer, 'updates'): - return self.layer.updates - return [] - - def get_updates_for(self, inputs=None): - # If the wrapper modifies the inputs, use the modified inputs to - # get the updates from the inner layer. - inner_inputs = inputs - if inputs is not None: - uid = object_list_uid(inputs) - if uid in self._input_map: - inner_inputs = self._input_map[uid] - - updates = self.layer.get_updates_for(inner_inputs) - updates += super(Wrapper, self).get_updates_for(inputs) - return updates - - @property - def losses(self): - if hasattr(self.layer, 'losses'): - return self.layer.losses - return [] - - def get_losses_for(self, inputs=None): - if inputs is None: - losses = self.layer.get_losses_for(None) - return losses + super(Wrapper, self).get_losses_for(None) - return super(Wrapper, self).get_losses_for(inputs) - - def get_weights(self): - return self.layer.get_weights() - - def set_weights(self, weights): - self.layer.set_weights(weights) - - def get_config(self): - config = {'layer': {'class_name': self.layer.__class__.__name__, - 'config': self.layer.get_config()}} - base_config = super(Wrapper, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - from . import deserialize as deserialize_layer - layer = deserialize_layer(config.pop('layer'), - custom_objects=custom_objects) - return cls(layer, **config) - - -class TimeDistributed(Wrapper): - """This wrapper applies a layer to every temporal slice of an input. - - The input should be at least 3D, and the dimension of index one - will be considered to be the temporal dimension. - - Consider a batch of 32 samples, - where each sample is a sequence of 10 vectors of 16 dimensions. - The batch input shape of the layer is then `(32, 10, 16)`, - and the `input_shape`, not including the samples dimension, is `(10, 16)`. - - You can then use `TimeDistributed` to apply a `Dense` layer - to each of the 10 timesteps, independently: - - ```python - # as the first layer in a model - model = Sequential() - model.add(TimeDistributed(Dense(8), input_shape=(10, 16))) - # now model.output_shape == (None, 10, 8) - ``` - - The output will then have shape `(32, 10, 8)`. - - In subsequent layers, there is no need for the `input_shape`: - - ```python - model.add(TimeDistributed(Dense(32))) - # now model.output_shape == (None, 10, 32) - ``` - - The output will then have shape `(32, 10, 32)`. - - `TimeDistributed` can be used with arbitrary layers, not just `Dense`, - for instance with a `Conv2D` layer: - - ```python - model = Sequential() - model.add(TimeDistributed(Conv2D(64, (3, 3)), - input_shape=(10, 299, 299, 3))) - ``` - - # Arguments - layer: a layer instance. 
- """ - - def __init__(self, layer, **kwargs): - super(TimeDistributed, self).__init__(layer, **kwargs) - self.supports_masking = True - - def _get_shape_tuple(self, init_tuple, tensor, start_idx, int_shape=None): - """Finds non-specific dimensions in the static shapes - and replaces them by the corresponding dynamic shapes of the tensor. - - # Arguments - init_tuple: a tuple, the first part of the output shape - tensor: the tensor from which to get the (static and dynamic) shapes - as the last part of the output shape - start_idx: int, which indicate the first dimension to take from - the static shape of the tensor - int_shape: an alternative static shape to take as the last part - of the output shape - - # Returns - The new int_shape with the first part from init_tuple - and the last part from either `int_shape` (if provided) - or K.int_shape(tensor), where every `None` is replaced by - the corresponding dimension from K.shape(tensor) - """ - # replace all None in int_shape by K.shape - if int_shape is None: - int_shape = K.int_shape(tensor)[start_idx:] - if not any(not s for s in int_shape): - return init_tuple + int_shape - tensor_shape = K.shape(tensor) - int_shape = list(int_shape) - for i, s in enumerate(int_shape): - if not s: - int_shape[i] = tensor_shape[start_idx + i] - return init_tuple + tuple(int_shape) - - def build(self, input_shape): - assert len(input_shape) >= 3 - self.input_spec = InputSpec(shape=input_shape) - child_input_shape = (input_shape[0],) + input_shape[2:] - if not self.layer.built: - self.layer.build(child_input_shape) - self.layer.built = True - super(TimeDistributed, self).build() - - def compute_output_shape(self, input_shape): - child_input_shape = (input_shape[0],) + input_shape[2:] - child_output_shape = self.layer.compute_output_shape(child_input_shape) - timesteps = input_shape[1] - return (child_output_shape[0], timesteps) + child_output_shape[1:] - - def call(self, inputs, training=None, mask=None): - kwargs = {} - if has_arg(self.layer.call, 'training'): - kwargs['training'] = training - uses_learning_phase = False - - input_shape = K.int_shape(inputs) - if input_shape[0]: - # batch size matters, use rnn-based implementation - def step(x, _): - global uses_learning_phase - output = self.layer.call(x, **kwargs) - if hasattr(output, '_uses_learning_phase'): - uses_learning_phase = (output._uses_learning_phase or - uses_learning_phase) - return output, [] - - _, outputs, _ = K.rnn(step, inputs, - initial_states=[], - input_length=input_shape[1], - unroll=False) - y = outputs - else: - # No batch size specified, therefore the layer will be able - # to process batches of any size. - # We can go with reshape-based implementation for performance. - input_length = input_shape[1] - if not input_length: - input_length = K.shape(inputs)[1] - inner_input_shape = self._get_shape_tuple((-1,), inputs, 2) - # Shape: (num_samples * timesteps, ...). And track the - # transformation in self._input_map. - input_uid = object_list_uid(inputs) - inputs = K.reshape(inputs, inner_input_shape) - self._input_map[input_uid] = inputs - # (num_samples * timesteps, ...) - if has_arg(self.layer.call, 'mask') and mask is not None: - inner_mask_shape = self._get_shape_tuple((-1,), mask, 2) - kwargs['mask'] = K.reshape(mask, inner_mask_shape) - y = self.layer.call(inputs, **kwargs) - if hasattr(y, '_uses_learning_phase'): - uses_learning_phase = y._uses_learning_phase - # Shape: (num_samples, timesteps, ...) 
- output_shape = self.compute_output_shape(input_shape) - output_shape = self._get_shape_tuple( - (-1, input_length), y, 1, output_shape[2:]) - y = K.reshape(y, output_shape) - - # Apply activity regularizer if any: - if (hasattr(self.layer, 'activity_regularizer') and - self.layer.activity_regularizer is not None): - regularization_loss = self.layer.activity_regularizer(y) - self.add_loss(regularization_loss, inputs) - - if uses_learning_phase: - y._uses_learning_phase = True - return y - - def compute_mask(self, inputs, mask=None): - """Computes an output mask tensor for Embedding layer - based on the inputs, mask, and the inner layer. - - If batch size is specified: - Simply return the input `mask`. (An rnn-based implementation with - more than one rnn inputs is required but not supported in Keras yet.) - - Otherwise we call `compute_mask` of the inner layer at each time step. - If the output mask at each time step is not `None`: - (E.g., inner layer is Masking or RNN) - Concatenate all of them and return the concatenation. - If the output mask at each time step is `None` and - the input mask is not `None`: - (E.g., inner layer is Dense) - Reduce the input_mask to 2 dimensions and return it. - Otherwise (both the output mask and the input mask are `None`): - (E.g., `mask` is not used at all) - Return `None`. - - # Arguments - inputs: Tensor - mask: Tensor - # Returns - None or a tensor - """ - # cases need to call the layer.compute_mask when input_mask is None: - # Masking layer and Embedding layer with mask_zero - input_shape = K.int_shape(inputs) - if input_shape[0]: - # batch size matters, we currently do not handle mask explicitly - return mask - inner_mask = mask - if inner_mask is not None: - inner_mask_shape = self._get_shape_tuple((-1,), mask, 2) - inner_mask = K.reshape(inner_mask, inner_mask_shape) - input_uid = object_list_uid(inputs) - inner_inputs = self._input_map[input_uid] - output_mask = self.layer.compute_mask(inner_inputs, inner_mask) - if output_mask is None: - if mask is None: - return None - # input_mask is not None, and output_mask is None: - # we should return a not-None mask - output_mask = mask - for _ in range(2, len(K.int_shape(mask))): - output_mask = K.any(output_mask, axis=-1) - else: - # output_mask is not None. We need to reshape it - input_length = input_shape[1] - if not input_length: - input_length = K.shape(inputs)[1] - output_mask_int_shape = K.int_shape(output_mask) - if output_mask_int_shape is None: - # if the output_mask does not have a static shape, - # its shape must be the same as mask's - if mask is not None: - output_mask_int_shape = K.int_shape(mask) - else: - output_mask_int_shape = K.compute_output_shape(input_shape)[ - :-1] - output_mask_shape = self._get_shape_tuple( - (-1, input_length), output_mask, 1, output_mask_int_shape[1:]) - output_mask = K.reshape(output_mask, output_mask_shape) - return output_mask - - -class Bidirectional(Wrapper): - """Bidirectional wrapper for RNNs. - - # Arguments - layer: `Recurrent` instance. - merge_mode: Mode by which outputs of the - forward and backward RNNs will be combined. - One of {'sum', 'mul', 'concat', 'ave', None}. - If None, the outputs will not be combined, - they will be returned as a list. - weights: Initial weights to load in the Bidirectional model - - # Raises - ValueError: In case of invalid `merge_mode` argument. 
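    (Editor's note, a hedged illustration: wrapping `LSTM(10)` with
    `merge_mode='concat'` joins the forward and backward outputs, each of
    shape `(batch, 10)`, into `(batch, 20)`; with `merge_mode=None` the
    two `(batch, 10)` tensors are returned as a list instead.)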
- - # Examples - - ```python - model = Sequential() - model.add(Bidirectional(LSTM(10, return_sequences=True), - input_shape=(5, 10))) - model.add(Bidirectional(LSTM(10))) - model.add(Dense(5)) - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - ``` - """ - - def __init__(self, layer, merge_mode='concat', weights=None, **kwargs): - if merge_mode not in ['sum', 'mul', 'ave', 'concat', None]: - raise ValueError('Invalid merge mode. ' - 'Merge mode should be one of ' - '{"sum", "mul", "ave", "concat", None}') - self.forward_layer = copy.copy(layer) - config = layer.get_config() - config['go_backwards'] = not config['go_backwards'] - self.backward_layer = layer.__class__.from_config(config) - self.forward_layer.name = 'forward_' + self.forward_layer.name - self.backward_layer.name = 'backward_' + self.backward_layer.name - self.merge_mode = merge_mode - if weights: - nw = len(weights) - self.forward_layer.initial_weights = weights[:nw // 2] - self.backward_layer.initial_weights = weights[nw // 2:] - self.stateful = layer.stateful - self.return_sequences = layer.return_sequences - self.return_state = layer.return_state - self.supports_masking = True - self._trainable = True - super(Bidirectional, self).__init__(layer, **kwargs) - self.input_spec = layer.input_spec - self._num_constants = None - - @property - def trainable(self): - return self._trainable - - @trainable.setter - def trainable(self, value): - self._trainable = value - self.forward_layer.trainable = value - self.backward_layer.trainable = value - - def get_weights(self): - return self.forward_layer.get_weights() + self.backward_layer.get_weights() - - def set_weights(self, weights): - nw = len(weights) - self.forward_layer.set_weights(weights[:nw // 2]) - self.backward_layer.set_weights(weights[nw // 2:]) - - def compute_output_shape(self, input_shape): - output_shape = self.forward_layer.compute_output_shape(input_shape) - if self.return_state: - state_shape = output_shape[1:] - output_shape = output_shape[0] - - if self.merge_mode == 'concat': - output_shape = list(output_shape) - output_shape[-1] *= 2 - output_shape = tuple(output_shape) - elif self.merge_mode is None: - output_shape = [output_shape, copy.copy(output_shape)] - - if self.return_state: - if self.merge_mode is None: - return output_shape + state_shape + copy.copy(state_shape) - return [output_shape] + state_shape + copy.copy(state_shape) - return output_shape - - def __call__(self, inputs, initial_state=None, constants=None, **kwargs): - inputs, initial_state, constants = recurrent._standardize_args( - inputs, initial_state, constants, self._num_constants) - - if initial_state is None and constants is None: - return super(Bidirectional, self).__call__(inputs, **kwargs) - - # Applies the same workaround as in `RNN.__call__` - additional_inputs = [] - additional_specs = [] - if initial_state is not None: - # Check if `initial_state` can be splitted into half - num_states = len(initial_state) - if num_states % 2 > 0: - raise ValueError( - 'When passing `initial_state` to a Bidirectional RNN, ' - 'the state should be a list containing the states of ' - 'the underlying RNNs. 
' - 'Found: ' + str(initial_state)) - - kwargs['initial_state'] = initial_state - additional_inputs += initial_state - state_specs = [InputSpec(shape=K.int_shape(state)) - for state in initial_state] - self.forward_layer.state_spec = state_specs[:num_states // 2] - self.backward_layer.state_spec = state_specs[num_states // 2:] - additional_specs += state_specs - if constants is not None: - kwargs['constants'] = constants - additional_inputs += constants - constants_spec = [InputSpec(shape=K.int_shape(constant)) - for constant in constants] - self.forward_layer.constants_spec = constants_spec - self.backward_layer.constants_spec = constants_spec - additional_specs += constants_spec - - self._num_constants = len(constants) - self.forward_layer._num_constants = self._num_constants - self.backward_layer._num_constants = self._num_constants - - is_keras_tensor = K.is_keras_tensor(additional_inputs[0]) - for tensor in additional_inputs: - if K.is_keras_tensor(tensor) != is_keras_tensor: - raise ValueError('The initial state of a Bidirectional' - ' layer cannot be specified with a mix of' - ' Keras tensors and non-Keras tensors' - ' (a "Keras tensor" is a tensor that was' - ' returned by a Keras layer, or by `Input`)') - - if is_keras_tensor: - # Compute the full input spec, including state - full_input = [inputs] + additional_inputs - full_input_spec = self.input_spec + additional_specs - - # Perform the call with temporarily replaced input_spec - original_input_spec = self.input_spec - self.input_spec = full_input_spec - output = super(Bidirectional, self).__call__(full_input, **kwargs) - self.input_spec = original_input_spec - return output - else: - return super(Bidirectional, self).__call__(inputs, **kwargs) - - def call(self, - inputs, - mask=None, - training=None, - initial_state=None, - constants=None): - kwargs = {} - if has_arg(self.layer.call, 'training'): - kwargs['training'] = training - if has_arg(self.layer.call, 'mask'): - kwargs['mask'] = mask - if has_arg(self.layer.call, 'constants'): - kwargs['constants'] = constants - - if initial_state is not None and has_arg(self.layer.call, 'initial_state'): - forward_inputs = [inputs[0]] - backward_inputs = [inputs[0]] - pivot = len(initial_state) // 2 + 1 - # add forward initial state - forward_state = inputs[1:pivot] - forward_inputs += forward_state - if self._num_constants is None: - # add backward initial state - backward_state = inputs[pivot:] - backward_inputs += backward_state - else: - # add backward initial state - backward_state = inputs[pivot:-self._num_constants] - backward_inputs += backward_state - # add constants for forward and backward layers - forward_inputs += inputs[-self._num_constants:] - backward_inputs += inputs[-self._num_constants:] - y = self.forward_layer.call(forward_inputs, - initial_state=forward_state, **kwargs) - y_rev = self.backward_layer.call(backward_inputs, - initial_state=backward_state, **kwargs) - else: - y = self.forward_layer.call(inputs, **kwargs) - y_rev = self.backward_layer.call(inputs, **kwargs) - - if self.return_state: - states = y[1:] + y_rev[1:] - y = y[0] - y_rev = y_rev[0] - - if self.return_sequences: - y_rev = K.reverse(y_rev, 1) - if self.merge_mode == 'concat': - output = K.concatenate([y, y_rev]) - elif self.merge_mode == 'sum': - output = y + y_rev - elif self.merge_mode == 'ave': - output = (y + y_rev) / 2 - elif self.merge_mode == 'mul': - output = y * y_rev - elif self.merge_mode is None: - output = [y, y_rev] - else: - raise ValueError('Unrecognized value for argument ' - 
'merge_mode: %s' % (self.merge_mode)) - - # Properly set learning phase - if (getattr(y, '_uses_learning_phase', False) or - getattr(y_rev, '_uses_learning_phase', False)): - if self.merge_mode is None: - for out in output: - out._uses_learning_phase = True - else: - output._uses_learning_phase = True - - if self.return_state: - if self.merge_mode is None: - return output + states - return [output] + states - return output - - def reset_states(self): - self.forward_layer.reset_states() - self.backward_layer.reset_states() - - def build(self, input_shape): - with K.name_scope(self.forward_layer.name): - self.forward_layer.build(input_shape) - with K.name_scope(self.backward_layer.name): - self.backward_layer.build(input_shape) - self.built = True - - def compute_mask(self, inputs, mask): - if isinstance(mask, list): - mask = mask[0] - if self.return_sequences: - if not self.merge_mode: - output_mask = [mask, mask] - else: - output_mask = mask - else: - output_mask = [None, None] if not self.merge_mode else None - - if self.return_state: - states = self.forward_layer.states - state_mask = [None for _ in states] - if isinstance(output_mask, list): - return output_mask + state_mask * 2 - return [output_mask] + state_mask * 2 - - return output_mask - - @property - def trainable_weights(self): - if hasattr(self.forward_layer, 'trainable_weights'): - return (self.forward_layer.trainable_weights + - self.backward_layer.trainable_weights) - return [] - - @property - def non_trainable_weights(self): - if hasattr(self.forward_layer, 'non_trainable_weights'): - return (self.forward_layer.non_trainable_weights + - self.backward_layer.non_trainable_weights) - return [] - - @property - def updates(self): - if hasattr(self.forward_layer, 'updates'): - return self.forward_layer.updates + self.backward_layer.updates - return [] - - def get_updates_for(self, inputs=None): - forward_updates = self.forward_layer.get_updates_for(inputs) - backward_updates = self.backward_layer.get_updates_for(inputs) - return (super(Wrapper, self).get_updates_for(inputs) + - forward_updates + backward_updates) - - @property - def losses(self): - if hasattr(self.forward_layer, 'losses'): - return self.forward_layer.losses + self.backward_layer.losses - return [] - - def get_losses_for(self, inputs=None): - forward_losses = self.forward_layer.get_losses_for(inputs) - backward_losses = self.backward_layer.get_losses_for(inputs) - return (super(Wrapper, self).get_losses_for(inputs) + - forward_losses + backward_losses) - - @property - def constraints(self): - constraints = {} - if hasattr(self.forward_layer, 'constraints'): - constraints.update(self.forward_layer.constraints) - constraints.update(self.backward_layer.constraints) - return constraints - - def get_config(self): - config = {'merge_mode': self.merge_mode} - if self._num_constants is not None: - config['num_constants'] = self._num_constants - - base_config = super(Bidirectional, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - @classmethod - def from_config(cls, config, custom_objects=None): - from . import deserialize as deserialize_layer - rnn_layer = deserialize_layer(config.pop('layer'), - custom_objects=custom_objects) - num_constants = config.pop('num_constants', None) - layer = cls(rnn_layer, **config) - layer._num_constants = num_constants - return layer -"""Interface converters for Keras 1 support in Keras 2. 
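A hedged example of the intended behavior (an editor's addition, not
original text): a Keras 1 style call such as
`Dense(output_dim=10, init='uniform')` is converted to
`Dense(units=10, kernel_initializer='uniform')`, and a warning shows
the equivalent Keras 2 signature.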
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import six -import warnings -import functools -import numpy as np - - -def generate_legacy_interface(allowed_positional_args=None, - conversions=None, - preprocessor=None, - value_conversions=None, - object_type='class'): - if allowed_positional_args is None: - check_positional_args = False - else: - check_positional_args = True - allowed_positional_args = allowed_positional_args or [] - conversions = conversions or [] - value_conversions = value_conversions or [] - - def legacy_support(func): - @six.wraps(func) - def wrapper(*args, **kwargs): - if object_type == 'class': - object_name = args[0].__class__.__name__ - else: - object_name = func.__name__ - if preprocessor: - args, kwargs, converted = preprocessor(args, kwargs) - else: - converted = [] - if check_positional_args: - if len(args) > len(allowed_positional_args) + 1: - raise TypeError('`' + object_name + - '` can accept only ' + - str(len(allowed_positional_args)) + - ' positional arguments ' + - str(tuple(allowed_positional_args)) + - ', but you passed the following ' - 'positional arguments: ' + - str(list(args[1:]))) - for key in value_conversions: - if key in kwargs: - old_value = kwargs[key] - if old_value in value_conversions[key]: - kwargs[key] = value_conversions[key][old_value] - for old_name, new_name in conversions: - if old_name in kwargs: - value = kwargs.pop(old_name) - if new_name in kwargs: - raise_duplicate_arg_error(old_name, new_name) - kwargs[new_name] = value - converted.append((new_name, old_name)) - if converted: - signature = '`' + object_name + '(' - for i, value in enumerate(args[1:]): - if isinstance(value, six.string_types): - signature += '"' + value + '"' - else: - if isinstance(value, np.ndarray): - str_val = 'array' - else: - str_val = str(value) - if len(str_val) > 10: - str_val = str_val[:10] + '...' - signature += str_val - if i < len(args[1:]) - 1 or kwargs: - signature += ', ' - for i, (name, value) in enumerate(kwargs.items()): - signature += name + '=' - if isinstance(value, six.string_types): - signature += '"' + value + '"' - else: - if isinstance(value, np.ndarray): - str_val = 'array' - else: - str_val = str(value) - if len(str_val) > 10: - str_val = str_val[:10] + '...' - signature += str_val - if i < len(kwargs) - 1: - signature += ', ' - signature += ')`' - warnings.warn('Update your `' + object_name + '` call to the ' + - 'Keras 2 API: ' + signature, stacklevel=2) - return func(*args, **kwargs) - wrapper._original_function = func - return wrapper - return legacy_support - - -generate_legacy_method_interface = functools.partial(generate_legacy_interface, - object_type='method') - - -def raise_duplicate_arg_error(old_arg, new_arg): - raise TypeError('For the `' + new_arg + '` argument, ' - 'the layer received both ' - 'the legacy keyword argument ' - '`' + old_arg + '` and the Keras 2 keyword argument ' - '`' + new_arg + '`. 
Stick to the latter!') - - -legacy_dense_support = generate_legacy_interface( - allowed_positional_args=['units'], - conversions=[('output_dim', 'units'), - ('init', 'kernel_initializer'), - ('W_regularizer', 'kernel_regularizer'), - ('b_regularizer', 'bias_regularizer'), - ('W_constraint', 'kernel_constraint'), - ('b_constraint', 'bias_constraint'), - ('bias', 'use_bias')]) - -legacy_dropout_support = generate_legacy_interface( - allowed_positional_args=['rate', 'noise_shape', 'seed'], - conversions=[('p', 'rate')]) - - -def embedding_kwargs_preprocessor(args, kwargs): - converted = [] - if 'dropout' in kwargs: - kwargs.pop('dropout') - warnings.warn('The `dropout` argument is no longer support in `Embedding`. ' - 'You can apply a `keras.layers.SpatialDropout1D` layer ' - 'right after the `Embedding` layer to get the same behavior.', - stacklevel=3) - return args, kwargs, converted - - -legacy_embedding_support = generate_legacy_interface( - allowed_positional_args=['input_dim', 'output_dim'], - conversions=[('init', 'embeddings_initializer'), - ('W_regularizer', 'embeddings_regularizer'), - ('W_constraint', 'embeddings_constraint')], - preprocessor=embedding_kwargs_preprocessor) - -legacy_pooling1d_support = generate_legacy_interface( - allowed_positional_args=['pool_size', 'strides', 'padding'], - conversions=[('pool_length', 'pool_size'), - ('stride', 'strides'), - ('border_mode', 'padding')]) - -legacy_prelu_support = generate_legacy_interface( - allowed_positional_args=['alpha_initializer'], - conversions=[('init', 'alpha_initializer')]) - - -legacy_gaussiannoise_support = generate_legacy_interface( - allowed_positional_args=['stddev'], - conversions=[('sigma', 'stddev')]) - - -def recurrent_args_preprocessor(args, kwargs): - converted = [] - if 'forget_bias_init' in kwargs: - if kwargs['forget_bias_init'] == 'one': - kwargs.pop('forget_bias_init') - kwargs['unit_forget_bias'] = True - converted.append(('forget_bias_init', 'unit_forget_bias')) - else: - kwargs.pop('forget_bias_init') - warnings.warn('The `forget_bias_init` argument ' - 'has been ignored. Use `unit_forget_bias=True` ' - 'instead to initialize with ones.', stacklevel=3) - if 'input_dim' in kwargs: - input_length = kwargs.pop('input_length', None) - input_dim = kwargs.pop('input_dim') - input_shape = (input_length, input_dim) - kwargs['input_shape'] = input_shape - converted.append(('input_dim', 'input_shape')) - warnings.warn('The `input_dim` and `input_length` arguments ' - 'in recurrent layers are deprecated. 
' - 'Use `input_shape` instead.', stacklevel=3) - return args, kwargs, converted - - -legacy_recurrent_support = generate_legacy_interface( - allowed_positional_args=['units'], - conversions=[('output_dim', 'units'), - ('init', 'kernel_initializer'), - ('inner_init', 'recurrent_initializer'), - ('inner_activation', 'recurrent_activation'), - ('W_regularizer', 'kernel_regularizer'), - ('b_regularizer', 'bias_regularizer'), - ('U_regularizer', 'recurrent_regularizer'), - ('dropout_W', 'dropout'), - ('dropout_U', 'recurrent_dropout'), - ('consume_less', 'implementation')], - value_conversions={'consume_less': {'cpu': 0, - 'mem': 1, - 'gpu': 2}}, - preprocessor=recurrent_args_preprocessor) - -legacy_gaussiandropout_support = generate_legacy_interface( - allowed_positional_args=['rate'], - conversions=[('p', 'rate')]) - -legacy_pooling2d_support = generate_legacy_interface( - allowed_positional_args=['pool_size', 'strides', 'padding'], - conversions=[('border_mode', 'padding'), - ('dim_ordering', 'data_format')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}) - -legacy_pooling3d_support = generate_legacy_interface( - allowed_positional_args=['pool_size', 'strides', 'padding'], - conversions=[('border_mode', 'padding'), - ('dim_ordering', 'data_format')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}) - -legacy_global_pooling_support = generate_legacy_interface( - conversions=[('dim_ordering', 'data_format')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}) - -legacy_upsampling1d_support = generate_legacy_interface( - allowed_positional_args=['size'], - conversions=[('length', 'size')]) - -legacy_upsampling2d_support = generate_legacy_interface( - allowed_positional_args=['size'], - conversions=[('dim_ordering', 'data_format')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}) - -legacy_upsampling3d_support = generate_legacy_interface( - allowed_positional_args=['size'], - conversions=[('dim_ordering', 'data_format')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}) - - -def conv1d_args_preprocessor(args, kwargs): - converted = [] - if 'input_dim' in kwargs: - if 'input_length' in kwargs: - length = kwargs.pop('input_length') - else: - length = None - input_shape = (length, kwargs.pop('input_dim')) - kwargs['input_shape'] = input_shape - converted.append(('input_shape', 'input_dim')) - return args, kwargs, converted - - -legacy_conv1d_support = generate_legacy_interface( - allowed_positional_args=['filters', 'kernel_size'], - conversions=[('nb_filter', 'filters'), - ('filter_length', 'kernel_size'), - ('subsample_length', 'strides'), - ('border_mode', 'padding'), - ('init', 'kernel_initializer'), - ('W_regularizer', 'kernel_regularizer'), - ('b_regularizer', 'bias_regularizer'), - ('W_constraint', 'kernel_constraint'), - ('b_constraint', 'bias_constraint'), - ('bias', 'use_bias')], - preprocessor=conv1d_args_preprocessor) - - -def conv2d_args_preprocessor(args, kwargs): - converted = [] - if len(args) > 4: - raise TypeError('Layer can receive at most 3 positional arguments.') - elif len(args) == 4: - if isinstance(args[2], int) and isinstance(args[3], int): - new_keywords = ['padding', 'strides', 'data_format'] - for kwd in new_keywords: - if kwd in kwargs: - raise ValueError( - 'It seems that you 
are using the Keras 2 ' - 'and you are passing both `kernel_size` and `strides` ' - 'as integer positional arguments. For safety reasons, ' - 'this is disallowed. Pass `strides` ' - 'as a keyword argument instead.') - kernel_size = (args[2], args[3]) - args = [args[0], args[1], kernel_size] - converted.append(('kernel_size', 'nb_row/nb_col')) - elif len(args) == 3 and isinstance(args[2], int): - if 'nb_col' in kwargs: - kernel_size = (args[2], kwargs.pop('nb_col')) - args = [args[0], args[1], kernel_size] - converted.append(('kernel_size', 'nb_row/nb_col')) - elif len(args) == 2: - if 'nb_row' in kwargs and 'nb_col' in kwargs: - kernel_size = (kwargs.pop('nb_row'), kwargs.pop('nb_col')) - args = [args[0], args[1], kernel_size] - converted.append(('kernel_size', 'nb_row/nb_col')) - elif len(args) == 1: - if 'nb_row' in kwargs and 'nb_col' in kwargs: - kernel_size = (kwargs.pop('nb_row'), kwargs.pop('nb_col')) - kwargs['kernel_size'] = kernel_size - converted.append(('kernel_size', 'nb_row/nb_col')) - return args, kwargs, converted - - -legacy_conv2d_support = generate_legacy_interface( - allowed_positional_args=['filters', 'kernel_size'], - conversions=[('nb_filter', 'filters'), - ('subsample', 'strides'), - ('border_mode', 'padding'), - ('dim_ordering', 'data_format'), - ('init', 'kernel_initializer'), - ('W_regularizer', 'kernel_regularizer'), - ('b_regularizer', 'bias_regularizer'), - ('W_constraint', 'kernel_constraint'), - ('b_constraint', 'bias_constraint'), - ('bias', 'use_bias')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}, - preprocessor=conv2d_args_preprocessor) - - -def separable_conv2d_args_preprocessor(args, kwargs): - converted = [] - if 'init' in kwargs: - init = kwargs.pop('init') - kwargs['depthwise_initializer'] = init - kwargs['pointwise_initializer'] = init - converted.append( - ('init', 'depthwise_initializer/pointwise_initializer')) - args, kwargs, _converted = conv2d_args_preprocessor(args, kwargs) - return args, kwargs, converted + _converted - - -legacy_separable_conv2d_support = generate_legacy_interface( - allowed_positional_args=['filters', 'kernel_size'], - conversions=[('nb_filter', 'filters'), - ('subsample', 'strides'), - ('border_mode', 'padding'), - ('dim_ordering', 'data_format'), - ('b_regularizer', 'bias_regularizer'), - ('b_constraint', 'bias_constraint'), - ('bias', 'use_bias')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}, - preprocessor=separable_conv2d_args_preprocessor) - - -def deconv2d_args_preprocessor(args, kwargs): - converted = [] - if len(args) == 5: - if isinstance(args[4], tuple): - args = args[:-1] - converted.append(('output_shape', None)) - if 'output_shape' in kwargs: - kwargs.pop('output_shape') - converted.append(('output_shape', None)) - args, kwargs, _converted = conv2d_args_preprocessor(args, kwargs) - return args, kwargs, converted + _converted - - -legacy_deconv2d_support = generate_legacy_interface( - allowed_positional_args=['filters', 'kernel_size'], - conversions=[('nb_filter', 'filters'), - ('subsample', 'strides'), - ('border_mode', 'padding'), - ('dim_ordering', 'data_format'), - ('init', 'kernel_initializer'), - ('W_regularizer', 'kernel_regularizer'), - ('b_regularizer', 'bias_regularizer'), - ('W_constraint', 'kernel_constraint'), - ('b_constraint', 'bias_constraint'), - ('bias', 'use_bias')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': 
None}}, - preprocessor=deconv2d_args_preprocessor) - - -def conv3d_args_preprocessor(args, kwargs): - converted = [] - if len(args) > 5: - raise TypeError('Layer can receive at most 4 positional arguments.') - if len(args) == 5: - if all([isinstance(x, int) for x in args[2:5]]): - kernel_size = (args[2], args[3], args[4]) - args = [args[0], args[1], kernel_size] - converted.append(('kernel_size', 'kernel_dim*')) - elif len(args) == 4 and isinstance(args[3], int): - if isinstance(args[2], int) and isinstance(args[3], int): - new_keywords = ['padding', 'strides', 'data_format'] - for kwd in new_keywords: - if kwd in kwargs: - raise ValueError( - 'It seems that you are using the Keras 2 ' - 'and you are passing both `kernel_size` and `strides` ' - 'as integer positional arguments. For safety reasons, ' - 'this is disallowed. Pass `strides` ' - 'as a keyword argument instead.') - if 'kernel_dim3' in kwargs: - kernel_size = (args[2], args[3], kwargs.pop('kernel_dim3')) - args = [args[0], args[1], kernel_size] - converted.append(('kernel_size', 'kernel_dim*')) - elif len(args) == 3: - if all([x in kwargs for x in ['kernel_dim2', 'kernel_dim3']]): - kernel_size = (args[2], - kwargs.pop('kernel_dim2'), - kwargs.pop('kernel_dim3')) - args = [args[0], args[1], kernel_size] - converted.append(('kernel_size', 'kernel_dim*')) - elif len(args) == 2: - if all([x in kwargs for x in ['kernel_dim1', 'kernel_dim2', 'kernel_dim3']]): - kernel_size = (kwargs.pop('kernel_dim1'), - kwargs.pop('kernel_dim2'), - kwargs.pop('kernel_dim3')) - args = [args[0], args[1], kernel_size] - converted.append(('kernel_size', 'kernel_dim*')) - elif len(args) == 1: - if all([x in kwargs for x in ['kernel_dim1', 'kernel_dim2', 'kernel_dim3']]): - kernel_size = (kwargs.pop('kernel_dim1'), - kwargs.pop('kernel_dim2'), - kwargs.pop('kernel_dim3')) - kwargs['kernel_size'] = kernel_size - converted.append(('kernel_size', 'nb_row/nb_col')) - return args, kwargs, converted - - -legacy_conv3d_support = generate_legacy_interface( - allowed_positional_args=['filters', 'kernel_size'], - conversions=[('nb_filter', 'filters'), - ('subsample', 'strides'), - ('border_mode', 'padding'), - ('dim_ordering', 'data_format'), - ('init', 'kernel_initializer'), - ('W_regularizer', 'kernel_regularizer'), - ('b_regularizer', 'bias_regularizer'), - ('W_constraint', 'kernel_constraint'), - ('b_constraint', 'bias_constraint'), - ('bias', 'use_bias')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}, - preprocessor=conv3d_args_preprocessor) - - -def batchnorm_args_preprocessor(args, kwargs): - converted = [] - if len(args) > 1: - raise TypeError('The `BatchNormalization` layer ' - 'does not accept positional arguments. ' - 'Use keyword arguments instead.') - if 'mode' in kwargs: - value = kwargs.pop('mode') - if value != 0: - raise TypeError('The `mode` argument of `BatchNormalization` ' - 'no longer exists. `mode=1` and `mode=2` ' - 'are no longer supported.') - converted.append(('mode', None)) - return args, kwargs, converted - - -def convlstm2d_args_preprocessor(args, kwargs): - converted = [] - if 'forget_bias_init' in kwargs: - value = kwargs.pop('forget_bias_init') - if value == 'one': - kwargs['unit_forget_bias'] = True - converted.append(('forget_bias_init', 'unit_forget_bias')) - else: - warnings.warn('The `forget_bias_init` argument ' - 'has been ignored. 
Use `unit_forget_bias=True` ' - 'instead to initialize with ones.', stacklevel=3) - args, kwargs, _converted = conv2d_args_preprocessor(args, kwargs) - return args, kwargs, converted + _converted - - -legacy_convlstm2d_support = generate_legacy_interface( - allowed_positional_args=['filters', 'kernel_size'], - conversions=[('nb_filter', 'filters'), - ('subsample', 'strides'), - ('border_mode', 'padding'), - ('dim_ordering', 'data_format'), - ('init', 'kernel_initializer'), - ('inner_init', 'recurrent_initializer'), - ('W_regularizer', 'kernel_regularizer'), - ('U_regularizer', 'recurrent_regularizer'), - ('b_regularizer', 'bias_regularizer'), - ('inner_activation', 'recurrent_activation'), - ('dropout_W', 'dropout'), - ('dropout_U', 'recurrent_dropout'), - ('bias', 'use_bias')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}, - preprocessor=convlstm2d_args_preprocessor) - -legacy_batchnorm_support = generate_legacy_interface( - allowed_positional_args=[], - conversions=[('beta_init', 'beta_initializer'), - ('gamma_init', 'gamma_initializer')], - preprocessor=batchnorm_args_preprocessor) - - -def zeropadding2d_args_preprocessor(args, kwargs): - converted = [] - if 'padding' in kwargs and isinstance(kwargs['padding'], dict): - if set(kwargs['padding'].keys()) <= {'top_pad', 'bottom_pad', - 'left_pad', 'right_pad'}: - top_pad = kwargs['padding'].get('top_pad', 0) - bottom_pad = kwargs['padding'].get('bottom_pad', 0) - left_pad = kwargs['padding'].get('left_pad', 0) - right_pad = kwargs['padding'].get('right_pad', 0) - kwargs['padding'] = ((top_pad, bottom_pad), (left_pad, right_pad)) - warnings.warn('The `padding` argument in the Keras 2 API no longer' - 'accepts dict types. You can now input argument as: ' - '`padding=(top_pad, bottom_pad, left_pad, right_pad)`.', - stacklevel=3) - elif len(args) == 2 and isinstance(args[1], dict): - if set(args[1].keys()) <= {'top_pad', 'bottom_pad', - 'left_pad', 'right_pad'}: - top_pad = args[1].get('top_pad', 0) - bottom_pad = args[1].get('bottom_pad', 0) - left_pad = args[1].get('left_pad', 0) - right_pad = args[1].get('right_pad', 0) - args = (args[0], ((top_pad, bottom_pad), (left_pad, right_pad))) - warnings.warn('The `padding` argument in the Keras 2 API no longer' - 'accepts dict types. 
You can now input argument as: ' - '`padding=((top_pad, bottom_pad), (left_pad, right_pad))`', - stacklevel=3) - return args, kwargs, converted - - -legacy_zeropadding2d_support = generate_legacy_interface( - allowed_positional_args=['padding'], - conversions=[('dim_ordering', 'data_format')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}, - preprocessor=zeropadding2d_args_preprocessor) - -legacy_zeropadding3d_support = generate_legacy_interface( - allowed_positional_args=['padding'], - conversions=[('dim_ordering', 'data_format')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}) - -legacy_cropping2d_support = generate_legacy_interface( - allowed_positional_args=['cropping'], - conversions=[('dim_ordering', 'data_format')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}) - -legacy_cropping3d_support = generate_legacy_interface( - allowed_positional_args=['cropping'], - conversions=[('dim_ordering', 'data_format')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}) - -legacy_spatialdropout1d_support = generate_legacy_interface( - allowed_positional_args=['rate'], - conversions=[('p', 'rate')]) - -legacy_spatialdropoutNd_support = generate_legacy_interface( - allowed_positional_args=['rate'], - conversions=[('p', 'rate'), - ('dim_ordering', 'data_format')], - value_conversions={'dim_ordering': {'tf': 'channels_last', - 'th': 'channels_first', - 'default': None}}) - -legacy_lambda_support = generate_legacy_interface( - allowed_positional_args=['function', 'output_shape']) - - -# Model methods - -def generator_methods_args_preprocessor(args, kwargs): - converted = [] - if len(args) < 3: - if 'samples_per_epoch' in kwargs: - samples_per_epoch = kwargs.pop('samples_per_epoch') - if len(args) > 1: - generator = args[1] - else: - generator = kwargs['generator'] - if hasattr(generator, 'batch_size'): - kwargs['steps_per_epoch'] = samples_per_epoch // generator.batch_size - else: - kwargs['steps_per_epoch'] = samples_per_epoch - converted.append(('samples_per_epoch', 'steps_per_epoch')) - - keras1_args = {'samples_per_epoch', 'val_samples', - 'nb_epoch', 'nb_val_samples', 'nb_worker'} - if keras1_args.intersection(kwargs.keys()): - warnings.warn('The semantics of the Keras 2 argument ' - '`steps_per_epoch` is not the same as the ' - 'Keras 1 argument `samples_per_epoch`. ' - '`steps_per_epoch` is the number of batches ' - 'to draw from the generator at each epoch. ' - 'Basically steps_per_epoch = samples_per_epoch/batch_size. ' - 'Similarly `nb_val_samples`->`validation_steps` and ' - '`val_samples`->`steps` arguments have changed. 
' - 'Update your method calls accordingly.', stacklevel=3) - - return args, kwargs, converted - - -legacy_generator_methods_support = generate_legacy_method_interface( - allowed_positional_args=['generator', 'steps_per_epoch', 'epochs'], - conversions=[('samples_per_epoch', 'steps_per_epoch'), - ('val_samples', 'steps'), - ('nb_epoch', 'epochs'), - ('nb_val_samples', 'validation_steps'), - ('nb_worker', 'workers'), - ('pickle_safe', 'use_multiprocessing'), - ('max_q_size', 'max_queue_size')], - preprocessor=generator_methods_args_preprocessor) - - -legacy_model_constructor_support = generate_legacy_interface( - allowed_positional_args=None, - conversions=[('input', 'inputs'), - ('output', 'outputs')]) - -legacy_input_support = generate_legacy_interface( - allowed_positional_args=None, - conversions=[('input_dtype', 'dtype')]) - - -def add_weight_args_preprocessing(args, kwargs): - if len(args) > 1: - if isinstance(args[1], (tuple, list)): - kwargs['shape'] = args[1] - args = (args[0],) + args[2:] - if len(args) > 1: - if isinstance(args[1], six.string_types): - kwargs['name'] = args[1] - args = (args[0],) + args[2:] - return args, kwargs, [] - - -legacy_add_weight_support = generate_legacy_interface( - allowed_positional_args=['name', 'shape'], - preprocessor=add_weight_args_preprocessing) - - -def get_updates_arg_preprocessing(args, kwargs): - # Old interface: (params, constraints, loss) - # New interface: (loss, params) - if len(args) > 4: - raise TypeError('`get_update` call received more arguments ' - 'than expected.') - elif len(args) == 4: - # Assuming old interface. - opt, params, _, loss = args - kwargs['loss'] = loss - kwargs['params'] = params - return [opt], kwargs, [] - elif len(args) == 3: - if isinstance(args[1], (list, tuple)): - assert isinstance(args[2], dict) - assert 'loss' in kwargs - opt, params, _ = args - kwargs['params'] = params - return [opt], kwargs, [] - return args, kwargs, [] - - -legacy_get_updates_support = generate_legacy_interface( - allowed_positional_args=None, - conversions=[], - preprocessor=get_updates_arg_preprocessing) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import warnings - -from ..engine import Layer, InputSpec -from .. import backend as K -from ..utils import conv_utils -from ..utils.generic_utils import to_list -from .. import regularizers -from .. import constraints -from .. import activations -from .. import initializers - - -class MaxoutDense(Layer): - """A dense maxout layer. - A `MaxoutDense` layer takes the element-wise maximum of - `nb_feature` `Dense(input_dim, output_dim)` linear layers. - This allows the layer to learn a convex, - piecewise linear activation function over the inputs. - Note that this is a *linear* layer; - if you wish to apply activation function - (you shouldn't need to --they are universal function approximators), - an `Activation` layer must be added after. - # Arguments - output_dim: int > 0. - nb_feature: number of Dense layers to use internally. - init: name of initialization function for the weights of the layer - (see [initializations](../initializations.md)), - or alternatively, Theano function to use for weights - initialization. This parameter is only relevant - if you don't pass a `weights` argument. - weights: list of Numpy arrays to set as initial weights. - The list should have 2 elements, of shape `(input_dim, output_dim)` - and (output_dim,) for weights and biases respectively. 
- W_regularizer: instance of [WeightRegularizer](../regularizers.md) - (eg. L1 or L2 regularization), applied to the main weights matrix. - b_regularizer: instance of [WeightRegularizer](../regularizers.md), - applied to the bias. - activity_regularizer: instance of [ActivityRegularizer](../regularizers.md), - applied to the network output. - W_constraint: instance of the [constraints](../constraints.md) module - (eg. maxnorm, nonneg), applied to the main weights matrix. - b_constraint: instance of the [constraints](../constraints.md) module, - applied to the bias. - bias: whether to include a bias - (i.e. make the layer affine rather than linear). - input_dim: dimensionality of the input (integer). This argument - (or alternatively, the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. - # Input shape - 2D tensor with shape: `(nb_samples, input_dim)`. - # Output shape - 2D tensor with shape: `(nb_samples, output_dim)`. - # References - - [Maxout Networks](http://arxiv.org/abs/1302.4389) - """ - - def __init__(self, output_dim, - nb_feature=4, - init='glorot_uniform', - weights=None, - W_regularizer=None, - b_regularizer=None, - activity_regularizer=None, - W_constraint=None, - b_constraint=None, - bias=True, - input_dim=None, - **kwargs): - warnings.warn('The `MaxoutDense` layer is deprecated ' - 'and will be removed after 06/2017.') - self.output_dim = output_dim - self.nb_feature = nb_feature - self.init = initializers.get(init) - - self.W_regularizer = regularizers.get(W_regularizer) - self.b_regularizer = regularizers.get(b_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - - self.W_constraint = constraints.get(W_constraint) - self.b_constraint = constraints.get(b_constraint) - - self.bias = bias - self.initial_weights = weights - self.input_spec = InputSpec(ndim=2) - - self.input_dim = input_dim - if self.input_dim: - kwargs['input_shape'] = (self.input_dim,) - super(MaxoutDense, self).__init__(**kwargs) - - def build(self, input_shape): - input_dim = input_shape[1] - self.input_spec = InputSpec(dtype=K.floatx(), - shape=(None, input_dim)) - - self.W = self.add_weight((self.nb_feature, input_dim, self.output_dim), - initializer=self.init, - name='W', - regularizer=self.W_regularizer, - constraint=self.W_constraint) - if self.bias: - self.b = self.add_weight((self.nb_feature, self.output_dim,), - initializer='zero', - name='b', - regularizer=self.b_regularizer, - constraint=self.b_constraint) - else: - self.b = None - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - self.built = True - - def compute_output_shape(self, input_shape): - assert input_shape and len(input_shape) == 2 - return (input_shape[0], self.output_dim) - - def call(self, x): - # no activation, this layer is only linear. 
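        # Editor's shape sketch (hedged, not original comments): x has shape
        # (batch, input_dim) and self.W has shape (nb_feature, input_dim,
        # output_dim), so the dot product below yields (batch, nb_feature,
        # output_dim); the max over axis=1 then keeps, for each output unit,
        # the largest of the nb_feature linear responses.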
- output = K.dot(x, self.W) - if self.bias: - output += self.b - output = K.max(output, axis=1) - return output - - def get_config(self): - config = {'output_dim': self.output_dim, - 'init': initializers.serialize(self.init), - 'nb_feature': self.nb_feature, - 'W_regularizer': regularizers.serialize(self.W_regularizer), - 'b_regularizer': regularizers.serialize(self.b_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'W_constraint': constraints.serialize(self.W_constraint), - 'b_constraint': constraints.serialize(self.b_constraint), - 'bias': self.bias, - 'input_dim': self.input_dim} - base_config = super(MaxoutDense, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class Highway(Layer): - """Densely connected highway network. - Highway layers are a natural extension of LSTMs to feedforward networks. - # Arguments - init: name of initialization function for the weights of the layer - (see [initializations](../initializations.md)), - or alternatively, Theano function to use for weights - initialization. This parameter is only relevant - if you don't pass a `weights` argument. - activation: name of activation function to use - (see [activations](../activations.md)), - or alternatively, elementwise Theano function. - If you don't specify anything, no activation is applied - (ie. "linear" activation: a(x) = x). - weights: list of Numpy arrays to set as initial weights. - The list should have 2 elements, of shape `(input_dim, output_dim)` - and (output_dim,) for weights and biases respectively. - W_regularizer: instance of [WeightRegularizer](../regularizers.md) - (eg. L1 or L2 regularization), applied to the main weights matrix. - b_regularizer: instance of [WeightRegularizer](../regularizers.md), - applied to the bias. - activity_regularizer: instance of [ActivityRegularizer](../regularizers.md), - applied to the network output. - W_constraint: instance of the [constraints](../constraints.md) module - (eg. maxnorm, nonneg), applied to the main weights matrix. - b_constraint: instance of the [constraints](../constraints.md) module, - applied to the bias. - bias: whether to include a bias - (i.e. make the layer affine rather than linear). - input_dim: dimensionality of the input (integer). This argument - (or alternatively, the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. - # Input shape - 2D tensor with shape: `(nb_samples, input_dim)`. - # Output shape - 2D tensor with shape: `(nb_samples, input_dim)`. 
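    (Editor's summary of `call` below, hedged: with a transform gate
    `t = sigmoid(dot(x, W_carry) + b_carry)`, the output is
    `t * activation(dot(x, W) + b) + (1 - t) * x`, so the gate learns how
    much of the input to carry through unchanged.)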
- # References - - [Highway Networks](http://arxiv.org/abs/1505.00387v2) - """ - - def __init__(self, - init='glorot_uniform', - activation=None, - weights=None, - W_regularizer=None, - b_regularizer=None, - activity_regularizer=None, - W_constraint=None, - b_constraint=None, - bias=True, - input_dim=None, - **kwargs): - warnings.warn('The `Highway` layer is deprecated ' - 'and will be removed after 06/2017.') - if 'transform_bias' in kwargs: - kwargs.pop('transform_bias') - warnings.warn('`transform_bias` argument is deprecated and ' - 'has been removed.') - self.init = initializers.get(init) - self.activation = activations.get(activation) - - self.W_regularizer = regularizers.get(W_regularizer) - self.b_regularizer = regularizers.get(b_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - - self.W_constraint = constraints.get(W_constraint) - self.b_constraint = constraints.get(b_constraint) - - self.bias = bias - self.initial_weights = weights - self.input_spec = InputSpec(ndim=2) - - self.input_dim = input_dim - if self.input_dim: - kwargs['input_shape'] = (self.input_dim,) - super(Highway, self).__init__(**kwargs) - - def build(self, input_shape): - input_dim = input_shape[1] - self.input_spec = InputSpec(dtype=K.floatx(), - shape=(None, input_dim)) - - self.W = self.add_weight((input_dim, input_dim), - initializer=self.init, - name='W', - regularizer=self.W_regularizer, - constraint=self.W_constraint) - self.W_carry = self.add_weight((input_dim, input_dim), - initializer=self.init, - name='W_carry') - if self.bias: - self.b = self.add_weight((input_dim,), - initializer='zero', - name='b', - regularizer=self.b_regularizer, - constraint=self.b_constraint) - self.b_carry = self.add_weight((input_dim,), - initializer='one', - name='b_carry') - else: - self.b_carry = None - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - self.built = True - - def call(self, x): - y = K.dot(x, self.W_carry) - if self.bias: - y += self.b_carry - transform_weight = activations.sigmoid(y) - y = K.dot(x, self.W) - if self.bias: - y += self.b - act = self.activation(y) - act *= transform_weight - output = act + (1 - transform_weight) * x - return output - - def get_config(self): - config = {'init': initializers.serialize(self.init), - 'activation': activations.serialize(self.activation), - 'W_regularizer': regularizers.serialize(self.W_regularizer), - 'b_regularizer': regularizers.serialize(self.b_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'W_constraint': constraints.serialize(self.W_constraint), - 'b_constraint': constraints.serialize(self.b_constraint), - 'bias': self.bias, - 'input_dim': self.input_dim} - base_config = super(Highway, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -def AtrousConvolution1D(*args, **kwargs): - from ..layers import Conv1D - if 'atrous_rate' in kwargs: - rate = kwargs.pop('atrous_rate') - else: - rate = 1 - kwargs['dilation_rate'] = rate - warnings.warn('The `AtrousConvolution1D` layer ' - ' has been deprecated. Use instead ' - 'the `Conv1D` layer with the `dilation_rate` ' - 'argument.') - return Conv1D(*args, **kwargs) - - -def AtrousConvolution2D(*args, **kwargs): - from ..layers import Conv2D - if 'atrous_rate' in kwargs: - rate = kwargs.pop('atrous_rate') - else: - rate = 1 - kwargs['dilation_rate'] = rate - warnings.warn('The `AtrousConvolution2D` layer ' - ' has been deprecated. 
Use instead ' - 'the `Conv2D` layer with the `dilation_rate` ' - 'argument.') - return Conv2D(*args, **kwargs) - - -class Recurrent(Layer): - """Abstract base class for recurrent layers. - - Do not use in a model -- it's not a valid layer! - Use its children classes `LSTM`, `GRU` and `SimpleRNN` instead. - All recurrent layers (`LSTM`, `GRU`, `SimpleRNN`) also - follow the specifications of this class and accept - the keyword arguments listed below. - - # Example - - ```python - # as the first layer in a Sequential model - model = Sequential() - model.add(LSTM(32, input_shape=(10, 64))) - # now model.output_shape == (None, 32) - # note: `None` is the batch dimension. - # for subsequent layers, no need to specify the input size: - model.add(LSTM(16)) - # to stack recurrent layers, you must use return_sequences=True - # on any recurrent layer that feeds into another recurrent layer. - # note that you only need to specify the input size on the first layer. - model = Sequential() - model.add(LSTM(64, input_dim=64, input_length=10, return_sequences=True)) - model.add(LSTM(32, return_sequences=True)) - model.add(LSTM(10)) - ``` - - # Arguments - weights: list of Numpy arrays to set as initial weights. - The list should have 3 elements, of shapes: - `[(input_dim, output_dim), (output_dim, output_dim), (output_dim,)]`. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - implementation: one of {0, 1, or 2}. - If set to 0, the RNN will use - an implementation that uses fewer, larger matrix products, - thus running faster on CPU but consuming more memory. - If set to 1, the RNN will use more matrix products, - but smaller ones, thus running slower - (may actually be faster on GPU) while consuming less memory. - If set to 2 (LSTM/GRU only), - the RNN will combine the input gate, - the forget gate and the output gate into a single matrix, - enabling more time-efficient parallelization on the GPU. - Note: RNN dropout must be shared for all gates, - resulting in a slightly reduced regularization. - input_dim: dimensionality of the input (integer). - This argument (or alternatively, the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. - input_length: Length of input sequences, to be specified - when it is constant. - This argument is required if you are going to connect - `Flatten` then `Dense` layers upstream - (without it, the shape of the dense outputs cannot be computed). - Note that if the recurrent layer is not the first layer - in your model, you would need to specify the input length - at the level of the first layer - (e.g. via the `input_shape` argument) - - # Input shapes - 3D tensor with shape `(batch_size, timesteps, input_dim)`, - (Optional) 2D tensors with shape `(batch_size, output_dim)`. - - # Output shape - - if `return_state`: a list of tensors. 
The first tensor is
-            the output. The remaining tensors are the last states,
-            each with shape `(batch_size, units)`.
-        - if `return_sequences`: 3D tensor with shape
-            `(batch_size, timesteps, units)`.
-        - else, 2D tensor with shape `(batch_size, units)`.
-
-    # Masking
-        This layer supports masking for input data with a variable number
-        of timesteps. To introduce masks to your data,
-        use an [Embedding](embeddings.md) layer with the `mask_zero` parameter
-        set to `True`.
-
-    # Note on using statefulness in RNNs
-        You can set RNN layers to be 'stateful', which means that the states
-        computed for the samples in one batch will be reused as initial states
-        for the samples in the next batch. This assumes a one-to-one mapping
-        between samples in different successive batches.
-        To enable statefulness:
-            - specify `stateful=True` in the layer constructor.
-            - specify a fixed batch size for your model, by passing
-                if using a Sequential model:
-                  `batch_input_shape=(...)` to the first layer in your model.
-                else, for a functional model with 1 or more Input layers:
-                  `batch_shape=(...)` to all the first layers in your model.
-                This is the expected shape of your inputs
-                *including the batch size*.
-                It should be a tuple of integers, e.g. `(32, 10, 100)`.
-            - specify `shuffle=False` when calling fit().
-        To reset the states of your model, call `.reset_states()` on either
-        a specific layer, or on your entire model.
-
-    # Note on specifying the initial state of RNNs
-        You can specify the initial state of RNN layers symbolically by
-        calling them with the keyword argument `initial_state`. The value of
-        `initial_state` should be a tensor or list of tensors representing
-        the initial state of the RNN layer.
-        You can specify the initial state of RNN layers numerically by
-        calling `reset_states` with the keyword argument `states`. The value of
-        `states` should be a numpy array or list of numpy arrays representing
-        the initial state of the RNN layer.
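    A short hedged sketch of both mechanisms (an editor's illustration,
    not part of the original docstring):

    ```python
    # symbolic initial state via Keras tensors
    x = Input(shape=(10, 64))
    h0, c0 = Input(shape=(32,)), Input(shape=(32,))
    y = LSTM(32)(x, initial_state=[h0, c0])

    # numeric reset of a stateful layer (one array per state)
    layer.reset_states(states=[np.zeros((batch_size, 32))] * 2)
    ```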
- """ - - def __init__(self, return_sequences=False, - return_state=False, - go_backwards=False, - stateful=False, - unroll=False, - implementation=0, - **kwargs): - super(Recurrent, self).__init__(**kwargs) - self.return_sequences = return_sequences - self.return_state = return_state - self.go_backwards = go_backwards - - self.stateful = stateful - self.unroll = unroll - self.implementation = implementation - self.supports_masking = True - self.input_spec = [InputSpec(ndim=3)] - self.state_spec = None - self.dropout = 0 - self.recurrent_dropout = 0 - - def compute_output_shape(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - - if self.return_sequences: - output_shape = (input_shape[0], input_shape[1], self.units) - else: - output_shape = (input_shape[0], self.units) - - if self.return_state: - state_shape = [(input_shape[0], self.units) for _ in self.states] - return [output_shape] + state_shape - else: - return output_shape - - def compute_mask(self, inputs, mask): - if isinstance(mask, list): - mask = mask[0] - output_mask = mask if self.return_sequences else None - if self.return_state: - state_mask = [None for _ in self.states] - return [output_mask] + state_mask - else: - return output_mask - - def step(self, inputs, states): - raise NotImplementedError - - def get_constants(self, inputs, training=None): - return [] - - def get_initial_state(self, inputs): - # build an all-zero tensor of shape (samples, output_dim) - initial_state = K.zeros_like(inputs) # (samples, timesteps, input_dim) - initial_state = K.sum(initial_state, axis=(1, 2)) # (samples,) - initial_state = K.expand_dims(initial_state) # (samples, 1) - # (samples, output_dim) - initial_state = K.tile(initial_state, [1, self.units]) - initial_state = [initial_state for _ in range(len(self.states))] - return initial_state - - def preprocess_input(self, inputs, training=None): - return inputs - - def __call__(self, inputs, initial_state=None, **kwargs): - - # If there are multiple inputs, then - # they should be the main input and `initial_state` - # e.g. when loading model from file - if (isinstance(inputs, (list, tuple)) - and len(inputs) > 1 and initial_state is None): - initial_state = inputs[1:] - inputs = inputs[0] - - # If `initial_state` is specified, - # and if it a Keras tensor, - # then add it to the inputs and temporarily - # modify the input spec to include the state. 
- if initial_state is None: - return super(Recurrent, self).__call__(inputs, **kwargs) - - initial_state = to_list(initial_state, allow_tuple=True) - - is_keras_tensor = hasattr(initial_state[0], '_keras_history') - for tensor in initial_state: - if hasattr(tensor, '_keras_history') != is_keras_tensor: - raise ValueError('The initial state of an RNN layer cannot be' - ' specified with a mix of Keras tensors and' - ' non-Keras tensors') - - if is_keras_tensor: - # Compute the full input spec, including state - input_spec = self.input_spec - state_spec = self.state_spec - input_spec = to_list(input_spec) - state_spec = to_list(state_spec) - self.input_spec = input_spec + state_spec - - # Compute the full inputs, including state - inputs = [inputs] + list(initial_state) - - # Perform the call - output = super(Recurrent, self).__call__(inputs, **kwargs) - - # Restore original input spec - self.input_spec = input_spec - return output - else: - kwargs['initial_state'] = initial_state - return super(Recurrent, self).__call__(inputs, **kwargs) - - def call(self, inputs, mask=None, training=None, initial_state=None): - # input shape: `(samples, time (padded with zeros), input_dim)` - # note that the .build() method of subclasses MUST define - # self.input_spec and self.state_spec with complete input shapes. - if isinstance(inputs, list): - initial_state = inputs[1:] - inputs = inputs[0] - elif initial_state is not None: - pass - elif self.stateful: - initial_state = self.states - else: - initial_state = self.get_initial_state(inputs) - - if isinstance(mask, list): - mask = mask[0] - - if len(initial_state) != len(self.states): - raise ValueError('Layer has ' + str(len(self.states)) + - ' states but was passed ' + - str(len(initial_state)) + - ' initial states.') - input_shape = K.int_shape(inputs) - timesteps = input_shape[1] - if self.unroll and timesteps in [None, 1]: - raise ValueError('Cannot unroll a RNN if the ' - 'time dimension is undefined or equal to 1. \n' - '- If using a Sequential model, ' - 'specify the time dimension by passing ' - 'an `input_shape` or `batch_input_shape` ' - 'argument to your first layer. If your ' - 'first layer is an Embedding, you can ' - 'also use the `input_length` argument.\n' - '- If using the functional API, specify ' - 'the time dimension by passing a `shape` ' - 'or `batch_shape` argument to your Input layer.') - constants = self.get_constants(inputs, training=None) - preprocessed_input = self.preprocess_input(inputs, training=None) - last_output, outputs, states = K.rnn(self.step, - preprocessed_input, - initial_state, - go_backwards=self.go_backwards, - mask=mask, - constants=constants, - unroll=self.unroll, - input_length=timesteps) - if self.stateful: - updates = [] - for i in range(len(states)): - updates.append((self.states[i], states[i])) - self.add_update(updates, inputs) - - # Properly set learning phase - if 0 < self.dropout + self.recurrent_dropout: - last_output._uses_learning_phase = True - outputs._uses_learning_phase = True - - if self.return_sequences: - output = outputs - else: - output = last_output - - if self.return_state: - states = to_list(states, allow_tuple=True) - return [output] + states - else: - return output - - def reset_states(self, states=None): - if not self.stateful: - raise AttributeError('Layer must be stateful.') - batch_size = self.input_spec[0].shape[0] - if not batch_size: - raise ValueError('If a RNN is stateful, it needs to know ' - 'its batch size. 
Specify the batch size '
-                             'of your input tensors: \n'
-                             '- If using a Sequential model, '
-                             'specify the batch size by passing '
-                             'a `batch_input_shape` '
-                             'argument to your first layer.\n'
-                             '- If using the functional API, specify '
-                             'the batch size by passing a '
-                             '`batch_shape` argument to your Input layer.')
-        # initialize state if None
-        if self.states[0] is None:
-            self.states = [K.zeros((batch_size, self.units))
-                           for _ in self.states]
-        elif states is None:
-            for state in self.states:
-                K.set_value(state, np.zeros((batch_size, self.units)))
-        else:
-            states = to_list(states, allow_tuple=True)
-            if len(states) != len(self.states):
-                raise ValueError('Layer ' + self.name + ' expects ' +
-                                 str(len(self.states)) + ' states, '
-                                 'but it received ' + str(len(states)) +
-                                 ' state values. Input received: ' +
-                                 str(states))
-            for index, (value, state) in enumerate(zip(states, self.states)):
-                if value.shape != (batch_size, self.units):
-                    raise ValueError('State ' + str(index) +
-                                     ' is incompatible with layer ' +
-                                     self.name + ': expected shape=' +
-                                     str((batch_size, self.units)) +
-                                     ', found shape=' + str(value.shape))
-                K.set_value(state, value)
-
-    def get_config(self):
-        config = {'return_sequences': self.return_sequences,
-                  'return_state': self.return_state,
-                  'go_backwards': self.go_backwards,
-                  'stateful': self.stateful,
-                  'unroll': self.unroll,
-                  'implementation': self.implementation}
-        base_config = super(Recurrent, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-
-class ConvRecurrent2D(Recurrent):
-    """Abstract base class for convolutional recurrent layers.
-
-    Do not use in a model -- it's not a functional layer!
-
-    # Arguments
-        filters: Integer, the dimensionality of the output space
-            (i.e. the number of output filters in the convolution).
-        kernel_size: An integer or tuple/list of n integers, specifying the
-            dimensions of the convolution window.
-        strides: An integer or tuple/list of n integers,
-            specifying the strides of the convolution.
-            Specifying any stride value != 1 is incompatible with specifying
-            any `dilation_rate` value != 1.
-        padding: One of `"valid"` or `"same"` (case-insensitive).
-        data_format: A string,
-            one of `channels_last` (default) or `channels_first`.
-            The ordering of the dimensions in the inputs.
-            `channels_last` corresponds to inputs with shape
-            `(batch, time, ..., channels)`
-            while `channels_first` corresponds to
-            inputs with shape `(batch, time, channels, ...)`.
-            It defaults to the `image_data_format` value found in your
-            Keras config file at `~/.keras/keras.json`.
-            If you never set it, then it will be "channels_last".
-        dilation_rate: An integer or tuple/list of n integers, specifying
-            the dilation rate to use for dilated convolution.
-            Currently, specifying any `dilation_rate` value != 1 is
-            incompatible with specifying any `strides` value != 1.
-        return_sequences: Boolean. Whether to return the last output
-            in the output sequence, or the full sequence.
-        go_backwards: Boolean (default False).
-            If True, process the input sequence backwards.
-        stateful: Boolean (default False). If True, the last state
-            for each sample at index i in a batch will be used as initial
-            state for the sample of index i in the following batch.
-
-    # Input shape
-        5D tensor with shape `(num_samples, timesteps, channels, rows, cols)`.
-
-    # Output shape
-        - if `return_sequences`: 5D tensor with shape
-            `(num_samples, timesteps, channels, rows, cols)`.
- - else, 4D tensor with shape `(num_samples, channels, rows, cols)`. - - # Masking - This layer supports masking for input data with a variable number - of timesteps. To introduce masks to your data, - use an [Embedding](embeddings.md) layer with the `mask_zero` parameter - set to `True`. - **Note:** for the time being, masking is only supported with Theano. - - # Note on using statefulness in RNNs - You can set RNN layers to be 'stateful', which means that the states - computed for the samples in one batch will be reused as initial states - for the samples in the next batch. - This assumes a one-to-one mapping between - samples in different successive batches. - - To enable statefulness: - - specify `stateful=True` in the layer constructor. - - specify a fixed batch size for your model, by passing - a `batch_input_size=(...)` to the first layer in your model. - This is the expected shape of your inputs *including the batch - size*. - It should be a tuple of integers, e.g. `(32, 10, 100)`. - - To reset the states of your model, call `.reset_states()` on either - a specific layer, or on your entire model. - """ - - def __init__(self, filters, - kernel_size, - strides=(1, 1), - padding='valid', - data_format=None, - dilation_rate=(1, 1), - return_sequences=False, - go_backwards=False, - stateful=False, - **kwargs): - super(ConvRecurrent2D, self).__init__(**kwargs) - self.filters = filters - self.kernel_size = conv_utils.normalize_tuple( - kernel_size, 2, 'kernel_size') - self.strides = conv_utils.normalize_tuple(strides, 2, 'strides') - self.padding = conv_utils.normalize_padding(padding) - self.data_format = K.normalize_data_format(data_format) - self.dilation_rate = conv_utils.normalize_tuple(dilation_rate, 2, - 'dilation_rate') - self.return_sequences = return_sequences - self.go_backwards = go_backwards - self.stateful = stateful - self.input_spec = [InputSpec(ndim=5)] - self.state_spec = None - - def compute_output_shape(self, input_shape): - if isinstance(input_shape, list): - input_shape = input_shape[0] - if self.data_format == 'channels_first': - rows = input_shape[3] - cols = input_shape[4] - elif self.data_format == 'channels_last': - rows = input_shape[2] - cols = input_shape[3] - rows = conv_utils.conv_output_length(rows, - self.kernel_size[0], - padding=self.padding, - stride=self.strides[0], - dilation=self.dilation_rate[0]) - cols = conv_utils.conv_output_length(cols, - self.kernel_size[1], - padding=self.padding, - stride=self.strides[1], - dilation=self.dilation_rate[1]) - if self.return_sequences: - if self.data_format == 'channels_first': - output_shape = (input_shape[0], input_shape[1], - self.filters, rows, cols) - elif self.data_format == 'channels_last': - output_shape = (input_shape[0], input_shape[1], - rows, cols, self.filters) - else: - if self.data_format == 'channels_first': - output_shape = (input_shape[0], self.filters, rows, cols) - elif self.data_format == 'channels_last': - output_shape = (input_shape[0], rows, cols, self.filters) - - if self.return_state: - if self.data_format == 'channels_first': - state_shape = (input_shape[0], self.filters, rows, cols) - elif self.data_format == 'channels_last': - state_shape = (input_shape[0], rows, cols, self.filters) - output_shape = [output_shape, state_shape, state_shape] - - return output_shape - - def get_config(self): - config = {'filters': self.filters, - 'kernel_size': self.kernel_size, - 'strides': self.strides, - 'padding': self.padding, - 'data_format': self.data_format, - 'dilation_rate': 
self.dilation_rate, - 'return_sequences': self.return_sequences, - 'go_backwards': self.go_backwards, - 'stateful': self.stateful} - base_config = super(ConvRecurrent2D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from . import text -from . import sequence -from . import image -from .. import backend -from .. import utils - -import keras_preprocessing - -keras_preprocessing.set_keras_submodules(backend=backend, utils=utils) -"""Utilities for real-time data augmentation on image data. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from .. import backend -from .. import utils -from ..utils import generic_utils - -from keras_preprocessing import image - -random_rotation = image.random_rotation -random_shift = image.random_shift -random_shear = image.random_shear -random_zoom = image.random_zoom -apply_channel_shift = image.apply_channel_shift -random_channel_shift = image.random_channel_shift -apply_brightness_shift = image.apply_brightness_shift -random_brightness = image.random_brightness -apply_affine_transform = image.apply_affine_transform -load_img = image.load_img - - -def array_to_img(x, data_format=None, scale=True, dtype=None): - if data_format is None: - data_format = backend.image_data_format() - if 'dtype' in generic_utils.getargspec(image.array_to_img).args: - if dtype is None: - dtype = backend.floatx() - return image.array_to_img(x, - data_format=data_format, - scale=scale, - dtype=dtype) - return image.array_to_img(x, - data_format=data_format, - scale=scale) - - -def img_to_array(img, data_format=None, dtype=None): - if data_format is None: - data_format = backend.image_data_format() - if 'dtype' in generic_utils.getargspec(image.img_to_array).args: - if dtype is None: - dtype = backend.floatx() - return image.img_to_array(img, data_format=data_format, dtype=dtype) - return image.img_to_array(img, data_format=data_format) - - -def save_img(path, - x, - data_format=None, - file_format=None, - scale=True, **kwargs): - if data_format is None: - data_format = backend.image_data_format() - return image.save_img(path, - x, - data_format=data_format, - file_format=file_format, - scale=scale, **kwargs) - - -class Iterator(image.Iterator, utils.Sequence): - """Base class for image data iterators. - - Every `Iterator` must implement the `_get_batches_of_transformed_samples` - method. - - # Arguments - n: Integer, total number of samples in the dataset to loop over. - batch_size: Integer, size of a batch. - shuffle: Boolean, whether to shuffle the data between epochs. - seed: Random seeding for data shuffling. - """ - pass - - -class DirectoryIterator(image.DirectoryIterator, Iterator): - """Iterator capable of reading images from a directory on disk. - - # Arguments - directory: Path to the directory to read images from. - Each subdirectory in this directory will be - considered to contain images from one class, - or alternatively you could specify class subdirectories - via the `classes` argument. - image_data_generator: Instance of `ImageDataGenerator` - to use for random transformations and normalization. - target_size: tuple of integers, dimensions to resize input images to. - color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`. - Color mode to read images. - classes: Optional list of strings, names of subdirectories - containing images from each class (e.g. 
`["dogs", "cats"]`). - It will be computed automatically if not set. - class_mode: Mode for yielding the targets: - `"binary"`: binary targets (if there are only two classes), - `"categorical"`: categorical targets, - `"sparse"`: integer targets, - `"input"`: targets are images identical to input images (mainly - used to work with autoencoders), - `None`: no targets get yielded (only input images are yielded). - batch_size: Integer, size of a batch. - shuffle: Boolean, whether to shuffle the data between epochs. - If set to False, sorts the data in alphanumeric order. - seed: Random seed for data shuffling. - data_format: String, one of `channels_first`, `channels_last`. - save_to_dir: Optional directory where to save the pictures - being yielded, in a viewable format. This is useful - for visualizing the random transformations being - applied, for debugging purposes. - save_prefix: String prefix to use for saving sample - images (if `save_to_dir` is set). - save_format: Format to use for saving sample images - (if `save_to_dir` is set). - subset: Subset of data (`"training"` or `"validation"`) if - validation_split is set in ImageDataGenerator. - interpolation: Interpolation method used to resample the image if the - target size is different from that of the loaded image. - Supported methods are "nearest", "bilinear", and "bicubic". - If PIL version 1.1.3 or newer is installed, "lanczos" is also - supported. If PIL version 3.4.0 or newer is installed, "box" and - "hamming" are also supported. By default, "nearest" is used. - dtype: Dtype to use for generated arrays. - """ - - def __init__(self, directory, image_data_generator, - target_size=(256, 256), - color_mode='rgb', - classes=None, - class_mode='categorical', - batch_size=32, - shuffle=True, - seed=None, - data_format=None, - save_to_dir=None, - save_prefix='', - save_format='png', - follow_links=False, - subset=None, - interpolation='nearest', - dtype=None): - if data_format is None: - data_format = backend.image_data_format() - kwargs = {} - if 'dtype' in generic_utils.getargspec( - image.ImageDataGenerator.__init__).args: - if dtype is None: - dtype = backend.floatx() - kwargs['dtype'] = dtype - super(DirectoryIterator, self).__init__( - directory, image_data_generator, - target_size=target_size, - color_mode=color_mode, - classes=classes, - class_mode=class_mode, - batch_size=batch_size, - shuffle=shuffle, - seed=seed, - data_format=data_format, - save_to_dir=save_to_dir, - save_prefix=save_prefix, - save_format=save_format, - follow_links=follow_links, - subset=subset, - interpolation=interpolation, - **kwargs) - - -class NumpyArrayIterator(image.NumpyArrayIterator, Iterator): - """Iterator yielding data from a Numpy array. - - # Arguments - x: Numpy array of input data or tuple. - If tuple, the second elements is either - another numpy array or a list of numpy arrays, - each of which gets passed - through as an output without any modifications. - y: Numpy array of targets data. - image_data_generator: Instance of `ImageDataGenerator` - to use for random transformations and normalization. - batch_size: Integer, size of a batch. - shuffle: Boolean, whether to shuffle the data between epochs. - sample_weight: Numpy array of sample weights. - seed: Random seed for data shuffling. - data_format: String, one of `channels_first`, `channels_last`. - save_to_dir: Optional directory where to save the pictures - being yielded, in a viewable format. 
This is useful - for visualizing the random transformations being - applied, for debugging purposes. - save_prefix: String prefix to use for saving sample - images (if `save_to_dir` is set). - save_format: Format to use for saving sample images - (if `save_to_dir` is set). - subset: Subset of data (`"training"` or `"validation"`) if - validation_split is set in ImageDataGenerator. - dtype: Dtype to use for the generated arrays. - """ - - def __init__(self, x, y, image_data_generator, - batch_size=32, - shuffle=False, - sample_weight=None, - seed=None, - data_format=None, - save_to_dir=None, - save_prefix='', - save_format='png', - subset=None, - dtype=None): - if data_format is None: - data_format = backend.image_data_format() - kwargs = {} - if 'dtype' in generic_utils.getargspec( - image.NumpyArrayIterator.__init__).args: - if dtype is None: - dtype = backend.floatx() - kwargs['dtype'] = dtype - super(NumpyArrayIterator, self).__init__( - x, y, image_data_generator, - batch_size=batch_size, - shuffle=shuffle, - sample_weight=sample_weight, - seed=seed, - data_format=data_format, - save_to_dir=save_to_dir, - save_prefix=save_prefix, - save_format=save_format, - subset=subset, - **kwargs) - - -class ImageDataGenerator(image.ImageDataGenerator): - """Generate batches of tensor image data with real-time data augmentation. - The data will be looped over (in batches). - - # Arguments - featurewise_center: Boolean. - Set input mean to 0 over the dataset, feature-wise. - samplewise_center: Boolean. Set each sample mean to 0. - featurewise_std_normalization: Boolean. - Divide inputs by std of the dataset, feature-wise. - samplewise_std_normalization: Boolean. Divide each input by its std. - zca_epsilon: epsilon for ZCA whitening. Default is 1e-6. - zca_whitening: Boolean. Apply ZCA whitening. - rotation_range: Int. Degree range for random rotations. - width_shift_range: Float, 1-D array-like or int - - float: fraction of total width, if < 1, or pixels if >= 1. - - 1-D array-like: random elements from the array. - - int: integer number of pixels from interval - `(-width_shift_range, +width_shift_range)` - - With `width_shift_range=2` possible values - are integers `[-1, 0, +1]`, - same as with `width_shift_range=[-1, 0, +1]`, - while with `width_shift_range=1.0` possible values are floats - in the half-open interval `[-1.0, +1.0[`. - height_shift_range: Float, 1-D array-like or int - - float: fraction of total height, if < 1, or pixels if >= 1. - - 1-D array-like: random elements from the array. - - int: integer number of pixels from interval - `(-height_shift_range, +height_shift_range)` - - With `height_shift_range=2` possible values - are integers `[-1, 0, +1]`, - same as with `height_shift_range=[-1, 0, +1]`, - while with `height_shift_range=1.0` possible values are floats - in the half-open interval `[-1.0, +1.0[`. - brightness_range: Tuple or list of two floats. Range for picking - a brightness shift value from. - shear_range: Float. Shear Intensity - (Shear angle in counter-clockwise direction in degrees) - zoom_range: Float or [lower, upper]. Range for random zoom. - If a float, `[lower, upper] = [1-zoom_range, 1+zoom_range]`. - channel_shift_range: Float. Range for random channel shifts. - fill_mode: One of {"constant", "nearest", "reflect" or "wrap"}. - Default is 'nearest'. 
- Points outside the boundaries of the input are filled - according to the given mode: - - 'constant': kkkkkkkk|abcd|kkkkkkkk (cval=k) - - 'nearest': aaaaaaaa|abcd|dddddddd - - 'reflect': abcddcba|abcd|dcbaabcd - - 'wrap': abcdabcd|abcd|abcdabcd - cval: Float or Int. - Value used for points outside the boundaries - when `fill_mode = "constant"`. - horizontal_flip: Boolean. Randomly flip inputs horizontally. - vertical_flip: Boolean. Randomly flip inputs vertically. - rescale: rescaling factor. Defaults to None. - If None or 0, no rescaling is applied, - otherwise we multiply the data by the value provided - (after applying all other transformations). - preprocessing_function: function that will be implied on each input. - The function will run after the image is resized and augmented. - The function should take one argument: - one image (Numpy tensor with rank 3), - and should output a Numpy tensor with the same shape. - data_format: Image data format, - either "channels_first" or "channels_last". - "channels_last" mode means that the images should have shape - `(samples, height, width, channels)`, - "channels_first" mode means that the images should have shape - `(samples, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - validation_split: Float. Fraction of images reserved for validation - (strictly between 0 and 1). - dtype: Dtype to use for the generated arrays. - - # Examples - Example of using `.flow(x, y)`: - - ```python - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - y_train = np_utils.to_categorical(y_train, num_classes) - y_test = np_utils.to_categorical(y_test, num_classes) - - datagen = ImageDataGenerator( - featurewise_center=True, - featurewise_std_normalization=True, - rotation_range=20, - width_shift_range=0.2, - height_shift_range=0.2, - horizontal_flip=True) - - # compute quantities required for featurewise normalization - # (std, mean, and principal components if ZCA whitening is applied) - datagen.fit(x_train) - - # fits the model on batches with real-time data augmentation: - model.fit_generator(datagen.flow(x_train, y_train, batch_size=32), - steps_per_epoch=len(x_train) / 32, epochs=epochs) - - # here's a more "manual" example - for e in range(epochs): - print('Epoch', e) - batches = 0 - for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=32): - model.fit(x_batch, y_batch) - batches += 1 - if batches >= len(x_train) / 32: - # we need to break the loop by hand because - # the generator loops indefinitely - break - ``` - Example of using `.flow_from_directory(directory)`: - - ```python - train_datagen = ImageDataGenerator( - rescale=1./255, - shear_range=0.2, - zoom_range=0.2, - horizontal_flip=True) - - test_datagen = ImageDataGenerator(rescale=1./255) - - train_generator = train_datagen.flow_from_directory( - 'data/train', - target_size=(150, 150), - batch_size=32, - class_mode='binary') - - validation_generator = test_datagen.flow_from_directory( - 'data/validation', - target_size=(150, 150), - batch_size=32, - class_mode='binary') - - model.fit_generator( - train_generator, - steps_per_epoch=2000, - epochs=50, - validation_data=validation_generator, - validation_steps=800) - ``` - - Example of transforming images and masks together. 
- - ```python - # we create two instances with the same arguments - data_gen_args = dict(featurewise_center=True, - featurewise_std_normalization=True, - rotation_range=90, - width_shift_range=0.1, - height_shift_range=0.1, - zoom_range=0.2) - image_datagen = ImageDataGenerator(**data_gen_args) - mask_datagen = ImageDataGenerator(**data_gen_args) - - # Provide the same seed and keyword arguments to the fit and flow methods - seed = 1 - image_datagen.fit(images, augment=True, seed=seed) - mask_datagen.fit(masks, augment=True, seed=seed) - - image_generator = image_datagen.flow_from_directory( - 'data/images', - class_mode=None, - seed=seed) - - mask_generator = mask_datagen.flow_from_directory( - 'data/masks', - class_mode=None, - seed=seed) - - # combine generators into one which yields image and masks - train_generator = zip(image_generator, mask_generator) - - model.fit_generator( - train_generator, - steps_per_epoch=2000, - epochs=50) - ``` - """ - - def __init__(self, - featurewise_center=False, - samplewise_center=False, - featurewise_std_normalization=False, - samplewise_std_normalization=False, - zca_whitening=False, - zca_epsilon=1e-6, - rotation_range=0, - width_shift_range=0., - height_shift_range=0., - brightness_range=None, - shear_range=0., - zoom_range=0., - channel_shift_range=0., - fill_mode='nearest', - cval=0., - horizontal_flip=False, - vertical_flip=False, - rescale=None, - preprocessing_function=None, - data_format=None, - validation_split=0.0, - dtype=None): - if data_format is None: - data_format = backend.image_data_format() - kwargs = {} - if 'dtype' in generic_utils.getargspec( - image.ImageDataGenerator.__init__).args: - if dtype is None: - dtype = backend.floatx() - kwargs['dtype'] = dtype - super(ImageDataGenerator, self).__init__( - featurewise_center=featurewise_center, - samplewise_center=samplewise_center, - featurewise_std_normalization=featurewise_std_normalization, - samplewise_std_normalization=samplewise_std_normalization, - zca_whitening=zca_whitening, - zca_epsilon=zca_epsilon, - rotation_range=rotation_range, - width_shift_range=width_shift_range, - height_shift_range=height_shift_range, - brightness_range=brightness_range, - shear_range=shear_range, - zoom_range=zoom_range, - channel_shift_range=channel_shift_range, - fill_mode=fill_mode, - cval=cval, - horizontal_flip=horizontal_flip, - vertical_flip=vertical_flip, - rescale=rescale, - preprocessing_function=preprocessing_function, - data_format=data_format, - validation_split=validation_split, - **kwargs) - - -array_to_img.__doc__ = image.array_to_img.__doc__ -img_to_array.__doc__ = image.img_to_array.__doc__ -save_img.__doc__ = image.save_img.__doc__ -"""Utilities for preprocessing sequence data. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras_preprocessing import sequence -from .. import utils - -pad_sequences = sequence.pad_sequences -make_sampling_table = sequence.make_sampling_table -skipgrams = sequence.skipgrams -_remove_long_seq = sequence._remove_long_seq # TODO: make it public? - - -class TimeseriesGenerator(sequence.TimeseriesGenerator, utils.Sequence): - """Utility class for generating batches of temporal data. - - This class takes in a sequence of data-points gathered at - equal intervals, along with time series parameters such as - stride, length of history, etc., to produce batches for - training/validation. 
-
-    # Arguments
-        data: Indexable generator (such as list or Numpy array)
-            containing consecutive data points (timesteps).
-            The data should be 2D, and axis 0 is expected
-            to be the time dimension.
-        targets: Targets corresponding to timesteps in `data`.
-            It should have the same length as `data`.
-        length: Length of the output sequences (in number of timesteps).
-        sampling_rate: Period between successive individual timesteps
-            within sequences. For rate `r`, timesteps
-            `data[i]`, `data[i-r]`, ... `data[i - length]`
-            are used to create a sample sequence.
-        stride: Period between successive output sequences.
-            For stride `s`, consecutive output samples would
-            be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc.
-        start_index: Data points earlier than `start_index` will not be used
-            in the output sequences. This is useful to reserve part of the
-            data for test or validation.
-        end_index: Data points later than `end_index` will not be used
-            in the output sequences. This is useful to reserve part of the
-            data for test or validation.
-        shuffle: Whether to shuffle output samples,
-            or instead draw them in chronological order.
-        reverse: Boolean: if `True`, timesteps in each output sample will be
-            in reverse chronological order.
-        batch_size: Number of timeseries samples in each batch
-            (except maybe the last one).
-
-    # Returns
-        A [Sequence](/utils/#sequence) instance.
-
-    # Examples
-
-    ```python
-    from keras.preprocessing.sequence import TimeseriesGenerator
-    import numpy as np
-
-    data = np.array([[i] for i in range(50)])
-    targets = np.array([[i] for i in range(50)])
-
-    data_gen = TimeseriesGenerator(data, targets,
-                                   length=10, sampling_rate=2,
-                                   batch_size=2)
-    assert len(data_gen) == 20
-
-    batch_0 = data_gen[0]
-    x, y = batch_0
-    assert np.array_equal(x,
-                          np.array([[[0], [2], [4], [6], [8]],
-                                    [[1], [3], [5], [7], [9]]]))
-    assert np.array_equal(y,
-                          np.array([[10], [11]]))
-    ```
-    """
-    pass
-"""Utilities for text input preprocessing.
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from keras_preprocessing import text
-
-text_to_word_sequence = text.text_to_word_sequence
-one_hot = text.one_hot
-hashing_trick = text.hashing_trick
-Tokenizer = text.Tokenizer
-tokenizer_from_json = text.tokenizer_from_json
-from __future__ import absolute_import
-from . import np_utils
-from . import generic_utils
-from . import data_utils
-from . import io_utils
-from . import conv_utils
-
-# Globally-importable utils.
-from .io_utils import HDF5Matrix
-from .io_utils import H5Dict
-from .data_utils import get_file
-from .data_utils import Sequence
-from .data_utils import GeneratorEnqueuer
-from .data_utils import OrderedEnqueuer
-from .generic_utils import CustomObjectScope
-from .generic_utils import custom_object_scope
-from .generic_utils import get_custom_objects
-from .generic_utils import serialize_keras_object
-from .generic_utils import deserialize_keras_object
-from .generic_utils import Progbar
-from .layer_utils import convert_all_kernels_in_model
-from .layer_utils import get_source_inputs
-from .layer_utils import print_summary
-from .vis_utils import model_to_dot
-from .vis_utils import plot_model
-from .np_utils import to_categorical
-from .np_utils import normalize
-from .multi_gpu_utils import multi_gpu_model
-"""Utilities used in convolutional layers.
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from six.moves import range -import numpy as np -from .. import backend as K - - -def normalize_tuple(value, n, name): - """Transforms a single int or iterable of ints into an int tuple. - - # Arguments - value: The value to validate and convert. Could be an int, or any iterable - of ints. - n: The size of the tuple to be returned. - name: The name of the argument being validated, e.g. `strides` or - `kernel_size`. This is only used to format error messages. - - # Returns - A tuple of n integers. - - # Raises - ValueError: If something else than an int/long or iterable thereof was - passed. - """ - if isinstance(value, int): - return (value,) * n - else: - try: - value_tuple = tuple(value) - except TypeError: - raise ValueError('The `' + name + '` argument must be a tuple of ' + - str(n) + ' integers. Received: ' + str(value)) - if len(value_tuple) != n: - raise ValueError('The `' + name + '` argument must be a tuple of ' + - str(n) + ' integers. Received: ' + str(value)) - for single_value in value_tuple: - try: - int(single_value) - except ValueError: - raise ValueError('The `' + name + '` argument must be a tuple of ' + - str(n) + ' integers. Received: ' + - str(value) + ' ' - 'including element ' + - str(single_value) + ' of ' - 'type ' + str(type(single_value))) - return value_tuple - - -def normalize_padding(value): - padding = value.lower() - allowed = {'valid', 'same', 'causal'} - if K.backend() == 'theano': - allowed.add('full') - if padding not in allowed: - raise ValueError('The `padding` argument must be one of "valid", "same" ' - '(or "causal" for Conv1D). Received: ' + str(padding)) - return padding - - -def convert_kernel(kernel): - """Converts a Numpy kernel matrix from Theano format to TensorFlow format. - - Also works reciprocally, since the transformation is its own inverse. - - # Arguments - kernel: Numpy array (3D, 4D or 5D). - - # Returns - The converted kernel. - - # Raises - ValueError: in case of invalid kernel shape or invalid data_format. - """ - kernel = np.asarray(kernel) - if not 3 <= kernel.ndim <= 5: - raise ValueError('Invalid kernel shape:', kernel.shape) - slices = [slice(None, None, -1) for _ in range(kernel.ndim)] - no_flip = (slice(None, None), slice(None, None)) - slices[-2:] = no_flip - return np.copy(kernel[tuple(slices)]) - - -def conv_output_length(input_length, filter_size, - padding, stride, dilation=1): - """Determines output length of a convolution given input length. - - # Arguments - input_length: integer. - filter_size: integer. - padding: one of `"same"`, `"valid"`, `"full"`. - stride: integer. - dilation: dilation rate, integer. - - # Returns - The output length (integer). - """ - if input_length is None: - return None - assert padding in {'same', 'valid', 'full', 'causal'} - dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1) - if padding == 'same': - output_length = input_length - elif padding == 'valid': - output_length = input_length - dilated_filter_size + 1 - elif padding == 'causal': - output_length = input_length - elif padding == 'full': - output_length = input_length + dilated_filter_size - 1 - return (output_length + stride - 1) // stride - - -def conv_input_length(output_length, filter_size, padding, stride): - """Determines input length of a convolution given output length. - - # Arguments - output_length: integer. - filter_size: integer. - padding: one of `"same"`, `"valid"`, `"full"`. 
- stride: integer. - - # Returns - The input length (integer). - """ - if output_length is None: - return None - assert padding in {'same', 'valid', 'full'} - if padding == 'same': - pad = filter_size // 2 - elif padding == 'valid': - pad = 0 - elif padding == 'full': - pad = filter_size - 1 - return (output_length - 1) * stride - 2 * pad + filter_size - - -def deconv_length(dim_size, stride_size, kernel_size, padding, - output_padding, dilation=1): - """Determines output length of a transposed convolution given input length. - - # Arguments - dim_size: Integer, the input length. - stride_size: Integer, the stride along the dimension of `dim_size`. - kernel_size: Integer, the kernel size along the dimension of - `dim_size`. - padding: One of `"same"`, `"valid"`, `"full"`. - output_padding: Integer, amount of padding along the output dimension, - Can be set to `None` in which case the output length is inferred. - dilation: dilation rate, integer. - - # Returns - The output length (integer). - """ - assert padding in {'same', 'valid', 'full'} - if dim_size is None: - return None - - # Get the dilated kernel size - kernel_size = kernel_size + (kernel_size - 1) * (dilation - 1) - - # Infer length if output padding is None, else compute the exact length - if output_padding is None: - if padding == 'valid': - dim_size = dim_size * stride_size + \ - max(kernel_size - stride_size, 0) - elif padding == 'full': - dim_size = dim_size * stride_size - (stride_size + kernel_size - 2) - elif padding == 'same': - dim_size = dim_size * stride_size - else: - if padding == 'same': - pad = kernel_size // 2 - elif padding == 'valid': - pad = 0 - elif padding == 'full': - pad = kernel_size - 1 - - dim_size = ((dim_size - 1) * stride_size + kernel_size - 2 * pad + - output_padding) - - return dim_size -"""Utilities for file download and caching.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import hashlib -import multiprocessing as mp -import os -import random -import shutil -import sys -import tarfile -import threading -import time -import warnings -import zipfile -from abc import abstractmethod -from contextlib import closing -from multiprocessing.pool import ThreadPool - -import numpy as np -import six -from six.moves.urllib.error import HTTPError -from six.moves.urllib.error import URLError -from six.moves.urllib.request import urlopen - -try: - import queue -except ImportError: - import Queue as queue - -from ..utils.generic_utils import Progbar - -if sys.version_info[0] == 2: - def urlretrieve(url, filename, reporthook=None, data=None): - """Replacement for `urlretrive` for Python 2. - - Under Python 2, `urlretrieve` relies on `FancyURLopener` from legacy - `urllib` module, known to have issues with proxy management. - - # Arguments - url: url to retrieve. - filename: where to store the retrieved data locally. - reporthook: a hook function that will be called once - on establishment of the network connection and once - after each block read thereafter. - The hook will be passed three arguments; - a count of blocks transferred so far, - a block size in bytes, and the total size of the file. - data: `data` argument passed to `urlopen`. 
- """ - - def chunk_read(response, chunk_size=8192, reporthook=None): - content_type = response.info().get('Content-Length') - total_size = -1 - if content_type is not None: - total_size = int(content_type.strip()) - count = 0 - while True: - chunk = response.read(chunk_size) - count += 1 - if reporthook is not None: - reporthook(count, chunk_size, total_size) - if chunk: - yield chunk - else: - break - - with closing(urlopen(url, data)) as response, open(filename, 'wb') as fd: - for chunk in chunk_read(response, reporthook=reporthook): - fd.write(chunk) -else: - from six.moves.urllib.request import urlretrieve - - -def _extract_archive(file_path, path='.', archive_format='auto'): - """Extracts an archive if it matches tar, tar.gz, tar.bz, or zip formats. - - # Arguments - file_path: path to the archive file - path: path to extract the archive file - archive_format: Archive format to try for extracting the file. - Options are 'auto', 'tar', 'zip', and None. - 'tar' includes tar, tar.gz, and tar.bz files. - The default 'auto' is ['tar', 'zip']. - None or an empty list will return no matches found. - - # Returns - True if a match was found and an archive extraction was completed, - False otherwise. - """ - if archive_format is None: - return False - if archive_format == 'auto': - archive_format = ['tar', 'zip'] - if isinstance(archive_format, six.string_types): - archive_format = [archive_format] - - for archive_type in archive_format: - if archive_type == 'tar': - open_fn = tarfile.open - is_match_fn = tarfile.is_tarfile - if archive_type == 'zip': - open_fn = zipfile.ZipFile - is_match_fn = zipfile.is_zipfile - - if is_match_fn(file_path): - with open_fn(file_path) as archive: - try: - archive.extractall(path) - except (tarfile.TarError, RuntimeError, - KeyboardInterrupt): - if os.path.exists(path): - if os.path.isfile(path): - os.remove(path) - else: - shutil.rmtree(path) - raise - return True - return False - - -def get_file(fname, - origin, - untar=False, - md5_hash=None, - file_hash=None, - cache_subdir='datasets', - hash_algorithm='auto', - extract=False, - archive_format='auto', - cache_dir=None): - """Downloads a file from a URL if it not already in the cache. - - By default the file at the url `origin` is downloaded to the - cache_dir `~/.keras`, placed in the cache_subdir `datasets`, - and given the filename `fname`. The final location of a file - `example.txt` would therefore be `~/.keras/datasets/example.txt`. - - Files in tar, tar.gz, tar.bz, and zip formats can also be extracted. - Passing a hash will verify the file after download. The command line - programs `shasum` and `sha256sum` can compute the hash. - - # Arguments - fname: Name of the file. If an absolute path `/path/to/file.txt` is - specified the file will be saved at that location. - origin: Original URL of the file. - untar: Deprecated in favor of 'extract'. - boolean, whether the file should be decompressed - md5_hash: Deprecated in favor of 'file_hash'. - md5 hash of the file for verification - file_hash: The expected hash string of the file after download. - The sha256 and md5 hash algorithms are both supported. - cache_subdir: Subdirectory under the Keras cache dir where the file is - saved. If an absolute path `/path/to/folder` is - specified the file will be saved at that location. - hash_algorithm: Select the hash algorithm to verify the file. - options are 'md5', 'sha256', and 'auto'. - The default 'auto' detects the hash algorithm in use. 
- extract: True tries extracting the file as an Archive, like tar or zip. - archive_format: Archive format to try for extracting the file. - Options are 'auto', 'tar', 'zip', and None. - 'tar' includes tar, tar.gz, and tar.bz files. - The default 'auto' is ['tar', 'zip']. - None or an empty list will return no matches found. - cache_dir: Location to store cached files, when None it - defaults to the [Keras Directory](/faq/#where-is-the-keras-configuration-filed-stored). - - # Returns - Path to the downloaded file - """ # noqa - if cache_dir is None: - if 'KERAS_HOME' in os.environ: - cache_dir = os.environ.get('KERAS_HOME') - else: - cache_dir = os.path.join(os.path.expanduser('~'), '.keras') - if md5_hash is not None and file_hash is None: - file_hash = md5_hash - hash_algorithm = 'md5' - datadir_base = os.path.expanduser(cache_dir) - if not os.access(datadir_base, os.W_OK): - datadir_base = os.path.join('/tmp', '.keras') - datadir = os.path.join(datadir_base, cache_subdir) - if not os.path.exists(datadir): - os.makedirs(datadir) - - if untar: - untar_fpath = os.path.join(datadir, fname) - fpath = untar_fpath + '.tar.gz' - else: - fpath = os.path.join(datadir, fname) - - download = False - if os.path.exists(fpath): - # File found; verify integrity if a hash was provided. - if file_hash is not None: - if not validate_file(fpath, file_hash, algorithm=hash_algorithm): - print('A local file was found, but it seems to be ' - 'incomplete or outdated because the ' + hash_algorithm + - ' file hash does not match the original value of ' + - file_hash + ' so we will re-download the data.') - download = True - else: - download = True - - if download: - print('Downloading data from', origin) - - class ProgressTracker(object): - # Maintain progbar for the lifetime of download. - # This design was chosen for Python 2.7 compatibility. - progbar = None - - def dl_progress(count, block_size, total_size): - if ProgressTracker.progbar is None: - if total_size == -1: - total_size = None - ProgressTracker.progbar = Progbar(total_size) - else: - ProgressTracker.progbar.update(count * block_size) - - error_msg = 'URL fetch failure on {} : {} -- {}' - try: - try: - urlretrieve(origin, fpath, dl_progress) - except HTTPError as e: - raise Exception(error_msg.format(origin, e.code, e.msg)) - except URLError as e: - raise Exception(error_msg.format(origin, e.errno, e.reason)) - except (Exception, KeyboardInterrupt): - if os.path.exists(fpath): - os.remove(fpath) - raise - ProgressTracker.progbar = None - - if untar: - if not os.path.exists(untar_fpath): - _extract_archive(fpath, datadir, archive_format='tar') - return untar_fpath - - if extract: - _extract_archive(fpath, datadir, archive_format) - - return fpath - - -def _hash_file(fpath, algorithm='sha256', chunk_size=65535): - """Calculates a file sha256 or md5 hash. - - # Example - - ```python - >>> from keras.data_utils import _hash_file - >>> _hash_file('/path/to/file.zip') - 'e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855' - ``` - - # Arguments - fpath: path to the file being validated - algorithm: hash algorithm, one of 'auto', 'sha256', or 'md5'. - The default 'auto' detects the hash algorithm in use. - chunk_size: Bytes to read at a time, important for large files. 
-
-    # Returns
-        The file hash
-    """
-    if (algorithm == 'sha256') or (algorithm == 'auto'):
-        # 'auto' falls back to SHA-256 here: without a reference hash
-        # to inspect, there is nothing to auto-detect from.
-        hasher = hashlib.sha256()
-    else:
-        hasher = hashlib.md5()
-
-    with open(fpath, 'rb') as fpath_file:
-        for chunk in iter(lambda: fpath_file.read(chunk_size), b''):
-            hasher.update(chunk)
-
-    return hasher.hexdigest()
-
-
-def validate_file(fpath, file_hash, algorithm='auto', chunk_size=65535):
-    """Validates a file against a sha256 or md5 hash.
-
-    # Arguments
-        fpath: path to the file being validated
-        file_hash: The expected hash string of the file.
-            The sha256 and md5 hash algorithms are both supported.
-        algorithm: Hash algorithm, one of 'auto', 'sha256', or 'md5'.
-            The default 'auto' detects the hash algorithm in use.
-        chunk_size: Bytes to read at a time, important for large files.
-
-    # Returns
-        Whether the file is valid
-    """
-    if ((algorithm == 'sha256') or
-            (algorithm == 'auto' and len(file_hash) == 64)):
-        hasher = 'sha256'
-    else:
-        hasher = 'md5'
-
-    if str(_hash_file(fpath, hasher, chunk_size)) == str(file_hash):
-        return True
-    else:
-        return False
-
-
-class Sequence(object):
-    """Base object for fitting to a sequence of data, such as a dataset.
-
-    Every `Sequence` must implement the `__getitem__` and the `__len__` methods.
-    If you want to modify your dataset between epochs you may implement
-    `on_epoch_end`. The method `__getitem__` should return a complete batch.
-
-    # Notes
-
-    `Sequence` is a safer way to do multiprocessing. This structure guarantees
-    that the network will only train once on each sample per epoch, which is
-    not the case with generators.
-
-    # Examples
-
-    ```python
-    from skimage.io import imread
-    from skimage.transform import resize
-    import numpy as np
-
-    # Here, `x_set` is a list of paths to the images
-    # and `y_set` are the associated classes.
-
-    class CIFAR10Sequence(Sequence):
-
-        def __init__(self, x_set, y_set, batch_size):
-            self.x, self.y = x_set, y_set
-            self.batch_size = batch_size
-
-        def __len__(self):
-            return int(np.ceil(len(self.x) / float(self.batch_size)))
-
-        def __getitem__(self, idx):
-            batch_x = self.x[idx * self.batch_size:(idx + 1) * self.batch_size]
-            batch_y = self.y[idx * self.batch_size:(idx + 1) * self.batch_size]
-
-            return np.array([
-                resize(imread(file_name), (200, 200))
-                for file_name in batch_x]), np.array(batch_y)
-    ```
-    """
-
-    use_sequence_api = True
-
-    @abstractmethod
-    def __getitem__(self, index):
-        """Gets batch at position `index`.
-
-        # Arguments
-            index: position of the batch in the Sequence.
-
-        # Returns
-            A batch
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    def __len__(self):
-        """Number of batches in the Sequence.
-
-        # Returns
-            The number of batches in the Sequence.
-        """
-        raise NotImplementedError
-
-    def on_epoch_end(self):
-        """Method called at the end of every epoch.
-        """
-        pass
-
-    def __iter__(self):
-        """Create a generator that iterates over the Sequence."""
-        for item in (self[i] for i in range(len(self))):
-            yield item
-
-
-# Global variables to be shared across processes
-_SHARED_SEQUENCES = {}
-# We use a Value to provide unique id to different processes.
-_SEQUENCE_COUNTER = None
-
-
-def init_pool(seqs):
-    global _SHARED_SEQUENCES
-    _SHARED_SEQUENCES = seqs
-
-
-def get_index(uid, i):
-    """Get the value from the Sequence `uid` at index `i`.
-
-    To allow multiple Sequences to be used at the same time, we use `uid` to
-    get a specific one. A single Sequence would cause the validation to
-    overwrite the training Sequence.
-
-    # Arguments
-        uid: int, Sequence identifier
-        i: index
-
-    # Returns
-        The value at index `i`.
-    """
-    return _SHARED_SEQUENCES[uid][i]
-
-
-class SequenceEnqueuer(object):
-    """Base class to enqueue inputs.
-
-    The task of an Enqueuer is to use parallelism to speed up preprocessing.
-    This is done with processes or threads.
-
-    # Examples
-
-    ```python
-    enqueuer = SequenceEnqueuer(...)
-    enqueuer.start()
-    output_generator = enqueuer.get()
-    for data in output_generator:
-        # Use the inputs; training, evaluating, predicting.
-        # ... then stop at some point.
-    enqueuer.stop()
-    ```
-
-    The `enqueuer.get()` should be an infinite stream of data.
-
-    """
-
-    def __init__(self, sequence,
-                 use_multiprocessing=False):
-        self.sequence = sequence
-        self.use_multiprocessing = use_multiprocessing
-
-        global _SEQUENCE_COUNTER
-        if _SEQUENCE_COUNTER is None:
-            try:
-                _SEQUENCE_COUNTER = mp.Value('i', 0)
-            except OSError:
-                # In this case the OS does not allow us to use
-                # multiprocessing. We resort to an int
-                # for enqueuer indexing.
-                _SEQUENCE_COUNTER = 0
-
-        if isinstance(_SEQUENCE_COUNTER, int):
-            self.uid = _SEQUENCE_COUNTER
-            _SEQUENCE_COUNTER += 1
-        else:
-            # Doing Multiprocessing.Value += x is not process-safe.
-            with _SEQUENCE_COUNTER.get_lock():
-                self.uid = _SEQUENCE_COUNTER.value
-                _SEQUENCE_COUNTER.value += 1
-
-        self.workers = 0
-        self.executor_fn = None
-        self.queue = None
-        self.run_thread = None
-        self.stop_signal = None
-
-    def is_running(self):
-        return self.stop_signal is not None and not self.stop_signal.is_set()
-
-    def start(self, workers=1, max_queue_size=10):
-        """Start the handler's workers.
-
-        # Arguments
-            workers: number of worker threads
-            max_queue_size: queue size
-                (when full, workers could block on `put()`)
-        """
-        if self.use_multiprocessing:
-            self.executor_fn = self._get_executor_init(workers)
-        else:
-            # We do not need the init since it's threads.
-            self.executor_fn = lambda _: ThreadPool(workers)
-        self.workers = workers
-        self.queue = queue.Queue(max_queue_size)
-        self.stop_signal = threading.Event()
-        self.run_thread = threading.Thread(target=self._run)
-        self.run_thread.daemon = True
-        self.run_thread.start()
-
-    def _send_sequence(self):
-        """Send current Iterable to all workers."""
-        # For new processes that may spawn
-        _SHARED_SEQUENCES[self.uid] = self.sequence
-
-    def stop(self, timeout=None):
-        """Stops running threads and waits for them to exit, if necessary.
-
-        Should be called by the same thread which called `start()`.
-
-        # Arguments
-            timeout: maximum time to wait on `thread.join()`
-        """
-        self.stop_signal.set()
-        with self.queue.mutex:
-            self.queue.queue.clear()
-            self.queue.unfinished_tasks = 0
-            self.queue.not_full.notify()
-        self.run_thread.join(timeout)
-        _SHARED_SEQUENCES[self.uid] = None
-
-    @abstractmethod
-    def _run(self):
-        """Submits request to the executor and queue the `Future` objects."""
-        raise NotImplementedError
-
-    @abstractmethod
-    def _get_executor_init(self, workers):
-        """Get the Pool initializer for multiprocessing.
-
-        # Returns
-            Function, a Function to initialize the pool
-        """
-        raise NotImplementedError
-
-    @abstractmethod
-    def get(self):
-        """Creates a generator to extract data from the queue.
-
-        Skip the data if it is `None`.
-
-        # Returns
-            Generator yielding tuples `(inputs, targets)`
-                or `(inputs, targets, sample_weights)`.
-        """
-        raise NotImplementedError
-
-
-class OrderedEnqueuer(SequenceEnqueuer):
-    """Builds an Enqueuer from a Sequence.
-
-    Used in `fit_generator`, `evaluate_generator`, `predict_generator`.
- - # Arguments - sequence: A `keras.utils.data_utils.Sequence` object. - use_multiprocessing: use multiprocessing if True, otherwise threading - shuffle: whether to shuffle the data at the beginning of each epoch - """ - - def __init__(self, sequence, use_multiprocessing=False, shuffle=False): - super(OrderedEnqueuer, self).__init__(sequence, use_multiprocessing) - self.shuffle = shuffle - - def _get_executor_init(self, workers): - """Get the Pool initializer for multiprocessing. - - # Returns - Function, a Function to initialize the pool - """ - return lambda seqs: mp.Pool(workers, - initializer=init_pool, - initargs=(seqs,)) - - def _wait_queue(self): - """Wait for the queue to be empty.""" - while True: - time.sleep(0.1) - if self.queue.unfinished_tasks == 0 or self.stop_signal.is_set(): - return - - def _run(self): - """Submits request to the executor and queue the `Future` objects.""" - sequence = list(range(len(self.sequence))) - self._send_sequence() # Share the initial sequence - while True: - if self.shuffle: - random.shuffle(sequence) - - with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor: - for i in sequence: - if self.stop_signal.is_set(): - return - future = executor.apply_async(get_index, (self.uid, i)) - future.idx = i - self.queue.put(future, block=True) - - # Done with the current epoch, waiting for the final batches - self._wait_queue() - - if self.stop_signal.is_set(): - # We're done - return - - # Call the internal on epoch end. - self.sequence.on_epoch_end() - self._send_sequence() # Update the pool - - def get(self): - """Creates a generator to extract data from the queue. - - Skip the data if it is `None`. - - # Yields - The next element in the queue, i.e. a tuple - `(inputs, targets)` or - `(inputs, targets, sample_weights)`. - """ - try: - while self.is_running(): - try: - future = self.queue.get(block=True) - inputs = future.get(timeout=30) - self.queue.task_done() - except mp.TimeoutError: - idx = future.idx - warnings.warn( - 'The input {} could not be retrieved.' - ' It could be because a worker has died.'.format(idx), - UserWarning) - inputs = self.sequence[idx] - if inputs is not None: - yield inputs - except Exception: - self.stop() - six.reraise(*sys.exc_info()) - - -def init_pool_generator(gens, random_seed=None): - global _SHARED_SEQUENCES - _SHARED_SEQUENCES = gens - - if random_seed is not None: - ident = mp.current_process().ident - np.random.seed(random_seed + ident) - - -def next_sample(uid): - """Get the next value from the generator `uid`. - - To allow multiple generators to be used at the same time, we use `uid` to - get a specific one. A single generator would cause the validation to - overwrite the training generator. - - # Arguments - uid: int, generator identifier - - # Returns - The next value of generator `uid`. - """ - return six.next(_SHARED_SEQUENCES[uid]) - - -class GeneratorEnqueuer(SequenceEnqueuer): - """Builds a queue out of a data generator. - - The provided generator can be finite in which case the class will throw - a `StopIteration` exception. - - Used in `fit_generator`, `evaluate_generator`, `predict_generator`. - - # Arguments - sequence: a sequence function which yields data - use_multiprocessing: use multiprocessing if True, otherwise threading - wait_time: time to sleep in-between calls to `put()` - random_seed: Initial seed for workers, - will be incremented by one for each worker. 
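-
-    # Example
-
-    A minimal usage sketch added for illustration; `my_generator` and
-    `steps` are placeholders, not names defined elsewhere in this file.
-
-    ```python
-    enqueuer = GeneratorEnqueuer(my_generator, use_multiprocessing=True)
-    enqueuer.start(workers=4, max_queue_size=10)
-    output_generator = enqueuer.get()
-    for _ in range(steps):
-        batch = next(output_generator)  # (inputs, targets) tuples
-        # ... train on `batch` ...
-    enqueuer.stop()
-    ```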
- """ - - def __init__(self, sequence, use_multiprocessing=False, wait_time=None, - random_seed=None): - super(GeneratorEnqueuer, self).__init__(sequence, use_multiprocessing) - self.random_seed = random_seed - if wait_time is not None: - warnings.warn('`wait_time` is not used anymore.', - DeprecationWarning) - - def _get_executor_init(self, workers): - """Get the Pool initializer for multiprocessing. - - # Returns - Function, a Function to initialize the pool - """ - return lambda seqs: mp.Pool(workers, - initializer=init_pool_generator, - initargs=(seqs, self.random_seed)) - - def _run(self): - """Submits request to the executor and queue the `Future` objects.""" - self._send_sequence() # Share the initial generator - with closing(self.executor_fn(_SHARED_SEQUENCES)) as executor: - while True: - if self.stop_signal.is_set(): - return - self.queue.put( - executor.apply_async(next_sample, (self.uid,)), block=True) - - def get(self): - """Creates a generator to extract data from the queue. - - Skip the data if it is `None`. - - # Yields - The next element in the queue, i.e. a tuple - `(inputs, targets)` or - `(inputs, targets, sample_weights)`. - """ - try: - while self.is_running(): - try: - future = self.queue.get(block=True) - inputs = future.get(timeout=30) - self.queue.task_done() - except mp.TimeoutError: - warnings.warn( - 'An input could not be retrieved.' - ' It could be because a worker has died.' - 'We do not have any information on the lost sample.', - UserWarning) - continue - if inputs is not None: - yield inputs - except StopIteration: - # Special case for finite generators - last_ones = [] - while self.queue.qsize() > 0: - last_ones.append(self.queue.get(block=True)) - # Wait for them to complete - list(map(lambda f: f.wait(), last_ones)) - # Keep the good ones - last_ones = [future.get() - for future in last_ones if future.successful()] - for inputs in last_ones: - if inputs is not None: - yield inputs - except Exception as e: - self.stop() - if 'generator already executing' in str(e): - raise RuntimeError( - "Your generator is NOT thread-safe." - "Keras requires a thread-safe generator when" - "`use_multiprocessing=False, workers > 1`." - "For more information see issue #1638.") - six.reraise(*sys.exc_info()) -"""Python utilities required by Keras.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import binascii -import numpy as np - -import time -import sys -import six -import marshal -import types as python_types -import inspect -import codecs -import collections - -_GLOBAL_CUSTOM_OBJECTS = {} - - -class CustomObjectScope(object): - """Provides a scope that changes to `_GLOBAL_CUSTOM_OBJECTS` cannot escape. - - Code within a `with` statement will be able to access custom objects - by name. Changes to global custom objects persist - within the enclosing `with` statement. At end of the `with` statement, - global custom objects are reverted to state - at beginning of the `with` statement. - - # Example - - Consider a custom object `MyObject` (e.g. a class): - - ```python - with CustomObjectScope({'MyObject':MyObject}): - layer = Dense(..., kernel_regularizer='MyObject') - # save, load, etc. 
will recognize custom object by name - ``` - """ - - def __init__(self, *args): - self.custom_objects = args - self.backup = None - - def __enter__(self): - self.backup = _GLOBAL_CUSTOM_OBJECTS.copy() - for objects in self.custom_objects: - _GLOBAL_CUSTOM_OBJECTS.update(objects) - return self - - def __exit__(self, *args, **kwargs): - _GLOBAL_CUSTOM_OBJECTS.clear() - _GLOBAL_CUSTOM_OBJECTS.update(self.backup) - - -def custom_object_scope(*args): - """Provides a scope that changes to `_GLOBAL_CUSTOM_OBJECTS` cannot escape. - - Convenience wrapper for `CustomObjectScope`. - Code within a `with` statement will be able to access custom objects - by name. Changes to global custom objects persist - within the enclosing `with` statement. At end of the `with` statement, - global custom objects are reverted to state - at beginning of the `with` statement. - - # Example - - Consider a custom object `MyObject` - - ```python - with custom_object_scope({'MyObject':MyObject}): - layer = Dense(..., kernel_regularizer='MyObject') - # save, load, etc. will recognize custom object by name - ``` - - # Arguments - *args: Variable length list of dictionaries of name, - class pairs to add to custom objects. - - # Returns - Object of type `CustomObjectScope`. - """ - return CustomObjectScope(*args) - - -def get_custom_objects(): - """Retrieves a live reference to the global dictionary of custom objects. - - Updating and clearing custom objects using `custom_object_scope` - is preferred, but `get_custom_objects` can - be used to directly access `_GLOBAL_CUSTOM_OBJECTS`. - - # Example - - ```python - get_custom_objects().clear() - get_custom_objects()['MyObject'] = MyObject - ``` - - # Returns - Global dictionary of names to classes (`_GLOBAL_CUSTOM_OBJECTS`). - """ - return _GLOBAL_CUSTOM_OBJECTS - - -def serialize_keras_object(instance): - if instance is None: - return None - if hasattr(instance, 'get_config'): - return { - 'class_name': instance.__class__.__name__, - 'config': instance.get_config() - } - if hasattr(instance, '__name__'): - return instance.__name__ - else: - raise ValueError('Cannot serialize', instance) - - -def deserialize_keras_object(identifier, module_objects=None, - custom_objects=None, - printable_module_name='object'): - if identifier is None: - return None - if isinstance(identifier, dict): - # In this case we are dealing with a Keras config dictionary. - config = identifier - if 'class_name' not in config or 'config' not in config: - raise ValueError('Improper config format: ' + str(config)) - class_name = config['class_name'] - if custom_objects and class_name in custom_objects: - cls = custom_objects[class_name] - elif class_name in _GLOBAL_CUSTOM_OBJECTS: - cls = _GLOBAL_CUSTOM_OBJECTS[class_name] - else: - module_objects = module_objects or {} - cls = module_objects.get(class_name) - if cls is None: - raise ValueError('Unknown ' + printable_module_name + - ': ' + class_name) - if hasattr(cls, 'from_config'): - custom_objects = custom_objects or {} - if has_arg(cls.from_config, 'custom_objects'): - return cls.from_config( - config['config'], - custom_objects=dict(list(_GLOBAL_CUSTOM_OBJECTS.items()) + - list(custom_objects.items()))) - with CustomObjectScope(custom_objects): - return cls.from_config(config['config']) - else: - # Then `cls` may be a function returning a class. - # in this case by convention `config` holds - # the kwargs of the function. 
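# Usage sketch (not part of the deleted file) for `CustomObjectScope`,
# `get_custom_objects` and `serialize_keras_object` above (assumes the keras
# package; `MyRegularizer` is a hypothetical stand-in for any object that
# exposes `get_config`).
from keras.utils.generic_utils import (CustomObjectScope,
                                       get_custom_objects,
                                       serialize_keras_object)


class MyRegularizer(object):
    def get_config(self):
        return {}


with CustomObjectScope({'MyRegularizer': MyRegularizer}):
    assert 'MyRegularizer' in get_custom_objects()
    print(serialize_keras_object(MyRegularizer()))
    # {'class_name': 'MyRegularizer', 'config': {}}
# The registration is reverted once the scope exits.
assert 'MyRegularizer' not in get_custom_objects()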
- custom_objects = custom_objects or {} - with CustomObjectScope(custom_objects): - return cls(**config['config']) - elif isinstance(identifier, six.string_types): - function_name = identifier - if custom_objects and function_name in custom_objects: - fn = custom_objects.get(function_name) - elif function_name in _GLOBAL_CUSTOM_OBJECTS: - fn = _GLOBAL_CUSTOM_OBJECTS[function_name] - else: - fn = module_objects.get(function_name) - if fn is None: - raise ValueError('Unknown ' + printable_module_name + - ':' + function_name) - return fn - else: - raise ValueError('Could not interpret serialized ' + - printable_module_name + ': ' + identifier) - - -def func_dump(func): - """Serializes a user defined function. - - # Arguments - func: the function to serialize. - - # Returns - A tuple `(code, defaults, closure)`. - """ - raw_code = marshal.dumps(func.__code__) - code = codecs.encode(raw_code, 'base64').decode('ascii') - defaults = func.__defaults__ - if func.__closure__: - closure = tuple(c.cell_contents for c in func.__closure__) - else: - closure = None - return code, defaults, closure - - -def func_load(code, defaults=None, closure=None, globs=None): - """Deserializes a user defined function. - - # Arguments - code: bytecode of the function. - defaults: defaults of the function. - closure: closure of the function. - globs: dictionary of global objects. - - # Returns - A function object. - """ - if isinstance(code, (tuple, list)): # unpack previous dump - code, defaults, closure = code - if isinstance(defaults, list): - defaults = tuple(defaults) - - def ensure_value_to_cell(value): - """Ensures that a value is converted to a python cell object. - - # Arguments - value: Any value that needs to be casted to the cell type - - # Returns - A value wrapped as a cell object (see function "func_load") - - """ - def dummy_fn(): - value # just access it so it gets captured in .__closure__ - - cell_value = dummy_fn.__closure__[0] - if not isinstance(value, type(cell_value)): - return cell_value - else: - return value - - if closure is not None: - closure = tuple(ensure_value_to_cell(_) for _ in closure) - try: - raw_code = codecs.decode(code.encode('ascii'), 'base64') - code = marshal.loads(raw_code) - except (UnicodeEncodeError, binascii.Error, ValueError): - # backwards compatibility for models serialized prior to 2.1.2 - raw_code = code.encode('raw_unicode_escape') - code = marshal.loads(raw_code) - if globs is None: - globs = globals() - return python_types.FunctionType(code, globs, - name=code.co_name, - argdefs=defaults, - closure=closure) - - -def getargspec(fn): - """Python 2/3 compatible `getargspec`. - - Calls `getfullargspec` and assigns args, varargs, - varkw, and defaults to a python 2/3 compatible `ArgSpec`. - The parameter name 'varkw' is changed to 'keywords' to fit the - `ArgSpec` struct. - - # Arguments - fn: the target function to inspect. - - # Returns - An ArgSpec with args, varargs, keywords, and defaults parameters - from FullArgSpec. - """ - if sys.version_info < (3,): - arg_spec = inspect.getargspec(fn) - else: - full_arg_spec = inspect.getfullargspec(fn) - arg_spec = inspect.ArgSpec( - args=full_arg_spec.args, - varargs=full_arg_spec.varargs, - keywords=full_arg_spec.varkw, - defaults=full_arg_spec.defaults) - return arg_spec - - -def has_arg(fn, name, accept_all=False): - """Checks if a callable accepts a given keyword argument. - - For Python 2, checks if there is an argument with the given name. 
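# Round-trip sketch (not part of the deleted file) for `func_dump`/`func_load`
# above, assuming the keras package: marshal a plain function's bytecode to
# text and rebuild a callable from it. `add` is a hypothetical example.
from keras.utils.generic_utils import func_dump, func_load


def add(a, b=2):
    return a + b


code, defaults, closure = func_dump(add)
rebuilt = func_load(code, defaults=defaults, closure=closure)
assert rebuilt(3) == 5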
- - For Python 3, checks if there is an argument with the given name, and - also whether this argument can be called with a keyword (i.e. if it is - not a positional-only argument). - - # Arguments - fn: Callable to inspect. - name: Check if `fn` can be called with `name` as a keyword argument. - accept_all: What to return if there is no parameter called `name` - but the function accepts a `**kwargs` argument. - - # Returns - bool, whether `fn` accepts a `name` keyword argument. - """ - if sys.version_info < (3,): - arg_spec = inspect.getargspec(fn) - if accept_all and arg_spec.keywords is not None: - return True - return (name in arg_spec.args) - elif sys.version_info < (3, 3): - arg_spec = inspect.getfullargspec(fn) - if accept_all and arg_spec.varkw is not None: - return True - return (name in arg_spec.args or - name in arg_spec.kwonlyargs) - else: - signature = inspect.signature(fn) - parameter = signature.parameters.get(name) - if parameter is None: - if accept_all: - for param in signature.parameters.values(): - if param.kind == inspect.Parameter.VAR_KEYWORD: - return True - return False - return (parameter.kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD, - inspect.Parameter.KEYWORD_ONLY)) - - -class Progbar(object): - """Displays a progress bar. - - # Arguments - target: Total number of steps expected, None if unknown. - width: Progress bar width on screen. - verbose: Verbosity mode, 0 (silent), 1 (verbose), 2 (semi-verbose) - stateful_metrics: Iterable of string names of metrics that - should *not* be averaged over time. Metrics in this list - will be displayed as-is. All others will be averaged - by the progbar before display. - interval: Minimum visual progress update interval (in seconds). - """ - - def __init__(self, target, width=30, verbose=1, interval=0.05, - stateful_metrics=None): - self.target = target - self.width = width - self.verbose = verbose - self.interval = interval - if stateful_metrics: - self.stateful_metrics = set(stateful_metrics) - else: - self.stateful_metrics = set() - - self._dynamic_display = ((hasattr(sys.stdout, 'isatty') and - sys.stdout.isatty()) or - 'ipykernel' in sys.modules) - self._total_width = 0 - self._seen_so_far = 0 - self._values = collections.OrderedDict() - self._start = time.time() - self._last_update = 0 - - def update(self, current, values=None): - """Updates the progress bar. - - # Arguments - current: Index of current step. - values: List of tuples: - `(name, value_for_last_step)`. - If `name` is in `stateful_metrics`, - `value_for_last_step` will be displayed as-is. - Else, an average of the metric over time will be displayed. - """ - values = values or [] - for k, v in values: - if k not in self.stateful_metrics: - if k not in self._values: - self._values[k] = [v * (current - self._seen_so_far), - current - self._seen_so_far] - else: - self._values[k][0] += v * (current - self._seen_so_far) - self._values[k][1] += (current - self._seen_so_far) - else: - # Stateful metrics output a numeric value. This representation - # means "take an average from a single value" but keeps the - # numeric formatting. 
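# Spot checks (not part of the deleted file) for `has_arg` above, assuming the
# keras package; on Python 3 the `inspect.signature` branch is taken.
from keras.utils.generic_utils import has_arg


def f(x, y=1, **kwargs):
    return x


assert has_arg(f, 'y')                   # named keyword argument
assert not has_arg(f, 'z')               # unknown name, **kwargs ignored
assert has_arg(f, 'z', accept_all=True)  # unknown name, but **kwargs catches it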
- self._values[k] = [v, 1] - self._seen_so_far = current - - now = time.time() - info = ' - %.0fs' % (now - self._start) - if self.verbose == 1: - if (now - self._last_update < self.interval and - self.target is not None and current < self.target): - return - - prev_total_width = self._total_width - if self._dynamic_display: - sys.stdout.write('\b' * prev_total_width) - sys.stdout.write('\r') - else: - sys.stdout.write('\n') - - if self.target is not None: - numdigits = int(np.floor(np.log10(self.target))) + 1 - barstr = '%%%dd/%d [' % (numdigits, self.target) - bar = barstr % current - prog = float(current) / self.target - prog_width = int(self.width * prog) - if prog_width > 0: - bar += ('=' * (prog_width - 1)) - if current < self.target: - bar += '>' - else: - bar += '=' - bar += ('.' * (self.width - prog_width)) - bar += ']' - else: - bar = '%7d/Unknown' % current - - self._total_width = len(bar) - sys.stdout.write(bar) - - if current: - time_per_unit = (now - self._start) / current - else: - time_per_unit = 0 - if self.target is not None and current < self.target: - eta = time_per_unit * (self.target - current) - if eta > 3600: - eta_format = ('%d:%02d:%02d' % - (eta // 3600, (eta % 3600) // 60, eta % 60)) - elif eta > 60: - eta_format = '%d:%02d' % (eta // 60, eta % 60) - else: - eta_format = '%ds' % eta - - info = ' - ETA: %s' % eta_format - else: - if time_per_unit >= 1: - info += ' %.0fs/step' % time_per_unit - elif time_per_unit >= 1e-3: - info += ' %.0fms/step' % (time_per_unit * 1e3) - else: - info += ' %.0fus/step' % (time_per_unit * 1e6) - - for k in self._values: - info += ' - %s:' % k - if isinstance(self._values[k], list): - avg = np.mean( - self._values[k][0] / max(1, self._values[k][1])) - if abs(avg) > 1e-3: - info += ' %.4f' % avg - else: - info += ' %.4e' % avg - else: - info += ' %s' % self._values[k] - - self._total_width += len(info) - if prev_total_width > self._total_width: - info += (' ' * (prev_total_width - self._total_width)) - - if self.target is not None and current >= self.target: - info += '\n' - - sys.stdout.write(info) - sys.stdout.flush() - - elif self.verbose == 2: - if self.target is None or current >= self.target: - for k in self._values: - info += ' - %s:' % k - avg = np.mean( - self._values[k][0] / max(1, self._values[k][1])) - if avg > 1e-3: - info += ' %.4f' % avg - else: - info += ' %.4e' % avg - info += '\n' - - sys.stdout.write(info) - sys.stdout.flush() - - self._last_update = now - - def add(self, n, values=None): - self.update(self._seen_so_far + n, values) - - -def to_list(x, allow_tuple=False): - """Normalizes a list/tensor into a list. - - If a tensor is passed, we return - a list of size 1 containing the tensor. - - # Arguments - x: target object to be normalized. - allow_tuple: If False and x is a tuple, - it will be converted into a list - with a single element (the tuple). - Else converts the tuple to a list. - - # Returns - A list. - """ - if isinstance(x, list): - return x - if allow_tuple and isinstance(x, tuple): - return list(x) - return [x] - - -def unpack_singleton(x): - """Gets the first element if the iterable has only one value. - - Otherwise return the iterable. - - # Argument - x: A list or tuple. - - # Returns - The same iterable or the first element. 
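# Usage sketch (not part of the deleted file) for `Progbar` above, assuming
# the keras package; `loss` is a made-up metric value to drive the display.
import time
from keras.utils.generic_utils import Progbar

progbar = Progbar(target=20)
for step in range(20):
    time.sleep(0.01)  # stand-in for real work
    progbar.update(step + 1, values=[('loss', 1.0 / (step + 1))])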
- """ - if len(x) == 1: - return x[0] - return x - - -def object_list_uid(object_list): - object_list = to_list(object_list) - return ', '.join([str(abs(id(x))) for x in object_list]) - - -def is_all_none(iterable_or_element): - iterable = to_list(iterable_or_element, allow_tuple=True) - for element in iterable: - if element is not None: - return False - return True - - -def slice_arrays(arrays, start=None, stop=None): - """Slices an array or list of arrays. - - This takes an array-like, or a list of - array-likes, and outputs: - - arrays[start:stop] if `arrays` is an array-like - - [x[start:stop] for x in arrays] if `arrays` is a list - - Can also work on list/array of indices: `_slice_arrays(x, indices)` - - # Arguments - arrays: Single array or list of arrays. - start: can be an integer index (start index) - or a list/array of indices - stop: integer (stop index); should be None if - `start` was a list. - - # Returns - A slice of the array(s). - """ - if arrays is None: - return [None] - elif isinstance(arrays, list): - if hasattr(start, '__len__'): - # hdf5 datasets only support list objects as indices - if hasattr(start, 'shape'): - start = start.tolist() - return [None if x is None else x[start] for x in arrays] - else: - return [None if x is None else x[start:stop] for x in arrays] - else: - if hasattr(start, '__len__'): - if hasattr(start, 'shape'): - start = start.tolist() - return arrays[start] - elif hasattr(start, '__getitem__'): - return arrays[start:stop] - else: - return [None] - - -def transpose_shape(shape, target_format, spatial_axes): - """Converts a tuple or a list to the correct `data_format`. - - It does so by switching the positions of its elements. - - # Arguments - shape: Tuple or list, often representing shape, - corresponding to `'channels_last'`. - target_format: A string, either `'channels_first'` or `'channels_last'`. - spatial_axes: A tuple of integers. - Correspond to the indexes of the spatial axes. - For example, if you pass a shape - representing (batch_size, timesteps, rows, cols, channels), - then `spatial_axes=(2, 3)`. - - # Returns - A tuple or list, with the elements permuted according - to `target_format`. - - # Example - ```python - >>> from keras.utils.generic_utils import transpose_shape - >>> transpose_shape((16, 128, 128, 32),'channels_first', spatial_axes=(1, 2)) - (16, 32, 128, 128) - >>> transpose_shape((16, 128, 128, 32), 'channels_last', spatial_axes=(1, 2)) - (16, 128, 128, 32) - >>> transpose_shape((128, 128, 32), 'channels_first', spatial_axes=(0, 1)) - (32, 128, 128) - ``` - - # Raises - ValueError: if `value` or the global `data_format` invalid. - """ - if target_format == 'channels_first': - new_values = shape[:spatial_axes[0]] - new_values += (shape[-1],) - new_values += tuple(shape[x] for x in spatial_axes) - - if isinstance(shape, list): - return list(new_values) - return new_values - elif target_format == 'channels_last': - return shape - else: - raise ValueError('The `data_format` argument must be one of ' - '"channels_first", "channels_last". 
Received: ' + - str(target_format)) -"""Utilities related to disk I/O.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -from collections import defaultdict -import sys -import contextlib - - -import six -try: - import h5py - HDF5_OBJECT_HEADER_LIMIT = 64512 -except ImportError: - h5py = None - - -if sys.version_info[0] == 3: - import pickle -else: - import cPickle as pickle - - -class HDF5Matrix(object): - """Representation of HDF5 dataset to be used instead of a Numpy array. - - # Example - - ```python - x_data = HDF5Matrix('input/file.hdf5', 'data') - model.predict(x_data) - ``` - - Providing `start` and `end` allows use of a slice of the dataset. - - Optionally, a normalizer function (or lambda) can be given. This will - be called on every slice of data retrieved. - - # Arguments - datapath: string, path to a HDF5 file - dataset: string, name of the HDF5 dataset in the file specified - in datapath - start: int, start of desired slice of the specified dataset - end: int, end of desired slice of the specified dataset - normalizer: function to be called on data when retrieved - - # Returns - An array-like HDF5 dataset. - """ - refs = defaultdict(int) - - def __init__(self, datapath, dataset, start=0, end=None, normalizer=None): - if h5py is None: - raise ImportError('The use of HDF5Matrix requires ' - 'HDF5 and h5py installed.') - - if datapath not in list(self.refs.keys()): - f = h5py.File(datapath) - self.refs[datapath] = f - else: - f = self.refs[datapath] - self.data = f[dataset] - self.start = start - if end is None: - self.end = self.data.shape[0] - else: - self.end = end - self.normalizer = normalizer - if self.normalizer is not None: - first_val = self.normalizer(self.data[0:1]) - else: - first_val = self.data[0:1] - self._base_shape = first_val.shape[1:] - self._base_dtype = first_val.dtype - - def __len__(self): - return self.end - self.start - - def __getitem__(self, key): - if isinstance(key, slice): - start, stop = key.start, key.stop - if start is None: - start = 0 - if stop is None: - stop = self.shape[0] - if stop + self.start <= self.end: - idx = slice(start + self.start, stop + self.start) - else: - raise IndexError - elif isinstance(key, (int, np.integer)): - if key + self.start < self.end: - idx = key + self.start - else: - raise IndexError - elif isinstance(key, np.ndarray): - if np.max(key) + self.start < self.end: - idx = (self.start + key).tolist() - else: - raise IndexError - else: - # Assume list/iterable - if max(key) + self.start < self.end: - idx = [x + self.start for x in key] - else: - raise IndexError - if self.normalizer is not None: - return self.normalizer(self.data[idx]) - else: - return self.data[idx] - - @property - def shape(self): - """Gets a numpy-style shape tuple giving the dataset dimensions. - - # Returns - A numpy-style shape tuple. - """ - return (self.end - self.start,) + self._base_shape - - @property - def dtype(self): - """Gets the datatype of the dataset. - - # Returns - A numpy dtype string. - """ - return self._base_dtype - - @property - def ndim(self): - """Gets the number of dimensions (rank) of the dataset. - - # Returns - An integer denoting the number of dimensions (rank) of the dataset. - """ - return self.data.ndim - - @property - def size(self): - """Gets the total dataset size (number of elements). - - # Returns - An integer denoting the number of elements in the dataset. 
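# Spot checks (not part of the deleted file) for `slice_arrays` and
# `transpose_shape` above, assuming the keras package.
import numpy as np
from keras.utils.generic_utils import slice_arrays, transpose_shape

a = np.arange(10)
b = np.arange(10) * 2
print(slice_arrays([a, b], 2, 5))    # [array([2, 3, 4]), array([4, 6, 8])]
print(slice_arrays([a, b], [0, 3]))  # list of indices instead of start/stop
print(transpose_shape((16, 128, 128, 32), 'channels_first', spatial_axes=(1, 2)))
# (16, 32, 128, 128)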
- """ - return np.prod(self.shape) - - -def ask_to_proceed_with_overwrite(filepath): - """Produces a prompt asking about overwriting a file. - - # Arguments - filepath: the path to the file to be overwritten. - - # Returns - True if we can proceed with overwrite, False otherwise. - """ - overwrite = six.moves.input('[WARNING] %s already exists - overwrite? ' - '[y/n]' % (filepath)).strip().lower() - while overwrite not in ('y', 'n'): - overwrite = six.moves.input('Enter "y" (overwrite) or "n" ' - '(cancel).').strip().lower() - if overwrite == 'n': - return False - print('[TIP] Next time specify overwrite=True!') - return True - - -class H5Dict(object): - """ A dict-like wrapper around h5py groups (or dicts). - - This allows us to have a single serialization logic - for both pickling and saving to disk. - - Note: This is not intended to be a generic wrapper. - There are lot of edge cases which have been hardcoded, - and makes sense only in the context of model serialization/ - deserialization. - - # Arguments - path: Either a string (path on disk), a Path, a dict, or a HDF5 Group. - mode: File open mode (one of `{"a", "r", "w"}`). - """ - - def __init__(self, path, mode='a'): - if isinstance(path, h5py.Group): - self.data = path - self._is_file = False - elif isinstance(path, six.string_types) or _is_path_instance(path): - self.data = h5py.File(path, mode=mode) - self._is_file = True - elif isinstance(path, dict): - self.data = path - self._is_file = False - if mode == 'w': - self.data.clear() - # Flag to check if a dict is user defined data or a sub group: - self.data['_is_group'] = True - else: - raise TypeError('Required Group, str, Path or dict. ' - 'Received: {}.'.format(type(path))) - self.read_only = mode == 'r' - - @staticmethod - def is_supported_type(path): - """Check if `path` is of supported type for instantiating a `H5Dict`""" - return ( - isinstance(path, h5py.Group) or - isinstance(path, dict) or - isinstance(path, six.string_types) or - _is_path_instance(path) - ) - - def __setitem__(self, attr, val): - if self.read_only: - raise ValueError('Cannot set item in read-only mode.') - is_np = type(val).__module__ == np.__name__ - if isinstance(self.data, dict): - if isinstance(attr, bytes): - attr = attr.decode('utf-8') - if is_np: - self.data[attr] = pickle.dumps(val) - # We have to remember to unpickle in __getitem__ - self.data['_{}_pickled'.format(attr)] = True - else: - self.data[attr] = val - return - if isinstance(self.data, h5py.Group) and attr in self.data: - raise KeyError('Cannot set attribute. ' - 'Group with name "{}" exists.'.format(attr)) - if is_np: - dataset = self.data.create_dataset( - attr, val.shape, dtype=val.dtype) - if not val.shape: - # scalar - dataset[()] = val - else: - dataset[:] = val - elif isinstance(val, (list, tuple)): - # Check that no item in `data` is larger than `HDF5_OBJECT_HEADER_LIMIT` - # because in that case even chunking the array would not make the saving - # possible. - bad_attributes = [x for x in val if len( - x) > HDF5_OBJECT_HEADER_LIMIT] - - # Expecting this to never be true. 
- if bad_attributes: - raise RuntimeError('The following attributes cannot be saved to ' - 'HDF5 file because they are larger than ' - '%d bytes: %s' % (HDF5_OBJECT_HEADER_LIMIT, - ', '.join(bad_attributes))) - - if (val and sys.version_info[0] == 3 and isinstance( - val[0], six.string_types)): - # convert to bytes - val = [x.encode('utf-8') for x in val] - - data_npy = np.asarray(val) - - num_chunks = 1 - chunked_data = np.array_split(data_npy, num_chunks) - - # This will never loop forever thanks to the test above. - def is_too_big(x): return x.nbytes > HDF5_OBJECT_HEADER_LIMIT - while any(map(is_too_big, chunked_data)): - num_chunks += 1 - chunked_data = np.array_split(data_npy, num_chunks) - - if num_chunks > 1: - for chunk_id, chunk_data in enumerate(chunked_data): - self.data.attrs['%s%d' % (attr, chunk_id)] = chunk_data - else: - self.data.attrs[attr] = val - else: - self.data.attrs[attr] = val - - def __getitem__(self, attr): - if isinstance(self.data, dict): - if isinstance(attr, bytes): - attr = attr.decode('utf-8') - if attr in self.data: - val = self.data[attr] - if isinstance(val, dict) and val.get('_is_group'): - val = H5Dict(val) - elif '_{}_pickled'.format(attr) in self.data: - val = pickle.loads(val) - return val - else: - if self.read_only: - raise ValueError('Cannot create group in read-only mode.') - val = {'_is_group': True} - self.data[attr] = val - return H5Dict(val) - if attr in self.data.attrs: - val = self.data.attrs[attr] - if type(val).__module__ == np.__name__: - if val.dtype.type == np.string_: - val = val.tolist() - elif attr in self.data: - val = self.data[attr] - if isinstance(val, h5py.Dataset): - val = np.asarray(val) - else: - val = H5Dict(val) - else: - # could be chunked - chunk_attr = '%s%d' % (attr, 0) - is_chunked = chunk_attr in self.data.attrs - if is_chunked: - val = [] - chunk_id = 0 - while chunk_attr in self.data.attrs: - chunk = self.data.attrs[chunk_attr] - val.extend([x.decode('utf8') for x in chunk]) - chunk_id += 1 - chunk_attr = '%s%d' % (attr, chunk_id) - else: - if self.read_only: - raise ValueError('Cannot create group in read-only mode.') - val = H5Dict(self.data.create_group(attr)) - return val - - def __len__(self): - return len(self.data) - - def __iter__(self): - return iter(self.data) - - def iter(self): - return iter(self.data) - - def __getattr__(self, attr): - - def wrapper(f): - def h5wrapper(*args, **kwargs): - out = f(*args, **kwargs) - if isinstance(self.data, type(out)): - return H5Dict(out) - else: - return out - return h5wrapper - - return wrapper(getattr(self.data, attr)) - - def close(self): - if isinstance(self.data, h5py.Group): - self.data.file.flush() - if self._is_file: - self.data.close() - - def update(self, *args): - if isinstance(self.data, dict): - self.data.update(*args) - raise NotImplementedError - - def __contains__(self, key): - if isinstance(self.data, dict): - return key in self.data - else: - return (key in self.data) or (key in self.data.attrs) - - def get(self, key, default=None): - if key in self: - return self[key] - return default - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.close() - - -h5dict = H5Dict - - -def load_from_binary_h5py(load_function, stream): - """Calls `load_function` on a `h5py.File` read from the binary `stream`. - - # Arguments - load_function: A function that takes a `h5py.File`, reads from it, and - returns any object. - stream: Any file-like object implementing the method `read` that returns - `bytes` data (e.g. 
`io.BytesIO`) that represents a valid h5py file image. - - # Returns - The object returned by `load_function`. - """ - # Implementation based on suggestion solution here: - # https://github.com/keras-team/keras/issues/9343#issuecomment-440903847 - binary_data = stream.read() - file_access_property_list = h5py.h5p.create(h5py.h5p.FILE_ACCESS) - file_access_property_list.set_fapl_core(backing_store=False) - file_access_property_list.set_file_image(binary_data) - file_id_args = {'fapl': file_access_property_list, - 'flags': h5py.h5f.ACC_RDONLY, - 'name': b'in-memory-h5py'} # name does not matter - h5_file_args = {'backing_store': False, - 'driver': 'core', - 'mode': 'r'} - with contextlib.closing(h5py.h5f.open(**file_id_args)) as file_id: - with h5py.File(file_id, **h5_file_args) as h5_file: - return load_function(h5_file) - - -def save_to_binary_h5py(save_function, stream): - """Calls `save_function` on an in memory `h5py.File`. - - The file is subsequently written to the binary `stream`. - - # Arguments - save_function: A function that takes a `h5py.File`, writes to it and - (optionally) returns any object. - stream: Any file-like object implementing the method `write` that accepts - `bytes` data (e.g. `io.BytesIO`). - """ - with h5py.File('in-memory-h5py', driver='core', backing_store=False) as h5file: - # note that filename does not matter here. - return_value = save_function(h5file) - h5file.flush() - binary_data = h5file.fid.get_file_image() - stream.write(binary_data) - - return return_value - - -def _is_path_instance(path): - # We can't use isinstance here because it would require - # us to add pathlib2 to the Python 2 dependencies. - class_name = type(path).__name__ - return class_name == 'PosixPath' or class_name == 'WindowsPath' -"""Utilities related to layer/model functionality. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from .conv_utils import convert_kernel -from .. import backend as K -import numpy as np - - -def count_params(weights): - """Count the total number of scalars composing the weights. - - # Arguments - weights: An iterable containing the weights on which to compute params - - # Returns - The total number of scalars composing the weights - """ - return int(np.sum([K.count_params(p) for p in set(weights)])) - - -def print_summary(model, line_length=None, positions=None, print_fn=None): - """Prints a summary of a model. - - # Arguments - model: Keras model instance. - line_length: Total length of printed lines - (e.g. set this to adapt the display to different - terminal window sizes). - positions: Relative or absolute positions of log elements in each line. - If not provided, defaults to `[.33, .55, .67, 1.]`. - print_fn: Print function to use. - It will be called on each line of the summary. - You can set it to a custom function - in order to capture the string summary. - It defaults to `print` (prints to stdout). - """ - if print_fn is None: - print_fn = print - - if model.__class__.__name__ == 'Sequential': - sequential_like = True - elif not model._is_graph_network: - # We treat subclassed models as a simple sequence of layers, - # for logging purposes. 
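# Round-trip sketch (not part of the deleted file) for `save_to_binary_h5py`
# and `load_from_binary_h5py` above, assuming the keras package and h5py;
# an in-memory stream stands in for a file on disk.
import io
import numpy as np
from keras.utils.io_utils import save_to_binary_h5py, load_from_binary_h5py

stream = io.BytesIO()
save_to_binary_h5py(lambda f: f.create_dataset('x', data=np.ones(3)), stream)
stream.seek(0)
x = load_from_binary_h5py(lambda f: np.asarray(f['x']), stream)
print(x)  # [1. 1. 1.]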
- sequential_like = True - else: - sequential_like = True - nodes_by_depth = model._nodes_by_depth.values() - nodes = [] - for v in nodes_by_depth: - if (len(v) > 1) or (len(v) == 1 and len(v[0].inbound_layers) > 1): - # if the model has multiple nodes - # or if the nodes have multiple inbound_layers - # the model is no longer sequential - sequential_like = False - break - nodes += v - if sequential_like: - # search for shared layers - for layer in model.layers: - flag = False - for node in layer._inbound_nodes: - if node in nodes: - if flag: - sequential_like = False - break - else: - flag = True - if not sequential_like: - break - - if sequential_like: - line_length = line_length or 65 - positions = positions or [.45, .85, 1.] - if positions[-1] <= 1: - positions = [int(line_length * p) for p in positions] - # header names for the different log elements - to_display = ['Layer (type)', 'Output Shape', 'Param #'] - else: - line_length = line_length or 98 - positions = positions or [.33, .55, .67, 1.] - if positions[-1] <= 1: - positions = [int(line_length * p) for p in positions] - # header names for the different log elements - to_display = ['Layer (type)', - 'Output Shape', - 'Param #', - 'Connected to'] - relevant_nodes = [] - for v in model._nodes_by_depth.values(): - relevant_nodes += v - - def print_row(fields, positions): - line = '' - for i in range(len(fields)): - if i > 0: - line = line[:-1] + ' ' - line += str(fields[i]) - line = line[:positions[i]] - line += ' ' * (positions[i] - len(line)) - print_fn(line) - - print_fn('Model: "{}"'.format(model.name)) - print_fn('_' * line_length) - print_row(to_display, positions) - print_fn('=' * line_length) - - def print_layer_summary(layer): - try: - output_shape = layer.output_shape - except AttributeError: - output_shape = 'multiple' - name = layer.name - cls_name = layer.__class__.__name__ - fields = [name + ' (' + cls_name + ')', - output_shape, layer.count_params()] - print_row(fields, positions) - - def print_layer_summary_with_connections(layer): - """Prints a summary for a single layer. - - # Arguments - layer: target layer. 
- """ - try: - output_shape = layer.output_shape - except AttributeError: - output_shape = 'multiple' - connections = [] - for node in layer._inbound_nodes: - if relevant_nodes and node not in relevant_nodes: - # node is not part of the current network - continue - for i in range(len(node.inbound_layers)): - inbound_layer = node.inbound_layers[i].name - inbound_node_index = node.node_indices[i] - inbound_tensor_index = node.tensor_indices[i] - connections.append(inbound_layer + - '[' + str(inbound_node_index) + '][' + - str(inbound_tensor_index) + ']') - - name = layer.name - cls_name = layer.__class__.__name__ - if not connections: - first_connection = '' - else: - first_connection = connections[0] - fields = [name + - ' (' + cls_name + ')', - output_shape, - layer.count_params(), - first_connection] - print_row(fields, positions) - if len(connections) > 1: - for i in range(1, len(connections)): - fields = ['', '', '', connections[i]] - print_row(fields, positions) - - layers = model.layers - for i in range(len(layers)): - if sequential_like: - print_layer_summary(layers[i]) - else: - print_layer_summary_with_connections(layers[i]) - if i == len(layers) - 1: - print_fn('=' * line_length) - else: - print_fn('_' * line_length) - - model._check_trainable_weights_consistency() - if hasattr(model, '_collected_trainable_weights'): - trainable_count = count_params(model._collected_trainable_weights) - else: - trainable_count = count_params(model.trainable_weights) - - non_trainable_count = count_params(model.non_trainable_weights) - - print_fn( - 'Total params: {:,}'.format(trainable_count + non_trainable_count)) - print_fn('Trainable params: {:,}'.format(trainable_count)) - print_fn('Non-trainable params: {:,}'.format(non_trainable_count)) - print_fn('_' * line_length) - - -def convert_all_kernels_in_model(model): - """Converts all convolution kernels in a model from Theano to TensorFlow. - - Also works from TensorFlow to Theano. - - # Arguments - model: target model for the conversion. - """ - # Note: SeparableConvolution not included - # since only supported by TF. - conv_classes = { - 'Conv1D', - 'Conv2D', - 'Conv3D', - 'Conv2DTranspose', - } - to_assign = [] - for layer in model.layers: - if layer.__class__.__name__ in conv_classes: - original_kernel = K.get_value(layer.kernel) - converted_kernel = convert_kernel(original_kernel) - to_assign.append((layer.kernel, converted_kernel)) - K.batch_set_value(to_assign) - - -def convert_dense_weights_data_format(dense, - previous_feature_map_shape, - target_data_format='channels_first'): - """Utility useful when changing a convnet's `data_format`. - - When porting the weights of a convnet from one data format to the other, - if the convnet includes a `Flatten` layer - (applied to the last convolutional feature map) - followed by a `Dense` layer, the weights of that `Dense` layer - should be updated to reflect the new dimension ordering. - - # Arguments - dense: The target `Dense` layer. - previous_feature_map_shape: A shape tuple of 3 integers, - e.g. `(512, 7, 7)`. The shape of the convolutional - feature map right before the `Flatten` layer that - came before the target `Dense` layer. - target_data_format: One of "channels_last", "channels_first". - Set it "channels_last" - if converting a "channels_first" model to "channels_last", - or reciprocally. 
- """ - assert target_data_format in {'channels_last', 'channels_first'} - kernel, bias = dense.get_weights() - for i in range(kernel.shape[1]): - if target_data_format == 'channels_first': - c, h, w = previous_feature_map_shape - original_fm_shape = (h, w, c) - ki = kernel[:, i].reshape(original_fm_shape) - ki = np.transpose(ki, (2, 0, 1)) # last -> first - else: - h, w, c = previous_feature_map_shape - original_fm_shape = (c, h, w) - ki = kernel[:, i].reshape(original_fm_shape) - ki = np.transpose(ki, (1, 2, 0)) # first -> last - kernel[:, i] = np.reshape(ki, (np.prod(previous_feature_map_shape),)) - dense.set_weights([kernel, bias]) - - -def get_source_inputs(tensor, layer=None, node_index=None): - """Returns the list of input tensors necessary to compute `tensor`. - - Output will always be a list of tensors - (potentially with 1 element). - - # Arguments - tensor: The tensor to start from. - layer: Origin layer of the tensor. Will be - determined via tensor._keras_history if not provided. - node_index: Origin node index of the tensor. - - # Returns - List of input tensors. - """ - if not hasattr(tensor, '_keras_history'): - return tensor - - if layer is None or node_index: - layer, node_index, _ = tensor._keras_history - if not layer._inbound_nodes: - return [tensor] - else: - node = layer._inbound_nodes[node_index] - if not node.inbound_layers: - # Reached an Input layer, stop recursion. - return node.input_tensors - else: - source_tensors = [] - for i in range(len(node.inbound_layers)): - x = node.input_tensors[i] - layer = node.inbound_layers[i] - node_index = node.node_indices[i] - previous_sources = get_source_inputs(x, - layer, - node_index) - # Avoid input redundancy. - for x in previous_sources: - if x not in source_tensors: - source_tensors.append(x) - return source_tensors -"""Multi-GPU training utilities. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from ..layers.merge import concatenate -from .. import backend as K -from ..layers.core import Lambda -from ..engine.training import Model -from ..models import clone_model -from ..utils.generic_utils import to_list - - -def _get_available_devices(): - return [x.name for x in K.get_session().list_devices()] - - -def _normalize_device_name(name): - name = '/' + ':'.join(name.lower().replace('/', '').split(':')[-2:]) - return name - - -def multi_gpu_model(model, gpus=None, cpu_merge=True, cpu_relocation=False): - """Replicates a model on different GPUs. - - Specifically, this function implements single-machine - multi-GPU data parallelism. It works in the following way: - - - Divide the model's input(s) into multiple sub-batches. - - Apply a model copy on each sub-batch. Every model copy - is executed on a dedicated GPU. - - Concatenate the results (on CPU) into one big batch. - - E.g. if your `batch_size` is 64 and you use `gpus=2`, - then we will divide the input into 2 sub-batches of 32 samples, - process each sub-batch on one GPU, then return the full - batch of 64 processed samples. - - This induces quasi-linear speedup on up to 8 GPUs. - - This function is only available with the TensorFlow backend - for the time being. - - # Arguments - model: A Keras model instance. To avoid OOM errors, - this model could have been built on CPU, for instance - (see usage example below). - gpus: Integer >= 2 or list of integers, number of GPUs or - list of GPU IDs on which to create model replicas. 
- cpu_merge: A boolean value to identify whether to force - merging model weights under the scope of the CPU or not. - cpu_relocation: A boolean value to identify whether to - create the model's weights under the scope of the CPU. - If the model is not defined under any preceding device - scope, you can still rescue it by activating this option. - - # Returns - A Keras `Model` instance which can be used just like the initial - `model` argument, but which distributes its workload on multiple GPUs. - - # Examples - - Example 1 - Training models with weights merge on CPU - - ```python - import tensorflow as tf - from keras.applications import Xception - from keras.utils import multi_gpu_model - import numpy as np - - num_samples = 1000 - height = 224 - width = 224 - num_classes = 1000 - - # Instantiate the base model (or "template" model). - # We recommend doing this with under a CPU device scope, - # so that the model's weights are hosted on CPU memory. - # Otherwise they may end up hosted on a GPU, which would - # complicate weight sharing. - with tf.device('/cpu:0'): - model = Xception(weights=None, - input_shape=(height, width, 3), - classes=num_classes) - - # Replicates the model on 8 GPUs. - # This assumes that your machine has 8 available GPUs. - parallel_model = multi_gpu_model(model, gpus=8) - parallel_model.compile(loss='categorical_crossentropy', - optimizer='rmsprop') - - # Generate dummy data. - x = np.random.random((num_samples, height, width, 3)) - y = np.random.random((num_samples, num_classes)) - - # This `fit` call will be distributed on 8 GPUs. - # Since the batch size is 256, each GPU will process 32 samples. - parallel_model.fit(x, y, epochs=20, batch_size=256) - - # Save model via the template model (which shares the same weights): - model.save('my_model.h5') - ``` - - Example 2 - Training models with weights merge on CPU using cpu_relocation - - ```python - .. - # Not needed to change the device scope for model definition: - model = Xception(weights=None, ..) - - try: - parallel_model = multi_gpu_model(model, cpu_relocation=True) - print("Training using multiple GPUs..") - except ValueError: - parallel_model = model - print("Training using single GPU or CPU..") - parallel_model.compile(..) - .. - ``` - - Example 3 - Training models with weights merge on GPU (recommended for NV-link) - - ```python - .. - # Not needed to change the device scope for model definition: - model = Xception(weights=None, ..) - - try: - parallel_model = multi_gpu_model(model, cpu_merge=False) - print("Training using multiple GPUs..") - except: - parallel_model = model - print("Training using single GPU or CPU..") - - parallel_model.compile(..) - .. - ``` - - # On model saving - - To save the multi-gpu model, use `.save(fname)` or `.save_weights(fname)` - with the template model (the argument you passed to `multi_gpu_model`), - rather than the model returned by `multi_gpu_model`. - """ - if K.backend() != 'tensorflow': - raise ValueError('`multi_gpu_model` is only available ' - 'with the TensorFlow backend.') - - available_devices = _get_available_devices() - available_devices = [_normalize_device_name(name) - for name in available_devices] - if not gpus: - # Using all visible GPUs when not specifying `gpus` - # e.g. CUDA_VISIBLE_DEVICES=0,2 python keras_mgpu.py - gpus = len([x for x in available_devices if 'gpu' in x]) - - if isinstance(gpus, (list, tuple)): - if len(gpus) <= 1: - raise ValueError('For multi-gpu usage to be effective, ' - 'call `multi_gpu_model` with `len(gpus) >= 2`. 
' - 'Received: `gpus=%s`' % gpus) - num_gpus = len(gpus) - target_gpu_ids = gpus - else: - if gpus <= 1: - raise ValueError('For multi-gpu usage to be effective, ' - 'call `multi_gpu_model` with `gpus >= 2`. ' - 'Received: `gpus=%d`' % gpus) - num_gpus = gpus - target_gpu_ids = range(num_gpus) - - import tensorflow as tf - - target_devices = ['/cpu:0'] + ['/gpu:%d' % i for i in target_gpu_ids] - for device in target_devices: - if device not in available_devices: - raise ValueError( - 'To call `multi_gpu_model` with `gpus=%s`, ' - 'we expect the following devices to be available: %s. ' - 'However this machine only has: %s. ' - 'Try reducing `gpus`.' % (gpus, - target_devices, - available_devices)) - - def get_slice(data, i, parts): - shape = K.shape(data) - batch_size = shape[:1] - input_shape = shape[1:] - step = batch_size // parts - if i == parts - 1: - size = batch_size - step * i - else: - size = step - size = K.concatenate([size, input_shape], axis=0) - stride = K.concatenate([step, input_shape * 0], axis=0) - start = stride * i - return K.slice(data, start, size) - - # Relocate the model definition under CPU device scope if needed - if cpu_relocation: - with tf.device('/cpu:0'): - model = clone_model(model) - - all_outputs = [] - for i in range(len(model.outputs)): - all_outputs.append([]) - - # Place a copy of the model on each GPU, - # each getting a slice of the inputs. - for i, gpu_id in enumerate(target_gpu_ids): - with tf.device('/gpu:%d' % gpu_id): - with tf.name_scope('replica_%d' % gpu_id): - inputs = [] - # Retrieve a slice of the input. - for x in model.inputs: - # In-place input splitting which is not only - # 5% ~ 12% faster but also less GPU memory - # duplication. - with tf.device(x.device): - input_shape = K.int_shape(x)[1:] - slice_i = Lambda(get_slice, - output_shape=input_shape, - arguments={'i': i, - 'parts': num_gpus})(x) - inputs.append(slice_i) - - # Apply model on slice - # (creating a model replica on the target device). - outputs = model(inputs) - outputs = to_list(outputs) - - # Save the outputs for merging back together later. - for o in range(len(outputs)): - all_outputs[o].append(outputs[o]) - - # Deduplicate output names to handle Siamese networks. - occurrences = {} - for n in model.output_names: - if n not in occurrences: - occurrences[n] = 1 - else: - occurrences[n] += 1 - conflict_counter = {n: 0 for n, count in occurrences.items() if count > 1} - output_names = [] - for n in model.output_names: - if n in conflict_counter: - conflict_counter[n] += 1 - n += '_%d' % conflict_counter[n] - output_names.append(n) - - # Merge outputs under expected scope. - with tf.device('/cpu:0' if cpu_merge else '/gpu:%d' % target_gpu_ids[0]): - merged = [] - for name, outputs in zip(output_names, all_outputs): - merged.append(concatenate(outputs, - axis=0, name=name)) - return Model(model.inputs, merged) -"""Numpy-related utilities.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - - -def to_categorical(y, num_classes=None, dtype='float32'): - """Converts a class vector (integers) to binary class matrix. - - E.g. for use with categorical_crossentropy. - - # Arguments - y: class vector to be converted into a matrix - (integers from 0 to num_classes). - num_classes: total number of classes. - dtype: The data type expected by the input, as a string - (`float32`, `float64`, `int32`...) - - # Returns - A binary matrix representation of the input. The classes axis - is placed last. 
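# A pure-NumPy sketch (not part of the deleted file) of the sub-batch
# arithmetic in `get_slice` above: each of `parts` replicas takes
# `batch_size // parts` samples and the last replica absorbs the remainder.
# `slice_batch` is a hypothetical helper mirroring that logic.
import numpy as np


def slice_batch(data, i, parts):
    step = data.shape[0] // parts
    start = step * i
    size = data.shape[0] - start if i == parts - 1 else step
    return data[start:start + size]


x = np.arange(10)
print([slice_batch(x, i, 3).tolist() for i in range(3)])
# [[0, 1, 2], [3, 4, 5], [6, 7, 8, 9]]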
- - # Example - - ```python - # Consider an array of 5 labels out of a set of 3 classes {0, 1, 2}: - > labels - array([0, 2, 1, 2, 0]) - # `to_categorical` converts this into a matrix with as many - # columns as there are classes. The number of rows - # stays the same. - > to_categorical(labels) - array([[ 1., 0., 0.], - [ 0., 0., 1.], - [ 0., 1., 0.], - [ 0., 0., 1.], - [ 1., 0., 0.]], dtype=float32) - ``` - """ - - y = np.array(y, dtype='int') - input_shape = y.shape - if input_shape and input_shape[-1] == 1 and len(input_shape) > 1: - input_shape = tuple(input_shape[:-1]) - y = y.ravel() - if not num_classes: - num_classes = np.max(y) + 1 - n = y.shape[0] - categorical = np.zeros((n, num_classes), dtype=dtype) - categorical[np.arange(n), y] = 1 - output_shape = input_shape + (num_classes,) - categorical = np.reshape(categorical, output_shape) - return categorical - - -def normalize(x, axis=-1, order=2): - """Normalizes a Numpy array. - - # Arguments - x: Numpy array to normalize. - axis: axis along which to normalize. - order: Normalization order (e.g. 2 for L2 norm). - - # Returns - A normalized copy of the array. - """ - l2 = np.atleast_1d(np.linalg.norm(x, order, axis)) - l2[l2 == 0] = 1 - return x / np.expand_dims(l2, axis) -"""Utilities related to Keras unit tests.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -from io import BytesIO - -import numpy as np -from numpy.testing import assert_allclose - -from .generic_utils import has_arg -from ..engine import Model, Input -from .. import backend as K - -try: - from tensorflow.python.lib.io import file_io as tf_file_io -except ImportError: - tf_file_io = None - -try: - from unittest.mock import patch, Mock, MagicMock -except: - from mock import patch, Mock, MagicMock - - -def get_test_data(num_train=1000, num_test=500, input_shape=(10,), - output_shape=(2,), - classification=True, num_classes=2): - """Generates test data to train a model on. - - classification=True overrides output_shape - (i.e. output_shape is set to (1,)) and the output - consists in integers in [0, num_classes-1]. - - Otherwise: float output with shape output_shape. - """ - samples = num_train + num_test - if classification: - y = np.random.randint(0, num_classes, size=(samples,)) - X = np.zeros((samples,) + input_shape, dtype=np.float32) - for i in range(samples): - X[i] = np.random.normal(loc=y[i], scale=0.7, size=input_shape) - else: - y_loc = np.random.random((samples,)) - X = np.zeros((samples,) + input_shape, dtype=np.float32) - y = np.zeros((samples,) + output_shape, dtype=np.float32) - for i in range(samples): - X[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=input_shape) - y[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=output_shape) - - return (X[:num_train], y[:num_train]), (X[num_train:], y[num_train:]) - - -def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None, - input_data=None, expected_output=None, - expected_output_dtype=None, fixed_batch_size=False): - """Test routine for a layer with a single input tensor - and single output tensor. 
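# Spot checks (not part of the deleted file) for `to_categorical` and
# `normalize` above, assuming the keras package.
import numpy as np
from keras.utils.np_utils import to_categorical, normalize

print(to_categorical(np.array([0, 2, 1]), num_classes=3))
# [[1. 0. 0.]
#  [0. 0. 1.]
#  [0. 1. 0.]]
print(normalize(np.array([[3.0, 4.0]])))  # [[0.6 0.8]], unit L2 norm per row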
- """ - # generate input data - if input_data is None: - assert input_shape - if not input_dtype: - input_dtype = K.floatx() - input_data_shape = list(input_shape) - for i, e in enumerate(input_data_shape): - if e is None: - input_data_shape[i] = np.random.randint(1, 4) - input_data = (10 * np.random.random(input_data_shape)) - input_data = input_data.astype(input_dtype) - else: - if input_shape is None: - input_shape = input_data.shape - if input_dtype is None: - input_dtype = input_data.dtype - if expected_output_dtype is None: - expected_output_dtype = input_dtype - - # instantiation - layer = layer_cls(**kwargs) - - # test get_weights , set_weights at layer level - weights = layer.get_weights() - layer.set_weights(weights) - - expected_output_shape = layer.compute_output_shape(input_shape) - - # test in functional API - if fixed_batch_size: - x = Input(batch_shape=input_shape, dtype=input_dtype) - else: - x = Input(shape=input_shape[1:], dtype=input_dtype) - y = layer(x) - assert K.dtype(y) == expected_output_dtype - - # check with the functional API - model = Model(x, y) - - actual_output = model.predict(input_data) - actual_output_shape = actual_output.shape - for expected_dim, actual_dim in zip(expected_output_shape, - actual_output_shape): - if expected_dim is not None: - assert expected_dim == actual_dim - - if expected_output is not None: - assert_allclose(actual_output, expected_output, rtol=1e-3) - - # test serialization, weight setting at model level - model_config = model.get_config() - recovered_model = model.__class__.from_config(model_config) - if model.weights: - weights = model.get_weights() - recovered_model.set_weights(weights) - _output = recovered_model.predict(input_data) - assert_allclose(_output, actual_output, rtol=1e-3) - - # test training mode (e.g. useful when the layer has a - # different behavior at training and testing time). - if has_arg(layer.call, 'training'): - model.compile('rmsprop', 'mse') - model.train_on_batch(input_data, actual_output) - - # test instantiation from layer config - layer_config = layer.get_config() - layer_config['batch_input_shape'] = input_shape - layer = layer.__class__.from_config(layer_config) - - # for further checks in the caller function - return actual_output - - -class tf_file_io_proxy(object): - """Context manager for mock patching `tensorflow.python.lib.io.file_io` in tests. - - The purpose of this class is to be able to tests model saving/loading to/from - Google Cloud Storage, for witch the tensorflow `file_io` package is used. - - If a `bucket_name` is provided, either as an input argument or by setting the - environment variable GCS_TEST_BUCKET, *NO mocking* will be done and files will be - transferred to the real GCS bucket. For this to work, valid Google application - credentials must be available, see: - https://cloud.google.com/video-intelligence/docs/common/auth - for further details. - - If a `bucket_name` is not provided, an identifier of the import of the file_io - module to mock must be provided, using the `file_io_module` argument. - NOTE that only part of the module is mocked and that the same Exceptions - are not raised in mock implementation. - - Since the bucket name can be provided using an environment variable, it is - recommended to use method `get_filepath(filename)` in tests to make them - pass with and without a real GCS bucket during testing. See example below. - - # Arguments - file_io_module: String identifier of the file_io module import to patch. 
E.g - 'keras.engine.saving.tf_file_io' - bucket_name: String identifier of *a real* GCS bucket (with or without the - 'gs://' prefix). A bucket name provided with argument precedes what is - specified using the GCS_TEST_BUCKET environment variable. - - # Example - ```python - model = Sequential() - model.add(Dense(2, input_shape=(3,))) - - with tf_file_io_proxy('keras.engine.saving.tf_file_io') as file_io_proxy: - gcs_filepath = file_io_proxy.get_filepath(filename='model.h5') - save_model(model, gcs_filepath) - file_io_proxy.assert_exists(gcs_filepath) - new_model_gcs = load_model(gcs_filepath) - file_io_proxy.delete_file(gcs_filepath) # cleanup - ``` - """ - _gcs_prefix = 'gs://' - _test_bucket_env_key = 'GCS_TEST_BUCKET' - - def __init__(self, file_io_module=None, bucket_name=None): - if bucket_name is None: - bucket_name = os.environ.get(self._test_bucket_env_key, None) - if bucket_name is None: - # will mock gcs locally for tests - if file_io_module is None: - raise ValueError( - '`file_io_module` must be provided for mocking') - self.mock_gcs = True - self.file_io_module = file_io_module - self.local_objects = {} - self.bucket_name = 'mock-bucket' - else: - # will use real bucket for tests - if bucket_name.startswith(self._gcs_prefix): - bucket_name = bucket_name[len(self._gcs_prefix):] - self.bucket_name = bucket_name - if tf_file_io is None: - raise ImportError( - 'tensorflow must be installed to read/write to GCS') - try: - # check that bucket exists and is accessible - tf_file_io.is_directory(self.bucket_path) - except: - raise IOError( - 'could not access provided bucket {}'.format(self.bucket_path)) - self.mock_gcs = False - self.file_io_module = None - self.local_objects = None - - self.patched_file_io = None - self._is_started = False - - @property - def bucket_path(self): - """Returns the full GCS bucket path""" - return self._gcs_prefix + self.bucket_name - - def get_filepath(self, filename): - """Returns filename appended to bucketpath""" - return os.path.join(self.bucket_path, filename) - - def FileIO(self, name, mode): - """Proxy for tensorflow.python.lib.io.file_io.FileIO class. Mocks the class - if a real GCS bucket is not available for testing. - """ - self._check_started() - if not self.mock_gcs: - return tf_file_io.FileIO(name, mode) - - filepath = name - if filepath.startswith(self._gcs_prefix): - mock_fio = MagicMock() - mock_fio.__enter__ = Mock(return_value=mock_fio) - if mode == 'rb': - if filepath not in self.local_objects: - raise IOError('{} does not exist'.format(filepath)) - self.local_objects[filepath].seek(0) - mock_fio.read = self.local_objects[filepath].read - elif mode == 'wb': - self.local_objects[filepath] = BytesIO() - mock_fio.write = self.local_objects[filepath].write - else: - raise ValueError( - '{} only supports wrapping of FileIO for `mode` "rb" or "wb"') - return mock_fio - - return open(filepath, mode) - - def file_exists(self, filename): - """Proxy for tensorflow.python.lib.io.file_io.file_exists class. Mocks the - function if a real GCS bucket is not available for testing. - """ - self._check_started() - if not self.mock_gcs: - return tf_file_io.file_exists(filename) - - if filename.startswith(self._gcs_prefix): - return filename in self.local_objects - - return os.path.exists(filename) - - def delete_file(self, filename): - """Proxy for tensorflow.python.lib.io.file_io.delete_file function. Mocks - the function if a real GCS bucket is not available for testing. 
- """ - if not self.mock_gcs: - tf_file_io.delete_file(filename) - elif filename.startswith(self._gcs_prefix): - self.local_objects.pop(filename) - else: - os.remove(filename) - - def assert_exists(self, filepath): - """Convenience method for verifying that a file exists after writing.""" - self._check_started() - if not self.file_exists(filepath): - raise AssertionError('{} does not exist'.format(filepath)) - - def _check_started(self): - if not self._is_started: - raise RuntimeError('tf_file_io_proxy is not started') - - def start(self): - """Start mocking of `self.file_io_module` if real bucket not - available for testing""" - if self._is_started: - raise RuntimeError( - 'start called on already started tf_file_io_proxy') - if self.mock_gcs: - mock_module = Mock() - mock_module.FileIO = self.FileIO - mock_module.file_exists = self.file_exists - mock_module.delete_file = self.delete_file - patched_file_io = patch(self.file_io_module, new=mock_module) - self.patched_file_io = patched_file_io - self.patched_file_io.start() - self._is_started = True - - def stop(self): - """Stop mocking of `self.file_io_module` if real bucket not - available for testing""" - if not self._is_started: - raise RuntimeError('stop called on unstarted tf_file_io_proxy') - if self.mock_gcs: - self.patched_file_io.stop() - self._is_started = False - - def __enter__(self): - self.start() - return self - - def __exit__(self, exc_type, exc_val, exc_tb): - self.stop() -"""Utilities related to model visualization.""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -from ..models import Model -from ..layers.wrappers import Wrapper - -# `pydot` is an optional dependency, -# see `extras_require` in `setup.py`. -try: - import pydot -except ImportError: - pydot = None - - -def _check_pydot(): - """Raise errors if `pydot` or GraphViz unavailable.""" - if pydot is None: - raise ImportError( - 'Failed to import `pydot`. ' - 'Please install `pydot`. ' - 'For example with `pip install pydot`.') - try: - # Attempt to create an image of a blank graph - # to check the pydot/graphviz installation. - pydot.Dot.create(pydot.Dot()) - except OSError: - raise OSError( - '`pydot` failed to call GraphViz.' - 'Please install GraphViz (https://www.graphviz.org/) ' - 'and ensure that its executables are in the $PATH.') - - -def is_model(layer): - return isinstance(layer, Model) - - -def is_wrapped_model(layer): - return isinstance(layer, Wrapper) and isinstance(layer.layer, Model) - - -def add_edge(dot, src, dst): - if not dot.get_edge(src, dst): - dot.add_edge(pydot.Edge(src, dst)) - - -def model_to_dot(model, - show_shapes=False, - show_layer_names=True, - rankdir='TB', - expand_nested=False, - dpi=96, - subgraph=False): - """Convert a Keras model to dot format. - - # Arguments - model: A Keras model instance. - show_shapes: whether to display shape information. - show_layer_names: whether to display layer names. - rankdir: `rankdir` argument passed to PyDot, - a string specifying the format of the plot: - 'TB' creates a vertical plot; - 'LR' creates a horizontal plot. - expand_nested: whether to expand nested models into clusters. - dpi: dot DPI. - subgraph: whether to return a pydot.Cluster instance. - - # Returns - A `pydot.Dot` instance representing the Keras model or - a `pydot.Cluster` instance representing nested model if - `subgraph=True`. 
- """ - from ..layers.wrappers import Wrapper - from ..models import Model - from ..models import Sequential - - _check_pydot() - if subgraph: - dot = pydot.Cluster(style='dashed', graph_name=model.name) - dot.set('label', model.name) - dot.set('labeljust', 'l') - else: - dot = pydot.Dot() - dot.set('rankdir', rankdir) - dot.set('concentrate', True) - dot.set('dpi', dpi) - dot.set_node_defaults(shape='record') - - if isinstance(model, Sequential): - if not model.built: - model.build() - layers = model._layers - - # Create graph nodes. - for i, layer in enumerate(layers): - layer_id = str(id(layer)) - - # Append a wrapped layer's label to node's label, if it exists. - layer_name = layer.name - class_name = layer.__class__.__name__ - - if isinstance(layer, Wrapper): - if expand_nested and isinstance(layer.layer, Model): - submodel_wrapper = model_to_dot(layer.layer, show_shapes, - show_layer_names, rankdir, - expand_nested, - subgraph=True) - # sub_w : submodel_wrapper - sub_w_nodes = submodel_wrapper.get_nodes() - sub_w_first_node = sub_w_nodes[0] - sub_w_last_node = sub_w_nodes[len(sub_w_nodes) - 1] - dot.add_subgraph(submodel_wrapper) - else: - layer_name = '{}({})'.format(layer_name, layer.layer.name) - child_class_name = layer.layer.__class__.__name__ - class_name = '{}({})'.format(class_name, child_class_name) - - if expand_nested and isinstance(layer, Model): - submodel_not_wrapper = model_to_dot(layer, show_shapes, - show_layer_names, rankdir, - expand_nested, - subgraph=True) - # sub_n : submodel_not_wrapper - sub_n_nodes = submodel_not_wrapper.get_nodes() - sub_n_first_node = sub_n_nodes[0] - sub_n_last_node = sub_n_nodes[len(sub_n_nodes) - 1] - dot.add_subgraph(submodel_not_wrapper) - - # Create node's label. - if show_layer_names: - label = '{}: {}'.format(layer_name, class_name) - else: - label = class_name - - # Rebuild the label as a table including input/output shapes. - if show_shapes: - try: - outputlabels = str(layer.output_shape) - except AttributeError: - outputlabels = 'multiple' - if hasattr(layer, 'input_shape'): - inputlabels = str(layer.input_shape) - elif hasattr(layer, 'input_shapes'): - inputlabels = ', '.join( - [str(ishape) for ishape in layer.input_shapes]) - else: - inputlabels = 'multiple' - label = '%s\n|{input:|output:}|{{%s}|{%s}}' % (label, - inputlabels, - outputlabels) - - if not expand_nested or not isinstance(layer, Model): - node = pydot.Node(layer_id, label=label) - dot.add_node(node) - - # Connect nodes with edges. 
- for layer in layers: - layer_id = str(id(layer)) - for i, node in enumerate(layer._inbound_nodes): - node_key = layer.name + '_ib-' + str(i) - if node_key in model._network_nodes: - for inbound_layer in node.inbound_layers: - inbound_layer_id = str(id(inbound_layer)) - if not expand_nested: - assert dot.get_node(inbound_layer_id) - assert dot.get_node(layer_id) - dot.add_edge(pydot.Edge(inbound_layer_id, layer_id)) - else: - # if inbound_layer is not Model or wrapped Model - if not is_model(inbound_layer) and ( - not is_wrapped_model(inbound_layer)): - # if current layer is not Model or wrapped Model - if not is_model(layer) and ( - not is_wrapped_model(layer)): - assert dot.get_node(inbound_layer_id) - assert dot.get_node(layer_id) - dot.add_edge(pydot.Edge(inbound_layer_id, - layer_id)) - # if current layer is Model - elif is_model(layer): - add_edge(dot, inbound_layer_id, - sub_n_first_node.get_name()) - # if current layer is wrapped Model - elif is_wrapped_model(layer): - dot.add_edge(pydot.Edge(inbound_layer_id, - layer_id)) - dot.add_edge(pydot.Edge(layer_id, - sub_w_first_node.get_name())) - # if inbound_layer is Model - elif is_model(inbound_layer): - add_edge(dot, sub_n_last_node.get_name(), layer_id) - # if inbound_layer is wrapped Model - elif is_wrapped_model(inbound_layer): - add_edge(dot, sub_w_last_node.get_name(), layer_id) - return dot - - -def plot_model(model, - to_file='model.png', - show_shapes=False, - show_layer_names=True, - rankdir='TB', - expand_nested=False, - dpi=96): - """Converts a Keras model to dot format and save to a file. - - # Arguments - model: A Keras model instance - to_file: File name of the plot image. - show_shapes: whether to display shape information. - show_layer_names: whether to display layer names. - rankdir: `rankdir` argument passed to PyDot, - a string specifying the format of the plot: - 'TB' creates a vertical plot; - 'LR' creates a horizontal plot. - expand_nested: whether to expand nested models into clusters. - dpi: dot DPI. - - # Returns - A Jupyter notebook Image object if Jupyter is installed. - This enables in-line display of the model plots in notebooks. - """ - dot = model_to_dot(model, show_shapes, show_layer_names, rankdir, - expand_nested, dpi) - _, extension = os.path.splitext(to_file) - if not extension: - extension = 'png' - else: - extension = extension[1:] - dot.write(to_file, format=extension) - # Return the image as a Jupyter Image object, to be displayed in-line. - try: - from IPython import display - return display.Image(filename=to_file) - except ImportError: - pass -from __future__ import absolute_import - -from . import scikit_learn -"""Wrapper for using the Scikit-Learn API with Keras models. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import copy -import types - -import numpy as np - -from ..utils.np_utils import to_categorical -from ..utils.generic_utils import has_arg -from ..utils.generic_utils import to_list -from ..models import Sequential - - -class BaseWrapper(object): - """Base class for the Keras scikit-learn wrapper. - - Warning: This class should not be used directly. - Use descendant classes instead. - - # Arguments - build_fn: callable function or class instance - **sk_params: model parameters & fitting parameters - - The `build_fn` should construct, compile and return a Keras model, which - will then be used to fit/predict. One of the following - three values could be passed to `build_fn`: - 1. A function - 2. 
An instance of a class that implements the `__call__` method - 3. None. This means you implement a class that inherits from either - `KerasClassifier` or `KerasRegressor`. The `__call__` method of the - present class will then be treated as the default `build_fn`. - - `sk_params` takes both model parameters and fitting parameters. Legal model - parameters are the arguments of `build_fn`. Note that like all other - estimators in scikit-learn, `build_fn` should provide default values for - its arguments, so that you could create the estimator without passing any - values to `sk_params`. - - `sk_params` could also accept parameters for calling `fit`, `predict`, - `predict_proba`, and `score` methods (e.g., `epochs`, `batch_size`). - fitting (predicting) parameters are selected in the following order: - - 1. Values passed to the dictionary arguments of - `fit`, `predict`, `predict_proba`, and `score` methods - 2. Values passed to `sk_params` - 3. The default values of the `keras.models.Sequential` - `fit`, `predict`, `predict_proba` and `score` methods - - When using scikit-learn's `grid_search` API, legal tunable parameters are - those you could pass to `sk_params`, including fitting parameters. - In other words, you could use `grid_search` to search for the best - `batch_size` or `epochs` as well as the model parameters. - """ - - def __init__(self, build_fn=None, **sk_params): - self.build_fn = build_fn - self.sk_params = sk_params - self.check_params(sk_params) - - def check_params(self, params): - """Checks for user typos in `params`. - - # Arguments - params: dictionary; the parameters to be checked - - # Raises - ValueError: if any member of `params` is not a valid argument. - """ - legal_params_fns = [Sequential.fit, Sequential.predict, - Sequential.predict_classes, Sequential.evaluate] - if self.build_fn is None: - legal_params_fns.append(self.__call__) - elif (not isinstance(self.build_fn, types.FunctionType) and - not isinstance(self.build_fn, types.MethodType)): - legal_params_fns.append(self.build_fn.__call__) - else: - legal_params_fns.append(self.build_fn) - - for params_name in params: - for fn in legal_params_fns: - if has_arg(fn, params_name): - break - else: - if params_name != 'nb_epoch': - raise ValueError( - '{} is not a legal parameter'.format(params_name)) - - def get_params(self, **params): - """Gets parameters for this estimator. - - # Arguments - **params: ignored (exists for API compatibility). - - # Returns - Dictionary of parameter names mapped to their values. - """ - res = copy.deepcopy(self.sk_params) - res.update({'build_fn': self.build_fn}) - return res - - def set_params(self, **params): - """Sets the parameters of this estimator. - - # Arguments - **params: Dictionary of parameter names mapped to their values. - - # Returns - self - """ - self.check_params(params) - self.sk_params.update(params) - return self - - def fit(self, x, y, **kwargs): - """Constructs a new model with `build_fn` & fit the model to `(x, y)`. - - # Arguments - x : array-like, shape `(n_samples, n_features)` - Training samples where `n_samples` is the number of samples - and `n_features` is the number of features. - y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)` - True labels for `x`. - **kwargs: dictionary arguments - Legal arguments are the arguments of `Sequential.fit` - - # Returns - history : object - details about the training history at each epoch. 
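A usage sketch of the parameter routing described in this class docstring (the build function and all sizes are hypothetical):

```python
# Sketch only: sk_params are split between build_fn and Sequential.fit.
from keras.models import Sequential
from keras.layers import Dense
from keras.wrappers.scikit_learn import KerasClassifier

def make_model(hidden=8):
    model = Sequential([Dense(hidden, input_shape=(20,), activation='relu'),
                        Dense(2, activation='softmax')])
    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return model

# `hidden` matches a make_model argument; `epochs`/`batch_size` match fit().
clf = KerasClassifier(build_fn=make_model, hidden=16, epochs=3, batch_size=32)
```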
- """ - if self.build_fn is None: - self.model = self.__call__(**self.filter_sk_params(self.__call__)) - elif (not isinstance(self.build_fn, types.FunctionType) and - not isinstance(self.build_fn, types.MethodType)): - self.model = self.build_fn( - **self.filter_sk_params(self.build_fn.__call__)) - else: - self.model = self.build_fn(**self.filter_sk_params(self.build_fn)) - - loss_name = self.model.loss - if hasattr(loss_name, '__name__'): - loss_name = loss_name.__name__ - if loss_name == 'categorical_crossentropy' and len(y.shape) != 2: - y = to_categorical(y) - - fit_args = copy.deepcopy(self.filter_sk_params(Sequential.fit)) - fit_args.update(kwargs) - - history = self.model.fit(x, y, **fit_args) - - return history - - def filter_sk_params(self, fn, override=None): - """Filters `sk_params` and returns those in `fn`'s arguments. - - # Arguments - fn : arbitrary function - override: dictionary, values to override `sk_params` - - # Returns - res : dictionary containing variables - in both `sk_params` and `fn`'s arguments. - """ - override = override or {} - res = {} - for name, value in self.sk_params.items(): - if has_arg(fn, name): - res.update({name: value}) - res.update(override) - return res - - -class KerasClassifier(BaseWrapper): - """Implementation of the scikit-learn classifier API for Keras. - """ - - def fit(self, x, y, sample_weight=None, **kwargs): - """Constructs a new model with `build_fn` & fit the model to `(x, y)`. - - # Arguments - x : array-like, shape `(n_samples, n_features)` - Training samples where `n_samples` is the number of samples - and `n_features` is the number of features. - y : array-like, shape `(n_samples,)` or `(n_samples, n_outputs)` - True labels for `x`. - **kwargs: dictionary arguments - Legal arguments are the arguments of `Sequential.fit` - - # Returns - history : object - details about the training history at each epoch. - - # Raises - ValueError: In case of invalid shape for `y` argument. - """ - y = np.array(y) - if len(y.shape) == 2 and y.shape[1] > 1: - self.classes_ = np.arange(y.shape[1]) - elif (len(y.shape) == 2 and y.shape[1] == 1) or len(y.shape) == 1: - self.classes_ = np.unique(y) - y = np.searchsorted(self.classes_, y) - else: - raise ValueError('Invalid shape for y: ' + str(y.shape)) - self.n_classes_ = len(self.classes_) - if sample_weight is not None: - kwargs['sample_weight'] = sample_weight - return super(KerasClassifier, self).fit(x, y, **kwargs) - - def predict(self, x, **kwargs): - """Returns the class predictions for the given test data. - - # Arguments - x: array-like, shape `(n_samples, n_features)` - Test samples where `n_samples` is the number of samples - and `n_features` is the number of features. - **kwargs: dictionary arguments - Legal arguments are the arguments - of `Sequential.predict_classes`. - - # Returns - preds: array-like, shape `(n_samples,)` - Class predictions. - """ - kwargs = self.filter_sk_params(Sequential.predict_classes, kwargs) - - proba = self.model.predict(x, **kwargs) - if proba.shape[-1] > 1: - classes = proba.argmax(axis=-1) - else: - classes = (proba > 0.5).astype('int32') - return self.classes_[classes] - - def predict_proba(self, x, **kwargs): - """Returns class probability estimates for the given test data. - - # Arguments - x: array-like, shape `(n_samples, n_features)` - Test samples where `n_samples` is the number of samples - and `n_features` is the number of features. - **kwargs: dictionary arguments - Legal arguments are the arguments - of `Sequential.predict_classes`. 
- - # Returns - proba: array-like, shape `(n_samples, n_outputs)` - Class probability estimates. - In the case of binary classification, - to match the scikit-learn API, - will return an array of shape `(n_samples, 2)` - (instead of `(n_sample, 1)` as in Keras). - """ - kwargs = self.filter_sk_params(Sequential.predict_proba, kwargs) - probs = self.model.predict(x, **kwargs) - - # check if binary classification - if probs.shape[1] == 1: - # first column is probability of class 0 and second is of class 1 - probs = np.hstack([1 - probs, probs]) - return probs - - def score(self, x, y, **kwargs): - """Returns the mean accuracy on the given test data and labels. - - # Arguments - x: array-like, shape `(n_samples, n_features)` - Test samples where `n_samples` is the number of samples - and `n_features` is the number of features. - y: array-like, shape `(n_samples,)` or `(n_samples, n_outputs)` - True labels for `x`. - **kwargs: dictionary arguments - Legal arguments are the arguments of `Sequential.evaluate`. - - # Returns - score: float - Mean accuracy of predictions on `x` wrt. `y`. - - # Raises - ValueError: If the underlying model isn't configured to - compute accuracy. You should pass `metrics=["accuracy"]` to - the `.compile()` method of the model. - """ - y = np.searchsorted(self.classes_, y) - kwargs = self.filter_sk_params(Sequential.evaluate, kwargs) - - loss_name = self.model.loss - if hasattr(loss_name, '__name__'): - loss_name = loss_name.__name__ - if loss_name == 'categorical_crossentropy' and len(y.shape) != 2: - y = to_categorical(y) - - outputs = self.model.evaluate(x, y, **kwargs) - outputs = to_list(outputs) - for name, output in zip(self.model.metrics_names, outputs): - if name == 'acc': - return output - raise ValueError('The model is not configured to compute accuracy. ' - 'You should pass `metrics=["accuracy"]` to ' - 'the `model.compile()` method.') - - -class KerasRegressor(BaseWrapper): - """Implementation of the scikit-learn regressor API for Keras. - """ - - def predict(self, x, **kwargs): - """Returns predictions for the given test data. - - # Arguments - x: array-like, shape `(n_samples, n_features)` - Test samples where `n_samples` is the number of samples - and `n_features` is the number of features. - **kwargs: dictionary arguments - Legal arguments are the arguments of `Sequential.predict`. - - # Returns - preds: array-like, shape `(n_samples,)` - Predictions. - """ - kwargs = self.filter_sk_params(Sequential.predict, kwargs) - return np.squeeze(self.model.predict(x, **kwargs), axis=-1) - - def score(self, x, y, **kwargs): - """Returns the mean loss on the given test data and labels. - - # Arguments - x: array-like, shape `(n_samples, n_features)` - Test samples where `n_samples` is the number of samples - and `n_features` is the number of features. - y: array-like, shape `(n_samples,)` - True labels for `x`. - **kwargs: dictionary arguments - Legal arguments are the arguments of `Sequential.evaluate`. - - # Returns - score: float - Mean accuracy of predictions on `x` wrt. `y`. - """ - kwargs = self.filter_sk_params(Sequential.evaluate, kwargs) - loss = self.model.evaluate(x, y, **kwargs) - if isinstance(loss, list): - return -loss[0] - return -loss -import os -from markdown import markdown -from docs import autogen -import pytest - -test_doc1 = { - 'doc': """Base class for recurrent layers. - - # Arguments - cell: A RNN cell instance. A RNN cell is a class that has: - - a `call(input_at_t, states_at_t)` method, returning - `(output_at_t, states_at_t_plus_1)`. 
The call method of the - cell can also take the optional argument `constants`, see - section "Note on passing external constants" below. - - a `state_size` attribute. This can be a single integer - (single state) in which case it is - the size of the recurrent state - (which should be the same as the size of the cell output). - This can also be a list/tuple of integers - (one size per state). In this case, the first entry - (`state_size[0]`) should be the same as - the size of the cell output. - It is also possible for `cell` to be a list of RNN cell instances, - in which cases the cells get stacked on after the other in the RNN, - implementing an efficient stacked RNN. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - input_dim: dimensionality of the input (integer). - This argument (or alternatively, - the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. - input_length: Length of input sequences, to be specified - when it is constant. - This argument is required if you are going to connect - `Flatten` then `Dense` layers upstream - (without it, the shape of the dense outputs cannot be computed). - Note that if the recurrent layer is not the first layer - in your model, you would need to specify the input length - at the level of the first layer - (e.g. via the `input_shape` argument) - - # Input shape - 3D tensor with shape `(batch_size, timesteps, input_dim)`. - - # Output shape - - if `return_state`: a list of tensors. The first tensor is - the output. The remaining tensors are the last states, - each with shape `(batch_size, units)`. - - if `return_sequences`: 3D tensor with shape - `(batch_size, timesteps, units)`. - - else, 2D tensor with shape `(batch_size, units)`. - - # Masking - This layer supports masking for input data with a variable number - of timesteps. To introduce masks to your data, - use an [Embedding](embeddings.md) layer with the `mask_zero` parameter - set to `True`. - - # Note on using statefulness in RNNs - You can set RNN layers to be 'stateful', which means that the states - computed for the samples in one batch will be reused as initial states - for the samples in the next batch. This assumes a one-to-one mapping - between samples in different successive batches. - - To enable statefulness: - - specify `stateful=True` in the layer constructor. - - specify a fixed batch size for your model, by passing - if sequential model: - `batch_input_shape=(...)` to the first layer in your model. - else for functional model with 1 or more Input layers: - `batch_shape=(...)` to all the first layers in your model. - This is the expected shape of your inputs - *including the batch size*. - It should be a tuple of integers, e.g. `(32, 10, 100)`. - - specify `shuffle=False` when calling fit(). 
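A minimal sketch of the stateful configuration just listed (all sizes are illustrative):

```python
# Sketch only: stateful RNNs need a fixed batch size and shuffle=False.
from keras.models import Sequential
from keras.layers import LSTM, Dense

model = Sequential()
model.add(LSTM(16, batch_input_shape=(32, 10, 100), stateful=True))
model.add(Dense(1))
model.compile(loss='mse', optimizer='rmsprop')
# model.fit(x, y, batch_size=32, shuffle=False); then between epochs/sequences:
model.reset_states()
```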
- - To reset the states of your model, call `.reset_states()` on either - a specific layer, or on your entire model. - - # Note on specifying the initial state of RNNs - Note: that - One: You can specify the initial state of RNN layers symbolically by - calling them with the keyword argument `initial_state`. - Two: The value of `initial_state` should be a tensor or list of - tensors representing - the initial state of the RNN layer. - You can specify the initial state of RNN layers numerically by: - One: calling `reset_states` - - With the keyword argument `states`. - - The value of - `states` should be a numpy array or - list of numpy arrays representing - the initial state of the RNN layer. - - # Note on passing external constants to RNNs - You can pass "external" constants to the cell using the `constants` - keyword: argument of `RNN.__call__` (as well as `RNN.call`) method. - This: requires that the `cell.call` method accepts the same keyword argument - `constants`. Such constants can be used to condition the cell - transformation on additional static inputs (not changing over time), - a.k.a. an attention mechanism. - - # Examples - - ```python - # First, let's define a RNN Cell, as a layer subclass. - - class MinimalRNNCell(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(MinimalRNNCell, self).__init__(**kwargs) - - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.built = True - - def call(self, inputs, states): - prev_output = states[0] - h = K.dot(inputs, self.kernel) - output = h + K.dot(prev_output, self.recurrent_kernel) - return output, [output] - - # Let's use this cell in a RNN layer: - - cell = MinimalRNNCell(32) - x = keras.Input((None, 5)) - layer = RNN(cell) - y = layer(x) - - # Here's how to use the cell to build a stacked RNN: - - cells = [MinimalRNNCell(32), MinimalRNNCell(64)] - x = keras.Input((None, 5)) - layer = RNN(cells) - y = layer(x) - ``` - """, - 'result': '''Base class for recurrent layers. - -__Arguments__ - -- __cell__: A RNN cell instance. A RNN cell is a class that has: - - a `call(input_at_t, states_at_t)` method, returning - `(output_at_t, states_at_t_plus_1)`. The call method of the - cell can also take the optional argument `constants`, see - section "Note on passing external constants" below. - - a `state_size` attribute. This can be a single integer - (single state) in which case it is - the size of the recurrent state - (which should be the same as the size of the cell output). - This can also be a list/tuple of integers - (one size per state). In this case, the first entry - (`state_size[0]`) should be the same as - the size of the cell output. - - It is also possible for `cell` to be a list of RNN cell instances, - in which cases the cells get stacked on after the other in the RNN, - implementing an efficient stacked RNN. - -- __return_sequences__: Boolean. Whether to return the last output - in the output sequence, or the full sequence. -- __return_state__: Boolean. Whether to return the last state - in addition to the output. -- __go_backwards__: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. -- __stateful__: Boolean (default False). 
If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. -- __unroll__: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. -- __input_dim__: dimensionality of the input (integer). - This argument (or alternatively, - the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. -- __input_length__: Length of input sequences, to be specified - when it is constant. - This argument is required if you are going to connect - `Flatten` then `Dense` layers upstream - (without it, the shape of the dense outputs cannot be computed). - Note that if the recurrent layer is not the first layer - in your model, you would need to specify the input length - at the level of the first layer - (e.g. via the `input_shape` argument) - -__Input shape__ - -3D tensor with shape `(batch_size, timesteps, input_dim)`. - -__Output shape__ - -- if `return_state`: a list of tensors. The first tensor is - the output. The remaining tensors are the last states, - each with shape `(batch_size, units)`. -- if `return_sequences`: 3D tensor with shape - `(batch_size, timesteps, units)`. -- else, 2D tensor with shape `(batch_size, units)`. - -__Masking__ - -This layer supports masking for input data with a variable number -of timesteps. To introduce masks to your data, -use an [Embedding](embeddings.md) layer with the `mask_zero` parameter -set to `True`. - -__Note on using statefulness in RNNs__ - -You can set RNN layers to be 'stateful', which means that the states -computed for the samples in one batch will be reused as initial states -for the samples in the next batch. This assumes a one-to-one mapping -between samples in different successive batches. - -To enable statefulness: -- specify `stateful=True` in the layer constructor. -- specify a fixed batch size for your model, by passing -if sequential model: -`batch_input_shape=(...)` to the first layer in your model. -else for functional model with 1 or more Input layers: -`batch_shape=(...)` to all the first layers in your model. -This is the expected shape of your inputs -*including the batch size*. -It should be a tuple of integers, e.g. `(32, 10, 100)`. -- specify `shuffle=False` when calling fit(). - -To reset the states of your model, call `.reset_states()` on either -a specific layer, or on your entire model. - -__Note on specifying the initial state of RNNs__ - -Note: that -- __One__: You can specify the initial state of RNN layers symbolically by - calling them with the keyword argument `initial_state`. -- __Two__: The value of `initial_state` should be a tensor or list of - tensors representing - the initial state of the RNN layer. - -You can specify the initial state of RNN layers numerically by: - -- __One__: calling `reset_states` - - With the keyword argument `states`. - - The value of - - `states` should be a numpy array or - list of numpy arrays representing - -the initial state of the RNN layer. - -__Note on passing external constants to RNNs__ - -You can pass "external" constants to the cell using the `constants` -- __keyword__: argument of `RNN.__call__` (as well as `RNN.call`) method. -- __This__: requires that the `cell.call` method accepts the same keyword argument - -`constants`. 
Such constants can be used to condition the cell -transformation on additional static inputs (not changing over time), -a.k.a. an attention mechanism. - -__Examples__ - - -```python -# First, let's define a RNN Cell, as a layer subclass. - -class MinimalRNNCell(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(MinimalRNNCell, self).__init__(**kwargs) - - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.built = True - - def call(self, inputs, states): - prev_output = states[0] - h = K.dot(inputs, self.kernel) - output = h + K.dot(prev_output, self.recurrent_kernel) - return output, [output] - -# Let's use this cell in a RNN layer: - -cell = MinimalRNNCell(32) -x = keras.Input((None, 5)) -layer = RNN(cell) -y = layer(x) - -# Here's how to use the cell to build a stacked RNN: - -cells = [MinimalRNNCell(32), MinimalRNNCell(64)] -x = keras.Input((None, 5)) -layer = RNN(cells) -y = layer(x) -``` -'''} - - -test_doc_with_arguments_as_last_block = { - 'doc': """Base class for recurrent layers. - - # Arguments - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - """, - 'result': '''Base class for recurrent layers. - -__Arguments__ - -- __return_sequences__: Boolean. Whether to return the last output - in the output sequence, or the full sequence. -- __return_state__: Boolean. Whether to return the last state - in addition to the output. -'''} - - -@pytest.mark.parametrize('docs_descriptor', [ - test_doc1, - test_doc_with_arguments_as_last_block, -]) -def test_doc_lists(docs_descriptor): - docstring = autogen.process_docstring(docs_descriptor['doc']) - assert markdown(docstring) == markdown(docs_descriptor['result']) - - -dummy_docstring = """Multiplies 2 tensors (and/or variables) and returns a *tensor*. - - When attempting to multiply a nD tensor - with a nD tensor, it reproduces the Theano behavior. - (e.g. 
`(2, 3) * (4, 3, 5) -> (2, 4, 5)`) - - # Examples - ```python - # Theano-like behavior example - >>> x = K.random_uniform_variable(shape=(2, 3), low=0, high=1) - >>> y = K.ones((4, 3, 5)) - >>> xy = K.dot(x, y) - >>> K.int_shape(xy) - (2, 4, 5) - ``` - - # Numpy implementation - ```python - def dot(x, y): - return dot(x, y) - ``` - """ - - -def test_doc_multiple_sections_code(): - """ Checks that we can have code blocks in multiple sections.""" - generated = autogen.process_docstring(dummy_docstring) - assert '# Theano-like behavior example' in generated - assert 'def dot(x, y):' in generated - - -def test_docs_in_custom_destination_dir(tmpdir): - autogen.generate(tmpdir) - assert os.path.isdir(os.path.join(tmpdir, 'layers')) - assert os.path.isdir(os.path.join(tmpdir, 'models')) - assert os.path.isdir(os.path.join(tmpdir, 'examples')) - assert os.listdir(os.path.join(tmpdir, 'examples')) - - -def test_module_name(): - for page in autogen.PAGES: - list_of_classes = autogen.read_page_data(page, 'classes') - for element in list_of_classes: - if isinstance(element, (list, tuple)): - cls = element[0] - else: - cls = element - signature = autogen.get_class_signature(cls) - assert signature.startswith('keras.') - - list_of_functions = autogen.read_page_data(page, 'functions') - for function_ in list_of_functions: - signature = autogen.get_function_signature(function_) - assert signature.startswith('keras.') - - -if __name__ == '__main__': - pytest.main([__file__]) -import importlib -import inspect -import re -import sys -from itertools import compress - -import pytest - -modules = ['keras.layers', 'keras.models', 'keras', - 'keras.backend.tensorflow_backend', 'keras.engine', - 'keras.wrappers', 'keras.utils', - 'keras.callbacks', 'keras.activations', - 'keras.losses', 'keras.models', 'keras.optimizers'] -accepted_name = ['from_config'] -accepted_module = ['keras.legacy.layers', 'keras.utils.generic_utils'] - -# Functions or classes with less than 'MIN_CODE_SIZE' lines can be ignored -MIN_CODE_SIZE = 10 - - -def handle_class_init(name, member): - init_args = [ - arg for arg in list(inspect.signature(member.__init__).parameters.keys()) - if arg not in ['self', 'args', 'kwargs'] - ] - assert_args_presence(init_args, member.__doc__, member, name) - - -def handle_class(name, member): - if is_accepted(name, member): - return - - if member.__doc__ is None and not member_too_small(member): - raise ValueError("{} class doesn't have any documentation".format(name), - member.__module__, inspect.getmodule(member).__file__) - - handle_class_init(name, member) - - for n, met in inspect.getmembers(member): - if inspect.ismethod(met): - handle_method(n, met) - - -def handle_function(name, member): - if is_accepted(name, member) or member_too_small(member): - # We don't need to check this one. 
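These documentation checks are built on `inspect.signature`; a standalone illustration with a hypothetical function:

```python
# Sketch only: how init/function arguments are extracted for the doc checks.
import inspect

def example(self, units, activation='relu', *args, **kwargs):
    """Doc mentioning units: and activation:."""

params = list(inspect.signature(example).parameters.keys())
init_args = [p for p in params if p not in ('self', 'args', 'kwargs')]
assert init_args == ['units', 'activation']
```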
- return - doc = member.__doc__ - if doc is None: - raise ValueError("{} function doesn't have any documentation".format(name), - member.__module__, inspect.getmodule(member).__file__) - - args = list(inspect.signature(member).parameters.keys()) - assert_args_presence(args, doc, member, name) - assert_function_style(name, member, doc, args) - assert_doc_style(name, member, doc) - - -def assert_doc_style(name, member, doc): - lines = doc.split("\n") - first_line = lines[0] - if len(first_line.strip()) == 0: - raise ValueError( - "{} the documentation should be on the first line.".format(name), - member.__module__) - if first_line.strip()[-1] != '.': - raise ValueError("{} first line should end with a '.'".format(name), - member.__module__) - - -def assert_function_style(name, member, doc, args): - code = inspect.getsource(member) - has_return = re.findall(r"\s*return \S+", code, re.MULTILINE) - if has_return and "# Returns" not in doc: - innerfunction = [inspect.getsource(x) for x in member.__code__.co_consts if - inspect.iscode(x)] - return_in_sub = [ret for code_inner in innerfunction for ret in - re.findall(r"\s*return \S+", code_inner, re.MULTILINE)] - if len(return_in_sub) < len(has_return): - raise ValueError("{} needs a '# Returns' section".format(name), - member.__module__) - - has_raise = re.findall(r"^\s*raise \S+", code, re.MULTILINE) - if has_raise and "# Raises" not in doc: - innerfunction = [inspect.getsource(x) for x in member.__code__.co_consts if - inspect.iscode(x)] - raise_in_sub = [ret for code_inner in innerfunction for ret in - re.findall(r"\s*raise \S+", code_inner, re.MULTILINE)] - if len(raise_in_sub) < len(has_raise): - raise ValueError("{} needs a '# Raises' section".format(name), - member.__module__) - - if len(args) > 0 and "# Arguments" not in doc: - raise ValueError("{} needs a '# Arguments' section".format(name), - member.__module__) - - assert_blank_before(name, member, doc, [ - '# Arguments', '# Raises', '# Returns']) - - -def assert_blank_before(name, member, doc, keywords): - doc_lines = [x.strip() for x in doc.split('\n')] - for keyword in keywords: - if keyword in doc_lines: - index = doc_lines.index(keyword) - if doc_lines[index - 1] != '': - raise ValueError( - "{} '{}' should have a blank line above.".format( - name, keyword), - member.__module__) - - -def is_accepted(name, member): - if 'keras' not in str(member.__module__): - return True - return name in accepted_name or member.__module__ in accepted_module - - -def member_too_small(member): - code = inspect.getsource(member).split('\n') - return len(code) < MIN_CODE_SIZE - - -def assert_args_presence(args, doc, member, name): - args_not_in_doc = [arg not in doc for arg in args] - if any(args_not_in_doc): - raise ValueError( - "{} {} arguments are not present in documentation ".format(name, list( - compress(args, args_not_in_doc))), member.__module__) - words = doc.replace('*', '').split() - # Check arguments styling - styles = [arg + ":" not in words for arg in args] - if any(styles): - raise ValueError( - "{} {} are not style properly 'argument': documentation".format( - name, - list(compress(args, styles))), - member.__module__) - - # Check arguments order - indexes = [words.index(arg + ":") for arg in args] - if indexes != sorted(indexes): - raise ValueError( - "{} arguments order is different from the documentation".format( - name), - member.__module__) - - -def handle_method(name, member): - if name in accepted_name or member.__module__ in accepted_module: - return - handle_function(name, 
member) - - -def handle_module(mod): - for name, mem in inspect.getmembers(mod): - if inspect.isclass(mem): - handle_class(name, mem) - elif inspect.isfunction(mem): - handle_function(name, mem) - elif 'keras' in name and inspect.ismodule(mem): - # Only test keras' modules - handle_module(mem) - - -@pytest.mark.skipif(sys.version_info < (3, 3), reason="requires python3.3") -def test_doc(): - for module in modules: - mod = importlib.import_module(module) - handle_module(mod) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import random -import os -from multiprocessing import Process, Queue -from keras import applications -from keras import backend as K - - -MODEL_LIST = [ - (applications.ResNet50, 2048), - (applications.VGG16, 512), - (applications.VGG19, 512), - (applications.Xception, 2048), - (applications.InceptionV3, 2048), - (applications.InceptionResNetV2, 1536), - (applications.MobileNet, 1024), - (applications.MobileNetV2, 1280), - (applications.DenseNet121, 1024), - (applications.DenseNet169, 1664), - (applications.DenseNet201, 1920), - # Note that NASNetLarge is too heavy to test on Travis. - (applications.NASNetMobile, 1056) -] - - -def _get_output_shape(model_fn): - if K.backend() == 'cntk': - # Create model in a subprocess so that - # the memory consumed by InceptionResNetV2 will be - # released back to the system after this test - # (to deal with OOM error on CNTK backend). - # TODO: remove the use of multiprocessing from these tests - # once a memory clearing mechanism - # is implemented in the CNTK backend. - def target(queue): - model = model_fn() - queue.put(model.output_shape) - queue = Queue() - p = Process(target=target, args=(queue,)) - p.start() - p.join() - # The error in a subprocess won't propagate - # to the main process, so we check if the model - # is successfully created by checking if the output shape - # has been put into the queue - assert not queue.empty(), 'Model creation failed.' 
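The subprocess isolation used in `_get_output_shape`, reduced to its core; the payload is a stand-in, and the fork start method is assumed as in the test above:

```python
# Sketch only: run work in a child process, hand the result back via a Queue.
from multiprocessing import Process, Queue

def _target(queue):
    queue.put((None, 2048))   # stand-in for model.output_shape

q = Queue()
p = Process(target=_target, args=(q,))
p.start()
p.join()
assert not q.empty()          # an empty queue means the child crashed
result = q.get_nowait()
```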
- return queue.get_nowait() - else: - model = model_fn() - return model.output_shape - - -def _test_application_basic(app, last_dim=1000): - output_shape = _get_output_shape(lambda: app(weights=None)) - assert output_shape == (None, last_dim) - - -def _test_application_notop(app, last_dim): - output_shape = _get_output_shape( - lambda: app(weights=None, include_top=False)) - assert len(output_shape) == 4 - assert output_shape[-1] == last_dim - - -def test_mobilenet_v2_legacy_import(): - from keras.applications import mobilenetv2 - assert hasattr(mobilenetv2, 'MobileNetV2') - from keras.applications import mobilenet_v2 - assert hasattr(mobilenet_v2, 'MobileNetV2') - - -def test_applications(): - for _ in range(3): - app, last_dim = random.choice(MODEL_LIST) - _test_application_basic(app) - _test_application_notop(app, last_dim) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from numpy.testing import assert_allclose - -from keras.applications import imagenet_utils as utils -from keras.models import Model -from keras.layers import Input, Lambda - - -def test_preprocess_input(): - # Test image batch with float and int image input - x = np.random.uniform(0, 255, (2, 10, 10, 3)) - xint = x.astype('int32') - assert utils.preprocess_input(x).shape == x.shape - assert utils.preprocess_input(xint).shape == xint.shape - - out1 = utils.preprocess_input(x, 'channels_last') - out1int = utils.preprocess_input(xint, 'channels_last') - out2 = utils.preprocess_input(np.transpose(x, (0, 3, 1, 2)), - 'channels_first') - out2int = utils.preprocess_input(np.transpose(xint, (0, 3, 1, 2)), - 'channels_first') - assert_allclose(out1, out2.transpose(0, 2, 3, 1)) - assert_allclose(out1int, out2int.transpose(0, 2, 3, 1)) - - # Test single image - x = np.random.uniform(0, 255, (10, 10, 3)) - xint = x.astype('int32') - assert utils.preprocess_input(x).shape == x.shape - assert utils.preprocess_input(xint).shape == xint.shape - - out1 = utils.preprocess_input(x, 'channels_last') - out1int = utils.preprocess_input(xint, 'channels_last') - out2 = utils.preprocess_input(np.transpose(x, (2, 0, 1)), - 'channels_first') - out2int = utils.preprocess_input(np.transpose(xint, (2, 0, 1)), - 'channels_first') - assert_allclose(out1, out2.transpose(1, 2, 0)) - assert_allclose(out1int, out2int.transpose(1, 2, 0)) - - # Test that writing over the input data works predictably - for mode in ['torch', 'tf']: - x = np.random.uniform(0, 255, (2, 10, 10, 3)) - xint = x.astype('int') - x2 = utils.preprocess_input(x, mode=mode) - xint2 = utils.preprocess_input(xint) - assert_allclose(x, x2) - assert xint.astype('float').max() != xint2.max() - # Caffe mode works differently from the others - x = np.random.uniform(0, 255, (2, 10, 10, 3)) - xint = x.astype('int') - x2 = utils.preprocess_input(x, data_format='channels_last', mode='caffe') - xint2 = utils.preprocess_input(xint) - assert_allclose(x, x2[..., ::-1]) - assert xint.astype('float').max() != xint2.max() - - -def test_preprocess_input_symbolic(): - # Test image batch - x = np.random.uniform(0, 255, (2, 10, 10, 3)) - inputs = Input(shape=x.shape[1:]) - outputs = Lambda(utils.preprocess_input, output_shape=x.shape[1:])(inputs) - model = Model(inputs, outputs) - assert model.predict(x).shape == x.shape - - outputs1 = Lambda(lambda x: utils.preprocess_input(x, 'channels_last'), - output_shape=x.shape[1:])(inputs) - model1 = Model(inputs, outputs1) - out1 = model1.predict(x) - x2 = np.transpose(x, (0, 3, 1, 2)) - inputs2 = 
Input(shape=x2.shape[1:]) - outputs2 = Lambda(lambda x: utils.preprocess_input(x, 'channels_first'), - output_shape=x2.shape[1:])(inputs2) - model2 = Model(inputs2, outputs2) - out2 = model2.predict(x2) - assert_allclose(out1, out2.transpose(0, 2, 3, 1)) - - # Test single image - x = np.random.uniform(0, 255, (10, 10, 3)) - inputs = Input(shape=x.shape) - outputs = Lambda(utils.preprocess_input, output_shape=x.shape)(inputs) - model = Model(inputs, outputs) - assert model.predict(x[np.newaxis])[0].shape == x.shape - - outputs1 = Lambda(lambda x: utils.preprocess_input(x, 'channels_last'), - output_shape=x.shape)(inputs) - model1 = Model(inputs, outputs1) - out1 = model1.predict(x[np.newaxis])[0] - x2 = np.transpose(x, (2, 0, 1)) - inputs2 = Input(shape=x2.shape) - outputs2 = Lambda(lambda x: utils.preprocess_input(x, 'channels_first'), - output_shape=x2.shape)(inputs2) - model2 = Model(inputs2, outputs2) - out2 = model2.predict(x2[np.newaxis])[0] - assert_allclose(out1, out2.transpose(1, 2, 0)) - - -def test_decode_predictions(): - x = np.zeros((2, 1000)) - x[0, 372] = 1.0 - x[1, 549] = 1.0 - outs = utils.decode_predictions(x, top=1) - scores = [out[0][2] for out in outs] - assert scores[0] == scores[1] - - # the numbers of columns and ImageNet classes are not identical. - with pytest.raises(ValueError): - utils.decode_predictions(np.ones((2, 100))) - - -if __name__ == '__main__': - pytest.main([__file__]) -from __future__ import print_function -import pytest -import time -import random -from keras.datasets import cifar10 -from keras.datasets import cifar100 -from keras.datasets import reuters -from keras.datasets import imdb -from keras.datasets import mnist -from keras.datasets import boston_housing -from keras.datasets import fashion_mnist - - -def test_cifar(): - # only run data download tests 20% of the time - # to speed up frequent testing - random.seed(time.time()) - if random.random() > 0.8: - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - assert len(x_train) == len(y_train) == 50000 - assert len(x_test) == len(y_test) == 10000 - (x_train, y_train), (x_test, y_test) = cifar100.load_data('fine') - assert len(x_train) == len(y_train) == 50000 - assert len(x_test) == len(y_test) == 10000 - (x_train, y_train), (x_test, y_test) = cifar100.load_data('coarse') - assert len(x_train) == len(y_train) == 50000 - assert len(x_test) == len(y_test) == 10000 - - -def test_reuters(): - # only run data download tests 20% of the time - # to speed up frequent testing - random.seed(time.time()) - if random.random() > 0.8: - (x_train, y_train), (x_test, y_test) = reuters.load_data() - assert len(x_train) == len(y_train) - assert len(x_test) == len(y_test) - assert len(x_train) + len(x_test) == 11228 - (x_train, y_train), (x_test, y_test) = reuters.load_data(maxlen=10) - assert len(x_train) == len(y_train) - assert len(x_test) == len(y_test) - word_index = reuters.get_word_index() - assert isinstance(word_index, dict) - - -def test_mnist(): - # only run data download tests 20% of the time - # to speed up frequent testing - random.seed(time.time()) - if random.random() > 0.8: - (x_train, y_train), (x_test, y_test) = mnist.load_data() - assert len(x_train) == len(y_train) == 60000 - assert len(x_test) == len(y_test) == 10000 - - -def test_imdb(): - # only run data download tests 20% of the time - # to speed up frequent testing - random.seed(time.time()) - if random.random() > 0.8: - (x_train, y_train), (x_test, y_test) = imdb.load_data() - (x_train, y_train), (x_test, y_test) = 
imdb.load_data(maxlen=40) - assert len(x_train) == len(y_train) - assert len(x_test) == len(y_test) - word_index = imdb.get_word_index() - assert isinstance(word_index, dict) - - -def test_boston_housing(): - # only run data download tests 20% of the time - # to speed up frequent testing - random.seed(time.time()) - if random.random() > 0.8: - (x_train, y_train), (x_test, y_test) = boston_housing.load_data() - assert len(x_train) == len(y_train) - assert len(x_test) == len(y_test) - - -def test_fashion_mnist(): - # only run data download tests 20% of the time - # to speed up frequent testing - random.seed(time.time()) - if random.random() > 0.8: - (x_train, y_train), (x_test, y_test) = fashion_mnist.load_data() - assert len(x_train) == len(y_train) == 60000 - assert len(x_test) == len(y_test) == 10000 - - -if __name__ == '__main__': - pytest.main([__file__]) -from __future__ import print_function -import numpy as np -import pytest - -from keras.preprocessing.image import ImageDataGenerator -from keras.utils.test_utils import get_test_data -from keras.models import Sequential -from keras import layers -from keras.utils.np_utils import to_categorical - - -def test_image_classification(): - np.random.seed(1337) - input_shape = (16, 16, 3) - (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500, - num_test=200, - input_shape=input_shape, - classification=True, - num_classes=4) - y_train = to_categorical(y_train) - y_test = to_categorical(y_test) - - model = Sequential([ - layers.Conv2D(filters=8, kernel_size=3, - activation='relu', - input_shape=input_shape), - layers.MaxPooling2D(pool_size=2), - layers.Conv2D(filters=4, kernel_size=(3, 3), - activation='relu', padding='same'), - layers.GlobalAveragePooling2D(), - layers.Dense(y_test.shape[-1], activation='softmax') - ]) - model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['accuracy']) - model.summary() - history = model.fit(x_train, y_train, epochs=10, batch_size=16, - validation_data=(x_test, y_test), - verbose=0) - assert history.history['val_acc'][-1] > 0.75 - config = model.get_config() - model = Sequential.from_config(config) - - -def test_image_data_generator_training(): - np.random.seed(1337) - img_gen = ImageDataGenerator(rescale=1.) 
# Dummy ImageDataGenerator - input_shape = (16, 16, 3) - (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500, - num_test=200, - input_shape=input_shape, - classification=True, - num_classes=4) - y_train = to_categorical(y_train) - y_test = to_categorical(y_test) - - model = Sequential([ - layers.Conv2D(filters=8, kernel_size=3, - activation='relu', - input_shape=input_shape), - layers.MaxPooling2D(pool_size=2), - layers.Conv2D(filters=4, kernel_size=(3, 3), - activation='relu', padding='same'), - layers.GlobalAveragePooling2D(), - layers.Dense(y_test.shape[-1], activation='softmax') - ]) - model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['accuracy']) - history = model.fit_generator(img_gen.flow(x_train, y_train, batch_size=16), - epochs=10, - validation_data=img_gen.flow(x_test, y_test, - batch_size=16), - verbose=0) - assert history.history['val_acc'][-1] > 0.75 - model.evaluate_generator(img_gen.flow(x_train, y_train, batch_size=16)) - - -if __name__ == '__main__': - pytest.main([__file__]) -from __future__ import print_function -import numpy as np -import pytest -import string - -from keras.utils.test_utils import get_test_data -from keras.utils.np_utils import to_categorical -from keras.models import Sequential -from keras import layers, optimizers -import keras.backend as K -import keras - - -def test_temporal_classification(): - ''' - Classify temporal sequences of float numbers - of length 3 into 2 classes using - single layer of GRU units and softmax applied - to the last activations of the units - ''' - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = get_test_data(num_train=200, - num_test=20, - input_shape=(3, 4), - classification=True, - num_classes=2) - y_train = to_categorical(y_train) - y_test = to_categorical(y_test) - - model = Sequential() - model.add(layers.GRU(8, - input_shape=(x_train.shape[1], x_train.shape[2]))) - model.add(layers.Dense(y_train.shape[-1], activation='softmax')) - model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['accuracy']) - model.summary() - history = model.fit(x_train, y_train, epochs=4, batch_size=10, - validation_data=(x_test, y_test), - verbose=0) - assert(history.history['acc'][-1] >= 0.8) - config = model.get_config() - model = Sequential.from_config(config) - - -def test_temporal_classification_functional(): - ''' - Classify temporal sequences of float numbers - of length 3 into 2 classes using - single layer of GRU units and softmax applied - to the last activations of the units - ''' - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = get_test_data(num_train=200, - num_test=20, - input_shape=(3, 4), - classification=True, - num_classes=2) - y_train = to_categorical(y_train) - y_test = to_categorical(y_test) - - inputs = layers.Input(shape=(x_train.shape[1], x_train.shape[2])) - x = layers.SimpleRNN(8)(inputs) - outputs = layers.Dense(y_train.shape[-1], activation='softmax')(x) - model = keras.models.Model(inputs, outputs) - model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['accuracy']) - history = model.fit(x_train, y_train, epochs=4, batch_size=10, - validation_data=(x_test, y_test), - verbose=0) - assert(history.history['acc'][-1] >= 0.8) - - -def test_temporal_regression(): - ''' - Predict float numbers (regression) based on sequences - of float numbers of length 3 using a single layer of GRU units - ''' - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = get_test_data(num_train=200, - 
num_test=20, - input_shape=(3, 5), - output_shape=(2,), - classification=False) - model = Sequential() - model.add(layers.LSTM(y_train.shape[-1], - input_shape=(x_train.shape[1], x_train.shape[2]))) - model.compile(loss='hinge', optimizer='adam') - history = model.fit(x_train, y_train, epochs=5, batch_size=16, - validation_data=(x_test, y_test), verbose=0) - assert(history.history['loss'][-1] < 1.) - - -def test_3d_to_3d(): - ''' - Apply a same Dense layer for each element of time dimension of the input - and make predictions of the output sequence elements. - This does not make use of the temporal structure of the sequence - (see TimeDistributedDense for more details) - ''' - np.random.seed(1337) - (x_train, y_train), (x_test, y_test) = get_test_data(num_train=100, - num_test=20, - input_shape=(3, 5), - output_shape=(3, 5), - classification=False) - - model = Sequential() - model.add(layers.TimeDistributed( - layers.Dense(y_train.shape[-1]), input_shape=x_train.shape[1:3])) - model.compile(loss='hinge', optimizer='rmsprop') - history = model.fit(x_train, y_train, epochs=20, batch_size=16, - validation_data=(x_test, y_test), verbose=0) - assert(history.history['loss'][-1] < 1.) - - -def test_stacked_lstm_char_prediction(): - ''' - Learn alphabetical char sequence with stacked LSTM. - Predict the whole alphabet based on the first two letters ('ab' -> 'ab...z') - See non-toy example in examples/lstm_text_generation.py - ''' - # generate alphabet: - # http://stackoverflow.com/questions/16060899/alphabet-range-python - alphabet = string.ascii_lowercase - number_of_chars = len(alphabet) - - # generate char sequences of length 'sequence_length' out of alphabet and - # store the next char as label (e.g. 'ab'->'c') - sequence_length = 2 - sentences = [alphabet[i: i + sequence_length] - for i in range(len(alphabet) - sequence_length)] - next_chars = [alphabet[i + sequence_length] - for i in range(len(alphabet) - sequence_length)] - - # Transform sequences and labels into 'one-hot' encoding - x = np.zeros((len(sentences), sequence_length, - number_of_chars), dtype=np.bool) - y = np.zeros((len(sentences), number_of_chars), dtype=np.bool) - for i, sentence in enumerate(sentences): - for t, char in enumerate(sentence): - x[i, t, ord(char) - ord('a')] = 1 - y[i, ord(next_chars[i]) - ord('a')] = 1 - - # learn the alphabet with stacked LSTM - model = Sequential([ - layers.LSTM(16, return_sequences=True, - input_shape=(sequence_length, number_of_chars)), - layers.LSTM(16, return_sequences=False), - layers.Dense(number_of_chars, activation='softmax') - ]) - model.compile(loss='categorical_crossentropy', optimizer='adam') - model.fit(x, y, batch_size=1, epochs=60, verbose=1) - - # prime the model with 'ab' sequence and let it generate the learned alphabet - sentence = alphabet[:sequence_length] - generated = sentence - for iteration in range(number_of_chars - sequence_length): - x = np.zeros((1, sequence_length, number_of_chars)) - for t, char in enumerate(sentence): - x[0, t, ord(char) - ord('a')] = 1. - preds = model.predict(x, verbose=0)[0] - next_char = chr(np.argmax(preds) + ord('a')) - generated += next_char - sentence = sentence[1:] + next_char - - # check that it did generate the alphabet correctly - assert(generated == alphabet) - - -def test_masked_temporal(): - ''' - Confirm that even with masking on both inputs and outputs, cross-entropies are - of the expected scale. - - In this task, there are variable length inputs of integers from 1-9, and a random - subset of unmasked outputs. 
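A quick numeric check of the label scheme and loss floor this docstring describes:

```python
# Sketch only: labels are x or (2*x) % 10 with equal probability, so a
# perfect model can at best put 0.5 on each candidate class.
import numpy as np

x = np.array([3, 7, 9])
doubled = (2 * x) % 10        # -> [6, 4, 8]
floor = -np.log(0.5)          # ~0.693, the best achievable cross-entropy
```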
Each of these outputs has a 50% probability of being - the input number unchanged, and a 50% probability of being 2*input%10. - - The ground-truth best cross-entropy loss should, then be -log(0.5) = 0.69 - - ''' - np.random.seed(1338) - - model = Sequential() - model.add(layers.Embedding(10, 10, mask_zero=True)) - model.add(layers.Activation('softmax')) - model.compile(loss='categorical_crossentropy', - optimizer='adam') - - x = np.random.randint(1, 10, size=(20000, 10)) - for rowi in range(x.shape[0]): - padding = np.random.randint(0, x.shape[1] / 2 + 1) - x[rowi, :padding] = 0 - - # 50% of the time the correct output is the input. - # The other 50% of the time it's 2 * input % 10 - y = (x * np.random.randint(1, 3, size=x.shape)) % 10 - ys = np.zeros((y.size, 10), dtype='int32') - for i, target in enumerate(y.flat): - ys[i, target] = 1 - ys = ys.reshape(y.shape + (10,)) - - history = model.fit(x, ys, validation_split=0.05, batch_size=10, - verbose=0, epochs=3) - ground_truth = -np.log(0.5) - assert(np.abs(history.history['loss'][-1] - ground_truth) < 0.06) - - -@pytest.mark.skipif(K.backend() != 'tensorflow', reason='Requires TF backend') -def test_embedding_with_clipnorm(): - model = Sequential() - model.add(layers.Embedding(input_dim=1, output_dim=1)) - model.compile(optimizer=optimizers.SGD(clipnorm=0.1), loss='mse') - model.fit(np.array([[0]]), np.array([[[0.5]]]), epochs=1) - - -if __name__ == '__main__': - pytest.main([__file__]) -from __future__ import print_function - -import os -import tempfile -import pytest -import keras -from keras import layers -from keras.utils.test_utils import get_test_data - - -@pytest.mark.skipif(keras.backend.backend() != 'tensorflow', - reason='Requires TF backend') -def test_tf_optimizer(): - import tensorflow as tf - - num_hidden = 10 - output_dim = 2 - input_dim = 10 - target = 0.8 - optimizer = tf.train.AdadeltaOptimizer( - learning_rate=1., rho=0.95, epsilon=1e-08) - - (x_train, y_train), (x_test, y_test) = get_test_data( - num_train=1000, num_test=200, - input_shape=(input_dim,), - classification=True, num_classes=output_dim) - - model = keras.Sequential() - model.add(layers.Dense(num_hidden, - activation='relu', - input_shape=(input_dim,))) - model.add(layers.Dense(output_dim, activation='softmax')) - - model.compile(loss='sparse_categorical_crossentropy', - optimizer=optimizer, - metrics=['accuracy']) - history = model.fit(x_train, y_train, epochs=8, batch_size=16, - validation_data=(x_test, y_test), verbose=2) - assert history.history['val_acc'][-1] >= target - - # Test saving. - _, fname = tempfile.mkstemp('.h5') - model.save(fname) - model = keras.models.load_model(fname) - assert len(model.weights) == 4 - os.remove(fname) - - -if __name__ == '__main__': - pytest.main([__file__]) -from __future__ import print_function -import pytest - -from keras.utils.test_utils import get_test_data -from keras.models import Sequential -from keras import layers -import keras -from keras.utils.np_utils import to_categorical - -num_classes = 2 - - -def test_vector_classification(): - ''' - Classify random float vectors into 2 classes with logistic regression - using 2 layer neural network with ReLU hidden units. 
- ''' - (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500, - num_test=200, - input_shape=(20,), - classification=True, - num_classes=num_classes) - y_train = to_categorical(y_train) - y_test = to_categorical(y_test) - - # Test with Sequential API - model = Sequential([ - layers.Dense(16, input_shape=(x_train.shape[-1],), activation='relu'), - layers.Dense(8), - layers.Activation('relu'), - layers.Dense(num_classes, activation='softmax') - ]) - model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['accuracy']) - model.summary() - history = model.fit(x_train, y_train, epochs=15, batch_size=16, - validation_data=(x_test, y_test), - verbose=0) - assert(history.history['val_acc'][-1] > 0.8) - config = model.get_config() - model = Sequential.from_config(config) - - -def test_vector_classification_functional(): - (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500, - num_test=200, - input_shape=(20,), - classification=True, - num_classes=num_classes) - # Test with functional API - inputs = layers.Input(shape=(x_train.shape[-1],)) - x = layers.Dense(16, activation=keras.activations.relu)(inputs) - x = layers.Dense(8)(x) - x = layers.Activation('relu')(x) - outputs = layers.Dense(num_classes, activation='softmax')(x) - model = keras.models.Model(inputs, outputs) - model.compile(loss=keras.losses.sparse_categorical_crossentropy, - optimizer=keras.optimizers.RMSprop(), - metrics=['acc']) - history = model.fit(x_train, y_train, epochs=15, batch_size=16, - validation_data=(x_test, y_test), - verbose=0) - assert(history.history['val_acc'][-1] > 0.8) - - -def test_vector_regression(): - ''' - Perform float data prediction (regression) using 2 layer MLP - with tanh and sigmoid activations. - ''' - (x_train, y_train), (x_test, y_test) = get_test_data(num_train=500, - num_test=200, - input_shape=(20,), - output_shape=( - num_classes,), - classification=False) - - model = Sequential([ - layers.Dense(16, input_shape=(x_train.shape[-1],), activation='tanh'), - layers.Dense(num_classes) - ]) - - model.compile(loss='hinge', optimizer='adagrad') - history = model.fit(x_train, y_train, epochs=20, batch_size=16, - validation_data=(x_test, y_test), verbose=0) - assert (history.history['val_loss'][-1] < 0.9) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from numpy.testing import assert_allclose - -from keras import backend as K -from keras import activations - -from keras.layers.core import Dense - - -def get_standard_values(): - """A set of floats used for testing the activations. - """ - return np.array([[0, 0.1, 0.5, 0.9, 1.0]], dtype=K.floatx()) - - -def test_serialization(): - all_activations = ['softmax', 'relu', 'elu', 'tanh', - 'sigmoid', 'hard_sigmoid', 'linear', - 'softplus', 'softsign', 'selu'] - for name in all_activations: - fn = activations.get(name) - ref_fn = getattr(activations, name) - assert fn == ref_fn - config = activations.serialize(fn) - fn = activations.deserialize(config) - assert fn == ref_fn - - -def test_get_fn(): - """Activations has a convenience "get" function. All paths of this - function are tested here, although the behaviour in some instances - seems potentially surprising (e.g. situation 3) - """ - - # 1. Default returns linear - a = activations.get(None) - assert a == activations.linear - - # 2. Passing in a layer raises a warning - layer = Dense(32) - with pytest.warns(UserWarning): - a = activations.get(layer) - - # 3. 
Callables return themselves for some reason - a = activations.get(lambda x: 5) - assert a(None) == 5 - - # 4. Anything else is not a valid argument - with pytest.raises(ValueError): - a = activations.get(6) - - -def test_softmax_valid(): - """Test using a reference implementation of softmax. - """ - def softmax(values): - m = np.max(values) - e = np.exp(values - m) - return e / np.sum(e) - - x = K.placeholder(ndim=2) - f = K.function([x], [activations.softmax(x)]) - test_values = get_standard_values() - - result = f([test_values])[0] - expected = softmax(test_values) - assert_allclose(result, expected, rtol=1e-05) - - -def test_softmax_invalid(): - """Test for the expected exception behaviour on invalid input - """ - - x = K.placeholder(ndim=1) - - # One dimensional arrays are supposed to raise a value error - with pytest.raises(ValueError): - f = K.function([x], [activations.softmax(x)]) - - -def test_softmax_3d(): - """Test using a reference implementation of softmax. - """ - def softmax(values, axis): - m = np.max(values, axis=axis, keepdims=True) - e = np.exp(values - m) - return e / np.sum(e, axis=axis, keepdims=True) - - x = K.placeholder(ndim=3) - f = K.function([x], [activations.softmax(x, axis=1)]) - test_values = get_standard_values()[:, :, np.newaxis].copy() - - result = f([test_values])[0] - expected = softmax(test_values, axis=1) - assert_allclose(result, expected, rtol=1e-05) - - -def test_time_distributed_softmax(): - x = K.placeholder(shape=(1, 1, 5)) - f = K.function([x], [activations.softmax(x)]) - test_values = get_standard_values() - test_values = np.reshape(test_values, (1, 1, np.size(test_values))) - f([test_values])[0] - - -def test_softplus(): - """Test using a reference softplus implementation. - """ - def softplus(x): - return np.log(np.ones_like(x) + np.exp(x)) - - x = K.placeholder(ndim=2) - f = K.function([x], [activations.softplus(x)]) - test_values = get_standard_values() - - result = f([test_values])[0] - expected = softplus(test_values) - assert_allclose(result, expected, rtol=1e-05) - - -def test_softsign(): - """Test using a reference softsign implementation. - """ - def softsign(x): - return np.divide(x, np.ones_like(x) + np.absolute(x)) - - x = K.placeholder(ndim=2) - f = K.function([x], [activations.softsign(x)]) - test_values = get_standard_values() - - result = f([test_values])[0] - expected = softsign(test_values) - assert_allclose(result, expected, rtol=1e-05) - - -def test_sigmoid(): - """Test using a numerically stable reference sigmoid implementation. - """ - def ref_sigmoid(x): - if x >= 0: - return 1 / (1 + np.exp(-x)) - else: - z = np.exp(x) - return z / (1 + z) - sigmoid = np.vectorize(ref_sigmoid) - - x = K.placeholder(ndim=2) - f = K.function([x], [activations.sigmoid(x)]) - test_values = get_standard_values() - - result = f([test_values])[0] - expected = sigmoid(test_values) - assert_allclose(result, expected, rtol=1e-05) - - -def test_hard_sigmoid(): - """Test using a reference hard sigmoid implementation. 
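    (Editor's note: Keras' hard sigmoid is the piecewise-linear
    approximation clip(0.2 * x + 0.5, 0, 1), and the reference
    implementation below mirrors exactly that formula.)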
- """ - def ref_hard_sigmoid(x): - x = (x * 0.2) + 0.5 - z = 0.0 if x <= 0 else (1.0 if x >= 1 else x) - return z - hard_sigmoid = np.vectorize(ref_hard_sigmoid) - - x = K.placeholder(ndim=2) - f = K.function([x], [activations.hard_sigmoid(x)]) - test_values = get_standard_values() - - result = f([test_values])[0] - expected = hard_sigmoid(test_values) - assert_allclose(result, expected, rtol=1e-05) - - -def test_relu(): - x = K.placeholder(ndim=2) - f = K.function([x], [activations.relu(x)]) - - test_values = get_standard_values() - result = f([test_values])[0] - assert_allclose(result, test_values, rtol=1e-05) - - # Test max_value - test_values = np.array([[0.5, 1.5]], dtype=K.floatx()) - f = K.function([x], [activations.relu(x, max_value=1.)]) - result = f([test_values])[0] - assert np.max(result) <= 1. - - # Test max_value == 6. - test_values = np.array([[0.5, 6.]], dtype=K.floatx()) - f = K.function([x], [activations.relu(x, max_value=1.)]) - result = f([test_values])[0] - assert np.max(result) <= 6. - - -def test_elu(): - x = K.placeholder(ndim=2) - f = K.function([x], [activations.elu(x, 0.5)]) - - test_values = get_standard_values() - result = f([test_values])[0] - assert_allclose(result, test_values, rtol=1e-05) - - negative_values = np.array([[-1, -2]], dtype=K.floatx()) - result = f([negative_values])[0] - true_result = (np.exp(negative_values) - 1) / 2 - - assert_allclose(result, true_result) - - -def test_selu(): - x = K.placeholder(ndim=2) - f = K.function([x], [activations.selu(x)]) - alpha = 1.6732632423543772848170429916717 - scale = 1.0507009873554804934193349852946 - - positive_values = get_standard_values() - result = f([positive_values])[0] - assert_allclose(result, positive_values * scale, rtol=1e-05) - - negative_values = np.array([[-1, -2]], dtype=K.floatx()) - - result = f([negative_values])[0] - true_result = (np.exp(negative_values) - 1) * scale * alpha - - assert_allclose(result, true_result) - - -def test_tanh(): - test_values = get_standard_values() - - x = K.placeholder(ndim=2) - exp = activations.tanh(x) - f = K.function([x], [exp]) - - result = f([test_values])[0] - expected = np.tanh(test_values) - assert_allclose(result, expected, rtol=1e-05) - - -def test_linear(): - xs = [1, 5, True, None] - for x in xs: - assert(x == activations.linear(x)) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from numpy.testing import assert_allclose - -from keras import backend as K -from keras import constraints - - -def get_test_values(): - return [0.1, 0.5, 3, 8, 1e-7] - - -def get_example_array(): - np.random.seed(3537) - example_array = np.random.random((100, 100)) * 100. - 50. - example_array[0, 0] = 0. 
# 0 could possibly cause trouble - return example_array - - -def test_serialization(): - all_activations = ['max_norm', 'non_neg', - 'unit_norm', 'min_max_norm'] - for name in all_activations: - fn = constraints.get(name) - ref_fn = getattr(constraints, name)() - assert fn.__class__ == ref_fn.__class__ - config = constraints.serialize(fn) - fn = constraints.deserialize(config) - assert fn.__class__ == ref_fn.__class__ - - -def test_max_norm(): - array = get_example_array() - for m in get_test_values(): - norm_instance = constraints.max_norm(m) - normed = norm_instance(K.variable(array)) - assert(np.all(K.eval(normed) < m)) - - # a more explicit example - norm_instance = constraints.max_norm(2.0) - x = np.array([[0, 0, 0], [1.0, 0, 0], [3, 0, 0], [3, 3, 3]]).T - x_normed_target = np.array([[0, 0, 0], [1.0, 0, 0], - [2.0, 0, 0], - [2. / np.sqrt(3), - 2. / np.sqrt(3), - 2. / np.sqrt(3)]]).T - x_normed_actual = K.eval(norm_instance(K.variable(x))) - assert_allclose(x_normed_actual, x_normed_target, rtol=1e-05) - - -def test_non_neg(): - non_neg_instance = constraints.non_neg() - normed = non_neg_instance(K.variable(get_example_array())) - assert(np.all(np.min(K.eval(normed), axis=1) == 0.)) - - -def test_unit_norm(): - unit_norm_instance = constraints.unit_norm() - normalized = unit_norm_instance(K.variable(get_example_array())) - norm_of_normalized = np.sqrt(np.sum(K.eval(normalized) ** 2, axis=0)) - # In the unit norm constraint, it should be equal to 1. - difference = norm_of_normalized - 1. - largest_difference = np.max(np.abs(difference)) - assert(np.abs(largest_difference) < 10e-5) - - -def test_min_max_norm(): - array = get_example_array() - for m in get_test_values(): - norm_instance = constraints.min_max_norm(min_value=m, max_value=m * 2) - normed = norm_instance(K.variable(array)) - value = K.eval(normed) - l2 = np.sqrt(np.sum(np.square(value), axis=0)) - assert l2[l2 < m].size == 0 - assert l2[l2 > m * 2 + 1e-5].size == 0 - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np - -from keras import initializers -from keras import backend as K - -# 2D tensor test fixture -FC_SHAPE = (200, 100) - -# 4D convolution in th order. 
This shape has the same effective shape as FC_SHAPE -CONV_SHAPE = (25, 25, 20, 20) - - -def _runner(init, shape, target_mean=None, target_std=None, - target_max=None, target_min=None): - variable = K.variable(init(shape)) - output = K.get_value(variable) - lim = 3e-2 - if target_std is not None: - assert abs(output.std() - target_std) < lim - if target_mean is not None: - assert abs(output.mean() - target_mean) < lim - if target_max is not None: - assert abs(output.max() - target_max) < lim - if target_min is not None: - assert abs(output.min() - target_min) < lim - - -@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) -def test_uniform(tensor_shape): - _runner(initializers.RandomUniform(minval=-1, maxval=1), tensor_shape, - target_mean=0., target_max=1, target_min=-1) - - -@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) -def test_normal(tensor_shape): - _runner(initializers.RandomNormal(mean=0, stddev=1), tensor_shape, - target_mean=0., target_std=1) - - -@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) -def test_truncated_normal(tensor_shape): - _runner(initializers.TruncatedNormal(mean=0, stddev=1), tensor_shape, - target_mean=0., target_max=2, target_min=-2) - - -@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) -def test_constant(tensor_shape): - _runner(initializers.Constant(2), tensor_shape, - target_mean=2, target_max=2, target_min=2) - - -@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) -def test_lecun_uniform(tensor_shape): - fan_in, _ = initializers._compute_fans(tensor_shape) - std = np.sqrt(1. / fan_in) - _runner(initializers.lecun_uniform(), tensor_shape, - target_mean=0., target_std=std) - - -@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) -def test_glorot_uniform(tensor_shape): - fan_in, fan_out = initializers._compute_fans(tensor_shape) - std = np.sqrt(2. / (fan_in + fan_out)) - _runner(initializers.glorot_uniform(), tensor_shape, - target_mean=0., target_std=std) - - -@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) -def test_he_uniform(tensor_shape): - fan_in, _ = initializers._compute_fans(tensor_shape) - std = np.sqrt(2. / fan_in) - _runner(initializers.he_uniform(), tensor_shape, - target_mean=0., target_std=std) - - -@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) -def test_lecun_normal(tensor_shape): - fan_in, _ = initializers._compute_fans(tensor_shape) - std = np.sqrt(1. / fan_in) - _runner(initializers.lecun_normal(), tensor_shape, - target_mean=0., target_std=std) - - -@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) -def test_glorot_normal(tensor_shape): - fan_in, fan_out = initializers._compute_fans(tensor_shape) - std = np.sqrt(2. / (fan_in + fan_out)) - _runner(initializers.glorot_normal(), tensor_shape, - target_mean=0., target_std=std) - - -@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) -def test_he_normal(tensor_shape): - fan_in, _ = initializers._compute_fans(tensor_shape) - std = np.sqrt(2. / fan_in) - _runner(initializers.he_normal(), tensor_shape, - target_mean=0., target_std=std) - - -@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) -def test_orthogonal(tensor_shape): - _runner(initializers.orthogonal(), tensor_shape, - target_mean=0.) 
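# --- Editor's note --------------------------------------------------------
# A minimal standalone sketch (not part of the original suite) of the
# property behind test_orthogonal above: an orthogonal matrix W satisfies
# W.T @ W == I. Keras' Orthogonal initializer derives W from the SVD of a
# Gaussian random matrix; numpy's QR decomposition, used here purely for
# illustration, yields an equally valid orthogonal factor.
import numpy as np

_rng = np.random.RandomState(1337)
_q, _ = np.linalg.qr(_rng.normal(size=(100, 100)))
assert np.allclose(_q.T.dot(_q), np.eye(100), atol=1e-10)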
- - -def test_orthogonal_init_does_not_affect_global_rng(): - np.random.seed(1337) - before = np.random.randint(0, 100, size=10) - - np.random.seed(1337) - init = initializers.orthogonal(seed=9876) - init(shape=(10, 5)) - after = np.random.randint(0, 100, size=10) - - assert np.array_equal(before, after) - - -@pytest.mark.parametrize('tensor_shape', - [(100, 100), (10, 20), (30, 80), (1, 2, 3, 4)], - ids=['FC', 'RNN', 'RNN_INVALID', 'CONV']) -def test_identity(tensor_shape): - target_mean = (1. * min(tensor_shape)) / \ - (tensor_shape[0] * tensor_shape[1]) - if len(tensor_shape) > 2: - with pytest.raises(ValueError): - _runner(initializers.identity(), tensor_shape, - target_mean=target_mean, target_max=1.) - else: - _runner(initializers.identity(), tensor_shape, - target_mean=target_mean, target_max=1.) - - -@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) -def test_zero(tensor_shape): - _runner(initializers.zeros(), tensor_shape, - target_mean=0., target_max=0.) - - -@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV']) -def test_one(tensor_shape): - _runner(initializers.ones(), tensor_shape, - target_mean=1., target_max=1.) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np - -import keras -from keras import losses -from keras import backend as K -from keras.utils.generic_utils import custom_object_scope - - -allobj = [losses.mean_squared_error, - losses.mean_absolute_error, - losses.mean_absolute_percentage_error, - losses.mean_squared_logarithmic_error, - losses.squared_hinge, - losses.hinge, - losses.categorical_crossentropy, - losses.binary_crossentropy, - losses.kullback_leibler_divergence, - losses.poisson, - losses.cosine_proximity, - losses.logcosh, - losses.categorical_hinge] - - -def test_objective_shapes_3d(): - y_a = K.variable(np.random.random((5, 6, 7))) - y_b = K.variable(np.random.random((5, 6, 7))) - for obj in allobj: - objective_output = obj(y_a, y_b) - assert K.eval(objective_output).shape == (5, 6) - - -def test_objective_shapes_2d(): - y_a = K.variable(np.random.random((6, 7))) - y_b = K.variable(np.random.random((6, 7))) - for obj in allobj: - objective_output = obj(y_a, y_b) - assert K.eval(objective_output).shape == (6,) - - -def test_cce_one_hot(): - y_a = K.variable(np.random.randint(0, 7, (5, 6))) - y_b = K.variable(np.random.random((5, 6, 7))) - objective_output = losses.sparse_categorical_crossentropy(y_a, y_b) - assert K.eval(objective_output).shape == (5, 6) - - y_a = K.variable(np.random.randint(0, 7, (6,))) - y_b = K.variable(np.random.random((6, 7))) - assert K.eval(losses.sparse_categorical_crossentropy( - y_a, y_b)).shape == (6,) - - -def test_categorical_hinge(): - y_pred = K.variable(np.array([[0.3, 0.2, 0.1], - [0.1, 0.2, 0.7]])) - y_true = K.variable(np.array([[0, 1, 0], - [1, 0, 0]])) - expected_loss = ((0.3 - 0.2 + 1) + (0.7 - 0.1 + 1)) / 2.0 - loss = K.eval(losses.categorical_hinge(y_true, y_pred)) - assert np.isclose(expected_loss, np.mean(loss)) - - -def test_sparse_categorical_crossentropy(): - y_pred = K.variable(np.array([[0.3, 0.6, 0.1], - [0.1, 0.2, 0.7]])) - y_true = K.variable(np.array([1, 2])) - expected_loss = - (np.log(0.6) + np.log(0.7)) / 2 - loss = K.eval(losses.sparse_categorical_crossentropy(y_true, y_pred)) - assert np.isclose(expected_loss, np.mean(loss)) - - -def test_sparse_categorical_crossentropy_4d(): - y_pred = K.variable(np.array([[[[0.7, 0.1, 0.2], - [0.0, 0.3, 0.7], - [0.1, 0.1, 0.8]], - [[0.3, 0.7, 0.0], - 
[0.3, 0.4, 0.3], - [0.2, 0.5, 0.3]], - [[0.8, 0.1, 0.1], - [1.0, 0.0, 0.0], - [0.4, 0.3, 0.3]]]])) - y_true = K.variable(np.array([[[0, 1, 0], - [2, 1, 0], - [2, 2, 1]]])) - expected_loss = - (np.log(0.7) + np.log(0.3) + np.log(0.1) + - np.log(K.epsilon()) + np.log(0.4) + np.log(0.2) + - np.log(0.1) + np.log(K.epsilon()) + np.log(0.3)) / 9 - loss = K.eval(losses.sparse_categorical_crossentropy(y_true, y_pred)) - assert np.isclose(expected_loss, np.mean(loss)) - - -class MSE_MAE_loss: - """Loss function with internal state, for testing serialization code.""" - - def __init__(self, mse_fraction): - self.mse_fraction = mse_fraction - - def __call__(self, y_true, y_pred): - return (self.mse_fraction * losses.mse(y_true, y_pred) + - (1 - self.mse_fraction) * losses.mae(y_true, y_pred)) - - def get_config(self): - return {'mse_fraction': self.mse_fraction} - - -def test_serializing_loss_class(): - orig_loss_class = MSE_MAE_loss(0.3) - with custom_object_scope({'MSE_MAE_loss': MSE_MAE_loss}): - serialized = losses.serialize(orig_loss_class) - - with custom_object_scope({'MSE_MAE_loss': MSE_MAE_loss}): - deserialized = losses.deserialize(serialized) - assert isinstance(deserialized, MSE_MAE_loss) - assert deserialized.mse_fraction == 0.3 - - -def test_serializing_model_with_loss_class(tmpdir): - model_filename = str(tmpdir / 'custom_loss.hdf') - - with custom_object_scope({'MSE_MAE_loss': MSE_MAE_loss}): - loss = MSE_MAE_loss(0.3) - inputs = keras.layers.Input((2,)) - outputs = keras.layers.Dense(1, name='model_output')(inputs) - model = keras.models.Model(inputs, outputs) - model.compile(optimizer='sgd', loss={'model_output': loss}) - model.fit(np.random.rand(256, 2), np.random.rand(256, 1)) - model.save(model_filename) - - with custom_object_scope({'MSE_MAE_loss': MSE_MAE_loss}): - loaded_model = keras.models.load_model(model_filename) - loaded_model.predict(np.random.rand(128, 2)) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from numpy.testing import assert_allclose -from flaky import flaky - -import keras -from keras import metrics -from keras import backend as K - -all_metrics = [ - metrics.binary_accuracy, - metrics.categorical_accuracy, - metrics.mean_squared_error, - metrics.mean_absolute_error, - metrics.mean_absolute_percentage_error, - metrics.mean_squared_logarithmic_error, - metrics.squared_hinge, - metrics.hinge, - metrics.categorical_crossentropy, - metrics.binary_crossentropy, - metrics.poisson, - metrics.cosine_proximity, - metrics.logcosh, -] - -all_sparse_metrics = [ - metrics.sparse_categorical_accuracy, - metrics.sparse_categorical_crossentropy, -] - - -@pytest.mark.parametrize('metric', all_metrics) -def test_metrics(metric): - y_a = K.variable(np.random.random((6, 7))) - y_b = K.variable(np.random.random((6, 7))) - output = metric(y_a, y_b) - assert K.eval(output).shape == (6,) - - -@pytest.mark.parametrize('metric', all_sparse_metrics) -def test_sparse_metrics(metric): - y_a = K.variable(np.random.randint(0, 7, (6,)), dtype=K.floatx()) - y_b = K.variable(np.random.random((6, 7)), dtype=K.floatx()) - assert K.eval(metric(y_a, y_b)).shape == (6,) - - -@pytest.mark.parametrize('shape', [(6,), (6, 3), (6, 3, 1)]) -def test_sparse_categorical_accuracy_correctness(shape): - y_a = K.variable(np.random.randint(0, 7, shape), dtype=K.floatx()) - y_b_shape = shape + (7,) - y_b = K.variable(np.random.random(y_b_shape), dtype=K.floatx()) - # use one_hot embedding to convert sparse labels to equivalent dense labels - y_a_dense_labels = 
K.cast(K.one_hot(K.cast(y_a, dtype='int32'), 7), - dtype=K.floatx()) - sparse_categorical_acc = metrics.sparse_categorical_accuracy(y_a, y_b) - categorical_acc = metrics.categorical_accuracy(y_a_dense_labels, y_b) - assert np.allclose(K.eval(sparse_categorical_acc), K.eval(categorical_acc)) - - -def test_serialize(): - '''This is a mock 'round trip' of serialize and deserialize. - ''' - - class MockMetric: - def __init__(self): - self.__name__ = "mock_metric" - - mock = MockMetric() - found = metrics.serialize(mock) - assert found == "mock_metric" - - found = metrics.deserialize('mock_metric', - custom_objects={'mock_metric': True}) - assert found is True - - -def test_invalid_get(): - - with pytest.raises(ValueError): - metrics.get(5) - - -@pytest.mark.skipif((K.backend() == 'cntk'), - reason='CNTK backend does not support top_k yet') -def test_top_k_categorical_accuracy(): - y_pred = K.variable(np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]])) - y_true = K.variable(np.array([[0, 1, 0], [1, 0, 0]])) - success_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, - k=3)) - assert np.mean(success_result) == 1 - partial_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, - k=2)) - assert np.mean(partial_result) == 0.5 - failure_result = K.eval(metrics.top_k_categorical_accuracy(y_true, y_pred, - k=1)) - assert np.mean(failure_result) == 0 - - -@pytest.mark.skipif((K.backend() == 'cntk'), - reason='CNTK backend does not support top_k yet') -@pytest.mark.parametrize('y_pred, y_true', [ - # Test correctness if the shape of y_true is (num_samples, 1) - (np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]), np.array([[1], [0]])), - # Test correctness if the shape of y_true is (num_samples,) - (np.array([[0.3, 0.2, 0.1], [0.1, 0.2, 0.7]]), np.array([1, 0])), -]) -def test_sparse_top_k_categorical_accuracy(y_pred, y_true): - y_pred = K.variable(y_pred) - y_true = K.variable(y_true) - success_result = K.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=3)) - - assert np.mean(success_result) == 1 - partial_result = K.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=2)) - - assert np.mean(partial_result) == 0.5 - failure_result = K.eval( - metrics.sparse_top_k_categorical_accuracy(y_true, y_pred, k=1)) - - assert np.mean(failure_result) == 0 - - -# TODO: resolve flakyness issue. Tracked with #11064 -@pytest.mark.parametrize('metrics_mode', ['list', 'dict']) -@flaky(rerun_filter=lambda err, *args: issubclass(err[0], AssertionError)) -def test_stateful_metrics(metrics_mode): - np.random.seed(1334) - - class BinaryTruePositives(keras.layers.Layer): - """Stateful Metric to count the total true positives over all batches. - - Assumes predictions and targets of shape `(samples, 1)`. - - # Arguments - name: String, name for the metric. - """ - - def __init__(self, name='true_positives', **kwargs): - super(BinaryTruePositives, self).__init__(name=name, **kwargs) - self.stateful = True - self.true_positives = K.variable(value=0, dtype='int32') - - def reset_states(self): - K.set_value(self.true_positives, 0) - - def __call__(self, y_true, y_pred): - """Computes the number of true positives in a batch. - - # Arguments - y_true: Tensor, batch_wise labels - y_pred: Tensor, batch_wise predictions - - # Returns - The total number of true positives seen this epoch at the - completion of the batch. 
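            (Editor's example: with y_true = [1, 0, 1, 1] and y_pred
            rounding to [1, 0, 0, 1], the batch contributes 2 true
            positives; K.update_add below accumulates that count into
            self.true_positives across batches.)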
- """ - y_true = K.cast(y_true, 'int32') - y_pred = K.cast(K.round(y_pred), 'int32') - correct_preds = K.cast(K.equal(y_pred, y_true), 'int32') - true_pos = K.cast(K.sum(correct_preds * y_true), 'int32') - current_true_pos = self.true_positives * 1 - self.add_update(K.update_add(self.true_positives, - true_pos), - inputs=[y_true, y_pred]) - return current_true_pos + true_pos - - metric_fn = BinaryTruePositives() - config = metrics.serialize(metric_fn) - metric_fn = metrics.deserialize( - config, custom_objects={'BinaryTruePositives': BinaryTruePositives}) - - # Test on simple model - inputs = keras.Input(shape=(2,)) - outputs = keras.layers.Dense(1, activation='sigmoid', name='out')(inputs) - model = keras.Model(inputs, outputs) - - if metrics_mode == 'list': - model.compile(optimizer='sgd', - loss='binary_crossentropy', - metrics=['acc', metric_fn]) - elif metrics_mode == 'dict': - model.compile(optimizer='sgd', - loss='binary_crossentropy', - metrics={'out': ['acc', metric_fn]}) - - samples = 1000 - x = np.random.random((samples, 2)) - y = np.random.randint(2, size=(samples, 1)) - - val_samples = 10 - val_x = np.random.random((val_samples, 2)) - val_y = np.random.randint(2, size=(val_samples, 1)) - - # Test fit and evaluate - history = model.fit(x, y, validation_data=(val_x, val_y), - epochs=1, batch_size=10) - outs = model.evaluate(x, y, batch_size=10) - preds = model.predict(x) - - def ref_true_pos(y_true, y_pred): - return np.sum(np.logical_and(y_pred > 0.5, y_true == 1)) - - # Test correctness (e.g. updates should have been run) - np.testing.assert_allclose(outs[2], ref_true_pos(y, preds), atol=1e-5) - - # Test correctness of the validation metric computation - val_preds = model.predict(val_x) - val_outs = model.evaluate(val_x, val_y, batch_size=10) - assert_allclose(val_outs[2], ref_true_pos(val_y, val_preds), atol=1e-5) - assert_allclose(val_outs[2], history.history['val_true_positives'][-1], - atol=1e-5) - - # Test with generators - gen = [(np.array([x0]), np.array([y0])) for x0, y0 in zip(x, y)] - val_gen = [(np.array([x0]), np.array([y0])) - for x0, y0 in zip(val_x, val_y)] - history = model.fit_generator(iter(gen), epochs=1, steps_per_epoch=samples, - validation_data=iter(val_gen), - validation_steps=val_samples) - outs = model.evaluate_generator(iter(gen), steps=samples, workers=0) - preds = model.predict_generator(iter(gen), steps=samples, workers=0) - - # Test correctness of the metric re ref_true_pos() - np.testing.assert_allclose(outs[2], ref_true_pos(y, preds), - atol=1e-5) - - # Test correctness of the validation metric computation - val_preds = model.predict_generator( - iter(val_gen), steps=val_samples, workers=0) - val_outs = model.evaluate_generator( - iter(val_gen), steps=val_samples, workers=0) - np.testing.assert_allclose(val_outs[2], ref_true_pos(val_y, val_preds), - atol=1e-5) - np.testing.assert_allclose(val_outs[2], - history.history['val_true_positives'][-1], - atol=1e-5) - - -if __name__ == '__main__': - pytest.main([__file__]) -from __future__ import print_function -import pytest -import numpy as np -from numpy.testing import assert_allclose - -from keras.utils import test_utils -from keras import optimizers, Input -from keras.models import Sequential, Model -from keras.layers.core import Dense, Activation, Lambda -from keras.utils.np_utils import to_categorical -from keras import backend as K - -num_classes = 2 - - -def get_test_data(): - np.random.seed(1337) - (x_train, y_train), _ = test_utils.get_test_data(num_train=1000, - num_test=200, - 
input_shape=(10,), - classification=True, - num_classes=num_classes) - y_train = to_categorical(y_train) - return x_train, y_train - - -def _test_optimizer(optimizer, target=0.75): - x_train, y_train = get_test_data() - - model = Sequential() - model.add(Dense(10, input_shape=(x_train.shape[1],))) - model.add(Activation('relu')) - model.add(Dense(y_train.shape[1])) - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', - optimizer=optimizer, - metrics=['accuracy']) - - history = model.fit(x_train, y_train, epochs=2, batch_size=16, verbose=0) - assert history.history['acc'][-1] >= target - config = optimizers.serialize(optimizer) - optim = optimizers.deserialize(config) - new_config = optimizers.serialize(optim) - new_config['class_name'] = new_config['class_name'].lower() - assert config == new_config - - # Test constraints. - model = Sequential() - dense = Dense(10, - input_shape=(x_train.shape[1],), - kernel_constraint=lambda x: 0. * x + 1., - bias_constraint=lambda x: 0. * x + 2.,) - model.add(dense) - model.add(Activation('relu')) - model.add(Dense(y_train.shape[1])) - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', - optimizer=optimizer, - metrics=['accuracy']) - model.train_on_batch(x_train[:10], y_train[:10]) - kernel, bias = dense.get_weights() - assert_allclose(kernel, 1.) - assert_allclose(bias, 2.) - - -@pytest.mark.skipif((K.backend() != 'tensorflow'), - reason="Only Tensorflow raises a " - "ValueError if the gradient is null.") -def test_no_grad(): - inp = Input([3]) - x = Dense(10)(inp) - x = Lambda(lambda l: 1.0 * K.reshape(K.cast(K.argmax(l), 'float32'), [-1, 1]), - output_shape=lambda x: [x[0], 1])(x) - mod = Model(inp, x) - mod.compile('sgd', 'mse') - with pytest.raises(ValueError): - mod.fit(np.zeros([10, 3]), np.zeros([10, 1], np.float32), - batch_size=10, epochs=10) - - -def test_sgd(): - sgd = optimizers.SGD(lr=0.01, momentum=0.9, nesterov=True) - _test_optimizer(sgd) - - -def test_rmsprop(): - _test_optimizer(optimizers.RMSprop()) - _test_optimizer(optimizers.RMSprop(decay=1e-3)) - - -def test_adagrad(): - _test_optimizer(optimizers.Adagrad()) - _test_optimizer(optimizers.Adagrad(decay=1e-3)) - - -def test_adadelta(): - _test_optimizer(optimizers.Adadelta(), target=0.6) - _test_optimizer(optimizers.Adadelta(decay=1e-3), target=0.6) - - -def test_adam(): - _test_optimizer(optimizers.Adam()) - _test_optimizer(optimizers.Adam(decay=1e-3)) - - -def test_adamax(): - _test_optimizer(optimizers.Adamax()) - _test_optimizer(optimizers.Adamax(decay=1e-3)) - - -def test_nadam(): - _test_optimizer(optimizers.Nadam()) - - -def test_adam_amsgrad(): - _test_optimizer(optimizers.Adam(amsgrad=True)) - _test_optimizer(optimizers.Adam(amsgrad=True, decay=1e-3)) - - -def test_clipnorm(): - sgd = optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=0.5) - _test_optimizer(sgd) - - -def test_clipvalue(): - sgd = optimizers.SGD(lr=0.01, momentum=0.9, clipvalue=0.5) - _test_optimizer(sgd) - - -@pytest.mark.skipif((K.backend() != 'tensorflow'), - reason='Requires TensorFlow backend') -def test_tfoptimizer(): - from keras import constraints - from tensorflow import train - optimizer = optimizers.TFOptimizer(train.AdamOptimizer()) - model = Sequential() - model.add(Dense(num_classes, input_shape=(3,), - kernel_constraint=constraints.MaxNorm(1))) - model.compile(loss='mean_squared_error', optimizer=optimizer) - model.fit(np.random.random((5, 3)), np.random.random((5, num_classes)), - epochs=1, batch_size=5, verbose=0) - # not supported - 
with pytest.raises(NotImplementedError): - optimizer.weights - with pytest.raises(NotImplementedError): - optimizer.get_config() - with pytest.raises(NotImplementedError): - optimizer.from_config(None) - - -@pytest.mark.skipif((K.backend() != 'tensorflow'), - reason='Requires TensorFlow backend') -def test_tfoptimizer_pass_correct_named_params_to_native_tensorflow_optimizer(): - from keras import constraints - from tensorflow import train - - class MyTfOptimizer(train.Optimizer): - wrapping_optimizer = train.AdamOptimizer() - - def compute_gradients(self, loss, **kwargs): - return super(MyTfOptimizer, self).compute_gradients(loss, **kwargs) - - def apply_gradients(self, grads_and_vars, **kwargs): - return self.wrapping_optimizer.apply_gradients(grads_and_vars, - **kwargs) - my_tf_optimizer = MyTfOptimizer(use_locking=False, name='MyTfOptimizer') - optimizer = optimizers.TFOptimizer(my_tf_optimizer) - model = Sequential() - model.add(Dense(num_classes, input_shape=(3,), - kernel_constraint=constraints.MaxNorm(1))) - model.compile(loss='mean_squared_error', optimizer=optimizer) - model.fit(np.random.random((5, 3)), np.random.random((5, num_classes)), - epochs=1, batch_size=5, verbose=0) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest - -from keras.models import Sequential, Model -from keras.layers import Dense, Input, Average -from keras.utils import np_utils -from keras.utils import test_utils -from keras import regularizers -from keras import backend as K - -data_dim = 5 -num_classes = 2 -batch_size = 10 - - -def get_data(): - (x_train, y_train), _ = test_utils.get_test_data( - num_train=batch_size, - num_test=batch_size, - input_shape=(data_dim,), - classification=True, - num_classes=num_classes) - y_train = np_utils.to_categorical(y_train, num_classes) - - return x_train, y_train - - -def create_model(kernel_regularizer=None, activity_regularizer=None): - model = Sequential() - model.add(Dense(num_classes, - kernel_regularizer=kernel_regularizer, - activity_regularizer=activity_regularizer, - input_shape=(data_dim,))) - return model - - -def create_multi_input_model_from(layer1, layer2): - input_1 = Input(shape=(data_dim,)) - input_2 = Input(shape=(data_dim,)) - out1 = layer1(input_1) - out2 = layer2(input_2) - out = Average()([out1, out2]) - model = Model([input_1, input_2], out) - model.add_loss(K.mean(out2)) - model.add_loss(1) - model.add_loss(1) - return model - - -def test_kernel_regularization(): - x_train, y_train = get_data() - for reg in [regularizers.l1(), - regularizers.l2(), - regularizers.l1_l2()]: - model = create_model(kernel_regularizer=reg) - model.compile(loss='categorical_crossentropy', optimizer='sgd') - assert len(model.losses) == 1 - model.train_on_batch(x_train, y_train) - - -def test_activity_regularization(): - x_train, y_train = get_data() - for reg in [regularizers.l1(), regularizers.l2()]: - model = create_model(activity_regularizer=reg) - model.compile(loss='categorical_crossentropy', optimizer='sgd') - assert len(model.losses) == 1 - model.train_on_batch(x_train, y_train) - - -def test_regularization_shared_layer(): - dense_layer = Dense(num_classes, - kernel_regularizer=regularizers.l1(), - activity_regularizer=regularizers.l1()) - - model = create_multi_input_model_from(dense_layer, dense_layer) - model.compile(loss='categorical_crossentropy', optimizer='sgd') - assert len(model.losses) == 6 - - -def test_regularization_shared_model(): - dense_layer = Dense(num_classes, - kernel_regularizer=regularizers.l1(), - 
activity_regularizer=regularizers.l1()) - - input_tensor = Input(shape=(data_dim,)) - dummy_model = Model(input_tensor, dense_layer(input_tensor)) - - model = create_multi_input_model_from(dummy_model, dummy_model) - model.compile(loss='categorical_crossentropy', optimizer='sgd') - assert len(model.losses) == 6 - - -def test_regularization_shared_layer_in_different_models(): - shared_dense = Dense(num_classes, - kernel_regularizer=regularizers.l1(), - activity_regularizer=regularizers.l1()) - models = [] - for _ in range(2): - input_tensor = Input(shape=(data_dim,)) - unshared_dense = Dense( - num_classes, kernel_regularizer=regularizers.l1()) - out = unshared_dense(shared_dense(input_tensor)) - models.append(Model(input_tensor, out)) - - model = create_multi_input_model_from(*models) - model.compile(loss='categorical_crossentropy', optimizer='sgd') - assert len(model.losses) == 8 - - -if __name__ == '__main__': - pytest.main([__file__]) -import os -import multiprocessing - -import numpy as np -import pytest -from numpy.testing import assert_allclose -from csv import reader -from csv import Sniffer -import shutil -from collections import defaultdict -from keras import optimizers -from keras import initializers -from keras import callbacks -from keras.models import Sequential, Model -from keras.layers import Input, Dense, Dropout, add, dot, Lambda, Layer -from keras.layers import Conv2D -from keras.layers import MaxPooling2D -from keras.layers import GlobalAveragePooling1D -from keras.layers import GlobalAveragePooling2D -from keras.layers import BatchNormalization -from keras.utils.test_utils import get_test_data -from keras.utils.generic_utils import to_list -from keras.utils.generic_utils import unpack_singleton -from keras import backend as K -from keras.utils import np_utils -try: - from unittest.mock import patch -except: - from mock import patch - - -input_dim = 2 -num_hidden = 4 -num_classes = 2 -batch_size = 5 -train_samples = 20 -test_samples = 20 - - -def data_generator(x, y, batch_size): - x = to_list(x) - y = to_list(y) - max_batch_index = len(x[0]) // batch_size - i = 0 - while 1: - x_batch = [array[i * batch_size: (i + 1) * batch_size] for array in x] - x_batch = unpack_singleton(x_batch) - - y_batch = [array[i * batch_size: (i + 1) * batch_size] for array in y] - y_batch = unpack_singleton(y_batch) - yield x_batch, y_batch - i += 1 - i = i % max_batch_index - - -# Changing the default arguments of get_test_data. -def get_data_callbacks(num_train=train_samples, - num_test=test_samples, - input_shape=(input_dim,), - classification=True, - num_classes=num_classes): - return get_test_data(num_train=num_train, - num_test=num_test, - input_shape=input_shape, - classification=classification, - num_classes=num_classes) - - -class Counter(callbacks.Callback): - """Counts the number of times each callback method was run. - - # Arguments - method_counts: dict, contains the counts of time each callback method was - run. 
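    (Editor's example: for model.fit on 10 training samples with
    batch_size=2 and epochs=5, as in test_callback_hooks_are_called_in_fit
    below, one expects method_counts['on_epoch_end'] == 5 and
    method_counts['on_batch_end'] == 25.)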
- """ - - def __init__(self): - self.method_counts = defaultdict(int) - methods_to_count = [ - 'on_batch_begin', 'on_batch_end', 'on_epoch_begin', 'on_epoch_end', - 'on_train_batch_begin', 'on_train_batch_end', - 'on_test_batch_begin', 'on_test_batch_end', - 'on_predict_batch_begin', 'on_predict_batch_end', - 'on_train_begin', 'on_train_end', 'on_predict_begin', 'on_predict_end', - 'on_test_begin', 'on_test_end', - ] - for method_name in methods_to_count: - setattr(self, method_name, - self.wrap_with_counts(method_name, getattr(self, method_name))) - - def wrap_with_counts(self, method_name, method): - - def _call_and_count(*args, **kwargs): - self.method_counts[method_name] += 1 - return method(*args, **kwargs) - - return _call_and_count - - -class TestCallbackCounts(object): - - def _check_counts(self, counter, expected_counts): - """Checks that the counts registered by `counter` are those expected.""" - for method_name, expected_count in expected_counts.items(): - count = counter.method_counts[method_name] - assert count == expected_count, \ - 'For method {}: expected {}, got: {}'.format( - method_name, expected_count, count) - - def _get_model(self): - layers = [ - Dense(10, activation='relu', input_dim=input_dim), - Dense(num_classes, activation='softmax') - ] - model = Sequential(layers=layers) - model.compile(optimizer='adam', loss='binary_crossentropy') - return model - - def test_callback_hooks_are_called_in_fit(self): - np.random.seed(1337) - (X_train, y_train), (X_test, y_test) = get_data_callbacks(num_train=10, - num_test=4) - y_train = np_utils.to_categorical(y_train) - y_test = np_utils.to_categorical(y_test) - - model = self._get_model() - counter = Counter() - model.fit(X_train, y_train, validation_data=(X_test, y_test), - batch_size=2, epochs=5, callbacks=[counter]) - - self._check_counts( - counter, { - 'on_batch_begin': 25, - 'on_batch_end': 25, - 'on_epoch_begin': 5, - 'on_epoch_end': 5, - 'on_predict_batch_begin': 0, - 'on_predict_batch_end': 0, - 'on_predict_begin': 0, - 'on_predict_end': 0, - 'on_test_batch_begin': 10, - 'on_test_batch_end': 10, - 'on_test_begin': 5, - 'on_test_end': 5, - 'on_train_batch_begin': 25, - 'on_train_batch_end': 25, - 'on_train_begin': 1, - 'on_train_end': 1, - }) - - def test_callback_hooks_are_called_in_evaluate(self): - np.random.seed(1337) - (_, _), (X_test, y_test) = get_data_callbacks(num_test=10) - - y_test = np_utils.to_categorical(y_test) - - model = self._get_model() - counter = Counter() - model.evaluate(X_test, y_test, batch_size=2, callbacks=[counter]) - self._check_counts( - counter, { - 'on_test_batch_begin': 5, - 'on_test_batch_end': 5, - 'on_test_begin': 1, - 'on_test_end': 1, - 'on_batch_begin': 0, - 'on_batch_end': 0, - 'on_epoch_begin': 0, - 'on_epoch_end': 0, - 'on_predict_batch_begin': 0, - 'on_predict_batch_end': 0, - 'on_predict_begin': 0, - 'on_predict_end': 0, - 'on_train_batch_begin': 0, - 'on_train_batch_end': 0, - 'on_train_begin': 0, - 'on_train_end': 0, - }) - - def test_callback_hooks_are_called_in_predict(self): - np.random.seed(1337) - (_, _), (X_test, _) = get_data_callbacks(num_test=10) - - model = self._get_model() - counter = Counter() - model.predict(X_test, batch_size=2, callbacks=[counter]) - self._check_counts( - counter, { - 'on_predict_batch_begin': 5, - 'on_predict_batch_end': 5, - 'on_predict_begin': 1, - 'on_predict_end': 1, - 'on_batch_begin': 0, - 'on_batch_end': 0, - 'on_epoch_begin': 0, - 'on_epoch_end': 0, - 'on_test_batch_begin': 0, - 'on_test_batch_end': 0, - 'on_test_begin': 0, - 
'on_test_end': 0, - 'on_train_batch_begin': 0, - 'on_train_batch_end': 0, - 'on_train_begin': 0, - 'on_train_end': 0, - }) - - def test_callback_hooks_are_called_in_fit_generator(self): - np.random.seed(1337) - (X_train, y_train), (X_test, y_test) = get_data_callbacks(num_train=10, - num_test=4) - y_train = np_utils.to_categorical(y_train) - y_test = np_utils.to_categorical(y_test) - train_generator = data_generator(X_train, y_train, batch_size=2) - validation_generator = data_generator(X_test, y_test, batch_size=2) - - model = self._get_model() - counter = Counter() - model.fit_generator(train_generator, steps_per_epoch=len(X_train) // 2, - epochs=5, validation_data=validation_generator, - validation_steps=len(X_test) // 2, callbacks=[counter]) - - self._check_counts( - counter, { - 'on_batch_begin': 25, - 'on_batch_end': 25, - 'on_epoch_begin': 5, - 'on_epoch_end': 5, - 'on_predict_batch_begin': 0, - 'on_predict_batch_end': 0, - 'on_predict_begin': 0, - 'on_predict_end': 0, - 'on_test_batch_begin': 10, - 'on_test_batch_end': 10, - 'on_test_begin': 5, - 'on_test_end': 5, - 'on_train_batch_begin': 25, - 'on_train_batch_end': 25, - 'on_train_begin': 1, - 'on_train_end': 1, - }) - - def test_callback_hooks_are_called_in_evaluate_generator(self): - np.random.seed(1337) - (_, _), (X_test, y_test) = get_data_callbacks(num_test=10) - y_test = np_utils.to_categorical(y_test) - - model = self._get_model() - counter = Counter() - model.evaluate_generator(data_generator(X_test, y_test, batch_size=2), - steps=len(X_test) // 2, callbacks=[counter]) - self._check_counts( - counter, { - 'on_test_batch_begin': 5, - 'on_test_batch_end': 5, - 'on_test_begin': 1, - 'on_test_end': 1, - 'on_batch_begin': 0, - 'on_batch_end': 0, - 'on_epoch_begin': 0, - 'on_epoch_end': 0, - 'on_predict_batch_begin': 0, - 'on_predict_batch_end': 0, - 'on_predict_begin': 0, - 'on_predict_end': 0, - 'on_train_batch_begin': 0, - 'on_train_batch_end': 0, - 'on_train_begin': 0, - 'on_train_end': 0, - }) - - def test_callback_hooks_are_called_in_predict_generator(self): - np.random.seed(1337) - (_, _), (X_test, _) = get_data_callbacks(num_test=10) - - def data_generator(x, batch_size): - x = to_list(x) - max_batch_index = len(x[0]) // batch_size - i = 0 - while 1: - x_batch = [ - array[i * batch_size: (i + 1) * batch_size] for array in x] - x_batch = unpack_singleton(x_batch) - - yield x_batch - i += 1 - i = i % max_batch_index - - model = self._get_model() - counter = Counter() - model.predict_generator(data_generator(X_test, batch_size=2), - steps=len(X_test) // 2, callbacks=[counter]) - self._check_counts( - counter, { - 'on_predict_batch_begin': 5, - 'on_predict_batch_end': 5, - 'on_predict_begin': 1, - 'on_predict_end': 1, - 'on_batch_begin': 0, - 'on_batch_end': 0, - 'on_epoch_begin': 0, - 'on_epoch_end': 0, - 'on_test_batch_begin': 0, - 'on_test_batch_end': 0, - 'on_test_begin': 0, - 'on_test_end': 0, - 'on_train_batch_begin': 0, - 'on_train_batch_end': 0, - 'on_train_begin': 0, - 'on_train_end': 0, - }) - - def test_callback_list_methods(self): - counter = Counter() - callback_list = callbacks.CallbackList([counter]) - - batch = 0 - callback_list.on_test_batch_begin(batch) - callback_list.on_test_batch_end(batch) - callback_list.on_predict_batch_begin(batch) - callback_list.on_predict_batch_end(batch) - - self._check_counts( - counter, { - 'on_test_batch_begin': 1, - 'on_test_batch_end': 1, - 'on_predict_batch_begin': 1, - 'on_predict_batch_end': 1, - 'on_predict_begin': 0, - 'on_predict_end': 0, - 'on_batch_begin': 0, - 
'on_batch_end': 0, - 'on_epoch_begin': 0, - 'on_epoch_end': 0, - 'on_test_begin': 0, - 'on_test_end': 0, - 'on_train_batch_begin': 0, - 'on_train_batch_end': 0, - 'on_train_begin': 0, - 'on_train_end': 0, - }) - - -def test_TerminateOnNaN(): - np.random.seed(1337) - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - cbks = [callbacks.TerminateOnNaN()] - model = Sequential() - initializer = initializers.Constant(value=1e5) - for _ in range(5): - model.add(Dense(num_hidden, input_dim=input_dim, activation='relu', - kernel_initializer=initializer)) - model.add(Dense(num_classes, activation='linear')) - model.compile(loss='mean_squared_error', - optimizer='rmsprop') - - # case 1 fit - history = model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=20) - loss = history.history['loss'] - assert len(loss) == 1 - assert loss[0] == np.inf - - history = model.fit_generator(data_generator(X_train, y_train, batch_size), - len(X_train), - validation_data=(X_test, y_test), - callbacks=cbks, - epochs=20) - loss = history.history['loss'] - assert len(loss) == 1 - assert loss[0] == np.inf or np.isnan(loss[0]) - - -def test_stop_training_csv(tmpdir): - np.random.seed(1337) - fp = str(tmpdir / 'test.csv') - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - cbks = [callbacks.TerminateOnNaN(), callbacks.CSVLogger(fp)] - model = Sequential() - for _ in range(5): - model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) - model.add(Dense(num_classes, activation='linear')) - model.compile(loss='mean_squared_error', - optimizer='rmsprop') - - def data_generator(): - i = 0 - max_batch_index = len(X_train) // batch_size - tot = 0 - while 1: - if tot > 3 * len(X_train): - yield (np.ones([batch_size, input_dim]) * np.nan, - np.ones([batch_size, num_classes]) * np.nan) - else: - yield (X_train[i * batch_size: (i + 1) * batch_size], - y_train[i * batch_size: (i + 1) * batch_size]) - i += 1 - tot += 1 - i = i % max_batch_index - - history = model.fit_generator(data_generator(), - len(X_train) // batch_size, - validation_data=(X_test, y_test), - callbacks=cbks, - epochs=20) - loss = history.history['loss'] - assert len(loss) > 1 - assert loss[-1] == np.inf or np.isnan(loss[-1]) - - values = [] - with open(fp) as f: - for x in reader(f): - values.append(x) - - assert 'nan' in values[-1], 'The last epoch was not logged.' 
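    # --- Editor's note ----------------------------------------------------
    # (hedged sketch, not part of the original test) TerminateOnNaN, used in
    # cbks above, amounts to a few lines of callback logic:
    #
    #     class TerminateOnNaNSketch(callbacks.Callback):  # hypothetical name
    #         def on_batch_end(self, batch, logs=None):
    #             loss = (logs or {}).get('loss')
    #             if loss is not None and (np.isnan(loss) or np.isinf(loss)):
    #                 self.model.stop_training = True
    #
    # which is why the fitted histories above stop as soon as the loss goes
    # NaN/inf.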
- os.remove(fp) - - -def test_ModelCheckpoint(tmpdir): - np.random.seed(1337) - filepath = str(tmpdir / 'checkpoint.h5') - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - # case 1 - monitor = 'val_loss' - save_best_only = False - mode = 'auto' - - model = Sequential() - model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) - model.add(Dense(num_classes, activation='softmax')) - model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['accuracy']) - - cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, - save_best_only=save_best_only, mode=mode)] - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=1) - assert os.path.isfile(filepath) - os.remove(filepath) - - # case 2 - mode = 'min' - cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, - save_best_only=save_best_only, mode=mode)] - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=1) - assert os.path.isfile(filepath) - os.remove(filepath) - - # case 3 - mode = 'max' - monitor = 'val_acc' - cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, - save_best_only=save_best_only, mode=mode)] - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=1) - assert os.path.isfile(filepath) - os.remove(filepath) - - # case 4 - save_best_only = True - cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, - save_best_only=save_best_only, mode=mode)] - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=1) - assert os.path.isfile(filepath) - os.remove(filepath) - - # case 5 - save_best_only = False - period = 2 - mode = 'auto' - filepath = 'checkpoint.{epoch:02d}.h5' - cbks = [callbacks.ModelCheckpoint(filepath, monitor=monitor, - save_best_only=save_best_only, mode=mode, - period=period)] - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=4) - assert os.path.isfile(filepath.format(epoch=2)) - assert os.path.isfile(filepath.format(epoch=4)) - assert not os.path.exists(filepath.format(epoch=1)) - assert not os.path.exists(filepath.format(epoch=3)) - os.remove(filepath.format(epoch=2)) - os.remove(filepath.format(epoch=4)) - assert not tmpdir.listdir() - - -def test_EarlyStopping(): - np.random.seed(1337) - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - model = Sequential() - model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) - model.add(Dense(num_classes, activation='softmax')) - model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['accuracy']) - mode = 'max' - monitor = 'val_acc' - patience = 0 - cbks = [callbacks.EarlyStopping( - patience=patience, monitor=monitor, mode=mode)] - history = model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=20) - - mode = 'auto' - monitor = 'val_acc' - patience = 2 - cbks = [callbacks.EarlyStopping( - patience=patience, monitor=monitor, mode=mode)] - history = model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=20) - - -def test_EarlyStopping_reuse(): - np.random.seed(1337) - patience = 3 - data = 
np.random.random((100, 1)) - labels = np.where(data > 0.5, 1, 0) - model = Sequential(( - Dense(1, input_dim=1, activation='relu'), - Dense(1, activation='sigmoid'), - )) - model.compile(optimizer='sgd', loss='binary_crossentropy', - metrics=['accuracy']) - stopper = callbacks.EarlyStopping(monitor='acc', patience=patience) - weights = model.get_weights() - - hist = model.fit(data, labels, callbacks=[stopper], epochs=20) - assert len(hist.epoch) >= patience - - # This should allow training to go for at least `patience` epochs - model.set_weights(weights) - hist = model.fit(data, labels, callbacks=[stopper], epochs=20) - assert len(hist.epoch) >= patience - - -def test_EarlyStopping_patience(): - class DummyModel(object): - def __init__(self): - self.stop_training = False - - def get_weights(self): - return [] - - def set_weights(self, weights): - pass - - early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=2) - early_stop.model = DummyModel() - - losses = [0.0860, 0.1096, 0.1040, 0.1019] - - # Should stop after epoch 3, - # as the loss has not improved after patience=2 epochs. - epochs_trained = 0 - early_stop.on_train_begin() - - for epoch in range(len(losses)): - epochs_trained += 1 - early_stop.on_epoch_end(epoch, logs={'val_loss': losses[epoch]}) - - if early_stop.model.stop_training: - break - - assert epochs_trained == 3 - - -def test_EarlyStopping_baseline(): - class DummyModel(object): - def __init__(self): - self.stop_training = False - - def get_weights(self): - return [] - - def set_weights(self, weights): - pass - - def baseline_tester(acc_levels): - early_stop = callbacks.EarlyStopping(monitor='val_acc', baseline=0.75, - patience=2) - early_stop.model = DummyModel() - epochs_trained = 0 - early_stop.on_train_begin() - for epoch in range(len(acc_levels)): - epochs_trained += 1 - early_stop.on_epoch_end(epoch, logs={'val_acc': acc_levels[epoch]}) - if early_stop.model.stop_training: - break - return epochs_trained - - acc_levels = [0.55, 0.76, 0.81, 0.81] - baseline_met = baseline_tester(acc_levels) - acc_levels = [0.55, 0.74, 0.81, 0.81] - baseline_not_met = baseline_tester(acc_levels) - - # All epochs should run because baseline was met in second epoch - assert baseline_met == 4 - # Baseline was not met by second epoch and should stop - assert baseline_not_met == 2 - - -def test_EarlyStopping_final_weights(): - class DummyModel(object): - def __init__(self): - self.stop_training = False - self.weights = -1 - - def get_weights(self): - return self.weights - - def set_weights(self, weights): - self.weights = weights - - def set_weight_to_epoch(self, epoch): - self.weights = epoch - - early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=2) - early_stop.model = DummyModel() - - losses = [0.2, 0.15, 0.1, 0.11, 0.12] - - epochs_trained = 0 - early_stop.on_train_begin() - - for epoch in range(len(losses)): - epochs_trained += 1 - early_stop.model.set_weight_to_epoch(epoch=epoch) - early_stop.on_epoch_end(epoch, logs={'val_loss': losses[epoch]}) - - if early_stop.model.stop_training: - break - - # The best configuration is in the epoch 2 (loss = 0.1000), - # so with patience=2 we need to end up at epoch 4 - assert early_stop.model.get_weights() == 4 - - -def test_EarlyStopping_final_weights_when_restoring_model_weights(): - class DummyModel(object): - def __init__(self): - self.stop_training = False - self.weights = -1 - - def get_weights(self): - return self.weights - - def set_weights(self, weights): - self.weights = weights - - def 
set_weight_to_epoch(self, epoch): - self.weights = epoch - - early_stop = callbacks.EarlyStopping(monitor='val_loss', patience=2, - restore_best_weights=True) - early_stop.model = DummyModel() - - losses = [0.2, 0.15, 0.1, 0.11, 0.12] - - # The best configuration is in the epoch 2 (loss = 0.1000). - - epochs_trained = 0 - early_stop.on_train_begin() - - for epoch in range(len(losses)): - epochs_trained += 1 - early_stop.model.set_weight_to_epoch(epoch=epoch) - early_stop.on_epoch_end(epoch, logs={'val_loss': losses[epoch]}) - - if early_stop.model.stop_training: - break - - # The best configuration is in epoch 2 (loss = 0.1000), - # and while patience = 2, we're restoring the best weights, - # so we end up at the epoch with the best weights, i.e. epoch 2 - assert early_stop.model.get_weights() == 2 - - -def test_LearningRateScheduler(): - np.random.seed(1337) - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - model = Sequential() - model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) - model.add(Dense(num_classes, activation='softmax')) - model.compile(loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - - cbks = [callbacks.LearningRateScheduler(lambda x: 1. / (1. + x))] - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=5) - assert (float(K.get_value(model.optimizer.lr)) - 0.2) < K.epsilon() - - -def test_ReduceLROnPlateau(): - np.random.seed(1337) - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - def make_model(): - np.random.seed(1337) - model = Sequential() - model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) - model.add(Dense(num_classes, activation='softmax')) - - model.compile(loss='categorical_crossentropy', - optimizer=optimizers.SGD(lr=0.1), - metrics=['accuracy']) - return model - - model = make_model() - - # This should reduce the LR after the first epoch (due to high epsilon). 
- cbks = [callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, - min_delta=10, patience=1, cooldown=5)] - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=5, verbose=2) - assert_allclose(float(K.get_value(model.optimizer.lr)), - 0.01, atol=K.epsilon()) - - model = make_model() - cbks = [callbacks.ReduceLROnPlateau(monitor='val_loss', factor=0.1, - min_delta=0, patience=1, cooldown=5)] - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=5, verbose=2) - assert_allclose(float(K.get_value(model.optimizer.lr)), - 0.1, atol=K.epsilon()) - - -def test_ReduceLROnPlateau_patience(): - class DummyOptimizer(object): - def __init__(self): - self.lr = K.variable(1.0) - - class DummyModel(object): - def __init__(self): - self.optimizer = DummyOptimizer() - - reduce_on_plateau = callbacks.ReduceLROnPlateau(monitor='val_loss', - patience=2) - reduce_on_plateau.model = DummyModel() - - losses = [0.0860, 0.1096, 0.1040] - lrs = [] - - for epoch in range(len(losses)): - reduce_on_plateau.on_epoch_end(epoch, logs={'val_loss': losses[epoch]}) - lrs.append(K.get_value(reduce_on_plateau.model.optimizer.lr)) - - # The learning rates should be 1.0 except the last one - assert all([lr == 1.0 for lr in lrs[:-1]]) and lrs[-1] < 1.0 - - -def test_ReduceLROnPlateau_backwards_compatibility(): - import warnings - with warnings.catch_warnings(record=True) as ws: - reduce_on_plateau = callbacks.ReduceLROnPlateau(epsilon=1e-13) - # Check if warnings are disabled - if os.environ.get("PYTHONWARNINGS") != "ignore": - assert "`epsilon` argument is deprecated" in str(ws[0].message) - assert not hasattr(reduce_on_plateau, 'epsilon') - assert hasattr(reduce_on_plateau, 'min_delta') - assert reduce_on_plateau.min_delta == 1e-13 - - -def test_CSVLogger(tmpdir): - np.random.seed(1337) - filepath = str(tmpdir / 'log.tsv') - sep = '\t' - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - def make_model(): - np.random.seed(1337) - model = Sequential() - model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) - model.add(Dense(num_classes, activation='softmax')) - - model.compile(loss='categorical_crossentropy', - optimizer=optimizers.SGD(lr=0.1), - metrics=['accuracy']) - return model - - # case 1, create new file with defined separator - model = make_model() - cbks = [callbacks.CSVLogger(filepath, separator=sep)] - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=1) - - assert os.path.isfile(filepath) - with open(filepath) as csvfile: - dialect = Sniffer().sniff(csvfile.read()) - assert dialect.delimiter == sep - del model - del cbks - - # case 2, append data to existing file, skip header - model = make_model() - cbks = [callbacks.CSVLogger(filepath, separator=sep, append=True)] - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=1) - - # case 3, reuse of CSVLogger object - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=2) - - import re - with open(filepath) as csvfile: - list_lines = csvfile.readlines() - for line in list_lines: - assert line.count(sep) == 4 - assert len(list_lines) == 5 - output = " ".join(list_lines) - assert len(re.findall('epoch', output)) == 1 - - os.remove(filepath) - assert not 
tmpdir.listdir() - - -@pytest.mark.parametrize('update_freq', ['batch', 'epoch', 9]) -def test_TensorBoard(tmpdir, update_freq): - np.random.seed(np.random.randint(1, 1e7)) - filepath = str(tmpdir / 'logs') - - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - class DummyStatefulMetric(Layer): - - def __init__(self, name='dummy_stateful_metric', **kwargs): - super(DummyStatefulMetric, self).__init__(name=name, **kwargs) - self.stateful = True - self.state = K.variable(value=0, dtype='int32') - - def reset_states(self): - pass - - def __call__(self, y_true, y_pred): - return self.state - - inp = Input((input_dim,)) - hidden = Dense(num_hidden, activation='relu')(inp) - hidden = Dropout(0.1)(hidden) - hidden = BatchNormalization()(hidden) - output = Dense(num_classes, activation='softmax')(hidden) - model = Model(inputs=inp, outputs=output) - model.compile(loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy', DummyStatefulMetric()]) - - # we must generate new callbacks for each test, as they aren't stateless - def callbacks_factory(histogram_freq, embeddings_freq=1, write_images=True, - write_grads=True): - return [callbacks.TensorBoard(log_dir=filepath, - histogram_freq=histogram_freq, - write_images=write_images, - write_grads=write_grads, - embeddings_freq=embeddings_freq, - embeddings_layer_names=['dense_1'], - embeddings_data=X_test, - batch_size=5, - update_freq=update_freq)] - - # fit without validation data - model.fit(X_train, y_train, batch_size=batch_size, - callbacks=callbacks_factory(histogram_freq=0, embeddings_freq=0), - epochs=2) - - # fit with validation data and accuracy - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), - callbacks=callbacks_factory(histogram_freq=0, write_images=False, - write_grads=False), - epochs=2) - - # fit generator without validation data - train_generator = data_generator(X_train, y_train, batch_size) - model.fit_generator(train_generator, len(X_train), epochs=2, - callbacks=callbacks_factory(histogram_freq=0, - write_images=False, - write_grads=False, - embeddings_freq=0)) - - # fit generator with validation data and accuracy - train_generator = data_generator(X_train, y_train, batch_size) - model.fit_generator(train_generator, len(X_train), epochs=2, - validation_data=(X_test, y_test), - callbacks=callbacks_factory(histogram_freq=1, - write_images=False, - write_grads=False)) - - assert os.path.isdir(filepath) - shutil.rmtree(filepath) - assert not tmpdir.listdir() - - -@pytest.mark.skipif((K.backend() != 'tensorflow'), - reason='Requires TensorFlow backend') -def test_TensorBoard_histogram_freq_must_have_validation_data(tmpdir): - np.random.seed(np.random.randint(1, 1e7)) - filepath = str(tmpdir / 'logs') - - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - inp = Input((input_dim,)) - hidden = Dense(num_hidden, activation='relu')(inp) - hidden = Dropout(0.1)(hidden) - output = Dense(num_classes, activation='softmax')(hidden) - model = Model(inputs=inp, outputs=output) - model.compile(loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - - # we must generate new callbacks for each test, as they aren't stateless - def callbacks_factory(histogram_freq, embeddings_freq=1, write_images=True, - write_grads=True): - return [callbacks.TensorBoard(log_dir=filepath, - 
histogram_freq=histogram_freq, - write_images=write_images, - write_grads=write_grads, - embeddings_freq=embeddings_freq, - embeddings_layer_names=['dense_1'], - embeddings_data=X_test, - batch_size=5)] - - # fit without validation data should raise ValueError if histogram_freq > 0 - with pytest.raises(ValueError) as raised_exception: - model.fit(X_train, y_train, batch_size=batch_size, - callbacks=callbacks_factory(histogram_freq=1), epochs=3) - assert 'validation_data must be provided' in str(raised_exception.value) - - train_generator = data_generator(X_train, y_train, batch_size) - validation_generator = data_generator(X_test, y_test, batch_size) - - # fit generator without validation data should raise ValueError if - # histogram_freq > 0 - with pytest.raises(ValueError) as raised_exception: - model.fit_generator(train_generator, - len(X_train), epochs=2, - callbacks=callbacks_factory(histogram_freq=1, - write_images=False, - write_grads=False)) - assert 'validation_data must be provided' in str(raised_exception.value) - - # fit generator with validation data generator should raise ValueError if - # histogram_freq > 0 - with pytest.raises(ValueError) as raised_exception: - model.fit_generator(train_generator, len(X_train), epochs=2, - validation_data=validation_generator, - validation_steps=1, - callbacks=callbacks_factory(histogram_freq=1, - write_images=False, - write_grads=False)) - assert 'validation_data must be provided' in str(raised_exception.value) - - -def test_TensorBoard_multi_input_output(tmpdir): - np.random.seed(np.random.randint(1, 1e7)) - filepath = str(tmpdir / 'logs') - - (X_train, y_train), (X_test, y_test) = get_data_callbacks( - input_shape=(input_dim, input_dim)) - - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - inp1 = Input((input_dim, input_dim)) - inp2 = Input((input_dim, input_dim)) - inp_3d = add([inp1, inp2]) - inp_2d = GlobalAveragePooling1D()(inp_3d) - # test a layer with a list of output tensors - inp_pair = Lambda(lambda x: x)([inp_3d, inp_2d]) - hidden = dot(inp_pair, axes=-1) - hidden = Dense(num_hidden, activation='relu')(hidden) - hidden = Dropout(0.1)(hidden) - output1 = Dense(num_classes, activation='softmax')(hidden) - output2 = Dense(num_classes, activation='softmax')(hidden) - model = Model(inputs=[inp1, inp2], outputs=[output1, output2]) - model.compile(loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - - # we must generate new callbacks for each test, as they aren't stateless - def callbacks_factory(histogram_freq, embeddings_freq=1, write_images=True, - write_grads=True): - return [callbacks.TensorBoard(log_dir=filepath, - histogram_freq=histogram_freq, - write_images=write_images, - write_grads=write_grads, - embeddings_freq=embeddings_freq, - embeddings_layer_names=['dense_1'], - embeddings_data=[X_test] * 2, - batch_size=5)] - - # fit without validation data - model.fit([X_train] * 2, [y_train] * 2, batch_size=batch_size, - callbacks=callbacks_factory(histogram_freq=0, embeddings_freq=0), - epochs=3) - - # fit with validation data and accuracy - model.fit([X_train] * 2, [y_train] * 2, batch_size=batch_size, - validation_data=([X_test] * 2, [y_test] * 2), - callbacks=callbacks_factory(histogram_freq=1, write_images=False, - write_grads=False), - epochs=2) - - train_generator = data_generator([X_train] * 2, [y_train] * 2, batch_size) - - # fit generator without validation data - model.fit_generator(train_generator, len(X_train), epochs=2, - 
callbacks=callbacks_factory(histogram_freq=0, - embeddings_freq=0, - write_images=False, - write_grads=False)) - - # fit generator with validation data and accuracy - model.fit_generator(train_generator, len(X_train), epochs=2, - validation_data=([X_test] * 2, [y_test] * 2), - callbacks=callbacks_factory(histogram_freq=1, - write_images=False, - write_grads=False)) - - assert os.path.isdir(filepath) - shutil.rmtree(filepath) - assert not tmpdir.listdir() - - -def test_TensorBoard_convnet(tmpdir): - np.random.seed(np.random.randint(1, 1e7)) - filepath = str(tmpdir / 'logs') - - input_shape = (16, 16, 3) - (x_train, y_train), (x_test, y_test) = get_data_callbacks( - num_train=500, - num_test=200, - input_shape=input_shape) - y_train = np_utils.to_categorical(y_train) - y_test = np_utils.to_categorical(y_test) - - model = Sequential([ - Conv2D(filters=8, kernel_size=3, - activation='relu', - input_shape=input_shape), - MaxPooling2D(pool_size=2), - Conv2D(filters=4, kernel_size=(3, 3), - activation='relu', padding='same'), - BatchNormalization(), - GlobalAveragePooling2D(), - Dense(num_classes, activation='softmax') - ]) - model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['accuracy']) - tsb = callbacks.TensorBoard(log_dir=filepath, histogram_freq=1, - write_images=True, write_grads=True, - batch_size=16) - cbks = [tsb] - model.summary() - history = model.fit(x_train, y_train, epochs=2, batch_size=16, - validation_data=(x_test, y_test), - callbacks=cbks, - verbose=0) - assert os.path.isdir(filepath) - shutil.rmtree(filepath) - assert not tmpdir.listdir() - - -def test_TensorBoard_display_float_from_logs(tmpdir): - filepath = str(tmpdir / 'logs') - - input_shape = (3,) - (x_train, y_train), _ = get_data_callbacks(num_train=10, - num_test=0, - input_shape=input_shape) - y_train = np_utils.to_categorical(y_train) - - model = Sequential([ - Dense(num_classes, activation='softmax') - ]) - model.compile(loss='categorical_crossentropy', - optimizer='rmsprop') - - class CustomCallback(callbacks.Callback): - - def on_epoch_end(self, epoch, logs=None): - logs['test'] = 0. 
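# A minimal standalone sketch of the pattern above, assuming only the public
# Keras 2.x callbacks API; `InjectMetric`, `reader` and `custom_float` are
# illustrative names, not part of the original file. A plain float written
# into `logs` by one callback is seen by every callback that runs after it in
# the same CallbackList, which is how TensorBoard receives the extra scalar.
import numpy as np
import keras

class InjectMetric(keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs=None):
        # The logs dict is shared, so later callbacks observe this key.
        if logs is not None:
            logs['custom_float'] = 0.5

captured = []
reader = keras.callbacks.LambdaCallback(
    on_epoch_end=lambda epoch, logs: captured.append(logs.get('custom_float')))

model = keras.models.Sequential([keras.layers.Dense(1, input_dim=4)])
model.compile(optimizer='sgd', loss='mse')
model.fit(np.random.random((8, 4)), np.random.random((8, 1)),
          epochs=2, verbose=0, callbacks=[InjectMetric(), reader])
assert captured == [0.5, 0.5]  # the injected float reached the later callback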
- - tsb = callbacks.TensorBoard(log_dir=filepath, - batch_size=16) - cbks = [CustomCallback(), tsb] - model.fit(x_train, y_train, epochs=2, batch_size=16, - callbacks=cbks, - verbose=0) - assert os.path.isdir(filepath) - shutil.rmtree(filepath) - assert not tmpdir.listdir() - - -def test_CallbackValData(): - np.random.seed(1337) - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - model = Sequential() - model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) - model.add(Dense(num_classes, activation='softmax')) - model.compile(loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - - cbk = callbacks.LambdaCallback(on_train_end=lambda x: 1) - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=[cbk], epochs=1) - - cbk2 = callbacks.LambdaCallback(on_train_end=lambda x: 1) - train_generator = data_generator(X_train, y_train, batch_size) - model.fit_generator(train_generator, len(X_train), epochs=1, - validation_data=(X_test, y_test), - callbacks=[cbk2]) - - # callback validation data should always have x, y, and sample weights - assert len(cbk.validation_data) == len(cbk2.validation_data) == 3 - assert cbk.validation_data[0] is cbk2.validation_data[0] - assert cbk.validation_data[1] is cbk2.validation_data[1] - assert cbk.validation_data[2].shape == cbk2.validation_data[2].shape - - -def test_LambdaCallback(): - np.random.seed(1337) - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - model = Sequential() - model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) - model.add(Dense(num_classes, activation='softmax')) - model.compile(loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - - # Start an arbitrary process that should run during model training and - # be terminated after training has completed. 
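- # (If `on_train_end` were never dispatched, `p.terminate()` would not run and - # the `p.join()` below would block forever, so a hang here means the callback - # was dropped.)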
- def f(): - while True: - pass - - p = multiprocessing.Process(target=f) - p.start() - cleanup_callback = callbacks.LambdaCallback( - on_train_end=lambda logs: p.terminate()) - - cbks = [cleanup_callback] - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=5) - p.join() - assert not p.is_alive() - - -def test_TensorBoard_with_ReduceLROnPlateau(tmpdir): - import shutil - np.random.seed(np.random.randint(1, 1e7)) - filepath = str(tmpdir / 'logs') - - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - - model = Sequential() - model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) - model.add(Dense(num_classes, activation='softmax')) - model.compile(loss='binary_crossentropy', - optimizer='sgd', - metrics=['accuracy']) - - cbks = [ - callbacks.ReduceLROnPlateau( - monitor='val_loss', - factor=0.5, - patience=4, - verbose=1), - callbacks.TensorBoard( - log_dir=filepath)] - - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=2) - - assert os.path.isdir(filepath) - shutil.rmtree(filepath) - assert not tmpdir.listdir() - - -def tests_RemoteMonitor(): - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - model = Sequential() - model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) - model.add(Dense(num_classes, activation='softmax')) - model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['accuracy']) - cbks = [callbacks.RemoteMonitor()] - - with patch('requests.post'): - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=1) - - -def tests_RemoteMonitorWithJsonPayload(): - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - model = Sequential() - model.add(Dense(num_hidden, input_dim=input_dim, activation='relu')) - model.add(Dense(num_classes, activation='softmax')) - model.compile(loss='categorical_crossentropy', - optimizer='rmsprop', - metrics=['accuracy']) - cbks = [callbacks.RemoteMonitor(send_as_json=True)] - - with patch('requests.post'): - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), callbacks=cbks, epochs=1) - - -if __name__ == '__main__': - pytest.main([__file__]) -from __future__ import absolute_import -from __future__ import print_function -import pytest -import os -import numpy as np -from numpy.testing import assert_allclose - -from keras import backend as K -import keras -from keras.models import Sequential -from keras.layers import Dense, Activation -from keras.utils import np_utils -from keras.utils.test_utils import get_test_data -from keras.models import model_from_json, model_from_yaml -from keras import losses -from keras.engine.training_utils import make_batches - - -input_dim = 16 -num_hidden = 8 -num_classes = 4 -batch_size = 32 -epochs = 1 - - -@pytest.fixture -def in_tmpdir(tmpdir): - """Runs a function in a temporary directory. - - Checks that the directory is empty afterwards. 
- """ - with tmpdir.as_cwd(): - yield None - assert not tmpdir.listdir() - - -def test_sequential_pop(): - model = Sequential() - model.add(Dense(num_hidden, input_dim=input_dim)) - model.add(Dense(num_classes)) - model.compile(loss='mse', optimizer='sgd') - x = np.random.random((batch_size, input_dim)) - y = np.random.random((batch_size, num_classes)) - model.fit(x, y, epochs=1) - model.pop() - assert len(model.layers) == 1 - assert model.output_shape == (None, num_hidden) - model.compile(loss='mse', optimizer='sgd') - y = np.random.random((batch_size, num_hidden)) - model.fit(x, y, epochs=1) - - -def _get_test_data(): - np.random.seed(1234) - - train_samples = 100 - test_samples = 50 - - (x_train, y_train), (x_test, y_test) = get_test_data(num_train=train_samples, - num_test=test_samples, - input_shape=( - input_dim,), - classification=True, - num_classes=num_classes) - y_test = np_utils.to_categorical(y_test) - y_train = np_utils.to_categorical(y_train) - return (x_train, y_train), (x_test, y_test) - - -def test_sequential_fit_generator(): - (x_train, y_train), (x_test, y_test) = _get_test_data() - - def data_generator(train): - if train: - max_batch_index = len(x_train) // batch_size - else: - max_batch_index = len(x_test) // batch_size - i = 0 - while 1: - if train: - yield (x_train[i * batch_size: (i + 1) * batch_size], - y_train[i * batch_size: (i + 1) * batch_size]) - else: - yield (x_test[i * batch_size: (i + 1) * batch_size], - y_test[i * batch_size: (i + 1) * batch_size]) - i += 1 - i = i % max_batch_index - - model = Sequential() - model.add(Dense(num_hidden, input_shape=(input_dim,))) - model.add(Activation('relu')) - model.add(Dense(num_classes)) - model.pop() - model.add(Dense(num_classes)) - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - - model.fit_generator(data_generator(True), 5, epochs) - model.fit_generator(data_generator(True), 5, epochs, - validation_data=(x_test, y_test)) - model.fit_generator(data_generator(True), 5, epochs, - validation_data=data_generator(False), - validation_steps=3) - model.fit_generator(data_generator(True), 5, epochs, max_queue_size=2) - model.evaluate(x_train, y_train) - - -def test_sequential(in_tmpdir): - (x_train, y_train), (x_test, y_test) = _get_test_data() - - # TODO: factor out - def data_generator(x, y, batch_size=50): - index_array = np.arange(len(x)) - while 1: - batches = make_batches(len(x_test), batch_size) - for batch_index, (batch_start, batch_end) in enumerate(batches): - batch_ids = index_array[batch_start:batch_end] - x_batch = x[batch_ids] - y_batch = y[batch_ids] - yield (x_batch, y_batch) - - model = Sequential() - model.add(Dense(num_hidden, input_shape=(input_dim,))) - model.add(Activation('relu')) - model.add(Dense(num_classes)) - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - - model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, - validation_data=(x_test, y_test)) - model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2, - validation_split=0.1) - model.fit(x_train, y_train, batch_size=batch_size, - epochs=epochs, verbose=0) - model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, - shuffle=False) - - model.train_on_batch(x_train[:32], y_train[:32]) - - loss = model.evaluate(x_test, y_test) - - prediction = model.predict_generator(data_generator(x_test, y_test), 1, - max_queue_size=2, verbose=1) - gen_loss = 
model.evaluate_generator(data_generator(x_test, y_test, 50), 1, - max_queue_size=2) - pred_loss = K.eval(K.mean(losses.get(model.loss)(K.variable(y_test), - K.variable(prediction)))) - - assert(np.isclose(pred_loss, loss)) - assert(np.isclose(gen_loss, loss)) - - model.predict(x_test, verbose=0) - model.predict_classes(x_test, verbose=0) - model.predict_proba(x_test, verbose=0) - - fname = 'test_sequential_temp.h5' - model.save_weights(fname, overwrite=True) - model = Sequential() - model.add(Dense(num_hidden, input_shape=(input_dim,))) - model.add(Activation('relu')) - model.add(Dense(num_classes)) - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - model.load_weights(fname) - os.remove(fname) - - nloss = model.evaluate(x_test, y_test, verbose=0) - assert(loss == nloss) - - # Test serialization - config = model.get_config() - assert 'name' in config - new_model = Sequential.from_config(config) - assert new_model.weights # Model should be built. - - model.summary() - json_str = model.to_json() - model_from_json(json_str) - - yaml_str = model.to_yaml() - model_from_yaml(yaml_str) - - -def test_nested_sequential(in_tmpdir): - (x_train, y_train), (x_test, y_test) = _get_test_data() - - inner = Sequential() - inner.add(Dense(num_hidden, input_shape=(input_dim,))) - inner.add(Activation('relu')) - inner.add(Dense(num_classes)) - - middle = Sequential() - middle.add(inner) - - model = Sequential() - model.add(middle) - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - - model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, - validation_data=(x_test, y_test)) - model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=2, - validation_split=0.1) - model.fit(x_train, y_train, batch_size=batch_size, - epochs=epochs, verbose=0) - model.fit(x_train, y_train, batch_size=batch_size, epochs=epochs, verbose=1, - shuffle=False) - - model.train_on_batch(x_train[:32], y_train[:32]) - - loss = model.evaluate(x_test, y_test, verbose=0) - - model.predict(x_test, verbose=0) - model.predict_classes(x_test, verbose=0) - model.predict_proba(x_test, verbose=0) - - fname = 'test_nested_sequential_temp.h5' - model.save_weights(fname, overwrite=True) - - inner = Sequential() - inner.add(Dense(num_hidden, input_shape=(input_dim,))) - inner.add(Activation('relu')) - inner.add(Dense(num_classes)) - - middle = Sequential() - middle.add(inner) - - model = Sequential() - model.add(middle) - model.add(Activation('softmax')) - model.compile(loss='categorical_crossentropy', optimizer='rmsprop') - model.load_weights(fname) - os.remove(fname) - - nloss = model.evaluate(x_test, y_test, verbose=0) - assert(loss == nloss) - - # Test serialization - config = model.get_config() - Sequential.from_config(config) - - model.summary() - json_str = model.to_json() - model_from_json(json_str) - - yaml_str = model.to_yaml() - model_from_yaml(yaml_str) - - -def test_sequential_count_params(): - input_dim = 20 - num_units = 10 - num_classes = 2 - - n = input_dim * num_units + num_units - n += num_units * num_units + num_units - n += num_units * num_classes + num_classes - - model = Sequential() - model.add(Dense(num_units, input_shape=(input_dim,))) - model.add(Dense(num_units)) - model.add(Dense(num_classes)) - model.add(Activation('softmax')) - model.build() - - assert(n == model.count_params()) - - model.compile('sgd', 'binary_crossentropy') - assert(n == model.count_params()) - - -def 
test_nested_sequential_trainability(): - input_dim = 20 - num_units = 10 - num_classes = 2 - - inner_model = Sequential() - inner_model.add(Dense(num_units, input_shape=(input_dim,))) - - model = Sequential() - model.add(inner_model) - model.add(Dense(num_classes)) - - assert len(model.trainable_weights) == 4 - inner_model.trainable = False - assert len(model.trainable_weights) == 2 - inner_model.trainable = True - assert len(model.trainable_weights) == 4 - - -def test_rebuild_model(): - model = Sequential() - model.add(Dense(128, input_shape=(784,))) - model.add(Dense(64)) - assert(model.get_layer(index=-1).output_shape == (None, 64)) - - model.add(Dense(32)) - assert(model.get_layer(index=-1).output_shape == (None, 32)) - - -def test_clone_functional_model(): - val_a = np.random.random((10, 4)) - val_b = np.random.random((10, 4)) - val_out = np.random.random((10, 4)) - - input_a = keras.Input(shape=(4,)) - input_b = keras.Input(shape=(4,)) - dense_1 = keras.layers.Dense(4) - dense_2 = keras.layers.Dense(4) - - x_a = dense_1(input_a) - x_a = keras.layers.Dropout(0.5)(x_a) - x_a = keras.layers.BatchNormalization()(x_a) - x_b = dense_1(input_b) - x_a = dense_2(x_a) - outputs = keras.layers.add([x_a, x_b]) - model = keras.models.Model([input_a, input_b], outputs) - - if K.backend() == 'tensorflow': - # Everything should work in a new session. - K.clear_session() - - # With placeholder creation - new_model = keras.models.clone_model(model) - new_model.compile('rmsprop', 'mse') - new_model.train_on_batch([val_a, val_b], val_out) - - # On top of new tensors - input_a = keras.Input(shape=(4,), name='a') - input_b = keras.Input(shape=(4,), name='b') - new_model = keras.models.clone_model( - model, input_tensors=[input_a, input_b]) - new_model.compile('rmsprop', 'mse') - new_model.train_on_batch([val_a, val_b], val_out) - - # On top of new, non-Keras tensors - input_a = keras.backend.variable(val_a) - input_b = keras.backend.variable(val_b) - new_model = keras.models.clone_model( - model, input_tensors=[input_a, input_b]) - new_model.compile('rmsprop', 'mse') - new_model.train_on_batch(None, val_out) - - -def test_clone_functional_model_with_multi_outputs(): - input_layer = keras.Input(shape=(4,)) - - # Layer with single input and multiple outputs - layer1 = keras.layers.Lambda(lambda x: [x + 1, x], - lambda shapes: [shapes, shapes]) - x_a, x_b = layer1(input_layer) - - class SwapLayer(keras.layers.Layer): - def call(self, inputs, **kwargs): - return [inputs[1], inputs[0]] - - def compute_output_shape(self, input_shape): - return [input_shape[1], input_shape[0]] - - # Layer with multiple inputs and outputs - x_a, x_b = SwapLayer()([x_a, x_b]) - model = keras.Model(inputs=[input_layer], outputs=[x_a, x_b]) - new_model = keras.models.clone_model(model) - - x_test = np.random.random((10, 4)) - pred_a, pred_b = model.predict(x_test) - pred_new_a, pred_new_b = new_model.predict(x_test) - assert(pred_a.all() == pred_new_a.all()) - assert(pred_b.all() == pred_new_b.all()) - - -def test_clone_sequential_model(): - val_a = np.random.random((10, 4)) - val_out = np.random.random((10, 4)) - - model = keras.models.Sequential() - model.add(keras.layers.Dense(4, input_shape=(4,))) - model.add(keras.layers.BatchNormalization()) - model.add(keras.layers.Dropout(0.5)) - model.add(keras.layers.Dense(4)) - - if K.backend() == 'tensorflow': - # Everything should work in a new session. 
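- # (`clear_session()` discards the current TensorFlow graph, so `clone_model` - # below has to rebuild every layer from its config rather than reuse any - # tensors from the original graph.)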
- K.clear_session() - - # With placeholder creation - new_model = keras.models.clone_model(model) - new_model.compile('rmsprop', 'mse') - new_model.train_on_batch(val_a, val_out) - - # On top of new tensor - input_a = keras.Input(shape=(4,)) - new_model = keras.models.clone_model( - model, input_tensors=input_a) - new_model.compile('rmsprop', 'mse') - new_model.train_on_batch(val_a, val_out) - - # On top of new, non-Keras tensor - input_a = keras.backend.variable(val_a) - new_model = keras.models.clone_model( - model, input_tensors=input_a) - new_model.compile('rmsprop', 'mse') - new_model.train_on_batch(None, val_out) - - -def test_sequential_update_disabling(): - val_a = np.random.random((10, 4)) - val_out = np.random.random((10, 4)) - - model = keras.models.Sequential() - model.add(keras.layers.BatchNormalization(input_shape=(4,))) - - model.trainable = False - assert not model.updates - - model.compile('sgd', 'mse') - assert not model.updates - - x1 = model.predict(val_a) - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - assert_allclose(x1, x2, atol=1e-7) - - model.trainable = True - model.compile('sgd', 'mse') - assert model.updates - - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - assert np.abs(np.sum(x1 - x2)) > 1e-5 - - -def test_sequential_deferred_build(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(3)) - model.add(keras.layers.Dense(3)) - model.compile('sgd', 'mse') - - assert model.built is False - assert len(model.layers) == 2 - assert len(model.weights) == 0 - - model.train_on_batch( - np.random.random((2, 4)), np.random.random((2, 3))) - - assert model.built is True - assert len(model.layers) == 2 - assert len(model.weights) == 4 - - # Test serialization - config = model.get_config() - assert 'name' in config - new_model = Sequential.from_config(config) - assert new_model.built is True - assert len(new_model.layers) == 2 - assert len(new_model.weights) == 4 - - -def test_nested_sequential_deferred_build(): - inner_model = keras.models.Sequential() - inner_model.add(keras.layers.Dense(3)) - inner_model.add(keras.layers.Dense(3)) - - model = keras.models.Sequential() - model.add(inner_model) - model.add(keras.layers.Dense(5)) - model.compile('sgd', 'mse') - - assert inner_model.built is False - assert len(inner_model.layers) == 2 - assert len(inner_model.weights) == 0 - assert model.built is False - assert len(model.layers) == 2 - assert len(model.weights) == 0 - - model.train_on_batch( - np.random.random((2, 4)), np.random.random((2, 5))) - - assert inner_model.built is True - assert len(inner_model.layers) == 2 - assert len(inner_model.weights) == 4 - assert model.built is True - assert len(model.layers) == 2 - assert len(model.weights) == 6 - - config = model.get_config() - new_model = keras.models.Sequential.from_config(config) - assert new_model.built is True - assert len(new_model.layers) == 2 - assert len(new_model.weights) == 6 - - new_inner_model = new_model.layers[0] - assert new_inner_model.built is True - assert len(new_inner_model.layers) == 2 - assert len(new_inner_model.weights) == 4 - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -from keras.preprocessing import image -from PIL import Image -import numpy as np -import os -import tempfile -import shutil - - -class TestImage(object): - - def setup_class(cls): - cls.img_w = cls.img_h = 20 - rgb_images = [] - gray_images = [] - for n in range(8): - bias = np.random.rand(cls.img_w, cls.img_h, 1) * 64 - variance = 
np.random.rand(cls.img_w, cls.img_h, 1) * (255 - 64) - imarray = np.random.rand(cls.img_w, cls.img_h, 3) * variance + bias - im = Image.fromarray(imarray.astype('uint8')).convert('RGB') - rgb_images.append(im) - - imarray = np.random.rand(cls.img_w, cls.img_h, 1) * variance + bias - im = Image.fromarray(imarray.astype( - 'uint8').squeeze()).convert('L') - gray_images.append(im) - - cls.all_test_images = [rgb_images, gray_images] - - def teardown_class(cls): - del cls.all_test_images - - def test_image_data_generator(self, tmpdir): - for test_images in self.all_test_images: - img_list = [] - for im in test_images: - img_list.append(image.img_to_array(im)[None, ...]) - - images = np.vstack(img_list) - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=0.2, - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True) - generator.fit(images, augment=True) - - num_samples = images.shape[0] - for x, y in generator.flow(images, np.arange(num_samples), - shuffle=False, save_to_dir=str(tmpdir), - batch_size=3): - assert x.shape == images[:3].shape - assert list(y) == [0, 1, 2] - break - - # Test with sample weights - for x, y, w in generator.flow(images, np.arange(num_samples), - shuffle=False, - sample_weight=np.arange( - num_samples) + 1, - save_to_dir=str(tmpdir), - batch_size=3): - assert x.shape == images[:3].shape - assert list(y) == [0, 1, 2] - assert list(w) == [1, 2, 3] - break - - # Test with `shuffle=True` - for x, y in generator.flow(images, np.arange(num_samples), - shuffle=True, save_to_dir=str(tmpdir), - batch_size=3): - assert x.shape == images[:3].shape - # Check that the sequence is shuffled. - assert list(y) != [0, 1, 2] - break - - # Test without y - for x in generator.flow(images, None, - shuffle=True, save_to_dir=str(tmpdir), - batch_size=3): - assert type(x) is np.ndarray - assert x.shape == images[:3].shape - # Check that the sequence is shuffled. 
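- # (there are no labels in this flow, so shuffling cannot be verified here; - # the loop only exercises the unlabelled branch)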
- break - - # Test with a single miscellaneous input data array - dsize = images.shape[0] - x_misc1 = np.random.random(dsize) - - for i, (x, y) in enumerate(generator.flow((images, x_misc1), - np.arange(dsize), - shuffle=False, batch_size=2)): - assert x[0].shape == images[:2].shape - assert (x[1] == x_misc1[(i * 2):((i + 1) * 2)]).all() - if i == 2: - break - - # Test with two miscellaneous inputs - x_misc2 = np.random.random((dsize, 3, 3)) - - for i, (x, y) in enumerate(generator.flow((images, [x_misc1, x_misc2]), - np.arange(dsize), - shuffle=False, batch_size=2)): - assert x[0].shape == images[:2].shape - assert (x[1] == x_misc1[(i * 2):((i + 1) * 2)]).all() - assert (x[2] == x_misc2[(i * 2):((i + 1) * 2)]).all() - if i == 2: - break - - # Test cases with `y = None` - x = generator.flow(images, None, batch_size=3).next() - assert type(x) is np.ndarray - assert x.shape == images[:3].shape - x = generator.flow((images, x_misc1), None, - batch_size=3, shuffle=False).next() - assert type(x) is list - assert x[0].shape == images[:3].shape - assert (x[1] == x_misc1[:3]).all() - x = generator.flow((images, [x_misc1, x_misc2]), None, - batch_size=3, shuffle=False).next() - assert type(x) is list - assert x[0].shape == images[:3].shape - assert (x[1] == x_misc1[:3]).all() - assert (x[2] == x_misc2[:3]).all() - - # Test some failure cases: - x_misc_err = np.random.random((dsize + 1, 3, 3)) - - with pytest.raises(ValueError) as e_info: - generator.flow((images, x_misc_err), - np.arange(dsize), batch_size=3) - assert 'All of the arrays in' in str(e_info.value) - - with pytest.raises(ValueError) as e_info: - generator.flow((images, x_misc1), - np.arange(dsize + 1), - batch_size=3) - assert '`x` (images tensor) and `y` (labels) ' in str(e_info.value) - - # Test `flow` behavior as Sequence - seq = generator.flow(images, np.arange(images.shape[0]), - shuffle=False, save_to_dir=str(tmpdir), - batch_size=3) - assert len(seq) == images.shape[0] // 3 + 1 - x, y = seq[0] - assert x.shape == images[:3].shape - assert list(y) == [0, 1, 2] - - # Test with `shuffle=True` - seq = generator.flow(images, np.arange(images.shape[0]), - shuffle=True, save_to_dir=str(tmpdir), - batch_size=3, seed=123) - x, y = seq[0] - # Check that the sequence is shuffled. - assert list(y) != [0, 1, 2] - - # `on_epoch_end` should reshuffle the sequence. 
- seq.on_epoch_end() - x2, y2 = seq[0] - assert list(y) != list(y2) - - def test_image_data_generator_with_split_value_error(self): - with pytest.raises(ValueError): - generator = image.ImageDataGenerator(validation_split=5) - - def test_image_data_generator_invalid_data(self): - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - data_format='channels_last') - # Test fit with invalid data - with pytest.raises(ValueError): - x = np.random.random((3, 10, 10)) - generator.fit(x) - - # Test flow with invalid data - with pytest.raises(ValueError): - x = np.random.random((32, 10, 10)) - generator.flow(np.arange(x.shape[0])) - - def test_image_data_generator_fit(self): - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - zoom_range=(0.2, 0.2), - data_format='channels_last') - # Test grayscale - x = np.random.random((32, 10, 10, 1)) - generator.fit(x) - # Test RGB - x = np.random.random((32, 10, 10, 3)) - generator.fit(x) - # Test more samples than dims - x = np.random.random((32, 4, 4, 1)) - generator.fit(x) - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - data_format='channels_first') - # Test grayscale - x = np.random.random((32, 1, 10, 10)) - generator.fit(x) - # Test RGB - x = np.random.random((32, 3, 10, 10)) - generator.fit(x) - # Test more samples than dims - x = np.random.random((32, 1, 4, 4)) - generator.fit(x) - - def test_directory_iterator(self, tmpdir): - num_classes = 2 - - # create folders and subfolders - paths = [] - for cl in range(num_classes): - class_directory = 'class-{}'.format(cl) - classpaths = [ - class_directory, - os.path.join(class_directory, 'subfolder-1'), - os.path.join(class_directory, 'subfolder-2'), - os.path.join(class_directory, 'subfolder-1', 'sub-subfolder') - ] - for path in classpaths: - tmpdir.join(path).mkdir() - paths.append(classpaths) - - # save the images in the paths - count = 0 - filenames = [] - for test_images in self.all_test_images: - for im in test_images: - # rotate image class - im_class = count % num_classes - # rotate subfolders - classpaths = paths[im_class] - filename = os.path.join(classpaths[count % len(classpaths)], - 'image-{}.jpg'.format(count)) - filenames.append(filename) - im.save(str(tmpdir / filename)) - count += 1 - - # create iterator - generator = image.ImageDataGenerator() - dir_iterator = generator.flow_from_directory(str(tmpdir)) - - # check number of classes and images - assert len(dir_iterator.class_indices) == num_classes - assert len(dir_iterator.classes) == count - assert set(dir_iterator.filenames) == set(filenames) - - # Test invalid use cases - with pytest.raises(ValueError): - generator.flow_from_directory(str(tmpdir), color_mode='cmyk') - with pytest.raises(ValueError): - generator.flow_from_directory(str(tmpdir), class_mode='output') - - def preprocessing_function(x): - """This will fail if x is not provided as a Numpy array. - Note: This is made to enforce backward compatibility.
- """ - - assert x.shape == (26, 26, 3) - assert type(x) is np.ndarray - - return np.zeros_like(x) - - # Test usage as Sequence - generator = image.ImageDataGenerator( - preprocessing_function=preprocessing_function) - dir_seq = generator.flow_from_directory(str(tmpdir), - target_size=(26, 26), - color_mode='rgb', - batch_size=3, - class_mode='categorical') - assert len(dir_seq) == count // 3 + 1 - x1, y1 = dir_seq[1] - assert x1.shape == (3, 26, 26, 3) - assert y1.shape == (3, num_classes) - x1, y1 = dir_seq[5] - assert (x1 == 0).all() - - with pytest.raises(ValueError): - x1, y1 = dir_seq[9] - - def test_directory_iterator_class_mode_input(self, tmpdir): - tmpdir.join('class-1').mkdir() - - # save the images in the paths - count = 0 - for test_images in self.all_test_images: - for im in test_images: - filename = str(tmpdir / 'class-1' / - 'image-{}.jpg'.format(count)) - im.save(filename) - count += 1 - - # create iterator - generator = image.ImageDataGenerator() - dir_iterator = generator.flow_from_directory(str(tmpdir), - class_mode='input') - batch = next(dir_iterator) - - # check if input and output have the same shape - assert(batch[0].shape == batch[1].shape) - # check if the input and output images are not the same numpy array - input_img = batch[0][0] - output_img = batch[1][0] - output_img[0][0][0] += 1 - assert(input_img[0][0][0] != output_img[0][0][0]) - - @pytest.mark.parametrize('validation_split,num_training', [ - (0.25, 12), - (0.40, 10), - (0.50, 8), - ]) - def test_directory_iterator_with_validation_split(self, validation_split, - num_training): - num_classes = 2 - tmp_folder = tempfile.mkdtemp(prefix='test_images') - - # create folders and subfolders - paths = [] - for cl in range(num_classes): - class_directory = 'class-{}'.format(cl) - classpaths = [ - class_directory, - os.path.join(class_directory, 'subfolder-1'), - os.path.join(class_directory, 'subfolder-2'), - os.path.join(class_directory, 'subfolder-1', 'sub-subfolder') - ] - for path in classpaths: - os.mkdir(os.path.join(tmp_folder, path)) - paths.append(classpaths) - - # save the images in the paths - count = 0 - filenames = [] - for test_images in self.all_test_images: - for im in test_images: - # rotate image class - im_class = count % num_classes - # rotate subfolders - classpaths = paths[im_class] - filename = os.path.join(classpaths[count % len(classpaths)], - 'image-{}.jpg'.format(count)) - filenames.append(filename) - im.save(os.path.join(tmp_folder, filename)) - count += 1 - - # create iterator - generator = image.ImageDataGenerator(validation_split=validation_split) - - with pytest.raises(ValueError): - generator.flow_from_directory(tmp_folder, subset='foo') - - train_iterator = generator.flow_from_directory(tmp_folder, - subset='training') - assert train_iterator.samples == num_training - - valid_iterator = generator.flow_from_directory(tmp_folder, - subset='validation') - assert valid_iterator.samples == count - num_training - - # check number of classes and images - assert len(train_iterator.class_indices) == num_classes - assert len(train_iterator.classes) == num_training - assert len(set(train_iterator.filenames) & - set(filenames)) == num_training - - shutil.rmtree(tmp_folder) - - def test_img_utils(self): - height, width = 10, 8 - - # Test th data format - x = np.random.random((3, height, width)) - img = image.array_to_img(x, data_format='channels_first') - assert img.size == (width, height) - x = image.img_to_array(img, data_format='channels_first') - assert x.shape == (3, height, width) - # 
Test 2D - x = np.random.random((1, height, width)) - img = image.array_to_img(x, data_format='channels_first') - assert img.size == (width, height) - x = image.img_to_array(img, data_format='channels_first') - assert x.shape == (1, height, width) - - # Test tf data format - x = np.random.random((height, width, 3)) - img = image.array_to_img(x, data_format='channels_last') - assert img.size == (width, height) - x = image.img_to_array(img, data_format='channels_last') - assert x.shape == (height, width, 3) - # Test 2D - x = np.random.random((height, width, 1)) - img = image.array_to_img(x, data_format='channels_last') - assert img.size == (width, height) - x = image.img_to_array(img, data_format='channels_last') - assert x.shape == (height, width, 1) - - # Test invalid use case - with pytest.raises(ValueError): - x = np.random.random((height, width)) # not 3D - img = image.array_to_img(x, data_format='channels_first') - with pytest.raises(ValueError): # unknown data_format - x = np.random.random((height, width, 3)) - img = image.array_to_img(x, data_format='channels') - with pytest.raises(ValueError): # neither RGB nor gray-scale - x = np.random.random((height, width, 5)) - img = image.array_to_img(x, data_format='channels_last') - with pytest.raises(ValueError): # unknown data_format - x = np.random.random((height, width, 3)) - img = image.img_to_array(x, data_format='channels') - with pytest.raises(ValueError): # neither RGB nor gray-scale - x = np.random.random((height, width, 5, 3)) - img = image.img_to_array(x, data_format='channels_last') - - def test_random_transforms(self): - x = np.random.random((2, 28, 28)) - assert image.random_rotation(x, 45).shape == (2, 28, 28) - assert image.random_shift(x, 1, 1).shape == (2, 28, 28) - assert image.random_shear(x, 20).shape == (2, 28, 28) - assert image.random_zoom(x, (5, 5)).shape == (2, 28, 28) - assert image.random_channel_shift(x, 20).shape == (2, 28, 28) - - # Test get_random_transform with predefined seed - seed = 1 - generator = image.ImageDataGenerator( - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=0.2, - channel_shift_range=0.1, - brightness_range=(1, 5), - horizontal_flip=True, - vertical_flip=True) - transform_dict = generator.get_random_transform(x.shape, seed) - transform_dict2 = generator.get_random_transform(x.shape, seed * 2) - assert transform_dict['theta'] != 0 - assert transform_dict['theta'] != transform_dict2['theta'] - assert transform_dict['tx'] != 0 - assert transform_dict['tx'] != transform_dict2['tx'] - assert transform_dict['ty'] != 0 - assert transform_dict['ty'] != transform_dict2['ty'] - assert transform_dict['shear'] != 0 - assert transform_dict['shear'] != transform_dict2['shear'] - assert transform_dict['zx'] != 0 - assert transform_dict['zx'] != transform_dict2['zx'] - assert transform_dict['zy'] != 0 - assert transform_dict['zy'] != transform_dict2['zy'] - assert transform_dict['channel_shift_intensity'] != 0 - assert (transform_dict['channel_shift_intensity'] != - transform_dict2['channel_shift_intensity']) - assert transform_dict['brightness'] != 0 - assert transform_dict['brightness'] != transform_dict2['brightness'] - - # Test get_random_transform without any randomness - generator = image.ImageDataGenerator() - transform_dict = generator.get_random_transform(x.shape, seed) - assert transform_dict['theta'] == 0 - assert transform_dict['tx'] == 0 - assert transform_dict['ty'] == 0 - assert transform_dict['shear'] == 0 - assert transform_dict['zx'] 
== 1 - assert transform_dict['zy'] == 1 - assert transform_dict['channel_shift_intensity'] is None - assert transform_dict['brightness'] is None - - def test_deterministic_transform(self): - x = np.ones((32, 32, 3)) - generator = image.ImageDataGenerator( - rotation_range=90, - fill_mode='constant') - x = np.random.random((32, 32, 3)) - assert np.allclose(generator.apply_transform(x, {'flip_vertical': True}), - x[::-1, :, :]) - assert np.allclose(generator.apply_transform(x, {'flip_horizontal': True}), - x[:, ::-1, :]) - x = np.ones((3, 3, 3)) - x_rotated = np.array([[[0., 0., 0.], - [0., 0., 0.], - [1., 1., 1.]], - [[0., 0., 0.], - [1., 1., 1.], - [1., 1., 1.]], - [[0., 0., 0.], - [0., 0., 0.], - [1., 1., 1.]]]) - assert np.allclose(generator.apply_transform(x, {'theta': 45}), - x_rotated) - assert np.allclose(image.apply_affine_transform( - x, theta=45, channel_axis=2, fill_mode='constant'), x_rotated) - - def test_batch_standardize(self): - # ImageDataGenerator.standardize should work on batches - for test_images in self.all_test_images: - img_list = [] - for im in test_images: - img_list.append(image.img_to_array(im)[None, ...]) - - images = np.vstack(img_list) - generator = image.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=0.2, - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True) - generator.fit(images, augment=True) - - transformed = np.copy(images) - for i, im in enumerate(transformed): - transformed[i] = generator.random_transform(im) - transformed = generator.standardize(transformed) - - def test_load_img(self, tmpdir): - filename = str(tmpdir / 'image.png') - - original_im_array = np.array(255 * np.random.rand(100, 100, 3), - dtype=np.uint8) - original_im = image.array_to_img(original_im_array, scale=False) - original_im.save(filename) - - # Test that loaded image is exactly equal to original. - - loaded_im = image.load_img(filename) - loaded_im_array = image.img_to_array(loaded_im) - assert loaded_im_array.shape == original_im_array.shape - assert np.all(loaded_im_array == original_im_array) - - loaded_im = image.load_img(filename, grayscale=True) - loaded_im_array = image.img_to_array(loaded_im) - assert loaded_im_array.shape == (original_im_array.shape[0], - original_im_array.shape[1], 1) - - # Test that nothing is changed when target size is equal to original. - - loaded_im = image.load_img(filename, target_size=(100, 100)) - loaded_im_array = image.img_to_array(loaded_im) - assert loaded_im_array.shape == original_im_array.shape - assert np.all(loaded_im_array == original_im_array) - - loaded_im = image.load_img(filename, grayscale=True, - target_size=(100, 100)) - loaded_im_array = image.img_to_array(loaded_im) - assert loaded_im_array.shape == (original_im_array.shape[0], - original_im_array.shape[1], 1) - - # Test down-sampling with bilinear interpolation. - - loaded_im = image.load_img(filename, target_size=(25, 25)) - loaded_im_array = image.img_to_array(loaded_im) - assert loaded_im_array.shape == (25, 25, 3) - - loaded_im = image.load_img(filename, grayscale=True, - target_size=(25, 25)) - loaded_im_array = image.img_to_array(loaded_im) - assert loaded_im_array.shape == (25, 25, 1) - - # Test down-sampling with nearest neighbor interpolation. 
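- # (nearest-neighbor interpolation picks single source pixels rather than - # averaging them, so the result should differ from the bilinear resize above, - # as asserted below)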
- - loaded_im_nearest = image.load_img(filename, target_size=(25, 25), - interpolation="nearest") - loaded_im_array_nearest = image.img_to_array(loaded_im_nearest) - assert loaded_im_array_nearest.shape == (25, 25, 3) - assert np.any(loaded_im_array_nearest != loaded_im_array) - - # Check that exception is raised if interpolation not supported. - - loaded_im = image.load_img(filename, interpolation="unsupported") - with pytest.raises(ValueError): - loaded_im = image.load_img(filename, target_size=(25, 25), - interpolation="unsupported") - - -if __name__ == '__main__': - pytest.main([__file__]) -from math import ceil - -import numpy as np -from numpy.testing import assert_allclose, assert_raises - -import pytest - -from keras.preprocessing.sequence import pad_sequences -from keras.preprocessing.sequence import make_sampling_table -from keras.preprocessing.sequence import skipgrams -from keras.preprocessing.sequence import _remove_long_seq -from keras.preprocessing.sequence import TimeseriesGenerator - - -def test_pad_sequences(): - a = [[1], [1, 2], [1, 2, 3]] - - # test padding - b = pad_sequences(a, maxlen=3, padding='pre') - assert_allclose(b, [[0, 0, 1], [0, 1, 2], [1, 2, 3]]) - b = pad_sequences(a, maxlen=3, padding='post') - assert_allclose(b, [[1, 0, 0], [1, 2, 0], [1, 2, 3]]) - - # test truncating - b = pad_sequences(a, maxlen=2, truncating='pre') - assert_allclose(b, [[0, 1], [1, 2], [2, 3]]) - b = pad_sequences(a, maxlen=2, truncating='post') - assert_allclose(b, [[0, 1], [1, 2], [1, 2]]) - - # test value - b = pad_sequences(a, maxlen=3, value=1) - assert_allclose(b, [[1, 1, 1], [1, 1, 2], [1, 2, 3]]) - - -def test_pad_sequences_vector(): - a = [[[1, 1]], - [[2, 1], [2, 2]], - [[3, 1], [3, 2], [3, 3]]] - - # test padding - b = pad_sequences(a, maxlen=3, padding='pre') - assert_allclose(b, [[[0, 0], [0, 0], [1, 1]], - [[0, 0], [2, 1], [2, 2]], - [[3, 1], [3, 2], [3, 3]]]) - b = pad_sequences(a, maxlen=3, padding='post') - assert_allclose(b, [[[1, 1], [0, 0], [0, 0]], - [[2, 1], [2, 2], [0, 0]], - [[3, 1], [3, 2], [3, 3]]]) - - # test truncating - b = pad_sequences(a, maxlen=2, truncating='pre') - assert_allclose(b, [[[0, 0], [1, 1]], - [[2, 1], [2, 2]], - [[3, 2], [3, 3]]]) - - b = pad_sequences(a, maxlen=2, truncating='post') - assert_allclose(b, [[[0, 0], [1, 1]], - [[2, 1], [2, 2]], - [[3, 1], [3, 2]]]) - - # test value - b = pad_sequences(a, maxlen=3, value=1) - assert_allclose(b, [[[1, 1], [1, 1], [1, 1]], - [[1, 1], [2, 1], [2, 2]], - [[3, 1], [3, 2], [3, 3]]]) - - -def test_make_sampling_table(): - a = make_sampling_table(3) - assert_allclose(a, np.asarray([0.00315225, 0.00315225, 0.00547597]), - rtol=.1) - - -def test_skipgrams(): - # test with no window size and binary labels - couples, labels = skipgrams(np.arange(3), vocabulary_size=3) - for couple in couples: - assert couple[0] in [0, 1, 2] and couple[1] in [0, 1, 2] - - # test window size and categorical labels - couples, labels = skipgrams(np.arange(5), vocabulary_size=5, window_size=1, - categorical=True) - for couple in couples: - assert couple[0] - couple[1] <= 3 - for l in labels: - assert len(l) == 2 - - -def test_remove_long_seq(): - maxlen = 5 - seq = [ - [1, 2, 3], - [1, 2, 3, 4, 5, 6], - ] - label = ['a', 'b'] - new_seq, new_label = _remove_long_seq(maxlen, seq, label) - assert new_seq == [[1, 2, 3]] - assert new_label == ['a'] - - -def test_TimeseriesGenerator(): - data = np.array([[i] for i in range(50)]) - targets = np.array([[i] for i in range(50)]) - - data_gen = TimeseriesGenerator(data, targets, - 
length=10, sampling_rate=2, - batch_size=2) - assert len(data_gen) == 20 - assert (np.allclose(data_gen[0][0], - np.array([[[0], [2], [4], [6], [8]], - [[1], [3], [5], [7], [9]]]))) - assert (np.allclose(data_gen[0][1], - np.array([[10], [11]]))) - assert (np.allclose(data_gen[1][0], - np.array([[[2], [4], [6], [8], [10]], - [[3], [5], [7], [9], [11]]]))) - assert (np.allclose(data_gen[1][1], - np.array([[12], [13]]))) - - data_gen = TimeseriesGenerator(data, targets, - length=10, sampling_rate=2, reverse=True, - batch_size=2) - assert len(data_gen) == 20 - assert (np.allclose(data_gen[0][0], - np.array([[[8], [6], [4], [2], [0]], - [[9], [7], [5], [3], [1]]]))) - assert (np.allclose(data_gen[0][1], - np.array([[10], [11]]))) - - data_gen = TimeseriesGenerator(data, targets, - length=10, sampling_rate=2, shuffle=True, - batch_size=1) - batch = data_gen[0] - r = batch[1][0][0] - assert (np.allclose(batch[0], - np.array([[[r - 10], - [r - 8], - [r - 6], - [r - 4], - [r - 2]]]))) - assert (np.allclose(batch[1], np.array([[r], ]))) - - data_gen = TimeseriesGenerator(data, targets, - length=10, sampling_rate=2, stride=2, - batch_size=2) - assert len(data_gen) == 10 - assert (np.allclose(data_gen[1][0], - np.array([[[4], [6], [8], [10], [12]], - [[6], [8], [10], [12], [14]]]))) - assert (np.allclose(data_gen[1][1], - np.array([[14], [16]]))) - - data_gen = TimeseriesGenerator(data, targets, - length=10, sampling_rate=2, - start_index=10, end_index=30, - batch_size=2) - assert len(data_gen) == 6 - assert (np.allclose(data_gen[0][0], - np.array([[[10], [12], [14], [16], [18]], - [[11], [13], [15], [17], [19]]]))) - assert (np.allclose(data_gen[0][1], - np.array([[20], [21]]))) - - data = np.array([np.random.random_sample((1, 2, 3, 4)) for i in range(50)]) - targets = np.array([np.random.random_sample((3, 2, 1)) for i in range(50)]) - data_gen = TimeseriesGenerator(data, targets, - length=10, sampling_rate=2, - start_index=10, end_index=30, - batch_size=2) - assert len(data_gen) == 6 - assert np.allclose(data_gen[0][0], np.array( - [np.array(data[10:19:2]), np.array(data[11:20:2])])) - assert (np.allclose(data_gen[0][1], - np.array([targets[20], targets[21]]))) - - with assert_raises(ValueError) as context: - TimeseriesGenerator(data, targets, length=50) - error = str(context.exception) - assert '`start_index+length=50 > end_index=49` is disallowed' in error - - -def test_TimeSeriesGenerator_doesnt_miss_any_sample(): - x = np.array([[i] for i in range(10)]) - - for length in range(3, 10): - g = TimeseriesGenerator(x, x, - length=length, - batch_size=1) - expected = max(0, len(x) - length) - actual = len(g) - - assert expected == actual - - if len(g) > 0: - # All elements in range(length, 10) should be used as current step - expected = np.arange(length, 10).reshape(-1, 1) - - y = np.concatenate([g[ix][1] for ix in range(len(g))], axis=0) - assert_allclose(y, expected) - - x = np.array([[i] for i in range(23)]) - - strides = (1, 1, 5, 7, 3, 5, 3) - lengths = (3, 3, 4, 3, 1, 3, 7) - batch_sizes = (6, 6, 6, 5, 6, 6, 6) - shuffles = (False, True, True, False, False, False, False) - - for stride, length, batch_size, shuffle in zip(strides, - lengths, - batch_sizes, - shuffles): - g = TimeseriesGenerator(x, x, - length=length, - sampling_rate=1, - stride=stride, - start_index=0, - end_index=None, - shuffle=shuffle, - reverse=False, - batch_size=batch_size) - if shuffle: - # all batches have the same size when shuffle is True. 
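- # (when shuffling, TimeseriesGenerator draws every batch at full batch_size, - # so the expected sequence count is rounded up to a whole number of batches)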
- expected_sequences = ceil( - (23 - length) / float(batch_size * stride)) * batch_size - else: - # last batch will be different if `(samples - length) / stride` - # is not a multiple of `batch_size`. - expected_sequences = ceil((23 - length) / float(stride)) - - expected_batches = ceil(expected_sequences / float(batch_size)) - - y = [g[ix][1] for ix in range(len(g))] - - actual_sequences = sum(len(_y) for _y in y) - actual_batches = len(y) - - assert expected_sequences == actual_sequences - assert expected_batches == actual_batches - - -if __name__ == '__main__': - pytest.main([__file__]) -# -*- coding: utf-8 -*- - -import numpy as np -import pytest - -from keras.preprocessing.text import Tokenizer -from keras.preprocessing.text import one_hot -from keras.preprocessing.text import hashing_trick -from keras.preprocessing.text import text_to_word_sequence - - -def test_one_hot(): - text = 'The cat sat on the mat.' - encoded = one_hot(text, 5) - assert len(encoded) == 6 - assert np.max(encoded) <= 4 - assert np.min(encoded) >= 0 - - -def test_hashing_trick_hash(): - text = 'The cat sat on the mat.' - encoded = hashing_trick(text, 5) - assert len(encoded) == 6 - assert np.max(encoded) <= 4 - assert np.min(encoded) >= 1 - - -def test_hashing_trick_md5(): - text = 'The cat sat on the mat.' - encoded = hashing_trick(text, 5, hash_function='md5') - assert len(encoded) == 6 - assert np.max(encoded) <= 4 - assert np.min(encoded) >= 1 - - -def test_tokenizer(): - texts = ['The cat sat on the mat.', - 'The dog sat on the log.', - 'Dogs and cats living together.'] - tokenizer = Tokenizer(num_words=10) - tokenizer.fit_on_texts(texts) - - sequences = [] - for seq in tokenizer.texts_to_sequences_generator(texts): - sequences.append(seq) - assert np.max(np.max(sequences)) < 10 - assert np.min(np.min(sequences)) == 1 - - tokenizer.fit_on_sequences(sequences) - - for mode in ['binary', 'count', 'tfidf', 'freq']: - matrix = tokenizer.texts_to_matrix(texts, mode) - - -def test_sequential_fit(): - texts = ['The cat sat on the mat.', - 'The dog sat on the log.', - 'Dogs and cats living together.'] - word_sequences = [ - ['The', 'cat', 'is', 'sitting'], - ['The', 'dog', 'is', 'standing'] - ] - - tokenizer = Tokenizer() - tokenizer.fit_on_texts(texts) - tokenizer.fit_on_texts(word_sequences) - - assert tokenizer.document_count == 5 - - tokenizer.texts_to_matrix(texts) - tokenizer.texts_to_matrix(word_sequences) - - -def test_text_to_word_sequence(): - text = 'hello! ? world!' - assert text_to_word_sequence(text) == ['hello', 'world'] - - -def test_text_to_word_sequence_multichar_split(): - text = 'hello!stop?world!' - assert text_to_word_sequence(text, split='stop') == ['hello', 'world'] - - -def test_text_to_word_sequence_unicode(): - text = u'ali! veli? 
kırk dokuz elli' - assert (text_to_word_sequence(text) == - [u'ali', u'veli', u'kırk', u'dokuz', u'elli']) - - -def test_text_to_word_sequence_unicode_multichar_split(): - text = u'ali!stopveli?stopkırkstopdokuzstopelli' - assert (text_to_word_sequence(text, split='stop') == - [u'ali', u'veli', u'kırk', u'dokuz', u'elli']) - - -def test_tokenizer_unicode(): - texts = [u'ali veli kırk dokuz elli', - u'ali veli kırk dokuz elli veli kırk dokuz'] - tokenizer = Tokenizer(num_words=5) - tokenizer.fit_on_texts(texts) - - assert len(tokenizer.word_counts) == 5 - - -def test_tokenizer_oov_flag(): - """ - Test of Out of Vocabulary (OOV) flag in Tokenizer - """ - x_train = ['This text has only known words'] - x_test = ['This text has some unknown words'] # 2 OOVs: some, unknown - - # Default, without OOV flag - tokenizer = Tokenizer() - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - assert len(x_test_seq[0]) == 4 # discards 2 OOVs - - # With OOV feature - tokenizer = Tokenizer(oov_token='') - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - assert len(x_test_seq[0]) == 6 # OOVs marked in place - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -from numpy.testing import assert_allclose -import numpy as np -import scipy.sparse as sparse -import warnings - -from keras import backend as K -from keras.utils.conv_utils import convert_kernel -from keras.backend import numpy_backend as KNP - - -try: - from keras.backend import cntk_backend as KC -except ImportError: - KC = None - warnings.warn('Could not import the CNTK backend') - -try: - from keras.backend import tensorflow_backend as KTF -except ImportError: - KTF = None - warnings.warn('Could not import the TensorFlow backend.') - -try: - from keras.backend import theano_backend as KTH -except ImportError: - KTH = None - warnings.warn('Could not import the Theano backend') - -if K.backend() == 'theano': - WITH_NP = [KTH, KNP] -elif K.backend() == 'cntk': - WITH_NP = [KC, KNP] -else: - WITH_NP = [KTF, KNP] - -if K.backend() == 'cntk': - supports_sparse = False -elif K.backend() == 'theano' and not KTH.th_sparse_module: - supports_sparse = False -else: - supports_sparse = True - - -def check_dtype(var, dtype): - if K.backend() == 'tensorflow': - assert dtype in str(var.dtype.name) - else: - assert dtype in str(var.dtype) - - -def cntk_func_tensors(function_name, shapes_or_vals, **kwargs): - placeholders = [] - variables = [] - for shape_or_val in shapes_or_vals: - if isinstance(shape_or_val, tuple): - shape = shape_or_val - placeholders.append(KC.placeholder(shape)) - else: - value = shape_or_val - variables.append(KC.variable(value)) - - output_cntk = getattr(KC, function_name)( - *(placeholders + variables), **kwargs) - cntk_func = KC.function(placeholders, [output_cntk]) - return output_cntk, cntk_func - - -def parse_shape_or_val(shape_or_val): - if isinstance(shape_or_val, np.ndarray): - return shape_or_val.shape, shape_or_val - else: - return shape_or_val, np.random.random(shape_or_val).astype(np.float32) - 0.5 - - -def assert_list_pairwise(z_list, - shape=True, - allclose=True, - itself=False, - atol=1e-05): - for (z1, z2) in zip(z_list[1:], z_list[:-1]): - if shape: - assert z1.shape == z2.shape - if allclose: - assert_allclose(z1, z2, atol=atol) - if itself: - assert z1 == z2 - - -def assert_list_keras_shape(t_list, z_list): - for t, z in zip(t_list, z_list): - if hasattr(t, '_keras_shape') and len(t._keras_shape) > 1: - for i, s in 
enumerate(t._keras_shape): - if s: - assert t._keras_shape[i] == z.shape[i] - - -def check_single_tensor_operation(function_name, - x_shape_or_val, - backend_list, - **kwargs): - shape_or_val = kwargs.pop('shape_or_val', True) - assert_value_equality = kwargs.pop('assert_value_equality', True) - cntk_dynamicity = kwargs.pop('cntk_dynamicity', False) - - if shape_or_val: - x_shape, x_val = parse_shape_or_val(x_shape_or_val) - - t_list = [] - z_list = [] - for k in backend_list: - if shape_or_val: - if (k == KC) & (cntk_dynamicity): - t, f = cntk_func_tensors(function_name, [x_shape], **kwargs) - z = f([x_val])[0] - else: - t = getattr(k, function_name)(k.variable(x_val), **kwargs) - z = k.eval(t) - else: - t = getattr(k, function_name)(x_shape_or_val, **kwargs) - z = k.eval(t) - t_list += [t] - z_list += [z] - - assert_list_pairwise(z_list, allclose=assert_value_equality) - assert_list_keras_shape(t_list, z_list) - - -def check_two_tensor_operation(function_name, - x_shape_or_val, - y_shape_or_val, - backend_list, - **kwargs): - concat_args = kwargs.pop('concat_args', False) - cntk_dynamicity = kwargs.pop('cntk_dynamicity', False) - cntk_two_dynamicity = kwargs.pop('cntk_two_dynamicity', False) - - x_shape, x_val = parse_shape_or_val(x_shape_or_val) - y_shape, y_val = parse_shape_or_val(y_shape_or_val) - - t_list = [] - z_list = [] - for k in backend_list: - if (k == KC) & (cntk_dynamicity): - t, f = cntk_func_tensors(function_name, [x_shape, y_val], **kwargs) - z = f([x_val])[0] - elif (k == KC) & (cntk_two_dynamicity): - t, f = cntk_func_tensors( - function_name, [x_shape, y_shape], **kwargs) - z = f([x_val, y_val])[0] - elif (k == KTH) & (function_name[:4] == 'conv'): - t = getattr(k, function_name)( - k.variable(x_val), k.variable(convert_kernel(y_val)), **kwargs) - z = k.eval(t) - elif concat_args: - t = getattr(k, function_name)( - [k.variable(x_val), k.variable(y_val)], **kwargs) - z = k.eval(t) - else: - t = getattr(k, function_name)( - k.variable(x_val), k.variable(y_val), **kwargs) - z = k.eval(t) - t_list += [t] - z_list += [z] - - assert_list_pairwise(z_list) - assert_list_keras_shape(t_list, z_list) - - -def check_composed_tensor_operations(first_function_name, - first_function_args, - second_function_name, - second_function_args, - input_shape, - backend_list): - val = np.random.random(input_shape) - 0.5 - - z_list = [] - for k in backend_list: - x = k.variable(val) - y = getattr(k, first_function_name)(x, **first_function_args) - z = k.eval(getattr(k, second_function_name)(y, **second_function_args)) - z_list += [z] - - assert_list_pairwise(z_list) - - -def check_rnn_operation(step_function_k, - step_function_np, - inputs_np, - initial_states_np, - mask_np=None, - constants_np=None, - **kwargs): - inputs_k = K.variable(inputs_np) - initial_states_k = [K.variable(s) for s in initial_states_np] - if mask_np is not None: - mask_k = K.variable(mask_np) - else: - mask_k = None - if constants_np is not None: - constants_k = [K.variable(c) for c in constants_np] - else: - constants_k = None - - last_output_np, output_np, last_states_np = KNP.rnn( - step_function_np, - inputs_np, - initial_states_np, - mask=mask_np, - constants=constants_np, - **kwargs) - # note that numpy reference implementation is independent of `unroll` argument - - for unroll in [True, False]: - last_output_k, output_k, last_states_k = K.rnn( - step_function_k, - inputs_k, - initial_states_k, - mask=mask_k, - constants=constants_k, - unroll=unroll, - input_length=inputs_np.shape[1] if unroll else None, - 
**kwargs) - - last_states_k = [K.eval(s) for s in last_states_k] - last_output_k = K.eval(last_output_k) - output_k = K.eval(output_k) - - assert_allclose(last_output_k, last_output_np, atol=1e-05) - assert_allclose(output_k, output_np, atol=1e-05) - assert len(last_states_k) == len(last_states_np) - for s_k, s_np in zip(last_states_k, last_states_np): - assert_allclose(s_k, s_np, atol=1e-05) - - -class TestBackend(object): - - def test_is_keras_tensor(self): - np_var = np.array([1, 2]) - with pytest.raises(ValueError): - K.is_keras_tensor(np_var) - - keras_var = K.variable(np_var) - assert K.is_keras_tensor(keras_var) is False - keras_placeholder = K.placeholder(shape=(2, 4, 5)) - assert K.is_keras_tensor(keras_placeholder) is False - - def test_set_learning_phase(self): - # not supported learning_phase - with pytest.raises(ValueError): - K.set_learning_phase(2) - - def test_creation_operations(self): - check_single_tensor_operation('eye', 3, WITH_NP, shape_or_val=False) - check_single_tensor_operation( - 'eye', (3, 2), WITH_NP, shape_or_val=False) - check_single_tensor_operation( - 'eye', (3, 4), WITH_NP, shape_or_val=False) - - check_single_tensor_operation('ones', (3, 5, 10, 8), - WITH_NP, shape_or_val=False) - check_single_tensor_operation('zeros', (3, 5, 10, 8), - WITH_NP, shape_or_val=False) - - check_single_tensor_operation('ones_like', (3, 5, 10, 8), WITH_NP) - check_single_tensor_operation('zeros_like', (3, 5, 10, 8), WITH_NP) - - def test_linear_operations(self): - check_two_tensor_operation('dot', (4, 2), (2, 4), WITH_NP) - check_two_tensor_operation('dot', (4, 2), (5, 2, 3), WITH_NP) - - check_two_tensor_operation('batch_dot', (4, 2, 3), (4, 5, 3), - WITH_NP, cntk_two_dynamicity=True, axes=(2, 2)) - check_two_tensor_operation('batch_dot', (4, 2, 3), (4, 3), - WITH_NP, cntk_two_dynamicity=True, axes=(2, 1)) - check_two_tensor_operation('batch_dot', (4, 2), (4, 2, 3), - WITH_NP, cntk_two_dynamicity=True, axes=(1, 1)) - check_two_tensor_operation('batch_dot', (32, 20), (32, 20), - WITH_NP, cntk_two_dynamicity=True, axes=1) - check_two_tensor_operation('batch_dot', (32, 20), (32, 20), - WITH_NP, cntk_two_dynamicity=True, axes=(1, 1)) - check_two_tensor_operation('batch_dot', (4, 2, 3), (4, 5, 3), - WITH_NP, axes=(2, 2)) - check_two_tensor_operation('batch_dot', (4, 2, 3), (4, 3), - WITH_NP, axes=(2, 1)) - check_two_tensor_operation('batch_dot', (4, 2), (4, 2, 3), - WITH_NP, axes=(1, 1)) - check_two_tensor_operation('batch_dot', (32, 20), (32, 20), - WITH_NP, axes=1) - check_two_tensor_operation('batch_dot', (32, 20), (32, 20), - WITH_NP, axes=(1, 1)) - - check_single_tensor_operation('transpose', (4, 2), WITH_NP) - check_single_tensor_operation('reverse', (4, 3, 2), WITH_NP, axes=1) - check_single_tensor_operation( - 'reverse', (4, 3, 2), WITH_NP, axes=(1, 2)) - check_single_tensor_operation( - 'reverse', (4, 3, 2), WITH_NP, axes=(0, -1)) - - def test_random_variables(self): - check_single_tensor_operation('random_uniform_variable', (2, 3), WITH_NP, - low=0., high=1., - shape_or_val=False, - assert_value_equality=False) - check_single_tensor_operation('random_normal_variable', (2, 3), WITH_NP, - mean=0., scale=1., - shape_or_val=False, - assert_value_equality=False) - - def test_batch_dot_shape(self): - # Note : batch_dot implementation is different for - # placeholders and variables in CNTK backend - - test_cases = [] - test_cases.append([(None, 3, 4, 5), (None, 2, 3, 4), (2, 3)]) - test_cases.append([(None, 3, 4, 5), (None, 2, 4), 2]) - test_cases.append([(None, 3, 4), (None, 2, 
3, 4), (2, 3)]) - test_cases.append([(None, 4, 3), (None, 3, 5), (2, 1)]) - test_cases.append([(None, 4), (None, 3, 4), (1, 2)]) - test_cases.append([(None, 4), (None, 4), None]) - - batch_size = 7 - - def batch_shape(shape): - return (batch_size, ) + shape[1:] - - def random(shape): - return np.random.random(batch_shape(shape)) - - for x_shape, y_shape, axes in test_cases: - x_np = random(x_shape) - y_np = random(y_shape) - z_np = KNP.batch_dot(x_np, y_np, axes) - - # test with placeholders - x = K.placeholder(shape=x_shape) - y = K.placeholder(shape=y_shape) - z = K.batch_dot(x, y, axes) - - z_shape = K.int_shape(z) - if z_shape is not None: - assert z_shape[1:] == z_np.shape[1:] - - f = K.function([x, y], [z]) - - assert_allclose(f([x_np, y_np])[0], z_np, atol=1e-05) - - # test with placeholders (no shape info) - if K.backend() != 'cntk': - x = K.placeholder(ndim=len(x_shape)) - y = K.placeholder(ndim=len(y_shape)) - z = K.batch_dot(x, y, axes) - - z_shape = K.int_shape(z) - if z_shape is not None: - assert len(z_shape) == z_np.ndim - assert set(z_shape) <= set((None, 1)) - - f = K.function([x, y], [z]) - - assert_allclose(f([x_np, y_np])[0], z_np, atol=1e-05) - - # test with variables - x = K.variable(x_np) - y = K.variable(y_np) - z = K.batch_dot(x, y, axes) - - z_shape = K.int_shape(z) - if z_shape is not None: - assert z_shape[1:] == z_np.shape[1:] - - z = K.eval(z) - assert_allclose(z, z_np, atol=1e-05) - - def test_shape_operations(self): - check_single_tensor_operation('reshape', (4, 2), WITH_NP, shape=(8, 1)) - check_single_tensor_operation('permute_dimensions', (4, 2, 3), WITH_NP, - pattern=(2, 0, 1)) - check_single_tensor_operation('repeat', (4, 1), WITH_NP, n=3) - check_single_tensor_operation('flatten', (4, 1), WITH_NP) - check_single_tensor_operation('batch_flatten', (20, 2, 5), WITH_NP, - cntk_dynamicity=True) - check_single_tensor_operation('expand_dims', (4, 3), WITH_NP, axis=-1) - check_single_tensor_operation( - 'expand_dims', (4, 3, 2), WITH_NP, axis=1) - check_single_tensor_operation('squeeze', (4, 3, 1), WITH_NP, axis=2) - check_single_tensor_operation('squeeze', (4, 1, 1), WITH_NP, axis=1) - check_composed_tensor_operations('reshape', {'shape': (4, 3, 1, 1)}, - 'squeeze', {'axis': 2}, - (4, 3, 1, 1), WITH_NP) - - @pytest.mark.skipif(K.backend() != 'theano', - reason='We only test the shape inference of the ' - 'theano backend.') - def test_none_shape_operations(self): - # Test shape inference when input - # shape has `None` entries - x = K.placeholder((3, None, 4)) - - y = K.batch_flatten(x) - if hasattr(y, '_keras_shape'): - assert y._keras_shape == (3, None) - - y = K.flatten(x) - if hasattr(y, '_keras_shape'): - assert y._keras_shape == (None,) - - def test_repeat_elements(self): - reps = 3 - for ndims in [1, 2, 3]: - shape = np.arange(2, 2 + ndims) - arr = np.arange(np.prod(shape)).reshape(shape) - - for rep_axis in range(ndims): - check_single_tensor_operation('repeat_elements', arr, WITH_NP, - rep=reps, axis=rep_axis) - - if K.backend() != 'cntk': - shape = list(shape) - shape[rep_axis] = None - x = K.placeholder(shape=shape) - y = K.repeat_elements(x, reps, axis=rep_axis) - assert y._keras_shape == tuple(shape) - assert y._keras_shape == K.int_shape(y) - - def test_tile(self): - check_single_tensor_operation('tile', (3, 4), WITH_NP, n=2) - check_single_tensor_operation('tile', (3, 4), WITH_NP, n=(2, 1)) - check_single_tensor_operation('tile', (3, 4, 5), WITH_NP, n=2) - check_single_tensor_operation('tile', (3, 4, 5), WITH_NP, n=(1, 2)) - 
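Quick aside: the `batch_dot` cases above all boil down to a per-sample contraction over the chosen axes. A minimal sketch of what is being asserted, written against the Keras 2.x `keras.backend` API (illustrative only, not part of the scraped file):

    import numpy as np
    from keras import backend as K

    x = K.variable(np.random.random((4, 2, 3)))
    y = K.variable(np.random.random((4, 5, 3)))
    # contract axis 2 of x with axis 2 of y, independently for each of the 4 samples
    z = K.batch_dot(x, y, axes=(2, 2))
    print(K.int_shape(z))  # (4, 2, 5)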
check_single_tensor_operation('tile', (3, 4, 5), WITH_NP, n=(3, 1, 2)) - - # test theano shape inference when - # input shape has None entries - if K.backend() == 'theano': - x = K.placeholder(shape=(None, 4)) - n = 2 - y = K.tile(x, n) - assert y._keras_shape == (None, 8) - n = (4, 3) - y = K.tile(x, n) - assert y._keras_shape == (None, 12) - - def test_gather(self): - shape = (10, 2, 3) - ref = np.arange(np.prod(shape)).reshape(shape) - inds = [1, 3, 7, 9] - t_list = [k.gather(k.variable(ref), k.variable(inds, dtype='int32')) - for k in WITH_NP] - z_list = [k.eval(k.gather(k.variable(ref), k.variable(inds, dtype='int32'))) - for k in WITH_NP] - - assert_list_pairwise(z_list) - assert_list_keras_shape(t_list, z_list) - - # test theano shape inference when - # input shape has None entries - if K.backend() == 'theano': - x = K.placeholder(shape=(None, 3, 4)) - indices = K.placeholder(shape=(5, 6), dtype='int32') - y = K.gather(x, indices) - assert y._keras_shape == (5, 6, 3, 4) - - @pytest.mark.parametrize('function_name', - ['get_value', 'count_params', - 'int_shape', 'get_variable_shape']) - def test_value_manipulation(self, function_name): - val = np.random.random((4, 2)) - v_list = [getattr(k, function_name)(k.variable(val)) - for k in WITH_NP] - - if function_name == 'get_value': - assert_list_pairwise(v_list) - else: - assert_list_pairwise(v_list, shape=False, - allclose=False, itself=True) - - def test_print_tensor(self): - check_single_tensor_operation('print_tensor', (), WITH_NP) - check_single_tensor_operation('print_tensor', (2,), WITH_NP) - check_single_tensor_operation('print_tensor', (4, 3), WITH_NP) - check_single_tensor_operation('print_tensor', (1, 2, 3), WITH_NP) - - def test_elementwise_operations(self): - check_single_tensor_operation('max', (4, 2), WITH_NP) - check_single_tensor_operation( - 'max', (4, 2), WITH_NP, axis=1, keepdims=True) - check_single_tensor_operation('max', (4, 2, 3), WITH_NP, axis=[1, -1]) - - check_single_tensor_operation('min', (4, 2), WITH_NP) - check_single_tensor_operation( - 'min', (4, 2), WITH_NP, axis=1, keepdims=True) - check_single_tensor_operation('min', (4, 2, 3), WITH_NP, axis=[1, -1]) - - check_single_tensor_operation('mean', (4, 2), WITH_NP) - check_single_tensor_operation( - 'mean', (4, 2), WITH_NP, axis=1, keepdims=True) - check_single_tensor_operation('mean', (4, 2, 3), - WITH_NP, axis=-1, keepdims=True) - check_single_tensor_operation('mean', (4, 2, 3), WITH_NP, axis=[1, -1]) - - check_single_tensor_operation('var', (4, 2), WITH_NP) - check_single_tensor_operation( - 'var', (4, 2), WITH_NP, axis=1, keepdims=True) - check_single_tensor_operation('var', (4, 2, 3), WITH_NP, axis=[1, -1]) - - check_single_tensor_operation('std', (4, 2), WITH_NP) - check_single_tensor_operation( - 'std', (4, 2), WITH_NP, axis=1, keepdims=True) - check_single_tensor_operation('std', (4, 2, 3), WITH_NP, axis=[1, -1]) - - check_single_tensor_operation('prod', (4, 2), WITH_NP) - check_single_tensor_operation( - 'prod', (4, 2), WITH_NP, axis=1, keepdims=True) - check_single_tensor_operation('prod', (4, 2, 3), WITH_NP, axis=[1, -1]) - - check_single_tensor_operation('any', (4, 2), WITH_NP) - check_single_tensor_operation( - 'any', (4, 2), WITH_NP, axis=1, keepdims=True) - check_single_tensor_operation('any', (4, 2, 3), WITH_NP, axis=[1, -1]) - - check_single_tensor_operation('all', (4, 2), WITH_NP) - check_single_tensor_operation( - 'all', (4, 2), WITH_NP, axis=1, keepdims=True) - check_single_tensor_operation('all', (4, 2, 3), WITH_NP, axis=[1, -1]) - - 
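Note on the reduction checks above: `max`, `min`, `mean`, `var`, `std`, `prod`, `any` and `all` are each exercised with `axis=[1, -1]`, i.e. the backend is expected to collapse several axes in a single call. A small sketch (assuming a TensorFlow-backed `keras.backend`; illustrative only):

    import numpy as np
    from keras import backend as K

    x = K.variable(np.random.random((4, 2, 3)))
    print(K.eval(K.mean(x, axis=[1, -1])).shape)           # (4,): axes 1 and 2 reduced together
    print(K.eval(K.mean(x, axis=1, keepdims=True)).shape)  # (4, 1, 3): reduced axis kept as size 1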
check_single_tensor_operation('argmax', (4, 2), WITH_NP) - check_single_tensor_operation('argmax', (4, 2), WITH_NP, axis=1) - - check_single_tensor_operation('argmin', (4, 2), WITH_NP) - check_single_tensor_operation('argmin', (4, 2), WITH_NP, axis=1) - - check_single_tensor_operation('square', (4, 2), WITH_NP) - check_single_tensor_operation('abs', (4, 2), WITH_NP) - check_single_tensor_operation('sqrt', (4, 2), WITH_NP) - check_single_tensor_operation('exp', (4, 2), WITH_NP) - - check_single_tensor_operation('round', (4, 2), WITH_NP) - check_single_tensor_operation('sign', (4, 2), WITH_NP) - check_single_tensor_operation('pow', (4, 2), WITH_NP, a=3) - check_single_tensor_operation('clip', (4, 2), WITH_NP, min_value=0.4, - max_value=0.6) - - check_single_tensor_operation('cos', (4, 2), WITH_NP) - check_single_tensor_operation('sin', (4, 2), WITH_NP) - - # two-tensor ops - check_two_tensor_operation('equal', (4, 2), (4, 2), WITH_NP) - check_two_tensor_operation('not_equal', (4, 2), (4, 2), WITH_NP) - check_two_tensor_operation('greater', (4, 2), (4, 2), WITH_NP) - check_two_tensor_operation('greater_equal', (4, 2), (4, 2), WITH_NP) - check_two_tensor_operation('less', (4, 2), (4, 2), WITH_NP) - check_two_tensor_operation('less_equal', (4, 2), (4, 2), WITH_NP) - check_two_tensor_operation('maximum', (4, 2), (4, 2), WITH_NP) - check_two_tensor_operation('minimum', (4, 2), (4, 2), WITH_NP) - - # assumes first uid will always be the same - def test_reset_uids(self): - first = K.get_uid() - K.get_uid() - K.reset_uids() - assert K.get_uid() == first - - def test_cumsum(self): - check_single_tensor_operation('cumsum', (4, 2), WITH_NP) - check_single_tensor_operation('cumsum', (4, 2), WITH_NP, axis=1) - - def test_cumprod(self): - check_single_tensor_operation('cumprod', (4, 2), WITH_NP) - check_single_tensor_operation('cumprod', (4, 2), WITH_NP, axis=1) - - @pytest.mark.skipif(K.backend() == 'cntk', - reason='cntk return -85.1 for zero or ' - 'negative number, not nan, so can\'t ' - 'compare with other backend.') - def test_log(self): - check_single_tensor_operation('log', (4, 2), WITH_NP) - - @pytest.mark.skipif(K.backend() == 'theano', - reason='theano returns tuples for update ops') - def test_update_add(self): - x = np.random.randn(3, 4) - x_var = K.variable(x) - increment = np.random.randn(3, 4) - - x += increment - K.eval(K.update_add(x_var, increment)) - - assert_allclose(x, K.eval(x_var), atol=1e-05) - - @pytest.mark.skipif(K.backend() == 'theano', - reason='theano returns tuples for update ops') - def test_update_sub(self): - x = np.random.randn(3, 4) - x_var = K.variable(x) - decrement = np.random.randn(3, 4) - - x -= decrement - K.eval(K.update_sub(x_var, decrement)) - - assert_allclose(x, K.eval(x_var), atol=1e-05) - - @pytest.mark.skipif(K.backend() == 'cntk', - reason='cntk doesn\'t support gradient in this way.') - def test_gradient(self): - val = np.random.random((4, 2)) - x_list = [k.variable(val) for k in [KTH, KTF]] - z_list = [] - zero_list = [] - for x, k in zip(x_list, [KTH, KTF]): - exp = x * k.exp(x) - loss = k.sum(exp) - zero_loss = k.stop_gradient(loss) - grad = k.gradients(loss, [exp]) - zero_grad = k.gradients(loss + zero_loss, [exp]) - z_list.append(k.eval(grad[0])) - zero_list.append(k.eval(zero_grad[0])) - - assert_list_pairwise(z_list) - assert_list_pairwise(zero_list) - for i in range(len(z_list)): - assert_allclose(zero_list[i], z_list[i], atol=1e-05) - - def test_stop_gradient(self): - # This test checks the consistency of the stop_gradient backend API. 
- # It doesn't check the functionality (which is checked at the - # test_gradient test). - val = np.random.random((4, 2)) - a = K.variable(val) - b = K.square(a) - c, d = K.stop_gradient([a, b]) - e = K.stop_gradient(b) - - @pytest.mark.skipif(K.backend() == 'cntk', - reason='cntk currently not support function in this ' - 'way, so can\'t test as this.') - def test_function(self): - test_backend = [KTH, KTF] - val = np.random.random((4, 2)) - input_val = np.random.random((4, 2)) - - f_list = [] - x_list = [] - for k in test_backend: - x = k.variable(val) - x_list.append(x) - y = k.placeholder(ndim=2) - exp = k.square(x) + y - update = x * 2 - f = k.function([y], [exp], updates=[(x, update)]) - f_list.append(f) - - function_outputs_list = [f([input_val])[0] for f in f_list] - assert_list_pairwise(function_outputs_list) - - new_val_list = [k.get_value(x) for x, k in zip(x_list, test_backend)] - assert_list_pairwise(new_val_list) - - @pytest.mark.skipif(K.backend() != 'tensorflow', - reason='Uses the `fetches` argument.') - def test_function_tf_fetches(self): - # Additional operations can be passed to tf.Session().run() via its - # `fetches` arguments. In contrast to `updates` argument of - # KTF.function() these do not have control dependency on `outputs`, so - # they can run in parallel. Also they should not contribute to output of - # KTF.function(). - - x = K.variable(0.) - y = K.variable(0.) - x_placeholder = K.placeholder(shape=()) - y_placeholder = K.placeholder(shape=()) - - f = K.function(inputs=[x_placeholder, y_placeholder], - outputs=[x_placeholder + y_placeholder], - updates=[(x, x_placeholder + 1.)], - fetches=[K.update(y, 5.)]) - output = f([10., 20.]) - assert output == [30.] - assert K.get_session().run(fetches=[x, y]) == [11., 5.] - - @pytest.mark.skipif(K.backend() != 'tensorflow', - reason='Uses the `feed_dict` argument.') - def test_function_tf_feed_dict(self): - # Additional substitutions can be passed to `tf.Session().run()` via its - # `feed_dict` arguments. Note that the feed_dict is passed once in the - # constructor but we can modify the values in the dictionary. Through - # this feed_dict we can provide additional substitutions besides Keras - # inputs. - - x = K.variable(0.) - y = K.variable(0.) - x_placeholder = K.placeholder(shape=()) - y_placeholder = K.placeholder(shape=()) - - feed_dict = {y_placeholder: 3.} - - f = K.function(inputs=[x_placeholder], - outputs=[x_placeholder + 1.], - updates=[(x, x_placeholder + 10.)], - feed_dict=feed_dict, - fetches=[K.update(y, y_placeholder * 10.)]) - output = f([10.]) - assert output == [11.] - assert K.get_session().run(fetches=[x, y]) == [20., 30.] - - # updated value in feed_dict will be modified within the K.function() - feed_dict[y_placeholder] = 4. - output = f([20.]) - assert output == [21.] - assert K.get_session().run(fetches=[x, y]) == [30., 40.] - - @pytest.mark.skipif(K.backend() != 'tensorflow', - reason='Uses the `options` and `run_metadata` arguments.') - def test_function_tf_run_options_with_run_metadata(self): - from tensorflow.core.protobuf import config_pb2 - x_placeholder = K.placeholder(shape=()) - y_placeholder = K.placeholder(shape=()) - - run_options = config_pb2.RunOptions(output_partition_graphs=True) - run_metadata = config_pb2.RunMetadata() - # enable run_options. - f = K.function(inputs=[x_placeholder, y_placeholder], - outputs=[x_placeholder + y_placeholder], - options=run_options, - run_metadata=run_metadata) - output = f([10., 20.]) - assert output == [30.] 
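The `K.function` tests above all lean on the same contract: outputs are computed from the fed placeholders, and `updates` are applied to the variables as a side effect of each call. A minimal sketch of that contract (TensorFlow/Theano backends; illustrative only):

    from keras import backend as K

    x = K.variable(0.)
    inp = K.placeholder(shape=())
    f = K.function([inp], [inp + 1.], updates=[(x, inp * 2.)])
    print(f([10.]))        # [11.0]: the output comes from the placeholder
    print(K.get_value(x))  # 20.0: the update ran as a side effect of the call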
- assert len(run_metadata.partition_graphs) > 0 - # disable run_options. - f = K.function(inputs=[x_placeholder, y_placeholder], - outputs=[x_placeholder + y_placeholder], - run_metadata=run_metadata) - output = f([10., 20.]) - assert output == [30.] - assert len(run_metadata.partition_graphs) == 0 - - @pytest.mark.skipif(K.backend() != 'tensorflow', - reason='Uses the `string` type for a tensor.') - def test_function_tf_string_input(self): - # Test functions with string inputs. - - x_placeholder = K.placeholder(shape=(), dtype="string") - x_identity = K.identity(x_placeholder) - - f = K.function(inputs=[x_placeholder], outputs=[x_identity]) - output = f([b'test']) - assert output == [b'test'] - - def test_rnn(self): - # implement a simple RNN - num_samples = 4 - input_dim = 5 - output_dim = 3 - timesteps = 6 - - _, x = parse_shape_or_val((num_samples, timesteps, input_dim)) - _, h0 = parse_shape_or_val((num_samples, output_dim)) - _, wi = parse_shape_or_val((input_dim, output_dim)) - _, wh = parse_shape_or_val((output_dim, output_dim)) - mask = np.random.randint(2, size=(num_samples, timesteps)) - - wi_k = K.variable(wi) - wh_k = K.variable(wh) - - def get_step_function(backend, w_i, w_h): - - def simple_rnn(inputs, states): - assert len(states) == 1 - h = states[0] - y = backend.dot(inputs, w_i) + backend.dot(h, w_h) - return y, [y] - - return simple_rnn - - kwargs_list = [ - {'go_backwards': False, 'mask': None}, - {'go_backwards': True, 'mask': None}, - {'go_backwards': False, 'mask': mask}, - {'go_backwards': True, 'mask': mask}, - ] - for kwargs in kwargs_list: - check_rnn_operation(step_function_k=get_step_function(K, wi_k, wh_k), - step_function_np=get_step_function( - KNP, wi, wh), - inputs_np=x, - initial_states_np=[h0], - mask_np=kwargs.pop('mask', None), - **kwargs) - - def test_rnn_additional_states(self): - # implement a simple RNN with an additional state - # whose shape is different from that of the output - num_samples = 4 - input_dim = 5 - output_dim = 3 - timesteps = 6 - - _, x = parse_shape_or_val((num_samples, timesteps, input_dim)) - _, h0 = parse_shape_or_val((num_samples, output_dim)) - h1 = np.concatenate([h0, h0], axis=-1) - _, wi = parse_shape_or_val((input_dim, output_dim)) - _, wh = parse_shape_or_val((output_dim, output_dim)) - mask = np.random.randint(2, size=(num_samples, timesteps)) - - wi_k = K.variable(wi) - wh_k = K.variable(wh) - - def get_step_function(backend, w_i, w_h): - - def simple_rnn_with_extra_mock_state(inputs, states): - assert len(states) == 2 - h = states[0] - y = backend.dot(inputs, w_i) + backend.dot(h, w_h) - return y, [y, backend.concatenate([y, y], axis=-1)] - - return simple_rnn_with_extra_mock_state - - kwargs_list = [ - {'go_backwards': False, 'mask': None}, - {'go_backwards': True, 'mask': None}, - {'go_backwards': False, 'mask': mask}, - {'go_backwards': True, 'mask': mask}, - ] - for kwargs in kwargs_list: - check_rnn_operation(step_function_k=get_step_function(K, wi_k, wh_k), - step_function_np=get_step_function( - KNP, wi, wh), - inputs_np=x, - initial_states_np=[h0, h1], - mask_np=kwargs.pop('mask', None), - **kwargs) - - def test_rnn_no_states(self): - # implement a simple RNN without states - num_samples = 3 - input_dim = 8 - output_dim = 4 - timesteps = 5 - - _, x = parse_shape_or_val((num_samples, timesteps, input_dim)) - _, wi = parse_shape_or_val((input_dim, output_dim)) - mask = np.random.randint(2, size=(num_samples, timesteps)) - - wi_k = K.variable(wi) - - def get_step_function(backend, w_i): - - def 
simple_no_states(inputs, states): - assert len(states) == 0 - y = backend.dot(inputs, w_i) - return y, [] - - return simple_no_states - - kwargs_list = [ - {'go_backwards': False, 'mask': None}, - {'go_backwards': True, 'mask': None}, - {'go_backwards': False, 'mask': mask}, - {'go_backwards': True, 'mask': mask}, - ] - for kwargs in kwargs_list: - check_rnn_operation(step_function_k=get_step_function(K, wi_k), - step_function_np=get_step_function(KNP, wi), - inputs_np=x, - initial_states_np=[], - mask_np=kwargs.pop('mask', None), - **kwargs) - - def test_rnn_constants(self): - # implement a simple RNN - num_samples = 4 - input_dim = 5 - output_dim = 3 - timesteps = 6 - - _, x = parse_shape_or_val((num_samples, timesteps, input_dim)) - _, h0 = parse_shape_or_val((num_samples, output_dim)) - _, c = parse_shape_or_val((num_samples, output_dim)) - _, wi = parse_shape_or_val((input_dim, output_dim)) - _, wh = parse_shape_or_val((output_dim, output_dim)) - mask = np.random.randint(2, size=(num_samples, timesteps)) - - wi_k = K.variable(wi) - wh_k = K.variable(wh) - - def get_step_function(backend, w_i, w_h): - - def simple_rnn_add_constant(inputs, states_and_constants): - # constants are appended to states in K.rnn - [h, c] = states_and_constants - y = backend.dot(inputs, w_i) + backend.dot(h, w_h) + c - return y, [y] - - return simple_rnn_add_constant - - kwargs_list = [ - {'go_backwards': False, 'mask': None}, - {'go_backwards': True, 'mask': None}, - {'go_backwards': False, 'mask': mask}, - {'go_backwards': True, 'mask': mask}, - ] - for kwargs in kwargs_list: - check_rnn_operation(step_function_k=get_step_function(K, wi_k, wh_k), - step_function_np=get_step_function( - KNP, wi, wh), - inputs_np=x, - initial_states_np=[h0], - mask_np=kwargs.pop('mask', None), - constants_np=[c], - **kwargs) - - def test_rnn_output_and_state_masking_independent(self): - num_samples = 2 - num_timesteps = 4 - state_and_io_size = 5 - mask_last_num_timesteps = 2 # for second sample only - - # a step function that just outputs inputs, - # but increments states +1 per timestep - def step_function(inputs, states): - return inputs, [s + 1 for s in states] - - inputs_vals = np.random.random( - (num_samples, num_timesteps, state_and_io_size)) - initial_state_vals = np.random.random((num_samples, state_and_io_size)) - # masking of two last timesteps for second sample only - mask_vals = np.ones((num_samples, num_timesteps)) - mask_vals[1, -mask_last_num_timesteps:] = 0 - - # outputs expected to be same as inputs for the first sample - expected_outputs = inputs_vals.copy() - # but for the second sample all outputs in masked region should be the same - # as last output before masked region - expected_outputs[1, -mask_last_num_timesteps:] = \ - expected_outputs[1, -(mask_last_num_timesteps + 1)] - - expected_state = initial_state_vals.copy() - # first state should be incremented for every timestep (no masking) - expected_state[0] += num_timesteps - # second state should not be incremented for last two timesteps - expected_state[1] += (num_timesteps - mask_last_num_timesteps) - - # verify same expected output for `unroll=true/false` - inputs = K.variable(inputs_vals) - initial_states = [K.variable(initial_state_vals)] - mask = K.variable(mask_vals) - for unroll in [True, False]: - last_output, outputs, last_states = K.rnn( - step_function, - inputs, - initial_states, - mask=mask, - unroll=unroll, - input_length=num_timesteps if unroll else None) - - assert_allclose(K.eval(outputs), expected_outputs) - 
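What the masking tests here pin down: for a masked timestep, `K.rnn` repeats the last unmasked output and stops updating the states, and this must hold for both `unroll=True` and `unroll=False`. A toy sketch of that contract (illustrative only):

    import numpy as np
    from keras import backend as K

    def step(x_t, states):
        # echo the input and bump the state by one, as in the tests above
        return x_t, [s + 1 for s in states]

    x = K.variable(np.random.random((2, 4, 3)))
    h0 = K.variable(np.zeros((2, 3)))
    mask = K.variable([[1., 1., 1., 1.],
                       [1., 1., 0., 0.]])  # last two steps of sample 2 masked
    last_out, outputs, states = K.rnn(step, x, [h0], mask=mask)
    # (2, 4, 3); sample 2 repeats its step-2 output at the two masked steps
    print(K.eval(outputs).shape)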
assert_allclose(K.eval(last_states[0]), expected_state) - - @pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported') - def test_rnn_output_num_dim_larger_than_2_masking(self): - num_samples = 3 - num_timesteps = 4 - num_features = 5 - - def step_function(inputs, states): - outputs = K.tile(K.expand_dims(inputs), [1, 1, 2]) - return outputs, states - - inputs_vals = np.random.random( - (num_samples, num_timesteps, num_features)) - initial_state_vals = np.random.random((num_samples, 6)) - mask_vals = np.ones((num_samples, num_timesteps)) - mask_vals[-1, -1] = 0 # final timestep masked for last sample - - expected_outputs = np.repeat( - inputs_vals[..., None], repeats=2, axis=-1) - # for the last sample, the final timestep (in masked region) should be the - # same as the second to final output (before masked region) - expected_outputs[-1, -1] = expected_outputs[-1, -2] - - inputs = K.variable(inputs_vals) - initial_states = [K.variable(initial_state_vals)] - mask = K.variable(mask_vals) - for unroll in [True, False]: - last_output, outputs, last_states = K.rnn( - step_function, - inputs, - initial_states, - mask=mask, - unroll=unroll, - input_length=num_timesteps if unroll else None) - - assert_allclose(K.eval(outputs), expected_outputs) - - @pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported') - def test_rnn_state_num_dim_larger_than_2_masking(self): - num_samples = 3 - num_timesteps = 4 - - def step_function(inputs, states): - return inputs, [s + 1 for s in states] - - inputs_vals = np.random.random((num_samples, num_timesteps, 5)) - initial_state_vals = np.random.random((num_samples, 6, 7)) - mask_vals = np.ones((num_samples, num_timesteps)) - mask_vals[0, -2:] = 0 # final two timesteps masked for first sample - - expected_last_state = initial_state_vals.copy() - expected_last_state[0] += (num_timesteps - 2) - expected_last_state[1:] += num_timesteps - - inputs = K.variable(inputs_vals) - initial_states = [K.variable(initial_state_vals)] - mask = K.variable(mask_vals) - for unroll in [True, False]: - last_output, outputs, last_states = K.rnn( - step_function, - inputs, - initial_states, - mask=mask, - unroll=unroll, - input_length=num_timesteps if unroll else None) - - # not updated last timestep: - assert_allclose(K.eval(last_states[0]), expected_last_state) - - @pytest.mark.parametrize('shape', [(3, ), (1, 3), (2, 1), (4, 2), (4, 2, 3)]) - def test_logsumexp(self, shape): - check_single_tensor_operation('logsumexp', shape, WITH_NP, axis=None) - check_single_tensor_operation('logsumexp', shape, WITH_NP, axis=0) - check_single_tensor_operation('logsumexp', shape, WITH_NP, axis=-1) - check_single_tensor_operation('logsumexp', shape, WITH_NP, axis=-1, - keepdims=True) - if len(shape) > 1: - check_single_tensor_operation('logsumexp', shape, WITH_NP, axis=1) - check_single_tensor_operation('logsumexp', shape, WITH_NP, axis=1, - keepdims=True) - if len(shape) > 2: - check_single_tensor_operation( - 'logsumexp', shape, WITH_NP, axis=[1, -1]) - check_single_tensor_operation('logsumexp', shape, WITH_NP, axis=[1, -1], - keepdims=True) - - @pytest.mark.skipif(K.backend() != 'tensorflow', - reason='The optimization is applied only with TensorFlow.') - def test_logsumexp_optim(self): - ''' - Check if optimization works. 
- ''' - x_np = np.array([1e+4, 1e-4]) - result = K.eval(K.logsumexp(K.variable(x_np), axis=0)) - assert_allclose(result, 1e4, rtol=1e-5) - - def test_switch(self): - # scalar - val = np.random.random() - z_list = [] - for k in WITH_NP: - x = k.variable(val) - x = k.switch(k.greater_equal(x, 0.5), x * 0.1, x * 0.2) - z_list.append(k.eval(x)) - assert_list_pairwise(z_list) - # non scalar - shapes = [] - shapes.append([(4, 3, 2), (4, 3, 2), (4, 3, 2)]) - shapes.append([(4, 3,), (4, 3, 2), (4, 3, 2)]) - shapes.append([(4,), (4, 3, 2), (4, 3, 2)]) - for s in shapes: - z_list = [] - arrays = list(map(np.random.random, s)) - for k in WITH_NP: - x, then_expr, else_expr = map(k.variable, arrays) - cond = k.greater_equal(x, 0.5) - z_list.append(k.eval(k.switch(cond, then_expr, else_expr))) - assert_list_pairwise(z_list) - - def test_dropout(self): - val = np.random.random((100, 100)) - z_list = [k.eval(k.dropout(k.variable(val), level=0.2)) - for k in WITH_NP] - assert_list_pairwise(z_list, allclose=False) - # dropout patterns are different, only check mean - for i in range(len(z_list) - 1): - assert np.abs(z_list[i].mean() - z_list[i + 1].mean()) < 0.05 - - z_list = [k.eval(k.dropout(k.variable(val), level=0.2, - noise_shape=list(val.shape))) - for k in WITH_NP] - assert_list_pairwise(z_list, allclose=False) - # dropout patterns are different, only check mean - for i in range(len(z_list) - 1): - assert np.abs(z_list[i].mean() - z_list[i + 1].mean()) < 0.05 - - # Test invalid use cases - with pytest.raises(ValueError): - z = K.dropout(K.variable(val), level=-0.5) - - @pytest.mark.parametrize('alpha,max_value,threshold', [ - (0.0, None, 0.0), # standard relu - (0.1, None, 0.0), # set alpha only - (0.0, 5.0, 0.0), # set max_value only - (0.0, None, 0.8), # set threshold only - (0.1, 5.0, 0.0), # set alpha and max_value - (0.1, None, 0.8), # set alpha and threshold - (0.0, 5.0, 0.8), # set max_value and threshold - (0.1, 5.0, 0.8), # set all - (0.1, 0.0, 0.8), # max_value is zero - (0.1, 5.0, -2.8), # threshold is negative - (0.1, 9.0, 0.8), # max_value > 6 - ]) - def test_relu(self, alpha, max_value, threshold): - check_single_tensor_operation('relu', (4, 2), WITH_NP, alpha=alpha, - max_value=max_value, threshold=threshold) - - def test_nn_operations(self): - check_single_tensor_operation('softsign', (4, 10), WITH_NP) - check_single_tensor_operation('softplus', (4, 10), WITH_NP) - check_single_tensor_operation('elu', (4, 10), WITH_NP, alpha=0.5) - - check_single_tensor_operation('sigmoid', (4, 2), WITH_NP) - check_single_tensor_operation('hard_sigmoid', (4, 2), WITH_NP) - check_single_tensor_operation('tanh', (4, 2), WITH_NP) - - check_single_tensor_operation('softmax', (4, 10), WITH_NP) - check_single_tensor_operation('softmax', (4, 5, 3), WITH_NP, axis=1) - check_single_tensor_operation( - 'softmax', (4, 5, 3, 10), WITH_NP, axis=2) - - check_single_tensor_operation('l2_normalize', (4, 3), WITH_NP, axis=-1) - check_single_tensor_operation('l2_normalize', (4, 3), WITH_NP, axis=1) - - def test_crossentropy(self): - # toy label matrix (4 samples, 2 classes) - label = np.array([[.4, .6], [.3, .7], [.1, .9], - [.2, .8]], dtype=np.float32) - check_two_tensor_operation( - 'binary_crossentropy', label, (4, 2), WITH_NP) - check_two_tensor_operation('binary_crossentropy', label, (4, 2), - WITH_NP, from_logits=True) - check_two_tensor_operation('categorical_crossentropy', label, (4, 2), - WITH_NP, cntk_two_dynamicity=True) - check_two_tensor_operation('categorical_crossentropy', label, (4, 2), - WITH_NP, 
cntk_two_dynamicity=True, - from_logits=True) - - # toy label matrix (2 samples, 3 classes) - label = np.array([[.4, .1, .5], [.2, .6, .2]], dtype=np.float32) - check_two_tensor_operation('categorical_crossentropy', label, (2, 3), - WITH_NP, cntk_two_dynamicity=True) - check_two_tensor_operation('categorical_crossentropy', label, (2, 3), - WITH_NP, cntk_two_dynamicity=True, - from_logits=True) - - def test_in_top_k(self): - batch_size = 20 - num_classes = 10 - - # Random prediction test case - predictions = np.random.random( - (batch_size, num_classes)).astype('float32') - targets = np.random.randint( - num_classes, size=batch_size, dtype='int32') - - # (k == 0 or k > num_classes) does not raise an error - # but just return an unmeaningful tensor. - for k in range(1, 2 if K.backend() == 'cntk' else (num_classes + 1)): - z_list = [b.eval(b.in_top_k(b.variable(predictions, dtype='float32'), - b.variable(targets, dtype='int32'), k)) - for b in WITH_NP] - assert_list_pairwise(z_list) - - # Identical prediction test case: - # randomly set half of the predictions to an identical value - num_identical = num_classes // 2 - for i in range(batch_size): - idx_identical = np.random.choice(num_classes, - size=num_identical, replace=False) - predictions[i, idx_identical] = predictions[i, 0] - targets = np.zeros(batch_size, dtype='int32') - - for k in range(1, 2 if K.backend() == 'cntk' else (num_classes + 1)): - z_list = [b.eval(b.in_top_k(b.variable(predictions, dtype='float32'), - b.variable(targets, dtype='int32'), k)) - for b in WITH_NP] - assert_list_pairwise(z_list) - - @pytest.mark.parametrize('op,input_shape,kernel_shape,padding,data_format', [ - ('conv1d', (2, 8, 2), (3, 2, 3), 'same', 'channels_last'), - ('conv1d', (1, 8, 2), (3, 2, 3), 'valid', 'channels_last'), - ('conv1d', (1, 2, 8), (3, 2, 3), 'valid', 'channels_first'), - ('conv2d', (2, 3, 4, 5), (3, 3, 3, 2), 'same', 'channels_first'), - ('conv2d', (2, 3, 5, 6), (4, 3, 3, 4), 'valid', 'channels_first'), - ('conv2d', (1, 6, 5, 3), (3, 4, 3, 2), 'valid', 'channels_last'), - ('conv2d', (1, 7, 6, 3), (3, 3, 3, 4), 'same', 'channels_last'), - ('conv3d', (2, 3, 4, 5, 4), (3, 3, 3, 3, 4), 'same', 'channels_first'), - ('conv3d', (2, 3, 5, 4, 6), (3, 2, 4, 3, 4), 'valid', 'channels_first'), - ('conv3d', (1, 2, 2, 2, 1), (2, 2, 2, 1, 1), 'valid', 'channels_last'), - ('conv3d', (1, 3, 5, 4, 2), (3, 3, 3, 2, 3), 'same', 'channels_last'), - ]) - def test_conv(self, op, input_shape, kernel_shape, padding, data_format): - check_two_tensor_operation( - op, input_shape, kernel_shape, WITH_NP, - padding=padding, data_format=data_format, - cntk_dynamicity=True) - - @pytest.mark.parametrize( - 'op,input_shape,kernel_shape,output_shape,padding,data_format', [ - ('conv2d_transpose', (2, 5, 6, 3), (3, 3, 2, 3), (2, 5, 6, 2), - 'same', 'channels_last'), - ('conv2d_transpose', (2, 3, 8, 9), (3, 3, 2, 3), (2, 2, 8, 9), - 'same', 'channels_first'), - ]) - def test_conv_transpose(self, - op, - input_shape, - kernel_shape, - output_shape, - padding, - data_format): - check_two_tensor_operation( - op, input_shape, kernel_shape, WITH_NP, - output_shape=output_shape, padding=padding, data_format=data_format, - cntk_dynamicity=True) - - @pytest.mark.skipif((K.backend() == 'cntk' and KC.dev.type() == 0), - reason='cntk only supports dilated conv on GPU') - @pytest.mark.parametrize( - 'op,input_shape,kernel_shape,padding,data_format,dilation_rate', [ - ('conv1d', (2, 8, 3), (4, 3, 2), 'valid', 'channels_last', 2), - ('conv1d', (2, 3, 8), (4, 3, 2), 'valid', 
'channels_first', 2), - ('conv2d', (2, 8, 9, 3), (3, 3, 3, 2), - 'same', 'channels_last', (2, 2)), - ('conv2d', (2, 3, 9, 8), (4, 3, 3, 4), - 'valid', 'channels_first', (2, 2)), - ('conv3d', (2, 5, 4, 6, 3), (2, 2, 3, 3, 4), - 'valid', 'channels_last', (2, 2, 2)), - ('conv3d', (2, 3, 5, 4, 6), (2, 2, 3, 3, 4), - 'same', 'channels_first', (2, 2, 2)), - ]) - def test_dilated_conv(self, - op, - input_shape, - kernel_shape, - padding, - data_format, - dilation_rate): - check_two_tensor_operation( - op, input_shape, kernel_shape, WITH_NP, - padding=padding, data_format=data_format, - dilation_rate=dilation_rate, cntk_dynamicity=True) - - @pytest.mark.skipif((K.backend() == 'cntk' and KC.dev.type() == 0), - reason='cntk only supports dilated conv transpose on GPU') - @pytest.mark.parametrize( - 'op,input_shape,kernel_shape,output_shape,padding,data_format,dilation_rate', - [ - ('conv2d_transpose', (2, 5, 6, 3), (3, 3, 2, 3), (2, 5, 6, 2), - 'same', 'channels_last', (2, 2)), - ('conv2d_transpose', (2, 3, 8, 9), (3, 3, 2, 3), (2, 2, 8, 9), - 'same', 'channels_first', (2, 2)), - ]) - def test_dilated_conv_transpose(self, - op, - input_shape, - kernel_shape, - output_shape, - padding, - data_format, - dilation_rate): - check_two_tensor_operation( - op, input_shape, kernel_shape, WITH_NP, output_shape=output_shape, - padding=padding, data_format=data_format, dilation_rate=dilation_rate, - cntk_dynamicity=True) - - @pytest.mark.parametrize('op,input_shape,kernel_shape,padding,data_format', [ - ('depthwise_conv2d', (2, 3, 4, 5), (3, 3, 3, 2), 'same', 'channels_first'), - ('depthwise_conv2d', (2, 3, 5, 6), (4, 3, 3, 4), 'valid', 'channels_first'), - ('depthwise_conv2d', (1, 6, 5, 3), (3, 4, 3, 2), 'valid', 'channels_last'), - ('depthwise_conv2d', (1, 7, 6, 3), (3, 3, 3, 4), 'same', 'channels_last'), - ]) - def test_depthwise_conv(self, - op, - input_shape, - kernel_shape, - padding, - data_format): - check_two_tensor_operation( - op, input_shape, kernel_shape, WITH_NP, - padding=padding, data_format=data_format, - cntk_dynamicity=True) - - @pytest.mark.parametrize( - 'op,input_shape,pool_size,strides,padding,data_format,pool_mode', [ - ('pool2d', (2, 3, 7, 7), (3, 3), (1, 1), - 'same', 'channels_first', 'avg'), - ('pool2d', (3, 3, 8, 5), (2, 3), (1, 1), - 'valid', 'channels_first', 'max'), - ('pool2d', (2, 9, 5, 3), (3, 2), (1, 1), - 'valid', 'channels_last', 'avg'), - ('pool2d', (3, 6, 7, 3), (3, 3), (1, 1), - 'same', 'channels_last', 'max'), - ('pool3d', (2, 3, 7, 7, 7), (3, 3, 3), (1, 1, 1), - 'same', 'channels_first', 'avg'), - ('pool3d', (3, 3, 8, 5, 9), (2, 3, 2), (1, 1, 1), - 'valid', 'channels_first', 'max'), - ('pool3d', (2, 8, 9, 5, 3), (3, 2, 3), (1, 1, 1), - 'valid', 'channels_last', 'avg'), - ('pool3d', (3, 5, 6, 7, 3), (3, 3, 3), (1, 1, 1), - 'same', 'channels_last', 'max'), - ]) - def test_pool(self, - op, - input_shape, - pool_size, - strides, - padding, - data_format, - pool_mode): - check_single_tensor_operation( - op, input_shape, WITH_NP, - pool_size=pool_size, strides=strides, - padding=padding, data_format=data_format, pool_mode=pool_mode, - cntk_dynamicity=True) - - @pytest.mark.parametrize( - 'op,input_shape,kernel_shape,depth_multiplier,padding,data_format', [ - ('separable_conv1d', (2, 8, 2), (3,), 1, 'same', 'channels_last'), - ('separable_conv1d', (1, 8, 2), (3,), 2, 'valid', 'channels_last'), - ('separable_conv2d', (2, 3, 4, 5), (3, 3), 1, 'same', 'channels_first'), - ('separable_conv2d', (2, 3, 5, 6), - (4, 3), 2, 'valid', 'channels_first'), - ('separable_conv2d', (1, 6, 5, 
3), (3, 4), 1, 'valid', 'channels_last'), - ('separable_conv2d', (1, 7, 6, 3), (3, 3), 2, 'same', 'channels_last'), - ]) - def test_separable_conv(self, - op, - input_shape, - kernel_shape, - depth_multiplier, - padding, - data_format): - if data_format == 'channels_first': - input_depth = input_shape[1] - else: - input_depth = input_shape[-1] - _, x = parse_shape_or_val(input_shape) - _, depthwise = parse_shape_or_val(kernel_shape + - (input_depth, depth_multiplier)) - _, pointwise = parse_shape_or_val((1,) * len(kernel_shape) + - (input_depth * depth_multiplier, 7)) - y1 = KNP.separable_conv(x, depthwise, pointwise, - padding=padding, data_format=data_format) - if K.backend() == 'cntk': - _, cntk_func = cntk_func_tensors( - op, [input_shape, depthwise, pointwise], - padding=padding, data_format=data_format) - y2 = cntk_func([x])[0] - else: - y2 = K.eval(getattr(K, op)( - K.variable(x), - K.variable(depthwise), K.variable(pointwise), - padding=padding, data_format=data_format)) - assert_allclose(y1, y2, atol=1e-05) - - def test_random_normal(self): - # test standard normal as well as a normal with a different set of parameters - for mean, std in [(0., 1.), (-10., 5.)]: - rand = K.eval(K.random_normal((300, 200), - mean=mean, stddev=std, seed=1337)) - assert rand.shape == (300, 200) - assert np.abs(np.mean(rand) - mean) < std * 0.015 - assert np.abs(np.std(rand) - std) < std * 0.015 - - # test that random_normal also generates different values when used - # within a function - r = K.random_normal((10, 10), mean=mean, stddev=std, seed=1337) - samples = np.array([K.eval(r) for _ in range(200)]) - assert np.abs(np.mean(samples) - mean) < std * 0.015 - assert np.abs(np.std(samples) - std) < std * 0.015 - - def test_random_uniform(self): - min_val = -1. - max_val = 1. - rand = K.eval(K.random_uniform((200, 100), min_val, max_val)) - assert rand.shape == (200, 100) - assert np.abs(np.mean(rand)) < 0.015 - assert max_val - 0.015 < np.max(rand) <= max_val - assert min_val + 0.015 > np.min(rand) >= min_val - - r = K.random_uniform((10, 10), minval=min_val, maxval=max_val) - samples = np.array([K.eval(r) for _ in range(200)]) - assert np.abs(np.mean(samples)) < 0.015 - assert max_val - 0.015 < np.max(samples) <= max_val - assert min_val + 0.015 > np.min(samples) >= min_val - - def test_random_binomial(self): - p = 0.5 - rand = K.eval(K.random_binomial((200, 100), p)) - assert rand.shape == (200, 100) - assert np.abs(np.mean(rand) - p) < 0.015 - assert np.max(rand) == 1 - assert np.min(rand) == 0 - - r = K.random_binomial((10, 10), p) - samples = np.array([K.eval(r) for _ in range(200)]) - assert np.abs(np.mean(samples) - p) < 0.015 - assert np.max(samples) == 1 - assert np.min(samples) == 0 - - def test_truncated_normal(self): - mean = 0. - std = 1. - min_val = -2. - max_val = 2. 
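The sampling helpers exercised in this stretch (`random_normal`, `random_uniform`, `random_binomial`, `truncated_normal`) are validated statistically rather than exactly, which is why every assertion allows a tolerance. A sketch of that style of check (assuming the Keras 2.x `keras.backend` API; illustrative only):

    import numpy as np
    from keras import backend as K

    rand = K.eval(K.random_normal((300, 200), mean=0., stddev=1., seed=1337))
    print(rand.shape)                                               # (300, 200)
    print(abs(rand.mean()) < 0.015, abs(rand.std() - 1.) < 0.015)   # True True, within tolerance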
- rand = K.eval(K.truncated_normal((300, 200), - mean=mean, stddev=std, seed=1337)) - assert rand.shape == (300, 200) - assert np.abs(np.mean(rand) - mean) < 0.015 - assert np.max(rand) <= max_val - assert np.min(rand) >= min_val - - # assumption in initializers.VarianceScaling - assert np.abs(np.std(rand) - std * 0.87962) < 0.015 - - def test_conv_invalid_use(self): - dummy_x_1d = K.variable(np.ones((4, 8, 2))) - dummy_w_1d = K.variable(np.ones((3, 2, 3))) - dummy_x_2d = K.variable(np.ones((2, 3, 4, 5))) - dummy_w_2d = K.variable(np.ones((2, 2, 3, 4))) - dummy_x_3d = K.variable(np.ones((2, 3, 4, 5, 4))) - dummy_w_3d = K.variable(np.ones((2, 2, 2, 3, 4))) - dummy_w1x1_2d = K.variable(np.ones((1, 1, 12, 7))) - - with pytest.raises(ValueError): - K.conv1d(dummy_x_1d, dummy_w_1d, data_format='channels_middle') - - with pytest.raises(ValueError): - K.conv2d(dummy_x_2d, dummy_w_2d, data_format='channels_middle') - - with pytest.raises(ValueError): - K.conv3d(dummy_x_3d, dummy_w_3d, data_format='channels_middle') - - with pytest.raises(ValueError): - K.separable_conv2d(dummy_x_2d, dummy_w_2d, dummy_w1x1_2d, - data_format='channels_middle') - - with pytest.raises(ValueError): - K.depthwise_conv2d(dummy_x_2d, dummy_w_2d, - data_format='channels_middle') - - if K.backend() == 'cntk': - with pytest.raises(ValueError): - K.separable_conv2d(dummy_x_2d, dummy_w_2d, dummy_w1x1_2d, - dilation_rate=(1, 2)) - with pytest.raises(ValueError): - K.separable_conv2d(dummy_x_2d, dummy_w_2d, dummy_w1x1_2d, - strides=(2, 2), dilation_rate=(1, 2)) - with pytest.raises(ValueError): - K.depthwise_conv2d(dummy_x_2d, dummy_w_2d, - dilation_rate=(1, 2)) - with pytest.raises(ValueError): - K.depthwise_conv2d(dummy_x_2d, dummy_w_2d, - strides=(2, 2), dilation_rate=(1, 2)) - - def test_pooling_invalid_use(self): - for (input_shape, pool_size) in zip([(5, 10, 12, 3), (5, 10, 12, 6, 3)], - [(2, 2), (2, 2, 2)]): - x = K.variable(np.random.random(input_shape)) - if len(pool_size) == 2: - with pytest.raises(ValueError): - K.pool2d(x, pool_size=pool_size, - data_format='channels_middle') - with pytest.raises(ValueError): - K.pool2d(x, pool_size=pool_size, padding='twice') - with pytest.raises(ValueError): - K.pool2d(x, pool_size=pool_size, pool_mode='median') - else: - with pytest.raises(ValueError): - K.pool3d(x, pool_size=pool_size, - data_format='channels_middle') - with pytest.raises(ValueError): - K.pool3d(x, pool_size=pool_size, padding='twice') - with pytest.raises(ValueError): - K.pool3d(x, pool_size=pool_size, pool_mode='median') - - def test_resize_images(self): - for data_format in ['channels_first', 'channels_last']: - shape = (5, 5) - if data_format == 'channels_first': - x_shape = (2, 3) + shape - elif data_format == 'channels_last': - x_shape = (2,) + shape + (3,) - check_single_tensor_operation('resize_images', x_shape, - WITH_NP, cntk_dynamicity=True, - height_factor=2, - width_factor=2, - data_format=data_format) - - # Test invalid use cases - xval = np.random.random(x_shape) - with pytest.raises(ValueError): - K.resize_images(K.variable(xval), 2, 2, - data_format='channels_middle') - - @staticmethod - def _helper_bilinear(data_format, height_factor, width_factor): - x_shape = (2, 3, 4, 5) - check_single_tensor_operation('resize_images', x_shape, - [KTF, KTH], - height_factor=height_factor, - width_factor=width_factor, - data_format=data_format, - interpolation='bilinear') - - @pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported.') - @pytest.mark.parametrize('data_format', ['channels_first', 
'channels_last']) - def test_resize_images_bilinear(self, data_format): - self._helper_bilinear(data_format, 2, 2) - with pytest.raises(NotImplementedError): - self._helper_bilinear(data_format, 4, 4) - - def test_resize_volumes(self): - for data_format in ['channels_first', 'channels_last']: - shape = (5, 5, 5) - if data_format == 'channels_first': - x_shape = (2, 3) + shape - elif data_format == 'channels_last': - x_shape = (2,) + shape + (3,) - check_single_tensor_operation('resize_volumes', x_shape, - WITH_NP, cntk_dynamicity=True, - depth_factor=2, - height_factor=2, - width_factor=2, - data_format=data_format) - - # Test invalid use cases - xval = np.random.random(x_shape) - with pytest.raises(ValueError): - K.resize_volumes(K.variable(xval), 2, 2, 2, - data_format='channels_middle') - - def test_temporal_padding(self): - check_single_tensor_operation('temporal_padding', (4, 3, 3), - WITH_NP) - check_single_tensor_operation('temporal_padding', (2, 3, 4), - WITH_NP, padding=(1, 2)) - - def test_spatial_2d_padding(self): - padding = ((1, 2), (2, 1)) - for data_format in ['channels_first', 'channels_last']: - shape = (5, 5) - if data_format == 'channels_first': - x_shape = (1, 3) + shape - else: - x_shape = (1,) + shape + (3,) - check_single_tensor_operation('spatial_2d_padding', x_shape, WITH_NP, - padding=padding, data_format=data_format) - # Check handling of dynamic shapes. - if K in [KTF, KTH]: - x = K.placeholder(shape=(1, None, None, 1)) - y = K.spatial_2d_padding( - x, padding=padding, data_format='channels_last') - assert K.int_shape(y) == (1, None, None, 1) - - # Test invalid use cases - xval = np.random.random(x_shape) - with pytest.raises(ValueError): - K.spatial_2d_padding(K.variable(xval), padding=padding, - data_format='channels_middle') - - def test_spatial_3d_padding(self): - padding = ((1, 2), (2, 1), (1, 2)) - for data_format in ['channels_first', 'channels_last']: - shape = (5, 5, 5) - if data_format == 'channels_first': - x_shape = (1, 3) + shape - else: - x_shape = (1,) + shape + (3,) - check_single_tensor_operation('spatial_3d_padding', x_shape, WITH_NP, - padding=padding, data_format=data_format) - # Check handling of dynamic shapes. 
- if K in [KTF, KTH]: - x = K.placeholder(shape=(1, None, None, None, 1)) - y = K.spatial_3d_padding( - x, padding=padding, data_format='channels_last') - assert K.int_shape(y) == (1, None, None, None, 1) - - # Test invalid use cases - xval = np.random.random(x_shape) - with pytest.raises(ValueError): - K.spatial_3d_padding(K.variable(xval), padding=padding, - data_format='channels_middle') - - def test_bias_add(self): - for data_format in ['channels_first', 'channels_last']: - for shape in [(), (3,), (2, 3), (5, 3, 2)]: - if data_format == 'channels_first': - x_shape = (1, 4) + shape - else: - x_shape = (1,) + shape + (4,) - bias_shape = (4,) - check_two_tensor_operation('bias_add', x_shape, bias_shape, - WITH_NP, cntk_dynamicity=True, - data_format=data_format) - - if data_format == 'channels_first': - x_shape = (20, 6, 10) - else: - x_shape = (20, 10, 6) - check_two_tensor_operation('bias_add', x_shape, (10, 6), - WITH_NP, cntk_dynamicity=True, - data_format=data_format) - - # Test invalid use cases - x = K.variable(np.random.random(x_shape)) - b = K.variable(np.random.random(bias_shape)) - with pytest.raises(ValueError): - K.bias_add(x, b, data_format='channels_middle') - - @pytest.mark.skipif(K.backend() != 'theano', - reason='Specific to Theano.') - @pytest.mark.parametrize('x_shape', [(1, 4, 2, 3), (1, 2, 3, 4)]) - def test_batchnorm_th(self, x_shape): - x_val = np.random.random(x_shape).astype(np.float32) - x = K.variable(x_val) - z, _, _ = K.normalize_batch_in_training( - x, None, None, reduction_axes='per-activation') - z = K.eval(z) - assert z.shape == x_shape - - @pytest.mark.skipif(K.backend() != 'tensorflow', - reason='Specific to Tensorflow.') - @pytest.mark.parametrize('x_shape', [(1, 4, 2, 3), (1, 2, 3, 4)]) - def test_batchnorm_tf(self, x_shape): - x_val = np.random.random(x_shape).astype(np.float32) - x = K.variable(x_val) - z, _, _ = K.normalize_batch_in_training( - x, None, None, reduction_axes=[0, 1, 2, 3]) - z = K.eval(z) - assert z.shape == x_shape - - @pytest.mark.skipif(K.backend() != 'cntk', reason='Specific to CNTK.') - @pytest.mark.parametrize('x_shape', [(1, 4, 2, 3), (1, 2, 3, 4)]) - def test_batchnorm_cntk(self, x_shape): - x_val = np.random.random(x_shape).astype(np.float32) - x = K.placeholder(x_shape) - z, _, _ = K.normalize_batch_in_training( - x, None, None, reduction_axes=[0, 1, 2, 3]) - z = K.function([x], [z])([x_val])[0] - assert z.shape == x_shape - - # the Theano and TensorFlow CTC code use different methods to ensure - # numerical stability. 
The Theano code subtracts out the max - # before the final log, so the results are different but scale - # identically and still train properly - @pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported.') - def test_ctc(self): - if K.backend() == 'theano': - ref = [1.73308, 3.81351] - else: - ref = [3.34211, 5.42262] - # simplified version of TensorFlow's test - - label_lens = np.expand_dims(np.asarray([5, 4]), 1) - input_lens = np.expand_dims( - np.asarray([5, 5]), 1) # number of timesteps - - # dimensions are batch x time x categories - labels = np.asarray([[0, 1, 2, 1, 0], [0, 1, 1, 0, -1]]) - inputs = np.asarray( - [[[0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553], - [0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436], - [0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688], - [0.0663296, 0.643849, 0.280111, 0.00283995, 0.0035545, 0.00331533], - [0.458235, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107]], - [[0.30176, 0.28562, 0.0831517, 0.0862751, 0.0816851, 0.161508], - [0.24082, 0.397533, 0.0557226, 0.0546814, 0.0557528, 0.19549], - [0.230246, 0.450868, 0.0389607, 0.038309, 0.0391602, 0.202456], - [0.280884, 0.429522, 0.0326593, 0.0339046, 0.0326856, 0.190345], - [0.423286, 0.315517, 0.0338439, 0.0393744, 0.0339315, 0.154046]]], - dtype=np.float32) - - k_labels = K.variable(labels, dtype="int32") - k_inputs = K.variable(inputs, dtype="float32") - k_input_lens = K.variable(input_lens, dtype="int32") - k_label_lens = K.variable(label_lens, dtype="int32") - res = K.eval(K.ctc_batch_cost(k_labels, k_inputs, k_input_lens, - k_label_lens)) - if K.backend() == 'theano': - assert_allclose(res[0, :], ref, atol=1e-05) - else: - assert_allclose(res[:, 0], ref, atol=1e-05) - - # test when batch_size = 1, that is, one sample only - # get only first sample from above test case - if K.backend() == 'theano': - ref = [1.73308] - else: - ref = [3.34211] - - input_lens = np.expand_dims(np.asarray([5]), 1) - label_lens = np.expand_dims(np.asarray([5]), 1) - - labels = np.asarray([[0, 1, 2, 1, 0]]) - inputs = np.asarray( - [[[0.633766, 0.221185, 0.0917319, 0.0129757, 0.0142857, 0.0260553], - [0.111121, 0.588392, 0.278779, 0.0055756, 0.00569609, 0.010436], - [0.0357786, 0.633813, 0.321418, 0.00249248, 0.00272882, 0.0037688], - [0.0663296, 0.643849, 0.280111, 0.00283995, 0.0035545, 0.00331533], - [0.458235, 0.396634, 0.123377, 0.00648837, 0.00903441, 0.00623107]]], - dtype=np.float32) - - k_labels = K.variable(labels, dtype="int32") - k_inputs = K.variable(inputs, dtype="float32") - k_input_lens = K.variable(input_lens, dtype="int32") - k_label_lens = K.variable(label_lens, dtype="int32") - res = K.eval(K.ctc_batch_cost(k_labels, k_inputs, k_input_lens, - k_label_lens)) - if K.backend() == 'theano': - assert_allclose(res[0, :], ref, atol=1e-05) - else: - assert_allclose(res[:, 0], ref, atol=1e-05) - - @pytest.mark.skipif(K.backend() != 'tensorflow', - reason='Test adapted from tensorflow.') - def test_ctc_decode_greedy(self): - """Test two batch entries - best path decoder.""" - max_time_steps = 6 - - seq_len_0 = 4 - input_prob_matrix_0 = np.asarray( - [[1.0, 0.0, 0.0, 0.0], # t=0 - [0.0, 0.0, 0.4, 0.6], # t=1 - [0.0, 0.0, 0.4, 0.6], # t=2 - [0.0, 0.9, 0.1, 0.0], # t=3 - [0.0, 0.0, 0.0, 0.0], # t=4 (ignored) - [0.0, 0.0, 0.0, 0.0]], # t=5 (ignored) - dtype=np.float32) - - seq_len_1 = 5 - # dimensions are time x depth - - input_prob_matrix_1 = np.asarray( - [[0.1, 0.9, 0.0, 0.0], # t=0 - [0.0, 0.9, 0.1, 0.0], # t=1 - [0.0, 0.0, 0.1, 0.9], # t=2 - 
[0.0, 0.9, 0.1, 0.1], # t=3 - [0.9, 0.1, 0.0, 0.0], # t=4 - [0.0, 0.0, 0.0, 0.0]], # t=5 (ignored) - dtype=np.float32) - - # len max_time_steps array of batch_size x depth matrices - inputs = [np.vstack([input_prob_matrix_0[t, :], - input_prob_matrix_1[t, :]]) - for t in range(max_time_steps)] - - # change tensorflow order to keras backend order - inputs = np.asarray(inputs).transpose((1, 0, 2)) - - # batch_size length vector of sequence_lengths - input_length = np.array([seq_len_0, seq_len_1], dtype=np.int32) - - decode_pred_np, log_prob_pred_np = KNP.ctc_decode(inputs, - input_length, greedy=True) - inputs = K.variable(inputs) - input_length = K.variable(input_length) - decode_pred_tf, log_prob_pred_tf = K.ctc_decode(inputs, - input_length, greedy=True) - - assert len(decode_pred_tf) == 1 - - decode_pred = K.eval(decode_pred_tf[0]) - log_prob_pred = K.eval(log_prob_pred_tf) - - assert np.alltrue(decode_pred_np == decode_pred) - assert np.allclose(log_prob_pred_np, log_prob_pred) - - @pytest.mark.parametrize('shape,start,size', [ - ((2, 5), (0, 1), (2, 3)), - ((2, 5), (1, 0), (1, 4)), - ((3, 2, 3), (1, 1, 0), (1, 1, 3)), - ((3, 2, 3), (1, 0, 0), (1, 2, 3)), - ((3, 2, 3), (1, 0, 0), (2, 1, 3)), - ]) - def test_slice(self, shape, start, size): - check_single_tensor_operation('slice', shape, WITH_NP, - start=start, size=size) - with pytest.raises(ValueError): - K.slice(K.variable(np.random.random(shape)), - start=[1, 0, 0, 0], size=size) - - @pytest.mark.skipif(K.backend() != 'tensorflow', - reason='Beam search is only implemented with ' - 'the TensorFlow backend.') - def test_ctc_decode_beam_search(self): - """Test one batch, two beams - hibernating beam search.""" - - depth = 6 - - seq_len_0 = 5 - input_prob_matrix_0 = np.asarray( - [[0.30999, 0.309938, 0.0679938, 0.0673362, 0.0708352, 0.173908], - [0.215136, 0.439699, 0.0370931, 0.0393967, 0.0381581, 0.230517], - [0.199959, 0.489485, 0.0233221, 0.0251417, 0.0233289, 0.238763], - [0.279611, 0.452966, 0.0204795, 0.0209126, 0.0194803, 0.20655], - [0.51286, 0.288951, 0.0243026, 0.0220788, 0.0219297, 0.129878], - # Random entry added in at time=5 - [0.155251, 0.164444, 0.173517, 0.176138, 0.169979, 0.160671]], - dtype=np.float32) - - # Add arbitrary offset - this is fine - input_prob_matrix_0 = input_prob_matrix_0 + 2.0 - - # len max_time_steps array of batch_size x depth matrices - inputs = ([input_prob_matrix_0[t, :][np.newaxis, :] - for t in range(seq_len_0)] + # Pad to max_time_steps = 8 - 2 * [np.zeros((1, depth), dtype=np.float32)]) - - # Take exponential as we directly apply ctc_decode_beam_search - inputs = np.exp(inputs) - - # change tensorflow order to keras backend order - inputs = K.variable(inputs.transpose((1, 0, 2))) - - # batch_size length vector of sequence_lengths - input_length = K.variable(np.array([seq_len_0], dtype=np.int32)) - # batch_size length vector of log probabilities - log_prob_truth = np.array( - [ - -5.811451, # output beam 0 - -6.63339 # output beam 1 - ], - np.float32)[np.newaxis, :] - - decode_truth = [np.array([1, 0]), np.array([[1]])] - - beam_width = 2 - top_paths = 2 - - decode_pred_tf, log_prob_pred_tf = K.ctc_decode(inputs, - input_length, - greedy=False, - beam_width=beam_width, - top_paths=top_paths) - - assert len(decode_pred_tf) == top_paths - - log_prob_pred = K.eval(log_prob_pred_tf) - - for i in range(top_paths): - assert np.alltrue(decode_truth[i] == K.eval(decode_pred_tf[i])) - - assert np.allclose(log_prob_truth, log_prob_pred) - - @pytest.mark.skipif(K.backend() != 'tensorflow', - 
reason='Beam search is only implemented with ' - 'the TensorFlow backend.') - def test_ctc_decode_beam_search_no_merge(self): - # A simple CTC probability map with some repeating characters, - # shape(batch, input_width, char_count) - # Without merging should be decoded as: "AABB", with merging as: "AB". - input_prob = np.array([ - [ # blank, A ,B - [0, 0, 1], # blank - [1, 0, 0], # A - [0, 0, 1], # blank - [1, 0, 0], # A - [0, 1, 0], # B - [0, 0, 1], # blank - [0, 1, 0] # B - ] - ]) - input_len = np.array(input_prob.shape[0] * [input_prob.shape[1]]) - - def decode(merge_repeated): - input_prob_tensor = K.placeholder(shape=(None, None, None), - dtype='float32') - input_len_tensor = K.placeholder(shape=(None), dtype='int64') - paths_tensors, _ = K.ctc_decode(input_prob_tensor, input_len_tensor, - greedy=False, beam_width=1, top_paths=1, - merge_repeated=merge_repeated) - decode_func = K.function([input_prob_tensor, input_len_tensor], - paths_tensors) - paths = decode_func([input_prob, input_len]) - return paths - - # merged: A B - assert np.allclose(decode(merge_repeated=True), [np.array([[0, 1]])]) - # not merged: A A B B - assert np.allclose(decode(merge_repeated=False), - [np.array([[0, 0, 1, 1]])]) - - def test_one_hot(self): - input_length = 10 - num_classes = 20 - batch_size = 30 - indices = np.random.randint( - 0, num_classes, size=(batch_size, input_length)) - oh = KNP.one_hot(np.int32(indices), num_classes) - koh = K.eval(K.one_hot(K.variable( - indices, dtype='int32'), num_classes)) - assert np.all(koh == oh) - - @pytest.mark.skipif(not supports_sparse, - reason='Sparse tensors are not supported in cntk ' - 'and Theano has some dependency issues for sparse.') - def test_sparse_dot(self): - x_d = np.array([0, 7, 2, 3], dtype=np.float32) - x_r = np.array([0, 2, 2, 3], dtype=np.int64) - x_c = np.array([4, 3, 2, 3], dtype=np.int64) - - x_sparse = sparse.csr_matrix((x_d, (x_r, x_c)), shape=(4, 5)) - x_dense = x_sparse.toarray() - - W = np.random.random((5, 4)) - t_W = K.variable(W) - k_s = K.eval(K.dot(K.variable(x_sparse), t_W)) - k_d = K.eval(K.dot(K.variable(x_dense), t_W)) - - assert k_s.shape == k_d.shape - assert_allclose(k_s, k_d, atol=1e-05) - - @pytest.mark.skipif(not supports_sparse, - reason='Sparse tensors are not supported in cntk ' - 'and Theano has some dependency issues for sparse.') - def test_sparse_concat(self): - x_d = np.array([0, 7, 2, 3], dtype=np.float32) - x_r = np.array([0, 2, 2, 3], dtype=np.int64) - x_c = np.array([4, 3, 2, 3], dtype=np.int64) - - x_sparse_1 = sparse.csr_matrix((x_d, (x_r, x_c)), shape=(4, 5)) - - x_d = np.array([0, 7, 2, 3], dtype=np.float32) - x_r = np.array([0, 2, 2, 3], dtype=np.int64) - x_c = np.array([4, 3, 2, 3], dtype=np.int64) - - x_sparse_2 = sparse.csr_matrix((x_d, (x_r, x_c)), shape=(4, 5)) - - x_dense_1 = x_sparse_1.toarray() - x_dense_2 = x_sparse_2.toarray() - - k_s = K.concatenate([K.variable(x_sparse_1), K.variable(x_sparse_2)]) - assert K.is_sparse(k_s) - - k_s_d = K.eval(k_s) - - k_d = K.eval(K.concatenate( - [K.variable(x_dense_1), K.variable(x_dense_2)])) - - assert k_s_d.shape == k_d.shape - assert_allclose(k_s_d, k_d, atol=1e-05) - - @pytest.mark.parametrize('shape,shape2,axis', [ - ((5, 2), (7, 2), 0), - ((5, 4, 6), (5, 3, 6), 1), - ((5, 4, 6, 10), (5, 4, 6, 2), 3), - ((5, 4, 6, 3), (5, 4, 6, 2), -1), - ]) - def test_concat_operations(self, shape, shape2, axis): - # In stack, each array must have the same shape. 
- check_two_tensor_operation('stack', shape, shape, WITH_NP, - axis=axis, concat_args=True) - check_two_tensor_operation('concatenate', shape, shape2, WITH_NP, - axis=axis, concat_args=True) - check_two_tensor_operation('concatenate', shape, shape2, WITH_NP, - axis=axis, concat_args=True) - - @pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported.') - def test_map(self): - x = np.random.rand(10, 3).astype(np.float32) - vx = K.variable(x) - kx = K.eval(K.map_fn(K.sum, vx)) - # make sure we can also walk the indexes in tensorflow which we - # can't without specifying dtype - kx2 = K.eval(K.map_fn( - lambda i: K.sum(vx[i]), - K.arange(10), - dtype=K.floatx() - )) - - assert (10,) == kx.shape - assert (10,) == kx2.shape - assert_allclose(x.sum(axis=1), kx, atol=1e-05) - assert_allclose(kx, kx2, atol=1e-05) - - def test_foldl(self): - x = np.random.rand(10, 3).astype(np.float32) - kx = K.eval(K.foldl(lambda a, b: a + b, K.variable(x))) - - assert (3,) == kx.shape - assert_allclose(x.sum(axis=0), kx, atol=1e-05) - - def test_foldr(self): - # This test aims to make sure that we walk the array from right to left - # and checks it in the following way: multiplying left to right 1e-40 - # cannot be held into a float32 so it causes an underflow while from - # right to left we have no such problem and the result is larger - x = np.array([1e-20, 1e-20, 10, 10, 10], dtype=np.float32) - vx = K.variable(x) - p1 = K.eval(K.foldl(lambda a, b: a * b, vx)) - p2 = K.eval(K.foldr(lambda a, b: a * b, vx)) - - assert p1 < p2 - assert 9e-38 < p2 <= 1e-37 - - @pytest.mark.skipif(K.backend() == 'cntk', - reason='cntk has issues with negative number.') - def test_arange(self): - for test_value in (-20, 0, 1, 10): - a_list = [] - dtype_list = [] - for k in WITH_NP: - t = k.arange(test_value) - a = k.eval(t) - assert np.array_equal(a, np.arange(test_value)) - dtype_list.append(k.dtype(t)) - a_list.append(a) - - for i in range(len(a_list) - 1): - assert np.array_equal(a_list[i], a_list[i + 1]) - - for start, stop, step in ((0, 5, 1), (-5, 5, 2), (0, 1, 2)): - a_list = [] - for k in WITH_NP: - a = k.eval(k.arange(start, stop, step)) - assert np.array_equal(a, np.arange(start, stop, step)) - a_list.append(a) - for i in range(len(a_list) - 1): - assert np.array_equal(a_list[i], a_list[i + 1]) - - for dtype in ('int32', 'int64', 'float32', 'float64'): - for k in WITH_NP: - t = k.arange(10, dtype=dtype) - assert k.dtype(t) == dtype - - start = K.constant(1, dtype='int32') - t = K.arange(start) - assert len(K.eval(t)) == 1 - - start = K.constant(-1, dtype='int32') - t = K.arange(start) - assert len(K.eval(t)) == 0 - - @pytest.mark.parametrize('training', [True, False]) - def test_in_train_phase(self, training): - check_two_tensor_operation('in_train_phase', (3, 3), (2, 2), WITH_NP, - training=training) - check_two_tensor_operation('in_train_phase', (2, 3), (2, 3), WITH_NP, - training=training) - - @pytest.mark.parametrize('training', [True, False]) - def test_in_test_phase(self, training): - check_two_tensor_operation('in_test_phase', (3, 3), (2, 2), WITH_NP, - training=training) - check_two_tensor_operation('in_test_phase', (2, 3), (2, 3), WITH_NP, - training=training) - - @pytest.mark.parametrize('dtype', ['', 'beerfloat', 123]) - def test_setfloatx_incorrect_values(self, dtype): - # Keep track of the old value - old_floatx = K.floatx() - with pytest.raises(ValueError): - K.set_floatx(dtype) - assert K.floatx() == old_floatx - - @pytest.mark.parametrize('dtype', ['float16', 'float32', 'float64']) - def 
test_setfloatx_correct_values(self, dtype): - # Keep track of the old value - old_floatx = K.floatx() - # Check correct values - K.set_floatx(dtype) - assert K.floatx() == dtype - # Make sure that changes to the global floatx are effectively - # taken into account by the backend. - check_dtype(K.variable([10]), dtype) - # Restore old value - K.set_floatx(old_floatx) - - @pytest.mark.parametrize('dtype', ['float16', 'float32', 'float64']) - def test_dtype(self, dtype): - assert K.dtype(K.variable(1, dtype=dtype)) == dtype - - @pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported') - def test_variable_support_bool_dtype(self): - assert K.dtype(K.variable(1, dtype='int16')) == 'int16' - assert K.dtype(K.variable(False, dtype='bool')) == 'bool' - with pytest.raises(TypeError): - K.variable('', dtype='unsupported') - - @pytest.mark.parametrize('shape', [(4, 2), (2, 3)]) - def test_clip_supports_tensor_arguments(self, shape): - # GitHub issue: 11435 - _, x = parse_shape_or_val(shape) - _, min_val = parse_shape_or_val(shape) - max_val = min_val + 1. - x_k = K.variable(x) - min_val_k = K.variable(min_val) - max_val_k = K.variable(max_val) - assert np.allclose(K.eval(K.clip(x_k, min_val_k, max_val_k)), - KNP.eval(KNP.clip(x, min_val, max_val))) - - @pytest.mark.skipif(K.backend() != 'tensorflow', - reason='This test is for tensorflow parallelism.') - def test_tensorflow_session_parallelism_settings(self, monkeypatch): - for threads in [0, 1, 4]: - K.clear_session() - monkeypatch.setenv('OMP_NUM_THREADS', str(threads)) - cfg = K.get_session()._config - assert cfg.intra_op_parallelism_threads == threads - assert cfg.inter_op_parallelism_threads == threads - - -if __name__ == '__main__': - pytest.main([__file__]) -import tempfile - -import numpy as np -import pytest - -from keras.datasets import boston_housing -from keras.datasets import imdb -from keras.datasets import reuters - - -@pytest.fixture -def fake_downloaded_boston_path(monkeypatch): - num_rows = 100 - num_cols = 10 - rng = np.random.RandomState(123) - - x = rng.randint(1, 100, size=(num_rows, num_cols)) - y = rng.normal(loc=100, scale=15, size=num_rows) - - with tempfile.NamedTemporaryFile('wb', delete=True) as f: - np.savez(f, x=x, y=y) - monkeypatch.setattr(boston_housing, 'get_file', - lambda *args, **kwargs: f.name) - yield f.name - - -@pytest.fixture -def fake_downloaded_imdb_path(monkeypatch): - train_rows = 100 - test_rows = 20 - seq_length = 10 - rng = np.random.RandomState(123) - - x_train = rng.randint(1, 100, size=(train_rows, seq_length)) - y_train = rng.binomial(n=1, p=0.5, size=train_rows) - x_test = rng.randint(1, 100, size=(test_rows, seq_length)) - y_test = rng.binomial(n=1, p=0.5, size=test_rows) - - with tempfile.NamedTemporaryFile('wb', delete=True) as f: - np.savez(f, x_train=x_train, y_train=y_train, - x_test=x_test, y_test=y_test) - monkeypatch.setattr(imdb, 'get_file', lambda *args, **kwargs: f.name) - yield f.name - - -@pytest.fixture -def fake_downloaded_reuters_path(monkeypatch): - num_rows = 100 - seq_length = 10 - rng = np.random.RandomState(123) - - x = rng.randint(1, 100, size=(num_rows, seq_length)) - y = rng.binomial(n=1, p=0.5, size=num_rows) - - with tempfile.NamedTemporaryFile('wb', delete=True) as f: - np.savez(f, x=x, y=y) - monkeypatch.setattr(reuters, 'get_file', - lambda *args, **kwargs: f.name) - yield f.name - - -def test_boston_load_does_not_affect_global_rng(fake_downloaded_boston_path): - np.random.seed(1337) - before = np.random.randint(0, 100, size=10) - - np.random.seed(1337) - 
boston_housing.load_data(path=fake_downloaded_boston_path, seed=9876) - after = np.random.randint(0, 100, size=10) - - assert np.array_equal(before, after) - - -def test_imdb_load_does_not_affect_global_rng(fake_downloaded_imdb_path): - np.random.seed(1337) - before = np.random.randint(0, 100, size=10) - - np.random.seed(1337) - imdb.load_data(path=fake_downloaded_imdb_path, seed=9876) - after = np.random.randint(0, 100, size=10) - - assert np.array_equal(before, after) - - -def test_reuters_load_does_not_affect_global_rng(fake_downloaded_reuters_path): - np.random.seed(1337) - before = np.random.randint(0, 100, size=10) - - np.random.seed(1337) - reuters.load_data(path=fake_downloaded_reuters_path, seed=9876) - after = np.random.randint(0, 100, size=10) - - assert np.array_equal(before, after) -import pytest -import json -import numpy as np - -from keras.layers import Dense, Dropout, Conv2D, InputLayer -from keras import layers -from keras.engine import Input, Layer, saving, get_source_inputs -from keras.models import Model, Sequential -from keras import backend as K -from keras.models import model_from_json, model_from_yaml -from keras.initializers import Constant - - -skipif_no_tf_gpu = pytest.mark.skipif( - (K.backend() != 'tensorflow' or - not K.tensorflow_backend._get_available_gpus()), - reason='Requires TensorFlow backend and a GPU') - - -def test_get_updates_for(): - a = Input(shape=(2,)) - dense_layer = Dense(1) - dense_layer.add_update(0, inputs=a) - dense_layer.add_update(1, inputs=None) - - assert dense_layer.get_updates_for(a) == [0] - assert dense_layer.get_updates_for(None) == [1] - - -def test_get_losses_for(): - a = Input(shape=(2,)) - dense_layer = Dense(1) - dense_layer.add_loss(0, inputs=a) - dense_layer.add_loss(1, inputs=None) - - assert dense_layer.get_losses_for(a) == [0] - assert dense_layer.get_losses_for(None) == [1] - - -def test_trainable_weights(): - a = Input(shape=(2,)) - b = Dense(1)(a) - model = Model(a, b) - - weights = model.weights - assert model.trainable_weights == weights - assert model.non_trainable_weights == [] - - model.trainable = False - assert model.trainable_weights == [] - assert model.non_trainable_weights == weights - - model.trainable = True - assert model.trainable_weights == weights - assert model.non_trainable_weights == [] - - model.layers[1].trainable = False - assert model.trainable_weights == [] - assert model.non_trainable_weights == weights - - # sequential model - model = Sequential() - model.add(Dense(1, input_dim=2)) - weights = model.weights - - assert model.trainable_weights == weights - assert model.non_trainable_weights == [] - - model.trainable = False - assert model.trainable_weights == [] - assert model.non_trainable_weights == weights - - model.trainable = True - assert model.trainable_weights == weights - assert model.non_trainable_weights == [] - - model.layers[0].trainable = False - assert model.trainable_weights == [] - assert model.non_trainable_weights == weights - - -def test_valid_compute_mask(): - model = Sequential() - model.add(Dense(1, input_dim=2)) - assert model.layers[0].supports_masking is True - assert model.layers[0].compute_mask([model.input], [0., 1.]) == [0., 1.] 
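A minimal sketch (an illustration of mine, not part of the deleted file) of the masking contract the surrounding compute_mask tests exercise: a layer opts into masking by setting `supports_masking = True` and forwarding the mask from `compute_mask`, while a layer that leaves it `False` must raise on any non-None mask. The class name `PassThroughMask` is hypothetical.

from keras.engine import Layer

class PassThroughMask(Layer):
    # Hypothetical layer that opts into masking, as Dense does above.
    def __init__(self, **kwargs):
        super(PassThroughMask, self).__init__(**kwargs)
        self.supports_masking = True

    def call(self, inputs):
        # Identity transform; masking support is the point of the sketch.
        return inputs

    def compute_mask(self, inputs, mask=None):
        # Default behaviour when supports_masking is True:
        # propagate the incoming mask unchanged.
        return mask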
- - -def test_invalid_compute_mask(): - model = Sequential() - model.add(Conv2D(1, [2, 2], input_shape=[3, 3, 1])) - assert model.layers[0].supports_masking is False - assert model.layers[0].compute_mask([model.input], [None]) is None - - mask = np.array([[0., 1.], [1., 0.]]) - with pytest.raises(TypeError): - model.layers[0].compute_mask([model.input], [mask]) - with pytest.raises(TypeError): - model.layers[0].compute_mask([model.input], mask) - - -def test_get_layer(): - model = Sequential() - model.add(Dense(1, input_dim=2)) - with pytest.raises(ValueError): - model.get_layer(index=5) - with pytest.raises(ValueError): - model.get_layer(index=None) - with pytest.raises(ValueError): - model.get_layer(name='conv') - - -def test_learning_phase(): - a = Input(shape=(32,), name='input_a') - b = Input(shape=(32,), name='input_b') - - a_2 = Dense(16, name='dense_1')(a) - dp = Dropout(0.5, name='dropout') - b_2 = dp(b) - - assert not a_2._uses_learning_phase - assert b_2._uses_learning_phase - - # test merge - m = layers.concatenate([a_2, b_2]) - assert m._uses_learning_phase - - # Test recursion - model = Model([a, b], [a_2, b_2]) - print(model.input_spec) - assert model.uses_learning_phase - - c = Input(shape=(32,), name='input_c') - d = Input(shape=(32,), name='input_d') - - c_2, b_2 = model([c, d]) - assert c_2._uses_learning_phase - assert b_2._uses_learning_phase - - # try actually running graph - fn = K.function(model.inputs + [K.learning_phase()], model.outputs) - input_a_np = np.random.random((10, 32)) - input_b_np = np.random.random((10, 32)) - fn_outputs_no_dp = fn([input_a_np, input_b_np, 0]) - fn_outputs_dp = fn([input_a_np, input_b_np, 1]) - # output a: nothing changes - assert fn_outputs_no_dp[0].sum() == fn_outputs_dp[0].sum() - # output b: dropout applied - assert fn_outputs_no_dp[1].sum() != fn_outputs_dp[1].sum() - - -def test_layer_call_arguments(): - # Test the ability to pass and serialize arguments to `call`. 
- inp = layers.Input(shape=(2,)) - x = layers.Dense(3)(inp) - x = layers.Dropout(0.5)(x, training=True) - model = Model(inp, x) - assert not model.uses_learning_phase - - # Test that argument is kept when applying the model - inp2 = layers.Input(shape=(2,)) - out2 = model(inp2) - assert not out2._uses_learning_phase - - # Test that argument is kept after loading a model - config = model.get_config() - model = Model.from_config(config) - assert not model.uses_learning_phase - - -def test_node_construction(): - #################################################### - # test basics - - a = Input(shape=(32,), name='input_a') - b = Input(shape=(32,), name='input_b') - - assert a._keras_shape == (None, 32) - a_layer, a_node_index, a_tensor_index = a._keras_history - b_layer, b_node_index, b_tensor_index = b._keras_history - assert len(a_layer._inbound_nodes) == 1 - assert a_tensor_index == 0 - node = a_layer._inbound_nodes[a_node_index] - assert node.outbound_layer == a_layer - - assert isinstance(node.inbound_layers, list) - assert node.inbound_layers == [] - assert isinstance(node.input_tensors, list) - assert node.input_tensors == [a] - assert isinstance(node.input_masks, list) - assert node.input_masks == [None] - assert isinstance(node.input_shapes, list) - assert node.input_shapes == [(None, 32)] - - assert isinstance(node.output_tensors, list) - assert node.output_tensors == [a] - assert isinstance(node.output_shapes, list) - assert node.output_shapes == [(None, 32)] - assert isinstance(node.output_masks, list) - assert node.output_masks == [None] - - dense = Dense(16, name='dense_1') - a_2 = dense(a) - b_2 = dense(b) - - assert len(dense._inbound_nodes) == 2 - assert len(dense._outbound_nodes) == 0 - assert dense._inbound_nodes[0].inbound_layers == [a_layer] - assert dense._inbound_nodes[0].outbound_layer == dense - assert dense._inbound_nodes[1].inbound_layers == [b_layer] - assert dense._inbound_nodes[1].outbound_layer == dense - - assert dense._inbound_nodes[0].input_tensors == [a] - assert dense._inbound_nodes[1].input_tensors == [b] - - assert dense._inbound_nodes[0].get_config()['inbound_layers'] == [ - 'input_a'] - assert dense._inbound_nodes[1].get_config()['inbound_layers'] == [ - 'input_b'] - - # test layer properties - test_layer = Dense(16, name='test_layer') - a_test = test_layer(a) - assert K.int_shape(test_layer.kernel) == (32, 16) - assert test_layer.input == a - assert test_layer.output == a_test - assert test_layer.input_mask is None - assert test_layer.output_mask is None - assert test_layer.input_shape == (None, 32) - assert test_layer.output_shape == (None, 16) - - with pytest.raises(AttributeError): - dense.input - with pytest.raises(AttributeError): - dense.output - with pytest.raises(AttributeError): - dense.input_mask - with pytest.raises(AttributeError): - dense.output_mask - - assert dense.get_input_at(0) == a - assert dense.get_input_at(1) == b - assert dense.get_output_at(0) == a_2 - assert dense.get_output_at(1) == b_2 - assert dense.get_input_shape_at(0) == (None, 32) - assert dense.get_input_shape_at(1) == (None, 32) - assert dense.get_output_shape_at(0) == (None, 16) - assert dense.get_output_shape_at(1) == (None, 16) - assert dense.get_input_mask_at(0) is None - assert dense.get_input_mask_at(1) is None - assert dense.get_output_mask_at(0) is None - assert dense.get_output_mask_at(1) is None - - -def test_multi_input_layer(): - #################################################### - # test multi-input layer - a = Input(shape=(32,), name='input_a') - b =
Input(shape=(32,), name='input_b') - - dense = Dense(16, name='dense_1') - a_2 = dense(a) - b_2 = dense(b) - - merged = layers.concatenate([a_2, b_2], name='merge') - assert merged._keras_shape == (None, 16 * 2) - merge_layer, merge_node_index, merge_tensor_index = merged._keras_history - - assert merge_node_index == 0 - assert merge_tensor_index == 0 - - assert len(merge_layer._inbound_nodes) == 1 - assert len(merge_layer._outbound_nodes) == 0 - - assert len(merge_layer._inbound_nodes[0].input_tensors) == 2 - assert len(merge_layer._inbound_nodes[0].inbound_layers) == 2 - - c = Dense(64, name='dense_2')(merged) - d = Dense(5, name='dense_3')(c) - - model = Model(inputs=[a, b], outputs=[c, d], name='model') - assert len(model.layers) == 6 - expected_shapes = [(None, 64), (None, 5)] - assert model.compute_output_shape( - [(None, 32), (None, 32)]) == expected_shapes - assert model.compute_mask([a, b], [None, None]) == [None, None] - assert model.compute_output_shape( - [(None, 32), (None, 32)]) == expected_shapes - - # we don't check names of first 2 layers (inputs) because - # ordering of same-level layers is not fixed - expected_names = ['dense_1', 'merge', 'dense_2', 'dense_3'] - assert [l.name for l in model.layers][2:] == expected_names - assert [l.name for l in model._input_layers] == ['input_a', 'input_b'] - assert [l.name for l in model._output_layers] == ['dense_2', 'dense_3'] - - # actually run model - fn = K.function(model.inputs, model.outputs) - input_a_np = np.random.random((10, 32)) - input_b_np = np.random.random((10, 32)) - fn_outputs = fn([input_a_np, input_b_np]) - assert [x.shape for x in fn_outputs] == [(10, 64), (10, 5)] - - # test get_source_inputs - assert get_source_inputs(c) == [a, b] - - # serialization / deserialization - json_config = model.to_json() - recreated_model = model_from_json(json_config) - recreated_model.compile('rmsprop', 'mse') - - assert [l.name for l in recreated_model.layers][2:] == expected_names - assert [l.name for l in recreated_model._input_layers] == [ - 'input_a', 'input_b'] - assert [l.name for l in recreated_model._output_layers] == [ - 'dense_2', 'dense_3'] - - fn = K.function(recreated_model.inputs, recreated_model.outputs) - input_a_np = np.random.random((10, 32)) - input_b_np = np.random.random((10, 32)) - fn_outputs = fn([input_a_np, input_b_np]) - assert [x.shape for x in fn_outputs] == [(10, 64), (10, 5)] - - -def test_recursion(): - #################################################### - # test recursion - - a = Input(shape=(32,), name='input_a') - b = Input(shape=(32,), name='input_b') - - dense = Dense(16, name='dense_1') - a_2 = dense(a) - b_2 = dense(b) - merged = layers.concatenate([a_2, b_2], name='merge') - c = Dense(64, name='dense_2')(merged) - d = Dense(5, name='dense_3')(c) - - model = Model(inputs=[a, b], outputs=[c, d], name='model') - - e = Input(shape=(32,), name='input_e') - f = Input(shape=(32,), name='input_f') - g, h = model([e, f]) - - # g2, h2 = model([e, f]) - - assert g._keras_shape == c._keras_shape - assert h._keras_shape == d._keras_shape - - # test separate manipulation of different layer outputs - i = Dense(7, name='dense_4')(h) - - final_model = Model(inputs=[e, f], outputs=[i, g], name='final') - assert len(final_model.inputs) == 2 - assert len(final_model.outputs) == 2 - assert len(final_model.layers) == 4 - - # we don't check names of first 2 layers (inputs) because - # ordering of same-level layers is not fixed - expected_shapes = [(10, 7), (10, 64)] - assert [layer.name for layer in 
final_model.layers][2:] == [ - 'model', 'dense_4'] - assert model.compute_mask([e, f], [None, None]) == [None, None] - assert final_model.compute_output_shape( - [(10, 32), (10, 32)]) == expected_shapes - - # run recursive model - fn = K.function(final_model.inputs, final_model.outputs) - input_a_np = np.random.random((10, 32)) - input_b_np = np.random.random((10, 32)) - fn_outputs = fn([input_a_np, input_b_np]) - assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)] - - # test serialization - model_config = final_model.get_config() - print(json.dumps(model_config, indent=4)) - recreated_model = Model.from_config(model_config) - - fn = K.function(recreated_model.inputs, recreated_model.outputs) - input_a_np = np.random.random((10, 32)) - input_b_np = np.random.random((10, 32)) - fn_outputs = fn([input_a_np, input_b_np]) - assert [x.shape for x in fn_outputs] == [(10, 7), (10, 64)] - - #################################################### - # test multi-input multi-output - - j = Input(shape=(32,), name='input_j') - k = Input(shape=(32,), name='input_k') - m, n = model([j, k]) - - o = Input(shape=(32,), name='input_o') - p = Input(shape=(32,), name='input_p') - q, r = model([o, p]) - - assert n._keras_shape == (None, 5) - assert q._keras_shape == (None, 64) - s = layers.concatenate([n, q], name='merge_nq') - assert s._keras_shape == (None, 64 + 5) - - # test with single output as 1-elem list - multi_io_model = Model([j, k, o, p], [s]) - - fn = K.function(multi_io_model.inputs, multi_io_model.outputs) - fn_outputs = fn([np.random.random((10, 32)), np.random.random((10, 32)), - np.random.random((10, 32)), np.random.random((10, 32))]) - assert [x.shape for x in fn_outputs] == [(10, 69)] - - # test with single output as tensor - multi_io_model = Model([j, k, o, p], s) - - fn = K.function(multi_io_model.inputs, multi_io_model.outputs) - fn_outputs = fn([np.random.random((10, 32)), np.random.random((10, 32)), - np.random.random((10, 32)), np.random.random((10, 32))]) - # note that the output of the K.function will still be a 1-elem list - assert [x.shape for x in fn_outputs] == [(10, 69)] - - # test serialization - model_config = multi_io_model.get_config() - recreated_model = Model.from_config(model_config) - - fn = K.function(recreated_model.inputs, recreated_model.outputs) - fn_outputs = fn([np.random.random((10, 32)), np.random.random((10, 32)), - np.random.random((10, 32)), np.random.random((10, 32))]) - # note that the output of the K.function will still be a 1-elem list - assert [x.shape for x in fn_outputs] == [(10, 69)] - - config = model.get_config() - Model.from_config(config) - - model.summary() - json_str = model.to_json() - model_from_json(json_str) - - yaml_str = model.to_yaml() - model_from_yaml(yaml_str) - - #################################################### - # test invalid graphs - - # input is not an Input tensor - j = Input(shape=(32,), name='input_j') - j = Dense(32)(j) - k = Input(shape=(32,), name='input_k') - m, n = model([j, k]) - - with pytest.raises(ValueError): - Model([j, k], [m, n]) - - # disconnected graph - j = Input(shape=(32,), name='input_j') - k = Input(shape=(32,), name='input_k') - m, n = model([j, k]) - with pytest.raises(ValueError): - Model([j], [m, n]) - - # redundant outputs - j = Input(shape=(32,), name='input_j') - k = Input(shape=(32,), name='input_k') - m, n = model([j, k]) - # this should work with a warning - Model([j, k], [m, n, n]) - - # redundant inputs - j = Input(shape=(32,), name='input_j') - k = Input(shape=(32,), name='input_k') - 
m, n = model([j, k]) - with pytest.raises(ValueError): - Model([j, k, j], [m, n]) - - # I have no idea what I'm doing: garbage as inputs/outputs - j = Input(shape=(32,), name='input_j') - k = Input(shape=(32,), name='input_k') - m, n = model([j, k]) - with pytest.raises(ValueError): - Model([j, k], [m, n, 0]) - - #################################################### - # test calling layers/models on TF tensors - - if K.backend() == 'tensorflow': - import tensorflow as tf - j = Input(shape=(32,), name='input_j') - k = Input(shape=(32,), name='input_k') - m, n = model([j, k]) - tf_model = Model([j, k], [m, n]) - - j_tf = tf.placeholder(dtype=K.floatx()) - k_tf = tf.placeholder(dtype=K.floatx()) - m_tf, n_tf = tf_model([j_tf, k_tf]) - assert m_tf.get_shape().as_list() == [None, 64] - assert n_tf.get_shape().as_list() == [None, 5] - - # test merge - layers.concatenate([j_tf, k_tf], axis=1) - layers.add([j_tf, k_tf]) - - # test tensor input - x = tf.placeholder(shape=(None, 2), dtype=K.floatx()) - InputLayer(input_tensor=x) - - x = Input(tensor=x) - Dense(2)(x) - - -def test_load_layers(): - from keras.layers import ConvLSTM2D, TimeDistributed - from keras.layers import Bidirectional, Conv2D, Input - from keras.models import Model - - if K.backend() == 'tensorflow' or K.backend() == 'cntk': - inputs = Input(shape=(10, 20, 20, 1)) - else: - inputs = Input(shape=(10, 1, 20, 20)) - td_conv = TimeDistributed(Conv2D(15, (5, 5)))(inputs) - bi_conv = Bidirectional(ConvLSTM2D(10, (3, 3)), - merge_mode='concat')(td_conv) - model = Model(inputs=inputs, outputs=bi_conv) - - weight_value_tuples = [] - - # TimeDistributed Conv2D layer - # use 'channels_first' data format to check that - # the function is being called correctly for Conv2D - # old: (filters, stack_size, kernel_rows, kernel_cols) - # new: (kernel_rows, kernel_cols, stack_size, filters) - weight_tensor_td_conv_old = list() - weight_tensor_td_conv_old.append(np.zeros((15, 1, 5, 5))) - weight_tensor_td_conv_old.append(np.zeros((15,))) - td_conv_layer = model.layers[1] - td_conv_layer.layer.data_format = 'channels_first' - weight_tensor_td_conv_new = saving.preprocess_weights_for_loading( - td_conv_layer, - weight_tensor_td_conv_old, - original_keras_version='1') - symbolic_weights = td_conv_layer.weights - assert (len(symbolic_weights) == len(weight_tensor_td_conv_new)) - weight_value_tuples += zip(symbolic_weights, weight_tensor_td_conv_new) - - # Bidirectional ConvLSTM2D layer - # old ConvLSTM2D took a list of 12 weight tensors, - # returns a list of 3 concatenated larger tensors.
- weights_bi_conv_old = [] - for j in range(2): # bidirectional - for i in range(4): - weights_bi_conv_old.append(np.zeros((3, 3, 15, 10))) # kernel - weights_bi_conv_old.append( - np.zeros((3, 3, 10, 10))) # recurrent kernel - weights_bi_conv_old.append(np.zeros((10,))) # bias - - bi_convlstm_layer = model.layers[2] - weights_bi_conv_new = saving.preprocess_weights_for_loading( - bi_convlstm_layer, - weights_bi_conv_old, - original_keras_version='1') - - symbolic_weights = bi_convlstm_layer.weights - assert (len(symbolic_weights) == len(weights_bi_conv_new)) - weight_value_tuples += zip(symbolic_weights, weights_bi_conv_new) - - K.batch_set_value(weight_value_tuples) - - assert np.all(K.eval(model.layers[1].weights[0]) - == weight_tensor_td_conv_new[0]) - assert np.all(K.eval(model.layers[1].weights[1]) - == weight_tensor_td_conv_new[1]) - assert np.all(K.eval(model.layers[2].weights[0]) == weights_bi_conv_new[0]) - assert np.all(K.eval(model.layers[2].weights[1]) == weights_bi_conv_new[1]) - assert np.all(K.eval(model.layers[2].weights[2]) == weights_bi_conv_new[2]) - assert np.all(K.eval(model.layers[2].weights[3]) == weights_bi_conv_new[3]) - assert np.all(K.eval(model.layers[2].weights[4]) == weights_bi_conv_new[4]) - assert np.all(K.eval(model.layers[2].weights[5]) == weights_bi_conv_new[5]) - - -def convert_weights(layer, weights): - if layer.__class__.__name__ == 'GRU': - W = [np.split(w, 3, axis=-1) for w in weights] - return sum(map(list, zip(*W)), []) - elif layer.__class__.__name__ in ('LSTM', 'ConvLSTM2D'): - W = [np.split(w, 4, axis=-1) for w in weights] - for w in W: - w[2], w[1] = w[1], w[2] - return sum(map(list, zip(*W)), []) - elif layer.__class__.__name__ == 'Conv2DTranspose': - return [np.transpose(weights[0], (2, 3, 0, 1)), weights[1]] - return weights - - -@pytest.mark.parametrize("layer", [ - layers.GRU(2, input_shape=[3, 5]), - layers.LSTM(2, input_shape=[3, 5]), - layers.ConvLSTM2D(5, (3, 3), - input_shape=[6, 6, 6, 6], - data_format='channels_first'), -], ids=['GRU', 'LSTM', 'ConvLSTM2D']) -def test_preprocess_weights_for_loading(layer): - # A model is needed to initialize weights. - _ = Sequential([layer]) - weights1 = layer.get_weights() - weights2 = saving.preprocess_weights_for_loading( - layer, convert_weights(layer, weights1), - original_keras_version='1') - assert all([np.allclose(x, y, 1e-5) - for (x, y) in zip(weights1, weights2)]) - - -@pytest.mark.parametrize("layer", [ - layers.Conv2D(2, (3, 3), input_shape=[5, 5, 3]), - layers.Conv2DTranspose(2, (5, 5), - input_shape=[7, 7, 3], - data_format='channels_first'), -], ids=['Conv2D', 'Conv2DTranspose']) -def test_preprocess_weights_for_loading_for_model(layer): - model = Sequential([layer]) - weights1 = model.get_weights() - weights2 = saving.preprocess_weights_for_loading( - model, convert_weights(layer, weights1), - original_keras_version='1') - assert all([np.allclose(x, y, 1e-5) - for (x, y) in zip(weights1, weights2)]) - - -@pytest.mark.parametrize('layer_class,args', [ - (layers.GRU, {'units': 2, 'input_shape': [3, 5]}), - (layers.GRU, {'units': 2, 'input_shape': [3, 5], 'reset_after': True}), - (layers.LSTM, {'units': 2, 'input_shape': [3, 5]}), -]) -def test_preprocess_weights_for_loading_rnn_should_be_idempotent(layer_class, args): - """ - Loading weights from a RNN class to itself should not convert the weights. - """ - # layer can be instantiated only for supported backends - layer = layer_class(**args) - # A model is needed to initialize weights. 
- _ = Sequential([layer]) - weights1 = layer.get_weights() - weights2 = saving.preprocess_weights_for_loading(layer, weights1) - assert all([np.allclose(x, y, 1e-5) for (x, y) in zip(weights1, weights2)]) - - -@pytest.mark.parametrize('layer_class,args', [ - (layers.CuDNNGRU, {'units': 2, 'input_shape': [3, 5]}), - (layers.CuDNNLSTM, {'units': 2, 'input_shape': [3, 5]}), -]) -@skipif_no_tf_gpu -def test_preprocess_weights_for_loading_cudnn_rnn_should_be_idempotent(layer_class, - args): - test_preprocess_weights_for_loading_rnn_should_be_idempotent( - layer_class, args) - - -def test_recursion_with_bn_and_loss(): - model1 = Sequential([ - layers.Dense(5, input_dim=5, activity_regularizer='l1'), - layers.BatchNormalization(), - layers.Dense(5), - ]) - - print('NEW MODEL') - inputs = layers.Input(shape=(5,)) - outputs = model1(inputs) - model2 = Model(inputs=inputs, outputs=outputs) - - assert len(model1.updates) == 2 - assert len(model2.updates) == 2 - assert len(model1.losses) == 1 - assert len(model2.losses) == 1, model2.layers[1]._per_input_losses - - model1.compile(optimizer='sgd', loss='categorical_crossentropy') - model2.compile(optimizer='sgd', loss='categorical_crossentropy') - - x = np.ones((3, 5)) - y = np.ones((3, 5)) - model1.fit(x, y, verbose=0, epochs=1) - model2.fit(x, y, verbose=0, epochs=1) - - -def test_activity_regularization_with_model_composition(): - - def reg(x): - return K.sum(x) - - net_a_input = Input((2,)) - net_a = net_a_input - net_a = Dense(2, kernel_initializer='ones', - use_bias=False, - activity_regularizer=reg)(net_a) - model_a = Model([net_a_input], [net_a]) - - net_b_input = Input((2,)) - net_b = model_a(net_b_input) - model_b = Model([net_b_input], [net_b]) - - model_b.compile(optimizer='sgd', loss=None) - x = np.ones((1, 2)) - loss = model_b.evaluate(x) - assert loss == 4 - - -def test_shared_layer_depth_is_correct(): - # Basic outline here: we have a shared embedding layer, and two inputs that - # go through different depths of computation in the graph before - # the final output. We need the computed depth of the input layers to be - # the same, because they both pass through the embedding layer before anything - # else happens. That's what we're testing. 
- from keras.layers import Embedding, Input, Dense, Concatenate - from keras.models import Model - input1 = Input(shape=(10,), name='input1') - input2 = Input(shape=(10,), name='input2') - embedding_layer = Embedding(name='embedding', input_dim=5, output_dim=10) - embedded_input1 = embedding_layer(input1) - embedded_input2 = embedding_layer(input2) - transformed_input2 = Dense(6)(Dense(5)(Dense(3)(embedded_input2))) - final_output = Dense(2)(Concatenate()( - [embedded_input1, transformed_input2])) - model = Model(inputs=[input1, input2], outputs=final_output) - input1_depth = -1 - input2_depth = -1 - for depth, layers in model._layers_by_depth.items(): - for layer in layers: - if layer.name == 'input1': - input1_depth = depth - if layer.name == 'input2': - input2_depth = depth - assert input1_depth != -1 - assert input1_depth == input2_depth - - -def test_layer_sharing_at_heterogeneous_depth(): - x_val = np.random.random((10, 5)) - - x = Input(shape=(5,)) - A = Dense(5, name='A') - B = Dense(5, name='B') - output = A(B(A(B(x)))) - M = Model(x, output) - - output_val = M.predict(x_val) - - config = M.get_config() - weights = M.get_weights() - - M2 = Model.from_config(config) - M2.set_weights(weights) - - output_val_2 = M2.predict(x_val) - np.testing.assert_allclose(output_val, output_val_2, atol=1e-6) - - -def test_layer_sharing_at_heterogeneous_depth_with_concat(): - input_shape = (16, 9, 3) - input_layer = Input(shape=input_shape) - - A = Dense(3, name='dense_A') - B = Dense(3, name='dense_B') - C = Dense(3, name='dense_C') - - x1 = B(A(input_layer)) - x2 = A(C(input_layer)) - output = layers.concatenate([x1, x2]) - - M = Model(inputs=input_layer, outputs=output) - - x_val = np.random.random((10, 16, 9, 3)) - output_val = M.predict(x_val) - - config = M.get_config() - weights = M.get_weights() - - M2 = Model.from_config(config) - M2.set_weights(weights) - - output_val_2 = M2.predict(x_val) - np.testing.assert_allclose(output_val, output_val_2, atol=1e-6) - - -def test_layer_sharing_at_heterogeneous_depth_order(): - # This tests for the bug in this issue - # https://github.com/keras-team/keras/issues/11159 - # It occurs with layer sharing at heterogeneous depth when - # the layers need to be applied in an order that differs from - # the order that occurs in the config. 
- - input_shape = (1, 12) - input_layer = Input(shape=input_shape) - - A = Dense(12, name='layer_a') - r1 = layers.Reshape((12,))(input_layer) - Aout1 = A(r1) - - r2 = layers.Reshape((12,))(A(input_layer)) - Aout2 = A(r2) - - # Note: if the order of the layers in the concat is - # changed to ([Aout1, Aout2]) the bug doesn't trigger - c1 = layers.concatenate([Aout2, Aout1]) - output = Dense(2, name='layer_b')(c1) - - M = Model(inputs=input_layer, outputs=output) - - x_val = np.random.random((10,) + input_shape) - output_val = M.predict(x_val) - - config = M.get_config() - weights = M.get_weights() - - M2 = Model.from_config(config) - M2.set_weights(weights) - - output_val_2 = M2.predict(x_val) - np.testing.assert_allclose(output_val, output_val_2, atol=1e-6) - - -def test_multi_output_mask(): - """Fixes #7589""" - class TestMultiOutputLayer(Layer): - def __init__(self, **kwargs): - super(TestMultiOutputLayer, self).__init__(**kwargs) - - def call(self, inputs, **kwargs): - return [K.abs(inputs), K.abs(inputs)] - - def compute_output_shape(self, input_shape): - out_shape = super(TestMultiOutputLayer, self).compute_output_shape( - input_shape) - return [out_shape, out_shape] - - class TestMultiInputLayer(Layer): - def __init__(self, **kwargs): - super(TestMultiInputLayer, self).__init__(**kwargs) - - def call(self, inputs, **kwargs): - negative, positive = inputs - return negative + positive - - input_layer = Input(shape=(16, 16, 3)) - x, y = TestMultiOutputLayer()(input_layer) - z = TestMultiInputLayer()([x, y]) - _ = Model(inputs=input_layer, outputs=z) - assert K.int_shape(z)[1:] == (16, 16, 3) - - -def test_constant_initializer_with_numpy(): - model = Sequential() - model.add(Dense(2, input_shape=(3,), - kernel_initializer=Constant(1.))) - model.add(Dense(3)) - model.compile(loss='mse', optimizer='sgd', metrics=['acc']) - - json_str = model.to_json() - model_from_json(json_str).summary() - - yaml_str = model.to_yaml() - model_from_yaml(yaml_str).summary() - - -if __name__ == '__main__': - pytest.main([__file__]) -import threading - -import pytest -import numpy as np -import pandas as pd -from numpy.testing import assert_allclose -import sys -import scipy.sparse as sparse -from flaky import flaky - -import keras - from keras import losses -from keras.layers import Activation, Dense, Dropout, Conv2D, Concatenate -from keras.engine import Input -from keras.engine.training import Model -from keras.engine import training_utils -from keras.utils.generic_utils import slice_arrays -from keras.models import Sequential -from keras import backend as K -from keras.utils import Sequence -from keras.callbacks import LambdaCallback -from keras.callbacks import Callback - - -class RandomSequence(Sequence): - def __init__(self, batch_size, sequence_length=12): - self.batch_size = batch_size - self.sequence_length = sequence_length - self.logs = [] # It will work for use_multiprocessing=False - - def __len__(self): - return self.sequence_length - - def __getitem__(self, idx): - self.logs.append(idx) - return ([np.random.random((self.batch_size, 3)), - np.random.random((self.batch_size, 3))], - [np.random.random((self.batch_size, 4)), - np.random.random((self.batch_size, 3))]) - - def on_epoch_end(self): - pass - - -class threadsafe_iter: - """Takes an iterator/generator and makes it thread-safe by - serializing calls to the `next` method of a given iterator/generator.
""" - - def __init__(self, it): - self.it = it - self.lock = threading.Lock() - - def __iter__(self): - return self - - def __next__(self): - return self.next() - - def next(self): - with self.lock: - return next(self.it) - - -def threadsafe_generator(f): - """A decorator that takes a generator function and makes it thread-safe. - """ - - def g(*a, **kw): - return threadsafe_iter(f(*a, **kw)) - - return g - - -def test_check_array_length_consistency(): - training_utils.check_array_length_consistency(None, None, None) - a_np = np.random.random((4, 3, 3)) - training_utils.check_array_length_consistency(a_np, a_np, a_np) - training_utils.check_array_length_consistency( - [a_np, a_np], [a_np, a_np], [a_np, a_np]) - training_utils.check_array_length_consistency([None], [None], [None]) - - b_np = np.random.random((3, 4)) - with pytest.raises(ValueError): - training_utils.check_array_length_consistency(a_np, None, None) - with pytest.raises(ValueError): - training_utils.check_array_length_consistency(a_np, a_np, None) - with pytest.raises(ValueError): - training_utils.check_array_length_consistency([a_np], [None], None) - with pytest.raises(ValueError): - training_utils.check_array_length_consistency([a_np], [b_np], None) - with pytest.raises(ValueError): - training_utils.check_array_length_consistency([a_np], None, [b_np]) - - -def test_slice_arrays(): - input_a = np.random.random((10, 3)) - slice_arrays(None) - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = [None, [1, 1], None, [1, 1]] - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = [None] - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - input_a = None - slice_arrays(input_a, 0) - slice_arrays(input_a, 0, 1) - slice_arrays(input_a, stop=2) - - -def test_weighted_masked_objective(): - a = Input(shape=(3,), name='input_a') - - # weighted_masked_objective - def mask_dummy(y_true=None, y_pred=None, weight=None): - return K.placeholder(y_true.shape) - - weighted_function = training_utils.weighted_masked_objective( - losses.categorical_crossentropy) - weighted_function(a, a, None) - - -def get_model(num_outputs=1): - a = Input(shape=(3,), name='input_a') - b = Input(shape=(3,), name='input_b') - - a_2 = Dense(4, name='dense_1')(a) - dp = Dropout(0.5, name='dropout') - b_2 = dp(b) - - if num_outputs == 1: - model = Model([a, b], a_2) - else: - model = Model([a, b], [a_2, b_2]) - return model - - -class TrackerCallback(Callback): - - def __init__(self): - # test starting from non-zero initial epoch - self.trained_epochs = [] - self.trained_batches = [] - super(TrackerCallback, self).__init__() - - # define tracker callback - def on_epoch_begin(self, epoch, logs): - self.trained_epochs.append(epoch) - - def on_batch_begin(self, batch, logs): - self.trained_batches.append(batch) - - -# TODO: resolve flakiness issue. Tracked with #11560 -@flaky(rerun_filter=lambda err, *args: issubclass(err[0], AssertionError)) -def test_model_methods(): - model = get_model(num_outputs=2) - - optimizer = 'rmsprop' - loss = 'mse' - loss_weights = [1., 0.5] - - input_a_np = np.random.random((10, 3)) - input_b_np = np.random.random((10, 3)) - - output_a_np = np.random.random((10, 4)) - output_b_np = np.random.random((10, 3)) - - # training/testing doesn't work before compiling.
-    with pytest.raises(RuntimeError):
-        model.train_on_batch([input_a_np, input_b_np],
-                             [output_a_np, output_b_np])
-
-    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
-                  sample_weight_mode=None)
-
-    # test train_on_batch
-    out = model.train_on_batch([input_a_np, input_b_np],
-                               [output_a_np, output_b_np])
-    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
-                               [output_a_np, output_b_np])
-    out = model.train_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
-                               {'dense_1': output_a_np, 'dropout': output_b_np})
-
-    # test fit
-    out = model.fit([input_a_np, input_b_np],
-                    [output_a_np, output_b_np], epochs=1, batch_size=4)
-    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
-                    [output_a_np, output_b_np], epochs=1, batch_size=4)
-    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
-                    {'dense_1': output_a_np, 'dropout': output_b_np},
-                    epochs=1, batch_size=4)
-
-    # test validation_split
-    out = model.fit([input_a_np, input_b_np],
-                    [output_a_np, output_b_np],
-                    epochs=1, batch_size=4, validation_split=0.5)
-    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
-                    [output_a_np, output_b_np],
-                    epochs=1, batch_size=4, validation_split=0.5)
-
-    # test validation data
-    out = model.fit([input_a_np, input_b_np],
-                    [output_a_np, output_b_np],
-                    epochs=1, batch_size=4,
-                    validation_data=([input_a_np, input_b_np],
-                                     [output_a_np, output_b_np]))
-    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
-                    [output_a_np, output_b_np],
-                    epochs=1, batch_size=4, validation_split=0.5,
-                    validation_data=({'input_a': input_a_np,
-                                      'input_b': input_b_np},
-                                     [output_a_np, output_b_np]))
-    out = model.fit({'input_a': input_a_np, 'input_b': input_b_np},
-                    {'dense_1': output_a_np, 'dropout': output_b_np},
-                    epochs=1, batch_size=4, validation_split=0.5,
-                    validation_data=(
-                        {'input_a': input_a_np, 'input_b': input_b_np},
-                        {'dense_1': output_a_np, 'dropout': output_b_np}))
-
-    # test_on_batch
-    out = model.test_on_batch([input_a_np, input_b_np],
-                              [output_a_np, output_b_np])
-    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
-                              [output_a_np, output_b_np])
-    out = model.test_on_batch({'input_a': input_a_np, 'input_b': input_b_np},
-                              {'dense_1': output_a_np, 'dropout': output_b_np})
-
-    # predict_on_batch
-    out = model.predict_on_batch([input_a_np, input_b_np])
-    out = model.predict_on_batch({'input_a': input_a_np,
-                                  'input_b': input_b_np})
-
-    # predict, evaluate
-    input_a_np = np.random.random((10, 3))
-    input_b_np = np.random.random((10, 3))
-
-    output_a_np = np.random.random((10, 4))
-    output_b_np = np.random.random((10, 3))
-
-    out = model.evaluate([input_a_np, input_b_np],
-                         [output_a_np, output_b_np],
-                         batch_size=4)
-    out = model.predict([input_a_np, input_b_np], batch_size=4)
-
-    # with sample_weight
-    input_a_np = np.random.random((10, 3))
-    input_b_np = np.random.random((10, 3))
-
-    output_a_np = np.random.random((10, 4))
-    output_b_np = np.random.random((10, 3))
-
-    sample_weight = [None, np.random.random((10,))]
-    out = model.train_on_batch([input_a_np, input_b_np],
-                               [output_a_np, output_b_np],
-                               sample_weight=sample_weight)
-
-    out = model.test_on_batch([input_a_np, input_b_np],
-                              [output_a_np, output_b_np],
-                              sample_weight=sample_weight)
-
-    # test accuracy metric
-    model.compile(optimizer, loss, metrics=['acc'],
-                  sample_weight_mode=None)
-
-    out = model.train_on_batch([input_a_np, input_b_np],
-                               [output_a_np, output_b_np])
-    assert len(out) == 5
-    out = model.test_on_batch([input_a_np, input_b_np],
-                              [output_a_np, output_b_np])
-    assert len(out) == 5
-
-    # this should also work
-    model.compile(optimizer, loss, metrics={'dense_1': 'acc'},
-                  sample_weight_mode=None)
-
-    out = model.train_on_batch([input_a_np, input_b_np],
-                               [output_a_np, output_b_np])
-    assert len(out) == 4
-    out = model.test_on_batch([input_a_np, input_b_np],
-                              [output_a_np, output_b_np])
-    assert len(out) == 4
-
-    # and this as well
-    model.compile(optimizer, loss, metrics={'dense_1': ['acc']},
-                  sample_weight_mode=None)
-
-    out = model.train_on_batch([input_a_np, input_b_np],
-                               [output_a_np, output_b_np])
-    assert len(out) == 4
-    out = model.test_on_batch([input_a_np, input_b_np],
-                              [output_a_np, output_b_np])
-    assert len(out) == 4
-
-    tracker_cb = TrackerCallback()
-
-    out = model.fit([input_a_np, input_b_np],
-                    [output_a_np, output_b_np], epochs=5, batch_size=4,
-                    initial_epoch=2, callbacks=[tracker_cb])
-    assert tracker_cb.trained_epochs == [2, 3, 4]
-
-    # test starting from non-zero initial epoch for generator too
-    tracker_cb = TrackerCallback()
-
-    @threadsafe_generator
-    def gen_data(batch_sz):
-        while True:
-            yield ([np.random.random((batch_sz, 3)),
-                    np.random.random((batch_sz, 3))],
-                   [np.random.random((batch_sz, 4)),
-                    np.random.random((batch_sz, 3))])
-
-    out = model.fit_generator(gen_data(4), steps_per_epoch=3, epochs=5,
-                              initial_epoch=2, callbacks=[tracker_cb])
-    assert tracker_cb.trained_epochs == [2, 3, 4]
-
-    # test with a custom metric function
-    def mse(y_true, y_pred):
-        return K.mean(K.pow(y_true - y_pred, 2))
-
-    model.compile(optimizer, loss, metrics=[mse],
-                  sample_weight_mode=None)
-
-    out = model.train_on_batch([input_a_np, input_b_np],
-                               [output_a_np, output_b_np])
-    out_len = 1 + 2 * (1 + 1)  # total loss + 2 outputs * (loss + metric)
-    assert len(out) == out_len
-    out = model.test_on_batch([input_a_np, input_b_np],
-                              [output_a_np, output_b_np])
-    assert len(out) == out_len
-
-    input_a_np = np.random.random((10, 3))
-    input_b_np = np.random.random((10, 3))
-
-    output_a_np = np.random.random((10, 4))
-    output_b_np = np.random.random((10, 3))
-
-    out = model.fit([input_a_np, input_b_np],
-                    [output_a_np, output_b_np],
-                    batch_size=4, epochs=1)
-    out = model.evaluate([input_a_np, input_b_np],
-                         [output_a_np, output_b_np],
-                         batch_size=4)
-    out = model.predict([input_a_np, input_b_np], batch_size=4)
-
-    # enable verbose for evaluate_generator
-    out = model.evaluate_generator(gen_data(4), steps=3, verbose=1)
-    # pass generator directly so `is_generator_or_sequence`
-    # doesn't get confused.
-    out = model.evaluate(gen_data(4).it, steps=3, verbose=1)
-
-    # empty batch
-    with pytest.raises(ValueError):
-        @threadsafe_generator
-        def gen_data():
-            while True:
-                yield (np.asarray([]), np.asarray([]))
-
-        out = model.evaluate_generator(gen_data(), steps=1)
-    with pytest.raises(ValueError):
-        @threadsafe_generator
-        def gen_data():
-            while True:
-                yield (np.asarray([]), np.asarray([]))
-
-        out = model.evaluate(gen_data().it, steps=1)
-
-    # x is not a list of numpy arrays.
-    with pytest.raises(ValueError):
-        out = model.predict([None])
-
-    # x does not match _feed_input_names.
-    with pytest.raises(ValueError):
-        out = model.predict([input_a_np, None, input_b_np])
-    with pytest.raises(ValueError):
-        out = model.predict([None, input_a_np, input_b_np])
-
-    # all input/output/weight arrays should have the same number of samples.
-    with pytest.raises(ValueError):
-        out = model.train_on_batch([input_a_np, input_b_np[:2]],
-                                   [output_a_np, output_b_np],
-                                   sample_weight=sample_weight)
-    with pytest.raises(ValueError):
-        out = model.train_on_batch([input_a_np, input_b_np],
-                                   [output_a_np, output_b_np[:2]],
-                                   sample_weight=sample_weight)
-    with pytest.raises(ValueError):
-        out = model.train_on_batch([input_a_np, input_b_np],
-                                   [output_a_np, output_b_np],
-                                   sample_weight=[sample_weight[1],
-                                                  sample_weight[1][:2]])
-
-    # `sample_weight` is neither a dict nor a list.
-    with pytest.raises(TypeError):
-        out = model.train_on_batch([input_a_np, input_b_np],
-                                   [output_a_np, output_b_np],
-                                   sample_weight=tuple(sample_weight))
-
-    # `validation_data` is neither a tuple nor a triple.
-    with pytest.raises(ValueError):
-        out = model.fit([input_a_np, input_b_np],
-                        [output_a_np, output_b_np],
-                        epochs=1, batch_size=4,
-                        validation_data=([input_a_np, input_b_np],))
-
-    # `loss` does not match outputs.
-    with pytest.raises(ValueError):
-        model.compile(optimizer, loss=['mse', 'mae', 'mape'])
-
-    # `loss_weights` does not match output_names.
-    with pytest.raises(ValueError):
-        model.compile(optimizer, loss='mse', loss_weights={'lstm': 0.5})
-
-    # `loss_weights` does not match outputs.
-    with pytest.raises(ValueError):
-        model.compile(optimizer, loss='mse', loss_weights=[0.5])
-
-    # `loss_weights` is invalid type.
-    with pytest.raises(TypeError):
-        model.compile(optimizer, loss='mse', loss_weights=(0.5, 0.5))
-
-    # `sample_weight_mode` does not match output_names.
-    with pytest.raises(ValueError):
-        model.compile(optimizer, loss='mse',
-                      sample_weight_mode={'lstm': 'temporal'})
-
-    # `sample_weight_mode` does not match output_names.
-    with pytest.raises(ValueError):
-        model.compile(optimizer, loss='mse', sample_weight_mode=['temporal'])
-
-    # `sample_weight_mode` matches output_names partially.
-    with pytest.raises(ValueError):
-        model.compile(optimizer, loss='mse',
-                      sample_weight_mode={'dense_1': 'temporal'})
-
-    # `loss` does not exist.
-    with pytest.raises(ValueError):
-        model.compile(optimizer, loss=[])
-
-    model.compile(optimizer, loss=['mse', 'mae'])
-    model.compile(optimizer, loss='mse', loss_weights={'dense_1': 0.2,
-                                                       'dropout': 0.8})
-    model.compile(optimizer, loss='mse', loss_weights=[0.2, 0.8])
-
-    # the rank of weight arrays should be 1.
-    with pytest.raises(ValueError):
-        out = model.train_on_batch(
-            [input_a_np, input_b_np],
-            [output_a_np, output_b_np],
-            sample_weight=[None, np.random.random((10, 20, 30))])
-
-    model.compile(optimizer, loss='mse',
-                  sample_weight_mode={'dense_1': None, 'dropout': 'temporal'})
-    model.compile(optimizer, loss='mse', sample_weight_mode=[None, 'temporal'])
-
-    # the rank of output arrays should be at least 3D.
-    with pytest.raises(ValueError):
-        out = model.train_on_batch([input_a_np, input_b_np],
-                                   [output_a_np, output_b_np],
-                                   sample_weight=sample_weight)
-
-
-# TODO: resolve flakyness issue. Tracked with #11560
-@flaky(rerun_filter=lambda err, *args: issubclass(err[0], AssertionError))
-def test_fit_generator():
-    model = get_model(num_outputs=2)
-    optimizer = 'rmsprop'
-    loss = 'mse'
-    loss_weights = [1., 0.5]
-
-    model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights,
-                  sample_weight_mode=None)
-    tracker_cb = TrackerCallback()
-    val_seq = RandomSequence(4)
-    out = model.fit_generator(generator=RandomSequence(3),
-                              steps_per_epoch=3,
-                              epochs=5,
-                              initial_epoch=0,
-                              validation_data=val_seq,
-                              validation_steps=3,
-                              max_queue_size=1,
-                              callbacks=[tracker_cb])
-    assert tracker_cb.trained_epochs == [0, 1, 2, 3, 4]
-    assert tracker_cb.trained_batches == list(range(3)) * 5
-    assert len(val_seq.logs) <= 4 * 5
-
-    tracker_cb = TrackerCallback()
-    val_seq = RandomSequence(4)
-    out = model.fit(RandomSequence(3),
-                    steps_per_epoch=3,
-                    epochs=5,
-                    initial_epoch=0,
-                    validation_data=val_seq,
-                    validation_steps=3,
-                    max_queue_size=1,
-                    callbacks=[tracker_cb])
-    assert tracker_cb.trained_epochs == [0, 1, 2, 3, 4]
-    assert tracker_cb.trained_batches == list(range(3)) * 5
-    assert len(val_seq.logs) <= 4 * 5
-
-    # steps_per_epoch will be equal to len of sequence if it's unspecified
-    tracker_cb = TrackerCallback()
-    val_seq = RandomSequence(4)
-    out = model.fit_generator(generator=RandomSequence(3),
-                              epochs=5,
-                              initial_epoch=0,
-                              validation_data=val_seq,
-                              callbacks=[tracker_cb],
-                              max_queue_size=1)
-    assert tracker_cb.trained_epochs == [0, 1, 2, 3, 4]
-    assert tracker_cb.trained_batches == list(range(12)) * 5
-    # the queue may be full.
-    assert 12 * 5 <= len(val_seq.logs) <= (12 * 5) + 2
-
-    tracker_cb = TrackerCallback()
-    val_seq = RandomSequence(4)
-    out = model.fit(RandomSequence(3),
-                    epochs=5,
-                    initial_epoch=0,
-                    validation_data=val_seq,
-                    callbacks=[tracker_cb],
-                    max_queue_size=1)
-    assert tracker_cb.trained_epochs == [0, 1, 2, 3, 4]
-    assert tracker_cb.trained_batches == list(range(12)) * 5
-    # the queue may be full.
- assert 12 * 5 <= len(val_seq.logs) <= (12 * 5) + 2 - - # test for workers = 0 - tracker_cb = TrackerCallback() - val_seq = RandomSequence(4) - out = model.fit_generator(generator=RandomSequence(3), - epochs=5, - validation_data=val_seq, - callbacks=[tracker_cb], - workers=0) - assert tracker_cb.trained_epochs == [0, 1, 2, 3, 4] - assert tracker_cb.trained_batches == list(range(12)) * 5 - assert len(val_seq.logs) == 12 * 5 - - tracker_cb = TrackerCallback() - val_seq = RandomSequence(4) - out = model.fit(RandomSequence(3), - steps_per_epoch=3, - epochs=5, - initial_epoch=0, - validation_data=val_seq, - validation_steps=3, - max_queue_size=1, - callbacks=[tracker_cb]) - assert tracker_cb.trained_epochs == [0, 1, 2, 3, 4] - assert tracker_cb.trained_batches == list(range(3)) * 5 - assert len(val_seq.logs) <= 4 * 5 - - # fit_generator will throw an exception - # if steps is unspecified for regular generator - with pytest.raises(ValueError): - @threadsafe_generator - def gen_data(): - while True: - yield (np.asarray([]), np.asarray([])) - - out = model.fit_generator(generator=gen_data(), epochs=5, - initial_epoch=0, validation_data=gen_data(), - callbacks=[tracker_cb]) - - # Check if generator is only accessed an expected number of times - gen_counters = [0, 0] - - @threadsafe_generator - def gen_data(i): - while True: - gen_counters[i] += 1 - yield ([np.random.random((1, 3)), np.random.random((1, 3))], - [np.random.random((1, 4)), np.random.random((1, 3))]) - out = model.fit_generator(generator=gen_data(0), epochs=3, - steps_per_epoch=2, - validation_data=gen_data(1), - validation_steps=1, - max_queue_size=2, - workers=2) - - # Need range check here as filling - # of the queue depends on sleep in the enqueuers - max_train = 3 * 2 + 2 * 2 - min_train = 2 * 3 - assert min_train <= gen_counters[0] <= max_train - # 12 = (epoch * workers * validation steps * max_queue_size) - assert 3 <= gen_counters[1] <= 12 - - gen_counters = [0] - out = model.fit_generator(generator=RandomSequence(3), epochs=3, - validation_data=gen_data(0), - validation_steps=1, - max_queue_size=2, - workers=2) - - # 12 = (epoch * workers * validation steps * max_queue_size) - # Need range check here as filling - # of the queue depends on sleep in the enqueuers - assert 3 <= gen_counters[0] <= 12 - - -def test_fit_generator_shape(): - # predict_generator output shape behavior should be consistent - def expected_shape(batch_size, n_batches): - return (batch_size * n_batches, 4), (batch_size * n_batches, 3) - - model = get_model(num_outputs=2) - optimizer = 'rmsprop' - loss = 'mse' - - # Multiple outputs and one step. - batch_size = 5 - sequence_length = 1 - shape_0, shape_1 = expected_shape(batch_size, sequence_length) - out = model.predict_generator( - RandomSequence(batch_size, sequence_length=sequence_length)) - assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1 - - out = model.predict( - RandomSequence(batch_size, sequence_length=sequence_length)) - assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1 - - # Multiple outputs and multiple steps. 
- batch_size = 5 - sequence_length = 2 - shape_0, shape_1 = expected_shape(batch_size, sequence_length) - out = model.predict_generator( - RandomSequence(batch_size, sequence_length=sequence_length)) - assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1 - - out = model.predict( - RandomSequence(batch_size, sequence_length=sequence_length)) - assert np.shape(out[0]) == shape_0 and np.shape(out[1]) == shape_1 - - # Create a model with a single output. - single_output_model = get_model(num_outputs=1) - single_output_model.compile(optimizer, loss, - metrics=[], sample_weight_mode=None) - - # Single output and one step. - batch_size = 5 - sequence_length = 1 - shape_0, _ = expected_shape(batch_size, sequence_length) - out = single_output_model.predict_generator( - RandomSequence(batch_size, sequence_length=sequence_length)) - assert np.shape(out) == shape_0 - - out = single_output_model.predict( - RandomSequence(batch_size, sequence_length=sequence_length)) - assert np.shape(out) == shape_0 - - # Single output and multiple steps. - batch_size = 5 - sequence_length = 2 - shape_0, _ = expected_shape(batch_size, sequence_length) - out = single_output_model.predict_generator( - RandomSequence(batch_size, sequence_length=sequence_length)) - assert np.shape(out) == shape_0 - - out = single_output_model.predict( - RandomSequence(batch_size, sequence_length=sequence_length)) - assert np.shape(out) == shape_0 - - -@pytest.mark.skipif(sys.version_info < (3,), - reason='Cannot catch warnings in python 2') -def test_warnings(): - a = Input(shape=(3,), name='input_a') - b = Input(shape=(3,), name='input_b') - - a_2 = Dense(4, name='dense_1')(a) - dp = Dropout(0.5, name='dropout') - b_2 = dp(b) - - model = Model([a, b], [a_2, b_2]) - - optimizer = 'rmsprop' - loss = 'mse' - loss_weights = [1., 0.5] - model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, - sample_weight_mode=None) - - @threadsafe_generator - def gen_data(batch_sz): - while True: - yield ([np.random.random((batch_sz, 3)), - np.random.random((batch_sz, 3))], - [np.random.random((batch_sz, 4)), - np.random.random((batch_sz, 3))]) - - with pytest.warns(Warning) as w: - out = model.fit_generator(gen_data(4), - steps_per_epoch=10, - use_multiprocessing=True, - workers=2) - warning_raised = any(['Sequence' in str(w_.message) for w_ in w]) - assert warning_raised, 'No warning raised when using generator with processes.' 
- - with pytest.warns(None) as w: - out = model.fit_generator(RandomSequence(3), - steps_per_epoch=4, - use_multiprocessing=True, - workers=2) - assert all(['Sequence' not in str(w_.message) for w_ in w]), ( - 'A warning was raised for Sequence.') - - -def test_sparse_inputs_targets(): - test_inputs = [sparse.random(6, 3, density=0.25).tocsr() for _ in range(2)] - test_outputs = [sparse.random(6, i, density=0.25).tocsr() - for i in range(3, 5)] - in1 = Input(shape=(3,)) - in2 = Input(shape=(3,)) - out1 = Dropout(0.5, name='dropout')(in1) - out2 = Dense(4, name='dense_1')(in2) - model = Model([in1, in2], [out1, out2]) - model.predict(test_inputs, batch_size=2) - model.compile('rmsprop', 'mse') - model.fit(test_inputs, test_outputs, - epochs=1, batch_size=2, validation_split=0.5) - model.evaluate(test_inputs, test_outputs, batch_size=2) - - -@pytest.mark.skipif(K.backend() != 'tensorflow', - reason='sparse operations supported only by TensorFlow') -def test_sparse_placeholder_fit(): - test_inputs = [sparse.random(6, 3, density=0.25).tocsr() for _ in range(2)] - test_outputs = [sparse.random(6, i, density=0.25).tocsr() - for i in range(3, 5)] - in1 = Input(shape=(3,)) - in2 = Input(shape=(3,), sparse=True) - out1 = Dropout(0.5, name='dropout')(in1) - out2 = Dense(4, name='dense_1')(in2) - model = Model([in1, in2], [out1, out2]) - model.predict(test_inputs, batch_size=2) - model.compile('rmsprop', 'mse') - model.fit(test_inputs, test_outputs, - epochs=1, batch_size=2, validation_split=0.5) - model.evaluate(test_inputs, test_outputs, batch_size=2) - - -def test_trainable_argument(): - x = np.random.random((5, 3)) - y = np.random.random((5, 2)) - - model = Sequential() - model.add(Dense(2, input_dim=3, trainable=False)) - model.compile('rmsprop', 'mse') - out = model.predict(x) - model.train_on_batch(x, y) - out_2 = model.predict(x) - assert_allclose(out, out_2) - - # test with nesting - inputs = Input(shape=(3,)) - outputs = model(inputs) - model = Model(inputs, outputs) - model.compile('rmsprop', 'mse') - out = model.predict(x) - model.train_on_batch(x, y) - out_2 = model.predict(x) - assert_allclose(out, out_2) - - -def test_with_list_as_targets(): - model = Sequential() - model.add(Dense(1, input_dim=3, trainable=False)) - model.compile('rmsprop', 'mse') - - x = np.random.random((2, 3)) - y = [0, 1] - model.train_on_batch(x, y) - - -def test_check_not_failing(): - a = np.random.random((2, 1, 3)) - training_utils.check_loss_and_target_compatibility( - [a], [losses.categorical_crossentropy], [a.shape]) - training_utils.check_loss_and_target_compatibility( - [a], [losses.categorical_crossentropy], [(2, None, 3)]) - - -def test_check_last_is_one(): - a = np.random.random((2, 3, 1)) - with pytest.raises(ValueError) as exc: - training_utils.check_loss_and_target_compatibility( - [a], [losses.categorical_crossentropy], [a.shape]) - - assert 'You are passing a target array' in str(exc) - - -def test_check_bad_shape(): - a = np.random.random((2, 3, 5)) - with pytest.raises(ValueError) as exc: - training_utils.check_loss_and_target_compatibility( - [a], [losses.categorical_crossentropy], [(2, 3, 6)]) - - assert 'targets to have the same shape' in str(exc) - - -@pytest.mark.parametrize('input_metrics,expected_output', [ - (None, [[], []]), - (['mse', 'mae'], [['mse', 'mae'], ['mse', 'mae']]), - ({'layer_1': 'mae', 'layer_2': 'mse'}, [['mae'], ['mse']]), -]) -def test_collect_metrics(input_metrics, expected_output): - output_names = ['layer_1', 'layer_2'] - - output_metrics = 
training_utils.collect_metrics(input_metrics, - output_names) - assert output_metrics == expected_output - - -def test_collect_metrics_with_invalid_metrics_format(): - with pytest.raises(TypeError): - training_utils.collect_metrics({'a', 'set', 'type'}, []) - - -def test_collect_metrics_with_invalid_layer_name(): - with pytest.warns(Warning) as w: - training_utils.collect_metrics({'unknown_layer': 'mse'}, ['layer_1']) - - warning_raised = all(['unknown_layer' in str(w_.message) for w_ in w]) - assert warning_raised, 'Warning was raised for unknown_layer' - - -@pytest.mark.skipif(K.backend() != 'tensorflow', - reason='Requires TensorFlow backend') -def test_model_with_input_feed_tensor(): - """We test building a model with a TF variable as input. - We should be able to call fit, evaluate, predict, - by only passing them data for the placeholder inputs - in the model. - """ - import tensorflow as tf - - input_a_np = np.random.random((10, 3)) - input_b_np = np.random.random((10, 3)) - - output_a_np = np.random.random((10, 4)) - output_b_np = np.random.random((10, 3)) - - a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) - b = Input(shape=(3,), name='input_b') - - a_2 = Dense(4, name='dense_1')(a) - dp = Dropout(0.5, name='dropout') - b_2 = dp(b) - - model = Model([a, b], [a_2, b_2]) - model.summary() - - optimizer = 'rmsprop' - loss = 'mse' - loss_weights = [1., 0.5] - model.compile(optimizer, loss, metrics=['mean_squared_error'], - loss_weights=loss_weights, - sample_weight_mode=None) - - # test train_on_batch - out = model.train_on_batch(input_b_np, - [output_a_np, output_b_np]) - out = model.train_on_batch({'input_b': input_b_np}, - [output_a_np, output_b_np]) - out = model.test_on_batch({'input_b': input_b_np}, - [output_a_np, output_b_np]) - out = model.predict_on_batch({'input_b': input_b_np}) - - # test fit - out = model.fit({'input_b': input_b_np}, - [output_a_np, output_b_np], epochs=1, batch_size=10) - out = model.fit(input_b_np, - [output_a_np, output_b_np], epochs=1, batch_size=10) - - # test evaluate - out = model.evaluate({'input_b': input_b_np}, - [output_a_np, output_b_np], batch_size=10) - out = model.evaluate(input_b_np, - [output_a_np, output_b_np], batch_size=10) - - # test predict - out = model.predict({'input_b': input_b_np}, batch_size=10) - out = model.predict(input_b_np, batch_size=10) - assert len(out) == 2 - - # Now test a model with a single input - # i.e. we don't pass any data to fit the model. - a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) - a_2 = Dense(4, name='dense_1')(a) - a_2 = Dropout(0.5, name='dropout')(a_2) - model = Model(a, a_2) - model.summary() - - optimizer = 'rmsprop' - loss = 'mse' - model.compile(optimizer, loss, metrics=['mean_squared_error']) - - # test train_on_batch - out = model.train_on_batch(None, - output_a_np) - out = model.train_on_batch(None, - output_a_np) - out = model.test_on_batch(None, - output_a_np) - out = model.predict_on_batch(None) - out = model.train_on_batch([], - output_a_np) - out = model.train_on_batch({}, - output_a_np) - - # test fit - out = model.fit(None, - output_a_np, epochs=1, batch_size=10) - out = model.fit(None, - output_a_np, epochs=1, batch_size=10) - - # test evaluate - out = model.evaluate(None, - output_a_np, batch_size=10) - out = model.evaluate(None, - output_a_np, batch_size=10) - - # test predict - out = model.predict(None, steps=3) - out = model.predict(None, steps=3) - assert out.shape == (10 * 3, 4) - - # Same, without learning phase - # i.e. 
we don't pass any data to fit the model. - a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) - a_2 = Dense(4, name='dense_1')(a) - model = Model(a, a_2) - model.summary() - - optimizer = 'rmsprop' - loss = 'mse' - model.compile(optimizer, loss, metrics=['mean_squared_error']) - - # test train_on_batch - out = model.train_on_batch(None, - output_a_np) - out = model.train_on_batch(None, - output_a_np) - out = model.test_on_batch(None, - output_a_np) - out = model.predict_on_batch(None) - out = model.train_on_batch([], - output_a_np) - out = model.train_on_batch({}, - output_a_np) - - # test fit - out = model.fit(None, - output_a_np, epochs=1, batch_size=10) - out = model.fit(None, - output_a_np, epochs=1, batch_size=10) - - # test evaluate - out = model.evaluate(None, - output_a_np, batch_size=10) - out = model.evaluate(None, - output_a_np, batch_size=10) - - # test predict - out = model.predict(None, steps=3) - out = model.predict(None, steps=3) - assert out.shape == (10 * 3, 4) - - -def test_model_with_partial_loss(): - a = Input(shape=(3,), name='input_a') - a_2 = Dense(4, name='dense_1')(a) - dp = Dropout(0.5, name='dropout') - a_3 = dp(a_2) - model = Model(a, [a_2, a_3]) - - optimizer = 'rmsprop' - loss = {'dropout': 'mse'} - model.compile(optimizer, loss, metrics=['mae']) - - input_a_np = np.random.random((10, 3)) - output_a_np = np.random.random((10, 4)) - - # test train_on_batch - out = model.train_on_batch(input_a_np, output_a_np) - out = model.test_on_batch(input_a_np, output_a_np) - # fit - out = model.fit(input_a_np, [output_a_np]) - # evaluate - out = model.evaluate(input_a_np, [output_a_np]) - - # Same without dropout. - a = Input(shape=(3,), name='input_a') - a_2 = Dense(4, name='dense_1')(a) - a_3 = Dense(4, name='dense_2')(a_2) - model = Model(a, [a_2, a_3]) - - optimizer = 'rmsprop' - loss = {'dense_2': 'mse'} - model.compile(optimizer, loss, metrics={'dense_1': 'mae'}) - - # test train_on_batch - out = model.train_on_batch(input_a_np, output_a_np) - out = model.test_on_batch(input_a_np, output_a_np) - # fit - out = model.fit(input_a_np, [output_a_np]) - # evaluate - out = model.evaluate(input_a_np, [output_a_np]) - - -@pytest.mark.skipif((K.backend() == 'cntk'), - reason='cntk does not support external loss yet') -def test_model_with_external_loss(): - # None loss, only regularization loss. - a = Input(shape=(3,), name='input_a') - a_2 = Dense(4, name='dense_1', - kernel_regularizer='l1', - bias_regularizer='l2')(a) - dp = Dropout(0.5, name='dropout') - a_3 = dp(a_2) - - model = Model(a, [a_2, a_3]) - - optimizer = 'rmsprop' - loss = None - model.compile(optimizer, loss, metrics=['mae']) - - input_a_np = np.random.random((10, 3)) - - # test train_on_batch - out = model.train_on_batch(input_a_np, None) - out = model.test_on_batch(input_a_np, None) - # fit - out = model.fit(input_a_np, None) - # evaluate - out = model.evaluate(input_a_np, None) - - # No dropout, external loss. - a = Input(shape=(3,), name='input_a') - a_2 = Dense(4, name='dense_1')(a) - a_3 = Dense(4, name='dense_2')(a) - - model = Model(a, [a_2, a_3]) - model.add_loss(K.mean(a_3 + a_2)) - - optimizer = 'rmsprop' - loss = None - model.compile(optimizer, loss, metrics=['mae']) - - # test train_on_batch - out = model.train_on_batch(input_a_np, None) - out = model.test_on_batch(input_a_np, None) - # fit - out = model.fit(input_a_np, None) - # evaluate - out = model.evaluate(input_a_np, None) - - # Test fit with no external data at all. 
- if K.backend() == 'tensorflow': - import tensorflow as tf - - a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) - a_2 = Dense(4, name='dense_1')(a) - a_2 = Dropout(0.5, name='dropout')(a_2) - model = Model(a, a_2) - model.add_loss(K.mean(a_2)) - - model.compile(optimizer='rmsprop', - loss=None, - metrics=['mean_squared_error']) - - # test train_on_batch - out = model.train_on_batch(None, None) - out = model.test_on_batch(None, None) - out = model.predict_on_batch(None) - - # test fit - with pytest.raises(ValueError): - out = model.fit(None, None, epochs=1, batch_size=10) - out = model.fit(None, None, epochs=1, steps_per_epoch=1) - - # define a generator to produce x=None and y=None - @threadsafe_generator - def data_tensors_generator(): - while True: - yield (None, None) - - generator = data_tensors_generator() - - # test fit_generator for framework-native data tensors - out = model.fit_generator(generator, epochs=1, - steps_per_epoch=3) - - # test evaluate_generator for framework-native data tensors - out = model.evaluate_generator(generator, steps=3) - out = model.evaluate(generator, steps=3) - - # test fit with validation data - with pytest.raises(ValueError): - out = model.fit(None, None, - epochs=1, - steps_per_epoch=None, - validation_steps=2) - out = model.fit(None, None, - epochs=1, - steps_per_epoch=2, - validation_steps=2) - - # test evaluate - with pytest.raises(ValueError): - out = model.evaluate(None, None, batch_size=10) - out = model.evaluate(None, None, steps=3) - - # test predict - with pytest.raises(ValueError): - out = model.predict(None, batch_size=10) - out = model.predict(None, steps=3) - assert out.shape == (10 * 3, 4) - - # Test multi-output model without external data. - a = Input(tensor=tf.Variable(input_a_np, dtype=tf.float32)) - a_1 = Dense(4, name='dense_1')(a) - a_2 = Dropout(0.5, name='dropout')(a_1) - model = Model(a, [a_1, a_2]) - model.add_loss(K.mean(a_2)) - model.compile(optimizer='rmsprop', - loss=None, - metrics=['mean_squared_error']) - - # test train_on_batch - out = model.train_on_batch(None, None) - out = model.test_on_batch(None, None) - out = model.predict_on_batch(None) - - # test fit - with pytest.raises(ValueError): - out = model.fit(None, None, epochs=1, batch_size=10) - out = model.fit(None, None, epochs=1, steps_per_epoch=1) - - # test fit with validation data - with pytest.raises(ValueError): - out = model.fit(None, None, - epochs=1, - steps_per_epoch=None, - validation_steps=2) - out = model.fit(None, None, - epochs=1, - steps_per_epoch=2, - validation_steps=2) - - # test evaluate - with pytest.raises(ValueError): - out = model.evaluate(None, None, batch_size=10) - out = model.evaluate(None, None, steps=3) - - # test predict - with pytest.raises(ValueError): - out = model.predict(None, batch_size=10) - out = model.predict(None, steps=3) - assert len(out) == 2 - assert out[0].shape == (10 * 3, 4) - assert out[1].shape == (10 * 3, 4) - - -def test_target_tensors(): - # single-output, as list - model = keras.models.Sequential() - model.add(keras.layers.Dense(4, input_shape=(4,), name='dense')) - input_val = np.random.random((10, 4)) - target_val = np.random.random((10, 4)) - target = keras.backend.variable(target_val) - model.compile(optimizer='rmsprop', loss='mse', target_tensors=[target]) - model.train_on_batch(input_val, None) - - # single-output, as dict - model.compile(optimizer='rmsprop', loss='mse', - target_tensors={'dense': target}) - model.train_on_batch(input_val, None) - - # single-output, as tensor - 
model.compile(optimizer='rmsprop', loss='mse', - target_tensors=target) - model.train_on_batch(input_val, None) - - # test invalid arguments - with pytest.raises(TypeError): - model.compile(optimizer='rmsprop', loss='mse', - target_tensors=set()) - with pytest.raises(ValueError): - model.compile(optimizer='rmsprop', loss='mse', - target_tensors=[target, target]) - with pytest.raises(ValueError): - model.compile(optimizer='rmsprop', loss='mse', - target_tensors={'dense2': None}) - with pytest.raises(ValueError): - model.compile(optimizer='rmsprop', loss='mse', - target_tensors=[target]) - model.train_on_batch(input_val, target_val) - - # multi-output, as list - input_val = np.random.random((10, 4)) - target_val_a = np.random.random((10, 4)) - target_val_b = np.random.random((10, 4)) - target_a = keras.backend.variable(target_val_a) - target_b = keras.backend.variable(target_val_b) - - inputs = keras.layers.Input(shape=(4,)) - output_a = keras.layers.Dense(4, name='dense_a')(inputs) - output_b = keras.layers.Dense(4, name='dense_b')(inputs) - model = keras.models.Model(inputs, [output_a, output_b]) - model.compile(optimizer='rmsprop', loss='mse', - target_tensors=[target_a, target_b]) - model.train_on_batch(input_val, None) - - # multi-output, as dict - model.compile(optimizer='rmsprop', loss='mse', - target_tensors={'dense_a': target_a, - 'dense_b': target_b}) - model.train_on_batch(input_val, None) - - # multi-output, not enough target tensors when `target_tensors` is not a dict - with pytest.raises(ValueError, - match='When passing a list as `target_tensors`, it should ' - 'have one entry per model output. The model has \\d ' - 'outputs, but you passed target_tensors='): - model.compile(optimizer='rmsprop', loss='mse', - target_tensors=[target_a]) - with pytest.raises(ValueError, - match='The model has \\d outputs, but you passed a single ' - 'tensor as `target_tensors`. 
Expected a list or ' - 'a dict of tensors.'): - model.compile(optimizer='rmsprop', loss='mse', - target_tensors=target_a) - - # test with sample weights - model.compile(optimizer='rmsprop', loss='mse', - target_tensors=[target_a, target_b]) - model.train_on_batch(input_val, None, - sample_weight={'dense_a': np.random.random((10,))}) - - -def test_model_custom_target_tensors(): - a = Input(shape=(3,), name='input_a') - b = Input(shape=(3,), name='input_b') - - a_2 = Dense(4, name='dense_1')(a) - dp = Dropout(0.5, name='dropout') - b_2 = dp(b) - - y = K.placeholder([10, 4], name='y') - y1 = K.placeholder([10, 3], name='y1') - y2 = K.placeholder([7, 5], name='y2') - model = Model([a, b], [a_2, b_2]) - - optimizer = 'rmsprop' - loss = 'mse' - loss_weights = [1., 0.5] - - # test list of target tensors - with pytest.raises(ValueError): - model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, - sample_weight_mode=None, target_tensors=[y, y1, y2]) - model.compile(optimizer, loss, metrics=[], loss_weights=loss_weights, - sample_weight_mode=None, target_tensors=[y, y1]) - input_a_np = np.random.random((10, 3)) - input_b_np = np.random.random((10, 3)) - - output_a_np = np.random.random((10, 4)) - output_b_np = np.random.random((10, 3)) - - out = model.train_on_batch([input_a_np, input_b_np], - [output_a_np, output_b_np], - {y: np.random.random((10, 4)), - y1: np.random.random((10, 3))}) - # test dictionary of target_tensors - with pytest.raises(ValueError): - model.compile(optimizer, loss, - metrics=[], - loss_weights=loss_weights, - sample_weight_mode=None, - target_tensors={'does_not_exist': y2}) - # test dictionary of target_tensors - model.compile(optimizer, loss, - metrics=[], - loss_weights=loss_weights, - sample_weight_mode=None, - target_tensors={'dense_1': y, 'dropout': y1}) - out = model.train_on_batch([input_a_np, input_b_np], - [output_a_np, output_b_np], - {y: np.random.random((10, 4)), - y1: np.random.random((10, 3))}) - - if K.backend() == 'tensorflow': - import tensorflow as tf - # test with custom TF placeholder as target - pl_target_a = tf.placeholder('float32', shape=(None, 4)) - model.compile(optimizer='rmsprop', loss='mse', - target_tensors={'dense_1': pl_target_a}) - model.train_on_batch([input_a_np, input_b_np], - [output_a_np, output_b_np]) - - -@pytest.mark.skipif(sys.version_info < (3,), - reason='Cannot catch warnings in python 2') -def test_trainable_weights_count_consistency(): - """Tests the trainable weights consistency check of Model. 
- - This verifies that a warning is shown if model.trainable is modified - and the model is summarized/run without a new call to .compile() - - Reproduce issue #8121 - """ - a = Input(shape=(3,), name='input_a') - model1 = Model(inputs=a, outputs=Dense(1)(a)) - - model1.trainable = False - b = Input(shape=(3,), name='input_b') - y = model1(b) - model2 = Model(inputs=b, outputs=Dense(1)(y)) - - model2.compile(optimizer='adam', loss='mse') - - model1.trainable = True - - # Should warn on .summary() - with pytest.warns(UserWarning) as w: - model2.summary() - warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w]) - assert warning_raised, ( - 'No warning raised when trainable is modified without .compile.') - - # And on .fit() - with pytest.warns(UserWarning) as w: - model2.fit(x=np.zeros((5, 3)), y=np.zeros((5, 1))) - warning_raised = any(['Discrepancy' in str(w_.message) for w_ in w]) - assert warning_raised, ( - 'No warning raised when trainable is modified without .compile.') - - # And shouldn't warn if we recompile - model2.compile(optimizer='adam', loss='mse') - with pytest.warns(None) as w: - model2.summary() - assert len(w) == 0, ( - 'Warning raised even when .compile() is called after modifying .trainable') - - -def test_pandas_dataframe(): - input_a = Input(shape=(3,), name='input_a') - input_b = Input(shape=(3,), name='input_b') - - x = Dense(4, name='dense_1')(input_a) - y = Dense(3, name='desne_2')(input_b) - - model_1 = Model(inputs=input_a, outputs=x) - model_2 = Model(inputs=[input_a, input_b], outputs=[x, y]) - - optimizer = 'rmsprop' - loss = 'mse' - - model_1.compile(optimizer=optimizer, loss=loss) - model_2.compile(optimizer=optimizer, loss=loss) - - input_a_df = pd.DataFrame(np.random.random((10, 3))) - input_b_df = pd.DataFrame(np.random.random((10, 3))) - - output_a_df = pd.DataFrame(np.random.random((10, 4))) - output_b_df = pd.DataFrame(np.random.random((10, 3))) - - model_1.fit(input_a_df, - output_a_df) - model_2.fit([input_a_df, input_b_df], - [output_a_df, output_b_df]) - model_1.fit([input_a_df], - [output_a_df]) - model_1.fit({'input_a': input_a_df}, - output_a_df) - model_2.fit({'input_a': input_a_df, 'input_b': input_b_df}, - [output_a_df, output_b_df]) - - model_1.predict(input_a_df) - model_2.predict([input_a_df, input_b_df]) - model_1.predict([input_a_df]) - model_1.predict({'input_a': input_a_df}) - model_2.predict({'input_a': input_a_df, 'input_b': input_b_df}) - - model_1.predict_on_batch(input_a_df) - model_2.predict_on_batch([input_a_df, input_b_df]) - model_1.predict_on_batch([input_a_df]) - model_1.predict_on_batch({'input_a': input_a_df}) - model_2.predict_on_batch({'input_a': input_a_df, 'input_b': input_b_df}) - - model_1.evaluate(input_a_df, - output_a_df) - model_2.evaluate([input_a_df, input_b_df], - [output_a_df, output_b_df]) - model_1.evaluate([input_a_df], - [output_a_df]) - model_1.evaluate({'input_a': input_a_df}, - output_a_df) - model_2.evaluate({'input_a': input_a_df, 'input_b': input_b_df}, - [output_a_df, output_b_df]) - - model_1.train_on_batch(input_a_df, - output_a_df) - model_2.train_on_batch([input_a_df, input_b_df], - [output_a_df, output_b_df]) - model_1.train_on_batch([input_a_df], - [output_a_df]) - model_1.train_on_batch({'input_a': input_a_df}, - output_a_df) - model_2.train_on_batch({'input_a': input_a_df, 'input_b': input_b_df}, - [output_a_df, output_b_df]) - - model_1.test_on_batch(input_a_df, - output_a_df) - model_2.test_on_batch([input_a_df, input_b_df], - [output_a_df, output_b_df]) - 
model_1.test_on_batch([input_a_df], - [output_a_df]) - model_1.test_on_batch({'input_a': input_a_df}, - output_a_df) - model_2.test_on_batch({'input_a': input_a_df, 'input_b': input_b_df}, - [output_a_df, output_b_df]) - - -@pytest.mark.skipif(K.backend() != 'tensorflow', reason='Requires TensorFlow') -@pytest.mark.skipif((K.backend() == 'tensorflow' and - not hasattr(K.get_session(), - '_make_callable_from_options')), - reason='Requires TF 1.8 or higher') -def test_training_and_eval_methods_on_symbolic_tensors_single_io(): - x = keras.layers.Input(shape=(3,), name='input') - y = keras.layers.Dense(4, name='dense')(x) - model = keras.Model(x, y) - - optimizer = 'rmsprop' - loss = 'mse' - metrics = ['mae'] - model.compile(optimizer, loss, metrics=metrics) - - inputs = keras.backend.zeros(shape=(10, 3)) - targets = keras.backend.zeros(shape=(10, 4)) - - model.fit(inputs, targets, epochs=1, steps_per_epoch=2, verbose=0) - model.evaluate(inputs, targets, steps=2, verbose=0) - model.predict(inputs, steps=2) - model.train_on_batch(inputs, targets) - model.test_on_batch(inputs, targets) - model.fit(inputs, targets, - epochs=1, steps_per_epoch=2, verbose=1, - validation_data=(inputs, targets), validation_steps=2) - - -@pytest.mark.skipif(K.backend() != 'tensorflow', reason='Requires TensorFlow') -@pytest.mark.skipif((K.backend() == 'tensorflow' and - not hasattr(K.get_session(), - '_make_callable_from_options')), - reason='Requires TF 1.8 or higher') -def test_training_and_eval_methods_on_symbolic_tensors_multi_io(): - a = keras.layers.Input(shape=(3,), name='input_a') - b = keras.layers.Input(shape=(3,), name='input_b') - - dense = keras.layers.Dense(4, name='dense') - c = dense(a) - d = dense(b) - e = keras.layers.Dropout(0.5, name='dropout')(c) - - model = keras.models.Model([a, b], [d, e]) - - optimizer = 'rmsprop' - loss = 'mse' - loss_weights = [1., 0.5] - metrics = ['mae'] - model.compile(optimizer, loss, metrics=metrics, loss_weights=loss_weights) - - input_a_tf = keras.backend.zeros(shape=(10, 3)) - input_b_tf = keras.backend.zeros(shape=(10, 3)) - - output_d_tf = keras.backend.zeros(shape=(10, 4)) - output_e_tf = keras.backend.zeros(shape=(10, 4)) - - model.fit( - [input_a_tf, input_b_tf], [output_d_tf, output_e_tf], - epochs=1, - steps_per_epoch=2, - verbose=0) - with pytest.raises(ValueError) as excinfo: - model.fit( - [input_a_tf, input_b_tf], [output_d_tf, output_e_tf], - epochs=1, - batch_size=5, - verbose=0) - assert 'should specify the `steps_per_epoch`' in str(excinfo.value) - model.train_on_batch([input_a_tf, input_b_tf], [output_d_tf, output_e_tf]) - - # Test with dictionary inputs - model.fit( - {'input_a': input_a_tf, - 'input_b': input_b_tf}, - {'dense': output_d_tf, - 'dropout': output_e_tf}, - epochs=1, - steps_per_epoch=2, - verbose=0) - model.fit( - {'input_a': input_a_tf, - 'input_b': input_b_tf}, - {'dense': output_d_tf, - 'dropout': output_e_tf}, - validation_data=({'input_a': input_a_tf, - 'input_b': input_b_tf}, - {'dense': output_d_tf, - 'dropout': output_e_tf}), - epochs=1, - steps_per_epoch=2, - validation_steps=2, - verbose=0) - model.train_on_batch( - {'input_a': input_a_tf, - 'input_b': input_b_tf}, - {'dense': output_d_tf, - 'dropout': output_e_tf}) - - # Test with validation data - model.fit( - [input_a_tf, input_b_tf], [output_d_tf, output_e_tf], - validation_data=([input_a_tf, input_b_tf], - [output_d_tf, output_e_tf]), - epochs=1, - steps_per_epoch=2, - validation_steps=2, - verbose=0) - # Test with validation split - with pytest.raises(ValueError) as 
excinfo: - model.fit( - [input_a_tf, input_b_tf], [output_d_tf, output_e_tf], - epochs=2, - steps_per_epoch=2, - verbose=0, - validation_split=0.2, - validation_steps=2) - assert 'you cannot use `validation_split`' in str(excinfo.value) - - # Test evaluation / prediction methods - model.evaluate([input_a_tf, input_b_tf], [output_d_tf, output_e_tf], - steps=2, verbose=0) - model.predict([input_a_tf, input_b_tf], steps=2) - model.test_on_batch([input_a_tf, input_b_tf], [output_d_tf, output_e_tf]) - - -def test_model_with_crossentropy_losses_channels_first(): - """Tests use of all crossentropy losses with `channels_first`. - - Tests `sparse_categorical_crossentropy`, `categorical_crossentropy`, - and `binary_crossentropy`. - Verifies that evaluate gives the same result with either - `channels_first` or `channels_last` image_data_format. - Tests PR #9715. - """ - - def prepare_simple_model(input_tensor, loss_name, target): - axis = 1 if K.image_data_format() == 'channels_first' else -1 - if loss_name == 'sparse_categorical_crossentropy': - def loss(y_true, y_pred): return K.sparse_categorical_crossentropy( - y_true, y_pred, axis=axis) - num_channels = np.amax(target) + 1 - activation = 'softmax' - elif loss_name == 'categorical_crossentropy': - def loss(y_true, y_pred): return K.categorical_crossentropy( - y_true, y_pred, axis=axis) - num_channels = target.shape[axis] - activation = 'softmax' - elif loss_name == 'binary_crossentropy': - def loss(y_true, y_pred): return K.binary_crossentropy( - y_true, y_pred) - num_channels = target.shape[axis] - activation = 'sigmoid' - predictions = Conv2D(num_channels, 1, activation=activation, - kernel_initializer='ones', - bias_initializer='ones')(input_tensor) - simple_model = Model(inputs=input_tensor, outputs=predictions) - simple_model.compile(optimizer='rmsprop', loss=loss) - return simple_model - - losses_to_test = ['sparse_categorical_crossentropy', - 'categorical_crossentropy', 'binary_crossentropy'] - - data_channels_first = np.array([[[[8., 7.1, 0.], [4.5, 2.6, 0.55], - [0.9, 4.2, 11.2]]]], dtype=np.float32) - # Labels for testing 4-class sparse_categorical_crossentropy, 4-class - # categorical_crossentropy, and 2-class binary_crossentropy: - labels_channels_first = [np.array([[[[0, 1, 3], [2, 1, 0], [2, 2, 1]]]]), - np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 0]], - [[1, 0, 0], [0, 0, 1], [0, 1, 0]], - [[0, 0, 0], [1, 0, 0], [0, 0, 1]], - [[0, 0, 1], [0, 0, 0], [1, 0, 0]]]]), - np.array([[[[0, 1, 0], [0, 1, 0], [0, 0, 1]], - [[1, 0, 1], [1, 0, 1], [1, 1, 0]]]])] - # Compute one loss for each loss function in the list `losses_to_test`: - loss_channels_last = [0., 0., 0.] - loss_channels_first = [0., 0., 0.] 
- - old_data_format = K.image_data_format() - - # Evaluate a simple network with channels last, with all three loss - # functions: - K.set_image_data_format('channels_last') - data = np.moveaxis(data_channels_first, 1, -1) - for index, loss_function in enumerate(losses_to_test): - labels = np.moveaxis(labels_channels_first[index], 1, -1) - inputs = Input(shape=(3, 3, 1)) - model = prepare_simple_model(inputs, loss_function, labels) - loss_channels_last[index] = model.evaluate(x=data, y=labels, - batch_size=1, verbose=0) - - # Evaluate the same network with channels first, with all three loss - # functions: - K.set_image_data_format('channels_first') - data = data_channels_first - for index, loss_function in enumerate(losses_to_test): - labels = labels_channels_first[index] - inputs = Input(shape=(1, 3, 3)) - model = prepare_simple_model(inputs, loss_function, labels) - loss_channels_first[index] = model.evaluate(x=data, y=labels, - batch_size=1, verbose=0) - - K.set_image_data_format(old_data_format) - - assert_allclose(loss_channels_first, loss_channels_last, - err_msg='{}{}'.format('Computed different losses for ', - 'channels_first and channels_last.')) - - -def test_dynamic_set_inputs(): - model = Sequential() - model.add(Dense(16, input_dim=32)) - model.add(Activation('relu')) - - model2 = Sequential() - model2.add(model.layers[-1]) - model2.add(Dense(8)) - preds2 = model2.predict([np.random.random((1, 32))]) - assert preds2.shape == (1, 8) - - model3 = Model(inputs=model.inputs, outputs=model.outputs) - with pytest.raises(ValueError): - model3._set_inputs(model.inputs) - - model3.inputs = None - model3._set_inputs(model.inputs) - preds3 = model3.predict([np.random.random((1, 32))]) - assert preds3.shape == (1, 16) - - model3.inputs = None - model3._set_inputs(model.input) - preds3 = model3.predict(np.random.random((1, 32))) - assert preds3.shape == (1, 16) - - aux_input = Input(shape=(5,), name='aux_input') - aux_model = Dense(3)(aux_input) - model4 = Model(inputs=model.inputs + [aux_input], - outputs=Concatenate()(model.outputs + [aux_model])) - model4.inputs = None - model4._set_inputs(model.inputs + [aux_input]) - preds4 = model4.predict([np.random.random((1, 32)), - np.random.random((1, 5))]) - assert preds4.shape == (1, 19) - - -def test_sample_weights(): - y = np.array([0, 1, 0, 0, 2]) - sample_weights = np.array([0.5, 1., 1., 0., 2.]) - class_weights = {0: 0.5, 1: 1., 2: 1.5} - - # Only `sample_weights`. - weights = training_utils.standardize_weights(y, sample_weights) - assert np.allclose(weights, sample_weights) - - # Only `class_weights`. - weights = training_utils.standardize_weights(y, class_weight=class_weights) - assert np.allclose(weights, np.array([0.5, 1., 0.5, 0.5, 1.5])) - - # Both 'sample_weights` and 'class_weights`. 
- weights = training_utils.standardize_weights(y, sample_weights, - class_weights) - expected = sample_weights * np.array([0.5, 1., 0.5, 0.5, 1.5]) - assert np.allclose(weights, expected) - - -def test_validation_freq(): - model = Sequential([Dense(1)]) - model.compile('sgd', 'mse') - - def _gen(): - while True: - yield np.ones((2, 10)), np.ones((2, 1)) - - x, y = np.ones((10, 10)), np.ones((10, 1)) - - class ValCounter(Callback): - - def __init__(self): - self.val_runs = 0 - - def on_test_begin(self, logs=None): - self.val_runs += 1 - - # Test in training_arrays.py - val_counter = ValCounter() - model.fit( - x, - y, - batch_size=2, - epochs=4, - validation_data=(x, y), - validation_freq=2, - callbacks=[val_counter]) - assert val_counter.val_runs == 2 - - # Test in training_generator.py - val_counter = ValCounter() - model.fit_generator( - _gen(), - epochs=4, - steps_per_epoch=5, - validation_data=(x, y), - validation_freq=[4, 2, 2, 1], - callbacks=[val_counter]) - assert val_counter.val_runs == 3 - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -from keras.utils.test_utils import layer_test -from keras import layers -from keras import backend as K - - -@pytest.mark.parametrize('activation_layer', - [layers.LeakyReLU, - layers.ELU]) -@pytest.mark.parametrize('alpha', [0., .5, -1.]) -def test_linear_unit_activations(activation_layer, - alpha): - layer_test(activation_layer, kwargs={'alpha': alpha}, - input_shape=(2, 3, 4)) - - -def test_prelu(): - layer_test(layers.PReLU, kwargs={}, - input_shape=(2, 3, 4)) - - -def test_prelu_share(): - layer_test(layers.PReLU, kwargs={'shared_axes': 1}, - input_shape=(2, 3, 4)) - - -def test_thresholded_relu(): - layer_test(layers.ThresholdedReLU, kwargs={'theta': 0.5}, - input_shape=(2, 3, 4)) - - -@pytest.mark.parametrize('axis', [1, -1]) -def test_softmax(axis): - layer_test(layers.Softmax, kwargs={'axis': axis}, - input_shape=(2, 3, 4)) - - -def test_relu(): - layer_test(layers.ReLU, - kwargs={'max_value': 10, - 'negative_slope': 0.2, - 'threshold': 3.0}, - input_shape=(2, 3, 4)) - layer_test(layers.ReLU, - kwargs={'max_value': 6}, - input_shape=(2, 3, 4)) - layer_test(layers.ReLU, - kwargs={'negative_slope': 0.2}, - input_shape=(2, 3, 4)) - - # max_value of ReLU layer cannot be negative value - with pytest.raises(ValueError): - layer_test(layers.ReLU, kwargs={'max_value': -2.0}, - input_shape=(2, 3, 4)) - - # negative_slope of ReLU layer cannot be negative value - with pytest.raises(ValueError): - layer_test(layers.ReLU, kwargs={'negative_slope': -2.0}, - input_shape=(2, 3, 4)) - - -@pytest.mark.skipif((K.backend() != 'tensorflow'), - reason='TF-specific implementation.') -def test_relu_tf_ops(): - inputs = layers.Input((3,)) - # Test that `relu` op gets used. - outputs = layers.ReLU()(inputs) - assert outputs.op.name.lower().endswith('/relu') - # Test that `leakyrelu` op gets used. - outputs = layers.ReLU(negative_slope=0.2)(inputs) - assert outputs.op.name.lower().endswith('/leakyrelu') - # Test that `relu6` op gets used. 
-    outputs = layers.ReLU(max_value=6)(inputs)
-    assert outputs.op.name.lower().endswith('/relu6')
-
-
-if __name__ == '__main__':
-    pytest.main([__file__])
-import pytest
-import numpy as np
-from numpy.testing import assert_allclose
-
-from keras import backend as K
-from keras.models import Sequential, Model
-from keras.layers import convolutional_recurrent, Input, Masking, Lambda
-from keras.utils.test_utils import layer_test
-from keras import regularizers
-
-num_row = 3
-num_col = 3
-filters = 2
-num_samples = 1
-input_channel = 2
-input_num_row = 5
-input_num_col = 5
-sequence_len = 2
-
-
-@pytest.mark.parametrize('data_format', ['channels_first', 'channels_last'])
-@pytest.mark.parametrize('return_sequences', [True, False])
-@pytest.mark.parametrize('use_mask', [True, False])
-def test_convolutional_recurrent(data_format, return_sequences, use_mask):
-
-    class Masking5D(Masking):
-        """Regular masking layer returns wrong shape of mask for RNN"""
-
-        def compute_mask(self, inputs, mask=None):
-            return K.any(K.not_equal(inputs, 0.), axis=[2, 3, 4])
-
-    if data_format == 'channels_first':
-        inputs = np.random.rand(num_samples, sequence_len,
-                                input_channel,
-                                input_num_row, input_num_col)
-    else:
-        inputs = np.random.rand(num_samples, sequence_len,
-                                input_num_row, input_num_col,
-                                input_channel)
-
-    # test for return state:
-    x = Input(batch_shape=inputs.shape)
-    kwargs = {'data_format': data_format,
-              'return_sequences': return_sequences,
-              'return_state': True,
-              'stateful': True,
-              'filters': filters,
-              'kernel_size': (num_row, num_col),
-              'padding': 'valid'}
-    layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
-    layer.build(inputs.shape)
-    if use_mask:
-        outputs = layer(Masking5D()(x))
-    else:
-        outputs = layer(x)
-    output, states = outputs[0], outputs[1:]
-    assert len(states) == 2
-    model = Model(x, states[0])
-    state = model.predict(inputs)
-    np.testing.assert_allclose(K.eval(layer.states[0]), state, atol=1e-4)
-
-    # test for output shape:
-    output = layer_test(convolutional_recurrent.ConvLSTM2D,
-                        kwargs={'data_format': data_format,
-                                'return_sequences': return_sequences,
-                                'filters': filters,
-                                'kernel_size': (num_row, num_col),
-                                'padding': 'valid'},
-                        input_shape=inputs.shape)
-
-
-def test_convolutional_recurrent_statefulness():
-
-    data_format = 'channels_last'
-    return_sequences = False
-    inputs = np.random.rand(num_samples, sequence_len,
-                            input_num_row, input_num_col,
-                            input_channel)
-    # Tests for statefulness
-    model = Sequential()
-    kwargs = {'data_format': data_format,
-              'return_sequences': return_sequences,
-              'filters': filters,
-              'kernel_size': (num_row, num_col),
-              'stateful': True,
-              'batch_input_shape': inputs.shape,
-              'padding': 'same'}
-    layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
-
-    model.add(layer)
-    model.compile(optimizer='sgd', loss='mse')
-    out1 = model.predict(np.ones_like(inputs))
-
-    # train once so that the states change
-    model.train_on_batch(np.ones_like(inputs),
-                         np.random.random(out1.shape))
-    out2 = model.predict(np.ones_like(inputs))
-
-    # if the state is not reset, output should be different
-    assert(out1.max() != out2.max())
-
-    # check that output changes after states are reset
-    # (even though the model itself didn't change)
-    layer.reset_states()
-    out3 = model.predict(np.ones_like(inputs))
-    assert(out2.max() != out3.max())
-
-    # check that container-level reset_states() works
-    model.reset_states()
-    out4 = model.predict(np.ones_like(inputs))
-    assert_allclose(out3, out4, atol=1e-5)
-
-    # check that the call to `predict` updated the states
-    out5 = model.predict(np.ones_like(inputs))
-    assert(out4.max() != out5.max())
-
-    # cntk doesn't support eval convolution with static
-    # variable, will enable it later
-    if K.backend() != 'cntk':
-        # check regularizers
-        kwargs = {'data_format': data_format,
-                  'return_sequences': return_sequences,
-                  'kernel_size': (num_row, num_col),
-                  'stateful': True,
-                  'filters': filters,
-                  'batch_input_shape': inputs.shape,
-                  'kernel_regularizer': regularizers.L1L2(l1=0.01),
-                  'recurrent_regularizer': regularizers.L1L2(l1=0.01),
-                  'bias_regularizer': 'l2',
-                  'activity_regularizer': 'l2',
-                  'kernel_constraint': 'max_norm',
-                  'recurrent_constraint': 'max_norm',
-                  'bias_constraint': 'max_norm',
-                  'padding': 'same'}
-
-        layer = convolutional_recurrent.ConvLSTM2D(**kwargs)
-        layer.build(inputs.shape)
-        assert len(layer.losses) == 3
-        assert layer.activity_regularizer
-        output = layer(K.variable(np.ones(inputs.shape)))
-        assert len(layer.losses) == 4
-        K.eval(output)
-
-    # check dropout
-    layer_test(convolutional_recurrent.ConvLSTM2D,
-               kwargs={'data_format': data_format,
-                       'return_sequences': return_sequences,
-                       'filters': filters,
-                       'kernel_size': (num_row, num_col),
-                       'padding': 'same',
-                       'dropout': 0.1,
-                       'recurrent_dropout': 0.1},
-               input_shape=inputs.shape)
-
-    # check state initialization
-    layer = convolutional_recurrent.ConvLSTM2D(
-        filters=filters, kernel_size=(num_row, num_col),
-        data_format=data_format, return_sequences=return_sequences)
-    layer.build(inputs.shape)
-    x = Input(batch_shape=inputs.shape)
-    initial_state = layer.get_initial_state(x)
-    y = layer(x, initial_state=initial_state)
-    model = Model(x, y)
-    assert (model.predict(inputs).shape ==
-            layer.compute_output_shape(inputs.shape))
-
-
-if __name__ == '__main__':
-    pytest.main([__file__])
-import pytest
-import numpy as np
-from numpy.testing import assert_allclose
-
-from keras.utils.test_utils import layer_test
-from keras import backend as K
-from keras.layers import convolutional
-from keras.models import Sequential
-from keras.backend import load_backend
-
-
-# TensorFlow does not support full convolution.
-if K.backend() == 'theano':
-    _convolution_paddings = ['valid', 'same', 'full']
-else:
-    _convolution_paddings = ['valid', 'same']
-
-
-@pytest.mark.skipif((K.backend() == 'cntk' and load_backend.dev.type() == 0),
-                    reason='cntk only support dilated conv on GPU')
-@pytest.mark.parametrize(
-    'layer_kwargs,input_length,expected_output',
-    [
-        # Causal
-        ({'filters': 1, 'kernel_size': 2, 'dilation_rate': 1, 'padding': 'causal',
-          'kernel_initializer': 'ones', 'use_bias': False},
-         4, [[[0], [1], [3], [5]]]),
-        # Non-causal
-        ({'filters': 1, 'kernel_size': 2, 'dilation_rate': 1, 'padding': 'valid',
-          'kernel_initializer': 'ones', 'use_bias': False},
-         4, [[[1], [3], [5]]]),
-        # Causal dilated with larger kernel size
-        ({'filters': 1, 'kernel_size': 3, 'dilation_rate': 2, 'padding': 'causal',
-          'kernel_initializer': 'ones', 'use_bias': False},
-         10, np.float32([[[0], [1], [2], [4], [6], [9], [12], [15], [18], [21]]])),
-    ]
-)
-def test_causal_dilated_conv(layer_kwargs, input_length, expected_output):
-    input_data = np.reshape(np.arange(input_length, dtype='float32'),
-                            (1, input_length, 1))
-    layer_test(convolutional.Conv1D, input_data=input_data,
-               kwargs=layer_kwargs, expected_output=expected_output)
-
-
-@pytest.mark.parametrize(
-    'padding,strides',
-    [(padding, strides)
-     for padding in _convolution_paddings
-     for strides in [1, 2]
-     if not (padding == 'same' and strides != 1)]
-)
-def test_conv_1d(padding, strides):
-    batch_size = 2
-    steps = 8
-    input_dim = 2
-    kernel_size = 3
-    filters = 3
-
-    layer_test(convolutional.Conv1D,
-               kwargs={'filters': filters,
-                       'kernel_size': kernel_size,
-                       'padding': padding,
-                       'strides': strides},
-               input_shape=(batch_size, steps, input_dim))
-
-    layer_test(convolutional.Conv1D,
-               kwargs={'filters': filters,
-                       'kernel_size': kernel_size,
-                       'padding': padding,
-                       'kernel_regularizer': 'l2',
-                       'bias_regularizer': 'l2',
-                       'activity_regularizer': 'l2',
-                       'kernel_constraint': 'max_norm',
-                       'bias_constraint': 'max_norm',
-                       'strides': strides},
-               input_shape=(batch_size, steps, input_dim))
-
-
-@pytest.mark.skipif((K.backend() == 'cntk' and load_backend.dev.type() == 0),
-                    reason='cntk only support dilated conv on GPU')
-def test_conv_1d_dilation():
-    batch_size = 2
-    steps = 8
-    input_dim = 2
-    kernel_size = 3
-    filters = 3
-    padding = _convolution_paddings[-1]
-
-    layer_test(convolutional.Conv1D,
-               kwargs={'filters': filters,
-                       'kernel_size': kernel_size,
-                       'padding': padding,
-                       'dilation_rate': 2},
-               input_shape=(batch_size, steps, input_dim))
-
-
-def test_conv_1d_channels_first():
-    batch_size = 2
-    steps = 8
-    input_dim = 2
-    kernel_size = 3
-    filters = 3
-
-    layer_test(convolutional.Conv1D,
-               kwargs={'filters': filters,
-                       'kernel_size': kernel_size,
-                       'data_format': 'channels_first'},
-               input_shape=(batch_size, input_dim, steps))
-
-
-@pytest.mark.parametrize(
-    'strides,padding',
-    [(strides, padding)
-     for padding in _convolution_paddings
-     for strides in [(1, 1), (2, 2)]
-     if not (padding == 'same' and strides != (1, 1))]
-)
-def test_convolution_2d(strides, padding):
-    num_samples = 2
-    filters = 2
-    stack_size = 3
-    kernel_size = (3, 2)
-    num_row = 7
-    num_col = 6
-
-    layer_test(convolutional.Conv2D,
-               kwargs={'filters': filters,
-                       'kernel_size': kernel_size,
-                       'padding': padding,
-                       'strides': strides,
-                       'data_format': 'channels_first'},
-               input_shape=(num_samples, stack_size, num_row, num_col))
-
-
-def test_convolution_2d_channels_last():
-    num_samples = 2
-    filters = 2
-    stack_size = 3
-    num_row = 7
-    num_col = 6
-    padding = 'valid'
-    strides = (2, 2)
-
-    layer_test(convolutional.Conv2D,
-               kwargs={'filters': filters,
-                       'kernel_size': 3,
-                       'padding': padding,
-                       'data_format': 'channels_last',
-                       'activation': None,
-                       'kernel_regularizer': 'l2',
-                       'bias_regularizer': 'l2',
-                       'activity_regularizer': 'l2',
-                       'kernel_constraint': 'max_norm',
-                       'bias_constraint': 'max_norm',
-                       'strides': strides},
-               input_shape=(num_samples, num_row, num_col, stack_size))
-
-
-@pytest.mark.skipif((K.backend() == 'cntk' and load_backend.dev.type() == 0),
-                    reason='cntk only supports dilated conv on GPU')
-def test_convolution_2d_dilation():
-    num_samples = 2
-    filters = 2
-    stack_size = 3
-    kernel_size = (3, 2)
-    num_row = 7
-    num_col = 6
-    padding = 'valid'
-
-    layer_test(convolutional.Conv2D,
-               kwargs={'filters': filters,
-                       'kernel_size': kernel_size,
-                       'padding': padding,
-                       'dilation_rate': (2, 2)},
-               input_shape=(num_samples, num_row, num_col, stack_size))
-
-
-def test_convolution_2d_invalid():
-    filters = 2
-    padding = _convolution_paddings[-1]
-    kernel_size = (3, 2)
-
-    with pytest.raises(ValueError):
-        model = Sequential([convolutional.Conv2D(
-            filters=filters, kernel_size=kernel_size, padding=padding,
-            batch_input_shape=(None, None, 5, None))])
-
-
-@pytest.mark.parametrize(
-    'padding,out_padding,strides',
-    [(padding, out_padding, strides)
-     for padding in _convolution_paddings
-     for out_padding in [None, (0, 0), (1, 1)]
-     for strides in [(1, 1), (2, 2)]
-     if (not (padding == 'same' and strides != (1, 1))
-         and not(strides == (1, 1) and out_padding == (1, 1)))]
-)
-def test_conv2d_transpose(padding, out_padding, strides):
-    num_samples = 2
-    filters = 2
-    stack_size = 3
-    num_row = 5
-    num_col = 6
-
-    layer_test(convolutional.Conv2DTranspose,
-               kwargs={'filters': filters,
-                       'kernel_size': 3,
-                       'padding': padding,
-                       'output_padding': out_padding,
-                       'strides': strides,
-                       'data_format': 'channels_last'},
-               input_shape=(num_samples, num_row, num_col, stack_size),
-               fixed_batch_size=True)
-
-
-@pytest.mark.skipif((K.backend() == 'cntk' and load_backend.dev.type() == 0),
-                    reason='cntk only supports dilated conv transpose on GPU')
-def test_conv2d_transpose_dilation():
-
-    layer_test(convolutional.Conv2DTranspose,
-               kwargs={'filters': 2,
-                       'kernel_size': 3,
-                       'padding': 'same',
-                       'data_format': 'channels_last',
-                       'dilation_rate': (2, 2)},
-               input_shape=(2, 5, 6, 3))
-
-    # Check dilated conv transpose returns expected output
-    input_data = np.arange(48).reshape((1, 4, 4, 3)).astype(np.float32)
-    expected_output = np.float32([[192, 228, 192, 228],
-                                  [336, 372, 336, 372],
-                                  [192, 228, 192, 228],
-                                  [336, 372, 336, 372]]).reshape((1, 4, 4, 1))
-
-    layer_test(convolutional.Conv2DTranspose,
-               input_data=input_data,
-               kwargs={'filters': 1,
-                       'kernel_size': 3,
-                       'padding': 'same',
-                       'data_format': 'channels_last',
-                       'dilation_rate': (2, 2),
-                       'kernel_initializer': 'ones'},
-               expected_output=expected_output)
-
-
-def test_conv2d_transpose_channels_first():
-    num_samples = 2
-    filters = 2
-    stack_size = 3
-    num_row = 5
-    num_col = 6
-    padding = 'valid'
-    strides = (2, 2)
-
-    layer_test(convolutional.Conv2DTranspose,
-               kwargs={'filters': filters,
-                       'kernel_size': 3,
-                       'padding': padding,
-                       'data_format': 'channels_first',
-                       'activation': None,
-                       'kernel_regularizer': 'l2',
-                       'bias_regularizer': 'l2',
-                       'activity_regularizer': 'l2',
-                       'kernel_constraint': 'max_norm',
-                       'bias_constraint': 'max_norm',
-                       'strides': strides},
-               input_shape=(num_samples, stack_size, num_row, num_col),
-               fixed_batch_size=True)
-
-
-def test_conv2d_transpose_invalid():
-    filters = 2
- stack_size = 3 - num_row = 5 - num_col = 6 - padding = 'valid' - - with pytest.raises(ValueError): - model = Sequential([convolutional.Conv2DTranspose( - filters=filters, - kernel_size=3, - padding=padding, - use_bias=True, - batch_input_shape=(None, None, 5, None))]) - - # Test invalid output padding for given stride. Output padding equal to stride - with pytest.raises(ValueError): - model = Sequential([convolutional.Conv2DTranspose( - filters=filters, - kernel_size=3, - padding=padding, - output_padding=(0, 3), - strides=(1, 3), - batch_input_shape=(None, num_row, num_col, stack_size))]) - - # Output padding greater than stride - with pytest.raises(ValueError): - model = Sequential([convolutional.Conv2DTranspose( - filters=filters, - kernel_size=3, - padding=padding, - output_padding=(2, 2), - strides=(1, 3), - batch_input_shape=(None, num_row, num_col, stack_size))]) - - -@pytest.mark.parametrize( - 'padding,strides,multiplier,dilation_rate', - [(padding, strides, multiplier, dilation_rate) - for padding in _convolution_paddings - for strides in [1, 2] - for multiplier in [1, 2] - for dilation_rate in [1, 2] - if (not (padding == 'same' and strides != 1) - and not (dilation_rate != 1 and strides != 1) - and not (dilation_rate != 1 and K.backend() == 'cntk'))] -) -def test_separable_conv_1d(padding, strides, multiplier, dilation_rate): - num_samples = 2 - filters = 6 - stack_size = 3 - num_step = 9 - - layer_test(convolutional.SeparableConv1D, - kwargs={'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'strides': strides, - 'depth_multiplier': multiplier, - 'dilation_rate': dilation_rate}, - input_shape=(num_samples, num_step, stack_size)) - - -def test_separable_conv_1d_additional_args(): - num_samples = 2 - filters = 6 - stack_size = 3 - num_step = 9 - padding = 'valid' - multiplier = 2 - - layer_test(convolutional.SeparableConv1D, - kwargs={'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'data_format': 'channels_first', - 'activation': None, - 'depthwise_regularizer': 'l2', - 'pointwise_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'pointwise_constraint': 'unit_norm', - 'depthwise_constraint': 'unit_norm', - 'strides': 1, - 'use_bias': True, - 'depth_multiplier': multiplier}, - input_shape=(num_samples, stack_size, num_step)) - - -def test_separable_conv_1d_invalid(): - filters = 6 - padding = 'valid' - with pytest.raises(ValueError): - model = Sequential([convolutional.SeparableConv1D( - filters=filters, kernel_size=3, padding=padding, - batch_input_shape=(None, 5, None))]) - - -@pytest.mark.parametrize( - 'padding,strides,multiplier,dilation_rate', - [(padding, strides, multiplier, dilation_rate) - for padding in _convolution_paddings - for strides in [(1, 1), (2, 2)] - for multiplier in [1, 2] - for dilation_rate in [(1, 1), (2, 2), (2, 1), (1, 2)] - if (not (padding == 'same' and strides != (1, 1)) - and not (dilation_rate != (1, 1) and strides != (1, 1)) - and not (dilation_rate != (1, 1) and multiplier == dilation_rate[0]) - and not (dilation_rate != (1, 1) and K.backend() == 'cntk'))] -) -def test_separable_conv_2d(padding, strides, multiplier, dilation_rate): - num_samples = 2 - filters = 6 - stack_size = 3 - num_row = 7 - num_col = 6 - - layer_test( - convolutional.SeparableConv2D, - kwargs={'filters': filters, - 'kernel_size': (3, 3), - 'padding': padding, - 'strides': strides, - 'depth_multiplier': multiplier, - 'dilation_rate': dilation_rate}, - input_shape=(num_samples, num_row, num_col, stack_size)) - - 
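The SeparableConv1D/SeparableConv2D tests above sweep padding, strides, depth multipliers, and dilation rates. As a minimal standalone sketch of what "separable" buys (not part of the scraped test file; it assumes only the `keras` pinned in the Pipfile), the factorization into a depthwise plus a pointwise step shows up directly in the parameter counts:

```python
# Sketch: a depthwise-separable convolution factors a standard convolution
# into a per-channel (depthwise) filter and a 1x1 (pointwise) mixing step.
from keras import layers, models

inp = layers.Input(shape=(7, 6, 3))
standard = models.Model(inp, layers.Conv2D(6, (3, 3), use_bias=False)(inp))
separable = models.Model(inp, layers.SeparableConv2D(6, (3, 3), use_bias=False)(inp))

print(standard.count_params())   # 3*3*3*6 = 162 weights
print(separable.count_params())  # 3*3*3 depthwise + 1*1*3*6 pointwise = 45
```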
-def test_separable_conv_2d_additional_args(): - num_samples = 2 - filters = 6 - stack_size = 3 - num_row = 7 - num_col = 6 - padding = 'valid' - strides = (2, 2) - multiplier = 2 - - layer_test(convolutional.SeparableConv2D, - kwargs={'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'data_format': 'channels_first', - 'activation': None, - 'depthwise_regularizer': 'l2', - 'pointwise_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'pointwise_constraint': 'unit_norm', - 'depthwise_constraint': 'unit_norm', - 'strides': strides, - 'depth_multiplier': multiplier}, - input_shape=(num_samples, stack_size, num_row, num_col)) - - -def test_separable_conv_2d_invalid(): - filters = 6 - padding = 'valid' - with pytest.raises(ValueError): - model = Sequential([convolutional.SeparableConv2D( - filters=filters, kernel_size=3, padding=padding, - batch_input_shape=(None, None, 5, None))]) - - -@pytest.mark.parametrize( - 'padding,strides,multiplier,dilation_rate', - [(padding, strides, multiplier, dilation_rate) - for padding in _convolution_paddings - for strides in [(1, 1), (2, 2)] - for multiplier in [1, 2] - for dilation_rate in [(1, 1), (2, 2), (2, 1), (1, 2)] - if (not (padding == 'same' and strides != (1, 1)) - and not (dilation_rate != (1, 1) and strides != (1, 1)) - and not (dilation_rate != (1, 1) and multiplier == dilation_rate[0]) - and not (dilation_rate != (1, 1) and K.backend() == 'cntk'))] -) -def test_depthwise_conv_2d(padding, strides, multiplier, dilation_rate): - num_samples = 2 - stack_size = 3 - num_row = 7 - num_col = 6 - - layer_test(convolutional.DepthwiseConv2D, - kwargs={'kernel_size': (3, 3), - 'padding': padding, - 'strides': strides, - 'depth_multiplier': multiplier, - 'dilation_rate': dilation_rate}, - input_shape=(num_samples, - num_row, - num_col, - stack_size)) - - -def test_depthwise_conv_2d_additional_args(): - num_samples = 2 - stack_size = 3 - num_row = 7 - num_col = 6 - padding = 'valid' - strides = (2, 2) - multiplier = 2 - - layer_test(convolutional.DepthwiseConv2D, - kwargs={'kernel_size': 3, - 'padding': padding, - 'data_format': 'channels_first', - 'activation': None, - 'depthwise_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'depthwise_constraint': 'unit_norm', - 'use_bias': True, - 'strides': strides, - 'depth_multiplier': multiplier}, - input_shape=(num_samples, stack_size, num_row, num_col)) - - -def test_depthwise_conv_2d_invalid(): - padding = 'valid' - with pytest.raises(ValueError): - Sequential([convolutional.DepthwiseConv2D( - kernel_size=3, - padding=padding, - batch_input_shape=(None, None, 5, None))]) - - -@pytest.mark.parametrize( - 'padding,strides', - [(padding, strides) - for padding in _convolution_paddings - for strides in [(1, 1, 1), (2, 2, 2)] - if not (padding == 'same' and strides != (1, 1, 1))] -) -def test_convolution_3d(padding, strides): - num_samples = 2 - filters = 2 - stack_size = 3 - - input_len_dim1 = 9 - input_len_dim2 = 8 - input_len_dim3 = 8 - - layer_test(convolutional.Convolution3D, - kwargs={'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'strides': strides}, - input_shape=(num_samples, - input_len_dim1, input_len_dim2, input_len_dim3, - stack_size)) - - -def test_convolution_3d_additional_args(): - num_samples = 2 - filters = 2 - stack_size = 3 - padding = 'valid' - strides = (2, 2, 2) - - input_len_dim1 = 9 - input_len_dim2 = 8 - input_len_dim3 = 8 - - layer_test(convolutional.Convolution3D, - kwargs={'filters': filters, - 
'kernel_size': (1, 2, 3), - 'padding': padding, - 'activation': None, - 'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'kernel_constraint': 'max_norm', - 'bias_constraint': 'max_norm', - 'strides': strides}, - input_shape=(num_samples, - input_len_dim1, input_len_dim2, input_len_dim3, - stack_size)) - - -@pytest.mark.parametrize( - 'padding,out_padding,strides,data_format', - [(padding, out_padding, strides, data_format) - for padding in _convolution_paddings - for out_padding in [None, (0, 0, 0), (1, 1, 1)] - for strides in [(1, 1, 1), (2, 2, 2)] - for data_format in ['channels_first', 'channels_last'] - if (not (padding == 'same' and strides != (1, 1, 1)) - and not (strides == (1, 1, 1) and out_padding == (1, 1, 1)))] -) -def test_conv3d_transpose(padding, out_padding, strides, data_format): - filters = 2 - stack_size = 3 - num_depth = 7 - num_row = 5 - num_col = 6 - - layer_test( - convolutional.Conv3DTranspose, - kwargs={'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'output_padding': out_padding, - 'strides': strides, - 'data_format': data_format}, - input_shape=(None, num_depth, num_row, num_col, stack_size), - fixed_batch_size=True) - - -def test_conv3d_transpose_additional_args(): - filters = 2 - stack_size = 3 - num_depth = 7 - num_row = 5 - num_col = 6 - padding = 'valid' - strides = (2, 2, 2) - - layer_test(convolutional.Conv3DTranspose, - kwargs={'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'data_format': 'channels_first', - 'activation': None, - 'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'kernel_constraint': 'max_norm', - 'bias_constraint': 'max_norm', - 'use_bias': True, - 'strides': strides}, - input_shape=(None, stack_size, num_depth, num_row, num_col), - fixed_batch_size=True) - - -def test_conv3d_transpose_invalid(): - filters = 2 - stack_size = 3 - num_depth = 7 - num_row = 5 - num_col = 6 - padding = 'valid' - - # Test invalid use case - with pytest.raises(ValueError): - model = Sequential([convolutional.Conv3DTranspose( - filters=filters, - kernel_size=3, - padding=padding, - batch_input_shape=(None, None, 5, None, None))]) - - # Test invalid output padding for given stride. Output padding equal - # to stride - with pytest.raises(ValueError): - model = Sequential([convolutional.Conv3DTranspose( - filters=filters, - kernel_size=3, - padding=padding, - output_padding=(0, 3, 3), - strides=(1, 3, 4), - batch_input_shape=(None, num_depth, num_row, num_col, stack_size))]) - - # Output padding greater than stride - with pytest.raises(ValueError): - model = Sequential([convolutional.Conv3DTranspose( - filters=filters, - kernel_size=3, - padding=padding, - output_padding=(2, 2, 3), - strides=(1, 3, 4), - batch_input_shape=(None, num_depth, num_row, num_col, stack_size))]) - - -def test_zero_padding_1d(): - num_samples = 2 - input_dim = 2 - num_steps = 5 - shape = (num_samples, num_steps, input_dim) - inputs = np.ones(shape) - - # basic test - layer_test(convolutional.ZeroPadding1D, - kwargs={'padding': 2}, - input_shape=inputs.shape) - layer_test(convolutional.ZeroPadding1D, - kwargs={'padding': (1, 2)}, - input_shape=inputs.shape) - - # correctness test - layer = convolutional.ZeroPadding1D(padding=2) - layer.build(shape) - outputs = layer(K.variable(inputs)) - np_output = K.eval(outputs) - for offset in [0, 1, -1, -2]: - assert_allclose(np_output[:, offset, :], 0.) - assert_allclose(np_output[:, 2:-2, :], 1.) 
-
-    layer = convolutional.ZeroPadding1D(padding=(1, 2))
-    layer.build(shape)
-    outputs = layer(K.variable(inputs))
-    np_output = K.eval(outputs)
-    for left_offset in [0]:
-        assert_allclose(np_output[:, left_offset, :], 0.)
-    for right_offset in [-1, -2]:
-        assert_allclose(np_output[:, right_offset, :], 0.)
-    assert_allclose(np_output[:, 1:-2, :], 1.)
-    layer.get_config()
-
-
-@pytest.mark.parametrize(
-    'data_format,padding',
-    [(data_format, padding)
-     for data_format in ['channels_first', 'channels_last']
-     for padding in [(2, 2), ((1, 2), (3, 4))]]
-)
-def test_zero_padding_2d(data_format, padding):
-    num_samples = 2
-    stack_size = 2
-    input_num_row = 4
-    input_num_col = 5
-
-    if data_format == 'channels_last':
-        inputs = np.ones((num_samples, input_num_row,
-                          input_num_col, stack_size))
-    else:
-        inputs = np.ones(
-            (num_samples, stack_size, input_num_row, input_num_col))
-
-    layer_test(convolutional.ZeroPadding2D,
-               kwargs={'padding': padding, 'data_format': data_format},
-               input_shape=inputs.shape)
-
-
-@pytest.mark.parametrize('data_format',
-                         ['channels_first', 'channels_last'])
-def test_zero_padding_2d_correctness(data_format):
-    num_samples = 2
-    stack_size = 2
-    input_num_row = 4
-    input_num_col = 5
-    inputs = np.ones((num_samples, stack_size, input_num_row, input_num_col))
-
-    layer = convolutional.ZeroPadding2D(padding=(2, 2),
-                                        data_format=data_format)
-    layer.build(inputs.shape)
-    outputs = layer(K.variable(inputs))
-    np_output = K.eval(outputs)
-    if data_format == 'channels_last':
-        for offset in [0, 1, -1, -2]:
-            assert_allclose(np_output[:, offset, :, :], 0.)
-            assert_allclose(np_output[:, :, offset, :], 0.)
-        assert_allclose(np_output[:, 2:-2, 2:-2, :], 1.)
-    elif data_format == 'channels_first':
-        for offset in [0, 1, -1, -2]:
-            assert_allclose(np_output[:, :, offset, :], 0.)
-            assert_allclose(np_output[:, :, :, offset], 0.)
-        assert_allclose(np_output[:, :, 2:-2, 2:-2], 1.)
-
-    layer = convolutional.ZeroPadding2D(padding=((1, 2), (3, 4)),
-                                        data_format=data_format)
-    layer.build(inputs.shape)
-    outputs = layer(K.variable(inputs))
-    np_output = K.eval(outputs)
-    if data_format == 'channels_last':
-        for top_offset in [0]:
-            assert_allclose(np_output[:, top_offset, :, :], 0.)
-        for bottom_offset in [-1, -2]:
-            assert_allclose(np_output[:, bottom_offset, :, :], 0.)
-        for left_offset in [0, 1, 2]:
-            assert_allclose(np_output[:, :, left_offset, :], 0.)
-        for right_offset in [-1, -2, -3, -4]:
-            assert_allclose(np_output[:, :, right_offset, :], 0.)
-        assert_allclose(np_output[:, 1:-2, 3:-4, :], 1.)
-    elif data_format == 'channels_first':
-        for top_offset in [0]:
-            assert_allclose(np_output[:, :, top_offset, :], 0.)
-        for bottom_offset in [-1, -2]:
-            assert_allclose(np_output[:, :, bottom_offset, :], 0.)
-        for left_offset in [0, 1, 2]:
-            assert_allclose(np_output[:, :, :, left_offset], 0.)
-        for right_offset in [-1, -2, -3, -4]:
-            assert_allclose(np_output[:, :, :, right_offset], 0.)
-        assert_allclose(np_output[:, :, 1:-2, 3:-4], 1.)
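The asymmetric assertions above are easier to read next to the equivalent numpy operation. A sketch (not from the test file, plain numpy only): `ZeroPadding2D(padding=((1, 2), (3, 4)))` on channels_last input behaves like `np.pad` on the two spatial axes:

```python
import numpy as np

inputs = np.ones((2, 4, 5, 2))  # (batch, rows, cols, channels)
padded = np.pad(inputs, ((0, 0), (1, 2), (3, 4), (0, 0)), mode='constant')

assert padded.shape == (2, 7, 12, 2)          # 4+1+2 rows, 5+3+4 cols
assert (padded[:, 0, :, :] == 0).all()        # one zero row on top
assert (padded[:, :, -4:, :] == 0).all()      # four zero cols on the right
assert (padded[:, 1:-2, 3:-4, :] == 1).all()  # interior left untouched
```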
-
-
-@pytest.mark.parametrize(
-    'data_format,padding',
-    [(data_format, padding)
-     for data_format in ['channels_first', 'channels_last']
-     for padding in [(2, 2, 2), ((1, 2), (3, 4), (0, 2))]]
-)
-def test_zero_padding_3d(data_format, padding):
-    num_samples = 2
-    stack_size = 2
-    input_len_dim1 = 4
-    input_len_dim2 = 5
-    input_len_dim3 = 3
-    inputs = np.ones((num_samples,
-                      input_len_dim1, input_len_dim2, input_len_dim3,
-                      stack_size))
-
-    layer_test(convolutional.ZeroPadding3D,
-               kwargs={'padding': padding, 'data_format': data_format},
-               input_shape=inputs.shape)
-
-
-@pytest.mark.parametrize('data_format',
-                         ['channels_first', 'channels_last'])
-def test_zero_padding_3d_correctness(data_format):
-    num_samples = 2
-    stack_size = 2
-    input_len_dim1 = 4
-    input_len_dim2 = 5
-    input_len_dim3 = 3
-    inputs = np.ones((num_samples,
-                      input_len_dim1, input_len_dim2, input_len_dim3,
-                      stack_size))
-
-    layer = convolutional.ZeroPadding3D(padding=(2, 2, 2),
-                                        data_format=data_format)
-    layer.build(inputs.shape)
-    outputs = layer(K.variable(inputs))
-    np_output = K.eval(outputs)
-    if data_format == 'channels_last':
-        for offset in [0, 1, -1, -2]:
-            assert_allclose(np_output[:, offset, :, :, :], 0.)
-            assert_allclose(np_output[:, :, offset, :, :], 0.)
-            assert_allclose(np_output[:, :, :, offset, :], 0.)
-        assert_allclose(np_output[:, 2:-2, 2:-2, 2:-2, :], 1.)
-    elif data_format == 'channels_first':
-        for offset in [0, 1, -1, -2]:
-            assert_allclose(np_output[:, :, offset, :, :], 0.)
-            assert_allclose(np_output[:, :, :, offset, :], 0.)
-            assert_allclose(np_output[:, :, :, :, offset], 0.)
-        assert_allclose(np_output[:, :, 2:-2, 2:-2, 2:-2], 1.)
-
-    layer = convolutional.ZeroPadding3D(padding=((1, 2), (3, 4), (0, 2)),
-                                        data_format=data_format)
-    layer.build(inputs.shape)
-    outputs = layer(K.variable(inputs))
-    np_output = K.eval(outputs)
-    if data_format == 'channels_last':
-        for dim1_offset in [0, -1, -2]:
-            assert_allclose(np_output[:, dim1_offset, :, :, :], 0.)
-        for dim2_offset in [0, 1, 2, -1, -2, -3, -4]:
-            assert_allclose(np_output[:, :, dim2_offset, :, :], 0.)
-        for dim3_offset in [-1, -2]:
-            assert_allclose(np_output[:, :, :, dim3_offset, :], 0.)
-        assert_allclose(np_output[:, 1:-2, 3:-4, 0:-2, :], 1.)
-    elif data_format == 'channels_first':
-        for dim1_offset in [0, -1, -2]:
-            assert_allclose(np_output[:, :, dim1_offset, :, :], 0.)
-        for dim2_offset in [0, 1, 2, -1, -2, -3, -4]:
-            assert_allclose(np_output[:, :, :, dim2_offset, :], 0.)
-        for dim3_offset in [-1, -2]:
-            assert_allclose(np_output[:, :, :, :, dim3_offset], 0.)
-        assert_allclose(np_output[:, :, 1:-2, 3:-4, 0:-2], 1.)
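One property worth keeping in mind here (an illustration, not something the test file asserts): cropping by the same offsets inverts zero-padding, which links the ZeroPadding3D checks above to the Cropping3D tests further below. A numpy sketch:

```python
import numpy as np

x = np.random.rand(2, 4, 5, 3, 2)  # channels_last 3D volume
pad = ((1, 2), (3, 4), (0, 2))     # same padding as the correctness test above
padded = np.pad(x, ((0, 0),) + pad + ((0, 0),), mode='constant')
cropped = padded[:, 1:-2, 3:-4, 0:-2, :]  # crop away exactly the padded rim

np.testing.assert_allclose(cropped, x)
```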
- - -def test_upsampling_1d(): - layer_test(convolutional.UpSampling1D, - kwargs={'size': 2}, - input_shape=(3, 5, 4)) - - -@pytest.mark.parametrize('data_format', - ['channels_first', 'channels_last']) -def test_upsampling_2d(data_format): - num_samples = 2 - stack_size = 2 - input_num_row = 11 - input_num_col = 12 - - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, stack_size, input_num_row, - input_num_col) - else: # tf - inputs = np.random.rand(num_samples, input_num_row, input_num_col, - stack_size) - - # basic test - layer_test(convolutional.UpSampling2D, - kwargs={'size': (2, 2), 'data_format': data_format}, - input_shape=inputs.shape) - - for length_row in [2]: - for length_col in [2, 3]: - layer = convolutional.UpSampling2D( - size=(length_row, length_col), - data_format=data_format) - layer.build(inputs.shape) - outputs = layer(K.variable(inputs)) - np_output = K.eval(outputs) - if data_format == 'channels_first': - assert np_output.shape[2] == length_row * input_num_row - assert np_output.shape[3] == length_col * input_num_col - else: # tf - assert np_output.shape[1] == length_row * input_num_row - assert np_output.shape[2] == length_col * input_num_col - - # compare with numpy - if data_format == 'channels_first': - expected_out = np.repeat(inputs, length_row, axis=2) - expected_out = np.repeat(expected_out, length_col, axis=3) - else: # tf - expected_out = np.repeat(inputs, length_row, axis=1) - expected_out = np.repeat(expected_out, length_col, axis=2) - - assert_allclose(np_output, expected_out) - - -@pytest.mark.skipif((K.backend() == 'cntk'), - reason='cntk does not support it yet') -@pytest.mark.parametrize('data_format', - ['channels_first', 'channels_last']) -def test_upsampling_2d_bilinear(data_format): - num_samples = 2 - stack_size = 2 - input_num_row = 11 - input_num_col = 12 - - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, stack_size, input_num_row, - input_num_col) - else: # tf - inputs = np.random.rand(num_samples, input_num_row, input_num_col, - stack_size) - - # basic test - layer_test(convolutional.UpSampling2D, - kwargs={'size': (2, 2), - 'data_format': data_format, - 'interpolation': 'bilinear'}, - input_shape=inputs.shape) - - for length_row in [2]: - for length_col in [2, 3]: - layer = convolutional.UpSampling2D( - size=(length_row, length_col), - data_format=data_format) - layer.build(inputs.shape) - outputs = layer(K.variable(inputs)) - np_output = K.eval(outputs) - if data_format == 'channels_first': - assert np_output.shape[2] == length_row * input_num_row - assert np_output.shape[3] == length_col * input_num_col - else: # tf - assert np_output.shape[1] == length_row * input_num_row - assert np_output.shape[2] == length_col * input_num_col - - -@pytest.mark.skipif((K.backend() == 'cntk'), - reason="cntk does not support it yet") -@pytest.mark.parametrize('data_format', - ['channels_first', 'channels_last']) -def test_upsampling_3d(data_format): - num_samples = 2 - stack_size = 2 - input_len_dim1 = 10 - input_len_dim2 = 11 - input_len_dim3 = 12 - - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, - stack_size, - input_len_dim1, input_len_dim2, input_len_dim3) - else: # tf - inputs = np.random.rand(num_samples, - input_len_dim1, input_len_dim2, input_len_dim3, - stack_size) - - # basic test - layer_test(convolutional.UpSampling3D, - kwargs={'size': (2, 2, 2), 'data_format': data_format}, - input_shape=inputs.shape) - - for length_dim1 in [2, 3]: - for length_dim2 in [2]: - 
for length_dim3 in [3]: - layer = convolutional.UpSampling3D( - size=(length_dim1, length_dim2, length_dim3), - data_format=data_format) - layer.build(inputs.shape) - outputs = layer(K.variable(inputs)) - np_output = K.eval(outputs) - if data_format == 'channels_first': - assert np_output.shape[2] == length_dim1 * input_len_dim1 - assert np_output.shape[3] == length_dim2 * input_len_dim2 - assert np_output.shape[4] == length_dim3 * input_len_dim3 - else: # tf - assert np_output.shape[1] == length_dim1 * input_len_dim1 - assert np_output.shape[2] == length_dim2 * input_len_dim2 - assert np_output.shape[3] == length_dim3 * input_len_dim3 - - # compare with numpy - if data_format == 'channels_first': - expected_out = np.repeat(inputs, length_dim1, axis=2) - expected_out = np.repeat(expected_out, length_dim2, axis=3) - expected_out = np.repeat(expected_out, length_dim3, axis=4) - else: # tf - expected_out = np.repeat(inputs, length_dim1, axis=1) - expected_out = np.repeat(expected_out, length_dim2, axis=2) - expected_out = np.repeat(expected_out, length_dim3, axis=3) - - assert_allclose(np_output, expected_out) - - -@pytest.mark.skipif((K.backend() == 'cntk'), - reason="cntk does not support slice to 0 dimension") -def test_cropping_1d(): - num_samples = 2 - time_length = 4 - input_len_dim1 = 2 - inputs = np.random.rand(num_samples, time_length, input_len_dim1) - - layer_test(convolutional.Cropping1D, - kwargs={'cropping': (2, 2)}, - input_shape=inputs.shape) - - -def test_cropping_2d(): - num_samples = 2 - stack_size = 2 - input_len_dim1 = 9 - input_len_dim2 = 9 - cropping = ((2, 2), (3, 3)) - - for data_format in ['channels_first', 'channels_last']: - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, stack_size, - input_len_dim1, input_len_dim2) - else: - inputs = np.random.rand(num_samples, - input_len_dim1, input_len_dim2, - stack_size) - # basic test - layer_test(convolutional.Cropping2D, - kwargs={'cropping': cropping, - 'data_format': data_format}, - input_shape=inputs.shape) - # correctness test - layer = convolutional.Cropping2D(cropping=cropping, - data_format=data_format) - layer.build(inputs.shape) - outputs = layer(K.variable(inputs)) - np_output = K.eval(outputs) - # compare with numpy - if data_format == 'channels_first': - expected_out = inputs[:, - :, - cropping[0][0]: -cropping[0][1], - cropping[1][0]: -cropping[1][1]] - else: - expected_out = inputs[:, - cropping[0][0]: -cropping[0][1], - cropping[1][0]: -cropping[1][1], - :] - assert_allclose(np_output, expected_out) - - for data_format in ['channels_first', 'channels_last']: - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, stack_size, - input_len_dim1, input_len_dim2) - else: - inputs = np.random.rand(num_samples, - input_len_dim1, input_len_dim2, - stack_size) - # another correctness test (no cropping) - cropping = ((0, 0), (0, 0)) - layer = convolutional.Cropping2D(cropping=cropping, - data_format=data_format) - layer.build(inputs.shape) - outputs = layer(K.variable(inputs)) - np_output = K.eval(outputs) - # compare with input - assert_allclose(np_output, inputs) - - # Test invalid use cases - with pytest.raises(ValueError): - layer = convolutional.Cropping2D(cropping=((1, 1),)) - with pytest.raises(ValueError): - layer = convolutional.Cropping2D(cropping=lambda x: x) - - -def test_cropping_3d(): - num_samples = 2 - stack_size = 2 - input_len_dim1 = 8 - input_len_dim2 = 8 - input_len_dim3 = 8 - cropping = ((2, 2), (3, 3), (2, 3)) - - for data_format in 
['channels_last', 'channels_first']: - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, stack_size, - input_len_dim1, input_len_dim2, input_len_dim3) - else: - inputs = np.random.rand(num_samples, - input_len_dim1, input_len_dim2, - input_len_dim3, stack_size) - # basic test - layer_test(convolutional.Cropping3D, - kwargs={'cropping': cropping, - 'data_format': data_format}, - input_shape=inputs.shape) - # correctness test - layer = convolutional.Cropping3D(cropping=cropping, - data_format=data_format) - layer.build(inputs.shape) - outputs = layer(K.variable(inputs)) - np_output = K.eval(outputs) - # compare with numpy - if data_format == 'channels_first': - expected_out = inputs[:, - :, - cropping[0][0]: -cropping[0][1], - cropping[1][0]: -cropping[1][1], - cropping[2][0]: -cropping[2][1]] - else: - expected_out = inputs[:, - cropping[0][0]: -cropping[0][1], - cropping[1][0]: -cropping[1][1], - cropping[2][0]: -cropping[2][1], - :] - assert_allclose(np_output, expected_out) - - for data_format in ['channels_last', 'channels_first']: - if data_format == 'channels_first': - inputs = np.random.rand(num_samples, stack_size, - input_len_dim1, input_len_dim2, input_len_dim3) - else: - inputs = np.random.rand(num_samples, - input_len_dim1, input_len_dim2, - input_len_dim3, stack_size) - # another correctness test (no cropping) - cropping = ((0, 0), (0, 0), (0, 0)) - layer = convolutional.Cropping3D(cropping=cropping, - data_format=data_format) - layer.build(inputs.shape) - outputs = layer(K.variable(inputs)) - np_output = K.eval(outputs) - # compare with input - assert_allclose(np_output, inputs) - - # Test invalid use cases - with pytest.raises(ValueError): - layer = convolutional.Cropping3D(cropping=((1, 1),)) - with pytest.raises(ValueError): - layer = convolutional.Cropping3D(cropping=lambda x: x) - - -@pytest.mark.skipif((K.backend() == 'cntk'), - reason='CNTK does not support float64') -@pytest.mark.parametrize( - 'input_shape,conv_class', - [((2, 4, 2), convolutional.Conv1D), - ((2, 4, 4, 2), convolutional.Conv2D), - ((2, 4, 4, 4, 2), convolutional.Conv3D)] -) -def test_conv_float64(input_shape, conv_class): - kernel_size = 3 - strides = 1 - filters = 3 - K.set_floatx('float64') - layer_test(conv_class, - kwargs={'filters': filters, - 'kernel_size': kernel_size, - 'padding': 'valid', - 'strides': strides}, - input_shape=input_shape) - K.set_floatx('float32') - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from numpy.testing import assert_allclose - -from keras import backend as K -from keras import layers -from keras.models import Model -from keras.models import Sequential -from keras.utils.test_utils import layer_test -from keras import regularizers -from keras import constraints -from keras.layers import deserialize as deserialize_layer - - -def test_masking(): - layer_test(layers.Masking, - kwargs={}, - input_shape=(3, 2, 3)) - - -def test_dropout(): - layer_test(layers.Dropout, - kwargs={'rate': 0.5}, - input_shape=(3, 2)) - - layer_test(layers.Dropout, - kwargs={'rate': 0.5, 'noise_shape': [3, 1]}, - input_shape=(3, 2)) - - layer_test(layers.Dropout, - kwargs={'rate': 0.5, 'noise_shape': [None, 1]}, - input_shape=(3, 2)) - - layer_test(layers.SpatialDropout1D, - kwargs={'rate': 0.5}, - input_shape=(2, 3, 4)) - - for data_format in ['channels_last', 'channels_first']: - for shape in [(4, 5), (4, 5, 6)]: - if data_format == 'channels_last': - input_shape = (2,) + shape + (3,) - else: - input_shape = (2, 3) + shape 
- if len(shape) == 2: - layer = layers.SpatialDropout2D - else: - layer = layers.SpatialDropout3D - layer_test(layer, - kwargs={'rate': 0.5, - 'data_format': data_format}, - input_shape=input_shape) - - # Test invalid use cases - with pytest.raises(ValueError): - layer_test(layer, - kwargs={'rate': 0.5, - 'data_format': 'channels_middle'}, - input_shape=input_shape) - - -def test_activation(): - # with string argument - layer_test(layers.Activation, - kwargs={'activation': 'relu'}, - input_shape=(3, 2)) - - # with function argument - layer_test(layers.Activation, - kwargs={'activation': K.relu}, - input_shape=(3, 2)) - - -@pytest.mark.parametrize('target_shape,input_shape', - [((8, 1), (3, 2, 4)), - ((-1, 1), (3, 2, 4)), - ((1, -1), (3, 2, 4)), - ((-1, 1), (None, None, 4))]) -def test_reshape(target_shape, input_shape): - layer_test(layers.Reshape, - kwargs={'target_shape': target_shape}, - input_shape=input_shape) - - -def test_permute(): - layer_test(layers.Permute, - kwargs={'dims': (2, 1)}, - input_shape=(3, 2, 4)) - - -def test_flatten(): - - def test_4d(): - np_inp_channels_last = np.arange(24, dtype='float32').reshape( - (1, 4, 3, 2)) - - np_output_cl = layer_test(layers.Flatten, - kwargs={'data_format': - 'channels_last'}, - input_data=np_inp_channels_last) - - np_inp_channels_first = np.transpose(np_inp_channels_last, - [0, 3, 1, 2]) - - np_output_cf = layer_test(layers.Flatten, - kwargs={'data_format': - 'channels_first'}, - input_data=np_inp_channels_first, - expected_output=np_output_cl) - - def test_3d(): - np_inp_channels_last = np.arange(12, dtype='float32').reshape( - (1, 4, 3)) - - np_output_cl = layer_test(layers.Flatten, - kwargs={'data_format': - 'channels_last'}, - input_data=np_inp_channels_last) - - np_inp_channels_first = np.transpose(np_inp_channels_last, - [0, 2, 1]) - - np_output_cf = layer_test(layers.Flatten, - kwargs={'data_format': - 'channels_first'}, - input_data=np_inp_channels_first, - expected_output=np_output_cl) - - def test_5d(): - np_inp_channels_last = np.arange(120, dtype='float32').reshape( - (1, 5, 4, 3, 2)) - - np_output_cl = layer_test(layers.Flatten, - kwargs={'data_format': - 'channels_last'}, - input_data=np_inp_channels_last) - - np_inp_channels_first = np.transpose(np_inp_channels_last, - [0, 4, 1, 2, 3]) - - np_output_cf = layer_test(layers.Flatten, - kwargs={'data_format': - 'channels_first'}, - input_data=np_inp_channels_first, - expected_output=np_output_cl) - test_3d() - test_4d() - test_5d() - - -def test_repeat_vector(): - layer_test(layers.RepeatVector, - kwargs={'n': 3}, - input_shape=(3, 2)) - - -def test_lambda(): - layer_test(layers.Lambda, - kwargs={'function': lambda x: x + 1}, - input_shape=(3, 2)) - - layer_test(layers.Lambda, - kwargs={'function': lambda x, a, b: x * a + b, - 'arguments': {'a': 0.6, 'b': 0.4}}, - input_shape=(3, 2)) - - def antirectifier(x): - x -= K.mean(x, axis=1, keepdims=True) - x = K.l2_normalize(x, axis=1) - pos = K.relu(x) - neg = K.relu(-x) - return K.concatenate([pos, neg], axis=1) - - def antirectifier_output_shape(input_shape): - shape = list(input_shape) - assert len(shape) == 2 # only valid for 2D tensors - shape[-1] *= 2 - return tuple(shape) - - layer_test(layers.Lambda, - kwargs={'function': antirectifier, - 'output_shape': antirectifier_output_shape}, - input_shape=(3, 2)) - - # test layer with multiple outputs - def test_multiple_outputs(): - def func(x): - return [x * 0.2, x * 0.3] - - def output_shape(input_shape): - return [input_shape, input_shape] - - def mask(inputs, mask=None): - 
return [None, None] - - i = layers.Input(shape=(3, 2, 1)) - o = layers.Lambda(function=func, - output_shape=output_shape, - mask=mask)(i) - - o1, o2 = o - assert o1._keras_shape == (None, 3, 2, 1) - assert o2._keras_shape == (None, 3, 2, 1) - - model = Model(i, o) - - x = np.random.random((4, 3, 2, 1)) - out1, out2 = model.predict(x) - assert out1.shape == (4, 3, 2, 1) - assert out2.shape == (4, 3, 2, 1) - assert_allclose(out1, x * 0.2, atol=1e-4) - assert_allclose(out2, x * 0.3, atol=1e-4) - - test_multiple_outputs() - - # test layer with multiple outputs and no - # explicit mask - def test_multiple_outputs_no_mask(): - def func(x): - return [x * 0.2, x * 0.3] - - def output_shape(input_shape): - return [input_shape, input_shape] - - i = layers.Input(shape=(3, 2, 1)) - o = layers.Lambda(function=func, - output_shape=output_shape)(i) - - assert o[0]._keras_shape == (None, 3, 2, 1) - assert o[1]._keras_shape == (None, 3, 2, 1) - - o = layers.add(o) - model = Model(i, o) - - i2 = layers.Input(shape=(3, 2, 1)) - o2 = model(i2) - model2 = Model(i2, o2) - - x = np.random.random((4, 3, 2, 1)) - out = model2.predict(x) - assert out.shape == (4, 3, 2, 1) - assert_allclose(out, x * 0.2 + x * 0.3, atol=1e-4) - - test_multiple_outputs_no_mask() - - def test_dtypes(): - def func(x): - if K.dtype(x) != 'float16': - raise TypeError('x dtype is not float16, it is', K.dtype(x)) - return x - - i = layers.Input(shape=(3, 2, 1), dtype='float16') - o = layers.Lambda(func) - _ = o(i) - assert o._input_dtypes == 'float16' - test_dtypes() - - # test serialization with function - def f(x): - return x + 1 - - ld = layers.Lambda(f) - config = ld.get_config() - ld = deserialize_layer({'class_name': 'Lambda', 'config': config}) - - # test with lambda - ld = layers.Lambda( - lambda x: K.concatenate([K.square(x), x]), - output_shape=lambda s: tuple(list(s)[:-1] + [2 * s[-1]])) - config = ld.get_config() - ld = layers.Lambda.from_config(config) - - # test serialization with output_shape function - def f(x): - return K.concatenate([K.square(x), x]) - - def f_shape(s): - return tuple(list(s)[:-1] + [2 * s[-1]]) - - ld = layers.Lambda(f, output_shape=f_shape) - config = ld.get_config() - ld = deserialize_layer({'class_name': 'Lambda', 'config': config}) - - -@pytest.mark.skipif((K.backend() == 'theano'), - reason="theano cannot compute " - "the output shape automatically.") -def test_lambda_output_shape(): - layer_test(layers.Lambda, - kwargs={'function': lambda x: K.mean(x, axis=-1)}, - input_shape=(3, 2, 4)) - - -def test_dense(): - layer_test(layers.Dense, - kwargs={'units': 3}, - input_shape=(3, 2)) - - layer_test(layers.Dense, - kwargs={'units': 3}, - input_shape=(3, 4, 2)) - - layer_test(layers.Dense, - kwargs={'units': 3}, - input_shape=(None, None, 2)) - - layer_test(layers.Dense, - kwargs={'units': 3}, - input_shape=(3, 4, 5, 2)) - - layer_test(layers.Dense, - kwargs={'units': 3, - 'kernel_regularizer': regularizers.l2(0.01), - 'bias_regularizer': regularizers.l1(0.01), - 'activity_regularizer': regularizers.L1L2(l1=0.01, l2=0.01), - 'kernel_constraint': constraints.MaxNorm(1), - 'bias_constraint': constraints.max_norm(1)}, - input_shape=(3, 2)) - - layer = layers.Dense(3, - kernel_regularizer=regularizers.l1(0.01), - bias_regularizer='l1') - layer.build((None, 4)) - assert len(layer.losses) == 2 - - -def test_activity_regularization(): - layer = layers.ActivityRegularization(l1=0.01, l2=0.01) - - # test in functional API - x = layers.Input(shape=(3,)) - z = layers.Dense(2)(x) - y = layer(z) - model = Model(x, y) 
- model.compile('rmsprop', 'mse') - - model.predict(np.random.random((2, 3))) - - # test serialization - model_config = model.get_config() - model = Model.from_config(model_config) - model.compile('rmsprop', 'mse') - - -def test_sequential_as_downstream_of_masking_layer(): - - inputs = layers.Input(shape=(3, 4)) - x = layers.Masking(mask_value=0., input_shape=(3, 4))(inputs) - s = Sequential() - s.add(layers.Dense(5, input_shape=(4,))) - s.add(layers.Activation('relu')) - x = layers.wrappers.TimeDistributed(s)(x) - model = Model(inputs=inputs, outputs=x) - model.compile(optimizer='rmsprop', loss='mse') - model_input = np.random.randint(low=1, high=5, size=(10, 3, 4)) - for i in range(4): - model_input[i, i:, :] = 0. - model.fit(model_input, - np.random.random((10, 3, 5)), epochs=1, batch_size=6) - - mask_outputs = [model.layers[1].compute_mask(model.layers[1].input)] - mask_outputs += [model.layers[2].compute_mask(model.layers[2].input, - mask_outputs[-1])] - func = K.function([model.input], mask_outputs) - mask_outputs_val = func([model_input]) - assert np.array_equal(mask_outputs_val[0], np.any(model_input, axis=-1)) - assert np.array_equal(mask_outputs_val[1], np.any(model_input, axis=-1)) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from numpy.testing import assert_allclose -import keras -import keras.backend as K -from keras.utils.test_utils import layer_test -import time - - -skipif_no_tf_gpu = pytest.mark.skipif( - (K.backend() != 'tensorflow' or - not K.tensorflow_backend._get_available_gpus()), - reason='Requires TensorFlow backend and a GPU') - - -@skipif_no_tf_gpu -def test_cudnn_rnn_canonical_to_params_lstm(): - units = 1 - input_size = 1 - layer = keras.layers.CuDNNLSTM(units) - layer.build((None, None, input_size)) - - params = layer._canonical_to_params( - weights=[ - layer.kernel_i, - layer.kernel_f, - layer.kernel_c, - layer.kernel_o, - layer.recurrent_kernel_i, - layer.recurrent_kernel_f, - layer.recurrent_kernel_c, - layer.recurrent_kernel_o, - ], - biases=[ - layer.bias_i_i, - layer.bias_f_i, - layer.bias_c_i, - layer.bias_o_i, - layer.bias_i, - layer.bias_f, - layer.bias_c, - layer.bias_o, - ], - ) - ref_params = layer._cudnn_lstm.canonical_to_params( - weights=[ - layer.kernel_i, - layer.kernel_f, - layer.kernel_c, - layer.kernel_o, - layer.recurrent_kernel_i, - layer.recurrent_kernel_f, - layer.recurrent_kernel_c, - layer.recurrent_kernel_o, - ], - biases=[ - layer.bias_i_i, - layer.bias_f_i, - layer.bias_c_i, - layer.bias_o_i, - layer.bias_i, - layer.bias_f, - layer.bias_c, - layer.bias_o, - ], - ) - ref_params_value = keras.backend.get_value(ref_params) - params_value = keras.backend.get_value(params) - diff = np.mean(ref_params_value - params_value) - assert diff < 1e-8 - - -@skipif_no_tf_gpu -def test_cudnn_rnn_canonical_to_params_gru(): - units = 7 - input_size = 9 - layer = keras.layers.CuDNNGRU(units) - layer.build((None, None, input_size)) - - ref_params = layer._cudnn_gru.canonical_to_params( - weights=[ - layer.kernel_r, - layer.kernel_z, - layer.kernel_h, - layer.recurrent_kernel_r, - layer.recurrent_kernel_z, - layer.recurrent_kernel_h, - ], - biases=[ - layer.bias_r_i, - layer.bias_z_i, - layer.bias_h_i, - layer.bias_r, - layer.bias_z, - layer.bias_h, - ], - ) - params = layer._canonical_to_params( - weights=[ - layer.kernel_r, - layer.kernel_z, - layer.kernel_h, - layer.recurrent_kernel_r, - layer.recurrent_kernel_z, - layer.recurrent_kernel_h, - ], - biases=[ - layer.bias_r_i, - layer.bias_z_i, - 
layer.bias_h_i, - layer.bias_r, - layer.bias_z, - layer.bias_h, - ], - ) - ref_params_value = keras.backend.get_value(ref_params) - params_value = keras.backend.get_value(params) - diff = np.mean(ref_params_value - params_value) - assert diff < 1e-8 - - -@pytest.mark.parametrize('rnn_type', ['lstm', 'gru'], ids=['LSTM', 'GRU']) -@skipif_no_tf_gpu -def test_cudnn_rnn_timing(rnn_type): - input_size = 1000 - timesteps = 60 - units = 256 - num_samples = 10000 - - times = [] - for use_cudnn in [True, False]: - start_time = time.time() - inputs = keras.layers.Input(shape=(None, input_size)) - if use_cudnn: - if rnn_type == 'lstm': - layer = keras.layers.CuDNNLSTM(units) - else: - layer = keras.layers.CuDNNGRU(units) - else: - if rnn_type == 'lstm': - layer = keras.layers.LSTM(units) - else: - layer = keras.layers.GRU(units) - outputs = layer(inputs) - - model = keras.models.Model(inputs, outputs) - model.compile('sgd', 'mse') - - x = np.random.random((num_samples, timesteps, input_size)) - y = np.random.random((num_samples, units)) - model.fit(x, y, epochs=4, batch_size=32) - - times.append(time.time() - start_time) - - speedup = times[1] / times[0] - print(rnn_type, 'speedup', speedup) - assert speedup > 3 - - -@skipif_no_tf_gpu -def test_cudnn_rnn_basics(): - input_size = 10 - timesteps = 6 - units = 2 - num_samples = 32 - for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]: - for return_sequences in [True, False]: - with keras.utils.CustomObjectScope( - {'keras.layers.CuDNNGRU': keras.layers.CuDNNGRU, - 'keras.layers.CuDNNLSTM': keras.layers.CuDNNLSTM}): - layer_test( - layer_class, - kwargs={'units': units, - 'return_sequences': return_sequences}, - input_shape=(num_samples, timesteps, input_size)) - for go_backwards in [True, False]: - with keras.utils.CustomObjectScope( - {'keras.layers.CuDNNGRU': keras.layers.CuDNNGRU, - 'keras.layers.CuDNNLSTM': keras.layers.CuDNNLSTM}): - layer_test( - layer_class, - kwargs={'units': units, - 'go_backwards': go_backwards}, - input_shape=(num_samples, timesteps, input_size)) - - -@skipif_no_tf_gpu -def test_trainability(): - input_size = 10 - units = 2 - for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]: - layer = layer_class(units) - layer.build((None, None, input_size)) - assert len(layer.weights) == 3 - assert len(layer.trainable_weights) == 3 - assert len(layer.non_trainable_weights) == 0 - layer.trainable = False - assert len(layer.weights) == 3 - assert len(layer.non_trainable_weights) == 3 - assert len(layer.trainable_weights) == 0 - layer.trainable = True - assert len(layer.weights) == 3 - assert len(layer.trainable_weights) == 3 - assert len(layer.non_trainable_weights) == 0 - - -@skipif_no_tf_gpu -def test_regularizer(): - input_size = 10 - timesteps = 6 - units = 2 - num_samples = 32 - for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]: - layer = layer_class(units, return_sequences=False, - input_shape=(timesteps, input_size), - kernel_regularizer=keras.regularizers.l1(0.01), - recurrent_regularizer=keras.regularizers.l1(0.01), - bias_regularizer='l2') - layer.build((None, None, input_size)) - assert len(layer.losses) == 3 - - layer = layer_class(units, return_sequences=False, - input_shape=(timesteps, input_size), - activity_regularizer='l2') - assert layer.activity_regularizer - x = keras.backend.variable(np.ones((num_samples, - timesteps, - input_size))) - layer(x) - assert len(layer.get_losses_for(x)) == 1 - - -@skipif_no_tf_gpu -def test_return_state(): - input_size = 10 - timesteps = 6 - 
units = 2 - num_samples = 32 - - for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]: - num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1 - - inputs = keras.Input(batch_shape=(num_samples, timesteps, input_size)) - layer = layer_class(units, return_state=True, stateful=True) - outputs = layer(inputs) - output, state = outputs[0], outputs[1:] - assert len(state) == num_states - model = keras.models.Model(inputs, state[0]) - - inputs = np.random.random((num_samples, timesteps, input_size)) - state = model.predict(inputs) - np.testing.assert_allclose( - keras.backend.eval(layer.states[0]), state, atol=1e-4) - - -@skipif_no_tf_gpu -def test_specify_initial_state_keras_tensor(): - input_size = 10 - timesteps = 6 - units = 2 - num_samples = 32 - for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]: - num_states = 2 if layer_class is keras.layers.CuDNNLSTM else 1 - - inputs = keras.Input((timesteps, input_size)) - initial_state = [keras.Input((units,)) for _ in range(num_states)] - layer = layer_class(units) - if len(initial_state) == 1: - output = layer(inputs, initial_state=initial_state[0]) - else: - output = layer(inputs, initial_state=initial_state) - assert initial_state[0] in layer._inbound_nodes[0].input_tensors - - model = keras.models.Model([inputs] + initial_state, output) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - inputs = np.random.random((num_samples, timesteps, input_size)) - initial_state = [np.random.random((num_samples, units)) - for _ in range(num_states)] - targets = np.random.random((num_samples, units)) - model.fit([inputs] + initial_state, targets) - - -@skipif_no_tf_gpu -def test_statefulness(): - input_size = 10 - timesteps = 6 - units = 2 - num_samples = 32 - - for layer_class in [keras.layers.CuDNNGRU, keras.layers.CuDNNLSTM]: - model = keras.models.Sequential() - model.add(keras.layers.Embedding(10, input_size, - input_length=timesteps, - batch_input_shape=(num_samples, - timesteps))) - layer = layer_class(units, - return_sequences=False, - stateful=True, - weights=None) - model.add(layer) - model.compile(optimizer='sgd', loss='mse') - out1 = model.predict(np.ones((num_samples, timesteps))) - assert(out1.shape == (num_samples, units)) - - # train once so that the states change - model.train_on_batch(np.ones((num_samples, timesteps)), - np.ones((num_samples, units))) - out2 = model.predict(np.ones((num_samples, timesteps))) - - # if the state is not reset, output should be different - assert(out1.max() != out2.max()) - - # check that output changes after states are reset - # (even though the model itself didn't change) - layer.reset_states() - out3 = model.predict(np.ones((num_samples, timesteps))) - assert(out2.max() != out3.max()) - - # check that container-level reset_states() works - model.reset_states() - out4 = model.predict(np.ones((num_samples, timesteps))) - assert_allclose(out3, out4, atol=1e-5) - - # check that the call to `predict` updated the states - out5 = model.predict(np.ones((num_samples, timesteps))) - assert(out4.max() != out5.max()) - - -@skipif_no_tf_gpu -def test_cudnnrnn_bidirectional(): - rnn = keras.layers.CuDNNGRU - samples = 2 - dim = 2 - timesteps = 2 - output_dim = 2 - mode = 'concat' - - x = np.random.random((samples, timesteps, dim)) - target_dim = 2 * output_dim if mode == 'concat' else output_dim - y = np.random.random((samples, target_dim)) - - # test with Sequential model - model = keras.Sequential() - model.add(keras.layers.Bidirectional(rnn(output_dim), - 
merge_mode=mode, - input_shape=(None, dim))) - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1, batch_size=1) - - # test config - model.get_config() - model = keras.models.model_from_json(model.to_json()) - model.summary() - - # test stacked bidirectional layers - model = keras.Sequential() - model.add(keras.layers.Bidirectional(rnn(output_dim, - return_sequences=True), - merge_mode=mode, - input_shape=(None, dim))) - model.add(keras.layers.Bidirectional(rnn(output_dim), merge_mode=mode)) - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1, batch_size=1) - - # test with functional API - inputs = keras.Input((timesteps, dim)) - outputs = keras.layers.Bidirectional(rnn(output_dim), - merge_mode=mode)(inputs) - model = keras.Model(inputs, outputs) - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1, batch_size=1) - - # Bidirectional and stateful - inputs = keras.Input(batch_shape=(1, timesteps, dim)) - outputs = keras.layers.Bidirectional(rnn(output_dim, stateful=True), - merge_mode=mode)(inputs) - model = keras.Model(inputs, outputs) - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1, batch_size=1) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -from keras.utils.test_utils import layer_test -from keras.layers.embeddings import Embedding -from keras.models import Sequential -import keras.backend as K - - -def test_embedding(): - layer_test(Embedding, - kwargs={'output_dim': 4, 'input_dim': 10, 'input_length': 2}, - input_shape=(3, 2), - input_dtype='int32', - expected_output_dtype=K.floatx()) - layer_test(Embedding, - kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True}, - input_shape=(3, 2), - input_dtype='int32', - expected_output_dtype=K.floatx()) - layer_test(Embedding, - kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True}, - input_shape=(3, 2, 5), - input_dtype='int32', - expected_output_dtype=K.floatx()) - layer_test(Embedding, - kwargs={'output_dim': 4, 'input_dim': 10, 'mask_zero': True, - 'input_length': (None, 5)}, - input_shape=(3, 2, 5), - input_dtype='int32', - expected_output_dtype=K.floatx()) - - -@pytest.mark.parametrize('input_shape', - [(3, 4, 5), - (3, 5)]) -def test_embedding_invalid(input_shape): - - # len(input_length) should be equal to len(input_shape) - 1 - with pytest.raises(ValueError): - model = Sequential([Embedding( - input_dim=10, - output_dim=4, - input_length=2, - input_shape=input_shape)]) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest - -from keras.utils.test_utils import layer_test -from keras.layers import local - - -def test_locallyconnected_1d(): - num_samples = 2 - num_steps = 8 - input_dim = 5 - filter_length = 3 - filters = 4 - padding = 'valid' - strides = 1 - - layer_test(local.LocallyConnected1D, - kwargs={'filters': filters, - 'kernel_size': filter_length, - 'padding': padding, - 'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'strides': strides}, - input_shape=(num_samples, num_steps, input_dim)) - - -def test_locallyconnected_2d(): - num_samples = 5 - filters = 3 - stack_size = 4 - num_row = 6 - num_col = 8 - padding = 'valid' - - for strides in [(1, 1), (2, 2)]: - layer_test(local.LocallyConnected2D, - kwargs={'filters': filters, - 'kernel_size': 3, - 'padding': padding, - 'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'strides': strides, - 'data_format': 'channels_last'}, - input_shape=(num_samples, num_row, 
num_col, stack_size)) - - layer_test(local.LocallyConnected2D, - kwargs={'filters': filters, - 'kernel_size': (3, 3), - 'padding': padding, - 'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2', - 'strides': strides, - 'data_format': 'channels_first'}, - input_shape=(num_samples, stack_size, num_row, num_col)) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from numpy.testing import assert_allclose -from keras import layers -from keras import models -from keras import backend as K -from keras.utils.test_utils import layer_test -from keras.layers import merge - - -def test_merge_add(): - i1 = layers.Input(shape=(4, 5)) - i2 = layers.Input(shape=(4, 5)) - i3 = layers.Input(shape=(4, 5)) - o = layers.add([i1, i2, i3]) - assert o._keras_shape == (None, 4, 5) - model = models.Model([i1, i2, i3], o) - - add_layer = layers.Add() - o2 = add_layer([i1, i2, i3]) - assert add_layer.output_shape == (None, 4, 5) - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - x3 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2, x3]) - assert out.shape == (2, 4, 5) - assert_allclose(out, x1 + x2 + x3, atol=1e-4) - - assert add_layer.compute_mask([i1, i2, i3], [None, None, None]) is None - assert np.all(K.eval(add_layer.compute_mask( - [i1, i2, i3], [K.variable(x1), K.variable(x2), K.variable(x3)]))) - - # Test invalid use case - with pytest.raises(ValueError): - add_layer.compute_mask([i1, i2, i3], x1) - with pytest.raises(ValueError): - add_layer.compute_mask(i1, [None, None, None]) - with pytest.raises(ValueError): - add_layer.compute_mask([i1, i2, i3], [None, None]) - - -def test_merge_subtract(): - i1 = layers.Input(shape=(4, 5)) - i2 = layers.Input(shape=(4, 5)) - i3 = layers.Input(shape=(4, 5)) - i4 = layers.Input(shape=(3, 5)) - o = layers.subtract([i1, i2]) - assert o._keras_shape == (None, 4, 5) - model = models.Model([i1, i2], o) - - subtract_layer = layers.Subtract() - o2 = subtract_layer([i1, i2]) - assert subtract_layer.output_shape == (None, 4, 5) - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2]) - assert out.shape == (2, 4, 5) - assert_allclose(out, x1 - x2, atol=1e-4) - - assert subtract_layer.compute_mask([i1, i2], [None, None]) is None - assert np.all(K.eval(subtract_layer.compute_mask( - [i1, i2], [K.variable(x1), K.variable(x2)]))) - - # Test invalid use case - with pytest.raises(ValueError): - subtract_layer.compute_mask([i1, i2], x1) - with pytest.raises(ValueError): - subtract_layer.compute_mask(i1, [None, None]) - with pytest.raises(ValueError): - subtract_layer([i1, i2, i3]) - with pytest.raises(ValueError): - subtract_layer([i1]) - - -def test_merge_multiply(): - i1 = layers.Input(shape=(4, 5)) - i2 = layers.Input(shape=(4, 5)) - i3 = layers.Input(shape=(4, 5)) - o = layers.multiply([i1, i2, i3]) - assert o._keras_shape == (None, 4, 5) - model = models.Model([i1, i2, i3], o) - - mul_layer = layers.Multiply() - o2 = mul_layer([i1, i2, i3]) - assert mul_layer.output_shape == (None, 4, 5) - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - x3 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2, x3]) - assert out.shape == (2, 4, 5) - assert_allclose(out, x1 * x2 * x3, atol=1e-4) - - -def test_merge_average(): - i1 = layers.Input(shape=(4, 5)) - i2 = layers.Input(shape=(4, 5)) - o = layers.average([i1, i2]) - assert o._keras_shape == (None, 4, 5) - model = models.Model([i1, i2], o) - - avg_layer = 
layers.Average() - o2 = avg_layer([i1, i2]) - assert avg_layer.output_shape == (None, 4, 5) - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2]) - assert out.shape == (2, 4, 5) - assert_allclose(out, 0.5 * (x1 + x2), atol=1e-4) - - -def test_merge_maximum(): - i1 = layers.Input(shape=(4, 5)) - i2 = layers.Input(shape=(4, 5)) - o = layers.maximum([i1, i2]) - assert o._keras_shape == (None, 4, 5) - model = models.Model([i1, i2], o) - - max_layer = layers.Maximum() - o2 = max_layer([i1, i2]) - assert max_layer.output_shape == (None, 4, 5) - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2]) - assert out.shape == (2, 4, 5) - assert_allclose(out, np.maximum(x1, x2), atol=1e-4) - - -def test_merge_minimum(): - i1 = layers.Input(shape=(4, 5)) - i2 = layers.Input(shape=(4, 5)) - o = layers.minimum([i1, i2]) - assert o._keras_shape == (None, 4, 5) - model = models.Model([i1, i2], o) - - max_layer = layers.Minimum() - o2 = max_layer([i1, i2]) - assert max_layer.output_shape == (None, 4, 5) - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2]) - assert out.shape == (2, 4, 5) - assert_allclose(out, np.minimum(x1, x2), atol=1e-4) - - -def test_merge_concatenate(): - i1 = layers.Input(shape=(None, 5)) - i2 = layers.Input(shape=(None, 5)) - o = layers.concatenate([i1, i2], axis=1) - assert o._keras_shape == (None, None, 5) - model = models.Model([i1, i2], o) - - i1 = layers.Input(shape=(4, 5)) - i2 = layers.Input(shape=(4, 5)) - o = layers.concatenate([i1, i2], axis=1) - assert o._keras_shape == (None, 8, 5) - model = models.Model([i1, i2], o) - - concat_layer = layers.Concatenate(axis=1) - o2 = concat_layer([i1, i2]) - assert concat_layer.output_shape == (None, 8, 5) - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 4, 5)) - out = model.predict([x1, x2]) - assert out.shape == (2, 8, 5) - assert_allclose(out, np.concatenate([x1, x2], axis=1), atol=1e-4) - - x3 = np.random.random((1, 1, 1)) - nb_layers = 4 - x_i = layers.Input(shape=(None, None)) - x_list = [x_i] - x = x_i - for i in range(nb_layers): - x_list.append(x) - x = layers.concatenate(x_list, axis=1) - concat_model = models.Model(x_i, x) - concat_out = concat_model.predict([x3]) - x3 = np.repeat(x3, 16, axis=1) - assert concat_out.shape == (1, 16, 1) - assert_allclose(concat_out, x3) - - assert concat_layer.compute_mask([i1, i2], [None, None]) is None - assert np.all(K.eval(concat_layer.compute_mask( - [i1, i2], [K.variable(x1), K.variable(x2)])).reshape(-1)) - - # Test invalid use case - with pytest.raises(ValueError): - concat_layer.compute_mask([i1, i2], x1) - with pytest.raises(ValueError): - concat_layer.compute_mask(i1, [None, None]) - with pytest.raises(ValueError): - concat_layer.compute_mask([i1, i2], [None]) - with pytest.raises(ValueError): - concat_layer([i1]) - - -def test_merge_dot(): - i1 = layers.Input(shape=(4,)) - i2 = layers.Input(shape=(4,)) - o = layers.dot([i1, i2], axes=1) - assert o._keras_shape == (None, 1) - model = models.Model([i1, i2], o) - - dot_layer = layers.Dot(axes=1) - o2 = dot_layer([i1, i2]) - assert dot_layer.output_shape == (None, 1) - - x1 = np.random.random((2, 4)) - x2 = np.random.random((2, 4)) - out = model.predict([x1, x2]) - assert out.shape == (2, 1) - expected = np.zeros((2, 1)) - expected[0, 0] = np.dot(x1[0], x2[0]) - expected[1, 0] = np.dot(x1[1], x2[1]) - assert_allclose(out, expected, atol=1e-4) - - # Test with negative tuple of 
axes. - o = layers.dot([i1, i2], axes=(-1, -1)) - assert o._keras_shape == (None, 1) - model = models.Model([i1, i2], o) - out = model.predict([x1, x2]) - assert out.shape == (2, 1) - assert_allclose(out, expected, atol=1e-4) - - -def test_merge_broadcast(): - # shapes provided - i1 = layers.Input(shape=(4, 5)) - i2 = layers.Input(shape=(5,)) - ops = [layers.add, layers.maximum] - for op in ops: - o = op([i1, i2]) - assert o._keras_shape == (None, 4, 5) - model = models.Model([i1, i2], o) - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 5)) - out = model.predict([x1, x2]) - assert out.shape == (2, 4, 5) - - # shapes not provided - i1 = layers.Input(shape=(None, None)) - i2 = layers.Input(shape=(None,)) - ops = [layers.add, layers.maximum] - for op in ops: - o = op([i1, i2]) - assert o._keras_shape == (None, None, None) - model = models.Model([i1, i2], o) - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 5)) - out = model.predict([x1, x2]) - assert out.shape == (2, 4, 5) - - # ndim not provided - if K.backend() == 'tensorflow': - k_ndim = K.ndim - K.ndim = lambda _: None - - i1 = layers.Input(shape=(None, None)) - i2 = layers.Input(shape=(None,)) - ops = [layers.add, layers.maximum] - for op in ops: - o = op([i1, i2]) - assert o._keras_shape == (None, None, None) - model = models.Model([i1, i2], o) - - x1 = np.random.random((2, 4, 5)) - x2 = np.random.random((2, 5)) - out = model.predict([x1, x2]) - assert out.shape == (2, 4, 5) - K.ndim = k_ndim - - -def test_masking_concatenate(): - input1 = layers.Input(shape=(6,)) - input2 = layers.Input(shape=(6,)) - x1 = layers.Embedding(10, 5, input_length=6, mask_zero=True)(input1) - x2 = layers.Embedding(10, 5, input_length=6, mask_zero=True)(input2) - x = layers.concatenate([x1, x2]) - x = layers.wrappers.TimeDistributed( - layers.Dense(3, activation='softmax'))(x) - models.Model(inputs=[input1, input2], outputs=[x]) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -from keras.utils.test_utils import layer_test -from keras.layers import noise -from keras import backend as K - - -@pytest.mark.skipif((K.backend() == 'cntk'), - reason="cntk does not support it yet") -def test_GaussianNoise(): - layer_test(noise.GaussianNoise, - kwargs={'stddev': 1.}, - input_shape=(3, 2, 3)) - - -@pytest.mark.skipif((K.backend() == 'cntk'), - reason="cntk does not support it yet") -def test_GaussianDropout(): - layer_test(noise.GaussianDropout, - kwargs={'rate': 0.5}, - input_shape=(3, 2, 3)) - - -@pytest.mark.skipif((K.backend() == 'cntk'), - reason="cntk does not support it yet") -def test_AlphaDropout(): - layer_test(noise.AlphaDropout, - kwargs={'rate': 0.1}, - input_shape=(3, 2, 3)) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from numpy.testing import assert_allclose - -from keras.layers import Input -from keras import regularizers -from keras.utils.test_utils import layer_test -from keras.layers import normalization -from keras.models import Sequential, Model -from keras import backend as K - -input_1 = np.arange(10) -input_2 = np.zeros(10) -input_3 = np.ones((10)) -input_4 = np.expand_dims(np.arange(10.), axis=1) -input_shapes = [np.ones((10, 10)), np.ones((10, 10, 10))] - - -def test_basic_batchnorm(): - layer_test(normalization.BatchNormalization, - kwargs={'momentum': 0.9, - 'epsilon': 0.1, - 'gamma_regularizer': regularizers.l2(0.01), - 'beta_regularizer': regularizers.l2(0.01)}, - input_shape=(3, 4, 2)) - layer_test(normalization.BatchNormalization, - 
kwargs={'momentum': 0.9, - 'epsilon': 0.1, - 'axis': 1}, - input_shape=(1, 4, 1)) - layer_test(normalization.BatchNormalization, - kwargs={'gamma_initializer': 'ones', - 'beta_initializer': 'ones', - 'moving_mean_initializer': 'zeros', - 'moving_variance_initializer': 'ones'}, - input_shape=(3, 4, 2, 4)) - if K.backend() != 'theano': - layer_test(normalization.BatchNormalization, - kwargs={'momentum': 0.9, - 'epsilon': 0.1, - 'axis': 1, - 'scale': False, - 'center': False}, - input_shape=(3, 4, 2, 4)) - - -def test_batchnorm_correctness_1d(): - np.random.seed(1337) - model = Sequential() - norm = normalization.BatchNormalization(input_shape=(10,), momentum=0.8) - model.add(norm) - model.compile(loss='mse', optimizer='rmsprop') - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10)) - model.fit(x, x, epochs=5, verbose=0) - out = model.predict(x) - out -= K.eval(norm.beta) - out /= K.eval(norm.gamma) - - assert_allclose(out.mean(), 0.0, atol=1e-1) - assert_allclose(out.std(), 1.0, atol=1e-1) - - -def test_batchnorm_correctness_2d(): - np.random.seed(1337) - model = Sequential() - norm = normalization.BatchNormalization(axis=1, input_shape=(10, 6), - momentum=0.8) - model.add(norm) - model.compile(loss='mse', optimizer='rmsprop') - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10, 6)) - model.fit(x, x, epochs=5, verbose=0) - out = model.predict(x) - out -= np.reshape(K.eval(norm.beta), (1, 10, 1)) - out /= np.reshape(K.eval(norm.gamma), (1, 10, 1)) - - assert_allclose(out.mean(axis=(0, 2)), 0.0, atol=1.1e-1) - assert_allclose(out.std(axis=(0, 2)), 1.0, atol=1.1e-1) - - -def test_batchnorm_training_argument(): - np.random.seed(1337) - bn1 = normalization.BatchNormalization(input_shape=(10,)) - x1 = Input(shape=(10,)) - y1 = bn1(x1, training=True) - assert bn1.updates - - model1 = Model(x1, y1) - x = np.random.normal(loc=5.0, scale=10.0, size=(20, 10)) - output_a = model1.predict(x) - - model1.compile(loss='mse', optimizer='rmsprop') - model1.fit(x, x, epochs=1, verbose=0) - output_b = model1.predict(x) - assert np.abs(np.sum(output_a - output_b)) > 0.1 - assert_allclose(output_b.mean(), 0.0, atol=1e-1) - assert_allclose(output_b.std(), 1.0, atol=1e-1) - - bn2 = normalization.BatchNormalization(input_shape=(10,)) - x2 = Input(shape=(10,)) - bn2(x2, training=False) - assert not bn2.updates - - -def test_batchnorm_mode_twice(): - # This is a regression test for issue #4881 with the old - # batch normalization functions in the Theano backend. 
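# NOTE (editor): illustrative aside, not part of the original test file.
# It distills the correctness checks above: after fitting on data centered
# on 5.0 with std 10.0, BatchNormalization output (once beta and gamma are
# removed) should be approximately standard normal. Hypothetical names;
# assumes keras and numpy are installed.
import numpy as np
from keras import backend as K
from keras.models import Sequential
from keras.layers import BatchNormalization

bn = BatchNormalization(input_shape=(10,), momentum=0.8)
demo = Sequential([bn])
demo.compile(loss='mse', optimizer='rmsprop')
data = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10))
demo.fit(data, data, epochs=5, verbose=0)
standardized = (demo.predict(data) - K.eval(bn.beta)) / K.eval(bn.gamma)
print(standardized.mean(), standardized.std())  # expected: ~0.0 and ~1.0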
- model = Sequential() - model.add(normalization.BatchNormalization(input_shape=(10, 5, 5), axis=1)) - model.add(normalization.BatchNormalization(input_shape=(10, 5, 5), axis=1)) - model.compile(loss='mse', optimizer='sgd') - - x = np.random.normal(loc=5.0, scale=10.0, size=(20, 10, 5, 5)) - model.fit(x, x, epochs=1, verbose=0) - model.predict(x) - - -def test_batchnorm_convnet(): - np.random.seed(1337) - model = Sequential() - norm = normalization.BatchNormalization(axis=1, input_shape=(3, 4, 4), - momentum=0.8) - model.add(norm) - model.compile(loss='mse', optimizer='sgd') - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) - model.fit(x, x, epochs=4, verbose=0) - out = model.predict(x) - out -= np.reshape(K.eval(norm.beta), (1, 3, 1, 1)) - out /= np.reshape(K.eval(norm.gamma), (1, 3, 1, 1)) - - assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) - assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) - - -@pytest.mark.skipif((K.backend() == 'theano'), - reason='Bug with theano backend') -def test_batchnorm_convnet_no_center_no_scale(): - np.random.seed(1337) - model = Sequential() - norm = normalization.BatchNormalization(axis=-1, center=False, scale=False, - input_shape=(3, 4, 4), momentum=0.8) - model.add(norm) - model.compile(loss='mse', optimizer='sgd') - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) - model.fit(x, x, epochs=4, verbose=0) - out = model.predict(x) - - assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) - assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) - - -def test_shared_batchnorm(): - '''Test that a BN layer can be shared - across different data streams. - ''' - # Test single layer reuse - bn = normalization.BatchNormalization(input_shape=(10,)) - x1 = Input(shape=(10,)) - bn(x1) - - x2 = Input(shape=(10,)) - y2 = bn(x2) - - x = np.random.normal(loc=5.0, scale=10.0, size=(2, 10)) - model = Model(x2, y2) - assert len(model.updates) == 2 - model.compile('sgd', 'mse') - model.train_on_batch(x, x) - - # Test model-level reuse - x3 = Input(shape=(10,)) - y3 = model(x3) - new_model = Model(x3, y3) - assert len(model.updates) == 2 - new_model.compile('sgd', 'mse') - new_model.train_on_batch(x, x) - - -def test_that_trainable_disables_updates(): - val_a = np.random.random((10, 4)) - val_out = np.random.random((10, 4)) - - a = Input(shape=(4,)) - layer = normalization.BatchNormalization(input_shape=(4,)) - b = layer(a) - model = Model(a, b) - - model.trainable = False - assert not model.updates - - model.compile('sgd', 'mse') - assert not model.updates - - x1 = model.predict(val_a) - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - assert_allclose(x1, x2, atol=1e-7) - - model.trainable = True - model.compile('sgd', 'mse') - assert model.updates - - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - assert np.abs(np.sum(x1 - x2)) > 1e-5 - - layer.trainable = False - model.compile('sgd', 'mse') - assert not model.updates - - x1 = model.predict(val_a) - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - assert_allclose(x1, x2, atol=1e-7) - - -def test_batchnorm_trainable(): - bn_mean = 0.5 - bn_std = 10. 
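# NOTE (editor): a minimal sketch of the freezing behaviour exercised by
# test_that_trainable_disables_updates above: setting trainable=False
# before compiling drops the moving-average update ops, so predictions
# stay fixed during training. Hypothetical example; assumes keras and
# numpy are installed.
import numpy as np
from keras.models import Model
from keras.layers import Input, BatchNormalization

inp = Input(shape=(4,))
frozen = Model(inp, BatchNormalization()(inp))
frozen.trainable = False
frozen.compile('sgd', 'mse')
assert not frozen.updates  # no moving mean/variance updates when frozen
x = np.random.random((10, 4))
before = frozen.predict(x)
frozen.train_on_batch(x, np.random.random((10, 4)))
assert np.allclose(before, frozen.predict(x), atol=1e-7)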
- - def get_model(bn_mean, bn_std): - input = Input(shape=(1,)) - x = normalization.BatchNormalization()(input) - model = Model(input, x) - model.set_weights([np.array([1.]), np.array([0.]), - np.array([bn_mean]), np.array([bn_std ** 2])]) - return model - # Simulates training-mode with trainable layer. Should use mini-batch statistics. - K.set_learning_phase(1) - model = get_model(bn_mean, bn_std) - model.compile(loss='mse', optimizer='rmsprop') - out = model.predict(input_4) - assert_allclose((input_4 - np.mean(input_4)) / - np.std(input_4), out, atol=1e-3) - - -if __name__ == '__main__': - pytest.main([__file__]) -import numpy as np -import pytest - -from keras.utils.test_utils import layer_test -from keras.layers import pooling -from keras.layers import Masking -from keras.layers import convolutional -from keras.models import Sequential - - -@pytest.mark.parametrize( - 'padding,stride,data_format', - [(padding, stride, data_format) - for padding in ['valid', 'same'] - for stride in [1, 2] - for data_format in ['channels_first', 'channels_last']] -) -def test_maxpooling_1d(padding, stride, data_format): - layer_test(convolutional.MaxPooling1D, - kwargs={'strides': stride, - 'padding': padding, - 'data_format': data_format}, - input_shape=(3, 5, 4)) - - -@pytest.mark.parametrize( - 'strides', - [(1, 1), (2, 3)] -) -def test_maxpooling_2d(strides): - pool_size = (3, 3) - layer_test(convolutional.MaxPooling2D, - kwargs={'strides': strides, - 'padding': 'valid', - 'pool_size': pool_size}, - input_shape=(3, 5, 6, 4)) - - -@pytest.mark.parametrize( - 'strides,data_format,input_shape', - [(2, None, (3, 11, 12, 10, 4)), - (3, 'channels_first', (3, 4, 11, 12, 10))] -) -def test_maxpooling_3d(strides, data_format, input_shape): - pool_size = (3, 3, 3) - layer_test(convolutional.MaxPooling3D, - kwargs={'strides': strides, - 'padding': 'valid', - 'data_format': data_format, - 'pool_size': pool_size}, - input_shape=input_shape) - - -@pytest.mark.parametrize( - 'padding,stride,data_format', - [(padding, stride, data_format) - for padding in ['valid', 'same'] - for stride in [1, 2] - for data_format in ['channels_first', 'channels_last']] -) -def test_averagepooling_1d(padding, stride, data_format): - layer_test(convolutional.AveragePooling1D, - kwargs={'strides': stride, - 'padding': padding, - 'data_format': data_format}, - input_shape=(3, 5, 4)) - - -@pytest.mark.parametrize( - 'strides,padding,data_format,input_shape', - [((2, 2), 'same', None, (3, 5, 6, 4)), - ((2, 2), 'valid', None, (3, 5, 6, 4)), - ((1, 1), 'valid', 'channels_first', (3, 4, 5, 6))] -) -def test_averagepooling_2d(strides, padding, data_format, input_shape): - layer_test(convolutional.AveragePooling2D, - kwargs={'strides': strides, - 'padding': padding, - 'pool_size': (2, 2), - 'data_format': data_format}, - input_shape=input_shape) - - -@pytest.mark.parametrize( - 'strides,data_format,input_shape', - [(2, None, (3, 11, 12, 10, 4)), - (3, 'channels_first', (3, 4, 11, 12, 10))] -) -def test_averagepooling_3d(strides, data_format, input_shape): - pool_size = (3, 3, 3) - - layer_test(convolutional.AveragePooling3D, - kwargs={'strides': strides, - 'padding': 'valid', - 'data_format': data_format, - 'pool_size': pool_size}, - input_shape=input_shape) - - -@pytest.mark.parametrize( - 'data_format,pooling_class', - [(data_format, pooling_class) - for data_format in ['channels_first', 'channels_last'] - for pooling_class in [pooling.GlobalMaxPooling1D, - pooling.GlobalAveragePooling1D]] -) -def test_globalpooling_1d(data_format, 
pooling_class): - layer_test(pooling_class, - kwargs={'data_format': data_format}, - input_shape=(3, 4, 5)) - - -def test_globalpooling_1d_supports_masking(): - # Test GlobalAveragePooling1D supports masking - model = Sequential() - model.add(Masking(mask_value=0., input_shape=(3, 4))) - model.add(pooling.GlobalAveragePooling1D()) - model.compile(loss='mae', optimizer='adam') - - model_input = np.random.randint(low=1, high=5, size=(2, 3, 4)) - model_input[0, 1:, :] = 0 - output = model.predict(model_input) - assert np.array_equal(output[0], model_input[0, 0, :]) - - -@pytest.mark.parametrize( - 'data_format,pooling_class', - [(data_format, pooling_class) - for data_format in ['channels_first', 'channels_last'] - for pooling_class in [pooling.GlobalMaxPooling2D, - pooling.GlobalAveragePooling2D]] -) -def test_globalpooling_2d(data_format, pooling_class): - layer_test(pooling_class, - kwargs={'data_format': data_format}, - input_shape=(3, 4, 5, 6)) - - -@pytest.mark.parametrize( - 'data_format,pooling_class', - [(data_format, pooling_class) - for data_format in ['channels_first', 'channels_last'] - for pooling_class in [pooling.GlobalMaxPooling3D, - pooling.GlobalAveragePooling3D]] -) -def test_globalpooling_3d(data_format, pooling_class): - layer_test(pooling_class, - kwargs={'data_format': data_format}, - input_shape=(3, 4, 3, 4, 3)) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from numpy.testing import assert_allclose - -import keras -from keras.utils.test_utils import layer_test -from keras.layers import recurrent -from keras.layers import embeddings -from keras.models import Sequential -from keras.models import Model -from keras.engine import Input -from keras.layers import Masking -from keras import regularizers -from keras import backend as K - -num_samples, timesteps, embedding_dim, units = 2, 5, 4, 3 -embedding_num = 12 - - -rnn_test = pytest.mark.parametrize('layer_class', - [recurrent.SimpleRNN, - recurrent.GRU, - recurrent.LSTM]) - - -rnn_cell_test = pytest.mark.parametrize('cell_class', - [recurrent.SimpleRNNCell, - recurrent.GRUCell, - recurrent.LSTMCell]) - - -@rnn_test -def test_return_sequences(layer_class): - layer_test(layer_class, - kwargs={'units': units, - 'return_sequences': True}, - input_shape=(num_samples, timesteps, embedding_dim)) - - -@rnn_test -def test_dynamic_behavior(layer_class): - layer = layer_class(units, input_shape=(None, embedding_dim)) - model = Sequential() - model.add(layer) - model.compile('sgd', 'mse') - x = np.random.random((num_samples, timesteps, embedding_dim)) - y = np.random.random((num_samples, units)) - model.train_on_batch(x, y) - - -@rnn_test -def test_stateful_invalid_use(layer_class): - layer = layer_class(units, - stateful=True, - batch_input_shape=(num_samples, - timesteps, - embedding_dim)) - model = Sequential() - model.add(layer) - model.compile('sgd', 'mse') - x = np.random.random((num_samples * 2, timesteps, embedding_dim)) - y = np.random.random((num_samples * 2, units)) - with pytest.raises(ValueError): - model.fit(x, y) - with pytest.raises(ValueError): - model.predict(x, batch_size=num_samples + 1) - - -@rnn_test -@pytest.mark.skipif((K.backend() in ['theano']), - reason='Not supported.') -def test_dropout(layer_class): - for unroll in [True, False]: - layer_test(layer_class, - kwargs={'units': units, - 'dropout': 0.1, - 'recurrent_dropout': 0.1, - 'unroll': unroll}, - input_shape=(num_samples, timesteps, embedding_dim)) - - # Test that dropout is applied during training - x = 
K.ones((num_samples, timesteps, embedding_dim)) - layer = layer_class(units, dropout=0.5, recurrent_dropout=0.5, - input_shape=(timesteps, embedding_dim)) - y = layer(x) - assert y._uses_learning_phase - - y = layer(x, training=True) - assert not getattr(y, '_uses_learning_phase') - - # Test that dropout is not applied during testing - x = np.random.random((num_samples, timesteps, embedding_dim)) - layer = layer_class(units, dropout=0.5, recurrent_dropout=0.5, - unroll=unroll, - input_shape=(timesteps, embedding_dim)) - model = Sequential([layer]) - assert model.uses_learning_phase - y1 = model.predict(x) - y2 = model.predict(x) - assert_allclose(y1, y2) - - -@rnn_test -def test_statefulness(layer_class): - model = Sequential() - model.add(embeddings.Embedding(embedding_num, embedding_dim, - mask_zero=True, - input_length=timesteps, - batch_input_shape=(num_samples, timesteps))) - layer = layer_class(units, return_sequences=False, - stateful=True, - weights=None) - model.add(layer) - model.compile(optimizer='sgd', loss='mse') - out1 = model.predict(np.ones((num_samples, timesteps))) - assert(out1.shape == (num_samples, units)) - - # train once so that the states change - model.train_on_batch(np.ones((num_samples, timesteps)), - np.ones((num_samples, units))) - out2 = model.predict(np.ones((num_samples, timesteps))) - - # if the state is not reset, output should be different - assert(out1.max() != out2.max()) - - # check that output changes after states are reset - # (even though the model itself didn't change) - layer.reset_states() - out3 = model.predict(np.ones((num_samples, timesteps))) - assert(out2.max() != out3.max()) - - # check that container-level reset_states() works - model.reset_states() - out4 = model.predict(np.ones((num_samples, timesteps))) - assert_allclose(out3, out4, atol=1e-5) - - # check that the call to `predict` updated the states - out5 = model.predict(np.ones((num_samples, timesteps))) - assert(out4.max() != out5.max()) - - -@rnn_test -def test_masking_correctness(layer_class): - # Check masking: output with left padding and right padding - # should be the same. 
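# NOTE (editor): aside distilling the statefulness contract tested above:
# with stateful=True the final states of one batch seed the next call,
# until reset_states() is invoked. Illustrative sketch only; assumes
# keras and numpy are installed.
import numpy as np
from keras.models import Sequential
from keras.layers import LSTM

stateful = Sequential([LSTM(3, stateful=True, batch_input_shape=(2, 5, 4))])
stateful.compile('sgd', 'mse')
chunk = np.ones((2, 5, 4))
first = stateful.predict(chunk)   # starts from zero states
second = stateful.predict(chunk)  # continues from carried-over states
assert first.max() != second.max()
stateful.reset_states()           # back to zero states
assert np.allclose(stateful.predict(chunk), first, atol=1e-5)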
- model = Sequential() - model.add(embeddings.Embedding(embedding_num, embedding_dim, - mask_zero=True, - input_length=timesteps, - batch_input_shape=(num_samples, timesteps))) - layer = layer_class(units, return_sequences=False) - model.add(layer) - model.compile(optimizer='sgd', loss='mse') - - left_padded_input = np.ones((num_samples, timesteps)) - left_padded_input[0, :1] = 0 - left_padded_input[1, :2] = 0 - out6 = model.predict(left_padded_input) - - right_padded_input = np.ones((num_samples, timesteps)) - right_padded_input[0, -1:] = 0 - right_padded_input[1, -2:] = 0 - out7 = model.predict(right_padded_input) - - assert_allclose(out7, out6, atol=1e-5) - - -@pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported.') -def test_masking_correctness_output_not_equal_to_first_state(): - - class Cell(keras.layers.Layer): - - def __init__(self): - self.state_size = None - self.output_size = None - super(Cell, self).__init__() - - def build(self, input_shape): - self.state_size = input_shape[-1] - self.output_size = input_shape[-1] - - def call(self, inputs, states): - return inputs, [s + 1 for s in states] - - num_samples = 5 - num_timesteps = 4 - state_size = input_size = 3 # also equal to `output_size` - - # random inputs and state values - x_vals = np.random.random((num_samples, num_timesteps, input_size)) - # last timestep masked for first sample (all zero inputs masked by Masking layer) - x_vals[0, -1, :] = 0 - s_initial_vals = np.random.random((num_samples, state_size)) - - # final outputs equal to last inputs - y_vals_expected = x_vals[:, -1].copy() - # except for first sample, where it is equal to second to last value due to mask - y_vals_expected[0] = x_vals[0, -2] - - s_final_vals_expected = s_initial_vals.copy() - # states are incremented `num_timesteps - 1` times for first sample - s_final_vals_expected[0] += (num_timesteps - 1) - # and `num_timesteps - 1` times for remaining samples - s_final_vals_expected[1:] += num_timesteps - - for unroll in [True, False]: - x = Input((num_timesteps, input_size), name="x") - x_masked = Masking()(x) - s_initial = Input((state_size,), name="s_initial") - y, s_final = recurrent.RNN(Cell(), - return_state=True, - unroll=unroll)(x_masked, initial_state=s_initial) - model = Model([x, s_initial], [y, s_final]) - model.compile(optimizer='sgd', loss='mse') - - y_vals, s_final_vals = model.predict([x_vals, s_initial_vals]) - assert_allclose(y_vals, - y_vals_expected, - err_msg="Unexpected output for unroll={}".format(unroll)) - assert_allclose(s_final_vals, - s_final_vals_expected, - err_msg="Unexpected state for unroll={}".format(unroll)) - - -@pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported.') -def test_masking_correctness_output_size_not_equal_to_first_state_size(): - - class Cell(keras.layers.Layer): - - def __init__(self): - self.state_size = None - self.output_size = None - super(Cell, self).__init__() - - def build(self, input_shape): - self.state_size = input_shape[-1] - self.output_size = input_shape[-1] * 2 - - def call(self, inputs, states): - return keras.layers.concatenate([inputs] * 2), [s + 1 for s in states] - - num_samples = 5 - num_timesteps = 6 - input_size = state_size = 7 - - # random inputs and state values - x_vals = np.random.random((num_samples, num_timesteps, input_size)) - # last timestep masked for first sample (all zero inputs masked by Masking layer) - x_vals[0, -1, :] = 0 - s_initial_vals = np.random.random((num_samples, state_size)) - - # final outputs equal to last inputs concatenated - 
y_vals_expected = np.concatenate([x_vals[:, -1]] * 2, axis=-1) - # except for first sample, where it is equal to second to last value due to mask - y_vals_expected[0] = np.concatenate([x_vals[0, -2]] * 2, axis=-1) - - s_final_vals_expected = s_initial_vals.copy() - # states are incremented `num_timesteps - 1` times for first sample - s_final_vals_expected[0] += (num_timesteps - 1) - # and `num_timesteps - 1` times for remaining samples - s_final_vals_expected[1:] += num_timesteps - - for unroll in [True, False]: - x = Input((num_timesteps, input_size), name="x") - x_masked = Masking()(x) - s_initial = Input((state_size,), name="s_initial") - y, s_final = recurrent.RNN(Cell(), - return_state=True, - unroll=unroll)(x_masked, initial_state=s_initial) - model = Model([x, s_initial], [y, s_final]) - model.compile(optimizer='sgd', loss='mse') - - y_vals, s_final_vals = model.predict([x_vals, s_initial_vals]) - assert_allclose(y_vals, - y_vals_expected, - err_msg="Unexpected output for unroll={}".format(unroll)) - assert_allclose(s_final_vals, - s_final_vals_expected, - err_msg="Unexpected state for unroll={}".format(unroll)) - - -@rnn_test -def test_implementation_mode(layer_class): - for mode in [1, 2]: - # Without dropout - layer_test(layer_class, - kwargs={'units': units, - 'implementation': mode}, - input_shape=(num_samples, timesteps, embedding_dim)) - # With dropout - layer_test(layer_class, - kwargs={'units': units, - 'implementation': mode, - 'dropout': 0.1, - 'recurrent_dropout': 0.1}, - input_shape=(num_samples, timesteps, embedding_dim)) - # Without bias - layer_test(layer_class, - kwargs={'units': units, - 'implementation': mode, - 'use_bias': False}, - input_shape=(num_samples, timesteps, embedding_dim)) - - -@rnn_test -def test_regularizer(layer_class): - layer = layer_class(units, return_sequences=False, weights=None, - input_shape=(timesteps, embedding_dim), - kernel_regularizer=regularizers.l1(0.01), - recurrent_regularizer=regularizers.l1(0.01), - bias_regularizer='l2') - layer.build((None, None, embedding_dim)) - assert len(layer.losses) == 3 - assert len(layer.cell.losses) == 3 - - layer = layer_class(units, return_sequences=False, weights=None, - input_shape=(timesteps, embedding_dim), - activity_regularizer='l2') - assert layer.activity_regularizer - x = K.variable(np.ones((num_samples, timesteps, embedding_dim))) - layer(x) - assert len(layer.cell.get_losses_for(x)) == 0 - assert len(layer.get_losses_for(x)) == 1 - - -@rnn_test -def test_trainability(layer_class): - layer = layer_class(units) - layer.build((None, None, embedding_dim)) - assert len(layer.weights) == 3 - assert len(layer.trainable_weights) == 3 - assert len(layer.non_trainable_weights) == 0 - layer.trainable = False - assert len(layer.weights) == 3 - assert len(layer.trainable_weights) == 0 - assert len(layer.non_trainable_weights) == 3 - layer.trainable = True - assert len(layer.weights) == 3 - assert len(layer.trainable_weights) == 3 - assert len(layer.non_trainable_weights) == 0 - - -def test_masking_layer(): - ''' This test based on a previously failing issue here: - https://github.com/keras-team/keras/issues/1567 - ''' - inputs = np.random.random((6, 3, 4)) - targets = np.abs(np.random.random((6, 3, 5))) - targets /= targets.sum(axis=-1, keepdims=True) - - model = Sequential() - model.add(Masking(input_shape=(3, 4))) - model.add(recurrent.SimpleRNN( - units=5, return_sequences=True, unroll=False)) - model.compile(loss='categorical_crossentropy', optimizer='adam') - model.fit(inputs, targets, epochs=1, 
batch_size=100, verbose=1) - - model = Sequential() - model.add(Masking(input_shape=(3, 4))) - model.add(recurrent.SimpleRNN(units=5, return_sequences=True, unroll=True)) - model.compile(loss='categorical_crossentropy', optimizer='adam') - model.fit(inputs, targets, epochs=1, batch_size=100, verbose=1) - - -@rnn_test -def test_from_config(layer_class): - stateful_flags = (False, True) - for stateful in stateful_flags: - l1 = layer_class(units=1, stateful=stateful) - l2 = layer_class.from_config(l1.get_config()) - assert l1.get_config() == l2.get_config() - - -@rnn_test -def test_specify_initial_state_keras_tensor(layer_class): - num_states = 2 if layer_class is recurrent.LSTM else 1 - - # Test with Keras tensor - inputs = Input((timesteps, embedding_dim)) - initial_state = [Input((units,)) for _ in range(num_states)] - layer = layer_class(units) - if len(initial_state) == 1: - output = layer(inputs, initial_state=initial_state[0]) - else: - output = layer(inputs, initial_state=initial_state) - assert initial_state[0] in layer._inbound_nodes[0].input_tensors - - model = Model([inputs] + initial_state, output) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - initial_state = [np.random.random((num_samples, units)) - for _ in range(num_states)] - targets = np.random.random((num_samples, units)) - model.fit([inputs] + initial_state, targets) - - -@rnn_test -def test_specify_initial_state_non_keras_tensor(layer_class): - num_states = 2 if layer_class is recurrent.LSTM else 1 - - # Test with non-Keras tensor - inputs = Input((timesteps, embedding_dim)) - initial_state = [K.random_normal_variable((num_samples, units), 0, 1) - for _ in range(num_states)] - layer = layer_class(units) - output = layer(inputs, initial_state=initial_state) - - model = Model(inputs, output) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - targets = np.random.random((num_samples, units)) - model.fit(inputs, targets) - - -@rnn_test -def test_reset_states_with_values(layer_class): - num_states = 2 if layer_class is recurrent.LSTM else 1 - - layer = layer_class(units, stateful=True) - layer.build((num_samples, timesteps, embedding_dim)) - layer.reset_states() - assert len(layer.states) == num_states - assert layer.states[0] is not None - np.testing.assert_allclose(K.eval(layer.states[0]), - np.zeros(K.int_shape(layer.states[0])), - atol=1e-4) - state_shapes = [K.int_shape(state) for state in layer.states] - values = [np.ones(shape) for shape in state_shapes] - if len(values) == 1: - values = values[0] - layer.reset_states(values) - np.testing.assert_allclose(K.eval(layer.states[0]), - np.ones(K.int_shape(layer.states[0])), - atol=1e-4) - - # Test fit with invalid data - with pytest.raises(ValueError): - layer.reset_states([1] * (len(layer.states) + 1)) - - -@rnn_test -def test_initial_states_as_other_inputs(layer_class): - num_states = 2 if layer_class is recurrent.LSTM else 1 - - # Test with Keras tensor - main_inputs = Input((timesteps, embedding_dim)) - initial_state = [Input((units,)) for _ in range(num_states)] - inputs = [main_inputs] + initial_state - - layer = layer_class(units) - output = layer(inputs) - assert initial_state[0] in layer._inbound_nodes[0].input_tensors - - model = Model(inputs, output) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - main_inputs = np.random.random((num_samples, timesteps, 
embedding_dim)) - initial_state = [np.random.random((num_samples, units)) - for _ in range(num_states)] - targets = np.random.random((num_samples, units)) - model.train_on_batch([main_inputs] + initial_state, targets) - - -@rnn_test -def test_specify_state_with_masking(layer_class): - ''' This test based on a previously failing issue here: - https://github.com/keras-team/keras/issues/1567 - ''' - num_states = 2 if layer_class is recurrent.LSTM else 1 - - inputs = Input((timesteps, embedding_dim)) - _ = Masking()(inputs) - initial_state = [Input((units,)) for _ in range(num_states)] - output = layer_class(units)(inputs, initial_state=initial_state) - - model = Model([inputs] + initial_state, output) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - initial_state = [np.random.random((num_samples, units)) - for _ in range(num_states)] - targets = np.random.random((num_samples, units)) - model.fit([inputs] + initial_state, targets) - - -@rnn_test -def test_return_state(layer_class): - num_states = 2 if layer_class is recurrent.LSTM else 1 - - inputs = Input(batch_shape=(num_samples, timesteps, embedding_dim)) - layer = layer_class(units, return_state=True, stateful=True) - outputs = layer(inputs) - output, state = outputs[0], outputs[1:] - assert len(state) == num_states - model = Model(inputs, state[0]) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - state = model.predict(inputs) - np.testing.assert_allclose(K.eval(layer.states[0]), state, atol=1e-4) - - -@rnn_test -def test_state_reuse(layer_class): - inputs = Input(batch_shape=(num_samples, timesteps, embedding_dim)) - layer = layer_class(units, return_state=True, return_sequences=True) - outputs = layer(inputs) - output, state = outputs[0], outputs[1:] - output = layer_class(units)(output, initial_state=state) - model = Model(inputs, output) - - inputs = np.random.random((num_samples, timesteps, embedding_dim)) - outputs = model.predict(inputs) - - -@rnn_test -@pytest.mark.skipif((K.backend() in ['theano']), - reason='Not supported.') -def test_state_reuse_with_dropout(layer_class): - input1 = Input(batch_shape=(num_samples, timesteps, embedding_dim)) - layer = layer_class(units, return_state=True, - return_sequences=True, dropout=0.2) - state = layer(input1)[1:] - - input2 = Input(batch_shape=(num_samples, timesteps, embedding_dim)) - output = layer_class(units)(input2, initial_state=state) - model = Model([input1, input2], output) - - inputs = [np.random.random((num_samples, timesteps, embedding_dim)), - np.random.random((num_samples, timesteps, embedding_dim))] - outputs = model.predict(inputs) - - -def test_minimal_rnn_cell_non_layer(): - - class MinimalRNNCell(object): - - def __init__(self, units, input_dim): - self.units = units - self.state_size = units - self.kernel = keras.backend.variable( - np.random.random((input_dim, units))) - - def call(self, inputs, states): - prev_output = states[0] - output = keras.backend.dot(inputs, self.kernel) + prev_output - return output, [output] - - # Basic test case. - cell = MinimalRNNCell(32, 5) - x = keras.Input((None, 5)) - layer = recurrent.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacking. 
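# NOTE (editor): illustrative sketch of the return_state / initial_state
# handoff exercised by test_state_reuse above, i.e. the classic
# encoder-decoder wiring. Hypothetical names; assumes keras is installed.
from keras.layers import Input, LSTM
from keras.models import Model

enc_in = Input(shape=(None, 8))
_, h, c = LSTM(16, return_state=True)(enc_in)   # discard the sequence output
dec_in = Input(shape=(None, 8))
dec_out = LSTM(16)(dec_in, initial_state=[h, c])
seq2seq = Model([enc_in, dec_in], dec_out)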
- cells = [MinimalRNNCell(8, 5), - MinimalRNNCell(32, 8), - MinimalRNNCell(32, 32)] - layer = recurrent.RNN(cells) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - -def test_minimal_rnn_cell_non_layer_multiple_states(): - - class MinimalRNNCell(object): - - def __init__(self, units, input_dim): - self.units = units - self.state_size = (units, units) - self.kernel = keras.backend.variable( - np.random.random((input_dim, units))) - - def call(self, inputs, states): - prev_output_1 = states[0] - prev_output_2 = states[1] - output = keras.backend.dot(inputs, self.kernel) - output += prev_output_1 - output -= prev_output_2 - return output, [output * 2, output * 3] - - # Basic test case. - cell = MinimalRNNCell(32, 5) - x = keras.Input((None, 5)) - layer = recurrent.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacking. - cells = [MinimalRNNCell(8, 5), - MinimalRNNCell(16, 8), - MinimalRNNCell(32, 16)] - layer = recurrent.RNN(cells) - assert layer.cell.state_size == (8, 8, 16, 16, 32, 32) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - -def test_minimal_rnn_cell_layer(): - - class MinimalRNNCell(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(MinimalRNNCell, self).__init__(**kwargs) - - def build(self, input_shape): - # no time axis in the input shape passed to RNN cells - assert len(input_shape) == 2 - - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.built = True - - def call(self, inputs, states): - prev_output = states[0] - h = keras.backend.dot(inputs, self.kernel) - output = h + keras.backend.dot(prev_output, self.recurrent_kernel) - return output, [output] - - def get_config(self): - config = {'units': self.units} - base_config = super(MinimalRNNCell, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - # Test basic case. - x = keras.Input((None, 5)) - cell = MinimalRNNCell(32) - layer = recurrent.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test basic case serialization. - x_np = np.random.random((6, 5, 5)) - y_np = model.predict(x_np) - weights = model.get_weights() - config = layer.get_config() - with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): - layer = recurrent.RNN.from_config(config) - y = layer(x) - model = keras.models.Model(x, y) - model.set_weights(weights) - y_np_2 = model.predict(x_np) - assert_allclose(y_np, y_np_2, atol=1e-4) - - # Test stacking. - cells = [MinimalRNNCell(8), - MinimalRNNCell(12), - MinimalRNNCell(32)] - layer = recurrent.RNN(cells) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacked RNN serialization. 
- x_np = np.random.random((6, 5, 5)) - y_np = model.predict(x_np) - weights = model.get_weights() - config = layer.get_config() - with keras.utils.CustomObjectScope({'MinimalRNNCell': MinimalRNNCell}): - layer = recurrent.RNN.from_config(config) - y = layer(x) - model = keras.models.Model(x, y) - model.set_weights(weights) - y_np_2 = model.predict(x_np) - assert_allclose(y_np, y_np_2, atol=1e-4) - - -@rnn_cell_test -def test_builtin_rnn_cell_layer(cell_class): - # Test basic case. - x = keras.Input((None, 5)) - cell = cell_class(32) - layer = recurrent.RNN(cell) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test basic case serialization. - x_np = np.random.random((6, 5, 5)) - y_np = model.predict(x_np) - weights = model.get_weights() - config = layer.get_config() - layer = recurrent.RNN.from_config(config) - y = layer(x) - model = keras.models.Model(x, y) - model.set_weights(weights) - y_np_2 = model.predict(x_np) - assert_allclose(y_np, y_np_2, atol=1e-4) - - # Test stacking. - cells = [cell_class(8), - cell_class(12), - cell_class(32)] - layer = recurrent.RNN(cells) - y = layer(x) - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.zeros((6, 5, 5)), np.zeros((6, 32))) - - # Test stacked RNN serialization. - x_np = np.random.random((6, 5, 5)) - y_np = model.predict(x_np) - weights = model.get_weights() - config = layer.get_config() - layer = recurrent.RNN.from_config(config) - y = layer(x) - model = keras.models.Model(x, y) - model.set_weights(weights) - y_np_2 = model.predict(x_np) - assert_allclose(y_np, y_np_2, atol=1e-4) - - -@pytest.mark.skipif((K.backend() in ['cntk', 'theano']), - reason='Not supported.') -def test_stacked_rnn_dropout(): - cells = [recurrent.LSTMCell(3, dropout=0.1, recurrent_dropout=0.1), - recurrent.LSTMCell(3, dropout=0.1, recurrent_dropout=0.1)] - layer = recurrent.RNN(cells) - - x = keras.Input((None, 5)) - y = layer(x) - model = keras.models.Model(x, y) - model.compile('sgd', 'mse') - x_np = np.random.random((6, 5, 5)) - y_np = np.random.random((6, 3)) - model.train_on_batch(x_np, y_np) - - -def test_stacked_rnn_attributes(): - cells = [recurrent.LSTMCell(3), - recurrent.LSTMCell(3, kernel_regularizer='l2')] - layer = recurrent.RNN(cells) - layer.build((None, None, 5)) - - # Test regularization losses - assert len(layer.losses) == 1 - - # Test weights - assert len(layer.trainable_weights) == 6 - cells[0].trainable = False - assert len(layer.trainable_weights) == 3 - assert len(layer.non_trainable_weights) == 3 - - # Test `get_losses_for` - x = keras.Input((None, 5)) - y = K.sum(x) - cells[0].add_loss(y, inputs=x) - assert layer.get_losses_for(x) == [y] - - -def test_stacked_rnn_compute_output_shape(): - cells = [recurrent.LSTMCell(3), - recurrent.LSTMCell(6)] - layer = recurrent.RNN(cells, return_state=True, return_sequences=True) - output_shape = layer.compute_output_shape((None, timesteps, embedding_dim)) - expected_output_shape = [(None, timesteps, 6), - (None, 3), - (None, 3), - (None, 6), - (None, 6)] - assert output_shape == expected_output_shape - - # Test reverse_state_order = True for stacked cell. 
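# NOTE (editor): quick aside on stacking, as covered above: passing a list
# of cells to RNN wraps them in StackedRNNCells, whose state_size is the
# concatenation of the per-cell state sizes (two entries per LSTMCell).
# Sketch under the assumption that keras is installed.
from keras.layers import RNN, LSTMCell

stacked = RNN([LSTMCell(3), LSTMCell(6)])
assert stacked.cell.state_size == (3, 3, 6, 6)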
- stacked_cell = recurrent.StackedRNNCells( - cells, reverse_state_order=True) - layer = recurrent.RNN( - stacked_cell, return_state=True, return_sequences=True) - output_shape = layer.compute_output_shape((None, timesteps, embedding_dim)) - expected_output_shape = [(None, timesteps, 6), - (None, 6), - (None, 6), - (None, 3), - (None, 3)] - assert output_shape == expected_output_shape - - -@rnn_test -def test_batch_size_equal_one(layer_class): - inputs = Input(batch_shape=(1, timesteps, embedding_dim)) - layer = layer_class(units) - outputs = layer(inputs) - model = Model(inputs, outputs) - model.compile('sgd', 'mse') - x = np.random.random((1, timesteps, embedding_dim)) - y = np.random.random((1, units)) - model.train_on_batch(x, y) - - -def test_rnn_cell_with_constants_layer(): - - class RNNCellWithConstants(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(RNNCellWithConstants, self).__init__(**kwargs) - - def build(self, input_shape): - if not isinstance(input_shape, list): - raise TypeError('expects constants shape') - [input_shape, constant_shape] = input_shape - # will (and should) raise if more than one constant passed - - self.input_kernel = self.add_weight( - shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.constant_kernel = self.add_weight( - shape=(constant_shape[-1], self.units), - initializer='uniform', - name='constant_kernel') - self.built = True - - def call(self, inputs, states, constants): - [prev_output] = states - [constant] = constants - h_input = keras.backend.dot(inputs, self.input_kernel) - h_state = keras.backend.dot(prev_output, self.recurrent_kernel) - h_const = keras.backend.dot(constant, self.constant_kernel) - output = h_input + h_state + h_const - return output, [output] - - def get_config(self): - config = {'units': self.units} - base_config = super(RNNCellWithConstants, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - # Test basic case. - x = keras.Input((None, 5)) - c = keras.Input((3,)) - cell = RNNCellWithConstants(32) - layer = recurrent.RNN(cell) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch( - [np.zeros((6, 5, 5)), np.zeros((6, 3))], - np.zeros((6, 32)) - ) - - # Test basic case serialization. - x_np = np.random.random((6, 5, 5)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, c_np]) - weights = model.get_weights() - config = layer.get_config() - custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} - with keras.utils.CustomObjectScope(custom_objects): - layer = recurrent.RNN.from_config(config.copy()) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) - model.set_weights(weights) - y_np_2 = model.predict([x_np, c_np]) - assert_allclose(y_np, y_np_2, atol=1e-4) - - # test flat list inputs - with keras.utils.CustomObjectScope(custom_objects): - layer = recurrent.RNN.from_config(config.copy()) - y = layer([x, c]) - model = keras.models.Model([x, c], y) - model.set_weights(weights) - y_np_3 = model.predict([x_np, c_np]) - assert_allclose(y_np, y_np_3, atol=1e-4) - - # Test stacking. 
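# NOTE (editor): editorial aside naming the pattern above. The `constants`
# argument feeds the same tensor(s) to the cell at every timestep (useful
# for conditioning, e.g. an attention context). A cell opts in by accepting
# a `constants` parameter in call(); the wrapper is then invoked in either
# of the two forms shown in the surrounding tests:
#
#     y = recurrent.RNN(cell)(x, constants=c)   # keyword form
#     y = recurrent.RNN(cell)([x, c])           # flat-list form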
- cells = [recurrent.GRUCell(8), - RNNCellWithConstants(12), - RNNCellWithConstants(32)] - layer = recurrent.RNN(cells) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch( - [np.zeros((6, 5, 5)), np.zeros((6, 3))], - np.zeros((6, 32)) - ) - - # Test stacked RNN serialization. - x_np = np.random.random((6, 5, 5)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, c_np]) - weights = model.get_weights() - config = layer.get_config() - with keras.utils.CustomObjectScope(custom_objects): - layer = recurrent.RNN.from_config(config.copy()) - y = layer(x, constants=c) - model = keras.models.Model([x, c], y) - model.set_weights(weights) - y_np_2 = model.predict([x_np, c_np]) - assert_allclose(y_np, y_np_2, atol=1e-4) - - -def test_rnn_cell_with_constants_layer_passing_initial_state(): - - class RNNCellWithConstants(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(RNNCellWithConstants, self).__init__(**kwargs) - - def build(self, input_shape): - if not isinstance(input_shape, list): - raise TypeError('expects constants shape') - [input_shape, constant_shape] = input_shape - # will (and should) raise if more than one constant passed - - self.input_kernel = self.add_weight( - shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.constant_kernel = self.add_weight( - shape=(constant_shape[-1], self.units), - initializer='uniform', - name='constant_kernel') - self.built = True - - def call(self, inputs, states, constants): - [prev_output] = states - [constant] = constants - h_input = keras.backend.dot(inputs, self.input_kernel) - h_state = keras.backend.dot(prev_output, self.recurrent_kernel) - h_const = keras.backend.dot(constant, self.constant_kernel) - output = h_input + h_state + h_const - return output, [output] - - def get_config(self): - config = {'units': self.units} - base_config = super(RNNCellWithConstants, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - # Test basic case. - x = keras.Input((None, 5)) - c = keras.Input((3,)) - s = keras.Input((32,)) - cell = RNNCellWithConstants(32) - layer = recurrent.RNN(cell) - y = layer(x, initial_state=s, constants=c) - model = keras.models.Model([x, s, c], y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch( - [np.zeros((6, 5, 5)), np.zeros((6, 32)), np.zeros((6, 3))], - np.zeros((6, 32)) - ) - - # Test basic case serialization. 
- x_np = np.random.random((6, 5, 5)) - s_np = np.random.random((6, 32)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, s_np, c_np]) - weights = model.get_weights() - config = layer.get_config() - custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} - with keras.utils.CustomObjectScope(custom_objects): - layer = recurrent.RNN.from_config(config.copy()) - y = layer(x, initial_state=s, constants=c) - model = keras.models.Model([x, s, c], y) - model.set_weights(weights) - y_np_2 = model.predict([x_np, s_np, c_np]) - assert_allclose(y_np, y_np_2, atol=1e-4) - - # verify that state is used - y_np_2_different_s = model.predict([x_np, s_np + 10., c_np]) - with pytest.raises(AssertionError): - assert_allclose(y_np, y_np_2_different_s, atol=1e-4) - - # test flat list inputs - with keras.utils.CustomObjectScope(custom_objects): - layer = recurrent.RNN.from_config(config.copy()) - y = layer([x, s, c]) - model = keras.models.Model([x, s, c], y) - model.set_weights(weights) - y_np_3 = model.predict([x_np, s_np, c_np]) - assert_allclose(y_np, y_np_3, atol=1e-4) - - -@rnn_test -def test_rnn_cell_identity_initializer(layer_class): - inputs = Input(shape=(timesteps, embedding_dim)) - layer = layer_class(units, recurrent_initializer='identity') - layer(inputs) - recurrent_kernel = layer.get_weights()[1] - num_kernels = recurrent_kernel.shape[1] // recurrent_kernel.shape[0] - assert np.array_equal(recurrent_kernel, - np.concatenate([np.identity(units)] * num_kernels, axis=1)) - - -@pytest.mark.skipif(K.backend() == 'cntk', reason='Not supported.') -def test_inconsistent_output_state_size(): - - class PlusOneRNNCell(keras.layers.Layer): - """Add one to the input and state. - - This cell is used for testing state_size and output_size.""" - - def __init__(self, num_unit, **kwargs): - self.state_size = num_unit - super(PlusOneRNNCell, self).__init__(**kwargs) - - def build(self, input_shape): - self.output_size = input_shape[-1] - - def call(self, inputs, states): - return inputs + 1, [states[0] + 1] - - batch = 32 - time_step = 4 - state_size = 5 - input_size = 6 - cell = PlusOneRNNCell(state_size) - x = keras.Input((None, input_size)) - layer = recurrent.RNN(cell) - y = layer(x) - - assert cell.state_size == state_size - init_state = layer.get_initial_state(x) - assert len(init_state) == 1 - if K.backend() != 'theano': - # theano does not support static shape inference. 
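# NOTE (editor): aside on recurrent_initializer='identity' (tested above):
# it initializes the recurrent kernel to the identity matrix, the "IRNN"
# trick for SimpleRNN. Illustrative sketch; assumes keras and numpy are
# installed.
import numpy as np
from keras.layers import Input, SimpleRNN

irnn = SimpleRNN(4, recurrent_initializer='identity')
irnn(Input(shape=(6, 5)))  # build the layer
assert np.array_equal(irnn.get_weights()[1], np.identity(4))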
- assert K.int_shape(init_state[0]) == (None, state_size) - - model = keras.models.Model(x, y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch( - np.zeros((batch, time_step, input_size)), - np.zeros((batch, input_size))) - assert model.output_shape == (None, input_size) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -import copy -from numpy.testing import assert_allclose -from keras.utils import CustomObjectScope -from keras.layers import wrappers, Input, Layer -from keras.layers import RNN -from keras import layers -from keras.models import Sequential, Model, model_from_json -from keras import backend as K -from keras.utils.generic_utils import object_list_uid, to_list - - -def test_TimeDistributed(): - # first, test with Dense layer - model = Sequential() - model.add(wrappers.TimeDistributed(layers.Dense(2), input_shape=(3, 4))) - model.add(layers.Activation('relu')) - model.compile(optimizer='rmsprop', loss='mse') - model.fit(np.random.random((10, 3, 4)), np.random.random((10, 3, 2)), - epochs=1, - batch_size=10) - - # test config - model.get_config() - - # test when specifying a batch_input_shape - test_input = np.random.random((1, 3, 4)) - test_output = model.predict(test_input) - weights = model.layers[0].get_weights() - - reference = Sequential() - reference.add(wrappers.TimeDistributed(layers.Dense(2), - batch_input_shape=(1, 3, 4))) - reference.add(layers.Activation('relu')) - reference.compile(optimizer='rmsprop', loss='mse') - reference.layers[0].set_weights(weights) - - reference_output = reference.predict(test_input) - assert_allclose(test_output, reference_output, atol=1e-05) - - # test with Embedding - model = Sequential() - model.add(wrappers.TimeDistributed(layers.Embedding(5, 6), - batch_input_shape=(10, 3, 4), - dtype='int32')) - model.compile(optimizer='rmsprop', loss='mse') - model.fit(np.random.randint(5, size=(10, 3, 4), dtype='int32'), - np.random.random((10, 3, 4, 6)), epochs=1, batch_size=10) - - # compare to not using batch_input_shape - test_input = np.random.randint(5, size=(10, 3, 4), dtype='int32') - test_output = model.predict(test_input) - weights = model.layers[0].get_weights() - - reference = Sequential() - reference.add(wrappers.TimeDistributed(layers.Embedding(5, 6), - input_shape=(3, 4), dtype='int32')) - reference.compile(optimizer='rmsprop', loss='mse') - reference.layers[0].set_weights(weights) - - reference_output = reference.predict(test_input) - assert_allclose(test_output, reference_output, atol=1e-05) - - # test with Conv2D - model = Sequential() - model.add(wrappers.TimeDistributed(layers.Conv2D(5, (2, 2), - padding='same'), - input_shape=(2, 4, 4, 3))) - model.add(layers.Activation('relu')) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch(np.random.random((1, 2, 4, 4, 3)), - np.random.random((1, 2, 4, 4, 5))) - - model = model_from_json(model.to_json()) - model.summary() - - # test stacked layers - model = Sequential() - model.add(wrappers.TimeDistributed(layers.Dense(2), input_shape=(3, 4))) - model.add(wrappers.TimeDistributed(layers.Dense(3))) - model.add(layers.Activation('relu')) - model.compile(optimizer='rmsprop', loss='mse') - - model.fit(np.random.random((10, 3, 4)), np.random.random((10, 3, 3)), - epochs=1, batch_size=10) - - # test wrapping Sequential model - model = Sequential() - model.add(layers.Dense(3, input_dim=2)) - outer_model = Sequential() - outer_model.add(wrappers.TimeDistributed(model, input_shape=(3, 2))) - 
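# NOTE (editor): illustrative aside on the TimeDistributed contract the
# tests above rely on: the wrapped layer is applied independently to each
# timestep, so one Dense kernel serves every slice of the time axis.
# Hypothetical names; assumes keras and numpy are installed.
import numpy as np
from keras.models import Sequential
from keras.layers import Dense, TimeDistributed

tdist = Sequential([TimeDistributed(Dense(2), input_shape=(3, 4))])
kernel, bias = tdist.layers[0].get_weights()
x = np.random.random((5, 3, 4))
# the same Dense kernel is applied at every one of the 3 timesteps
assert np.allclose(tdist.predict(x), x.dot(kernel) + bias, atol=1e-5)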
outer_model.compile(optimizer='rmsprop', loss='mse') - outer_model.fit(np.random.random((10, 3, 2)), np.random.random((10, 3, 3)), - epochs=1, batch_size=10) - - # test with functional API - x = Input(shape=(3, 2)) - y = wrappers.TimeDistributed(model)(x) - outer_model = Model(x, y) - outer_model.compile(optimizer='rmsprop', loss='mse') - outer_model.fit(np.random.random((10, 3, 2)), np.random.random((10, 3, 3)), - epochs=1, batch_size=10) - - # test with BatchNormalization - model = Sequential() - model.add(wrappers.TimeDistributed( - layers.BatchNormalization(center=True, scale=True), - name='bn', input_shape=(10, 2))) - model.compile(optimizer='rmsprop', loss='mse') - # Assert that mean and variance are 0 and 1. - td = model.layers[0] - assert np.array_equal(td.get_weights()[2], np.array([0, 0])) - assert np.array_equal(td.get_weights()[3], np.array([1, 1])) - # Train - model.train_on_batch(np.random.normal(loc=2, scale=2, size=(1, 10, 2)), - np.broadcast_to(np.array([0, 1]), (1, 10, 2))) - # Assert that mean and variance changed. - assert not np.array_equal(td.get_weights()[2], np.array([0, 0])) - assert not np.array_equal(td.get_weights()[3], np.array([1, 1])) - # Verify input_map has one mapping from inputs to reshaped inputs. - uid = object_list_uid(model.inputs) - assert len(td._input_map.keys()) == 1 - assert uid in td._input_map - assert K.int_shape(td._input_map[uid]) == (None, 2) - - -@pytest.mark.skipif((K.backend() == 'cntk'), - reason='Flaky with CNTK backend') -def test_TimeDistributed_learning_phase(): - # test layers that need learning_phase to be set - np.random.seed(1234) - x = Input(shape=(3, 2)) - y = wrappers.TimeDistributed(layers.Dropout(.999))(x, training=True) - model = Model(x, y) - y = model.predict(np.random.random((10, 3, 2))) - assert_allclose(np.mean(y), 0., atol=1e-1, rtol=1e-1) - - -def test_TimeDistributed_trainable(): - # test layers that need learning_phase to be set - x = Input(shape=(3, 2)) - layer = wrappers.TimeDistributed(layers.BatchNormalization()) - _ = layer(x) - assert len(layer.updates) == 2 - assert len(layer.trainable_weights) == 2 - layer.trainable = False - assert len(layer.updates) == 0 - assert len(layer.trainable_weights) == 0 - layer.trainable = True - assert len(layer.updates) == 2 - assert len(layer.trainable_weights) == 2 - - -@pytest.mark.skipif((K.backend() == 'cntk'), - reason='Unknown timestamps for RNN not supported in CNTK.') -def test_TimeDistributed_with_masked_embedding_and_unspecified_shape(): - # test with unspecified shape and Embeddings with mask_zero - model = Sequential() - model.add(wrappers.TimeDistributed(layers.Embedding(5, 6, mask_zero=True), - input_shape=(None, None))) - # the shape so far: (N, t_1, t_2, 6) - model.add(wrappers.TimeDistributed( - layers.SimpleRNN(7, return_sequences=True))) - model.add(wrappers.TimeDistributed( - layers.SimpleRNN(8, return_sequences=False))) - model.add(layers.SimpleRNN(1, return_sequences=False)) - model.compile(optimizer='rmsprop', loss='mse') - model_input = np.random.randint( - low=1, high=5, size=(10, 3, 4), dtype='int32') - for i in range(4): - model_input[i, i:, i:] = 0 - model.fit(model_input, - np.random.random((10, 1)), epochs=1, batch_size=10) - mask_outputs = [model.layers[0].compute_mask(model.input)] - for layer in model.layers[1:]: - mask_outputs.append(layer.compute_mask(layer.input, mask_outputs[-1])) - func = K.function([model.input], mask_outputs[:-1]) - mask_outputs_val = func([model_input]) - ref_mask_val_0 = model_input > 0 # embedding layer - 
ref_mask_val_1 = ref_mask_val_0 # first RNN layer - ref_mask_val_2 = np.any(ref_mask_val_1, axis=-1) # second RNN layer - ref_mask_val = [ref_mask_val_0, ref_mask_val_1, ref_mask_val_2] - for i in range(3): - assert np.array_equal(mask_outputs_val[i], ref_mask_val[i]) - assert mask_outputs[-1] is None # final layer - - -def test_TimeDistributed_with_masking_layer(): - # test with Masking layer - model = Sequential() - model.add(wrappers.TimeDistributed(layers.Masking(mask_value=0.,), - input_shape=(None, 4))) - model.add(wrappers.TimeDistributed(layers.Dense(5))) - model.compile(optimizer='rmsprop', loss='mse') - model_input = np.random.randint(low=1, high=5, size=(10, 3, 4)) - for i in range(4): - model_input[i, i:, :] = 0. - model.compile(optimizer='rmsprop', loss='mse') - model.fit(model_input, - np.random.random((10, 3, 5)), epochs=1, batch_size=6) - mask_outputs = [model.layers[0].compute_mask(model.input)] - mask_outputs += [model.layers[1].compute_mask(model.layers[1].input, - mask_outputs[-1])] - func = K.function([model.input], mask_outputs) - mask_outputs_val = func([model_input]) - assert np.array_equal(mask_outputs_val[0], np.any(model_input, axis=-1)) - assert np.array_equal(mask_outputs_val[1], np.any(model_input, axis=-1)) - - -def test_regularizers(): - model = Sequential() - model.add(wrappers.TimeDistributed( - layers.Dense(2, kernel_regularizer='l1'), input_shape=(3, 4))) - model.add(layers.Activation('relu')) - model.compile(optimizer='rmsprop', loss='mse') - assert len(model.layers[0].layer.losses) == 1 - assert len(model.layers[0].losses) == 1 - assert len(model.layers[0].get_losses_for(None)) == 1 - assert len(model.losses) == 1 - - model = Sequential() - model.add(wrappers.TimeDistributed( - layers.Dense(2, activity_regularizer='l1'), input_shape=(3, 4))) - model.add(layers.Activation('relu')) - model.compile(optimizer='rmsprop', loss='mse') - assert len(model.losses) == 1 - - -def test_Bidirectional(): - rnn = layers.SimpleRNN - samples = 2 - dim = 2 - timesteps = 2 - output_dim = 2 - dropout_rate = 0.2 - for mode in ['sum', 'concat']: - x = np.random.random((samples, timesteps, dim)) - target_dim = 2 * output_dim if mode == 'concat' else output_dim - y = np.random.random((samples, target_dim)) - - # test with Sequential model - model = Sequential() - model.add(wrappers.Bidirectional(rnn(output_dim, dropout=dropout_rate, - recurrent_dropout=dropout_rate), - merge_mode=mode, - input_shape=(timesteps, dim))) - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1, batch_size=1) - - # test config - model.get_config() - model = model_from_json(model.to_json()) - model.summary() - - # test stacked bidirectional layers - model = Sequential() - model.add(wrappers.Bidirectional(rnn(output_dim, - return_sequences=True), - merge_mode=mode, - input_shape=(timesteps, dim))) - model.add(wrappers.Bidirectional(rnn(output_dim), merge_mode=mode)) - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1, batch_size=1) - - # test with functional API - inputs = Input((timesteps, dim)) - outputs = wrappers.Bidirectional(rnn(output_dim, dropout=dropout_rate, - recurrent_dropout=dropout_rate), - merge_mode=mode)(inputs) - model = Model(inputs, outputs) - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1, batch_size=1) - - # Bidirectional and stateful - inputs = Input(batch_shape=(1, timesteps, dim)) - outputs = wrappers.Bidirectional(rnn(output_dim, stateful=True), - merge_mode=mode)(inputs) - model = Model(inputs, outputs) - 
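# NOTE (editor): aside on merge_mode, which the tests above iterate over:
# forward and backward outputs are combined by sum/mul/ave/concat, or
# returned as a pair when merge_mode=None; only 'concat' changes the
# output width. Sketch assuming keras is installed.
from keras.layers import Input, LSTM, Bidirectional
from keras.models import Model

seq = Input(shape=(3, 5))
concat = Bidirectional(LSTM(4), merge_mode='concat')(seq)
summed = Bidirectional(LSTM(4), merge_mode='sum')(seq)
assert Model(seq, concat).output_shape == (None, 8)
assert Model(seq, summed).output_shape == (None, 4)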
model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1, batch_size=1) - - -@pytest.mark.skipif((K.backend() == 'cntk'), - reason='Unknown timesteps not supported in CNTK.') -def test_Bidirectional_dynamic_timesteps(): - # test with functional API with dynamic length - rnn = layers.SimpleRNN - samples = 2 - dim = 2 - timesteps = 2 - output_dim = 2 - dropout_rate = 0.2 - for mode in ['sum', 'concat']: - x = np.random.random((samples, timesteps, dim)) - target_dim = 2 * output_dim if mode == 'concat' else output_dim - y = np.random.random((samples, target_dim)) - - inputs = Input((None, dim)) - outputs = wrappers.Bidirectional(rnn(output_dim, dropout=dropout_rate, - recurrent_dropout=dropout_rate), - merge_mode=mode)(inputs) - model = Model(inputs, outputs) - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1, batch_size=1) - - -@pytest.mark.parametrize('merge_mode', ['sum', 'mul', 'ave', 'concat', None]) -def test_Bidirectional_merged_value(merge_mode): - rnn = layers.LSTM - samples = 2 - dim = 5 - timesteps = 3 - units = 3 - X = [np.random.rand(samples, timesteps, dim)] - - if merge_mode == 'sum': - def merge_func(y, y_rev): return y + y_rev - elif merge_mode == 'mul': - def merge_func(y, y_rev): return y * y_rev - elif merge_mode == 'ave': - def merge_func(y, y_rev): return (y + y_rev) / 2 - elif merge_mode == 'concat': - def merge_func(y, y_rev): return np.concatenate((y, y_rev), axis=-1) - else: - def merge_func(y, y_rev): return [y, y_rev] - - # basic case - inputs = Input((timesteps, dim)) - layer = wrappers.Bidirectional(rnn(units, return_sequences=True), - merge_mode=merge_mode) - f_merged = K.function([inputs], to_list(layer(inputs))) - f_forward = K.function([inputs], [layer.forward_layer.call(inputs)]) - f_backward = K.function([inputs], - [K.reverse(layer.backward_layer.call(inputs), 1)]) - - y_merged = f_merged(X) - y_expected = to_list(merge_func(f_forward(X)[0], f_backward(X)[0])) - assert len(y_merged) == len(y_expected) - for x1, x2 in zip(y_merged, y_expected): - assert_allclose(x1, x2, atol=1e-5) - - # test return_state - inputs = Input((timesteps, dim)) - layer = wrappers.Bidirectional(rnn(units, return_state=True), - merge_mode=merge_mode) - f_merged = K.function([inputs], layer(inputs)) - f_forward = K.function([inputs], layer.forward_layer.call(inputs)) - f_backward = K.function([inputs], layer.backward_layer.call(inputs)) - n_states = len(layer.layer.states) - - y_merged = f_merged(X) - y_forward = f_forward(X) - y_backward = f_backward(X) - y_expected = to_list(merge_func(y_forward[0], y_backward[0])) - assert len(y_merged) == len(y_expected) + n_states * 2 - for x1, x2 in zip(y_merged, y_expected): - assert_allclose(x1, x2, atol=1e-5) - - # test if the state of a BiRNN is the concatenation of the underlying RNNs - y_merged = y_merged[-n_states * 2:] - y_forward = y_forward[-n_states:] - y_backward = y_backward[-n_states:] - for state_birnn, state_inner in zip(y_merged, y_forward + y_backward): - assert_allclose(state_birnn, state_inner, atol=1e-5) - - -@pytest.mark.skipif(K.backend() == 'theano', reason='Not supported.') -@pytest.mark.parametrize('merge_mode', ['sum', 'concat', None]) -def test_Bidirectional_dropout(merge_mode): - rnn = layers.LSTM - samples = 2 - dim = 5 - timesteps = 3 - units = 3 - X = [np.random.rand(samples, timesteps, dim)] - - inputs = Input((timesteps, dim)) - wrapped = wrappers.Bidirectional(rnn(units, dropout=0.2, recurrent_dropout=0.2), - merge_mode=merge_mode) - outputs = to_list(wrapped(inputs,
training=True)) - assert all(not getattr(x, '_uses_learning_phase') for x in outputs) - - inputs = Input((timesteps, dim)) - wrapped = wrappers.Bidirectional(rnn(units, dropout=0.2, return_state=True), - merge_mode=merge_mode) - outputs = to_list(wrapped(inputs)) - assert all(x._uses_learning_phase for x in outputs) - - model = Model(inputs, outputs) - assert model.uses_learning_phase - y1 = to_list(model.predict(X)) - y2 = to_list(model.predict(X)) - for x1, x2 in zip(y1, y2): - assert_allclose(x1, x2, atol=1e-5) - - -def test_Bidirectional_state_reuse(): - rnn = layers.LSTM - samples = 2 - dim = 5 - timesteps = 3 - units = 3 - - input1 = Input((timesteps, dim)) - layer = wrappers.Bidirectional(rnn(units, return_state=True, - return_sequences=True)) - state = layer(input1)[1:] - - # test passing invalid initial_state: passing a tensor - input2 = Input((timesteps, dim)) - with pytest.raises(ValueError): - output = wrappers.Bidirectional(rnn(units))( - input2, initial_state=state[0]) - - # test valid usage: passing a list - output = wrappers.Bidirectional(rnn(units))(input2, initial_state=state) - model = Model([input1, input2], output) - assert len(model.layers) == 4 - assert isinstance(model.layers[-1].input, list) - inputs = [np.random.rand(samples, timesteps, dim), - np.random.rand(samples, timesteps, dim)] - outputs = model.predict(inputs) - - -def test_Bidirectional_with_constants(): - class RNNCellWithConstants(Layer): - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(RNNCellWithConstants, self).__init__(**kwargs) - - def build(self, input_shape): - if not isinstance(input_shape, list): - raise TypeError('expects constants shape') - [input_shape, constant_shape] = input_shape - # will (and should) raise if more than one constant passed - - self.input_kernel = self.add_weight( - shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.constant_kernel = self.add_weight( - shape=(constant_shape[-1], self.units), - initializer='uniform', - name='constant_kernel') - self.built = True - - def call(self, inputs, states, constants): - [prev_output] = states - [constant] = constants - h_input = K.dot(inputs, self.input_kernel) - h_state = K.dot(prev_output, self.recurrent_kernel) - h_const = K.dot(constant, self.constant_kernel) - output = h_input + h_state + h_const - return output, [output] - - def get_config(self): - config = {'units': self.units} - base_config = super(RNNCellWithConstants, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - # Test basic case. - x = Input((5, 5)) - c = Input((3,)) - cell = RNNCellWithConstants(32) - custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} - with CustomObjectScope(custom_objects): - layer = wrappers.Bidirectional(RNN(cell)) - y = layer(x, constants=c) - model = Model([x, c], y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch( - [np.zeros((6, 5, 5)), np.zeros((6, 3))], - np.zeros((6, 64)) - ) - - # Test basic case serialization. 
- x_np = np.random.random((6, 5, 5)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, c_np]) - weights = model.get_weights() - config = layer.get_config() - with CustomObjectScope(custom_objects): - layer = wrappers.Bidirectional.from_config(copy.deepcopy(config)) - y = layer(x, constants=c) - model = Model([x, c], y) - model.set_weights(weights) - y_np_2 = model.predict([x_np, c_np]) - assert_allclose(y_np, y_np_2, atol=1e-4) - - # test flat list inputs - with CustomObjectScope(custom_objects): - layer = wrappers.Bidirectional.from_config(copy.deepcopy(config)) - y = layer([x, c]) - model = Model([x, c], y) - model.set_weights(weights) - y_np_3 = model.predict([x_np, c_np]) - assert_allclose(y_np, y_np_3, atol=1e-4) - - -def test_Bidirectional_with_constants_layer_passing_initial_state(): - class RNNCellWithConstants(Layer): - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(RNNCellWithConstants, self).__init__(**kwargs) - - def build(self, input_shape): - if not isinstance(input_shape, list): - raise TypeError('expects constants shape') - [input_shape, constant_shape] = input_shape - # will (and should) raise if more than one constant passed - - self.input_kernel = self.add_weight( - shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.constant_kernel = self.add_weight( - shape=(constant_shape[-1], self.units), - initializer='uniform', - name='constant_kernel') - self.built = True - - def call(self, inputs, states, constants): - [prev_output] = states - [constant] = constants - h_input = K.dot(inputs, self.input_kernel) - h_state = K.dot(prev_output, self.recurrent_kernel) - h_const = K.dot(constant, self.constant_kernel) - output = h_input + h_state + h_const - return output, [output] - - def get_config(self): - config = {'units': self.units} - base_config = super(RNNCellWithConstants, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - # Test basic case. - x = Input((5, 5)) - c = Input((3,)) - s_for = Input((32,)) - s_bac = Input((32,)) - cell = RNNCellWithConstants(32) - custom_objects = {'RNNCellWithConstants': RNNCellWithConstants} - with CustomObjectScope(custom_objects): - layer = wrappers.Bidirectional(RNN(cell)) - y = layer(x, initial_state=[s_for, s_bac], constants=c) - model = Model([x, s_for, s_bac, c], y) - model.compile(optimizer='rmsprop', loss='mse') - model.train_on_batch( - [np.zeros((6, 5, 5)), np.zeros((6, 32)), - np.zeros((6, 32)), np.zeros((6, 3))], - np.zeros((6, 64)) - ) - - # Test basic case serialization. 
- x_np = np.random.random((6, 5, 5)) - s_fw_np = np.random.random((6, 32)) - s_bk_np = np.random.random((6, 32)) - c_np = np.random.random((6, 3)) - y_np = model.predict([x_np, s_fw_np, s_bk_np, c_np]) - weights = model.get_weights() - config = layer.get_config() - with CustomObjectScope(custom_objects): - layer = wrappers.Bidirectional.from_config(copy.deepcopy(config)) - y = layer(x, initial_state=[s_for, s_bac], constants=c) - model = Model([x, s_for, s_bac, c], y) - model.set_weights(weights) - y_np_2 = model.predict([x_np, s_fw_np, s_bk_np, c_np]) - assert_allclose(y_np, y_np_2, atol=1e-4) - - # verify that state is used - y_np_2_different_s = model.predict( - [x_np, s_fw_np + 10., s_bk_np + 10., c_np]) - with pytest.raises(AssertionError): - assert_allclose(y_np, y_np_2_different_s, atol=1e-4) - - # test flat list inputs - with CustomObjectScope(custom_objects): - layer = wrappers.Bidirectional.from_config(copy.deepcopy(config)) - y = layer([x, s_for, s_bac, c]) - model = Model([x, s_for, s_bac, c], y) - model.set_weights(weights) - y_np_3 = model.predict([x_np, s_fw_np, s_bk_np, c_np]) - assert_allclose(y_np, y_np_3, atol=1e-4) - - -def test_Bidirectional_trainable(): - # test that the trainable flag propagates to the wrapped layers - x = Input(shape=(3, 2)) - layer = wrappers.Bidirectional(layers.SimpleRNN(3)) - _ = layer(x) - assert len(layer.trainable_weights) == 6 - layer.trainable = False - assert len(layer.trainable_weights) == 0 - layer.trainable = True - assert len(layer.trainable_weights) == 6 - - -def test_Bidirectional_updates(): - x = Input(shape=(3, 2)) - layer = wrappers.Bidirectional(layers.SimpleRNN(3)) - assert len(layer.updates) == 0 - assert len(layer.get_updates_for(None)) == 0 - assert len(layer.get_updates_for(x)) == 0 - layer.forward_layer.add_update(0, inputs=x) - layer.forward_layer.add_update(1, inputs=None) - layer.backward_layer.add_update(0, inputs=x) - layer.backward_layer.add_update(1, inputs=None) - assert len(layer.updates) == 4 - assert len(layer.get_updates_for(None)) == 2 - assert len(layer.get_updates_for(x)) == 2 - - -def test_Bidirectional_losses(): - x = Input(shape=(3, 2)) - layer = wrappers.Bidirectional( - layers.SimpleRNN(3, kernel_regularizer='l1', bias_regularizer='l1')) - _ = layer(x) - assert len(layer.losses) == 4 - assert len(layer.get_losses_for(None)) == 4 - assert len(layer.get_losses_for(x)) == 0 - layer.forward_layer.add_loss(0, inputs=x) - layer.forward_layer.add_loss(1, inputs=None) - layer.backward_layer.add_loss(0, inputs=x) - layer.backward_layer.add_loss(1, inputs=None) - assert len(layer.losses) == 8 - assert len(layer.get_losses_for(None)) == 6 - assert len(layer.get_losses_for(x)) == 2 - - -if __name__ == '__main__': - pytest.main([__file__]) -import warnings -import pytest - - -@pytest.fixture(autouse=True) -def clear_session_after_test(): - """This fixture runs for all the tests in the legacy directory (recursively).
- """ - with warnings.catch_warnings(): - warnings.filterwarnings('ignore', message=r'(.+) Keras 2 ', - category=UserWarning) - yield -import pytest -import json -import keras -import keras.backend as K -import numpy as np -import os - - -def test_dense_legacy_interface(): - old_layer = keras.layers.Dense(input_dim=3, output_dim=2, name='d') - new_layer = keras.layers.Dense(2, input_shape=(3,), name='d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Dense(2, bias=False, init='normal', - W_regularizer='l1', - W_constraint='maxnorm', name='d') - new_layer = keras.layers.Dense(2, use_bias=False, - kernel_initializer='normal', - kernel_regularizer='l1', - kernel_constraint='max_norm', name='d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Dense(2, bias=True, - b_regularizer='l1', - b_constraint='maxnorm', name='d') - new_layer = keras.layers.Dense(2, use_bias=True, - bias_regularizer='l1', - bias_constraint='max_norm', name='d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_dropout_legacy_interface(): - old_layer = keras.layers.Dropout(p=3, name='drop') - new_layer1 = keras.layers.Dropout(rate=3, name='drop') - new_layer2 = keras.layers.Dropout(3, name='drop') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer1.get_config()) - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer2.get_config()) - - -def test_embedding_legacy_interface(): - old_layer = keras.layers.Embedding(4, 2, name='d') - new_layer = keras.layers.Embedding(output_dim=2, input_dim=4, name='d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Embedding(input_dim=4, output_dim=2, name='d', - init='normal', - W_regularizer='l1', - W_constraint='maxnorm') - new_layer = keras.layers.Embedding(input_dim=4, output_dim=2, name='d', - embeddings_initializer='normal', - embeddings_regularizer='l1', - embeddings_constraint='max_norm') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Embedding(1, 1, dropout=0.0, name='d') - new_layer = keras.layers.Embedding(1, 1, name='d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_maxpooling1d_legacy_interface(): - old_layer = keras.layers.MaxPool1D(pool_length=2, - border_mode='valid', - name='maxpool1d') - new_layer = keras.layers.MaxPool1D(pool_size=2, - padding='valid', - name='maxpool1d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.MaxPool1D(2, padding='valid', name='maxpool1d') - new_layer = keras.layers.MaxPool1D(pool_size=2, - padding='valid', - name='maxpool1d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_avgpooling1d_legacy_interface(): - old_layer = keras.layers.AvgPool1D(pool_length=2, - border_mode='valid', - name='d') - new_layer = keras.layers.AvgPool1D(pool_size=2, padding='valid', name='d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.AvgPool1D(2, padding='valid', name='d') - new_layer = keras.layers.AvgPool1D(pool_size=2, padding='valid', name='d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_prelu_legacy_interface(): - old_layer = 
keras.layers.PReLU(init='zero', name='p') - new_layer = keras.layers.PReLU('zero', name='p') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_gaussiannoise_legacy_interface(): - old_layer = keras.layers.GaussianNoise(sigma=0.5, name='gn') - new_layer = keras.layers.GaussianNoise(stddev=0.5, name='gn') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_lstm_legacy_interface(): - old_layer = keras.layers.LSTM(input_shape=[3, 5], output_dim=2, name='d') - new_layer = keras.layers.LSTM(2, input_shape=[3, 5], name='d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.LSTM(input_shape=[3, 5], output_dim=2, name='d', - consume_less='mem') - new_layer = keras.layers.LSTM( - 2, input_shape=[3, 5], name='d', implementation=1) - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.LSTM(input_dim=5, input_length=3, - output_dim=2, name='d', consume_less='mem') - new_layer = keras.layers.LSTM( - 2, input_shape=[3, 5], name='d', implementation=1) - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.LSTM(input_dim=5, - output_dim=2, name='d', consume_less='mem') - new_layer = keras.layers.LSTM(2, input_shape=[None, 5], name='d', - implementation=1) - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.LSTM(input_shape=[3, 5], output_dim=2, name='d', - consume_less='gpu') - new_layer = keras.layers.LSTM( - 2, input_shape=[3, 5], name='d', implementation=2) - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.LSTM(2, init='normal', - inner_init='glorot_uniform', - forget_bias_init='one', - inner_activation='hard_sigmoid', - W_regularizer='l1', - U_regularizer='l1', - b_regularizer='l1', - dropout_W=0.1, - dropout_U=0.1, - name='LSTM') - - new_layer = keras.layers.LSTM(2, kernel_initializer='normal', - recurrent_initializer='glorot_uniform', - unit_forget_bias=True, - recurrent_activation='hard_sigmoid', - kernel_regularizer='l1', - recurrent_regularizer='l1', - bias_regularizer='l1', - dropout=0.1, - recurrent_dropout=0.1, - name='LSTM') - - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.LSTM(2, init='normal', - inner_init='glorot_uniform', - forget_bias_init='zero', - inner_activation='hard_sigmoid', - W_regularizer='l1', - U_regularizer='l1', - b_regularizer='l1', - dropout_W=0.1, - dropout_U=0.1, - name='LSTM') - - new_layer = keras.layers.LSTM(2, kernel_initializer='normal', - recurrent_initializer='glorot_uniform', - unit_forget_bias=True, - recurrent_activation='hard_sigmoid', - kernel_regularizer='l1', - recurrent_regularizer='l1', - bias_regularizer='l1', - dropout=0.1, - recurrent_dropout=0.1, - name='LSTM') - - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_simplernn_legacy_interface(): - old_layer = keras.layers.SimpleRNN( - input_shape=[3, 5], output_dim=2, name='d') - new_layer = keras.layers.SimpleRNN(2, input_shape=[3, 5], name='d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.SimpleRNN(2, init='normal', - inner_init='glorot_uniform', - W_regularizer='l1', - U_regularizer='l1', - b_regularizer='l1', - 
dropout_W=0.1, - dropout_U=0.1, - name='SimpleRNN') - new_layer = keras.layers.SimpleRNN(2, kernel_initializer='normal', - recurrent_initializer='glorot_uniform', - kernel_regularizer='l1', - recurrent_regularizer='l1', - bias_regularizer='l1', - dropout=0.1, - recurrent_dropout=0.1, - name='SimpleRNN') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_gru_legacy_interface(): - old_layer = keras.layers.GRU(input_shape=[3, 5], output_dim=2, name='d') - new_layer = keras.layers.GRU(2, input_shape=[3, 5], name='d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.GRU(2, init='normal', - inner_init='glorot_uniform', - inner_activation='hard_sigmoid', - W_regularizer='l1', - U_regularizer='l1', - b_regularizer='l1', - dropout_W=0.1, - dropout_U=0.1, - name='GRU') - new_layer = keras.layers.GRU(2, kernel_initializer='normal', - recurrent_initializer='glorot_uniform', - recurrent_activation='hard_sigmoid', - kernel_regularizer='l1', - recurrent_regularizer='l1', - bias_regularizer='l1', - dropout=0.1, - recurrent_dropout=0.1, - name='GRU') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_gaussiandropout_legacy_interface(): - old_layer = keras.layers.GaussianDropout(p=0.6, name='drop') - new_layer1 = keras.layers.GaussianDropout(rate=0.6, name='drop') - new_layer2 = keras.layers.GaussianDropout(0.6, name='drop') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer1.get_config()) - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer2.get_config()) - - -def test_maxpooling2d_legacy_interface(): - old_layer = keras.layers.MaxPooling2D( - pool_size=(2, 2), border_mode='valid', name='maxpool2d') - new_layer = keras.layers.MaxPool2D( - pool_size=2, padding='valid', name='maxpool2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.MaxPooling2D((2, 2), 2, 'valid', name='maxpool2d') - new_layer = keras.layers.MaxPool2D( - pool_size=2, strides=2, padding='valid', name='maxpool2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.MaxPooling2D( - (2, 2), padding='valid', dim_ordering='tf', name='maxpool2d') - new_layer = keras.layers.MaxPool2D( - pool_size=2, padding='valid', data_format='channels_last', name='maxpool2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.MaxPooling2D( - (2, 2), padding='valid', dim_ordering='th', name='maxpool2d') - new_layer = keras.layers.MaxPool2D( - pool_size=2, padding='valid', data_format='channels_first', - name='maxpool2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.MaxPooling2D( - (2, 2), padding='valid', dim_ordering='default', name='maxpool2d') - new_layer = keras.layers.MaxPool2D( - pool_size=2, padding='valid', name='maxpool2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_avgpooling2d_legacy_interface(): - old_layer = keras.layers.AveragePooling2D( - pool_size=(2, 2), border_mode='valid', name='avgpooling2d') - new_layer = keras.layers.AvgPool2D( - pool_size=(2, 2), padding='valid', name='avgpooling2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.AveragePooling2D( - (2, 2), (2, 2), 
'valid', name='avgpooling2d') - new_layer = keras.layers.AvgPool2D( - pool_size=(2, 2), strides=(2, 2), padding='valid', name='avgpooling2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.AveragePooling2D( - (2, 2), padding='valid', dim_ordering='tf', name='avgpooling2d') - new_layer = keras.layers.AvgPool2D( - pool_size=2, padding='valid', data_format='channels_last', - name='avgpooling2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.AveragePooling2D( - (2, 2), padding='valid', dim_ordering='th', name='avgpooling2d') - new_layer = keras.layers.AvgPool2D( - pool_size=2, padding='valid', data_format='channels_first', - name='avgpooling2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.AveragePooling2D( - (2, 2), padding='valid', dim_ordering='default', name='avgpooling2d') - new_layer = keras.layers.AvgPool2D( - pool_size=2, padding='valid', name='avgpooling2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_maxpooling3d_legacy_interface(): - old_layer = keras.layers.MaxPooling3D( - pool_size=(2, 2, 2), border_mode='valid', name='maxpool3d') - new_layer = keras.layers.MaxPool3D( - pool_size=(2, 2, 2), padding='valid', name='maxpool3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.MaxPooling3D( - (2, 2, 2), (2, 2, 2), 'valid', name='maxpool3d') - new_layer = keras.layers.MaxPool3D( - pool_size=(2, 2, 2), strides=(2, 2, 2), padding='valid', name='maxpool3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.MaxPooling3D( - (2, 2, 2), padding='valid', dim_ordering='tf', name='maxpool3d') - new_layer = keras.layers.MaxPool3D( - pool_size=(2, 2, 2), padding='valid', data_format='channels_last', - name='maxpool3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.MaxPooling3D( - (2, 2, 2), padding='valid', dim_ordering='th', name='maxpool3d') - new_layer = keras.layers.MaxPool3D( - pool_size=(2, 2, 2), padding='valid', data_format='channels_first', - name='maxpool3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.MaxPooling3D( - (2, 2, 2), padding='valid', dim_ordering='default', name='maxpool3d') - new_layer = keras.layers.MaxPool3D( - pool_size=(2, 2, 2), padding='valid', name='maxpool3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_avgpooling3d_legacy_interface(): - old_layer = keras.layers.AveragePooling3D( - pool_size=(2, 2, 2), border_mode='valid', name='avgpooling3d') - new_layer = keras.layers.AvgPool3D( - pool_size=(2, 2, 2), padding='valid', name='avgpooling3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.AveragePooling3D( - (2, 2, 2), (2, 2, 2), 'valid', name='avgpooling3d') - new_layer = keras.layers.AvgPool3D( - pool_size=(2, 2, 2), strides=(2, 2, 2), padding='valid', - name='avgpooling3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.AveragePooling3D( - (2, 2, 2), padding='valid', dim_ordering='tf', name='avgpooling3d') - new_layer = keras.layers.AvgPool3D( - pool_size=(2, 2, 
2), padding='valid', data_format='channels_last', - name='avgpooling3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.AveragePooling3D( - (2, 2, 2), padding='valid', dim_ordering='th', name='avgpooling3d') - new_layer = keras.layers.AvgPool3D( - pool_size=(2, 2, 2), padding='valid', data_format='channels_first', - name='avgpooling3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.AveragePooling3D( - (2, 2, 2), padding='valid', dim_ordering='default', name='avgpooling3d') - new_layer = keras.layers.AvgPool3D( - pool_size=(2, 2, 2), padding='valid', name='avgpooling3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_global_maxpooling2d_legacy_interface(): - old_layer = keras.layers.GlobalMaxPooling2D(dim_ordering='tf', - name='global_maxpool2d') - new_layer = keras.layers.GlobalMaxPool2D(data_format='channels_last', - name='global_maxpool2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.GlobalMaxPooling2D(dim_ordering='th', - name='global_maxpool2d') - new_layer = keras.layers.GlobalMaxPool2D(data_format='channels_first', - name='global_maxpool2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.GlobalMaxPooling2D(dim_ordering='default', - name='global_maxpool2d') - new_layer = keras.layers.GlobalMaxPool2D(name='global_maxpool2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_global_avgpooling2d_legacy_interface(): - old_layer = keras.layers.GlobalAveragePooling2D(dim_ordering='tf', - name='global_avgpool2d') - new_layer = keras.layers.GlobalAvgPool2D(data_format='channels_last', - name='global_avgpool2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.GlobalAveragePooling2D(dim_ordering='th', - name='global_avgpool2d') - new_layer = keras.layers.GlobalAvgPool2D(data_format='channels_first', - name='global_avgpool2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.GlobalAveragePooling2D(dim_ordering='default', - name='global_avgpool2d') - new_layer = keras.layers.GlobalAvgPool2D(name='global_avgpool2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_global_maxpooling3d_legacy_interface(): - old_layer = keras.layers.GlobalMaxPooling3D(dim_ordering='tf', - name='global_maxpool3d') - new_layer = keras.layers.GlobalMaxPool3D(data_format='channels_last', - name='global_maxpool3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.GlobalMaxPooling3D(dim_ordering='th', - name='global_maxpool3d') - new_layer = keras.layers.GlobalMaxPool3D(data_format='channels_first', - name='global_maxpool3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.GlobalMaxPooling3D(dim_ordering='default', - name='global_maxpool3d') - new_layer = keras.layers.GlobalMaxPool3D(name='global_maxpool3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_global_avgpooling3d_legacy_interface(): - old_layer = keras.layers.GlobalAveragePooling3D(dim_ordering='tf', - name='global_avgpool3d') - new_layer 
= keras.layers.GlobalAvgPool3D(data_format='channels_last', - name='global_avgpool3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.GlobalAveragePooling3D(dim_ordering='th', - name='global_avgpool3d') - new_layer = keras.layers.GlobalAvgPool3D(data_format='channels_first', - name='global_avgpool3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.GlobalAveragePooling3D(dim_ordering='default', - name='global_avgpool3d') - new_layer = keras.layers.GlobalAvgPool3D(name='global_avgpool3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_upsampling1d_legacy_interface(): - old_layer = keras.layers.UpSampling1D(length=3, name='us1d') - new_layer_1 = keras.layers.UpSampling1D(size=3, name='us1d') - new_layer_2 = keras.layers.UpSampling1D(3, name='us1d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer_1.get_config()) - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer_2.get_config()) - - -def test_upsampling2d_legacy_interface(): - old_layer = keras.layers.UpSampling2D( - (2, 2), dim_ordering='tf', name='us2d') - new_layer = keras.layers.UpSampling2D((2, 2), data_format='channels_last', - name='us2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_upsampling3d_legacy_interface(): - old_layer = keras.layers.UpSampling3D((2, 2, 2), - dim_ordering='tf', - name='us3d') - new_layer = keras.layers.UpSampling3D((2, 2, 2), - data_format='channels_last', - name='us3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_conv2d_legacy_interface(): - old_layer = keras.layers.Convolution2D(5, 3, 3, name='conv') - new_layer = keras.layers.Conv2D(5, (3, 3), name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Convolution2D(5, 3, nb_col=3, name='conv') - new_layer = keras.layers.Conv2D(5, (3, 3), name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Convolution2D(5, nb_row=3, nb_col=3, name='conv') - new_layer = keras.layers.Conv2D(5, (3, 3), name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Convolution2D(5, 3, 3, - init='normal', - subsample=(2, 2), - border_mode='valid', - dim_ordering='th', - W_regularizer='l1', - b_regularizer='l2', - W_constraint='maxnorm', - b_constraint='unitnorm', - name='conv') - new_layer = keras.layers.Conv2D(5, (3, 3), - kernel_initializer='normal', - strides=(2, 2), - padding='valid', - kernel_regularizer='l1', - bias_regularizer='l2', - kernel_constraint='max_norm', - bias_constraint='unit_norm', - data_format='channels_first', - name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_deconv2d_legacy_interface(): - old_layer = keras.layers.Deconvolution2D(5, 3, 3, (6, 7, 5), name='deconv') - new_layer = keras.layers.Conv2DTranspose(5, (3, 3), name='deconv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Deconvolution2D(5, 3, 3, output_shape=(6, 7, 5), - name='deconv') - new_layer = keras.layers.Conv2DTranspose(5, (3, 3), name='deconv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - 
- old_layer = keras.layers.Deconvolution2D(5, 3, nb_col=3, output_shape=(6, 7, 5), - name='deconv') - new_layer = keras.layers.Conv2DTranspose(5, (3, 3), name='deconv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Deconvolution2D(5, nb_row=3, nb_col=3, - output_shape=(6, 7, 5), name='deconv') - new_layer = keras.layers.Conv2DTranspose(5, (3, 3), name='deconv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Deconvolution2D(5, 3, 3, - output_shape=(6, 7, 5), - init='normal', - subsample=(2, 2), - border_mode='valid', - dim_ordering='th', - W_regularizer='l1', - b_regularizer='l2', - W_constraint='maxnorm', - b_constraint='unitnorm', - name='conv') - new_layer = keras.layers.Conv2DTranspose( - 5, (3, 3), - kernel_initializer='normal', - strides=(2, 2), - padding='valid', - kernel_regularizer='l1', - bias_regularizer='l2', - kernel_constraint='max_norm', - bias_constraint='unit_norm', - data_format='channels_first', - name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_conv1d_legacy_interface(): - old_layer = keras.layers.Convolution1D(5, - filter_length=3, - input_dim=3, - input_length=4, - name='conv') - new_layer = keras.layers.Conv1D(5, 3, name='conv', input_shape=(4, 3)) - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Convolution1D(5, 3, - init='normal', - subsample_length=2, - border_mode='valid', - W_regularizer='l1', - b_regularizer='l2', - W_constraint='maxnorm', - b_constraint='unitnorm', - name='conv') - new_layer = keras.layers.Conv1D(5, 3, - kernel_initializer='normal', - strides=2, - padding='valid', - kernel_regularizer='l1', - bias_regularizer='l2', - kernel_constraint='max_norm', - bias_constraint='unit_norm', - name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_separable_conv2d_legacy_interface(): - old_layer = keras.layers.SeparableConv2D(5, 3, 3, name='conv') - new_layer = keras.layers.SeparableConv2D(5, (3, 3), name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.SeparableConv2D(5, 3, nb_col=3, name='conv') - new_layer = keras.layers.SeparableConv2D(5, (3, 3), name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.SeparableConv2D( - 5, nb_row=3, nb_col=3, name='conv') - new_layer = keras.layers.SeparableConv2D(5, (3, 3), name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.SeparableConv2D(5, 3, 3, - init='normal', - subsample=(2, 2), - border_mode='valid', - dim_ordering='th', - depthwise_regularizer='l1', - b_regularizer='l2', - depthwise_constraint='maxnorm', - b_constraint='unitnorm', - name='conv') - new_layer = keras.layers.SeparableConv2D(5, (3, 3), - depthwise_initializer='normal', - pointwise_initializer='normal', - strides=(2, 2), - padding='valid', - depthwise_regularizer='l1', - bias_regularizer='l2', - depthwise_constraint='max_norm', - bias_constraint='unit_norm', - data_format='channels_first', - name='conv') - old_config = json.dumps(old_layer.get_config()) - new_config = json.dumps(new_layer.get_config()) - assert old_config == new_config - - -def test_conv3d_legacy_interface(): - old_layer = keras.layers.Convolution3D(5, 3, 
3, 4, name='conv') - new_layer = keras.layers.Conv3D(5, (3, 3, 4), name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Convolution3D(5, 3, 3, kernel_dim3=4, name='conv') - new_layer = keras.layers.Conv3D(5, (3, 3, 4), name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Convolution3D(5, 3, - kernel_dim2=3, - kernel_dim3=4, - name='conv') - new_layer = keras.layers.Conv3D(5, (3, 3, 4), name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Convolution3D(5, - kernel_dim1=3, - kernel_dim2=3, - kernel_dim3=4, - name='conv') - new_layer = keras.layers.Conv3D(5, (3, 3, 4), name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.Convolution3D(5, 3, 3, 4, - init='normal', - subsample=(2, 2, 2), - border_mode='valid', - dim_ordering='th', - W_regularizer='l1', - b_regularizer='l2', - W_constraint='maxnorm', - b_constraint='unitnorm', - name='conv') - new_layer = keras.layers.Conv3D(5, (3, 3, 4), - kernel_initializer='normal', - strides=(2, 2, 2), - padding='valid', - kernel_regularizer='l1', - bias_regularizer='l2', - kernel_constraint='max_norm', - bias_constraint='unit_norm', - data_format='channels_first', - name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_convlstm2d_legacy_interface(): - old_layer = keras.layers.ConvLSTM2D(5, 3, 3, name='conv') - new_layer = keras.layers.ConvLSTM2D(5, (3, 3), name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.ConvLSTM2D(5, 3, nb_col=3, name='conv') - new_layer = keras.layers.ConvLSTM2D(5, (3, 3), name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.ConvLSTM2D(5, nb_row=3, nb_col=3, name='conv') - new_layer = keras.layers.ConvLSTM2D(5, (3, 3), name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.ConvLSTM2D(5, 3, 3, - init='normal', - inner_init='uniform', - forget_bias_init='one', - inner_activation='relu', - subsample=(2, 2), - border_mode='valid', - dim_ordering='th', - W_regularizer='l1', - U_regularizer='l2', - b_regularizer='l2', - dropout_W=0.2, - dropout_U=0.1, - name='conv') - new_layer = keras.layers.ConvLSTM2D(5, (3, 3), - kernel_initializer='normal', - recurrent_initializer='uniform', - unit_forget_bias=True, - recurrent_activation='relu', - strides=(2, 2), - padding='valid', - kernel_regularizer='l1', - recurrent_regularizer='l2', - bias_regularizer='l2', - data_format='channels_first', - dropout=0.2, - recurrent_dropout=0.1, - name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_batchnorm_legacy_interface(): - old_layer = keras.layers.BatchNormalization(mode=0, name='bn') - new_layer = keras.layers.BatchNormalization(name='bn') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - old_layer = keras.layers.BatchNormalization(mode=0, - beta_init='one', - gamma_init='uniform', - name='bn') - new_layer = keras.layers.BatchNormalization(beta_initializer='ones', - gamma_initializer='uniform', - name='bn') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - 
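-# The legacy-interface tests in this file all follow one pattern: construct a
-# layer with Keras 1 argument names, construct it again with the Keras 2
-# names, and assert that the two serialized configs match. Below is a minimal
-# sketch of the kind of keyword renaming that makes this possible; the helper
-# name `convert_legacy_kwargs` and its mapping table are illustrative
-# assumptions, not the actual `keras.legacy.interfaces` implementation.
-def convert_legacy_kwargs(kwargs, conversions):
-    # Return a copy of `kwargs` with old-style names replaced by new ones,
-    # e.g. `border_mode` -> `padding`, `init` -> `kernel_initializer`.
-    converted = dict(kwargs)
-    for old_name, new_name in conversions:
-        if old_name in converted:
-            converted[new_name] = converted.pop(old_name)
-    return converted
-
-# Example:
-# convert_legacy_kwargs({'border_mode': 'valid', 'init': 'normal'},
-#                       [('border_mode', 'padding'),
-#                        ('init', 'kernel_initializer')])
-# -> {'padding': 'valid', 'kernel_initializer': 'normal'}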
-def test_atrousconv1d_legacy_interface(): - old_layer = keras.layers.AtrousConvolution1D(5, 3, - init='normal', - subsample_length=2, - border_mode='valid', - W_regularizer='l1', - b_regularizer='l2', - W_constraint='maxnorm', - b_constraint='unitnorm', - atrous_rate=2, - name='conv') - new_layer = keras.layers.Conv1D(5, 3, - kernel_initializer='normal', - strides=2, - padding='valid', - kernel_regularizer='l1', - bias_regularizer='l2', - kernel_constraint='max_norm', - bias_constraint='unit_norm', - dilation_rate=2, - name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_atrousconv2d_legacy_interface(): - old_layer = keras.layers.AtrousConvolution2D( - 5, 3, 3, - atrous_rate=(2, 2), - init='normal', - subsample=(2, 2), - border_mode='valid', - dim_ordering='th', - W_regularizer='l1', - b_regularizer='l2', - W_constraint='maxnorm', - b_constraint='unitnorm', - name='conv') - new_layer = keras.layers.Conv2D(5, (3, 3), - kernel_initializer='normal', - strides=(2, 2), - padding='valid', - kernel_regularizer='l1', - bias_regularizer='l2', - kernel_constraint='max_norm', - bias_constraint='unit_norm', - data_format='channels_first', - dilation_rate=(2, 2), - name='conv') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_zeropadding2d_legacy_interface(): - old_layer = keras.layers.ZeroPadding2D(padding={'right_pad': 4, - 'bottom_pad': 2, - 'top_pad': 1, - 'left_pad': 3}, - dim_ordering='tf', - name='zp2d') - new_layer = keras.layers.ZeroPadding2D(((1, 2), (3, 4)), - data_format='channels_last', - name='zp2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_zeropadding3d_legacy_interface(): - old_layer = keras.layers.ZeroPadding3D((2, 2, 2), - dim_ordering='tf', - name='zp3d') - new_layer = keras.layers.ZeroPadding3D((2, 2, 2), - data_format='channels_last', - name='zp3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_cropping2d_legacy_interface(): - old_layer = keras.layers.Cropping2D(dim_ordering='tf', name='c2d') - new_layer = keras.layers.Cropping2D( - data_format='channels_last', name='c2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -def test_cropping3d_legacy_interface(): - old_layer = keras.layers.Cropping3D(dim_ordering='tf', name='c3d') - new_layer = keras.layers.Cropping3D( - data_format='channels_last', name='c3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer.get_config()) - - -@pytest.mark.skipif(K.backend() in {'tensorflow', 'cntk'} - and 'TRAVIS_PYTHON_VERSION' in os.environ, - reason='Generators cannot use `spawn`.') -def test_generator_methods_interface(): - def train_generator(): - x = np.random.randn(2, 2) - y = np.random.randint(0, 2, size=[2, 1]) - while True: - yield (x, y) - - def val_generator(): - x = np.random.randn(2, 2) - y = np.random.randint(0, 2, size=[2, 1]) - while True: - yield (x, y) - - def pred_generator(): - x = np.random.randn(1, 2) - while True: - yield x - - x = keras.layers.Input(shape=(2, )) - y = keras.layers.Dense(2)(x) - - model = keras.models.Model(inputs=x, outputs=y) - model.compile(optimizer='rmsprop', - loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - model.fit_generator(generator=train_generator(), - samples_per_epoch=1, - validation_data=val_generator(), - nb_val_samples=1, - nb_worker=1, pickle_safe=True, max_q_size=3) - - 
model.evaluate_generator(generator=train_generator(), - val_samples=2, - nb_worker=1, pickle_safe=False, max_q_size=3) - model.predict_generator(generator=pred_generator(), - val_samples=2, - nb_worker=1, pickle_safe=False, max_q_size=3) - - -def test_spatialdropout1d_legacy_interface(): - old_layer = keras.layers.SpatialDropout1D(p=0.6, name='sd1d') - new_layer_1 = keras.layers.SpatialDropout1D(rate=0.6, name='sd1d') - new_layer_2 = keras.layers.SpatialDropout1D(0.6, name='sd1d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer_1.get_config()) - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer_2.get_config()) - - -def test_spatialdropout2d_legacy_interface(): - old_layer = keras.layers.SpatialDropout2D(p=0.5, - dim_ordering='tf', - name='sd2d') - new_layer_1 = keras.layers.SpatialDropout2D(rate=0.5, - data_format='channels_last', - name='sd2d') - new_layer_2 = keras.layers.SpatialDropout2D(0.5, - data_format='channels_last', - name='sd2d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer_1.get_config()) - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer_2.get_config()) - - -def test_spatialdropout3d_legacy_interface(): - old_layer = keras.layers.SpatialDropout3D(p=0.5, - dim_ordering='tf', - name='sd3d') - new_layer_1 = keras.layers.SpatialDropout3D(rate=0.5, - data_format='channels_last', - name='sd3d') - new_layer_2 = keras.layers.SpatialDropout3D(0.5, - data_format='channels_last', - name='sd3d') - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer_1.get_config()) - assert json.dumps(old_layer.get_config()) == json.dumps( - new_layer_2.get_config()) - - -def test_optimizer_get_updates_legacy_interface(): - for optimizer_cls in [keras.optimizers.RMSprop, - keras.optimizers.SGD, - keras.optimizers.Adadelta, - keras.optimizers.Adam, - keras.optimizers.Adagrad, - keras.optimizers.Nadam, - keras.optimizers.Adamax]: - optimizer = optimizer_cls() - param = keras.backend.variable(0.) 
- loss = keras.backend.mean(param) - constraints = {param: lambda x: x} - params = [param] - optimizer.get_updates(params, constraints, loss) - optimizer.get_updates(params, constraints, loss=loss) - optimizer.get_updates(loss, params) - optimizer.get_updates(loss, params=params) - optimizer.get_updates(loss=loss, params=params) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest - -from keras.utils.test_utils import layer_test -from keras.legacy import layers as legacy_layers -from keras import regularizers -from keras import constraints - - -def test_highway(): - layer_test(legacy_layers.Highway, - kwargs={}, - input_shape=(3, 2)) - - layer_test(legacy_layers.Highway, - kwargs={'W_regularizer': regularizers.l2(0.01), - 'b_regularizer': regularizers.l1(0.01), - 'activity_regularizer': regularizers.l2(0.01), - 'W_constraint': constraints.MaxNorm(1), - 'b_constraint': constraints.MaxNorm(1)}, - input_shape=(3, 2)) - - -def test_maxout_dense(): - layer_test(legacy_layers.MaxoutDense, - kwargs={'output_dim': 3}, - input_shape=(3, 2)) - - layer_test(legacy_layers.MaxoutDense, - kwargs={'output_dim': 3, - 'W_regularizer': regularizers.l2(0.01), - 'b_regularizer': regularizers.l1(0.01), - 'activity_regularizer': regularizers.l2(0.01), - 'W_constraint': constraints.MaxNorm(1), - 'b_constraint': constraints.MaxNorm(1)}, - input_shape=(3, 2)) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from keras.utils import conv_utils -from keras import backend as K - - -def test_normalize_tuple(): - assert conv_utils.normalize_tuple(5, 2, 'kernel_size') == (5, 5) - assert conv_utils.normalize_tuple([7, 9], 2, 'kernel_size') == (7, 9) - - with pytest.raises(ValueError): - conv_utils.normalize_tuple(None, 2, 'kernel_size') - with pytest.raises(ValueError): - conv_utils.normalize_tuple([2, 3, 4], 2, 'kernel_size') - with pytest.raises(ValueError): - conv_utils.normalize_tuple(['str', 'impossible'], 2, 'kernel_size') - - -def test_invalid_data_format(): - with pytest.raises(ValueError): - K.normalize_data_format('channels_middle') - - -def test_invalid_padding(): - with pytest.raises(ValueError): - conv_utils.normalize_padding('diagonal') - - -def test_invalid_convert_kernel(): - with pytest.raises(ValueError): - conv_utils.convert_kernel(np.zeros((10, 20))) - - -def test_conv_output_length(): - assert conv_utils.conv_output_length(None, 7, 'same', 1) is None - assert conv_utils.conv_output_length(224, 7, 'same', 1) == 224 - assert conv_utils.conv_output_length(224, 7, 'same', 2) == 112 - assert conv_utils.conv_output_length(32, 5, 'valid', 1) == 28 - assert conv_utils.conv_output_length(32, 5, 'valid', 2) == 14 - assert conv_utils.conv_output_length(32, 5, 'causal', 1) == 32 - assert conv_utils.conv_output_length(32, 5, 'causal', 2) == 16 - assert conv_utils.conv_output_length(32, 5, 'full', 1) == 36 - assert conv_utils.conv_output_length(32, 5, 'full', 2) == 18 - - with pytest.raises(AssertionError): - conv_utils.conv_output_length(32, 5, 'diagonal', 2) - - -def test_conv_input_length(): - assert conv_utils.conv_input_length(None, 7, 'same', 1) is None - assert conv_utils.conv_input_length(112, 7, 'same', 1) == 112 - assert conv_utils.conv_input_length(112, 7, 'same', 2) == 223 - assert conv_utils.conv_input_length(28, 5, 'valid', 1) == 32 - assert conv_utils.conv_input_length(14, 5, 'valid', 2) == 31 - assert conv_utils.conv_input_length(36, 5, 'full', 1) == 32 - assert conv_utils.conv_input_length(18, 5, 'full', 2) == 31 - - with 
pytest.raises(AssertionError): - conv_utils.conv_input_length(18, 5, 'diagonal', 2) - - -def test_deconv_length(): - assert conv_utils.deconv_length(None, 1, 7, 'same', None) is None - assert conv_utils.deconv_length(224, 1, 7, 'same', None) == 224 - assert conv_utils.deconv_length(224, 2, 7, 'same', None) == 448 - assert conv_utils.deconv_length(32, 1, 5, 'valid', None) == 36 - assert conv_utils.deconv_length(32, 2, 5, 'valid', None) == 67 - assert conv_utils.deconv_length(32, 1, 5, 'full', None) == 28 - assert conv_utils.deconv_length(32, 2, 5, 'full', None) == 59 - assert conv_utils.deconv_length(224, 1, 7, 'same', 0) == 224 - assert conv_utils.deconv_length(224, 2, 7, 'same', 0) == 447 - assert conv_utils.deconv_length(224, 2, 7, 'same', 1) == 448 - assert conv_utils.deconv_length(32, 1, 5, 'valid', 0) == 36 - assert conv_utils.deconv_length(32, 2, 5, 'valid', 0) == 67 - assert conv_utils.deconv_length(32, 2, 5, 'valid', 1) == 68 - assert conv_utils.deconv_length(6, 1, 3, 'full', 0) == 4 - assert conv_utils.deconv_length(6, 2, 3, 'full', 1) == 10 - assert conv_utils.deconv_length(6, 2, 3, 'full', 2) == 11 - - -if __name__ == '__main__': - pytest.main([__file__]) -"""Tests for functions in data_utils.py. -""" -import os -import time -import sys -import tarfile -import threading -import shutil -import zipfile -from itertools import cycle -import multiprocessing as mp -import numpy as np -import pytest -import six -from six.moves.urllib.parse import urljoin -from six.moves.urllib.request import pathname2url -from six.moves import reload_module - -from flaky import flaky - -from keras.utils import GeneratorEnqueuer -from keras.utils import OrderedEnqueuer -from keras.utils import Sequence -from keras.utils.data_utils import _hash_file -from keras.utils.data_utils import get_file -from keras.utils.data_utils import validate_file -from keras import backend as K -from keras.backend import load_backend - -pytestmark = pytest.mark.skipif( - six.PY2 and 'TRAVIS_PYTHON_VERSION' in os.environ, - reason='Temporarily disabled until the use_multiprocessing problem is solved') - -skip_generators = pytest.mark.skipif(K.backend() in {'tensorflow', 'cntk'} and - 'TRAVIS_PYTHON_VERSION' in os.environ, - reason='Generators do not work with `spawn`.') - - -def use_spawn(func): - """Decorator which uses `spawn` when possible. - This is useful on Travis to avoid memory issues. - """ - - @six.wraps(func) - def wrapper(*args, **kwargs): - if sys.version_info > (3, 4) and os.name != 'nt': - mp.set_start_method('spawn', force=True) - out = func(*args, **kwargs) - mp.set_start_method('fork', force=True) - else: - out = func(*args, **kwargs) - return out - - return wrapper - - -if sys.version_info < (3,): - def next(x): - return x.next() - - -@pytest.fixture -def in_tmpdir(tmpdir): - """Runs a function in a temporary directory. - - Checks that the directory is empty afterwards. - """ - with tmpdir.as_cwd(): - yield None - assert not tmpdir.listdir() - - -def test_data_utils(in_tmpdir): - """Tests get_file from a url, plus extraction and validation.
- """ - dirname = 'data_utils' - - with open('test.txt', 'w') as text_file: - text_file.write('Float like a butterfly, sting like a bee.') - - with tarfile.open('test.tar.gz', 'w:gz') as tar_file: - tar_file.add('test.txt') - - with zipfile.ZipFile('test.zip', 'w') as zip_file: - zip_file.write('test.txt') - - origin = urljoin('file://', pathname2url(os.path.abspath('test.tar.gz'))) - - path = get_file(dirname, origin, untar=True) - filepath = path + '.tar.gz' - data_keras_home = os.path.dirname( - os.path.dirname(os.path.abspath(filepath))) - assert data_keras_home == os.path.dirname(load_backend._config_path) - os.remove(filepath) - - _keras_home = os.path.join(os.path.abspath('.'), '.keras') - if not os.path.exists(_keras_home): - os.makedirs(_keras_home) - os.environ['KERAS_HOME'] = _keras_home - reload_module(load_backend) - path = get_file(dirname, origin, untar=True) - filepath = path + '.tar.gz' - data_keras_home = os.path.dirname( - os.path.dirname(os.path.abspath(filepath))) - assert data_keras_home == os.path.dirname(load_backend._config_path) - os.environ.pop('KERAS_HOME') - shutil.rmtree(_keras_home) - reload_module(load_backend) - - path = get_file(dirname, origin, untar=True) - filepath = path + '.tar.gz' - hashval_sha256 = _hash_file(filepath) - hashval_md5 = _hash_file(filepath, algorithm='md5') - path = get_file(dirname, origin, md5_hash=hashval_md5, untar=True) - path = get_file(filepath, origin, file_hash=hashval_sha256, extract=True) - assert os.path.exists(filepath) - assert validate_file(filepath, hashval_sha256) - assert validate_file(filepath, hashval_md5) - os.remove(filepath) - os.remove('test.tar.gz') - - origin = urljoin('file://', pathname2url(os.path.abspath('test.zip'))) - - hashval_sha256 = _hash_file('test.zip') - hashval_md5 = _hash_file('test.zip', algorithm='md5') - path = get_file(dirname, origin, md5_hash=hashval_md5, extract=True) - path = get_file(dirname, origin, file_hash=hashval_sha256, extract=True) - assert os.path.exists(path) - assert validate_file(path, hashval_sha256) - assert validate_file(path, hashval_md5) - - os.remove(path) - os.remove(os.path.join(os.path.dirname(path), 'test.txt')) - os.remove('test.txt') - os.remove('test.zip') - - -"""Enqueuers Tests""" - - -class threadsafe_iter: - """Takes an iterator/generator and makes it thread-safe by - serializing call to the `next` method of given iterator/generator. - """ - - def __init__(self, it): - self.it = it - self.lock = threading.Lock() - - def __iter__(self): - return self - - def __next__(self): - return self.next() - - def next(self): - with self.lock: - return next(self.it) - - -def threadsafe_generator(f): - """A decorator that takes a generator function and makes it thread-safe. 
- """ - - def g(*a, **kw): - return threadsafe_iter(f(*a, **kw)) - - return g - - -class DummySequence(Sequence): - def __init__(self, shape, value=1.0): - self.shape = shape - self.inner = value - - def __getitem__(self, item): - time.sleep(0.05) - return np.ones(self.shape, dtype=np.uint32) * item * self.inner - - def __len__(self): - return 100 - - def on_epoch_end(self): - self.inner *= 5.0 - - -class FaultSequence(Sequence): - def __getitem__(self, item): - raise IndexError(item, 'is not present') - - def __len__(self): - return 100 - - def on_epoch_end(self): - pass - - -@threadsafe_generator -def create_generator_from_sequence_threads(ds): - for i in cycle(range(len(ds))): - yield ds[i] - - -def create_generator_from_sequence_pcs(ds): - for i in cycle(range(len(ds))): - yield ds[i] - - -def test_generator_enqueuer_threads(): - enqueuer = GeneratorEnqueuer(create_generator_from_sequence_threads( - DummySequence([3, 10, 10, 3])), use_multiprocessing=False) - enqueuer.start(3, 10) - gen_output = enqueuer.get() - acc = [] - for i in range(100): - acc.append(int(next(gen_output)[0, 0, 0, 0])) - - """ - Not comparing the order since it is not guaranteed. - It may get ordered, but not a lot, one thread can take - the GIL before he was supposed to. - """ - assert len(set(acc) - set(range(100))) == 0, "Output is not the same" - enqueuer.stop() - - -@skip_generators -def test_generator_enqueuer_processes(): - enqueuer = GeneratorEnqueuer(create_generator_from_sequence_pcs( - DummySequence([3, 10, 10, 3])), use_multiprocessing=True) - enqueuer.start(3, 10) - gen_output = enqueuer.get() - acc = [] - for i in range(100): - acc.append(int(next(gen_output)[0, 0, 0, 0])) - assert acc != list(range(100)), ('Order was keep in GeneratorEnqueuer ' - 'with processes') - enqueuer.stop() - - -def test_generator_enqueuer_threadsafe(): - enqueuer = GeneratorEnqueuer(create_generator_from_sequence_pcs( - DummySequence([3, 10, 10, 3])), use_multiprocessing=False) - enqueuer.start(3, 10) - gen_output = enqueuer.get() - with pytest.raises(RuntimeError) as e: - [next(gen_output) for _ in range(10)] - assert 'thread-safe' in str(e.value) - enqueuer.stop() - - -# TODO: resolve flakyness issue. 
-# TODO: resolve flakiness issue. Tracked with #11587
-@flaky(rerun_filter=lambda err, *args: issubclass(err[0], StopIteration))
-def test_generator_enqueuer_fail_threads():
-    enqueuer = GeneratorEnqueuer(create_generator_from_sequence_threads(
-        FaultSequence()), use_multiprocessing=False)
-    enqueuer.start(3, 10)
-    gen_output = enqueuer.get()
-    with pytest.raises(IndexError):
-        next(gen_output)
-
-
-@skip_generators
-def test_generator_enqueuer_fail_processes():
-    enqueuer = GeneratorEnqueuer(create_generator_from_sequence_pcs(
-        FaultSequence()), use_multiprocessing=True)
-    enqueuer.start(3, 10)
-    gen_output = enqueuer.get()
-    with pytest.raises(IndexError):
-        next(gen_output)
-
-
-def test_ordered_enqueuer_threads():
-    enqueuer = OrderedEnqueuer(DummySequence([3, 10, 10, 3]),
-                               use_multiprocessing=False)
-    enqueuer.start(3, 10)
-    gen_output = enqueuer.get()
-    acc = []
-    for i in range(100):
-        acc.append(next(gen_output)[0, 0, 0, 0])
-    assert acc == list(range(100)), ('Order was not kept in OrderedEnqueuer '
-                                     'with threads')
-    enqueuer.stop()
-
-
-def test_ordered_enqueuer_threads_not_ordered():
-    enqueuer = OrderedEnqueuer(DummySequence([3, 10, 10, 3]),
-                               use_multiprocessing=False,
-                               shuffle=True)
-    enqueuer.start(3, 10)
-    gen_output = enqueuer.get()
-    acc = []
-    for i in range(100):
-        acc.append(next(gen_output)[0, 0, 0, 0])
-    assert acc != list(range(100)), ('Order was kept in OrderedEnqueuer '
-                                     'with threads despite shuffle=True')
-    enqueuer.stop()
-
-
-@use_spawn
-def test_ordered_enqueuer_processes():
-    enqueuer = OrderedEnqueuer(DummySequence([3, 10, 10, 3]),
-                               use_multiprocessing=True)
-    enqueuer.start(3, 10)
-    gen_output = enqueuer.get()
-    acc = []
-    for i in range(100):
-        acc.append(next(gen_output)[0, 0, 0, 0])
-    assert acc == list(range(100)), ('Order was not kept in OrderedEnqueuer '
-                                     'with processes')
-    enqueuer.stop()
-
-
-def test_ordered_enqueuer_fail_threads():
-    enqueuer = OrderedEnqueuer(FaultSequence(), use_multiprocessing=False)
-    enqueuer.start(3, 10)
-    gen_output = enqueuer.get()
-    with pytest.raises(IndexError):
-        next(gen_output)
-
-
-@use_spawn
-def test_on_epoch_end_processes():
-    enqueuer = OrderedEnqueuer(DummySequence([3, 10, 10, 3]),
-                               use_multiprocessing=True)
-    enqueuer.start(3, 10)
-    gen_output = enqueuer.get()
-    acc = []
-    for i in range(200):
-        acc.append(next(gen_output)[0, 0, 0, 0])
-    assert acc[100:] == list([k * 5 for k in range(100)]), (
-        'Order was not kept in OrderedEnqueuer with processes')
-    enqueuer.stop()
-
-
-@use_spawn
-def test_context_switch():
-    enqueuer = OrderedEnqueuer(DummySequence([3, 10, 10, 3]),
-                               use_multiprocessing=True)
-    enqueuer2 = OrderedEnqueuer(DummySequence([3, 10, 10, 3], value=15),
-                                use_multiprocessing=True)
-    enqueuer.start(3, 10)
-    enqueuer2.start(3, 10)
-    gen_output = enqueuer.get()
-    gen_output2 = enqueuer2.get()
-    acc = []
-    for i in range(100):
-        acc.append(next(gen_output)[0, 0, 0, 0])
-    assert acc[-1] == 99
-    # One epoch is completed so enqueuer will switch the Sequence
-
-    acc = []
-    for i in range(100):
-        acc.append(next(gen_output2)[0, 0, 0, 0])
-    assert acc[-1] == 99 * 15
-    # One epoch has been completed so enqueuer2 will switch
-
-    # Be sure that both Sequences were updated
-    assert next(gen_output)[0, 0, 0, 0] == 0
-    assert next(gen_output)[0, 0, 0, 0] == 5
-    assert next(gen_output2)[0, 0, 0, 0] == 0
-    assert next(gen_output2)[0, 0, 0, 0] == 15 * 5
-
-    # Tear down everything
-    enqueuer.stop()
-    enqueuer2.stop()
-
-
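-# (Recall that DummySequence.on_epoch_end multiplies `inner` by 5, so after
-# one complete pass batch i comes back as i * 5 -- the expectation behind
-# the epoch-end assertions above and below.)
-
-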
-def test_on_epoch_end_threads():
-    enqueuer = OrderedEnqueuer(DummySequence([3, 10, 10, 3]),
-                               use_multiprocessing=False)
-    enqueuer.start(3, 10)
-    gen_output = enqueuer.get()
-    acc = []
-    for i in range(100):
-        acc.append(next(gen_output)[0, 0, 0, 0])
-    acc = []
-    for i in range(100):
-        acc.append(next(gen_output)[0, 0, 0, 0])
-    assert acc == list([k * 5 for k in range(100)]), (
-        'Order was not kept in OrderedEnqueuer with threads')
-    enqueuer.stop()
-
-
-@use_spawn
-def test_ordered_enqueuer_fail_processes():
-    enqueuer = OrderedEnqueuer(FaultSequence(), use_multiprocessing=True)
-    enqueuer.start(3, 10)
-    gen_output = enqueuer.get()
-    with pytest.raises(IndexError):
-        next(gen_output)
-
-
-@threadsafe_generator
-def create_finite_generator_from_sequence_threads(ds):
-    for i in range(len(ds)):
-        yield ds[i]
-
-
-def create_finite_generator_from_sequence_pcs(ds):
-    for i in range(len(ds)):
-        yield ds[i]
-
-
-# TODO: resolve flakiness issue. Tracked with #11586
-@flaky(rerun_filter=lambda err, *args: issubclass(err[0], AssertionError))
-def test_finite_generator_enqueuer_threads():
-    enqueuer = GeneratorEnqueuer(create_finite_generator_from_sequence_threads(
-        DummySequence([3, 10, 10, 3])), use_multiprocessing=False)
-    enqueuer.start(3, 10)
-    gen_output = enqueuer.get()
-    acc = []
-    for output in gen_output:
-        acc.append(int(output[0, 0, 0, 0]))
-    assert set(acc) == set(range(100)), "Output is not the same"
-    enqueuer.stop()
-
-
-@skip_generators
-def test_finite_generator_enqueuer_processes():
-    enqueuer = GeneratorEnqueuer(create_finite_generator_from_sequence_pcs(
-        DummySequence([3, 10, 10, 3])), use_multiprocessing=True)
-    enqueuer.start(3, 10)
-    gen_output = enqueuer.get()
-    acc = []
-    for output in gen_output:
-        acc.append(int(output[0, 0, 0, 0]))
-    assert acc != list(range(100)), ('Order was kept in GeneratorEnqueuer '
-                                     'with processes')
-    enqueuer.stop()
-
-
-@pytest.mark.skipif('TRAVIS_PYTHON_VERSION' in os.environ,
-                    reason='Takes 150s to run')
-def test_missing_inputs():
-    missing_idx = 10
-
-    class TimeOutSequence(DummySequence):
-        def __getitem__(self, item):
-            if item == missing_idx:
-                time.sleep(120)
-            return super(TimeOutSequence, self).__getitem__(item)
-
-    enqueuer = GeneratorEnqueuer(create_finite_generator_from_sequence_pcs(
-        TimeOutSequence([3, 2, 2, 3])), use_multiprocessing=True)
-    enqueuer.start(3, 10)
-    gen_output = enqueuer.get()
-    with pytest.warns(UserWarning, match='An input could not be retrieved.'):
-        for _ in range(4 * missing_idx):
-            next(gen_output)
-
-    enqueuer = OrderedEnqueuer(TimeOutSequence([3, 2, 2, 3]),
-                               use_multiprocessing=True)
-    enqueuer.start(3, 10)
-    gen_output = enqueuer.get()
-    warning_msg = "The input {} could not be retrieved.".format(missing_idx)
-    with pytest.warns(UserWarning, match=warning_msg):
-        for _ in range(11):
-            next(gen_output)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__])
-import sys
-import pytest
-import numpy as np
-import marshal
-from keras.utils.generic_utils import custom_object_scope
-from keras.utils.generic_utils import has_arg
-from keras.utils.generic_utils import Progbar
-from keras.utils.generic_utils import func_dump
-from keras.utils.generic_utils import func_load
-from keras import activations
-from keras import regularizers
-
-
-def test_progbar():
-    values_s = [None,
-                [['key1', 1], ['key2', 1e-4]],
-                [['key3', 1], ['key2', 1e-4]]]
-
-    for target in (len(values_s) - 1, None):
-        for verbose in (0, 1, 2):
-            bar = Progbar(target, width=30, verbose=verbose, interval=0.05)
-            for current, values in enumerate(values_s):
-                bar.update(current, values=values)
-
-
-def test_custom_objects_scope():
-
-    def custom_fn():
-        
pass - - class CustomClass(object): - pass - - with custom_object_scope({'CustomClass': CustomClass, - 'custom_fn': custom_fn}): - act = activations.get('custom_fn') - assert act == custom_fn - cl = regularizers.get('CustomClass') - assert cl.__class__ == CustomClass - - -@pytest.mark.parametrize('fn, name, accept_all, expected', [ - ('f(x)', 'x', False, True), - ('f(x)', 'y', False, False), - ('f(x)', 'y', True, False), - ('f(x, y)', 'y', False, True), - ('f(x, y=1)', 'y', False, True), - ('f(x, **kwargs)', 'x', False, True), - ('f(x, **kwargs)', 'y', False, False), - ('f(x, **kwargs)', 'y', True, True), - ('f(x, y=1, **kwargs)', 'y', False, True), - # Keyword-only arguments (Python 3 only) - ('f(x, *args, y=1)', 'y', False, True), - ('f(x, *args, y=1)', 'z', True, False), - ('f(x, *, y=1)', 'x', False, True), - ('f(x, *, y=1)', 'y', False, True), - # lambda - (lambda x: x, 'x', False, True), - (lambda x: x, 'y', False, False), - (lambda x: x, 'y', True, False), -]) -def test_has_arg(fn, name, accept_all, expected): - if isinstance(fn, str): - context = dict() - try: - exec('def {}: pass'.format(fn), context) - except SyntaxError: - if sys.version_info >= (3,): - raise - pytest.skip('Function is not compatible with Python 2') - # Sometimes exec adds builtins to the context - context.pop('__builtins__', None) - fn, = context.values() - - assert has_arg(fn, name, accept_all) is expected - - -@pytest.mark.xfail(sys.version_info < (3, 3), - reason='inspect API does not reveal positional-only arguments') -def test_has_arg_positional_only(): - assert has_arg(pow, 'x') is False - - -@pytest.mark.parametrize( - 'test_function_type', - ('simple function', 'closured function')) -def test_func_dump_and_load(test_function_type): - - if test_function_type == 'simple function': - def test_func(): - return r'\u' - - elif test_function_type == 'closured function': - def get_test_func(): - x = r'\u' - - def test_func(): - return x - return test_func - test_func = get_test_func() - else: - raise Exception('Unknown test case for test_func_dump_and_load') - - serialized = func_dump(test_func) - deserialized = func_load(serialized) - assert deserialized.__code__ == test_func.__code__ - assert deserialized.__defaults__ == test_func.__defaults__ - assert deserialized.__closure__ == test_func.__closure__ - - -def test_func_dump_and_load_closure(): - y = 0 - def test_func(x): return x + y - serialized, _, closure = func_dump(test_func) - deserialized = func_load(serialized, closure=closure) - assert deserialized.__code__ == test_func.__code__ - assert deserialized.__defaults__ == test_func.__defaults__ - assert deserialized.__closure__ == test_func.__closure__ - - -@pytest.mark.parametrize( - 'test_func', [activations.softmax, np.argmax, lambda x: x**2, lambda x: x]) -def test_func_dump_and_load_backwards_compat(test_func): - # this test ensures that models serialized prior to version 2.1.2 can still be - # deserialized - - # see: - # https://github.com/evhub/keras/blob/2.1.1/keras/utils/generic_utils.py#L166 - serialized = marshal.dumps(test_func.__code__).decode('raw_unicode_escape') - - deserialized = func_load(serialized, defaults=test_func.__defaults__) - assert deserialized.__code__ == test_func.__code__ - assert deserialized.__defaults__ == test_func.__defaults__ - assert deserialized.__closure__ == test_func.__closure__ - - -if __name__ == '__main__': - pytest.main([__file__]) -'''Tests for functions in io_utils.py. 
-'''
-import os
-import io
-import pytest
-
-from contextlib import contextmanager
-
-from keras.models import Sequential
-from keras.layers import Dense
-from keras.utils.io_utils import HDF5Matrix
-from keras.utils.io_utils import H5Dict
-from keras.utils.io_utils import ask_to_proceed_with_overwrite
-from keras.utils.io_utils import save_to_binary_h5py
-from keras.utils.io_utils import load_from_binary_h5py
-from numpy.testing import assert_allclose
-from numpy.testing import assert_array_equal
-import numpy as np
-import six
-import h5py
-import tempfile
-try:
-    from pathlib import Path
-    from unittest.mock import patch
-except ImportError:
-    from pathlib2 import Path
-    from mock import patch
-
-
-@pytest.fixture
-def in_tmpdir(tmpdir):
-    """Runs a function in a temporary directory.
-
-    Checks that the directory is empty afterwards.
-    """
-    with tmpdir.as_cwd():
-        yield None
-    assert not tmpdir.listdir()
-
-
-def create_dataset(h5_path='test.h5'):
-    X = np.random.randn(200, 10).astype('float32')
-    y = np.random.randint(0, 2, size=(200, 1))
-    with h5py.File(h5_path, 'w') as f:
-        # Creating dataset to store features
-        X_dset = f.create_dataset('my_data', (200, 10), dtype='f')
-        X_dset[:] = X
-        # Creating dataset to store labels
-        y_dset = f.create_dataset('my_labels', (200, 1), dtype='i')
-        y_dset[:] = y
-
-
-def test_io_utils(in_tmpdir):
-    '''Tests the HDF5Matrix code using the sample from @jfsantos at
-    https://gist.github.com/jfsantos/e2ef822c744357a4ed16ec0c885100a3
-    '''
-    _, h5_path = tempfile.mkstemp('.h5')
-    create_dataset(h5_path)
-
-    # Instantiating HDF5Matrix for the training set,
-    # which is a slice of the first 150 elements
-    X_train = HDF5Matrix(h5_path, 'my_data', start=0, end=150)
-    y_train = HDF5Matrix(h5_path, 'my_labels', start=0, end=150)
-
-    # Likewise for the test set
-    X_test = HDF5Matrix(h5_path, 'my_data', start=150, end=200)
-    y_test = HDF5Matrix(h5_path, 'my_labels', start=150, end=200)
-
-    # HDF5Matrix behaves more or less like a Numpy matrix with regards to indexing
-    assert y_train.shape == (
-        150, 1), 'HDF5Matrix shape should match input array'
-    # But it does not support negative indices, so don't try print(X_train[-1])
-
-    assert y_train.dtype == np.dtype('i'), (
-        'HDF5Matrix dtype should match input array')
-    assert y_train.ndim == 2, 'HDF5Matrix ndim should match input array'
-    assert y_train.size == 150, 'HDF5Matrix size should match input array'
-
-    model = Sequential()
-    model.add(Dense(64, input_shape=(10,), activation='relu'))
-    model.add(Dense(1, activation='sigmoid'))
-
-    model.compile(loss='binary_crossentropy', optimizer='sgd')
-
-    # Note: you have to use shuffle='batch' or False with HDF5Matrix
-    model.fit(X_train, y_train, batch_size=32, shuffle='batch', verbose=False)
-    # test that evaluation and prediction don't crash and
-    # return reasonable results
-    out_pred = model.predict(X_test, batch_size=32, verbose=False)
-    out_eval = model.evaluate(X_test, y_test, batch_size=32, verbose=False)
-
-    assert out_pred.shape == (50, 1), 'Prediction shape does not match'
-    assert out_eval.shape == (), 'Shape of evaluation does not match'
-    assert out_eval > 0, (
-        'Evaluation value does not meet criteria: {}'.format(out_eval))
-
-    # test slicing for shortened array
-    assert len(X_train[0:]) == len(X_train), 'Incorrect shape for sliced data'
-
-    # test __getitem__
-    with pytest.raises(IndexError):
-        X_train[1000]
-    with pytest.raises(IndexError):
-        X_train[1000:1001]
-    with pytest.raises(IndexError):
-        X_train[[1000, 1001]]
-    with pytest.raises(IndexError):
-        
X_train[six.moves.range(1000, 1001)] - with pytest.raises(IndexError): - X_train[np.array([1000])] - with pytest.raises(TypeError): - X_train[None] - assert (X_train[0] == X_train[:1][0]).all() - assert (X_train[[0, 1]] == X_train[:2]).all() - assert (X_train[np.array([0, 1])] == X_train[:2]).all() - - # test normalizer - def normalizer(x): return x + 1 - normalized_X_train = HDF5Matrix(h5_path, 'my_data', start=0, end=150, - normalizer=normalizer) - assert np.isclose(normalized_X_train[0][0], X_train[0][0] + 1) - - # test resizing normalizer - def normalizer_rs(x): return x[:, ::2] - normalized_rs_X_train = HDF5Matrix(h5_path, 'my_data', start=0, end=150, - normalizer=normalizer_rs) - assert (normalized_rs_X_train.shape[1] == 5) - - # test dtype changing normalizer - def normalizer_dtype(x): return x.astype(np.uint8) - normalized_dtype_X_train = HDF5Matrix(h5_path, 'my_data', start=0, end=150, - normalizer=normalizer_dtype) - assert (normalized_dtype_X_train.dtype == np.uint8) - - os.remove(h5_path) - - -def test_ask_to_proceed_with_overwrite(): - with patch('six.moves.input') as mock: - mock.return_value = 'y' - assert ask_to_proceed_with_overwrite('/tmp/not_exists') - - mock.return_value = 'n' - assert not ask_to_proceed_with_overwrite('/tmp/not_exists') - - -def test_H5Dict_attrs(): - _, h5_path = tempfile.mkstemp('.h5') - - # test both HDF5 and dict implementations - paths = [h5_path, dict()] - - for path in paths: - f = H5Dict(path, mode='w') - - # str - f['x'] = 'abcd' - f['x2'] = u'abcd' - - # list - f['y'] = [b'efg', b'hij', b'klmn'] - f['y2'] = (b'asd', b'sdf', b'dfg') - - # ndarray - array = np.random.random((4, 5, 512)) - f['z'] = array - - f.close() - del f - - f = H5Dict(path, mode='r') - - assert f['x'] == 'abcd' - assert f['x2'] == u'abcd' - assert f['y'] == [b'efg', b'hij', b'klmn'] - assert list(f['y2']) == [b'asd', b'sdf', b'dfg'] - assert_allclose(f['z'], array) - - f.close() - os.remove(h5_path) - - -def test_H5Dict_groups(): - _, h5_path = tempfile.mkstemp('.h5') - - # test both HDF5 and dict implementations - paths = [h5_path, dict()] - - for path in paths: - f = H5Dict(path, mode='w') - - group1 = f['group1'] - group2 = group1['group2'] - - group2['x'] = 'abcd' - - group3 = group2['group3'] - group3['y'] = [b'efg', b'hij', b'klmn'] - - group4 = group3['group4'] - array = np.random.random((4, 5, 512)) - group4['z'] = array - - f.close() - - f = H5Dict(path, mode='r') - - assert 'group1' in f - group1 = f['group1'] - - assert 'group2' in group1 - group2 = group1['group2'] - assert group2['x'] == 'abcd' - - assert 'group3' in group2 - group3 = group2['group3'] - assert group3['y'] == [b'efg', b'hij', b'klmn'] - - assert 'group4' in group3 - group4 = group3['group4'] - assert_allclose(group4['z'], array) - - f.close() - os.remove(h5_path) - - -def test_H5Dict_accepts_pathlib_Path(): - """GitHub issue: 11459""" - _, h5_path = tempfile.mkstemp('.h5') - - f = H5Dict(Path(h5_path), mode='w') - f['x'] = 'abcd' - f.close() - del f - - f = H5Dict(Path(h5_path), mode='r') - assert f['x'] == 'abcd' - f.close() - - os.remove(h5_path) - - -@contextmanager -def temp_filename(suffix): - """Context that returns a temporary filename and deletes the file on exit if - it still exists (so that this is not forgotten). 
- """ - _, temp_fname = tempfile.mkstemp(suffix=suffix) - yield temp_fname - if os.path.exists(temp_fname): - os.remove(temp_fname) - - -def test_save_to_binary_h5py_direct_to_file(): - data = np.random.random((3, 5)) - - def save_function(h5file_): - h5file_['data'] = data - - with temp_filename('.h5') as fname: - with open(fname, 'wb') as f: - save_to_binary_h5py(save_function, f) - - with h5py.File(fname) as h5file: - data_rec = h5file['data'][:] - - assert_array_equal(data_rec, data) - - -def test_save_to_binary_h5py_to_bytes_io(): - data = np.random.random((3, 5)) - - def save_function(h5file_): - h5file_['data'] = data - - file_like = io.BytesIO() - save_to_binary_h5py(save_function, file_like) - - file_like.seek(0) - - with temp_filename('.h5') as fname: - with open(fname, 'wb') as f: - f.write(file_like.read()) - - with h5py.File(fname) as h5file: - data_rec = h5file['data'][:] - - assert_array_equal(data_rec, data) - - -def test_load_from_binary_h5py_direct_from_file(): - data = np.random.random((3, 5)) - - def load_function(h5file_): - return h5file_['data'][:] - - with temp_filename('.h5') as fname: - with h5py.File(fname, 'w') as h5file: - h5file['data'] = data - - with open(fname, 'rb') as f: - data_rec = load_from_binary_h5py(load_function, f) - - assert_array_equal(data_rec, data) - - -def test_load_from_binary_h5py_from_bytes_io(): - data = np.random.random((3, 5)) - - def load_function(h5file_): - return h5file_['data'][:] - - with temp_filename('.h5') as fname: - with h5py.File(fname, 'w') as h5file: - h5file['data'] = data - - file_like = io.BytesIO() - with open(fname, 'rb') as f: - file_like.write(f.read()) - - file_like.seek(0) - data_rec = load_from_binary_h5py(load_function, file_like) - - assert_array_equal(data_rec, data) - - -def test_save_load_binary_h5py(): - - data1 = np.random.random((3, 5)) - data2 = np.random.random((2, 3, 5)) - attr = 1 - datas = [data1, data2, attr] - - def save_function(h5file_): - h5file_['data1'] = data1 - h5file_['subgroup/data2'] = data2 - h5file_['data1'].attrs['attr'] = attr - - def load_function(h5file_): - d1 = h5file_['data1'][:] - d2 = h5file_['subgroup/data2'][:] - a = h5file_['data1'].attrs['attr'] - return d1, d2, a - - file_like = io.BytesIO() - save_to_binary_h5py(save_function, file_like) - file_like.seek(0) - datas_rec = load_from_binary_h5py(load_function, file_like) - for d_rec, d in zip(datas_rec, datas): - assert_array_equal(d_rec, d) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from numpy.testing import assert_allclose -from keras import backend as K -from keras.layers import Conv2D -from keras.layers import Dense -from keras.layers import Flatten -from keras.models import Sequential -from keras.utils import layer_utils - - -def test_convert_weights(): - def get_model(shape, data_format): - model = Sequential() - model.add(Conv2D(filters=2, - kernel_size=(4, 3), - input_shape=shape, - data_format=data_format)) - model.add(Flatten()) - model.add(Dense(5)) - return model - - for data_format in ['channels_first', 'channels_last']: - if data_format == 'channels_first': - shape = (3, 5, 5) - target_shape = (5, 5, 3) - prev_shape = (2, 3, 2) - def flip(x): return np.flip(np.flip(x, axis=2), axis=3) - def transpose(x): return np.transpose(x, (0, 2, 3, 1)) - target_data_format = 'channels_last' - elif data_format == 'channels_last': - shape = (5, 5, 3) - target_shape = (3, 5, 5) - prev_shape = (2, 2, 3) - def flip(x): return np.flip(np.flip(x, axis=1), axis=2) - def 
transpose(x): return np.transpose(x, (0, 3, 1, 2)) - target_data_format = 'channels_first' - - model1 = get_model(shape, data_format) - model2 = get_model(target_shape, target_data_format) - conv = K.function([model1.input], [model1.layers[0].output]) - - x = np.random.random((1,) + shape) - - # Test equivalence of convert_all_kernels_in_model - convout1 = conv([x])[0] - layer_utils.convert_all_kernels_in_model(model1) - convout2 = flip(conv([flip(x)])[0]) - - assert_allclose(convout1, convout2, atol=1e-5) - - # Test equivalence of convert_dense_weights_data_format - out1 = model1.predict(x) - layer_utils.convert_dense_weights_data_format( - model1.layers[2], prev_shape, target_data_format) - for (src, dst) in zip(model1.layers, model2.layers): - dst.set_weights(src.get_weights()) - out2 = model2.predict(transpose(x)) - - assert_allclose(out1, out2, atol=1e-5) - - -if __name__ == '__main__': - pytest.main([__file__]) -"""These tests are not meant to be run on CI. -""" -from __future__ import print_function - -import keras -from keras import backend as K -from keras.utils import multi_gpu_model - -import numpy as np -import pytest -import time -import tempfile -import tensorflow as tf -from keras.preprocessing.image import ImageDataGenerator - - -pytestmark = pytest.mark.skipif(K.backend() != 'tensorflow', - reason='Requires TF.') -if K.backend() == 'tensorflow': - available_devices = keras.utils.multi_gpu_utils._get_available_devices() - available_devices = [keras.utils.multi_gpu_utils._normalize_device_name(name) - for name in available_devices] - pytestmark = pytest.mark.skipif('/gpu:7' not in available_devices, - reason='Requires 8 GPUs.') - - -def test_multi_gpu_simple_model(): - print('####### test simple model') - num_samples = 1000 - input_dim = 10 - output_dim = 1 - hidden_dim = 10 - gpus = 8 - target_gpu_id = [0, 2, 4] - epochs = 2 - model = keras.models.Sequential() - model.add(keras.layers.Dense(hidden_dim, - input_shape=(input_dim,))) - model.add(keras.layers.Dense(output_dim)) - - x = np.random.random((num_samples, input_dim)) - y = np.random.random((num_samples, output_dim)) - - parallel_model = multi_gpu_model(model, gpus=gpus) - parallel_model.compile(loss='mse', optimizer='rmsprop') - parallel_model.fit(x, y, epochs=epochs) - - parallel_model = multi_gpu_model(model, gpus=target_gpu_id) - parallel_model.compile(loss='mse', optimizer='rmsprop') - parallel_model.fit(x, y, epochs=epochs) - - -def test_multi_gpu_multi_io_model(): - print('####### test multi-io model') - num_samples = 1000 - input_dim_a = 10 - input_dim_b = 5 - output_dim_a = 1 - output_dim_b = 2 - hidden_dim = 10 - gpus = 8 - target_gpu_id = [0, 2, 4] - epochs = 2 - - input_a = keras.Input((input_dim_a,)) - input_b = keras.Input((input_dim_b,)) - a = keras.layers.Dense(hidden_dim)(input_a) - b = keras.layers.Dense(hidden_dim)(input_b) - c = keras.layers.concatenate([a, b]) - output_a = keras.layers.Dense(output_dim_a)(c) - output_b = keras.layers.Dense(output_dim_b)(c) - model = keras.models.Model([input_a, input_b], [output_a, output_b]) - - a_x = np.random.random((num_samples, input_dim_a)) - b_x = np.random.random((num_samples, input_dim_b)) - a_y = np.random.random((num_samples, output_dim_a)) - b_y = np.random.random((num_samples, output_dim_b)) - - parallel_model = multi_gpu_model(model, gpus=gpus) - parallel_model.compile(loss='mse', optimizer='rmsprop') - parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs) - - parallel_model = multi_gpu_model(model, gpus=target_gpu_id) - 
parallel_model.compile(loss='mse', optimizer='rmsprop') - parallel_model.fit([a_x, b_x], [a_y, b_y], epochs=epochs) - - -def test_multi_gpu_invalid_devices(): - input_shape = (1000, 10) - model = keras.models.Sequential() - model.add(keras.layers.Dense(10, - activation='relu', - input_shape=input_shape[1:])) - model.add(keras.layers.Dense(1, activation='sigmoid')) - - x = np.random.random(input_shape) - y = np.random.random((input_shape[0], 1)) - with pytest.raises(ValueError): - parallel_model = multi_gpu_model(model, gpus=10) - parallel_model.compile(loss='mse', optimizer='rmsprop') - parallel_model.fit(x, y, epochs=2) - - with pytest.raises(ValueError): - parallel_model = multi_gpu_model(model, gpus=[0, 2, 4, 6, 8]) - parallel_model.compile(loss='mse', optimizer='rmsprop') - parallel_model.fit(x, y, epochs=2) - - with pytest.raises(ValueError): - parallel_model = multi_gpu_model(model, gpus=1) - parallel_model.compile(loss='mse', optimizer='rmsprop') - parallel_model.fit(x, y, epochs=2) - - with pytest.raises(ValueError): - parallel_model = multi_gpu_model(model, gpus=[0]) - parallel_model.compile(loss='mse', optimizer='rmsprop') - parallel_model.fit(x, y, epochs=2) - - -def test_serialization(): - model = keras.models.Sequential() - model.add(keras.layers.Dense(3, - input_shape=(4,))) - model.add(keras.layers.Dense(4)) - - x = np.random.random((100, 4)) - y = np.random.random((100, 4)) - - parallel_model = multi_gpu_model(model, gpus=2) - parallel_model.compile(loss='mse', optimizer='rmsprop') - parallel_model.fit(x, y, epochs=1) - - ref_output = parallel_model.predict(x) - - _, fname = tempfile.mkstemp('.h5') - parallel_model.save(fname) - - K.clear_session() - parallel_model = keras.models.load_model(fname) - output = parallel_model.predict(x) - np.testing.assert_allclose(ref_output, output, atol=1e-5) - - -def multi_gpu_application_np_array_benchmark(): - print('####### Xception benchmark - np i/o') - model_cls = keras.applications.Xception - - num_samples = 1000 - height = 224 - width = 224 - num_classes = 1000 - epochs = 4 - batch_size = 40 - x = np.random.random((num_samples, height, width, 3)) - y = np.random.random((num_samples, num_classes)) - - # Baseline - model = model_cls(weights=None, - input_shape=(height, width, 3), - classes=num_classes) - model.compile(loss='categorical_crossentropy', - optimizer='rmsprop') - - # Training - start_time = time.time() - model.fit(x, y, epochs=epochs) - total_time = time.time() - start_time - print('baseline training:', total_time) - - # Inference - start_time = time.time() - model.predict(x) - total_time = time.time() - start_time - print('baseline inference:', total_time) - - for i in range(2, 9, 2): - K.clear_session() - with tf.device('/cpu:0'): - model = model_cls(weights=None, - input_shape=(height, width, 3), - classes=num_classes) - parallel_model = multi_gpu_model(model, gpus=i) - parallel_model.compile(loss='categorical_crossentropy', - optimizer='rmsprop') - - start_time = time.time() - parallel_model.fit(x, y, epochs=epochs, batch_size=batch_size) - total_time = time.time() - start_time - print('%d gpus training:' % i, total_time) - - # Inference - start_time = time.time() - parallel_model.predict(x, batch_size=batch_size) - total_time = time.time() - start_time - print('%d gpus inference:' % i, total_time) - - -def multi_gpu_application_folder_generator_benchmark(): - """Before running this test: - - wget https://s3.amazonaws.com/img-datasets/cats_and_dogs_small.zip - unzip cats_and_dogs_small.zip - """ - print('####### 
Xception benchmark - folder generator i/o') - model_cls = keras.applications.Xception - - height = 150 - width = 150 - num_classes = 2 - epochs = 3 - steps_per_epoch = 100 - batch_size = 64 - - # Baseline - model = model_cls(weights=None, - input_shape=(height, width, 3), - classes=num_classes) - model.compile(loss='categorical_crossentropy', - optimizer='rmsprop') - - datagen = ImageDataGenerator( - rotation_range=40, - width_shift_range=0.2, - height_shift_range=0.2, - shear_range=0.2, - zoom_range=0.2, - horizontal_flip=True, - fill_mode='nearest') - train_dir = '/home/ubuntu/cats_and_dogs_small/train' # Change this - train_gen = datagen.flow_from_directory( - train_dir, - target_size=(height, width), - batch_size=batch_size, - class_mode='categorical') - - # Training - start_time = time.time() - model.fit_generator(train_gen, - steps_per_epoch=steps_per_epoch, - epochs=epochs, - workers=4) - total_time = time.time() - start_time - print('baseline training:', total_time) - - for i in range(2, 9): - K.clear_session() - with tf.device('/cpu:0'): - model = model_cls(weights=None, - input_shape=(height, width, 3), - classes=num_classes) - parallel_model = multi_gpu_model(model, gpus=i) - parallel_model.compile(loss='categorical_crossentropy', - optimizer='rmsprop') - - train_gen = datagen.flow_from_directory( - train_dir, - target_size=(height, width), - batch_size=batch_size, - class_mode='categorical') - - start_time = time.time() - parallel_model.fit_generator( - train_gen, - steps_per_epoch=steps_per_epoch, - epochs=epochs, - workers=4 * i) - total_time = time.time() - start_time - print('%d gpus training:' % i, total_time) - - -def test_multi_gpu_with_multi_input_layers(): - inputs = keras.Input((4, 3)) - init_state = keras.Input((3,)) - outputs = keras.layers.SimpleRNN( - 3, return_sequences=True)(inputs, initial_state=init_state) - x = [np.random.randn(2, 4, 3), np.random.randn(2, 3)] - y = np.random.randn(2, 4, 3) - model = keras.models.Model([inputs, init_state], outputs) - parallel_model = multi_gpu_model(model, 2) - parallel_model.compile(loss='mean_squared_error', optimizer='adam') - parallel_model.train_on_batch(x, y) - - -def test_multi_gpu_with_siamese(): - input_shape = (3,) - nested_model = keras.models.Sequential([ - keras.layers.Dense(32, input_shape=input_shape), - keras.layers.Dense(1) - ], name='nested') - - input1 = keras.Input(input_shape) - input2 = keras.Input(input_shape) - score1 = nested_model(input1) - score2 = nested_model(input2) - score_sum = keras.layers.Add(name='add')([score1, score2]) - - siamese = keras.models.Model(inputs=[input1, input2], - outputs=[score_sum, score1, score2], - name='siamese') - parallel_siamese = multi_gpu_model(siamese, 2) - assert parallel_siamese.output_names == ['add', 'nested_1', 'nested_2'] - - -if __name__ == '__main__': - pytest.main([__file__]) -"""Tests for functions in np_utils.py. 
-""" -import numpy as np -import pytest -from keras.utils import to_categorical - - -def test_to_categorical(): - num_classes = 5 - shapes = [(1,), (3,), (4, 3), (5, 4, 3), (3, 1), (3, 2, 1)] - expected_shapes = [(1, num_classes), - (3, num_classes), - (4, 3, num_classes), - (5, 4, 3, num_classes), - (3, num_classes), - (3, 2, num_classes)] - labels = [np.random.randint(0, num_classes, shape) for shape in shapes] - one_hots = [to_categorical(label, num_classes) for label in labels] - for label, one_hot, expected_shape in zip(labels, - one_hots, - expected_shapes): - # Check shape - assert one_hot.shape == expected_shape - # Make sure there are only 0s and 1s - assert np.array_equal(one_hot, one_hot.astype(bool)) - # Make sure there is exactly one 1 in a row - assert np.all(one_hot.sum(axis=-1) == 1) - # Get original labels back from one hots - assert np.all(np.argmax(one_hot, -1).reshape(label.shape) == label) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import os -import sys -import numpy as np -from keras import Input, Model - -from keras.layers import Conv2D, Bidirectional -from keras.layers import Dense -from keras.layers import Embedding -from keras.layers import Flatten -from keras.layers import LSTM -from keras.layers import TimeDistributed -from keras.models import Sequential -from keras.utils import vis_utils - - -def test_plot_model(): - model = Sequential() - model.add(Conv2D(2, kernel_size=(2, 3), input_shape=(3, 5, 5), name='conv')) - model.add(Flatten(name='flat')) - model.add(Dense(5, name='dense1')) - vis_utils.plot_model(model, to_file='model1.png', show_layer_names=False) - os.remove('model1.png') - - model = Sequential() - model.add(LSTM(16, return_sequences=True, input_shape=(2, 3), name='lstm')) - model.add(TimeDistributed(Dense(5, name='dense2'))) - vis_utils.plot_model(model, to_file='model2.png', show_shapes=True) - os.remove('model2.png') - - inner_input = Input(shape=(2, 3), dtype='float32', name='inner_input') - inner_lstm = Bidirectional( - LSTM(16, name='inner_lstm'), name='bd')(inner_input) - encoder = Model(inner_input, inner_lstm, name='Encoder_Model') - outer_input = Input(shape=(5, 2, 3), dtype='float32', name='input') - inner_encoder = TimeDistributed(encoder, name='td_encoder')(outer_input) - lstm = LSTM(16, name='outer_lstm')(inner_encoder) - preds = Dense(5, activation='softmax', name='predictions')(lstm) - model = Model(outer_input, preds) - vis_utils.plot_model(model, to_file='model3.png', show_shapes=True, - expand_nested=True, dpi=300) - os.remove('model3.png') - - -def test_plot_sequential_embedding(): - """Fixes #11376""" - model = Sequential() - model.add(Embedding(10000, 256, input_length=400, name='embed')) - vis_utils.plot_model(model, - to_file='model1.png', - show_shapes=True, - show_layer_names=True) - os.remove('model1.png') - - -if __name__ == '__main__': - pytest.main([__file__]) -from setuptools import setup -from setuptools import find_packages - - -setup(name='keras-rl', - version='0.4.2', - description='Deep Reinforcement Learning for Keras', - author='Matthias Plappert', - author_email='matthiasplappert@me.com', - url='https://github.com/keras-rl/keras-rl', - license='MIT', - install_requires=['keras>=2.0.7'], - extras_require={ - 'gym': ['gym'], - }, - packages=find_packages()) -# -*- coding: utf-8 -*- -""" This code and the entire documentation setup was adopted from the Keras repository: -https://github.com/fchollet/keras/blob/master/docs/autogen.py -""" -from __future__ import print_function -from 
__future__ import unicode_literals - -import rl.agents -import rl.processors -import rl.core -import rl -import re -import inspect -import os -import shutil -import sys -if sys.version[0] == '2': - reload(sys) - sys.setdefaultencoding('utf8') - - -EXCLUDE = { - -} - -PAGES = [ - { - 'page': 'core.md', - 'all_module_classes': [rl.core], - }, - { - 'page': 'processors.md', - 'all_module_classes': [rl.processors], - }, - { - 'page': 'agents/overview.md', - 'functions': [ - rl.core.Agent.fit, - rl.core.Agent.test, - rl.core.Agent.compile, - rl.core.Agent.get_config, - rl.core.Agent.reset_states, - rl.core.Agent.load_weights, - rl.core.Agent.save_weights, - ], - }, - { - 'page': 'agents/dqn.md', - 'classes': [rl.agents.DQNAgent], - }, - { - 'page': 'agents/naf.md', - 'classes': [rl.agents.NAFAgent], - }, - { - 'page': 'agents/ddpg.md', - 'classes': [rl.agents.DDPGAgent], - }, - { - 'page': 'agents/sarsa.md', - 'classes': [rl.agents.SARSAAgent], - }, - { - 'page': 'agents/cem.md', - 'classes': [rl.agents.CEMAgent], - }, -] - - -ROOT_MODULE_NAME = 'rl.' - - -def get_earliest_class_that_defined_member(member, cls): - ancestors = get_classes_ancestors([cls]) - result = None - for ancestor in ancestors: - if member in dir(ancestor): - result = ancestor - if not result: - return cls - return result - - -def get_classes_ancestors(classes): - ancestors = [] - for cls in classes: - ancestors += cls.__bases__ - filtered_ancestors = [] - for ancestor in ancestors: - if ancestor.__name__ in ['object']: - continue - filtered_ancestors.append(ancestor) - if filtered_ancestors: - return filtered_ancestors + get_classes_ancestors(filtered_ancestors) - else: - return filtered_ancestors - - -def get_function_signature(function, method=True): - signature = getattr(function, '_legacy_support_signature', None) - if signature is None: - signature = inspect.getargspec(function) - defaults = signature.defaults - if method: - args = signature.args[1:] - else: - args = signature.args - if defaults: - kwargs = zip(args[-len(defaults):], defaults) - args = args[:-len(defaults)] - else: - kwargs = [] - st = '%s.%s(' % (function.__module__, function.__name__) - for a in args: - st += str(a) + ', ' - for a, v in kwargs: - if isinstance(v, str): - v = '\'' + v + '\'' - st += str(a) + '=' + str(v) + ', ' - if kwargs or args: - return st[:-2] + ')' - else: - return st + ')' - - -def get_class_signature(cls): - try: - class_signature = get_function_signature(cls.__init__) - class_signature = class_signature.replace('__init__', cls.__name__) - except: - # in case the class inherits from object and does not - # define __init__ - class_signature = cls.__module__ + '.' 
+ cls.__name__ + '()' - return class_signature - - -def class_to_source_link(cls): - module_name = cls.__module__ - assert module_name.startswith(ROOT_MODULE_NAME) - path = module_name.replace('.', '/') - path += '.py' - line = inspect.getsourcelines(cls)[-1] - link = 'https://github.com/keras-rl/keras-rl/blob/master/' + \ - path + '#L' + str(line) - return '[[source]](' + link + ')' - - -def function_to_source_link(fn): - module_name = fn.__module__ - assert module_name.startswith(ROOT_MODULE_NAME) - path = module_name.replace('.', '/') - path += '.py' - line = inspect.getsourcelines(fn)[-1] - link = 'https://github.com/keras-rl/keras-rl/blob/master/' + \ - path + '#L' + str(line) - return '[[source]](' + link + ')' - - -def code_snippet(snippet): - result = '```python\n' - result += snippet + '\n' - result += '```\n' - return result - - -def process_class_docstring(docstring): - docstring = re.sub(r'\n # (.*)\n', - r'\n __\1__\n\n', - docstring) - - docstring = re.sub(r' ([^\s\\]+) \((.*)\n', - r' - __\1__ (\2\n', - docstring) - - docstring = docstring.replace(' ' * 5, '\t\t') - docstring = docstring.replace(' ' * 3, '\t') - docstring = docstring.replace(' ', '') - return docstring - - -def process_function_docstring(docstring): - docstring = re.sub(r'\n # (.*)\n', - r'\n __\1__\n\n', - docstring) - docstring = re.sub(r'\n # (.*)\n', - r'\n __\1__\n\n', - docstring) - - docstring = re.sub(r' ([^\s\\]+) \((.*)\n', - r' - __\1__ (\2\n', - docstring) - - docstring = docstring.replace(' ' * 6, '\t\t') - docstring = docstring.replace(' ' * 4, '\t') - docstring = docstring.replace(' ', '') - return docstring - - -print('Cleaning up existing sources directory.') -if os.path.exists('sources'): - shutil.rmtree('sources') - -print('Populating sources directory with templates.') -for subdir, dirs, fnames in os.walk('templates'): - for fname in fnames: - new_subdir = subdir.replace('templates', 'sources') - if not os.path.exists(new_subdir): - os.makedirs(new_subdir) - if fname[-3:] == '.md': - fpath = os.path.join(subdir, fname) - new_fpath = fpath.replace('templates', 'sources') - shutil.copy(fpath, new_fpath) - -# Take care of index page. 
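-# The index page is produced by splicing everything from the first '##'
-# heading of the repository README into a {{autogenerated}} placeholder in
-# templates/index.md -- e.g. (illustrative contents, not the real files):
-#
-#   template:  '# Keras-RL\n\n{{autogenerated}}'
-#   result:    '# Keras-RL\n\n## Overview\n...'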
-readme = open('../README.md').read() -index = open('templates/index.md').read() -index = index.replace('{{autogenerated}}', readme[readme.find('##'):]) -f = open('sources/index.md', 'w') -f.write(index) -f.close() - -print('Starting autogeneration.') -for page_data in PAGES: - blocks = [] - classes = page_data.get('classes', []) - for module in page_data.get('all_module_classes', []): - module_classes = [] - for name in dir(module): - if name[0] == '_' or name in EXCLUDE: - continue - module_member = getattr(module, name) - if inspect.isclass(module_member): - cls = module_member - if cls.__module__ == module.__name__: - if cls not in module_classes: - module_classes.append(cls) - module_classes.sort(key=lambda x: id(x)) - classes += module_classes - - for cls in classes: - subblocks = [] - signature = get_class_signature(cls) - subblocks.append('' + - class_to_source_link(cls) + '') - subblocks.append('### ' + cls.__name__ + '\n') - subblocks.append(code_snippet(signature)) - docstring = cls.__doc__ - if docstring: - subblocks.append(process_class_docstring(docstring)) - blocks.append('\n'.join(subblocks)) - - functions = page_data.get('functions', []) - for module in page_data.get('all_module_functions', []): - module_functions = [] - for name in dir(module): - if name[0] == '_' or name in EXCLUDE: - continue - module_member = getattr(module, name) - if inspect.isfunction(module_member): - function = module_member - if module.__name__ in function.__module__: - if function not in module_functions: - module_functions.append(function) - module_functions.sort(key=lambda x: id(x)) - functions += module_functions - - for function in functions: - subblocks = [] - signature = get_function_signature(function, method=False) - signature = signature.replace(function.__module__ + '.', '') - subblocks.append('' + - function_to_source_link(function) + '') - subblocks.append('### ' + function.__name__ + '\n') - subblocks.append(code_snippet(signature)) - docstring = function.__doc__ - if docstring: - subblocks.append(process_function_docstring(docstring)) - blocks.append('\n\n'.join(subblocks)) - - if not blocks: - raise RuntimeError('Found no content for page ' + - page_data['page']) - - mkdown = '\n----\n\n'.join(blocks) - # save module page. - # Either insert content into existing page, - # or create page otherwise - page_name = page_data['page'] - path = os.path.join('sources', page_name) - if os.path.exists(path): - template = open(path).read() - assert '{{autogenerated}}' in template, ('Template found for ' + path + - ' but missing {{autogenerated}} tag.') - mkdown = template.replace('{{autogenerated}}', mkdown) - print('...inserting autogenerated content into template:', path) - else: - print('...creating new page with autogenerated content:', path) - subdir = os.path.dirname(path) - if not os.path.exists(subdir): - os.makedirs(subdir) - open(path, 'w').write(mkdown) -import numpy as np -import gym - -from keras.models import Sequential -from keras.layers import Dense, Activation, Flatten -from keras.optimizers import Adam - -from rl.agents.cem import CEMAgent -from rl.memory import EpisodeParameterMemory - -ENV_NAME = 'CartPole-v0' - - -# Get the environment and extract the number of actions. 
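-# For CartPole-v0 the observation is a 4-vector and there are two discrete
-# actions, so the expressions used below come out as:
-#
-#   env.observation_space.shape          # (4,)
-#   (1,) + env.observation_space.shape   # (1, 4) -- the Flatten input_shape
-#   env.action_space.n                   # 2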
-env = gym.make(ENV_NAME) -np.random.seed(123) -env.seed(123) - -nb_actions = env.action_space.n -obs_dim = env.observation_space.shape[0] - -# Option 1 : Simple model -model = Sequential() -model.add(Flatten(input_shape=(1,) + env.observation_space.shape)) -model.add(Dense(nb_actions)) -model.add(Activation('softmax')) - -# Option 2: deep network -# model = Sequential() -# model.add(Flatten(input_shape=(1,) + env.observation_space.shape)) -# model.add(Dense(16)) -# model.add(Activation('relu')) -# model.add(Dense(16)) -# model.add(Activation('relu')) -# model.add(Dense(16)) -# model.add(Activation('relu')) -# model.add(Dense(nb_actions)) -# model.add(Activation('softmax')) - - -print(model.summary()) - - -# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and -# even the metrics! -memory = EpisodeParameterMemory(limit=1000, window_length=1) - -cem = CEMAgent(model=model, nb_actions=nb_actions, memory=memory, - batch_size=50, nb_steps_warmup=2000, train_interval=50, elite_frac=0.05) -cem.compile() - -# Okay, now it's time to learn something! We visualize the training here for show, but this -# slows down training quite a lot. You can always safely abort the training prematurely using -# Ctrl + C. -cem.fit(env, nb_steps=100000, visualize=False, verbose=2) - -# After training is done, we save the best weights. -cem.save_weights('cem_{}_params.h5f'.format(ENV_NAME), overwrite=True) - -# Finally, evaluate our algorithm for 5 episodes. -cem.test(env, nb_episodes=5, visualize=True) -import numpy as np - -import gym -from gym import wrappers - -from keras.models import Sequential, Model -from keras.layers import Dense, Activation, Flatten, Input, Concatenate -from keras.optimizers import Adam - -from rl.processors import WhiteningNormalizerProcessor -from rl.agents import DDPGAgent -from rl.memory import SequentialMemory -from rl.random import OrnsteinUhlenbeckProcess - - -class MujocoProcessor(WhiteningNormalizerProcessor): - def process_action(self, action): - return np.clip(action, -1., 1.) - - -ENV_NAME = 'HalfCheetah-v1' - - -# Get the environment and extract the number of actions. -env = gym.make(ENV_NAME) -env = wrappers.Monitor(env, '/tmp/{}'.format(ENV_NAME), force=True) -np.random.seed(123) -env.seed(123) -assert len(env.action_space.shape) == 1 -nb_actions = env.action_space.shape[0] - -# Next, we build a very simple model. -actor = Sequential() -actor.add(Flatten(input_shape=(1,) + env.observation_space.shape)) -actor.add(Dense(400)) -actor.add(Activation('relu')) -actor.add(Dense(300)) -actor.add(Activation('relu')) -actor.add(Dense(nb_actions)) -actor.add(Activation('tanh')) -print(actor.summary()) - -action_input = Input(shape=(nb_actions,), name='action_input') -observation_input = Input( - shape=(1,) + env.observation_space.shape, name='observation_input') -flattened_observation = Flatten()(observation_input) -x = Dense(400)(flattened_observation) -x = Activation('relu')(x) -x = Concatenate()([x, action_input]) -x = Dense(300)(x) -x = Activation('relu')(x) -x = Dense(1)(x) -x = Activation('linear')(x) -critic = Model(inputs=[action_input, observation_input], outputs=x) -print(critic.summary()) - -# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and -# even the metrics! 
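-# The Ornstein-Uhlenbeck process constructed below adds temporally
-# correlated exploration noise to the continuous actions; one discretised
-# step is roughly
-#
-#   x_{t+1} = x_t + theta * (mu - x_t) * dt + sigma * sqrt(dt) * N(0, 1)
-#
-# so theta pulls the noise back towards mu while sigma scales the jitter.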
-memory = SequentialMemory(limit=100000, window_length=1) -random_process = OrnsteinUhlenbeckProcess( - size=nb_actions, theta=.15, mu=0., sigma=.1) -agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, - memory=memory, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000, - random_process=random_process, gamma=.99, target_model_update=1e-3, - processor=MujocoProcessor()) -agent.compile([Adam(lr=1e-4), Adam(lr=1e-3)], metrics=['mae']) - -# Okay, now it's time to learn something! We visualize the training here for show, but this -# slows down training quite a lot. You can always safely abort the training prematurely using -# Ctrl + C. -agent.fit(env, nb_steps=1000000, visualize=False, verbose=1) - -# After training is done, we save the final weights. -agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True) - -# Finally, evaluate our algorithm for 5 episodes. -agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200) -import numpy as np -import gym - -from keras.models import Sequential, Model -from keras.layers import Dense, Activation, Flatten, Input, Concatenate -from keras.optimizers import Adam - -from rl.agents import DDPGAgent -from rl.memory import SequentialMemory -from rl.random import OrnsteinUhlenbeckProcess - - -ENV_NAME = 'Pendulum-v0' - - -# Get the environment and extract the number of actions. -env = gym.make(ENV_NAME) -np.random.seed(123) -env.seed(123) -assert len(env.action_space.shape) == 1 -nb_actions = env.action_space.shape[0] - -# Next, we build a very simple model. -actor = Sequential() -actor.add(Flatten(input_shape=(1,) + env.observation_space.shape)) -actor.add(Dense(16)) -actor.add(Activation('relu')) -actor.add(Dense(16)) -actor.add(Activation('relu')) -actor.add(Dense(16)) -actor.add(Activation('relu')) -actor.add(Dense(nb_actions)) -actor.add(Activation('linear')) -print(actor.summary()) - -action_input = Input(shape=(nb_actions,), name='action_input') -observation_input = Input( - shape=(1,) + env.observation_space.shape, name='observation_input') -flattened_observation = Flatten()(observation_input) -x = Concatenate()([action_input, flattened_observation]) -x = Dense(32)(x) -x = Activation('relu')(x) -x = Dense(32)(x) -x = Activation('relu')(x) -x = Dense(32)(x) -x = Activation('relu')(x) -x = Dense(1)(x) -x = Activation('linear')(x) -critic = Model(inputs=[action_input, observation_input], outputs=x) -print(critic.summary()) - -# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and -# even the metrics! -memory = SequentialMemory(limit=100000, window_length=1) -random_process = OrnsteinUhlenbeckProcess( - size=nb_actions, theta=.15, mu=0., sigma=.3) -agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, - memory=memory, nb_steps_warmup_critic=100, nb_steps_warmup_actor=100, - random_process=random_process, gamma=.99, target_model_update=1e-3) -agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae']) - -# Okay, now it's time to learn something! We visualize the training here for show, but this -# slows down training quite a lot. You can always safely abort the training prematurely using -# Ctrl + C. -agent.fit(env, nb_steps=50000, visualize=True, - verbose=1, nb_max_episode_steps=200) - -# After training is done, we save the final weights. -agent.save_weights('ddpg_{}_weights.h5f'.format(ENV_NAME), overwrite=True) - -# Finally, evaluate our algorithm for 5 episodes. 
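-# An optional sanity check (a sketch): the weights saved above can be loaded
-# back into a freshly compiled agent before testing, via the
-# save_weights/load_weights pair on rl.core.Agent:
-#
-#   agent.load_weights('ddpg_{}_weights.h5f'.format(ENV_NAME))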
-agent.test(env, nb_episodes=5, visualize=True, nb_max_episode_steps=200) -from __future__ import division -import argparse - -from PIL import Image -import numpy as np -import gym - -from keras.models import Sequential -from keras.layers import Dense, Activation, Flatten, Convolution2D, Permute -from keras.optimizers import Adam -import keras.backend as K - -from rl.agents.dqn import DQNAgent -from rl.policy import LinearAnnealedPolicy, BoltzmannQPolicy, EpsGreedyQPolicy -from rl.memory import SequentialMemory -from rl.core import Processor -from rl.callbacks import FileLogger, ModelIntervalCheckpoint - - -INPUT_SHAPE = (84, 84) -WINDOW_LENGTH = 4 - - -class AtariProcessor(Processor): - def process_observation(self, observation): - assert observation.ndim == 3 # (height, width, channel) - img = Image.fromarray(observation) - img = img.resize(INPUT_SHAPE).convert( - 'L') # resize and convert to grayscale - processed_observation = np.array(img) - assert processed_observation.shape == INPUT_SHAPE - # saves storage in experience memory - return processed_observation.astype('uint8') - - def process_state_batch(self, batch): - # We could perform this processing step in `process_observation`. In this case, however, - # we would need to store a `float32` array instead, which is 4x more memory intensive than - # an `uint8` array. This matters if we store 1M observations. - processed_batch = batch.astype('float32') / 255. - return processed_batch - - def process_reward(self, reward): - return np.clip(reward, -1., 1.) - - -parser = argparse.ArgumentParser() -parser.add_argument('--mode', choices=['train', 'test'], default='train') -parser.add_argument('--env-name', type=str, default='BreakoutDeterministic-v4') -parser.add_argument('--weights', type=str, default=None) -args = parser.parse_args() - -# Get the environment and extract the number of actions. -env = gym.make(args.env_name) -np.random.seed(123) -env.seed(123) -nb_actions = env.action_space.n - -# Next, we build our model. We use the same model that was described by Mnih et al. (2015). -input_shape = (WINDOW_LENGTH,) + INPUT_SHAPE -model = Sequential() -if K.image_dim_ordering() == 'tf': - # (width, height, channels) - model.add(Permute((2, 3, 1), input_shape=input_shape)) -elif K.image_dim_ordering() == 'th': - # (channels, width, height) - model.add(Permute((1, 2, 3), input_shape=input_shape)) -else: - raise RuntimeError('Unknown image_dim_ordering.') -model.add(Convolution2D(32, (8, 8), strides=(4, 4))) -model.add(Activation('relu')) -model.add(Convolution2D(64, (4, 4), strides=(2, 2))) -model.add(Activation('relu')) -model.add(Convolution2D(64, (3, 3), strides=(1, 1))) -model.add(Activation('relu')) -model.add(Flatten()) -model.add(Dense(512)) -model.add(Activation('relu')) -model.add(Dense(nb_actions)) -model.add(Activation('linear')) -print(model.summary()) - -# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and -# even the metrics! -memory = SequentialMemory(limit=1000000, window_length=WINDOW_LENGTH) -processor = AtariProcessor() - -# Select a policy. We use eps-greedy action selection, which means that a random action is selected -# with probability eps. We anneal eps from 1.0 to 0.1 over the course of 1M steps. This is done so that -# the agent initially explores the environment (high eps) and then gradually sticks to what it knows -# (low eps). We also set a dedicated eps value that is used during testing. Note that we set it to 0.05 -# so that the agent still performs some random actions. 
This ensures that the agent cannot get stuck. -policy = LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1., value_min=.1, value_test=.05, - nb_steps=1000000) - -# The trade-off between exploration and exploitation is difficult and an on-going research topic. -# If you want, you can experiment with the parameters or use a different policy. Another popular one -# is Boltzmann-style exploration: -# policy = BoltzmannQPolicy(tau=1.) -# Feel free to give it a try! - -dqn = DQNAgent(model=model, nb_actions=nb_actions, policy=policy, memory=memory, - processor=processor, nb_steps_warmup=50000, gamma=.99, target_model_update=10000, - train_interval=4, delta_clip=1.) -dqn.compile(Adam(lr=.00025), metrics=['mae']) - -if args.mode == 'train': - # Okay, now it's time to learn something! We capture the interrupt exception so that training - # can be prematurely aborted. Notice that now you can use the built-in Keras callbacks! - weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name) - checkpoint_weights_filename = 'dqn_' + \ - args.env_name + '_weights_{step}.h5f' - log_filename = 'dqn_{}_log.json'.format(args.env_name) - callbacks = [ModelIntervalCheckpoint( - checkpoint_weights_filename, interval=250000)] - callbacks += [FileLogger(log_filename, interval=100)] - dqn.fit(env, callbacks=callbacks, nb_steps=1750000, log_interval=10000) - - # After training is done, we save the final weights one more time. - dqn.save_weights(weights_filename, overwrite=True) - - # Finally, evaluate our algorithm for 10 episodes. - dqn.test(env, nb_episodes=10, visualize=False) -elif args.mode == 'test': - weights_filename = 'dqn_{}_weights.h5f'.format(args.env_name) - if args.weights: - weights_filename = args.weights - dqn.load_weights(weights_filename) - dqn.test(env, nb_episodes=10, visualize=True) -import numpy as np -import gym - -from keras.models import Sequential -from keras.layers import Dense, Activation, Flatten -from keras.optimizers import Adam - -from rl.agents.dqn import DQNAgent -from rl.policy import BoltzmannQPolicy -from rl.memory import SequentialMemory - - -ENV_NAME = 'CartPole-v0' - - -# Get the environment and extract the number of actions. -env = gym.make(ENV_NAME) -np.random.seed(123) -env.seed(123) -nb_actions = env.action_space.n - -# Next, we build a very simple model. -model = Sequential() -model.add(Flatten(input_shape=(1,) + env.observation_space.shape)) -model.add(Dense(16)) -model.add(Activation('relu')) -model.add(Dense(16)) -model.add(Activation('relu')) -model.add(Dense(16)) -model.add(Activation('relu')) -model.add(Dense(nb_actions)) -model.add(Activation('linear')) -print(model.summary()) - -# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and -# even the metrics! -memory = SequentialMemory(limit=50000, window_length=1) -policy = BoltzmannQPolicy() -dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10, - target_model_update=1e-2, policy=policy) -dqn.compile(Adam(lr=1e-3), metrics=['mae']) - -# Okay, now it's time to learn something! We visualize the training here for show, but this -# slows down training quite a lot. You can always safely abort the training prematurely using -# Ctrl + C. -dqn.fit(env, nb_steps=50000, visualize=True, verbose=2) - -# After training is done, we save the final weights. -dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True) - -# Finally, evaluate our algorithm for 5 episodes. 
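-# A quantitative variant (assuming the 'episode_reward' key that keras-rl's
-# History callback normally records):
-#
-#   history = dqn.test(env, nb_episodes=5, visualize=False)
-#   print(np.mean(history.history['episode_reward']))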
-dqn.test(env, nb_episodes=5, visualize=True)
-import numpy as np
-import gym
-
-from keras.models import Sequential
-from keras.layers import Dense, Activation, Flatten
-from keras.optimizers import Adam
-
-from rl.agents.dqn import DQNAgent
-from rl.policy import BoltzmannQPolicy
-from rl.memory import SequentialMemory
-
-
-ENV_NAME = 'CartPole-v0'
-
-
-# Get the environment and extract the number of actions.
-env = gym.make(ENV_NAME)
-np.random.seed(123)
-env.seed(123)
-nb_actions = env.action_space.n
-
-# Next, we build a very simple model regardless of the dueling architecture:
-# if you enable the dueling network in DQN, it will build a dueling network
-# based on your model automatically. Alternatively, you can build a dueling
-# network yourself and turn off the dueling network in DQN.
-model = Sequential()
-model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
-model.add(Dense(16))
-model.add(Activation('relu'))
-model.add(Dense(16))
-model.add(Activation('relu'))
-model.add(Dense(16))
-model.add(Activation('relu'))
-model.add(Dense(nb_actions, activation='linear'))
-print(model.summary())
-
-# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
-# even the metrics!
-memory = SequentialMemory(limit=50000, window_length=1)
-policy = BoltzmannQPolicy()
-# enable the dueling network
-# you can specify the dueling_type to one of {'avg','max','naive'}
-dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
-               enable_dueling_network=True, dueling_type='avg', target_model_update=1e-2, policy=policy)
-dqn.compile(Adam(lr=1e-3), metrics=['mae'])
-
-# Okay, now it's time to learn something! Visualizing the training would slow
-# it down quite a lot, so we leave it off here. You can always safely abort
-# the training prematurely using Ctrl + C.
-dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)
-
-# After training is done, we save the final weights.
-dqn.save_weights('duel_dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
-
-# Finally, evaluate our algorithm for 5 episodes.
-dqn.test(env, nb_episodes=5, visualize=False)
-import numpy as np
-import gym
-
-from keras.models import Sequential, Model
-from keras.layers import Dense, Activation, Flatten, Input, Concatenate
-from keras.optimizers import Adam
-
-from rl.agents import NAFAgent
-from rl.memory import SequentialMemory
-from rl.random import OrnsteinUhlenbeckProcess
-from rl.core import Processor
-
-
-class PendulumProcessor(Processor):
-    def process_reward(self, reward):
-        # The magnitude of the reward can be important. Since each step yields a relatively
-        # high reward, we reduce the magnitude by two orders of magnitude.
-        return reward / 100.
-
-
-ENV_NAME = 'Pendulum-v0'
-
-
-# Get the environment and extract the number of actions.
-env = gym.make(ENV_NAME)
-np.random.seed(123)
-env.seed(123)
-assert len(env.action_space.shape) == 1
-nb_actions = env.action_space.shape[0]
-
-# Build all necessary models: V, mu, and L networks.
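-# In NAF the Q-function is decomposed as
-#
-#   Q(s, a) = V(s) + A(s, a),   A(s, a) = -0.5 * (a - mu(s))^T P(s) (a - mu(s))
-#
-# with P(s) = L(s) L(s)^T built from the lower-triangular entries predicted
-# by the L network -- hence its (nb_actions * nb_actions + nb_actions) // 2
-# output units further down.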
-V_model = Sequential()
-V_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
-V_model.add(Dense(16))
-V_model.add(Activation('relu'))
-V_model.add(Dense(16))
-V_model.add(Activation('relu'))
-V_model.add(Dense(16))
-V_model.add(Activation('relu'))
-V_model.add(Dense(1))
-V_model.add(Activation('linear'))
-print(V_model.summary())
-
-mu_model = Sequential()
-mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
-mu_model.add(Dense(16))
-mu_model.add(Activation('relu'))
-mu_model.add(Dense(16))
-mu_model.add(Activation('relu'))
-mu_model.add(Dense(16))
-mu_model.add(Activation('relu'))
-mu_model.add(Dense(nb_actions))
-mu_model.add(Activation('linear'))
-print(mu_model.summary())
-
-action_input = Input(shape=(nb_actions,), name='action_input')
-observation_input = Input(
-    shape=(1,) + env.observation_space.shape, name='observation_input')
-x = Concatenate()([action_input, Flatten()(observation_input)])
-x = Dense(32)(x)
-x = Activation('relu')(x)
-x = Dense(32)(x)
-x = Activation('relu')(x)
-x = Dense(32)(x)
-x = Activation('relu')(x)
-x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x)
-x = Activation('linear')(x)
-L_model = Model(inputs=[action_input, observation_input], outputs=x)
-print(L_model.summary())
-
-# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
-# even the metrics!
-processor = PendulumProcessor()
-memory = SequentialMemory(limit=100000, window_length=1)
-random_process = OrnsteinUhlenbeckProcess(
-    theta=.15, mu=0., sigma=.3, size=nb_actions)
-agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model,
-                 memory=memory, nb_steps_warmup=100, random_process=random_process,
-                 gamma=.99, target_model_update=1e-3, processor=processor)
-agent.compile(Adam(lr=.001, clipnorm=1.), metrics=['mae'])
-
-# Okay, now it's time to learn something! We visualize the training here for show, but this
-# slows down training quite a lot. You can always safely abort the training prematurely using
-# Ctrl + C.
-agent.fit(env, nb_steps=50000, visualize=True,
-          verbose=1, nb_max_episode_steps=200)
-
-# After training is done, we save the final weights.
-agent.save_weights('cdqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
-
-# Finally, evaluate our algorithm for 10 episodes.
-agent.test(env, nb_episodes=10, visualize=True, nb_max_episode_steps=200)
-import numpy as np
-import gym
-
-from keras.models import Sequential
-from keras.layers import Dense, Activation, Flatten
-from keras.optimizers import Adam
-
-from rl.agents import SARSAAgent
-from rl.policy import BoltzmannQPolicy
-
-
-ENV_NAME = 'CartPole-v0'
-
-# Get the environment and extract the number of actions.
-env = gym.make(ENV_NAME)
-np.random.seed(123)
-env.seed(123)
-nb_actions = env.action_space.n
-
-# Next, we build a very simple model.
-model = Sequential()
-model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
-model.add(Dense(16))
-model.add(Activation('relu'))
-model.add(Dense(16))
-model.add(Activation('relu'))
-model.add(Dense(16))
-model.add(Activation('relu'))
-model.add(Dense(nb_actions))
-model.add(Activation('linear'))
-print(model.summary())
-
-# SARSA does not require a memory.
-policy = BoltzmannQPolicy()
-sarsa = SARSAAgent(model=model, nb_actions=nb_actions,
-                   nb_steps_warmup=10, policy=policy)
-sarsa.compile(Adam(lr=1e-3), metrics=['mae'])
-
-# Okay, now it's time to learn something! We visualize the training here for show, but this
-# slows down training quite a lot. You can always safely abort the training prematurely using
-# Ctrl + C.
-sarsa.fit(env, nb_steps=50000, visualize=False, verbose=2)
-
-# After training is done, we save the final weights.
-sarsa.save_weights('sarsa_{}_weights.h5f'.format(ENV_NAME), overwrite=True)
-
-# Finally, evaluate our algorithm for 5 episodes.
-sarsa.test(env, nb_episodes=5, visualize=True)
-import argparse
-import json
-
-import matplotlib.pyplot as plt
-
-
-def visualize_log(filename, figsize=None, output=None):
-    with open(filename, 'r') as f:
-        data = json.load(f)
-    if 'episode' not in data:
-        raise ValueError(
-            'Log file "{}" does not contain the "episode" key.'.format(filename))
-    episodes = data['episode']
-
-    # Get value keys. The x axis is shared and is the number of episodes.
-    keys = sorted(list(set(data.keys()).difference(set(['episode']))))
-
-    if figsize is None:
-        figsize = (15., 5. * len(keys))
-    f, axarr = plt.subplots(len(keys), sharex=True, figsize=figsize)
-    for idx, key in enumerate(keys):
-        axarr[idx].plot(episodes, data[key])
-        axarr[idx].set_ylabel(key)
-    plt.xlabel('episodes')
-    plt.tight_layout()
-    if output is None:
-        plt.show()
-    else:
-        plt.savefig(output)
-
-
-parser = argparse.ArgumentParser()
-parser.add_argument('filename', type=str,
-                    help='The filename of the JSON log generated during training.')
-parser.add_argument('--output', type=str, default=None,
-                    help='The output file. If not specified, the log will only be displayed.')
-parser.add_argument('--figsize', nargs=2, type=float, default=None,
-                    help='The size of the figure in `width height` format specified in points.')
-args = parser.parse_args()
-
-# You can use visualize_log to easily view the stats that were recorded during training. Simply
-# provide the filename of the JSON log that was written by the `FileLogger` callback.
-visualize_log(args.filename, output=args.output, figsize=args.figsize)
-from __future__ import division
-from __future__ import print_function
-import warnings
-import timeit
-import json
-from tempfile import mkdtemp
-
-import numpy as np
-
-from keras import __version__ as KERAS_VERSION
-from keras.callbacks import Callback as KerasCallback, CallbackList as KerasCallbackList
-from keras.utils.generic_utils import Progbar
-
-
-class Callback(KerasCallback):
-    def _set_env(self, env):
-        self.env = env
-
-    def on_episode_begin(self, episode, logs={}):
-        """Called at beginning of each episode"""
-        pass
-
-    def on_episode_end(self, episode, logs={}):
-        """Called at end of each episode"""
-        pass
-
-    def on_step_begin(self, step, logs={}):
-        """Called at beginning of each step"""
-        pass
-
-    def on_step_end(self, step, logs={}):
-        """Called at end of each step"""
-        pass
-
-    def on_action_begin(self, action, logs={}):
-        """Called at beginning of each action"""
-        pass
-
-    def on_action_end(self, action, logs={}):
-        """Called at end of each action"""
-        pass
-
-
-class CallbackList(KerasCallbackList):
-    def _set_env(self, env):
-        """ Set environment for each callback in callbackList """
-        for callback in self.callbacks:
-            if callable(getattr(callback, '_set_env', None)):
-                callback._set_env(env)
-
-    def on_episode_begin(self, episode, logs={}):
-        """ Called at beginning of each episode for each callback in callbackList"""
-        for callback in self.callbacks:
-            # Check if callback supports the more appropriate `on_episode_begin` callback.
-            # If not, fall back to `on_epoch_begin` to be compatible with built-in Keras callbacks.
-            if callable(getattr(callback, 'on_episode_begin', None)):
-                callback.on_episode_begin(episode, logs=logs)
-            else:
-                callback.on_epoch_begin(episode, logs=logs)
-
-    def on_episode_end(self, episode, logs={}):
-        """ Called at end of each episode for each callback in callbackList"""
-        for callback in self.callbacks:
-            # Check if callback supports the more appropriate `on_episode_end` callback.
-            # If not, fall back to `on_epoch_end` to be compatible with built-in Keras callbacks.
-            if callable(getattr(callback, 'on_episode_end', None)):
-                callback.on_episode_end(episode, logs=logs)
-            else:
-                callback.on_epoch_end(episode, logs=logs)
-
-    def on_step_begin(self, step, logs={}):
-        """ Called at beginning of each step for each callback in callbackList"""
-        for callback in self.callbacks:
-            # Check if callback supports the more appropriate `on_step_begin` callback.
-            # If not, fall back to `on_batch_begin` to be compatible with built-in Keras callbacks.
-            if callable(getattr(callback, 'on_step_begin', None)):
-                callback.on_step_begin(step, logs=logs)
-            else:
-                callback.on_batch_begin(step, logs=logs)
-
-    def on_step_end(self, step, logs={}):
-        """ Called at end of each step for each callback in callbackList"""
-        for callback in self.callbacks:
-            # Check if callback supports the more appropriate `on_step_end` callback.
-            # If not, fall back to `on_batch_end` to be compatible with built-in Keras callbacks.
-            if callable(getattr(callback, 'on_step_end', None)):
-                callback.on_step_end(step, logs=logs)
-            else:
-                callback.on_batch_end(step, logs=logs)
-
-    def on_action_begin(self, action, logs={}):
-        """ Called at beginning of each action for each callback in callbackList"""
-        for callback in self.callbacks:
-            if callable(getattr(callback, 'on_action_begin', None)):
-                callback.on_action_begin(action, logs=logs)
-
-    def on_action_end(self, action, logs={}):
-        """ Called at end of each action for each callback in callbackList"""
-        for callback in self.callbacks:
-            if callable(getattr(callback, 'on_action_end', None)):
-                callback.on_action_end(action, logs=logs)
-
-
-class TestLogger(Callback):
-    """ Logger Class for Test """
-
-    def on_train_begin(self, logs):
-        """ Print logs at beginning of training"""
-        print('Testing for {} episodes ...'.format(self.params['nb_episodes']))
-
-    def on_episode_end(self, episode, logs):
-        """ Print logs at end of each episode """
-        template = 'Episode {0}: reward: {1:.3f}, steps: {2}'
-        variables = [
-            episode + 1,
-            logs['episode_reward'],
-            logs['nb_steps'],
-        ]
-        print(template.format(*variables))
-
-
-class TrainEpisodeLogger(Callback):
-    def __init__(self):
-        # Some algorithms compute multiple episodes at once since they are multi-threaded.
-        # We therefore use a dictionary that is indexed by the episode to separate episodes
-        # from each other.
-        self.episode_start = {}
-        self.observations = {}
-        self.rewards = {}
-        self.actions = {}
-        self.metrics = {}
-        self.step = 0
-
-    def on_train_begin(self, logs):
-        """ Print training values at beginning of training """
-        self.train_start = timeit.default_timer()
-        self.metrics_names = self.model.metrics_names
-        print('Training for {} steps ...'.format(self.params['nb_steps']))
-
-    def on_train_end(self, logs):
-        """ Print training time at end of training """
-        duration = timeit.default_timer() - self.train_start
-        print('done, took {:.3f} seconds'.format(duration))
-
-    def on_episode_begin(self, episode, logs):
-        """ Reset environment variables at beginning of each episode """
-        self.episode_start[episode] = timeit.default_timer()
-        self.observations[episode] = []
-        self.rewards[episode] = []
-        self.actions[episode] = []
-        self.metrics[episode] = []
-
-    def on_episode_end(self, episode, logs):
-        """ Compute and print training statistics of the episode when done """
-        duration = timeit.default_timer() - self.episode_start[episode]
-        episode_steps = len(self.observations[episode])
-
-        # Format all metrics.
-        metrics = np.array(self.metrics[episode])
-        metrics_template = ''
-        metrics_variables = []
-        with warnings.catch_warnings():
-            warnings.filterwarnings('error')
-            for idx, name in enumerate(self.metrics_names):
-                if idx > 0:
-                    metrics_template += ', '
-                try:
-                    value = np.nanmean(metrics[:, idx])
-                    metrics_template += '{}: {:f}'
-                except Warning:
-                    value = '--'
-                    metrics_template += '{}: {}'
-                metrics_variables += [name, value]
-        metrics_text = metrics_template.format(*metrics_variables)
-
-        nb_step_digits = str(
-            int(np.ceil(np.log10(self.params['nb_steps']))) + 1)
-        template = '{step: ' + nb_step_digits + \
-            'd}/{nb_steps}: episode: {episode}, duration: {duration:.3f}s, episode steps: {episode_steps}, steps per second: {sps:.0f}, episode reward: {episode_reward:.3f}, mean reward: {reward_mean:.3f} [{reward_min:.3f}, {reward_max:.3f}], mean action: {action_mean:.3f} [{action_min:.3f}, {action_max:.3f}], mean observation: {obs_mean:.3f} [{obs_min:.3f}, {obs_max:.3f}], {metrics}'
-        variables = {
-            'step': self.step,
-            'nb_steps': self.params['nb_steps'],
-            'episode': episode + 1,
-            'duration': duration,
-            'episode_steps': episode_steps,
-            'sps': float(episode_steps) / duration,
-            'episode_reward': np.sum(self.rewards[episode]),
-            'reward_mean': np.mean(self.rewards[episode]),
-            'reward_min': np.min(self.rewards[episode]),
-            'reward_max': np.max(self.rewards[episode]),
-            'action_mean': np.mean(self.actions[episode]),
-            'action_min': np.min(self.actions[episode]),
-            'action_max': np.max(self.actions[episode]),
-            'obs_mean': np.mean(self.observations[episode]),
-            'obs_min': np.min(self.observations[episode]),
-            'obs_max': np.max(self.observations[episode]),
-            'metrics': metrics_text,
-        }
-        print(template.format(**variables))
-
-        # Free up resources.
-        del self.episode_start[episode]
-        del self.observations[episode]
-        del self.rewards[episode]
-        del self.actions[episode]
-        del self.metrics[episode]
-
-    def on_step_end(self, step, logs):
-        """ Update statistics of episode after each step """
-        episode = logs['episode']
-        self.observations[episode].append(logs['observation'])
-        self.rewards[episode].append(logs['reward'])
-        self.actions[episode].append(logs['action'])
-        self.metrics[episode].append(logs['metrics'])
-        self.step += 1
-
-
-class TrainIntervalLogger(Callback):
-    def __init__(self, interval=10000):
-        self.interval = interval
-        self.step = 0
-        self.reset()
-
-    def reset(self):
-        """ Reset statistics """
-        self.interval_start = timeit.default_timer()
-        self.progbar = Progbar(target=self.interval)
-        self.metrics = []
-        self.infos = []
-        self.info_names = None
-        self.episode_rewards = []
-
-    def on_train_begin(self, logs):
-        """ Initialize training statistics at beginning of training """
-        self.train_start = timeit.default_timer()
-        self.metrics_names = self.model.metrics_names
-        print('Training for {} steps ...'.format(self.params['nb_steps']))
-
-    def on_train_end(self, logs):
-        """ Print training duration at end of training """
-        duration = timeit.default_timer() - self.train_start
-        print('done, took {:.3f} seconds'.format(duration))
-
-    def on_step_begin(self, step, logs):
-        """ Print metrics if interval is over """
-        if self.step % self.interval == 0:
-            if len(self.episode_rewards) > 0:
-                metrics = np.array(self.metrics)
-                assert metrics.shape == (
-                    self.interval, len(self.metrics_names))
-                formatted_metrics = ''
-                if not np.isnan(metrics).all():  # not all values are NaN
-                    means = np.nanmean(self.metrics, axis=0)
-                    assert means.shape == (len(self.metrics_names),)
-                    for name, mean in zip(self.metrics_names, means):
-                        formatted_metrics += ' - {}: {:.3f}'.format(name, mean)
-
-                formatted_infos = ''
-                if len(self.infos) > 0:
-                    infos = np.array(self.infos)
-                    if not np.isnan(infos).all():  # not all values are NaN
-                        means = np.nanmean(self.infos, axis=0)
-                        assert means.shape == (len(self.info_names),)
-                        for name, mean in zip(self.info_names, means):
-                            formatted_infos += ' - {}: {:.3f}'.format(
-                                name, mean)
-                print('{} episodes - episode_reward: {:.3f} [{:.3f}, {:.3f}]{}{}'.format(len(self.episode_rewards), np.mean(
-                    self.episode_rewards), np.min(self.episode_rewards), np.max(self.episode_rewards), formatted_metrics, formatted_infos))
-                print('')
-            self.reset()
-            print('Interval {} ({} steps performed)'.format(
-                self.step // self.interval + 1, self.step))
-
-    def on_step_end(self, step, logs):
-        """ Update progression bar at the end of each step """
-        if self.info_names is None:
-            self.info_names = logs['info'].keys()
-        values = [('reward', logs['reward'])]
-        if KERAS_VERSION > '2.1.3':
-            self.progbar.update((self.step % self.interval) + 1, values=values)
-        else:
-            self.progbar.update((self.step % self.interval) +
-                                1, values=values, force=True)
-        self.step += 1
-        self.metrics.append(logs['metrics'])
-        if len(self.info_names) > 0:
-            self.infos.append([logs['info'][k] for k in self.info_names])
-
-    def on_episode_end(self, episode, logs):
-        """ Update reward value at the end of each episode """
-        self.episode_rewards.append(logs['episode_reward'])
-
-
-class FileLogger(Callback):
-    def __init__(self, filepath, interval=None):
-        self.filepath = filepath
-        self.interval = interval
-
-        # Some algorithms compute multiple episodes at once since they are multi-threaded.
-        # We therefore use a dict that maps from episode to metrics array.
-        self.metrics = {}
-        self.starts = {}
-        self.data = {}
-
-    def on_train_begin(self, logs):
-        """ Initialize model metrics before training """
-        self.metrics_names = self.model.metrics_names
-
-    def on_train_end(self, logs):
-        """ Save collected data at the end of training """
-        self.save_data()
-
-    def on_episode_begin(self, episode, logs):
-        """ Initialize metrics at the beginning of each episode """
-        assert episode not in self.metrics
-        assert episode not in self.starts
-        self.metrics[episode] = []
-        self.starts[episode] = timeit.default_timer()
-
-    def on_episode_end(self, episode, logs):
-        """ Compute and store metrics at the end of each episode """
-        duration = timeit.default_timer() - self.starts[episode]
-
-        metrics = self.metrics[episode]
-        if np.isnan(metrics).all():
-            mean_metrics = np.array([np.nan for _ in self.metrics_names])
-        else:
-            mean_metrics = np.nanmean(metrics, axis=0)
-        assert len(mean_metrics) == len(self.metrics_names)
-
-        data = list(zip(self.metrics_names, mean_metrics))
-        data += list(logs.items())
-        data += [('episode', episode), ('duration', duration)]
-        for key, value in data:
-            if key not in self.data:
-                self.data[key] = []
-            self.data[key].append(value)
-
-        if self.interval is not None and episode % self.interval == 0:
-            self.save_data()
-
-        # Clean up.
-        del self.metrics[episode]
-        del self.starts[episode]
-
-    def on_step_end(self, step, logs):
-        """ Append metric at the end of each step """
-        self.metrics[logs['episode']].append(logs['metrics'])
-
-    def save_data(self):
-        """ Save metrics in a json file """
-        if len(self.data.keys()) == 0:
-            return
-
-        # Sort everything by episode.
-        assert 'episode' in self.data
-        sorted_indexes = np.argsort(self.data['episode'])
-        sorted_data = {}
-        for key, values in self.data.items():
-            assert len(self.data[key]) == len(sorted_indexes)
-            # We convert to np.array() and then to list to convert from np datatypes to native datatypes.
-            # This is necessary because json.dump cannot handle np.float32, for example.
-            sorted_data[key] = np.array(
-                [self.data[key][idx] for idx in sorted_indexes]).tolist()
-
-        # Overwrite already open file. We can simply seek to the beginning since the file will
-        # grow strictly monotonically.
-        with open(self.filepath, 'w') as f:
-            json.dump(sorted_data, f)
-
-
-class Visualizer(Callback):
-    def on_action_end(self, action, logs):
-        """ Render environment at the end of each action """
-        self.env.render(mode='human')
-
-
-class ModelIntervalCheckpoint(Callback):
-    def __init__(self, filepath, interval, verbose=0):
-        super(ModelIntervalCheckpoint, self).__init__()
-        self.filepath = filepath
-        self.interval = interval
-        self.verbose = verbose
-        self.total_steps = 0
-
-    def on_step_end(self, step, logs={}):
-        """ Save weights at interval steps during training """
-        self.total_steps += 1
-        if self.total_steps % self.interval != 0:
-            # Nothing to do.
-            return
-
-        filepath = self.filepath.format(step=self.total_steps, **logs)
-        if self.verbose > 0:
-            print('Step {}: saving model to {}'.format(
-                self.total_steps, filepath))
-        self.model.save_weights(filepath, overwrite=True)
-# -*- coding: utf-8 -*-
-import warnings
-from copy import deepcopy
-
-import numpy as np
-from keras.callbacks import History
-
-from rl.callbacks import (
-    CallbackList,
-    TestLogger,
-    TrainEpisodeLogger,
-    TrainIntervalLogger,
-    Visualizer
-)
-
-
-class Agent(object):
-    """Abstract base class for all implemented agents.
-
-    Each agent interacts with the environment (as defined by the `Env` class) by first observing the
-    state of the environment. Based on this observation the agent changes the environment by performing
-    an action.
-
-    Do not use this abstract base class directly but instead use one of the concrete agents implemented.
-    Each agent realizes a reinforcement learning algorithm. Since all agents conform to the same
-    interface, you can use them interchangeably.
-
-    To implement your own agent, you have to implement the following methods:
-
-    - `forward`
-    - `backward`
-    - `compile`
-    - `load_weights`
-    - `save_weights`
-    - `layers`
-
-    # Arguments
-        processor (`Processor` instance): See [Processor](#processor) for details.
-    """
-
-    def __init__(self, processor=None):
-        self.processor = processor
-        self.training = False
-        self.step = 0
-
-    def get_config(self):
-        """Configuration of the agent for serialization.
-
-        # Returns
-            Dictionary with agent configuration
-        """
-        return {}
-
-    def fit(self, env, nb_steps, action_repetition=1, callbacks=None, verbose=1,
-            visualize=False, nb_max_start_steps=0, start_step_policy=None, log_interval=10000,
-            nb_max_episode_steps=None):
-        """Trains the agent on the given environment.
-
-        # Arguments
-            env: (`Env` instance): Environment that the agent interacts with. See [Env](#env) for details.
-            nb_steps (integer): Number of training steps to be performed.
-            action_repetition (integer): Number of times the agent repeats the same action without
-                observing the environment again. Setting this to a value > 1 can be useful
-                if a single action only has a very small effect on the environment.
-            callbacks (list of `keras.callbacks.Callback` or `rl.callbacks.Callback` instances):
-                List of callbacks to apply during training. See [callbacks](/callbacks) for details.
-            verbose (integer): 0 for no logging, 1 for interval logging (compare `log_interval`), 2 for episode logging
-            visualize (boolean): If `True`, the environment is visualized during training. However,
-                this is likely going to slow down training significantly and is thus intended to be
-                a debugging instrument.
-            nb_max_start_steps (integer): Number of maximum steps that the agent performs at the beginning
-                of each episode using `start_step_policy`. Notice that this is an upper limit since
-                the exact number of steps to be performed is sampled uniformly from [0, max_start_steps]
-                at the beginning of each episode.
-            start_step_policy (`lambda observation: action`): The policy
-                to follow if `nb_max_start_steps` > 0. If set to `None`, a random action is performed.
-            log_interval (integer): If `verbose` = 1, the number of steps that are considered to be an interval.
-            nb_max_episode_steps (integer): Number of steps per episode that the agent performs before
-                automatically resetting the environment. Set to `None` if each episode should run
-                (potentially indefinitely) until the environment signals a terminal state.
-
-        # Returns
-            A `keras.callbacks.History` instance that recorded the entire training process.
-        """
-        if not self.compiled:
-            raise RuntimeError(
-                'You tried to fit your agent but it hasn\'t been compiled yet. Please call `compile()` before `fit()`.')
-        if action_repetition < 1:
-            raise ValueError(
-                'action_repetition must be >= 1, is {}'.format(action_repetition))
-
-        self.training = True
-
-        callbacks = [] if not callbacks else callbacks[:]
-
-        if verbose == 1:
-            callbacks += [TrainIntervalLogger(interval=log_interval)]
-        elif verbose > 1:
-            callbacks += [TrainEpisodeLogger()]
-        if visualize:
-            callbacks += [Visualizer()]
-        history = History()
-        callbacks += [history]
-        callbacks = CallbackList(callbacks)
-        if hasattr(callbacks, 'set_model'):
-            callbacks.set_model(self)
-        else:
-            callbacks._set_model(self)
-        callbacks._set_env(env)
-        params = {
-            'nb_steps': nb_steps,
-        }
-        if hasattr(callbacks, 'set_params'):
-            callbacks.set_params(params)
-        else:
-            callbacks._set_params(params)
-        self._on_train_begin()
-        callbacks.on_train_begin()
-
-        episode = np.int16(0)
-        self.step = np.int16(0)
-        observation = None
-        episode_reward = None
-        episode_step = None
-        did_abort = False
-        try:
-            while self.step < nb_steps:
-                if observation is None:  # start of a new episode
-                    callbacks.on_episode_begin(episode)
-                    episode_step = np.int16(0)
-                    episode_reward = np.float32(0)
-
-                    # Obtain the initial observation by resetting the environment.
-                    self.reset_states()
-                    observation = deepcopy(env.reset())
-                    if self.processor is not None:
-                        observation = self.processor.process_observation(
-                            observation)
-                    assert observation is not None
-
-                    # Perform random starts at beginning of episode and do not record them into the experience.
-                    # This slightly changes the start position between games.
-                    nb_random_start_steps = 0 if nb_max_start_steps == 0 else np.random.randint(
-                        nb_max_start_steps)
-                    for _ in range(nb_random_start_steps):
-                        if start_step_policy is None:
-                            action = env.action_space.sample()
-                        else:
-                            action = start_step_policy(observation)
-                        if self.processor is not None:
-                            action = self.processor.process_action(action)
-                        callbacks.on_action_begin(action)
-                        observation, reward, done, info = env.step(action)
-                        observation = deepcopy(observation)
-                        if self.processor is not None:
-                            observation, reward, done, info = self.processor.process_step(
-                                observation, reward, done, info)
-                        callbacks.on_action_end(action)
-                        if done:
-                            warnings.warn('Env ended before {} random steps could be performed at the start. You should probably lower the `nb_max_start_steps` parameter.'.format(
-                                nb_random_start_steps))
-                            observation = deepcopy(env.reset())
-                            if self.processor is not None:
-                                observation = self.processor.process_observation(
-                                    observation)
-                            break
-
-                # At this point, we expect to be fully initialized.
-                assert episode_reward is not None
-                assert episode_step is not None
-                assert observation is not None
-
-                # Run a single step.
-                callbacks.on_step_begin(episode_step)
-                # This is where all of the work happens. We first perceive and compute the action
-                # (forward step) and then use the reward to improve (backward step).
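# (Put differently, one environment step always amounts to:
#      action = agent.forward(observation)          # select an action, cache state
#      observation, r, done, info = env.step(action)
#      metrics = agent.backward(r, terminal=done)   # learn from the cached transition
#  Concrete agents such as DQNAgent implement `forward` and `backward`; this
#  base class only orchestrates the loop.)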
-                action = self.forward(observation)
-                if self.processor is not None:
-                    action = self.processor.process_action(action)
-                reward = np.float32(0)
-                accumulated_info = {}
-                done = False
-                for _ in range(action_repetition):
-                    callbacks.on_action_begin(action)
-                    observation, r, done, info = env.step(action)
-                    observation = deepcopy(observation)
-                    if self.processor is not None:
-                        observation, r, done, info = self.processor.process_step(
-                            observation, r, done, info)
-                    for key, value in info.items():
-                        if not np.isreal(value):
-                            continue
-                        if key not in accumulated_info:
-                            accumulated_info[key] = np.zeros_like(value)
-                        accumulated_info[key] += value
-                    callbacks.on_action_end(action)
-                    reward += r
-                    if done:
-                        break
-                if nb_max_episode_steps and episode_step >= nb_max_episode_steps - 1:
-                    # Force a terminal state.
-                    done = True
-                metrics = self.backward(reward, terminal=done)
-                episode_reward += reward
-
-                step_logs = {
-                    'action': action,
-                    'observation': observation,
-                    'reward': reward,
-                    'metrics': metrics,
-                    'episode': episode,
-                    'info': accumulated_info,
-                }
-                callbacks.on_step_end(episode_step, step_logs)
-                episode_step += 1
-                self.step += 1
-
-                if done:
-                    # We are in a terminal state but the agent hasn't yet seen it. We therefore
-                    # perform one more forward-backward call and simply ignore the action before
-                    # resetting the environment. We need to pass in `terminal=False` here since
-                    # the *next* state, that is the state of the newly reset environment, is
-                    # always non-terminal by convention.
-                    self.forward(observation)
-                    self.backward(0., terminal=False)
-
-                    # This episode is finished, report and reset.
-                    episode_logs = {
-                        'episode_reward': episode_reward,
-                        'nb_episode_steps': episode_step,
-                        'nb_steps': self.step,
-                    }
-                    callbacks.on_episode_end(episode, episode_logs)
-
-                    episode += 1
-                    observation = None
-                    episode_step = None
-                    episode_reward = None
-        except KeyboardInterrupt:
-            # We catch keyboard interrupts here so that training can be safely aborted.
-            # This is so common that we've built this right into this function, which ensures that
-            # the `on_train_end` method is properly called.
-            did_abort = True
-        callbacks.on_train_end(logs={'did_abort': did_abort})
-        self._on_train_end()
-
-        return history
-
-    def test(self, env, nb_episodes=1, action_repetition=1, callbacks=None, visualize=True,
-             nb_max_episode_steps=None, nb_max_start_steps=0, start_step_policy=None, verbose=1):
-        """Tests the agent on the given environment.
-
-        # Arguments
-            env: (`Env` instance): Environment that the agent interacts with. See [Env](#env) for details.
-            nb_episodes (integer): Number of episodes to perform.
-            action_repetition (integer): Number of times the agent repeats the same action without
-                observing the environment again. Setting this to a value > 1 can be useful
-                if a single action only has a very small effect on the environment.
-            callbacks (list of `keras.callbacks.Callback` or `rl.callbacks.Callback` instances):
-                List of callbacks to apply during testing. See [callbacks](/callbacks) for details.
-            verbose (integer): 0 for no logging, 1 for episode logging
-            visualize (boolean): If `True`, the environment is visualized during testing. However,
-                this is likely going to slow down testing significantly and is thus intended to be
-                a debugging instrument.
-            nb_max_start_steps (integer): Number of maximum steps that the agent performs at the beginning
-                of each episode using `start_step_policy`. Notice that this is an upper limit since
-                the exact number of steps to be performed is sampled uniformly from [0, max_start_steps]
-                at the beginning of each episode.
-            start_step_policy (`lambda observation: action`): The policy
-                to follow if `nb_max_start_steps` > 0. If set to `None`, a random action is performed.
-            nb_max_episode_steps (integer): Number of steps per episode that the agent performs before
-                automatically resetting the environment. Set to `None` if each episode should run
-                (potentially indefinitely) until the environment signals a terminal state.
-
-        # Returns
-            A `keras.callbacks.History` instance that recorded the entire test process.
-        """
-        if not self.compiled:
-            raise RuntimeError(
-                'You tried to test your agent but it hasn\'t been compiled yet. Please call `compile()` before `test()`.')
-        if action_repetition < 1:
-            raise ValueError(
-                'action_repetition must be >= 1, is {}'.format(action_repetition))
-
-        self.training = False
-        self.step = 0
-
-        callbacks = [] if not callbacks else callbacks[:]
-
-        if verbose >= 1:
-            callbacks += [TestLogger()]
-        if visualize:
-            callbacks += [Visualizer()]
-        history = History()
-        callbacks += [history]
-        callbacks = CallbackList(callbacks)
-        if hasattr(callbacks, 'set_model'):
-            callbacks.set_model(self)
-        else:
-            callbacks._set_model(self)
-        callbacks._set_env(env)
-        params = {
-            'nb_episodes': nb_episodes,
-        }
-        if hasattr(callbacks, 'set_params'):
-            callbacks.set_params(params)
-        else:
-            callbacks._set_params(params)
-
-        self._on_test_begin()
-        callbacks.on_train_begin()
-        for episode in range(nb_episodes):
-            callbacks.on_episode_begin(episode)
-            episode_reward = 0.
-            episode_step = 0
-
-            # Obtain the initial observation by resetting the environment.
-            self.reset_states()
-            observation = deepcopy(env.reset())
-            if self.processor is not None:
-                observation = self.processor.process_observation(observation)
-            assert observation is not None
-
-            # Perform random starts at beginning of episode and do not record them into the experience.
-            # This slightly changes the start position between games.
-            nb_random_start_steps = 0 if nb_max_start_steps == 0 else np.random.randint(
-                nb_max_start_steps)
-            for _ in range(nb_random_start_steps):
-                if start_step_policy is None:
-                    action = env.action_space.sample()
-                else:
-                    action = start_step_policy(observation)
-                if self.processor is not None:
-                    action = self.processor.process_action(action)
-                callbacks.on_action_begin(action)
-                observation, r, done, info = env.step(action)
-                observation = deepcopy(observation)
-                if self.processor is not None:
-                    observation, r, done, info = self.processor.process_step(
-                        observation, r, done, info)
-                callbacks.on_action_end(action)
-                if done:
-                    warnings.warn('Env ended before {} random steps could be performed at the start. You should probably lower the `nb_max_start_steps` parameter.'.format(
-                        nb_random_start_steps))
-                    observation = deepcopy(env.reset())
-                    if self.processor is not None:
-                        observation = self.processor.process_observation(
-                            observation)
-                    break
-
-            # Run the episode until we're done.
-            done = False
-            while not done:
-                callbacks.on_step_begin(episode_step)
-
-                action = self.forward(observation)
-                if self.processor is not None:
-                    action = self.processor.process_action(action)
-                reward = 0.
-                accumulated_info = {}
-                for _ in range(action_repetition):
-                    callbacks.on_action_begin(action)
-                    observation, r, d, info = env.step(action)
-                    observation = deepcopy(observation)
-                    if self.processor is not None:
-                        observation, r, d, info = self.processor.process_step(
-                            observation, r, d, info)
-                    callbacks.on_action_end(action)
-                    reward += r
-                    for key, value in info.items():
-                        if not np.isreal(value):
-                            continue
-                        if key not in accumulated_info:
-                            accumulated_info[key] = np.zeros_like(value)
-                        accumulated_info[key] += value
-                    if d:
-                        done = True
-                        break
-                if nb_max_episode_steps and episode_step >= nb_max_episode_steps - 1:
-                    done = True
-                self.backward(reward, terminal=done)
-                episode_reward += reward
-
-                step_logs = {
-                    'action': action,
-                    'observation': observation,
-                    'reward': reward,
-                    'episode': episode,
-                    'info': accumulated_info,
-                }
-                callbacks.on_step_end(episode_step, step_logs)
-                episode_step += 1
-                self.step += 1
-
-            # We are in a terminal state but the agent hasn't yet seen it. We therefore
-            # perform one more forward-backward call and simply ignore the action before
-            # resetting the environment. We need to pass in `terminal=False` here since
-            # the *next* state, that is the state of the newly reset environment, is
-            # always non-terminal by convention.
-            self.forward(observation)
-            self.backward(0., terminal=False)
-
-            # Report end of episode.
-            episode_logs = {
-                'episode_reward': episode_reward,
-                'nb_steps': episode_step,
-            }
-            callbacks.on_episode_end(episode, episode_logs)
-        callbacks.on_train_end()
-        self._on_test_end()
-
-        return history
-
-    def reset_states(self):
-        """Resets all internally kept states after an episode is completed.
-        """
-        pass
-
-    def forward(self, observation):
-        """Takes an observation from the environment and returns the action to be taken next.
-        If the policy is implemented by a neural network, this corresponds to a forward (inference) pass.
-
-        # Argument
-            observation (object): The current observation from the environment.
-
-        # Returns
-            The next action to be executed in the environment.
-        """
-        raise NotImplementedError()
-
-    def backward(self, reward, terminal):
-        """Updates the agent after having executed the action returned by `forward`.
-        If the policy is implemented by a neural network, this corresponds to a weight update using back-prop.
-
-        # Argument
-            reward (float): The observed reward after executing the action returned by `forward`.
-            terminal (boolean): `True` if the new state of the environment is terminal.
-
-        # Returns
-            List of metrics values
-        """
-        raise NotImplementedError()
-
-    def compile(self, optimizer, metrics=[]):
-        """Compiles an agent and the underlying models to be used for training and testing.
-
-        # Arguments
-            optimizer (`keras.optimizers.Optimizer` instance): The optimizer to be used during training.
-            metrics (list of functions `lambda y_true, y_pred: metric`): The metrics to run during training.
-        """
-        raise NotImplementedError()
-
-    def load_weights(self, filepath):
-        """Loads the weights of an agent from an HDF5 file.
-
-        # Arguments
-            filepath (str): The path to the HDF5 file.
-        """
-        raise NotImplementedError()
-
-    def save_weights(self, filepath, overwrite=False):
-        """Saves the weights of an agent as an HDF5 file.
-
-        # Arguments
-            filepath (str): The path to where the weights should be saved.
-            overwrite (boolean): If `False` and `filepath` already exists, raises an error.
- """ - raise NotImplementedError() - - @property - def layers(self): - """Returns all layers of the underlying model(s). - - If the concrete implementation uses multiple internal models, - this method returns them in a concatenated list. - - # Returns - A list of the model's layers - """ - raise NotImplementedError() - - @property - def metrics_names(self): - """The human-readable names of the agent's metrics. Must return as many names as there - are metrics (see also `compile`). - - # Returns - A list of metric's names (string) - """ - return [] - - def _on_train_begin(self): - """Callback that is called before training begins." - """ - pass - - def _on_train_end(self): - """Callback that is called after training ends." - """ - pass - - def _on_test_begin(self): - """Callback that is called before testing begins." - """ - pass - - def _on_test_end(self): - """Callback that is called after testing ends." - """ - pass - - -class Processor(object): - """Abstract base class for implementing processors. - - A processor acts as a coupling mechanism between an `Agent` and its `Env`. This can - be necessary if your agent has different requirements with respect to the form of the - observations, actions, and rewards of the environment. By implementing a custom processor, - you can effectively translate between the two without having to change the underlaying - implementation of the agent or environment. - - Do not use this abstract base class directly but instead use one of the concrete implementations - or write your own. - """ - - def process_step(self, observation, reward, done, info): - """Processes an entire step by applying the processor to the observation, reward, and info arguments. - - # Arguments - observation (object): An observation as obtained by the environment. - reward (float): A reward as obtained by the environment. - done (boolean): `True` if the environment is in a terminal state, `False` otherwise. - info (dict): The debug info dictionary as obtained by the environment. - - # Returns - The tupel (observation, reward, done, reward) with with all elements after being processed. - """ - observation = self.process_observation(observation) - reward = self.process_reward(reward) - info = self.process_info(info) - return observation, reward, done, info - - def process_observation(self, observation): - """Processes the observation as obtained from the environment for use in an agent and - returns it. - - # Arguments - observation (object): An observation as obtained by the environment - - # Returns - Observation obtained by the environment processed - """ - return observation - - def process_reward(self, reward): - """Processes the reward as obtained from the environment for use in an agent and - returns it. - - # Arguments - reward (float): A reward as obtained by the environment - - # Returns - Reward obtained by the environment processed - """ - return reward - - def process_info(self, info): - """Processes the info as obtained from the environment for use in an agent and - returns it. - - # Arguments - info (dict): An info as obtained by the environment - - # Returns - Info obtained by the environment processed - """ - return info - - def process_action(self, action): - """Processes an action predicted by an agent but before execution in an environment. 
-
-        # Arguments
-            action (int): Action given to the environment
-
-        # Returns
-            Processed action given to the environment
-        """
-        return action
-
-    def process_state_batch(self, batch):
-        """Processes an entire batch of states and returns it.
-
-        # Arguments
-            batch (list): List of states
-
-        # Returns
-            Processed list of states
-        """
-        return batch
-
-    @property
-    def metrics(self):
-        """The metrics of the processor, which will be reported during training.
-
-        # Returns
-            List of `lambda y_true, y_pred: metric` functions.
-        """
-        return []
-
-    @property
-    def metrics_names(self):
-        """The human-readable names of the agent's metrics. Must return as many names as there
-        are metrics (see also `compile`).
-        """
-        return []
-
-
-# Note: the API of the `Env` and `Space` classes is taken from the OpenAI Gym implementation.
-# https://github.com/openai/gym/blob/master/gym/core.py
-
-
-class Env(object):
-    """The abstract environment class that is used by all agents. This class has the exact
-    same API that OpenAI Gym uses so that integrating with it is trivial. In contrast to the
-    OpenAI Gym implementation, this class only defines the abstract methods without any actual
-    implementation.
-
-    To implement your own environment, you need to define the following methods:
-
-    - `step`
-    - `reset`
-    - `render`
-    - `close`
-
-    Refer to the [Gym documentation](https://gym.openai.com/docs/#environments).
-    """
-    reward_range = (-np.inf, np.inf)
-    action_space = None
-    observation_space = None
-
-    def step(self, action):
-        """Run one timestep of the environment's dynamics.
-        Accepts an action and returns a tuple (observation, reward, done, info).
-
-        # Arguments
-            action (object): An action provided by the agent.
-
-        # Returns
-            observation (object): Agent's observation of the current environment.
-            reward (float) : Amount of reward returned after previous action.
-            done (boolean): Whether the episode has ended, in which case further step() calls will return undefined results.
-            info (dict): Contains auxiliary diagnostic information (helpful for debugging, and sometimes learning).
-        """
-        raise NotImplementedError()
-
-    def reset(self):
-        """
-        Resets the state of the environment and returns an initial observation.
-
-        # Returns
-            observation (object): The initial observation of the space. Initial reward is assumed to be 0.
-        """
-        raise NotImplementedError()
-
-    def render(self, mode='human', close=False):
-        """Renders the environment.
-        The set of supported modes varies per environment. (And some
-        environments do not support rendering at all.)
-
-        # Arguments
-            mode (str): The mode to render with.
-            close (bool): Close all open renderings.
-        """
-        raise NotImplementedError()
-
-    def close(self):
-        """Override in your subclass to perform any necessary cleanup.
-        Environments will automatically close() themselves when
-        garbage collected or when the program exits.
-        """
-        raise NotImplementedError()
-
-    def seed(self, seed=None):
-        """Sets the seed for this env's random number generator(s).
-
-        # Returns
-            Returns the list of seeds used in this env's random number generators
-        """
-        raise NotImplementedError()
-
-    def configure(self, *args, **kwargs):
-        """Provides runtime configuration to the environment.
-        This configuration should consist of data that tells your
-        environment how to run (such as an address of a remote server,
-        or path to your ImageNet data). It should not affect the
-        semantics of the environment.
- """ - raise NotImplementedError() - - def __del__(self): - self.close() - - def __str__(self): - return '<{} instance>'.format(type(self).__name__) - - -class Space(object): - """Abstract model for a space that is used for the state and action spaces. This class has the - exact same API that OpenAI Gym uses so that integrating with it is trivial. - - Please refer to [Gym Documentation](https://gym.openai.com/docs/#spaces) - """ - - def sample(self, seed=None): - """Uniformly randomly sample a random element of this space. - """ - raise NotImplementedError() - - def contains(self, x): - """Return boolean specifying if x is a valid member of this space - """ - raise NotImplementedError() -from __future__ import absolute_import -from collections import deque, namedtuple -import warnings -import random - -import numpy as np - - -# This is to be understood as a transition: Given `state0`, performing `action` -# yields `reward` and results in `state1`, which might be `terminal`. -Experience = namedtuple( - 'Experience', 'state0, action, reward, state1, terminal1') - - -def sample_batch_indexes(low, high, size): - """Return a sample of (size) unique elements between low and high - - # Argument - low (int): The minimum value for our samples - high (int): The maximum value for our samples - size (int): The number of samples to pick - - # Returns - A list of samples of length size, with values between low and high - """ - if high - low >= size: - # We have enough data. Draw without replacement, that is each index is unique in the - # batch. We cannot use `np.random.choice` here because it is horribly inefficient as - # the memory grows. See https://github.com/numpy/numpy/issues/2764 for a discussion. - # `random.sample` does the same thing (drawing without replacement) and is way faster. - try: - r = xrange(low, high) - except NameError: - r = range(low, high) - batch_idxs = random.sample(r, size) - else: - # Not enough data. Help ourselves with sampling from the range, but the same index - # can occur multiple times. This is not good and should be avoided by picking a - # large enough warm-up phase. - warnings.warn( - 'Not enough entries to sample without replacement. Consider increasing your warm-up phase to avoid oversampling!') - batch_idxs = np.random.random_integers(low, high - 1, size=size) - assert len(batch_idxs) == size - return batch_idxs - - -class RingBuffer(object): - def __init__(self, maxlen): - self.maxlen = maxlen - self.data = deque(maxlen=maxlen) - - def __len__(self): - return self.length() - - def __getitem__(self, idx): - """Return element of buffer at specific index - - # Argument - idx (int): Index wanted - - # Returns - The element of buffer at given index - """ - if idx < 0 or idx >= self.length(): - raise KeyError() - return self.data[idx] - - def append(self, v): - """Append an element to the buffer - - # Argument - v (object): Element to append - """ - self.data.append(v) - - def length(self): - """Return the length of Deque - - # Argument - None - - # Returns - The lenght of deque element - """ - return len(self.data) - - -def zeroed_observation(observation): - """Return an array of zeros with same shape as given observation - - # Argument - observation (list): List of observation - - # Return - A np.ndarray of zeros with observation.shape - """ - if hasattr(observation, 'shape'): - return np.zeros(observation.shape) - elif hasattr(observation, '__iter__'): - out = [] - for x in observation: - out.append(zeroed_observation(x)) - return out - else: - return 0. 
-
-
-class Memory(object):
-    def __init__(self, window_length, ignore_episode_boundaries=False):
-        self.window_length = window_length
-        self.ignore_episode_boundaries = ignore_episode_boundaries
-
-        self.recent_observations = deque(maxlen=window_length)
-        self.recent_terminals = deque(maxlen=window_length)
-
-    def sample(self, batch_size, batch_idxs=None):
-        raise NotImplementedError()
-
-    def append(self, observation, action, reward, terminal, training=True):
-        self.recent_observations.append(observation)
-        self.recent_terminals.append(terminal)
-
-    def get_recent_state(self, current_observation):
-        """Return list of last observations
-
-        # Argument
-            current_observation (object): Last observation
-
-        # Returns
-            A list of the last observations
-        """
-        # This code is slightly complicated by the fact that subsequent observations might be
-        # from different episodes. We ensure that an experience never spans multiple episodes.
-        # This is probably not that important in practice but it seems cleaner.
-        state = [current_observation]
-        idx = len(self.recent_observations) - 1
-        for offset in range(0, self.window_length - 1):
-            current_idx = idx - offset
-            current_terminal = self.recent_terminals[current_idx -
-                                                     1] if current_idx - 1 >= 0 else False
-            if current_idx < 0 or (not self.ignore_episode_boundaries and current_terminal):
-                # The previously handled observation was terminal, don't add the current one.
-                # Otherwise we would leak into a different episode.
-                break
-            state.insert(0, self.recent_observations[current_idx])
-        while len(state) < self.window_length:
-            state.insert(0, zeroed_observation(state[0]))
-        return state
-
-    def get_config(self):
-        """Return configuration (window_length, ignore_episode_boundaries) for Memory
-
-        # Return
-            A dict with keys window_length and ignore_episode_boundaries
-        """
-        config = {
-            'window_length': self.window_length,
-            'ignore_episode_boundaries': self.ignore_episode_boundaries,
-        }
-        return config
-
-
-class SequentialMemory(Memory):
-    def __init__(self, limit, **kwargs):
-        super(SequentialMemory, self).__init__(**kwargs)
-
-        self.limit = limit
-
-        # Do not use deque to implement the memory. This data structure may seem convenient but
-        # it is way too slow on random access. Instead, we use our own ring buffer implementation.
-        self.actions = RingBuffer(limit)
-        self.rewards = RingBuffer(limit)
-        self.terminals = RingBuffer(limit)
-        self.observations = RingBuffer(limit)
-
-    def sample(self, batch_size, batch_idxs=None):
-        """Return a randomized batch of experiences
-
-        # Argument
-            batch_size (int): Size of the batch
-            batch_idxs (int): Indexes to extract
-        # Returns
-            A list of experiences randomly selected
-        """
-        # It is not possible to tell whether the first state in the memory is terminal, because it
-        # would require access to the "terminal" flag associated to the previous state. As a result
-        # we will never return this first state (only using `self.terminals[0]` to know whether the
-        # second state is terminal).
-        # In addition we need enough entries to fill the desired window length.
-        assert self.nb_entries >= self.window_length + \
-            2, 'not enough entries in the memory'
-
-        if batch_idxs is None:
-            # Draw random indexes such that we have enough entries before each index to fill the
-            # desired window length.
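# (Concretely: indexes are first drawn from [window_length, nb_entries - 1) and
#  then shifted by +1 below, so every final `idx` satisfies
#  window_length + 1 <= idx < nb_entries. Each experience then uses
#  observations[idx - window_length .. idx - 1] as `state0` and
#  observations[idx - window_length + 1 .. idx] as `state1`.)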
-            batch_idxs = sample_batch_indexes(
-                self.window_length, self.nb_entries - 1, size=batch_size)
-        batch_idxs = np.array(batch_idxs) + 1
-        assert np.min(batch_idxs) >= self.window_length + 1
-        assert np.max(batch_idxs) < self.nb_entries
-        assert len(batch_idxs) == batch_size
-
-        # Create experiences
-        experiences = []
-        for idx in batch_idxs:
-            terminal0 = self.terminals[idx - 2]
-            while terminal0:
-                # Skip this transition because the environment was reset here. Select a new, random
-                # transition and use this instead. This may cause the batch to contain the same
-                # transition twice.
-                idx = sample_batch_indexes(
-                    self.window_length + 1, self.nb_entries, size=1)[0]
-                terminal0 = self.terminals[idx - 2]
-            assert self.window_length + 1 <= idx < self.nb_entries
-
-            # This code is slightly complicated by the fact that subsequent observations might be
-            # from different episodes. We ensure that an experience never spans multiple episodes.
-            # This is probably not that important in practice but it seems cleaner.
-            state0 = [self.observations[idx - 1]]
-            for offset in range(0, self.window_length - 1):
-                current_idx = idx - 2 - offset
-                assert current_idx >= 1
-                current_terminal = self.terminals[current_idx - 1]
-                if current_terminal and not self.ignore_episode_boundaries:
-                    # The previously handled observation was terminal, don't add the current one.
-                    # Otherwise we would leak into a different episode.
-                    break
-                state0.insert(0, self.observations[current_idx])
-            while len(state0) < self.window_length:
-                state0.insert(0, zeroed_observation(state0[0]))
-            action = self.actions[idx - 1]
-            reward = self.rewards[idx - 1]
-            terminal1 = self.terminals[idx - 1]
-
-            # Okay, now we need to create the follow-up state. This is state0 shifted one timestep
-            # to the right. Again, we need to be careful to not include an observation from the next
-            # episode if the last state is terminal.
-            state1 = [np.copy(x) for x in state0[1:]]
-            state1.append(self.observations[idx])
-
-            assert len(state0) == self.window_length
-            assert len(state1) == len(state0)
-            experiences.append(Experience(state0=state0, action=action, reward=reward,
-                                          state1=state1, terminal1=terminal1))
-        assert len(experiences) == batch_size
-        return experiences
-
-    def append(self, observation, action, reward, terminal, training=True):
-        """Append an observation to the memory
-
-        # Argument
-            observation (dict): Observation returned by environment
-            action (int): Action taken to obtain this observation
-            reward (float): Reward obtained by taking this action
-            terminal (boolean): Is the state terminal
-        """
-        super(SequentialMemory, self).append(observation,
-                                             action, reward, terminal, training=training)
-
-        # This needs to be understood as follows: in `observation`, take `action`, obtain `reward`
-        # and whether the next state is `terminal` or not.
-        if training:
-            self.observations.append(observation)
-            self.actions.append(action)
-            self.rewards.append(reward)
-            self.terminals.append(terminal)
-
-    @property
-    def nb_entries(self):
-        """Return number of observations
-
-        # Returns
-            Number of observations
-        """
-        return len(self.observations)
-
-    def get_config(self):
-        """Return configurations of SequentialMemory
-
-        # Returns
-            Dict of config
-        """
-        config = super(SequentialMemory, self).get_config()
-        config['limit'] = self.limit
-        return config
-
-
-class EpisodeParameterMemory(Memory):
-    def __init__(self, limit, **kwargs):
-        super(EpisodeParameterMemory, self).__init__(**kwargs)
-        self.limit = limit
-
-        self.params = RingBuffer(limit)
-        self.intermediate_rewards = []
-        self.total_rewards = RingBuffer(limit)
-
-    def sample(self, batch_size, batch_idxs=None):
-        """Return a randomized batch of params and rewards
-
-        # Argument
-            batch_size (int): Size of the batch
-            batch_idxs (int): Indexes to extract
-        # Returns
-            A list of params randomly selected and a list of associated rewards
-        """
-        if batch_idxs is None:
-            batch_idxs = sample_batch_indexes(
-                0, self.nb_entries, size=batch_size)
-        assert len(batch_idxs) == batch_size
-
-        batch_params = []
-        batch_total_rewards = []
-        for idx in batch_idxs:
-            batch_params.append(self.params[idx])
-            batch_total_rewards.append(self.total_rewards[idx])
-        return batch_params, batch_total_rewards
-
-    def append(self, observation, action, reward, terminal, training=True):
-        """Append a reward to the memory
-
-        # Argument
-            observation (dict): Observation returned by environment
-            action (int): Action taken to obtain this observation
-            reward (float): Reward obtained by taking this action
-            terminal (boolean): Is the state terminal
-        """
-        super(EpisodeParameterMemory, self).append(
-            observation, action, reward, terminal, training=training)
-        if training:
-            self.intermediate_rewards.append(reward)
-
-    def finalize_episode(self, params):
-        """Closes the current episode, sums up rewards and stores the parameters
-
-        # Argument
-            params (object): Parameters associated with the episode to be stored and then retrieved back in sample()
-        """
-        total_reward = sum(self.intermediate_rewards)
-        self.total_rewards.append(total_reward)
-        self.params.append(params)
-        self.intermediate_rewards = []
-
-    @property
-    def nb_entries(self):
-        """Return number of episode rewards
-
-        # Returns
-            Number of episode rewards
-        """
-        return len(self.total_rewards)
-
-    def get_config(self):
-        """Return configurations of EpisodeParameterMemory
-
-        # Returns
-            Dict of config
-        """
-        config = super(EpisodeParameterMemory, self).get_config()
-        config['limit'] = self.limit
-        return config
-from __future__ import division
-import numpy as np
-
-from rl.util import *
-
-
-class Policy(object):
-    """Abstract base class for all implemented policies.
-
-    Each policy helps with selection of action to take on an environment.
-
-    Do not use this abstract base class directly but instead use one of the concrete policies implemented.
-    To implement your own policy, you have to implement the following methods:
-
-    - `select_action`
-
-    # Arguments
-        agent (rl.core.Agent): Agent used
-    """
-
-    def _set_agent(self, agent):
-        self.agent = agent
-
-    @property
-    def metrics_names(self):
-        return []
-
-    @property
-    def metrics(self):
-        return []
-
-    def select_action(self, **kwargs):
-        raise NotImplementedError()
-
-    def get_config(self):
-        """Return configuration of the policy
-
-        # Returns
-            Configuration as dict
-        """
-        return {}
-
-
-class LinearAnnealedPolicy(Policy):
-    """Implement the linear annealing policy
-
-    Linear Annealing Policy computes a current threshold value and
-    transfers it to an inner policy which chooses the action. The threshold
-    value is following a linear function decreasing over time."""
-
-    def __init__(self, inner_policy, attr, value_max, value_min, value_test, nb_steps):
-        if not hasattr(inner_policy, attr):
-            raise ValueError(
-                'Policy does not have attribute "{}".'.format(attr))
-
-        super(LinearAnnealedPolicy, self).__init__()
-
-        self.inner_policy = inner_policy
-        self.attr = attr
-        self.value_max = value_max
-        self.value_min = value_min
-        self.value_test = value_test
-        self.nb_steps = nb_steps
-
-    def get_current_value(self):
-        """Return current annealing value
-
-        # Returns
-            Value to use in annealing
-        """
-        if self.agent.training:
-            # Linear annealed: f(x) = ax + b.
-            a = -float(self.value_max - self.value_min) / float(self.nb_steps)
-            b = float(self.value_max)
-            value = max(self.value_min, a * float(self.agent.step) + b)
-        else:
-            value = self.value_test
-        return value
-
-    def select_action(self, **kwargs):
-        """Choose an action to perform
-
-        # Returns
-            Action to take (int)
-        """
-        setattr(self.inner_policy, self.attr, self.get_current_value())
-        return self.inner_policy.select_action(**kwargs)
-
-    @property
-    def metrics_names(self):
-        """Return names of metrics
-
-        # Returns
-            List of metric names
-        """
-        return ['mean_{}'.format(self.attr)]
-
-    @property
-    def metrics(self):
-        """Return metrics values
-
-        # Returns
-            List of metric values
-        """
-
-        return [getattr(self.inner_policy, self.attr)]
-
-    def get_config(self):
-        """Return configurations of LinearAnnealedPolicy
-
-        # Returns
-            Dict of config
-        """
-        config = super(LinearAnnealedPolicy, self).get_config()
-        config['attr'] = self.attr
-        config['value_max'] = self.value_max
-        config['value_min'] = self.value_min
-        config['value_test'] = self.value_test
-        config['nb_steps'] = self.nb_steps
-        config['inner_policy'] = get_object_config(self.inner_policy)
-        return config
-
-
-class SoftmaxPolicy(Policy):
-    """ Implement softmax policy for multinomial distribution
-
-    Simple policy:
-
-    - takes an action according to the probability distribution
-
-    """
-
-    def select_action(self, nb_actions, probs):
-        """Return the selected action
-
-        # Arguments
-            probs (np.ndarray): Probability for each action
-
-        # Returns
-            action
-
-        """
-        action = np.random.choice(range(nb_actions), p=probs)
-        return action
-
-
-class EpsGreedyQPolicy(Policy):
-    """Implement the epsilon greedy policy
-
-    Eps Greedy policy either:
-
-    - takes a random action with probability epsilon
-    - takes the current best action with probability (1 - epsilon)
-    """
-
-    def __init__(self, eps=.1):
-        super(EpsGreedyQPolicy, self).__init__()
-        self.eps = eps
-
-    def select_action(self, q_values):
-        """Return the selected action
-
-        # Arguments
-            q_values (np.ndarray): List of the estimations of Q for each action
-
-        # Returns
-            Selected action
-        """
-        assert q_values.ndim == 1
nb_actions = q_values.shape[0] - - if np.random.uniform() < self.eps: - action = np.random.randint(0, nb_actions) - else: - action = np.argmax(q_values) - return action - - def get_config(self): - """Return configurations of EpsGreedyQPolicy - - # Returns - Dict of config - """ - config = super(EpsGreedyQPolicy, self).get_config() - config['eps'] = self.eps - return config - - -class GreedyQPolicy(Policy): - """Implement the greedy policy - - Greedy policy returns the current best action according to q_values - """ - - def select_action(self, q_values): - """Return the selected action - - # Arguments - q_values (np.ndarray): List of the estimations of Q for each action - - # Returns - Selection action - """ - assert q_values.ndim == 1 - action = np.argmax(q_values) - return action - - -class BoltzmannQPolicy(Policy): - """Implement the Boltzmann Q Policy - - Boltzmann Q Policy builds a probability law on q values and returns - an action selected randomly according to this law. - """ - - def __init__(self, tau=1., clip=(-500., 500.)): - super(BoltzmannQPolicy, self).__init__() - self.tau = tau - self.clip = clip - - def select_action(self, q_values): - """Return the selected action - - # Arguments - q_values (np.ndarray): List of the estimations of Q for each action - - # Returns - Selection action - """ - assert q_values.ndim == 1 - q_values = q_values.astype('float64') - nb_actions = q_values.shape[0] - - exp_values = np.exp(np.clip(q_values / self.tau, - self.clip[0], self.clip[1])) - probs = exp_values / np.sum(exp_values) - action = np.random.choice(range(nb_actions), p=probs) - return action - - def get_config(self): - """Return configurations of BoltzmannQPolicy - - # Returns - Dict of config - """ - config = super(BoltzmannQPolicy, self).get_config() - config['tau'] = self.tau - config['clip'] = self.clip - return config - - -class MaxBoltzmannQPolicy(Policy): - """ - A combination of the eps-greedy and Boltzman q-policy. - - Wiering, M.: Explorations in Efficient Reinforcement Learning. - PhD thesis, University of Amsterdam, Amsterdam (1999) - - https://pure.uva.nl/ws/files/3153478/8461_UBA003000033.pdf - """ - - def __init__(self, eps=.1, tau=1., clip=(-500., 500.)): - super(MaxBoltzmannQPolicy, self).__init__() - self.eps = eps - self.tau = tau - self.clip = clip - - def select_action(self, q_values): - """Return the selected action - The selected action follows the BoltzmannQPolicy with probability epsilon - or return the Greedy Policy with probability (1 - epsilon) - - # Arguments - q_values (np.ndarray): List of the estimations of Q for each action - - # Returns - Selection action - """ - assert q_values.ndim == 1 - q_values = q_values.astype('float64') - nb_actions = q_values.shape[0] - - if np.random.uniform() < self.eps: - exp_values = np.exp( - np.clip(q_values / self.tau, self.clip[0], self.clip[1])) - probs = exp_values / np.sum(exp_values) - action = np.random.choice(range(nb_actions), p=probs) - else: - action = np.argmax(q_values) - return action - - def get_config(self): - """Return configurations of MaxBoltzmannQPolicy - - # Returns - Dict of config - """ - config = super(MaxBoltzmannQPolicy, self).get_config() - config['eps'] = self.eps - config['tau'] = self.tau - config['clip'] = self.clip - return config - - -class BoltzmannGumbelQPolicy(Policy): - """Implements Boltzmann-Gumbel exploration (BGE) adapted for Q learning - based on the paper Boltzmann Exploration Done Right - (https://arxiv.org/pdf/1705.10257.pdf). 
- - BGE is invariant with respect to the mean of the rewards but not their - variance. The parameter C, which defaults to 1, can be used to correct for - this, and should be set to the least upper bound on the standard deviation - of the rewards. - - BGE is only available for training, not testing. For testing purposes, you - can achieve approximately the same result as BGE after training for N steps - on K actions with parameter C by using the BoltzmannQPolicy and setting - tau = C/sqrt(N/K).""" - - def __init__(self, C=1.0): - assert C > 0, "BoltzmannGumbelQPolicy C parameter must be > 0, not " + \ - repr(C) - super(BoltzmannGumbelQPolicy, self).__init__() - self.C = C - self.action_counts = None - - def select_action(self, q_values): - """Return the selected action - - # Arguments - q_values (np.ndarray): List of the estimations of Q for each action - - # Returns - Selection action - """ - # We can't use BGE during testing, since we don't have access to the - # action_counts at the end of training. - assert self.agent.training, "BoltzmannGumbelQPolicy should only be used for training, not testing" - - assert q_values.ndim == 1, q_values.ndim - q_values = q_values.astype('float64') - - # If we are starting training, we should reset the action_counts. - # Otherwise, action_counts should already be initialized, since we - # always do so when we begin training. - if self.agent.step == 0: - self.action_counts = np.ones(q_values.shape) - assert self.action_counts is not None, self.agent.step - assert self.action_counts.shape == q_values.shape, ( - self.action_counts.shape, q_values.shape) - - beta = self.C/np.sqrt(self.action_counts) - Z = np.random.gumbel(size=q_values.shape) - - perturbation = beta * Z - perturbed_q_values = q_values + perturbation - action = np.argmax(perturbed_q_values) - - self.action_counts[action] += 1 - return action - - def get_config(self): - """Return configurations of BoltzmannGumbelQPolicy - - # Returns - Dict of config - """ - config = super(BoltzmannGumbelQPolicy, self).get_config() - config['C'] = self.C - return config -import numpy as np - -from rl.core import Processor -from rl.util import WhiteningNormalizer - - -class MultiInputProcessor(Processor): - """Converts observations from an environment with multiple observations for use in a neural network - policy. - - In some cases, you have environments that return multiple different observations per timestep - (in a robotics context, for example, a camera may be used to view the scene and a joint encoder may - be used to report the angles for each joint). Usually, this can be handled by a policy that has - multiple inputs, one for each modality. However, observations are returned by the environment - in the form of a tuple `[(modality1_t, modality2_t, ..., modalityn_t) for t in T]` but the neural network - expects them in per-modality batches like so: `[[modality1_1, ..., modality1_T], ..., [[modalityn_1, ..., modalityn_T]]`. - This processor converts observations appropriate for this use case. - - # Arguments - nb_inputs (integer): The number of inputs, that is different modalities, to be used. - Your neural network that you use for the policy must have a corresponding number of - inputs. 
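-
-     For example, with `nb_inputs=2` (say, camera frames plus joint angles), a
-     batch of states is split into two per-modality arrays, one per network
-     input, by `process_state_batch` below.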
- """ - - def __init__(self, nb_inputs): - self.nb_inputs = nb_inputs - - def process_state_batch(self, state_batch): - input_batches = [[] for x in range(self.nb_inputs)] - for state in state_batch: - processed_state = [[] for x in range(self.nb_inputs)] - for observation in state: - assert len(observation) == self.nb_inputs - for o, s in zip(observation, processed_state): - s.append(o) - for idx, s in enumerate(processed_state): - input_batches[idx].append(s) - return [np.array(x) for x in input_batches] - - -class WhiteningNormalizerProcessor(Processor): - """Normalizes the observations to have zero mean and standard deviation of one, - i.e. it applies whitening to the inputs. - - This typically helps significantly with learning, especially if different dimensions are - on different scales. However, it complicates training in the sense that you will have to store - these weights alongside the policy if you intend to load it later. It is the responsibility of - the user to do so. - """ - - def __init__(self): - self.normalizer = None - - def process_state_batch(self, batch): - if self.normalizer is None: - self.normalizer = WhiteningNormalizer( - shape=batch.shape[1:], dtype=batch.dtype) - self.normalizer.update(batch) - return self.normalizer.normalize(batch) -from __future__ import division -import numpy as np - - -class RandomProcess(object): - def reset_states(self): - pass - - -class AnnealedGaussianProcess(RandomProcess): - def __init__(self, mu, sigma, sigma_min, n_steps_annealing): - self.mu = mu - self.sigma = sigma - self.n_steps = 0 - - if sigma_min is not None: - self.m = -float(sigma - sigma_min) / float(n_steps_annealing) - self.c = sigma - self.sigma_min = sigma_min - else: - self.m = 0. - self.c = sigma - self.sigma_min = sigma - - @property - def current_sigma(self): - sigma = max(self.sigma_min, self.m * float(self.n_steps) + self.c) - return sigma - - -class GaussianWhiteNoiseProcess(AnnealedGaussianProcess): - def __init__(self, mu=0., sigma=1., sigma_min=None, n_steps_annealing=1000, size=1): - super(GaussianWhiteNoiseProcess, self).__init__(mu=mu, sigma=sigma, - sigma_min=sigma_min, n_steps_annealing=n_steps_annealing) - self.size = size - - def sample(self): - sample = np.random.normal(self.mu, self.current_sigma, self.size) - self.n_steps += 1 - return sample - -# Based on http://math.stackexchange.com/questions/1287634/implementing-ornstein-uhlenbeck-in-matlab - - -class OrnsteinUhlenbeckProcess(AnnealedGaussianProcess): - def __init__(self, theta, mu=0., sigma=1., dt=1e-2, size=1, sigma_min=None, n_steps_annealing=1000): - super(OrnsteinUhlenbeckProcess, self).__init__(mu=mu, sigma=sigma, - sigma_min=sigma_min, n_steps_annealing=n_steps_annealing) - self.theta = theta - self.mu = mu - self.dt = dt - self.size = size - self.reset_states() - - def sample(self): - x = self.x_prev + self.theta * (self.mu - self.x_prev) * self.dt + \ - self.current_sigma * np.sqrt(self.dt) * \ - np.random.normal(size=self.size) - self.x_prev = x - self.n_steps += 1 - return x - - def reset_states(self): - self.x_prev = np.random.normal(self.mu, self.current_sigma, self.size) -import numpy as np - -from keras.models import model_from_config, Sequential, Model, model_from_config -import keras.optimizers as optimizers -import keras.backend as K - - -def clone_model(model, custom_objects={}): - # Requires Keras 1.0.7 since get_config has breaking changes. 
-     config = {
-         'class_name': model.__class__.__name__,
-         'config': model.get_config(),
-     }
-     clone = model_from_config(config, custom_objects=custom_objects)
-     clone.set_weights(model.get_weights())
-     return clone
-
-
- def clone_optimizer(optimizer):
-     if type(optimizer) is str:
-         return optimizers.get(optimizer)
-     # Requires Keras 1.0.7 since get_config has breaking changes.
-     params = dict([(k, v) for k, v in optimizer.get_config().items()])
-     config = {
-         'class_name': optimizer.__class__.__name__,
-         'config': params,
-     }
-     if hasattr(optimizers, 'optimizer_from_config'):
-         # COMPATIBILITY: Keras < 2.0
-         clone = optimizers.optimizer_from_config(config)
-     else:
-         clone = optimizers.deserialize(config)
-     return clone
-
-
- def get_soft_target_model_updates(target, source, tau):
-     target_weights = target.trainable_weights + \
-         sum([l.non_trainable_weights for l in target.layers], [])
-     source_weights = source.trainable_weights + \
-         sum([l.non_trainable_weights for l in source.layers], [])
-     assert len(target_weights) == len(source_weights)
-
-     # Create updates.
-     updates = []
-     for tw, sw in zip(target_weights, source_weights):
-         updates.append((tw, tau * sw + (1. - tau) * tw))
-     return updates
-
-
- def get_object_config(o):
-     if o is None:
-         return None
-
-     config = {
-         'class_name': o.__class__.__name__,
-         'config': o.get_config()
-     }
-     return config
-
-
- def huber_loss(y_true, y_pred, clip_value):
-     # Huber loss, see https://en.wikipedia.org/wiki/Huber_loss and
-     # https://medium.com/@karpathy/yes-you-should-understand-backprop-e2f06eab496b
-     # for details.
-     assert clip_value > 0.
-
-     x = y_true - y_pred
-     if np.isinf(clip_value):
-         # Special case for infinity since TensorFlow has problems
-         # if we compare `K.abs(x) < np.inf`.
-         return .5 * K.square(x)
-
-     condition = K.abs(x) < clip_value
-     squared_loss = .5 * K.square(x)
-     linear_loss = clip_value * (K.abs(x) - .5 * clip_value)
-     if K.backend() == 'tensorflow':
-         import tensorflow as tf
-         if hasattr(tf, 'select'):
-             # condition, true, false
-             return tf.select(condition, squared_loss, linear_loss)
-         else:
-             # condition, true, false
-             return tf.where(condition, squared_loss, linear_loss)
-     elif K.backend() == 'theano':
-         from theano import tensor as T
-         return T.switch(condition, squared_loss, linear_loss)
-     else:
-         raise RuntimeError('Unknown backend "{}".'.format(K.backend()))
-
-
- class AdditionalUpdatesOptimizer(optimizers.Optimizer):
-     def __init__(self, optimizer, additional_updates):
-         super(AdditionalUpdatesOptimizer, self).__init__()
-         self.optimizer = optimizer
-         self.additional_updates = additional_updates
-
-     def get_updates(self, params, loss):
-         updates = self.optimizer.get_updates(params=params, loss=loss)
-         updates += self.additional_updates
-         self.updates = updates
-         return self.updates
-
-     def get_config(self):
-         return self.optimizer.get_config()
-
-
- # Based on https://github.com/openai/baselines/blob/master/baselines/common/mpi_running_mean_std.py
- class WhiteningNormalizer(object):
-     def __init__(self, shape, eps=1e-2, dtype=np.float64):
-         self.eps = eps
-         self.shape = shape
-         self.dtype = dtype
-
-         self._sum = np.zeros(shape, dtype=dtype)
-         self._sumsq = np.zeros(shape, dtype=dtype)
-         self._count = 0
-
-         self.mean = np.zeros(shape, dtype=dtype)
-         self.std = np.ones(shape, dtype=dtype)
-
-     def normalize(self, x):
-         return (x - self.mean) / self.std
-
-     def denormalize(self, x):
-         return self.std * x + self.mean
-
-     def update(self, x):
-         if x.ndim == len(self.shape):
-             x = x.reshape(-1, *self.shape)
-         assert x.shape[1:] == self.shape
-
-         self._count += x.shape[0]
-         self._sum += np.sum(x, axis=0)
-         self._sumsq += np.sum(np.square(x), axis=0)
-
-         self.mean = self._sum / float(self._count)
-         self.std = np.sqrt(np.maximum(
-             np.square(self.eps), self._sumsq / float(self._count) - np.square(self.mean)))
- from __future__ import absolute_import
- from .dqn import DQNAgent, NAFAgent, ContinuousDQNAgent
- from .ddpg import DDPGAgent
- from .cem import CEMAgent
- from .sarsa import SarsaAgent, SARSAAgent
- from __future__ import division
- from collections import deque
- from copy import deepcopy
-
- import numpy as np
- import keras.backend as K
- from keras.models import Model
-
- from rl.core import Agent
- from rl.util import *
-
-
- class CEMAgent(Agent):
-     """Write me
-     """
-
-     def __init__(self, model, nb_actions, memory, batch_size=50, nb_steps_warmup=1000,
-                  train_interval=50, elite_frac=0.05, memory_interval=1, theta_init=None,
-                  noise_decay_const=0.0, noise_ampl=0.0, **kwargs):
-         super(CEMAgent, self).__init__(**kwargs)
-
-         # Parameters.
-         self.nb_actions = nb_actions
-         self.batch_size = batch_size
-         self.elite_frac = elite_frac
-         self.num_best = int(self.batch_size * self.elite_frac)
-         self.nb_steps_warmup = nb_steps_warmup
-         self.train_interval = train_interval
-         self.memory_interval = memory_interval
-
-         # if using noisy CEM, the minimum standard deviation will be ampl * exp(-decay_const * step)
-         self.noise_decay_const = noise_decay_const
-         self.noise_ampl = noise_ampl
-
-         # default initial mean & stdev, override these by passing a theta_init argument
-         self.init_mean = 0.0
-         self.init_stdev = 1.0
-
-         # Related objects.
-         self.memory = memory
-         self.model = model
-         self.shapes = [w.shape for w in model.get_weights()]
-         self.sizes = [w.size for w in model.get_weights()]
-         self.num_weights = sum(self.sizes)
-
-         # store the best result seen during training, as a tuple (reward, flat_weights)
-         self.best_seen = (-np.inf, np.zeros(self.num_weights))
-
-         self.theta = np.zeros(self.num_weights*2)
-         self.update_theta(theta_init)
-
-         # State.
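-         # `theta` above packs the sampling distribution over the flattened
-         # network weights as [means, stdevs] (length 2 * num_weights); each
-         # episode draws a weight vector from this diagonal Gaussian.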
- self.episode = 0 - self.compiled = False - self.reset_states() - - def compile(self): - self.model.compile(optimizer='sgd', loss='mse') - self.compiled = True - - def load_weights(self, filepath): - self.model.load_weights(filepath) - - def save_weights(self, filepath, overwrite=False): - self.model.save_weights(filepath, overwrite=overwrite) - - def get_weights_flat(self, weights): - weights_flat = np.zeros(self.num_weights) - - pos = 0 - for i_layer, size in enumerate(self.sizes): - weights_flat[pos:pos+size] = weights[i_layer].flatten() - pos += size - return weights_flat - - def get_weights_list(self, weights_flat): - weights = [] - pos = 0 - for i_layer, size in enumerate(self.sizes): - arr = weights_flat[pos:pos+size].reshape(self.shapes[i_layer]) - weights.append(arr) - pos += size - return weights - - def reset_states(self): - self.recent_observation = None - self.recent_action = None - - def select_action(self, state, stochastic=False): - batch = np.array([state]) - if self.processor is not None: - batch = self.processor.process_state_batch(batch) - - action = self.model.predict_on_batch(batch).flatten() - if stochastic or self.training: - return np.random.choice(np.arange(self.nb_actions), p=np.exp(action) / np.sum(np.exp(action))) - return np.argmax(action) - - def update_theta(self, theta): - if (theta is not None): - assert theta.shape == self.theta.shape, "Invalid theta, shape is {0} but should be {1}".format( - theta.shape, self.theta.shape) - assert (not np.isnan(theta).any() - ), "Invalid theta, NaN encountered" - assert (theta[self.num_weights:] >= 0.).all( - ), "Invalid theta, standard deviations must be nonnegative" - self.theta = theta - else: - means = np.ones(self.num_weights) * self.init_mean - stdevs = np.ones(self.num_weights) * self.init_stdev - self.theta = np.hstack((means, stdevs)) - - def choose_weights(self): - mean = self.theta[:self.num_weights] - std = self.theta[self.num_weights:] - weights_flat = std * np.random.randn(self.num_weights) + mean - - sampled_weights = self.get_weights_list(weights_flat) - self.model.set_weights(sampled_weights) - - def forward(self, observation): - # Select an action. - state = self.memory.get_recent_state(observation) - action = self.select_action(state) - - # Book-keeping. - self.recent_observation = observation - self.recent_action = action - - return action - - @property - def layers(self): - return self.model.layers[:] - - def backward(self, reward, terminal): - # Store most recent experience in memory. - if self.step % self.memory_interval == 0: - self.memory.append(self.recent_observation, self.recent_action, reward, terminal, - training=self.training) - - metrics = [np.nan for _ in self.metrics_names] - if not self.training: - # We're done here. No need to update the experience memory since we only use the working - # memory to obtain the state over the most recent observations. 
-             return metrics
-
-         if terminal:
-             params = self.get_weights_flat(self.model.get_weights())
-             self.memory.finalize_episode(params)
-
-             if self.step > self.nb_steps_warmup and self.episode % self.train_interval == 0:
-                 params, reward_totals = self.memory.sample(self.batch_size)
-                 best_idx = np.argsort(np.array(reward_totals))[-self.num_best:]
-                 best = np.vstack([params[i] for i in best_idx])
-
-                 if reward_totals[best_idx[-1]] > self.best_seen[0]:
-                     self.best_seen = (
-                         reward_totals[best_idx[-1]], params[best_idx[-1]])
-
-                 metrics = [np.mean(np.array(reward_totals)[best_idx])]
-                 if self.processor is not None:
-                     metrics += self.processor.metrics
-                 min_std = self.noise_ampl * \
-                     np.exp(-self.step * self.noise_decay_const)
-
-                 mean = np.mean(best, axis=0)
-                 std = np.std(best, axis=0) + min_std
-                 new_theta = np.hstack((mean, std))
-                 self.update_theta(new_theta)
-             self.choose_weights()
-             self.episode += 1
-         return metrics
-
-     def _on_train_end(self):
-         self.model.set_weights(self.get_weights_list(self.best_seen[1]))
-
-     @property
-     def metrics_names(self):
-         names = ['mean_best_reward']
-         if self.processor is not None:
-             names += self.processor.metrics_names[:]
-         return names
- from __future__ import division
- from collections import deque
- import os
- import warnings
-
- import numpy as np
- import keras.backend as K
- import keras.optimizers as optimizers
-
- from rl.core import Agent
- from rl.random import OrnsteinUhlenbeckProcess
- from rl.util import *
-
-
- def mean_q(y_true, y_pred):
-     return K.mean(K.max(y_pred, axis=-1))
-
-
- # Deep DPG as described by Lillicrap et al. (2015)
- # http://arxiv.org/pdf/1509.02971v2.pdf
- # http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.646.4324&rep=rep1&type=pdf
- class DDPGAgent(Agent):
-     """Write me
-     """
-
-     def __init__(self, nb_actions, actor, critic, critic_action_input, memory,
-                  gamma=.99, batch_size=32, nb_steps_warmup_critic=1000, nb_steps_warmup_actor=1000,
-                  train_interval=1, memory_interval=1, delta_range=None, delta_clip=np.inf,
-                  random_process=None, custom_model_objects={}, target_model_update=.001, **kwargs):
-         if hasattr(actor.output, '__len__') and len(actor.output) > 1:
-             raise ValueError(
-                 'Actor "{}" has more than one output. DDPG expects an actor that has a single output.'.format(actor))
-         if hasattr(critic.output, '__len__') and len(critic.output) > 1:
-             raise ValueError(
-                 'Critic "{}" has more than one output. DDPG expects a critic that has a single output.'.format(critic))
-         if critic_action_input not in critic.input:
-             raise ValueError('Critic "{}" does not have designated action input "{}".'.format(
-                 critic, critic_action_input))
-         if not hasattr(critic.input, '__len__') or len(critic.input) < 2:
-             raise ValueError(
-                 'Critic "{}" does not have enough inputs. The critic must have exactly two inputs, one for the action and one for the observation.'.format(critic))
-
-         super(DDPGAgent, self).__init__(**kwargs)
-
-         # Soft vs hard target model updates.
-         if target_model_update < 0:
-             raise ValueError('`target_model_update` must be >= 0.')
-         elif target_model_update >= 1:
-             # Hard update every `target_model_update` steps.
-             target_model_update = int(target_model_update)
-         else:
-             # Soft update with `(1 - target_model_update) * old + target_model_update * new`.
-             target_model_update = float(target_model_update)
-
-         if delta_range is not None:
-             warnings.warn(
-                 '`delta_range` is deprecated. Please use `delta_clip` instead, which takes a single scalar. For now we\'re falling back to `delta_range[1] = {}`'.format(delta_range[1]))
-             delta_clip = delta_range[1]
-
-         # Parameters.
-         self.nb_actions = nb_actions
-         self.nb_steps_warmup_actor = nb_steps_warmup_actor
-         self.nb_steps_warmup_critic = nb_steps_warmup_critic
-         self.random_process = random_process
-         self.delta_clip = delta_clip
-         self.gamma = gamma
-         self.target_model_update = target_model_update
-         self.batch_size = batch_size
-         self.train_interval = train_interval
-         self.memory_interval = memory_interval
-         self.custom_model_objects = custom_model_objects
-
-         # Related objects.
-         self.actor = actor
-         self.critic = critic
-         self.critic_action_input = critic_action_input
-         self.critic_action_input_idx = self.critic.input.index(
-             critic_action_input)
-         self.memory = memory
-
-         # State.
-         self.compiled = False
-         self.reset_states()
-
-     @property
-     def uses_learning_phase(self):
-         return self.actor.uses_learning_phase or self.critic.uses_learning_phase
-
-     def compile(self, optimizer, metrics=[]):
-         metrics += [mean_q]
-
-         if type(optimizer) in (list, tuple):
-             if len(optimizer) != 2:
-                 raise ValueError(
-                     'Please provide exactly two optimizers, the first one for the actor and the second one for the critic.')
-             actor_optimizer, critic_optimizer = optimizer
-         else:
-             actor_optimizer = optimizer
-             critic_optimizer = clone_optimizer(optimizer)
-         if type(actor_optimizer) is str:
-             actor_optimizer = optimizers.get(actor_optimizer)
-         if type(critic_optimizer) is str:
-             critic_optimizer = optimizers.get(critic_optimizer)
-         assert actor_optimizer != critic_optimizer
-
-         if len(metrics) == 2 and hasattr(metrics[0], '__len__') and hasattr(metrics[1], '__len__'):
-             actor_metrics, critic_metrics = metrics
-         else:
-             actor_metrics = critic_metrics = metrics
-
-         def clipped_error(y_true, y_pred):
-             return K.mean(huber_loss(y_true, y_pred, self.delta_clip), axis=-1)
-
-         # Compile target networks. We only use them in feed-forward mode, hence we can pass any
-         # optimizer and loss since we never use it anyway.
-         self.target_actor = clone_model(self.actor, self.custom_model_objects)
-         self.target_actor.compile(optimizer='sgd', loss='mse')
-         self.target_critic = clone_model(
-             self.critic, self.custom_model_objects)
-         self.target_critic.compile(optimizer='sgd', loss='mse')
-
-         # We also compile the actor. We never optimize the actor using Keras but instead compute
-         # the policy gradient ourselves. However, we need the actor in feed-forward mode, hence
-         # we also compile it with any optimizer; it is never used for training.
-         self.actor.compile(optimizer='sgd', loss='mse')
-
-         # Compile the critic.
-         if self.target_model_update < 1.:
-             # We use the `AdditionalUpdatesOptimizer` to efficiently soft-update the target model.
-             critic_updates = get_soft_target_model_updates(
-                 self.target_critic, self.critic, self.target_model_update)
-             critic_optimizer = AdditionalUpdatesOptimizer(
-                 critic_optimizer, critic_updates)
-         self.critic.compile(optimizer=critic_optimizer,
-                             loss=clipped_error, metrics=critic_metrics)
-
-         # Combine actor and critic so that we can get the policy gradient.
-         # Assuming critic's state inputs are the same as actor's.
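-         # The actor is trained by gradient ascent on the critic: the actor's output
-         # is fed into the critic's action input and the updates below minimize
-         # -mean(Q), i.e. the deterministic policy gradient of Lillicrap et al.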
- combined_inputs = [] - state_inputs = [] - for i in self.critic.input: - if i == self.critic_action_input: - combined_inputs.append([]) - else: - combined_inputs.append(i) - state_inputs.append(i) - combined_inputs[self.critic_action_input_idx] = self.actor( - state_inputs) - - combined_output = self.critic(combined_inputs) - - updates = actor_optimizer.get_updates( - params=self.actor.trainable_weights, loss=-K.mean(combined_output)) - if self.target_model_update < 1.: - # Include soft target model updates. - updates += get_soft_target_model_updates( - self.target_actor, self.actor, self.target_model_update) - updates += self.actor.updates # include other updates of the actor, e.g. for BN - - # Finally, combine it all into a callable function. - if K.backend() == 'tensorflow': - self.actor_train_fn = K.function(state_inputs + [K.learning_phase()], - [self.actor(state_inputs)], updates=updates) - else: - if self.uses_learning_phase: - state_inputs += [K.learning_phase()] - self.actor_train_fn = K.function( - state_inputs, [self.actor(state_inputs)], updates=updates) - self.actor_optimizer = actor_optimizer - - self.compiled = True - - def load_weights(self, filepath): - filename, extension = os.path.splitext(filepath) - actor_filepath = filename + '_actor' + extension - critic_filepath = filename + '_critic' + extension - self.actor.load_weights(actor_filepath) - self.critic.load_weights(critic_filepath) - self.update_target_models_hard() - - def save_weights(self, filepath, overwrite=False): - filename, extension = os.path.splitext(filepath) - actor_filepath = filename + '_actor' + extension - critic_filepath = filename + '_critic' + extension - self.actor.save_weights(actor_filepath, overwrite=overwrite) - self.critic.save_weights(critic_filepath, overwrite=overwrite) - - def update_target_models_hard(self): - self.target_critic.set_weights(self.critic.get_weights()) - self.target_actor.set_weights(self.actor.get_weights()) - - # TODO: implement pickle - - def reset_states(self): - if self.random_process is not None: - self.random_process.reset_states() - self.recent_action = None - self.recent_observation = None - if self.compiled: - self.actor.reset_states() - self.critic.reset_states() - self.target_actor.reset_states() - self.target_critic.reset_states() - - def process_state_batch(self, batch): - batch = np.array(batch) - if self.processor is None: - return batch - return self.processor.process_state_batch(batch) - - def select_action(self, state): - batch = self.process_state_batch([state]) - action = self.actor.predict_on_batch(batch).flatten() - assert action.shape == (self.nb_actions,) - - # Apply noise, if a random process is set. - if self.training and self.random_process is not None: - noise = self.random_process.sample() - assert noise.shape == action.shape - action += noise - - return action - - def forward(self, observation): - # Select an action. - state = self.memory.get_recent_state(observation) - action = self.select_action(state) # TODO: move this into policy - - # Book-keeping. - self.recent_observation = observation - self.recent_action = action - - return action - - @property - def layers(self): - return self.actor.layers[:] + self.critic.layers[:] - - @property - def metrics_names(self): - names = self.critic.metrics_names[:] - if self.processor is not None: - names += self.processor.metrics_names[:] - return names - - def backward(self, reward, terminal=False): - # Store most recent experience in memory. 
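-         # Experiences are written every `memory_interval` steps; the networks are
-         # only updated (below) every `train_interval` steps once the respective
-         # warm-up period has elapsed.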
- if self.step % self.memory_interval == 0: - self.memory.append(self.recent_observation, self.recent_action, reward, terminal, - training=self.training) - - metrics = [np.nan for _ in self.metrics_names] - if not self.training: - # We're done here. No need to update the experience memory since we only use the working - # memory to obtain the state over the most recent observations. - return metrics - - # Train the network on a single stochastic batch. - can_train_either = self.step > self.nb_steps_warmup_critic or self.step > self.nb_steps_warmup_actor - if can_train_either and self.step % self.train_interval == 0: - experiences = self.memory.sample(self.batch_size) - assert len(experiences) == self.batch_size - - # Start by extracting the necessary parameters (we use a vectorized implementation). - state0_batch = [] - reward_batch = [] - action_batch = [] - terminal1_batch = [] - state1_batch = [] - for e in experiences: - state0_batch.append(e.state0) - state1_batch.append(e.state1) - reward_batch.append(e.reward) - action_batch.append(e.action) - terminal1_batch.append(0. if e.terminal1 else 1.) - - # Prepare and validate parameters. - state0_batch = self.process_state_batch(state0_batch) - state1_batch = self.process_state_batch(state1_batch) - terminal1_batch = np.array(terminal1_batch) - reward_batch = np.array(reward_batch) - action_batch = np.array(action_batch) - assert reward_batch.shape == (self.batch_size,) - assert terminal1_batch.shape == reward_batch.shape - assert action_batch.shape == (self.batch_size, self.nb_actions) - - # Update critic, if warm up is over. - if self.step > self.nb_steps_warmup_critic: - target_actions = self.target_actor.predict_on_batch( - state1_batch) - assert target_actions.shape == ( - self.batch_size, self.nb_actions) - if len(self.critic.inputs) >= 3: - state1_batch_with_action = state1_batch[:] - else: - state1_batch_with_action = [state1_batch] - state1_batch_with_action.insert( - self.critic_action_input_idx, target_actions) - target_q_values = self.target_critic.predict_on_batch( - state1_batch_with_action).flatten() - assert target_q_values.shape == (self.batch_size,) - - # Compute r_t + gamma * max_a Q(s_t+1, a) and update the target ys accordingly, - # but only for the affected output units (as given by action_batch). - discounted_reward_batch = self.gamma * target_q_values - discounted_reward_batch *= terminal1_batch - assert discounted_reward_batch.shape == reward_batch.shape - targets = (reward_batch + - discounted_reward_batch).reshape(self.batch_size, 1) - - # Perform a single batch update on the critic network. - if len(self.critic.inputs) >= 3: - state0_batch_with_action = state0_batch[:] - else: - state0_batch_with_action = [state0_batch] - state0_batch_with_action.insert( - self.critic_action_input_idx, action_batch) - metrics = self.critic.train_on_batch( - state0_batch_with_action, targets) - if self.processor is not None: - metrics += self.processor.metrics - - # Update actor, if warm up is over. 
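-             # `actor_train_fn` runs the policy-gradient updates compiled in
-             # `compile()` (plus, for soft updates, the target-model updates that
-             # were folded into the same function).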
- if self.step > self.nb_steps_warmup_actor: - # TODO: implement metrics for actor - if len(self.actor.inputs) >= 2: - inputs = state0_batch[:] - else: - inputs = [state0_batch] - if self.uses_learning_phase: - inputs += [self.training] - action_values = self.actor_train_fn(inputs)[0] - assert action_values.shape == ( - self.batch_size, self.nb_actions) - - if self.target_model_update >= 1 and self.step % self.target_model_update == 0: - self.update_target_models_hard() - - return metrics -from __future__ import division -import warnings - -import keras.backend as K -from keras.models import Model -from keras.layers import Lambda, Input, Layer, Dense - -from rl.core import Agent -from rl.policy import EpsGreedyQPolicy, GreedyQPolicy -from rl.util import * - - -def mean_q(y_true, y_pred): - return K.mean(K.max(y_pred, axis=-1)) - - -class AbstractDQNAgent(Agent): - """Write me - """ - - def __init__(self, nb_actions, memory, gamma=.99, batch_size=32, nb_steps_warmup=1000, - train_interval=1, memory_interval=1, target_model_update=10000, - delta_range=None, delta_clip=np.inf, custom_model_objects={}, **kwargs): - super(AbstractDQNAgent, self).__init__(**kwargs) - - # Soft vs hard target model updates. - if target_model_update < 0: - raise ValueError('`target_model_update` must be >= 0.') - elif target_model_update >= 1: - # Hard update every `target_model_update` steps. - target_model_update = int(target_model_update) - else: - # Soft update with `(1 - target_model_update) * old + target_model_update * new`. - target_model_update = float(target_model_update) - - if delta_range is not None: - warnings.warn( - '`delta_range` is deprecated. Please use `delta_clip` instead, which takes a single scalar. For now we\'re falling back to `delta_range[1] = {}`'.format(delta_range[1])) - delta_clip = delta_range[1] - - # Parameters. - self.nb_actions = nb_actions - self.gamma = gamma - self.batch_size = batch_size - self.nb_steps_warmup = nb_steps_warmup - self.train_interval = train_interval - self.memory_interval = memory_interval - self.target_model_update = target_model_update - self.delta_clip = delta_clip - self.custom_model_objects = custom_model_objects - - # Related objects. - self.memory = memory - - # State. - self.compiled = False - - def process_state_batch(self, batch): - batch = np.array(batch) - if self.processor is None: - return batch - return self.processor.process_state_batch(batch) - - def compute_batch_q_values(self, state_batch): - batch = self.process_state_batch(state_batch) - q_values = self.model.predict_on_batch(batch) - assert q_values.shape == (len(state_batch), self.nb_actions) - return q_values - - def compute_q_values(self, state): - q_values = self.compute_batch_q_values([state]).flatten() - assert q_values.shape == (self.nb_actions,) - return q_values - - def get_config(self): - return { - 'nb_actions': self.nb_actions, - 'gamma': self.gamma, - 'batch_size': self.batch_size, - 'nb_steps_warmup': self.nb_steps_warmup, - 'train_interval': self.train_interval, - 'memory_interval': self.memory_interval, - 'target_model_update': self.target_model_update, - 'delta_clip': self.delta_clip, - 'memory': get_object_config(self.memory), - } - -# An implementation of the DQN agent as described in Mnih (2013) and Mnih (2015). -# http://arxiv.org/pdf/1312.5602.pdf -# http://arxiv.org/abs/1509.06461 - - -class DQNAgent(AbstractDQNAgent): - """ - # Arguments - model__: A Keras model. 
-         policy__: A Keras-rl policy that is defined in [policy](https://github.com/keras-rl/keras-rl/blob/master/rl/policy.py).
-         test_policy__: A Keras-rl policy.
-         enable_double_dqn__: A boolean which enables Double DQN as proposed by van Hasselt et al.: the online network selects
-             the next action while the target network estimates its Q value, which reduces overestimation.
-         enable_dueling_network__: A boolean which enables the dueling architecture proposed in the dueling-DQN paper.
-         dueling_type__: If `enable_dueling_network` is set to `True`, a type of dueling architecture must be chosen
-             which calculates Q(s,a) from V(s) and A(s,a) differently. Note that `avg` is recommended in the
-             [paper](https://arxiv.org/abs/1511.06581).
-             `avg`: Q(s,a;theta) = V(s;theta) + (A(s,a;theta) - Avg_a(A(s,a;theta)))
-             `max`: Q(s,a;theta) = V(s;theta) + (A(s,a;theta) - max_a(A(s,a;theta)))
-             `naive`: Q(s,a;theta) = V(s;theta) + A(s,a;theta)
-
-     """
-
-     def __init__(self, model, policy=None, test_policy=None, enable_double_dqn=False, enable_dueling_network=False,
-                  dueling_type='avg', *args, **kwargs):
-         super(DQNAgent, self).__init__(*args, **kwargs)
-
-         # Validate (important) input.
-         if hasattr(model.output, '__len__') and len(model.output) > 1:
-             raise ValueError(
-                 'Model "{}" has more than one output. DQN expects a model that has a single output.'.format(model))
-         if model.output._keras_shape != (None, self.nb_actions):
-             raise ValueError('Model output "{}" has invalid shape. DQN expects a model that has one dimension for each action, in this case {}.'.format(
-                 model.output, self.nb_actions))
-
-         # Parameters.
-         self.enable_double_dqn = enable_double_dqn
-         self.enable_dueling_network = enable_dueling_network
-         self.dueling_type = dueling_type
-         if self.enable_dueling_network:
-             # get the second last layer of the model, abandon the last layer
-             layer = model.layers[-2]
-             nb_action = model.output._keras_shape[-1]
-             # layer y has a shape (nb_action+1,)
-             # y[:, 0] represents V(s;theta)
-             # y[:, 1:] represents A(s,a;theta)
-             y = Dense(nb_action + 1, activation='linear')(layer.output)
-             # calculate Q(s,a;theta)
-             # dueling_type == 'avg'
-             # Q(s,a;theta) = V(s;theta) + (A(s,a;theta) - Avg_a(A(s,a;theta)))
-             # dueling_type == 'max'
-             # Q(s,a;theta) = V(s;theta) + (A(s,a;theta) - max_a(A(s,a;theta)))
-             # dueling_type == 'naive'
-             # Q(s,a;theta) = V(s;theta) + A(s,a;theta)
-             if self.dueling_type == 'avg':
-                 outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.mean(
-                     a[:, 1:], axis=1, keepdims=True), output_shape=(nb_action,))(y)
-             elif self.dueling_type == 'max':
-                 outputlayer = Lambda(lambda a: K.expand_dims(a[:, 0], -1) + a[:, 1:] - K.max(
-                     a[:, 1:], axis=1, keepdims=True), output_shape=(nb_action,))(y)
-             elif self.dueling_type == 'naive':
-                 outputlayer = Lambda(lambda a: K.expand_dims(
-                     a[:, 0], -1) + a[:, 1:], output_shape=(nb_action,))(y)
-             else:
-                 assert False, "dueling_type must be one of {'avg','max','naive'}"
-
-             model = Model(inputs=model.input, outputs=outputlayer)
-
-         # Related objects.
-         self.model = model
-         if policy is None:
-             policy = EpsGreedyQPolicy()
-         if test_policy is None:
-             test_policy = GreedyQPolicy()
-         self.policy = policy
-         self.test_policy = test_policy
-
-         # State.
- self.reset_states() - - def get_config(self): - config = super(DQNAgent, self).get_config() - config['enable_double_dqn'] = self.enable_double_dqn - config['dueling_type'] = self.dueling_type - config['enable_dueling_network'] = self.enable_dueling_network - config['model'] = get_object_config(self.model) - config['policy'] = get_object_config(self.policy) - config['test_policy'] = get_object_config(self.test_policy) - if self.compiled: - config['target_model'] = get_object_config(self.target_model) - return config - - def compile(self, optimizer, metrics=[]): - metrics += [mean_q] # register default metrics - - # We never train the target model, hence we can set the optimizer and loss arbitrarily. - self.target_model = clone_model(self.model, self.custom_model_objects) - self.target_model.compile(optimizer='sgd', loss='mse') - self.model.compile(optimizer='sgd', loss='mse') - - # Compile model. - if self.target_model_update < 1.: - # We use the `AdditionalUpdatesOptimizer` to efficiently soft-update the target model. - updates = get_soft_target_model_updates( - self.target_model, self.model, self.target_model_update) - optimizer = AdditionalUpdatesOptimizer(optimizer, updates) - - def clipped_masked_error(args): - y_true, y_pred, mask = args - loss = huber_loss(y_true, y_pred, self.delta_clip) - loss *= mask # apply element-wise mask - return K.sum(loss, axis=-1) - - # Create trainable model. The problem is that we need to mask the output since we only - # ever want to update the Q values for a certain action. The way we achieve this is by - # using a custom Lambda layer that computes the loss. This gives us the necessary flexibility - # to mask out certain parameters by passing in multiple inputs to the Lambda layer. - y_pred = self.model.output - y_true = Input(name='y_true', shape=(self.nb_actions,)) - mask = Input(name='mask', shape=(self.nb_actions,)) - loss_out = Lambda(clipped_masked_error, output_shape=( - 1,), name='loss')([y_true, y_pred, mask]) - ins = [self.model.input] if type( - self.model.input) is not list else self.model.input - trainable_model = Model( - inputs=ins + [y_true, mask], outputs=[loss_out, y_pred]) - assert len(trainable_model.output_names) == 2 - combined_metrics = {trainable_model.output_names[1]: metrics} - losses = [ - lambda y_true, y_pred: y_pred, # loss is computed in Lambda layer - # we only include this for the metrics - lambda y_true, y_pred: K.zeros_like(y_pred), - ] - trainable_model.compile(optimizer=optimizer, - loss=losses, metrics=combined_metrics) - self.trainable_model = trainable_model - - self.compiled = True - - def load_weights(self, filepath): - self.model.load_weights(filepath) - self.update_target_model_hard() - - def save_weights(self, filepath, overwrite=False): - self.model.save_weights(filepath, overwrite=overwrite) - - def reset_states(self): - self.recent_action = None - self.recent_observation = None - if self.compiled: - self.model.reset_states() - self.target_model.reset_states() - - def update_target_model_hard(self): - self.target_model.set_weights(self.model.get_weights()) - - def forward(self, observation): - # Select an action. - state = self.memory.get_recent_state(observation) - q_values = self.compute_q_values(state) - if self.training: - action = self.policy.select_action(q_values=q_values) - else: - action = self.test_policy.select_action(q_values=q_values) - - # Book-keeping. 
-         self.recent_observation = observation
-         self.recent_action = action
-
-         return action
-
-     def backward(self, reward, terminal):
-         # Store most recent experience in memory.
-         if self.step % self.memory_interval == 0:
-             self.memory.append(self.recent_observation, self.recent_action, reward, terminal,
-                                training=self.training)
-
-         metrics = [np.nan for _ in self.metrics_names]
-         if not self.training:
-             # We're done here. No need to update the experience memory since we only use the working
-             # memory to obtain the state over the most recent observations.
-             return metrics
-
-         # Train the network on a single stochastic batch.
-         if self.step > self.nb_steps_warmup and self.step % self.train_interval == 0:
-             experiences = self.memory.sample(self.batch_size)
-             assert len(experiences) == self.batch_size
-
-             # Start by extracting the necessary parameters (we use a vectorized implementation).
-             state0_batch = []
-             reward_batch = []
-             action_batch = []
-             terminal1_batch = []
-             state1_batch = []
-             for e in experiences:
-                 state0_batch.append(e.state0)
-                 state1_batch.append(e.state1)
-                 reward_batch.append(e.reward)
-                 action_batch.append(e.action)
-                 terminal1_batch.append(0. if e.terminal1 else 1.)
-
-             # Prepare and validate parameters.
-             state0_batch = self.process_state_batch(state0_batch)
-             state1_batch = self.process_state_batch(state1_batch)
-             terminal1_batch = np.array(terminal1_batch)
-             reward_batch = np.array(reward_batch)
-             assert reward_batch.shape == (self.batch_size,)
-             assert terminal1_batch.shape == reward_batch.shape
-             assert len(action_batch) == len(reward_batch)
-
-             # Compute Q values for mini-batch update.
-             if self.enable_double_dqn:
-                 # According to the paper "Deep Reinforcement Learning with Double Q-learning"
-                 # (van Hasselt et al., 2015), in Double DQN, the online network predicts the actions
-                 # while the target network is used to estimate the Q value.
-                 q_values = self.model.predict_on_batch(state1_batch)
-                 assert q_values.shape == (self.batch_size, self.nb_actions)
-                 actions = np.argmax(q_values, axis=1)
-                 assert actions.shape == (self.batch_size,)
-
-                 # Now, estimate Q values using the target network but select the values with the
-                 # highest Q value wrt the online model (as computed above).
-                 target_q_values = self.target_model.predict_on_batch(
-                     state1_batch)
-                 assert target_q_values.shape == (
-                     self.batch_size, self.nb_actions)
-                 q_batch = target_q_values[range(self.batch_size), actions]
-             else:
-                 # Compute the q_values given state1, and extract the maximum for each sample in the batch.
-                 # We perform this prediction on the target_model instead of the model for reasons
-                 # outlined in Mnih (2015). In short: it makes the algorithm more stable.
-                 target_q_values = self.target_model.predict_on_batch(
-                     state1_batch)
-                 assert target_q_values.shape == (
-                     self.batch_size, self.nb_actions)
-                 q_batch = np.max(target_q_values, axis=1).flatten()
-             assert q_batch.shape == (self.batch_size,)
-
-             targets = np.zeros((self.batch_size, self.nb_actions))
-             dummy_targets = np.zeros((self.batch_size,))
-             masks = np.zeros((self.batch_size, self.nb_actions))
-
-             # Compute r_t + gamma * max_a Q(s_t+1, a) and update the targets accordingly,
-             # but only for the affected output units (as given by action_batch).
-             discounted_reward_batch = self.gamma * q_batch
-             # Set discounted reward to zero for all states that were terminal.
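-             # (`terminal1_batch` holds 0. for terminal transitions and 1. otherwise,
-             # so the multiplication below implements the indicator in
-             # R = r + gamma * (1 - terminal) * max_a Q(s', a).)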
-             discounted_reward_batch *= terminal1_batch
-             assert discounted_reward_batch.shape == reward_batch.shape
-             Rs = reward_batch + discounted_reward_batch
-             for idx, (target, mask, R, action) in enumerate(zip(targets, masks, Rs, action_batch)):
-                 # update action with estimated accumulated reward
-                 target[action] = R
-                 dummy_targets[idx] = R
-                 mask[action] = 1.  # enable loss for this specific action
-             targets = np.array(targets).astype('float32')
-             masks = np.array(masks).astype('float32')
-
-             # Finally, perform a single update on the entire batch. We use a dummy target since
-             # the actual loss is computed in a Lambda layer that needs more complex input. However,
-             # it is still useful to know the actual target to compute metrics properly.
-             ins = [state0_batch] if type(
-                 self.model.input) is not list else state0_batch
-             metrics = self.trainable_model.train_on_batch(
-                 ins + [targets, masks], [dummy_targets, targets])
-             metrics = [metric for idx, metric in enumerate(
-                 metrics) if idx not in (1, 2)]  # throw away individual losses
-             metrics += self.policy.metrics
-             if self.processor is not None:
-                 metrics += self.processor.metrics
-
-         if self.target_model_update >= 1 and self.step % self.target_model_update == 0:
-             self.update_target_model_hard()
-
-         return metrics
-
-     @property
-     def layers(self):
-         return self.model.layers[:]
-
-     @property
-     def metrics_names(self):
-         # Throw away individual losses and replace output name since this is hidden from the user.
-         assert len(self.trainable_model.output_names) == 2
-         dummy_output_name = self.trainable_model.output_names[1]
-         model_metrics = [name for idx, name in enumerate(
-             self.trainable_model.metrics_names) if idx not in (1, 2)]
-         model_metrics = [name.replace(dummy_output_name + '_', '')
-                          for name in model_metrics]
-
-         names = model_metrics + self.policy.metrics_names[:]
-         if self.processor is not None:
-             names += self.processor.metrics_names[:]
-         return names
-
-     @property
-     def policy(self):
-         return self.__policy
-
-     @policy.setter
-     def policy(self, policy):
-         self.__policy = policy
-         self.__policy._set_agent(self)
-
-     @property
-     def test_policy(self):
-         return self.__test_policy
-
-     @test_policy.setter
-     def test_policy(self, policy):
-         self.__test_policy = policy
-         self.__test_policy._set_agent(self)
-
-
- class NAFLayer(Layer):
-     """Write me
-     """
-
-     def __init__(self, nb_actions, mode='full', **kwargs):
-         if mode not in ('full', 'diag'):
-             raise RuntimeError(
-                 'Unknown mode "{}" in NAFLayer.'.format(mode))
-
-         self.nb_actions = nb_actions
-         self.mode = mode
-         super(NAFLayer, self).__init__(**kwargs)
-
-     def call(self, x, mask=None):
-         # TODO: validate input shape
-
-         assert (len(x) == 3)
-         L_flat = x[0]
-         mu = x[1]
-         a = x[2]
-
-         if self.mode == 'full':
-             # Create L and L^T matrix, which we use to construct the positive-definite matrix P.
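-             # L is lower-triangular with an exponentiated (hence strictly positive)
-             # diagonal, so P = L . L^T is positive definite by construction.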
- L = None - LT = None - if K.backend() == 'theano': - import theano.tensor as T - import theano - - def fn(x, L_acc, LT_acc): - x_ = K.zeros((self.nb_actions, self.nb_actions)) - x_ = T.set_subtensor( - x_[np.tril_indices(self.nb_actions)], x) - diag = K.exp(T.diag(x_)) + K.epsilon() - x_ = T.set_subtensor( - x_[np.diag_indices(self.nb_actions)], diag) - return x_, x_.T - - outputs_info = [ - K.zeros((self.nb_actions, self.nb_actions)), - K.zeros((self.nb_actions, self.nb_actions)), - ] - results, _ = theano.scan( - fn=fn, sequences=L_flat, outputs_info=outputs_info) - L, LT = results - elif K.backend() == 'tensorflow': - import tensorflow as tf - - # Number of elements in a triangular matrix. - nb_elems = (self.nb_actions * self.nb_actions + - self.nb_actions) // 2 - - # Create mask for the diagonal elements in L_flat. This is used to exponentiate - # only the diagonal elements, which is done before gathering. - diag_indeces = [0] - for row in range(1, self.nb_actions): - diag_indeces.append(diag_indeces[-1] + (row + 1)) - diag_mask = np.zeros(1 + nb_elems) # +1 for the leading zero - diag_mask[np.array(diag_indeces) + 1] = 1 - diag_mask = K.variable(diag_mask) - - # Add leading zero element to each element in the L_flat. We use this zero - # element when gathering L_flat into a lower triangular matrix L. - nb_rows = tf.shape(L_flat)[0] - zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1) - try: - # Old TF behavior. - L_flat = tf.concat(1, [zeros, L_flat]) - except (TypeError, ValueError): - # New TF behavior - L_flat = tf.concat([zeros, L_flat], 1) - - # Create mask that can be used to gather elements from L_flat and put them - # into a lower triangular matrix. - tril_mask = np.zeros( - (self.nb_actions, self.nb_actions), dtype='int32') - tril_mask[np.tril_indices(self.nb_actions)] = range( - 1, nb_elems + 1) - - # Finally, process each element of the batch. - init = [ - K.zeros((self.nb_actions, self.nb_actions)), - K.zeros((self.nb_actions, self.nb_actions)), - ] - - def fn(a, x): - # Exponentiate everything. This is much easier than only exponentiating - # the diagonal elements, and, usually, the action space is relatively low. - x_ = K.exp(x) + K.epsilon() - # Only keep the diagonal elements. - x_ *= diag_mask - # Add the original, non-diagonal elements. - x_ += x * (1. - diag_mask) - # Finally, gather everything into a lower triangular matrix. - L_ = tf.gather(x_, tril_mask) - return [L_, tf.transpose(L_)] - - tmp = tf.scan(fn, L_flat, initializer=init) - if isinstance(tmp, (list, tuple)): - # TensorFlow 0.10 now returns a tuple of tensors. - L, LT = tmp - else: - # Old TensorFlow < 0.10 returns a shared tensor. - L = tmp[:, 0, :, :] - LT = tmp[:, 1, :, :] - else: - raise RuntimeError( - 'Unknown Keras backend "{}".'.format(K.backend())) - assert L is not None - assert LT is not None - P = K.batch_dot(L, LT) - elif self.mode == 'diag': - if K.backend() == 'theano': - import theano.tensor as T - import theano - - def fn(x, P_acc): - x_ = K.zeros((self.nb_actions, self.nb_actions)) - x_ = T.set_subtensor( - x_[np.diag_indices(self.nb_actions)], x) - return x_ - - outputs_info = [ - K.zeros((self.nb_actions, self.nb_actions)), - ] - P, _ = theano.scan(fn=fn, sequences=L_flat, - outputs_info=outputs_info) - elif K.backend() == 'tensorflow': - import tensorflow as tf - - # Create mask that can be used to gather elements from L_flat and put them - # into a diagonal matrix. 
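-                 # In 'diag' mode only the diagonal of P is parameterized, so P is
-                 # assembled by scattering `L_flat` onto the diagonal.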
-                 diag_mask = np.zeros(
-                     (self.nb_actions, self.nb_actions), dtype='int32')
-                 diag_mask[np.diag_indices(self.nb_actions)] = range(
-                     1, self.nb_actions + 1)
-
-                 # Add leading zero element to each element in the L_flat. We use this zero
-                 # element when gathering L_flat into a diagonal matrix.
-                 nb_rows = tf.shape(L_flat)[0]
-                 zeros = tf.expand_dims(tf.tile(K.zeros((1,)), [nb_rows]), 1)
-                 try:
-                     # Old TF behavior.
-                     L_flat = tf.concat(1, [zeros, L_flat])
-                 except (TypeError, ValueError):
-                     # New TF behavior.
-                     L_flat = tf.concat([zeros, L_flat], 1)
-
-                 # Finally, process each element of the batch.
-                 def fn(a, x):
-                     x_ = tf.gather(x, diag_mask)
-                     return x_
-
-                 P = tf.scan(fn, L_flat, initializer=K.zeros(
-                     (self.nb_actions, self.nb_actions)))
-             else:
-                 raise RuntimeError(
-                     'Unknown Keras backend "{}".'.format(K.backend()))
-         assert P is not None
-         assert K.ndim(P) == 3
-
-         # Combine a, mu and P into a scalar (over the batches). What we compute here is
-         # -.5 * (a - mu)^T * P * (a - mu), where * denotes the dot-product. Unfortunately
-         # TensorFlow handles vector * P slightly suboptimally, hence we convert the vectors to
-         # 1xd/dx1 matrices and finally flatten the resulting 1x1 matrix into a scalar. All
-         # operations happen over the batch size, which is dimension 0.
-         prod = K.batch_dot(K.expand_dims(a - mu, 1), P)
-         prod = K.batch_dot(prod, K.expand_dims(a - mu, -1))
-         A = -.5 * K.batch_flatten(prod)
-         assert K.ndim(A) == 2
-         return A
-
-     def get_output_shape_for(self, input_shape):
-         return self.compute_output_shape(input_shape)
-
-     def compute_output_shape(self, input_shape):
-         if len(input_shape) != 3:
-             raise RuntimeError("Expects 3 inputs: L, mu, a")
-         for i, shape in enumerate(input_shape):
-             if len(shape) != 2:
-                 raise RuntimeError(
-                     "Input {} has {} dimensions but should have 2".format(i, len(shape)))
-         assert self.mode in ('full', 'diag')
-         if self.mode == 'full':
-             expected_elements = (
-                 self.nb_actions * self.nb_actions + self.nb_actions) // 2
-         elif self.mode == 'diag':
-             expected_elements = self.nb_actions
-         else:
-             expected_elements = None
-         assert expected_elements is not None
-         if input_shape[0][1] != expected_elements:
-             raise RuntimeError(
-                 "Input 0 (L) should have {} elements but has {}".format(expected_elements, input_shape[0][1]))
-         if input_shape[1][1] != self.nb_actions:
-             raise RuntimeError(
-                 "Input 1 (mu) should have {} elements but has {}".format(self.nb_actions, input_shape[1][1]))
-         if input_shape[2][1] != self.nb_actions:
-             raise RuntimeError(
-                 "Input 2 (action) should have {} elements but has {}".format(self.nb_actions, input_shape[2][1]))
-         return input_shape[0][0], 1
-
-
- class NAFAgent(AbstractDQNAgent):
-     """Write me
-     """
-
-     def __init__(self, V_model, L_model, mu_model, random_process=None,
-                  covariance_mode='full', *args, **kwargs):
-         super(NAFAgent, self).__init__(*args, **kwargs)
-
-         # TODO: Validate (important) input.
-
-         # Parameters.
-         self.random_process = random_process
-         self.covariance_mode = covariance_mode
-
-         # Related objects.
-         self.V_model = V_model
-         self.L_model = L_model
-         self.mu_model = mu_model
-
-         # State.
- self.reset_states() - - def update_target_model_hard(self): - self.target_V_model.set_weights(self.V_model.get_weights()) - - def load_weights(self, filepath): - # updates V, L and mu model since the weights are shared - self.combined_model.load_weights(filepath) - self.update_target_model_hard() - - def save_weights(self, filepath, overwrite=False): - self.combined_model.save_weights(filepath, overwrite=overwrite) - - def reset_states(self): - if self.random_process is not None: - self.random_process.reset_states() - self.recent_action = None - self.recent_observation = None - if self.compiled: - self.combined_model.reset_states() - self.target_V_model.reset_states() - - def compile(self, optimizer, metrics=[]): - metrics += [mean_q] # register default metrics - - # Create target V model. We don't need targets for mu or L. - self.target_V_model = clone_model( - self.V_model, self.custom_model_objects) - self.target_V_model.compile(optimizer='sgd', loss='mse') - - # Build combined model. - a_in = Input(shape=(self.nb_actions,), name='action_input') - if type(self.V_model.input) is list: - observation_shapes = [i._keras_shape[1:] - for i in self.V_model.input] - else: - observation_shapes = [self.V_model.input._keras_shape[1:]] - os_in = [Input(shape=shape, name='observation_input_{}'.format(idx)) - for idx, shape in enumerate(observation_shapes)] - L_out = self.L_model([a_in] + os_in) - V_out = self.V_model(os_in) - - mu_out = self.mu_model(os_in) - A_out = NAFLayer(self.nb_actions, mode=self.covariance_mode)( - [L_out, mu_out, a_in]) - combined_out = Lambda( - lambda x: x[0]+x[1], output_shape=lambda x: x[0])([A_out, V_out]) - combined = Model(inputs=[a_in] + os_in, outputs=[combined_out]) - # Compile combined model. - if self.target_model_update < 1.: - # We use the `AdditionalUpdatesOptimizer` to efficiently soft-update the target model. - updates = get_soft_target_model_updates( - self.target_V_model, self.V_model, self.target_model_update) - optimizer = AdditionalUpdatesOptimizer(optimizer, updates) - - def clipped_error(y_true, y_pred): - return K.mean(huber_loss(y_true, y_pred, self.delta_clip), axis=-1) - - combined.compile(loss=clipped_error, - optimizer=optimizer, metrics=metrics) - self.combined_model = combined - - self.compiled = True - - def select_action(self, state): - batch = self.process_state_batch([state]) - action = self.mu_model.predict_on_batch(batch).flatten() - assert action.shape == (self.nb_actions,) - - # Apply noise, if a random process is set. - if self.training and self.random_process is not None: - noise = self.random_process.sample() - assert noise.shape == action.shape - action += noise - - return action - - def forward(self, observation): - # Select an action. - state = self.memory.get_recent_state(observation) - action = self.select_action(state) - - # Book-keeping. - self.recent_observation = observation - self.recent_action = action - - return action - - def backward(self, reward, terminal): - # Store most recent experience in memory. - if self.step % self.memory_interval == 0: - self.memory.append(self.recent_observation, self.recent_action, reward, terminal, - training=self.training) - - metrics = [np.nan for _ in self.metrics_names] - if not self.training: - # We're done here. No need to update the experience memory since we only use the working - # memory to obtain the state over the most recent observations. - return metrics - - # Train the network on a single stochastic batch. 
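-         # NAF bootstraps from the state-value head only: the targets below are
-         # r + gamma * V'(s') from the target V network, since max_a Q(s, a) = V(s)
-         # by construction of the quadratic advantage term.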
- if self.step > self.nb_steps_warmup and self.step % self.train_interval == 0: - experiences = self.memory.sample(self.batch_size) - assert len(experiences) == self.batch_size - - # Start by extracting the necessary parameters (we use a vectorized implementation). - state0_batch = [] - reward_batch = [] - action_batch = [] - terminal1_batch = [] - state1_batch = [] - for e in experiences: - state0_batch.append(e.state0) - state1_batch.append(e.state1) - reward_batch.append(e.reward) - action_batch.append(e.action) - terminal1_batch.append(0. if e.terminal1 else 1.) - - # Prepare and validate parameters. - state0_batch = self.process_state_batch(state0_batch) - state1_batch = self.process_state_batch(state1_batch) - terminal1_batch = np.array(terminal1_batch) - reward_batch = np.array(reward_batch) - action_batch = np.array(action_batch) - assert reward_batch.shape == (self.batch_size,) - assert terminal1_batch.shape == reward_batch.shape - assert action_batch.shape == (self.batch_size, self.nb_actions) - - # Compute Q values for mini-batch update. - q_batch = self.target_V_model.predict_on_batch( - state1_batch).flatten() - assert q_batch.shape == (self.batch_size,) - - # Compute discounted reward. - discounted_reward_batch = self.gamma * q_batch - # Set discounted reward to zero for all states that were terminal. - discounted_reward_batch *= terminal1_batch - assert discounted_reward_batch.shape == reward_batch.shape - Rs = reward_batch + discounted_reward_batch - assert Rs.shape == (self.batch_size,) - - # Finally, perform a single update on the entire batch. - if len(self.combined_model.input) == 2: - metrics = self.combined_model.train_on_batch( - [action_batch, state0_batch], Rs) - else: - metrics = self.combined_model.train_on_batch( - [action_batch] + state0_batch, Rs) - if self.processor is not None: - metrics += self.processor.metrics - - if self.target_model_update >= 1 and self.step % self.target_model_update == 0: - self.update_target_model_hard() - - return metrics - - @property - def layers(self): - return self.combined_model.layers[:] - - def get_config(self): - config = super(NAFAgent, self).get_config() - config['V_model'] = get_object_config(self.V_model) - config['mu_model'] = get_object_config(self.mu_model) - config['L_model'] = get_object_config(self.L_model) - if self.compiled: - config['target_V_model'] = get_object_config(self.target_V_model) - return config - - @property - def metrics_names(self): - names = self.combined_model.metrics_names[:] - if self.processor is not None: - names += self.processor.metrics_names[:] - return names - - -# Aliases -ContinuousDQNAgent = NAFAgent -import collections - -import numpy as np - -from keras.callbacks import History -from keras.models import Model -from keras.layers import Input, Lambda -import keras.backend as K - -from rl.core import Agent -from rl.agents.dqn import mean_q -from rl.util import huber_loss -from rl.policy import EpsGreedyQPolicy, GreedyQPolicy -from rl.util import get_object_config - - -class SARSAAgent(Agent): - """Write me - """ - - def __init__(self, model, nb_actions, policy=None, test_policy=None, gamma=.99, nb_steps_warmup=10, - train_interval=1, delta_clip=np.inf, *args, **kwargs): - super(SarsaAgent, self).__init__(*args, **kwargs) - - # Do not use defaults in constructor because that would mean that each instance shares the same - # policy. 
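- # (A default argument is evaluated once, at definition time, so every
- # agent instance would end up mutating one shared policy object.)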
- if policy is None: - policy = EpsGreedyQPolicy() - if test_policy is None: - test_policy = GreedyQPolicy() - - self.model = model - self.nb_actions = nb_actions - self.policy = policy - self.test_policy = test_policy - self.gamma = gamma - self.nb_steps_warmup = nb_steps_warmup - self.train_interval = train_interval - - self.delta_clip = delta_clip - self.compiled = False - self.actions = None - self.observations = None - self.rewards = None - - def compute_batch_q_values(self, state_batch): - batch = self.process_state_batch(state_batch) - q_values = self.model.predict_on_batch(batch) - assert q_values.shape == (len(state_batch), self.nb_actions) - return q_values - - def compute_q_values(self, state): - q_values = self.compute_batch_q_values([state]).flatten() - assert q_values.shape == (self.nb_actions,) - return q_values - - def process_state_batch(self, batch): - batch = np.array(batch) - if self.processor is None: - return batch - return self.processor.process_state_batch(batch) - - def get_config(self): - config = super(SarsaAgent, self).get_config() - config['nb_actions'] = self.nb_actions - config['gamma'] = self.gamma - config['nb_steps_warmup'] = self.nb_steps_warmup - config['train_interval'] = self.train_interval - config['delta_clip'] = self.delta_clip - config['model'] = get_object_config(self.model) - config['policy'] = get_object_config(self.policy) - config['test_policy'] = get_object_config(self.test_policy) - return config - - def compile(self, optimizer, metrics=[]): - metrics += [mean_q] # register default metrics - - def clipped_masked_error(args): - y_true, y_pred, mask = args - loss = huber_loss(y_true, y_pred, self.delta_clip) - loss *= mask # apply element-wise mask - return K.sum(loss, axis=-1) - - # Create trainable model. The problem is that we need to mask the output since we only - # ever want to update the Q values for a certain action. The way we achieve this is by - # using a custom Lambda layer that computes the loss. This gives us the necessary flexibility - # to mask out certain parameters by passing in multiple inputs to the Lambda layer. - y_pred = self.model.output - y_true = Input(name='y_true', shape=(self.nb_actions,)) - mask = Input(name='mask', shape=(self.nb_actions,)) - loss_out = Lambda(clipped_masked_error, output_shape=( - 1,), name='loss')([y_pred, y_true, mask]) - ins = [self.model.input] if type( - self.model.input) is not list else self.model.input - trainable_model = Model( - inputs=ins + [y_true, mask], outputs=[loss_out, y_pred]) - assert len(trainable_model.output_names) == 2 - combined_metrics = {trainable_model.output_names[1]: metrics} - losses = [ - lambda y_true, y_pred: y_pred, # loss is computed in Lambda layer - # we only include this for the metrics - lambda y_true, y_pred: K.zeros_like(y_pred), - ] - trainable_model.compile(optimizer=optimizer, - loss=losses, metrics=combined_metrics) - self.trainable_model = trainable_model - - self.compiled = True - - def load_weights(self, filepath): - self.model.load_weights(filepath) - - def save_weights(self, filepath, overwrite=False): - self.model.save_weights(filepath, overwrite=overwrite) - - def reset_states(self): - self.actions = collections.deque(maxlen=2) - self.observations = collections.deque(maxlen=2) - self.rewards = collections.deque(maxlen=2) - if self.compiled: - self.model.reset_states() - - def forward(self, observation): - # Select an action. 
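- # The Q-values for the current observation are mapped to a discrete
- # action by `policy` during training and by `test_policy` otherwise.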
- q_values = self.compute_q_values([observation]) - if self.training: - action = self.policy.select_action(q_values=q_values) - else: - action = self.test_policy.select_action(q_values=q_values) - - # Book-keeping. - self.observations.append(observation) - self.actions.append(action) - - return action - - def backward(self, reward, terminal): - metrics = [np.nan for _ in self.metrics_names] - if not self.training: - # We're done here. No need to update the experience memory since we only use the working - # memory to obtain the state over the most recent observations. - return metrics - - # Train the network on a single stochastic batch. - if self.step > self.nb_steps_warmup and self.step % self.train_interval == 0: - # Start by extracting the necessary parameters (we use a vectorized implementation). - self.rewards.append(reward) - if len(self.observations) < 2: - return metrics # not enough data yet - - state0_batch = [self.observations[0]] - reward_batch = [self.rewards[0]] - action_batch = [self.actions[0]] - terminal1_batch = [0.] if terminal else [1.] - state1_batch = [self.observations[1]] - action1_batch = [self.actions[1]] - - # Prepare and validate parameters. - state0_batch = self.process_state_batch(state0_batch) - state1_batch = self.process_state_batch(state1_batch) - terminal1_batch = np.array(terminal1_batch) - reward_batch = np.array(reward_batch) - assert reward_batch.shape == (1,) - assert terminal1_batch.shape == reward_batch.shape - assert len(action_batch) == len(reward_batch) - - batch = self.process_state_batch(state1_batch) - q_values = self.compute_q_values(batch) - q_values = q_values.reshape((1, self.nb_actions)) - - q_batch = q_values[0, action1_batch] - - assert q_batch.shape == (1,) - targets = np.zeros((1, self.nb_actions)) - dummy_targets = np.zeros((1,)) - masks = np.zeros((1, self.nb_actions)) - - # Compute r_t + gamma * Q(s_t+1, a_t+1) - discounted_reward_batch = self.gamma * q_batch - # Set discounted reward to zero for all states that were terminal. - discounted_reward_batch *= terminal1_batch - assert discounted_reward_batch.shape == reward_batch.shape - Rs = reward_batch + discounted_reward_batch - for idx, (target, mask, R, action) in enumerate(zip(targets, masks, Rs, action_batch)): - # update action with estimated accumulated reward - target[action] = R - dummy_targets[idx] = R - mask[action] = 1. # enable loss for this specific action - targets = np.array(targets).astype('float32') - masks = np.array(masks).astype('float32') - - # Finally, perform a single update on the entire batch. We use a dummy target since - # the actual loss is computed in a Lambda layer that needs more complex input. However, - # it is still useful to know the actual target to compute metrics properly. - state0_batch = state0_batch.reshape((1,) + state0_batch.shape) - ins = [state0_batch] if type( - self.model.input) is not list else state0_batch - metrics = self.trainable_model.train_on_batch( - ins + [targets, masks], [dummy_targets, targets]) - metrics = [metric for idx, metric in enumerate( - metrics) if idx not in (1, 2)] # throw away individual losses - metrics += self.policy.metrics - if self.processor is not None: - metrics += self.processor.metrics - return metrics - - @property - def layers(self): - return self.model.layers[:] - - @property - def metrics_names(self): - # Throw away individual losses and replace output name since this is hidden from the user. 
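- # Keras lists [total_loss, per-output losses..., per-output metrics...]
- # for multi-output models, so indices 1 and 2 are the two per-output
- # losses being discarded.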
- assert len(self.trainable_model.output_names) == 2 - dummy_output_name = self.trainable_model.output_names[1] - model_metrics = [name for idx, name in enumerate( - self.trainable_model.metrics_names) if idx not in (1, 2)] - model_metrics = [name.replace(dummy_output_name + '_', '') - for name in model_metrics] - - names = model_metrics + self.policy.metrics_names[:] - if self.processor is not None: - names += self.processor.metrics_names[:] - return names - - @property - def policy(self): - return self.__policy - - @policy.setter - def policy(self, policy): - self.__policy = policy - self.__policy._set_agent(self) - - @property - def test_policy(self): - return self.__test_policy - - @test_policy.setter - def test_policy(self, policy): - self.__test_policy = policy - self.__test_policy._set_agent(self) - - -# Aliases -SarsaAgent = SARSAAgent -from .misc_util import * -# Inspired from OpenAI Baselines -import gym -from rl.common.vec_env.subproc_env_vec import SubprocVecEnv -from rl.common import set_global_seeds - - -def make_gym_env(env_id, num_env=2, seed=123, wrapper_kwargs=None, start_index=0): - """ - Create a wrapped, SubprocVecEnv for Gym Environments. - """ - if wrapper_kwargs is None: - wrapper_kwargs = {} - - def make_env(rank): # pylint: disable=C0111 - def _thunk(): - env = gym.make(env_id) - env.seed(seed + rank) - return env - return _thunk - - set_global_seeds(seed) - return SubprocVecEnv([make_env(i + start_index) for i in range(num_env)]) -# Inspired from OpenAI Baselines - -import gym -import numpy as np -import random - - -def set_global_seeds(i): - np.random.seed(i) - random.seed(i) -import numpy as np - - -def tile_images(img_nhwc): - """ - Tile N images into one big PxQ image - (P,Q) are chosen to be as close as possible, and if N - is square, then P=Q. 
- input: img_nhwc, list or array of images, ndim=4 once turned into array - n = batch index, h = height, w = width, c = channel - returns: - bigim_HWc, ndarray with ndim=3 - """ - img_nhwc = np.asarray(img_nhwc) - N, h, w, c = img_nhwc.shape - H = int(np.ceil(np.sqrt(N))) - W = int(np.ceil(float(N)/H)) - img_nhwc = np.array( - list(img_nhwc) + [img_nhwc[0]*0 for _ in range(N, H*W)]) - img_HWhwc = img_nhwc.reshape(H, W, h, w, c) - img_HhWwc = img_HWhwc.transpose(0, 2, 1, 3, 4) - img_Hh_Ww_c = img_HhWwc.reshape(H*h, W*w, c) - return img_Hh_Ww_c -import random - -import numpy as np -import gym - -from keras.models import Sequential, Model -from keras.layers import Dense, Activation, Flatten, Input, Concatenate -from keras.optimizers import Adam - -from rl.agents import NAFAgent, DDPGAgent -from rl.random import OrnsteinUhlenbeckProcess -from rl.memory import SequentialMemory - - -def test_cdqn(): - # TODO: replace this with a simpler environment where we can actually test if it finds a solution - env = gym.make('Pendulum-v0') - np.random.seed(123) - env.seed(123) - random.seed(123) - nb_actions = env.action_space.shape[0] - - V_model = Sequential() - V_model.add(Flatten(input_shape=(1,) + env.observation_space.shape)) - V_model.add(Dense(16)) - V_model.add(Activation('relu')) - V_model.add(Dense(1)) - - mu_model = Sequential() - mu_model.add(Flatten(input_shape=(1,) + env.observation_space.shape)) - mu_model.add(Dense(16)) - mu_model.add(Activation('relu')) - mu_model.add(Dense(nb_actions)) - - action_input = Input(shape=(nb_actions,), name='action_input') - observation_input = Input( - shape=(1,) + env.observation_space.shape, name='observation_input') - x = Concatenate()([action_input, Flatten()(observation_input)]) - x = Dense(16)(x) - x = Activation('relu')(x) - x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x) - L_model = Model(inputs=[action_input, observation_input], outputs=x) - - memory = SequentialMemory(limit=1000, window_length=1) - random_process = OrnsteinUhlenbeckProcess( - theta=.15, mu=0., sigma=.3, size=nb_actions) - agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model, - memory=memory, nb_steps_warmup=50, random_process=random_process, - gamma=.99, target_model_update=1e-3) - agent.compile(Adam(lr=1e-3)) - - agent.fit(env, nb_steps=400, visualize=False, - verbose=0, nb_max_episode_steps=100) - h = agent.test(env, nb_episodes=2, visualize=False, - nb_max_episode_steps=100) - # TODO: evaluate history - - -def test_ddpg(): - # TODO: replace this with a simpler environment where we can actually test if it finds a solution - env = gym.make('Pendulum-v0') - np.random.seed(123) - env.seed(123) - random.seed(123) - nb_actions = env.action_space.shape[0] - - actor = Sequential() - actor.add(Flatten(input_shape=(1,) + env.observation_space.shape)) - actor.add(Dense(16)) - actor.add(Activation('relu')) - actor.add(Dense(nb_actions)) - actor.add(Activation('linear')) - - action_input = Input(shape=(nb_actions,), name='action_input') - observation_input = Input( - shape=(1,) + env.observation_space.shape, name='observation_input') - flattened_observation = Flatten()(observation_input) - x = Concatenate()([action_input, flattened_observation]) - x = Dense(16)(x) - x = Activation('relu')(x) - x = Dense(1)(x) - x = Activation('linear')(x) - critic = Model(inputs=[action_input, observation_input], outputs=x) - - memory = SequentialMemory(limit=1000, window_length=1) - random_process = OrnsteinUhlenbeckProcess(theta=.15, mu=0., sigma=.3) - 
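- # Ornstein-Uhlenbeck noise is temporally correlated, which typically
- # explores inertial continuous-control tasks such as Pendulum better
- # than uncorrelated Gaussian noise.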
agent = DDPGAgent(nb_actions=nb_actions, actor=actor, critic=critic, critic_action_input=action_input, - memory=memory, nb_steps_warmup_critic=50, nb_steps_warmup_actor=50, - random_process=random_process, gamma=.99, target_model_update=1e-3) - agent.compile([Adam(lr=1e-3), Adam(lr=1e-3)]) - - agent.fit(env, nb_steps=400, visualize=False, - verbose=0, nb_max_episode_steps=100) - h = agent.test(env, nb_episodes=2, visualize=False, - nb_max_episode_steps=100) - # TODO: evaluate history -import random - -import numpy as np -from numpy.testing import assert_allclose - -from keras.models import Sequential -from keras.layers import Dense, Activation -from keras.optimizers import Adam -from rl.agents import DQNAgent, CEMAgent, SARSAAgent -from rl.policy import EpsGreedyQPolicy -from rl.memory import SequentialMemory, EpisodeParameterMemory -from utils.gym.envs import TwoRoundDeterministicRewardEnv - - -def test_dqn(): - env = TwoRoundDeterministicRewardEnv() - np.random.seed(123) - env.seed(123) - random.seed(123) - nb_actions = env.action_space.n - - # Next, we build a very simple model. - model = Sequential() - model.add(Dense(16, input_shape=(1,))) - model.add(Activation('relu')) - model.add(Dense(nb_actions)) - model.add(Activation('linear')) - - memory = SequentialMemory(limit=1000, window_length=1) - policy = EpsGreedyQPolicy(eps=.1) - dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50, - target_model_update=1e-1, policy=policy, enable_double_dqn=False) - dqn.compile(Adam(lr=1e-3)) - - dqn.fit(env, nb_steps=2000, visualize=False, verbose=0) - policy.eps = 0. - h = dqn.test(env, nb_episodes=20, visualize=False) - assert_allclose(np.mean(h.history['episode_reward']), 3.) - - -def test_double_dqn(): - env = TwoRoundDeterministicRewardEnv() - np.random.seed(123) - env.seed(123) - random.seed(123) - nb_actions = env.action_space.n - - # Next, we build a very simple model. - model = Sequential() - model.add(Dense(16, input_shape=(1,))) - model.add(Activation('relu')) - model.add(Dense(nb_actions)) - model.add(Activation('linear')) - - memory = SequentialMemory(limit=1000, window_length=1) - policy = EpsGreedyQPolicy(eps=.1) - dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50, - target_model_update=1e-1, policy=policy, enable_double_dqn=True) - dqn.compile(Adam(lr=1e-3)) - - dqn.fit(env, nb_steps=2000, visualize=False, verbose=0) - policy.eps = 0. - h = dqn.test(env, nb_episodes=20, visualize=False) - assert_allclose(np.mean(h.history['episode_reward']), 3.) - - -def test_cem(): - env = TwoRoundDeterministicRewardEnv() - np.random.seed(123) - env.seed(123) - random.seed(123) - nb_actions = env.action_space.n - - # Next, we build a very simple model. - model = Sequential() - model.add(Dense(16, input_shape=(1,))) - model.add(Activation('relu')) - model.add(Dense(nb_actions)) - model.add(Activation('linear')) - - memory = EpisodeParameterMemory(limit=1000, window_length=1) - dqn = CEMAgent(model=model, nb_actions=nb_actions, memory=memory) - dqn.compile() - - dqn.fit(env, nb_steps=2000, visualize=False, verbose=1) - h = dqn.test(env, nb_episodes=20, visualize=False) - assert_allclose(np.mean(h.history['episode_reward']), 3.) - - -def test_duel_dqn(): - env = TwoRoundDeterministicRewardEnv() - np.random.seed(123) - env.seed(123) - random.seed(123) - nb_actions = env.action_space.n - - # Next, we build a very simple model. 
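- # With enable_dueling_network=True the agent rebuilds the output layer
- # into value and advantage streams (V + A - mean(A)) itself, so the
- # network defined here stays a plain Q-network.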
- model = Sequential() - model.add(Dense(16, input_shape=(1,))) - model.add(Activation('relu')) - model.add(Dense(nb_actions, activation='linear')) - - memory = SequentialMemory(limit=1000, window_length=1) - policy = EpsGreedyQPolicy(eps=.1) - dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=50, - target_model_update=1e-1, policy=policy, enable_double_dqn=False, enable_dueling_network=True) - dqn.compile(Adam(lr=1e-3)) - - dqn.fit(env, nb_steps=2000, visualize=False, verbose=0) - policy.eps = 0. - h = dqn.test(env, nb_episodes=20, visualize=False) - assert_allclose(np.mean(h.history['episode_reward']), 3.) - - -def test_sarsa(): - env = TwoRoundDeterministicRewardEnv() - np.random.seed(123) - env.seed(123) - random.seed(123) - nb_actions = env.action_space.n - - # Next, we build a very simple model. - model = Sequential() - model.add(Dense(16, input_shape=(1,))) - model.add(Activation('relu')) - model.add(Dense(nb_actions, activation='linear')) - - policy = EpsGreedyQPolicy(eps=.1) - sarsa = SARSAAgent(model=model, nb_actions=nb_actions, - nb_steps_warmup=50, policy=policy) - sarsa.compile(Adam(lr=1e-3)) - - sarsa.fit(env, nb_steps=20000, visualize=False, verbose=0) - policy.eps = 0. - h = sarsa.test(env, nb_episodes=20, visualize=False) - assert_allclose(np.mean(h.history['episode_reward']), 3.) -from __future__ import division -import pytest -import numpy as np -from numpy.testing import assert_allclose - -from rl.memory import SequentialMemory -from rl.core import Agent, Env, Processor - - -class TestEnv(Env): - def __init__(self): - super(TestEnv, self).__init__() - - def step(self, action): - self.state += 1 - done = self.state >= 6 - reward = float(self.state) / 10. - return np.array(self.state), reward, done, {} - - def reset(self): - self.state = 1 - return np.array(self.state) - - def seed(self, seed=None): - pass - - def configure(self, *args, **kwargs): - pass - - -class TestAgent(Agent): - def __init__(self, memory, **kwargs): - super(TestAgent, self).__init__(**kwargs) - self.memory = memory - - def forward(self, observation): - action = observation - self.recent_action = action - self.recent_observation = observation - return action - - def backward(self, reward, terminal): - metrics = [np.nan for _ in self.metrics_names] - self.memory.append(self.recent_observation, - self.recent_action, reward, terminal) - return metrics - - def compile(self): - self.compiled = True - - -def test_fit_observations(): - memory = SequentialMemory(100, window_length=2, - ignore_episode_boundaries=False) - agent = TestAgent(memory) - env = TestEnv() - agent.compile() - agent.fit(env, 20, verbose=0) - - # Inspect memory to see if observations are correct. 
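- # window_length=2 stacks two consecutive observations per state, so the
- # state0/state1 pairs below should be [o_{t-1}, o_t] and [o_t, o_{t+1}].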
-    experiences = memory.sample(batch_size=6, batch_idxs=range(2, 8))
-
-    assert experiences[0].reward == .4
-    assert experiences[0].action == 3
-    assert_allclose(experiences[0].state0, np.array([2, 3]))
-    assert_allclose(experiences[0].state1, np.array([3, 4]))
-    assert experiences[0].terminal1 is False
-
-    assert experiences[1].reward == .5
-    assert experiences[1].action == 4
-    assert_allclose(experiences[1].state0, np.array([3, 4]))
-    assert_allclose(experiences[1].state1, np.array([4, 5]))
-    assert experiences[1].terminal1 is False
-
-    assert experiences[2].reward == .6
-    assert experiences[2].action == 5
-    assert_allclose(experiences[2].state0, np.array([4, 5]))
-    assert_allclose(experiences[2].state1, np.array([5, 6]))
-    assert experiences[2].terminal1 is True
-
-    # Experience 3 has been re-sampled since state0 would be terminal, in
-    # which case we cannot have a meaningful transition because the
-    # environment gets reset. We thus just ensure that state0 is not terminal.
-    assert not np.all(experiences[3].state0 == np.array([5, 6]))
-
-    assert experiences[4].reward == .2
-    assert experiences[4].action == 1
-    assert_allclose(experiences[4].state0, np.array([0, 1]))
-    assert_allclose(experiences[4].state1, np.array([1, 2]))
-    assert experiences[4].terminal1 is False
-
-    assert experiences[5].reward == .3
-    assert experiences[5].action == 2
-    assert_allclose(experiences[5].state0, np.array([1, 2]))
-    assert_allclose(experiences[5].state1, np.array([2, 3]))
-    assert experiences[5].terminal1 is False
-
-
-def test_copy_observations():
-    methods = [
-        'fit',
-        'test',
-    ]
-
-    for method in methods:
-        original_observations = []
-
-        class LocalEnv(Env):
-            def __init__(self):
-                super(LocalEnv, self).__init__()
-
-            def step(self, action):
-                self.state += 1
-                done = self.state >= 6
-                reward = float(self.state) / 10.
-                obs = np.array(self.state)
-                original_observations.append(obs)
-                return obs, reward, done, {}
-
-            def reset(self):
-                self.state = 1
-                return np.array(self.state)
-
-            def seed(self, seed=None):
-                pass
-
-            def configure(self, *args, **kwargs):
-                pass
-
-        # Slight abuse of the processor for test purposes.
-        observations = []
-
-        class LocalProcessor(Processor):
-            def process_step(self, observation, reward, done, info):
-                observations.append(observation)
-                return observation, reward, done, info
-
-        processor = LocalProcessor()
-        memory = SequentialMemory(100, window_length=1)
-        agent = TestAgent(memory, processor=processor)
-        env = LocalEnv()
-        agent.compile()
-        getattr(agent, method)(env, 20, verbose=0, visualize=False)
-
-        assert len(observations) == len(original_observations)
-        assert_allclose(np.array(observations),
-                        np.array(original_observations))
-        assert np.all([o is not o_ for o, o_ in zip(
-            original_observations, observations)])
-
-
-if __name__ == '__main__':
-    pytest.main([__file__])
-from __future__ import division
-import pytest
-import numpy as np
-from numpy.testing import assert_allclose
-
-from rl.memory import SequentialMemory, RingBuffer
-
-
-def test_ring_buffer():
-    def assert_elements(b, ref):
-        assert len(b) == len(ref)
-        for idx in range(b.maxlen):
-            if idx >= len(ref):
-                with pytest.raises(KeyError):
-                    b[idx]
-            else:
-                assert b[idx] == ref[idx]
-
-    b = RingBuffer(5)
-
-    # Fill buffer.
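-    # Until maxlen=5 is reached the buffer simply grows; afterwards every
-    # append evicts the oldest element, e.g. appending 6 to [1, 2, 3, 4, 5]
-    # yields [2, 3, 4, 5, 6].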
- assert_elements(b, []) - b.append(1) - assert_elements(b, [1]) - b.append(2) - assert_elements(b, [1, 2]) - b.append(3) - assert_elements(b, [1, 2, 3]) - b.append(4) - assert_elements(b, [1, 2, 3, 4]) - b.append(5) - assert_elements(b, [1, 2, 3, 4, 5]) - - # Add couple more items with buffer at limit. - b.append(6) - assert_elements(b, [2, 3, 4, 5, 6]) - b.append(7) - assert_elements(b, [3, 4, 5, 6, 7]) - b.append(8) - assert_elements(b, [4, 5, 6, 7, 8]) - - -def test_get_recent_state_with_episode_boundaries(): - memory = SequentialMemory( - 3, window_length=2, ignore_episode_boundaries=False) - obs_size = (3, 4) - - obs0 = np.random.random(obs_size) - terminal0 = False - - obs1 = np.random.random(obs_size) - terminal1 = False - - obs2 = np.random.random(obs_size) - terminal2 = False - - obs3 = np.random.random(obs_size) - terminal3 = True - - obs4 = np.random.random(obs_size) - terminal4 = False - - obs5 = np.random.random(obs_size) - terminal5 = True - - obs6 = np.random.random(obs_size) - terminal6 = False - - state = np.array(memory.get_recent_state(obs0)) - assert state.shape == (2,) + obs_size - assert np.allclose(state[0], 0.) - assert np.all(state[1] == obs0) - - # memory.append takes the current observation, the reward after taking an action and if - # the *new* observation is terminal, thus `obs0` and `terminal1` is correct. - memory.append(obs0, 0, 0., terminal1) - state = np.array(memory.get_recent_state(obs1)) - assert state.shape == (2,) + obs_size - assert np.all(state[0] == obs0) - assert np.all(state[1] == obs1) - - memory.append(obs1, 0, 0., terminal2) - state = np.array(memory.get_recent_state(obs2)) - assert state.shape == (2,) + obs_size - assert np.all(state[0] == obs1) - assert np.all(state[1] == obs2) - - memory.append(obs2, 0, 0., terminal3) - state = np.array(memory.get_recent_state(obs3)) - assert state.shape == (2,) + obs_size - assert np.all(state[0] == obs2) - assert np.all(state[1] == obs3) - - memory.append(obs3, 0, 0., terminal4) - state = np.array(memory.get_recent_state(obs4)) - assert state.shape == (2,) + obs_size - assert np.all(state[0] == np.zeros(obs_size)) - assert np.all(state[1] == obs4) - - memory.append(obs4, 0, 0., terminal5) - state = np.array(memory.get_recent_state(obs5)) - assert state.shape == (2,) + obs_size - assert np.all(state[0] == obs4) - assert np.all(state[1] == obs5) - - memory.append(obs5, 0, 0., terminal6) - state = np.array(memory.get_recent_state(obs6)) - assert state.shape == (2,) + obs_size - assert np.all(state[0] == np.zeros(obs_size)) - assert np.all(state[1] == obs6) - - -def test_training_flag(): - obs_size = (3, 4) - - obs0 = np.random.random(obs_size) - terminal0 = False - - obs1 = np.random.random(obs_size) - terminal1 = True - - obs2 = np.random.random(obs_size) - terminal2 = False - - for training in (True, False): - memory = SequentialMemory(3, window_length=2) - - state = np.array(memory.get_recent_state(obs0)) - assert state.shape == (2,) + obs_size - assert np.allclose(state[0], 0.) 
- assert np.all(state[1] == obs0) - assert memory.nb_entries == 0 - - memory.append(obs0, 0, 0., terminal1, training=training) - state = np.array(memory.get_recent_state(obs1)) - assert state.shape == (2,) + obs_size - assert np.all(state[0] == obs0) - assert np.all(state[1] == obs1) - if training: - assert memory.nb_entries == 1 - else: - assert memory.nb_entries == 0 - - memory.append(obs1, 0, 0., terminal2, training=training) - state = np.array(memory.get_recent_state(obs2)) - assert state.shape == (2,) + obs_size - assert np.allclose(state[0], 0.) - assert np.all(state[1] == obs2) - if training: - assert memory.nb_entries == 2 - else: - assert memory.nb_entries == 0 - - -def test_get_recent_state_without_episode_boundaries(): - memory = SequentialMemory( - 3, window_length=2, ignore_episode_boundaries=True) - obs_size = (3, 4) - - obs0 = np.random.random(obs_size) - terminal0 = False - - obs1 = np.random.random(obs_size) - terminal1 = False - - obs2 = np.random.random(obs_size) - terminal2 = False - - obs3 = np.random.random(obs_size) - terminal3 = True - - obs4 = np.random.random(obs_size) - terminal4 = False - - obs5 = np.random.random(obs_size) - terminal5 = True - - obs6 = np.random.random(obs_size) - terminal6 = False - - state = np.array(memory.get_recent_state(obs0)) - assert state.shape == (2,) + obs_size - assert np.allclose(state[0], 0.) - assert np.all(state[1] == obs0) - - # memory.append takes the current observation, the reward after taking an action and if - # the *new* observation is terminal, thus `obs0` and `terminal1` is correct. - memory.append(obs0, 0, 0., terminal1) - state = np.array(memory.get_recent_state(obs1)) - assert state.shape == (2,) + obs_size - assert np.all(state[0] == obs0) - assert np.all(state[1] == obs1) - - memory.append(obs1, 0, 0., terminal2) - state = np.array(memory.get_recent_state(obs2)) - assert state.shape == (2,) + obs_size - assert np.all(state[0] == obs1) - assert np.all(state[1] == obs2) - - memory.append(obs2, 0, 0., terminal3) - state = np.array(memory.get_recent_state(obs3)) - assert state.shape == (2,) + obs_size - assert np.all(state[0] == obs2) - assert np.all(state[1] == obs3) - - memory.append(obs3, 0, 0., terminal4) - state = np.array(memory.get_recent_state(obs4)) - assert state.shape == (2,) + obs_size - assert np.all(state[0] == obs3) - assert np.all(state[1] == obs4) - - memory.append(obs4, 0, 0., terminal5) - state = np.array(memory.get_recent_state(obs5)) - assert state.shape == (2,) + obs_size - assert np.all(state[0] == obs4) - assert np.all(state[1] == obs5) - - memory.append(obs5, 0, 0., terminal6) - state = np.array(memory.get_recent_state(obs6)) - assert state.shape == (2,) + obs_size - assert np.all(state[0] == obs5) - assert np.all(state[1] == obs6) - - -def test_sampling(): - memory = SequentialMemory(100, window_length=2, - ignore_episode_boundaries=False) - obs_size = (3, 4) - actions = range(5) - - obs0 = np.random.random(obs_size) - terminal0 = False - action0 = np.random.choice(actions) - reward0 = np.random.random() - - obs1 = np.random.random(obs_size) - terminal1 = False - action1 = np.random.choice(actions) - reward1 = np.random.random() - - obs2 = np.random.random(obs_size) - terminal2 = False - action2 = np.random.choice(actions) - reward2 = np.random.random() - - obs3 = np.random.random(obs_size) - terminal3 = True - action3 = np.random.choice(actions) - reward3 = np.random.random() - - obs4 = np.random.random(obs_size) - terminal4 = False - action4 = np.random.choice(actions) - reward4 = 
np.random.random() - - obs5 = np.random.random(obs_size) - terminal5 = False - action5 = np.random.choice(actions) - reward5 = np.random.random() - - obs6 = np.random.random(obs_size) - terminal6 = False - action6 = np.random.choice(actions) - reward6 = np.random.random() - - # memory.append takes the current observation, the reward after taking an action and if - # the *new* observation is terminal, thus `obs0` and `terminal1` is correct. - memory.append(obs0, action0, reward0, terminal1) - memory.append(obs1, action1, reward1, terminal2) - memory.append(obs2, action2, reward2, terminal3) - memory.append(obs3, action3, reward3, terminal4) - memory.append(obs4, action4, reward4, terminal5) - memory.append(obs5, action5, reward5, terminal6) - assert memory.nb_entries == 6 - - experiences = memory.sample(batch_size=3, batch_idxs=[2, 3, 4]) - assert len(experiences) == 3 - - assert_allclose(experiences[0].state0, np.array([obs1, obs2])) - assert_allclose(experiences[0].state1, np.array([obs2, obs3])) - assert experiences[0].action == action2 - assert experiences[0].reward == reward2 - assert experiences[0].terminal1 is True - - # Next experience has been re-sampled since since state0 would be terminal in which case we - # cannot really have a meaningful transition because the environment gets reset. We thus - # just ensure that state0 is not terminal. - assert not np.all(experiences[1].state0 == np.array([obs2, obs3])) - - assert_allclose(experiences[2].state0, - np.array([np.zeros(obs_size), obs4])) - assert_allclose(experiences[2].state1, np.array([obs4, obs5])) - assert experiences[2].action == action4 - assert experiences[2].reward == reward4 - assert experiences[2].terminal1 is False - - -if __name__ == '__main__': - pytest.main([__file__]) -from __future__ import division -import pytest -import numpy as np -from numpy.testing import assert_allclose - -from keras.models import Model, Sequential -from keras.layers import Input, Dense, Concatenate -from keras.optimizers import SGD -import keras.backend as K - -from rl.util import clone_optimizer, clone_model, huber_loss, WhiteningNormalizer - - -def test_clone_sequential_model(): - seq = Sequential() - seq.add(Dense(8, input_shape=(3,))) - seq.compile(optimizer='sgd', loss='mse') - - clone = clone_model(seq) - clone.compile(optimizer='sgd', loss='mse') - - ins = np.random.random((4, 3)) - y_pred_seq = seq.predict_on_batch(ins) - y_pred_clone = clone.predict_on_batch(ins) - assert y_pred_seq.shape == y_pred_clone.shape - assert_allclose(y_pred_seq, y_pred_clone) - - -def test_clone_graph_model(): - in1 = Input(shape=(2,)) - in2 = Input(shape=(3,)) - x = Dense(8)(Concatenate()([in1, in2])) - graph = Model([in1, in2], x) - graph.compile(optimizer='sgd', loss='mse') - - clone = clone_model(graph) - clone.compile(optimizer='sgd', loss='mse') - - ins = [np.random.random((4, 2)), np.random.random((4, 3))] - y_pred_graph = graph.predict_on_batch(ins) - y_pred_clone = clone.predict_on_batch(ins) - assert y_pred_graph.shape == y_pred_clone.shape - assert_allclose(y_pred_graph, y_pred_clone) - - -def test_clone_optimizer(): - lr, momentum, clipnorm, clipvalue = np.random.random(size=4) - optimizer = SGD(lr=lr, momentum=momentum, - clipnorm=clipnorm, clipvalue=clipvalue) - clone = clone_optimizer(optimizer) - - assert isinstance(clone, SGD) - assert K.get_value(optimizer.lr) == K.get_value(clone.lr) - assert K.get_value(optimizer.momentum) == K.get_value(clone.momentum) - assert optimizer.clipnorm == clone.clipnorm - assert optimizer.clipvalue == 
clone.clipvalue - - -def test_clone_optimizer_from_string(): - clone = clone_optimizer('sgd') - assert isinstance(clone, SGD) - - -def test_huber_loss(): - a = np.array([1., 1.5, 2., 4.]) - b = np.array([1.5, 1., 4., 2.]) - assert_allclose(K.eval(huber_loss(a, b, 1.)), - np.array([.125, .125, 1.5, 1.5])) - assert_allclose(K.eval(huber_loss(a, b, 3.)), - np.array([.125, .125, 2., 2.])) - assert_allclose(K.eval(huber_loss(a, b, np.inf)), - np.array([.125, .125, 2., 2.])) - - -def test_whitening_normalizer(): - x = np.random.normal(loc=.2, scale=2., size=(1000, 5)) - normalizer = WhiteningNormalizer(shape=(5,)) - normalizer.update(x[:500]) - normalizer.update(x[500:]) - - assert_allclose(normalizer.mean, np.mean(x, axis=0)) - assert_allclose(normalizer.std, np.std(x, axis=0)) - - x_norm = normalizer.normalize(x) - assert_allclose(np.mean(x_norm, axis=0), np.zeros( - 5, dtype=normalizer.dtype), atol=1e-5) - assert_allclose(np.std(x_norm, axis=0), np.ones( - 5, dtype=normalizer.dtype), atol=1e-5) - - x_denorm = normalizer.denormalize(x_norm) - assert_allclose(x_denorm, x) - - -if __name__ == '__main__': - pytest.main([__file__]) -import numpy as np -import random - -from rl.core import Env - - -class MultiInputTestEnv(Env): - def __init__(self, observation_shape): - self.observation_shape = observation_shape - - def step(self, action): - return self._get_obs(), random.choice([0, 1]), random.choice([True, False]), {} - - def reset(self): - return self._get_obs() - - def _get_obs(self): - if type(self.observation_shape) is list: - return [np.random.random(s) for s in self.observation_shape] - else: - return np.random.random(self.observation_shape) - - def __del__(self): - pass -import numpy - -np_random = numpy.random.RandomState() - - -def seed(seed=None): - """Seed the common numpy.random.RandomState used in spaces - - CF - https://github.com/openai/gym/commit/58e6aa95e5af2c738557431f812abb81c505a7cf#commitcomment-17669277 - for some details about why we seed the spaces separately from the - envs, but tl;dr is that it's pretty uncommon for them to be used - within an actual algorithm, and the code becomes simpler to just - use this common numpy.random.RandomState. - """ - np_random.seed(seed) - - -# This numpy.random.RandomState gets used in all spaces for their -# 'sample' method. It's not really expected that people will be using -# these in their algorithms. -seed(0) -# Inspired from VecEnv from OpenAI Baselines - - -class VecEnv(object): - """ - An abstract asynchronous, vectorized environment. - """ - - def __init__(self, num_envs, observation_space, action_space): - self.num_envs = num_envs - self.observation_space = observation_space - self.action_space = action_space - - def reset(self): - """ - Reset all the environments and return an array of - observations, or a tuple of observation arrays. - If step_async is still doing work, that work will - be cancelled and step_wait() should not be called - until step_async() is invoked again. - """ - pass - - def step_async(self, actions): - """ - Tell all the environments to start taking a step - with the given actions. - Call step_wait() to get the results of the step. - You should not call this if a step_async run is - already pending. - """ - raise NotImplementedError() - - def step_wait(self): - """ - Wait for the step taken with step_async(). - Returns (obs, rews, dones, infos): - - obs: an array of observations, or a tuple of - arrays of observations. 
- - rews: an array of rewards - - dones: an array of "episode done" booleans - - infos: a sequence of info objects - """ - raise NotImplementedError() - - def close(self): - """ - Clean up the environments' resources. - """ - raise NotImplementedError() - - def step(self, actions): - self.step_async(actions) - return self.step_wait() - - def render(self, mode='human'): - logger.warn('Render not defined for %s' % self) - - def seed(self, i): - raise NotImplementedError() - - @property - def unwrapped(self): - if isinstance(self, VecEnvWrapper): - return self.venv.unwrapped - else: - return self - - -class CloudpickleWrapper(object): - """ - Uses cloudpickle to serialize contents (otherwise multiprocessing tries to use pickle) - """ - - def __init__(self, x): - self.x = x - - def __getstate__(self): - import cloudpickle - return cloudpickle.dumps(self.x) - - def __setstate__(self, ob): - import pickle - self.x = pickle.loads(ob) -# Inspired from OpenAI Baselines - -import numpy as np -from multiprocessing import Process, Pipe -from rl.common.vec_env import VecEnv, CloudpickleWrapper -from rl.common.tile_images import tile_images - - -def worker(remote, parent_remote, env_fn_wrapper): - parent_remote.close() - env = env_fn_wrapper.x() - while True: - cmd, data = remote.recv() - if cmd == 'step': - ob, reward, done, info = env.step(data) - if done: - ob = env.reset() - remote.send((ob, reward, done, info)) - elif cmd == 'reset': - ob = env.reset() - remote.send(ob) - elif cmd == 'render': - remote.send(env.render(mode='rgb_array')) - elif cmd == 'close': - remote.close() - break - elif cmd == 'get_spaces': - remote.send((env.observation_space, env.action_space)) - elif cmd == 'seed': - val = env.seed(data) - remote.send(val) - else: - raise NotImplementedError - - -class SubprocVecEnv(VecEnv): - def __init__(self, env_fns, spaces=None): - """ - envs: list of gym environments to run in subprocesses - """ - self.waiting = False - self.closed = False - nenvs = len(env_fns) - self.remotes, self.work_remotes = zip(*[Pipe() for _ in range(nenvs)]) - self.ps = [Process(target=worker, args=(work_remote, remote, CloudpickleWrapper(env_fn))) - for (work_remote, remote, env_fn) in zip(self.work_remotes, self.remotes, env_fns)] - for p in self.ps: - p.daemon = True # if the main process crashes, we should not cause things to hang - p.start() - for remote in self.work_remotes: - remote.close() - - self.remotes[0].send(('get_spaces', None)) - observation_space, action_space = self.remotes[0].recv() - VecEnv.__init__(self, len(env_fns), observation_space, action_space) - - def step_async(self, actions): - for remote, action in zip(self.remotes, actions): - remote.send(('step', action)) - self.waiting = True - - def step_wait(self): - results = [remote.recv() for remote in self.remotes] - self.waiting = False - obs, rews, dones, infos = zip(*results) - return np.stack(obs), np.stack(rews), np.stack(dones), infos - - def reset(self): - for remote in self.remotes: - remote.send(('reset', None)) - return np.stack([remote.recv() for remote in self.remotes]) - - def reset_task(self): - for remote in self.remotes: - remote.send(('reset_task', None)) - return np.stack([remote.recv() for remote in self.remotes]) - - def close(self): - if self.closed: - return - if self.waiting: - for remote in self.remotes: - remote.recv() - for remote in self.remotes: - remote.send(('close', None)) - for p in self.ps: - p.join() - self.closed = True - - def render(self, mode='human'): - raise NotImplementedError( - 'Render is not 
implemented for Synchronous Environment') - - def seed(self, i): - rank = i - for remote in self.remotes: - remote.send(('seed', rank)) - rank += 1 -from __future__ import division -from __future__ import absolute_import - -import pytest -import numpy as np -from numpy.testing import assert_allclose - -from keras.models import Model, Sequential -from keras.layers import Input, Dense, Flatten, Concatenate - -from rl.agents.cem import CEMAgent -from rl.memory import EpisodeParameterMemory -from rl.processors import MultiInputProcessor - -from ..util import MultiInputTestEnv - - -def test_single_cem_input(): - model = Sequential() - model.add(Flatten(input_shape=(2, 3))) - model.add(Dense(2)) - - memory = EpisodeParameterMemory(limit=10, window_length=2) - agent = CEMAgent(model, memory=memory, nb_actions=2, - nb_steps_warmup=5, batch_size=4, train_interval=50) - agent.compile() - agent.fit(MultiInputTestEnv((3,)), nb_steps=100) - - -def test_multi_cem_input(): - input1 = Input(shape=(2, 3)) - input2 = Input(shape=(2, 4)) - x = Concatenate()([input1, input2]) - x = Flatten()(x) - x = Dense(2)(x) - model = Model(inputs=[input1, input2], outputs=x) - - memory = EpisodeParameterMemory(limit=10, window_length=2) - processor = MultiInputProcessor(nb_inputs=2) - agent = CEMAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5, batch_size=4, - processor=processor, train_interval=50) - agent.compile() - agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=100) -from __future__ import division -from __future__ import absolute_import - -import pytest -import numpy as np -from numpy.testing import assert_allclose - -from keras.models import Model, Sequential -from keras.layers import Input, Dense, Flatten, Concatenate - -from rl.agents.ddpg import DDPGAgent -from rl.memory import SequentialMemory -from rl.processors import MultiInputProcessor - -from ..util import MultiInputTestEnv - - -def test_single_ddpg_input(): - nb_actions = 2 - - actor = Sequential() - actor.add(Flatten(input_shape=(2, 3))) - actor.add(Dense(nb_actions)) - - action_input = Input(shape=(nb_actions,), name='action_input') - observation_input = Input(shape=(2, 3), name='observation_input') - x = Concatenate()([action_input, Flatten()(observation_input)]) - x = Dense(1)(x) - critic = Model(inputs=[action_input, observation_input], outputs=x) - - memory = SequentialMemory(limit=10, window_length=2) - agent = DDPGAgent(actor=actor, critic=critic, critic_action_input=action_input, memory=memory, - nb_actions=2, nb_steps_warmup_critic=5, nb_steps_warmup_actor=5, batch_size=4) - agent.compile('sgd') - agent.fit(MultiInputTestEnv((3,)), nb_steps=10) - - -def test_multi_ddpg_input(): - nb_actions = 2 - - actor_observation_input1 = Input( - shape=(2, 3), name='actor_observation_input1') - actor_observation_input2 = Input( - shape=(2, 4), name='actor_observation_input2') - actor = Sequential() - x = Concatenate()([actor_observation_input1, actor_observation_input2]) - x = Flatten()(x) - x = Dense(nb_actions)(x) - actor = Model(inputs=[actor_observation_input1, - actor_observation_input2], outputs=x) - - action_input = Input(shape=(nb_actions,), name='action_input') - critic_observation_input1 = Input( - shape=(2, 3), name='critic_observation_input1') - critic_observation_input2 = Input( - shape=(2, 4), name='critic_observation_input2') - x = Concatenate()([critic_observation_input1, critic_observation_input2]) - x = Concatenate()([action_input, Flatten()(x)]) - x = Dense(1)(x) - critic = Model(inputs=[ - action_input, 
critic_observation_input1, critic_observation_input2], outputs=x) - - processor = MultiInputProcessor(nb_inputs=2) - memory = SequentialMemory(limit=10, window_length=2) - agent = DDPGAgent(actor=actor, critic=critic, critic_action_input=action_input, memory=memory, - nb_actions=2, nb_steps_warmup_critic=5, nb_steps_warmup_actor=5, batch_size=4, - processor=processor) - agent.compile('sgd') - agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10) -from __future__ import division -from __future__ import absolute_import - -import pytest -import numpy as np -from numpy.testing import assert_allclose - -from keras.models import Sequential, Model -from keras.layers import Input, Dense, Flatten, Concatenate - -from rl.agents.dqn import NAFLayer, DQNAgent, NAFAgent -from rl.memory import SequentialMemory -from rl.processors import MultiInputProcessor - -from ..util import MultiInputTestEnv - - -def test_single_dqn_input(): - model = Sequential() - model.add(Flatten(input_shape=(2, 3))) - model.add(Dense(2)) - - memory = SequentialMemory(limit=10, window_length=2) - for double_dqn in (True, False): - agent = DQNAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5, batch_size=4, - enable_double_dqn=double_dqn) - agent.compile('sgd') - agent.fit(MultiInputTestEnv((3,)), nb_steps=10) - - -def test_multi_dqn_input(): - input1 = Input(shape=(2, 3)) - input2 = Input(shape=(2, 4)) - x = Concatenate()([input1, input2]) - x = Flatten()(x) - x = Dense(2)(x) - model = Model(inputs=[input1, input2], outputs=x) - - memory = SequentialMemory(limit=10, window_length=2) - processor = MultiInputProcessor(nb_inputs=2) - for double_dqn in (True, False): - agent = DQNAgent(model, memory=memory, nb_actions=2, nb_steps_warmup=5, batch_size=4, - processor=processor, enable_double_dqn=double_dqn) - agent.compile('sgd') - agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10) - - -def test_single_continuous_dqn_input(): - nb_actions = 2 - - V_model = Sequential() - V_model.add(Flatten(input_shape=(2, 3))) - V_model.add(Dense(1)) - - mu_model = Sequential() - mu_model.add(Flatten(input_shape=(2, 3))) - mu_model.add(Dense(nb_actions)) - - L_input = Input(shape=(2, 3)) - L_input_action = Input(shape=(nb_actions,)) - x = Concatenate()([Flatten()(L_input), L_input_action]) - x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x) - L_model = Model(inputs=[L_input_action, L_input], outputs=x) - - memory = SequentialMemory(limit=10, window_length=2) - agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model, - memory=memory, nb_steps_warmup=5, batch_size=4) - agent.compile('sgd') - agent.fit(MultiInputTestEnv((3,)), nb_steps=10) - - -def test_multi_continuous_dqn_input(): - nb_actions = 2 - - V_input1 = Input(shape=(2, 3)) - V_input2 = Input(shape=(2, 4)) - x = Concatenate()([V_input1, V_input2]) - x = Flatten()(x) - x = Dense(1)(x) - V_model = Model(inputs=[V_input1, V_input2], outputs=x) - - mu_input1 = Input(shape=(2, 3)) - mu_input2 = Input(shape=(2, 4)) - x = Concatenate()([mu_input1, mu_input2]) - x = Flatten()(x) - x = Dense(nb_actions)(x) - mu_model = Model(inputs=[mu_input1, mu_input2], outputs=x) - - L_input1 = Input(shape=(2, 3)) - L_input2 = Input(shape=(2, 4)) - L_input_action = Input(shape=(nb_actions,)) - x = Concatenate()([L_input1, L_input2]) - x = Concatenate()([Flatten()(x), L_input_action]) - x = Dense(((nb_actions * nb_actions + nb_actions) // 2))(x) - L_model = Model(inputs=[L_input_action, L_input1, L_input2], outputs=x) - - memory = 
SequentialMemory(limit=10, window_length=2) - processor = MultiInputProcessor(nb_inputs=2) - agent = NAFAgent(nb_actions=nb_actions, V_model=V_model, L_model=L_model, mu_model=mu_model, - memory=memory, nb_steps_warmup=5, batch_size=4, processor=processor) - agent.compile('sgd') - agent.fit(MultiInputTestEnv([(3,), (4,)]), nb_steps=10) - - -def test_naf_layer_full(): - batch_size = 2 - for nb_actions in (1, 3): - # Construct single model with NAF as the only layer, hence it is fully deterministic - # since no weights are used, which would be randomly initialized. - L_flat_input = Input( - shape=((nb_actions * nb_actions + nb_actions) // 2,)) - mu_input = Input(shape=(nb_actions,)) - action_input = Input(shape=(nb_actions,)) - x = NAFLayer(nb_actions, mode='full')( - [L_flat_input, mu_input, action_input]) - model = Model(inputs=[L_flat_input, mu_input, action_input], outputs=x) - model.compile(loss='mse', optimizer='sgd') - - # Create random test data. - L_flat = np.random.random( - (batch_size, (nb_actions * nb_actions + nb_actions) // 2)).astype('float32') - mu = np.random.random((batch_size, nb_actions)).astype('float32') - action = np.random.random((batch_size, nb_actions)).astype('float32') - - # Perform reference computations in numpy since these are much easier to verify. - L = np.zeros((batch_size, nb_actions, nb_actions)).astype('float32') - LT = np.copy(L) - for l, l_T, l_flat in zip(L, LT, L_flat): - l[np.tril_indices(nb_actions)] = l_flat - l[np.diag_indices(nb_actions)] = np.exp( - l[np.diag_indices(nb_actions)]) - l_T[:, :] = l.T - P = np.array([np.dot(l, l_T) - for l, l_T in zip(L, LT)]).astype('float32') - A_ref = np.array([np.dot(np.dot(a - m, p), a - m) - for a, m, p in zip(action, mu, P)]).astype('float32') - A_ref *= -.5 - - # Finally, compute the output of the net, which should be identical to the previously - # computed reference. - A_net = model.predict([L_flat, mu, action]).flatten() - assert_allclose(A_net, A_ref, rtol=1e-5) - - -def test_naf_layer_diag(): - batch_size = 2 - for nb_actions in (1, 3): - # Construct single model with NAF as the only layer, hence it is fully deterministic - # since no weights are used, which would be randomly initialized. - L_flat_input = Input(shape=(nb_actions,)) - mu_input = Input(shape=(nb_actions,)) - action_input = Input(shape=(nb_actions,)) - x = NAFLayer(nb_actions, mode='diag')( - [L_flat_input, mu_input, action_input]) - model = Model(inputs=[L_flat_input, mu_input, action_input], outputs=x) - model.compile(loss='mse', optimizer='sgd') - - # Create random test data. - L_flat = np.random.random((batch_size, nb_actions)).astype('float32') - mu = np.random.random((batch_size, nb_actions)).astype('float32') - action = np.random.random((batch_size, nb_actions)).astype('float32') - - # Perform reference computations in numpy since these are much easier to verify. - P = np.zeros((batch_size, nb_actions, nb_actions)).astype('float32') - for p, l_flat in zip(P, L_flat): - p[np.diag_indices(nb_actions)] = l_flat - print(P, L_flat) - A_ref = np.array([np.dot(np.dot(a - m, p), a - m) - for a, m, p in zip(action, mu, P)]).astype('float32') - A_ref *= -.5 - - # Finally, compute the output of the net, which should be identical to the previously - # computed reference. 
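- # (Identical up to float32 rounding, hence the rtol=1e-5 tolerance below
- # rather than exact equality.)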
- A_net = model.predict([L_flat, mu, action]).flatten() - assert_allclose(A_net, A_ref, rtol=1e-5) - - -if __name__ == '__main__': - pytest.main([__file__]) -from .twoRoundDeterministicRewardEnv import TwoRoundDeterministicRewardEnv -import gym -from ..spaces import Discrete - - -class TwoRoundDeterministicRewardEnv(gym.Env): - def __init__(self): - self.action_space = Discrete(2) - self.observation_space = Discrete(3) - self.reset() - - def step(self, action): - rewards = [[0, 3], [1, 2]] - - assert self.action_space.contains(action) - - if self.firstAction is None: - self.firstAction = action - reward = 0 - done = False - else: - reward = rewards[self.firstAction][action] - done = True - - return self.get_obs(), reward, done, {} - - def get_obs(self): - if self.firstAction is None: - return 2 - else: - return self.firstAction - - def reset(self): - self.firstAction = None - return self.get_obs() -from .discrete import Discrete -from __future__ import print_function, division - -from keras.datasets import mnist -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, GaussianNoise -from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D -from keras.layers import MaxPooling2D, merge -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam -from keras import losses -from keras.utils import to_categorical -import keras.backend as K - -import matplotlib.pyplot as plt - -import numpy as np - - -class AdversarialAutoencoder(): - def __init__(self): - self.img_rows = 28 - self.img_cols = 28 - self.channels = 1 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.latent_dim = 10 - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile(loss='binary_crossentropy', - optimizer=optimizer, - metrics=['accuracy']) - - # Build the encoder / decoder - self.encoder = self.build_encoder() - self.decoder = self.build_decoder() - - img = Input(shape=self.img_shape) - # The generator takes the image, encodes it and reconstructs it - # from the encoding - encoded_repr = self.encoder(img) - reconstructed_img = self.decoder(encoded_repr) - - # For the adversarial_autoencoder model we will only train the generator - self.discriminator.trainable = False - - # The discriminator determines validity of the encoding - validity = self.discriminator(encoded_repr) - - # The adversarial_autoencoder model (stacked generator and discriminator) - self.adversarial_autoencoder = Model( - img, [reconstructed_img, validity]) - self.adversarial_autoencoder.compile(loss=['mse', 'binary_crossentropy'], - loss_weights=[0.999, 0.001], - optimizer=optimizer) - - def build_encoder(self): - # Encoder - - img = Input(shape=self.img_shape) - - h = Flatten()(img) - h = Dense(512)(h) - h = LeakyReLU(alpha=0.2)(h) - h = Dense(512)(h) - h = LeakyReLU(alpha=0.2)(h) - mu = Dense(self.latent_dim)(h) - log_var = Dense(self.latent_dim)(h) - latent_repr = merge([mu, log_var], - mode=lambda p: p[0] + - K.random_normal(K.shape(p[0])) * K.exp(p[1] / 2), - output_shape=lambda p: p[0]) - - return Model(img, latent_repr) - - def build_decoder(self): - - model = Sequential() - - model.add(Dense(512, input_dim=self.latent_dim)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - 
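- # The tanh output matches the [-1, 1] pixel scaling applied to MNIST in
- # train().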
model.add(Dense(np.prod(self.img_shape), activation='tanh')) - model.add(Reshape(self.img_shape)) - - model.summary() - - z = Input(shape=(self.latent_dim,)) - img = model(z) - - return Model(z, img) - - def build_discriminator(self): - - model = Sequential() - - model.add(Dense(512, input_dim=self.latent_dim)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dense(256)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dense(1, activation="sigmoid")) - model.summary() - - encoded_repr = Input(shape=(self.latent_dim, )) - validity = model(encoded_repr) - - return Model(encoded_repr, validity) - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, _), (_, _) = mnist.load_data() - - # Rescale -1 to 1 - X_train = (X_train.astype(np.float32) - 127.5) / 127.5 - X_train = np.expand_dims(X_train, axis=3) - - # Adversarial ground truths - valid = np.ones((batch_size, 1)) - fake = np.zeros((batch_size, 1)) - - for epoch in range(epochs): - - # --------------------- - # Train Discriminator - # --------------------- - - # Select a random batch of images - idx = np.random.randint(0, X_train.shape[0], batch_size) - imgs = X_train[idx] - - latent_fake = self.encoder.predict(imgs) - latent_real = np.random.normal(size=(batch_size, self.latent_dim)) - - # Train the discriminator - d_loss_real = self.discriminator.train_on_batch(latent_real, valid) - d_loss_fake = self.discriminator.train_on_batch(latent_fake, fake) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # --------------------- - # Train Generator - # --------------------- - - # Train the generator - g_loss = self.adversarial_autoencoder.train_on_batch(imgs, [ - imgs, valid]) - - # Plot the progress - print("%d [D loss: %f, acc: %.2f%%] [G loss: %f, mse: %f]" % - (epoch, d_loss[0], 100*d_loss[1], g_loss[0], g_loss[1])) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.sample_images(epoch) - - def sample_images(self, epoch): - r, c = 5, 5 - - z = np.random.normal(size=(r*c, self.latent_dim)) - gen_imgs = self.decoder.predict(z) - - gen_imgs = 0.5 * gen_imgs + 0.5 - - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/mnist_%d.png" % epoch) - plt.close() - - def save_model(self): - - def save(model, model_name): - model_path = "saved_model/%s.json" % model_name - weights_path = "saved_model/%s_weights.hdf5" % model_name - options = {"file_arch": model_path, - "file_weight": weights_path} - json_string = model.to_json() - open(options['file_arch'], 'w').write(json_string) - model.save_weights(options['file_weight']) - - save(self.generator, "aae_generator") - save(self.discriminator, "aae_discriminator") - - -if __name__ == '__main__': - aae = AdversarialAutoencoder() - aae.train(epochs=20000, batch_size=32, sample_interval=200) -from __future__ import print_function, division - -from keras.datasets import mnist -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply -from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam - -import matplotlib.pyplot as plt - -import numpy as np - - -class ACGAN(): - def __init__(self): - # Input shape - self.img_rows = 28 - self.img_cols = 28 - 
self.channels = 1 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.num_classes = 10 - self.latent_dim = 100 - - optimizer = Adam(0.0002, 0.5) - losses = ['binary_crossentropy', 'sparse_categorical_crossentropy'] - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile(loss=losses, - optimizer=optimizer, - metrics=['accuracy']) - - # Build the generator - self.generator = self.build_generator() - - # The generator takes noise and the target label as input - # and generates the corresponding digit of that label - noise = Input(shape=(self.latent_dim,)) - label = Input(shape=(1,)) - img = self.generator([noise, label]) - - # For the combined model we will only train the generator - self.discriminator.trainable = False - - # The discriminator takes generated image as input and determines validity - # and the label of that image - valid, target_label = self.discriminator(img) - - # The combined model (stacked generator and discriminator) - # Trains the generator to fool the discriminator - self.combined = Model([noise, label], [valid, target_label]) - self.combined.compile(loss=losses, - optimizer=optimizer) - - def build_generator(self): - - model = Sequential() - - model.add(Dense(128 * 7 * 7, activation="relu", - input_dim=self.latent_dim)) - model.add(Reshape((7, 7, 128))) - model.add(BatchNormalization(momentum=0.8)) - model.add(UpSampling2D()) - model.add(Conv2D(128, kernel_size=3, padding="same")) - model.add(Activation("relu")) - model.add(BatchNormalization(momentum=0.8)) - model.add(UpSampling2D()) - model.add(Conv2D(64, kernel_size=3, padding="same")) - model.add(Activation("relu")) - model.add(BatchNormalization(momentum=0.8)) - model.add(Conv2D(self.channels, kernel_size=3, padding='same')) - model.add(Activation("tanh")) - - model.summary() - - noise = Input(shape=(self.latent_dim,)) - label = Input(shape=(1,), dtype='int32') - label_embedding = Flatten()(Embedding(self.num_classes, 100)(label)) - - model_input = multiply([noise, label_embedding]) - img = model(model_input) - - return Model([noise, label], img) - - def build_discriminator(self): - - model = Sequential() - - model.add(Conv2D(16, kernel_size=3, strides=2, - input_shape=self.img_shape, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Conv2D(32, kernel_size=3, strides=2, padding="same")) - model.add(ZeroPadding2D(padding=((0, 1), (0, 1)))) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Conv2D(64, kernel_size=3, strides=2, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Conv2D(128, kernel_size=3, strides=1, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - - model.add(Flatten()) - model.summary() - - img = Input(shape=self.img_shape) - - # Extract feature representation - features = model(img) - - # Determine validity and label of the image - validity = Dense(1, activation="sigmoid")(features) - label = Dense(self.num_classes, activation="softmax")(features) - - return Model(img, [validity, label]) - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, y_train), (_, _) = mnist.load_data() - - # Configure inputs - X_train = (X_train.astype(np.float32) - 127.5) / 127.5 - X_train = np.expand_dims(X_train, axis=3) - y_train = y_train.reshape(-1, 1) - - # Adversarial 
ground truths - valid = np.ones((batch_size, 1)) - fake = np.zeros((batch_size, 1)) - - for epoch in range(epochs): - - # --------------------- - # Train Discriminator - # --------------------- - - # Select a random batch of images - idx = np.random.randint(0, X_train.shape[0], batch_size) - imgs = X_train[idx] - - # Sample noise as generator input - noise = np.random.normal(0, 1, (batch_size, 100)) - - # The labels of the digits that the generator tries to create an - # image representation of - sampled_labels = np.random.randint(0, 10, (batch_size, 1)) - - # Generate a half batch of new images - gen_imgs = self.generator.predict([noise, sampled_labels]) - - # Image labels. 0-9 - img_labels = y_train[idx] - - # Train the discriminator - d_loss_real = self.discriminator.train_on_batch( - imgs, [valid, img_labels]) - d_loss_fake = self.discriminator.train_on_batch( - gen_imgs, [fake, sampled_labels]) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # --------------------- - # Train Generator - # --------------------- - - # Train the generator - g_loss = self.combined.train_on_batch( - [noise, sampled_labels], [valid, sampled_labels]) - - # Plot the progress - print("%d [D loss: %f, acc.: %.2f%%, op_acc: %.2f%%] [G loss: %f]" % ( - epoch, d_loss[0], 100*d_loss[3], 100*d_loss[4], g_loss[0])) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.save_model() - self.sample_images(epoch) - - def sample_images(self, epoch): - r, c = 10, 10 - noise = np.random.normal(0, 1, (r * c, 100)) - sampled_labels = np.array([num for _ in range(r) for num in range(c)]) - gen_imgs = self.generator.predict([noise, sampled_labels]) - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/%d.png" % epoch) - plt.close() - - def save_model(self): - - def save(model, model_name): - model_path = "saved_model/%s.json" % model_name - weights_path = "saved_model/%s_weights.hdf5" % model_name - options = {"file_arch": model_path, - "file_weight": weights_path} - json_string = model.to_json() - open(options['file_arch'], 'w').write(json_string) - model.save_weights(options['file_weight']) - - save(self.generator, "generator") - save(self.discriminator, "discriminator") - - -if __name__ == '__main__': - acgan = ACGAN() - acgan.train(epochs=14000, batch_size=32, sample_interval=200) -from __future__ import print_function, division - -from keras.datasets import mnist -from keras.layers import Input, Dense, Reshape, Flatten, Dropout -from keras.layers import BatchNormalization, Activation, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam -import keras.backend as K - -import matplotlib.pyplot as plt - -import sys - -import numpy as np - - -class BGAN(): - """Reference: https://wiseodd.github.io/techblog/2017/03/07/boundary-seeking-gan/""" - - def __init__(self): - self.img_rows = 28 - self.img_cols = 28 - self.channels = 1 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.latent_dim = 100 - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile(loss='binary_crossentropy', - optimizer=optimizer, - 
metrics=['accuracy']) - - # Build the generator - self.generator = self.build_generator() - - # The generator takes noise as input and generated imgs - z = Input(shape=(self.latent_dim,)) - img = self.generator(z) - - # For the combined model we will only train the generator - self.discriminator.trainable = False - - # The valid takes generated images as input and determines validity - valid = self.discriminator(img) - - # The combined model (stacked generator and discriminator) - # Trains the generator to fool the discriminator - self.combined = Model(z, valid) - self.combined.compile(loss=self.boundary_loss, optimizer=optimizer) - - def build_generator(self): - - model = Sequential() - - model.add(Dense(256, input_dim=self.latent_dim)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(1024)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(np.prod(self.img_shape), activation='tanh')) - model.add(Reshape(self.img_shape)) - - model.summary() - - noise = Input(shape=(self.latent_dim,)) - img = model(noise) - - return Model(noise, img) - - def build_discriminator(self): - - model = Sequential() - - model.add(Flatten(input_shape=self.img_shape)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dense(256)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dense(1, activation='sigmoid')) - model.summary() - - img = Input(shape=self.img_shape) - validity = model(img) - - return Model(img, validity) - - def boundary_loss(self, y_true, y_pred): - """ - Boundary seeking loss. - Reference: https://wiseodd.github.io/techblog/2017/03/07/boundary-seeking-gan/ - """ - return 0.5 * K.mean((K.log(y_pred) - K.log(1 - y_pred))**2) - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, _), (_, _) = mnist.load_data() - - # Rescale -1 to 1 - X_train = X_train / 127.5 - 1. 
- X_train = np.expand_dims(X_train, axis=3) - - # Adversarial ground truths - valid = np.ones((batch_size, 1)) - fake = np.zeros((batch_size, 1)) - - for epoch in range(epochs): - - # --------------------- - # Train Discriminator - # --------------------- - - # Select a random batch of images - idx = np.random.randint(0, X_train.shape[0], batch_size) - imgs = X_train[idx] - - noise = np.random.normal(0, 1, (batch_size, self.latent_dim)) - - # Generate a batch of new images - gen_imgs = self.generator.predict(noise) - - # Train the discriminator - d_loss_real = self.discriminator.train_on_batch(imgs, valid) - d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # --------------------- - # Train Generator - # --------------------- - - g_loss = self.combined.train_on_batch(noise, valid) - - # Plot the progress - print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % - (epoch, d_loss[0], 100*d_loss[1], g_loss)) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.sample_images(epoch) - - def sample_images(self, epoch): - r, c = 5, 5 - noise = np.random.normal(0, 1, (r * c, self.latent_dim)) - gen_imgs = self.generator.predict(noise) - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/mnist_%d.png" % epoch) - plt.close() - - -if __name__ == '__main__': - bgan = BGAN() - bgan.train(epochs=30000, batch_size=32, sample_interval=200) -from __future__ import print_function, division - -from keras.datasets import mnist -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, GaussianNoise -from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D -from keras.layers import MaxPooling2D, concatenate -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam -from keras import losses -from keras.utils import to_categorical -import keras.backend as K - -import matplotlib.pyplot as plt - -import numpy as np - - -class BIGAN(): - def __init__(self): - self.img_rows = 28 - self.img_cols = 28 - self.channels = 1 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.latent_dim = 100 - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile(loss=['binary_crossentropy'], - optimizer=optimizer, - metrics=['accuracy']) - - # Build the generator - self.generator = self.build_generator() - - # Build the encoder - self.encoder = self.build_encoder() - - # The part of the bigan that trains the discriminator and encoder - self.discriminator.trainable = False - - # Generate image from sampled noise - z = Input(shape=(self.latent_dim, )) - img_ = self.generator(z) - - # Encode image - img = Input(shape=self.img_shape) - z_ = self.encoder(img) - - # Latent -> img is fake, and img -> latent is valid - fake = self.discriminator([z, img_]) - valid = self.discriminator([z_, img]) - - # Set up and compile the combined model - # Trains generator to fool the discriminator - self.bigan_generator = Model([z, img], [fake, valid]) - self.bigan_generator.compile(loss=['binary_crossentropy', 'binary_crossentropy'], - 
optimizer=optimizer) - - def build_encoder(self): - model = Sequential() - - model.add(Flatten(input_shape=self.img_shape)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(self.latent_dim)) - - model.summary() - - img = Input(shape=self.img_shape) - z = model(img) - - return Model(img, z) - - def build_generator(self): - model = Sequential() - - model.add(Dense(512, input_dim=self.latent_dim)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(np.prod(self.img_shape), activation='tanh')) - model.add(Reshape(self.img_shape)) - - model.summary() - - z = Input(shape=(self.latent_dim,)) - gen_img = model(z) - - return Model(z, gen_img) - - def build_discriminator(self): - - z = Input(shape=(self.latent_dim, )) - img = Input(shape=self.img_shape) - d_in = concatenate([z, Flatten()(img)]) - - model = Dense(1024)(d_in) - model = LeakyReLU(alpha=0.2)(model) - model = Dropout(0.5)(model) - model = Dense(1024)(model) - model = LeakyReLU(alpha=0.2)(model) - model = Dropout(0.5)(model) - model = Dense(1024)(model) - model = LeakyReLU(alpha=0.2)(model) - model = Dropout(0.5)(model) - validity = Dense(1, activation="sigmoid")(model) - - return Model([z, img], validity) - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, _), (_, _) = mnist.load_data() - - # Rescale -1 to 1 - X_train = (X_train.astype(np.float32) - 127.5) / 127.5 - X_train = np.expand_dims(X_train, axis=3) - - # Adversarial ground truths - valid = np.ones((batch_size, 1)) - fake = np.zeros((batch_size, 1)) - - for epoch in range(epochs): - - # --------------------- - # Train Discriminator - # --------------------- - - # Sample noise and generate img - z = np.random.normal(size=(batch_size, self.latent_dim)) - imgs_ = self.generator.predict(z) - - # Select a random batch of images and encode - idx = np.random.randint(0, X_train.shape[0], batch_size) - imgs = X_train[idx] - z_ = self.encoder.predict(imgs) - - # Train the discriminator (img -> z is valid, z -> img is fake) - d_loss_real = self.discriminator.train_on_batch([z_, imgs], valid) - d_loss_fake = self.discriminator.train_on_batch([z, imgs_], fake) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # --------------------- - # Train Generator - # --------------------- - - # Train the generator (z -> img is valid and img -> z is is invalid) - g_loss = self.bigan_generator.train_on_batch( - [z, imgs], [valid, fake]) - - # Plot the progress - print("%d [D loss: %f, acc: %.2f%%] [G loss: %f]" % - (epoch, d_loss[0], 100*d_loss[1], g_loss[0])) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.sample_interval(epoch) - - def sample_interval(self, epoch): - r, c = 5, 5 - z = np.random.normal(size=(25, self.latent_dim)) - gen_imgs = self.generator.predict(z) - - gen_imgs = 0.5 * gen_imgs + 0.5 - - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/mnist_%d.png" % epoch) - plt.close() - - -if __name__ == '__main__': - bigan = BIGAN() - bigan.train(epochs=40000, batch_size=32, sample_interval=400) -from __future__ import 
print_function, division - -from keras.datasets import mnist -from keras_contrib.layers.normalization.instancenormalization import InstanceNormalization -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, GaussianNoise -from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D -from keras.layers import Concatenate -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam -from keras import losses -from keras.utils import to_categorical -import keras.backend as K -import scipy - -import matplotlib.pyplot as plt - -import numpy as np - - -class CCGAN(): - def __init__(self): - self.img_rows = 32 - self.img_cols = 32 - self.channels = 1 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.mask_height = 10 - self.mask_width = 10 - self.num_classes = 10 - - # Number of filters in first layer of generator and discriminator - self.gf = 32 - self.df = 32 - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile(loss=['mse', 'categorical_crossentropy'], - loss_weights=[0.5, 0.5], - optimizer=optimizer, - metrics=['accuracy']) - - # Build the generator - self.generator = self.build_generator() - - # The generator takes noise as input and generates imgs - masked_img = Input(shape=self.img_shape) - gen_img = self.generator(masked_img) - - # For the combined model we will only train the generator - self.discriminator.trainable = False - - # The valid takes generated images as input and determines validity - valid, _ = self.discriminator(gen_img) - - # The combined model (stacked generator and discriminator) - # Trains the generator to fool the discriminator - self.combined = Model(masked_img, valid) - self.combined.compile(loss=['mse'], - optimizer=optimizer) - - def build_generator(self): - """U-Net Generator""" - - def conv2d(layer_input, filters, f_size=4, bn=True): - """Layers used during downsampling""" - d = Conv2D(filters, kernel_size=f_size, - strides=2, padding='same')(layer_input) - d = LeakyReLU(alpha=0.2)(d) - if bn: - d = BatchNormalization(momentum=0.8)(d) - return d - - def deconv2d(layer_input, skip_input, filters, f_size=4, dropout_rate=0): - """Layers used during upsampling""" - u = UpSampling2D(size=2)(layer_input) - u = Conv2D(filters, kernel_size=f_size, strides=1, - padding='same', activation='relu')(u) - if dropout_rate: - u = Dropout(dropout_rate)(u) - u = BatchNormalization(momentum=0.8)(u) - u = Concatenate()([u, skip_input]) - return u - - img = Input(shape=self.img_shape) - - # Downsampling - d1 = conv2d(img, self.gf, bn=False) - d2 = conv2d(d1, self.gf*2) - d3 = conv2d(d2, self.gf*4) - d4 = conv2d(d3, self.gf*8) - - # Upsampling - u1 = deconv2d(d4, d3, self.gf*4) - u2 = deconv2d(u1, d2, self.gf*2) - u3 = deconv2d(u2, d1, self.gf) - - u4 = UpSampling2D(size=2)(u3) - output_img = Conv2D(self.channels, kernel_size=4, - strides=1, padding='same', activation='tanh')(u4) - - return Model(img, output_img) - - def build_discriminator(self): - - img = Input(shape=self.img_shape) - - model = Sequential() - model.add(Conv2D(64, kernel_size=4, strides=2, - padding='same', input_shape=self.img_shape)) - model.add(LeakyReLU(alpha=0.8)) - model.add(Conv2D(128, kernel_size=4, strides=2, padding='same')) - model.add(LeakyReLU(alpha=0.2)) - model.add(InstanceNormalization()) - 
model.add(Conv2D(256, kernel_size=4, strides=2, padding='same')) - model.add(LeakyReLU(alpha=0.2)) - model.add(InstanceNormalization()) - - model.summary() - - img = Input(shape=self.img_shape) - features = model(img) - - validity = Conv2D(1, kernel_size=4, strides=1, - padding='same')(features) - - label = Flatten()(features) - label = Dense(self.num_classes+1, activation="softmax")(label) - - return Model(img, [validity, label]) - - def mask_randomly(self, imgs): - y1 = np.random.randint( - 0, self.img_rows - self.mask_height, imgs.shape[0]) - y2 = y1 + self.mask_height - x1 = np.random.randint( - 0, self.img_rows - self.mask_width, imgs.shape[0]) - x2 = x1 + self.mask_width - - masked_imgs = np.empty_like(imgs) - for i, img in enumerate(imgs): - masked_img = img.copy() - _y1, _y2, _x1, _x2 = y1[i], y2[i], x1[i], x2[i], - masked_img[_y1:_y2, _x1:_x2, :] = 0 - masked_imgs[i] = masked_img - - return masked_imgs - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, y_train), (_, _) = mnist.load_data() - - # Rescale MNIST to 32x32 - X_train = np.array([scipy.misc.imresize( - x, [self.img_rows, self.img_cols]) for x in X_train]) - - # Rescale -1 to 1 - X_train = (X_train.astype(np.float32) - 127.5) / 127.5 - X_train = np.expand_dims(X_train, axis=3) - y_train = y_train.reshape(-1, 1) - - # Adversarial ground truths - valid = np.ones((batch_size, 4, 4, 1)) - fake = np.zeros((batch_size, 4, 4, 1)) - - for epoch in range(epochs): - - # --------------------- - # Train Discriminator - # --------------------- - - # Sample half batch of images - idx = np.random.randint(0, X_train.shape[0], batch_size) - imgs = X_train[idx] - labels = y_train[idx] - - masked_imgs = self.mask_randomly(imgs) - - # Generate a half batch of new images - gen_imgs = self.generator.predict(masked_imgs) - - # One-hot encoding of labels - labels = to_categorical(labels, num_classes=self.num_classes+1) - fake_labels = to_categorical( - np.full((batch_size, 1), self.num_classes), num_classes=self.num_classes+1) - - # Train the discriminator - d_loss_real = self.discriminator.train_on_batch( - imgs, [valid, labels]) - d_loss_fake = self.discriminator.train_on_batch( - gen_imgs, [fake, fake_labels]) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # --------------------- - # Train Generator - # --------------------- - - # Train the generator - g_loss = self.combined.train_on_batch(masked_imgs, valid) - - # Plot the progress - print("%d [D loss: %f, op_acc: %.2f%%] [G loss: %f]" % - (epoch, d_loss[0], 100*d_loss[4], g_loss)) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - # Select a random half batch of images - idx = np.random.randint(0, X_train.shape[0], 6) - imgs = X_train[idx] - self.sample_images(epoch, imgs) - self.save_model() - - def sample_images(self, epoch, imgs): - r, c = 3, 6 - - masked_imgs = self.mask_randomly(imgs) - gen_imgs = self.generator.predict(masked_imgs) - - imgs = (imgs + 1.0) * 0.5 - masked_imgs = (masked_imgs + 1.0) * 0.5 - gen_imgs = (gen_imgs + 1.0) * 0.5 - - gen_imgs = np.where(gen_imgs < 0, 0, gen_imgs) - - fig, axs = plt.subplots(r, c) - for i in range(c): - axs[0, i].imshow(imgs[i, :, :, 0], cmap='gray') - axs[0, i].axis('off') - axs[1, i].imshow(masked_imgs[i, :, :, 0], cmap='gray') - axs[1, i].axis('off') - axs[2, i].imshow(gen_imgs[i, :, :, 0], cmap='gray') - axs[2, i].axis('off') - fig.savefig("images/%d.png" % epoch) - plt.close() - - def save_model(self): - - def save(model, model_name): - 
model_path = "saved_model/%s.json" % model_name - weights_path = "saved_model/%s_weights.hdf5" % model_name - options = {"file_arch": model_path, - "file_weight": weights_path} - json_string = model.to_json() - open(options['file_arch'], 'w').write(json_string) - model.save_weights(options['file_weight']) - - save(self.generator, "ccgan_generator") - save(self.discriminator, "ccgan_discriminator") - - -if __name__ == '__main__': - ccgan = CCGAN() - ccgan.train(epochs=20000, batch_size=32, sample_interval=200) -from __future__ import print_function, division - -from keras.datasets import mnist -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply -from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam - -import matplotlib.pyplot as plt - -import numpy as np - - -class CGAN(): - def __init__(self): - # Input shape - self.img_rows = 28 - self.img_cols = 28 - self.channels = 1 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.num_classes = 10 - self.latent_dim = 100 - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile(loss=['binary_crossentropy'], - optimizer=optimizer, - metrics=['accuracy']) - - # Build the generator - self.generator = self.build_generator() - - # The generator takes noise and the target label as input - # and generates the corresponding digit of that label - noise = Input(shape=(self.latent_dim,)) - label = Input(shape=(1,)) - img = self.generator([noise, label]) - - # For the combined model we will only train the generator - self.discriminator.trainable = False - - # The discriminator takes generated image as input and determines validity - # and the label of that image - valid = self.discriminator([img, label]) - - # The combined model (stacked generator and discriminator) - # Trains generator to fool discriminator - self.combined = Model([noise, label], valid) - self.combined.compile(loss=['binary_crossentropy'], - optimizer=optimizer) - - def build_generator(self): - - model = Sequential() - - model.add(Dense(256, input_dim=self.latent_dim)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(1024)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(np.prod(self.img_shape), activation='tanh')) - model.add(Reshape(self.img_shape)) - - model.summary() - - noise = Input(shape=(self.latent_dim,)) - label = Input(shape=(1,), dtype='int32') - label_embedding = Flatten()(Embedding(self.num_classes, self.latent_dim)(label)) - - model_input = multiply([noise, label_embedding]) - img = model(model_input) - - return Model([noise, label], img) - - def build_discriminator(self): - - model = Sequential() - - model.add(Dense(512, input_dim=np.prod(self.img_shape))) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.4)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.4)) - model.add(Dense(1, activation='sigmoid')) - model.summary() - - img = Input(shape=self.img_shape) - label = Input(shape=(1,), dtype='int32') - - label_embedding = 
Flatten()(Embedding(self.num_classes, np.prod(self.img_shape))(label)) - flat_img = Flatten()(img) - - model_input = multiply([flat_img, label_embedding]) - - validity = model(model_input) - - return Model([img, label], validity) - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, y_train), (_, _) = mnist.load_data() - - # Configure input - X_train = (X_train.astype(np.float32) - 127.5) / 127.5 - X_train = np.expand_dims(X_train, axis=3) - y_train = y_train.reshape(-1, 1) - - # Adversarial ground truths - valid = np.ones((batch_size, 1)) - fake = np.zeros((batch_size, 1)) - - for epoch in range(epochs): - - # --------------------- - # Train Discriminator - # --------------------- - - # Select a random half batch of images - idx = np.random.randint(0, X_train.shape[0], batch_size) - imgs, labels = X_train[idx], y_train[idx] - - # Sample noise as generator input - noise = np.random.normal(0, 1, (batch_size, 100)) - - # Generate a half batch of new images - gen_imgs = self.generator.predict([noise, labels]) - - # Train the discriminator - d_loss_real = self.discriminator.train_on_batch( - [imgs, labels], valid) - d_loss_fake = self.discriminator.train_on_batch( - [gen_imgs, labels], fake) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # --------------------- - # Train Generator - # --------------------- - - # Condition on labels - sampled_labels = np.random.randint( - 0, 10, batch_size).reshape(-1, 1) - - # Train the generator - g_loss = self.combined.train_on_batch( - [noise, sampled_labels], valid) - - # Plot the progress - print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % - (epoch, d_loss[0], 100*d_loss[1], g_loss)) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.sample_images(epoch) - - def sample_images(self, epoch): - r, c = 2, 5 - noise = np.random.normal(0, 1, (r * c, 100)) - sampled_labels = np.arange(0, 10).reshape(-1, 1) - - gen_imgs = self.generator.predict([noise, sampled_labels]) - - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') - axs[i, j].set_title("Digit: %d" % sampled_labels[cnt]) - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/%d.png" % epoch) - plt.close() - - -if __name__ == '__main__': - cgan = CGAN() - cgan.train(epochs=20000, batch_size=32, sample_interval=200) -from __future__ import print_function, division -import scipy - -from keras.datasets import mnist -from keras.layers import Input, Dense, Reshape, Flatten, Dropout -from keras.layers import BatchNormalization, Activation, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam - -import matplotlib.pyplot as plt - -import sys - -import numpy as np - - -class COGAN(): - """Reference: https://wiseodd.github.io/techblog/2017/02/18/coupled_gan/""" - - def __init__(self): - self.img_rows = 28 - self.img_cols = 28 - self.channels = 1 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.latent_dim = 100 - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminator - self.d1, self.d2 = self.build_discriminators() - self.d1.compile(loss='binary_crossentropy', - optimizer=optimizer, - metrics=['accuracy']) - self.d2.compile(loss='binary_crossentropy', - 
optimizer=optimizer, - metrics=['accuracy']) - - # Build the generator - self.g1, self.g2 = self.build_generators() - - # The generator takes noise as input and generated imgs - z = Input(shape=(self.latent_dim,)) - img1 = self.g1(z) - img2 = self.g2(z) - - # For the combined model we will only train the generators - self.d1.trainable = False - self.d2.trainable = False - - # The valid takes generated images as input and determines validity - valid1 = self.d1(img1) - valid2 = self.d2(img2) - - # The combined model (stacked generators and discriminators) - # Trains generators to fool discriminators - self.combined = Model(z, [valid1, valid2]) - self.combined.compile(loss=['binary_crossentropy', 'binary_crossentropy'], - optimizer=optimizer) - - def build_generators(self): - - # Shared weights between generators - model = Sequential() - model.add(Dense(256, input_dim=self.latent_dim)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - - noise = Input(shape=(self.latent_dim,)) - feature_repr = model(noise) - - # Generator 1 - g1 = Dense(1024)(feature_repr) - g1 = LeakyReLU(alpha=0.2)(g1) - g1 = BatchNormalization(momentum=0.8)(g1) - g1 = Dense(np.prod(self.img_shape), activation='tanh')(g1) - img1 = Reshape(self.img_shape)(g1) - - # Generator 2 - g2 = Dense(1024)(feature_repr) - g2 = LeakyReLU(alpha=0.2)(g2) - g2 = BatchNormalization(momentum=0.8)(g2) - g2 = Dense(np.prod(self.img_shape), activation='tanh')(g2) - img2 = Reshape(self.img_shape)(g2) - - model.summary() - - return Model(noise, img1), Model(noise, img2) - - def build_discriminators(self): - - img1 = Input(shape=self.img_shape) - img2 = Input(shape=self.img_shape) - - # Shared discriminator layers - model = Sequential() - model.add(Flatten(input_shape=self.img_shape)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dense(256)) - model.add(LeakyReLU(alpha=0.2)) - - img1_embedding = model(img1) - img2_embedding = model(img2) - - # Discriminator 1 - validity1 = Dense(1, activation='sigmoid')(img1_embedding) - # Discriminator 2 - validity2 = Dense(1, activation='sigmoid')(img2_embedding) - - return Model(img1, validity1), Model(img2, validity2) - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, _), (_, _) = mnist.load_data() - - # Rescale -1 to 1 - X_train = (X_train.astype(np.float32) - 127.5) / 127.5 - X_train = np.expand_dims(X_train, axis=3) - - # Images in domain A and B (rotated) - X1 = X_train[:int(X_train.shape[0]/2)] - X2 = X_train[int(X_train.shape[0]/2):] - X2 = scipy.ndimage.interpolation.rotate(X2, 90, axes=(1, 2)) - - # Adversarial ground truths - valid = np.ones((batch_size, 1)) - fake = np.zeros((batch_size, 1)) - - for epoch in range(epochs): - - # ---------------------- - # Train Discriminators - # ---------------------- - - # Select a random batch of images - idx = np.random.randint(0, X1.shape[0], batch_size) - imgs1 = X1[idx] - imgs2 = X2[idx] - - # Sample noise as generator input - noise = np.random.normal(0, 1, (batch_size, 100)) - - # Generate a batch of new images - gen_imgs1 = self.g1.predict(noise) - gen_imgs2 = self.g2.predict(noise) - - # Train the discriminators - d1_loss_real = self.d1.train_on_batch(imgs1, valid) - d2_loss_real = self.d2.train_on_batch(imgs2, valid) - d1_loss_fake = self.d1.train_on_batch(gen_imgs1, fake) - d2_loss_fake = self.d2.train_on_batch(gen_imgs2, fake) - d1_loss = 0.5 * 
np.add(d1_loss_real, d1_loss_fake) - d2_loss = 0.5 * np.add(d2_loss_real, d2_loss_fake) - - # ------------------ - # Train Generators - # ------------------ - - g_loss = self.combined.train_on_batch(noise, [valid, valid]) - - # Plot the progress - print("%d [D1 loss: %f, acc.: %.2f%%] [D2 loss: %f, acc.: %.2f%%] [G loss: %f]" - % (epoch, d1_loss[0], 100*d1_loss[1], d2_loss[0], 100*d2_loss[1], g_loss[0])) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.sample_images(epoch) - - def sample_images(self, epoch): - r, c = 4, 4 - noise = np.random.normal(0, 1, (r * int(c/2), 100)) - gen_imgs1 = self.g1.predict(noise) - gen_imgs2 = self.g2.predict(noise) - - gen_imgs = np.concatenate([gen_imgs1, gen_imgs2]) - - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/mnist_%d.png" % epoch) - plt.close() - - -if __name__ == '__main__': - gan = COGAN() - gan.train(epochs=30000, batch_size=32, sample_interval=200) -from __future__ import print_function, division - -from keras.datasets import cifar10 -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, GaussianNoise -from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D -from keras.layers import MaxPooling2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam -from keras import losses -from keras.utils import to_categorical -import keras.backend as K - -import matplotlib.pyplot as plt - -import numpy as np - - -class ContextEncoder(): - def __init__(self): - self.img_rows = 32 - self.img_cols = 32 - self.mask_height = 8 - self.mask_width = 8 - self.channels = 3 - self.num_classes = 2 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.missing_shape = (self.mask_height, self.mask_width, self.channels) - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile(loss='binary_crossentropy', - optimizer=optimizer, - metrics=['accuracy']) - - # Build the generator - self.generator = self.build_generator() - - # The generator takes noise as input and generates the missing - # part of the image - masked_img = Input(shape=self.img_shape) - gen_missing = self.generator(masked_img) - - # For the combined model we will only train the generator - self.discriminator.trainable = False - - # The discriminator takes generated images as input and determines - # if it is generated or if it is a real image - valid = self.discriminator(gen_missing) - - # The combined model (stacked generator and discriminator) - # Trains generator to fool discriminator - self.combined = Model(masked_img, [gen_missing, valid]) - self.combined.compile(loss=['mse', 'binary_crossentropy'], - loss_weights=[0.999, 0.001], - optimizer=optimizer) - - def build_generator(self): - - model = Sequential() - - # Encoder - model.add(Conv2D(32, kernel_size=3, strides=2, - input_shape=self.img_shape, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Conv2D(64, kernel_size=3, strides=2, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - 
model.add(Conv2D(128, kernel_size=3, strides=2, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - - model.add(Conv2D(512, kernel_size=1, strides=2, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.5)) - - # Decoder - model.add(UpSampling2D()) - model.add(Conv2D(128, kernel_size=3, padding="same")) - model.add(Activation('relu')) - model.add(BatchNormalization(momentum=0.8)) - model.add(UpSampling2D()) - model.add(Conv2D(64, kernel_size=3, padding="same")) - model.add(Activation('relu')) - model.add(BatchNormalization(momentum=0.8)) - model.add(Conv2D(self.channels, kernel_size=3, padding="same")) - model.add(Activation('tanh')) - - model.summary() - - masked_img = Input(shape=self.img_shape) - gen_missing = model(masked_img) - - return Model(masked_img, gen_missing) - - def build_discriminator(self): - - model = Sequential() - - model.add(Conv2D(64, kernel_size=3, strides=2, - input_shape=self.missing_shape, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Conv2D(128, kernel_size=3, strides=2, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Conv2D(256, kernel_size=3, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Flatten()) - model.add(Dense(1, activation='sigmoid')) - model.summary() - - img = Input(shape=self.missing_shape) - validity = model(img) - - return Model(img, validity) - - def mask_randomly(self, imgs): - y1 = np.random.randint( - 0, self.img_rows - self.mask_height, imgs.shape[0]) - y2 = y1 + self.mask_height - x1 = np.random.randint( - 0, self.img_rows - self.mask_width, imgs.shape[0]) - x2 = x1 + self.mask_width - - masked_imgs = np.empty_like(imgs) - missing_parts = np.empty( - (imgs.shape[0], self.mask_height, self.mask_width, self.channels)) - for i, img in enumerate(imgs): - masked_img = img.copy() - _y1, _y2, _x1, _x2 = y1[i], y2[i], x1[i], x2[i] - missing_parts[i] = masked_img[_y1:_y2, _x1:_x2, :].copy() - masked_img[_y1:_y2, _x1:_x2, :] = 0 - masked_imgs[i] = masked_img - - return masked_imgs, missing_parts, (y1, y2, x1, x2) - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, y_train), (_, _) = cifar10.load_data() - - # Extract dogs and cats - X_cats = X_train[(y_train == 3).flatten()] - X_dogs = X_train[(y_train == 5).flatten()] - X_train = np.vstack((X_cats, X_dogs)) - - # Rescale -1 to 1 - X_train = X_train / 127.5 - 1. 
- y_train = y_train.reshape(-1, 1) - - # Adversarial ground truths - valid = np.ones((batch_size, 1)) - fake = np.zeros((batch_size, 1)) - - for epoch in range(epochs): - - # --------------------- - # Train Discriminator - # --------------------- - - # Select a random batch of images - idx = np.random.randint(0, X_train.shape[0], batch_size) - imgs = X_train[idx] - - masked_imgs, missing_parts, _ = self.mask_randomly(imgs) - - # Generate a batch of new images - gen_missing = self.generator.predict(masked_imgs) - - # Train the discriminator - d_loss_real = self.discriminator.train_on_batch( - missing_parts, valid) - d_loss_fake = self.discriminator.train_on_batch(gen_missing, fake) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # --------------------- - # Train Generator - # --------------------- - - g_loss = self.combined.train_on_batch( - masked_imgs, [missing_parts, valid]) - - # Plot the progress - print("%d [D loss: %f, acc: %.2f%%] [G loss: %f, mse: %f]" % - (epoch, d_loss[0], 100*d_loss[1], g_loss[0], g_loss[1])) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - idx = np.random.randint(0, X_train.shape[0], 6) - imgs = X_train[idx] - self.sample_images(epoch, imgs) - - def sample_images(self, epoch, imgs): - r, c = 3, 6 - - masked_imgs, missing_parts, (y1, y2, x1, x2) = self.mask_randomly(imgs) - gen_missing = self.generator.predict(masked_imgs) - - imgs = 0.5 * imgs + 0.5 - masked_imgs = 0.5 * masked_imgs + 0.5 - gen_missing = 0.5 * gen_missing + 0.5 - - fig, axs = plt.subplots(r, c) - for i in range(c): - axs[0, i].imshow(imgs[i, :, :]) - axs[0, i].axis('off') - axs[1, i].imshow(masked_imgs[i, :, :]) - axs[1, i].axis('off') - filled_in = imgs[i].copy() - filled_in[y1[i]:y2[i], x1[i]:x2[i], :] = gen_missing[i] - axs[2, i].imshow(filled_in) - axs[2, i].axis('off') - fig.savefig("images/%d.png" % epoch) - plt.close() - - def save_model(self): - - def save(model, model_name): - model_path = "saved_model/%s.json" % model_name - weights_path = "saved_model/%s_weights.hdf5" % model_name - options = {"file_arch": model_path, - "file_weight": weights_path} - json_string = model.to_json() - open(options['file_arch'], 'w').write(json_string) - model.save_weights(options['file_weight']) - - save(self.generator, "generator") - save(self.discriminator, "discriminator") - - -if __name__ == '__main__': - context_encoder = ContextEncoder() - context_encoder.train(epochs=30000, batch_size=64, sample_interval=50) -from __future__ import print_function, division -import scipy - -from keras.datasets import mnist -from keras_contrib.layers.normalization.instancenormalization import InstanceNormalization -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Concatenate -from keras.layers import BatchNormalization, Activation, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam -import datetime -import matplotlib.pyplot as plt -import sys -from data_loader import DataLoader -import numpy as np -import os - - -class CycleGAN(): - def __init__(self): - # Input shape - self.img_rows = 128 - self.img_cols = 128 - self.channels = 3 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - - # Configure data loader - self.dataset_name = 'apple2orange' - self.data_loader = DataLoader(dataset_name=self.dataset_name, - img_res=(self.img_rows, self.img_cols)) - - # Calculate output shape 
of D (PatchGAN) - patch = int(self.img_rows / 2**4) - self.disc_patch = (patch, patch, 1) - - # Number of filters in the first layer of G and D - self.gf = 32 - self.df = 64 - - # Loss weights - self.lambda_cycle = 10.0 # Cycle-consistency loss - self.lambda_id = 0.1 * self.lambda_cycle # Identity loss - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminators - self.d_A = self.build_discriminator() - self.d_B = self.build_discriminator() - self.d_A.compile(loss='mse', - optimizer=optimizer, - metrics=['accuracy']) - self.d_B.compile(loss='mse', - optimizer=optimizer, - metrics=['accuracy']) - - # ------------------------- - # Construct Computational - # Graph of Generators - # ------------------------- - - # Build the generators - self.g_AB = self.build_generator() - self.g_BA = self.build_generator() - - # Input images from both domains - img_A = Input(shape=self.img_shape) - img_B = Input(shape=self.img_shape) - - # Translate images to the other domain - fake_B = self.g_AB(img_A) - fake_A = self.g_BA(img_B) - # Translate images back to original domain - reconstr_A = self.g_BA(fake_B) - reconstr_B = self.g_AB(fake_A) - # Identity mapping of images - img_A_id = self.g_BA(img_A) - img_B_id = self.g_AB(img_B) - - # For the combined model we will only train the generators - self.d_A.trainable = False - self.d_B.trainable = False - - # Discriminators determines validity of translated images - valid_A = self.d_A(fake_A) - valid_B = self.d_B(fake_B) - - # Combined model trains generators to fool discriminators - self.combined = Model(inputs=[img_A, img_B], - outputs=[valid_A, valid_B, - reconstr_A, reconstr_B, - img_A_id, img_B_id]) - self.combined.compile(loss=['mse', 'mse', - 'mae', 'mae', - 'mae', 'mae'], - loss_weights=[1, 1, - self.lambda_cycle, self.lambda_cycle, - self.lambda_id, self.lambda_id], - optimizer=optimizer) - - def build_generator(self): - """U-Net Generator""" - - def conv2d(layer_input, filters, f_size=4): - """Layers used during downsampling""" - d = Conv2D(filters, kernel_size=f_size, - strides=2, padding='same')(layer_input) - d = LeakyReLU(alpha=0.2)(d) - d = InstanceNormalization()(d) - return d - - def deconv2d(layer_input, skip_input, filters, f_size=4, dropout_rate=0): - """Layers used during upsampling""" - u = UpSampling2D(size=2)(layer_input) - u = Conv2D(filters, kernel_size=f_size, strides=1, - padding='same', activation='relu')(u) - if dropout_rate: - u = Dropout(dropout_rate)(u) - u = InstanceNormalization()(u) - u = Concatenate()([u, skip_input]) - return u - - # Image input - d0 = Input(shape=self.img_shape) - - # Downsampling - d1 = conv2d(d0, self.gf) - d2 = conv2d(d1, self.gf*2) - d3 = conv2d(d2, self.gf*4) - d4 = conv2d(d3, self.gf*8) - - # Upsampling - u1 = deconv2d(d4, d3, self.gf*4) - u2 = deconv2d(u1, d2, self.gf*2) - u3 = deconv2d(u2, d1, self.gf) - - u4 = UpSampling2D(size=2)(u3) - output_img = Conv2D(self.channels, kernel_size=4, - strides=1, padding='same', activation='tanh')(u4) - - return Model(d0, output_img) - - def build_discriminator(self): - - def d_layer(layer_input, filters, f_size=4, normalization=True): - """Discriminator layer""" - d = Conv2D(filters, kernel_size=f_size, - strides=2, padding='same')(layer_input) - d = LeakyReLU(alpha=0.2)(d) - if normalization: - d = InstanceNormalization()(d) - return d - - img = Input(shape=self.img_shape) - - d1 = d_layer(img, self.df, normalization=False) - d2 = d_layer(d1, self.df*2) - d3 = d_layer(d2, self.df*4) - d4 = d_layer(d3, self.df*8) - - validity = Conv2D(1, 
kernel_size=4, strides=1, padding='same')(d4) - - return Model(img, validity) - - def train(self, epochs, batch_size=1, sample_interval=50): - - start_time = datetime.datetime.now() - - # Adversarial loss ground truths - valid = np.ones((batch_size,) + self.disc_patch) - fake = np.zeros((batch_size,) + self.disc_patch) - - for epoch in range(epochs): - for batch_i, (imgs_A, imgs_B) in enumerate(self.data_loader.load_batch(batch_size)): - - # ---------------------- - # Train Discriminators - # ---------------------- - - # Translate images to opposite domain - fake_B = self.g_AB.predict(imgs_A) - fake_A = self.g_BA.predict(imgs_B) - - # Train the discriminators (original images = real / translated = Fake) - dA_loss_real = self.d_A.train_on_batch(imgs_A, valid) - dA_loss_fake = self.d_A.train_on_batch(fake_A, fake) - dA_loss = 0.5 * np.add(dA_loss_real, dA_loss_fake) - - dB_loss_real = self.d_B.train_on_batch(imgs_B, valid) - dB_loss_fake = self.d_B.train_on_batch(fake_B, fake) - dB_loss = 0.5 * np.add(dB_loss_real, dB_loss_fake) - - # Total disciminator loss - d_loss = 0.5 * np.add(dA_loss, dB_loss) - - # ------------------ - # Train Generators - # ------------------ - - # Train the generators - g_loss = self.combined.train_on_batch([imgs_A, imgs_B], - [valid, valid, - imgs_A, imgs_B, - imgs_A, imgs_B]) - - elapsed_time = datetime.datetime.now() - start_time - - # Plot the progress - print("[Epoch %d/%d] [Batch %d/%d] [D loss: %f, acc: %3d%%] [G loss: %05f, adv: %05f, recon: %05f, id: %05f] time: %s " - % (epoch, epochs, - batch_i, self.data_loader.n_batches, - d_loss[0], 100 * - d_loss[1], - g_loss[0], - np.mean( - g_loss[1:3]), - np.mean( - g_loss[3:5]), - np.mean( - g_loss[5:6]), - elapsed_time)) - - # If at save interval => save generated image samples - if batch_i % sample_interval == 0: - self.sample_images(epoch, batch_i) - - def sample_images(self, epoch, batch_i): - os.makedirs('images/%s' % self.dataset_name, exist_ok=True) - r, c = 2, 3 - - imgs_A = self.data_loader.load_data( - domain="A", batch_size=1, is_testing=True) - imgs_B = self.data_loader.load_data( - domain="B", batch_size=1, is_testing=True) - - # Demo (for GIF) - #imgs_A = self.data_loader.load_img('datasets/apple2orange/testA/n07740461_1541.jpg') - #imgs_B = self.data_loader.load_img('datasets/apple2orange/testB/n07749192_4241.jpg') - - # Translate images to the other domain - fake_B = self.g_AB.predict(imgs_A) - fake_A = self.g_BA.predict(imgs_B) - # Translate back to original domain - reconstr_A = self.g_BA.predict(fake_B) - reconstr_B = self.g_AB.predict(fake_A) - - gen_imgs = np.concatenate( - [imgs_A, fake_B, reconstr_A, imgs_B, fake_A, reconstr_B]) - - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - titles = ['Original', 'Translated', 'Reconstructed'] - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt]) - axs[i, j].set_title(titles[j]) - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/%s/%d_%d.png" % - (self.dataset_name, epoch, batch_i)) - plt.close() - - -if __name__ == '__main__': - gan = CycleGAN() - gan.train(epochs=200, batch_size=1, sample_interval=200) -import scipy -from glob import glob -import numpy as np - - -class DataLoader(): - def __init__(self, dataset_name, img_res=(128, 128)): - self.dataset_name = dataset_name - self.img_res = img_res - - def load_data(self, domain, batch_size=1, is_testing=False): - data_type = "train%s" % domain if not is_testing else "test%s" % domain - path = glob('./datasets/%s/%s/*' % 
(self.dataset_name, data_type)) - - batch_images = np.random.choice(path, size=batch_size) - - imgs = [] - for img_path in batch_images: - img = self.imread(img_path) - if not is_testing: - img = scipy.misc.imresize(img, self.img_res) - - if np.random.random() > 0.5: - img = np.fliplr(img) - else: - img = scipy.misc.imresize(img, self.img_res) - imgs.append(img) - - imgs = np.array(imgs)/127.5 - 1. - - return imgs - - def load_batch(self, batch_size=1, is_testing=False): - data_type = "train" if not is_testing else "val" - path_A = glob('./datasets/%s/%sA/*' % (self.dataset_name, data_type)) - path_B = glob('./datasets/%s/%sB/*' % (self.dataset_name, data_type)) - - self.n_batches = int(min(len(path_A), len(path_B)) / batch_size) - total_samples = self.n_batches * batch_size - - # Sample n_batches * batch_size from each path list so that model sees all - # samples from both domains - path_A = np.random.choice(path_A, total_samples, replace=False) - path_B = np.random.choice(path_B, total_samples, replace=False) - - for i in range(self.n_batches-1): - batch_A = path_A[i*batch_size:(i+1)*batch_size] - batch_B = path_B[i*batch_size:(i+1)*batch_size] - imgs_A, imgs_B = [], [] - for img_A, img_B in zip(batch_A, batch_B): - img_A = self.imread(img_A) - img_B = self.imread(img_B) - - img_A = scipy.misc.imresize(img_A, self.img_res) - img_B = scipy.misc.imresize(img_B, self.img_res) - - if not is_testing and np.random.random() > 0.5: - img_A = np.fliplr(img_A) - img_B = np.fliplr(img_B) - - imgs_A.append(img_A) - imgs_B.append(img_B) - - imgs_A = np.array(imgs_A)/127.5 - 1. - imgs_B = np.array(imgs_B)/127.5 - 1. - - yield imgs_A, imgs_B - - def load_img(self, path): - img = self.imread(path) - img = scipy.misc.imresize(img, self.img_res) - img = img/127.5 - 1. 
- return img[np.newaxis, :, :, :] - - def imread(self, path): - return scipy.misc.imread(path, mode='RGB').astype(np.float) -from __future__ import print_function, division - -from keras.datasets import mnist -from keras.layers import Input, Dense, Reshape, Flatten, Dropout -from keras.layers import BatchNormalization, Activation, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam - -import matplotlib.pyplot as plt - -import sys - -import numpy as np - - -class DCGAN(): - def __init__(self): - # Input shape - self.img_rows = 28 - self.img_cols = 28 - self.channels = 1 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.latent_dim = 100 - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile(loss='binary_crossentropy', - optimizer=optimizer, - metrics=['accuracy']) - - # Build the generator - self.generator = self.build_generator() - - # The generator takes noise as input and generates imgs - z = Input(shape=(self.latent_dim,)) - img = self.generator(z) - - # For the combined model we will only train the generator - self.discriminator.trainable = False - - # The discriminator takes generated images as input and determines validity - valid = self.discriminator(img) - - # The combined model (stacked generator and discriminator) - # Trains the generator to fool the discriminator - self.combined = Model(z, valid) - self.combined.compile(loss='binary_crossentropy', optimizer=optimizer) - - def build_generator(self): - - model = Sequential() - - model.add(Dense(128 * 7 * 7, activation="relu", - input_dim=self.latent_dim)) - model.add(Reshape((7, 7, 128))) - model.add(UpSampling2D()) - model.add(Conv2D(128, kernel_size=3, padding="same")) - model.add(BatchNormalization(momentum=0.8)) - model.add(Activation("relu")) - model.add(UpSampling2D()) - model.add(Conv2D(64, kernel_size=3, padding="same")) - model.add(BatchNormalization(momentum=0.8)) - model.add(Activation("relu")) - model.add(Conv2D(self.channels, kernel_size=3, padding="same")) - model.add(Activation("tanh")) - - model.summary() - - noise = Input(shape=(self.latent_dim,)) - img = model(noise) - - return Model(noise, img) - - def build_discriminator(self): - - model = Sequential() - - model.add(Conv2D(32, kernel_size=3, strides=2, - input_shape=self.img_shape, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Conv2D(64, kernel_size=3, strides=2, padding="same")) - model.add(ZeroPadding2D(padding=((0, 1), (0, 1)))) - model.add(BatchNormalization(momentum=0.8)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Conv2D(128, kernel_size=3, strides=2, padding="same")) - model.add(BatchNormalization(momentum=0.8)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Conv2D(256, kernel_size=3, strides=1, padding="same")) - model.add(BatchNormalization(momentum=0.8)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Flatten()) - model.add(Dense(1, activation='sigmoid')) - - model.summary() - - img = Input(shape=self.img_shape) - validity = model(img) - - return Model(img, validity) - - def train(self, epochs, batch_size=128, save_interval=50): - - # Load the dataset - (X_train, _), (_, _) = mnist.load_data() - - # Rescale -1 to 1 - X_train = X_train / 127.5 - 1. 
- X_train = np.expand_dims(X_train, axis=3) - - # Adversarial ground truths - valid = np.ones((batch_size, 1)) - fake = np.zeros((batch_size, 1)) - - for epoch in range(epochs): - - # --------------------- - # Train Discriminator - # --------------------- - - # Select a random half of images - idx = np.random.randint(0, X_train.shape[0], batch_size) - imgs = X_train[idx] - - # Sample noise and generate a batch of new images - noise = np.random.normal(0, 1, (batch_size, self.latent_dim)) - gen_imgs = self.generator.predict(noise) - - # Train the discriminator (real classified as ones and generated as zeros) - d_loss_real = self.discriminator.train_on_batch(imgs, valid) - d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # --------------------- - # Train Generator - # --------------------- - - # Train the generator (wants discriminator to mistake images as real) - g_loss = self.combined.train_on_batch(noise, valid) - - # Plot the progress - print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % - (epoch, d_loss[0], 100*d_loss[1], g_loss)) - - # If at save interval => save generated image samples - if epoch % save_interval == 0: - self.save_imgs(epoch) - - def save_imgs(self, epoch): - r, c = 5, 5 - noise = np.random.normal(0, 1, (r * c, self.latent_dim)) - gen_imgs = self.generator.predict(noise) - - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/mnist_%d.png" % epoch) - plt.close() - - -if __name__ == '__main__': - dcgan = DCGAN() - dcgan.train(epochs=4000, batch_size=32, save_interval=50) -import scipy -from glob import glob -import numpy as np - - -class DataLoader(): - def __init__(self, dataset_name, img_res=(128, 128)): - self.dataset_name = dataset_name - self.img_res = img_res - - def load_data(self, batch_size=1, is_testing=False): - data_type = "train" if not is_testing else "val" - path = glob('./datasets/%s/%s/*' % (self.dataset_name, data_type)) - - batch = np.random.choice(path, size=batch_size) - - imgs_A, imgs_B = [], [] - for img in batch: - img = self.imread(img) - h, w, _ = img.shape - half_w = int(w/2) - img_A = img[:, :half_w, :] - img_B = img[:, half_w:, :] - - img_A = scipy.misc.imresize(img_A, self.img_res) - img_B = scipy.misc.imresize(img_B, self.img_res) - - if not is_testing and np.random.random() > 0.5: - img_A = np.fliplr(img_A) - img_B = np.fliplr(img_B) - - imgs_A.append(img_A) - imgs_B.append(img_B) - - imgs_A = np.array(imgs_A)/127.5 - 1. - imgs_B = np.array(imgs_B)/127.5 - 1. - - return imgs_A, imgs_B - - def load_batch(self, batch_size=1, is_testing=False): - data_type = "train" if not is_testing else "val" - path = glob('./datasets/%s/%s/*' % (self.dataset_name, data_type)) - - self.n_batches = int(len(path) / batch_size) - - for i in range(self.n_batches-1): - batch = path[i*batch_size:(i+1)*batch_size] - imgs_A, imgs_B = [], [] - for img in batch: - img = self.imread(img) - h, w, _ = img.shape - half_w = int(w/2) - img_A = img[:, :half_w, :] - img_B = img[:, half_w:, :] - - img_A = scipy.misc.imresize(img_A, self.img_res) - img_B = scipy.misc.imresize(img_B, self.img_res) - - if not is_testing and np.random.random() > 0.5: - img_A = np.fliplr(img_A) - img_B = np.fliplr(img_B) - - imgs_A.append(img_A) - imgs_B.append(img_B) - - imgs_A = np.array(imgs_A)/127.5 - 1. 
- imgs_B = np.array(imgs_B)/127.5 - 1. - - yield imgs_A, imgs_B - - def load_img(self, path): - img = self.imread(path) - img = scipy.misc.imresize(img, self.img_res) - img = img/127.5 - 1. - return img[np.newaxis, :, :, :] - - def imread(self, path): - return scipy.misc.imread(path, mode='RGB').astype(np.float) -from __future__ import print_function, division -import scipy - -from keras.datasets import mnist -from keras_contrib.layers.normalization.instancenormalization import InstanceNormalization -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Concatenate -from keras.layers import BatchNormalization, Activation, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam -import datetime -import matplotlib.pyplot as plt -import sys -from data_loader import DataLoader -import numpy as np -import os - - -class DiscoGAN(): - def __init__(self): - # Input shape - self.img_rows = 128 - self.img_cols = 128 - self.channels = 3 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - - # Configure data loader - self.dataset_name = 'edges2shoes' - self.data_loader = DataLoader(dataset_name=self.dataset_name, - img_res=(self.img_rows, self.img_cols)) - - # Calculate output shape of D (PatchGAN) - patch = int(self.img_rows / 2**4) - self.disc_patch = (patch, patch, 1) - - # Number of filters in the first layer of G and D - self.gf = 64 - self.df = 64 - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminators - self.d_A = self.build_discriminator() - self.d_B = self.build_discriminator() - self.d_A.compile(loss='mse', - optimizer=optimizer, - metrics=['accuracy']) - self.d_B.compile(loss='mse', - optimizer=optimizer, - metrics=['accuracy']) - - # ------------------------- - # Construct Computational - # Graph of Generators - # ------------------------- - - # Build the generators - self.g_AB = self.build_generator() - self.g_BA = self.build_generator() - - # Input images from both domains - img_A = Input(shape=self.img_shape) - img_B = Input(shape=self.img_shape) - - # Translate images to the other domain - fake_B = self.g_AB(img_A) - fake_A = self.g_BA(img_B) - # Translate images back to original domain - reconstr_A = self.g_BA(fake_B) - reconstr_B = self.g_AB(fake_A) - - # For the combined model we will only train the generators - self.d_A.trainable = False - self.d_B.trainable = False - - # The discriminators determine the validity of translated images - valid_A = self.d_A(fake_A) - valid_B = self.d_B(fake_B) - - # Objectives - # + Adversarial: Fool domain discriminators - # + Translation: Minimize MAE between e.g.
fake B and true B - # + Cycle-consistency: Minimize MAE between reconstructed images and original - self.combined = Model(inputs=[img_A, img_B], - outputs=[valid_A, valid_B, - fake_B, fake_A, - reconstr_A, reconstr_B]) - self.combined.compile(loss=['mse', 'mse', - 'mae', 'mae', - 'mae', 'mae'], - optimizer=optimizer) - - def build_generator(self): - """U-Net Generator""" - - def conv2d(layer_input, filters, f_size=4, normalize=True): - """Layers used during downsampling""" - d = Conv2D(filters, kernel_size=f_size, - strides=2, padding='same')(layer_input) - d = LeakyReLU(alpha=0.2)(d) - if normalize: - d = InstanceNormalization()(d) - return d - - def deconv2d(layer_input, skip_input, filters, f_size=4, dropout_rate=0): - """Layers used during upsampling""" - u = UpSampling2D(size=2)(layer_input) - u = Conv2D(filters, kernel_size=f_size, strides=1, - padding='same', activation='relu')(u) - if dropout_rate: - u = Dropout(dropout_rate)(u) - u = InstanceNormalization()(u) - u = Concatenate()([u, skip_input]) - return u - - # Image input - d0 = Input(shape=self.img_shape) - - # Downsampling - d1 = conv2d(d0, self.gf, normalize=False) - d2 = conv2d(d1, self.gf*2) - d3 = conv2d(d2, self.gf*4) - d4 = conv2d(d3, self.gf*8) - d5 = conv2d(d4, self.gf*8) - d6 = conv2d(d5, self.gf*8) - d7 = conv2d(d6, self.gf*8) - - # Upsampling - u1 = deconv2d(d7, d6, self.gf*8) - u2 = deconv2d(u1, d5, self.gf*8) - u3 = deconv2d(u2, d4, self.gf*8) - u4 = deconv2d(u3, d3, self.gf*4) - u5 = deconv2d(u4, d2, self.gf*2) - u6 = deconv2d(u5, d1, self.gf) - - u7 = UpSampling2D(size=2)(u6) - output_img = Conv2D(self.channels, kernel_size=4, strides=1, - padding='same', activation='tanh')(u7) - - return Model(d0, output_img) - - def build_discriminator(self): - - def d_layer(layer_input, filters, f_size=4, normalization=True): - """Discriminator layer""" - d = Conv2D(filters, kernel_size=f_size, - strides=2, padding='same')(layer_input) - d = LeakyReLU(alpha=0.2)(d) - if normalization: - d = InstanceNormalization()(d) - return d - - img = Input(shape=self.img_shape) - - d1 = d_layer(img, self.df, normalization=False) - d2 = d_layer(d1, self.df*2) - d3 = d_layer(d2, self.df*4) - d4 = d_layer(d3, self.df*8) - - validity = Conv2D(1, kernel_size=4, strides=1, padding='same')(d4) - - return Model(img, validity) - - def train(self, epochs, batch_size=128, sample_interval=50): - - start_time = datetime.datetime.now() - - # Adversarial loss ground truths - valid = np.ones((batch_size,) + self.disc_patch) - fake = np.zeros((batch_size,) + self.disc_patch) - - for epoch in range(epochs): - - for batch_i, (imgs_A, imgs_B) in enumerate(self.data_loader.load_batch(batch_size)): - - # ---------------------- - # Train Discriminators - # ---------------------- - - # Translate images to opposite domain - fake_B = self.g_AB.predict(imgs_A) - fake_A = self.g_BA.predict(imgs_B) - - # Train the discriminators (original images = real / translated = fake) - dA_loss_real = self.d_A.train_on_batch(imgs_A, valid) - dA_loss_fake = self.d_A.train_on_batch(fake_A, fake) - dA_loss = 0.5 * np.add(dA_loss_real, dA_loss_fake) - - dB_loss_real = self.d_B.train_on_batch(imgs_B, valid) - dB_loss_fake = self.d_B.train_on_batch(fake_B, fake) - dB_loss = 0.5 * np.add(dB_loss_real, dB_loss_fake) - - # Total discriminator loss - d_loss = 0.5 * np.add(dA_loss, dB_loss) - - # ------------------ - # Train Generators - # ------------------ - - # Train the generators - g_loss = self.combined.train_on_batch([imgs_A, imgs_B], [valid, valid, - imgs_B, imgs_A, - imgs_A,
imgs_B]) - - elapsed_time = datetime.datetime.now() - start_time - # Plot the progress - print("[%d] [%d/%d] time: %s, [d_loss: %f, g_loss: %f]" % (epoch, batch_i, - self.data_loader.n_batches, - elapsed_time, - d_loss[0], g_loss[0])) - - # If at save interval => save generated image samples - if batch_i % sample_interval == 0: - self.sample_images(epoch, batch_i) - - def sample_images(self, epoch, batch_i): - os.makedirs('images/%s' % self.dataset_name, exist_ok=True) - r, c = 2, 3 - - imgs_A, imgs_B = self.data_loader.load_data( - batch_size=1, is_testing=True) - - # Translate images to the other domain - fake_B = self.g_AB.predict(imgs_A) - fake_A = self.g_BA.predict(imgs_B) - # Translate back to original domain - reconstr_A = self.g_BA.predict(fake_B) - reconstr_B = self.g_AB.predict(fake_A) - - gen_imgs = np.concatenate( - [imgs_A, fake_B, reconstr_A, imgs_B, fake_A, reconstr_B]) - - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - titles = ['Original', 'Translated', 'Reconstructed'] - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt]) - axs[i, j].set_title(titles[j]) - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/%s/%d_%d.png" % - (self.dataset_name, epoch, batch_i)) - plt.close() - - -if __name__ == '__main__': - gan = DiscoGAN() - gan.train(epochs=20, batch_size=1, sample_interval=200) -from __future__ import print_function, division -import scipy - -from keras.datasets import mnist -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Concatenate -from keras.layers import BatchNormalization, Activation, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import RMSprop, Adam -from keras.utils import to_categorical -import keras.backend as K - -import matplotlib.pyplot as plt - -import sys - -import numpy as np - - -class DUALGAN(): - def __init__(self): - self.img_rows = 28 - self.img_cols = 28 - self.channels = 1 - self.img_dim = self.img_rows*self.img_cols - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminators - self.D_A = self.build_discriminator() - self.D_A.compile(loss=self.wasserstein_loss, - optimizer=optimizer, - metrics=['accuracy']) - self.D_B = self.build_discriminator() - self.D_B.compile(loss=self.wasserstein_loss, - optimizer=optimizer, - metrics=['accuracy']) - - # ------------------------- - # Construct Computational - # Graph of Generators - # ------------------------- - - # Build the generators - self.G_AB = self.build_generator() - self.G_BA = self.build_generator() - - # For the combined model we will only train the generators - self.D_A.trainable = False - self.D_B.trainable = False - - # The generators take images from their respective domains as input - imgs_A = Input(shape=(self.img_dim,)) - imgs_B = Input(shape=(self.img_dim,)) - - # The generators translate the images to the opposite domain - fake_B = self.G_AB(imgs_A) - fake_A = self.G_BA(imgs_B) - - # The discriminators determine the validity of translated images - valid_A = self.D_A(fake_A) - valid_B = self.D_B(fake_B) - - # Generators translate the images back to their original domain - recov_A = self.G_BA(fake_B) - recov_B = self.G_AB(fake_A) - - # The combined model (stacked generators and discriminators) - self.combined = Model(inputs=[imgs_A, imgs_B], outputs=[ - valid_A, valid_B, recov_A, recov_B]) - 
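# The adversarial outputs below are trained with the Wasserstein critic loss, while the MAE reconstruction terms (weighted 100x) enforce cycle consistency. -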
self.combined.compile(loss=[self.wasserstein_loss, self.wasserstein_loss, 'mae', 'mae'], - optimizer=optimizer, - loss_weights=[1, 1, 100, 100]) - - def build_generator(self): - - X = Input(shape=(self.img_dim,)) - - model = Sequential() - model.add(Dense(256, input_dim=self.img_dim)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dropout(0.4)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dropout(0.4)) - model.add(Dense(1024)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dropout(0.4)) - model.add(Dense(self.img_dim, activation='tanh')) - - X_translated = model(X) - - return Model(X, X_translated) - - def build_discriminator(self): - - img = Input(shape=(self.img_dim,)) - - model = Sequential() - model.add(Dense(512, input_dim=self.img_dim)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dense(256)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(1)) - - validity = model(img) - - return Model(img, validity) - - def sample_generator_input(self, X, batch_size): - # Sample random batch of images from X - idx = np.random.randint(0, X.shape[0], batch_size) - return X[idx] - - def wasserstein_loss(self, y_true, y_pred): - return K.mean(y_true * y_pred) - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, _), (_, _) = mnist.load_data() - - # Rescale -1 to 1 - X_train = (X_train.astype(np.float32) - 127.5) / 127.5 - - # Domain A and B (rotated) - X_A = X_train[:int(X_train.shape[0]/2)] - X_B = scipy.ndimage.interpolation.rotate( - X_train[int(X_train.shape[0]/2):], 90, axes=(1, 2)) - - X_A = X_A.reshape(X_A.shape[0], self.img_dim) - X_B = X_B.reshape(X_B.shape[0], self.img_dim) - - clip_value = 0.01 - n_critic = 4 - - # Adversarial ground truths - valid = -np.ones((batch_size, 1)) - fake = np.ones((batch_size, 1)) - - for epoch in range(epochs): - - # Train the discriminator for n_critic iterations - for _ in range(n_critic): - - # ---------------------- - # Train Discriminators - # ---------------------- - - # Sample generator inputs - imgs_A = self.sample_generator_input(X_A, batch_size) - imgs_B = self.sample_generator_input(X_B, batch_size) - - # Translate images to their opposite domain - fake_B = self.G_AB.predict(imgs_A) - fake_A = self.G_BA.predict(imgs_B) - - # Train the discriminators - D_A_loss_real = self.D_A.train_on_batch(imgs_A, valid) - D_A_loss_fake = self.D_A.train_on_batch(fake_A, fake) - - D_B_loss_real = self.D_B.train_on_batch(imgs_B, valid) - D_B_loss_fake = self.D_B.train_on_batch(fake_B, fake) - - D_A_loss = 0.5 * np.add(D_A_loss_real, D_A_loss_fake) - D_B_loss = 0.5 * np.add(D_B_loss_real, D_B_loss_fake) - - # Clip discriminator weights - for d in [self.D_A, self.D_B]: - for l in d.layers: - weights = l.get_weights() - weights = [np.clip(w, -clip_value, clip_value) - for w in weights] - l.set_weights(weights) - - # ------------------ - # Train Generators - # ------------------ - - # Train the generators - g_loss = self.combined.train_on_batch( - [imgs_A, imgs_B], [valid, valid, imgs_A, imgs_B]) - - # Plot the progress - print("%d [D1 loss: %f] [D2 loss: %f] [G loss: %f]" - % (epoch, D_A_loss[0], D_B_loss[0], g_loss[0])) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.save_imgs(epoch, X_A, X_B) - - def save_imgs(self, epoch, X_A, X_B): - r, c = 4, 4 - - # Sample 
generator inputs - imgs_A = self.sample_generator_input(X_A, c) - imgs_B = self.sample_generator_input(X_B, c) - - # Images translated to their opposite domain - fake_B = self.G_AB.predict(imgs_A) - fake_A = self.G_BA.predict(imgs_B) - - gen_imgs = np.concatenate([imgs_A, fake_B, imgs_B, fake_A]) - gen_imgs = gen_imgs.reshape((r, c, self.img_rows, self.img_cols, 1)) - - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[i, j, :, :, 0], cmap='gray') - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/mnist_%d.png" % epoch) - plt.close() - - -if __name__ == '__main__': - gan = DUALGAN() - gan.train(epochs=30000, batch_size=32, sample_interval=200) -from __future__ import print_function, division - -from keras.datasets import mnist -from keras.layers import Input, Dense, Reshape, Flatten, Dropout -from keras.layers import BatchNormalization, Activation, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam - -import matplotlib.pyplot as plt - -import sys - -import numpy as np - - -class GAN(): - def __init__(self): - self.img_rows = 28 - self.img_cols = 28 - self.channels = 1 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.latent_dim = 100 - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile(loss='binary_crossentropy', - optimizer=optimizer, - metrics=['accuracy']) - - # Build the generator - self.generator = self.build_generator() - - # The generator takes noise as input and generates imgs - z = Input(shape=(self.latent_dim,)) - img = self.generator(z) - - # For the combined model we will only train the generator - self.discriminator.trainable = False - - # The discriminator takes generated images as input and determines validity - validity = self.discriminator(img) - - # The combined model (stacked generator and discriminator) - # Trains the generator to fool the discriminator - self.combined = Model(z, validity) - self.combined.compile(loss='binary_crossentropy', optimizer=optimizer) - - def build_generator(self): - - model = Sequential() - - model.add(Dense(256, input_dim=self.latent_dim)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(1024)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(np.prod(self.img_shape), activation='tanh')) - model.add(Reshape(self.img_shape)) - - model.summary() - - noise = Input(shape=(self.latent_dim,)) - img = model(noise) - - return Model(noise, img) - - def build_discriminator(self): - - model = Sequential() - - model.add(Flatten(input_shape=self.img_shape)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dense(256)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dense(1, activation='sigmoid')) - model.summary() - - img = Input(shape=self.img_shape) - validity = model(img) - - return Model(img, validity) - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, _), (_, _) = mnist.load_data() - - # Rescale -1 to 1 - X_train = X_train / 127.5 - 1. 
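- # As in the DCGAN above, pixel values are centered to [-1, 1] to match the generator's tanh output.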
- X_train = np.expand_dims(X_train, axis=3) - - # Adversarial ground truths - valid = np.ones((batch_size, 1)) - fake = np.zeros((batch_size, 1)) - - for epoch in range(epochs): - - # --------------------- - # Train Discriminator - # --------------------- - - # Select a random batch of images - idx = np.random.randint(0, X_train.shape[0], batch_size) - imgs = X_train[idx] - - noise = np.random.normal(0, 1, (batch_size, self.latent_dim)) - - # Generate a batch of new images - gen_imgs = self.generator.predict(noise) - - # Train the discriminator - d_loss_real = self.discriminator.train_on_batch(imgs, valid) - d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # --------------------- - # Train Generator - # --------------------- - - noise = np.random.normal(0, 1, (batch_size, self.latent_dim)) - - # Train the generator (to have the discriminator label samples as valid) - g_loss = self.combined.train_on_batch(noise, valid) - - # Plot the progress - print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % - (epoch, d_loss[0], 100*d_loss[1], g_loss)) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.sample_images(epoch) - - def sample_images(self, epoch): - r, c = 5, 5 - noise = np.random.normal(0, 1, (r * c, self.latent_dim)) - gen_imgs = self.generator.predict(noise) - - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/%d.png" % epoch) - plt.close() - - -if __name__ == '__main__': - gan = GAN() - gan.train(epochs=30000, batch_size=32, sample_interval=200) -from __future__ import print_function, division - -from keras.datasets import mnist -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, concatenate -from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D, Lambda -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam -from keras.utils import to_categorical -import keras.backend as K - -import matplotlib.pyplot as plt - -import numpy as np - - -class INFOGAN(): - def __init__(self): - self.img_rows = 28 - self.img_cols = 28 - self.channels = 1 - self.num_classes = 10 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.latent_dim = 72 - - optimizer = Adam(0.0002, 0.5) - losses = ['binary_crossentropy', self.mutual_info_loss] - - # Build the discriminator and the recognition network - self.discriminator, self.auxilliary = self.build_disk_and_q_net() - - self.discriminator.compile(loss=['binary_crossentropy'], - optimizer=optimizer, - metrics=['accuracy']) - - # Build and compile the recognition network Q - self.auxilliary.compile(loss=[self.mutual_info_loss], - optimizer=optimizer, - metrics=['accuracy']) - - # Build the generator - self.generator = self.build_generator() - - # The generator takes noise and the target label as input - # and generates the corresponding digit of that label - gen_input = Input(shape=(self.latent_dim,)) - img = self.generator(gen_input) - - # For the combined model we will only train the generator - self.discriminator.trainable = False - - # The discriminator takes the generated image as input and determines validity - valid = 
self.discriminator(img) - # The recognition network produces the label - target_label = self.auxilliary(img) - - # The combined model (stacked generator and discriminator) - self.combined = Model(gen_input, [valid, target_label]) - self.combined.compile(loss=losses, - optimizer=optimizer) - - def build_generator(self): - - model = Sequential() - - model.add(Dense(128 * 7 * 7, activation="relu", - input_dim=self.latent_dim)) - model.add(Reshape((7, 7, 128))) - model.add(BatchNormalization(momentum=0.8)) - model.add(UpSampling2D()) - model.add(Conv2D(128, kernel_size=3, padding="same")) - model.add(Activation("relu")) - model.add(BatchNormalization(momentum=0.8)) - model.add(UpSampling2D()) - model.add(Conv2D(64, kernel_size=3, padding="same")) - model.add(Activation("relu")) - model.add(BatchNormalization(momentum=0.8)) - model.add(Conv2D(self.channels, kernel_size=3, padding='same')) - model.add(Activation("tanh")) - - gen_input = Input(shape=(self.latent_dim,)) - img = model(gen_input) - - model.summary() - - return Model(gen_input, img) - - def build_disk_and_q_net(self): - - img = Input(shape=self.img_shape) - - # Shared layers between discriminator and recognition network - model = Sequential() - model.add(Conv2D(64, kernel_size=3, strides=2, - input_shape=self.img_shape, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Conv2D(128, kernel_size=3, strides=2, padding="same")) - model.add(ZeroPadding2D(padding=((0, 1), (0, 1)))) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Conv2D(256, kernel_size=3, strides=2, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Conv2D(512, kernel_size=3, strides=2, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Flatten()) - - img_embedding = model(img) - - # Discriminator - validity = Dense(1, activation='sigmoid')(img_embedding) - - # Recognition - q_net = Dense(128, activation='relu')(img_embedding) - label = Dense(self.num_classes, activation='softmax')(q_net) - - # Return discriminator and recognition network - return Model(img, validity), Model(img, label) - - def mutual_info_loss(self, c, c_given_x): - """The mutual information metric we aim to minimize""" - eps = 1e-8 - conditional_entropy = K.mean( - - K.sum(K.log(c_given_x + eps) * c, axis=1)) - entropy = K.mean(- K.sum(K.log(c + eps) * c, axis=1)) - - return conditional_entropy + entropy - - def sample_generator_input(self, batch_size): - # Generator inputs - sampled_noise = np.random.normal(0, 1, (batch_size, 62)) - sampled_labels = np.random.randint( - 0, self.num_classes, batch_size).reshape(-1, 1) - sampled_labels = to_categorical( - sampled_labels, num_classes=self.num_classes) - - return sampled_noise, sampled_labels - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, y_train), (_, _) = mnist.load_data() - - # Rescale -1 to 1 - X_train = (X_train.astype(np.float32) - 127.5) / 127.5 - X_train = np.expand_dims(X_train, axis=3) - y_train = y_train.reshape(-1, 1) - - # Adversarial ground truths - valid = np.ones((batch_size, 1)) - fake = np.zeros((batch_size, 1)) - - for epoch in range(epochs): - - # --------------------- - # Train Discriminator - # --------------------- - - # Select a random batch of images - idx = np.random.randint(0, X_train.shape[0], 
batch_size) - imgs = X_train[idx] - - # Sample noise and categorical labels - sampled_noise, sampled_labels = self.sample_generator_input( - batch_size) - gen_input = np.concatenate((sampled_noise, sampled_labels), axis=1) - - # Generate a batch of new images - gen_imgs = self.generator.predict(gen_input) - - # Train on real and generated data - d_loss_real = self.discriminator.train_on_batch(imgs, valid) - d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake) - - # Avg. loss - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # --------------------- - # Train Generator and Q-network - # --------------------- - - g_loss = self.combined.train_on_batch( - gen_input, [valid, sampled_labels]) - - # Plot the progress - print("%d [D loss: %.2f, acc.: %.2f%%] [Q loss: %.2f] [G loss: %.2f]" % ( - epoch, d_loss[0], 100*d_loss[1], g_loss[1], g_loss[2])) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.sample_images(epoch) - - def sample_images(self, epoch): - r, c = 10, 10 - - fig, axs = plt.subplots(r, c) - for i in range(c): - sampled_noise, _ = self.sample_generator_input(c) - label = to_categorical( - np.full(fill_value=i, shape=(r, 1)), num_classes=self.num_classes) - gen_input = np.concatenate((sampled_noise, label), axis=1) - gen_imgs = self.generator.predict(gen_input) - gen_imgs = 0.5 * gen_imgs + 0.5 - for j in range(r): - axs[j, i].imshow(gen_imgs[j, :, :, 0], cmap='gray') - axs[j, i].axis('off') - fig.savefig("images/%d.png" % epoch) - plt.close() - - def save_model(self): - - def save(model, model_name): - model_path = "saved_model/%s.json" % model_name - weights_path = "saved_model/%s_weights.hdf5" % model_name - options = {"file_arch": model_path, - "file_weight": weights_path} - json_string = model.to_json() - open(options['file_arch'], 'w').write(json_string) - model.save_weights(options['file_weight']) - - save(self.generator, "generator") - save(self.discriminator, "discriminator") - - -if __name__ == '__main__': - infogan = INFOGAN() - infogan.train(epochs=50000, batch_size=128, sample_interval=50) -from __future__ import print_function, division - -from keras.datasets import mnist -from keras.layers import Input, Dense, Reshape, Flatten, Dropout -from keras.layers import BatchNormalization, Activation, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam - -import matplotlib.pyplot as plt - -import sys - -import numpy as np - - -class LSGAN(): - def __init__(self): - self.img_rows = 28 - self.img_cols = 28 - self.channels = 1 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.latent_dim = 100 - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile(loss='mse', - optimizer=optimizer, - metrics=['accuracy']) - - # Build the generator - self.generator = self.build_generator() - - # The generator takes noise as input and generates imgs - z = Input(shape=(self.latent_dim,)) - img = self.generator(z) - - # For the combined model we will only train the generator - self.discriminator.trainable = False - - # The discriminator takes generated images as input and determines validity - valid = self.discriminator(img) - - # The combined model (stacked generator and discriminator) - # Trains generator to fool discriminator - self.combined = Model(z, valid) - # 
(!!!) Optimize w.r.t. MSE loss instead of crossentropy - self.combined.compile(loss='mse', optimizer=optimizer) - - def build_generator(self): - - model = Sequential() - - model.add(Dense(256, input_dim=self.latent_dim)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(1024)) - model.add(LeakyReLU(alpha=0.2)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Dense(np.prod(self.img_shape), activation='tanh')) - model.add(Reshape(self.img_shape)) - - model.summary() - - noise = Input(shape=(self.latent_dim,)) - img = model(noise) - - return Model(noise, img) - - def build_discriminator(self): - - model = Sequential() - - model.add(Flatten(input_shape=self.img_shape)) - model.add(Dense(512)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dense(256)) - model.add(LeakyReLU(alpha=0.2)) - # (!!!) No softmax - model.add(Dense(1)) - model.summary() - - img = Input(shape=self.img_shape) - validity = model(img) - - return Model(img, validity) - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, _), (_, _) = mnist.load_data() - - # Rescale -1 to 1 - X_train = (X_train.astype(np.float32) - 127.5) / 127.5 - X_train = np.expand_dims(X_train, axis=3) - - # Adversarial ground truths - valid = np.ones((batch_size, 1)) - fake = np.zeros((batch_size, 1)) - - for epoch in range(epochs): - - # --------------------- - # Train Discriminator - # --------------------- - - # Select a random batch of images - idx = np.random.randint(0, X_train.shape[0], batch_size) - imgs = X_train[idx] - - # Sample noise as generator input - noise = np.random.normal(0, 1, (batch_size, self.latent_dim)) - - # Generate a batch of new images - gen_imgs = self.generator.predict(noise) - - # Train the discriminator - d_loss_real = self.discriminator.train_on_batch(imgs, valid) - d_loss_fake = self.discriminator.train_on_batch(gen_imgs, fake) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # --------------------- - # Train Generator - # --------------------- - - g_loss = self.combined.train_on_batch(noise, valid) - - # Plot the progress - print("%d [D loss: %f, acc.: %.2f%%] [G loss: %f]" % - (epoch, d_loss[0], 100*d_loss[1], g_loss)) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.sample_images(epoch) - - def sample_images(self, epoch): - r, c = 5, 5 - noise = np.random.normal(0, 1, (r * c, self.latent_dim)) - gen_imgs = self.generator.predict(noise) - - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/mnist_%d.png" % epoch) - plt.close() - - -if __name__ == '__main__': - gan = LSGAN() - gan.train(epochs=30000, batch_size=32, sample_interval=200) -import scipy -from glob import glob -import numpy as np -import matplotlib.pyplot as plt - - -class DataLoader(): - def __init__(self, dataset_name, img_res=(128, 128)): - self.dataset_name = dataset_name - self.img_res = img_res - - def load_data(self, batch_size=1, is_testing=False): - data_type = "train" if not is_testing else "test" - path = glob('./datasets/%s/%s/*' % (self.dataset_name, data_type)) - - batch_images = np.random.choice(path, size=batch_size) - - imgs_A = [] - imgs_B = [] - for img_path in batch_images: - img = 
self.imread(img_path) - - h, w, _ = img.shape - _w = int(w/2) - img_A, img_B = img[:, :_w, :], img[:, _w:, :] - - img_A = scipy.misc.imresize(img_A, self.img_res) - img_B = scipy.misc.imresize(img_B, self.img_res) - - # If training => do random flip - if not is_testing and np.random.random() < 0.5: - img_A = np.fliplr(img_A) - img_B = np.fliplr(img_B) - - imgs_A.append(img_A) - imgs_B.append(img_B) - - imgs_A = np.array(imgs_A)/127.5 - 1. - imgs_B = np.array(imgs_B)/127.5 - 1. - - return imgs_A, imgs_B - - def load_batch(self, batch_size=1, is_testing=False): - data_type = "train" if not is_testing else "val" - path = glob('./datasets/%s/%s/*' % (self.dataset_name, data_type)) - - self.n_batches = int(len(path) / batch_size) - - for i in range(self.n_batches-1): - batch = path[i*batch_size:(i+1)*batch_size] - imgs_A, imgs_B = [], [] - for img in batch: - img = self.imread(img) - h, w, _ = img.shape - half_w = int(w/2) - img_A = img[:, :half_w, :] - img_B = img[:, half_w:, :] - - img_A = scipy.misc.imresize(img_A, self.img_res) - img_B = scipy.misc.imresize(img_B, self.img_res) - - if not is_testing and np.random.random() > 0.5: - img_A = np.fliplr(img_A) - img_B = np.fliplr(img_B) - - imgs_A.append(img_A) - imgs_B.append(img_B) - - imgs_A = np.array(imgs_A)/127.5 - 1. - imgs_B = np.array(imgs_B)/127.5 - 1. - - yield imgs_A, imgs_B - - def imread(self, path): - return scipy.misc.imread(path, mode='RGB').astype(np.float) -from __future__ import print_function, division -import scipy - -from keras.datasets import mnist -from keras_contrib.layers.normalization.instancenormalization import InstanceNormalization -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Concatenate -from keras.layers import BatchNormalization, Activation, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam -import datetime -import matplotlib.pyplot as plt -import sys -from data_loader import DataLoader -import numpy as np -import os - - -class Pix2Pix(): - def __init__(self): - # Input shape - self.img_rows = 256 - self.img_cols = 256 - self.channels = 3 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - - # Configure data loader - self.dataset_name = 'facades' - self.data_loader = DataLoader(dataset_name=self.dataset_name, - img_res=(self.img_rows, self.img_cols)) - - # Calculate output shape of D (PatchGAN) - patch = int(self.img_rows / 2**4) - self.disc_patch = (patch, patch, 1) - - # Number of filters in the first layer of G and D - self.gf = 64 - self.df = 64 - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile(loss='mse', - optimizer=optimizer, - metrics=['accuracy']) - - # ------------------------- - # Construct Computational - # Graph of Generator - # ------------------------- - - # Build the generator - self.generator = self.build_generator() - - # Input images and their conditioning images - img_A = Input(shape=self.img_shape) - img_B = Input(shape=self.img_shape) - - # By conditioning on B generate a fake version of A - fake_A = self.generator(img_B) - - # For the combined model we will only train the generator - self.discriminator.trainable = False - - # The discriminator determines the validity of translated image / condition pairs - valid = self.discriminator([fake_A, img_B]) - - self.combined = Model(inputs=[img_A, img_B], 
outputs=[valid, fake_A]) - self.combined.compile(loss=['mse', 'mae'], - loss_weights=[1, 100], - optimizer=optimizer) - - def build_generator(self): - """U-Net Generator""" - - def conv2d(layer_input, filters, f_size=4, bn=True): - """Layers used during downsampling""" - d = Conv2D(filters, kernel_size=f_size, - strides=2, padding='same')(layer_input) - d = LeakyReLU(alpha=0.2)(d) - if bn: - d = BatchNormalization(momentum=0.8)(d) - return d - - def deconv2d(layer_input, skip_input, filters, f_size=4, dropout_rate=0): - """Layers used during upsampling""" - u = UpSampling2D(size=2)(layer_input) - u = Conv2D(filters, kernel_size=f_size, strides=1, - padding='same', activation='relu')(u) - if dropout_rate: - u = Dropout(dropout_rate)(u) - u = BatchNormalization(momentum=0.8)(u) - u = Concatenate()([u, skip_input]) - return u - - # Image input - d0 = Input(shape=self.img_shape) - - # Downsampling - d1 = conv2d(d0, self.gf, bn=False) - d2 = conv2d(d1, self.gf*2) - d3 = conv2d(d2, self.gf*4) - d4 = conv2d(d3, self.gf*8) - d5 = conv2d(d4, self.gf*8) - d6 = conv2d(d5, self.gf*8) - d7 = conv2d(d6, self.gf*8) - - # Upsampling - u1 = deconv2d(d7, d6, self.gf*8) - u2 = deconv2d(u1, d5, self.gf*8) - u3 = deconv2d(u2, d4, self.gf*8) - u4 = deconv2d(u3, d3, self.gf*4) - u5 = deconv2d(u4, d2, self.gf*2) - u6 = deconv2d(u5, d1, self.gf) - - u7 = UpSampling2D(size=2)(u6) - output_img = Conv2D(self.channels, kernel_size=4, - strides=1, padding='same', activation='tanh')(u7) - - return Model(d0, output_img) - - def build_discriminator(self): - - def d_layer(layer_input, filters, f_size=4, bn=True): - """Discriminator layer""" - d = Conv2D(filters, kernel_size=f_size, - strides=2, padding='same')(layer_input) - d = LeakyReLU(alpha=0.2)(d) - if bn: - d = BatchNormalization(momentum=0.8)(d) - return d - - img_A = Input(shape=self.img_shape) - img_B = Input(shape=self.img_shape) - - # Concatenate image and conditioning image by channels to produce input - combined_imgs = Concatenate(axis=-1)([img_A, img_B]) - - d1 = d_layer(combined_imgs, self.df, bn=False) - d2 = d_layer(d1, self.df*2) - d3 = d_layer(d2, self.df*4) - d4 = d_layer(d3, self.df*8) - - validity = Conv2D(1, kernel_size=4, strides=1, padding='same')(d4) - - return Model([img_A, img_B], validity) - - def train(self, epochs, batch_size=1, sample_interval=50): - - start_time = datetime.datetime.now() - - # Adversarial loss ground truths - valid = np.ones((batch_size,) + self.disc_patch) - fake = np.zeros((batch_size,) + self.disc_patch) - - for epoch in range(epochs): - for batch_i, (imgs_A, imgs_B) in enumerate(self.data_loader.load_batch(batch_size)): - - # --------------------- - # Train Discriminator - # --------------------- - - # Condition on B and generate a translated version - fake_A = self.generator.predict(imgs_B) - - # Train the discriminator (original images = real / generated = fake) - d_loss_real = self.discriminator.train_on_batch( - [imgs_A, imgs_B], valid) - d_loss_fake = self.discriminator.train_on_batch( - [fake_A, imgs_B], fake) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # ----------------- - # Train Generator - # ----------------- - - # Train the generator - g_loss = self.combined.train_on_batch( - [imgs_A, imgs_B], [valid, imgs_A]) - - elapsed_time = datetime.datetime.now() - start_time - # Plot the progress - print("[Epoch %d/%d] [Batch %d/%d] [D loss: %f, acc: %3d%%] [G loss: %f] time: %s" % (epoch, epochs, - batch_i, self.data_loader.n_batches, - d_loss[0], 100 * - d_loss[1], - g_loss[0], - elapsed_time)) - - # If 
at save interval => save generated image samples - if batch_i % sample_interval == 0: - self.sample_images(epoch, batch_i) - - def sample_images(self, epoch, batch_i): - os.makedirs('images/%s' % self.dataset_name, exist_ok=True) - r, c = 3, 3 - - imgs_A, imgs_B = self.data_loader.load_data( - batch_size=3, is_testing=True) - fake_A = self.generator.predict(imgs_B) - - gen_imgs = np.concatenate([imgs_B, fake_A, imgs_A]) - - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - titles = ['Condition', 'Generated', 'Original'] - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt]) - axs[i, j].set_title(titles[i]) - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/%s/%d_%d.png" % - (self.dataset_name, epoch, batch_i)) - plt.close() - - -if __name__ == '__main__': - gan = Pix2Pix() - gan.train(epochs=200, batch_size=1, sample_interval=200) -import scipy -from glob import glob -import numpy as np -from keras.datasets import mnist -from skimage.transform import resize as imresize -import pickle -import os -import urllib -import gzip - - -class DataLoader(): - """Loads images from MNIST (domain A) and MNIST-M (domain B)""" - - def __init__(self, img_res=(128, 128)): - self.img_res = img_res - - self.mnistm_url = 'https://github.com/VanushVaswani/keras_mnistm/releases/download/1.0/keras_mnistm.pkl.gz' - - self.setup_mnist(img_res) - self.setup_mnistm(img_res) - - def normalize(self, images): - return images.astype(np.float32) / 127.5 - 1. - - def setup_mnist(self, img_res): - - print("Setting up MNIST...") - - if not os.path.exists('datasets/mnist_x.npy'): - # Load the dataset - (mnist_X, mnist_y), (_, _) = mnist.load_data() - - # Normalize and rescale images - mnist_X = self.normalize(mnist_X) - mnist_X = np.array([imresize(x, img_res) for x in mnist_X]) - mnist_X = np.expand_dims(mnist_X, axis=-1) - mnist_X = np.repeat(mnist_X, 3, axis=-1) - - self.mnist_X, self.mnist_y = mnist_X, mnist_y - - # Save formatted images - np.save('datasets/mnist_x.npy', self.mnist_X) - np.save('datasets/mnist_y.npy', self.mnist_y) - else: - self.mnist_X = np.load('datasets/mnist_x.npy') - self.mnist_y = np.load('datasets/mnist_y.npy') - - print("+ Done.") - - def setup_mnistm(self, img_res): - - print("Setting up MNIST-M...") - - if not os.path.exists('datasets/mnistm_x.npy'): - - # Download the MNIST-M pkl file - filepath = 'datasets/keras_mnistm.pkl.gz' - if not os.path.exists(filepath.replace('.gz', '')): - print('+ Downloading ' + self.mnistm_url) - data = urllib.request.urlopen(self.mnistm_url) - with open(filepath, 'wb') as f: - f.write(data.read()) - with open(filepath.replace('.gz', ''), 'wb') as out_f, \ - gzip.GzipFile(filepath) as zip_f: - out_f.write(zip_f.read()) - os.unlink(filepath) - - # load MNIST-M images from pkl file - with open('datasets/keras_mnistm.pkl', "rb") as f: - data = pickle.load(f, encoding='bytes') - - # Normalize and rescale images - mnistm_X = np.array(data[b'train']) - mnistm_X = self.normalize(mnistm_X) - mnistm_X = np.array([imresize(x, img_res) for x in mnistm_X]) - - self.mnistm_X, self.mnistm_y = mnistm_X, self.mnist_y.copy() - - # Save formatted images - np.save('datasets/mnistm_x.npy', self.mnistm_X) - np.save('datasets/mnistm_y.npy', self.mnistm_y) - else: - self.mnistm_X = np.load('datasets/mnistm_x.npy') - self.mnistm_y = np.load('datasets/mnistm_y.npy') - - print("+ Done.") - - def load_data(self, domain, batch_size=1): - - X = self.mnist_X if domain == 'A' else self.mnistm_X - y = self.mnist_y if 
domain == 'A' else self.mnistm_y - - idx = np.random.choice(list(range(len(X))), size=batch_size) - - return X[idx], y[idx] -from __future__ import print_function, division -import scipy - -from keras.datasets import mnist -from keras_contrib.layers.normalization.instancenormalization import InstanceNormalization -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Concatenate -from keras.layers import BatchNormalization, Activation, ZeroPadding2D, Add -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam -from keras.utils import to_categorical -import datetime -import matplotlib.pyplot as plt -import sys -from data_loader import DataLoader -import numpy as np -import os - - -class PixelDA(): - def __init__(self): - # Input shape - self.img_rows = 32 - self.img_cols = 32 - self.channels = 3 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.num_classes = 10 - - # Configure MNIST and MNIST-M data loader - self.data_loader = DataLoader(img_res=(self.img_rows, self.img_cols)) - - # Loss weights - lambda_adv = 10 - lambda_clf = 1 - - # Calculate output shape of D (PatchGAN) - patch = int(self.img_rows / 2**4) - self.disc_patch = (patch, patch, 1) - - # Number of residual blocks in the generator - self.residual_blocks = 6 - - optimizer = Adam(0.0002, 0.5) - - # Number of filters in first layer of discriminator and classifier - self.df = 64 - self.cf = 64 - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile(loss='mse', - optimizer=optimizer, - metrics=['accuracy']) - - # Build the generator - self.generator = self.build_generator() - - # Build the task (classification) network - self.clf = self.build_classifier() - - # Input images from both domains - img_A = Input(shape=self.img_shape) - img_B = Input(shape=self.img_shape) - - # Translate images from domain A to domain B - fake_B = self.generator(img_A) - - # Classify the translated image - class_pred = self.clf(fake_B) - - # For the combined model we will only train the generator and classifier - self.discriminator.trainable = False - - # Discriminator determines validity of translated images - valid = self.discriminator(fake_B) - - self.combined = Model(img_A, [valid, class_pred]) - self.combined.compile(loss=['mse', 'categorical_crossentropy'], - loss_weights=[lambda_adv, lambda_clf], - optimizer=optimizer, - metrics=['accuracy']) - - def build_generator(self): - """Resnet Generator""" - - def residual_block(layer_input): - """Residual block described in paper""" - d = Conv2D(64, kernel_size=3, strides=1, - padding='same')(layer_input) - d = BatchNormalization(momentum=0.8)(d) - d = Activation('relu')(d) - d = Conv2D(64, kernel_size=3, strides=1, padding='same')(d) - d = BatchNormalization(momentum=0.8)(d) - d = Add()([d, layer_input]) - return d - - # Image input - img = Input(shape=self.img_shape) - - l1 = Conv2D(64, kernel_size=3, padding='same', activation='relu')(img) - - # Propagate signal through residual blocks - r = residual_block(l1) - for _ in range(self.residual_blocks - 1): - r = residual_block(r) - - output_img = Conv2D(self.channels, kernel_size=3, - padding='same', activation='tanh')(r) - - return Model(img, output_img) - - def build_discriminator(self): - - def d_layer(layer_input, filters, f_size=4, normalization=True): - """Discriminator layer""" - d = Conv2D(filters, 
kernel_size=f_size, - strides=2, padding='same')(layer_input) - d = LeakyReLU(alpha=0.2)(d) - if normalization: - d = InstanceNormalization()(d) - return d - - img = Input(shape=self.img_shape) - - d1 = d_layer(img, self.df, normalization=False) - d2 = d_layer(d1, self.df*2) - d3 = d_layer(d2, self.df*4) - d4 = d_layer(d3, self.df*8) - - validity = Conv2D(1, kernel_size=4, strides=1, padding='same')(d4) - - return Model(img, validity) - - def build_classifier(self): - - def clf_layer(layer_input, filters, f_size=4, normalization=True): - """Classifier layer""" - d = Conv2D(filters, kernel_size=f_size, - strides=2, padding='same')(layer_input) - d = LeakyReLU(alpha=0.2)(d) - if normalization: - d = InstanceNormalization()(d) - return d - - img = Input(shape=self.img_shape) - - c1 = clf_layer(img, self.cf, normalization=False) - c2 = clf_layer(c1, self.cf*2) - c3 = clf_layer(c2, self.cf*4) - c4 = clf_layer(c3, self.cf*8) - c5 = clf_layer(c4, self.cf*8) - - class_pred = Dense( - self.num_classes, activation='softmax')(Flatten()(c5)) - - return Model(img, class_pred) - - def train(self, epochs, batch_size=128, sample_interval=50): - - half_batch = int(batch_size / 2) - - # Classification accuracy on the last 100 batches of domain B - test_accs = [] - - # Adversarial ground truths - valid = np.ones((batch_size, *self.disc_patch)) - fake = np.zeros((batch_size, *self.disc_patch)) - - for epoch in range(epochs): - - # --------------------- - # Train Discriminator - # --------------------- - - imgs_A, labels_A = self.data_loader.load_data( - domain="A", batch_size=batch_size) - imgs_B, labels_B = self.data_loader.load_data( - domain="B", batch_size=batch_size) - - # Translate images from domain A to domain B - fake_B = self.generator.predict(imgs_A) - - # Train the discriminator (original images = real / translated = fake) - d_loss_real = self.discriminator.train_on_batch(imgs_B, valid) - d_loss_fake = self.discriminator.train_on_batch(fake_B, fake) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # -------------------------------- - # Train Generator and Classifier - # -------------------------------- - - # One-hot encoding of labels - labels_A = to_categorical(labels_A, num_classes=self.num_classes) - - # Train the generator and classifier - g_loss = self.combined.train_on_batch(imgs_A, [valid, labels_A]) - - # ----------------------- - # Evaluation (domain B) - # ----------------------- - - pred_B = self.clf.predict(imgs_B) - test_acc = np.mean(np.argmax(pred_B, axis=1) == labels_B) - - # Add accuracy to list of last 100 accuracy measurements - test_accs.append(test_acc) - if len(test_accs) > 100: - test_accs.pop(0) - - # Plot the progress - print("%d : [D - loss: %.5f, acc: %3d%%], [G - loss: %.5f], [clf - loss: %.5f, acc: %3d%%, test_acc: %3d%% (%3d%%)]" % - (epoch, d_loss[0], 100*float(d_loss[1]), - g_loss[1], g_loss[2], 100*float(g_loss[-1]), - 100*float(test_acc), 100*float(np.mean(test_accs)))) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.sample_images(epoch) - - def sample_images(self, epoch): - r, c = 2, 5 - - imgs_A, _ = self.data_loader.load_data(domain="A", batch_size=5) - - # Translate images to the other domain - fake_B = self.generator.predict(imgs_A) - - gen_imgs = np.concatenate([imgs_A, fake_B]) - - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - #titles = ['Original', 'Translated'] - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt]) - #axs[i, 
j].set_title(titles[i]) - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/%d.png" % (epoch)) - plt.close() - - -if __name__ == '__main__': - gan = PixelDA() - gan.train(epochs=30000, batch_size=32, sample_interval=500) -from __future__ import print_function, division -import scipy - -import datetime -import matplotlib.pyplot as plt -import sys -from data_loader import DataLoader -import numpy as np -import os - - -# Configure MNIST and MNIST-M data loader -data_loader = DataLoader(img_res=(32, 32)) - -mnist, _ = data_loader.load_data(domain="A", batch_size=25) -mnistm, _ = data_loader.load_data(domain="B", batch_size=25) - -r, c = 5, 5 - -for img_i, imgs in enumerate([mnist, mnistm]): - - #titles = ['Original', 'Translated'] - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(imgs[cnt]) - #axs[i, j].set_title(titles[i]) - axs[i, j].axis('off') - cnt += 1 - fig.savefig("%d.png" % (img_i)) - plt.close() -from __future__ import print_function, division - -from keras.datasets import mnist -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, multiply, GaussianNoise -from keras.layers import BatchNormalization, Activation, Embedding, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import Adam -from keras import losses -from keras.utils import to_categorical -import keras.backend as K - -import matplotlib.pyplot as plt - -import numpy as np - - -class SGAN: - def __init__(self): - self.img_rows = 28 - self.img_cols = 28 - self.channels = 1 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.num_classes = 10 - self.latent_dim = 100 - - optimizer = Adam(0.0002, 0.5) - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile( - loss=['binary_crossentropy', 'categorical_crossentropy'], - loss_weights=[0.5, 0.5], - optimizer=optimizer, - metrics=['accuracy'] - ) - - # Build the generator - self.generator = self.build_generator() - - # The generator takes noise as input and generates imgs - noise = Input(shape=(100,)) - img = self.generator(noise) - - # For the combined model we will only train the generator - self.discriminator.trainable = False - - # The discriminator takes generated images as input and determines validity - valid, _ = self.discriminator(img) - - # The combined model (stacked generator and discriminator) - # Trains generator to fool discriminator - self.combined = Model(noise, valid) - self.combined.compile( - loss=['binary_crossentropy'], optimizer=optimizer) - - def build_generator(self): - - model = Sequential() - - model.add(Dense(128 * 7 * 7, activation="relu", - input_dim=self.latent_dim)) - model.add(Reshape((7, 7, 128))) - model.add(BatchNormalization(momentum=0.8)) - model.add(UpSampling2D()) - model.add(Conv2D(128, kernel_size=3, padding="same")) - model.add(Activation("relu")) - model.add(BatchNormalization(momentum=0.8)) - model.add(UpSampling2D()) - model.add(Conv2D(64, kernel_size=3, padding="same")) - model.add(Activation("relu")) - model.add(BatchNormalization(momentum=0.8)) - model.add(Conv2D(1, kernel_size=3, padding="same")) - model.add(Activation("tanh")) - - model.summary() - - noise = Input(shape=(self.latent_dim,)) - img = model(noise) - - return Model(noise, img) - - def build_discriminator(self): - - model = Sequential() - - model.add(Conv2D(32, kernel_size=3, 
strides=2, - input_shape=self.img_shape, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Conv2D(64, kernel_size=3, strides=2, padding="same")) - model.add(ZeroPadding2D(padding=((0, 1), (0, 1)))) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Conv2D(128, kernel_size=3, strides=2, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(BatchNormalization(momentum=0.8)) - model.add(Conv2D(256, kernel_size=3, strides=1, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Flatten()) - - model.summary() - - img = Input(shape=self.img_shape) - - features = model(img) - valid = Dense(1, activation="sigmoid")(features) - label = Dense(self.num_classes+1, activation="softmax")(features) - - return Model(img, [valid, label]) - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, y_train), (_, _) = mnist.load_data() - - # Rescale -1 to 1 - X_train = (X_train.astype(np.float32) - 127.5) / 127.5 - X_train = np.expand_dims(X_train, axis=3) - y_train = y_train.reshape(-1, 1) - - # Class weights: - # To balance the difference in occurrences of digit class labels. - # 50% of labels that the discriminator trains on are 'fake'. - # Weight = 1 / frequency - half_batch = batch_size // 2 - cw1 = {0: 1, 1: 1} - cw2 = {i: self.num_classes / - half_batch for i in range(self.num_classes)} - cw2[self.num_classes] = 1 / half_batch - - # Adversarial ground truths - valid = np.ones((batch_size, 1)) - fake = np.zeros((batch_size, 1)) - - for epoch in range(epochs): - - # --------------------- - # Train Discriminator - # --------------------- - - # Select a random batch of images - idx = np.random.randint(0, X_train.shape[0], batch_size) - imgs = X_train[idx] - - # Sample noise and generate a batch of new images - noise = np.random.normal(0, 1, (batch_size, self.latent_dim)) - gen_imgs = self.generator.predict(noise) - - # One-hot encoding of labels - labels = to_categorical( - y_train[idx], num_classes=self.num_classes+1) - fake_labels = to_categorical( - np.full((batch_size, 1), self.num_classes), num_classes=self.num_classes+1) - - # Train the discriminator - d_loss_real = self.discriminator.train_on_batch( - imgs, [valid, labels], class_weight=[cw1, cw2]) - d_loss_fake = self.discriminator.train_on_batch( - gen_imgs, [fake, fake_labels], class_weight=[cw1, cw2]) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # --------------------- - # Train Generator - # --------------------- - - g_loss = self.combined.train_on_batch( - noise, valid, class_weight=[cw1, cw2]) - - # Plot the progress - print("%d [D loss: %f, acc: %.2f%%, op_acc: %.2f%%] [G loss: %f]" % ( - epoch, d_loss[0], 100*d_loss[3], 100*d_loss[4], g_loss)) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.sample_images(epoch) - - def sample_images(self, epoch): - r, c = 5, 5 - noise = np.random.normal(0, 1, (r * c, self.latent_dim)) - gen_imgs = self.generator.predict(noise) - - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/mnist_%d.png" % epoch) - plt.close() - - def save_model(self): - - def save(model, model_name): - model_path = "saved_model/%s.json" % model_name - 
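# The architecture is serialized to JSON and the weights to a separate HDF5 file, so each can be reloaded independently. -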
weights_path = "saved_model/%s_weights.hdf5" % model_name - options = {"file_arch": model_path, - "file_weight": weights_path} - json_string = model.to_json() - open(options['file_arch'], 'w').write(json_string) - model.save_weights(options['file_weight']) - - save(self.generator, "mnist_sgan_generator") - save(self.discriminator, "mnist_sgan_discriminator") - save(self.combined, "mnist_sgan_adversarial") - - -if __name__ == '__main__': - sgan = SGAN() - sgan.train(epochs=20000, batch_size=32, sample_interval=50) -import scipy -from glob import glob -import numpy as np -import matplotlib.pyplot as plt - - -class DataLoader(): - def __init__(self, dataset_name, img_res=(128, 128)): - self.dataset_name = dataset_name - self.img_res = img_res - - def load_data(self, batch_size=1, is_testing=False): - data_type = "train" if not is_testing else "test" - - path = glob('./datasets/%s/*' % (self.dataset_name)) - - batch_images = np.random.choice(path, size=batch_size) - - imgs_hr = [] - imgs_lr = [] - for img_path in batch_images: - img = self.imread(img_path) - - h, w = self.img_res - low_h, low_w = int(h / 4), int(w / 4) - - img_hr = scipy.misc.imresize(img, self.img_res) - img_lr = scipy.misc.imresize(img, (low_h, low_w)) - - # If training => do random flip - if not is_testing and np.random.random() < 0.5: - img_hr = np.fliplr(img_hr) - img_lr = np.fliplr(img_lr) - - imgs_hr.append(img_hr) - imgs_lr.append(img_lr) - - imgs_hr = np.array(imgs_hr) / 127.5 - 1. - imgs_lr = np.array(imgs_lr) / 127.5 - 1. - - return imgs_hr, imgs_lr - - def imread(self, path): - return scipy.misc.imread(path, mode='RGB').astype(np.float) -""" -Super-resolution of CelebA using Generative Adversarial Networks. - -The dataset can be downloaded from: https://www.dropbox.com/sh/8oqt9vytwxb3s4r/AADIKlz8PR9zr6Y20qbkunrba/Img/img_align_celeba.zip?dl=0 - -Instructions for running the script: -1. Download the dataset from the provided link -2. Save the folder 'img_align_celeba' to 'datasets/' -3.
-from __future__ import print_function, division -import scipy - -from keras.datasets import mnist -from keras_contrib.layers.normalization.instancenormalization import InstanceNormalization -from keras.layers import Input, Dense, Reshape, Flatten, Dropout, Concatenate -from keras.layers import BatchNormalization, Activation, ZeroPadding2D, Add -from keras.layers.advanced_activations import PReLU, LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.applications import VGG19 -from keras.models import Sequential, Model -from keras.optimizers import Adam -import datetime -import matplotlib.pyplot as plt -import sys -from data_loader import DataLoader -import numpy as np -import os - -import keras.backend as K - - -class SRGAN(): - def __init__(self): - # Input shape - self.channels = 3 - self.lr_height = 64 # Low resolution height - self.lr_width = 64 # Low resolution width - self.lr_shape = (self.lr_height, self.lr_width, self.channels) - self.hr_height = self.lr_height*4 # High resolution height - self.hr_width = self.lr_width*4 # High resolution width - self.hr_shape = (self.hr_height, self.hr_width, self.channels) - - # Number of residual blocks in the generator - self.n_residual_blocks = 16 - - optimizer = Adam(0.0002, 0.5) - - # We use a pre-trained VGG19 model to extract image features from the high resolution - # and the generated high resolution images and minimize the mse between them - self.vgg = self.build_vgg() - self.vgg.trainable = False - self.vgg.compile(loss='mse', - optimizer=optimizer, - metrics=['accuracy']) - - # Configure data loader - self.dataset_name = 'img_align_celeba' - self.data_loader = DataLoader(dataset_name=self.dataset_name, - img_res=(self.hr_height, self.hr_width)) - - # Calculate output shape of D (PatchGAN) - patch = int(self.hr_height / 2**4) - self.disc_patch = (patch, patch, 1) - - # Number of filters in the first layer of G and D - self.gf = 64 - self.df = 64 - - # Build and compile the discriminator - self.discriminator = self.build_discriminator() - self.discriminator.compile(loss='mse', - optimizer=optimizer, - metrics=['accuracy']) - - # Build the generator - self.generator = self.build_generator() - - # High res. and low res. images - img_hr = Input(shape=self.hr_shape) - img_lr = Input(shape=self.lr_shape) - - # Generate high res. version from low res. - fake_hr = self.generator(img_lr) - - # Extract image features of the generated img - fake_features = self.vgg(fake_hr) - - # For the combined model we will only train the generator - self.discriminator.trainable = False - - # Discriminator determines validity of generated high res. images - validity = self.discriminator(fake_hr) - - self.combined = Model([img_lr, img_hr], [validity, fake_features]) - self.combined.compile(loss=['binary_crossentropy', 'mse'], - loss_weights=[1e-3, 1], - optimizer=optimizer) - - def build_vgg(self): - """ - Builds a pre-trained VGG19 model that outputs image features extracted at the - third block of the model - """ - vgg = VGG19(weights="imagenet") - # Set outputs to outputs of last conv.
layer in block 3 - # See architecture at: https://github.com/keras-team/keras/blob/master/keras/applications/vgg19.py - vgg.outputs = [vgg.layers[9].output] - - img = Input(shape=self.hr_shape) - - # Extract image features - img_features = vgg(img) - - return Model(img, img_features) - - def build_generator(self): - - def residual_block(layer_input, filters): - """Residual block described in paper""" - d = Conv2D(filters, kernel_size=3, strides=1, - padding='same')(layer_input) - d = Activation('relu')(d) - d = BatchNormalization(momentum=0.8)(d) - d = Conv2D(filters, kernel_size=3, strides=1, padding='same')(d) - d = BatchNormalization(momentum=0.8)(d) - d = Add()([d, layer_input]) - return d - - def deconv2d(layer_input): - """Layers used during upsampling""" - u = UpSampling2D(size=2)(layer_input) - u = Conv2D(256, kernel_size=3, strides=1, padding='same')(u) - u = Activation('relu')(u) - return u - - # Low resolution image input - img_lr = Input(shape=self.lr_shape) - - # Pre-residual block - c1 = Conv2D(64, kernel_size=9, strides=1, padding='same')(img_lr) - c1 = Activation('relu')(c1) - - # Propogate through residual blocks - r = residual_block(c1, self.gf) - for _ in range(self.n_residual_blocks - 1): - r = residual_block(r, self.gf) - - # Post-residual block - c2 = Conv2D(64, kernel_size=3, strides=1, padding='same')(r) - c2 = BatchNormalization(momentum=0.8)(c2) - c2 = Add()([c2, c1]) - - # Upsampling - u1 = deconv2d(c2) - u2 = deconv2d(u1) - - # Generate high resolution output - gen_hr = Conv2D(self.channels, kernel_size=9, strides=1, - padding='same', activation='tanh')(u2) - - return Model(img_lr, gen_hr) - - def build_discriminator(self): - - def d_block(layer_input, filters, strides=1, bn=True): - """Discriminator layer""" - d = Conv2D(filters, kernel_size=3, strides=strides, - padding='same')(layer_input) - d = LeakyReLU(alpha=0.2)(d) - if bn: - d = BatchNormalization(momentum=0.8)(d) - return d - - # Input img - d0 = Input(shape=self.hr_shape) - - d1 = d_block(d0, self.df, bn=False) - d2 = d_block(d1, self.df, strides=2) - d3 = d_block(d2, self.df*2) - d4 = d_block(d3, self.df*2, strides=2) - d5 = d_block(d4, self.df*4) - d6 = d_block(d5, self.df*4, strides=2) - d7 = d_block(d6, self.df*8) - d8 = d_block(d7, self.df*8, strides=2) - - d9 = Dense(self.df*16)(d8) - d10 = LeakyReLU(alpha=0.2)(d9) - validity = Dense(1, activation='sigmoid')(d10) - - return Model(d0, validity) - - def train(self, epochs, batch_size=1, sample_interval=50): - - start_time = datetime.datetime.now() - - for epoch in range(epochs): - - # ---------------------- - # Train Discriminator - # ---------------------- - - # Sample images and their conditioning counterparts - imgs_hr, imgs_lr = self.data_loader.load_data(batch_size) - - # From low res. image generate high res. 
version - fake_hr = self.generator.predict(imgs_lr) - - valid = np.ones((batch_size,) + self.disc_patch) - fake = np.zeros((batch_size,) + self.disc_patch) - - # Train the discriminators (original images = real / generated = Fake) - d_loss_real = self.discriminator.train_on_batch(imgs_hr, valid) - d_loss_fake = self.discriminator.train_on_batch(fake_hr, fake) - d_loss = 0.5 * np.add(d_loss_real, d_loss_fake) - - # ------------------ - # Train Generator - # ------------------ - - # Sample images and their conditioning counterparts - imgs_hr, imgs_lr = self.data_loader.load_data(batch_size) - - # The generators want the discriminators to label the generated images as real - valid = np.ones((batch_size,) + self.disc_patch) - - # Extract ground truth image features using pre-trained VGG19 model - image_features = self.vgg.predict(imgs_hr) - - # Train the generators - g_loss = self.combined.train_on_batch( - [imgs_lr, imgs_hr], [valid, image_features]) - - elapsed_time = datetime.datetime.now() - start_time - # Plot the progress - print("%d time: %s" % (epoch, elapsed_time)) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.sample_images(epoch) - - def sample_images(self, epoch): - os.makedirs('images/%s' % self.dataset_name, exist_ok=True) - r, c = 2, 2 - - imgs_hr, imgs_lr = self.data_loader.load_data( - batch_size=2, is_testing=True) - fake_hr = self.generator.predict(imgs_lr) - - # Rescale images 0 - 1 - imgs_lr = 0.5 * imgs_lr + 0.5 - fake_hr = 0.5 * fake_hr + 0.5 - imgs_hr = 0.5 * imgs_hr + 0.5 - - # Save generated images and the high resolution originals - titles = ['Generated', 'Original'] - fig, axs = plt.subplots(r, c) - cnt = 0 - for row in range(r): - for col, image in enumerate([fake_hr, imgs_hr]): - axs[row, col].imshow(image[row]) - axs[row, col].set_title(titles[col]) - axs[row, col].axis('off') - cnt += 1 - fig.savefig("images/%s/%d.png" % (self.dataset_name, epoch)) - plt.close() - - # Save low resolution images for comparison - for i in range(r): - fig = plt.figure() - plt.imshow(imgs_lr[i]) - fig.savefig('images/%s/%d_lowres%d.png' % - (self.dataset_name, epoch, i)) - plt.close() - - -if __name__ == '__main__': - gan = SRGAN() - gan.train(epochs=30000, batch_size=1, sample_interval=50) -from __future__ import print_function, division - -from keras.datasets import mnist -from keras.layers import Input, Dense, Reshape, Flatten, Dropout -from keras.layers import BatchNormalization, Activation, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import RMSprop - -import keras.backend as K - -import matplotlib.pyplot as plt - -import sys - -import numpy as np - - -class WGAN(): - def __init__(self): - self.img_rows = 28 - self.img_cols = 28 - self.channels = 1 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.latent_dim = 100 - - # Following parameter and optimizer set as recommended in paper - self.n_critic = 5 - self.clip_value = 0.01 - optimizer = RMSprop(lr=0.00005) - - # Build and compile the critic - self.critic = self.build_critic() - self.critic.compile(loss=self.wasserstein_loss, - optimizer=optimizer, - metrics=['accuracy']) - - # Build the generator - self.generator = self.build_generator() - - # The generator takes noise as input and generated imgs - z = Input(shape=(self.latent_dim,)) - img = self.generator(z) - - # For the combined model we will only 
train the generator - self.critic.trainable = False - - # The critic takes generated images as input and determines validity - valid = self.critic(img) - - # The combined model (stacked generator and critic) - self.combined = Model(z, valid) - self.combined.compile(loss=self.wasserstein_loss, - optimizer=optimizer, - metrics=['accuracy']) - - def wasserstein_loss(self, y_true, y_pred): - return K.mean(y_true * y_pred) - - def build_generator(self): - - model = Sequential() - - model.add(Dense(128 * 7 * 7, activation="relu", - input_dim=self.latent_dim)) - model.add(Reshape((7, 7, 128))) - model.add(UpSampling2D()) - model.add(Conv2D(128, kernel_size=4, padding="same")) - model.add(BatchNormalization(momentum=0.8)) - model.add(Activation("relu")) - model.add(UpSampling2D()) - model.add(Conv2D(64, kernel_size=4, padding="same")) - model.add(BatchNormalization(momentum=0.8)) - model.add(Activation("relu")) - model.add(Conv2D(self.channels, kernel_size=4, padding="same")) - model.add(Activation("tanh")) - - model.summary() - - noise = Input(shape=(self.latent_dim,)) - img = model(noise) - - return Model(noise, img) - - def build_critic(self): - - model = Sequential() - - model.add(Conv2D(16, kernel_size=3, strides=2, - input_shape=self.img_shape, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Conv2D(32, kernel_size=3, strides=2, padding="same")) - model.add(ZeroPadding2D(padding=((0, 1), (0, 1)))) - model.add(BatchNormalization(momentum=0.8)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Conv2D(64, kernel_size=3, strides=2, padding="same")) - model.add(BatchNormalization(momentum=0.8)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Conv2D(128, kernel_size=3, strides=1, padding="same")) - model.add(BatchNormalization(momentum=0.8)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Flatten()) - model.add(Dense(1)) - - model.summary() - - img = Input(shape=self.img_shape) - validity = model(img) - - return Model(img, validity) - - def train(self, epochs, batch_size=128, sample_interval=50): - - # Load the dataset - (X_train, _), (_, _) = mnist.load_data() - - # Rescale -1 to 1 - X_train = (X_train.astype(np.float32) - 127.5) / 127.5 - X_train = np.expand_dims(X_train, axis=3) - - # Adversarial ground truths - valid = -np.ones((batch_size, 1)) - fake = np.ones((batch_size, 1)) - - for epoch in range(epochs): - - for _ in range(self.n_critic): - - # --------------------- - # Train Discriminator - # --------------------- - - # Select a random batch of images - idx = np.random.randint(0, X_train.shape[0], batch_size) - imgs = X_train[idx] - - # Sample noise as generator input - noise = np.random.normal(0, 1, (batch_size, self.latent_dim)) - - # Generate a batch of new images - gen_imgs = self.generator.predict(noise) - - # Train the critic - d_loss_real = self.critic.train_on_batch(imgs, valid) - d_loss_fake = self.critic.train_on_batch(gen_imgs, fake) - d_loss = 0.5 * np.add(d_loss_fake, d_loss_real) - - # Clip critic weights - for l in self.critic.layers: - weights = l.get_weights() - weights = [np.clip(w, -self.clip_value, - self.clip_value) for w in weights] - l.set_weights(weights) - - # --------------------- - # Train Generator - # --------------------- - - g_loss = self.combined.train_on_batch(noise, valid) - - # Plot the progress - print("%d [D loss: %f] [G loss: %f]" % - (epoch, 1 - d_loss[0], 1 - g_loss[0])) - - # If at save interval => save generated image samples - if 
epoch % sample_interval == 0: - self.sample_images(epoch) - - def sample_images(self, epoch): - r, c = 5, 5 - noise = np.random.normal(0, 1, (r * c, self.latent_dim)) - gen_imgs = self.generator.predict(noise) - - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/mnist_%d.png" % epoch) - plt.close() - - -if __name__ == '__main__': - wgan = WGAN() - wgan.train(epochs=4000, batch_size=32, sample_interval=50) - -# Large amount of credit goes to: -# https://github.com/keras-team/keras-contrib/blob/master/examples/improved_wgan.py -# which I've used as a reference for this implementation - -from __future__ import print_function, division - -from keras.datasets import mnist -from keras.layers.merge import _Merge -from keras.layers import Input, Dense, Reshape, Flatten, Dropout -from keras.layers import BatchNormalization, Activation, ZeroPadding2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.convolutional import UpSampling2D, Conv2D -from keras.models import Sequential, Model -from keras.optimizers import RMSprop -from functools import partial - -import keras.backend as K - -import matplotlib.pyplot as plt - -import sys - -import numpy as np - - -class RandomWeightedAverage(_Merge): - """Provides a (random) weighted average between real and generated image samples""" - - def _merge_function(self, inputs): - alpha = K.random_uniform((32, 1, 1, 1)) - return (alpha * inputs[0]) + ((1 - alpha) * inputs[1]) - - -class WGANGP(): - def __init__(self): - self.img_rows = 28 - self.img_cols = 28 - self.channels = 1 - self.img_shape = (self.img_rows, self.img_cols, self.channels) - self.latent_dim = 100 - - # Following parameter and optimizer set as recommended in paper - self.n_critic = 5 - optimizer = RMSprop(lr=0.00005) - - # Build the generator and critic - self.generator = self.build_generator() - self.critic = self.build_critic() - - # ------------------------------- - # Construct Computational Graph - # for the Critic - # ------------------------------- - - # Freeze generator's layers while training critic - self.generator.trainable = False - - # Image input (real sample) - real_img = Input(shape=self.img_shape) - - # Noise input - z_disc = Input(shape=(self.latent_dim,)) - # Generate image based of noise (fake sample) - fake_img = self.generator(z_disc) - - # Discriminator determines validity of the real and fake images - fake = self.critic(fake_img) - valid = self.critic(real_img) - - # Construct weighted average between real and fake images - interpolated_img = RandomWeightedAverage()([real_img, fake_img]) - # Determine validity of weighted sample - validity_interpolated = self.critic(interpolated_img) - - # Use Python partial to provide loss function with additional - # 'averaged_samples' argument - partial_gp_loss = partial(self.gradient_penalty_loss, - averaged_samples=interpolated_img) - partial_gp_loss.__name__ = 'gradient_penalty' # Keras requires function names - - self.critic_model = Model(inputs=[real_img, z_disc], - outputs=[valid, fake, validity_interpolated]) - self.critic_model.compile(loss=[self.wasserstein_loss, - self.wasserstein_loss, - partial_gp_loss], - optimizer=optimizer, - loss_weights=[1, 1, 10]) - # ------------------------------- - # Construct Computational Graph - # for Generator - # ------------------------------- - - # For the 
generator we freeze the critic's layers - self.critic.trainable = False - self.generator.trainable = True - - # Sampled noise for input to generator - z_gen = Input(shape=(100,)) - # Generate images based of noise - img = self.generator(z_gen) - # Discriminator determines validity - valid = self.critic(img) - # Defines generator model - self.generator_model = Model(z_gen, valid) - self.generator_model.compile( - loss=self.wasserstein_loss, optimizer=optimizer) - - def gradient_penalty_loss(self, y_true, y_pred, averaged_samples): - """ - Computes gradient penalty based on prediction and weighted real / fake samples - """ - gradients = K.gradients(y_pred, averaged_samples)[0] - # compute the euclidean norm by squaring ... - gradients_sqr = K.square(gradients) - # ... summing over the rows ... - gradients_sqr_sum = K.sum(gradients_sqr, - axis=np.arange(1, len(gradients_sqr.shape))) - # ... and sqrt - gradient_l2_norm = K.sqrt(gradients_sqr_sum) - # compute lambda * (1 - ||grad||)^2 still for each single sample - gradient_penalty = K.square(1 - gradient_l2_norm) - # return the mean as loss over all the batch samples - return K.mean(gradient_penalty) - - def wasserstein_loss(self, y_true, y_pred): - return K.mean(y_true * y_pred) - - def build_generator(self): - - model = Sequential() - - model.add(Dense(128 * 7 * 7, activation="relu", - input_dim=self.latent_dim)) - model.add(Reshape((7, 7, 128))) - model.add(UpSampling2D()) - model.add(Conv2D(128, kernel_size=4, padding="same")) - model.add(BatchNormalization(momentum=0.8)) - model.add(Activation("relu")) - model.add(UpSampling2D()) - model.add(Conv2D(64, kernel_size=4, padding="same")) - model.add(BatchNormalization(momentum=0.8)) - model.add(Activation("relu")) - model.add(Conv2D(self.channels, kernel_size=4, padding="same")) - model.add(Activation("tanh")) - - model.summary() - - noise = Input(shape=(self.latent_dim,)) - img = model(noise) - - return Model(noise, img) - - def build_critic(self): - - model = Sequential() - - model.add(Conv2D(16, kernel_size=3, strides=2, - input_shape=self.img_shape, padding="same")) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Conv2D(32, kernel_size=3, strides=2, padding="same")) - model.add(ZeroPadding2D(padding=((0, 1), (0, 1)))) - model.add(BatchNormalization(momentum=0.8)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Conv2D(64, kernel_size=3, strides=2, padding="same")) - model.add(BatchNormalization(momentum=0.8)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Conv2D(128, kernel_size=3, strides=1, padding="same")) - model.add(BatchNormalization(momentum=0.8)) - model.add(LeakyReLU(alpha=0.2)) - model.add(Dropout(0.25)) - model.add(Flatten()) - model.add(Dense(1)) - - model.summary() - - img = Input(shape=self.img_shape) - validity = model(img) - - return Model(img, validity) - - def train(self, epochs, batch_size, sample_interval=50): - - # Load the dataset - (X_train, _), (_, _) = mnist.load_data() - - # Rescale -1 to 1 - X_train = (X_train.astype(np.float32) - 127.5) / 127.5 - X_train = np.expand_dims(X_train, axis=3) - - # Adversarial ground truths - valid = -np.ones((batch_size, 1)) - fake = np.ones((batch_size, 1)) - dummy = np.zeros((batch_size, 1)) # Dummy gt for gradient penalty - for epoch in range(epochs): - - for _ in range(self.n_critic): - - # --------------------- - # Train Discriminator - # --------------------- - - # Select a random batch of images - idx = np.random.randint(0, X_train.shape[0], 
batch_size) - imgs = X_train[idx] - # Sample generator input - noise = np.random.normal(0, 1, (batch_size, self.latent_dim)) - # Train the critic - d_loss = self.critic_model.train_on_batch([imgs, noise], - [valid, fake, dummy]) - - # --------------------- - # Train Generator - # --------------------- - - g_loss = self.generator_model.train_on_batch(noise, valid) - - # Plot the progress - print("%d [D loss: %f] [G loss: %f]" % (epoch, d_loss[0], g_loss)) - - # If at save interval => save generated image samples - if epoch % sample_interval == 0: - self.sample_images(epoch) - - def sample_images(self, epoch): - r, c = 5, 5 - noise = np.random.normal(0, 1, (r * c, self.latent_dim)) - gen_imgs = self.generator.predict(noise) - - # Rescale images 0 - 1 - gen_imgs = 0.5 * gen_imgs + 0.5 - - fig, axs = plt.subplots(r, c) - cnt = 0 - for i in range(r): - for j in range(c): - axs[i, j].imshow(gen_imgs[cnt, :, :, 0], cmap='gray') - axs[i, j].axis('off') - cnt += 1 - fig.savefig("images/mnist_%d.png" % epoch) - plt.close() - - -if __name__ == '__main__': - wgan = WGANGP() - wgan.train(epochs=30000, batch_size=32, sample_interval=100) -import json -from collections import defaultdict - -name_box_id = defaultdict(list) -id_name = dict() -f = open( - "mscoco2017/annotations/instances_train2017.json", - encoding='utf-8') -data = json.load(f) - -annotations = data['annotations'] -for ant in annotations: - id = ant['image_id'] - name = 'mscoco2017/train2017/%012d.jpg' % id - cat = ant['category_id'] - - if cat >= 1 and cat <= 11: - cat = cat - 1 - elif cat >= 13 and cat <= 25: - cat = cat - 2 - elif cat >= 27 and cat <= 28: - cat = cat - 3 - elif cat >= 31 and cat <= 44: - cat = cat - 5 - elif cat >= 46 and cat <= 65: - cat = cat - 6 - elif cat == 67: - cat = cat - 7 - elif cat == 70: - cat = cat - 9 - elif cat >= 72 and cat <= 82: - cat = cat - 10 - elif cat >= 84 and cat <= 90: - cat = cat - 11 - - name_box_id[name].append([ant['bbox'], cat]) - -f = open('train.txt', 'w') -for key in name_box_id.keys(): - f.write(key) - box_infos = name_box_id[key] - for info in box_infos: - x_min = int(info[0][0]) - y_min = int(info[0][1]) - x_max = x_min + int(info[0][2]) - y_max = y_min + int(info[0][3]) - - box_info = " %d,%d,%d,%d,%d" % ( - x_min, y_min, x_max, y_max, int(info[1])) - f.write(box_info) - f.write('\n') -f.close() -#! /usr/bin/env python -""" -Reads Darknet config and weights and creates Keras model with TF backend. 
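Typical invocation (the file names here are illustrative):
    python convert.py yolov3.cfg yolov3.weights model_data/yolo.h5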
- -""" - -import argparse -import configparser -import io -import os -from collections import defaultdict - -import numpy as np -from keras import backend as K -from keras.layers import (Conv2D, Input, ZeroPadding2D, Add, - UpSampling2D, MaxPooling2D, Concatenate) -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.normalization import BatchNormalization -from keras.models import Model -from keras.regularizers import l2 -from keras.utils.vis_utils import plot_model as plot - - -parser = argparse.ArgumentParser(description='Darknet To Keras Converter.') -parser.add_argument('config_path', help='Path to Darknet cfg file.') -parser.add_argument('weights_path', help='Path to Darknet weights file.') -parser.add_argument('output_path', help='Path to output Keras model file.') -parser.add_argument( - '-p', - '--plot_model', - help='Plot generated Keras model and save as image.', - action='store_true') -parser.add_argument( - '-w', - '--weights_only', - help='Save as Keras weights file instead of model file.', - action='store_true') - - -def unique_config_sections(config_file): - """Convert all config sections to have unique names. - - Adds unique suffixes to config sections for compability with configparser. - """ - section_counters = defaultdict(int) - output_stream = io.StringIO() - with open(config_file) as fin: - for line in fin: - if line.startswith('['): - section = line.strip().strip('[]') - _section = section + '_' + str(section_counters[section]) - section_counters[section] += 1 - line = line.replace(section, _section) - output_stream.write(line) - output_stream.seek(0) - return output_stream - -# %% - - -def _main(args): - config_path = os.path.expanduser(args.config_path) - weights_path = os.path.expanduser(args.weights_path) - assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format( - config_path) - assert weights_path.endswith( - '.weights'), '{} is not a .weights file'.format(weights_path) - - output_path = os.path.expanduser(args.output_path) - assert output_path.endswith( - '.h5'), 'output path {} is not a .h5 file'.format(output_path) - output_root = os.path.splitext(output_path)[0] - - # Load weights and config. 
- print('Loading weights.') - weights_file = open(weights_path, 'rb') - major, minor, revision = np.ndarray( - shape=(3, ), dtype='int32', buffer=weights_file.read(12)) - if (major*10+minor) >= 2 and major < 1000 and minor < 1000: - seen = np.ndarray(shape=(1,), dtype='int64', - buffer=weights_file.read(8)) - else: - seen = np.ndarray(shape=(1,), dtype='int32', - buffer=weights_file.read(4)) - print('Weights Header: ', major, minor, revision, seen) - - print('Parsing Darknet config.') - unique_config_file = unique_config_sections(config_path) - cfg_parser = configparser.ConfigParser() - cfg_parser.read_file(unique_config_file) - - print('Creating Keras model.') - input_layer = Input(shape=(None, None, 3)) - prev_layer = input_layer - all_layers = [] - - weight_decay = float(cfg_parser['net_0']['decay'] - ) if 'net_0' in cfg_parser.sections() else 5e-4 - count = 0 - out_index = [] - for section in cfg_parser.sections(): - print('Parsing section {}'.format(section)) - if section.startswith('convolutional'): - filters = int(cfg_parser[section]['filters']) - size = int(cfg_parser[section]['size']) - stride = int(cfg_parser[section]['stride']) - pad = int(cfg_parser[section]['pad']) - activation = cfg_parser[section]['activation'] - batch_normalize = 'batch_normalize' in cfg_parser[section] - - padding = 'same' if pad == 1 and stride == 1 else 'valid' - - # Setting weights. - # Darknet serializes convolutional weights as: - # [bias/beta, [gamma, mean, variance], conv_weights] - prev_layer_shape = K.int_shape(prev_layer) - - weights_shape = (size, size, prev_layer_shape[-1], filters) - darknet_w_shape = (filters, weights_shape[2], size, size) - weights_size = np.product(weights_shape) - - print('conv2d', 'bn' - if batch_normalize else ' ', activation, weights_shape) - - conv_bias = np.ndarray( - shape=(filters, ), - dtype='float32', - buffer=weights_file.read(filters * 4)) - count += filters - - if batch_normalize: - bn_weights = np.ndarray( - shape=(3, filters), - dtype='float32', - buffer=weights_file.read(filters * 12)) - count += 3 * filters - - bn_weight_list = [ - bn_weights[0], # scale gamma - conv_bias, # shift beta - bn_weights[1], # running mean - bn_weights[2] # running var - ] - - conv_weights = np.ndarray( - shape=darknet_w_shape, - dtype='float32', - buffer=weights_file.read(weights_size * 4)) - count += weights_size - - # DarkNet conv_weights are serialized Caffe-style: - # (out_dim, in_dim, height, width) - # We would like to set these to Tensorflow order: - # (height, width, in_dim, out_dim) - conv_weights = np.transpose(conv_weights, [2, 3, 1, 0]) - conv_weights = [conv_weights] if batch_normalize else [ - conv_weights, conv_bias - ] - - # Handle activation. - act_fn = None - if activation == 'leaky': - pass # Add advanced activation later. 
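# 'leaky' has no Conv2D activation-string equivalent, so act_fn stays None
# here and a separate LeakyReLU(alpha=0.1) layer is appended after the
# convolution (and batch norm, if present) further down.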
- elif activation != 'linear': - raise ValueError( - 'Unknown activation function `{}` in section {}'.format( - activation, section)) - - # Create Conv2D layer - if stride > 1: - # Darknet uses left and top padding instead of 'same' mode - prev_layer = ZeroPadding2D(((1, 0), (1, 0)))(prev_layer) - conv_layer = (Conv2D( - filters, (size, size), - strides=(stride, stride), - kernel_regularizer=l2(weight_decay), - use_bias=not batch_normalize, - weights=conv_weights, - activation=act_fn, - padding=padding))(prev_layer) - - if batch_normalize: - conv_layer = (BatchNormalization( - weights=bn_weight_list))(conv_layer) - prev_layer = conv_layer - - if activation == 'linear': - all_layers.append(prev_layer) - elif activation == 'leaky': - act_layer = LeakyReLU(alpha=0.1)(prev_layer) - prev_layer = act_layer - all_layers.append(act_layer) - - elif section.startswith('route'): - ids = [int(i) for i in cfg_parser[section]['layers'].split(',')] - layers = [all_layers[i] for i in ids] - if len(layers) > 1: - print('Concatenating route layers:', layers) - concatenate_layer = Concatenate()(layers) - all_layers.append(concatenate_layer) - prev_layer = concatenate_layer - else: - skip_layer = layers[0] # only one layer to route - all_layers.append(skip_layer) - prev_layer = skip_layer - - elif section.startswith('maxpool'): - size = int(cfg_parser[section]['size']) - stride = int(cfg_parser[section]['stride']) - all_layers.append( - MaxPooling2D( - pool_size=(size, size), - strides=(stride, stride), - padding='same')(prev_layer)) - prev_layer = all_layers[-1] - - elif section.startswith('shortcut'): - index = int(cfg_parser[section]['from']) - activation = cfg_parser[section]['activation'] - assert activation == 'linear', 'Only linear activation supported.' - all_layers.append(Add()([all_layers[index], prev_layer])) - prev_layer = all_layers[-1] - - elif section.startswith('upsample'): - stride = int(cfg_parser[section]['stride']) - assert stride == 2, 'Only stride=2 supported.' - all_layers.append(UpSampling2D(stride)(prev_layer)) - prev_layer = all_layers[-1] - - elif section.startswith('yolo'): - out_index.append(len(all_layers)-1) - all_layers.append(None) - prev_layer = all_layers[-1] - - elif section.startswith('net'): - pass - - else: - raise ValueError( - 'Unsupported section header type: {}'.format(section)) - - # Create and save model. - if len(out_index) == 0: - out_index.append(len(all_layers)-1) - model = Model(inputs=input_layer, outputs=[ - all_layers[i] for i in out_index]) - print(model.summary()) - if args.weights_only: - model.save_weights('{}'.format(output_path)) - print('Saved Keras weights to {}'.format(output_path)) - else: - model.save('{}'.format(output_path)) - print('Saved Keras model to {}'.format(output_path)) - - # Check to see if all weights have been read. 
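# Each serialized weight is a 4-byte float32, so dividing the leftover byte
# count by 4 gives the number of weights the converter never consumed -- a
# sanity check that the .cfg and .weights files actually belong together.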
- remaining_weights = len(weights_file.read()) / 4 - weights_file.close() - print('Read {} of {} from Darknet weights.'.format(count, count + - remaining_weights)) - if remaining_weights > 0: - print('Warning: {} unused weights'.format(remaining_weights)) - - if args.plot_model: - plot(model, to_file='{}.png'.format(output_root), show_shapes=True) - print('Saved model plot to {}.png'.format(output_root)) - - -if __name__ == '__main__': - _main(parser.parse_args()) -import numpy as np - - -class YOLO_Kmeans: - - def __init__(self, cluster_number, filename): - self.cluster_number = cluster_number - self.filename = filename - - def iou(self, boxes, clusters): # 1 box -> k clusters - n = boxes.shape[0] - k = self.cluster_number - - box_area = boxes[:, 0] * boxes[:, 1] - box_area = box_area.repeat(k) - box_area = np.reshape(box_area, (n, k)) - - cluster_area = clusters[:, 0] * clusters[:, 1] - cluster_area = np.tile(cluster_area, [1, n]) - cluster_area = np.reshape(cluster_area, (n, k)) - - box_w_matrix = np.reshape(boxes[:, 0].repeat(k), (n, k)) - cluster_w_matrix = np.reshape(np.tile(clusters[:, 0], (1, n)), (n, k)) - min_w_matrix = np.minimum(cluster_w_matrix, box_w_matrix) - - box_h_matrix = np.reshape(boxes[:, 1].repeat(k), (n, k)) - cluster_h_matrix = np.reshape(np.tile(clusters[:, 1], (1, n)), (n, k)) - min_h_matrix = np.minimum(cluster_h_matrix, box_h_matrix) - inter_area = np.multiply(min_w_matrix, min_h_matrix) - - result = inter_area / (box_area + cluster_area - inter_area) - return result - - def avg_iou(self, boxes, clusters): - accuracy = np.mean([np.max(self.iou(boxes, clusters), axis=1)]) - return accuracy - - def kmeans(self, boxes, k, dist=np.median): - box_number = boxes.shape[0] - distances = np.empty((box_number, k)) - last_nearest = np.zeros((box_number,)) - np.random.seed() - clusters = boxes[np.random.choice( - box_number, k, replace=False)] # init k clusters - while True: - - distances = 1 - self.iou(boxes, clusters) - - current_nearest = np.argmin(distances, axis=1) - if (last_nearest == current_nearest).all(): - break # clusters won't change - for cluster in range(k): - clusters[cluster] = dist( # update clusters - boxes[current_nearest == cluster], axis=0) - - last_nearest = current_nearest - - return clusters - - def result2txt(self, data): - f = open("yolo_anchors.txt", 'w') - row = np.shape(data)[0] - for i in range(row): - if i == 0: - x_y = "%d,%d" % (data[i][0], data[i][1]) - else: - x_y = ", %d,%d" % (data[i][0], data[i][1]) - f.write(x_y) - f.close() - - def txt2boxes(self): - f = open(self.filename, 'r') - dataSet = [] - for line in f: - infos = line.split(" ") - length = len(infos) - for i in range(1, length): - width = int(infos[i].split(",")[2]) - \ - int(infos[i].split(",")[0]) - height = int(infos[i].split(",")[3]) - \ - int(infos[i].split(",")[1]) - dataSet.append([width, height]) - result = np.array(dataSet) - f.close() - return result - - def txt2clusters(self): - all_boxes = self.txt2boxes() - result = self.kmeans(all_boxes, k=self.cluster_number) - result = result[np.lexsort(result.T[0, None])] - self.result2txt(result) - print("K anchors:\n {}".format(result)) - print("Accuracy: {:.2f}%".format( - self.avg_iou(all_boxes, result) * 100)) - - -if __name__ == "__main__": - cluster_number = 9 - filename = "2012_train.txt" - kmeans = YOLO_Kmeans(cluster_number, filename) - kmeans.txt2clusters() -""" -Retrain the YOLO model for your own dataset.
-""" - -import numpy as np -import keras.backend as K -from keras.layers import Input, Lambda -from keras.models import Model -from keras.optimizers import Adam -from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping - -from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss -from yolo3.utils import get_random_data - - -def _main(): - annotation_path = 'train.txt' - log_dir = 'logs/000/' - classes_path = 'model_data/voc_classes.txt' - anchors_path = 'model_data/yolo_anchors.txt' - class_names = get_classes(classes_path) - num_classes = len(class_names) - anchors = get_anchors(anchors_path) - - input_shape = (416, 416) # multiple of 32, hw - - is_tiny_version = len(anchors) == 6 # default setting - if is_tiny_version: - model = create_tiny_model(input_shape, anchors, num_classes, - freeze_body=2, weights_path='model_data/tiny_yolo_weights.h5') - else: - model = create_model(input_shape, anchors, num_classes, - freeze_body=2, weights_path='model_data/yolo_weights.h5') # make sure you know what you freeze - - logging = TensorBoard(log_dir=log_dir) - checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', - monitor='val_loss', save_weights_only=True, save_best_only=True, period=3) - reduce_lr = ReduceLROnPlateau( - monitor='val_loss', factor=0.1, patience=3, verbose=1) - early_stopping = EarlyStopping( - monitor='val_loss', min_delta=0, patience=10, verbose=1) - - val_split = 0.1 - with open(annotation_path) as f: - lines = f.readlines() - np.random.seed(10101) - np.random.shuffle(lines) - np.random.seed(None) - num_val = int(len(lines)*val_split) - num_train = len(lines) - num_val - - # Train with frozen layers first, to get a stable loss. - # Adjust num epochs to your dataset. This step is enough to obtain a not bad model. - if True: - model.compile(optimizer=Adam(lr=1e-3), loss={ - # use custom yolo_loss Lambda layer. - 'yolo_loss': lambda y_true, y_pred: y_pred}) - - batch_size = 32 - print('Train on {} samples, val on {} samples, with batch size {}.'.format( - num_train, num_val, batch_size)) - model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes), - steps_per_epoch=max(1, num_train//batch_size), - validation_data=data_generator_wrapper( - lines[num_train:], batch_size, input_shape, anchors, num_classes), - validation_steps=max(1, num_val//batch_size), - epochs=50, - initial_epoch=0, - callbacks=[logging, checkpoint]) - model.save_weights(log_dir + 'trained_weights_stage_1.h5') - - # Unfreeze and continue training, to fine-tune. - # Train longer if the result is not good. 
- if True: - for i in range(len(model.layers)): - model.layers[i].trainable = True - # recompile to apply the change - model.compile(optimizer=Adam(lr=1e-4), - loss={'yolo_loss': lambda y_true, y_pred: y_pred}) - print('Unfreeze all of the layers.') - - batch_size = 32 # note that more GPU memory is required after unfreezing the body - print('Train on {} samples, val on {} samples, with batch size {}.'.format( - num_train, num_val, batch_size)) - model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes), - steps_per_epoch=max(1, num_train//batch_size), - validation_data=data_generator_wrapper( - lines[num_train:], batch_size, input_shape, anchors, num_classes), - validation_steps=max(1, num_val//batch_size), - epochs=100, - initial_epoch=50, - callbacks=[logging, checkpoint, reduce_lr, early_stopping]) - model.save_weights(log_dir + 'trained_weights_final.h5') - - # Further training if needed. - - -def get_classes(classes_path): - '''loads the classes''' - with open(classes_path) as f: - class_names = f.readlines() - class_names = [c.strip() for c in class_names] - return class_names - - -def get_anchors(anchors_path): - '''loads the anchors from a file''' - with open(anchors_path) as f: - anchors = f.readline() - anchors = [float(x) for x in anchors.split(',')] - return np.array(anchors).reshape(-1, 2) - - -def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2, - weights_path='model_data/yolo_weights.h5'): - '''create the training model''' - K.clear_session() # get a new session - image_input = Input(shape=(None, None, 3)) - h, w = input_shape - num_anchors = len(anchors) - - y_true = [Input(shape=(h//{0: 32, 1: 16, 2: 8}[l], w//{0: 32, 1: 16, 2: 8}[l], - num_anchors//3, num_classes+5)) for l in range(3)] - - model_body = yolo_body(image_input, num_anchors//3, num_classes) - print('Create YOLOv3 model with {} anchors and {} classes.'.format( - num_anchors, num_classes)) - - if load_pretrained: - model_body.load_weights(weights_path, by_name=True, skip_mismatch=True) - print('Load weights {}.'.format(weights_path)) - if freeze_body in [1, 2]: - # Freeze darknet53 body or freeze all but 3 output layers. 
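# The tuple-indexing below is a terse spelling of:
#   num = 185 if freeze_body == 1 else len(model_body.layers) - 3
# i.e. freeze_body=1 freezes just the darknet53 backbone (its first 185
# layers), while freeze_body=2 freezes everything except the three YOLO
# output layers.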
- num = (185, len(model_body.layers)-3)[freeze_body-1] - for i in range(num): - model_body.layers[i].trainable = False - print('Freeze the first {} layers of total {} layers.'.format( - num, len(model_body.layers))) - - model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', - arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})( - [*model_body.output, *y_true]) - model = Model([model_body.input, *y_true], model_loss) - - return model - - -def create_tiny_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2, - weights_path='model_data/tiny_yolo_weights.h5'): - '''create the training model, for Tiny YOLOv3''' - K.clear_session() # get a new session - image_input = Input(shape=(None, None, 3)) - h, w = input_shape - num_anchors = len(anchors) - - y_true = [Input(shape=(h//{0: 32, 1: 16}[l], w//{0: 32, 1: 16}[l], - num_anchors//2, num_classes+5)) for l in range(2)] - - model_body = tiny_yolo_body(image_input, num_anchors//2, num_classes) - print('Create Tiny YOLOv3 model with {} anchors and {} classes.'.format( - num_anchors, num_classes)) - - if load_pretrained: - model_body.load_weights(weights_path, by_name=True, skip_mismatch=True) - print('Load weights {}.'.format(weights_path)) - if freeze_body in [1, 2]: - # Freeze the darknet body or freeze all but 2 output layers. - num = (20, len(model_body.layers)-2)[freeze_body-1] - for i in range(num): - model_body.layers[i].trainable = False - print('Freeze the first {} layers of total {} layers.'.format( - num, len(model_body.layers))) - - model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', - arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.7})( - [*model_body.output, *y_true]) - model = Model([model_body.input, *y_true], model_loss) - - return model - - -def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes): - '''data generator for fit_generator''' - n = len(annotation_lines) - i = 0 - while True: - image_data = [] - box_data = [] - for b in range(batch_size): - if i == 0: - np.random.shuffle(annotation_lines) - image, box = get_random_data( - annotation_lines[i], input_shape, random=True) - image_data.append(image) - box_data.append(box) - i = (i+1) % n - image_data = np.array(image_data) - box_data = np.array(box_data) - y_true = preprocess_true_boxes( - box_data, input_shape, anchors, num_classes) - yield [image_data, *y_true], np.zeros(batch_size) - - -def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes): - n = len(annotation_lines) - if n == 0 or batch_size <= 0: - return None - return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes) - - -if __name__ == '__main__': - _main() -""" -Retrain the YOLO model for your own dataset. 
-""" -import os -import numpy as np -import keras.backend as K -from keras.layers import Input, Lambda -from keras.models import Model -from keras.optimizers import Adam -from keras.callbacks import TensorBoard, ModelCheckpoint, ReduceLROnPlateau, EarlyStopping - -from yolo3.model import preprocess_true_boxes, yolo_body, tiny_yolo_body, yolo_loss -from yolo3.utils import get_random_data - - -def _main(): - annotation_path = 'train.txt' - log_dir = 'logs/000/' - classes_path = 'model_data/coco_classes.txt' - anchors_path = 'model_data/yolo_anchors.txt' - class_names = get_classes(classes_path) - num_classes = len(class_names) - anchors = get_anchors(anchors_path) - - input_shape = (416, 416) # multiple of 32, hw - - model, bottleneck_model, last_layer_model = create_model(input_shape, anchors, num_classes, - freeze_body=2, weights_path='model_data/yolo_weights.h5') # make sure you know what you freeze - - logging = TensorBoard(log_dir=log_dir) - checkpoint = ModelCheckpoint(log_dir + 'ep{epoch:03d}-loss{loss:.3f}-val_loss{val_loss:.3f}.h5', - monitor='val_loss', save_weights_only=True, save_best_only=True, period=3) - reduce_lr = ReduceLROnPlateau( - monitor='val_loss', factor=0.1, patience=3, verbose=1) - early_stopping = EarlyStopping( - monitor='val_loss', min_delta=0, patience=10, verbose=1) - - val_split = 0.1 - with open(annotation_path) as f: - lines = f.readlines() - np.random.seed(10101) - np.random.shuffle(lines) - np.random.seed(None) - num_val = int(len(lines)*val_split) - num_train = len(lines) - num_val - - # Train with frozen layers first, to get a stable loss. - # Adjust num epochs to your dataset. This step is enough to obtain a not bad model. - if True: - # perform bottleneck training - if not os.path.isfile("bottlenecks.npz"): - print("calculating bottlenecks") - batch_size = 8 - bottlenecks = bottleneck_model.predict_generator(data_generator_wrapper(lines, batch_size, input_shape, anchors, num_classes, random=False, verbose=True), - steps=(len(lines)//batch_size)+1, max_queue_size=1) - np.savez("bottlenecks.npz", - bot0=bottlenecks[0], bot1=bottlenecks[1], bot2=bottlenecks[2]) - - # load bottleneck features from file - dict_bot = np.load("bottlenecks.npz") - bottlenecks_train = [dict_bot["bot0"][:num_train], - dict_bot["bot1"][:num_train], dict_bot["bot2"][:num_train]] - bottlenecks_val = [dict_bot["bot0"][num_train:], - dict_bot["bot1"][num_train:], dict_bot["bot2"][num_train:]] - - # train last layers with fixed bottleneck features - batch_size = 8 - print("Training last layers with bottleneck features") - print('with {} samples, val on {} samples and batch size {}.'.format( - num_train, num_val, batch_size)) - last_layer_model.compile(optimizer='adam', loss={ - 'yolo_loss': lambda y_true, y_pred: y_pred}) - last_layer_model.fit_generator(bottleneck_generator(lines[:num_train], batch_size, input_shape, anchors, num_classes, bottlenecks_train), - steps_per_epoch=max( - 1, num_train//batch_size), - validation_data=bottleneck_generator( - lines[num_train:], batch_size, input_shape, anchors, num_classes, bottlenecks_val), - validation_steps=max( - 1, num_val//batch_size), - epochs=30, - initial_epoch=0, max_queue_size=1) - model.save_weights(log_dir + 'trained_weights_stage_0.h5') - - # train last layers with random augmented data - model.compile(optimizer=Adam(lr=1e-3), loss={ - # use custom yolo_loss Lambda layer. 
- 'yolo_loss': lambda y_true, y_pred: y_pred}) - batch_size = 16 - print('Train on {} samples, val on {} samples, with batch size {}.'.format( - num_train, num_val, batch_size)) - model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes), - steps_per_epoch=max(1, num_train//batch_size), - validation_data=data_generator_wrapper( - lines[num_train:], batch_size, input_shape, anchors, num_classes), - validation_steps=max(1, num_val//batch_size), - epochs=50, - initial_epoch=0, - callbacks=[logging, checkpoint]) - model.save_weights(log_dir + 'trained_weights_stage_1.h5') - - # Unfreeze and continue training, to fine-tune. - # Train longer if the result is not good. - if True: - for i in range(len(model.layers)): - model.layers[i].trainable = True - # recompile to apply the change - model.compile(optimizer=Adam(lr=1e-4), - loss={'yolo_loss': lambda y_true, y_pred: y_pred}) - print('Unfreeze all of the layers.') - - batch_size = 4 # note that more GPU memory is required after unfreezing the body - print('Train on {} samples, val on {} samples, with batch size {}.'.format( - num_train, num_val, batch_size)) - model.fit_generator(data_generator_wrapper(lines[:num_train], batch_size, input_shape, anchors, num_classes), - steps_per_epoch=max(1, num_train//batch_size), - validation_data=data_generator_wrapper( - lines[num_train:], batch_size, input_shape, anchors, num_classes), - validation_steps=max(1, num_val//batch_size), - epochs=100, - initial_epoch=50, - callbacks=[logging, checkpoint, reduce_lr, early_stopping]) - model.save_weights(log_dir + 'trained_weights_final.h5') - - # Further training if needed. - - -def get_classes(classes_path): - '''loads the classes''' - with open(classes_path) as f: - class_names = f.readlines() - class_names = [c.strip() for c in class_names] - return class_names - - -def get_anchors(anchors_path): - '''loads the anchors from a file''' - with open(anchors_path) as f: - anchors = f.readline() - anchors = [float(x) for x in anchors.split(',')] - return np.array(anchors).reshape(-1, 2) - - -def create_model(input_shape, anchors, num_classes, load_pretrained=True, freeze_body=2, - weights_path='model_data/yolo_weights.h5'): - '''create the training model''' - K.clear_session() # get a new session - image_input = Input(shape=(None, None, 3)) - h, w = input_shape - num_anchors = len(anchors) - - y_true = [Input(shape=(h//{0: 32, 1: 16, 2: 8}[l], w//{0: 32, 1: 16, 2: 8}[l], - num_anchors//3, num_classes+5)) for l in range(3)] - - model_body = yolo_body(image_input, num_anchors//3, num_classes) - print('Create YOLOv3 model with {} anchors and {} classes.'.format( - num_anchors, num_classes)) - - if load_pretrained: - model_body.load_weights(weights_path, by_name=True, skip_mismatch=True) - print('Load weights {}.'.format(weights_path)) - if freeze_body in [1, 2]: - # Freeze darknet53 body or freeze all but 3 output layers. 
- num = (185, len(model_body.layers)-3)[freeze_body-1] - for i in range(num): - model_body.layers[i].trainable = False - print('Freeze the first {} layers of total {} layers.'.format( - num, len(model_body.layers))) - - # get output of second last layers and create bottleneck model of it - out1 = model_body.layers[246].output - out2 = model_body.layers[247].output - out3 = model_body.layers[248].output - bottleneck_model = Model([model_body.input, *y_true], [out1, out2, out3]) - - # create last layer model of last layers from yolo model - in0 = Input(shape=bottleneck_model.output[0].shape[1:].as_list()) - in1 = Input(shape=bottleneck_model.output[1].shape[1:].as_list()) - in2 = Input(shape=bottleneck_model.output[2].shape[1:].as_list()) - last_out0 = model_body.layers[249](in0) - last_out1 = model_body.layers[250](in1) - last_out2 = model_body.layers[251](in2) - model_last = Model(inputs=[in0, in1, in2], outputs=[ - last_out0, last_out1, last_out2]) - model_loss_last = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', - arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})( - [*model_last.output, *y_true]) - last_layer_model = Model([in0, in1, in2, *y_true], model_loss_last) - - model_loss = Lambda(yolo_loss, output_shape=(1,), name='yolo_loss', - arguments={'anchors': anchors, 'num_classes': num_classes, 'ignore_thresh': 0.5})( - [*model_body.output, *y_true]) - model = Model([model_body.input, *y_true], model_loss) - - return model, bottleneck_model, last_layer_model - - -def data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, random=True, verbose=False): - '''data generator for fit_generator''' - n = len(annotation_lines) - i = 0 - while True: - image_data = [] - box_data = [] - for b in range(batch_size): - if i == 0 and random: - np.random.shuffle(annotation_lines) - image, box = get_random_data( - annotation_lines[i], input_shape, random=random) - image_data.append(image) - box_data.append(box) - i = (i+1) % n - image_data = np.array(image_data) - if verbose: - print("Progress: ", i, "/", n) - box_data = np.array(box_data) - y_true = preprocess_true_boxes( - box_data, input_shape, anchors, num_classes) - yield [image_data, *y_true], np.zeros(batch_size) - - -def data_generator_wrapper(annotation_lines, batch_size, input_shape, anchors, num_classes, random=True, verbose=False): - n = len(annotation_lines) - if n == 0 or batch_size <= 0: - return None - return data_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, random, verbose) - - -def bottleneck_generator(annotation_lines, batch_size, input_shape, anchors, num_classes, bottlenecks): - n = len(annotation_lines) - i = 0 - while True: - box_data = [] - b0 = np.zeros( - (batch_size, bottlenecks[0].shape[1], bottlenecks[0].shape[2], bottlenecks[0].shape[3])) - b1 = np.zeros( - (batch_size, bottlenecks[1].shape[1], bottlenecks[1].shape[2], bottlenecks[1].shape[3])) - b2 = np.zeros( - (batch_size, bottlenecks[2].shape[1], bottlenecks[2].shape[2], bottlenecks[2].shape[3])) - for b in range(batch_size): - _, box = get_random_data( - annotation_lines[i], input_shape, random=False, proc_img=False) - box_data.append(box) - b0[b] = bottlenecks[0][i] - b1[b] = bottlenecks[1][i] - b2[b] = bottlenecks[2][i] - i = (i+1) % n - box_data = np.array(box_data) - y_true = preprocess_true_boxes( - box_data, input_shape, anchors, num_classes) - yield [b0, b1, b2, *y_true], np.zeros(batch_size) - - -if __name__ == '__main__': - _main() -import xml.etree.ElementTree as ET 
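# Like the COCO converter above, this VOC converter emits one line per
# image in the annotation format that train.py consumes:
#   path/to/image.jpg x_min,y_min,x_max,y_max,class_id x_min,y_min,...
# e.g. (box values made up for illustration):
#   VOCdevkit/VOC2007/JPEGImages/000001.jpg 48,240,195,371,11 8,12,352,498,14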
-from os import getcwd - -sets = [('2007', 'train'), ('2007', 'val'), ('2007', 'test')] - -classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", - "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] - - -def convert_annotation(year, image_id, list_file): - in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml' % (year, image_id)) - tree = ET.parse(in_file) - root = tree.getroot() - - for obj in root.iter('object'): - difficult = obj.find('difficult').text - cls = obj.find('name').text - if cls not in classes or int(difficult) == 1: - continue - cls_id = classes.index(cls) - xmlbox = obj.find('bndbox') - b = (int(xmlbox.find('xmin').text), int(xmlbox.find('ymin').text), - int(xmlbox.find('xmax').text), int(xmlbox.find('ymax').text)) - list_file.write(" " + ",".join([str(a) - for a in b]) + ',' + str(cls_id)) - - -wd = getcwd() - -for year, image_set in sets: - image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt' % - (year, image_set)).read().strip().split() - list_file = open('%s_%s.txt' % (year, image_set), 'w') - for image_id in image_ids: - list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg' % - (wd, year, image_id)) - convert_annotation(year, image_id, list_file) - list_file.write('\n') - list_file.close() -# -*- coding: utf-8 -*- -""" -Class definition of YOLO_v3 style detection model on image and video -""" - -import colorsys -import os -from timeit import default_timer as timer - -import numpy as np -from keras import backend as K -from keras.models import load_model -from keras.layers import Input -from PIL import Image, ImageFont, ImageDraw - -from yolo3.model import yolo_eval, yolo_body, tiny_yolo_body -from yolo3.utils import letterbox_image -import os -from keras.utils import multi_gpu_model - - -class YOLO(object): - _defaults = { - "model_path": 'model_data/yolo.h5', - "anchors_path": 'model_data/yolo_anchors.txt', - "classes_path": 'model_data/coco_classes.txt', - "score": 0.3, - "iou": 0.45, - "model_image_size": (416, 416), - "gpu_num": 1, - } - - @classmethod - def get_defaults(cls, n): - if n in cls._defaults: - return cls._defaults[n] - else: - return "Unrecognized attribute name '" + n + "'" - - def __init__(self, **kwargs): - self.__dict__.update(self._defaults) # set up default values - self.__dict__.update(kwargs) # and update with user overrides - self.class_names = self._get_class() - self.anchors = self._get_anchors() - self.sess = K.get_session() - self.boxes, self.scores, self.classes = self.generate() - - def _get_class(self): - classes_path = os.path.expanduser(self.classes_path) - with open(classes_path) as f: - class_names = f.readlines() - class_names = [c.strip() for c in class_names] - return class_names - - def _get_anchors(self): - anchors_path = os.path.expanduser(self.anchors_path) - with open(anchors_path) as f: - anchors = f.readline() - anchors = [float(x) for x in anchors.split(',')] - return np.array(anchors).reshape(-1, 2) - - def generate(self): - model_path = os.path.expanduser(self.model_path) - assert model_path.endswith( - '.h5'), 'Keras model or weights must be a .h5 file.' - - # Load model, or construct model and load weights. 
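# Two load paths are tried: a complete .h5 model first and, failing that,
# the architecture is rebuilt in code and the weights are loaded into it by
# name. The assert in the else-branch checks that the last layer emits
# num_anchors / num_outputs * (num_classes + 5) channels per cell -- 4 box
# coordinates plus 1 objectness score plus num_classes class scores for
# each anchor at that scale.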
- num_anchors = len(self.anchors) - num_classes = len(self.class_names) - is_tiny_version = num_anchors == 6 # default setting - try: - self.yolo_model = load_model(model_path, compile=False) - except: - self.yolo_model = tiny_yolo_body(Input(shape=(None, None, 3)), num_anchors//2, num_classes) \ - if is_tiny_version else yolo_body(Input(shape=(None, None, 3)), num_anchors//3, num_classes) - # make sure model, anchors and classes match - self.yolo_model.load_weights(self.model_path) - else: - assert self.yolo_model.layers[-1].output_shape[-1] == \ - num_anchors/len(self.yolo_model.output) * (num_classes + 5), \ - 'Mismatch between model and given anchor and class sizes' - - print('{} model, anchors, and classes loaded.'.format(model_path)) - - # Generate colors for drawing bounding boxes. - hsv_tuples = [(x / len(self.class_names), 1., 1.) - for x in range(len(self.class_names))] - self.colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) - self.colors = list( - map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), - self.colors)) - np.random.seed(10101) # Fixed seed for consistent colors across runs. - # Shuffle colors to decorrelate adjacent classes. - np.random.shuffle(self.colors) - np.random.seed(None) # Reset seed to default. - - # Generate output tensor targets for filtered bounding boxes. - self.input_image_shape = K.placeholder(shape=(2, )) - if self.gpu_num >= 2: - self.yolo_model = multi_gpu_model( - self.yolo_model, gpus=self.gpu_num) - boxes, scores, classes = yolo_eval(self.yolo_model.output, self.anchors, - len(self.class_names), self.input_image_shape, - score_threshold=self.score, iou_threshold=self.iou) - return boxes, scores, classes - - def detect_image(self, image): - start = timer() - - if self.model_image_size != (None, None): - assert self.model_image_size[0] % 32 == 0, 'Multiples of 32 required' - assert self.model_image_size[1] % 32 == 0, 'Multiples of 32 required' - boxed_image = letterbox_image( - image, tuple(reversed(self.model_image_size))) - else: - new_image_size = (image.width - (image.width % 32), - image.height - (image.height % 32)) - boxed_image = letterbox_image(image, new_image_size) - image_data = np.array(boxed_image, dtype='float32') - - print(image_data.shape) - image_data /= 255. - image_data = np.expand_dims(image_data, 0) # Add batch dimension. 
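# Two non-obvious feed_dict entries below: input_image_shape carries the
# original image size so yolo_eval can map boxes back from the letterboxed
# model input to image coordinates, and K.learning_phase(): 0 forces
# inference behaviour for layers such as BatchNormalization.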
- - out_boxes, out_scores, out_classes = self.sess.run( - [self.boxes, self.scores, self.classes], - feed_dict={ - self.yolo_model.input: image_data, - self.input_image_shape: [image.size[1], image.size[0]], - K.learning_phase(): 0 - }) - - print('Found {} boxes for {}'.format(len(out_boxes), 'img')) - - font = ImageFont.truetype(font='font/FiraMono-Medium.otf', - size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) - thickness = (image.size[0] + image.size[1]) // 300 - - for i, c in reversed(list(enumerate(out_classes))): - predicted_class = self.class_names[c] - box = out_boxes[i] - score = out_scores[i] - - label = '{} {:.2f}'.format(predicted_class, score) - draw = ImageDraw.Draw(image) - label_size = draw.textsize(label, font) - - top, left, bottom, right = box - top = max(0, np.floor(top + 0.5).astype('int32')) - left = max(0, np.floor(left + 0.5).astype('int32')) - bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32')) - right = min(image.size[0], np.floor(right + 0.5).astype('int32')) - print(label, (left, top), (right, bottom)) - - if top - label_size[1] >= 0: - text_origin = np.array([left, top - label_size[1]]) - else: - text_origin = np.array([left, top + 1]) - - # My kingdom for a good redistributable image drawing library. - for i in range(thickness): - draw.rectangle( - [left + i, top + i, right - i, bottom - i], - outline=self.colors[c]) - draw.rectangle( - [tuple(text_origin), tuple(text_origin + label_size)], - fill=self.colors[c]) - draw.text(text_origin, label, fill=(0, 0, 0), font=font) - del draw - - end = timer() - print(end - start) - return image - - def close_session(self): - self.sess.close() - - -def detect_video(yolo, video_path, output_path=""): - import cv2 - vid = cv2.VideoCapture(video_path) - if not vid.isOpened(): - raise IOError("Couldn't open webcam or video") - video_FourCC = int(vid.get(cv2.CAP_PROP_FOURCC)) - video_fps = vid.get(cv2.CAP_PROP_FPS) - video_size = (int(vid.get(cv2.CAP_PROP_FRAME_WIDTH)), - int(vid.get(cv2.CAP_PROP_FRAME_HEIGHT))) - isOutput = True if output_path != "" else False - if isOutput: - print("!!! TYPE:", type(output_path), type( - video_FourCC), type(video_fps), type(video_size)) - out = cv2.VideoWriter(output_path, video_FourCC, video_fps, video_size) - accum_time = 0 - curr_fps = 0 - fps = "FPS: ??" - prev_time = timer() - while True: - return_value, frame = vid.read() - image = Image.fromarray(frame) - image = yolo.detect_image(image) - result = np.asarray(image) - curr_time = timer() - exec_time = curr_time - prev_time - prev_time = curr_time - accum_time = accum_time + exec_time - curr_fps = curr_fps + 1 - if accum_time > 1: - accum_time = accum_time - 1 - fps = "FPS: " + str(curr_fps) - curr_fps = 0 - cv2.putText(result, text=fps, org=(3, 15), fontFace=cv2.FONT_HERSHEY_SIMPLEX, - fontScale=0.50, color=(255, 0, 0), thickness=2) - cv2.namedWindow("result", cv2.WINDOW_NORMAL) - cv2.imshow("result", result) - if isOutput: - out.write(result) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - yolo.close_session() -import sys -import argparse -from yolo import YOLO, detect_video -from PIL import Image - - -def detect_img(yolo): - while True: - img = input('Input image filename:') - try: - image = Image.open(img) - except: - print('Open Error! 
Try again!') - continue - else: - r_image = yolo.detect_image(image) - r_image.show() - yolo.close_session() - - -FLAGS = None - -if __name__ == '__main__': - # class YOLO defines the default value, so suppress any default here - parser = argparse.ArgumentParser(argument_default=argparse.SUPPRESS) - ''' - Command line options - ''' - parser.add_argument( - '--model', type=str, - help='path to model weight file, default ' + - YOLO.get_defaults("model_path") - ) - - parser.add_argument( - '--anchors', type=str, - help='path to anchor definitions, default ' + - YOLO.get_defaults("anchors_path") - ) - - parser.add_argument( - '--classes', type=str, - help='path to class definitions, default ' + - YOLO.get_defaults("classes_path") - ) - - parser.add_argument( - '--gpu_num', type=int, - help='Number of GPU to use, default ' + - str(YOLO.get_defaults("gpu_num")) - ) - - parser.add_argument( - '--image', default=False, action="store_true", - help='Image detection mode, will ignore all positional arguments' - ) - ''' - Command line positional arguments -- for video detection mode - ''' - parser.add_argument( - "--input", nargs='?', type=str, required=False, default='./path2your_video', - help="Video input path" - ) - - parser.add_argument( - "--output", nargs='?', type=str, default="", - help="[Optional] Video output path" - ) - - FLAGS = parser.parse_args() - - if FLAGS.image: - """ - Image detection mode, disregard any remaining command line arguments - """ - print("Image detection mode") - if "input" in FLAGS: - print(" Ignoring remaining command line arguments: " + - FLAGS.input + "," + FLAGS.output) - detect_img(YOLO(**vars(FLAGS))) - elif "input" in FLAGS: - detect_video(YOLO(**vars(FLAGS)), FLAGS.input, FLAGS.output) - else: - print("Must specify at least video_input_path. 
See usage with --help.") -"""YOLO_v3 Model Defined in Keras.""" - -from functools import wraps - -import numpy as np -import tensorflow as tf -from keras import backend as K -from keras.layers import Conv2D, Add, ZeroPadding2D, UpSampling2D, Concatenate, MaxPooling2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.normalization import BatchNormalization -from keras.models import Model -from keras.regularizers import l2 - -from yolo3.utils import compose - - -@wraps(Conv2D) -def DarknetConv2D(*args, **kwargs): - """Wrapper to set Darknet parameters for Convolution2D.""" - darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)} - darknet_conv_kwargs['padding'] = 'valid' if kwargs.get( - 'strides') == (2, 2) else 'same' - darknet_conv_kwargs.update(kwargs) - return Conv2D(*args, **darknet_conv_kwargs) - - -def DarknetConv2D_BN_Leaky(*args, **kwargs): - """Darknet Convolution2D followed by BatchNormalization and LeakyReLU.""" - no_bias_kwargs = {'use_bias': False} - no_bias_kwargs.update(kwargs) - return compose( - DarknetConv2D(*args, **no_bias_kwargs), - BatchNormalization(), - LeakyReLU(alpha=0.1)) - - -def resblock_body(x, num_filters, num_blocks): - '''A series of resblocks starting with a downsampling Convolution2D''' - # Darknet uses left and top padding instead of 'same' mode - x = ZeroPadding2D(((1, 0), (1, 0)))(x) - x = DarknetConv2D_BN_Leaky(num_filters, (3, 3), strides=(2, 2))(x) - for i in range(num_blocks): - y = compose( - DarknetConv2D_BN_Leaky(num_filters//2, (1, 1)), - DarknetConv2D_BN_Leaky(num_filters, (3, 3)))(x) - x = Add()([x, y]) - return x - - -def darknet_body(x): - '''Darknent body having 52 Convolution2D layers''' - x = DarknetConv2D_BN_Leaky(32, (3, 3))(x) - x = resblock_body(x, 64, 1) - x = resblock_body(x, 128, 2) - x = resblock_body(x, 256, 8) - x = resblock_body(x, 512, 8) - x = resblock_body(x, 1024, 4) - return x - - -def make_last_layers(x, num_filters, out_filters): - '''6 Conv2D_BN_Leaky layers followed by a Conv2D_linear layer''' - x = compose( - DarknetConv2D_BN_Leaky(num_filters, (1, 1)), - DarknetConv2D_BN_Leaky(num_filters*2, (3, 3)), - DarknetConv2D_BN_Leaky(num_filters, (1, 1)), - DarknetConv2D_BN_Leaky(num_filters*2, (3, 3)), - DarknetConv2D_BN_Leaky(num_filters, (1, 1)))(x) - y = compose( - DarknetConv2D_BN_Leaky(num_filters*2, (3, 3)), - DarknetConv2D(out_filters, (1, 1)))(x) - return x, y - - -def yolo_body(inputs, num_anchors, num_classes): - """Create YOLO_V3 model CNN body in Keras.""" - darknet = Model(inputs, darknet_body(inputs)) - x, y1 = make_last_layers(darknet.output, 512, num_anchors*(num_classes+5)) - - x = compose( - DarknetConv2D_BN_Leaky(256, (1, 1)), - UpSampling2D(2))(x) - x = Concatenate()([x, darknet.layers[152].output]) - x, y2 = make_last_layers(x, 256, num_anchors*(num_classes+5)) - - x = compose( - DarknetConv2D_BN_Leaky(128, (1, 1)), - UpSampling2D(2))(x) - x = Concatenate()([x, darknet.layers[92].output]) - x, y3 = make_last_layers(x, 128, num_anchors*(num_classes+5)) - - return Model(inputs, [y1, y2, y3]) - - -def tiny_yolo_body(inputs, num_anchors, num_classes): - '''Create Tiny YOLO_v3 model CNN body in keras.''' - x1 = compose( - DarknetConv2D_BN_Leaky(16, (3, 3)), - MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), - DarknetConv2D_BN_Leaky(32, (3, 3)), - MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), - DarknetConv2D_BN_Leaky(64, (3, 3)), - MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), - DarknetConv2D_BN_Leaky(128, (3, 3)), - 
MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), - DarknetConv2D_BN_Leaky(256, (3, 3)))(inputs) - x2 = compose( - MaxPooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'), - DarknetConv2D_BN_Leaky(512, (3, 3)), - MaxPooling2D(pool_size=(2, 2), strides=(1, 1), padding='same'), - DarknetConv2D_BN_Leaky(1024, (3, 3)), - DarknetConv2D_BN_Leaky(256, (1, 1)))(x1) - y1 = compose( - DarknetConv2D_BN_Leaky(512, (3, 3)), - DarknetConv2D(num_anchors*(num_classes+5), (1, 1)))(x2) - - x2 = compose( - DarknetConv2D_BN_Leaky(128, (1, 1)), - UpSampling2D(2))(x2) - y2 = compose( - Concatenate(), - DarknetConv2D_BN_Leaky(256, (3, 3)), - DarknetConv2D(num_anchors*(num_classes+5), (1, 1)))([x2, x1]) - - return Model(inputs, [y1, y2]) - - -def yolo_head(feats, anchors, num_classes, input_shape, calc_loss=False): - """Convert final layer features to bounding box parameters.""" - num_anchors = len(anchors) - # Reshape to batch, height, width, num_anchors, box_params. - anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) - - grid_shape = K.shape(feats)[1:3] # height, width - grid_y = K.tile(K.reshape(K.arange(0, stop=grid_shape[0]), [-1, 1, 1, 1]), - [1, grid_shape[1], 1, 1]) - grid_x = K.tile(K.reshape(K.arange(0, stop=grid_shape[1]), [1, -1, 1, 1]), - [grid_shape[0], 1, 1, 1]) - grid = K.concatenate([grid_x, grid_y]) - grid = K.cast(grid, K.dtype(feats)) - - feats = K.reshape( - feats, [-1, grid_shape[0], grid_shape[1], num_anchors, num_classes + 5]) - - # Adjust preditions to each spatial grid point and anchor size. - box_xy = (K.sigmoid(feats[..., :2]) + grid) / \ - K.cast(grid_shape[::-1], K.dtype(feats)) - box_wh = K.exp(feats[..., 2:4]) * anchors_tensor / \ - K.cast(input_shape[::-1], K.dtype(feats)) - box_confidence = K.sigmoid(feats[..., 4:5]) - box_class_probs = K.sigmoid(feats[..., 5:]) - - if calc_loss == True: - return grid, feats, box_xy, box_wh - return box_xy, box_wh, box_confidence, box_class_probs - - -def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): - '''Get corrected boxes''' - box_yx = box_xy[..., ::-1] - box_hw = box_wh[..., ::-1] - input_shape = K.cast(input_shape, K.dtype(box_yx)) - image_shape = K.cast(image_shape, K.dtype(box_yx)) - new_shape = K.round(image_shape * K.min(input_shape/image_shape)) - offset = (input_shape-new_shape)/2./input_shape - scale = input_shape/new_shape - box_yx = (box_yx - offset) * scale - box_hw *= scale - - box_mins = box_yx - (box_hw / 2.) - box_maxes = box_yx + (box_hw / 2.) - boxes = K.concatenate([ - box_mins[..., 0:1], # y_min - box_mins[..., 1:2], # x_min - box_maxes[..., 0:1], # y_max - box_maxes[..., 1:2] # x_max - ]) - - # Scale boxes back to original image shape. 
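-    # Until here the coordinates are fractions of the original image, so
-    # multiplying (y_min, x_min, y_max, x_max) by (h, w, h, w) returns pixel
-    # coordinates; e.g. a y_min of 0.25 on a 480-pixel-high image maps to
-    # row 120.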
-    boxes *= K.concatenate([image_shape, image_shape])
-    return boxes
-
-
-def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape):
-    '''Process Conv layer output'''
-    box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats,
-                                                                anchors, num_classes, input_shape)
-    boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape)
-    boxes = K.reshape(boxes, [-1, 4])
-    box_scores = box_confidence * box_class_probs
-    box_scores = K.reshape(box_scores, [-1, num_classes])
-    return boxes, box_scores
-
-
-def yolo_eval(yolo_outputs,
-              anchors,
-              num_classes,
-              image_shape,
-              max_boxes=20,
-              score_threshold=.6,
-              iou_threshold=.5):
-    """Evaluate YOLO model on given input and return filtered boxes."""
-    num_layers = len(yolo_outputs)
-    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]] if num_layers == 3 else [
-        [3, 4, 5], [1, 2, 3]]  # default setting
-    input_shape = K.shape(yolo_outputs[0])[1:3] * 32
-    boxes = []
-    box_scores = []
-    for l in range(num_layers):
-        _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[l],
-                                                    anchors[anchor_mask[l]], num_classes, input_shape, image_shape)
-        boxes.append(_boxes)
-        box_scores.append(_box_scores)
-    boxes = K.concatenate(boxes, axis=0)
-    box_scores = K.concatenate(box_scores, axis=0)
-
-    mask = box_scores >= score_threshold
-    max_boxes_tensor = K.constant(max_boxes, dtype='int32')
-    boxes_ = []
-    scores_ = []
-    classes_ = []
-    for c in range(num_classes):
-        # TODO: use keras backend instead of tf.
-        class_boxes = tf.boolean_mask(boxes, mask[:, c])
-        class_box_scores = tf.boolean_mask(box_scores[:, c], mask[:, c])
-        nms_index = tf.image.non_max_suppression(
-            class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold)
-        class_boxes = K.gather(class_boxes, nms_index)
-        class_box_scores = K.gather(class_box_scores, nms_index)
-        classes = K.ones_like(class_box_scores, 'int32') * c
-        boxes_.append(class_boxes)
-        scores_.append(class_box_scores)
-        classes_.append(classes)
-    boxes_ = K.concatenate(boxes_, axis=0)
-    scores_ = K.concatenate(scores_, axis=0)
-    classes_ = K.concatenate(classes_, axis=0)
-
-    return boxes_, scores_, classes_
-
-
-def preprocess_true_boxes(true_boxes, input_shape, anchors, num_classes):
-    '''Preprocess true boxes to training input format
-
-    Parameters
-    ----------
-    true_boxes: array, shape=(m, T, 5)
-        Absolute x_min, y_min, x_max, y_max, class_id relative to input_shape.
-    input_shape: array-like, hw, multiples of 32
-    anchors: array, shape=(N, 2), wh
-    num_classes: integer
-
-    Returns
-    -------
-    y_true: list of array, shape like yolo_outputs, xywh are relative values
-
-    '''
-    assert (true_boxes[..., 4] < num_classes).all(
-    ), 'class id must be less than num_classes'
-    num_layers = len(anchors)//3  # default setting
-    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
-                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
-
-    true_boxes = np.array(true_boxes, dtype='float32')
-    input_shape = np.array(input_shape, dtype='int32')
-    boxes_xy = (true_boxes[..., 0:2] + true_boxes[..., 2:4]) // 2
-    boxes_wh = true_boxes[..., 2:4] - true_boxes[..., 0:2]
-    true_boxes[..., 0:2] = boxes_xy/input_shape[::-1]
-    true_boxes[..., 2:4] = boxes_wh/input_shape[::-1]
-
-    m = true_boxes.shape[0]
-    grid_shapes = [input_shape//{0: 32, 1: 16, 2: 8}[l]
-                   for l in range(num_layers)]
-    y_true = [np.zeros((m, grid_shapes[l][0], grid_shapes[l][1], len(anchor_mask[l]), 5+num_classes),
-                       dtype='float32') for l in range(num_layers)]
-
-    # Expand dim to apply broadcasting.
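-    # Giving the anchors a leading length-1 axis lets the (T, 1, 2) box sizes
-    # below broadcast against the (1, N, 2) anchor sizes; both are treated as
-    # centred at the origin, so anchor matching reduces to a width/height IoU.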
-    anchors = np.expand_dims(anchors, 0)
-    anchor_maxes = anchors / 2.
-    anchor_mins = -anchor_maxes
-    valid_mask = boxes_wh[..., 0] > 0
-
-    for b in range(m):
-        # Discard zero rows.
-        wh = boxes_wh[b, valid_mask[b]]
-        if len(wh) == 0:
-            continue
-        # Expand dim to apply broadcasting.
-        wh = np.expand_dims(wh, -2)
-        box_maxes = wh / 2.
-        box_mins = -box_maxes
-
-        intersect_mins = np.maximum(box_mins, anchor_mins)
-        intersect_maxes = np.minimum(box_maxes, anchor_maxes)
-        intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
-        intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
-        box_area = wh[..., 0] * wh[..., 1]
-        anchor_area = anchors[..., 0] * anchors[..., 1]
-        iou = intersect_area / (box_area + anchor_area - intersect_area)
-
-        # Find best anchor for each true box
-        best_anchor = np.argmax(iou, axis=-1)
-
-        for t, n in enumerate(best_anchor):
-            for l in range(num_layers):
-                if n in anchor_mask[l]:
-                    i = np.floor(true_boxes[b, t, 0] *
-                                 grid_shapes[l][1]).astype('int32')
-                    j = np.floor(true_boxes[b, t, 1] *
-                                 grid_shapes[l][0]).astype('int32')
-                    k = anchor_mask[l].index(n)
-                    c = true_boxes[b, t, 4].astype('int32')
-                    y_true[l][b, j, i, k, 0:4] = true_boxes[b, t, 0:4]
-                    y_true[l][b, j, i, k, 4] = 1
-                    y_true[l][b, j, i, k, 5+c] = 1
-
-    return y_true
-
-
-def box_iou(b1, b2):
-    '''Return iou tensor
-
-    Parameters
-    ----------
-    b1: tensor, shape=(i1,...,iN, 4), xywh
-    b2: tensor, shape=(j, 4), xywh
-
-    Returns
-    -------
-    iou: tensor, shape=(i1,...,iN, j)
-
-    '''
-
-    # Expand dim to apply broadcasting.
-    b1 = K.expand_dims(b1, -2)
-    b1_xy = b1[..., :2]
-    b1_wh = b1[..., 2:4]
-    b1_wh_half = b1_wh/2.
-    b1_mins = b1_xy - b1_wh_half
-    b1_maxes = b1_xy + b1_wh_half
-
-    # Expand dim to apply broadcasting.
-    b2 = K.expand_dims(b2, 0)
-    b2_xy = b2[..., :2]
-    b2_wh = b2[..., 2:4]
-    b2_wh_half = b2_wh/2.
-    b2_mins = b2_xy - b2_wh_half
-    b2_maxes = b2_xy + b2_wh_half
-
-    intersect_mins = K.maximum(b1_mins, b2_mins)
-    intersect_maxes = K.minimum(b1_maxes, b2_maxes)
-    intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.)
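-    # Clamping at zero handles disjoint boxes: a negative raw intersection
-    # width or height (e.g. unit squares centred 2 apart) collapses to zero
-    # area, giving IoU 0 instead of a spurious positive value.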
-    intersect_area = intersect_wh[..., 0] * intersect_wh[..., 1]
-    b1_area = b1_wh[..., 0] * b1_wh[..., 1]
-    b2_area = b2_wh[..., 0] * b2_wh[..., 1]
-    iou = intersect_area / (b1_area + b2_area - intersect_area)
-
-    return iou
-
-
-def yolo_loss(args, anchors, num_classes, ignore_thresh=.5, print_loss=False):
-    '''Return yolo_loss tensor
-
-    Parameters
-    ----------
-    yolo_outputs: list of tensor, the output of yolo_body or tiny_yolo_body
-    y_true: list of array, the output of preprocess_true_boxes
-    anchors: array, shape=(N, 2), wh
-    num_classes: integer
-    ignore_thresh: float, the IoU threshold above which a predicted box is
-        ignored in the object confidence loss
-
-    Returns
-    -------
-    loss: tensor, shape=(1,)
-
-    '''
-    num_layers = len(anchors)//3  # default setting
-    yolo_outputs = args[:num_layers]
-    y_true = args[num_layers:]
-    anchor_mask = [[6, 7, 8], [3, 4, 5], [0, 1, 2]
-                   ] if num_layers == 3 else [[3, 4, 5], [1, 2, 3]]
-    input_shape = K.cast(K.shape(yolo_outputs[0])[
-        1:3] * 32, K.dtype(y_true[0]))
-    grid_shapes = [K.cast(K.shape(yolo_outputs[l])[1:3],
-                          K.dtype(y_true[0])) for l in range(num_layers)]
-    loss = 0
-    m = K.shape(yolo_outputs[0])[0]  # batch size, tensor
-    mf = K.cast(m, K.dtype(yolo_outputs[0]))
-
-    for l in range(num_layers):
-        object_mask = y_true[l][..., 4:5]
-        true_class_probs = y_true[l][..., 5:]
-
-        grid, raw_pred, pred_xy, pred_wh = yolo_head(yolo_outputs[l],
-                                                     anchors[anchor_mask[l]], num_classes, input_shape, calc_loss=True)
-        pred_box = K.concatenate([pred_xy, pred_wh])
-
-        # Darknet raw box to calculate loss.
-        raw_true_xy = y_true[l][..., :2]*grid_shapes[l][::-1] - grid
-        raw_true_wh = K.log(y_true[l][..., 2:4] /
-                            anchors[anchor_mask[l]] * input_shape[::-1])
-        raw_true_wh = K.switch(object_mask, raw_true_wh,
-                               K.zeros_like(raw_true_wh))  # avoid log(0)=-inf
-        box_loss_scale = 2 - y_true[l][..., 2:3]*y_true[l][..., 3:4]
-
-        # Find ignore mask, iterate over each of batch.
-        ignore_mask = tf.TensorArray(
-            K.dtype(y_true[0]), size=1, dynamic_size=True)
-        object_mask_bool = K.cast(object_mask, 'bool')
-
-        def loop_body(b, ignore_mask):
-            true_box = tf.boolean_mask(
-                y_true[l][b, ..., 0:4], object_mask_bool[b, ..., 0])
-            iou = box_iou(pred_box[b], true_box)
-            best_iou = K.max(iou, axis=-1)
-            ignore_mask = ignore_mask.write(
-                b, K.cast(best_iou < ignore_thresh, K.dtype(true_box)))
-            return b+1, ignore_mask
-        _, ignore_mask = K.control_flow_ops.while_loop(
-            lambda b, *args: b < m, loop_body, [0, ignore_mask])
-        ignore_mask = ignore_mask.stack()
-        ignore_mask = K.expand_dims(ignore_mask, -1)
-
-        # K.binary_crossentropy is helpful to avoid exp overflow.
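-        # The four terms below: BCE on the xy cell offsets and on the class
-        # probabilities, scaled squared error on the log-space wh, and BCE on
-        # objectness, where ignore_mask drops cells whose best IoU with any
-        # true box already exceeds ignore_thresh (they are neither rewarded
-        # nor penalised as background).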
- xy_loss = object_mask * box_loss_scale * \ - K.binary_crossentropy( - raw_true_xy, raw_pred[..., 0:2], from_logits=True) - wh_loss = object_mask * box_loss_scale * 0.5 * \ - K.square(raw_true_wh-raw_pred[..., 2:4]) - confidence_loss = object_mask * K.binary_crossentropy(object_mask, raw_pred[..., 4:5], from_logits=True) + \ - (1-object_mask) * K.binary_crossentropy(object_mask, - raw_pred[..., 4:5], from_logits=True) * ignore_mask - class_loss = object_mask * \ - K.binary_crossentropy( - true_class_probs, raw_pred[..., 5:], from_logits=True) - - xy_loss = K.sum(xy_loss) / mf - wh_loss = K.sum(wh_loss) / mf - confidence_loss = K.sum(confidence_loss) / mf - class_loss = K.sum(class_loss) / mf - loss += xy_loss + wh_loss + confidence_loss + class_loss - if print_loss: - loss = tf.Print(loss, [loss, xy_loss, wh_loss, confidence_loss, class_loss, K.sum( - ignore_mask)], message='loss: ') - return loss -import setuptools -from setuptools.extension import Extension -from distutils.command.build_ext import build_ext as DistUtilsBuildExt - - -class BuildExtension(setuptools.Command): - description = DistUtilsBuildExt.description - user_options = DistUtilsBuildExt.user_options - boolean_options = DistUtilsBuildExt.boolean_options - help_options = DistUtilsBuildExt.help_options - - def __init__(self, *args, **kwargs): - from setuptools.command.build_ext import build_ext as SetupToolsBuildExt - - # Bypass __setatrr__ to avoid infinite recursion. - self.__dict__['_command'] = SetupToolsBuildExt(*args, **kwargs) - - def __getattr__(self, name): - return getattr(self._command, name) - - def __setattr__(self, name, value): - setattr(self._command, name, value) - - def initialize_options(self, *args, **kwargs): - return self._command.initialize_options(*args, **kwargs) - - def finalize_options(self, *args, **kwargs): - ret = self._command.finalize_options(*args, **kwargs) - import numpy - self.include_dirs.append(numpy.get_include()) - return ret - - def run(self, *args, **kwargs): - return self._command.run(*args, **kwargs) - - -extensions = [ - Extension( - 'keras_retinanet.utils.compute_overlap', - ['keras_retinanet/utils/compute_overlap.pyx'] - ), -] - - -setuptools.setup( - name='keras-retinanet', - version='0.5.0', - description='Keras implementation of RetinaNet object detection.', - url='https://github.com/fizyr/keras-retinanet', - author='Hans Gaiser', - author_email='h.gaiser@fizyr.com', - maintainer='Hans Gaiser', - maintainer_email='h.gaiser@fizyr.com', - cmdclass={'build_ext': BuildExtension}, - packages=setuptools.find_packages(), - install_requires=['keras', 'keras-resnet', 'six', 'scipy', - 'cython', 'Pillow', 'opencv-python', 'progressbar2'], - entry_points={ - 'console_scripts': [ - 'retinanet-train=keras_retinanet.bin.train:main', - 'retinanet-evaluate=keras_retinanet.bin.evaluate:main', - 'retinanet-debug=keras_retinanet.bin.debug:main', - 'retinanet-convert-model=keras_retinanet.bin.convert_model:main', - ], - }, - ext_modules=extensions, - setup_requires=["cython>=0.28", "numpy>=1.14.0"] -) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -""" - -import keras - -import numpy as np -import math - - -class PriorProbability(keras.initializers.Initializer): - """ Apply a prior probability to the weights. - """ - - def __init__(self, probability=0.01): - self.probability = probability - - def get_config(self): - return { - 'probability': self.probability - } - - def __call__(self, shape, dtype=None): - # set bias to -log((1 - p)/p) for foreground - result = np.ones(shape, dtype=dtype) * - \ - math.log((1 - self.probability) / self.probability) - - return result -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import keras -from . import backend - - -def focal(alpha=0.25, gamma=2.0): - """ Create a functor for computing the focal loss. - - Args - alpha: Scale the focal weight with alpha. - gamma: Take the power of the focal weight with gamma. - - Returns - A functor that computes the focal loss using the alpha and gamma. - """ - def _focal(y_true, y_pred): - """ Compute the focal loss given the target tensor and the predicted tensor. - - As defined in https://arxiv.org/abs/1708.02002 - - Args - y_true: Tensor of target data from the generator with shape (B, N, num_classes). - y_pred: Tensor of predicted data from the network with shape (B, N, num_classes). - - Returns - The focal loss of y_pred w.r.t. y_true. - """ - labels = y_true[:, :, :-1] - # -1 for ignore, 0 for background, 1 for object - anchor_state = y_true[:, :, -1] - classification = y_pred - - # filter out "ignore" anchors - indices = backend.where(keras.backend.not_equal(anchor_state, -1)) - labels = backend.gather_nd(labels, indices) - classification = backend.gather_nd(classification, indices) - - # compute the focal loss - alpha_factor = keras.backend.ones_like(labels) * alpha - alpha_factor = backend.where(keras.backend.equal( - labels, 1), alpha_factor, 1 - alpha_factor) - focal_weight = backend.where(keras.backend.equal( - labels, 1), 1 - classification, classification) - focal_weight = alpha_factor * focal_weight ** gamma - - cls_loss = focal_weight * \ - keras.backend.binary_crossentropy(labels, classification) - - # compute the normalizer: the number of positive anchors - normalizer = backend.where(keras.backend.equal(anchor_state, 1)) - normalizer = keras.backend.cast(keras.backend.shape(normalizer)[ - 0], keras.backend.floatx()) - normalizer = keras.backend.maximum( - keras.backend.cast_to_floatx(1.0), normalizer) - - return keras.backend.sum(cls_loss) / normalizer - - return _focal - - -def smooth_l1(sigma=3.0): - """ Create a smooth L1 loss functor. - - Args - sigma: This argument defines the point where the loss changes from L2 to L1. - - Returns - A functor for computing the smooth L1 loss given target data and predicted data. - """ - sigma_squared = sigma ** 2 - - def _smooth_l1(y_true, y_pred): - """ Compute the smooth L1 loss of y_pred w.r.t. y_true. - - Args - y_true: Tensor from the generator of shape (B, N, 5). 
The last value for each box is the state of the anchor (ignore, negative, positive). - y_pred: Tensor from the network of shape (B, N, 4). - - Returns - The smooth L1 loss of y_pred w.r.t. y_true. - """ - # separate target and state - regression = y_pred - regression_target = y_true[:, :, :-1] - anchor_state = y_true[:, :, -1] - - # filter out "ignore" anchors - indices = backend.where(keras.backend.equal(anchor_state, 1)) - regression = backend.gather_nd(regression, indices) - regression_target = backend.gather_nd(regression_target, indices) - - # compute smooth L1 loss - # f(x) = 0.5 * (sigma * x)^2 if |x| < 1 / sigma / sigma - # |x| - 0.5 / sigma / sigma otherwise - regression_diff = regression - regression_target - regression_diff = keras.backend.abs(regression_diff) - regression_loss = backend.where( - keras.backend.less(regression_diff, 1.0 / sigma_squared), - 0.5 * sigma_squared * keras.backend.pow(regression_diff, 2), - regression_diff - 0.5 / sigma_squared - ) - - # compute the normalizer: the number of positive anchors - normalizer = keras.backend.maximum(1, keras.backend.shape(indices)[0]) - normalizer = keras.backend.cast( - normalizer, dtype=keras.backend.floatx()) - return keras.backend.sum(regression_loss) / normalizer - - return _smooth_l1 -import keras_retinanet.losses -import keras - -import numpy as np - -import pytest - - -def test_smooth_l1(): - regression = np.array([ - [ - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0], - [0, 0, 0, 0], - ] - ], dtype=keras.backend.floatx()) - regression = keras.backend.variable(regression) - - regression_target = np.array([ - [ - [0, 0, 0, 1, 1], - [0, 0, 1, 0, 1], - [0, 0, 0.05, 0, 1], - [0, 0, 1, 0, 0], - ] - ], dtype=keras.backend.floatx()) - regression_target = keras.backend.variable(regression_target) - - loss = keras_retinanet.losses.smooth_l1()(regression_target, regression) - loss = keras.backend.eval(loss) - - assert loss == pytest.approx( - (((1 - 0.5 / 9) * 2 + (0.5 * 9 * 0.05 ** 2)) / 3)) -from .dynamic import * # noqa: F401,F403 -from .common import * # noqa: F401,F403 -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import keras.backend -from .dynamic import meshgrid - - -def bbox_transform_inv(boxes, deltas, mean=None, std=None): - """ Applies deltas (usually regression results) to boxes (usually anchors). 
- - Before applying the deltas to the boxes, the normalization that was previously applied (in the generator) has to be removed. - The mean and std are the mean and std as applied in the generator. They are unnormalized in this function and then applied to the boxes. - - Args - boxes : np.array of shape (B, N, 4), where B is the batch size, N the number of boxes and 4 values for (x1, y1, x2, y2). - deltas: np.array of same shape as boxes. These deltas (d_x1, d_y1, d_x2, d_y2) are a factor of the width/height. - mean : The mean value used when computing deltas (defaults to [0, 0, 0, 0]). - std : The standard deviation used when computing deltas (defaults to [0.2, 0.2, 0.2, 0.2]). - - Returns - A np.array of the same shape as boxes, but with deltas applied to each box. - The mean and std are used during training to normalize the regression values (networks love normalization). - """ - if mean is None: - mean = [0, 0, 0, 0] - if std is None: - std = [0.2, 0.2, 0.2, 0.2] - - width = boxes[:, :, 2] - boxes[:, :, 0] - height = boxes[:, :, 3] - boxes[:, :, 1] - - x1 = boxes[:, :, 0] + (deltas[:, :, 0] * std[0] + mean[0]) * width - y1 = boxes[:, :, 1] + (deltas[:, :, 1] * std[1] + mean[1]) * height - x2 = boxes[:, :, 2] + (deltas[:, :, 2] * std[2] + mean[2]) * width - y2 = boxes[:, :, 3] + (deltas[:, :, 3] * std[3] + mean[3]) * height - - pred_boxes = keras.backend.stack([x1, y1, x2, y2], axis=2) - - return pred_boxes - - -def shift(shape, stride, anchors): - """ Produce shifted anchors based on shape of the map and stride size. - - Args - shape : Shape to shift the anchors over. - stride : Stride to shift the anchors with over the shape. - anchors: The anchors to apply at each location. - """ - shift_x = (keras.backend.arange(0, shape[1], dtype=keras.backend.floatx( - )) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride - shift_y = (keras.backend.arange(0, shape[0], dtype=keras.backend.floatx( - )) + keras.backend.constant(0.5, dtype=keras.backend.floatx())) * stride - - shift_x, shift_y = meshgrid(shift_x, shift_y) - shift_x = keras.backend.reshape(shift_x, [-1]) - shift_y = keras.backend.reshape(shift_y, [-1]) - - shifts = keras.backend.stack([ - shift_x, - shift_y, - shift_x, - shift_y - ], axis=0) - - shifts = keras.backend.transpose(shifts) - number_of_anchors = keras.backend.shape(anchors)[0] - - # number of base points = feat_h * feat_w - k = keras.backend.shape(shifts)[0] - - shifted_anchors = keras.backend.reshape(anchors, [1, number_of_anchors, 4]) + keras.backend.cast( - keras.backend.reshape(shifts, [k, 1, 4]), keras.backend.floatx()) - shifted_anchors = keras.backend.reshape( - shifted_anchors, [k * number_of_anchors, 4]) - - return shifted_anchors -import os - -_BACKEND = "tensorflow" - -if "KERAS_BACKEND" in os.environ: - _backend = os.environ["KERAS_BACKEND"] - - backends = { - "cntk", - "tensorflow", - "theano" - } - - assert _backend in backends - - _BACKEND = _backend - -if _BACKEND == "cntk": - from .cntk_backend import * # noqa: F401,F403 -elif _BACKEND == "theano": - from .theano_backend import * # noqa: F401,F403 -elif _BACKEND == "tensorflow": - from .tensorflow_backend import * # noqa: F401,F403 -else: - raise ValueError("Unknown backend: " + str(_BACKEND)) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import tensorflow - - -def ones(*args, **kwargs): - """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/ones . - """ - return tensorflow.ones(*args, **kwargs) - - -def transpose(*args, **kwargs): - """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/transpose . - """ - return tensorflow.transpose(*args, **kwargs) - - -def map_fn(*args, **kwargs): - """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/map_fn . - """ - return tensorflow.map_fn(*args, **kwargs) - - -def pad(*args, **kwargs): - """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/pad . - """ - return tensorflow.pad(*args, **kwargs) - - -def top_k(*args, **kwargs): - """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/nn/top_k . - """ - return tensorflow.nn.top_k(*args, **kwargs) - - -def clip_by_value(*args, **kwargs): - """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/clip_by_value . - """ - return tensorflow.clip_by_value(*args, **kwargs) - - -def resize_images(images, size, method='bilinear', align_corners=False): - """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/image/resize_images . - - Args - method: The method used for interpolation. One of ('bilinear', 'nearest', 'bicubic', 'area'). - """ - methods = { - 'bilinear': tensorflow.image.ResizeMethod.BILINEAR, - 'nearest': tensorflow.image.ResizeMethod.NEAREST_NEIGHBOR, - 'bicubic': tensorflow.image.ResizeMethod.BICUBIC, - 'area': tensorflow.image.ResizeMethod.AREA, - } - return tensorflow.image.resize_images(images, size, methods[method], align_corners) - - -def non_max_suppression(*args, **kwargs): - """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/image/non_max_suppression . - """ - return tensorflow.image.non_max_suppression(*args, **kwargs) - - -def range(*args, **kwargs): - """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/range . - """ - return tensorflow.range(*args, **kwargs) - - -def scatter_nd(*args, **kwargs): - """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/scatter_nd . - """ - return tensorflow.scatter_nd(*args, **kwargs) - - -def gather_nd(*args, **kwargs): - """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/gather_nd . - """ - return tensorflow.gather_nd(*args, **kwargs) - - -def meshgrid(*args, **kwargs): - """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/meshgrid . - """ - return tensorflow.meshgrid(*args, **kwargs) - - -def where(*args, **kwargs): - """ See https://www.tensorflow.org/versions/master/api_docs/python/tf/where . - """ - return tensorflow.where(*args, **kwargs) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" -#!/usr/bin/env python - -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -from ..utils.config import read_config_file, parse_anchor_parameters -from .. import models -import argparse -import os -import sys - -import keras -import tensorflow as tf - -# Allow relative imports when being executed as script. -if __name__ == "__main__" and __package__ is None: - sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..')) - import keras_retinanet.bin # noqa: F401 - __package__ = "keras_retinanet.bin" - -# Change these to absolute imports if you copy this script outside the keras_retinanet package. - - -def get_session(): - """ Construct a modified tf session. - """ - config = tf.ConfigProto() - os.environ["CUDA_VISIBLE_DEVICES"] = "" - return tf.Session(config=config) - - -def parse_args(args): - parser = argparse.ArgumentParser( - description='Script for converting a training model to an inference model.') - - parser.add_argument('model_in', help='The model to convert.') - parser.add_argument( - 'model_out', help='Path to save the converted model to.') - parser.add_argument( - '--backbone', help='The backbone of the model to convert.', default='resnet50') - parser.add_argument( - '--no-nms', help='Disables non maximum suppression.', dest='nms', action='store_false') - parser.add_argument('--no-class-specific-filter', help='Disables class specific filtering.', - dest='class_specific_filter', action='store_false') - parser.add_argument( - '--config', help='Path to a configuration parameters .ini file.') - - return parser.parse_args(args) - - -def main(args=None): - # parse arguments - if args is None: - args = sys.argv[1:] - args = parse_args(args) - - # Set modified tf session to avoid using the GPUs - keras.backend.tensorflow_backend.set_session(get_session()) - - # optionally load config parameters - anchor_parameters = None - if args.config: - args.config = read_config_file(args.config) - if 'anchor_parameters' in args.config: - anchor_parameters = parse_anchor_parameters(args.config) - - # load the model - model = models.load_model(args.model_in, backbone_name=args.backbone) - - # check if this is indeed a training model - models.check_training_model(model) - - # convert the model - model = models.convert_model( - model, nms=args.nms, class_specific_filter=args.class_specific_filter, anchor_params=anchor_parameters) - - # save model - model.save(args.model_out) - - -if __name__ == '__main__': - main() -#!/usr/bin/env python - -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you 
may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -from ..utils.config import read_config_file, parse_anchor_parameters -from ..utils.anchors import anchors_for_shape, compute_gt_annotations -from ..utils.visualization import draw_annotations, draw_boxes -from ..utils.transform import random_transform_generator -from ..utils.keras_version import check_keras_version -from ..preprocessing.open_images import OpenImagesGenerator -from ..preprocessing.kitti import KittiGenerator -from ..preprocessing.csv_generator import CSVGenerator -from ..preprocessing.pascal_voc import PascalVocGenerator -import argparse -import os -import sys -import cv2 - -# Allow relative imports when being executed as script. -if __name__ == "__main__" and __package__ is None: - sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..')) - import keras_retinanet.bin # noqa: F401 - __package__ = "keras_retinanet.bin" - -# Change these to absolute imports if you copy this script outside the keras_retinanet package. - - -def create_generator(args): - """ Create the data generators. - - Args: - args: parseargs arguments object. - """ - # create random transform generator for augmenting training data - transform_generator = random_transform_generator( - min_rotation=-0.1, - max_rotation=0.1, - min_translation=(-0.1, -0.1), - max_translation=(0.1, 0.1), - min_shear=-0.1, - max_shear=0.1, - min_scaling=(0.9, 0.9), - max_scaling=(1.1, 1.1), - flip_x_chance=0.5, - flip_y_chance=0.5, - ) - - if args.dataset_type == 'coco': - # import here to prevent unnecessary dependency on cocoapi - from ..preprocessing.coco import CocoGenerator - - generator = CocoGenerator( - args.coco_path, - args.coco_set, - transform_generator=transform_generator, - image_min_side=args.image_min_side, - image_max_side=args.image_max_side, - config=args.config - ) - elif args.dataset_type == 'pascal': - generator = PascalVocGenerator( - args.pascal_path, - args.pascal_set, - transform_generator=transform_generator, - image_min_side=args.image_min_side, - image_max_side=args.image_max_side, - config=args.config - ) - elif args.dataset_type == 'csv': - generator = CSVGenerator( - args.annotations, - args.classes, - transform_generator=transform_generator, - image_min_side=args.image_min_side, - image_max_side=args.image_max_side, - config=args.config - ) - elif args.dataset_type == 'oid': - generator = OpenImagesGenerator( - args.main_dir, - subset=args.subset, - version=args.version, - labels_filter=args.labels_filter, - parent_label=args.parent_label, - annotation_cache_dir=args.annotation_cache_dir, - transform_generator=transform_generator, - image_min_side=args.image_min_side, - image_max_side=args.image_max_side, - config=args.config - ) - elif args.dataset_type == 'kitti': - generator = KittiGenerator( - args.kitti_path, - subset=args.subset, - transform_generator=transform_generator, - image_min_side=args.image_min_side, - image_max_side=args.image_max_side, - config=args.config - ) - else: - raise ValueError( - 'Invalid data type received: {}'.format(args.dataset_type)) - - return generator - - -def parse_args(args): - """ 
Parse the arguments. - """ - parser = argparse.ArgumentParser( - description='Debug script for a RetinaNet network.') - subparsers = parser.add_subparsers( - help='Arguments for specific dataset types.', dest='dataset_type') - subparsers.required = True - - coco_parser = subparsers.add_parser('coco') - coco_parser.add_argument( - 'coco_path', help='Path to dataset directory (ie. /tmp/COCO).') - coco_parser.add_argument( - '--coco-set', help='Name of the set to show (defaults to val2017).', default='val2017') - - pascal_parser = subparsers.add_parser('pascal') - pascal_parser.add_argument( - 'pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).') - pascal_parser.add_argument( - '--pascal-set', help='Name of the set to show (defaults to test).', default='test') - - kitti_parser = subparsers.add_parser('kitti') - kitti_parser.add_argument( - 'kitti_path', help='Path to dataset directory (ie. /tmp/kitti).') - kitti_parser.add_argument( - 'subset', help='Argument for loading a subset from train/val.') - - def csv_list(string): - return string.split(',') - - oid_parser = subparsers.add_parser('oid') - oid_parser.add_argument('main_dir', help='Path to dataset directory.') - oid_parser.add_argument( - 'subset', help='Argument for loading a subset from train/validation/test.') - oid_parser.add_argument( - '--version', help='The current dataset version is v4.', default='v4') - oid_parser.add_argument( - '--labels-filter', help='A list of labels to filter.', type=csv_list, default=None) - oid_parser.add_argument('--annotation-cache-dir', - help='Path to store annotation cache.', default='.') - oid_parser.add_argument( - '--parent-label', help='Use the hierarchy children of this label.', default=None) - - csv_parser = subparsers.add_parser('csv') - csv_parser.add_argument( - 'annotations', help='Path to CSV file containing annotations for evaluation.') - csv_parser.add_argument( - 'classes', help='Path to a CSV file containing class label mapping.') - - parser.add_argument( - '-l', '--loop', help='Loop forever, even if the dataset is exhausted.', action='store_true') - parser.add_argument('--no-resize', help='Disable image resizing.', - dest='resize', action='store_false') - parser.add_argument( - '--anchors', help='Show positive anchors on the image.', action='store_true') - parser.add_argument( - '--annotations', help='Show annotations on the image. Green annotations have anchors, red annotations don\'t and therefore don\'t contribute to training.', action='store_true') - parser.add_argument( - '--random-transform', help='Randomly transform image and annotations.', action='store_true') - parser.add_argument( - '--image-min-side', help='Rescale the image so the smallest side is min_side.', type=int, default=800) - parser.add_argument( - '--image-max-side', help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333) - parser.add_argument( - '--config', help='Path to a configuration parameters .ini file.') - - return parser.parse_args(args) - - -def run(generator, args, anchor_params): - """ Main loop. - - Args - generator: The generator to debug. - args: parseargs args object. 
- """ - # display images, one at a time - for i in range(generator.size()): - # load the data - image = generator.load_image(i) - annotations = generator.load_annotations(i) - if len(annotations['labels']) > 0: - # apply random transformations - if args.random_transform: - image, annotations = generator.random_transform_group_entry( - image, annotations) - - # resize the image and annotations - if args.resize: - image, image_scale = generator.resize_image(image) - annotations['bboxes'] *= image_scale - - anchors = anchors_for_shape( - image.shape, anchor_params=anchor_params) - positive_indices, _, max_indices = compute_gt_annotations( - anchors, annotations['bboxes']) - - # draw anchors on the image - if args.anchors: - draw_boxes( - image, anchors[positive_indices], (255, 255, 0), thickness=1) - - # draw annotations on the image - if args.annotations: - # draw annotations in red - draw_annotations(image, annotations, color=( - 0, 0, 255), label_to_name=generator.label_to_name) - - # draw regressed anchors in green to override most red annotations - # result is that annotations without anchors are red, with anchors are green - draw_boxes( - image, annotations['bboxes'][max_indices[positive_indices], :], (0, 255, 0)) - - cv2.imshow('Image', image) - if cv2.waitKey() == ord('q'): - return False - return True - - -def main(args=None): - # parse arguments - if args is None: - args = sys.argv[1:] - args = parse_args(args) - - # make sure keras is the minimum required version - check_keras_version() - - # create the generator - generator = create_generator(args) - - # optionally load config parameters - if args.config: - args.config = read_config_file(args.config) - - # optionally load anchor parameters - anchor_params = None - if args.config and 'anchor_parameters' in args.config: - anchor_params = parse_anchor_parameters(args.config) - - # create the display window - cv2.namedWindow('Image', cv2.WINDOW_NORMAL) - - if args.loop: - while run(generator, args, anchor_params=anchor_params): - pass - else: - run(generator, args, anchor_params=anchor_params) - - -if __name__ == '__main__': - main() -#!/usr/bin/env python - -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - http://www.apache.org/licenses/LICENSE-2.0 -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -from ..utils.keras_version import check_keras_version -from ..utils.eval import evaluate -from ..utils.config import read_config_file, parse_anchor_parameters -from ..preprocessing.pascal_voc import PascalVocGenerator -from ..preprocessing.csv_generator import CSVGenerator -from .. import models -import argparse -import os -import sys - -import keras -import tensorflow as tf - -# Allow relative imports when being executed as script. -if __name__ == "__main__" and __package__ is None: - sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..')) - import keras_retinanet.bin # noqa: F401 - __package__ = "keras_retinanet.bin" - -# Change these to absolute imports if you copy this script outside the keras_retinanet package. - - -def get_session(): - """ Construct a modified tf session. 
- """ - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - return tf.Session(config=config) - - -def create_generator(args): - """ Create generators for evaluation. - """ - if args.dataset_type == 'coco': - # import here to prevent unnecessary dependency on cocoapi - from ..preprocessing.coco import CocoGenerator - - validation_generator = CocoGenerator( - args.coco_path, - 'val2017', - image_min_side=args.image_min_side, - image_max_side=args.image_max_side, - config=args.config - ) - elif args.dataset_type == 'pascal': - validation_generator = PascalVocGenerator( - args.pascal_path, - 'test', - image_min_side=args.image_min_side, - image_max_side=args.image_max_side, - config=args.config - ) - elif args.dataset_type == 'csv': - validation_generator = CSVGenerator( - args.annotations, - args.classes, - image_min_side=args.image_min_side, - image_max_side=args.image_max_side, - config=args.config - ) - else: - raise ValueError( - 'Invalid data type received: {}'.format(args.dataset_type)) - - return validation_generator - - -def parse_args(args): - """ Parse the arguments. - """ - parser = argparse.ArgumentParser( - description='Evaluation script for a RetinaNet network.') - subparsers = parser.add_subparsers( - help='Arguments for specific dataset types.', dest='dataset_type') - subparsers.required = True - - coco_parser = subparsers.add_parser('coco') - coco_parser.add_argument( - 'coco_path', help='Path to dataset directory (ie. /tmp/COCO).') - - pascal_parser = subparsers.add_parser('pascal') - pascal_parser.add_argument( - 'pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).') - - csv_parser = subparsers.add_parser('csv') - csv_parser.add_argument( - 'annotations', help='Path to CSV file containing annotations for evaluation.') - csv_parser.add_argument( - 'classes', help='Path to a CSV file containing class label mapping.') - - parser.add_argument('model', help='Path to RetinaNet model.') - parser.add_argument( - '--convert-model', help='Convert the model to an inference model (ie. 
the input is a training model).', action='store_true') - parser.add_argument( - '--backbone', help='The backbone of the model.', default='resnet50') - parser.add_argument( - '--gpu', help='Id of the GPU to use (as reported by nvidia-smi).') - parser.add_argument( - '--score-threshold', help='Threshold on score to filter detections with (defaults to 0.05).', default=0.05, type=float) - parser.add_argument( - '--iou-threshold', help='IoU Threshold to count for a positive detection (defaults to 0.5).', default=0.5, type=float) - parser.add_argument( - '--max-detections', help='Max Detections per image (defaults to 100).', default=100, type=int) - parser.add_argument( - '--save-path', help='Path for saving images with detections (doesn\'t work for COCO).') - parser.add_argument( - '--image-min-side', help='Rescale the image so the smallest side is min_side.', type=int, default=800) - parser.add_argument( - '--image-max-side', help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333) - parser.add_argument( - '--config', help='Path to a configuration parameters .ini file (only used with --convert-model).') - - return parser.parse_args(args) - - -def main(args=None): - # parse arguments - if args is None: - args = sys.argv[1:] - args = parse_args(args) - - # make sure keras is the minimum required version - check_keras_version() - - # optionally choose specific GPU - if args.gpu: - os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu - keras.backend.tensorflow_backend.set_session(get_session()) - - # make save path if it doesn't exist - if args.save_path is not None and not os.path.exists(args.save_path): - os.makedirs(args.save_path) - - # optionally load config parameters - if args.config: - args.config = read_config_file(args.config) - - # create the generator - generator = create_generator(args) - - # optionally load anchor parameters - anchor_params = None - if args.config and 'anchor_parameters' in args.config: - anchor_params = parse_anchor_parameters(args.config) - - # load the model - print('Loading model, this may take a second...') - model = models.load_model(args.model, backbone_name=args.backbone) - - # optionally convert the model - if args.convert_model: - model = models.convert_model(model, anchor_params=anchor_params) - - # print model summary - # print(model.summary()) - - # start evaluation - if args.dataset_type == 'coco': - from ..utils.coco_eval import evaluate_coco - evaluate_coco(generator, model, args.score_threshold) - else: - average_precisions = evaluate( - generator, - model, - iou_threshold=args.iou_threshold, - score_threshold=args.score_threshold, - max_detections=args.max_detections, - save_path=args.save_path - ) - - # print evaluation - total_instances = [] - precisions = [] - for label, (average_precision, num_annotations) in average_precisions.items(): - print('{:.0f} instances of class'.format(num_annotations), - generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision)) - total_instances.append(num_annotations) - precisions.append(average_precision) - - if sum(total_instances) == 0: - print('No test instances found.') - return - - print('mAP using the weighted average of precisions among classes: {:.4f}'.format( - sum([a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances))) - print('mAP: {:.4f}'.format(sum(precisions) / - sum(x > 0 for x in total_instances))) - - -if __name__ == '__main__': - main() -#!/usr/bin/env python - -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) 
- -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -from ..utils.transform import random_transform_generator -from ..utils.model import freeze as freeze_model -from ..utils.keras_version import check_keras_version -from ..utils.config import read_config_file, parse_anchor_parameters -from ..utils.anchors import make_shapes_callback -from ..preprocessing.pascal_voc import PascalVocGenerator -from ..preprocessing.open_images import OpenImagesGenerator -from ..preprocessing.kitti import KittiGenerator -from ..preprocessing.csv_generator import CSVGenerator -from ..models.retinanet import retinanet_bbox -from ..callbacks.eval import Evaluate -from ..callbacks import RedirectModel -from .. import models -from .. import losses -import argparse -import os -import sys -import warnings - -import keras -import keras.preprocessing.image -import tensorflow as tf - -# Allow relative imports when being executed as script. -if __name__ == "__main__" and __package__ is None: - sys.path.insert(0, os.path.join(os.path.dirname(__file__), '..', '..')) - import keras_retinanet.bin # noqa: F401 - __package__ = "keras_retinanet.bin" - -# Change these to absolute imports if you copy this script outside the keras_retinanet package. -from .. import layers # noqa: F401 - - -def makedirs(path): - # Intended behavior: try to create the directory, - # pass if the directory exists already, fails otherwise. - # Meant for Python 2.7/3.n compatibility. - try: - os.makedirs(path) - except OSError: - if not os.path.isdir(path): - raise - - -def get_session(): - """ Construct a modified tf session. - """ - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - return tf.Session(config=config) - - -def model_with_weights(model, weights, skip_mismatch): - """ Load weights for model. - - Args - model : The model to load weights for. - weights : The weights to load. - skip_mismatch : If True, skips layers whose shape of weights doesn't match with the model. - """ - if weights is not None: - model.load_weights(weights, by_name=True, skip_mismatch=skip_mismatch) - return model - - -def create_models(backbone_retinanet, num_classes, weights, multi_gpu=0, - freeze_backbone=False, lr=1e-5, config=None): - """ Creates three models (model, training_model, prediction_model). - - Args - backbone_retinanet : A function to call to create a retinanet model with a given backbone. - num_classes : The number of classes to train. - weights : The weights to load into the model. - multi_gpu : The number of GPUs to use for training. - freeze_backbone : If True, disables learning for the backbone. - config : Config parameters, None indicates the default configuration. - - Returns - model : The base model. This is also the model that is saved in snapshots. - training_model : The training model. If multi_gpu=0, this is identical to model. - prediction_model : The model wrapped with utility functions to perform object detection (applies regression values and performs NMS). 
- """ - - modifier = freeze_model if freeze_backbone else None - - # load anchor parameters, or pass None (so that defaults will be used) - anchor_params = None - num_anchors = None - if config and 'anchor_parameters' in config: - anchor_params = parse_anchor_parameters(config) - num_anchors = anchor_params.num_anchors() - - # Keras recommends initialising a multi-gpu model on the CPU to ease weight sharing, and to prevent OOM errors. - # optionally wrap in a parallel model - if multi_gpu > 1: - from keras.utils import multi_gpu_model - with tf.device('/cpu:0'): - model = model_with_weights(backbone_retinanet( - num_classes, num_anchors=num_anchors, modifier=modifier), weights=weights, skip_mismatch=True) - training_model = multi_gpu_model(model, gpus=multi_gpu) - else: - model = model_with_weights(backbone_retinanet( - num_classes, num_anchors=num_anchors, modifier=modifier), weights=weights, skip_mismatch=True) - training_model = model - - # make prediction model - prediction_model = retinanet_bbox(model=model, anchor_params=anchor_params) - - # compile model - training_model.compile( - loss={ - 'regression': losses.smooth_l1(), - 'classification': losses.focal() - }, - optimizer=keras.optimizers.adam(lr=lr, clipnorm=0.001) - ) - - return model, training_model, prediction_model - - -def create_callbacks(model, training_model, prediction_model, validation_generator, args): - """ Creates the callbacks to use during training. - - Args - model: The base model. - training_model: The model that is used for training. - prediction_model: The model that should be used for validation. - validation_generator: The generator for creating validation data. - args: parseargs args object. - - Returns: - A list of callbacks used for training. - """ - callbacks = [] - - tensorboard_callback = None - - if args.tensorboard_dir: - tensorboard_callback = keras.callbacks.TensorBoard( - log_dir=args.tensorboard_dir, - histogram_freq=0, - batch_size=args.batch_size, - write_graph=True, - write_grads=False, - write_images=False, - embeddings_freq=0, - embeddings_layer_names=None, - embeddings_metadata=None - ) - callbacks.append(tensorboard_callback) - - if args.evaluation and validation_generator: - if args.dataset_type == 'coco': - from ..callbacks.coco import CocoEval - - # use prediction model for evaluation - evaluation = CocoEval(validation_generator, - tensorboard=tensorboard_callback) - else: - evaluation = Evaluate( - validation_generator, tensorboard=tensorboard_callback, weighted_average=args.weighted_average) - evaluation = RedirectModel(evaluation, prediction_model) - callbacks.append(evaluation) - - # save the model - if args.snapshots: - # ensure directory created first; otherwise h5py will error after epoch. - makedirs(args.snapshot_path) - checkpoint = keras.callbacks.ModelCheckpoint( - os.path.join( - args.snapshot_path, - '{backbone}_{dataset_type}_{{epoch:02d}}.h5'.format( - backbone=args.backbone, dataset_type=args.dataset_type) - ), - verbose=1, - # save_best_only=True, - # monitor="mAP", - # mode='max' - ) - checkpoint = RedirectModel(checkpoint, model) - callbacks.append(checkpoint) - - callbacks.append(keras.callbacks.ReduceLROnPlateau( - monitor='loss', - factor=0.1, - patience=2, - verbose=1, - mode='auto', - min_delta=0.0001, - cooldown=0, - min_lr=0 - )) - - return callbacks - - -def create_generators(args, preprocess_image): - """ Create generators for training and validation. - - Args - args : parseargs object containing configuration for generators. 
- preprocess_image : Function that preprocesses an image for the network. - """ - common_args = { - 'batch_size': args.batch_size, - 'config': args.config, - 'image_min_side': args.image_min_side, - 'image_max_side': args.image_max_side, - 'preprocess_image': preprocess_image, - } - - # create random transform generator for augmenting training data - if args.random_transform: - transform_generator = random_transform_generator( - min_rotation=-0.1, - max_rotation=0.1, - min_translation=(-0.1, -0.1), - max_translation=(0.1, 0.1), - min_shear=-0.1, - max_shear=0.1, - min_scaling=(0.9, 0.9), - max_scaling=(1.1, 1.1), - flip_x_chance=0.5, - flip_y_chance=0.5, - ) - else: - transform_generator = random_transform_generator(flip_x_chance=0.5) - - if args.dataset_type == 'coco': - # import here to prevent unnecessary dependency on cocoapi - from ..preprocessing.coco import CocoGenerator - - train_generator = CocoGenerator( - args.coco_path, - 'train2017', - transform_generator=transform_generator, - **common_args - ) - - validation_generator = CocoGenerator( - args.coco_path, - 'val2017', - **common_args - ) - elif args.dataset_type == 'pascal': - train_generator = PascalVocGenerator( - args.pascal_path, - 'trainval', - transform_generator=transform_generator, - **common_args - ) - - validation_generator = PascalVocGenerator( - args.pascal_path, - 'test', - **common_args - ) - elif args.dataset_type == 'csv': - train_generator = CSVGenerator( - args.annotations, - args.classes, - transform_generator=transform_generator, - **common_args - ) - - if args.val_annotations: - validation_generator = CSVGenerator( - args.val_annotations, - args.classes, - **common_args - ) - else: - validation_generator = None - elif args.dataset_type == 'oid': - train_generator = OpenImagesGenerator( - args.main_dir, - subset='train', - version=args.version, - labels_filter=args.labels_filter, - annotation_cache_dir=args.annotation_cache_dir, - parent_label=args.parent_label, - transform_generator=transform_generator, - **common_args - ) - - validation_generator = OpenImagesGenerator( - args.main_dir, - subset='validation', - version=args.version, - labels_filter=args.labels_filter, - annotation_cache_dir=args.annotation_cache_dir, - parent_label=args.parent_label, - **common_args - ) - elif args.dataset_type == 'kitti': - train_generator = KittiGenerator( - args.kitti_path, - subset='train', - transform_generator=transform_generator, - **common_args - ) - - validation_generator = KittiGenerator( - args.kitti_path, - subset='val', - **common_args - ) - else: - raise ValueError( - 'Invalid data type received: {}'.format(args.dataset_type)) - - return train_generator, validation_generator - - -def check_args(parsed_args): - """ Function to check for inherent contradictions within parsed arguments. - For example, batch_size < num_gpus - Intended to raise errors prior to backend initialisation. 
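# Illustrative use of check_args (defined just below); argparse.Namespace
# stands in for the real parsed arguments:
import argparse
bad = argparse.Namespace(multi_gpu=2, batch_size=1, snapshot=None,
                         multi_gpu_force=True, backbone='resnet50')
try:
    check_args(bad)
except ValueError as err:
    print(err)  # batch size (1) must cover the number of GPUs (2)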
- - Args - parsed_args: parser.parse_args() - - Returns - parsed_args - """ - - if parsed_args.multi_gpu > 1 and parsed_args.batch_size < parsed_args.multi_gpu: - raise ValueError( - "Batch size ({}) must be equal to or higher than the number of GPUs ({})".format(parsed_args.batch_size, - parsed_args.multi_gpu)) - - if parsed_args.multi_gpu > 1 and parsed_args.snapshot: - raise ValueError( - "Multi GPU training ({}) and resuming from snapshots ({}) is not supported.".format(parsed_args.multi_gpu, - parsed_args.snapshot)) - - if parsed_args.multi_gpu > 1 and not parsed_args.multi_gpu_force: - raise ValueError( - "Multi-GPU support is experimental, use at own risk! Run with --multi-gpu-force if you wish to continue.") - - if 'resnet' not in parsed_args.backbone: - warnings.warn('Using experimental backbone {}. Only resnet50 has been properly tested.'.format( - parsed_args.backbone)) - - return parsed_args - - -def parse_args(args): - """ Parse the arguments. - """ - parser = argparse.ArgumentParser( - description='Simple training script for training a RetinaNet network.') - subparsers = parser.add_subparsers( - help='Arguments for specific dataset types.', dest='dataset_type') - subparsers.required = True - - coco_parser = subparsers.add_parser('coco') - coco_parser.add_argument( - 'coco_path', help='Path to dataset directory (ie. /tmp/COCO).') - - pascal_parser = subparsers.add_parser('pascal') - pascal_parser.add_argument( - 'pascal_path', help='Path to dataset directory (ie. /tmp/VOCdevkit).') - - kitti_parser = subparsers.add_parser('kitti') - kitti_parser.add_argument( - 'kitti_path', help='Path to dataset directory (ie. /tmp/kitti).') - - def csv_list(string): - return string.split(',') - - oid_parser = subparsers.add_parser('oid') - oid_parser.add_argument('main_dir', help='Path to dataset directory.') - oid_parser.add_argument( - '--version', help='The current dataset version is v4.', default='v4') - oid_parser.add_argument( - '--labels-filter', help='A list of labels to filter.', type=csv_list, default=None) - oid_parser.add_argument('--annotation-cache-dir', - help='Path to store annotation cache.', default='.') - oid_parser.add_argument( - '--parent-label', help='Use the hierarchy children of this label.', default=None) - - csv_parser = subparsers.add_parser('csv') - csv_parser.add_argument( - 'annotations', help='Path to CSV file containing annotations for training.') - csv_parser.add_argument( - 'classes', help='Path to a CSV file containing class label mapping.') - csv_parser.add_argument( - '--val-annotations', help='Path to CSV file containing annotations for validation (optional).') - - group = parser.add_mutually_exclusive_group() - group.add_argument('--snapshot', - help='Resume training from a snapshot.') - group.add_argument('--imagenet-weights', help='Initialize the model with pretrained imagenet weights. 
This is the default behaviour.', - action='store_const', const=True, default=True) - group.add_argument( - '--weights', help='Initialize the model with weights from a file.') - group.add_argument('--no-weights', help='Don\'t initialize the model with any weights.', - dest='imagenet_weights', action='store_const', const=False) - - parser.add_argument( - '--backbone', help='Backbone model used by retinanet.', default='resnet50', type=str) - parser.add_argument('--batch-size', - help='Size of the batches.', default=1, type=int) - parser.add_argument( - '--gpu', help='Id of the GPU to use (as reported by nvidia-smi).') - parser.add_argument( - '--multi-gpu', help='Number of GPUs to use for parallel processing.', type=int, default=0) - parser.add_argument( - '--multi-gpu-force', help='Extra flag needed to enable (experimental) multi-gpu support.', action='store_true') - parser.add_argument( - '--epochs', help='Number of epochs to train.', type=int, default=50) - parser.add_argument( - '--steps', help='Number of steps per epoch.', type=int, default=10000) - parser.add_argument( - '--lr', help='Learning rate.', type=float, default=1e-5) - parser.add_argument( - '--snapshot-path', help='Path to store snapshots of models during training (defaults to \'./snapshots\')', default='./snapshots') - parser.add_argument( - '--tensorboard-dir', help='Log directory for Tensorboard output', default='./logs') - parser.add_argument('--no-snapshots', help='Disable saving snapshots.', - dest='snapshots', action='store_false') - parser.add_argument('--no-evaluation', help='Disable per epoch evaluation.', - dest='evaluation', action='store_false') - parser.add_argument( - '--freeze-backbone', help='Freeze training of backbone layers.', action='store_true') - parser.add_argument( - '--random-transform', help='Randomly transform image and annotations.', action='store_true') - parser.add_argument( - '--image-min-side', help='Rescale the image so the smallest side is min_side.', type=int, default=800) - parser.add_argument( - '--image-max-side', help='Rescale the image if the largest side is larger than max_side.', type=int, default=1333) - parser.add_argument( - '--config', help='Path to a configuration parameters .ini file.') - parser.add_argument( - '--weighted-average', help='Compute the mAP using the weighted average of precisions among classes.', action='store_true') - parser.add_argument('--compute-val-loss', help='Compute validation loss during training', - dest='compute_val_loss', action='store_true') - - # Fit generator arguments - parser.add_argument( - '--workers', help='Number of multiprocessing workers. 
To disable multiprocessing, set workers to 0', type=int, default=1) - parser.add_argument( - '--max-queue-size', help='Queue length for multiprocessing workers in fit generator.', type=int, default=10) - - return check_args(parser.parse_args(args)) - - -def main(args=None): - # parse arguments - if args is None: - args = sys.argv[1:] - args = parse_args(args) - - # create object that stores backbone information - backbone = models.backbone(args.backbone) - - # make sure keras is the minimum required version - check_keras_version() - - # optionally choose specific GPU - if args.gpu: - os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu - keras.backend.tensorflow_backend.set_session(get_session()) - - # optionally load config parameters - if args.config: - args.config = read_config_file(args.config) - - # create the generators - train_generator, validation_generator = create_generators( - args, backbone.preprocess_image) - - # create the model - if args.snapshot is not None: - print('Loading model, this may take a second...') - model = models.load_model(args.snapshot, backbone_name=args.backbone) - training_model = model - anchor_params = None - if args.config and 'anchor_parameters' in args.config: - anchor_params = parse_anchor_parameters(args.config) - prediction_model = retinanet_bbox( - model=model, anchor_params=anchor_params) - else: - weights = args.weights - # default to imagenet if nothing else is specified - if weights is None and args.imagenet_weights: - weights = backbone.download_imagenet() - - print('Creating model, this may take a second...') - model, training_model, prediction_model = create_models( - backbone_retinanet=backbone.retinanet, - num_classes=train_generator.num_classes(), - weights=weights, - multi_gpu=args.multi_gpu, - freeze_backbone=args.freeze_backbone, - lr=args.lr, - config=args.config - ) - - # print model summary - print(model.summary()) - - # this lets the generator compute backbone layer shapes using the actual backbone model - if 'vgg' in args.backbone or 'densenet' in args.backbone: - train_generator.compute_shapes = make_shapes_callback(model) - if validation_generator: - validation_generator.compute_shapes = train_generator.compute_shapes - - # create the callbacks - callbacks = create_callbacks( - model, - training_model, - prediction_model, - validation_generator, - args, - ) - - # Use multiprocessing if workers > 0 - if args.workers > 0: - use_multiprocessing = True - else: - use_multiprocessing = False - - if not args.compute_val_loss: - validation_generator = None - - # start training - return training_model.fit_generator( - generator=train_generator, - steps_per_epoch=args.steps, - epochs=args.epochs, - verbose=1, - callbacks=callbacks, - workers=args.workers, - use_multiprocessing=use_multiprocessing, - max_queue_size=args.max_queue_size, - validation_data=validation_generator - ) - - -if __name__ == '__main__': - main() -from .common import * # noqa: F401,F403 -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
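# The training entry point above can also be driven programmatically; the path
# and hyperparameters here are placeholders mirroring a CLI invocation:
main(['--batch-size', '2', '--steps', '100', '--epochs', '1',
      'coco', '/tmp/COCO'])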
-""" - -import keras -from ..utils.coco_eval import evaluate_coco - - -class CocoEval(keras.callbacks.Callback): - """ Performs COCO evaluation on each epoch. - """ - - def __init__(self, generator, tensorboard=None, threshold=0.05): - """ CocoEval callback intializer. - - Args - generator : The generator used for creating validation data. - tensorboard : If given, the results will be written to tensorboard. - threshold : The score threshold to use. - """ - self.generator = generator - self.threshold = threshold - self.tensorboard = tensorboard - - super(CocoEval, self).__init__() - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - - coco_tag = ['AP @[ IoU=0.50:0.95 | area= all | maxDets=100 ]', - 'AP @[ IoU=0.50 | area= all | maxDets=100 ]', - 'AP @[ IoU=0.75 | area= all | maxDets=100 ]', - 'AP @[ IoU=0.50:0.95 | area= small | maxDets=100 ]', - 'AP @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]', - 'AP @[ IoU=0.50:0.95 | area= large | maxDets=100 ]', - 'AR @[ IoU=0.50:0.95 | area= all | maxDets= 1 ]', - 'AR @[ IoU=0.50:0.95 | area= all | maxDets= 10 ]', - 'AR @[ IoU=0.50:0.95 | area= all | maxDets=100 ]', - 'AR @[ IoU=0.50:0.95 | area= small | maxDets=100 ]', - 'AR @[ IoU=0.50:0.95 | area=medium | maxDets=100 ]', - 'AR @[ IoU=0.50:0.95 | area= large | maxDets=100 ]'] - coco_eval_stats = evaluate_coco( - self.generator, self.model, self.threshold) - if coco_eval_stats is not None and self.tensorboard is not None and self.tensorboard.writer is not None: - import tensorflow as tf - summary = tf.Summary() - for index, result in enumerate(coco_eval_stats): - summary_value = summary.value.add() - summary_value.simple_value = result - summary_value.tag = '{}. {}'.format(index + 1, coco_tag[index]) - self.tensorboard.writer.add_summary(summary, epoch) - logs[coco_tag[index]] = result -import keras.callbacks - - -class RedirectModel(keras.callbacks.Callback): - """Callback which wraps another callback, but executed on a different model. - - ```python - model = keras.models.load_model('model.h5') - model_checkpoint = ModelCheckpoint(filepath='snapshot.h5') - parallel_model = multi_gpu_model(model, gpus=2) - parallel_model.fit(X_train, Y_train, callbacks=[RedirectModel(model_checkpoint, model)]) - ``` - - Args - callback : callback to wrap. - model : model to use when executing callbacks. - """ - - def __init__(self, - callback, - model): - super(RedirectModel, self).__init__() - - self.callback = callback - self.redirect_model = model - - def on_epoch_begin(self, epoch, logs=None): - self.callback.on_epoch_begin(epoch, logs=logs) - - def on_epoch_end(self, epoch, logs=None): - self.callback.on_epoch_end(epoch, logs=logs) - - def on_batch_begin(self, batch, logs=None): - self.callback.on_batch_begin(batch, logs=logs) - - def on_batch_end(self, batch, logs=None): - self.callback.on_batch_end(batch, logs=logs) - - def on_train_begin(self, logs=None): - # overwrite the model with our custom model - self.callback.set_model(self.redirect_model) - - self.callback.on_train_begin(logs=logs) - - def on_train_end(self, logs=None): - self.callback.on_train_end(logs=logs) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import keras -from ..utils.eval import evaluate - - -class Evaluate(keras.callbacks.Callback): - """ Evaluation callback for arbitrary datasets. - """ - - def __init__( - self, - generator, - iou_threshold=0.5, - score_threshold=0.05, - max_detections=100, - save_path=None, - tensorboard=None, - weighted_average=False, - verbose=1 - ): - """ Evaluate a given dataset using a given model at the end of every epoch during training. - - # Arguments - generator : The generator that represents the dataset to evaluate. - iou_threshold : The threshold used to consider when a detection is positive or negative. - score_threshold : The score confidence threshold to use for detections. - max_detections : The maximum number of detections to use per image. - save_path : The path to save images with visualized detections to. - tensorboard : Instance of keras.callbacks.TensorBoard used to log the mAP value. - weighted_average : Compute the mAP using the weighted average of precisions among classes. - verbose : Set the verbosity level, by default this is set to 1. - """ - self.generator = generator - self.iou_threshold = iou_threshold - self.score_threshold = score_threshold - self.max_detections = max_detections - self.save_path = save_path - self.tensorboard = tensorboard - self.weighted_average = weighted_average - self.verbose = verbose - - super(Evaluate, self).__init__() - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - - # run evaluation - average_precisions = evaluate( - self.generator, - self.model, - iou_threshold=self.iou_threshold, - score_threshold=self.score_threshold, - max_detections=self.max_detections, - save_path=self.save_path - ) - - # compute per class average precision - total_instances = [] - precisions = [] - for label, (average_precision, num_annotations) in average_precisions.items(): - if self.verbose == 1: - print('{:.0f} instances of class'.format(num_annotations), - self.generator.label_to_name(label), 'with average precision: {:.4f}'.format(average_precision)) - total_instances.append(num_annotations) - precisions.append(average_precision) - if self.weighted_average: - self.mean_ap = sum( - [a * b for a, b in zip(total_instances, precisions)]) / sum(total_instances) - else: - self.mean_ap = sum(precisions) / \ - sum(x > 0 for x in total_instances) - - if self.tensorboard is not None and self.tensorboard.writer is not None: - import tensorflow as tf - summary = tf.Summary() - summary_value = summary.value.add() - summary_value.simple_value = self.mean_ap - summary_value.tag = "mAP" - self.tensorboard.writer.add_summary(summary, epoch) - - logs['mAP'] = self.mean_ap - - if self.verbose == 1: - print('mAP: {:.4f}'.format(self.mean_ap)) -from ._misc import RegressBoxes, UpsampleLike, Anchors, ClipBoxes # noqa: F401 -from .filter_detections import FilterDetections # noqa: F401 -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
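# A sketch of wiring the Evaluate callback above into training by hand; the
# generator and model names are assumptions for illustration:
evaluation = Evaluate(validation_generator, weighted_average=True)
evaluation = RedirectModel(evaluation, prediction_model)  # evaluate on the inference model
training_model.fit_generator(train_generator, steps_per_epoch=1000,
                             epochs=10, callbacks=[evaluation])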
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import keras
-from .. import backend
-from ..utils import anchors as utils_anchors
-
-import numpy as np
-
-
-class Anchors(keras.layers.Layer):
-    """ Keras layer for generating anchors for a given shape.
-    """
-
-    def __init__(self, size, stride, ratios=None, scales=None, *args, **kwargs):
-        """ Initializer for an Anchors layer.
-
-        Args
-            size: The base size of the anchors to generate.
-            stride: The stride of the anchors to generate.
-            ratios: The ratios of the anchors to generate (defaults to AnchorParameters.default.ratios).
-            scales: The scales of the anchors to generate (defaults to AnchorParameters.default.scales).
-        """
-        self.size = size
-        self.stride = stride
-        self.ratios = ratios
-        self.scales = scales
-
-        if ratios is None:
-            self.ratios = utils_anchors.AnchorParameters.default.ratios
-        elif isinstance(ratios, list):
-            self.ratios = np.array(ratios)
-        if scales is None:
-            self.scales = utils_anchors.AnchorParameters.default.scales
-        elif isinstance(scales, list):
-            self.scales = np.array(scales)
-
-        # use the normalized self.ratios/self.scales here: the raw arguments
-        # may still be None, which would make len() fail
-        self.num_anchors = len(self.ratios) * len(self.scales)
-        self.anchors = keras.backend.variable(utils_anchors.generate_anchors(
-            base_size=size,
-            ratios=self.ratios,
-            scales=self.scales,
-        ))
-
-        super(Anchors, self).__init__(*args, **kwargs)
-
-    def call(self, inputs, **kwargs):
-        features = inputs
-        features_shape = keras.backend.shape(features)
-
-        # generate proposals from bbox deltas and shifted anchors
-        if keras.backend.image_data_format() == 'channels_first':
-            anchors = backend.shift(features_shape[2:4], self.stride, self.anchors)
-        else:
-            anchors = backend.shift(features_shape[1:3], self.stride, self.anchors)
-        anchors = keras.backend.tile(keras.backend.expand_dims(anchors, axis=0), (features_shape[0], 1, 1))
-
-        return anchors
-
-    def compute_output_shape(self, input_shape):
-        if None not in input_shape[1:]:
-            if keras.backend.image_data_format() == 'channels_first':
-                total = np.prod(input_shape[2:4]) * self.num_anchors
-            else:
-                total = np.prod(input_shape[1:3]) * self.num_anchors
-
-            return (input_shape[0], total, 4)
-        else:
-            return (input_shape[0], None, 4)
-
-    def get_config(self):
-        config = super(Anchors, self).get_config()
-        config.update({
-            'size': self.size,
-            'stride': self.stride,
-            'ratios': self.ratios.tolist(),
-            'scales': self.scales.tolist(),
-        })
-
-        return config
-
-
-class UpsampleLike(keras.layers.Layer):
-    """ Keras layer for upsampling a Tensor to be the same shape as another Tensor.
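# A quick shape check for the Anchors layer above: with the default
# 3 ratios x 3 scales (9 anchors per location) on a 10x12 feature map,
# compute_output_shape yields 10 * 12 * 9 boxes of (x1, y1, x2, y2):
h, w, num_anchors = 10, 12, 3 * 3
print((h * w * num_anchors, 4))  # (1080, 4) per batch item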
- """ - - def call(self, inputs, **kwargs): - source, target = inputs - target_shape = keras.backend.shape(target) - if keras.backend.image_data_format() == 'channels_first': - source = backend.transpose(source, (0, 2, 3, 1)) - output = backend.resize_images( - source, (target_shape[2], target_shape[3]), method='nearest') - output = backend.transpose(output, (0, 3, 1, 2)) - return output - else: - return backend.resize_images(source, (target_shape[1], target_shape[2]), method='nearest') - - def compute_output_shape(self, input_shape): - if keras.backend.image_data_format() == 'channels_first': - return (input_shape[0][0], input_shape[0][1]) + input_shape[1][2:4] - else: - return (input_shape[0][0],) + input_shape[1][1:3] + (input_shape[0][-1],) - - -class RegressBoxes(keras.layers.Layer): - """ Keras layer for applying regression values to boxes. - """ - - def __init__(self, mean=None, std=None, *args, **kwargs): - """ Initializer for the RegressBoxes layer. - - Args - mean: The mean value of the regression values which was used for normalization. - std: The standard value of the regression values which was used for normalization. - """ - if mean is None: - mean = np.array([0, 0, 0, 0]) - if std is None: - std = np.array([0.2, 0.2, 0.2, 0.2]) - - if isinstance(mean, (list, tuple)): - mean = np.array(mean) - elif not isinstance(mean, np.ndarray): - raise ValueError( - 'Expected mean to be a np.ndarray, list or tuple. Received: {}'.format(type(mean))) - - if isinstance(std, (list, tuple)): - std = np.array(std) - elif not isinstance(std, np.ndarray): - raise ValueError( - 'Expected std to be a np.ndarray, list or tuple. Received: {}'.format(type(std))) - - self.mean = mean - self.std = std - super(RegressBoxes, self).__init__(*args, **kwargs) - - def call(self, inputs, **kwargs): - anchors, regression = inputs - return backend.bbox_transform_inv(anchors, regression, mean=self.mean, std=self.std) - - def compute_output_shape(self, input_shape): - return input_shape[0] - - def get_config(self): - config = super(RegressBoxes, self).get_config() - config.update({ - 'mean': self.mean.tolist(), - 'std': self.std.tolist(), - }) - - return config - - -class ClipBoxes(keras.layers.Layer): - """ Keras layer to clip box values to lie inside a given shape. - """ - - def call(self, inputs, **kwargs): - image, boxes = inputs - shape = keras.backend.cast( - keras.backend.shape(image), keras.backend.floatx()) - if keras.backend.image_data_format() == 'channels_first': - height = shape[2] - width = shape[3] - else: - height = shape[1] - width = shape[2] - x1 = backend.clip_by_value(boxes[:, :, 0], 0, width) - y1 = backend.clip_by_value(boxes[:, :, 1], 0, height) - x2 = backend.clip_by_value(boxes[:, :, 2], 0, width) - y2 = backend.clip_by_value(boxes[:, :, 3], 0, height) - - return keras.backend.stack([x1, y1, x2, y2], axis=2) - - def compute_output_shape(self, input_shape): - return input_shape[1] -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import keras -from .. 
import backend - - -def filter_detections( - boxes, - classification, - other=[], - class_specific_filter=True, - nms=True, - score_threshold=0.05, - max_detections=300, - nms_threshold=0.5 -): - """ Filter detections using the boxes and classification values. - - Args - boxes : Tensor of shape (num_boxes, 4) containing the boxes in (x1, y1, x2, y2) format. - classification : Tensor of shape (num_boxes, num_classes) containing the classification scores. - other : List of tensors of shape (num_boxes, ...) to filter along with the boxes and classification scores. - class_specific_filter : Whether to perform filtering per class, or take the best scoring class and filter those. - nms : Flag to enable/disable non maximum suppression. - score_threshold : Threshold used to prefilter the boxes with. - max_detections : Maximum number of detections to keep. - nms_threshold : Threshold for the IoU value to determine when a box should be suppressed. - - Returns - A list of [boxes, scores, labels, other[0], other[1], ...]. - boxes is shaped (max_detections, 4) and contains the (x1, y1, x2, y2) of the non-suppressed boxes. - scores is shaped (max_detections,) and contains the scores of the predicted class. - labels is shaped (max_detections,) and contains the predicted label. - other[i] is shaped (max_detections, ...) and contains the filtered other[i] data. - In case there are less than max_detections detections, the tensors are padded with -1's. - """ - def _filter_detections(scores, labels): - # threshold based on score - indices = backend.where(keras.backend.greater(scores, score_threshold)) - - if nms: - filtered_boxes = backend.gather_nd(boxes, indices) - filtered_scores = keras.backend.gather(scores, indices)[:, 0] - - # perform NMS - nms_indices = backend.non_max_suppression( - filtered_boxes, filtered_scores, max_output_size=max_detections, iou_threshold=nms_threshold) - - # filter indices based on NMS - indices = keras.backend.gather(indices, nms_indices) - - # add indices to list of all indices - labels = backend.gather_nd(labels, indices) - indices = keras.backend.stack([indices[:, 0], labels], axis=1) - - return indices - - if class_specific_filter: - all_indices = [] - # perform per class filtering - for c in range(int(classification.shape[1])): - scores = classification[:, c] - labels = c * \ - backend.ones((keras.backend.shape(scores)[0],), dtype='int64') - all_indices.append(_filter_detections(scores, labels)) - - # concatenate indices to single tensor - indices = keras.backend.concatenate(all_indices, axis=0) - else: - scores = keras.backend.max(classification, axis=1) - labels = keras.backend.argmax(classification, axis=1) - indices = _filter_detections(scores, labels) - - # select top k - scores = backend.gather_nd(classification, indices) - labels = indices[:, 1] - scores, top_indices = backend.top_k(scores, k=keras.backend.minimum( - max_detections, keras.backend.shape(scores)[0])) - - # filter input using the final set of indices - indices = keras.backend.gather(indices[:, 0], top_indices) - boxes = keras.backend.gather(boxes, indices) - labels = keras.backend.gather(labels, top_indices) - other_ = [keras.backend.gather(o, indices) for o in other] - - # zero pad the outputs - pad_size = keras.backend.maximum( - 0, max_detections - keras.backend.shape(scores)[0]) - boxes = backend.pad(boxes, [[0, pad_size], [0, 0]], constant_values=-1) - scores = backend.pad(scores, [[0, pad_size]], constant_values=-1) - labels = backend.pad(labels, [[0, pad_size]], constant_values=-1) - labels = 
keras.backend.cast(labels, 'int32') - other_ = [backend.pad(o, [[0, pad_size]] + [[0, 0] - for _ in range(1, len(o.shape))], constant_values=-1) for o in other_] - - # set shapes, since we know what they are - boxes.set_shape([max_detections, 4]) - scores.set_shape([max_detections]) - labels.set_shape([max_detections]) - for o, s in zip(other_, [list(keras.backend.int_shape(o)) for o in other]): - o.set_shape([max_detections] + s[1:]) - - return [boxes, scores, labels] + other_ - - -class FilterDetections(keras.layers.Layer): - """ Keras layer for filtering detections using score threshold and NMS. - """ - - def __init__( - self, - nms=True, - class_specific_filter=True, - nms_threshold=0.5, - score_threshold=0.05, - max_detections=300, - parallel_iterations=32, - **kwargs - ): - """ Filters detections using score threshold, NMS and selecting the top-k detections. - - Args - nms : Flag to enable/disable NMS. - class_specific_filter : Whether to perform filtering per class, or take the best scoring class and filter those. - nms_threshold : Threshold for the IoU value to determine when a box should be suppressed. - score_threshold : Threshold used to prefilter the boxes with. - max_detections : Maximum number of detections to keep. - parallel_iterations : Number of batch items to process in parallel. - """ - self.nms = nms - self.class_specific_filter = class_specific_filter - self.nms_threshold = nms_threshold - self.score_threshold = score_threshold - self.max_detections = max_detections - self.parallel_iterations = parallel_iterations - super(FilterDetections, self).__init__(**kwargs) - - def call(self, inputs, **kwargs): - """ Constructs the NMS graph. - - Args - inputs : List of [boxes, classification, other[0], other[1], ...] tensors. - """ - boxes = inputs[0] - classification = inputs[1] - other = inputs[2:] - - # wrap nms with our parameters - def _filter_detections(args): - boxes = args[0] - classification = args[1] - other = args[2] - - return filter_detections( - boxes, - classification, - other, - nms=self.nms, - class_specific_filter=self.class_specific_filter, - score_threshold=self.score_threshold, - max_detections=self.max_detections, - nms_threshold=self.nms_threshold, - ) - - # call filter_detections on each batch - outputs = backend.map_fn( - _filter_detections, - elems=[boxes, classification, other], - dtype=[keras.backend.floatx(), keras.backend.floatx(), - 'int32'] + [o.dtype for o in other], - parallel_iterations=self.parallel_iterations - ) - - return outputs - - def compute_output_shape(self, input_shape): - """ Computes the output shapes given the input shapes. - - Args - input_shape : List of input shapes [boxes, classification, other[0], other[1], ...]. - - Returns - List of tuples representing the output shapes: - [filtered_boxes.shape, filtered_scores.shape, filtered_labels.shape, filtered_other[0].shape, filtered_other[1].shape, ...] - """ - return [ - (input_shape[0][0], self.max_detections, 4), - (input_shape[1][0], self.max_detections), - (input_shape[1][0], self.max_detections), - ] + [ - tuple([input_shape[i][0], self.max_detections] + list(input_shape[i][2:])) for i in range(2, len(input_shape)) - ] - - def compute_mask(self, inputs, mask=None): - """ This is required in Keras when there is more than 1 output. - """ - return (len(inputs) + 1) * [None] - - def get_config(self): - """ Gets the configuration of this layer. - - Returns - Dictionary containing the parameters of this layer. 
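# A plain-NumPy sketch of the greedy NMS that backend.non_max_suppression
# performs inside filter_detections above (illustrative, not the actual
# TensorFlow implementation):
import numpy as np


def iou(box, boxes):
    # intersection-over-union of one (x1, y1, x2, y2) box against many
    x1 = np.maximum(box[0], boxes[:, 0])
    y1 = np.maximum(box[1], boxes[:, 1])
    x2 = np.minimum(box[2], boxes[:, 2])
    y2 = np.minimum(box[3], boxes[:, 3])
    inter = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)
    area_a = (box[2] - box[0]) * (box[3] - box[1])
    area_b = (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1])
    return inter / (area_a + area_b - inter)


def nms(boxes, scores, iou_threshold=0.5):
    order, keep = np.argsort(-scores), []
    while order.size:
        keep.append(order[0])  # keep the best remaining box...
        # ...and drop the rest that overlap it more than the threshold
        order = order[1:][iou(boxes[order[0]], boxes[order[1:]]) <= iou_threshold]
    return keep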
- """ - config = super(FilterDetections, self).get_config() - config.update({ - 'nms': self.nms, - 'class_specific_filter': self.class_specific_filter, - 'nms_threshold': self.nms_threshold, - 'score_threshold': self.score_threshold, - 'max_detections': self.max_detections, - 'parallel_iterations': self.parallel_iterations, - }) - - return config -from __future__ import print_function -import sys - - -class Backbone(object): - """ This class stores additional information on backbones. - """ - - def __init__(self, backbone): - # a dictionary mapping custom layer names to the correct classes - from .. import layers - from .. import losses - from .. import initializers - self.custom_objects = { - 'UpsampleLike': layers.UpsampleLike, - 'PriorProbability': initializers.PriorProbability, - 'RegressBoxes': layers.RegressBoxes, - 'FilterDetections': layers.FilterDetections, - 'Anchors': layers.Anchors, - 'ClipBoxes': layers.ClipBoxes, - '_smooth_l1': losses.smooth_l1(), - '_focal': losses.focal(), - } - - self.backbone = backbone - self.validate() - - def retinanet(self, *args, **kwargs): - """ Returns a retinanet model using the correct backbone. - """ - raise NotImplementedError('retinanet method not implemented.') - - def download_imagenet(self): - """ Downloads ImageNet weights and returns path to weights file. - """ - raise NotImplementedError('download_imagenet method not implemented.') - - def validate(self): - """ Checks whether the backbone string is correct. - """ - raise NotImplementedError('validate method not implemented.') - - def preprocess_image(self, inputs): - """ Takes as input an image and prepares it for being passed through the network. - Having this function in Backbone allows other backbones to define a specific preprocessing step. - """ - raise NotImplementedError('preprocess_image method not implemented.') - - -def backbone(backbone_name): - """ Returns a backbone object for the given backbone. - """ - if 'resnet' in backbone_name: - from .resnet import ResNetBackbone as b - elif 'mobilenet' in backbone_name: - from .mobilenet import MobileNetBackbone as b - elif 'vgg' in backbone_name: - from .vgg import VGGBackbone as b - elif 'densenet' in backbone_name: - from .densenet import DenseNetBackbone as b - else: - raise NotImplementedError( - 'Backbone class for \'{}\' not implemented.'.format(backbone)) - - return b(backbone_name) - - -def load_model(filepath, backbone_name='resnet50'): - """ Loads a retinanet model using the correct custom objects. - - Args - filepath: one of the following: - - string, path to the saved model, or - - h5py.File object from which to load the model - backbone_name : Backbone with which the model was trained. - - Returns - A keras.models.Model object. - - Raises - ImportError: if h5py is not available. - ValueError: In case of an invalid savefile. - """ - import keras.models - return keras.models.load_model(filepath, custom_objects=backbone(backbone_name).custom_objects) - - -def convert_model(model, nms=True, class_specific_filter=True, anchor_params=None): - """ Converts a training model to an inference model. - - Args - model : A retinanet training model. - nms : Boolean, whether to add NMS filtering to the converted model. - class_specific_filter : Whether to use class specific filtering or filter for the best scoring class only. - anchor_params : Anchor parameters object. If omitted, default values are used. - - Returns - A keras.models.Model object. - - Raises - ImportError: if h5py is not available. 
- ValueError: In case of an invalid savefile. - """ - from .retinanet import retinanet_bbox - return retinanet_bbox(model=model, nms=nms, class_specific_filter=class_specific_filter, anchor_params=anchor_params) - - -def assert_training_model(model): - """ Assert that the model is a training model. - """ - assert(all(output in model.output_names for output in ['regression', 'classification'])), \ - "Input is not a training model (no 'regression' and 'classification' outputs were found, outputs are: {}).".format( - model.output_names) - - -def check_training_model(model): - """ Check that model is a training model and exit otherwise. - """ - try: - assert_training_model(model) - except AssertionError as e: - print(e, file=sys.stderr) - sys.exit(1) -""" -Copyright 2018 vidosits (https://github.com/vidosits/) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import keras -from keras.applications import densenet -from keras.utils import get_file - -from . import retinanet -from . import Backbone -from ..utils.image import preprocess_image - - -allowed_backbones = { - 'densenet121': ([6, 12, 24, 16], densenet.DenseNet121), - 'densenet169': ([6, 12, 32, 32], densenet.DenseNet169), - 'densenet201': ([6, 12, 48, 32], densenet.DenseNet201), -} - - -class DenseNetBackbone(Backbone): - """ Describes backbone information and provides utility functions. - """ - - def retinanet(self, *args, **kwargs): - """ Returns a retinanet model using the correct backbone. - """ - return densenet_retinanet(*args, backbone=self.backbone, **kwargs) - - def download_imagenet(self): - """ Download pre-trained weights for the specified backbone name. - This name is in the format {backbone}_weights_tf_dim_ordering_tf_kernels_notop - where backbone is the densenet + number of layers (e.g. densenet121). - For more info check the explanation from the keras densenet script itself: - https://github.com/keras-team/keras/blob/master/keras/applications/densenet.py - """ - origin = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.8/' - file_name = '{}_weights_tf_dim_ordering_tf_kernels_notop.h5' - - # load weights - if keras.backend.image_data_format() == 'channels_first': - raise ValueError( - 'Weights for "channels_first" format are not available.') - - weights_url = origin + file_name.format(self.backbone) - return get_file(file_name.format(self.backbone), weights_url, cache_subdir='models') - - def validate(self): - """ Checks whether the backbone string is correct. - """ - backbone = self.backbone.split('_')[0] - - if backbone not in allowed_backbones: - raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format( - backbone, allowed_backbones.keys())) - - def preprocess_image(self, inputs): - """ Takes as input an image and prepares it for being passed through the network. - """ - return preprocess_image(inputs, mode='tf') - - -def densenet_retinanet(num_classes, backbone='densenet121', inputs=None, modifier=None, **kwargs): - """ Constructs a retinanet model using a densenet backbone. 
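# A typical inference-side flow using the helpers above (the snapshot path is
# a placeholder):
model = load_model('/path/to/snapshot.h5', backbone_name='resnet50')
inference_model = convert_model(model)  # appends bbox decoding and NMS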
- - Args - num_classes: Number of classes to predict. - backbone: Which backbone to use (one of ('densenet121', 'densenet169', 'densenet201')). - inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). - modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). - - Returns - RetinaNet model with a DenseNet backbone. - """ - # choose default input - if inputs is None: - inputs = keras.layers.Input((None, None, 3)) - - blocks, creator = allowed_backbones[backbone] - model = creator(input_tensor=inputs, include_top=False, - pooling=None, weights=None) - - # get last conv layer from the end of each dense block - layer_outputs = [model.get_layer(name='conv{}_block{}_concat'.format( - idx + 2, block_num)).output for idx, block_num in enumerate(blocks)] - - # create the densenet backbone - model = keras.models.Model( - inputs=inputs, outputs=layer_outputs[1:], name=model.name) - - # invoke modifier if given - if modifier: - model = modifier(model) - - # create the full model - model = retinanet.retinanet( - inputs=inputs, num_classes=num_classes, backbone_layers=model.outputs, **kwargs) - - return model -""" -Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import keras -from keras.applications import mobilenet -from keras.utils import get_file -from ..utils.image import preprocess_image - -from . import retinanet -from . import Backbone - - -class MobileNetBackbone(Backbone): - """ Describes backbone information and provides utility functions. - """ - - allowed_backbones = ['mobilenet128', - 'mobilenet160', 'mobilenet192', 'mobilenet224'] - - def retinanet(self, *args, **kwargs): - """ Returns a retinanet model using the correct backbone. - """ - return mobilenet_retinanet(*args, backbone=self.backbone, **kwargs) - - def download_imagenet(self): - """ Download pre-trained weights for the specified backbone name. - This name is in the format mobilenet{rows}_{alpha} where rows is the - imagenet shape dimension and 'alpha' controls the width of the network. - For more info check the explanation from the keras mobilenet script itself. - """ - - alpha = float(self.backbone.split('_')[1]) - rows = int(self.backbone.split('_')[0].replace('mobilenet', '')) - - # load weights - if keras.backend.image_data_format() == 'channels_first': - raise ValueError('Weights for "channels_last" format ' - 'are not available.') - if alpha == 1.0: - alpha_text = '1_0' - elif alpha == 0.75: - alpha_text = '7_5' - elif alpha == 0.50: - alpha_text = '5_0' - else: - alpha_text = '2_5' - - model_name = 'mobilenet_{}_{}_tf_no_top.h5'.format(alpha_text, rows) - weights_url = mobilenet.mobilenet.BASE_WEIGHT_PATH + model_name - weights_path = get_file(model_name, weights_url, cache_subdir='models') - - return weights_path - - def validate(self): - """ Checks whether the backbone string is correct. 
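# How the mobilenet backbone string is split by download_imagenet above
# (the value shown is the default 'mobilenet224_1.0'):
backbone_str = 'mobilenet224_1.0'
alpha = float(backbone_str.split('_')[1])                        # 1.0, the width multiplier
rows = int(backbone_str.split('_')[0].replace('mobilenet', ''))  # 224, the input resolution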
- """ - backbone = self.backbone.split('_')[0] - - if backbone not in MobileNetBackbone.allowed_backbones: - raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format( - backbone, MobileNetBackbone.allowed_backbones)) - - def preprocess_image(self, inputs): - """ Takes as input an image and prepares it for being passed through the network. - """ - return preprocess_image(inputs, mode='tf') - - -def mobilenet_retinanet(num_classes, backbone='mobilenet224_1.0', inputs=None, modifier=None, **kwargs): - """ Constructs a retinanet model using a mobilenet backbone. - - Args - num_classes: Number of classes to predict. - backbone: Which backbone to use (one of ('mobilenet128', 'mobilenet160', 'mobilenet192', 'mobilenet224')). - inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). - modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). - - Returns - RetinaNet model with a MobileNet backbone. - """ - alpha = float(backbone.split('_')[1]) - - # choose default input - if inputs is None: - inputs = keras.layers.Input((None, None, 3)) - - backbone = mobilenet.MobileNet( - input_tensor=inputs, alpha=alpha, include_top=False, pooling=None, weights=None) - - # create the full model - layer_names = ['conv_pw_5_relu', 'conv_pw_11_relu', 'conv_pw_13_relu'] - layer_outputs = [backbone.get_layer(name).output for name in layer_names] - backbone = keras.models.Model( - inputs=inputs, outputs=layer_outputs, name=backbone.name) - - # invoke modifier if given - if modifier: - backbone = modifier(backbone) - - return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=backbone.outputs, **kwargs) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import keras -from keras.utils import get_file -import keras_resnet -import keras_resnet.models - -from . import retinanet -from . import Backbone -from ..utils.image import preprocess_image - - -class ResNetBackbone(Backbone): - """ Describes backbone information and provides utility functions. - """ - - def __init__(self, backbone): - super(ResNetBackbone, self).__init__(backbone) - self.custom_objects.update(keras_resnet.custom_objects) - - def retinanet(self, *args, **kwargs): - """ Returns a retinanet model using the correct backbone. - """ - return resnet_retinanet(*args, backbone=self.backbone, **kwargs) - - def download_imagenet(self): - """ Downloads ImageNet weights and returns path to weights file. 
- """ - resnet_filename = 'ResNet-{}-model.keras.h5' - resnet_resource = 'https://github.com/fizyr/keras-models/releases/download/v0.0.1/{}'.format( - resnet_filename) - depth = int(self.backbone.replace('resnet', '')) - - filename = resnet_filename.format(depth) - resource = resnet_resource.format(depth) - if depth == 50: - checksum = '3e9f4e4f77bbe2c9bec13b53ee1c2319' - elif depth == 101: - checksum = '05dc86924389e5b401a9ea0348a3213c' - elif depth == 152: - checksum = '6ee11ef2b135592f8031058820bb9e71' - - return get_file( - filename, - resource, - cache_subdir='models', - md5_hash=checksum - ) - - def validate(self): - """ Checks whether the backbone string is correct. - """ - allowed_backbones = ['resnet50', 'resnet101', 'resnet152'] - backbone = self.backbone.split('_')[0] - - if backbone not in allowed_backbones: - raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format( - backbone, allowed_backbones)) - - def preprocess_image(self, inputs): - """ Takes as input an image and prepares it for being passed through the network. - """ - return preprocess_image(inputs, mode='caffe') - - -def resnet_retinanet(num_classes, backbone='resnet50', inputs=None, modifier=None, **kwargs): - """ Constructs a retinanet model using a resnet backbone. - - Args - num_classes: Number of classes to predict. - backbone: Which backbone to use (one of ('resnet50', 'resnet101', 'resnet152')). - inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). - modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). - - Returns - RetinaNet model with a ResNet backbone. - """ - # choose default input - if inputs is None: - if keras.backend.image_data_format() == 'channels_first': - inputs = keras.layers.Input(shape=(3, None, None)) - else: - inputs = keras.layers.Input(shape=(None, None, 3)) - - # create the resnet backbone - if backbone == 'resnet50': - resnet = keras_resnet.models.ResNet50( - inputs, include_top=False, freeze_bn=True) - elif backbone == 'resnet101': - resnet = keras_resnet.models.ResNet101( - inputs, include_top=False, freeze_bn=True) - elif backbone == 'resnet152': - resnet = keras_resnet.models.ResNet152( - inputs, include_top=False, freeze_bn=True) - else: - raise ValueError('Backbone (\'{}\') is invalid.'.format(backbone)) - - # invoke modifier if given - if modifier: - resnet = modifier(resnet) - - # create the full model - return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=resnet.outputs[1:], **kwargs) - - -def resnet50_retinanet(num_classes, inputs=None, **kwargs): - return resnet_retinanet(num_classes=num_classes, backbone='resnet50', inputs=inputs, **kwargs) - - -def resnet101_retinanet(num_classes, inputs=None, **kwargs): - return resnet_retinanet(num_classes=num_classes, backbone='resnet101', inputs=inputs, **kwargs) - - -def resnet152_retinanet(num_classes, inputs=None, **kwargs): - return resnet_retinanet(num_classes=num_classes, backbone='resnet152', inputs=inputs, **kwargs) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-"""
-
-import keras
-from .. import initializers
-from .. import layers
-from ..utils.anchors import AnchorParameters
-from . import assert_training_model
-
-
-def default_classification_model(
-    num_classes,
-    num_anchors,
-    pyramid_feature_size=256,
-    prior_probability=0.01,
-    classification_feature_size=256,
-    name='classification_submodel'
-):
-    """ Creates the default classification submodel.
-
-    Args
-        num_classes                 : Number of classes to predict a score for at each feature level.
-        num_anchors                 : Number of anchors to predict classification scores for at each feature level.
-        pyramid_feature_size        : The number of filters to expect from the feature pyramid levels.
-        prior_probability           : Prior probability used to initialise the bias of the final layer.
-        classification_feature_size : The number of filters to use in the layers in the classification submodel.
-        name                        : The name of the submodel.
-
-    Returns
-        A keras.models.Model that predicts classes for each anchor.
-    """
-    options = {
-        'kernel_size': 3,
-        'strides': 1,
-        'padding': 'same',
-    }
-
-    if keras.backend.image_data_format() == 'channels_first':
-        inputs = keras.layers.Input(shape=(pyramid_feature_size, None, None))
-    else:
-        inputs = keras.layers.Input(shape=(None, None, pyramid_feature_size))
-    outputs = inputs
-    for i in range(4):
-        outputs = keras.layers.Conv2D(
-            filters=classification_feature_size,
-            activation='relu',
-            name='pyramid_classification_{}'.format(i),
-            kernel_initializer=keras.initializers.normal(mean=0.0, stddev=0.01, seed=None),
-            bias_initializer='zeros',
-            **options
-        )(outputs)
-
-    outputs = keras.layers.Conv2D(
-        filters=num_classes * num_anchors,
-        kernel_initializer=keras.initializers.normal(mean=0.0, stddev=0.01, seed=None),
-        bias_initializer=initializers.PriorProbability(probability=prior_probability),
-        name='pyramid_classification',
-        **options
-    )(outputs)
-
-    # reshape output and apply sigmoid
-    if keras.backend.image_data_format() == 'channels_first':
-        outputs = keras.layers.Permute((2, 3, 1), name='pyramid_classification_permute')(outputs)
-    outputs = keras.layers.Reshape((-1, num_classes), name='pyramid_classification_reshape')(outputs)
-    outputs = keras.layers.Activation('sigmoid', name='pyramid_classification_sigmoid')(outputs)
-
-    return keras.models.Model(inputs=inputs, outputs=outputs, name=name)
-
-
-def default_regression_model(num_values, num_anchors, pyramid_feature_size=256, regression_feature_size=256, name='regression_submodel'):
-    """ Creates the default regression submodel.
-
-    Args
-        num_values              : Number of values to regress.
-        num_anchors             : Number of anchors to regress for each feature level.
-        pyramid_feature_size    : The number of filters to expect from the feature pyramid levels.
-        regression_feature_size : The number of filters to use in the layers in the regression submodel.
-        name                    : The name of the submodel.
-
-    Returns
-        A keras.models.Model that predicts regression values for each anchor.
-    """
-    # All new conv layers except the final one in the
-    # RetinaNet (classification) subnets are initialized
-    # with bias b = 0 and a Gaussian weight fill with stddev = 0.01.
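# Note on the asymmetry with the classification head above: its final layer is
# biased with PriorProbability instead. Per the focal loss paper, setting
# b = -log((1 - pi) / pi) with pi = 0.01 starts every anchor at roughly 1%
# foreground confidence, e.g. -math.log((1 - 0.01) / 0.01) == approx -4.595.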
- options = { - 'kernel_size': 3, - 'strides': 1, - 'padding': 'same', - 'kernel_initializer': keras.initializers.normal(mean=0.0, stddev=0.01, seed=None), - 'bias_initializer': 'zeros' - } - - if keras.backend.image_data_format() == 'channels_first': - inputs = keras.layers.Input(shape=(pyramid_feature_size, None, None)) - else: - inputs = keras.layers.Input(shape=(None, None, pyramid_feature_size)) - outputs = inputs - for i in range(4): - outputs = keras.layers.Conv2D( - filters=regression_feature_size, - activation='relu', - name='pyramid_regression_{}'.format(i), - **options - )(outputs) - - outputs = keras.layers.Conv2D( - num_anchors * num_values, name='pyramid_regression', **options)(outputs) - if keras.backend.image_data_format() == 'channels_first': - outputs = keras.layers.Permute( - (2, 3, 1), name='pyramid_regression_permute')(outputs) - outputs = keras.layers.Reshape( - (-1, num_values), name='pyramid_regression_reshape')(outputs) - - return keras.models.Model(inputs=inputs, outputs=outputs, name=name) - - -def __create_pyramid_features(C3, C4, C5, feature_size=256): - """ Creates the FPN layers on top of the backbone features. - - Args - C3 : Feature stage C3 from the backbone. - C4 : Feature stage C4 from the backbone. - C5 : Feature stage C5 from the backbone. - feature_size : The feature size to use for the resulting feature levels. - - Returns - A list of feature levels [P3, P4, P5, P6, P7]. - """ - # upsample C5 to get P5 from the FPN paper - P5 = keras.layers.Conv2D(feature_size, kernel_size=1, - strides=1, padding='same', name='C5_reduced')(C5) - P5_upsampled = layers.UpsampleLike(name='P5_upsampled')([P5, C4]) - P5 = keras.layers.Conv2D(feature_size, kernel_size=3, - strides=1, padding='same', name='P5')(P5) - - # add P5 elementwise to C4 - P4 = keras.layers.Conv2D(feature_size, kernel_size=1, - strides=1, padding='same', name='C4_reduced')(C4) - P4 = keras.layers.Add(name='P4_merged')([P5_upsampled, P4]) - P4_upsampled = layers.UpsampleLike(name='P4_upsampled')([P4, C3]) - P4 = keras.layers.Conv2D(feature_size, kernel_size=3, - strides=1, padding='same', name='P4')(P4) - - # add P4 elementwise to C3 - P3 = keras.layers.Conv2D(feature_size, kernel_size=1, - strides=1, padding='same', name='C3_reduced')(C3) - P3 = keras.layers.Add(name='P3_merged')([P4_upsampled, P3]) - P3 = keras.layers.Conv2D(feature_size, kernel_size=3, - strides=1, padding='same', name='P3')(P3) - - # "P6 is obtained via a 3x3 stride-2 conv on C5" - P6 = keras.layers.Conv2D(feature_size, kernel_size=3, - strides=2, padding='same', name='P6')(C5) - - # "P7 is computed by applying ReLU followed by a 3x3 stride-2 conv on P6" - P7 = keras.layers.Activation('relu', name='C6_relu')(P6) - P7 = keras.layers.Conv2D(feature_size, kernel_size=3, - strides=2, padding='same', name='P7')(P7) - - return [P3, P4, P5, P6, P7] - - -def default_submodels(num_classes, num_anchors): - """ Create a list of default submodels used for object detection. - - The default submodels contains a regression submodel and a classification submodel. - - Args - num_classes : Number of classes to use. - num_anchors : Number of base anchors. - - Returns - A list of tuple, where the first element is the name of the submodel and the second element is the submodel itself. - """ - return [ - ('regression', default_regression_model(4, num_anchors)), - ('classification', default_classification_model(num_classes, num_anchors)) - ] - - -def __build_model_pyramid(name, model, features): - """ Applies a single submodel to each FPN level. 
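# Bookkeeping for the pyramid built above: P3..P7 sit at strides of
# [8, 16, 32, 64, 128] relative to the input image (each level halves the
# resolution), matching the default anchor strides:
print([2 ** level for level in range(3, 8)])  # [8, 16, 32, 64, 128]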
-
-    Args
-        name     : Name of the submodel.
-        model    : The submodel to evaluate.
-        features : The FPN features.
-
-    Returns
-        A tensor containing the response from the submodel on the FPN features.
-    """
-    return keras.layers.Concatenate(axis=1, name=name)([model(f) for f in features])
-
-
-def __build_pyramid(models, features):
-    """ Applies all submodels to each FPN level.
-
-    Args
-        models   : List of submodels to run on each pyramid level (by default only regression, classification).
-        features : The FPN features.
-
-    Returns
-        A list of tensors, one for each submodel.
-    """
-    return [__build_model_pyramid(n, m, features) for n, m in models]
-
-
-def __build_anchors(anchor_parameters, features):
-    """ Builds anchors for the shape of the features from FPN.
-
-    Args
-        anchor_parameters : Parameters that determine how anchors are generated.
-        features          : The FPN features.
-
-    Returns
-        A tensor containing the anchors for the FPN features.
-
-        The shape is:
-        ```
-        (batch_size, num_anchors, 4)
-        ```
-    """
-    anchors = [
-        layers.Anchors(
-            size=anchor_parameters.sizes[i],
-            stride=anchor_parameters.strides[i],
-            ratios=anchor_parameters.ratios,
-            scales=anchor_parameters.scales,
-            name='anchors_{}'.format(i)
-        )(f) for i, f in enumerate(features)
-    ]
-
-    return keras.layers.Concatenate(axis=1, name='anchors')(anchors)
-
-
-def retinanet(
-    inputs,
-    backbone_layers,
-    num_classes,
-    num_anchors=None,
-    create_pyramid_features=__create_pyramid_features,
-    submodels=None,
-    name='retinanet'
-):
-    """ Constructs a RetinaNet model on top of a backbone.
-
-    This model is the minimum model necessary for training (with the unfortunate exception of anchors as output).
-
-    Args
-        inputs                  : keras.layers.Input (or list of) for the input to the model.
-        backbone_layers         : The backbone feature stages C3, C4 and C5 on which to build the pyramid.
-        num_classes             : Number of classes to classify.
-        num_anchors             : Number of base anchors.
-        create_pyramid_features : Functor for creating pyramid features given the features C3, C4, C5 from the backbone.
-        submodels               : Submodels to run on each feature map (default is regression and classification submodels).
-        name                    : Name of the model.
-
-    Returns
-        A keras.models.Model which takes an image as input and outputs generated anchors and the result from each submodel on every pyramid level.
-
-        The order of the outputs is as defined in submodels:
-        ```
-        [
-            regression, classification, other[0], other[1], ...
-        ]
-        ```
-    """
-
-    if num_anchors is None:
-        num_anchors = AnchorParameters.default.num_anchors()
-
-    if submodels is None:
-        submodels = default_submodels(num_classes, num_anchors)
-
-    C3, C4, C5 = backbone_layers
-
-    # compute pyramid features as per https://arxiv.org/abs/1708.02002
-    features = create_pyramid_features(C3, C4, C5)
-
-    # for all pyramid levels, run available submodels
-    pyramids = __build_pyramid(submodels, features)
-
-    return keras.models.Model(inputs=inputs, outputs=pyramids, name=name)
-
-
-def retinanet_bbox(
-    model=None,
-    nms=True,
-    class_specific_filter=True,
-    name='retinanet-bbox',
-    anchor_params=None,
-    **kwargs
-):
-    """ Constructs a RetinaNet model on top of a backbone and adds convenience functions to output boxes directly.
-
-    This model uses the minimum retinanet model and appends a few layers to compute boxes within the graph.
-    These layers include applying the regression values to the anchors and performing NMS.
-
-    Args
-        model                 : RetinaNet model to append bbox layers to. If None, it will create a RetinaNet model using **kwargs.
-        nms                   : Whether to use non-maximum suppression for the filtering step.
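The `Concatenate(axis=1, name=...)` calls above merge the per-level outputs along the anchor axis, so each submodel ends up with one tensor covering every pyramid level. A toy numpy illustration with invented anchor counts:

```python
import numpy as np

# e.g. regression outputs from three pyramid levels with 120/30/8 anchors each
per_level = [np.zeros((1, n, 4)) for n in (120, 30, 8)]
merged = np.concatenate(per_level, axis=1)
print(merged.shape)   # (1, 158, 4)
```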
- class_specific_filter : Whether to use class specific filtering or filter for the best scoring class only. - name : Name of the model. - anchor_params : Struct containing anchor parameters. If None, default values are used. - *kwargs : Additional kwargs to pass to the minimal retinanet model. - - Returns - A keras.models.Model which takes an image as input and outputs the detections on the image. - - The order is defined as follows: - ``` - [ - boxes, scores, labels, other[0], other[1], ... - ] - ``` - """ - - # if no anchor parameters are passed, use default values - if anchor_params is None: - anchor_params = AnchorParameters.default - - # create RetinaNet model - if model is None: - model = retinanet(num_anchors=anchor_params.num_anchors(), **kwargs) - else: - assert_training_model(model) - - # compute the anchors - features = [model.get_layer(p_name).output for p_name in [ - 'P3', 'P4', 'P5', 'P6', 'P7']] - anchors = __build_anchors(anchor_params, features) - - # we expect the anchors, regression and classification values as first output - regression = model.outputs[0] - classification = model.outputs[1] - - # "other" can be any additional output from custom submodels, by default this will be [] - other = model.outputs[2:] - - # apply predicted regression to anchors - boxes = layers.RegressBoxes(name='boxes')([anchors, regression]) - boxes = layers.ClipBoxes(name='clipped_boxes')([model.inputs[0], boxes]) - - # filter detections (apply NMS / score threshold / select top-k) - detections = layers.FilterDetections( - nms=nms, - class_specific_filter=class_specific_filter, - name='filtered_detections' - )([boxes, classification] + other) - - # construct the model - return keras.models.Model(inputs=model.inputs, outputs=detections, name=name) -""" -Copyright 2017-2018 cgratie (https://github.com/cgratie/) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - - -import keras -from keras.utils import get_file - -from . import retinanet -from . import Backbone -from ..utils.image import preprocess_image - - -class VGGBackbone(Backbone): - """ Describes backbone information and provides utility functions. - """ - - def retinanet(self, *args, **kwargs): - """ Returns a retinanet model using the correct backbone. - """ - return vgg_retinanet(*args, backbone=self.backbone, **kwargs) - - def download_imagenet(self): - """ Downloads ImageNet weights and returns path to weights file. - Weights can be downloaded at https://github.com/fizyr/keras-models/releases . 
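For readers wondering what the detection outputs look like downstream, here is a hedged post-processing sketch; the arrays are dummies standing in for `model.predict()` output, and the assumption that unused detection slots are padded with -1 is based on the filtering layer's usual padding behaviour, not on anything shown in this file:

```python
import numpy as np

boxes  = np.array([[[10., 10., 50., 60.], [0., 0., 0., 0.]]])
scores = np.array([[0.8, -1.0]])   # -1 assumed to mark padded, unused slots
labels = np.array([[3, -1]])

keep = scores[0] >= 0.5
for box, score, label in zip(boxes[0][keep], scores[0][keep], labels[0][keep]):
    print(int(label), float(score), box)
```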
- """ - if self.backbone == 'vgg16': - resource = keras.applications.vgg16.vgg16.WEIGHTS_PATH_NO_TOP - checksum = '6d6bbae143d832006294945121d1f1fc' - elif self.backbone == 'vgg19': - resource = keras.applications.vgg19.vgg19.WEIGHTS_PATH_NO_TOP - checksum = '253f8cb515780f3b799900260a226db6' - else: - raise ValueError( - "Backbone '{}' not recognized.".format(self.backbone)) - - return get_file( - '{}_weights_tf_dim_ordering_tf_kernels_notop.h5'.format( - self.backbone), - resource, - cache_subdir='models', - file_hash=checksum - ) - - def validate(self): - """ Checks whether the backbone string is correct. - """ - allowed_backbones = ['vgg16', 'vgg19'] - - if self.backbone not in allowed_backbones: - raise ValueError('Backbone (\'{}\') not in allowed backbones ({}).'.format( - self.backbone, allowed_backbones)) - - def preprocess_image(self, inputs): - """ Takes as input an image and prepares it for being passed through the network. - """ - return preprocess_image(inputs, mode='caffe') - - -def vgg_retinanet(num_classes, backbone='vgg16', inputs=None, modifier=None, **kwargs): - """ Constructs a retinanet model using a vgg backbone. - - Args - num_classes: Number of classes to predict. - backbone: Which backbone to use (one of ('vgg16', 'vgg19')). - inputs: The inputs to the network (defaults to a Tensor of shape (None, None, 3)). - modifier: A function handler which can modify the backbone before using it in retinanet (this can be used to freeze backbone layers for example). - - Returns - RetinaNet model with a VGG backbone. - """ - # choose default input - if inputs is None: - inputs = keras.layers.Input(shape=(None, None, 3)) - - # create the vgg backbone - if backbone == 'vgg16': - vgg = keras.applications.VGG16( - input_tensor=inputs, include_top=False, weights=None) - elif backbone == 'vgg19': - vgg = keras.applications.VGG19( - input_tensor=inputs, include_top=False, weights=None) - else: - raise ValueError("Backbone '{}' not recognized.".format(backbone)) - - if modifier: - vgg = modifier(vgg) - - # create the full model - layer_names = ["block3_pool", "block4_pool", "block5_pool"] - layer_outputs = [vgg.get_layer(name).output for name in layer_names] - return retinanet.retinanet(inputs=inputs, num_classes=num_classes, backbone_layers=layer_outputs, **kwargs) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -from ..preprocessing.generator import Generator -from ..utils.image import read_image_bgr - -import os -import numpy as np - -from pycocotools.coco import COCO - - -class CocoGenerator(Generator): - """ Generate data from the COCO dataset. - - See https://github.com/cocodataset/cocoapi/tree/master/PythonAPI for more information. - """ - - def __init__(self, data_dir, set_name, **kwargs): - """ Initialize a COCO data generator. - - Args - data_dir: Path to where the COCO dataset is stored. - set_name: Name of the set to parse. 
- """ - self.data_dir = data_dir - self.set_name = set_name - self.coco = COCO(os.path.join(data_dir, 'annotations', - 'instances_' + set_name + '.json')) - self.image_ids = self.coco.getImgIds() - - self.load_classes() - - super(CocoGenerator, self).__init__(**kwargs) - - def load_classes(self): - """ Loads the class to label mapping (and inverse) for COCO. - """ - # load class names (name -> label) - categories = self.coco.loadCats(self.coco.getCatIds()) - categories.sort(key=lambda x: x['id']) - - self.classes = {} - self.coco_labels = {} - self.coco_labels_inverse = {} - for c in categories: - self.coco_labels[len(self.classes)] = c['id'] - self.coco_labels_inverse[c['id']] = len(self.classes) - self.classes[c['name']] = len(self.classes) - - # also load the reverse (label -> name) - self.labels = {} - for key, value in self.classes.items(): - self.labels[value] = key - - def size(self): - """ Size of the COCO dataset. - """ - return len(self.image_ids) - - def num_classes(self): - """ Number of classes in the dataset. For COCO this is 80. - """ - return len(self.classes) - - def has_label(self, label): - """ Return True if label is a known label. - """ - return label in self.labels - - def has_name(self, name): - """ Returns True if name is a known class. - """ - return name in self.classes - - def name_to_label(self, name): - """ Map name to label. - """ - return self.classes[name] - - def label_to_name(self, label): - """ Map label to name. - """ - return self.labels[label] - - def coco_label_to_label(self, coco_label): - """ Map COCO label to the label as used in the network. - COCO has some gaps in the order of labels. The highest label is 90, but there are 80 classes. - """ - return self.coco_labels_inverse[coco_label] - - def coco_label_to_name(self, coco_label): - """ Map COCO label to name. - """ - return self.label_to_name(self.coco_label_to_label(coco_label)) - - def label_to_coco_label(self, label): - """ Map label as used by the network to labels as used by COCO. - """ - return self.coco_labels[label] - - def image_aspect_ratio(self, image_index): - """ Compute the aspect ratio for an image with image_index. - """ - image = self.coco.loadImgs(self.image_ids[image_index])[0] - return float(image['width']) / float(image['height']) - - def load_image(self, image_index): - """ Load an image at the image_index. - """ - image_info = self.coco.loadImgs(self.image_ids[image_index])[0] - path = os.path.join(self.data_dir, 'images', - self.set_name, image_info['file_name']) - return read_image_bgr(path) - - def load_annotations(self, image_index): - """ Load annotations for an image_index. 
- """ - # get ground truth annotations - annotations_ids = self.coco.getAnnIds( - imgIds=self.image_ids[image_index], iscrowd=False) - annotations = {'labels': np.empty((0,)), 'bboxes': np.empty((0, 4))} - - # some images appear to miss annotations (like image with id 257034) - if len(annotations_ids) == 0: - return annotations - - # parse annotations - coco_annotations = self.coco.loadAnns(annotations_ids) - for idx, a in enumerate(coco_annotations): - # some annotations have basically no width / height, skip them - if a['bbox'][2] < 1 or a['bbox'][3] < 1: - continue - - annotations['labels'] = np.concatenate( - [annotations['labels'], [self.coco_label_to_label(a['category_id'])]], axis=0) - annotations['bboxes'] = np.concatenate([annotations['bboxes'], [[ - a['bbox'][0], - a['bbox'][1], - a['bbox'][0] + a['bbox'][2], - a['bbox'][1] + a['bbox'][3], - ]]], axis=0) - - return annotations -""" -Copyright 2017-2018 yhenon (https://github.com/yhenon/) -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -from .generator import Generator -from ..utils.image import read_image_bgr - -import numpy as np -from PIL import Image -from six import raise_from - -import csv -import sys -import os.path - - -def _parse(value, function, fmt): - """ - Parse a string into a value, and format a nice ValueError if it fails. - - Returns `function(value)`. - Any `ValueError` raised is catched and a new `ValueError` is raised - with message `fmt.format(e)`, where `e` is the caught `ValueError`. - """ - try: - return function(value) - except ValueError as e: - raise_from(ValueError(fmt.format(e)), None) - - -def _read_classes(csv_reader): - """ Parse the classes file given by csv_reader. - """ - result = {} - for line, row in enumerate(csv_reader): - line += 1 - - try: - class_name, class_id = row - except ValueError: - raise_from(ValueError( - 'line {}: format should be \'class_name,class_id\''.format(line)), None) - class_id = _parse( - class_id, int, 'line {}: malformed class ID: {{}}'.format(line)) - - if class_name in result: - raise ValueError( - 'line {}: duplicate class name: \'{}\''.format(line, class_name)) - result[class_name] = class_id - return result - - -def _read_annotations(csv_reader, classes): - """ Read annotations from the csv_reader. - """ - result = {} - for line, row in enumerate(csv_reader): - line += 1 - - try: - img_file, x1, y1, x2, y2, class_name = row[:6] - except ValueError: - raise_from(ValueError( - 'line {}: format should be \'img_file,x1,y1,x2,y2,class_name\' or \'img_file,,,,,\''.format(line)), None) - - if img_file not in result: - result[img_file] = [] - - # If a row contains only an image path, it's an image without annotations. 
- if (x1, y1, x2, y2, class_name) == ('', '', '', '', ''): - continue - - x1 = _parse(x1, int, 'line {}: malformed x1: {{}}'.format(line)) - y1 = _parse(y1, int, 'line {}: malformed y1: {{}}'.format(line)) - x2 = _parse(x2, int, 'line {}: malformed x2: {{}}'.format(line)) - y2 = _parse(y2, int, 'line {}: malformed y2: {{}}'.format(line)) - - # Check that the bounding box is valid. - if x2 <= x1: - raise ValueError( - 'line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1)) - if y2 <= y1: - raise ValueError( - 'line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1)) - - # check if the current class name is correctly present - if class_name not in classes: - raise ValueError('line {}: unknown class name: \'{}\' (classes: {})'.format( - line, class_name, classes)) - - result[img_file].append( - {'x1': x1, 'x2': x2, 'y1': y1, 'y2': y2, 'class': class_name}) - return result - - -def _open_for_csv(path): - """ Open a file with flags suitable for csv.reader. - - This is different for python2 it means with mode 'rb', - for python3 this means 'r' with "universal newlines". - """ - if sys.version_info[0] < 3: - return open(path, 'rb') - else: - return open(path, 'r', newline='') - - -class CSVGenerator(Generator): - """ Generate data for a custom CSV dataset. - - See https://github.com/fizyr/keras-retinanet#csv-datasets for more information. - """ - - def __init__( - self, - csv_data_file, - csv_class_file, - base_dir=None, - **kwargs - ): - """ Initialize a CSV data generator. - - Args - csv_data_file: Path to the CSV annotations file. - csv_class_file: Path to the CSV classes file. - base_dir: Directory w.r.t. where the files are to be searched (defaults to the directory containing the csv_data_file). - """ - self.image_names = [] - self.image_data = {} - self.base_dir = base_dir - - # Take base_dir from annotations file if not explicitly specified. - if self.base_dir is None: - self.base_dir = os.path.dirname(csv_data_file) - - # parse the provided class file - try: - with _open_for_csv(csv_class_file) as file: - self.classes = _read_classes(csv.reader(file, delimiter=',')) - except ValueError as e: - raise_from(ValueError( - 'invalid CSV class file: {}: {}'.format(csv_class_file, e)), None) - - self.labels = {} - for key, value in self.classes.items(): - self.labels[value] = key - - # csv with img_path, x1, y1, x2, y2, class_name - try: - with _open_for_csv(csv_data_file) as file: - self.image_data = _read_annotations( - csv.reader(file, delimiter=','), self.classes) - except ValueError as e: - raise_from(ValueError( - 'invalid CSV annotations file: {}: {}'.format(csv_data_file, e)), None) - self.image_names = list(self.image_data.keys()) - - super(CSVGenerator, self).__init__(**kwargs) - - def size(self): - """ Size of the dataset. - """ - return len(self.image_names) - - def num_classes(self): - """ Number of classes in the dataset. - """ - return max(self.classes.values()) + 1 - - def has_label(self, label): - """ Return True if label is a known label. - """ - return label in self.labels - - def has_name(self, name): - """ Returns True if name is a known class. - """ - return name in self.classes - - def name_to_label(self, name): - """ Map name to label. - """ - return self.classes[name] - - def label_to_name(self, label): - """ Map label to name. - """ - return self.labels[label] - - def image_path(self, image_index): - """ Returns the image path for image_index. 
- """ - return os.path.join(self.base_dir, self.image_names[image_index]) - - def image_aspect_ratio(self, image_index): - """ Compute the aspect ratio for an image with image_index. - """ - # PIL is fast for metadata - image = Image.open(self.image_path(image_index)) - return float(image.width) / float(image.height) - - def load_image(self, image_index): - """ Load an image at the image_index. - """ - return read_image_bgr(self.image_path(image_index)) - - def load_annotations(self, image_index): - """ Load annotations for an image_index. - """ - path = self.image_names[image_index] - annotations = {'labels': np.empty((0,)), 'bboxes': np.empty((0, 4))} - - for idx, annot in enumerate(self.image_data[path]): - annotations['labels'] = np.concatenate( - (annotations['labels'], [self.name_to_label(annot['class'])])) - annotations['bboxes'] = np.concatenate((annotations['bboxes'], [[ - float(annot['x1']), - float(annot['y1']), - float(annot['x2']), - float(annot['y2']), - ]])) - - return annotations -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import numpy as np -import random -import warnings - -import keras - -from ..utils.anchors import ( - anchor_targets_bbox, - anchors_for_shape, - guess_shapes -) -from ..utils.config import parse_anchor_parameters -from ..utils.image import ( - TransformParameters, - adjust_transform_for_image, - apply_transform, - preprocess_image, - resize_image, -) -from ..utils.transform import transform_aabb - - -class Generator(keras.utils.Sequence): - """ Abstract generator class. - """ - - def __init__( - self, - transform_generator=None, - batch_size=1, - group_method='ratio', # one of 'none', 'random', 'ratio' - shuffle_groups=True, - image_min_side=800, - image_max_side=1333, - transform_parameters=None, - compute_anchor_targets=anchor_targets_bbox, - compute_shapes=guess_shapes, - preprocess_image=preprocess_image, - config=None - ): - """ Initialize Generator object. - - Args - transform_generator : A generator used to randomly transform images and annotations. - batch_size : The size of the batches to generate. - group_method : Determines how images are grouped together (defaults to 'ratio', one of ('none', 'random', 'ratio')). - shuffle_groups : If True, shuffles the groups each epoch. - image_min_side : After resizing the minimum side of an image is equal to image_min_side. - image_max_side : If after resizing the maximum side is larger than image_max_side, scales down further so that the max side is equal to image_max_side. - transform_parameters : The transform parameters used for data augmentation. - compute_anchor_targets : Function handler for computing the targets of anchors for an image and its annotations. - compute_shapes : Function handler for computing the shapes of the pyramid for a given input. - preprocess_image : Function handler for preprocessing an image (scaling / normalizing) for passing through a network. 
- """ - self.transform_generator = transform_generator - self.batch_size = int(batch_size) - self.group_method = group_method - self.shuffle_groups = shuffle_groups - self.image_min_side = image_min_side - self.image_max_side = image_max_side - self.transform_parameters = transform_parameters or TransformParameters() - self.compute_anchor_targets = compute_anchor_targets - self.compute_shapes = compute_shapes - self.preprocess_image = preprocess_image - self.config = config - - # Define groups - self.group_images() - - # Shuffle when initializing - if self.shuffle_groups: - self.on_epoch_end() - - def on_epoch_end(self): - if self.shuffle_groups: - random.shuffle(self.groups) - - def size(self): - """ Size of the dataset. - """ - raise NotImplementedError('size method not implemented') - - def num_classes(self): - """ Number of classes in the dataset. - """ - raise NotImplementedError('num_classes method not implemented') - - def has_label(self, label): - """ Returns True if label is a known label. - """ - raise NotImplementedError('has_label method not implemented') - - def has_name(self, name): - """ Returns True if name is a known class. - """ - raise NotImplementedError('has_name method not implemented') - - def name_to_label(self, name): - """ Map name to label. - """ - raise NotImplementedError('name_to_label method not implemented') - - def label_to_name(self, label): - """ Map label to name. - """ - raise NotImplementedError('label_to_name method not implemented') - - def image_aspect_ratio(self, image_index): - """ Compute the aspect ratio for an image with image_index. - """ - raise NotImplementedError('image_aspect_ratio method not implemented') - - def load_image(self, image_index): - """ Load an image at the image_index. - """ - raise NotImplementedError('load_image method not implemented') - - def load_annotations(self, image_index): - """ Load annotations for an image_index. - """ - raise NotImplementedError('load_annotations method not implemented') - - def load_annotations_group(self, group): - """ Load annotations for all images in group. - """ - annotations_group = [self.load_annotations( - image_index) for image_index in group] - for annotations in annotations_group: - assert(isinstance(annotations, dict) - ), '\'load_annotations\' should return a list of dictionaries, received: {}'.format(type(annotations)) - assert('labels' in annotations), '\'load_annotations\' should return a list of dictionaries that contain \'labels\' and \'bboxes\'.' - assert('bboxes' in annotations), '\'load_annotations\' should return a list of dictionaries that contain \'labels\' and \'bboxes\'.' - - return annotations_group - - def filter_annotations(self, image_group, annotations_group, group): - """ Filter annotations by removing those that are outside of the image bounds or whose width/height < 0. 
- """ - # test all annotations - for index, (image, annotations) in enumerate(zip(image_group, annotations_group)): - # test x2 < x1 | y2 < y1 | x1 < 0 | y1 < 0 | x2 <= 0 | y2 <= 0 | x2 >= image.shape[1] | y2 >= image.shape[0] - invalid_indices = np.where( - (annotations['bboxes'][:, 2] <= annotations['bboxes'][:, 0]) | - (annotations['bboxes'][:, 3] <= annotations['bboxes'][:, 1]) | - (annotations['bboxes'][:, 0] < 0) | - (annotations['bboxes'][:, 1] < 0) | - (annotations['bboxes'][:, 2] > image.shape[1]) | - (annotations['bboxes'][:, 3] > image.shape[0]) - )[0] - - # delete invalid indices - if len(invalid_indices): - warnings.warn('Image with id {} (shape {}) contains the following invalid boxes: {}.'.format( - group[index], - image.shape, - annotations['bboxes'][invalid_indices, :] - )) - for k in annotations_group[index].keys(): - annotations_group[index][k] = np.delete( - annotations[k], invalid_indices, axis=0) - - return image_group, annotations_group - - def load_image_group(self, group): - """ Load images for all images in a group. - """ - return [self.load_image(image_index) for image_index in group] - - def random_transform_group_entry(self, image, annotations, transform=None): - """ Randomly transforms image and annotation. - """ - # randomly transform both image and annotations - if transform is not None or self.transform_generator: - if transform is None: - transform = adjust_transform_for_image(next( - self.transform_generator), image, self.transform_parameters.relative_translation) - - # apply transformation to image - image = apply_transform( - transform, image, self.transform_parameters) - - # Transform the bounding boxes in the annotations. - annotations['bboxes'] = annotations['bboxes'].copy() - for index in range(annotations['bboxes'].shape[0]): - annotations['bboxes'][index, :] = transform_aabb( - transform, annotations['bboxes'][index, :]) - - return image, annotations - - def random_transform_group(self, image_group, annotations_group): - """ Randomly transforms each image and its annotations. - """ - - assert(len(image_group) == len(annotations_group)) - - for index in range(len(image_group)): - # transform a single group entry - image_group[index], annotations_group[index] = self.random_transform_group_entry( - image_group[index], annotations_group[index]) - - return image_group, annotations_group - - def resize_image(self, image): - """ Resize an image using image_min_side and image_max_side. - """ - return resize_image(image, min_side=self.image_min_side, max_side=self.image_max_side) - - def preprocess_group_entry(self, image, annotations): - """ Preprocess image and its annotations. - """ - # preprocess the image - image = self.preprocess_image(image) - - # resize image - image, image_scale = self.resize_image(image) - - # apply resizing to annotations too - annotations['bboxes'] *= image_scale - - # convert to the wanted keras floatx - image = keras.backend.cast_to_floatx(image) - - return image, annotations - - def preprocess_group(self, image_group, annotations_group): - """ Preprocess each image and its annotations in its group. - """ - assert(len(image_group) == len(annotations_group)) - - for index in range(len(image_group)): - # preprocess a single group entry - image_group[index], annotations_group[index] = self.preprocess_group_entry( - image_group[index], annotations_group[index]) - - return image_group, annotations_group - - def group_images(self): - """ Order the images according to self.order and makes groups of self.batch_size. 
- """ - # determine the order of the images - order = list(range(self.size())) - if self.group_method == 'random': - random.shuffle(order) - elif self.group_method == 'ratio': - order.sort(key=lambda x: self.image_aspect_ratio(x)) - - # divide into groups, one group = one batch - self.groups = [[order[x % len(order)] for x in range( - i, i + self.batch_size)] for i in range(0, len(order), self.batch_size)] - - def compute_inputs(self, image_group): - """ Compute inputs for the network using an image_group. - """ - # get the max image shape - max_shape = tuple(max(image.shape[x] - for image in image_group) for x in range(3)) - - # construct an image batch object - image_batch = np.zeros((self.batch_size,) + - max_shape, dtype=keras.backend.floatx()) - - # copy all images to the upper left part of the image batch object - for image_index, image in enumerate(image_group): - image_batch[image_index, :image.shape[0], - :image.shape[1], :image.shape[2]] = image - - if keras.backend.image_data_format() == 'channels_first': - image_batch = image_batch.transpose((0, 3, 1, 2)) - - return image_batch - - def generate_anchors(self, image_shape): - anchor_params = None - if self.config and 'anchor_parameters' in self.config: - anchor_params = parse_anchor_parameters(self.config) - return anchors_for_shape(image_shape, anchor_params=anchor_params, shapes_callback=self.compute_shapes) - - def compute_targets(self, image_group, annotations_group): - """ Compute target outputs for the network using images and their annotations. - """ - # get the max image shape - max_shape = tuple(max(image.shape[x] - for image in image_group) for x in range(3)) - anchors = self.generate_anchors(max_shape) - - batches = self.compute_anchor_targets( - anchors, - image_group, - annotations_group, - self.num_classes() - ) - - return list(batches) - - def compute_input_output(self, group): - """ Compute inputs and target outputs for the network. - """ - # load images and annotations - image_group = self.load_image_group(group) - annotations_group = self.load_annotations_group(group) - - # check validity of annotations - image_group, annotations_group = self.filter_annotations( - image_group, annotations_group, group) - - # randomly transform data - image_group, annotations_group = self.random_transform_group( - image_group, annotations_group) - - # perform preprocessing steps - image_group, annotations_group = self.preprocess_group( - image_group, annotations_group) - - # compute network inputs - inputs = self.compute_inputs(image_group) - - # compute network targets - targets = self.compute_targets(image_group, annotations_group) - - return inputs, targets - - def __len__(self): - """ - Number of batches for generator. - """ - - return len(self.groups) - - def __getitem__(self, index): - """ - Keras sequence method for generating batches. - """ - group = self.groups[index] - inputs, targets = self.compute_input_output(group) - - return inputs, targets -""" -Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -""" - -import csv -import os.path - -import numpy as np -from PIL import Image - -from .generator import Generator -from ..utils.image import read_image_bgr - -kitti_classes = { - 'Car': 0, - 'Van': 1, - 'Truck': 2, - 'Pedestrian': 3, - 'Person_sitting': 4, - 'Cyclist': 5, - 'Tram': 6, - 'Misc': 7, - 'DontCare': 7 -} - - -class KittiGenerator(Generator): - """ Generate data for a KITTI dataset. - - See http://www.cvlibs.net/datasets/kitti/ for more information. - """ - - def __init__( - self, - base_dir, - subset='train', - **kwargs - ): - """ Initialize a KITTI data generator. - - Args - base_dir: Directory w.r.t. where the files are to be searched (defaults to the directory containing the csv_data_file). - subset: The subset to generate data for (defaults to 'train'). - """ - self.base_dir = base_dir - - label_dir = os.path.join(self.base_dir, subset, 'labels') - image_dir = os.path.join(self.base_dir, subset, 'images') - - """ - 1 type Describes the type of object: 'Car', 'Van', 'Truck', - 'Pedestrian', 'Person_sitting', 'Cyclist', 'Tram', - 'Misc' or 'DontCare' - 1 truncated Float from 0 (non-truncated) to 1 (truncated), where - truncated refers to the object leaving image boundaries - 1 occluded Integer (0,1,2,3) indicating occlusion state: - 0 = fully visible, 1 = partly occluded - 2 = largely occluded, 3 = unknown - 1 alpha Observation angle of object, ranging [-pi..pi] - 4 bbox 2D bounding box of object in the image (0-based index): - contains left, top, right, bottom pixel coordinates - 3 dimensions 3D object dimensions: height, width, length (in meters) - 3 location 3D object location x,y,z in camera coordinates (in meters) - 1 rotation_y Rotation ry around Y-axis in camera coordinates [-pi..pi] - """ - - self.labels = {} - self.classes = kitti_classes - for name, label in self.classes.items(): - self.labels[label] = name - - self.image_data = dict() - self.images = [] - for i, fn in enumerate(os.listdir(label_dir)): - label_fp = os.path.join(label_dir, fn) - image_fp = os.path.join(image_dir, fn.replace('.txt', '.png')) - - self.images.append(image_fp) - - fieldnames = ['type', 'truncated', 'occluded', 'alpha', 'left', 'top', 'right', 'bottom', 'dh', 'dw', 'dl', - 'lx', 'ly', 'lz', 'ry'] - with open(label_fp, 'r') as csv_file: - reader = csv.DictReader( - csv_file, delimiter=' ', fieldnames=fieldnames) - boxes = [] - for line, row in enumerate(reader): - label = row['type'] - cls_id = kitti_classes[label] - - annotation = { - 'cls_id': cls_id, 'x1': row['left'], 'x2': row['right'], 'y2': row['bottom'], 'y1': row['top']} - boxes.append(annotation) - - self.image_data[i] = boxes - - super(KittiGenerator, self).__init__(**kwargs) - - def size(self): - """ Size of the dataset. - """ - return len(self.images) - - def num_classes(self): - """ Number of classes in the dataset. - """ - return max(self.classes.values()) + 1 - - def has_label(self, label): - """ Return True if label is a known label. - """ - return label in self.labels - - def has_name(self, name): - """ Returns True if name is a known class. - """ - return name in self.classes - - def name_to_label(self, name): - """ Map name to label. - """ - raise NotImplementedError() - - def label_to_name(self, label): - """ Map label to name. - """ - return self.labels[label] - - def image_aspect_ratio(self, image_index): - """ Compute the aspect ratio for an image with image_index. 
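Each KITTI label file is a space-separated table parsed by the `DictReader` shown above. A single invented line run through the same field names:

```python
import csv
import io

fieldnames = ['type', 'truncated', 'occluded', 'alpha', 'left', 'top', 'right',
              'bottom', 'dh', 'dw', 'dl', 'lx', 'ly', 'lz', 'ry']
line = 'Car 0.00 0 1.85 387.63 181.54 423.81 203.12 1.67 1.87 3.69 -16.53 2.39 58.49 1.57'
row = next(csv.DictReader(io.StringIO(line), delimiter=' ', fieldnames=fieldnames))
print(row['type'], row['left'], row['top'], row['right'], row['bottom'])
```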
- """ - # PIL is fast for metadata - image = Image.open(self.images[image_index]) - return float(image.width) / float(image.height) - - def load_image(self, image_index): - """ Load an image at the image_index. - """ - return read_image_bgr(self.images[image_index]) - - def load_annotations(self, image_index): - """ Load annotations for an image_index. - """ - image_data = self.image_data[image_index] - annotations = {'labels': np.empty( - (len(image_data),)), 'bboxes': np.empty((len(image_data), 4))} - - for idx, ann in enumerate(image_data): - annotations['bboxes'][idx, 0] = float(ann['x1']) - annotations['bboxes'][idx, 1] = float(ann['y1']) - annotations['bboxes'][idx, 2] = float(ann['x2']) - annotations['bboxes'][idx, 3] = float(ann['y2']) - annotations['labels'][idx] = int(ann['cls_id']) - - return annotations -""" -Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import csv -import json -import os -import warnings - -import numpy as np -from PIL import Image - -from .generator import Generator -from ..utils.image import read_image_bgr - - -def load_hierarchy(metadata_dir, version='v4'): - hierarchy = None - if version == 'challenge2018': - hierarchy = 'bbox_labels_500_hierarchy.json' - elif version == 'v4': - hierarchy = 'bbox_labels_600_hierarchy.json' - elif version == 'v3': - hierarchy = 'bbox_labels_600_hierarchy.json' - - hierarchy_json = os.path.join(metadata_dir, hierarchy) - with open(hierarchy_json) as f: - hierarchy_data = json.loads(f.read()) - - return hierarchy_data - - -def load_hierarchy_children(hierarchy): - res = [hierarchy['LabelName']] - - if 'Subcategory' in hierarchy: - for subcategory in hierarchy['Subcategory']: - children = load_hierarchy_children(subcategory) - - for c in children: - res.append(c) - - return res - - -def find_hierarchy_parent(hierarchy, parent_cls): - if hierarchy['LabelName'] == parent_cls: - return hierarchy - elif 'Subcategory' in hierarchy: - for child in hierarchy['Subcategory']: - res = find_hierarchy_parent(child, parent_cls) - if res is not None: - return res - - return None - - -def get_labels(metadata_dir, version='v4'): - if version == 'v4' or version == 'challenge2018': - csv_file = 'class-descriptions-boxable.csv' if version == 'v4' else 'challenge-2018-class-descriptions-500.csv' - - boxable_classes_descriptions = os.path.join(metadata_dir, csv_file) - id_to_labels = {} - cls_index = {} - - i = 0 - with open(boxable_classes_descriptions) as f: - for row in csv.reader(f): - # make sure the csv row is not empty (usually the last one) - if len(row): - label = row[0] - description = row[1].replace( - "\"", "").replace("'", "").replace('`', '') - - id_to_labels[i] = description - cls_index[label] = i - - i += 1 - else: - trainable_classes_path = os.path.join( - metadata_dir, 'classes-bbox-trainable.txt') - description_path = os.path.join(metadata_dir, 'class-descriptions.csv') - - description_table = {} - with open(description_path) as f: - for row in csv.reader(f): - # make sure the csv 
row is not empty (usually the last one) - if len(row): - description_table[row[0]] = row[1].replace( - "\"", "").replace("'", "").replace('`', '') - - with open(trainable_classes_path, 'rb') as f: - trainable_classes = f.read().split('\n') - - id_to_labels = dict([(i, description_table[c]) - for i, c in enumerate(trainable_classes)]) - cls_index = dict([(c, i) for i, c in enumerate(trainable_classes)]) - - return id_to_labels, cls_index - - -def generate_images_annotations_json(main_dir, metadata_dir, subset, cls_index, version='v4'): - validation_image_ids = {} - - if version == 'v4': - annotations_path = os.path.join( - metadata_dir, subset, '{}-annotations-bbox.csv'.format(subset)) - elif version == 'challenge2018': - validation_image_ids_path = os.path.join( - metadata_dir, 'challenge-2018-image-ids-valset-od.csv') - - with open(validation_image_ids_path, 'r') as csv_file: - reader = csv.DictReader(csv_file, fieldnames=['ImageID']) - next(reader) - for line, row in enumerate(reader): - image_id = row['ImageID'] - validation_image_ids[image_id] = True - - annotations_path = os.path.join( - metadata_dir, 'challenge-2018-train-annotations-bbox.csv') - else: - annotations_path = os.path.join( - metadata_dir, subset, 'annotations-human-bbox.csv') - - fieldnames = ['ImageID', 'Source', 'LabelName', 'Confidence', - 'XMin', 'XMax', 'YMin', 'YMax', - 'IsOccluded', 'IsTruncated', 'IsGroupOf', 'IsDepiction', 'IsInside'] - - id_annotations = dict() - with open(annotations_path, 'r') as csv_file: - reader = csv.DictReader(csv_file, fieldnames=fieldnames) - next(reader) - - images_sizes = {} - for line, row in enumerate(reader): - frame = row['ImageID'] - - if version == 'challenge2018': - if subset == 'train': - if frame in validation_image_ids: - continue - elif subset == 'validation': - if frame not in validation_image_ids: - continue - else: - raise NotImplementedError( - 'This generator handles only the train and validation subsets') - - class_name = row['LabelName'] - - if class_name not in cls_index: - continue - - cls_id = cls_index[class_name] - - if version == 'challenge2018': - # We recommend participants to use the provided subset of the training set as a validation set. - # This is preferable over using the V4 val/test sets, as the training set is more densely annotated. - img_path = os.path.join( - main_dir, 'images', 'train', frame + '.jpg') - else: - img_path = os.path.join( - main_dir, 'images', subset, frame + '.jpg') - - if frame in images_sizes: - width, height = images_sizes[frame] - else: - try: - with Image.open(img_path) as img: - width, height = img.width, img.height - images_sizes[frame] = (width, height) - except Exception as ex: - if version == 'challenge2018': - raise ex - continue - - x1 = float(row['XMin']) - x2 = float(row['XMax']) - y1 = float(row['YMin']) - y2 = float(row['YMax']) - - x1_int = int(round(x1 * width)) - x2_int = int(round(x2 * width)) - y1_int = int(round(y1 * height)) - y2_int = int(round(y2 * height)) - - # Check that the bounding box is valid. 
- if x2 <= x1: - raise ValueError( - 'line {}: x2 ({}) must be higher than x1 ({})'.format(line, x2, x1)) - if y2 <= y1: - raise ValueError( - 'line {}: y2 ({}) must be higher than y1 ({})'.format(line, y2, y1)) - - if y2_int == y1_int: - warnings.warn( - 'filtering line {}: rounding y2 ({}) and y1 ({}) makes them equal'.format(line, y2, y1)) - continue - - if x2_int == x1_int: - warnings.warn( - 'filtering line {}: rounding x2 ({}) and x1 ({}) makes them equal'.format(line, x2, x1)) - continue - - img_id = row['ImageID'] - annotation = {'cls_id': cls_id, 'x1': x1, - 'x2': x2, 'y1': y1, 'y2': y2} - - if img_id in id_annotations: - annotations = id_annotations[img_id] - annotations['boxes'].append(annotation) - else: - id_annotations[img_id] = {'w': width, - 'h': height, 'boxes': [annotation]} - return id_annotations - - -class OpenImagesGenerator(Generator): - def __init__( - self, main_dir, subset, version='v4', - labels_filter=None, annotation_cache_dir='.', - parent_label=None, - **kwargs - ): - if version == 'challenge2018': - metadata = 'challenge2018' - elif version == 'v4': - metadata = '2018_04' - elif version == 'v3': - metadata = '2017_11' - else: - raise NotImplementedError( - 'There is currently no implementation for versions older than v3') - - if version == 'challenge2018': - self.base_dir = os.path.join(main_dir, 'images', 'train') - else: - self.base_dir = os.path.join(main_dir, 'images', subset) - - metadata_dir = os.path.join(main_dir, metadata) - annotation_cache_json = os.path.join( - annotation_cache_dir, subset + '.json') - - self.hierarchy = load_hierarchy(metadata_dir, version=version) - id_to_labels, cls_index = get_labels(metadata_dir, version=version) - - if os.path.exists(annotation_cache_json): - with open(annotation_cache_json, 'r') as f: - self.annotations = json.loads(f.read()) - else: - self.annotations = generate_images_annotations_json( - main_dir, metadata_dir, subset, cls_index, version=version) - json.dump(self.annotations, open(annotation_cache_json, "w")) - - if labels_filter is not None or parent_label is not None: - self.id_to_labels, self.annotations = self.__filter_data( - id_to_labels, cls_index, labels_filter, parent_label) - else: - self.id_to_labels = id_to_labels - - self.id_to_image_id = dict([(i, k) - for i, k in enumerate(self.annotations)]) - - super(OpenImagesGenerator, self).__init__(**kwargs) - - def __filter_data(self, id_to_labels, cls_index, labels_filter=None, parent_label=None): - """ - If you want to work with a subset of the labels just set a list with trainable labels - :param labels_filter: Ex: labels_filter = ['Helmet', 'Hat', 'Analog television'] - :param parent_label: If parent_label is set this will bring you the parent label - but also its children in the semantic hierarchy as defined in OID, ex: Animal - hierarchical tree - :return: - """ - - children_id_to_labels = {} - - if parent_label is None: - # there is/are no other sublabel(s) other than the labels itself - - for label in labels_filter: - for i, lb in id_to_labels.items(): - if lb == label: - children_id_to_labels[i] = label - break - else: - parent_cls = None - for i, lb in id_to_labels.items(): - if lb == parent_label: - parent_id = i - for c, index in cls_index.items(): - if index == parent_id: - parent_cls = c - break - - if parent_cls is None: - raise Exception('Couldnt find label {}'.format(parent_label)) - - parent_tree = find_hierarchy_parent(self.hierarchy, parent_cls) - - if parent_tree is None: - raise Exception( - 'Couldnt find parent {} in the 
semantic hierarchical tree'.format(parent_label)) - - children = load_hierarchy_children(parent_tree) - - for cls in children: - index = cls_index[cls] - label = id_to_labels[index] - children_id_to_labels[index] = label - - id_map = dict([(ind, i) - for i, ind in enumerate(children_id_to_labels.keys())]) - - filtered_annotations = {} - for k in self.annotations: - img_ann = self.annotations[k] - - filtered_boxes = [] - for ann in img_ann['boxes']: - cls_id = ann['cls_id'] - if cls_id in children_id_to_labels: - ann['cls_id'] = id_map[cls_id] - filtered_boxes.append(ann) - - if len(filtered_boxes) > 0: - filtered_annotations[k] = { - 'w': img_ann['w'], 'h': img_ann['h'], 'boxes': filtered_boxes} - - children_id_to_labels = dict( - [(id_map[i], l) for (i, l) in children_id_to_labels.items()]) - - return children_id_to_labels, filtered_annotations - - def size(self): - return len(self.annotations) - - def num_classes(self): - return len(self.id_to_labels) - - def has_label(self, label): - """ Return True if label is a known label. - """ - return label in self.id_to_labels - - def has_name(self, name): - """ Returns True if name is a known class. - """ - raise NotImplementedError() - - def name_to_label(self, name): - raise NotImplementedError() - - def label_to_name(self, label): - return self.id_to_labels[label] - - def image_aspect_ratio(self, image_index): - img_annotations = self.annotations[self.id_to_image_id[image_index]] - height, width = img_annotations['h'], img_annotations['w'] - return float(width) / float(height) - - def image_path(self, image_index): - path = os.path.join( - self.base_dir, self.id_to_image_id[image_index] + '.jpg') - return path - - def load_image(self, image_index): - return read_image_bgr(self.image_path(image_index)) - - def load_annotations(self, image_index): - image_annotations = self.annotations[self.id_to_image_id[image_index]] - - labels = image_annotations['boxes'] - height, width = image_annotations['h'], image_annotations['w'] - - annotations = {'labels': np.empty( - (len(labels),)), 'bboxes': np.empty((len(labels), 4))} - for idx, ann in enumerate(labels): - cls_id = ann['cls_id'] - x1 = ann['x1'] * width - x2 = ann['x2'] * width - y1 = ann['y1'] * height - y2 = ann['y2'] * height - - annotations['bboxes'][idx, 0] = x1 - annotations['bboxes'][idx, 1] = y1 - annotations['bboxes'][idx, 2] = x2 - annotations['bboxes'][idx, 3] = y2 - annotations['labels'][idx] = cls_id - - return annotations -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" - -from ..preprocessing.generator import Generator -from ..utils.image import read_image_bgr - -import os -import numpy as np -from six import raise_from -from PIL import Image - -try: - import xml.etree.cElementTree as ET -except ImportError: - import xml.etree.ElementTree as ET - -voc_classes = { - 'aeroplane': 0, - 'bicycle': 1, - 'bird': 2, - 'boat': 3, - 'bottle': 4, - 'bus': 5, - 'car': 6, - 'cat': 7, - 'chair': 8, - 'cow': 9, - 'diningtable': 10, - 'dog': 11, - 'horse': 12, - 'motorbike': 13, - 'person': 14, - 'pottedplant': 15, - 'sheep': 16, - 'sofa': 17, - 'train': 18, - 'tvmonitor': 19 -} - - -def _findNode(parent, name, debug_name=None, parse=None): - if debug_name is None: - debug_name = name - - result = parent.find(name) - if result is None: - raise ValueError('missing element \'{}\''.format(debug_name)) - if parse is not None: - try: - return parse(result.text) - except ValueError as e: - raise_from(ValueError( - 'illegal value for \'{}\': {}'.format(debug_name, e)), None) - return result - - -class PascalVocGenerator(Generator): - """ Generate data for a Pascal VOC dataset. - - See http://host.robots.ox.ac.uk/pascal/VOC/ for more information. - """ - - def __init__( - self, - data_dir, - set_name, - classes=voc_classes, - image_extension='.jpg', - skip_truncated=False, - skip_difficult=False, - **kwargs - ): - """ Initialize a Pascal VOC data generator. - - Args - base_dir: Directory w.r.t. where the files are to be searched (defaults to the directory containing the csv_data_file). - csv_class_file: Path to the CSV classes file. - """ - self.data_dir = data_dir - self.set_name = set_name - self.classes = classes - self.image_names = [l.strip().split(None, 1)[0] for l in open( - os.path.join(data_dir, 'ImageSets', 'Main', set_name + '.txt')).readlines()] - self.image_extension = image_extension - self.skip_truncated = skip_truncated - self.skip_difficult = skip_difficult - - self.labels = {} - for key, value in self.classes.items(): - self.labels[value] = key - - super(PascalVocGenerator, self).__init__(**kwargs) - - def size(self): - """ Size of the dataset. - """ - return len(self.image_names) - - def num_classes(self): - """ Number of classes in the dataset. - """ - return len(self.classes) - - def has_label(self, label): - """ Return True if label is a known label. - """ - return label in self.labels - - def has_name(self, name): - """ Returns True if name is a known class. - """ - return name in self.classes - - def name_to_label(self, name): - """ Map name to label. - """ - return self.classes[name] - - def label_to_name(self, label): - """ Map label to name. - """ - return self.labels[label] - - def image_aspect_ratio(self, image_index): - """ Compute the aspect ratio for an image with image_index. - """ - path = os.path.join(self.data_dir, 'JPEGImages', - self.image_names[image_index] + self.image_extension) - image = Image.open(path) - return float(image.width) / float(image.height) - - def load_image(self, image_index): - """ Load an image at the image_index. - """ - path = os.path.join(self.data_dir, 'JPEGImages', - self.image_names[image_index] + self.image_extension) - return read_image_bgr(path) - - def __parse_annotation(self, element): - """ Parse an annotation given an XML element. 
- """ - truncated = _findNode(element, 'truncated', parse=int) - difficult = _findNode(element, 'difficult', parse=int) - - class_name = _findNode(element, 'name').text - if class_name not in self.classes: - raise ValueError('class name \'{}\' not found in classes: {}'.format( - class_name, list(self.classes.keys()))) - - box = np.zeros((4,)) - label = self.name_to_label(class_name) - - bndbox = _findNode(element, 'bndbox') - box[0] = _findNode(bndbox, 'xmin', 'bndbox.xmin', parse=float) - 1 - box[1] = _findNode(bndbox, 'ymin', 'bndbox.ymin', parse=float) - 1 - box[2] = _findNode(bndbox, 'xmax', 'bndbox.xmax', parse=float) - 1 - box[3] = _findNode(bndbox, 'ymax', 'bndbox.ymax', parse=float) - 1 - - return truncated, difficult, box, label - - def __parse_annotations(self, xml_root): - """ Parse all annotations under the xml_root. - """ - annotations = {'labels': np.empty((len(xml_root.findall( - 'object')),)), 'bboxes': np.empty((len(xml_root.findall('object')), 4))} - for i, element in enumerate(xml_root.iter('object')): - try: - truncated, difficult, box, label = self.__parse_annotation( - element) - except ValueError as e: - raise_from(ValueError( - 'could not parse object #{}: {}'.format(i, e)), None) - - if truncated and self.skip_truncated: - continue - if difficult and self.skip_difficult: - continue - - annotations['bboxes'][i, :] = box - annotations['labels'][i] = label - - return annotations - - def load_annotations(self, image_index): - """ Load annotations for an image_index. - """ - filename = self.image_names[image_index] + '.xml' - try: - tree = ET.parse(os.path.join( - self.data_dir, 'Annotations', filename)) - return self.__parse_annotations(tree.getroot()) - except ET.ParseError as e: - raise_from(ValueError( - 'invalid annotations file: {}: {}'.format(filename, e)), None) - except ValueError as e: - raise_from(ValueError( - 'invalid annotations file: {}: {}'.format(filename, e)), None) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import numpy as np -import keras - -from ..utils.compute_overlap import compute_overlap - - -class AnchorParameters: - """ The parameteres that define how anchors are generated. - - Args - sizes : List of sizes to use. Each size corresponds to one feature level. - strides : List of strides to use. Each stride correspond to one feature level. - ratios : List of ratios to use per location in a feature map. - scales : List of scales to use per location in a feature map. - """ - - def __init__(self, sizes, strides, ratios, scales): - self.sizes = sizes - self.strides = strides - self.ratios = ratios - self.scales = scales - - def num_anchors(self): - return len(self.ratios) * len(self.scales) - - -""" -The default anchor parameters. 
-""" -AnchorParameters.default = AnchorParameters( - sizes=[32, 64, 128, 256, 512], - strides=[8, 16, 32, 64, 128], - ratios=np.array([0.5, 1, 2], keras.backend.floatx()), - scales=np.array([2 ** 0, 2 ** (1.0 / 3.0), 2 ** - (2.0 / 3.0)], keras.backend.floatx()), -) - - -def anchor_targets_bbox( - anchors, - image_group, - annotations_group, - num_classes, - negative_overlap=0.4, - positive_overlap=0.5 -): - """ Generate anchor targets for bbox detection. - - Args - anchors: np.array of annotations of shape (N, 4) for (x1, y1, x2, y2). - image_group: List of BGR images. - annotations_group: List of annotations (np.array of shape (N, 5) for (x1, y1, x2, y2, label)). - num_classes: Number of classes to predict. - mask_shape: If the image is padded with zeros, mask_shape can be used to mark the relevant part of the image. - negative_overlap: IoU overlap for negative anchors (all anchors with overlap < negative_overlap are negative). - positive_overlap: IoU overlap or positive anchors (all anchors with overlap > positive_overlap are positive). - - Returns - labels_batch: batch that contains labels & anchor states (np.array of shape (batch_size, N, num_classes + 1), - where N is the number of anchors for an image and the last column defines the anchor state (-1 for ignore, 0 for bg, 1 for fg). - regression_batch: batch that contains bounding-box regression targets for an image & anchor states (np.array of shape (batch_size, N, 4 + 1), - where N is the number of anchors for an image, the first 4 columns define regression targets for (x1, y1, x2, y2) and the - last column defines anchor states (-1 for ignore, 0 for bg, 1 for fg). - """ - - assert(len(image_group) == len(annotations_group) - ), "The length of the images and annotations need to be equal." - assert(len(annotations_group) > - 0), "No data received to compute anchor targets for." - for annotations in annotations_group: - assert('bboxes' in annotations), "Annotations should contain bboxes." - assert('labels' in annotations), "Annotations should contain labels." 
- - batch_size = len(image_group) - - regression_batch = np.zeros( - (batch_size, anchors.shape[0], 4 + 1), dtype=keras.backend.floatx()) - labels_batch = np.zeros( - (batch_size, anchors.shape[0], num_classes + 1), dtype=keras.backend.floatx()) - - # compute labels and regression targets - for index, (image, annotations) in enumerate(zip(image_group, annotations_group)): - if annotations['bboxes'].shape[0]: - # obtain indices of gt annotations with the greatest overlap - positive_indices, ignore_indices, argmax_overlaps_inds = compute_gt_annotations( - anchors, annotations['bboxes'], negative_overlap, positive_overlap) - - labels_batch[index, ignore_indices, -1] = -1 - labels_batch[index, positive_indices, -1] = 1 - - regression_batch[index, ignore_indices, -1] = -1 - regression_batch[index, positive_indices, -1] = 1 - - # compute target class labels - labels_batch[index, positive_indices, annotations['labels'] - [argmax_overlaps_inds[positive_indices]].astype(int)] = 1 - - regression_batch[index, :, :-1] = bbox_transform( - anchors, annotations['bboxes'][argmax_overlaps_inds, :]) - - # ignore annotations outside of image - if image.shape: - anchors_centers = np.vstack( - [(anchors[:, 0] + anchors[:, 2]) / 2, (anchors[:, 1] + anchors[:, 3]) / 2]).T - indices = np.logical_or( - anchors_centers[:, 0] >= image.shape[1], anchors_centers[:, 1] >= image.shape[0]) - - labels_batch[index, indices, -1] = -1 - regression_batch[index, indices, -1] = -1 - - return regression_batch, labels_batch - - -def compute_gt_annotations( - anchors, - annotations, - negative_overlap=0.4, - positive_overlap=0.5 -): - """ Obtain indices of gt annotations with the greatest overlap. - - Args - anchors: np.array of annotations of shape (N, 4) for (x1, y1, x2, y2). - annotations: np.array of shape (N, 5) for (x1, y1, x2, y2, label). - negative_overlap: IoU overlap for negative anchors (all anchors with overlap < negative_overlap are negative). - positive_overlap: IoU overlap or positive anchors (all anchors with overlap > positive_overlap are positive). - - Returns - positive_indices: indices of positive anchors - ignore_indices: indices of ignored anchors - argmax_overlaps_inds: ordered overlaps indices - """ - - overlaps = compute_overlap(anchors.astype( - np.float64), annotations.astype(np.float64)) - argmax_overlaps_inds = np.argmax(overlaps, axis=1) - max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds] - - # assign "dont care" labels - positive_indices = max_overlaps >= positive_overlap - ignore_indices = (max_overlaps > negative_overlap) & ~positive_indices - - return positive_indices, ignore_indices, argmax_overlaps_inds - - -def layer_shapes(image_shape, model): - """Compute layer shapes given input image shape and the model. - - Args - image_shape: The shape of the image. - model: The model to use for computing how the image shape is transformed in the pyramid. - - Returns - A dictionary mapping layer names to image shapes. - """ - shape = { - model.layers[0].name: (None,) + image_shape, - } - - for layer in model.layers[1:]: - nodes = layer._inbound_nodes - for node in nodes: - inputs = [shape[lr.name] for lr in node.inbound_layers] - if not inputs: - continue - shape[layer.name] = layer.compute_output_shape( - inputs[0] if len(inputs) == 1 else inputs) - - return shape - - -def make_shapes_callback(model): - """ Make a function for getting the shape of the pyramid levels. 
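The positive/ignore split in `compute_gt_annotations` is easiest to see on a toy example. A sketch with a hard-coded IoU matrix standing in for the Cython `compute_overlap` call, using the same 0.4/0.5 thresholds:

```python
import numpy as np

# Toy IoU matrix: 4 anchors (rows) x 2 ground-truth boxes (columns).
overlaps = np.array([
    [0.10, 0.05],   # background (max IoU < 0.4)
    [0.45, 0.20],   # ignored    (0.4 < max IoU < 0.5)
    [0.70, 0.10],   # positive   (max IoU >= 0.5), matched to gt 0
    [0.05, 0.55],   # positive, matched to gt 1
])

argmax_overlaps_inds = np.argmax(overlaps, axis=1)
max_overlaps = overlaps[np.arange(overlaps.shape[0]), argmax_overlaps_inds]

positive_indices = max_overlaps >= 0.5
ignore_indices = (max_overlaps > 0.4) & ~positive_indices

print(positive_indices)       # [False False  True  True]
print(ignore_indices)         # [False  True False False]
print(argmax_overlaps_inds)   # [0 0 0 1]
```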
- """ - def get_shapes(image_shape, pyramid_levels): - shape = layer_shapes(image_shape, model) - image_shapes = [shape["P{}".format(level)][1:3] - for level in pyramid_levels] - return image_shapes - - return get_shapes - - -def guess_shapes(image_shape, pyramid_levels): - """Guess shapes based on pyramid levels. - - Args - image_shape: The shape of the image. - pyramid_levels: A list of what pyramid levels are used. - - Returns - A list of image shapes at each pyramid level. - """ - image_shape = np.array(image_shape[:2]) - image_shapes = [(image_shape + 2 ** x - 1) // (2 ** x) - for x in pyramid_levels] - return image_shapes - - -def anchors_for_shape( - image_shape, - pyramid_levels=None, - anchor_params=None, - shapes_callback=None, -): - """ Generators anchors for a given shape. - - Args - image_shape: The shape of the image. - pyramid_levels: List of ints representing which pyramids to use (defaults to [3, 4, 5, 6, 7]). - anchor_params: Struct containing anchor parameters. If None, default values are used. - shapes_callback: Function to call for getting the shape of the image at different pyramid levels. - - Returns - np.array of shape (N, 4) containing the (x1, y1, x2, y2) coordinates for the anchors. - """ - - if pyramid_levels is None: - pyramid_levels = [3, 4, 5, 6, 7] - - if anchor_params is None: - anchor_params = AnchorParameters.default - - if shapes_callback is None: - shapes_callback = guess_shapes - image_shapes = shapes_callback(image_shape, pyramid_levels) - - # compute anchors over all pyramid levels - all_anchors = np.zeros((0, 4)) - for idx, p in enumerate(pyramid_levels): - anchors = generate_anchors( - base_size=anchor_params.sizes[idx], - ratios=anchor_params.ratios, - scales=anchor_params.scales - ) - shifted_anchors = shift( - image_shapes[idx], anchor_params.strides[idx], anchors) - all_anchors = np.append(all_anchors, shifted_anchors, axis=0) - - return all_anchors - - -def shift(shape, stride, anchors): - """ Produce shifted anchors based on shape of the map and stride size. - - Args - shape : Shape to shift the anchors over. - stride : Stride to shift the anchors with over the shape. - anchors: The anchors to apply at each location. - """ - - # create a grid starting from half stride from the top left corner - shift_x = (np.arange(0, shape[1]) + 0.5) * stride - shift_y = (np.arange(0, shape[0]) + 0.5) * stride - - shift_x, shift_y = np.meshgrid(shift_x, shift_y) - - shifts = np.vstack(( - shift_x.ravel(), shift_y.ravel(), - shift_x.ravel(), shift_y.ravel() - )).transpose() - - # add A anchors (1, A, 4) to - # cell K shifts (K, 1, 4) to get - # shift anchors (K, A, 4) - # reshape to (K*A, 4) shifted anchors - A = anchors.shape[0] - K = shifts.shape[0] - all_anchors = (anchors.reshape((1, A, 4)) + - shifts.reshape((1, K, 4)).transpose((1, 0, 2))) - all_anchors = all_anchors.reshape((K * A, 4)) - - return all_anchors - - -def generate_anchors(base_size=16, ratios=None, scales=None): - """ - Generate anchor (reference) windows by enumerating aspect ratios X - scales w.r.t. a reference window. 
- """ - - if ratios is None: - ratios = AnchorParameters.default.ratios - - if scales is None: - scales = AnchorParameters.default.scales - - num_anchors = len(ratios) * len(scales) - - # initialize output anchors - anchors = np.zeros((num_anchors, 4)) - - # scale base_size - anchors[:, 2:] = base_size * np.tile(scales, (2, len(ratios))).T - - # compute areas of anchors - areas = anchors[:, 2] * anchors[:, 3] - - # correct for ratios - anchors[:, 2] = np.sqrt(areas / np.repeat(ratios, len(scales))) - anchors[:, 3] = anchors[:, 2] * np.repeat(ratios, len(scales)) - - # transform from (x_ctr, y_ctr, w, h) -> (x1, y1, x2, y2) - anchors[:, 0::2] -= np.tile(anchors[:, 2] * 0.5, (2, 1)).T - anchors[:, 1::2] -= np.tile(anchors[:, 3] * 0.5, (2, 1)).T - - return anchors - - -def bbox_transform(anchors, gt_boxes, mean=None, std=None): - """Compute bounding-box regression targets for an image.""" - - if mean is None: - mean = np.array([0, 0, 0, 0]) - if std is None: - std = np.array([0.2, 0.2, 0.2, 0.2]) - - if isinstance(mean, (list, tuple)): - mean = np.array(mean) - elif not isinstance(mean, np.ndarray): - raise ValueError( - 'Expected mean to be a np.ndarray, list or tuple. Received: {}'.format(type(mean))) - - if isinstance(std, (list, tuple)): - std = np.array(std) - elif not isinstance(std, np.ndarray): - raise ValueError( - 'Expected std to be a np.ndarray, list or tuple. Received: {}'.format(type(std))) - - anchor_widths = anchors[:, 2] - anchors[:, 0] - anchor_heights = anchors[:, 3] - anchors[:, 1] - - targets_dx1 = (gt_boxes[:, 0] - anchors[:, 0]) / anchor_widths - targets_dy1 = (gt_boxes[:, 1] - anchors[:, 1]) / anchor_heights - targets_dx2 = (gt_boxes[:, 2] - anchors[:, 2]) / anchor_widths - targets_dy2 = (gt_boxes[:, 3] - anchors[:, 3]) / anchor_heights - - targets = np.stack((targets_dx1, targets_dy1, targets_dx2, targets_dy2)) - targets = targets.T - - targets = (targets - mean) / std - - return targets -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -from pycocotools.cocoeval import COCOeval - -import keras -import numpy as np -import json - -import progressbar -assert(callable(progressbar.progressbar) - ), "Using wrong progressbar module, install 'progressbar2' instead." - - -def evaluate_coco(generator, model, threshold=0.05): - """ Use the pycocotools to evaluate a COCO model on a dataset. - - Args - generator : The generator for generating the evaluation data. - model : The model to evaluate. - threshold : The score threshold to use. 
- """ - # start collecting results - results = [] - image_ids = [] - for index in progressbar.progressbar(range(generator.size()), prefix='COCO evaluation: '): - image = generator.load_image(index) - image = generator.preprocess_image(image) - image, scale = generator.resize_image(image) - - if keras.backend.image_data_format() == 'channels_first': - image = image.transpose((2, 0, 1)) - - # run network - boxes, scores, labels = model.predict_on_batch( - np.expand_dims(image, axis=0)) - - # correct boxes for image scale - boxes /= scale - - # change to (x, y, w, h) (MS COCO standard) - boxes[:, :, 2] -= boxes[:, :, 0] - boxes[:, :, 3] -= boxes[:, :, 1] - - # compute predicted labels and scores - for box, score, label in zip(boxes[0], scores[0], labels[0]): - # scores are sorted, so we can break - if score < threshold: - break - - # append detection for each positively labeled class - image_result = { - 'image_id': generator.image_ids[index], - 'category_id': generator.label_to_coco_label(label), - 'score': float(score), - 'bbox': box.tolist(), - } - - # append detection to results - results.append(image_result) - - # append image to list of processed images - image_ids.append(generator.image_ids[index]) - - if not len(results): - return - - # write output - json.dump(results, open('{}_bbox_results.json'.format( - generator.set_name), 'w'), indent=4) - json.dump(image_ids, open('{}_processed_image_ids.json'.format( - generator.set_name), 'w'), indent=4) - - # load results in COCO evaluation tool - coco_true = generator.coco - coco_pred = coco_true.loadRes( - '{}_bbox_results.json'.format(generator.set_name)) - - # run COCO evaluation - coco_eval = COCOeval(coco_true, coco_pred, 'bbox') - coco_eval.params.imgIds = image_ids - coco_eval.evaluate() - coco_eval.accumulate() - coco_eval.summarize() - return coco_eval.stats -import warnings - - -def label_color(label): - """ Return a color from a set of predefined colors. Contains 80 colors in total. - - Args - label: The label to get the color for. - - Returns - A list of three values representing a RGB color. - - If no color is defined for a certain label, the color green is returned and a warning is printed. 
- """ - if label < len(colors): - return colors[label] - else: - warnings.warn( - 'Label {} has no color, returning default.'.format(label)) - return (0, 255, 0) - - -""" -Generated using: - -``` -colors = [list((matplotlib.colors.hsv_to_rgb([x, 1.0, 1.0]) * 255).astype(int)) for x in np.arange(0, 1, 1.0 / 80)] -shuffle(colors) -pprint(colors) -``` -""" -colors = [ - [31, 0, 255], - [0, 159, 255], - [255, 95, 0], - [255, 19, 0], - [255, 0, 0], - [255, 38, 0], - [0, 255, 25], - [255, 0, 133], - [255, 172, 0], - [108, 0, 255], - [0, 82, 255], - [0, 255, 6], - [255, 0, 152], - [223, 0, 255], - [12, 0, 255], - [0, 255, 178], - [108, 255, 0], - [184, 0, 255], - [255, 0, 76], - [146, 255, 0], - [51, 0, 255], - [0, 197, 255], - [255, 248, 0], - [255, 0, 19], - [255, 0, 38], - [89, 255, 0], - [127, 255, 0], - [255, 153, 0], - [0, 255, 255], - [0, 255, 216], - [0, 255, 121], - [255, 0, 248], - [70, 0, 255], - [0, 255, 159], - [0, 216, 255], - [0, 6, 255], - [0, 63, 255], - [31, 255, 0], - [255, 57, 0], - [255, 0, 210], - [0, 255, 102], - [242, 255, 0], - [255, 191, 0], - [0, 255, 63], - [255, 0, 95], - [146, 0, 255], - [184, 255, 0], - [255, 114, 0], - [0, 255, 235], - [255, 229, 0], - [0, 178, 255], - [255, 0, 114], - [255, 0, 57], - [0, 140, 255], - [0, 121, 255], - [12, 255, 0], - [255, 210, 0], - [0, 255, 44], - [165, 255, 0], - [0, 25, 255], - [0, 255, 140], - [0, 101, 255], - [0, 255, 82], - [223, 255, 0], - [242, 0, 255], - [89, 0, 255], - [165, 0, 255], - [70, 255, 0], - [255, 0, 172], - [255, 76, 0], - [203, 255, 0], - [204, 0, 255], - [255, 0, 229], - [255, 133, 0], - [127, 0, 255], - [0, 235, 255], - [0, 255, 197], - [255, 0, 191], - [0, 44, 255], - [50, 255, 0] -] -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import configparser -import numpy as np -import keras -from ..utils.anchors import AnchorParameters - - -def read_config_file(config_path): - config = configparser.ConfigParser() - config.read(config_path) - - return config - - -def parse_anchor_parameters(config): - ratios = np.array(list(map(float, config['anchor_parameters']['ratios'].split( - ' '))), keras.backend.floatx()) - scales = np.array(list(map(float, config['anchor_parameters']['scales'].split( - ' '))), keras.backend.floatx()) - sizes = list(map(int, config['anchor_parameters']['sizes'].split(' '))) - strides = list(map(int, config['anchor_parameters']['strides'].split(' '))) - - return AnchorParameters(sizes, strides, ratios, scales) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -""" - -from .anchors import compute_overlap -from .visualization import draw_detections, draw_annotations - -import keras -import numpy as np -import os - -import cv2 -import progressbar -assert(callable(progressbar.progressbar) - ), "Using wrong progressbar module, install 'progressbar2' instead." - - -def _compute_ap(recall, precision): - """ Compute the average precision, given the recall and precision curves. - - Code originally from https://github.com/rbgirshick/py-faster-rcnn. - - # Arguments - recall: The recall curve (list). - precision: The precision curve (list). - # Returns - The average precision as computed in py-faster-rcnn. - """ - # correct AP calculation - # first append sentinel values at the end - mrec = np.concatenate(([0.], recall, [1.])) - mpre = np.concatenate(([0.], precision, [0.])) - - # compute the precision envelope - for i in range(mpre.size - 1, 0, -1): - mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - i = np.where(mrec[1:] != mrec[:-1])[0] - - # and sum (\Delta recall) * prec - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) - return ap - - -def _get_detections(generator, model, score_threshold=0.05, max_detections=100, save_path=None): - """ Get the detections from the model using the generator. - - The result is a list of lists such that the size is: - all_detections[num_images][num_classes] = detections[num_detections, 4 + num_classes] - - # Arguments - generator : The generator used to run images through the model. - model : The model to run on the images. - score_threshold : The score confidence threshold to use. - max_detections : The maximum number of detections to use per image. - save_path : The path to save the images with visualized detections to. - # Returns - A list of lists containing the detections for each image in the generator. 
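`_compute_ap` pads the precision-recall curve with sentinels, makes precision non-increasing from the right (the "envelope"), and sums precision × Δrecall. On a tiny three-detection curve:

```python
import numpy as np

recall    = np.array([0.5, 0.5, 1.0])
precision = np.array([1.0, 0.5, 2.0 / 3.0])

mrec = np.concatenate(([0.], recall, [1.]))
mpre = np.concatenate(([0.], precision, [0.]))

# Precision envelope: each point takes the max precision to its right.
for i in range(mpre.size - 1, 0, -1):
    mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i])

# Sum precision over the points where recall actually increases.
i = np.where(mrec[1:] != mrec[:-1])[0]
ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1])
print(ap)   # 0.8333... = 0.5 * 1.0 + 0.5 * (2/3)
```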
- """ - all_detections = [[None for i in range(generator.num_classes( - )) if generator.has_label(i)] for j in range(generator.size())] - - for i in progressbar.progressbar(range(generator.size()), prefix='Running network: '): - raw_image = generator.load_image(i) - image = generator.preprocess_image(raw_image.copy()) - image, scale = generator.resize_image(image) - - if keras.backend.image_data_format() == 'channels_first': - image = image.transpose((2, 0, 1)) - - # run network - boxes, scores, labels = model.predict_on_batch( - np.expand_dims(image, axis=0))[:3] - - # correct boxes for image scale - boxes /= scale - - # select indices which have a score above the threshold - indices = np.where(scores[0, :] > score_threshold)[0] - - # select those scores - scores = scores[0][indices] - - # find the order with which to sort the scores - scores_sort = np.argsort(-scores)[:max_detections] - - # select detections - image_boxes = boxes[0, indices[scores_sort], :] - image_scores = scores[scores_sort] - image_labels = labels[0, indices[scores_sort]] - image_detections = np.concatenate([image_boxes, np.expand_dims( - image_scores, axis=1), np.expand_dims(image_labels, axis=1)], axis=1) - - if save_path is not None: - draw_annotations(raw_image, generator.load_annotations( - i), label_to_name=generator.label_to_name) - draw_detections(raw_image, image_boxes, image_scores, - image_labels, label_to_name=generator.label_to_name) - - cv2.imwrite(os.path.join(save_path, '{}.png'.format(i)), raw_image) - - # copy detections to all_detections - for label in range(generator.num_classes()): - if not generator.has_label(label): - continue - - all_detections[i][label] = image_detections[image_detections[:, -1] == label, :-1] - - return all_detections - - -def _get_annotations(generator): - """ Get the ground truth annotations from the generator. - - The result is a list of lists such that the size is: - all_detections[num_images][num_classes] = annotations[num_detections, 5] - - # Arguments - generator : The generator used to retrieve ground truth annotations. - # Returns - A list of lists containing the annotations for each image in the generator. - """ - all_annotations = [[None for i in range( - generator.num_classes())] for j in range(generator.size())] - - for i in progressbar.progressbar(range(generator.size()), prefix='Parsing annotations: '): - # load the annotations - annotations = generator.load_annotations(i) - - # copy detections to all_annotations - for label in range(generator.num_classes()): - if not generator.has_label(label): - continue - - all_annotations[i][label] = annotations['bboxes'][annotations['labels'] - == label, :].copy() - - return all_annotations - - -def evaluate( - generator, - model, - iou_threshold=0.5, - score_threshold=0.05, - max_detections=100, - save_path=None -): - """ Evaluate a given dataset using a given model. - - # Arguments - generator : The generator that represents the dataset to evaluate. - model : The model to evaluate. - iou_threshold : The threshold used to consider when a detection is positive or negative. - score_threshold : The score confidence threshold to use for detections. - max_detections : The maximum number of detections to use per image. - save_path : The path to save images with visualized detections to. - # Returns - A dict mapping class names to mAP scores. 
- """ - # gather all detections and annotations - all_detections = _get_detections( - generator, model, score_threshold=score_threshold, max_detections=max_detections, save_path=save_path) - all_annotations = _get_annotations(generator) - average_precisions = {} - - # all_detections = pickle.load(open('all_detections.pkl', 'rb')) - # all_annotations = pickle.load(open('all_annotations.pkl', 'rb')) - # pickle.dump(all_detections, open('all_detections.pkl', 'wb')) - # pickle.dump(all_annotations, open('all_annotations.pkl', 'wb')) - - # process detections and annotations - for label in range(generator.num_classes()): - if not generator.has_label(label): - continue - - false_positives = np.zeros((0,)) - true_positives = np.zeros((0,)) - scores = np.zeros((0,)) - num_annotations = 0.0 - - for i in range(generator.size()): - detections = all_detections[i][label] - annotations = all_annotations[i][label] - num_annotations += annotations.shape[0] - detected_annotations = [] - - for d in detections: - scores = np.append(scores, d[4]) - - if annotations.shape[0] == 0: - false_positives = np.append(false_positives, 1) - true_positives = np.append(true_positives, 0) - continue - - overlaps = compute_overlap( - np.expand_dims(d, axis=0), annotations) - assigned_annotation = np.argmax(overlaps, axis=1) - max_overlap = overlaps[0, assigned_annotation] - - if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations: - false_positives = np.append(false_positives, 0) - true_positives = np.append(true_positives, 1) - detected_annotations.append(assigned_annotation) - else: - false_positives = np.append(false_positives, 1) - true_positives = np.append(true_positives, 0) - - # no annotations -> AP for this class is 0 (is this correct?) - if num_annotations == 0: - average_precisions[label] = 0, 0 - continue - - # sort by score - indices = np.argsort(-scores) - false_positives = false_positives[indices] - true_positives = true_positives[indices] - - # compute false positives and true positives - false_positives = np.cumsum(false_positives) - true_positives = np.cumsum(true_positives) - - # compute recall and precision - recall = true_positives / num_annotations - precision = true_positives / \ - np.maximum(true_positives + false_positives, - np.finfo(np.float64).eps) - - # compute average precision - average_precision = _compute_ap(recall, precision) - average_precisions[label] = average_precision, num_annotations - - return average_precisions -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -from __future__ import division -import numpy as np -import cv2 -from PIL import Image - -from .transform import change_transform_origin - - -def read_image_bgr(path): - """ Read an image in BGR format. - - Args - path: Path to the image. - """ - image = np.asarray(Image.open(path).convert('RGB')) - return image[:, :, ::-1].copy() - - -def preprocess_image(x, mode='caffe'): - """ Preprocess an image by subtracting the ImageNet mean. 
- - Args - x: np.array of shape (None, None, 3) or (3, None, None). - mode: One of "caffe" or "tf". - - caffe: will zero-center each color channel with - respect to the ImageNet dataset, without scaling. - - tf: will scale pixels between -1 and 1, sample-wise. - - Returns - The input with the ImageNet mean subtracted. - """ - # mostly identical to "https://github.com/keras-team/keras-applications/blob/master/keras_applications/imagenet_utils.py" - # except for converting RGB -> BGR since we assume BGR already - - # covert always to float32 to keep compatibility with opencv - x = x.astype(np.float32) - - if mode == 'tf': - x /= 127.5 - x -= 1. - elif mode == 'caffe': - x[..., 0] -= 103.939 - x[..., 1] -= 116.779 - x[..., 2] -= 123.68 - - return x - - -def adjust_transform_for_image(transform, image, relative_translation): - """ Adjust a transformation for a specific image. - - The translation of the matrix will be scaled with the size of the image. - The linear part of the transformation will adjusted so that the origin of the transformation will be at the center of the image. - """ - height, width, channels = image.shape - - result = transform - - # Scale the translation with the image size if specified. - if relative_translation: - result[0:2, 2] *= [width, height] - - # Move the origin of transformation. - result = change_transform_origin(transform, (0.5 * width, 0.5 * height)) - - return result - - -class TransformParameters: - """ Struct holding parameters determining how to apply a transformation to an image. - - Args - fill_mode: One of: 'constant', 'nearest', 'reflect', 'wrap' - interpolation: One of: 'nearest', 'linear', 'cubic', 'area', 'lanczos4' - cval: Fill value to use with fill_mode='constant' - relative_translation: If true (the default), interpret translation as a factor of the image size. - If false, interpret it as absolute pixels. - """ - - def __init__( - self, - fill_mode='nearest', - interpolation='linear', - cval=0, - relative_translation=True, - ): - self.fill_mode = fill_mode - self.cval = cval - self.interpolation = interpolation - self.relative_translation = relative_translation - - def cvBorderMode(self): - if self.fill_mode == 'constant': - return cv2.BORDER_CONSTANT - if self.fill_mode == 'nearest': - return cv2.BORDER_REPLICATE - if self.fill_mode == 'reflect': - return cv2.BORDER_REFLECT_101 - if self.fill_mode == 'wrap': - return cv2.BORDER_WRAP - - def cvInterpolation(self): - if self.interpolation == 'nearest': - return cv2.INTER_NEAREST - if self.interpolation == 'linear': - return cv2.INTER_LINEAR - if self.interpolation == 'cubic': - return cv2.INTER_CUBIC - if self.interpolation == 'area': - return cv2.INTER_AREA - if self.interpolation == 'lanczos4': - return cv2.INTER_LANCZOS4 - - -def apply_transform(matrix, image, params): - """ - Apply a transformation to an image. - - The origin of transformation is at the top left corner of the image. - - The matrix is interpreted such that a point (x, y) on the original image is moved to transform * (x, y) in the generated image. - Mathematically speaking, that means that the matrix is a transformation from the transformed image space to the original image space. - - Args - matrix: A homogeneous 3 by 3 matrix holding representing the transformation to apply. - image: The image to transform. 
- params: The transform parameters (see TransformParameters) - """ - output = cv2.warpAffine( - image, - matrix[:2, :], - dsize=(image.shape[1], image.shape[0]), - flags=params.cvInterpolation(), - borderMode=params.cvBorderMode(), - borderValue=params.cval, - ) - return output - - -def compute_resize_scale(image_shape, min_side=800, max_side=1333): - """ Compute an image scale such that the image size is constrained to min_side and max_side. - - Args - min_side: The image's min side will be equal to min_side after resizing. - max_side: If after resizing the image's max side is above max_side, resize until the max side is equal to max_side. - - Returns - A resizing scale. - """ - (rows, cols, _) = image_shape - - smallest_side = min(rows, cols) - - # rescale the image so the smallest side is min_side - scale = min_side / smallest_side - - # check if the largest side is now greater than max_side, which can happen - # when images have a large aspect ratio - largest_side = max(rows, cols) - if largest_side * scale > max_side: - scale = max_side / largest_side - - return scale - - -def resize_image(img, min_side=800, max_side=1333): - """ Resize an image such that the size is constrained to min_side and max_side. - - Args - min_side: The image's min side will be equal to min_side after resizing. - max_side: If after resizing the image's max side is above max_side, resize until the max side is equal to max_side. - - Returns - A resized image. - """ - # compute scale to resize the image - scale = compute_resize_scale( - img.shape, min_side=min_side, max_side=max_side) - - # resize the image with the computed scale - img = cv2.resize(img, None, fx=scale, fy=scale) - - return img, scale -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -from __future__ import print_function - -import keras -import sys - -minimum_keras_version = 2, 2, 4 - - -def keras_version(): - """ Get the Keras version. - - Returns - tuple of (major, minor, patch). - """ - return tuple(map(int, keras.__version__.split('.'))) - - -def keras_version_ok(): - """ Check if the current Keras version is higher than the minimum version. - """ - return keras_version() >= minimum_keras_version - - -def assert_keras_version(): - """ Assert that the Keras version is up to date. - """ - detected = keras.__version__ - required = '.'.join(map(str, minimum_keras_version)) - assert(keras_version() >= minimum_keras_version), 'You are using keras version {}. The minimum required version is {}.'.format( - detected, required) - - -def check_keras_version(): - """ Check that the Keras version is up to date. If it isn't, print an error message and exit the script. - """ - try: - assert_keras_version() - except AssertionError as e: - print(e, file=sys.stderr) - sys.exit(1) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
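`compute_resize_scale` first scales the short side up to `min_side`, then backs the scale off if that would push the long side past `max_side`. For an assumed, deliberately wide 500×2000 image:

```python
min_side, max_side = 800, 1333
rows, cols = 500, 2000                      # an assumed, wide example image

scale = min_side / min(rows, cols)          # 800 / 500 = 1.6
if max(rows, cols) * scale > max_side:      # 2000 * 1.6 = 3200 > 1333
    scale = max_side / max(rows, cols)      # 1333 / 2000 = 0.6665

print(scale)                                # 0.6665
print(rows * scale, cols * scale)           # 333.25 1333.0
```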
-You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - - -def freeze(model): - """ Set all layers in a model to non-trainable. - - The weights for these layers will not be updated during training. - - This function modifies the given model in-place, - but it also returns the modified model to allow easy chaining with other functions. - """ - for layer in model.layers: - layer.trainable = False - return model -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import numpy as np - -DEFAULT_PRNG = np.random - - -def colvec(*args): - """ Create a numpy array representing a column vector. """ - return np.array([args]).T - - -def transform_aabb(transform, aabb): - """ Apply a transformation to an axis aligned bounding box. - - The result is a new AABB in the same coordinate system as the original AABB. - The new AABB contains all corner points of the original AABB after applying the given transformation. - - Args - transform: The transformation to apply. - x1: The minimum x value of the AABB. - y1: The minimum y value of the AABB. - x2: The maximum x value of the AABB. - y2: The maximum y value of the AABB. - Returns - The new AABB as tuple (x1, y1, x2, y2) - """ - x1, y1, x2, y2 = aabb - # Transform all 4 corners of the AABB. - points = transform.dot([ - [x1, x2, x1, x2], - [y1, y2, y2, y1], - [1, 1, 1, 1], - ]) - - # Extract the min and max corners again. - min_corner = points.min(axis=1) - max_corner = points.max(axis=1) - - return [min_corner[0], min_corner[1], max_corner[0], max_corner[1]] - - -def _random_vector(min, max, prng=DEFAULT_PRNG): - """ Construct a random vector between min and max. - Args - min: the minimum value for each component - max: the maximum value for each component - """ - min = np.array(min) - max = np.array(max) - assert min.shape == max.shape - assert len(min.shape) == 1 - return prng.uniform(min, max) - - -def rotation(angle): - """ Construct a homogeneous 2D rotation matrix. - Args - angle: the angle in radians - Returns - the rotation matrix as 3 by 3 numpy array - """ - return np.array([ - [np.cos(angle), -np.sin(angle), 0], - [np.sin(angle), np.cos(angle), 0], - [0, 0, 1] - ]) - - -def random_rotation(min, max, prng=DEFAULT_PRNG): - """ Construct a random rotation between -max and max. - Args - min: a scalar for the minimum absolute angle in radians - max: a scalar for the maximum absolute angle in radians - prng: the pseudo-random number generator to use. - Returns - a homogeneous 3 by 3 rotation matrix - """ - return rotation(prng.uniform(min, max)) - - -def translation(translation): - """ Construct a homogeneous 2D translation matrix. 
- # Arguments - translation: the translation 2D vector - # Returns - the translation matrix as 3 by 3 numpy array - """ - return np.array([ - [1, 0, translation[0]], - [0, 1, translation[1]], - [0, 0, 1] - ]) - - -def random_translation(min, max, prng=DEFAULT_PRNG): - """ Construct a random 2D translation between min and max. - Args - min: a 2D vector with the minimum translation for each dimension - max: a 2D vector with the maximum translation for each dimension - prng: the pseudo-random number generator to use. - Returns - a homogeneous 3 by 3 translation matrix - """ - return translation(_random_vector(min, max, prng)) - - -def shear(angle): - """ Construct a homogeneous 2D shear matrix. - Args - angle: the shear angle in radians - Returns - the shear matrix as 3 by 3 numpy array - """ - return np.array([ - [1, -np.sin(angle), 0], - [0, np.cos(angle), 0], - [0, 0, 1] - ]) - - -def random_shear(min, max, prng=DEFAULT_PRNG): - """ Construct a random 2D shear matrix with shear angle between -max and max. - Args - min: the minimum shear angle in radians. - max: the maximum shear angle in radians. - prng: the pseudo-random number generator to use. - Returns - a homogeneous 3 by 3 shear matrix - """ - return shear(prng.uniform(min, max)) - - -def scaling(factor): - """ Construct a homogeneous 2D scaling matrix. - Args - factor: a 2D vector for X and Y scaling - Returns - the zoom matrix as 3 by 3 numpy array - """ - return np.array([ - [factor[0], 0, 0], - [0, factor[1], 0], - [0, 0, 1] - ]) - - -def random_scaling(min, max, prng=DEFAULT_PRNG): - """ Construct a random 2D scale matrix between -max and max. - Args - min: a 2D vector containing the minimum scaling factor for X and Y. - min: a 2D vector containing The maximum scaling factor for X and Y. - prng: the pseudo-random number generator to use. - Returns - a homogeneous 3 by 3 scaling matrix - """ - return scaling(_random_vector(min, max, prng)) - - -def random_flip(flip_x_chance, flip_y_chance, prng=DEFAULT_PRNG): - """ Construct a transformation randomly containing X/Y flips (or not). - Args - flip_x_chance: The chance that the result will contain a flip along the X axis. - flip_y_chance: The chance that the result will contain a flip along the Y axis. - prng: The pseudo-random number generator to use. - Returns - a homogeneous 3 by 3 transformation matrix - """ - flip_x = prng.uniform(0, 1) < flip_x_chance - flip_y = prng.uniform(0, 1) < flip_y_chance - # 1 - 2 * bool gives 1 for False and -1 for True. - return scaling((1 - 2 * flip_x, 1 - 2 * flip_y)) - - -def change_transform_origin(transform, center): - """ Create a new transform representing the same transformation, - only with the origin of the linear part changed. - Args - transform: the transformation matrix - center: the new origin of the transformation - Returns - translate(center) * transform * translate(-center) - """ - center = np.array(center) - return np.linalg.multi_dot([translation(center), transform, translation(-center)]) - - -def random_transform( - min_rotation=0, - max_rotation=0, - min_translation=(0, 0), - max_translation=(0, 0), - min_shear=0, - max_shear=0, - min_scaling=(1, 1), - max_scaling=(1, 1), - flip_x_chance=0, - flip_y_chance=0, - prng=DEFAULT_PRNG -): - """ Create a random transformation. 
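`change_transform_origin` sandwiches a transform between a pair of translations so it acts about an arbitrary centre rather than the top-left corner. A self-contained sketch (the two helpers are redefined here to keep it runnable) rotating 90° about the point (1, 1):

```python
import numpy as np

def rotation(angle):
    return np.array([[np.cos(angle), -np.sin(angle), 0],
                     [np.sin(angle),  np.cos(angle), 0],
                     [0, 0, 1]])

def translation(t):
    return np.array([[1, 0, t[0]], [0, 1, t[1]], [0, 0, 1]])

center = np.array([1.0, 1.0])
# translate(center) * transform * translate(-center), as in change_transform_origin.
transform = np.linalg.multi_dot(
    [translation(center), rotation(np.pi / 2), translation(-center)])

point = np.array([2.0, 1.0, 1.0])   # homogeneous (x, y, 1)
print(transform.dot(point))         # ~[1. 2. 1.]: rotated 90 deg about (1, 1)
```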
- - The transformation consists of the following operations in this order (from left to right): - * rotation - * translation - * shear - * scaling - * flip x (if applied) - * flip y (if applied) - - Note that by default, the data generators in `keras_retinanet.preprocessing.generators` interpret the translation - as factor of the image size. So an X translation of 0.1 would translate the image by 10% of it's width. - Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret - the translation directly as pixel distances instead. - - Args - min_rotation: The minimum rotation in radians for the transform as scalar. - max_rotation: The maximum rotation in radians for the transform as scalar. - min_translation: The minimum translation for the transform as 2D column vector. - max_translation: The maximum translation for the transform as 2D column vector. - min_shear: The minimum shear angle for the transform in radians. - max_shear: The maximum shear angle for the transform in radians. - min_scaling: The minimum scaling for the transform as 2D column vector. - max_scaling: The maximum scaling for the transform as 2D column vector. - flip_x_chance: The chance (0 to 1) that a transform will contain a flip along X direction. - flip_y_chance: The chance (0 to 1) that a transform will contain a flip along Y direction. - prng: The pseudo-random number generator to use. - """ - return np.linalg.multi_dot([ - random_rotation(min_rotation, max_rotation, prng), - random_translation(min_translation, max_translation, prng), - random_shear(min_shear, max_shear, prng), - random_scaling(min_scaling, max_scaling, prng), - random_flip(flip_x_chance, flip_y_chance, prng) - ]) - - -def random_transform_generator(prng=None, **kwargs): - """ Create a random transform generator. - - Uses a dedicated, newly created, properly seeded PRNG by default instead of the global DEFAULT_PRNG. - - The transformation consists of the following operations in this order (from left to right): - * rotation - * translation - * shear - * scaling - * flip x (if applied) - * flip y (if applied) - - Note that by default, the data generators in `keras_retinanet.preprocessing.generators` interpret the translation - as factor of the image size. So an X translation of 0.1 would translate the image by 10% of it's width. - Set `relative_translation` to `False` in the `TransformParameters` of a data generator to have it interpret - the translation directly as pixel distances instead. - - Args - min_rotation: The minimum rotation in radians for the transform as scalar. - max_rotation: The maximum rotation in radians for the transform as scalar. - min_translation: The minimum translation for the transform as 2D column vector. - max_translation: The maximum translation for the transform as 2D column vector. - min_shear: The minimum shear angle for the transform in radians. - max_shear: The maximum shear angle for the transform in radians. - min_scaling: The minimum scaling for the transform as 2D column vector. - max_scaling: The maximum scaling for the transform as 2D column vector. - flip_x_chance: The chance (0 to 1) that a transform will contain a flip along X direction. - flip_y_chance: The chance (0 to 1) that a transform will contain a flip along Y direction. - prng: The pseudo-random number generator to use. - """ - - if prng is None: - # RandomState automatically seeds using the best available method. 
- prng = np.random.RandomState() - - while True: - yield random_transform(prng=prng, **kwargs) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import cv2 -import numpy as np - -from .colors import label_color - - -def draw_box(image, box, color, thickness=2): - """ Draws a box on an image with a given color. - - # Arguments - image : The image to draw on. - box : A list of 4 elements (x1, y1, x2, y2). - color : The color of the box. - thickness : The thickness of the lines to draw a box with. - """ - b = np.array(box).astype(int) - cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), - color, thickness, cv2.LINE_AA) - - -def draw_caption(image, box, caption): - """ Draws a caption above the box in an image. - - # Arguments - image : The image to draw on. - box : A list of 4 elements (x1, y1, x2, y2). - caption : String containing the text to draw. - """ - b = np.array(box).astype(int) - cv2.putText(image, caption, (b[0], b[1] - 10), - cv2.FONT_HERSHEY_PLAIN, 1, (0, 0, 0), 2) - cv2.putText(image, caption, (b[0], b[1] - 10), - cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1) - - -def draw_boxes(image, boxes, color, thickness=2): - """ Draws boxes on an image with a given color. - - # Arguments - image : The image to draw on. - boxes : A [N, 4] matrix (x1, y1, x2, y2). - color : The color of the boxes. - thickness : The thickness of the lines to draw boxes with. - """ - for b in boxes: - draw_box(image, b, color, thickness=thickness) - - -def draw_detections(image, boxes, scores, labels, color=None, label_to_name=None, score_threshold=0.5): - """ Draws detections in an image. - - # Arguments - image : The image to draw on. - boxes : A [N, 4] matrix (x1, y1, x2, y2). - scores : A list of N classification scores. - labels : A list of N labels. - color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used. - label_to_name : (optional) Functor for mapping a label to a name. - score_threshold : Threshold used for determining what detections to draw. - """ - selection = np.where(scores > score_threshold)[0] - - for i in selection: - c = color if color is not None else label_color(labels[i]) - draw_box(image, boxes[i, :], color=c) - - # draw labels - caption = (label_to_name( - labels[i]) if label_to_name else labels[i]) + ': {0:.2f}'.format(scores[i]) - draw_caption(image, boxes[i, :], caption) - - -def draw_annotations(image, annotations, color=(0, 255, 0), label_to_name=None): - """ Draws annotations in an image. - - # Arguments - image : The image to draw on. - annotations : A [N, 5] matrix (x1, y1, x2, y2, label) or dictionary containing bboxes (shaped [N, 4]) and labels (shaped [N]). - color : The color of the boxes. By default the color from keras_retinanet.utils.colors.label_color will be used. - label_to_name : (optional) Functor for mapping a label to a name. 
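A minimal usage sketch for the drawing helpers above, reproducing the `draw_box`/`draw_caption` calls directly with OpenCV on a blank canvas; the label text and score are illustrative:

```python
import numpy as np
import cv2

image = np.zeros((200, 200, 3), dtype=np.uint8)   # blank BGR canvas
box = [20, 30, 120, 150]                          # x1, y1, x2, y2

b = np.array(box).astype(int)
cv2.rectangle(image, (b[0], b[1]), (b[2], b[3]), (0, 255, 0), 2, cv2.LINE_AA)
cv2.putText(image, 'cat: 0.87', (b[0], b[1] - 10),
            cv2.FONT_HERSHEY_PLAIN, 1, (255, 255, 255), 1)

cv2.imwrite('example.png', image)
```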
- """ - if isinstance(annotations, np.ndarray): - annotations = { - 'bboxes': annotations[:, :4], 'labels': annotations[:, 4]} - - assert('bboxes' in annotations) - assert('labels' in annotations) - assert(annotations['bboxes'].shape[0] == annotations['labels'].shape[0]) - - for i in range(annotations['bboxes'].shape[0]): - label = annotations['labels'][i] - c = color if color is not None else label_color(label) - caption = '{}'.format(label_to_name(label) if label_to_name else label) - draw_caption(image, annotations['bboxes'][i], caption) - draw_box(image, annotations['bboxes'][i], color=c) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import numpy as np -import keras -import keras_retinanet.backend - - -def test_bbox_transform_inv(): - boxes = np.array([[ - [100, 100, 200, 200], - [100, 100, 300, 300], - [100, 100, 200, 300], - [100, 100, 300, 200], - [80, 120, 200, 200], - [80, 120, 300, 300], - [80, 120, 200, 300], - [80, 120, 300, 200], - ]]) - boxes = keras.backend.variable(boxes) - - deltas = np.array([[ - [0, 0, 0, 0], - [0, 0.1, 0, 0], - [-0.3, 0, 0, 0], - [0.2, 0.2, 0, 0], - [0, 0, 0.1, 0], - [0, 0, 0, -0.3], - [0, 0, 0.2, 0.2], - [0.1, 0.2, -0.3, 0.4], - ]]) - deltas = keras.backend.variable(deltas) - - expected = np.array([[ - [100, 100, 200, 200], - [100, 104, 300, 300], - [94, 100, 200, 300], - [108, 104, 300, 200], - [80, 120, 202.4, 200], - [80, 120, 300, 289.2], - [80, 120, 204.8, 307.2], - [84.4, 123.2, 286.8, 206.4] - ]]) - - result = keras_retinanet.backend.bbox_transform_inv(boxes, deltas) - result = keras.backend.eval(result) - - np.testing.assert_array_almost_equal(result, expected, decimal=2) - - -def test_shift(): - shape = (2, 3) - stride = 8 - - anchors = np.array([ - [-8, -8, 8, 8], - [-16, -16, 16, 16], - [-12, -12, 12, 12], - [-12, -16, 12, 16], - [-16, -12, 16, 12] - ], dtype=keras.backend.floatx()) - - expected = [ - # anchors for (0, 0) - [4 - 8, 4 - 8, 4 + 8, 4 + 8], - [4 - 16, 4 - 16, 4 + 16, 4 + 16], - [4 - 12, 4 - 12, 4 + 12, 4 + 12], - [4 - 12, 4 - 16, 4 + 12, 4 + 16], - [4 - 16, 4 - 12, 4 + 16, 4 + 12], - - # anchors for (0, 1) - [12 - 8, 4 - 8, 12 + 8, 4 + 8], - [12 - 16, 4 - 16, 12 + 16, 4 + 16], - [12 - 12, 4 - 12, 12 + 12, 4 + 12], - [12 - 12, 4 - 16, 12 + 12, 4 + 16], - [12 - 16, 4 - 12, 12 + 16, 4 + 12], - - # anchors for (0, 2) - [20 - 8, 4 - 8, 20 + 8, 4 + 8], - [20 - 16, 4 - 16, 20 + 16, 4 + 16], - [20 - 12, 4 - 12, 20 + 12, 4 + 12], - [20 - 12, 4 - 16, 20 + 12, 4 + 16], - [20 - 16, 4 - 12, 20 + 16, 4 + 12], - - # anchors for (1, 0) - [4 - 8, 12 - 8, 4 + 8, 12 + 8], - [4 - 16, 12 - 16, 4 + 16, 12 + 16], - [4 - 12, 12 - 12, 4 + 12, 12 + 12], - [4 - 12, 12 - 16, 4 + 12, 12 + 16], - [4 - 16, 12 - 12, 4 + 16, 12 + 12], - - # anchors for (1, 1) - [12 - 8, 12 - 8, 12 + 8, 12 + 8], - [12 - 16, 12 - 16, 12 + 16, 12 + 16], - [12 - 12, 12 - 12, 12 + 12, 12 + 12], - [12 - 12, 12 - 16, 12 + 12, 12 + 16], - [12 - 16, 12 - 12, 12 + 16, 12 + 12], - - # anchors for (1, 2) - [20 - 8, 12 - 8, 20 + 8, 12 + 8], - [20 - 16, 
12 - 16, 20 + 16, 12 + 16], - [20 - 12, 12 - 12, 20 + 12, 12 + 12], - [20 - 12, 12 - 16, 20 + 12, 12 + 16], - [20 - 16, 12 - 12, 20 + 16, 12 + 12], - ] - - result = keras_retinanet.backend.shift(shape, stride, anchors) - result = keras.backend.eval(result) - - np.testing.assert_array_equal(result, expected) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import keras_retinanet.bin.train -import keras.backend - -import warnings - -import pytest - - -@pytest.fixture(autouse=True) -def clear_session(): - # run before test (do nothing) - yield - # run after test, clear keras session - keras.backend.clear_session() - - -def test_coco(): - # ignore warnings in this test - warnings.simplefilter('ignore') - - # run training / evaluation - keras_retinanet.bin.train.main([ - '--epochs=1', - '--steps=1', - '--no-weights', - '--no-snapshots', - 'coco', - 'tests/test-data/coco', - ]) - - -def test_pascal(): - # ignore warnings in this test - warnings.simplefilter('ignore') - - # run training / evaluation - keras_retinanet.bin.train.main([ - '--epochs=1', - '--steps=1', - '--no-weights', - '--no-snapshots', - 'pascal', - 'tests/test-data/pascal', - ]) - - -def test_csv(): - # ignore warnings in this test - warnings.simplefilter('ignore') - - # run training / evaluation - keras_retinanet.bin.train.main([ - '--epochs=1', - '--steps=1', - '--no-weights', - '--no-snapshots', - 'csv', - 'tests/test-data/csv/annotations.csv', - 'tests/test-data/csv/classes.csv', - ]) - - -def test_vgg(): - # ignore warnings in this test - warnings.simplefilter('ignore') - - # run training / evaluation - keras_retinanet.bin.train.main([ - '--backbone=vgg16', - '--epochs=1', - '--steps=1', - '--no-weights', - '--no-snapshots', - '--freeze-backbone', - 'coco', - 'tests/test-data/coco', - ]) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" - -import keras -import keras_retinanet.layers - -import numpy as np - - -class TestFilterDetections(object): - def test_simple(self): - # create simple FilterDetections layer - filter_detections_layer = keras_retinanet.layers.FilterDetections() - - # create simple input - boxes = np.array([[ - [0, 0, 10, 10], - [0, 0, 10, 10], # this will be suppressed - ]], dtype=keras.backend.floatx()) - boxes = keras.backend.constant(boxes) - - classification = np.array([[ - [0, 0.9], # this will be suppressed - [0, 1], - ]], dtype=keras.backend.floatx()) - classification = keras.backend.constant(classification) - - # compute output - actual_boxes, actual_scores, actual_labels = filter_detections_layer.call([ - boxes, classification]) - actual_boxes = keras.backend.eval(actual_boxes) - actual_scores = keras.backend.eval(actual_scores) - actual_labels = keras.backend.eval(actual_labels) - - # define expected output - expected_boxes = -1 * \ - np.ones((1, 300, 4), dtype=keras.backend.floatx()) - expected_boxes[0, 0, :] = [0, 0, 10, 10] - - expected_scores = -1 * np.ones((1, 300), dtype=keras.backend.floatx()) - expected_scores[0, 0] = 1 - - expected_labels = -1 * np.ones((1, 300), dtype=keras.backend.floatx()) - expected_labels[0, 0] = 1 - - # assert actual and expected are equal - np.testing.assert_array_equal(actual_boxes, expected_boxes) - np.testing.assert_array_equal(actual_scores, expected_scores) - np.testing.assert_array_equal(actual_labels, expected_labels) - - def test_simple_with_other(self): - # create simple FilterDetections layer - filter_detections_layer = keras_retinanet.layers.FilterDetections() - - # create simple input - boxes = np.array([[ - [0, 0, 10, 10], - [0, 0, 10, 10], # this will be suppressed - ]], dtype=keras.backend.floatx()) - boxes = keras.backend.constant(boxes) - - classification = np.array([[ - [0, 0.9], # this will be suppressed - [0, 1], - ]], dtype=keras.backend.floatx()) - classification = keras.backend.constant(classification) - - other = [] - other.append(np.array([[ - [0, 1234], # this will be suppressed - [0, 5678], - ]], dtype=keras.backend.floatx())) - other.append(np.array([[ - 5678, # this will be suppressed - 1234, - ]], dtype=keras.backend.floatx())) - other = [keras.backend.constant(o) for o in other] - - # compute output - actual = filter_detections_layer.call([boxes, classification] + other) - actual_boxes = keras.backend.eval(actual[0]) - actual_scores = keras.backend.eval(actual[1]) - actual_labels = keras.backend.eval(actual[2]) - actual_other = [keras.backend.eval(a) for a in actual[3:]] - - # define expected output - expected_boxes = -1 * \ - np.ones((1, 300, 4), dtype=keras.backend.floatx()) - expected_boxes[0, 0, :] = [0, 0, 10, 10] - - expected_scores = -1 * np.ones((1, 300), dtype=keras.backend.floatx()) - expected_scores[0, 0] = 1 - - expected_labels = -1 * np.ones((1, 300), dtype=keras.backend.floatx()) - expected_labels[0, 0] = 1 - - expected_other = [] - expected_other.append(-1 * np.ones((1, 300, 2), - dtype=keras.backend.floatx())) - expected_other[-1][0, 0, :] = [0, 5678] - expected_other.append(-1 * np.ones((1, 300), - dtype=keras.backend.floatx())) - expected_other[-1][0, 0] = 1234 - - # assert actual and expected are equal - np.testing.assert_array_equal(actual_boxes, expected_boxes) - np.testing.assert_array_equal(actual_scores, expected_scores) - np.testing.assert_array_equal(actual_labels, expected_labels) - - for a, e in zip(actual_other, expected_other): - np.testing.assert_array_equal(a, e) - - def test_mini_batch(self): - 
# create simple FilterDetections layer - filter_detections_layer = keras_retinanet.layers.FilterDetections() - - # create input with batch_size=2 - boxes = np.array([ - [ - [0, 0, 10, 10], # this will be suppressed - [0, 0, 10, 10], - ], - [ - [100, 100, 150, 150], - [100, 100, 150, 150], # this will be suppressed - ], - ], dtype=keras.backend.floatx()) - boxes = keras.backend.constant(boxes) - - classification = np.array([ - [ - [0, 0.9], # this will be suppressed - [0, 1], - ], - [ - [1, 0], - [0.9, 0], # this will be suppressed - ], - ], dtype=keras.backend.floatx()) - classification = keras.backend.constant(classification) - - # compute output - actual_boxes, actual_scores, actual_labels = filter_detections_layer.call([ - boxes, classification]) - actual_boxes = keras.backend.eval(actual_boxes) - actual_scores = keras.backend.eval(actual_scores) - actual_labels = keras.backend.eval(actual_labels) - - # define expected output - expected_boxes = -1 * \ - np.ones((2, 300, 4), dtype=keras.backend.floatx()) - expected_boxes[0, 0, :] = [0, 0, 10, 10] - expected_boxes[1, 0, :] = [100, 100, 150, 150] - - expected_scores = -1 * np.ones((2, 300), dtype=keras.backend.floatx()) - expected_scores[0, 0] = 1 - expected_scores[1, 0] = 1 - - expected_labels = -1 * np.ones((2, 300), dtype=keras.backend.floatx()) - expected_labels[0, 0] = 1 - expected_labels[1, 0] = 0 - - # assert actual and expected are equal - np.testing.assert_array_equal(actual_boxes, expected_boxes) - np.testing.assert_array_equal(actual_scores, expected_scores) - np.testing.assert_array_equal(actual_labels, expected_labels) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
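The expected arrays in these tests encode the `FilterDetections` output contract: surviving detections are padded out to a fixed 300 slots with -1 so batched outputs stay rectangular. The padding step on its own (300 matches the shapes asserted above):

```python
import numpy as np

max_detections = 300
kept_boxes = np.array([[0, 0, 10, 10]], dtype=np.float32)   # one surviving box

padded = -1 * np.ones((1, max_detections, 4), dtype=np.float32)
padded[0, :kept_boxes.shape[0], :] = kept_boxes
print(padded[0, :2])
# [[ 0.  0. 10. 10.]
#  [-1. -1. -1. -1.]]
```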
-""" - -import keras -import keras_retinanet.layers - -import numpy as np - - -class TestAnchors(object): - def test_simple(self): - # create simple Anchors layer - anchors_layer = keras_retinanet.layers.Anchors( - size=32, - stride=8, - ratios=np.array([1], keras.backend.floatx()), - scales=np.array([1], keras.backend.floatx()), - ) - - # create fake features input (only shape is used anyway) - features = np.zeros((1, 2, 2, 1024), dtype=keras.backend.floatx()) - features = keras.backend.variable(features) - - # call the Anchors layer - anchors = anchors_layer.call(features) - anchors = keras.backend.eval(anchors) - - # expected anchor values - expected = np.array([[ - [-12, -12, 20, 20], - [-4, -12, 28, 20], - [-12, -4, 20, 28], - [-4, -4, 28, 28], - ]], dtype=keras.backend.floatx()) - - # test anchor values - np.testing.assert_array_equal(anchors, expected) - - # mark test to fail - def test_mini_batch(self): - # create simple Anchors layer - anchors_layer = keras_retinanet.layers.Anchors( - size=32, - stride=8, - ratios=np.array([1], dtype=keras.backend.floatx()), - scales=np.array([1], dtype=keras.backend.floatx()), - ) - - # create fake features input with batch_size=2 - features = np.zeros((2, 2, 2, 1024), dtype=keras.backend.floatx()) - features = keras.backend.variable(features) - - # call the Anchors layer - anchors = anchors_layer.call(features) - anchors = keras.backend.eval(anchors) - - # expected anchor values - expected = np.array([[ - [-12, -12, 20, 20], - [-4, -12, 28, 20], - [-12, -4, 20, 28], - [-4, -4, 28, 28], - ]], dtype=keras.backend.floatx()) - expected = np.tile(expected, (2, 1, 1)) - - # test anchor values - np.testing.assert_array_equal(anchors, expected) - - -class TestUpsampleLike(object): - def test_simple(self): - # create simple UpsampleLike layer - upsample_like_layer = keras_retinanet.layers.UpsampleLike() - - # create input source - source = np.zeros((1, 2, 2, 1), dtype=keras.backend.floatx()) - source = keras.backend.variable(source) - target = np.zeros((1, 5, 5, 1), dtype=keras.backend.floatx()) - expected = target - target = keras.backend.variable(target) - - # compute output - actual = upsample_like_layer.call([source, target]) - actual = keras.backend.eval(actual) - - np.testing.assert_array_equal(actual, expected) - - def test_mini_batch(self): - # create simple UpsampleLike layer - upsample_like_layer = keras_retinanet.layers.UpsampleLike() - - # create input source - source = np.zeros((2, 2, 2, 1), dtype=keras.backend.floatx()) - source = keras.backend.variable(source) - - target = np.zeros((2, 5, 5, 1), dtype=keras.backend.floatx()) - expected = target - target = keras.backend.variable(target) - - # compute output - actual = upsample_like_layer.call([source, target]) - actual = keras.backend.eval(actual) - - np.testing.assert_array_equal(actual, expected) - - -class TestRegressBoxes(object): - def test_simple(self): - mean = [0, 0, 0, 0] - std = [0.2, 0.2, 0.2, 0.2] - - # create simple RegressBoxes layer - regress_boxes_layer = keras_retinanet.layers.RegressBoxes( - mean=mean, std=std) - - # create input - anchors = np.array([[ - [0, 0, 10, 10], - [50, 50, 100, 100], - [20, 20, 40, 40], - ]], dtype=keras.backend.floatx()) - anchors = keras.backend.variable(anchors) - - regression = np.array([[ - [0, 0, 0, 0], - [0.1, 0.1, 0, 0], - [0, 0, 0.1, 0.1], - ]], dtype=keras.backend.floatx()) - regression = keras.backend.variable(regression) - - # compute output - actual = regress_boxes_layer.call([anchors, regression]) - actual = 
keras.backend.eval(actual) - - # compute expected output - expected = np.array([[ - [0, 0, 10, 10], - [51, 51, 100, 100], - [20, 20, 40.4, 40.4], - ]], dtype=keras.backend.floatx()) - - np.testing.assert_array_almost_equal(actual, expected, decimal=2) - - def test_mini_batch(self): - mean = [0, 0, 0, 0] - std = [0.2, 0.2, 0.2, 0.2] - - # create simple RegressBoxes layer - regress_boxes_layer = keras_retinanet.layers.RegressBoxes( - mean=mean, std=std) - - # create input - anchors = np.array([ - [ - [0, 0, 10, 10], # 1 - [50, 50, 100, 100], # 2 - [20, 20, 40, 40], # 3 - ], - [ - [20, 20, 40, 40], # 3 - [0, 0, 10, 10], # 1 - [50, 50, 100, 100], # 2 - ], - ], dtype=keras.backend.floatx()) - anchors = keras.backend.variable(anchors) - - regression = np.array([ - [ - [0, 0, 0, 0], # 1 - [0.1, 0.1, 0, 0], # 2 - [0, 0, 0.1, 0.1], # 3 - ], - [ - [0, 0, 0.1, 0.1], # 3 - [0, 0, 0, 0], # 1 - [0.1, 0.1, 0, 0], # 2 - ], - ], dtype=keras.backend.floatx()) - regression = keras.backend.variable(regression) - - # compute output - actual = regress_boxes_layer.call([anchors, regression]) - actual = keras.backend.eval(actual) - - # compute expected output - expected = np.array([ - [ - [0, 0, 10, 10], # 1 - [51, 51, 100, 100], # 2 - [20, 20, 40.4, 40.4], # 3 - ], - [ - [20, 20, 40.4, 40.4], # 3 - [0, 0, 10, 10], # 1 - [51, 51, 100, 100], # 2 - ], - ], dtype=keras.backend.floatx()) - - np.testing.assert_array_almost_equal(actual, expected, decimal=2) -""" -Copyright 2018 vidosits (https://github.com/vidosits/) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -""" - -import warnings -import pytest -import numpy as np -import keras -from keras_retinanet import losses -from keras_retinanet.models.densenet import DenseNetBackbone - -parameters = ['densenet121'] - - -@pytest.mark.parametrize("backbone", parameters) -def test_backbone(backbone): - # ignore warnings in this test - warnings.simplefilter('ignore') - - num_classes = 10 - - inputs = np.zeros((1, 200, 400, 3), dtype=np.float32) - targets = [np.zeros((1, 14814, 5), dtype=np.float32), - np.zeros((1, 14814, num_classes + 1))] - - inp = keras.layers.Input(inputs[0].shape) - - densenet_backbone = DenseNetBackbone(backbone) - model = densenet_backbone.retinanet(num_classes=num_classes, inputs=inp) - model.summary() - - # compile model - model.compile( - loss={ - 'regression': losses.smooth_l1(), - 'classification': losses.focal() - }, - optimizer=keras.optimizers.adam(lr=1e-5, clipnorm=0.001)) - - model.fit(inputs, targets, batch_size=1) -""" -Copyright 2017-2018 lvaleriu (https://github.com/lvaleriu/) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-See the License for the specific language governing permissions and -limitations under the License. -""" - -import warnings -import pytest -import numpy as np -import keras -from keras_retinanet import losses -from keras_retinanet.models.mobilenet import MobileNetBackbone - -alphas = ['1.0'] -parameters = [] - -for backbone in MobileNetBackbone.allowed_backbones: - for alpha in alphas: - parameters.append((backbone, alpha)) - - -@pytest.mark.parametrize("backbone, alpha", parameters) -def test_backbone(backbone, alpha): - # ignore warnings in this test - warnings.simplefilter('ignore') - - num_classes = 10 - - inputs = np.zeros((1, 1024, 363, 3), dtype=np.float32) - targets = [np.zeros((1, 68760, 5), dtype=np.float32), - np.zeros((1, 68760, num_classes + 1))] - - inp = keras.layers.Input(inputs[0].shape) - - mobilenet_backbone = MobileNetBackbone( - backbone='{}_{}'.format(backbone, alpha)) - training_model = mobilenet_backbone.retinanet( - num_classes=num_classes, inputs=inp) - training_model.summary() - - # compile model - training_model.compile( - loss={ - 'regression': losses.smooth_l1(), - 'classification': losses.focal() - }, - optimizer=keras.optimizers.adam(lr=1e-5, clipnorm=0.001)) - - training_model.fit(inputs, targets, batch_size=1) -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" - -import csv -import pytest -try: - from io import StringIO -except ImportError: - from stringio import StringIO - -from keras_retinanet.preprocessing import csv_generator - - -def csv_str(string): - if str == bytes: - string = string.decode('utf-8') - return csv.reader(StringIO(string)) - - -def annotation(x1, y1, x2, y2, class_name): - return {'x1': x1, 'y1': y1, 'x2': x2, 'y2': y2, 'class': class_name} - - -def test_read_classes(): - assert csv_generator._read_classes(csv_str('')) == {} - assert csv_generator._read_classes(csv_str('a,1')) == {'a': 1} - assert csv_generator._read_classes(csv_str('a,1\nb,2')) == {'a': 1, 'b': 2} - - -def test_read_classes_wrong_format(): - with pytest.raises(ValueError): - try: - csv_generator._read_classes(csv_str('a,b,c')) - except ValueError as e: - assert str(e).startswith('line 1: format should be') - raise - with pytest.raises(ValueError): - try: - csv_generator._read_classes(csv_str('a,1\nb,c,d')) - except ValueError as e: - assert str(e).startswith('line 2: format should be') - raise - - -def test_read_classes_malformed_class_id(): - with pytest.raises(ValueError): - try: - csv_generator._read_classes(csv_str('a,b')) - except ValueError as e: - assert str(e).startswith("line 1: malformed class ID:") - raise - - with pytest.raises(ValueError): - try: - csv_generator._read_classes(csv_str('a,1\nb,c')) - except ValueError as e: - assert str(e).startswith('line 2: malformed class ID:') - raise - - -def test_read_classes_duplicate_name(): - with pytest.raises(ValueError): - try: - csv_generator._read_classes(csv_str('a,1\nb,2\na,3')) - except ValueError as e: - assert str(e).startswith('line 3: duplicate class name') - raise - - -def test_read_annotations(): - classes = {'a': 1, 'b': 2, 'c': 4, 'd': 10} - annotations = csv_generator._read_annotations(csv_str( - 'a.png,0,1,2,3,a' '\n' - 'b.png,4,5,6,7,b' '\n' - 'c.png,8,9,10,11,c' '\n' - 'd.png,12,13,14,15,d' '\n' - ), classes) - assert annotations == { - 'a.png': [annotation(0, 1, 2, 3, 'a')], - 'b.png': [annotation(4, 5, 6, 7, 'b')], - 'c.png': [annotation(8, 9, 10, 11, 'c')], - 'd.png': [annotation(12, 13, 14, 15, 'd')], - } - - -def test_read_annotations_multiple(): - classes = {'a': 1, 'b': 2, 'c': 4, 'd': 10} - annotations = csv_generator._read_annotations(csv_str( - 'a.png,0,1,2,3,a' '\n' - 'b.png,4,5,6,7,b' '\n' - 'a.png,8,9,10,11,c' '\n' - ), classes) - assert annotations == { - 'a.png': [ - annotation(0, 1, 2, 3, 'a'), - annotation(8, 9, 10, 11, 'c'), - ], - 'b.png': [annotation(4, 5, 6, 7, 'b')], - } - - -def test_read_annotations_wrong_format(): - classes = {'a': 1, 'b': 2, 'c': 4, 'd': 10} - with pytest.raises(ValueError): - try: - csv_generator._read_annotations(csv_str('a.png,1,2,3,a'), classes) - except ValueError as e: - assert str(e).startswith("line 1: format should be") - raise - - with pytest.raises(ValueError): - try: - csv_generator._read_annotations(csv_str( - 'a.png,0,1,2,3,a' '\n' - 'a.png,1,2,3,a' '\n' - ), classes) - except ValueError as e: - assert str(e).startswith("line 2: format should be") - raise - - -def test_read_annotations_wrong_x1(): - with pytest.raises(ValueError): - try: - csv_generator._read_annotations( - csv_str('a.png,a,0,1,2,a'), {'a': 1}) - except ValueError as e: - assert str(e).startswith("line 1: malformed x1:") - raise - - -def test_read_annotations_wrong_y1(): - with pytest.raises(ValueError): - try: - csv_generator._read_annotations( - csv_str('a.png,0,a,1,2,a'), {'a': 1}) - except ValueError as e: - assert str(e).startswith("line 1: malformed 
y1:") - raise - - -def test_read_annotations_wrong_x2(): - with pytest.raises(ValueError): - try: - csv_generator._read_annotations( - csv_str('a.png,0,1,a,2,a'), {'a': 1}) - except ValueError as e: - assert str(e).startswith("line 1: malformed x2:") - raise - - -def test_read_annotations_wrong_y2(): - with pytest.raises(ValueError): - try: - csv_generator._read_annotations( - csv_str('a.png,0,1,2,a,a'), {'a': 1}) - except ValueError as e: - assert str(e).startswith("line 1: malformed y2:") - raise - - -def test_read_annotations_wrong_class(): - with pytest.raises(ValueError): - try: - csv_generator._read_annotations( - csv_str('a.png,0,1,2,3,g'), {'a': 1}) - except ValueError as e: - assert str(e).startswith("line 1: unknown class name:") - raise - - -def test_read_annotations_invalid_bb_x(): - with pytest.raises(ValueError): - try: - csv_generator._read_annotations( - csv_str('a.png,1,2,1,3,g'), {'a': 1}) - except ValueError as e: - assert str(e).startswith( - "line 1: x2 (1) must be higher than x1 (1)") - raise - with pytest.raises(ValueError): - try: - csv_generator._read_annotations( - csv_str('a.png,9,2,5,3,g'), {'a': 1}) - except ValueError as e: - assert str(e).startswith( - "line 1: x2 (5) must be higher than x1 (9)") - raise - - -def test_read_annotations_invalid_bb_y(): - with pytest.raises(ValueError): - try: - csv_generator._read_annotations( - csv_str('a.png,1,2,3,2,a'), {'a': 1}) - except ValueError as e: - assert str(e).startswith( - "line 1: y2 (2) must be higher than y1 (2)") - raise - with pytest.raises(ValueError): - try: - csv_generator._read_annotations( - csv_str('a.png,1,8,3,5,a'), {'a': 1}) - except ValueError as e: - assert str(e).startswith( - "line 1: y2 (5) must be higher than y1 (8)") - raise - - -def test_read_annotations_empty_image(): - # Check that images without annotations are parsed. - assert csv_generator._read_annotations(csv_str('a.png,,,,,\nb.png,,,,,'), { - 'a': 1}) == {'a.png': [], 'b.png': []} - - # Check that lines without annotations don't clear earlier annotations. - assert csv_generator._read_annotations(csv_str('a.png,0,1,2,3,a\na.png,,,,,'), { - 'a': 1}) == {'a.png': [annotation(0, 1, 2, 3, 'a')]} -""" -Copyright 2017-2018 Fizyr (https://fizyr.com) - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-""" - -from keras_retinanet.preprocessing.generator import Generator - -import numpy as np -import pytest - - -class SimpleGenerator(Generator): - def __init__(self, bboxes, labels, num_classes=0, image=None): - assert(len(bboxes) == len(labels)) - self.bboxes = bboxes - self.labels = labels - self.num_classes_ = num_classes - self.image = image - super(SimpleGenerator, self).__init__( - group_method='none', shuffle_groups=False) - - def num_classes(self): - return self.num_classes_ - - def load_image(self, image_index): - return self.image - - def size(self): - return len(self.bboxes) - - def load_annotations(self, image_index): - annotations = { - 'labels': self.labels[image_index], 'bboxes': self.bboxes[image_index]} - return annotations - - -class TestLoadAnnotationsGroup(object): - def test_simple(self): - input_bboxes_group = [ - np.array([ - [0, 0, 10, 10], - [150, 150, 350, 350] - ]), - ] - input_labels_group = [ - np.array([ - 1, - 3 - ]), - ] - expected_bboxes_group = input_bboxes_group - expected_labels_group = input_labels_group - - simple_generator = SimpleGenerator( - input_bboxes_group, input_labels_group) - annotations = simple_generator.load_annotations_group( - simple_generator.groups[0]) - - assert('bboxes' in annotations[0]) - assert('labels' in annotations[0]) - np.testing.assert_equal( - expected_bboxes_group[0], annotations[0]['bboxes']) - np.testing.assert_equal( - expected_labels_group[0], annotations[0]['labels']) - - def test_multiple(self): - input_bboxes_group = [ - np.array([ - [0, 0, 10, 10], - [150, 150, 350, 350] - ]), - np.array([ - [0, 0, 50, 50], - ]), - ] - input_labels_group = [ - np.array([ - 1, - 0 - ]), - np.array([ - 3 - ]) - ] - expected_bboxes_group = input_bboxes_group - expected_labels_group = input_labels_group - - simple_generator = SimpleGenerator( - input_bboxes_group, input_labels_group) - annotations_group_0 = simple_generator.load_annotations_group( - simple_generator.groups[0]) - annotations_group_1 = simple_generator.load_annotations_group( - simple_generator.groups[1]) - - assert('bboxes' in annotations_group_0[0]) - assert('bboxes' in annotations_group_1[0]) - assert('labels' in annotations_group_0[0]) - assert('labels' in annotations_group_1[0]) - np.testing.assert_equal( - expected_bboxes_group[0], annotations_group_0[0]['bboxes']) - np.testing.assert_equal( - expected_labels_group[0], annotations_group_0[0]['labels']) - np.testing.assert_equal( - expected_bboxes_group[1], annotations_group_1[0]['bboxes']) - np.testing.assert_equal( - expected_labels_group[1], annotations_group_1[0]['labels']) - - -class TestFilterAnnotations(object): - def test_simple_filter(self): - input_bboxes_group = [ - np.array([ - [0, 0, 10, 10], - [150, 150, 50, 50] - ]), - ] - input_labels_group = [ - np.array([ - 3, - 1 - ]), - ] - - input_image = np.zeros((500, 500, 3)) - - expected_bboxes_group = [ - np.array([ - [0, 0, 10, 10], - ]), - ] - expected_labels_group = [ - np.array([ - 3, - ]), - ] - - simple_generator = SimpleGenerator( - input_bboxes_group, input_labels_group) - annotations = simple_generator.load_annotations_group( - simple_generator.groups[0]) - # expect a UserWarning - with pytest.warns(UserWarning): - image_group, annotations_group = simple_generator.filter_annotations( - [input_image], annotations, simple_generator.groups[0]) - - np.testing.assert_equal( - expected_bboxes_group[0], annotations_group[0]['bboxes']) - np.testing.assert_equal( - expected_labels_group[0], annotations_group[0]['labels']) - - def 
test_multiple_filter(self): - input_bboxes_group = [ - np.array([ - [0, 0, 10, 10], - [150, 150, 50, 50], - [150, 150, 350, 350], - [350, 350, 150, 150], - [1, 1, 2, 2], - [2, 2, 1, 1] - ]), - np.array([ - [0, 0, -1, -1] - ]), - np.array([ - [-10, -10, 0, 0], - [-10, -10, -100, -100], - [10, 10, 100, 100] - ]), - np.array([ - [10, 10, 100, 100], - [10, 10, 600, 600] - ]), - ] - - input_labels_group = [ - np.array([ - 6, - 5, - 4, - 3, - 2, - 1 - ]), - np.array([ - 0 - ]), - np.array([ - 10, - 11, - 12 - ]), - np.array([ - 105, - 107 - ]), - ] - - input_image = np.zeros((500, 500, 3)) - - expected_bboxes_group = [ - np.array([ - [0, 0, 10, 10], - [150, 150, 350, 350], - [1, 1, 2, 2] - ]), - np.zeros((0, 4)), - np.array([ - [10, 10, 100, 100] - ]), - np.array([ - [10, 10, 100, 100] - ]), - ] - expected_labels_group = [ - np.array([ - 6, - 4, - 2 - ]), - np.zeros((0,)), - np.array([ - 12 - ]), - np.array([ - 105 - ]), - ] - - simple_generator = SimpleGenerator( - input_bboxes_group, input_labels_group) - # expect a UserWarning - annotations_group_0 = simple_generator.load_annotations_group( - simple_generator.groups[0]) - with pytest.warns(UserWarning): - image_group, annotations_group_0 = simple_generator.filter_annotations( - [input_image], annotations_group_0, simple_generator.groups[0]) - - annotations_group_1 = simple_generator.load_annotations_group( - simple_generator.groups[1]) - with pytest.warns(UserWarning): - image_group, annotations_group_1 = simple_generator.filter_annotations( - [input_image], annotations_group_1, simple_generator.groups[1]) - - annotations_group_2 = simple_generator.load_annotations_group( - simple_generator.groups[2]) - with pytest.warns(UserWarning): - image_group, annotations_group_2 = simple_generator.filter_annotations( - [input_image], annotations_group_2, simple_generator.groups[2]) - - np.testing.assert_equal( - expected_bboxes_group[0], annotations_group_0[0]['bboxes']) - np.testing.assert_equal( - expected_labels_group[0], annotations_group_0[0]['labels']) - - np.testing.assert_equal( - expected_bboxes_group[1], annotations_group_1[0]['bboxes']) - np.testing.assert_equal( - expected_labels_group[1], annotations_group_1[0]['labels']) - - np.testing.assert_equal( - expected_bboxes_group[2], annotations_group_2[0]['bboxes']) - np.testing.assert_equal( - expected_labels_group[2], annotations_group_2[0]['labels']) - - def test_complete(self): - input_bboxes_group = [ - np.array([ - [0, 0, 50, 50], - [150, 150, 50, 50], # invalid bbox - ], dtype=float) - ] - - input_labels_group = [ - np.array([ - 5, # one object of class 5 - 3, # one object of class 3 with an invalid box - ], dtype=float) - ] - - input_image = np.zeros((500, 500, 3), dtype=np.uint8) - - simple_generator = SimpleGenerator( - input_bboxes_group, input_labels_group, image=input_image, num_classes=6) - # expect a UserWarning - with pytest.warns(UserWarning): - _, [_, labels_batch] = simple_generator[0] - - # test that only object with class 5 is present in labels_batch - labels = np.unique(np.argmax(labels_batch == 5, axis=2)) - assert(len(labels) == 1 and labels[0] == - 0), 'Expected only class 0 to be present, but got classes {}'.format(labels) -import numpy as np -import configparser -import keras - -from keras_retinanet.utils.anchors import anchors_for_shape, AnchorParameters -from keras_retinanet.utils.config import read_config_file, parse_anchor_parameters - - -def test_config_read(): - config = read_config_file('tests/test-data/config/config.ini') - assert 'anchor_parameters' in config 
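# A config.ini satisfying the assertions below would look roughly like this
# (an assumed reconstruction of tests/test-data/config/config.ini):
#
#   [anchor_parameters]
#   sizes   = 32 64 128 256 512
#   strides = 8 16 32 64 128
#   ratios  = 0.5 1 2 3
#   scales  = 1 1.2 1.6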
- assert 'sizes' in config['anchor_parameters'] - assert 'strides' in config['anchor_parameters'] - assert 'ratios' in config['anchor_parameters'] - assert 'scales' in config['anchor_parameters'] - assert config['anchor_parameters']['sizes'] == '32 64 128 256 512' - assert config['anchor_parameters']['strides'] == '8 16 32 64 128' - assert config['anchor_parameters']['ratios'] == '0.5 1 2 3' - assert config['anchor_parameters']['scales'] == '1 1.2 1.6' - - -def create_anchor_params_config(): - config = configparser.ConfigParser() - config['anchor_parameters'] = {} - config['anchor_parameters']['sizes'] = '32 64 128 256 512' - config['anchor_parameters']['strides'] = '8 16 32 64 128' - config['anchor_parameters']['ratios'] = '0.5 1' - config['anchor_parameters']['scales'] = '1 1.2 1.6' - - return config - - -def test_parse_anchor_parameters(): - config = create_anchor_params_config() - anchor_params_parsed = parse_anchor_parameters(config) - - sizes = [32, 64, 128, 256, 512] - strides = [8, 16, 32, 64, 128] - ratios = np.array([0.5, 1], keras.backend.floatx()) - scales = np.array([1, 1.2, 1.6], keras.backend.floatx()) - - assert sizes == anchor_params_parsed.sizes - assert strides == anchor_params_parsed.strides - np.testing.assert_equal(ratios, anchor_params_parsed.ratios) - np.testing.assert_equal(scales, anchor_params_parsed.scales) - - -def test_anchors_for_shape_dimensions(): - sizes = [32, 64, 128] - strides = [8, 16, 32] - ratios = np.array([0.5, 1, 2, 3], keras.backend.floatx()) - scales = np.array([1, 1.2, 1.6], keras.backend.floatx()) - anchor_params = AnchorParameters(sizes, strides, ratios, scales) - - pyramid_levels = [3, 4, 5] - image_shape = (64, 64) - all_anchors = anchors_for_shape( - image_shape, pyramid_levels=pyramid_levels, anchor_params=anchor_params) - - assert all_anchors.shape == (1008, 4) - - -def test_anchors_for_shape_values(): - sizes = [12] - strides = [8] - ratios = np.array([1, 2], keras.backend.floatx()) - scales = np.array([1, 2], keras.backend.floatx()) - anchor_params = AnchorParameters(sizes, strides, ratios, scales) - - pyramid_levels = [3] - image_shape = (16, 16) - all_anchors = anchors_for_shape( - image_shape, pyramid_levels=pyramid_levels, anchor_params=anchor_params) - - # using almost_equal for floating point imprecisions - np.testing.assert_almost_equal(all_anchors[0, :], [ - strides[0] / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2, - strides[0] / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2, - strides[0] / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2, - strides[0] / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[1, :], [ - strides[0] / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2, - strides[0] / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2, - strides[0] / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2, - strides[0] / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[2, :], [ - strides[0] / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2, - strides[0] / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2, - strides[0] / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2, - strides[0] / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[3, :], [ - strides[0] / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2, - strides[0] / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2, - strides[0] / 2 + (sizes[0] * 
scales[1] / np.sqrt(ratios[1])) / 2, - strides[0] / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[4, :], [ - strides[0] * 3 / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2, - strides[0] / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2, - strides[0] / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[5, :], [ - strides[0] * 3 / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2, - strides[0] / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2, - strides[0] / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[6, :], [ - strides[0] * 3 / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2, - strides[0] / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2, - strides[0] / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[7, :], [ - strides[0] * 3 / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2, - strides[0] / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2, - strides[0] / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[8, :], [ - strides[0] / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2, - strides[0] * 3 / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2, - strides[0] / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[9, :], [ - strides[0] / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2, - strides[0] * 3 / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2, - strides[0] / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[10, :], [ - strides[0] / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2, - strides[0] * 3 / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2, - strides[0] / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[11, :], [ - strides[0] / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2, - strides[0] * 3 / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2, - strides[0] / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[12, :], [ - strides[0] * 3 / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2, - strides[0] * 3 / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[0])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[0])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[13, :], [ - strides[0] * 3 / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2, - strides[0] * 3 / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[0])) / 2, - 
strides[0] * 3 / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[0])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[14, :], [ - strides[0] * 3 / 2 - (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2, - strides[0] * 3 / 2 - (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[0] / np.sqrt(ratios[1])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[0] * np.sqrt(ratios[1])) / 2, - ], decimal=6) - np.testing.assert_almost_equal(all_anchors[15, :], [ - strides[0] * 3 / 2 - (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2, - strides[0] * 3 / 2 - (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[1] / np.sqrt(ratios[1])) / 2, - strides[0] * 3 / 2 + (sizes[0] * scales[1] * np.sqrt(ratios[1])) / 2, - ], decimal=6) -from setuptools import setup -from setuptools import find_packages - -install_requires = [ - 'Keras', - 'recurrentshop' -] - -setup( - name='seq2seq', - version='1.0.0', - description='Sequence to Sequence Learning with Keras', - author='Fariz Rahman', - author_email='farizrahman4u@gmail.com', - url='https://github.com/farizrahman4u/seq2seq', - license='GNU GPL v2', - install_requires=install_requires, - packages=find_packages(), - dependency_links=['git+git://github.com/datalogai/recurrentshop.git'] -) -from .cells import * -from .models import * -import recurrentshop -from recurrentshop.cells import * -from keras.models import Model -from keras.layers import Input, Dense, Lambda, Activation -from keras.layers import add, multiply, concatenate -from keras import backend as K - - -class LSTMDecoderCell(ExtendedRNNCell): - - def __init__(self, hidden_dim=None, **kwargs): - if hidden_dim: - self.hidden_dim = hidden_dim - else: - self.hidden_dim = self.output_dim - super(LSTMDecoderCell, self).__init__(**kwargs) - - def build_model(self, input_shape): - hidden_dim = self.hidden_dim - output_dim = self.output_dim - - x = Input(batch_shape=input_shape) - h_tm1 = Input(batch_shape=(input_shape[0], hidden_dim)) - c_tm1 = Input(batch_shape=(input_shape[0], hidden_dim)) - - W1 = Dense(hidden_dim * 4, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer, - use_bias=False) - W2 = Dense(output_dim, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer,) - U = Dense(hidden_dim * 4, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer,) - - z = add([W1(x), U(h_tm1)]) - - z0, z1, z2, z3 = get_slices(z, 4) - i = Activation(self.recurrent_activation)(z0) - f = Activation(self.recurrent_activation)(z1) - c = add([multiply([f, c_tm1]), multiply( - [i, Activation(self.activation)(z2)])]) - o = Activation(self.recurrent_activation)(z3) - h = multiply([o, Activation(self.activation)(c)]) - y = Activation(self.activation)(W2(h)) - - return Model([x, h_tm1, c_tm1], [y, h, c]) - - -class AttentionDecoderCell(ExtendedRNNCell): - - def __init__(self, hidden_dim=None, **kwargs): - if hidden_dim: - self.hidden_dim = hidden_dim - else: - self.hidden_dim = self.output_dim - self.input_ndim = 3 - super(AttentionDecoderCell, self).__init__(**kwargs) - - def build_model(self, input_shape): - - input_dim = input_shape[-1] - output_dim = self.output_dim - input_length = input_shape[1] - hidden_dim = self.hidden_dim - - x = Input(batch_shape=input_shape) - h_tm1 = Input(batch_shape=(input_shape[0], hidden_dim)) - c_tm1 = Input(batch_shape=(input_shape[0], hidden_dim)) - - W1 = Dense(hidden_dim * 4, - 
kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer) - W2 = Dense(output_dim, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer) - W3 = Dense(1, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer) - U = Dense(hidden_dim * 4, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer) - - C = Lambda(lambda x: K.repeat(x, input_length), - output_shape=(input_length, input_dim))(c_tm1) - _xC = concatenate([x, C]) - _xC = Lambda(lambda x: K.reshape(x, (-1, input_dim + hidden_dim)), - output_shape=(input_dim + hidden_dim,))(_xC) - - alpha = W3(_xC) - alpha = Lambda(lambda x: K.reshape(x, (-1, input_length)), - output_shape=(input_length,))(alpha) - alpha = Activation('softmax')(alpha) - - _x = Lambda(lambda x: K.batch_dot(x[0], x[1], axes=( - 1, 1)), output_shape=(input_dim,))([alpha, x]) - - z = add([W1(_x), U(h_tm1)]) - - z0, z1, z2, z3 = get_slices(z, 4) - - i = Activation(self.recurrent_activation)(z0) - f = Activation(self.recurrent_activation)(z1) - - c = add([multiply([f, c_tm1]), multiply( - [i, Activation(self.activation)(z2)])]) - o = Activation(self.recurrent_activation)(z3) - h = multiply([o, Activation(self.activation)(c)]) - y = Activation(self.activation)(W2(h)) - - return Model([x, h_tm1, c_tm1], [y, h, c]) -from __future__ import absolute_import -from recurrentshop import LSTMCell, RecurrentSequential -from .cells import LSTMDecoderCell, AttentionDecoderCell -from keras.models import Sequential, Model -from keras.layers import Dense, Dropout, TimeDistributed, Bidirectional, Input - - -''' -Papers: -[1] Sequence to Sequence Learning with Neural Networks (http://arxiv.org/abs/1409.3215) -[2] Learning Phrase Representations using RNN Encoder-Decoder for Statistical Machine Translation (http://arxiv.org/abs/1406.1078) -[3] Neural Machine Translation by Jointly Learning to Align and Translate (http://arxiv.org/abs/1409.0473) -''' - - -def SimpleSeq2Seq(output_dim, output_length, hidden_dim=None, input_shape=None, - batch_size=None, batch_input_shape=None, input_dim=None, - input_length=None, depth=1, dropout=0.0, unroll=False, - stateful=False): - ''' - Simple model for sequence to sequence learning. - The encoder encodes the input sequence to a vector (called the context vector). - The decoder decodes the context vector into a sequence of vectors. - There is no one-to-one relation between the input and output sequence - elements. The input sequence and output sequence may differ in length. - - Arguments: - - output_dim : Required output dimension. - hidden_dim : The dimension of the internal representations of the model. - output_length : Length of the required output sequence. - depth : Used to create a deep Seq2seq model. For example, if depth = 3, - there will be 3 LSTMs on the encoding side and 3 LSTMs on the - decoding side. You can also specify depth as a tuple. For example, - if depth = (4, 5), 4 LSTMs will be added to the encoding side and - 5 LSTMs will be added to the decoding side. - dropout : Dropout probability in between layers. 
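Example (an illustrative sketch, not part of the original module; all
dimensions are arbitrary):

    model = SimpleSeq2Seq(input_dim=5, hidden_dim=10,
                          output_length=8, output_dim=8, depth=2)
    model.compile(loss='mse', optimizer='rmsprop')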
- - ''' - - if isinstance(depth, int): - depth = (depth, depth) - if batch_input_shape: - shape = batch_input_shape - elif input_shape: - shape = (batch_size,) + input_shape - elif input_dim: - if input_length: - shape = (batch_size,) + (input_length,) + (input_dim,) - else: - shape = (batch_size,) + (None,) + (input_dim,) - else: - raise TypeError('Specify input_shape, batch_input_shape, or input_dim.') - if hidden_dim is None: - hidden_dim = output_dim - encoder = RecurrentSequential(unroll=unroll, stateful=stateful) - encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[-1]))) - - for _ in range(1, depth[0]): - encoder.add(Dropout(dropout)) - encoder.add(LSTMCell(hidden_dim)) - - decoder = RecurrentSequential(unroll=unroll, stateful=stateful, - decode=True, output_length=output_length) - decoder.add(Dropout(dropout, batch_input_shape=(shape[0], hidden_dim))) - - if depth[1] == 1: - decoder.add(LSTMCell(output_dim)) - else: - decoder.add(LSTMCell(hidden_dim)) - for _ in range(depth[1] - 2): - decoder.add(Dropout(dropout)) - decoder.add(LSTMCell(hidden_dim)) - decoder.add(Dropout(dropout)) - decoder.add(LSTMCell(output_dim)) - - _input = Input(batch_shape=shape) - x = encoder(_input) - output = decoder(x) - return Model(_input, output) - - -def Seq2Seq(output_dim, output_length, batch_input_shape=None, - input_shape=None, batch_size=None, input_dim=None, input_length=None, - hidden_dim=None, depth=1, broadcast_state=True, unroll=False, - stateful=False, inner_broadcast_state=True, teacher_force=False, - peek=False, dropout=0.): - ''' - Seq2seq model based on [1] and [2]. - This model has the ability to transfer the encoder hidden state to the decoder's - hidden state (specified by the broadcast_state argument). Also, in deep models - (depth > 1), the hidden state is propagated throughout the LSTM stack (specified by - the inner_broadcast_state argument). You can switch between the [1] based model and the [2] - based model using the peek argument (peek = True for [2], peek = False for [1]). - When peek = True, the decoder gets a 'peek' at the context vector at every timestep. - - [1] based model: - - Encoder: - X = Input sequence - C = LSTM(X); The context vector - - Decoder: - y(t) = LSTM(s(t-1), y(t-1)); Where s is the hidden state of the LSTM (h and c) - y(0) = LSTM(s0, C); C is the context vector from the encoder. - - [2] based model: - - Encoder: - X = Input sequence - C = LSTM(X); The context vector - - Decoder: - y(t) = LSTM(s(t-1), y(t-1), C) - y(0) = LSTM(s0, C, C) - Where s is the hidden state of the LSTM (h and c), and C is the context vector - from the encoder. - - Arguments: - - output_dim : Required output dimension. - hidden_dim : The dimension of the internal representations of the model. - output_length : Length of the required output sequence. - depth : Used to create a deep Seq2seq model. For example, if depth = 3, - there will be 3 LSTMs on the encoding side and 3 LSTMs on the - decoding side. You can also specify depth as a tuple. For example, - if depth = (4, 5), 4 LSTMs will be added to the encoding side and - 5 LSTMs will be added to the decoding side. - broadcast_state : Specifies whether the hidden state from the encoder should be - transferred to the decoder. - inner_broadcast_state : Specifies whether hidden states should be propagated - throughout the LSTM stack in deep models. - peek : Specifies if the decoder should be able to peek at the context vector - at every timestep. - dropout : Dropout probability in between layers. 
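Example (an illustrative sketch, not part of the original module; the
shapes are arbitrary):

    model = Seq2Seq(batch_input_shape=(16, 7, 5), hidden_dim=10,
                    output_length=8, output_dim=20, depth=4, peek=True)
    model.compile(loss='mse', optimizer='rmsprop')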
- - - ''' - - if isinstance(depth, int): - depth = (depth, depth) - if batch_input_shape: - shape = batch_input_shape - elif input_shape: - shape = (batch_size,) + input_shape - elif input_dim: - if input_length: - shape = (batch_size,) + (input_length,) + (input_dim,) - else: - shape = (batch_size,) + (None,) + (input_dim,) - else: - raise TypeError('Specify input_shape, batch_input_shape, or input_dim.') - if hidden_dim is None: - hidden_dim = output_dim - - encoder = RecurrentSequential(readout=True, state_sync=inner_broadcast_state, - unroll=unroll, stateful=stateful, - return_states=broadcast_state) - for _ in range(depth[0]): - encoder.add( - LSTMCell(hidden_dim, batch_input_shape=(shape[0], hidden_dim))) - encoder.add(Dropout(dropout)) - - dense1 = TimeDistributed(Dense(hidden_dim)) - dense1.supports_masking = True - dense2 = Dense(output_dim) - - decoder = RecurrentSequential(readout='add' if peek else 'readout_only', - state_sync=inner_broadcast_state, decode=True, - output_length=output_length, unroll=unroll, - stateful=stateful, teacher_force=teacher_force) - - for _ in range(depth[1]): - decoder.add(Dropout(dropout, batch_input_shape=(shape[0], output_dim))) - decoder.add(LSTMDecoderCell(output_dim=output_dim, hidden_dim=hidden_dim, - batch_input_shape=(shape[0], output_dim))) - - _input = Input(batch_shape=shape) - _input._keras_history[0].supports_masking = True - encoded_seq = dense1(_input) - encoded_seq = encoder(encoded_seq) - if broadcast_state: - assert type(encoded_seq) is list - states = encoded_seq[-2:] - encoded_seq = encoded_seq[0] - else: - states = None - encoded_seq = dense2(encoded_seq) - inputs = [_input] - if teacher_force: - truth_tensor = Input(batch_shape=(shape[0], output_length, output_dim)) - truth_tensor._keras_history[0].supports_masking = True - inputs += [truth_tensor] - - decoded_seq = decoder(encoded_seq, - ground_truth=inputs[1] if teacher_force else None, - initial_readout=encoded_seq, initial_state=states) - - model = Model(inputs, decoded_seq) - model.encoder = encoder - model.decoder = decoder - return model - - -def AttentionSeq2Seq(output_dim, output_length, batch_input_shape=None, - batch_size=None, input_shape=None, input_length=None, - input_dim=None, hidden_dim=None, depth=1, - bidirectional=True, unroll=False, stateful=False, dropout=0.0,): - ''' - This is an attention Seq2seq model based on [3]. - Here, there is a soft alignment between the input and output sequence elements. - A bidirectional encoder is used by default. There is no hidden state transfer in this - model. - - The math: - - Encoder: - X = Input Sequence of length m. - H = Bidirectional_LSTM(X); Note that here the LSTM has return_sequences = True, - so H is a sequence of vectors of length m. - - Decoder: - y(i) = LSTM(s(i-1), y(i-1), v(i)); Where s is the hidden state of the LSTM (h and c) - and v (called the context vector) is a weighted sum over H: - - v(i) = sum(j = 0 to m-1) alpha(i, j) * H(j) - - The weight alpha(i, j) for each H(j) is computed as follows: - energy = a(s(i-1), H(j)) - alpha = softmax(energy) - Where a is a feed forward network. 
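Example (an illustrative sketch, not part of the original module; the
shapes are arbitrary):

    model = AttentionSeq2Seq(input_dim=5, input_length=7, hidden_dim=10,
                             output_length=8, output_dim=20, depth=4)
    model.compile(loss='mse', optimizer='rmsprop')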
- - ''' - - if isinstance(depth, int): - depth = (depth, depth) - if batch_input_shape: - shape = batch_input_shape - elif input_shape: - shape = (batch_size,) + input_shape - elif input_dim: - if input_length: - shape = (batch_size,) + (input_length,) + (input_dim,) - else: - shape = (batch_size,) + (None,) + (input_dim,) - else: - raise TypeError('Specify input_shape, batch_input_shape, or input_dim.') - if hidden_dim is None: - hidden_dim = output_dim - - _input = Input(batch_shape=shape) - _input._keras_history[0].supports_masking = True - - encoder = RecurrentSequential(unroll=unroll, stateful=stateful, - return_sequences=True) - encoder.add(LSTMCell(hidden_dim, batch_input_shape=(shape[0], shape[2]))) - - for _ in range(1, depth[0]): - encoder.add(Dropout(dropout)) - encoder.add(LSTMCell(hidden_dim)) - - if bidirectional: - encoder = Bidirectional(encoder, merge_mode='sum') - encoder.forward_layer.build(shape) - encoder.backward_layer.build(shape) - # patch - encoder.layer = encoder.forward_layer - - encoded = encoder(_input) - decoder = RecurrentSequential(decode=True, output_length=output_length, - unroll=unroll, stateful=stateful) - decoder.add(Dropout(dropout, batch_input_shape=( - shape[0], shape[1], hidden_dim))) - if depth[1] == 1: - decoder.add(AttentionDecoderCell( - output_dim=output_dim, hidden_dim=hidden_dim)) - else: - decoder.add(AttentionDecoderCell( - output_dim=output_dim, hidden_dim=hidden_dim)) - for _ in range(depth[1] - 2): - decoder.add(Dropout(dropout)) - decoder.add(LSTMDecoderCell( - output_dim=hidden_dim, hidden_dim=hidden_dim)) - decoder.add(Dropout(dropout)) - decoder.add(LSTMDecoderCell( - output_dim=output_dim, hidden_dim=hidden_dim)) - - inputs = [_input] - decoded = decoder(encoded) - model = Model(inputs, decoded) - return model -from seq2seq import SimpleSeq2Seq, Seq2Seq, AttentionSeq2Seq -import numpy as np -from keras.utils.test_utils import keras_test - - -input_length = 5 -input_dim = 3 - -output_length = 3 -output_dim = 4 - -samples = 100 -hidden_dim = 24 - - -@keras_test -def test_SimpleSeq2Seq(): - x = np.random.random((samples, input_length, input_dim)) - y = np.random.random((samples, output_length, output_dim)) - - models = [] - models += [SimpleSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, - output_length=output_length, input_shape=(input_length, input_dim))] - models += [SimpleSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, - output_length=output_length, input_shape=(input_length, input_dim), depth=2)] - - for model in models: - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1) - - -@keras_test -def test_Seq2Seq(): - x = np.random.random((samples, input_length, input_dim)) - y = np.random.random((samples, output_length, output_dim)) - - models = [] - models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, - output_length=output_length, input_shape=(input_length, input_dim))] - models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, - output_length=output_length, input_shape=(input_length, input_dim), peek=True)] - models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, - output_length=output_length, input_shape=(input_length, input_dim), depth=2)] - models += [Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, output_length=output_length, - input_shape=(input_length, input_dim), peek=True, depth=2)] - - for model in models: - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1) - - model = Seq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, output_length=output_length, - 
input_shape=(input_length, input_dim), peek=True, depth=2, teacher_force=True) - model.compile(loss='mse', optimizer='sgd') - model.fit([x, y], y, epochs=1) - - -@keras_test -def test_AttentionSeq2Seq(): - x = np.random.random((samples, input_length, input_dim)) - y = np.random.random((samples, output_length, output_dim)) - - models = [] - models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, - output_length=output_length, input_shape=(input_length, input_dim))] - models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, - output_length=output_length, input_shape=(input_length, input_dim), depth=2)] - models += [AttentionSeq2Seq(output_dim=output_dim, hidden_dim=hidden_dim, - output_length=output_length, input_shape=(input_length, input_dim), depth=3)] - - for model in models: - model.compile(loss='mse', optimizer='sgd') - model.fit(x, y, epochs=1) -import numpy as np -from keras import backend as K - - -TAGS = ['rock', 'pop', 'alternative', 'indie', 'electronic', - 'female vocalists', 'dance', '00s', 'alternative rock', 'jazz', - 'beautiful', 'metal', 'chillout', 'male vocalists', - 'classic rock', 'soul', 'indie rock', 'Mellow', 'electronica', - '80s', 'folk', '90s', 'chill', 'instrumental', 'punk', - 'oldies', 'blues', 'hard rock', 'ambient', 'acoustic', - 'experimental', 'female vocalist', 'guitar', 'Hip-Hop', - '70s', 'party', 'country', 'easy listening', - 'sexy', 'catchy', 'funk', 'electro', 'heavy metal', - 'Progressive rock', '60s', 'rnb', 'indie pop', - 'sad', 'House', 'happy'] - - -def librosa_exists(): - try: - __import__('librosa') - except ImportError: - return False - else: - return True - - -def preprocess_input(audio_path, dim_ordering='default'): - '''Reads an audio file and outputs a Mel-spectrogram. - ''' - if dim_ordering == 'default': - dim_ordering = K.image_dim_ordering() - assert dim_ordering in {'tf', 'th'} - - if librosa_exists(): - import librosa - else: - raise RuntimeError('Librosa is required to process audio files.\n' + - 'Install it via `pip install librosa` \nor visit ' + - 'http://librosa.github.io/librosa/ for details.') - - # mel-spectrogram parameters - SR = 12000 - N_FFT = 512 - N_MELS = 96 - HOP_LEN = 256 - DURA = 29.12 - - src, sr = librosa.load(audio_path, sr=SR) - n_sample = src.shape[0] - n_sample_wanted = int(DURA * SR) - - # trim the signal at the center (integer division keeps the indices ints) - if n_sample < n_sample_wanted: # if too short - src = np.hstack((src, np.zeros((int(DURA * SR) - n_sample,)))) - elif n_sample > n_sample_wanted: # if too long - src = src[(n_sample - n_sample_wanted) // 2: - (n_sample + n_sample_wanted) // 2] - - logam = librosa.logamplitude - melgram = librosa.feature.melspectrogram - x = logam(melgram(y=src, sr=SR, hop_length=HOP_LEN, - n_fft=N_FFT, n_mels=N_MELS) ** 2, - ref_power=1.0) - - if dim_ordering == 'th': - x = np.expand_dims(x, axis=0) - elif dim_ordering == 'tf': - x = np.expand_dims(x, axis=3) - return x - - -def decode_predictions(preds, top_n=5): - '''Decode the output of a music tagger model. 
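    A hedged usage sketch (not part of the original file; `tagger` is
    assumed to be a trained model whose 50 outputs line up with TAGS above):

        preds = tagger.predict(melgrams)           # shape (samples, 50)
        top5 = decode_predictions(preds, top_n=5)  # [(tag, score), ...] per sample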
- - # Arguments - preds: 2-dimensional numpy array - top_n: integer in [0, 50], number of items to show - - ''' - assert len(preds.shape) == 2 and preds.shape[1] == 50 - results = [] - for pred in preds: - result = zip(TAGS, pred) - result = sorted(result, key=lambda x: x[1], reverse=True) - results.append(result[:top_n]) - return results -import numpy as np -import json - -from keras.utils.data_utils import get_file -from keras import backend as K - -CLASS_INDEX = None -CLASS_INDEX_PATH = 'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json' - - -def preprocess_input(x, dim_ordering='default'): - if dim_ordering == 'default': - dim_ordering = K.image_dim_ordering() - assert dim_ordering in {'tf', 'th'} - - if dim_ordering == 'th': - x[:, 0, :, :] -= 103.939 - x[:, 1, :, :] -= 116.779 - x[:, 2, :, :] -= 123.68 - # 'RGB'->'BGR' - x = x[:, ::-1, :, :] - else: - x[:, :, :, 0] -= 103.939 - x[:, :, :, 1] -= 116.779 - x[:, :, :, 2] -= 123.68 - # 'RGB'->'BGR' - x = x[:, :, :, ::-1] - return x - - -def decode_predictions(preds, top=5): - global CLASS_INDEX - if len(preds.shape) != 2 or preds.shape[1] != 1000: - raise ValueError('`decode_predictions` expects ' - 'a batch of predictions ' - '(i.e. a 2D array of shape (samples, 1000)). ' - 'Found array with shape: ' + str(preds.shape)) - if CLASS_INDEX is None: - fpath = get_file('imagenet_class_index.json', - CLASS_INDEX_PATH, - cache_subdir='models') - CLASS_INDEX = json.load(open(fpath)) - results = [] - for pred in preds: - top_indices = pred.argsort()[-top:][::-1] - result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices] - results.append(result) - return results -# -*- coding: utf-8 -*- -"""Inception-ResNet V2 model for Keras. - -Model naming and structure follows TF-slim implementation (which has some additional -layers and different number of filters from the original arXiv paper): -https://github.com/tensorflow/models/blob/master/slim/nets/inception_resnet_v2.py - -Pre-trained ImageNet weights are also converted from TF-slim, which can be found in: -https://github.com/tensorflow/models/tree/master/slim#pre-trained-models - -# Reference -- [Inception-v4, Inception-ResNet and the Impact of - Residual Connections on Learning](https://arxiv.org/abs/1602.07261) - -""" -from __future__ import print_function -from __future__ import absolute_import - -import warnings -import numpy as np - -from keras.preprocessing import image -from keras.models import Model -from keras.layers import Activation -from keras.layers import AveragePooling2D -from keras.layers import BatchNormalization -from keras.layers import Concatenate -from keras.layers import Conv2D -from keras.layers import Dense -from keras.layers import GlobalAveragePooling2D -from keras.layers import GlobalMaxPooling2D -from keras.layers import Input -from keras.layers import Lambda -from keras.layers import MaxPooling2D -from keras.utils.data_utils import get_file -from keras.engine.topology import get_source_inputs -from keras.applications.imagenet_utils import _obtain_input_shape -from keras.applications.imagenet_utils import decode_predictions -from keras import backend as K - - -BASE_WEIGHT_URL = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.7/' - - -def preprocess_input(x): - """Preprocesses a numpy array encoding a batch of images. - - This function applies the "Inception" preprocessing which converts - the RGB values from [0, 255] to [-1, 1]. 
Note that this preprocessing - function is different from `imagenet_utils.preprocess_input()`. - - # Arguments - x: a 4D numpy array consisting of RGB values within [0, 255]. - - # Returns - Preprocessed array. - """ - x /= 255. - x -= 0.5 - x *= 2. - return x - - -def conv2d_bn(x, - filters, - kernel_size, - strides=1, - padding='same', - activation='relu', - use_bias=False, - name=None): - """Utility function to apply conv + BN. - - # Arguments - x: input tensor. - filters: filters in `Conv2D`. - kernel_size: kernel size as in `Conv2D`. - padding: padding mode in `Conv2D`. - activation: activation in `Conv2D`. - strides: strides in `Conv2D`. - name: name of the ops; will become `name + '_ac'` for the activation - and `name + '_bn'` for the batch norm layer. - - # Returns - Output tensor after applying `Conv2D` and `BatchNormalization`. - """ - x = Conv2D(filters, - kernel_size, - strides=strides, - padding=padding, - use_bias=use_bias, - name=name)(x) - if not use_bias: - bn_axis = 1 if K.image_data_format() == 'channels_first' else 3 - bn_name = None if name is None else name + '_bn' - x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) - if activation is not None: - ac_name = None if name is None else name + '_ac' - x = Activation(activation, name=ac_name)(x) - return x - - -def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'): - """Adds an Inception-ResNet block. - - This function builds 3 types of Inception-ResNet blocks mentioned - in the paper, controlled by the `block_type` argument (which is the - block name used in the official TF-slim implementation): - - Inception-ResNet-A: `block_type='block35'` - - Inception-ResNet-B: `block_type='block17'` - - Inception-ResNet-C: `block_type='block8'` - - # Arguments - x: input tensor. - scale: scaling factor to scale the residuals (i.e., the output of - passing `x` through an inception module) before adding them - to the shortcut branch. Let `r` be the output from the residual branch, - the output of this block will be `x + scale * r`. - block_type: `'block35'`, `'block17'` or `'block8'`, determines - the network structure in the residual branch. - block_idx: an `int` used for generating layer names. The Inception-ResNet blocks - are repeated many times in this network. We use `block_idx` to identify - each of the repetitions. For example, the first Inception-ResNet-A block - will have `block_type='block35', block_idx=0`, and the layer names will have - a common prefix `'block35_0'`. - activation: activation function to use at the end of the block - (see the Keras activations documentation). - When `activation=None`, no activation is applied - (i.e., "linear" activation: `a(x) = x`). - - # Returns - Output tensor for the block. - - # Raises - ValueError: if `block_type` is not one of `'block35'`, - `'block17'` or `'block8'`. 
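# Example
    A hedged sketch mirroring how `InceptionResNetV2` below stacks the
    Inception-ResNet-A blocks (the loop bound is illustrative):

        for block_idx in range(1, 4):
            x = inception_resnet_block(x, scale=0.17,
                                       block_type='block35',
                                       block_idx=block_idx)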
- """ - if block_type == 'block35': - branch_0 = conv2d_bn(x, 32, 1) - branch_1 = conv2d_bn(x, 32, 1) - branch_1 = conv2d_bn(branch_1, 32, 3) - branch_2 = conv2d_bn(x, 32, 1) - branch_2 = conv2d_bn(branch_2, 48, 3) - branch_2 = conv2d_bn(branch_2, 64, 3) - branches = [branch_0, branch_1, branch_2] - elif block_type == 'block17': - branch_0 = conv2d_bn(x, 192, 1) - branch_1 = conv2d_bn(x, 128, 1) - branch_1 = conv2d_bn(branch_1, 160, [1, 7]) - branch_1 = conv2d_bn(branch_1, 192, [7, 1]) - branches = [branch_0, branch_1] - elif block_type == 'block8': - branch_0 = conv2d_bn(x, 192, 1) - branch_1 = conv2d_bn(x, 192, 1) - branch_1 = conv2d_bn(branch_1, 224, [1, 3]) - branch_1 = conv2d_bn(branch_1, 256, [3, 1]) - branches = [branch_0, branch_1] - else: - raise ValueError('Unknown Inception-ResNet block type. ' - 'Expects "block35", "block17" or "block8", ' - 'but got: ' + str(block_type)) - - block_name = block_type + '_' + str(block_idx) - channel_axis = 1 if K.image_data_format() == 'channels_first' else 3 - mixed = Concatenate(axis=channel_axis, - name=block_name + '_mixed')(branches) - up = conv2d_bn(mixed, - K.int_shape(x)[channel_axis], - 1, - activation=None, - use_bias=True, - name=block_name + '_conv') - - x = Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale, - output_shape=K.int_shape(x)[1:], - arguments={'scale': scale}, - name=block_name)([x, up]) - if activation is not None: - x = Activation(activation, name=block_name + '_ac')(x) - return x - - -def InceptionResNetV2(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000): - """Instantiates the Inception-ResNet v2 architecture. - - Optionally loads weights pre-trained on ImageNet. - Note that when using TensorFlow, for best performance you should - set `"image_data_format": "channels_last"` in your Keras config - at `~/.keras/keras.json`. - - The model and the weights are compatible with both TensorFlow and Theano - backends (but not CNTK). The data format convention used by the model is - the one specified in your Keras config file. - - Note that the default input image size for this model is 299x299, instead - of 224x224 as in the VGG16 and ResNet models. Also, the input preprocessing - function is different (i.e., do not use `imagenet_utils.preprocess_input()` - with this model. Use `preprocess_input()` defined in this module instead). - - # Arguments - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization) - or `'imagenet'` (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is `False` (otherwise the input shape - has to be `(299, 299, 3)` (with `'channels_last'` data format) - or `(3, 299, 299)` (with `'channels_first'` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 139. - E.g. `(150, 150, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the last convolutional layer. - - `'avg'` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a 2D tensor. - - `'max'` means that global max pooling will be applied. 
- classes: optional number of classes to classify images - into, only to be specified if `include_top` is `True`, and - if no `weights` argument is specified. - - # Returns - A Keras `Model` instance. - - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - RuntimeError: If attempting to run this model with an unsupported backend. - """ - if K.backend() in {'cntk'}: - raise RuntimeError( - K.backend() + ' backend is currently unsupported for this model.') - - if weights not in {'imagenet', None}: - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `imagenet` ' - '(pre-training on ImageNet).') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as imagenet with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = _obtain_input_shape( - input_shape, - default_size=299, - min_size=139, - data_format=K.image_data_format(), - require_flatten=False, - weights=weights) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - # Stem block: 35 x 35 x 192 - x = conv2d_bn(img_input, 32, 3, strides=2, padding='valid') - x = conv2d_bn(x, 32, 3, padding='valid') - x = conv2d_bn(x, 64, 3) - x = MaxPooling2D(3, strides=2)(x) - x = conv2d_bn(x, 80, 1, padding='valid') - x = conv2d_bn(x, 192, 3, padding='valid') - x = MaxPooling2D(3, strides=2)(x) - - # Mixed 5b (Inception-A block): 35 x 35 x 320 - branch_0 = conv2d_bn(x, 96, 1) - branch_1 = conv2d_bn(x, 48, 1) - branch_1 = conv2d_bn(branch_1, 64, 5) - branch_2 = conv2d_bn(x, 64, 1) - branch_2 = conv2d_bn(branch_2, 96, 3) - branch_2 = conv2d_bn(branch_2, 96, 3) - branch_pool = AveragePooling2D(3, strides=1, padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 64, 1) - branches = [branch_0, branch_1, branch_2, branch_pool] - channel_axis = 1 if K.image_data_format() == 'channels_first' else 3 - x = Concatenate(axis=channel_axis, name='mixed_5b')(branches) - - # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320 - for block_idx in range(1, 11): - x = inception_resnet_block(x, - scale=0.17, - block_type='block35', - block_idx=block_idx) - - # Mixed 6a (Reduction-A block): 17 x 17 x 1088 - branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid') - branch_1 = conv2d_bn(x, 256, 1) - branch_1 = conv2d_bn(branch_1, 256, 3) - branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='valid') - branch_pool = MaxPooling2D(3, strides=2, padding='valid')(x) - branches = [branch_0, branch_1, branch_pool] - x = Concatenate(axis=channel_axis, name='mixed_6a')(branches) - - # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088 - for block_idx in range(1, 21): - x = inception_resnet_block(x, - scale=0.1, - block_type='block17', - block_idx=block_idx) - - # Mixed 7a (Reduction-B block): 8 x 8 x 2080 - branch_0 = conv2d_bn(x, 256, 1) - branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='valid') - branch_1 = conv2d_bn(x, 256, 1) - branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='valid') - branch_2 = conv2d_bn(x, 256, 1) - branch_2 = conv2d_bn(branch_2, 288, 3) - branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='valid') - branch_pool = MaxPooling2D(3, strides=2, padding='valid')(x) - branches = [branch_0, branch_1, branch_2, branch_pool] - x = Concatenate(axis=channel_axis, 
name='mixed_7a')(branches) - - # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080 - for block_idx in range(1, 10): - x = inception_resnet_block(x, - scale=0.2, - block_type='block8', - block_idx=block_idx) - x = inception_resnet_block(x, - scale=1., - activation=None, - block_type='block8', - block_idx=10) - - # Final convolution block: 8 x 8 x 1536 - x = conv2d_bn(x, 1536, 1, name='conv_7b') - - if include_top: - # Classification block - x = GlobalAveragePooling2D(name='avg_pool')(x) - x = Dense(classes, activation='softmax', name='predictions')(x) - else: - if pooling == 'avg': - x = GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor` - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model - model = Model(inputs, x, name='inception_resnet_v2') - - # Load weights - if weights == 'imagenet': - if K.image_data_format() == 'channels_first': - if K.backend() == 'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image data format convention ' - '(`image_data_format="channels_first"`). ' - 'For best performance, set ' - '`image_data_format="channels_last"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - if include_top: - weights_filename = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5' - weights_path = get_file(weights_filename, - BASE_WEIGHT_URL + weights_filename, - cache_subdir='models', - md5_hash='e693bd0210a403b3192acc6073ad2e96') - else: - weights_filename = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5' - weights_path = get_file(weights_filename, - BASE_WEIGHT_URL + weights_filename, - cache_subdir='models', - md5_hash='d19885ff4a710c122648d3b5c3b684e4') - model.load_weights(weights_path) - - return model - - -if __name__ == '__main__': - model = InceptionResNetV2(include_top=True, weights='imagenet') - - img_path = 'elephant.jpg' - img = image.load_img(img_path, target_size=(299, 299)) - x = image.img_to_array(img) - x = np.expand_dims(x, axis=0) - - x = preprocess_input(x) - - preds = model.predict(x) - print('Predicted:', decode_predictions(preds)) -# -*- coding: utf-8 -*- -"""Inception V3 model for Keras. - -Note that the input image format for this model is different than for -the VGG16 and ResNet models (299x299 instead of 224x224), -and that the input preprocessing function is also different (same as Xception). 
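# A minimal NumPy sketch of the Inception-style preprocessing referenced
# above: pixel values are rescaled from [0, 255] to [-1, 1] instead of
# having ImageNet channel means subtracted. This mirrors the
# preprocess_input defined later in this file; sample values are
# illustrative only.
import numpy as np

pixels = np.array([0., 127.5, 255.])
x = pixels / 255.   # -> [0, 1]
x -= 0.5            # -> [-0.5, 0.5]
x *= 2.             # -> [-1, 1]
print(x)            # [-1.  0.  1.]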
- -# Reference - -- [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567) - -""" -from __future__ import print_function -from __future__ import absolute_import - -import warnings -import numpy as np - -from keras.models import Model -from keras import layers -from keras.layers import Activation -from keras.layers import Dense -from keras.layers import Input -from keras.layers import BatchNormalization -from keras.layers import Conv2D -from keras.layers import MaxPooling2D -from keras.layers import AveragePooling2D -from keras.layers import GlobalAveragePooling2D -from keras.layers import GlobalMaxPooling2D -from keras.engine.topology import get_source_inputs -from keras.utils.layer_utils import convert_all_kernels_in_model -from keras.utils.data_utils import get_file -from keras import backend as K -from keras.applications.imagenet_utils import decode_predictions -from keras.applications.imagenet_utils import _obtain_input_shape -from keras.preprocessing import image - - -WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels.h5' -WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5' - - -def conv2d_bn(x, - filters, - num_row, - num_col, - padding='same', - strides=(1, 1), - name=None): - """Utility function to apply conv + BN. - - Arguments: - x: input tensor. - filters: filters in `Conv2D`. - num_row: height of the convolution kernel. - num_col: width of the convolution kernel. - padding: padding mode in `Conv2D`. - strides: strides in `Conv2D`. - name: name of the ops; will become `name + '_conv'` - for the convolution and `name + '_bn'` for the - batch norm layer. - - Returns: - Output tensor after applying `Conv2D` and `BatchNormalization`. - """ - if name is not None: - bn_name = name + '_bn' - conv_name = name + '_conv' - else: - bn_name = None - conv_name = None - if K.image_data_format() == 'channels_first': - bn_axis = 1 - else: - bn_axis = 3 - x = Conv2D( - filters, (num_row, num_col), - strides=strides, - padding=padding, - use_bias=False, - name=conv_name)(x) - x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) - x = Activation('relu', name=name)(x) - return x - - -def InceptionV3(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000): - """Instantiates the Inception v3 architecture. - - Optionally loads weights pre-trained - on ImageNet. Note that when using TensorFlow, - for best performance you should set - `image_data_format="channels_last"` in your Keras config - at ~/.keras/keras.json. - The model and the weights are compatible with both - TensorFlow and Theano. The data format - convention used by the model is the one - specified in your Keras config file. - Note that the default input image size for this model is 299x299. - - Arguments: - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization) - or "imagenet" (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(299, 299, 3)` (with `channels_last` data format) - or `(3, 299, 299)` (with `channels_first` data format). 
- It should have exactly 3 inputs channels, - and width and height should be no smaller than 139. - E.g. `(150, 150, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - - Returns: - A Keras model instance. - - Raises: - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - """ - if weights not in {'imagenet', None}: - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `imagenet` ' - '(pre-training on ImageNet).') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as imagenet with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = _obtain_input_shape( - input_shape, - default_size=299, - min_size=139, - data_format=K.image_data_format(), - include_top=include_top) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - img_input = Input(tensor=input_tensor, shape=input_shape) - - if K.image_data_format() == 'channels_first': - channel_axis = 1 - else: - channel_axis = 3 - - x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid') - x = conv2d_bn(x, 32, 3, 3, padding='valid') - x = conv2d_bn(x, 64, 3, 3) - x = MaxPooling2D((3, 3), strides=(2, 2))(x) - - x = conv2d_bn(x, 80, 1, 1, padding='valid') - x = conv2d_bn(x, 192, 3, 3, padding='valid') - x = MaxPooling2D((3, 3), strides=(2, 2))(x) - - # mixed 0, 1, 2: 35 x 35 x 256 - branch1x1 = conv2d_bn(x, 64, 1, 1) - - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 32, 1, 1) - x = layers.concatenate( - [branch1x1, branch5x5, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed0') - - # mixed 1: 35 x 35 x 256 - branch1x1 = conv2d_bn(x, 64, 1, 1) - - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 64, 1, 1) - x = layers.concatenate( - [branch1x1, branch5x5, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed1') - - # mixed 2: 35 x 35 x 256 - branch1x1 = conv2d_bn(x, 64, 1, 1) - - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 64, 1, 1) - x = layers.concatenate( - [branch1x1, branch5x5, branch3x3dbl, 
branch_pool], - axis=channel_axis, - name='mixed2') - - # mixed 3: 17 x 17 x 768 - branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid') - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn( - branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid') - - branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) - x = layers.concatenate( - [branch3x3, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed3') - - # mixed 4: 17 x 17 x 768 - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 128, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 128, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 128, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate( - [branch1x1, branch7x7, branch7x7dbl, branch_pool], - axis=channel_axis, - name='mixed4') - - # mixed 5, 6: 17 x 17 x 768 - for i in range(2): - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 160, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 160, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 160, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate( - [branch1x1, branch7x7, branch7x7dbl, branch_pool], - axis=channel_axis, - name='mixed' + str(5 + i)) - - # mixed 7: 17 x 17 x 768 - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 192, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 192, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 192, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate( - [branch1x1, branch7x7, branch7x7dbl, branch_pool], - axis=channel_axis, - name='mixed7') - - # mixed 8: 8 x 8 x 1280 - branch3x3 = conv2d_bn(x, 192, 1, 1) - branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, - strides=(2, 2), padding='valid') - - branch7x7x3 = conv2d_bn(x, 192, 1, 1) - branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7) - branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1) - branch7x7x3 = conv2d_bn( - branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid') - - branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) - x = layers.concatenate( - [branch3x3, branch7x7x3, branch_pool], axis=channel_axis, name='mixed8') - - # mixed 9: 8 x 8 x 2048 - for i in range(2): - branch1x1 = conv2d_bn(x, 320, 1, 1) - - branch3x3 = conv2d_bn(x, 384, 1, 1) - branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3) - branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1) - branch3x3 = layers.concatenate( - [branch3x3_1, branch3x3_2], axis=channel_axis, name='mixed9_' + str(i)) - - branch3x3dbl = conv2d_bn(x, 448, 1, 1) - branch3x3dbl = 
conv2d_bn(branch3x3dbl, 384, 3, 3) - branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3) - branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1) - branch3x3dbl = layers.concatenate( - [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis) - - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate( - [branch1x1, branch3x3, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed' + str(9 + i)) - if include_top: - # Classification block - x = GlobalAveragePooling2D(name='avg_pool')(x) - x = Dense(classes, activation='softmax', name='predictions')(x) - else: - if pooling == 'avg': - x = GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = Model(inputs, x, name='inception_v3') - - # load weights - if weights == 'imagenet': - if K.image_data_format() == 'channels_first': - if K.backend() == 'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image data format convention ' - '(`image_data_format="channels_first"`). ' - 'For best performance, set ' - '`image_data_format="channels_last"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - if include_top: - weights_path = get_file( - 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5', - WEIGHTS_PATH, - cache_subdir='models', - md5_hash='9a0d58056eeedaa3f26cb7ebd46da564') - else: - weights_path = get_file( - 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5', - WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='bcbd6486424b2319ff4ef7d526e38f63') - model.load_weights(weights_path) - if K.backend() == 'theano': - convert_all_kernels_in_model(model) - return model - - -def preprocess_input(x): - x /= 255. - x -= 0.5 - x *= 2. - return x - - -if __name__ == '__main__': - model = InceptionV3(include_top=True, weights='imagenet') - - img_path = 'elephant.jpg' - img = image.load_img(img_path, target_size=(299, 299)) - x = image.img_to_array(img) - x = np.expand_dims(x, axis=0) - - x = preprocess_input(x) - - preds = model.predict(x) - print('Predicted:', decode_predictions(preds)) -"""MobileNet v1 models for Keras. - -Code contributed by Somshubra Majumdar (@titu1994). - -MobileNet is a general architecture and can be used for multiple use cases. -Depending on the use case, it can use different input layer size and -different width factors. This allows different width models to reduce -the number of multiply-adds and thereby -reduce inference cost on mobile devices. - -MobileNets support any input size greater than 32 x 32, with larger image sizes -offering better performance. -The number of parameters and number of multiply-adds -can be modified by using the `alpha` parameter, -which increases/decreases the number of filters in each layer. -By altering the image size and `alpha` parameter, -all 16 models from the paper can be built, with ImageNet weights provided. - -The paper demonstrates the performance of MobileNets using `alpha` values of -1.0 (also called 100 % MobileNet), 0.75, 0.5 and 0.25. -For each of these `alpha` values, weights for 4 different input image sizes -are provided (224, 192, 160, 128). 
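# A quick sketch of what the `alpha` width multiplier described above does:
# every layer's filter count is scaled by alpha and truncated to an int,
# the same int(filters * alpha) arithmetic used by _conv_block and
# _depthwise_conv_block further down. The base filter list here is the
# MobileNet trunk's.
base_filters = [32, 64, 128, 256, 512, 1024]
for alpha in [0.25, 0.50, 0.75, 1.0]:
    print(alpha, [int(f * alpha) for f in base_filters])
# 0.25 -> [8, 16, 32, 64, 128, 256]; 1.0 leaves the network unchanged.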
-
-The following table describes the size and accuracy of the 100% MobileNet
-on size 224 x 224:
-----------------------------------------------------------------------------
-Width Multiplier (alpha) | ImageNet Acc | Multiply-Adds (M) | Params (M)
-----------------------------------------------------------------------------
-| 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 |
-| 0.75 MobileNet-224 | 68.4 % | 325 | 2.6 |
-| 0.50 MobileNet-224 | 63.7 % | 149 | 1.3 |
-| 0.25 MobileNet-224 | 50.6 % | 41 | 0.5 |
-----------------------------------------------------------------------------
-
-The following table describes the performance of
-the 100% MobileNet on various input sizes:
-------------------------------------------------------------------------
- Resolution | ImageNet Acc | Multiply-Adds (M) | Params (M)
-------------------------------------------------------------------------
-| 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 |
-| 1.0 MobileNet-192 | 69.1 % | 529 | 4.2 |
-| 1.0 MobileNet-160 | 67.2 % | 529 | 4.2 |
-| 1.0 MobileNet-128 | 64.4 % | 529 | 4.2 |
-------------------------------------------------------------------------
-
-The weights for all 16 models are obtained and translated
-from TensorFlow checkpoints found at
-https://github.com/tensorflow/models/blob/master/slim/nets/mobilenet_v1.md
-
-# Reference
-- [MobileNets: Efficient Convolutional Neural Networks for
-   Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf)
-"""
-from __future__ import print_function
-from __future__ import absolute_import
-from __future__ import division
-
-import warnings
-import numpy as np
-
-from keras.preprocessing import image
-
-from keras.models import Model
-from keras.layers import Input
-from keras.layers import Activation
-from keras.layers import Dropout
-from keras.layers import Reshape
-from keras.layers import BatchNormalization
-from keras.layers import GlobalAveragePooling2D
-from keras.layers import GlobalMaxPooling2D
-from keras.layers import Conv2D
-from keras import initializers
-from keras import regularizers
-from keras import constraints
-from keras.utils import conv_utils
-from keras.utils.data_utils import get_file
-from keras.engine.topology import get_source_inputs
-from keras.engine import InputSpec
-from keras.applications.imagenet_utils import _obtain_input_shape
-from keras.applications.imagenet_utils import decode_predictions
-from keras import backend as K
-
-
-BASE_WEIGHT_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.6/'
-
-
-def relu6(x):
-    return K.relu(x, max_value=6)
-
-
-def preprocess_input(x):
-    x /= 255.
-    x -= 0.5
-    x *= 2.
-    return x
-
-
-class DepthwiseConv2D(Conv2D):
-    """Depthwise separable 2D convolution.
-
-    Depthwise separable convolutions consist of performing
-    just the first step in a depthwise spatial convolution
-    (which acts on each input channel separately).
-    The `depth_multiplier` argument controls how many
-    output channels are generated per input channel in the depthwise step.
-
-    # Arguments
-        kernel_size: An integer or tuple/list of 2 integers, specifying the
-            width and height of the 2D convolution window.
-            Can be a single integer to specify the same value for
-            all spatial dimensions.
-        strides: An integer or tuple/list of 2 integers,
-            specifying the strides of the convolution along the width and height.
-            Can be a single integer to specify the same value for
-            all spatial dimensions.
-            Specifying any stride value != 1 is incompatible with specifying
-            any `dilation_rate` value != 1.
- padding: one of `"valid"` or `"same"` (case-insensitive). - depth_multiplier: The number of depthwise convolution output channels - for each input channel. - The total number of depthwise convolution output - channels will be equal to `filters_in * depth_multiplier`. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, height, width, channels)` while `channels_first` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - activation: Activation function to use - (see [activations](keras./activations.md)). - If you don't specify anything, no activation is applied - (ie. "linear" activation: `a(x) = x`). - use_bias: Boolean, whether the layer uses a bias vector. - depthwise_initializer: Initializer for the depthwise kernel matrix - (see [initializers](keras./initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](keras./initializers.md)). - depthwise_regularizer: Regularizer function applied to - the depthwise kernel matrix - (see [regularizer](keras./regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](keras./regularizers.md)). - activity_regularizer: Regularizer function applied to - the output of the layer (its "activation"). - (see [regularizer](keras./regularizers.md)). - depthwise_constraint: Constraint function applied to - the depthwise kernel matrix - (see [constraints](keras./constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](keras./constraints.md)). - - # Input shape - 4D tensor with shape: - `[batch, channels, rows, cols]` if data_format='channels_first' - or 4D tensor with shape: - `[batch, rows, cols, channels]` if data_format='channels_last'. - - # Output shape - 4D tensor with shape: - `[batch, filters, new_rows, new_cols]` if data_format='channels_first' - or 4D tensor with shape: - `[batch, new_rows, new_cols, filters]` if data_format='channels_last'. - `rows` and `cols` values might have changed due to padding. - """ - - def __init__(self, - kernel_size, - strides=(1, 1), - padding='valid', - depth_multiplier=1, - data_format=None, - activation=None, - use_bias=True, - depthwise_initializer='glorot_uniform', - bias_initializer='zeros', - depthwise_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - depthwise_constraint=None, - bias_constraint=None, - **kwargs): - super(DepthwiseConv2D, self).__init__( - filters=None, - kernel_size=kernel_size, - strides=strides, - padding=padding, - data_format=data_format, - activation=activation, - use_bias=use_bias, - bias_regularizer=bias_regularizer, - activity_regularizer=activity_regularizer, - bias_constraint=bias_constraint, - **kwargs) - self.depth_multiplier = depth_multiplier - self.depthwise_initializer = initializers.get(depthwise_initializer) - self.depthwise_regularizer = regularizers.get(depthwise_regularizer) - self.depthwise_constraint = constraints.get(depthwise_constraint) - self.bias_initializer = initializers.get(bias_initializer) - - def build(self, input_shape): - if len(input_shape) < 4: - raise ValueError('Inputs to `DepthwiseConv2D` should have rank 4. 
' - 'Received input shape:', str(input_shape)) - if self.data_format == 'channels_first': - channel_axis = 1 - else: - channel_axis = 3 - if input_shape[channel_axis] is None: - raise ValueError('The channel dimension of the inputs to ' - '`DepthwiseConv2D` ' - 'should be defined. Found `None`.') - input_dim = int(input_shape[channel_axis]) - depthwise_kernel_shape = (self.kernel_size[0], - self.kernel_size[1], - input_dim, - self.depth_multiplier) - - self.depthwise_kernel = self.add_weight( - shape=depthwise_kernel_shape, - initializer=self.depthwise_initializer, - name='depthwise_kernel', - regularizer=self.depthwise_regularizer, - constraint=self.depthwise_constraint) - - if self.use_bias: - self.bias = self.add_weight(shape=(input_dim * self.depth_multiplier,), - initializer=self.bias_initializer, - name='bias', - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - # Set input spec. - self.input_spec = InputSpec(ndim=4, axes={channel_axis: input_dim}) - self.built = True - - def call(self, inputs, training=None): - outputs = K.depthwise_conv2d( - inputs, - self.depthwise_kernel, - strides=self.strides, - padding=self.padding, - dilation_rate=self.dilation_rate, - data_format=self.data_format) - - if self.bias: - outputs = K.bias_add( - outputs, - self.bias, - data_format=self.data_format) - - if self.activation is not None: - return self.activation(outputs) - - return outputs - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - rows = input_shape[2] - cols = input_shape[3] - out_filters = input_shape[1] * self.depth_multiplier - elif self.data_format == 'channels_last': - rows = input_shape[1] - cols = input_shape[2] - out_filters = input_shape[3] * self.depth_multiplier - - rows = conv_utils.conv_output_length(rows, self.kernel_size[0], - self.padding, - self.strides[0]) - cols = conv_utils.conv_output_length(cols, self.kernel_size[1], - self.padding, - self.strides[1]) - - if self.data_format == 'channels_first': - return (input_shape[0], out_filters, rows, cols) - elif self.data_format == 'channels_last': - return (input_shape[0], rows, cols, out_filters) - - def get_config(self): - config = super(DepthwiseConv2D, self).get_config() - config.pop('filters') - config.pop('kernel_initializer') - config.pop('kernel_regularizer') - config.pop('kernel_constraint') - config['depth_multiplier'] = self.depth_multiplier - config['depthwise_initializer'] = initializers.serialize( - self.depthwise_initializer) - config['depthwise_regularizer'] = regularizers.serialize( - self.depthwise_regularizer) - config['depthwise_constraint'] = constraints.serialize( - self.depthwise_constraint) - return config - - -def MobileNet(input_shape=None, - alpha=1.0, - depth_multiplier=1, - dropout=1e-3, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000): - """Instantiates the MobileNet architecture. - - Note that only TensorFlow is supported for now, - therefore it only works with the data format - `image_data_format='channels_last'` in your Keras config - at `~/.keras/keras.json`. - - To load a MobileNet model via `load_model`, import the custom - objects `relu6` and `DepthwiseConv2D` and pass them to the - `custom_objects` parameter. - E.g. 
-            model = load_model('mobilenet.h5', custom_objects={
-                               'relu6': mobilenet.relu6,
-                               'DepthwiseConv2D': mobilenet.DepthwiseConv2D})
-
-    # Arguments
-        input_shape: optional shape tuple, only to be specified
-            if `include_top` is False (otherwise the input shape
-            has to be `(224, 224, 3)` (with `channels_last` data format)
-            or (3, 224, 224) (with `channels_first` data format).
-            It should have exactly 3 input channels,
-            and width and height should be no smaller than 32.
-            E.g. `(200, 200, 3)` would be one valid value.
-        alpha: controls the width of the network.
-            - If `alpha` < 1.0, proportionally decreases the number
-                of filters in each layer.
-            - If `alpha` > 1.0, proportionally increases the number
-                of filters in each layer.
-            - If `alpha` = 1, default number of filters from the paper
-                are used at each layer.
-        depth_multiplier: depth multiplier for depthwise convolution
-            (the number of depthwise output channels per input channel)
-        dropout: dropout rate
-        include_top: whether to include the fully-connected
-            layer at the top of the network.
-        weights: `None` (random initialization) or
-            `imagenet` (ImageNet weights)
-        input_tensor: optional Keras tensor (i.e. output of
-            `layers.Input()`)
-            to use as image input for the model.
-        pooling: Optional pooling mode for feature extraction
-            when `include_top` is `False`.
-            - `None` means that the output of the model
-                will be the 4D tensor output of the
-                last convolutional layer.
-            - `avg` means that global average pooling
-                will be applied to the output of the
-                last convolutional layer, and thus
-                the output of the model will be a
-                2D tensor.
-            - `max` means that global max pooling will
-                be applied.
-        classes: optional number of classes to classify images
-            into, only to be specified if `include_top` is True, and
-            if no `weights` argument is specified.
-
-    # Returns
-        A Keras model instance.
-
-    # Raises
-        ValueError: in case of invalid argument for `weights`,
-            or invalid input shape.
-        RuntimeError: If attempting to run this model with a
-            backend that does not support separable convolutions.
-    """
-
-    if K.backend() != 'tensorflow':
-        raise RuntimeError('Only Tensorflow backend is currently supported, '
-                           'as other backends do not support '
-                           'depthwise convolution.')
-
-    if weights not in {'imagenet', None}:
-        raise ValueError('The `weights` argument should be either '
-                         '`None` (random initialization) or `imagenet` '
-                         '(pre-training on ImageNet).')
-
-    if weights == 'imagenet' and include_top and classes != 1000:
-        raise ValueError('If using `weights` as ImageNet with `include_top` '
-                         'as true, `classes` should be 1000')
-
-    # Determine proper input shape.
-    input_shape = _obtain_input_shape(input_shape,
-                                      default_size=224,
-                                      min_size=32,
-                                      data_format=K.image_data_format(),
-                                      include_top=include_top or weights)
-    if K.image_data_format() == 'channels_last':
-        row_axis, col_axis = (0, 1)
-    else:
-        row_axis, col_axis = (1, 2)
-    rows = input_shape[row_axis]
-    cols = input_shape[col_axis]
-
-    if weights == 'imagenet':
-        if depth_multiplier != 1:
-            raise ValueError('If imagenet weights are being loaded, '
-                             'depth multiplier must be 1')
-
-        if alpha not in [0.25, 0.50, 0.75, 1.0]:
-            raise ValueError('If imagenet weights are being loaded, '
-                             'alpha can be one of '
-                             '`0.25`, `0.50`, `0.75` or `1.0` only.')
-
-        if rows != cols or rows not in [128, 160, 192, 224]:
-            raise ValueError('If imagenet weights are being loaded, '
-                             'input must have a static square shape (one of '
-                             '(128,128), (160,160), (192,192), or (224, 224)).'
- ' Input shape provided = %s' % (input_shape,)) - - if K.image_data_format() != 'channels_last': - warnings.warn('The MobileNet family of models is only available ' - 'for the input data format "channels_last" ' - '(width, height, channels). ' - 'However your settings specify the default ' - 'data format "channels_first" (channels, width, height).' - ' You should set `image_data_format="channels_last"` ' - 'in your Keras config located at ~/.keras/keras.json. ' - 'The model being returned right now will expect inputs ' - 'to follow the "channels_last" data format.') - K.set_image_data_format('channels_last') - old_data_format = 'channels_first' - else: - old_data_format = None - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - x = _conv_block(img_input, 32, alpha, strides=(2, 2)) - x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1) - - x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, - strides=(2, 2), block_id=2) - x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3) - - x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, - strides=(2, 2), block_id=4) - x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5) - - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, - strides=(2, 2), block_id=6) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11) - - x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, - strides=(2, 2), block_id=12) - x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) - - if include_top: - if K.image_data_format() == 'channels_first': - shape = (int(1024 * alpha), 1, 1) - else: - shape = (1, 1, int(1024 * alpha)) - - x = GlobalAveragePooling2D()(x) - x = Reshape(shape, name='reshape_1')(x) - x = Dropout(dropout, name='dropout')(x) - x = Conv2D(classes, (1, 1), - padding='same', name='conv_preds')(x) - x = Activation('softmax', name='act_softmax')(x) - x = Reshape((classes,), name='reshape_2')(x) - else: - if pooling == 'avg': - x = GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. 
-    model = Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows))
-
-    # load weights
-    if weights == 'imagenet':
-        if K.image_data_format() == 'channels_first':
-            raise ValueError('Weights for "channels_last" format '
-                             'are not available.')
-        if alpha == 1.0:
-            alpha_text = '1_0'
-        elif alpha == 0.75:
-            alpha_text = '7_5'
-        elif alpha == 0.50:
-            alpha_text = '5_0'
-        else:
-            alpha_text = '2_5'
-
-        if include_top:
-            model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows)
-            weigh_path = BASE_WEIGHT_PATH + model_name
-            weights_path = get_file(model_name,
-                                    weigh_path,
-                                    cache_subdir='models')
-        else:
-            model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows)
-            weigh_path = BASE_WEIGHT_PATH + model_name
-            weights_path = get_file(model_name,
-                                    weigh_path,
-                                    cache_subdir='models')
-        model.load_weights(weights_path)
-
-    if old_data_format:
-        K.set_image_data_format(old_data_format)
-    return model
-
-
-def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)):
-    """Adds an initial convolution layer (with batch normalization and relu6).
-
-    # Arguments
-        inputs: Input tensor of shape `(rows, cols, 3)`
-            (with `channels_last` data format) or
-            (3, rows, cols) (with `channels_first` data format).
-            It should have exactly 3 input channels,
-            and width and height should be no smaller than 32.
-            E.g. `(224, 224, 3)` would be one valid value.
-        filters: Integer, the dimensionality of the output space
-            (i.e. the number of output filters in the convolution).
-        alpha: controls the width of the network.
-            - If `alpha` < 1.0, proportionally decreases the number
-                of filters in each layer.
-            - If `alpha` > 1.0, proportionally increases the number
-                of filters in each layer.
-            - If `alpha` = 1, default number of filters from the paper
-                are used at each layer.
-        kernel: An integer or tuple/list of 2 integers, specifying the
-            width and height of the 2D convolution window.
-            Can be a single integer to specify the same value for
-            all spatial dimensions.
-        strides: An integer or tuple/list of 2 integers,
-            specifying the strides of the convolution along the width and height.
-            Can be a single integer to specify the same value for
-            all spatial dimensions.
-            Specifying any stride value != 1 is incompatible with specifying
-            any `dilation_rate` value != 1.
-
-    # Input shape
-        4D tensor with shape:
-        `(samples, channels, rows, cols)` if data_format='channels_first'
-        or 4D tensor with shape:
-        `(samples, rows, cols, channels)` if data_format='channels_last'.
-
-    # Output shape
-        4D tensor with shape:
-        `(samples, filters, new_rows, new_cols)` if data_format='channels_first'
-        or 4D tensor with shape:
-        `(samples, new_rows, new_cols, filters)` if data_format='channels_last'.
-        `rows` and `cols` values might have changed due to stride.
-
-    # Returns
-        Output tensor of block.
-    """
-    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
-    filters = int(filters * alpha)
-    x = Conv2D(filters, kernel,
-               padding='same',
-               use_bias=False,
-               strides=strides,
-               name='conv1')(inputs)
-    x = BatchNormalization(axis=channel_axis, name='conv1_bn')(x)
-    return Activation(relu6, name='conv1_relu')(x)
-
-
-def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha,
-                          depth_multiplier=1, strides=(1, 1), block_id=1):
-    """Adds a depthwise convolution block.
-
-    A depthwise convolution block consists of a depthwise conv,
-    batch normalization, relu6, pointwise convolution,
-    batch normalization and relu6 activation.
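# Why the factorization above is worth it: a depthwise 3x3 pass plus a 1x1
# pointwise pass needs far fewer weights than one dense 3x3 convolution.
# A back-of-the-envelope comparison (channel counts are illustrative):
k, c_in, c_out = 3, 128, 256
standard = k * k * c_in * c_out          # 294912 weights
separable = k * k * c_in + c_in * c_out  # 1152 + 32768 = 33920 weights
print(round(standard / separable, 1))    # ~8.7x fewer parameters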
-
-    # Arguments
-        inputs: Input tensor of shape `(rows, cols, channels)`
-            (with `channels_last` data format) or
-            (channels, rows, cols) (with `channels_first` data format).
-        pointwise_conv_filters: Integer, the dimensionality of the output space
-            (i.e. the number of output filters in the pointwise convolution).
-        alpha: controls the width of the network.
-            - If `alpha` < 1.0, proportionally decreases the number
-                of filters in each layer.
-            - If `alpha` > 1.0, proportionally increases the number
-                of filters in each layer.
-            - If `alpha` = 1, default number of filters from the paper
-                are used at each layer.
-        depth_multiplier: The number of depthwise convolution output channels
-            for each input channel.
-            The total number of depthwise convolution output
-            channels will be equal to `filters_in * depth_multiplier`.
-        strides: An integer or tuple/list of 2 integers,
-            specifying the strides of the convolution along the width and height.
-            Can be a single integer to specify the same value for
-            all spatial dimensions.
-            Specifying any stride value != 1 is incompatible with specifying
-            any `dilation_rate` value != 1.
-        block_id: Integer, a unique identification designating the block number.
-
-    # Input shape
-        4D tensor with shape:
-        `(batch, channels, rows, cols)` if data_format='channels_first'
-        or 4D tensor with shape:
-        `(batch, rows, cols, channels)` if data_format='channels_last'.
-
-    # Output shape
-        4D tensor with shape:
-        `(batch, filters, new_rows, new_cols)` if data_format='channels_first'
-        or 4D tensor with shape:
-        `(batch, new_rows, new_cols, filters)` if data_format='channels_last'.
-        `rows` and `cols` values might have changed due to stride.
-
-    # Returns
-        Output tensor of block.
-    """
-    channel_axis = 1 if K.image_data_format() == 'channels_first' else -1
-    pointwise_conv_filters = int(pointwise_conv_filters * alpha)
-
-    x = DepthwiseConv2D((3, 3),
-                        padding='same',
-                        depth_multiplier=depth_multiplier,
-                        strides=strides,
-                        use_bias=False,
-                        name='conv_dw_%d' % block_id)(inputs)
-    x = BatchNormalization(
-        axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x)
-    x = Activation(relu6, name='conv_dw_%d_relu' % block_id)(x)
-
-    x = Conv2D(pointwise_conv_filters, (1, 1),
-               padding='same',
-               use_bias=False,
-               strides=(1, 1),
-               name='conv_pw_%d' % block_id)(x)
-    x = BatchNormalization(
-        axis=channel_axis, name='conv_pw_%d_bn' % block_id)(x)
-    return Activation(relu6, name='conv_pw_%d_relu' % block_id)(x)
-
-
-if __name__ == '__main__':
-    for r in [128, 160, 192, 224]:
-        for a in [0.25, 0.50, 0.75, 1.0]:
-            if r == 224:
-                model = MobileNet(include_top=True, weights='imagenet',
-                                  input_shape=(r, r, 3), alpha=a)
-
-                img_path = 'elephant.jpg'
-                img = image.load_img(img_path, target_size=(r, r))
-                x = image.img_to_array(img)
-                x = np.expand_dims(x, axis=0)
-                x = preprocess_input(x)
-                print('Input image shape:', x.shape)
-
-                preds = model.predict(x)
-                print(np.argmax(preds))
-                print('Predicted:', decode_predictions(preds, 1))
-
-    model = MobileNet(include_top=False, weights='imagenet')
-# -*- coding: utf-8 -*-
-'''MusicTaggerCRNN model for Keras.
-
-Code by github.com/keunwoochoi.
-
-# Reference:
-
-- [Music-auto_tagging-keras](https://github.com/keunwoochoi/music-auto_tagging-keras)
-
-'''
-from __future__ import print_function
-from __future__ import absolute_import
-
-import numpy as np
-from keras import backend as K
-from keras.layers import Input, Dense
-from keras.models import Model
-from keras.layers import Dense, Dropout, Reshape, Permute
-from keras.layers.convolutional import Convolution2D
-from keras.layers.convolutional import MaxPooling2D, ZeroPadding2D
-from keras.layers.normalization import BatchNormalization
-from keras.layers.advanced_activations import ELU
-from keras.layers.recurrent import GRU
-from keras.utils.data_utils import get_file
-from keras.utils.layer_utils import convert_all_kernels_in_model
-from audio_conv_utils import decode_predictions, preprocess_input
-
-TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.3/music_tagger_crnn_weights_tf_kernels_th_dim_ordering.h5'
-TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.3/music_tagger_crnn_weights_tf_kernels_tf_dim_ordering.h5'
-
-
-def MusicTaggerCRNN(weights='msd', input_tensor=None,
-                    include_top=True):
-    '''Instantiate the MusicTaggerCRNN architecture,
-    optionally loading weights pre-trained
-    on Million Song Dataset. Note that when using TensorFlow,
-    for best performance you should set
-    `image_dim_ordering="tf"` in your Keras config
-    at ~/.keras/keras.json.
-
-    The model and the weights are compatible with both
-    TensorFlow and Theano. The dimension ordering
-    convention used by the model is the one
-    specified in your Keras config file.
-
-    For preparing mel-spectrogram input, see
-    `audio_conv_utils.py` in [applications](https://github.com/fchollet/keras/tree/master/keras/applications).
-    You will need to install [Librosa](http://librosa.github.io/librosa/)
-    to use it.
-
-    # Arguments
-        weights: one of `None` (random initialization)
-            or "msd" (pre-training on the Million Song Dataset).
-        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
-            to use as image input for the model.
-        include_top: whether to include the 1 fully-connected
-            layer (output layer) at the top of the network.
-            If False, the network outputs 32-dim features.
-
-
-    # Returns
-        A Keras model instance.
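# The melgram input is 96 mel bands by 1366 time frames. Assuming the
# constants keras' audio_conv_utils used at the time (12 kHz sample rate,
# hop length 256, ~29.12 s clips -- an assumption, not taken from this
# file), the frame count works out as follows:
sr, hop_len, duration = 12000, 256, 29.12  # assumed audio_conv_utils values
print(int(sr * duration / hop_len))        # 1365, padded/cropped to 1366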
- ''' - if weights not in {'msd', None}: - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `msd` ' - '(pre-training on Million Song Dataset).') - - # Determine proper input shape - if K.image_dim_ordering() == 'th': - input_shape = (1, 96, 1366) - else: - input_shape = (96, 1366, 1) - - if input_tensor is None: - melgram_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - melgram_input = Input(tensor=input_tensor, shape=input_shape) - else: - melgram_input = input_tensor - - # Determine input axis - if K.image_dim_ordering() == 'th': - channel_axis = 1 - freq_axis = 2 - time_axis = 3 - else: - channel_axis = 3 - freq_axis = 1 - time_axis = 2 - - # Input block - x = ZeroPadding2D(padding=(0, 37))(melgram_input) - x = BatchNormalization(axis=time_axis, name='bn_0_freq')(x) - - # Conv block 1 - x = Convolution2D(64, 3, 3, border_mode='same', name='conv1')(x) - x = BatchNormalization(axis=channel_axis, mode=0, name='bn1')(x) - x = ELU()(x) - x = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(x) - - # Conv block 2 - x = Convolution2D(128, 3, 3, border_mode='same', name='conv2')(x) - x = BatchNormalization(axis=channel_axis, mode=0, name='bn2')(x) - x = ELU()(x) - x = MaxPooling2D(pool_size=(3, 3), strides=(3, 3), name='pool2')(x) - - # Conv block 3 - x = Convolution2D(128, 3, 3, border_mode='same', name='conv3')(x) - x = BatchNormalization(axis=channel_axis, mode=0, name='bn3')(x) - x = ELU()(x) - x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool3')(x) - - # Conv block 4 - x = Convolution2D(128, 3, 3, border_mode='same', name='conv4')(x) - x = BatchNormalization(axis=channel_axis, mode=0, name='bn4')(x) - x = ELU()(x) - x = MaxPooling2D(pool_size=(4, 4), strides=(4, 4), name='pool4')(x) - - # reshaping - if K.image_dim_ordering() == 'th': - x = Permute((3, 1, 2))(x) - x = Reshape((15, 128))(x) - - # GRU block 1, 2, output - x = GRU(32, return_sequences=True, name='gru1')(x) - x = GRU(32, return_sequences=False, name='gru2')(x) - - if include_top: - x = Dense(50, activation='sigmoid', name='output')(x) - - # Create model - model = Model(melgram_input, x) - if weights is None: - return model - else: - # Load weights - if K.image_dim_ordering() == 'tf': - weights_path = get_file('music_tagger_crnn_weights_tf_kernels_tf_dim_ordering.h5', - TF_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('music_tagger_crnn_weights_tf_kernels_th_dim_ordering.h5', - TH_WEIGHTS_PATH, - cache_subdir='models') - model.load_weights(weights_path, by_name=True) - if K.backend() == 'theano': - convert_all_kernels_in_model(model) - return model - - -if __name__ == '__main__': - model = MusicTaggerCRNN(weights='msd') - - audio_path = 'audio_file.mp3' - melgram = preprocess_input(audio_path) - melgrams = np.expand_dims(melgram, axis=0) - - preds = model.predict(melgrams) - print('Predicted:') - print(decode_predictions(preds)) -# -*- coding: utf-8 -*- -'''ResNet50 model for Keras. - -# Reference: - -- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) - -Adapted from code contributed by BigMoyan. 
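# Both building blocks defined below compute relu(F(x) + shortcut):
# identity_block reuses the input as the shortcut, while conv_block
# projects it with a strided 1x1 conv so the shapes match. A toy scalar
# version of the residual sum (purely illustrative):
def residual(x, F):
    return max(0.0, F(x) + x)  # relu(F(x) + x)

print(residual(2.0, lambda t: -0.5 * t))  # 1.0: the shortcut dominates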
-'''
-from __future__ import print_function
-
-import numpy as np
-import warnings
-
-from keras.layers import Input
-from keras import layers
-from keras.layers import Dense
-from keras.layers import Activation
-from keras.layers import Flatten
-from keras.layers import Conv2D
-from keras.layers import MaxPooling2D
-from keras.layers import GlobalMaxPooling2D
-from keras.layers import ZeroPadding2D
-from keras.layers import AveragePooling2D
-from keras.layers import GlobalAveragePooling2D
-from keras.layers import BatchNormalization
-from keras.models import Model
-from keras.preprocessing import image
-import keras.backend as K
-from keras.utils import layer_utils
-from keras.utils.data_utils import get_file
-from keras.applications.imagenet_utils import decode_predictions
-from keras.applications.imagenet_utils import preprocess_input
-from keras.applications.imagenet_utils import _obtain_input_shape
-from keras.engine.topology import get_source_inputs
-
-
-WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5'
-WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
-
-
-def identity_block(input_tensor, kernel_size, filters, stage, block):
-    """The identity block is the block that has no conv layer at shortcut.
-
-    # Arguments
-        input_tensor: input tensor
-        kernel_size: default 3, the kernel size of middle conv layer at main path
-        filters: list of integers, the filters of 3 conv layer at main path
-        stage: integer, current stage label, used for generating layer names
-        block: 'a','b'..., current block label, used for generating layer names
-
-    # Returns
-        Output tensor for the block.
-    """
-    filters1, filters2, filters3 = filters
-    if K.image_data_format() == 'channels_last':
-        bn_axis = 3
-    else:
-        bn_axis = 1
-    conv_name_base = 'res' + str(stage) + block + '_branch'
-    bn_name_base = 'bn' + str(stage) + block + '_branch'
-
-    x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor)
-    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
-    x = Activation('relu')(x)
-
-    x = Conv2D(filters2, kernel_size,
-               padding='same', name=conv_name_base + '2b')(x)
-    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
-    x = Activation('relu')(x)
-
-    x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
-    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
-
-    x = layers.add([x, input_tensor])
-    x = Activation('relu')(x)
-    return x
-
-
-def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
-    """conv_block is the block that has a conv layer at shortcut
-
-    # Arguments
-        input_tensor: input tensor
-        kernel_size: default 3, the kernel size of middle conv layer at main path
-        filters: list of integers, the filters of 3 conv layer at main path
-        stage: integer, current stage label, used for generating layer names
-        block: 'a','b'..., current block label, used for generating layer names
-
-    # Returns
-        Output tensor for the block.
-
-    Note that from stage 3, the first conv layer at main path is with strides=(2,2)
-    and the shortcut should have strides=(2,2) as well
-    """
-    filters1, filters2, filters3 = filters
-    if K.image_data_format() == 'channels_last':
-        bn_axis = 3
-    else:
-        bn_axis = 1
-    conv_name_base = 'res' + str(stage) + block + '_branch'
-    bn_name_base = 'bn' + str(stage) + block + '_branch'
-
-    x = Conv2D(filters1, (1, 1), strides=strides,
-               name=conv_name_base + '2a')(input_tensor)
-    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
-    x = Activation('relu')(x)
-
-    x = Conv2D(filters2, kernel_size, padding='same',
-               name=conv_name_base + '2b')(x)
-    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
-    x = Activation('relu')(x)
-
-    x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
-    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
-
-    shortcut = Conv2D(filters3, (1, 1), strides=strides,
-                      name=conv_name_base + '1')(input_tensor)
-    shortcut = BatchNormalization(
-        axis=bn_axis, name=bn_name_base + '1')(shortcut)
-
-    x = layers.add([x, shortcut])
-    x = Activation('relu')(x)
-    return x
-
-
-def ResNet50(include_top=True, weights='imagenet',
-             input_tensor=None, input_shape=None,
-             pooling=None,
-             classes=1000):
-    """Instantiates the ResNet50 architecture.
-
-    Optionally loads weights pre-trained
-    on ImageNet. Note that when using TensorFlow,
-    for best performance you should set
-    `image_data_format="channels_last"` in your Keras config
-    at ~/.keras/keras.json.
-
-    The model and the weights are compatible with both
-    TensorFlow and Theano. The data format
-    convention used by the model is the one
-    specified in your Keras config file.
-
-    # Arguments
-        include_top: whether to include the fully-connected
-            layer at the top of the network.
-        weights: one of `None` (random initialization)
-            or "imagenet" (pre-training on ImageNet).
-        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
-            to use as image input for the model.
-        input_shape: optional shape tuple, only to be specified
-            if `include_top` is False (otherwise the input shape
-            has to be `(224, 224, 3)` (with `channels_last` data format)
-            or `(3, 224, 224)` (with `channels_first` data format).
-            It should have exactly 3 input channels,
-            and width and height should be no smaller than 197.
-            E.g. `(200, 200, 3)` would be one valid value.
-        pooling: Optional pooling mode for feature extraction
-            when `include_top` is `False`.
-            - `None` means that the output of the model will be
-                the 4D tensor output of the
-                last convolutional layer.
-            - `avg` means that global average pooling
-                will be applied to the output of the
-                last convolutional layer, and thus
-                the output of the model will be a 2D tensor.
-            - `max` means that global max pooling will
-                be applied.
-        classes: optional number of classes to classify images
-            into, only to be specified if `include_top` is True, and
-            if no `weights` argument is specified.
-
-    # Returns
-        A Keras model instance.
-
-    # Raises
-        ValueError: in case of invalid argument for `weights`,
-            or invalid input shape.
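# A hedged usage sketch of the `pooling` argument documented above, written
# against the old keras 2.x API this file targets (not verified against
# other versions): include_top=False with pooling='avg' turns the trunk
# into a fixed-length feature extractor.
from keras.applications.resnet50 import ResNet50

extractor = ResNet50(include_top=False, weights='imagenet', pooling='avg')
print(extractor.output_shape)  # (None, 2048): one 2048-d vector per image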
- """ - if weights not in {'imagenet', None}: - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `imagenet` ' - '(pre-training on ImageNet).') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as imagenet with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=224, - min_size=197, - data_format=K.image_data_format(), - include_top=include_top) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - if K.image_data_format() == 'channels_last': - bn_axis = 3 - else: - bn_axis = 1 - - x = ZeroPadding2D((3, 3))(img_input) - x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x) - x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) - x = Activation('relu')(x) - x = MaxPooling2D((3, 3), strides=(2, 2))(x) - - x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) - x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') - x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') - - x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') - - x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') - - x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') - - x = AveragePooling2D((7, 7), name='avg_pool')(x) - - if include_top: - x = Flatten()(x) - x = Dense(classes, activation='softmax', name='fc1000')(x) - else: - if pooling == 'avg': - x = GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. 
- model = Model(inputs, x, name='resnet50') - - # load weights - if weights == 'imagenet': - if include_top: - weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels.h5', - WEIGHTS_PATH, - cache_subdir='models', - md5_hash='a7b3fe01876f51b976af0dea6bc144eb') - else: - weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', - WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='a268eb855778b3df3c7506639542a6af') - model.load_weights(weights_path) - if K.backend() == 'theano': - layer_utils.convert_all_kernels_in_model(model) - - if K.image_data_format() == 'channels_first': - if include_top: - maxpool = model.get_layer(name='avg_pool') - shape = maxpool.output_shape[1:] - dense = model.get_layer(name='fc1000') - layer_utils.convert_dense_weights_data_format( - dense, shape, 'channels_first') - - if K.backend() == 'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image data format convention ' - '(`image_data_format="channels_first"`). ' - 'For best performance, set ' - '`image_data_format="channels_last"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - return model - - -if __name__ == '__main__': - model = ResNet50(include_top=True, weights='imagenet') - - img_path = 'elephant.jpg' - img = image.load_img(img_path, target_size=(224, 224)) - x = image.img_to_array(img) - x = np.expand_dims(x, axis=0) - x = preprocess_input(x) - print('Input image shape:', x.shape) - - preds = model.predict(x) - print('Predicted:', decode_predictions(preds)) -# -*- coding: utf-8 -*- -'''VGG16 model for Keras. - -# Reference: - -- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) - -''' -from __future__ import print_function - -import numpy as np -import warnings - -from keras.models import Model -from keras.layers import Flatten -from keras.layers import Dense -from keras.layers import Input -from keras.layers import Conv2D -from keras.layers import MaxPooling2D -from keras.layers import GlobalMaxPooling2D -from keras.layers import GlobalAveragePooling2D -from keras.preprocessing import image -from keras.utils import layer_utils -from keras.utils.data_utils import get_file -from keras import backend as K -from keras.applications.imagenet_utils import decode_predictions -from keras.applications.imagenet_utils import preprocess_input -from keras.applications.imagenet_utils import _obtain_input_shape -from keras.engine.topology import get_source_inputs - - -WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5' -WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5' - - -def VGG16(include_top=True, weights='imagenet', - input_tensor=None, input_shape=None, - pooling=None, - classes=1000): - """Instantiates the VGG16 architecture. - - Optionally loads weights pre-trained - on ImageNet. Note that when using TensorFlow, - for best performance you should set - `image_data_format="channels_last"` in your Keras config - at ~/.keras/keras.json. - - The model and the weights are compatible with both - TensorFlow and Theano. The data format - convention used by the model is the one - specified in your Keras config file. - - # Arguments - include_top: whether to include the 3 fully-connected - layers at the top of the network. 
- weights: one of `None` (random initialization) - or "imagenet" (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` (with `channels_last` data format) - or `(3, 224, 244)` (with `channels_first` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 48. - E.g. `(200, 200, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - - # Returns - A Keras model instance. - - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - """ - if weights not in {'imagenet', None}: - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `imagenet` ' - '(pre-training on ImageNet).') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as imagenet with `include_top`' - ' as true, `classes` should be 1000') - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=224, - min_size=48, - data_format=K.image_data_format(), - include_top=include_top) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - # Block 1 - x = Conv2D(64, (3, 3), activation='relu', padding='same', - name='block1_conv1')(img_input) - x = Conv2D(64, (3, 3), activation='relu', - padding='same', name='block1_conv2')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) - - # Block 2 - x = Conv2D(128, (3, 3), activation='relu', - padding='same', name='block2_conv1')(x) - x = Conv2D(128, (3, 3), activation='relu', - padding='same', name='block2_conv2')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) - - # Block 3 - x = Conv2D(256, (3, 3), activation='relu', - padding='same', name='block3_conv1')(x) - x = Conv2D(256, (3, 3), activation='relu', - padding='same', name='block3_conv2')(x) - x = Conv2D(256, (3, 3), activation='relu', - padding='same', name='block3_conv3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) - - # Block 4 - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block4_conv1')(x) - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block4_conv2')(x) - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block4_conv3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) - - # Block 5 - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block5_conv1')(x) - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block5_conv2')(x) - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block5_conv3')(x) - x = 
MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) - - if include_top: - # Classification block - x = Flatten(name='flatten')(x) - x = Dense(4096, activation='relu', name='fc1')(x) - x = Dense(4096, activation='relu', name='fc2')(x) - x = Dense(classes, activation='softmax', name='predictions')(x) - else: - if pooling == 'avg': - x = GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = Model(inputs, x, name='vgg16') - - # load weights - if weights == 'imagenet': - if include_top: - weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5', - WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5', - WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - model.load_weights(weights_path) - if K.backend() == 'theano': - layer_utils.convert_all_kernels_in_model(model) - - if K.image_data_format() == 'channels_first': - if include_top: - maxpool = model.get_layer(name='block5_pool') - shape = maxpool.output_shape[1:] - dense = model.get_layer(name='fc1') - layer_utils.convert_dense_weights_data_format( - dense, shape, 'channels_first') - - if K.backend() == 'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image data format convention ' - '(`image_data_format="channels_first"`). ' - 'For best performance, set ' - '`image_data_format="channels_last"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - return model - - -if __name__ == '__main__': - model = VGG16(include_top=True, weights='imagenet') - - img_path = 'elephant.jpg' - img = image.load_img(img_path, target_size=(224, 224)) - x = image.img_to_array(img) - x = np.expand_dims(x, axis=0) - x = preprocess_input(x) - print('Input image shape:', x.shape) - - preds = model.predict(x) - print('Predicted:', decode_predictions(preds)) -# -*- coding: utf-8 -*- -'''VGG19 model for Keras. - -# Reference: - -- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) - -''' -from __future__ import print_function - -import numpy as np -import warnings - -from keras.models import Model -from keras.layers import Flatten, Dense, Input -from keras.layers import Conv2D -from keras.layers import MaxPooling2D -from keras.layers import GlobalMaxPooling2D -from keras.layers import GlobalAveragePooling2D -from keras.preprocessing import image -from keras.utils import layer_utils -from keras.utils.data_utils import get_file -from keras import backend as K -from keras.applications.imagenet_utils import decode_predictions -from keras.applications.imagenet_utils import preprocess_input -from keras.applications.imagenet_utils import _obtain_input_shape -from keras.engine.topology import get_source_inputs - - -WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels.h5' -WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5' - - -def VGG19(include_top=True, weights='imagenet', - input_tensor=None, input_shape=None, - pooling=None, - classes=1000): - """Instantiates the VGG19 architecture. - - Optionally loads weights pre-trained - on ImageNet. 
Note that when using TensorFlow, - for best performance you should set - `image_data_format="channels_last"` in your Keras config - at ~/.keras/keras.json. - - The model and the weights are compatible with both - TensorFlow and Theano. The data format - convention used by the model is the one - specified in your Keras config file. - - # Arguments - include_top: whether to include the 3 fully-connected - layers at the top of the network. - weights: one of `None` (random initialization) - or "imagenet" (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` (with `channels_last` data format) - or `(3, 224, 244)` (with `channels_first` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 48. - E.g. `(200, 200, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - - # Returns - A Keras model instance. - - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - """ - if weights not in {'imagenet', None}: - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `imagenet` ' - '(pre-training on ImageNet).') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as imagenet with `include_top`' - ' as true, `classes` should be 1000') - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=224, - min_size=48, - data_format=K.image_data_format(), - include_top=include_top) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - # Block 1 - x = Conv2D(64, (3, 3), activation='relu', padding='same', - name='block1_conv1')(img_input) - x = Conv2D(64, (3, 3), activation='relu', - padding='same', name='block1_conv2')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) - - # Block 2 - x = Conv2D(128, (3, 3), activation='relu', - padding='same', name='block2_conv1')(x) - x = Conv2D(128, (3, 3), activation='relu', - padding='same', name='block2_conv2')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) - - # Block 3 - x = Conv2D(256, (3, 3), activation='relu', - padding='same', name='block3_conv1')(x) - x = Conv2D(256, (3, 3), activation='relu', - padding='same', name='block3_conv2')(x) - x = Conv2D(256, (3, 3), activation='relu', - padding='same', name='block3_conv3')(x) - x = Conv2D(256, (3, 3), activation='relu', - padding='same', name='block3_conv4')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) - - # Block 4 - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block4_conv1')(x) - x = 
Conv2D(512, (3, 3), activation='relu', - padding='same', name='block4_conv2')(x) - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block4_conv3')(x) - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block4_conv4')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) - - # Block 5 - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block5_conv1')(x) - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block5_conv2')(x) - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block5_conv3')(x) - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block5_conv4')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) - - if include_top: - # Classification block - x = Flatten(name='flatten')(x) - x = Dense(4096, activation='relu', name='fc1')(x) - x = Dense(4096, activation='relu', name='fc2')(x) - x = Dense(classes, activation='softmax', name='predictions')(x) - else: - if pooling == 'avg': - x = GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = Model(inputs, x, name='vgg19') - - # load weights - if weights == 'imagenet': - if include_top: - weights_path = get_file('vgg19_weights_tf_dim_ordering_tf_kernels.h5', - WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5', - WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - model.load_weights(weights_path) - if K.backend() == 'theano': - layer_utils.convert_all_kernels_in_model(model) - - if K.image_data_format() == 'channels_first': - if include_top: - maxpool = model.get_layer(name='block5_pool') - shape = maxpool.output_shape[1:] - dense = model.get_layer(name='fc1') - layer_utils.convert_dense_weights_data_format( - dense, shape, 'channels_first') - - if K.backend() == 'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image data format convention ' - '(`image_data_format="channels_first"`). 
' - 'For best performance, set ' - '`image_data_format="channels_last"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - return model - - -if __name__ == '__main__': - model = VGG19(include_top=True, weights='imagenet') - - img_path = 'cat.jpg' - img = image.load_img(img_path, target_size=(224, 224)) - x = image.img_to_array(img) - x = np.expand_dims(x, axis=0) - x = preprocess_input(x) - print('Input image shape:', x.shape) - - preds = model.predict(x) - print('Predicted:', decode_predictions(preds)) -import os -import sys - -list_conversions = [('import keras.', 'import tensorflow.keras.'), - ('import keras ', 'from tensorflow import keras '), - ('import keras\n', 'from tensorflow import keras\n'), - ('from keras.', 'from tensorflow.keras.'), - ('from keras ', 'from tensorflow.keras ')] - - -def replace_imports_in_text(string, revert): - if revert: - list_imports_to_change = [x[::-1] for x in list_conversions] - else: - list_imports_to_change = list_conversions - - text_updated = string - for old_str, new_str in list_imports_to_change: - text_updated = text_updated.replace(old_str, new_str) - return text_updated - - -def replace_imports_in_file(file_path, revert): - if not file_path.endswith('.py'): - return False - if os.path.abspath(file_path) == os.path.abspath(__file__): - return False - with open(file_path, 'r') as f: - text = f.read() - - text_updated = replace_imports_in_text(text, revert) - - with open(file_path, 'w+') as f: - f.write(text_updated) - - return text_updated != text - - -def convert_codebase(revert): - nb_of_files_changed = 0 - keras_dir = os.path.dirname(os.path.abspath(__file__)) - for root, dirs, files in os.walk(keras_dir): - for name in files: - if replace_imports_in_file(os.path.join(root, name), revert): - nb_of_files_changed += 1 - print('Changed imports in ' + str(nb_of_files_changed) + ' files.') - print('Those files were found in the directory ' + keras_dir) - - -def convert_to_tf_keras(): - """Convert the codebase to tf.keras""" - convert_codebase(False) - - -def convert_to_keras_team_keras(): - """Convert the codebase from tf.keras to keras-team/keras""" - convert_codebase(True) - - -def test_replace_imports(): - python_code = """ - import keras - from keras import backend as K - import os - import keras_contrib - import keras_contrib.layers as lay - import keras.layers - from keras.layers import Dense - - if K.backend() == 'tensorflow': - import tensorflow as tf - function = tf.max - """ - - expected_code = """ - from tensorflow import keras - from tensorflow.keras import backend as K - import os - import keras_contrib - import keras_contrib.layers as lay - import tensorflow.keras.layers - from tensorflow.keras.layers import Dense - - if K.backend() == 'tensorflow': - import tensorflow as tf - function = tf.max - """ - - code_with_replacement = replace_imports_in_text(python_code, False) - assert expected_code == code_with_replacement - assert python_code == replace_imports_in_text(code_with_replacement, True) - - -if __name__ == '__main__': - if '--revert' in sys.argv: - convert_to_keras_team_keras() - else: - convert_to_tf_keras() -from setuptools import setup -from setuptools import find_packages -import os - - -if os.environ.get('USE_TF_KERAS', None) == '1': - name = 'tf_keras_contrib' - install_requires = [] -else: - name = 'keras_contrib' - install_requires = ['keras'] - -setup(name=name, - version='2.0.8', - description='Keras Deep Learning for Python, Community Contributions', - author='Fariz Rahman', - 
author_email='farizrahman4u@gmail.com', - url='https://github.com/farizrahman4u/keras-contrib', - license='MIT', - install_requires=install_requires, - extras_require={ - 'h5py': ['h5py'], - 'visualize': ['pydot>=1.2.0'], - 'tests': ['pytest', - 'pytest-pep8', - 'pytest-xdist', - 'pytest-cov'], - }, - classifiers=[ - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Developers', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Topic :: Software Development :: Libraries', - 'Topic :: Software Development :: Libraries :: Python Modules' - ], - packages=find_packages()) -'''Train a simple deep CNN on the CIFAR10 small images dataset using -a triangular cyclic learning rate (CLR) policy. -It gets to 75% validation accuracy in 15 epochs, and 79% after 40 epochs; -compare to 25 and 50 epochs respectively without CLR. -''' - -from __future__ import print_function -from __future__ import absolute_import -import keras -from keras.datasets import cifar10 -from keras.preprocessing.image import ImageDataGenerator -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation, Flatten -from keras.layers import Conv2D, MaxPooling2D -from keras_contrib.callbacks import CyclicLR - -import os - -batch_size = 100 -epochs = 50 -num_classes = 10 -data_augmentation = True -num_predictions = 20 -save_dir = os.path.join(os.getcwd(), 'saved_models') -model_name = 'keras_cifar10_trained_model.h5' - -# The data, split between train and test sets: -(x_train, y_train), (x_test, y_test) = cifar10.load_data() -print('x_train shape:', x_train.shape) -print(x_train.shape[0], 'train samples') -print(x_test.shape[0], 'test samples') - -# Convert class vectors to binary class matrices. 
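For context on the CLR policy this script advertises, here is a minimal sketch of the schedule, assuming the standard 'triangular' mode from Smith's CLR paper, which is what `keras_contrib.callbacks.CyclicLR` implements: the learning rate ramps linearly from `base_lr` to `max_lr` over `step_size` batches, then back down.

```python
import numpy as np

def triangular_clr(iteration, base_lr=0.0001, max_lr=0.0005, step_size=2000):
    # One full cycle is 2 * step_size iterations: up for step_size, down for step_size.
    cycle = np.floor(1 + iteration / (2 * step_size))
    x = abs(iteration / step_size - 2 * cycle + 1)
    return base_lr + (max_lr - base_lr) * max(0.0, 1 - x)

for it in (0, 1000, 2000, 3000, 4000):
    print(it, triangular_clr(it))  # base -> max at 2000 -> back to base at 4000
```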
-y_train = keras.utils.to_categorical(y_train, num_classes) -y_test = keras.utils.to_categorical(y_test, num_classes) - -model = Sequential() -model.add(Conv2D(32, (3, 3), padding='same', - input_shape=x_train.shape[1:])) -model.add(Activation('relu')) -model.add(Conv2D(32, (3, 3))) -model.add(Activation('relu')) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) - -model.add(Conv2D(64, (3, 3), padding='same')) -model.add(Activation('relu')) -model.add(Conv2D(64, (3, 3))) -model.add(Activation('relu')) -model.add(MaxPooling2D(pool_size=(2, 2))) -model.add(Dropout(0.25)) - -model.add(Flatten()) -model.add(Dense(512)) -model.add(Activation('relu')) -model.add(Dropout(0.5)) -model.add(Dense(num_classes)) -model.add(Activation('softmax')) - -# initiate RMSprop optimizer -opt = keras.optimizers.rmsprop(lr=0.0001, decay=1e-6) - -# initiate CyclicLR LR scheduler -clr = CyclicLR( - base_lr=0.0001, - max_lr=0.0005, - step_size=2000, - mode='triangular') - - -# Let's train the model using RMSprop -model.compile(loss='categorical_crossentropy', - optimizer=opt, - metrics=['accuracy']) - -x_train = x_train.astype('float32') -x_test = x_test.astype('float32') -x_train /= 255 -x_test /= 255 - -if not data_augmentation: - print('Not using data augmentation.') - model.fit(x_train, y_train, - batch_size=batch_size, - epochs=epochs, - validation_data=(x_test, y_test), - callbacks=[clr], - shuffle=True) -else: - print('Using real-time data augmentation.') - # This will do preprocessing and realtime data augmentation: - datagen = ImageDataGenerator( - featurewise_center=False, # set input mean to 0 over the dataset - samplewise_center=False, # set each sample mean to 0 - featurewise_std_normalization=False, # divide inputs by std of the dataset - samplewise_std_normalization=False, # divide each input by its std - zca_whitening=False, # apply ZCA whitening - zca_epsilon=1e-06, # epsilon for ZCA whitening - rotation_range=0, - # randomly rotate images in the range (degrees, 0 to 180) - # randomly shift images horizontally (fraction of total width) - width_shift_range=0.1, - # randomly shift images vertically (fraction of total height) - height_shift_range=0.1, - shear_range=0., # set range for random shear - zoom_range=0., # set range for random zoom - channel_shift_range=0., # set range for random channel shifts - # set mode for filling points outside the input boundaries - fill_mode='nearest', - cval=0., # value used for fill_mode = "constant" - horizontal_flip=True, # randomly flip images - vertical_flip=False, # randomly flip images - # set rescaling factor (applied before any other transformation) - rescale=None, - # set function that will be applied on each input - preprocessing_function=None, - # image data format, either "channels_first" or "channels_last" - data_format=None, - # fraction of images reserved for validation (strictly between 0 and 1) - validation_split=0.0) - - # Compute quantities required for feature-wise normalization - # (std, mean, and principal components if ZCA whitening is applied). - datagen.fit(x_train) - - # Fit the model on the batches generated by datagen.flow(). - - model.fit_generator(datagen.flow(x_train, y_train, - batch_size=batch_size), - epochs=epochs, - validation_data=(x_test, y_test), - callbacks=[clr], - workers=4) - -# Save model and weights -if not os.path.isdir(save_dir): - os.makedirs(save_dir) -model_path = os.path.join(save_dir, model_name) -model.save(model_path) -print('Saved trained model at %s ' % model_path) - -# Score trained model. 
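Back-of-envelope arithmetic (mine, not from the dump) for the configuration above: CIFAR-10 has 50,000 training images, so with `batch_size = 100` and `step_size = 2000` each full CLR cycle spans 8 epochs, and the 50-epoch run covers just over 6 cycles.

```python
batches_per_epoch = 50_000 // 100   # CIFAR-10 train set / batch_size = 500
full_cycle = 2 * 2000               # 2 * step_size = 4000 batches = 8 epochs
total_batches = batches_per_epoch * 50
print(total_batches / full_cycle)   # 6.25 cycles over the 50-epoch run
```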
-scores = model.evaluate(x_test, y_test, verbose=1) -print('Test loss:', scores[0]) -print('Test accuracy:', scores[1]) -''' -Trains a DenseNet-40-12 model on the CIFAR-10 Dataset. - -Gets a 94.84% accuracy score after 100 epochs. -''' -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import numpy as np - -from keras import backend as K -from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping -from keras.datasets import cifar10 -from keras.optimizers import Adam -from keras.preprocessing.image import ImageDataGenerator -from keras.utils import np_utils -from keras_contrib.applications import DenseNet - -batch_size = 64 -nb_classes = 10 -epochs = 100 - -img_rows, img_cols = 32, 32 -img_channels = 3 - -# Parameters for the DenseNet model builder -if K.image_data_format() == 'channels_first': - img_dim = (img_channels, img_rows, img_cols) -else: - img_dim = (img_rows, img_cols, img_channels) -depth = 40 -nb_dense_block = 3 -growth_rate = 12 -nb_filter = 16 -dropout_rate = 0.0 # 0.0 for data augmentation - -# Create the model (without loading weights) -model = DenseNet(depth=depth, nb_dense_block=nb_dense_block, - growth_rate=growth_rate, nb_filter=nb_filter, - dropout_rate=dropout_rate, - input_shape=img_dim, - weights=None) -print('Model created') - -model.summary() - -optimizer = Adam(lr=1e-3) # Using Adam instead of SGD to speed up training -model.compile(loss='categorical_crossentropy', - optimizer=optimizer, metrics=['acc']) -print('Finished compiling') - -(trainX, trainY), (testX, testY) = cifar10.load_data() - -trainX = trainX.astype('float32') -testX = testX.astype('float32') - -trainX /= 255. -testX /= 255. - -Y_train = np_utils.to_categorical(trainY, nb_classes) -Y_test = np_utils.to_categorical(testY, nb_classes) - -generator = ImageDataGenerator(rotation_range=15, - width_shift_range=5. / 32, - height_shift_range=5. / 32) - -generator.fit(trainX, seed=0) - -weights_file = 'DenseNet-40-12-CIFAR-10.h5' - -lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1), - cooldown=0, patience=10, min_lr=0.5e-6) -early_stopper = EarlyStopping(monitor='val_acc', min_delta=1e-4, patience=20) -model_checkpoint = ModelCheckpoint(weights_file, monitor='val_acc', - save_best_only=True, - save_weights_only=True, mode='auto') - -callbacks = [lr_reducer, early_stopper, model_checkpoint] - -model.fit_generator(generator.flow(trainX, Y_train, batch_size=batch_size), - steps_per_epoch=len(trainX) // batch_size, - epochs=epochs, - callbacks=callbacks, - validation_data=(testX, Y_test), - verbose=2) - -scores = model.evaluate(testX, Y_test, batch_size=batch_size) -print('Test loss : ', scores[0]) -print('Test accuracy : ', scores[1]) -""" -Adapted from keras example cifar10_cnn.py -Train NASNet-CIFAR on the CIFAR10 small images dataset. 
-""" -from __future__ import print_function -from keras.datasets import cifar10 -from keras.preprocessing.image import ImageDataGenerator -from keras.utils import np_utils -from keras.callbacks import ModelCheckpoint -from keras.callbacks import ReduceLROnPlateau -from keras.callbacks import CSVLogger -from keras.optimizers import Adam -from keras_contrib.applications.nasnet import NASNetCIFAR, preprocess_input - -import numpy as np - - -weights_file = 'NASNet-CIFAR-10.h5' -lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.5), - cooldown=0, - patience=5, - min_lr=0.5e-5) -csv_logger = CSVLogger('NASNet-CIFAR-10.csv') -model_checkpoint = ModelCheckpoint(weights_file, - monitor='val_predictions_acc', - save_best_only=True, - save_weights_only=True, mode='max') - -batch_size = 128 -nb_classes = 10 -nb_epoch = 600 -data_augmentation = True - -# input image dimensions -img_rows, img_cols = 32, 32 -# The CIFAR10 images are RGB. -img_channels = 3 - -# The data, shuffled and split between train and test sets: -(X_train, y_train), (X_test, y_test) = cifar10.load_data() - -# Convert class vectors to binary class matrices. -Y_train = np_utils.to_categorical(y_train, nb_classes) -Y_test = np_utils.to_categorical(y_test, nb_classes) - -X_train = X_train.astype('float32') -X_test = X_test.astype('float32') - -# preprocess input -X_train = preprocess_input(X_train) -X_test = preprocess_input(X_test) - -# For training, the auxilary branch must be used to correctly train NASNet -model = NASNetCIFAR((img_rows, img_cols, img_channels), - use_auxilary_branch=True) -model.summary() - -optimizer = Adam(lr=1e-3, clipnorm=5) -model.compile(loss=['categorical_crossentropy', 'categorical_crossentropy'], - optimizer=optimizer, metrics=['accuracy'], loss_weights=[1.0, 0.4]) - -# model.load_weights('NASNet-CIFAR-10.h5', by_name=True) - -if not data_augmentation: - print('Not using data augmentation.') - model.fit(X_train, [Y_train, Y_train], - batch_size=batch_size, - epochs=nb_epoch, - validation_data=(X_test, [Y_test, Y_test]), - shuffle=True, - verbose=2, - callbacks=[lr_reducer, csv_logger, model_checkpoint]) -else: - print('Using real-time data augmentation.') - # This will do preprocessing and realtime data augmentation: - datagen = ImageDataGenerator( - featurewise_center=False, # set input mean to 0 over the dataset - samplewise_center=False, # set each sample mean to 0 - featurewise_std_normalization=False, # divide inputs by std of the dataset - samplewise_std_normalization=False, # divide each input by its std - zca_whitening=False, # apply ZCA whitening - # randomly rotate images in the range (degrees, 0 to 180) - rotation_range=0, - width_shift_range=0.1, - height_shift_range=0.1, - horizontal_flip=True, # randomly flip images - vertical_flip=False) # randomly flip images - - # Compute quantities required for featurewise normalization - # (std, mean, and principal components if ZCA whitening is applied). 
- datagen.fit(X_train) - - # wrap the ImageDataGenerator to yield - # two label batches [y, y] for each input batch X - # When training a NASNet model, we have to use its auxilary training head - # Therefore the model is technically a 1 input - 2 output model, and requires - # the label to be duplicated for the auxilary head - def image_data_generator_wrapper(image_datagenerator, batch_size): - iterator = datagen.flow(X_train, Y_train, batch_size=batch_size) - - while True: - X, y = next(iterator) # get the next batch - yield X, [y, y] # duplicate the labels for each batch - - # Fit the model on the batches generated by datagen.flow(). - model.fit_generator(image_data_generator_wrapper(datagen, batch_size), - steps_per_epoch=X_train.shape[0] // batch_size, - validation_data=(X_test, [Y_test, Y_test]), - epochs=nb_epoch, verbose=2, - callbacks=[lr_reducer, csv_logger, model_checkpoint]) - -scores = model.evaluate(X_test, [Y_test, Y_test], batch_size=batch_size) -for score, metric_name in zip(scores, model.metrics_names): - print("%s : %0.4f" % (metric_name, score)) -""" -Adapted from keras example cifar10_cnn.py and github.com/raghakot/keras-resnet -Train ResNet-18 on the CIFAR10 small images dataset. - -GPU run command with Theano backend (with TensorFlow, the GPU is automatically used): - THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10.py -""" -from __future__ import print_function -from keras.datasets import cifar10 -from keras.preprocessing.image import ImageDataGenerator -from keras.utils import np_utils -from keras.callbacks import ModelCheckpoint -from keras.callbacks import ReduceLROnPlateau -from keras.callbacks import CSVLogger -from keras.callbacks import EarlyStopping -from keras_contrib.applications.resnet import ResNet18 - -import numpy as np - - -weights_file = 'ResNet18v2-CIFAR-10.h5' -lr_reducer = ReduceLROnPlateau(factor=np.sqrt(0.1), cooldown=0, - patience=5, min_lr=0.5e-6) -early_stopper = EarlyStopping(min_delta=0.001, patience=10) -csv_logger = CSVLogger('ResNet18v2-CIFAR-10.csv') -model_checkpoint = ModelCheckpoint(weights_file, monitor='val_acc', save_best_only=True, - save_weights_only=True, mode='auto') - -batch_size = 32 -nb_classes = 10 -nb_epoch = 200 -data_augmentation = True - -# input image dimensions -img_rows, img_cols = 32, 32 -# The CIFAR10 images are RGB. -img_channels = 3 - -# The data, shuffled and split between train and test sets: -(X_train, y_train), (X_test, y_test) = cifar10.load_data() - -# Convert class vectors to binary class matrices. -Y_train = np_utils.to_categorical(y_train, nb_classes) -Y_test = np_utils.to_categorical(y_test, nb_classes) - -X_train = X_train.astype('float32') -X_test = X_test.astype('float32') - -# subtract mean and normalize -mean_image = np.mean(X_train, axis=0) -X_train -= mean_image -X_test -= mean_image -X_train /= 128. -X_test /= 128. 
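The ResNet-18 preprocessing above (mean-image subtraction, then dividing by 128) is worth a tiny illustration, since it differs from the plain `/255` scaling used elsewhere in this dump. A minimal numpy sketch with random stand-in data:

```python
import numpy as np

X = np.random.randint(0, 256, size=(8, 32, 32, 3)).astype('float32')
mean_image = X.mean(axis=0)  # one mean per pixel and channel, like the script above

X -= mean_image              # center every pixel around zero
X /= 128.                    # pixels span 0-255, so values land roughly in [-2, 2]
print(round(float(X.mean()), 3), float(X.min()) >= -2.0, float(X.max()) <= 2.0)
```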
- -model = ResNet18((img_rows, img_cols, img_channels), nb_classes) -model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - -if not data_augmentation: - print('Not using data augmentation.') - model.fit(X_train, Y_train, - batch_size=batch_size, - nb_epoch=nb_epoch, - validation_data=(X_test, Y_test), - shuffle=True, - callbacks=[lr_reducer, early_stopper, csv_logger, model_checkpoint]) -else: - print('Using real-time data augmentation.') - # This will do preprocessing and realtime data augmentation: - datagen = ImageDataGenerator( - featurewise_center=False, # set input mean to 0 over the dataset - samplewise_center=False, # set each sample mean to 0 - featurewise_std_normalization=False, # divide inputs by std of the dataset - samplewise_std_normalization=False, # divide each input by its std - zca_whitening=False, # apply ZCA whitening - # randomly rotate images in the range (degrees, 0 to 180) - rotation_range=0, - width_shift_range=0.1, # randomly shift images horizontally - height_shift_range=0.1, # randomly shift images vertically - horizontal_flip=True, # randomly flip images - vertical_flip=False) # randomly flip images - - # Compute quantities required for featurewise normalization - # (std, mean, and principal components if ZCA whitening is applied). - datagen.fit(X_train) - - callbacks = [lr_reducer, early_stopper, csv_logger, model_checkpoint] - # Fit the model on the batches generated by datagen.flow(). - model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size), - steps_per_epoch=X_train.shape[0] // batch_size, - validation_data=(X_test, Y_test), - epochs=nb_epoch, verbose=2, - callbacks=callbacks) - -scores = model.evaluate(X_test, Y_test, batch_size=batch_size) -print('Test loss : ', scores[0]) -print('Test accuracy : ', scores[1]) -''' -Trains a Residual-of-Residual Network (WRN-40-2) model on the CIFAR-10 Dataset. - -Gets a 94.53% accuracy score after 150 epochs. -''' - -import keras.callbacks as callbacks -import keras.utils.np_utils as kutils -from keras.datasets import cifar10 -from keras.preprocessing.image import ImageDataGenerator -from keras.optimizers import Adam - -from keras_contrib.applications import ResidualOfResidual - -batch_size = 64 -epochs = 150 -img_rows, img_cols = 32, 32 - -(trainX, trainY), (testX, testY) = cifar10.load_data() - -trainX = trainX.astype('float32') -testX = testX.astype('float32') - -trainX /= 255 -testX /= 255 - -tempY = testY -trainY = kutils.to_categorical(trainY) -testY = kutils.to_categorical(testY) - -generator = ImageDataGenerator(rotation_range=15, - width_shift_range=5. / 32, - height_shift_range=5. / 32) - -generator.fit(trainX, seed=0) - -model = ResidualOfResidual(depth=40, width=2, dropout_rate=0.0, weights=None) - -optimizer = Adam(lr=1e-3) - -model.compile(loss='categorical_crossentropy', - optimizer=optimizer, metrics=['acc']) -print('Finished compiling') - -checkpoint = callbacks.ModelCheckpoint('weights/RoR-WRN-40-2-Weights.h5', - monitor='val_acc', - save_best_only=True, - save_weights_only=True) -model.fit_generator(generator.flow(trainX, trainY, batch_size=batch_size), - steps_per_epoch=len(trainX) // batch_size, - epochs=epochs, - callbacks=[checkpoint], - validation_data=(testX, testY), - verbose=2) - -scores = model.evaluate(testX, testY, batch_size) -print('Test loss : ', scores[0]) -print('Test accuracy : ', scores[1]) -''' -Trains a WRN-28-8 model on the CIFAR-10 Dataset. 
- -Performance is slightly less than the paper, since -they use WRN-28-10 model (95.83%). - -Gets a 95.54% accuracy score after 300 epochs. -''' -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -from keras.datasets import cifar10 -import keras.callbacks as callbacks -import keras.utils.np_utils as kutils -from keras.preprocessing.image import ImageDataGenerator - -from keras_contrib.applications.wide_resnet import WideResidualNetwork - -batch_size = 64 -epochs = 300 -img_rows, img_cols = 32, 32 - -(trainX, trainY), (testX, testY) = cifar10.load_data() - -trainX = trainX.astype('float32') -trainX /= 255.0 -testX = testX.astype('float32') -testX /= 255.0 - -tempY = testY -trainY = kutils.to_categorical(trainY) -testY = kutils.to_categorical(testY) - -generator = ImageDataGenerator(rotation_range=10, - width_shift_range=5. / 32, - height_shift_range=5. / 32, - horizontal_flip=True) - -generator.fit(trainX, seed=0, augment=True) - -# We will be training the model, therefore no need to load weights -model = WideResidualNetwork(depth=28, width=8, dropout_rate=0.0, weights=None) - -model.summary() - -model.compile(loss='categorical_crossentropy', - optimizer='adam', metrics=['acc']) -print('Finished compiling') -model_checkpoint = callbacks.ModelCheckpoint('WRN-28-8 Weights.h5', - monitor='val_acc', - save_best_only=True, - save_weights_only=True) -model.fit_generator(generator.flow(trainX, trainY, batch_size=batch_size), - steps_per_epoch=len(trainX) // batch_size, - epochs=epochs, - callbacks=[model_checkpoint], - validation_data=(testX, testY)) - -scores = model.evaluate(testX, testY, batch_size) -print('Test loss : %0.5f' % (scores[0])) -print('Test accuracy = %0.5f' % (scores[1])) -"""Train CRF and BiLSTM-CRF on CONLL2000 chunking data, -similar to https://arxiv.org/pdf/1508.01991v1.pdf. 
-""" -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import numpy -from collections import Counter - -from keras.models import Sequential -from keras.layers import Embedding, Bidirectional, LSTM -from keras_contrib.layers import CRF -from keras_contrib.losses import crf_loss -from keras_contrib.metrics import crf_viterbi_accuracy -from keras_contrib.datasets import conll2000 - -EPOCHS = 10 -EMBED_DIM = 200 -BiRNN_UNITS = 200 - - -def classification_report(y_true, y_pred, labels): - '''Similar to the one in sklearn.metrics, - reports per classs recall, precision and F1 score''' - y_true = numpy.asarray(y_true).ravel() - y_pred = numpy.asarray(y_pred).ravel() - corrects = Counter(yt for yt, yp in zip(y_true, y_pred) if yt == yp) - y_true_counts = Counter(y_true) - y_pred_counts = Counter(y_pred) - report = ((lab, # label - corrects[i] / max(1, y_true_counts[i]), # recall - corrects[i] / max(1, y_pred_counts[i]), # precision - y_true_counts[i] # support - ) for i, lab in enumerate(labels)) - report = [(l, r, p, 2 * r * p / max(1e-9, r + p), s) - for l, r, p, s in report] - - print('{:<15}{:>10}{:>10}{:>10}{:>10}\n'.format('', - 'recall', - 'precision', - 'f1-score', - 'support')) - formatter = '{:<15}{:>10.2f}{:>10.2f}{:>10.2f}{:>10d}'.format - for r in report: - print(formatter(*r)) - print('') - report2 = list(zip(*[(r * s, p * s, f1 * s) for l, r, p, f1, s in report])) - N = len(y_true) - print(formatter('avg / total', - sum(report2[0]) / N, - sum(report2[1]) / N, - sum(report2[2]) / N, N) + '\n') - - -# ------ -# Data -# ----- - -# conll200 has two different targets, here will only use -# IBO like chunking as an example -train, test, voc = conll2000.load_data() -(train_x, _, train_y) = train -(test_x, _, test_y) = test -(vocab, _, class_labels) = voc - -# -------------- -# 1. Regular CRF -# -------------- - -print('==== training CRF ====') - -model = Sequential() -model.add(Embedding(len(vocab), EMBED_DIM, mask_zero=True)) # Random embedding -crf = CRF(len(class_labels), sparse_target=True) -model.add(crf) -model.summary() - -# The default `crf_loss` for `learn_mode='join'` is negative log likelihood. -model.compile('adam', loss=crf_loss, metrics=[crf_viterbi_accuracy]) -model.fit(train_x, train_y, epochs=EPOCHS, validation_data=[test_x, test_y]) - -test_y_pred = model.predict(test_x).argmax(-1)[test_x > 0] -test_y_true = test_y[test_x > 0] - -print('\n---- Result of CRF ----\n') -classification_report(test_y_true, test_y_pred, class_labels) - -# ------------- -# 2. BiLSTM-CRF -# ------------- - -print('==== training BiLSTM-CRF ====') - -model = Sequential() -model.add(Embedding(len(vocab), EMBED_DIM, mask_zero=True)) # Random embedding -model.add(Bidirectional(LSTM(BiRNN_UNITS // 2, return_sequences=True))) -crf = CRF(len(class_labels), sparse_target=True) -model.add(crf) -model.summary() - -model.compile('adam', loss=crf_loss, metrics=[crf_viterbi_accuracy]) -model.fit(train_x, train_y, epochs=EPOCHS, validation_data=[test_x, test_y]) - -test_y_pred = model.predict(test_x).argmax(-1)[test_x > 0] -test_y_true = test_y[test_x > 0] - -print('\n---- Result of BiLSTM-CRF ----\n') -classification_report(test_y_true, test_y_pred, class_labels) -"""An implementation of the improved WGAN described in https://arxiv.org/abs/1704.00028 - -The improved WGAN has a term in the loss function which penalizes the network if its -gradient norm moves away from 1. 
This is included because the Earth Mover (EM) distance -used in WGANs is only easy to calculate for 1-Lipschitz functions (i.e. functions where -the gradient norm has a constant upper bound of 1). - -The original WGAN paper enforced this by clipping weights to very small values -[-0.01, 0.01]. However, this drastically reduced network capacity. Penalizing the -gradient norm is more natural, but this requires second-order gradients. These are not -supported for some tensorflow ops (particularly MaxPool and AveragePool) in the current -release (1.0.x), but they are supported in the current nightly builds -(1.1.0-rc1 and higher). - -To avoid this, this model uses strided convolutions instead of Average/Maxpooling for -downsampling. If you wish to use pooling operations in your discriminator, please ensure -you update Tensorflow to 1.1.0-rc1 or higher. I haven't tested this with Theano at all. - -The model saves images using pillow. If you don't have pillow, either install it or -remove the calls to generate_images. -""" -import argparse -import os -import numpy as np -from keras.models import Model, Sequential -from keras.layers import Input, Dense, Reshape, Flatten -from keras.layers.merge import _Merge -from keras.layers.convolutional import Convolution2D, Conv2DTranspose -from keras.layers.normalization import BatchNormalization -from keras.layers.advanced_activations import LeakyReLU -from keras.optimizers import Adam -from keras.datasets import mnist -from keras import backend as K -from functools import partial - -try: - from PIL import Image -except ImportError: - print('This script depends on pillow! ' - 'Please install it (e.g. with pip install pillow)') - exit() - -BATCH_SIZE = 64 -# The training ratio is the number of discriminator updates -# per generator update. The paper uses 5. -TRAINING_RATIO = 5 -GRADIENT_PENALTY_WEIGHT = 10 # As per the paper - - -def wasserstein_loss(y_true, y_pred): - """Calculates the Wasserstein loss for a sample batch. - - The Wasserstein loss function is very simple to calculate. In a standard GAN, the - discriminator has a sigmoid output, representing the probability that samples are - real or generated. In Wasserstein GANs, however, the output is linear with no - activation function! Instead of being constrained to [0, 1], the discriminator wants - to make the distance between its output for real and generated samples as - large as possible. - - The most natural way to achieve this is to label generated samples -1 and real - samples 1, instead of the 0 and 1 used in normal GANs, so that multiplying the - outputs by the labels will give you the loss immediately. - - Note that the nature of this loss means that it can be (and frequently will be) - less than 0.""" - return K.mean(y_true * y_pred) - - -def gradient_penalty_loss(y_true, y_pred, averaged_samples, - gradient_penalty_weight): - """Calculates the gradient penalty loss for a batch of "averaged" samples. - - In Improved WGANs, the 1-Lipschitz constraint is enforced by adding a term to the - loss function that penalizes the network if the gradient norm moves away from 1. - However, it is impossible to evaluate this function at all points in the input - space. The compromise used in the paper is to choose random points on the lines - between real and generated samples, and check the gradients at these points. Note - that it is the gradient w.r.t. the input averaged samples, not the weights of the - discriminator, that we're penalizing! 
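To make that concrete, here is a toy numpy check of mine: with real samples labelled +1 and generated ones labelled -1 (the `positive_y`/`negative_y` vectors defined further down), the loss is just a mean of signed critic scores. Minimizing it drives the two classes' scores apart, and, as the docstring notes, the value can go below zero, unlike cross-entropy.

```python
import numpy as np

def wasserstein_loss(y_true, y_pred):
    return np.mean(y_true * y_pred)

y_true = np.array([1., 1., -1., -1.])         # +1 real, -1 generated
separated = np.array([-2.0, -3.0, 2.5, 3.0])  # critic scores the classes apart
undecided = np.array([0.1, -0.1, 0.1, -0.1])  # critic can't tell them apart

print(wasserstein_loss(y_true, separated))    # -2.625, and it can keep falling
print(wasserstein_loss(y_true, undecided))    # 0.0
```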
- - In order to evaluate the gradients, we must first run samples through the generator - and evaluate the loss. Then we get the gradients of the discriminator w.r.t. the - input averaged samples. The l2 norm and penalty can then be calculated for this - gradient. - - Note that this loss function requires the original averaged samples as input, but - Keras only supports passing y_true and y_pred to loss functions. To get around this, - we make a partial() of the function with the averaged_samples argument, and use that - for model training.""" - # first get the gradients: - # assuming: - that y_pred has dimensions (batch_size, 1) - # - averaged_samples has dimensions (batch_size, nbr_features) - # gradients afterwards has dimension (batch_size, nbr_features), basically - # a list of nbr_features-dimensional gradient vectors - gradients = K.gradients(y_pred, averaged_samples)[0] - # compute the euclidean norm by squaring ... - gradients_sqr = K.square(gradients) - # ... summing over the rows ... - gradients_sqr_sum = K.sum(gradients_sqr, - axis=np.arange(1, len(gradients_sqr.shape))) - # ... and sqrt - gradient_l2_norm = K.sqrt(gradients_sqr_sum) - # compute lambda * (1 - ||grad||)^2 still for each single sample - gradient_penalty = gradient_penalty_weight * K.square(1 - gradient_l2_norm) - # return the mean as loss over all the batch samples - return K.mean(gradient_penalty) - - -def make_generator(): - """Creates a generator model that takes a 100-dimensional noise vector as a "seed", - and outputs images of size 28x28x1.""" - model = Sequential() - model.add(Dense(1024, input_dim=100)) - model.add(LeakyReLU()) - model.add(Dense(128 * 7 * 7)) - model.add(BatchNormalization()) - model.add(LeakyReLU()) - if K.image_data_format() == 'channels_first': - model.add(Reshape((128, 7, 7), input_shape=(128 * 7 * 7,))) - bn_axis = 1 - else: - model.add(Reshape((7, 7, 128), input_shape=(128 * 7 * 7,))) - bn_axis = -1 - model.add(Conv2DTranspose(128, (5, 5), strides=2, padding='same')) - model.add(BatchNormalization(axis=bn_axis)) - model.add(LeakyReLU()) - model.add(Convolution2D(64, (5, 5), padding='same')) - model.add(BatchNormalization(axis=bn_axis)) - model.add(LeakyReLU()) - model.add(Conv2DTranspose(64, (5, 5), strides=2, padding='same')) - model.add(BatchNormalization(axis=bn_axis)) - model.add(LeakyReLU()) - # Because we normalized training inputs to lie in the range [-1, 1], - # the tanh function should be used for the output of the generator to ensure - # its output also lies in this range. - model.add(Convolution2D(1, (5, 5), padding='same', activation='tanh')) - return model - - -def make_discriminator(): - """Creates a discriminator model that takes an image as input and outputs a single - value, representing whether the input is real or generated. Unlike normal GANs, the - output is not sigmoid and does not represent a probability! Instead, the output - should be as large and negative as possible for generated inputs and as large and - positive as possible for real inputs. 
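Here is the same penalty computed by hand in numpy (illustrative only), for two toy gradient vectors, one already at unit norm and one far from it:

```python
import numpy as np

GRADIENT_PENALTY_WEIGHT = 10

gradients = np.array([[0.6, 0.8],   # L2 norm 1.0 -> zero penalty
                      [3.0, 4.0]])  # L2 norm 5.0 -> heavily penalized

gradient_l2_norm = np.sqrt(np.sum(np.square(gradients), axis=1))
gradient_penalty = GRADIENT_PENALTY_WEIGHT * np.square(1 - gradient_l2_norm)
print(gradient_penalty)         # [  0. 160.]
print(gradient_penalty.mean())  # 80.0, the scalar added to the critic's loss
```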
- - Note that the improved WGAN paper suggests that BatchNormalization should not be - used in the discriminator.""" - model = Sequential() - if K.image_data_format() == 'channels_first': - model.add(Convolution2D( - 64, (5, 5), padding='same', input_shape=(1, 28, 28))) - else: - model.add(Convolution2D( - 64, (5, 5), padding='same', input_shape=(28, 28, 1))) - model.add(LeakyReLU()) - model.add(Convolution2D(128, (5, 5), kernel_initializer='he_normal', - strides=[2, 2])) - model.add(LeakyReLU()) - model.add(Convolution2D(128, (5, 5), kernel_initializer='he_normal', padding='same', - strides=[2, 2])) - model.add(LeakyReLU()) - model.add(Flatten()) - model.add(Dense(1024, kernel_initializer='he_normal')) - model.add(LeakyReLU()) - model.add(Dense(1, kernel_initializer='he_normal')) - return model - - -def tile_images(image_stack): - """Given a stacked tensor of images, reshapes them into a horizontal tiling for - display.""" - assert len(image_stack.shape) == 3 - image_list = [image_stack[i, :, :] for i in range(image_stack.shape[0])] - tiled_images = np.concatenate(image_list, axis=1) - return tiled_images - - -class RandomWeightedAverage(_Merge): - """Takes a randomly-weighted average of two tensors. In geometric terms, this - outputs a random point on the line between each pair of input points. - - Inheriting from _Merge is a little messy but it was the quickest solution I could - think of. Improvements appreciated.""" - - def _merge_function(self, inputs): - weights = K.random_uniform((BATCH_SIZE, 1, 1, 1)) - return (weights * inputs[0]) + ((1 - weights) * inputs[1]) - - -def generate_images(generator_model, output_dir, epoch): - """Feeds random seeds into the generator and tiles and saves the output to a PNG - file.""" - test_image_stack = generator_model.predict(np.random.rand(10, 100)) - test_image_stack = (test_image_stack * 127.5) + 127.5 - test_image_stack = np.squeeze(np.round(test_image_stack).astype(np.uint8)) - tiled_output = tile_images(test_image_stack) - tiled_output = Image.fromarray( - tiled_output, mode='L') # L specifies greyscale - outfile = os.path.join(output_dir, 'epoch_{}.png'.format(epoch)) - tiled_output.save(outfile) - - -parser = argparse.ArgumentParser(description="Improved Wasserstein GAN " - "implementation for Keras.") -parser.add_argument("--output_dir", "-o", required=True, - help="Directory to output generated files to") -args = parser.parse_args() - -# First we load the image data, reshape it and normalize it to the range [-1, 1] -(X_train, y_train), (X_test, y_test) = mnist.load_data() -X_train = np.concatenate((X_train, X_test), axis=0) -if K.image_data_format() == 'channels_first': - X_train = X_train.reshape( - (X_train.shape[0], 1, X_train.shape[1], X_train.shape[2])) -else: - X_train = X_train.reshape( - (X_train.shape[0], X_train.shape[1], X_train.shape[2], 1)) -X_train = (X_train.astype(np.float32) - 127.5) / 127.5 - -# Now we initialize the generator and discriminator. -generator = make_generator() -discriminator = make_discriminator() - -# The generator_model is used when we want to train the generator layers. -# As such, we ensure that the discriminator layers are not trainable. -# Note that once we compile this model, updating .trainable will have no effect within -# it. As such, it won't cause problems if we later set discriminator.trainable = True -# for the discriminator_model, as long as we compile the generator_model first. 
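`RandomWeightedAverage` is easier to picture in numpy. This is a sketch of what `_merge_function` computes, using hypothetical all-ones/all-zeros batches: one uniform weight per sample picks a random point on the segment between each real/generated pair.

```python
import numpy as np

BATCH_SIZE = 4
real = np.ones((BATCH_SIZE, 28, 28, 1))   # stand-in for a batch of real images
fake = np.zeros((BATCH_SIZE, 28, 28, 1))  # stand-in for generated images

weights = np.random.uniform(size=(BATCH_SIZE, 1, 1, 1))  # broadcasts per sample
averaged = weights * real + (1 - weights) * fake

print(averaged[:, 0, 0, 0])  # sample i is everywhere equal to weights[i]
```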
-for layer in discriminator.layers: - layer.trainable = False -discriminator.trainable = False -generator_input = Input(shape=(100,)) -generator_layers = generator(generator_input) -discriminator_layers_for_generator = discriminator(generator_layers) -generator_model = Model(inputs=[generator_input], - outputs=[discriminator_layers_for_generator]) -# We use the Adam paramaters from Gulrajani et al. -generator_model.compile(optimizer=Adam(0.0001, beta_1=0.5, beta_2=0.9), - loss=wasserstein_loss) - -# Now that the generator_model is compiled, we can make the discriminator -# layers trainable. -for layer in discriminator.layers: - layer.trainable = True -for layer in generator.layers: - layer.trainable = False -discriminator.trainable = True -generator.trainable = False - -# The discriminator_model is more complex. It takes both real image samples and random -# noise seeds as input. The noise seed is run through the generator model to get -# generated images. Both real and generated images are then run through the -# discriminator. Although we could concatenate the real and generated images into a -# single tensor, we don't (see model compilation for why). -real_samples = Input(shape=X_train.shape[1:]) -generator_input_for_discriminator = Input(shape=(100,)) -generated_samples_for_discriminator = generator( - generator_input_for_discriminator) -discriminator_output_from_generator = discriminator( - generated_samples_for_discriminator) -discriminator_output_from_real_samples = discriminator(real_samples) - -# We also need to generate weighted-averages of real and generated samples, -# to use for the gradient norm penalty. -averaged_samples = RandomWeightedAverage()([real_samples, - generated_samples_for_discriminator]) -# We then run these samples through the discriminator as well. Note that we never -# really use the discriminator output for these samples - we're only running them to -# get the gradient norm for the gradient penalty loss. -averaged_samples_out = discriminator(averaged_samples) - -# The gradient penalty loss function requires the input averaged samples to get -# gradients. However, Keras loss functions can only have two arguments, y_true and -# y_pred. We get around this by making a partial() of the function with the averaged -# samples here. -partial_gp_loss = partial(gradient_penalty_loss, - averaged_samples=averaged_samples, - gradient_penalty_weight=GRADIENT_PENALTY_WEIGHT) -# Functions need names or Keras will throw an error -partial_gp_loss.__name__ = 'gradient_penalty' - -# Keras requires that inputs and outputs have the same number of samples. This is why -# we didn't concatenate the real samples and generated samples before passing them to -# the discriminator: If we had, it would create an output with 2 * BATCH_SIZE samples, -# while the output of the "averaged" samples for gradient penalty -# would have only BATCH_SIZE samples. - -# If we don't concatenate the real and generated samples, however, we get three -# outputs: One of the generated samples, one of the real samples, and one of the -# averaged samples, all of size BATCH_SIZE. This works neatly! -discriminator_model = Model(inputs=[real_samples, - generator_input_for_discriminator], - outputs=[discriminator_output_from_real_samples, - discriminator_output_from_generator, - averaged_samples_out]) -# We use the Adam paramaters from Gulrajani et al. 
We use the Wasserstein loss for both
-# the real and generated samples, and the gradient penalty loss for the averaged samples
-discriminator_model.compile(optimizer=Adam(0.0001, beta_1=0.5, beta_2=0.9),
-                            loss=[wasserstein_loss,
-                                  wasserstein_loss,
-                                  partial_gp_loss])
-# We make three label vectors for training. positive_y is the label vector for real
-# samples, with value 1. negative_y is the label vector for generated samples, with
-# value -1. The dummy_y vector is passed to the gradient_penalty loss function and
-# is not used.
-positive_y = np.ones((BATCH_SIZE, 1), dtype=np.float32)
-negative_y = -positive_y
-dummy_y = np.zeros((BATCH_SIZE, 1), dtype=np.float32)
-
-for epoch in range(100):
-    np.random.shuffle(X_train)
-    print("Epoch: ", epoch)
-    print("Number of batches: ", int(X_train.shape[0] // BATCH_SIZE))
-    discriminator_loss = []
-    generator_loss = []
-    minibatches_size = BATCH_SIZE * TRAINING_RATIO
-    for i in range(int(X_train.shape[0] // (BATCH_SIZE * TRAINING_RATIO))):
-        discriminator_minibatches = X_train[i * minibatches_size:
-                                            (i + 1) * minibatches_size]
-        for j in range(TRAINING_RATIO):
-            image_batch = discriminator_minibatches[j * BATCH_SIZE:
-                                                    (j + 1) * BATCH_SIZE]
-            noise = np.random.rand(BATCH_SIZE, 100).astype(np.float32)
-            discriminator_loss.append(discriminator_model.train_on_batch(
-                [image_batch, noise],
-                [positive_y, negative_y, dummy_y]))
-        generator_loss.append(generator_model.train_on_batch(np.random.rand(BATCH_SIZE,
-                                                                            100),
-                                                             positive_y))
-    # Still needs some code to display losses from the generator and discriminator,
-    # progress bars, etc.
-    generate_images(generator, args.output_dir, epoch)
-import keras
-import matplotlib.pyplot as plt
-from keras import backend as K
-from keras_contrib.losses.jaccard import jaccard_distance
-import numpy as np
-
-# Test and plot
-y_pred = np.array([np.arange(-10, 10 + 0.1, 0.1)]).T
-y_true = np.zeros(y_pred.shape)
-name = 'jaccard_distance_loss'
-try:
-    loss = jaccard_distance(
-        K.variable(y_true), K.variable(y_pred)
-    ).eval(session=K.get_session())
-except Exception as e:
-    print("error plotting", name, e)
-else:
-    plt.title(name)
-    plt.plot(y_pred, loss)
-    plt.show()
-
-print("TYPE |Almost_right |half right |all_wrong")
-y_true = np.array([[0, 0, 1, 0], [0, 0, 1, 0], [0, 0, 1., 0.]])
-y_pred = np.array([[0, 0, 0.9, 0], [0, 0, 0.1, 0], [1, 1, 0.1, 1.]])
-
-r = jaccard_distance(
-    K.variable(y_true),
-    K.variable(y_pred),
-).eval(session=K.get_session())
-print('jaccard_distance_loss', r)
-assert r[0] < r[1]
-assert r[1] < r[2]
-
-r = keras.losses.binary_crossentropy(
-    K.variable(y_true),
-    K.variable(y_pred),
-).eval(session=K.get_session())
-print('binary_crossentropy', r)
-print('binary_crossentropy_scaled', r / r.max())
-assert r[0] < r[1]
-assert r[1] < r[2]
-
-"""
-TYPE |Almost_right |half right |all_wrong
-jaccard_distance_loss [ 0.09900928 0.89108944 3.75000238]
-binary_crossentropy [ 0.02634021 0.57564634 12.53243446]
-binary_crossentropy_scaled [ 0.00210176 0.04593252 1. ]
-"""
-from __future__ import absolute_import
-from . import backend
-from . import datasets
-from . import layers
-from . import preprocessing
-from . import utils
-from . import wrappers
-from . import callbacks
-from . import constraints
-from . import initializers
-from . import metrics
-from . import losses
-from . import optimizers
-from . import regularizers
-
-__version__ = '0.0.2'
-import pytest
-from keras import backend as K
-
-
-@pytest.fixture(autouse=True)
-def clear_session_after_test():
-    """Test wrapper to clean up after TensorFlow and CNTK tests.
-
-    This wrapper runs for all the tests in the keras test suite.
-    """
-    yield
-    if K.backend() == 'tensorflow' or K.backend() == 'cntk':
-        K.clear_session()
-from __future__ import absolute_import
-
-from .squash import squash
-from keras import backend as K
-
-
-def squash(x, axis=-1):
-    """
-    Squash activation function (generally used in Capsule layers).
-    """
-    s_squared_norm = K.sum(K.square(x), axis, keepdims=True) + K.epsilon()
-    scale = K.sqrt(s_squared_norm) / (0.5 + s_squared_norm)
-    return scale * x
-from .densenet import DenseNet
-from .resnet import ResNet, ResNet18, ResNet34, ResNet50, ResNet101, ResNet152
-from .wide_resnet import WideResidualNetwork
-from .nasnet import NASNet, NASNetLarge, NASNetMobile
-# -*- coding: utf-8 -*-
-'''DenseNet and DenseNet-FCN models for Keras.
-
-DenseNet is a network architecture where each layer is directly connected
-to every other layer in a feed-forward fashion (within each dense block).
-For each layer, the feature maps of all preceding layers are treated as
-separate inputs whereas its own feature maps are passed on as inputs to
-all subsequent layers. This connectivity pattern yields state-of-the-art
-accuracies on CIFAR10/100 (with or without data augmentation) and SVHN.
-On the large scale ILSVRC 2012 (ImageNet) dataset, DenseNet achieves a
-similar accuracy as ResNet, but using less than half the amount of
-parameters and roughly half the number of FLOPs.
-
-DenseNets support any input image size of 32x32 or greater, and are thus
-suited for CIFAR-10 or CIFAR-100 datasets. There are two types of DenseNets,
-one suited for smaller images (DenseNet) and one suited for ImageNet,
-called DenseNetImageNet. They are differentiated by the strided convolution
-and pooling operations prior to the initial dense block.
-
-The following table describes the size and top-1/top-5 error rates of the
-DenseNetImageNet models on the ImageNet dataset (single crop), for which
-weights are provided:
-------------------------------------------------------------------------------------
- Model type   | ImageNet Error (Top 1) | ImageNet Error (Top 5) | Params (M) |
-------------------------------------------------------------------------------------
-| DenseNet-121 | 25.02 %               | 7.71 %                 | 8.0        |
-| DenseNet-169 | 23.80 %               | 6.85 %                 | 14.3       |
-| DenseNet-201 | 22.58 %               | 6.34 %                 | 20.2       |
-| DenseNet-161 | 22.20 %               | - %                    | 28.9       |
-------------------------------------------------------------------------------------
-
-DenseNets can be extended to image segmentation tasks as described in the
-paper "The One Hundred Layers Tiramisu: Fully Convolutional DenseNets for
-Semantic Segmentation". Here, the dense blocks are arranged and concatenated
-with long skip connections for state of the art performance on the CamVid dataset.
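One detail of the dense-block bookkeeping described above is worth spelling out. My arithmetic, assuming the usual `(depth - 4) / 3` layers-per-block rule this implementation applies when `nb_layers_per_block=-1`: channel counts grow linearly, by `growth_rate` per layer, because every layer's output is concatenated onto the block's running input.

```python
growth_rate = 12
nb_filter = 16                    # channels entering the first block
layers_per_block = (40 - 4) // 3  # DenseNet-40 with 3 dense blocks -> 12

channels = nb_filter
for _ in range(layers_per_block):
    channels += growth_rate       # each layer appends growth_rate feature maps
print(channels)                   # 16 + 12 * 12 = 160 channels leave the block
```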
- -# Reference -- [Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993.pdf) -- [The One Hundred Layers Tiramisu: Fully Convolutional DenseNets for Semantic - Segmentation](https://arxiv.org/pdf/1611.09326.pdf) - -This implementation is based on the following reference code: - - https://github.com/gpleiss/efficient_densenet_pytorch - - https://github.com/liuzhuang13/DenseNet - -''' -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division - -import warnings - -from keras.models import Model -from keras.layers import Dense -from keras.layers import Dropout -from keras.layers import Activation -from keras.layers import Reshape -from keras.layers import Conv2D -from keras.layers import Conv2DTranspose -from keras.layers import UpSampling2D -from keras.layers import MaxPooling2D -from keras.layers import AveragePooling2D -from keras.layers import GlobalMaxPooling2D -from keras.layers import GlobalAveragePooling2D -from keras.layers import Input -from keras.layers import concatenate -from keras.layers import BatchNormalization -from keras.regularizers import l2 -from keras.utils.layer_utils import convert_all_kernels_in_model -from keras.utils.data_utils import get_file -from keras.engine.topology import get_source_inputs -from keras_applications.imagenet_utils import _obtain_input_shape -from keras.applications.imagenet_utils import preprocess_input as _preprocess_input -import keras.backend as K - -from keras_contrib.layers import SubPixelUpscaling - -DENSENET_121_WEIGHTS_PATH = (r'https://github.com/titu1994/DenseNet/releases/download' - r'/v3.0/DenseNet-BC-121-32.h5') -DENSENET_161_WEIGHTS_PATH = (r'https://github.com/titu1994/DenseNet/releases/download' - r'/v3.0/DenseNet-BC-161-48.h5') -DENSENET_169_WEIGHTS_PATH = (r'https://github.com/titu1994/DenseNet/releases/download' - r'/v3.0/DenseNet-BC-169-32.h5') -DENSENET_121_WEIGHTS_PATH_NO_TOP = (r'https://github.com/titu1994/DenseNet/releases/' - r'download/v3.0/DenseNet-BC-121-32-no-top.h5') -DENSENET_161_WEIGHTS_PATH_NO_TOP = (r'https://github.com/titu1994/DenseNet/releases/' - r'download/v3.0/DenseNet-BC-161-48-no-top.h5') -DENSENET_169_WEIGHTS_PATH_NO_TOP = (r'https://github.com/titu1994/DenseNet/releases/' - r'download/v3.0/DenseNet-BC-169-32-no-top.h5') - - -def preprocess_input(x, data_format=None): - """Preprocesses a tensor encoding a batch of images. - - # Arguments - x: input Numpy tensor, 4D. - data_format: data format of the image tensor. - - # Returns - Preprocessed tensor. - """ - x = _preprocess_input(x, data_format=data_format) - x *= 0.017 # scale values - return x - - -def DenseNet(input_shape=None, - depth=40, - nb_dense_block=3, - growth_rate=12, - nb_filter=-1, - nb_layers_per_block=-1, - bottleneck=False, - reduction=0.0, - dropout_rate=0.0, - weight_decay=1e-4, - subsample_initial_block=False, - include_top=True, - weights=None, - input_tensor=None, - pooling=None, - classes=10, - activation='softmax', - transition_pooling='avg'): - '''Instantiate the DenseNet architecture. - - The model and the weights are compatible with both - TensorFlow and Theano. The dimension ordering - convention used by the model is the one - specified in your Keras config file. - - # Arguments - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` (with `channels_last` dim ordering) - or `(3, 224, 224)` (with `channels_first` dim ordering). 
- It should have exactly 3 inputs channels, - and width and height should be no smaller than 8. - E.g. `(224, 224, 3)` would be one valid value. - depth: number or layers in the DenseNet - nb_dense_block: number of dense blocks to add to end - growth_rate: number of filters to add per dense block - nb_filter: initial number of filters. -1 indicates initial - number of filters will default to 2 * growth_rate - nb_layers_per_block: number of layers in each dense block. - Can be a -1, positive integer or a list. - If -1, calculates nb_layer_per_block from the network depth. - If positive integer, a set number of layers per dense block. - If list, nb_layer is used as provided. Note that list size must - be nb_dense_block - bottleneck: flag to add bottleneck blocks in between dense blocks - reduction: reduction factor of transition blocks. - Note : reduction value is inverted to compute compression. - dropout_rate: dropout rate - weight_decay: weight decay rate - subsample_initial_block: Changes model type to suit different datasets. - Should be set to True for ImageNet, and False for CIFAR datasets. - When set to True, the initial convolution will be strided and - adds a MaxPooling2D before the initial dense block. - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization) or - 'imagenet' (pre-training on ImageNet).. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - activation: Type of activation at the top layer. Can be one of - 'softmax' or 'sigmoid'. Note that if sigmoid is used, - classes must be 1. - transition_pooling: `avg` for avg pooling (default), `max` for max pooling, - None for no pooling during scale transition blocks. Please note that this - default differs from the DenseNetFCN paper in accordance with the DenseNet - paper. - - # Returns - A Keras model instance. - - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. 
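-
-    # Example
-        A minimal sketch (an editorial addition; it assumes the TensorFlow
-        backend and mirrors the DenseNet-121 configuration for which ImageNet
-        weights are provided, i.e. the `DenseNetImageNet121` helper below):
-
-            model = DenseNet(input_shape=(224, 224, 3), depth=121,
-                             nb_dense_block=4, growth_rate=32, nb_filter=64,
-                             nb_layers_per_block=[6, 12, 24, 16],
-                             bottleneck=True, reduction=0.5,
-                             subsample_initial_block=True,
-                             weights='imagenet', classes=1000)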
- ''' - - if weights not in {'imagenet', None}: - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `imagenet` ' - '(pre-training on ImageNet).') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as ImageNet with `include_top` ' - 'as true, `classes` should be 1000') - - if activation not in ['softmax', 'sigmoid']: - raise ValueError('activation must be one of "softmax" or "sigmoid"') - - if activation == 'sigmoid' and classes != 1: - raise ValueError( - 'sigmoid activation can only be used when classes = 1') - - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=32, - min_size=8, - data_format=K.image_data_format(), - require_flatten=include_top) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - x = __create_dense_net(classes, img_input, include_top, depth, nb_dense_block, - growth_rate, nb_filter, nb_layers_per_block, bottleneck, - reduction, dropout_rate, weight_decay, - subsample_initial_block, pooling, activation, - transition_pooling) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = Model(inputs, x, name='densenet') - - # load weights - if weights == 'imagenet': - weights_loaded = False - - if ((depth == 121) and (nb_dense_block == 4) and (growth_rate == 32) and - (nb_filter == 64) and (bottleneck is True) and (reduction == 0.5) and - subsample_initial_block): - if include_top: - weights_path = get_file('DenseNet-BC-121-32.h5', - DENSENET_121_WEIGHTS_PATH, - cache_subdir='models', - md5_hash='a439dd41aa672aef6daba4ee1fd54abd') - else: - weights_path = get_file('DenseNet-BC-121-32-no-top.h5', - DENSENET_121_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='55e62a6358af8a0af0eedf399b5aea99') - model.load_weights(weights_path, by_name=True) - weights_loaded = True - - if ((depth == 161) and (nb_dense_block == 4) and (growth_rate == 48) and - (nb_filter == 96) and (bottleneck is True) and (reduction == 0.5) and - subsample_initial_block): - if include_top: - weights_path = get_file('DenseNet-BC-161-48.h5', - DENSENET_161_WEIGHTS_PATH, - cache_subdir='models', - md5_hash='6c326cf4fbdb57d31eff04333a23fcca') - else: - weights_path = get_file('DenseNet-BC-161-48-no-top.h5', - DENSENET_161_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='1a9476b79f6b7673acaa2769e6427b92') - model.load_weights(weights_path, by_name=True) - weights_loaded = True - - if ((depth == 169) and (nb_dense_block == 4) and (growth_rate == 32) and - (nb_filter == 64) and (bottleneck is True) and (reduction == 0.5) and - subsample_initial_block): - if include_top: - weights_path = get_file('DenseNet-BC-169-32.h5', - DENSENET_169_WEIGHTS_PATH, - cache_subdir='models', - md5_hash='914869c361303d2e39dec640b4e606a6') - else: - weights_path = get_file('DenseNet-BC-169-32-no-top.h5', - DENSENET_169_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='89c19e8276cfd10585d5fadc1df6859e') - model.load_weights(weights_path, by_name=True) - weights_loaded = True - - if weights_loaded: - if K.backend() == 'theano': - convert_all_kernels_in_model(model) - - if ((K.image_data_format() == 'channels_first') and - (K.backend() == 
'tensorflow')): - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image data format convention ' - '(`image_data_format="channels_first"`). ' - 'For best performance, set ' - '`image_data_format="channels_last"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - - print("Weights for the model were loaded successfully") - - return model - - -def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_block=4, - reduction=0.0, dropout_rate=0.0, weight_decay=1E-4, - init_conv_filters=48, include_top=True, weights=None, input_tensor=None, - classes=1, activation='softmax', upsampling_conv=128, - upsampling_type='deconv', early_transition=False, - transition_pooling='max', initial_kernel_size=(3, 3)): - '''Instantiate the DenseNet FCN architecture. - Note that when using TensorFlow, - for best performance you should set - `image_data_format='channels_last'` in your Keras config - at ~/.keras/keras.json. - # Arguments - nb_dense_block: number of dense blocks to add to end (generally = 3) - growth_rate: number of filters to add per dense block - nb_layers_per_block: number of layers in each dense block. - Can be a positive integer or a list. - If positive integer, a set number of layers per dense block. - If list, nb_layer is used as provided. Note that list size must - be (nb_dense_block + 1) - reduction: reduction factor of transition blocks. - Note : reduction value is inverted to compute compression. - dropout_rate: dropout rate - weight_decay: weight decay factor - init_conv_filters: number of layers in the initial convolution layer - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization) or - 'cifar10' (pre-training on CIFAR-10).. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(32, 32, 3)` (with `channels_last` dim ordering) - or `(3, 32, 32)` (with `channels_first` dim ordering). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 8. - E.g. `(200, 200, 3)` would be one valid value. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - activation: Type of activation at the top layer. Can be one of 'softmax' - or 'sigmoid'. Note that if sigmoid is used, classes must be 1. - upsampling_conv: number of convolutional layers in upsampling via subpixel - convolution - upsampling_type: Can be one of 'deconv', 'upsampling' and - 'subpixel'. Defines type of upsampling algorithm used. - batchsize: Fixed batch size. This is a temporary requirement for - computation of output shape in the case of Deconvolution2D layers. - Parameter will be removed in next iteration of Keras, which infers - output shape of deconvolution layers automatically. - early_transition: Start with an extra initial transition down and end with - an extra transition up to reduce the network size. - initial_kernel_size: The first Conv2D kernel might vary in size based on the - application, this parameter makes it configurable. - - # Returns - A Keras model instance. 
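-
-    # Example
-        A minimal sketch (an editorial addition; the input shape and the class
-        count are illustrative only):
-
-            model = DenseNetFCN(input_shape=(224, 224, 3), nb_dense_block=5,
-                                growth_rate=16, nb_layers_per_block=4,
-                                classes=21, upsampling_type='deconv')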
- ''' - - if weights not in {None}: - raise ValueError('The `weights` argument should be ' - '`None` (random initialization) as no ' - 'model weights are provided.') - - upsampling_type = upsampling_type.lower() - - if upsampling_type not in ['upsampling', 'deconv', 'subpixel']: - raise ValueError('Parameter "upsampling_type" must be one of "upsampling", ' - '"deconv" or "subpixel".') - - if input_shape is None: - raise ValueError('For fully convolutional models, ' - 'input shape must be supplied.') - - if type(nb_layers_per_block) is not list and nb_dense_block < 1: - raise ValueError('Number of dense layers per block must be greater than 1. ' - 'Argument value was %d.' % nb_layers_per_block) - - if activation not in ['softmax', 'sigmoid']: - raise ValueError('activation must be one of "softmax" or "sigmoid"') - - if activation == 'sigmoid' and classes != 1: - raise ValueError( - 'sigmoid activation can only be used when classes = 1') - - # Determine proper input shape - min_size = 2 ** nb_dense_block - - if K.image_data_format() == 'channels_first': - if input_shape is not None: - if ((input_shape[1] is not None and input_shape[1] < min_size) or - (input_shape[2] is not None and input_shape[2] < min_size)): - raise ValueError('Input size must be at least ' + - str(min_size) + 'x' + str(min_size) + - ', got `input_shape=' + str(input_shape) + '`') - else: - input_shape = (classes, None, None) - else: - if input_shape is not None: - if ((input_shape[0] is not None and input_shape[0] < min_size) or - (input_shape[1] is not None and input_shape[1] < min_size)): - raise ValueError('Input size must be at least ' + - str(min_size) + 'x' + str(min_size) + - ', got `input_shape=' + str(input_shape) + '`') - else: - input_shape = (None, None, classes) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - x = __create_fcn_dense_net(classes, img_input, include_top, nb_dense_block, - growth_rate, reduction, dropout_rate, weight_decay, - nb_layers_per_block, upsampling_conv, upsampling_type, - init_conv_filters, input_shape, activation, - early_transition, transition_pooling, - initial_kernel_size) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. 
- model = Model(inputs, x, name='fcn-densenet') - - return model - - -def DenseNetImageNet121(input_shape=None, - bottleneck=True, - reduction=0.5, - dropout_rate=0.0, - weight_decay=1e-4, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - activation='softmax'): - return DenseNet(input_shape, depth=121, nb_dense_block=4, growth_rate=32, - nb_filter=64, nb_layers_per_block=[6, 12, 24, 16], - bottleneck=bottleneck, reduction=reduction, - dropout_rate=dropout_rate, weight_decay=weight_decay, - subsample_initial_block=True, include_top=include_top, - weights=weights, input_tensor=input_tensor, - pooling=pooling, classes=classes, activation=activation) - - -def DenseNetImageNet169(input_shape=None, - bottleneck=True, - reduction=0.5, - dropout_rate=0.0, - weight_decay=1e-4, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - activation='softmax'): - return DenseNet(input_shape, depth=169, nb_dense_block=4, growth_rate=32, - nb_filter=64, nb_layers_per_block=[6, 12, 32, 32], - bottleneck=bottleneck, reduction=reduction, - dropout_rate=dropout_rate, weight_decay=weight_decay, - subsample_initial_block=True, include_top=include_top, - weights=weights, input_tensor=input_tensor, - pooling=pooling, classes=classes, activation=activation) - - -def DenseNetImageNet201(input_shape=None, - bottleneck=True, - reduction=0.5, - dropout_rate=0.0, - weight_decay=1e-4, - include_top=True, - weights=None, - input_tensor=None, - pooling=None, - classes=1000, - activation='softmax'): - return DenseNet(input_shape, depth=201, nb_dense_block=4, growth_rate=32, - nb_filter=64, nb_layers_per_block=[6, 12, 48, 32], - bottleneck=bottleneck, reduction=reduction, - dropout_rate=dropout_rate, weight_decay=weight_decay, - subsample_initial_block=True, include_top=include_top, - weights=weights, input_tensor=input_tensor, - pooling=pooling, classes=classes, activation=activation) - - -def DenseNetImageNet264(input_shape=None, - bottleneck=True, - reduction=0.5, - dropout_rate=0.0, - weight_decay=1e-4, - include_top=True, - weights=None, - input_tensor=None, - pooling=None, - classes=1000, - activation='softmax'): - return DenseNet(input_shape, depth=264, nb_dense_block=4, growth_rate=32, - nb_filter=64, nb_layers_per_block=[6, 12, 64, 48], - bottleneck=bottleneck, reduction=reduction, - dropout_rate=dropout_rate, weight_decay=weight_decay, - subsample_initial_block=True, include_top=include_top, - weights=weights, input_tensor=input_tensor, - pooling=pooling, classes=classes, activation=activation) - - -def DenseNetImageNet161(input_shape=None, - bottleneck=True, - reduction=0.5, - dropout_rate=0.0, - weight_decay=1e-4, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - activation='softmax'): - return DenseNet(input_shape, depth=161, nb_dense_block=4, growth_rate=48, - nb_filter=96, nb_layers_per_block=[6, 12, 36, 24], - bottleneck=bottleneck, reduction=reduction, - dropout_rate=dropout_rate, weight_decay=weight_decay, - subsample_initial_block=True, include_top=include_top, - weights=weights, input_tensor=input_tensor, - pooling=pooling, classes=classes, activation=activation) - - -def name_or_none(prefix, name): - return prefix + name if (prefix is not None and name is not None) else None - - -def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, - weight_decay=1e-4, block_prefix=None): - ''' - Adds a convolution layer (with batch normalization and relu), - and optionally 
a bottleneck layer. - - # Arguments - ip: Input tensor - nb_filter: integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution) - bottleneck: if True, adds a bottleneck convolution block - dropout_rate: dropout rate - weight_decay: weight decay factor - block_prefix: str, for unique layer naming - - # Input shape - 4D tensor with shape: - `(samples, channels, rows, cols)` if data_format='channels_first' - or 4D tensor with shape: - `(samples, rows, cols, channels)` if data_format='channels_last'. - - # Output shape - 4D tensor with shape: - `(samples, filters, new_rows, new_cols)` if data_format='channels_first' - or 4D tensor with shape: - `(samples, new_rows, new_cols, filters)` if data_format='channels_last'. - `rows` and `cols` values might have changed due to stride. - - # Returns - output tensor of block - ''' - with K.name_scope('ConvBlock'): - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, - name=name_or_none(block_prefix, '_bn'))(ip) - x = Activation('relu')(x) - - if bottleneck: - inter_channel = nb_filter * 4 - - x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', - padding='same', use_bias=False, - kernel_regularizer=l2(weight_decay), - name=name_or_none(block_prefix, '_bottleneck_conv2D'))(x) - x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, - name=name_or_none(block_prefix, '_bottleneck_bn'))(x) - x = Activation('relu')(x) - - x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', padding='same', - use_bias=False, name=name_or_none(block_prefix, '_conv2D'))(x) - if dropout_rate: - x = Dropout(dropout_rate)(x) - - return x - - -def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, - dropout_rate=None, weight_decay=1e-4, grow_nb_filters=True, - return_concat_list=False, block_prefix=None): - ''' - Build a dense_block where the output of each conv_block is fed - to subsequent ones - - # Arguments - x: input keras tensor - nb_layers: the number of conv_blocks to append to the model - nb_filter: integer, the dimensionality of the output space - (i.e. 
the number output of filters in the convolution) - growth_rate: growth rate of the dense block - bottleneck: if True, adds a bottleneck convolution block to - each conv_block - dropout_rate: dropout rate - weight_decay: weight decay factor - grow_nb_filters: if True, allows number of filters to grow - return_concat_list: set to True to return the list of - feature maps along with the actual output - block_prefix: str, for block unique naming - - # Return - If return_concat_list is True, returns a list of the output - keras tensor, the number of filters and a list of all the - dense blocks added to the keras tensor - - If return_concat_list is False, returns a list of the output - keras tensor and the number of filters - ''' - with K.name_scope('DenseBlock'): - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - x_list = [x] - - for i in range(nb_layers): - cb = __conv_block(x, growth_rate, bottleneck, dropout_rate, weight_decay, - block_prefix=name_or_none(block_prefix, '_%i' % i)) - x_list.append(cb) - - x = concatenate([x, cb], axis=concat_axis) - - if grow_nb_filters: - nb_filter += growth_rate - - if return_concat_list: - return x, nb_filter, x_list - else: - return x, nb_filter - - -def __transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4, - block_prefix=None, transition_pooling='max'): - ''' - Adds a pointwise convolution layer (with batch normalization and relu), - and an average pooling layer. The number of output convolution filters - can be reduced by appropriately reducing the compression parameter. - - # Arguments - ip: input keras tensor - nb_filter: integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution) - compression: calculated as 1 - reduction. Reduces the number - of feature maps in the transition block. - weight_decay: weight decay factor - block_prefix: str, for block unique naming - - # Input shape - 4D tensor with shape: - `(samples, channels, rows, cols)` if data_format='channels_first' - or 4D tensor with shape: - `(samples, rows, cols, channels)` if data_format='channels_last'. - - # Output shape - 4D tensor with shape: - `(samples, nb_filter * compression, rows / 2, cols / 2)` - if data_format='channels_first' - or 4D tensor with shape: - `(samples, rows / 2, cols / 2, nb_filter * compression)` - if data_format='channels_last'. - - # Returns - a keras tensor - ''' - with K.name_scope('Transition'): - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, - name=name_or_none(block_prefix, '_bn'))(ip) - x = Activation('relu')(x) - x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_normal', - padding='same', use_bias=False, kernel_regularizer=l2(weight_decay), - name=name_or_none(block_prefix, '_conv2D'))(x) - if transition_pooling == 'avg': - x = AveragePooling2D((2, 2), strides=(2, 2))(x) - elif transition_pooling == 'max': - x = MaxPooling2D((2, 2), strides=(2, 2))(x) - - return x - - -def __transition_up_block(ip, nb_filters, type='deconv', weight_decay=1E-4, - block_prefix=None): - '''Adds an upsampling block. Upsampling operation relies on the the type parameter. - - # Arguments - ip: input keras tensor - nb_filters: integer, the dimensionality of the output space - (i.e. the number output of filters in the convolution) - type: can be 'upsampling', 'subpixel', 'deconv'. 
Determines - type of upsampling performed - weight_decay: weight decay factor - block_prefix: str, for block unique naming - - # Input shape - 4D tensor with shape: - `(samples, channels, rows, cols)` if data_format='channels_first' - or 4D tensor with shape: - `(samples, rows, cols, channels)` if data_format='channels_last'. - - # Output shape - 4D tensor with shape: - `(samples, nb_filter, rows * 2, cols * 2)` if data_format='channels_first' - or 4D tensor with shape: - `(samples, rows * 2, cols * 2, nb_filter)` if data_format='channels_last'. - - # Returns - a keras tensor - ''' - with K.name_scope('TransitionUp'): - - if type == 'upsampling': - x = UpSampling2D(name=name_or_none( - block_prefix, '_upsampling'))(ip) - elif type == 'subpixel': - x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', - kernel_regularizer=l2(weight_decay), use_bias=False, - kernel_initializer='he_normal', - name=name_or_none(block_prefix, '_conv2D'))(ip) - x = SubPixelUpscaling(scale_factor=2, - name=name_or_none(block_prefix, '_subpixel'))(x) - x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', - kernel_regularizer=l2(weight_decay), use_bias=False, - kernel_initializer='he_normal', - name=name_or_none(block_prefix, '_conv2D'))(x) - else: - x = Conv2DTranspose(nb_filters, (3, 3), activation='relu', padding='same', - strides=(2, 2), kernel_initializer='he_normal', - kernel_regularizer=l2(weight_decay), - name=name_or_none(block_prefix, '_conv2DT'))(ip) - return x - - -def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_block=3, - growth_rate=12, nb_filter=-1, nb_layers_per_block=-1, - bottleneck=False, reduction=0.0, dropout_rate=None, - weight_decay=1e-4, subsample_initial_block=False, pooling=None, - activation='softmax', transition_pooling='avg'): - ''' Build the DenseNet model - - # Arguments - nb_classes: number of classes - img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels) - include_top: flag to include the final Dense layer - depth: number or layers - nb_dense_block: number of dense blocks to add to end (generally = 3) - growth_rate: number of filters to add per dense block - nb_filter: initial number of filters. Default -1 indicates initial number - of filters is 2 * growth_rate - nb_layers_per_block: number of layers in each dense block. - Can be a -1, positive integer or a list. - If -1, calculates nb_layer_per_block from the depth of the network. - If positive integer, a set number of layers per dense block. - If list, nb_layer is used as provided. Note that list size must - be (nb_dense_block + 1) - bottleneck: add bottleneck blocks - reduction: reduction factor of transition blocks. Note : reduction value is - inverted to compute compression - dropout_rate: dropout rate - weight_decay: weight decay rate - subsample_initial_block: Changes model type to suit different datasets. - Should be set to True for ImageNet, and False for CIFAR datasets. - When set to True, the initial convolution will be strided and - adds a MaxPooling2D before the initial dense block. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. 
- activation: Type of activation at the top layer. Can be one of 'softmax' or - 'sigmoid'. Note that if sigmoid is used, classes must be 1. - transition_pooling: `avg` for avg pooling (default), `max` for max pooling, - None for no pooling during scale transition blocks. Please note that this - default differs from the DenseNetFCN paper in accordance with the DenseNet - paper. - - # Returns - a keras tensor - - # Raises - ValueError: in case of invalid argument for `reduction` - or `nb_dense_block` - ''' - with K.name_scope('DenseNet'): - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - if reduction != 0.0: - if not (reduction <= 1.0 and reduction > 0.0): - raise ValueError( - '`reduction` value must lie between 0.0 and 1.0') - - # layers in each dense block - if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple: - nb_layers = list(nb_layers_per_block) # Convert tuple to list - - if len(nb_layers) != nb_dense_block: - raise ValueError('If `nb_dense_block` is a list, its length must match ' - 'the number of layers provided by `nb_layers`.') - - final_nb_layer = nb_layers[-1] - nb_layers = nb_layers[:-1] - else: - if nb_layers_per_block == -1: - assert (depth - 4) % 3 == 0, ('Depth must be 3 N + 4 ' - 'if nb_layers_per_block == -1') - count = int((depth - 4) / 3) - - if bottleneck: - count = count // 2 - - nb_layers = [count for _ in range(nb_dense_block)] - final_nb_layer = count - else: - final_nb_layer = nb_layers_per_block - nb_layers = [nb_layers_per_block] * nb_dense_block - - # compute initial nb_filter if -1, else accept users initial nb_filter - if nb_filter <= 0: - nb_filter = 2 * growth_rate - - # compute compression factor - compression = 1.0 - reduction - - # Initial convolution - if subsample_initial_block: - initial_kernel = (7, 7) - initial_strides = (2, 2) - else: - initial_kernel = (3, 3) - initial_strides = (1, 1) - - x = Conv2D(nb_filter, initial_kernel, kernel_initializer='he_normal', - padding='same', name='initial_conv2D', strides=initial_strides, - use_bias=False, kernel_regularizer=l2(weight_decay))(img_input) - - if subsample_initial_block: - x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5, - name='initial_bn')(x) - x = Activation('relu')(x) - x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x) - - # Add dense blocks - for block_idx in range(nb_dense_block - 1): - x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, - growth_rate, bottleneck=bottleneck, - dropout_rate=dropout_rate, - weight_decay=weight_decay, - block_prefix='dense_%i' % block_idx) - # add transition_block - x = __transition_block(x, nb_filter, compression=compression, - weight_decay=weight_decay, - block_prefix='tr_%i' % block_idx, - transition_pooling=transition_pooling) - nb_filter = int(nb_filter * compression) - - # The last dense_block does not have a transition_block - x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate, - bottleneck=bottleneck, dropout_rate=dropout_rate, - weight_decay=weight_decay, - block_prefix='dense_%i' % (nb_dense_block - 1)) - - x = BatchNormalization( - axis=concat_axis, epsilon=1.1e-5, name='final_bn')(x) - x = Activation('relu')(x) - - if include_top: - if pooling == 'avg': - x = GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = GlobalMaxPooling2D()(x) - x = Dense(nb_classes, activation=activation)(x) - else: - if pooling == 'avg': - x = GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = GlobalMaxPooling2D()(x) - - return x - - -def 
__create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, - growth_rate=12, reduction=0.0, dropout_rate=None, - weight_decay=1e-4, nb_layers_per_block=4, - nb_upsampling_conv=128, upsampling_type='deconv', - init_conv_filters=48, input_shape=None, activation='softmax', - early_transition=False, transition_pooling='max', - initial_kernel_size=(3, 3)): - ''' Build the DenseNet-FCN model - - # Arguments - nb_classes: number of classes - img_input: tuple of shape (channels, rows, columns) or (rows, columns, channels) - include_top: flag to include the final Dense layer - nb_dense_block: number of dense blocks to add to end (generally = 3) - growth_rate: number of filters to add per dense block - reduction: reduction factor of transition blocks. Note : reduction value - is inverted to compute compression - dropout_rate: dropout rate - weight_decay: weight decay - nb_layers_per_block: number of layers in each dense block. - Can be a positive integer or a list. - If positive integer, a set number of layers per dense block. - If list, nb_layer is used as provided. Note that list size must - be (nb_dense_block + 1) - nb_upsampling_conv: number of convolutional layers in upsampling via subpixel - convolution - upsampling_type: Can be one of 'upsampling', 'deconv' and 'subpixel'. Defines - type of upsampling algorithm used. - input_shape: Only used for shape inference in fully convolutional networks. - activation: Type of activation at the top layer. Can be one of 'softmax' or - 'sigmoid'. Note that if sigmoid is used, classes must be 1. - early_transition: Start with an extra initial transition down and end with an - extra transition up to reduce the network size. - transition_pooling: 'max' for max pooling (default), 'avg' for average pooling, - None for no pooling. Please note that this default differs from the DenseNet - paper in accordance with the DenseNetFCN paper. - initial_kernel_size: The first Conv2D kernel might vary in size based on the - application, this parameter makes it configurable. - - # Returns - a keras tensor - - # Raises - ValueError: in case of invalid argument for `reduction`, - `nb_dense_block` or `nb_upsampling_conv`. 
- ''' - with K.name_scope('DenseNetFCN'): - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - if concat_axis == 1: # channels_first dim ordering - _, rows, cols = input_shape - else: - rows, cols, _ = input_shape - - if reduction != 0.0: - if not (reduction <= 1.0 and reduction > 0.0): - raise ValueError( - '`reduction` value must lie between 0.0 and 1.0') - - # check if upsampling_conv has minimum number of filters minimum - # is set to 12, as at least 3 color channels are needed for correct upsampling - if not (nb_upsampling_conv > 12 and nb_upsampling_conv % 4 == 0): - raise ValueError('Parameter `nb_upsampling_conv` number of channels must ' - 'be a positive number divisible by 4 and greater than 12') - - # layers in each dense block - if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple: - nb_layers = list(nb_layers_per_block) # Convert tuple to list - - if len(nb_layers) != (nb_dense_block + 1): - raise ValueError('If `nb_dense_block` is a list, its length must be ' - '(`nb_dense_block` + 1)') - - bottleneck_nb_layers = nb_layers[-1] - rev_layers = nb_layers[::-1] - nb_layers.extend(rev_layers[1:]) - else: - bottleneck_nb_layers = nb_layers_per_block - nb_layers = [nb_layers_per_block] * (2 * nb_dense_block + 1) - - # compute compression factor - compression = 1.0 - reduction - - # Initial convolution - x = Conv2D(init_conv_filters, initial_kernel_size, - kernel_initializer='he_normal', padding='same', - name='initial_conv2D', use_bias=False, - kernel_regularizer=l2(weight_decay))(img_input) - x = BatchNormalization( - axis=concat_axis, epsilon=1.1e-5, name='initial_bn')(x) - x = Activation('relu')(x) - - nb_filter = init_conv_filters - - skip_list = [] - - if early_transition: - x = __transition_block(x, nb_filter, compression=compression, - weight_decay=weight_decay, block_prefix='tr_early', - transition_pooling=transition_pooling) - - # Add dense blocks and transition down block - for block_idx in range(nb_dense_block): - x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, - growth_rate, dropout_rate=dropout_rate, - weight_decay=weight_decay, - block_prefix='dense_%i' % block_idx) - - # Skip connection - skip_list.append(x) - - # add transition_block - x = __transition_block(x, nb_filter, compression=compression, - weight_decay=weight_decay, - block_prefix='tr_%i' % block_idx, - transition_pooling=transition_pooling) - - # this is calculated inside transition_down_block - nb_filter = int(nb_filter * compression) - - # The last dense_block does not have a transition_down_block - # return the concatenated feature maps without the concatenation of the input - block_prefix = 'dense_%i' % nb_dense_block - _, nb_filter, concat_list = __dense_block(x, bottleneck_nb_layers, nb_filter, - growth_rate, - dropout_rate=dropout_rate, - weight_decay=weight_decay, - return_concat_list=True, - block_prefix=block_prefix) - - skip_list = skip_list[::-1] # reverse the skip list - - # Add dense blocks and transition up block - for block_idx in range(nb_dense_block): - n_filters_keep = growth_rate * \ - nb_layers[nb_dense_block + block_idx] - - # upsampling block must upsample only the feature maps (concat_list[1:]), - # not the concatenation of the input with the feature maps (concat_list[0]. 
- l = concatenate(concat_list[1:], axis=concat_axis) - - t = __transition_up_block(l, nb_filters=n_filters_keep, - type=upsampling_type, weight_decay=weight_decay, - block_prefix='tr_up_%i' % block_idx) - - # concatenate the skip connection with the transition block - x = concatenate([t, skip_list[block_idx]], axis=concat_axis) - - # Dont allow the feature map size to grow in upsampling dense blocks - block_layer_index = nb_dense_block + 1 + block_idx - block_prefix = 'dense_%i' % (block_layer_index) - x_up, nb_filter, concat_list = __dense_block(x, - nb_layers[block_layer_index], - nb_filter=growth_rate, - growth_rate=growth_rate, - dropout_rate=dropout_rate, - weight_decay=weight_decay, - return_concat_list=True, - grow_nb_filters=False, - block_prefix=block_prefix) - - if early_transition: - x_up = __transition_up_block(x_up, nb_filters=nb_filter, - type=upsampling_type, - weight_decay=weight_decay, - block_prefix='tr_up_early') - if include_top: - x = Conv2D(nb_classes, (1, 1), activation='linear', padding='same', - use_bias=False)(x_up) - - if K.image_data_format() == 'channels_first': - channel, row, col = input_shape - else: - row, col, channel = input_shape - - x = Reshape((row * col, nb_classes))(x) - x = Activation(activation)(x) - x = Reshape((row, col, nb_classes))(x) - else: - x = x_up - - return x -"""Collection of NASNet models - -The reference paper: - - [Learning Transferable Architectures for Scalable Image Recognition] - (https://arxiv.org/abs/1707.07012) - -The reference implementation: -1. TF Slim - - https://github.com/tensorflow/models/blob/master/research/slim/nets/ - nasnet/nasnet.py -2. TensorNets - - https://github.com/taehoonlee/tensornets/blob/master/tensornets/nasnets.py -3. Weights - - https://github.com/tensorflow/models/tree/master/research/slim/nets/nasnet -""" -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division - -import warnings - -from keras.models import Model -from keras.layers import Input -from keras.layers import Activation -from keras.layers import Dense -from keras.layers import Flatten -from keras.layers import Dropout -from keras.layers import BatchNormalization -from keras.layers import MaxPooling2D -from keras.layers import AveragePooling2D -from keras.layers import GlobalAveragePooling2D -from keras.layers import GlobalMaxPooling2D -from keras.layers import Conv2D -from keras.layers import SeparableConv2D -from keras.layers import ZeroPadding2D -from keras.layers import Cropping2D -from keras.layers import concatenate -from keras.layers import add -from keras.regularizers import l2 -from keras.utils.data_utils import get_file -from keras.engine.topology import get_source_inputs -from keras_applications.imagenet_utils import _obtain_input_shape -from keras import backend as K - -_BN_DECAY = 0.9997 -_BN_EPSILON = 1e-3 - -NASNET_MOBILE_WEIGHT_PATH = ( - "https://github.com/titu1994/Keras-NASNet/" - "releases/download/v1.0/NASNet-mobile.h5") -NASNET_MOBILE_WEIGHT_PATH_NO_TOP = ( - "https://github.com/titu1994/Keras-NASNet/" - "releases/download/v1.0/NASNet-mobile-no-top.h5") -NASNET_MOBILE_WEIGHT_PATH_WITH_AUXULARY = ( - "https://github.com/titu1994/Keras-NASNet/" - "releases/download/v1.0/NASNet-auxiliary-mobile.h5") -NASNET_MOBILE_WEIGHT_PATH_WITH_AUXULARY_NO_TOP = ( - "https://github.com/titu1994/Keras-NASNet/" - "releases/download/v1.0/NASNet-auxiliary-mobile-no-top.h5") -NASNET_LARGE_WEIGHT_PATH = ( - 
"https://github.com/titu1994/Keras-NASNet/releases/download/v1.1/NASNet-large.h5") -NASNET_LARGE_WEIGHT_PATH_NO_TOP = ( - "https://github.com/titu1994/Keras-NASNet/" - "releases/download/v1.1/NASNet-large-no-top.h5") -NASNET_LARGE_WEIGHT_PATH_WITH_auxiliary = ( - "https://github.com/titu1994/Keras-NASNet/" - "releases/download/v1.1/NASNet-auxiliary-large.h5") -NASNET_LARGE_WEIGHT_PATH_WITH_auxiliary_NO_TOP = ( - "https://github.com/titu1994/Keras-NASNet/" - "releases/download/v1.1/NASNet-auxiliary-large-no-top.h5") - - -def NASNet(input_shape=None, - penultimate_filters=4032, - nb_blocks=6, - stem_filters=96, - initial_reduction=True, - skip_reduction_layer_input=True, - use_auxiliary_branch=False, - filters_multiplier=2, - dropout=0.5, - weight_decay=5e-5, - include_top=True, - weights=None, - input_tensor=None, - pooling=None, - classes=1000, - default_size=None, - activation='softmax'): - """Instantiates a NASNet architecture. - Note that only TensorFlow is supported for now, - therefore it only works with the data format - `image_data_format='channels_last'` in your Keras config - at `~/.keras/keras.json`. - - # Arguments - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(331, 331, 3)` for NASNetLarge or - `(224, 224, 3)` for NASNetMobile - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(224, 224, 3)` would be one valid value. - penultimate_filters: number of filters in the penultimate layer. - NASNet models use the notation `NASNet (N @ P)`, where: - - N is the number of blocks - - P is the number of penultimate filters - nb_blocks: number of repeated blocks of the NASNet model. - NASNet models use the notation `NASNet (N @ P)`, where: - - N is the number of blocks - - P is the number of penultimate filters - stem_filters: number of filters in the initial stem block - initial_reduction: Whether to perform the reduction step at the beginning - end of the network. Set to `True` for CIFAR models. - skip_reduction_layer_input: Determines whether to skip the reduction layers - when calculating the previous layer to connect to. - use_auxiliary_branch: Whether to use the auxiliary branch during - training or evaluation. - filters_multiplier: controls the width of the network. - - If `filters_multiplier` < 1.0, proportionally decreases the number - of filters in each layer. - - If `filters_multiplier` > 1.0, proportionally increases the number - of filters in each layer. - - If `filters_multiplier` = 1, default number of filters from the paper - are used at each layer. - dropout: dropout rate - weight_decay: l2 regularization weight - include_top: whether to include the fully-connected - layer at the top of the network. - weights: `None` (random initialization) or - `imagenet` (ImageNet weights) - input_tensor: optional Keras tensor (i.e. output of - `layers.Input()`) - to use as image input for the model. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. 
- classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - default_size: specifies the default image size of the model - activation: Type of activation at the top layer. - Can be one of 'softmax' or 'sigmoid'. - # Returns - A Keras model instance. - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - RuntimeError: If attempting to run this model with a - backend that does not support separable convolutions. - """ - if K.backend() != 'tensorflow': - raise RuntimeError('Only Tensorflow backend is currently supported, ' - 'as other backends do not support ' - 'separable convolution.') - - if weights not in {'imagenet', None}: - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `imagenet` ' - '(pre-training on ImageNet).') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as ImageNet with `include_top` ' - 'as true, `classes` should be 1000') - - if default_size is None: - default_size = 331 - - # Determine proper input shape and default size. - input_shape = _obtain_input_shape(input_shape, - default_size=default_size, - min_size=32, - data_format=K.image_data_format(), - require_flatten=include_top or weights) - - if K.image_data_format() != 'channels_last': - warnings.warn('The NASNet family of models is only available ' - 'for the input data format "channels_last" ' - '(width, height, channels). ' - 'However your settings specify the default ' - 'data format "channels_first" (channels, width, height).' - ' You should set `image_data_format="channels_last"` ' - 'in your Keras config located at ~/.keras/keras.json. ' - 'The model being returned right now will expect inputs ' - 'to follow the "channels_last" data format.') - K.set_image_data_format('channels_last') - old_data_format = 'channels_first' - else: - old_data_format = None - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - assert penultimate_filters % 24 == 0, "`penultimate_filters` needs to be " \ - "divisible by 24." 
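-
-    # Editorial note: with the default `filters_multiplier=2`, the final group
-    # of normal cells runs at `filters * 4` channels and each normal cell
-    # concatenates six branches, so the penultimate layer carries
-    # 6 * 4 * filters = 24 * filters channels, hence the check above.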
- - channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 - filters = penultimate_filters // 24 - - if initial_reduction: - x = Conv2D(stem_filters, (3, 3), strides=(2, 2), padding='valid', - use_bias=False, name='stem_conv1', kernel_initializer='he_normal', - kernel_regularizer=l2(weight_decay))(img_input) - else: - x = Conv2D(stem_filters, (3, 3), strides=(1, 1), padding='same', use_bias=False, - name='stem_conv1', kernel_initializer='he_normal', - kernel_regularizer=l2(weight_decay))(img_input) - - x = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY, epsilon=_BN_EPSILON, - name='stem_bn1')(x) - - p = None - if initial_reduction: # imagenet / mobile mode - x, p = _reduction_A(x, p, filters // (filters_multiplier ** 2), weight_decay, - id='stem_1') - x, p = _reduction_A(x, p, filters // filters_multiplier, weight_decay, - id='stem_2') - - for i in range(nb_blocks): - x, p = _normal_A(x, p, filters, weight_decay, id='%d' % i) - - x, p0 = _reduction_A(x, p, filters * filters_multiplier, weight_decay, - id='reduce_%d' % nb_blocks) - - p = p0 if not skip_reduction_layer_input else p - - for i in range(nb_blocks): - x, p = _normal_A(x, p, filters * filters_multiplier, weight_decay, - id='%d' % (nb_blocks + i + 1)) - - auxiliary_x = None - if not initial_reduction: # imagenet / mobile mode - if use_auxiliary_branch: - auxiliary_x = _add_auxiliary_head(x, classes, weight_decay, pooling, - include_top, activation) - - x, p0 = _reduction_A(x, p, filters * filters_multiplier ** 2, weight_decay, - id='reduce_%d' % (2 * nb_blocks)) - - if initial_reduction: # CIFAR mode - if use_auxiliary_branch: - auxiliary_x = _add_auxiliary_head(x, classes, weight_decay, pooling, - include_top, activation) - - p = p0 if not skip_reduction_layer_input else p - - for i in range(nb_blocks): - x, p = _normal_A(x, p, filters * filters_multiplier ** 2, weight_decay, - id='%d' % (2 * nb_blocks + i + 1)) - - x = Activation('relu')(x) - - if include_top: - x = GlobalAveragePooling2D()(x) - x = Dropout(dropout)(x) - x = Dense(classes, activation=activation, - kernel_regularizer=l2(weight_decay), name='predictions')(x) - else: - if pooling == 'avg': - x = GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. 
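-    # Editorial note: with `use_auxiliary_branch=True` the model has two
-    # outputs, so it must be compiled with two losses (the auxiliary output is
-    # typically given a smaller loss weight than the main prediction).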
- if use_auxiliary_branch: - model = Model(inputs, [x, auxiliary_x], name='NASNet_with_auxiliary') - else: - model = Model(inputs, x, name='NASNet') - - # load weights - if weights == 'imagenet': - if default_size == 224: # mobile version - if include_top: - if use_auxiliary_branch: - weight_path = NASNET_MOBILE_WEIGHT_PATH_WITH_AUXULARY - model_name = 'nasnet_mobile_with_aux.h5' - else: - weight_path = NASNET_MOBILE_WEIGHT_PATH - model_name = 'nasnet_mobile.h5' - else: - if use_auxiliary_branch: - weight_path = NASNET_MOBILE_WEIGHT_PATH_WITH_AUXULARY_NO_TOP - model_name = 'nasnet_mobile_with_aux_no_top.h5' - else: - weight_path = NASNET_MOBILE_WEIGHT_PATH_NO_TOP - model_name = 'nasnet_mobile_no_top.h5' - - weights_file = get_file( - model_name, weight_path, cache_subdir='models') - model.load_weights(weights_file, by_name=True) - - elif default_size == 331: # large version - if include_top: - if use_auxiliary_branch: - weight_path = NASNET_LARGE_WEIGHT_PATH_WITH_auxiliary - model_name = 'nasnet_large_with_aux.h5' - else: - weight_path = NASNET_LARGE_WEIGHT_PATH - model_name = 'nasnet_large.h5' - else: - if use_auxiliary_branch: - weight_path = NASNET_LARGE_WEIGHT_PATH_WITH_auxiliary_NO_TOP - model_name = 'nasnet_large_with_aux_no_top.h5' - else: - weight_path = NASNET_LARGE_WEIGHT_PATH_NO_TOP - model_name = 'nasnet_large_no_top.h5' - - weights_file = get_file( - model_name, weight_path, cache_subdir='models') - model.load_weights(weights_file, by_name=True) - - else: - raise ValueError('ImageNet weights can only be loaded on NASNetLarge ' - 'or NASNetMobile') - - if old_data_format: - K.set_image_data_format(old_data_format) - - return model - - -def NASNetLarge(input_shape=(331, 331, 3), - dropout=0.5, - weight_decay=5e-5, - use_auxiliary_branch=False, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - activation='softmax'): - """Instantiates a NASNet architecture in ImageNet mode. - Note that only TensorFlow is supported for now, - therefore it only works with the data format - `image_data_format='channels_last'` in your Keras config - at `~/.keras/keras.json`. - - # Arguments - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(331, 331, 3)` for NASNetLarge. - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(224, 224, 3)` would be one valid value. - use_auxiliary_branch: Whether to use the auxiliary branch during - training or evaluation. - dropout: dropout rate - weight_decay: l2 regularization weight - include_top: whether to include the fully-connected - layer at the top of the network. - weights: `None` (random initialization) or - `imagenet` (ImageNet weights) - input_tensor: optional Keras tensor (i.e. output of - `layers.Input()`) - to use as image input for the model. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. 
- default_size: specifies the default image size of the model - activation: Type of activation at the top layer. - Can be one of 'softmax' or 'sigmoid'. - # Returns - A Keras model instance. - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - RuntimeError: If attempting to run this model with a - backend that does not support separable convolutions. - """ - global _BN_DECAY, _BN_EPSILON - _BN_DECAY = 0.9997 - _BN_EPSILON = 1e-3 - - return NASNet(input_shape, - penultimate_filters=4032, - nb_blocks=6, - stem_filters=96, - initial_reduction=True, - skip_reduction_layer_input=True, - use_auxiliary_branch=use_auxiliary_branch, - filters_multiplier=2, - dropout=dropout, - weight_decay=weight_decay, - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - pooling=pooling, - classes=classes, - default_size=331, - activation=activation) - - -def NASNetMobile(input_shape=(224, 224, 3), - dropout=0.5, - weight_decay=4e-5, - use_auxiliary_branch=False, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - activation='softmax'): - """Instantiates a NASNet architecture in Mobile ImageNet mode. - Note that only TensorFlow is supported for now, - therefore it only works with the data format - `image_data_format='channels_last'` in your Keras config - at `~/.keras/keras.json`. - - # Arguments - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` for NASNetMobile - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(224, 224, 3)` would be one valid value. - use_auxiliary_branch: Whether to use the auxiliary branch during - training or evaluation. - dropout: dropout rate - weight_decay: l2 regularization weight - include_top: whether to include the fully-connected - layer at the top of the network. - weights: `None` (random initialization) or - `imagenet` (ImageNet weights) - input_tensor: optional Keras tensor (i.e. output of - `layers.Input()`) - to use as image input for the model. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - default_size: specifies the default image size of the model - activation: Type of activation at the top layer. - Can be one of 'softmax' or 'sigmoid'. - # Returns - A Keras model instance. - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - RuntimeError: If attempting to run this model with a - backend that does not support separable convolutions. 
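-
-    # Example
-        A minimal sketch (an editorial addition; downloads the pretrained
-        weights on first use):
-
-            model = NASNetMobile(input_shape=(224, 224, 3), weights='imagenet')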
-    """
-    global _BN_DECAY, _BN_EPSILON
-    _BN_DECAY = 0.9997
-    _BN_EPSILON = 1e-3
-
-    return NASNet(input_shape,
-                  penultimate_filters=1056,
-                  nb_blocks=4,
-                  stem_filters=32,
-                  initial_reduction=True,
-                  skip_reduction_layer_input=False,
-                  use_auxiliary_branch=use_auxiliary_branch,
-                  filters_multiplier=2,
-                  dropout=dropout,
-                  weight_decay=weight_decay,
-                  include_top=include_top,
-                  weights=weights,
-                  input_tensor=input_tensor,
-                  pooling=pooling,
-                  classes=classes,
-                  default_size=224,
-                  activation=activation)
-
-
-def NASNetCIFAR(input_shape=(32, 32, 3),
-                dropout=0.0,
-                weight_decay=5e-4,
-                use_auxiliary_branch=False,
-                include_top=True,
-                weights=None,
-                input_tensor=None,
-                pooling=None,
-                classes=10,
-                activation='softmax'):
-    """Instantiates a NASNet architecture in CIFAR mode.
-    Note that only TensorFlow is supported for now,
-    therefore it only works with the data format
-    `image_data_format='channels_last'` in your Keras config
-    at `~/.keras/keras.json`.
-
-    # Arguments
-        input_shape: optional shape tuple, only to be specified
-            if `include_top` is False (otherwise the input shape
-            has to be `(32, 32, 3)` for NASNetCIFAR.
-            It should have exactly 3 inputs channels,
-            and width and height should be no smaller than 32.
-            E.g. `(32, 32, 3)` would be one valid value.
-        use_auxiliary_branch: Whether to use the auxiliary branch during
-            training or evaluation.
-        dropout: dropout rate
-        weight_decay: l2 regularization weight
-        include_top: whether to include the fully-connected
-            layer at the top of the network.
-        weights: `None` (random initialization) or
-            `imagenet` (ImageNet weights)
-        input_tensor: optional Keras tensor (i.e. output of
-            `layers.Input()`)
-            to use as image input for the model.
-        pooling: Optional pooling mode for feature extraction
-            when `include_top` is `False`.
-            - `None` means that the output of the model
-                will be the 4D tensor output of the
-                last convolutional layer.
-            - `avg` means that global average pooling
-                will be applied to the output of the
-                last convolutional layer, and thus
-                the output of the model will be a
-                2D tensor.
-            - `max` means that global max pooling will
-                be applied.
-        classes: optional number of classes to classify images
-            into, only to be specified if `include_top` is True, and
-            if no `weights` argument is specified.
-        default_size: specifies the default image size of the model
-        activation: Type of activation at the top layer.
-            Can be one of 'softmax' or 'sigmoid'.
-    # Returns
-        A Keras model instance.
-    # Raises
-        ValueError: in case of invalid argument for `weights`,
-            or invalid input shape.
-        RuntimeError: If attempting to run this model with a
-            backend that does not support separable convolutions.
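-
-    # Example
-        A minimal sketch (an editorial addition; CIFAR-10-sized inputs with
-        randomly initialized weights):
-
-            model = NASNetCIFAR(input_shape=(32, 32, 3), classes=10)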
- """ - global _BN_DECAY, _BN_EPSILON - _BN_DECAY = 0.9 - _BN_EPSILON = 1e-5 - - return NASNet(input_shape, - penultimate_filters=768, - nb_blocks=6, - stem_filters=32, - initial_reduction=False, - skip_reduction_layer_input=False, - use_auxiliary_branch=use_auxiliary_branch, - filters_multiplier=2, - dropout=dropout, - weight_decay=weight_decay, - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - pooling=pooling, - classes=classes, - default_size=224, - activation=activation) - - -def _separable_conv_block(ip, filters, kernel_size=(3, 3), strides=(1, 1), - weight_decay=5e-5, id=None): - '''Adds 2 blocks of [relu-separable conv-batchnorm] - - # Arguments: - ip: input tensor - filters: number of output filters per layer - kernel_size: kernel size of separable convolutions - strides: strided convolution for downsampling - weight_decay: l2 regularization weight - id: string id - - # Returns: - a Keras tensor - ''' - channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 - - with K.name_scope('separable_conv_block_%s' % id): - x = Activation('relu')(ip) - x = SeparableConv2D(filters, kernel_size, strides=strides, - name='separable_conv_1_%s' % id, padding='same', - use_bias=False, kernel_initializer='he_normal', - kernel_regularizer=l2(weight_decay))(x) - x = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY, - epsilon=_BN_EPSILON, - name="separable_conv_1_bn_%s" % id)(x) - x = Activation('relu')(x) - x = SeparableConv2D(filters, kernel_size, name='separable_conv_2_%s' % id, - padding='same', use_bias=False, - kernel_initializer='he_normal', - kernel_regularizer=l2(weight_decay))(x) - x = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY, - epsilon=_BN_EPSILON, - name="separable_conv_2_bn_%s" % id)(x) - return x - - -def _adjust_block(p, ip, filters, weight_decay=5e-5, id=None): - ''' - Adjusts the input `p` to match the shape of the `input` - or situations where the output number of filters needs to - be changed - - # Arguments: - p: input tensor which needs to be modified - ip: input tensor whose shape needs to be matched - filters: number of output filters to be matched - weight_decay: l2 regularization weight - id: string id - - # Returns: - an adjusted Keras tensor - ''' - channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 - img_dim = 2 if K.image_data_format() == 'channels_first' else -2 - - with K.name_scope('adjust_block'): - if p is None: - p = ip - - elif p._keras_shape[img_dim] != ip._keras_shape[img_dim]: - with K.name_scope('adjust_reduction_block_%s' % id): - p = Activation('relu', name='adjust_relu_1_%s' % id)(p) - - p1 = AveragePooling2D((1, 1), strides=(2, 2), padding='valid', - name='adjust_avg_pool_1_%s' % id)(p) - p1 = Conv2D(filters // 2, (1, 1), padding='same', use_bias=False, - kernel_regularizer=l2(weight_decay), - name='adjust_conv_1_%s' % id, - kernel_initializer='he_normal')(p1) - - p2 = ZeroPadding2D(padding=((0, 1), (0, 1)))(p) - p2 = Cropping2D(cropping=((1, 0), (1, 0)))(p2) - p2 = AveragePooling2D((1, 1), strides=(2, 2), padding='valid', - name='adjust_avg_pool_2_%s' % id)(p2) - p2 = Conv2D(filters // 2, (1, 1), padding='same', use_bias=False, - kernel_regularizer=l2(weight_decay), - name='adjust_conv_2_%s' % id, - kernel_initializer='he_normal')(p2) - - p = concatenate([p1, p2], axis=channel_dim) - p = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY, - epsilon=_BN_EPSILON, - name='adjust_bn_%s' % id)(p) - - elif p._keras_shape[channel_dim] != filters: - with 
K.name_scope('adjust_projection_block_%s' % id): - p = Activation('relu')(p) - p = Conv2D(filters, (1, 1), strides=(1, 1), padding='same', - name='adjust_conv_projection_%s' % id, use_bias=False, - kernel_regularizer=l2(weight_decay), - kernel_initializer='he_normal')(p) - p = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY, - epsilon=_BN_EPSILON, - name='adjust_bn_%s' % id)(p) - return p - - -def _normal_A(ip, p, filters, weight_decay=5e-5, id=None): - '''Adds a Normal cell for NASNet-A (Fig. 4 in the paper) - - # Arguments: - ip: input tensor `x` - p: input tensor `p` - filters: number of output filters - weight_decay: l2 regularization weight - id: string id - - # Returns: - a Keras tensor - ''' - channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 - - with K.name_scope('normal_A_block_%s' % id): - p = _adjust_block(p, ip, filters, weight_decay, id) - - h = Activation('relu')(ip) - h = Conv2D(filters, (1, 1), strides=(1, 1), padding='same', - name='normal_conv_1_%s' % id, use_bias=False, - kernel_initializer='he_normal', - kernel_regularizer=l2(weight_decay))(h) - h = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY, - epsilon=_BN_EPSILON, name='normal_bn_1_%s' % id)(h) - - with K.name_scope('block_1'): - x1_1 = _separable_conv_block(h, filters, kernel_size=(5, 5), - weight_decay=weight_decay, - id='normal_left1_%s' % id) - x1_2 = _separable_conv_block(p, filters, weight_decay=weight_decay, - id='normal_right1_%s' % id) - x1 = add([x1_1, x1_2], name='normal_add_1_%s' % id) - - with K.name_scope('block_2'): - x2_1 = _separable_conv_block(p, filters, (5, 5), weight_decay=weight_decay, - id='normal_left2_%s' % id) - x2_2 = _separable_conv_block(p, filters, (3, 3), weight_decay=weight_decay, - id='normal_right2_%s' % id) - x2 = add([x2_1, x2_2], name='normal_add_2_%s' % id) - - with K.name_scope('block_3'): - x3 = AveragePooling2D((3, 3), strides=(1, 1), padding='same', - name='normal_left3_%s' % id)(h) - x3 = add([x3, p], name='normal_add_3_%s' % id) - - with K.name_scope('block_4'): - x4_1 = AveragePooling2D((3, 3), strides=(1, 1), padding='same', - name='normal_left4_%s' % id)(p) - x4_2 = AveragePooling2D((3, 3), strides=(1, 1), padding='same', - name='normal_right4_%s' % id)(p) - x4 = add([x4_1, x4_2], name='normal_add_4_%s' % id) - - with K.name_scope('block_5'): - x5 = _separable_conv_block(h, filters, weight_decay=weight_decay, - id='normal_left5_%s' % id) - x5 = add([x5, h], name='normal_add_5_%s' % id) - - x = concatenate([p, x1, x2, x3, x4, x5], axis=channel_dim, - name='normal_concat_%s' % id) - return x, ip - - -def _reduction_A(ip, p, filters, weight_decay=5e-5, id=None): - '''Adds a Reduction cell for NASNet-A (Fig. 
4 in the paper) - - # Arguments: - ip: input tensor `x` - p: input tensor `p` - filters: number of output filters - weight_decay: l2 regularization weight - id: string id - - # Returns: - a Keras tensor - ''' - channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 - - with K.name_scope('reduction_A_block_%s' % id): - p = _adjust_block(p, ip, filters, weight_decay, id) - - h = Activation('relu')(ip) - h = Conv2D(filters, (1, 1), strides=(1, 1), padding='same', - name='reduction_conv_1_%s' % id, use_bias=False, - kernel_initializer='he_normal', - kernel_regularizer=l2(weight_decay))(h) - h = BatchNormalization(axis=channel_dim, momentum=_BN_DECAY, - epsilon=_BN_EPSILON, - name='reduction_bn_1_%s' % id)(h) - - with K.name_scope('block_1'): - x1_1 = _separable_conv_block(h, filters, (5, 5), strides=(2, 2), - weight_decay=weight_decay, - id='reduction_left1_%s' % id) - x1_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2), - weight_decay=weight_decay, - id='reduction_right1_%s' % id) - x1 = add([x1_1, x1_2], name='reduction_add_1_%s' % id) - - with K.name_scope('block_2'): - x2_1 = MaxPooling2D((3, 3), strides=(2, 2), padding='same', - name='reduction_left2_%s' % id)(h) - x2_2 = _separable_conv_block(p, filters, (7, 7), strides=(2, 2), - weight_decay=weight_decay, - id='reduction_right2_%s' % id) - x2 = add([x2_1, x2_2], name='reduction_add_2_%s' % id) - - with K.name_scope('block_3'): - x3_1 = AveragePooling2D((3, 3), strides=(2, 2), padding='same', - name='reduction_left3_%s' % id)(h) - x3_2 = _separable_conv_block(p, filters, (5, 5), strides=(2, 2), - weight_decay=weight_decay, - id='reduction_right3_%s' % id) - x3 = add([x3_1, x3_2], name='reduction_add_3_%s' % id) - - with K.name_scope('block_4'): - x4 = AveragePooling2D((3, 3), strides=(1, 1), padding='same', - name='reduction_left4_%s' % id)(x1) - x4 = add([x2, x4], name='reduction_add_4_%s' % id) - - with K.name_scope('block_5'): - x5_1 = _separable_conv_block(x1, filters, (3, 3), - weight_decay=weight_decay, - id='reduction_left5_%s' % id) - x5_2 = MaxPooling2D((3, 3), strides=(2, 2), padding='same', - name='reduction_right5_%s' % id)(h) - x5 = add([x5_1, x5_2], name='reduction_add_5_%s' % id) - - x = concatenate([x2, x3, x4, x5], axis=channel_dim, - name='reduction_concat_%s' % id) - return x, ip - - -def _add_auxiliary_head(x, classes, weight_decay, pooling, include_top, activation): - '''Adds an auxiliary head for training the model - - From section A.7 "Training of ImageNet models" of the paper, all NASNet models are - trained using an auxiliary classifier around 2/3 of the depth of the network, with - a loss weight of 0.4 - - # Arguments - x: input tensor - classes: number of output classes - weight_decay: l2 regularization weight - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - include_top: whether to include the fully-connected - layer at the top of the network. - activation: Type of activation at the top layer. - Can be one of 'softmax' or 'sigmoid'.
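# Sketch of how the auxiliary branch is wired in at training time, per the
# docstring above (aux classifier at roughly 2/3 depth, loss weight 0.4).
# Assumes `model` is a two-output NASNet built with
# use_auxiliary_branch=True; the optimizer choice is illustrative, not taken
# from this file.
model.compile(optimizer='sgd',
              loss=['categorical_crossentropy', 'categorical_crossentropy'],
              loss_weights=[1.0, 0.4])  # [main head, auxiliary head]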
- - # Returns - a keras Tensor - ''' - img_height = 1 if K.image_data_format() == 'channels_last' else 2 - img_width = 2 if K.image_data_format() == 'channels_last' else 3 - channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - with K.name_scope('auxiliary_branch'): - auxiliary_x = Activation('relu')(x) - auxiliary_x = AveragePooling2D((5, 5), strides=(3, 3), padding='valid', - name='aux_pool')(auxiliary_x) - auxiliary_x = Conv2D(128, (1, 1), padding='same', use_bias=False, - name='aux_conv_projection', kernel_initializer='he_normal', - kernel_regularizer=l2(weight_decay))(auxiliary_x) - auxiliary_x = BatchNormalization(axis=channel_axis, momentum=_BN_DECAY, - epsilon=_BN_EPSILON, - name='aux_bn_projection')(auxiliary_x) - auxiliary_x = Activation('relu')(auxiliary_x) - - auxiliary_x = Conv2D(768, (auxiliary_x._keras_shape[img_height], - auxiliary_x._keras_shape[img_width]), - padding='valid', use_bias=False, - kernel_initializer='he_normal', - kernel_regularizer=l2(weight_decay), - name='aux_conv_reduction')(auxiliary_x) - auxiliary_x = BatchNormalization(axis=channel_axis, momentum=_BN_DECAY, - epsilon=_BN_EPSILON, - name='aux_bn_reduction')(auxiliary_x) - auxiliary_x = Activation('relu')(auxiliary_x) - - if include_top: - auxiliary_x = Flatten()(auxiliary_x) - auxiliary_x = Dense(classes, activation=activation, - kernel_regularizer=l2(weight_decay), - name='aux_predictions')(auxiliary_x) - else: - if pooling == 'avg': - auxiliary_x = GlobalAveragePooling2D()(auxiliary_x) - elif pooling == 'max': - auxiliary_x = GlobalMaxPooling2D()(auxiliary_x) - - return auxiliary_x -"""ResNet v1, v2, and segmentation models for Keras. - -# Reference - -- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) -- [Identity Mappings in Deep Residual Networks](https://arxiv.org/abs/1603.05027) - -Reference material for extended functionality: - -- [ResNeXt](https://arxiv.org/abs/1611.05431) for Tiny ImageNet support. -- [Dilated Residual Networks](https://arxiv.org/pdf/1705.09914) for segmentation support -- [Deep Residual Learning for Instrument Segmentation in - Robotic Surgery](https://arxiv.org/abs/1703.08580) - for segmentation support. - -Implementation Adapted from: github.com/raghakot/keras-resnet -""" # pylint: disable=E501 -from __future__ import division - -import six -from keras.models import Model -from keras.layers import Input -from keras.layers import Activation -from keras.layers import Reshape -from keras.layers import Dense -from keras.layers import Conv2D -from keras.layers import MaxPooling2D -from keras.layers import GlobalMaxPooling2D -from keras.layers import GlobalAveragePooling2D -from keras.layers import Dropout -from keras.layers.merge import add -from keras.layers.normalization import BatchNormalization -from keras.regularizers import l2 -from keras import backend as K -from keras_applications.imagenet_utils import _obtain_input_shape - - -def _bn_relu(x, bn_name=None, relu_name=None): - """Helper to build a BN -> relu block - """ - norm = BatchNormalization(axis=CHANNEL_AXIS, name=bn_name)(x) - return Activation("relu", name=relu_name)(norm) - - -def _conv_bn_relu(**conv_params): - """Helper to build a conv -> BN -> relu residual unit activation function. 
- This is the original ResNet v1 scheme in https://arxiv.org/abs/1512.03385 - """ - filters = conv_params["filters"] - kernel_size = conv_params["kernel_size"] - strides = conv_params.setdefault("strides", (1, 1)) - dilation_rate = conv_params.setdefault("dilation_rate", (1, 1)) - conv_name = conv_params.setdefault("conv_name", None) - bn_name = conv_params.setdefault("bn_name", None) - relu_name = conv_params.setdefault("relu_name", None) - kernel_initializer = conv_params.setdefault( - "kernel_initializer", "he_normal") - padding = conv_params.setdefault("padding", "same") - kernel_regularizer = conv_params.setdefault( - "kernel_regularizer", l2(1.e-4)) - - def f(x): - x = Conv2D(filters=filters, kernel_size=kernel_size, - strides=strides, padding=padding, - dilation_rate=dilation_rate, - kernel_initializer=kernel_initializer, - kernel_regularizer=kernel_regularizer, - name=conv_name)(x) - return _bn_relu(x, bn_name=bn_name, relu_name=relu_name) - - return f - - -def _bn_relu_conv(**conv_params): - """Helper to build a BN -> relu -> conv residual unit with full pre-activation - function. This is the ResNet v2 scheme proposed in - http://arxiv.org/pdf/1603.05027v2.pdf - """ - filters = conv_params["filters"] - kernel_size = conv_params["kernel_size"] - strides = conv_params.setdefault("strides", (1, 1)) - dilation_rate = conv_params.setdefault("dilation_rate", (1, 1)) - conv_name = conv_params.setdefault("conv_name", None) - bn_name = conv_params.setdefault("bn_name", None) - relu_name = conv_params.setdefault("relu_name", None) - kernel_initializer = conv_params.setdefault( - "kernel_initializer", "he_normal") - padding = conv_params.setdefault("padding", "same") - kernel_regularizer = conv_params.setdefault( - "kernel_regularizer", l2(1.e-4)) - - def f(x): - activation = _bn_relu(x, bn_name=bn_name, relu_name=relu_name) - return Conv2D(filters=filters, kernel_size=kernel_size, - strides=strides, padding=padding, - dilation_rate=dilation_rate, - kernel_initializer=kernel_initializer, - kernel_regularizer=kernel_regularizer, - name=conv_name)(activation) - - return f - - -def _shortcut(input_feature, residual, conv_name_base=None, bn_name_base=None): - """Adds a shortcut between input and residual block and merges them with "sum" - """ - # Expand channels of shortcut to match residual. - # Stride appropriately to match residual (width, height) - # Should be int if network architecture is correctly configured. - input_shape = K.int_shape(input_feature) - residual_shape = K.int_shape(residual) - stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS])) - stride_height = int( - round(input_shape[COL_AXIS] / residual_shape[COL_AXIS])) - equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS] - - shortcut = input_feature - # 1 X 1 conv if shape is different. Else identity. 
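# Side-by-side sketch of the two unit orderings defined above, assuming `x`
# is a 4D Keras tensor; only the position of BN/ReLU relative to the
# convolution differs:
y_v1 = _conv_bn_relu(filters=64, kernel_size=(3, 3))(x)  # v1: conv -> BN -> relu
y_v2 = _bn_relu_conv(filters=64, kernel_size=(3, 3))(x)  # v2: BN -> relu -> conv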
- if stride_width > 1 or stride_height > 1 or not equal_channels: - print('reshaping via a convolution...') - if conv_name_base is not None: - conv_name_base = conv_name_base + '1' - shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS], - kernel_size=(1, 1), - strides=(stride_width, stride_height), - padding="valid", - kernel_initializer="he_normal", - kernel_regularizer=l2(0.0001), - name=conv_name_base)(input_feature) - if bn_name_base is not None: - bn_name_base = bn_name_base + '1' - shortcut = BatchNormalization(axis=CHANNEL_AXIS, - name=bn_name_base)(shortcut) - - return add([shortcut, residual]) - - -def _residual_block(block_function, filters, blocks, stage, - transition_strides=None, transition_dilation_rates=None, - dilation_rates=None, is_first_layer=False, dropout=None, - residual_unit=_bn_relu_conv): - """Builds a residual block with repeating bottleneck blocks. - - stage: integer, current stage label, used for generating layer names - blocks: number of blocks 'a','b'..., current block label, used for generating - layer names - transition_strides: a list of tuples for the strides of each transition - transition_dilation_rates: a list of tuples for the dilation rate of each - transition - """ - if transition_dilation_rates is None: - transition_dilation_rates = [(1, 1)] * blocks - if transition_strides is None: - transition_strides = [(1, 1)] * blocks - if dilation_rates is None: - dilation_rates = [1] * blocks - - def f(x): - for i in range(blocks): - is_first_block = is_first_layer and i == 0 - x = block_function(filters=filters, stage=stage, block=i, - transition_strides=transition_strides[i], - dilation_rate=dilation_rates[i], - is_first_block_of_first_layer=is_first_block, - dropout=dropout, - residual_unit=residual_unit)(x) - return x - - return f - - -def _block_name_base(stage, block): - """Get the convolution name base and batch normalization name base defined by - stage and block. - - If there are less than 26 blocks they will be labeled 'a', 'b', 'c' to match the - paper and keras and beyond 26 blocks they will simply be numbered. - """ - if block < 27: - block = '%c' % (block + 97) # 97 is the ascii number for lowercase 'a' - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - return conv_name_base, bn_name_base - - -def basic_block(filters, stage, block, transition_strides=(1, 1), - dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None, - residual_unit=_bn_relu_conv): - """Basic 3 X 3 convolution blocks for use on resnets with layers <= 34. 
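# Quick check of the naming scheme in _block_name_base above: block indices
# map to lowercase letters via chr(block + 97).
assert chr(0 + 97) == 'a' and chr(25 + 97) == 'z'
# Edge case: the guard reads `block < 27`, so index 26 yields chr(123) == '{'
# instead of falling through to a plain number; `block < 26` would match the
# "less than 26 blocks" wording in the docstring.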
- Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf - """ - def f(input_features): - conv_name_base, bn_name_base = _block_name_base(stage, block) - if is_first_block_of_first_layer: - # don't repeat bn->relu since we just did bn->relu->maxpool - x = Conv2D(filters=filters, kernel_size=(3, 3), - strides=transition_strides, - dilation_rate=dilation_rate, - padding="same", - kernel_initializer="he_normal", - kernel_regularizer=l2(1e-4), - name=conv_name_base + '2a')(input_features) - else: - x = residual_unit(filters=filters, kernel_size=(3, 3), - strides=transition_strides, - dilation_rate=dilation_rate, - conv_name_base=conv_name_base + '2a', - bn_name_base=bn_name_base + '2a')(input_features) - - if dropout is not None: - x = Dropout(dropout)(x) - - x = residual_unit(filters=filters, kernel_size=(3, 3), - conv_name_base=conv_name_base + '2b', - bn_name_base=bn_name_base + '2b')(x) - - return _shortcut(input_features, x) - - return f - - -def bottleneck(filters, stage, block, transition_strides=(1, 1), - dilation_rate=(1, 1), is_first_block_of_first_layer=False, dropout=None, - residual_unit=_bn_relu_conv): - """Bottleneck architecture for > 34 layer resnet. - Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf - - Returns: - A final conv layer of filters * 4 - """ - def f(input_feature): - conv_name_base, bn_name_base = _block_name_base(stage, block) - if is_first_block_of_first_layer: - # don't repeat bn->relu since we just did bn->relu->maxpool - x = Conv2D(filters=filters, kernel_size=(1, 1), - strides=transition_strides, - dilation_rate=dilation_rate, - padding="same", - kernel_initializer="he_normal", - kernel_regularizer=l2(1e-4), - name=conv_name_base + '2a')(input_feature) - else: - x = residual_unit(filters=filters, kernel_size=(1, 1), - strides=transition_strides, - dilation_rate=dilation_rate, - conv_name_base=conv_name_base + '2a', - bn_name_base=bn_name_base + '2a')(input_feature) - - if dropout is not None: - x = Dropout(dropout)(x) - - x = residual_unit(filters=filters, kernel_size=(3, 3), - conv_name_base=conv_name_base + '2b', - bn_name_base=bn_name_base + '2b')(x) - - if dropout is not None: - x = Dropout(dropout)(x) - - x = residual_unit(filters=filters * 4, kernel_size=(1, 1), - conv_name_base=conv_name_base + '2c', - bn_name_base=bn_name_base + '2c')(x) - - return _shortcut(input_feature, x) - - return f - - -def _handle_dim_ordering(): - global ROW_AXIS - global COL_AXIS - global CHANNEL_AXIS - if K.image_data_format() == 'channels_last': - ROW_AXIS = 1 - COL_AXIS = 2 - CHANNEL_AXIS = 3 - else: - CHANNEL_AXIS = 1 - ROW_AXIS = 2 - COL_AXIS = 3 - - -def _string_to_function(identifier): - if isinstance(identifier, six.string_types): - res = globals().get(identifier) - if not res: - raise ValueError('Invalid {}'.format(identifier)) - return res - return identifier - - -def ResNet(input_shape=None, classes=10, block='bottleneck', residual_unit='v2', - repetitions=None, initial_filters=64, activation='softmax', include_top=True, - input_tensor=None, dropout=None, transition_dilation_rate=(1, 1), - initial_strides=(2, 2), initial_kernel_size=(7, 7), initial_pooling='max', - final_pooling=None, top='classification'): - """Builds a custom ResNet like architecture. Defaults to ResNet50 v2. - - Args: - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` (with `channels_last` dim ordering) - or `(3, 224, 224)` (with `channels_first` dim ordering). 
- It should have exactly 3 dimensions, - and width and height should be no smaller than 8. - E.g. `(224, 224, 3)` would be one valid value. - classes: The number of outputs at final softmax layer - block: The block function to use. This is either `'basic'` or `'bottleneck'`. - The original paper used `basic` for layers < 50. - repetitions: Number of repetitions of various block units. - At each block unit, the number of filters are doubled and the input size - is halved. Default of None implies the ResNet50v2 values of [3, 4, 6, 3]. - residual_unit: the basic residual unit, 'v1' for conv bn relu, 'v2' for bn relu - conv. See [Identity Mappings in - Deep Residual Networks](https://arxiv.org/abs/1603.05027) - for details. - dropout: None for no dropout, otherwise rate of dropout from 0 to 1. - Based on [Wide Residual Networks.(https://arxiv.org/pdf/1605.07146) paper. - transition_dilation_rate: Dilation rate for transition layers. For semantic - segmentation of images use a dilation rate of (2, 2). - initial_strides: Stride of the very first residual unit and MaxPooling2D call, - with default (2, 2), set to (1, 1) for small images like cifar. - initial_kernel_size: kernel size of the very first convolution, (7, 7) for - imagenet and (3, 3) for small image datasets like tiny imagenet and cifar. - See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details. - initial_pooling: Determine if there will be an initial pooling layer, - 'max' for imagenet and None for small image datasets. - See [ResNeXt](https://arxiv.org/abs/1611.05431) paper for details. - final_pooling: Optional pooling mode for feature extraction at the final - model layer when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - top: Defines final layers to evaluate based on a specific problem type. Options - are 'classification' for ImageNet style problems, 'segmentation' for - problems like the Pascal VOC dataset, and None to exclude these layers - entirely. - - Returns: - The keras `Model`. 
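# Usage sketch assembled from the argument docs above; the small-image values
# ((1, 1) initial strides, (3, 3) initial kernel, no initial pooling) are the
# ones the docstring recommends for CIFAR-sized inputs. An untested
# illustration, not a benchmarked configuration:
cifar_model = ResNet(input_shape=(32, 32, 3), classes=10,
                     block='basic', repetitions=[2, 2, 2, 2],
                     initial_strides=(1, 1), initial_kernel_size=(3, 3),
                     initial_pooling=None)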
- """ - if activation not in ['softmax', 'sigmoid', None]: - raise ValueError( - 'activation must be one of "softmax", "sigmoid", or None') - if activation == 'sigmoid' and classes != 1: - raise ValueError( - 'sigmoid activation can only be used when classes = 1') - if repetitions is None: - repetitions = [3, 4, 6, 3] - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=32, - min_size=8, - data_format=K.image_data_format(), - require_flatten=include_top) - _handle_dim_ordering() - if len(input_shape) != 3: - raise Exception( - "Input shape should be a tuple (nb_channels, nb_rows, nb_cols)") - - if block == 'basic': - block_fn = basic_block - elif block == 'bottleneck': - block_fn = bottleneck - elif isinstance(block, six.string_types): - block_fn = _string_to_function(block) - else: - block_fn = block - - if residual_unit == 'v2': - residual_unit = _bn_relu_conv - elif residual_unit == 'v1': - residual_unit = _conv_bn_relu - elif isinstance(residual_unit, six.string_types): - residual_unit = _string_to_function(residual_unit) - else: - residual_unit = residual_unit - - # Permute dimension order if necessary - if K.image_data_format() == 'channels_first': - input_shape = (input_shape[1], input_shape[2], input_shape[0]) - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=32, - min_size=8, - data_format=K.image_data_format(), - require_flatten=include_top) - - img_input = Input(shape=input_shape, tensor=input_tensor) - x = _conv_bn_relu(filters=initial_filters, kernel_size=initial_kernel_size, - strides=initial_strides)(img_input) - if initial_pooling == 'max': - x = MaxPooling2D(pool_size=( - 3, 3), strides=initial_strides, padding="same")(x) - - block = x - filters = initial_filters - for i, r in enumerate(repetitions): - transition_dilation_rates = [transition_dilation_rate] * r - transition_strides = [(1, 1)] * r - if transition_dilation_rate == (1, 1): - transition_strides[0] = (2, 2) - block = _residual_block(block_fn, filters=filters, - stage=i, blocks=r, - is_first_layer=(i == 0), - dropout=dropout, - transition_dilation_rates=transition_dilation_rates, - transition_strides=transition_strides, - residual_unit=residual_unit)(block) - filters *= 2 - - # Last activation - x = _bn_relu(block) - - # Classifier block - if include_top and top is 'classification': - x = GlobalAveragePooling2D()(x) - x = Dense(units=classes, activation=activation, - kernel_initializer="he_normal")(x) - elif include_top and top is 'segmentation': - x = Conv2D(classes, (1, 1), activation='linear', padding='same')(x) - - if K.image_data_format() == 'channels_first': - channel, row, col = input_shape - else: - row, col, channel = input_shape - - x = Reshape((row * col, classes))(x) - x = Activation(activation)(x) - x = Reshape((row, col, classes))(x) - elif final_pooling == 'avg': - x = GlobalAveragePooling2D()(x) - elif final_pooling == 'max': - x = GlobalMaxPooling2D()(x) - - model = Model(inputs=img_input, outputs=x) - return model - - -def ResNet18(input_shape, classes): - """ResNet with 18 layers and v2 residual units - """ - return ResNet(input_shape, classes, basic_block, repetitions=[2, 2, 2, 2]) - - -def ResNet34(input_shape, classes): - """ResNet with 34 layers and v2 residual units - """ - return ResNet(input_shape, classes, basic_block, repetitions=[3, 4, 6, 3]) - - -def ResNet50(input_shape, classes): - """ResNet with 50 layers and v2 residual units - """ - return ResNet(input_shape, classes, bottleneck, 
repetitions=[3, 4, 6, 3]) - - -def ResNet101(input_shape, classes): - """ResNet with 101 layers and v2 residual units - """ - return ResNet(input_shape, classes, bottleneck, repetitions=[3, 4, 23, 3]) - - -def ResNet152(input_shape, classes): - """ResNet with 152 layers and v2 residual units - """ - return ResNet(input_shape, classes, bottleneck, repetitions=[3, 8, 36, 3]) -# -*- coding: utf-8 -*- -"""Wide Residual Network models for Keras. - -# Reference - -- [Wide Residual Networks](https://arxiv.org/abs/1605.07146) - -""" -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division - -import warnings - -from keras.models import Model -from keras.layers.core import Dense, Dropout, Activation -from keras.layers.pooling import MaxPooling2D, GlobalAveragePooling2D -from keras.layers import Input, Conv2D -from keras.layers.merge import add -from keras.layers.normalization import BatchNormalization -from keras.utils.layer_utils import convert_all_kernels_in_model -from keras.utils.data_utils import get_file -from keras.engine.topology import get_source_inputs -from keras_applications.imagenet_utils import _obtain_input_shape -import keras.backend as K - -TH_WEIGHTS_PATH = ('https://github.com/titu1994/Wide-Residual-Networks/' - 'releases/download/v1.2/wrn_28_8_th_kernels_th_dim_ordering.h5') -TF_WEIGHTS_PATH = ('https://github.com/titu1994/Wide-Residual-Networks/' - 'releases/download/v1.2/wrn_28_8_tf_kernels_tf_dim_ordering.h5') -TH_WEIGHTS_PATH_NO_TOP = ('https://github.com/titu1994/Wide-Residual-Networks/releases/' - 'download/v1.2/wrn_28_8_th_kernels_th_dim_ordering_no_top.h5') -TF_WEIGHTS_PATH_NO_TOP = ('https://github.com/titu1994/Wide-Residual-Networks/releases/' - 'download/v1.2/wrn_28_8_tf_kernels_tf_dim_ordering_no_top.h5') - - -def WideResidualNetwork(depth=28, width=8, dropout_rate=0.0, - include_top=True, weights='cifar10', - input_tensor=None, input_shape=None, - classes=10, activation='softmax'): - """Instantiate the Wide Residual Network architecture, - optionally loading weights pre-trained - on CIFAR-10. Note that when using TensorFlow, - for best performance you should set - `image_dim_ordering="tf"` in your Keras config - at ~/.keras/keras.json. - - The model and the weights are compatible with both - TensorFlow and Theano. The dimension ordering - convention used by the model is the one - specified in your Keras config file. - - # Arguments - depth: number or layers in the DenseNet - width: multiplier to the ResNet width (number of filters) - dropout_rate: dropout rate - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization) or - "cifar10" (pre-training on CIFAR-10).. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(32, 32, 3)` (with `tf` dim ordering) - or `(3, 32, 32)` (with `th` dim ordering). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 8. - E.g. `(200, 200, 3)` would be one valid value. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - - # Returns - A Keras model instance. 
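# The constructor below enforces (depth - 4) % 6 == 0, giving
# N = (depth - 4) // 6 blocks per group. A quick sanity table:
for depth in (16, 22, 28, 40):
    print(depth, (depth - 4) // 6)  # N = 2, 3, 4, 6 respectively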
- """ - - if weights not in {'cifar10', None}: - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `cifar10` ' - '(pre-training on CIFAR-10).') - - if weights == 'cifar10' and include_top and classes != 10: - raise ValueError('If using `weights` as CIFAR 10 with `include_top`' - ' as true, `classes` should be 10') - - if (depth - 4) % 6 != 0: - raise ValueError('Depth of the network must be such that (depth - 4)' - 'should be divisible by 6.') - - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=32, - min_size=8, - data_format=K.image_dim_ordering(), - require_flatten=include_top) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - x = __create_wide_residual_network(classes, img_input, include_top, depth, width, - dropout_rate, activation) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = Model(inputs, x, name='wide-resnet') - - # load weights - if weights == 'cifar10': - if (depth == 28) and (width == 8) and (dropout_rate == 0.0): - # Default parameters match. Weights for this model exist: - - if K.image_dim_ordering() == 'th': - if include_top: - h5_file = 'wide_resnet_28_8_th_dim_ordering_th_kernels.h5' - weights_path = get_file(h5_file, - TH_WEIGHTS_PATH, - cache_subdir='models') - else: - h5_file = 'wide_resnet_28_8_th_dim_ordering_th_kernels_no_top.h5' - weights_path = get_file(h5_file, - TH_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - - model.load_weights(weights_path) - - if K.backend() == 'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image dimension ordering convention ' - '(`image_dim_ordering="th"`). 
' - 'For best performance, set ' - '`image_dim_ordering="tf"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - convert_all_kernels_in_model(model) - else: - if include_top: - h5_file = 'wide_resnet_28_8_tf_dim_ordering_tf_kernels.h5' - weights_path = get_file(h5_file, - TF_WEIGHTS_PATH, - cache_subdir='models') - else: - h5_file = 'wide_resnet_28_8_tf_dim_ordering_tf_kernels_no_top.h5' - weights_path = get_file(h5_file, - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - - model.load_weights(weights_path) - - if K.backend() == 'theano': - convert_all_kernels_in_model(model) - - return model - - -def __conv1_block(input): - x = Conv2D(16, (3, 3), padding='same')(input) - - channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - return x - - -def __conv2_block(input, k=1, dropout=0.0): - init = input - - channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - # Check if input number of filters is same as 16 * k, else create - # convolution2d for this input - if K.image_data_format() == 'channels_first': - if init._keras_shape[1] != 16 * k: - init = Conv2D(16 * k, (1, 1), activation='linear', - padding='same')(init) - else: - if init._keras_shape[-1] != 16 * k: - init = Conv2D(16 * k, (1, 1), activation='linear', - padding='same')(init) - - x = Conv2D(16 * k, (3, 3), padding='same')(input) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - - if dropout > 0.0: - x = Dropout(dropout)(x) - - x = Conv2D(16 * k, (3, 3), padding='same')(x) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - - m = add([init, x]) - return m - - -def __conv3_block(input, k=1, dropout=0.0): - init = input - - channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - # Check if input number of filters is same as 32 * k, else - # create convolution2d for this input - if K.image_data_format() == 'channels_first': - if init._keras_shape[1] != 32 * k: - init = Conv2D(32 * k, (1, 1), activation='linear', - padding='same')(init) - else: - if init._keras_shape[-1] != 32 * k: - init = Conv2D(32 * k, (1, 1), activation='linear', - padding='same')(init) - - x = Conv2D(32 * k, (3, 3), padding='same')(input) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - - if dropout > 0.0: - x = Dropout(dropout)(x) - - x = Conv2D(32 * k, (3, 3), padding='same')(x) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - - m = add([init, x]) - return m - - -def ___conv4_block(input, k=1, dropout=0.0): - init = input - - channel_axis = 1 if K.image_dim_ordering() == 'th' else -1 - - # Check if input number of filters is same as 64 * k, else - # create convolution2d for this input - if K.image_dim_ordering() == 'th': - if init._keras_shape[1] != 64 * k: - init = Conv2D(64 * k, (1, 1), activation='linear', - padding='same')(init) - else: - if init._keras_shape[-1] != 64 * k: - init = Conv2D(64 * k, (1, 1), activation='linear', - padding='same')(init) - - x = Conv2D(64 * k, (3, 3), padding='same')(input) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - - if dropout > 0.0: - x = Dropout(dropout)(x) - - x = Conv2D(64 * k, (3, 3), padding='same')(x) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - - m = add([init, x]) - return m - - -def __create_wide_residual_network(nb_classes, img_input, include_top, depth=28, - width=8, dropout=0.0, activation='softmax'): - ''' 
Creates a Wide Residual Network with specified parameters - - Args: - nb_classes: Number of output classes - img_input: Input tensor or layer - include_top: Flag to include the last dense layer - depth: Depth of the network. Compute N = (n - 4) / 6. - For a depth of 16, n = 16, N = (16 - 4) / 6 = 2 - For a depth of 28, n = 28, N = (28 - 4) / 6 = 4 - For a depth of 40, n = 40, N = (40 - 4) / 6 = 6 - width: Width of the network. - dropout: Adds dropout if value is greater than 0.0 - - Returns:a Keras Model - ''' - - N = (depth - 4) // 6 - - x = __conv1_block(img_input) - nb_conv = 4 - - for i in range(N): - x = __conv2_block(x, width, dropout) - nb_conv += 2 - - x = MaxPooling2D((2, 2))(x) - - for i in range(N): - x = __conv3_block(x, width, dropout) - nb_conv += 2 - - x = MaxPooling2D((2, 2))(x) - - for i in range(N): - x = ___conv4_block(x, width, dropout) - nb_conv += 2 - - if include_top: - x = GlobalAveragePooling2D()(x) - x = Dense(nb_classes, activation=activation)(x) - - return x -from keras import backend as K - -# We import all keras backend functions here, -# so that files in this repo can import both -# core and contrib backend functions with a -# single import statement. - -if K.backend() == 'theano': - from .theano_backend import * -elif K.backend() == 'tensorflow': - from .tensorflow_backend import * -elif K.backend() == 'cntk': - from .cntk_backend import * -from keras.backend import cntk_backend as KCN - - -def moments(x, axes, shift=None, keep_dims=False): - ''' Calculates and returns the mean and variance of the input ''' - mean, variant = KCN._moments( - x, axes=axes, shift=shift, keep_dims=keep_dims) - return mean, variant -import numpy as np -from keras import backend as K - - -def extract_image_patches(X, ksizes, strides, - padding='valid', - data_format='channels_first'): - raise NotImplementedError - - -def depth_to_space(input, scale, data_format=None): - raise NotImplementedError - - -def moments(x, axes, shift=None, keep_dims=False): - mean_batch = np.mean(x, axis=tuple(axes), keepdims=keep_dims) - var_batch = np.var(x, axis=tuple(axes), keepdims=keep_dims) - return mean_batch, var_batch -import tensorflow as tf - -try: - from tensorflow.python.ops import ctc_ops as ctc -except ImportError: - import tensorflow.contrib.ctc as ctc -import keras.backend as K - -py_all = all - - -def _preprocess_conv2d_input(x, data_format): - """Transpose and cast the input before the conv2d. - - # Arguments - x: input tensor. - data_format: string, `"channels_last"` or `"channels_first"`. - - # Returns - A tensor. - """ - if K.dtype(x) == 'float64': - x = tf.cast(x, 'float32') - if data_format == 'channels_first': - # TF uses the last dimension as channel dimension, - # instead of the 2nd one. - # TH input shape: (samples, input_depth, rows, cols) - # TF input shape: (samples, rows, cols, input_depth) - x = tf.transpose(x, (0, 2, 3, 1)) - return x - - -def _postprocess_conv2d_output(x, data_format): - """Transpose and cast the output from conv2d if needed. - - # Arguments - x: A tensor. - data_format: string, `"channels_last"` or `"channels_first"`. - - # Returns - A tensor. - """ - - if data_format == 'channels_first': - x = tf.transpose(x, (0, 3, 1, 2)) - - if K.floatx() == 'float64': - x = tf.cast(x, 'float64') - return x - - -def _preprocess_padding(padding): - """Convert keras' padding to tensorflow's padding. - - # Arguments - padding: string, `"same"` or `"valid"`. - - # Returns - a string, `"SAME"` or `"VALID"`. - - # Raises - ValueError: if `padding` is invalid. 
- """ - if padding == 'same': - padding = 'SAME' - elif padding == 'valid': - padding = 'VALID' - else: - raise ValueError('Invalid padding:', padding) - return padding - - -def conv2d(x, kernel, strides=(1, 1), padding='valid', data_format='channels_first', - image_shape=None, filter_shape=None): - """2D convolution. - - # Arguments - x: Input tensor - kernel: kernel tensor. - strides: strides tuple. - padding: string, "same" or "valid". - data_format: 'channels_first' or 'channels_last'. - Whether to use Theano or TensorFlow dimension - ordering in inputs/kernels/ouputs. - image_shape: Optional, the input tensor shape - filter_shape: Optional, the kernel shape. - - # Returns - x convolved with the kernel. - - # Raises - Exception: In case of invalid border mode or data format. - """ - return K.conv2d(x, kernel, strides, padding, data_format) - - -def extract_image_patches(x, ksizes, ssizes, padding='same', - data_format='channels_last'): - """Extract the patches from an image. - - # Arguments - x: The input image - ksizes: 2-d tuple with the kernel size - ssizes: 2-d tuple with the strides size - padding: 'same' or 'valid' - data_format: 'channels_last' or 'channels_first' - - # Returns - The (k_w,k_h) patches extracted - TF ==> (batch_size,w,h,k_w,k_h,c) - TH ==> (batch_size,w,h,c,k_w,k_h) - """ - kernel = [1, ksizes[0], ksizes[1], 1] - strides = [1, ssizes[0], ssizes[1], 1] - padding = _preprocess_padding(padding) - if data_format == 'channels_first': - x = K.permute_dimensions(x, (0, 2, 3, 1)) - bs_i, w_i, h_i, ch_i = K.int_shape(x) - patches = tf.extract_image_patches(x, kernel, strides, [1, 1, 1, 1], - padding) - # Reshaping to fit Theano - bs, w, h, ch = K.int_shape(patches) - reshaped = tf.reshape(patches, [-1, w, h, tf.floordiv(ch, ch_i), ch_i]) - final_shape = [-1, w, h, ch_i, ksizes[0], ksizes[1]] - patches = tf.reshape(tf.transpose(reshaped, [0, 1, 2, 4, 3]), final_shape) - if data_format == 'channels_last': - patches = K.permute_dimensions(patches, [0, 1, 2, 4, 5, 3]) - return patches - - -def depth_to_space(input, scale, data_format=None): - """ Uses phase shift algorithm to convert channels/depth for spatial resolution. - - # Arguments - input: Input tensor - scale: n `int` that is `>= 2`. The size of the spatial block. - data_format: 'channels_first' or 'channels_last'. - Whether to use Theano or TensorFlow dimension - ordering in inputs/kernels/ouputs. - - # Returns - TODO (PR welcome): Filling this section. 
- """ - if data_format is None: - data_format = K.image_data_format() - data_format = data_format.lower() - input = _preprocess_conv2d_input(input, data_format) - out = tf.depth_to_space(input, scale) - out = _postprocess_conv2d_output(out, data_format) - return out - - -def moments(x, axes, shift=None, keep_dims=False): - ''' Wrapper over tensorflow backend call ''' - - return tf.nn.moments(x, axes, shift=shift, keep_dims=keep_dims) -from theano import tensor as T -from theano.sandbox.neighbours import images2neibs - -try: - import theano.sparse as th_sparse_module -except ImportError: - th_sparse_module = None -try: - from theano.tensor.nnet.nnet import softsign as T_softsign -except ImportError: - from theano.sandbox.softsign import softsign as T_softsign -from keras.backend import theano_backend as KTH -from keras.backend.common import image_data_format -from keras.backend.theano_backend import _preprocess_conv2d_input -from keras.backend.theano_backend import _postprocess_conv2d_output - -py_all = all - - -def conv2d(x, kernel, strides=(1, 1), padding='valid', data_format='channels_first', - image_shape=None, filter_shape=None): - ''' - padding: string, "same" or "valid". - ''' - if data_format not in {'channels_first', 'channels_last'}: - raise Exception('Unknown data_format ' + str(data_format)) - - if data_format == 'channels_last': - # TF uses the last dimension as channel dimension, - # instead of the 2nd one. - # TH input shape: (samples, input_depth, rows, cols) - # TF input shape: (samples, rows, cols, input_depth) - # TH kernel shape: (depth, input_depth, rows, cols) - # TF kernel shape: (rows, cols, input_depth, depth) - x = x.dimshuffle((0, 3, 1, 2)) - kernel = kernel.dimshuffle((3, 2, 0, 1)) - if image_shape: - image_shape = (image_shape[0], image_shape[3], - image_shape[1], image_shape[2]) - if filter_shape: - filter_shape = (filter_shape[3], filter_shape[2], - filter_shape[0], filter_shape[1]) - - if padding == 'same': - th_padding = 'half' - np_kernel = kernel.eval() - elif padding == 'valid': - th_padding = 'valid' - else: - raise Exception('Border mode not supported: ' + str(padding)) - - # Theano might not accept long type - def int_or_none(value): - try: - return int(value) - except TypeError: - return None - - if image_shape is not None: - image_shape = tuple(int_or_none(v) for v in image_shape) - - if filter_shape is not None: - filter_shape = tuple(int_or_none(v) for v in filter_shape) - - conv_out = T.nnet.conv2d(x, kernel, - border_mode=th_padding, - subsample=strides, - input_shape=image_shape, - filter_shape=filter_shape) - - if padding == 'same': - if np_kernel.shape[2] % 2 == 0: - end = (x.shape[2] + strides[0] - 1) // strides[0] - conv_out = conv_out[:, :, :end, :] - if np_kernel.shape[3] % 2 == 0: - end = (x.shape[3] + strides[1] - 1) // strides[1] - conv_out = conv_out[:, :, :, :end] - - if data_format == 'channels_last': - conv_out = conv_out.dimshuffle((0, 2, 3, 1)) - return conv_out - - -def extract_image_patches(X, ksizes, strides, - padding='valid', - data_format='channels_first'): - ''' - Extract the patches from an image - Parameters - ---------- - X : The input image - ksizes : 2-d tuple with the kernel size - strides : 2-d tuple with the strides size - padding : 'same' or 'valid' - data_format : 'channels_last' or 'channels_first' - Returns - ------- - The (k_w,k_h) patches extracted - TF ==> (batch_size,w,h,k_w,k_h,c) - TH ==> (batch_size,w,h,c,k_w,k_h) - ''' - patch_size = ksizes[1] - if padding == 'same': - padding = 'ignore_borders' - if 
data_format == 'channels_last': - X = KTH.permute_dimensions(X, [0, 3, 1, 2]) - # Thanks to https://github.com/awentzonline for the help! - batch, c, w, h = KTH.shape(X) - xs = KTH.shape(X) - num_rows = 1 + (xs[-2] - patch_size) // strides[1] - num_cols = 1 + (xs[-1] - patch_size) // strides[1] - num_channels = xs[-3] - patches = images2neibs(X, ksizes, strides, padding) - # Theano is sorting by channel - new_shape = (batch, num_channels, num_rows * - num_cols, patch_size, patch_size) - patches = KTH.reshape(patches, new_shape) - patches = KTH.permute_dimensions(patches, (0, 2, 1, 3, 4)) - # arrange in a 2d-grid (rows, cols, channels, px, py) - new_shape = (batch, num_rows, num_cols, - num_channels, patch_size, patch_size) - patches = KTH.reshape(patches, new_shape) - if data_format == 'channels_last': - patches = KTH.permute_dimensions(patches, [0, 1, 2, 4, 5, 3]) - return patches - - -def depth_to_space(input, scale, data_format=None): - """Uses phase shift algorithm to convert - channels/depth for spatial resolution - """ - if data_format is None: - data_format = image_data_format() - data_format = data_format.lower() - input = _preprocess_conv2d_input(input, data_format) - - b, k, row, col = input.shape - out_channels = k // (scale ** 2) - x = T.reshape(input, (b, scale, scale, out_channels, row, col)) - x = T.transpose(x, (0, 3, 4, 1, 5, 2)) - out = T.reshape(x, (b, out_channels, row * scale, col * scale)) - - out = _postprocess_conv2d_output(out, input, None, None, None, data_format) - return out - - -def moments(x, axes, shift=None, keep_dims=False): - ''' Calculates and returns the mean and variance of the input ''' - - mean_batch = KTH.mean(x, axis=axes, keepdims=keep_dims) - var_batch = KTH.var(x, axis=axes, keepdims=keep_dims) - - return mean_batch, var_batch -from .snapshot import SnapshotCallbackBuilder, SnapshotModelCheckpoint -from .dead_relu_detector import DeadReluDetector -from .cyclical_learning_rate import CyclicLR -from .tensorboard import TensorBoardGrouped -from keras.callbacks import Callback -from keras import backend as K -import numpy as np - - -class CyclicLR(Callback): - """This callback implements a cyclical learning rate policy (CLR). - The method cycles the learning rate between two boundaries with - some constant frequency. - # Arguments - base_lr: initial learning rate which is the - lower boundary in the cycle. - max_lr: upper boundary in the cycle. Functionally, - it defines the cycle amplitude (max_lr - base_lr). - The lr at any cycle is the sum of base_lr - and some scaling of the amplitude; therefore - max_lr may not actually be reached depending on - scaling function. - step_size: number of training iterations per - half cycle. Authors suggest setting step_size - 2-8 x training iterations in epoch. - mode: one of {triangular, triangular2, exp_range}. - Default 'triangular'. - Values correspond to policies detailed above. - If scale_fn is not None, this argument is ignored. - gamma: constant in 'exp_range' scaling function: - gamma**(cycle iterations) - scale_fn: Custom scaling policy defined by a single - argument lambda function, where - 0 <= scale_fn(x) <= 1 for all x >= 0. - mode paramater is ignored - scale_mode: {'cycle', 'iterations'}. - Defines whether scale_fn is evaluated on - cycle number or cycle iterations (training - iterations since start of cycle). Default is 'cycle'. - - The amplitude of the cycle can be scaled on a per-iteration or - per-cycle basis. - This class has three built-in policies, as put forth in the paper. 
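# The 'triangular' policy listed just below, as a standalone function; the
# arithmetic is the same as CyclicLR.clr() further down, with the docstring's
# example base_lr/max_lr/step_size values as defaults.
import numpy as np

def triangular_lr(it, base_lr=0.001, max_lr=0.006, step_size=2000.):
    cycle = np.floor(1 + it / (2 * step_size))
    x = np.abs(it / step_size - 2 * cycle + 1)
    return base_lr + (max_lr - base_lr) * np.maximum(0, 1 - x)
# triangular_lr(0) == base_lr; triangular_lr(step_size) == max_lr.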
- "triangular": - A basic triangular cycle w/ no amplitude scaling. - "triangular2": - A basic triangular cycle that scales initial amplitude by half each cycle. - "exp_range": - A cycle that scales initial amplitude by gamma**(cycle iterations) at each - cycle iteration. - For more detail, please see paper. - - # Example for CIFAR-10 w/ batch size 100: - ```python - clr = CyclicLR(base_lr=0.001, max_lr=0.006, - step_size=2000., mode='triangular') - model.fit(X_train, Y_train, callbacks=[clr]) - ``` - - Class also supports custom scaling functions: - ```python - clr_fn = lambda x: 0.5*(1+np.sin(x*np.pi/2.)) - clr = CyclicLR(base_lr=0.001, max_lr=0.006, - step_size=2000., scale_fn=clr_fn, - scale_mode='cycle') - model.fit(X_train, Y_train, callbacks=[clr]) - ``` - - # References - - - [Cyclical Learning Rates for Training Neural Networks]( - https://arxiv.org/abs/1506.01186) - """ - - def __init__( - self, - base_lr=0.001, - max_lr=0.006, - step_size=2000., - mode='triangular', - gamma=1., - scale_fn=None, - scale_mode='cycle'): - super(CyclicLR, self).__init__() - - if mode not in ['triangular', 'triangular2', - 'exp_range']: - raise KeyError("mode must be one of 'triangular', " - "'triangular2', or 'exp_range'") - self.base_lr = base_lr - self.max_lr = max_lr - self.step_size = step_size - self.mode = mode - self.gamma = gamma - if scale_fn is None: - if self.mode == 'triangular': - self.scale_fn = lambda x: 1. - self.scale_mode = 'cycle' - elif self.mode == 'triangular2': - self.scale_fn = lambda x: 1 / (2.**(x - 1)) - self.scale_mode = 'cycle' - elif self.mode == 'exp_range': - self.scale_fn = lambda x: gamma ** x - self.scale_mode = 'iterations' - else: - self.scale_fn = scale_fn - self.scale_mode = scale_mode - self.clr_iterations = 0. - self.trn_iterations = 0. - self.history = {} - - self._reset() - - def _reset(self, new_base_lr=None, new_max_lr=None, - new_step_size=None): - """Resets cycle iterations. - Optional boundary/step size adjustment. - """ - if new_base_lr is not None: - self.base_lr = new_base_lr - if new_max_lr is not None: - self.max_lr = new_max_lr - if new_step_size is not None: - self.step_size = new_step_size - self.clr_iterations = 0. 
- - def clr(self): - cycle = np.floor(1 + self.clr_iterations / (2 * self.step_size)) - x = np.abs(self.clr_iterations / self.step_size - 2 * cycle + 1) - if self.scale_mode == 'cycle': - return self.base_lr + (self.max_lr - self.base_lr) * \ - np.maximum(0, (1 - x)) * self.scale_fn(cycle) - else: - return self.base_lr + (self.max_lr - self.base_lr) * \ - np.maximum(0, (1 - x)) * self.scale_fn(self.clr_iterations) - - def on_train_begin(self, logs={}): - logs = logs or {} - - if self.clr_iterations == 0: - K.set_value(self.model.optimizer.lr, self.base_lr) - else: - K.set_value(self.model.optimizer.lr, self.clr()) - - def on_batch_end(self, epoch, logs=None): - - logs = logs or {} - self.trn_iterations += 1 - self.clr_iterations += 1 - K.set_value(self.model.optimizer.lr, self.clr()) - - self.history.setdefault( - 'lr', []).append( - K.get_value( - self.model.optimizer.lr)) - self.history.setdefault('iterations', []).append(self.trn_iterations) - - for k, v in logs.items(): - self.history.setdefault(k, []).append(v) - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - logs['lr'] = K.get_value(self.model.optimizer.lr) -import numpy as np - -from keras.callbacks import Callback -from keras import backend as K - - -class DeadReluDetector(Callback): - """Reports the number of dead ReLUs after each training epoch - ReLU is considered to be dead if it did not fire once for entire training set - - # Arguments - x_train: Training dataset to check whether or not neurons fire - verbose: verbosity mode - True means that even a single dead neuron triggers a warning message - False means that only significant number of dead neurons (10% or more) - triggers a warning message - """ - - def __init__(self, x_train, verbose=False): - super(DeadReluDetector, self).__init__() - self.x_train = x_train - self.verbose = verbose - self.dead_neurons_share_threshold = 0.1 - - @staticmethod - def is_relu_layer(layer): - # Should work for all layers with relu - # activation. Tested for Dense and Conv2D - return layer.get_config().get('activation', None) == 'relu' - - def get_relu_activations(self): - model_input = self.model.input - is_multi_input = isinstance(model_input, list) - if not is_multi_input: - model_input = [model_input] - - funcs = {} - for index, layer in enumerate(self.model.layers): - if not layer.get_weights(): - continue - funcs[index] = K.function(model_input - + [K.learning_phase()], [layer.output]) - - if is_multi_input: - list_inputs = [] - list_inputs.extend(self.x_train) - list_inputs.append(1.) - else: - list_inputs = [self.x_train, 1.] - - layer_outputs = {} - for index, func in funcs.items(): - layer_outputs[index] = func(list_inputs)[0] - - for layer_index, layer_activations in layer_outputs.items(): - if self.is_relu_layer(self.model.layers[layer_index]): - layer_name = self.model.layers[layer_index].name - # layer_weight is a list [W] (+ [b]) - layer_weight = self.model.layers[layer_index].get_weights() - - # with kernel and bias, the weights are saved as a list [W, b]. 
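# The dead-unit test applied in on_epoch_end below, in isolation: a feature
# map counts as "dead" if its activations sum to exactly zero over every
# sample and spatial position. Toy check with channels_last data:
import numpy as np
acts = np.zeros((8, 4, 4, 3))
acts[..., 0] = 1.0  # only channel 0 ever fires
dead = np.sum(np.sum(acts, axis=(0, 1, 2)) == 0)
assert dead == 2  # channels 1 and 2 never fired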
- # If only weights, it is [W] - if type(layer_weight) is not list: - raise ValueError("'Layer_weight' should be a list, " - "but was {}".format(type(layer_weight))) - - # there are no weights for current layer; skip it - # this is only legitimate if layer is "Activation" - if len(layer_weight) == 0: - continue - - layer_weight_shape = np.shape(layer_weight[0]) - yield [layer_index, - layer_activations, - layer_name, - layer_weight_shape] - - def on_epoch_end(self, epoch, logs={}): - for relu_activation in self.get_relu_activations(): - layer_index = relu_activation[0] - activation_values = relu_activation[1] - layer_name = relu_activation[2] - layer_weight_shape = relu_activation[3] - - shape_act = activation_values.shape - - weight_len = len(layer_weight_shape) - act_len = len(shape_act) - - # should work for both Conv and Flat - if K.image_data_format() == 'channels_last': - # features in last axis - axis_filter = -1 - else: - # features before the convolution axis, for weight_ - # len the input and output have to be subtracted - axis_filter = -1 - (weight_len - 2) - - total_featuremaps = shape_act[axis_filter] - - axis = [] - for i in range(act_len): - if (i != axis_filter) and (i != (len(shape_act) + axis_filter)): - axis.append(i) - axis = tuple(axis) - - dead_neurons = np.sum(np.sum(activation_values, axis=axis) == 0) - - dead_neurons_share = float(dead_neurons) / float(total_featuremaps) - if ((self.verbose and dead_neurons > 0) - or dead_neurons_share >= self.dead_neurons_share_threshold): - str_warning = ('Layer {} (#{}) has {} ' - 'dead neurons ({:.2%})!').format(layer_name, - layer_index, - dead_neurons, - dead_neurons_share) - print(str_warning) -from __future__ import absolute_import -from __future__ import print_function - -import os - -import numpy as np - -from keras.callbacks import Callback, ModelCheckpoint, LearningRateScheduler - -try: - import requests -except ImportError: - requests = None - - -class SnapshotModelCheckpoint(Callback): - """Callback that saves the snapshot weights of the model. - - Saves the model weights on certain epochs (which can be considered the - snapshot of the model at that epoch). - - Should be used with the cosine annealing learning rate schedule to save - the weight just before learning rate is sharply increased. - - # Arguments: - nb_epochs: total number of epochs that the model will be trained for. - nb_snapshots: number of times the weights of the model will be saved. - fn_prefix: prefix for the filename of the weights. - """ - - def __init__(self, nb_epochs, nb_snapshots, fn_prefix='Model'): - super(SnapshotModelCheckpoint, self).__init__() - - self.check = nb_epochs // nb_snapshots - self.fn_prefix = fn_prefix - - def on_epoch_end(self, epoch, logs={}): - if epoch != 0 and (epoch + 1) % self.check == 0: - filepath = self.fn_prefix + '-%d.h5' % ((epoch + 1) // self.check) - self.model.save_weights(filepath, overwrite=True) - # print("Saved snapshot at weights/%s_%d.h5" % (self.fn_prefix, epoch)) - - -class SnapshotCallbackBuilder: - """Callback builder for snapshot ensemble training of a model. - From the paper "Snapshot Ensembles: Train 1, Get M For Free" ( - https://openreview.net/pdf?id=BJYwwY9ll) - - Creates a list of callbacks, which are provided when training a model - so as to save the model weights at certain epochs, and then sharply - increase the learning rate. - """ - - def __init__(self, nb_epochs, nb_snapshots, init_lr=0.1): - """ - Initialize a snapshot callback builder. 
- - # Arguments: - nb_epochs: total number of epochs that the model will be trained for. - nb_snapshots: number of times the weights of the model will be saved. - init_lr: initial learning rate - """ - self.T = nb_epochs - self.M = nb_snapshots - self.alpha_zero = init_lr - - def get_callbacks(self, model_prefix='Model'): - """ - Creates a list of callbacks that can be used during training to create a - snapshot ensemble of the model. - - Args: - model_prefix: prefix for the filename of the weights. - - Returns: list of 3 callbacks [ModelCheckpoint, LearningRateScheduler, - SnapshotModelCheckpoint] which can be provided to the 'fit' function - """ - if not os.path.exists('weights/'): - os.makedirs('weights/') - - callback_list = [ModelCheckpoint('weights/%s-Best.h5' % model_prefix, - monitor='val_acc', - save_best_only=True, save_weights_only=True), - LearningRateScheduler( - schedule=self._cosine_anneal_schedule), - SnapshotModelCheckpoint(self.T, - self.M, - fn_prefix='weights/%s' % model_prefix)] - - return callback_list - - def _cosine_anneal_schedule(self, t): - cos_inner = np.pi * (t % (self.T // self.M)) - cos_inner /= self.T // self.M - cos_out = np.cos(cos_inner) + 1 - return float(self.alpha_zero / 2 * cos_out) -from keras.callbacks import TensorBoard -import numpy as np -import os - - -class TensorBoardGrouped(TensorBoard): - """TensorBoard basic visualizations. - - [TensorBoard](https://www.tensorflow.org/guide/summaries_and_tensorboard) - is a visualization tool provided with TensorFlow. - - This callback is a subclass of `keras.callbacks.TensorBoard`. - The only difference is that the training and validation logs are - grouped and written to the same plot. - - It's a drop-in replacement for the keras callback. - The arguments are the same. - """ - - def __init__(self, log_dir='./logs', *args, **kwargs): - self.base_log_dir = log_dir - self.train_log_dir = os.path.join(log_dir, 'train') - self.val_log_dir = os.path.join(log_dir, 'val') - super(TensorBoardGrouped, self).__init__(self.train_log_dir, - *args, - **kwargs) - - def set_model(self, model): - super(TensorBoardGrouped, self).set_model(model) - import tensorflow as tf - self.val_writer = tf.summary.FileWriter(self.val_log_dir) - - def _write_logs(self, logs, index): - import tensorflow as tf - for name, value in logs.items(): - if name in ['batch', 'size']: - continue - if name.startswith('val_'): - writer = self.val_writer - name = name[4:] # remove val_ - else: - writer = self.writer - summary = tf.Summary() - summary_value = summary.value.add() - if isinstance(value, np.ndarray): - summary_value.simple_value = value.item() - else: - summary_value.simple_value = value - summary_value.tag = name - writer.add_summary(summary, index) - self.writer.flush() - self.val_writer.flush() - - def on_train_end(self, _): - self.writer.close() - self.val_writer.flush() -from __future__ import absolute_import - -from .clip import Clip - -# Aliases. - -clip = Clip -from __future__ import absolute_import -from keras import backend as K -from keras.constraints import Constraint - - -class Clip(Constraint): - """Clips weights to [-c, c]. - - # Arguments - c: Clipping parameter. - """ - - def __init__(self, c=0.01): - self.c = c - - def __call__(self, p): - return K.clip(p, -self.c, self.c) - - def get_config(self): - return {'name': self.__class__.__name__, - 'c': self.c} -#!/usr/bin/env python -# coding=utf-8 -""" -This is a script for downloading and converting the microsoft coco dataset -from mscoco.org. 
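-
-# Usage sketch for the Clip constraint defined above (the 0.01 default
-# mirrors WGAN-style weight clipping; the layer size is illustrative):
-#     from keras.layers import Dense
-#     clipped = Dense(64, kernel_constraint=Clip(0.01))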
This can be run as an independent executable to download -the dataset or be imported by scripts used for larger experiments. -""" -from __future__ import division, print_function, unicode_literals -import os -import errno -import zipfile -import json -from sacred import Experiment, Ingredient -import numpy as np -from PIL import Image -from keras.utils import get_file -from keras.utils.generic_utils import Progbar -from pycocotools.coco import COCO - - -def palette(): - max_cid = max(ids()) + 1 - return [(cid, cid, cid) for cid in range(max_cid)] - - -def cids_to_ids_map(): - return {cid: idx for idx, cid in enumerate(ids())} - - -def ids(): - return [0, - 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 13, 14, 15, 16, 17, - 18, 19, 20, 21, 22, 23, 24, 25, 27, 28, 31, 32, 33, 34, 35, 36, - 37, 38, 39, 40, 41, 42, 43, 44, 46, 47, 48, 49, 50, 51, 52, 53, - 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 67, 70, 72, 73, - 74, 75, 76, 77, 78, 79, 80, 81, 82, 84, 85, 86, 87, 88, 89, 90] - - -def id_to_palette_map(): - return {idx: color for idx, color in enumerate(palette())} - # return {0: (0, 0, 0), idx: (idx, idx, idx) - # for idx, _ in enumerate(categories())} - - -def cid_to_palette_map(): - return {ids()[idx]: color for idx, color in enumerate(palette())} - - -def palette_to_id_map(): - return {color: ids()[idx] for idx, color in enumerate(palette())} - # return {(0, 0, 0): 0, (idx, idx, idx): idx - # for idx, _ in enumerate(categories())} - - -def class_weight(image_segmentation_stats_file=None, - weighting_algorithm='total_pixels_p_complement'): - # weights = defaultdict(lambda: 1.5) - if image_segmentation_stats_file is None: - weights = {i: 1.5 for i in ids()} - weights[0] = 0.5 - return weights - else: - with open(image_segmentation_stats_file, 'r') as fjson: - stats = json.loads(fjson) - return stats[weighting_algorithm] - - -def mask_to_palette_map(cid): - mapper = id_to_palette_map() - return {0: mapper[0], 255: mapper[cid]} - - -def categories(): # 80 classes - return ['background', # class zero - 'person', 'bicycle', 'car', 'motorcycle', - 'airplane', 'bus', 'train', - 'truck', 'boat', 'traffic light', - 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', - 'cat', 'dog', 'horse', 'sheep', 'cow', - 'elephant', 'bear', 'zebra', 'giraffe', - 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', - 'skis', 'snowboard', 'sports ball', 'kite', - 'baseball bat', 'baseball glove', 'skateboard', - 'surfboard', 'tennis racket', 'bottle', - 'wine glass', 'cup', 'fork', 'knife', - 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', - 'broccoli', 'carrot', 'hot dog', 'pizza', - 'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed', - 'dining table', 'toilet', 'tv', 'laptop', - 'mouse', 'remote', 'keyboard', 'cell phone', 'microwave', 'oven', - 'toaster', 'sink', 'refrigerator', 'book', - 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] - - -def id_to_category(category_id): - return {cid: categories()[idx] for idx, cid in enumerate(ids())}[category_id] - - -def category_to_cid_map(): - return {category: ids()[idx] for idx, category in enumerate(categories())} - - -def mkdir_p(path): - # http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python - try: - os.makedirs(path) - except OSError as exc: # Python >2.5 - if exc.errno == errno.EEXIST and os.path.isdir(path): - pass - else: - raise - - -# ============== Ingredient 2: dataset ======================= -data_coco = Experiment("dataset") - - -@data_coco.config -def coco_config(): - # 
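-
-    # The COCO category ids above are sparse (12, 26, 29, 30, ... are
-    # unused), so the helper maps above squeeze the 80 categories plus
-    # background onto dense indices 0..80. Sanity sketch:
-    #     len(ids())             # -> 81
-    #     cids_to_ids_map()[13]  # -> 12, since category id 12 is unused
-    #     id_to_category(13)     # -> 'stop sign'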
TODO(ahundt) add md5 sums for each file - verbose = 1 - coco_api = 'https://github.com/pdollar/coco/' - dataset_root = os.path.join(os.path.expanduser('~'), 'datasets') - dataset_path = os.path.join(dataset_root, 'coco') - urls = [ - 'coco2014/train2014.zip', - 'coco2014/val2014.zip', - 'coco2014/test2014.zip', - 'coco2015/test2015.zip', - 'annotations-1-0-3/instances_train-val2014.zip', - 'annotations-1-0-3/person_keypoints_trainval2014.zip', - 'annotations-1-0-4/image_info_test2014.zip', - 'annotations-1-0-4/image_info_test2015.zip', - 'annotations-1-0-3/captions_train-val2014.zip' - ] - base_url = 'http://msvocds.blob.core.windows.net/' - urls = [base_url + x for x in urls] - data_prefixes = [ - 'train2014', - 'val2014', - 'test2014', - 'test2015', - ] - image_filenames = [prefix + '.zip' for prefix in data_prefixes] - annotation_filenames = [ - 'instances_train-val2014.zip', # training AND validation info - 'image_info_test2014.zip', # basic info like download links + category - 'image_info_test2015.zip', # basic info like download links + category - 'person_keypoints_trainval2014.zip', # elbows, head, wrist etc - 'captions_train-val2014.zip', # descriptions of images - ] - md5s = [ - '0da8c0bd3d6becc4dcb32757491aca88', # train2014.zip - 'a3d79f5ed8d289b7a7554ce06a5782b3', # val2014.zip - '04127eef689ceac55e3a572c2c92f264', # test2014.zip - '65562e58af7d695cc47356951578c041', # test2015.zip - '59582776b8dd745d649cd249ada5acf7', # instances_train-val2014.zip - '926b9df843c698817ee62e0e049e3753', # person_keypoints_trainval2014.zip - 'f3366b66dc90d8ae0764806c95e43c86', # image_info_test2014.zip - '8a5ad1a903b7896df7f8b34833b61757', # image_info_test2015.zip - '5750999c8c964077e3c81581170be65b' # captions_train-val2014.zip - ] - filenames = image_filenames + annotation_filenames - seg_mask_path = os.path.join(dataset_path, 'seg_mask') - annotation_json = [ - 'annotations/instances_train2014.json', - 'annotations/instances_val2014.json' - ] - annotation_paths = [os.path.join(dataset_path, postfix) - for postfix in annotation_json] - # only first two data prefixes contain segmentation masks - seg_mask_image_paths = [os.path.join(dataset_path, prefix) - for prefix in data_prefixes[0:1]] - seg_mask_output_paths = [os.path.join(seg_mask_path, prefix) - for prefix in data_prefixes[0:1]] - seg_mask_extensions = ['.npy' for prefix in data_prefixes[0:1]] - image_dirs = [os.path.join(dataset_path, prefix) - for prefix in data_prefixes] - image_extensions = ['.jpg' for prefix in data_prefixes] - voc_imageset_txt_paths = [os.path.join(dataset_path, - 'annotations', prefix + '.txt') - for prefix in data_prefixes] - - -@data_coco.capture -def coco_files(dataset_path, filenames, dataset_root, urls, md5s, annotation_paths): - print(dataset_path) - print(dataset_root) - print(urls) - print(filenames) - print(md5s) - print(annotation_paths) - return [os.path.join(dataset_path, file) for file in filenames] - - -@data_coco.command -def print_coco_files(dataset_path, filenames, dataset_root, - urls, md5s, annotation_paths): - coco_files(dataset_path, filenames, dataset_root, - urls, md5s, annotation_paths) - - -@data_coco.command -def coco_download(dataset_path, filenames, dataset_root, - urls, md5s, annotation_paths): - zip_paths = coco_files(dataset_path, filenames, dataset_root, - urls, md5s, annotation_paths) - for url, filename, md5 in zip(urls, filenames, md5s): - path = get_file(filename, url, md5_hash=md5, - extract=True, cache_subdir=dataset_path) - # TODO(ahundt) check if it is already extracted, 
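-
-    # get_file() above verifies each download against the md5 sums listed
-    # in coco_config. Reproducing the check by hand (sketch):
-    #     import hashlib
-    #     def md5sum(path, chunk=65536):
-    #         digest = hashlib.md5()
-    #         with open(path, 'rb') as f:
-    #             for block in iter(lambda: f.read(chunk), b''):
-    #                 digest.update(block)
-    #         return digest.hexdigest()
-    #     md5sum('val2014.zip') == 'a3d79f5ed8d289b7a7554ce06a5782b3'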
don't re-extract. see - # https://github.com/fchollet/keras/issues/5861 - zip_file = zipfile.ZipFile(path, 'r') - zip_file.extractall(path=dataset_path) - zip_file.close() - - -@data_coco.command -def coco_json_to_segmentation(seg_mask_output_paths, - annotation_paths, seg_mask_image_paths, verbose): - for (seg_mask_path, annFile, image_path) in zip( - seg_mask_output_paths, annotation_paths, seg_mask_image_paths): - print('Loading COCO Annotations File: ', annFile) - print('Segmentation Mask Output Folder: ', seg_mask_path) - print('Source Image Folder: ', image_path) - print('\n' - 'WARNING: Each pixel can have multiple classes! That means' - 'class data overlaps. Also, single objects can be outlined' - 'multiple times because they were labeled by different people!' - 'In other words, even a single object may be segmented twice.' - 'This means the .png files are missing entire objects.\n\n' - 'Use of categorical one-hot encoded .npy files is recommended,' - 'but .npy files also have limitations, because the .npy files' - 'only have one label per pixel for each class,' - 'and currently take the union of multiple human class labels.' - 'Improving how your data is handled will improve your results' - 'so remember to consider that limitation. There is still' - 'an opportunity to improve how this training data is handled &' - 'integrated with your training scripts and utilities...') - coco = COCO(annFile) - - print('Converting Annotations to Segmentation Masks...') - mkdir_p(seg_mask_path) - total_imgs = len(coco.imgToAnns.keys()) - progbar = Progbar(total_imgs + len(coco.getImgIds()), verbose=verbose) - # 'annotations' was previously 'instances' in an old version - for img_num in range(total_imgs): - # Both [0]'s are used to extract the element from a list - img = coco.loadImgs( - coco.imgToAnns[coco.imgToAnns.keys()[img_num]][0]['image_id'])[0] - h = img['height'] - w = img['width'] - name = img['file_name'] - root_name = name[:-4] - filename = os.path.join(seg_mask_path, root_name + ".png") - file_exists = os.path.exists(filename) - if file_exists: - progbar.update(img_num, [('file_fraction_already_exists', 1)]) - continue - else: - progbar.update(img_num, [('file_fraction_already_exists', 0)]) - print(filename) - - MASK = np.zeros((h, w), dtype=np.uint8) - np.where(MASK > 0) - for ann in coco.imgToAnns[coco.imgToAnns.keys()[img_num]]: - mask = coco.annToMask(ann) - idxs = np.where(mask > 0) - MASK[idxs] = ann['category_id'] - - im = Image.fromarray(MASK) - im.save(filename) - - print('\nConverting Annotations to one hot encoded' - 'categorical .npy Segmentation Masks...') - img_ids = coco.getImgIds() - use_original_dims = True # not target_shape - for idx, img_id in enumerate(img_ids): - img = coco.loadImgs(img_id)[0] - name = img['file_name'] - root_name = name[:-4] - filename = os.path.join(seg_mask_path, root_name + ".npy") - file_exists = os.path.exists(filename) - if file_exists: - progbar.add(1, [('file_fraction_already_exists', 1)]) - continue - else: - progbar.add(1, [('file_fraction_already_exists', 0)]) - - if use_original_dims: - target_shape = (img['height'], img['width'], max(ids()) + 1) - ann_ids = coco.getAnnIds(imgIds=img['id'], iscrowd=None) - anns = coco.loadAnns(ann_ids) - mask_one_hot = np.zeros(target_shape, dtype=np.uint8) - mask_one_hot[:, :, 0] = 1 # every pixel begins as background - # mask_one_hot = cv2.resize(mask_one_hot, - # target_shape[:2], - # interpolation=cv2.INTER_NEAREST) - - for ann in anns: - mask_partial = coco.annToMask(ann) - # mask_partial = 
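-
-        # Portability note: coco.imgToAnns.keys()[img_num] above only works
-        # on Python 2; Python 3 dict views are not indexable. A Python-3-safe
-        # form of the same lookup (sketch, same names as above):
-        #     ann_keys = list(coco.imgToAnns.keys())
-        #     anns = coco.imgToAnns[ann_keys[img_num]]
-        #     img = coco.loadImgs(anns[0]['image_id'])[0]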
cv2.resize(mask_partial, - # (target_shape[1], target_shape[0]), - # interpolation=cv2.INTER_NEAREST) - # # width and height match - # assert mask_one_hot.shape[:2] == mask_partial.shape[:2] - # print('another shape:', - # mask_one_hot[mask_partial > 0].shape) - mask_one_hot[mask_partial > 0, ann['category_id']] = 1 - mask_one_hot[mask_partial > 0, 0] = 0 - - np.save(filename, mask_one_hot) - - -@data_coco.command -def coco_to_pascal_voc_imageset_txt(voc_imageset_txt_paths, image_dirs, - image_extensions): - # os.environ["CUDA_VISIBLE_DEVICES"] = '1' - # Get some image/annotation pairs for example - for imgset_path, img_dir, t_ext in zip( - voc_imageset_txt_paths, image_dirs, image_extensions): - with open(imgset_path, 'w') as txtfile: - [txtfile.write(os.path.splitext(os.path.basename(file))[0] + '\n') - for file in os.listdir(img_dir) if file.endswith(t_ext)] - - -@data_coco.command -def coco_image_segmentation_stats(seg_mask_output_paths, annotation_paths, - seg_mask_image_paths, verbose): - for (seg_mask_path, annFile, image_path) in zip( - seg_mask_output_paths, annotation_paths, seg_mask_image_paths): - print('Loading COCO Annotations File: ', annFile) - print('Segmentation Mask Output Folder: ', seg_mask_path) - print('Source Image Folder: ', image_path) - stats_json = os.path.join(seg_mask_path, - 'image_segmentation_class_stats.json') - print('Image stats will be saved to:', stats_json) - cat_csv = os.path.join(seg_mask_path, - 'class_counts_over_sum_category_counts.csv') - print('Category weights will be saved to:', cat_csv) - coco = COCO(annFile) - print('Annotation file info:') - coco.info() - print('category ids, not including 0 for background:') - print(coco.getCatIds()) - # display COCO categories and supercategories - cats = coco.loadCats(coco.getCatIds()) - nms = [cat['name'] for cat in cats] - print('categories: \n\n', ' '.join(nms)) - - nms = set([cat['supercategory'] for cat in cats]) - print('supercategories: \n', ' '.join(nms)) - img_ids = coco.getImgIds() - use_original_dims = True # not target_shape - max_ids = max(ids()) + 1 # add background category - # 0 indicates no category (not even background) for counting bins - max_bin_count = max_ids + 1 - bin_count = np.zeros(max_bin_count) - total_pixels = 0 - - print('Calculating image segmentation stats...') - progbar = Progbar(len(img_ids), verbose=verbose) - i = 0 - for idx, img_id in enumerate(img_ids): - img = coco.loadImgs(img_id)[0] - i += 1 - progbar.update(i) - ann_ids = coco.getAnnIds(imgIds=img['id'], iscrowd=None) - anns = coco.loadAnns(ann_ids) - target_shape = (img['height'], img['width'], max_ids) - # print('\ntarget_shape:', target_shape) - mask_one_hot = np.zeros(target_shape, dtype=np.uint8) - - # Note to only count background pixels once, we define a temporary - # null class of 0, and shift all class category ids up by 1 - mask_one_hot[:, :, 0] = 1 # every pixel begins as background - - for ann in anns: - mask_partial = coco.annToMask(ann) - above_zero = mask_partial > 0 - mask_one_hot[above_zero, ann['category_id'] - ] = ann['category_id'] + 1 - mask_one_hot[above_zero, 0] = 0 - - # print( mask_one_hot) - # print('initial bin_count shape:', np.shape(bin_count)) - # flat_mask_one_hot = mask_one_hot.flatten() - bincount_result = np.bincount(mask_one_hot.flatten()) - # print('bincount_result TYPE:', type(bincount_result)) - # np.array(np.ndarray.flatten(np.bincount(np.ndarray. 
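-
-        # Note: the manual zero-padding of the bincount result below is not
-        # strictly needed; np.bincount accepts a minlength argument that
-        # pads the counts directly (sketch):
-        #     bincount_result = np.bincount(mask_one_hot.flatten(),
-        #                                   minlength=max_bin_count)
-        #     bin_count = bin_count + bincount_result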
- # flatten(np.array(mask_one_hot)).astype(int))).resize(max_bin_count)) - # print('bincount_result:', bincount_result) - # print('bincount_result_shape', np.shape(bincount_result)) - length = int(np.shape(bincount_result)[0]) - zeros_to_add = max_bin_count - length - z = np.zeros(zeros_to_add) - # print('zeros_to_add TYPE:', type(zeros_to_add)) - # this is a workaround because for some strange reason the - # output type of bincount couldn't interact with other numpy arrays - bincount_result_long = bincount_result.tolist() + z.tolist() - # bincount_result = bincount_result.resize(max_bin_count) - # print('bincount_result2:', bincount_result_long) - # print('bincount_result2_shape',bincount_result_long) - bin_count = bin_count + np.array(bincount_result_long) - total_pixels += (img['height'] * img['width']) - - print('Final Tally:') - # shift categories back down by 1 - bin_count = bin_count[1:] - category_ids = range(bin_count.size) - sum_category_counts = np.sum(bin_count) - - # sum will be =1 as a pixel can be in multiple categories - category_counts_over_sum_category_counts = \ - np.true_divide(bin_count.astype(np.float64), sum_category_counts) - np.savetxt(cat_csv, category_counts_over_sum_category_counts) - - # sum will be >1 as a pixel can be in multiple categories - category_counts_over_total_pixels = \ - np.true_divide(bin_count.astype(np.float64), total_pixels) - - # less common categories have more weight, sum = 1 - category_counts_p_complement = \ - [1 - x if x > 0.0 else 0.0 - for x in category_counts_over_sum_category_counts] - - # less common categories have more weight, sum > 1 - total_pixels_p_complement = \ - [1 - x if x > 0.0 else 0.0 - for x in category_counts_over_total_pixels] - - print(bin_count) - stat_dict = { - 'total_pixels': total_pixels, - 'category_counts': dict(zip(category_ids, bin_count)), - 'sum_category_counts': sum_category_counts, - 'category_counts_over_sum_category_counts': - dict(zip(category_ids, - category_counts_over_sum_category_counts)), - 'category_counts_over_total_pixels': - dict(zip(category_ids, category_counts_over_total_pixels)), - 'category_counts_p_complement': - dict(zip(category_ids, category_counts_p_complement)), - 'total_pixels_p_complement': - dict(zip(category_ids, total_pixels_p_complement)), - 'ids': ids(), - 'categories': categories() - } - print(stat_dict) - with open(stats_json, 'w') as fjson: - json.dump(stat_dict, fjson, ensure_ascii=False) - - -@data_coco.command -def coco_setup(dataset_root, dataset_path, data_prefixes, - filenames, urls, md5s, annotation_paths, - image_dirs, seg_mask_output_paths, verbose, - image_extensions): - # download the dataset - coco_download(dataset_path, filenames, dataset_root, - urls, md5s, annotation_paths) - # convert the relevant files to a more useful format - coco_json_to_segmentation(seg_mask_output_paths, annotation_paths) - coco_to_pascal_voc_imageset_txt(voc_imageset_txt_paths, image_dirs, - image_extensions) - - -@data_coco.automain -def main(dataset_root, dataset_path, data_prefixes, - filenames, urls, md5s, annotation_paths, - image_dirs, seg_mask_output_paths): - coco_config() - coco_setup(data_prefixes, dataset_path, filenames, dataset_root, urls, - md5s, annotation_paths, image_dirs, - seg_mask_output_paths) -from __future__ import print_function -import numpy -from keras.utils.data_utils import get_file -from zipfile import ZipFile -from collections import Counter -from keras.preprocessing.sequence import pad_sequences - - -def load_data(path='conll2000.zip', min_freq=2): - 
path = get_file(path,
-                    origin='https://raw.githubusercontent.com/nltk'
-                           '/nltk_data/gh-pages/packages/corpora/conll2000.zip')
-    print(path)
-    archive = ZipFile(path, 'r')
-    train = _parse_data(archive.open('conll2000/train.txt'))
-    test = _parse_data(archive.open('conll2000/test.txt'))
-    archive.close()
-
-    word_counts = Counter(row[0].lower() for sample in train for row in sample)
-    vocab = ['<pad>', '<unk>']
-    vocab += [w for w, f in iter(word_counts.items()) if f >= min_freq]
-    # in alphabetic order
-    pos_tags = sorted(
-        list(set(row[1] for sample in train + test for row in sample)))
-    # in alphabetic order
-    chunk_tags = sorted(
-        list(set(row[2] for sample in train + test for row in sample)))
-
-    train = _process_data(train, vocab, pos_tags, chunk_tags)
-    test = _process_data(test, vocab, pos_tags, chunk_tags)
-    return train, test, (vocab, pos_tags, chunk_tags)
-
-
-def _parse_data(fh):
-    string = fh.read()
-    data = []
-    for sample in string.decode().strip().split('\n\n'):
-        data.append([row.split() for row in sample.split('\n')])
-    fh.close()
-    return data
-
-
-def _process_data(data, vocab, pos_tags, chunk_tags, maxlen=None, onehot=False):
-    if maxlen is None:
-        maxlen = max(len(s) for s in data)
-    word2idx = dict((w, i) for i, w in enumerate(vocab))
-    # set to <unk> (index 1) if not in vocab
-    x = [[word2idx.get(w[0].lower(), 1) for w in s] for s in data]
-
-    y_pos = [[pos_tags.index(w[1]) for w in s] for s in data]
-    y_chunk = [[chunk_tags.index(w[2]) for w in s] for s in data]
-
-    x = pad_sequences(x, maxlen)  # left padding
-
-    # left padded with -1. Indeed, any integer works as it will be masked
-    y_pos = pad_sequences(y_pos, maxlen, value=-1)
-    y_chunk = pad_sequences(y_chunk, maxlen, value=-1)
-
-    if onehot:
-        y_pos = numpy.eye(len(pos_tags), dtype='float32')[y_pos]
-        y_chunk = numpy.eye(len(chunk_tags), dtype='float32')[y_chunk]
-    else:
-        y_pos = numpy.expand_dims(y_pos, 2)
-        y_chunk = numpy.expand_dims(y_chunk, 2)
-    return x, y_pos, y_chunk
-#!/usr/bin/env python
-# coding=utf-8
-"""
-This is a script for downloading and converting the pascal voc 2012 dataset
-and the berkeley extended version.
-
-# original PASCAL VOC 2012
-# 2 GB
-# http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
-
-# berkeley augmented Pascal VOC
-# 1.3 GB
-# http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz
-
-This can be run as an independent executable to download
-the dataset or be imported by scripts used for larger experiments.
-
-If you aren't sure run this to do a full download + conversion setup of the dataset:
-    ./data_pascal_voc.py pascal_voc_setup
-"""  # pylint: disable=E501
-from __future__ import division, print_function, unicode_literals
-import os
-import shutil
-import errno
-from sacred import Ingredient, Experiment
-from keras.utils import get_file
-import skimage.io as io
-
-
-# ============== Ingredient 2: dataset =======================
-data_pascal_voc = Experiment("dataset")
-
-
-def mkdir_p(path):
-    # http://stackoverflow.com/questions/600268/mkdir-p-functionality-in-python
-    try:
-        os.makedirs(path)
-    except OSError as exc:  # Python >2.5
-        if exc.errno == errno.EEXIST and os.path.isdir(path):
-            pass
-        else:
-            raise
-
-
-def pascal_segmentation_lut():
-    """Return look-up table with number and corresponding class names
-    for PASCAL VOC segmentation dataset. Two special classes are: 0 -
-    background and 255 - ambiguous region. All others are numbered from
-    1 to 20.
- - Returns - ------- - classes_lut : dict - look-up table with number and correspondng class names - """ - - class_names = ['background', 'aeroplane', 'bicycle', 'bird', 'boat', - 'bottle', 'bus', 'car', 'cat', 'chair', 'cow', 'diningtable', - 'dog', 'horse', 'motorbike', 'person', 'potted-plant', - 'sheep', 'sofa', 'train', 'tv/monitor', 'ambigious'] - - enumerated_array = enumerate(class_names[:-1]) - - classes_lut = list(enumerated_array) - - # Add a special class representing ambigious regions - # which has index 255. - classes_lut.append((255, class_names[-1])) - - classes_lut = dict(classes_lut) - - return classes_lut - - -def get_pascal_segmentation_images_lists_txts(pascal_root): - """Return full paths to files in PASCAL VOC with train and val image name lists. - This function returns full paths to files which contain names of images - and respective annotations for the segmentation in PASCAL VOC. - - Parameters - ---------- - pascal_root : string - Full path to the root of PASCAL VOC dataset. - - Returns - ------- - full_filenames_txts : [string, string, string] - Array that contains paths for train/val/trainval txts with images names. - """ - - segmentation_relative_folder = 'ImageSets/Segmentation' - - segmentation_folder = os.path.join( - pascal_root, segmentation_relative_folder) - - pascal_train_list_filename = os.path.join(segmentation_folder, 'train.txt') - - pascal_validation_list_filename = os.path.join( - segmentation_folder, 'val.txt') - - pascal_trainval_list_filename = os.path.join( - segmentation_folder, 'trainval.txt') - - return [ - pascal_train_list_filename, - pascal_validation_list_filename, - pascal_trainval_list_filename - ] - - -def readlines_with_strip(filename): - """Reads lines from specified file with whitespaced removed on both sides. - The function reads each line in the specified file and applies string.strip() - function to each line which results in removing all whitespaces on both ends - of each string. Also removes the newline symbol which is usually present - after the lines wre read using readlines() function. - - Parameters - ---------- - filename : string - Full path to the root of PASCAL VOC dataset. - - Returns - ------- - clean_lines : array of strings - Strings that were read from the file and cleaned up. - """ - - # Get raw filnames from the file - with open(filename, 'r') as f: - lines = f.readlines() - - # Clean filenames from whitespaces and newline symbols - return map(lambda x: x.strip(), lines) - - -def readlines_with_strip_array_version(filenames_array): - """The function that is similar to readlines_with_strip() but for filenames array. - Applies readlines_with_strip() to each filename in the array. - - Parameters - ---------- - filenames_array : array of strings - Array of strings. Each specifies a path to a file. - - Returns - ------- - clean_lines : array of (array of strings) - Strings that were read from the file and cleaned up. - """ - - return map(readlines_with_strip, filenames_array) - - -def add_full_path_and_extention_to_filenames(filenames_array, full_path, extention): - """Concatenates full path to the left of the image and file extention to the right. - The function accepts array of filenames without fullpath and extention like 'cat' - and adds specified full path and extetion to each of the filenames in the array like - 'full/path/to/somewhere/cat.jpg. 
- Parameters - ---------- - filenames_array : array of strings - Array of strings representing filenames - full_path : string - Full path string to be added on the left to each filename - extention : string - Extention string to be added on the right to each filename - Returns - ------- - full_filenames : array of strings - updated array with filenames - """ - return map(lambda x: os.path.join(full_path, x) + '.' + extention, filenames_array) - - -def add_full_path_and_extention_to_filenames_array_version(filenames_array_array, - full_path, - extention): - """Array version of the add_full_path_and_extention_to_filenames() function. - Applies add_full_path_and_extention_to_filenames() to each element of array. - Parameters - ---------- - filenames_array_array : array of array of strings - Array of strings representing filenames - full_path : string - Full path string to be added on the left to each filename - extention : string - Extention string to be added on the right to each filename - Returns - ------- - full_filenames : array of array of strings - updated array of array with filenames - """ - return map(lambda x: add_full_path_and_extention_to_filenames(x, - full_path, - extention), - filenames_array_array) - - -def get_pascal_segmentation_image_annotation_filenames_pairs(pascal_root): - """Return (image, annotation) filenames pairs from PASCAL VOC segmentation dataset. - Returns three dimensional array where first dimension represents the type - of the dataset: train, val or trainval in the respective order. Second - dimension represents the a pair of images in that belongs to a particular - dataset. And third one is responsible for the first or second element in the - dataset. - Parameters - ---------- - pascal_root : string - Path to the PASCAL VOC dataset root that is usually named 'VOC2012' - after being extracted from tar file. - Returns - ------- - image_annotation_filename_pairs : - Array with filename pairs. - """ - - pascal_relative_images_folder = 'JPEGImages' - pascal_relative_class_annotations_folder = 'SegmentationClass' - - images_extention = 'jpg' - annotations_extention = 'png' - - pascal_images_folder = os.path.join( - pascal_root, pascal_relative_images_folder) - pascal_class_annotations_folder = os.path.join( - pascal_root, pascal_relative_class_annotations_folder) - - pascal_images_lists_txts = get_pascal_segmentation_images_lists_txts( - pascal_root) - - pascal_image_names = readlines_with_strip_array_version( - pascal_images_lists_txts) - - images_full_names = add_full_path_and_extention_to_filenames_array_version( - pascal_image_names, - pascal_images_folder, - images_extention, - ) - - annotations_full_names = add_full_path_and_extention_to_filenames_array_version( - pascal_image_names, - pascal_class_annotations_folder, - annotations_extention, - ) - - # Combine so that we have [(images full filenames, annotation full names), .. ] - # where each element in the array represent train, val, trainval sets. - # Overall, we have 3 elements in the array. - temp = zip(images_full_names, annotations_full_names) - - # Now we should combine the elements of images full filenames annotation full names - # so that we have pairs of respective image plus annotation - # [[(pair_1), (pair_1), ..], [(pair_1), (pair_2), ..] ..] 
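-
-    # On toy data, the zip(*x) combination described here turns parallel
-    # per-split name lists into per-split (image, annotation) pairs (the
-    # filenames are made up for illustration):
-    #     images = [['a.jpg', 'b.jpg'], ['c.jpg']]
-    #     annotations = [['a.png', 'b.png'], ['c.png']]
-    #     [list(zip(*split)) for split in zip(images, annotations)]
-    #     # -> [[('a.jpg', 'a.png'), ('b.jpg', 'b.png')],
-    #     #     [('c.jpg', 'c.png')]]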
- # Overall, we have 3 elements -- representing train/val/trainval datasets - image_annotation_filename_pairs = map(lambda x: zip(*x), temp) - - return image_annotation_filename_pairs - - -@data_pascal_voc.command -def convert_pascal_berkeley_augmented_mat_annotations_to_png( - pascal_berkeley_augmented_root): - """ Creates a new folder in the root folder of the dataset with annotations stored - in .png. The function accepts a full path to the root of Berkeley augmented Pascal - VOC segmentation dataset and converts annotations that are stored in .mat files to - .png files. It creates a new folder dataset/cls_png where all the converted files - will be located. If this directory already exists the function does nothing. The - Berkley augmented dataset can be downloaded from here: - http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz - - Parameters - ---------- - pascal_berkeley_augmented_root : string - Full path to the root of augmented Berkley PASCAL VOC dataset. - - """ # pylint: disable=E501 - - import scipy.io - - def read_class_annotation_array_from_berkeley_mat(mat_filename, key='GTcls'): - - # Mat to png conversion for - # http://www.cs.berkeley.edu/~bharath2/codes/SBD/download.html - # 'GTcls' key is for class segmentation - # 'GTinst' key is for instance segmentation - # Credit: - # https://github.com/martinkersner/train-DeepLab/blob/master/utils.py - - mat = scipy.io.loadmat(mat_filename, mat_dtype=True, - squeeze_me=True, struct_as_record=False) - return mat[key].Segmentation - - mat_file_extension_string = '.mat' - png_file_extension_string = '.png' - relative_path_to_annotation_mat_files = 'dataset/cls' - relative_path_to_annotation_png_files = 'dataset/cls_png' - - mat_file_extension_string_length = len(mat_file_extension_string) - - annotation_mat_files_fullpath = os.path.join(pascal_berkeley_augmented_root, - relative_path_to_annotation_mat_files) - - annotation_png_save_fullpath = os.path.join(pascal_berkeley_augmented_root, - relative_path_to_annotation_png_files) - - # Create the folder where all the converted png files will be placed - # If the folder already exists, do nothing - if not os.path.exists(annotation_png_save_fullpath): - os.makedirs(annotation_png_save_fullpath) - else: - return - - mat_files_names = os.listdir(annotation_mat_files_fullpath) - - for current_mat_file_name in mat_files_names: - - current_file_name_without_extention = current_mat_file_name[ - :-mat_file_extension_string_length] - - current_mat_file_full_path = os.path.join(annotation_mat_files_fullpath, - current_mat_file_name) - - current_png_file_full_path_to_be_saved = os.path.join( - annotation_png_save_fullpath, - current_file_name_without_extention, - ) - - current_png_file_full_path_to_be_saved += png_file_extension_string - - annotation_array = read_class_annotation_array_from_berkeley_mat( - current_mat_file_full_path) - - # TODO: hide 'low-contrast' image warning during saving. - io.imsave(current_png_file_full_path_to_be_saved, annotation_array) - - -def get_pascal_berkeley_augmented_segmentation_images_lists_txts(pascal_berkeley_root): - """Return full paths to files in PASCAL Berkley augmented VOC with train and - val image name lists. This function returns full paths to files which contain names - of images and respective annotations for the segmentation in PASCAL VOC. - - Parameters - ---------- - pascal_berkeley_root : string - Full path to the root of PASCAL VOC Berkley augmented dataset. 
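-
-    # The .mat files converted above store the class mask under
-    # mat['GTcls'].Segmentation as a (height, width) array of class ids.
-    # Reading a single file by hand (sketch; the filename is illustrative):
-    #     mat = scipy.io.loadmat('2008_000003.mat', mat_dtype=True,
-    #                            squeeze_me=True, struct_as_record=False)
-    #     mask = mat['GTcls'].Segmentation   # class ids 0-20, 255 = ambiguous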
- - Returns - ------- - full_filenames_txts : [string, string] - Array that contains paths for train/val txts with images names. - """ - - segmentation_relative_folder = 'dataset' - - segmentation_folder = os.path.join(pascal_berkeley_root, - segmentation_relative_folder) - - # TODO: add function that will joing both train.txt and val.txt into - # trainval.txt - pascal_train_list_filename = os.path.join(segmentation_folder, - 'train.txt') - - pascal_validation_list_filename = os.path.join(segmentation_folder, - 'val.txt') - - return [ - pascal_train_list_filename, - pascal_validation_list_filename - ] - - -def get_pascal_berkeley_augmented_segmentation_image_annotation_filenames_pairs( - pascal_berkeley_root): - """Return (image, annotation) filenames pairs from PASCAL Berkeley VOC segmentation - dataset. Returns three dimensional array where first dimension represents the type - of the dataset: train, val in the respective order. Second - dimension represents the a pair of images in that belongs to a particular - dataset. And third one is responsible for the first or second element in the - dataset. - Parameters - ---------- - pascal_berkeley_root : string - Path to the PASCAL Berkeley VOC dataset root that is usually named - 'benchmark_RELEASE' after being extracted from tar file. - Returns - ------- - image_annotation_filename_pairs : - Array with filename pairs. - """ - - pascal_relative_images_folder = 'dataset/img' - pascal_relative_class_annotations_folder = 'dataset/cls_png' - - images_extention = 'jpg' - annotations_extention = 'png' - - pascal_images_folder = os.path.join( - pascal_berkeley_root, pascal_relative_images_folder) - pascal_class_annotations_folder = os.path.join( - pascal_berkeley_root, pascal_relative_class_annotations_folder) - - pascal_images_lists_txts = ( - get_pascal_berkeley_augmented_segmentation_images_lists_txts( - pascal_berkeley_root)) - - pascal_image_names = readlines_with_strip_array_version( - pascal_images_lists_txts) - - images_full_names = add_full_path_and_extention_to_filenames_array_version( - pascal_image_names, - pascal_images_folder, - images_extention, - ) - - annotations_full_names = add_full_path_and_extention_to_filenames_array_version( - pascal_image_names, - pascal_class_annotations_folder, - annotations_extention, - ) - - # Combine so that we have [(images full filenames, annotation full names), .. ] - # where each element in the array represent train, val, trainval sets. - # Overall, we have 3 elements in the array. - temp = zip(images_full_names, annotations_full_names) - - # Now we should combine the elements of images full filenames annotation full names - # so that we have pairs of respective image plus annotation - # [[(pair_1), (pair_1), ..], [(pair_1), (pair_2), ..] ..] - # Overall, we have 3 elements -- representing train/val/trainval datasets - image_annotation_filename_pairs = map(lambda x: zip(*x), temp) - - return image_annotation_filename_pairs - - -def get_pascal_berkeley_augmented_selected_image_annotation_filenames_pairs( - pascal_berkeley_root, - selected_names, -): - """Returns (image, annotation) filenames pairs from PASCAL Berkeley VOC segmentation - dataset for selected names. The function accepts the selected file names from PASCAL - Berkeley VOC segmentation dataset and returns image, annotation pairs with fullpath - and extention for those names. 
- - Parameters - ---------- - pascal_berkeley_root : string - Path to the PASCAL Berkeley VOC dataset root that is usually named - 'benchmark_RELEASE' after being extracted from tar file. - selected_names : array of strings - Selected filenames from PASCAL VOC Berkeley that can be read from txt files that - come with dataset. - Returns - ------- - image_annotation_pairs : - Array with filename pairs with fullnames. - """ - pascal_relative_images_folder = 'dataset/img' - pascal_relative_class_annotations_folder = 'dataset/cls_png' - - images_extention = 'jpg' - annotations_extention = 'png' - - pascal_images_folder = os.path.join( - pascal_berkeley_root, pascal_relative_images_folder) - pascal_class_annotations_folder = os.path.join( - pascal_berkeley_root, pascal_relative_class_annotations_folder) - - images_full_names = add_full_path_and_extention_to_filenames( - selected_names, - pascal_images_folder, - images_extention, - ) - - annotations_full_names = add_full_path_and_extention_to_filenames( - selected_names, - pascal_class_annotations_folder, - annotations_extention, - ) - - image_annotation_pairs = zip(images_full_names, - annotations_full_names) - - return image_annotation_pairs - - -def get_pascal_selected_image_annotation_filenames_pairs(pascal_root, selected_names): - """Returns (image, annotation) filenames pairs from PASCAL VOC segmentation dataset - for selected names. The function accepts the selected file names from PASCAL VOC - segmentation dataset and returns image, annotation pairs with fullpath and extention - for those names. - - Parameters - ---------- - pascal_root : string - Path to the PASCAL VOC dataset root that is usually named 'VOC2012' - after being extracted from tar file. - selected_names : array of strings - Selected filenames from PASCAL VOC that can be read from txt files that - come with dataset. - Returns - ------- - image_annotation_pairs : - Array with filename pairs with fullnames. - """ - pascal_relative_images_folder = 'JPEGImages' - pascal_relative_class_annotations_folder = 'SegmentationClass' - - images_extention = 'jpg' - annotations_extention = 'png' - - pascal_images_folder = os.path.join( - pascal_root, pascal_relative_images_folder) - pascal_class_annotations_folder = os.path.join( - pascal_root, pascal_relative_class_annotations_folder) - - images_full_names = add_full_path_and_extention_to_filenames(selected_names, - pascal_images_folder, - images_extention) - - annotations_full_names = add_full_path_and_extention_to_filenames( - selected_names, - pascal_class_annotations_folder, - annotations_extention, - ) - - image_annotation_pairs = zip(images_full_names, - annotations_full_names) - - return image_annotation_pairs - - -def get_augmented_pascal_image_annotation_filename_pairs(pascal_root, - pascal_berkeley_root, - mode=2): - """Returns image/annotation filenames pairs train/val splits from combined Pascal - VOC. Returns two arrays with train and validation split respectively that has - image full filename/ annotation full filename pairs in each of the that were derived - from PASCAL and PASCAL Berkeley Augmented dataset. The Berkley augmented dataset - can be downloaded from here: - http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz - Consider running convert_pascal_berkeley_augmented_mat_annotations_to_png() after - extraction. 
- - The PASCAL VOC dataset can be downloaded from here: - http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar - Consider specifying root full names for both of them as arguments for this function - after extracting them. - The function has three type of train/val splits(credit matconvnet-fcn): - - Let BT, BV, PT, PV, and PX be the Berkeley training and validation - sets and PASCAL segmentation challenge training, validation, and - test sets. Let T, V, X the final trainig, validation, and test - sets. - - Mode 1:: - V = PV (same validation set as PASCAL) - - Mode 2:: (default)) - V = PV \ BT (PASCAL val set that is not a Berkeley training - image) - - Mode 3:: - V = PV \ (BV + BT) - - In all cases: - - S = PT + PV + BT + BV - X = PX (the test set is uncahgend) - T = (S \ V) \ X (the rest is training material) - Parameters - ---------- - pascal_root : string - Path to the PASCAL VOC dataset root that is usually named 'VOC2012' - after being extracted from tar file. - pascal_berkeley_root : string - Path to the PASCAL Berkeley VOC dataset root that is usually named - 'benchmark_RELEASE' after being extracted from tar file. - mode: int - The type of train/val data split. Read the function main description for more - info. - Returns - ------- - image_annotation_pairs : Array with filename pairs with fullnames. - [[(str, str), .. , (str, str)][(str, str), .., (str, str)]] - """ # pylint: disable=E501 - pascal_txts = get_pascal_segmentation_images_lists_txts( - pascal_root=pascal_root) - berkeley_txts = get_pascal_berkeley_augmented_segmentation_images_lists_txts( - pascal_berkeley_root=pascal_berkeley_root) - - pascal_name_lists = readlines_with_strip_array_version(pascal_txts) - berkeley_name_lists = readlines_with_strip_array_version(berkeley_txts) - - pascal_train_name_set, pascal_val_name_set, _ = map( - lambda x: set(x), pascal_name_lists) - berkeley_train_name_set, berkeley_val_name_set = map( - lambda x: set(x), berkeley_name_lists) - - all_berkeley = berkeley_train_name_set | berkeley_val_name_set - all_pascal = pascal_train_name_set | pascal_val_name_set - - everything = all_berkeley | all_pascal - - # Extract the validation subset based on selected mode - if mode == 1: - # 1449 validation images, 10582 training images - validation = pascal_val_name_set - - if mode == 2: - # 904 validatioin images, 11127 training images - validation = pascal_val_name_set - berkeley_train_name_set - - if mode == 3: - # 346 validation images, 11685 training images - validation = pascal_val_name_set - all_berkeley - - # The rest of the dataset is for training - train = everything - validation - - # Get the part that can be extracted from berkeley - train_from_berkeley = train & all_berkeley - - # The rest of the data will be loaded from pascal - train_from_pascal = train - train_from_berkeley - - train_from_berkeley_image_annotation_pairs = ( - get_pascal_berkeley_augmented_selected_image_annotation_filenames_pairs( - pascal_berkeley_root, - list(train_from_berkeley))) - - train_from_pascal_image_annotation_pairs = \ - get_pascal_selected_image_annotation_filenames_pairs(pascal_root, - list(train_from_pascal)) - - overall_train_image_annotation_filename_pairs = \ - list(train_from_berkeley_image_annotation_pairs) + \ - list(train_from_pascal_image_annotation_pairs) - - overall_val_image_annotation_filename_pairs = \ - get_pascal_selected_image_annotation_filenames_pairs(pascal_root, - validation) - - return (overall_train_image_annotation_filename_pairs, - 
overall_val_image_annotation_filename_pairs) - - -def pascal_filename_pairs_to_imageset_txt(voc_imageset_txt_path, filename_pairs, - image_extension='.jpg'): - with open(voc_imageset_txt_path, 'w') as txtfile: - [txtfile.write(os.path.splitext(os.path.basename(file1))[0] + '\n') - for file1, file2 in filename_pairs if file1.endswith(image_extension)] - - -def pascal_combine_annotation_files(filename_pairs, output_annotations_path): - mkdir_p(output_annotations_path) - for img_path, gt_path in filename_pairs: - shutil.copy2(gt_path, output_annotations_path) - - -@data_pascal_voc.config -def voc_config(): - # TODO(ahundt) add md5 sums for each file - verbose = True - dataset_root = os.path.join(os.path.expanduser("~"), '.keras', 'datasets') - dataset_path = dataset_root + '/VOC2012' - # sys.path.append("tf-image-segmentation/") - # os.environ["CUDA_VISIBLE_DEVICES"] = '1' - # based on https://github.com/martinkersner/train-DeepLab - - # original PASCAL VOC 2012 - # wget - # http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar - # # 2 GB - pascal_root = dataset_path + '/VOCdevkit/VOC2012' - - # berkeley augmented Pascal VOC - # wget - # http://www.eecs.berkeley.edu/Research/Projects/CS/vision/grouping/semantic_contours/benchmark.tgz - # # 1.3 GB - - # Pascal Context - # http://www.cs.stanford.edu/~roozbeh/pascal-context/ - # http://www.cs.stanford.edu/~roozbeh/pascal-context/trainval.tar.gz - pascal_berkeley_root = dataset_path + '/benchmark_RELEASE' - urls = [ - 'http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', - 'http://www.eecs.berkeley.edu/Research/Projects/' - 'CS/vision/grouping/semantic_contours/benchmark.tgz', - 'http://www.cs.stanford.edu/~roozbeh/pascal-context/trainval.tar.gz', - 'http://www.cs.stanford.edu/~roozbeh/pascal-context/33_context_labels.tar.gz', - 'http://www.cs.stanford.edu/~roozbeh/pascal-context/59_context_labels.tar.gz', - 'http://www.cs.stanford.edu/~roozbeh/pascal-context/33_labels.txt', - 'http://www.cs.stanford.edu/~roozbeh/pascal-context/59_labels.txt' - ] - filenames = ['VOCtrainval_11-May-2012.tar', - 'benchmark.tgz', - 'trainval.tar.gz', - '33_context_labels.tar.gz', - '59_context_labels.tar.gz', - '33_labels.txt', - '59_labels.txt' - ] - - md5s = ['6cd6e144f989b92b3379bac3b3de84fd', - '82b4d87ceb2ed10f6038a1cba92111cb', - 'df034edb2c12aa7d33b42b20bb1796e3', - '180101cfc01c71867b6686207f071eb9', - 'f85d450010762a0e1080304286ce30ed', - '8840f5439b471aecf991ac6448b826e6', - '993901f2d930cc038c406845f08fa082'] - - combined_imageset_train_txt = dataset_path + '/combined_imageset_train.txt' - combined_imageset_val_txt = dataset_path + '/combined_imageset_val.txt' - combined_annotations_path = dataset_path + '/combined_annotations' - - # see get_augmented_pascal_image_annotation_filename_pairs() - voc_data_subset_mode = 2 - - -@data_pascal_voc.capture -def pascal_voc_files(dataset_path, filenames, dataset_root, urls, md5s): - print(dataset_path) - print(dataset_root) - print(urls) - print(filenames) - print(md5s) - return [dataset_path + filename for filename in filenames] - - -@data_pascal_voc.command -def pascal_voc_download(dataset_path, filenames, dataset_root, urls, md5s): - zip_paths = pascal_voc_files( - dataset_path, filenames, dataset_root, urls, md5s) - for url, filename, md5 in zip(urls, filenames, md5s): - path = get_file(filename, url, md5_hash=md5, - extract=True, cache_subdir=dataset_path) - - -@data_pascal_voc.command -def pascal_voc_berkeley_combined(dataset_path, - pascal_root, - 
pascal_berkeley_root, - voc_data_subset_mode, - combined_imageset_train_txt, - combined_imageset_val_txt, - combined_annotations_path): - # Returns a list of (image, annotation) - # filename pairs (filename.jpg, filename.png) - overall_train_image_annotation_filename_pairs, \ - overall_val_image_annotation_filename_pairs = \ - get_augmented_pascal_image_annotation_filename_pairs( - pascal_root=pascal_root, - pascal_berkeley_root=pascal_berkeley_root, - mode=voc_data_subset_mode) - # combine the annotation files into one folder - pascal_combine_annotation_files( - list(overall_train_image_annotation_filename_pairs) + - list(overall_val_image_annotation_filename_pairs), - combined_annotations_path) - # generate the train imageset txt - pascal_filename_pairs_to_imageset_txt( - combined_imageset_train_txt, - overall_train_image_annotation_filename_pairs - ) - # generate the val imageset txt - pascal_filename_pairs_to_imageset_txt( - combined_imageset_val_txt, - overall_val_image_annotation_filename_pairs - ) - - -@data_pascal_voc.command -def pascal_voc_setup(filenames, dataset_path, pascal_root, - pascal_berkeley_root, dataset_root, - voc_data_subset_mode, - urls, md5s, - combined_imageset_train_txt, - combined_imageset_val_txt, - combined_annotations_path): - # download the dataset - pascal_voc_download(dataset_path, filenames, - dataset_root, urls, md5s) - # convert the relevant files to a more useful format - convert_pascal_berkeley_augmented_mat_annotations_to_png( - pascal_berkeley_root) - pascal_voc_berkeley_combined(dataset_path, - pascal_root, - pascal_berkeley_root, - voc_data_subset_mode, - combined_imageset_train_txt, - combined_imageset_val_txt, - combined_annotations_path) - - -@data_pascal_voc.automain -def main(filenames, dataset_path, pascal_root, - pascal_berkeley_root, dataset_root, - voc_data_subset_mode, - urls, md5s, - combined_imageset_train_txt, - combined_imageset_val_txt, - combined_annotations_path): - voc_config() - pascal_voc_setup(filenames, dataset_path, pascal_root, - pascal_berkeley_root, dataset_root, - voc_data_subset_mode, - urls, md5s, - combined_imageset_train_txt, - combined_imageset_val_txt, - combined_annotations_path) -from __future__ import absolute_import - -from .convaware import ConvolutionAware -from __future__ import absolute_import -import numpy as np -from keras import backend as K -from keras.initializers import Initializer, Orthogonal - - -class ConvolutionAware(Initializer): - """ - Initializer that generates orthogonal convolution filters in the fourier - space. If this initializer is passed a shape that is not 3D or 4D, - orthogonal initialization will be used. - # Arguments - eps_std: Standard deviation for the random normal noise used to break - symmetry in the inverse fourier transform. - seed: A Python integer. Used to seed the random generator. 
- # References - Armen Aghajanyan, https://arxiv.org/abs/1702.06295 - """ - - def __init__(self, eps_std=0.05, seed=None): - self.eps_std = eps_std - self.seed = seed - self.orthogonal = Orthogonal() - - def __call__(self, shape): - rank = len(shape) - - if self.seed is not None: - np.random.seed(self.seed) - - fan_in, fan_out = _compute_fans(shape, K.image_data_format()) - variance = 2 / fan_in - - if rank == 3: - row, stack_size, filters_size = shape - - transpose_dimensions = (2, 1, 0) - kernel_shape = (row,) - correct_ifft = lambda shape, s=[None]: np.fft.irfft(shape, s[0]) - correct_fft = np.fft.rfft - - elif rank == 4: - row, column, stack_size, filters_size = shape - - transpose_dimensions = (2, 3, 0, 1) - kernel_shape = (row, column) - correct_ifft = np.fft.irfft2 - correct_fft = np.fft.rfft2 - - elif rank == 5: - x, y, z, stack_size, filters_size = shape - - transpose_dimensions = (3, 4, 0, 1, 2) - kernel_shape = (x, y, z) - correct_fft = np.fft.rfftn - correct_ifft = np.fft.irfftn - else: - return K.variable(self.orthogonal(shape), dtype=K.floatx()) - - kernel_fourier_shape = correct_fft(np.zeros(kernel_shape)).shape - - init = [] - for i in range(filters_size): - basis = self._create_basis( - stack_size, np.prod(kernel_fourier_shape)) - basis = basis.reshape((stack_size,) + kernel_fourier_shape) - - filters = [correct_ifft(x, kernel_shape) + - np.random.normal(0, self.eps_std, kernel_shape) for - x in basis] - - init.append(filters) - - # Format of array is now: filters, stack, row, column - init = np.array(init) - init = self._scale_filters(init, variance) - return init.transpose(transpose_dimensions) - - def _create_basis(self, filters, size): - if size == 1: - return np.random.normal(0.0, self.eps_std, (filters, size)) - - nbb = filters // size + 1 - li = [] - for i in range(nbb): - a = np.random.normal(0.0, 1.0, (size, size)) - a = self._symmetrize(a) - u, _, v = np.linalg.svd(a) - li.extend(u.T.tolist()) - p = np.array(li[:filters], dtype=K.floatx()) - return p - - def _symmetrize(self, a): - return a + a.T - np.diag(a.diagonal()) - - def _scale_filters(self, filters, variance): - c_var = np.var(filters) - p = np.sqrt(variance / c_var) - return filters * p - - def get_config(self): - return { - 'eps_std': self.eps_std, - 'seed': self.seed - } - - -def _compute_fans(shape, data_format='channels_last'): - """Computes the number of input and output units for a weight shape. - - # Arguments - shape: Integer shape tuple. - data_format: Image data format to use for convolution kernels. - Note that all kernels in Keras are standardized on the - `channels_last` ordering (even when inputs are set - to `channels_first`). - - # Returns - A tuple of scalars, `(fan_in, fan_out)`. - - # Raises - ValueError: in case of invalid `data_format` argument. - """ - if len(shape) == 2: - fan_in = shape[0] - fan_out = shape[1] - elif len(shape) in {3, 4, 5}: - # Assuming convolution kernels (1D, 2D or 3D). - # TH kernel shape: (depth, input_depth, ...) - # TF kernel shape: (..., input_depth, depth) - if data_format == 'channels_first': - receptive_field_size = np.prod(shape[2:]) - fan_in = shape[1] * receptive_field_size - fan_out = shape[0] * receptive_field_size - elif data_format == 'channels_last': - receptive_field_size = np.prod(shape[:-2]) - fan_in = shape[-2] * receptive_field_size - fan_out = shape[-1] * receptive_field_size - else: - raise ValueError('Invalid data_format: ' + data_format) - else: - # No specific assumptions. 
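-
-        # Worked example for the convolution branches above: a 3x3 Conv2D
-        # kernel of shape (3, 3, 64, 128) in channels_last gives
-        # receptive_field_size = 3 * 3 = 9, fan_in = 64 * 9 = 576 and
-        # fan_out = 128 * 9 = 1152; ConvolutionAware then draws filters
-        # with variance 2 / fan_in, He-style.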
- fan_in = np.sqrt(np.prod(shape)) - fan_out = np.sqrt(np.prod(shape)) - return fan_in, fan_out -from __future__ import absolute_import - -from .advanced_activations.pelu import PELU -from .advanced_activations.srelu import SReLU -from .advanced_activations.swish import Swish -from .advanced_activations.sinerelu import SineReLU - -from .convolutional.cosineconvolution2d import CosineConv2D -from .convolutional.cosineconvolution2d import CosineConvolution2D -from .convolutional.subpixelupscaling import SubPixelUpscaling - -from .core import CosineDense - -from .crf import CRF - -from .capsule import Capsule - -from .normalization.instancenormalization import InstanceNormalization -from .normalization.groupnormalization import GroupNormalization -# -*- coding: utf-8 -*- -from __future__ import absolute_import - -from keras import backend as K -from keras import activations -from keras import regularizers -from keras import initializers -from keras import constraints -from keras.layers import Layer -from keras_contrib.utils.test_utils import to_tuple - - -class Capsule(Layer): - """Capsule Layer implementation in Keras - - This implementation is based on Dynamic Routing of Capsules, - Geoffrey Hinton et. al. - - The Capsule Layer is a Neural Network Layer which helps - modeling relationships in image and sequential data better - than just CNNs or RNNs. It achieves this by understanding - the spatial relationships between objects (in images) - or words (in text) by encoding additional information - about the image or text, such as angle of rotation, - thickness and brightness, relative proportions etc. - This layer can be used instead of pooling layers to - lower dimensions and still capture important information - about the relationships and structures within the data. - A normal pooling layer would lose a lot of - this information. - - This layer can be used on the output of any layer - which has a 3-D output (including batch_size). For example, - in image classification, it can be used on the output of a - Conv2D layer for Computer Vision applications. Also, - it can be used on the output of a GRU or LSTM Layer - (Bidirectional or Unidirectional) for NLP applications. - - The default activation function is 'linear'. But, this layer - is generally used with the 'squash' activation function - (recommended). To use the squash activation function, do : - - from keras_contrib.activations import squash - - capsule = Capsule(num_capsule=10, - dim_capsule=10, - routings=3, - share_weights=True, - activation=squash) - - # Example usage : - 1). COMPUTER VISION - - input_image = Input(shape=(None, None, 3)) - - conv_2d = Conv2D(64, - (3, 3), - activation='relu')(input_image) - - capsule = Capsule(num_capsule=10, - dim_capsule=16, - routings=3, - activation='relu', - share_weights=True)(conv_2d) - - 2). 
NLP
-
-        maxlen = 72
-        max_features = 120000
-        input_text = Input(shape=(maxlen,))
-
-        embedding = Embedding(max_features,
-                              embed_size,
-                              weights=[embedding_matrix],
-                              trainable=False)(input_text)
-
-        bi_gru = Bidirectional(GRU(64,
-                                   return_sequences=True))(embedding)
-
-        capsule = Capsule(num_capsule=5,
-                          dim_capsule=5,
-                          routings=4,
-                          activation='sigmoid',
-                          share_weights=True)(bi_gru)
-
-    # Arguments
-        num_capsule : Number of Capsules (int)
-        dim_capsule : Dimensions of the vector output of each Capsule (int)
-        routings : Number of dynamic routings in the Capsule Layer (int)
-        share_weights : Whether to share weights between Capsules or not
-            (boolean)
-        activation : Activation function for the Capsules
-        regularizer : Regularizer for the weights of the Capsules
-        initializer : Initializer for the weights of the Capsules
-        constraint : Constraint for the weights of the Capsules
-
-    # Input shape
-        3D tensor with shape:
-        (batch_size, input_num_capsule, input_dim_capsule)
-        [any 3-D Tensor with the first dimension as batch_size]
-
-    # Output shape
-        3D tensor with shape:
-        (batch_size, num_capsule, dim_capsule)
-
-    # References
-    - [Dynamic-Routing-Between-Capsules]
-      (https://arxiv.org/pdf/1710.09829.pdf)
-    - [Keras-Examples-CIFAR10-CNN-Capsule]"""
-
-    def __init__(self,
-                 num_capsule,
-                 dim_capsule,
-                 routings=3,
-                 share_weights=True,
-                 initializer='glorot_uniform',
-                 activation=None,
-                 regularizer=None,
-                 constraint=None,
-                 **kwargs):
-        super(Capsule, self).__init__(**kwargs)
-        self.num_capsule = num_capsule
-        self.dim_capsule = dim_capsule
-        self.routings = routings
-        self.share_weights = share_weights
-
-        self.activation = activations.get(activation)
-        self.regularizer = regularizers.get(regularizer)
-        self.initializer = initializers.get(initializer)
-        self.constraint = constraints.get(constraint)
-
-    def build(self, input_shape):
-        input_shape = to_tuple(input_shape)
-        input_dim_capsule = input_shape[-1]
-        if self.share_weights:
-            self.W = self.add_weight(name='capsule_kernel',
-                                     shape=(1,
-                                            input_dim_capsule,
-                                            self.num_capsule *
-                                            self.dim_capsule),
-                                     initializer=self.initializer,
-                                     regularizer=self.regularizer,
-                                     constraint=self.constraint,
-                                     trainable=True)
-        else:
-            input_num_capsule = input_shape[-2]
-            self.W = self.add_weight(name='capsule_kernel',
-                                     shape=(input_num_capsule,
-                                            input_dim_capsule,
-                                            self.num_capsule *
-                                            self.dim_capsule),
-                                     initializer=self.initializer,
-                                     regularizer=self.regularizer,
-                                     constraint=self.constraint,
-                                     trainable=True)
-
-        self.built = True
-
-    def call(self, inputs):
-        if self.share_weights:
-            u_hat_vectors = K.conv1d(inputs, self.W)
-        else:
-            u_hat_vectors = K.local_conv1d(inputs, self.W, [1], [1])
-
-        # u_hat_vectors : The spatially transformed input vectors (with local_conv_1d)
-
-        batch_size = K.shape(inputs)[0]
-        input_num_capsule = K.shape(inputs)[1]
-        u_hat_vectors = K.reshape(u_hat_vectors, (batch_size,
-                                                  input_num_capsule,
-                                                  self.num_capsule,
-                                                  self.dim_capsule))
-
-        u_hat_vectors = K.permute_dimensions(u_hat_vectors, (0, 2, 1, 3))
-        routing_weights = K.zeros_like(u_hat_vectors[:, :, :, 0])
-
-        for i in range(self.routings):
-            capsule_weights = K.softmax(routing_weights, 1)
-            outputs = K.batch_dot(capsule_weights, u_hat_vectors, [2, 2])
-            if K.ndim(outputs) == 4:
-                outputs = K.sum(outputs, axis=1)
-            if i < self.routings - 1:
-                outputs = K.l2_normalize(outputs, -1)
-                routing_weights = K.batch_dot(outputs, u_hat_vectors, [2, 3])
-                if K.ndim(routing_weights) == 4:
-                    routing_weights = K.sum(routing_weights, axis=1)
-
-        return
self.activation(outputs) - - def compute_output_shape(self, input_shape): - return (None, self.num_capsule, self.dim_capsule) - - def get_config(self): - config = {'num_capsule': self.num_capsule, - 'dim_capsule': self.dim_capsule, - 'routings': self.routings, - 'share_weights': self.share_weights, - 'activation': activations.serialize(self.activation), - 'regularizer': regularizers.serialize(self.regularizer), - 'initializer': initializers.serialize(self.initializer), - 'constraint': constraints.serialize(self.constraint)} - - base_config = super(Capsule, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -# -*- coding: utf-8 -*- -from __future__ import absolute_import -from __future__ import division - -from keras import backend as K -from keras import activations -from keras import initializers -from keras import regularizers -from keras import constraints -from keras.layers import InputSpec -from keras.layers import Layer -from keras_contrib.utils.test_utils import to_tuple - - -class CosineDense(Layer): - """A cosine normalized densely-connected NN layer - - # Example - - ```python - # as first layer in a sequential model: - model = Sequential() - model.add(CosineDense(32, input_dim=16)) - # now the model will take as input arrays of shape (*, 16) - # and output arrays of shape (*, 32) - - # this is equivalent to the above: - model = Sequential() - model.add(CosineDense(32, input_shape=(16,))) - - # after the first layer, you don't need to specify - # the size of the input anymore: - model.add(CosineDense(32)) - - # Note that a regular Dense layer may work better as the final layer - ``` - - # Arguments - units: Positive integer, dimensionality of the output space. - init: name of initialization function for the weights of the layer - (see [initializers](https://keras.io/initializers)), - or alternatively, Theano function to use for weights - initialization. This parameter is only relevant - if you don't pass a `weights` argument. - activation: name of activation function to use - (see [activations](https://keras.io/activations)), - or alternatively, elementwise Python function. - If you don't specify anything, no activation is applied - (ie. "linear" activation: a(x) = x). - weights: list of Numpy arrays to set as initial weights. - The list should have 2 elements, of shape `(input_dim, units)` - and (units,) for weights and biases respectively. - kernel_regularizer: instance of [WeightRegularizer]( - https://keras.io/regularizers) - (eg. L1 or L2 regularization), applied to the main weights matrix. - bias_regularizer: instance of [WeightRegularizer]( - https://keras.io/regularizers), applied to the bias. - activity_regularizer: instance of [ActivityRegularizer]( - https://keras.io/regularizers), applied to the network output. - kernel_constraint: instance of the [constraints]( - https://keras.io/constraints/) module - (eg. maxnorm, nonneg), applied to the main weights matrix. - bias_constraint: instance of the [constraints]( - https://keras.io/constraints/) module, applied to the bias. - use_bias: whether to include a bias - (i.e. make the layer affine rather than linear). - input_dim: dimensionality of the input (integer). This argument - (or alternatively, the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. - - # Input shape - nD tensor with shape: `(nb_samples, ..., input_dim)`. - The most common situation would be - a 2D input with shape `(nb_samples, input_dim)`. 
- - # Output shape - nD tensor with shape: `(nb_samples, ..., units)`. - For instance, for a 2D input with shape `(nb_samples, input_dim)`, - the output would have shape `(nb_samples, units)`. - - # References - - [Cosine Normalization: Using Cosine Similarity Instead - of Dot Product in Neural Networks](https://arxiv.org/pdf/1702.05870.pdf) - """ - - def __init__(self, units, kernel_initializer='glorot_uniform', - activation=None, weights=None, - kernel_regularizer=None, bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, bias_constraint=None, - use_bias=True, **kwargs): - if 'input_shape' not in kwargs and 'input_dim' in kwargs: - kwargs['input_shape'] = (kwargs.pop('input_dim'),) - - self.kernel_initializer = initializers.get(kernel_initializer) - self.activation = activations.get(activation) - self.units = units - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - self.use_bias = use_bias - self.initial_weights = weights - super(CosineDense, self).__init__(**kwargs) - - def build(self, input_shape): - input_shape = to_tuple(input_shape) - ndim = len(input_shape) - assert ndim >= 2 - input_dim = input_shape[-1] - self.input_dim = input_dim - self.input_spec = [InputSpec(dtype=K.floatx(), - ndim=ndim)] - - self.kernel = self.add_weight(shape=(input_dim, self.units), - initializer=self.kernel_initializer, - name='{}_W'.format(self.name), - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - if self.use_bias: - self.bias = self.add_weight(shape=(self.units,), - initializer='zero', - name='{}_b'.format(self.name), - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - self.built = True - - def call(self, x, mask=None): - if self.use_bias: - b, xb = self.bias, 1. - else: - b, xb = 0., 0. 
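-        # Cosine normalization: the dot product below is divided by the norms
-        # of the input rows and the kernel columns; with use_bias, the bias is
-        # folded into both norms as an extra feature fixed at 1 (the xb term).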
-
-        xnorm = K.sqrt(K.sum(K.square(x), axis=-1, keepdims=True)
-                       + xb
-                       + K.epsilon())
-        Wnorm = K.sqrt(K.sum(K.square(self.kernel), axis=0)
-                       + K.square(b)
-                       + K.epsilon())
-
-        xWnorm = (xnorm * Wnorm)
-
-        output = K.dot(x, self.kernel) / xWnorm
-        if self.use_bias:
-            output += (self.bias / xWnorm)
-        return self.activation(output)
-
-    def compute_output_shape(self, input_shape):
-        assert input_shape
-        assert len(input_shape) >= 2
-        assert input_shape[-1]
-        output_shape = list(input_shape)
-        output_shape[-1] = self.units
-        return tuple(output_shape)
-
-    def get_config(self):
-        config = {
-            'units': self.units,
-            'kernel_initializer': initializers.serialize(self.kernel_initializer),
-            'activation': activations.serialize(self.activation),
-            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
-            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
-            'activity_regularizer':
-                regularizers.serialize(self.activity_regularizer),
-            'kernel_constraint': constraints.serialize(self.kernel_constraint),
-            'bias_constraint': constraints.serialize(self.bias_constraint),
-            'use_bias': self.use_bias
-        }
-        base_config = super(CosineDense, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-from __future__ import absolute_import
-from __future__ import division
-
-import warnings
-
-from keras import backend as K
-from keras import activations
-from keras import initializers
-from keras import regularizers
-from keras import constraints
-from keras.layers import Layer
-from keras.layers import InputSpec
-
-from keras_contrib.losses import crf_loss
-from keras_contrib.metrics import crf_marginal_accuracy
-from keras_contrib.metrics import crf_viterbi_accuracy
-from keras_contrib.utils.test_utils import to_tuple
-
-
-class CRF(Layer):
-    """An implementation of linear chain conditional random field (CRF).
-
-    A linear chain CRF is defined to maximize the following likelihood function:
-
-    $$ L(W, U, b; y_1, ..., y_n) := \frac{1}{Z}
-    \sum_{y_1, ..., y_n} \exp(-a_1' y_1 - a_n' y_n
-        - \sum_{k=1}^{n}((f(x_k' W + b) y_k) + y_1' U y_2)), $$
-
-    where:
-        $Z$: normalization constant
-        $x_k, y_k$: inputs and outputs
-
-    This implementation has two modes for optimization:
-    1. (`join mode`) optimized by maximizing join likelihood,
-       which is optimal in theory of statistics.
-       Note that in this case, CRF must be the output/last layer.
-    2. (`marginal mode`) return marginal probabilities on each time
-       step and optimized via composition
-       likelihood (product of marginal likelihood), i.e.,
-       using `categorical_crossentropy` loss.
-       Note that in this case, CRF can be either the last layer or an
-       intermediate layer (though not explored).
-
-    For prediction (test phase), one can choose either Viterbi
-    best path (class indices) or marginal
-    probabilities if probabilities are needed.
-    However, if one chooses *join mode* for training,
-    Viterbi output is typically better than marginal output,
-    but the marginal output will still perform
-    reasonably close, while if *marginal mode* is used for training,
-    marginal output usually performs
-    much better. The default behavior and `metrics.crf_accuracy`
-    is set according to this observation.
-
-    In addition, this implementation supports masking and accepts either
-    one-hot or sparse target.
-
-    If you open an issue or a pull request about CRF, please
-    add 'cc @lzfelix' to notify Luiz Felix.
-
-
-    # Examples
-
-    ```python
-        from keras_contrib.layers import CRF
-        from keras_contrib.losses import crf_loss
-        from keras_contrib.metrics import crf_viterbi_accuracy
-
-        model = Sequential()
-        model.add(Embedding(3001, 300, mask_zero=True))
-
-        # use learn_mode = 'join', test_mode = 'viterbi',
-        # sparse_target = True (label indices output)
-        crf = CRF(10, sparse_target=True)
-        model.add(crf)
-
-        # crf_accuracy defaults to Viterbi acc if using join-mode (default).
-        # One can add crf.marginal_acc if interested, but may slow down learning
-        model.compile('adam', loss=crf_loss, metrics=[crf_viterbi_accuracy])
-
-        # y must be label indices (with shape 1 at dim 3) here,
-        # since `sparse_target=True`
-        model.fit(x, y)
-
-        # prediction gives one-hot representation of Viterbi best path
-        y_hat = model.predict(x_test)
-    ```
-
-    The following snippet shows how to load a persisted
-    model that uses the CRF layer:
-
-    ```python
-        from keras.models import load_model
-        from keras_contrib.losses import crf_loss
-        from keras_contrib.metrics import crf_viterbi_accuracy
-
-        custom_objects={'CRF': CRF,
-                        'crf_loss': crf_loss,
-                        'crf_viterbi_accuracy': crf_viterbi_accuracy}
-
-        loaded_model = load_model('',
-                                  custom_objects=custom_objects)
-    ```
-
-    # Arguments
-        units: Positive integer, dimensionality of the output space.
-        learn_mode: Either 'join' or 'marginal'.
-            The former trains the model by maximizing the join likelihood
-            while the latter maximizes the product of marginal likelihood
-            over all time steps.
-            One should use `losses.crf_nll` for 'join' mode
-            and `losses.categorical_crossentropy` or
-            `losses.sparse_categorical_crossentropy` for
-            `marginal` mode. For convenience, simply
-            use `losses.crf_loss`, which will decide the proper loss as described.
-        test_mode: Either 'viterbi' or 'marginal'.
-            The former is recommended and as default when `learn_mode = 'join'` and
-            gives one-hot representation of the best path at test (prediction) time,
-            while the latter is recommended and chosen as default
-            when `learn_mode = 'marginal'`,
-            which produces marginal probabilities for each time step.
-            For evaluating metrics, one should
-            use `metrics.crf_viterbi_accuracy` for 'viterbi' mode and
-            `metrics.crf_marginal_accuracy` for 'marginal' mode, or
-            simply use `metrics.crf_accuracy` for
-            both, which automatically decides it as described.
-            One can also use both for evaluation at training.
-        sparse_target: Boolean (default False) indicating
-            if provided labels are one-hot or
-            indices (with shape 1 at dim 3).
-        use_boundary: Boolean (default True) indicating if trainable
-            start-end chain energies
-            should be added to model.
-        use_bias: Boolean, whether the layer uses a bias vector.
-        kernel_initializer: Initializer for the `kernel` weights matrix,
-            used for the linear transformation of the inputs.
-            (see [initializers](../initializers.md)).
-        chain_initializer: Initializer for the `chain_kernel` weights matrix,
-            used for the CRF chain energy.
-            (see [initializers](../initializers.md)).
-        boundary_initializer: Initializer for the `left_boundary`,
-            `right_boundary` weights vectors,
-            used for the start/left and end/right boundary energy.
-            (see [initializers](../initializers.md)).
-        bias_initializer: Initializer for the bias vector
-            (see [initializers](../initializers.md)).
-        activation: Activation function to use
-            (see [activations](../activations.md)).
-            If you pass None, no activation is applied
-            (ie. "linear" activation: `a(x) = x`).
- kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - chain_regularizer: Regularizer function applied to - the `chain_kernel` weights matrix - (see [regularizer](../regularizers.md)). - boundary_regularizer: Regularizer function applied to - the 'left_boundary', 'right_boundary' weight vectors - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - chain_constraint: Constraint function applied to - the `chain_kernel` weights matrix - (see [constraints](../constraints.md)). - boundary_constraint: Constraint function applied to - the `left_boundary`, `right_boundary` weights vectors - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - input_dim: dimensionality of the input (integer). - This argument (or alternatively, the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. - unroll: Boolean (default False). If True, the network will be - unrolled, else a symbolic loop will be used. - Unrolling can speed-up a RNN, although it tends - to be more memory-intensive. - Unrolling is only suitable for short sequences. - - # Input shape - 3D tensor with shape `(nb_samples, timesteps, input_dim)`. - - # Output shape - 3D tensor with shape `(nb_samples, timesteps, units)`. - - # Masking - This layer supports masking for input data with a variable number - of timesteps. To introduce masks to your data, - use an [Embedding](embeddings.md) layer with the `mask_zero` parameter - set to `True`. 
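-
-        An illustrative masking sketch (toy sizes and random data, given here
-        only as an example of `mask_zero` feeding into the CRF layer):
-
-    ```python
-        import numpy as np
-        from keras.models import Sequential
-        from keras.layers import Embedding
-        from keras_contrib.layers import CRF
-        from keras_contrib.losses import crf_loss
-
-        x = np.random.randint(1, 100, size=(32, 10))   # 0 is the padding index
-        y = np.random.randint(0, 5, size=(32, 10, 1))  # label indices
-
-        model = Sequential()
-        model.add(Embedding(100, 16, mask_zero=True))  # mask_zero enables masking
-        model.add(CRF(5, sparse_target=True))
-        model.compile('adam', loss=crf_loss)
-        model.fit(x, y, epochs=1, verbose=0)
-    ```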
- - """ - - def __init__(self, units, - learn_mode='join', - test_mode=None, - sparse_target=False, - use_boundary=True, - use_bias=True, - activation='linear', - kernel_initializer='glorot_uniform', - chain_initializer='orthogonal', - bias_initializer='zeros', - boundary_initializer='zeros', - kernel_regularizer=None, - chain_regularizer=None, - boundary_regularizer=None, - bias_regularizer=None, - kernel_constraint=None, - chain_constraint=None, - boundary_constraint=None, - bias_constraint=None, - input_dim=None, - unroll=False, - **kwargs): - super(CRF, self).__init__(**kwargs) - self.supports_masking = True - self.units = units - self.learn_mode = learn_mode - assert self.learn_mode in ['join', 'marginal'] - self.test_mode = test_mode - if self.test_mode is None: - self.test_mode = 'viterbi' if self.learn_mode == 'join' else 'marginal' - else: - assert self.test_mode in ['viterbi', 'marginal'] - self.sparse_target = sparse_target - self.use_boundary = use_boundary - self.use_bias = use_bias - - self.activation = activations.get(activation) - - self.kernel_initializer = initializers.get(kernel_initializer) - self.chain_initializer = initializers.get(chain_initializer) - self.boundary_initializer = initializers.get(boundary_initializer) - self.bias_initializer = initializers.get(bias_initializer) - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.chain_regularizer = regularizers.get(chain_regularizer) - self.boundary_regularizer = regularizers.get(boundary_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.chain_constraint = constraints.get(chain_constraint) - self.boundary_constraint = constraints.get(boundary_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - self.unroll = unroll - - def build(self, input_shape): - input_shape = to_tuple(input_shape) - self.input_spec = [InputSpec(shape=input_shape)] - self.input_dim = input_shape[-1] - - self.kernel = self.add_weight(shape=(self.input_dim, self.units), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - self.chain_kernel = self.add_weight(shape=(self.units, self.units), - name='chain_kernel', - initializer=self.chain_initializer, - regularizer=self.chain_regularizer, - constraint=self.chain_constraint) - if self.use_bias: - self.bias = self.add_weight(shape=(self.units,), - name='bias', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = 0 - - if self.use_boundary: - self.left_boundary = self.add_weight(shape=(self.units,), - name='left_boundary', - initializer=self.boundary_initializer, - regularizer=self.boundary_regularizer, - constraint=self.boundary_constraint) - self.right_boundary = self.add_weight(shape=(self.units,), - name='right_boundary', - initializer=self.boundary_initializer, - regularizer=self.boundary_regularizer, - constraint=self.boundary_constraint) - self.built = True - - def call(self, X, mask=None): - if mask is not None: - assert K.ndim( - mask) == 2, 'Input mask to CRF must have dim 2 if not None' - - if self.test_mode == 'viterbi': - test_output = self.viterbi_decoding(X, mask) - else: - test_output = self.get_marginal_prob(X, mask) - - self.uses_learning_phase = True - if self.learn_mode == 'join': - train_output = K.zeros_like(K.dot(X, self.kernel)) - out = K.in_train_phase(train_output, test_output) - else: - 
if self.test_mode == 'viterbi':
-                train_output = self.get_marginal_prob(X, mask)
-                out = K.in_train_phase(train_output, test_output)
-            else:
-                out = test_output
-        return out
-
-    def compute_output_shape(self, input_shape):
-        return input_shape[:2] + (self.units,)
-
-    def compute_mask(self, input, mask=None):
-        if mask is not None and self.learn_mode == 'join':
-            return K.any(mask, axis=1)
-        return mask
-
-    def get_config(self):
-        config = {
-            'units': self.units,
-            'learn_mode': self.learn_mode,
-            'test_mode': self.test_mode,
-            'use_boundary': self.use_boundary,
-            'use_bias': self.use_bias,
-            'sparse_target': self.sparse_target,
-            'kernel_initializer': initializers.serialize(self.kernel_initializer),
-            'chain_initializer': initializers.serialize(self.chain_initializer),
-            'boundary_initializer': initializers.serialize(
-                self.boundary_initializer),
-            'bias_initializer': initializers.serialize(self.bias_initializer),
-            'activation': activations.serialize(self.activation),
-            'kernel_regularizer': regularizers.serialize(self.kernel_regularizer),
-            'chain_regularizer': regularizers.serialize(self.chain_regularizer),
-            'boundary_regularizer': regularizers.serialize(
-                self.boundary_regularizer),
-            'bias_regularizer': regularizers.serialize(self.bias_regularizer),
-            'kernel_constraint': constraints.serialize(self.kernel_constraint),
-            'chain_constraint': constraints.serialize(self.chain_constraint),
-            'boundary_constraint': constraints.serialize(self.boundary_constraint),
-            'bias_constraint': constraints.serialize(self.bias_constraint),
-            'input_dim': self.input_dim,
-            'unroll': self.unroll}
-        base_config = super(CRF, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-    @property
-    def loss_function(self):
-        warnings.warn('CRF.loss_function is deprecated '
-                      'and it might be removed in the future. Please '
-                      'use losses.crf_loss instead.')
-        return crf_loss
-
-    @property
-    def accuracy(self):
-        warnings.warn('CRF.accuracy is deprecated and it '
-                      'might be removed in the future. Please '
-                      'use metrics.crf_accuracy')
-        if self.test_mode == 'viterbi':
-            return crf_viterbi_accuracy
-        else:
-            return crf_marginal_accuracy
-
-    @property
-    def viterbi_acc(self):
-        warnings.warn('CRF.viterbi_acc is deprecated and it might '
-                      'be removed in the future. Please '
-                      'use metrics.crf_viterbi_accuracy instead.')
-        return crf_viterbi_accuracy
-
-    @property
-    def marginal_acc(self):
-        warnings.warn('CRF.marginal_acc is deprecated and it '
-                      'might be removed in the future. 
Please ' - 'use metrics.marginal_acc instead.') - return crf_marginal_accuracy - - @staticmethod - def softmaxNd(x, axis=-1): - m = K.max(x, axis=axis, keepdims=True) - exp_x = K.exp(x - m) - prob_x = exp_x / K.sum(exp_x, axis=axis, keepdims=True) - return prob_x - - @staticmethod - def shift_left(x, offset=1): - assert offset > 0 - return K.concatenate([x[:, offset:], K.zeros_like(x[:, :offset])], axis=1) - - @staticmethod - def shift_right(x, offset=1): - assert offset > 0 - return K.concatenate([K.zeros_like(x[:, :offset]), x[:, :-offset]], axis=1) - - def add_boundary_energy(self, energy, mask, start, end): - start = K.expand_dims(K.expand_dims(start, 0), 0) - end = K.expand_dims(K.expand_dims(end, 0), 0) - if mask is None: - energy = K.concatenate([energy[:, :1, :] + start, energy[:, 1:, :]], - axis=1) - energy = K.concatenate([energy[:, :-1, :], energy[:, -1:, :] + end], - axis=1) - else: - mask = K.expand_dims(K.cast(mask, K.floatx())) - start_mask = K.cast( - K.greater(mask, self.shift_right(mask)), K.floatx()) - end_mask = K.cast( - K.greater(self.shift_left(mask), mask), K.floatx()) - energy = energy + start_mask * start - energy = energy + end_mask * end - return energy - - def get_log_normalization_constant(self, input_energy, mask, **kwargs): - """Compute logarithm of the normalization constant Z, where - Z = sum exp(-E) -> logZ = log sum exp(-E) =: -nlogZ - """ - # should have logZ[:, i] == logZ[:, j] for any i, j - logZ = self.recursion(input_energy, mask, - return_sequences=False, **kwargs) - return logZ[:, 0] - - def get_energy(self, y_true, input_energy, mask): - """Energy = a1' y1 + u1' y1 + y1' U y2 + u2' y2 + y2' U y3 + u3' y3 + an' y3 - """ - input_energy = K.sum(input_energy * y_true, 2) # (B, T) - # (B, T-1) - chain_energy = K.sum(K.dot(y_true[:, :-1, :], - self.chain_kernel) * y_true[:, 1:, :], 2) - - if mask is not None: - mask = K.cast(mask, K.floatx()) - # (B, T-1), mask[:,:-1]*mask[:,1:] makes it work with any padding - chain_mask = mask[:, :-1] * mask[:, 1:] - input_energy = input_energy * mask - chain_energy = chain_energy * chain_mask - total_energy = K.sum(input_energy, -1) + \ - K.sum(chain_energy, -1) # (B, ) - - return total_energy - - def get_negative_log_likelihood(self, y_true, X, mask): - """Compute the loss, i.e., negative log likelihood (normalize by number of time steps) - likelihood = 1/Z * exp(-E) -> neg_log_like = - log(1/Z * exp(-E)) = logZ + E - """ - input_energy = self.activation(K.dot(X, self.kernel) + self.bias) - if self.use_boundary: - input_energy = self.add_boundary_energy(input_energy, mask, - self.left_boundary, - self.right_boundary) - energy = self.get_energy(y_true, input_energy, mask) - logZ = self.get_log_normalization_constant(input_energy, mask, - input_length=K.int_shape(X)[1]) - nloglik = logZ + energy - if mask is not None: - nloglik = nloglik / K.sum(K.cast(mask, K.floatx()), 1) - else: - nloglik = nloglik / K.cast(K.shape(X)[1], K.floatx()) - return nloglik - - def step(self, input_energy_t, states, return_logZ=True): - # not in the following `prev_target_val` has shape = (B, F) - # where B = batch_size, F = output feature dim - # Note: `i` is of float32, due to the behavior of `K.rnn` - prev_target_val, i, chain_energy = states[:3] - t = K.cast(i[0, 0], dtype='int32') - if len(states) > 3: - if K.backend() == 'theano': - m = states[3][:, t:(t + 2)] - else: - m = K.slice(states[3], [0, t], [-1, 2]) - input_energy_t = input_energy_t * K.expand_dims(m[:, 0]) - # (1, F, F)*(B, 1, 1) -> (B, F, F) - chain_energy = 
chain_energy * K.expand_dims( - K.expand_dims(m[:, 0] * m[:, 1])) - if return_logZ: - # shapes: (1, B, F) + (B, F, 1) -> (B, F, F) - energy = chain_energy + \ - K.expand_dims(input_energy_t - prev_target_val, 2) - new_target_val = K.logsumexp(-energy, 1) # shapes: (B, F) - return new_target_val, [new_target_val, i + 1] - else: - energy = chain_energy + \ - K.expand_dims(input_energy_t + prev_target_val, 2) - min_energy = K.min(energy, 1) - # cast for tf-version `K.rnn - argmin_table = K.cast(K.argmin(energy, 1), K.floatx()) - return argmin_table, [min_energy, i + 1] - - def recursion(self, input_energy, mask=None, go_backwards=False, - return_sequences=True, return_logZ=True, input_length=None): - """Forward (alpha) or backward (beta) recursion - - If `return_logZ = True`, compute the logZ, the normalization constant: - - \[ Z = \sum_{y1, y2, y3} exp(-E) # energy - = \sum_{y1, y2, y3} exp(-(u1' y1 + y1' W y2 + u2' y2 + y2' W y3 + u3' y3)) - = sum_{y2, y3} (exp(-(u2' y2 + y2' W y3 + u3' y3)) - sum_{y1} exp(-(u1' y1' + y1' W y2))) \] - - Denote: - \[ S(y2) := sum_{y1} exp(-(u1' y1 + y1' W y2)), \] - \[ Z = sum_{y2, y3} exp(log S(y2) - (u2' y2 + y2' W y3 + u3' y3)) \] - \[ logS(y2) = log S(y2) = log_sum_exp(-(u1' y1' + y1' W y2)) \] - Note that: - yi's are one-hot vectors - u1, u3: boundary energies have been merged - - If `return_logZ = False`, compute the Viterbi's best path lookup table. - """ - chain_energy = self.chain_kernel - # shape=(1, F, F): F=num of output features. 1st F is for t-1, 2nd F for t - chain_energy = K.expand_dims(chain_energy, 0) - # shape=(B, F), dtype=float32 - prev_target_val = K.zeros_like(input_energy[:, 0, :]) - - if go_backwards: - input_energy = K.reverse(input_energy, 1) - if mask is not None: - mask = K.reverse(mask, 1) - - initial_states = [prev_target_val, - K.zeros_like(prev_target_val[:, :1])] - constants = [chain_energy] - - if mask is not None: - mask2 = K.cast(K.concatenate([mask, K.zeros_like(mask[:, :1])], axis=1), - K.floatx()) - constants.append(mask2) - - def _step(input_energy_i, states): - return self.step(input_energy_i, states, return_logZ) - - target_val_last, target_val_seq, _ = K.rnn(_step, input_energy, - initial_states, - constants=constants, - input_length=input_length, - unroll=self.unroll) - - if return_sequences: - if go_backwards: - target_val_seq = K.reverse(target_val_seq, 1) - return target_val_seq - else: - return target_val_last - - def forward_recursion(self, input_energy, **kwargs): - return self.recursion(input_energy, **kwargs) - - def backward_recursion(self, input_energy, **kwargs): - return self.recursion(input_energy, go_backwards=True, **kwargs) - - def get_marginal_prob(self, X, mask=None): - input_energy = self.activation(K.dot(X, self.kernel) + self.bias) - if self.use_boundary: - input_energy = self.add_boundary_energy(input_energy, mask, - self.left_boundary, - self.right_boundary) - input_length = K.int_shape(X)[1] - alpha = self.forward_recursion(input_energy, mask=mask, - input_length=input_length) - beta = self.backward_recursion(input_energy, mask=mask, - input_length=input_length) - if mask is not None: - input_energy = input_energy * \ - K.expand_dims(K.cast(mask, K.floatx())) - margin = -(self.shift_right(alpha) + - input_energy + self.shift_left(beta)) - return self.softmaxNd(margin) - - def viterbi_decoding(self, X, mask=None): - input_energy = self.activation(K.dot(X, self.kernel) + self.bias) - if self.use_boundary: - input_energy = self.add_boundary_energy( - input_energy, mask, self.left_boundary, 
self.right_boundary)
-
-        argmin_tables = self.recursion(input_energy, mask, return_logZ=False)
-        argmin_tables = K.cast(argmin_tables, 'int32')
-
-        # backward to find best path, `initial_best_idx` can be any,
-        # as all elements in the last argmin_table are the same
-        argmin_tables = K.reverse(argmin_tables, 1)
-        # matrix instead of vector is required by tf `K.rnn`
-        initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])]
-        if K.backend() == 'theano':
-            from theano import tensor as T
-            initial_best_idx = [T.unbroadcast(initial_best_idx[0], 1)]
-
-        def gather_each_row(params, indices):
-            n = K.shape(indices)[0]
-            if K.backend() == 'theano':
-                from theano import tensor as T
-                return params[T.arange(n), indices]
-            elif K.backend() == 'tensorflow':
-                import tensorflow as tf
-                indices = K.transpose(K.stack([tf.range(n), indices]))
-                return tf.gather_nd(params, indices)
-            else:
-                raise NotImplementedError
-
-        def find_path(argmin_table, best_idx):
-            next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0])
-            next_best_idx = K.expand_dims(next_best_idx)
-            if K.backend() == 'theano':
-                from theano import tensor as T
-                next_best_idx = T.unbroadcast(next_best_idx, 1)
-            return next_best_idx, [next_best_idx]
-
-        _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx,
-                                 input_length=K.int_shape(X)[1], unroll=self.unroll)
-        best_paths = K.reverse(best_paths, 1)
-        best_paths = K.squeeze(best_paths, 2)
-
-        return K.one_hot(best_paths, self.units)
-from .dssim import DSSIMObjective
-from .jaccard import jaccard_distance
-from .crf_losses import crf_loss, crf_nll
-from keras import backend as K
-from keras.losses import categorical_crossentropy
-from keras.losses import sparse_categorical_crossentropy
-
-
-def crf_nll(y_true, y_pred):
-    """The negative log-likelihood for linear chain Conditional Random Field (CRF).
-
-    This loss function is only used when the `layers.CRF` layer
-    is trained in the "join" mode.
-
-    # Arguments
-        y_true: tensor with true targets.
-        y_pred: tensor with predicted targets.
-
-    # Returns
-        A scalar corresponding to the negative log-likelihood.
-
-    # Raises
-        TypeError: If CRF is not the last layer.
-
-    # About GitHub
-        If you open an issue or a pull request about CRF, please
-        add `cc @lzfelix` to notify Luiz Felix.
-    """
-
-    crf, idx = y_pred._keras_history[:2]
-    if crf._outbound_nodes:
-        raise TypeError('When learn_mode="join", CRF must be the last layer.')
-    if crf.sparse_target:
-        y_true = K.one_hot(K.cast(y_true[:, :, 0], 'int32'), crf.units)
-    X = crf._inbound_nodes[idx].input_tensors[0]
-    mask = crf._inbound_nodes[idx].input_masks[0]
-    nloglik = crf.get_negative_log_likelihood(y_true, X, mask)
-    return nloglik
-
-
-def crf_loss(y_true, y_pred):
-    """General CRF loss function depending on the learning mode.
-
-    # Arguments
-        y_true: tensor with true targets.
-        y_pred: tensor with predicted targets.
-
-    # Returns
-        If the CRF layer is being trained in the join mode, returns the negative
-        log-likelihood. Otherwise returns the categorical crossentropy implemented
-        by the underlying Keras backend.
-
-    # About GitHub
-        If you open an issue or a pull request about CRF, please
-        add `cc @lzfelix` to notify Luiz Felix.
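-
-    # Example
-        A minimal dispatch sketch (toy sizes, for illustration only): with
-        `learn_mode='marginal'` this loss falls through to (sparse)
-        categorical crossentropy, while the default 'join' mode routes
-        to `crf_nll`.
-
-    ```python
-        from keras.models import Sequential
-        from keras.layers import Embedding
-        from keras_contrib.layers import CRF
-        from keras_contrib.losses import crf_loss
-
-        model = Sequential()
-        model.add(Embedding(100, 16, mask_zero=True))
-        model.add(CRF(5, learn_mode='marginal'))
-        model.compile('adam', loss=crf_loss)
-    ```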
- """ - crf, idx = y_pred._keras_history[:2] - if crf.learn_mode == 'join': - return crf_nll(y_true, y_pred) - else: - if crf.sparse_target: - return sparse_categorical_crossentropy(y_true, y_pred) - else: - return categorical_crossentropy(y_true, y_pred) -from __future__ import absolute_import -import keras_contrib.backend as KC -from keras import backend as K - - -class DSSIMObjective: - """Difference of Structural Similarity (DSSIM loss function). - Clipped between 0 and 0.5 - - Note : You should add a regularization term like a l2 loss in addition to this one. - Note : In theano, the `kernel_size` must be a factor of the output size. So 3 could - not be the `kernel_size` for an output of 32. - - # Arguments - k1: Parameter of the SSIM (default 0.01) - k2: Parameter of the SSIM (default 0.03) - kernel_size: Size of the sliding window (default 3) - max_value: Max value of the output (default 1.0) - """ - - def __init__(self, k1=0.01, k2=0.03, kernel_size=3, max_value=1.0): - self.__name__ = 'DSSIMObjective' - self.kernel_size = kernel_size - self.k1 = k1 - self.k2 = k2 - self.max_value = max_value - self.c1 = (self.k1 * self.max_value) ** 2 - self.c2 = (self.k2 * self.max_value) ** 2 - self.dim_ordering = K.image_data_format() - self.backend = K.backend() - - def __int_shape(self, x): - return K.int_shape(x) if self.backend == 'tensorflow' else K.shape(x) - - def __call__(self, y_true, y_pred): - # There are additional parameters for this function - # Note: some of the 'modes' for edge behavior do not yet have a - # gradient definition in the Theano tree - # and cannot be used for learning - - kernel = [self.kernel_size, self.kernel_size] - y_true = K.reshape(y_true, [-1] + list(self.__int_shape(y_pred)[1:])) - y_pred = K.reshape(y_pred, [-1] + list(self.__int_shape(y_pred)[1:])) - - patches_pred = KC.extract_image_patches(y_pred, kernel, kernel, 'valid', - self.dim_ordering) - patches_true = KC.extract_image_patches(y_true, kernel, kernel, 'valid', - self.dim_ordering) - - # Reshape to get the var in the cells - bs, w, h, c1, c2, c3 = self.__int_shape(patches_pred) - patches_pred = K.reshape(patches_pred, [-1, w, h, c1 * c2 * c3]) - patches_true = K.reshape(patches_true, [-1, w, h, c1 * c2 * c3]) - # Get mean - u_true = K.mean(patches_true, axis=-1) - u_pred = K.mean(patches_pred, axis=-1) - # Get variance - var_true = K.var(patches_true, axis=-1) - var_pred = K.var(patches_pred, axis=-1) - # Get std dev - covar_true_pred = K.mean( - patches_true * patches_pred, axis=-1) - u_true * u_pred - - ssim = (2 * u_true * u_pred + self.c1) * \ - (2 * covar_true_pred + self.c2) - denom = ((K.square(u_true) - + K.square(u_pred) - + self.c1) * (var_pred + var_true + self.c2)) - ssim /= denom # no need for clipping, c1 and c2 make the denom non-zero - return K.mean((1.0 - ssim) / 2.0) -from keras import backend as K - - -def jaccard_distance(y_true, y_pred, smooth=100): - """Jaccard distance for semantic segmentation. - - Also known as the intersection-over-union loss. - - This loss is useful when you have unbalanced numbers of pixels within an image - because it gives all classes equal weight. However, it is not the defacto - standard for image segmentation. - - For example, assume you are trying to predict if - each pixel is cat, dog, or background. - You have 80% background pixels, 10% dog, and 10% cat. - If the model predicts 100% background - should it be be 80% right (as with categorical cross entropy) - or 30% (with this loss)? 
-
-    The loss has been modified to have a smooth gradient as it converges on zero.
-    This has been shifted so it converges on 0 and is smoothed to avoid exploding
-    or disappearing gradient.
-
-    Jaccard = (|X & Y|)/ (|X|+ |Y| - |X & Y|)
-            = sum(|A*B|)/(sum(|A|)+sum(|B|)-sum(|A*B|))
-
-    # Arguments
-        y_true: The ground truth tensor.
-        y_pred: The predicted tensor.
-        smooth: Smoothing factor. Default is 100.
-
-    # Returns
-        The Jaccard distance between the two tensors.
-
-    # References
-        - [What is a good evaluation measure for semantic segmentation?](
-           http://www.bmva.org/bmvc/2013/Papers/paper0032/paper0032.pdf)
-
-    """
-    intersection = K.sum(K.abs(y_true * y_pred), axis=-1)
-    sum_ = K.sum(K.abs(y_true) + K.abs(y_pred), axis=-1)
-    jac = (intersection + smooth) / (sum_ - intersection + smooth)
-    return (1 - jac) * smooth
-from .crf_accuracies import crf_accuracy, crf_marginal_accuracy
-from .crf_accuracies import crf_viterbi_accuracy
-from keras import backend as K
-
-
-def _get_accuracy(y_true, y_pred, mask, sparse_target=False):
-    y_pred = K.argmax(y_pred, -1)
-    if sparse_target:
-        y_true = K.cast(y_true[:, :, 0], K.dtype(y_pred))
-    else:
-        y_true = K.argmax(y_true, -1)
-    judge = K.cast(K.equal(y_pred, y_true), K.floatx())
-    if mask is None:
-        return K.mean(judge)
-    else:
-        mask = K.cast(mask, K.floatx())
-        return K.sum(judge * mask) / K.sum(mask)
-
-
-def crf_viterbi_accuracy(y_true, y_pred):
-    '''Use Viterbi algorithm to get best path, and compute its accuracy.
-    `y_pred` must be an output from CRF.'''
-    crf, idx = y_pred._keras_history[:2]
-    X = crf._inbound_nodes[idx].input_tensors[0]
-    mask = crf._inbound_nodes[idx].input_masks[0]
-    y_pred = crf.viterbi_decoding(X, mask)
-    return _get_accuracy(y_true, y_pred, mask, crf.sparse_target)
-
-
-def crf_marginal_accuracy(y_true, y_pred):
-    '''Use time-wise marginal argmax as prediction.
-    `y_pred` must be an output from CRF with `learn_mode="marginal"`.'''
-    crf, idx = y_pred._keras_history[:2]
-    X = crf._inbound_nodes[idx].input_tensors[0]
-    mask = crf._inbound_nodes[idx].input_masks[0]
-    y_pred = crf.get_marginal_prob(X, mask)
-    return _get_accuracy(y_true, y_pred, mask, crf.sparse_target)
-
-
-def crf_accuracy(y_true, y_pred):
-    '''Get default accuracy based on CRF `test_mode`.'''
-    crf, idx = y_pred._keras_history[:2]
-    if crf.test_mode == 'viterbi':
-        return crf_viterbi_accuracy(y_true, y_pred)
-    else:
-        return crf_marginal_accuracy(y_true, y_pred)
-from .ftml import FTML
-from .padam import Padam
-from .yogi import Yogi
-from .lars import LARS
-
-# aliases
-ftml = FTML
-lars = LARS
-from __future__ import absolute_import
-from keras.optimizers import Optimizer
-from keras import backend as K
-
-
-class FTML(Optimizer):
-    """FTML optimizer.
-
-    # Arguments
-        lr: float >= 0. Learning rate.
-        beta_1: float, 0 < beta < 1. Generally close to 0.5.
-        beta_2: float, 0 < beta < 1. Generally close to 1.
-        epsilon: float >= 0. Fuzz factor.
-        decay: float >= 0. Learning rate decay over each update.
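-
-    # Example
-        A minimal usage sketch (toy model, for illustration only; the learning
-        rate below is this implementation's default):
-
-    ```python
-        from keras.models import Sequential
-        from keras.layers import Dense
-        from keras_contrib.optimizers import FTML
-
-        model = Sequential()
-        model.add(Dense(2, input_shape=(10,), activation='softmax'))
-        model.compile(optimizer=FTML(lr=0.0025),
-                      loss='categorical_crossentropy')
-    ```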
-
-    # References
-        - [FTML - Follow the Moving Leader in Deep Learning](
-          http://www.cse.ust.hk/~szhengac/papers/icml17.pdf)
-    """
-
-    def __init__(self, lr=0.0025, beta_1=0.6, beta_2=0.999,
-                 epsilon=1e-8, decay=0., **kwargs):
-        super(FTML, self).__init__(**kwargs)
-        self.__dict__.update(locals())
-        self.iterations = K.variable(0)
-        self.lr = K.variable(lr)
-        self.beta_1 = K.variable(beta_1)
-        self.beta_2 = K.variable(beta_2)
-        self.decay = K.variable(decay)
-        self.epsilon = epsilon
-        self.initial_decay = decay
-
-    def get_updates(self, loss, params):
-        grads = self.get_gradients(loss, params)
-        self.updates = [K.update_add(self.iterations, 1)]
-
-        lr = self.lr
-        if self.initial_decay > 0:
-            lr *= (1. / (1. + self.decay * self.iterations))
-
-        t = self.iterations + 1
-
-        lr_t = lr / (1. - K.pow(self.beta_1, t))
-
-        shapes = [K.int_shape(p) for p in params]
-        zs = [K.zeros(shape) for shape in shapes]
-        vs = [K.zeros(shape) for shape in shapes]
-        ds = [K.zeros(shape) for shape in shapes]
-        self.weights = [self.iterations] + zs + vs + ds
-
-        for p, g, z, v, d in zip(params, grads, zs, vs, ds):
-            v_t = self.beta_2 * v + (1. - self.beta_2) * K.square(g)
-            d_t = (K.sqrt(v_t / (1. - K.pow(self.beta_2, t)))
-                   + self.epsilon) / lr_t
-            sigma_t = d_t - self.beta_1 * d
-            z_t = self.beta_1 * z + (1. - self.beta_1) * g - sigma_t * p
-
-            p_t = - z_t / d_t
-
-            self.updates.append(K.update(z, z_t))
-            self.updates.append(K.update(v, v_t))
-            self.updates.append(K.update(d, d_t))
-
-            new_p = p_t
-
-            # Apply constraints.
-            if getattr(p, 'constraint', None) is not None:
-                new_p = p.constraint(new_p)
-
-            self.updates.append(K.update(p, new_p))
-        return self.updates
-
-    def get_config(self):
-        config = {'lr': float(K.get_value(self.lr)),
-                  'beta_1': float(K.get_value(self.beta_1)),
-                  'beta_2': float(K.get_value(self.beta_2)),
-                  'decay': float(K.get_value(self.decay)),
-                  'epsilon': self.epsilon}
-        base_config = super(FTML, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-from keras import backend as K
-from keras.optimizers import Optimizer
-
-
-class LARS(Optimizer):
-    """Layer-wise Adaptive Rate Scaling for large batch training.
-    Introduced by "Large Batch Training of Convolutional Networks" by Y. You,
-    I. Gitman, and B. Ginsburg. (https://arxiv.org/abs/1708.03888)
-    Implements the LARS learning rate scheme presented in the paper above. This
-    optimizer is useful when scaling the batch size to up to 32K without
-    significant performance degradation. It is recommended to use the optimizer
-    in conjunction with:
-        - Gradual learning rate warm-up
-        - Linear learning rate scaling
-        - Poly rule learning rate decay
-    Note, LARS scaling is currently only enabled for dense tensors.
-
-    Args:
-        lr: A `Tensor` or floating point value. The base learning rate.
-        momentum: A floating point value. Momentum hyperparameter.
-        weight_decay: A floating point value. Weight decay hyperparameter.
-        eeta: LARS coefficient as used in the paper. Default set to LARS
-            coefficient from the paper. (eeta / weight_decay) determines the
-            highest scaling factor in LARS.
-        epsilon: Optional epsilon parameter to be set in models that have very
-            small gradients. Default set to 0.0.
- nesterov: when set to True, nesterov momentum will be enabled - """ - - def __init__(self, - lr, - momentum=0.9, - weight_decay=0.0001, - eeta=0.001, - epsilon=0.0, - nesterov=False, - **kwargs): - - if momentum < 0.0: - raise ValueError("momentum should be positive: %s" % momentum) - if weight_decay < 0.0: - raise ValueError("weight_decay is not positive: %s" % weight_decay) - super(LARS, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.iterations = K.variable(0, dtype='int64', name='iterations') - self.lr = K.variable(lr, name='lr') - self.momentum = K.variable(momentum, name='momentum') - self.weight_decay = K.variable(weight_decay, name='weight_decay') - self.eeta = K.variable(eeta, name='eeta') - self.epsilon = epsilon - self.nesterov = nesterov - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - weights = self.get_weights() - self.updates = [K.update_add(self.iterations, 1)] - scaled_lr = self.lr - w_norm = K.sqrt(K.sum([K.sum(K.square(weight)) - for weight in weights])) - g_norm = K.sqrt(K.sum([K.sum(K.square(grad)) - for grad in grads])) - scaled_lr = K.switch(K.greater(w_norm * g_norm, K.zeros([1])), - K.expand_dims((self.eeta * w_norm / - (g_norm + self.weight_decay * w_norm + - self.epsilon)) * self.lr), - K.ones([1]) * self.lr) - if K.backend() == 'theano': - # otherwise theano raise broadcasting error - scaled_lr = scaled_lr[0] - # momentum - moments = [K.zeros(K.int_shape(param), dtype=K.dtype(param)) - for param in params] - self.weights = [self.iterations] + moments - for param, grad, moment in zip(params, grads, moments): - v0 = (moment * self.momentum) - v1 = scaled_lr * grad # velocity - veloc = v0 - v1 - self.updates.append(K.update(moment, veloc)) - - if self.nesterov: - new_param = param + (veloc * self.momentum) - v1 - else: - new_param = param + veloc - - # Apply constraints. - if getattr(param, 'constraint', None) is not None: - new_param = param.constraint(new_param) - - self.updates.append(K.update(param, new_param)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'momentum': float(K.get_value(self.momentum)), - 'weight_decay': float(K.get_value(self.weight_decay)), - 'epsilon': self.epsilon, - 'eeta': float(K.get_value(self.eeta)), - 'nesterov': self.nesterov} - base_config = super(LARS, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -from keras import backend as K -from keras.optimizers import Optimizer - - -class Padam(Optimizer): - """Partially adaptive momentum estimation optimizer. - - # Arguments - lr: float >= 0. Learning rate. - beta_1: float, 0 < beta < 1. Generally close to 1. - beta_2: float, 0 < beta < 1. Generally close to 1. - epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. - decay: float >= 0. Learning rate decay over each update. - amsgrad: boolean. Whether to apply the AMSGrad variant of this - algorithm from the paper "On the Convergence of Adam and - Beyond". - partial: float, 0 <= partial <= 0.5 . Parameter controlling partial - momentum adaption. For `partial=0`, this optimizer behaves like SGD, - for `partial=0.5` it behaves like AMSGrad. - - # References - - [Closing the Generalization Gap of Adaptive Gradient Methods - in Training Deep Neural Networks](https://arxiv.org/pdf/1806.06763.pdf) - - """ - - def __init__(self, lr=1e-1, beta_1=0.9, beta_2=0.999, - epsilon=1e-8, decay=0., amsgrad=False, partial=1. 
/ 8., **kwargs): - if partial < 0 or partial > 0.5: - raise ValueError( - "Padam: 'partial' must be a positive float with a maximum " - "value of `0.5`, since higher values will cause divergence " - "during training." - ) - super(Padam, self).__init__(**kwargs) - with K.name_scope(self.__class__.__name__): - self.iterations = K.variable(0, dtype='int64', name='iterations') - self.lr = K.variable(lr, name='lr') - self.beta_1 = K.variable(beta_1, name='beta_1') - self.beta_2 = K.variable(beta_2, name='beta_2') - self.decay = K.variable(decay, name='decay') - if epsilon is None: - epsilon = K.epsilon() - self.epsilon = epsilon - self.partial = partial - self.initial_decay = decay - self.amsgrad = amsgrad - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - t = K.cast(self.iterations, K.floatx()) + 1 - lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / - (1. - K.pow(self.beta_1, t))) - - ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - if self.amsgrad: - vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - else: - vhats = [K.zeros(1) for _ in params] - self.weights = [self.iterations] + ms + vs + vhats - - for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): - m_t = (self.beta_1 * m) + (1. - self.beta_1) * g - v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(g) - if self.amsgrad: - vhat_t = K.maximum(vhat, v_t) - denom = (K.sqrt(vhat_t) + self.epsilon) - self.updates.append(K.update(vhat, vhat_t)) - else: - denom = (K.sqrt(v_t) + self.epsilon) - - self.updates.append(K.update(m, m_t)) - self.updates.append(K.update(v, v_t)) - - # Partial momentum adaption. - new_p = p - (lr_t * (m_t / (denom ** (self.partial * 2)))) - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'beta_1': float(K.get_value(self.beta_1)), - 'beta_2': float(K.get_value(self.beta_2)), - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon, - 'amsgrad': self.amsgrad, - 'partial': self.partial} - base_config = super(Padam, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -from keras import backend as K -from keras.optimizers import Optimizer - - -class Yogi(Optimizer): - """Yogi optimizer. - Yogi is a variation of Adam that controls the increase in effective - learning rate, which (according to the paper) leads to even better - performance than Adam with similar theoretical guarantees on convergence. - Default parameters follow those provided in the original paper, Tab.1 - # Arguments - lr: float >= 0. Learning rate. - beta_1: float, 0 < beta < 1. Generally close to 1. - beta_2: float, 0 < beta < 1. Generally close to 1. - epsilon: float >= 0. Fuzz factor. If `None`, defaults to `K.epsilon()`. - decay: float >= 0. Learning rate decay over each update. - # References - - [Adaptive Methods for Nonconvex Optimization]( - https://papers.nips.cc/paper/8186-adaptive-methods-for-nonconvex-optimization) - - If you open an issue or a pull request about the Yogi optimizer, - please add 'cc @MarcoAndreaBuchmann' to notify him. 
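-
-    # Example
-        A minimal usage sketch (toy regression model, for illustration only):
-
-    ```python
-        from keras.models import Sequential
-        from keras.layers import Dense
-        from keras_contrib.optimizers import Yogi
-
-        model = Sequential()
-        model.add(Dense(1, input_shape=(8,)))
-        # note the comparatively large default epsilon (1e-3) from the paper
-        model.compile(optimizer=Yogi(lr=0.01), loss='mse')
-    ```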
- """ - - def __init__(self, lr=0.01, beta_1=0.9, beta_2=0.999, - epsilon=1e-3, decay=0., **kwargs): - super(Yogi, self).__init__(**kwargs) - if beta_1 <= 0 or beta_1 >= 1: - raise ValueError("beta_1 has to be in ]0, 1[") - if beta_2 <= 0 or beta_2 >= 1: - raise ValueError("beta_2 has to be in ]0, 1[") - - with K.name_scope(self.__class__.__name__): - self.iterations = K.variable(0, dtype='int64', name='iterations') - self.lr = K.variable(lr, name='lr') - self.beta_1 = K.variable(beta_1, name='beta_1') - self.beta_2 = K.variable(beta_2, name='beta_2') - self.decay = K.variable(decay, name='decay') - if epsilon is None: - epsilon = K.epsilon() - if epsilon <= 0: - raise ValueError("epsilon has to be larger than 0") - self.epsilon = epsilon - self.initial_decay = decay - - def get_updates(self, loss, params): - grads = self.get_gradients(loss, params) - self.updates = [K.update_add(self.iterations, 1)] - - lr = self.lr - if self.initial_decay > 0: - lr = lr * (1. / (1. + self.decay * K.cast(self.iterations, - K.dtype(self.decay)))) - - t = K.cast(self.iterations, K.floatx()) + 1 - lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / - (1. - K.pow(self.beta_1, t))) - - ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - vhats = [K.zeros(1) for _ in params] - self.weights = [self.iterations] + ms + vs + vhats - - for p, g, m, v, vhat in zip(params, grads, ms, vs, vhats): - g2 = K.square(g) - m_t = (self.beta_1 * m) + (1. - self.beta_1) * g - v_t = v - (1. - self.beta_2) * K.sign(v - g2) * g2 - p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) - - self.updates.append(K.update(m, m_t)) - self.updates.append(K.update(v, v_t)) - new_p = p_t - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append(K.update(p, new_p)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'beta_1': float(K.get_value(self.beta_1)), - 'beta_2': float(K.get_value(self.beta_2)), - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon} - base_config = super(Yogi, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - -from __future__ import absolute_import -import numpy as np - -from keras import backend as K - - -def get_standard_values(): - ''' - These are just a set of floats used for testing the activation - functions, and are useful in multiple tests. 
- ''' - return np.array([[0, 0.1, 0.5, 0.9, 1.0]], dtype=K.floatx()) - - -def validate_activation(activation): - activation(get_standard_values()) -import numpy as np - -from keras import backend as K - -all_metrics = [] -all_sparse_metrics = [] - - -def validate_metric(metric): - y_a = K.variable(np.random.random((6, 7))) - y_b = K.variable(np.random.random((6, 7))) - output = metric(y_a, y_b) - assert K.eval(output).shape == () -from __future__ import print_function -import numpy as np - -from keras_contrib.utils import test_utils -from keras import optimizers -from keras.models import Sequential -from keras.layers import Dense, Activation -from keras.utils import to_categorical - - -def get_test_data(): - np.random.seed(1337) - (x_train, y_train), _ = test_utils.get_test_data(num_train=1000, - num_test=200, - input_shape=(10,), - classification=True, - num_classes=2) - y_train = to_categorical(y_train) - return x_train, y_train - - -def get_model(input_dim, num_hidden, output_dim): - model = Sequential() - model.add(Dense(num_hidden, input_shape=(input_dim,))) - model.add(Activation('relu')) - model.add(Dense(output_dim)) - model.add(Activation('softmax')) - return model - - -def _test_optimizer(optimizer, target=0.75): - x_train, y_train = get_test_data() - model = get_model(x_train.shape[1], 10, y_train.shape[1]) - model.compile(loss='categorical_crossentropy', - optimizer=optimizer, - metrics=['accuracy']) - history = model.fit(x_train, y_train, epochs=2, batch_size=16, verbose=0) - assert history.history['acc'][-1] >= target - config = optimizers.serialize(optimizer) - custom_objects = {optimizer.__class__.__name__: optimizer.__class__} - optim = optimizers.deserialize(config, custom_objects) - new_config = optimizers.serialize(optim) - assert config == new_config -import numpy as np -from keras.datasets import mnist -from keras.layers import Activation -from keras.layers import Dense -from keras.models import Sequential -from keras.utils import np_utils - -np.random.seed(1337) - -nb_classes = 10 -batch_size = 128 -nb_epoch = 5 -weighted_class = 9 -standard_weight = 1 -high_weight = 5 -max_train_samples = 5000 -max_test_samples = 1000 - - -def get_data(): - # the data, shuffled and split between tran and test sets - (X_train, y_train), (X_test, y_test) = mnist.load_data() - X_train = X_train.reshape(60000, 784)[:max_train_samples] - X_test = X_test.reshape(10000, 784)[:max_test_samples] - X_train = X_train.astype('float32') / 255 - X_test = X_test.astype('float32') / 255 - - # convert class vectors to binary class matrices - y_train = y_train[:max_train_samples] - y_test = y_test[:max_test_samples] - Y_train = np_utils.to_categorical(y_train, nb_classes) - Y_test = np_utils.to_categorical(y_test, nb_classes) - test_ids = np.where(y_test == np.array(weighted_class))[0] - - return (X_train, Y_train), (X_test, Y_test), test_ids - - -def validate_regularizer(weight_reg=None, activity_reg=None): - model = Sequential() - model.add(Dense(50, input_shape=(784,))) - model.add(Activation('relu')) - model.add(Dense(10, W_regularizer=weight_reg, - activity_regularizer=activity_reg)) - model.add(Activation('softmax')) - return model -import keras.backend as K - - -def conv_output_length(input_length, filter_size, - padding, stride, dilation=1): - """Determines output length of a convolution given input length. - - Copy of the function of keras-team/keras because it's not in the public API - So we can't use the function in keras-team/keras to test tf.keras - - # Arguments - input_length: integer. 
-import keras.backend as K
-
-
-def conv_output_length(input_length, filter_size,
-                       padding, stride, dilation=1):
-    """Determines output length of a convolution given input length.
-
-    Copy of the function in keras-team/keras because it's not in the public API,
-    so we can't use the function in keras-team/keras to test tf.keras.
-
-    # Arguments
-        input_length: integer.
-        filter_size: integer.
-        padding: one of `"same"`, `"valid"`, `"full"`.
-        stride: integer.
-        dilation: dilation rate, integer.
-
-    # Returns
-        The output length (integer).
-    """
-    if input_length is None:
-        return None
-    assert padding in {'same', 'valid', 'full', 'causal'}
-    dilated_filter_size = filter_size + (filter_size - 1) * (dilation - 1)
-    if padding == 'same':
-        output_length = input_length
-    elif padding == 'valid':
-        output_length = input_length - dilated_filter_size + 1
-    elif padding == 'causal':
-        output_length = input_length
-    elif padding == 'full':
-        output_length = input_length + dilated_filter_size - 1
-    return (output_length + stride - 1) // stride
-
-
-def normalize_data_format(value):
-    """Checks that the value corresponds to a valid data format.
-
-    Copy of the function in keras-team/keras because it's not public API.
-
-    # Arguments
-        value: String or None. `'channels_first'` or `'channels_last'`.
-
-    # Returns
-        A string, either `'channels_first'` or `'channels_last'`.
-
-    # Example
-    ```python
-        >>> from keras import backend as K
-        >>> K.normalize_data_format(None)
-        'channels_first'
-        >>> K.normalize_data_format('channels_last')
-        'channels_last'
-    ```
-
-    # Raises
-        ValueError: if `value` or the global `data_format` is invalid.
-    """
-    if value is None:
-        value = K.image_data_format()
-    data_format = value.lower()
-    if data_format not in {'channels_first', 'channels_last'}:
-        raise ValueError('The `data_format` argument must be one of '
-                         '"channels_first", "channels_last". Received: ' +
-                         str(value))
-    return data_format
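A few quick spot-checks of the arithmetic implemented by `conv_output_length` above; the values are illustrative, not from the original file:

```python
# Length-10 input, width-3 filter, default dilation of 1.
assert conv_output_length(10, 3, 'valid', 1) == 8   # 10 - 3 + 1
assert conv_output_length(10, 3, 'same', 2) == 5    # ceil(10 / 2)
assert conv_output_length(10, 3, 'full', 1) == 12   # 10 + 3 - 1
```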
-import warnings
-
-import h5py
-import keras.backend as K
-from keras import optimizers
-from keras.engine import saving
-
-
-def save_all_weights(model, filepath, include_optimizer=True):
-    """Save model weights and optimizer weights but not configuration to a HDF5 file.
-
-    Functionally between `save` and `save_weights`.
-
-    The HDF5 file contains:
-        - the model's weights
-        - the model's optimizer's state (if any)
-
-    If you have a complicated model or set of models that do not serialize
-    to JSON correctly, use this method.
-
-    # Arguments
-        model: Keras model instance to be saved.
-        filepath: String, path where to save the model.
-        include_optimizer: If True, save optimizer's state together.
-
-    # Raises
-        ImportError: if h5py is not available.
-    """
-    if h5py is None:
-        raise ImportError('`save_all_weights` requires h5py.')
-
-    with h5py.File(filepath, 'w') as f:
-        model_weights_group = f.create_group('model_weights')
-        model_layers = model.layers
-        saving.save_weights_to_hdf5_group(model_weights_group, model_layers)
-
-        if include_optimizer and hasattr(model, 'optimizer') and model.optimizer:
-            if isinstance(model.optimizer, optimizers.TFOptimizer):
-                warnings.warn(
-                    'TensorFlow optimizers do not '
-                    'make it possible to access '
-                    'optimizer attributes or optimizer state '
-                    'after instantiation. '
-                    'As a result, we cannot save the optimizer '
-                    'as part of the model save file. '
-                    'You will have to compile your model again after loading it. '
-                    'Prefer using a Keras optimizer instead '
-                    '(see keras.io/optimizers).')
-            else:
-                # Save optimizer weights.
-                symbolic_weights = getattr(model.optimizer, 'weights')
-                if symbolic_weights:
-                    optimizer_weights_group = f.create_group(
-                        'optimizer_weights')
-                    weight_values = K.batch_get_value(symbolic_weights)
-                    weight_names = []
-                    for i, (w, val) in enumerate(zip(symbolic_weights,
-                                                     weight_values)):
-                        # The default name of symbolic weights is /variable
-                        # for theano.
-                        if K.backend() == 'theano':
-                            if hasattr(w, 'name') and w.name != "/variable":
-                                name = str(w.name)
-                            else:
-                                name = 'param_' + str(i)
-                        else:
-                            if hasattr(w, 'name') and w.name:
-                                name = str(w.name)
-                            else:
-                                name = 'param_' + str(i)
-                        weight_names.append(name.encode('utf8'))
-                    optimizer_weights_group.attrs['weight_names'] = weight_names
-                    for name, val in zip(weight_names, weight_values):
-                        param_dset = optimizer_weights_group.create_dataset(
-                            name,
-                            val.shape,
-                            dtype=val.dtype)
-                        if not val.shape:
-                            # scalar
-                            param_dset[()] = val
-                        else:
-                            param_dset[:] = val
-
-
-def load_all_weights(model, filepath, include_optimizer=True):
-    """Loads the weights of a model saved via `save_all_weights`.
-
-    If the model has been compiled, optionally load its optimizer's weights.
-
-    # Arguments
-        model: instantiated model with architecture matching the saved model.
-            Compile the model beforehand if you want to load optimizer weights.
-        filepath: String, path to the saved model.
-
-    # Returns
-        None. The model will have its weights updated.
-
-    # Raises
-        ImportError: if h5py is not available.
-        ValueError: In case of an invalid savefile.
-    """
-    if h5py is None:
-        raise ImportError('`load_all_weights` requires h5py.')
-
-    with h5py.File(filepath, mode='r') as f:
-        # set weights
-        saving.load_weights_from_hdf5_group(f['model_weights'], model.layers)
-        # Set optimizer weights.
-        if (include_optimizer
-                and 'optimizer_weights' in f and hasattr(model, 'optimizer')
-                and model.optimizer):
-            optimizer_weights_group = f['optimizer_weights']
-            optimizer_weight_names = [n.decode('utf8') for n in
-                                      optimizer_weights_group.attrs['weight_names']]
-            optimizer_weight_values = [optimizer_weights_group[n] for n in
-                                       optimizer_weight_names]
-            model.optimizer.set_weights(optimizer_weight_values)
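A minimal round-trip sketch for the `save_all_weights`/`load_all_weights` pair above, assuming Keras with h5py available; the file name, shapes, and single training step are illustrative:

```python
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

model = Sequential([Dense(4, input_shape=(8,))])
model.compile(optimizer='adam', loss='mse')
# One training step so the optimizer actually has state worth saving.
model.train_on_batch(np.zeros((2, 8)), np.zeros((2, 4)))
save_all_weights(model, 'all_weights.h5')

# A freshly built, compiled clone picks up both weight sets.
clone = Sequential([Dense(4, input_shape=(8,))])
clone.compile(optimizer='adam', loss='mse')
load_all_weights(clone, 'all_weights.h5')
```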
-"""Utilities related to Keras unit tests."""
-import sys
-import numpy as np
-from numpy.testing import assert_allclose
-import inspect
-
-import keras
-from keras.layers import Input
-from keras.models import Model
-from keras import backend as K
-
-
-def get_test_data(num_train=1000, num_test=500, input_shape=(10,),
-                  output_shape=(2,),
-                  classification=True, num_classes=2):
-    """Generates test data to train a model on.
-
-    classification=True overrides output_shape
-    (i.e. output_shape is set to (1,)) and the output
-    consists of integers in [0, num_classes - 1].
-
-    Otherwise: float output with shape output_shape.
-    """
-    samples = num_train + num_test
-    if classification:
-        y = np.random.randint(0, num_classes, size=(samples,))
-        X = np.zeros((samples,) + input_shape)
-        for i in range(samples):
-            X[i] = np.random.normal(loc=y[i], scale=0.7, size=input_shape)
-    else:
-        y_loc = np.random.random((samples,))
-        X = np.zeros((samples,) + input_shape)
-        y = np.zeros((samples,) + output_shape)
-        for i in range(samples):
-            X[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=input_shape)
-            y[i] = np.random.normal(loc=y_loc[i], scale=0.7, size=output_shape)
-
-    return (X[:num_train], y[:num_train]), (X[num_train:], y[num_train:])
-
-
-def layer_test(layer_cls, kwargs={}, input_shape=None, input_dtype=None,
-               input_data=None, expected_output=None,
-               expected_output_dtype=None, fixed_batch_size=False):
-    """Test routine for a layer with a single input tensor
-    and single output tensor.
-
-    Copy of the function in keras-team/keras because it's not in the public API.
-    If we use the one from keras-team/keras it won't work with tf.keras.
-    """
-    # generate input data
-    if input_data is None:
-        assert input_shape
-        if not input_dtype:
-            input_dtype = K.floatx()
-        input_data_shape = list(input_shape)
-        for i, e in enumerate(input_data_shape):
-            if e is None:
-                input_data_shape[i] = np.random.randint(1, 4)
-        input_data = (10 * np.random.random(input_data_shape))
-        input_data = input_data.astype(input_dtype)
-    else:
-        if input_shape is None:
-            input_shape = input_data.shape
-        if input_dtype is None:
-            input_dtype = input_data.dtype
-    if expected_output_dtype is None:
-        expected_output_dtype = input_dtype
-
-    # instantiation
-    layer = layer_cls(**kwargs)
-
-    # test get_weights, set_weights at layer level
-    weights = layer.get_weights()
-    layer.set_weights(weights)
-
-    expected_output_shape = layer.compute_output_shape(input_shape)
-
-    # test in functional API
-    if fixed_batch_size:
-        x = Input(batch_shape=input_shape, dtype=input_dtype)
-    else:
-        x = Input(shape=input_shape[1:], dtype=input_dtype)
-    y = layer(x)
-    assert K.dtype(y) == expected_output_dtype
-
-    # check with the functional API
-    model = Model(x, y)
-
-    actual_output = model.predict(input_data)
-    actual_output_shape = actual_output.shape
-    for expected_dim, actual_dim in zip(expected_output_shape,
-                                        actual_output_shape):
-        if expected_dim is not None:
-            assert expected_dim == actual_dim
-
-    if expected_output is not None:
-        assert_allclose(actual_output, expected_output, rtol=1e-3)
-
-    # test serialization, weight setting at model level
-    model_config = model.get_config()
-    custom_objects = {layer.__class__.__name__: layer.__class__}
-    recovered_model = model.__class__.from_config(model_config, custom_objects)
-    if model.weights:
-        weights = model.get_weights()
-        recovered_model.set_weights(weights)
-        _output = recovered_model.predict(input_data)
-        assert_allclose(_output, actual_output, rtol=1e-3)
-
-    # test training mode (e.g. useful when the layer has a
-    # different behavior at training and testing time).
-    if has_arg(layer.call, 'training'):
-        model.compile('rmsprop', 'mse')
-        model.train_on_batch(input_data, actual_output)
-
-    # test instantiation from layer config
-    layer_config = layer.get_config()
-    layer_config['batch_input_shape'] = input_shape
-    layer = layer.__class__.from_config(layer_config)
-
-    # for further checks in the caller function
-    return actual_output
-
-
-def has_arg(fn, name, accept_all=False):
-    """Checks if a callable accepts a given keyword argument.
-
-    For Python 2, checks if there is an argument with the given name.
-
-    For Python 3, checks if there is an argument with the given name, and
-    also whether this argument can be called with a keyword (i.e. if it is
-    not a positional-only argument).
-
-    This function is a copy of the one in keras-team/keras because it's not
-    in the public API.
-
-    # Arguments
-        fn: Callable to inspect.
-        name: Check if `fn` can be called with `name` as a keyword argument.
-        accept_all: What to return if there is no parameter called `name`
-            but the function accepts a `**kwargs` argument.
-
-    # Returns
-        bool, whether `fn` accepts a `name` keyword argument.
-    """
-    if sys.version_info < (3,):
-        arg_spec = inspect.getargspec(fn)
-        if accept_all and arg_spec.keywords is not None:
-            return True
-        return name in arg_spec.args
-    elif sys.version_info < (3, 3):
-        arg_spec = inspect.getfullargspec(fn)
-        if accept_all and arg_spec.varkw is not None:
-            return True
-        return (name in arg_spec.args or
-                name in arg_spec.kwonlyargs)
-    else:
-        signature = inspect.signature(fn)
-        parameter = signature.parameters.get(name)
-        if parameter is None:
-            if accept_all:
-                for param in signature.parameters.values():
-                    if param.kind == inspect.Parameter.VAR_KEYWORD:
-                        return True
-            return False
-        return (parameter.kind in (inspect.Parameter.POSITIONAL_OR_KEYWORD,
-                                   inspect.Parameter.KEYWORD_ONLY))
-
-
-def to_list(x, allow_tuple=False):
-    if isinstance(x, list):
-        return x
-    if allow_tuple and isinstance(x, tuple):
-        return list(x)
-    return [x]
-
-
-def unpack_singleton(x):
-    if len(x) == 1:
-        return x[0]
-    return x
-
-
-if keras.__name__ == 'keras':
-    is_tf_keras = False
-elif keras.__name__ == 'tensorflow.keras':
-    is_tf_keras = True
-else:
-    raise KeyError('Cannot detect if using keras or tf.keras.')
-
-
-def to_tuple(shape):
-    """This function is here to fix an inconsistency between keras and tf.keras.
-
-    In tf.keras, the input_shape argument is a tuple with `Dimension` objects.
-    In keras, the input_shape is a simple tuple of ints or `None`.
-
-    We'll work with tuples of ints or `None` to be consistent
-    with keras-team/keras. So we must apply this function to
-    all input_shapes of the build methods in custom layers.
-    """
-    if is_tf_keras:
-        import tensorflow as tf
-        return tuple(tf.TensorShape(shape).as_list())
-    else:
-        return shape
-import pytest
-import numpy as np
-
-from keras import backend as K
-from keras_contrib import constraints
-
-
-test_values = [0.1, 0.5, 3, 8, 1e-7]
-np.random.seed(3537)
-example_array = np.random.random((100, 100)) * 100. - 50.
-example_array[0, 0] = 0.  # 0 could possibly cause trouble
-
-
-def test_clip():
-    clip_instance = constraints.clip()
-    clipped = clip_instance(K.variable(example_array))
-    assert np.max(np.abs(K.eval(clipped))) <= K.cast_to_floatx(0.01)
-    clip_instance = constraints.clip(0.1)
-    clipped = clip_instance(K.variable(example_array))
-    assert np.max(np.abs(K.eval(clipped))) <= K.cast_to_floatx(0.1)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__])
-from keras import backend as K
-from keras_contrib import backend as KC
-from keras_contrib import initializers
-import pytest
-import numpy as np
-
-
-# 2D tensor test fixture
-FC_SHAPE = (100, 100)
-
-# 4D convolution in th (Theano) dim ordering.
-# This shape has the same effective shape as FC_SHAPE.
-CONV_SHAPE = (25, 25, 2, 2)
-
-# The equivalent shape of both test fixtures
-SHAPE = (100, 100)
-
-
-def _runner(init, shape, target_mean=None, target_std=None,
-            target_max=None, target_min=None, upper_bound=None, lower_bound=None):
-    variable = init(shape)
-    if not isinstance(variable, np.ndarray):
-        output = K.get_value(variable)
-    else:
-        output = variable
-
-    lim = 1e-2
-    if target_std is not None:
-        assert abs(output.std() - target_std) < lim
-    if target_mean is not None:
-        assert abs(output.mean() - target_mean) < lim
-    if target_max is not None:
-        assert abs(output.max() - target_max) < lim
-    if target_min is not None:
-        assert abs(output.min() - target_min) < lim
-    if upper_bound is not None:
-        assert output.max() < upper_bound
-    if lower_bound is not None:
-        assert output.min() > lower_bound
-
-
-'''
-# Example:
-
-@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV'])
-def test_uniform(tensor_shape):
-    _runner(initializations.uniform, tensor_shape, target_mean=0.,
-            target_max=0.05, target_min=-0.05)
-
-'''
-
-
-@pytest.mark.parametrize('tensor_shape', [FC_SHAPE, CONV_SHAPE], ids=['FC', 'CONV'])
-def test_cai(tensor_shape):
-    # upper and lower bounds are proven in the original paper
-    _runner(initializers.ConvolutionAware(), tensor_shape,
-            upper_bound=1, lower_bound=-1)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__])
-import os
-import pytest
-from github import Github
-try:
-    import pathlib
-except ImportError:
-    import pathlib2 as pathlib
-
-path_to_keras_contrib = pathlib.Path(__file__).resolve().parents[2]
-path_to_codeowners = path_to_keras_contrib / 'CODEOWNERS'
-
-authenticated = True
-try:
-    github_client = Github(os.environ['GITHUB_TOKEN'])
-except KeyError:
-    try:
-        github_client = Github(os.environ['GITHUB_USER'],
-                               os.environ['GITHUB_PASSWORD'])
-    except KeyError:
-        authenticated = False
-
-
-def parse_codeowners():
-    map_path_owner = []
-    for line in open(path_to_codeowners, 'r'):
-        line = line.strip()
-        if line.startswith('#') or line == '':
-            continue
-        x = line.split(' ')
-        path = path_to_keras_contrib / x[0]
-        owner = x[-1]
-        map_path_owner.append((path, owner))
-    return map_path_owner
-
-
-def test_codeowners_file_exist():
-    for path, _ in parse_codeowners():
-        assert path.exists()
-
-
-@pytest.mark.skipif(not authenticated,
-                    reason='It should be possible to run the test without '
-                           'authentication, but we might get our request refused '
-                           'by github. To be deterministic, we\'ll disable it.')
-def test_codeowners_user_exist():
-    for _, user in parse_codeowners():
-        assert user[0] == '@'
-        assert github_client.get_user(user[1:])
-
-
-directories_to_test = [
-    'examples',
-    'keras_contrib/activations',
-    'keras_contrib/applications',
-    'keras_contrib/callbacks',
-    'keras_contrib/constraints',
-    'keras_contrib/datasets',
-    'keras_contrib/initializers',
-    'keras_contrib/layers',
-    'keras_contrib/losses',
-    'keras_contrib/metrics',
-    'keras_contrib/optimizers',
-    'keras_contrib/preprocessing',
-    'keras_contrib/regularizers',
-    'keras_contrib/wrappers'
-]
-directories_to_test = [path_to_keras_contrib / x for x in directories_to_test]
-
-# TODO: remove those files or find owners for them.
-exclude = [ - 'examples/cifar10_clr.py', - 'examples/cifar10_densenet.py', - 'examples/cifar10_nasnet.py', - 'examples/cifar10_resnet.py', - 'examples/cifar10_ror.py', - 'examples/cifar10_wide_resnet.py', - 'examples/conll2000_chunking_crf.py', - 'examples/improved_wgan.py', - 'examples/jaccard_loss.py', - 'keras_contrib/callbacks/cyclical_learning_rate.py', - 'keras_contrib/callbacks/dead_relu_detector.py', - 'keras_contrib/applications/resnet.py', - 'keras_contrib/constraints/clip.py', - 'keras_contrib/datasets/coco.py', - 'keras_contrib/datasets/conll2000.py', - 'keras_contrib/datasets/pascal_voc.py', - 'keras_contrib/initializers/convaware.py', - 'keras_contrib/losses/crf_losses.py', - 'keras_contrib/losses/dssim.py', - 'keras_contrib/losses/jaccard.py', - 'keras_contrib/layers/advanced_activations/pelu.py', - 'keras_contrib/layers/advanced_activations/srelu.py', - 'keras_contrib/layers/convolutional/cosineconvolution2d.py', - 'keras_contrib/layers/core.py', - 'keras_contrib/layers/crf.py', - 'keras_contrib/layers/normalization/instancenormalization.py', - 'keras_contrib/optimizers/ftml.py', - 'keras_contrib/optimizers/lars.py', - 'keras_contrib/metrics/crf_accuracies.py', -] -exclude = [path_to_keras_contrib / x for x in exclude] - - -@pytest.mark.parametrize('directory', directories_to_test) -def test_all_files_have_owners(directory): - files_with_owners = [x[0] for x in parse_codeowners()] - for root, dirs, files in os.walk(directory): - for name in files: - file_path = pathlib.Path(root) / name - if file_path.suffix != '.py': - continue - if file_path.name == '__init__.py': - continue - if file_path in exclude: - continue - assert file_path in files_with_owners - - -if __name__ == '__main__': - pytest.main([__file__]) -from markdown import markdown -from docs import autogen -import pytest - -test_doc1 = { - 'doc': """Base class for recurrent layers. - - # Arguments - cell: A RNN cell instance. A RNN cell is a class that has: - - a `call(input_at_t, states_at_t)` method, returning - `(output_at_t, states_at_t_plus_1)`. The call method of the - cell can also take the optional argument `constants`, see - section "Note on passing external constants" below. - - a `state_size` attribute. This can be a single integer - (single state) in which case it is - the size of the recurrent state - (which should be the same as the size of the cell output). - This can also be a list/tuple of integers - (one size per state). In this case, the first entry - (`state_size[0]`) should be the same as - the size of the cell output. - It is also possible for `cell` to be a list of RNN cell instances, - in which cases the cells get stacked on after the other in the RNN, - implementing an efficient stacked RNN. - return_sequences: Boolean. Whether to return the last output - in the output sequence, or the full sequence. - return_state: Boolean. Whether to return the last state - in addition to the output. - go_backwards: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. - stateful: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. - unroll: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - input_dim: dimensionality of the input (integer). 
- This argument (or alternatively, - the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. - input_length: Length of input sequences, to be specified - when it is constant. - This argument is required if you are going to connect - `Flatten` then `Dense` layers upstream - (without it, the shape of the dense outputs cannot be computed). - Note that if the recurrent layer is not the first layer - in your model, you would need to specify the input length - at the level of the first layer - (e.g. via the `input_shape` argument) - - # Input shape - 3D tensor with shape `(batch_size, timesteps, input_dim)`. - - # Output shape - - if `return_state`: a list of tensors. The first tensor is - the output. The remaining tensors are the last states, - each with shape `(batch_size, units)`. - - if `return_sequences`: 3D tensor with shape - `(batch_size, timesteps, units)`. - - else, 2D tensor with shape `(batch_size, units)`. - - # Masking - This layer supports masking for input data with a variable number - of timesteps. To introduce masks to your data, - use an [Embedding](embeddings.md) layer with the `mask_zero` parameter - set to `True`. - - # Note on using statefulness in RNNs - You can set RNN layers to be 'stateful', which means that the states - computed for the samples in one batch will be reused as initial states - for the samples in the next batch. This assumes a one-to-one mapping - between samples in different successive batches. - - To enable statefulness: - - specify `stateful=True` in the layer constructor. - - specify a fixed batch size for your model, by passing - if sequential model: - `batch_input_shape=(...)` to the first layer in your model. - else for functional model with 1 or more Input layers: - `batch_shape=(...)` to all the first layers in your model. - This is the expected shape of your inputs - *including the batch size*. - It should be a tuple of integers, e.g. `(32, 10, 100)`. - - specify `shuffle=False` when calling fit(). - - To reset the states of your model, call `.reset_states()` on either - a specific layer, or on your entire model. - - # Note on specifying the initial state of RNNs - Note: that - One: You can specify the initial state of RNN layers symbolically by - calling them with the keyword argument `initial_state`. - Two: The value of `initial_state` should be a tensor or list of - tensors representing - the initial state of the RNN layer. - You can specify the initial state of RNN layers numerically by: - One: calling `reset_states` - - With the keyword argument `states`. - - The value of - `states` should be a numpy array or - list of numpy arrays representing - the initial state of the RNN layer. - - # Note on passing external constants to RNNs - You can pass "external" constants to the cell using the `constants` - keyword: argument of `RNN.__call__` (as well as `RNN.call`) method. - This: requires that the `cell.call` method accepts the same keyword argument - `constants`. Such constants can be used to condition the cell - transformation on additional static inputs (not changing over time), - a.k.a. an attention mechanism. - - # Examples - - ```python - # First, let's define a RNN Cell, as a layer subclass. 
- - class MinimalRNNCell(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(MinimalRNNCell, self).__init__(**kwargs) - - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.built = True - - def call(self, inputs, states): - prev_output = states[0] - h = K.dot(inputs, self.kernel) - output = h + K.dot(prev_output, self.recurrent_kernel) - return output, [output] - - # Let's use this cell in a RNN layer: - - cell = MinimalRNNCell(32) - x = keras.Input((None, 5)) - layer = RNN(cell) - y = layer(x) - - # Here's how to use the cell to build a stacked RNN: - - cells = [MinimalRNNCell(32), MinimalRNNCell(64)] - x = keras.Input((None, 5)) - layer = RNN(cells) - y = layer(x) - ``` - """, - 'result': '''Base class for recurrent layers. - -__Arguments__ - -- __cell__: A RNN cell instance. A RNN cell is a class that has: - - a `call(input_at_t, states_at_t)` method, returning - `(output_at_t, states_at_t_plus_1)`. The call method of the - cell can also take the optional argument `constants`, see - section "Note on passing external constants" below. - - a `state_size` attribute. This can be a single integer - (single state) in which case it is - the size of the recurrent state - (which should be the same as the size of the cell output). - This can also be a list/tuple of integers - (one size per state). In this case, the first entry - (`state_size[0]`) should be the same as - the size of the cell output. - - It is also possible for `cell` to be a list of RNN cell instances, - in which cases the cells get stacked on after the other in the RNN, - implementing an efficient stacked RNN. - -- __return_sequences__: Boolean. Whether to return the last output - in the output sequence, or the full sequence. -- __return_state__: Boolean. Whether to return the last state - in addition to the output. -- __go_backwards__: Boolean (default False). - If True, process the input sequence backwards and return the - reversed sequence. -- __stateful__: Boolean (default False). If True, the last state - for each sample at index i in a batch will be used as initial - state for the sample of index i in the following batch. -- __unroll__: Boolean (default False). - If True, the network will be unrolled, - else a symbolic loop will be used. - Unrolling can speed-up a RNN, - although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. -- __input_dim__: dimensionality of the input (integer). - This argument (or alternatively, - the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. -- __input_length__: Length of input sequences, to be specified - when it is constant. - This argument is required if you are going to connect - `Flatten` then `Dense` layers upstream - (without it, the shape of the dense outputs cannot be computed). - Note that if the recurrent layer is not the first layer - in your model, you would need to specify the input length - at the level of the first layer - (e.g. via the `input_shape` argument) - -__Input shape__ - -3D tensor with shape `(batch_size, timesteps, input_dim)`. - -__Output shape__ - -- if `return_state`: a list of tensors. The first tensor is - the output. 
The remaining tensors are the last states, - each with shape `(batch_size, units)`. -- if `return_sequences`: 3D tensor with shape - `(batch_size, timesteps, units)`. -- else, 2D tensor with shape `(batch_size, units)`. - -__Masking__ - -This layer supports masking for input data with a variable number -of timesteps. To introduce masks to your data, -use an [Embedding](embeddings.md) layer with the `mask_zero` parameter -set to `True`. - -__Note on using statefulness in RNNs__ - -You can set RNN layers to be 'stateful', which means that the states -computed for the samples in one batch will be reused as initial states -for the samples in the next batch. This assumes a one-to-one mapping -between samples in different successive batches. - -To enable statefulness: -- specify `stateful=True` in the layer constructor. -- specify a fixed batch size for your model, by passing -if sequential model: -`batch_input_shape=(...)` to the first layer in your model. -else for functional model with 1 or more Input layers: -`batch_shape=(...)` to all the first layers in your model. -This is the expected shape of your inputs -*including the batch size*. -It should be a tuple of integers, e.g. `(32, 10, 100)`. -- specify `shuffle=False` when calling fit(). - -To reset the states of your model, call `.reset_states()` on either -a specific layer, or on your entire model. - -__Note on specifying the initial state of RNNs__ - -Note: that -- __One__: You can specify the initial state of RNN layers symbolically by - calling them with the keyword argument `initial_state`. -- __Two__: The value of `initial_state` should be a tensor or list of - tensors representing - the initial state of the RNN layer. - -You can specify the initial state of RNN layers numerically by: - -- __One__: calling `reset_states` - - With the keyword argument `states`. - - The value of - - `states` should be a numpy array or - list of numpy arrays representing - -the initial state of the RNN layer. - -__Note on passing external constants to RNNs__ - -You can pass "external" constants to the cell using the `constants` -- __keyword__: argument of `RNN.__call__` (as well as `RNN.call`) method. -- __This__: requires that the `cell.call` method accepts the same keyword argument - -`constants`. Such constants can be used to condition the cell -transformation on additional static inputs (not changing over time), -a.k.a. an attention mechanism. - -__Examples__ - - -```python -# First, let's define a RNN Cell, as a layer subclass. 
- -class MinimalRNNCell(keras.layers.Layer): - - def __init__(self, units, **kwargs): - self.units = units - self.state_size = units - super(MinimalRNNCell, self).__init__(**kwargs) - - def build(self, input_shape): - self.kernel = self.add_weight(shape=(input_shape[-1], self.units), - initializer='uniform', - name='kernel') - self.recurrent_kernel = self.add_weight( - shape=(self.units, self.units), - initializer='uniform', - name='recurrent_kernel') - self.built = True - - def call(self, inputs, states): - prev_output = states[0] - h = K.dot(inputs, self.kernel) - output = h + K.dot(prev_output, self.recurrent_kernel) - return output, [output] - -# Let's use this cell in a RNN layer: - -cell = MinimalRNNCell(32) -x = keras.Input((None, 5)) -layer = RNN(cell) -y = layer(x) - -# Here's how to use the cell to build a stacked RNN: - -cells = [MinimalRNNCell(32), MinimalRNNCell(64)] -x = keras.Input((None, 5)) -layer = RNN(cells) -y = layer(x) -``` -'''} - - -def test_doc_lists(): - docstring = autogen.process_docstring(test_doc1['doc']) - assert markdown(docstring) == markdown(test_doc1['result']) - - -dummy_docstring = """Multiplies 2 tensors (and/or variables) and returns a *tensor*. - - When attempting to multiply a nD tensor - with a nD tensor, it reproduces the Theano behavior. - (e.g. `(2, 3) * (4, 3, 5) -> (2, 4, 5)`) - - # Examples - ```python - # Theano-like behavior example - >>> x = K.random_uniform_variable(shape=(2, 3), low=0, high=1) - >>> y = K.ones((4, 3, 5)) - >>> xy = K.dot(x, y) - >>> K.int_shape(xy) - (2, 4, 5) - ``` - - # Numpy implementation - ```python - def dot(x, y): - return dot(x, y) - ``` - """ - - -def test_doc_multiple_sections_code(): - """ Checks that we can have code blocks in multiple sections.""" - generated = autogen.process_docstring(dummy_docstring) - assert '# Theano-like behavior example' in generated - assert 'def dot(x, y):' in generated - - -if __name__ == '__main__': - pytest.main([__file__]) -import importlib -import inspect -import re -import sys -from itertools import compress - -import pytest - -modules = ['keras_contrib.layers', - 'keras_contrib', - 'keras_contrib.backend.tensorflow_backend', - 'keras_contrib.wrappers', - 'keras_contrib.utils', - 'keras_contrib.callbacks', - 'keras_contrib.activations', - 'keras_contrib.losses', - 'keras_contrib.optimizers'] -accepted_name = ['from_config'] -accepted_module = [] - -# Functions or classes with less than 'MIN_CODE_SIZE' lines can be ignored -MIN_CODE_SIZE = 10 - - -def handle_class(name, member): - if is_accepted(name, member): - return - - if member.__doc__ is None and not member_too_small(member): - raise ValueError("{} class doesn't have any documentation".format(name), - member.__module__, inspect.getmodule(member).__file__) - for n, met in inspect.getmembers(member): - if inspect.ismethod(met): - handle_method(n, met) - - -def handle_function(name, member): - if is_accepted(name, member) or member_too_small(member): - # We don't need to check this one. 
-        return
-    doc = member.__doc__
-    if doc is None:
-        raise ValueError("{} function doesn't have any documentation".format(name),
-                         member.__module__, inspect.getmodule(member).__file__)
-
-    args = list(inspect.signature(member).parameters.keys())
-    assert_args_presence(args, doc, member, name)
-    assert_function_style(name, member, doc, args)
-    assert_doc_style(name, member, doc)
-
-
-def assert_doc_style(name, member, doc):
-    lines = doc.split("\n")
-    first_line = lines[0]
-    if len(first_line.strip()) == 0:
-        raise ValueError(
-            "{} the documentation should be on the first line.".format(name),
-            member.__module__)
-    if first_line.strip()[-1] != '.':
-        raise ValueError("{} first line should end with a '.'".format(name),
-                         member.__module__)
-
-
-def assert_function_style(name, member, doc, args):
-    code = inspect.getsource(member)
-    has_return = re.findall(r"\s*return \S+", code, re.MULTILINE)
-    if has_return and "# Returns" not in doc:
-        innerfunction = [inspect.getsource(x) for x in member.__code__.co_consts if
-                         inspect.iscode(x)]
-        return_in_sub = [ret for code_inner in innerfunction for ret in
-                         re.findall(r"\s*return \S+", code_inner, re.MULTILINE)]
-        if len(return_in_sub) < len(has_return):
-            raise ValueError("{} needs a '# Returns' section".format(name),
-                             member.__module__)
-
-    has_raise = re.findall(r"^\s*raise \S+", code, re.MULTILINE)
-    if has_raise and "# Raises" not in doc:
-        innerfunction = [inspect.getsource(x) for x in member.__code__.co_consts if
-                         inspect.iscode(x)]
-        raise_in_sub = [ret for code_inner in innerfunction for ret in
-                        re.findall(r"\s*raise \S+", code_inner, re.MULTILINE)]
-        if len(raise_in_sub) < len(has_raise):
-            raise ValueError("{} needs a '# Raises' section".format(name),
-                             member.__module__)
-
-    if len(args) > 0 and "# Arguments" not in doc:
-        raise ValueError("{} needs a '# Arguments' section".format(name),
-                         member.__module__)
-
-    assert_blank_before(name, member, doc, [
-        '# Arguments', '# Raises', '# Returns'])
-
-
-def assert_blank_before(name, member, doc, keywords):
-    doc_lines = [x.strip() for x in doc.split('\n')]
-    for keyword in keywords:
-        if keyword in doc_lines:
-            index = doc_lines.index(keyword)
-            if doc_lines[index - 1] != '':
-                raise ValueError(
-                    "{} '{}' should have a blank line above.".format(
-                        name, keyword),
-                    member.__module__)
-
-
-def is_accepted(name, member):
-    if 'keras' not in str(member.__module__):
-        return True
-    return name in accepted_name or member.__module__ in accepted_module
-
-
-def member_too_small(member):
-    code = inspect.getsource(member).split('\n')
-    return len(code) < MIN_CODE_SIZE
-
-
-def assert_args_presence(args, doc, member, name):
-    args_not_in_doc = [arg not in doc for arg in args]
-    if any(args_not_in_doc):
-        raise ValueError(
-            "{} {} arguments are not present in documentation".format(name, list(
-                compress(args, args_not_in_doc))), member.__module__)
-    words = doc.replace('*', '').split()
-    # Check arguments styling
-    styles = [arg + ":" not in words for arg in args]
-    if any(styles):
-        raise ValueError(
-            "{} {} are not styled properly: 'argument': documentation".format(
-                name,
-                list(compress(args, styles))),
-            member.__module__)
-
-    # Check arguments order
-    indexes = [words.index(arg + ":") for arg in args]
-    if indexes != sorted(indexes):
-        raise ValueError(
-            "{} arguments order is different from the documentation".format(
-                name),
-            member.__module__)
-
-
-def handle_method(name, member):
-    if name in accepted_name or member.__module__ in accepted_module:
-        return
-    handle_function(name, member)
member) - - -def handle_module(mod): - for name, mem in inspect.getmembers(mod): - if inspect.isclass(mem): - handle_class(name, mem) - elif inspect.isfunction(mem): - handle_function(name, mem) - elif 'keras' in name and inspect.ismodule(mem): - # Only test keras' modules - handle_module(mem) - - -@pytest.mark.skipif(sys.version_info < (3, 3), reason="requires python3.3") -@pytest.mark.parametrize('module', modules) -def test_doc(module): - mod = importlib.import_module(module) - handle_module(mod) - - -if __name__ == '__main__': - pytest.main([__file__]) -from keras.layers import Layer, InputSpec -from keras import initializers, regularizers, constraints -import keras.backend as K -from keras_contrib.utils.test_utils import to_tuple - - -class PELU(Layer): - """Parametric Exponential Linear Unit. - - It follows: - `f(x) = alphas * (exp(x / betas) - 1) for x < 0`, - `f(x) = (alphas / betas) * x for x >= 0`, - where `alphas` & `betas` are learned arrays with the same shape as x. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as the input. - - # Arguments - alphas_initializer: initialization function for the alpha variable weights. - betas_initializer: initialization function for the beta variable weights. - weights: initial weights, as a list of a single Numpy array. - shared_axes: the axes along which to share learnable - parameters for the activation function. - For example, if the incoming feature maps - are from a 2D convolution - with output shape `(batch, height, width, channels)`, - and you wish to share parameters across space - so that each filter only has one set of parameters, - set `shared_axes=[1, 2]`. 
-from keras.layers import Layer, InputSpec
-from keras import initializers, regularizers, constraints
-import keras.backend as K
-from keras_contrib.utils.test_utils import to_tuple
-
-
-class PELU(Layer):
-    """Parametric Exponential Linear Unit.
-
-    It follows:
-    `f(x) = alphas * (exp(x / betas) - 1) for x < 0`,
-    `f(x) = (alphas / betas) * x for x >= 0`,
-    where `alphas` & `betas` are learned arrays with the same shape as x.
-
-    # Input shape
-        Arbitrary. Use the keyword argument `input_shape`
-        (tuple of integers, does not include the samples axis)
-        when using this layer as the first layer in a model.
-
-    # Output shape
-        Same shape as the input.
-
-    # Arguments
-        alpha_initializer: initialization function for the alpha variable weights.
-        beta_initializer: initialization function for the beta variable weights.
-        shared_axes: the axes along which to share learnable
-            parameters for the activation function.
-            For example, if the incoming feature maps
-            are from a 2D convolution
-            with output shape `(batch, height, width, channels)`,
-            and you wish to share parameters across space
-            so that each filter only has one set of parameters,
-            set `shared_axes=[1, 2]`.
-
-    # References
-        - [Parametric exponential linear unit for deep convolutional neural networks](
-          https://arxiv.org/abs/1605.09332v3)
-    """
-
-    def __init__(self, alpha_initializer='ones',
-                 alpha_regularizer=None,
-                 alpha_constraint=None,
-                 beta_initializer='ones',
-                 beta_regularizer=None,
-                 beta_constraint=None,
-                 shared_axes=None,
-                 **kwargs):
-        super(PELU, self).__init__(**kwargs)
-        self.supports_masking = True
-        self.alpha_initializer = initializers.get(alpha_initializer)
-        self.alpha_regularizer = regularizers.get(alpha_regularizer)
-        self.alpha_constraint = constraints.get(alpha_constraint)
-        self.beta_initializer = initializers.get(beta_initializer)
-        self.beta_regularizer = regularizers.get(beta_regularizer)
-        self.beta_constraint = constraints.get(beta_constraint)
-        if shared_axes is None:
-            self.shared_axes = None
-        elif not isinstance(shared_axes, (list, tuple)):
-            self.shared_axes = [shared_axes]
-        else:
-            self.shared_axes = list(shared_axes)
-
-    def build(self, input_shape):
-        input_shape = to_tuple(input_shape)
-        param_shape = list(input_shape[1:])
-        self.param_broadcast = [False] * len(param_shape)
-        if self.shared_axes is not None:
-            for i in self.shared_axes:
-                param_shape[i - 1] = 1
-                self.param_broadcast[i - 1] = True
-
-        param_shape = tuple(param_shape)
-        # Initialised as ones to emulate the default ELU
-        self.alpha = self.add_weight(shape=param_shape,
-                                     name='alpha',
-                                     initializer=self.alpha_initializer,
-                                     regularizer=self.alpha_regularizer,
-                                     constraint=self.alpha_constraint)
-        self.beta = self.add_weight(shape=param_shape,
-                                    name='beta',
-                                    initializer=self.beta_initializer,
-                                    regularizer=self.beta_regularizer,
-                                    constraint=self.beta_constraint)
-
-        # Set input spec
-        axes = {}
-        if self.shared_axes:
-            for i in range(1, len(input_shape)):
-                if i not in self.shared_axes:
-                    axes[i] = input_shape[i]
-        self.input_spec = InputSpec(ndim=len(input_shape), axes=axes)
-        self.built = True
-
-    def call(self, x, mask=None):
-        if K.backend() == 'theano':
-            pos = K.relu(x) * (K.pattern_broadcast(self.alpha, self.param_broadcast) /
-                               K.pattern_broadcast(self.beta, self.param_broadcast))
-            neg = (K.pattern_broadcast(self.alpha, self.param_broadcast) *
-                   (K.exp((-K.relu(-x))
-                          / K.pattern_broadcast(self.beta, self.param_broadcast)) - 1))
-        else:
-            pos = K.relu(x) * self.alpha / self.beta
-            neg = self.alpha * (K.exp((-K.relu(-x)) / self.beta) - 1)
-        return neg + pos
-
-    def get_config(self):
-        config = {
-            'alpha_initializer': initializers.serialize(self.alpha_initializer),
-            'alpha_regularizer': regularizers.serialize(self.alpha_regularizer),
-            'alpha_constraint': constraints.serialize(self.alpha_constraint),
-            'beta_initializer': initializers.serialize(self.beta_initializer),
-            'beta_regularizer': regularizers.serialize(self.beta_regularizer),
-            'beta_constraint': constraints.serialize(self.beta_constraint),
-            'shared_axes': self.shared_axes
-        }
-        base_config = super(PELU, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-    def compute_output_shape(self, input_shape):
-        return input_shape
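A minimal usage sketch for the PELU layer above, assuming `keras` and `keras_contrib` are installed and the layer is exported from `keras_contrib.layers`; layer sizes and the input shape are illustrative:

```python
# Illustrative: drop PELU in wherever a fixed activation would go.
from keras.models import Sequential
from keras.layers import Dense
from keras_contrib.layers import PELU

model = Sequential()
model.add(Dense(64, input_shape=(100,)))
# One (alpha, beta) pair is learned per unit unless shared_axes is set.
model.add(PELU())
model.compile(optimizer='adam', loss='mse')
```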
-import keras.backend as K
-from keras.layers import Layer
-
-
-class SineReLU(Layer):
-    """Sine Rectified Linear Unit to generate oscillations.
-
-    It allows an oscillation in the gradients when the weights are negative.
-    The oscillation can be controlled with a parameter, which makes it close
-    or equal to zero. The function is differentiable at any point due to
-    its derivative.
-    For instance, at 0, the derivative of 'sin(0) - cos(0)'
-    is 'cos(0) + sin(0)' which is 1.
-
-    # Input shape
-        Arbitrary. Use the keyword argument `input_shape`
-        (tuple of integers, does not include the samples axis)
-        when using this layer as the first layer in a model.
-
-    # Output shape
-        Same shape as the input.
-
-    # Arguments
-        epsilon: float. Hyper-parameter used to control the amplitude of the
-            sinusoidal wave when weights are negative.
-            The default value of 0.0025 works well for CNN layers, which are
-            the most used layers nowadays.
-            When using Dense Networks, try something around 0.006.
-
-    # References:
-        - [SineReLU: An Alternative to the ReLU Activation Function](
-          https://medium.com/@wilder.rodrigues/sinerelu-an-alternative-to-the-relu-activation-function-e46a6199997d).
-
-        This function was
-        first introduced at the Codemotion Amsterdam 2018 and then at
-        the DevDays, in Vilnius, Lithuania.
-        It has been extensively tested with Deep Nets, CNNs,
-        LSTMs, Residual Nets and GANs, based
-        on the MNIST, Kaggle Toxicity and IMDB datasets.
-
-    # Performance:
-
-        - Fashion MNIST
-          * Mean of 6 runs per Activation Function
-          * Fully Connected Network
-            - SineReLU: loss mean -> 0.3522; accuracy mean -> 89.18;
-              mean of std loss -> 0.08375204467435822
-            - LeakyReLU: loss mean -> 0.3553; accuracy mean -> 88.98;
-              mean of std loss -> 0.0831161868455245
-            - ReLU: loss mean -> 0.3519; accuracy mean -> 88.84;
-              mean of std loss -> 0.08358816501301362
-          * Convolutional Neural Network
-            - SineReLU: loss mean -> 0.2180; accuracy mean -> 92.49;
-              mean of std loss -> 0.0781155784858847
-            - LeakyReLU: loss mean -> 0.2205; accuracy mean -> 92.37;
-              mean of std loss -> 0.09273670474788205
-            - ReLU: loss mean -> 0.2144; accuracy mean -> 92.45;
-              mean of std loss -> 0.09396114585977
-
-        - MNIST
-          * Mean of 6 runs per Activation Function
-          * Fully Connected Network
-            - SineReLU: loss mean -> 0.0623; accuracy mean -> 98.53;
-              mean of std loss -> 0.06012015231824904
-            - LeakyReLU: loss mean -> 0.0623; accuracy mean -> 98.50;
-              mean of std loss -> 0.06052147632835356
-            - ReLU: loss mean -> 0.0605; accuracy mean -> 98.49;
-              mean of std loss -> 0.059599885665016096
-          * Convolutional Neural Network
-            - SineReLU: loss mean -> 0.0198; accuracy mean -> 99.51;
-              mean of std loss -> 0.0425338329550847
-            - LeakyReLU: loss mean -> 0.0216; accuracy mean -> 99.40;
-              mean of std loss -> 0.04834468835196667
-            - ReLU: loss mean -> 0.0185; accuracy mean -> 99.49;
-              mean of std loss -> 0.05503719489690131
-
-    # Jupyter Notebooks
-        - https://github.com/ekholabs/DLinK/blob/master/notebooks/keras
-
-    # Examples
-        The Advanced Activation function SineReLU has to be imported from the
-        keras_contrib.layers package.
- - To see full source-code of this architecture and other examples, - please follow this link: https://github.com/ekholabs/DLinK - - ```python - model = Sequential() - model.add(Dense(128, input_shape = (784,))) - model.add(SineReLU()) - model.add(Dropout(0.2)) - - model.add(Dense(256)) - model.add(SineReLU()) - model.add(Dropout(0.3)) - - model.add(Dense(1024)) - model.add(SineReLU()) - model.add(Dropout(0.5)) - - model.add(Dense(10, activation = 'softmax')) - ``` - """ - - def __init__(self, epsilon=0.0025, **kwargs): - super(SineReLU, self).__init__(**kwargs) - self.supports_masking = True - self.epsilon = K.cast_to_floatx(epsilon) - - def call(self, Z): - m = self.epsilon * (K.sin(Z) - K.cos(Z)) - A = K.maximum(m, Z) - return A - - def get_config(self): - config = {'epsilon': float(self.epsilon)} - base_config = super(SineReLU, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape -from keras.layers import Layer, InputSpec -from keras import initializers -import keras.backend as K -from keras_contrib.utils.test_utils import to_tuple - - -class SReLU(Layer): - """S-shaped Rectified Linear Unit. - - It follows: - `f(x) = t^r + a^r(x - t^r) for x >= t^r`, - `f(x) = x for t^r > x > t^l`, - `f(x) = t^l + a^l(x - t^l) for x <= t^l`. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as the input. - - # Arguments - t_left_initializer: initializer function for the left part intercept - a_left_initializer: initializer function for the left part slope - t_right_initializer: initializer function for the right part intercept - a_right_initializer: initializer function for the right part slope - shared_axes: the axes along which to share learnable - parameters for the activation function. - For example, if the incoming feature maps - are from a 2D convolution - with output shape `(batch, height, width, channels)`, - and you wish to share parameters across space - so that each filter only has one set of parameters, - set `shared_axes=[1, 2]`. 
-
-    # References
-        - [Deep Learning with S-shaped Rectified Linear Activation Units](
-          http://arxiv.org/abs/1512.07030)
-    """
-
-    def __init__(self, t_left_initializer='zeros',
-                 a_left_initializer=initializers.RandomUniform(
-                     minval=0, maxval=1),
-                 t_right_initializer=initializers.RandomUniform(
-                     minval=0, maxval=5),
-                 a_right_initializer='ones',
-                 shared_axes=None,
-                 **kwargs):
-        super(SReLU, self).__init__(**kwargs)
-        self.supports_masking = True
-        self.t_left_initializer = initializers.get(t_left_initializer)
-        self.a_left_initializer = initializers.get(a_left_initializer)
-        self.t_right_initializer = initializers.get(t_right_initializer)
-        self.a_right_initializer = initializers.get(a_right_initializer)
-        if shared_axes is None:
-            self.shared_axes = None
-        elif not isinstance(shared_axes, (list, tuple)):
-            self.shared_axes = [shared_axes]
-        else:
-            self.shared_axes = list(shared_axes)
-
-    def build(self, input_shape):
-        input_shape = to_tuple(input_shape)
-        param_shape = list(input_shape[1:])
-        self.param_broadcast = [False] * len(param_shape)
-        if self.shared_axes is not None:
-            for i in self.shared_axes:
-                param_shape[i - 1] = 1
-                self.param_broadcast[i - 1] = True
-
-        param_shape = tuple(param_shape)
-
-        self.t_left = self.add_weight(shape=param_shape,
-                                      name='t_left',
-                                      initializer=self.t_left_initializer)
-
-        self.a_left = self.add_weight(shape=param_shape,
-                                      name='a_left',
-                                      initializer=self.a_left_initializer)
-
-        self.t_right = self.add_weight(shape=param_shape,
-                                       name='t_right',
-                                       initializer=self.t_right_initializer)
-
-        self.a_right = self.add_weight(shape=param_shape,
-                                       name='a_right',
-                                       initializer=self.a_right_initializer)
-
-        # Set input spec
-        axes = {}
-        if self.shared_axes:
-            for i in range(1, len(input_shape)):
-                if i not in self.shared_axes:
-                    axes[i] = input_shape[i]
-        self.input_spec = InputSpec(ndim=len(input_shape), axes=axes)
-        self.built = True
-
-    def call(self, x, mask=None):
-        # ensure that the right part is always to the right of the left
-        t_right_actual = self.t_left + K.abs(self.t_right)
-
-        if K.backend() == 'theano':
-            t_left = K.pattern_broadcast(self.t_left, self.param_broadcast)
-            a_left = K.pattern_broadcast(self.a_left, self.param_broadcast)
-            a_right = K.pattern_broadcast(self.a_right, self.param_broadcast)
-            t_right_actual = K.pattern_broadcast(t_right_actual,
-                                                 self.param_broadcast)
-        else:
-            t_left = self.t_left
-            a_left = self.a_left
-            a_right = self.a_right
-
-        y_left_and_center = t_left + K.relu(x - t_left,
-                                            a_left,
-                                            t_right_actual - t_left)
-        y_right = K.relu(x - t_right_actual) * a_right
-        return y_left_and_center + y_right
-
-    def get_config(self):
-        # Serialize the initializers so the config stays JSON-serializable.
-        config = {
-            't_left_initializer': initializers.serialize(self.t_left_initializer),
-            'a_left_initializer': initializers.serialize(self.a_left_initializer),
-            't_right_initializer': initializers.serialize(self.t_right_initializer),
-            'a_right_initializer': initializers.serialize(self.a_right_initializer),
-            'shared_axes': self.shared_axes
-        }
-        base_config = super(SReLU, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-    def compute_output_shape(self, input_shape):
-        return input_shape
-from keras import backend as K
-from keras.layers import Layer
-
-
-class Swish(Layer):
-    """Swish (Ramachandran et al., 2017).
-
-    # Input shape
-        Arbitrary. Use the keyword argument `input_shape`
-        (tuple of integers, does not include the samples axis)
-        when using this layer as the first layer in a model.
-
-    # Output shape
-        Same shape as the input.
-
-    # Arguments
-        beta: float >= 0. Scaling factor.
-            If set to 1 with trainable set to False (the default),
-            Swish equals the SiLU activation (Elfwing et al., 2017).
-        trainable: whether to learn the scaling factor during training or not.
-
-    # References
-        - [Searching for Activation Functions](https://arxiv.org/abs/1710.05941)
-        - [Sigmoid-weighted linear units for neural network function
-          approximation in reinforcement learning](https://arxiv.org/abs/1702.03118)
-    """
-
-    def __init__(self, beta=1.0, trainable=False, **kwargs):
-        super(Swish, self).__init__(**kwargs)
-        self.supports_masking = True
-        self.beta = beta
-        self.trainable = trainable
-
-    def build(self, input_shape):
-        self.scaling_factor = K.variable(self.beta,
-                                         dtype=K.floatx(),
-                                         name='scaling_factor')
-        if self.trainable:
-            self._trainable_weights.append(self.scaling_factor)
-        super(Swish, self).build(input_shape)
-
-    def call(self, inputs, mask=None):
-        return inputs * K.sigmoid(self.scaling_factor * inputs)
-
-    def get_config(self):
-        config = {'beta': self.get_weights()[0] if self.trainable else self.beta,
-                  'trainable': self.trainable}
-        base_config = super(Swish, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-    def compute_output_shape(self, input_shape):
-        return input_shape
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-from functools import partial
-
-from keras import backend as K
-from keras_contrib import backend as KC
-from keras import activations
-from keras import initializers
-from keras import regularizers
-from keras import constraints
-from keras.layers import Layer
-from keras.layers import InputSpec
-from keras_contrib.utils.conv_utils import conv_output_length
-from keras_contrib.utils.conv_utils import normalize_data_format
-from keras_contrib.utils.test_utils import to_tuple
-import numpy as np
-
-
-class CosineConvolution2D(Layer):
-    """Cosine Normalized Convolution operator for filtering
-    windows of two-dimensional inputs.
-
-    # Examples
-
-    ```python
-        # apply a 3x3 convolution with 64 output filters on a 256x256 image:
-        model = Sequential()
-        model.add(CosineConvolution2D(64, 3, 3,
-                                      padding='same',
-                                      input_shape=(3, 256, 256)))
-        # now model.output_shape == (None, 64, 256, 256)
-
-        # add a 3x3 convolution on top, with 32 output filters:
-        model.add(CosineConvolution2D(32, 3, 3, padding='same'))
-        # now model.output_shape == (None, 32, 256, 256)
-    ```
-
-    # Arguments
-        filters: Number of convolution filters to use.
-        kernel_size: An integer or tuple/list of 2 integers, specifying the
-            dimensions of the convolution window.
-        init: name of initialization function for the weights of the layer
-            (see [initializers](https://keras.io/initializers)), or alternatively,
-            a Theano function to use for weights initialization.
-            This parameter is only relevant if you don't pass
-            a `weights` argument.
-        activation: name of activation function to use
-            (see [activations](https://keras.io/activations)),
-            or alternatively, an elementwise Theano function.
-            If you don't specify anything, no activation is applied
-            (i.e. "linear" activation: a(x) = x).
-        weights: list of numpy arrays to set as initial weights.
-        padding: 'valid', 'same' or 'full'
-            ('full' requires the Theano backend).
-        strides: tuple of length 2. Factor by which to stride the output.
-            Also called strides elsewhere.
-        kernel_regularizer: instance of [WeightRegularizer](
-            https://keras.io/regularizers)
-            (e.g. L1 or L2 regularization), applied to the main weights matrix.
-        bias_regularizer: instance of [WeightRegularizer](
-            https://keras.io/regularizers), applied to the bias.
-        activity_regularizer: instance of [ActivityRegularizer](
-            https://keras.io/regularizers), applied to the network output.
-        kernel_constraint: instance of the [constraints](
-            https://keras.io/constraints) module
-            (e.g. maxnorm, nonneg), applied to the main weights matrix.
-        bias_constraint: instance of the [constraints](
-            https://keras.io/constraints) module, applied to the bias.
-        data_format: 'channels_first' or 'channels_last'.
-            In 'channels_first' mode, the channels dimension
-            (the depth) is at index 1; in 'channels_last' mode it is at index 3.
-            It defaults to the `image_data_format` value found in your
-            Keras config file at `~/.keras/keras.json`.
-            If you never set it, then it will be `'channels_last'`.
-        use_bias: whether to include a bias
-            (i.e. make the layer affine rather than linear).
-
-    # Input shape
-        4D tensor with shape:
-        `(samples, channels, rows, cols)` if data_format='channels_first'
-        or 4D tensor with shape:
-        `(samples, rows, cols, channels)` if data_format='channels_last'.
-
-    # Output shape
-        4D tensor with shape:
-        `(samples, filters, new_rows, new_cols)`
-        if data_format='channels_first'
-        or 4D tensor with shape:
-        `(samples, new_rows, new_cols, filters)`
-        if data_format='channels_last'.
-        `rows` and `cols` values might have changed due to padding.
-
-    # References
-        - [Cosine Normalization: Using Cosine Similarity Instead
-          of Dot Product in Neural Networks](https://arxiv.org/pdf/1702.05870.pdf)
-    """
-
-    def __init__(self, filters, kernel_size,
-                 kernel_initializer='glorot_uniform', activation=None, weights=None,
-                 padding='valid', strides=(1, 1), data_format=None,
-                 kernel_regularizer=None, bias_regularizer=None,
-                 activity_regularizer=None,
-                 kernel_constraint=None, bias_constraint=None,
-                 use_bias=True, **kwargs):
-        if data_format is None:
-            data_format = K.image_data_format()
-        if padding not in {'valid', 'same', 'full'}:
-            raise ValueError(
-                'Invalid border mode for CosineConvolution2D:', padding)
-        self.filters = filters
-        self.kernel_size = kernel_size
-        self.nb_row, self.nb_col = self.kernel_size
-        self.kernel_initializer = initializers.get(kernel_initializer)
-        self.activation = activations.get(activation)
-        self.padding = padding
-        self.strides = tuple(strides)
-        self.data_format = normalize_data_format(data_format)
-        self.kernel_regularizer = regularizers.get(kernel_regularizer)
-        self.bias_regularizer = regularizers.get(bias_regularizer)
-        self.activity_regularizer = regularizers.get(activity_regularizer)
-
-        self.kernel_constraint = constraints.get(kernel_constraint)
-        self.bias_constraint = constraints.get(bias_constraint)
-
-        self.use_bias = use_bias
-        self.input_spec = [InputSpec(ndim=4)]
-        self.initial_weights = weights
-        super(CosineConvolution2D, self).__init__(**kwargs)
-
-    def build(self, input_shape):
-        input_shape = to_tuple(input_shape)
-        if self.data_format == 'channels_first':
-            stack_size = input_shape[1]
-            self.kernel_shape = (self.filters, stack_size,
-                                 self.nb_row, self.nb_col)
-            self.kernel_norm_shape = (1, stack_size, self.nb_row, self.nb_col)
-        elif self.data_format == 'channels_last':
-            stack_size = input_shape[3]
-            self.kernel_shape = (self.nb_row, self.nb_col,
-                                 stack_size, self.filters)
-            self.kernel_norm_shape = (self.nb_row, self.nb_col, stack_size, 1)
-        else:
-            raise ValueError('Invalid data_format:', self.data_format)
-        self.W = self.add_weight(shape=self.kernel_shape,
initializer=partial(self.kernel_initializer), - name='{}_W'.format(self.name), - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - - kernel_norm_name = '{}_kernel_norm'.format(self.name) - self.kernel_norm = K.variable(np.ones(self.kernel_norm_shape), - name=kernel_norm_name) - - if self.use_bias: - self.b = self.add_weight(shape=(self.filters,), - initializer='zero', - name='{}_b'.format(self.name), - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.b = None - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - self.built = True - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - rows = input_shape[2] - cols = input_shape[3] - elif self.data_format == 'channels_last': - rows = input_shape[1] - cols = input_shape[2] - else: - raise ValueError('Invalid data_format:', self.data_format) - - rows = conv_output_length(rows, self.nb_row, - self.padding, self.strides[0]) - cols = conv_output_length(cols, self.nb_col, - self.padding, self.strides[1]) - - if self.data_format == 'channels_first': - return input_shape[0], self.filters, rows, cols - elif self.data_format == 'channels_last': - return input_shape[0], rows, cols, self.filters - - def call(self, x, mask=None): - b, xb = 0., 0. - if self.data_format == 'channels_first': - kernel_sum_axes = [1, 2, 3] - if self.use_bias: - b = K.reshape(self.b, (self.filters, 1, 1, 1)) - xb = 1. - elif self.data_format == 'channels_last': - kernel_sum_axes = [0, 1, 2] - if self.use_bias: - b = K.reshape(self.b, (1, 1, 1, self.filters)) - xb = 1. - - tmp = K.sum(K.square(self.W), axis=kernel_sum_axes, keepdims=True) - Wnorm = K.sqrt(tmp + K.square(b) + K.epsilon()) - - tmp = KC.conv2d(K.square(x), self.kernel_norm, strides=self.strides, - padding=self.padding, - data_format=self.data_format, - filter_shape=self.kernel_norm_shape) - xnorm = K.sqrt(tmp + xb + K.epsilon()) - - W = self.W / Wnorm - - output = KC.conv2d(x, W, strides=self.strides, - padding=self.padding, - data_format=self.data_format, - filter_shape=self.kernel_shape) - - if K.backend() == 'theano': - xnorm = K.pattern_broadcast(xnorm, [False, True, False, False]) - - output /= xnorm - - if self.use_bias: - b /= Wnorm - if self.data_format == 'channels_first': - b = K.reshape(b, (1, self.filters, 1, 1)) - elif self.data_format == 'channels_last': - b = K.reshape(b, (1, 1, 1, self.filters)) - else: - raise ValueError('Invalid data_format:', self.data_format) - b /= xnorm - output += b - output = self.activation(output) - return output - - def get_config(self): - config = { - 'filters': self.filters, - 'kernel_size': self.kernel_size, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'activation': activations.serialize(self.activation), - 'padding': self.padding, - 'strides': self.strides, - 'data_format': self.data_format, - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': - regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'use_bias': self.use_bias} - base_config = super(CosineConvolution2D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -CosineConv2D = CosineConvolution2D -# -*- coding: utf-8 -*- -from __future__ import 
-# -*- coding: utf-8 -*-
-from __future__ import absolute_import
-
-from keras.layers import Layer
-
-from keras_contrib import backend as KC
-from keras_contrib.utils.conv_utils import normalize_data_format
-
-
-class SubPixelUpscaling(Layer):
-    """ Sub-pixel convolutional upscaling layer.
-
-    This layer requires a Convolution2D prior to it,
-    having output filters computed according to
-    the formula:
-
-        filters = k * (scale_factor * scale_factor)
-        where k = a user defined number of filters (generally larger than 32)
-              scale_factor = the upscaling factor (generally 2)
-
-    This layer performs the depth to space operation on
-    the convolution filters, and returns a
-    tensor with the size as defined below.
-
-    # Example:
-    ```python
-    # A standard subpixel upscaling block
-    x = Convolution2D(256, (3, 3), padding='same', activation='relu')(...)
-    u = SubPixelUpscaling(scale_factor=2)(x)
-
-    # Optional
-    x = Convolution2D(256, (3, 3), padding='same', activation='relu')(u)
-    ```
-
-    In practice, it is useful to have a second convolution layer after the
-    SubPixelUpscaling layer to speed up the learning process.
-
-    However, if you are stacking multiple
-    SubPixelUpscaling blocks, it may increase
-    the number of parameters greatly, so the
-    Convolution layer after SubPixelUpscaling
-    layer can be removed.
-
-    # Arguments
-        scale_factor: Upscaling factor.
-        data_format: Can be None, 'channels_first' or 'channels_last'.
-
-    # Input shape
-        4D tensor with shape:
-        `(samples, k * (scale_factor * scale_factor) channels, rows, cols)`
-        if data_format='channels_first'
-        or 4D tensor with shape:
-        `(samples, rows, cols, k * (scale_factor * scale_factor) channels)`
-        if data_format='channels_last'.
-
-    # Output shape
-        4D tensor with shape:
-        `(samples, k channels, rows * scale_factor, cols * scale_factor)`
-        if data_format='channels_first'
-        or 4D tensor with shape:
-        `(samples, rows * scale_factor, cols * scale_factor, k channels)`
-        if data_format='channels_last'.
-
-    # References
-        - [Real-Time Single Image and Video Super-Resolution Using an
-          Efficient Sub-Pixel Convolutional Neural Network](
-          https://arxiv.org/abs/1609.05158)
-    """
-
-    def __init__(self, scale_factor=2, data_format=None, **kwargs):
-        super(SubPixelUpscaling, self).__init__(**kwargs)
-
-        self.scale_factor = scale_factor
-        self.data_format = normalize_data_format(data_format)
-
-    def build(self, input_shape):
-        pass
-
-    def call(self, x, mask=None):
-        y = KC.depth_to_space(x, self.scale_factor, self.data_format)
-        return y
-
-    def compute_output_shape(self, input_shape):
-        if self.data_format == 'channels_first':
-            b, k, r, c = input_shape
-            new_k = k // (self.scale_factor ** 2)
-            new_r = r * self.scale_factor
-            new_c = c * self.scale_factor
-            return b, new_k, new_r, new_c
-        else:
-            b, r, c, k = input_shape
-            new_r = r * self.scale_factor
-            new_c = c * self.scale_factor
-            new_k = k // (self.scale_factor ** 2)
-            return b, new_r, new_c, new_k
-
-    def get_config(self):
-        config = {'scale_factor': self.scale_factor,
-                  'data_format': self.data_format}
-        base_config = super(SubPixelUpscaling, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-
-from keras.layers import Layer, InputSpec
-from keras import initializers, regularizers, constraints
-from keras import backend as K
-from keras_contrib import backend as KC
-
-
-class GroupNormalization(Layer):
-    """Group normalization layer.
-
-    Group Normalization divides the channels into groups and computes
-    within each group the mean and variance for normalization.
-    Group Normalization's computation is independent
-    of batch sizes, and its accuracy is stable in a wide range of batch sizes.
-
-    Relation to Layer Normalization:
-    If the number of groups is set to 1, then this operation becomes identical to
-    Layer Normalization.
-
-    Relation to Instance Normalization:
-    If the number of groups is set to the
-    input dimension (number of groups is equal
-    to number of channels), then this operation becomes
-    identical to Instance Normalization.
-
-    # Arguments
-        groups: Integer, the number of groups for Group Normalization.
-            Can be in the range [1, N] where N is the input dimension.
-            The input dimension must be divisible by the number of groups.
-        axis: Integer, the axis that should be normalized
-            (typically the features axis).
-            For instance, after a `Conv2D` layer with
-            `data_format="channels_first"`,
-            set `axis=1` in `GroupNormalization`.
-        epsilon: Small float added to variance to avoid dividing by zero.
-        center: If True, add offset of `beta` to normalized tensor.
-            If False, `beta` is ignored.
-        scale: If True, multiply by `gamma`.
-            If False, `gamma` is not used.
-            When the next layer is linear (also e.g. `nn.relu`),
-            this can be disabled since the scaling
-            will be done by the next layer.
-        beta_initializer: Initializer for the beta weight.
-        gamma_initializer: Initializer for the gamma weight.
-        beta_regularizer: Optional regularizer for the beta weight.
-        gamma_regularizer: Optional regularizer for the gamma weight.
-        beta_constraint: Optional constraint for the beta weight.
-        gamma_constraint: Optional constraint for the gamma weight.
-
-    # Input shape
-        Arbitrary. Use the keyword argument `input_shape`
-        (tuple of integers, does not include the samples axis)
-        when using this layer as the first layer in a model.
-
-    # Output shape
-        Same shape as input.
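-
-    # Example
-        A minimal usage sketch (illustrative; `inp` stands for any
-        channels-last 4D tensor, e.g. the output of an `Input` layer):
-
-        ```python
-        x = Conv2D(32, (3, 3), padding='same', activation='relu')(inp)
-        # 32 channels are split into 8 groups of 4 channels each; the
-        # channel count must be divisible by `groups`.
-        x = GroupNormalization(groups=8, axis=-1)(x)
-        ```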
-
-    # References
-        - [Group Normalization](https://arxiv.org/abs/1803.08494)
-    """
-
-    def __init__(self,
-                 groups=32,
-                 axis=-1,
-                 epsilon=1e-5,
-                 center=True,
-                 scale=True,
-                 beta_initializer='zeros',
-                 gamma_initializer='ones',
-                 beta_regularizer=None,
-                 gamma_regularizer=None,
-                 beta_constraint=None,
-                 gamma_constraint=None,
-                 **kwargs):
-        super(GroupNormalization, self).__init__(**kwargs)
-        self.supports_masking = True
-        self.groups = groups
-        self.axis = axis
-        self.epsilon = epsilon
-        self.center = center
-        self.scale = scale
-        self.beta_initializer = initializers.get(beta_initializer)
-        self.gamma_initializer = initializers.get(gamma_initializer)
-        self.beta_regularizer = regularizers.get(beta_regularizer)
-        self.gamma_regularizer = regularizers.get(gamma_regularizer)
-        self.beta_constraint = constraints.get(beta_constraint)
-        self.gamma_constraint = constraints.get(gamma_constraint)
-
-    def build(self, input_shape):
-        dim = input_shape[self.axis]
-
-        if dim is None:
-            raise ValueError('Axis ' + str(self.axis) + ' of '
-                             'input tensor should have a defined dimension '
-                             'but the layer received an input with shape ' +
-                             str(input_shape) + '.')
-
-        if dim < self.groups:
-            raise ValueError('Number of groups (' + str(self.groups) + ') cannot be '
-                             'more than the number of channels (' +
-                             str(dim) + ').')
-
-        if dim % self.groups != 0:
-            raise ValueError('Number of channels (' + str(dim) + ') must be a '
-                             'multiple of the number of groups (' +
-                             str(self.groups) + ').')
-
-        self.input_spec = InputSpec(ndim=len(input_shape),
-                                    axes={self.axis: dim})
-        shape = (dim,)
-
-        if self.scale:
-            self.gamma = self.add_weight(shape=shape,
-                                         name='gamma',
-                                         initializer=self.gamma_initializer,
-                                         regularizer=self.gamma_regularizer,
-                                         constraint=self.gamma_constraint)
-        else:
-            self.gamma = None
-        if self.center:
-            self.beta = self.add_weight(shape=shape,
-                                        name='beta',
-                                        initializer=self.beta_initializer,
-                                        regularizer=self.beta_regularizer,
-                                        constraint=self.beta_constraint)
-        else:
-            self.beta = None
-        self.built = True
-
-    def call(self, inputs, **kwargs):
-        input_shape = K.int_shape(inputs)
-        tensor_input_shape = K.shape(inputs)
-
-        # Prepare broadcasting shape.
-        reduction_axes = list(range(len(input_shape)))
-        del reduction_axes[self.axis]
-        broadcast_shape = [1] * len(input_shape)
-        broadcast_shape[self.axis] = input_shape[self.axis] // self.groups
-        broadcast_shape.insert(1, self.groups)
-
-        reshape_group_shape = K.shape(inputs)
-        group_axes = [reshape_group_shape[i] for i in range(len(input_shape))]
-        group_axes[self.axis] = input_shape[self.axis] // self.groups
-        group_axes.insert(1, self.groups)
-
-        # reshape inputs to new group shape
-        group_shape = [group_axes[0], self.groups] + group_axes[2:]
-        group_shape = K.stack(group_shape)
-        inputs = K.reshape(inputs, group_shape)
-
-        group_reduction_axes = list(range(len(group_axes)))
-        mean, variance = KC.moments(inputs, group_reduction_axes[2:],
-                                    keep_dims=True)
-        inputs = (inputs - mean) / (K.sqrt(variance + self.epsilon))
-
-        # prepare broadcast shape
-        inputs = K.reshape(inputs, group_shape)
-
-        outputs = inputs
-
-        # In this case we must explicitly broadcast all parameters.
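-        # (For a channels-last 4D input, broadcast_shape is
-        # [1, groups, 1, 1, channels // groups], i.e. rank ndim + 1,
-        # which lines up with the grouped tensor built above.)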
- if self.scale: - broadcast_gamma = K.reshape(self.gamma, broadcast_shape) - outputs = outputs * broadcast_gamma - - if self.center: - broadcast_beta = K.reshape(self.beta, broadcast_shape) - outputs = outputs + broadcast_beta - - # finally we reshape the output back to the input shape - outputs = K.reshape(outputs, tensor_input_shape) - - return outputs - - def get_config(self): - config = { - 'groups': self.groups, - 'axis': self.axis, - 'epsilon': self.epsilon, - 'center': self.center, - 'scale': self.scale, - 'beta_initializer': initializers.serialize(self.beta_initializer), - 'gamma_initializer': initializers.serialize(self.gamma_initializer), - 'beta_regularizer': regularizers.serialize(self.beta_regularizer), - 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), - 'beta_constraint': constraints.serialize(self.beta_constraint), - 'gamma_constraint': constraints.serialize(self.gamma_constraint) - } - base_config = super(GroupNormalization, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape -from keras.layers import Layer, InputSpec -from keras import initializers, regularizers, constraints -from keras import backend as K - - -class InstanceNormalization(Layer): - """Instance normalization layer. - - Normalize the activations of the previous layer at each step, - i.e. applies a transformation that maintains the mean activation - close to 0 and the activation standard deviation close to 1. - - # Arguments - axis: Integer, the axis that should be normalized - (typically the features axis). - For instance, after a `Conv2D` layer with - `data_format="channels_first"`, - set `axis=1` in `InstanceNormalization`. - Setting `axis=None` will normalize all values in each - instance of the batch. - Axis 0 is the batch dimension. `axis` cannot be set to 0 to avoid errors. - epsilon: Small float added to variance to avoid dividing by zero. - center: If True, add offset of `beta` to normalized tensor. - If False, `beta` is ignored. - scale: If True, multiply by `gamma`. - If False, `gamma` is not used. - When the next layer is linear (also e.g. `nn.relu`), - this can be disabled since the scaling - will be done by the next layer. - beta_initializer: Initializer for the beta weight. - gamma_initializer: Initializer for the gamma weight. - beta_regularizer: Optional regularizer for the beta weight. - gamma_regularizer: Optional regularizer for the gamma weight. - beta_constraint: Optional constraint for the beta weight. - gamma_constraint: Optional constraint for the gamma weight. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a Sequential model. - - # Output shape - Same shape as input. 
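-
-    # Example
-        A minimal usage sketch (illustrative; `inp` stands for any
-        channels-last 4D tensor):
-
-        ```python
-        x = Conv2D(64, (3, 3), padding='same', activation='relu')(inp)
-        # Normalizes each feature map of each sample independently.
-        x = InstanceNormalization(axis=-1)(x)
-        ```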
- - # References - - [Layer Normalization](https://arxiv.org/abs/1607.06450) - - [Instance Normalization: The Missing Ingredient for Fast Stylization]( - https://arxiv.org/abs/1607.08022) - """ - - def __init__(self, - axis=None, - epsilon=1e-3, - center=True, - scale=True, - beta_initializer='zeros', - gamma_initializer='ones', - beta_regularizer=None, - gamma_regularizer=None, - beta_constraint=None, - gamma_constraint=None, - **kwargs): - super(InstanceNormalization, self).__init__(**kwargs) - self.supports_masking = True - self.axis = axis - self.epsilon = epsilon - self.center = center - self.scale = scale - self.beta_initializer = initializers.get(beta_initializer) - self.gamma_initializer = initializers.get(gamma_initializer) - self.beta_regularizer = regularizers.get(beta_regularizer) - self.gamma_regularizer = regularizers.get(gamma_regularizer) - self.beta_constraint = constraints.get(beta_constraint) - self.gamma_constraint = constraints.get(gamma_constraint) - - def build(self, input_shape): - ndim = len(input_shape) - if self.axis == 0: - raise ValueError('Axis cannot be zero') - - if (self.axis is not None) and (ndim == 2): - raise ValueError('Cannot specify axis for rank 1 tensor') - - self.input_spec = InputSpec(ndim=ndim) - - if self.axis is None: - shape = (1,) - else: - shape = (input_shape[self.axis],) - - if self.scale: - self.gamma = self.add_weight(shape=shape, - name='gamma', - initializer=self.gamma_initializer, - regularizer=self.gamma_regularizer, - constraint=self.gamma_constraint) - else: - self.gamma = None - if self.center: - self.beta = self.add_weight(shape=shape, - name='beta', - initializer=self.beta_initializer, - regularizer=self.beta_regularizer, - constraint=self.beta_constraint) - else: - self.beta = None - self.built = True - - def call(self, inputs, training=None): - input_shape = K.int_shape(inputs) - reduction_axes = list(range(0, len(input_shape))) - - if self.axis is not None: - del reduction_axes[self.axis] - - del reduction_axes[0] - - mean = K.mean(inputs, reduction_axes, keepdims=True) - stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon - normed = (inputs - mean) / stddev - - broadcast_shape = [1] * len(input_shape) - if self.axis is not None: - broadcast_shape[self.axis] = input_shape[self.axis] - - if self.scale: - broadcast_gamma = K.reshape(self.gamma, broadcast_shape) - normed = normed * broadcast_gamma - if self.center: - broadcast_beta = K.reshape(self.beta, broadcast_shape) - normed = normed + broadcast_beta - return normed - - def get_config(self): - config = { - 'axis': self.axis, - 'epsilon': self.epsilon, - 'center': self.center, - 'scale': self.scale, - 'beta_initializer': initializers.serialize(self.beta_initializer), - 'gamma_initializer': initializers.serialize(self.gamma_initializer), - 'beta_regularizer': regularizers.serialize(self.beta_regularizer), - 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), - 'beta_constraint': constraints.serialize(self.beta_constraint), - 'gamma_constraint': constraints.serialize(self.gamma_constraint) - } - base_config = super(InstanceNormalization, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -from keras_contrib import activations -import keras.backend as K -import numpy as np -from numpy.testing import assert_allclose - - -def get_standard_values(): - """A set of floats used for testing squash. 
- """ - return np.array([[0, 0.1, 0.5, 0.9, 1.0]], dtype=K.floatx()) - - -def test_squash_valid(): - """Test using a reference implementation of squash. - """ - def squash(x, axis=-1): - s_squared_norm = np.sum(np.square(x), axis) + 1e-7 - scale = np.sqrt(s_squared_norm) / (0.5 + s_squared_norm) - return scale * x - - x = K.placeholder(ndim=2) - f = K.function([x], [activations.squash(x)]) - test_values = get_standard_values() - - result = f([test_values])[0] - expected = squash(test_values) - assert_allclose(result, expected, rtol=1e-05) - - -test_squash_valid() -import pytest -from numpy.testing import assert_allclose -import numpy as np - -from keras import backend as K -from keras.backend import theano_backend as KTH -from keras.backend import tensorflow_backend as KTF -import keras_contrib.backend.theano_backend as KCTH -import keras_contrib.backend.tensorflow_backend as KCTF -import keras_contrib.backend.numpy_backend as KCNP -from keras_contrib import backend as KC - - -def check_dtype(var, dtype): - if K._BACKEND == 'theano': - assert var.dtype == dtype - else: - assert var.dtype.name == '%s_ref' % dtype - - -def check_single_tensor_operation(function_name, input_shape, **kwargs): - val = np.random.random(input_shape) - 0.5 - xth = KTH.variable(val) - xtf = KTF.variable(val) - - zth = KTH.eval(getattr(KCTH, function_name)(xth, **kwargs)) - ztf = KTF.eval(getattr(KCTF, function_name)(xtf, **kwargs)) - - assert zth.shape == ztf.shape - assert_allclose(zth, ztf, atol=1e-05) - - -def check_two_tensor_operation(function_name, x_input_shape, - y_input_shape, **kwargs): - xval = np.random.random(x_input_shape) - 0.5 - - xth = KTH.variable(xval) - xtf = KTF.variable(xval) - - yval = np.random.random(y_input_shape) - 0.5 - - yth = KTH.variable(yval) - ytf = KTF.variable(yval) - - zth = KTH.eval(getattr(KCTH, function_name)(xth, yth, **kwargs)) - ztf = KTF.eval(getattr(KCTF, function_name)(xtf, ytf, **kwargs)) - - assert zth.shape == ztf.shape - assert_allclose(zth, ztf, atol=1e-05) - - -def check_composed_tensor_operations(first_function_name, first_function_args, - second_function_name, second_function_args, - input_shape): - ''' Creates a random tensor t0 with shape input_shape and compute - t1 = first_function_name(t0, **first_function_args) - t2 = second_function_name(t1, **second_function_args) - with both Theano and TensorFlow backends and ensures the answers match. 
- ''' - val = np.random.random(input_shape) - 0.5 - xth = KTH.variable(val) - xtf = KTF.variable(val) - - yth = getattr(KCTH, first_function_name)(xth, **first_function_args) - ytf = getattr(KCTF, first_function_name)(xtf, **first_function_args) - - zth = KTH.eval(getattr(KCTH, second_function_name) - (yth, **second_function_args)) - ztf = KTF.eval(getattr(KCTF, second_function_name) - (ytf, **second_function_args)) - - assert zth.shape == ztf.shape - assert_allclose(zth, ztf, atol=1e-05) - - -class TestBackend(object): - - @pytest.mark.skipif(K.backend() != 'tensorflow', - reason='No need to run the tests twice.') - @pytest.mark.parametrize('input_shape', [(1, 3, 40, 40), (1, 3, 10, 10)]) - @pytest.mark.parametrize('kernel_shape', [2, 5]) - def test_extract(self, input_shape, kernel_shape): - xval = np.random.random(input_shape) - kernel = [kernel_shape, kernel_shape] - strides = [kernel_shape, kernel_shape] - xth = KTH.variable(xval) - xtf = KTF.variable(xval) - ztf = KTF.eval(KCTF.extract_image_patches(xtf, kernel, strides, - data_format='channels_first', - padding='valid')) - zth = KTH.eval(KCTH.extract_image_patches(xth, kernel, strides, - data_format='channels_first', - padding='valid')) - assert zth.shape == ztf.shape - assert_allclose(zth, ztf, atol=1e-02) - - @pytest.mark.skipif(K.backend() != 'tensorflow', - reason='No need to run the tests twice.') - @pytest.mark.parametrize('input_shape', [(1, 40, 40, 3), (1, 10, 10, 3)]) - @pytest.mark.parametrize('kernel_shape', [2, 5]) - def test_extract2(self, input_shape, kernel_shape): - - xval = np.random.random(input_shape) - - kernel = [kernel_shape, kernel_shape] - strides = [kernel_shape, kernel_shape] - xth = KTH.variable(xval) - xtf = KTF.variable(xval) - ztf = KTF.eval(KCTF.extract_image_patches(xtf, kernel, strides, - data_format='channels_last', - padding='same')) - zth = KTH.eval(KCTH.extract_image_patches(xth, kernel, strides, - data_format='channels_last', - padding='same')) - assert zth.shape == ztf.shape - assert_allclose(zth, ztf, atol=1e-02) - - @pytest.mark.skipif(K.backend() != 'tensorflow', - reason='No need to run the tests twice.') - @pytest.mark.parametrize('batch_size', [1, 2, 3]) - @pytest.mark.parametrize('scale', [2, 3]) - @pytest.mark.parametrize('channels', [1, 2, 3]) - @pytest.mark.parametrize('rows', [1, 2, 3]) - @pytest.mark.parametrize('cols', [1, 2, 3]) - def test_depth_to_space(self, batch_size, scale, channels, rows, cols): - if K.image_data_format() == 'channels_first': - arr = np.arange(batch_size * channels * scale * scale * rows * cols)\ - .reshape((batch_size, channels * scale * scale, rows, cols)) - elif K.image_data_format() == 'channels_last': - arr = np.arange(batch_size * rows * cols * scale * scale * channels) \ - .reshape((batch_size, rows, cols, channels * scale * scale)) - - arr_tf = KTF.variable(arr) - arr_th = KTH.variable(arr) - - if K.image_data_format() == 'channels_first': - expected = arr.reshape((batch_size, scale, scale, channels, rows, cols))\ - .transpose((0, 3, 4, 1, 5, 2))\ - .reshape((batch_size, channels, rows * scale, cols * scale)) - elif K.image_data_format() == 'channels_last': - expected = arr.reshape((batch_size, rows, cols, scale, scale, channels))\ - .transpose((0, 1, 3, 2, 4, 5))\ - .reshape((batch_size, rows * scale, cols * scale, channels)) - - tf_ans = KTF.eval(KCTF.depth_to_space(arr_tf, scale)) - th_ans = KTH.eval(KCTH.depth_to_space(arr_th, scale)) - - assert tf_ans.shape == expected.shape - assert th_ans.shape == expected.shape - assert_allclose(expected, 
tf_ans, atol=1e-05) - assert_allclose(expected, th_ans, atol=1e-05) - - @pytest.mark.parametrize('keep_dims', [True, False]) - def test_moments(self, keep_dims): - input_shape = (10, 10, 10, 10) - x_0 = np.zeros(input_shape) - x_1 = np.ones(input_shape) - x_random = np.random.random(input_shape) - - th_axes = [0, 2, 3] - tf_axes = [0, 1, 2] - - for ip in [x_0, x_1, x_random]: - for axes in [th_axes, tf_axes]: - K_mean, K_var = KC.moments( - K.variable(ip), axes, keep_dims=keep_dims) - np_mean, np_var = KCNP.moments(ip, axes, keep_dims=keep_dims) - - K_mean_val = K.eval(K_mean) - K_var_val = K.eval(K_var) - - # absolute tolerance needed when working with zeros - assert_allclose(K_mean_val, np_mean, rtol=1e-4, atol=1e-10) - assert_allclose(K_var_val, np_var, rtol=1e-4, atol=1e-10) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from keras_contrib import callbacks -from keras.models import Sequential -from keras.layers import Dense -from numpy.testing import assert_allclose - - -def build_model(): - model = Sequential([ - Dense(2, activation='relu', input_shape=(2,)), - Dense(1, activation='sigmoid') - ]) - return model - - -def cycle(i): - return np.floor(1 + i / (2 * 2000)) - - -def x(i): - return np.abs(i / 2000. - 2 * cycle(i) + 1) - - -def test_cyclic_lr_triangular_1(): - X = np.random.rand(4000, 2) - y = np.random.rand(4000).reshape(-1, 1) - - clr = callbacks.CyclicLR() - - model = build_model() - model.compile( - optimizer='sgd', - loss='binary_crossentropy', - metrics=['accuracy'] - ) - model.fit(X, y, batch_size=1, epochs=1, verbose=0, callbacks=[clr]) - - r = np.concatenate([ - np.linspace(0.001, 0.006, num=2001)[1:], - np.linspace(0.006, 0.001, num=2001)[1:] - ]) - - assert_allclose(clr.history['lr'], r) - - -def test_cyclic_lr_triangular_2(): - X = np.random.rand(4000, 2) - y = np.random.rand(4000).reshape(-1, 1) - - clr = callbacks.CyclicLR(mode='triangular2') - - model = build_model() - model.compile( - optimizer='sgd', - loss='binary_crossentropy', - metrics=['accuracy'] - ) - model.fit(X, y, batch_size=1, epochs=2, verbose=0, callbacks=[clr]) - - r = np.concatenate([ - np.linspace(0.001, 0.006, num=2001)[1:], - np.linspace(0.006, 0.001, num=2001)[1:], - np.linspace(0.001, 0.0035, num=2001)[1:], - np.linspace(0.0035, 0.001, num=2001)[1:], - ]) - - assert_allclose(clr.history['lr'], r) - - -def test_cyclic_lr_exp_range(): - X = np.random.rand(4000, 2) - y = np.random.rand(4000).reshape(-1, 1) - - clr = callbacks.CyclicLR(mode='exp_range', gamma=0.9996) - - model = build_model() - model.compile( - optimizer='sgd', - loss='binary_crossentropy', - metrics=['accuracy'] - ) - model.fit(X, y, batch_size=1, epochs=2, verbose=0, callbacks=[clr]) - - exp_range = [] - - def scale_fn(i): - return 0.001 + (0.006 - 0.001) * np.maximum(0, (1 - x(i))) * (0.9996 ** i) - - for i in range(8000): - exp_range.append(scale_fn(i + 1)) - - assert_allclose(clr.history['lr'], np.array(exp_range)) - - -def test_cyclic_lr_custom_fn_test(): - X = np.random.rand(4000, 2) - y = np.random.rand(4000).reshape(-1, 1) - - def clr_fn(x): - return 1 / (5 ** (x * 0.0001)) - - clr = callbacks.CyclicLR(scale_fn=clr_fn, scale_mode='iterations') - - model = build_model() - model.compile( - optimizer='sgd', - loss='binary_crossentropy', - metrics=['accuracy'] - ) - model.fit(X, y, batch_size=1, epochs=2, verbose=0, callbacks=[clr]) - - custom_range = [] - - def scale_fn(i): - c = 0.006 - 0.001 - return 0.001 + c * np.maximum(0, (1 - x(i))) * 1 / (5 ** (i * 0.0001)) - - for i 
in range(8000): - custom_range.append(scale_fn(i + 1)) - - assert_allclose(clr.history['lr'], np.array(custom_range)) - - -if __name__ == '__main__': - pytest.main([__file__]) -from keras import backend as K -from keras.layers import Input, Dense, Conv2D, Flatten, Activation -from keras.models import Sequential, Model -from keras_contrib import callbacks -import pytest -import numpy as np -import sys - -if sys.version_info > (3, 0): - from io import StringIO -else: - from StringIO import StringIO - - -n_out = 11 -# with 1 neuron dead, 1/11 is just below the threshold of 10% with verbose = False - - -def check_print(do_train, expected_warnings, nr_dead=None, perc_dead=None): - """ - Receive stdout to check if correct warning message is delivered - :param nr_dead: int - :param perc_dead: float, 10% should be written as 0.1 - """ - - saved_stdout = sys.stdout - - out = StringIO() - out.flush() - sys.stdout = out # overwrite current stdout - - do_train() - - # get prints, can be something like: "Layer - # dense (#0) has 2 dead neurons (20.00%)!" - stdoutput = out.getvalue().strip() - str_to_count = "dead neurons" - count = stdoutput.count(str_to_count) - - sys.stdout = saved_stdout # restore stdout - out.close() - - assert expected_warnings == count - if expected_warnings and (nr_dead is not None): - str_to_check = 'has {} dead'.format(nr_dead) - assert str_to_check in stdoutput, '"{}" not in "{}"'.format(str_to_check, - stdoutput) - if expected_warnings and (perc_dead is not None): - str_to_check = 'neurons ({:.2%})!'.format(perc_dead) - assert str_to_check in stdoutput, '"{}" not in "{}"'.format(str_to_check, - stdoutput) - - -def test_DeadDeadReluDetector(): - n_samples = 9 - - input_shape = (n_samples, 3, 4) # 4 input features - shape_out = (n_samples, 3, n_out) # 11 output features - shape_weights = (4, n_out) - - # ignore batch size - input_shape_dense = tuple(input_shape[1:]) - - def do_test(weights, expected_warnings, verbose, nr_dead=None, perc_dead=None): - - def do_train(): - dataset = np.ones(input_shape) # data to be fed as training - model = Sequential() - model.add(Dense(n_out, activation='relu', input_shape=input_shape_dense, - use_bias=False, weights=[weights], name='dense')) - model.compile(optimizer='sgd', loss='categorical_crossentropy') - model.fit( - dataset, - np.ones(shape_out), - batch_size=1, - epochs=1, - callbacks=[callbacks.DeadReluDetector( - dataset, verbose=verbose)], - verbose=False - ) - - check_print(do_train, expected_warnings, nr_dead, perc_dead) - - # weights that correspond to NN with 1/11 neurons dead - weights_1_dead = np.ones(shape_weights) - # weights that correspond to NN with 2/11 neurons dead - weights_2_dead = np.ones(shape_weights) - # weights that correspond to all neurons dead - weights_all_dead = np.zeros(shape_weights) - - weights_1_dead[:, 0] = 0 - weights_2_dead[:, 0:2] = 0 - - do_test(weights_1_dead, verbose=True, - expected_warnings=1, nr_dead=1, perc_dead=1. / n_out) - do_test(weights_1_dead, verbose=False, expected_warnings=0) - do_test(weights_2_dead, verbose=True, - expected_warnings=1, nr_dead=2, perc_dead=2. / n_out) - # do_test(weights_all_dead, verbose=True, expected_warnings=1, - # nr_dead=n_out, perc_dead=1.) 
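-
-
-# A minimal standalone sketch of how DeadReluDetector is wired into training,
-# distilled from the tests above; the data and layer sizes are illustrative.
-def example_dead_relu_usage():
-    x = np.ones((9, 4))
-    y = np.ones((9, n_out))
-    model = Sequential()
-    model.add(Dense(n_out, activation='relu', input_shape=(4,)))
-    model.compile(optimizer='sgd', loss='categorical_crossentropy')
-    # verbose=True reports any dead neurons; with verbose=False a warning is
-    # only raised once more than 10% of a layer's neurons are dead.
-    model.fit(x, y, batch_size=1, epochs=1, verbose=False,
-              callbacks=[callbacks.DeadReluDetector(x, verbose=True)])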
- - -def test_DeadDeadReluDetector_bias(): - n_samples = 9 - - input_shape = (n_samples, 4) # 4 input features - shape_weights = (4, n_out) - shape_bias = (n_out, ) - shape_out = (n_samples, n_out) # 11 output features - - # ignore batch size - input_shape_dense = tuple(input_shape[1:]) - - def do_test(weights, bias, expected_warnings, verbose, - nr_dead=None, perc_dead=None): - - def do_train(): - dataset = np.ones(input_shape) # data to be fed as training - model = Sequential() - model.add(Dense(n_out, activation='relu', input_shape=input_shape_dense, - use_bias=True, weights=[weights, bias], name='dense')) - model.compile(optimizer='sgd', loss='categorical_crossentropy') - model.fit( - dataset, - np.ones(shape_out), - batch_size=1, - epochs=1, - callbacks=[callbacks.DeadReluDetector( - dataset, verbose=verbose)], - verbose=False - ) - - check_print(do_train, expected_warnings, nr_dead, perc_dead) - - # weights that correspond to NN with 1/11 neurons dead - weights_1_dead = np.ones(shape_weights) - # weights that correspond to NN with 2/11 neurons dead - weights_2_dead = np.ones(shape_weights) - # weights that correspond to all neurons dead - weights_all_dead = np.zeros(shape_weights) - - weights_1_dead[:, 0] = 0 - weights_2_dead[:, 0:2] = 0 - - bias = np.zeros(shape_bias) - - do_test(weights_1_dead, bias, verbose=True, expected_warnings=1, - nr_dead=1, perc_dead=1. / n_out) - do_test(weights_1_dead, bias, verbose=False, expected_warnings=0) - do_test(weights_2_dead, bias, verbose=True, expected_warnings=1, - nr_dead=2, perc_dead=2. / n_out) - # do_test(weights_all_dead, bias, verbose=True, - # expected_warnings=1, nr_dead=n_out, perc_dead=1.) - - -def test_DeadDeadReluDetector_conv(): - n_samples = 9 - - # (5, 5) kernel, 4 input featuremaps and 11 output featuremaps - if K.image_data_format() == 'channels_last': - input_shape = (n_samples, 5, 5, 4) - else: - input_shape = (n_samples, 4, 5, 5) - - # ignore batch size - input_shape_conv = tuple(input_shape[1:]) - shape_weights = (5, 5, 4, n_out) - shape_out = (n_samples, n_out) - - def do_test(weights_bias, expected_warnings, verbose, - nr_dead=None, perc_dead=None): - """ - :param perc_dead: as float, 10% should be written as 0.1 - """ - - def do_train(): - dataset = np.ones(input_shape) # data to be fed as training - model = Sequential() - model.add(Conv2D(n_out, (5, 5), activation='relu', - input_shape=input_shape_conv, - use_bias=True, weights=weights_bias, name='conv')) - model.add(Flatten()) # to handle Theano's categorical crossentropy - model.compile(optimizer='sgd', loss='categorical_crossentropy') - model.fit( - dataset, - np.ones(shape_out), - batch_size=1, - epochs=1, - callbacks=[callbacks.DeadReluDetector( - dataset, verbose=verbose)], - verbose=False - ) - - check_print(do_train, expected_warnings, nr_dead, perc_dead) - - # weights that correspond to NN with 1/11 neurons dead - weights_1_dead = np.ones(shape_weights) - weights_1_dead[..., 0] = 0 - # weights that correspond to NN with 2/11 neurons dead - weights_2_dead = np.ones(shape_weights) - weights_2_dead[..., 0:2] = 0 - # weights that correspond to NN with all neurons dead - weights_all_dead = np.zeros(shape_weights) - - bias = np.zeros((11, )) - - weights_bias_1_dead = [weights_1_dead, bias] - weights_bias_2_dead = [weights_2_dead, bias] - weights_bias_all_dead = [weights_all_dead, bias] - - do_test(weights_bias_1_dead, verbose=True, expected_warnings=1, - nr_dead=1, perc_dead=1. 
/ n_out) - do_test(weights_bias_1_dead, verbose=False, expected_warnings=0) - do_test(weights_bias_2_dead, verbose=True, expected_warnings=1, - nr_dead=2, perc_dead=2. / n_out) - # do_test(weights_bias_all_dead, verbose=True, expected_warnings=1, - # nr_dead=n_out, perc_dead=1.) - - -def test_DeadDeadReluDetector_activation(): - """ - Tests that using "Activation" layer does not throw error - """ - input_data = Input(shape=(1,)) - output_data = Activation('relu')(input_data) - model = Model(input_data, output_data) - model.compile(optimizer='adadelta', loss='binary_crossentropy') - model.fit( - np.array([[1]]), - np.array([[1]]), - epochs=1, - validation_data=(np.array([[1]]), np.array([[1]])), - callbacks=[callbacks.DeadReluDetector(np.array([[1]]))] - ) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -import os -import shutil -from keras.utils import to_categorical -from keras.layers import Layer, Input, Dense, Dropout, BatchNormalization -from keras_contrib.utils.test_utils import to_list, unpack_singleton -from keras_contrib.utils.test_utils import get_test_data -from keras import Model -from keras import backend as K -from keras_contrib.callbacks import TensorBoardGrouped - -input_dim = 2 -num_hidden = 4 -num_classes = 2 -batch_size = 5 -train_samples = 20 -test_samples = 20 - - -def data_generator(x, y, batch_size): - x = to_list(x) - y = to_list(y) - max_batch_index = len(x[0]) // batch_size - i = 0 - while 1: - x_batch = [array[i * batch_size: (i + 1) * batch_size] for array in x] - x_batch = unpack_singleton(x_batch) - - y_batch = [array[i * batch_size: (i + 1) * batch_size] for array in y] - y_batch = unpack_singleton(y_batch) - yield x_batch, y_batch - i += 1 - i = i % max_batch_index - - -# Changing the default arguments of get_test_data. 
-def get_data_callbacks(num_train=train_samples, - num_test=test_samples, - input_shape=(input_dim,), - classification=True, - num_classes=num_classes): - return get_test_data(num_train=num_train, - num_test=num_test, - input_shape=input_shape, - classification=classification, - num_classes=num_classes) - - -def test_TensorBoard(tmpdir): - np.random.seed(np.random.randint(1, 1e7)) - filepath = str(tmpdir / 'logs') - - (X_train, y_train), (X_test, y_test) = get_data_callbacks() - y_test = to_categorical(y_test) - y_train = to_categorical(y_train) - - class DummyStatefulMetric(Layer): - - def __init__(self, name='dummy_stateful_metric', **kwargs): - super(DummyStatefulMetric, self).__init__(name=name, **kwargs) - self.stateful = True - self.state = K.variable(value=0, dtype='int32') - - def reset_states(self): - pass - - def __call__(self, y_true, y_pred): - return self.state - - inp = Input((input_dim,)) - hidden = Dense(num_hidden, activation='relu')(inp) - hidden = Dropout(0.1)(hidden) - hidden = BatchNormalization()(hidden) - output = Dense(num_classes, activation='softmax')(hidden) - model = Model(inputs=inp, outputs=output) - model.compile(loss='categorical_crossentropy', - optimizer='sgd', - metrics=['accuracy', DummyStatefulMetric()]) - - # we must generate new callbacks for each test, as they aren't stateless - def callbacks_factory(histogram_freq): - return [TensorBoardGrouped(log_dir=filepath, - histogram_freq=histogram_freq, - write_images=True, write_grads=True, - batch_size=5)] - - # fit without validation data - model.fit(X_train, y_train, batch_size=batch_size, - callbacks=callbacks_factory(histogram_freq=0), - epochs=3) - - # fit with validation data and accuracy - model.fit(X_train, y_train, batch_size=batch_size, - validation_data=(X_test, y_test), - callbacks=callbacks_factory(histogram_freq=0), epochs=2) - - # fit generator without validation data - train_generator = data_generator(X_train, y_train, batch_size) - model.fit_generator(train_generator, len(X_train), epochs=2, - callbacks=callbacks_factory(histogram_freq=0)) - - # fit generator with validation data and accuracy - train_generator = data_generator(X_train, y_train, batch_size) - model.fit_generator(train_generator, len(X_train), epochs=2, - validation_data=(X_test, y_test), - callbacks=callbacks_factory(histogram_freq=1)) - - assert os.path.isdir(filepath) - shutil.rmtree(filepath) - assert not tmpdir.listdir() - - -if __name__ == '__main__': - pytest.main([__file__]) -from __future__ import print_function -import pytest -import time -import random -from keras_contrib import datasets - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from numpy.testing import assert_allclose - -from keras_contrib.utils.test_utils import layer_test -from keras_contrib.utils.test_utils import is_tf_keras -from keras import backend as K -from keras_contrib.layers import capsule -from keras.models import Sequential - - -@pytest.mark.parametrize('num_capsule', [10, 20]) -@pytest.mark.parametrize('dim_capsule', [10, 20]) -@pytest.mark.parametrize('routings', [3, 4]) -@pytest.mark.parametrize('share_weights', [True, False]) -@pytest.mark.parametrize('activation', ['sigmoid', 'relu']) -def test_capsule(num_capsule, - dim_capsule, - routings, - share_weights, - activation): - - # TODO: removed this once the issue #25546 in the Tensorflow repo is fixed. 
- if is_tf_keras and not share_weights: - return - - num_samples = 100 - num_rows = 256 - num_cols = 256 - - kwargs = {'num_capsule': num_capsule, - 'dim_capsule': dim_capsule, - 'routings': routings, - 'share_weights': share_weights, - 'activation': activation} - - layer_test(capsule.Capsule, - kwargs=kwargs, - input_shape=(num_samples, num_rows, num_cols)) - - -def test_capsule_correctness(): - X = np.random.random((1, 1, 1)) - - model = Sequential() - model.add(capsule.Capsule(1, 1, 1, True, activation='sigmoid')) - - model.compile(loss='mse', optimizer='rmsprop') - init_out = model.predict(X) # mock predict call to initialize weights - model.set_weights([np.zeros((1, 1, 1))]) - out = model.predict(X) - assert_allclose(out, np.zeros( - (1, 1, 1), dtype=K.floatx()) + 0.5, atol=1e-5) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np - -from keras import regularizers -from keras import constraints -from keras.models import Sequential -from keras import backend as K -from keras_contrib.layers import core -from keras_contrib.utils.test_utils import layer_test -from numpy.testing import assert_allclose - - -@pytest.mark.parametrize('input_shape', [(3, 2), - (3, 4, 2), - (None, None, 2), - (3, 4, 5, 2)]) -def test_cosinedense(input_shape): - - layer_test(core.CosineDense, - kwargs={'units': 3}, - input_shape=input_shape) - - -def test_cosinedense_reg_constraint(): - layer_test(core.CosineDense, - kwargs={'units': 3, - 'kernel_regularizer': regularizers.l2(0.01), - 'bias_regularizer': regularizers.l1(0.01), - 'activity_regularizer': regularizers.l2(0.01), - 'kernel_constraint': constraints.MaxNorm(1), - 'bias_constraint': constraints.MaxNorm(1)}, - input_shape=(3, 2)) - - -def test_cosinedense_correctness(): - X = np.random.randn(1, 20) - model = Sequential() - model.add(core.CosineDense(1, use_bias=True, input_shape=(20,))) - model.compile(loss='mse', optimizer='rmsprop') - W = model.get_weights() - W[0] = X.T - W[1] = np.asarray([1.]) - model.set_weights(W) - out = model.predict(X) - assert_allclose(out, np.ones((1, 1), dtype=K.floatx()), atol=1e-5) - - X = np.random.randn(1, 20) - model = Sequential() - model.add(core.CosineDense(1, use_bias=False, input_shape=(20,))) - model.compile(loss='mse', optimizer='rmsprop') - W = model.get_weights() - W[0] = -2 * X.T - model.set_weights(W) - out = model.predict(X) - assert_allclose(out, -np.ones((1, 1), dtype=K.floatx()), atol=1e-5) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -import os -from numpy.testing import assert_allclose - -from keras.layers import Embedding -from keras.models import Sequential -from keras.models import load_model -from keras_contrib.losses import crf_loss -from keras_contrib.metrics import crf_accuracy -from keras_contrib.metrics import crf_marginal_accuracy -from keras_contrib.metrics import crf_viterbi_accuracy -from keras_contrib.layers import CRF -from keras_contrib.utils.test_utils import is_tf_keras - -nb_samples, timesteps, embedding_dim, output_dim = 2, 10, 4, 5 -embedding_num = 12 - -MODEL_PERSISTENCE_PATH = './test_saving_crf_model.h5' - - -@pytest.mark.xfail(is_tf_keras, - reason='TODO: fix it. 
Using K.tf which is bad.', - strict=True) -def test_CRF(): - # data - x = np.random.randint(1, embedding_num, nb_samples * timesteps) - x = x.reshape((nb_samples, timesteps)) - x[0, -4:] = 0 # right padding - x[1, :5] = 0 # left padding - y = np.random.randint(0, output_dim, nb_samples * timesteps) - y = y.reshape((nb_samples, timesteps)) - y_onehot = np.eye(output_dim)[y] - y = np.expand_dims(y, 2) # .astype('float32') - - # test with no masking, onehot, fix length - model = Sequential() - model.add(Embedding(embedding_num, embedding_dim, input_length=timesteps)) - crf = CRF(output_dim) - model.add(crf) - model.compile(optimizer='rmsprop', loss=crf_loss) - model.fit(x, y_onehot, epochs=1, batch_size=10) - model.save(MODEL_PERSISTENCE_PATH) - load_model(MODEL_PERSISTENCE_PATH, - custom_objects={'CRF': CRF, - 'crf_loss': crf_loss, - 'crf_viterbi_accuracy': crf_viterbi_accuracy}) - - # test with masking, sparse target, dynamic length; - # test crf_viterbi_accuracy, crf_marginal_accuracy - - model = Sequential() - model.add(Embedding(embedding_num, embedding_dim, mask_zero=True)) - crf = CRF(output_dim, sparse_target=True) - model.add(crf) - model.compile(optimizer='rmsprop', loss=crf_loss, - metrics=[crf_viterbi_accuracy, crf_marginal_accuracy]) - model.fit(x, y, epochs=1, batch_size=10) - - # check mask - y_pred = model.predict(x).argmax(-1) - assert (y_pred[0, -4:] == 0).all() # right padding - assert (y_pred[1, :5] == 0).all() # left padding - - # test viterbi_acc - _, v_acc, _ = model.evaluate(x, y) - np_acc = (y_pred[x > 0] == y[:, :, 0][x > 0]).astype('float32').mean() - print(v_acc, np_acc) - assert np.abs(v_acc - np_acc) < 1e-4 - - # test config - model.get_config() - - # test marginal learn mode, fix length - - model = Sequential() - model.add(Embedding(embedding_num, embedding_dim, input_length=timesteps, - mask_zero=True)) - crf = CRF(output_dim, learn_mode='marginal', unroll=True) - model.add(crf) - model.compile(optimizer='rmsprop', loss=crf_loss) - model.fit(x, y_onehot, epochs=1, batch_size=10) - - # check mask (marginal output) - y_pred = model.predict(x) - assert_allclose(y_pred[0, -4:], 1. / output_dim, atol=1e-6) - assert_allclose(y_pred[1, :5], 1. 
/ output_dim, atol=1e-6) - - # test marginal learn mode, but with Viterbi test_mode - model = Sequential() - model.add(Embedding(embedding_num, embedding_dim, input_length=timesteps, - mask_zero=True)) - crf = CRF(output_dim, learn_mode='marginal', test_mode='viterbi') - model.add(crf) - model.compile(optimizer='rmsprop', loss=crf_loss, metrics=[crf_accuracy]) - model.fit(x, y_onehot, epochs=1, batch_size=10) - - y_pred = model.predict(x) - - # check y_pred is onehot vector (output from 'viterbi' test mode) - assert_allclose(np.eye(output_dim)[y_pred.argmax(-1)], y_pred, atol=1e-6) - - try: - os.remove(MODEL_PERSISTENCE_PATH) - except OSError: - pass - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from keras_contrib.utils.test_utils import is_tf_keras -from numpy.testing import assert_allclose -from keras.layers import Conv2D -from keras.models import Sequential -from keras.optimizers import Adam - -from keras.losses import sparse_categorical_crossentropy -from keras import backend as K -from keras_contrib.losses import DSSIMObjective - -allobj = [] - - -def test_objective_shapes_3d(): - y_a = K.variable(np.random.random((5, 6, 7))) - y_b = K.variable(np.random.random((5, 6, 7))) - for obj in allobj: - objective_output = obj(y_a, y_b) - assert K.eval(objective_output).shape == (5, 6) - - -def test_objective_shapes_2d(): - y_a = K.variable(np.random.random((6, 7))) - y_b = K.variable(np.random.random((6, 7))) - for obj in allobj: - objective_output = obj(y_a, y_b) - assert K.eval(objective_output).shape == (6,) - - -def test_cce_one_hot(): - y_a = K.variable(np.random.randint(0, 7, (5, 6))) - y_b = K.variable(np.random.random((5, 6, 7))) - objective_output = sparse_categorical_crossentropy(y_a, y_b) - assert K.eval(objective_output).shape == (5, 6) - - y_a = K.variable(np.random.randint(0, 7, (6,))) - y_b = K.variable(np.random.random((6, 7))) - assert K.eval(sparse_categorical_crossentropy(y_a, y_b)).shape == (6,) - - -def test_DSSIM_channels_last(): - prev_data = K.image_data_format() - K.set_image_data_format('channels_last') - for input_dim, kernel_size in zip([32, 33], [2, 3]): - input_shape = [input_dim, input_dim, 3] - X = np.random.random_sample(4 * input_dim * input_dim * 3) - X = X.reshape([4] + input_shape) - y = np.random.random_sample(4 * input_dim * input_dim * 3) - y = y.reshape([4] + input_shape) - - model = Sequential() - model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape, - activation='relu')) - model.add(Conv2D(3, (3, 3), padding='same', input_shape=input_shape, - activation='relu')) - adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8) - model.compile(loss=DSSIMObjective(kernel_size=kernel_size), - metrics=['mse'], - optimizer=adam) - model.fit(X, y, batch_size=2, epochs=1, shuffle='batch') - - # Test same - x1 = K.constant(X, 'float32') - x2 = K.constant(X, 'float32') - dssim = DSSIMObjective(kernel_size=kernel_size) - assert_allclose(0.0, K.eval(dssim(x1, x2)), atol=1e-4) - - # Test opposite - x1 = K.zeros([4] + input_shape) - x2 = K.ones([4] + input_shape) - dssim = DSSIMObjective(kernel_size=kernel_size) - assert_allclose(0.5, K.eval(dssim(x1, x2)), atol=1e-4) - - K.set_image_data_format(prev_data) - - -@pytest.mark.xfail(is_tf_keras, - reason='TODO fix this.', - strict=True) -def test_DSSIM_channels_first(): - prev_data = K.image_data_format() - K.set_image_data_format('channels_first') - for input_dim, kernel_size in zip([32, 33], [2, 3]): - input_shape = [3, input_dim, input_dim] - X = 
np.random.random_sample(4 * input_dim * input_dim * 3) - X = X.reshape([4] + input_shape) - y = np.random.random_sample(4 * input_dim * input_dim * 3) - y = y.reshape([4] + input_shape) - - model = Sequential() - model.add(Conv2D(32, (3, 3), padding='same', input_shape=input_shape, - activation='relu')) - model.add(Conv2D(3, (3, 3), padding='same', input_shape=input_shape, - activation='relu')) - adam = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=1e-8) - model.compile(loss=DSSIMObjective(kernel_size=kernel_size), metrics=['mse'], - optimizer=adam) - model.fit(X, y, batch_size=2, epochs=1, shuffle='batch') - - # Test same - x1 = K.constant(X, 'float32') - x2 = K.constant(X, 'float32') - dssim = DSSIMObjective(kernel_size=kernel_size) - assert_allclose(0.0, K.eval(dssim(x1, x2)), atol=1e-4) - - # Test opposite - x1 = K.zeros([4] + input_shape) - x2 = K.ones([4] + input_shape) - dssim = DSSIMObjective(kernel_size=kernel_size) - assert_allclose(0.5, K.eval(dssim(x1, x2)), atol=1e-4) - - K.set_image_data_format(prev_data) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest - -from keras_contrib.losses import jaccard_distance -from keras_contrib.utils.test_utils import is_tf_keras -from keras import backend as K -import numpy as np - - -@pytest.mark.xfail(is_tf_keras, - reason='TODO fix this.', - strict=True) -def test_jaccard_distance(): - # all_right, almost_right, half_right, all_wrong - y_true = np.array([[0, 0, 1, 0], [0, 0, 1, 0], [0, 0, 1, 0], - [0, 0, 1., 0.]]) - y_pred = np.array([[0, 0, 1, 0], [0, 0, 0.9, 0], [0, 0, 0.1, 0], - [1, 1, 0.1, 1.]]) - - r = jaccard_distance( - K.variable(y_true), - K.variable(y_pred), ) - if K.is_keras_tensor(r): - assert K.int_shape(r) == (4, ) - - all_right, almost_right, half_right, all_wrong = K.eval(r) - assert all_right == 0, 'should converge on zero' - assert all_right < almost_right - assert almost_right < half_right - assert half_right < all_wrong - - -def test_jaccard_distance_shapes_3d(): - y_a = K.variable(np.random.random((5, 6, 7))) - y_b = K.variable(np.random.random((5, 6, 7))) - objective_output = jaccard_distance(y_a, y_b) - assert K.eval(objective_output).shape == (5, 6) - - -def test_jaccard_distance_shapes_2d(): - y_a = K.variable(np.random.random((6, 7))) - y_b = K.variable(np.random.random((6, 7))) - objective_output = jaccard_distance(y_a, y_b) - assert K.eval(objective_output).shape == (6, ) -from __future__ import print_function -import pytest -from keras_contrib.utils.test_utils import is_tf_keras -from keras_contrib.tests import optimizers -from keras_contrib.optimizers import ftml - - -@pytest.mark.xfail(is_tf_keras, - reason='TODO fix this.', - strict=True) -def test_ftml(): - optimizers._test_optimizer(ftml()) - optimizers._test_optimizer(ftml(lr=0.003, beta_1=0.8, - beta_2=0.9, epsilon=1e-5, - decay=1e-3)) -from __future__ import print_function -import numpy as np -from keras_contrib.tests import optimizers -from keras_contrib.optimizers import lars -from keras.models import Sequential -from keras.layers import Dense - - -def test_base_lars(): - optimizers._test_optimizer(lars(0.01)) - - -def test_nesterov_lars(): - optimizers._test_optimizer(lars(0.01, nesterov=True)) -from __future__ import print_function -from keras_contrib.tests import optimizers -from keras_contrib.optimizers import Padam - - -def test_padam(): - optimizers._test_optimizer(Padam()) - optimizers._test_optimizer(Padam(decay=1e-3)) -from __future__ import print_function -import pytest -from keras_contrib.tests import optimizers 
-from keras_contrib.optimizers import Yogi -from keras_contrib.utils.test_utils import is_tf_keras - - -def test_yogi(): - optimizers._test_optimizer(Yogi()) - optimizers._test_optimizer(Yogi(beta_1=0.9, beta_2=0.9)) - optimizers._test_optimizer(Yogi(beta_1=0.9, beta_2=0.99)) - optimizers._test_optimizer(Yogi(beta_1=0.9, beta_2=0.999)) - - -@pytest.mark.skipif(is_tf_keras, - reason='Sometimes fail. It is random.', - strict=True) -def test_yogi_change_lr(): - optimizers._test_optimizer(Yogi(beta_1=0.9, beta_2=0.999, lr=0.001)) -import pytest -import os -from keras import backend as K -from keras.layers import Input, Dense -from keras.models import Model -from numpy.testing import assert_allclose - -from keras_contrib.utils.save_load_utils import save_all_weights, load_all_weights - - -@pytest.mark.skipif(K.backend() != 'tensorflow', - reason='save_all_weights and load_all_weights only ' - 'supported on TensorFlow') -def test_save_and_load_all_weights(): - ''' - Test save_all_weights and load_all_weights. - Save and load optimizer and model weights but not configuration. - ''' - - def make_model(): - _x = Input((10,)) - _y = Dense(10)(_x) - _m = Model(_x, _y) - _m.compile('adam', 'mean_squared_error') - _m._make_train_function() - return _m - - # make a model - m1 = make_model() - # set weights - w1 = m1.layers[1].kernel # dense layer - w1value = K.get_value(w1) - w1value[0, 0:4] = [1, 3, 3, 7] - K.set_value(w1, w1value) - # set optimizer weights - ow1 = m1.optimizer.weights[3] # momentum weights - ow1value = K.get_value(ow1) - ow1value[0, 0:3] = [4, 2, 0] - K.set_value(ow1, ow1value) - # save all weights - save_all_weights(m1, 'model.h5') - # new model - m2 = make_model() - # load all weights - load_all_weights(m2, 'model.h5') - # check weights - assert_allclose(K.get_value(m2.layers[1].kernel)[0, 0:4], [1, 3, 3, 7]) - # check optimizer weights - assert_allclose(K.get_value(m2.optimizer.weights[3])[0, 0:3], [4, 2, 0]) - os.remove('model.h5') - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -from keras_contrib.utils.test_utils import layer_test -from keras_contrib.layers import PELU - - -@pytest.mark.parametrize('kwargs', [{}, {'shared_axes': 1}]) -def test_pelu(kwargs): - layer_test(PELU, kwargs=kwargs, - input_shape=(2, 3, 4)) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -from keras_contrib.utils.test_utils import layer_test -from keras_contrib.layers import SineReLU - - -@pytest.mark.parametrize('epsilon', [0.0025, 0.0035, 0.0045]) -def test_sine_relu(epsilon): - layer_test(SineReLU, kwargs={'epsilon': epsilon}, input_shape=(2, 3, 4)) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -from keras_contrib.utils.test_utils import layer_test -from keras_contrib.layers import SReLU - - -@pytest.mark.parametrize('kwargs', [{}, {'shared_axes': 1}]) -def test_srelu(kwargs): - layer_test(SReLU, kwargs=kwargs, input_shape=(2, 3, 4)) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -from keras_contrib.utils.test_utils import layer_test -from keras_contrib.layers import Swish - - -@pytest.mark.parametrize('trainable', [False, True]) -def test_swish(trainable): - layer_test(Swish, kwargs={'beta': 1.0, 'trainable': trainable}, - input_shape=(2, 3, 4)) - - -if __name__ == '__main__': - pytest.main([__file__]) -import numpy as np -import pytest -from keras import backend as K -from keras.models import Sequential -from numpy.testing import assert_allclose - -from keras_contrib.utils.test_utils import 
layer_test -from keras_contrib.layers import CosineConvolution2D - -# TensorFlow does not support full convolution. -if K.backend() == 'theano': - _convolution_border_modes = ['valid', 'same'] - data_format = 'channels_first' -else: - _convolution_border_modes = ['valid', 'same'] - data_format = 'channels_last' - - -@pytest.mark.parametrize('border_mode', _convolution_border_modes) -@pytest.mark.parametrize('subsample', [(1, 1), (2, 2)]) -@pytest.mark.parametrize('use_bias_mode', [True, False]) -@pytest.mark.parametrize('use_regularizer', [True, False]) -def test_cosineconvolution_2d(border_mode, - subsample, - use_bias_mode, - use_regularizer): - num_samples = 2 - num_filter = 2 - stack_size = 3 - num_row = 10 - num_col = 6 - - if border_mode == 'same' and subsample != (1, 1): - return - - kwargs = {'filters': num_filter, - 'kernel_size': (3, 3), - 'padding': border_mode, - 'strides': subsample, - 'use_bias': use_bias_mode, - 'data_format': data_format} - if use_regularizer: - kwargs.update({'kernel_regularizer': 'l2', - 'bias_regularizer': 'l2', - 'activity_regularizer': 'l2'}) - - layer_test(CosineConvolution2D, - kwargs=kwargs, - input_shape=(num_samples, num_row, num_col, stack_size)) - - -def test_cosineconvolution_2d_correctness(): - if data_format == 'channels_first': - X = np.random.randn(1, 3, 5, 5) - input_dim = (3, 5, 5) - W0 = X[:, :, ::-1, ::-1] - elif data_format == 'channels_last': - X = np.random.randn(1, 5, 5, 3) - input_dim = (5, 5, 3) - W0 = X[0, :, :, :, None] - - model = Sequential() - model.add(CosineConvolution2D(1, (5, 5), use_bias=True, - input_shape=input_dim, - data_format=data_format)) - model.compile(loss='mse', optimizer='rmsprop') - W = model.get_weights() - W[0] = W0 - W[1] = np.asarray([1.]) - model.set_weights(W) - out = model.predict(X) - assert_allclose(out, np.ones((1, 1, 1, 1), dtype=K.floatx()), atol=1e-5) - - model = Sequential() - model.add(CosineConvolution2D(1, (5, 5), - use_bias=False, - input_shape=input_dim, - data_format=data_format)) - model.compile(loss='mse', optimizer='rmsprop') - W = model.get_weights() - W[0] = -2 * W0 - model.set_weights(W) - out = model.predict(X) - assert_allclose(out, -np.ones((1, 1, 1, 1), dtype=K.floatx()), atol=1e-5) - - -if __name__ == '__main__': - pytest.main([__file__]) -import numpy as np -import pytest -from keras import backend as K - -from keras_contrib import backend as KC -from keras_contrib.layers import SubPixelUpscaling -from keras_contrib.utils.test_utils import layer_test - -# TensorFlow does not support full convolution. 
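-# (The branch below mainly selects a data_format matching the active backend;
-# the border-mode list is identical either way.)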
-if K.backend() == 'theano': - _convolution_border_modes = ['valid', 'same'] - data_format = 'channels_first' -else: - _convolution_border_modes = ['valid', 'same'] - data_format = 'channels_last' - - -@pytest.mark.parametrize('scale_factor', [2, 3, 4]) -def test_sub_pixel_upscaling(scale_factor): - num_samples = 2 - num_row = 16 - num_col = 16 - input_dtype = K.floatx() - - nb_channels = 4 * (scale_factor ** 2) - input_data = np.random.random((num_samples, nb_channels, num_row, num_col)) - input_data = input_data.astype(input_dtype) - - if K.image_data_format() == 'channels_last': - input_data = input_data.transpose((0, 2, 3, 1)) - - input_tensor = K.variable(input_data) - expected_output = K.eval(KC.depth_to_space(input_tensor, - scale=scale_factor)) - - layer_test(SubPixelUpscaling, - kwargs={'scale_factor': scale_factor}, - input_data=input_data, - expected_output=expected_output, - expected_output_dtype=K.floatx()) - - -if __name__ == '__main__': - pytest.main([__file__]) -import numpy as np -import pytest -from keras import backend as K -from keras import regularizers -from keras.layers import Input -from keras.models import Sequential, Model -from numpy.testing import assert_allclose - -from keras_contrib.layers import GroupNormalization -from keras_contrib.utils.test_utils import layer_test - -input_1 = np.arange(10) -input_2 = np.zeros(10) -input_3 = np.ones(10) -input_shapes = [np.ones((10, 10)), np.ones((10, 10, 10))] - - -def test_basic_groupnorm(): - layer_test(GroupNormalization, - kwargs={'groups': 2, - 'epsilon': 0.1, - 'gamma_regularizer': regularizers.l2(0.01), - 'beta_regularizer': regularizers.l2(0.01)}, - input_shape=(3, 4, 2)) - layer_test(GroupNormalization, - kwargs={'groups': 2, - 'epsilon': 0.1, - 'axis': 1}, - input_shape=(3, 4, 2)) - layer_test(GroupNormalization, - kwargs={'groups': 2, - 'gamma_initializer': 'ones', - 'beta_initializer': 'ones'}, - input_shape=(3, 4, 2, 4)) - if K.backend() != 'theano': - layer_test(GroupNormalization, - kwargs={'groups': 2, - 'axis': 1, - 'scale': False, - 'center': False}, - input_shape=(3, 4, 2, 4)) - - -def test_groupnorm_correctness_1d(): - model = Sequential() - norm = GroupNormalization(input_shape=(10,), groups=2) - model.add(norm) - model.compile(loss='mse', optimizer='rmsprop') - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10)) - model.fit(x, x, epochs=5, verbose=0) - out = model.predict(x) - out -= K.eval(norm.beta) - out /= K.eval(norm.gamma) - - assert_allclose(out.mean(), 0.0, atol=1e-1) - assert_allclose(out.std(), 1.0, atol=1e-1) - - -def test_groupnorm_correctness_2d(): - model = Sequential() - norm = GroupNormalization(axis=1, input_shape=(10, 6), groups=2) - model.add(norm) - model.compile(loss='mse', optimizer='rmsprop') - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10, 6)) - model.fit(x, x, epochs=5, verbose=0) - out = model.predict(x) - out -= np.reshape(K.eval(norm.beta), (1, 10, 1)) - out /= np.reshape(K.eval(norm.gamma), (1, 10, 1)) - - assert_allclose(out.mean(axis=(0, 2)), 0.0, atol=1.1e-1) - assert_allclose(out.std(axis=(0, 2)), 1.0, atol=1.1e-1) - - -def test_groupnorm_correctness_2d_different_groups(): - norm1 = GroupNormalization(axis=1, input_shape=(10, 6), groups=2) - norm2 = GroupNormalization(axis=1, input_shape=(10, 6), groups=1) - norm3 = GroupNormalization(axis=1, input_shape=(10, 6), groups=10) - - model = Sequential() - model.add(norm1) - model.compile(loss='mse', optimizer='rmsprop') - - # 
centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10, 6)) - model.fit(x, x, epochs=5, verbose=0) - out = model.predict(x) - out -= np.reshape(K.eval(norm1.beta), (1, 10, 1)) - out /= np.reshape(K.eval(norm1.gamma), (1, 10, 1)) - - assert_allclose(out.mean(axis=(0, 2)), 0.0, atol=1.1e-1) - assert_allclose(out.std(axis=(0, 2)), 1.0, atol=1.1e-1) - - model = Sequential() - model.add(norm2) - model.compile(loss='mse', optimizer='rmsprop') - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10, 6)) - model.fit(x, x, epochs=5, verbose=0) - out = model.predict(x) - out -= np.reshape(K.eval(norm2.beta), (1, 10, 1)) - out /= np.reshape(K.eval(norm2.gamma), (1, 10, 1)) - - assert_allclose(out.mean(axis=(0, 2)), 0.0, atol=1.1e-1) - assert_allclose(out.std(axis=(0, 2)), 1.0, atol=1.1e-1) - - model = Sequential() - model.add(norm3) - model.compile(loss='mse', optimizer='rmsprop') - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 10, 6)) - model.fit(x, x, epochs=5, verbose=0) - out = model.predict(x) - out -= np.reshape(K.eval(norm3.beta), (1, 10, 1)) - out /= np.reshape(K.eval(norm3.gamma), (1, 10, 1)) - - assert_allclose(out.mean(axis=(0, 2)), 0.0, atol=1.1e-1) - assert_allclose(out.std(axis=(0, 2)), 1.0, atol=1.1e-1) - - -def test_groupnorm_mode_twice(): - # This is a regression test for issue #4881 with the old - # batch normalization functions in the Theano backend. - model = Sequential() - model.add(GroupNormalization(input_shape=(10, 5, 5), - axis=1, - groups=2)) - model.add(GroupNormalization(input_shape=(10, 5, 5), - axis=1, - groups=2)) - model.compile(loss='mse', optimizer='sgd') - - x = np.random.normal(loc=5.0, scale=10.0, size=(20, 10, 5, 5)) - model.fit(x, x, epochs=1, verbose=0) - model.predict(x) - - -def test_groupnorm_convnet(): - model = Sequential() - norm = GroupNormalization(axis=1, - input_shape=(3, 4, 4), - groups=3) - model.add(norm) - model.compile(loss='mse', optimizer='sgd') - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) - model.fit(x, x, epochs=4, verbose=0) - out = model.predict(x) - out -= np.reshape(K.eval(norm.beta), (1, 3, 1, 1)) - out /= np.reshape(K.eval(norm.gamma), (1, 3, 1, 1)) - - assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) - assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) - - -@pytest.mark.skipif((K.backend() == 'theano'), - reason='Bug with theano backend') -def test_groupnorm_convnet_no_center_no_scale(): - model = Sequential() - norm = GroupNormalization(axis=-1, center=False, scale=False, - input_shape=(3, 4, 4), groups=2) - model.add(norm) - model.compile(loss='mse', optimizer='sgd') - - # centered on 5.0, variance 10.0 - x = np.random.normal(loc=5.0, scale=10.0, size=(1000, 3, 4, 4)) - model.fit(x, x, epochs=4, verbose=0) - out = model.predict(x) - - assert_allclose(np.mean(out, axis=(0, 2, 3)), 0.0, atol=1e-1) - assert_allclose(np.std(out, axis=(0, 2, 3)), 1.0, atol=1e-1) - - -def test_shared_groupnorm(): - '''Test that a GN layer can be shared - across different data streams. 
- ''' - # Test single layer reuse - bn = GroupNormalization(input_shape=(10,), groups=2) - x1 = Input(shape=(10,)) - bn(x1) - - x2 = Input(shape=(10,)) - y2 = bn(x2) - - x = np.random.normal(loc=5.0, scale=10.0, size=(2, 10)) - model = Model(x2, y2) - assert len(model.updates) == 0 - model.compile('sgd', 'mse') - model.train_on_batch(x, x) - - # Test model-level reuse - x3 = Input(shape=(10,)) - y3 = model(x3) - new_model = Model(x3, y3) - assert len(model.updates) == 0 - new_model.compile('sgd', 'mse') - new_model.train_on_batch(x, x) - - -def test_that_trainable_disables_updates(): - val_a = np.random.random((10, 4)) - val_out = np.random.random((10, 4)) - - a = Input(shape=(4,)) - layer = GroupNormalization(input_shape=(4,), groups=2) - b = layer(a) - model = Model(a, b) - - model.trainable = False - assert len(model.updates) == 0 - - model.compile('sgd', 'mse') - assert len(model.updates) == 0 - - x1 = model.predict(val_a) - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - assert_allclose(x1, x2, atol=1e-7) - - model.trainable = True - model.compile('sgd', 'mse') - assert len(model.updates) == 0 - - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - assert np.abs(np.sum(x1 - x2)) > 1e-5 - - layer.trainable = False - model.compile('sgd', 'mse') - assert len(model.updates) == 0 - - x1 = model.predict(val_a) - model.train_on_batch(val_a, val_out) - x2 = model.predict(val_a) - assert_allclose(x1, x2, atol=1e-7) - - -if __name__ == '__main__': - pytest.main([__file__]) -""" -The build/compilations setup - ->> pip install -r requirements.txt ->> python setup.py install -""" -import pip -import logging -import pkg_resources -try: - from setuptools import setup -except ImportError: - from distutils.core import setup - - -def _parse_requirements(file_path): - pip_ver = pkg_resources.get_distribution('pip').version - pip_version = list(map(int, pip_ver.split('.')[:2])) - if pip_version >= [6, 0]: - raw = pip.req.parse_requirements(file_path, - session=pip.download.PipSession()) - else: - raw = pip.req.parse_requirements(file_path) - return [str(i.req) for i in raw] - - -# parse_requirements() returns generator of pip.req.InstallRequirement objects -try: - install_reqs = _parse_requirements("requirements.txt") -except Exception: - logging.warning('Fail load requirements file, so using default ones.') - install_reqs = [] - -setup( - name='mask-rcnn', - version='2.1', - url='https://github.com/matterport/Mask_RCNN', - author='Matterport', - author_email='waleed.abdulla@gmail.com', - license='MIT', - description='Mask R-CNN for object detection and instance segmentation', - packages=["mrcnn"], - install_requires=install_reqs, - include_package_data=True, - python_requires='>=3.4', - long_description="""This is an implementation of Mask R-CNN on Python 3, Keras, and TensorFlow. -The model generates bounding boxes and segmentation masks for each instance of an object in the image. 
-It's based on Feature Pyramid Network (FPN) and a ResNet101 backbone.""", - classifiers=[ - "Development Status :: 5 - Production/Stable", - "Environment :: Console", - "Intended Audience :: Developers", - "Intended Audience :: Information Technology", - "Intended Audience :: Education", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", - "Natural Language :: English", - "Operating System :: OS Independent", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - "Topic :: Scientific/Engineering :: Image Recognition", - "Topic :: Scientific/Engineering :: Visualization", - "Topic :: Scientific/Engineering :: Image Segmentation", - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - ], - keywords="image instance segmentation object detection mask rcnn r-cnn tensorflow keras", -) -""" -Mask R-CNN -Base Configurations class. - -Copyright (c) 2017 Matterport, Inc. -Licensed under the MIT License (see LICENSE for details) -Written by Waleed Abdulla -""" - -import numpy as np - - -# Base Configuration Class -# Don't use this class directly. Instead, sub-class it and override -# the configurations you need to change. - -class Config(object): - """Base configuration class. For custom configurations, create a - sub-class that inherits from this one and override properties - that need to be changed. - """ - # Name the configurations. For example, 'COCO', 'Experiment 3', ...etc. - # Useful if your code needs to do things differently depending on which - # experiment is running. - NAME = None # Override in sub-classes - - # NUMBER OF GPUs to use. When using only a CPU, this needs to be set to 1. - GPU_COUNT = 1 - - # Number of images to train with on each GPU. A 12GB GPU can typically - # handle 2 images of 1024x1024px. - # Adjust based on your GPU memory and image sizes. Use the highest - # number that your GPU can handle for best performance. - IMAGES_PER_GPU = 2 - - # Number of training steps per epoch - # This doesn't need to match the size of the training set. Tensorboard - # updates are saved at the end of each epoch, so setting this to a - # smaller number means getting more frequent TensorBoard updates. - # Validation stats are also calculated at each epoch end and they - # might take a while, so don't set this too small to avoid spending - # a lot of time on validation stats. - STEPS_PER_EPOCH = 1000 - - # Number of validation steps to run at the end of every training epoch. - # A bigger number improves accuracy of validation stats, but slows - # down the training. - VALIDATION_STEPS = 50 - - # Backbone network architecture - # Supported values are: resnet50, resnet101. - # You can also provide a callable that should have the signature - # of model.resnet_graph. If you do so, you need to supply a callable - # to COMPUTE_BACKBONE_SHAPE as well - BACKBONE = "resnet101" - - # Only useful if you supply a callable to BACKBONE. Should compute - # the shape of each layer of the FPN Pyramid. - # See model.compute_backbone_shapes - COMPUTE_BACKBONE_SHAPE = None - - # The strides of each layer of the FPN Pyramid. These values - # are based on a Resnet101 backbone. 
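These strides fix the feature-map size at every pyramid level by ceiling division of the image shape, the same arithmetic compute_backbone_shapes performs later in this dump. A quick sketch with the default values set just below:

import math

BACKBONE_STRIDES = [4, 8, 16, 32, 64]
image_shape = (1024, 1024)  # the default "square" resize target

# One (height, width) per pyramid level: ceil(dim / stride)
print([(math.ceil(image_shape[0] / s), math.ceil(image_shape[1] / s))
       for s in BACKBONE_STRIDES])
# [(256, 256), (128, 128), (64, 64), (32, 32), (16, 16)]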
- BACKBONE_STRIDES = [4, 8, 16, 32, 64]
-
- # Size of the fully-connected layers in the classification graph
- FPN_CLASSIF_FC_LAYERS_SIZE = 1024
-
- # Size of the top-down layers used to build the feature pyramid
- TOP_DOWN_PYRAMID_SIZE = 256
-
- # Number of classification classes (including background)
- NUM_CLASSES = 1 # Override in sub-classes
-
- # Length of square anchor side in pixels
- RPN_ANCHOR_SCALES = (32, 64, 128, 256, 512)
-
- # Ratios of anchors at each cell (width/height)
- # A value of 1 represents a square anchor, and 0.5 is a wide anchor
- RPN_ANCHOR_RATIOS = [0.5, 1, 2]
-
- # Anchor stride
- # If 1 then anchors are created for each cell in the backbone feature map.
- # If 2, then anchors are created for every other cell, and so on.
- RPN_ANCHOR_STRIDE = 1
-
- # Non-max suppression threshold to filter RPN proposals.
- # You can increase this during training to generate more proposals.
- RPN_NMS_THRESHOLD = 0.7
-
- # How many anchors per image to use for RPN training
- RPN_TRAIN_ANCHORS_PER_IMAGE = 256
-
- # ROIs kept after tf.nn.top_k and before non-maximum suppression
- PRE_NMS_LIMIT = 6000
-
- # ROIs kept after non-maximum suppression (training and inference)
- POST_NMS_ROIS_TRAINING = 2000
- POST_NMS_ROIS_INFERENCE = 1000
-
- # If enabled, resizes instance masks to a smaller size to reduce
- # memory load. Recommended when using high-resolution images.
- USE_MINI_MASK = True
- MINI_MASK_SHAPE = (56, 56) # (height, width) of the mini-mask
-
- # Input image resizing
- # Generally, use the "square" resizing mode for training and predicting
- # and it should work well in most cases. In this mode, images are scaled
- # up such that the small side is = IMAGE_MIN_DIM, but ensuring that the
- # scaling doesn't make the long side > IMAGE_MAX_DIM. Then the image is
- # padded with zeros to make it a square so multiple images can be put
- # in one batch.
- # Available resizing modes:
- # none: No resizing or padding. Return the image unchanged.
- # square: Resize and pad with zeros to get a square image
- # of size [max_dim, max_dim].
- # pad64: Pads width and height with zeros to make them multiples of 64.
- # If IMAGE_MIN_DIM or IMAGE_MIN_SCALE are not None, then it scales
- # up before padding. IMAGE_MAX_DIM is ignored in this mode.
- # The multiple of 64 is needed to ensure smooth scaling of feature
- # maps up and down the 6 levels of the FPN pyramid (2**6=64).
- # crop: Picks random crops from the image. First, scales the image based
- # on IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then picks a random crop of
- # size IMAGE_MIN_DIM x IMAGE_MIN_DIM. Can be used in training only.
- # IMAGE_MAX_DIM is not used in this mode.
- IMAGE_RESIZE_MODE = "square"
- IMAGE_MIN_DIM = 800
- IMAGE_MAX_DIM = 1024
- # Minimum scaling ratio. Checked after IMAGE_MIN_DIM and can force further
- # upscaling. For example, if set to 2 then images are scaled up to double
- # the width and height, or more, even if IMAGE_MIN_DIM doesn't require it.
- # However, in 'square' mode, it can be overruled by IMAGE_MAX_DIM.
- IMAGE_MIN_SCALE = 0
- # Number of color channels per image. RGB = 3, grayscale = 1, RGB-D = 4
- # Changing this requires other changes in the code.
See the WIKI for more - # details: https://github.com/matterport/Mask_RCNN/wiki - IMAGE_CHANNEL_COUNT = 3 - - # Image mean (RGB) - MEAN_PIXEL = np.array([123.7, 116.8, 103.9]) - - # Number of ROIs per image to feed to classifier/mask heads - # The Mask RCNN paper uses 512 but often the RPN doesn't generate - # enough positive proposals to fill this and keep a positive:negative - # ratio of 1:3. You can increase the number of proposals by adjusting - # the RPN NMS threshold. - TRAIN_ROIS_PER_IMAGE = 200 - - # Percent of positive ROIs used to train classifier/mask heads - ROI_POSITIVE_RATIO = 0.33 - - # Pooled ROIs - POOL_SIZE = 7 - MASK_POOL_SIZE = 14 - - # Shape of output mask - # To change this you also need to change the neural network mask branch - MASK_SHAPE = [28, 28] - - # Maximum number of ground truth instances to use in one image - MAX_GT_INSTANCES = 100 - - # Bounding box refinement standard deviation for RPN and final detections. - RPN_BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) - BBOX_STD_DEV = np.array([0.1, 0.1, 0.2, 0.2]) - - # Max number of final detections - DETECTION_MAX_INSTANCES = 100 - - # Minimum probability value to accept a detected instance - # ROIs below this threshold are skipped - DETECTION_MIN_CONFIDENCE = 0.7 - - # Non-maximum suppression threshold for detection - DETECTION_NMS_THRESHOLD = 0.3 - - # Learning rate and momentum - # The Mask RCNN paper uses lr=0.02, but on TensorFlow it causes - # weights to explode. Likely due to differences in optimizer - # implementation. - LEARNING_RATE = 0.001 - LEARNING_MOMENTUM = 0.9 - - # Weight decay regularization - WEIGHT_DECAY = 0.0001 - - # Loss weights for more precise optimization. - # Can be used for R-CNN training setup. - LOSS_WEIGHTS = { - "rpn_class_loss": 1., - "rpn_bbox_loss": 1., - "mrcnn_class_loss": 1., - "mrcnn_bbox_loss": 1., - "mrcnn_mask_loss": 1. - } - - # Use RPN ROIs or externally generated ROIs for training - # Keep this True for most situations. Set to False if you want to train - # the head branches on ROI generated by code rather than the ROIs from - # the RPN. For example, to debug the classifier head without having to - # train the RPN. - USE_RPN_ROIS = True - - # Train or freeze batch normalization layers - # None: Train BN layers. This is the normal mode - # False: Freeze BN layers. Good when using a small batch size - # True: (don't use). Set layer in training mode even when predicting - TRAIN_BN = False # Defaulting to False since batch size is often small - - # Gradient norm clipping - GRADIENT_CLIP_NORM = 5.0 - - def __init__(self): - """Set values of computed attributes.""" - # Effective batch size - self.BATCH_SIZE = self.IMAGES_PER_GPU * self.GPU_COUNT - - # Input image size - if self.IMAGE_RESIZE_MODE == "crop": - self.IMAGE_SHAPE = np.array([self.IMAGE_MIN_DIM, self.IMAGE_MIN_DIM, - self.IMAGE_CHANNEL_COUNT]) - else: - self.IMAGE_SHAPE = np.array([self.IMAGE_MAX_DIM, self.IMAGE_MAX_DIM, - self.IMAGE_CHANNEL_COUNT]) - - # Image meta data length - # See compose_image_meta() for details - self.IMAGE_META_SIZE = 1 + 3 + 3 + 4 + 1 + self.NUM_CLASSES - - def display(self): - """Display Configuration values.""" - print("\nConfigurations:") - for a in dir(self): - if not a.startswith("__") and not callable(getattr(self, a)): - print("{:30} {}".format(a, getattr(self, a))) - print("\n") -""" -Mask R-CNN -The main Mask R-CNN model implementation. - -Copyright (c) 2017 Matterport, Inc. 
-Licensed under the MIT License (see LICENSE for details)
-Written by Waleed Abdulla
-"""
-
-import os
-import random
-import datetime
-import re
-import math
-import logging
-from collections import OrderedDict
-import multiprocessing
-import numpy as np
-import tensorflow as tf
-import keras
-import keras.backend as K
-import keras.layers as KL
-import keras.engine as KE
-import keras.models as KM
-
-from mrcnn import utils
-
-# Requires TensorFlow 1.3+ and Keras 2.0.8+.
-from distutils.version import LooseVersion
-assert LooseVersion(tf.__version__) >= LooseVersion("1.3")
-assert LooseVersion(keras.__version__) >= LooseVersion('2.0.8')
-
-
-############################################################
-# Utility Functions
-############################################################
-
-def log(text, array=None):
- """Prints a text message and, optionally, if a Numpy array is provided,
- its shape, min, and max values.
- """
- if array is not None:
- text = text.ljust(25)
- text += ("shape: {:20} ".format(str(array.shape)))
- if array.size:
- text += ("min: {:10.5f} max: {:10.5f}".format(array.min(), array.max()))
- else:
- text += ("min: {:10} max: {:10}".format("", ""))
- text += " {}".format(array.dtype)
- print(text)
-
-
-class BatchNorm(KL.BatchNormalization):
- """Extends the Keras BatchNormalization class to allow a central place
- to make changes if needed.
-
- Batch normalization has a negative effect on training if batches are small
- so this layer is often frozen (via setting in Config class) and functions
- as a linear layer.
- """
-
- def call(self, inputs, training=None):
- """
- Note about training values:
- None: Train BN layers. This is the normal mode
- False: Freeze BN layers. Good when batch size is small
- True: (don't use). Set layer in training mode even when making inferences
- """
- return super(self.__class__, self).call(inputs, training=training)
-
-
-def compute_backbone_shapes(config, image_shape):
- """Computes the width and height of each stage of the backbone network.
-
- Returns:
- [N, (height, width)]. Where N is the number of stages
- """
- if callable(config.BACKBONE):
- return config.COMPUTE_BACKBONE_SHAPE(image_shape)
-
- # Currently supports ResNet only
- assert config.BACKBONE in ["resnet50", "resnet101"]
- return np.array(
- [[int(math.ceil(image_shape[0] / stride)),
- int(math.ceil(image_shape[1] / stride))]
- for stride in config.BACKBONE_STRIDES])
-
-
-############################################################
-# Resnet Graph
-############################################################
-
-# Code adapted from:
-# https://github.com/fchollet/deep-learning-models/blob/master/resnet50.py
-
-def identity_block(input_tensor, kernel_size, filters, stage, block,
- use_bias=True, train_bn=True):
- """The identity_block is the block that has no conv layer at shortcut
- # Arguments
- input_tensor: input tensor
- kernel_size: default 3, the kernel size of middle conv layer at main path
- filters: list of integers, the nb_filters of 3 conv layer at main path
- stage: integer, current stage label, used for generating layer names
- block: 'a','b'..., current block label, used for generating layer names
- use_bias: Boolean. To use or not use a bias in conv layers.
- train_bn: Boolean.
Train or freeze Batch Norm layers - """ - nb_filter1, nb_filter2, nb_filter3 = filters - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = KL.Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a', - use_bias=use_bias)(input_tensor) - x = BatchNorm(name=bn_name_base + '2a')(x, training=train_bn) - x = KL.Activation('relu')(x) - - x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', - name=conv_name_base + '2b', use_bias=use_bias)(x) - x = BatchNorm(name=bn_name_base + '2b')(x, training=train_bn) - x = KL.Activation('relu')(x) - - x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + '2c', - use_bias=use_bias)(x) - x = BatchNorm(name=bn_name_base + '2c')(x, training=train_bn) - - x = KL.Add()([x, input_tensor]) - x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) - return x - - -def conv_block(input_tensor, kernel_size, filters, stage, block, - strides=(2, 2), use_bias=True, train_bn=True): - """conv_block is the block that has a conv layer at shortcut - # Arguments - input_tensor: input tensor - kernel_size: default 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - use_bias: Boolean. To use or not use a bias in conv layers. - train_bn: Boolean. Train or freeze Batch Norm layers - Note that from stage 3, the first conv layer at main path is with subsample=(2,2) - And the shortcut should have subsample=(2,2) as well - """ - nb_filter1, nb_filter2, nb_filter3 = filters - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = KL.Conv2D(nb_filter1, (1, 1), strides=strides, - name=conv_name_base + '2a', use_bias=use_bias)(input_tensor) - x = BatchNorm(name=bn_name_base + '2a')(x, training=train_bn) - x = KL.Activation('relu')(x) - - x = KL.Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', - name=conv_name_base + '2b', use_bias=use_bias)(x) - x = BatchNorm(name=bn_name_base + '2b')(x, training=train_bn) - x = KL.Activation('relu')(x) - - x = KL.Conv2D(nb_filter3, (1, 1), name=conv_name_base + - '2c', use_bias=use_bias)(x) - x = BatchNorm(name=bn_name_base + '2c')(x, training=train_bn) - - shortcut = KL.Conv2D(nb_filter3, (1, 1), strides=strides, - name=conv_name_base + '1', use_bias=use_bias)(input_tensor) - shortcut = BatchNorm(name=bn_name_base + '1')(shortcut, training=train_bn) - - x = KL.Add()([x, shortcut]) - x = KL.Activation('relu', name='res' + str(stage) + block + '_out')(x) - return x - - -def resnet_graph(input_image, architecture, stage5=False, train_bn=True): - """Build a ResNet graph. - architecture: Can be resnet50 or resnet101 - stage5: Boolean. If False, stage5 of the network is not created - train_bn: Boolean. 
Train or freeze Batch Norm layers - """ - assert architecture in ["resnet50", "resnet101"] - # Stage 1 - x = KL.ZeroPadding2D((3, 3))(input_image) - x = KL.Conv2D(64, (7, 7), strides=(2, 2), name='conv1', use_bias=True)(x) - x = BatchNorm(name='bn_conv1')(x, training=train_bn) - x = KL.Activation('relu')(x) - C1 = x = KL.MaxPooling2D((3, 3), strides=(2, 2), padding="same")(x) - # Stage 2 - x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', - strides=(1, 1), train_bn=train_bn) - x = identity_block(x, 3, [64, 64, 256], stage=2, - block='b', train_bn=train_bn) - C2 = x = identity_block(x, 3, [64, 64, 256], - stage=2, block='c', train_bn=train_bn) - # Stage 3 - x = conv_block(x, 3, [128, 128, 512], stage=3, - block='a', train_bn=train_bn) - x = identity_block(x, 3, [128, 128, 512], stage=3, - block='b', train_bn=train_bn) - x = identity_block(x, 3, [128, 128, 512], stage=3, - block='c', train_bn=train_bn) - C3 = x = identity_block( - x, 3, [128, 128, 512], stage=3, block='d', train_bn=train_bn) - # Stage 4 - x = conv_block(x, 3, [256, 256, 1024], stage=4, - block='a', train_bn=train_bn) - block_count = {"resnet50": 5, "resnet101": 22}[architecture] - for i in range(block_count): - x = identity_block(x, 3, [256, 256, 1024], - stage=4, block=chr(98 + i), train_bn=train_bn) - C4 = x - # Stage 5 - if stage5: - x = conv_block(x, 3, [512, 512, 2048], stage=5, - block='a', train_bn=train_bn) - x = identity_block(x, 3, [512, 512, 2048], - stage=5, block='b', train_bn=train_bn) - C5 = x = identity_block( - x, 3, [512, 512, 2048], stage=5, block='c', train_bn=train_bn) - else: - C5 = None - return [C1, C2, C3, C4, C5] - - -############################################################ -# Proposal Layer -############################################################ - -def apply_box_deltas_graph(boxes, deltas): - """Applies the given deltas to the given boxes. - boxes: [N, (y1, x1, y2, x2)] boxes to update - deltas: [N, (dy, dx, log(dh), log(dw))] refinements to apply - """ - # Convert to y, x, h, w - height = boxes[:, 2] - boxes[:, 0] - width = boxes[:, 3] - boxes[:, 1] - center_y = boxes[:, 0] + 0.5 * height - center_x = boxes[:, 1] + 0.5 * width - # Apply deltas - center_y += deltas[:, 0] * height - center_x += deltas[:, 1] * width - height *= tf.exp(deltas[:, 2]) - width *= tf.exp(deltas[:, 3]) - # Convert back to y1, x1, y2, x2 - y1 = center_y - 0.5 * height - x1 = center_x - 0.5 * width - y2 = y1 + height - x2 = x1 + width - result = tf.stack([y1, x1, y2, x2], axis=1, name="apply_box_deltas_out") - return result - - -def clip_boxes_graph(boxes, window): - """ - boxes: [N, (y1, x1, y2, x2)] - window: [4] in the form y1, x1, y2, x2 - """ - # Split - wy1, wx1, wy2, wx2 = tf.split(window, 4) - y1, x1, y2, x2 = tf.split(boxes, 4, axis=1) - # Clip - y1 = tf.maximum(tf.minimum(y1, wy2), wy1) - x1 = tf.maximum(tf.minimum(x1, wx2), wx1) - y2 = tf.maximum(tf.minimum(y2, wy2), wy1) - x2 = tf.maximum(tf.minimum(x2, wx2), wx1) - clipped = tf.concat([y1, x1, y2, x2], axis=1, name="clipped_boxes") - clipped.set_shape((clipped.shape[0], 4)) - return clipped - - -class ProposalLayer(KE.Layer): - """Receives anchor scores and selects a subset to pass as proposals - to the second stage. Filtering is done based on anchor scores and - non-max suppression to remove overlaps. It also applies bounding - box refinement deltas to anchors. 
- - Inputs: - rpn_probs: [batch, num_anchors, (bg prob, fg prob)] - rpn_bbox: [batch, num_anchors, (dy, dx, log(dh), log(dw))] - anchors: [batch, num_anchors, (y1, x1, y2, x2)] anchors in normalized coordinates - - Returns: - Proposals in normalized coordinates [batch, rois, (y1, x1, y2, x2)] - """ - - def __init__(self, proposal_count, nms_threshold, config=None, **kwargs): - super(ProposalLayer, self).__init__(**kwargs) - self.config = config - self.proposal_count = proposal_count - self.nms_threshold = nms_threshold - - def call(self, inputs): - # Box Scores. Use the foreground class confidence. [Batch, num_rois, 1] - scores = inputs[0][:, :, 1] - # Box deltas [batch, num_rois, 4] - deltas = inputs[1] - deltas = deltas * np.reshape(self.config.RPN_BBOX_STD_DEV, [1, 1, 4]) - # Anchors - anchors = inputs[2] - - # Improve performance by trimming to top anchors by score - # and doing the rest on the smaller subset. - pre_nms_limit = tf.minimum( - self.config.PRE_NMS_LIMIT, tf.shape(anchors)[1]) - ix = tf.nn.top_k(scores, pre_nms_limit, sorted=True, - name="top_anchors").indices - scores = utils.batch_slice([scores, ix], lambda x, y: tf.gather(x, y), - self.config.IMAGES_PER_GPU) - deltas = utils.batch_slice([deltas, ix], lambda x, y: tf.gather(x, y), - self.config.IMAGES_PER_GPU) - pre_nms_anchors = utils.batch_slice([anchors, ix], lambda a, x: tf.gather(a, x), - self.config.IMAGES_PER_GPU, - names=["pre_nms_anchors"]) - - # Apply deltas to anchors to get refined anchors. - # [batch, N, (y1, x1, y2, x2)] - boxes = utils.batch_slice([pre_nms_anchors, deltas], - lambda x, y: apply_box_deltas_graph(x, y), - self.config.IMAGES_PER_GPU, - names=["refined_anchors"]) - - # Clip to image boundaries. Since we're in normalized coordinates, - # clip to 0..1 range. [batch, N, (y1, x1, y2, x2)] - window = np.array([0, 0, 1, 1], dtype=np.float32) - boxes = utils.batch_slice(boxes, - lambda x: clip_boxes_graph(x, window), - self.config.IMAGES_PER_GPU, - names=["refined_anchors_clipped"]) - - # Filter out small boxes - # According to Xinlei Chen's paper, this reduces detection accuracy - # for small objects, so we're skipping it. - - # Non-max suppression - def nms(boxes, scores): - indices = tf.image.non_max_suppression( - boxes, scores, self.proposal_count, - self.nms_threshold, name="rpn_non_max_suppression") - proposals = tf.gather(boxes, indices) - # Pad if needed - padding = tf.maximum(self.proposal_count - - tf.shape(proposals)[0], 0) - proposals = tf.pad(proposals, [(0, padding), (0, 0)]) - return proposals - proposals = utils.batch_slice([boxes, scores], nms, - self.config.IMAGES_PER_GPU) - return proposals - - def compute_output_shape(self, input_shape): - return (None, self.proposal_count, 4) - - -############################################################ -# ROIAlign Layer -############################################################ - -def log2_graph(x): - """Implementation of Log2. TF doesn't have a native implementation.""" - return tf.log(x) / tf.log(2.0) - - -class PyramidROIAlign(KE.Layer): - """Implements ROI Pooling on multiple levels of the feature pyramid. - - Params: - - pool_shape: [pool_height, pool_width] of the output pooled regions. Usually [7, 7] - - Inputs: - - boxes: [batch, num_boxes, (y1, x1, y2, x2)] in normalized - coordinates. Possibly padded with zeros if not enough - boxes to fill the array. - - image_meta: [batch, (meta data)] Image details. See compose_image_meta() - - feature_maps: List of feature maps from different levels of the pyramid. 
- Each is [batch, height, width, channels]
-
- Output:
- Pooled regions in the shape: [batch, num_boxes, pool_height, pool_width, channels].
- The width and height are those specified in pool_shape in the layer
- constructor.
- """
-
- def __init__(self, pool_shape, **kwargs):
- super(PyramidROIAlign, self).__init__(**kwargs)
- self.pool_shape = tuple(pool_shape)
-
- def call(self, inputs):
- # Crop boxes [batch, num_boxes, (y1, x1, y2, x2)] in normalized coords
- boxes = inputs[0]
-
- # Image meta
- # Holds details about the image. See compose_image_meta()
- image_meta = inputs[1]
-
- # Feature Maps. List of feature maps from different levels of the
- # feature pyramid. Each is [batch, height, width, channels]
- feature_maps = inputs[2:]
-
- # Assign each ROI to a level in the pyramid based on the ROI area.
- y1, x1, y2, x2 = tf.split(boxes, 4, axis=2)
- h = y2 - y1
- w = x2 - x1
- # Use shape of first image. Images in a batch must have the same size.
- image_shape = parse_image_meta_graph(image_meta)['image_shape'][0]
- # Equation 1 in the Feature Pyramid Networks paper. Account for
- # the fact that our coordinates are normalized here.
- # e.g. a 224x224 ROI (in pixels) maps to P4
- image_area = tf.cast(image_shape[0] * image_shape[1], tf.float32)
- roi_level = log2_graph(tf.sqrt(h * w) / (224.0 / tf.sqrt(image_area)))
- roi_level = tf.minimum(5, tf.maximum(
- 2, 4 + tf.cast(tf.round(roi_level), tf.int32)))
- roi_level = tf.squeeze(roi_level, 2)
-
- # Loop through levels and apply ROI pooling to each. P2 to P5.
- pooled = []
- box_to_level = []
- for i, level in enumerate(range(2, 6)):
- ix = tf.where(tf.equal(roi_level, level))
- level_boxes = tf.gather_nd(boxes, ix)
-
- # Box indices for crop_and_resize.
- box_indices = tf.cast(ix[:, 0], tf.int32)
-
- # Keep track of which box is mapped to which level
- box_to_level.append(ix)
-
- # Stop gradient propagation to ROI proposals
- level_boxes = tf.stop_gradient(level_boxes)
- box_indices = tf.stop_gradient(box_indices)
-
- # Crop and Resize
- # From Mask R-CNN paper: "We sample four regular locations, so
- # that we can evaluate either max or average pooling. In fact,
- # interpolating only a single value at each bin center (without
- # pooling) is nearly as effective."
- #
- # Here we use the simplified approach of a single value per bin,
- # which is how it's done in tf.crop_and_resize()
- # Result: [batch * num_boxes, pool_height, pool_width, channels]
- pooled.append(tf.image.crop_and_resize(
- feature_maps[i], level_boxes, box_indices, self.pool_shape,
- method="bilinear"))
-
- # Pack pooled features into one tensor
- pooled = tf.concat(pooled, axis=0)
-
- # Pack box_to_level mapping into one array and add another
- # column representing the order of pooled boxes
- box_to_level = tf.concat(box_to_level, axis=0)
- box_range = tf.expand_dims(tf.range(tf.shape(box_to_level)[0]), 1)
- box_to_level = tf.concat([tf.cast(box_to_level, tf.int32), box_range],
- axis=1)
-
- # Rearrange pooled features to match the order of the original boxes
- # Sort box_to_level by batch then box index
- # TF doesn't have a way to sort by two columns, so merge them and sort.
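The merged-key trick referenced in the comment above: scale the primary key so it dominates, add the secondary key, and a single sort orders by both. A NumPy sketch with toy values:

import numpy as np

box_to_level = np.array([[1, 7], [0, 3], [1, 2], [0, 9]])  # [batch, box index]

# The multiplier (100000 in the layer) must exceed any possible box index.
merged = box_to_level[:, 0] * 100000 + box_to_level[:, 1]
order = np.argsort(merged)
print(box_to_level[order])  # [[0 3] [0 9] [1 2] [1 7]] -- by batch, then box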
- sorting_tensor = box_to_level[:, 0] * 100000 + box_to_level[:, 1] - ix = tf.nn.top_k(sorting_tensor, k=tf.shape( - box_to_level)[0]).indices[::-1] - ix = tf.gather(box_to_level[:, 2], ix) - pooled = tf.gather(pooled, ix) - - # Re-add the batch dimension - shape = tf.concat([tf.shape(boxes)[:2], tf.shape(pooled)[1:]], axis=0) - pooled = tf.reshape(pooled, shape) - return pooled - - def compute_output_shape(self, input_shape): - return input_shape[0][:2] + self.pool_shape + (input_shape[2][-1], ) - - -############################################################ -# Detection Target Layer -############################################################ - -def overlaps_graph(boxes1, boxes2): - """Computes IoU overlaps between two sets of boxes. - boxes1, boxes2: [N, (y1, x1, y2, x2)]. - """ - # 1. Tile boxes2 and repeat boxes1. This allows us to compare - # every boxes1 against every boxes2 without loops. - # TF doesn't have an equivalent to np.repeat() so simulate it - # using tf.tile() and tf.reshape. - b1 = tf.reshape(tf.tile(tf.expand_dims(boxes1, 1), - [1, 1, tf.shape(boxes2)[0]]), [-1, 4]) - b2 = tf.tile(boxes2, [tf.shape(boxes1)[0], 1]) - # 2. Compute intersections - b1_y1, b1_x1, b1_y2, b1_x2 = tf.split(b1, 4, axis=1) - b2_y1, b2_x1, b2_y2, b2_x2 = tf.split(b2, 4, axis=1) - y1 = tf.maximum(b1_y1, b2_y1) - x1 = tf.maximum(b1_x1, b2_x1) - y2 = tf.minimum(b1_y2, b2_y2) - x2 = tf.minimum(b1_x2, b2_x2) - intersection = tf.maximum(x2 - x1, 0) * tf.maximum(y2 - y1, 0) - # 3. Compute unions - b1_area = (b1_y2 - b1_y1) * (b1_x2 - b1_x1) - b2_area = (b2_y2 - b2_y1) * (b2_x2 - b2_x1) - union = b1_area + b2_area - intersection - # 4. Compute IoU and reshape to [boxes1, boxes2] - iou = intersection / union - overlaps = tf.reshape(iou, [tf.shape(boxes1)[0], tf.shape(boxes2)[0]]) - return overlaps - - -def detection_targets_graph(proposals, gt_class_ids, gt_boxes, gt_masks, config): - """Generates detection targets for one image. Subsamples proposals and - generates target class IDs, bounding box deltas, and masks for each. - - Inputs: - proposals: [POST_NMS_ROIS_TRAINING, (y1, x1, y2, x2)] in normalized coordinates. Might - be zero padded if there are not enough proposals. - gt_class_ids: [MAX_GT_INSTANCES] int class IDs - gt_boxes: [MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized coordinates. - gt_masks: [height, width, MAX_GT_INSTANCES] of boolean type. - - Returns: Target ROIs and corresponding class IDs, bounding box shifts, - and masks. - rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized coordinates - class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. Zero padded. - deltas: [TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw))] - masks: [TRAIN_ROIS_PER_IMAGE, height, width]. Masks cropped to bbox - boundaries and resized to neural network output size. - - Note: Returned arrays might be zero padded if not enough target ROIs. - """ - # Assertions - asserts = [ - tf.Assert(tf.greater(tf.shape(proposals)[0], 0), [proposals], - name="roi_assertion"), - ] - with tf.control_dependencies(asserts): - proposals = tf.identity(proposals) - - # Remove zero padding - proposals, _ = trim_zeros_graph(proposals, name="trim_proposals") - gt_boxes, non_zeros = trim_zeros_graph(gt_boxes, name="trim_gt_boxes") - gt_class_ids = tf.boolean_mask(gt_class_ids, non_zeros, - name="trim_gt_class_ids") - gt_masks = tf.gather(gt_masks, tf.where(non_zeros)[:, 0], axis=2, - name="trim_gt_masks") - - # Handle COCO crowds - # A crowd box in COCO is a bounding box around several instances. 
Exclude - # them from training. A crowd box is given a negative class ID. - crowd_ix = tf.where(gt_class_ids < 0)[:, 0] - non_crowd_ix = tf.where(gt_class_ids > 0)[:, 0] - crowd_boxes = tf.gather(gt_boxes, crowd_ix) - gt_class_ids = tf.gather(gt_class_ids, non_crowd_ix) - gt_boxes = tf.gather(gt_boxes, non_crowd_ix) - gt_masks = tf.gather(gt_masks, non_crowd_ix, axis=2) - - # Compute overlaps matrix [proposals, gt_boxes] - overlaps = overlaps_graph(proposals, gt_boxes) - - # Compute overlaps with crowd boxes [proposals, crowd_boxes] - crowd_overlaps = overlaps_graph(proposals, crowd_boxes) - crowd_iou_max = tf.reduce_max(crowd_overlaps, axis=1) - no_crowd_bool = (crowd_iou_max < 0.001) - - # Determine positive and negative ROIs - roi_iou_max = tf.reduce_max(overlaps, axis=1) - # 1. Positive ROIs are those with >= 0.5 IoU with a GT box - positive_roi_bool = (roi_iou_max >= 0.5) - positive_indices = tf.where(positive_roi_bool)[:, 0] - # 2. Negative ROIs are those with < 0.5 with every GT box. Skip crowds. - negative_indices = tf.where(tf.logical_and( - roi_iou_max < 0.5, no_crowd_bool))[:, 0] - - # Subsample ROIs. Aim for 33% positive - # Positive ROIs - positive_count = int(config.TRAIN_ROIS_PER_IMAGE * - config.ROI_POSITIVE_RATIO) - positive_indices = tf.random_shuffle(positive_indices)[:positive_count] - positive_count = tf.shape(positive_indices)[0] - # Negative ROIs. Add enough to maintain positive:negative ratio. - r = 1.0 / config.ROI_POSITIVE_RATIO - negative_count = tf.cast( - r * tf.cast(positive_count, tf.float32), tf.int32) - positive_count - negative_indices = tf.random_shuffle(negative_indices)[:negative_count] - # Gather selected ROIs - positive_rois = tf.gather(proposals, positive_indices) - negative_rois = tf.gather(proposals, negative_indices) - - # Assign positive ROIs to GT boxes. - positive_overlaps = tf.gather(overlaps, positive_indices) - roi_gt_box_assignment = tf.cond( - tf.greater(tf.shape(positive_overlaps)[1], 0), - true_fn=lambda: tf.argmax(positive_overlaps, axis=1), - false_fn=lambda: tf.cast(tf.constant([]), tf.int64) - ) - roi_gt_boxes = tf.gather(gt_boxes, roi_gt_box_assignment) - roi_gt_class_ids = tf.gather(gt_class_ids, roi_gt_box_assignment) - - # Compute bbox refinement for positive ROIs - deltas = utils.box_refinement_graph(positive_rois, roi_gt_boxes) - deltas /= config.BBOX_STD_DEV - - # Assign positive ROIs to GT masks - # Permute masks to [N, height, width, 1] - transposed_masks = tf.expand_dims(tf.transpose(gt_masks, [2, 0, 1]), -1) - # Pick the right mask for each ROI - roi_masks = tf.gather(transposed_masks, roi_gt_box_assignment) - - # Compute mask targets - boxes = positive_rois - if config.USE_MINI_MASK: - # Transform ROI coordinates from normalized image space - # to normalized mini-mask space. - y1, x1, y2, x2 = tf.split(positive_rois, 4, axis=1) - gt_y1, gt_x1, gt_y2, gt_x2 = tf.split(roi_gt_boxes, 4, axis=1) - gt_h = gt_y2 - gt_y1 - gt_w = gt_x2 - gt_x1 - y1 = (y1 - gt_y1) / gt_h - x1 = (x1 - gt_x1) / gt_w - y2 = (y2 - gt_y1) / gt_h - x2 = (x2 - gt_x1) / gt_w - boxes = tf.concat([y1, x1, y2, x2], 1) - box_ids = tf.range(0, tf.shape(roi_masks)[0]) - masks = tf.image.crop_and_resize(tf.cast(roi_masks, tf.float32), boxes, - box_ids, - config.MASK_SHAPE) - # Remove the extra dimension from masks. - masks = tf.squeeze(masks, axis=3) - - # Threshold mask pixels at 0.5 to have GT masks be 0 or 1 to use with - # binary cross entropy loss. 
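For the ROI subsampling step above, the 33% positive ratio works out as follows with the default config values (a spelled-out sketch of the same arithmetic):

TRAIN_ROIS_PER_IMAGE = 200
ROI_POSITIVE_RATIO = 0.33

positive_count = int(TRAIN_ROIS_PER_IMAGE * ROI_POSITIVE_RATIO)  # 66
r = 1.0 / ROI_POSITIVE_RATIO
# Negatives top the sample up to roughly positive_count / ratio in total.
negative_count = int(r * positive_count) - positive_count         # 134
print(positive_count, negative_count)  # 66 134 (out of 200 ROIs)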
- masks = tf.round(masks) - - # Append negative ROIs and pad bbox deltas and masks that - # are not used for negative ROIs with zeros. - rois = tf.concat([positive_rois, negative_rois], axis=0) - N = tf.shape(negative_rois)[0] - P = tf.maximum(config.TRAIN_ROIS_PER_IMAGE - tf.shape(rois)[0], 0) - rois = tf.pad(rois, [(0, P), (0, 0)]) - roi_gt_boxes = tf.pad(roi_gt_boxes, [(0, N + P), (0, 0)]) - roi_gt_class_ids = tf.pad(roi_gt_class_ids, [(0, N + P)]) - deltas = tf.pad(deltas, [(0, N + P), (0, 0)]) - masks = tf.pad(masks, [[0, N + P], (0, 0), (0, 0)]) - - return rois, roi_gt_class_ids, deltas, masks - - -class DetectionTargetLayer(KE.Layer): - """Subsamples proposals and generates target box refinement, class_ids, - and masks for each. - - Inputs: - proposals: [batch, N, (y1, x1, y2, x2)] in normalized coordinates. Might - be zero padded if there are not enough proposals. - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs. - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in normalized - coordinates. - gt_masks: [batch, height, width, MAX_GT_INSTANCES] of boolean type - - Returns: Target ROIs and corresponding class IDs, bounding box shifts, - and masks. - rois: [batch, TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] in normalized - coordinates - target_class_ids: [batch, TRAIN_ROIS_PER_IMAGE]. Integer class IDs. - target_deltas: [batch, TRAIN_ROIS_PER_IMAGE, (dy, dx, log(dh), log(dw)] - target_mask: [batch, TRAIN_ROIS_PER_IMAGE, height, width] - Masks cropped to bbox boundaries and resized to neural - network output size. - - Note: Returned arrays might be zero padded if not enough target ROIs. - """ - - def __init__(self, config, **kwargs): - super(DetectionTargetLayer, self).__init__(**kwargs) - self.config = config - - def call(self, inputs): - proposals = inputs[0] - gt_class_ids = inputs[1] - gt_boxes = inputs[2] - gt_masks = inputs[3] - - # Slice the batch and run a graph for each slice - # TODO: Rename target_bbox to target_deltas for clarity - names = ["rois", "target_class_ids", "target_bbox", "target_mask"] - outputs = utils.batch_slice( - [proposals, gt_class_ids, gt_boxes, gt_masks], - lambda w, x, y, z: detection_targets_graph( - w, x, y, z, self.config), - self.config.IMAGES_PER_GPU, names=names) - return outputs - - def compute_output_shape(self, input_shape): - return [ - (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # rois - (None, self.config.TRAIN_ROIS_PER_IMAGE), # class_ids - (None, self.config.TRAIN_ROIS_PER_IMAGE, 4), # deltas - (None, self.config.TRAIN_ROIS_PER_IMAGE, self.config.MASK_SHAPE[0], - self.config.MASK_SHAPE[1]) # masks - ] - - def compute_mask(self, inputs, mask=None): - return [None, None, None, None] - - -############################################################ -# Detection Layer -############################################################ - -def refine_detections_graph(rois, probs, deltas, window, config): - """Refine classified proposals and filter overlaps and return final - detections. - - Inputs: - rois: [N, (y1, x1, y2, x2)] in normalized coordinates - probs: [N, num_classes]. Class probabilities. - deltas: [N, num_classes, (dy, dx, log(dh), log(dw))]. Class-specific - bounding box deltas. - window: (y1, x1, y2, x2) in normalized coordinates. The part of the image - that contains the image excluding the padding. - - Returns detections shaped: [num_detections, (y1, x1, y2, x2, class_id, score)] where - coordinates are normalized. 
- """ - # Class IDs per ROI - class_ids = tf.argmax(probs, axis=1, output_type=tf.int32) - # Class probability of the top class of each ROI - indices = tf.stack([tf.range(probs.shape[0]), class_ids], axis=1) - class_scores = tf.gather_nd(probs, indices) - # Class-specific bounding box deltas - deltas_specific = tf.gather_nd(deltas, indices) - # Apply bounding box deltas - # Shape: [boxes, (y1, x1, y2, x2)] in normalized coordinates - refined_rois = apply_box_deltas_graph( - rois, deltas_specific * config.BBOX_STD_DEV) - # Clip boxes to image window - refined_rois = clip_boxes_graph(refined_rois, window) - - # TODO: Filter out boxes with zero area - - # Filter out background boxes - keep = tf.where(class_ids > 0)[:, 0] - # Filter out low confidence boxes - if config.DETECTION_MIN_CONFIDENCE: - conf_keep = tf.where( - class_scores >= config.DETECTION_MIN_CONFIDENCE)[:, 0] - keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), - tf.expand_dims(conf_keep, 0)) - keep = tf.sparse_tensor_to_dense(keep)[0] - - # Apply per-class NMS - # 1. Prepare variables - pre_nms_class_ids = tf.gather(class_ids, keep) - pre_nms_scores = tf.gather(class_scores, keep) - pre_nms_rois = tf.gather(refined_rois, keep) - unique_pre_nms_class_ids = tf.unique(pre_nms_class_ids)[0] - - def nms_keep_map(class_id): - """Apply Non-Maximum Suppression on ROIs of the given class.""" - # Indices of ROIs of the given class - ixs = tf.where(tf.equal(pre_nms_class_ids, class_id))[:, 0] - # Apply NMS - class_keep = tf.image.non_max_suppression( - tf.gather(pre_nms_rois, ixs), - tf.gather(pre_nms_scores, ixs), - max_output_size=config.DETECTION_MAX_INSTANCES, - iou_threshold=config.DETECTION_NMS_THRESHOLD) - # Map indices - class_keep = tf.gather(keep, tf.gather(ixs, class_keep)) - # Pad with -1 so returned tensors have the same shape - gap = config.DETECTION_MAX_INSTANCES - tf.shape(class_keep)[0] - class_keep = tf.pad(class_keep, [(0, gap)], - mode='CONSTANT', constant_values=-1) - # Set shape so map_fn() can infer result shape - class_keep.set_shape([config.DETECTION_MAX_INSTANCES]) - return class_keep - - # 2. Map over class IDs - nms_keep = tf.map_fn(nms_keep_map, unique_pre_nms_class_ids, - dtype=tf.int64) - # 3. Merge results into one list, and remove -1 padding - nms_keep = tf.reshape(nms_keep, [-1]) - nms_keep = tf.gather(nms_keep, tf.where(nms_keep > -1)[:, 0]) - # 4. Compute intersection between keep and nms_keep - keep = tf.sets.set_intersection(tf.expand_dims(keep, 0), - tf.expand_dims(nms_keep, 0)) - keep = tf.sparse_tensor_to_dense(keep)[0] - # Keep top detections - roi_count = config.DETECTION_MAX_INSTANCES - class_scores_keep = tf.gather(class_scores, keep) - num_keep = tf.minimum(tf.shape(class_scores_keep)[0], roi_count) - top_ids = tf.nn.top_k(class_scores_keep, k=num_keep, sorted=True)[1] - keep = tf.gather(keep, top_ids) - - # Arrange output as [N, (y1, x1, y2, x2, class_id, score)] - # Coordinates are normalized. - detections = tf.concat([ - tf.gather(refined_rois, keep), - tf.to_float(tf.gather(class_ids, keep))[..., tf.newaxis], - tf.gather(class_scores, keep)[..., tf.newaxis] - ], axis=1) - - # Pad with zeros if detections < DETECTION_MAX_INSTANCES - gap = config.DETECTION_MAX_INSTANCES - tf.shape(detections)[0] - detections = tf.pad(detections, [(0, gap), (0, 0)], "CONSTANT") - return detections - - -class DetectionLayer(KE.Layer): - """Takes classified proposal boxes and their bounding box deltas and - returns the final detection boxes. 
- - Returns: - [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] where - coordinates are normalized. - """ - - def __init__(self, config=None, **kwargs): - super(DetectionLayer, self).__init__(**kwargs) - self.config = config - - def call(self, inputs): - rois = inputs[0] - mrcnn_class = inputs[1] - mrcnn_bbox = inputs[2] - image_meta = inputs[3] - - # Get windows of images in normalized coordinates. Windows are the area - # in the image that excludes the padding. - # Use the shape of the first image in the batch to normalize the window - # because we know that all images get resized to the same size. - m = parse_image_meta_graph(image_meta) - image_shape = m['image_shape'][0] - window = norm_boxes_graph(m['window'], image_shape[:2]) - - # Run detection refinement graph on each item in the batch - detections_batch = utils.batch_slice( - [rois, mrcnn_class, mrcnn_bbox, window], - lambda x, y, w, z: refine_detections_graph( - x, y, w, z, self.config), - self.config.IMAGES_PER_GPU) - - # Reshape output - # [batch, num_detections, (y1, x1, y2, x2, class_id, class_score)] in - # normalized coordinates - return tf.reshape( - detections_batch, - [self.config.BATCH_SIZE, self.config.DETECTION_MAX_INSTANCES, 6]) - - def compute_output_shape(self, input_shape): - return (None, self.config.DETECTION_MAX_INSTANCES, 6) - - -############################################################ -# Region Proposal Network (RPN) -############################################################ - -def rpn_graph(feature_map, anchors_per_location, anchor_stride): - """Builds the computation graph of Region Proposal Network. - - feature_map: backbone features [batch, height, width, depth] - anchors_per_location: number of anchors per pixel in the feature map - anchor_stride: Controls the density of anchors. Typically 1 (anchors for - every pixel in the feature map), or 2 (every other pixel). - - Returns: - rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax) - rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities. - rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be - applied to anchors. - """ - # TODO: check if stride of 2 causes alignment issues if the feature map - # is not even. - # Shared convolutional base of the RPN - shared = KL.Conv2D(512, (3, 3), padding='same', activation='relu', - strides=anchor_stride, - name='rpn_conv_shared')(feature_map) - - # Anchor Score. [batch, height, width, anchors per location * 2]. - x = KL.Conv2D(2 * anchors_per_location, (1, 1), padding='valid', - activation='linear', name='rpn_class_raw')(shared) - - # Reshape to [batch, anchors, 2] - rpn_class_logits = KL.Lambda( - lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 2]))(x) - - # Softmax on last dimension of BG/FG. - rpn_probs = KL.Activation( - "softmax", name="rpn_class_xxx")(rpn_class_logits) - - # Bounding box refinement. [batch, H, W, anchors per location * depth] - # where depth is [x, y, log(w), log(h)] - x = KL.Conv2D(anchors_per_location * 4, (1, 1), padding="valid", - activation='linear', name='rpn_bbox_pred')(shared) - - # Reshape to [batch, anchors, 4] - rpn_bbox = KL.Lambda(lambda t: tf.reshape(t, [tf.shape(t)[0], -1, 4]))(x) - - return [rpn_class_logits, rpn_probs, rpn_bbox] - - -def build_rpn_model(anchor_stride, anchors_per_location, depth): - """Builds a Keras model of the Region Proposal Network. - It wraps the RPN graph so it can be used multiple times with shared - weights. 
- - anchors_per_location: number of anchors per pixel in the feature map - anchor_stride: Controls the density of anchors. Typically 1 (anchors for - every pixel in the feature map), or 2 (every other pixel). - depth: Depth of the backbone feature map. - - Returns a Keras Model object. The model outputs, when called, are: - rpn_class_logits: [batch, H * W * anchors_per_location, 2] Anchor classifier logits (before softmax) - rpn_probs: [batch, H * W * anchors_per_location, 2] Anchor classifier probabilities. - rpn_bbox: [batch, H * W * anchors_per_location, (dy, dx, log(dh), log(dw))] Deltas to be - applied to anchors. - """ - input_feature_map = KL.Input(shape=[None, None, depth], - name="input_rpn_feature_map") - outputs = rpn_graph(input_feature_map, anchors_per_location, anchor_stride) - return KM.Model([input_feature_map], outputs, name="rpn_model") - - -############################################################ -# Feature Pyramid Network Heads -############################################################ - -def fpn_classifier_graph(rois, feature_maps, image_meta, - pool_size, num_classes, train_bn=True, - fc_layers_size=1024): - """Builds the computation graph of the feature pyramid network classifier - and regressor heads. - - rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized - coordinates. - feature_maps: List of feature maps from different layers of the pyramid, - [P2, P3, P4, P5]. Each has a different resolution. - image_meta: [batch, (meta data)] Image details. See compose_image_meta() - pool_size: The width of the square feature map generated from ROI Pooling. - num_classes: number of classes, which determines the depth of the results - train_bn: Boolean. Train or freeze Batch Norm layers - fc_layers_size: Size of the 2 FC layers - - Returns: - logits: [batch, num_rois, NUM_CLASSES] classifier logits (before softmax) - probs: [batch, num_rois, NUM_CLASSES] classifier probabilities - bbox_deltas: [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] Deltas to apply to - proposal boxes - """ - # ROI Pooling - # Shape: [batch, num_rois, POOL_SIZE, POOL_SIZE, channels] - x = PyramidROIAlign([pool_size, pool_size], - name="roi_align_classifier")([rois, image_meta] + feature_maps) - # Two 1024 FC layers (implemented with Conv2D for consistency) - x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (pool_size, pool_size), padding="valid"), - name="mrcnn_class_conv1")(x) - x = KL.TimeDistributed(BatchNorm(), name='mrcnn_class_bn1')( - x, training=train_bn) - x = KL.Activation('relu')(x) - x = KL.TimeDistributed(KL.Conv2D(fc_layers_size, (1, 1)), - name="mrcnn_class_conv2")(x) - x = KL.TimeDistributed(BatchNorm(), name='mrcnn_class_bn2')( - x, training=train_bn) - x = KL.Activation('relu')(x) - - shared = KL.Lambda(lambda x: K.squeeze(K.squeeze(x, 3), 2), - name="pool_squeeze")(x) - - # Classifier head - mrcnn_class_logits = KL.TimeDistributed(KL.Dense(num_classes), - name='mrcnn_class_logits')(shared) - mrcnn_probs = KL.TimeDistributed(KL.Activation("softmax"), - name="mrcnn_class")(mrcnn_class_logits) - - # BBox head - # [batch, num_rois, NUM_CLASSES * (dy, dx, log(dh), log(dw))] - x = KL.TimeDistributed(KL.Dense(num_classes * 4, activation='linear'), - name='mrcnn_bbox_fc')(shared) - # Reshape to [batch, num_rois, NUM_CLASSES, (dy, dx, log(dh), log(dw))] - s = K.int_shape(x) - mrcnn_bbox = KL.Reshape((s[1], num_classes, 4), name="mrcnn_bbox")(x) - - return mrcnn_class_logits, mrcnn_probs, mrcnn_bbox - - -def build_fpn_mask_graph(rois, feature_maps, 
image_meta, - pool_size, num_classes, train_bn=True): - """Builds the computation graph of the mask head of Feature Pyramid Network. - - rois: [batch, num_rois, (y1, x1, y2, x2)] Proposal boxes in normalized - coordinates. - feature_maps: List of feature maps from different layers of the pyramid, - [P2, P3, P4, P5]. Each has a different resolution. - image_meta: [batch, (meta data)] Image details. See compose_image_meta() - pool_size: The width of the square feature map generated from ROI Pooling. - num_classes: number of classes, which determines the depth of the results - train_bn: Boolean. Train or freeze Batch Norm layers - - Returns: Masks [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, NUM_CLASSES] - """ - # ROI Pooling - # Shape: [batch, num_rois, MASK_POOL_SIZE, MASK_POOL_SIZE, channels] - x = PyramidROIAlign([pool_size, pool_size], - name="roi_align_mask")([rois, image_meta] + feature_maps) - - # Conv layers - x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), - name="mrcnn_mask_conv1")(x) - x = KL.TimeDistributed(BatchNorm(), - name='mrcnn_mask_bn1')(x, training=train_bn) - x = KL.Activation('relu')(x) - - x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), - name="mrcnn_mask_conv2")(x) - x = KL.TimeDistributed(BatchNorm(), - name='mrcnn_mask_bn2')(x, training=train_bn) - x = KL.Activation('relu')(x) - - x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), - name="mrcnn_mask_conv3")(x) - x = KL.TimeDistributed(BatchNorm(), - name='mrcnn_mask_bn3')(x, training=train_bn) - x = KL.Activation('relu')(x) - - x = KL.TimeDistributed(KL.Conv2D(256, (3, 3), padding="same"), - name="mrcnn_mask_conv4")(x) - x = KL.TimeDistributed(BatchNorm(), - name='mrcnn_mask_bn4')(x, training=train_bn) - x = KL.Activation('relu')(x) - - x = KL.TimeDistributed(KL.Conv2DTranspose(256, (2, 2), strides=2, activation="relu"), - name="mrcnn_mask_deconv")(x) - x = KL.TimeDistributed(KL.Conv2D(num_classes, (1, 1), strides=1, activation="sigmoid"), - name="mrcnn_mask")(x) - return x - - -############################################################ -# Loss Functions -############################################################ - -def smooth_l1_loss(y_true, y_pred): - """Implements Smooth-L1 loss. - y_true and y_pred are typically: [N, 4], but could be any shape. - """ - diff = K.abs(y_true - y_pred) - less_than_one = K.cast(K.less(diff, 1.0), "float32") - loss = (less_than_one * 0.5 * diff**2) + (1 - less_than_one) * (diff - 0.5) - return loss - - -def rpn_class_loss_graph(rpn_match, rpn_class_logits): - """RPN anchor classifier loss. - - rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive, - -1=negative, 0=neutral anchor. - rpn_class_logits: [batch, anchors, 2]. RPN classifier logits for BG/FG. - """ - # Squeeze last dim to simplify - rpn_match = tf.squeeze(rpn_match, -1) - # Get anchor classes. Convert the -1/+1 match to 0/1 values. - anchor_class = K.cast(K.equal(rpn_match, 1), tf.int32) - # Positive and Negative anchors contribute to the loss, - # but neutral anchors (match value = 0) don't. - indices = tf.where(K.not_equal(rpn_match, 0)) - # Pick rows that contribute to the loss and filter out the rest. 
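For reference, the smooth-L1 form defined at the top of this losses section, restated in NumPy (a sketch of the same piecewise formula, not the graph version):

import numpy as np

def smooth_l1(y_true, y_pred):
    # Quadratic inside |diff| < 1, linear (minus 0.5) outside.
    diff = np.abs(y_true - y_pred)
    return np.where(diff < 1.0, 0.5 * diff ** 2, diff - 0.5)

print(smooth_l1(np.zeros(2), np.array([0.5, 3.0])))  # [0.125 2.5]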
- rpn_class_logits = tf.gather_nd(rpn_class_logits, indices)
- anchor_class = tf.gather_nd(anchor_class, indices)
- # Cross entropy loss
- loss = K.sparse_categorical_crossentropy(target=anchor_class,
- output=rpn_class_logits,
- from_logits=True)
- loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0))
- return loss
-
-
-def rpn_bbox_loss_graph(config, target_bbox, rpn_match, rpn_bbox):
- """Return the RPN bounding box loss graph.
-
- config: the model config object.
- target_bbox: [batch, max positive anchors, (dy, dx, log(dh), log(dw))].
- Uses 0 padding to fill in unused bbox deltas.
- rpn_match: [batch, anchors, 1]. Anchor match type. 1=positive,
- -1=negative, 0=neutral anchor.
- rpn_bbox: [batch, anchors, (dy, dx, log(dh), log(dw))]
- """
- # Positive anchors contribute to the loss, but negative and
- # neutral anchors (match value of 0 or -1) don't.
- rpn_match = K.squeeze(rpn_match, -1)
- indices = tf.where(K.equal(rpn_match, 1))
-
- # Pick bbox deltas that contribute to the loss
- rpn_bbox = tf.gather_nd(rpn_bbox, indices)
-
- # Trim target bounding box deltas to the same length as rpn_bbox.
- batch_counts = K.sum(K.cast(K.equal(rpn_match, 1), tf.int32), axis=1)
- target_bbox = batch_pack_graph(target_bbox, batch_counts,
- config.IMAGES_PER_GPU)
-
- loss = smooth_l1_loss(target_bbox, rpn_bbox)
-
- loss = K.switch(tf.size(loss) > 0, K.mean(loss), tf.constant(0.0))
- return loss
-
-
-def mrcnn_class_loss_graph(target_class_ids, pred_class_logits,
- active_class_ids):
- """Loss for the classifier head of Mask RCNN.
-
- target_class_ids: [batch, num_rois]. Integer class IDs. Uses zero
- padding to fill in the array.
- pred_class_logits: [batch, num_rois, num_classes]
- active_class_ids: [batch, num_classes]. Has a value of 1 for
- classes that are in the dataset of the image, and 0
- for classes that are not in the dataset.
- """
- # During model building, Keras calls this function with
- # target_class_ids of type float32. Unclear why. Cast it
- # to int to get around it.
- target_class_ids = tf.cast(target_class_ids, 'int64')
-
- # Find predictions of classes that are not in the dataset.
- pred_class_ids = tf.argmax(pred_class_logits, axis=2)
- # TODO: Update this line to work with batch > 1. Right now it assumes all
- # images in a batch have the same active_class_ids
- pred_active = tf.gather(active_class_ids[0], pred_class_ids)
-
- # Loss
- loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
- labels=target_class_ids, logits=pred_class_logits)
-
- # Erase losses of predictions of classes that are not in the active
- # classes of the image.
- loss = loss * pred_active
-
- # Compute the loss mean. Use only predictions that contribute
- # to the loss to get a correct mean.
- loss = tf.reduce_sum(loss) / tf.reduce_sum(pred_active)
- return loss
-
-
-def mrcnn_bbox_loss_graph(target_bbox, target_class_ids, pred_bbox):
- """Loss for Mask R-CNN bounding box refinement.
-
- target_bbox: [batch, num_rois, (dy, dx, log(dh), log(dw))]
- target_class_ids: [batch, num_rois]. Integer class IDs.
- pred_bbox: [batch, num_rois, num_classes, (dy, dx, log(dh), log(dw))]
- """
- # Reshape to merge batch and roi dimensions for simplicity.
- target_class_ids = K.reshape(target_class_ids, (-1,))
- target_bbox = K.reshape(target_bbox, (-1, 4))
- pred_bbox = K.reshape(pred_bbox, (-1, K.int_shape(pred_bbox)[2], 4))
-
- # Only positive ROIs contribute to the loss. And only
- # the right class_id of each ROI. Get their indices.
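Selecting each positive ROI's delta for its own class, as the gather_nd below does, is plain fancy indexing in NumPy terms (hypothetical shapes, names ours):

import numpy as np

num_rois, num_classes = 4, 3
pred_bbox = np.random.rand(num_rois, num_classes, 4)
target_class_ids = np.array([2, 0, 1, 2])

pos = np.where(target_class_ids > 0)[0]         # positive ROIs only
deltas = pred_bbox[pos, target_class_ids[pos]]  # one (dy, dx, log(dh), log(dw)) per ROI
print(deltas.shape)                             # (3, 4)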
- positive_roi_ix = tf.where(target_class_ids > 0)[:, 0] - positive_roi_class_ids = tf.cast( - tf.gather(target_class_ids, positive_roi_ix), tf.int64) - indices = tf.stack([positive_roi_ix, positive_roi_class_ids], axis=1) - - # Gather the deltas (predicted and true) that contribute to loss - target_bbox = tf.gather(target_bbox, positive_roi_ix) - pred_bbox = tf.gather_nd(pred_bbox, indices) - - # Smooth-L1 Loss - loss = K.switch(tf.size(target_bbox) > 0, - smooth_l1_loss(y_true=target_bbox, y_pred=pred_bbox), - tf.constant(0.0)) - loss = K.mean(loss) - return loss - - -def mrcnn_mask_loss_graph(target_masks, target_class_ids, pred_masks): - """Mask binary cross-entropy loss for the masks head. - - target_masks: [batch, num_rois, height, width]. - A float32 tensor of values 0 or 1. Uses zero padding to fill array. - target_class_ids: [batch, num_rois]. Integer class IDs. Zero padded. - pred_masks: [batch, proposals, height, width, num_classes] float32 tensor - with values from 0 to 1. - """ - # Reshape for simplicity. Merge first two dimensions into one. - target_class_ids = K.reshape(target_class_ids, (-1,)) - mask_shape = tf.shape(target_masks) - target_masks = K.reshape(target_masks, (-1, mask_shape[2], mask_shape[3])) - pred_shape = tf.shape(pred_masks) - pred_masks = K.reshape(pred_masks, - (-1, pred_shape[2], pred_shape[3], pred_shape[4])) - # Permute predicted masks to [N, num_classes, height, width] - pred_masks = tf.transpose(pred_masks, [0, 3, 1, 2]) - - # Only positive ROIs contribute to the loss. And only - # the class specific mask of each ROI. - positive_ix = tf.where(target_class_ids > 0)[:, 0] - positive_class_ids = tf.cast( - tf.gather(target_class_ids, positive_ix), tf.int64) - indices = tf.stack([positive_ix, positive_class_ids], axis=1) - - # Gather the masks (predicted and true) that contribute to loss - y_true = tf.gather(target_masks, positive_ix) - y_pred = tf.gather_nd(pred_masks, indices) - - # Compute binary cross entropy. If no positive ROIs, then return 0. - # shape: [batch, roi, num_classes] - loss = K.switch(tf.size(y_true) > 0, - K.binary_crossentropy(target=y_true, output=y_pred), - tf.constant(0.0)) - loss = K.mean(loss) - return loss - - -############################################################ -# Data Generator -############################################################ - -def load_image_gt(dataset, config, image_id, augment=False, augmentation=None, - use_mini_mask=False): - """Load and return ground truth data for an image (image, mask, bounding boxes). - - augment: (deprecated. Use augmentation instead). If true, apply random - image augmentation. Currently, only horizontal flipping is offered. - augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation. - For example, passing imgaug.augmenters.Fliplr(0.5) flips images - right/left 50% of the time. - use_mini_mask: If False, returns full-size masks that are the same height - and width as the original image. These can be big, for example - 1024x1024x100 (for 100 instances). Mini masks are smaller, typically, - 224x224 and are generated by extracting the bounding box of the - object and resizing it to MINI_MASK_SHAPE. - - Returns: - image: [height, width, 3] - shape: the original shape of the image before resizing and cropping. - class_ids: [instance_count] Integer class IDs - bbox: [instance_count, (y1, x1, y2, x2)] - mask: [height, width, instance_count]. 
The height and width are those - of the image unless use_mini_mask is True, in which case they are - defined in MINI_MASK_SHAPE. - """ - # Load image and mask - image = dataset.load_image(image_id) - mask, class_ids = dataset.load_mask(image_id) - original_shape = image.shape - image, window, scale, padding, crop = utils.resize_image( - image, - min_dim=config.IMAGE_MIN_DIM, - min_scale=config.IMAGE_MIN_SCALE, - max_dim=config.IMAGE_MAX_DIM, - mode=config.IMAGE_RESIZE_MODE) - mask = utils.resize_mask(mask, scale, padding, crop) - - # Random horizontal flips. - # TODO: will be removed in a future update in favor of augmentation - if augment: - logging.warning("'augment' is deprecated. Use 'augmentation' instead.") - if random.randint(0, 1): - image = np.fliplr(image) - mask = np.fliplr(mask) - - # Augmentation - # This requires the imgaug lib (https://github.com/aleju/imgaug) - if augmentation: - import imgaug - - # Augmenters that are safe to apply to masks - # Some, such as Affine, have settings that make them unsafe, so always - # test your augmentation on masks - MASK_AUGMENTERS = ["Sequential", "SomeOf", "OneOf", "Sometimes", - "Fliplr", "Flipud", "CropAndPad", - "Affine", "PiecewiseAffine"] - - def hook(images, augmenter, parents, default): - """Determines which augmenters to apply to masks.""" - return augmenter.__class__.__name__ in MASK_AUGMENTERS - - # Store shapes before augmentation to compare - image_shape = image.shape - mask_shape = mask.shape - # Make augmenters deterministic to apply similarly to images and masks - det = augmentation.to_deterministic() - image = det.augment_image(image) - # Change mask to np.uint8 because imgaug doesn't support np.bool - mask = det.augment_image(mask.astype(np.uint8), - hooks=imgaug.HooksImages(activator=hook)) - # Verify that shapes didn't change - assert image.shape == image_shape, "Augmentation shouldn't change image size" - assert mask.shape == mask_shape, "Augmentation shouldn't change mask size" - # Change mask back to bool - mask = mask.astype(np.bool) - - # Note that some boxes might be all zeros if the corresponding mask got cropped out, - # so filter them out here. - _idx = np.sum(mask, axis=(0, 1)) > 0 - mask = mask[:, :, _idx] - class_ids = class_ids[_idx] - # Bounding boxes. Note that some boxes might be all zeros - # if the corresponding mask got cropped out. - # bbox: [num_instances, (y1, x1, y2, x2)] - bbox = utils.extract_bboxes(mask) - - # Active classes - # Different datasets have different classes, so track the - # classes supported in the dataset of this image. - active_class_ids = np.zeros([dataset.num_classes], dtype=np.int32) - source_class_ids = dataset.source_class_ids[dataset.image_info[image_id]["source"]] - active_class_ids[source_class_ids] = 1 - - # Resize masks to smaller size to reduce memory usage - if use_mini_mask: - mask = utils.minimize_mask(bbox, mask, config.MINI_MASK_SHAPE) - - # Image meta data - image_meta = compose_image_meta(image_id, original_shape, image.shape, - window, scale, active_class_ids) - - return image, image_meta, class_ids, bbox, mask - - - def build_detection_targets(rpn_rois, gt_class_ids, gt_boxes, gt_masks, config): - """Generate targets for training Stage 2 classifier and mask heads. - This is not used in normal training. It's useful for debugging or to train - the Mask RCNN heads without using the RPN head. - - Inputs: - rpn_rois: [N, (y1, x1, y2, x2)] proposal boxes.
- gt_class_ids: [instance count] Integer class IDs - gt_boxes: [instance count, (y1, x1, y2, x2)] - gt_masks: [height, width, instance count] Ground truth masks. Can be full - size or mini-masks. - - Returns: - rois: [TRAIN_ROIS_PER_IMAGE, (y1, x1, y2, x2)] - class_ids: [TRAIN_ROIS_PER_IMAGE]. Integer class IDs. - bboxes: [TRAIN_ROIS_PER_IMAGE, NUM_CLASSES, (y, x, log(h), log(w))]. Class-specific - bbox refinements. - masks: [TRAIN_ROIS_PER_IMAGE, height, width, NUM_CLASSES). Class specific masks cropped - to bbox boundaries and resized to neural network output size. - """ - assert rpn_rois.shape[0] > 0 - assert gt_class_ids.dtype == np.int32, "Expected int but got {}".format( - gt_class_ids.dtype) - assert gt_boxes.dtype == np.int32, "Expected int but got {}".format( - gt_boxes.dtype) - assert gt_masks.dtype == np.bool_, "Expected bool but got {}".format( - gt_masks.dtype) - - # It's common to add GT Boxes to ROIs but we don't do that here because - # according to XinLei Chen's paper, it doesn't help. - - # Trim empty padding in gt_boxes and gt_masks parts - instance_ids = np.where(gt_class_ids > 0)[0] - assert instance_ids.shape[0] > 0, "Image must contain instances." - gt_class_ids = gt_class_ids[instance_ids] - gt_boxes = gt_boxes[instance_ids] - gt_masks = gt_masks[:, :, instance_ids] - - # Compute areas of ROIs and ground truth boxes. - rpn_roi_area = (rpn_rois[:, 2] - rpn_rois[:, 0]) * \ - (rpn_rois[:, 3] - rpn_rois[:, 1]) - gt_box_area = (gt_boxes[:, 2] - gt_boxes[:, 0]) * \ - (gt_boxes[:, 3] - gt_boxes[:, 1]) - - # Compute overlaps [rpn_rois, gt_boxes] - overlaps = np.zeros((rpn_rois.shape[0], gt_boxes.shape[0])) - for i in range(overlaps.shape[1]): - gt = gt_boxes[i] - overlaps[:, i] = utils.compute_iou( - gt, rpn_rois, gt_box_area[i], rpn_roi_area) - - # Assign ROIs to GT boxes - rpn_roi_iou_argmax = np.argmax(overlaps, axis=1) - rpn_roi_iou_max = overlaps[np.arange( - overlaps.shape[0]), rpn_roi_iou_argmax] - # GT box assigned to each ROI - rpn_roi_gt_boxes = gt_boxes[rpn_roi_iou_argmax] - rpn_roi_gt_class_ids = gt_class_ids[rpn_roi_iou_argmax] - - # Positive ROIs are those with >= 0.5 IoU with a GT box. - fg_ids = np.where(rpn_roi_iou_max > 0.5)[0] - - # Negative ROIs are those with max IoU 0.1-0.5 (hard example mining) - # TODO: To hard example mine or not to hard example mine, that's the question - # bg_ids = np.where((rpn_roi_iou_max >= 0.1) & (rpn_roi_iou_max < 0.5))[0] - bg_ids = np.where(rpn_roi_iou_max < 0.5)[0] - - # Subsample ROIs. Aim for 33% foreground. - # FG - fg_roi_count = int(config.TRAIN_ROIS_PER_IMAGE * config.ROI_POSITIVE_RATIO) - if fg_ids.shape[0] > fg_roi_count: - keep_fg_ids = np.random.choice(fg_ids, fg_roi_count, replace=False) - else: - keep_fg_ids = fg_ids - # BG - remaining = config.TRAIN_ROIS_PER_IMAGE - keep_fg_ids.shape[0] - if bg_ids.shape[0] > remaining: - keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False) - else: - keep_bg_ids = bg_ids - # Combine indices of ROIs to keep - keep = np.concatenate([keep_fg_ids, keep_bg_ids]) - # Need more? - remaining = config.TRAIN_ROIS_PER_IMAGE - keep.shape[0] - if remaining > 0: - # Looks like we don't have enough samples to maintain the desired - # balance. Reduce requirements and fill in the rest. This is - # likely different from the Mask RCNN paper. - - # There is a small chance we have neither fg nor bg samples. 
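- # For example, with TRAIN_ROIS_PER_IMAGE = 200 and ROI_POSITIVE_RATIO = 0.33
- # (assumed config defaults, not set in this file), the subsampling above
- # keeps at most 66 foreground ROIs and fills the remaining 134 slots with
- # background ROIs.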
- if keep.shape[0] == 0: - # Pick bg regions with easier IoU threshold - bg_ids = np.where(rpn_roi_iou_max < 0.5)[0] - assert bg_ids.shape[0] >= remaining - keep_bg_ids = np.random.choice(bg_ids, remaining, replace=False) - assert keep_bg_ids.shape[0] == remaining - keep = np.concatenate([keep, keep_bg_ids]) - else: - # Fill the rest with repeated bg rois. - keep_extra_ids = np.random.choice( - keep_bg_ids, remaining, replace=True) - keep = np.concatenate([keep, keep_extra_ids]) - assert keep.shape[0] == config.TRAIN_ROIS_PER_IMAGE, \ - "keep doesn't match ROI batch size {}, {}".format( - keep.shape[0], config.TRAIN_ROIS_PER_IMAGE) - - # Reset the gt boxes assigned to BG ROIs. - rpn_roi_gt_boxes[keep_bg_ids, :] = 0 - rpn_roi_gt_class_ids[keep_bg_ids] = 0 - - # For each kept ROI, assign a class_id, and for FG ROIs also add bbox refinement. - rois = rpn_rois[keep] - roi_gt_boxes = rpn_roi_gt_boxes[keep] - roi_gt_class_ids = rpn_roi_gt_class_ids[keep] - roi_gt_assignment = rpn_roi_iou_argmax[keep] - - # Class-aware bbox deltas. [y, x, log(h), log(w)] - bboxes = np.zeros((config.TRAIN_ROIS_PER_IMAGE, - config.NUM_CLASSES, 4), dtype=np.float32) - pos_ids = np.where(roi_gt_class_ids > 0)[0] - bboxes[pos_ids, roi_gt_class_ids[pos_ids]] = utils.box_refinement( - rois[pos_ids], roi_gt_boxes[pos_ids, :4]) - # Normalize bbox refinements - bboxes /= config.BBOX_STD_DEV - - # Generate class-specific target masks - masks = np.zeros((config.TRAIN_ROIS_PER_IMAGE, config.MASK_SHAPE[0], config.MASK_SHAPE[1], config.NUM_CLASSES), - dtype=np.float32) - for i in pos_ids: - class_id = roi_gt_class_ids[i] - assert class_id > 0, "class id must be greater than 0" - gt_id = roi_gt_assignment[i] - class_mask = gt_masks[:, :, gt_id] - - if config.USE_MINI_MASK: - # Create a mask placeholder, the size of the image - placeholder = np.zeros(config.IMAGE_SHAPE[:2], dtype=bool) - # GT box - gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[gt_id] - gt_w = gt_x2 - gt_x1 - gt_h = gt_y2 - gt_y1 - # Resize mini mask to size of GT box - placeholder[gt_y1:gt_y2, gt_x1:gt_x2] = \ - np.round(utils.resize(class_mask, (gt_h, gt_w))).astype(bool) - # Place the mini mask in the placeholder - class_mask = placeholder - - # Pick part of the mask and resize it - y1, x1, y2, x2 = rois[i].astype(np.int32) - m = class_mask[y1:y2, x1:x2] - mask = utils.resize(m, config.MASK_SHAPE) - masks[i, :, :, class_id] = mask - - return rois, roi_gt_class_ids, bboxes, masks - - - def build_rpn_targets(image_shape, anchors, gt_class_ids, gt_boxes, config): - """Given the anchors and GT boxes, compute overlaps and identify positive - anchors and deltas to refine them to match their corresponding GT boxes. - - anchors: [num_anchors, (y1, x1, y2, x2)] - gt_class_ids: [num_gt_boxes] Integer class IDs. - gt_boxes: [num_gt_boxes, (y1, x1, y2, x2)] - - Returns: - rpn_match: [N] (int32) matches between anchors and GT boxes. - 1 = positive anchor, -1 = negative anchor, 0 = neutral - rpn_bbox: [N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas. - """ - # RPN Match: 1 = positive anchor, -1 = negative anchor, 0 = neutral - rpn_match = np.zeros([anchors.shape[0]], dtype=np.int32) - # RPN bounding boxes: [max anchors per image, (dy, dx, log(dh), log(dw))] - rpn_bbox = np.zeros((config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4)) - - # Handle COCO crowds - # A crowd box in COCO is a bounding box around several instances. Exclude - # them from training. A crowd box is given a negative class ID.
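- # For example (illustrative values): gt_class_ids = [12, -1, 34] marks the
- # second box as a crowd. It is filtered out below, and anchors overlapping
- # it are left neutral rather than marked negative.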
- crowd_ix = np.where(gt_class_ids < 0)[0] - if crowd_ix.shape[0] > 0: - # Filter out crowds from ground truth class IDs and boxes - non_crowd_ix = np.where(gt_class_ids > 0)[0] - crowd_boxes = gt_boxes[crowd_ix] - gt_class_ids = gt_class_ids[non_crowd_ix] - gt_boxes = gt_boxes[non_crowd_ix] - # Compute overlaps with crowd boxes [anchors, crowds] - crowd_overlaps = utils.compute_overlaps(anchors, crowd_boxes) - crowd_iou_max = np.amax(crowd_overlaps, axis=1) - no_crowd_bool = (crowd_iou_max < 0.001) - else: - # No anchors intersect a crowd - no_crowd_bool = np.ones([anchors.shape[0]], dtype=bool) - - # Compute overlaps [num_anchors, num_gt_boxes] - overlaps = utils.compute_overlaps(anchors, gt_boxes) - - # Match anchors to GT Boxes - # If an anchor overlaps a GT box with IoU >= 0.7 then it's positive. - # If an anchor overlaps a GT box with IoU < 0.3 then it's negative. - # Neutral anchors are those that don't match the conditions above, - # and they don't influence the loss function. - # However, don't keep any GT box unmatched (rare, but happens). Instead, - # match it to the closest anchor (even if its max IoU is < 0.3). - # - # 1. Set negative anchors first. They get overwritten below if a GT box is - # matched to them. Skip boxes in crowd areas. - anchor_iou_argmax = np.argmax(overlaps, axis=1) - anchor_iou_max = overlaps[np.arange(overlaps.shape[0]), anchor_iou_argmax] - rpn_match[(anchor_iou_max < 0.3) & (no_crowd_bool)] = -1 - # 2. Set an anchor for each GT box (regardless of IoU value). - # If multiple anchors have the same IoU, match all of them - gt_iou_argmax = np.argwhere(overlaps == np.max(overlaps, axis=0))[:, 0] - rpn_match[gt_iou_argmax] = 1 - # 3. Set anchors with high overlap as positive. - rpn_match[anchor_iou_max >= 0.7] = 1 - - # Subsample to balance positive and negative anchors - # Don't let positives be more than half the anchors - ids = np.where(rpn_match == 1)[0] - extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE // 2) - if extra > 0: - # Reset the extra ones to neutral - ids = np.random.choice(ids, extra, replace=False) - rpn_match[ids] = 0 - # Same for negative proposals - ids = np.where(rpn_match == -1)[0] - extra = len(ids) - (config.RPN_TRAIN_ANCHORS_PER_IMAGE - - np.sum(rpn_match == 1)) - if extra > 0: - # Reset the extra ones to neutral - ids = np.random.choice(ids, extra, replace=False) - rpn_match[ids] = 0 - - # For positive anchors, compute shift and scale needed to transform them - # to match the corresponding GT boxes. - ids = np.where(rpn_match == 1)[0] - ix = 0 # index into rpn_bbox - # TODO: use box_refinement() rather than duplicating the code here - for i, a in zip(ids, anchors[ids]): - # Closest gt box (it might have IoU < 0.7) - gt = gt_boxes[anchor_iou_argmax[i]] - - # Convert coordinates to center plus width/height. - # GT Box - gt_h = gt[2] - gt[0] - gt_w = gt[3] - gt[1] - gt_center_y = gt[0] + 0.5 * gt_h - gt_center_x = gt[1] + 0.5 * gt_w - # Anchor - a_h = a[2] - a[0] - a_w = a[3] - a[1] - a_center_y = a[0] + 0.5 * a_h - a_center_x = a[1] + 0.5 * a_w - - # Compute the bbox refinement that the RPN should predict. - rpn_bbox[ix] = [ - (gt_center_y - a_center_y) / a_h, - (gt_center_x - a_center_x) / a_w, - np.log(gt_h / a_h), - np.log(gt_w / a_w), - ] - # Normalize - rpn_bbox[ix] /= config.RPN_BBOX_STD_DEV - ix += 1 - - return rpn_match, rpn_bbox - - - def generate_random_rois(image_shape, count, gt_class_ids, gt_boxes): - """Generates ROI proposals similar to what a region proposal network - would generate.
- - image_shape: [Height, Width, Depth] - count: Number of ROIs to generate - gt_class_ids: [N] Integer ground truth class IDs - gt_boxes: [N, (y1, x1, y2, x2)] Ground truth boxes in pixels. - - Returns: [count, (y1, x1, y2, x2)] ROI boxes in pixels. - """ - # placeholder - rois = np.zeros((count, 4), dtype=np.int32) - - # Generate random ROIs around GT boxes (90% of count) - rois_per_box = int(0.9 * count / gt_boxes.shape[0]) - for i in range(gt_boxes.shape[0]): - gt_y1, gt_x1, gt_y2, gt_x2 = gt_boxes[i] - h = gt_y2 - gt_y1 - w = gt_x2 - gt_x1 - # random boundaries - r_y1 = max(gt_y1 - h, 0) - r_y2 = min(gt_y2 + h, image_shape[0]) - r_x1 = max(gt_x1 - w, 0) - r_x2 = min(gt_x2 + w, image_shape[1]) - - # To avoid generating boxes with zero area, we generate double what - # we need and filter out the extra. If we get fewer valid boxes - # than we need, we loop and try again. - while True: - y1y2 = np.random.randint(r_y1, r_y2, (rois_per_box * 2, 2)) - x1x2 = np.random.randint(r_x1, r_x2, (rois_per_box * 2, 2)) - # Filter out zero area boxes - threshold = 1 - y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= - threshold][:rois_per_box] - x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= - threshold][:rois_per_box] - if y1y2.shape[0] == rois_per_box and x1x2.shape[0] == rois_per_box: - break - - # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape - # into x1, y1, x2, y2 order - x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) - y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) - box_rois = np.hstack([y1, x1, y2, x2]) - rois[rois_per_box * i:rois_per_box * (i + 1)] = box_rois - - # Generate random ROIs anywhere in the image (10% of count) - remaining_count = count - (rois_per_box * gt_boxes.shape[0]) - # To avoid generating boxes with zero area, we generate double what - # we need and filter out the extra. If we get fewer valid boxes - # than we need, we loop and try again. - while True: - y1y2 = np.random.randint(0, image_shape[0], (remaining_count * 2, 2)) - x1x2 = np.random.randint(0, image_shape[1], (remaining_count * 2, 2)) - # Filter out zero area boxes - threshold = 1 - y1y2 = y1y2[np.abs(y1y2[:, 0] - y1y2[:, 1]) >= - threshold][:remaining_count] - x1x2 = x1x2[np.abs(x1x2[:, 0] - x1x2[:, 1]) >= - threshold][:remaining_count] - if y1y2.shape[0] == remaining_count and x1x2.shape[0] == remaining_count: - break - - # Sort on axis 1 to ensure x1 <= x2 and y1 <= y2 and then reshape - # into x1, y1, x2, y2 order - x1, x2 = np.split(np.sort(x1x2, axis=1), 2, axis=1) - y1, y2 = np.split(np.sort(y1y2, axis=1), 2, axis=1) - global_rois = np.hstack([y1, x1, y2, x2]) - rois[-remaining_count:] = global_rois - return rois - - -def data_generator(dataset, config, shuffle=True, augment=False, augmentation=None, - random_rois=0, batch_size=1, detection_targets=False, - no_augmentation_sources=None): - """A generator that returns images and corresponding target class ids, - bounding box deltas, and masks. - - dataset: The Dataset object to pick data from - config: The model config object - shuffle: If True, shuffles the samples before every epoch - augment: (deprecated. Use augmentation instead). If true, apply random - image augmentation. Currently, only horizontal flipping is offered. - augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) augmentation. - For example, passing imgaug.augmenters.Fliplr(0.5) flips images - right/left 50% of the time. - random_rois: If > 0 then generate proposals to be used to train the - network classifier and mask heads. 
Useful if training - the Mask RCNN part without the RPN. - batch_size: How many images to return in each call - detection_targets: If True, generate detection targets (class IDs, bbox - deltas, and masks). Typically for debugging or visualizations because - in training, detection targets are generated by DetectionTargetLayer. - no_augmentation_sources: Optional. List of sources to exclude for - augmentation. A source is a string that identifies a dataset and is - defined in the Dataset class. - - Returns a Python generator. Upon calling next() on it, the - generator returns two lists, inputs and outputs. The contents - of the lists differ depending on the received arguments: - inputs list: - - images: [batch, H, W, C] - - image_meta: [batch, (meta data)] Image details. See compose_image_meta() - - rpn_match: [batch, N] Integer (1=positive anchor, -1=negative, 0=neutral) - - rpn_bbox: [batch, N, (dy, dx, log(dh), log(dw))] Anchor bbox deltas. - - gt_class_ids: [batch, MAX_GT_INSTANCES] Integer class IDs - - gt_boxes: [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] - - gt_masks: [batch, height, width, MAX_GT_INSTANCES]. The height and width - are those of the image unless use_mini_mask is True, in which - case they are defined in MINI_MASK_SHAPE. - - outputs list: Usually empty in regular training. But if detection_targets - is True then the outputs list contains target class_ids, bbox deltas, - and masks. - """ - b = 0 # batch item index - image_index = -1 - image_ids = np.copy(dataset.image_ids) - error_count = 0 - no_augmentation_sources = no_augmentation_sources or [] - - # Anchors - # [anchor_count, (y1, x1, y2, x2)] - backbone_shapes = compute_backbone_shapes(config, config.IMAGE_SHAPE) - anchors = utils.generate_pyramid_anchors(config.RPN_ANCHOR_SCALES, - config.RPN_ANCHOR_RATIOS, - backbone_shapes, - config.BACKBONE_STRIDES, - config.RPN_ANCHOR_STRIDE) - - # Keras requires a generator to run indefinitely. - while True: - try: - # Increment index to pick next image. Shuffle if at the start of an epoch. - image_index = (image_index + 1) % len(image_ids) - if shuffle and image_index == 0: - np.random.shuffle(image_ids) - - # Get GT bounding boxes and masks for image. - image_id = image_ids[image_index] - - # If the image source is not to be augmented, pass None as augmentation - if dataset.image_info[image_id]['source'] in no_augmentation_sources: - image, image_meta, gt_class_ids, gt_boxes, gt_masks = \ - load_image_gt(dataset, config, image_id, augment=augment, - augmentation=None, - use_mini_mask=config.USE_MINI_MASK) - else: - image, image_meta, gt_class_ids, gt_boxes, gt_masks = \ - load_image_gt(dataset, config, image_id, augment=augment, - augmentation=augmentation, - use_mini_mask=config.USE_MINI_MASK) - - # Skip images that have no instances. This can happen in cases - # where we train on a subset of classes and the image doesn't - # have any of the classes we care about.
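- # For example, when training on a subset that only includes 'person'
- # (a hypothetical class name), an image whose instances are all of other
- # classes yields no positive gt_class_ids and is skipped here.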
- if not np.any(gt_class_ids > 0): - continue - - # RPN Targets - rpn_match, rpn_bbox = build_rpn_targets(image.shape, anchors, - gt_class_ids, gt_boxes, config) - - # Mask R-CNN Targets - if random_rois: - rpn_rois = generate_random_rois( - image.shape, random_rois, gt_class_ids, gt_boxes) - if detection_targets: - rois, mrcnn_class_ids, mrcnn_bbox, mrcnn_mask =\ - build_detection_targets( - rpn_rois, gt_class_ids, gt_boxes, gt_masks, config) - - # Init batch arrays - if b == 0: - batch_image_meta = np.zeros( - (batch_size,) + image_meta.shape, dtype=image_meta.dtype) - batch_rpn_match = np.zeros( - [batch_size, anchors.shape[0], 1], dtype=rpn_match.dtype) - batch_rpn_bbox = np.zeros( - [batch_size, config.RPN_TRAIN_ANCHORS_PER_IMAGE, 4], dtype=rpn_bbox.dtype) - batch_images = np.zeros( - (batch_size,) + image.shape, dtype=np.float32) - batch_gt_class_ids = np.zeros( - (batch_size, config.MAX_GT_INSTANCES), dtype=np.int32) - batch_gt_boxes = np.zeros( - (batch_size, config.MAX_GT_INSTANCES, 4), dtype=np.int32) - batch_gt_masks = np.zeros( - (batch_size, gt_masks.shape[0], gt_masks.shape[1], - config.MAX_GT_INSTANCES), dtype=gt_masks.dtype) - if random_rois: - batch_rpn_rois = np.zeros( - (batch_size, rpn_rois.shape[0], 4), dtype=rpn_rois.dtype) - if detection_targets: - batch_rois = np.zeros( - (batch_size,) + rois.shape, dtype=rois.dtype) - batch_mrcnn_class_ids = np.zeros( - (batch_size,) + mrcnn_class_ids.shape, dtype=mrcnn_class_ids.dtype) - batch_mrcnn_bbox = np.zeros( - (batch_size,) + mrcnn_bbox.shape, dtype=mrcnn_bbox.dtype) - batch_mrcnn_mask = np.zeros( - (batch_size,) + mrcnn_mask.shape, dtype=mrcnn_mask.dtype) - - # If more instances than fits in the array, sub-sample from them. - if gt_boxes.shape[0] > config.MAX_GT_INSTANCES: - ids = np.random.choice( - np.arange(gt_boxes.shape[0]), config.MAX_GT_INSTANCES, replace=False) - gt_class_ids = gt_class_ids[ids] - gt_boxes = gt_boxes[ids] - gt_masks = gt_masks[:, :, ids] - - # Add to batch - batch_image_meta[b] = image_meta - batch_rpn_match[b] = rpn_match[:, np.newaxis] - batch_rpn_bbox[b] = rpn_bbox - batch_images[b] = mold_image(image.astype(np.float32), config) - batch_gt_class_ids[b, :gt_class_ids.shape[0]] = gt_class_ids - batch_gt_boxes[b, :gt_boxes.shape[0]] = gt_boxes - batch_gt_masks[b, :, :, :gt_masks.shape[-1]] = gt_masks - if random_rois: - batch_rpn_rois[b] = rpn_rois - if detection_targets: - batch_rois[b] = rois - batch_mrcnn_class_ids[b] = mrcnn_class_ids - batch_mrcnn_bbox[b] = mrcnn_bbox - batch_mrcnn_mask[b] = mrcnn_mask - b += 1 - - # Batch full? 
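- # For example (illustrative shapes): with batch_size = 2 and 1024x1024
- # molded images, batch_images is [2, 1024, 1024, 3]; with mini masks of
- # shape (56, 56) (assumed default MINI_MASK_SHAPE), batch_gt_masks is
- # [2, 56, 56, MAX_GT_INSTANCES].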
- if b >= batch_size: - inputs = [batch_images, batch_image_meta, batch_rpn_match, batch_rpn_bbox, - batch_gt_class_ids, batch_gt_boxes, batch_gt_masks] - outputs = [] - - if random_rois: - inputs.extend([batch_rpn_rois]) - if detection_targets: - inputs.extend([batch_rois]) - # Keras requires that output and targets have the same number of dimensions - batch_mrcnn_class_ids = np.expand_dims( - batch_mrcnn_class_ids, -1) - outputs.extend( - [batch_mrcnn_class_ids, batch_mrcnn_bbox, batch_mrcnn_mask]) - - yield inputs, outputs - - # start a new batch - b = 0 - except (GeneratorExit, KeyboardInterrupt): - raise - except: - # Log it and skip the image - logging.exception("Error processing image {}".format( - dataset.image_info[image_id])) - error_count += 1 - if error_count > 5: - raise - - - ############################################################ - # MaskRCNN Class - ############################################################ - - class MaskRCNN(): - """Encapsulates the Mask RCNN model functionality. - - The actual Keras model is in the keras_model property. - """ - - def __init__(self, mode, config, model_dir): - """ - mode: Either "training" or "inference" - config: A Sub-class of the Config class - model_dir: Directory to save training logs and trained weights - """ - assert mode in ['training', 'inference'] - self.mode = mode - self.config = config - self.model_dir = model_dir - self.set_log_dir() - self.keras_model = self.build(mode=mode, config=config) - - def build(self, mode, config): - """Build Mask R-CNN architecture. - mode: Either "training" or "inference". The inputs and - outputs of the model differ accordingly. - """ - assert mode in ['training', 'inference'] - - # Image size must be divisible by 2 multiple times - h, w = config.IMAGE_SHAPE[:2] - if h / 2**6 != int(h / 2**6) or w / 2**6 != int(w / 2**6): - raise Exception("Image size must be divisible by 2 at least 6 times " - "to avoid fractions when downscaling and upscaling. " - "For example, use 256, 320, 384, 448, 512, ... etc. ") - - # Inputs - input_image = KL.Input( - shape=[None, None, config.IMAGE_SHAPE[2]], name="input_image") - input_image_meta = KL.Input(shape=[config.IMAGE_META_SIZE], - name="input_image_meta") - if mode == "training": - # RPN GT - input_rpn_match = KL.Input( - shape=[None, 1], name="input_rpn_match", dtype=tf.int32) - input_rpn_bbox = KL.Input( - shape=[None, 4], name="input_rpn_bbox", dtype=tf.float32) - - # Detection GT (class IDs, bounding boxes, and masks) - # 1. GT Class IDs (zero padded) - input_gt_class_ids = KL.Input( - shape=[None], name="input_gt_class_ids", dtype=tf.int32) - # 2. GT Boxes in pixels (zero padded) - # [batch, MAX_GT_INSTANCES, (y1, x1, y2, x2)] in image coordinates - input_gt_boxes = KL.Input( - shape=[None, 4], name="input_gt_boxes", dtype=tf.float32) - # Normalize coordinates - gt_boxes = KL.Lambda(lambda x: norm_boxes_graph( - x, K.shape(input_image)[1:3]))(input_gt_boxes) - # 3. GT Masks (zero padded) - # [batch, height, width, MAX_GT_INSTANCES] - if config.USE_MINI_MASK: - input_gt_masks = KL.Input( - shape=[config.MINI_MASK_SHAPE[0], - config.MINI_MASK_SHAPE[1], None], - name="input_gt_masks", dtype=bool) - else: - input_gt_masks = KL.Input( - shape=[config.IMAGE_SHAPE[0], config.IMAGE_SHAPE[1], None], - name="input_gt_masks", dtype=bool) - elif mode == "inference": - # Anchors in normalized coordinates - input_anchors = KL.Input(shape=[None, 4], name="input_anchors") - - # Build the shared convolutional layers.
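- # For example, with a 1024x1024 input the backbone stages C2-C5 arrive at
- # strides 4, 8, 16, and 32 (256, 128, 64, and 32 pixels per side); P6 below
- # adds a stride-64 level that only the RPN uses.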
- # Bottom-up Layers - # Returns a list of the last layers of each stage, 5 in total. - # Don't create the head (stage 5), so we pick the 4th item in the list. - if callable(config.BACKBONE): - _, C2, C3, C4, C5 = config.BACKBONE(input_image, stage5=True, - train_bn=config.TRAIN_BN) - else: - _, C2, C3, C4, C5 = resnet_graph(input_image, config.BACKBONE, - stage5=True, train_bn=config.TRAIN_BN) - # Top-down Layers - # TODO: add assert to verify feature map sizes match what's in config - P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, - (1, 1), name='fpn_c5p5')(C5) - P4 = KL.Add(name="fpn_p4add")([ - KL.UpSampling2D(size=(2, 2), name="fpn_p5upsampled")(P5), - KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c4p4')(C4)]) - P3 = KL.Add(name="fpn_p3add")([ - KL.UpSampling2D(size=(2, 2), name="fpn_p4upsampled")(P4), - KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c3p3')(C3)]) - P2 = KL.Add(name="fpn_p2add")([ - KL.UpSampling2D(size=(2, 2), name="fpn_p3upsampled")(P3), - KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (1, 1), name='fpn_c2p2')(C2)]) - # Attach 3x3 conv to all P layers to get the final feature maps. - P2 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), - padding="SAME", name="fpn_p2")(P2) - P3 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), - padding="SAME", name="fpn_p3")(P3) - P4 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), - padding="SAME", name="fpn_p4")(P4) - P5 = KL.Conv2D(config.TOP_DOWN_PYRAMID_SIZE, (3, 3), - padding="SAME", name="fpn_p5")(P5) - # P6 is used for the 5th anchor scale in RPN. Generated by - # subsampling from P5 with stride of 2. - P6 = KL.MaxPooling2D(pool_size=(1, 1), strides=2, name="fpn_p6")(P5) - - # Note that P6 is used in RPN, but not in the classifier heads. - rpn_feature_maps = [P2, P3, P4, P5, P6] - mrcnn_feature_maps = [P2, P3, P4, P5] - - # Anchors - if mode == "training": - anchors = self.get_anchors(config.IMAGE_SHAPE) - # Duplicate across the batch dimension because Keras requires it - # TODO: can this be optimized to avoid duplicating the anchors? - anchors = np.broadcast_to( - anchors, (config.BATCH_SIZE,) + anchors.shape) - # A hack to get around Keras's bad support for constants - anchors = KL.Lambda(lambda x: tf.Variable( - anchors), name="anchors")(input_image) - else: - anchors = input_anchors - - # RPN Model - rpn = build_rpn_model(config.RPN_ANCHOR_STRIDE, - len(config.RPN_ANCHOR_RATIOS), config.TOP_DOWN_PYRAMID_SIZE) - # Loop through pyramid layers - layer_outputs = [] # list of lists - for p in rpn_feature_maps: - layer_outputs.append(rpn([p])) - # Concatenate layer outputs - # Convert from list of lists of level outputs to list of lists - # of outputs across levels. - # e.g. [[a1, b1, c1], [a2, b2, c2]] => [[a1, a2], [b1, b2], [c1, c2]] - output_names = ["rpn_class_logits", "rpn_class", "rpn_bbox"] - outputs = list(zip(*layer_outputs)) - outputs = [KL.Concatenate(axis=1, name=n)(list(o)) - for o, n in zip(outputs, output_names)] - - rpn_class_logits, rpn_class, rpn_bbox = outputs - - # Generate proposals - # Proposals are [batch, N, (y1, x1, y2, x2)] in normalized coordinates - # and zero padded. - proposal_count = config.POST_NMS_ROIS_TRAINING if mode == "training"\ - else config.POST_NMS_ROIS_INFERENCE - rpn_rois = ProposalLayer( - proposal_count=proposal_count, - nms_threshold=config.RPN_NMS_THRESHOLD, - name="ROI", - config=config)([rpn_class, rpn_bbox, anchors]) - - if mode == "training": - # Class ID mask to mark class IDs supported by the dataset the image - # came from.
- active_class_ids = KL.Lambda( - lambda x: parse_image_meta_graph(x)["active_class_ids"] - )(input_image_meta) - - if not config.USE_RPN_ROIS: - # Ignore predicted ROIs and use ROIs provided as an input. - input_rois = KL.Input(shape=[config.POST_NMS_ROIS_TRAINING, 4], - name="input_roi", dtype=np.int32) - # Normalize coordinates - target_rois = KL.Lambda(lambda x: norm_boxes_graph( - x, K.shape(input_image)[1:3]))(input_rois) - else: - target_rois = rpn_rois - - # Generate detection targets - # Subsamples proposals and generates target outputs for training - # Note that proposal class IDs, gt_boxes, and gt_masks are zero - # padded. Equally, returned rois and targets are zero padded. - rois, target_class_ids, target_bbox, target_mask =\ - DetectionTargetLayer(config, name="proposal_targets")([ - target_rois, input_gt_class_ids, gt_boxes, input_gt_masks]) - - # Network Heads - # TODO: verify that this handles zero padded ROIs - mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\ - fpn_classifier_graph(rois, mrcnn_feature_maps, input_image_meta, - config.POOL_SIZE, config.NUM_CLASSES, - train_bn=config.TRAIN_BN, - fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE) - - mrcnn_mask = build_fpn_mask_graph(rois, mrcnn_feature_maps, - input_image_meta, - config.MASK_POOL_SIZE, - config.NUM_CLASSES, - train_bn=config.TRAIN_BN) - - # TODO: clean up (use tf.identify if necessary) - output_rois = KL.Lambda(lambda x: x * 1, name="output_rois")(rois) - - # Losses - rpn_class_loss = KL.Lambda(lambda x: rpn_class_loss_graph(*x), name="rpn_class_loss")( - [input_rpn_match, rpn_class_logits]) - rpn_bbox_loss = KL.Lambda(lambda x: rpn_bbox_loss_graph(config, *x), name="rpn_bbox_loss")( - [input_rpn_bbox, input_rpn_match, rpn_bbox]) - class_loss = KL.Lambda(lambda x: mrcnn_class_loss_graph(*x), name="mrcnn_class_loss")( - [target_class_ids, mrcnn_class_logits, active_class_ids]) - bbox_loss = KL.Lambda(lambda x: mrcnn_bbox_loss_graph(*x), name="mrcnn_bbox_loss")( - [target_bbox, target_class_ids, mrcnn_bbox]) - mask_loss = KL.Lambda(lambda x: mrcnn_mask_loss_graph(*x), name="mrcnn_mask_loss")( - [target_mask, target_class_ids, mrcnn_mask]) - - # Model - inputs = [input_image, input_image_meta, - input_rpn_match, input_rpn_bbox, input_gt_class_ids, input_gt_boxes, input_gt_masks] - if not config.USE_RPN_ROIS: - inputs.append(input_rois) - outputs = [rpn_class_logits, rpn_class, rpn_bbox, - mrcnn_class_logits, mrcnn_class, mrcnn_bbox, mrcnn_mask, - rpn_rois, output_rois, - rpn_class_loss, rpn_bbox_loss, class_loss, bbox_loss, mask_loss] - model = KM.Model(inputs, outputs, name='mask_rcnn') - else: - # Network Heads - # Proposal classifier and BBox regressor heads - mrcnn_class_logits, mrcnn_class, mrcnn_bbox =\ - fpn_classifier_graph(rpn_rois, mrcnn_feature_maps, input_image_meta, - config.POOL_SIZE, config.NUM_CLASSES, - train_bn=config.TRAIN_BN, - fc_layers_size=config.FPN_CLASSIF_FC_LAYERS_SIZE) - - # Detections - # output is [batch, num_detections, (y1, x1, y2, x2, class_id, score)] in - # normalized coordinates - detections = DetectionLayer(config, name="mrcnn_detection")( - [rpn_rois, mrcnn_class, mrcnn_bbox, input_image_meta]) - - # Create masks for detections - detection_boxes = KL.Lambda(lambda x: x[..., :4])(detections) - mrcnn_mask = build_fpn_mask_graph(detection_boxes, mrcnn_feature_maps, - input_image_meta, - config.MASK_POOL_SIZE, - config.NUM_CLASSES, - train_bn=config.TRAIN_BN) - - model = KM.Model([input_image, input_image_meta, input_anchors], - [detections, mrcnn_class, mrcnn_bbox, - mrcnn_mask, 
rpn_rois, rpn_class, rpn_bbox], - name='mask_rcnn') - - # Add multi-GPU support. - if config.GPU_COUNT > 1: - from mrcnn.parallel_model import ParallelModel - model = ParallelModel(model, config.GPU_COUNT) - - return model - - def find_last(self): - """Finds the last checkpoint file of the last trained model in the - model directory. - Returns: - The path of the last checkpoint file - """ - # Get directory names. Each directory corresponds to a model - dir_names = next(os.walk(self.model_dir))[1] - key = self.config.NAME.lower() - dir_names = filter(lambda f: f.startswith(key), dir_names) - dir_names = sorted(dir_names) - if not dir_names: - import errno - raise FileNotFoundError( - errno.ENOENT, - "Could not find model directory under {}".format(self.model_dir)) - # Pick last directory - dir_name = os.path.join(self.model_dir, dir_names[-1]) - # Find the last checkpoint - checkpoints = next(os.walk(dir_name))[2] - checkpoints = filter(lambda f: f.startswith("mask_rcnn"), checkpoints) - checkpoints = sorted(checkpoints) - if not checkpoints: - import errno - raise FileNotFoundError( - errno.ENOENT, "Could not find weight files in {}".format(dir_name)) - checkpoint = os.path.join(dir_name, checkpoints[-1]) - return checkpoint - - def load_weights(self, filepath, by_name=False, exclude=None): - """Modified version of the corresponding Keras function with - the addition of multi-GPU support and the ability to exclude - some layers from loading. - exclude: list of layer names to exclude - """ - import h5py - # Conditional import to support versions of Keras before 2.2 - # TODO: remove in about 6 months (end of 2018) - try: - from keras.engine import saving - except ImportError: - # Keras before 2.2 used the 'topology' namespace. - from keras.engine import topology as saving - - if exclude: - by_name = True - - if h5py is None: - raise ImportError('`load_weights` requires h5py.') - f = h5py.File(filepath, mode='r') - if 'layer_names' not in f.attrs and 'model_weights' in f: - f = f['model_weights'] - - # In multi-GPU training, we wrap the model. Get layers - # of the inner model because they have the weights. - keras_model = self.keras_model - layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\ - else keras_model.layers - - # Exclude some layers - if exclude: - layers = filter(lambda l: l.name not in exclude, layers) - - if by_name: - saving.load_weights_from_hdf5_group_by_name(f, layers) - else: - saving.load_weights_from_hdf5_group(f, layers) - if hasattr(f, 'close'): - f.close() - - # Update the log directory - self.set_log_dir(filepath) - - def get_imagenet_weights(self): - """Downloads ImageNet trained weights from Keras. - Returns path to weights file. - """ - from keras.utils.data_utils import get_file - TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/'\ - 'releases/download/v0.2/'\ - 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' - weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='a268eb855778b3df3c7506639542a6af') - return weights_path - - def compile(self, learning_rate, momentum): - """Gets the model ready for training. Adds losses, regularization, and - metrics. Then calls the Keras compile() function. 
- """ - # Optimizer object - optimizer = keras.optimizers.SGD( - lr=learning_rate, momentum=momentum, - clipnorm=self.config.GRADIENT_CLIP_NORM) - # Add Losses - # First, clear previously set losses to avoid duplication - self.keras_model._losses = [] - self.keras_model._per_input_losses = {} - loss_names = [ - "rpn_class_loss", "rpn_bbox_loss", - "mrcnn_class_loss", "mrcnn_bbox_loss", "mrcnn_mask_loss"] - for name in loss_names: - layer = self.keras_model.get_layer(name) - if layer.output in self.keras_model.losses: - continue - loss = ( - tf.reduce_mean(layer.output, keepdims=True) - * self.config.LOSS_WEIGHTS.get(name, 1.)) - self.keras_model.add_loss(loss) - - # Add L2 Regularization - # Skip gamma and beta weights of batch normalization layers. - reg_losses = [ - keras.regularizers.l2(self.config.WEIGHT_DECAY)( - w) / tf.cast(tf.size(w), tf.float32) - for w in self.keras_model.trainable_weights - if 'gamma' not in w.name and 'beta' not in w.name] - self.keras_model.add_loss(tf.add_n(reg_losses)) - - # Compile - self.keras_model.compile( - optimizer=optimizer, - loss=[None] * len(self.keras_model.outputs)) - - # Add metrics for losses - for name in loss_names: - if name in self.keras_model.metrics_names: - continue - layer = self.keras_model.get_layer(name) - self.keras_model.metrics_names.append(name) - loss = ( - tf.reduce_mean(layer.output, keepdims=True) - * self.config.LOSS_WEIGHTS.get(name, 1.)) - self.keras_model.metrics_tensors.append(loss) - - def set_trainable(self, layer_regex, keras_model=None, indent=0, verbose=1): - """Sets model layers as trainable if their names match - the given regular expression. - """ - # Print message on the first call (but not on recursive calls) - if verbose > 0 and keras_model is None: - log("Selecting layers to train") - - keras_model = keras_model or self.keras_model - - # In multi-GPU training, we wrap the model. Get layers - # of the inner model because they have the weights. - layers = keras_model.inner_model.layers if hasattr(keras_model, "inner_model")\ - else keras_model.layers - - for layer in layers: - # Is the layer a model? - if layer.__class__.__name__ == 'Model': - print("In model: ", layer.name) - self.set_trainable( - layer_regex, keras_model=layer, indent=indent + 4) - continue - - if not layer.weights: - continue - # Is it trainable? - trainable = bool(re.fullmatch(layer_regex, layer.name)) - # Update layer. If layer is a container, update inner layer. - if layer.__class__.__name__ == 'TimeDistributed': - layer.layer.trainable = trainable - else: - layer.trainable = trainable - # Print trainable layer names - if trainable and verbose > 0: - log("{}{:20} ({})".format(" " * indent, layer.name, - layer.__class__.__name__)) - - def set_log_dir(self, model_path=None): - """Sets the model log directory and epoch counter. - - model_path: If None, or a format different from what this code uses - then set a new log directory and start epochs from 0. Otherwise, - extract the log directory and the epoch counter from the file - name. - """ - # Set date and epoch counter as if starting a new model - self.epoch = 0 - now = datetime.datetime.now() - - # If we have a model path with date and epochs use them - if model_path: - # Continue from we left of. 
Get epoch and date from the file name - # A sample model path might look like: - # \path\to\logs\coco20171029T2315\mask_rcnn_coco_0001.h5 (Windows) - # /path/to/logs/coco20171029T2315/mask_rcnn_coco_0001.h5 (Linux) - regex = r".*[/\\][\w-]+(\d{4})(\d{2})(\d{2})T(\d{2})(\d{2})[/\\]mask\_rcnn\_[\w-]+(\d{4})\.h5" - m = re.match(regex, model_path) - if m: - now = datetime.datetime(int(m.group(1)), int(m.group(2)), int(m.group(3)), - int(m.group(4)), int(m.group(5))) - # Epoch number in file is 1-based, and in Keras code it's 0-based. - # So, adjust for that then increment by one to start from the next epoch - self.epoch = int(m.group(6)) - 1 + 1 - print('Re-starting from epoch %d' % self.epoch) - - # Directory for training logs - self.log_dir = os.path.join(self.model_dir, "{}{:%Y%m%dT%H%M}".format( - self.config.NAME.lower(), now)) - - # Path to save after each epoch. Include placeholders that get filled by Keras. - self.checkpoint_path = os.path.join(self.log_dir, "mask_rcnn_{}_*epoch*.h5".format( - self.config.NAME.lower())) - self.checkpoint_path = self.checkpoint_path.replace( - "*epoch*", "{epoch:04d}") - - def train(self, train_dataset, val_dataset, learning_rate, epochs, layers, - augmentation=None, custom_callbacks=None, no_augmentation_sources=None): - """Train the model. - train_dataset, val_dataset: Training and validation Dataset objects. - learning_rate: The learning rate to train with - epochs: Number of training epochs. Note that previous training epochs - are considered to be done already, so this actually determines - the epochs to train in total rather than in this particular - call. - layers: Allows selecting which layers to train. It can be: - - A regular expression to match layer names to train - - One of these predefined values: - heads: The RPN, classifier and mask heads of the network - all: All the layers - 3+: Train Resnet stage 3 and up - 4+: Train Resnet stage 4 and up - 5+: Train Resnet stage 5 and up - augmentation: Optional. An imgaug (https://github.com/aleju/imgaug) - augmentation. For example, passing imgaug.augmenters.Fliplr(0.5) - flips images right/left 50% of the time. You can pass complex - augmentations as well. This augmentation applies 50% of the - time, and when it does it flips images right/left half the time - and adds a Gaussian blur with a random sigma in range 0 to 5. - - augmentation = imgaug.augmenters.Sometimes(0.5, [ - imgaug.augmenters.Fliplr(0.5), - imgaug.augmenters.GaussianBlur(sigma=(0.0, 5.0)) - ]) - custom_callbacks: Optional. Add custom callbacks to be called - with the keras fit_generator method. Must be a list of type keras.callbacks. - no_augmentation_sources: Optional. List of sources to exclude for - augmentation. A source is a string that identifies a dataset and is - defined in the Dataset class. - """ - assert self.mode == "training", "Create model in training mode."
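- # For example, layers="heads" expands to the regex below and trains only
- # the RPN, FPN, and Mask R-CNN head layers, while layers="all" fine-tunes
- # the ResNet backbone as well.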
- - # Pre-defined layer regular expressions - layer_regex = { - # all layers but the backbone - "heads": r"(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", - # From a specific Resnet stage and up - "3+": r"(res3.*)|(bn3.*)|(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", - "4+": r"(res4.*)|(bn4.*)|(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", - "5+": r"(res5.*)|(bn5.*)|(mrcnn\_.*)|(rpn\_.*)|(fpn\_.*)", - # All layers - "all": ".*", - } - if layers in layer_regex.keys(): - layers = layer_regex[layers] - - # Data generators - train_generator = data_generator(train_dataset, self.config, shuffle=True, - augmentation=augmentation, - batch_size=self.config.BATCH_SIZE, - no_augmentation_sources=no_augmentation_sources) - val_generator = data_generator(val_dataset, self.config, shuffle=True, - batch_size=self.config.BATCH_SIZE) - - # Create log_dir if it does not exist - if not os.path.exists(self.log_dir): - os.makedirs(self.log_dir) - - # Callbacks - callbacks = [ - keras.callbacks.TensorBoard(log_dir=self.log_dir, - histogram_freq=0, write_graph=True, write_images=False), - keras.callbacks.ModelCheckpoint(self.checkpoint_path, - verbose=0, save_weights_only=True), - ] - - # Add custom callbacks to the list - if custom_callbacks: - callbacks += custom_callbacks - - # Train - log("\nStarting at epoch {}. LR={}\n".format(self.epoch, learning_rate)) - log("Checkpoint Path: {}".format(self.checkpoint_path)) - self.set_trainable(layers) - self.compile(learning_rate, self.config.LEARNING_MOMENTUM) - - # Work-around for Windows: Keras fails on Windows when using - # multiprocessing workers. See discussion here: - # https://github.com/matterport/Mask_RCNN/issues/13#issuecomment-353124009 - if os.name == 'nt': - workers = 0 - else: - workers = multiprocessing.cpu_count() - - self.keras_model.fit_generator( - train_generator, - initial_epoch=self.epoch, - epochs=epochs, - steps_per_epoch=self.config.STEPS_PER_EPOCH, - callbacks=callbacks, - validation_data=val_generator, - validation_steps=self.config.VALIDATION_STEPS, - max_queue_size=100, - workers=workers, - use_multiprocessing=True, - ) - self.epoch = max(self.epoch, epochs) - - def mold_inputs(self, images): - """Takes a list of images and modifies them to the format expected - as an input to the neural network. - images: List of image matrices [height,width,depth]. Images can have - different sizes. - - Returns 3 Numpy matrices: - molded_images: [N, h, w, 3]. Images resized and normalized. - image_metas: [N, length of meta data]. Details about each image. - windows: [N, (y1, x1, y2, x2)]. The portion of the image that has the - original image (padding excluded).
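- For example (illustrative, assuming the default 'square' resize mode with
- IMAGE_MAX_DIM = 1024): a 600x800 input is scaled to 768x1024 and padded
- top and bottom, giving window = (128, 0, 896, 1024).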
- """ - molded_images = [] - image_metas = [] - windows = [] - for image in images: - # Resize image - # TODO: move resizing to mold_image() - molded_image, window, scale, padding, crop = utils.resize_image( - image, - min_dim=self.config.IMAGE_MIN_DIM, - min_scale=self.config.IMAGE_MIN_SCALE, - max_dim=self.config.IMAGE_MAX_DIM, - mode=self.config.IMAGE_RESIZE_MODE) - molded_image = mold_image(molded_image, self.config) - # Build image_meta - image_meta = compose_image_meta( - 0, image.shape, molded_image.shape, window, scale, - np.zeros([self.config.NUM_CLASSES], dtype=np.int32)) - # Append - molded_images.append(molded_image) - windows.append(window) - image_metas.append(image_meta) - # Pack into arrays - molded_images = np.stack(molded_images) - image_metas = np.stack(image_metas) - windows = np.stack(windows) - return molded_images, image_metas, windows - - def unmold_detections(self, detections, mrcnn_mask, original_image_shape, - image_shape, window): - """Reformats the detections of one image from the format of the neural - network output to a format suitable for use in the rest of the - application. - - detections: [N, (y1, x1, y2, x2, class_id, score)] in normalized coordinates - mrcnn_mask: [N, height, width, num_classes] - original_image_shape: [H, W, C] Original image shape before resizing - image_shape: [H, W, C] Shape of the image after resizing and padding - window: [y1, x1, y2, x2] Pixel coordinates of box in the image where the real - image is excluding the padding. - - Returns: - boxes: [N, (y1, x1, y2, x2)] Bounding boxes in pixels - class_ids: [N] Integer class IDs for each bounding box - scores: [N] Float probability scores of the class_id - masks: [height, width, num_instances] Instance masks - """ - # How many detections do we have? - # Detections array is padded with zeros. Find the first class_id == 0. - zero_ix = np.where(detections[:, 4] == 0)[0] - N = zero_ix[0] if zero_ix.shape[0] > 0 else detections.shape[0] - - # Extract boxes, class_ids, scores, and class-specific masks - boxes = detections[:N, :4] - class_ids = detections[:N, 4].astype(np.int32) - scores = detections[:N, 5] - masks = mrcnn_mask[np.arange(N), :, :, class_ids] - - # Translate normalized coordinates in the resized image to pixel - # coordinates in the original image before resizing - window = utils.norm_boxes(window, image_shape[:2]) - wy1, wx1, wy2, wx2 = window - shift = np.array([wy1, wx1, wy1, wx1]) - wh = wy2 - wy1 # window height - ww = wx2 - wx1 # window width - scale = np.array([wh, ww, wh, ww]) - # Convert boxes to normalized coordinates on the window - boxes = np.divide(boxes - shift, scale) - # Convert boxes to pixel coordinates on the original image - boxes = utils.denorm_boxes(boxes, original_image_shape[:2]) - - # Filter out detections with zero area. Happens in early training when - # network weights are still random - exclude_ix = np.where( - (boxes[:, 2] - boxes[:, 0]) * (boxes[:, 3] - boxes[:, 1]) <= 0)[0] - if exclude_ix.shape[0] > 0: - boxes = np.delete(boxes, exclude_ix, axis=0) - class_ids = np.delete(class_ids, exclude_ix, axis=0) - scores = np.delete(scores, exclude_ix, axis=0) - masks = np.delete(masks, exclude_ix, axis=0) - N = class_ids.shape[0] - - # Resize masks to original image size and set boundary threshold. 
- full_masks = [] - for i in range(N): - # Convert neural network mask to full size mask - full_mask = utils.unmold_mask( - masks[i], boxes[i], original_image_shape) - full_masks.append(full_mask) - full_masks = np.stack(full_masks, axis=-1)\ - if full_masks else np.empty(original_image_shape[:2] + (0,)) - - return boxes, class_ids, scores, full_masks - - def detect(self, images, verbose=0): - """Runs the detection pipeline. - - images: List of images, potentially of different sizes. - - Returns a list of dicts, one dict per image. The dict contains: - rois: [N, (y1, x1, y2, x2)] detection bounding boxes - class_ids: [N] int class IDs - scores: [N] float probability scores for the class IDs - masks: [H, W, N] instance binary masks - """ - assert self.mode == "inference", "Create model in inference mode." - assert len( - images) == self.config.BATCH_SIZE, "len(images) must be equal to BATCH_SIZE" - - if verbose: - log("Processing {} images".format(len(images))) - for image in images: - log("image", image) - - # Mold inputs to format expected by the neural network - molded_images, image_metas, windows = self.mold_inputs(images) - - # Validate image sizes - # All images in a batch MUST be of the same size - image_shape = molded_images[0].shape - for g in molded_images[1:]: - assert g.shape == image_shape,\ - "After resizing, all images must have the same size. Check IMAGE_RESIZE_MODE and image sizes." - - # Anchors - anchors = self.get_anchors(image_shape) - # Duplicate across the batch dimension because Keras requires it - # TODO: can this be optimized to avoid duplicating the anchors? - anchors = np.broadcast_to( - anchors, (self.config.BATCH_SIZE,) + anchors.shape) - - if verbose: - log("molded_images", molded_images) - log("image_metas", image_metas) - log("anchors", anchors) - # Run object detection - detections, _, _, mrcnn_mask, _, _, _ =\ - self.keras_model.predict( - [molded_images, image_metas, anchors], verbose=0) - # Process detections - results = [] - for i, image in enumerate(images): - final_rois, final_class_ids, final_scores, final_masks =\ - self.unmold_detections(detections[i], mrcnn_mask[i], - image.shape, molded_images[i].shape, - windows[i]) - results.append({ - "rois": final_rois, - "class_ids": final_class_ids, - "scores": final_scores, - "masks": final_masks, - }) - return results - - def detect_molded(self, molded_images, image_metas, verbose=0): - """Runs the detection pipeline, but expects inputs that are - already molded. Used mostly for debugging and inspecting - the model. - - molded_images: List of images loaded using load_image_gt() - image_metas: image meta data, also returned by load_image_gt() - - Returns a list of dicts, one dict per image. The dict contains: - rois: [N, (y1, x1, y2, x2)] detection bounding boxes - class_ids: [N] int class IDs - scores: [N] float probability scores for the class IDs - masks: [H, W, N] instance binary masks - """ - assert self.mode == "inference", "Create model in inference mode."
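- # Illustrative usage sketch (assumes this module is imported as modellib):
- # image, image_meta, _, _, _ = modellib.load_image_gt(dataset, config, image_id)
- # results = model.detect_molded(image[np.newaxis], image_meta[np.newaxis], verbose=1)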
- assert len(molded_images) == self.config.BATCH_SIZE,\ - "Number of images must be equal to BATCH_SIZE" - - if verbose: - log("Processing {} images".format(len(molded_images))) - for image in molded_images: - log("image", image) - - # Validate image sizes - # All images in a batch MUST be of the same size - image_shape = molded_images[0].shape - for g in molded_images[1:]: - assert g.shape == image_shape, "Images must have the same size" - - # Anchors - anchors = self.get_anchors(image_shape) - # Duplicate across the batch dimension because Keras requires it - # TODO: can this be optimized to avoid duplicating the anchors? - anchors = np.broadcast_to( - anchors, (self.config.BATCH_SIZE,) + anchors.shape) - - if verbose: - log("molded_images", molded_images) - log("image_metas", image_metas) - log("anchors", anchors) - # Run object detection - detections, _, _, mrcnn_mask, _, _, _ =\ - self.keras_model.predict( - [molded_images, image_metas, anchors], verbose=0) - # Process detections - results = [] - for i, image in enumerate(molded_images): - window = [0, 0, image.shape[0], image.shape[1]] - final_rois, final_class_ids, final_scores, final_masks =\ - self.unmold_detections(detections[i], mrcnn_mask[i], - image.shape, molded_images[i].shape, - window) - results.append({ - "rois": final_rois, - "class_ids": final_class_ids, - "scores": final_scores, - "masks": final_masks, - }) - return results - - def get_anchors(self, image_shape): - """Returns anchor pyramid for the given image size.""" - backbone_shapes = compute_backbone_shapes(self.config, image_shape) - # Cache anchors and reuse if image shape is the same - if not hasattr(self, "_anchor_cache"): - self._anchor_cache = {} - if not tuple(image_shape) in self._anchor_cache: - # Generate Anchors - a = utils.generate_pyramid_anchors( - self.config.RPN_ANCHOR_SCALES, - self.config.RPN_ANCHOR_RATIOS, - backbone_shapes, - self.config.BACKBONE_STRIDES, - self.config.RPN_ANCHOR_STRIDE) - # Keep a copy of the latest anchors in pixel coordinates because - # it's used in inspect_model notebooks. - # TODO: Remove this after the notebook are refactored to not use it - self.anchors = a - # Normalize coordinates - self._anchor_cache[tuple(image_shape)] = utils.norm_boxes( - a, image_shape[:2]) - return self._anchor_cache[tuple(image_shape)] - - def ancestor(self, tensor, name, checked=None): - """Finds the ancestor of a TF tensor in the computation graph. - tensor: TensorFlow symbolic tensor. - name: Name of ancestor tensor to find - checked: For internal use. A list of tensors that were already - searched to avoid loops in traversing the graph. - """ - checked = checked if checked is not None else [] - # Put a limit on how deep we go to avoid very long loops - if len(checked) > 500: - return None - # Convert name to a regex and allow matching a number prefix - # because Keras adds them automatically - if isinstance(name, str): - name = re.compile(name.replace("/", r"(\_\d+)*/")) - - parents = tensor.op.inputs - for p in parents: - if p in checked: - continue - if bool(re.fullmatch(name, p.name)): - return p - checked.append(p) - a = self.ancestor(p, name, checked) - if a is not None: - return a - return None - - def find_trainable_layer(self, layer): - """If a layer is encapsulated by another layer, this function - digs through the encapsulation and returns the layer that holds - the weights. 
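- For example, a TimeDistributed(Conv2D(...)) wrapper is unwrapped to the
- inner Conv2D layer, which is the layer that actually holds the weights.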
- """ - if layer.__class__.__name__ == 'TimeDistributed': - return self.find_trainable_layer(layer.layer) - return layer - - def get_trainable_layers(self): - """Returns a list of layers that have weights.""" - layers = [] - # Loop through all layers - for l in self.keras_model.layers: - # If layer is a wrapper, find inner trainable layer - l = self.find_trainable_layer(l) - # Include layer if it has weights - if l.get_weights(): - layers.append(l) - return layers - - def run_graph(self, images, outputs, image_metas=None): - """Runs a sub-set of the computation graph that computes the given - outputs. - - image_metas: If provided, the images are assumed to be already - molded (i.e. resized, padded, and normalized) - - outputs: List of tuples (name, tensor) to compute. The tensors are - symbolic TensorFlow tensors and the names are for easy tracking. - - Returns an ordered dict of results. Keys are the names received in the - input and values are Numpy arrays. - """ - model = self.keras_model - - # Organize desired outputs into an ordered dict - outputs = OrderedDict(outputs) - for o in outputs.values(): - assert o is not None - - # Build a Keras function to run parts of the computation graph - inputs = model.inputs - if model.uses_learning_phase and not isinstance(K.learning_phase(), int): - inputs += [K.learning_phase()] - kf = K.function(model.inputs, list(outputs.values())) - - # Prepare inputs - if image_metas is None: - molded_images, image_metas, _ = self.mold_inputs(images) - else: - molded_images = images - image_shape = molded_images[0].shape - # Anchors - anchors = self.get_anchors(image_shape) - # Duplicate across the batch dimension because Keras requires it - # TODO: can this be optimized to avoid duplicating the anchors? - anchors = np.broadcast_to( - anchors, (self.config.BATCH_SIZE,) + anchors.shape) - model_in = [molded_images, image_metas, anchors] - - # Run inference - if model.uses_learning_phase and not isinstance(K.learning_phase(), int): - model_in.append(0.) - outputs_np = kf(model_in) - - # Pack the generated Numpy arrays into a a dict and log the results. - outputs_np = OrderedDict([(k, v) - for k, v in zip(outputs.keys(), outputs_np)]) - for k, v in outputs_np.items(): - log(k, v) - return outputs_np - - -############################################################ -# Data Formatting -############################################################ - -def compose_image_meta(image_id, original_image_shape, image_shape, - window, scale, active_class_ids): - """Takes attributes of an image and puts them in one 1D array. - - image_id: An int ID of the image. Useful for debugging. - original_image_shape: [H, W, C] before resizing or padding. - image_shape: [H, W, C] after resizing and padding - window: (y1, x1, y2, x2) in pixels. The area of the image where the real - image is (excluding the padding) - scale: The scaling factor applied to the original image (float32) - active_class_ids: List of class_ids available in the dataset from which - the image came. Useful if training on images from multiple datasets - where not all classes are present in all datasets. - """ - meta = np.array( - [image_id] + # size=1 - list(original_image_shape) + # size=3 - list(image_shape) + # size=3 - # size=4 (y1, x1, y2, x2) in image cooredinates - list(window) + - [scale] + # size=1 - list(active_class_ids) # size=num_classes - ) - return meta - - -def parse_image_meta(meta): - """Parses an array that contains image attributes to its components. - See compose_image_meta() for more details. 
-
-     meta: [batch, meta length] where meta length depends on NUM_CLASSES
-
-     Returns a dict of the parsed values.
-     """
-     image_id = meta[:, 0]
-     original_image_shape = meta[:, 1:4]
-     image_shape = meta[:, 4:7]
-     window = meta[:, 7:11]  # (y1, x1, y2, x2) window of image in pixels
-     scale = meta[:, 11]
-     active_class_ids = meta[:, 12:]
-     return {
-         "image_id": image_id.astype(np.int32),
-         "original_image_shape": original_image_shape.astype(np.int32),
-         "image_shape": image_shape.astype(np.int32),
-         "window": window.astype(np.int32),
-         "scale": scale.astype(np.float32),
-         "active_class_ids": active_class_ids.astype(np.int32),
-     }
-
-
- def parse_image_meta_graph(meta):
-     """Parses a tensor that contains image attributes to its components.
-     See compose_image_meta() for more details.
-
-     meta: [batch, meta length] where meta length depends on NUM_CLASSES
-
-     Returns a dict of the parsed tensors.
-     """
-     image_id = meta[:, 0]
-     original_image_shape = meta[:, 1:4]
-     image_shape = meta[:, 4:7]
-     window = meta[:, 7:11]  # (y1, x1, y2, x2) window of image in pixels
-     scale = meta[:, 11]
-     active_class_ids = meta[:, 12:]
-     return {
-         "image_id": image_id,
-         "original_image_shape": original_image_shape,
-         "image_shape": image_shape,
-         "window": window,
-         "scale": scale,
-         "active_class_ids": active_class_ids,
-     }
-
-
- def mold_image(images, config):
-     """Expects an RGB image (or array of images), subtracts the mean pixel,
-     and converts the result to float. Expects image colors in RGB order.
-     """
-     return images.astype(np.float32) - config.MEAN_PIXEL
-
-
- def unmold_image(normalized_images, config):
-     """Takes an image normalized with mold_image() and returns the original."""
-     return (normalized_images + config.MEAN_PIXEL).astype(np.uint8)
-
-
- ############################################################
- # Miscellaneous Graph Functions
- ############################################################
-
- def trim_zeros_graph(boxes, name='trim_zeros'):
-     """Often boxes are represented with matrices of shape [N, 4] and
-     are padded with zeros. This removes zero boxes.
-
-     boxes: [N, 4] matrix of boxes.
-     non_zeros: [N] a 1D boolean mask identifying the rows to keep
-     """
-     non_zeros = tf.cast(tf.reduce_sum(tf.abs(boxes), axis=1), tf.bool)
-     boxes = tf.boolean_mask(boxes, non_zeros, name=name)
-     return boxes, non_zeros
-
-
- def batch_pack_graph(x, counts, num_rows):
-     """Picks a different number of values from each row
-     in x depending on the values in counts.
-     """
-     outputs = []
-     for i in range(num_rows):
-         outputs.append(x[i, :counts[i]])
-     return tf.concat(outputs, axis=0)
-
-
- def norm_boxes_graph(boxes, shape):
-     """Converts boxes from pixel coordinates to normalized coordinates.
-     boxes: [..., (y1, x1, y2, x2)] in pixel coordinates
-     shape: [..., (height, width)] in pixels
-
-     Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
-     coordinates it's inside the box.
-
-     Returns:
-         [..., (y1, x1, y2, x2)] in normalized coordinates
-     """
-     h, w = tf.split(tf.cast(shape, tf.float32), 2)
-     scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0)
-     shift = tf.constant([0., 0., 1., 1.])
-     return tf.divide(boxes - shift, scale)
-
-
- def denorm_boxes_graph(boxes, shape):
-     """Converts boxes from normalized coordinates to pixel coordinates.
-     boxes: [..., (y1, x1, y2, x2)] in normalized coordinates
-     shape: [..., (height, width)] in pixels
-
-     Note: In pixel coordinates (y2, x2) is outside the box. But in normalized
-     coordinates it's inside the box.
- - Returns: - [..., (y1, x1, y2, x2)] in pixel coordinates - """ - h, w = tf.split(tf.cast(shape, tf.float32), 2) - scale = tf.concat([h, w, h, w], axis=-1) - tf.constant(1.0) - shift = tf.constant([0., 0., 1., 1.]) - return tf.cast(tf.round(tf.multiply(boxes, scale) + shift), tf.int32) -""" -Mask R-CNN -Multi-GPU Support for Keras. - -Copyright (c) 2017 Matterport, Inc. -Licensed under the MIT License (see LICENSE for details) -Written by Waleed Abdulla - -Ideas and a small code snippets from these sources: -https://github.com/fchollet/keras/issues/2436 -https://medium.com/@kuza55/transparent-multi-gpu-training-on-tensorflow-with-keras-8b0016fd9012 -https://github.com/avolkov1/keras_experiments/blob/master/keras_exp/multigpu/ -https://github.com/fchollet/keras/blob/master/keras/utils/training_utils.py -""" - -import tensorflow as tf -import keras.backend as K -import keras.layers as KL -import keras.models as KM - - -class ParallelModel(KM.Model): - """Subclasses the standard Keras Model and adds multi-GPU support. - It works by creating a copy of the model on each GPU. Then it slices - the inputs and sends a slice to each copy of the model, and then - merges the outputs together and applies the loss on the combined - outputs. - """ - - def __init__(self, keras_model, gpu_count): - """Class constructor. - keras_model: The Keras model to parallelize - gpu_count: Number of GPUs. Must be > 1 - """ - self.inner_model = keras_model - self.gpu_count = gpu_count - merged_outputs = self.make_parallel() - super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, - outputs=merged_outputs) - - def __getattribute__(self, attrname): - """Redirect loading and saving methods to the inner model. That's where - the weights are stored.""" - if 'load' in attrname or 'save' in attrname: - return getattr(self.inner_model, attrname) - return super(ParallelModel, self).__getattribute__(attrname) - - def summary(self, *args, **kwargs): - """Override summary() to display summaries of both, the wrapper - and inner models.""" - super(ParallelModel, self).summary(*args, **kwargs) - self.inner_model.summary(*args, **kwargs) - - def make_parallel(self): - """Creates a new wrapper model that consists of multiple replicas of - the original model placed on different GPUs. - """ - # Slice inputs. Slice inputs on the CPU to avoid sending a copy - # of the full inputs to all GPUs. Saves on bandwidth and memory. 
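- # Added commentary (illustrative, assumed shapes; not part of the original
- # source): with gpu_count == 2 and an input tensor of shape
- # [8, 1024, 1024, 3], tf.split(x, 2) below yields two [4, 1024, 1024, 3]
- # slices, one per GPU, which is why the effective batch size must be
- # divisible by gpu_count.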
- input_slices = {name: tf.split(x, self.gpu_count) - for name, x in zip(self.inner_model.input_names, - self.inner_model.inputs)} - - output_names = self.inner_model.output_names - outputs_all = [] - for i in range(len(self.inner_model.outputs)): - outputs_all.append([]) - - # Run the model call() on each GPU to place the ops there - for i in range(self.gpu_count): - with tf.device('/gpu:%d' % i): - with tf.name_scope('tower_%d' % i): - # Run a slice of inputs through this replica - zipped_inputs = zip(self.inner_model.input_names, - self.inner_model.inputs) - inputs = [ - KL.Lambda(lambda s: input_slices[name][i], - output_shape=lambda s: (None,) + s[1:])(tensor) - for name, tensor in zipped_inputs] - # Create the model replica and get the outputs - outputs = self.inner_model(inputs) - if not isinstance(outputs, list): - outputs = [outputs] - # Save the outputs for merging back together later - for l, o in enumerate(outputs): - outputs_all[l].append(o) - - # Merge outputs on CPU - with tf.device('/cpu:0'): - merged = [] - for outputs, name in zip(outputs_all, output_names): - # Concatenate or average outputs? - # Outputs usually have a batch dimension and we concatenate - # across it. If they don't, then the output is likely a loss - # or a metric value that gets averaged across the batch. - # Keras expects losses and metrics to be scalars. - if K.int_shape(outputs[0]) == (): - # Average - m = KL.Lambda(lambda o: tf.add_n( - o) / len(outputs), name=name)(outputs) - else: - # Concatenate - m = KL.Concatenate(axis=0, name=name)(outputs) - merged.append(m) - return merged - - -if __name__ == "__main__": - # Testing code below. It creates a simple model to train on MNIST and - # tries to run it on 2 GPUs. It saves the graph so it can be viewed - # in TensorBoard. Run it as: - # - # python3 parallel_model.py - - import os - import numpy as np - import keras.optimizers - from keras.datasets import mnist - from keras.preprocessing.image import ImageDataGenerator - - GPU_COUNT = 2 - - # Root directory of the project - ROOT_DIR = os.path.abspath("../") - - # Directory to save logs and trained model - MODEL_DIR = os.path.join(ROOT_DIR, "logs") - - def build_model(x_train, num_classes): - # Reset default graph. Keras leaves old ops in the graph, - # which are ignored for execution but clutter graph - # visualization in TensorBoard. - tf.reset_default_graph() - - inputs = KL.Input(shape=x_train.shape[1:], name="input_image") - x = KL.Conv2D(32, (3, 3), activation='relu', padding="same", - name="conv1")(inputs) - x = KL.Conv2D(64, (3, 3), activation='relu', padding="same", - name="conv2")(x) - x = KL.MaxPooling2D(pool_size=(2, 2), name="pool1")(x) - x = KL.Flatten(name="flat1")(x) - x = KL.Dense(128, activation='relu', name="dense1")(x) - x = KL.Dense(num_classes, activation='softmax', name="dense2")(x) - - return KM.Model(inputs, x, "digit_classifier_model") - - # Load MNIST Data - (x_train, y_train), (x_test, y_test) = mnist.load_data() - x_train = np.expand_dims(x_train, -1).astype('float32') / 255 - x_test = np.expand_dims(x_test, -1).astype('float32') / 255 - - print('x_train shape:', x_train.shape) - print('x_test shape:', x_test.shape) - - # Build data generator and model - datagen = ImageDataGenerator() - model = build_model(x_train, 10) - - # Add multi-GPU support. 
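- # Added commentary (illustrative): because load/save attribute lookups are
- # redirected to the inner model via __getattribute__ above, weights saved
- # from the wrapped model can later be loaded into a plain single-GPU model
- # unchanged.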
- model = ParallelModel(model, GPU_COUNT) - - optimizer = keras.optimizers.SGD(lr=0.01, momentum=0.9, clipnorm=5.0) - - model.compile(loss='sparse_categorical_crossentropy', - optimizer=optimizer, metrics=['accuracy']) - - model.summary() - - # Train - model.fit_generator( - datagen.flow(x_train, y_train, batch_size=64), - steps_per_epoch=50, epochs=10, verbose=1, - validation_data=(x_test, y_test), - callbacks=[keras.callbacks.TensorBoard(log_dir=MODEL_DIR, - write_graph=True)] - ) -""" -Mask R-CNN -Common utility functions and classes. - -Copyright (c) 2017 Matterport, Inc. -Licensed under the MIT License (see LICENSE for details) -Written by Waleed Abdulla -""" - -import sys -import os -import logging -import math -import random -import numpy as np -import tensorflow as tf -import scipy -import skimage.color -import skimage.io -import skimage.transform -import urllib.request -import shutil -import warnings -from distutils.version import LooseVersion - -# URL from which to download the latest COCO trained weights -COCO_MODEL_URL = "https://github.com/matterport/Mask_RCNN/releases/download/v2.0/mask_rcnn_coco.h5" - - -############################################################ -# Bounding Boxes -############################################################ - -def extract_bboxes(mask): - """Compute bounding boxes from masks. - mask: [height, width, num_instances]. Mask pixels are either 1 or 0. - - Returns: bbox array [num_instances, (y1, x1, y2, x2)]. - """ - boxes = np.zeros([mask.shape[-1], 4], dtype=np.int32) - for i in range(mask.shape[-1]): - m = mask[:, :, i] - # Bounding box. - horizontal_indicies = np.where(np.any(m, axis=0))[0] - vertical_indicies = np.where(np.any(m, axis=1))[0] - if horizontal_indicies.shape[0]: - x1, x2 = horizontal_indicies[[0, -1]] - y1, y2 = vertical_indicies[[0, -1]] - # x2 and y2 should not be part of the box. Increment by 1. - x2 += 1 - y2 += 1 - else: - # No mask for this instance. Might happen due to - # resizing or cropping. Set bbox to zeros - x1, x2, y1, y2 = 0, 0, 0, 0 - boxes[i] = np.array([y1, x1, y2, x2]) - return boxes.astype(np.int32) - - -def compute_iou(box, boxes, box_area, boxes_area): - """Calculates IoU of the given box with the array of the given boxes. - box: 1D vector [y1, x1, y2, x2] - boxes: [boxes_count, (y1, x1, y2, x2)] - box_area: float. the area of 'box' - boxes_area: array of length boxes_count. - - Note: the areas are passed in rather than calculated here for - efficiency. Calculate once in the caller to avoid duplicate work. - """ - # Calculate intersection areas - y1 = np.maximum(box[0], boxes[:, 0]) - y2 = np.minimum(box[2], boxes[:, 2]) - x1 = np.maximum(box[1], boxes[:, 1]) - x2 = np.minimum(box[3], boxes[:, 3]) - intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0) - union = box_area + boxes_area[:] - intersection[:] - iou = intersection / union - return iou - - -def compute_overlaps(boxes1, boxes2): - """Computes IoU overlaps between two sets of boxes. - boxes1, boxes2: [N, (y1, x1, y2, x2)]. - - For better performance, pass the largest set first and the smaller second. - """ - # Areas of anchors and GT boxes - area1 = (boxes1[:, 2] - boxes1[:, 0]) * (boxes1[:, 3] - boxes1[:, 1]) - area2 = (boxes2[:, 2] - boxes2[:, 0]) * (boxes2[:, 3] - boxes2[:, 1]) - - # Compute overlaps to generate matrix [boxes1 count, boxes2 count] - # Each cell contains the IoU value. 
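- # Illustrative example (hypothetical values, added commentary):
- #
- #   boxes1 = np.array([[0, 0, 10, 10]])
- #   boxes2 = np.array([[0, 0, 10, 10], [5, 5, 15, 15]])
- #   compute_overlaps(boxes1, boxes2)
- #   # -> array([[1.0, 0.143]])  # intersection 25 / union 175 ~= 0.143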
- overlaps = np.zeros((boxes1.shape[0], boxes2.shape[0])) - for i in range(overlaps.shape[1]): - box2 = boxes2[i] - overlaps[:, i] = compute_iou(box2, boxes1, area2[i], area1) - return overlaps - - -def compute_overlaps_masks(masks1, masks2): - """Computes IoU overlaps between two sets of masks. - masks1, masks2: [Height, Width, instances] - """ - - # If either set of masks is empty return empty result - if masks1.shape[-1] == 0 or masks2.shape[-1] == 0: - return np.zeros((masks1.shape[-1], masks2.shape[-1])) - # flatten masks and compute their areas - masks1 = np.reshape(masks1 > .5, (-1, masks1.shape[-1])).astype(np.float32) - masks2 = np.reshape(masks2 > .5, (-1, masks2.shape[-1])).astype(np.float32) - area1 = np.sum(masks1, axis=0) - area2 = np.sum(masks2, axis=0) - - # intersections and union - intersections = np.dot(masks1.T, masks2) - union = area1[:, None] + area2[None, :] - intersections - overlaps = intersections / union - - return overlaps - - -def non_max_suppression(boxes, scores, threshold): - """Performs non-maximum suppression and returns indices of kept boxes. - boxes: [N, (y1, x1, y2, x2)]. Notice that (y2, x2) lays outside the box. - scores: 1-D array of box scores. - threshold: Float. IoU threshold to use for filtering. - """ - assert boxes.shape[0] > 0 - if boxes.dtype.kind != "f": - boxes = boxes.astype(np.float32) - - # Compute box areas - y1 = boxes[:, 0] - x1 = boxes[:, 1] - y2 = boxes[:, 2] - x2 = boxes[:, 3] - area = (y2 - y1) * (x2 - x1) - - # Get indicies of boxes sorted by scores (highest first) - ixs = scores.argsort()[::-1] - - pick = [] - while len(ixs) > 0: - # Pick top box and add its index to the list - i = ixs[0] - pick.append(i) - # Compute IoU of the picked box with the rest - iou = compute_iou(boxes[i], boxes[ixs[1:]], area[i], area[ixs[1:]]) - # Identify boxes with IoU over the threshold. This - # returns indices into ixs[1:], so add 1 to get - # indices into ixs. - remove_ixs = np.where(iou > threshold)[0] + 1 - # Remove indices of the picked and overlapped boxes. - ixs = np.delete(ixs, remove_ixs) - ixs = np.delete(ixs, 0) - return np.array(pick, dtype=np.int32) - - -def apply_box_deltas(boxes, deltas): - """Applies the given deltas to the given boxes. - boxes: [N, (y1, x1, y2, x2)]. Note that (y2, x2) is outside the box. - deltas: [N, (dy, dx, log(dh), log(dw))] - """ - boxes = boxes.astype(np.float32) - # Convert to y, x, h, w - height = boxes[:, 2] - boxes[:, 0] - width = boxes[:, 3] - boxes[:, 1] - center_y = boxes[:, 0] + 0.5 * height - center_x = boxes[:, 1] + 0.5 * width - # Apply deltas - center_y += deltas[:, 0] * height - center_x += deltas[:, 1] * width - height *= np.exp(deltas[:, 2]) - width *= np.exp(deltas[:, 3]) - # Convert back to y1, x1, y2, x2 - y1 = center_y - 0.5 * height - x1 = center_x - 0.5 * width - y2 = y1 + height - x2 = x1 + width - return np.stack([y1, x1, y2, x2], axis=1) - - -def box_refinement_graph(box, gt_box): - """Compute refinement needed to transform box to gt_box. 
- box and gt_box are [N, (y1, x1, y2, x2)] - """ - box = tf.cast(box, tf.float32) - gt_box = tf.cast(gt_box, tf.float32) - - height = box[:, 2] - box[:, 0] - width = box[:, 3] - box[:, 1] - center_y = box[:, 0] + 0.5 * height - center_x = box[:, 1] + 0.5 * width - - gt_height = gt_box[:, 2] - gt_box[:, 0] - gt_width = gt_box[:, 3] - gt_box[:, 1] - gt_center_y = gt_box[:, 0] + 0.5 * gt_height - gt_center_x = gt_box[:, 1] + 0.5 * gt_width - - dy = (gt_center_y - center_y) / height - dx = (gt_center_x - center_x) / width - dh = tf.log(gt_height / height) - dw = tf.log(gt_width / width) - - result = tf.stack([dy, dx, dh, dw], axis=1) - return result - - -def box_refinement(box, gt_box): - """Compute refinement needed to transform box to gt_box. - box and gt_box are [N, (y1, x1, y2, x2)]. (y2, x2) is - assumed to be outside the box. - """ - box = box.astype(np.float32) - gt_box = gt_box.astype(np.float32) - - height = box[:, 2] - box[:, 0] - width = box[:, 3] - box[:, 1] - center_y = box[:, 0] + 0.5 * height - center_x = box[:, 1] + 0.5 * width - - gt_height = gt_box[:, 2] - gt_box[:, 0] - gt_width = gt_box[:, 3] - gt_box[:, 1] - gt_center_y = gt_box[:, 0] + 0.5 * gt_height - gt_center_x = gt_box[:, 1] + 0.5 * gt_width - - dy = (gt_center_y - center_y) / height - dx = (gt_center_x - center_x) / width - dh = np.log(gt_height / height) - dw = np.log(gt_width / width) - - return np.stack([dy, dx, dh, dw], axis=1) - - -############################################################ -# Dataset -############################################################ - -class Dataset(object): - """The base class for dataset classes. - To use it, create a new class that adds functions specific to the dataset - you want to use. For example: - - class CatsAndDogsDataset(Dataset): - def load_cats_and_dogs(self): - ... - def load_mask(self, image_id): - ... - def image_reference(self, image_id): - ... - - See COCODataset and ShapesDataset as examples. - """ - - def __init__(self, class_map=None): - self._image_ids = [] - self.image_info = [] - # Background is always the first class - self.class_info = [{"source": "", "id": 0, "name": "BG"}] - self.source_class_ids = {} - - def add_class(self, source, class_id, class_name): - assert "." not in source, "Source name cannot contain a dot" - # Does the class exist already? - for info in self.class_info: - if info['source'] == source and info["id"] == class_id: - # source.class_id combination already available, skip - return - # Add the class - self.class_info.append({ - "source": source, - "id": class_id, - "name": class_name, - }) - - def add_image(self, source, image_id, path, **kwargs): - image_info = { - "id": image_id, - "source": source, - "path": path, - } - image_info.update(kwargs) - self.image_info.append(image_info) - - def image_reference(self, image_id): - """Return a link to the image in its source Website or details about - the image that help looking it up or debugging it. - - Override for your dataset, but pass to this function - if you encounter images not in your dataset. - """ - return "" - - def prepare(self, class_map=None): - """Prepares the Dataset class for use. - - TODO: class map is not supported yet. When done, it should handle mapping - classes from different datasets to the same class ID. - """ - - def clean_name(name): - """Returns a shorter version of object names for cleaner display.""" - return ",".join(name.split(",")[:1]) - - # Build (or rebuild) everything else from the info dicts. 
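- # Illustrative usage sketch (added commentary; CatsAndDogsDataset is the
- # hypothetical subclass from the Dataset class docstring above):
- #
- #   dataset = CatsAndDogsDataset()
- #   dataset.add_class("cats_and_dogs", 1, "cat")
- #   dataset.add_class("cats_and_dogs", 2, "dog")
- #   dataset.add_image("cats_and_dogs", image_id=0, path="/data/img0.jpg")
- #   dataset.prepare()
- #   dataset.map_source_class_id("cats_and_dogs.1")  # -> 1 (0 is background)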
-         self.num_classes = len(self.class_info)
-         self.class_ids = np.arange(self.num_classes)
-         self.class_names = [clean_name(c["name"]) for c in self.class_info]
-         self.num_images = len(self.image_info)
-         self._image_ids = np.arange(self.num_images)
-
-         # Mapping from source class and image IDs to internal IDs
-         self.class_from_source_map = {"{}.{}".format(info['source'], info['id']): id
-                                       for info, id in zip(self.class_info, self.class_ids)}
-         self.image_from_source_map = {"{}.{}".format(info['source'], info['id']): id
-                                       for info, id in zip(self.image_info, self.image_ids)}
-
-         # Map sources to class_ids they support
-         self.sources = list(set([i['source'] for i in self.class_info]))
-         self.source_class_ids = {}
-         # Loop over datasets
-         for source in self.sources:
-             self.source_class_ids[source] = []
-             # Find classes that belong to this dataset
-             for i, info in enumerate(self.class_info):
-                 # Include BG class in all datasets
-                 if i == 0 or source == info['source']:
-                     self.source_class_ids[source].append(i)
-
-     def map_source_class_id(self, source_class_id):
-         """Takes a source class ID and returns the int class ID assigned to it.
-
-         For example:
-         dataset.map_source_class_id("coco.12") -> 23
-         """
-         return self.class_from_source_map[source_class_id]
-
-     def get_source_class_id(self, class_id, source):
-         """Map an internal class ID to the corresponding class ID in the source dataset."""
-         info = self.class_info[class_id]
-         assert info['source'] == source
-         return info['id']
-
-     @property
-     def image_ids(self):
-         return self._image_ids
-
-     def source_image_link(self, image_id):
-         """Returns the path or URL to the image.
-         Override this to return a URL to the image if it's available online for easy
-         debugging.
-         """
-         return self.image_info[image_id]["path"]
-
-     def load_image(self, image_id):
-         """Load the specified image and return a [H,W,3] Numpy array.
-         """
-         # Load image
-         image = skimage.io.imread(self.image_info[image_id]['path'])
-         # If grayscale, convert to RGB for consistency.
-         if image.ndim != 3:
-             image = skimage.color.gray2rgb(image)
-         # If it has an alpha channel, remove it for consistency
-         if image.shape[-1] == 4:
-             image = image[..., :3]
-         return image
-
-     def load_mask(self, image_id):
-         """Load instance masks for the given image.
-
-         Different datasets use different ways to store masks. Override this
-         method to load instance masks and return them in the form of an
-         array of binary masks of shape [height, width, instances].
-
-         Returns:
-             masks: A bool array of shape [height, width, instance count] with
-                 a binary mask per instance.
-             class_ids: a 1D array of class IDs of the instance masks.
-         """
-         # Override this function to load a mask from your dataset.
-         # Otherwise, it returns an empty mask.
-         logging.warning(
-             "You are using the default load_mask(); you may need to define your own.")
-         mask = np.empty([0, 0, 0])
-         class_ids = np.empty([0], np.int32)
-         return mask, class_ids
-
-
- def resize_image(image, min_dim=None, max_dim=None, min_scale=None, mode="square"):
-     """Resizes an image keeping the aspect ratio unchanged.
-
-     min_dim: if provided, resizes the image such that its smaller
-         dimension == min_dim
-     max_dim: if provided, ensures that the image's longest side doesn't
-         exceed this value.
-     min_scale: if provided, ensures that the image is scaled up by at least
-         this percent even if min_dim doesn't require it.
-     mode: Resizing mode.
-         none: No resizing. Return the image unchanged.
- square: Resize and pad with zeros to get a square image - of size [max_dim, max_dim]. - pad64: Pads width and height with zeros to make them multiples of 64. - If min_dim or min_scale are provided, it scales the image up - before padding. max_dim is ignored in this mode. - The multiple of 64 is needed to ensure smooth scaling of feature - maps up and down the 6 levels of the FPN pyramid (2**6=64). - crop: Picks random crops from the image. First, scales the image based - on min_dim and min_scale, then picks a random crop of - size min_dim x min_dim. Can be used in training only. - max_dim is not used in this mode. - - Returns: - image: the resized image - window: (y1, x1, y2, x2). If max_dim is provided, padding might - be inserted in the returned image. If so, this window is the - coordinates of the image part of the full image (excluding - the padding). The x2, y2 pixels are not included. - scale: The scale factor used to resize the image - padding: Padding added to the image [(top, bottom), (left, right), (0, 0)] - """ - # Keep track of image dtype and return results in the same dtype - image_dtype = image.dtype - # Default window (y1, x1, y2, x2) and default scale == 1. - h, w = image.shape[:2] - window = (0, 0, h, w) - scale = 1 - padding = [(0, 0), (0, 0), (0, 0)] - crop = None - - if mode == "none": - return image, window, scale, padding, crop - - # Scale? - if min_dim: - # Scale up but not down - scale = max(1, min_dim / min(h, w)) - if min_scale and scale < min_scale: - scale = min_scale - - # Does it exceed max dim? - if max_dim and mode == "square": - image_max = max(h, w) - if round(image_max * scale) > max_dim: - scale = max_dim / image_max - - # Resize image using bilinear interpolation - if scale != 1: - image = resize(image, (round(h * scale), round(w * scale)), - preserve_range=True) - - # Need padding or cropping? - if mode == "square": - # Get new height and width - h, w = image.shape[:2] - top_pad = (max_dim - h) // 2 - bottom_pad = max_dim - h - top_pad - left_pad = (max_dim - w) // 2 - right_pad = max_dim - w - left_pad - padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] - image = np.pad(image, padding, mode='constant', constant_values=0) - window = (top_pad, left_pad, h + top_pad, w + left_pad) - elif mode == "pad64": - h, w = image.shape[:2] - # Both sides must be divisible by 64 - assert min_dim % 64 == 0, "Minimum dimension must be a multiple of 64" - # Height - if h % 64 > 0: - max_h = h - (h % 64) + 64 - top_pad = (max_h - h) // 2 - bottom_pad = max_h - h - top_pad - else: - top_pad = bottom_pad = 0 - # Width - if w % 64 > 0: - max_w = w - (w % 64) + 64 - left_pad = (max_w - w) // 2 - right_pad = max_w - w - left_pad - else: - left_pad = right_pad = 0 - padding = [(top_pad, bottom_pad), (left_pad, right_pad), (0, 0)] - image = np.pad(image, padding, mode='constant', constant_values=0) - window = (top_pad, left_pad, h + top_pad, w + left_pad) - elif mode == "crop": - # Pick a random crop - h, w = image.shape[:2] - y = random.randint(0, (h - min_dim)) - x = random.randint(0, (w - min_dim)) - crop = (y, x, min_dim, min_dim) - image = image[y:y + min_dim, x:x + min_dim] - window = (0, 0, min_dim, min_dim) - else: - raise Exception("Mode {} not supported".format(mode)) - return image.astype(image_dtype), window, scale, padding, crop - - -def resize_mask(mask, scale, padding, crop=None): - """Resizes a mask using the given scale and padding. 
- Typically, you get the scale and padding from resize_image() to - ensure both, the image and the mask, are resized consistently. - - scale: mask scaling factor - padding: Padding to add to the mask in the form - [(top, bottom), (left, right), (0, 0)] - """ - # Suppress warning from scipy 0.13.0, the output shape of zoom() is - # calculated with round() instead of int() - with warnings.catch_warnings(): - warnings.simplefilter("ignore") - mask = scipy.ndimage.zoom(mask, zoom=[scale, scale, 1], order=0) - if crop is not None: - y, x, h, w = crop - mask = mask[y:y + h, x:x + w] - else: - mask = np.pad(mask, padding, mode='constant', constant_values=0) - return mask - - -def minimize_mask(bbox, mask, mini_shape): - """Resize masks to a smaller version to reduce memory load. - Mini-masks can be resized back to image scale using expand_masks() - - See inspect_data.ipynb notebook for more details. - """ - mini_mask = np.zeros(mini_shape + (mask.shape[-1],), dtype=bool) - for i in range(mask.shape[-1]): - # Pick slice and cast to bool in case load_mask() returned wrong dtype - m = mask[:, :, i].astype(bool) - y1, x1, y2, x2 = bbox[i][:4] - m = m[y1:y2, x1:x2] - if m.size == 0: - raise Exception("Invalid bounding box with area of zero") - # Resize with bilinear interpolation - m = resize(m, mini_shape) - mini_mask[:, :, i] = np.around(m).astype(np.bool) - return mini_mask - - -def expand_mask(bbox, mini_mask, image_shape): - """Resizes mini masks back to image size. Reverses the change - of minimize_mask(). - - See inspect_data.ipynb notebook for more details. - """ - mask = np.zeros(image_shape[:2] + (mini_mask.shape[-1],), dtype=bool) - for i in range(mask.shape[-1]): - m = mini_mask[:, :, i] - y1, x1, y2, x2 = bbox[i][:4] - h = y2 - y1 - w = x2 - x1 - # Resize with bilinear interpolation - m = resize(m, (h, w)) - mask[y1:y2, x1:x2, i] = np.around(m).astype(np.bool) - return mask - - -# TODO: Build and use this function to reduce code duplication -def mold_mask(mask, config): - pass - - -def unmold_mask(mask, bbox, image_shape): - """Converts a mask generated by the neural network to a format similar - to its original shape. - mask: [height, width] of type float. A small, typically 28x28 mask. - bbox: [y1, x1, y2, x2]. The box to fit the mask in. - - Returns a binary mask with the same size as the original image. - """ - threshold = 0.5 - y1, x1, y2, x2 = bbox - mask = resize(mask, (y2 - y1, x2 - x1)) - mask = np.where(mask >= threshold, 1, 0).astype(np.bool) - - # Put the mask in the right location. - full_mask = np.zeros(image_shape[:2], dtype=np.bool) - full_mask[y1:y2, x1:x2] = mask - return full_mask - - -############################################################ -# Anchors -############################################################ - -def generate_anchors(scales, ratios, shape, feature_stride, anchor_stride): - """ - scales: 1D array of anchor sizes in pixels. Example: [32, 64, 128] - ratios: 1D array of anchor ratios of width/height. Example: [0.5, 1, 2] - shape: [height, width] spatial shape of the feature map over which - to generate anchors. - feature_stride: Stride of the feature map relative to the image in pixels. - anchor_stride: Stride of anchors on the feature map. For example, if the - value is 2 then generate anchors for every other feature map pixel. 
- """ - # Get all combinations of scales and ratios - scales, ratios = np.meshgrid(np.array(scales), np.array(ratios)) - scales = scales.flatten() - ratios = ratios.flatten() - - # Enumerate heights and widths from scales and ratios - heights = scales / np.sqrt(ratios) - widths = scales * np.sqrt(ratios) - - # Enumerate shifts in feature space - shifts_y = np.arange(0, shape[0], anchor_stride) * feature_stride - shifts_x = np.arange(0, shape[1], anchor_stride) * feature_stride - shifts_x, shifts_y = np.meshgrid(shifts_x, shifts_y) - - # Enumerate combinations of shifts, widths, and heights - box_widths, box_centers_x = np.meshgrid(widths, shifts_x) - box_heights, box_centers_y = np.meshgrid(heights, shifts_y) - - # Reshape to get a list of (y, x) and a list of (h, w) - box_centers = np.stack( - [box_centers_y, box_centers_x], axis=2).reshape([-1, 2]) - box_sizes = np.stack([box_heights, box_widths], axis=2).reshape([-1, 2]) - - # Convert to corner coordinates (y1, x1, y2, x2) - boxes = np.concatenate([box_centers - 0.5 * box_sizes, - box_centers + 0.5 * box_sizes], axis=1) - return boxes - - -def generate_pyramid_anchors(scales, ratios, feature_shapes, feature_strides, - anchor_stride): - """Generate anchors at different levels of a feature pyramid. Each scale - is associated with a level of the pyramid, but each ratio is used in - all levels of the pyramid. - - Returns: - anchors: [N, (y1, x1, y2, x2)]. All generated anchors in one array. Sorted - with the same order of the given scales. So, anchors of scale[0] come - first, then anchors of scale[1], and so on. - """ - # Anchors - # [anchor_count, (y1, x1, y2, x2)] - anchors = [] - for i in range(len(scales)): - anchors.append(generate_anchors(scales[i], ratios, feature_shapes[i], - feature_strides[i], anchor_stride)) - return np.concatenate(anchors, axis=0) - - -############################################################ -# Miscellaneous -############################################################ - -def trim_zeros(x): - """It's common to have tensors larger than the available data and - pad with zeros. This function removes rows that are all zeros. - - x: [rows, columns]. - """ - assert len(x.shape) == 2 - return x[~np.all(x == 0, axis=1)] - - -def compute_matches(gt_boxes, gt_class_ids, gt_masks, - pred_boxes, pred_class_ids, pred_scores, pred_masks, - iou_threshold=0.5, score_threshold=0.0): - """Finds matches between prediction and ground truth instances. - - Returns: - gt_match: 1-D array. For each GT box it has the index of the matched - predicted box. - pred_match: 1-D array. For each predicted box, it has the index of - the matched ground truth box. - overlaps: [pred_boxes, gt_boxes] IoU overlaps. 
- """ - # Trim zero padding - # TODO: cleaner to do zero unpadding upstream - gt_boxes = trim_zeros(gt_boxes) - gt_masks = gt_masks[..., :gt_boxes.shape[0]] - pred_boxes = trim_zeros(pred_boxes) - pred_scores = pred_scores[:pred_boxes.shape[0]] - # Sort predictions by score from high to low - indices = np.argsort(pred_scores)[::-1] - pred_boxes = pred_boxes[indices] - pred_class_ids = pred_class_ids[indices] - pred_scores = pred_scores[indices] - pred_masks = pred_masks[..., indices] - - # Compute IoU overlaps [pred_masks, gt_masks] - overlaps = compute_overlaps_masks(pred_masks, gt_masks) - - # Loop through predictions and find matching ground truth boxes - match_count = 0 - pred_match = -1 * np.ones([pred_boxes.shape[0]]) - gt_match = -1 * np.ones([gt_boxes.shape[0]]) - for i in range(len(pred_boxes)): - # Find best matching ground truth box - # 1. Sort matches by score - sorted_ixs = np.argsort(overlaps[i])[::-1] - # 2. Remove low scores - low_score_idx = np.where(overlaps[i, sorted_ixs] < score_threshold)[0] - if low_score_idx.size > 0: - sorted_ixs = sorted_ixs[:low_score_idx[0]] - # 3. Find the match - for j in sorted_ixs: - # If ground truth box is already matched, go to next one - if gt_match[j] > -1: - continue - # If we reach IoU smaller than the threshold, end the loop - iou = overlaps[i, j] - if iou < iou_threshold: - break - # Do we have a match? - if pred_class_ids[i] == gt_class_ids[j]: - match_count += 1 - gt_match[j] = i - pred_match[i] = j - break - - return gt_match, pred_match, overlaps - - -def compute_ap(gt_boxes, gt_class_ids, gt_masks, - pred_boxes, pred_class_ids, pred_scores, pred_masks, - iou_threshold=0.5): - """Compute Average Precision at a set IoU threshold (default 0.5). - - Returns: - mAP: Mean Average Precision - precisions: List of precisions at different class score thresholds. - recalls: List of recall values at different class score thresholds. - overlaps: [pred_boxes, gt_boxes] IoU overlaps. - """ - # Get matches and overlaps - gt_match, pred_match, overlaps = compute_matches( - gt_boxes, gt_class_ids, gt_masks, - pred_boxes, pred_class_ids, pred_scores, pred_masks, - iou_threshold) - - # Compute precision and recall at each prediction box step - precisions = np.cumsum(pred_match > -1) / (np.arange(len(pred_match)) + 1) - recalls = np.cumsum(pred_match > -1).astype(np.float32) / len(gt_match) - - # Pad with start and end values to simplify the math - precisions = np.concatenate([[0], precisions, [0]]) - recalls = np.concatenate([[0], recalls, [1]]) - - # Ensure precision values decrease but don't increase. This way, the - # precision value at each recall threshold is the maximum it can be - # for all following recall thresholds, as specified by the VOC paper. - for i in range(len(precisions) - 2, -1, -1): - precisions[i] = np.maximum(precisions[i], precisions[i + 1]) - - # Compute mean AP over recall range - indices = np.where(recalls[:-1] != recalls[1:])[0] + 1 - mAP = np.sum((recalls[indices] - recalls[indices - 1]) * - precisions[indices]) - - return mAP, precisions, recalls, overlaps - - -def compute_ap_range(gt_box, gt_class_id, gt_mask, - pred_box, pred_class_id, pred_score, pred_mask, - iou_thresholds=None, verbose=1): - """Compute AP over a range or IoU thresholds. 
Default range is 0.5-0.95.""" - # Default is 0.5 to 0.95 with increments of 0.05 - iou_thresholds = iou_thresholds or np.arange(0.5, 1.0, 0.05) - - # Compute AP over range of IoU thresholds - AP = [] - for iou_threshold in iou_thresholds: - ap, precisions, recalls, overlaps =\ - compute_ap(gt_box, gt_class_id, gt_mask, - pred_box, pred_class_id, pred_score, pred_mask, - iou_threshold=iou_threshold) - if verbose: - print("AP @{:.2f}:\t {:.3f}".format(iou_threshold, ap)) - AP.append(ap) - AP = np.array(AP).mean() - if verbose: - print("AP @{:.2f}-{:.2f}:\t {:.3f}".format( - iou_thresholds[0], iou_thresholds[-1], AP)) - return AP - - -def compute_recall(pred_boxes, gt_boxes, iou): - """Compute the recall at the given IoU threshold. It's an indication - of how many GT boxes were found by the given prediction boxes. - - pred_boxes: [N, (y1, x1, y2, x2)] in image coordinates - gt_boxes: [N, (y1, x1, y2, x2)] in image coordinates - """ - # Measure overlaps - overlaps = compute_overlaps(pred_boxes, gt_boxes) - iou_max = np.max(overlaps, axis=1) - iou_argmax = np.argmax(overlaps, axis=1) - positive_ids = np.where(iou_max >= iou)[0] - matched_gt_boxes = iou_argmax[positive_ids] - - recall = len(set(matched_gt_boxes)) / gt_boxes.shape[0] - return recall, positive_ids - - -# ## Batch Slicing -# Some custom layers support a batch size of 1 only, and require a lot of work -# to support batches greater than 1. This function slices an input tensor -# across the batch dimension and feeds batches of size 1. Effectively, -# an easy way to support batches > 1 quickly with little code modification. -# In the long run, it's more efficient to modify the code to support large -# batches and getting rid of this function. Consider this a temporary solution -def batch_slice(inputs, graph_fn, batch_size, names=None): - """Splits inputs into slices and feeds each slice to a copy of the given - computation graph and then combines the results. It allows you to run a - graph on a batch of inputs even if the graph is written to support one - instance only. - - inputs: list of tensors. All must have the same first dimension length - graph_fn: A function that returns a TF tensor that's part of a graph. - batch_size: number of slices to divide the data into. - names: If provided, assigns names to the resulting tensors. - """ - if not isinstance(inputs, list): - inputs = [inputs] - - outputs = [] - for i in range(batch_size): - inputs_slice = [x[i] for x in inputs] - output_slice = graph_fn(*inputs_slice) - if not isinstance(output_slice, (tuple, list)): - output_slice = [output_slice] - outputs.append(output_slice) - # Change outputs from a list of slices where each is - # a list of outputs to a list of outputs and each has - # a list of slices - outputs = list(zip(*outputs)) - - if names is None: - names = [None] * len(outputs) - - result = [tf.stack(o, axis=0, name=n) - for o, n in zip(outputs, names)] - if len(result) == 1: - result = result[0] - - return result - - -def download_trained_weights(coco_model_path, verbose=1): - """Download COCO trained weights from Releases. - - coco_model_path: local path of COCO trained weights - """ - if verbose > 0: - print("Downloading pretrained model to " + coco_model_path + " ...") - with urllib.request.urlopen(COCO_MODEL_URL) as resp, open(coco_model_path, 'wb') as out: - shutil.copyfileobj(resp, out) - if verbose > 0: - print("... done downloading pretrained model!") - - -def norm_boxes(boxes, shape): - """Converts boxes from pixel coordinates to normalized coordinates. 
- boxes: [N, (y1, x1, y2, x2)] in pixel coordinates - shape: [..., (height, width)] in pixels - - Note: In pixel coordinates (y2, x2) is outside the box. But in normalized - coordinates it's inside the box. - - Returns: - [N, (y1, x1, y2, x2)] in normalized coordinates - """ - h, w = shape - scale = np.array([h - 1, w - 1, h - 1, w - 1]) - shift = np.array([0, 0, 1, 1]) - return np.divide((boxes - shift), scale).astype(np.float32) - - -def denorm_boxes(boxes, shape): - """Converts boxes from normalized coordinates to pixel coordinates. - boxes: [N, (y1, x1, y2, x2)] in normalized coordinates - shape: [..., (height, width)] in pixels - - Note: In pixel coordinates (y2, x2) is outside the box. But in normalized - coordinates it's inside the box. - - Returns: - [N, (y1, x1, y2, x2)] in pixel coordinates - """ - h, w = shape - scale = np.array([h - 1, w - 1, h - 1, w - 1]) - shift = np.array([0, 0, 1, 1]) - return np.around(np.multiply(boxes, scale) + shift).astype(np.int32) - - -def resize(image, output_shape, order=1, mode='constant', cval=0, clip=True, - preserve_range=False, anti_aliasing=False, anti_aliasing_sigma=None): - """A wrapper for Scikit-Image resize(). - - Scikit-Image generates warnings on every call to resize() if it doesn't - receive the right parameters. The right parameters depend on the version - of skimage. This solves the problem by using different parameters per - version. And it provides a central place to control resizing defaults. - """ - if LooseVersion(skimage.__version__) >= LooseVersion("0.14"): - # New in 0.14: anti_aliasing. Default it to False for backward - # compatibility with skimage 0.13. - return skimage.transform.resize( - image, output_shape, - order=order, mode=mode, cval=cval, clip=clip, - preserve_range=preserve_range, anti_aliasing=anti_aliasing, - anti_aliasing_sigma=anti_aliasing_sigma) - else: - return skimage.transform.resize( - image, output_shape, - order=order, mode=mode, cval=cval, clip=clip, - preserve_range=preserve_range) -""" -Mask R-CNN -Display and Visualization Functions. - -Copyright (c) 2017 Matterport, Inc. -Licensed under the MIT License (see LICENSE for details) -Written by Waleed Abdulla -""" - -from mrcnn import utils -import os -import sys -import random -import itertools -import colorsys - -import numpy as np -from skimage.measure import find_contours -import matplotlib.pyplot as plt -from matplotlib import patches, lines -from matplotlib.patches import Polygon -import IPython.display - -# Root directory of the project -ROOT_DIR = os.path.abspath("../") - -# Import Mask RCNN -sys.path.append(ROOT_DIR) # To find local version of the library - - -############################################################ -# Visualization -############################################################ - -def display_images(images, titles=None, cols=4, cmap=None, norm=None, - interpolation=None): - """Display the given set of images, optionally with titles. - images: list or array of image tensors in HWC format. - titles: optional. A list of titles to display with each image. - cols: number of images per row - cmap: Optional. Color map to use. For example, "Blues". - norm: Optional. A Normalize instance to map values to colors. - interpolation: Optional. Image interpolation to use for display. 
- """ - titles = titles if titles is not None else [""] * len(images) - rows = len(images) // cols + 1 - plt.figure(figsize=(14, 14 * rows // cols)) - i = 1 - for image, title in zip(images, titles): - plt.subplot(rows, cols, i) - plt.title(title, fontsize=9) - plt.axis('off') - plt.imshow(image.astype(np.uint8), cmap=cmap, - norm=norm, interpolation=interpolation) - i += 1 - plt.show() - - -def random_colors(N, bright=True): - """ - Generate random colors. - To get visually distinct colors, generate them in HSV space then - convert to RGB. - """ - brightness = 1.0 if bright else 0.7 - hsv = [(i / N, 1, brightness) for i in range(N)] - colors = list(map(lambda c: colorsys.hsv_to_rgb(*c), hsv)) - random.shuffle(colors) - return colors - - -def apply_mask(image, mask, color, alpha=0.5): - """Apply the given mask to the image. - """ - for c in range(3): - image[:, :, c] = np.where(mask == 1, - image[:, :, c] * - (1 - alpha) + alpha * color[c] * 255, - image[:, :, c]) - return image - - -def display_instances(image, boxes, masks, class_ids, class_names, - scores=None, title="", - figsize=(16, 16), ax=None, - show_mask=True, show_bbox=True, - colors=None, captions=None): - """ - boxes: [num_instance, (y1, x1, y2, x2, class_id)] in image coordinates. - masks: [height, width, num_instances] - class_ids: [num_instances] - class_names: list of class names of the dataset - scores: (optional) confidence scores for each box - title: (optional) Figure title - show_mask, show_bbox: To show masks and bounding boxes or not - figsize: (optional) the size of the image - colors: (optional) An array or colors to use with each object - captions: (optional) A list of strings to use as captions for each object - """ - # Number of instances - N = boxes.shape[0] - if not N: - print("\n*** No instances to display *** \n") - else: - assert boxes.shape[0] == masks.shape[-1] == class_ids.shape[0] - - # If no axis is passed, create one and automatically call show() - auto_show = False - if not ax: - _, ax = plt.subplots(1, figsize=figsize) - auto_show = True - - # Generate random colors - colors = colors or random_colors(N) - - # Show area outside image boundaries. - height, width = image.shape[:2] - ax.set_ylim(height + 10, -10) - ax.set_xlim(-10, width + 10) - ax.axis('off') - ax.set_title(title) - - masked_image = image.astype(np.uint32).copy() - for i in range(N): - color = colors[i] - - # Bounding box - if not np.any(boxes[i]): - # Skip this instance. Has no bbox. Likely lost in image cropping. - continue - y1, x1, y2, x2 = boxes[i] - if show_bbox: - p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2, - alpha=0.7, linestyle="dashed", - edgecolor=color, facecolor='none') - ax.add_patch(p) - - # Label - if not captions: - class_id = class_ids[i] - score = scores[i] if scores is not None else None - label = class_names[class_id] - caption = "{} {:.3f}".format(label, score) if score else label - else: - caption = captions[i] - ax.text(x1, y1 + 8, caption, - color='w', size=11, backgroundcolor="none") - - # Mask - mask = masks[:, :, i] - if show_mask: - masked_image = apply_mask(masked_image, mask, color) - - # Mask Polygon - # Pad to ensure proper polygons for masks that touch image edges. 
- padded_mask = np.zeros( - (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8) - padded_mask[1:-1, 1:-1] = mask - contours = find_contours(padded_mask, 0.5) - for verts in contours: - # Subtract the padding and flip (y, x) to (x, y) - verts = np.fliplr(verts) - 1 - p = Polygon(verts, facecolor="none", edgecolor=color) - ax.add_patch(p) - ax.imshow(masked_image.astype(np.uint8)) - if auto_show: - plt.show() - - -def display_differences(image, - gt_box, gt_class_id, gt_mask, - pred_box, pred_class_id, pred_score, pred_mask, - class_names, title="", ax=None, - show_mask=True, show_box=True, - iou_threshold=0.5, score_threshold=0.5): - """Display ground truth and prediction instances on the same image.""" - # Match predictions to ground truth - gt_match, pred_match, overlaps = utils.compute_matches( - gt_box, gt_class_id, gt_mask, - pred_box, pred_class_id, pred_score, pred_mask, - iou_threshold=iou_threshold, score_threshold=score_threshold) - # Ground truth = green. Predictions = red - colors = [(0, 1, 0, .8)] * len(gt_match)\ - + [(1, 0, 0, 1)] * len(pred_match) - # Concatenate GT and predictions - class_ids = np.concatenate([gt_class_id, pred_class_id]) - scores = np.concatenate([np.zeros([len(gt_match)]), pred_score]) - boxes = np.concatenate([gt_box, pred_box]) - masks = np.concatenate([gt_mask, pred_mask], axis=-1) - # Captions per instance show score/IoU - captions = ["" for m in gt_match] + ["{:.2f} / {:.2f}".format( - pred_score[i], - (overlaps[i, int(pred_match[i])] - if pred_match[i] > -1 else overlaps[i].max())) - for i in range(len(pred_match))] - # Set title if not provided - title = title or "Ground Truth and Detections\n GT=green, pred=red, captions: score/IoU" - # Display - display_instances( - image, - boxes, masks, class_ids, - class_names, scores, ax=ax, - show_bbox=show_box, show_mask=show_mask, - colors=colors, captions=captions, - title=title) - - -def draw_rois(image, rois, refined_rois, mask, class_ids, class_names, limit=10): - """ - anchors: [n, (y1, x1, y2, x2)] list of anchors in image coordinates. - proposals: [n, 4] the same anchors but refined to fit objects better. - """ - masked_image = image.copy() - - # Pick random anchors in case there are too many. - ids = np.arange(rois.shape[0], dtype=np.int32) - ids = np.random.choice( - ids, limit, replace=False) if ids.shape[0] > limit else ids - - fig, ax = plt.subplots(1, figsize=(12, 12)) - if rois.shape[0] > limit: - plt.title("Showing {} random ROIs out of {}".format( - len(ids), rois.shape[0])) - else: - plt.title("{} ROIs".format(len(ids))) - - # Show area outside image boundaries. 
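- # Added commentary (illustrative): the inverted y-limits below put the origin
- # at the top-left, matching image coordinates, and the extra margin keeps
- # boxes near the border visible.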
-     ax.set_ylim(image.shape[0] + 20, -20)
-     ax.set_xlim(-50, image.shape[1] + 20)
-     ax.axis('off')
-
-     for i, id in enumerate(ids):
-         color = np.random.rand(3)
-         class_id = class_ids[id]
-         # ROI
-         y1, x1, y2, x2 = rois[id]
-         p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
-                               edgecolor=color if class_id else "gray",
-                               facecolor='none', linestyle="dashed")
-         ax.add_patch(p)
-         # Refined ROI
-         if class_id:
-             ry1, rx1, ry2, rx2 = refined_rois[id]
-             p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
-                                   edgecolor=color, facecolor='none')
-             ax.add_patch(p)
-             # Connect the top-left corners of the anchor and proposal for easy visualization
-             ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
-
-             # Label
-             label = class_names[class_id]
-             ax.text(rx1, ry1 + 8, "{}".format(label),
-                     color='w', size=11, backgroundcolor="none")
-
-             # Mask
-             m = utils.unmold_mask(mask[id], rois[id][:4].astype(np.int32), image.shape)
-             masked_image = apply_mask(masked_image, m, color)
-
-     ax.imshow(masked_image)
-
-     # Print stats
-     print("Positive ROIs: ", class_ids[class_ids > 0].shape[0])
-     print("Negative ROIs: ", class_ids[class_ids == 0].shape[0])
-     print("Positive Ratio: {:.2f}".format(
-         class_ids[class_ids > 0].shape[0] / class_ids.shape[0]))
-
-
- # TODO: Replace with matplotlib equivalent?
- def draw_box(image, box, color):
-     """Draw 3-pixel width bounding boxes on the given image array.
-     color: list of 3 int values for RGB.
-     """
-     y1, x1, y2, x2 = box
-     image[y1:y1 + 2, x1:x2] = color
-     image[y2:y2 + 2, x1:x2] = color
-     image[y1:y2, x1:x1 + 2] = color
-     image[y1:y2, x2:x2 + 2] = color
-     return image
-
-
- def display_top_masks(image, mask, class_ids, class_names, limit=4):
-     """Display the given image and the top few class masks."""
-     to_display = []
-     titles = []
-     to_display.append(image)
-     titles.append("H x W={}x{}".format(image.shape[0], image.shape[1]))
-     # Pick top prominent classes in this image
-     unique_class_ids = np.unique(class_ids)
-     mask_area = [np.sum(mask[:, :, np.where(class_ids == i)[0]])
-                  for i in unique_class_ids]
-     top_ids = [v[0] for v in sorted(zip(unique_class_ids, mask_area),
-                                     key=lambda r: r[1], reverse=True) if v[1] > 0]
-     # Generate images and titles
-     for i in range(limit):
-         class_id = top_ids[i] if i < len(top_ids) else -1
-         # Pull masks of instances belonging to the same class.
-         m = mask[:, :, np.where(class_ids == class_id)[0]]
-         m = np.sum(m * np.arange(1, m.shape[-1] + 1), -1)
-         to_display.append(m)
-         titles.append(class_names[class_id] if class_id != -1 else "-")
-     display_images(to_display, titles=titles, cols=limit + 1, cmap="Blues_r")
-
-
- def plot_precision_recall(AP, precisions, recalls):
-     """Draw the precision-recall curve.
-
-     AP: Average precision at IoU >= 0.5
-     precisions: list of precision values
-     recalls: list of recall values
-     """
-     # Plot the Precision-Recall curve
-     _, ax = plt.subplots(1)
-     ax.set_title("Precision-Recall Curve. AP@50 = {:.3f}".format(AP))
-     ax.set_ylim(0, 1.1)
-     ax.set_xlim(0, 1.1)
-     _ = ax.plot(recalls, precisions)
-
-
- def plot_overlaps(gt_class_ids, pred_class_ids, pred_scores,
-                   overlaps, class_names, threshold=0.5):
-     """Draw a grid showing how ground truth objects are classified.
-     gt_class_ids: [N] int. Ground truth class IDs
-     pred_class_ids: [N] int. Predicted class IDs
-     pred_scores: [N] float. The probability scores of predicted classes
-     overlaps: [pred_boxes, gt_boxes] IoU overlaps of predictions and GT boxes.
-     class_names: list of all class names in the dataset
-     threshold: Float.
The prediction probability required to predict a class - """ - gt_class_ids = gt_class_ids[gt_class_ids != 0] - pred_class_ids = pred_class_ids[pred_class_ids != 0] - - plt.figure(figsize=(12, 10)) - plt.imshow(overlaps, interpolation='nearest', cmap=plt.cm.Blues) - plt.yticks(np.arange(len(pred_class_ids)), - ["{} ({:.2f})".format(class_names[int(id)], pred_scores[i]) - for i, id in enumerate(pred_class_ids)]) - plt.xticks(np.arange(len(gt_class_ids)), - [class_names[int(id)] for id in gt_class_ids], rotation=90) - - thresh = overlaps.max() / 2. - for i, j in itertools.product(range(overlaps.shape[0]), - range(overlaps.shape[1])): - text = "" - if overlaps[i, j] > threshold: - text = "match" if gt_class_ids[j] == pred_class_ids[i] else "wrong" - color = ("white" if overlaps[i, j] > thresh - else "black" if overlaps[i, j] > 0 - else "grey") - plt.text(j, i, "{:.3f}\n{}".format(overlaps[i, j], text), - horizontalalignment="center", verticalalignment="center", - fontsize=9, color=color) - - plt.tight_layout() - plt.xlabel("Ground Truth") - plt.ylabel("Predictions") - - -def draw_boxes(image, boxes=None, refined_boxes=None, - masks=None, captions=None, visibilities=None, - title="", ax=None): - """Draw bounding boxes and segmentation masks with different - customizations. - - boxes: [N, (y1, x1, y2, x2, class_id)] in image coordinates. - refined_boxes: Like boxes, but draw with solid lines to show - that they're the result of refining 'boxes'. - masks: [N, height, width] - captions: List of N titles to display on each box - visibilities: (optional) List of values of 0, 1, or 2. Determine how - prominent each bounding box should be. - title: An optional title to show over the image - ax: (optional) Matplotlib axis to draw on. - """ - # Number of boxes - assert boxes is not None or refined_boxes is not None - N = boxes.shape[0] if boxes is not None else refined_boxes.shape[0] - - # Matplotlib Axis - if not ax: - _, ax = plt.subplots(1, figsize=(12, 12)) - - # Generate random colors - colors = random_colors(N) - - # Show area outside image boundaries. - margin = image.shape[0] // 10 - ax.set_ylim(image.shape[0] + margin, -margin) - ax.set_xlim(-margin, image.shape[1] + margin) - ax.axis('off') - - ax.set_title(title) - - masked_image = image.astype(np.uint32).copy() - for i in range(N): - # Box visibility - visibility = visibilities[i] if visibilities is not None else 1 - if visibility == 0: - color = "gray" - style = "dotted" - alpha = 0.5 - elif visibility == 1: - color = colors[i] - style = "dotted" - alpha = 1 - elif visibility == 2: - color = colors[i] - style = "solid" - alpha = 1 - - # Boxes - if boxes is not None: - if not np.any(boxes[i]): - # Skip this instance. Has no bbox. Likely lost in cropping. 
-                continue
-            y1, x1, y2, x2 = boxes[i]
-            p = patches.Rectangle((x1, y1), x2 - x1, y2 - y1, linewidth=2,
-                                  alpha=alpha, linestyle=style,
-                                  edgecolor=color, facecolor='none')
-            ax.add_patch(p)
-
-        # Refined boxes
-        if refined_boxes is not None and visibility > 0:
-            ry1, rx1, ry2, rx2 = refined_boxes[i].astype(np.int32)
-            p = patches.Rectangle((rx1, ry1), rx2 - rx1, ry2 - ry1, linewidth=2,
-                                  edgecolor=color, facecolor='none')
-            ax.add_patch(p)
-            # Connect the top-left corners of the anchor and proposal
-            if boxes is not None:
-                ax.add_line(lines.Line2D([x1, rx1], [y1, ry1], color=color))
-
-        # Captions
-        if captions is not None:
-            caption = captions[i]
-            # If there are refined boxes, display captions on them
-            if refined_boxes is not None:
-                y1, x1, y2, x2 = ry1, rx1, ry2, rx2
-            ax.text(x1, y1, caption, size=11, verticalalignment='top',
-                    color='w', backgroundcolor="none",
-                    bbox={'facecolor': color, 'alpha': 0.5,
-                          'pad': 2, 'edgecolor': 'none'})
-
-        # Masks
-        if masks is not None:
-            mask = masks[:, :, i]
-            masked_image = apply_mask(masked_image, mask, color)
-            # Mask Polygon
-            # Pad to ensure proper polygons for masks that touch image edges.
-            padded_mask = np.zeros(
-                (mask.shape[0] + 2, mask.shape[1] + 2), dtype=np.uint8)
-            padded_mask[1:-1, 1:-1] = mask
-            contours = find_contours(padded_mask, 0.5)
-            for verts in contours:
-                # Subtract the padding and flip (y, x) to (x, y)
-                verts = np.fliplr(verts) - 1
-                p = Polygon(verts, facecolor="none", edgecolor=color)
-                ax.add_patch(p)
-    ax.imshow(masked_image.astype(np.uint8))
-
-
-def display_table(table):
-    """Display values in a table format.
-    table: an iterable of rows, and each row is an iterable of values.
-    """
-    html = ""
-    for row in table:
-        row_html = ""
-        for col in row:
-            row_html += "<td>{:40}</td>".format(str(col))
-        html += "<tr>" + row_html + "</tr>"
-    html = "<table>" + html + "</table>"
-    IPython.display.display(IPython.display.HTML(html))
-
-
-def display_weight_stats(model):
-    """Scans all the weights in the model and returns a list of tuples
-    that contain stats about each weight.
-    """
-    layers = model.get_trainable_layers()
-    table = [["WEIGHT NAME", "SHAPE", "MIN", "MAX", "STD"]]
-    for l in layers:
-        weight_values = l.get_weights()  # list of Numpy arrays
-        weight_tensors = l.weights  # list of TF tensors
-        for i, w in enumerate(weight_values):
-            weight_name = weight_tensors[i].name
-            # Detect problematic layers. Exclude biases of conv layers.
-            alert = ""
-            if w.min() == w.max() and not (l.__class__.__name__ == "Conv2D" and i == 1):
-                alert += "*** dead?"
-            if np.abs(w.min()) > 1000 or np.abs(w.max()) > 1000:
-                alert += "*** Overflow?"
-            # Add row
-            table.append([
-                weight_name + alert,
-                str(w.shape),
-                "{:+9.4f}".format(w.min()),
-                "{:+10.4f}".format(w.max()),
-                "{:+9.4f}".format(w.std()),
-            ])
-    display_table(table)
-"""
-Mask R-CNN
-Train on the toy Balloon dataset and implement color splash effect.
-
-Copyright (c) 2018 Matterport, Inc.
-Licensed under the MIT License (see LICENSE for details)
-Written by Waleed Abdulla
-
-------------------------------------------------------------
-
-Usage: import the module (see Jupyter notebooks for examples), or run from
-       the command line as such:
-
-    # Train a new model starting from pre-trained COCO weights
-    python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=coco
-
-    # Resume training a model that you had trained earlier
-    python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=last
-
-    # Train a new model starting from ImageNet weights
-    python3 balloon.py train --dataset=/path/to/balloon/dataset --weights=imagenet
-
-    # Apply color splash to an image
-    python3 balloon.py splash --weights=/path/to/weights/file.h5 --image=<URL or path to file>
-
-    # Apply color splash to video using the last weights you trained
-    python3 balloon.py splash --weights=last --video=<URL or path to file>
-"""
-
-from mrcnn import model as modellib, utils
-from mrcnn.config import Config
-import os
-import sys
-import json
-import datetime
-import numpy as np
-import skimage.draw
-import skimage.io  # used by load_balloon() and detect_and_color_splash()
-import skimage.color  # used by color_splash()
-
-# Root directory of the project
-ROOT_DIR = os.path.abspath("../../")
-
-# Import Mask RCNN
-sys.path.append(ROOT_DIR)  # To find local version of the library
-
-# Path to trained weights file
-COCO_WEIGHTS_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5")
-
-# Directory to save logs and model checkpoints, if not provided
-# through the command line argument --logs
-DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs")
-
-############################################################
-#  Configurations
-############################################################
-
-
-class BalloonConfig(Config):
-    """Configuration for training on the toy dataset.
-    Derives from the base Config class and overrides some values.
-    """
-    # Give the configuration a recognizable name
-    NAME = "balloon"
-
-    # We use a GPU with 12GB memory, which can fit two images.
-    # Adjust down if you use a smaller GPU.
-    IMAGES_PER_GPU = 2
-
-    # Number of classes (including background)
-    NUM_CLASSES = 1 + 1  # Background + balloon
-
-    # Number of training steps per epoch
-    STEPS_PER_EPOCH = 100
-
-    # Skip detections with < 90% confidence
-    DETECTION_MIN_CONFIDENCE = 0.9
-
-
-############################################################
-#  Dataset
-############################################################
-
-class BalloonDataset(utils.Dataset):
-
-    def load_balloon(self, dataset_dir, subset):
-        """Load a subset of the Balloon dataset.
-        dataset_dir: Root directory of the dataset.
-        subset: Subset to load: train or val
-        """
-        # Add classes. We have only one class to add.
-        self.add_class("balloon", 1, "balloon")
-
-        # Train or validation dataset?
-        assert subset in ["train", "val"]
-        dataset_dir = os.path.join(dataset_dir, subset)
-
-        # Load annotations
-        # VGG Image Annotator (up to version 1.6) saves each image in the form:
-        # { 'filename': '28503151_5b5b7ec140_b.jpg',
-        #   'regions': {
-        #       '0': {
-        #           'region_attributes': {},
-        #           'shape_attributes': {
-        #               'all_points_x': [...],
-        #               'all_points_y': [...],
-        #               'name': 'polygon'}},
-        #       ... more regions ...
-        #   },
-        #   'size': 100202
-        # }
-        # We mostly care about the x and y coordinates of each region
-        # Note: In VIA 2.0, regions was changed from a dict to a list.
-        annotations = json.load(
-            open(os.path.join(dataset_dir, "via_region_data.json")))
-        annotations = list(annotations.values())  # don't need the dict keys
-
-        # The VIA tool saves images in the JSON even if they don't have any
-        # annotations. Skip unannotated images.
-        annotations = [a for a in annotations if a['regions']]
-
-        # Add images
-        for a in annotations:
-            # Get the x, y coordinates of points of the polygons that make up
-            # the outline of each object instance. These are stored in the
-            # shape_attributes (see JSON format above).
-            # The if condition is needed to support VIA versions 1.x and 2.x.
-            if type(a['regions']) is dict:
-                polygons = [r['shape_attributes']
-                            for r in a['regions'].values()]
-            else:
-                polygons = [r['shape_attributes'] for r in a['regions']]
-
-            # load_mask() needs the image size to convert polygons to masks.
-            # Unfortunately, VIA doesn't include it in JSON, so we must read
-            # the image. This is only manageable since the dataset is tiny.
-            image_path = os.path.join(dataset_dir, a['filename'])
-            image = skimage.io.imread(image_path)
-            height, width = image.shape[:2]
-
-            self.add_image(
-                "balloon",
-                image_id=a['filename'],  # use file name as a unique image id
-                path=image_path,
-                width=width, height=height,
-                polygons=polygons)
-
-    def load_mask(self, image_id):
-        """Generate instance masks for an image.
-        Returns:
-        masks: A bool array of shape [height, width, instance count] with
-            one mask per instance.
-        class_ids: a 1D array of class IDs of the instance masks.
-        """
-        # If not a balloon dataset image, delegate to parent class.
-        image_info = self.image_info[image_id]
-        if image_info["source"] != "balloon":
-            return super(self.__class__, self).load_mask(image_id)
-
-        # Convert polygons to a bitmap mask of shape
-        # [height, width, instance_count]
-        info = self.image_info[image_id]
-        mask = np.zeros([info["height"], info["width"], len(info["polygons"])],
-                        dtype=np.uint8)
-        for i, p in enumerate(info["polygons"]):
-            # Get indexes of pixels inside the polygon and set them to 1
-            rr, cc = skimage.draw.polygon(p['all_points_y'], p['all_points_x'])
-            mask[rr, cc, i] = 1
-
-        # Return mask, and array of class IDs of each instance.
Since we have - # one class ID only, we return an array of 1s - return mask.astype(np.bool), np.ones([mask.shape[-1]], dtype=np.int32) - - def image_reference(self, image_id): - """Return the path of the image.""" - info = self.image_info[image_id] - if info["source"] == "balloon": - return info["path"] - else: - super(self.__class__, self).image_reference(image_id) - - -def train(model): - """Train the model.""" - # Training dataset. - dataset_train = BalloonDataset() - dataset_train.load_balloon(args.dataset, "train") - dataset_train.prepare() - - # Validation dataset - dataset_val = BalloonDataset() - dataset_val.load_balloon(args.dataset, "val") - dataset_val.prepare() - - # *** This training schedule is an example. Update to your needs *** - # Since we're using a very small dataset, and starting from - # COCO trained weights, we don't need to train too long. Also, - # no need to train all layers, just the heads should do it. - print("Training network heads") - model.train(dataset_train, dataset_val, - learning_rate=config.LEARNING_RATE, - epochs=30, - layers='heads') - - -def color_splash(image, mask): - """Apply color splash effect. - image: RGB image [height, width, 3] - mask: instance segmentation mask [height, width, instance count] - - Returns result image. - """ - # Make a grayscale copy of the image. The grayscale copy still - # has 3 RGB channels, though. - gray = skimage.color.gray2rgb(skimage.color.rgb2gray(image)) * 255 - # Copy color pixels from the original color image where mask is set - if mask.shape[-1] > 0: - # We're treating all instances as one, so collapse the mask into one layer - mask = (np.sum(mask, -1, keepdims=True) >= 1) - splash = np.where(mask, image, gray).astype(np.uint8) - else: - splash = gray.astype(np.uint8) - return splash - - -def detect_and_color_splash(model, image_path=None, video_path=None): - assert image_path or video_path - - # Image or video? 
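-    # color_splash() above collapses all instance masks into one boolean layer
-    # (np.sum(..., keepdims=True) >= 1), then lets np.where() broadcast that
-    # [H, W, 1] mask across the RGB channels: color inside any instance,
-    # grayscale everywhere else.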
-    if image_path:
-        # Run model detection and generate the color splash effect
-        print("Running on {}".format(image_path))
-        # Read image
-        image = skimage.io.imread(image_path)
-        # Detect objects
-        r = model.detect([image], verbose=1)[0]
-        # Color splash
-        splash = color_splash(image, r['masks'])
-        # Save output
-        file_name = "splash_{:%Y%m%dT%H%M%S}.png".format(
-            datetime.datetime.now())
-        skimage.io.imsave(file_name, splash)
-    elif video_path:
-        import cv2
-        # Video capture
-        vcapture = cv2.VideoCapture(video_path)
-        width = int(vcapture.get(cv2.CAP_PROP_FRAME_WIDTH))
-        height = int(vcapture.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        fps = vcapture.get(cv2.CAP_PROP_FPS)
-
-        # Define codec and create video writer
-        file_name = "splash_{:%Y%m%dT%H%M%S}.avi".format(
-            datetime.datetime.now())
-        vwriter = cv2.VideoWriter(file_name,
-                                  cv2.VideoWriter_fourcc(*'MJPG'),
-                                  fps, (width, height))
-
-        count = 0
-        success = True
-        while success:
-            print("frame: ", count)
-            # Read next image
-            success, image = vcapture.read()
-            if success:
-                # OpenCV returns images as BGR, convert to RGB
-                image = image[..., ::-1]
-                # Detect objects
-                r = model.detect([image], verbose=0)[0]
-                # Color splash
-                splash = color_splash(image, r['masks'])
-                # RGB -> BGR to save image to video
-                splash = splash[..., ::-1]
-                # Add image to video writer
-                vwriter.write(splash)
-                count += 1
-        vwriter.release()
-    print("Saved to ", file_name)
-
-
-############################################################
-#  Training
-############################################################
-
-if __name__ == '__main__':
-    import argparse
-
-    # Parse command line arguments
-    parser = argparse.ArgumentParser(
-        description='Train Mask R-CNN to detect balloons.')
-    parser.add_argument("command",
-                        metavar="<command>",
-                        help="'train' or 'splash'")
-    parser.add_argument('--dataset', required=False,
-                        metavar="/path/to/balloon/dataset/",
-                        help='Directory of the Balloon dataset')
-    parser.add_argument('--weights', required=True,
-                        metavar="/path/to/weights.h5",
-                        help="Path to weights .h5 file or 'coco'")
-    parser.add_argument('--logs', required=False,
-                        default=DEFAULT_LOGS_DIR,
-                        metavar="/path/to/logs/",
-                        help='Logs and checkpoints directory (default=logs/)')
-    parser.add_argument('--image', required=False,
-                        metavar="path or URL to image",
-                        help='Image to apply the color splash effect on')
-    parser.add_argument('--video', required=False,
-                        metavar="path or URL to video",
-                        help='Video to apply the color splash effect on')
-    args = parser.parse_args()
-
-    # Validate arguments
-    if args.command == "train":
-        assert args.dataset, "Argument --dataset is required for training"
-    elif args.command == "splash":
-        assert args.image or args.video,\
-            "Provide --image or --video to apply color splash"
-
-    print("Weights: ", args.weights)
-    print("Dataset: ", args.dataset)
-    print("Logs: ", args.logs)
-
-    # Configurations
-    if args.command == "train":
-        config = BalloonConfig()
-    else:
-        class InferenceConfig(BalloonConfig):
-            # Set batch size to 1 since we'll be running inference on
-            # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
-            GPU_COUNT = 1
-            IMAGES_PER_GPU = 1
-        config = InferenceConfig()
-    config.display()
-
-    # Create model
-    if args.command == "train":
-        model = modellib.MaskRCNN(mode="training", config=config,
-                                  model_dir=args.logs)
-    else:
-        model = modellib.MaskRCNN(mode="inference", config=config,
-                                  model_dir=args.logs)
-
-    # Select weights file to load
-    if args.weights.lower() == "coco":
-        weights_path = COCO_WEIGHTS_PATH
-        # Download weights file
-        if not os.path.exists(weights_path):
-            utils.download_trained_weights(weights_path)
-    elif args.weights.lower() == "last":
-        # Find last trained weights
-        weights_path = model.find_last()
-    elif args.weights.lower() == "imagenet":
-        # Start from ImageNet trained weights
-        weights_path = model.get_imagenet_weights()
-    else:
-        weights_path = args.weights
-
-    # Load weights
-    print("Loading weights ", weights_path)
-    if args.weights.lower() == "coco":
-        # Exclude the last layers because they require a matching
-        # number of classes
-        model.load_weights(weights_path, by_name=True, exclude=[
-            "mrcnn_class_logits", "mrcnn_bbox_fc",
-            "mrcnn_bbox", "mrcnn_mask"])
-    else:
-        model.load_weights(weights_path, by_name=True)
-
-    # Train or evaluate
-    if args.command == "train":
-        train(model)
-    elif args.command == "splash":
-        detect_and_color_splash(model, image_path=args.image,
-                                video_path=args.video)
-    else:
-        print("'{}' is not recognized. "
-              "Use 'train' or 'splash'".format(args.command))
-"""
-Mask R-CNN
-Configurations and data loading code for MS COCO.
-
-Copyright (c) 2017 Matterport, Inc.
-Licensed under the MIT License (see LICENSE for details)
-Written by Waleed Abdulla
-
-------------------------------------------------------------
-
-Usage: import the module (see Jupyter notebooks for examples), or run from
-       the command line as such:
-
-    # Train a new model starting from pre-trained COCO weights
-    python3 coco.py train --dataset=/path/to/coco/ --model=coco
-
-    # Train a new model starting from ImageNet weights. Also auto download COCO dataset
-    python3 coco.py train --dataset=/path/to/coco/ --model=imagenet --download=True
-
-    # Continue training a model that you had trained earlier
-    python3 coco.py train --dataset=/path/to/coco/ --model=/path/to/weights.h5
-
-    # Continue training the last model you trained
-    python3 coco.py train --dataset=/path/to/coco/ --model=last
-
-    # Run COCO evaluation on the last model you trained
-    python3 coco.py evaluate --dataset=/path/to/coco/ --model=last
-"""
-
-from mrcnn import model as modellib, utils
-from mrcnn.config import Config
-import os
-import sys
-import time
-import numpy as np
-import imgaug  # https://github.com/aleju/imgaug (pip3 install imgaug)
-
-# Download and install the Python COCO tools from https://github.com/waleedka/coco
-# That's a fork from the original https://github.com/pdollar/coco with a bug
-# fix for Python 3.
-# I submitted a pull request https://github.com/cocodataset/cocoapi/pull/50
-# If the PR is merged then use the original repo.
-# Note: Edit PythonAPI/Makefile and replace "python" with "python3".
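-
-# A minimal sketch of the RLE mask round-trip these COCO tools provide; the
-# 2x2 `toy` mask below is illustrative, while the maskUtils calls are the
-# actual pycocotools API:
-import numpy as np
-from pycocotools import mask as maskUtils
-
-toy = np.asfortranarray(np.array([[1, 0], [1, 1]], dtype=np.uint8))
-rle = maskUtils.encode(toy)                  # compressed RLE dict: {'size': [2, 2], 'counts': ...}
-assert (maskUtils.decode(rle) == toy).all()  # decode() is the exact inverse
-assert maskUtils.area(rle) == 3              # foreground pixel count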
-from pycocotools.coco import COCO -from pycocotools.cocoeval import COCOeval -from pycocotools import mask as maskUtils - -import zipfile -import urllib.request -import shutil - -# Root directory of the project -ROOT_DIR = os.path.abspath("../../") - -# Import Mask RCNN -sys.path.append(ROOT_DIR) # To find local version of the library - -# Path to trained weights file -COCO_MODEL_PATH = os.path.join(ROOT_DIR, "mask_rcnn_coco.h5") - -# Directory to save logs and model checkpoints, if not provided -# through the command line argument --logs -DEFAULT_LOGS_DIR = os.path.join(ROOT_DIR, "logs") -DEFAULT_DATASET_YEAR = "2014" - -############################################################ -# Configurations -############################################################ - - -class CocoConfig(Config): - """Configuration for training on MS COCO. - Derives from the base Config class and overrides values specific - to the COCO dataset. - """ - # Give the configuration a recognizable name - NAME = "coco" - - # We use a GPU with 12GB memory, which can fit two images. - # Adjust down if you use a smaller GPU. - IMAGES_PER_GPU = 2 - - # Uncomment to train on 8 GPUs (default is 1) - # GPU_COUNT = 8 - - # Number of classes (including background) - NUM_CLASSES = 1 + 80 # COCO has 80 classes - - -############################################################ -# Dataset -############################################################ - -class CocoDataset(utils.Dataset): - def load_coco(self, dataset_dir, subset, year=DEFAULT_DATASET_YEAR, class_ids=None, - class_map=None, return_coco=False, auto_download=False): - """Load a subset of the COCO dataset. - dataset_dir: The root directory of the COCO dataset. - subset: What to load (train, val, minival, valminusminival) - year: What dataset year to load (2014, 2017) as a string, not an integer - class_ids: If provided, only loads images that have the given classes. - class_map: TODO: Not implemented yet. Supports maping classes from - different datasets to the same class ID. - return_coco: If True, returns the COCO object. - auto_download: Automatically download and unzip MS-COCO images and annotations - """ - - if auto_download is True: - self.auto_download(dataset_dir, subset, year) - - coco = COCO( - "{}/annotations/instances_{}{}.json".format(dataset_dir, subset, year)) - if subset == "minival" or subset == "valminusminival": - subset = "val" - image_dir = "{}/{}{}".format(dataset_dir, subset, year) - - # Load all classes or a subset? - if not class_ids: - # All classes - class_ids = sorted(coco.getCatIds()) - - # All images or a subset? - if class_ids: - image_ids = [] - for id in class_ids: - image_ids.extend(list(coco.getImgIds(catIds=[id]))) - # Remove duplicates - image_ids = list(set(image_ids)) - else: - # All images - image_ids = list(coco.imgs.keys()) - - # Add classes - for i in class_ids: - self.add_class("coco", i, coco.loadCats(i)[0]["name"]) - - # Add images - for i in image_ids: - self.add_image( - "coco", image_id=i, - path=os.path.join(image_dir, coco.imgs[i]['file_name']), - width=coco.imgs[i]["width"], - height=coco.imgs[i]["height"], - annotations=coco.loadAnns(coco.getAnnIds( - imgIds=[i], catIds=class_ids, iscrowd=None))) - if return_coco: - return coco - - def auto_download(self, dataDir, dataType, dataYear): - """Download the COCO dataset/annotations if requested. - dataDir: The root directory of the COCO dataset. 
- dataType: What to load (train, val, minival, valminusminival) - dataYear: What dataset year to load (2014, 2017) as a string, not an integer - Note: - For 2014, use "train", "val", "minival", or "valminusminival" - For 2017, only "train" and "val" annotations are available - """ - - # Setup paths and file names - if dataType == "minival" or dataType == "valminusminival": - imgDir = "{}/{}{}".format(dataDir, "val", dataYear) - imgZipFile = "{}/{}{}.zip".format(dataDir, "val", dataYear) - imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format( - "val", dataYear) - else: - imgDir = "{}/{}{}".format(dataDir, dataType, dataYear) - imgZipFile = "{}/{}{}.zip".format(dataDir, dataType, dataYear) - imgURL = "http://images.cocodataset.org/zips/{}{}.zip".format( - dataType, dataYear) - # print("Image paths:"); print(imgDir); print(imgZipFile); print(imgURL) - - # Create main folder if it doesn't exist yet - if not os.path.exists(dataDir): - os.makedirs(dataDir) - - # Download images if not available locally - if not os.path.exists(imgDir): - os.makedirs(imgDir) - print("Downloading images to " + imgZipFile + " ...") - with urllib.request.urlopen(imgURL) as resp, open(imgZipFile, 'wb') as out: - shutil.copyfileobj(resp, out) - print("... done downloading.") - print("Unzipping " + imgZipFile) - with zipfile.ZipFile(imgZipFile, "r") as zip_ref: - zip_ref.extractall(dataDir) - print("... done unzipping") - print("Will use images in " + imgDir) - - # Setup annotations data paths - annDir = "{}/annotations".format(dataDir) - if dataType == "minival": - annZipFile = "{}/instances_minival2014.json.zip".format(dataDir) - annFile = "{}/instances_minival2014.json".format(annDir) - annURL = "https://dl.dropboxusercontent.com/s/o43o90bna78omob/instances_minival2014.json.zip?dl=0" - unZipDir = annDir - elif dataType == "valminusminival": - annZipFile = "{}/instances_valminusminival2014.json.zip".format( - dataDir) - annFile = "{}/instances_valminusminival2014.json".format(annDir) - annURL = "https://dl.dropboxusercontent.com/s/s3tw5zcg7395368/instances_valminusminival2014.json.zip?dl=0" - unZipDir = annDir - else: - annZipFile = "{}/annotations_trainval{}.zip".format( - dataDir, dataYear) - annFile = "{}/instances_{}{}.json".format( - annDir, dataType, dataYear) - annURL = "http://images.cocodataset.org/annotations/annotations_trainval{}.zip".format( - dataYear) - unZipDir = dataDir - # print("Annotations paths:"); print(annDir); print(annFile); print(annZipFile); print(annURL) - - # Download annotations if not available locally - if not os.path.exists(annDir): - os.makedirs(annDir) - if not os.path.exists(annFile): - if not os.path.exists(annZipFile): - print("Downloading zipped annotations to " + annZipFile + " ...") - with urllib.request.urlopen(annURL) as resp, open(annZipFile, 'wb') as out: - shutil.copyfileobj(resp, out) - print("... done downloading.") - print("Unzipping " + annZipFile) - with zipfile.ZipFile(annZipFile, "r") as zip_ref: - zip_ref.extractall(unZipDir) - print("... done unzipping") - print("Will use annotations in " + annFile) - - def load_mask(self, image_id): - """Load instance masks for the given image. - - Different datasets use different ways to store masks. This - function converts the different mask format to one format - in the form of a bitmap [height, width, instances]. - - Returns: - masks: A bool array of shape [height, width, instance count] with - one mask per instance. - class_ids: a 1D array of class IDs of the instance masks. 
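-        Crowd instances ("iscrowd" annotations) are returned with negative
-        class IDs so that callers can tell them apart from regular instances.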
- """ - # If not a COCO image, delegate to parent class. - image_info = self.image_info[image_id] - if image_info["source"] != "coco": - return super(CocoDataset, self).load_mask(image_id) - - instance_masks = [] - class_ids = [] - annotations = self.image_info[image_id]["annotations"] - # Build mask of shape [height, width, instance_count] and list - # of class IDs that correspond to each channel of the mask. - for annotation in annotations: - class_id = self.map_source_class_id( - "coco.{}".format(annotation['category_id'])) - if class_id: - m = self.annToMask(annotation, image_info["height"], - image_info["width"]) - # Some objects are so small that they're less than 1 pixel area - # and end up rounded out. Skip those objects. - if m.max() < 1: - continue - # Is it a crowd? If so, use a negative class ID. - if annotation['iscrowd']: - # Use negative class ID for crowds - class_id *= -1 - # For crowd masks, annToMask() sometimes returns a mask - # smaller than the given dimensions. If so, resize it. - if m.shape[0] != image_info["height"] or m.shape[1] != image_info["width"]: - m = np.ones( - [image_info["height"], image_info["width"]], dtype=bool) - instance_masks.append(m) - class_ids.append(class_id) - - # Pack instance masks into an array - if class_ids: - mask = np.stack(instance_masks, axis=2).astype(np.bool) - class_ids = np.array(class_ids, dtype=np.int32) - return mask, class_ids - else: - # Call super class to return an empty mask - return super(CocoDataset, self).load_mask(image_id) - - def image_reference(self, image_id): - """Return a link to the image in the COCO Website.""" - info = self.image_info[image_id] - if info["source"] == "coco": - return "http://cocodataset.org/#explore?id={}".format(info["id"]) - else: - super(CocoDataset, self).image_reference(image_id) - - # The following two functions are from pycocotools with a few changes. - - def annToRLE(self, ann, height, width): - """ - Convert annotation which can be polygons, uncompressed RLE to RLE. - :return: binary mask (numpy 2D array) - """ - segm = ann['segmentation'] - if isinstance(segm, list): - # polygon -- a single object might consist of multiple parts - # we merge all parts into one mask rle code - rles = maskUtils.frPyObjects(segm, height, width) - rle = maskUtils.merge(rles) - elif isinstance(segm['counts'], list): - # uncompressed RLE - rle = maskUtils.frPyObjects(segm, height, width) - else: - # rle - rle = ann['segmentation'] - return rle - - def annToMask(self, ann, height, width): - """ - Convert annotation which can be polygons, uncompressed RLE, or RLE to binary mask. 
-        :return: binary mask (numpy 2D array)
-        """
-        rle = self.annToRLE(ann, height, width)
-        m = maskUtils.decode(rle)
-        return m
-
-
-############################################################
-#  COCO Evaluation
-############################################################
-
-def build_coco_results(dataset, image_ids, rois, class_ids, scores, masks):
-    """Arrange results to match COCO specs in http://cocodataset.org/#format
-    """
-    # If no results, return an empty list
-    if rois is None:
-        return []
-
-    results = []
-    for image_id in image_ids:
-        # Loop through detections
-        for i in range(rois.shape[0]):
-            class_id = class_ids[i]
-            score = scores[i]
-            bbox = np.around(rois[i], 1)
-            mask = masks[:, :, i]
-
-            result = {
-                "image_id": image_id,
-                "category_id": dataset.get_source_class_id(class_id, "coco"),
-                "bbox": [bbox[1], bbox[0], bbox[3] - bbox[1], bbox[2] - bbox[0]],
-                "score": score,
-                "segmentation": maskUtils.encode(np.asfortranarray(mask))
-            }
-            results.append(result)
-    return results
-
-
-def evaluate_coco(model, dataset, coco, eval_type="bbox", limit=0, image_ids=None):
-    """Runs official COCO evaluation.
-    dataset: A Dataset object with validation data
-    eval_type: "bbox" or "segm" for bounding box or segmentation evaluation
-    limit: if not 0, it's the number of images to use for evaluation
-    """
-    # Pick COCO images from the dataset
-    image_ids = image_ids or dataset.image_ids
-
-    # Limit to a subset
-    if limit:
-        image_ids = image_ids[:limit]
-
-    # Get corresponding COCO image IDs.
-    coco_image_ids = [dataset.image_info[id]["id"] for id in image_ids]
-
-    t_prediction = 0
-    t_start = time.time()
-
-    results = []
-    for i, image_id in enumerate(image_ids):
-        # Load image
-        image = dataset.load_image(image_id)
-
-        # Run detection
-        t = time.time()
-        r = model.detect([image], verbose=0)[0]
-        t_prediction += (time.time() - t)
-
-        # Convert results to COCO format
-        # Cast masks to uint8 because COCO tools errors out on bool
-        image_results = build_coco_results(dataset, coco_image_ids[i:i + 1],
-                                           r["rois"], r["class_ids"],
-                                           r["scores"],
-                                           r["masks"].astype(np.uint8))
-        results.extend(image_results)
-
-    # Load results. This modifies results with additional attributes.
-    coco_results = coco.loadRes(results)
-
-    # Evaluate
-    cocoEval = COCOeval(coco, coco_results, eval_type)
-    cocoEval.params.imgIds = coco_image_ids
-    cocoEval.evaluate()
-    cocoEval.accumulate()
-    cocoEval.summarize()
-
-    print("Prediction time: {}. Average {}/image".format(
-        t_prediction, t_prediction / len(image_ids)))
-    print("Total time: ", time.time() - t_start)
-
-
-############################################################
-#  Training
-############################################################
-
-
-if __name__ == '__main__':
-    import argparse
-
-    # Parse command line arguments
-    parser = argparse.ArgumentParser(
-        description='Train Mask R-CNN on MS COCO.')
-    parser.add_argument("command",
-                        metavar="<command>",
-                        help="'train' or 'evaluate' on MS COCO")
-    parser.add_argument('--dataset', required=True,
-                        metavar="/path/to/coco/",
-                        help='Directory of the MS-COCO dataset')
-    parser.add_argument('--year', required=False,
-                        default=DEFAULT_DATASET_YEAR,
-                        metavar="<year>",
-                        help='Year of the MS-COCO dataset (2014 or 2017) (default=2014)')
-    parser.add_argument('--model', required=True,
-                        metavar="/path/to/weights.h5",
-                        help="Path to weights .h5 file or 'coco'")
-    parser.add_argument('--logs', required=False,
-                        default=DEFAULT_LOGS_DIR,
-                        metavar="/path/to/logs/",
-                        help='Logs and checkpoints directory (default=logs/)')
-    parser.add_argument('--limit', required=False,
-                        default=500,
-                        metavar="<image count>",
-                        help='Images to use for evaluation (default=500)')
-    parser.add_argument('--download', required=False,
-                        default=False,
-                        metavar="<True|False>",
-                        help='Automatically download and unzip MS-COCO files (default=False)',
-                        type=bool)
-    args = parser.parse_args()
-    print("Command: ", args.command)
-    print("Model: ", args.model)
-    print("Dataset: ", args.dataset)
-    print("Year: ", args.year)
-    print("Logs: ", args.logs)
-    print("Auto Download: ", args.download)
-
-    # Configurations
-    if args.command == "train":
-        config = CocoConfig()
-    else:
-        class InferenceConfig(CocoConfig):
-            # Set batch size to 1 since we'll be running inference on
-            # one image at a time. Batch size = GPU_COUNT * IMAGES_PER_GPU
-            GPU_COUNT = 1
-            IMAGES_PER_GPU = 1
-            DETECTION_MIN_CONFIDENCE = 0
-        config = InferenceConfig()
-    config.display()
-
-    # Create model
-    if args.command == "train":
-        model = modellib.MaskRCNN(mode="training", config=config,
-                                  model_dir=args.logs)
-    else:
-        model = modellib.MaskRCNN(mode="inference", config=config,
-                                  model_dir=args.logs)
-
-    # Select weights file to load
-    if args.model.lower() == "coco":
-        model_path = COCO_MODEL_PATH
-    elif args.model.lower() == "last":
-        # Find last trained weights
-        model_path = model.find_last()
-    elif args.model.lower() == "imagenet":
-        # Start from ImageNet trained weights
-        model_path = model.get_imagenet_weights()
-    else:
-        model_path = args.model
-
-    # Load weights
-    print("Loading weights ", model_path)
-    model.load_weights(model_path, by_name=True)
-
-    # Train or evaluate
-    if args.command == "train":
-        # Training dataset. Use the training set and 35K from the
-        # validation set, as in the Mask R-CNN paper.
-        dataset_train = CocoDataset()
-        dataset_train.load_coco(args.dataset, "train",
-                                year=args.year, auto_download=args.download)
-        if args.year in '2014':
-            dataset_train.load_coco(
-                args.dataset, "valminusminival", year=args.year, auto_download=args.download)
-        dataset_train.prepare()
-
-        # Validation dataset
-        dataset_val = CocoDataset()
-        val_type = "val" if args.year in '2017' else "minival"
-        dataset_val.load_coco(args.dataset, val_type,
-                              year=args.year, auto_download=args.download)
-        dataset_val.prepare()
-
-        # Image Augmentation
-        # Right/Left flip 50% of the time
-        augmentation = imgaug.augmenters.Fliplr(0.5)
-
-        # *** This training schedule is an example.
Update to your needs *** - - # Training - Stage 1 - print("Training network heads") - model.train(dataset_train, dataset_val, - learning_rate=config.LEARNING_RATE, - epochs=40, - layers='heads', - augmentation=augmentation) - - # Training - Stage 2 - # Finetune layers from ResNet stage 4 and up - print("Fine tune Resnet stage 4 and up") - model.train(dataset_train, dataset_val, - learning_rate=config.LEARNING_RATE, - epochs=120, - layers='4+', - augmentation=augmentation) - - # Training - Stage 3 - # Fine tune all layers - print("Fine tune all layers") - model.train(dataset_train, dataset_val, - learning_rate=config.LEARNING_RATE / 10, - epochs=160, - layers='all', - augmentation=augmentation) - - elif args.command == "evaluate": - # Validation dataset - dataset_val = CocoDataset() - val_type = "val" if args.year in '2017' else "minival" - coco = dataset_val.load_coco( - args.dataset, val_type, year=args.year, return_coco=True, auto_download=args.download) - dataset_val.prepare() - print("Running COCO evaluation on {} images.".format(args.limit)) - evaluate_coco(model, dataset_val, coco, "bbox", limit=int(args.limit)) - else: - print("'{}' is not recognized. " - "Use 'train' or 'evaluate'".format(args.command)) -""" -This is a script that can be used to retrain the YOLOv2 model for your own dataset. -""" -import argparse - -import os - -import matplotlib.pyplot as plt -import numpy as np -import PIL -import tensorflow as tf -from keras import backend as K -from keras.layers import Input, Lambda, Conv2D -from keras.models import load_model, Model -from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping - -from yad2k.models.keras_yolo import (preprocess_true_boxes, yolo_body, - yolo_eval, yolo_head, yolo_loss) -from yad2k.utils.draw_boxes import draw_boxes - -# Args -argparser = argparse.ArgumentParser( - description="Retrain or 'fine-tune' a pretrained YOLOv2 model for your own data.") - -argparser.add_argument( - '-d', - '--data_path', - help="path to numpy data file (.npz) containing np.object array 'boxes' and np.uint8 array 'images'", - default=os.path.join('..', 'DATA', 'underwater_data.npz')) - -argparser.add_argument( - '-a', - '--anchors_path', - help='path to anchors file, defaults to yolo_anchors.txt', - default=os.path.join('model_data', 'yolo_anchors.txt')) - -argparser.add_argument( - '-c', - '--classes_path', - help='path to classes file, defaults to pascal_classes.txt', - default=os.path.join('..', 'DATA', 'underwater_classes.txt')) - -# Default anchor boxes -YOLO_ANCHORS = np.array( - ((0.57273, 0.677385), (1.87446, 2.06253), (3.33843, 5.47434), - (7.88282, 3.52778), (9.77052, 9.16828))) - - -def _main(args): - data_path = os.path.expanduser(args.data_path) - classes_path = os.path.expanduser(args.classes_path) - anchors_path = os.path.expanduser(args.anchors_path) - - class_names = get_classes(classes_path) - anchors = get_anchors(anchors_path) - - data = np.load(data_path) # custom data saved as a numpy file. 
-    # has 2 arrays: an object array 'boxes' (variable length of boxes in each image)
-    # and an array of images 'images'
-
-    image_data, boxes = process_data(data['images'], data['boxes'])
-
-    anchors = YOLO_ANCHORS
-
-    detectors_mask, matching_true_boxes = get_detector_mask(boxes, anchors)
-
-    model_body, model = create_model(anchors, class_names)
-
-    train(
-        model,
-        class_names,
-        anchors,
-        image_data,
-        boxes,
-        detectors_mask,
-        matching_true_boxes
-    )
-
-    draw(model_body,
-         class_names,
-         anchors,
-         image_data,
-         image_set='val',  # assumes training/validation split is 0.9
-         weights_name='trained_stage_3_best.h5',
-         save_all=False)
-
-
-def get_classes(classes_path):
-    '''loads the classes'''
-    with open(classes_path) as f:
-        class_names = f.readlines()
-    class_names = [c.strip() for c in class_names]
-    return class_names
-
-
-def get_anchors(anchors_path):
-    '''loads the anchors from a file'''
-    if os.path.isfile(anchors_path):
-        with open(anchors_path) as f:
-            anchors = f.readline()
-            anchors = [float(x) for x in anchors.split(',')]
-            return np.array(anchors).reshape(-1, 2)
-    else:
-        print("Could not open anchors file, using default.")
-        return YOLO_ANCHORS
-
-
-def process_data(images, boxes=None):
-    '''processes the data'''
-    images = [PIL.Image.fromarray(i) for i in images]
-    orig_size = np.array([images[0].width, images[0].height])
-    orig_size = np.expand_dims(orig_size, axis=0)
-
-    # Image preprocessing.
-    processed_images = [i.resize((416, 416), PIL.Image.BICUBIC)
-                        for i in images]
-    processed_images = [np.array(image, dtype=np.float)
-                        for image in processed_images]
-    processed_images = [image/255. for image in processed_images]
-
-    if boxes is not None:
-        # Box preprocessing.
-        # Original boxes stored as 1D list of class, x_min, y_min, x_max, y_max.
-        boxes = [box.reshape((-1, 5)) for box in boxes]
-        # Get extents as y_min, x_min, y_max, x_max, class for comparison with
-        # model output.
-        boxes_extents = [box[:, [2, 1, 4, 3, 0]] for box in boxes]
-
-        # Get box parameters as x_center, y_center, box_width, box_height, class.
-        boxes_xy = [0.5 * (box[:, 3:5] + box[:, 1:3]) for box in boxes]
-        boxes_wh = [box[:, 3:5] - box[:, 1:3] for box in boxes]
-        boxes_xy = [boxxy / orig_size for boxxy in boxes_xy]
-        boxes_wh = [boxwh / orig_size for boxwh in boxes_wh]
-        boxes = [np.concatenate(
-            (boxes_xy[i], boxes_wh[i], box[:, 0:1]), axis=1) for i, box in enumerate(boxes)]
-
-        # find the max number of boxes
-        max_boxes = 0
-        for boxz in boxes:
-            if boxz.shape[0] > max_boxes:
-                max_boxes = boxz.shape[0]
-
-        # add zero pad for training
-        for i, boxz in enumerate(boxes):
-            if boxz.shape[0] < max_boxes:
-                zero_padding = np.zeros(
-                    (max_boxes-boxz.shape[0], 5), dtype=np.float32)
-                boxes[i] = np.vstack((boxz, zero_padding))
-
-        return np.array(processed_images), np.array(boxes)
-    else:
-        return np.array(processed_images)
-
-
-def get_detector_mask(boxes, anchors):
-    '''
-    Precompute detectors_mask and matching_true_boxes for training.
-    Detectors mask is 1 for each spatial position in the final conv layer and
-    anchor that should be active for the given boxes and 0 otherwise.
-    Matching true boxes gives the regression targets for the ground truth box
-    that caused a detector to be active or 0 otherwise.
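-    Both returned arrays are indexed per image, per 13x13 grid cell and per
-    anchor: detectors_mask is [N, 13, 13, 5, 1] and matching_true_boxes is
-    [N, 13, 13, 5, 5].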
- ''' - detectors_mask = [0 for i in range(len(boxes))] - matching_true_boxes = [0 for i in range(len(boxes))] - for i, box in enumerate(boxes): - detectors_mask[i], matching_true_boxes[i] = preprocess_true_boxes(box, anchors, [ - 416, 416]) - - return np.array(detectors_mask), np.array(matching_true_boxes) - - -def create_model(anchors, class_names, load_pretrained=True, freeze_body=True): - ''' - returns the body of the model and the model - - # Params: - - load_pretrained: whether or not to load the pretrained model or initialize all weights - - freeze_body: whether or not to freeze all weights except for the last layer's - - # Returns: - - model_body: YOLOv2 with new output layer - - model: YOLOv2 with custom loss Lambda layer - - ''' - - detectors_mask_shape = (13, 13, 5, 1) - matching_boxes_shape = (13, 13, 5, 5) - - # Create model input layers. - image_input = Input(shape=(416, 416, 3)) - boxes_input = Input(shape=(None, 5)) - detectors_mask_input = Input(shape=detectors_mask_shape) - matching_boxes_input = Input(shape=matching_boxes_shape) - - # Create model body. - yolo_model = yolo_body(image_input, len(anchors), len(class_names)) - topless_yolo = Model(yolo_model.input, yolo_model.layers[-2].output) - - if load_pretrained: - # Save topless yolo: - topless_yolo_path = os.path.join('model_data', 'yolo_topless.h5') - if not os.path.exists(topless_yolo_path): - print("CREATING TOPLESS WEIGHTS FILE") - yolo_path = os.path.join('model_data', 'yolo.h5') - model_body = load_model(yolo_path) - model_body = Model(model_body.inputs, model_body.layers[-2].output) - model_body.save_weights(topless_yolo_path) - topless_yolo.load_weights(topless_yolo_path) - - if freeze_body: - for layer in topless_yolo.layers: - layer.trainable = False - final_layer = Conv2D(len(anchors)*(5+len(class_names)), - (1, 1), activation='linear')(topless_yolo.output) - - model_body = Model(image_input, final_layer) - - # Place model loss on CPU to reduce GPU memory usage. - with tf.device('/cpu:0'): - # TODO: Replace Lambda with custom Keras layer for loss. - model_loss = Lambda( - yolo_loss, - output_shape=(1, ), - name='yolo_loss', - arguments={'anchors': anchors, - 'num_classes': len(class_names)})([ - model_body.output, boxes_input, - detectors_mask_input, matching_boxes_input - ]) - - model = Model( - [model_body.input, boxes_input, detectors_mask_input, - matching_boxes_input], model_loss) - - return model_body, model - - -def train(model, class_names, anchors, image_data, boxes, detectors_mask, matching_true_boxes, validation_split=0.1): - ''' - retrain/fine-tune the model - - logs training with tensorboard - - saves training weights in current directory - - best weights according to val_loss is saved as trained_stage_3_best.h5 - ''' - model.compile( - optimizer='adam', loss={ - 'yolo_loss': lambda y_true, y_pred: y_pred - }) # This is a hack to use the custom loss function in the last layer. 
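-    # The dummy loss works because this model's only output *is* the scalar
-    # YOLO loss computed by the 'yolo_loss' Lambda layer in create_model():
-    # Keras passes y_pred straight through, and the zero-filled y_true arrays
-    # handed to fit() below are ignored.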
-
-    logging = TensorBoard()
-    checkpoint = ModelCheckpoint("trained_stage_3_best.h5", monitor='val_loss',
-                                 save_weights_only=True, save_best_only=True)
-    early_stopping = EarlyStopping(
-        monitor='val_loss', min_delta=0, patience=15, verbose=1, mode='auto')
-
-    model.fit([image_data, boxes, detectors_mask, matching_true_boxes],
-              np.zeros(len(image_data)),
-              validation_split=validation_split,
-              batch_size=32,
-              epochs=5,
-              callbacks=[logging])
-    model.save_weights('trained_stage_1.h5')
-
-    model_body, model = create_model(
-        anchors, class_names, load_pretrained=False, freeze_body=False)
-
-    model.load_weights('trained_stage_1.h5')
-
-    model.compile(
-        optimizer='adam', loss={
-            'yolo_loss': lambda y_true, y_pred: y_pred
-        })  # This is a hack to use the custom loss function in the last layer.
-
-    model.fit([image_data, boxes, detectors_mask, matching_true_boxes],
-              np.zeros(len(image_data)),
-              validation_split=0.1,
-              batch_size=8,
-              epochs=30,
-              callbacks=[logging])
-
-    model.save_weights('trained_stage_2.h5')
-
-    model.fit([image_data, boxes, detectors_mask, matching_true_boxes],
-              np.zeros(len(image_data)),
-              validation_split=0.1,
-              batch_size=8,
-              epochs=30,
-              callbacks=[logging, checkpoint, early_stopping])
-
-    model.save_weights('trained_stage_3.h5')
-
-
-def draw(model_body, class_names, anchors, image_data, image_set='val',
-         weights_name='trained_stage_3_best.h5', out_path="output_images", save_all=True):
-    '''
-    Draw bounding boxes on image data
-    '''
-    if image_set == 'train':
-        image_data = np.array([np.expand_dims(image, axis=0)
-                               for image in image_data[:int(len(image_data)*.9)]])
-    elif image_set == 'val':
-        image_data = np.array([np.expand_dims(image, axis=0)
-                               for image in image_data[int(len(image_data)*.9):]])
-    elif image_set == 'all':
-        image_data = np.array([np.expand_dims(image, axis=0)
-                               for image in image_data])
-    else:
-        raise ValueError("draw argument image_set must be 'train', 'val', or 'all'")
-    # model.load_weights(weights_name)
-    print(image_data.shape)
-    model_body.load_weights(weights_name)
-
-    # Create output variables for prediction.
-    yolo_outputs = yolo_head(model_body.output, anchors, len(class_names))
-    input_image_shape = K.placeholder(shape=(2, ))
-    boxes, scores, classes = yolo_eval(
-        yolo_outputs, input_image_shape, score_threshold=0.07, iou_threshold=0)
-
-    # Run prediction on overfit image.
-    sess = K.get_session()  # TODO: Remove dependence on Tensorflow session.
-
-    if not os.path.exists(out_path):
-        os.makedirs(out_path)
-    for i in range(len(image_data)):
-        out_boxes, out_scores, out_classes = sess.run(
-            [boxes, scores, classes],
-            feed_dict={
-                model_body.input: image_data[i],
-                input_image_shape: [image_data.shape[2], image_data.shape[3]],
-                K.learning_phase(): 0
-            })
-        print('Found {} boxes for image.'.format(len(out_boxes)))
-        print(out_boxes)
-
-        # Plot image with predicted boxes.
-        image_with_boxes = draw_boxes(image_data[i][0], out_boxes, out_classes,
-                                      class_names, out_scores)
-        # Save the image:
-        if save_all or (len(out_boxes) > 0):
-            image = PIL.Image.fromarray(image_with_boxes)
-            image.save(os.path.join(out_path, str(i)+'.png'))
-
-        # To display (pauses the program):
-        # plt.imshow(image_with_boxes, interpolation='nearest')
-        # plt.show()
-
-
-if __name__ == '__main__':
-    args = argparser.parse_args()
-    _main(args)
-#!
/usr/bin/env python -"""Run a YOLO_v2 style detection model on test images.""" -import argparse -import colorsys -import imghdr -import os -import random - -import numpy as np -from keras import backend as K -from keras.models import load_model -from PIL import Image, ImageDraw, ImageFont - -from yad2k.models.keras_yolo import yolo_eval, yolo_head - -parser = argparse.ArgumentParser( - description='Run a YOLO_v2 style detection model on test images..') -parser.add_argument( - 'model_path', - help='path to h5 model file containing body' - 'of a YOLO_v2 model') -parser.add_argument( - '-a', - '--anchors_path', - help='path to anchors file, defaults to yolo_anchors.txt', - default='model_data/yolo_anchors.txt') -parser.add_argument( - '-c', - '--classes_path', - help='path to classes file, defaults to coco_classes.txt', - default='model_data/coco_classes.txt') -parser.add_argument( - '-t', - '--test_path', - help='path to directory of test images, defaults to images/', - default='images') -parser.add_argument( - '-o', - '--output_path', - help='path to output test images, defaults to images/out', - default='images/out') -parser.add_argument( - '-s', - '--score_threshold', - type=float, - help='threshold for bounding box scores, default .3', - default=.3) -parser.add_argument( - '-iou', - '--iou_threshold', - type=float, - help='threshold for non max suppression IOU, default .5', - default=.5) - - -def _main(args): - model_path = os.path.expanduser(args.model_path) - assert model_path.endswith('.h5'), 'Keras model must be a .h5 file.' - anchors_path = os.path.expanduser(args.anchors_path) - classes_path = os.path.expanduser(args.classes_path) - test_path = os.path.expanduser(args.test_path) - output_path = os.path.expanduser(args.output_path) - - if not os.path.exists(output_path): - print('Creating output path {}'.format(output_path)) - os.mkdir(output_path) - - sess = K.get_session() # TODO: Remove dependence on Tensorflow session. - - with open(classes_path) as f: - class_names = f.readlines() - class_names = [c.strip() for c in class_names] - - with open(anchors_path) as f: - anchors = f.readline() - anchors = [float(x) for x in anchors.split(',')] - anchors = np.array(anchors).reshape(-1, 2) - - yolo_model = load_model(model_path) - - # Verify model, anchors, and classes are compatible - num_classes = len(class_names) - num_anchors = len(anchors) - # TODO: Assumes dim ordering is channel last - model_output_channels = yolo_model.layers[-1].output_shape[-1] - assert model_output_channels == num_anchors * (num_classes + 5), \ - 'Mismatch between model and given anchor and class sizes. ' \ - 'Specify matching anchors and classes with --anchors_path and ' \ - '--classes_path flags.' - print('{} model, anchors, and classes loaded.'.format(model_path)) - - # Check if model is fully convolutional, assuming channel last order. - model_image_size = yolo_model.layers[0].input_shape[1:3] - is_fixed_size = model_image_size != (None, None) - - # Generate colors for drawing bounding boxes. - hsv_tuples = [(x / len(class_names), 1., 1.) - for x in range(len(class_names))] - colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples)) - colors = list( - map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)), - colors)) - random.seed(10101) # Fixed seed for consistent colors across runs. - random.shuffle(colors) # Shuffle colors to decorrelate adjacent classes. - random.seed(None) # Reset seed to default. - - # Generate output tensor targets for filtered bounding boxes. 
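-    # yolo_head converts the raw network output into box centers/sizes,
-    # objectness scores and class probabilities; yolo_eval then applies the
-    # score threshold and non-max suppression chosen on the command line.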
- # TODO: Wrap these backend operations with Keras layers. - yolo_outputs = yolo_head(yolo_model.output, anchors, len(class_names)) - input_image_shape = K.placeholder(shape=(2, )) - boxes, scores, classes = yolo_eval( - yolo_outputs, - input_image_shape, - score_threshold=args.score_threshold, - iou_threshold=args.iou_threshold) - - for image_file in os.listdir(test_path): - try: - image_type = imghdr.what(os.path.join(test_path, image_file)) - if not image_type: - continue - except IsADirectoryError: - continue - - image = Image.open(os.path.join(test_path, image_file)) - if is_fixed_size: # TODO: When resizing we can use minibatch input. - resized_image = image.resize( - tuple(reversed(model_image_size)), Image.BICUBIC) - image_data = np.array(resized_image, dtype='float32') - else: - # Due to skip connection + max pooling in YOLO_v2, inputs must have - # width and height as multiples of 32. - new_image_size = (image.width - (image.width % 32), - image.height - (image.height % 32)) - resized_image = image.resize(new_image_size, Image.BICUBIC) - image_data = np.array(resized_image, dtype='float32') - print(image_data.shape) - - image_data /= 255. - image_data = np.expand_dims(image_data, 0) # Add batch dimension. - - out_boxes, out_scores, out_classes = sess.run( - [boxes, scores, classes], - feed_dict={ - yolo_model.input: image_data, - input_image_shape: [image.size[1], image.size[0]], - K.learning_phase(): 0 - }) - print('Found {} boxes for {}'.format(len(out_boxes), image_file)) - - font = ImageFont.truetype( - font='font/FiraMono-Medium.otf', - size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32')) - thickness = (image.size[0] + image.size[1]) // 300 - - for i, c in reversed(list(enumerate(out_classes))): - predicted_class = class_names[c] - box = out_boxes[i] - score = out_scores[i] - - label = '{} {:.2f}'.format(predicted_class, score) - - draw = ImageDraw.Draw(image) - label_size = draw.textsize(label, font) - - top, left, bottom, right = box - top = max(0, np.floor(top + 0.5).astype('int32')) - left = max(0, np.floor(left + 0.5).astype('int32')) - bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32')) - right = min(image.size[0], np.floor(right + 0.5).astype('int32')) - print(label, (left, top), (right, bottom)) - - if top - label_size[1] >= 0: - text_origin = np.array([left, top - label_size[1]]) - else: - text_origin = np.array([left, top + 1]) - - # My kingdom for a good redistributable image drawing library. - for i in range(thickness): - draw.rectangle( - [left + i, top + i, right - i, bottom - i], - outline=colors[c]) - draw.rectangle( - [tuple(text_origin), tuple(text_origin + label_size)], - fill=colors[c]) - draw.text(text_origin, label, fill=(0, 0, 0), font=font) - del draw - - image.save(os.path.join(output_path, image_file), quality=90) - sess.close() - - -if __name__ == '__main__': - _main(parser.parse_args()) -#! /usr/bin/env python -"""Overfit a YOLO_v2 model to a single image from the Pascal VOC dataset. - -This is a sample training script used to test the implementation of the -YOLO localization loss function. 
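-
-Overfitting a single image is a cheap sanity check: if the loss and its
-targets are wired correctly, training should drive the loss toward zero and
-the predicted boxes should converge on the ground-truth boxes.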
-""" -import argparse -import io -import os - -import h5py -import matplotlib.pyplot as plt -import numpy as np -import PIL -import tensorflow as tf -from keras import backend as K -from keras.layers import Input, Lambda -from keras.models import Model - -from yad2k.models.keras_yolo import (preprocess_true_boxes, yolo_body, - yolo_eval, yolo_head, yolo_loss) -from yad2k.utils.draw_boxes import draw_boxes - -YOLO_ANCHORS = np.array( - ((0.57273, 0.677385), (1.87446, 2.06253), (3.33843, 5.47434), - (7.88282, 3.52778), (9.77052, 9.16828))) - -argparser = argparse.ArgumentParser( - description='Train YOLO_v2 model to overfit on a single image.') - -argparser.add_argument( - '-d', - '--data_path', - help='path to HDF5 file containing pascal voc dataset', - default='~/datasets/VOCdevkit/pascal_voc_07_12.hdf5') - -argparser.add_argument( - '-a', - '--anchors_path', - help='path to anchors file, defaults to yolo_anchors.txt', - default='model_data/yolo_anchors.txt') - -argparser.add_argument( - '-c', - '--classes_path', - help='path to classes file, defaults to pascal_classes.txt', - default='model_data/pascal_classes.txt') - - -def _main(args): - voc_path = os.path.expanduser(args.data_path) - classes_path = os.path.expanduser(args.classes_path) - anchors_path = os.path.expanduser(args.anchors_path) - - with open(classes_path) as f: - class_names = f.readlines() - class_names = [c.strip() for c in class_names] - - if os.path.isfile(anchors_path): - with open(anchors_path) as f: - anchors = f.readline() - anchors = [float(x) for x in anchors.split(',')] - anchors = np.array(anchors).reshape(-1, 2) - else: - anchors = YOLO_ANCHORS - - voc = h5py.File(voc_path, 'r') - image = PIL.Image.open(io.BytesIO(voc['train/images'][28])) - orig_size = np.array([image.width, image.height]) - orig_size = np.expand_dims(orig_size, axis=0) - - # Image preprocessing. - image = image.resize((416, 416), PIL.Image.BICUBIC) - image_data = np.array(image, dtype=np.float) - image_data /= 255. - - # Box preprocessing. - # Original boxes stored as 1D list of class, x_min, y_min, x_max, y_max. - boxes = voc['train/boxes'][28] - boxes = boxes.reshape((-1, 5)) - # Get extents as y_min, x_min, y_max, x_max, class for comparision with - # model output. - boxes_extents = boxes[:, [2, 1, 4, 3, 0]] - - # Get box parameters as x_center, y_center, box_width, box_height, class. - boxes_xy = 0.5 * (boxes[:, 3:5] + boxes[:, 1:3]) - boxes_wh = boxes[:, 3:5] - boxes[:, 1:3] - boxes_xy = boxes_xy / orig_size - boxes_wh = boxes_wh / orig_size - boxes = np.concatenate((boxes_xy, boxes_wh, boxes[:, 0:1]), axis=1) - - # Precompute detectors_mask and matching_true_boxes for training. - # Detectors mask is 1 for each spatial position in the final conv layer and - # anchor that should be active for the given boxes and 0 otherwise. - # Matching true boxes gives the regression targets for the ground truth box - # that caused a detector to be active or 0 otherwise. - detectors_mask_shape = (13, 13, 5, 1) - matching_boxes_shape = (13, 13, 5, 5) - detectors_mask, matching_true_boxes = preprocess_true_boxes(boxes, anchors, - [416, 416]) - - # Create model input layers. 
- image_input = Input(shape=(416, 416, 3)) - boxes_input = Input(shape=(None, 5)) - detectors_mask_input = Input(shape=detectors_mask_shape) - matching_boxes_input = Input(shape=matching_boxes_shape) - - print('Boxes:') - print(boxes) - print('Box corners:') - print(boxes_extents) - print('Active detectors:') - print(np.where(detectors_mask == 1)[:-1]) - print('Matching boxes for active detectors:') - print(matching_true_boxes[np.where(detectors_mask == 1)[:-1]]) - - # Create model body. - model_body = yolo_body(image_input, len(anchors), len(class_names)) - model_body = Model(image_input, model_body.output) - # Place model loss on CPU to reduce GPU memory usage. - with tf.device('/cpu:0'): - # TODO: Replace Lambda with custom Keras layer for loss. - model_loss = Lambda( - yolo_loss, - output_shape=(1, ), - name='yolo_loss', - arguments={'anchors': anchors, - 'num_classes': len(class_names)})([ - model_body.output, boxes_input, - detectors_mask_input, matching_boxes_input - ]) - model = Model( - [image_input, boxes_input, detectors_mask_input, - matching_boxes_input], model_loss) - model.compile( - optimizer='adam', loss={ - 'yolo_loss': lambda y_true, y_pred: y_pred - }) # This is a hack to use the custom loss function in the last layer. - - # Add batch dimension for training. - image_data = np.expand_dims(image_data, axis=0) - boxes = np.expand_dims(boxes, axis=0) - detectors_mask = np.expand_dims(detectors_mask, axis=0) - matching_true_boxes = np.expand_dims(matching_true_boxes, axis=0) - - num_steps = 1000 - # TODO: For full training, put preprocessing inside training loop. - # for i in range(num_steps): - # loss = model.train_on_batch( - # [image_data, boxes, detectors_mask, matching_true_boxes], - # np.zeros(len(image_data))) - model.fit([image_data, boxes, detectors_mask, matching_true_boxes], - np.zeros(len(image_data)), - batch_size=1, - epochs=num_steps) - model.save_weights('overfit_weights.h5') - - # Create output variables for prediction. - yolo_outputs = yolo_head(model_body.output, anchors, len(class_names)) - input_image_shape = K.placeholder(shape=(2, )) - boxes, scores, classes = yolo_eval( - yolo_outputs, input_image_shape, score_threshold=.3, iou_threshold=.9) - - # Run prediction on overfit image. - sess = K.get_session() # TODO: Remove dependence on Tensorflow session. - out_boxes, out_scores, out_classes = sess.run( - [boxes, scores, classes], - feed_dict={ - model_body.input: image_data, - input_image_shape: [image.size[1], image.size[0]], - K.learning_phase(): 0 - }) - print('Found {} boxes for image.'.format(len(out_boxes))) - print(out_boxes) - - # Plot image with predicted boxes. - image_with_boxes = draw_boxes(image_data[0], out_boxes, out_classes, - class_names, out_scores) - plt.imshow(image_with_boxes, interpolation='nearest') - plt.show() - - -if __name__ == '__main__': - args = argparser.parse_args() - _main(args) -#! /usr/bin/env python -""" -Reads Darknet19 config and weights and creates Keras model with TF backend. - -Currently only supports layers in Darknet19 config. 
-""" - -import argparse -import configparser -import io -import os -from collections import defaultdict - -import numpy as np -from keras import backend as K -from keras.layers import (Conv2D, GlobalAveragePooling2D, Input, Lambda, - MaxPooling2D) -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.merge import concatenate -from keras.layers.normalization import BatchNormalization -from keras.models import Model -from keras.regularizers import l2 -from keras.utils.vis_utils import plot_model as plot - -from yad2k.models.keras_yolo import (space_to_depth_x2, - space_to_depth_x2_output_shape) - -parser = argparse.ArgumentParser( - description='Yet Another Darknet To Keras Converter.') -parser.add_argument('config_path', help='Path to Darknet cfg file.') -parser.add_argument('weights_path', help='Path to Darknet weights file.') -parser.add_argument('output_path', help='Path to output Keras model file.') -parser.add_argument( - '-p', - '--plot_model', - help='Plot generated Keras model and save as image.', - action='store_true') -parser.add_argument( - '-flcl', - '--fully_convolutional', - help='Model is fully convolutional so set input shape to (None, None, 3). ' - 'WARNING: This experimental option does not work properly for YOLO_v2.', - action='store_true') - - -def unique_config_sections(config_file): - """Convert all config sections to have unique names. - - Adds unique suffixes to config sections for compability with configparser. - """ - section_counters = defaultdict(int) - output_stream = io.StringIO() - with open(config_file) as fin: - for line in fin: - if line.startswith('['): - section = line.strip().strip('[]') - _section = section + '_' + str(section_counters[section]) - section_counters[section] += 1 - line = line.replace(section, _section) - output_stream.write(line) - output_stream.seek(0) - return output_stream - - -# %% -def _main(args): - config_path = os.path.expanduser(args.config_path) - weights_path = os.path.expanduser(args.weights_path) - assert config_path.endswith('.cfg'), '{} is not a .cfg file'.format( - config_path) - assert weights_path.endswith( - '.weights'), '{} is not a .weights file'.format(weights_path) - - output_path = os.path.expanduser(args.output_path) - assert output_path.endswith( - '.h5'), 'output path {} is not a .h5 file'.format(output_path) - output_root = os.path.splitext(output_path)[0] - - # Load weights and config. - print('Loading weights.') - weights_file = open(weights_path, 'rb') - weights_header = np.ndarray( - shape=(4, ), dtype='int32', buffer=weights_file.read(16)) - print('Weights Header: ', weights_header) - # TODO: Check transpose flag when implementing fully connected layers. 
- # transpose = (weight_header[0] > 1000) or (weight_header[1] > 1000) - - print('Parsing Darknet config.') - unique_config_file = unique_config_sections(config_path) - cfg_parser = configparser.ConfigParser() - cfg_parser.read_file(unique_config_file) - - print('Creating Keras model.') - if args.fully_convolutional: - image_height, image_width = None, None - else: - image_height = int(cfg_parser['net_0']['height']) - image_width = int(cfg_parser['net_0']['width']) - prev_layer = Input(shape=(image_height, image_width, 3)) - all_layers = [prev_layer] - - weight_decay = float(cfg_parser['net_0']['decay'] - ) if 'net_0' in cfg_parser.sections() else 5e-4 - count = 0 - for section in cfg_parser.sections(): - print('Parsing section {}'.format(section)) - if section.startswith('convolutional'): - filters = int(cfg_parser[section]['filters']) - size = int(cfg_parser[section]['size']) - stride = int(cfg_parser[section]['stride']) - pad = int(cfg_parser[section]['pad']) - activation = cfg_parser[section]['activation'] - batch_normalize = 'batch_normalize' in cfg_parser[section] - - # padding='same' is equivalent to Darknet pad=1 - padding = 'same' if pad == 1 else 'valid' - - # Setting weights. - # Darknet serializes convolutional weights as: - # [bias/beta, [gamma, mean, variance], conv_weights] - prev_layer_shape = K.int_shape(prev_layer) - - # TODO: This assumes channel last dim_ordering. - weights_shape = (size, size, prev_layer_shape[-1], filters) - darknet_w_shape = (filters, weights_shape[2], size, size) - weights_size = np.product(weights_shape) - - print('conv2d', 'bn' - if batch_normalize else ' ', activation, weights_shape) - - conv_bias = np.ndarray( - shape=(filters, ), - dtype='float32', - buffer=weights_file.read(filters * 4)) - count += filters - - if batch_normalize: - bn_weights = np.ndarray( - shape=(3, filters), - dtype='float32', - buffer=weights_file.read(filters * 12)) - count += 3 * filters - - # TODO: Keras BatchNormalization mistakenly refers to var - # as std. - bn_weight_list = [ - bn_weights[0], # scale gamma - conv_bias, # shift beta - bn_weights[1], # running mean - bn_weights[2] # running var - ] - - conv_weights = np.ndarray( - shape=darknet_w_shape, - dtype='float32', - buffer=weights_file.read(weights_size * 4)) - count += weights_size - - # DarkNet conv_weights are serialized Caffe-style: - # (out_dim, in_dim, height, width) - # We would like to set these to Tensorflow order: - # (height, width, in_dim, out_dim) - # TODO: Add check for Theano dim ordering. - conv_weights = np.transpose(conv_weights, [2, 3, 1, 0]) - conv_weights = [conv_weights] if batch_normalize else [ - conv_weights, conv_bias - ] - - # Handle activation. - act_fn = None - if activation == 'leaky': - pass # Add advanced activation later. 
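-                # 'leaky' is handled after batch normalization below with a
-                # separate LeakyReLU layer, so act_fn stays None here.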
-            elif activation != 'linear':
-                raise ValueError(
-                    'Unknown activation function `{}` in section {}'.format(
-                        activation, section))
-
-            # Create Conv2D layer
-            conv_layer = (Conv2D(
-                filters, (size, size),
-                strides=(stride, stride),
-                kernel_regularizer=l2(weight_decay),
-                use_bias=not batch_normalize,
-                weights=conv_weights,
-                activation=act_fn,
-                padding=padding))(prev_layer)
-
-            if batch_normalize:
-                conv_layer = (BatchNormalization(
-                    weights=bn_weight_list))(conv_layer)
-            prev_layer = conv_layer
-
-            if activation == 'linear':
-                all_layers.append(prev_layer)
-            elif activation == 'leaky':
-                act_layer = LeakyReLU(alpha=0.1)(prev_layer)
-                prev_layer = act_layer
-                all_layers.append(act_layer)
-
-        elif section.startswith('maxpool'):
-            size = int(cfg_parser[section]['size'])
-            stride = int(cfg_parser[section]['stride'])
-            all_layers.append(
-                MaxPooling2D(
-                    padding='same',
-                    pool_size=(size, size),
-                    strides=(stride, stride))(prev_layer))
-            prev_layer = all_layers[-1]
-
-        elif section.startswith('avgpool'):
-            if cfg_parser.items(section) != []:
-                raise ValueError('{} with params unsupported.'.format(section))
-            all_layers.append(GlobalAveragePooling2D()(prev_layer))
-            prev_layer = all_layers[-1]
-
-        elif section.startswith('route'):
-            ids = [int(i) for i in cfg_parser[section]['layers'].split(',')]
-            layers = [all_layers[i] for i in ids]
-            if len(layers) > 1:
-                print('Concatenating route layers:', layers)
-                concatenate_layer = concatenate(layers)
-                all_layers.append(concatenate_layer)
-                prev_layer = concatenate_layer
-            else:
-                skip_layer = layers[0]  # only one layer to route
-                all_layers.append(skip_layer)
-                prev_layer = skip_layer
-
-        elif section.startswith('reorg'):
-            block_size = int(cfg_parser[section]['stride'])
-            assert block_size == 2, 'Only reorg with stride 2 supported.'
-            all_layers.append(
-                Lambda(
-                    space_to_depth_x2,
-                    output_shape=space_to_depth_x2_output_shape,
-                    name='space_to_depth_x2')(prev_layer))
-            prev_layer = all_layers[-1]
-
-        elif section.startswith('region'):
-            with open('{}_anchors.txt'.format(output_root), 'w') as f:
-                print(cfg_parser[section]['anchors'], file=f)
-
-        elif (section.startswith('net') or section.startswith('cost') or
-              section.startswith('softmax')):
-            pass  # Configs not currently handled during model definition.
-
-        else:
-            raise ValueError(
-                'Unsupported section header type: {}'.format(section))
-
-    # Create and save model.
-    model = Model(inputs=all_layers[0], outputs=all_layers[-1])
-    print(model.summary())
-    model.save('{}'.format(output_path))
-    print('Saved Keras model to {}'.format(output_path))
-    # Check to see if all weights have been read.
-    remaining_weights = len(weights_file.read()) / 4
-    weights_file.close()
-    print('Read {} of {} from Darknet weights.'.format(count, count +
-                                                       remaining_weights))
-    if remaining_weights > 0:
-        print('Warning: {} unused weights'.format(remaining_weights))
-
-    if args.plot_model:
-        plot(model, to_file='{}.png'.format(output_root), show_shapes=True)
-        print('Saved model plot to {}.png'.format(output_root))
-
-
-if __name__ == '__main__':
-    _main(parser.parse_args())
-"""
-Convert Pascal VOC 2007+2012 detection dataset to HDF5.
-
-Does not preserve full XML annotations.
-Combines all VOC 2007 subsets (train, val) with VOC2012 train for full
-training set as done in Faster R-CNN paper.
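-
-Example usage (the file name voc_to_hdf5.py matches the
-`from voc_to_hdf5 import get_ids` import in the TFRecords converter below):
-
-    python voc_to_hdf5.py -p ~/data/PascalVOC/VOCdevkit
-
-The resulting pascal_voc_07_12.hdf5 file is written inside the given
-VOCdevkit directory.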
-
-Code based on:
-https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py
-"""
-
-import argparse
-import os
-import xml.etree.ElementTree as ElementTree
-
-import h5py
-import numpy as np
-
-sets_from_2007 = [('2007', 'train'), ('2007', 'val')]
-train_set = [('2012', 'train')]
-val_set = [('2012', 'val')]
-test_set = [('2007', 'test')]
-
-classes = [
-    "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
-    "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
-    "pottedplant", "sheep", "sofa", "train", "tvmonitor"
-]
-
-parser = argparse.ArgumentParser(
-    description='Convert Pascal VOC 2007+2012 detection dataset to HDF5.')
-parser.add_argument(
-    '-p',
-    '--path_to_voc',
-    help='path to VOCdevkit directory',
-    default='~/data/PascalVOC/VOCdevkit')
-
-
-def get_boxes_for_id(voc_path, year, image_id):
-    """Get object bounding boxes annotations for given image.
-
-    Parameters
-    ----------
-    voc_path : str
-        Path to VOCdevkit directory.
-    year : str
-        Year of dataset containing image. Either '2007' or '2012'.
-    image_id : str
-        Pascal VOC identifier for given image.
-
-    Returns
-    -------
-    boxes : array of int
-        bounding box annotations of class label, xmin, ymin, xmax, ymax as a
-        flattened array with 5 values per box.
-    """
-    fname = os.path.join(voc_path, 'VOC{}/Annotations/{}.xml'.format(year,
-                                                                     image_id))
-    with open(fname) as in_file:
-        xml_tree = ElementTree.parse(in_file)
-    root = xml_tree.getroot()
-    boxes = []
-    for obj in root.iter('object'):
-        difficult = obj.find('difficult').text
-        label = obj.find('name').text
-        if label not in classes or int(
-                difficult) == 1:  # exclude difficult or unlisted classes
-            continue
-        xml_box = obj.find('bndbox')
-        bbox = (classes.index(label), int(xml_box.find('xmin').text),
-                int(xml_box.find('ymin').text), int(xml_box.find('xmax').text),
-                int(xml_box.find('ymax').text))
-        boxes.extend(bbox)
-    return np.array(
-        boxes)  # .T  # return transpose so last dimension is variable length
-
-
-def get_image_for_id(voc_path, year, image_id):
-    """Get image data as uint8 array for given image.
-
-    Parameters
-    ----------
-    voc_path : str
-        Path to VOCdevkit directory.
-    year : str
-        Year of dataset containing image. Either '2007' or '2012'.
-    image_id : str
-        Pascal VOC identifier for given image.
-
-    Returns
-    -------
-    image_data : array of uint8
-        Compressed JPEG byte string represented as array of uint8.
-    """
-    fname = os.path.join(voc_path, 'VOC{}/JPEGImages/{}.jpg'.format(year,
-                                                                    image_id))
-    with open(fname, 'rb') as in_file:
-        data = in_file.read()
-    # Use of encoding based on: https://github.com/h5py/h5py/issues/745
-    return np.fromstring(data, dtype='uint8')
-
-
-def get_ids(voc_path, datasets):
-    """Get image identifiers for corresponding list of dataset identifiers.
-
-    Parameters
-    ----------
-    voc_path : str
-        Path to VOCdevkit directory.
-    datasets : list of str tuples
-        List of dataset identifiers in the form of (year, dataset) pairs.
-
-    Returns
-    -------
-    ids : list of str
-        List of all image identifiers for given datasets.
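-
-    Example
-    -------
-    get_ids(voc_path, [('2007', 'train')]) returns the stripped lines of
-    VOC2007/ImageSets/Main/train.txt, e.g. ['000012', '000017', ...]
-    (illustrative identifiers).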
- """ - ids = [] - for year, image_set in datasets: - id_file = os.path.join(voc_path, 'VOC{}/ImageSets/Main/{}.txt'.format( - year, image_set)) - with open(id_file, 'r') as image_ids: - ids.extend(map(str.strip, image_ids.readlines())) - return ids - - -def add_to_dataset(voc_path, year, ids, images, boxes, start=0): - """Process all given ids and adds them to given datasets.""" - for i, voc_id in enumerate(ids): - image_data = get_image_for_id(voc_path, year, voc_id) - image_boxes = get_boxes_for_id(voc_path, year, voc_id) - images[start + i] = image_data - boxes[start + i] = image_boxes - return i - - -def _main(args): - voc_path = os.path.expanduser(args.path_to_voc) - train_ids = get_ids(voc_path, train_set) - val_ids = get_ids(voc_path, val_set) - test_ids = get_ids(voc_path, test_set) - train_ids_2007 = get_ids(voc_path, sets_from_2007) - total_train_ids = len(train_ids) + len(train_ids_2007) - - # Create HDF5 dataset structure - print('Creating HDF5 dataset structure.') - fname = os.path.join(voc_path, 'pascal_voc_07_12.hdf5') - voc_h5file = h5py.File(fname, 'w') - uint8_dt = h5py.special_dtype( - vlen=np.dtype('uint8')) # variable length uint8 - vlen_int_dt = h5py.special_dtype( - vlen=np.dtype(int)) # variable length default int - train_group = voc_h5file.create_group('train') - val_group = voc_h5file.create_group('val') - test_group = voc_h5file.create_group('test') - - # store class list for reference class ids as csv fixed-length numpy string - voc_h5file.attrs['classes'] = np.string_(str.join(',', classes)) - - # store images as variable length uint8 arrays - train_images = train_group.create_dataset( - 'images', shape=(total_train_ids, ), dtype=uint8_dt) - val_images = val_group.create_dataset( - 'images', shape=(len(val_ids), ), dtype=uint8_dt) - test_images = test_group.create_dataset( - 'images', shape=(len(test_ids), ), dtype=uint8_dt) - - # store boxes as class_id, xmin, ymin, xmax, ymax - train_boxes = train_group.create_dataset( - 'boxes', shape=(total_train_ids, ), dtype=vlen_int_dt) - val_boxes = val_group.create_dataset( - 'boxes', shape=(len(val_ids), ), dtype=vlen_int_dt) - test_boxes = test_group.create_dataset( - 'boxes', shape=(len(test_ids), ), dtype=vlen_int_dt) - - # process all ids and add to datasets - print('Processing Pascal VOC 2007 datasets for training set.') - last_2007 = add_to_dataset(voc_path, '2007', train_ids_2007, train_images, - train_boxes) - print('Processing Pascal VOC 2012 training set.') - add_to_dataset( - voc_path, - '2012', - train_ids, - train_images, - train_boxes, - start=last_2007 + 1) - print('Processing Pascal VOC 2012 val set.') - add_to_dataset(voc_path, '2012', val_ids, val_images, val_boxes) - print('Processing Pascal VOC 2007 test set.') - add_to_dataset(voc_path, '2007', test_ids, test_images, test_boxes) - - print('Closing HDF5 file.') - voc_h5file.close() - print('Done.') - - -if __name__ == '__main__': - _main(parser.parse_args()) -"""Convert Pascal VOC 2007+2012 detection dataset to TFRecords. -Does not preserve full XML annotations. -Combines all VOC 2007 subsets (train, val) with VOC2012 for training. -Uses VOC2012 val for val and VOC2007 test for test. 
-
-Code based on:
-https://github.com/pjreddie/darknet/blob/master/scripts/voc_label.py
-https://github.com/tensorflow/models/blob/master/inception/inception/data/build_image_data.py
-"""
-
-import argparse
-import os
-import xml.etree.ElementTree as ElementTree
-from datetime import datetime
-
-import numpy as np
-import tensorflow as tf
-
-from voc_to_hdf5 import get_ids
-
-sets_from_2007 = [('2007', 'train'), ('2007', 'val')]
-train_set = [('2012', 'train'), ('2012', 'val')]
-test_set = [('2007', 'test')]
-
-classes = [
-    "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat",
-    "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person",
-    "pottedplant", "sheep", "sofa", "train", "tvmonitor"
-]
-
-parser = argparse.ArgumentParser(
-    description='Convert Pascal VOC 2007+2012 detection dataset to TFRecords.')
-parser.add_argument(
-    '-p',
-    '--path_to_voc',
-    help='path to Pascal VOC dataset',
-    default='~/data/PascalVOC/VOCdevkit')
-
-# Small graph for image decoding
-decoder_sess = tf.Session()
-image_placeholder = tf.placeholder(dtype=tf.string)
-decoded_jpeg = tf.image.decode_jpeg(image_placeholder, channels=3)
-
-
-def process_image(image_path):
-    """Decode image at given path."""
-    with open(image_path, 'rb') as f:
-        image_data = f.read()
-    image = decoder_sess.run(decoded_jpeg,
-                             feed_dict={image_placeholder: image_data})
-    assert len(image.shape) == 3
-    height = image.shape[0]
-    width = image.shape[1]
-    assert image.shape[2] == 3
-    return image_data, height, width
-
-
-def process_anno(anno_path):
-    """Process Pascal VOC annotations."""
-    with open(anno_path) as f:
-        xml_tree = ElementTree.parse(f)
-    root = xml_tree.getroot()
-    size = root.find('size')
-    height = float(size.find('height').text)
-    width = float(size.find('width').text)
-    boxes = []
-    for obj in root.iter('object'):
-        difficult = obj.find('difficult').text
-        label = obj.find('name').text
-        if label not in classes or int(
-                difficult) == 1:  # exclude difficult or unlisted classes
-            continue
-        xml_box = obj.find('bndbox')
-        bbox = {
-            'class': classes.index(label),
-            'y_min': float(xml_box.find('ymin').text) / height,
-            'x_min': float(xml_box.find('xmin').text) / width,
-            'y_max': float(xml_box.find('ymax').text) / height,
-            'x_max': float(xml_box.find('xmax').text) / width
-        }
-        boxes.append(bbox)
-    return boxes
-
-
-def convert_to_example(image_data, boxes, filename, height, width):
-    """Convert Pascal VOC ground truth to TFExample protobuf.
-
-    Parameters
-    ----------
-    image_data : bytes
-        Encoded image bytes.
-    boxes : list of dicts
-        Bounding box corners and class labels
-    filename : string
-        Path to image file.
-    height : int
-        Image height.
-    width : int
-        Image width.
-
-    Returns
-    -------
-    example : protobuf
-        Tensorflow Example protobuf containing image and bounding boxes.
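-
-    Example
-    -------
-    Each entry of boxes has the form produced by process_anno above, e.g.
-    {'class': 11, 'y_min': 0.2, 'x_min': 0.1, 'y_max': 0.9, 'x_max': 0.7}
-    (illustrative values), with corners normalized by image height and width.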
- """ - box_classes = [b['class'] for b in boxes] - box_ymin = [b['y_min'] for b in boxes] - box_xmin = [b['x_min'] for b in boxes] - box_ymax = [b['y_max'] for b in boxes] - box_xmax = [b['x_max'] for b in boxes] - encoded_image = [tf.compat.as_bytes(image_data)] - base_name = [tf.compat.as_bytes(os.path.basename(filename))] - - example = tf.train.Example(features=tf.train.Features(feature={ - 'filename': - tf.train.Feature(bytes_list=tf.train.BytesList(value=base_name)), - 'height': - tf.train.Feature(int64_list=tf.train.Int64List(value=[height])), - 'width': - tf.train.Feature(int64_list=tf.train.Int64List(value=[width])), - 'classes': - tf.train.Feature(int64_list=tf.train.Int64List(value=box_classes)), - 'y_mins': - tf.train.Feature(float_list=tf.train.FloatList(value=box_ymin)), - 'x_mins': - tf.train.Feature(float_list=tf.train.FloatList(value=box_xmin)), - 'y_maxes': - tf.train.Feature(float_list=tf.train.FloatList(value=box_ymax)), - 'x_maxes': - tf.train.Feature(float_list=tf.train.FloatList(value=box_xmax)), - 'encoded': - tf.train.Feature(bytes_list=tf.train.BytesList(value=encoded_image)) - })) - return example - - -def get_image_path(voc_path, year, image_id): - """Get path to image for given year and image id.""" - return os.path.join(voc_path, 'VOC{}/JPEGImages/{}.jpg'.format(year, - image_id)) - - -def get_anno_path(voc_path, year, image_id): - """Get path to image annotation for given year and image id.""" - return os.path.join(voc_path, 'VOC{}/Annotations/{}.xml'.format(year, - image_id)) - - -def process_dataset(name, image_paths, anno_paths, result_path, num_shards): - """Process selected Pascal VOC dataset to generate TFRecords files. - - Parameters - ---------- - name : string - Name of resulting dataset 'train' or 'test'. - image_paths : list - List of paths to images to include in dataset. - anno_paths : list - List of paths to corresponding image annotations. - result_path : string - Path to put resulting TFRecord files. - num_shards : int - Number of shards to split TFRecord files into. 
- """ - shard_ranges = np.linspace(0, len(image_paths), num_shards + 1).astype(int) - counter = 0 - for shard in range(num_shards): - # Generate shard file name - output_filename = '{}-{:05d}-of-{:05d}'.format(name, shard, num_shards) - output_file = os.path.join(result_path, output_filename) - writer = tf.python_io.TFRecordWriter(output_file) - - shard_counter = 0 - files_in_shard = range(shard_ranges[shard], shard_ranges[shard + 1]) - for i in files_in_shard: - image_file = image_paths[i] - anno_file = anno_paths[i] - - # processes image + anno - image_data, height, width = process_image(image_file) - boxes = process_anno(anno_file) - - # convert to example - example = convert_to_example(image_data, boxes, image_file, height, - width) - - # write to writer - writer.write(example.SerializeToString()) - - shard_counter += 1 - counter += 1 - - if not counter % 1000: - print('{} : Processed {:d} of {:d} images.'.format( - datetime.now(), counter, len(image_paths))) - writer.close() - print('{} : Wrote {} images to {}'.format( - datetime.now(), shard_counter, output_filename)) - - print('{} : Wrote {} images to {} shards'.format(datetime.now(), counter, - num_shards)) - - -def _main(args): - """Locate files for train and test sets and then generate TFRecords.""" - voc_path = args.path_to_voc - voc_path = os.path.expanduser(voc_path) - result_path = os.path.join(voc_path, 'TFRecords') - print('Saving results to {}'.format(result_path)) - - train_path = os.path.join(result_path, 'train') - test_path = os.path.join(result_path, 'test') - - train_ids = get_ids(voc_path, train_set) # 2012 trainval - test_ids = get_ids(voc_path, test_set) # 2007 test - train_ids_2007 = get_ids(voc_path, sets_from_2007) # 2007 trainval - total_train_ids = len(train_ids) + len(train_ids_2007) - print('{} train examples and {} test examples'.format(total_train_ids, - len(test_ids))) - - train_image_paths = [ - get_image_path(voc_path, '2012', i) for i in train_ids - ] - train_image_paths.extend( - [get_image_path(voc_path, '2007', i) for i in train_ids_2007]) - test_image_paths = [get_image_path(voc_path, '2007', i) for i in test_ids] - - train_anno_paths = [get_anno_path(voc_path, '2012', i) for i in train_ids] - train_anno_paths.extend( - [get_anno_path(voc_path, '2007', i) for i in train_ids_2007]) - test_anno_paths = [get_anno_path(voc_path, '2007', i) for i in test_ids] - - process_dataset( - 'train', - train_image_paths, - train_anno_paths, - train_path, - num_shards=60) - process_dataset( - 'test', test_image_paths, test_anno_paths, test_path, num_shards=20) - - -if __name__ == '__main__': - _main(parser.parse_args(args)) -"""Darknet19 Model Defined in Keras.""" -import functools -from functools import partial - -from keras.layers import Conv2D, MaxPooling2D -from keras.layers.advanced_activations import LeakyReLU -from keras.layers.normalization import BatchNormalization -from keras.models import Model -from keras.regularizers import l2 - -from ..utils import compose - -# Partial wrapper for Convolution2D with static default argument. 
-_DarknetConv2D = partial(Conv2D, padding='same') - - -@functools.wraps(Conv2D) -def DarknetConv2D(*args, **kwargs): - """Wrapper to set Darknet weight regularizer for Convolution2D.""" - darknet_conv_kwargs = {'kernel_regularizer': l2(5e-4)} - darknet_conv_kwargs.update(kwargs) - return _DarknetConv2D(*args, **darknet_conv_kwargs) - - -def DarknetConv2D_BN_Leaky(*args, **kwargs): - """Darknet Convolution2D followed by BatchNormalization and LeakyReLU.""" - no_bias_kwargs = {'use_bias': False} - no_bias_kwargs.update(kwargs) - return compose( - DarknetConv2D(*args, **no_bias_kwargs), - BatchNormalization(), - LeakyReLU(alpha=0.1)) - - -def bottleneck_block(outer_filters, bottleneck_filters): - """Bottleneck block of 3x3, 1x1, 3x3 convolutions.""" - return compose( - DarknetConv2D_BN_Leaky(outer_filters, (3, 3)), - DarknetConv2D_BN_Leaky(bottleneck_filters, (1, 1)), - DarknetConv2D_BN_Leaky(outer_filters, (3, 3))) - - -def bottleneck_x2_block(outer_filters, bottleneck_filters): - """Bottleneck block of 3x3, 1x1, 3x3, 1x1, 3x3 convolutions.""" - return compose( - bottleneck_block(outer_filters, bottleneck_filters), - DarknetConv2D_BN_Leaky(bottleneck_filters, (1, 1)), - DarknetConv2D_BN_Leaky(outer_filters, (3, 3))) - - -def darknet_body(): - """Generate first 18 conv layers of Darknet-19.""" - return compose( - DarknetConv2D_BN_Leaky(32, (3, 3)), - MaxPooling2D(), - DarknetConv2D_BN_Leaky(64, (3, 3)), - MaxPooling2D(), - bottleneck_block(128, 64), - MaxPooling2D(), - bottleneck_block(256, 128), - MaxPooling2D(), - bottleneck_x2_block(512, 256), - MaxPooling2D(), - bottleneck_x2_block(1024, 512)) - - -def darknet19(inputs): - """Generate Darknet-19 model for Imagenet classification.""" - body = darknet_body()(inputs) - logits = DarknetConv2D(1000, (1, 1), activation='softmax')(body) - return Model(inputs, logits) -"""YOLO_v2 Model Defined in Keras.""" -import sys - -import numpy as np -import tensorflow as tf -from keras import backend as K -from keras.layers import Lambda -from keras.layers.merge import concatenate -from keras.models import Model - -from ..utils import compose -from .keras_darknet19 import (DarknetConv2D, DarknetConv2D_BN_Leaky, - darknet_body) - -sys.path.append('..') - -voc_anchors = np.array( - [[1.08, 1.19], [3.42, 4.41], [6.63, 11.38], [9.42, 5.11], [16.62, 10.52]]) - -voc_classes = [ - "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", - "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", - "pottedplant", "sheep", "sofa", "train", "tvmonitor" -] - - -def space_to_depth_x2(x): - """Thin wrapper for Tensorflow space_to_depth with block_size=2.""" - # Import currently required to make Lambda work. - # See: https://github.com/fchollet/keras/issues/5088#issuecomment-273851273 - import tensorflow as tf - return tf.space_to_depth(x, block_size=2) - - -def space_to_depth_x2_output_shape(input_shape): - """Determine space_to_depth output shape for block_size=2. - - Note: For Lambda with TensorFlow backend, output shape may not be needed. 
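-
-    Example: a (None, 26, 26, 64) input maps to (None, 13, 13, 256), since
-    each 2x2 spatial block is stacked into the channel dimension.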
- """ - return (input_shape[0], input_shape[1] // 2, input_shape[2] // 2, 4 * - input_shape[3]) if input_shape[1] else (input_shape[0], None, None, - 4 * input_shape[3]) - - -def yolo_body(inputs, num_anchors, num_classes): - """Create YOLO_V2 model CNN body in Keras.""" - darknet = Model(inputs, darknet_body()(inputs)) - conv20 = compose( - DarknetConv2D_BN_Leaky(1024, (3, 3)), - DarknetConv2D_BN_Leaky(1024, (3, 3)))(darknet.output) - - conv13 = darknet.layers[43].output - conv21 = DarknetConv2D_BN_Leaky(64, (1, 1))(conv13) - # TODO: Allow Keras Lambda to use func arguments for output_shape? - conv21_reshaped = Lambda( - space_to_depth_x2, - output_shape=space_to_depth_x2_output_shape, - name='space_to_depth')(conv21) - - x = concatenate([conv21_reshaped, conv20]) - x = DarknetConv2D_BN_Leaky(1024, (3, 3))(x) - x = DarknetConv2D(num_anchors * (num_classes + 5), (1, 1))(x) - return Model(inputs, x) - - -def yolo_head(feats, anchors, num_classes): - """Convert final layer features to bounding box parameters. - - Parameters - ---------- - feats : tensor - Final convolutional layer features. - anchors : array-like - Anchor box widths and heights. - num_classes : int - Number of target classes. - - Returns - ------- - box_xy : tensor - x, y box predictions adjusted by spatial location in conv layer. - box_wh : tensor - w, h box predictions adjusted by anchors and conv spatial resolution. - box_conf : tensor - Probability estimate for whether each box contains any object. - box_class_pred : tensor - Probability distribution estimate for each box over class labels. - """ - num_anchors = len(anchors) - # Reshape to batch, height, width, num_anchors, box_params. - anchors_tensor = K.reshape(K.variable(anchors), [1, 1, 1, num_anchors, 2]) - - # Static implementation for fixed models. - # TODO: Remove or add option for static implementation. - # _, conv_height, conv_width, _ = K.int_shape(feats) - # conv_dims = K.variable([conv_width, conv_height]) - - # Dynamic implementation of conv dims for fully convolutional model. - conv_dims = K.shape(feats)[1:3] # assuming channels last - # In YOLO the height index is the inner most iteration. - conv_height_index = K.arange(0, stop=conv_dims[0]) - conv_width_index = K.arange(0, stop=conv_dims[1]) - conv_height_index = K.tile(conv_height_index, [conv_dims[1]]) - - # TODO: Repeat_elements and tf.split doesn't support dynamic splits. - # conv_width_index = K.repeat_elements(conv_width_index, conv_dims[1], axis=0) - conv_width_index = K.tile( - K.expand_dims(conv_width_index, 0), [conv_dims[0], 1]) - conv_width_index = K.flatten(K.transpose(conv_width_index)) - conv_index = K.transpose(K.stack([conv_height_index, conv_width_index])) - conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2]) - conv_index = K.cast(conv_index, K.dtype(feats)) - - feats = K.reshape( - feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5]) - conv_dims = K.cast(K.reshape(conv_dims, [1, 1, 1, 1, 2]), K.dtype(feats)) - - # Static generation of conv_index: - # conv_index = np.array([_ for _ in np.ndindex(conv_width, conv_height)]) - # conv_index = conv_index[:, [1, 0]] # swap columns for YOLO ordering. 
-    # conv_index = K.variable(
-    #     conv_index.reshape(1, conv_height, conv_width, 1, 2))
-    # feats = Reshape(
-    #     (conv_dims[0], conv_dims[1], num_anchors, num_classes + 5))(feats)
-
-    box_xy = K.sigmoid(feats[..., :2])
-    box_wh = K.exp(feats[..., 2:4])
-    box_confidence = K.sigmoid(feats[..., 4:5])
-    box_class_probs = K.softmax(feats[..., 5:])
-
-    # Adjust predictions to each spatial grid point and anchor size.
-    # Note: YOLO iterates over height index before width index.
-    box_xy = (box_xy + conv_index) / conv_dims
-    box_wh = box_wh * anchors_tensor / conv_dims
-
-    return box_xy, box_wh, box_confidence, box_class_probs
-
-
-def yolo_boxes_to_corners(box_xy, box_wh):
-    """Convert YOLO box predictions to bounding box corners."""
-    box_mins = box_xy - (box_wh / 2.)
-    box_maxes = box_xy + (box_wh / 2.)
-
-    return K.concatenate([
-        box_mins[..., 1:2],  # y_min
-        box_mins[..., 0:1],  # x_min
-        box_maxes[..., 1:2],  # y_max
-        box_maxes[..., 0:1]  # x_max
-    ])
-
-
-def yolo_loss(args,
-              anchors,
-              num_classes,
-              rescore_confidence=False,
-              print_loss=False):
-    """YOLO localization loss function.
-
-    Parameters
-    ----------
-    yolo_output : tensor
-        Final convolutional layer features.
-
-    true_boxes : tensor
-        Ground truth boxes tensor with shape [batch, num_true_boxes, 5]
-        containing box x_center, y_center, width, height, and class.
-
-    detectors_mask : array
-        0/1 mask for detector positions where there is a matching ground truth.
-
-    matching_true_boxes : array
-        Corresponding ground truth boxes for positive detector positions.
-        Already adjusted for conv height and width.
-
-    anchors : tensor
-        Anchor boxes for model.
-
-    num_classes : int
-        Number of object classes.
-
-    rescore_confidence : bool, default=False
-        If true then set confidence target to IOU of best predicted box with
-        the closest matching ground truth box.
-
-    print_loss : bool, default=False
-        If True then use a tf.Print() to print the loss components.
-
-    Returns
-    -------
-    mean_loss : float
-        mean localization loss across minibatch
-    """
-    (yolo_output, true_boxes, detectors_mask, matching_true_boxes) = args
-    num_anchors = len(anchors)
-    object_scale = 5
-    no_object_scale = 1
-    class_scale = 1
-    coordinates_scale = 1
-    pred_xy, pred_wh, pred_confidence, pred_class_prob = yolo_head(
-        yolo_output, anchors, num_classes)
-
-    # Unadjusted box predictions for loss.
-    # TODO: Remove extra computation shared with yolo_head.
-    yolo_output_shape = K.shape(yolo_output)
-    feats = K.reshape(yolo_output, [
-        -1, yolo_output_shape[1], yolo_output_shape[2], num_anchors,
-        num_classes + 5
-    ])
-    pred_boxes = K.concatenate(
-        (K.sigmoid(feats[..., 0:2]), feats[..., 2:4]), axis=-1)
-
-    # TODO: Adjust predictions by image width/height for non-square images?
-    # IOUs may be off due to different aspect ratio.
-
-    # Expand pred x,y,w,h to allow comparison with ground truth.
-    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
-    pred_xy = K.expand_dims(pred_xy, 4)
-    pred_wh = K.expand_dims(pred_wh, 4)
-
-    pred_wh_half = pred_wh / 2.
-    pred_mins = pred_xy - pred_wh_half
-    pred_maxes = pred_xy + pred_wh_half
-
-    true_boxes_shape = K.shape(true_boxes)
-
-    # batch, conv_height, conv_width, num_anchors, num_true_boxes, box_params
-    true_boxes = K.reshape(true_boxes, [
-        true_boxes_shape[0], 1, 1, 1, true_boxes_shape[1], true_boxes_shape[2]
-    ])
-    true_xy = true_boxes[..., 0:2]
-    true_wh = true_boxes[..., 2:4]
-
-    # Find IOU of each predicted box with each ground truth box.
-    true_wh_half = true_wh / 2.
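-    # Worked example: a 2x2 prediction and a 1x1 true box sharing the same
-    # center intersect over a 1x1 region, so IOU = 1 / (4 + 1 - 1) = 0.25.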
- true_mins = true_xy - true_wh_half - true_maxes = true_xy + true_wh_half - - intersect_mins = K.maximum(pred_mins, true_mins) - intersect_maxes = K.minimum(pred_maxes, true_maxes) - intersect_wh = K.maximum(intersect_maxes - intersect_mins, 0.) - intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] - - pred_areas = pred_wh[..., 0] * pred_wh[..., 1] - true_areas = true_wh[..., 0] * true_wh[..., 1] - - union_areas = pred_areas + true_areas - intersect_areas - iou_scores = intersect_areas / union_areas - - # Best IOUs for each location. - best_ious = K.max(iou_scores, axis=4) # Best IOU scores. - best_ious = K.expand_dims(best_ious) - - # A detector has found an object if IOU > thresh for some true box. - object_detections = K.cast(best_ious > 0.6, K.dtype(best_ious)) - - # TODO: Darknet region training includes extra coordinate loss for early - # training steps to encourage predictions to match anchor priors. - - # Determine confidence weights from object and no_object weights. - # NOTE: YOLO does not use binary cross-entropy here. - no_object_weights = (no_object_scale * (1 - object_detections) * - (1 - detectors_mask)) - no_objects_loss = no_object_weights * K.square(-pred_confidence) - - if rescore_confidence: - objects_loss = (object_scale * detectors_mask * - K.square(best_ious - pred_confidence)) - else: - objects_loss = (object_scale * detectors_mask * - K.square(1 - pred_confidence)) - confidence_loss = objects_loss + no_objects_loss - - # Classification loss for matching detections. - # NOTE: YOLO does not use categorical cross-entropy loss here. - matching_classes = K.cast(matching_true_boxes[..., 4], 'int32') - matching_classes = K.one_hot(matching_classes, num_classes) - classification_loss = (class_scale * detectors_mask * - K.square(matching_classes - pred_class_prob)) - - # Coordinate loss for matching detection boxes. - matching_boxes = matching_true_boxes[..., 0:4] - coordinates_loss = (coordinates_scale * detectors_mask * - K.square(matching_boxes - pred_boxes)) - - confidence_loss_sum = K.sum(confidence_loss) - classification_loss_sum = K.sum(classification_loss) - coordinates_loss_sum = K.sum(coordinates_loss) - total_loss = 0.5 * ( - confidence_loss_sum + classification_loss_sum + coordinates_loss_sum) - if print_loss: - total_loss = tf.Print( - total_loss, [ - total_loss, confidence_loss_sum, classification_loss_sum, - coordinates_loss_sum - ], - message='yolo_loss, conf_loss, class_loss, box_coord_loss:') - - return total_loss - - -def yolo(inputs, anchors, num_classes): - """Generate a complete YOLO_v2 localization model.""" - num_anchors = len(anchors) - body = yolo_body(inputs, num_anchors, num_classes) - outputs = yolo_head(body.output, anchors, num_classes) - return outputs - - -def yolo_filter_boxes(boxes, box_confidence, box_class_probs, threshold=.6): - """Filter YOLO boxes based on object and class confidence.""" - box_scores = box_confidence * box_class_probs - box_classes = K.argmax(box_scores, axis=-1) - box_class_scores = K.max(box_scores, axis=-1) - prediction_mask = box_class_scores >= threshold - - # TODO: Expose tf.boolean_mask to Keras backend? 
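-    # Worked example: with box_confidence 0.8 and class probs (0.6, 0.3, 0.1),
-    # box_scores are (0.48, 0.24, 0.08), so the box is kept as class 0 with
-    # score 0.48 only if 0.48 >= threshold.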
- boxes = tf.boolean_mask(boxes, prediction_mask) - scores = tf.boolean_mask(box_class_scores, prediction_mask) - classes = tf.boolean_mask(box_classes, prediction_mask) - return boxes, scores, classes - - -def yolo_eval(yolo_outputs, - image_shape, - max_boxes=10, - score_threshold=.6, - iou_threshold=.5): - """Evaluate YOLO model on given input batch and return filtered boxes.""" - box_xy, box_wh, box_confidence, box_class_probs = yolo_outputs - boxes = yolo_boxes_to_corners(box_xy, box_wh) - boxes, scores, classes = yolo_filter_boxes( - boxes, box_confidence, box_class_probs, threshold=score_threshold) - - # Scale boxes back to original image shape. - height = image_shape[0] - width = image_shape[1] - image_dims = K.stack([height, width, height, width]) - image_dims = K.reshape(image_dims, [1, 4]) - boxes = boxes * image_dims - - # TODO: Something must be done about this ugly hack! - max_boxes_tensor = K.variable(max_boxes, dtype='int32') - K.get_session().run(tf.variables_initializer([max_boxes_tensor])) - nms_index = tf.image.non_max_suppression( - boxes, scores, max_boxes_tensor, iou_threshold=iou_threshold) - boxes = K.gather(boxes, nms_index) - scores = K.gather(scores, nms_index) - classes = K.gather(classes, nms_index) - return boxes, scores, classes - - -def preprocess_true_boxes(true_boxes, anchors, image_size): - """Find detector in YOLO where ground truth box should appear. - - Parameters - ---------- - true_boxes : array - List of ground truth boxes in form of relative x, y, w, h, class. - Relative coordinates are in the range [0, 1] indicating a percentage - of the original image dimensions. - anchors : array - List of anchors in form of w, h. - Anchors are assumed to be in the range [0, conv_size] where conv_size - is the spatial dimension of the final convolutional features. - image_size : array-like - List of image dimensions in form of h, w in pixels. - - Returns - ------- - detectors_mask : array - 0/1 mask for detectors in [conv_height, conv_width, num_anchors, 1] - that should be compared with a matching ground truth box. - matching_true_boxes: array - Same shape as detectors_mask with the corresponding ground truth box - adjusted for comparison with predicted parameters at training time. - """ - height, width = image_size - num_anchors = len(anchors) - # Downsampling factor of 5x 2-stride max_pools == 32. - # TODO: Remove hardcoding of downscaling calculations. - assert height % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.' - assert width % 32 == 0, 'Image sizes in YOLO_v2 must be multiples of 32.' - conv_height = height // 32 - conv_width = width // 32 - num_box_params = true_boxes.shape[1] - detectors_mask = np.zeros( - (conv_height, conv_width, num_anchors, 1), dtype=np.float32) - matching_true_boxes = np.zeros( - (conv_height, conv_width, num_anchors, num_box_params), - dtype=np.float32) - - for box in true_boxes: - # scale box to convolutional feature spatial dimensions - box_class = box[4:5] - box = box[0:4] * np.array( - [conv_width, conv_height, conv_width, conv_height]) - i = np.floor(box[1]).astype('int') - j = np.floor(box[0]).astype('int') - best_iou = 0 - best_anchor = 0 - for k, anchor in enumerate(anchors): - # Find IOU between box shifted to origin and anchor box. - box_maxes = box[2:4] / 2. - box_mins = -box_maxes - anchor_maxes = (anchor / 2.) 
-            anchor_mins = -anchor_maxes
-
-            intersect_mins = np.maximum(box_mins, anchor_mins)
-            intersect_maxes = np.minimum(box_maxes, anchor_maxes)
-            intersect_wh = np.maximum(intersect_maxes - intersect_mins, 0.)
-            intersect_area = intersect_wh[0] * intersect_wh[1]
-            box_area = box[2] * box[3]
-            anchor_area = anchor[0] * anchor[1]
-            iou = intersect_area / (box_area + anchor_area - intersect_area)
-            if iou > best_iou:
-                best_iou = iou
-                best_anchor = k
-
-        if best_iou > 0:
-            detectors_mask[i, j, best_anchor] = 1
-            adjusted_box = np.array(
-                [
-                    box[0] - j, box[1] - i,
-                    np.log(box[2] / anchors[best_anchor][0]),
-                    np.log(box[3] / anchors[best_anchor][1]), box_class
-                ],
-                dtype=np.float32)
-            matching_true_boxes[i, j, best_anchor] = adjusted_box
-    return detectors_mask, matching_true_boxes
-from .utils import *
-"""Draw predicted or ground truth boxes on input image."""
-
-import colorsys
-import random
-
-import numpy as np
-from PIL import Image, ImageDraw, ImageFont
-
-
-def get_colors_for_classes(num_classes):
-    """Return list of random colors for number of classes given."""
-    # Use previously generated colors if num_classes is the same.
-    if (hasattr(get_colors_for_classes, "colors") and
-            len(get_colors_for_classes.colors) == num_classes):
-        return get_colors_for_classes.colors
-
-    hsv_tuples = [(x / num_classes, 1., 1.) for x in range(num_classes)]
-    colors = list(map(lambda x: colorsys.hsv_to_rgb(*x), hsv_tuples))
-    colors = list(
-        map(lambda x: (int(x[0] * 255), int(x[1] * 255), int(x[2] * 255)),
-            colors))
-    random.seed(10101)  # Fixed seed for consistent colors across runs.
-    random.shuffle(colors)  # Shuffle colors to decorrelate adjacent classes.
-    random.seed(None)  # Reset seed to default.
-    get_colors_for_classes.colors = colors  # Save colors for future calls.
-    return colors
-
-
-def draw_boxes(image, boxes, box_classes, class_names, scores=None):
-    """Draw bounding boxes on image.
-
-    Draw bounding boxes with class name and optional box score on image.
-
-    Args:
-        image: An `array` of shape (width, height, 3) with values in [0, 1].
-        boxes: An `array` of shape (num_boxes, 4) containing box corners as
-            (y_min, x_min, y_max, x_max).
-        box_classes: A `list` of indices into `class_names`.
-        class_names: A `list` of `string` class names.
-        scores: A `list` of scores for each box.
-
-    Returns:
-        A copy of `image` modified with given bounding boxes.
-    """
-    image = Image.fromarray(np.floor(image * 255 + 0.5).astype('uint8'))
-
-    font = ImageFont.truetype(
-        font='font/FiraMono-Medium.otf',
-        size=np.floor(3e-2 * image.size[1] + 0.5).astype('int32'))
-    thickness = (image.size[0] + image.size[1]) // 300
-
-    colors = get_colors_for_classes(len(class_names))
-
-    for i, c in list(enumerate(box_classes)):
-        box_class = class_names[c]
-        box = boxes[i]
-        if isinstance(scores, np.ndarray):
-            score = scores[i]
-            label = '{} {:.2f}'.format(box_class, score)
-        else:
-            label = '{}'.format(box_class)
-
-        draw = ImageDraw.Draw(image)
-        label_size = draw.textsize(label, font)
-
-        top, left, bottom, right = box
-        top = max(0, np.floor(top + 0.5).astype('int32'))
-        left = max(0, np.floor(left + 0.5).astype('int32'))
-        bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32'))
-        right = min(image.size[0], np.floor(right + 0.5).astype('int32'))
-        print(label, (left, top), (right, bottom))
-
-        if top - label_size[1] >= 0:
-            text_origin = np.array([left, top - label_size[1]])
-        else:
-            text_origin = np.array([left, top + 1])
-
-        # My kingdom for a good redistributable image drawing library.
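-        # The loop below fakes a thick outline by drawing `thickness` nested
-        # one-pixel rectangles, each inset by one more pixel, since this
-        # PIL rectangle() call has no line-width argument.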
- for i in range(thickness): - draw.rectangle( - [left + i, top + i, right - i, bottom - i], outline=colors[c]) - draw.rectangle( - [tuple(text_origin), tuple(text_origin + label_size)], - fill=colors[c]) - draw.text(text_origin, label, fill=(0, 0, 0), font=font) - del draw - - return np.array(image) -from keras.layers import Input, Dense, merge -from keras.models import * -import numpy as np - -from attention_utils import get_activations, get_data - -np.random.seed(1337) # for reproducibility - -input_dim = 32 - - -def build_model(): - inputs = Input(shape=(input_dim,)) - - # ATTENTION PART STARTS HERE - attention_probs = Dense( - input_dim, activation='softmax', name='attention_vec')(inputs) - attention_mul = merge([inputs, attention_probs], - output_shape=32, name='attention_mul', mode='mul') - # ATTENTION PART FINISHES HERE - - attention_mul = Dense(64)(attention_mul) - output = Dense(1, activation='sigmoid')(attention_mul) - model = Model(input=[inputs], output=output) - return model - - -if __name__ == '__main__': - N = 10000 - inputs_1, outputs = get_data(N, input_dim) - - m = build_model() - m.compile(optimizer='adam', loss='binary_crossentropy', - metrics=['accuracy']) - print(m.summary()) - - m.fit([inputs_1], outputs, epochs=20, batch_size=64, validation_split=0.5) - - testing_inputs_1, testing_outputs = get_data(1, input_dim) - - # Attention vector corresponds to the second matrix. - # The first one is the Inputs output. - attention_vector = get_activations(m, testing_inputs_1, - print_shape_only=True, - layer_name='attention_vec')[0].flatten() - print('attention =', attention_vector) - - # plot part. - import matplotlib.pyplot as plt - import pandas as pd - - pd.DataFrame(attention_vector, columns=['attention (%)']).plot(kind='bar', - title='Attention Mechanism as ' - 'a function of input' - ' dimensions.') - plt.show() -from keras.layers import merge -from keras.layers.core import * -from keras.layers.recurrent import LSTM -from keras.models import * - -from attention_utils import get_activations, get_data_recurrent - -INPUT_DIM = 2 -TIME_STEPS = 20 -# if True, the attention vector is shared across the input_dimensions where the attention is applied. -SINGLE_ATTENTION_VECTOR = False -APPLY_ATTENTION_BEFORE_LSTM = False - - -def attention_3d_block(inputs): - # inputs.shape = (batch_size, time_steps, input_dim) - input_dim = int(inputs.shape[2]) - a = Permute((2, 1))(inputs) - # this line is not useful. It's just to know which dimension is what. 
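-    # Shape bookkeeping: Permute((2, 1)) turns (batch, TIME_STEPS, input_dim)
-    # into (batch, input_dim, TIME_STEPS), so the Dense(TIME_STEPS, softmax)
-    # below yields, for each input feature, attention weights over the
-    # TIME_STEPS (here 20) time steps.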
-    a = Reshape((input_dim, TIME_STEPS))(a)
-    a = Dense(TIME_STEPS, activation='softmax')(a)
-    if SINGLE_ATTENTION_VECTOR:
-        a = Lambda(lambda x: K.mean(x, axis=1), name='dim_reduction')(a)
-        a = RepeatVector(input_dim)(a)
-    a_probs = Permute((2, 1), name='attention_vec')(a)
-    output_attention_mul = merge(
-        [inputs, a_probs], name='attention_mul', mode='mul')
-    return output_attention_mul
-
-
-def model_attention_applied_after_lstm():
-    inputs = Input(shape=(TIME_STEPS, INPUT_DIM,))
-    lstm_units = 32
-    lstm_out = LSTM(lstm_units, return_sequences=True)(inputs)
-    attention_mul = attention_3d_block(lstm_out)
-    attention_mul = Flatten()(attention_mul)
-    output = Dense(1, activation='sigmoid')(attention_mul)
-    model = Model(input=[inputs], output=output)
-    return model
-
-
-def model_attention_applied_before_lstm():
-    inputs = Input(shape=(TIME_STEPS, INPUT_DIM,))
-    attention_mul = attention_3d_block(inputs)
-    lstm_units = 32
-    attention_mul = LSTM(lstm_units, return_sequences=False)(attention_mul)
-    output = Dense(1, activation='sigmoid')(attention_mul)
-    model = Model(input=[inputs], output=output)
-    return model
-
-
-if __name__ == '__main__':
-
-    N = 300000
-    # N = 300 -> too few = no training
-    inputs_1, outputs = get_data_recurrent(N, TIME_STEPS, INPUT_DIM)
-
-    if APPLY_ATTENTION_BEFORE_LSTM:
-        m = model_attention_applied_before_lstm()
-    else:
-        m = model_attention_applied_after_lstm()
-
-    m.compile(optimizer='adam', loss='binary_crossentropy',
-              metrics=['accuracy'])
-    print(m.summary())
-
-    m.fit([inputs_1], outputs, epochs=1, batch_size=64, validation_split=0.1)
-
-    attention_vectors = []
-    for i in range(300):
-        testing_inputs_1, testing_outputs = get_data_recurrent(
-            1, TIME_STEPS, INPUT_DIM)
-        attention_vector = np.mean(get_activations(m,
-                                                   testing_inputs_1,
-                                                   print_shape_only=True,
-                                                   layer_name='attention_vec')[0], axis=2).squeeze()
-        print('attention =', attention_vector)
-        assert abs(np.sum(attention_vector) - 1.0) < 1e-5
-        attention_vectors.append(attention_vector)
-
-    attention_vector_final = np.mean(np.array(attention_vectors), axis=0)
-    # plot part.
-    import matplotlib.pyplot as plt
-    import pandas as pd
-
-    pd.DataFrame(attention_vector_final, columns=['attention (%)']).plot(kind='bar',
-                                                                         title='Attention Mechanism as '
-                                                                         'a function of input'
-                                                                         ' dimensions.')
-    plt.show()
-import keras.backend as K
-import numpy as np
-
-
-def get_activations(model, inputs, print_shape_only=False, layer_name=None):
-    # Documentation is available online on Github at the address below.
-    # From: https://github.com/philipperemy/keras-visualize-activations
-    print('----- activations -----')
-    activations = []
-    inp = model.input
-    if layer_name is None:
-        # all layer outputs
-        outputs = [layer.output for layer in model.layers]
-    else:
-        outputs = [
-            layer.output for layer in model.layers if layer.name == layer_name]
-    funcs = [K.function([inp] + [K.learning_phase()], [out])
-             for out in outputs]  # evaluation functions
-    layer_outputs = [func([inputs, 1.])[0] for func in funcs]
-    for layer_activations in layer_outputs:
-        activations.append(layer_activations)
-        if print_shape_only:
-            print(layer_activations.shape)
-        else:
-            print(layer_activations)
-    return activations
-
-
-def get_data(n, input_dim, attention_column=1):
-    """
-    Data generation. x is purely random except that column `attention_column` equals the target y.
-    In practice, the network should learn that the target = x[attention_column].
-    Therefore, most of its attention should be focused on the value addressed by attention_column.
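-    For example, get_data(4, 32) returns x of shape (4, 32) and y of shape
-    (4, 1), with x[:, 1] == y[:, 0] by construction (attention_column=1).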
-    :param n: the number of samples to retrieve.
-    :param input_dim: the number of dimensions of each element in the series.
-    :param attention_column: the column linked to the target. Everything else is purely random.
-    :return: x: model inputs, y: model targets
-    """
-    x = np.random.standard_normal(size=(n, input_dim))
-    y = np.random.randint(low=0, high=2, size=(n, 1))
-    x[:, attention_column] = y[:, 0]
-    return x, y
-
-
-def get_data_recurrent(n, time_steps, input_dim, attention_column=10):
-    """
-    Data generation. x is purely random except that column `attention_column` equals the target y.
-    In practice, the network should learn that the target = x[attention_column].
-    Therefore, most of its attention should be focused on the value addressed by attention_column.
-    :param n: the number of samples to retrieve.
-    :param time_steps: the number of time steps of your series.
-    :param input_dim: the number of dimensions of each element in the series.
-    :param attention_column: the column linked to the target. Everything else is purely random.
-    :return: x: model inputs, y: model targets
-    """
-    x = np.random.standard_normal(size=(n, time_steps, input_dim))
-    y = np.random.randint(low=0, high=2, size=(n, 1))
-    x[:, attention_column, :] = np.tile(y[:], (1, input_dim))
-    return x, y
-# -*- coding: utf-8 -*-
-
-from keras.optimizers import SGD
-from keras.layers import Input, merge, ZeroPadding2D
-from keras.layers.core import Dense, Dropout, Activation
-from keras.layers.convolutional import Convolution2D
-from keras.layers.pooling import AveragePooling2D, GlobalAveragePooling2D, MaxPooling2D
-from keras.layers.normalization import BatchNormalization
-from keras.models import Model
-import keras.backend as K
-
-from sklearn.metrics import log_loss
-
-from custom_layers.scale_layer import Scale
-
-from load_cifar10 import load_cifar10_data
-
-
-def densenet121_model(img_rows, img_cols, color_type=1, nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.5, dropout_rate=0.0, weight_decay=1e-4, num_classes=None):
-    '''
-    DenseNet 121 Model for Keras
-
-    Model Schema is based on
-    https://github.com/flyyufelix/DenseNet-Keras
-
-    ImageNet Pretrained Weights
-    Theano: https://drive.google.com/open?id=0Byy2AcGyEVxfMlRYb3YzV210VzQ
-    TensorFlow: https://drive.google.com/open?id=0Byy2AcGyEVxfSTA4SHJVOHNuTXc
-
-    # Arguments
-        nb_dense_block: number of dense blocks to add to end
-        growth_rate: number of filters to add per dense block
-        nb_filter: initial number of filters
-        reduction: reduction factor of transition blocks.
-        dropout_rate: dropout rate
-        weight_decay: weight decay factor
-        classes: optional number of classes to classify images
-        weights_path: path to pre-trained weights
-    # Returns
-        A Keras model instance.
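-
-    # Example
-        model = densenet121_model(img_rows=224, img_cols=224, color_type=3,
-                                  num_classes=10)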
-    '''
-    eps = 1.1e-5
-
-    # compute compression factor
-    compression = 1.0 - reduction
-
-    # Handle Dimension Ordering for different backends
-    global concat_axis
-    if K.image_dim_ordering() == 'tf':
-        concat_axis = 3
-        img_input = Input(shape=(img_rows, img_cols, color_type), name='data')
-    else:
-        concat_axis = 1
-        img_input = Input(shape=(color_type, img_rows, img_cols), name='data')
-
-    # From architecture for ImageNet (Table 1 in the paper)
-    nb_filter = 64
-    nb_layers = [6, 12, 24, 16]  # For DenseNet-121
-
-    # Initial convolution
-    x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input)
-    x = Convolution2D(nb_filter, 7, 7, subsample=(2, 2),
-                      name='conv1', bias=False)(x)
-    x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv1_bn')(x)
-    x = Scale(axis=concat_axis, name='conv1_scale')(x)
-    x = Activation('relu', name='relu1')(x)
-    x = ZeroPadding2D((1, 1), name='pool1_zeropadding')(x)
-    x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x)
-
-    # Add dense blocks
-    for block_idx in range(nb_dense_block - 1):
-        stage = block_idx+2
-        x, nb_filter = dense_block(
-            x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay)
-
-        # Add transition_block
-        x = transition_block(x, stage, nb_filter, compression=compression,
-                             dropout_rate=dropout_rate, weight_decay=weight_decay)
-        nb_filter = int(nb_filter * compression)
-
-    final_stage = stage + 1
-    x, nb_filter = dense_block(
-        x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay)
-
-    x = BatchNormalization(epsilon=eps, axis=concat_axis,
-                           name='conv'+str(final_stage)+'_blk_bn')(x)
-    x = Scale(axis=concat_axis, name='conv'+str(final_stage)+'_blk_scale')(x)
-    x = Activation('relu', name='relu'+str(final_stage)+'_blk')(x)
-
-    x_fc = GlobalAveragePooling2D(name='pool'+str(final_stage))(x)
-    x_fc = Dense(1000, name='fc6')(x_fc)
-    x_fc = Activation('softmax', name='prob')(x_fc)
-
-    model = Model(img_input, x_fc, name='densenet')
-
-    if K.image_dim_ordering() == 'th':
-        # Use pre-trained weights for Theano backend
-        weights_path = 'imagenet_models/densenet121_weights_th.h5'
-    else:
-        # Use pre-trained weights for Tensorflow backend
-        weights_path = 'imagenet_models/densenet121_weights_tf.h5'
-
-    model.load_weights(weights_path, by_name=True)
-
-    # Truncate and replace softmax layer for transfer learning
-    # Cannot use model.layers.pop() since model is not of Sequential() type
-    # The method below works since pre-trained weights are stored in layers but not in the model
-    x_newfc = GlobalAveragePooling2D(name='pool'+str(final_stage))(x)
-    x_newfc = Dense(num_classes, name='fc6')(x_newfc)
-    x_newfc = Activation('softmax', name='prob')(x_newfc)
-
-    model = Model(img_input, x_newfc)
-
-    # Learning rate is changed to 0.001
-    sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True)
-    model.compile(optimizer=sgd, loss='categorical_crossentropy',
-                  metrics=['accuracy'])
-
-    return model
-
-
-def conv_block(x, stage, branch, nb_filter, dropout_rate=None, weight_decay=1e-4):
-    '''Apply BatchNorm, ReLU, bottleneck 1x1 Conv2D, 3x3 Conv2D, and optional dropout
-    # Arguments
-        x: input tensor
-        stage: index for dense block
-        branch: layer index within each dense block
-        nb_filter: number of filters
-        dropout_rate: dropout rate
-        weight_decay: weight decay factor
-    '''
-    eps = 1.1e-5
-    conv_name_base = 'conv' + str(stage) + '_' + str(branch)
-    relu_name_base = 'relu' + str(stage) + '_' + str(branch)
-
-    # 1x1 Convolution (Bottleneck layer)
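-    # The 1x1 "bottleneck" first projects to inter_channel = 4 * nb_filter
-    # channels (e.g. 128 when called with growth_rate 32) so the following
-    # 3x3 convolution operates on a fixed, small channel count regardless of
-    # how wide the concatenated dense-block input has grown.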
- inter_channel = nb_filter * 4 - x = BatchNormalization(epsilon=eps, axis=concat_axis, - name=conv_name_base+'_x1_bn')(x) - x = Scale(axis=concat_axis, name=conv_name_base+'_x1_scale')(x) - x = Activation('relu', name=relu_name_base+'_x1')(x) - x = Convolution2D(inter_channel, 1, 1, - name=conv_name_base+'_x1', bias=False)(x) - - if dropout_rate: - x = Dropout(dropout_rate)(x) - - # 3x3 Convolution - x = BatchNormalization(epsilon=eps, axis=concat_axis, - name=conv_name_base+'_x2_bn')(x) - x = Scale(axis=concat_axis, name=conv_name_base+'_x2_scale')(x) - x = Activation('relu', name=relu_name_base+'_x2')(x) - x = ZeroPadding2D((1, 1), name=conv_name_base+'_x2_zeropadding')(x) - x = Convolution2D(nb_filter, 3, 3, name=conv_name_base + - '_x2', bias=False)(x) - - if dropout_rate: - x = Dropout(dropout_rate)(x) - - return x - - -def transition_block(x, stage, nb_filter, compression=1.0, dropout_rate=None, weight_decay=1E-4): - ''' Apply BatchNorm, 1x1 Convolution, averagePooling, optional compression, dropout - # Arguments - x: input tensor - stage: index for dense block - nb_filter: number of filters - compression: calculated as 1 - reduction. Reduces the number of feature maps in the transition block. - dropout_rate: dropout rate - weight_decay: weight decay factor - ''' - - eps = 1.1e-5 - conv_name_base = 'conv' + str(stage) + '_blk' - relu_name_base = 'relu' + str(stage) + '_blk' - pool_name_base = 'pool' + str(stage) - - x = BatchNormalization(epsilon=eps, axis=concat_axis, - name=conv_name_base+'_bn')(x) - x = Scale(axis=concat_axis, name=conv_name_base+'_scale')(x) - x = Activation('relu', name=relu_name_base)(x) - x = Convolution2D(int(nb_filter * compression), 1, 1, - name=conv_name_base, bias=False)(x) - - if dropout_rate: - x = Dropout(dropout_rate)(x) - - x = AveragePooling2D((2, 2), strides=(2, 2), name=pool_name_base)(x) - - return x - - -def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, weight_decay=1e-4, grow_nb_filters=True): - ''' Build a dense_block where the output of each conv_block is fed to subsequent ones - # Arguments - x: input tensor - stage: index for dense block - nb_layers: the number of layers of conv_block to append to the model. - nb_filter: number of filters - growth_rate: growth rate - dropout_rate: dropout rate - weight_decay: weight decay factor - grow_nb_filters: flag to decide to allow number of filters to grow - ''' - - eps = 1.1e-5 - concat_feat = x - - for i in range(nb_layers): - branch = i+1 - x = conv_block(concat_feat, stage, branch, - growth_rate, dropout_rate, weight_decay) - concat_feat = merge([concat_feat, x], mode='concat', - concat_axis=concat_axis, name='concat_'+str(stage)+'_'+str(branch)) - - if grow_nb_filters: - nb_filter += growth_rate - - return concat_feat, nb_filter - - -if __name__ == '__main__': - - # Example to fine-tune on 3000 samples from Cifar10 - - img_rows, img_cols = 224, 224 # Resolution of inputs - channel = 3 - num_classes = 10 - batch_size = 16 - nb_epoch = 10 - - # Load Cifar10 data. 
Please implement your own load_data() module for your own dataset - X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols) - - # Load our model - model = densenet121_model( - img_rows=img_rows, img_cols=img_cols, color_type=channel, num_classes=num_classes) - - # Start Fine-tuning - model.fit(X_train, Y_train, - batch_size=batch_size, - nb_epoch=nb_epoch, - shuffle=True, - verbose=1, - validation_data=(X_valid, Y_valid), - ) - - # Make predictions - predictions_valid = model.predict( - X_valid, batch_size=batch_size, verbose=1) - - # Cross-entropy loss score - score = log_loss(Y_valid, predictions_valid) -# -*- coding: utf-8 -*- - -from keras.optimizers import SGD -from keras.layers import Input, merge, ZeroPadding2D -from keras.layers.core import Dense, Dropout, Activation -from keras.layers.convolutional import Convolution2D -from keras.layers.pooling import AveragePooling2D, GlobalAveragePooling2D, MaxPooling2D -from keras.layers.normalization import BatchNormalization -from keras.models import Model -import keras.backend as K - -from sklearn.metrics import log_loss - -from custom_layers.scale_layer import Scale - -from load_cifar10 import load_cifar10_data - - -def densenet161_model(img_rows, img_cols, color_type=1, nb_dense_block=4, growth_rate=48, nb_filter=96, reduction=0.5, dropout_rate=0.0, weight_decay=1e-4, num_classes=None): - ''' - DenseNet 161 Model for Keras - - Model Schema is based on - https://github.com/flyyufelix/DenseNet-Keras - - ImageNet Pretrained Weights - Theano: https://drive.google.com/open?id=0Byy2AcGyEVxfVnlCMlBGTDR3RGs - TensorFlow: https://drive.google.com/open?id=0Byy2AcGyEVxfUDZwVjU2cFNidTA - - # Arguments - nb_dense_block: number of dense blocks to add to end - growth_rate: number of filters to add per dense block - nb_filter: initial number of filters - reduction: reduction factor of transition blocks. - dropout_rate: dropout rate - weight_decay: weight decay factor - classes: optional number of classes to classify images - weights_path: path to pre-trained weights - # Returns - A Keras model instance. 
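    # Example
            Worked example, derived from the code below: with the DenseNet-161
            defaults (nb_filter=96, growth_rate=48, reduction=0.5) and the block
            layout [6, 12, 36, 24] set in the body, the channel count evolves as
                96   + 6*48  = 384   -> transition -> 192
                192  + 12*48 = 768   -> transition -> 384
                384  + 36*48 = 2112  -> transition -> 1056
                1056 + 24*48 = 2208  channels entering the final pooling layer.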
- ''' - eps = 1.1e-5 - - # compute compression factor - compression = 1.0 - reduction - - # Handle Dimension Ordering for different backends - global concat_axis - if K.image_dim_ordering() == 'tf': - concat_axis = 3 - img_input = Input(shape=(224, 224, 3), name='data') - else: - concat_axis = 1 - img_input = Input(shape=(3, 224, 224), name='data') - - # From architecture for ImageNet (Table 1 in the paper) - nb_filter = 96 - nb_layers = [6, 12, 36, 24] # For DenseNet-161 - - # Initial convolution - x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input) - x = Convolution2D(nb_filter, 7, 7, subsample=(2, 2), - name='conv1', bias=False)(x) - x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv1_bn')(x) - x = Scale(axis=concat_axis, name='conv1_scale')(x) - x = Activation('relu', name='relu1')(x) - x = ZeroPadding2D((1, 1), name='pool1_zeropadding')(x) - x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x) - - # Add dense blocks - for block_idx in range(nb_dense_block - 1): - stage = block_idx+2 - x, nb_filter = dense_block( - x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay) - - # Add transition_block - x = transition_block(x, stage, nb_filter, compression=compression, - dropout_rate=dropout_rate, weight_decay=weight_decay) - nb_filter = int(nb_filter * compression) - - final_stage = stage + 1 - x, nb_filter = dense_block( - x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay) - - x = BatchNormalization(epsilon=eps, axis=concat_axis, - name='conv'+str(final_stage)+'_blk_bn')(x) - x = Scale(axis=concat_axis, name='conv'+str(final_stage)+'_blk_scale')(x) - x = Activation('relu', name='relu'+str(final_stage)+'_blk')(x) - - x_fc = GlobalAveragePooling2D(name='pool'+str(final_stage))(x) - x_fc = Dense(1000, name='fc6')(x_fc) - x_fc = Activation('softmax', name='prob')(x_fc) - - model = Model(img_input, x_fc, name='densenet') - - if K.image_dim_ordering() == 'th': - # Use pre-trained weights for Theano backend - weights_path = 'imagenet_models/densenet161_weights_th.h5' - else: - # Use pre-trained weights for Tensorflow backend - weights_path = 'imagenet_models/densenet161_weights_tf.h5' - - model.load_weights(weights_path, by_name=True) - - # Truncate and replace softmax layer for transfer learning - # Cannot use model.layers.pop() since model is not of Sequential() type - # The method below works since pre-trained weights are stored in layers but not in the model - x_newfc = GlobalAveragePooling2D(name='pool'+str(final_stage))(x) - x_newfc = Dense(num_classes, name='fc6')(x_newfc) - x_newfc = Activation('softmax', name='prob')(x_newfc) - - model = Model(img_input, x_newfc) - - # Learning rate is changed to 0.001 - sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True) - model.compile(optimizer=sgd, loss='categorical_crossentropy', - metrics=['accuracy']) - - return model - - -def conv_block(x, stage, branch, nb_filter, dropout_rate=None, weight_decay=1e-4): - '''Apply BatchNorm, Relu, bottleneck 1x1 Conv2D, 3x3 Conv2D, and option dropout - # Arguments - x: input tensor - stage: index for dense block - branch: layer index within each dense block - nb_filter: number of filters - dropout_rate: dropout rate - weight_decay: weight decay factor - ''' - eps = 1.1e-5 - conv_name_base = 'conv' + str(stage) + '_' + str(branch) - relu_name_base = 'relu' + str(stage) + '_' + str(branch) - - # 1x1 Convolution (Bottleneck layer) - inter_channel = nb_filter * 4 - x = 
BatchNormalization(epsilon=eps, axis=concat_axis, - name=conv_name_base+'_x1_bn')(x) - x = Scale(axis=concat_axis, name=conv_name_base+'_x1_scale')(x) - x = Activation('relu', name=relu_name_base+'_x1')(x) - x = Convolution2D(inter_channel, 1, 1, - name=conv_name_base+'_x1', bias=False)(x) - - if dropout_rate: - x = Dropout(dropout_rate)(x) - - # 3x3 Convolution - x = BatchNormalization(epsilon=eps, axis=concat_axis, - name=conv_name_base+'_x2_bn')(x) - x = Scale(axis=concat_axis, name=conv_name_base+'_x2_scale')(x) - x = Activation('relu', name=relu_name_base+'_x2')(x) - x = ZeroPadding2D((1, 1), name=conv_name_base+'_x2_zeropadding')(x) - x = Convolution2D(nb_filter, 3, 3, name=conv_name_base + - '_x2', bias=False)(x) - - if dropout_rate: - x = Dropout(dropout_rate)(x) - - return x - - -def transition_block(x, stage, nb_filter, compression=1.0, dropout_rate=None, weight_decay=1E-4): - ''' Apply BatchNorm, 1x1 Convolution, averagePooling, optional compression, dropout - # Arguments - x: input tensor - stage: index for dense block - nb_filter: number of filters - compression: calculated as 1 - reduction. Reduces the number of feature maps in the transition block. - dropout_rate: dropout rate - weight_decay: weight decay factor - ''' - - eps = 1.1e-5 - conv_name_base = 'conv' + str(stage) + '_blk' - relu_name_base = 'relu' + str(stage) + '_blk' - pool_name_base = 'pool' + str(stage) - - x = BatchNormalization(epsilon=eps, axis=concat_axis, - name=conv_name_base+'_bn')(x) - x = Scale(axis=concat_axis, name=conv_name_base+'_scale')(x) - x = Activation('relu', name=relu_name_base)(x) - x = Convolution2D(int(nb_filter * compression), 1, 1, - name=conv_name_base, bias=False)(x) - - if dropout_rate: - x = Dropout(dropout_rate)(x) - - x = AveragePooling2D((2, 2), strides=(2, 2), name=pool_name_base)(x) - - return x - - -def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, weight_decay=1e-4, grow_nb_filters=True): - ''' Build a dense_block where the output of each conv_block is fed to subsequent ones - # Arguments - x: input tensor - stage: index for dense block - nb_layers: the number of layers of conv_block to append to the model. - nb_filter: number of filters - growth_rate: growth rate - dropout_rate: dropout rate - weight_decay: weight decay factor - grow_nb_filters: flag to decide to allow number of filters to grow - ''' - - eps = 1.1e-5 - concat_feat = x - - for i in range(nb_layers): - branch = i+1 - x = conv_block(concat_feat, stage, branch, - growth_rate, dropout_rate, weight_decay) - concat_feat = merge([concat_feat, x], mode='concat', - concat_axis=concat_axis, name='concat_'+str(stage)+'_'+str(branch)) - - if grow_nb_filters: - nb_filter += growth_rate - - return concat_feat, nb_filter - - -if __name__ == '__main__': - - # Example to fine-tune on 3000 samples from Cifar10 - - img_rows, img_cols = 224, 224 # Resolution of inputs - channel = 3 - num_classes = 10 - batch_size = 8 - nb_epoch = 10 - - # Load Cifar10 data. 
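    # (The fine-tuning recipe above is repeated for every network in this
    # file. In outline, with imagenet_head as a stand-in for the 1000-way
    # classifier rather than a function actually defined here:
    #
    #     model = Model(img_input, imagenet_head(x))
    #     model.load_weights(weights_path, by_name=True)  # fills backbone layers
    #     new_head = Dense(num_classes, name='fc6')(GlobalAveragePooling2D()(x))
    #     model = Model(img_input, Activation('softmax', name='prob')(new_head))
    #     model.compile(optimizer=SGD(lr=1e-3, decay=1e-6, momentum=0.9,
    #                                 nesterov=True),
    #                   loss='categorical_crossentropy', metrics=['accuracy'])
    # )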
Please implement your own load_data() module for your own dataset - X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols) - - # Load our model - model = densenet161_model( - img_rows=img_rows, img_cols=img_cols, color_type=channel, num_classes=num_classes) - - # Start Fine-tuning - model.fit(X_train, Y_train, - batch_size=batch_size, - nb_epoch=nb_epoch, - shuffle=True, - verbose=1, - validation_data=(X_valid, Y_valid), - ) - - # Make predictions - predictions_valid = model.predict( - X_valid, batch_size=batch_size, verbose=1) - - # Cross-entropy loss score - score = log_loss(Y_valid, predictions_valid) -# -*- coding: utf-8 -*- - -from keras.optimizers import SGD -from keras.layers import Input, merge, ZeroPadding2D -from keras.layers.core import Dense, Dropout, Activation -from keras.layers.convolutional import Convolution2D -from keras.layers.pooling import AveragePooling2D, GlobalAveragePooling2D, MaxPooling2D -from keras.layers.normalization import BatchNormalization -from keras.models import Model -import keras.backend as K - -from sklearn.metrics import log_loss - -from custom_layers.scale_layer import Scale - -from load_cifar10 import load_cifar10_data - - -def densenet169_model(img_rows, img_cols, color_type=1, nb_dense_block=4, growth_rate=32, nb_filter=64, reduction=0.5, dropout_rate=0.0, weight_decay=1e-4, num_classes=None): - ''' - DenseNet 169 Model for Keras - - Model Schema is based on - https://github.com/flyyufelix/DenseNet-Keras - - ImageNet Pretrained Weights - Theano: https://drive.google.com/open?id=0Byy2AcGyEVxfN0d3T1F1MXg0NlU - TensorFlow: https://drive.google.com/open?id=0Byy2AcGyEVxfSEc5UC1ROUFJdmM - - # Arguments - nb_dense_block: number of dense blocks to add to end - growth_rate: number of filters to add per dense block - nb_filter: initial number of filters - reduction: reduction factor of transition blocks. - dropout_rate: dropout rate - weight_decay: weight decay factor - classes: optional number of classes to classify images - weights_path: path to pre-trained weights - # Returns - A Keras model instance. 
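    # Example
            As in the fine-tuning example at the bottom of this file:
                model = densenet169_model(img_rows=224, img_cols=224,
                                          color_type=3, num_classes=10)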
- ''' - eps = 1.1e-5 - - # compute compression factor - compression = 1.0 - reduction - - # Handle Dimension Ordering for different backends - global concat_axis - if K.image_dim_ordering() == 'tf': - concat_axis = 3 - img_input = Input(shape=(224, 224, 3), name='data') - else: - concat_axis = 1 - img_input = Input(shape=(3, 224, 224), name='data') - - # From architecture for ImageNet (Table 1 in the paper) - nb_filter = 64 - nb_layers = [6, 12, 32, 32] # For DenseNet-169 - - # Initial convolution - x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input) - x = Convolution2D(nb_filter, 7, 7, subsample=(2, 2), - name='conv1', bias=False)(x) - x = BatchNormalization(epsilon=eps, axis=concat_axis, name='conv1_bn')(x) - x = Scale(axis=concat_axis, name='conv1_scale')(x) - x = Activation('relu', name='relu1')(x) - x = ZeroPadding2D((1, 1), name='pool1_zeropadding')(x) - x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x) - - # Add dense blocks - for block_idx in range(nb_dense_block - 1): - stage = block_idx+2 - x, nb_filter = dense_block( - x, stage, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay) - - # Add transition_block - x = transition_block(x, stage, nb_filter, compression=compression, - dropout_rate=dropout_rate, weight_decay=weight_decay) - nb_filter = int(nb_filter * compression) - - final_stage = stage + 1 - x, nb_filter = dense_block( - x, final_stage, nb_layers[-1], nb_filter, growth_rate, dropout_rate=dropout_rate, weight_decay=weight_decay) - - x = BatchNormalization(epsilon=eps, axis=concat_axis, - name='conv'+str(final_stage)+'_blk_bn')(x) - x = Scale(axis=concat_axis, name='conv'+str(final_stage)+'_blk_scale')(x) - x = Activation('relu', name='relu'+str(final_stage)+'_blk')(x) - - x_fc = GlobalAveragePooling2D(name='pool'+str(final_stage))(x) - x_fc = Dense(1000, name='fc6')(x_fc) - x_fc = Activation('softmax', name='prob')(x_fc) - - model = Model(img_input, x_fc, name='densenet') - - if K.image_dim_ordering() == 'th': - # Use pre-trained weights for Theano backend - weights_path = 'imagenet_models/densenet169_weights_th.h5' - else: - # Use pre-trained weights for Tensorflow backend - weights_path = 'imagenet_models/densenet169_weights_tf.h5' - - model.load_weights(weights_path, by_name=True) - - # Truncate and replace softmax layer for transfer learning - # Cannot use model.layers.pop() since model is not of Sequential() type - # The method below works since pre-trained weights are stored in layers but not in the model - x_newfc = GlobalAveragePooling2D(name='pool'+str(final_stage))(x) - x_newfc = Dense(num_classes, name='fc6')(x_newfc) - x_newfc = Activation('softmax', name='prob')(x_newfc) - - model = Model(img_input, x_newfc) - - # Learning rate is changed to 0.001 - sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True) - model.compile(optimizer=sgd, loss='categorical_crossentropy', - metrics=['accuracy']) - - return model - - -def conv_block(x, stage, branch, nb_filter, dropout_rate=None, weight_decay=1e-4): - '''Apply BatchNorm, Relu, bottleneck 1x1 Conv2D, 3x3 Conv2D, and option dropout - # Arguments - x: input tensor - stage: index for dense block - branch: layer index within each dense block - nb_filter: number of filters - dropout_rate: dropout rate - weight_decay: weight decay factor - ''' - eps = 1.1e-5 - conv_name_base = 'conv' + str(stage) + '_' + str(branch) - relu_name_base = 'relu' + str(stage) + '_' + str(branch) - - # 1x1 Convolution (Bottleneck layer) - inter_channel = nb_filter * 4 - x = 
BatchNormalization(epsilon=eps, axis=concat_axis, - name=conv_name_base+'_x1_bn')(x) - x = Scale(axis=concat_axis, name=conv_name_base+'_x1_scale')(x) - x = Activation('relu', name=relu_name_base+'_x1')(x) - x = Convolution2D(inter_channel, 1, 1, - name=conv_name_base+'_x1', bias=False)(x) - - if dropout_rate: - x = Dropout(dropout_rate)(x) - - # 3x3 Convolution - x = BatchNormalization(epsilon=eps, axis=concat_axis, - name=conv_name_base+'_x2_bn')(x) - x = Scale(axis=concat_axis, name=conv_name_base+'_x2_scale')(x) - x = Activation('relu', name=relu_name_base+'_x2')(x) - x = ZeroPadding2D((1, 1), name=conv_name_base+'_x2_zeropadding')(x) - x = Convolution2D(nb_filter, 3, 3, name=conv_name_base + - '_x2', bias=False)(x) - - if dropout_rate: - x = Dropout(dropout_rate)(x) - - return x - - -def transition_block(x, stage, nb_filter, compression=1.0, dropout_rate=None, weight_decay=1E-4): - ''' Apply BatchNorm, 1x1 Convolution, averagePooling, optional compression, dropout - # Arguments - x: input tensor - stage: index for dense block - nb_filter: number of filters - compression: calculated as 1 - reduction. Reduces the number of feature maps in the transition block. - dropout_rate: dropout rate - weight_decay: weight decay factor - ''' - - eps = 1.1e-5 - conv_name_base = 'conv' + str(stage) + '_blk' - relu_name_base = 'relu' + str(stage) + '_blk' - pool_name_base = 'pool' + str(stage) - - x = BatchNormalization(epsilon=eps, axis=concat_axis, - name=conv_name_base+'_bn')(x) - x = Scale(axis=concat_axis, name=conv_name_base+'_scale')(x) - x = Activation('relu', name=relu_name_base)(x) - x = Convolution2D(int(nb_filter * compression), 1, 1, - name=conv_name_base, bias=False)(x) - - if dropout_rate: - x = Dropout(dropout_rate)(x) - - x = AveragePooling2D((2, 2), strides=(2, 2), name=pool_name_base)(x) - - return x - - -def dense_block(x, stage, nb_layers, nb_filter, growth_rate, dropout_rate=None, weight_decay=1e-4, grow_nb_filters=True): - ''' Build a dense_block where the output of each conv_block is fed to subsequent ones - # Arguments - x: input tensor - stage: index for dense block - nb_layers: the number of layers of conv_block to append to the model. - nb_filter: number of filters - growth_rate: growth rate - dropout_rate: dropout rate - weight_decay: weight decay factor - grow_nb_filters: flag to decide to allow number of filters to grow - ''' - - eps = 1.1e-5 - concat_feat = x - - for i in range(nb_layers): - branch = i+1 - x = conv_block(concat_feat, stage, branch, - growth_rate, dropout_rate, weight_decay) - concat_feat = merge([concat_feat, x], mode='concat', - concat_axis=concat_axis, name='concat_'+str(stage)+'_'+str(branch)) - - if grow_nb_filters: - nb_filter += growth_rate - - return concat_feat, nb_filter - - -if __name__ == '__main__': - - # Example to fine-tune on 3000 samples from Cifar10 - - img_rows, img_cols = 224, 224 # Resolution of inputs - channel = 3 - num_classes = 10 - batch_size = 16 - nb_epoch = 10 - - # Load Cifar10 data. 
Please implement your own load_data() module for your own dataset - X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols) - - # Load our model - model = densenet169_model( - img_rows=img_rows, img_cols=img_cols, color_type=channel, num_classes=num_classes) - - # Start Fine-tuning - model.fit(X_train, Y_train, - batch_size=batch_size, - nb_epoch=nb_epoch, - shuffle=True, - verbose=1, - validation_data=(X_valid, Y_valid), - ) - - # Make predictions - predictions_valid = model.predict( - X_valid, batch_size=batch_size, verbose=1) - - # Cross-entropy loss score - score = log_loss(Y_valid, predictions_valid) -# -*- coding: utf-8 -*- - -from keras.optimizers import SGD -from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation -from keras.datasets import cifar10 -from keras.regularizers import l2 -from keras.models import Model - -from sklearn.metrics import log_loss - -from custom_layers.googlenet_custom_layers import LRN, PoolHelper - -from load_cifar10 import load_cifar10_data - - -def googlenet_model(img_rows, img_cols, channel=1, num_classes=None): - """ - GoogLeNet a.k.a. Inception v1 for Keras - - Model Schema is based on - https://gist.github.com/joelouismarino/a2ede9ab3928f999575423b9887abd14 - - ImageNet Pretrained Weights - https://drive.google.com/open?id=0B319laiAPjU3RE1maU9MMlh2dnc - - Blog Post: - http://joelouismarino.github.io/blog_posts/blog_googlenet_keras.html - - Parameters: - img_rows, img_cols - resolution of inputs - channel - 1 for grayscale, 3 for color - num_classes - number of class labels for our classification task - """ - - input = Input(shape=(channel, img_rows, img_cols)) - conv1_7x7_s2 = Convolution2D(64, 7, 7, subsample=( - 2, 2), border_mode='same', activation='relu', name='conv1/7x7_s2', W_regularizer=l2(0.0002))(input) - conv1_zero_pad = ZeroPadding2D(padding=(1, 1))(conv1_7x7_s2) - pool1_helper = PoolHelper()(conv1_zero_pad) - pool1_3x3_s2 = MaxPooling2D(pool_size=(3, 3), strides=( - 2, 2), border_mode='valid', name='pool1/3x3_s2')(pool1_helper) - pool1_norm1 = LRN(name='pool1/norm1')(pool1_3x3_s2) - conv2_3x3_reduce = Convolution2D(64, 1, 1, border_mode='same', activation='relu', - name='conv2/3x3_reduce', W_regularizer=l2(0.0002))(pool1_norm1) - conv2_3x3 = Convolution2D(192, 3, 3, border_mode='same', activation='relu', - name='conv2/3x3', W_regularizer=l2(0.0002))(conv2_3x3_reduce) - conv2_norm2 = LRN(name='conv2/norm2')(conv2_3x3) - conv2_zero_pad = ZeroPadding2D(padding=(1, 1))(conv2_norm2) - pool2_helper = PoolHelper()(conv2_zero_pad) - pool2_3x3_s2 = MaxPooling2D(pool_size=(3, 3), strides=( - 2, 2), border_mode='valid', name='pool2/3x3_s2')(pool2_helper) - - inception_3a_1x1 = Convolution2D(64, 1, 1, border_mode='same', activation='relu', - name='inception_3a/1x1', W_regularizer=l2(0.0002))(pool2_3x3_s2) - inception_3a_3x3_reduce = Convolution2D( - 96, 1, 1, border_mode='same', activation='relu', name='inception_3a/3x3_reduce', W_regularizer=l2(0.0002))(pool2_3x3_s2) - inception_3a_3x3 = Convolution2D(128, 3, 3, border_mode='same', activation='relu', - name='inception_3a/3x3', W_regularizer=l2(0.0002))(inception_3a_3x3_reduce) - inception_3a_5x5_reduce = Convolution2D( - 16, 1, 1, border_mode='same', activation='relu', name='inception_3a/5x5_reduce', W_regularizer=l2(0.0002))(pool2_3x3_s2) - inception_3a_5x5 = Convolution2D(32, 5, 5, border_mode='same', activation='relu', - name='inception_3a/5x5', 
W_regularizer=l2(0.0002))(inception_3a_5x5_reduce) - inception_3a_pool = MaxPooling2D(pool_size=(3, 3), strides=( - 1, 1), border_mode='same', name='inception_3a/pool')(pool2_3x3_s2) - inception_3a_pool_proj = Convolution2D(32, 1, 1, border_mode='same', activation='relu', - name='inception_3a/pool_proj', W_regularizer=l2(0.0002))(inception_3a_pool) - inception_3a_output = merge([inception_3a_1x1, inception_3a_3x3, inception_3a_5x5, - inception_3a_pool_proj], mode='concat', concat_axis=1, name='inception_3a/output') - - inception_3b_1x1 = Convolution2D(128, 1, 1, border_mode='same', activation='relu', - name='inception_3b/1x1', W_regularizer=l2(0.0002))(inception_3a_output) - inception_3b_3x3_reduce = Convolution2D(128, 1, 1, border_mode='same', activation='relu', - name='inception_3b/3x3_reduce', W_regularizer=l2(0.0002))(inception_3a_output) - inception_3b_3x3 = Convolution2D(192, 3, 3, border_mode='same', activation='relu', - name='inception_3b/3x3', W_regularizer=l2(0.0002))(inception_3b_3x3_reduce) - inception_3b_5x5_reduce = Convolution2D(32, 1, 1, border_mode='same', activation='relu', - name='inception_3b/5x5_reduce', W_regularizer=l2(0.0002))(inception_3a_output) - inception_3b_5x5 = Convolution2D(96, 5, 5, border_mode='same', activation='relu', - name='inception_3b/5x5', W_regularizer=l2(0.0002))(inception_3b_5x5_reduce) - inception_3b_pool = MaxPooling2D(pool_size=(3, 3), strides=( - 1, 1), border_mode='same', name='inception_3b/pool')(inception_3a_output) - inception_3b_pool_proj = Convolution2D(64, 1, 1, border_mode='same', activation='relu', - name='inception_3b/pool_proj', W_regularizer=l2(0.0002))(inception_3b_pool) - inception_3b_output = merge([inception_3b_1x1, inception_3b_3x3, inception_3b_5x5, - inception_3b_pool_proj], mode='concat', concat_axis=1, name='inception_3b/output') - - inception_3b_output_zero_pad = ZeroPadding2D( - padding=(1, 1))(inception_3b_output) - pool3_helper = PoolHelper()(inception_3b_output_zero_pad) - pool3_3x3_s2 = MaxPooling2D(pool_size=(3, 3), strides=( - 2, 2), border_mode='valid', name='pool3/3x3_s2')(pool3_helper) - - inception_4a_1x1 = Convolution2D(192, 1, 1, border_mode='same', activation='relu', - name='inception_4a/1x1', W_regularizer=l2(0.0002))(pool3_3x3_s2) - inception_4a_3x3_reduce = Convolution2D( - 96, 1, 1, border_mode='same', activation='relu', name='inception_4a/3x3_reduce', W_regularizer=l2(0.0002))(pool3_3x3_s2) - inception_4a_3x3 = Convolution2D(208, 3, 3, border_mode='same', activation='relu', - name='inception_4a/3x3', W_regularizer=l2(0.0002))(inception_4a_3x3_reduce) - inception_4a_5x5_reduce = Convolution2D( - 16, 1, 1, border_mode='same', activation='relu', name='inception_4a/5x5_reduce', W_regularizer=l2(0.0002))(pool3_3x3_s2) - inception_4a_5x5 = Convolution2D(48, 5, 5, border_mode='same', activation='relu', - name='inception_4a/5x5', W_regularizer=l2(0.0002))(inception_4a_5x5_reduce) - inception_4a_pool = MaxPooling2D(pool_size=(3, 3), strides=( - 1, 1), border_mode='same', name='inception_4a/pool')(pool3_3x3_s2) - inception_4a_pool_proj = Convolution2D(64, 1, 1, border_mode='same', activation='relu', - name='inception_4a/pool_proj', W_regularizer=l2(0.0002))(inception_4a_pool) - inception_4a_output = merge([inception_4a_1x1, inception_4a_3x3, inception_4a_5x5, - inception_4a_pool_proj], mode='concat', concat_axis=1, name='inception_4a/output') - - loss1_ave_pool = AveragePooling2D(pool_size=(5, 5), strides=( - 3, 3), name='loss1/ave_pool')(inception_4a_output) - loss1_conv = Convolution2D(128, 1, 1, 
border_mode='same', activation='relu', - name='loss1/conv', W_regularizer=l2(0.0002))(loss1_ave_pool) - loss1_flat = Flatten()(loss1_conv) - loss1_fc = Dense(1024, activation='relu', name='loss1/fc', - W_regularizer=l2(0.0002))(loss1_flat) - loss1_drop_fc = Dropout(0.7)(loss1_fc) - loss1_classifier = Dense( - 1000, name='loss1/classifier', W_regularizer=l2(0.0002))(loss1_drop_fc) - loss1_classifier_act = Activation('softmax')(loss1_classifier) - - inception_4b_1x1 = Convolution2D(160, 1, 1, border_mode='same', activation='relu', - name='inception_4b/1x1', W_regularizer=l2(0.0002))(inception_4a_output) - inception_4b_3x3_reduce = Convolution2D(112, 1, 1, border_mode='same', activation='relu', - name='inception_4b/3x3_reduce', W_regularizer=l2(0.0002))(inception_4a_output) - inception_4b_3x3 = Convolution2D(224, 3, 3, border_mode='same', activation='relu', - name='inception_4b/3x3', W_regularizer=l2(0.0002))(inception_4b_3x3_reduce) - inception_4b_5x5_reduce = Convolution2D(24, 1, 1, border_mode='same', activation='relu', - name='inception_4b/5x5_reduce', W_regularizer=l2(0.0002))(inception_4a_output) - inception_4b_5x5 = Convolution2D(64, 5, 5, border_mode='same', activation='relu', - name='inception_4b/5x5', W_regularizer=l2(0.0002))(inception_4b_5x5_reduce) - inception_4b_pool = MaxPooling2D(pool_size=(3, 3), strides=( - 1, 1), border_mode='same', name='inception_4b/pool')(inception_4a_output) - inception_4b_pool_proj = Convolution2D(64, 1, 1, border_mode='same', activation='relu', - name='inception_4b/pool_proj', W_regularizer=l2(0.0002))(inception_4b_pool) - inception_4b_output = merge([inception_4b_1x1, inception_4b_3x3, inception_4b_5x5, - inception_4b_pool_proj], mode='concat', concat_axis=1, name='inception_4b_output') - - inception_4c_1x1 = Convolution2D(128, 1, 1, border_mode='same', activation='relu', - name='inception_4c/1x1', W_regularizer=l2(0.0002))(inception_4b_output) - inception_4c_3x3_reduce = Convolution2D(128, 1, 1, border_mode='same', activation='relu', - name='inception_4c/3x3_reduce', W_regularizer=l2(0.0002))(inception_4b_output) - inception_4c_3x3 = Convolution2D(256, 3, 3, border_mode='same', activation='relu', - name='inception_4c/3x3', W_regularizer=l2(0.0002))(inception_4c_3x3_reduce) - inception_4c_5x5_reduce = Convolution2D(24, 1, 1, border_mode='same', activation='relu', - name='inception_4c/5x5_reduce', W_regularizer=l2(0.0002))(inception_4b_output) - inception_4c_5x5 = Convolution2D(64, 5, 5, border_mode='same', activation='relu', - name='inception_4c/5x5', W_regularizer=l2(0.0002))(inception_4c_5x5_reduce) - inception_4c_pool = MaxPooling2D(pool_size=(3, 3), strides=( - 1, 1), border_mode='same', name='inception_4c/pool')(inception_4b_output) - inception_4c_pool_proj = Convolution2D(64, 1, 1, border_mode='same', activation='relu', - name='inception_4c/pool_proj', W_regularizer=l2(0.0002))(inception_4c_pool) - inception_4c_output = merge([inception_4c_1x1, inception_4c_3x3, inception_4c_5x5, - inception_4c_pool_proj], mode='concat', concat_axis=1, name='inception_4c/output') - - inception_4d_1x1 = Convolution2D(112, 1, 1, border_mode='same', activation='relu', - name='inception_4d/1x1', W_regularizer=l2(0.0002))(inception_4c_output) - inception_4d_3x3_reduce = Convolution2D(144, 1, 1, border_mode='same', activation='relu', - name='inception_4d/3x3_reduce', W_regularizer=l2(0.0002))(inception_4c_output) - inception_4d_3x3 = Convolution2D(288, 3, 3, border_mode='same', activation='relu', - name='inception_4d/3x3', 
W_regularizer=l2(0.0002))(inception_4d_3x3_reduce) - inception_4d_5x5_reduce = Convolution2D(32, 1, 1, border_mode='same', activation='relu', - name='inception_4d/5x5_reduce', W_regularizer=l2(0.0002))(inception_4c_output) - inception_4d_5x5 = Convolution2D(64, 5, 5, border_mode='same', activation='relu', - name='inception_4d/5x5', W_regularizer=l2(0.0002))(inception_4d_5x5_reduce) - inception_4d_pool = MaxPooling2D(pool_size=(3, 3), strides=( - 1, 1), border_mode='same', name='inception_4d/pool')(inception_4c_output) - inception_4d_pool_proj = Convolution2D(64, 1, 1, border_mode='same', activation='relu', - name='inception_4d/pool_proj', W_regularizer=l2(0.0002))(inception_4d_pool) - inception_4d_output = merge([inception_4d_1x1, inception_4d_3x3, inception_4d_5x5, - inception_4d_pool_proj], mode='concat', concat_axis=1, name='inception_4d/output') - - loss2_ave_pool = AveragePooling2D(pool_size=(5, 5), strides=( - 3, 3), name='loss2/ave_pool')(inception_4d_output) - loss2_conv = Convolution2D(128, 1, 1, border_mode='same', activation='relu', - name='loss2/conv', W_regularizer=l2(0.0002))(loss2_ave_pool) - loss2_flat = Flatten()(loss2_conv) - loss2_fc = Dense(1024, activation='relu', name='loss2/fc', - W_regularizer=l2(0.0002))(loss2_flat) - loss2_drop_fc = Dropout(0.7)(loss2_fc) - loss2_classifier = Dense( - 1000, name='loss2/classifier', W_regularizer=l2(0.0002))(loss2_drop_fc) - loss2_classifier_act = Activation('softmax')(loss2_classifier) - - inception_4e_1x1 = Convolution2D(256, 1, 1, border_mode='same', activation='relu', - name='inception_4e/1x1', W_regularizer=l2(0.0002))(inception_4d_output) - inception_4e_3x3_reduce = Convolution2D(160, 1, 1, border_mode='same', activation='relu', - name='inception_4e/3x3_reduce', W_regularizer=l2(0.0002))(inception_4d_output) - inception_4e_3x3 = Convolution2D(320, 3, 3, border_mode='same', activation='relu', - name='inception_4e/3x3', W_regularizer=l2(0.0002))(inception_4e_3x3_reduce) - inception_4e_5x5_reduce = Convolution2D(32, 1, 1, border_mode='same', activation='relu', - name='inception_4e/5x5_reduce', W_regularizer=l2(0.0002))(inception_4d_output) - inception_4e_5x5 = Convolution2D(128, 5, 5, border_mode='same', activation='relu', - name='inception_4e/5x5', W_regularizer=l2(0.0002))(inception_4e_5x5_reduce) - inception_4e_pool = MaxPooling2D(pool_size=(3, 3), strides=( - 1, 1), border_mode='same', name='inception_4e/pool')(inception_4d_output) - inception_4e_pool_proj = Convolution2D(128, 1, 1, border_mode='same', activation='relu', - name='inception_4e/pool_proj', W_regularizer=l2(0.0002))(inception_4e_pool) - inception_4e_output = merge([inception_4e_1x1, inception_4e_3x3, inception_4e_5x5, - inception_4e_pool_proj], mode='concat', concat_axis=1, name='inception_4e/output') - - inception_4e_output_zero_pad = ZeroPadding2D( - padding=(1, 1))(inception_4e_output) - pool4_helper = PoolHelper()(inception_4e_output_zero_pad) - pool4_3x3_s2 = MaxPooling2D(pool_size=(3, 3), strides=( - 2, 2), border_mode='valid', name='pool4/3x3_s2')(pool4_helper) - - inception_5a_1x1 = Convolution2D(256, 1, 1, border_mode='same', activation='relu', - name='inception_5a/1x1', W_regularizer=l2(0.0002))(pool4_3x3_s2) - inception_5a_3x3_reduce = Convolution2D( - 160, 1, 1, border_mode='same', activation='relu', name='inception_5a/3x3_reduce', W_regularizer=l2(0.0002))(pool4_3x3_s2) - inception_5a_3x3 = Convolution2D(320, 3, 3, border_mode='same', activation='relu', - name='inception_5a/3x3', W_regularizer=l2(0.0002))(inception_5a_3x3_reduce) - 
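    # Every inception module in this network repeats the same four-branch
    # pattern: a direct 1x1 conv, a 1x1 "reduce" feeding a 3x3 conv, a 1x1
    # "reduce" feeding a 5x5 conv, and a 3x3 max-pool feeding a 1x1
    # projection, all concatenated along the channel axis (concat_axis=1,
    # channels-first ordering).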
inception_5a_5x5_reduce = Convolution2D( - 32, 1, 1, border_mode='same', activation='relu', name='inception_5a/5x5_reduce', W_regularizer=l2(0.0002))(pool4_3x3_s2) - inception_5a_5x5 = Convolution2D(128, 5, 5, border_mode='same', activation='relu', - name='inception_5a/5x5', W_regularizer=l2(0.0002))(inception_5a_5x5_reduce) - inception_5a_pool = MaxPooling2D(pool_size=(3, 3), strides=( - 1, 1), border_mode='same', name='inception_5a/pool')(pool4_3x3_s2) - inception_5a_pool_proj = Convolution2D(128, 1, 1, border_mode='same', activation='relu', - name='inception_5a/pool_proj', W_regularizer=l2(0.0002))(inception_5a_pool) - inception_5a_output = merge([inception_5a_1x1, inception_5a_3x3, inception_5a_5x5, - inception_5a_pool_proj], mode='concat', concat_axis=1, name='inception_5a/output') - - inception_5b_1x1 = Convolution2D(384, 1, 1, border_mode='same', activation='relu', - name='inception_5b/1x1', W_regularizer=l2(0.0002))(inception_5a_output) - inception_5b_3x3_reduce = Convolution2D(192, 1, 1, border_mode='same', activation='relu', - name='inception_5b/3x3_reduce', W_regularizer=l2(0.0002))(inception_5a_output) - inception_5b_3x3 = Convolution2D(384, 3, 3, border_mode='same', activation='relu', - name='inception_5b/3x3', W_regularizer=l2(0.0002))(inception_5b_3x3_reduce) - inception_5b_5x5_reduce = Convolution2D(48, 1, 1, border_mode='same', activation='relu', - name='inception_5b/5x5_reduce', W_regularizer=l2(0.0002))(inception_5a_output) - inception_5b_5x5 = Convolution2D(128, 5, 5, border_mode='same', activation='relu', - name='inception_5b/5x5', W_regularizer=l2(0.0002))(inception_5b_5x5_reduce) - inception_5b_pool = MaxPooling2D(pool_size=(3, 3), strides=( - 1, 1), border_mode='same', name='inception_5b/pool')(inception_5a_output) - inception_5b_pool_proj = Convolution2D(128, 1, 1, border_mode='same', activation='relu', - name='inception_5b/pool_proj', W_regularizer=l2(0.0002))(inception_5b_pool) - inception_5b_output = merge([inception_5b_1x1, inception_5b_3x3, inception_5b_5x5, - inception_5b_pool_proj], mode='concat', concat_axis=1, name='inception_5b/output') - - pool5_7x7_s1 = AveragePooling2D(pool_size=(7, 7), strides=( - 1, 1), name='pool5/7x7_s2')(inception_5b_output) - loss3_flat = Flatten()(pool5_7x7_s1) - pool5_drop_7x7_s1 = Dropout(0.4)(loss3_flat) - loss3_classifier = Dense( - 1000, name='loss3/classifier', W_regularizer=l2(0.0002))(pool5_drop_7x7_s1) - loss3_classifier_act = Activation('softmax', name='prob')(loss3_classifier) - - # Create model - model = Model(input=input, output=[ - loss1_classifier_act, loss2_classifier_act, loss3_classifier_act]) - - # Load ImageNet pre-trained data - model.load_weights('imagenet_models/googlenet_weights.h5') - - # Truncate and replace softmax layer for transfer learning - # Cannot use model.layers.pop() since model is not of Sequential() type - # The method below works since pre-trained weights are stored in layers but not in the model - loss3_classifier_statefarm = Dense( - num_classes, name='loss3/classifier', W_regularizer=l2(0.0002))(pool5_drop_7x7_s1) - loss3_classifier_act_statefarm = Activation( - 'softmax', name='prob')(loss3_classifier_statefarm) - loss2_classifier_statefarm = Dense( - num_classes, name='loss2/classifier', W_regularizer=l2(0.0002))(loss2_drop_fc) - loss2_classifier_act_statefarm = Activation( - 'softmax')(loss2_classifier_statefarm) - loss1_classifier_statefarm = Dense( - num_classes, name='loss1/classifier', W_regularizer=l2(0.0002))(loss1_drop_fc) - loss1_classifier_act_statefarm = Activation( - 
'softmax')(loss1_classifier_statefarm) - - # Create another model with our customized softmax - model = Model(input=input, output=[loss1_classifier_act_statefarm, - loss2_classifier_act_statefarm, loss3_classifier_act_statefarm]) - - # Learning rate is changed to 0.001 - sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True) - model.compile(optimizer=sgd, loss='categorical_crossentropy', - metrics=['accuracy']) - - return model - - -if __name__ == '__main__': - - # Example to fine-tune on 3000 samples from Cifar10 - - img_rows, img_cols = 224, 224 # Resolution of inputs - channel = 3 - num_classes = 10 - batch_size = 16 - nb_epoch = 10 - - # Load Cifar10 data. Please implement your own load_data() module for your own dataset - X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols) - - # Load our model - model = googlenet_model(img_rows, img_cols, channel, num_classes) - - # Start Fine-tuning. - # Notice that googlenet takes 3 sets of labels for outputs, one for each auxillary classifier - model.fit(X_train, [Y_train, Y_train, Y_train], - batch_size=batch_size, - nb_epoch=nb_epoch, - shuffle=True, - verbose=1, - validation_data=(X_valid, [Y_valid, Y_valid, Y_valid]), - ) - - # Make predictions - predictions_valid = model.predict( - X_valid, batch_size=batch_size, verbose=1) - - # Combine 3 set of outputs using averaging - predictions_valid = sum(predictions_valid)/len(predictions_valid) - - # Cross-entropy loss score - score = log_loss(Y_valid, predictions_valid) -# -*- coding: utf-8 -*- - -from keras.models import Sequential -from keras.optimizers import SGD -from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation -from keras.layers.normalization import BatchNormalization -from keras.models import Model -from keras import backend as K - -from sklearn.metrics import log_loss - -from load_cifar10 import load_cifar10_data - - -def conv2d_bn(x, nb_filter, nb_row, nb_col, - border_mode='same', subsample=(1, 1), - name=None): - """ - Utility function to apply conv + BN for Inception V3. 
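    Note that BatchNormalization is applied with bn_axis = 1 below, i.e.
    channels-first (Theano) ordering, matching the Input shape used by
    inception_v3_model. Typical call, as used throughout the network:

        x = conv2d_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode='valid')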
- """ - if name is not None: - bn_name = name + '_bn' - conv_name = name + '_conv' - else: - bn_name = None - conv_name = None - bn_axis = 1 - x = Convolution2D(nb_filter, nb_row, nb_col, - subsample=subsample, - activation='relu', - border_mode=border_mode, - name=conv_name)(x) - x = BatchNormalization(axis=bn_axis, name=bn_name)(x) - return x - - -def inception_v3_model(img_rows, img_cols, channel=1, num_classes=None): - """ - Inception-V3 Model for Keras - - Model Schema is based on - https://github.com/fchollet/deep-learning-models/blob/master/inception_v3.py - - ImageNet Pretrained Weights - https://github.com/fchollet/deep-learning-models/releases/download/v0.2/inception_v3_weights_th_dim_ordering_th_kernels.h5 - - Parameters: - img_rows, img_cols - resolution of inputs - channel - 1 for grayscale, 3 for color - num_classes - number of class labels for our classification task - """ - channel_axis = 1 - img_input = Input(shape=(channel, img_rows, img_cols)) - x = conv2d_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode='valid') - x = conv2d_bn(x, 32, 3, 3, border_mode='valid') - x = conv2d_bn(x, 64, 3, 3) - x = MaxPooling2D((3, 3), strides=(2, 2))(x) - - x = conv2d_bn(x, 80, 1, 1, border_mode='valid') - x = conv2d_bn(x, 192, 3, 3, border_mode='valid') - x = MaxPooling2D((3, 3), strides=(2, 2))(x) - - # mixed 0, 1, 2: 35 x 35 x 256 - for i in range(3): - branch1x1 = conv2d_bn(x, 64, 1, 1) - - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) - branch_pool = conv2d_bn(branch_pool, 32, 1, 1) - x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed' + str(i)) - - # mixed 3: 17 x 17 x 768 - branch3x3 = conv2d_bn(x, 384, 3, 3, subsample=(2, 2), border_mode='valid') - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3, - subsample=(2, 2), border_mode='valid') - - branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) - x = merge([branch3x3, branch3x3dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed3') - - # mixed 4: 17 x 17 x 768 - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 128, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 128, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 128, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed4') - - # mixed 5, 6: 17 x 17 x 768 - for i in range(2): - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 160, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 160, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 160, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 
7) - - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed' + str(5 + i)) - - # mixed 7: 17 x 17 x 768 - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 192, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 192, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 160, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed7') - - # mixed 8: 8 x 8 x 1280 - branch3x3 = conv2d_bn(x, 192, 1, 1) - branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, - subsample=(2, 2), border_mode='valid') - - branch7x7x3 = conv2d_bn(x, 192, 1, 1) - branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7) - branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1) - branch7x7x3 = conv2d_bn(branch7x7x3, 192, 3, 3, - subsample=(2, 2), border_mode='valid') - - branch_pool = AveragePooling2D((3, 3), strides=(2, 2))(x) - x = merge([branch3x3, branch7x7x3, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed8') - - # mixed 9: 8 x 8 x 2048 - for i in range(2): - branch1x1 = conv2d_bn(x, 320, 1, 1) - - branch3x3 = conv2d_bn(x, 384, 1, 1) - branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3) - branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1) - branch3x3 = merge([branch3x3_1, branch3x3_2], - mode='concat', concat_axis=channel_axis, - name='mixed9_' + str(i)) - - branch3x3dbl = conv2d_bn(x, 448, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3) - branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3) - branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1) - branch3x3dbl = merge([branch3x3dbl_1, branch3x3dbl_2], - mode='concat', concat_axis=channel_axis) - - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch3x3, branch3x3dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed' + str(9 + i)) - - # Fully Connected Softmax Layer - x_fc = AveragePooling2D((8, 8), strides=(8, 8), name='avg_pool')(x) - x_fc = Flatten(name='flatten')(x_fc) - x_fc = Dense(1000, activation='softmax', name='predictions')(x_fc) - - # Create model - model = Model(img_input, x_fc) - - # Load ImageNet pre-trained data - model.load_weights( - 'imagenet_models/inception_v3_weights_th_dim_ordering_th_kernels.h5') - - # Truncate and replace softmax layer for transfer learning - # Cannot use model.layers.pop() since model is not of Sequential() type - # The method below works since pre-trained weights are stored in layers but not in the model - x_newfc = AveragePooling2D((8, 8), strides=(8, 8), name='avg_pool')(x) - x_newfc = Flatten(name='flatten')(x_newfc) - x_newfc = Dense(num_classes, activation='softmax', - name='predictions')(x_newfc) - - # Create another model with our customized softmax - model = Model(img_input, x_newfc) - - # Learning rate is changed to 0.001 - sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True) - model.compile(optimizer=sgd, loss='categorical_crossentropy', - 
metrics=['accuracy']) - - return model - - -if __name__ == '__main__': - - # Example to fine-tune on 3000 samples from Cifar10 - - img_rows, img_cols = 299, 299 # Resolution of inputs - channel = 3 - num_classes = 10 - batch_size = 16 - nb_epoch = 10 - - # Load Cifar10 data. Please implement your own load_data() module for your own dataset - X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols) - - # Load our model - model = inception_v3_model(img_rows, img_cols, channel, num_classes) - - # Start Fine-tuning - model.fit(X_train, Y_train, - batch_size=batch_size, - nb_epoch=nb_epoch, - shuffle=True, - verbose=1, - validation_data=(X_valid, Y_valid), - ) - - # Make predictions - predictions_valid = model.predict( - X_valid, batch_size=batch_size, verbose=1) - - # Cross-entropy loss score - score = log_loss(Y_valid, predictions_valid) -# -*- coding: utf-8 -*- - -from keras.models import Sequential -from keras.optimizers import SGD -from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation -from keras.layers.normalization import BatchNormalization -from keras.models import Model -from keras import backend as K - -from sklearn.metrics import log_loss - -from load_cifar10 import load_cifar10_data - - -def conv2d_bn(x, nb_filter, nb_row, nb_col, - border_mode='same', subsample=(1, 1), bias=False): - """ - Utility function to apply conv + BN. - (Slightly modified from https://github.com/fchollet/keras/blob/master/keras/applications/inception_v3.py) - """ - if K.image_dim_ordering() == "th": - channel_axis = 1 - else: - channel_axis = -1 - x = Convolution2D(nb_filter, nb_row, nb_col, - subsample=subsample, - border_mode=border_mode, - bias=bias)(x) - x = BatchNormalization(axis=channel_axis)(x) - x = Activation('relu')(x) - return x - - -def block_inception_a(input): - if K.image_dim_ordering() == "th": - channel_axis = 1 - else: - channel_axis = -1 - - branch_0 = conv2d_bn(input, 96, 1, 1) - - branch_1 = conv2d_bn(input, 64, 1, 1) - branch_1 = conv2d_bn(branch_1, 96, 3, 3) - - branch_2 = conv2d_bn(input, 64, 1, 1) - branch_2 = conv2d_bn(branch_2, 96, 3, 3) - branch_2 = conv2d_bn(branch_2, 96, 3, 3) - - branch_3 = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(input) - branch_3 = conv2d_bn(branch_3, 96, 1, 1) - - x = merge([branch_0, branch_1, branch_2, branch_3], - mode='concat', concat_axis=channel_axis) - return x - - -def block_reduction_a(input): - if K.image_dim_ordering() == "th": - channel_axis = 1 - else: - channel_axis = -1 - - branch_0 = conv2d_bn( - input, 384, 3, 3, subsample=(2, 2), border_mode='valid') - - branch_1 = conv2d_bn(input, 192, 1, 1) - branch_1 = conv2d_bn(branch_1, 224, 3, 3) - branch_1 = conv2d_bn(branch_1, 256, 3, 3, - subsample=(2, 2), border_mode='valid') - - branch_2 = MaxPooling2D((3, 3), strides=(2, 2), border_mode='valid')(input) - - x = merge([branch_0, branch_1, branch_2], - mode='concat', concat_axis=channel_axis) - return x - - -def block_inception_b(input): - if K.image_dim_ordering() == "th": - channel_axis = 1 - else: - channel_axis = -1 - - branch_0 = conv2d_bn(input, 384, 1, 1) - - branch_1 = conv2d_bn(input, 192, 1, 1) - branch_1 = conv2d_bn(branch_1, 224, 1, 7) - branch_1 = conv2d_bn(branch_1, 256, 7, 1) - - branch_2 = conv2d_bn(input, 192, 1, 1) - branch_2 = conv2d_bn(branch_2, 192, 7, 1) - branch_2 = conv2d_bn(branch_2, 224, 1, 7) - branch_2 = conv2d_bn(branch_2, 224, 7, 1) - branch_2 = conv2d_bn(branch_2, 256, 1, 7) - - 
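    # (branch_2 above factorises a 7x7 convolution into stacked 7x1 and
    # 1x7 convolutions, which covers a similar receptive field with far
    # fewer parameters than a single 7x7 kernel.)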
branch_3 = AveragePooling2D(
-        (3, 3), strides=(1, 1), border_mode='same')(input)
-    branch_3 = conv2d_bn(branch_3, 128, 1, 1)
-
-    x = merge([branch_0, branch_1, branch_2, branch_3],
-              mode='concat', concat_axis=channel_axis)
-    return x
-
-
-def block_reduction_b(input):
-    if K.image_dim_ordering() == "th":
-        channel_axis = 1
-    else:
-        channel_axis = -1
-
-    branch_0 = conv2d_bn(input, 192, 1, 1)
-    branch_0 = conv2d_bn(branch_0, 192, 3, 3,
-                         subsample=(2, 2), border_mode='valid')
-
-    branch_1 = conv2d_bn(input, 256, 1, 1)
-    branch_1 = conv2d_bn(branch_1, 256, 1, 7)
-    branch_1 = conv2d_bn(branch_1, 320, 7, 1)
-    branch_1 = conv2d_bn(branch_1, 320, 3, 3,
-                         subsample=(2, 2), border_mode='valid')
-
-    branch_2 = MaxPooling2D((3, 3), strides=(2, 2), border_mode='valid')(input)
-
-    x = merge([branch_0, branch_1, branch_2],
-              mode='concat', concat_axis=channel_axis)
-    return x
-
-
-def block_inception_c(input):
-    if K.image_dim_ordering() == "th":
-        channel_axis = 1
-    else:
-        channel_axis = -1
-
-    branch_0 = conv2d_bn(input, 256, 1, 1)
-
-    branch_1 = conv2d_bn(input, 384, 1, 1)
-    branch_10 = conv2d_bn(branch_1, 256, 1, 3)
-    branch_11 = conv2d_bn(branch_1, 256, 3, 1)
-    branch_1 = merge([branch_10, branch_11], mode='concat',
-                     concat_axis=channel_axis)
-
-    branch_2 = conv2d_bn(input, 384, 1, 1)
-    branch_2 = conv2d_bn(branch_2, 448, 3, 1)
-    branch_2 = conv2d_bn(branch_2, 512, 1, 3)
-    branch_20 = conv2d_bn(branch_2, 256, 1, 3)
-    branch_21 = conv2d_bn(branch_2, 256, 3, 1)
-    branch_2 = merge([branch_20, branch_21], mode='concat',
-                     concat_axis=channel_axis)
-
-    branch_3 = AveragePooling2D(
-        (3, 3), strides=(1, 1), border_mode='same')(input)
-    branch_3 = conv2d_bn(branch_3, 256, 1, 1)
-
-    x = merge([branch_0, branch_1, branch_2, branch_3],
-              mode='concat', concat_axis=channel_axis)
-    return x
-
-
-def inception_v4_base(input):
-    if K.image_dim_ordering() == "th":
-        channel_axis = 1
-    else:
-        channel_axis = -1
-
-    # Input Shape is 299 x 299 x 3 (tf) or 3 x 299 x 299 (th)
-    net = conv2d_bn(input, 32, 3, 3, subsample=(2, 2), border_mode='valid')
-    net = conv2d_bn(net, 32, 3, 3, border_mode='valid')
-    net = conv2d_bn(net, 64, 3, 3)
-
-    branch_0 = MaxPooling2D((3, 3), strides=(2, 2), border_mode='valid')(net)
-
-    branch_1 = conv2d_bn(net, 96, 3, 3, subsample=(2, 2), border_mode='valid')
-
-    net = merge([branch_0, branch_1], mode='concat', concat_axis=channel_axis)
-
-    branch_0 = conv2d_bn(net, 64, 1, 1)
-    branch_0 = conv2d_bn(branch_0, 96, 3, 3, border_mode='valid')
-
-    branch_1 = conv2d_bn(net, 64, 1, 1)
-    branch_1 = conv2d_bn(branch_1, 64, 1, 7)
-    branch_1 = conv2d_bn(branch_1, 64, 7, 1)
-    branch_1 = conv2d_bn(branch_1, 96, 3, 3, border_mode='valid')
-
-    net = merge([branch_0, branch_1], mode='concat', concat_axis=channel_axis)
-
-    branch_0 = conv2d_bn(net, 192, 3, 3, subsample=(2, 2), border_mode='valid')
-    branch_1 = MaxPooling2D((3, 3), strides=(2, 2), border_mode='valid')(net)
-
-    net = merge([branch_0, branch_1], mode='concat', concat_axis=channel_axis)
-
-    # 35 x 35 x 384
-    # 4 x Inception-A blocks
-    for idx in range(4):
-        net = block_inception_a(net)
-
-    # 35 x 35 x 384
-    # Reduction-A block
-    net = block_reduction_a(net)
-
-    # 17 x 17 x 1024
-    # 7 x Inception-B blocks
-    for idx in range(7):
-        net = block_inception_b(net)
-
-    # 17 x 17 x 1024
-    # Reduction-B block
-    net = block_reduction_b(net)
-
-    # 8 x 8 x 1536
-    # 3 x Inception-C blocks
-    for idx in range(3):
-        net = block_inception_c(net)
-
-    return net
-
-
-def inception_v4_model(img_rows, img_cols, color_type=1, num_classes=None,
dropout_keep_prob=0.2): - ''' - Inception V4 Model for Keras - - Model Schema is based on - https://github.com/kentsommer/keras-inceptionV4 - - ImageNet Pretrained Weights - Theano: https://github.com/kentsommer/keras-inceptionV4/releases/download/2.0/inception-v4_weights_th_dim_ordering_th_kernels.h5 - TensorFlow: https://github.com/kentsommer/keras-inceptionV4/releases/download/2.0/inception-v4_weights_tf_dim_ordering_tf_kernels.h5 - - Parameters: - img_rows, img_cols - resolution of inputs - channel - 1 for grayscale, 3 for color - num_classes - number of class labels for our classification task - ''' - - # Input Shape is 299 x 299 x 3 (tf) or 3 x 299 x 299 (th) - if K.image_dim_ordering() == 'th': - inputs = Input((3, 299, 299)) - else: - inputs = Input((299, 299, 3)) - - # Make inception base - net = inception_v4_base(inputs) - - # Final pooling and prediction - - # 8 x 8 x 1536 - net_old = AveragePooling2D((8, 8), border_mode='valid')(net) - - # 1 x 1 x 1536 - net_old = Dropout(dropout_keep_prob)(net_old) - net_old = Flatten()(net_old) - - # 1536 - predictions = Dense(output_dim=1001, activation='softmax')(net_old) - - model = Model(inputs, predictions, name='inception_v4') - - if K.image_dim_ordering() == 'th': - # Use pre-trained weights for Theano backend - weights_path = 'imagenet_models/inception-v4_weights_th_dim_ordering_th_kernels.h5' - else: - # Use pre-trained weights for Tensorflow backend - weights_path = 'imagenet_models/inception-v4_weights_tf_dim_ordering_tf_kernels.h5' - - model.load_weights(weights_path, by_name=True) - - # Truncate and replace softmax layer for transfer learning - # Cannot use model.layers.pop() since model is not of Sequential() type - # The method below works since pre-trained weights are stored in layers but not in the model - net_ft = AveragePooling2D((8, 8), border_mode='valid')(net) - net_ft = Dropout(dropout_keep_prob)(net_ft) - net_ft = Flatten()(net_ft) - predictions_ft = Dense(output_dim=num_classes, - activation='softmax')(net_ft) - - model = Model(inputs, predictions_ft, name='inception_v4') - - # Learning rate is changed to 0.001 - sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True) - model.compile(optimizer=sgd, loss='categorical_crossentropy', - metrics=['accuracy']) - - return model - - -if __name__ == '__main__': - - # Example to fine-tune on 3000 samples from Cifar10 - - img_rows, img_cols = 299, 299 # Resolution of inputs - channel = 3 - num_classes = 10 - batch_size = 16 - nb_epoch = 10 - - # Load Cifar10 data. 
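    # (Any replacement for load_cifar10_data only has to honour the same
    # contract as the implementation included further down in this file:
    # images resized to the target resolution, channels ordered to match
    # the backend, and labels one-hot encoded. A minimal channels-last
    # sketch, with `images` and `labels` as stand-ins for your own data:
    #
    #     X = np.array([cv2.resize(img, (img_rows, img_cols)) for img in images])
    #     Y = np_utils.to_categorical(labels, num_classes)
    # )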
Please implement your own load_data() module for your own dataset - X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols) - - # Load our model - model = inception_v4_model( - img_rows, img_cols, channel, num_classes, dropout_keep_prob=0.2) - - # Start Fine-tuning - model.fit(X_train, Y_train, - batch_size=batch_size, - nb_epoch=nb_epoch, - shuffle=True, - verbose=1, - validation_data=(X_valid, Y_valid), - ) - - # Make predictions - predictions_valid = model.predict( - X_valid, batch_size=batch_size, verbose=1) - - # Cross-entropy loss score - score = log_loss(Y_valid, predictions_valid) - -import cv2 -import numpy as np - -from keras.datasets import cifar10 -from keras import backend as K -from keras.utils import np_utils - -nb_train_samples = 3000 # 3000 training samples -nb_valid_samples = 100 # 100 validation samples -num_classes = 10 - - -def load_cifar10_data(img_rows, img_cols): - - # Load cifar10 training and validation sets - (X_train, Y_train), (X_valid, Y_valid) = cifar10.load_data() - - # Resize trainging images - if K.image_dim_ordering() == 'th': - X_train = np.array([cv2.resize(img.transpose(1, 2, 0), (img_rows, img_cols)).transpose( - 2, 0, 1) for img in X_train[:nb_train_samples, :, :, :]]) - X_valid = np.array([cv2.resize(img.transpose(1, 2, 0), (img_rows, img_cols)).transpose( - 2, 0, 1) for img in X_valid[:nb_valid_samples, :, :, :]]) - else: - X_train = np.array([cv2.resize(img, (img_rows, img_cols)) - for img in X_train[:nb_train_samples, :, :, :]]) - X_valid = np.array([cv2.resize(img, (img_rows, img_cols)) - for img in X_valid[:nb_valid_samples, :, :, :]]) - - # Transform targets to keras compatible format - Y_train = np_utils.to_categorical(Y_train[:nb_train_samples], num_classes) - Y_valid = np_utils.to_categorical(Y_valid[:nb_valid_samples], num_classes) - - return X_train, Y_train, X_valid, Y_valid -# -*- coding: utf-8 -*- - -from keras.models import Sequential -from keras.optimizers import SGD -from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation -from keras.layers.normalization import BatchNormalization -from keras.models import Model -from keras import backend as K - -from sklearn.metrics import log_loss - -from custom_layers.scale_layer import Scale - -from load_cifar10 import load_cifar10_data - -import sys -sys.setrecursionlimit(3000) - - -def identity_block(input_tensor, kernel_size, filters, stage, block): - '''The identity_block is the block that has no conv layer at shortcut - # Arguments - input_tensor: input tensor - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - ''' - eps = 1.1e-5 - nb_filter1, nb_filter2, nb_filter3 = filters - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - scale_name_base = 'scale' + str(stage) + block + '_branch' - - x = Convolution2D(nb_filter1, 1, 1, name=conv_name_base + - '2a', bias=False)(input_tensor) - x = BatchNormalization(epsilon=eps, axis=bn_axis, - name=bn_name_base + '2a')(x) - x = Scale(axis=bn_axis, name=scale_name_base + '2a')(x) - x = Activation('relu', name=conv_name_base + '2a_relu')(x) - - x = ZeroPadding2D((1, 1), name=conv_name_base + '2b_zeropadding')(x) - x = Convolution2D(nb_filter2, 
kernel_size, kernel_size, - name=conv_name_base + '2b', bias=False)(x) - x = BatchNormalization(epsilon=eps, axis=bn_axis, - name=bn_name_base + '2b')(x) - x = Scale(axis=bn_axis, name=scale_name_base + '2b')(x) - x = Activation('relu', name=conv_name_base + '2b_relu')(x) - - x = Convolution2D(nb_filter3, 1, 1, - name=conv_name_base + '2c', bias=False)(x) - x = BatchNormalization(epsilon=eps, axis=bn_axis, - name=bn_name_base + '2c')(x) - x = Scale(axis=bn_axis, name=scale_name_base + '2c')(x) - - x = merge([x, input_tensor], mode='sum', name='res' + str(stage) + block) - x = Activation('relu', name='res' + str(stage) + block + '_relu')(x) - return x - - -def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): - '''conv_block is the block that has a conv layer at shortcut - # Arguments - input_tensor: input tensor - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - Note that from stage 3, the first conv layer at main path is with subsample=(2,2) - And the shortcut should have subsample=(2,2) as well - ''' - eps = 1.1e-5 - nb_filter1, nb_filter2, nb_filter3 = filters - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - scale_name_base = 'scale' + str(stage) + block + '_branch' - - x = Convolution2D(nb_filter1, 1, 1, subsample=strides, - name=conv_name_base + '2a', bias=False)(input_tensor) - x = BatchNormalization(epsilon=eps, axis=bn_axis, - name=bn_name_base + '2a')(x) - x = Scale(axis=bn_axis, name=scale_name_base + '2a')(x) - x = Activation('relu', name=conv_name_base + '2a_relu')(x) - - x = ZeroPadding2D((1, 1), name=conv_name_base + '2b_zeropadding')(x) - x = Convolution2D(nb_filter2, kernel_size, kernel_size, - name=conv_name_base + '2b', bias=False)(x) - x = BatchNormalization(epsilon=eps, axis=bn_axis, - name=bn_name_base + '2b')(x) - x = Scale(axis=bn_axis, name=scale_name_base + '2b')(x) - x = Activation('relu', name=conv_name_base + '2b_relu')(x) - - x = Convolution2D(nb_filter3, 1, 1, - name=conv_name_base + '2c', bias=False)(x) - x = BatchNormalization(epsilon=eps, axis=bn_axis, - name=bn_name_base + '2c')(x) - x = Scale(axis=bn_axis, name=scale_name_base + '2c')(x) - - shortcut = Convolution2D(nb_filter3, 1, 1, subsample=strides, - name=conv_name_base + '1', bias=False)(input_tensor) - shortcut = BatchNormalization( - epsilon=eps, axis=bn_axis, name=bn_name_base + '1')(shortcut) - shortcut = Scale(axis=bn_axis, name=scale_name_base + '1')(shortcut) - - x = merge([x, shortcut], mode='sum', name='res' + str(stage) + block) - x = Activation('relu', name='res' + str(stage) + block + '_relu')(x) - return x - - -def resnet101_model(img_rows, img_cols, color_type=1, num_classes=None): - """ - Resnet 101 Model for Keras - - Model Schema and layer naming follow that of the original Caffe implementation - https://github.com/KaimingHe/deep-residual-networks - - ImageNet Pretrained Weights - Theano: https://drive.google.com/file/d/0Byy2AcGyEVxfdUV1MHJhelpnSG8/view?usp=sharing - TensorFlow: https://drive.google.com/file/d/0Byy2AcGyEVxfTmRRVmpGWDczaXM/view?usp=sharing - - Parameters: - img_rows, img_cols - resolution of inputs - channel - 1 for grayscale, 3 for color - num_classes - number of class labels for our classification task - """ - eps = 1.1e-5 - - # 
Handle Dimension Ordering for different backends - global bn_axis - if K.image_dim_ordering() == 'tf': - bn_axis = 3 - img_input = Input(shape=(img_rows, img_cols, color_type), name='data') - else: - bn_axis = 1 - img_input = Input(shape=(color_type, img_rows, img_cols), name='data') - - x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input) - x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1', bias=False)(x) - x = BatchNormalization(epsilon=eps, axis=bn_axis, name='bn_conv1')(x) - x = Scale(axis=bn_axis, name='scale_conv1')(x) - x = Activation('relu', name='conv1_relu')(x) - x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x) - - x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) - x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') - x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') - - x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') - for i in range(1, 4): - x = identity_block(x, 3, [128, 128, 512], stage=3, block='b'+str(i)) - - x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') - for i in range(1, 23): - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b'+str(i)) - - x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') - - x_fc = AveragePooling2D((7, 7), name='avg_pool')(x) - x_fc = Flatten()(x_fc) - x_fc = Dense(1000, activation='softmax', name='fc1000')(x_fc) - - model = Model(img_input, x_fc) - - if K.image_dim_ordering() == 'th': - # Use pre-trained weights for Theano backend - weights_path = 'imagenet_models/resnet101_weights_th.h5' - else: - # Use pre-trained weights for Tensorflow backend - weights_path = 'imagenet_models/resnet101_weights_tf.h5' - - model.load_weights(weights_path, by_name=True) - - # Truncate and replace softmax layer for transfer learning - # Cannot use model.layers.pop() since model is not of Sequential() type - # The method below works since pre-trained weights are stored in layers but not in the model - x_newfc = AveragePooling2D((7, 7), name='avg_pool')(x) - x_newfc = Flatten()(x_newfc) - x_newfc = Dense(num_classes, activation='softmax', name='fc8')(x_newfc) - - model = Model(img_input, x_newfc) - - # Learning rate is changed to 0.001 - sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True) - model.compile(optimizer=sgd, loss='categorical_crossentropy', - metrics=['accuracy']) - - return model - - -if __name__ == '__main__': - - # Example to fine-tune on 3000 samples from Cifar10 - - img_rows, img_cols = 224, 224 # Resolution of inputs - channel = 3 - num_classes = 10 - batch_size = 16 - nb_epoch = 10 - - # Load Cifar10 data. 
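# ---------------------------------------------------------------------------
# The bn_axis / input-shape switch above keys off K.image_dim_ordering(), the
# Keras 1 spelling ('th'/'tf'); Keras 2 exposes the same check as
# K.image_data_format() with 'channels_first'/'channels_last'. A small sketch
# of the equivalent logic (shapes here are the 224x224 RGB case):
from keras import backend as K

if K.image_data_format() == 'channels_first':
    bn_axis, input_shape = 1, (3, 224, 224)   # Theano-style layout
else:
    bn_axis, input_shape = 3, (224, 224, 3)   # TensorFlow-style layout
# ---------------------------------------------------------------------------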
Please implement your own load_data() module for your own dataset - X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols) - - # Load our model - model = resnet101_model(img_rows, img_cols, channel, num_classes) - - # Start Fine-tuning - model.fit(X_train, Y_train, - batch_size=batch_size, - nb_epoch=nb_epoch, - shuffle=True, - verbose=1, - validation_data=(X_valid, Y_valid), - ) - - # Make predictions - predictions_valid = model.predict( - X_valid, batch_size=batch_size, verbose=1) - - # Cross-entropy loss score - score = log_loss(Y_valid, predictions_valid) -# -*- coding: utf-8 -*- - -from keras.models import Sequential -from keras.optimizers import SGD -from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation -from keras.layers.normalization import BatchNormalization -from keras.models import Model -from keras import backend as K - -from sklearn.metrics import log_loss - -from custom_layers.scale_layer import Scale - -from load_cifar10 import load_cifar10_data - -import sys -sys.setrecursionlimit(3000) - - -def identity_block(input_tensor, kernel_size, filters, stage, block): - '''The identity_block is the block that has no conv layer at shortcut - # Arguments - input_tensor: input tensor - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - ''' - eps = 1.1e-5 - nb_filter1, nb_filter2, nb_filter3 = filters - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - scale_name_base = 'scale' + str(stage) + block + '_branch' - - x = Convolution2D(nb_filter1, 1, 1, name=conv_name_base + - '2a', bias=False)(input_tensor) - x = BatchNormalization(epsilon=eps, axis=bn_axis, - name=bn_name_base + '2a')(x) - x = Scale(axis=bn_axis, name=scale_name_base + '2a')(x) - x = Activation('relu', name=conv_name_base + '2a_relu')(x) - - x = ZeroPadding2D((1, 1), name=conv_name_base + '2b_zeropadding')(x) - x = Convolution2D(nb_filter2, kernel_size, kernel_size, - name=conv_name_base + '2b', bias=False)(x) - x = BatchNormalization(epsilon=eps, axis=bn_axis, - name=bn_name_base + '2b')(x) - x = Scale(axis=bn_axis, name=scale_name_base + '2b')(x) - x = Activation('relu', name=conv_name_base + '2b_relu')(x) - - x = Convolution2D(nb_filter3, 1, 1, - name=conv_name_base + '2c', bias=False)(x) - x = BatchNormalization(epsilon=eps, axis=bn_axis, - name=bn_name_base + '2c')(x) - x = Scale(axis=bn_axis, name=scale_name_base + '2c')(x) - - x = merge([x, input_tensor], mode='sum', name='res' + str(stage) + block) - x = Activation('relu', name='res' + str(stage) + block + '_relu')(x) - return x - - -def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): - '''conv_block is the block that has a conv layer at shortcut - # Arguments - input_tensor: input tensor - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - Note that from stage 3, the first conv layer at main path is with subsample=(2,2) - And the shortcut should have subsample=(2,2) as well - ''' - 
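# ---------------------------------------------------------------------------
# The residual blocks above are written against the Keras 1 API:
# Convolution2D(n, k, k), bias=False, and merge([...], mode='sum'). A sketch
# of the same bottleneck-plus-shortcut idea in Keras 2 syntax (the Caffe-style
# Scale layer is omitted and names are illustrative, so this is a sketch of
# the pattern rather than a drop-in replacement):
from keras.layers import Conv2D, BatchNormalization, Activation, Add

def identity_block_k2(x, filters, kernel_size=3, bn_axis=3):
    f1, f2, f3 = filters
    y = Conv2D(f1, (1, 1), use_bias=False)(x)
    y = BatchNormalization(axis=bn_axis)(y)
    y = Activation('relu')(y)
    y = Conv2D(f2, (kernel_size, kernel_size), padding='same', use_bias=False)(y)
    y = BatchNormalization(axis=bn_axis)(y)
    y = Activation('relu')(y)
    y = Conv2D(f3, (1, 1), use_bias=False)(y)
    y = BatchNormalization(axis=bn_axis)(y)
    y = Add()([y, x])              # merge([x, input_tensor], mode='sum') in Keras 1
    return Activation('relu')(y)
# ---------------------------------------------------------------------------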
eps = 1.1e-5 - nb_filter1, nb_filter2, nb_filter3 = filters - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - scale_name_base = 'scale' + str(stage) + block + '_branch' - - x = Convolution2D(nb_filter1, 1, 1, subsample=strides, - name=conv_name_base + '2a', bias=False)(input_tensor) - x = BatchNormalization(epsilon=eps, axis=bn_axis, - name=bn_name_base + '2a')(x) - x = Scale(axis=bn_axis, name=scale_name_base + '2a')(x) - x = Activation('relu', name=conv_name_base + '2a_relu')(x) - - x = ZeroPadding2D((1, 1), name=conv_name_base + '2b_zeropadding')(x) - x = Convolution2D(nb_filter2, kernel_size, kernel_size, - name=conv_name_base + '2b', bias=False)(x) - x = BatchNormalization(epsilon=eps, axis=bn_axis, - name=bn_name_base + '2b')(x) - x = Scale(axis=bn_axis, name=scale_name_base + '2b')(x) - x = Activation('relu', name=conv_name_base + '2b_relu')(x) - - x = Convolution2D(nb_filter3, 1, 1, - name=conv_name_base + '2c', bias=False)(x) - x = BatchNormalization(epsilon=eps, axis=bn_axis, - name=bn_name_base + '2c')(x) - x = Scale(axis=bn_axis, name=scale_name_base + '2c')(x) - - shortcut = Convolution2D(nb_filter3, 1, 1, subsample=strides, - name=conv_name_base + '1', bias=False)(input_tensor) - shortcut = BatchNormalization( - epsilon=eps, axis=bn_axis, name=bn_name_base + '1')(shortcut) - shortcut = Scale(axis=bn_axis, name=scale_name_base + '1')(shortcut) - - x = merge([x, shortcut], mode='sum', name='res' + str(stage) + block) - x = Activation('relu', name='res' + str(stage) + block + '_relu')(x) - return x - - -def resnet152_model(img_rows, img_cols, color_type=1, num_classes=None): - """ - Resnet 152 Model for Keras - - Model Schema and layer naming follow that of the original Caffe implementation - https://github.com/KaimingHe/deep-residual-networks - - ImageNet Pretrained Weights - Theano: https://drive.google.com/file/d/0Byy2AcGyEVxfZHhUT3lWVWxRN28/view?usp=sharing - TensorFlow: https://drive.google.com/file/d/0Byy2AcGyEVxfeXExMzNNOHpEODg/view?usp=sharing - - Parameters: - img_rows, img_cols - resolution of inputs - channel - 1 for grayscale, 3 for color - num_classes - number of class labels for our classification task - """ - eps = 1.1e-5 - - # Handle Dimension Ordering for different backends - global bn_axis - if K.image_dim_ordering() == 'tf': - bn_axis = 3 - img_input = Input(shape=(img_rows, img_cols, color_type), name='data') - else: - bn_axis = 1 - img_input = Input(shape=(color_type, img_rows, img_cols), name='data') - - x = ZeroPadding2D((3, 3), name='conv1_zeropadding')(img_input) - x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1', bias=False)(x) - x = BatchNormalization(epsilon=eps, axis=bn_axis, name='bn_conv1')(x) - x = Scale(axis=bn_axis, name='scale_conv1')(x) - x = Activation('relu', name='conv1_relu')(x) - x = MaxPooling2D((3, 3), strides=(2, 2), name='pool1')(x) - - x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) - x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') - x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') - - x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') - for i in range(1, 8): - x = identity_block(x, 3, [128, 128, 512], stage=3, block='b'+str(i)) - - x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') - for i in range(1, 36): - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b'+str(i)) - - x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') - x = identity_block(x, 3, [512, 512, 2048], stage=5, 
block='b') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') - - x_fc = AveragePooling2D((7, 7), name='avg_pool')(x) - x_fc = Flatten()(x_fc) - x_fc = Dense(1000, activation='softmax', name='fc1000')(x_fc) - - model = Model(img_input, x_fc) - - if K.image_dim_ordering() == 'th': - # Use pre-trained weights for Theano backend - weights_path = 'imagenet_models/resnet152_weights_th.h5' - else: - # Use pre-trained weights for Tensorflow backend - weights_path = 'imagenet_models/resnet152_weights_tf.h5' - - model.load_weights(weights_path, by_name=True) - - # Truncate and replace softmax layer for transfer learning - # Cannot use model.layers.pop() since model is not of Sequential() type - # The method below works since pre-trained weights are stored in layers but not in the model - x_newfc = AveragePooling2D((7, 7), name='avg_pool')(x) - x_newfc = Flatten()(x_newfc) - x_newfc = Dense(num_classes, activation='softmax', name='fc8')(x_newfc) - - model = Model(img_input, x_newfc) - - # Learning rate is changed to 0.001 - sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True) - model.compile(optimizer=sgd, loss='categorical_crossentropy', - metrics=['accuracy']) - - return model - - -if __name__ == '__main__': - - # Example to fine-tune on 3000 samples from Cifar10 - - img_rows, img_cols = 224, 224 # Resolution of inputs - channel = 3 - num_classes = 10 - batch_size = 8 - nb_epoch = 10 - - # Load Cifar10 data. Please implement your own load_data() module for your own dataset - X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols) - - # Load our model - model = resnet152_model(img_rows, img_cols, channel, num_classes) - - # Start Fine-tuning - model.fit(X_train, Y_train, - batch_size=batch_size, - nb_epoch=nb_epoch, - shuffle=True, - verbose=1, - validation_data=(X_valid, Y_valid), - ) - - # Make predictions - predictions_valid = model.predict( - X_valid, batch_size=batch_size, verbose=1) - - # Cross-entropy loss score - score = log_loss(Y_valid, predictions_valid) -# -*- coding: utf-8 -*- - -from keras.models import Sequential -from keras.optimizers import SGD -from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation -from keras.layers.normalization import BatchNormalization -from keras.models import Model -from keras import backend as K - -from sklearn.metrics import log_loss - -from load_cifar10 import load_cifar10_data - - -def identity_block(input_tensor, kernel_size, filters, stage, block): - """ - The identity_block is the block that has no conv layer at shortcut - Arguments - input_tensor: input tensor - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - """ - - nb_filter1, nb_filter2, nb_filter3 = filters - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = Convolution2D(nb_filter1, 1, 1, - name=conv_name_base + '2a')(input_tensor) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) - x = Activation('relu')(x) - - x = Convolution2D(nb_filter2, kernel_size, kernel_size, - border_mode='same', name=conv_name_base + '2b')(x) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) - x = Activation('relu')(x) - - x = 
Convolution2D(nb_filter3, 1, 1, name=conv_name_base + '2c')(x) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) - - x = merge([x, input_tensor], mode='sum') - x = Activation('relu')(x) - return x - - -def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): - """ - conv_block is the block that has a conv layer at shortcut - # Arguments - input_tensor: input tensor - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - Note that from stage 3, the first conv layer at main path is with subsample=(2,2) - And the shortcut should have subsample=(2,2) as well - """ - - nb_filter1, nb_filter2, nb_filter3 = filters - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = Convolution2D(nb_filter1, 1, 1, subsample=strides, - name=conv_name_base + '2a')(input_tensor) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) - x = Activation('relu')(x) - - x = Convolution2D(nb_filter2, kernel_size, kernel_size, border_mode='same', - name=conv_name_base + '2b')(x) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) - x = Activation('relu')(x) - - x = Convolution2D(nb_filter3, 1, 1, name=conv_name_base + '2c')(x) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) - - shortcut = Convolution2D(nb_filter3, 1, 1, subsample=strides, - name=conv_name_base + '1')(input_tensor) - shortcut = BatchNormalization( - axis=bn_axis, name=bn_name_base + '1')(shortcut) - - x = merge([x, shortcut], mode='sum') - x = Activation('relu')(x) - return x - - -def resnet50_model(img_rows, img_cols, color_type=1, num_classes=None): - """ - Resnet 50 Model for Keras - - Model Schema is based on - https://github.com/fchollet/deep-learning-models/blob/master/resnet50.py - - ImageNet Pretrained Weights - https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels.h5 - - Parameters: - img_rows, img_cols - resolution of inputs - channel - 1 for grayscale, 3 for color - num_classes - number of class labels for our classification task - """ - - # Handle Dimension Ordering for different backends - global bn_axis - if K.image_dim_ordering() == 'tf': - bn_axis = 3 - img_input = Input(shape=(img_rows, img_cols, color_type)) - else: - bn_axis = 1 - img_input = Input(shape=(color_type, img_rows, img_cols)) - - x = ZeroPadding2D((3, 3))(img_input) - x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1')(x) - x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) - x = Activation('relu')(x) - x = MaxPooling2D((3, 3), strides=(2, 2))(x) - - x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) - x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') - x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') - - x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') - - x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') - x = identity_block(x, 3, 
[256, 256, 1024], stage=4, block='d') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') - - x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') - - # Fully Connected Softmax Layer - x_fc = AveragePooling2D((7, 7), name='avg_pool')(x) - x_fc = Flatten()(x_fc) - x_fc = Dense(1000, activation='softmax', name='fc1000')(x_fc) - - # Create model - model = Model(img_input, x_fc) - - # Load ImageNet pre-trained data - if K.image_dim_ordering() == 'th': - # Use pre-trained weights for Theano backend - weights_path = 'imagenet_models/resnet50_weights_th_dim_ordering_th_kernels.h5' - else: - # Use pre-trained weights for Tensorflow backend - weights_path = 'imagenet_models/resnet50_weights_tf_dim_ordering_tf_kernels.h5' - - model.load_weights(weights_path) - - # Truncate and replace softmax layer for transfer learning - # Cannot use model.layers.pop() since model is not of Sequential() type - # The method below works since pre-trained weights are stored in layers but not in the model - x_newfc = AveragePooling2D((7, 7), name='avg_pool')(x) - x_newfc = Flatten()(x_newfc) - x_newfc = Dense(num_classes, activation='softmax', name='fc10')(x_newfc) - - # Create another model with our customized softmax - model = Model(img_input, x_newfc) - - # Learning rate is changed to 0.001 - sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True) - model.compile(optimizer=sgd, loss='categorical_crossentropy', - metrics=['accuracy']) - - return model - - -if __name__ == '__main__': - - # Example to fine-tune on 3000 samples from Cifar10 - - img_rows, img_cols = 224, 224 # Resolution of inputs - channel = 3 - num_classes = 10 - batch_size = 16 - nb_epoch = 10 - - # Load Cifar10 data. 
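# ---------------------------------------------------------------------------
# The fine-tuning examples in this corpus all finish by scoring predictions
# with sklearn's log_loss, which is the same categorical cross-entropy the
# models are compiled with; a tiny self-contained check with toy numbers:
import numpy as np
from sklearn.metrics import log_loss

y_true = np.array([[1, 0], [0, 1]])          # one-hot targets
y_prob = np.array([[0.9, 0.1], [0.2, 0.8]])  # softmax outputs
print(log_loss(y_true, y_prob))              # mean of -log(0.9) and -log(0.8)
# ---------------------------------------------------------------------------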
Please implement your own load_data() module for your own dataset - X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols) - - # Load our model - model = resnet50_model(img_rows, img_cols, channel, num_classes) - - # Start Fine-tuning - model.fit(X_train, Y_train, - batch_size=batch_size, - nb_epoch=nb_epoch, - shuffle=True, - verbose=1, - validation_data=(X_valid, Y_valid), - ) - - # Make predictions - predictions_valid = model.predict( - X_valid, batch_size=batch_size, verbose=1) - - # Cross-entropy loss score - score = log_loss(Y_valid, predictions_valid) -# -*- coding: utf-8 -*- - -from keras.models import Sequential -from keras.optimizers import SGD -from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation - -from sklearn.metrics import log_loss - -from load_cifar10 import load_cifar10_data - - -def vgg16_model(img_rows, img_cols, channel=1, num_classes=None): - """VGG 16 Model for Keras - - Model Schema is based on - https://gist.github.com/baraldilorenzo/07d7802847aaad0a35d3 - - ImageNet Pretrained Weights - https://drive.google.com/file/d/0Bz7KyqmuGsilT0J5dmRCM0ROVHc/view?usp=sharing - - Parameters: - img_rows, img_cols - resolution of inputs - channel - 1 for grayscale, 3 for color - num_classes - number of categories for our classification task - """ - model = Sequential() - model.add(ZeroPadding2D((1, 1), input_shape=(channel, img_rows, img_cols))) - model.add(Convolution2D(64, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(64, 3, 3, activation='relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(128, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(128, 3, 3, activation='relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - # Add Fully Connected Layer - model.add(Flatten()) - model.add(Dense(4096, activation='relu')) - model.add(Dropout(0.5)) - model.add(Dense(4096, activation='relu')) - model.add(Dropout(0.5)) - model.add(Dense(1000, activation='softmax')) - - # Loads ImageNet pre-trained data - model.load_weights('imagenet_models/vgg16_weights.h5') - - # Truncate and replace softmax layer for transfer learning - model.layers.pop() - model.outputs = [model.layers[-1].output] - model.layers[-1].outbound_nodes = [] - model.add(Dense(num_classes, activation='softmax')) - - # Uncomment below to set the first 10 layers to non-trainable (weights will not be 
updated) - # for layer in model.layers[:10]: - # layer.trainable = False - - # Learning rate is changed to 0.001 - sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True) - model.compile(optimizer=sgd, loss='categorical_crossentropy', - metrics=['accuracy']) - - return model - - -if __name__ == '__main__': - - # Example to fine-tune on 3000 samples from Cifar10 - - img_rows, img_cols = 224, 224 # Resolution of inputs - channel = 3 - num_classes = 10 - batch_size = 16 - nb_epoch = 10 - - # Load Cifar10 data. Please implement your own load_data() module for your own dataset - X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols) - - # Load our model - model = vgg16_model(img_rows, img_cols, channel, num_classes) - - # Start Fine-tuning - model.fit(X_train, Y_train, - batch_size=batch_size, - nb_epoch=nb_epoch, - shuffle=True, - verbose=1, - validation_data=(X_valid, Y_valid), - ) - - # Make predictions - predictions_valid = model.predict( - X_valid, batch_size=batch_size, verbose=1) - - # Cross-entropy loss score - score = log_loss(Y_valid, predictions_valid) -# -*- coding: utf-8 -*- - -from keras.models import Sequential -from keras.optimizers import SGD -from keras.layers import Input, Dense, Convolution2D, MaxPooling2D, AveragePooling2D, ZeroPadding2D, Dropout, Flatten, merge, Reshape, Activation - -from sklearn.metrics import log_loss - -from load_cifar10 import load_cifar10_data - - -def vgg19_model(img_rows, img_cols, channel=1, num_classes=None): - """ - VGG 19 Model for Keras - - Model Schema is based on - https://gist.github.com/baraldilorenzo/8d096f48a1be4a2d660d - - ImageNet Pretrained Weights - https://drive.google.com/file/d/0Bz7KyqmuGsilZ2RVeVhKY0FyRmc/view?usp=sharing - - Parameters: - img_rows, img_cols - resolution of inputs - channel - 1 for grayscale, 3 for color - num_classes - number of class labels for our classification task - """ - - model = Sequential() - model.add(ZeroPadding2D((1, 1), input_shape=(channel, img_rows, img_cols))) - model.add(Convolution2D(64, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(64, 3, 3, activation='relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(128, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(128, 3, 3, activation='relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, 
activation='relu')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - # Add Fully Connected Layer - model.add(Flatten()) - model.add(Dense(4096, activation='relu')) - model.add(Dropout(0.5)) - model.add(Dense(4096, activation='relu')) - model.add(Dropout(0.5)) - model.add(Dense(1000, activation='softmax')) - - # Loads ImageNet pre-trained data - model.load_weights('imagenet_models/vgg19_weights.h5') - - # Truncate and replace softmax layer for transfer learning - model.layers.pop() - model.outputs = [model.layers[-1].output] - model.layers[-1].outbound_nodes = [] - model.add(Dense(num_classes, activation='softmax')) - - # Learning rate is changed to 0.001 - sgd = SGD(lr=1e-3, decay=1e-6, momentum=0.9, nesterov=True) - model.compile(optimizer=sgd, loss='categorical_crossentropy', - metrics=['accuracy']) - - return model - - -if __name__ == '__main__': - - # Example to fine-tune on 3000 samples from Cifar10 - - img_rows, img_cols = 224, 224 # Resolution of inputs - channel = 3 - num_classes = 10 - batch_size = 16 - nb_epoch = 10 - - # Load Cifar10 data. Please implement your own load_data() module for your own dataset - X_train, Y_train, X_valid, Y_valid = load_cifar10_data(img_rows, img_cols) - - # Load our model - model = vgg19_model(img_rows, img_cols, channel, num_classes) - - # Start Fine-tuning - model.fit(X_train, Y_train, - batch_size=batch_size, - nb_epoch=nb_epoch, - shuffle=True, - verbose=1, - validation_data=(X_valid, Y_valid), - ) - - # Make predictions - predictions_valid = model.predict( - X_valid, batch_size=batch_size, verbose=1) - - # Cross-entropy loss score - score = log_loss(Y_valid, predictions_valid) -# Python Package -"""Custom Keras layers for GoogLeNet""" - -from keras.layers.core import Layer -from keras.engine import InputSpec -from keras import backend as K -import theano.tensor as T -try: - from keras import initializations -except ImportError: - from keras import initializers as initializations - - -class LRN(Layer): - """ - Custom Layer for Local Response Normalization (LRN) - """ - - def __init__(self, alpha=0.0001, k=1, beta=0.75, n=5, **kwargs): - self.alpha = alpha - self.k = k - self.beta = beta - self.n = n - super(LRN, self).__init__(**kwargs) - - def call(self, x, mask=None): - b, ch, r, c = x.shape - half_n = self.n // 2 # half the local region - input_sqr = T.sqr(x) # square the input - # make an empty tensor with zero pads along channel dimension - extra_channels = T.alloc(0., b, ch + 2*half_n, r, c) - # set the center to be the squared input - input_sqr = T.set_subtensor( - extra_channels[:, half_n:half_n+ch, :, :], input_sqr) - scale = self.k # offset for the scale - norm_alpha = self.alpha / self.n # normalized alpha - for i in range(self.n): - scale += norm_alpha * input_sqr[:, i:i+ch, :, :] - scale = scale ** self.beta - x = x / scale - return x - - def get_config(self): - config = {"alpha": self.alpha, - "k": self.k, - "beta": self.beta, - "n": self.n} - base_config = super(LRN, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class PoolHelper(Layer): - """ - Reconcile Keras and Caffe weights - """ - - def __init__(self, **kwargs): - super(PoolHelper, self).__init__(**kwargs) - - def call(self, x, mask=None): - return x[:, :, 1:, 1:] - - def get_config(self): - config = {} - base_config = super(PoolHelper, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -from 
keras.layers.core import Layer -from keras.engine import InputSpec -from keras import backend as K -try: - from keras import initializations -except ImportError: - from keras import initializers as initializations - - -class Scale(Layer): - '''Learns a set of weights and biases used for scaling the input data. - the output consists simply in an element-wise multiplication of the input - and a sum of a set of constants: - - out = in * gamma + beta, - - where 'gamma' and 'beta' are the weights and biases larned. - - # Arguments - axis: integer, axis along which to normalize in mode 0. For instance, - if your input tensor has shape (samples, channels, rows, cols), - set axis to 1 to normalize per feature map (channels axis). - momentum: momentum in the computation of the - exponential average of the mean and standard deviation - of the data, for feature-wise normalization. - weights: Initialization weights. - List of 2 Numpy arrays, with shapes: - `[(input_shape,), (input_shape,)]` - beta_init: name of initialization function for shift parameter - (see [initializations](../initializations.md)), or alternatively, - Theano/TensorFlow function to use for weights initialization. - This parameter is only relevant if you don't pass a `weights` argument. - gamma_init: name of initialization function for scale parameter (see - [initializations](../initializations.md)), or alternatively, - Theano/TensorFlow function to use for weights initialization. - This parameter is only relevant if you don't pass a `weights` argument. - ''' - - def __init__(self, weights=None, axis=-1, momentum=0.9, beta_init='zero', gamma_init='one', **kwargs): - self.momentum = momentum - self.axis = axis - self.beta_init = initializations.get(beta_init) - self.gamma_init = initializations.get(gamma_init) - self.initial_weights = weights - super(Scale, self).__init__(**kwargs) - - def build(self, input_shape): - self.input_spec = [InputSpec(shape=input_shape)] - shape = (int(input_shape[self.axis]),) - - # Compatibility with TensorFlow >= 1.0.0 - self.gamma = K.variable(self.gamma_init( - shape), name='{}_gamma'.format(self.name)) - self.beta = K.variable(self.beta_init( - shape), name='{}_beta'.format(self.name)) - #self.gamma = self.gamma_init(shape, name='{}_gamma'.format(self.name)) - #self.beta = self.beta_init(shape, name='{}_beta'.format(self.name)) - self.trainable_weights = [self.gamma, self.beta] - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - - def call(self, x, mask=None): - input_shape = self.input_spec[0].shape - broadcast_shape = [1] * len(input_shape) - broadcast_shape[self.axis] = input_shape[self.axis] - - out = K.reshape(self.gamma, broadcast_shape) * x + \ - K.reshape(self.beta, broadcast_shape) - return out - - def get_config(self): - config = {"momentum": self.momentum, "axis": self.axis} - base_config = super(Scale, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -from setuptools import setup -from setuptools import find_packages - - -version = '0.4.1' - -setup(name='keras-vis', - version=version, - description='Neural Network visualization toolkit for keras', - author='Raghavendra Kotikalapudi', - author_email='ragha@outlook.com', - url='https://github.com/raghakot/keras-vis', - download_url='https://github.com/raghakot/keras-vis/tarball/{}'.format( - version), - license='MIT', - install_requires=['keras', 'six', 'scikit-image', 'matplotlib', 'h5py'], - extras_require={ - 'vis_utils': ['Pillow', 'imageio'], 
- 'tests': ['pytest', - 'pytest-pep8', - 'pytest-xdist', - 'pytest-cov'], - }, - include_package_data=True, - packages=find_packages()) -""" -Parses source code to generate API docs in markdown. -""" - -import os -import re -import inspect -from inspect import getdoc, getargspec, getsourcefile, getsourcelines, getmembers -from collections import defaultdict - -import sys -if sys.version[0] == '2': - reload(sys) - sys.setdefaultencoding('utf8') - -_RE_BLOCKSTART = re.compile(r"(Args:|Arg:|Kwargs:|Returns:|Yields:|Kwargs:|Raises:|Notes:|Note:|Examples:|Example:)", - re.IGNORECASE) -_RE_ARGSTART = re.compile(r"(\w*?)\s*?\((.*?)\):(.*)", re.IGNORECASE) -_RE_EXCSTART = re.compile(r"(\w*?):(.*)", re.IGNORECASE) - -# -# String templates -# - -FUNC_TEMPLATE = """------------------- - -{section} [{header}]({path}) - -```python -{funcdef} -``` - -{doc} - -""" - -CLASS_TEMPLATE = """------------------- - -{section} [{header}]({path}) - -{doc} - -{variables} - -{init} - -{handlers} - -{methods} - -""" - -MODULE_TEMPLATE = """ -**Source:** {path} - -{global_vars} - -{functions} - -{classes} - -""" - - -def make_iter(obj): - """ Makes an iterable - """ - return obj if hasattr(obj, '__iter__') else [obj] - - -def order_by_line_nos(objs, line_nos): - """Orders the set of `objs` by `line_nos` - """ - ordering = sorted(range(len(line_nos)), key=line_nos.__getitem__) - return [objs[i] for i in ordering] - - -def to_md_file(string, filename, out_path="."): - """Import a module path and create an api doc from it - - Args: - string (str): string with line breaks to write to file. - filename (str): filename without the .md - out_path (str): The output directory - """ - md_file = "%s.md" % filename - with open(os.path.join(out_path, md_file), "w") as f: - f.write(string) - print("wrote {}.".format(md_file)) - - -class MarkdownAPIGenerator(object): - def __init__(self, src_root, github_link): - """Initializes the markdown api generator. - - Args: - src_root: The root folder name containing all the sources. - Ex: src - github_link: The base github link. Should include branch name. - Ex: https://github.com/raghakot/keras-vis/tree/master - All source links are generated with this prefix. - """ - self.src_root = src_root - self.github_link = github_link - - def get_line_no(self, obj): - """Gets the source line number of this object. None if `obj` code cannot be found. - """ - try: - lineno = getsourcelines(obj)[1] - except: - # no code found - lineno = None - return lineno - - def get_src_path(self, obj, append_base=True): - """Creates a src path string with line info for use as markdown link. - """ - path = getsourcefile(obj) - if self.src_root not in path: - # this can happen with e.g. - # inlinefunc-wrapped functions - if hasattr(obj, "__module__"): - path = "%s.%s" % (obj.__module__, obj.__name__) - else: - path = obj.__name__ - path = path.replace(".", "/") - pre, post = path.rsplit(self.src_root + "/", 1) - - lineno = self.get_line_no(obj) - lineno = "" if lineno is None else "#L{}".format(lineno) - - path = self.src_root + "/" + post + lineno - if append_base: - path = os.path.join(self.github_link, path) - return path - - def doc2md(self, func): - """Parse docstring (parsed with getdoc) according to Google-style - formatting and convert to markdown. 
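# ---------------------------------------------------------------------------
# How the module-level regexes above carve up a Google-style docstring line;
# a small self-contained check of _RE_ARGSTART (the example line is made up):
import re

_RE_ARGSTART = re.compile(r"(\w*?)\s*?\((.*?)\):(.*)", re.IGNORECASE)
m = _RE_ARGSTART.match("filename (str): filename without the .md")
print(m.groups())   # -> ('filename', 'str', ' filename without the .md')
# ---------------------------------------------------------------------------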
We support the following - Google style syntax: - - Args, Kwargs: - argname (type): text - freeform text - Returns, Yields: - retname (type): text - freeform text - Raises: - exceptiontype: text - freeform text - Notes, Examples: - freeform text - - """ - doc = getdoc(func) or "" - blockindent = 0 - argindent = 1 - out = [] - - for line in doc.split("\n"): - indent = len(line) - len(line.lstrip()) - line = line.lstrip() - if _RE_BLOCKSTART.match(line): - # start of a new block - blockindent = indent - out.append("\n*{}*\n".format(line)) - elif indent > blockindent: - if _RE_ARGSTART.match(line): - # start of new argument - out.append("\n" + " " * blockindent + " - " + - _RE_ARGSTART.sub(r"**\1** (\2): \3", line)) - argindent = indent - elif _RE_EXCSTART.match(line): - # start of an exception-type block - out.append("\n" + " " * blockindent + " - " + - _RE_EXCSTART.sub(r"**\1**: \2", line)) - argindent = indent - elif indent > argindent: - out.append("\n" + " " * (blockindent + 2) + line) - else: - out.append("\n" + line) - else: - out.append("\n" + line) - - return "".join(out) - - def func2md(self, func, clsname=None, names=None, depth=3): - """Takes a function (or method) and documents it. - - Args: - clsname (str, optional): class name to prepend to funcname. - depth (int, optional): number of ### to append to function name - - """ - section = "#" * depth - if names is None: - names = [func.__name__] - - funcname = ", ".join(names) - escfuncname = ", ".join(["`%s`" % funcname if funcname.startswith( - "_") else funcname for funcname in names]) - header = "%s%s" % ("%s." % clsname if clsname else "", escfuncname) - - path = self.get_src_path(func) - doc = self.doc2md(func) - - args, kwargs = [], [] - spec = getargspec(func) - vargsname, kwargsname = spec.varargs, spec.keywords - vargs = list(make_iter(spec.args)) if spec.args else [] - defaults = list(make_iter(spec.defaults)) if spec.defaults else [] - - while vargs: - if vargs and vargs[0] == "self": - args.append(vargs.pop(0)) - elif len(vargs) > len(defaults): - args.append(vargs.pop(0)) - else: - default = defaults.pop(0) - if isinstance(default, str): - default = "\"%s\"" % default - else: - default = "%s" % str(default) - - kwargs.append((vargs.pop(0), default)) - - if args: - args = ", ".join("%s" % arg for arg in args) - if kwargs: - kwargs = ", ".join("%s=%s" % kwarg for kwarg in kwargs) - if args: - kwargs = ", " + kwargs - if vargsname: - vargsname = "*%s" % vargsname - if args or kwargs: - vargsname = ", " + vargsname - if kwargsname: - kwargsname = "**%s" % kwargsname - if args or kwargs or vargsname: - kwargsname = ", " + kwargsname - - _FUNCDEF = "{funcname}({args}{kwargs}{vargs}{vkwargs})" - funcdef = _FUNCDEF.format(funcname=funcname, - args=args or "", - kwargs=kwargs or "", - vargs=vargsname or "", - vkwargs=kwargsname or "") - - # split the function definition if it is too long - lmax = 90 - if len(funcdef) > lmax: - # wrap in the args list - split = funcdef.split("(", 1) - # we gradually build the string again - rest = split[1] - args = rest.split(", ") - - funcname = "(".join(split[:1]) + "(" - lline = len(funcname) - parts = [] - for arg in args: - larg = len(arg) - if larg > lmax - 5: - # not much to do if arg is so long - parts.append(arg) - elif lline + larg > lmax: - # the next arg is too long, break the line - parts.append("\\\n " + arg) - lline = 0 - else: - parts.append(arg) - lline += len(parts[-1]) - funcdef = funcname + ", ".join(parts) - - # build the signature - string = 
FUNC_TEMPLATE.format(section=section, - header=header, - funcdef=funcdef, - path=path, - doc=doc if doc else "*No documentation found.*") - return string - - def class2md(self, cls, depth=2): - """Takes a class and creates markdown text to document its methods and variables. - """ - - section = "#" * depth - subsection = "#" * (depth + 2) - clsname = cls.__name__ - modname = cls.__module__ - header = clsname - path = self.get_src_path(cls) - doc = self.doc2md(cls) - - try: - init = self.func2md(cls.__init__, clsname=clsname) - except (ValueError, TypeError): - # this happens if __init__ is outside the repo - init = "" - - variables = [] - for name, obj in getmembers(cls, lambda a: not (inspect.isroutine(a) or inspect.ismethod(a))): - if not name.startswith("_") and type(obj) == property: - comments = self.doc2md(obj) or inspect.getcomments(obj) - comments = "\n %s" % comments if comments else "" - variables.append("\n%s %s.%s%s\n" % - (subsection, clsname, name, comments)) - - handlers = [] - for name, obj in getmembers(cls, inspect.ismethoddescriptor): - if not name.startswith("_") and hasattr(obj, "__module__") and obj.__module__ == modname: - handlers.append("\n%s %s.%s\n *Handler*" % - (subsection, clsname, name)) - - methods = [] - for name, obj in getmembers(cls, inspect.ismethod): - if not name.startswith("_") and hasattr(obj, - "__module__") and obj.__module__ == modname and name not in handlers: - methods.append(self.func2md( - obj, clsname=clsname, depth=depth + 1)) - - string = CLASS_TEMPLATE.format(section=section, - header=header, - path=path, - doc=doc if doc else "", - init=init, - variables="".join(variables), - handlers="".join(handlers), - methods="".join(methods)) - return string - - def module2md(self, module): - """Takes an imported module object and create a Markdown string containing functions and classes. - """ - modname = module.__name__ - path = self.get_src_path(module, append_base=False) - path = "[{}]({})".format(path, os.path.join(self.github_link, path)) - found = set() - - classes = [] - line_nos = [] - for name, obj in getmembers(module, inspect.isclass): - # handle classes - found.add(name) - if not name.startswith("_") and hasattr(obj, "__module__") and obj.__module__ == modname: - classes.append(self.class2md(obj)) - line_nos.append(self.get_line_no(obj) or 0) - classes = order_by_line_nos(classes, line_nos) - - # Since functions can have multiple aliases. 
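# ---------------------------------------------------------------------------
# func2md above rebuilds signatures from inspect.getargspec, which splits a
# callable into positional args, their defaults, *varargs and **keywords;
# a toy illustration of what that spec looks like:
from inspect import getargspec

def f(a, b, c=1, *args, **kwargs):
    pass

spec = getargspec(f)
print(spec.args, spec.defaults)      # -> ['a', 'b', 'c'] (1,)
print(spec.varargs, spec.keywords)   # -> args kwargs
# ---------------------------------------------------------------------------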
- func2names = defaultdict(list) - for name, obj in getmembers(module, inspect.isfunction): - func2names[obj].append(name) - - functions = [] - line_nos = [] - for obj in func2names: - names = func2names[obj] - found.update(names) - - # Include if within module or included modules within __init__.py and exclude from global variables - is_module_within_init = '__init__.py' in path and obj.__module__.startswith( - modname) - if is_module_within_init: - found.add(obj.__module__.replace(modname + '.', '')) - - if hasattr(obj, "__module__") and (obj.__module__ == modname or is_module_within_init): - names = list( - filter(lambda name: not name.startswith("_"), names)) - if len(names) > 0: - functions.append(self.func2md(obj, names=names)) - line_nos.append(self.get_line_no(obj) or 0) - functions = order_by_line_nos(functions, line_nos) - - variables = [] - line_nos = [] - for name, obj in module.__dict__.items(): - if not name.startswith("_") and name not in found: - if hasattr(obj, "__module__") and obj.__module__ != modname: - continue - if hasattr(obj, "__name__") and not obj.__name__.startswith(modname): - continue - - comments = inspect.getcomments(obj) - comments = ": %s" % comments if comments else "" - variables.append("- **%s**%s" % (name, comments)) - line_nos.append(self.get_line_no(obj) or 0) - - variables = order_by_line_nos(variables, line_nos) - if variables: - new_list = ["**Global Variables**", "---------------"] - new_list.extend(variables) - variables = new_list - - string = MODULE_TEMPLATE.format(path=path, - global_vars="\n".join( - variables) if variables else "", - functions="\n".join( - functions) if functions else "", - classes="".join(classes) if classes else "") - return string -import shutil - -from md_autogen import MarkdownAPIGenerator -from md_autogen import to_md_file - -from vis import backend -from vis.utils import utils -from vis import visualization -from vis import backprop_modifiers -from vis import callbacks -from vis import grad_modifiers -from vis import input_modifiers -from vis import losses -from vis import optimizer -from vis import regularizers - - -def generate_api_docs(): - modules = [ - backend, - utils, - visualization, - backprop_modifiers, - callbacks, - grad_modifiers, - input_modifiers, - losses, - optimizer, - regularizers - ] - - md_gen = MarkdownAPIGenerator( - "vis", "https://github.com/raghakot/keras-vis/tree/master") - for m in modules: - md_string = md_gen.module2md(m) - to_md_file(md_string, m.__name__, "sources") - - -def update_index_md(): - shutil.copyfile('../README.md', 'sources/index.md') - - -def copy_templates(): - shutil.rmtree('sources', ignore_errors=True) - shutil.copytree('templates', 'sources') - - -if __name__ == "__main__": - copy_templates() - update_index_md() - generate_api_docs() -from __future__ import absolute_import - -from . import backend -from .utils import utils - - -def guided(model): - """Modifies backprop to only propagate positive gradients for positive activations. - - Args: - model: The `keras.models.Model` instance whose gradient computation needs to be overridden. - - References: - Details on guided back propagation can be found in the paper: [Striving for Simplicity: The All Convolutional Net] - (https://arxiv.org/pdf/1412.6806.pdf) - """ - return backend.modify_model_backprop(model, 'guided') - - -def rectified(model): - """Modifies backprop to only propagate positive gradients. - - Args: - model: The `keras.models.Model` instance whose gradient computation needs to be overridden.
- - References: - Details can be found in the paper: [Visualizing and Understanding Convolutional Networks] - (https://arxiv.org/pdf/1311.2901.pdf) - """ - return backend.modify_model_backprop(model, 'rectified') - - -# Create aliases -relu = deconv = rectified - - -def get(identifier): - return utils.get_identifier(identifier, globals(), __name__) -from __future__ import absolute_import -import pprint -from .utils import utils - -try: - import imageio as imageio -except ImportError: - imageio = None - - -def _check_imageio(): - if not imageio: - raise ImportError('Failed to import imageio. You must install imageio') - - -class OptimizerCallback(object): - """Abstract class for defining callbacks for use with [Optimizer.minimize](vis.optimizer#optimizerminimize). - """ - - def callback(self, i, named_losses, overall_loss, grads, wrt_value): - """This function will be called within [optimizer.minimize](vis.optimizer.md#minimize). - - Args: - i: The optimizer iteration. - named_losses: List of `(loss_name, loss_value)` tuples. - overall_loss: Overall weighted loss. - grads: The gradient of input image with respect to `wrt_value`. - wrt_value: The current `wrt_value`. - """ - raise NotImplementedError() - - def on_end(self): - """Called at the end of optimization process. This function is typically used to cleanup / close any - opened resources at the end of optimization. - """ - pass - - -class Print(OptimizerCallback): - """Callback to print values during optimization. - """ - - def callback(self, i, named_losses, overall_loss, grads, wrt_value): - print('Iteration: {}, named_losses: {}, overall loss: {}' - .format(i + 1, pprint.pformat(named_losses), overall_loss)) - - -class GifGenerator(OptimizerCallback): - """Callback to construct gif of optimized image. - """ - - def __init__(self, path): - """ - Args: - path: The file path to save gif. - """ - _check_imageio() - if not path.endswith('.gif'): - path += '.gif' - self.writer = imageio.get_writer(path, mode='I', loop=1) - - def callback(self, i, named_losses, overall_loss, grads, wrt_value): - img = utils.deprocess_input(wrt_value[0]) - img = utils.draw_text(img, "Step {}".format(i + 1)) - self.writer.append_data(img) - - def on_end(self): - self.writer.close() -from __future__ import absolute_import - -import numpy as np -from keras import backend as K -from .utils import utils - - -def negate(grads): - """Negates the gradients. - - Args: - grads: A numpy array of grads to use. - - Returns: - The negated gradients. - """ - return -grads - - -def absolute(grads): - """Computes absolute gradients. - - Args: - grads: A numpy array of grads to use. - - Returns: - The absolute gradients. - """ - return np.abs(grads) - - -def invert(grads): - """Inverts the gradients. - - Args: - grads: A numpy array of grads to use. - - Returns: - The inverted gradients. - """ - return 1. / (grads + K.epsilon()) - - -def relu(grads): - """Clips negative gradient values. - - Args: - grads: A numpy array of grads to use. - - Returns: - The rectified gradients. - """ - grads[grads < 0.] = 0. - return grads - - -def small_values(grads): - """Can be used to highlight small gradient values. - - Args: - grads: A numpy array of grads to use. - - Returns: - The modified gradients that highlight small values. 
- """ - return absolute(invert(grads)) - - -def get(identifier): - return utils.get_identifier(identifier, globals(), __name__) -from __future__ import absolute_import - -import numpy as np -from scipy.ndimage.interpolation import shift -from .utils import utils -from keras import backend as K - - -class InputModifier(object): - """Abstract class for defining an input modifier. An input modifier can be used with the - [Optimizer.minimize](vis.optimizer#optimizerminimize) to make `pre` and `post` changes to the optimized input - during the optimization process. - - ```python - modifier.pre(seed_input) - # gradient descent update to img - modifier.post(seed_input) - ``` - """ - - def pre(self, inp): - """Implement pre gradient descent update modification to the input. If pre-processing is not desired, - simply ignore the implementation. It returns the unmodified `inp` by default. - - Args: - inp: An N-dim numpy array of shape: `(samples, channels, image_dims...)` if `image_data_format= - channels_first` or `(samples, image_dims..., channels)` if `image_data_format=channels_last`. - - Returns: - The modified pre input. - """ - return inp - - def post(self, inp): - """Implement post gradient descent update modification to the input. If post-processing is not desired, - simply ignore the implementation. It returns the unmodified `inp` by default. - - Args: - inp: An N-dim numpy array of shape: `(samples, channels, image_dims...)` if `image_data_format= - channels_first` or `(samples, image_dims..., channels)` if `image_data_format=channels_last`. - - Returns: - The modified post input. - """ - return inp - - -class Jitter(InputModifier): - - def __init__(self, jitter=0.05): - """Implements an input modifier that introduces random jitter in `pre`. - Jitter has been shown to produce crisper activation maximization images. - - Args: - jitter: The amount of jitter to apply, scalar or sequence. - If a scalar, same jitter is applied to all image dims. If sequence, `jitter` should contain a value - per image dim. - - A value between `[0., 1.]` is interpreted as a percentage of the image dimension. (Default value: 0.05) - """ - super(Jitter, self).__init__() - self.jitter = np.array(utils.listify(jitter)) - if np.any(jitter < 0.): - raise ValueError('Jitter value should be positive') - self._processed = False - - def _process_jitter_values(self, image_dims): - if len(self.jitter) == 1: - self.jitter = np.repeat(self.jitter, len(image_dims)) - if len(self.jitter) != len(image_dims): - raise RuntimeError('Jitter {}, does not match the number of image dims: {}' - .format(self.jitter, len(image_dims))) - - # Convert percentage to absolute values. - for i, jitter_value in enumerate(self.jitter): - if jitter_value < 1.: - self.jitter[i] = image_dims[i] * jitter_value - - # Round to int. - self.jitter = np.int32(self.jitter) - self._processed = True - - def pre(self, img): - if not self._processed: - image_dims = utils.get_img_shape(img)[2:] - self._process_jitter_values(image_dims) - - dim_offsets = [np.random.randint(-value, value + 1) - for value in self.jitter] - if K.image_data_format() == 'channels_first': - shift_vector = np.array([0, 0] + dim_offsets) - else: - shift_vector = np.array([0] + dim_offsets + [0]) - - return shift(img, shift_vector, mode='wrap', order=0) -from __future__ import absolute_import - -from keras import backend as K -from .utils import utils - - -class Loss(object): - """Abstract class for defining the loss function to be minimized. 
- The loss function should be built by defining `build_loss` function. - - The attribute `name` should be defined to identify loss function with verbose outputs. - Defaults to 'Unnamed Loss' if not overridden. - """ - - def __init__(self): - self.name = "Unnamed Loss" - - def __str__(self): - return self.name - - def build_loss(self): - """Implement this function to build the loss function expression. - Any additional arguments required to build this loss function may be passed in via `__init__`. - - Ideally, the function expression must be compatible with all keras backends and `channels_first` or - `channels_last` image_data_format(s). `utils.slicer` can be used to define data format agnostic slices. - (just define it in `channels_first` format, it will automatically shuffle indices for tensorflow - which uses `channels_last` format). - - ```python - # theano slice - conv_layer[:, filter_idx, ...] - - # TF slice - conv_layer[..., filter_idx] - - # Backend agnostic slice - conv_layer[utils.slicer[:, filter_idx, ...]] - ``` - - [utils.get_img_shape](vis.utils.utils.md#get_img_shape) is another optional utility that make this easier. - - Returns: - The loss expression. - """ - raise NotImplementedError() - - -class ActivationMaximization(Loss): - """A loss function that maximizes the activation of a set of filters within a particular layer. - - Typically this loss is used to ask the reverse question - What kind of input image would increase the networks - confidence, for say, dog class. This helps determine what the network might be internalizing as being the 'dog' - image space. - - One might also use this to generate an input image that maximizes both 'dog' and 'human' outputs on the final - `keras.layers.Dense` layer. - """ - - def __init__(self, layer, filter_indices): - """ - Args: - layer: The keras layer whose filters need to be maximized. This can either be a convolutional layer - or a dense layer. - filter_indices: filter indices within the layer to be maximized. - For `keras.layers.Dense` layer, `filter_idx` is interpreted as the output index. - - If you are optimizing final `keras.layers.Dense` layer to maximize class output, you tend to get - better results with 'linear' activation as opposed to 'softmax'. This is because 'softmax' - output can be maximized by minimizing scores for other classes. - """ - super(ActivationMaximization, self).__init__() - self.name = "ActivationMax Loss" - self.layer = layer - self.filter_indices = utils.listify(filter_indices) - - def build_loss(self): - layer_output = self.layer.output - - # For all other layers it is 4 - is_dense = K.ndim(layer_output) == 2 - - loss = 0. - for idx in self.filter_indices: - if is_dense: - loss += -K.mean(layer_output[:, idx]) - else: - # slicer is used to deal with `channels_first` or `channels_last` image data formats - # without the ugly conditional statements. - loss += -K.mean(layer_output[utils.slicer[:, idx, ...]]) - - return loss -from __future__ import absolute_import - -import numpy as np -from keras import backend as K - -from .callbacks import Print -from .grad_modifiers import get -from .utils import utils - - -_PRINT_CALLBACK = Print() - - -def _identity(x): - return x - - -class Optimizer(object): - - def __init__(self, input_tensor, losses, input_range=(0, 255), wrt_tensor=None, norm_grads=True): - """Creates an optimizer that minimizes weighted loss function. 
-
-        Args:
-            input_tensor: An input tensor of shape: `(samples, channels, image_dims...)` if `image_data_format=
-                channels_first` or `(samples, image_dims..., channels)` if `image_data_format=channels_last`.
-            losses: List of ([Loss](vis.losses#Loss), weight) tuples.
-            input_range: Specifies the input range as a `(min, max)` tuple. This is used to rescale the
-                final optimized input to the given range. (Default value=(0, 255))
-            wrt_tensor: Short for, with respect to. This instructs the optimizer that the aggregate loss from `losses`
-                should be minimized with respect to `wrt_tensor`.
-                `wrt_tensor` can be any tensor that is part of the model graph. Default value is set to None
-                which means that loss will simply be minimized with respect to `input_tensor`.
-            norm_grads: True to normalize gradients. Normalization avoids very small or large gradients and ensures
-                a smooth gradient descent process. If you want the actual gradients
-                (for example, when visualizing attention), set this to False.
-        """
-        self.input_tensor = input_tensor
-        self.input_range = input_range
-        self.loss_names = []
-        self.loss_functions = []
-        self.wrt_tensor = self.input_tensor if wrt_tensor is None else wrt_tensor
-        if self.input_tensor is self.wrt_tensor:
-            self.wrt_tensor_is_input_tensor = True
-            self.wrt_tensor = K.identity(self.wrt_tensor)
-        else:
-            self.wrt_tensor_is_input_tensor = False
-
-        overall_loss = None
-        for loss, weight in losses:
-            # Perf optimization. Don't build loss function with 0 weight.
-            if weight != 0:
-                loss_fn = weight * loss.build_loss()
-                overall_loss = loss_fn if overall_loss is None else overall_loss + loss_fn
-                self.loss_names.append(loss.name)
-                self.loss_functions.append(loss_fn)
-
-        # Compute gradient of overall loss with respect to the `wrt` tensor.
-        if self.wrt_tensor_is_input_tensor:
-            grads = K.gradients(overall_loss, self.input_tensor)[0]
-        else:
-            grads = K.gradients(overall_loss, self.wrt_tensor)[0]
-        if norm_grads:
-            grads = K.l2_normalize(grads)
-
-        # The main function to compute various quantities in the optimization loop.
-        self.compute_fn = K.function([self.input_tensor, K.learning_phase()],
-                                     self.loss_functions + [overall_loss, grads, self.wrt_tensor])
-
-    def _rmsprop(self, grads, cache=None, decay_rate=0.95):
-        """Uses RMSProp to compute a step from the gradients.
-
-        Args:
-            grads: numpy array of gradients.
-            cache: numpy array of same shape as `grads`, used as the RMSProp cache.
-            decay_rate: How fast to decay the cache.
-
-        Returns:
-            A tuple of
-                step: numpy array of the same shape as `grads`, giving the step.
-                    Note that this does not yet take the learning rate into account.
-                cache: Updated RMSProp cache.
-        """
-        if cache is None:
-            cache = np.zeros_like(grads)
-        cache = decay_rate * cache + (1 - decay_rate) * grads ** 2
-        step = -grads / np.sqrt(cache + K.epsilon())
-        return step, cache
-
-    def _get_seed_input(self, seed_input):
-        """Creates a random `seed_input` if None. Otherwise:
-        - Ensures batch_size dim on provided `seed_input`.
-        - Shuffles axes according to the expected `image_data_format`.
-        """
-        desired_shape = (1, ) + K.int_shape(self.input_tensor)[1:]
-        if seed_input is None:
-            return utils.random_array(desired_shape, mean=np.mean(self.input_range),
-                                      std=0.05 * (self.input_range[1] - self.input_range[0]))
-
-        # Add batch dim if needed.
-        if len(seed_input.shape) != len(desired_shape):
-            seed_input = np.expand_dims(seed_input, 0)
-
-        # Only possible if channel idx is out of place.
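-        # e.g., a (1, image_dims..., channels) seed is moved to (1, channels, image_dims...) to match the model.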
-        if seed_input.shape[-1] != desired_shape[-1] and \
-                seed_input.shape[1] != desired_shape[1]:
-            seed_input = np.moveaxis(seed_input, -1, 1)
-        return seed_input.astype(K.floatx())
-
-    def minimize(self, seed_input=None, max_iter=200,
-                 input_modifiers=None, grad_modifier=None,
-                 callbacks=None, verbose=True):
-        """Performs gradient descent on the input image with respect to defined losses.
-
-        Args:
-            seed_input: An N-dim numpy array of shape: `(samples, channels, image_dims...)` if `image_data_format=
-                channels_first` or `(samples, image_dims..., channels)` if `image_data_format=channels_last`.
-                Seeded with random noise if set to None. (Default value = None)
-            max_iter: The maximum number of gradient descent iterations. (Default value = 200)
-            input_modifiers: A list of [InputModifier](vis.input_modifiers#inputmodifier) instances specifying
-                how to make `pre` and `post` changes to the optimized input during the optimization process.
-                `pre` is applied in list order while `post` is applied in reverse order. For example,
-                `input_modifiers = [f, g]` means that `pre_input = g(f(inp))` and `post_input = f(g(inp))`.
-            grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). If you don't
-                specify anything, gradients are unchanged. (Default value = None)
-            callbacks: A list of [OptimizerCallback](vis.callbacks#optimizercallback) instances to trigger.
-            verbose: Logs individual losses at the end of every gradient descent iteration.
-                Very useful to estimate loss weight factor(s). (Default value = True)
-
-        Returns:
-            The tuple of `(optimized input, grads with respect to wrt, wrt_value)` after gradient descent iterations.
-        """
-        seed_input = self._get_seed_input(seed_input)
-        input_modifiers = input_modifiers or []
-        grad_modifier = _identity if grad_modifier is None else get(
-            grad_modifier)
-
-        callbacks = callbacks or []
-        if verbose:
-            callbacks.append(_PRINT_CALLBACK)
-
-        cache = None
-        best_loss = float('inf')
-        best_input = None
-
-        grads = None
-        wrt_value = None
-
-        for i in range(max_iter):
-            # Apply modifiers `pre` step
-            for modifier in input_modifiers:
-                seed_input = modifier.pre(seed_input)
-
-            # 0 learning phase for 'test'
-            computed_values = self.compute_fn([seed_input, 0])
-            losses = computed_values[:len(self.loss_names)]
-            named_losses = list(zip(self.loss_names, losses))
-            overall_loss, grads, wrt_value = computed_values[len(
-                self.loss_names):]
-
-            # TODO: theano grads shape is inconsistent for some reason. Patch for now and investigate later.
-            if grads.shape != wrt_value.shape:
-                grads = np.reshape(grads, wrt_value.shape)
-
-            # Apply grad modifier.
-            grads = grad_modifier(grads)
-
-            # Trigger callbacks
-            for c in callbacks:
-                c.callback(i, named_losses, overall_loss, grads, wrt_value)
-
-            # Gradient descent update.
-            # It only makes sense to do this if wrt_tensor is input_tensor. Otherwise shapes won't match for the update.
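-            # The RMSProp step below is -grad / sqrt(decayed running mean of grad**2); see `_rmsprop` above.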
-            if self.wrt_tensor_is_input_tensor:
-                step, cache = self._rmsprop(grads, cache)
-                seed_input += step
-
-            # Apply modifiers `post` step
-            for modifier in reversed(input_modifiers):
-                seed_input = modifier.post(seed_input)
-
-            if overall_loss < best_loss:
-                best_loss = overall_loss.copy()
-                best_input = seed_input.copy()
-
-        # Trigger on_end
-        for c in callbacks:
-            c.on_end()
-
-        return utils.deprocess_input(best_input[0], self.input_range), grads, wrt_value
-from __future__ import absolute_import
-from __future__ import division
-
-import numpy as np
-from keras import backend as K
-
-from .losses import Loss
-from .utils import utils
-
-
-def normalize(input_tensor, output_tensor):
-    """Normalizes the `output_tensor` with respect to `input_tensor` dimensions.
-    This makes the regularizer weight factor more or less uniform across various input image dimensions.
-
-    Args:
-        input_tensor: A tensor of shape: `(samples, channels, image_dims...)` if `image_data_format=
-            channels_first` or `(samples, image_dims..., channels)` if `image_data_format=channels_last`.
-        output_tensor: The tensor to normalize.
-
-    Returns:
-        The normalized tensor.
-    """
-    image_dims = utils.get_img_shape(input_tensor)[1:]
-    return output_tensor / np.prod(image_dims)
-
-
-class TotalVariation(Loss):
-
-    def __init__(self, img_input, beta=2.):
-        """Total variation regularizer encourages blobbier and more coherent image structures, akin to natural images.
-        See `section 3.2.2` in
-        [Visualizing deep convolutional neural networks using natural pre-images](https://arxiv.org/pdf/1512.02017v3.pdf)
-        for details.
-
-        Args:
-            img_input: An image tensor of shape: `(samples, channels, image_dims...)` if `image_data_format=
-                channels_first` or `(samples, image_dims..., channels)` if `image_data_format=channels_last`.
-            beta: Smaller values of beta give sharper but 'spikier' images.
-                Values \(\in [1.5, 3.0]\) are recommended as a reasonable compromise. (Default value = 2.)
-        """
-        super(TotalVariation, self).__init__()
-        self.name = "TV({}) Loss".format(beta)
-        self.img = img_input
-        self.beta = beta
-
-    def build_loss(self):
-        r"""Implements the N-dim version of the function
-        $$TV^{\beta}(x) = \sum_{whc} \left ( \left ( x(h, w+1, c) - x(h, w, c) \right )^{2} +
-        \left ( x(h+1, w, c) - x(h, w, c) \right )^{2} \right )^{\frac{\beta}{2}}$$
-        to return the total variation for all images in the batch.
-        """
-        image_dims = K.ndim(self.img) - 2
-
-        # Constructing slice [1:] + [:-1] * (image_dims - 1) and [:-1] * (image_dims)
-        start_slice = [slice(1, None, None)] + [slice(None, -1, None)
-                                                for _ in range(image_dims - 1)]
-        end_slice = [slice(None, -1, None) for _ in range(image_dims)]
-        samples_channels_slice = [
-            slice(None, None, None), slice(None, None, None)]
-
-        # Compute pixel diffs by rolling slices to the right per image dim.
-        tv = None
-        for i in range(image_dims):
-            ss = tuple(samples_channels_slice + start_slice)
-            es = tuple(samples_channels_slice + end_slice)
-            diff_square = K.square(
-                self.img[utils.slicer[ss]] - self.img[utils.slicer[es]])
-            tv = diff_square if tv is None else tv + diff_square
-
-            # Roll over to the next image dim.
-            start_slice = np.roll(start_slice, 1).tolist()
-            end_slice = np.roll(end_slice, 1).tolist()
-
-        tv = K.sum(K.pow(tv, self.beta / 2.))
-        return normalize(self.img, tv)
-
-
-class LPNorm(Loss):
-
-    def __init__(self, img_input, p=6.):
-        """
-        Builds an L-p norm function. This regularizer encourages the intensity of pixels to stay bounded,
-        i.e., prevents pixels from taking on very large values.
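-
-        For finite p, `build_loss` below computes \(\left( \sum_{i} \left| x_{i} \right|^{p} \right)^{1/p}\)
-        (before normalization); for `p = float('inf')`, the maximum value of the image tensor is used instead.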
- - Args: - img_input: 4D image input tensor to the model of shape: `(samples, channels, rows, cols)` - if data_format='channels_first' or `(samples, rows, cols, channels)` if data_format='channels_last'. - p: The pth norm to use. If p = float('inf'), infinity-norm will be used. - """ - super(LPNorm, self).__init__() - if p < 1: - raise ValueError('p value should range between [1, inf)') - self.name = "L-{} Norm Loss".format(p) - self.p = p - self.img = img_input - - def build_loss(self): - # Infinity norm - if np.isinf(self.p): - value = K.max(self.img) - else: - value = K.pow(K.sum(K.pow(K.abs(self.img), self.p)), 1. / self.p) - - return normalize(self.img, value) -from keras.layers.core import Dropout, Flatten -from keras.layers.convolutional import MaxPooling2D, Conv2D -from keras.models import Model -from keras.layers import Input, Dense - -FRAME_H = 70 -FRAME_W = 180 - - -def build_model(): - inp = Input(shape=(FRAME_H, FRAME_W, 3)) - x = Conv2D(filters=8, kernel_size=(5, 5), activation='relu')(inp) - x = MaxPooling2D((2, 2))(x) - - x = Conv2D(filters=16, kernel_size=(5, 5), activation='relu')(x) - x = MaxPooling2D((2, 2))(x) - - x = Conv2D(filters=32, kernel_size=(5, 5), activation='relu')(x) - x = MaxPooling2D((2, 2))(x) - - x = Flatten()(x) - x = Dropout(0.5)(x) - x = Dense(128, activation='relu')(x) - x = Dropout(0.5)(x) - x = Dense(1, activation='tanh')(x) - return Model(inputs=[inp], outputs=[x]) - - -if __name__ == '__main__': - model = build_model() - model.summary() -import pytest - -import keras.backend as K -from keras.layers import Dense -from keras.models import Sequential -from vis.optimizer import Optimizer -from vis.losses import Loss - - -class _DummyLoss(Loss): - def __init__(self, model): - self.name = 'dummy-loss' - self.output = model.output - - def build_loss(self): - return K.sum(self.output * self.output) - - -@pytest.fixture(scope="function", autouse=True) -def model_and_losses(): - model = Sequential([Dense(4, activation='linear', input_shape=(2, ))]) - losses = [(_DummyLoss(model), 1)] - return model, losses - - -def test_wrt_tensor_is_None(model_and_losses): - model, losses = model_and_losses - opt = Optimizer(model.input, losses, wrt_tensor=None) - opt.minimize() - - assert opt.wrt_tensor_is_input_tensor - assert opt.wrt_tensor is not None - assert opt.wrt_tensor != opt.input_tensor - - -def test_wrt_tensor_is_input_tensor(model_and_losses): - model, losses = model_and_losses - opt = Optimizer(model.input, losses, wrt_tensor=model.input) - opt.minimize() - - assert opt.wrt_tensor_is_input_tensor - assert opt.wrt_tensor is not None - assert opt.wrt_tensor != opt.input_tensor - - -def test_wrt_tensor_isnt_input_tensor(model_and_losses): - model, losses = model_and_losses - opt = Optimizer(model.input, losses, wrt_tensor=model.output) - opt.minimize() - - assert not opt.wrt_tensor_is_input_tensor - assert opt.wrt_tensor is not None - assert opt.wrt_tensor != opt.input_tensor - - -if __name__ == '__main__': - pytest.main([__file__]) -from keras import backend as K - - -# Import backend depending on config -if K.backend() == 'tensorflow': - from .tensorflow_backend import * -elif K.backend() == 'theano': - from .theano_backend import * -else: - raise ValueError("Backend '{}' not supported".format(K.backend())) -from __future__ import absolute_import - -import os -import tempfile -import inspect -import numpy as np -import tensorflow as tf - -from ..utils import utils -from tensorflow.python.framework import ops -import keras -from keras.models import 
load_model
-from keras.layers import advanced_activations, Activation
-
-
-# Register all classes with the `advanced_activations` module.
-_ADVANCED_ACTIVATIONS = set()
-for name, obj in inspect.getmembers(advanced_activations, inspect.isclass):
-    if not name.startswith("_") and hasattr(obj, "__module__") and obj.__module__ == advanced_activations.__name__:
-        _ADVANCED_ACTIVATIONS.add(obj)
-_ADVANCED_ACTIVATIONS = tuple(_ADVANCED_ACTIVATIONS)
-
-
-def _register_guided_gradient(name):
-    if name not in ops._gradient_registry._registry:
-        @tf.RegisterGradient(name)
-        def _guided_backprop(op, grad):
-            dtype = op.outputs[0].dtype
-            gate_g = tf.cast(grad > 0., dtype)
-            gate_y = tf.cast(op.outputs[0] > 0., dtype)
-            return gate_y * gate_g * grad
-
-
-def _register_rectified_gradient(name):
-    if name not in ops._gradient_registry._registry:
-        @tf.RegisterGradient(name)
-        def _relu_backprop(op, grad):
-            dtype = op.outputs[0].dtype
-            gate_g = tf.cast(grad > 0., dtype)
-            return gate_g * grad
-
-
-# Map of modifier type to registration function.
-_BACKPROP_MODIFIERS = {
-    'guided': _register_guided_gradient,
-    'rectified': _register_rectified_gradient
-}
-
-
-# Maintain a mapping of (original model, backprop_modifier) -> modified model as a cache.
-_MODIFIED_MODEL_CACHE = dict()
-
-
-def modify_model_backprop(model, backprop_modifier):
-    """Creates a copy of model by modifying all activations to use a custom op to modify the backprop behavior.
-
-    Args:
-        model: The `keras.models.Model` instance.
-        backprop_modifier: One of `{'guided', 'rectified'}`
-
-    Returns:
-        A copy of model with modified activations for backwards pass.
-    """
-    # The general strategy is as follows:
-    # - Save the original model so that upstream callers don't see unexpected results with their models.
-    # - Call a backend specific function that registers the custom op and loads the model under a modified
-    #   context manager.
-    # - Maintain a cache to save this expensive process on subsequent calls.
-    # - Load the model with a custom context modifying backprop behavior.
-    #
-    # The reason for this roundabout way is that the graph needs to be rebuilt when any of its layer builder
-    # functions are changed. This is very complicated to do in Keras and makes the implementation very tightly bound
-    # with keras internals. By saving and loading models, we don't have to worry about future compatibility.
-    #
-    # The only exception to this is the way advanced activations are handled, which makes use of some keras internal
-    # knowledge and might break in the future.
-    # ADD on 22 Jul 2018:
-    # In fact, it has broken. Currently, advanced activations are not supported.
-
-    # 0. Retrieve from cache if previously computed.
-    modified_model = _MODIFIED_MODEL_CACHE.get((model, backprop_modifier))
-    if modified_model is not None:
-        return modified_model
-
-    model_path = os.path.join(tempfile.gettempdir(), next(
-        tempfile._get_candidate_names()) + '.h5')
-    try:
-        # 1. Save the original model.
-        model.save(model_path)
-
-        # 2. Register the modifier and load the modified model under a custom context.
-        modifier_fn = _BACKPROP_MODIFIERS.get(backprop_modifier)
-        if modifier_fn is None:
-            raise ValueError(
-                "'{}' modifier is not supported".format(backprop_modifier))
-        modifier_fn(backprop_modifier)
-
-        # 3. Create the graph under the custom context manager.
-        with tf.get_default_graph().gradient_override_map({'Relu': backprop_modifier}):
-            # This should rebuild the graph with modifications.
-            modified_model = load_model(model_path)
-
-            # Cache to improve subsequent call performance.
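-            # The cache key is the (model, backprop_modifier) pair, so each modified variant is built only once.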
- _MODIFIED_MODEL_CACHE[(model, backprop_modifier)] = modified_model - return modified_model - finally: - os.remove(model_path) - - -def set_random_seed(seed_value=1337): - """Sets random seed value for reproducibility. - - Args: - seed_value: The seed value to use. (Default Value = infamous 1337) - """ - np.random.seed(seed_value) - tf.set_random_seed(seed_value) -from __future__ import absolute_import -import numpy as np - - -def modify_model_backprop(model, backprop_modifier): - """Creates a copy of model by modifying all activations to use a custom op to modify the backprop behavior. - - Args: - model: The `keras.models.Model` instance. - backprop_modifier: One of `{'guided', 'rectified'}` - - Returns: - A copy of model with modified activations for backwards pass. - """ - raise NotImplementedError('Theano version is not supported yet.') - - -def set_random_seed(seed_value=1337): - """Sets random seed value for reproducibility. - - Args: - seed_value: The seed value to use. (Default Value = infamous 1337) - """ - np.random.seed(seed_value) -from __future__ import absolute_import - -import six -import tensorflow as tf -from keras import backend as K -from . import utils - - -def across_data_formats(func): - """Function wrapper to run tests on multiple keras data_format and clean up after TensorFlow tests. - - Args: - func: test function to clean up after. - - Returns: - A function wrapping the input function. - """ - @six.wraps(func) - def wrapper(*args, **kwargs): - for data_format in {'channels_first', 'channels_last'}: - K.set_image_data_format(data_format) - func(*args, **kwargs) - if K.backend() == 'tensorflow': - K.clear_session() - tf.reset_default_graph() - return wrapper - - -def skip_backends(backends): - """Function wrapper to specify which backends should skip the test. - - Args: - backends: The list of backends to skip. - - Returns: - A function wrapping the input function. - """ - backends = set(utils.listify(backends)) - - def decorator(func): - @six.wraps(func) - def wrapper(*args, **kwargs): - if K.backend() in backends: - return - func(*args, **kwargs) - return wrapper - return decorator -from __future__ import absolute_import -from __future__ import division - -import os -import tempfile -import math -import json -import six - -import numpy as np -import matplotlib.font_manager as fontman - -from skimage import io, transform -from keras import backend as K -from keras.models import load_model - -import logging -logger = logging.getLogger(__name__) - -try: - import PIL as pil - from PIL import ImageFont - from PIL import Image - from PIL import ImageDraw -except ImportError: - pil = None - - -# Globals -_CLASS_INDEX = None - - -def _check_pil(): - if not pil: - raise ImportError('Failed to import PIL. You must install Pillow') - - -def _find_font_file(query): - """Utility to find font file. - """ - return list(filter(lambda path: query.lower() in os.path.basename(path).lower(), fontman.findSystemFonts())) - - -def reverse_enumerate(iterable): - """Enumerate over an iterable in reverse order while retaining proper indexes, without creating any copies. - """ - return zip(reversed(range(len(iterable))), reversed(iterable)) - - -def listify(value): - """Ensures that the value is a list. If it is not a list, it creates a new list with `value` as an item. 
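-    For example, `listify(1)` returns `[1]`, while `listify([1, 2])` is returned unchanged.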
- """ - if not isinstance(value, list): - value = [value] - return value - - -def add_defaults_to_kwargs(defaults, **kwargs): - """Updates `kwargs` with dict of `defaults` - - Args: - defaults: A dictionary of keys and values - **kwargs: The kwargs to update. - - Returns: - The updated kwargs. - """ - defaults = dict(defaults) - defaults.update(kwargs) - return defaults - - -def get_identifier(identifier, module_globals, module_name): - """Helper utility to retrieve the callable function associated with a string identifier. - - Args: - identifier: The identifier. Could be a string or function. - module_globals: The global objects of the module. - module_name: The module name - - Returns: - The callable associated with the identifier. - """ - if isinstance(identifier, six.string_types): - fn = module_globals.get(identifier) - if fn is None: - raise ValueError('Unknown {}: {}'.format(module_name, identifier)) - return fn - elif callable(identifier): - return identifier - else: - raise ValueError('Could not interpret identifier') - - -def apply_modifications(model, custom_objects=None): - """Applies modifications to the model layers to create a new Graph. For example, simply changing - `model.layers[idx].activation = new activation` does not change the graph. The entire graph needs to be updated - with modified inbound and outbound tensors because of change in layer building function. - - Args: - model: The `keras.models.Model` instance. - - Returns: - The modified model with changes applied. Does not mutate the original `model`. - """ - # The strategy is to save the modified model and load it back. This is done because setting the activation - # in a Keras layer doesnt actually change the graph. We have to iterate the entire graph and change the - # layer inbound and outbound nodes with modified tensors. This is doubly complicated in Keras 2.x since - # multiple inbound and outbound nodes are allowed with the Graph API. - model_path = os.path.join(tempfile.gettempdir(), next( - tempfile._get_candidate_names()) + '.h5') - try: - model.save(model_path) - return load_model(model_path, custom_objects=custom_objects) - finally: - os.remove(model_path) - - -def random_array(shape, mean=128., std=20.): - """Creates a uniformly distributed random array with the given `mean` and `std`. - - Args: - shape: The desired shape - mean: The desired mean (Default value = 128) - std: The desired std (Default value = 20) - - Returns: Random numpy array of given `shape` uniformly distributed with desired `mean` and `std`. - """ - x = np.random.random(shape) - # normalize around mean=0, std=1 - x = (x - np.mean(x)) / (np.std(x) + K.epsilon()) - # and then around the desired mean/std - x = (x * std) + mean - return x - - -def find_layer_idx(model, layer_name): - """Looks up the layer index corresponding to `layer_name` from `model`. - - Args: - model: The `keras.models.Model` instance. - layer_name: The name of the layer to lookup. - - Returns: - The layer index if found. Raises an exception otherwise. - """ - layer_idx = None - for idx, layer in enumerate(model.layers): - if layer.name == layer_name: - layer_idx = idx - break - - if layer_idx is None: - raise ValueError( - "No layer with name '{}' within the model".format(layer_name)) - return layer_idx - - -def deprocess_input(input_array, input_range=(0, 255)): - """Utility function to scale the `input_array` to `input_range` throwing away high frequency artifacts. - - Args: - input_array: An N-dim numpy array. 
- input_range: Specifies the input range as a `(min, max)` tuple to rescale the `input_array`. - - Returns: - The rescaled `input_array`. - """ - # normalize tensor: center on 0., ensure std is 0.1 - input_array = input_array.copy() - input_array -= input_array.mean() - input_array /= (input_array.std() + K.epsilon()) - input_array *= 0.1 - - # clip to [0, 1] - input_array += 0.5 - input_array = np.clip(input_array, 0, 1) - - # Convert to `input_range` - return (input_range[1] - input_range[0]) * input_array + input_range[0] - - -def stitch_images(images, margin=5, cols=5): - """Utility function to stitch images together with a `margin`. - - Args: - images: The array of 2D images to stitch. - margin: The black border margin size between images (Default value = 5) - cols: Max number of image cols. New row is created when number of images exceed the column size. - (Default value = 5) - - Returns: - A single numpy image array comprising of input images. - """ - if len(images) == 0: - return None - - h, w, c = images[0].shape - n_rows = int(math.ceil(len(images) / cols)) - n_cols = min(len(images), cols) - - out_w = n_cols * w + (n_cols - 1) * margin - out_h = n_rows * h + (n_rows - 1) * margin - stitched_images = np.zeros((out_h, out_w, c), dtype=images[0].dtype) - - for row in range(n_rows): - for col in range(n_cols): - img_idx = row * cols + col - if img_idx >= len(images): - break - - stitched_images[(h + margin) * row: (h + margin) * row + h, - (w + margin) * col: (w + margin) * col + w, :] = images[img_idx] - - return stitched_images - - -def get_img_shape(img): - """Returns image shape in a backend agnostic manner. - - Args: - img: An image tensor of shape: `(channels, image_dims...)` if data_format='channels_first' or - `(image_dims..., channels)` if data_format='channels_last'. - - Returns: - Tuple containing image shape information in `(samples, channels, image_dims...)` order. - """ - if isinstance(img, np.ndarray): - shape = img.shape - else: - shape = K.int_shape(img) - - if K.image_data_format() == 'channels_last': - shape = list(shape) - shape.insert(1, shape[-1]) - shape = tuple(shape[:-1]) - return shape - - -def load_img(path, grayscale=False, target_size=None): - """Utility function to load an image from disk. - - Args: - path: The image file path. - grayscale: True to convert to grayscale image (Default value = False) - target_size: (w, h) to resize. (Default value = None) - - Returns: - The loaded numpy image. - """ - img = io.imread(path, grayscale) - if target_size: - img = transform.resize( - img, target_size, preserve_range=True).astype('uint8') - return img - - -def lookup_imagenet_labels(indices): - """Utility function to return the image net label for the final `dense` layer output index. - - Args: - indices: Could be a single value or an array of indices whose labels should be looked up. - - Returns: - Image net label corresponding to the image category. - """ - global _CLASS_INDEX - if _CLASS_INDEX is None: - with open(os.path.join(os.path.dirname(__file__), '../../resources/imagenet_class_index.json')) as f: - _CLASS_INDEX = json.load(f) - - indices = listify(indices) - return [_CLASS_INDEX[str(idx)][1] for idx in indices] - - -def draw_text(img, text, position=(10, 10), font='FreeSans.ttf', font_size=14, color=(0, 0, 0)): - """Draws text over the image. Requires PIL. - - Args: - img: The image to use. - text: The text string to overlay. - position: The text (x, y) position. (Default value = (10, 10)) - font: The ttf or open type font to use. 
(Default value = 'FreeSans.ttf')
-        font_size: The text font size. (Default value = 14)
-        color: The (r, g, b) values for text color. (Default value = (0, 0, 0))
-
-    Returns: Image overlaid with text.
-    """
-    _check_pil()
-
-    font_files = _find_font_file(font)
-    if len(font_files) == 0:
-        logger.warning(
-            "Failed to lookup font '{}', falling back to default".format(font))
-        font = ImageFont.load_default()
-    else:
-        font = ImageFont.truetype(font_files[0], font_size)
-
-    # Don't mutate the original image.
-    img = Image.fromarray(img)
-    draw = ImageDraw.Draw(img)
-    draw.text(position, text, fill=color, font=font)
-    return np.asarray(img)
-
-
-def bgr2rgb(img):
-    """Converts an RGB image to BGR and vice versa.
-
-    Args:
-        img: Numpy array in RGB or BGR format
-
-    Returns: The converted image format
-    """
-    return img[..., ::-1]
-
-
-def normalize(array, min_value=0., max_value=1.):
-    """Normalizes the numpy array to (min_value, max_value)
-
-    Args:
-        array: The numpy array
-        min_value: The min value in normalized array (Default value = 0)
-        max_value: The max value in normalized array (Default value = 1)
-
-    Returns:
-        The array normalized to range between (min_value, max_value)
-    """
-    arr_min = np.min(array)
-    arr_max = np.max(array)
-    normalized = (array - arr_min) / (arr_max - arr_min + K.epsilon())
-    return (max_value - min_value) * normalized + min_value
-
-
-class _BackendAgnosticImageSlice(object):
-    """Utility class to make image slicing uniform across various `image_data_format`.
-    """
-
-    def __getitem__(self, item_slice):
-        """Assuming a slice for shape `(samples, channels, image_dims...)`
-        """
-        if K.image_data_format() == 'channels_first':
-            return item_slice
-        else:
-            # Move channel index to last position.
-            item_slice = list(item_slice)
-            item_slice.append(item_slice.pop(1))
-            return tuple(item_slice)
-
-
-"""Slice utility to make image slicing uniform across various `image_data_format`.
-Example:
-    conv_layer[utils.slicer[:, filter_idx, :, :]] will work for both `channels_first` and `channels_last` image
-    data formats even though, in tensorflow, the slice should be conv_layer[utils.slicer[:, :, :, filter_idx]]
-"""
-slicer = _BackendAgnosticImageSlice()
-from __future__ import absolute_import
-
-
-from .activation_maximization import visualize_activation_with_losses
-from .activation_maximization import visualize_activation
-
-from .saliency import visualize_saliency_with_losses
-from .saliency import visualize_saliency
-from .saliency import visualize_cam_with_losses
-from .saliency import visualize_cam
-
-from keras import backend as K
-
-
-def get_num_filters(layer):
-    """Determines the number of filters within the given `layer`.
-
-    Args:
-        layer: The keras layer to use.
-
-    Returns:
-        Total number of filters within `layer`.
-        For `keras.layers.Dense` layer, this is the total number of outputs.
-    """
-    # Handle layers with no channels.
-    if K.ndim(layer.output) == 2:
-        return K.int_shape(layer.output)[-1]
-
-    channel_idx = 1 if K.image_data_format() == 'channels_first' else -1
-    return K.int_shape(layer.output)[channel_idx]
-
-
-def overlay(array1, array2, alpha=0.5):
-    """Overlays `array1` onto `array2` with `alpha` blending.
-
-    Args:
-        array1: The first numpy array.
-        array2: The second numpy array.
-        alpha: The alpha value of `array1` as overlaid onto `array2`. This value needs to be between [0, 1],
-            with 0 being `array2` only and 1 being `array1` only. (Default value = 0.5)
-
-    Returns:
-        The `array1`, overlaid with `array2` using `alpha` blending.
-    """
-    if alpha < 0. or alpha > 1.:
-        raise ValueError("`alpha` needs to be between [0, 1]")
-    if array1.shape != array2.shape:
-        raise ValueError('`array1` and `array2` must have the same shapes')
-
-    return (array1 * alpha + array2 * (1. - alpha)).astype(array1.dtype)
-from __future__ import absolute_import
-
-import numpy as np
-from keras import backend as K
-
-from ..losses import ActivationMaximization
-from ..optimizer import Optimizer
-from ..regularizers import TotalVariation, LPNorm
-from ..backprop_modifiers import get
-from ..utils import utils
-
-
-def visualize_activation_with_losses(input_tensor, losses, wrt_tensor=None,
-                                     seed_input=None, input_range=(0, 255),
-                                     **optimizer_params):
-    """Generates the `input_tensor` that minimizes the weighted `losses`. This function is intended for advanced
-    use cases where a custom loss is desired.
-
-    Args:
-        input_tensor: An input tensor of shape: `(samples, channels, image_dims...)` if `image_data_format=
-            channels_first` or `(samples, image_dims..., channels)` if `image_data_format=channels_last`.
-        wrt_tensor: Short for, with respect to. The gradients of losses are computed with respect to this tensor.
-            When None, this is assumed to be the same as `input_tensor` (Default value: None)
-        losses: List of ([Loss](vis.losses#Loss), weight) tuples.
-        seed_input: Seeds the optimization with a starting image. Initialized with a random value when set to None.
-            (Default value = None)
-        input_range: Specifies the input range as a `(min, max)` tuple. This is used to rescale the
-            final optimized input to the given range. (Default value=(0, 255))
-        optimizer_params: The **kwargs for optimizer [params](vis.optimizer#optimizerminimize). Will default to
-            reasonable values when required keys are not found.
-
-    Returns:
-        The model input that minimizes the weighted `losses`.
-    """
-    # Default optimizer kwargs.
-    optimizer_params = utils.add_defaults_to_kwargs({
-        'seed_input': seed_input,
-        'max_iter': 200,
-        'verbose': False
-    }, **optimizer_params)
-
-    opt = Optimizer(input_tensor, losses, input_range, wrt_tensor=wrt_tensor)
-    img = opt.minimize(**optimizer_params)[0]
-
-    # If the range has integer numbers, cast to 'uint8'.
-    if isinstance(input_range[0], int) and isinstance(input_range[1], int):
-        img = np.clip(img, input_range[0], input_range[1]).astype('uint8')
-
-    if K.image_data_format() == 'channels_first':
-        img = np.moveaxis(img, 0, -1)
-    return img
-
-
-def visualize_activation(model, layer_idx, filter_indices=None, wrt_tensor=None,
-                         seed_input=None, input_range=(0, 255),
-                         backprop_modifier=None, grad_modifier=None,
-                         act_max_weight=1, lp_norm_weight=10, tv_weight=10,
-                         **optimizer_params):
-    """Generates the model input that maximizes the output of all `filter_indices` in the given `layer_idx`.
-
-    Args:
-        model: The `keras.models.Model` instance. The model input shape must be: `(samples, channels, image_dims...)`
-            if `image_data_format=channels_first` or `(samples, image_dims..., channels)` if
-            `image_data_format=channels_last`.
-        layer_idx: The layer index within `model.layers` whose filters need to be visualized.
-        filter_indices: filter indices within the layer to be maximized.
-            If None, all filters are visualized. (Default value = None)
-            For `keras.layers.Dense` layer, `filter_idx` is interpreted as the output index.
-            If you are visualizing the final `keras.layers.Dense` layer, consider switching the 'softmax' activation
-            for 'linear' using [utils.apply_modifications](vis.utils.utils#apply_modifications) for better results,
-            as shown in the sketch below.
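-
-            ```python
-            # An illustrative sketch only: assumes `keras` is imported and `layer_idx`
-            # indexes the final softmax Dense layer.
-            model.layers[layer_idx].activation = keras.activations.linear
-            model = utils.apply_modifications(model)
-            ```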
- wrt_tensor: Short for, with respect to. The gradients of losses are computed with respect to this tensor. - When None, this is assumed to be the same as `input_tensor` (Default value: None) - seed_input: Seeds the optimization with a starting input. Initialized with a random value when set to None. - (Default value = None) - input_range: Specifies the input range as a `(min, max)` tuple. This is used to rescale the - final optimized input to the given range. (Default value=(0, 255)) - backprop_modifier: backprop modifier to use. See [backprop_modifiers](vis.backprop_modifiers.md). If you don't - specify anything, no backprop modification is applied. (Default value = None) - grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). If you don't - specify anything, gradients are unchanged (Default value = None) - act_max_weight: The weight param for `ActivationMaximization` loss. Not used if 0 or None. (Default value = 1) - lp_norm_weight: The weight param for `LPNorm` regularization loss. Not used if 0 or None. (Default value = 10) - tv_weight: The weight param for `TotalVariation` regularization loss. Not used if 0 or None. (Default value = 10) - optimizer_params: The **kwargs for optimizer [params](vis.optimizer#optimizerminimize). Will default to - reasonable values when required keys are not found. - - Example: - If you wanted to visualize the input image that would maximize the output index 22, say on - final `keras.layers.Dense` layer, then, `filter_indices = [22]`, `layer_idx = dense_layer_idx`. - - If `filter_indices = [22, 23]`, then it should generate an input image that shows features of both classes. - - Returns: - The model input that maximizes the output of `filter_indices` in the given `layer_idx`. - """ - if backprop_modifier is not None: - modifier_fn = get(backprop_modifier) - model = modifier_fn(model) - - losses = [ - (ActivationMaximization( - model.layers[layer_idx], filter_indices), act_max_weight), - (LPNorm(model.input), lp_norm_weight), - (TotalVariation(model.input), tv_weight) - ] - - # Add grad_filter to optimizer_params. - optimizer_params = utils.add_defaults_to_kwargs({ - 'grad_modifier': grad_modifier - }, **optimizer_params) - - return visualize_activation_with_losses(model.input, losses, wrt_tensor, - seed_input, input_range, **optimizer_params) -from __future__ import absolute_import - -import numpy as np -from scipy.ndimage.interpolation import zoom - -from keras.layers.convolutional import _Conv -from keras.layers.pooling import _Pooling1D, _Pooling2D, _Pooling3D -from keras.layers.wrappers import Wrapper -from keras import backend as K - -from ..losses import ActivationMaximization -from ..optimizer import Optimizer -from ..backprop_modifiers import get -from ..utils import utils - - -def _find_penultimate_layer(model, layer_idx, penultimate_layer_idx): - """Searches for the nearest penultimate `Conv` or `Pooling` layer. - - Args: - model: The `keras.models.Model` instance. - layer_idx: The layer index within `model.layers`. - penultimate_layer_idx: The pre-layer to `layer_idx`. If set to None, the nearest penultimate - `Conv` or `Pooling` layer is used. - - Returns: - The penultimate layer. 
- """ - if penultimate_layer_idx is None: - for idx, layer in utils.reverse_enumerate(model.layers[:layer_idx - 1]): - if isinstance(layer, Wrapper): - layer = layer.layer - if isinstance(layer, (_Conv, _Pooling1D, _Pooling2D, _Pooling3D)): - penultimate_layer_idx = idx - break - - if penultimate_layer_idx is None: - raise ValueError('Unable to determine penultimate `Conv` or `Pooling` ' - 'layer for layer_idx: {}'.format(layer_idx)) - - # Handle negative indexing otherwise the next check can fail. - if layer_idx < 0: - layer_idx = len(model.layers) + layer_idx - if penultimate_layer_idx > layer_idx: - raise ValueError( - '`penultimate_layer_idx` needs to be before `layer_idx`') - - return model.layers[penultimate_layer_idx] - - -def visualize_saliency_with_losses(input_tensor, losses, seed_input, wrt_tensor=None, grad_modifier='absolute', keepdims=False): - """Generates an attention heatmap over the `seed_input` by using positive gradients of `input_tensor` - with respect to weighted `losses`. - - This function is intended for advanced use cases where a custom loss is desired. For common use cases, - refer to `visualize_class_saliency` or `visualize_regression_saliency`. - - For a full description of saliency, see the paper: - [Deep Inside Convolutional Networks: Visualising Image Classification Models and Saliency Maps] - (https://arxiv.org/pdf/1312.6034v2.pdf) - - Args: - input_tensor: An input tensor of shape: `(samples, channels, image_dims...)` if `image_data_format= - channels_first` or `(samples, image_dims..., channels)` if `image_data_format=channels_last`. - losses: List of ([Loss](vis.losses#Loss), weight) tuples. - seed_input: The model input for which activation map needs to be visualized. - wrt_tensor: Short for, with respect to. The gradients of losses are computed with respect to this tensor. - When None, this is assumed to be the same as `input_tensor` (Default value: None) - grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). By default `absolute` - value of gradients are used. To visualize positive or negative gradients, use `relu` and `negate` - respectively. (Default value = 'absolute') - keepdims: A boolean, whether to keep the dimensions or not. - If keepdims is False, the channels axis is deleted. - If keepdims is True, the grad with same shape as input_tensor is returned. (Default value: False) - - Returns: - The normalized gradients of `seed_input` with respect to weighted `losses`. - """ - opt = Optimizer(input_tensor, losses, - wrt_tensor=wrt_tensor, norm_grads=False) - grads = opt.minimize(seed_input=seed_input, max_iter=1, - grad_modifier=grad_modifier, verbose=False)[1] - - if not keepdims: - channel_idx = 1 if K.image_data_format() == 'channels_first' else -1 - grads = np.max(grads, axis=channel_idx) - return utils.normalize(grads)[0] - - -def visualize_saliency(model, layer_idx, filter_indices, seed_input, wrt_tensor=None, - backprop_modifier=None, grad_modifier='absolute', keepdims=False): - """Generates an attention heatmap over the `seed_input` for maximizing `filter_indices` - output in the given `layer_idx`. - - Args: - model: The `keras.models.Model` instance. The model input shape must be: `(samples, channels, image_dims...)` - if `image_data_format=channels_first` or `(samples, image_dims..., channels)` if - `image_data_format=channels_last`. - layer_idx: The layer index within `model.layers` whose filters needs to be visualized. - filter_indices: filter indices within the layer to be maximized. 
- If None, all filters are visualized. (Default value = None) - For `keras.layers.Dense` layer, `filter_idx` is interpreted as the output index. - If you are visualizing final `keras.layers.Dense` layer, consider switching 'softmax' activation for - 'linear' using [utils.apply_modifications](vis.utils.utils#apply_modifications) for better results. - seed_input: The model input for which activation map needs to be visualized. - wrt_tensor: Short for, with respect to. The gradients of losses are computed with respect to this tensor. - When None, this is assumed to be the same as `input_tensor` (Default value: None) - backprop_modifier: backprop modifier to use. See [backprop_modifiers](vis.backprop_modifiers.md). If you don't - specify anything, no backprop modification is applied. (Default value = None) - grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). By default `absolute` - value of gradients are used. To visualize positive or negative gradients, use `relu` and `negate` - respectively. (Default value = 'absolute') - keepdims: A boolean, whether to keep the dimensions or not. - If keepdims is False, the channels axis is deleted. - If keepdims is True, the grad with same shape as input_tensor is returned. (Default value: False) - - Example: - If you wanted to visualize attention over 'bird' category, say output index 22 on the - final `keras.layers.Dense` layer, then, `filter_indices = [22]`, `layer = dense_layer`. - - One could also set filter indices to more than one value. For example, `filter_indices = [22, 23]` should - (hopefully) show attention map that corresponds to both 22, 23 output categories. - - Returns: - The heatmap image indicating the `seed_input` regions whose change would most contribute towards - maximizing the output of `filter_indices`. - """ - if backprop_modifier is not None: - modifier_fn = get(backprop_modifier) - model = modifier_fn(model) - - # `ActivationMaximization` loss reduces as outputs get large, hence negative gradients indicate the direction - # for increasing activations. Multiply with -1 so that positive gradients indicate increase instead. - losses = [ - (ActivationMaximization(model.layers[layer_idx], filter_indices), -1) - ] - return visualize_saliency_with_losses(model.input, losses, seed_input, wrt_tensor, grad_modifier, keepdims) - - -def visualize_cam_with_losses(input_tensor, losses, seed_input, penultimate_layer, grad_modifier=None): - """Generates a gradient based class activation map (CAM) by using positive gradients of `input_tensor` - with respect to weighted `losses`. - - For details on grad-CAM, see the paper: - [Grad-CAM: Why did you say that? Visual Explanations from Deep Networks via Gradient-based Localization] - (https://arxiv.org/pdf/1610.02391v1.pdf). - - Unlike [class activation mapping](https://arxiv.org/pdf/1512.04150v1.pdf), which requires minor changes to - network architecture in some instances, grad-CAM has a more general applicability. - - Compared to saliency maps, grad-CAM is class discriminative; i.e., the 'cat' explanation exclusively highlights - cat regions and not the 'dog' region and vice-versa. - - Args: - input_tensor: An input tensor of shape: `(samples, channels, image_dims...)` if `image_data_format= - channels_first` or `(samples, image_dims..., channels)` if `image_data_format=channels_last`. - losses: List of ([Loss](vis.losses#Loss), weight) tuples. - seed_input: The model input for which activation map needs to be visualized. 
- penultimate_layer: The pre-layer to `layer_idx` whose feature maps should be used to compute gradients - with respect to filter output. - grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). If you don't - specify anything, gradients are unchanged (Default value = None) - - Returns: - The normalized gradients of `seed_input` with respect to weighted `losses`. - """ - penultimate_output = penultimate_layer.output - opt = Optimizer(input_tensor, losses, - wrt_tensor=penultimate_output, norm_grads=False) - _, grads, penultimate_output_value = opt.minimize( - seed_input, max_iter=1, grad_modifier=grad_modifier, verbose=False) - - # For numerical stability. Very small grad values along with small penultimate_output_value can cause - # w * penultimate_output_value to zero out, even for reasonable fp precision of float32. - grads = grads / (np.max(grads) + K.epsilon()) - - # Average pooling across all feature maps. - # This captures the importance of feature map (channel) idx to the output. - channel_idx = 1 if K.image_data_format() == 'channels_first' else -1 - other_axis = np.delete(np.arange(len(grads.shape)), channel_idx) - weights = np.mean(grads, axis=tuple(other_axis)) - - # Generate heatmap by computing weight * output over feature maps - output_dims = utils.get_img_shape(penultimate_output)[2:] - heatmap = np.zeros(shape=output_dims, dtype=K.floatx()) - for i, w in enumerate(weights): - if channel_idx == -1: - heatmap += w * penultimate_output_value[0, ..., i] - else: - heatmap += w * penultimate_output_value[0, i, ...] - - # ReLU thresholding to exclude pattern mismatch information (negative gradients). - heatmap = np.maximum(heatmap, 0) - - # The penultimate feature map size is definitely smaller than input image. - input_dims = utils.get_img_shape(input_tensor)[2:] - - # Figure out the zoom factor. - zoom_factor = [i / (j * 1.0) - for i, j in iter(zip(input_dims, output_dims))] - heatmap = zoom(heatmap, zoom_factor) - return utils.normalize(heatmap) - - -def visualize_cam(model, layer_idx, filter_indices, - seed_input, penultimate_layer_idx=None, - backprop_modifier=None, grad_modifier=None): - """Generates a gradient based class activation map (grad-CAM) that maximizes the outputs of - `filter_indices` in `layer_idx`. - - Args: - model: The `keras.models.Model` instance. The model input shape must be: `(samples, channels, image_dims...)` - if `image_data_format=channels_first` or `(samples, image_dims..., channels)` if - `image_data_format=channels_last`. - layer_idx: The layer index within `model.layers` whose filters needs to be visualized. - filter_indices: filter indices within the layer to be maximized. - If None, all filters are visualized. (Default value = None) - For `keras.layers.Dense` layer, `filter_idx` is interpreted as the output index. - If you are visualizing final `keras.layers.Dense` layer, consider switching 'softmax' activation for - 'linear' using [utils.apply_modifications](vis.utils.utils#apply_modifications) for better results. - seed_input: The input image for which activation map needs to be visualized. - penultimate_layer_idx: The pre-layer to `layer_idx` whose feature maps should be used to compute gradients - wrt filter output. If not provided, it is set to the nearest penultimate `Conv` or `Pooling` layer. - backprop_modifier: backprop modifier to use. See [backprop_modifiers](vis.backprop_modifiers.md). If you don't - specify anything, no backprop modification is applied. 
(Default value = None) - grad_modifier: gradient modifier to use. See [grad_modifiers](vis.grad_modifiers.md). If you don't - specify anything, gradients are unchanged (Default value = None) - - Example: - If you wanted to visualize attention over 'bird' category, say output index 22 on the - final `keras.layers.Dense` layer, then, `filter_indices = [22]`, `layer = dense_layer`. - - One could also set filter indices to more than one value. For example, `filter_indices = [22, 23]` should - (hopefully) show attention map that corresponds to both 22, 23 output categories. - - Returns: - The heatmap image indicating the input regions whose change would most contribute towards - maximizing the output of `filter_indices`. - """ - if backprop_modifier is not None: - modifier_fn = get(backprop_modifier) - model = modifier_fn(model) - - penultimate_layer = _find_penultimate_layer( - model, layer_idx, penultimate_layer_idx) - - # `ActivationMaximization` outputs negative gradient values for increase in activations. Multiply with -1 - # so that positive gradients indicate increase instead. - losses = [ - (ActivationMaximization(model.layers[layer_idx], filter_indices), -1) - ] - return visualize_cam_with_losses(model.input, losses, seed_input, penultimate_layer, grad_modifier) -import pytest -import numpy as np - -from vis.backend import modify_model_backprop -from vis.utils.test_utils import skip_backends - -import keras -from keras.models import Model, Input, Sequential -from keras.layers import Dense -from keras.initializers import Constant -from keras import backend as K -from keras.activations import get -from keras.layers import advanced_activations, Activation - - -def _compute_grads(model, input_array): - grads_fn = K.gradients(model.output, model.input)[0] - compute_fn = K.function([model.input, K.learning_phase()], [grads_fn]) - return compute_fn([np.array([input_array]), 0])[0][0] - - -@skip_backends('theano') -def test_guided_grad_modifier(): - # Create a simple 2 dense layer model. - simple_model = Sequential([ - Dense(2, activation='relu', use_bias=False, kernel_initializer=Constant( - [[-1., 1.], [-1., 1.]]), input_shape=(2,)), - Dense(1, activation='linear', use_bias=False, - kernel_initializer=Constant([-1., 1.])) - ]) - simple_model.compile( - loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam()) - - # Create a simple 2 dense layer model using Activation. - simple_model_with_activation = Sequential([ - Dense(2, activation='linear', use_bias=False, kernel_initializer=Constant( - [[-1., 1.], [-1., 1.]]), input_shape=(2,)), - Activation('relu'), - Dense(1, activation='linear', use_bias=False, - kernel_initializer=Constant([-1., 1.])) - ]) - simple_model_with_activation.compile( - loss=keras.losses.categorical_crossentropy, optimizer=keras.optimizers.Adam()) - - for i, model in enumerate([simple_model, simple_model_with_activation]): - # Create guided backprop model - modified_model = modify_model_backprop(model, 'guided') - - # Gradients are zeros. - input_array = [0., 0.] - assert np.array_equal(_compute_grads(model, input_array), [0., 0.]) - assert np.array_equal(_compute_grads( - modified_model, input_array), [0., 0.]) - - # Below 3 cases, GuidedBackprop gradients is the same as Original gradients. - input_array = [1., 0.] - assert np.array_equal(_compute_grads(model, input_array), [1., 1.]) - assert np.array_equal(_compute_grads( - modified_model, input_array), [1., 1.]) - - input_array = [0., 1.] 
-        assert np.array_equal(_compute_grads(model, input_array), [1., 1.])
-        assert np.array_equal(_compute_grads(
-            modified_model, input_array), [1., 1.])
-
-        input_array = [1., 1.]
-        assert np.array_equal(_compute_grads(model, input_array), [1., 1.])
-        assert np.array_equal(_compute_grads(
-            modified_model, input_array), [1., 1.])
-
-        # If inputs contain negative values,
-        # GuidedBackprop gradients are not the same as the original gradients.
-        input_array = [-1., 0.]
-        assert np.array_equal(_compute_grads(model, input_array), [1., 1.])
-        assert np.array_equal(_compute_grads(
-            modified_model, input_array), [0., 0.])
-
-        input_array = [0., -1.]
-        assert np.array_equal(_compute_grads(model, input_array), [1., 1.])
-        assert np.array_equal(_compute_grads(
-            modified_model, input_array), [0., 0.])
-
-        input_array = [-1., -1.]
-        assert np.array_equal(_compute_grads(model, input_array), [1., 1.])
-        assert np.array_equal(_compute_grads(
-            modified_model, input_array), [0., 0.])
-
-        # Activations are not changed.
-        if i == 0:  # modified first model
-            assert modified_model.layers[0].activation == keras.activations.relu
-            assert modified_model.layers[1].activation == keras.activations.linear
-        if i == 1:  # modified second model
-            assert modified_model.layers[0].activation == keras.activations.linear
-            assert modified_model.layers[1].activation == keras.activations.relu
-            assert modified_model.layers[2].activation == keras.activations.linear
-
-
-# Currently, the modify_model_backprop function doesn't support advanced activations.
-# Therefore, this test case is temporarily commented out.
-#
-# @skip_backends('theano')
-# def test_advanced_activations():
-#     """ Tests that various ways of specifying activations in keras models are handled when replaced with Relu
-#     """
-#     inp = Input(shape=(2, ))
-#     x = Dense(5, activation='elu')(inp)
-#     x = advanced_activations.LeakyReLU()(x)
-#     x = Activation('elu')(x)
-#     model = Model(inp, x)
-#
-#     # Ensure that layer.activation, Activation and advanced activations are replaced with relu
-#     modified_model = modify_model_backprop(model, 'guided')
-#     assert modified_model.layers[1].activation == get('relu')
-#     assert modified_model.layers[2].activation == get('relu')
-#     assert modified_model.layers[3].activation == get('relu')
-#
-#     # Ensure that original model is unchanged.
-#     assert model.layers[1].activation == get('elu')
-#     assert isinstance(model.layers[2], advanced_activations.LeakyReLU)
-#     assert model.layers[3].activation == get('elu')
-
-
-# @skip_backends('theano')
-# def test_rectified_grad_modifier():
-#     # Only test the tensorflow implementation for now.
-#     if K.backend() == 'theano':
-#         return
-#
-#     # Create a simple linear sequence x -> linear(w.x) with weights w1 = -1, w2 = 1.
-#     inp = Input(shape=(2, ))
-#     out = Dense(1, activation='linear', use_bias=False, kernel_initializer=Constant([-1., 1.]))(inp)
-#     model = Model(inp, out)
-#
-#     # Original model gradient should be [w1, w2]
-#     assert np.array_equal(_compute_grads(model, [1., -1.]), [-1., 1.])
-#
-#     # Original gradient is [-1, 1] but new gradient should be [0, 1]
-#     # First one is clipped because of negative gradient.
-#     modified_model = modify_model_backprop(model, 'rectified')
-#
-#     # TODO: Interestingly this does not work for some reason.
-#     # It is failing at tf.cast(grad > 0., dtype)
-#     assert np.array_equal(_compute_grads(modified_model, [1., -1.]), [0., 1.])
-#
-#     # Ensure that the original model reference remains unchanged.
-# assert model.layers[1].activation == get('linear') -# assert modified_model.layers[1].activation == get('relu') - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -from vis.utils import utils -from keras import backend as K - - -def test_get_img_shape_on_2d_image(): - n = 5 - channels = 4 - dim1 = 1 - dim2 = 2 - - K.set_image_data_format('channels_first') - assert (n, channels, dim1, dim2) == utils.get_img_shape( - K.ones(shape=(n, channels, dim1, dim2))) - - K.set_image_data_format('channels_last') - assert (n, channels, dim1, dim2) == utils.get_img_shape( - K.ones(shape=(n, dim1, dim2, channels))) - - -def test_get_img_shape_on_3d_image(): - n = 5 - channels = 4 - dim1 = 1 - dim2 = 2 - dim3 = 3 - - K.set_image_data_format('channels_first') - assert (n, channels, dim1, dim2, dim3) == utils.get_img_shape( - K.ones(shape=(n, channels, dim1, dim2, dim3))) - - K.set_image_data_format('channels_last') - assert (n, channels, dim1, dim2, dim3) == utils.get_img_shape( - K.ones(shape=(n, dim1, dim2, dim3, channels))) - - -def test_reverse_iterable(): - assert list(utils.reverse_enumerate('abcde')) == [ - (4, 'e'), (3, 'd'), (2, 'c'), (1, 'b'), (0, 'a')] - - -if __name__ == '__main__': - pytest.main([__file__]) -from setuptools import setup, find_packages - -long_description = open('README.rst').read() -version = '0.0.3' - -setup(name='keras-adversarial', - version=version, - description='Adversarial models and optimizers for Keras', - url='https://github.com/bstriner/keras-adversarial', - download_url='https://github.com/bstriner/keras-adversarial/tarball/v{}'.format( - version), - author='Ben Striner', - author_email='bstriner@gmail.com', - packages=find_packages(), - install_requires=['Keras'], - keywords=['keras', 'gan', 'adversarial', 'multiplayer'], - license='MIT', - long_description=long_description, - classifiers=[ - # Indicate who your project is intended for - 'Intended Audience :: Developers', - # Pick your license as you wish (should match "license" above) - 'License :: OSI Approved :: MIT License', - - # Specify the Python versions you support here. In particular, ensure - # that you indicate whether you support Python 2, Python 3 or both. 
- 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 3' - ]) -import numpy as np -from keras.datasets import cifar10 - - -def cifar10_process(x): - x = x.astype(np.float32) / 255.0 - return x - - -def cifar10_data(): - (xtrain, ytrain), (xtest, ytest) = cifar10.load_data() - return cifar10_process(xtrain), cifar10_process(xtest) -# import os -# os.environ["THEANO_FLAGS"] = "mode=FAST_COMPILE,device=cpu,floatX=float32" - -import os -from keras.layers import LeakyReLU, Activation -from mnist_utils import mnist_data -from keras_adversarial import AdversarialOptimizerSimultaneous, normal_latent_sampling -from keras_adversarial import AdversarialModel, fix_names, n_choice -from keras_adversarial.image_grid_callback import ImageGridCallback -import numpy as np -import pandas as pd -import keras.backend as K -from keras_adversarial.legacy import l1l2 -from keras.optimizers import Adam -from keras.models import Sequential, Model -from keras.layers import Dense, Reshape, Flatten, Input, merge -import matplotlib as mpl - -# This line allows mpl to run with no DISPLAY defined -mpl.use('Agg') - - -def model_generator(latent_dim, input_shape, hidden_dim=512, reg=lambda: l1l2(1e-7, 0)): - return Sequential([ - Dense(hidden_dim, name="generator_h1", - input_dim=latent_dim, W_regularizer=reg()), - LeakyReLU(0.2), - Dense(hidden_dim, name="generator_h2", W_regularizer=reg()), - LeakyReLU(0.2), - Dense(np.prod(input_shape), name="generator_x_flat", W_regularizer=reg()), - Activation('sigmoid'), - Reshape(input_shape, name="generator_x")], - name="generator") - - -def model_encoder(latent_dim, input_shape, hidden_dim=512, reg=lambda: l1l2(1e-7, 0)): - x = Input(input_shape, name="x") - h = Flatten()(x) - h = Dense(hidden_dim, name="encoder_h1", W_regularizer=reg())(h) - h = LeakyReLU(0.2)(h) - h = Dense(hidden_dim, name="encoder_h2", W_regularizer=reg())(h) - h = LeakyReLU(0.2)(h) - mu = Dense(latent_dim, name="encoder_mu", W_regularizer=reg())(h) - log_sigma_sq = Dense( - latent_dim, name="encoder_log_sigma_sq", W_regularizer=reg())(h) - z = merge([mu, log_sigma_sq], mode=lambda p: p[0] + K.random_normal(K.shape(p[0])) * K.exp(p[1] / 2), - output_shape=lambda p: p[0]) - return Model(x, z, name="encoder") - - -def model_discriminator(latent_dim, output_dim=1, hidden_dim=512, - reg=lambda: l1l2(1e-7, 1e-7)): - z = Input((latent_dim,)) - h = z - h = Dense(hidden_dim, name="discriminator_h1", W_regularizer=reg())(h) - h = LeakyReLU(0.2)(h) - h = Dense(hidden_dim, name="discriminator_h2", W_regularizer=reg())(h) - h = LeakyReLU(0.2)(h) - y = Dense(output_dim, name="discriminator_y", - activation="sigmoid", W_regularizer=reg())(h) - return Model(z, y) - - -def example_aae(path, adversarial_optimizer): - # z \in R^100 - latent_dim = 100 - # x \in R^{28x28} - input_shape = (28, 28) - - # generator (z -> x) - generator = model_generator(latent_dim, input_shape) - # encoder (x ->z) - encoder = model_encoder(latent_dim, input_shape) - # autoencoder (x -> x') - autoencoder = Model(encoder.inputs, generator(encoder(encoder.inputs))) - # discriminator (z -> y) - discriminator = model_discriminator(latent_dim) - - # assemple AAE - x = encoder.inputs[0] - z = encoder(x) - xpred = generator(z) - zreal = normal_latent_sampling((latent_dim,))(x) - yreal = discriminator(zreal) - yfake = discriminator(z) - aae = Model(x, fix_names([xpred, yfake, yreal], - ["xpred", "yfake", "yreal"])) - - # print summary of models - generator.summary() - encoder.summary() - discriminator.summary() - 
autoencoder.summary() - - # build adversarial model - generative_params = generator.trainable_weights + encoder.trainable_weights - model = AdversarialModel(base_model=aae, - player_params=[generative_params, - discriminator.trainable_weights], - player_names=["generator", "discriminator"]) - model.adversarial_compile(adversarial_optimizer=adversarial_optimizer, - player_optimizers=[ - Adam(1e-4, decay=1e-4), Adam(1e-3, decay=1e-4)], - loss={"yfake": "binary_crossentropy", "yreal": "binary_crossentropy", - "xpred": "mean_squared_error"}, - player_compile_kwargs=[{"loss_weights": {"yfake": 1e-2, "yreal": 1e-2, "xpred": 1}}] * 2) - - # load mnist data - xtrain, xtest = mnist_data() - - # callback for image grid of generated samples - def generator_sampler(): - zsamples = np.random.normal(size=(10 * 10, latent_dim)) - return generator.predict(zsamples).reshape((10, 10, 28, 28)) - - generator_cb = ImageGridCallback(os.path.join( - path, "generated-epoch-{:03d}.png"), generator_sampler) - - # callback for image grid of autoencoded samples - def autoencoder_sampler(): - xsamples = n_choice(xtest, 10) - xrep = np.repeat(xsamples, 9, axis=0) - xgen = autoencoder.predict(xrep).reshape((10, 9, 28, 28)) - xsamples = xsamples.reshape((10, 1, 28, 28)) - samples = np.concatenate((xsamples, xgen), axis=1) - return samples - - autoencoder_cb = ImageGridCallback(os.path.join( - path, "autoencoded-epoch-{:03d}.png"), autoencoder_sampler) - - # train network - # generator, discriminator; pred, yfake, yreal - n = xtrain.shape[0] - y = [xtrain, np.ones((n, 1)), np.zeros((n, 1)), xtrain, - np.zeros((n, 1)), np.ones((n, 1))] - ntest = xtest.shape[0] - ytest = [xtest, np.ones((ntest, 1)), np.zeros( - (ntest, 1)), xtest, np.zeros((ntest, 1)), np.ones((ntest, 1))] - history = model.fit(x=xtrain, y=y, validation_data=(xtest, ytest), callbacks=[generator_cb, autoencoder_cb], - nb_epoch=100, batch_size=32) - - # save history - df = pd.DataFrame(history.history) - df.to_csv(os.path.join(path, "history.csv")) - - # save model - encoder.save(os.path.join(path, "encoder.h5")) - generator.save(os.path.join(path, "generator.h5")) - discriminator.save(os.path.join(path, "discriminator.h5")) - - -def main(): - example_aae("output/aae", AdversarialOptimizerSimultaneous()) - - -if __name__ == "__main__": - main() -import os -from image_utils import dim_ordering_unfix, dim_ordering_shape -from keras.layers import LeakyReLU, Activation -from cifar10_utils import cifar10_data -from keras_adversarial import AdversarialOptimizerSimultaneous, normal_latent_sampling -from keras_adversarial import AdversarialModel, fix_names, n_choice -from keras_adversarial.legacy import l1l2, Dense, fit, Convolution2D -from keras_adversarial.image_grid_callback import ImageGridCallback -import numpy as np -import pandas as pd -import keras.backend as K -from keras.optimizers import Adam -from keras.models import Sequential, Model -from keras.layers.convolutional import UpSampling2D, MaxPooling2D -from keras.layers import Input -from keras.layers import Reshape, Flatten, Lambda -import matplotlib as mpl - -# This line allows mpl to run with no DISPLAY defined -mpl.use('Agg') - - -def model_generator(latent_dim, units=512, dropout=0.5, reg=lambda: l1l2(l1=1e-7, l2=1e-7)): - model = Sequential(name="decoder") - h = 5 - model.add(Dense(units * 4 * 4, input_dim=latent_dim, W_regularizer=reg())) - model.add(Reshape(dim_ordering_shape((units, 4, 4)))) - # model.add(SpatialDropout2D(dropout)) - model.add(LeakyReLU(0.2)) - model.add(Convolution2D(units / 2, 
h, h, - border_mode='same', W_regularizer=reg())) - # model.add(SpatialDropout2D(dropout)) - model.add(LeakyReLU(0.2)) - model.add(UpSampling2D(size=(2, 2))) - model.add(Convolution2D(units / 2, h, h, - border_mode='same', W_regularizer=reg())) - # model.add(SpatialDropout2D(dropout)) - model.add(LeakyReLU(0.2)) - model.add(UpSampling2D(size=(2, 2))) - model.add(Convolution2D(units / 4, h, h, - border_mode='same', W_regularizer=reg())) - # model.add(SpatialDropout2D(dropout)) - model.add(LeakyReLU(0.2)) - model.add(UpSampling2D(size=(2, 2))) - model.add(Convolution2D(3, h, h, border_mode='same', W_regularizer=reg())) - model.add(Activation('sigmoid')) - return model - - -def model_encoder(latent_dim, input_shape, units=512, reg=lambda: l1l2(l1=1e-7, l2=1e-7), dropout=0.5): - k = 5 - x = Input(input_shape) - h = Convolution2D(units / 4, k, k, border_mode='same', - W_regularizer=reg())(x) - # h = SpatialDropout2D(dropout)(h) - h = MaxPooling2D(pool_size=(2, 2))(h) - h = LeakyReLU(0.2)(h) - h = Convolution2D(units / 2, k, k, border_mode='same', - W_regularizer=reg())(h) - # h = SpatialDropout2D(dropout)(h) - h = MaxPooling2D(pool_size=(2, 2))(h) - h = LeakyReLU(0.2)(h) - h = Convolution2D(units / 2, k, k, border_mode='same', - W_regularizer=reg())(h) - # h = SpatialDropout2D(dropout)(h) - h = MaxPooling2D(pool_size=(2, 2))(h) - h = LeakyReLU(0.2)(h) - h = Convolution2D(units, k, k, border_mode='same', W_regularizer=reg())(h) - # h = SpatialDropout2D(dropout)(h) - h = LeakyReLU(0.2)(h) - h = Flatten()(h) - mu = Dense(latent_dim, name="encoder_mu", W_regularizer=reg())(h) - log_sigma_sq = Dense( - latent_dim, name="encoder_log_sigma_sq", W_regularizer=reg())(h) - z = Lambda(lambda (_mu, _lss): _mu + K.random_normal(K.shape(_mu)) * K.exp(_lss / 2), - output_shape=lambda (_mu, _lss): _mu)([mu, log_sigma_sq]) - return Model(x, z, name="encoder") - - -def model_discriminator(latent_dim, output_dim=1, units=256, reg=lambda: l1l2(1e-7, 1e-7)): - z = Input((latent_dim,)) - h = z - mode = 1 - h = Dense(units, name="discriminator_h1", W_regularizer=reg())(h) - # h = BatchNormalization(mode=mode)(h) - h = LeakyReLU(0.2)(h) - h = Dense(units / 2, name="discriminator_h2", W_regularizer=reg())(h) - # h = BatchNormalization(mode=mode)(h) - h = LeakyReLU(0.2)(h) - h = Dense(units / 2, name="discriminator_h3", W_regularizer=reg())(h) - # h = BatchNormalization(mode=mode)(h) - h = LeakyReLU(0.2)(h) - y = Dense(output_dim, name="discriminator_y", - activation="sigmoid", W_regularizer=reg())(h) - return Model(z, y) - - -def example_aae(path, adversarial_optimizer): - # z \in R^100 - latent_dim = 256 - units = 512 - # x \in R^{28x28} - input_shape = dim_ordering_shape((3, 32, 32)) - - # generator (z -> x) - generator = model_generator(latent_dim, units=units) - # encoder (x ->z) - encoder = model_encoder(latent_dim, input_shape, units=units) - # autoencoder (x -> x') - autoencoder = Model(encoder.inputs, generator(encoder(encoder.inputs))) - # discriminator (z -> y) - discriminator = model_discriminator(latent_dim, units=units) - - # build AAE - x = encoder.inputs[0] - z = encoder(x) - xpred = generator(z) - zreal = normal_latent_sampling((latent_dim,))(x) - yreal = discriminator(zreal) - yfake = discriminator(z) - aae = Model(x, fix_names([xpred, yfake, yreal], - ["xpred", "yfake", "yreal"])) - - # print summary of models - generator.summary() - encoder.summary() - discriminator.summary() - autoencoder.summary() - - # build adversarial model - generative_params = generator.trainable_weights + 
encoder.trainable_weights - model = AdversarialModel(base_model=aae, - player_params=[generative_params, - discriminator.trainable_weights], - player_names=["generator", "discriminator"]) - model.adversarial_compile(adversarial_optimizer=adversarial_optimizer, - player_optimizers=[ - Adam(3e-4, decay=1e-4), Adam(1e-3, decay=1e-4)], - loss={"yfake": "binary_crossentropy", "yreal": "binary_crossentropy", - "xpred": "mean_squared_error"}, - player_compile_kwargs=[{"loss_weights": {"yfake": 1e-1, "yreal": 1e-1, - "xpred": 1e2}}] * 2) - - # load mnist data - xtrain, xtest = cifar10_data() - - # callback for image grid of generated samples - def generator_sampler(): - zsamples = np.random.normal(size=(10 * 10, latent_dim)) - return dim_ordering_unfix(generator.predict(zsamples)).transpose((0, 2, 3, 1)).reshape((10, 10, 32, 32, 3)) - - generator_cb = ImageGridCallback(os.path.join( - path, "generated-epoch-{:03d}.png"), generator_sampler) - - # callback for image grid of autoencoded samples - def autoencoder_sampler(): - xsamples = n_choice(xtest, 10) - xrep = np.repeat(xsamples, 9, axis=0) - xgen = dim_ordering_unfix(autoencoder.predict( - xrep)).reshape((10, 9, 3, 32, 32)) - xsamples = dim_ordering_unfix(xsamples).reshape((10, 1, 3, 32, 32)) - samples = np.concatenate((xsamples, xgen), axis=1) - samples = samples.transpose((0, 1, 3, 4, 2)) - return samples - - autoencoder_cb = ImageGridCallback(os.path.join(path, "autoencoded-epoch-{:03d}.png"), autoencoder_sampler, - cmap=None) - - # train network - # generator, discriminator; pred, yfake, yreal - n = xtrain.shape[0] - y = [xtrain, np.ones((n, 1)), np.zeros((n, 1)), xtrain, - np.zeros((n, 1)), np.ones((n, 1))] - ntest = xtest.shape[0] - ytest = [xtest, np.ones((ntest, 1)), np.zeros( - (ntest, 1)), xtest, np.zeros((ntest, 1)), np.ones((ntest, 1))] - history = fit(model, x=xtrain, y=y, validation_data=(xtest, ytest), - callbacks=[generator_cb, autoencoder_cb], - nb_epoch=100, batch_size=32) - - # save history - df = pd.DataFrame(history.history) - df.to_csv(os.path.join(path, "history.csv")) - - # save model - encoder.save(os.path.join(path, "encoder.h5")) - generator.save(os.path.join(path, "generator.h5")) - discriminator.save(os.path.join(path, "discriminator.h5")) - - -def main(): - example_aae("output/aae-cifar10", AdversarialOptimizerSimultaneous()) - - -if __name__ == "__main__": - main() -import os -from keras.layers import BatchNormalization, LeakyReLU -from example_gan import model_generator -from mnist_utils import mnist_data -from keras_adversarial import AdversarialOptimizerSimultaneous, normal_latent_sampling -from keras_adversarial import AdversarialModel, gan_targets, fix_names, n_choice, simple_bigan -from keras_adversarial.image_grid_callback import ImageGridCallback -import numpy as np -import pandas as pd -import keras.backend as K -from keras_adversarial.legacy import l1l2 -from keras.optimizers import Adam -from keras.models import Model -from keras.layers import Dense, Flatten, Input, merge, Dropout -import matplotlib as mpl - -# This line allows mpl to run with no DISPLAY defined -mpl.use('Agg') - - -def model_encoder(latent_dim, input_shape, hidden_dim=1024, reg=lambda: l1l2(1e-5, 0), batch_norm_mode=0): - x = Input(input_shape, name="x") - h = Flatten()(x) - h = Dense(hidden_dim, name="encoder_h1", W_regularizer=reg())(h) - h = BatchNormalization(mode=batch_norm_mode)(h) - h = LeakyReLU(0.2)(h) - h = Dense(hidden_dim / 2, name="encoder_h2", W_regularizer=reg())(h) - h = BatchNormalization(mode=batch_norm_mode)(h) - h = 
LeakyReLU(0.2)(h) - h = Dense(hidden_dim / 4, name="encoder_h3", W_regularizer=reg())(h) - h = BatchNormalization(mode=batch_norm_mode)(h) - h = LeakyReLU(0.2)(h) - mu = Dense(latent_dim, name="encoder_mu", W_regularizer=reg())(h) - log_sigma_sq = Dense( - latent_dim, name="encoder_log_sigma_sq", W_regularizer=reg())(h) - z = merge([mu, log_sigma_sq], mode=lambda p: p[0] + K.random_normal(K.shape(p[0])) * K.exp(p[1] / 2), - output_shape=lambda x: x[0]) - return Model(x, z, name="encoder") - - -def model_discriminator(latent_dim, input_shape, output_dim=1, hidden_dim=2048, - reg=lambda: l1l2(1e-7, 1e-7), batch_norm_mode=1, dropout=0.5): - z = Input((latent_dim,)) - x = Input(input_shape, name="x") - h = merge([z, Flatten()(x)], mode='concat') - - h1 = Dense(hidden_dim, name="discriminator_h1", W_regularizer=reg()) - b1 = BatchNormalization(mode=batch_norm_mode) - h2 = Dense(hidden_dim, name="discriminator_h2", W_regularizer=reg()) - b2 = BatchNormalization(mode=batch_norm_mode) - h3 = Dense(hidden_dim, name="discriminator_h3", W_regularizer=reg()) - b3 = BatchNormalization(mode=batch_norm_mode) - y = Dense(output_dim, name="discriminator_y", - activation="sigmoid", W_regularizer=reg()) - - # training model uses dropout - _h = h - _h = Dropout(dropout)(LeakyReLU(0.2)((b1(h1(_h))))) - _h = Dropout(dropout)(LeakyReLU(0.2)((b2(h2(_h))))) - _h = Dropout(dropout)(LeakyReLU(0.2)((b3(h3(_h))))) - ytrain = y(_h) - mtrain = Model([z, x], ytrain, name="discriminator_train") - - # testing model does not use dropout - _h = h - _h = LeakyReLU(0.2)((b1(h1(_h)))) - _h = LeakyReLU(0.2)((b2(h2(_h)))) - _h = LeakyReLU(0.2)((b3(h3(_h)))) - ytest = y(_h) - mtest = Model([z, x], ytest, name="discriminator_test") - - return mtrain, mtest - - -def example_bigan(path, adversarial_optimizer): - # z \in R^100 - latent_dim = 25 - # x \in R^{28x28} - input_shape = (28, 28) - - # generator (z -> x) - generator = model_generator(latent_dim, input_shape) - # encoder (x ->z) - encoder = model_encoder(latent_dim, input_shape) - # autoencoder (x -> x') - autoencoder = Model(encoder.inputs, generator(encoder(encoder.inputs))) - # discriminator (x -> y) - discriminator_train, discriminator_test = model_discriminator( - latent_dim, input_shape) - # bigan (z, x - > yfake, yreal) - bigan_generator = simple_bigan(generator, encoder, discriminator_test) - bigan_discriminator = simple_bigan(generator, encoder, discriminator_train) - # z generated on GPU based on batch dimension of x - x = bigan_generator.inputs[1] - z = normal_latent_sampling((latent_dim,))(x) - # eliminate z from inputs - bigan_generator = Model([x], fix_names( - bigan_generator([z, x]), bigan_generator.output_names)) - bigan_discriminator = Model([x], fix_names( - bigan_discriminator([z, x]), bigan_discriminator.output_names)) - - generative_params = generator.trainable_weights + encoder.trainable_weights - - # print summary of models - generator.summary() - encoder.summary() - discriminator_train.summary() - bigan_discriminator.summary() - autoencoder.summary() - - # build adversarial model - model = AdversarialModel(player_models=[bigan_generator, bigan_discriminator], - player_params=[generative_params, - discriminator_train.trainable_weights], - player_names=["generator", "discriminator"]) - model.adversarial_compile(adversarial_optimizer=adversarial_optimizer, - player_optimizers=[ - Adam(1e-4, decay=1e-4), Adam(1e-3, decay=1e-4)], - loss='binary_crossentropy') - - # load mnist data - xtrain, xtest = mnist_data() - - # callback for image grid of generated 
samples - def generator_sampler(): - zsamples = np.random.normal(size=(10 * 10, latent_dim)) - return generator.predict(zsamples).reshape((10, 10, 28, 28)) - - generator_cb = ImageGridCallback(os.path.join( - path, "generated-epoch-{:03d}.png"), generator_sampler) - - # callback for image grid of autoencoded samples - def autoencoder_sampler(): - xsamples = n_choice(xtest, 10) - xrep = np.repeat(xsamples, 9, axis=0) - xgen = autoencoder.predict(xrep).reshape((10, 9, 28, 28)) - xsamples = xsamples.reshape((10, 1, 28, 28)) - x = np.concatenate((xsamples, xgen), axis=1) - return x - - autoencoder_cb = ImageGridCallback(os.path.join( - path, "autoencoded-epoch-{:03d}.png"), autoencoder_sampler) - - # train network - y = gan_targets(xtrain.shape[0]) - ytest = gan_targets(xtest.shape[0]) - history = model.fit(x=xtrain, y=y, validation_data=(xtest, ytest), callbacks=[generator_cb, autoencoder_cb], - nb_epoch=100, batch_size=32) - - # save history - df = pd.DataFrame(history.history) - df.to_csv(os.path.join(path, "history.csv")) - - # save model - encoder.save(os.path.join(path, "encoder.h5")) - generator.save(os.path.join(path, "generator.h5")) - discriminator_train.save(os.path.join(path, "discriminator.h5")) - - -def main(): - example_bigan("output/bigan", AdversarialOptimizerSimultaneous()) - - -if __name__ == "__main__": - main() -from keras.layers import BatchNormalization, LeakyReLU -from example_gan import model_generator -from mnist_utils import mnist_data -from keras_adversarial import AdversarialOptimizerSimultaneous, normal_latent_sampling -from keras_adversarial import AdversarialModel, gan_targets, n_choice, simple_bigan -from keras_adversarial.image_grid_callback import ImageGridCallback -import numpy as np -import pandas as pd -import keras.backend as K -from keras.regularizers import l1, l1l2 -from keras.optimizers import Adam -from keras.models import Model -from keras.layers import Dense, Flatten, Input, merge, Dropout -import matplotlib as mpl - -# This line allows mpl to run with no DISPLAY defined -mpl.use('Agg') - - -def model_encoder(latent_dim, input_shape, hidden_dim=1024, reg=lambda: l1(1e-5), batch_norm_mode=2): - x = Input(input_shape, name="x") - h = Flatten()(x) - h = Dense(hidden_dim, name="encoder_h1", W_regularizer=reg())(h) - h = BatchNormalization(mode=batch_norm_mode)(h) - h = LeakyReLU(0.2)(h) - h = Dense(hidden_dim / 2, name="encoder_h2", W_regularizer=reg())(h) - h = BatchNormalization(mode=batch_norm_mode)(h) - h = LeakyReLU(0.2)(h) - h = Dense(hidden_dim / 4, name="encoder_h3", W_regularizer=reg())(h) - h = BatchNormalization(mode=batch_norm_mode)(h) - h = LeakyReLU(0.2)(h) - mu = Dense(latent_dim, name="encoder_mu", W_regularizer=reg())(h) - log_sigma_sq = Dense( - latent_dim, name="encoder_log_sigma_sq", W_regularizer=reg())(h) - z = merge([mu, log_sigma_sq], mode=lambda p: p[0] + K.random_normal(p[0].shape) * K.exp(p[1] / 2), - output_shape=lambda x: x[0]) - return Model(x, z, name="encoder") - - -def model_discriminator(latent_dim, input_shape, output_dim=1, hidden_dim=1024, - reg=lambda: l1l2(1e-4, 1e-4), batch_norm_mode=1): - z = Input((latent_dim,)) - x = Input(input_shape, name="x") - h = merge([z, Flatten()(x)], mode='concat') - h = Dense(hidden_dim, name="discriminator_h1", W_regularizer=reg())(h) - h = BatchNormalization(mode=batch_norm_mode)(h) - h = LeakyReLU(0.2)(h) - h = Dropout(0.5)(h) - h = Dense(hidden_dim / 2, name="discriminator_h2", W_regularizer=reg())(h) - h = BatchNormalization(mode=batch_norm_mode)(h) - h = LeakyReLU(0.2)(h) - h = 
Dropout(0.5)(h) - h = Dense(hidden_dim / 4, name="discriminator_h3", W_regularizer=reg())(h) - h = BatchNormalization(mode=batch_norm_mode)(h) - h = LeakyReLU(0.2)(h) - h = Dropout(0.5)(h) - y = Dense(output_dim, name="discriminator_y", - activation="sigmoid", W_regularizer=reg())(h) - return Model([z, x], y, name="discriminator") - - -def main(): - # z \in R^100 - latent_dim = 100 - # x \in R^{28x28} - input_shape = (28, 28) - - # generator (z -> x) - generator = model_generator(latent_dim, input_shape) - # encoder (x ->z) - encoder = model_encoder(latent_dim, input_shape) - # autoencoder (x -> x') - autoencoder = Model(encoder.inputs, generator(encoder(encoder.inputs))) - # discriminator (x -> y) - discriminator = model_discriminator(latent_dim, input_shape) - # bigan (x - > yfake, yreal), z generated on GPU - bigan = simple_bigan(generator, encoder, discriminator, - normal_latent_sampling((latent_dim,))) - - generative_params = generator.trainable_weights + encoder.trainable_weights - - # print summary of models - generator.summary() - encoder.summary() - discriminator.summary() - bigan.summary() - autoencoder.summary() - - # build adversarial model - model = AdversarialModel(base_model=bigan, - player_params=[generative_params, - discriminator.trainable_weights], - player_names=["generator", "discriminator"]) - model.adversarial_compile(adversarial_optimizer=AdversarialOptimizerSimultaneous(), - player_optimizers=[ - Adam(1e-4, decay=1e-4), Adam(1e-3, decay=1e-4)], - loss='binary_crossentropy') - - # train model - xtrain, xtest = mnist_data() - - def generator_sampler(): - zsamples = np.random.normal(size=(10 * 10, latent_dim)) - return generator.predict(zsamples).reshape((10, 10, 28, 28)) - - generator_cb = ImageGridCallback( - "output/bigan/generated-epoch-{:03d}.png", generator_sampler) - - def autoencoder_sampler(): - xsamples = n_choice(xtest, 10) - xrep = np.repeat(xsamples, 9, axis=0) - xgen = autoencoder.predict(xrep).reshape((10, 9, 28, 28)) - xsamples = xsamples.reshape((10, 1, 28, 28)) - x = np.concatenate((xsamples, xgen), axis=1) - return x - - autoencoder_cb = ImageGridCallback( - "output/bigan/autoencoded-epoch-{:03d}.png", autoencoder_sampler) - - y = gan_targets(xtrain.shape[0]) - ytest = gan_targets(xtest.shape[0]) - history = model.fit(x=xtrain, y=y, validation_data=(xtest, ytest), callbacks=[generator_cb, autoencoder_cb], - nb_epoch=100, batch_size=32) - df = pd.DataFrame(history.history) - df.to_csv("output/bigan/history.csv") - - encoder.save("output/bigan/encoder.h5") - generator.save("output/bigan/generator.h5") - discriminator.save("output/bigan/discriminator.h5") - - -if __name__ == "__main__": - main() -from mnist_utils import mnist_data -import keras.backend as K -from keras_adversarial.legacy import l1l2, Dense, fit -from keras_adversarial import normal_latent_sampling, AdversarialOptimizerSimultaneous -from keras_adversarial import AdversarialModel, simple_gan, gan_targets -from keras_adversarial.image_grid_callback import ImageGridCallback -from keras.callbacks import TensorBoard -from keras.optimizers import Adam -from keras.models import Sequential -from keras.layers import Reshape, Flatten, LeakyReLU, Activation -import os -import numpy as np -import pandas as pd -import matplotlib as mpl - -# This line allows mpl to run with no DISPLAY defined -mpl.use('Agg') - - -def model_generator(latent_dim, input_shape, hidden_dim=1024, reg=lambda: l1l2(1e-5, 1e-5)): - return Sequential([ - Dense(int(hidden_dim / 4), name="generator_h1", - input_dim=latent_dim, 
W_regularizer=reg()), - LeakyReLU(0.2), - Dense(int(hidden_dim / 2), name="generator_h2", W_regularizer=reg()), - LeakyReLU(0.2), - Dense(hidden_dim, name="generator_h3", W_regularizer=reg()), - LeakyReLU(0.2), - Dense(np.prod(input_shape), name="generator_x_flat", W_regularizer=reg()), - Activation('sigmoid'), - Reshape(input_shape, name="generator_x")], - name="generator") - - -def model_discriminator(input_shape, hidden_dim=1024, reg=lambda: l1l2(1e-5, 1e-5), output_activation="sigmoid"): - return Sequential([ - Flatten(name="discriminator_flatten", input_shape=input_shape), - Dense(hidden_dim, name="discriminator_h1", W_regularizer=reg()), - LeakyReLU(0.2), - Dense(int(hidden_dim / 2), name="discriminator_h2", W_regularizer=reg()), - LeakyReLU(0.2), - Dense(int(hidden_dim / 4), name="discriminator_h3", W_regularizer=reg()), - LeakyReLU(0.2), - Dense(1, name="discriminator_y", W_regularizer=reg()), - Activation(output_activation)], - name="discriminator") - - -def example_gan(adversarial_optimizer, path, opt_g, opt_d, nb_epoch, generator, discriminator, latent_dim, - targets=gan_targets, loss='binary_crossentropy'): - csvpath = os.path.join(path, "history.csv") - if os.path.exists(csvpath): - print("Already exists: {}".format(csvpath)) - return - - print("Training: {}".format(csvpath)) - # gan (x - > yfake, yreal), z generated on GPU - gan = simple_gan(generator, discriminator, - normal_latent_sampling((latent_dim,))) - - # print summary of models - generator.summary() - discriminator.summary() - gan.summary() - - # build adversarial model - model = AdversarialModel(base_model=gan, - player_params=[ - generator.trainable_weights, discriminator.trainable_weights], - player_names=["generator", "discriminator"]) - model.adversarial_compile(adversarial_optimizer=adversarial_optimizer, - player_optimizers=[opt_g, opt_d], - loss=loss) - - # create callback to generate images - zsamples = np.random.normal(size=(10 * 10, latent_dim)) - - def generator_sampler(): - return generator.predict(zsamples).reshape((10, 10, 28, 28)) - - generator_cb = ImageGridCallback(os.path.join( - path, "epoch-{:03d}.png"), generator_sampler) - - # train model - xtrain, xtest = mnist_data() - y = targets(xtrain.shape[0]) - ytest = targets(xtest.shape[0]) - callbacks = [generator_cb] - if K.backend() == "tensorflow": - callbacks.append( - TensorBoard(log_dir=os.path.join(path, 'logs'), histogram_freq=0, write_graph=True, write_images=True)) - history = fit(model, x=xtrain, y=y, validation_data=(xtest, ytest), callbacks=callbacks, nb_epoch=nb_epoch, - batch_size=32) - - # save history to CSV - df = pd.DataFrame(history.history) - df.to_csv(csvpath) - - # save models - generator.save(os.path.join(path, "generator.h5")) - discriminator.save(os.path.join(path, "discriminator.h5")) - - -def main(): - # z \in R^100 - latent_dim = 100 - # x \in R^{28x28} - input_shape = (28, 28) - # generator (z -> x) - generator = model_generator(latent_dim, input_shape) - # discriminator (x -> y) - discriminator = model_discriminator(input_shape) - example_gan(AdversarialOptimizerSimultaneous(), "output/gan", - opt_g=Adam(1e-4, decay=1e-4), - opt_d=Adam(1e-3, decay=1e-4), - nb_epoch=100, generator=generator, discriminator=discriminator, - latent_dim=latent_dim) - - -if __name__ == "__main__": - main() -from image_utils import dim_ordering_fix, dim_ordering_unfix, dim_ordering_shape -from cifar10_utils import cifar10_data -import keras.backend as K -from keras_adversarial.legacy import Dense, BatchNormalization, fit, l1l2, Convolution2D, 
AveragePooling2D -from keras_adversarial import AdversarialOptimizerSimultaneous, normal_latent_sampling -from keras_adversarial import AdversarialModel, simple_gan, gan_targets -from keras_adversarial.image_grid_callback import ImageGridCallback -from keras.callbacks import TensorBoard -from keras.optimizers import Adam -from keras.models import Sequential -from keras.layers.convolutional import UpSampling2D, MaxPooling2D -from keras.layers import Reshape, Flatten, LeakyReLU, Activation -import os -import numpy as np -import pandas as pd -import matplotlib as mpl - -# This line allows mpl to run with no DISPLAY defined -mpl.use('Agg') - - -def model_generator(): - model = Sequential() - nch = 256 - def reg(): return l1l2(l1=1e-7, l2=1e-7) - h = 5 - model.add(Dense(nch * 4 * 4, input_dim=100, W_regularizer=reg())) - model.add(BatchNormalization(mode=0)) - model.add(Reshape(dim_ordering_shape((nch, 4, 4)))) - model.add(Convolution2D(int(nch / 2), h, h, - border_mode='same', W_regularizer=reg())) - model.add(BatchNormalization(mode=0, axis=1)) - model.add(LeakyReLU(0.2)) - model.add(UpSampling2D(size=(2, 2))) - model.add(Convolution2D(int(nch / 2), h, h, - border_mode='same', W_regularizer=reg())) - model.add(BatchNormalization(mode=0, axis=1)) - model.add(LeakyReLU(0.2)) - model.add(UpSampling2D(size=(2, 2))) - model.add(Convolution2D(int(nch / 4), h, h, - border_mode='same', W_regularizer=reg())) - model.add(BatchNormalization(mode=0, axis=1)) - model.add(LeakyReLU(0.2)) - model.add(UpSampling2D(size=(2, 2))) - model.add(Convolution2D(3, h, h, border_mode='same', W_regularizer=reg())) - model.add(Activation('sigmoid')) - return model - - -def model_discriminator(): - nch = 256 - h = 5 - def reg(): return l1l2(l1=1e-7, l2=1e-7) - - c1 = Convolution2D(int(nch / 4), h, h, border_mode='same', W_regularizer=reg(), - input_shape=dim_ordering_shape((3, 32, 32))) - c2 = Convolution2D(int(nch / 2), h, h, - border_mode='same', W_regularizer=reg()) - c3 = Convolution2D(nch, h, h, border_mode='same', W_regularizer=reg()) - c4 = Convolution2D(1, h, h, border_mode='same', W_regularizer=reg()) - - model = Sequential() - model.add(c1) - model.add(MaxPooling2D(pool_size=(2, 2))) - model.add(LeakyReLU(0.2)) - model.add(c2) - model.add(MaxPooling2D(pool_size=(2, 2))) - model.add(LeakyReLU(0.2)) - model.add(c3) - model.add(MaxPooling2D(pool_size=(2, 2))) - model.add(LeakyReLU(0.2)) - model.add(c4) - model.add(AveragePooling2D(pool_size=(4, 4), border_mode='valid')) - model.add(Flatten()) - model.add(Activation('sigmoid')) - return model - - -def example_gan(adversarial_optimizer, path, opt_g, opt_d, nb_epoch, generator, discriminator, latent_dim, - targets=gan_targets, loss='binary_crossentropy'): - csvpath = os.path.join(path, "history.csv") - if os.path.exists(csvpath): - print("Already exists: {}".format(csvpath)) - return - - print("Training: {}".format(csvpath)) - # gan (x - > yfake, yreal), z is gaussian generated on GPU - # can also experiment with uniform_latent_sampling - generator.summary() - discriminator.summary() - gan = simple_gan(generator=generator, - discriminator=discriminator, - latent_sampling=normal_latent_sampling((latent_dim,))) - - # build adversarial model - model = AdversarialModel(base_model=gan, - player_params=[ - generator.trainable_weights, discriminator.trainable_weights], - player_names=["generator", "discriminator"]) - model.adversarial_compile(adversarial_optimizer=adversarial_optimizer, - player_optimizers=[opt_g, opt_d], - loss=loss) - - # create callback to generate 
images - zsamples = np.random.normal(size=(10 * 10, latent_dim)) - - def generator_sampler(): - xpred = dim_ordering_unfix(generator.predict( - zsamples)).transpose((0, 2, 3, 1)) - return xpred.reshape((10, 10) + xpred.shape[1:]) - - generator_cb = ImageGridCallback(os.path.join( - path, "epoch-{:03d}.png"), generator_sampler, cmap=None) - - # train model - xtrain, xtest = cifar10_data() - y = targets(xtrain.shape[0]) - ytest = targets(xtest.shape[0]) - callbacks = [generator_cb] - if K.backend() == "tensorflow": - callbacks.append( - TensorBoard(log_dir=os.path.join(path, 'logs'), histogram_freq=0, write_graph=True, write_images=True)) - history = fit(model, x=xtrain, y=y, validation_data=(xtest, ytest), - callbacks=callbacks, nb_epoch=nb_epoch, - batch_size=32) - - # save history to CSV - df = pd.DataFrame(history.history) - df.to_csv(csvpath) - - # save models - generator.save(os.path.join(path, "generator.h5")) - discriminator.save(os.path.join(path, "discriminator.h5")) - - -def main(): - # z \in R^100 - latent_dim = 100 - # x \in R^{28x28} - # generator (z -> x) - generator = model_generator() - # discriminator (x -> y) - discriminator = model_discriminator() - example_gan(AdversarialOptimizerSimultaneous(), "output/gan-cifar10", - opt_g=Adam(1e-4, decay=1e-5), - opt_d=Adam(1e-3, decay=1e-5), - nb_epoch=100, generator=generator, discriminator=discriminator, - latent_dim=latent_dim) - - -if __name__ == "__main__": - main() -from image_utils import dim_ordering_fix, dim_ordering_input, dim_ordering_reshape, dim_ordering_unfix -from keras_adversarial import AdversarialOptimizerSimultaneous, normal_latent_sampling -from keras_adversarial import AdversarialModel, simple_gan, gan_targets -from keras_adversarial.image_grid_callback import ImageGridCallback -from keras_adversarial.legacy import Dense, BatchNormalization, Convolution2D -import keras.backend as K -import numpy as np -import pandas as pd -from keras.datasets import mnist -from keras.optimizers import Adam -from keras.layers.convolutional import UpSampling2D -from keras.models import Model -from keras.layers import Flatten, Dropout, LeakyReLU, Input, Activation -import matplotlib as mpl - -# This line allows mpl to run with no DISPLAY defined -mpl.use('Agg') - - -def leaky_relu(x): - return K.relu(x, 0.2) - - -def model_generator(): - nch = 256 - g_input = Input(shape=[100]) - H = Dense(nch * 14 * 14)(g_input) - H = BatchNormalization(mode=2)(H) - H = Activation('relu')(H) - H = dim_ordering_reshape(nch, 14)(H) - H = UpSampling2D(size=(2, 2))(H) - H = Convolution2D(int(nch / 2), 3, 3, border_mode='same')(H) - H = BatchNormalization(mode=2, axis=1)(H) - H = Activation('relu')(H) - H = Convolution2D(int(nch / 4), 3, 3, border_mode='same')(H) - H = BatchNormalization(mode=2, axis=1)(H) - H = Activation('relu')(H) - H = Convolution2D(1, 1, 1, border_mode='same')(H) - g_V = Activation('sigmoid')(H) - return Model(g_input, g_V) - - -def model_discriminator(input_shape=(1, 28, 28), dropout_rate=0.5): - d_input = dim_ordering_input(input_shape, name="input_x") - nch = 512 - # nch = 128 - H = Convolution2D(int(nch / 2), 5, 5, subsample=(2, 2), - border_mode='same', activation='relu')(d_input) - H = LeakyReLU(0.2)(H) - H = Dropout(dropout_rate)(H) - H = Convolution2D(nch, 5, 5, subsample=( - 2, 2), border_mode='same', activation='relu')(H) - H = LeakyReLU(0.2)(H) - H = Dropout(dropout_rate)(H) - H = Flatten()(H) - H = Dense(int(nch / 2))(H) - H = LeakyReLU(0.2)(H) - H = Dropout(dropout_rate)(H) - d_V = Dense(1, activation='sigmoid')(H) - 
return Model(d_input, d_V)
-
-
-def mnist_process(x):
-    x = x.astype(np.float32) / 255.0
-    return x
-
-
-def mnist_data():
-    (xtrain, ytrain), (xtest, ytest) = mnist.load_data()
-    return mnist_process(xtrain), mnist_process(xtest)
-
-
-def generator_sampler(latent_dim, generator):
-    def fun():
-        zsamples = np.random.normal(size=(10 * 10, latent_dim))
-        gen = dim_ordering_unfix(generator.predict(zsamples))
-        return gen.reshape((10, 10, 28, 28))
-
-    return fun
-
-
-if __name__ == "__main__":
-    # z \in R^100
-    latent_dim = 100
-    # x \in R^{28x28}
-    input_shape = (1, 28, 28)
-
-    # generator (z -> x)
-    generator = model_generator()
-    # discriminator (x -> y)
-    discriminator = model_discriminator(input_shape=input_shape)
-    # gan (x -> yfake, yreal), z generated on GPU
-    gan = simple_gan(generator, discriminator,
-                     normal_latent_sampling((latent_dim,)))
-
-    # print summary of models
-    generator.summary()
-    discriminator.summary()
-    gan.summary()
-
-    # build adversarial model
-    model = AdversarialModel(base_model=gan,
-                             player_params=[
-                                 generator.trainable_weights, discriminator.trainable_weights],
-                             player_names=["generator", "discriminator"])
-    model.adversarial_compile(adversarial_optimizer=AdversarialOptimizerSimultaneous(),
-                              player_optimizers=[
-                                  Adam(1e-4, decay=1e-4), Adam(1e-3, decay=1e-4)],
-                              loss='binary_crossentropy')
-
-    # train model
-    generator_cb = ImageGridCallback("output/gan_convolutional/epoch-{:03d}.png",
-                                     generator_sampler(latent_dim, generator))
-
-    xtrain, xtest = mnist_data()
-    xtrain = dim_ordering_fix(xtrain.reshape((-1, 1, 28, 28)))
-    xtest = dim_ordering_fix(xtest.reshape((-1, 1, 28, 28)))
-    y = gan_targets(xtrain.shape[0])
-    ytest = gan_targets(xtest.shape[0])
-    history = model.fit(x=xtrain, y=y, validation_data=(xtest, ytest), callbacks=[generator_cb], nb_epoch=100,
-                        batch_size=32)
-    df = pd.DataFrame(history.history)
-    df.to_csv("output/gan_convolutional/history.csv")
-
-    generator.save("output/gan_convolutional/generator.h5")
-    discriminator.save("output/gan_convolutional/discriminator.h5")
-import os
-from example_gan import model_generator, model_discriminator
-from keras.optimizers import Adam
-from keras_adversarial.unrolled_optimizer import UnrolledAdversarialOptimizer
-from example_gan import example_gan
-import matplotlib as mpl
-
-# This line allows mpl to run with no DISPLAY defined
-mpl.use('Agg')
-
-
-def example_gan_unrolled(path, depth_g, depth_d):
-    # z \in R^100
-    latent_dim = 100
-    # x \in R^{28x28}
-    input_shape = (28, 28)
-    # generator (z -> x)
-    generator = model_generator(
-        latent_dim, input_shape, hidden_dim=512, batch_norm_mode=1)
-    # discriminator (x -> y)
-    discriminator = model_discriminator(
-        input_shape, hidden_dim=512, dropout=0, batch_norm_mode=1)
-    example_gan(UnrolledAdversarialOptimizer(depth_g=depth_g, depth_d=depth_d), path,
-                opt_g=Adam(1e-4, decay=1e-4),
-                opt_d=Adam(1e-3, decay=1e-4),
-                nb_epoch=50, generator=generator, discriminator=discriminator,
-                latent_dim=latent_dim)
-
-
-def example(name, depth_g, depth_d):
-    path = "output/unrolled_gan"
-    example_gan_unrolled(os.path.join(path, name), depth_g, depth_d)
-
-
-if __name__ == "__main__":
-    example("k_0_0", 0, 0)
-    example("k_8_8", 8, 8)
-    example("k_16_16", 16, 16)
-    example("k_8_0", 8, 0)
-    example("k_0_8", 0, 8)
-    example("k_16_8", 16, 8)
-    example("k_32_32", 32, 32)
-import os
-from keras_adversarial import gan_targets_hinge
-from example_gan import model_generator, model_discriminator
-from keras.optimizers import Adam
-from keras_adversarial.unrolled_optimizer
import UnrolledAdversarialOptimizer -from example_gan import example_gan -import matplotlib as mpl - -# This line allows mpl to run with no DISPLAY defined -mpl.use('Agg') - - -def example_gan_unrolled_hinge(path, depth_g, depth_d, clipvalue=2.0): - # z \in R^100 - latent_dim = 100 - # x \in R^{28x28} - input_shape = (28, 28) - # generator (z -> x) - generator = model_generator( - latent_dim, input_shape, hidden_dim=512, batch_norm_mode=-1) - # discriminator (x -> y) - discriminator = model_discriminator(input_shape, output_activation='linear', hidden_dim=512, batch_norm_mode=-1, - dropout=0) - example_gan(UnrolledAdversarialOptimizer(depth_g=depth_g, depth_d=depth_d), path, - opt_g=Adam(1e-4, decay=1e-4, clipvalue=clipvalue), - opt_d=Adam(1e-3, decay=1e-4, clipvalue=clipvalue), - nb_epoch=50, generator=generator, discriminator=discriminator, - latent_dim=latent_dim, loss="squared_hinge", targets=gan_targets_hinge) - - -def example(name, depth_g, depth_d, clipvalue): - path = "output/unrolled_gan_hinge" - example_gan_unrolled_hinge(os.path.join( - path, name), depth_g, depth_d, clipvalue) - - -if __name__ == "__main__": - example("k_0_0", 0, 0) - example("k_8_8_clip_2", 8, 8, 2) - example("k_8_8_clip_0.5", 8, 8, 0.5) - example("k_8_8_clip_0", 8, 8, 0) - example("k_16_16", 16, 16) - example("k_16_16_clip_0", 16, 16, 0) - example("k_16_16_clip_0.5", 16, 16, 0.5) - example("k_16_16_clip_10", 16, 16, 10) - example("k_32_32", 32, 32) - example("k_1_1", 1, 1) - example("k_2_0", 2, 0) - example("k_4_0", 4, 0) - example("k_8_0", 8, 0) -import matplotlib.pyplot as plt -import numpy as np -from keras.regularizers import l2 -from keras.callbacks import LambdaCallback -from keras.optimizers import SGD -from keras.models import Model -from keras.layers import Dense, merge, Input -from keras_adversarial.adversarial_model import AdversarialModel -from keras_adversarial.unrolled_optimizer import UnrolledAdversarialOptimizer -from keras_adversarial.adversarial_optimizers import AdversarialOptimizerSimultaneous, AdversarialOptimizerAlternating -import os - -os.environ["THEANO_FLAGS"] = "mode=FAST_COMPILE,device=cpu,floatX=float32" - -"""Example of a two player game, rock paper scissors. - -This game does not converge under simple alternating or simultaneous descent, -but converges using UnrolledAdversarialOptimizer. 
- -""" - - -def rps_chart(path, a, b): - """Bar chart of two players in rock, paper, scissors""" - fig, ax = plt.subplots() - n = 3 - width = 0.35 - pad = 1.0 - 2 * width - ind = np.arange(n) - ba = plt.bar(pad / 2 + ind, a, width=width, color='r') - bb = plt.bar(pad / 2 + ind + width, b, width=width, color='g') - ax.set_ylabel('Frequency') - ax.set_xticks(pad / 2 + ind + width) - ax.set_xticklabels(("Rock", "Paper", "Scissors")) - fig.legend((ba, bb), ("Player A", "Player B")) - ax.set_ylim([0, 1]) - if not os.path.exists(os.path.dirname(path)): - os.makedirs(os.path.dirname(path)) - fig.savefig(path) - plt.close(fig) - - -def experiment(opt, path): - """Train two players to play rock, paper, scissors using a given optimizer""" - x = Input((1,), name="x") - player_a = Dense(3, activation='softmax', name="player_a", - bias=False, W_regularizer=l2(1e-2)) - player_b = Dense(3, activation='softmax', name="player_b", - bias=False, W_regularizer=l2(1e-2)) - - action_a = player_a(x) - action_b = player_b(x) - - def rps(z): - u = z[0] - v = z[1] - return u[:, 0] * v[:, 2] + u[:, 1] * v[:, 0] + u[:, 2] * v[:, 1] - - model_a = Model(x, merge([action_a, action_b], - mode=rps, output_shape=lambda z: (z[0][0], 1))) - model_b = Model(x, merge([action_b, action_a], - mode=rps, output_shape=lambda z: (z[0][0], 1))) - - adversarial_model = AdversarialModel(player_models=[model_a, model_b], - player_params=[ - [player_a.W], [player_b.W]], - player_names=["a", "b"]) - adversarial_model.adversarial_compile(opt, - player_optimizers=[SGD(1), SGD(1)], - loss="mean_absolute_error") - param_model = Model(x, [action_a, action_b]) - - def print_params(epoch, logs): - params = param_model.predict(np.ones((1, 1))) - a = params[0].ravel() - b = params[1].ravel() - print("Epoch: {}, A: {}, B: {}".format(epoch, a, b)) - imgpath = os.path.join(path, "epoch-{:03d}.png".format(epoch)) - rps_chart(imgpath, a, b) - - cb = LambdaCallback(on_epoch_begin=print_params) - batch_count = 5 - adversarial_model.fit(np.ones((batch_count, 1)), - [np.ones((batch_count, 1)), - np.ones((batch_count, 1))], - nb_epoch=120, callbacks=[cb], verbose=0, batch_size=1) - - -if __name__ == "__main__": - experiment(AdversarialOptimizerSimultaneous(), - "output/rock_paper_scissors/simultaneous") - experiment(AdversarialOptimizerAlternating(), - "output/rock_paper_scissors/alternating") - experiment(UnrolledAdversarialOptimizer(depth_d=30, depth_g=30), - "output/rock_paper_scissors/unrolled") - experiment(UnrolledAdversarialOptimizer(depth_d=0, depth_g=30), - "output/rock_paper_scissors/unrolled_player_a") -import keras.backend as K -import numpy as np -from keras.layers import Input, Reshape - - -def dim_ordering_fix(x): - if K.image_dim_ordering() == 'th': - return x - else: - return np.transpose(x, (0, 2, 3, 1)) - - -def dim_ordering_unfix(x): - if K.image_dim_ordering() == 'th': - return x - else: - return np.transpose(x, (0, 3, 1, 2)) - - -def dim_ordering_shape(input_shape): - if K.image_dim_ordering() == 'th': - return input_shape - else: - return (input_shape[1], input_shape[2], input_shape[0]) - - -def dim_ordering_input(input_shape, name): - if K.image_dim_ordering() == 'th': - return Input(input_shape, name=name) - else: - return Input((input_shape[1], input_shape[2], input_shape[0]), name=name) - - -def dim_ordering_reshape(k, w, **kwargs): - if K.image_dim_ordering() == 'th': - return Reshape((k, w, w), **kwargs) - else: - return Reshape((w, w, k), **kwargs) - - -def channel_axis(): - if K.image_dim_ordering() == 'th': - return 1 - else: 
- return 3 -import numpy as np -from keras.datasets import mnist - - -def mnist_process(x): - x = x.astype(np.float32) / 255.0 - return x - - -def mnist_data(): - (xtrain, ytrain), (xtest, ytest) = mnist.load_data() - return mnist_process(xtrain), mnist_process(xtest) -from .adversarial_model import AdversarialModel -from .adversarial_optimizers import AdversarialOptimizerAlternating -from .adversarial_optimizers import AdversarialOptimizerSimultaneous, AdversarialOptimizer -from .adversarial_optimizers import AdversarialOptimizerScheduled -from .adversarial_utils import gan_targets, build_gan, normal_latent_sampling, eliminate_z, fix_names, simple_gan -from .adversarial_utils import n_choice, simple_bigan, gan_targets_hinge -import itertools - -import numpy as np -from keras import backend as K -from keras import optimizers -from keras.models import Model - -from .adversarial_utils import fix_names, merge_updates -from .legacy import keras_2 - - -class AdversarialModel(Model): - """ - Adversarial training for multi-player games. - Given a base model with n targets and k players, create a model with n*k targets. - Each player optimizes loss on that player's targets. - """ - - def __init__(self, player_params, base_model=None, player_models=None, player_names=None): - """ - Initialize adversarial model. Specify base_model or player_models, not both. - :param player_params: list of player parameters for each player (shared variables) - :param base_model: base model will be duplicated for each player to create player models - :param player_models: model for each player - :param player_names: names of each player (optional) - """ - - assert (len(player_params) > 0) - self.player_params = player_params - self.player_count = len(self.player_params) - if player_names is None: - player_names = ["player_{}".format(i) - for i in range(self.player_count)] - assert (len(player_names) == self.player_count) - self.player_names = player_names - - self.generator_optimizer = None - self.discriminator_optimizer = None - self.loss = None - self.total_loss = None - self.optimizer = None - self._function_kwargs = None - if base_model is None and player_models is None: - raise ValueError( - "Please specify either base_model or player_models") - if base_model is not None and player_models is not None: - raise ValueError("Specify base_model or player_models, not both") - if base_model is not None: - self.layers = [] - for i in range(self.player_count): - # duplicate base model - model = Model(base_model.inputs, - fix_names(base_model(base_model.inputs), base_model.output_names)) - # add model to list - self.layers.append(model) - if player_models is not None: - assert (len(player_models) == self.player_count) - self.layers = player_models - - def adversarial_compile(self, adversarial_optimizer, player_optimizers, loss, player_compile_kwargs=None, - **kwargs): - """ - Configures the learning process. 
- :param adversarial_optimizer: instance of AdversarialOptimizer - :param player_optimizers: list of optimizers for each player - :param loss: loss function or function name - :param player_compile_kwargs: list of additional arguments to model compilation for each player - :param kwargs: additional arguments to function compilation - :return: - """ - self._function_kwargs = kwargs - self.adversarial_optimizer = adversarial_optimizer - assert (len(player_optimizers) == self.player_count) - - self.optimizers = [optimizers.get(optimizer) - for optimizer in player_optimizers] - self.loss = loss - self.optimizer = None - - if player_compile_kwargs is None: - player_compile_kwargs = [{} for _ in self.layers] - - # Build player models - for opt, model, compile_kwargs in zip(self.optimizers, self.layers, player_compile_kwargs): - model.compile(opt, loss=self.loss, **compile_kwargs) - - self.train_function = None - self.test_function = None - - # Inputs are same for each model - def filter_inputs(inputs): - return inputs - - self.internal_input_shapes = filter_inputs( - self.layers[0].internal_input_shapes) - self.input_names = filter_inputs(self.layers[0].input_names) - self.inputs = filter_inputs(self.layers[0].inputs) - - # Outputs are concatenated player models - models = self.layers - - def collect(f): - return list(itertools.chain.from_iterable(f(m) for m in models)) - - self.internal_output_shapes = collect( - lambda m: m.internal_output_shapes) - self.loss_functions = collect(lambda m: m.loss_functions) - - self.targets = collect(lambda m: m.targets) - self.outputs = collect(lambda m: m.outputs) - self.sample_weights = collect(lambda m: m.sample_weights) - self.sample_weight_modes = collect(lambda m: m.sample_weight_modes) - # for each target, output name is {player}_{target} - self.output_names = [] - for i in range(self.player_count): - for name in models[i].output_names: - self.output_names.append( - "{}_{}".format(self.player_names[i], name)) - # for each metric, metric name is {player}_{metric} - self.metrics_names = ["loss"] - for i in range(self.player_count): - for name in models[i].metrics_names: - self.metrics_names.append( - "{}_{}".format(self.player_names[i], name)) - - # total loss is sum of losses - self.total_loss = np.float32(0) - for model in models: - self.total_loss += model.total_loss - - # Keras-2 - self._feed_loss_fns = self.loss_functions - self._feed_inputs = self.inputs - self._feed_input_names = self.input_names - self._feed_input_shapes = self.internal_input_shapes - self._feed_outputs = self.outputs - self._feed_output_names = self.output_names - self._feed_output_shapes = self.internal_output_shapes - self._feed_sample_weights = self.sample_weights - self._feed_sample_weight_modes = self.sample_weight_modes - - @property - def constraints(self): - if keras_2: - return [] - else: - return list(itertools.chain.from_iterable(model.constraints for model in self.layers)) - - @property - def updates(self): - return merge_updates(list(itertools.chain.from_iterable(model.updates for model in self.layers))) - - @property - def regularizers(self): - return list(itertools.chain.from_iterable(model.regularizers for model in self.layers)) - - def _make_train_function(self): - if not hasattr(self, 'train_function'): - raise Exception('You must compile your model before using it.') - if self.train_function is None: - inputs = self.inputs + self.targets + self.sample_weights - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - inputs += 
[K.learning_phase()] - outputs = [self.total_loss] - outputs += list(itertools.chain.from_iterable( - [model.total_loss] + model.metrics_tensors - for model in self.layers)) - - # returns loss and metrics. Updates weights at each call. - constraints = [{} for model in self.layers] if keras_2 else [ - model.constraints for model in self.layers] - self.train_function = self.adversarial_optimizer.make_train_function(inputs, outputs, - [model.total_loss for model in - self.layers], - self.player_params, - self.optimizers, - constraints, - self.updates, - self._function_kwargs) - - def _make_test_function(self): - if not hasattr(self, 'test_function'): - raise Exception('You must compile your model before using it.') - if self.test_function is None: - inputs = self.inputs + self.targets + self.sample_weights - if self.uses_learning_phase and not isinstance(K.learning_phase(), int): - inputs += [K.learning_phase()] - outputs = [self.total_loss] - outputs += list(itertools.chain.from_iterable( - [model.total_loss] + model.metrics_tensors - for model in self.layers)) - self.test_function = K.function(inputs, - outputs, - updates=self.state_updates, - **self._function_kwargs) -from abc import ABCMeta, abstractmethod - -import keras.backend as K - -from .legacy import get_updates - - -class AdversarialOptimizer(object): - __metaclass__ = ABCMeta - - @abstractmethod - def make_train_function(self, inputs, outputs, losses, params, optimizers, constraints, model_updates, - function_kwargs): - """ - Construct function that updates weights and returns losses. - :param inputs: function inputs - :param outputs: function outputs - :param losses: player losses - :param params: player parameters - :param optimizers: player optimizers - :param constraints: player constraints - :param function_kwargs: function kwargs - :return: - """ - pass - - -class AdversarialOptimizerSimultaneous(object): - """ - Perform simultaneous updates for each player in the game. - """ - - def make_train_function(self, inputs, outputs, losses, params, optimizers, constraints, model_updates, - function_kwargs): - return K.function(inputs, - outputs, - updates=self.call( - losses, params, optimizers, constraints) + model_updates, - **function_kwargs) - - def call(self, losses, params, optimizers, constraints): - updates = [] - for loss, param, optimizer, constraint in zip(losses, params, optimizers, constraints): - updates += optimizer.get_updates(param, constraint, loss) - return updates - - -class AdversarialOptimizerAlternating(object): - """ - Perform round-robin updates for each player in the game. Each player takes a turn. - Take each batch and run that batch through each of the models. All models are trained on each batch. - """ - - def __init__(self, reverse=False): - """ - Initialize optimizer. 
-        :param reverse: players take turns in reverse order
-        """
-        self.reverse = reverse
-
-    def make_train_function(self, inputs, outputs, losses, params, optimizers, constraints, model_updates,
-                            function_kwargs):
-        funcs = []
-        for loss, param, optimizer, constraint in zip(losses, params, optimizers, constraints):
-            updates = optimizer.get_updates(param, constraint, loss)
-            funcs.append(K.function(
-                inputs, [], updates=updates, **function_kwargs))
-        output_func = K.function(
-            inputs, outputs, updates=model_updates, **function_kwargs)
-        if self.reverse:
-            # list.reverse() reverses in place and returns None
-            funcs.reverse()
-
-        def train(_inputs):
-            # update each player
-            for func in funcs:
-                func(_inputs)
-            # return output
-            return output_func(_inputs)
-
-        return train
-
-
-class AdversarialOptimizerScheduled(object):
-    """
-    Perform updates according to a schedule.
-    For example, [0,0,1] will train player 0 on batches 0,1,3,4,6,7... and player 1 on batches 2,5,8...
-    """
-
-    def __init__(self, schedule):
-        """
-        Initialize optimizer.
-        :param schedule: Schedule of updates
-        """
-        assert len(schedule) > 0
-        self.schedule = schedule
-        self.iter = 0
-
-    def make_train_function(self, inputs, outputs, losses, params, optimizers, constraints, model_updates,
-                            function_kwargs):
-        funcs = []
-        for loss, param, optimizer, constraint in zip(losses, params, optimizers, constraints):
-            updates = get_updates(
-                optimizer=optimizer, params=param, constraints=constraint, loss=loss)
-            funcs.append(K.function(inputs, outputs,
-                                    updates=updates + model_updates, **function_kwargs))
-
-        def train(_inputs):
-            # pick the player scheduled for this batch, then advance the pointer
-            func = funcs[self.schedule[self.iter]]
-            self.iter = (self.iter + 1) % len(self.schedule)
-            return func(_inputs)
-
-        return train
-import keras.backend as K
-import numpy as np
-from keras.layers import Activation, Lambda
-from keras.models import Model
-from six import iteritems
-
-from .backend import unpack_assignment, variable_key
-
-
-def build_gan(generator, discriminator, name="gan"):
-    """
-    Build GAN from generator and discriminator
-    Model is (z, x) -> (yfake, yreal)
-    :param generator: Model (z -> x)
-    :param discriminator: Model (x -> y)
-    :return: GAN model
-    """
-    yfake = Activation("linear", name="yfake")(
-        discriminator(generator(generator.inputs)))
-    yreal = Activation("linear", name="yreal")(
-        discriminator(discriminator.inputs))
-    model = Model(generator.inputs + discriminator.inputs,
-                  [yfake, yreal], name=name)
-    return model
-
-
-def eliminate_z(gan, latent_sampling):
-    """
-    Eliminate z from GAN using latent_sampling
-    :param gan: model with 2 inputs: z, x
-    :param latent_sampling: layer that samples z with same batch size as x
-    :return: Model x -> gan(latent_sampling(x), x)
-    """
-    x = gan.inputs[1]
-    z = latent_sampling(x)
-    model = Model(x, fix_names(gan([z, x]), gan.output_names), name=gan.name)
-    return model
-
-
-def simple_gan(generator, discriminator, latent_sampling):
-    # build basic gan
-    gan = build_gan(generator, discriminator)
-    # generate z on gpu, eliminate one input
-    if latent_sampling is None:
-        return gan
-    else:
-        return eliminate_z(gan, latent_sampling)
-
-
-def simple_bigan(generator, encoder, discriminator, latent_sampling=None):
-    """
-    Construct BiGAN x -> yfake, yreal
-    :param generator: model z->x
-    :param encoder: model x->z
-    :param discriminator: model z,x->y (z must be first)
-    :param latent_sampling: layer for sampling from latent space
-    :return:
-    """
-    if latent_sampling is None:
-        zfake = generator.inputs[0]
-    else:
-        zfake = latent_sampling(discriminator.inputs[1])
-    xreal = discriminator.inputs[1]
-    xfake = generator(zfake)
-    zreal = encoder(xreal)
-    yfake = discriminator([zfake, xfake])
-    yreal = discriminator([zreal, xreal])
-    if latent_sampling is None:
-        inputs = [zfake, xreal]
-    else:
-        inputs = [xreal]
-    return Model(inputs, fix_names([yfake, yreal], ["yfake", "yreal"]), name="bigan")
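# A quick usage sketch of the helpers above, assuming the legacy Keras 1 API
# used throughout this file; the tiny layer sizes are arbitrary and only show
# the wiring, not a working GAN (normal_latent_sampling is defined just below).
from keras.layers import Dense, Input
from keras.models import Model

latent_dim = 8
z = Input((latent_dim,), name="z")
generator = Model(z, Dense(4, activation="sigmoid")(z))
x = Input((4,), name="x")
discriminator = Model(x, Dense(1, activation="sigmoid")(x))

# x -> [yfake, yreal]; z is sampled inside the model by the Lambda layer
gan = simple_gan(generator, discriminator,
                 normal_latent_sampling((latent_dim,)))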
-
-
-def simple_bigan(generator, encoder, discriminator, latent_sampling=None):
-    """
-    Construct BiGAN: x -> yfake, yreal
-    :param generator: model z->x
-    :param encoder: model x->z
-    :param discriminator: model z,x->y (z must be first)
-    :param latent_sampling: layer for sampling from latent space
-    :return: BiGAN model
-    """
-    if latent_sampling is None:
-        zfake = generator.inputs[0]
-    else:
-        zfake = latent_sampling(discriminator.inputs[1])
-    xreal = discriminator.inputs[1]
-    xfake = generator(zfake)
-    zreal = encoder(xreal)
-    yfake = discriminator([zfake, xfake])
-    yreal = discriminator([zreal, xreal])
-    if latent_sampling is None:
-        inputs = [zfake, xreal]
-    else:
-        inputs = [xreal]
-    return Model(inputs, fix_names([yfake, yreal], ["yfake", "yreal"]), name="bigan")
-
-
-def fix_names(outputs, names):
-    if not isinstance(outputs, list):
-        outputs = [outputs]
-    if not isinstance(names, list):
-        names = [names]
-    return [Activation('linear', name=name)(output) for output, name in zip(outputs, names)]
-
-
-def gan_targets(n):
-    """
-    Standard training targets
-    [generator_fake, generator_real, discriminator_fake, discriminator_real] = [1, 0, 0, 1]
-    :param n: number of samples
-    :return: list of target arrays
-    """
-    generator_fake = np.ones((n, 1))
-    generator_real = np.zeros((n, 1))
-    discriminator_fake = np.zeros((n, 1))
-    discriminator_real = np.ones((n, 1))
-    return [generator_fake, generator_real, discriminator_fake, discriminator_real]
-
-
-def gan_targets_hinge(n):
-    """
-    Standard training targets for hinge loss
-    [generator_fake, generator_real, discriminator_fake, discriminator_real] = [1, -1, -1, 1]
-    :param n: number of samples
-    :return: list of target arrays
-    """
-    generator_fake = np.ones((n, 1))
-    generator_real = np.ones((n, 1)) * -1
-    discriminator_fake = np.ones((n, 1)) * -1
-    discriminator_real = np.ones((n, 1))
-    return [generator_fake, generator_real, discriminator_fake, discriminator_real]
-
-
-def normal_latent_sampling(latent_shape):
-    """
-    Sample from a standard normal distribution
-    :param latent_shape: batch shape
-    :return: normal samples, shape=(n,)+latent_shape
-    """
-    return Lambda(lambda x: K.random_normal((K.shape(x)[0],) + latent_shape),
-                  output_shape=lambda x: ((x[0],) + latent_shape))
-
-
-def uniform_latent_sampling(latent_shape, low=0.0, high=1.0):
-    """
-    Sample from a uniform distribution
-    :param latent_shape: batch shape
-    :return: uniform samples, shape=(n,)+latent_shape
-    """
-    return Lambda(lambda x: K.random_uniform((K.shape(x)[0],) + latent_shape, low, high),
-                  output_shape=lambda x: ((x[0],) + latent_shape))
-
-
-def n_choice(x, n):
-    return x[np.random.choice(x.shape[0], size=n, replace=False)]
-
-
-def merge_updates(updates):
-    """Average repeated updates of the same variable"""
-    merged_updates = {}
-    for update in updates:
-        variable, value = unpack_assignment(update)
-        key = variable_key(variable)
-        if key not in merged_updates:
-            merged_updates[key] = [variable, []]
-        merged_updates[key][1].append(value)
-    ret = []
-    for k, v in iteritems(merged_updates):
-        variable = v[0]
-        values = v[1]
-        n = len(values)
-        if n == 1:
-            # a single update keeps its value unchanged
-            ret.append(K.update(variable, values[0]))
-        else:
-            ret.append(K.update(variable, sum(values) / n))
-    return ret
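# A hedged sketch of what merge_updates does with the tuple-style updates
# produced by the monkeypatched backend later in this dump. The helper name
# _merge_updates_sketch is illustrative only.
import keras.backend as K


def _merge_updates_sketch():
    w = K.variable(0.0)
    # two players both propose an assignment to w; merge_updates collapses
    # them into one assignment of the average, here w <- 3.0
    merged = merge_updates([(w, K.constant(2.0)), (w, K.constant(4.0))])
    return merged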
-import os
-
-from matplotlib import pyplot as plt, gridspec
-
-
-def write_image_grid(filepath, imgs, figsize=None, cmap='gray'):
-    directory = os.path.dirname(os.path.abspath(filepath))
-    if not os.path.exists(directory):
-        os.makedirs(directory)
-    fig = create_image_grid(imgs, figsize, cmap=cmap)
-    fig.savefig(filepath)
-    plt.close(fig)
-
-
-def create_image_grid(imgs, figsize=None, cmap='gray'):
-    n = imgs.shape[0]
-    m = imgs.shape[1]
-    if figsize is None:
-        figsize = (n, m)
-    fig = plt.figure(figsize=figsize)
-    gs1 = gridspec.GridSpec(n, m)
-    gs1.update(wspace=0.025, hspace=0.025)  # set the spacing between axes
-    for i in range(n):
-        for j in range(m):
-            ax = plt.subplot(gs1[i, j])
-            img = imgs[i, j, :]
-            ax.imshow(img, cmap=cmap)
-            ax.axis('off')
-    return fig
-from keras.callbacks import Callback
-
-from .image_grid import write_image_grid
-
-
-class ImageGridCallback(Callback):
-    def __init__(self, image_path, generator, cmap='gray'):
-        self.image_path = image_path
-        self.generator = generator
-        self.cmap = cmap
-
-    def on_epoch_end(self, epoch, logs=None):
-        # logs=None avoids the mutable-default-argument pitfall
-        xsamples = self.generator()
-        image_path = self.image_path.format(epoch)
-        write_image_grid(image_path, xsamples, cmap=self.cmap)
-"""
-Utility functions to avoid warnings while testing both Keras 1 and 2.
-"""
-import keras
-
-keras_2 = int(keras.__version__.split(".")[0]) > 1  # Keras > 1
-
-
-def fit_generator(model, generator, epochs, steps_per_epoch):
-    if keras_2:
-        model.fit_generator(generator, epochs=epochs,
-                            steps_per_epoch=steps_per_epoch)
-    else:
-        model.fit_generator(generator, nb_epoch=epochs,
-                            samples_per_epoch=steps_per_epoch)
-
-
-def fit(model, x, y, nb_epoch=10, *args, **kwargs):
-    if keras_2:
-        return model.fit(x, y, *args, epochs=nb_epoch, **kwargs)
-    else:
-        return model.fit(x, y, *args, nb_epoch=nb_epoch, **kwargs)
-
-
-def l1l2(l1=0, l2=0):
-    if keras_2:
-        return keras.regularizers.L1L2(l1, l2)
-    else:
-        return keras.regularizers.l1l2(l1, l2)
-
-
-def Dense(units, W_regularizer=None, W_initializer='glorot_uniform', **kwargs):
-    if keras_2:
-        return keras.layers.Dense(units, kernel_regularizer=W_regularizer, kernel_initializer=W_initializer, **kwargs)
-    else:
-        return keras.layers.Dense(units, W_regularizer=W_regularizer, init=W_initializer, **kwargs)
-
-
-def BatchNormalization(mode=0, **kwargs):
-    if keras_2:
-        return keras.layers.BatchNormalization(**kwargs)
-    else:
-        return keras.layers.BatchNormalization(mode=mode, **kwargs)
-
-
-def Convolution2D(units, w, h, W_regularizer=None, W_initializer='glorot_uniform', border_mode='same', **kwargs):
-    if keras_2:
-        return keras.layers.Convolution2D(units, (w, h), padding=border_mode, kernel_regularizer=W_regularizer,
-                                          kernel_initializer=W_initializer,
-                                          **kwargs)
-    else:
-        return keras.layers.Convolution2D(units, w, h, border_mode=border_mode, W_regularizer=W_regularizer,
-                                          init=W_initializer,
-                                          **kwargs)
-
-
-def AveragePooling2D(pool_size, border_mode='valid', **kwargs):
-    if keras_2:
-        return keras.layers.AveragePooling2D(pool_size=pool_size, padding=border_mode, **kwargs)
-    else:
-        return keras.layers.AveragePooling2D(pool_size=pool_size, border_mode=border_mode, **kwargs)
-
-
-def get_updates(optimizer, params, constraints, loss):
-    if keras_2:
-        return optimizer.get_updates(params, constraints, loss)
-    else:
-        return optimizer.get_updates(params=params, loss=loss)
-import keras.backend as K
-
-from .adversarial_optimizers import AdversarialOptimizerSimultaneous
-from .backend import unpack_assignments, clone_replace
-
-
-def unroll(updates, uupdates, depth):
-    replace = {k: v for k, v in unpack_assignments(uupdates)}
-    updates_t = unpack_assignments(updates)
-    for i in range(depth):
-        updates_t = [(k, clone_replace(v, replace)) for k, v in updates_t]
-    return [K.update(a, b) for a, b in updates_t]
-
-
-class UnrolledAdversarialOptimizer(AdversarialOptimizerSimultaneous):
-    def __init__(self, depth_g, depth_d):
-        """
-        :param depth_g: Depth to unroll discriminator when updating generator
-        :param depth_d: Depth to unroll generator when updating discriminator
-        """
-        self.depth_g = depth_g
-        self.depth_d = depth_d
-
-    def call(self, losses, params, optimizers,
constraints): - # Players should be [generator, discriminator] - assert (len(optimizers) == 2) - - updates = [o.get_updates(p, c, l) for o, p, c, l in zip( - optimizers, params, constraints, losses)] - - gupdates = unroll(updates[0], updates[1], self.depth_g) - dupdates = unroll(updates[1], updates[0], self.depth_d) - - return gupdates + dupdates -import keras.backend as K - -if K.backend() == "tensorflow": - from .tensorflow_backend import unpack_assignment, clone_replace, map_params, variable_key -else: - from .theano_backend import unpack_assignment, clone_replace, map_params, variable_key - - -def unpack_assignments(assignments): - return [unpack_assignment(a) for a in assignments] -import tensorflow as tf -from six import iterkeys -from tensorflow.contrib.graph_editor import select -from tensorflow.contrib.graph_editor import util -from tensorflow.python.framework import ops as tf_ops - - -def unpack_assignment(a): - if isinstance(a, (list, tuple)): - assert (len(a) == 2) - return a - elif isinstance(a, tf.Tensor): - assert (a.op.type in ['Assign', 'AssignAdd', 'AssignSub']) - if a.op.type == 'Assign': - return a.op.inputs[0], a.op.inputs[1] - if a.op.type == 'AssignAdd': - return a.op.inputs[0], a.op.inputs[0] + a.op.inputs[1] - elif a.op.type == 'AssignSub': - return a.op.inputs[0], a.op.inputs[0] - a.op.inputs[1] - else: - raise ValueError("Unsupported operation: {}".format(a.op.type)) - else: - raise ValueError( - "Unsupported assignment object type: {}".format(type(a))) - - -def map_params(params): - return [x.op.outputs[0] for x in params] - - -def clone_replace(f, replace): - flatten_target_ts = util.flatten_tree(f) - graph = util.get_unique_graph( - flatten_target_ts, check_types=(tf_ops.Tensor)) - control_ios = util.ControlOutputs(graph) - ops = select.get_walks_intersection_ops(list(iterkeys(replace)), - flatten_target_ts, - control_ios=control_ios) - if not ops: - # this happens with disconnected inputs - return f - else: - return tf.contrib.graph_editor.graph_replace(f, replace) - - -def variable_key(a): - if hasattr(a, "op"): - return a.op - else: - return a -import keras.backend - -""" -Import this file to monkeypatch tensorflow to lazily convert tuples to tf_assign inside K.function. -Makes unpacking and inspecting updates in tensorflow much cleaner. -""" - - -def update(x, new_x): - return (x, new_x) - - -def update_add(x, increment): - return (x, x + increment) - - -def update_sub(x, decrement): - return (x, x - decrement) - - -def moving_average_update(variable, value, momentum): - return (variable, variable * momentum + value * (1. - momentum)) - - -keras.backend.update = update -keras.backend.update_add = update_add -keras.backend.update_sub = update_sub -keras.backend.moving_average_update = moving_average_update -from theano import clone - - -def unpack_assignment(a): - return a - - -def map_params(params): - return params - - -def clone_replace(f, replace): - return clone(f, replace=replace) - - -def variable_key(a): - return a -import theano -import theano.tensor as T -import numpy as np -import keras.backend as K - -# A test script to validate causal dilated convolutions -dilation = 2 -input = T.fvector() -# (output channels, input channels, filter rows, filter columns). 
-filters = T.fvector()
-input_reshaped = T.reshape(input, (1, -1, 1))
-input_reshaped = K.asymmetric_temporal_padding(
-    input_reshaped, left_pad=dilation, right_pad=0)
-input_reshaped = T.reshape(input_reshaped, (1, 1, -1, 1))
-filters_reshaped = T.reshape(filters, (1, 1, -1, 1))
-out = T.nnet.conv2d(input_reshaped, filters_reshaped,
-                    border_mode='valid', filter_dilation=(dilation, 1))
-out = T.reshape(out, (1, -1, 1))
-out = K.asymmetric_temporal_padding(out, left_pad=dilation, right_pad=0)
-out = T.reshape(out, (1, 1, -1, 1))
-out = T.nnet.conv2d(out, filters_reshaped,
-                    border_mode='valid', filter_dilation=(dilation, 1))
-out = T.flatten(out)
-
-in_input = np.arange(8, dtype='float32')
-in_filters = np.array([1, 1], dtype='float32')
-f = theano.function([input, filters], out)
-print("".join(["%3.0f" % i for i in in_input]))
-print("".join(["%3.0f" % i for i in f(in_input, in_filters)]))
-"""
-"""
-from __future__ import division
-
-import math
-import os
-import warnings
-
-import numpy as np
-import scipy.io.wavfile
-import scipy.signal
-from picklable_itertools import cycle
-from picklable_itertools.extras import partition_all
-from tqdm import tqdm
-
-
-# TODO: make SACRED ingredient.
-def one_hot(x):
-    return np.eye(256, dtype='uint8')[x.astype('uint8')]
-
-
-def fragment_indices(full_sequences, fragment_length, batch_size, fragment_stride, nb_output_bins):
-    for seq_i, sequence in enumerate(full_sequences):
-        # range_values = np.linspace(np.iinfo(sequence.dtype).min, np.iinfo(sequence.dtype).max, nb_output_bins)
-        # digitized = np.digitize(sequence, range_values).astype('uint8')
-        for i in range(0, sequence.shape[0] - fragment_length, fragment_stride):
-            yield seq_i, i
-
-
-def select_generator(set_name, random_train_batches, full_sequences, fragment_length, batch_size, fragment_stride,
-                     nb_output_bins, randomize_batch_order, _rnd):
-    if random_train_batches and set_name == 'train':
-        bg = random_batch_generator
-    else:
-        bg = batch_generator
-    return bg(full_sequences, fragment_length, batch_size, fragment_stride, nb_output_bins, randomize_batch_order, _rnd)
-
-
-def batch_generator(full_sequences, fragment_length, batch_size, fragment_stride, nb_output_bins, randomize_batch_order, _rnd):
-    indices = list(fragment_indices(full_sequences, fragment_length,
-                                    batch_size, fragment_stride, nb_output_bins))
-    if randomize_batch_order:
-        _rnd.shuffle(indices)
-
-    batches = cycle(partition_all(batch_size, indices))
-    for batch in batches:
-        if len(batch) < batch_size:
-            continue
-        yield np.array(
-            [one_hot(full_sequences[e[0]][e[1]:e[1] + fragment_length]) for e in batch], dtype='uint8'), np.array(
-            [one_hot(full_sequences[e[0]][e[1] + 1:e[1] + fragment_length + 1]) for e in batch], dtype='uint8')
-
-
-def random_batch_generator(full_sequences, fragment_length, batch_size, fragment_stride, nb_output_bins,
-                           randomize_batch_order, _rnd):
-    lengths = [x.shape[0] for x in full_sequences]
-    nb_sequences = len(full_sequences)
-    while True:
-        sequence_indices = _rnd.randint(0, nb_sequences, batch_size)
-        batch_inputs = []
-        batch_outputs = []
-        for i, seq_i in enumerate(sequence_indices):
-            l = lengths[seq_i]
-            offset = np.squeeze(_rnd.randint(0, l - fragment_length, 1))
-            batch_inputs.append(
-                full_sequences[seq_i][offset:offset + fragment_length])
-            batch_outputs.append(
-                full_sequences[seq_i][offset + 1:offset + fragment_length + 1])
-        yield one_hot(np.array(batch_inputs, dtype='uint8')), one_hot(np.array(batch_outputs, dtype='uint8'))
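# A small self-contained check of the fragment/one-hot logic above (shapes
# only; no audio files needed). It assumes the one_hot and batch_generator
# definitions in this module; the helper name _fragment_sketch and the toy
# sequence are illustrative.
import numpy as np


def _fragment_sketch():
    rnd = np.random.RandomState(0)
    sequences = [rnd.randint(0, 256, size=100).astype('uint8')]
    gen = batch_generator(sequences, fragment_length=16, batch_size=2,
                          fragment_stride=8, nb_output_bins=256,
                          randomize_batch_order=True, _rnd=rnd)
    x, y = next(gen)
    # inputs and next-step targets, one-hot over 256 amplitude bins
    assert x.shape == (2, 16, 256) and y.shape == (2, 16, 256)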
-
-
-def generators(dirname, desired_sample_rate, fragment_length, batch_size, fragment_stride, nb_output_bins,
-               learn_all_outputs, use_ulaw, randomize_batch_order, _rnd, random_train_batches):
-    fragment_generators = {}
-    nb_examples = {}
-    for set_name in ['train', 'test']:
-        set_dirname = os.path.join(dirname, set_name)
-        full_sequences = load_set(desired_sample_rate, set_dirname, use_ulaw)
-        fragment_generators[set_name] = select_generator(set_name, random_train_batches, full_sequences,
-                                                         fragment_length,
-                                                         batch_size, fragment_stride, nb_output_bins,
-                                                         randomize_batch_order, _rnd)
-        nb_examples[set_name] = int(sum(
-            [len(range(0, x.shape[0] - fragment_length, fragment_stride)) for x in
-             full_sequences]) / batch_size) * batch_size
-
-    return fragment_generators, nb_examples
-
-
-def generators_vctk(dirname, desired_sample_rate, fragment_length, batch_size, fragment_stride, nb_output_bins,
-                    learn_all_outputs, use_ulaw, test_factor, randomize_batch_order, _rnd, random_train_batches):
-    fragment_generators = {}
-    nb_examples = {}
-    speaker_dirs = os.listdir(dirname)
-    train_full_sequences = []
-    test_full_sequences = []
-    for speaker_dir in speaker_dirs:
-        full_sequences = load_set(
-            desired_sample_rate, os.path.join(dirname, speaker_dir), use_ulaw)
-        nb_examples_train = int(
-            math.ceil(len(full_sequences) * (1 - test_factor)))
-        train_full_sequences.extend(full_sequences[0:nb_examples_train])
-        test_full_sequences.extend(full_sequences[nb_examples_train:])
-
-    for set_name, set_sequences in zip(['train', 'test'], [train_full_sequences, test_full_sequences]):
-        # build each generator from its own split, not from the last
-        # speaker's full_sequences
-        fragment_generators[set_name] = select_generator(set_name, random_train_batches, set_sequences,
-                                                         fragment_length,
-                                                         batch_size, fragment_stride, nb_output_bins,
-                                                         randomize_batch_order, _rnd)
-        nb_examples[set_name] = int(sum(
-            [len(range(0, x.shape[0] - fragment_length, fragment_stride)) for x in
-             set_sequences]) / batch_size) * batch_size
-
-    return fragment_generators, nb_examples
-
-
-def load_set(desired_sample_rate, set_dirname, use_ulaw):
-    ulaw_str = '_ulaw' if use_ulaw else ''
-    cache_fn = os.path.join(set_dirname, 'processed_%d%s.npy' %
-                            (desired_sample_rate, ulaw_str))
-    if os.path.isfile(cache_fn):
-        full_sequences = np.load(cache_fn)
-    else:
-        file_names = [fn for fn in os.listdir(
-            set_dirname) if fn.endswith('.wav')]
-        full_sequences = []
-        for fn in tqdm(file_names):
-            sequence = process_wav(desired_sample_rate,
-                                   os.path.join(set_dirname, fn), use_ulaw)
-            full_sequences.append(sequence)
-        np.save(cache_fn, full_sequences)
-
-    return full_sequences
-
-
-def process_wav(desired_sample_rate, filename, use_ulaw):
-    with warnings.catch_warnings():
-        warnings.simplefilter("error")
-        channels = scipy.io.wavfile.read(filename)
-    file_sample_rate, audio = channels
-    audio = ensure_mono(audio)
-    audio = wav_to_float(audio)
-    if use_ulaw:
-        audio = ulaw(audio)
-    audio = ensure_sample_rate(desired_sample_rate, file_sample_rate, audio)
-    audio = float_to_uint8(audio)
-    return audio
-
-
-def ulaw(x, u=255):
-    x = np.sign(x) * (np.log(1 + u * np.abs(x)) / np.log(1 + u))
-    return x
-
-
-def float_to_uint8(x):
-    x += 1.
-    x /= 2.
-    uint8_max_value = np.iinfo('uint8').max
-    x *= uint8_max_value
-    x = x.astype('uint8')
-    return x
-
-
-def wav_to_float(x):
-    try:
-        max_value = np.iinfo(x.dtype).max
-        min_value = np.iinfo(x.dtype).min
-    except ValueError:
-        # np.iinfo raises ValueError for float dtypes; fall back to finfo
-        # for both bounds
-        max_value = np.finfo(x.dtype).max
-        min_value = np.finfo(x.dtype).min
-    x = x.astype('float64', casting='safe')
-    x -= min_value
-    x /= ((max_value - min_value) / 2.)
-    x -= 1.
- return x - - -def ulaw2lin(x, u=255.): - max_value = np.iinfo('uint8').max - min_value = np.iinfo('uint8').min - x = x.astype('float64', casting='safe') - x -= min_value - x /= ((max_value - min_value) / 2.) - x -= 1. - x = np.sign(x) * (1 / u) * (((1 + u) ** np.abs(x)) - 1) - x = float_to_uint8(x) - return x - - -def ensure_sample_rate(desired_sample_rate, file_sample_rate, mono_audio): - if file_sample_rate != desired_sample_rate: - mono_audio = scipy.signal.resample_poly( - mono_audio, desired_sample_rate, file_sample_rate) - return mono_audio - - -def ensure_mono(raw_audio): - """ - Just use first channel. - """ - if raw_audio.ndim == 2: - raw_audio = raw_audio[:, 0] - return raw_audio -from __future__ import absolute_import, division, print_function - -import datetime -import json -import os -import re -import wave - -import keras.backend as K -import numpy as np -import scipy.io.wavfile -import scipy.signal -from keras import layers -from keras import metrics -from keras import objectives -from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, CSVLogger -from keras.engine import Input -from keras.engine import Model -from keras.optimizers import Adam, SGD -from keras.regularizers import l2 -from sacred import Experiment -from sacred.commands import print_config -from tqdm import tqdm -from time import gmtime, strftime -from keras.callbacks import TensorBoard - -import dataset -from wavenet_utils import CausalAtrousConvolution1D, categorical_mean_squared_error - -ex = Experiment('wavenet') - - -@ex.config -def config(): - data_dir = 'data' - data_dir_structure = 'flat' # Or 'vctk' for a speakerdir structure - # For 'vctk' structure, take test_factor amount of sequences for test set. - test_factor = 0.1 - nb_epoch = 1000 - run_dir = None - early_stopping_patience = 20 - desired_sample_rate = 4410 - batch_size = 16 - nb_output_bins = 256 - nb_filters = 256 - dilation_depth = 9 # - nb_stacks = 1 - use_bias = False - use_ulaw = True - res_l2 = 0 - final_l2 = 0 - fragment_length = 128 + \ - compute_receptive_field_( - desired_sample_rate, dilation_depth, nb_stacks)[0] - fragment_stride = 128 - use_skip_connections = True - optimizer = { - 'optimizer': 'sgd', - 'lr': 0.001, - 'momentum': 0.9, - 'decay': 0., - 'nesterov': True, - 'epsilon': None - } - learn_all_outputs = True - random_train_batches = False - randomize_batch_order = True # Only effective if not using random train batches - # float to make targets a gaussian with stdev. - train_with_soft_target_stdev = None - - # The temporal-first outputs are computed from zero-padding. Setting below to True ignores these inputs: - train_only_in_receptive_field = True - - keras_verbose = 1 - debug = False - - -@ex.named_config -def book(): - desired_sample_rate = 4000 - data_dir = 'data_book' - dilation_depth = 8 - nb_stacks = 1 - fragment_length = 2 ** 10 - nb_filters = 256 - batch_size = 16 - fragment_stride = compute_receptive_field_( - desired_sample_rate, dilation_depth, nb_stacks)[0] - - -@ex.named_config -def small(): - desired_sample_rate = 4410 - nb_filters = 16 - dilation_depth = 8 - nb_stacks = 1 - fragment_length = 128 + \ - (compute_receptive_field_( - desired_sample_rate, dilation_depth, nb_stacks)[0]) - fragment_stride = int(desired_sample_rate / 10) - - -@ex.named_config -def soft_targets(): - train_with_soft_target_stdev = 0.5 - # TODO: smooth decay of stdev per epoch. 
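# The named configs above and below all derive fragment_length from the
# receptive field. A worked example of the formula used by
# compute_receptive_field_ further down in this file:
#
#     receptive_field = nb_stacks * (2 ** dilation_depth * 2) - (nb_stacks - 1)
#
# With the defaults (dilation_depth=9, nb_stacks=1, desired_sample_rate=4410):
#     receptive_field = 1 * (2 ** 9 * 2) - 0 = 1024 samples
#     receptive_field_ms = 1024 * 1000 / 4410, roughly 232 ms
# so the default fragment_length of 128 + 1024 = 1152 leaves 128 usable
# output timesteps once train_only_in_receptive_field trims the warm-up.
assert 1 * (2 ** 9 * 2) - (1 - 1) == 1024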
- - -@ex.named_config -def vctkdata(): - assert os.path.isdir(os.path.join('vctk', 'VCTK-Corpus') - ), "Please download vctk by running vctk/download_vctk.sh." - desired_sample_rate = 4000 - data_dir = 'vctk/VCTK-Corpus/wav48' - data_dir_structure = 'vctk' - test_factor = 0.01 - - -@ex.named_config -def vctkmod(desired_sample_rate): - nb_filters = 32 - dilation_depth = 7 - nb_stacks = 4 - fragment_length = 1 + \ - (compute_receptive_field_( - desired_sample_rate, dilation_depth, nb_stacks)[0]) - fragment_stride = int(desired_sample_rate / 10) - random_train_batches = True - - -@ex.named_config -def length32(desired_sample_rate, dilation_depth, nb_stacks): - fragment_length = 32 + \ - (compute_receptive_field_( - desired_sample_rate, dilation_depth, nb_stacks)[0]) - - -@ex.named_config -def adam(): - optimizer = { - 'optimizer': 'adam', - 'lr': 0.001, - 'decay': 0., - 'epsilon': 1e-8 - } - - -@ex.named_config -def adam2(): - optimizer = { - 'optimizer': 'adam', - 'lr': 0.01, - 'decay': 0., - 'epsilon': 1e-10 - } - - -@ex.config -def predict_config(): - predict_seconds = 1 - sample_argmax = False - # Temperature for sampling. > 1.0 for more exploring, < 1.0 for conservative samples. - sample_temperature = 1.0 - # Uses the softmax rather than the argmax as in input for the next step. - predict_use_softmax_as_input = False - predict_initial_input = None - - -@ex.named_config -def batch_run(): - keras_verbose = 2 - - -def skip_out_of_receptive_field(func): - # TODO: consider using keras masking for this? - receptive_field, _ = compute_receptive_field() - - def wrapper(y_true, y_pred): - y_true = y_true[:, receptive_field - 1:, :] - y_pred = y_pred[:, receptive_field - 1:, :] - return func(y_true, y_pred) - - wrapper.__name__ = func.__name__ - - return wrapper - - -def print_t(tensor, label): - tensor.name = label - # tensor = theano.printing.Print(tensor.name, attrs=('__str__', 'shape'))(tensor) - return tensor - - -@ex.capture -def make_soft(y_true, fragment_length, nb_output_bins, train_with_soft_target_stdev, with_prints=False): - receptive_field, _ = compute_receptive_field() - n_outputs = fragment_length - receptive_field + 1 - - # Make a gaussian kernel. - kernel_v = scipy.signal.gaussian(9, std=train_with_soft_target_stdev) - print(kernel_v) - kernel_v = np.reshape(kernel_v, [1, 1, -1, 1]) - kernel = K.variable(kernel_v) - - if with_prints: - y_true = print_t(y_true, 'y_true initial') - - # y_true: [batch, timesteps, input_dim] - # Same filter for all output; combine with batch. - y_true = K.reshape(y_true, (-1, 1, nb_output_bins, 1)) - # y_true: [batch*timesteps, n_channels=1, input_dim, dummy] - y_true = K.conv2d(y_true, kernel, padding='same') - # Same filter for all output; combine with batch. - y_true = K.reshape(y_true, (-1, n_outputs, nb_output_bins)) - # y_true: [batch, timesteps, input_dim] - y_true /= K.sum(y_true, axis=-1, keepdims=True) - - if with_prints: - y_true = print_t(y_true, 'y_true after') - return y_true - - -def make_targets_soft(func): - """Turns one-hot into gaussian distributed.""" - - def wrapper(y_true, y_pred): - y_true = make_soft(y_true) - y_pred = y_pred - return func(y_true, y_pred) - - wrapper.__name__ = func.__name__ - - return wrapper - - -@ex.capture() -def build_model(fragment_length, nb_filters, nb_output_bins, dilation_depth, nb_stacks, use_skip_connections, - learn_all_outputs, _log, desired_sample_rate, use_bias, res_l2, final_l2): - def residual_block(x): - original_x = x - # TODO: initalization, regularization? 
- # Note: The AtrousConvolution1D with the 'causal' flag is implemented in github.com/basveeling/keras#@wavenet. - tanh_out = CausalAtrousConvolution1D(nb_filters, 2, dilation_rate=2 ** i, padding='valid', causal=True, - use_bias=use_bias, - name='dilated_conv_%d_tanh_s%d' % (2 ** i, s), activation='tanh', - kernel_regularizer=l2(res_l2))(x) - sigm_out = CausalAtrousConvolution1D(nb_filters, 2, dilation_rate=2 ** i, padding='valid', causal=True, - use_bias=use_bias, - name='dilated_conv_%d_sigm_s%d' % (2 ** i, s), activation='sigmoid', - kernel_regularizer=l2(res_l2))(x) - x = layers.Multiply(name='gated_activation_%d_s%d' % - (i, s))([tanh_out, sigm_out]) - - res_x = layers.Convolution1D(nb_filters, 1, padding='same', use_bias=use_bias, - kernel_regularizer=l2(res_l2))(x) - skip_x = layers.Convolution1D(nb_filters, 1, padding='same', use_bias=use_bias, - kernel_regularizer=l2(res_l2))(x) - res_x = layers.Add()([original_x, res_x]) - return res_x, skip_x - - input = Input(shape=(fragment_length, nb_output_bins), name='input_part') - out = input - skip_connections = [] - out = CausalAtrousConvolution1D(nb_filters, 2, - dilation_rate=1, - padding='valid', - causal=True, - name='initial_causal_conv' - )(out) - for s in range(nb_stacks): - for i in range(0, dilation_depth + 1): - out, skip_out = residual_block(out) - skip_connections.append(skip_out) - - if use_skip_connections: - out = layers.Add()(skip_connections) - out = layers.Activation('relu')(out) - out = layers.Convolution1D(nb_output_bins, 1, padding='same', - kernel_regularizer=l2(final_l2))(out) - out = layers.Activation('relu')(out) - out = layers.Convolution1D(nb_output_bins, 1, padding='same')(out) - - if not learn_all_outputs: - raise DeprecationWarning( - 'Learning on just all outputs is wasteful, now learning only inside receptive field.') - out = layers.Lambda(lambda x: x[:, -1, :], output_shape=(out._keras_shape[-1],))( - out) # Based on gif in deepmind blog: take last output? 
- - out = layers.Activation('softmax', name="output_softmax")(out) - model = Model(input, out) - - receptive_field, receptive_field_ms = compute_receptive_field() - - _log.info('Receptive Field: %d (%dms)' % - (receptive_field, int(receptive_field_ms))) - return model - - -@ex.capture -def compute_receptive_field(desired_sample_rate, dilation_depth, nb_stacks): - return compute_receptive_field_(desired_sample_rate, dilation_depth, nb_stacks) - - -def compute_receptive_field_(desired_sample_rate, dilation_depth, nb_stacks): - receptive_field = nb_stacks * (2 ** dilation_depth * 2) - (nb_stacks - 1) - receptive_field_ms = (receptive_field * 1000) / desired_sample_rate - return receptive_field, receptive_field_ms - - -@ex.capture(prefix='optimizer') -def make_optimizer(optimizer, lr, momentum, decay, nesterov, epsilon): - if optimizer == 'sgd': - optim = SGD(lr, momentum, decay, nesterov) - elif optimizer == 'adam': - optim = Adam(lr=lr, decay=decay, epsilon=epsilon) - else: - raise ValueError( - 'Invalid config for optimizer.optimizer: ' + optimizer) - return optim - - -@ex.command -def predict(desired_sample_rate, fragment_length, _log, seed, _seed, _config, predict_seconds, data_dir, batch_size, - fragment_stride, nb_output_bins, learn_all_outputs, run_dir, predict_use_softmax_as_input, use_ulaw, - predict_initial_input, - **kwargs): - fragment_length = compute_receptive_field()[0] - _config['fragment_length'] = fragment_length - - checkpoint_dir = os.path.join(run_dir, 'checkpoints') - last_checkpoint = sorted(os.listdir(checkpoint_dir))[-1] - epoch = int(re.match(r'checkpoint\.(\d+?)-.*', last_checkpoint).group(1)) - _log.info('Using checkpoint from epoch: %s' % epoch) - - sample_dir = os.path.join(run_dir, 'samples') - if not os.path.exists(sample_dir): - os.mkdir(sample_dir) - - sample_name = make_sample_name(epoch) - sample_filename = os.path.join(sample_dir, sample_name) - - _log.info('Saving to "%s"' % sample_filename) - - sample_stream = make_sample_stream(desired_sample_rate, sample_filename) - - model = build_model() - model.load_weights(os.path.join(checkpoint_dir, last_checkpoint)) - model.summary() - - if predict_initial_input is None: - outputs = list(dataset.one_hot( - np.zeros(fragment_length) + nb_output_bins / 2)) - elif predict_initial_input != '': - _log.info('Taking first %d (%.2fs) from \'%s\' as initial input.' 
% ( - fragment_length, fragment_length / desired_sample_rate, predict_initial_input)) - wav = dataset.process_wav( - desired_sample_rate, predict_initial_input, use_ulaw) - outputs = list(dataset.one_hot(wav[0:fragment_length])) - else: - _log.info('Taking sample from test dataset as initial input.') - data_generators, _ = get_generators() - outputs = list(data_generators['test'].next()[0][-1]) - - # write_samples(sample_stream, outputs) - warned_repetition = False - for i in tqdm(range(int(desired_sample_rate * predict_seconds))): - if not warned_repetition: - if np.argmax(outputs[-1]) == np.argmax(outputs[-2]) and np.argmax(outputs[-2]) == np.argmax(outputs[-3]): - warned_repetition = True - _log.warning('Last three predicted outputs where %d' % - np.argmax(outputs[-1])) - else: - warned_repetition = False - prediction_seed = np.expand_dims( - np.array(outputs[i:i + fragment_length]), 0) - output = model.predict(prediction_seed) - output_dist = output[0][-1] - output_val = draw_sample(output_dist) - if predict_use_softmax_as_input: - outputs.append(output_dist) - else: - outputs.append(output_val) - write_samples(sample_stream, [output_val]) - - sample_stream.close() - - _log.info("Done!") - - -@ex.capture -def make_sample_name(epoch, predict_seconds, predict_use_softmax_as_input, sample_argmax, sample_temperature, seed): - sample_str = '' - if predict_use_softmax_as_input: - sample_str += '_soft-in' - if sample_argmax: - sample_str += '_argmax' - else: - sample_str += '_sample' - if sample_temperature: - sample_str += '-temp-%s' % sample_temperature - sample_name = 'sample_epoch-%05d_%02ds_%s_seed-%d.wav' % ( - epoch, int(predict_seconds), sample_str, seed) - return sample_name - - -@ex.capture -def write_samples(sample_file, out_val, use_ulaw): - s = np.argmax(out_val, axis=-1).astype('uint8') - # print out_val, - if use_ulaw: - s = dataset.ulaw2lin(s) - # print s, - s = bytearray(list(s)) - # print s[0] - sample_file.writeframes(s) - sample_file._file.flush() - - -@ex.capture -def get_generators(batch_size, data_dir, desired_sample_rate, fragment_length, fragment_stride, learn_all_outputs, - nb_output_bins, use_ulaw, test_factor, data_dir_structure, randomize_batch_order, _rnd, - random_train_batches): - if data_dir_structure == 'flat': - return dataset.generators(data_dir, desired_sample_rate, fragment_length, batch_size, - fragment_stride, nb_output_bins, learn_all_outputs, use_ulaw, randomize_batch_order, - _rnd, random_train_batches) - - elif data_dir_structure == 'vctk': - return dataset.generators_vctk(data_dir, desired_sample_rate, fragment_length, batch_size, - fragment_stride, nb_output_bins, learn_all_outputs, use_ulaw, test_factor, - randomize_batch_order, _rnd, random_train_batches) - else: - raise ValueError( - 'data_dir_structure must be "flat" or "vctk", is %s' % data_dir_structure) - - -@ex.command -def test_make_soft(_log, train_with_soft_target_stdev, _config): - if train_with_soft_target_stdev is None: - _config['train_with_soft_target_stdev'] = 1 - y_true = K.reshape(K.eye(512)[:129, :256], (2, 129, 256)) - y_soft = make_soft(y_true) - f = K.function([], y_soft) - _log.info('Output of soft:') - f1 = f([]) - - _log.info(f1[0, 0]) - _log.info(f1[-1, -1]) - - -@ex.command -def test_preprocess(desired_sample_rate, batch_size, use_ulaw): - sample_dir = os.path.join('preprocess_test') - if not os.path.exists(sample_dir): - os.mkdir(sample_dir) - - ulaw_str = '_ulaw' if use_ulaw else '' - sample_filename = os.path.join(sample_dir, 'test1%s.wav' % ulaw_str) - sample_stream 
= make_sample_stream(desired_sample_rate, sample_filename) - - data_generators, _ = get_generators() - outputs = data_generators['test'].next()[0][1].astype('uint8') - - write_samples(sample_stream, outputs) - scipy.io.wavfile.write(os.path.join(sample_dir, 'test2%s.wav' % ulaw_str), desired_sample_rate, - np.argmax(outputs, axis=-1).astype('uint8')) - - -def make_sample_stream(desired_sample_rate, sample_filename): - sample_file = wave.open(sample_filename, mode='w') - sample_file.setnchannels(1) - sample_file.setframerate(desired_sample_rate) - sample_file.setsampwidth(1) - return sample_file - - -def softmax(x, temp, mod=np): - x = mod.log(x) / temp - e_x = mod.exp(x - mod.max(x, axis=-1)) - return e_x / mod.sum(e_x, axis=-1) - - -@ex.capture -def draw_sample(output_dist, sample_temperature, sample_argmax, _rnd): - if sample_argmax: - output_dist = np.eye(256)[np.argmax(output_dist, axis=-1)] - else: - if sample_temperature is not None: - output_dist = softmax(output_dist, sample_temperature) - output_dist = output_dist / np.sum(output_dist + 1e-7) - output_dist = _rnd.multinomial(1, output_dist) - return output_dist - - -@ex.automain -def main(run_dir, data_dir, nb_epoch, early_stopping_patience, desired_sample_rate, fragment_length, batch_size, - fragment_stride, nb_output_bins, keras_verbose, _log, seed, _config, debug, learn_all_outputs, - train_only_in_receptive_field, _run, use_ulaw, train_with_soft_target_stdev): - if run_dir is None: - if not os.path.exists("models"): - os.mkdir("models") - run_dir = os.path.join( - 'models', datetime.datetime.now().strftime('run_%Y%m%d_%H%M%S')) - _config['run_dir'] = run_dir - - print_config(_run) - - _log.info('Running with seed %d' % seed) - - if not debug: - if os.path.exists(run_dir): - raise EnvironmentError('Run with seed %d already exists' % seed) - os.mkdir(run_dir) - checkpoint_dir = os.path.join(run_dir, 'checkpoints') - json.dump(_config, open(os.path.join(run_dir, 'config.json'), 'w')) - - _log.info('Loading data...') - data_generators, nb_examples = get_generators() - - _log.info('Building model...') - model = build_model(fragment_length) - _log.info(model.summary()) - - optim = make_optimizer() - _log.info('Compiling Model...') - - loss = objectives.categorical_crossentropy - all_metrics = [ - metrics.categorical_accuracy, - categorical_mean_squared_error - ] - if train_with_soft_target_stdev: - loss = make_targets_soft(loss) - if train_only_in_receptive_field: - loss = skip_out_of_receptive_field(loss) - all_metrics = [skip_out_of_receptive_field(m) for m in all_metrics] - - model.compile(optimizer=optim, loss=loss, metrics=all_metrics) - # TODO: Consider gradient weighting making last outputs more important. 
- - tictoc = strftime("%a_%d_%b_%Y_%H_%M_%S", gmtime()) - directory_name = tictoc - log_dir = 'wavenet_' + directory_name - os.mkdir(log_dir) - tensorboard = TensorBoard(log_dir=log_dir) - - callbacks = [ - tensorboard, - ReduceLROnPlateau(patience=early_stopping_patience / 2, - cooldown=early_stopping_patience / 4, verbose=1), - EarlyStopping(patience=early_stopping_patience, verbose=1), - ] - if not debug: - callbacks.extend([ - ModelCheckpoint(os.path.join(checkpoint_dir, 'checkpoint.{epoch:05d}-{val_loss:.3f}.hdf5'), - save_best_only=True), - CSVLogger(os.path.join(run_dir, 'history.csv')), - ]) - - if not debug: - os.mkdir(checkpoint_dir) - _log.info('Starting Training...') - - print("nb_examples['train'] {0}".format(nb_examples['train'])) - print("nb_examples['test'] {0}".format(nb_examples['test'])) - - model.fit_generator(data_generators['train'], - steps_per_epoch=nb_examples['train'] // batch_size, - epochs=nb_epoch, - validation_data=data_generators['test'], - validation_steps=nb_examples['test'] // batch_size, - callbacks=callbacks, - verbose=keras_verbose) -import keras.backend as K -from keras.layers.convolutional import Conv1D -from keras.utils.conv_utils import conv_output_length -import tensorflow as tf - - -def asymmetric_temporal_padding(x, left_pad=1, right_pad=1): - '''Pad the middle dimension of a 3D tensor - with "left_pad" zeros left and "right_pad" right. - ''' - pattern = [[0, 0], [left_pad, right_pad], [0, 0]] - return tf.pad(x, pattern) - - -def categorical_mean_squared_error(y_true, y_pred): - """MSE for categorical variables.""" - return K.mean(K.square(K.argmax(y_true, axis=-1) - - K.argmax(y_pred, axis=-1))) - - -class CausalAtrousConvolution1D(Conv1D): - def __init__(self, filters, kernel_size, init='glorot_uniform', activation=None, - padding='valid', strides=1, dilation_rate=1, bias_regularizer=None, - activity_regularizer=None, kernel_constraint=None, bias_constraint=None, use_bias=True, causal=False, **kwargs): - super(CausalAtrousConvolution1D, self).__init__(filters, - kernel_size=kernel_size, - strides=strides, - padding=padding, - dilation_rate=dilation_rate, - activation=activation, - use_bias=use_bias, - kernel_initializer=init, - activity_regularizer=activity_regularizer, - bias_regularizer=bias_regularizer, - kernel_constraint=kernel_constraint, - bias_constraint=bias_constraint, - **kwargs) - - self.causal = causal - if self.causal and padding != 'valid': - raise ValueError("Causal mode dictates border_mode=valid.") - - def compute_output_shape(self, input_shape): - input_length = input_shape[1] - - if self.causal: - input_length += self.dilation_rate[0] * (self.kernel_size[0] - 1) - - length = conv_output_length(input_length, - self.kernel_size[0], - self.padding, - self.strides[0], - dilation=self.dilation_rate[0]) - - return (input_shape[0], length, self.filters) - - def call(self, x): - if self.causal: - x = asymmetric_temporal_padding( - x, self.dilation_rate[0] * (self.kernel_size[0] - 1), 0) - return super(CausalAtrousConvolution1D, self).call(x) -from __future__ import print_function - -import os.path - -import densenet -import numpy as np -import sklearn.metrics as metrics - -from keras.datasets import cifar10 -from keras.utils import np_utils -from keras.preprocessing.image import ImageDataGenerator -from keras.optimizers import Adam -from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau -from keras import backend as K - -batch_size = 100 -nb_classes = 10 -nb_epoch = 300 - -img_rows, img_cols = 32, 32 -img_channels = 3 - 
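# A hedged numpy-only check of the causal padding rule used by
# CausalAtrousConvolution1D above: left-padding by dilation * (kernel_size - 1)
# keeps the output length equal to the input length, and output t only sees
# inputs at positions <= t. The helper name _causal_dilated_conv_1d is
# illustrative.
import numpy as np


def _causal_dilated_conv_1d(x, w, dilation):
    # x: (timesteps,), w: (kernel_size,), zero-padded on the left only
    k = len(w)
    pad = dilation * (k - 1)
    xp = np.concatenate([np.zeros(pad, dtype=x.dtype), x])
    return np.array([sum(w[j] * xp[t + pad - j * dilation] for j in range(k))
                     for t in range(len(x))])


# with w = [1, 1] and dilation=2, each output is x[t] + x[t-2]
_out = _causal_dilated_conv_1d(np.arange(8, dtype='float32'),
                               np.array([1., 1.]), dilation=2)
assert _out.tolist() == [0., 1., 2., 4., 6., 8., 10., 12.]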
-img_dim = (img_channels, img_rows, img_cols) if K.image_dim_ordering( -) == "th" else (img_rows, img_cols, img_channels) -depth = 40 -nb_dense_block = 3 -growth_rate = 12 -nb_filter = -1 -dropout_rate = 0.0 # 0.0 for data augmentation - -model = densenet.DenseNet(img_dim, classes=nb_classes, depth=depth, nb_dense_block=nb_dense_block, - growth_rate=growth_rate, nb_filter=nb_filter, dropout_rate=dropout_rate, weights=None) -print("Model created") - -model.summary() -optimizer = Adam(lr=1e-3) # Using Adam instead of SGD to speed up training -model.compile(loss='categorical_crossentropy', - optimizer=optimizer, metrics=["accuracy"]) -print("Finished compiling") -print("Building model...") - -(trainX, trainY), (testX, testY) = cifar10.load_data() - -trainX = trainX.astype('float32') -testX = testX.astype('float32') - -trainX = densenet.preprocess_input(trainX) -testX = densenet.preprocess_input(testX) - -Y_train = np_utils.to_categorical(trainY, nb_classes) -Y_test = np_utils.to_categorical(testY, nb_classes) - -generator = ImageDataGenerator(rotation_range=15, - width_shift_range=5./32, - height_shift_range=5./32, - horizontal_flip=True) - -generator.fit(trainX, seed=0) - -# Load model -weights_file = "weights/DenseNet-40-12-CIFAR10.h5" -if os.path.exists(weights_file): - #model.load_weights(weights_file, by_name=True) - print("Model loaded.") - -out_dir = "weights/" - -lr_reducer = ReduceLROnPlateau(monitor='val_acc', factor=np.sqrt(0.1), - cooldown=0, patience=5, min_lr=1e-5) -model_checkpoint = ModelCheckpoint(weights_file, monitor="val_acc", save_best_only=True, - save_weights_only=True, verbose=1) - -callbacks = [lr_reducer, model_checkpoint] - -model.fit_generator(generator.flow(trainX, Y_train, batch_size=batch_size), - steps_per_epoch=len(trainX) // batch_size, epochs=nb_epoch, - callbacks=callbacks, - validation_data=(testX, Y_test), - validation_steps=testX.shape[0] // batch_size, verbose=1) - -yPreds = model.predict(testX) -yPred = np.argmax(yPreds, axis=1) -yTrue = testY - -accuracy = metrics.accuracy_score(yTrue, yPred) * 100 -error = 100 - accuracy -print("Accuracy : ", accuracy) -print("Error : ", error) -from __future__ import print_function - -from keras import backend as K -from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping -from keras.optimizers import Adam -from keras.preprocessing.image import ImageDataGenerator -from keras.utils import np_utils -from keras.datasets import cifar100 -import sklearn.metrics as metrics -import numpy as np -import densenet -import sys -sys.setrecursionlimit(10000) - - -batch_size = 64 -nb_classes = 100 -nb_epoch = 15 - -img_rows, img_cols = 32, 32 -img_channels = 3 - -img_dim = (img_channels, img_rows, img_cols) if K.image_dim_ordering( -) == "th" else (img_rows, img_cols, img_channels) -depth = 40 -nb_dense_block = 3 -growth_rate = 12 -nb_filter = 12 -bottleneck = False -reduction = 0.0 -dropout_rate = 0.0 # 0.0 for data augmentation - - -model = densenet.DenseNet(img_dim, classes=nb_classes, depth=depth, nb_dense_block=nb_dense_block, - growth_rate=growth_rate, nb_filter=nb_filter, dropout_rate=dropout_rate, - bottleneck=bottleneck, reduction=reduction, weights=None) -print("Model created") - -model.summary() -optimizer = Adam(lr=1e-4) # Using Adam instead of SGD to speed up training -model.compile(loss='categorical_crossentropy', - optimizer=optimizer, metrics=["accuracy"]) -print("Finished compiling") -print("Building model...") - -(trainX, trainY), (testX, testY) = cifar100.load_data() - -trainX = 
trainX.astype('float32') -testX = testX.astype('float32') - -trainX /= 255. -testX /= 255. - -Y_train = np_utils.to_categorical(trainY, nb_classes) -Y_test = np_utils.to_categorical(testY, nb_classes) - -generator = ImageDataGenerator(rotation_range=15, - width_shift_range=5./32, - height_shift_range=5./32) - -generator.fit(trainX, seed=0) - -# Load model -# model.load_weights("weights/DenseNet-BC-100-12-CIFAR100.h5") -# print("Model loaded.") - -lr_reducer = ReduceLROnPlateau(monitor='val_loss', factor=np.sqrt(0.1), - cooldown=0, patience=10, min_lr=0.5e-6) -early_stopper = EarlyStopping(monitor='val_acc', min_delta=0.0001, patience=20) -model_checkpoint = ModelCheckpoint("weights/DenseNet-BC-100-12-CIFAR100.h5", monitor="val_acc", save_best_only=True, - save_weights_only=True) - -callbacks = [lr_reducer, early_stopper, model_checkpoint] - - -model.fit_generator(generator.flow(trainX, Y_train, batch_size=batch_size), samples_per_epoch=len(trainX), nb_epoch=nb_epoch, - callbacks=callbacks, - validation_data=(testX, Y_test), - nb_val_samples=testX.shape[0], verbose=1) - -yPreds = model.predict(testX) -yPred = np.argmax(yPreds, axis=1) -yTrue = testY - -accuracy = metrics.accuracy_score(yTrue, yPred) * 100 -error = 100 - accuracy -print("Accuracy : ", accuracy) -print("Error : ", error) -'''DenseNet models for Keras. -# Reference -- [Densely Connected Convolutional Networks](https://arxiv.org/pdf/1608.06993.pdf) -- [The One Hundred Layers Tiramisu: Fully Convolutional DenseNets for Semantic Segmentation](https://arxiv.org/pdf/1611.09326.pdf) -''' -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division - -import warnings - -from keras.models import Model -from keras.layers.core import Dense, Dropout, Activation, Reshape -from keras.layers.convolutional import Conv2D, Conv2DTranspose, UpSampling2D -from keras.layers.pooling import AveragePooling2D, MaxPooling2D -from keras.layers.pooling import GlobalAveragePooling2D -from keras.layers import Input -from keras.layers.merge import concatenate -from keras.layers.normalization import BatchNormalization -from keras.regularizers import l2 -from keras.utils.layer_utils import convert_all_kernels_in_model, convert_dense_weights_data_format -from keras.utils.data_utils import get_file -from keras.engine.topology import get_source_inputs -from keras_applications.imagenet_utils import _obtain_input_shape -from keras_applications.imagenet_utils import decode_predictions -import keras.backend as K - -from subpixel import SubPixelUpscaling - -DENSENET_121_WEIGHTS_PATH = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-121-32.h5' -DENSENET_161_WEIGHTS_PATH = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-161-48.h5' -DENSENET_169_WEIGHTS_PATH = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-169-32.h5' -DENSENET_121_WEIGHTS_PATH_NO_TOP = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-121-32-no-top.h5' -DENSENET_161_WEIGHTS_PATH_NO_TOP = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-161-48-no-top.h5' -DENSENET_169_WEIGHTS_PATH_NO_TOP = r'https://github.com/titu1994/DenseNet/releases/download/v3.0/DenseNet-BC-169-32-no-top.h5' - - -def preprocess_input(x, data_format=None): - """Preprocesses a tensor encoding a batch of images. - - # Arguments - x: input Numpy tensor, 4D. - data_format: data format of the image tensor. - - # Returns - Preprocessed tensor. 
- """ - if data_format is None: - data_format = K.image_data_format() - assert data_format in {'channels_last', 'channels_first'} - - if data_format == 'channels_first': - if x.ndim == 3: - # 'RGB'->'BGR' - x = x[::-1, ...] - # Zero-center by mean pixel - x[0, :, :] -= 103.939 - x[1, :, :] -= 116.779 - x[2, :, :] -= 123.68 - else: - x = x[:, ::-1, ...] - x[:, 0, :, :] -= 103.939 - x[:, 1, :, :] -= 116.779 - x[:, 2, :, :] -= 123.68 - else: - # 'RGB'->'BGR' - x = x[..., ::-1] - # Zero-center by mean pixel - x[..., 0] -= 103.939 - x[..., 1] -= 116.779 - x[..., 2] -= 123.68 - - x *= 0.017 # scale values - - return x - - -def DenseNet(input_shape=None, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=-1, nb_layers_per_block=-1, - bottleneck=False, reduction=0.0, dropout_rate=0.0, weight_decay=1e-4, subsample_initial_block=False, - include_top=True, weights=None, input_tensor=None, - classes=10, activation='softmax'): - '''Instantiate the DenseNet architecture, - optionally loading weights pre-trained - on CIFAR-10. Note that when using TensorFlow, - for best performance you should set - `image_data_format='channels_last'` in your Keras config - at ~/.keras/keras.json. - The model and the weights are compatible with both - TensorFlow and Theano. The dimension ordering - convention used by the model is the one - specified in your Keras config file. - # Arguments - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(32, 32, 3)` (with `channels_last` dim ordering) - or `(3, 32, 32)` (with `channels_first` dim ordering). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 8. - E.g. `(200, 200, 3)` would be one valid value. - depth: number or layers in the DenseNet - nb_dense_block: number of dense blocks to add to end (generally = 3) - growth_rate: number of filters to add per dense block - nb_filter: initial number of filters. -1 indicates initial - number of filters is 2 * growth_rate - nb_layers_per_block: number of layers in each dense block. - Can be a -1, positive integer or a list. - If -1, calculates nb_layer_per_block from the network depth. - If positive integer, a set number of layers per dense block. - If list, nb_layer is used as provided. Note that list size must - be (nb_dense_block + 1) - bottleneck: flag to add bottleneck blocks in between dense blocks - reduction: reduction factor of transition blocks. - Note : reduction value is inverted to compute compression. - dropout_rate: dropout rate - weight_decay: weight decay rate - subsample_initial_block: Set to True to subsample the initial convolution and - add a MaxPool2D before the dense blocks are added. - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization) or - 'imagenet' (pre-training on ImageNet).. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'. - Note that if sigmoid is used, classes must be 1. - # Returns - A Keras model instance. 
- ''' - - if weights not in {'imagenet', None}: - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `cifar10` ' - '(pre-training on CIFAR-10).') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as ImageNet with `include_top`' - ' as true, `classes` should be 1000') - - if activation not in ['softmax', 'sigmoid']: - raise ValueError('activation must be one of "softmax" or "sigmoid"') - - if activation == 'sigmoid' and classes != 1: - raise ValueError( - 'sigmoid activation can only be used when classes = 1') - - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=32, - min_size=8, - data_format=K.image_data_format(), - require_flatten=include_top) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - x = __create_dense_net(classes, img_input, include_top, depth, nb_dense_block, - growth_rate, nb_filter, nb_layers_per_block, bottleneck, reduction, - dropout_rate, weight_decay, subsample_initial_block, activation) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = Model(inputs, x, name='densenet') - - # load weights - if weights == 'imagenet': - weights_loaded = False - - if (depth == 121) and (nb_dense_block == 4) and (growth_rate == 32) and (nb_filter == 64) and \ - (bottleneck is True) and (reduction == 0.5) and (dropout_rate == 0.0) and (subsample_initial_block): - if include_top: - weights_path = get_file('DenseNet-BC-121-32.h5', - DENSENET_121_WEIGHTS_PATH, - cache_subdir='models', - md5_hash='a439dd41aa672aef6daba4ee1fd54abd') - else: - weights_path = get_file('DenseNet-BC-121-32-no-top.h5', - DENSENET_121_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='55e62a6358af8a0af0eedf399b5aea99') - model.load_weights(weights_path) - weights_loaded = True - - if (depth == 161) and (nb_dense_block == 4) and (growth_rate == 48) and (nb_filter == 96) and \ - (bottleneck is True) and (reduction == 0.5) and (dropout_rate == 0.0) and (subsample_initial_block): - if include_top: - weights_path = get_file('DenseNet-BC-161-48.h5', - DENSENET_161_WEIGHTS_PATH, - cache_subdir='models', - md5_hash='6c326cf4fbdb57d31eff04333a23fcca') - else: - weights_path = get_file('DenseNet-BC-161-48-no-top.h5', - DENSENET_161_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='1a9476b79f6b7673acaa2769e6427b92') - model.load_weights(weights_path) - weights_loaded = True - - if (depth == 169) and (nb_dense_block == 4) and (growth_rate == 32) and (nb_filter == 64) and \ - (bottleneck is True) and (reduction == 0.5) and (dropout_rate == 0.0) and (subsample_initial_block): - if include_top: - weights_path = get_file('DenseNet-BC-169-32.h5', - DENSENET_169_WEIGHTS_PATH, - cache_subdir='models', - md5_hash='914869c361303d2e39dec640b4e606a6') - else: - weights_path = get_file('DenseNet-BC-169-32-no-top.h5', - DENSENET_169_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='89c19e8276cfd10585d5fadc1df6859e') - model.load_weights(weights_path) - weights_loaded = True - - if weights_loaded: - if K.backend() == 'theano': - convert_all_kernels_in_model(model) - - if K.image_data_format() == 'channels_first' and K.backend() == 
'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image data format convention ' - '(`image_data_format="channels_first"`). ' - 'For best performance, set ' - '`image_data_format="channels_last"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - - print("Weights for the model were loaded successfully") - - return model - - -def DenseNetFCN(input_shape, nb_dense_block=5, growth_rate=16, nb_layers_per_block=4, - reduction=0.0, dropout_rate=0.0, weight_decay=1e-4, init_conv_filters=48, - include_top=True, weights=None, input_tensor=None, classes=1, activation='softmax', - upsampling_conv=128, upsampling_type='deconv'): - '''Instantiate the DenseNet FCN architecture. - Note that when using TensorFlow, - for best performance you should set - `image_data_format='channels_last'` in your Keras config - at ~/.keras/keras.json. - # Arguments - nb_dense_block: number of dense blocks to add to end (generally = 3) - growth_rate: number of filters to add per dense block - nb_layers_per_block: number of layers in each dense block. - Can be a positive integer or a list. - If positive integer, a set number of layers per dense block. - If list, nb_layer is used as provided. Note that list size must - be (nb_dense_block + 1) - reduction: reduction factor of transition blocks. - Note : reduction value is inverted to compute compression. - dropout_rate: dropout rate - init_conv_filters: number of layers in the initial convolution layer - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization) or - 'cifar10' (pre-training on CIFAR-10).. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(32, 32, 3)` (with `channels_last` dim ordering) - or `(3, 32, 32)` (with `channels_first` dim ordering). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 8. - E.g. `(200, 200, 3)` would be one valid value. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'. - Note that if sigmoid is used, classes must be 1. - upsampling_conv: number of convolutional layers in upsampling via subpixel convolution - upsampling_type: Can be one of 'upsampling', 'deconv' and - 'subpixel'. Defines type of upsampling algorithm used. - batchsize: Fixed batch size. This is a temporary requirement for - computation of output shape in the case of Deconvolution2D layers. - Parameter will be removed in next iteration of Keras, which infers - output shape of deconvolution layers automatically. - # Returns - A Keras model instance. 
- ''' - - if weights not in {None}: - raise ValueError('The `weights` argument should be ' - '`None` (random initialization) as no ' - 'model weights are provided.') - - upsampling_type = upsampling_type.lower() - - if upsampling_type not in ['upsampling', 'deconv', 'subpixel']: - raise ValueError('Parameter "upsampling_type" must be one of "upsampling", ' - '"deconv" or "subpixel".') - - if input_shape is None: - raise ValueError( - 'For fully convolutional models, input shape must be supplied.') - - if type(nb_layers_per_block) is not list and nb_dense_block < 1: - raise ValueError('Number of dense layers per block must be greater than 1. Argument ' - 'value was %d.' % (nb_layers_per_block)) - - if activation not in ['softmax', 'sigmoid']: - raise ValueError('activation must be one of "softmax" or "sigmoid"') - - if activation == 'sigmoid' and classes != 1: - raise ValueError( - 'sigmoid activation can only be used when classes = 1') - - # Determine proper input shape - min_size = 2 ** nb_dense_block - - if K.image_data_format() == 'channels_first': - if input_shape is not None: - if ((input_shape[1] is not None and input_shape[1] < min_size) or - (input_shape[2] is not None and input_shape[2] < min_size)): - raise ValueError('Input size must be at least ' + - str(min_size) + 'x' + str(min_size) + ', got ' - '`input_shape=' + str(input_shape) + '`') - else: - input_shape = (classes, None, None) - else: - if input_shape is not None: - if ((input_shape[0] is not None and input_shape[0] < min_size) or - (input_shape[1] is not None and input_shape[1] < min_size)): - raise ValueError('Input size must be at least ' + - str(min_size) + 'x' + str(min_size) + ', got ' - '`input_shape=' + str(input_shape) + '`') - else: - input_shape = (None, None, classes) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - x = __create_fcn_dense_net(classes, img_input, include_top, nb_dense_block, - growth_rate, reduction, dropout_rate, weight_decay, - nb_layers_per_block, upsampling_conv, upsampling_type, - init_conv_filters, input_shape, activation) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. 
- model = Model(inputs, x, name='fcn-densenet')
-
- return model
-
-
-def DenseNetImageNet121(input_shape=None,
- bottleneck=True,
- reduction=0.5,
- dropout_rate=0.0,
- weight_decay=1e-4,
- include_top=True,
- weights='imagenet',
- input_tensor=None,
- classes=1000,
- activation='softmax'):
- return DenseNet(input_shape, depth=121, nb_dense_block=4, growth_rate=32, nb_filter=64,
- nb_layers_per_block=[6, 12, 24, 16], bottleneck=bottleneck, reduction=reduction,
- dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True,
- include_top=include_top, weights=weights, input_tensor=input_tensor,
- classes=classes, activation=activation)
-
-
-def DenseNetImageNet169(input_shape=None,
- bottleneck=True,
- reduction=0.5,
- dropout_rate=0.0,
- weight_decay=1e-4,
- include_top=True,
- weights='imagenet',
- input_tensor=None,
- classes=1000,
- activation='softmax'):
- return DenseNet(input_shape, depth=169, nb_dense_block=4, growth_rate=32, nb_filter=64,
- nb_layers_per_block=[6, 12, 32, 32], bottleneck=bottleneck, reduction=reduction,
- dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True,
- include_top=include_top, weights=weights, input_tensor=input_tensor,
- classes=classes, activation=activation)
-
-
-def DenseNetImageNet201(input_shape=None,
- bottleneck=True,
- reduction=0.5,
- dropout_rate=0.0,
- weight_decay=1e-4,
- include_top=True,
- weights=None,
- input_tensor=None,
- classes=1000,
- activation='softmax'):
- return DenseNet(input_shape, depth=201, nb_dense_block=4, growth_rate=32, nb_filter=64,
- nb_layers_per_block=[6, 12, 48, 32], bottleneck=bottleneck, reduction=reduction,
- dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True,
- include_top=include_top, weights=weights, input_tensor=input_tensor,
- classes=classes, activation=activation)
-
-
-def DenseNetImageNet264(input_shape=None,
- bottleneck=True,
- reduction=0.5,
- dropout_rate=0.0,
- weight_decay=1e-4,
- include_top=True,
- weights=None,
- input_tensor=None,
- classes=1000,
- activation='softmax'):
- return DenseNet(input_shape, depth=264, nb_dense_block=4, growth_rate=32, nb_filter=64,
- nb_layers_per_block=[6, 12, 64, 48], bottleneck=bottleneck, reduction=reduction,
- dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True,
- include_top=include_top, weights=weights, input_tensor=input_tensor,
- classes=classes, activation=activation)
-
-
-def DenseNetImageNet161(input_shape=None,
- bottleneck=True,
- reduction=0.5,
- dropout_rate=0.0,
- weight_decay=1e-4,
- include_top=True,
- weights='imagenet',
- input_tensor=None,
- classes=1000,
- activation='softmax'):
- return DenseNet(input_shape, depth=161, nb_dense_block=4, growth_rate=48, nb_filter=96,
- nb_layers_per_block=[6, 12, 36, 24], bottleneck=bottleneck, reduction=reduction,
- dropout_rate=dropout_rate, weight_decay=weight_decay, subsample_initial_block=True,
- include_top=include_top, weights=weights, input_tensor=input_tensor,
- classes=classes, activation=activation)
-
-
-def __conv_block(ip, nb_filter, bottleneck=False, dropout_rate=None, weight_decay=1e-4):
- ''' Apply BatchNorm, Relu, 3x3 Conv2D, optional bottleneck block and dropout
- Args:
- ip: Input keras tensor
- nb_filter: number of filters
- bottleneck: add bottleneck block
- dropout_rate: dropout rate
- weight_decay: weight decay factor
- Returns: keras tensor with batch_norm, relu and convolution2d added (optional bottleneck)
- '''
- concat_axis = 1 if K.image_data_format() == 'channels_first'
else -1 - - x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip) - x = Activation('relu')(x) - - if bottleneck: - # Obtained from https://github.com/liuzhuang13/DenseNet/blob/master/densenet.lua - inter_channel = nb_filter * 4 - - x = Conv2D(inter_channel, (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False, - kernel_regularizer=l2(weight_decay))(x) - x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x) - x = Activation('relu')(x) - - x = Conv2D(nb_filter, (3, 3), kernel_initializer='he_normal', - padding='same', use_bias=False)(x) - if dropout_rate: - x = Dropout(dropout_rate)(x) - - return x - - -def __dense_block(x, nb_layers, nb_filter, growth_rate, bottleneck=False, dropout_rate=None, weight_decay=1e-4, - grow_nb_filters=True, return_concat_list=False): - ''' Build a dense_block where the output of each conv_block is fed to subsequent ones - Args: - x: keras tensor - nb_layers: the number of layers of conv_block to append to the model. - nb_filter: number of filters - growth_rate: growth rate - bottleneck: bottleneck block - dropout_rate: dropout rate - weight_decay: weight decay factor - grow_nb_filters: flag to decide to allow number of filters to grow - return_concat_list: return the list of feature maps along with the actual output - Returns: keras tensor with nb_layers of conv_block appended - ''' - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - x_list = [x] - - for i in range(nb_layers): - cb = __conv_block(x, growth_rate, bottleneck, - dropout_rate, weight_decay) - x_list.append(cb) - - x = concatenate([x, cb], axis=concat_axis) - - if grow_nb_filters: - nb_filter += growth_rate - - if return_concat_list: - return x, nb_filter, x_list - else: - return x, nb_filter - - -def __transition_block(ip, nb_filter, compression=1.0, weight_decay=1e-4): - ''' Apply BatchNorm, Relu 1x1, Conv2D, optional compression, dropout and Maxpooling2D - Args: - ip: keras tensor - nb_filter: number of filters - compression: calculated as 1 - reduction. Reduces the number of feature maps - in the transition block. - dropout_rate: dropout rate - weight_decay: weight decay factor - Returns: keras tensor, after applying batch_norm, relu-conv, dropout, maxpool - ''' - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(ip) - x = Activation('relu')(x) - x = Conv2D(int(nb_filter * compression), (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False, - kernel_regularizer=l2(weight_decay))(x) - x = AveragePooling2D((2, 2), strides=(2, 2))(x) - - return x - - -def __transition_up_block(ip, nb_filters, type='deconv', weight_decay=1E-4): - ''' SubpixelConvolutional Upscaling (factor = 2) - Args: - ip: keras tensor - nb_filters: number of layers - type: can be 'upsampling', 'subpixel', 'deconv'. Determines type of upsampling performed - weight_decay: weight decay factor - Returns: keras tensor, after applying upsampling operation. 
- '''
-
- if type == 'upsampling':
- x = UpSampling2D()(ip)
- elif type == 'subpixel':
- x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(weight_decay),
- use_bias=False, kernel_initializer='he_normal')(ip)
- x = SubPixelUpscaling(scale_factor=2)(x)
- x = Conv2D(nb_filters, (3, 3), activation='relu', padding='same', kernel_regularizer=l2(weight_decay),
- use_bias=False, kernel_initializer='he_normal')(x)
- else:
- x = Conv2DTranspose(nb_filters, (3, 3), activation='relu', padding='same', strides=(2, 2),
- kernel_initializer='he_normal', kernel_regularizer=l2(weight_decay))(ip)
-
- return x
-
-
-def __create_dense_net(nb_classes, img_input, include_top, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=-1,
- nb_layers_per_block=-1, bottleneck=False, reduction=0.0, dropout_rate=None, weight_decay=1e-4,
- subsample_initial_block=False, activation='softmax'):
- ''' Build the DenseNet model
- Args:
- nb_classes: number of classes
- img_input: input image tensor of shape (channels, rows, columns) or (rows, columns, channels)
- include_top: flag to include the final Dense layer
- depth: number of layers
- nb_dense_block: number of dense blocks to add to end (generally = 3)
- growth_rate: number of filters to add per dense block
- nb_filter: initial number of filters. Default -1 indicates initial number of filters is 2 * growth_rate
- nb_layers_per_block: number of layers in each dense block.
- Can be -1, a positive integer or a list.
- If -1, calculates nb_layer_per_block from the depth of the network.
- If positive integer, a set number of layers per dense block.
- If list, nb_layer is used as provided. Note that list size must
- be nb_dense_block
- bottleneck: add bottleneck blocks
- reduction: reduction factor of transition blocks. Note : reduction value is inverted to compute compression
- dropout_rate: dropout rate
- weight_decay: weight decay rate
- subsample_initial_block: Set to True to subsample the initial convolution and
- add a MaxPool2D before the dense blocks are added.
- activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'.
- Note that if sigmoid is used, classes must be 1.
- Returns: keras tensor with nb_layers of conv_block appended
- '''
-
- concat_axis = 1 if K.image_data_format() == 'channels_first' else -1
-
- if reduction != 0.0:
- assert reduction <= 1.0 and reduction > 0.0, 'reduction value must lie between 0.0 and 1.0'
-
- # layers in each dense block
- if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple:
- nb_layers = list(nb_layers_per_block) # Convert tuple to list
-
- assert len(nb_layers) == (nb_dense_block), 'If list, nb_layer is used as provided. ' \
- 'Note that list size must be (nb_dense_block)'
- final_nb_layer = nb_layers[-1]
- nb_layers = nb_layers[:-1]
- else:
- if nb_layers_per_block == -1:
- assert (
- depth - 4) % 3 == 0, 'Depth must be 3 N + 4 if nb_layers_per_block == -1'
- count = int((depth - 4) / 3)
-
- if bottleneck:
- count = count // 2
-
- nb_layers = [count for _ in range(nb_dense_block)]
- final_nb_layer = count
- else:
- final_nb_layer = nb_layers_per_block
- nb_layers = [nb_layers_per_block] * nb_dense_block
-
- # compute initial nb_filter if -1, else accept the user's initial nb_filter
- if nb_filter <= 0:
- nb_filter = 2 * growth_rate
-
- # compute compression factor
- compression = 1.0 - reduction
-
- # Initial convolution
- if subsample_initial_block:
- initial_kernel = (7, 7)
- initial_strides = (2, 2)
- else:
- initial_kernel = (3, 3)
- initial_strides = (1, 1)
-
- x = Conv2D(nb_filter, initial_kernel, kernel_initializer='he_normal', padding='same',
- strides=initial_strides, use_bias=False, kernel_regularizer=l2(weight_decay))(img_input)
-
- if subsample_initial_block:
- x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x)
- x = Activation('relu')(x)
- x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
-
- # Add dense blocks
- for block_idx in range(nb_dense_block - 1):
- x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, bottleneck=bottleneck,
- dropout_rate=dropout_rate, weight_decay=weight_decay)
- # add transition_block
- x = __transition_block(
- x, nb_filter, compression=compression, weight_decay=weight_decay)
- nb_filter = int(nb_filter * compression)
-
- # The last dense_block does not have a transition_block
- x, nb_filter = __dense_block(x, final_nb_layer, nb_filter, growth_rate, bottleneck=bottleneck,
- dropout_rate=dropout_rate, weight_decay=weight_decay)
-
- x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x)
- x = Activation('relu')(x)
- x = GlobalAveragePooling2D()(x)
-
- if include_top:
- x = Dense(nb_classes, activation=activation)(x)
-
- return x
-
-
-def __create_fcn_dense_net(nb_classes, img_input, include_top, nb_dense_block=5, growth_rate=12,
- reduction=0.0, dropout_rate=None, weight_decay=1e-4,
- nb_layers_per_block=4, nb_upsampling_conv=128, upsampling_type='upsampling',
- init_conv_filters=48, input_shape=None, activation='softmax'):
- ''' Build the DenseNet model
- Args:
- nb_classes: number of classes
- img_input: input image tensor of shape (channels, rows, columns) or (rows, columns, channels)
- include_top: flag to include the final Dense layer
- nb_dense_block: number of dense blocks to add to end (generally = 3)
- growth_rate: number of filters to add per dense block
- reduction: reduction factor of transition blocks. Note : reduction value is inverted to compute compression
- dropout_rate: dropout rate
- weight_decay: weight decay
- nb_layers_per_block: number of layers in each dense block.
- Can be a positive integer or a list.
- If positive integer, a set number of layers per dense block.
- If list, nb_layer is used as provided. Note that list size must
- be (nb_dense_block + 1)
- nb_upsampling_conv: number of convolutional layers in upsampling via subpixel convolution
- upsampling_type: Can be one of 'upsampling', 'deconv' and 'subpixel'. Defines
- type of upsampling algorithm used.
- input_shape: Only used for shape inference in fully convolutional networks.
- activation: Type of activation at the top layer. Can be one of 'softmax' or 'sigmoid'.
- Note that if sigmoid is used, classes must be 1.
- Returns: keras tensor with nb_layers of conv_block appended - ''' - - concat_axis = 1 if K.image_data_format() == 'channels_first' else -1 - - if concat_axis == 1: # channels_first dim ordering - _, rows, cols = input_shape - else: - rows, cols, _ = input_shape - - if reduction != 0.0: - assert reduction <= 1.0 and reduction > 0.0, 'reduction value must lie between 0.0 and 1.0' - - # check if upsampling_conv has minimum number of filters - # minimum is set to 12, as at least 3 color channels are needed for correct upsampling - assert nb_upsampling_conv > 12 and nb_upsampling_conv % 4 == 0, 'Parameter `upsampling_conv` number of channels must ' \ - 'be a positive number divisible by 4 and greater ' \ - 'than 12' - - # layers in each dense block - if type(nb_layers_per_block) is list or type(nb_layers_per_block) is tuple: - nb_layers = list(nb_layers_per_block) # Convert tuple to list - - assert len(nb_layers) == (nb_dense_block + 1), 'If list, nb_layer is used as provided. ' \ - 'Note that list size must be (nb_dense_block + 1)' - - bottleneck_nb_layers = nb_layers[-1] - rev_layers = nb_layers[::-1] - nb_layers.extend(rev_layers[1:]) - else: - bottleneck_nb_layers = nb_layers_per_block - nb_layers = [nb_layers_per_block] * (2 * nb_dense_block + 1) - - # compute compression factor - compression = 1.0 - reduction - - # Initial convolution - x = Conv2D(init_conv_filters, (7, 7), kernel_initializer='he_normal', padding='same', name='initial_conv2D', - use_bias=False, kernel_regularizer=l2(weight_decay))(img_input) - x = BatchNormalization(axis=concat_axis, epsilon=1.1e-5)(x) - x = Activation('relu')(x) - - nb_filter = init_conv_filters - - skip_list = [] - - # Add dense blocks and transition down block - for block_idx in range(nb_dense_block): - x, nb_filter = __dense_block(x, nb_layers[block_idx], nb_filter, growth_rate, dropout_rate=dropout_rate, - weight_decay=weight_decay) - - # Skip connection - skip_list.append(x) - - # add transition_block - x = __transition_block( - x, nb_filter, compression=compression, weight_decay=weight_decay) - - # this is calculated inside transition_down_block - nb_filter = int(nb_filter * compression) - - # The last dense_block does not have a transition_down_block - # return the concatenated feature maps without the concatenation of the input - _, nb_filter, concat_list = __dense_block(x, bottleneck_nb_layers, nb_filter, growth_rate, - dropout_rate=dropout_rate, weight_decay=weight_decay, - return_concat_list=True) - - skip_list = skip_list[::-1] # reverse the skip list - - # Add dense blocks and transition up block - for block_idx in range(nb_dense_block): - n_filters_keep = growth_rate * nb_layers[nb_dense_block + block_idx] - - # upsampling block must upsample only the feature maps (concat_list[1:]), - # not the concatenation of the input with the feature maps (concat_list[0]. 
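- # concat_list[0] is the input to the bottleneck dense block, while
- # concat_list[1:] are its nb_layers feature maps of growth_rate channels
- # each, so the tensor assembled below carries growth_rate * nb_layers
- # channels, matching the n_filters_keep value computed above.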
- l = concatenate(concat_list[1:], axis=concat_axis) - - t = __transition_up_block( - l, nb_filters=n_filters_keep, type=upsampling_type, weight_decay=weight_decay) - - # concatenate the skip connection with the transition block - x = concatenate([t, skip_list[block_idx]], axis=concat_axis) - - # Dont allow the feature map size to grow in upsampling dense blocks - x_up, nb_filter, concat_list = __dense_block(x, nb_layers[nb_dense_block + block_idx + 1], nb_filter=growth_rate, - growth_rate=growth_rate, dropout_rate=dropout_rate, - weight_decay=weight_decay, return_concat_list=True, - grow_nb_filters=False) - - if include_top: - x = Conv2D(nb_classes, (1, 1), activation='linear', - padding='same', use_bias=False)(x_up) - - if K.image_data_format() == 'channels_first': - channel, row, col = input_shape - else: - row, col, channel = input_shape - - x = Reshape((row * col, nb_classes))(x) - x = Activation(activation)(x) - x = Reshape((row, col, nb_classes))(x) - else: - x = x_up - - return x - - -if __name__ == '__main__': - - from keras.utils.vis_utils import plot_model - #model = DenseNetFCN((32, 32, 3), growth_rate=16, nb_layers_per_block=[4, 5, 7, 10, 12, 15], upsampling_type='deconv') - model = DenseNet((32, 32, 3), depth=100, nb_dense_block=3, - growth_rate=12, bottleneck=True, reduction=0.5, weights=None) - model.summary() - - from keras.callbacks import ModelCheckpoint, TensorBoard - #plot_model(model, 'test.png', show_shapes=True) -from keras.models import Model -from keras.layers.core import Dense, Dropout, Activation -from keras.layers.convolutional import Convolution2D -from keras.layers.pooling import AveragePooling2D -from keras.layers.pooling import GlobalAveragePooling2D -from keras.layers import Input, merge -from keras.layers.normalization import BatchNormalization -from keras.regularizers import l2 -import keras.backend as K - -''' -Based on the implementation here : https://github.com/Lasagne/Recipes/blob/master/papers/densenet/densenet_fast.py -''' - - -def conv_block(ip, nb_filter, dropout_rate=None, weight_decay=1E-4): - ''' Apply BatchNorm, Relu 3x3, Conv2D, optional dropout - - Args: - ip: Input keras tensor - nb_filter: number of filters - dropout_rate: dropout rate - weight_decay: weight decay factor - - Returns: keras tensor with batch_norm, relu and convolution2d added - - ''' - - x = Activation('relu')(ip) - x = Convolution2D(nb_filter, 3, 3, init="he_uniform", border_mode="same", bias=False, - W_regularizer=l2(weight_decay))(x) - if dropout_rate: - x = Dropout(dropout_rate)(x) - - return x - - -def transition_block(ip, nb_filter, dropout_rate=None, weight_decay=1E-4): - ''' Apply BatchNorm, Relu 1x1, Conv2D, optional dropout and Maxpooling2D - - Args: - ip: keras tensor - nb_filter: number of filters - dropout_rate: dropout rate - weight_decay: weight decay factor - - Returns: keras tensor, after applying batch_norm, relu-conv, dropout, maxpool - - ''' - - concat_axis = 1 if K.image_dim_ordering() == "th" else -1 - - x = Convolution2D(nb_filter, 1, 1, init="he_uniform", border_mode="same", bias=False, - W_regularizer=l2(weight_decay))(ip) - if dropout_rate: - x = Dropout(dropout_rate)(x) - x = AveragePooling2D((2, 2), strides=(2, 2))(x) - - x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay), - beta_regularizer=l2(weight_decay))(x) - - return x - - -def dense_block(x, nb_layers, nb_filter, growth_rate, dropout_rate=None, weight_decay=1E-4): - ''' Build a dense_block where the output of each conv_block is fed to subsequent ones - - 
Args: - x: keras tensor - nb_layers: the number of layers of conv_block to append to the model. - nb_filter: number of filters - growth_rate: growth rate - dropout_rate: dropout rate - weight_decay: weight decay factor - - Returns: keras tensor with nb_layers of conv_block appended - - ''' - - concat_axis = 1 if K.image_dim_ordering() == "th" else -1 - - feature_list = [x] - - for i in range(nb_layers): - x = conv_block(x, growth_rate, dropout_rate, weight_decay) - feature_list.append(x) - x = merge(feature_list, mode='concat', concat_axis=concat_axis) - nb_filter += growth_rate - - return x, nb_filter - - -def create_dense_net(nb_classes, img_dim, depth=40, nb_dense_block=3, growth_rate=12, nb_filter=16, dropout_rate=None, - weight_decay=1E-4, verbose=True): - ''' Build the create_dense_net model - - Args: - nb_classes: number of classes - img_dim: tuple of shape (channels, rows, columns) or (rows, columns, channels) - depth: number or layers - nb_dense_block: number of dense blocks to add to end - growth_rate: number of filters to add - nb_filter: number of filters - dropout_rate: dropout rate - weight_decay: weight decay - - Returns: keras tensor with nb_layers of conv_block appended - - ''' - - model_input = Input(shape=img_dim) - - concat_axis = 1 if K.image_dim_ordering() == "th" else -1 - - assert (depth - 4) % 3 == 0, "Depth must be 3 N + 4" - - # layers in each dense block - nb_layers = int((depth - 4) / 3) - - # Initial convolution - x = Convolution2D(nb_filter, 3, 3, init="he_uniform", border_mode="same", name="initial_conv2D", bias=False, - W_regularizer=l2(weight_decay))(model_input) - - x = BatchNormalization(mode=0, axis=concat_axis, gamma_regularizer=l2(weight_decay), - beta_regularizer=l2(weight_decay))(x) - - # Add dense blocks - for block_idx in range(nb_dense_block - 1): - x, nb_filter = dense_block(x, nb_layers, nb_filter, growth_rate, dropout_rate=dropout_rate, - weight_decay=weight_decay) - # add transition_block - x = transition_block( - x, nb_filter, dropout_rate=dropout_rate, weight_decay=weight_decay) - - # The last dense_block does not have a transition_block - x, nb_filter = dense_block(x, nb_layers, nb_filter, growth_rate, dropout_rate=dropout_rate, - weight_decay=weight_decay) - - x = Activation('relu')(x) - x = GlobalAveragePooling2D()(x) - x = Dense(nb_classes, activation='softmax', W_regularizer=l2( - weight_decay), b_regularizer=l2(weight_decay))(x) - - densenet = Model(input=model_input, output=x, name="create_dense_net") - - if verbose: - print("DenseNet-%d-%d created." 
% (depth, growth_rate)) - - return densenet -from __future__ import print_function -from __future__ import absolute_import - -from keras.preprocessing import image - -from densenet import DenseNetImageNet121, DenseNetImageNet169, DenseNetImageNet161, preprocess_input, decode_predictions - -import numpy as np - -if __name__ == '__main__': - size = 224 - - model = DenseNetImageNet121(input_shape=(size, size, 3)) - model.summary() - - img_path = 'images/elephant.jpg' - img = image.load_img(img_path, target_size=(size, size)) - x = image.img_to_array(img) - x = np.expand_dims(x, axis=0) - - x = preprocess_input(x) - - preds = model.predict(x) - - print('Predicted:', decode_predictions(preds)) -from __future__ import absolute_import - -from keras import backend as K -from keras.engine import Layer -from keras.utils.generic_utils import get_custom_objects -from keras.backend import normalize_data_format - -if K.backend() == 'theano': - import theano_backend as K_BACKEND -else: - import tensorflow_backend as K_BACKEND - - -class SubPixelUpscaling(Layer): - """ Sub-pixel convolutional upscaling layer based on the paper "Real-Time Single Image - and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network" - (https://arxiv.org/abs/1609.05158). - This layer requires a Convolution2D prior to it, having output filters computed according to - the formula : - filters = k * (scale_factor * scale_factor) - where k = a user defined number of filters (generally larger than 32) - scale_factor = the upscaling factor (generally 2) - This layer performs the depth to space operation on the convolution filters, and returns a - tensor with the size as defined below. - # Example : - ```python - # A standard subpixel upscaling block - x = Convolution2D(256, 3, 3, padding='same', activation='relu')(...) - u = SubPixelUpscaling(scale_factor=2)(x) - [Optional] - x = Convolution2D(256, 3, 3, padding='same', activation='relu')(u) - ``` - In practice, it is useful to have a second convolution layer after the - SubPixelUpscaling layer to speed up the learning process. - However, if you are stacking multiple SubPixelUpscaling blocks, it may increase - the number of parameters greatly, so the Convolution layer after SubPixelUpscaling - layer can be removed. - # Arguments - scale_factor: Upscaling factor. - data_format: Can be None, 'channels_first' or 'channels_last'. - # Input shape - 4D tensor with shape: - `(samples, k * (scale_factor * scale_factor) channels, rows, cols)` if data_format='channels_first' - or 4D tensor with shape: - `(samples, rows, cols, k * (scale_factor * scale_factor) channels)` if data_format='channels_last'. - # Output shape - 4D tensor with shape: - `(samples, k channels, rows * scale_factor, cols * scale_factor))` if data_format='channels_first' - or 4D tensor with shape: - `(samples, rows * scale_factor, cols * scale_factor, k channels)` if data_format='channels_last'. 
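- # Worked shape example
- With `scale_factor=2` and `data_format='channels_last'`, an input of shape
- `(batch, 32, 32, 256)` comes out as `(batch, 64, 64, 64)`: rows and columns
- are multiplied by the scale factor, while the channel count is divided by
- `scale_factor ** 2`.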
- """ - - def __init__(self, scale_factor=2, data_format=None, **kwargs): - super(SubPixelUpscaling, self).__init__(**kwargs) - - self.scale_factor = scale_factor - self.data_format = normalize_data_format(data_format) - - def build(self, input_shape): - pass - - def call(self, x, mask=None): - y = K_BACKEND.depth_to_space(x, self.scale_factor, self.data_format) - return y - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - b, k, r, c = input_shape - return (b, k // (self.scale_factor ** 2), r * self.scale_factor, c * self.scale_factor) - else: - b, r, c, k = input_shape - return (b, r * self.scale_factor, c * self.scale_factor, k // (self.scale_factor ** 2)) - - def get_config(self): - config = {'scale_factor': self.scale_factor, - 'data_format': self.data_format} - base_config = super(SubPixelUpscaling, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -get_custom_objects().update({'SubPixelUpscaling': SubPixelUpscaling}) -import tensorflow as tf - -from keras.backend import tensorflow_backend as KTF -from keras.backend.common import image_data_format - -py_all = all - - -def depth_to_space(input, scale, data_format=None): - ''' Uses phase shift algorithm to convert channels/depth for spatial resolution ''' - if data_format is None: - data_format = image_data_format() - - if data_format == 'channels_first': - data_format = 'NCHW' - else: - data_format = 'NHWC' - - data_format = data_format.lower() - out = tf.depth_to_space(input, scale, data_format=data_format) - return out -from theano import tensor as T - -from keras.backend import theano_backend as KTH -from keras.backend.common import image_data_format -from keras.backend.theano_backend import _preprocess_conv2d_input -from keras.backend.theano_backend import _postprocess_conv2d_output - - -py_all = all - - -def depth_to_space(input, scale, data_format=None): - ''' Uses phase shift algorithm to convert channels/depth for spatial resolution ''' - if data_format is None: - data_format = image_data_format() - data_format = data_format.lower() - input = _preprocess_conv2d_input(input, data_format) - - b, k, row, col = input.shape - out_channels = k // (scale ** 2) - x = T.reshape(input, (b, scale, scale, out_channels, row, col)) - x = T.transpose(x, (0, 3, 4, 1, 5, 2)) - out = T.reshape(x, (b, out_channels, row * scale, col * scale)) - - out = _postprocess_conv2d_output(out, input, None, None, None, data_format) - return out -import shutil -import densenet -import h5py -import os - -# Note : Weights obtained from https://github.com/flyyufelix/DenseNet-Keras -f = h5py.File('densenet121_weights_tf.h5') - -conv_weights = [] -bn_weights = [] - -dense_classifier_weights = None - -for name in f.attrs['layer_names']: - if 'data' in str(name): - continue - - if 'zeropadding' in str(name): - continue - - if 'relu' in str(name): - continue - - if 'prob' in str(name): - continue - - if 'pool' in str(name): - continue - - if 'concat' in str(name): - continue - - if 'fc' in str(name): - v = f[name] - v = [v[attr][:] for attr in v.attrs['weight_names']] - dense_classifier_weights = v - break - - if 'bn' in str(name): - v = f[name] - v_w = [v[attr][:] for attr in v.attrs['weight_names']] - bn_weights.append(v_w) - continue - - if 'scale' in str(name): - v = f[name] - v_w = [v[attr][:] for attr in v.attrs['weight_names']] - bn_weights[-1][0] = v_w[0] - bn_weights[-1][1] = v_w[1] - continue - - v = f[name] - v_w = v[v.attrs['weight_names'][0]][:] - conv_weights.append(v_w) - 
-count_layers = 1 # for dense matrix -count_layers += len(conv_weights) -count_layers += len(bn_weights) - -print('Copying %d weights. (%d layers)' % (count_layers, count_layers // 2)) - - -model = densenet.DenseNetImageNet121((224, 224, 3), weights=None) - -conv_layer_ids = [] -bn_layer_ids = [] - -for i, layer in enumerate(model.layers): - if layer.__class__.__name__ == 'Input': - continue - - if layer.__class__.__name__ == 'Activation': - continue - - if layer.__class__.__name__ == 'MaxPooling2D': - continue - - if layer.__class__.__name__ == 'AveragePooling2D': - continue - - if layer.__class__.__name__ == 'Concatenate': - continue - - if layer.__class__.__name__ == 'GlobalAveragePooling2D': - continue - - if layer.__class__.__name__ == 'Conv2D': - conv_layer_ids.append(i) - continue - - if layer.__class__.__name__ == 'BatchNormalization': - bn_layer_ids.append(i) - continue - - -count = 0 -for i, weights in enumerate(conv_weights): - conv_idx = conv_layer_ids[i] - model.layers[conv_idx].set_weights([weights]) - count += 1 - -for i, weights in enumerate(bn_weights): - bn_idx = bn_layer_ids[i] - - model.layers[bn_idx].set_weights(weights) - count += 1 - -model.layers[-1].set_weights(dense_classifier_weights) -count += 1 - -print("Sanity check : %d weights loaded" % count) - -model.save_weights('DenseNet-BC-121-32.h5', overwrite=True) - -print("Finished saving weights") - -shutil.copy('DenseNet-BC-121-32.h5', 'DenseNet-BC-121-32-no-top.h5') - -f = h5py.File('DenseNet-BC-121-32-no-top.h5') -layers = f.attrs['layer_names'] -f.attrs['layer_names'] = layers[:-2] - -for layer in layers[-2:]: - del f[layer] - -f.close() - -print("Finished saving no-top weights") -import shutil -import densenet -import h5py -import os - -# Note : Weights obtained from https://github.com/flyyufelix/DenseNet-Keras -f = h5py.File('densenet161_weights_tf.h5') - -conv_weights = [] -bn_weights = [] - -dense_classifier_weights = None - -for name in f.attrs['layer_names']: - if 'data' in str(name): - continue - - if 'zeropadding' in str(name): - continue - - if 'relu' in str(name): - continue - - if 'prob' in str(name): - continue - - if 'pool' in str(name): - continue - - if 'concat' in str(name): - continue - - if 'fc' in str(name): - v = f[name] - v = [v[attr][:] for attr in v.attrs['weight_names']] - dense_classifier_weights = v - break - - if 'bn' in str(name): - v = f[name] - v_w = [v[attr][:] for attr in v.attrs['weight_names']] - bn_weights.append(v_w) - continue - - if 'scale' in str(name): - v = f[name] - v_w = [v[attr][:] for attr in v.attrs['weight_names']] - bn_weights[-1][0] = v_w[0] - bn_weights[-1][1] = v_w[1] - continue - - v = f[name] - v_w = v[v.attrs['weight_names'][0]][:] - conv_weights.append(v_w) - -count_layers = 1 # for dense matrix -count_layers += len(conv_weights) -count_layers += len(bn_weights) - -print('Copying %d weights. 
(%d layers)' % (count_layers, count_layers // 2)) - - -model = densenet.DenseNetImageNet161((224, 224, 3), weights=None) - -conv_layer_ids = [] -bn_layer_ids = [] - -for i, layer in enumerate(model.layers): - if layer.__class__.__name__ == 'Input': - continue - - if layer.__class__.__name__ == 'Activation': - continue - - if layer.__class__.__name__ == 'MaxPooling2D': - continue - - if layer.__class__.__name__ == 'AveragePooling2D': - continue - - if layer.__class__.__name__ == 'Concatenate': - continue - - if layer.__class__.__name__ == 'GlobalAveragePooling2D': - continue - - if layer.__class__.__name__ == 'Conv2D': - conv_layer_ids.append(i) - continue - - if layer.__class__.__name__ == 'BatchNormalization': - bn_layer_ids.append(i) - continue - - -count = 0 -for i, weights in enumerate(conv_weights): - conv_idx = conv_layer_ids[i] - model.layers[conv_idx].set_weights([weights]) - count += 1 - -for i, weights in enumerate(bn_weights): - bn_idx = bn_layer_ids[i] - - model.layers[bn_idx].set_weights(weights) - count += 1 - -model.layers[-1].set_weights(dense_classifier_weights) -count += 1 - -print("Sanity check : %d weights loaded" % count) - -model.save_weights('DenseNet-BC-161-48.h5', overwrite=True) - -print("Finished saving weights") - -shutil.copy('DenseNet-BC-161-48.h5', 'DenseNet-BC-161-48-no-top.h5') - -f = h5py.File('DenseNet-BC-161-48-no-top.h5') -layers = f.attrs['layer_names'] -f.attrs['layer_names'] = layers[:-2] - -for layer in layers[-2:]: - del f[layer] - -f.close() - -print("Finished saving no-top weights") -import shutil -import densenet -import h5py -import os - -# Note : Weights obtained from https://github.com/flyyufelix/DenseNet-Keras -f = h5py.File('densenet169_weights_tf.h5') - -conv_weights = [] -bn_weights = [] - -dense_classifier_weights = None - -for name in f.attrs['layer_names']: - if 'data' in str(name): - continue - - if 'zeropadding' in str(name): - continue - - if 'relu' in str(name): - continue - - if 'prob' in str(name): - continue - - if 'pool' in str(name): - continue - - if 'concat' in str(name): - continue - - if 'fc' in str(name): - v = f[name] - v = [v[attr][:] for attr in v.attrs['weight_names']] - dense_classifier_weights = v - break - - if 'bn' in str(name): - v = f[name] - v_w = [v[attr][:] for attr in v.attrs['weight_names']] - bn_weights.append(v_w) - continue - - if 'scale' in str(name): - v = f[name] - v_w = [v[attr][:] for attr in v.attrs['weight_names']] - bn_weights[-1][0] = v_w[0] - bn_weights[-1][1] = v_w[1] - continue - - v = f[name] - v_w = v[v.attrs['weight_names'][0]][:] - conv_weights.append(v_w) - -count_layers = 1 # for dense matrix -count_layers += len(conv_weights) -count_layers += len(bn_weights) - -print('Copying %d weights. 
(%d layers)' % (count_layers, count_layers // 2)) - - -model = densenet.DenseNetImageNet169((224, 224, 3), weights=None) - -conv_layer_ids = [] -bn_layer_ids = [] - -for i, layer in enumerate(model.layers): - if layer.__class__.__name__ == 'Input': - continue - - if layer.__class__.__name__ == 'Activation': - continue - - if layer.__class__.__name__ == 'MaxPooling2D': - continue - - if layer.__class__.__name__ == 'AveragePooling2D': - continue - - if layer.__class__.__name__ == 'Concatenate': - continue - - if layer.__class__.__name__ == 'GlobalAveragePooling2D': - continue - - if layer.__class__.__name__ == 'Conv2D': - conv_layer_ids.append(i) - continue - - if layer.__class__.__name__ == 'BatchNormalization': - bn_layer_ids.append(i) - continue - - -count = 0 -for i, weights in enumerate(conv_weights): - conv_idx = conv_layer_ids[i] - model.layers[conv_idx].set_weights([weights]) - count += 1 - -for i, weights in enumerate(bn_weights): - bn_idx = bn_layer_ids[i] - - model.layers[bn_idx].set_weights(weights) - count += 1 - -model.layers[-1].set_weights(dense_classifier_weights) -count += 1 - -print("Sanity check : %d weights loaded" % count) - -model.save_weights('DenseNet-BC-169-32.h5', overwrite=True) - -print("Finished saving weights") - -shutil.copy('DenseNet-BC-169-32.h5', 'DenseNet-BC-169-32-no-top.h5') - -f = h5py.File('DenseNet-BC-169-32-no-top.h5') -layers = f.attrs['layer_names'] -f.attrs['layer_names'] = layers[:-2] - -for layer in layers[-2:]: - del f[layer] - -f.close() - -print("Finished saving no-top weights") -from __future__ import absolute_import -from setuptools import setup -from setuptools import find_packages - -setup(name='elephas', - version='0.4.2', - description='Deep learning on Spark with Keras', - url='http://github.com/maxpumperla/elephas', - download_url='https://github.com/maxpumperla/elephas/tarball/0.4.2', - author='Max Pumperla', - author_email='max.pumperla@googlemail.com', - install_requires=['cython', 'tensorflow', 'keras', - 'hyperas', 'flask', 'six', 'pyspark'], - extras_require={ - 'java': ['pydl4j>=0.1.3'], - 'tests': ['pytest', 'pytest-pep8', 'pytest-cov', 'mock'] - }, - packages=find_packages(), - license='MIT', - zip_safe=False, - classifiers=[ - 'Development Status :: 3 - Alpha', - 'Intended Audience :: Developers', - 'Environment :: Console', - 'License :: OSI Approved :: Apache Software License', - 'Operating System :: OS Independent', - 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 3' - ]) -# -*- coding: utf-8 -*- - -from __future__ import print_function -from __future__ import unicode_literals - -import re -import inspect -import os -import shutil - -from elephas import spark_model, ml_model, hyperparam -from elephas.parameter import client, server -from elephas.utils import functional_utils, rdd_utils, serialization -from elephas.ml import adapter as ml_adapter -from elephas.mllib import adapter as mllib_adapter - -import sys -if sys.version[0] == '2': - reload(sys) - sys.setdefaultencoding('utf8') - - -EXCLUDE = { - 'SocketClient', - 'SocketServer' -} - - -PAGES = [ - { - 'page': 'models/spark-model.md', - 'classes': [ - spark_model.SparkModel - ], - 'functions': [ - spark_model.load_spark_model - ], - }, - { - 'page': 'models/spark-mllib-model.md', - 'classes': [ - spark_model.SparkMLlibModel - ], - 'functions': [ - spark_model.load_spark_model - ], - }, - { - 'page': 'models/spark-ml-model.md', - 'classes': [ - ml_model.ElephasEstimator, - ml_model.ElephasTransformer - 
], - 'functions': [ - ml_model.load_ml_transformer, - ml_model.load_ml_estimator - ], - }, - { - 'page': 'models/hyper-param-model.md', - 'classes': [ - hyperparam.HyperParamModel - ] - }, - { - 'page': 'parameter/client.md', - 'classes': [ - client.BaseParameterClient, - client.HttpClient - ] - }, - { - 'page': 'parameter/server.md', - 'classes': [ - server.BaseParameterServer, - server.HttpServer - ] - }, - { - 'page': 'utils/functional_utils.md', - 'all_module_functions': [functional_utils], - }, - { - 'page': 'utils/rdd_utils.md', - 'all_module_functions': [rdd_utils], - }, - { - 'page': 'utils/serialization_utils.md', - 'all_module_functions': [serialization], - }, - { - 'page': 'adapters/spark-ml.md', - 'all_module_functions': [ml_adapter], - }, - { - 'page': 'adapters/spark-mllib.md', - 'all_module_functions': [mllib_adapter], - }, -] - -ROOT = 'http://maxpumperla.com/elephas' - - -def get_function_signature(function, method=True): - wrapped = getattr(function, '_original_function', None) - if wrapped is None: - signature = inspect.getargspec(function) - else: - signature = inspect.getargspec(wrapped) - defaults = signature.defaults - if method: - args = signature.args[1:] - else: - args = signature.args - if defaults: - kwargs = zip(args[-len(defaults):], defaults) - args = args[:-len(defaults)] - else: - kwargs = [] - st = '%s.%s(' % (clean_module_name(function.__module__), function.__name__) - - for a in args: - st += str(a) + ', ' - for a, v in kwargs: - if isinstance(v, str): - v = '\'' + v + '\'' - st += str(a) + '=' + str(v) + ', ' - if kwargs or args: - signature = st[:-2] + ')' - else: - signature = st + ')' - return signature - - -def get_class_signature(cls): - try: - class_signature = get_function_signature(cls.__init__) - class_signature = class_signature.replace('__init__', cls.__name__) - except (TypeError, AttributeError): - # in case the class inherits from object and does not - # define __init__ - class_signature = "{clean_module_name}.{cls_name}()".format( - clean_module_name=clean_module_name(cls.__module__), - cls_name=cls.__name__ - ) - return class_signature - - -def clean_module_name(name): - assert name[:8] == 'elephas.', 'Invalid module name: %s' % name - return name - - -def class_to_docs_link(cls): - module_name = clean_module_name(cls.__module__) - module_name = module_name[6:] - link = ROOT + module_name.replace('.', '/') + '#' + cls.__name__.lower() - return link - - -def class_to_source_link(cls): - module_name = clean_module_name(cls.__module__) - path = module_name.replace('.', '/') - path += '.py' - line = inspect.getsourcelines(cls)[-1] - link = ('https://github.com/maxpumperla/' - 'elephas/blob/master/' + path + '#L' + str(line)) - return '[[source]](' + link + ')' - - -def code_snippet(snippet): - result = '```python\n' - result += snippet + '\n' - result += '```\n' - return result - - -def count_leading_spaces(s): - ws = re.search(r'\S', s) - if ws: - return ws.start() - else: - return 0 - - -def process_list_block(docstring, starting_point, leading_spaces, marker): - ending_point = docstring.find('\n\n', starting_point) - block = docstring[starting_point:(None if ending_point == -1 else - ending_point - 1)] - # Place marker for later reinjection. - docstring = docstring.replace(block, marker) - lines = block.split('\n') - # Remove the computed number of leading white spaces from each line. - lines = [re.sub('^' + ' ' * leading_spaces, '', line) for line in lines] - # Usually lines have at least 4 additional leading spaces. 
- # These have to be removed, but first the list roots have to be detected. - top_level_regex = r'^ ([^\s\\\(]+):(.*)' - top_level_replacement = r'- __\1__:\2' - lines = [re.sub(top_level_regex, top_level_replacement, line) - for line in lines] - # All the other lines get simply the 4 leading space (if present) removed - lines = [re.sub(r'^ ', '', line) for line in lines] - # Fix text lines after lists - indent = 0 - text_block = False - for i in range(len(lines)): - line = lines[i] - spaces = re.search(r'\S', line) - if spaces: - # If it is a list element - if line[spaces.start()] == '-': - indent = spaces.start() + 1 - if text_block: - text_block = False - lines[i] = '\n' + line - elif spaces.start() < indent: - text_block = True - indent = spaces.start() - lines[i] = '\n' + line - else: - text_block = False - indent = 0 - block = '\n'.join(lines) - return docstring, block - - -def process_docstring(docstring): - # First, extract code blocks and process them. - code_blocks = [] - if '```' in docstring: - tmp = docstring[:] - while '```' in tmp: - tmp = tmp[tmp.find('```'):] - index = tmp[3:].find('```') + 6 - snippet = tmp[:index] - # Place marker in docstring for later reinjection. - docstring = docstring.replace( - snippet, '$CODE_BLOCK_%d' % len(code_blocks)) - snippet_lines = snippet.split('\n') - # Remove leading spaces. - num_leading_spaces = snippet_lines[-1].find('`') - snippet_lines = ([snippet_lines[0]] + - [line[num_leading_spaces:] - for line in snippet_lines[1:]]) - # Most code snippets have 3 or 4 more leading spaces - # on inner lines, but not all. Remove them. - inner_lines = snippet_lines[1:-1] - leading_spaces = None - for line in inner_lines: - if not line or line[0] == '\n': - continue - spaces = count_leading_spaces(line) - if leading_spaces is None: - leading_spaces = spaces - if spaces < leading_spaces: - leading_spaces = spaces - if leading_spaces: - snippet_lines = ([snippet_lines[0]] + - [line[leading_spaces:] - for line in snippet_lines[1:-1]] + - [snippet_lines[-1]]) - snippet = '\n'.join(snippet_lines) - code_blocks.append(snippet) - tmp = tmp[index:] - - # Format docstring lists. - section_regex = r'\n( +)# (.*)\n' - section_idx = re.search(section_regex, docstring) - shift = 0 - sections = {} - while section_idx and section_idx.group(2): - anchor = section_idx.group(2) - leading_spaces = len(section_idx.group(1)) - shift += section_idx.end() - marker = '$' + anchor.replace(' ', '_') + '$' - docstring, content = process_list_block(docstring, - shift, - leading_spaces, - marker) - sections[marker] = content - section_idx = re.search(section_regex, docstring[shift:]) - - # Format docstring section titles. - docstring = re.sub(r'\n(\s+)# (.*)\n', - r'\n\1__\2__\n\n', - docstring) - - # Strip all remaining leading spaces. - lines = docstring.split('\n') - docstring = '\n'.join([line.lstrip(' ') for line in lines]) - - # Reinject list blocks. - for marker, content in sections.items(): - docstring = docstring.replace(marker, content) - - # Reinject code blocks. 
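- # The '$CODE_BLOCK_%d' placeholders were substituted in near the top of
- # this function precisely so the whitespace stripping above could not
- # mangle the fenced snippets; the loop below swaps the cleaned snippets
- # back into place.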
- for i, code_block in enumerate(code_blocks): - docstring = docstring.replace( - '$CODE_BLOCK_%d' % i, code_block) - return docstring - - -print('Cleaning up existing sources directory.') -if os.path.exists('sources'): - shutil.rmtree('sources') - -print('Populating sources directory with templates.') -for subdir, dirs, fnames in os.walk('templates'): - for fname in fnames: - new_subdir = subdir.replace('templates', 'sources') - if not os.path.exists(new_subdir): - os.makedirs(new_subdir) - if fname[-3:] == '.md': - fpath = os.path.join(subdir, fname) - new_fpath = fpath.replace('templates', 'sources') - shutil.copy(fpath, new_fpath) - - -def read_file(path): - with open(path) as f: - return f.read() - - -def collect_class_methods(cls, methods): - if isinstance(methods, (list, tuple)): - return [getattr(cls, m) if isinstance(m, str) else m for m in methods] - methods = [] - for _, method in inspect.getmembers(cls, predicate=inspect.isroutine): - if method.__name__[0] == '_' or method.__name__ in EXCLUDE: - continue - methods.append(method) - return methods - - -def render_function(function, method=True): - subblocks = [] - signature = get_function_signature(function, method=method) - if method: - signature = signature.replace( - clean_module_name(function.__module__) + '.', '') - subblocks.append('### ' + function.__name__ + '\n') - subblocks.append(code_snippet(signature)) - docstring = function.__doc__ - if docstring: - subblocks.append(process_docstring(docstring)) - return '\n\n'.join(subblocks) - - -def read_page_data(page_data, type): - assert type in ['classes', 'functions', 'methods'] - data = page_data.get(type, []) - for module in page_data.get('all_module_{}'.format(type), []): - module_data = [] - for name in dir(module): - if name[0] == '_' or name in EXCLUDE: - continue - module_member = getattr(module, name) - if (inspect.isclass(module_member) and type == 'classes' or - inspect.isfunction(module_member) and type == 'functions'): - instance = module_member - if module.__name__ in instance.__module__: - if instance not in module_data: - module_data.append(instance) - module_data.sort(key=lambda x: id(x)) - data += module_data - return data - - -if __name__ == '__main__': - readme = read_file('../README.md') - index = read_file('templates/index.md') - index = index.replace('{{autogenerated}}', readme[readme.find('##'):]) - with open('sources/index.md', 'w') as f: - f.write(index) - - print('Generating Elephas docs') - for page_data in PAGES: - classes = read_page_data(page_data, 'classes') - - blocks = [] - for element in classes: - if not isinstance(element, (list, tuple)): - element = (element, []) - cls = element[0] - subblocks = [] - signature = get_class_signature(cls) - subblocks.append('' + - class_to_source_link(cls) + '') - if element[1]: - subblocks.append('## ' + cls.__name__ + ' class\n') - else: - subblocks.append('### ' + cls.__name__ + '\n') - subblocks.append(code_snippet(signature)) - docstring = cls.__doc__ - if docstring: - subblocks.append(process_docstring(docstring)) - methods = collect_class_methods(cls, element[1]) - if methods: - subblocks.append('\n---') - subblocks.append('## ' + cls.__name__ + ' methods\n') - subblocks.append('\n---\n'.join( - [render_function(method, method=True) for method in methods])) - blocks.append('\n'.join(subblocks)) - - methods = read_page_data(page_data, 'methods') - - for method in methods: - blocks.append(render_function(method, method=True)) - - functions = read_page_data(page_data, 'functions') - - for function in 
functions: - blocks.append(render_function(function, method=False)) - - if not blocks: - raise RuntimeError('Found no content for page ' + - page_data['page']) - - mkdown = '\n----\n\n'.join(blocks) - # save module page. - # Either insert content into existing page, - # or create page otherwise - page_name = page_data['page'] - path = os.path.join('sources', page_name) - if os.path.exists(path): - template = read_file(path) - assert '{{autogenerated}}' in template, ('Template found for ' + path + - ' but missing {{autogenerated}}' - ' tag.') - mkdown = template.replace('{{autogenerated}}', mkdown) - print('...inserting autogenerated content into template:', path) - else: - print('...creating new page with autogenerated content:', path) - subdir = os.path.dirname(path) - if not os.path.exists(subdir): - os.makedirs(subdir) - with open(path, 'w') as f: - f.write(mkdown) -from .spark_model import SparkModel -try: - from elephas.java import java_classes, adapter -except: - raise Exception("Warning: java classes couldn't be loaded.") - - -class ParameterAveragingModel(SparkModel): - def __init__(self, java_spark_context, model, num_workers, batch_size, averaging_frequency=5, - num_batches_prefetch=0, collect_stats=False, save_file='temp.h5', *args, **kwargs): - """ParameterAveragingModel - - :param java_spark_context JavaSparkContext, initialized through pyjnius - :param model: compiled Keras model - :param num_workers: number of Spark workers/executors. - :param batch_size: batch size used for model training - :param averaging_frequency: int, after how many batches of training averaging takes place - :param num_batches_prefetch: int, how many batches to pre-fetch, deactivated if 0. - :param collect_stats: boolean, if statistics get collected during training - :param save_file: where to store elephas model temporarily. - """ - SparkModel.__init__(self, model=model, batch_size=batch_size, mode='synchronous', - averaging_frequency=averaging_frequency, num_batches_prefetch=num_batches_prefetch, - num_workers=num_workers, collect_stats=collect_stats, *args, **kwargs) - - self.save(save_file) - model_file = java_classes.File(save_file) - keras_model_type = model.__class__.__name__ - self.java_spark_model = dl4j_import( - java_spark_context, model_file, keras_model_type) - - def fit_rdd(self, data_set_rdd, epochs): - for _ in range(epochs): - self.java_spark_model.fit(data_set_rdd) - - def get_keras_model(self): - model = self.master_network - java_model = self.java_spark_model.getNetwork() - weights = adapter.retrieve_keras_weights(java_model) - model.set_weights(weights) - return model - - -class ParameterSharingModel(SparkModel): - def __init__(self, java_spark_context, model, num_workers, batch_size, - shake_frequency=0, min_threshold=1e-5, update_threshold=1e-3, workers_per_node=-1, - num_batches_prefetch=0, step_delay=50, step_trigger=0.05, threshold_step=1e-5, - collect_stats=False, save_file='temp.h5', *args, **kwargs): - """ParameterSharingModel - - :param java_spark_context JavaSparkContext, initialized through pyjnius - :param model: compiled Keras model - :param num_workers: number of Spark workers/executors. 
- :param batch_size: batch size used for model training - :param shake_frequency: - :param min_threshold: - :param update_threshold: - :param workers_per_node: - :param num_batches_prefetch: - :param step_delay: - :param step_trigger: - :param threshold_step: - :param collect_stats: - :param save_file: - :param args: - :param kwargs: - """ - SparkModel.__init__(self, model=model, num_workers=num_workers, batch_size=batch_size, mode='asynchronous', - shake_frequency=shake_frequency, min_threshold=min_threshold, - update_threshold=update_threshold, workers_per_node=workers_per_node, - num_batches_prefetch=num_batches_prefetch, step_delay=step_delay, step_trigger=step_trigger, - threshold_step=threshold_step, collect_stats=collect_stats, *args, **kwargs) - - self.save(save_file) - model_file = java_classes.File(save_file) - keras_model_type = model.__class__.__name__ - self.java_spark_model = dl4j_import( - java_spark_context, model_file, keras_model_type) - - def fit_rdd(self, data_set_rdd, epochs): - for _ in range(epochs): - self.java_spark_model.fit(data_set_rdd) - - def get_keras_model(self): - model = self.master_network - java_model = self.java_spark_model.getNetwork() - weights = adapter.retrieve_keras_weights(java_model) - model.set_weights(weights) - return model - - -def dl4j_import(jsc, model_file, keras_model_type): - emi = java_classes.ElephasModelImport - if keras_model_type == "Sequential": - try: - return emi.importElephasSequentialModelAndWeights( - jsc, model_file.absolutePath) - except: - print("Couldn't load Keras model into DL4J") - elif keras_model_type == "Model": - try: - return emi.importElephasModelAndWeights(jsc, model_file.absolutePath) - except: - print("Couldn't load Keras model into DL4J") - else: - raise Exception( - "Keras model not understood, got: {}".format(keras_model_type)) -from __future__ import print_function -from __future__ import absolute_import -from hyperopt import Trials, rand -from hyperas.ensemble import VotingModel -from hyperas.optim import get_hyperopt_model_string, base_minimizer -import numpy as np -from keras.models import model_from_yaml -import six.moves.cPickle as pickle -from six.moves import range -# depend on hyperas, boto etc. is optional - - -class HyperParamModel(object): - """HyperParamModel - - Computes distributed hyper-parameter optimization using Hyperas and - Spark. 
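-
- A minimal usage sketch; `sc` is assumed to be an initialized SparkContext,
- and `model` / `data` to be hyperas-style functions, none of which are
- defined in this file:
-
- >>> hyperparam_model = HyperParamModel(sc, num_workers=4)
- >>> best_model = hyperparam_model.minimize(model=model, data=data, max_evals=5)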
- """ - - def __init__(self, sc, num_workers=4): - self.spark_context = sc - self.num_workers = num_workers - - def compute_trials(self, model, data, max_evals, notebook_name): - model_string = get_hyperopt_model_string(model=model, data=data, functions=None, notebook_name=notebook_name, - verbose=False, stack=3) - hyperas_worker = HyperasWorker(model_string, max_evals) - dummy_rdd = self.spark_context.parallelize([i for i in range(1, 1000)]) - dummy_rdd = dummy_rdd.repartition(self.num_workers) - trials_list = dummy_rdd.mapPartitions( - hyperas_worker._minimize).collect() - - return trials_list - - def minimize(self, model, data, max_evals, notebook_name=None): - global best_model_yaml, best_model_weights - - trials_list = self.compute_trials( - model, data, max_evals, notebook_name) - - best_val = 1e7 - for trials in trials_list: - for trial in trials: - val = trial.get('result').get('loss') - if val < best_val: - best_val = val - best_model_yaml = trial.get('result').get('model') - best_model_weights = trial.get('result').get('weights') - - best_model = model_from_yaml(best_model_yaml) - best_model.set_weights(pickle.loads(best_model_weights)) - - return best_model - - def best_ensemble(self, nb_ensemble_models, model, data, max_evals, voting='hard', weights=None): - model_list = self.best_models(nb_models=nb_ensemble_models, model=model, - data=data, max_evals=max_evals) - return VotingModel(model_list, voting, weights) - - def best_models(self, nb_models, model, data, max_evals): - trials_list = self.compute_trials(model, data, max_evals) - num_trials = sum(len(trials) for trials in trials_list) - if num_trials < nb_models: - nb_models = len(trials_list) - scores = [] - for trials in trials_list: - scores = scores + [trial.get('result').get('loss') - for trial in trials] - cut_off = sorted(scores, reverse=True)[nb_models - 1] - model_list = [] - for trials in trials_list: - for trial in trials: - if trial.get('result').get('loss') >= cut_off: - model = model_from_yaml(trial.get('result').get('model')) - model.set_weights(pickle.loads( - trial.get('result').get('weights'))) - model_list.append(model) - return model_list - - -class HyperasWorker(object): - """ HyperasWorker - - Executes hyper-parameter search on each worker and returns results. 
- """ - - def __init__(self, bc_model, bc_max_evals): - self.model_string = bc_model - self.max_evals = bc_max_evals - - def _minimize(self, dummy_iterator): - trials = Trials() - algo = rand.suggest - - elem = next(dummy_iterator) - import random - random.seed(elem) - rand_seed = np.random.randint(elem) - - base_minimizer(model=None, data=None, functions=None, algo=algo, max_evals=self.max_evals, - trials=trials, rseed=rand_seed, full_model_string=self.model_string, notebook_name=None, - verbose=True, stack=3) - yield trials -from __future__ import absolute_import, print_function - -import numpy as np -import copy -import h5py -import json - -from pyspark.ml.param.shared import HasOutputCol, HasFeaturesCol, HasLabelCol -from pyspark import keyword_only -from pyspark.ml import Estimator, Model -from pyspark.sql.types import StringType, DoubleType, StructField - -from keras.models import model_from_yaml -from keras.optimizers import get as get_optimizer - - -from .spark_model import SparkModel -from .utils.rdd_utils import from_vector -from .ml.adapter import df_to_simple_rdd -from .ml.params import * - - -class ElephasEstimator(Estimator, HasCategoricalLabels, HasValidationSplit, HasKerasModelConfig, HasFeaturesCol, - HasLabelCol, HasMode, HasEpochs, HasBatchSize, HasFrequency, HasVerbosity, HasNumberOfClasses, - HasNumberOfWorkers, HasOutputCol, HasLoss, - HasMetrics, HasKerasOptimizerConfig): - """ - SparkML Estimator implementation of an elephas model. This estimator takes all relevant arguments for model - compilation and training. - - Returns a trained model in form of a SparkML Model, which is also a Transformer. - """ - @keyword_only - def __init__(self, **kwargs): - super(ElephasEstimator, self).__init__() - self.set_params(**kwargs) - - def get_config(self): - return {'keras_model_config': self.get_keras_model_config(), - 'mode': self.get_mode(), - 'frequency': self.get_frequency(), - 'num_workers': self.get_num_workers(), - 'categorical': self.get_categorical_labels(), - 'loss': self.get_loss(), - 'metrics': self.get_metrics(), - 'validation_split': self.get_validation_split(), - 'featuresCol': self.getFeaturesCol(), - 'labelCol': self.getLabelCol(), - 'epochs': self.get_epochs(), - 'batch_size': self.get_batch_size(), - 'verbose': self.get_verbosity(), - 'nb_classes': self.get_nb_classes(), - 'outputCol': self.getOutputCol()} - - def save(self, file_name): - f = h5py.File(file_name, mode='w') - - f.attrs['distributed_config'] = json.dumps({ - 'class_name': self.__class__.__name__, - 'config': self.get_config() - }).encode('utf8') - - f.flush() - f.close() - - @keyword_only - def set_params(self, **kwargs): - """Set all provided parameters, otherwise set defaults - """ - return self._set(**kwargs) - - def _fit(self, df): - """Private fit method of the Estimator, which trains the model. 
- """ - simple_rdd = df_to_simple_rdd(df, categorical=self.get_categorical_labels(), nb_classes=self.get_nb_classes(), - features_col=self.getFeaturesCol(), label_col=self.getLabelCol()) - simple_rdd = simple_rdd.repartition(self.get_num_workers()) - keras_model = model_from_yaml(self.get_keras_model_config()) - metrics = self.get_metrics() - loss = self.get_loss() - optimizer = get_optimizer(self.get_optimizer_config()) - keras_model.compile(loss=loss, optimizer=optimizer, metrics=metrics) - - spark_model = SparkModel(model=keras_model, - mode=self.get_mode(), - frequency=self.get_frequency(), - num_workers=self.get_num_workers()) - spark_model.fit(simple_rdd, - epochs=self.get_epochs(), - batch_size=self.get_batch_size(), - verbose=self.get_verbosity(), - validation_split=self.get_validation_split()) - - model_weights = spark_model.master_network.get_weights() - weights = simple_rdd.ctx.broadcast(model_weights) - return ElephasTransformer(labelCol=self.getLabelCol(), - outputCol='prediction', - keras_model_config=spark_model.master_network.to_yaml(), - weights=weights) - - -def load_ml_estimator(file_name): - f = h5py.File(file_name, mode='r') - elephas_conf = json.loads(f.attrs.get('distributed_config')) - config = elephas_conf.get('config') - return ElephasEstimator(**config) - - -class ElephasTransformer(Model, HasKerasModelConfig, HasLabelCol, HasOutputCol): - """SparkML Transformer implementation. Contains a trained model, - with which new feature data can be transformed into labels. - """ - @keyword_only - def __init__(self, **kwargs): - super(ElephasTransformer, self).__init__() - if "weights" in kwargs.keys(): - # Strip model weights from parameters to init Transformer - self.weights = kwargs.pop('weights') - self.set_params(**kwargs) - - @keyword_only - def set_params(self, **kwargs): - """Set all provided parameters, otherwise set defaults - """ - return self._set(**kwargs) - - def get_config(self): - return {'keras_model_config': self.get_keras_model_config(), - 'labelCol': self.getLabelCol(), - 'outputCol': self.getOutputCol()} - - def save(self, file_name): - f = h5py.File(file_name, mode='w') - - f.attrs['distributed_config'] = json.dumps({ - 'class_name': self.__class__.__name__, - 'config': self.get_config() - }).encode('utf8') - - f.flush() - f.close() - - def get_model(self): - return model_from_yaml(self.get_keras_model_config()) - - def _transform(self, df): - """Private transform method of a Transformer. This serves as batch-prediction method for our purposes. 
- """ - output_col = self.getOutputCol() - label_col = self.getLabelCol() - new_schema = copy.deepcopy(df.schema) - new_schema.add(StructField(output_col, StringType(), True)) - - rdd = df.rdd.coalesce(1) - features = np.asarray( - rdd.map(lambda x: from_vector(x.features)).collect()) - # Note that we collect, since executing this on the rdd would require model serialization once again - model = model_from_yaml(self.get_keras_model_config()) - model.set_weights(self.weights.value) - predictions = rdd.ctx.parallelize( - model.predict_classes(features)).coalesce(1) - predictions = predictions.map(lambda x: tuple(str(x))) - - results_rdd = rdd.zip(predictions).map(lambda x: x[0] + x[1]) - results_df = df.sql_ctx.createDataFrame(results_rdd, new_schema) - results_df = results_df.withColumn( - output_col, results_df[output_col].cast(DoubleType())) - results_df = results_df.withColumn( - label_col, results_df[label_col].cast(DoubleType())) - - return results_df - - -def load_ml_transformer(file_name): - f = h5py.File(file_name, mode='r') - elephas_conf = json.loads(f.attrs.get('distributed_config')) - config = elephas_conf.get('config') - return ElephasTransformer(**config) -from __future__ import absolute_import -from __future__ import print_function - -import pyspark -import h5py -import json -from keras.optimizers import serialize as serialize_optimizer -from keras.models import load_model - -from .utils import subtract_params -from .utils import lp_to_simple_rdd -from .utils import model_to_dict -from .mllib import to_matrix, from_matrix, to_vector, from_vector -from .worker import AsynchronousSparkWorker, SparkWorker -from .parameter import HttpServer, SocketServer -from .parameter import HttpClient, SocketClient - - -class SparkModel(object): - - def __init__(self, model, mode='asynchronous', frequency='epoch', parameter_server_mode='http', num_workers=None, - custom_objects=None, batch_size=32, port=4000, *args, **kwargs): - """SparkModel - - Base class for distributed training on RDDs. Spark model takes a Keras - model as master network, an optimization scheme, a parallelisation mode - and an averaging frequency. 
-from __future__ import absolute_import
-from __future__ import print_function
-
-import pyspark
-import h5py
-import json
-from keras.optimizers import serialize as serialize_optimizer
-from keras.models import load_model
-
-from .utils import subtract_params
-from .utils import lp_to_simple_rdd
-from .utils import model_to_dict
-from .mllib import to_matrix, from_matrix, to_vector, from_vector
-from .worker import AsynchronousSparkWorker, SparkWorker
-from .parameter import HttpServer, SocketServer
-from .parameter import HttpClient, SocketClient
-
-
-class SparkModel(object):
-
-    def __init__(self, model, mode='asynchronous', frequency='epoch', parameter_server_mode='http', num_workers=None,
-                 custom_objects=None, batch_size=32, port=4000, *args, **kwargs):
-        """SparkModel
-
-        Base class for distributed training on RDDs. Spark model takes a Keras
-        model as master network, an optimization scheme, a parallelisation mode
-        and an averaging frequency.
-
-        :param model: Compiled Keras model
-        :param mode: String, choose from `asynchronous`, `synchronous` and `hogwild`
-        :param frequency: String, either `epoch` or `batch`
-        :param parameter_server_mode: String, either `http` or `socket`
-        :param num_workers: int, number of workers used for training (defaults to None)
-        :param custom_objects: Keras custom objects
-        :param batch_size: batch size used for training and inference
-        :param port: port used in case of 'http' parameter server mode
-        """
-
-        self._master_network = model
-        if not hasattr(model, "loss"):
-            raise Exception(
-                "Compile your Keras model before initializing an Elephas model with it")
-        metrics = model.metrics
-        loss = model.loss
-        optimizer = serialize_optimizer(model.optimizer)
-
-        if custom_objects is None:
-            custom_objects = {}
-        if metrics is None:
-            metrics = ["accuracy"]
-        self.mode = mode
-        self.frequency = frequency
-        self.num_workers = num_workers
-        self.weights = self._master_network.get_weights()
-        self.pickled_weights = None
-        self.master_optimizer = optimizer
-        self.master_loss = loss
-        self.master_metrics = metrics
-        self.custom_objects = custom_objects
-        self.parameter_server_mode = parameter_server_mode
-        self.batch_size = batch_size
-        self.port = port
-        self.kwargs = kwargs
-
-        self.serialized_model = model_to_dict(model)
-        if self.mode != 'synchronous':
-            if self.parameter_server_mode == 'http':
-                self.parameter_server = HttpServer(
-                    self.serialized_model, self.mode, self.port)
-                self.client = HttpClient(self.port)
-            elif self.parameter_server_mode == 'socket':
-                self.parameter_server = SocketServer(self.serialized_model)
-                self.client = SocketClient()
-            else:
-                raise ValueError("Parameter server mode has to be either `http` or `socket`, "
-                                 "got {}".format(self.parameter_server_mode))
-
-    @staticmethod
-    def get_train_config(epochs, batch_size, verbose, validation_split):
-        return {'epochs': epochs,
-                'batch_size': batch_size,
-                'verbose': verbose,
-                'validation_split': validation_split}
-
-    def get_config(self):
-        base_config = {
-            'parameter_server_mode': self.parameter_server_mode,
-            'mode': self.mode,
-            'frequency': self.frequency,
-            'num_workers': self.num_workers,
-            'batch_size': self.batch_size}
-        config = base_config.copy()
-        config.update(self.kwargs)
-        return config
-
-    def save(self, file_name):
-        model = self._master_network
-        model.save(file_name)
-        f = h5py.File(file_name, mode='a')
-
-        f.attrs['distributed_config'] = json.dumps({
-            'class_name': self.__class__.__name__,
-            'config': self.get_config()
-        }).encode('utf8')
-
-        f.flush()
-        f.close()
-
-    @property
-    def master_network(self):
-        return self._master_network
-
-    @master_network.setter
-    def master_network(self, network):
-        self._master_network = network
-
-    def start_server(self):
-        self.parameter_server.start()
-
-    def stop_server(self):
-        self.parameter_server.stop()
-
-    def predict(self, data):
-        """Get prediction probabilities for a numpy array of features
-        """
-        return self._master_network.predict(data)
-
-    def predict_classes(self, data):
-        """ Predict classes for a numpy array of features
-        """
-        return self._master_network.predict_classes(data)
-
-    def fit(self, rdd, epochs=10, batch_size=32,
-            verbose=0, validation_split=0.1):
-        """
-        Train an elephas model on an RDD. The Keras model configuration as specified
-        in the elephas model is sent to Spark workers, and each worker will be trained
-        on their data partition.
-
-        :param rdd: RDD with features and labels
-        :param epochs: number of epochs used for training
-        :param batch_size: batch size used for training
-        :param verbose: logging verbosity level (0, 1 or 2)
-        :param validation_split: percentage of data set aside for validation
-        """
-        print('>>> Fit model')
-        if self.num_workers:
-            rdd = rdd.repartition(self.num_workers)
-
-        if self.mode in ['asynchronous', 'synchronous', 'hogwild']:
-            self._fit(rdd, epochs, batch_size, verbose, validation_split)
-        else:
-            raise ValueError(
-                "Choose from one of the modes: asynchronous, synchronous or hogwild")
-
-    def _fit(self, rdd, epochs, batch_size, verbose, validation_split):
-        """Protected train method to make wrapping of modes easier
-        """
-        self._master_network.compile(optimizer=self.master_optimizer,
-                                     loss=self.master_loss,
-                                     metrics=self.master_metrics)
-        if self.mode in ['asynchronous', 'hogwild']:
-            self.start_server()
-        train_config = self.get_train_config(
-            epochs, batch_size, verbose, validation_split)
-        mode = self.parameter_server_mode
-        freq = self.frequency
-        optimizer = self.master_optimizer
-        loss = self.master_loss
-        metrics = self.master_metrics
-        custom = self.custom_objects
-
-        yaml = self._master_network.to_yaml()
-        init = self._master_network.get_weights()
-        parameters = rdd.context.broadcast(init)
-
-        if self.mode in ['asynchronous', 'hogwild']:
-            print('>>> Initialize workers')
-            worker = AsynchronousSparkWorker(
-                yaml, parameters, mode, train_config, freq, optimizer, loss, metrics, custom)
-            print('>>> Distribute load')
-            rdd.mapPartitions(worker.train).collect()
-            print('>>> Async training complete.')
-            new_parameters = self.client.get_parameters()
-        elif self.mode == 'synchronous':
-            worker = SparkWorker(yaml, parameters, train_config,
-                                 optimizer, loss, metrics, custom)
-            gradients = rdd.mapPartitions(worker.train).collect()
-            new_parameters = self._master_network.get_weights()
-            for grad in gradients:  # simply accumulate gradients one by one
-                new_parameters = subtract_params(new_parameters, grad)
-            print('>>> Synchronous training complete.')
-        else:
-            raise ValueError("Unsupported mode {}".format(self.mode))
-        self._master_network.set_weights(new_parameters)
-        if self.mode in ['asynchronous', 'hogwild']:
-            self.stop_server()
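As the constructor above enforces, the Keras model must be compiled before wrapping, and any non-synchronous mode starts a parameter server. A small sketch of both server modes (model and settings illustrative):

model.compile(optimizer='sgd', loss='categorical_crossentropy', metrics=['acc'])

# HTTP parameter server (the default), listening on the given port
http_model = SparkModel(model, mode='asynchronous', parameter_server_mode='http', port=4000)

# Raw socket parameter server as the alternative transport
socket_model = SparkModel(model, mode='asynchronous', parameter_server_mode='socket')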
-
-
-def load_spark_model(file_name):
-    model = load_model(file_name)
-    f = h5py.File(file_name, mode='r')
-
-    elephas_conf = json.loads(f.attrs.get('distributed_config'))
-    class_name = elephas_conf.get('class_name')
-    config = elephas_conf.get('config')
-    if class_name == "SparkModel":
-        return SparkModel(model=model, **config)
-    elif class_name == "SparkMLlibModel":
-        return SparkMLlibModel(model=model, **config)
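In the synchronous branch of `_fit` above, worker deltas are folded into the master weights one by one via `subtract_params`. The helper is imported from elephas.utils; a minimal sketch of what such an element-wise helper does, assuming weights are lists of numpy arrays:

import numpy as np

def subtract_params(params, deltas):
    # Element-wise subtraction over two equally structured lists of weight arrays
    return [w - d for w, d in zip(params, deltas)]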
-
-
-class SparkMLlibModel(SparkModel):
-
-    def __init__(self, model, mode='asynchronous', frequency='epoch', parameter_server_mode='http',
-                 num_workers=4, elephas_optimizer=None, custom_objects=None, batch_size=32, port=4000, *args, **kwargs):
-        """SparkMLlibModel
-
-        The Spark MLlib model takes RDDs of LabeledPoints for training.
-
-        :param model: Compiled Keras model
-        :param mode: String, choose from `asynchronous`, `synchronous` and `hogwild`
-        :param frequency: String, either `epoch` or `batch`
-        :param parameter_server_mode: String, either `http` or `socket`
-        :param num_workers: int, number of workers used for training (defaults to None)
-        :param custom_objects: Keras custom objects
-        :param batch_size: batch size used for training and inference
-        :param port: port used in case of 'http' parameter server mode
-        """
-        SparkModel.__init__(self, model=model, mode=mode, frequency=frequency,
-                            parameter_server_mode=parameter_server_mode, num_workers=num_workers,
-                            custom_objects=custom_objects,
-                            batch_size=batch_size, port=port, *args, **kwargs)
-
-    def fit(self, labeled_points, epochs=10, batch_size=32, verbose=0, validation_split=0.1,
-            categorical=False, nb_classes=None):
-        """Train an elephas model on an RDD of LabeledPoints
-        """
-        rdd = lp_to_simple_rdd(labeled_points, categorical, nb_classes)
-        rdd = rdd.repartition(self.num_workers)
-        self._fit(rdd=rdd, epochs=epochs, batch_size=batch_size,
-                  verbose=verbose, validation_split=validation_split)
-
-    def predict(self, mllib_data):
-        """Predict probabilities for an RDD of features
-        """
-        if isinstance(mllib_data, pyspark.mllib.linalg.Matrix):
-            return to_matrix(self._master_network.predict(from_matrix(mllib_data)))
-        elif isinstance(mllib_data, pyspark.mllib.linalg.Vector):
-            return to_vector(self._master_network.predict(from_vector(mllib_data)))
-        else:
-            raise ValueError(
-                'Provide either an MLLib matrix or vector, got {}'.format(mllib_data.__class__.__name__))
-import numpy as np
-from itertools import tee
-from keras.utils.generic_utils import slice_arrays
-from keras.models import model_from_yaml
-from keras.optimizers import get as get_optimizer
-
-from .utils import subtract_params
-from .parameter import SocketClient, HttpClient
-
-
-class SparkWorker(object):
-    """Synchronous Spark worker. This code will be executed on workers.
-    """
-
-    def __init__(self, yaml, parameters, train_config, master_optimizer,
-                 master_loss, master_metrics, custom_objects):
-        self.yaml = yaml
-        self.parameters = parameters
-        self.train_config = train_config
-        self.master_optimizer = master_optimizer
-        self.master_loss = master_loss
-        self.master_metrics = master_metrics
-        self.custom_objects = custom_objects
-        self.model = None
-
-    def train(self, data_iterator):
-        """Train a keras model on a worker
-        """
-        optimizer = get_optimizer(self.master_optimizer)
-        self.model = model_from_yaml(self.yaml, self.custom_objects)
-        self.model.compile(optimizer=optimizer,
-                           loss=self.master_loss, metrics=self.master_metrics)
-        self.model.set_weights(self.parameters.value)
-
-        feature_iterator, label_iterator = tee(data_iterator, 2)
-        x_train = np.asarray([x for x, y in feature_iterator])
-        y_train = np.asarray([y for x, y in label_iterator])
-
-        weights_before_training = self.model.get_weights()
-        if x_train.shape[0] > self.train_config.get('batch_size'):
-            self.model.fit(x_train, y_train, **self.train_config)
-        weights_after_training = self.model.get_weights()
-        deltas = subtract_params(
-            weights_before_training, weights_after_training)
-        yield deltas
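`SparkWorker.train` receives a single iterator of `(features, label)` pairs per partition and splits it with `itertools.tee`; the same pattern in isolation, with toy stand-in data:

import numpy as np
from itertools import tee

pairs = iter([(np.zeros(784), 0), (np.ones(784), 1)])  # stand-in partition data
feature_iterator, label_iterator = tee(pairs, 2)
x_train = np.asarray([x for x, y in feature_iterator])
y_train = np.asarray([y for x, y in label_iterator])
print(x_train.shape, y_train.shape)  # (2, 784) (2,)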
- """ - - def __init__(self, yaml, parameters, parameter_server_mode, train_config, frequency, - master_optimizer, master_loss, master_metrics, custom_objects): - - if parameter_server_mode == 'http': - self.client = HttpClient() - elif parameter_server_mode == 'socket': - self.client = SocketClient() - else: - raise ValueError("Parameter server mode has to be either `http` or `socket`, " - "got {}".format(parameter_server_mode)) - - self.train_config = train_config - self.frequency = frequency - self.master_optimizer = master_optimizer - self.master_loss = master_loss - self.master_metrics = master_metrics - self.yaml = yaml - self.parameters = parameters - self.custom_objects = custom_objects - self.model = None - - def train(self, data_iterator): - """Train a keras model on a worker and send asynchronous updates - to parameter server - """ - feature_iterator, label_iterator = tee(data_iterator, 2) - x_train = np.asarray([x for x, y in feature_iterator]) - y_train = np.asarray([y for x, y in label_iterator]) - - if x_train.size == 0: - return - - optimizer = get_optimizer(self.master_optimizer) - self.model = model_from_yaml(self.yaml, self.custom_objects) - self.model.compile(optimizer=optimizer, - loss=self.master_loss, metrics=self.master_metrics) - self.model.set_weights(self.parameters.value) - - epochs = self.train_config['epochs'] - batch_size = self.train_config.get('batch_size') - nb_train_sample = x_train.shape[0] - nb_batch = int(np.ceil(nb_train_sample / float(batch_size))) - index_array = np.arange(nb_train_sample) - batches = [ - (i * batch_size, min(nb_train_sample, (i + 1) * batch_size)) - for i in range(0, nb_batch) - ] - - if self.frequency == 'epoch': - for epoch in range(epochs): - weights_before_training = self.client.get_parameters() - self.model.set_weights(weights_before_training) - self.train_config['epochs'] = 1 - if x_train.shape[0] > batch_size: - self.model.fit(x_train, y_train, **self.train_config) - self.train_config['epochs'] = epochs - weights_after_training = self.model.get_weights() - deltas = subtract_params( - weights_before_training, weights_after_training) - self.client.update_parameters(deltas) - elif self.frequency == 'batch': - for epoch in range(epochs): - if x_train.shape[0] > batch_size: - for (batch_start, batch_end) in batches: - weights_before_training = self.client.get_parameters() - self.model.set_weights(weights_before_training) - batch_ids = index_array[batch_start:batch_end] - x = slice_arrays(x_train, batch_ids) - y = slice_arrays(y_train, batch_ids) - self.model.train_on_batch(x, y) - weights_after_training = self.model.get_weights() - deltas = subtract_params( - weights_before_training, weights_after_training) - self.client.update_parameters(deltas) - else: - raise ValueError( - 'frequency parameter can be `epoch` or `batch, got {}'.format(self.frequency)) - yield [] -from elephas.java import java_classes, adapter -from keras.models import Sequential -from keras.layers import Dense - - -model = Sequential() -model.add(Dense(units=64, activation='relu', input_dim=100)) -model.add(Dense(units=10, activation='softmax')) -model.compile(loss='categorical_crossentropy', - optimizer='sgd', metrics=['accuracy']) - -model.save('test.h5') - - -kmi = java_classes.KerasModelImport -file = java_classes.File("test.h5") - -java_model = kmi.importKerasSequentialModelAndWeights(file.absolutePath) - -weights = adapter.retrieve_keras_weights(java_model) -model.set_weights(weights) -from elephas.java import java_classes -from elephas.dl4j import 
ParameterAveragingModel -from elephas.utils import rdd_utils -import keras -from keras.utils import np_utils - - -def main(): - # Set Java Spark context - conf = java_classes.SparkConf().setMaster( - 'local[*]').setAppName("elephas_dl4j") - jsc = java_classes.JavaSparkContext(conf) - - # Define Keras model - model = keras.models.Sequential() - model.add(keras.layers.Dense(128, input_dim=784)) - model.add(keras.layers.Dense(units=10, activation='softmax')) - model.compile(loss='categorical_crossentropy', - optimizer='sgd', metrics=['accuracy']) - - # Define DL4J Elephas model - spark_model = ParameterAveragingModel( - java_spark_context=jsc, model=model, num_workers=4, batch_size=32) - - # Load data and build DL4J DataSet RDD under the hood - (x_train, y_train), (x_test, y_test) = keras.datasets.mnist.load_data() - x_train = x_train.reshape(60000, 784) - x_test = x_test.reshape(10000, 784) - x_train = x_train.astype("float64") - x_test = x_test.astype("float64") - - # Convert class vectors to binary class matrices - y_train = np_utils.to_categorical(y_train, 10) - y_test = np_utils.to_categorical(y_test, 10) - y_train = y_train.astype("float64") - y_test = y_test.astype("float64") - x_train /= 255 - x_test /= 255 - java_rdd = rdd_utils.to_java_rdd(jsc, x_train, y_train, 32) - - import timeit - - start = timeit.default_timer() - # Fit model - spark_model.fit_rdd(java_rdd, 2) - stop = timeit.default_timer() - print('Time: ', stop - start) - - # Retrieve resulting weights from training, set to original Keras model, evaluate. - keras_model = spark_model.get_keras_model() - score = keras_model.evaluate(x_test, y_test, verbose=0) - print('Test loss:', score[0]) - print('Test accuracy:', score[1]) - - import os - if os.path.exists("temp.h5"): - os.remove("temp.h5") - - -if __name__ == '__main__': - main() -from pyspark import SparkContext, SparkConf - -from hyperopt import STATUS_OK -from hyperas.distributions import choice, uniform -import six.moves.cPickle as pickle - -from elephas.hyperparam import HyperParamModel - - -def data(): - """Data providing function: - - Make sure to have every relevant import statement included here and return data as - used in model function below. This function is separated from model() so that hyperopt - won't reload data for each evaluation run. - """ - from keras.datasets import mnist - from keras.utils import np_utils - (x_train, y_train), (x_test, y_test) = mnist.load_data() - x_train = x_train.reshape(60000, 784) - x_test = x_test.reshape(10000, 784) - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - x_train /= 255 - x_test /= 255 - nb_classes = 10 - y_train = np_utils.to_categorical(y_train, nb_classes) - y_test = np_utils.to_categorical(y_test, nb_classes) - return x_train, y_train, x_test, y_test - - -def model(x_train, y_train, x_test, y_test): - """Model providing function: - - Create Keras model with double curly brackets dropped-in as needed. - Return value has to be a valid python dictionary with two customary keys: - - loss: Specify a numeric evaluation metric to be minimized - - status: Just use STATUS_OK and see hyperopt documentation if not feasible - The last one is optional, though recommended, namely: - - model: specify the model just created so that we can later use it again. 
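After an import round trip like the two scripts above, a cheap sanity check is that the retrieved weight shapes line up with the Keras model's; a sketch assuming the `model` and `weights` names from the KerasModelImport snippet:

for keras_w, dl4j_w in zip(model.get_weights(), weights):
    assert keras_w.shape == dl4j_w.shape, (keras_w.shape, dl4j_w.shape)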
- """ - from keras.models import Sequential - from keras.layers.core import Dense, Dropout, Activation - from keras.optimizers import RMSprop - - keras_model = Sequential() - keras_model.add(Dense(512, input_shape=(784,))) - keras_model.add(Activation('relu')) - keras_model.add(Dropout({{uniform(0, 1)}})) - keras_model.add(Dense({{choice([256, 512, 1024])}})) - keras_model.add(Activation('relu')) - keras_model.add(Dropout({{uniform(0, 1)}})) - keras_model.add(Dense(10)) - keras_model.add(Activation('softmax')) - - rms = RMSprop() - keras_model.compile(loss='categorical_crossentropy', - optimizer=rms, metrics=['acc']) - - keras_model.fit(x_train, y_train, - batch_size={{choice([64, 128])}}, - epochs=1, - verbose=2, - validation_data=(x_test, y_test)) - score, acc = keras_model.evaluate(x_test, y_test, verbose=0) - print('Test accuracy:', acc) - return {'loss': -acc, 'status': STATUS_OK, 'model': keras_model.to_yaml(), - 'weights': pickle.dumps(keras_model.get_weights())} - - -# Create Spark context -conf = SparkConf().setAppName( - 'Elephas_Hyperparameter_Optimization').setMaster('local[8]') -sc = SparkContext(conf=conf) - -# Define hyper-parameter model and run optimization. -hyperparam_model = HyperParamModel(sc) -hyperparam_model.minimize(model=model, data=data, max_evals=5) -from __future__ import absolute_import -from __future__ import print_function - -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Activation -from keras.utils import np_utils -from keras import optimizers - -from elephas.ml_model import ElephasEstimator -from elephas.ml.adapter import to_data_frame - -from pyspark import SparkContext, SparkConf -from pyspark.mllib.evaluation import MulticlassMetrics -from pyspark.ml import Pipeline - - -# Define basic parameters -batch_size = 64 -nb_classes = 10 -epochs = 1 - -# Load data -(x_train, y_train), (x_test, y_test) = mnist.load_data() - -x_train = x_train.reshape(60000, 784) -x_test = x_test.reshape(10000, 784) -x_train = x_train.astype("float32") -x_test = x_test.astype("float32") -x_train /= 255 -x_test /= 255 -print(x_train.shape[0], 'train samples') -print(x_test.shape[0], 'test samples') - -# Convert class vectors to binary class matrices -y_train = np_utils.to_categorical(y_train, nb_classes) -y_test = np_utils.to_categorical(y_test, nb_classes) - -model = Sequential() -model.add(Dense(128, input_dim=784)) -model.add(Activation('relu')) -model.add(Dropout(0.2)) -model.add(Dense(128)) -model.add(Activation('relu')) -model.add(Dropout(0.2)) -model.add(Dense(10)) -model.add(Activation('softmax')) - -# Create Spark context -conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]') -sc = SparkContext(conf=conf) - -# Build RDD from numpy features and labels -df = to_data_frame(sc, x_train, y_train, categorical=True) -test_df = to_data_frame(sc, x_test, y_test, categorical=True) - -sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) -sgd_conf = optimizers.serialize(sgd) - -# Initialize Spark ML Estimator -estimator = ElephasEstimator() -estimator.set_keras_model_config(model.to_yaml()) -estimator.set_optimizer_config(sgd_conf) -estimator.set_mode("synchronous") -estimator.set_loss("categorical_crossentropy") -estimator.set_metrics(['acc']) -estimator.set_epochs(epochs) -estimator.set_batch_size(batch_size) -estimator.set_validation_split(0.1) -estimator.set_categorical_labels(True) -estimator.set_nb_classes(nb_classes) - -# Fitting a model returns a Transformer -pipeline = 
Pipeline(stages=[estimator]) -fitted_pipeline = pipeline.fit(df) - -# Evaluate Spark model by evaluating the underlying model -prediction = fitted_pipeline.transform(test_df) -pnl = prediction.select("label", "prediction") -pnl.show(100) - -prediction_and_label = pnl.rdd.map(lambda row: (row.label, row.prediction)) -metrics = MulticlassMetrics(prediction_and_label) -print(metrics.precision()) -print(metrics.recall()) -from __future__ import print_function -from __future__ import absolute_import - -from pyspark.ml.linalg import Vectors -import numpy as np -import random - -from pyspark import SparkContext, SparkConf -from pyspark.sql import SQLContext -from pyspark.ml.feature import StringIndexer, StandardScaler -from pyspark.ml import Pipeline - -from keras import optimizers -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation - -from elephas.ml_model import ElephasEstimator - - -data_path = "../" - -# Spark contexts -conf = SparkConf().setAppName('Otto_Spark_ML_Pipeline').setMaster('local[8]') -sc = SparkContext(conf=conf) -sql_context = SQLContext(sc) - - -# Data loader -def shuffle_csv(csv_file): - lines = open(csv_file).readlines() - random.shuffle(lines) - open(csv_file, 'w').writelines(lines) - - -def load_data_rdd(csv_file, shuffle=True, train=True): - if shuffle: - shuffle_csv(data_path + csv_file) - data = sc.textFile(data_path + csv_file) - data = data.filter(lambda x: x.split(',')[0] != 'id').map( - lambda line: line.split(',')) - if train: - data = data.map( - lambda line: (Vectors.dense(np.asarray(line[1:-1]).astype(np.float32)), - str(line[-1]).replace('Class_', ''))) - else: - data = data.map(lambda line: (Vectors.dense( - np.asarray(line[1:]).astype(np.float32)), "1")) - return data - - -# Define Data frames -train_df = sql_context.createDataFrame( - load_data_rdd("train.csv"), ['features', 'category']) -test_df = sql_context.createDataFrame(load_data_rdd( - "test.csv", shuffle=False, train=False), ['features', 'category']) - -# Preprocessing steps -string_indexer = StringIndexer(inputCol="category", outputCol="index_category") -scaler = StandardScaler( - inputCol="features", outputCol="scaled_features", withStd=True, withMean=True) - -# Keras model -nb_classes = train_df.select("category").distinct().count() -input_dim = len(train_df.select("features").first()[0]) - -model = Sequential() -model.add(Dense(512, input_shape=(input_dim,))) -model.add(Activation('relu')) -model.add(Dropout(0.5)) -model.add(Dense(512)) -model.add(Activation('relu')) -model.add(Dropout(0.5)) -model.add(Dense(512)) -model.add(Activation('relu')) -model.add(Dropout(0.5)) -model.add(Dense(nb_classes)) -model.add(Activation('softmax')) - -model.compile(loss='categorical_crossentropy', optimizer='adam') - -sgd = optimizers.SGD(lr=0.01) -sgd_conf = optimizers.serialize(sgd) - -# Initialize Elephas Spark ML Estimator -estimator = ElephasEstimator() -estimator.set_keras_model_config(model.to_yaml()) -estimator.set_optimizer_config(sgd_conf) -estimator.set_mode("synchronous") -estimator.set_loss("categorical_crossentropy") -estimator.set_metrics(['acc']) -estimator.setFeaturesCol("scaled_features") -estimator.setLabelCol("index_category") -estimator.set_epochs(10) -estimator.set_batch_size(128) -estimator.set_num_workers(1) -estimator.set_verbosity(0) -estimator.set_validation_split(0.15) -estimator.set_categorical_labels(True) -estimator.set_nb_classes(nb_classes) - -# Fitting a model returns a Transformer -pipeline = Pipeline(stages=[string_indexer, scaler, 
estimator]) -fitted_pipeline = pipeline.fit(train_df) - -# Evaluate Spark model -prediction = fitted_pipeline.transform(train_df) -pnl = prediction.select("index_category", "prediction") -pnl.show(100) -from __future__ import absolute_import -from __future__ import print_function - -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Activation -from keras.optimizers import RMSprop -from keras.utils import np_utils - -from elephas.spark_model import SparkMLlibModel -from elephas.utils.rdd_utils import to_labeled_point - -from pyspark import SparkContext, SparkConf - -# Define basic parameters -batch_size = 64 -nb_classes = 10 -epochs = 3 - -# Load data -(x_train, y_train), (x_test, y_test) = mnist.load_data() - -x_train = x_train.reshape(60000, 784) -x_test = x_test.reshape(10000, 784) -x_train = x_train.astype("float32") -x_test = x_test.astype("float32") -x_train /= 255 -x_test /= 255 -print(x_train.shape[0], 'train samples') -print(x_test.shape[0], 'test samples') - -# Convert class vectors to binary class matrices -y_train = np_utils.to_categorical(y_train, nb_classes) -y_test = np_utils.to_categorical(y_test, nb_classes) - -model = Sequential() -model.add(Dense(128, input_dim=784)) -model.add(Activation('relu')) -model.add(Dropout(0.2)) -model.add(Dense(128)) -model.add(Activation('relu')) -model.add(Dropout(0.2)) -model.add(Dense(10)) -model.add(Activation('softmax')) - -# Compile model -rms = RMSprop() -model.compile(rms, "categorical_crossentropy", ['acc']) - -# Create Spark context -conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]') -sc = SparkContext(conf=conf) - -# Build RDD from numpy features and labels -lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True) - -# Initialize SparkModel from Keras model and Spark context -spark_model = SparkMLlibModel( - model=model, frequency='epoch', mode='synchronous') - -# Train Spark model -spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0, - validation_split=0.1, categorical=True, nb_classes=nb_classes) - -# Evaluate Spark model by evaluating the underlying model -score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) -print('Test accuracy:', score[1]) -from __future__ import absolute_import -from __future__ import print_function - -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Activation -from keras.optimizers import SGD -from keras.utils import np_utils - -from elephas.spark_model import SparkModel -from elephas.utils.rdd_utils import to_simple_rdd - -from pyspark import SparkContext, SparkConf - -# Define basic parameters -batch_size = 64 -nb_classes = 10 -epochs = 1 - -# Create Spark context -conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]') -sc = SparkContext(conf=conf) - -# Load data -(x_train, y_train), (x_test, y_test) = mnist.load_data() - -x_train = x_train.reshape(60000, 784) -x_test = x_test.reshape(10000, 784) -x_train = x_train.astype("float32") -x_test = x_test.astype("float32") -x_train /= 255 -x_test /= 255 -print(x_train.shape[0], 'train samples') -print(x_test.shape[0], 'test samples') - -# Convert class vectors to binary class matrices -y_train = np_utils.to_categorical(y_train, nb_classes) -y_test = np_utils.to_categorical(y_test, nb_classes) - -model = Sequential() -model.add(Dense(128, input_dim=784)) -model.add(Activation('relu')) -model.add(Dropout(0.2)) -model.add(Dense(128)) 
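The Otto pipeline above leans on `StringIndexer` to turn the stripped `Class_*` strings into numeric labels. Its behavior in isolation, reusing the `sql_context` defined in that script with toy data:

from pyspark.ml.feature import StringIndexer

toy_df = sql_context.createDataFrame([("3",), ("9",), ("3",)], ["category"])
indexer = StringIndexer(inputCol="category", outputCol="index_category")
indexer.fit(toy_df).transform(toy_df).show()  # most frequent label gets index 0.0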
-from __future__ import absolute_import
-from __future__ import print_function
-
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers.core import Dense, Dropout, Activation
-from keras.optimizers import RMSprop
-from keras.utils import np_utils
-
-from elephas.spark_model import SparkMLlibModel
-from elephas.utils.rdd_utils import to_labeled_point
-
-from pyspark import SparkContext, SparkConf
-
-# Define basic parameters
-batch_size = 64
-nb_classes = 10
-epochs = 3
-
-# Load data
-(x_train, y_train), (x_test, y_test) = mnist.load_data()
-
-x_train = x_train.reshape(60000, 784)
-x_test = x_test.reshape(10000, 784)
-x_train = x_train.astype("float32")
-x_test = x_test.astype("float32")
-x_train /= 255
-x_test /= 255
-print(x_train.shape[0], 'train samples')
-print(x_test.shape[0], 'test samples')
-
-# Convert class vectors to binary class matrices
-y_train = np_utils.to_categorical(y_train, nb_classes)
-y_test = np_utils.to_categorical(y_test, nb_classes)
-
-model = Sequential()
-model.add(Dense(128, input_dim=784))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-model.add(Dense(128))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-model.add(Dense(10))
-model.add(Activation('softmax'))
-
-# Compile model
-rms = RMSprop()
-model.compile(rms, "categorical_crossentropy", ['acc'])
-
-# Create Spark context
-conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
-sc = SparkContext(conf=conf)
-
-# Build RDD from numpy features and labels
-lp_rdd = to_labeled_point(sc, x_train, y_train, categorical=True)
-
-# Initialize SparkModel from Keras model and Spark context
-spark_model = SparkMLlibModel(
-    model=model, frequency='epoch', mode='synchronous')
-
-# Train Spark model
-spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0,
-                validation_split=0.1, categorical=True, nb_classes=nb_classes)
-
-# Evaluate Spark model by evaluating the underlying model
-score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
-print('Test accuracy:', score[1])
-from __future__ import absolute_import
-from __future__ import print_function
-
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers.core import Dense, Dropout, Activation
-from keras.optimizers import SGD
-from keras.utils import np_utils
-
-from elephas.spark_model import SparkModel
-from elephas.utils.rdd_utils import to_simple_rdd
-
-from pyspark import SparkContext, SparkConf
-
-# Define basic parameters
-batch_size = 64
-nb_classes = 10
-epochs = 1
-
-# Create Spark context
-conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
-sc = SparkContext(conf=conf)
-
-# Load data
-(x_train, y_train), (x_test, y_test) = mnist.load_data()
-
-x_train = x_train.reshape(60000, 784)
-x_test = x_test.reshape(10000, 784)
-x_train = x_train.astype("float32")
-x_test = x_test.astype("float32")
-x_train /= 255
-x_test /= 255
-print(x_train.shape[0], 'train samples')
-print(x_test.shape[0], 'test samples')
-
-# Convert class vectors to binary class matrices
-y_train = np_utils.to_categorical(y_train, nb_classes)
-y_test = np_utils.to_categorical(y_test, nb_classes)
-
-model = Sequential()
-model.add(Dense(128, input_dim=784))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-model.add(Dense(128))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-model.add(Dense(10))
-model.add(Activation('softmax'))
-
-sgd = SGD(lr=0.1)
-model.compile(sgd, 'categorical_crossentropy', ['acc'])
-
-# Build RDD from numpy features and labels
-rdd = to_simple_rdd(sc, x_train, y_train)
-
-# Initialize SparkModel from Keras model and Spark context
-spark_model = SparkModel(model, frequency='epoch', mode='asynchronous')
-
-# Train Spark model
-spark_model.fit(rdd, epochs=epochs, batch_size=batch_size,
-                verbose=0, validation_split=0.1)
-# Evaluate Spark model by evaluating the underlying model
-score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
-print('Test accuracy:', score[1])
-from __future__ import absolute_import
-from __future__ import print_function
-
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers.core import Dense, Dropout, Activation
-from keras.optimizers import SGD
-from keras.utils import np_utils
-
-from elephas.spark_model import SparkModel
-from elephas.utils.rdd_utils import to_simple_rdd
-
-from pyspark import SparkContext, SparkConf
-
-# Define basic parameters
-batch_size = 64
-nb_classes = 10
-epochs = 1
-
-# Create Spark context
-conf = SparkConf().setAppName('Mnist_Spark_MLP').setMaster('local[8]')
-sc = SparkContext(conf=conf)
-
-# Load data
-(x_train, y_train), (x_test, y_test) = mnist.load_data()
-
-x_train = x_train.reshape(60000, 784)
-x_test = x_test.reshape(10000, 784)
-x_train = x_train.astype("float32")
-x_test = x_test.astype("float32")
-x_train /= 255
-x_test /= 255
-print(x_train.shape[0], 'train samples')
-print(x_test.shape[0], 'test samples')
-
-# Convert class vectors to binary class matrices
-y_train = np_utils.to_categorical(y_train, nb_classes)
-y_test = np_utils.to_categorical(y_test, nb_classes)
-
-model = Sequential()
-model.add(Dense(128, input_dim=784))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-model.add(Dense(128))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-model.add(Dense(10))
-model.add(Activation('softmax'))
-
-sgd = SGD(lr=0.1)
-model.compile(sgd, 'categorical_crossentropy', ['acc'])
-
-# Build RDD from numpy features and labels
-rdd = to_simple_rdd(sc, x_train, y_train)
-
-# Initialize SparkModel from Keras model and Spark context
-spark_model = SparkModel(model, mode='synchronous')
-
-# Train Spark model
-spark_model.fit(rdd, epochs=epochs, batch_size=batch_size,
-                verbose=2, validation_split=0.1)
-
-# Evaluate Spark model by evaluating the underlying model
-score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
-print('Test accuracy:', score[1])
-from pyspark import SparkContext, SparkConf
-from pyspark.sql import SQLContext
-import pytest
-import logging
-
-
-def quiet_py4j():
-    """ turn down spark logging for the test context """
-    logger = logging.getLogger('py4j')
-    logger.setLevel(logging.WARN)
-
-
-@pytest.fixture(scope="session")
-def spark_context(request):
-    """ fixture for creating a SparkContext
-    Args:
-        request: pytest.FixtureRequest object
-    """
-    conf = (SparkConf().setMaster("local[2]").setAppName(
-        "pytest-pyspark-local-testing"))
-    sc = SparkContext(conf=conf)
-    request.addfinalizer(lambda: sc.stop())
-
-    quiet_py4j()
-    return sc
-
-
-@pytest.fixture(scope="session")
-def sql_context(request):
-    """ fixture for creating a Spark SQLContext
-    Args:
-        request: pytest.FixtureRequest object
-    """
-    conf = (SparkConf().setMaster("local[2]").setAppName(
-        "pytest-pyspark-local-testing"))
-    sc = SparkContext(conf=conf)
-    sql_context = SQLContext(sc)
-    request.addfinalizer(lambda: sc.stop())
-
-    quiet_py4j()
-    return sql_context
-import pytest
-from hyperopt import STATUS_OK
-from hyperas.distributions import choice, uniform
-import six.moves.cPickle as pickle
-
-from elephas.hyperparam import HyperParamModel
-
-pytestmark = pytest.mark.usefixtures("spark_context")
-
-
-def data():
-    from keras.datasets import mnist
-    from keras.utils import np_utils
-    (x_train, y_train), (x_test, y_test) = mnist.load_data()
-    x_train = x_train.reshape(60000, 784)
-    x_test = x_test.reshape(10000, 784)
-    x_train = x_train.astype('float32')
-    x_test = x_test.astype('float32')
-    x_train /= 255
-    x_test /= 255
-    nb_classes = 10
-    y_train = np_utils.to_categorical(y_train, nb_classes)
-    y_test = np_utils.to_categorical(y_test, nb_classes)
-    return x_train, y_train, x_test, y_test
-
-
-def model(x_train, y_train, x_test, y_test):
-    from keras.models import Sequential
-    from keras.layers.core import Dense, Dropout, Activation
-    from keras.optimizers import RMSprop
-
-    keras_model = Sequential()
-    keras_model.add(Dense(512, input_shape=(784,)))
-    keras_model.add(Activation('relu'))
-    keras_model.add(Dropout({{uniform(0, 1)}}))
-    keras_model.add(Dense({{choice([256, 512, 1024])}}))
-    keras_model.add(Activation('relu'))
-    keras_model.add(Dropout({{uniform(0, 1)}}))
-    keras_model.add(Dense(10))
-    keras_model.add(Activation('softmax'))
-
-    rms = RMSprop()
-    keras_model.compile(loss='categorical_crossentropy',
-                        optimizer=rms, metrics=['acc'])
-
-    keras_model.fit(x_train, y_train,
-                    batch_size={{choice([64, 128])}},
-                    epochs=1,
-                    verbose=2,
-                    validation_data=(x_test, y_test))
-    score, acc = keras_model.evaluate(x_test, y_test, verbose=0)
-    print('Test accuracy:', acc)
-    return {'loss': -acc, 'status': STATUS_OK, 'model': keras_model.to_yaml(),
-            'weights': pickle.dumps(keras_model.get_weights())}
-
-
-def test_hyper_param_model(spark_context):
-    hyperparam_model = HyperParamModel(spark_context)
-    hyperparam_model.minimize(model=model, data=data, max_evals=1)
-from __future__ import absolute_import
-from __future__ import print_function
-
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers.core import Dense, Dropout, Activation
-from keras.utils import np_utils
-from keras import optimizers
-
-from elephas.ml_model import ElephasEstimator, load_ml_estimator, ElephasTransformer, load_ml_transformer
-from elephas.ml.adapter import to_data_frame
-
-from pyspark.mllib.evaluation import MulticlassMetrics
-from pyspark.ml import Pipeline
-
-import pytest
-pytestmark = pytest.mark.usefixtures("spark_context")
-
-# Define basic parameters
-batch_size = 64
-nb_classes = 10
-epochs = 1
-
-# Load data
-(x_train, y_train), (x_test, y_test) = mnist.load_data()
-
-x_train = x_train.reshape(60000, 784)[:1000]
-x_test = x_test.reshape(10000, 784)
-x_train = x_train.astype("float32")
-x_test = x_test.astype("float32")
-x_train /= 255
-x_test /= 255
-print(x_train.shape[0], 'train samples')
-print(x_test.shape[0], 'test samples')
-
-# Convert class vectors to binary class matrices
-y_train = np_utils.to_categorical(y_train, nb_classes)
-y_test = np_utils.to_categorical(y_test, nb_classes)
-
-model = Sequential()
-model.add(Dense(128, input_dim=784))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-model.add(Dense(128))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-model.add(Dense(10))
-model.add(Activation('softmax'))
-
-
-def test_serialization_transformer():
-    transformer = ElephasTransformer()
-    transformer.set_keras_model_config(model.to_yaml())
-    transformer.save("test.h5")
-    load_ml_transformer("test.h5")
-
-
-def test_serialization_estimator():
-    estimator = ElephasEstimator()
-    estimator.set_keras_model_config(model.to_yaml())
-    estimator.set_loss("categorical_crossentropy")
-
-    estimator.save("test.h5")
-    load_ml_estimator("test.h5")
-
-
-def test_spark_ml_model(spark_context):
-
-    df = to_data_frame(spark_context, x_train, y_train, categorical=True)
-    test_df = to_data_frame(spark_context, x_test, y_test, categorical=True)
-
-    sgd = optimizers.SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
-    sgd_conf = optimizers.serialize(sgd)
-
-    # Initialize Spark ML Estimator
-    estimator = ElephasEstimator()
-    estimator.set_keras_model_config(model.to_yaml())
-    estimator.set_optimizer_config(sgd_conf)
-    estimator.set_mode("synchronous")
-    estimator.set_loss("categorical_crossentropy")
-    estimator.set_metrics(['acc'])
-    estimator.set_epochs(epochs)
-    estimator.set_batch_size(batch_size)
-    estimator.set_validation_split(0.1)
-    estimator.set_categorical_labels(True)
-    estimator.set_nb_classes(nb_classes)
-
-    # Fitting a model returns a Transformer
-    pipeline = Pipeline(stages=[estimator])
-    fitted_pipeline = pipeline.fit(df)
-
-    # Evaluate Spark model by evaluating the underlying model
-    prediction = fitted_pipeline.transform(test_df)
-    pnl = prediction.select("label", "prediction")
-    pnl.show(100)
-
-    prediction_and_label = pnl.rdd.map(lambda row: (row.label, row.prediction))
-    metrics = MulticlassMetrics(prediction_and_label)
-    print(metrics.precision())
-    print(metrics.recall())
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers.core import Dense, Dropout, Activation
-from keras.optimizers import RMSprop
-from keras.utils import np_utils
-
-from elephas.spark_model import SparkMLlibModel, load_spark_model
-from elephas.utils.rdd_utils import to_labeled_point
-
-import pytest
-pytestmark = pytest.mark.usefixtures("spark_context")
-
-# Define basic parameters
-batch_size = 64
-nb_classes = 10
-epochs = 3
-
-# Load data
-(x_train, y_train), (x_test, y_test) = mnist.load_data()
-
-x_train = x_train.reshape(60000, 784)[:1000]
-x_test = x_test.reshape(10000, 784)
-x_train = x_train.astype("float32")
-x_test = x_test.astype("float32")
-x_train /= 255
-x_test /= 255
-print(x_train.shape[0], 'train samples')
-print(x_test.shape[0], 'test samples')
-
-# Convert class vectors to binary class matrices
-y_train = np_utils.to_categorical(y_train, nb_classes)
-y_test = np_utils.to_categorical(y_test, nb_classes)
-
-model = Sequential()
-model.add(Dense(128, input_dim=784))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-model.add(Dense(128))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-model.add(Dense(10))
-model.add(Activation('softmax'))
-
-# Compile model
-rms = RMSprop()
-model.compile(rms, 'categorical_crossentropy', ['acc'])
-
-
-def test_serialization():
-    spark_model = SparkMLlibModel(
-        model, frequency='epoch', mode='synchronous', num_workers=2)
-    spark_model.save("test.h5")
-    load_spark_model("test.h5")
-
-
-def test_mllib_model(spark_context):
-    # Build RDD from numpy features and labels
-    lp_rdd = to_labeled_point(spark_context, x_train,
-                              y_train, categorical=True)
-
-    # Initialize SparkModel from Keras model and Spark context
-    spark_model = SparkMLlibModel(
-        model=model, frequency='epoch', mode='synchronous')
-
-    # Train Spark model
-    spark_model.fit(lp_rdd, epochs=5, batch_size=32, verbose=0,
-                    validation_split=0.1, categorical=True, nb_classes=nb_classes)
-
-    # Evaluate Spark model by evaluating the underlying model
-    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
-    print('Test accuracy:', score[1])
-from __future__ import absolute_import
-from __future__ import print_function
-import pytest
-
-from keras.models import Sequential, Model
-from keras.layers import Dense, Dropout, Activation, Input
-
-from elephas.spark_model import SparkModel
-
-
-def test_sequential_serialization():
-    seq_model = Sequential()
-    seq_model.add(Dense(128, input_dim=784))
-    seq_model.add(Activation('relu'))
-    seq_model.add(Dropout(0.2))
-    seq_model.add(Dense(128))
-    seq_model.add(Activation('relu'))
-    seq_model.add(Dropout(0.2))
-    seq_model.add(Dense(10))
-    seq_model.add(Activation('softmax'))
-
-    seq_model.compile(
-        optimizer="sgd", loss="categorical_crossentropy", metrics=["acc"])
-    spark_model = SparkModel(seq_model, frequency='epoch', mode='synchronous')
-    spark_model.save("elephas_sequential.h5")
-
-
-def test_model_serialization():
-    # This returns a tensor
-    inputs = Input(shape=(784,))
-
-    # a layer instance is callable on a tensor, and returns a tensor
-    x = Dense(64, activation='relu')(inputs)
-    x = Dense(64, activation='relu')(x)
-    predictions = Dense(10, activation='softmax')(x)
-
-    # This creates a model that includes
-    # the Input layer and three Dense layers
-    model = Model(inputs=inputs, outputs=predictions)
-    model.compile(optimizer='rmsprop',
-                  loss='categorical_crossentropy',
-                  metrics=['accuracy'])
-
-    spark_model = SparkModel(model, frequency='epoch',
-                             mode='synchronous', foo="bar")
-    spark_model.save("elephas_model.h5")
-
-
-@pytest.mark.skip(reason="not feasible on travis right now")
-def test_java_avg_serde():
-    from elephas.dl4j import ParameterAveragingModel, ParameterSharingModel
-
-    inputs = Input(shape=(784,))
-    x = Dense(64, activation='relu')(inputs)
-    x = Dense(64, activation='relu')(x)
-    predictions = Dense(10, activation='softmax')(x)
-
-    # This creates a model that includes
-    # the Input layer and three Dense layers
-    model = Model(inputs=inputs, outputs=predictions)
-    model.compile(optimizer='rmsprop',
-                  loss='categorical_crossentropy',
-                  metrics=['accuracy'])
-
-    spark_model = ParameterAveragingModel(java_spark_context=None, model=model, num_workers=4, batch_size=32,
-                                          averaging_frequency=5, num_batches_prefetch=0, collect_stats=False,
-                                          save_file='temp.h5')
-    spark_model.save("java_param_averaging_model.h5")
-
-
-@pytest.mark.skip(reason="not feasible on travis right now")
-def test_java_sharing_serde():
-    from elephas.dl4j import ParameterAveragingModel, ParameterSharingModel
-
-    inputs = Input(shape=(784,))
-    x = Dense(64, activation='relu')(inputs)
-    x = Dense(64, activation='relu')(x)
-    predictions = Dense(10, activation='softmax')(x)
-
-    model = Model(inputs=inputs, outputs=predictions)
-    model.compile(optimizer='rmsprop',
-                  loss='categorical_crossentropy',
-                  metrics=['accuracy'])
-
-    spark_model = ParameterSharingModel(java_spark_context=None, model=model, num_workers=4, batch_size=32,
-                                        shake_frequency=0, min_threshold=1e-5, update_threshold=1e-3,
-                                        workers_per_node=-1, num_batches_prefetch=0, step_delay=50, step_trigger=0.05,
-                                        threshold_step=1e-5, collect_stats=False, save_file='temp.h5')
-    spark_model.save("java_param_sharing_model.h5")
-
-
-if __name__ == '__main__':
-    pytest.main([__file__])
-from __future__ import absolute_import
-from __future__ import print_function
-import pytest
-
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers.core import Dense, Dropout, Activation
-from keras.utils import np_utils
-
-from elephas.spark_model import SparkModel
-from elephas.utils.rdd_utils import to_simple_rdd
-
-
-# Define basic parameters
-batch_size = 64
-nb_classes = 10
-epochs = 1
-
-# Use the spark_context fixture from conftest.py
-pytestmark = pytest.mark.usefixtures("spark_context")
-
-
-# Load data
-(x_train, y_train), (x_test, y_test) = mnist.load_data()
-
-x_train = x_train.reshape(60000, 784)
-x_test = x_test.reshape(10000, 784)
-x_train = x_train.astype("float32")
-x_test = x_test.astype("float32")
-x_train /= 255
-x_test /= 255
-print(x_train.shape[0], 'train samples')
-print(x_test.shape[0], 'test samples')
-
-# Convert class vectors to binary class matrices
-y_train = np_utils.to_categorical(y_train, nb_classes)
-y_test = np_utils.to_categorical(y_test, nb_classes)
-
-model = Sequential()
-model.add(Dense(128, input_dim=784))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-model.add(Dense(128))
-model.add(Activation('relu'))
-model.add(Dropout(0.2))
-model.add(Dense(10))
-model.add(Activation('softmax'))
-
-model.compile(optimizer="sgd",
-              loss="categorical_crossentropy", metrics=["acc"])
-
-
-def test_spark_model_end_to_end(spark_context):
-    rdd = to_simple_rdd(spark_context, x_train, y_train)
-
-    # sync epoch
-    spark_model = SparkModel(model, frequency='epoch',
-                             mode='synchronous', num_workers=2)
-    spark_model.fit(rdd, epochs=epochs, batch_size=batch_size,
-                    verbose=2, validation_split=0.1)
-    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
-    print('Test accuracy:', score[1])
-
-    # sync batch
-    spark_model = SparkModel(model, frequency='batch',
-                             mode='synchronous', num_workers=2)
-    spark_model.fit(rdd, epochs=epochs, batch_size=batch_size,
-                    verbose=2, validation_split=0.1)
-    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
-    print('Test accuracy:', score[1])
-
-    # async epoch
-    spark_model = SparkModel(model, frequency='epoch', mode='asynchronous')
-    spark_model.fit(rdd, epochs=epochs, batch_size=batch_size,
-                    verbose=2, validation_split=0.1)
-    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
-    print('Test accuracy:', score[1])
-
-    # hog wild epoch
-    spark_model = SparkModel(model, frequency='epoch', mode='hogwild')
-    spark_model.fit(rdd, epochs=epochs, batch_size=batch_size,
-                    verbose=2, validation_split=0.1)
-    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
-    print('Test accuracy:', score[1])
-import ctypes
-import numpy as np
-try:
-    from elephas.java import java_classes
-except ImportError:
-    pass
-
-
-def get_context_dtype():
-    """Returns the nd4j dtype
-    """
-    dtype = java_classes.DataTypeUtil.getDtypeFromContext()
-    return java_classes.DataTypeUtil.getDTypeForName(dtype)
-
-
-def to_numpy(nd4j_array):
-    """ Convert an ND4J array to a numpy array
-    :param nd4j_array:
-    :return:
-    """
-    buff = nd4j_array.data()
-    address = buff.pointer().address()
-    type_name = java_classes.DataTypeUtil.getDtypeFromContext()
-    data_type = java_classes.DataTypeUtil.getDTypeForName(type_name)
-    mapping = {
-        'double': ctypes.c_double,
-        'float': ctypes.c_float
-    }
-    Pointer = ctypes.POINTER(mapping[data_type])
-    pointer = ctypes.cast(address, Pointer)
-    np_array = np.ctypeslib.as_array(pointer, tuple(nd4j_array.shape()))
-    return np_array
-
-
-def retrieve_keras_weights(java_model):
-    """For a previously imported Keras model, after training it with DL4J Spark,
-    we want to set the resulting weights back to the original Keras model.
-
-    :param java_model: DL4J model (MultiLayerNetwork or ComputationGraph)
-    :return: list of numpy arrays in correct order for model.set_weights(...) of a corresponding Keras model
-    """
-    weights = []
-    layers = java_model.getLayers()
-    for layer in layers:
-        params = layer.paramTable()
-        keys = params.keySet()
-        key_list = java_classes.ArrayList(keys)
-        for key in key_list:
-            weight = params.get(key)
-            np_weight = np.squeeze(to_numpy(weight))
-            weights.append(np_weight)
-    return weights
-from jnius import autoclass
-import pydl4j
-import os
-
-pydl4j.validate_jars()
-pydl4j.add_classpath(os.getcwd())
-
-# -------------JVM starts here-------------
-
-
-# Java
-File = autoclass('java.io.File')
-ClassLoader = autoclass('java.lang.ClassLoader')
-ArrayList = autoclass('java.util.ArrayList')
-Arrays = autoclass('java.util.Arrays')
-String = autoclass('java.lang.String')
-
-System = autoclass('java.lang.System')
-Integer = autoclass('java.lang.Integer')
-Float = autoclass('java.lang.Float')
-Double = autoclass('java.lang.Double')
-
-# JavaCPP
-DoublePointer = autoclass('org.bytedeco.javacpp.DoublePointer')
-FloatPointer = autoclass('org.bytedeco.javacpp.FloatPointer')
-IntPointer = autoclass('org.bytedeco.javacpp.IntPointer')
-
-# Spark
-SparkContext = autoclass('org.apache.spark.SparkContext')
-JavaSparkContext = autoclass('org.apache.spark.api.java.JavaSparkContext')
-SparkConf = autoclass('org.apache.spark.SparkConf')
-
-# ND4J
-Nd4j = autoclass('org.nd4j.linalg.factory.Nd4j')
-INDArray = autoclass('org.nd4j.linalg.api.ndarray.INDArray')
-Transforms = autoclass('org.nd4j.linalg.ops.transforms.Transforms')
-NDArrayIndex = autoclass('org.nd4j.linalg.indexing.NDArrayIndex')
-DataBuffer = autoclass('org.nd4j.linalg.api.buffer.DataBuffer')
-Shape = autoclass('org.nd4j.linalg.api.shape.Shape')
-BinarySerde = autoclass('org.nd4j.serde.binary.BinarySerde')
-DataTypeUtil = autoclass('org.nd4j.linalg.api.buffer.util.DataTypeUtil')
-NativeOpsHolder = autoclass('org.nd4j.nativeblas.NativeOpsHolder')
-DataSet = autoclass('org.nd4j.linalg.dataset.DataSet')
-
-
-# Import
-KerasModelImport = autoclass(
-    'org.deeplearning4j.nn.modelimport.keras.KerasModelImport')
-ElephasModelImport = autoclass(
-    'org.deeplearning4j.spark.parameterserver.modelimport.elephas.ElephasModelImport')
-from .java_classes import *
-import numpy as np
-import ctypes
-
-
-# Java instance initializations
-native_ops = NativeOpsHolder.getInstance().getDeviceNativeOps()
-
-
-# DATA TYPE MANAGEMENT
-
-def set_context_dtype(dtype):
-    """
-    Sets the dtype for nd4j
-    # Arguments
-    dtype: 'float' or 'double'
-    """
-    dtype = DataTypeUtil.getDtypeFromContext(dtype)
-    DataTypeUtil.setDTypeForContext(dtype)
-
-
-def get_context_dtype():
-    """
-    Returns the nd4j dtype
-    """
-    dtype = DataTypeUtil.getDtypeFromContext()
-    return DataTypeUtil.getDTypeForName(dtype)
-
-
-def get_nd4j_dtype(np_dtype):
-    """
-    Gets the equivalent nd4j data type
-    for a given numpy data type.
-    # Arguments
-    np_dtype: Numpy data type. One of
-        ['float64', 'float32', 'float16']
-    """
-    if type(np_dtype) == type:
-        np_dtype = np_dtype.__name__
-    elif type(np_dtype) == np.dtype:
-        np_dtype = np_dtype.name
-    mapping = {
-        'float64': 'double',
-        'float32': 'float',
-        'float16': 'half'
-    }
-    nd4j_dtype = mapping.get(np_dtype)
-    if not nd4j_dtype:
-        raise Exception('Invalid numpy data type : ' + np_dtype)
-    return nd4j_dtype
-
-
-def get_np_dtype(nd4j_dtype):
-    """
-    Gets the equivalent numpy data type
-    for a given nd4j data type.
-    # Arguments:
-    nd4j_dtype : Nd4j data type. One of
-        ['double', 'float', 'half']
-    """
-    mapping = {
-        'double': np.float64,
-        'float': np.float32,
-        'half': np.float16
-    }
-    np_dtype = mapping.get(nd4j_dtype)
-    if not np_dtype:
-        raise Exception('Invalid nd4j data type : ' + nd4j_dtype)
-    return np_dtype
-
-
-set_context_dtype('double')
-
-
-_refs = []
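The conversion routine below has to translate numpy's byte-wise strides into Nd4j's element-wise ones; a quick numpy illustration of the difference:

import numpy as np

a = np.zeros((2, 3), dtype=np.float64)
print(a.strides)                            # (24, 8): bytes per step in each dimension
elem_size = a.dtype.itemsize                # 8 bytes per float64
print([s // elem_size for s in a.strides])  # [3, 1]: word-wise strides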
-
-
-def _from_numpy(np_array):
-    """
-    Convert numpy array to nd4j array
-    """
-
-    # Convert the numpy array to nd4j context dtype
-    required_dtype = get_np_dtype(get_context_dtype())
-    if np_array.dtype != required_dtype:
-        raise Exception("{} is required, got {}".format(
-            repr(required_dtype), repr(np_array.dtype)))
-
-    # Nd4j does not have 1-d vectors.
-    # So we add a dummy dimension.
-    if np_array.ndim == 1:
-        np_array = np.expand_dims(np_array, 0)
-
-    # We have to maintain references to all incoming
-    # numpy arrays. Else they will get GCed
-
-    # creates a Nd4j array from a numpy array
-    # To create an Nd4j array, we need 3 things:
-    # buffer, strides, and shape
-
-    # Get the buffer
-    # A buffer is basically an array. To get the buffer object
-    # we need a pointer to the first element and the size.
-    pointer_address, _ = np_array.__array_interface__['data']
-    _refs.append(np_array)
-    pointer = native_ops.pointerForAddress(pointer_address)
-    size = np_array.size
-    mapping = {
-        np.float64: DoublePointer,
-        np.float32: FloatPointer,
-    }
-    pointer = mapping[required_dtype](pointer)
-    buff = Nd4j.createBuffer(pointer, size)
-    assert buff.address() == pointer_address
-    _refs.append(buff)
-    # Get the strides
-    # strides = tuple of bytes to step in each
-    # dimension when traversing an array.
-    elem_size = buff.getElementSize()
-    # Make sure word size is same in both python
-    # and java worlds
-    assert elem_size == np_array.dtype.itemsize
-    strides = np_array.strides
-    # numpy uses byte wise strides. We have to
-    # convert it to word wise strides (integer division
-    # keeps them ints under Python 3).
-    strides = [dim // elem_size for dim in strides]
-
-    # Finally, shape:
-    shape = np_array.shape
-
-    nd4j_array = Nd4j.create(buff, shape, strides, 0)
-    assert buff.address() == nd4j_array.data().address()
-    return nd4j_array
-
-
-def _to_numpy(nd4j_array):
-    """
-    Convert nd4j array to numpy array
-    """
-    buff = nd4j_array.data()
-    address = buff.pointer().address()
-    dtype = get_context_dtype()
-    mapping = {
-        'double': ctypes.c_double,
-        'float': ctypes.c_float
-    }
-    Pointer = ctypes.POINTER(mapping[dtype])
-    pointer = ctypes.cast(address, Pointer)
-    np_array = np.ctypeslib.as_array(pointer, tuple(nd4j_array.shape()))
-    return np_array
-
-
-def _indarray(x):
-    if type(x) is INDArray:
-        return x
-    elif type(x) is ndarray:
-        return x.array
-    elif 'numpy' in str(type(x)):
-        return _from_numpy(x)
-    elif type(x) in (list, tuple):
-        return _from_numpy(np.array(x))
-    elif type(x) in (int, float):
-        return Nd4j.scalar(x)
-    else:
-        raise Exception('Data type not understood :' + str(type(x)))
-
-
-def broadcast_like(y, x):
-    xs = x.shape()
-    ys = y.shape()
-    if xs == ys:
-        return y
-    _xs = tuple(xs)
-    _ys = tuple(ys)
-    nx = len(xs)
-    ny = len(ys)
-    if nx > ny:
-        diff = nx - ny
-        ys += [1] * diff
-        y = y.reshape(ys)
-        ny = nx
-    elif ny > nx:
-        raise Exception('Unable to broadcast shapes ' + str(_xs) + ''
-                        ' and ' + str(_ys))
-    yt = []
-    rep_y = False
-    for xd, yd in zip(xs, ys):
-        if xd == yd:
-            yt.append(1)
-        elif xd == 1:
-            raise Exception('Unable to broadcast shapes ' + str(_xs) + ''
-                            ' and ' + str(_ys))
-        elif yd == 1:
-            yt.append(xd)
-            rep_y = True
-        else:
-            raise Exception('Unable to broadcast shapes ' + str(_xs) + ''
-                            ' and ' + str(_ys))
-    if rep_y:
-        y = y.repmat(*yt)
-    return y
-
-
-def broadcast(x, y):
-    xs = x.shape()
-    ys = y.shape()
-    if xs == ys:
-        return x, y
-    _xs = tuple(xs)
-    _ys = tuple(ys)
-    nx = len(xs)
-    ny = len(ys)
-    if nx > ny:
-        diff = nx - ny
-        ys += [1] * diff
-        y = y.reshape(*ys)
-        ny = nx
-    elif ny > nx:
-        diff = ny - nx
-        xs += [1] * diff
-        x = x.reshape(*xs)
-        nx = ny
-    xt = []
-    yt = []
-    rep_x = False
-    rep_y = False
-    for xd, yd in zip(xs, ys):
-        if xd == yd:
-            xt.append(1)
-            yt.append(1)
-        elif xd == 1:
-            xt.append(yd)
-            yt.append(1)
-            rep_x = True
-        elif yd == 1:
-            xt.append(1)
-            yt.append(xd)
-            rep_y = True
-        else:
-            raise Exception('Unable to broadcast shapes ' + str(_xs) + ''
-                            ' and ' + str(_ys))
-    if rep_x:
-        x = Nd4j.tile(x, *xt)
-    if rep_y:
-        y = Nd4j.tile(y, *yt)
-    return x, y
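`broadcast` above emulates numpy broadcasting by reshaping and tiling; the numpy behavior it imitates:

import numpy as np

x = np.ones((2, 3))
y = np.arange(3)        # shape (3,) is treated as (1, 3) and then repeated
print((x + y).shape)    # (2, 3), the same result the tile-based broadcast produces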
- np_array = _to_numpy(self.array) - return np_array - - @property - def size(self): - return self.array.length() - - @property - def shape(self): - return tuple(self.array.shape()) - - @shape.setter - def shape(self, value): - arr = self.reshape(value) - self.array = arr.array - - @property - def ndim(self): - return len(self.array.shape()) - - @property - def ndim(self): - return len(self.array.shape()) - - def __getitem__(self, key): - if type(key) is int: - return ndarray(self.array.get(NDArrayIndex.point(key))) - if type(key) is slice: - start = key.start - stop = key.stop - step = key.step - if start is None: - start = 0 - if stop is None: - shape = self.array.shape() - if shape[0] == 1: - stop = shape[1] - else: - stop = shape[0] - if stop - start <= 0: - return None - if step is None or step == 1: - return ndarray(self.array.get(NDArrayIndex.interval(start, stop))) - else: - return ndarray(self.array.get(NDArrayIndex.interval(start, step, stop))) - if type(key) is list: - raise NotImplemented( - 'Sorry, this type of indexing is not supported yet.') - if type(key) is tuple: - key = list(key) - shape = self.array.shape() - ndim = len(shape) - nk = len(key) - key += [slice(None)] * (ndim - nk) - args = [] - for i, dim in enumerate(key): - if type(dim) is int: - args.append(NDArrayIndex.point(dim)) - elif type(dim) is slice: - if dim == slice(None): - args.append(NDArrayIndex.all()) - else: - start = dim.start - stop = dim.stop - step = dim.step - if start is None: - start = 0 - if stop is None: - stop = shape[i] - if stop - start <= 0: - return None - if step is None or step == 1: - args.append(NDArrayIndex.interval(start, stop)) - else: - args.append(NDArrayIndex.interval( - start, step, stop)) - elif type(dim) in (list, tuple): - raise NotImplemented( - 'Sorry, this type of indexing is not supported yet.') - return ndarray(self.array.get(*args)) - - def __setitem__(self, key, other): - other = _indarray(other) - view = self[key] - if view is None: - return - view = view.array - other = broadcast_like(other, view) - view.assign(other) - - def __add__(self, other): - other = _indarray(other) - x, y = broadcast(self.array, other) - return ndarray(x.add(y)) - - def __sub__(self, other): - other = _indarray(other) - x, y = broadcast(self.array, other) - return ndarray(x.sub(y)) - - def __mul__(self, other): - other = _indarray(other) - x, y = broadcast(self.array, other) - return ndarray(x.mul(y)) - - def __div__(self, other): - other = _indarray(other) - x, y = broadcast(self.array, other) - return ndarray(x.div(y)) - - def __iadd__(self, other): - other = _indarray(other) - if self.array.shape() == other.shape(): - self.array = self.array.addi(other) - else: - x, y = broadcast(self.array, other) - self.array = x.add(y) - return self - - def __isub__(self, other): - other = _indarray(other) - if self.array.shape() == other.shape(): - self.array = self.array.subi(other) - else: - x, y = broadcast(self.array, other) - self.array = x.sub(y) - return self - - def __imul__(self, other): - other = _indarray(other) - if self.array.shape() == other.shape(): - self.array = self.array.muli(other) - else: - x, y = broadcast(self.array, other) - self.array = x.mul(y) - return self - - def __idiv__(self, other): - other = _indarray(other) - if self.array.shape() == other.shape(): - self.array = self.array.divi(other) - else: - x, y = broadcast(self.array, other) - self.array = x.div(y) - return self - - # def __getattr__(self, attr): - # import ops - # f = getattr(ops, attr) - # setattr(ndarray, 
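-# --- Illustrative aside, not part of the scraped file above ---
-# `broadcast`/`broadcast_like` emulate numpy-style broadcasting by
-# left-padding the lower-rank shape with 1s and tiling axes of extent 1.
-# A minimal pure-numpy sketch of the same technique (runs without the
-# JVM; incompatible extents are not validated here):
-import numpy as np
-
-
-def tile_broadcast(x, y):
-    xs, ys = list(x.shape), list(y.shape)
-    # Pad the shorter shape with leading 1s, as `broadcast` does.
-    xs = [1] * (len(ys) - len(xs)) + xs
-    ys = [1] * (len(xs) - len(ys)) + ys
-    x, y = x.reshape(xs), y.reshape(ys)
-    # Tile every axis where one operand has extent 1.
-    xt = [yd if xd == 1 else 1 for xd, yd in zip(xs, ys)]
-    yt = [xd if yd == 1 else 1 for xd, yd in zip(xs, ys)]
-    return np.tile(x, xt), np.tile(y, yt)
-
-
-a, b = tile_broadcast(np.ones((3, 1)), np.ones((1, 4)))
-assert a.shape == b.shape == (3, 4)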
attr, f) - # return getattr(self, attr) - - def __int__(self): - if self.array.length() == 1: - return self.array.getInt(0) - raise Exception('Applicable only for scalars') - - def __float__(self): - if self.array.length() == 1: - return self.array.getDouble(0) - raise Exception('Applicable only for scalars') - - @property - def T(self): - return self.transpose() - - -def array(*args, **kwargs): - return ndarray(*args, **kwargs) -from .adapter import * -from .params import * -from __future__ import absolute_import - -from pyspark.sql import SQLContext -from pyspark.mllib.regression import LabeledPoint -from ..utils.rdd_utils import from_labeled_point, to_labeled_point, lp_to_simple_rdd -from pyspark.mllib.linalg import Vector as MLLibVector, Vectors as MLLibVectors - - -def to_data_frame(sc, features, labels, categorical=False): - """Convert numpy arrays of features and labels into Spark DataFrame - """ - lp_rdd = to_labeled_point(sc, features, labels, categorical) - sql_context = SQLContext(sc) - df = sql_context.createDataFrame(lp_rdd) - return df - - -def from_data_frame(df, categorical=False, nb_classes=None): - """Convert DataFrame back to pair of numpy arrays - """ - lp_rdd = df.rdd.map(lambda row: LabeledPoint(row.label, row.features)) - features, labels = from_labeled_point(lp_rdd, categorical, nb_classes) - return features, labels - - -def df_to_simple_rdd(df, categorical=False, nb_classes=None, features_col='features', label_col='label'): - """Convert DataFrame into RDD of pairs - """ - sql_context = df.sql_ctx - sql_context.registerDataFrameAsTable(df, "temp_table") - selected_df = sql_context.sql( - "SELECT {0} AS features, {1} as label from temp_table".format(features_col, label_col)) - if isinstance(selected_df.first().features, MLLibVector): - lp_rdd = selected_df.rdd.map( - lambda row: LabeledPoint(row.label, row.features)) - else: - lp_rdd = selected_df.rdd.map(lambda row: LabeledPoint( - row.label, MLLibVectors.fromML(row.features))) - rdd = lp_to_simple_rdd(lp_rdd, categorical, nb_classes) - return rdd -from __future__ import absolute_import -from pyspark.ml.param.shared import Param, Params - - -class HasKerasModelConfig(Params): - """Mandatory field: - - Parameter mixin for Keras model yaml - """ - - def __init__(self): - super(HasKerasModelConfig, self).__init__() - self.keras_model_config = Param( - self, "keras_model_config", "Serialized Keras model as yaml string") - - def set_keras_model_config(self, keras_model_config): - self._paramMap[self.keras_model_config] = keras_model_config - return self - - def get_keras_model_config(self): - return self.getOrDefault(self.keras_model_config) - - -class HasMode(Params): - """Parameter mixin for Elephas mode - """ - - def __init__(self): - super(HasMode, self).__init__() - self.mode = Param(self, "mode", "Elephas mode") - self._setDefault(mode='asynchronous') - - def set_mode(self, mode): - self._paramMap[self.mode] = mode - return self - - def get_mode(self): - return self.getOrDefault(self.mode) - - -class HasFrequency(Params): - """Parameter mixin for Elephas frequency - """ - - def __init__(self): - super(HasFrequency, self).__init__() - self.frequency = Param(self, "frequency", "Elephas frequency") - self._setDefault(frequency='epoch') - - def set_frequency(self, frequency): - self._paramMap[self.frequency] = frequency - return self - - def get_frequency(self): - return self.getOrDefault(self.frequency) - - -class HasNumberOfClasses(Params): - """Mandatory: - - Parameter mixin for number of classes - """ - - def 
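-# --- Illustrative aside, not part of the scraped file above ---
-# Hypothetical usage of the `elephas.ml.adapter` helpers above: numpy
-# features and labels to a Spark DataFrame and back. Assumes a live
-# SparkContext bound to `sc`; the shapes are toy values.
-import numpy as np
-from elephas.ml.adapter import to_data_frame, from_data_frame
-
-features = np.random.rand(8, 4)
-labels = np.random.randint(0, 2, size=(8,)).astype(float)
-df = to_data_frame(sc, features, labels, categorical=False)  # `sc` assumed
-x, y = from_data_frame(df, categorical=False)
-assert x.shape == features.shape and y.shape == labels.shape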
__init__(self): - super(HasNumberOfClasses, self).__init__() - self.nb_classes = Param(self, "nb_classes", "number of classes") - self._setDefault(nb_classes=10) - - def set_nb_classes(self, nb_classes): - self._paramMap[self.nb_classes] = nb_classes - return self - - def get_nb_classes(self): - return self.getOrDefault(self.nb_classes) - - -class HasCategoricalLabels(Params): - """Mandatory: - - Parameter mixin for setting categorical features - """ - - def __init__(self): - super(HasCategoricalLabels, self).__init__() - self.categorical = Param( - self, "categorical", "Boolean to indicate if labels are categorical") - self._setDefault(categorical=True) - - def set_categorical_labels(self, categorical): - self._paramMap[self.categorical] = categorical - return self - - def get_categorical_labels(self): - return self.getOrDefault(self.categorical) - - -class HasEpochs(Params): - """Parameter mixin for number of epochs - """ - - def __init__(self): - super(HasEpochs, self).__init__() - self.epochs = Param(self, "epochs", "Number of epochs to train") - self._setDefault(epochs=10) - - def set_epochs(self, epochs): - self._paramMap[self.epochs] = epochs - return self - - def get_epochs(self): - return self.getOrDefault(self.epochs) - - -class HasBatchSize(Params): - """Parameter mixin for batch size - """ - - def __init__(self): - super(HasBatchSize, self).__init__() - self.batch_size = Param(self, "batch_size", "Batch size") - self._setDefault(batch_size=32) - - def set_batch_size(self, batch_size): - self._paramMap[self.batch_size] = batch_size - return self - - def get_batch_size(self): - return self.getOrDefault(self.batch_size) - - -class HasVerbosity(Params): - """Parameter mixin for output verbosity - """ - - def __init__(self): - super(HasVerbosity, self).__init__() - self.verbose = Param(self, "verbose", "Stdout verbosity") - self._setDefault(verbose=0) - - def set_verbosity(self, verbose): - self._paramMap[self.verbose] = verbose - return self - - def get_verbosity(self): - return self.getOrDefault(self.verbose) - - -class HasValidationSplit(Params): - """Parameter mixin for validation split percentage - """ - - def __init__(self): - super(HasValidationSplit, self).__init__() - self.validation_split = Param( - self, "validation_split", "validation split percentage") - self._setDefault(validation_split=0.1) - - def set_validation_split(self, validation_split): - self._paramMap[self.validation_split] = validation_split - return self - - def get_validation_split(self): - return self.getOrDefault(self.validation_split) - - -class HasNumberOfWorkers(Params): - """Parameter mixin for number of workers - """ - - def __init__(self): - super(HasNumberOfWorkers, self).__init__() - self.num_workers = Param(self, "num_workers", "number of workers") - self._setDefault(num_workers=8) - - def set_num_workers(self, num_workers): - self._paramMap[self.num_workers] = num_workers - return self - - def get_num_workers(self): - return self.getOrDefault(self.num_workers) - - -class HasKerasOptimizerConfig(Params): - """Parameter mixin for Keras optimizer config - """ - - def __init__(self): - super(HasKerasOptimizerConfig, self).__init__() - self.optimizer_config = Param( - self, "optimizer_config", "Serialized Keras optimizer properties") - self._setDefault(optimizer_config=None) - - def set_optimizer_config(self, optimizer_config): - self._paramMap[self.optimizer_config] = optimizer_config - return self - - def get_optimizer_config(self): - return self.getOrDefault(self.optimizer_config) - - -class 
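-# --- Illustrative aside, not part of the scraped file above ---
-# The mixins above all follow the same pyspark `Params` pattern: declare
-# a `Param`, optionally `_setDefault`, and expose chainable set/get
-# methods. A hedged sketch of composing a few of them into one
-# estimator-style class (each __init__ calls super(), so the MRO chain
-# initializes every mixin):
-from elephas.ml.params import HasEpochs, HasBatchSize, HasVerbosity
-
-
-class TrainingConfig(HasEpochs, HasBatchSize, HasVerbosity):
-    """Toy container that inherits the chainable setters."""
-    pass
-
-
-conf = TrainingConfig().set_epochs(5).set_batch_size(64)
-assert conf.get_epochs() == 5
-assert conf.get_verbosity() == 0  # default from HasVerbosity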
HasMetrics(Params): - """Parameter mixin for Keras metrics - """ - - def __init__(self): - super(HasMetrics, self).__init__() - self.metrics = Param(self, "metrics", "Keras metrics") - self._setDefault(metrics=['acc']) - - def set_metrics(self, metrics): - self._paramMap[self.metrics] = metrics - return self - - def get_metrics(self): - return self.getOrDefault(self.metrics) - - -class HasLoss(Params): - """Parameter mixin for Keras metrics - """ - - def __init__(self): - super(HasLoss, self).__init__() - self.loss = Param(self, "loss", "Keras loss") - - def set_loss(self, loss): - self._paramMap[self.loss] = loss - return self - - def get_loss(self): - return self.getOrDefault(self.loss) -from .adapter import * -from __future__ import absolute_import - -from pyspark.mllib.linalg import Matrices, Vectors - - -def from_matrix(matrix): - """Convert MLlib Matrix to numpy array """ - return matrix.toArray() - - -def to_matrix(np_array): - """Convert numpy array to MLlib Matrix - """ - if len(np_array.shape) == 2: - return Matrices.dense(np_array.shape[0], - np_array.shape[1], - np_array.ravel()) - else: - raise Exception("An MLLib Matrix can only be created from a two-dimensional " + - "numpy array, got {}".format(len(np_array.shape))) - - -def from_vector(vector): - """Convert MLlib Vector to numpy array - """ - return vector.toArray() - - -def to_vector(np_array): - """Convert numpy array to MLlib Vector - """ - if len(np_array.shape) == 1: - return Vectors.dense(np_array) - else: - raise Exception("An MLLib Vector can only be created from a one-dimensional " + - "numpy array, got {}".format(len(np_array.shape))) -from .server import * -from .client import * -from __future__ import absolute_import -from __future__ import print_function - -import abc -import numpy as np -import socket -import six.moves.cPickle as pickle -try: - import urllib.request as urllib2 -except ImportError: - import urllib2 - -from ..utils.sockets import determine_master, send, receive - - -class BaseParameterClient(object): - """BaseParameterClient - - Parameter-server clients can do two things: retrieve the current parameters - from the corresponding server, and send updates (`delta`) to the server. - """ - __metaclass__ = abc.ABCMeta - - def __init__(self): - raise NotImplementedError - - @abc.abstractmethod - def update_parameters(self, delta): - """Update master parameters with deltas from training process - """ - raise NotImplementedError - - @abc.abstractmethod - def get_parameters(self): - """Retrieve master weights from parameter server - """ - raise NotImplementedError - - -class HttpClient(BaseParameterClient): - """HttpClient - - Uses HTTP protocol for communication with its corresponding parameter server, - namely HttpServer. The HTTP server provides two endpoints, `/parameters` to - get parameters and `/update` to update the server's parameters. 
- """ - - def __init__(self, port=4000): - - self.master_url = determine_master(port=port) - self.headers = {'Content-Type': 'application/elephas'} - - def get_parameters(self): - request = urllib2.Request('http://{}/parameters'.format(self.master_url), - headers=self.headers) - pickled_weights = urllib2.urlopen(request).read() - return pickle.loads(pickled_weights) - - def update_parameters(self, delta): - request = urllib2.Request('http://{}/update'.format(self.master_url), - pickle.dumps(delta, -1), headers=self.headers) - return urllib2.urlopen(request).read() - - -class SocketClient(BaseParameterClient): - """SocketClient - - Uses a socket connection to communicate with an instance of `SocketServer`. - The socket server listens to two types of events. Those with a `g` prefix - indicate a get-request, those with a `u` indicate a parameter update. - """ - - def __init__(self, port=4000): - - host = self.master_url.split(':')[0] - self.socket = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - self.socket.connect((host, port)) - - def get_parameters(self): - self.socket.sendall(b'g') - return np.asarray(receive(self.socket)) - - def update_parameters(self, delta): - data = {'delta': delta} - self.socket.sendall(b'u') - send(self.socket, data) -import abc -import socket -from threading import Thread -import six.moves.cPickle as pickle -from flask import Flask, request -from multiprocessing import Process - -from elephas.utils.sockets import determine_master -from elephas.utils.sockets import receive, send -from elephas.utils.serialization import dict_to_model -from elephas.utils.rwlock import RWLock as Lock -from elephas.utils.notebook_utils import is_running_in_notebook -from elephas.utils import subtract_params - - -class BaseParameterServer(object): - """BaseParameterServer - - Parameter servers can be started and stopped. Server implementations have - to cater to the needs of their respective BaseParameterClient instances. - """ - __metaclass__ = abc.ABCMeta - - def __init__(self): - raise NotImplementedError - - @abc.abstractmethod - def start(self): - """Start the parameter server instance. - """ - raise NotImplementedError - - @abc.abstractmethod - def stop(self): - """Terminate the parameter server instance. - """ - raise NotImplementedError - - -class HttpServer(BaseParameterServer): - """HttpServer - - Flask HTTP server. Defines two routes, `/parameters` to GET current - parameters held by this server, and `/update` which can be used to - POST updates. - """ - - def __init__(self, model, mode, port=4000, debug=True, - threaded=True, use_reloader=False): - """Initializes and HTTP server from a serialized Keras model - a parallelisation mode and a port to run the Flask application on. In - hogwild mode no read- or write-locks will be acquired, in asynchronous - mode this is the case. 
- - :param model: Serialized Keras model - :param mode: parallelization mode, either `asynchronous` or `hogwild` - :param port: int, port to run the application on - :param debug: boolean, Flask debug mode - :param threaded: boolean, Flask threaded application mode - :param use_reloader: boolean, Flask `use_reloader` argument - """ - - self.master_network = dict_to_model(model) - self.mode = mode - self.master_url = None - - self.port = port - - if is_running_in_notebook(): - self.threaded = False - self.use_reloader = False - self.debug = False - else: - self.debug = debug - self.threaded = threaded - self.use_reloader = use_reloader - - self.lock = Lock() - self.pickled_weights = None - self.weights = self.master_network.get_weights() - - self.server = Process(target=self.start_flask_service) - - def start(self): - self.server.start() - self.master_url = determine_master(self.port) - - def stop(self): - self.server.terminate() - self.server.join() - - def start_flask_service(self): - """Define Flask parameter server service. - - This HTTP server can do two things: get the current model - parameters and update model parameters. After registering - the `parameters` and `update` routes, the service will - get started. - - """ - app = Flask(__name__) - self.app = app - - @app.route('/') - def home(): - return 'Elephas' - - @app.route('/parameters', methods=['GET']) - def handle_get_parameters(): - if self.mode == 'asynchronous': - self.lock.acquire_read() - self.pickled_weights = pickle.dumps(self.weights, -1) - pickled_weights = self.pickled_weights - if self.mode == 'asynchronous': - self.lock.release() - return pickled_weights - - @app.route('/update', methods=['POST']) - def handle_update_parameters(): - delta = pickle.loads(request.data) - if self.mode == 'asynchronous': - self.lock.acquire_write() - - if not self.master_network.built: - self.master_network.build() - - # Just apply the gradient - weights_before = self.weights - self.weights = subtract_params(weights_before, delta) - - if self.mode == 'asynchronous': - self.lock.release() - return 'Update done' - - master_url = determine_master(self.port) - host = master_url.split(':')[0] - self.app.run(host=host, debug=self.debug, port=self.port, - threaded=self.threaded, use_reloader=self.use_reloader) - - -class SocketServer(BaseParameterServer): - """SocketServer - - A basic Python socket server - - """ - - def __init__(self, model, port=4000): - """Initializes a Socket server instance from a serializer Keras model - and a port to listen to. 
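-# --- Illustrative aside, not part of the scraped file above ---
-# Hedged sketch of standing up the HttpServer above. The server takes
-# the *serialized* model (the dict produced by `model_to_dict`), a mode,
-# and a port; the architecture here is a toy assumption.
-from keras.models import Sequential
-from keras.layers import Dense
-from elephas.utils.serialization import model_to_dict
-from elephas.parameter.server import HttpServer
-
-model = Sequential([Dense(4, input_dim=8, activation='relu')])
-server = HttpServer(model_to_dict(model), mode='asynchronous', port=4000)
-server.start()   # forks a Flask process serving /parameters and /update
-# ... workers would exchange parameters here ...
-server.stop()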
-
-        :param model: Serialized Keras model
-        :param port: int, port to run the socket on
-        """
-
-        self.model = dict_to_model(model)
-        self.port = port
-        self.socket = None
-        self.runs = False
-        self.connections = []
-        self.lock = Lock()
-        self.thread = None
-
-    def start(self):
-        if self.thread is not None:
-            self.stop()
-        self.thread = Thread(target=self.start_server)
-        self.thread.start()
-
-    def stop(self):
-        self.stop_server()
-        self.thread.join()
-        self.thread = None
-
-    def start_server(self):
-        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-        sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1)
-        sock.bind(('0.0.0.0', self.port))
-        sock.listen(5)
-        self.socket = sock
-        self.runs = True
-        self.run()
-
-    def stop_server(self):
-        self.runs = False
-        if self.socket:
-            for thread in self.connections:
-                thread.join()
-                del thread
-            self.socket.close()
-            sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
-            try:
-                sock.connect(("localhost", self.port))
-                sock.close()
-            except Exception:
-                pass
-        self.socket = None
-        self.connections = []
-
-    def update_parameters(self, conn):
-        data = receive(conn)
-        delta = data['delta']
-        # RWLock exposes explicit acquire/release (no context-manager
-        # protocol); deltas are added elementwise, not concatenated.
-        self.lock.acquire_write()
-        weights = add_params(self.model.get_weights(), delta)
-        self.model.set_weights(weights)
-        self.lock.release()
-
-    def get_parameters(self, conn):
-        self.lock.acquire_read()
-        weights = self.model.get_weights()
-        self.lock.release()
-        send(conn, weights)
-
-    def action_listener(self, conn):
-        while self.runs:
-            get_or_update = conn.recv(1).decode()
-            if get_or_update == 'u':
-                self.update_parameters(conn)
-            elif get_or_update == 'g':
-                self.get_parameters(conn)
-            else:
-                raise ValueError('Received invalid action')
-
-    def run(self):
-        while self.runs:
-            try:
-                conn, addr = self.socket.accept()
-                thread = Thread(target=self.action_listener, args=(conn,))
-                thread.start()
-                self.connections.append(thread)
-            except Exception:
-                print("Failed to set up socket connection.")
-from .functional_utils import *
-from .rdd_utils import *
-from .serialization import *
-from .sockets import *
-from .rwlock import *
-from __future__ import absolute_import
-
-import numpy as np
-from six.moves import zip
-
-
-def add_params(param_list_left, param_list_right):
-    """Add two lists of parameters one by one
-
-    :param param_list_left: list of numpy arrays
-    :param param_list_right: list of numpy arrays
-    :return: list of numpy arrays
-    """
-    res = []
-    for x, y in zip(param_list_left, param_list_right):
-        res.append(x + y)
-    return res
-
-
-def subtract_params(param_list_left, param_list_right):
-    """Subtract two lists of parameters
-
-    :param param_list_left: list of numpy arrays
-    :param param_list_right: list of numpy arrays
-    :return: list of numpy arrays
-    """
-    res = []
-    for x, y in zip(param_list_left, param_list_right):
-        res.append(x - y)
-    return res
-
-
-def get_neutral(array_list):
-    """Get list of zero-valued numpy arrays for
-    specified list of numpy arrays
-
-    :param array_list: list of numpy arrays
-    :return: list of zeros of same shape as input
-    """
-    res = []
-    for x in array_list:
-        res.append(np.zeros_like(x))
-    return res
-
-
-def divide_by(array_list, num_workers):
-    """Divide a list of parameters by an integer num_workers.
- - :param array_list: - :param num_workers: - :return: - """ - for i, x in enumerate(array_list): - array_list[i] /= num_workers - return array_list -def is_running_in_notebook(): - try: - cfg = get_ipython().config - if 'IPKernelApp' in cfg: - return True - else: - return False - except NameError: - return False -from __future__ import absolute_import - -from pyspark.mllib.regression import LabeledPoint -import numpy as np - -from ..mllib.adapter import to_vector, from_vector -try: - from elephas.java import java_classes - from elephas.java.ndarray import ndarray -except Exception: - print("WARNING") - -from six.moves import zip - - -def to_java_rdd(jsc, features, labels, batch_size): - """Convert numpy features and labels into a JavaRDD of - DL4J DataSet type. - - :param jsc: JavaSparkContext from pyjnius - :param features: numpy array with features - :param labels: numpy array with labels: - :return: JavaRDD - """ - data_sets = java_classes.ArrayList() - num_batches = int(len(features) / batch_size) - for i in range(num_batches): - xi = ndarray(features[:batch_size].copy()) - yi = ndarray(labels[:batch_size].copy()) - data_set = java_classes.DataSet(xi.array, yi.array) - data_sets.add(data_set) - features = features[batch_size:] - labels = labels[batch_size:] - - return jsc.parallelize(data_sets) - - -def to_simple_rdd(sc, features, labels): - """Convert numpy arrays of features and labels into - an RDD of pairs. - - :param sc: Spark context - :param features: numpy array with features - :param labels: numpy array with labels - :return: Spark RDD with feature-label pairs - """ - pairs = [(x, y) for x, y in zip(features, labels)] - return sc.parallelize(pairs) - - -def to_labeled_point(sc, features, labels, categorical=False): - """Convert numpy arrays of features and labels into - a LabeledPoint RDD for MLlib and ML integration. - - :param sc: Spark context - :param features: numpy array with features - :param labels: numpy array with labels - :param categorical: boolean, whether labels are already one-hot encoded or not - :return: LabeledPoint RDD with features and labels - """ - labeled_points = [] - for x, y in zip(features, labels): - if categorical: - lp = LabeledPoint(np.argmax(y), to_vector(x)) - else: - lp = LabeledPoint(y, to_vector(x)) - labeled_points.append(lp) - return sc.parallelize(labeled_points) - - -def from_labeled_point(rdd, categorical=False, nb_classes=None): - """Convert a LabeledPoint RDD back to a pair of numpy arrays - - :param rdd: LabeledPoint RDD - :param categorical: boolean, if labels should be one-hot encode when returned - :param nb_classes: optional int, indicating the number of class labels - :return: pair of numpy arrays, features and labels - """ - features = np.asarray( - rdd.map(lambda lp: from_vector(lp.features)).collect()) - labels = np.asarray(rdd.map(lambda lp: lp.label).collect(), dtype='int32') - if categorical: - if not nb_classes: - nb_classes = np.max(labels) + 1 - temp = np.zeros((len(labels), nb_classes)) - for i, label in enumerate(labels): - temp[i, label] = 1. - labels = temp - return features, labels - - -def encode_label(label, nb_classes): - """One-hot encoding of a single label - - :param label: class label (int or double without floating point digits) - :param nb_classes: int, number of total classes - :return: one-hot encoded vector - """ - encoded = np.zeros(nb_classes) - encoded[int(label)] = 1. 
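-# --- Illustrative aside, not part of the scraped file above ---
-# The functional helpers above are enough to express synchronous
-# parameter averaging: sum each worker's weight list elementwise, then
-# divide by the worker count. A tiny self-contained check:
-import numpy as np
-from elephas.utils.functional_utils import add_params, get_neutral, divide_by
-
-worker_params = [[np.full((2, 2), float(i))] for i in (1, 2, 3)]
-total = get_neutral(worker_params[0])   # zeros of the right shapes
-for p in worker_params:
-    total = add_params(total, p)
-average = divide_by(total, num_workers=3)
-assert average[0][0, 0] == 2.0          # mean of 1, 2, 3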
- return encoded - - -def lp_to_simple_rdd(lp_rdd, categorical=False, nb_classes=None): - """Convert a LabeledPoint RDD into an RDD of feature-label pairs - - :param lp_rdd: LabeledPoint RDD of features and labels - :param categorical: boolean, if labels should be one-hot encode when returned - :param nb_classes: int, number of total classes - :return: Spark RDD with feature-label pairs - """ - if categorical: - if not nb_classes: - labels = np.asarray(lp_rdd.map( - lambda lp: lp.label).collect(), dtype='int32') - nb_classes = np.max(labels) + 1 - rdd = lp_rdd.map(lambda lp: (from_vector(lp.features), - encode_label(lp.label, nb_classes))) - else: - rdd = lp_rdd.map(lambda lp: (from_vector(lp.features), lp.label)) - return rdd -"""Simple reader-writer locks in Python -Many readers can hold the lock XOR one and only one writer -http://majid.info/blog/a-reader-writer-lock-for-python/ -""" -from __future__ import absolute_import -import threading - -version = """$Id: 04-1.html,v 1.3 2006/12/05 17:45:12 majid Exp $""" - - -class RWLock: - """ - A simple reader-writer lock Several readers can hold the lock - simultaneously, XOR one writer. Write locks have priority over reads to - prevent write starvation. - """ - - def __init__(self): - self.rwlock = 0 - self.writers_waiting = 0 - self.monitor = threading.Lock() - self.readers_ok = threading.Condition(self.monitor) - self.writers_ok = threading.Condition(self.monitor) - - def acquire_read(self): - """ - Acquire a read lock. Several threads can hold this typeof lock. - It is exclusive with write locks. - """ - self.monitor.acquire() - while self.rwlock < 0 or self.writers_waiting: - self.readers_ok.wait() - self.rwlock += 1 - self.monitor.release() - - def acquire_write(self): - """ - Acquire a write lock. Only one thread can hold this lock, and - only when no read locks are also held. - """ - self.monitor.acquire() - while self.rwlock != 0: - self.writers_waiting += 1 - self.writers_ok.wait() - self.writers_waiting -= 1 - self.rwlock = -1 - self.monitor.release() - - def release(self): - """ - Release a lock, whether read or write. - """ - self.monitor.acquire() - if self.rwlock < 0: - self.rwlock = 0 - else: - self.rwlock -= 1 - wake_writers = self.writers_waiting and self.rwlock == 0 - wake_readers = self.writers_waiting == 0 - self.monitor.release() - if wake_writers: - self.writers_ok.acquire() - self.writers_ok.notify() - self.writers_ok.release() - elif wake_readers: - self.readers_ok.acquire() - self.readers_ok.notifyAll() - self.readers_ok.release() -from keras.models import model_from_json - - -def model_to_dict(model): - """Turns a Keras model into a Python dictionary - - :param model: Keras model instance - :return: dictionary with model information - """ - return dict(model=model.to_json(), weights=model.get_weights()) - - -def dict_to_model(dict): - """Turns a Python dictionary with model architecture and weights - back into a Keras model - - :param dict: dictionary with `model` and `weights` keys. - :return: Keras model instantiated from dictionary - """ - model = model_from_json(dict['model']) - model.set_weights(dict['weights']) - return model -from six.moves import cPickle as pickle -from socket import gethostbyname, gethostname -import os - - -def determine_master(port=4000): - """Determine address of master so that workers - can connect to it. If the environment variable - SPARK_LOCAL_IP is set, that address will be used. 
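-# --- Illustrative aside, not part of the scraped file above ---
-# Minimal usage sketch for the RWLock above: many readers may hold the
-# lock at once, while a writer gets exclusive access and priority.
-import threading
-from elephas.utils.rwlock import RWLock
-
-lock = RWLock()
-shared = {'value': 0}
-
-
-def reader():
-    lock.acquire_read()
-    _ = shared['value']     # many readers can be in here concurrently
-    lock.release()
-
-
-def writer():
-    lock.acquire_write()    # exclusive: waits for readers to drain
-    shared['value'] += 1
-    lock.release()
-
-
-threads = [threading.Thread(target=f) for f in (reader, writer, reader)]
-for t in threads:
-    t.start()
-for t in threads:
-    t.join()
-assert shared['value'] == 1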
-
-    :param port: port on which the application runs
-    :return: Master address
-
-    Example usage:
-        SPARK_LOCAL_IP=127.0.0.1 spark-submit --master \
-            local[8] examples/mllib_mlp.py
-    """
-    if os.environ.get('SPARK_LOCAL_IP'):
-        return os.environ['SPARK_LOCAL_IP'] + ":" + str(port)
-    else:
-        return gethostbyname(gethostname()) + ":" + str(port)
-
-
-def _receive_all(socket, num_bytes):
-    """Reads `num_bytes` bytes from the specified socket.
-
-    :param socket: open socket instance
-    :param num_bytes: number of bytes to read
-
-    :return: received data
-    """
-
-    # Accumulate into a bytes buffer; socket.recv returns bytes in Python 3.
-    buffer = b''
-    buffer_size = 0
-    bytes_left = num_bytes
-    while buffer_size < num_bytes:
-        data = socket.recv(bytes_left)
-        delta = len(data)
-        buffer_size += delta
-        bytes_left -= delta
-        buffer += data
-    return buffer
-
-
-def receive(socket, num_bytes=20):
-    """Receive data frame from open socket.
-
-    :param socket: open socket instance
-    :param num_bytes: number of bytes in the length prefix
-
-    :return: received data
-    """
-    length = int(_receive_all(socket, num_bytes).decode())
-    serialized_data = _receive_all(socket, length)
-    return pickle.loads(serialized_data)
-
-
-def send(socket, data, num_bytes=20):
-    """Send data to specified socket.
-
-    :param socket: open socket instance
-    :param data: data to send
-    :param num_bytes: number of bytes in the length prefix
-    """
-    pickled_data = pickle.dumps(data, -1)
-    length = str(len(pickled_data)).zfill(num_bytes)
-    socket.sendall(length.encode())
-    socket.sendall(pickled_data)
-from __future__ import absolute_import
-from __future__ import print_function
-
-from keras.datasets import mnist
-from keras.models import Sequential
-from keras.layers.core import Dense, Dropout, Activation
-from keras.optimizers import SGD
-from keras.utils import np_utils
-
-from elephas.spark_model import SparkModel
-from elephas.utils.rdd_utils import to_simple_rdd
-
-import pytest
-pytestmark = pytest.mark.usefixtures("spark_context")
-
-
-def test_async_mode(spark_context):
-    # Define basic parameters
-    batch_size = 64
-    nb_classes = 10
-    epochs = 1
-
-    # Load data
-    (x_train, y_train), (x_test, y_test) = mnist.load_data()
-
-    x_train = x_train.reshape(60000, 784)
-    x_test = x_test.reshape(10000, 784)
-    x_train = x_train.astype("float32")
-    x_test = x_test.astype("float32")
-    x_train /= 255
-    x_test /= 255
-    print(x_train.shape[0], 'train samples')
-    print(x_test.shape[0], 'test samples')
-
-    # Convert class vectors to binary class matrices
-    y_train = np_utils.to_categorical(y_train, nb_classes)
-    y_test = np_utils.to_categorical(y_test, nb_classes)
-
-    model = Sequential()
-    model.add(Dense(128, input_dim=784))
-    model.add(Activation('relu'))
-    model.add(Dropout(0.2))
-    model.add(Dense(128))
-    model.add(Activation('relu'))
-    model.add(Dropout(0.2))
-    model.add(Dense(10))
-    model.add(Activation('softmax'))
-
-    sgd = SGD(lr=0.1)
-    model.compile(sgd, 'categorical_crossentropy', ['acc'])
-
-    # Build RDD from numpy features and labels
-    rdd = to_simple_rdd(spark_context, x_train, y_train)
-
-    # Initialize SparkModel from Keras model and Spark context
-    spark_model = SparkModel(model, frequency='epoch', mode='asynchronous')
-
-    # Train Spark model
-    spark_model.fit(rdd, epochs=epochs, batch_size=batch_size,
-                    verbose=0, validation_split=0.1)
-    # Evaluate Spark model by evaluating the underlying model
-    score = spark_model.master_network.evaluate(x_test, y_test, verbose=2)
-    assert score[1] >= 0.7
-
-
-if __name__ == '__main__':
-    pytest.main([__file__])
-from __future__ import absolute_import
-from __future__ import
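-# --- Illustrative aside, not part of the scraped file above ---
-# The send/receive pair above implements a simple length-prefixed frame:
-# a zero-padded 20-byte ASCII length header followed by the pickled
-# payload. Round-trip check over a local socket pair (socketpair may be
-# unavailable on older Windows Pythons):
-import socket
-from elephas.utils.sockets import send, receive
-
-left, right = socket.socketpair()
-send(left, {'delta': [1, 2, 3]})
-assert receive(right) == {'delta': [1, 2, 3]}
-left.close()
-right.close()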
print_function - -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Activation -from keras.optimizers import SGD -from keras.utils import np_utils - -from elephas.spark_model import SparkModel -from elephas.utils.rdd_utils import to_simple_rdd - -from pyspark import SparkContext, SparkConf - -import pytest -pytest.mark.usefixtures("spark_context") - - -def test_sync_mode(spark_context): - # Define basic parameters - batch_size = 64 - nb_classes = 10 - epochs = 10 - - # Load data - (x_train, y_train), (x_test, y_test) = mnist.load_data() - - x_train = x_train.reshape(60000, 784) - x_test = x_test.reshape(10000, 784) - x_train = x_train.astype("float32") - x_test = x_test.astype("float32") - x_train /= 255 - x_test /= 255 - print(x_train.shape[0], 'train samples') - print(x_test.shape[0], 'test samples') - - # Convert class vectors to binary class matrices - y_train = np_utils.to_categorical(y_train, nb_classes) - y_test = np_utils.to_categorical(y_test, nb_classes) - - model = Sequential() - model.add(Dense(128, input_dim=784)) - model.add(Activation('relu')) - model.add(Dropout(0.2)) - model.add(Dense(128)) - model.add(Activation('relu')) - model.add(Dropout(0.2)) - model.add(Dense(10)) - model.add(Activation('softmax')) - - sgd = SGD(lr=0.1) - model.compile(sgd, 'categorical_crossentropy', ['acc']) - - # Build RDD from numpy features and labels - rdd = to_simple_rdd(spark_context, x_train, y_train) - - # Initialize SparkModel from Keras model and Spark context - spark_model = SparkModel(model, mode='synchronous') - - # Train Spark model - spark_model.fit(rdd, epochs=epochs, batch_size=batch_size, - verbose=2, validation_split=0.1) - - # Evaluate Spark model by evaluating the underlying model - score = spark_model.master_network.evaluate(x_test, y_test, verbose=2) - assert score[1] >= 0.70 - - -if __name__ == '__main__': - pytest.main([__file__]) -import numpy as np -from elephas.ml import adapter -import pytest -pytest.mark.usefixtures("spark_context") - - -def test_to_data_frame(spark_context): - features = np.ones((2, 10)) - labels = np.asarray([[2.0], [1.0]]) - - data_frame = adapter.to_data_frame( - spark_context, features, labels, categorical=False) - assert data_frame.count() == 2 - - -def test_to_data_frame_cat(spark_context): - features = np.ones((2, 10)) - labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]]) - - data_frame = adapter.to_data_frame( - spark_context, features, labels, categorical=True) - assert data_frame.count() == 2 - - -def test_from_data_frame(spark_context): - features = np.ones((2, 10)) - labels = np.asarray([[2.0], [1.0]]).reshape((2,)) - - data_frame = adapter.to_data_frame( - spark_context, features, labels, categorical=False) - - x, y = adapter.from_data_frame(data_frame, categorical=False) - assert features.shape == x.shape - assert labels.shape == y.shape - - -def test_from_data_frame_cat(spark_context): - features = np.ones((2, 10)) - labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]]) - - data_frame = adapter.to_data_frame( - spark_context, features, labels, categorical=True) - - x, y = adapter.from_data_frame(data_frame, categorical=True, nb_classes=3) - assert features.shape == x.shape - assert labels.shape == y.shape - - -def test_df_to_simple_rdd(spark_context): - features = np.ones((2, 10)) - labels = np.asarray([[2.0], [1.0]]).reshape((2,)) - - data_frame = adapter.to_data_frame( - spark_context, features, labels, categorical=False) - - rdd = adapter.df_to_simple_rdd(data_frame, False) - 
assert rdd.count() == 2 -from elephas.ml.params import * - - -def test_has_keras_model_config(): - param = HasKerasModelConfig() - conf = {"foo": "bar"} - param.set_keras_model_config(conf) - assert conf == param.get_keras_model_config() - - -def test_has_optimizer_config(): - param = HasKerasOptimizerConfig() - conf = {"foo": "bar"} - param.set_optimizer_config(conf) - assert conf == param.get_optimizer_config() - - -def test_has_mode(): - param = HasMode() - assert param.get_mode() == "asynchronous" - mode = "foobar" - param.set_mode(mode) - assert param.get_mode() == mode - - -def test_has_frequency(): - param = HasFrequency() - assert param.get_frequency() == "epoch" - freq = "foobar" - param.set_frequency(freq) - assert param.get_frequency() == freq - - -def test_has_number_of_classes(): - param = HasNumberOfClasses() - assert param.get_nb_classes() == 10 - classes = 42 - param.set_nb_classes(classes) - assert param.get_nb_classes() == classes - - -def test_has_categorical_labels(): - param = HasCategoricalLabels() - assert param.get_categorical_labels() - has_labels = False - param.set_categorical_labels(has_labels) - assert param.get_categorical_labels() == has_labels - - -def test_has_epochs(): - param = HasEpochs() - assert param.get_epochs() == 10 - epochs = 42 - param.set_epochs(epochs) - assert param.get_epochs() == epochs - - -def test_has_batch_size(): - param = HasBatchSize() - assert param.get_batch_size() == 32 - bs = 42 - param.set_batch_size(bs) - assert param.get_batch_size() == bs - - -def test_has_verbosity(): - param = HasVerbosity() - assert param.get_verbosity() == 0 - verbosity = 2 - param.set_verbosity(verbosity) - assert param.get_verbosity() == verbosity - - -def test_has_validation_split(): - param = HasValidationSplit() - assert param.get_validation_split() == 0.1 - split = 0.5 - param.set_validation_split(split) - assert param.get_validation_split() == split - - -def test_has_number_of_workers(): - param = HasNumberOfWorkers() - assert param.get_num_workers() == 8 - workers = 12 - param.set_num_workers(workers) - assert param.get_num_workers() == workers -import numpy as np -from elephas.mllib.adapter import * -from pyspark.mllib.linalg import Matrices, Vectors - - -def test_to_matrix(): - x = np.ones((4, 2)) - mat = to_matrix(x) - assert mat.numRows == 4 - assert mat.numCols == 2 - - -def test_from_matrix(): - mat = Matrices.dense(1, 2, [13, 37]) - x = from_matrix(mat) - assert x.shape == (1, 2) - - -def test_from_vector(): - x = np.ones((3,)) - vector = to_vector(x) - assert len(vector) == 3 - - -def test_to_vector(): - vector = Vectors.dense([4, 2]) - x = from_vector(vector) - assert x.shape == (2,) -# TODO test clients -# TODO test servers -import pytest -import numpy as np -from elephas.utils import functional_utils - -pytest.mark.usefixtures("spark_context") - - -def test_add_params(): - p1 = [np.ones((5, 5)) for _ in range(10)] - p2 = [np.ones((5, 5)) for _ in range(10)] - - res = functional_utils.add_params(p1, p2) - assert len(res) == 10 - for i in range(5): - for j in range(5): - assert res[0][i, j] == 2 - - -def test_subtract_params(): - p1 = [np.ones((5, 5)) for _ in range(10)] - p2 = [np.ones((5, 5)) for _ in range(10)] - - res = functional_utils.subtract_params(p1, p2) - - assert len(res) == 10 - for i in range(5): - for j in range(5): - assert res[0][i, j] == 0 - - -def test_get_neutral(): - x = [np.ones((3, 4))] - res = functional_utils.get_neutral(x) - assert res[0].shape == x[0].shape - assert res[0][0, 0] == 0 - - -def test_divide_by(): - 
-    x = [np.ones((3, 4))]
-    res = functional_utils.divide_by(x, num_workers=10)
-    assert res[0].shape == x[0].shape
-    assert res[0][0, 0] == 0.1
-import pytest
-import numpy as np
-from elephas.utils import rdd_utils
-
-pytestmark = pytest.mark.usefixtures("spark_context")
-
-
-def test_to_simple_rdd(spark_context):
-    features = np.ones((5, 10))
-    labels = np.ones((5,))
-    rdd = rdd_utils.to_simple_rdd(spark_context, features, labels)
-
-    assert rdd.count() == 5
-    first = rdd.first()
-    assert first[0].shape == (10,)
-    assert first[1] == 1.0
-
-
-def test_to_labeled_rdd_categorical(spark_context):
-    features = np.ones((2, 10))
-    labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]])
-    lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, True)
-    assert lp_rdd.count() == 2
-    first = lp_rdd.first()
-    assert first.features.shape == (10,)
-    assert first.label == 2.0
-
-
-def test_to_labeled_rdd_not_categorical(spark_context):
-    features = np.ones((2, 10))
-    labels = np.asarray([[2.0], [1.0]])
-    lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, False)
-    assert lp_rdd.count() == 2
-    first = lp_rdd.first()
-    assert first.features.shape == (10,)
-    assert first.label == 2.0
-
-
-def test_from_labeled_rdd(spark_context):
-    features = np.ones((2, 10))
-    labels = np.asarray([[2.0], [1.0]]).reshape((2,))
-    lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, False)
-
-    x, y = rdd_utils.from_labeled_point(lp_rdd, False, None)
-    assert x.shape == features.shape
-    assert y.shape == labels.shape
-
-
-def test_from_labeled_rdd_categorical(spark_context):
-    features = np.ones((2, 10))
-    labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]])
-    lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, True)
-
-    x, y = rdd_utils.from_labeled_point(lp_rdd, True, 3)
-    assert x.shape == features.shape
-    assert y.shape == labels.shape
-
-
-def test_encode_label():
-    label = 3
-    nb_classes = 10
-    encoded = rdd_utils.encode_label(label, nb_classes)
-    assert len(encoded) == nb_classes
-    for i in range(10):
-        if i == label:
-            assert encoded[i] == 1
-        else:
-            assert encoded[i] == 0
-
-
-def test_lp_to_simple_rdd_categorical(spark_context):
-    features = np.ones((2, 10))
-    labels = np.asarray([[0, 0, 1.0], [0, 1.0, 0]])
-    lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, True)
-
-    rdd = rdd_utils.lp_to_simple_rdd(lp_rdd, categorical=True, nb_classes=3)
-    assert rdd.first()[0].shape == (10,)
-    assert rdd.first()[1].shape == (3,)
-
-
-def test_lp_to_simple_rdd_not_categorical(spark_context):
-    features = np.ones((2, 10))
-    labels = np.asarray([[2.0], [1.0]]).reshape((2,))
-    lp_rdd = rdd_utils.to_labeled_point(spark_context, features, labels, False)
-
-    rdd = rdd_utils.lp_to_simple_rdd(lp_rdd, categorical=False, nb_classes=3)
-    assert rdd.first()[0].shape == (10,)
-    assert rdd.first()[1] == 2.0
-# TODO test lock
-import pytest
-from keras.models import Sequential
-from elephas.utils import serialization
-
-
-def test_model_to_dict():
-    model = Sequential()
-    dict_model = serialization.model_to_dict(model)
-    # Compare as sets: dict views never equal a plain list in Python 3.
-    assert set(dict_model.keys()) == {'model', 'weights'}
-
-
-def test_dict_to_model():
-    model = Sequential()
-    dict_model = serialization.model_to_dict(model)
-
-    recovered = serialization.dict_to_model(dict_model)
-    assert recovered.to_json() == model.to_json()
-# TODO test sockets
-'''
-Author: Ji-Sung Kim
-Project: deepjazz
-Purpose: Generate jazz using a deep learning model (LSTM in deepjazz).
- -Some code adapted from Evan Chow's jazzml, https://github.com/evancchow/jazzml -with express permission. - -Code was built while significantly referencing public examples from the -Keras documentation on GitHub: -https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py - -GPU run command: - THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python generator.py [# of epochs] - - Note: running Keras/Theano on GPU is formally supported for only NVIDIA cards (CUDA backend). -''' -from __future__ import print_function -import sys - -from music21 import * -import numpy as np - -from grammar import * -from preprocess import * -from qa import * -import lstm - -#----------------------------HELPER FUNCTIONS----------------------------------# - -''' Helper function to sample an index from a probability array ''' - - -def __sample(a, temperature=1.0): - a = np.log(a) / temperature - a = np.exp(a) / np.sum(np.exp(a)) - return np.argmax(np.random.multinomial(1, a, 1)) - - -''' Helper function to generate a predicted value from a given matrix ''' - - -def __predict(model, x, indices_val, diversity): - preds = model.predict(x, verbose=0)[0] - next_index = __sample(preds, diversity) - next_val = indices_val[next_index] - - return next_val - - -''' Helper function which uses the given model to generate a grammar sequence - from a given corpus, indices_val (mapping), abstract_grammars (list), - and diversity floating point value. ''' - - -def __generate_grammar(model, corpus, abstract_grammars, values, val_indices, - indices_val, max_len, max_tries, diversity): - curr_grammar = '' - # np.random.randint is exclusive to high - start_index = np.random.randint(0, len(corpus) - max_len) - sentence = corpus[start_index: start_index + max_len] # seed - running_length = 0.0 - while running_length <= 4.1: # arbitrary, from avg in input file - # transform sentence (previous sequence) to matrix - x = np.zeros((1, max_len, len(values))) - for t, val in enumerate(sentence): - if (not val in val_indices): - print(val) - x[0, t, val_indices[val]] = 1. - - next_val = __predict(model, x, indices_val, diversity) - - # fix first note: must not have < > and not be a rest - if (running_length < 0.00001): - tries = 0 - while (next_val.split(',')[0] == 'R' or - len(next_val.split(',')) != 2): - # give up after 1000 tries; random from input's first notes - if tries >= max_tries: - print('Gave up on first note generation after', max_tries, - 'tries') - # np.random is exclusive to high - rand = np.random.randint(0, len(abstract_grammars)) - next_val = abstract_grammars[rand].split(' ')[0] - else: - next_val = __predict(model, x, indices_val, diversity) - - tries += 1 - - # shift sentence over with new value - sentence = sentence[1:] - sentence.append(next_val) - - # except for first case, add a ' ' separator - if (running_length > 0.00001): - curr_grammar += ' ' - curr_grammar += next_val - - length = float(next_val.split(',')[1]) - running_length += length - - return curr_grammar - - -#----------------------------PUBLIC FUNCTIONS----------------------------------# -''' Generates musical sequence based on the given data filename and settings. - Plays then stores (MIDI file) the generated output. 
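-# --- Illustrative aside, not part of the scraped file above ---
-# What the `__sample` helper above does, spelled out: temperature < 1
-# sharpens the distribution toward the argmax, temperature > 1 flattens
-# it toward uniform. A standalone check of that behaviour:
-import numpy as np
-
-
-def sample_with_temperature(probs, temperature=1.0):
-    logits = np.log(probs) / temperature
-    probs = np.exp(logits) / np.sum(np.exp(logits))
-    return np.argmax(np.random.multinomial(1, probs, 1))
-
-
-p = np.array([0.7, 0.2, 0.1])
-cold = [sample_with_temperature(p, temperature=0.1) for _ in range(200)]
-# At low temperature nearly every draw is the most likely index (0).
-assert cold.count(0) > 180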
''' - - -def generate(data_fn, out_fn, N_epochs): - # model settings - max_len = 20 - max_tries = 1000 - diversity = 0.5 - - # musical settings - bpm = 130 - - # get data - chords, abstract_grammars = get_musical_data(data_fn) - corpus, values, val_indices, indices_val = get_corpus_data( - abstract_grammars) - print('corpus length:', len(corpus)) - print('total # of values:', len(values)) - - # build model - model = lstm.build_model(corpus=corpus, val_indices=val_indices, - max_len=max_len, N_epochs=N_epochs) - - # set up audio stream - out_stream = stream.Stream() - - # generation loop - curr_offset = 0.0 - loopEnd = len(chords) - for loopIndex in range(1, loopEnd): - # get chords from file - curr_chords = stream.Voice() - for j in chords[loopIndex]: - curr_chords.insert((j.offset % 4), j) - - # generate grammar - curr_grammar = __generate_grammar(model=model, corpus=corpus, - abstract_grammars=abstract_grammars, - values=values, val_indices=val_indices, - indices_val=indices_val, - max_len=max_len, max_tries=max_tries, - diversity=diversity) - - curr_grammar = curr_grammar.replace(' A', ' C').replace(' X', ' C') - - # Pruning #1: smoothing measure - curr_grammar = prune_grammar(curr_grammar) - - # Get notes from grammar and chords - curr_notes = unparse_grammar(curr_grammar, curr_chords) - - # Pruning #2: removing repeated and too close together notes - curr_notes = prune_notes(curr_notes) - - # quality assurance: clean up notes - curr_notes = clean_up_notes(curr_notes) - - # print # of notes in curr_notes - print('After pruning: %s notes' % (len([i for i in curr_notes - if isinstance(i, note.Note)]))) - - # insert into the output stream - for m in curr_notes: - out_stream.insert(curr_offset + m.offset, m) - for mc in curr_chords: - out_stream.insert(curr_offset + mc.offset, mc) - - curr_offset += 4.0 - - out_stream.insert(0.0, tempo.MetronomeMark(number=bpm)) - - # Play the final stream through output (see 'play' lambda function above) - def play(x): return midi.realtime.StreamPlayer(x).play() - play(out_stream) - - # save stream - mf = midi.translate.streamToMidiFile(out_stream) - mf.open(out_fn, 'wb') - mf.write() - mf.close() - - -''' Runs generate() -- generating, playing, then storing a musical sequence -- - with the default Metheny file. ''' - - -def main(args): - try: - N_epochs = int(args[1]) - except: - N_epochs = 128 # default - - # i/o settings - data_fn = 'midi/' + 'original_metheny.mid' # 'And Then I Knew' by Pat Metheny - out_fn = 'midi/' 'deepjazz_on_metheny...' + str(N_epochs) - if (N_epochs == 1): - out_fn += '_epoch.midi' - else: - out_fn += '_epochs.midi' - - generate(data_fn, out_fn, N_epochs) - - -''' If run as script, execute main ''' -if __name__ == '__main__': - import sys - main(sys.argv) -''' -Author: Ji-Sung Kim, Evan Chow -Project: jazzml / (used in) deepjazz -Purpose: Extract, manipulate, process musical grammar - -Directly taken then cleaned up from Evan Chow's jazzml, -https://github.com/evancchow/jazzml,with permission. -''' - -from collections import OrderedDict, defaultdict -from itertools import groupby -from music21 import * -import copy -import random -import pdb - -''' Helper function to determine if a note is a scale tone. ''' - - -def __is_scale_tone(chord, note): - # Method: generate all scales that have the chord notes th check if note is - # in names - - # Derive major or minor scales (minor if 'other') based on the quality - # of the chord. - scaleType = scale.DorianScale() # i.e. 
minor pentatonic - if chord.quality == 'major': - scaleType = scale.MajorScale() - # Can change later to deriveAll() for flexibility. If so then use list - # comprehension of form [x for a in b for x in a]. - scales = scaleType.derive(chord) # use deriveAll() later for flexibility - allPitches = list(set([pitch for pitch in scales.getPitches()])) - allNoteNames = [i.name for i in allPitches] # octaves don't matter - - # Get note name. Return true if in the list of note names. - noteName = note.name - return (noteName in allNoteNames) - - -''' Helper function to determine if a note is an approach tone. ''' - - -def __is_approach_tone(chord, note): - # Method: see if note is +/- 1 a chord tone. - - for chordPitch in chord.pitches: - stepUp = chordPitch.transpose(1) - stepDown = chordPitch.transpose(-1) - if (note.name == stepDown.name or - note.name == stepDown.getEnharmonic().name or - note.name == stepUp.name or - note.name == stepUp.getEnharmonic().name): - return True - return False - - -''' Helper function to determine if a note is a chord tone. ''' - - -def __is_chord_tone(lastChord, note): - return (note.name in (p.name for p in lastChord.pitches)) - - -''' Helper function to generate a chord tone. ''' - - -def __generate_chord_tone(lastChord): - lastChordNoteNames = [p.nameWithOctave for p in lastChord.pitches] - return note.Note(random.choice(lastChordNoteNames)) - - -''' Helper function to generate a scale tone. ''' - - -def __generate_scale_tone(lastChord): - # Derive major or minor scales (minor if 'other') based on the quality - # of the lastChord. - scaleType = scale.WeightedHexatonicBlues() # minor pentatonic - if lastChord.quality == 'major': - scaleType = scale.MajorScale() - # Can change later to deriveAll() for flexibility. If so then use list - # comprehension of form [x for a in b for x in a]. - # use deriveAll() later for flexibility - scales = scaleType.derive(lastChord) - allPitches = list(set([pitch for pitch in scales.getPitches()])) - allNoteNames = [i.name for i in allPitches] # octaves don't matter - - # Return a note (no octave here) in a scale that matches the lastChord. - sNoteName = random.choice(allNoteNames) - lastChordSort = lastChord.sortAscending() - sNoteOctave = random.choice([i.octave for i in lastChordSort.pitches]) - sNote = note.Note(("%s%s" % (sNoteName, sNoteOctave))) - return sNote - - -''' Helper function to generate an approach tone. ''' - - -def __generate_approach_tone(lastChord): - sNote = __generate_scale_tone(lastChord) - aNote = sNote.transpose(random.choice([1, -1])) - return aNote - - -''' Helper function to generate a random tone. ''' - - -def __generate_arbitrary_tone(lastChord): - return __generate_scale_tone(lastChord) # fix later, make random note. - - -''' Given the notes in a measure ('measure') and the chords in that measure - ('chords'), generate a list of abstract grammatical symbols to represent - that measure as described in GTK's "Learning Jazz Grammars" (2009). - - Inputs: - 1) "measure" : a stream.Voice object where each element is a - note.Note or note.Rest object. - - >>> m1 - - >>> m1[0] - - >>> m1[1] - - - Can have instruments and other elements, removes them here. - - 2) "chords" : a stream.Voice object where each element is a chord.Chord. - - >>> c1 - - >>> c1[0] - - >>> c1[1] - - - Can have instruments and other elements, removes them here. - - Outputs: - 1) "fullGrammar" : a string that holds the abstract grammar for measure. - Format: - (Remember, these are DURATIONS not offsets!) 
- "R,0.125" : a rest element of (1/32) length, or 1/8 quarter note. - "C,0.125" : chord note of (1/32) length, generated - anywhere from minor 6th down to major 2nd down. - (interval is not ordered). ''' - - -def parse_melody(fullMeasureNotes, fullMeasureChords): - # Remove extraneous elements.x - measure = copy.deepcopy(fullMeasureNotes) - chords = copy.deepcopy(fullMeasureChords) - measure.removeByNotOfClass([note.Note, note.Rest]) - chords.removeByNotOfClass([chord.Chord]) - - # Information for the start of the measure. - # 1) measureStartTime: the offset for measure's start, e.g. 476.0. - # 2) measureStartOffset: how long from the measure start to the first element. - measureStartTime = measure[0].offset - (measure[0].offset % 4) - measureStartOffset = measure[0].offset - measureStartTime - - # Iterate over the notes and rests in measure, finding the grammar for each - # note in the measure and adding an abstract grammatical string for it. - - fullGrammar = "" - prevNote = None # Store previous note. Need for interval. - numNonRests = 0 # Number of non-rest elements. Need for updating prevNote. - for ix, nr in enumerate(measure): - # Get the last chord. If no last chord, then (assuming chords is of length - # >0) shift first chord in chords to the beginning of the measure. - try: - lastChord = [n for n in chords if n.offset <= nr.offset][-1] - except IndexError: - chords[0].offset = measureStartTime - lastChord = [n for n in chords if n.offset <= nr.offset][-1] - - # FIRST, get type of note, e.g. R for Rest, C for Chord, etc. - # Dealing with solo notes here. If unexpected chord: still call 'C'. - elementType = ' ' - # R: First, check if it's a rest. Clearly a rest --> only one possibility. - if isinstance(nr, note.Rest): - elementType = 'R' - # C: Next, check to see if note pitch is in the last chord. - elif nr.name in lastChord.pitchNames or isinstance(nr, chord.Chord): - elementType = 'C' - # L: (Complement tone) Skip this for now. - # S: Check if it's a scale tone. - elif __is_scale_tone(lastChord, nr): - elementType = 'S' - # A: Check if it's an approach tone, i.e. +-1 halfstep chord tone. - elif __is_approach_tone(lastChord, nr): - elementType = 'A' - # X: Otherwise, it's an arbitrary tone. Generate random note. - else: - elementType = 'X' - - # SECOND, get the length for each element. e.g. 8th note = R8, but - # to simplify things you'll use the direct num, e.g. R,0.125 - if (ix == (len(measure)-1)): - # formula for a in "a - b": start of measure (e.g. 476) + 4 - diff = measureStartTime + 4.0 - nr.offset - else: - diff = measure[ix + 1].offset - nr.offset - - # Combine into the note info. - noteInfo = "%s,%.3f" % (elementType, nr.quarterLength) # back to diff - - # THIRD, get the deltas (max range up, max range down) based on where - # the previous note was, +- minor 3. Skip rests (don't affect deltas). - intervalInfo = "" - if isinstance(nr, note.Note): - numNonRests += 1 - if numNonRests == 1: - prevNote = nr - else: - noteDist = interval.Interval(noteStart=prevNote, noteEnd=nr) - noteDistUpper = interval.add([noteDist, "m3"]) - noteDistLower = interval.subtract([noteDist, "m3"]) - intervalInfo = ",<%s,%s>" % (noteDistUpper.directedName, - noteDistLower.directedName) - # print "Upper, lower: %s, %s" % (noteDistUpper, - # noteDistLower) - # print "Upper, lower dnames: %s, %s" % ( - # noteDistUpper.directedName, - # noteDistLower.directedName) - # print "The interval: %s" % (intervalInfo) - prevNote = nr - - # Return. Do lazy evaluation for real-time performance. 
-        grammarTerm = noteInfo + intervalInfo
-        fullGrammar += (grammarTerm + " ")
-
-    return fullGrammar.rstrip()
-
-
-''' Given a grammar string and chords for a measure, returns measure notes. '''
-
-
-def unparse_grammar(m1_grammar, m1_chords):
-    m1_elements = stream.Voice()
-    currOffset = 0.0  # Running offset; used to locate the last chord.
-    prevElement = None
-    for ix, grammarElement in enumerate(m1_grammar.split(' ')):
-        terms = grammarElement.split(',')
-        currOffset += float(terms[1])
-
-        # Case 1: it's a rest. Just append it.
-        if terms[0] == 'R':
-            rNote = note.Rest(quarterLength=float(terms[1]))
-            m1_elements.insert(currOffset, rNote)
-            continue
-
-        # Get the last chord first so you can find chord notes, scale notes, etc.
-        try:
-            lastChord = [n for n in m1_chords if n.offset <= currOffset][-1]
-        except IndexError:
-            m1_chords[0].offset = 0.0
-            lastChord = [n for n in m1_chords if n.offset <= currOffset][-1]
-
-        # Case #1: no < > to indicate the next note's range. Usually this lack
-        # of < > is for the first note (no precedent) or for rests. Generate
-        # from the range of the lowest chord note to the highest chord note
-        # (or, for a chord note, just pick one of the actual chord notes).
-        if (len(terms) == 2):
-            insertNote = note.Note()  # default is C
-
-            # Case C: chord note.
-            if terms[0] == 'C':
-                insertNote = __generate_chord_tone(lastChord)
-
-            # Case S: scale note.
-            elif terms[0] == 'S':
-                insertNote = __generate_scale_tone(lastChord)
-
-            # Case A: approach note.
-            # Handle both A and X notes here for now.
-            else:
-                insertNote = __generate_approach_tone(lastChord)
-
-            # Update the stream of generated notes.
-            insertNote.quarterLength = float(terms[1])
-            if insertNote.octave < 4:
-                insertNote.octave = 4
-            m1_elements.insert(currOffset, insertNote)
-            prevElement = insertNote
-
-        # Case #2: < > gives the increment. Usually for notes after the first one.
-        else:
-            # Get the lower and upper intervals and notes.
-            interval1 = interval.Interval(terms[2].replace("<", ''))
-            interval2 = interval.Interval(terms[3].replace(">", ''))
-            if interval1.cents > interval2.cents:
-                upperInterval, lowerInterval = interval1, interval2
-            else:
-                upperInterval, lowerInterval = interval2, interval1
-            lowPitch = interval.transposePitch(prevElement.pitch, lowerInterval)
-            highPitch = interval.transposePitch(prevElement.pitch, upperInterval)
-            numNotes = int(highPitch.ps - lowPitch.ps + 1)  # for range(s, e)
-
-            # Case C: chord note, must be within the increment (terms[2]).
-            # First, transpose the note by lowerInterval to get the lower
-            # bound. Then iterate up from there, collect the valid notes, and
-            # choose randomly from those.
-            if terms[0] == 'C':
-                relevantChordTones = []
-                for i in range(0, numNotes):
-                    currNote = note.Note(
-                        lowPitch.transpose(i).simplifyEnharmonic())
-                    if __is_chord_tone(lastChord, currNote):
-                        relevantChordTones.append(currNote)
-                if len(relevantChordTones) > 1:
-                    insertNote = random.choice([i for i in relevantChordTones
-                                                if i.nameWithOctave != prevElement.nameWithOctave])
-                elif len(relevantChordTones) == 1:
-                    insertNote = relevantChordTones[0]
-                else:  # if no choices, set to the previous element +/- 1 whole step
-                    insertNote = prevElement.transpose(random.choice([-2, 2]))
-                if insertNote.octave < 3:
-                    insertNote.octave = 3
-                insertNote.quarterLength = float(terms[1])
-                m1_elements.insert(currOffset, insertNote)
-
-            # Case S: scale note, must be within the increment.
-            elif terms[0] == 'S':
-                relevantScaleTones = []
-                for i in range(0, numNotes):
-                    currNote = note.Note(
-                        lowPitch.transpose(i).simplifyEnharmonic())
-                    if __is_scale_tone(lastChord, currNote):
-                        relevantScaleTones.append(currNote)
-                if len(relevantScaleTones) > 1:
-                    insertNote = random.choice([i for i in relevantScaleTones
-                                                if i.nameWithOctave != prevElement.nameWithOctave])
-                elif len(relevantScaleTones) == 1:
-                    insertNote = relevantScaleTones[0]
-                else:  # if no choices, set to the previous element +/- 1 whole step
-                    insertNote = prevElement.transpose(random.choice([-2, 2]))
-                if insertNote.octave < 3:
-                    insertNote.octave = 3
-                insertNote.quarterLength = float(terms[1])
-                m1_elements.insert(currOffset, insertNote)
-
-            # Case A: approach tone, must be within the increment.
-            # For now: handle both the A and X cases here.
-            else:
-                relevantApproachTones = []
-                for i in range(0, numNotes):
-                    currNote = note.Note(
-                        lowPitch.transpose(i).simplifyEnharmonic())
-                    if __is_approach_tone(lastChord, currNote):
-                        relevantApproachTones.append(currNote)
-                if len(relevantApproachTones) > 1:
-                    insertNote = random.choice([i for i in relevantApproachTones
-                                                if i.nameWithOctave != prevElement.nameWithOctave])
-                elif len(relevantApproachTones) == 1:
-                    insertNote = relevantApproachTones[0]
-                else:  # if no choices, set to the previous element +/- 1 whole step
-                    insertNote = prevElement.transpose(random.choice([-2, 2]))
-                if insertNote.octave < 3:
-                    insertNote.octave = 3
-                insertNote.quarterLength = float(terms[1])
-                m1_elements.insert(currOffset, insertNote)
-
-            # Update the previous element.
-            prevElement = insertNote
-
-    return m1_elements
-'''
-Author: Ji-Sung Kim
-Project: deepjazz
-Purpose: Builds an LSTM, a type of recurrent neural network (RNN).
-
-Code was built while significantly referencing public examples from the
-Keras documentation on GitHub:
-https://github.com/fchollet/keras/blob/master/examples/lstm_text_generation.py
-'''
-
-from __future__ import print_function
-
-from keras.models import Sequential
-from keras.layers import LSTM, Dense, Activation, Dropout
-import numpy as np
-
-''' Build a 2-layer LSTM from a training corpus '''
-
-
-def build_model(corpus, val_indices, max_len, N_epochs=128):
-    # number of different values or words in the corpus
-    N_values = len(set(corpus))
-
-    # cut the corpus into semi-redundant sequences of max_len values
-    step = 3
-    sentences = []
-    next_values = []
-    for i in range(0, len(corpus) - max_len, step):
-        sentences.append(corpus[i: i + max_len])
-        next_values.append(corpus[i + max_len])
-    print('nb sequences:', len(sentences))
-
-    # transform the data into binary (one-hot) matrices
-    X = np.zeros((len(sentences), max_len, N_values), dtype=bool)
-    y = np.zeros((len(sentences), N_values), dtype=bool)
-    for i, sentence in enumerate(sentences):
-        for t, val in enumerate(sentence):
-            X[i, t, val_indices[val]] = 1
-        y[i, val_indices[next_values[i]]] = 1
-
-    # build a 2-layer stacked LSTM
-    model = Sequential()
-    model.add(LSTM(128, return_sequences=True,
-                   input_shape=(max_len, N_values)))
-    model.add(Dropout(0.2))
-    model.add(LSTM(128, return_sequences=False))
-    model.add(Dropout(0.2))
-    model.add(Dense(N_values))
-    model.add(Activation('softmax'))
-
-    model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
-
-    model.fit(X, y, batch_size=128, epochs=N_epochs)
-
-    return model
-'''
-Author: Ji-Sung Kim
-Project: deepjazz
-Purpose: Parse, cleanup and process data.
-
-Code adapted from Evan Chow's jazzml, https://github.com/evancchow/jazzml with
-express permission.
-'''
-
-from __future__ import print_function
-
-from music21 import *
-from collections import defaultdict, OrderedDict
-from itertools import groupby
-from grammar import *
-
-#----------------------------HELPER FUNCTIONS----------------------------------#
-
-''' Helper function to parse a MIDI file into its measures and chords '''
-
-
-def __parse_midi(data_fn):
-    # Parse the MIDI data into separate melody and accompaniment parts.
-    midi_data = converter.parse(data_fn)
-    # Get the melody part and compress it into a single voice.
-    melody_stream = midi_data[5]  # For the Metheny piece, the melody is Part #5.
-    melody1, melody2 = melody_stream.getElementsByClass(stream.Voice)
-    for j in melody2:
-        melody1.insert(j.offset, j)
-    melody_voice = melody1
-
-    for i in melody_voice:
-        if i.quarterLength == 0.0:
-            i.quarterLength = 0.25
-
-    # Change the key signature to adhere to comp_stream (1 sharp, mode = major).
-    # Also add an Electric Guitar.
-    melody_voice.insert(0, instrument.ElectricGuitar())
-    melody_voice.insert(0, key.KeySignature(sharps=1, mode='major'))
-
-    # The accompaniment parts. Take only the best subset of parts from
-    # the original data. Maybe add more parts and hand-add valid instruments.
-    # Should at least add a string part (for sparse solos).
-    # Verified good parts: 0, 1, 6, 7.
-    partIndices = [0, 1, 6, 7]
-    comp_stream = stream.Voice()
-    comp_stream.append([j.flat for i, j in enumerate(midi_data)
-                        if i in partIndices])
-
-    # Full stream containing both the melody and the accompaniment.
-    # All parts are flattened.
-    full_stream = stream.Voice()
-    for i in range(len(comp_stream)):
-        full_stream.append(comp_stream[i])
-    full_stream.append(melody_voice)
-
-    # Extract the solo stream, assuming you know the positions ..ByOffset(i, j).
-    # Note that for different instruments (with stream.flat), you NEED to use
-    # stream.Part(), not stream.Voice().
-    # The accompanied solo is in the range [478, 548).
-    solo_stream = stream.Voice()
-    for part in full_stream:
-        curr_part = stream.Part()
-        curr_part.append(part.getElementsByClass(instrument.Instrument))
-        curr_part.append(part.getElementsByClass(tempo.MetronomeMark))
-        curr_part.append(part.getElementsByClass(key.KeySignature))
-        curr_part.append(part.getElementsByClass(meter.TimeSignature))
-        curr_part.append(part.getElementsByOffset(476, 548,
-                                                  includeEndBoundary=True))
-        cp = curr_part.flat
-        solo_stream.insert(cp)
-
-    # Group by measure so you can classify.
-    # Note that measure 0 is for the time signature, metronome, etc., which
-    # have an offset of 0.0.
-    melody_stream = solo_stream[-1]
-    measures = OrderedDict()
-    offsetTuples = [(int(n.offset / 4), n) for n in melody_stream]
-    measureNum = 0  # for now, don't use the real measure numbers (119, 120)
-    for key_x, group in groupby(offsetTuples, lambda x: x[0]):
-        measures[measureNum] = [n[1] for n in group]
-        measureNum += 1
-
-    # Get the stream of chords.
-    # offsetTuples_chords: group the chords by measure number.
-    chordStream = solo_stream[0]
-    chordStream.removeByClass(note.Rest)
-    chordStream.removeByClass(note.Note)
-    offsetTuples_chords = [(int(n.offset / 4), n) for n in chordStream]
-
-    # Generate the chord structure. Use just track 1 (piano), since it is
-    # the only instrument that has chords.
-    # Group into 4s, just like before.
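-    # (For example, with offsets in quarter-note units, a chord at offset
-    #  481.0 maps to int(481.0 / 4) = measure group 120 -- illustrative only.)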
-    chords = OrderedDict()
-    measureNum = 0
-    for key_x, group in groupby(offsetTuples_chords, lambda x: x[0]):
-        chords[measureNum] = [n[1] for n in group]
-        measureNum += 1
-
-    # Why len(measures) != len(chords): the melody ends 1/16 before the last
-    # measure, so it doesn't actually show up, while the accompaniment's beat 1
-    # right after does. Since melody and comp both start on Ab and resolve to
-    # the same key (Ab), the last measure can simply be cut out to loop.
-    # Decided: just cut out the last measure.
-    del chords[len(chords) - 1]
-    assert len(chords) == len(measures)
-
-    return measures, chords
-
-
-''' Helper function to get the grammatical data from given musical data. '''
-
-
-def __get_abstract_grammars(measures, chords):
-    # extract the grammars
-    abstract_grammars = []
-    for ix in range(1, len(measures)):
-        m = stream.Voice()
-        for i in measures[ix]:
-            m.insert(i.offset, i)
-        c = stream.Voice()
-        for j in chords[ix]:
-            c.insert(j.offset, j)
-        parsed = parse_melody(m, c)
-        abstract_grammars.append(parsed)
-
-    return abstract_grammars
-
-#----------------------------PUBLIC FUNCTIONS----------------------------------#
-
-
-''' Get musical data from a MIDI file '''
-
-
-def get_musical_data(data_fn):
-    measures, chords = __parse_midi(data_fn)
-    abstract_grammars = __get_abstract_grammars(measures, chords)
-
-    return chords, abstract_grammars
-
-
-''' Get corpus data from grammatical data '''
-
-
-def get_corpus_data(abstract_grammars):
-    corpus = [x for sublist in abstract_grammars for x in sublist.split(' ')]
-    values = set(corpus)
-    val_indices = dict((v, i) for i, v in enumerate(values))
-    indices_val = dict((i, v) for i, v in enumerate(values))
-
-    return corpus, values, val_indices, indices_val
-'''
-Author: Ji-Sung Kim, Evan Chow
-Project: deepjazz
-Purpose: Provide pruning and cleanup functions.
-
-Code adapted from Evan Chow's jazzml, https://github.com/evancchow/jazzml
-with express permission.
-'''
-from itertools import zip_longest
-import random
-
-from music21 import *
-
-#----------------------------HELPER FUNCTIONS----------------------------------#
-
-''' Helper function to round down num to the nearest multiple of mult. '''
-
-
-def __roundDown(num, mult):
-    return (float(num) - (float(num) % mult))
-
-
-''' Helper function to round up num to the nearest multiple of mult. '''
-
-
-def __roundUp(num, mult):
-    return __roundDown(num, mult) + mult
-
-
-''' Helper function that rounds num down (if upDown < 0) or up (if
-    upDown >= 0) to the nearest multiple of mult. '''
-
-
-def __roundUpDown(num, mult, upDown):
-    if upDown < 0:
-        return __roundDown(num, mult)
-    else:
-        return __roundUp(num, mult)
-
-
-''' Helper function, from the itertools recipes, to iterate over a list in
-    chunks of length n. '''
-
-
-def __grouper(iterable, n, fillvalue=None):
-    args = [iter(iterable)] * n
-    return zip_longest(*args, fillvalue=fillvalue)
-
-#----------------------------PUBLIC FUNCTIONS----------------------------------#
-
-
-''' Smooth the measure, ensuring that everything is in standard note lengths
-    (e.g., 0.125, 0.250, 0.333 ... ).
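-    For example, a duration of 0.40 becomes 0.25 when rounded down or 0.50
-    when rounded up, with the direction chosen at random for each term.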
-    '''
-
-
-def prune_grammar(curr_grammar):
-    pruned_grammar = curr_grammar.split(' ')
-
-    for ix, gram in enumerate(pruned_grammar):
-        terms = gram.split(',')
-        terms[1] = str(__roundUpDown(float(terms[1]), 0.250,
-                                     random.choice([-1, 1])))
-        pruned_grammar[ix] = ','.join(terms)
-    pruned_grammar = ' '.join(pruned_grammar)
-
-    return pruned_grammar
-
-
-''' Remove repeated notes, and notes that are too close together. '''
-
-
-def prune_notes(curr_notes):
-    for n1, n2 in __grouper(curr_notes, n=2):
-        if n2 is None:  # corner case: odd-length list
-            continue
-        if isinstance(n1, note.Note) and isinstance(n2, note.Note):
-            if n1.nameWithOctave == n2.nameWithOctave:
-                curr_notes.remove(n2)
-
-    return curr_notes
-
-
-''' Perform quality assurance on notes '''
-
-
-def clean_up_notes(curr_notes):
-    removeIxs = []
-    for ix, m in enumerate(curr_notes):
-        # QA1: ensure nothing has a quarterLength of 0; if so, change its length.
-        if (m.quarterLength == 0.0):
-            m.quarterLength = 0.250
-        # QA2: ensure no two melody notes have the same offset, i.e. form a chord.
-        # The list is sorted, so notes with the same offset are consecutive.
-        if (ix < (len(curr_notes) - 1)):
-            if (m.offset == curr_notes[ix + 1].offset and
-                    isinstance(curr_notes[ix + 1], note.Note)):
-                removeIxs.append((ix + 1))
-    curr_notes = [i for ix, i in enumerate(curr_notes) if ix not in removeIxs]
-
-    return curr_notes
-'''
-Includes:
-* Function to compute the IoU similarity for axis-aligned, rectangular, 2D bounding boxes
-* Function for coordinate conversion for axis-aligned, rectangular, 2D bounding boxes
-
-Copyright (C) 2018 Pierluigi Ferrari
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-from __future__ import division
-import numpy as np
-
-
-def convert_coordinates(tensor, start_index, conversion, border_pixels='half'):
-    '''
-    Convert coordinates for axis-aligned 2D boxes between two coordinate formats.
-
-    Creates a copy of `tensor`, i.e. does not operate in place. Currently there are
-    three supported coordinate formats that can be converted from and to each other:
-    1) (xmin, xmax, ymin, ymax) - the 'minmax' format
-    2) (xmin, ymin, xmax, ymax) - the 'corners' format
-    3) (cx, cy, w, h) - the 'centroids' format
-
-    Arguments:
-        tensor (array): A Numpy nD array containing the four consecutive coordinates
-            to be converted somewhere in the last axis.
-        start_index (int): The index of the first coordinate in the last axis of `tensor`.
-        conversion (str): The conversion direction. Can be 'minmax2centroids',
-            'centroids2minmax', 'corners2centroids', 'centroids2corners', 'minmax2corners',
-            or 'corners2minmax'.
-        border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
-            Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
-            to the boxes. If 'exclude', the border pixels do not belong to the boxes.
-            If 'half', then one of each of the two horizontal and vertical borders belongs
-            to the boxes, but not the other.
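-            For example, the centroids box (cx, cy, w, h) = (2, 3, 4, 2)
-            corresponds to the corners box (xmin, ymin, xmax, ymax) = (0, 2, 4, 4).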
-
-    Returns:
-        A Numpy nD array, a copy of the input tensor with the converted coordinates
-        in place of the original coordinates and the unaltered elements of the original
-        tensor elsewhere.
-    '''
-    if border_pixels == 'half':
-        d = 0
-    elif border_pixels == 'include':
-        d = 1
-    elif border_pixels == 'exclude':
-        d = -1
-    else:
-        raise ValueError(
-            "`border_pixels` must be one of 'half', 'include', or 'exclude', but got '{}'.".format(border_pixels))
-
-    ind = start_index
-    tensor1 = np.copy(tensor).astype(float)
-    if conversion == 'minmax2centroids':
-        tensor1[..., ind] = (tensor[..., ind] + tensor[..., ind+1]) / 2.0      # Set cx
-        tensor1[..., ind+1] = (tensor[..., ind+2] + tensor[..., ind+3]) / 2.0  # Set cy
-        tensor1[..., ind+2] = tensor[..., ind+1] - tensor[..., ind] + d        # Set w
-        tensor1[..., ind+3] = tensor[..., ind+3] - tensor[..., ind+2] + d      # Set h
-    elif conversion == 'centroids2minmax':
-        tensor1[..., ind] = tensor[..., ind] - tensor[..., ind+2] / 2.0        # Set xmin
-        tensor1[..., ind+1] = tensor[..., ind] + tensor[..., ind+2] / 2.0      # Set xmax
-        tensor1[..., ind+2] = tensor[..., ind+1] - tensor[..., ind+3] / 2.0    # Set ymin
-        tensor1[..., ind+3] = tensor[..., ind+1] + tensor[..., ind+3] / 2.0    # Set ymax
-    elif conversion == 'corners2centroids':
-        tensor1[..., ind] = (tensor[..., ind] + tensor[..., ind+2]) / 2.0      # Set cx
-        tensor1[..., ind+1] = (tensor[..., ind+1] + tensor[..., ind+3]) / 2.0  # Set cy
-        tensor1[..., ind+2] = tensor[..., ind+2] - tensor[..., ind] + d        # Set w
-        tensor1[..., ind+3] = tensor[..., ind+3] - tensor[..., ind+1] + d      # Set h
-    elif conversion == 'centroids2corners':
-        tensor1[..., ind] = tensor[..., ind] - tensor[..., ind+2] / 2.0        # Set xmin
-        tensor1[..., ind+1] = tensor[..., ind+1] - tensor[..., ind+3] / 2.0    # Set ymin
-        tensor1[..., ind+2] = tensor[..., ind] + tensor[..., ind+2] / 2.0      # Set xmax
-        tensor1[..., ind+3] = tensor[..., ind+1] + tensor[..., ind+3] / 2.0    # Set ymax
-    elif (conversion == 'minmax2corners') or (conversion == 'corners2minmax'):
-        tensor1[..., ind+1] = tensor[..., ind+2]
-        tensor1[..., ind+2] = tensor[..., ind+1]
-    else:
-        raise ValueError(
-            "Unexpected conversion value. Supported values are 'minmax2centroids', 'centroids2minmax', 'corners2centroids', 'centroids2corners', 'minmax2corners', and 'corners2minmax'.")
-
-    return tensor1
-
-
-def convert_coordinates2(tensor, start_index, conversion):
-    '''
-    A matrix multiplication implementation of `convert_coordinates()`.
-    Supports only conversion between the 'centroids' and 'minmax' formats.
-
-    This function is marginally slower on average than `convert_coordinates()`,
-    probably because it involves more (unnecessary) arithmetic operations
-    (unnecessary because the two matrices are sparse).
-
-    For details please refer to the documentation of `convert_coordinates()`.
-    '''
-    ind = start_index
-    tensor1 = np.copy(tensor).astype(float)
-    if conversion == 'minmax2centroids':
-        M = np.array([[0.5, 0., -1., 0.],
-                      [0.5, 0., 1., 0.],
-                      [0., 0.5, 0., -1.],
-                      [0., 0.5, 0., 1.]])
-        tensor1[..., ind:ind+4] = np.dot(tensor1[..., ind:ind+4], M)
-    elif conversion == 'centroids2minmax':
-        M = np.array([[1., 1., 0., 0.],
-                      [0., 0., 1., 1.],
-                      [-0.5, 0.5, 0., 0.],
-                      [0., 0., -0.5, 0.5]])  # The multiplicative inverse of the matrix above
-        tensor1[..., ind:ind+4] = np.dot(tensor1[..., ind:ind+4], M)
-    else:
-        raise ValueError(
-            "Unexpected conversion value. 
Supported values are 'minmax2centroids' and 'centroids2minmax'.")
-
-    return tensor1
-
-
-def intersection_area(boxes1, boxes2, coords='centroids', mode='outer_product', border_pixels='half'):
-    '''
-    Computes the intersection areas of two sets of axis-aligned 2D rectangular boxes.
-
-    Let `boxes1` and `boxes2` contain `m` and `n` boxes, respectively.
-
-    In 'outer_product' mode, returns an `(m,n)` matrix with the intersection areas for all possible
-    combinations of the boxes in `boxes1` and `boxes2`.
-
-    In 'element-wise' mode, `m` and `n` must be broadcast-compatible. Refer to the explanation
-    of the `mode` argument for details.
-
-    Arguments:
-        boxes1 (array): Either a 1D Numpy array of shape `(4, )` containing the coordinates for one box in the
-            format specified by `coords` or a 2D Numpy array of shape `(m, 4)` containing the coordinates for `m` boxes.
-            If `mode` is set to 'element-wise', the shape must be broadcast-compatible with `boxes2`.
-        boxes2 (array): Either a 1D Numpy array of shape `(4, )` containing the coordinates for one box in the
-            format specified by `coords` or a 2D Numpy array of shape `(n, 4)` containing the coordinates for `n` boxes.
-            If `mode` is set to 'element-wise', the shape must be broadcast-compatible with `boxes1`.
-        coords (str, optional): The coordinate format in the input arrays. Can be either 'centroids' for the format
-            `(cx, cy, w, h)`, 'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format
-            `(xmin, ymin, xmax, ymax)`.
-        mode (str, optional): Can be one of 'outer_product' and 'element-wise'. In 'outer_product' mode, returns an
-            `(m,n)` matrix with the intersection areas for all possible combinations of the `m` boxes in `boxes1` with the
-            `n` boxes in `boxes2`. In 'element-wise' mode, returns a 1D array and the shapes of `boxes1` and `boxes2`
-            must be broadcast-compatible. If both `boxes1` and `boxes2` have `m` boxes, then this returns an array of
-            length `m` where the i-th position contains the intersection area of `boxes1[i]` with `boxes2[i]`.
-        border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
-            Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
-            to the boxes. If 'exclude', the border pixels do not belong to the boxes.
-            If 'half', then one of each of the two horizontal and vertical borders belongs
-            to the boxes, but not the other.
-
-    Returns:
-        A 1D or 2D Numpy array (refer to the `mode` argument for details) of dtype float containing values with
-        the intersection areas of the boxes in `boxes1` and `boxes2`.
-    '''
-
-    # Make sure the boxes have the right shapes.
-    if boxes1.ndim > 2:
-        raise ValueError(
-            "boxes1 must have rank either 1 or 2, but has rank {}.".format(boxes1.ndim))
-    if boxes2.ndim > 2:
-        raise ValueError(
-            "boxes2 must have rank either 1 or 2, but has rank {}.".format(boxes2.ndim))
-
-    if boxes1.ndim == 1:
-        boxes1 = np.expand_dims(boxes1, axis=0)
-    if boxes2.ndim == 1:
-        boxes2 = np.expand_dims(boxes2, axis=0)
-
-    if not (boxes1.shape[1] == boxes2.shape[1] == 4):
-        raise ValueError("All boxes must consist of 4 coordinates, but the boxes in `boxes1` and `boxes2` have {} and {} coordinates, respectively.".format(
-            boxes1.shape[1], boxes2.shape[1]))
-    if not mode in {'outer_product', 'element-wise'}:
-        raise ValueError(
-            "`mode` must be one of 'outer_product' and 'element-wise', but got '{}'.".format(mode))
-
-    # Convert the coordinates if necessary.
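-    # Internally, the area computation below only works with the 'corners'
-    # and 'minmax' index layouts, so 'centroids' input is converted first.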
- if coords == 'centroids': - boxes1 = convert_coordinates( - boxes1, start_index=0, conversion='centroids2corners') - boxes2 = convert_coordinates( - boxes2, start_index=0, conversion='centroids2corners') - coords = 'corners' - elif not (coords in {'minmax', 'corners'}): - raise ValueError( - "Unexpected value for `coords`. Supported values are 'minmax', 'corners' and 'centroids'.") - - m = boxes1.shape[0] # The number of boxes in `boxes1` - n = boxes2.shape[0] # The number of boxes in `boxes2` - - # Set the correct coordinate indices for the respective formats. - if coords == 'corners': - xmin = 0 - ymin = 1 - xmax = 2 - ymax = 3 - elif coords == 'minmax': - xmin = 0 - xmax = 1 - ymin = 2 - ymax = 3 - - if border_pixels == 'half': - d = 0 - elif border_pixels == 'include': - # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`. - d = 1 - elif border_pixels == 'exclude': - # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`. - d = -1 - - # Compute the intersection areas. - - if mode == 'outer_product': - - # For all possible box combinations, get the greater xmin and ymin values. - # This is a tensor of shape (m,n,2). - min_xy = np.maximum(np.tile(np.expand_dims(boxes1[:, [xmin, ymin]], axis=1), reps=(1, n, 1)), - np.tile(np.expand_dims(boxes2[:, [xmin, ymin]], axis=0), reps=(m, 1, 1))) - - # For all possible box combinations, get the smaller xmax and ymax values. - # This is a tensor of shape (m,n,2). - max_xy = np.minimum(np.tile(np.expand_dims(boxes1[:, [xmax, ymax]], axis=1), reps=(1, n, 1)), - np.tile(np.expand_dims(boxes2[:, [xmax, ymax]], axis=0), reps=(m, 1, 1))) - - # Compute the side lengths of the intersection rectangles. - side_lengths = np.maximum(0, max_xy - min_xy + d) - - return side_lengths[:, :, 0] * side_lengths[:, :, 1] - - elif mode == 'element-wise': - - min_xy = np.maximum(boxes1[:, [xmin, ymin]], boxes2[:, [xmin, ymin]]) - max_xy = np.minimum(boxes1[:, [xmax, ymax]], boxes2[:, [xmax, ymax]]) - - # Compute the side lengths of the intersection rectangles. - side_lengths = np.maximum(0, max_xy - min_xy + d) - - return side_lengths[:, 0] * side_lengths[:, 1] - - -def intersection_area_(boxes1, boxes2, coords='corners', mode='outer_product', border_pixels='half'): - ''' - The same as 'intersection_area()' but for internal use, i.e. without all the safety checks. - ''' - - m = boxes1.shape[0] # The number of boxes in `boxes1` - n = boxes2.shape[0] # The number of boxes in `boxes2` - - # Set the correct coordinate indices for the respective formats. - if coords == 'corners': - xmin = 0 - ymin = 1 - xmax = 2 - ymax = 3 - elif coords == 'minmax': - xmin = 0 - xmax = 1 - ymin = 2 - ymax = 3 - - if border_pixels == 'half': - d = 0 - elif border_pixels == 'include': - # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`. - d = 1 - elif border_pixels == 'exclude': - # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`. - d = -1 - - # Compute the intersection areas. - - if mode == 'outer_product': - - # For all possible box combinations, get the greater xmin and ymin values. - # This is a tensor of shape (m,n,2). 
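-        # (expand_dims/tile replicate boxes1 along axis 1 and boxes2 along
-        #  axis 0, so the maximum is taken over every (i, j) box pair.)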
-        min_xy = np.maximum(np.tile(np.expand_dims(boxes1[:, [xmin, ymin]], axis=1), reps=(1, n, 1)),
-                            np.tile(np.expand_dims(boxes2[:, [xmin, ymin]], axis=0), reps=(m, 1, 1)))
-
-        # For all possible box combinations, get the smaller xmax and ymax values.
-        # This is a tensor of shape (m,n,2).
-        max_xy = np.minimum(np.tile(np.expand_dims(boxes1[:, [xmax, ymax]], axis=1), reps=(1, n, 1)),
-                            np.tile(np.expand_dims(boxes2[:, [xmax, ymax]], axis=0), reps=(m, 1, 1)))
-
-        # Compute the side lengths of the intersection rectangles.
-        side_lengths = np.maximum(0, max_xy - min_xy + d)
-
-        return side_lengths[:, :, 0] * side_lengths[:, :, 1]
-
-    elif mode == 'element-wise':
-
-        min_xy = np.maximum(boxes1[:, [xmin, ymin]], boxes2[:, [xmin, ymin]])
-        max_xy = np.minimum(boxes1[:, [xmax, ymax]], boxes2[:, [xmax, ymax]])
-
-        # Compute the side lengths of the intersection rectangles.
-        side_lengths = np.maximum(0, max_xy - min_xy + d)
-
-        return side_lengths[:, 0] * side_lengths[:, 1]
-
-
-def iou(boxes1, boxes2, coords='centroids', mode='outer_product', border_pixels='half'):
-    '''
-    Computes the intersection-over-union similarity (also known as Jaccard similarity)
-    of two sets of axis-aligned 2D rectangular boxes.
-
-    Let `boxes1` and `boxes2` contain `m` and `n` boxes, respectively.
-
-    In 'outer_product' mode, returns an `(m,n)` matrix with the IoUs for all possible
-    combinations of the boxes in `boxes1` and `boxes2`.
-
-    In 'element-wise' mode, `m` and `n` must be broadcast-compatible. Refer to the explanation
-    of the `mode` argument for details.
-
-    Arguments:
-        boxes1 (array): Either a 1D Numpy array of shape `(4, )` containing the coordinates for one box in the
-            format specified by `coords` or a 2D Numpy array of shape `(m, 4)` containing the coordinates for `m` boxes.
-            If `mode` is set to 'element-wise', the shape must be broadcast-compatible with `boxes2`.
-        boxes2 (array): Either a 1D Numpy array of shape `(4, )` containing the coordinates for one box in the
-            format specified by `coords` or a 2D Numpy array of shape `(n, 4)` containing the coordinates for `n` boxes.
-            If `mode` is set to 'element-wise', the shape must be broadcast-compatible with `boxes1`.
-        coords (str, optional): The coordinate format in the input arrays. Can be either 'centroids' for the format
-            `(cx, cy, w, h)`, 'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format
-            `(xmin, ymin, xmax, ymax)`.
-        mode (str, optional): Can be one of 'outer_product' and 'element-wise'. In 'outer_product' mode, returns an
-            `(m,n)` matrix with the IoU overlaps for all possible combinations of the `m` boxes in `boxes1` with the
-            `n` boxes in `boxes2`. In 'element-wise' mode, returns a 1D array and the shapes of `boxes1` and `boxes2`
-            must be broadcast-compatible. If both `boxes1` and `boxes2` have `m` boxes, then this returns an array of
-            length `m` where the i-th position contains the IoU overlap of `boxes1[i]` with `boxes2[i]`.
-        border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
-            Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
-            to the boxes. If 'exclude', the border pixels do not belong to the boxes.
-            If 'half', then one of each of the two horizontal and vertical borders belongs
-            to the boxes, but not the other.
-
-    Returns:
-        A 1D or 2D Numpy array (refer to the `mode` argument for details) of dtype float containing values in [0,1],
-        the Jaccard similarity of the boxes in `boxes1` and `boxes2`. 
0 means there is no overlap between two given
-        boxes, 1 means their coordinates are identical.
-    '''
-
-    # Make sure the boxes have the right shapes.
-    if boxes1.ndim > 2:
-        raise ValueError(
-            "boxes1 must have rank either 1 or 2, but has rank {}.".format(boxes1.ndim))
-    if boxes2.ndim > 2:
-        raise ValueError(
-            "boxes2 must have rank either 1 or 2, but has rank {}.".format(boxes2.ndim))
-
-    if boxes1.ndim == 1:
-        boxes1 = np.expand_dims(boxes1, axis=0)
-    if boxes2.ndim == 1:
-        boxes2 = np.expand_dims(boxes2, axis=0)
-
-    if not (boxes1.shape[1] == boxes2.shape[1] == 4):
-        raise ValueError("All boxes must consist of 4 coordinates, but the boxes in `boxes1` and `boxes2` have {} and {} coordinates, respectively.".format(
-            boxes1.shape[1], boxes2.shape[1]))
-    if not mode in {'outer_product', 'element-wise'}:
-        raise ValueError(
-            "`mode` must be one of 'outer_product' and 'element-wise', but got '{}'.".format(mode))
-
-    # Convert the coordinates if necessary.
-    if coords == 'centroids':
-        boxes1 = convert_coordinates(
-            boxes1, start_index=0, conversion='centroids2corners')
-        boxes2 = convert_coordinates(
-            boxes2, start_index=0, conversion='centroids2corners')
-        coords = 'corners'
-    elif not (coords in {'minmax', 'corners'}):
-        raise ValueError(
-            "Unexpected value for `coords`. Supported values are 'minmax', 'corners' and 'centroids'.")
-
-    # Compute the IoU.
-
-    # Compute the intersection areas.
-    intersection_areas = intersection_area_(
-        boxes1, boxes2, coords=coords, mode=mode, border_pixels=border_pixels)
-
-    m = boxes1.shape[0]  # The number of boxes in `boxes1`
-    n = boxes2.shape[0]  # The number of boxes in `boxes2`
-
-    # Compute the union areas.
-
-    # Set the correct coordinate indices for the respective formats.
-    if coords == 'corners':
-        xmin = 0
-        ymin = 1
-        xmax = 2
-        ymax = 3
-    elif coords == 'minmax':
-        xmin = 0
-        xmax = 1
-        ymin = 2
-        ymax = 3
-
-    if border_pixels == 'half':
-        d = 0
-    elif border_pixels == 'include':
-        # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`.
-        d = 1
-    elif border_pixels == 'exclude':
-        # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`.
-        d = -1
-
-    if mode == 'outer_product':
-
-        boxes1_areas = np.tile(np.expand_dims((boxes1[:, xmax] - boxes1[:, xmin] + d) * (
-            boxes1[:, ymax] - boxes1[:, ymin] + d), axis=1), reps=(1, n))
-        boxes2_areas = np.tile(np.expand_dims((boxes2[:, xmax] - boxes2[:, xmin] + d) * (
-            boxes2[:, ymax] - boxes2[:, ymin] + d), axis=0), reps=(m, 1))
-
-    elif mode == 'element-wise':
-
-        boxes1_areas = (boxes1[:, xmax] - boxes1[:, xmin] + d) * \
-            (boxes1[:, ymax] - boxes1[:, ymin] + d)
-        boxes2_areas = (boxes2[:, xmax] - boxes2[:, xmin] + d) * \
-            (boxes2[:, ymax] - boxes2[:, ymin] + d)
-
-    union_areas = boxes1_areas + boxes2_areas - intersection_areas
-
-    return intersection_areas / union_areas
-'''
-The data augmentation operations of the original SSD implementation.
-
-Copyright (C) 2018 Pierluigi Ferrari
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and -limitations under the License. -''' - -from __future__ import division -import numpy as np - -from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation -from data_generator.object_detection_2d_geometric_ops import RandomFlip, RandomTranslate, RandomScale -from data_generator.object_detection_2d_image_boxes_validation_utils import BoundGenerator, BoxFilter, ImageValidator - - -class DataAugmentationConstantInputSize: - ''' - Applies a chain of photometric and geometric image transformations. For documentation, please refer - to the documentation of the individual transformations involved. - - Important: This augmentation chain is suitable for constant-size images only. - ''' - - def __init__(self, - random_brightness=(-48, 48, 0.5), - random_contrast=(0.5, 1.8, 0.5), - random_saturation=(0.5, 1.8, 0.5), - random_hue=(18, 0.5), - random_flip=0.5, - random_translate=((0.03, 0.5), (0.03, 0.5), 0.5), - random_scale=(0.5, 2.0, 0.5), - n_trials_max=3, - clip_boxes=True, - overlap_criterion='area', - bounds_box_filter=(0.3, 1.0), - bounds_validator=(0.5, 1.0), - n_boxes_min=1, - background=(0, 0, 0), - labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - - if (random_scale[0] >= 1) or (random_scale[1] <= 1): - raise ValueError( - "This sequence of transformations only makes sense if the minimum scaling factor is <1 and the maximum scaling factor is >1.") - - self.n_trials_max = n_trials_max - self.clip_boxes = clip_boxes - self.overlap_criterion = overlap_criterion - self.bounds_box_filter = bounds_box_filter - self.bounds_validator = bounds_validator - self.n_boxes_min = n_boxes_min - self.background = background - self.labels_format = labels_format - - # Determines which boxes are kept in an image after the transformations have been applied. - self.box_filter = BoxFilter(check_overlap=True, - check_min_area=True, - check_degenerate=True, - overlap_criterion=self.overlap_criterion, - overlap_bounds=self.bounds_box_filter, - min_area=16, - labels_format=self.labels_format) - - # Determines whether the result of the transformations is a valid training image. - self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion, - bounds=self.bounds_validator, - n_boxes_min=self.n_boxes_min, - labels_format=self.labels_format) - - # Utility distortions - self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV') - self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB') - self.convert_to_float32 = ConvertDataType(to='float32') - self.convert_to_uint8 = ConvertDataType(to='uint8') - # Make sure all images end up having 3 channels. 
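-        # (Grayscale and four-channel inputs are both mapped to 3-channel
-        #  images here; an assumption about ConvertTo3Channels' behavior
-        #  based on its name and the comment above.)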
- self.convert_to_3_channels = ConvertTo3Channels() - - # Photometric transformations - self.random_brightness = RandomBrightness( - lower=random_brightness[0], upper=random_brightness[1], prob=random_brightness[2]) - self.random_contrast = RandomContrast( - lower=random_contrast[0], upper=random_contrast[1], prob=random_contrast[2]) - self.random_saturation = RandomSaturation( - lower=random_saturation[0], upper=random_saturation[1], prob=random_saturation[2]) - self.random_hue = RandomHue( - max_delta=random_hue[0], prob=random_hue[1]) - - # Geometric transformations - self.random_flip = RandomFlip( - dim='horizontal', prob=random_flip, labels_format=self.labels_format) - self.random_translate = RandomTranslate(dy_minmax=random_translate[0], - dx_minmax=random_translate[1], - prob=random_translate[2], - clip_boxes=self.clip_boxes, - box_filter=self.box_filter, - image_validator=self.image_validator, - n_trials_max=self.n_trials_max, - background=self.background, - labels_format=self.labels_format) - self.random_zoom_in = RandomScale(min_factor=1.0, - max_factor=random_scale[1], - prob=random_scale[2], - clip_boxes=self.clip_boxes, - box_filter=self.box_filter, - image_validator=self.image_validator, - n_trials_max=self.n_trials_max, - background=self.background, - labels_format=self.labels_format) - self.random_zoom_out = RandomScale(min_factor=random_scale[0], - max_factor=1.0, - prob=random_scale[2], - clip_boxes=self.clip_boxes, - box_filter=self.box_filter, - image_validator=self.image_validator, - n_trials_max=self.n_trials_max, - background=self.background, - labels_format=self.labels_format) - - # If we zoom in, do translation before scaling. - self.sequence1 = [self.convert_to_3_channels, - self.convert_to_float32, - self.random_brightness, - self.random_contrast, - self.convert_to_uint8, - self.convert_RGB_to_HSV, - self.convert_to_float32, - self.random_saturation, - self.random_hue, - self.convert_to_uint8, - self.convert_HSV_to_RGB, - self.random_translate, - self.random_zoom_in, - self.random_flip] - - # If we zoom out, do scaling before translation. - self.sequence2 = [self.convert_to_3_channels, - self.convert_to_float32, - self.random_brightness, - self.convert_to_uint8, - self.convert_RGB_to_HSV, - self.convert_to_float32, - self.random_saturation, - self.random_hue, - self.convert_to_uint8, - self.convert_HSV_to_RGB, - self.convert_to_float32, - self.random_contrast, - self.convert_to_uint8, - self.random_zoom_out, - self.random_translate, - self.random_flip] - - def __call__(self, image, labels=None): - - self.random_translate.labels_format = self.labels_format - self.random_zoom_in.labels_format = self.labels_format - self.random_zoom_out.labels_format = self.labels_format - self.random_flip.labels_format = self.labels_format - - # Choose sequence 1 with probability 0.5. - if np.random.choice(2): - - if not (labels is None): - for transform in self.sequence1: - image, labels = transform(image, labels) - return image, labels - else: - for transform in self.sequence1: - image = transform(image) - return image - # Choose sequence 2 with probability 0.5. - else: - - if not (labels is None): - for transform in self.sequence2: - image, labels = transform(image, labels) - return image, labels - else: - for transform in self.sequence2: - image = transform(image) - return image -''' -The data augmentation operations of the original SSD implementation. 
- -Copyright (C) 2018 Pierluigi Ferrari - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -''' - -from __future__ import division -import numpy as np -import cv2 -import inspect - -from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation, RandomChannelSwap -from data_generator.object_detection_2d_patch_sampling_ops import PatchCoordinateGenerator, RandomPatch, RandomPatchInf -from data_generator.object_detection_2d_geometric_ops import ResizeRandomInterp, RandomFlip -from data_generator.object_detection_2d_image_boxes_validation_utils import BoundGenerator, BoxFilter, ImageValidator - - -class SSDRandomCrop: - ''' - Performs the same random crops as defined by the `batch_sampler` instructions - of the original Caffe implementation of SSD. A description of this random cropping - strategy can also be found in the data augmentation section of the paper: - https://arxiv.org/abs/1512.02325 - ''' - - def __init__(self, labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - ''' - Arguments: - labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels - of an image contains which bounding box coordinate. The dictionary maps at least the keywords - 'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array. - ''' - - self.labels_format = labels_format - - # This randomly samples one of the lower IoU bounds defined - # by the `sample_space` every time it is called. - self.bound_generator = BoundGenerator(sample_space=((None, None), - (0.1, None), - (0.3, None), - (0.5, None), - (0.7, None), - (0.9, None)), - weights=None) - - # Produces coordinates for candidate patches such that the height - # and width of the patches are between 0.3 and 1.0 of the height - # and width of the respective image and the aspect ratio of the - # patches is between 0.5 and 2.0. - self.patch_coord_generator = PatchCoordinateGenerator(must_match='h_w', - min_scale=0.3, - max_scale=1.0, - scale_uniformly=False, - min_aspect_ratio=0.5, - max_aspect_ratio=2.0) - - # Filters out boxes whose center point does not lie within the - # chosen patches. - self.box_filter = BoxFilter(check_overlap=True, - check_min_area=False, - check_degenerate=False, - overlap_criterion='center_point', - labels_format=self.labels_format) - - # Determines whether a given patch is considered a valid patch. - # Defines a patch to be valid if at least one ground truth bounding box - # (n_boxes_min == 1) has an IoU overlap with the patch that - # meets the requirements defined by `bound_generator`. - self.image_validator = ImageValidator(overlap_criterion='iou', - n_boxes_min=1, - labels_format=self.labels_format, - border_pixels='half') - - # Performs crops according to the parameters set in the objects above. - # Runs until either a valid patch is found or the original input image - # is returned unaltered. 
Runs a maximum of 50 trials to find a valid - # patch for each new sampled IoU threshold. Every 50 trials, the original - # image is returned as is with probability (1 - prob) = 0.143. - self.random_crop = RandomPatchInf(patch_coord_generator=self.patch_coord_generator, - box_filter=self.box_filter, - image_validator=self.image_validator, - bound_generator=self.bound_generator, - n_trials_max=50, - clip_boxes=True, - prob=0.857, - labels_format=self.labels_format) - - def __call__(self, image, labels=None, return_inverter=False): - self.random_crop.labels_format = self.labels_format - return self.random_crop(image, labels, return_inverter) - - -class SSDExpand: - ''' - Performs the random image expansion as defined by the `train_transform_param` instructions - of the original Caffe implementation of SSD. A description of this expansion strategy - can also be found in section 3.6 ("Data Augmentation for Small Object Accuracy") of the paper: - https://arxiv.org/abs/1512.02325 - ''' - - def __init__(self, background=(123, 117, 104), labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - ''' - Arguments: - background (list/tuple, optional): A 3-tuple specifying the RGB color value of the - background pixels of the translated images. - labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels - of an image contains which bounding box coordinate. The dictionary maps at least the keywords - 'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array. - ''' - - self.labels_format = labels_format - - # Generate coordinates for patches that are between 1.0 and 4.0 times - # the size of the input image in both spatial dimensions. - self.patch_coord_generator = PatchCoordinateGenerator(must_match='h_w', - min_scale=1.0, - max_scale=4.0, - scale_uniformly=True) - - # With probability 0.5, place the input image randomly on a canvas filled with - # mean color values according to the parameters set above. With probability 0.5, - # return the input image unaltered. - self.expand = RandomPatch(patch_coord_generator=self.patch_coord_generator, - box_filter=None, - image_validator=None, - n_trials_max=1, - clip_boxes=False, - prob=0.5, - background=background, - labels_format=self.labels_format) - - def __call__(self, image, labels=None, return_inverter=False): - self.expand.labels_format = self.labels_format - return self.expand(image, labels, return_inverter) - - -class SSDPhotometricDistortions: - ''' - Performs the photometric distortions defined by the `train_transform_param` instructions - of the original Caffe implementation of SSD. 
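-    One of two distortion orders is applied with equal probability; they
-    differ only in whether the contrast adjustment happens before or after
-    the saturation and hue adjustments.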
-    '''
-
-    def __init__(self):
-
-        self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
-        self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
-        self.convert_to_float32 = ConvertDataType(to='float32')
-        self.convert_to_uint8 = ConvertDataType(to='uint8')
-        self.convert_to_3_channels = ConvertTo3Channels()
-        self.random_brightness = RandomBrightness(
-            lower=-32, upper=32, prob=0.5)
-        self.random_contrast = RandomContrast(lower=0.5, upper=1.5, prob=0.5)
-        self.random_saturation = RandomSaturation(
-            lower=0.5, upper=1.5, prob=0.5)
-        self.random_hue = RandomHue(max_delta=18, prob=0.5)
-        self.random_channel_swap = RandomChannelSwap(prob=0.0)
-
-        self.sequence1 = [self.convert_to_3_channels,
-                          self.convert_to_float32,
-                          self.random_brightness,
-                          self.random_contrast,
-                          self.convert_to_uint8,
-                          self.convert_RGB_to_HSV,
-                          self.convert_to_float32,
-                          self.random_saturation,
-                          self.random_hue,
-                          self.convert_to_uint8,
-                          self.convert_HSV_to_RGB,
-                          self.random_channel_swap]
-
-        self.sequence2 = [self.convert_to_3_channels,
-                          self.convert_to_float32,
-                          self.random_brightness,
-                          self.convert_to_uint8,
-                          self.convert_RGB_to_HSV,
-                          self.convert_to_float32,
-                          self.random_saturation,
-                          self.random_hue,
-                          self.convert_to_uint8,
-                          self.convert_HSV_to_RGB,
-                          self.convert_to_float32,
-                          self.random_contrast,
-                          self.convert_to_uint8,
-                          self.random_channel_swap]
-
-    def __call__(self, image, labels):
-
-        # Choose sequence 1 with probability 0.5.
-        if np.random.choice(2):
-            for transform in self.sequence1:
-                image, labels = transform(image, labels)
-            return image, labels
-        # Choose sequence 2 with probability 0.5.
-        else:
-            for transform in self.sequence2:
-                image, labels = transform(image, labels)
-            return image, labels
-
-
-class SSDDataAugmentation:
-    '''
-    Reproduces the data augmentation pipeline used in the training of the original
-    Caffe implementation of SSD.
-    '''
-
-    def __init__(self,
-                 img_height=300,
-                 img_width=300,
-                 background=(123, 117, 104),
-                 labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
-        '''
-        Arguments:
-            img_height (int): The desired height of the output images in pixels.
-            img_width (int): The desired width of the output images in pixels.
-            background (list/tuple, optional): A 3-tuple specifying the RGB color value of the
-                background pixels of the translated images.
-            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
-                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within the last axis of the labels array.
-        '''
-
-        self.labels_format = labels_format
-
-        self.photometric_distortions = SSDPhotometricDistortions()
-        self.expand = SSDExpand(background=background,
-                                labels_format=self.labels_format)
-        self.random_crop = SSDRandomCrop(labels_format=self.labels_format)
-        self.random_flip = RandomFlip(
-            dim='horizontal', prob=0.5, labels_format=self.labels_format)
-
-        # This box filter makes sure that the resized images don't contain any degenerate boxes.
-        # Resizing the images could lead the boxes to become smaller. For boxes that are already
-        # pretty small, that might result in boxes with height and/or width zero, which we obviously
-        # cannot allow.
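-        # (Here, "degenerate" means a box whose width or height has collapsed
-        #  to zero or less after the resize.)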
- self.box_filter = BoxFilter(check_overlap=False, - check_min_area=False, - check_degenerate=True, - labels_format=self.labels_format) - - self.resize = ResizeRandomInterp(height=img_height, - width=img_width, - interpolation_modes=[cv2.INTER_NEAREST, - cv2.INTER_LINEAR, - cv2.INTER_CUBIC, - cv2.INTER_AREA, - cv2.INTER_LANCZOS4], - box_filter=self.box_filter, - labels_format=self.labels_format) - - self.sequence = [self.photometric_distortions, - self.expand, - self.random_crop, - self.random_flip, - self.resize] - - def __call__(self, image, labels, return_inverter=False): - self.expand.labels_format = self.labels_format - self.random_crop.labels_format = self.labels_format - self.random_flip.labels_format = self.labels_format - self.resize.labels_format = self.labels_format - - inverters = [] - - for transform in self.sequence: - if return_inverter and ('return_inverter' in inspect.signature(transform).parameters): - image, labels, inverter = transform( - image, labels, return_inverter=True) - inverters.append(inverter) - else: - image, labels = transform(image, labels) - - if return_inverter: - return image, labels, inverters[::-1] - else: - return image, labels -''' -A data augmentation pipeline for datasets in bird's eye view, i.e. where there is -no "up" or "down" in the images. - -Copyright (C) 2018 Pierluigi Ferrari - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -''' - -from __future__ import division -import numpy as np - -from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation -from data_generator.object_detection_2d_geometric_ops import Resize, RandomFlip, RandomRotate -from data_generator.object_detection_2d_patch_sampling_ops import PatchCoordinateGenerator, RandomPatch -from data_generator.object_detection_2d_image_boxes_validation_utils import BoxFilter, ImageValidator - - -class DataAugmentationSatellite: - ''' - A data augmentation pipeline for datasets in bird's eye view, i.e. where there is - no "up" or "down" in the images. - - Applies a chain of photometric and geometric image transformations. For documentation, please refer - to the documentation of the individual transformations involved. 
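-    Since aerial imagery has no canonical orientation, this chain adds vertical
-    flips and 90/180/270 degree rotations on top of the usual horizontal flips.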
- ''' - - def __init__(self, - resize_height, - resize_width, - random_brightness=(-48, 48, 0.5), - random_contrast=(0.5, 1.8, 0.5), - random_saturation=(0.5, 1.8, 0.5), - random_hue=(18, 0.5), - random_flip=0.5, - random_rotate=([90, 180, 270], 0.5), - min_scale=0.3, - max_scale=2.0, - min_aspect_ratio=0.8, - max_aspect_ratio=1.25, - n_trials_max=3, - clip_boxes=True, - overlap_criterion='area', - bounds_box_filter=(0.3, 1.0), - bounds_validator=(0.5, 1.0), - n_boxes_min=1, - background=(0, 0, 0), - labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - - self.n_trials_max = n_trials_max - self.clip_boxes = clip_boxes - self.overlap_criterion = overlap_criterion - self.bounds_box_filter = bounds_box_filter - self.bounds_validator = bounds_validator - self.n_boxes_min = n_boxes_min - self.background = background - self.labels_format = labels_format - - # Determines which boxes are kept in an image after the transformations have been applied. - self.box_filter_patch = BoxFilter(check_overlap=True, - check_min_area=False, - check_degenerate=False, - overlap_criterion=self.overlap_criterion, - overlap_bounds=self.bounds_box_filter, - labels_format=self.labels_format) - - self.box_filter_resize = BoxFilter(check_overlap=False, - check_min_area=True, - check_degenerate=True, - min_area=16, - labels_format=self.labels_format) - - # Determines whether the result of the transformations is a valid training image. - self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion, - bounds=self.bounds_validator, - n_boxes_min=self.n_boxes_min, - labels_format=self.labels_format) - - # Utility transformations - # Make sure all images end up having 3 channels. - self.convert_to_3_channels = ConvertTo3Channels() - self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV') - self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB') - self.convert_to_float32 = ConvertDataType(to='float32') - self.convert_to_uint8 = ConvertDataType(to='uint8') - self.resize = Resize(height=resize_height, - width=resize_width, - box_filter=self.box_filter_resize, - labels_format=self.labels_format) - - # Photometric transformations - self.random_brightness = RandomBrightness( - lower=random_brightness[0], upper=random_brightness[1], prob=random_brightness[2]) - self.random_contrast = RandomContrast( - lower=random_contrast[0], upper=random_contrast[1], prob=random_contrast[2]) - self.random_saturation = RandomSaturation( - lower=random_saturation[0], upper=random_saturation[1], prob=random_saturation[2]) - self.random_hue = RandomHue( - max_delta=random_hue[0], prob=random_hue[1]) - - # Geometric transformations - self.random_horizontal_flip = RandomFlip( - dim='horizontal', prob=random_flip, labels_format=self.labels_format) - self.random_vertical_flip = RandomFlip( - dim='vertical', prob=random_flip, labels_format=self.labels_format) - self.random_rotate = RandomRotate( - angles=random_rotate[0], prob=random_rotate[1], labels_format=self.labels_format) - self.patch_coord_generator = PatchCoordinateGenerator(must_match='w_ar', - min_scale=min_scale, - max_scale=max_scale, - scale_uniformly=False, - min_aspect_ratio=min_aspect_ratio, - max_aspect_ratio=max_aspect_ratio) - self.random_patch = RandomPatch(patch_coord_generator=self.patch_coord_generator, - box_filter=self.box_filter_patch, - image_validator=self.image_validator, - n_trials_max=self.n_trials_max, - clip_boxes=self.clip_boxes, - prob=1.0, - can_fail=False, - labels_format=self.labels_format) - - # Define the 
processing chain.
-        self.transformations = [self.convert_to_3_channels,
-                                self.convert_to_float32,
-                                self.random_brightness,
-                                self.random_contrast,
-                                self.convert_to_uint8,
-                                self.convert_RGB_to_HSV,
-                                self.convert_to_float32,
-                                self.random_saturation,
-                                self.random_hue,
-                                self.convert_to_uint8,
-                                self.convert_HSV_to_RGB,
-                                self.random_horizontal_flip,
-                                self.random_vertical_flip,
-                                self.random_rotate,
-                                self.random_patch,
-                                self.resize]
-
-    def __call__(self, image, labels=None):
-
-        self.random_patch.labels_format = self.labels_format
-        self.random_horizontal_flip.labels_format = self.labels_format
-        self.random_vertical_flip.labels_format = self.labels_format
-        self.random_rotate.labels_format = self.labels_format
-        self.resize.labels_format = self.labels_format
-
-        if not (labels is None):
-            for transform in self.transformations:
-                image, labels = transform(image, labels)
-            return image, labels
-        else:
-            for transform in self.transformations:
-                image = transform(image)
-            return image
-'''
-A data augmentation pipeline suitable for variable-size images that produces effects
-that are similar (but not identical) to those of the original SSD data augmentation
-pipeline while being faster.
-
-Copyright (C) 2018 Pierluigi Ferrari
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-from __future__ import division
-import numpy as np
-
-from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation
-from data_generator.object_detection_2d_geometric_ops import Resize, RandomFlip
-from data_generator.object_detection_2d_patch_sampling_ops import PatchCoordinateGenerator, RandomPatch
-from data_generator.object_detection_2d_image_boxes_validation_utils import BoxFilter, ImageValidator
-
-
-class DataAugmentationVariableInputSize:
-    '''
-    A data augmentation pipeline suitable for variable-size images that produces effects
-    that are similar (but not identical!) to those of the original SSD data augmentation
-    pipeline while being faster.
-
-    Applies a chain of photometric and geometric image transformations. For documentation, please refer
-    to the documentation of the individual transformations involved.
-'''
-A data augmentation pipeline suitable for variable-size images that produces effects
-that are similar (but not identical) to those of the original SSD data augmentation
-pipeline while being faster.
-
-Copyright (C) 2018 Pierluigi Ferrari
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-from __future__ import division
-import numpy as np
-
-from data_generator.object_detection_2d_photometric_ops import ConvertColor, ConvertDataType, ConvertTo3Channels, RandomBrightness, RandomContrast, RandomHue, RandomSaturation
-from data_generator.object_detection_2d_geometric_ops import Resize, RandomFlip
-from data_generator.object_detection_2d_patch_sampling_ops import PatchCoordinateGenerator, RandomPatch
-from data_generator.object_detection_2d_image_boxes_validation_utils import BoxFilter, ImageValidator
-
-
-class DataAugmentationVariableInputSize:
-    '''
-    A data augmentation pipeline suitable for variable-size images that produces effects
-    that are similar (but not identical!) to those of the original SSD data augmentation
-    pipeline while being faster.
-
-    Applies a chain of photometric and geometric image transformations. For documentation, please refer
-    to the documentation of the individual transformations involved.
-    '''
-
-    def __init__(self,
-                 resize_height,
-                 resize_width,
-                 random_brightness=(-48, 48, 0.5),
-                 random_contrast=(0.5, 1.8, 0.5),
-                 random_saturation=(0.5, 1.8, 0.5),
-                 random_hue=(18, 0.5),
-                 random_flip=0.5,
-                 min_scale=0.3,
-                 max_scale=2.0,
-                 min_aspect_ratio=0.5,
-                 max_aspect_ratio=2.0,
-                 n_trials_max=3,
-                 clip_boxes=True,
-                 overlap_criterion='area',
-                 bounds_box_filter=(0.3, 1.0),
-                 bounds_validator=(0.5, 1.0),
-                 n_boxes_min=1,
-                 background=(0, 0, 0),
-                 labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
-
-        self.n_trials_max = n_trials_max
-        self.clip_boxes = clip_boxes
-        self.overlap_criterion = overlap_criterion
-        self.bounds_box_filter = bounds_box_filter
-        self.bounds_validator = bounds_validator
-        self.n_boxes_min = n_boxes_min
-        self.background = background
-        self.labels_format = labels_format
-
-        # Determines which boxes are kept in an image after the transformations have been applied.
-        self.box_filter_patch = BoxFilter(check_overlap=True,
-                                          check_min_area=False,
-                                          check_degenerate=False,
-                                          overlap_criterion=self.overlap_criterion,
-                                          overlap_bounds=self.bounds_box_filter,
-                                          labels_format=self.labels_format)
-
-        self.box_filter_resize = BoxFilter(check_overlap=False,
-                                           check_min_area=True,
-                                           check_degenerate=True,
-                                           min_area=16,
-                                           labels_format=self.labels_format)
-
-        # Determines whether the result of the transformations is a valid training image.
-        self.image_validator = ImageValidator(overlap_criterion=self.overlap_criterion,
-                                              bounds=self.bounds_validator,
-                                              n_boxes_min=self.n_boxes_min,
-                                              labels_format=self.labels_format)
-
-        # Utility transformations
-        # Make sure all images end up having 3 channels.
-        self.convert_to_3_channels = ConvertTo3Channels()
-        self.convert_RGB_to_HSV = ConvertColor(current='RGB', to='HSV')
-        self.convert_HSV_to_RGB = ConvertColor(current='HSV', to='RGB')
-        self.convert_to_float32 = ConvertDataType(to='float32')
-        self.convert_to_uint8 = ConvertDataType(to='uint8')
-        self.resize = Resize(height=resize_height,
-                             width=resize_width,
-                             box_filter=self.box_filter_resize,
-                             labels_format=self.labels_format)
-
-        # Photometric transformations
-        self.random_brightness = RandomBrightness(
-            lower=random_brightness[0], upper=random_brightness[1], prob=random_brightness[2])
-        self.random_contrast = RandomContrast(
-            lower=random_contrast[0], upper=random_contrast[1], prob=random_contrast[2])
-        self.random_saturation = RandomSaturation(
-            lower=random_saturation[0], upper=random_saturation[1], prob=random_saturation[2])
-        self.random_hue = RandomHue(
-            max_delta=random_hue[0], prob=random_hue[1])
-
-        # Geometric transformations
-        self.random_flip = RandomFlip(
-            dim='horizontal', prob=random_flip, labels_format=self.labels_format)
-        self.patch_coord_generator = PatchCoordinateGenerator(must_match='w_ar',
-                                                              min_scale=min_scale,
-                                                              max_scale=max_scale,
-                                                              scale_uniformly=False,
-                                                              min_aspect_ratio=min_aspect_ratio,
-                                                              max_aspect_ratio=max_aspect_ratio)
-        self.random_patch = RandomPatch(patch_coord_generator=self.patch_coord_generator,
-                                        box_filter=self.box_filter_patch,
-                                        image_validator=self.image_validator,
-                                        n_trials_max=self.n_trials_max,
-                                        clip_boxes=self.clip_boxes,
-                                        prob=1.0,
-                                        can_fail=False,
-                                        labels_format=self.labels_format)
-
-        # Define the processing chain
-        self.transformations = [self.convert_to_3_channels,
-                                self.convert_to_float32,
-                                self.random_brightness,
-                                self.random_contrast,
-                                self.convert_to_uint8,
-                                self.convert_RGB_to_HSV,
-                                self.convert_to_float32,
-                                self.random_saturation,
-                                self.random_hue,
-                                self.convert_to_uint8,
-                                self.convert_HSV_to_RGB,
-                                self.random_patch,
-                                self.random_flip,
-                                self.resize]
-
-    def __call__(self, image, labels=None):
-
-        self.random_patch.labels_format = self.labels_format
-        self.random_flip.labels_format = self.labels_format
-        self.resize.labels_format = self.labels_format
-
-        if not (labels is None):
-            for transform in self.transformations:
-                image, labels = transform(image, labels)
-            return image, labels
-        else:
-            for transform in self.transformations:
-                image = transform(image)
-            return image
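`DataAugmentationVariableInputSize` is named in full here, so a usage sketch can be concrete. This is an illustrative guess at how it slots into a training pipeline, not something the scraped file itself demonstrates:

    data_augmentation = DataAugmentationVariableInputSize(resize_height=300,
                                                          resize_width=300)
    # Applied one sample at a time; boxes are filtered and clipped along the way.
    image, labels = data_augmentation(image, labels)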
-'''
-A data generator for 2D object detection.
-
-Copyright (C) 2018 Pierluigi Ferrari
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-from __future__ import division
-import numpy as np
-import inspect
-from collections import defaultdict
-import warnings
-import sklearn.utils
-from copy import deepcopy
-from PIL import Image
-import cv2
-import csv
-import os
-import sys
-from tqdm import tqdm, trange
-try:
-    import h5py
-except ImportError:
-    warnings.warn(
-        "'h5py' module is missing. The fast HDF5 dataset option will be unavailable.")
-try:
-    import json
-except ImportError:
-    warnings.warn(
-        "'json' module is missing. The JSON-parser will be unavailable.")
-try:
-    from bs4 import BeautifulSoup
-except ImportError:
-    warnings.warn(
-        "'BeautifulSoup' module is missing. The XML-parser will be unavailable.")
-try:
-    import pickle
-except ImportError:
-    warnings.warn(
-        "'pickle' module is missing. You won't be able to save parsed file lists and annotations as pickled files.")
-
-from ssd_encoder_decoder.ssd_input_encoder import SSDInputEncoder
-from data_generator.object_detection_2d_image_boxes_validation_utils import BoxFilter
-
-
-class DegenerateBatchError(Exception):
-    '''
-    An exception class to be raised if a generated batch ends up being degenerate,
-    e.g. if a generated batch is empty.
-    '''
-    pass
-
-
-class DatasetError(Exception):
-    '''
-    An exception class to be raised if anything is wrong with the dataset,
-    in particular if you try to generate batches when no dataset was loaded.
-    '''
-    pass
-
-
-class DataGenerator:
-    '''
-    A generator to generate batches of samples and corresponding labels indefinitely.
-
-    Can shuffle the dataset consistently after each complete pass.
-
-    Currently provides three methods to parse annotation data: A general-purpose CSV parser,
-    an XML parser for the Pascal VOC datasets, and a JSON parser for the MS COCO datasets.
-    If the annotations of your dataset are in a format that is not supported by these parsers,
-    you could just add another parser method and still use this generator.
-
-    Can perform image transformations for data conversion and data augmentation,
-    for details please refer to the documentation of the `generate()` method.
-    '''
-
-    def __init__(self,
-                 load_images_into_memory=False,
-                 hdf5_dataset_path=None,
-                 filenames=None,
-                 filenames_type='text',
-                 images_dir=None,
-                 labels=None,
-                 image_ids=None,
-                 eval_neutral=None,
-                 labels_output_format=(
-                     'class_id', 'xmin', 'ymin', 'xmax', 'ymax'),
-                 verbose=True):
-        '''
-        Initializes the data generator. You can either load a dataset directly here in the constructor,
-        e.g. an HDF5 dataset, or you can use one of the parser methods to read in a dataset.
-
-        Arguments:
-            load_images_into_memory (bool, optional): If `True`, the entire dataset will be loaded into memory.
-                This enables noticeably faster data generation than loading batches of images into memory ad hoc.
-                Be sure that you have enough memory before you activate this option.
-            hdf5_dataset_path (str, optional): The full file path of an HDF5 file that contains a dataset in the
-                format that the `create_hdf5_dataset()` method produces. If you load such an HDF5 dataset, you
-                don't need to use any of the parser methods anymore; the HDF5 dataset already contains all relevant
-                data.
-            filenames (string or list, optional): `None` or either a Python list/tuple or a string representing
-                a filepath. If a list/tuple is passed, it must contain the file names (full paths) of the
-                images to be used. Note that the list/tuple must contain the paths to the images,
-                not the images themselves. If a filepath string is passed, it must point either to
-                (1) a pickled file containing a list/tuple as described above. In this case the `filenames_type`
-                argument must be set to `pickle`.
-                Or
-                (2) a text file. Each line of the text file contains the file name (basename of the file only,
-                not the full directory path) to one image and nothing else. In this case the `filenames_type`
-                argument must be set to `text` and you must pass the path to the directory that contains the
-                images in `images_dir`.
-            filenames_type (string, optional): In case a string is passed for `filenames`, this indicates what
-                type of file `filenames` is. It can be either 'pickle' for a pickled file or 'text' for a
-                plain text file.
-            images_dir (string, optional): In case a text file is passed for `filenames`, the full paths to
-                the images will be composed from `images_dir` and the names in the text file, i.e. this
-                should be the directory that contains the images to which the text file refers.
-                If `filenames_type` is not 'text', then this argument is irrelevant.
-            labels (string or list, optional): `None` or either a Python list/tuple or a string representing
-                the path to a pickled file containing a list/tuple. The list/tuple must contain Numpy arrays
-                that represent the labels of the dataset.
-            image_ids (string or list, optional): `None` or either a Python list/tuple or a string representing
-                the path to a pickled file containing a list/tuple. The list/tuple must contain the image
-                IDs of the images in the dataset.
-            eval_neutral (string or list, optional): `None` or either a Python list/tuple or a string representing
-                the path to a pickled file containing a list/tuple. The list/tuple must contain for each image
-                a list that indicates for each ground truth object in the image whether that object is supposed
-                to be treated as neutral during an evaluation.
-            labels_output_format (list, optional): A list of five strings representing the desired order of the five
-                items class ID, xmin, ymin, xmax, ymax in the generated ground truth data (if any). The expected
-                strings are 'xmin', 'ymin', 'xmax', 'ymax', 'class_id'.
-            verbose (bool, optional): If `True`, prints out the progress for some constructor operations that may
-                take a bit longer.
-        '''
-        self.labels_output_format = labels_output_format
-        self.labels_format = {'class_id': labels_output_format.index('class_id'),
-                              'xmin': labels_output_format.index('xmin'),
-                              'ymin': labels_output_format.index('ymin'),
-                              'xmax': labels_output_format.index('xmax'),
-                              'ymax': labels_output_format.index('ymax')}  # This dictionary is for internal use.
-
-        # As long as we haven't loaded anything yet, the dataset size is zero.
-        self.dataset_size = 0
-        self.load_images_into_memory = load_images_into_memory
-        # The only way that this list will not stay `None` is if `load_images_into_memory == True`.
-        self.images = None
-
-        # `self.filenames` is a list containing all file names of the image samples (full paths).
-        # Note that it does not contain the actual image files themselves. This list is one of the outputs of the parser methods.
-        # In case you are loading an HDF5 dataset, this list will be `None`.
-        if not filenames is None:
-            if isinstance(filenames, (list, tuple)):
-                self.filenames = filenames
-            elif isinstance(filenames, str):
-                with open(filenames, 'rb') as f:
-                    if filenames_type == 'pickle':
-                        self.filenames = pickle.load(f)
-                    elif filenames_type == 'text':
-                        self.filenames = [os.path.join(
-                            images_dir, line.strip()) for line in f]
-                    else:
-                        raise ValueError(
-                            "`filenames_type` can be either 'text' or 'pickle'.")
-            else:
-                raise ValueError(
-                    "`filenames` must be either a Python list/tuple or a string representing a filepath (to a pickled or text file). The value you passed is neither of the two.")
-            self.dataset_size = len(self.filenames)
-            self.dataset_indices = np.arange(self.dataset_size, dtype=np.int32)
-            if load_images_into_memory:
-                self.images = []
-                if verbose:
-                    it = tqdm(
-                        self.filenames, desc='Loading images into memory', file=sys.stdout)
-                else:
-                    it = self.filenames
-                for filename in it:
-                    with Image.open(filename) as image:
-                        self.images.append(np.array(image, dtype=np.uint8))
-        else:
-            self.filenames = None
-
-        # In case ground truth is available, `self.labels` is a list containing for each image a list (or NumPy array)
-        # of ground truth bounding boxes for that image.
-        if not labels is None:
-            if isinstance(labels, str):
-                with open(labels, 'rb') as f:
-                    self.labels = pickle.load(f)
-            elif isinstance(labels, (list, tuple)):
-                self.labels = labels
-            else:
-                raise ValueError(
-                    "`labels` must be either a Python list/tuple or a string representing the path to a pickled file containing a list/tuple. The value you passed is neither of the two.")
-        else:
-            self.labels = None
-
-        if not image_ids is None:
-            if isinstance(image_ids, str):
-                with open(image_ids, 'rb') as f:
-                    self.image_ids = pickle.load(f)
-            elif isinstance(image_ids, (list, tuple)):
-                self.image_ids = image_ids
-            else:
-                raise ValueError(
-                    "`image_ids` must be either a Python list/tuple or a string representing the path to a pickled file containing a list/tuple. The value you passed is neither of the two.")
-        else:
-            self.image_ids = None
-
-        if not eval_neutral is None:
-            if isinstance(eval_neutral, str):
-                with open(eval_neutral, 'rb') as f:
-                    self.eval_neutral = pickle.load(f)
-            elif isinstance(eval_neutral, (list, tuple)):
-                self.eval_neutral = eval_neutral
-            else:
-                raise ValueError(
-                    "`eval_neutral` must be either a Python list/tuple or a string representing the path to a pickled file containing a list/tuple. The value you passed is neither of the two.")
-        else:
-            self.eval_neutral = None
-
-        if not hdf5_dataset_path is None:
-            self.hdf5_dataset_path = hdf5_dataset_path
-            self.load_hdf5_dataset(verbose=verbose)
-        else:
-            self.hdf5_dataset = None
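Putting the constructor's options together: you either start empty and call one of the parser methods below, or point it at an existing HDF5 dataset. A hedged sketch with made-up paths:

    # Option 1: construct empty, then parse annotations (see the parser methods below).
    dataset = DataGenerator(load_images_into_memory=False, hdf5_dataset_path=None)
    # Option 2: load a previously serialized HDF5 dataset directly.
    # dataset = DataGenerator(hdf5_dataset_path='dataset.h5')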
-    def load_hdf5_dataset(self, verbose=True):
-        '''
-        Loads an HDF5 dataset that is in the format that the `create_hdf5_dataset()` method
-        produces.
-
-        Arguments:
-            verbose (bool, optional): If `True`, prints out the progress while loading
-                the dataset.
-
-        Returns:
-            None.
-        '''
-
-        self.hdf5_dataset = h5py.File(self.hdf5_dataset_path, 'r')
-        self.dataset_size = len(self.hdf5_dataset['images'])
-        # Instead of shuffling the HDF5 dataset or images in memory, we will shuffle this index list.
-        self.dataset_indices = np.arange(self.dataset_size, dtype=np.int32)
-
-        if self.load_images_into_memory:
-            self.images = []
-            if verbose:
-                tr = trange(self.dataset_size,
-                            desc='Loading images into memory', file=sys.stdout)
-            else:
-                tr = range(self.dataset_size)
-            for i in tr:
-                self.images.append(self.hdf5_dataset['images'][i].reshape(
-                    self.hdf5_dataset['image_shapes'][i]))
-
-        if self.hdf5_dataset.attrs['has_labels']:
-            self.labels = []
-            labels = self.hdf5_dataset['labels']
-            label_shapes = self.hdf5_dataset['label_shapes']
-            if verbose:
-                tr = trange(self.dataset_size,
-                            desc='Loading labels', file=sys.stdout)
-            else:
-                tr = range(self.dataset_size)
-            for i in tr:
-                self.labels.append(labels[i].reshape(label_shapes[i]))
-
-        if self.hdf5_dataset.attrs['has_image_ids']:
-            self.image_ids = []
-            image_ids = self.hdf5_dataset['image_ids']
-            if verbose:
-                tr = trange(self.dataset_size,
-                            desc='Loading image IDs', file=sys.stdout)
-            else:
-                tr = range(self.dataset_size)
-            for i in tr:
-                self.image_ids.append(image_ids[i])
-
-        if self.hdf5_dataset.attrs['has_eval_neutral']:
-            self.eval_neutral = []
-            eval_neutral = self.hdf5_dataset['eval_neutral']
-            if verbose:
-                tr = trange(
-                    self.dataset_size, desc='Loading evaluation-neutrality annotations', file=sys.stdout)
-            else:
-                tr = range(self.dataset_size)
-            for i in tr:
-                self.eval_neutral.append(eval_neutral[i])
-
-    def parse_csv(self,
-                  images_dir,
-                  labels_filename,
-                  input_format,
-                  include_classes='all',
-                  random_sample=False,
-                  ret=False,
-                  verbose=True):
-        '''
-        Arguments:
-            images_dir (str): The path to the directory that contains the images.
-            labels_filename (str): The filepath to a CSV file that contains one ground truth bounding box per line
-                and each line contains the following six items: image file name, class ID, xmin, xmax, ymin, ymax.
-                The six items do not have to be in a specific order, but they must be the first six columns of
-                each line. The order of these items in the CSV file must be specified in `input_format`.
-                The class ID is an integer greater than zero. Class ID 0 is reserved for the background class.
-                `xmin` and `xmax` are the left-most and right-most absolute horizontal coordinates of the box,
-                `ymin` and `ymax` are the top-most and bottom-most absolute vertical coordinates of the box.
-                The image name is expected to be just the name of the image file without the directory path
-                at which the image is located.
-            input_format (list): A list of six strings representing the order of the six items
-                image file name, class ID, xmin, xmax, ymin, ymax in the input CSV file. The expected strings
-                are 'image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'.
-            include_classes (list, optional): Either 'all' or a list of integers containing the class IDs that
-                are to be included in the dataset. If 'all', all ground truth boxes will be included in the dataset.
-            random_sample (float, optional): Either `False` or a float in `[0,1]`. If this is `False`, the
-                full dataset will be used by the generator. If this is a float in `[0,1]`, a randomly sampled
-                fraction of the dataset will be used, where `random_sample` is the fraction of the dataset
-                to be used. For example, if `random_sample = 0.2`, 20 percent of the dataset will be randomly selected,
-                the rest will be omitted. The fraction refers to the number of images, not to the number
-                of boxes, i.e. each image that will be added to the dataset will always be added with all
-                of its boxes.
-            ret (bool, optional): Whether or not to return the outputs of the parser.
-            verbose (bool, optional): If `True`, prints out the progress for operations that may take a bit longer.
-
-        Returns:
-            None by default, optionally lists for whichever are available of images, image filenames, labels, and image IDs.
-        '''
-
-        # Set class members.
-        self.images_dir = images_dir
-        self.labels_filename = labels_filename
-        self.input_format = input_format
-        self.include_classes = include_classes
-
-        # Before we begin, make sure that we have a labels_filename and an input_format
-        if self.labels_filename is None or self.input_format is None:
-            raise ValueError(
-                "`labels_filename` and/or `input_format` have not been set yet. You need to pass them as arguments.")
-
-        # Erase data that might have been parsed before
-        self.filenames = []
-        self.image_ids = []
-        self.labels = []
-
-        # First, just read in the CSV file lines and sort them.
-
-        data = []
-
-        with open(self.labels_filename, newline='') as csvfile:
-            csvread = csv.reader(csvfile, delimiter=',')
-            next(csvread)  # Skip the header row.
-            # For every line (i.e. for every bounding box) in the CSV file...
-            for row in csvread:
-                # If the class_id is among the classes that are to be included in the dataset...
-                if self.include_classes == 'all' or int(row[self.input_format.index('class_id')].strip()) in self.include_classes:
-                    box = []  # Store the box class and coordinates here
-                    # Select the image name column in the input format and append its content to `box`
-                    box.append(
-                        row[self.input_format.index('image_name')].strip())
-                    # For each element in the output format (where the elements are the class ID and the four box coordinates)...
-                    for element in self.labels_output_format:
-                        # ...select the respective column in the input format and append it to `box`.
-                        box.append(
-                            int(row[self.input_format.index(element)].strip()))
-                    data.append(box)
-
-        # The data needs to be sorted, otherwise the next step won't give the correct result
-        data = sorted(data)
-
-        # Now that we've made sure that the data is sorted by file names,
-        # we can compile the actual samples and labels lists
-
-        # The current image for which we're collecting the ground truth boxes
-        current_file = data[0][0]
-        # The image ID will be the portion of the image name before the first dot.
-        current_image_id = data[0][0].split('.')[0]
-        current_labels = []  # The list where we collect all ground truth boxes for a given image
-        add_to_dataset = False
-        for i, box in enumerate(data):
-
-            # If this box (i.e. this line of the CSV file) belongs to the current image file
-            if box[0] == current_file:
-                current_labels.append(box[1:])
-                if i == len(data)-1:  # If this is the last line of the CSV file
-                    if random_sample:  # In case we're not using the full dataset, but a random sample of it.
-                        p = np.random.uniform(0, 1)
-                        if p >= (1-random_sample):
-                            self.labels.append(
-                                np.stack(current_labels, axis=0))
-                            self.filenames.append(os.path.join(
-                                self.images_dir, current_file))
-                            self.image_ids.append(current_image_id)
-                    else:
-                        self.labels.append(np.stack(current_labels, axis=0))
-                        self.filenames.append(os.path.join(
-                            self.images_dir, current_file))
-                        self.image_ids.append(current_image_id)
-            else:  # If this box belongs to a new image file
-                if random_sample:  # In case we're not using the full dataset, but a random sample of it.
-                    p = np.random.uniform(0, 1)
-                    if p >= (1-random_sample):
-                        self.labels.append(np.stack(current_labels, axis=0))
-                        self.filenames.append(os.path.join(
-                            self.images_dir, current_file))
-                        self.image_ids.append(current_image_id)
-                else:
-                    self.labels.append(np.stack(current_labels, axis=0))
-                    self.filenames.append(os.path.join(
-                        self.images_dir, current_file))
-                    self.image_ids.append(current_image_id)
-                # Reset the labels list because this is a new file.
-                current_labels = []
-                current_file = box[0]
-                current_image_id = box[0].split('.')[0]
-                current_labels.append(box[1:])
-                if i == len(data)-1:  # If this is the last line of the CSV file
-                    if random_sample:  # In case we're not using the full dataset, but a random sample of it.
-                        p = np.random.uniform(0, 1)
-                        if p >= (1-random_sample):
-                            self.labels.append(
-                                np.stack(current_labels, axis=0))
-                            self.filenames.append(os.path.join(
-                                self.images_dir, current_file))
-                            self.image_ids.append(current_image_id)
-                    else:
-                        self.labels.append(np.stack(current_labels, axis=0))
-                        self.filenames.append(os.path.join(
-                            self.images_dir, current_file))
-                        self.image_ids.append(current_image_id)
-
-        self.dataset_size = len(self.filenames)
-        self.dataset_indices = np.arange(self.dataset_size, dtype=np.int32)
-        if self.load_images_into_memory:
-            self.images = []
-            if verbose:
-                it = tqdm(self.filenames,
-                          desc='Loading images into memory', file=sys.stdout)
-            else:
-                it = self.filenames
-            for filename in it:
-                with Image.open(filename) as image:
-                    self.images.append(np.array(image, dtype=np.uint8))
-
-        if ret:  # In case we want to return these
-            return self.images, self.filenames, self.labels, self.image_ids
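The CSV contract above is easiest to see with a concrete row. Assuming a hypothetical `labels.csv` whose header row is followed by lines like `frame0001.png,12,55,30,80,1` in image-name-first order, a matching call would be:

    dataset.parse_csv(images_dir='images/',
                      labels_filename='labels.csv',
                      input_format=['image_name', 'xmin', 'xmax', 'ymin', 'ymax', 'class_id'],
                      include_classes='all')

Note that the parser skips the first line as a header row, so the CSV is expected to have one.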
-    def parse_xml(self,
-                  images_dirs,
-                  image_set_filenames,
-                  annotations_dirs=[],
-                  classes=['background',
-                           'aeroplane', 'bicycle', 'bird', 'boat',
-                           'bottle', 'bus', 'car', 'cat',
-                           'chair', 'cow', 'diningtable', 'dog',
-                           'horse', 'motorbike', 'person', 'pottedplant',
-                           'sheep', 'sofa', 'train', 'tvmonitor'],
-                  include_classes='all',
-                  exclude_truncated=False,
-                  exclude_difficult=False,
-                  ret=False,
-                  verbose=True):
-        '''
-        This is an XML parser for the Pascal VOC datasets. It might be applicable to other datasets with minor changes to
-        the code, but in its current form it expects the data format and XML tags of the Pascal VOC datasets.
-
-        Arguments:
-            images_dirs (list): A list of strings, where each string is the path of a directory that
-                contains images that are to be part of the dataset. This allows you to aggregate multiple datasets
-                into one (e.g. one directory that contains the images for Pascal VOC 2007, another that contains
-                the images for Pascal VOC 2012, etc.).
-            image_set_filenames (list): A list of strings, where each string is the path of the text file with the image
-                set to be loaded. Must be one file per image directory given. These text files define what images in the
-                respective image directories are to be part of the dataset and simply contain one image ID per line
-                and nothing else.
-            annotations_dirs (list, optional): A list of strings, where each string is the path of a directory that
-                contains the annotations (XML files) that belong to the images in the respective image directories given.
-                The directories must contain one XML file per image and the name of an XML file must be the image ID
-                of the image it belongs to. The content of the XML files must be in the Pascal VOC format.
-            classes (list, optional): A list containing the names of the object classes as found in the
-                `name` XML tags. Must include the class `background` as the first list item. The order of this list
-                defines the class IDs.
-            include_classes (list, optional): Either 'all' or a list of integers containing the class IDs that
-                are to be included in the dataset. If 'all', all ground truth boxes will be included in the dataset.
-            exclude_truncated (bool, optional): If `True`, excludes boxes that are labeled as 'truncated'.
-            exclude_difficult (bool, optional): If `True`, excludes boxes that are labeled as 'difficult'.
-            ret (bool, optional): Whether or not to return the outputs of the parser.
-            verbose (bool, optional): If `True`, prints out the progress for operations that may take a bit longer.
-
-        Returns:
-            None by default, optionally lists for whichever are available of images, image filenames, labels, image IDs,
-            and a list indicating which boxes are annotated with the label "difficult".
-        '''
-        # Set class members.
-        self.images_dirs = images_dirs
-        self.annotations_dirs = annotations_dirs
-        self.image_set_filenames = image_set_filenames
-        self.classes = classes
-        self.include_classes = include_classes
-
-        # Erase data that might have been parsed before.
-        self.filenames = []
-        self.image_ids = []
-        self.labels = []
-        self.eval_neutral = []
-        if not annotations_dirs:
-            self.labels = None
-            self.eval_neutral = None
-            annotations_dirs = [None] * len(images_dirs)
-
-        for images_dir, image_set_filename, annotations_dir in zip(images_dirs, image_set_filenames, annotations_dirs):
-            # Read the image set file so that we know the IDs of all the images to be included in the dataset.
-            with open(image_set_filename) as f:
-                # Note: These are strings, not integers.
-                image_ids = [line.strip() for line in f]
-                self.image_ids += image_ids
-
-            if verbose:
-                it = tqdm(image_ids, desc="Processing image set '{}'".format(
-                    os.path.basename(image_set_filename)), file=sys.stdout)
-            else:
-                it = image_ids
-
-            # Loop over all images in this dataset.
-            for image_id in it:
-
-                filename = '{}'.format(image_id) + '.jpg'
-                self.filenames.append(os.path.join(images_dir, filename))
-
-                if not annotations_dir is None:
-                    # Parse the XML file for this image.
-                    with open(os.path.join(annotations_dir, image_id + '.xml')) as f:
-                        soup = BeautifulSoup(f, 'xml')
-
-                    # In case we want to return the folder in addition to the image file name. Relevant for determining which dataset an image belongs to.
-                    folder = soup.folder.text
-                    #filename = soup.filename.text
-
-                    boxes = []  # We'll store all boxes for this image here.
-                    # We'll store whether a box is annotated as "difficult" here.
-                    eval_neutr = []
-                    # Get a list of all objects in this image.
-                    objects = soup.find_all('object')
-
-                    # Parse the data for each object.
-                    for obj in objects:
-                        class_name = obj.find('name', recursive=False).text
-                        class_id = self.classes.index(class_name)
-                        # Check whether this class is supposed to be included in the dataset.
-                        if (not self.include_classes == 'all') and (not class_id in self.include_classes):
-                            continue
-                        pose = obj.find('pose', recursive=False).text
-                        truncated = int(
-                            obj.find('truncated', recursive=False).text)
-                        if exclude_truncated and (truncated == 1):
-                            continue
-                        difficult = int(
-                            obj.find('difficult', recursive=False).text)
-                        if exclude_difficult and (difficult == 1):
-                            continue
-                        # Get the bounding box coordinates.
-                        bndbox = obj.find('bndbox', recursive=False)
-                        xmin = int(bndbox.xmin.text)
-                        ymin = int(bndbox.ymin.text)
-                        xmax = int(bndbox.xmax.text)
-                        ymax = int(bndbox.ymax.text)
-                        item_dict = {'folder': folder,
-                                     'image_name': filename,
-                                     'image_id': image_id,
-                                     'class_name': class_name,
-                                     'class_id': class_id,
-                                     'pose': pose,
-                                     'truncated': truncated,
-                                     'difficult': difficult,
-                                     'xmin': xmin,
-                                     'ymin': ymin,
-                                     'xmax': xmax,
-                                     'ymax': ymax}
-                        box = []
-                        for item in self.labels_output_format:
-                            box.append(item_dict[item])
-                        boxes.append(box)
-                        if difficult:
-                            eval_neutr.append(True)
-                        else:
-                            eval_neutr.append(False)
-
-                    self.labels.append(boxes)
-                    self.eval_neutral.append(eval_neutr)
-
-        self.dataset_size = len(self.filenames)
-        self.dataset_indices = np.arange(self.dataset_size, dtype=np.int32)
-        if self.load_images_into_memory:
-            self.images = []
-            if verbose:
-                it = tqdm(self.filenames,
-                          desc='Loading images into memory', file=sys.stdout)
-            else:
-                it = self.filenames
-            for filename in it:
-                with Image.open(filename) as image:
-                    self.images.append(np.array(image, dtype=np.uint8))
-
-        if ret:
-            return self.images, self.filenames, self.labels, self.image_ids, self.eval_neutral
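For the XML parser, a plausible Pascal VOC call, with directory names following the standard VOC layout (the paths here are assumptions, not taken from the scrape):

    dataset.parse_xml(images_dirs=['VOC2007/JPEGImages/'],
                      image_set_filenames=['VOC2007/ImageSets/Main/trainval.txt'],
                      annotations_dirs=['VOC2007/Annotations/'],
                      exclude_difficult=False)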
-    def parse_json(self,
-                   images_dirs,
-                   annotations_filenames,
-                   ground_truth_available=False,
-                   include_classes='all',
-                   ret=False,
-                   verbose=True):
-        '''
-        This is a JSON parser for the MS COCO datasets. It might be applicable to other datasets with minor changes to
-        the code, but in its current form it expects the JSON format of the MS COCO datasets.
-
-        Arguments:
-            images_dirs (list, optional): A list of strings, where each string is the path of a directory that
-                contains images that are to be part of the dataset. This allows you to aggregate multiple datasets
-                into one (e.g. one directory that contains the images for MS COCO Train 2014, another one for MS COCO
-                Val 2014, another one for MS COCO Train 2017 etc.).
-            annotations_filenames (list): A list of strings, where each string is the path of the JSON file
-                that contains the annotations for the images in the respective image directories given, i.e. one
-                JSON file per image directory that contains the annotations for all images in that directory.
-                The content of the JSON files must be in MS COCO object detection format. Note that these annotations
-                files do not necessarily need to contain ground truth information. MS COCO also provides annotations
-                files without ground truth information for the test datasets, called `image_info_[...].json`.
-            ground_truth_available (bool, optional): Set `True` if the annotations files contain ground truth information.
-            include_classes (list, optional): Either 'all' or a list of integers containing the class IDs that
-                are to be included in the dataset. If 'all', all ground truth boxes will be included in the dataset.
-            ret (bool, optional): Whether or not to return the outputs of the parser.
-            verbose (bool, optional): If `True`, prints out the progress for operations that may take a bit longer.
-
-        Returns:
-            None by default, optionally lists for whichever are available of images, image filenames, labels and image IDs.
-        '''
-        self.images_dirs = images_dirs
-        self.annotations_filenames = annotations_filenames
-        self.include_classes = include_classes
-        # Erase data that might have been parsed before.
-        self.filenames = []
-        self.image_ids = []
-        self.labels = []
-        if not ground_truth_available:
-            self.labels = None
-
-        # Build the dictionaries that map between class names and class IDs.
-        with open(annotations_filenames[0], 'r') as f:
-            annotations = json.load(f)
-        # Unfortunately the 80 MS COCO class IDs are not all consecutive. They go
-        # from 1 to 90 and some numbers are skipped. Since the IDs that we feed
-        # into a neural network must be consecutive, we'll save both the original
-        # (non-consecutive) IDs as well as transformed maps.
-        # The map between class names (values) and their original IDs (keys)
-        self.cats_to_names = {}
-        # A list of the class names with their indices representing the transformed IDs
-        self.classes_to_names = []
-        # Need to add the background class first so that the indexing is right.
-        self.classes_to_names.append('background')
-        # A dictionary that maps between the original (keys) and the transformed IDs (values)
-        self.cats_to_classes = {}
-        # A dictionary that maps between the transformed (keys) and the original IDs (values)
-        self.classes_to_cats = {}
-        for i, cat in enumerate(annotations['categories']):
-            self.cats_to_names[cat['id']] = cat['name']
-            self.classes_to_names.append(cat['name'])
-            self.cats_to_classes[cat['id']] = i + 1
-            self.classes_to_cats[i + 1] = cat['id']
-
-        # Iterate over all datasets.
-        for images_dir, annotations_filename in zip(self.images_dirs, self.annotations_filenames):
-            # Load the JSON file.
-            with open(annotations_filename, 'r') as f:
-                annotations = json.load(f)
-
-            if ground_truth_available:
-                # Create the annotations map, a dictionary whose keys are the image IDs
-                # and whose values are the annotations for the respective image ID.
-                image_ids_to_annotations = defaultdict(list)
-                for annotation in annotations['annotations']:
-                    image_ids_to_annotations[annotation['image_id']].append(
-                        annotation)
-
-            if verbose:
-                it = tqdm(annotations['images'], desc="Processing '{}'".format(
-                    os.path.basename(annotations_filename)), file=sys.stdout)
-            else:
-                it = annotations['images']
-
-            # Loop over all images in this dataset.
-            for img in it:
-
-                self.filenames.append(os.path.join(
-                    images_dir, img['file_name']))
-                self.image_ids.append(img['id'])
-
-                if ground_truth_available:
-                    # Get all annotations for this image.
-                    annotations = image_ids_to_annotations[img['id']]
-                    boxes = []
-                    for annotation in annotations:
-                        cat_id = annotation['category_id']
-                        # Check if this class is supposed to be included in the dataset.
-                        if (not self.include_classes == 'all') and (not cat_id in self.include_classes):
-                            continue
-                        # Transform the original class ID to fit in the sequence of consecutive IDs.
-                        class_id = self.cats_to_classes[cat_id]
-                        xmin = annotation['bbox'][0]
-                        ymin = annotation['bbox'][1]
-                        width = annotation['bbox'][2]
-                        height = annotation['bbox'][3]
-                        # Compute `xmax` and `ymax`.
-                        xmax = xmin + width
-                        ymax = ymin + height
-                        item_dict = {'image_name': img['file_name'],
-                                     'image_id': img['id'],
-                                     'class_id': class_id,
-                                     'xmin': xmin,
-                                     'ymin': ymin,
-                                     'xmax': xmax,
-                                     'ymax': ymax}
-                        box = []
-                        for item in self.labels_output_format:
-                            box.append(item_dict[item])
-                        boxes.append(box)
-                    self.labels.append(boxes)
-
-        self.dataset_size = len(self.filenames)
-        self.dataset_indices = np.arange(self.dataset_size, dtype=np.int32)
-        if self.load_images_into_memory:
-            self.images = []
-            if verbose:
-                it = tqdm(self.filenames,
-                          desc='Loading images into memory', file=sys.stdout)
-            else:
-                it = self.filenames
-            for filename in it:
-                with Image.open(filename) as image:
-                    self.images.append(np.array(image, dtype=np.uint8))
-
-        if ret:
-            return self.images, self.filenames, self.labels, self.image_ids
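Likewise for the JSON parser, a sketch against the usual MS COCO directory layout (hypothetical paths):

    dataset.parse_json(images_dirs=['coco/images/train2017/'],
                       annotations_filenames=['coco/annotations/instances_train2017.json'],
                       ground_truth_available=True)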
-    def create_hdf5_dataset(self,
-                            file_path='dataset.h5',
-                            resize=False,
-                            variable_image_size=True,
-                            verbose=True):
-        '''
-        Converts the currently loaded dataset into an HDF5 file. This HDF5 file contains all
-        images as uncompressed arrays in a contiguous block of memory, which allows for them
-        to be loaded faster. Such an uncompressed dataset, however, may take up considerably
-        more space on your hard drive than the sum of the source images in a compressed format
-        such as JPG or PNG.
-
-        It is recommended that you always convert the dataset into an HDF5 dataset if you
-        have enough hard drive space since loading from an HDF5 dataset accelerates the data
-        generation noticeably.
-
-        Note that you must load a dataset (e.g. via one of the parser methods) before creating
-        an HDF5 dataset from it.
-
-        The created HDF5 dataset will remain open upon its creation so that it can be used right
-        away.
-
-        Arguments:
-            file_path (str, optional): The full file path under which to store the HDF5 dataset.
-                You can load this output file via the `DataGenerator` constructor in the future.
-            resize (tuple, optional): `False` or a 2-tuple `(height, width)` that represents the
-                target size for the images. All images in the dataset will be resized to this
-                target size before they will be written to the HDF5 file. If `False`, no resizing
-                will be performed.
-            variable_image_size (bool, optional): The only purpose of this argument is that its
-                value will be stored in the HDF5 dataset in order to be able to quickly find out
-                whether the images in the dataset all have the same size or not.
-            verbose (bool, optional): Whether or not to print out the progress of the dataset creation.
-
-        Returns:
-            None.
-        '''
-
-        self.hdf5_dataset_path = file_path
-
-        dataset_size = len(self.filenames)
-
-        # Create the HDF5 file.
-        hdf5_dataset = h5py.File(file_path, 'w')
-
-        # Create a few attributes that tell us what this dataset contains.
-        # The dataset will obviously always contain images, but maybe it will
-        # also contain labels, image IDs, etc.
-        hdf5_dataset.attrs.create(
-            name='has_labels', data=False, shape=None, dtype=np.bool_)
-        hdf5_dataset.attrs.create(
-            name='has_image_ids', data=False, shape=None, dtype=np.bool_)
-        hdf5_dataset.attrs.create(
-            name='has_eval_neutral', data=False, shape=None, dtype=np.bool_)
-        # It's useful to be able to quickly check whether the images in a dataset all
-        # have the same size or not, so add a boolean attribute for that.
-        if variable_image_size and not resize:
-            hdf5_dataset.attrs.create(
-                name='variable_image_size', data=True, shape=None, dtype=np.bool_)
-        else:
-            hdf5_dataset.attrs.create(
-                name='variable_image_size', data=False, shape=None, dtype=np.bool_)
-
-        # Create the dataset in which the images will be stored as flattened arrays.
-        # This allows us, among other things, to store images of variable size.
-        hdf5_images = hdf5_dataset.create_dataset(name='images',
-                                                  shape=(dataset_size,),
-                                                  maxshape=(None),
-                                                  dtype=h5py.special_dtype(vlen=np.uint8))
-
-        # Create the dataset that will hold the image heights, widths and channels that
-        # we need in order to reconstruct the images from the flattened arrays later.
-        hdf5_image_shapes = hdf5_dataset.create_dataset(name='image_shapes',
-                                                        shape=(dataset_size, 3),
-                                                        maxshape=(None, 3),
-                                                        dtype=np.int32)
-
-        if not (self.labels is None):
-
-            # Create the dataset in which the labels will be stored as flattened arrays.
-            hdf5_labels = hdf5_dataset.create_dataset(name='labels',
-                                                      shape=(dataset_size,),
-                                                      maxshape=(None),
-                                                      dtype=h5py.special_dtype(vlen=np.int32))
-
-            # Create the dataset that will hold the dimensions of the labels arrays for
-            # each image so that we can restore the labels from the flattened arrays later.
-            hdf5_label_shapes = hdf5_dataset.create_dataset(name='label_shapes',
-                                                            shape=(dataset_size, 2),
-                                                            maxshape=(None, 2),
-                                                            dtype=np.int32)
-
-            hdf5_dataset.attrs.modify(name='has_labels', value=True)
-
-        if not (self.image_ids is None):
-
-            hdf5_image_ids = hdf5_dataset.create_dataset(name='image_ids',
-                                                         shape=(dataset_size,),
-                                                         maxshape=(None),
-                                                         dtype=h5py.special_dtype(vlen=str))
-
-            hdf5_dataset.attrs.modify(name='has_image_ids', value=True)
-
-        if not (self.eval_neutral is None):
-
-            # Create the dataset in which the labels will be stored as flattened arrays.
-            hdf5_eval_neutral = hdf5_dataset.create_dataset(name='eval_neutral',
-                                                            shape=(dataset_size,),
-                                                            maxshape=(None),
-                                                            dtype=h5py.special_dtype(vlen=np.bool_))
-
-            hdf5_dataset.attrs.modify(name='has_eval_neutral', value=True)
-
-        if verbose:
-            tr = trange(dataset_size, desc='Creating HDF5 dataset',
-                        file=sys.stdout)
-        else:
-            tr = range(dataset_size)
-
-        # Iterate over all images in the dataset.
-        for i in tr:
-
-            # Store the image.
-            with Image.open(self.filenames[i]) as image:
-
-                image = np.asarray(image, dtype=np.uint8)
-
-                # Make sure all images end up having three channels.
-                if image.ndim == 2:
-                    image = np.stack([image] * 3, axis=-1)
-                elif image.ndim == 3:
-                    if image.shape[2] == 1:
-                        image = np.concatenate([image] * 3, axis=-1)
-                    elif image.shape[2] == 4:
-                        image = image[:, :, :3]
-
-                if resize:
-                    image = cv2.resize(image, dsize=(resize[1], resize[0]))
-
-                # Flatten the image array and write it to the images dataset.
-                hdf5_images[i] = image.reshape(-1)
-                # Write the image's shape to the image shapes dataset.
-                hdf5_image_shapes[i] = image.shape
-
-            # Store the ground truth if we have any.
-            if not (self.labels is None):
-
-                labels = np.asarray(self.labels[i])
-                # Flatten the labels array and write it to the labels dataset.
-                hdf5_labels[i] = labels.reshape(-1)
-                # Write the labels' shape to the label shapes dataset.
-                hdf5_label_shapes[i] = labels.shape
-
-            # Store the image ID if we have one.
-            if not (self.image_ids is None):
-
-                hdf5_image_ids[i] = self.image_ids[i]
-
-            # Store the evaluation-neutrality annotations if we have any.
-            if not (self.eval_neutral is None):
-
-                hdf5_eval_neutral[i] = self.eval_neutral[i]
-
-        hdf5_dataset.close()
-        self.hdf5_dataset = h5py.File(file_path, 'r')
-        self.hdf5_dataset_path = file_path
-        self.dataset_size = len(self.hdf5_dataset['images'])
-        # Instead of shuffling the HDF5 dataset, we will shuffle this index list.
-        self.dataset_indices = np.arange(self.dataset_size, dtype=np.int32)
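Once a parser has run, serializing to HDF5 is a one-liner, and the file it writes is exactly what the constructor's `hdf5_dataset_path` argument expects back. Illustrative call:

    dataset.create_hdf5_dataset(file_path='dataset.h5', resize=False)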
-    def generate(self,
-                 batch_size=32,
-                 shuffle=True,
-                 transformations=[],
-                 label_encoder=None,
-                 returns={'processed_images', 'encoded_labels'},
-                 keep_images_without_gt=False,
-                 degenerate_box_handling='remove'):
-        '''
-        Generates batches of samples and (optionally) corresponding labels indefinitely.
-
-        Can shuffle the samples consistently after each complete pass.
-
-        Optionally takes a list of arbitrary image transformations to apply to the
-        samples ad hoc.
-
-        Arguments:
-            batch_size (int, optional): The size of the batches to be generated.
-            shuffle (bool, optional): Whether or not to shuffle the dataset before each pass.
-                This option should always be `True` during training, but it can be useful to turn shuffling off
-                for debugging or if you're using the generator for prediction.
-            transformations (list, optional): A list of transformations that will be applied to the images and labels
-                in the given order. Each transformation is a callable that takes as input an image (as a Numpy array)
-                and optionally labels (also as a Numpy array) and returns an image and optionally labels in the same
-                format.
-            label_encoder (callable, optional): Only relevant if labels are given. A callable that takes as input the
-                labels of a batch (as a list of Numpy arrays) and returns some structure that represents those labels.
-                The general use case for this is to convert labels from their input format to a format that a given object
-                detection model needs as its training targets.
-            returns (set, optional): A set of strings that determines what outputs the generator yields. The generator's output
-                is always a tuple that contains the outputs specified in this set and only those. If an output is not available,
-                it will be `None`. The output tuple can contain the following outputs according to the specified keyword strings:
-                * 'processed_images': An array containing the processed images. Will always be in the outputs, so it doesn't
-                  matter whether or not you include this keyword in the set.
-                * 'encoded_labels': The encoded labels tensor. Will always be in the outputs if a label encoder is given,
-                  so it doesn't matter whether or not you include this keyword in the set if you pass a label encoder.
-                * 'matched_anchors': Only available if `labels_encoder` is an `SSDInputEncoder` object. The same as 'encoded_labels',
-                  but containing anchor box coordinates for all matched anchor boxes instead of ground truth coordinates.
-                  This can be useful to visualize what anchor boxes are being matched to each ground truth box. Only available
-                  in training mode.
-                * 'processed_labels': The processed, but not yet encoded labels. This is a list that contains for each
-                  batch image a Numpy array with all ground truth boxes for that image. Only available if ground truth is available.
-                * 'filenames': A list containing the file names (full paths) of the images in the batch.
-                * 'image_ids': A list containing the integer IDs of the images in the batch. Only available if there
-                  are image IDs available.
-                * 'evaluation-neutral': A nested list of lists of booleans. Each list contains `True` or `False` for every ground truth
-                  bounding box of the respective image depending on whether that bounding box is supposed to be evaluation-neutral (`True`)
-                  or not (`False`). May return `None` if there exists no such concept for a given dataset. An example for
-                  evaluation-neutrality are the ground truth boxes annotated as "difficult" in the Pascal VOC datasets, which are
-                  usually treated to be neutral in a model evaluation.
-                * 'inverse_transform': A nested list that contains a list of "inverter" functions for each item in the batch.
-                  These inverter functions take (predicted) labels for an image as input and apply the inverse of the transformations
-                  that were applied to the original image to them. This makes it possible to let the model make predictions on a
-                  transformed image and then convert these predictions back to the original image. This is mostly relevant for
-                  evaluation: If you want to evaluate your model on a dataset with varying image sizes, then you are forced to
-                  transform the images somehow (e.g. by resizing or cropping) to make them all the same size. Your model will then
-                  predict boxes for those transformed images, but for the evaluation you will need predictions with respect to the
-                  original images, not with respect to the transformed images. This means you will have to transform the predicted
-                  box coordinates back to the original image sizes. Note that for each image, the inverter functions for that
-                  image need to be applied in the order in which they are given in the respective list for that image.
-                * 'original_images': A list containing the original images in the batch before any processing.
-                * 'original_labels': A list containing the original ground truth boxes for the images in this batch before any
-                  processing. Only available if ground truth is available.
-                The order of the outputs in the tuple is the order of the list above. If `returns` contains a keyword for an
-                output that is unavailable, that output will be omitted from the yielded tuples and a warning will be raised.
-            keep_images_without_gt (bool, optional): If `False`, images for which there aren't any ground truth boxes before
-                any transformations have been applied will be removed from the batch. If `True`, such images will be kept
-                in the batch.
-            degenerate_box_handling (str, optional): How to handle degenerate boxes, which are boxes that have `xmax <= xmin` and/or
-                `ymax <= ymin`. Degenerate boxes can sometimes be in the dataset, or non-degenerate boxes can become degenerate
-                after they were processed by transformations. Note that the generator checks for degenerate boxes after all
-                transformations have been applied (if any), but before the labels were passed to the `label_encoder` (if one was given).
-                Can be one of 'warn' or 'remove'. If 'warn', the generator will merely print a warning to let you know that there
-                are degenerate boxes in a batch. If 'remove', the generator will remove degenerate boxes from the batch silently.
-
-        Yields:
-            The next batch as a tuple of items as defined by the `returns` argument.
-        '''
-
-        if self.dataset_size == 0:
-            raise DatasetError(
-                "Cannot generate batches because you did not load a dataset.")
-
-        #############################################################################################
-        # Warn if any of the set returns aren't possible.
-        #############################################################################################
-
-        if self.labels is None:
-            if any([ret in returns for ret in ['original_labels', 'processed_labels', 'encoded_labels', 'matched_anchors', 'evaluation-neutral']]):
-                warnings.warn("Since no labels were given, none of 'original_labels', 'processed_labels', 'evaluation-neutral', 'encoded_labels', and 'matched_anchors' " +
-                              "are possible returns, but you set `returns = {}`. The impossible returns will be `None`.".format(returns))
-        elif label_encoder is None:
-            if any([ret in returns for ret in ['encoded_labels', 'matched_anchors']]):
-                warnings.warn("Since no label encoder was given, 'encoded_labels' and 'matched_anchors' aren't possible returns, " +
-                              "but you set `returns = {}`. The impossible returns will be `None`.".format(returns))
-        elif not isinstance(label_encoder, SSDInputEncoder):
-            if 'matched_anchors' in returns:
-                warnings.warn("`label_encoder` is not an `SSDInputEncoder` object, therefore 'matched_anchors' is not a possible return, " +
-                              "but you set `returns = {}`. The impossible returns will be `None`.".format(returns))
-
-        #############################################################################################
-        # Do a few preparatory things like maybe shuffling the dataset initially.
-        #############################################################################################
-
-        if shuffle:
-            objects_to_shuffle = [self.dataset_indices]
-            if not (self.filenames is None):
-                objects_to_shuffle.append(self.filenames)
-            if not (self.labels is None):
-                objects_to_shuffle.append(self.labels)
-            if not (self.image_ids is None):
-                objects_to_shuffle.append(self.image_ids)
-            if not (self.eval_neutral is None):
-                objects_to_shuffle.append(self.eval_neutral)
-            shuffled_objects = sklearn.utils.shuffle(*objects_to_shuffle)
-            for i in range(len(objects_to_shuffle)):
-                objects_to_shuffle[i][:] = shuffled_objects[i]
-
-        if degenerate_box_handling == 'remove':
-            box_filter = BoxFilter(check_overlap=False,
-                                   check_min_area=False,
-                                   check_degenerate=True,
-                                   labels_format=self.labels_format)
-
-        # Override the labels formats of all the transformations to make sure they are set correctly.
-        if not (self.labels is None):
-            for transform in transformations:
-                transform.labels_format = self.labels_format
-
-        #############################################################################################
-        # Generate mini batches.
-        #############################################################################################
-
-        current = 0
-
-        while True:
-
-            batch_X, batch_y = [], []
-
-            if current >= self.dataset_size:
-                current = 0
-
-                #########################################################################################
-                # Maybe shuffle the dataset if a full pass over the dataset has finished.
-                #########################################################################################
-
-                if shuffle:
-                    objects_to_shuffle = [self.dataset_indices]
-                    if not (self.filenames is None):
-                        objects_to_shuffle.append(self.filenames)
-                    if not (self.labels is None):
-                        objects_to_shuffle.append(self.labels)
-                    if not (self.image_ids is None):
-                        objects_to_shuffle.append(self.image_ids)
-                    if not (self.eval_neutral is None):
-                        objects_to_shuffle.append(self.eval_neutral)
-                    shuffled_objects = sklearn.utils.shuffle(
-                        *objects_to_shuffle)
-                    for i in range(len(objects_to_shuffle)):
-                        objects_to_shuffle[i][:] = shuffled_objects[i]
-
-            #########################################################################################
-            # Get the images, (maybe) image IDs, (maybe) labels, etc. for this batch.
-            #########################################################################################
-
-            # We prioritize our options in the following order:
-            # 1) If we have the images already loaded in memory, get them from there.
-            # 2) Else, if we have an HDF5 dataset, get the images from there.
-            # 3) Else, if we have neither of the above, we'll have to load the individual image
-            #    files from disk.
-            batch_indices = self.dataset_indices[current:current+batch_size]
-            if not (self.images is None):
-                for i in batch_indices:
-                    batch_X.append(self.images[i])
-                if not (self.filenames is None):
-                    batch_filenames = self.filenames[current:current+batch_size]
-                else:
-                    batch_filenames = None
-            elif not (self.hdf5_dataset is None):
-                for i in batch_indices:
-                    batch_X.append(self.hdf5_dataset['images'][i].reshape(
-                        self.hdf5_dataset['image_shapes'][i]))
-                if not (self.filenames is None):
-                    batch_filenames = self.filenames[current:current+batch_size]
-                else:
-                    batch_filenames = None
-            else:
-                batch_filenames = self.filenames[current:current+batch_size]
-                for filename in batch_filenames:
-                    with Image.open(filename) as image:
-                        batch_X.append(np.array(image, dtype=np.uint8))
-
-            # Get the labels for this batch (if there are any).
-            if not (self.labels is None):
-                batch_y = deepcopy(self.labels[current:current+batch_size])
-            else:
-                batch_y = None
-
-            if not (self.eval_neutral is None):
-                batch_eval_neutral = self.eval_neutral[current:current+batch_size]
-            else:
-                batch_eval_neutral = None
-
-            # Get the image IDs for this batch (if there are any).
-            if not (self.image_ids is None):
-                batch_image_ids = self.image_ids[current:current+batch_size]
-            else:
-                batch_image_ids = None
-
-            if 'original_images' in returns:
-                # The original, unaltered images
-                batch_original_images = deepcopy(batch_X)
-            if 'original_labels' in returns:
-                # The original, unaltered labels
-                batch_original_labels = deepcopy(batch_y)
-
-            current += batch_size
-
-            #########################################################################################
-            # Maybe perform image transformations.
-            #########################################################################################
-
-            # In case we need to remove any images from the batch, store their indices in this list.
-            batch_items_to_remove = []
-            batch_inverse_transforms = []
-
-            for i in range(len(batch_X)):
-
-                if not (self.labels is None):
-                    # Convert the labels for this image to an array (in case they aren't already).
-                    batch_y[i] = np.array(batch_y[i])
-                    # If this image has no ground truth boxes, maybe we don't want to keep it in the batch.
- if (batch_y[i].size == 0) and not keep_images_without_gt: - batch_items_to_remove.append(i) - batch_inverse_transforms.append([]) - continue - - # Apply any image transformations we may have received. - if transformations: - - inverse_transforms = [] - - for transform in transformations: - - if not (self.labels is None): - - if ('inverse_transform' in returns) and ('return_inverter' in inspect.signature(transform).parameters): - batch_X[i], batch_y[i], inverse_transform = transform( - batch_X[i], batch_y[i], return_inverter=True) - inverse_transforms.append(inverse_transform) - else: - batch_X[i], batch_y[i] = transform( - batch_X[i], batch_y[i]) - - # In case the transform failed to produce an output image, which is possible for some random transforms. - if batch_X[i] is None: - batch_items_to_remove.append(i) - batch_inverse_transforms.append([]) - continue - - else: - - if ('inverse_transform' in returns) and ('return_inverter' in inspect.signature(transform).parameters): - batch_X[i], inverse_transform = transform( - batch_X[i], return_inverter=True) - inverse_transforms.append(inverse_transform) - else: - batch_X[i] = transform(batch_X[i]) - - batch_inverse_transforms.append(inverse_transforms[::-1]) - - ######################################################################################### - # Check for degenerate boxes in this batch item. - ######################################################################################### - - if not (self.labels is None): - - xmin = self.labels_format['xmin'] - ymin = self.labels_format['ymin'] - xmax = self.labels_format['xmax'] - ymax = self.labels_format['ymax'] - - if np.any(batch_y[i][:, xmax] - batch_y[i][:, xmin] <= 0) or np.any(batch_y[i][:, ymax] - batch_y[i][:, ymin] <= 0): - if degenerate_box_handling == 'warn': - warnings.warn("Detected degenerate ground truth bounding boxes for batch item {} with bounding boxes {}, ".format(i, batch_y[i]) + - "i.e. bounding boxes where xmax <= xmin and/or ymax <= ymin. " + - "This could mean that your dataset contains degenerate ground truth boxes, or that any image transformations you may apply might " + - "result in degenerate ground truth boxes, or that you are parsing the ground truth in the wrong coordinate format." + - "Degenerate ground truth bounding boxes may lead to NaN errors during the training.") - elif degenerate_box_handling == 'remove': - batch_y[i] = box_filter(batch_y[i]) - if (batch_y[i].size == 0) and not keep_images_without_gt: - batch_items_to_remove.append(i) - - ######################################################################################### - # Remove any items we might not want to keep from the batch. - ######################################################################################### - - if batch_items_to_remove: - for j in sorted(batch_items_to_remove, reverse=True): - # This isn't efficient, but it hopefully shouldn't need to be done often anyway. 
-                    batch_X.pop(j)
-                    batch_filenames.pop(j)
-                    if batch_inverse_transforms:
-                        batch_inverse_transforms.pop(j)
-                    if not (self.labels is None):
-                        batch_y.pop(j)
-                    if not (self.image_ids is None):
-                        batch_image_ids.pop(j)
-                    if not (self.eval_neutral is None):
-                        batch_eval_neutral.pop(j)
-                    if 'original_images' in returns:
-                        batch_original_images.pop(j)
-                    if 'original_labels' in returns and not (self.labels is None):
-                        batch_original_labels.pop(j)
-
-            #########################################################################################
-
-            # CAUTION: Converting `batch_X` into an array will result in an empty batch if the images have varying sizes
-            #          or varying numbers of channels. At this point, all images must have the same size and the same
-            #          number of channels.
-            batch_X = np.array(batch_X)
-            if (batch_X.size == 0):
-                raise DegenerateBatchError("You produced an empty batch. This might be because the images in the batch vary " +
-                                           "in their size and/or number of channels. Note that after all transformations " +
-                                           "(if any were given) have been applied to all images in the batch, all images " +
-                                           "must be homogeneous in size along all axes.")
-
-            #########################################################################################
-            # If we have a label encoder, encode our labels.
-            #########################################################################################
-
-            if not (label_encoder is None or self.labels is None):
-
-                if ('matched_anchors' in returns) and isinstance(label_encoder, SSDInputEncoder):
-                    batch_y_encoded, batch_matched_anchors = label_encoder(
-                        batch_y, diagnostics=True)
-                else:
-                    batch_y_encoded = label_encoder(batch_y, diagnostics=False)
-                    batch_matched_anchors = None
-
-            else:
-                batch_y_encoded = None
-                batch_matched_anchors = None
-
-            #########################################################################################
-            # Compose the output.
-            #########################################################################################
-
-            ret = []
-            if 'processed_images' in returns:
-                ret.append(batch_X)
-            if 'encoded_labels' in returns:
-                ret.append(batch_y_encoded)
-            if 'matched_anchors' in returns:
-                ret.append(batch_matched_anchors)
-            if 'processed_labels' in returns:
-                ret.append(batch_y)
-            if 'filenames' in returns:
-                ret.append(batch_filenames)
-            if 'image_ids' in returns:
-                ret.append(batch_image_ids)
-            if 'evaluation-neutral' in returns:
-                ret.append(batch_eval_neutral)
-            if 'inverse_transform' in returns:
-                ret.append(batch_inverse_transforms)
-            if 'original_images' in returns:
-                ret.append(batch_original_images)
-            if 'original_labels' in returns:
-                ret.append(batch_original_labels)
-
-            yield ret
-
-    def save_dataset(self,
-                     filenames_path='filenames.pkl',
-                     labels_path=None,
-                     image_ids_path=None,
-                     eval_neutral_path=None):
-        '''
-        Writes the current `filenames`, `labels`, and `image_ids` lists to the specified files.
-        This is particularly useful for large datasets with annotations that are
-        parsed from XML files, which can take quite a long time. If you'll be using the
-        same dataset repeatedly, you don't want to have to parse the XML label
-        files every time.
-
-        Arguments:
-            filenames_path (str): The path under which to save the filenames pickle.
-            labels_path (str): The path under which to save the labels pickle.
-            image_ids_path (str, optional): The path under which to save the image IDs pickle.
-            eval_neutral_path (str, optional): The path under which to save the pickle for
-                the evaluation-neutrality annotations.
- ''' - with open(filenames_path, 'wb') as f: - pickle.dump(self.filenames, f) - if not labels_path is None: - with open(labels_path, 'wb') as f: - pickle.dump(self.labels, f) - if not image_ids_path is None: - with open(image_ids_path, 'wb') as f: - pickle.dump(self.image_ids, f) - if not eval_neutral_path is None: - with open(eval_neutral_path, 'wb') as f: - pickle.dump(self.eval_neutral, f) - - def get_dataset(self): - ''' - Returns: - 4-tuple containing lists and/or `None` for the filenames, labels, image IDs, - and evaluation-neutrality annotations. - ''' - return self.filenames, self.labels, self.image_ids, self.eval_neutral - - def get_dataset_size(self): - ''' - Returns: - The number of images in the dataset. - ''' - return self.dataset_size -''' -Various geometric image transformations for 2D object detection, both deterministic -and probabilistic. - -Copyright (C) 2018 Pierluigi Ferrari - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -''' - -from __future__ import division -import numpy as np -import cv2 -import random - -from data_generator.object_detection_2d_image_boxes_validation_utils import BoxFilter, ImageValidator - - -class Resize: - ''' - Resizes images to a specified height and width in pixels. - ''' - - def __init__(self, - height, - width, - interpolation_mode=cv2.INTER_LINEAR, - box_filter=None, - labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - ''' - Arguments: - height (int): The desired height of the output images in pixels. - width (int): The desired width of the output images in pixels. - interpolation_mode (int, optional): An integer that denotes a valid - OpenCV interpolation mode. For example, integers 0 through 5 are - valid interpolation modes. - box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given. - A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria - after the transformation. Refer to the `BoxFilter` documentation for details. If `None`, - the validity of the bounding boxes is not checked. - labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels - of an image contains which bounding box coordinate. The dictionary maps at least the keywords - 'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array. 
-        '''
-        if not (isinstance(box_filter, BoxFilter) or box_filter is None):
-            raise ValueError(
-                "`box_filter` must be either `None` or a `BoxFilter` object.")
-        self.out_height = height
-        self.out_width = width
-        self.interpolation_mode = interpolation_mode
-        self.box_filter = box_filter
-        self.labels_format = labels_format
-
-    def __call__(self, image, labels=None, return_inverter=False):
-
-        img_height, img_width = image.shape[:2]
-
-        xmin = self.labels_format['xmin']
-        ymin = self.labels_format['ymin']
-        xmax = self.labels_format['xmax']
-        ymax = self.labels_format['ymax']
-
-        image = cv2.resize(image,
-                           dsize=(self.out_width, self.out_height),
-                           interpolation=self.interpolation_mode)
-
-        if return_inverter:
-            def inverter(labels):
-                labels = np.copy(labels)
-                labels[:, [ymin+1, ymax+1]] = np.round(
-                    labels[:, [ymin+1, ymax+1]] * (img_height / self.out_height), decimals=0)
-                labels[:, [xmin+1, xmax+1]] = np.round(
-                    labels[:, [xmin+1, xmax+1]] * (img_width / self.out_width), decimals=0)
-                return labels
-
-        if labels is None:
-            if return_inverter:
-                return image, inverter
-            else:
-                return image
-        else:
-            labels = np.copy(labels)
-            labels[:, [ymin, ymax]] = np.round(
-                labels[:, [ymin, ymax]] * (self.out_height / img_height), decimals=0)
-            labels[:, [xmin, xmax]] = np.round(
-                labels[:, [xmin, xmax]] * (self.out_width / img_width), decimals=0)
-
-            if not (self.box_filter is None):
-                self.box_filter.labels_format = self.labels_format
-                labels = self.box_filter(labels=labels,
-                                         image_height=self.out_height,
-                                         image_width=self.out_width)
-
-            if return_inverter:
-                return image, labels, inverter
-            else:
-                return image, labels
-
-
-class ResizeRandomInterp:
-    '''
-    Resizes images to a specified height and width in pixels using a randomly
-    selected interpolation mode.
-    '''
-
-    def __init__(self,
-                 height,
-                 width,
-                 interpolation_modes=[cv2.INTER_NEAREST,
-                                      cv2.INTER_LINEAR,
-                                      cv2.INTER_CUBIC,
-                                      cv2.INTER_AREA,
-                                      cv2.INTER_LANCZOS4],
-                 box_filter=None,
-                 labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
-        '''
-        Arguments:
-            height (int): The desired height of the output image in pixels.
-            width (int): The desired width of the output image in pixels.
-            interpolation_modes (list/tuple, optional): A list/tuple of integers
-                that represent valid OpenCV interpolation modes. For example,
-                integers 0 through 5 are valid interpolation modes.
-            box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
-                A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
-                after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
-                the validity of the bounding boxes is not checked.
-            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
-                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
-        '''
-        if not (isinstance(interpolation_modes, (list, tuple))):
-            raise ValueError("`interpolation_mode` must be a list or tuple.")
-        self.height = height
-        self.width = width
-        self.interpolation_modes = interpolation_modes
-        self.box_filter = box_filter
-        self.labels_format = labels_format
-        self.resize = Resize(height=self.height,
-                             width=self.width,
-                             box_filter=self.box_filter,
-                             labels_format=self.labels_format)
-
-    def __call__(self, image, labels=None, return_inverter=False):
-        self.resize.interpolation_mode = np.random.choice(
-            self.interpolation_modes)
-        self.resize.labels_format = self.labels_format
-        return self.resize(image, labels, return_inverter)
-
-
-class Flip:
-    '''
-    Flips images horizontally or vertically.
-    '''
-
-    def __init__(self,
-                 dim='horizontal',
-                 labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
-        '''
-        Arguments:
-            dim (str, optional): Can be either of 'horizontal' and 'vertical'.
-                If 'horizontal', images will be flipped horizontally, i.e. along
-                the vertical axis. If 'vertical', images will be flipped vertically,
-                i.e. along the horizontal axis.
-            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
-                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
-        '''
-        if not (dim in {'horizontal', 'vertical'}):
-            raise ValueError(
-                "`dim` can be one of 'horizontal' and 'vertical'.")
-        self.dim = dim
-        self.labels_format = labels_format
-
-    def __call__(self, image, labels=None, return_inverter=False):
-
-        img_height, img_width = image.shape[:2]
-
-        xmin = self.labels_format['xmin']
-        ymin = self.labels_format['ymin']
-        xmax = self.labels_format['xmax']
-        ymax = self.labels_format['ymax']
-
-        if self.dim == 'horizontal':
-            image = image[:, ::-1]
-            if labels is None:
-                return image
-            else:
-                labels = np.copy(labels)
-                labels[:, [xmin, xmax]] = img_width - labels[:, [xmax, xmin]]
-                return image, labels
-        else:
-            image = image[::-1]
-            if labels is None:
-                return image
-            else:
-                labels = np.copy(labels)
-                labels[:, [ymin, ymax]] = img_height - labels[:, [ymax, ymin]]
-                return image, labels
-
-
-class RandomFlip:
-    '''
-    Randomly flips images horizontally or vertically. The randomness only refers
-    to whether or not the image will be flipped.
-    '''
-
-    def __init__(self,
-                 dim='horizontal',
-                 prob=0.5,
-                 labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
-        '''
-        Arguments:
-            dim (str, optional): Can be either of 'horizontal' and 'vertical'.
-                If 'horizontal', images will be flipped horizontally, i.e. along
-                the vertical axis. If 'vertical', images will be flipped vertically,
-                i.e. along the horizontal axis.
-            prob (float, optional): `(1 - prob)` determines the probability with which the original,
-                unaltered image is returned.
-            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
-                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
- ''' - self.dim = dim - self.prob = prob - self.labels_format = labels_format - self.flip = Flip(dim=self.dim, labels_format=self.labels_format) - - def __call__(self, image, labels=None): - p = np.random.uniform(0, 1) - if p >= (1.0-self.prob): - self.flip.labels_format = self.labels_format - return self.flip(image, labels) - elif labels is None: - return image - else: - return image, labels - - -class Translate: - ''' - Translates images horizontally and/or vertically. - ''' - - def __init__(self, - dy, - dx, - clip_boxes=True, - box_filter=None, - background=(0, 0, 0), - labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - ''' - Arguments: - dy (float): The fraction of the image height by which to translate images along the - vertical axis. Positive values translate images downwards, negative values - translate images upwards. - dx (float): The fraction of the image width by which to translate images along the - horizontal axis. Positive values translate images to the right, negative values - translate images to the left. - clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given. - If `True`, any ground truth bounding boxes will be clipped to lie entirely within the - image after the translation. - box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given. - A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria - after the transformation. Refer to the `BoxFilter` documentation for details. If `None`, - the validity of the bounding boxes is not checked. - background (list/tuple, optional): A 3-tuple specifying the RGB color value of the - background pixels of the translated images. - labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels - of an image contains which bounding box coordinate. The dictionary maps at least the keywords - 'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array. - ''' - - if not (isinstance(box_filter, BoxFilter) or box_filter is None): - raise ValueError( - "`box_filter` must be either `None` or a `BoxFilter` object.") - self.dy_rel = dy - self.dx_rel = dx - self.clip_boxes = clip_boxes - self.box_filter = box_filter - self.background = background - self.labels_format = labels_format - - def __call__(self, image, labels=None): - - img_height, img_width = image.shape[:2] - - # Compute the translation matrix. - dy_abs = int(round(img_height * self.dy_rel)) - dx_abs = int(round(img_width * self.dx_rel)) - M = np.float32([[1, 0, dx_abs], - [0, 1, dy_abs]]) - - # Translate the image. - image = cv2.warpAffine(image, - M=M, - dsize=(img_width, img_height), - borderMode=cv2.BORDER_CONSTANT, - borderValue=self.background) - - if labels is None: - return image - else: - xmin = self.labels_format['xmin'] - ymin = self.labels_format['ymin'] - xmax = self.labels_format['xmax'] - ymax = self.labels_format['ymax'] - - labels = np.copy(labels) - # Translate the box coordinates to the translated image's coordinate system. - labels[:, [xmin, xmax]] += dx_abs - labels[:, [ymin, ymax]] += dy_abs - - # Compute all valid boxes for this patch. 
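For the `Flip` transform defined above, it is worth noting how a single fancy-indexing assignment both mirrors and swaps the x coordinates at once. A small numeric check (the box values are made up):

    import numpy as np

    img_width = 100
    labels = np.array([[1, 10, 20, 30, 40]])  # class_id, xmin, ymin, xmax, ymax
    labels[:, [1, 3]] = img_width - labels[:, [3, 1]]
    # xmin becomes 100 - 30 = 70 and xmax becomes 100 - 10 = 90,
    # so xmin < xmax still holds after the horizontal flip.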
-            if not (self.box_filter is None):
-                self.box_filter.labels_format = self.labels_format
-                labels = self.box_filter(labels=labels,
-                                         image_height=img_height,
-                                         image_width=img_width)
-
-            if self.clip_boxes:
-                labels[:, [ymin, ymax]] = np.clip(
-                    labels[:, [ymin, ymax]], a_min=0, a_max=img_height-1)
-                labels[:, [xmin, xmax]] = np.clip(
-                    labels[:, [xmin, xmax]], a_min=0, a_max=img_width-1)
-
-            return image, labels
-
-
-class RandomTranslate:
-    '''
-    Randomly translates images horizontally and/or vertically.
-    '''
-
-    def __init__(self,
-                 dy_minmax=(0.03, 0.3),
-                 dx_minmax=(0.03, 0.3),
-                 prob=0.5,
-                 clip_boxes=True,
-                 box_filter=None,
-                 image_validator=None,
-                 n_trials_max=3,
-                 background=(0, 0, 0),
-                 labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
-        '''
-        Arguments:
-            dy_minmax (list/tuple, optional): A 2-tuple `(min, max)` of non-negative floats that
-                determines the minimum and maximum relative translation of images along the vertical
-                axis both upward and downward. That is, images will be randomly translated by at least
-                `min` and at most `max` either upward or downward. For example, if `dy_minmax == (0.05,0.3)`,
-                an image of size `(100,100)` will be translated by at least 5 and at most 30 pixels
-                either upward or downward. The translation direction is chosen randomly.
-            dx_minmax (list/tuple, optional): A 2-tuple `(min, max)` of non-negative floats that
-                determines the minimum and maximum relative translation of images along the horizontal
-                axis both to the left and right. That is, images will be randomly translated by at least
-                `min` and at most `max` either left or right. For example, if `dx_minmax == (0.05,0.3)`,
-                an image of size `(100,100)` will be translated by at least 5 and at most 30 pixels
-                either left or right. The translation direction is chosen randomly.
-            prob (float, optional): `(1 - prob)` determines the probability with which the original,
-                unaltered image is returned.
-            clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
-                If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
-                image after the translation.
-            box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
-                A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
-                after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
-                the validity of the bounding boxes is not checked.
-            image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
-                An `ImageValidator` object to determine whether a translated image is valid. If `None`,
-                any outcome is valid.
-            n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
-                Determines the maximal number of trials to produce a valid image. If no valid image could
-                be produced in `n_trials_max` trials, returns the unaltered input image.
-            background (list/tuple, optional): A 3-tuple specifying the RGB color value of the
-                background pixels of the translated images.
-            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
-                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
- ''' - if dy_minmax[0] > dy_minmax[1]: - raise ValueError("It must be `dy_minmax[0] <= dy_minmax[1]`.") - if dx_minmax[0] > dx_minmax[1]: - raise ValueError("It must be `dx_minmax[0] <= dx_minmax[1]`.") - if dy_minmax[0] < 0 or dx_minmax[0] < 0: - raise ValueError( - "It must be `dy_minmax[0] >= 0` and `dx_minmax[0] >= 0`.") - if not (isinstance(image_validator, ImageValidator) or image_validator is None): - raise ValueError( - "`image_validator` must be either `None` or an `ImageValidator` object.") - self.dy_minmax = dy_minmax - self.dx_minmax = dx_minmax - self.prob = prob - self.clip_boxes = clip_boxes - self.box_filter = box_filter - self.image_validator = image_validator - self.n_trials_max = n_trials_max - self.background = background - self.labels_format = labels_format - self.translate = Translate(dy=0, - dx=0, - clip_boxes=self.clip_boxes, - box_filter=self.box_filter, - background=self.background, - labels_format=self.labels_format) - - def __call__(self, image, labels=None): - - p = np.random.uniform(0, 1) - if p >= (1.0-self.prob): - - img_height, img_width = image.shape[:2] - - xmin = self.labels_format['xmin'] - ymin = self.labels_format['ymin'] - xmax = self.labels_format['xmax'] - ymax = self.labels_format['ymax'] - - # Override the preset labels format. - if not self.image_validator is None: - self.image_validator.labels_format = self.labels_format - self.translate.labels_format = self.labels_format - - for _ in range(max(1, self.n_trials_max)): - - # Pick the relative amount by which to translate. - dy_abs = np.random.uniform( - self.dy_minmax[0], self.dy_minmax[1]) - dx_abs = np.random.uniform( - self.dx_minmax[0], self.dx_minmax[1]) - # Pick the direction in which to translate. - dy = np.random.choice([-dy_abs, dy_abs]) - dx = np.random.choice([-dx_abs, dx_abs]) - self.translate.dy_rel = dy - self.translate.dx_rel = dx - - if (labels is None) or (self.image_validator is None): - # We either don't have any boxes or if we do, we will accept any outcome as valid. - return self.translate(image, labels) - else: - # Translate the box coordinates to the translated image's coordinate system. - new_labels = np.copy(labels) - new_labels[:, [ymin, ymax]] += int(round(img_height * dy)) - new_labels[:, [xmin, xmax]] += int(round(img_width * dx)) - - # Check if the patch is valid. - if self.image_validator(labels=new_labels, - image_height=img_height, - image_width=img_width): - return self.translate(image, labels) - - # If all attempts failed, return the unaltered input image. - if labels is None: - return image - - else: - return image, labels - - elif labels is None: - return image - - else: - return image, labels - - -class Scale: - ''' - Scales images, i.e. zooms in or out. - ''' - - def __init__(self, - factor, - clip_boxes=True, - box_filter=None, - background=(0, 0, 0), - labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - ''' - Arguments: - factor (float): The fraction of the image size by which to scale images. Must be positive. - clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given. - If `True`, any ground truth bounding boxes will be clipped to lie entirely within the - image after the translation. - box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given. - A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria - after the transformation. Refer to the `BoxFilter` documentation for details. 
If `None`, - the validity of the bounding boxes is not checked. - background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential - background pixels of the scaled images. - labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels - of an image contains which bounding box coordinate. The dictionary maps at least the keywords - 'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array. - ''' - - if factor <= 0: - raise ValueError("It must be `factor > 0`.") - if not (isinstance(box_filter, BoxFilter) or box_filter is None): - raise ValueError( - "`box_filter` must be either `None` or a `BoxFilter` object.") - self.factor = factor - self.clip_boxes = clip_boxes - self.box_filter = box_filter - self.background = background - self.labels_format = labels_format - - def __call__(self, image, labels=None): - - img_height, img_width = image.shape[:2] - - # Compute the rotation matrix. - M = cv2.getRotationMatrix2D(center=(img_width / 2, img_height / 2), - angle=0, - scale=self.factor) - - # Scale the image. - image = cv2.warpAffine(image, - M=M, - dsize=(img_width, img_height), - borderMode=cv2.BORDER_CONSTANT, - borderValue=self.background) - - if labels is None: - return image - else: - xmin = self.labels_format['xmin'] - ymin = self.labels_format['ymin'] - xmax = self.labels_format['xmax'] - ymax = self.labels_format['ymax'] - - labels = np.copy(labels) - # Scale the bounding boxes accordingly. - # Transform two opposite corner points of the rectangular boxes using the rotation matrix `M`. - toplefts = np.array( - [labels[:, xmin], labels[:, ymin], np.ones(labels.shape[0])]) - bottomrights = np.array( - [labels[:, xmax], labels[:, ymax], np.ones(labels.shape[0])]) - new_toplefts = (np.dot(M, toplefts)).T - new_bottomrights = (np.dot(M, bottomrights)).T - labels[:, [xmin, ymin]] = np.round( - new_toplefts, decimals=0).astype(np.int) - labels[:, [xmax, ymax]] = np.round( - new_bottomrights, decimals=0).astype(np.int) - - # Compute all valid boxes for this patch. - if not (self.box_filter is None): - self.box_filter.labels_format = self.labels_format - labels = self.box_filter(labels=labels, - image_height=img_height, - image_width=img_width) - - if self.clip_boxes: - labels[:, [ymin, ymax]] = np.clip( - labels[:, [ymin, ymax]], a_min=0, a_max=img_height-1) - labels[:, [xmin, xmax]] = np.clip( - labels[:, [xmin, xmax]], a_min=0, a_max=img_width-1) - - return image, labels - - -class RandomScale: - ''' - Randomly scales images. - ''' - - def __init__(self, - min_factor=0.5, - max_factor=1.5, - prob=0.5, - clip_boxes=True, - box_filter=None, - image_validator=None, - n_trials_max=3, - background=(0, 0, 0), - labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - ''' - Arguments: - min_factor (float, optional): The minimum fraction of the image size by which to scale images. - Must be positive. - max_factor (float, optional): The maximum fraction of the image size by which to scale images. - Must be positive. - prob (float, optional): `(1 - prob)` determines the probability with which the original, - unaltered image is returned. - clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given. - If `True`, any ground truth bounding boxes will be clipped to lie entirely within the - image after the translation. - box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given. 
-                A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
-                after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
-                the validity of the bounding boxes is not checked.
-            image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
-                An `ImageValidator` object to determine whether a scaled image is valid. If `None`,
-                any outcome is valid.
-            n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
-                Determines the maximal number of trials to produce a valid image. If no valid image could
-                be produced in `n_trials_max` trials, returns the unaltered input image.
-            background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
-                background pixels of the scaled images.
-            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
-                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
-        '''
-
-        if not (0 < min_factor <= max_factor):
-            raise ValueError("It must be `0 < min_factor <= max_factor`.")
-        if not (isinstance(image_validator, ImageValidator) or image_validator is None):
-            raise ValueError(
-                "`image_validator` must be either `None` or an `ImageValidator` object.")
-        self.min_factor = min_factor
-        self.max_factor = max_factor
-        self.prob = prob
-        self.clip_boxes = clip_boxes
-        self.box_filter = box_filter
-        self.image_validator = image_validator
-        self.n_trials_max = n_trials_max
-        self.background = background
-        self.labels_format = labels_format
-        self.scale = Scale(factor=1.0,
-                           clip_boxes=self.clip_boxes,
-                           box_filter=self.box_filter,
-                           background=self.background,
-                           labels_format=self.labels_format)
-
-    def __call__(self, image, labels=None):
-
-        p = np.random.uniform(0, 1)
-        if p >= (1.0-self.prob):
-
-            img_height, img_width = image.shape[:2]
-
-            xmin = self.labels_format['xmin']
-            ymin = self.labels_format['ymin']
-            xmax = self.labels_format['xmax']
-            ymax = self.labels_format['ymax']
-
-            # Override the preset labels format.
-            if not self.image_validator is None:
-                self.image_validator.labels_format = self.labels_format
-            self.scale.labels_format = self.labels_format
-
-            for _ in range(max(1, self.n_trials_max)):
-
-                # Pick a scaling factor.
-                factor = np.random.uniform(self.min_factor, self.max_factor)
-                self.scale.factor = factor
-
-                if (labels is None) or (self.image_validator is None):
-                    # We either don't have any boxes or if we do, we will accept any outcome as valid.
-                    return self.scale(image, labels)
-                else:
-                    # Scale the bounding boxes accordingly.
-                    # Transform two opposite corner points of the rectangular boxes using the rotation matrix `M`.
-                    toplefts = np.array(
-                        [labels[:, xmin], labels[:, ymin], np.ones(labels.shape[0])])
-                    bottomrights = np.array(
-                        [labels[:, xmax], labels[:, ymax], np.ones(labels.shape[0])])
-
-                    # Compute the rotation matrix.
-                    M = cv2.getRotationMatrix2D(center=(img_width / 2, img_height / 2),
-                                                angle=0,
-                                                scale=factor)
-
-                    new_toplefts = (np.dot(M, toplefts)).T
-                    new_bottomrights = (np.dot(M, bottomrights)).T
-
-                    new_labels = np.copy(labels)
-                    new_labels[:, [xmin, ymin]] = np.around(
-                        new_toplefts, decimals=0).astype(np.int)
-                    new_labels[:, [xmax, ymax]] = np.around(
-                        new_bottomrights, decimals=0).astype(np.int)
-
-                    # Check if the patch is valid.
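One detail in the corner transform above that is easy to miss: with `angle=0`, the 2x3 matrix returned by `cv2.getRotationMatrix2D` reduces to a pure scaling about the image center, applied to `[x, y, 1]` in homogeneous coordinates. A hand-computed sketch of that same matrix (the values are illustrative, no OpenCV needed):

    import numpy as np

    cx, cy, s = 50.0, 50.0, 2.0  # image center and scale factor
    M = np.array([[s, 0.0, (1.0 - s) * cx],
                  [0.0, s, (1.0 - s) * cy]])  # what getRotationMatrix2D yields for angle=0
    corner = np.array([30.0, 40.0, 1.0])      # one box corner in homogeneous coordinates
    print(M.dot(corner))  # [10. 30.]: the corner moves away from the center (50, 50)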
- if self.image_validator(labels=new_labels, - image_height=img_height, - image_width=img_width): - return self.scale(image, labels) - - # If all attempts failed, return the unaltered input image. - if labels is None: - return image - - else: - return image, labels - - elif labels is None: - return image - - else: - return image, labels - - -class Rotate: - ''' - Rotates images counter-clockwise by 90, 180, or 270 degrees. - ''' - - def __init__(self, - angle, - labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - ''' - Arguments: - angle (int): The angle in degrees by which to rotate the images counter-clockwise. - Only 90, 180, and 270 are valid values. - labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels - of an image contains which bounding box coordinate. The dictionary maps at least the keywords - 'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array. - ''' - - if not angle in {90, 180, 270}: - raise ValueError("`angle` must be in the set {90, 180, 270}.") - self.angle = angle - self.labels_format = labels_format - - def __call__(self, image, labels=None): - - img_height, img_width = image.shape[:2] - - # Compute the rotation matrix. - M = cv2.getRotationMatrix2D(center=(img_width / 2, img_height / 2), - angle=self.angle, - scale=1) - - # Get the sine and cosine from the rotation matrix. - cos_angle = np.abs(M[0, 0]) - sin_angle = np.abs(M[0, 1]) - - # Compute the new bounding dimensions of the image. - img_width_new = int(img_height * sin_angle + img_width * cos_angle) - img_height_new = int(img_height * cos_angle + img_width * sin_angle) - - # Adjust the rotation matrix to take into account the translation. - M[1, 2] += (img_height_new - img_height) / 2 - M[0, 2] += (img_width_new - img_width) / 2 - - # Rotate the image. - image = cv2.warpAffine(image, - M=M, - dsize=(img_width_new, img_height_new)) - - if labels is None: - return image - else: - xmin = self.labels_format['xmin'] - ymin = self.labels_format['ymin'] - xmax = self.labels_format['xmax'] - ymax = self.labels_format['ymax'] - - labels = np.copy(labels) - # Rotate the bounding boxes accordingly. - # Transform two opposite corner points of the rectangular boxes using the rotation matrix `M`. - toplefts = np.array( - [labels[:, xmin], labels[:, ymin], np.ones(labels.shape[0])]) - bottomrights = np.array( - [labels[:, xmax], labels[:, ymax], np.ones(labels.shape[0])]) - new_toplefts = (np.dot(M, toplefts)).T - new_bottomrights = (np.dot(M, bottomrights)).T - labels[:, [xmin, ymin]] = np.round( - new_toplefts, decimals=0).astype(np.int) - labels[:, [xmax, ymax]] = np.round( - new_bottomrights, decimals=0).astype(np.int) - - if self.angle == 90: - # ymin and ymax were switched by the rotation. - labels[:, [ymax, ymin]] = labels[:, [ymin, ymax]] - elif self.angle == 180: - # ymin and ymax were switched by the rotation, - # and also xmin and xmax were switched. - labels[:, [ymax, ymin]] = labels[:, [ymin, ymax]] - labels[:, [xmax, xmin]] = labels[:, [xmin, xmax]] - elif self.angle == 270: - # xmin and xmax were switched by the rotation. - labels[:, [xmax, xmin]] = labels[:, [xmin, xmax]] - - return image, labels - - -class RandomRotate: - ''' - Randomly rotates images counter-clockwise. 
-    '''
-
-    def __init__(self,
-                 angles=[90, 180, 270],
-                 prob=0.5,
-                 labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
-        '''
-        Arguments:
-            angles (list): The list of angles in degrees from which one is randomly selected to rotate
-                the images counter-clockwise. Only 90, 180, and 270 are valid values.
-            prob (float, optional): `(1 - prob)` determines the probability with which the original,
-                unaltered image is returned.
-            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
-                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
-        '''
-        for angle in angles:
-            if not angle in {90, 180, 270}:
-                raise ValueError(
-                    "`angles` can only contain the values 90, 180, and 270.")
-        self.angles = angles
-        self.prob = prob
-        self.labels_format = labels_format
-        self.rotate = Rotate(angle=90, labels_format=self.labels_format)
-
-    def __call__(self, image, labels=None):
-
-        p = np.random.uniform(0, 1)
-        if p >= (1.0-self.prob):
-            # Pick a rotation angle.
-            self.rotate.angle = random.choice(self.angles)
-            self.rotate.labels_format = self.labels_format
-            return self.rotate(image, labels)
-
-        elif labels is None:
-            return image
-
-        else:
-            return image, labels
-'''
-Utilities for 2D object detection related to answering the following questions:
-1. Given an image size and bounding boxes, which bounding boxes meet certain
-   requirements with respect to the image size?
-2. Given an image size and bounding boxes, is an image of that size valid with
-   respect to the bounding boxes according to certain requirements?
-
-Copyright (C) 2018 Pierluigi Ferrari
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-from __future__ import division
-import numpy as np
-
-from bounding_box_utils.bounding_box_utils import iou
-
-
-class BoundGenerator:
-    '''
-    Generates pairs of floating point values that represent lower and upper bounds
-    from a given sample space.
-    '''
-
-    def __init__(self,
-                 sample_space=((0.1, None),
-                               (0.3, None),
-                               (0.5, None),
-                               (0.7, None),
-                               (0.9, None),
-                               (None, None)),
-                 weights=None):
-        '''
-        Arguments:
-            sample_space (list or tuple): A list, tuple, or array-like object of shape
-                `(n, 2)` that contains `n` samples to choose from, where each sample
-                is a 2-tuple of scalars and/or `None` values.
-            weights (list or tuple, optional): A list or tuple representing the distribution
-                over the sample space. If `None`, a uniform distribution will be assumed.
-        '''
-
-        if (not (weights is None)) and len(weights) != len(sample_space):
-            raise ValueError(
-                "`weights` must either be `None` for uniform distribution or have the same length as `sample_space`.")
-
-        self.sample_space = []
-        for bound_pair in sample_space:
-            if len(bound_pair) != 2:
-                raise ValueError(
-                    "All elements of the sample space must be 2-tuples.")
-            bound_pair = list(bound_pair)
-            if bound_pair[0] is None:
-                bound_pair[0] = 0.0
-            if bound_pair[1] is None:
-                bound_pair[1] = 1.0
-            if bound_pair[0] > bound_pair[1]:
-                raise ValueError(
-                    "For all sample space elements, the lower bound cannot be greater than the upper bound.")
-            self.sample_space.append(bound_pair)
-
-        self.sample_space_size = len(self.sample_space)
-
-        if weights is None:
-            self.weights = [1.0/self.sample_space_size] * \
-                self.sample_space_size
-        else:
-            self.weights = weights
-
-    def __call__(self):
-        '''
-        Returns:
-            An item of the sample space, i.e. a 2-tuple of scalars.
-        '''
-        i = np.random.choice(self.sample_space_size, p=self.weights)
-        return self.sample_space[i]
-
-
-class BoxFilter:
-    '''
-    Returns all bounding boxes that are valid with respect to the defined criteria.
-    '''
-
-    def __init__(self,
-                 check_overlap=True,
-                 check_min_area=True,
-                 check_degenerate=True,
-                 overlap_criterion='center_point',
-                 overlap_bounds=(0.3, 1.0),
-                 min_area=16,
-                 labels_format={'class_id': 0, 'xmin': 1,
-                                'ymin': 2, 'xmax': 3, 'ymax': 4},
-                 border_pixels='half'):
-        '''
-        Arguments:
-            check_overlap (bool, optional): Whether or not to enforce the overlap requirements defined by
-                `overlap_criterion` and `overlap_bounds`. Sometimes you might want to use the box filter only
-                to enforce a certain minimum area for all boxes (see next argument), in such cases you can
-                turn the overlap requirements off.
-            check_min_area (bool, optional): Whether or not to enforce the minimum area requirement defined
-                by `min_area`. If `True`, any boxes that have an area (in pixels) that is smaller than `min_area`
-                will be removed from the labels of an image. Bounding boxes below a certain area aren't useful
-                training examples. An object that takes up only, say, 5 pixels in an image is probably not
-                recognizable anymore, neither for a human, nor for an object detection model. It makes sense
-                to remove such boxes.
-            check_degenerate (bool, optional): Whether or not to check for and remove degenerate bounding boxes.
-                Degenerate bounding boxes are boxes that have `xmax <= xmin` and/or `ymax <= ymin`. In particular,
-                boxes with a width and/or height of zero are degenerate. It is obviously important to filter out
-                such boxes, so you should only set this option to `False` if you are certain that degenerate
-                boxes are not possible in your data and processing chain.
-            overlap_criterion (str, optional): Can be either of 'center_point', 'iou', or 'area'. Determines
-                which boxes are considered valid with respect to a given image. If set to 'center_point',
-                a given bounding box is considered valid if its center point lies within the image.
-                If set to 'area', a given bounding box is considered valid if the quotient of its intersection
-                area with the image and its own area is within the given `overlap_bounds`. If set to 'iou', a given
-                bounding box is considered valid if its IoU with the image is within the given `overlap_bounds`.
-            overlap_bounds (list or BoundGenerator, optional): Only relevant if `overlap_criterion` is 'area' or 'iou'.
-                Determines the lower and upper bounds for `overlap_criterion`. Can be either a 2-tuple of scalars
-                representing a lower bound and an upper bound, or a `BoundGenerator` object, which provides
-                the possibility to generate bounds randomly.
-            min_area (int, optional): Only relevant if `check_min_area` is `True`. Defines the minimum area in
-                pixels that a bounding box must have in order to be valid. Boxes with an area smaller than this
-                will be removed.
-            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
-                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
-            border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
-                Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
-                to the boxes. If 'exclude', the border pixels do not belong to the boxes.
-                If 'half', then one of each of the two horizontal and vertical borders belong
-                to the boxes, but not the other.
-        '''
-        if not isinstance(overlap_bounds, (list, tuple, BoundGenerator)):
-            raise ValueError(
-                "`overlap_bounds` must be either a 2-tuple of scalars or a `BoundGenerator` object.")
-        if isinstance(overlap_bounds, (list, tuple)) and (overlap_bounds[0] > overlap_bounds[1]):
-            raise ValueError(
-                "The lower bound must not be greater than the upper bound.")
-        if not (overlap_criterion in {'iou', 'area', 'center_point'}):
-            raise ValueError(
-                "`overlap_criterion` must be one of 'iou', 'area', or 'center_point'.")
-        self.overlap_criterion = overlap_criterion
-        self.overlap_bounds = overlap_bounds
-        self.min_area = min_area
-        self.check_overlap = check_overlap
-        self.check_min_area = check_min_area
-        self.check_degenerate = check_degenerate
-        self.labels_format = labels_format
-        self.border_pixels = border_pixels
-
-    def __call__(self,
-                 labels,
-                 image_height=None,
-                 image_width=None):
-        '''
-        Arguments:
-            labels (array): The labels to be filtered. This is an array with shape `(m,n)`, where
-                `m` is the number of bounding boxes and `n` is the number of elements that defines
-                each bounding box (box coordinates, class ID, etc.). The box coordinates are expected
-                to be in the image's coordinate system.
-            image_height (int): Only relevant if `check_overlap == True`. The height of the image
-                (in pixels) to compare the box coordinates to.
-            image_width (int): Only relevant if `check_overlap == True`. The width of the image
-                (in pixels) to compare the box coordinates to.
-
-        Returns:
-            An array containing the labels of all boxes that are valid.
-        '''
-
-        labels = np.copy(labels)
-
-        xmin = self.labels_format['xmin']
-        ymin = self.labels_format['ymin']
-        xmax = self.labels_format['xmax']
-        ymax = self.labels_format['ymax']
-
-        # Record the boxes that pass all checks here.
-        requirements_met = np.ones(shape=labels.shape[0], dtype=np.bool)
-
-        if self.check_degenerate:
-
-            non_degenerate = (
-                labels[:, xmax] > labels[:, xmin]) * (labels[:, ymax] > labels[:, ymin])
-            requirements_met *= non_degenerate
-
-        if self.check_min_area:
-
-            min_area_met = (labels[:, xmax] - labels[:, xmin]) * \
-                (labels[:, ymax] - labels[:, ymin]) >= self.min_area
-            requirements_met *= min_area_met
-
-        if self.check_overlap:
-
-            # Get the lower and upper bounds.
-            if isinstance(self.overlap_bounds, BoundGenerator):
-                lower, upper = self.overlap_bounds()
-            else:
-                lower, upper = self.overlap_bounds
-
-            # Compute which boxes are valid.
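A note on the masking idiom used here: `requirements_met` starts out all `True`, and each enabled check multiplies it by another boolean array, which acts as an element-wise logical AND. A compact illustration:

    import numpy as np

    requirements_met = np.ones(3, dtype=bool)
    requirements_met *= np.array([True, True, False])  # e.g. the degenerate-box check
    requirements_met *= np.array([True, False, True])  # e.g. the minimum-area check
    # array([ True, False, False]): a box survives only if it passes every check.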
-
-            if self.overlap_criterion == 'iou':
-                # Compute the patch coordinates.
-                image_coords = np.array([0, 0, image_width, image_height])
-                # Compute the IoU between the patch and all of the ground truth boxes.
-                image_boxes_iou = iou(image_coords, labels[:, [
-                    xmin, ymin, xmax, ymax]], coords='corners', mode='element-wise', border_pixels=self.border_pixels)
-                requirements_met *= (image_boxes_iou > lower) * \
-                    (image_boxes_iou <= upper)
-
-            elif self.overlap_criterion == 'area':
-                if self.border_pixels == 'half':
-                    d = 0
-                elif self.border_pixels == 'include':
-                    # If border pixels are supposed to belong to the bounding boxes, we have to add one pixel to any difference `xmax - xmin` or `ymax - ymin`.
-                    d = 1
-                elif self.border_pixels == 'exclude':
-                    # If border pixels are not supposed to belong to the bounding boxes, we have to subtract one pixel from any difference `xmax - xmin` or `ymax - ymin`.
-                    d = -1
-                # Compute the areas of the boxes.
-                box_areas = (labels[:, xmax] - labels[:, xmin] + d) * \
-                    (labels[:, ymax] - labels[:, ymin] + d)
-                # Compute the intersection area between the patch and all of the ground truth boxes.
-                clipped_boxes = np.copy(labels)
-                clipped_boxes[:, [ymin, ymax]] = np.clip(
-                    labels[:, [ymin, ymax]], a_min=0, a_max=image_height-1)
-                clipped_boxes[:, [xmin, xmax]] = np.clip(
-                    labels[:, [xmin, xmax]], a_min=0, a_max=image_width-1)
-                # +1 because the border pixels belong to the box areas.
-                intersection_areas = (clipped_boxes[:, xmax] - clipped_boxes[:, xmin] + d) * (
-                    clipped_boxes[:, ymax] - clipped_boxes[:, ymin] + d)
-                # Check which boxes meet the overlap requirements.
-                if lower == 0.0:
-                    # If `self.lower == 0`, we want to make sure that boxes with area 0 don't count, hence the ">" sign instead of the ">=" sign.
-                    mask_lower = intersection_areas > lower * box_areas
-                else:
-                    # Especially for the case `self.lower == 1` we want the ">=" sign, otherwise no boxes would count at all.
-                    mask_lower = intersection_areas >= lower * box_areas
-                mask_upper = intersection_areas <= upper * box_areas
-                requirements_met *= mask_lower * mask_upper
-
-            elif self.overlap_criterion == 'center_point':
-                # Compute the center points of the boxes.
-                cy = (labels[:, ymin] + labels[:, ymax]) / 2
-                cx = (labels[:, xmin] + labels[:, xmax]) / 2
-                # Check which of the boxes have center points within the cropped patch and remove those that don't.
-                requirements_met *= (cy >= 0.0) * (cy <= image_height-1) * \
-                    (cx >= 0.0) * (cx <= image_width-1)
-
-        return labels[requirements_met]
-
-
-class ImageValidator:
-    '''
-    Returns `True` if a given minimum number of bounding boxes meets given overlap
-    requirements with an image of a given height and width.
-    '''
-
-    def __init__(self,
-                 overlap_criterion='center_point',
-                 bounds=(0.3, 1.0),
-                 n_boxes_min=1,
-                 labels_format={'class_id': 0, 'xmin': 1,
-                                'ymin': 2, 'xmax': 3, 'ymax': 4},
-                 border_pixels='half'):
-        '''
-        Arguments:
-            overlap_criterion (str, optional): Can be either of 'center_point', 'iou', or 'area'. Determines
-                which boxes are considered valid with respect to a given image. If set to 'center_point',
-                a given bounding box is considered valid if its center point lies within the image.
-                If set to 'area', a given bounding box is considered valid if the quotient of its intersection
-                area with the image and its own area is within `lower` and `upper`. If set to 'iou', a given
-                bounding box is considered valid if its IoU with the image is within `lower` and `upper`.
-            bounds (list or BoundGenerator, optional): Only relevant if `overlap_criterion` is 'area' or 'iou'.
-                Determines the lower and upper bounds for `overlap_criterion`. Can be either a 2-tuple of scalars
-                representing a lower bound and an upper bound, or a `BoundGenerator` object, which provides
-                the possibility to generate bounds randomly.
-            n_boxes_min (int or str, optional): Either a positive integer or the string 'all'.
-                Determines the minimum number of boxes that must meet the `overlap_criterion` with respect to
-                an image of the given height and width in order for the image to be a valid image.
-                If set to 'all', an image is considered valid if all given boxes meet the `overlap_criterion`.
-            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
-                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array.
-            border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
-                Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
-                to the boxes. If 'exclude', the border pixels do not belong to the boxes.
-                If 'half', then one of each of the two horizontal and vertical borders belong
-                to the boxes, but not the other.
-        '''
-        if not ((isinstance(n_boxes_min, int) and n_boxes_min > 0) or n_boxes_min == 'all'):
-            raise ValueError(
-                "`n_boxes_min` must be a positive integer or 'all'.")
-        self.overlap_criterion = overlap_criterion
-        self.bounds = bounds
-        self.n_boxes_min = n_boxes_min
-        self.labels_format = labels_format
-        self.border_pixels = border_pixels
-        self.box_filter = BoxFilter(check_overlap=True,
-                                    check_min_area=False,
-                                    check_degenerate=False,
-                                    overlap_criterion=self.overlap_criterion,
-                                    overlap_bounds=self.bounds,
-                                    labels_format=self.labels_format,
-                                    border_pixels=self.border_pixels)
-
-    def __call__(self,
-                 labels,
-                 image_height,
-                 image_width):
-        '''
-        Arguments:
-            labels (array): The labels to be tested. The box coordinates are expected
-                to be in the image's coordinate system.
-            image_height (int): The height of the image to compare the box coordinates to.
-            image_width (int): The width of the image to compare the box coordinates to.
-
-        Returns:
-            A boolean indicating whether an image of the given height and width is
-            valid with respect to the given bounding boxes.
-        '''
-
-        self.box_filter.overlap_bounds = self.bounds
-        self.box_filter.labels_format = self.labels_format
-
-        # Get all boxes that meet the overlap requirements.
-        valid_labels = self.box_filter(labels=labels,
-                                       image_height=image_height,
-                                       image_width=image_width)
-
-        # Check whether enough boxes meet the requirements.
-        if isinstance(self.n_boxes_min, int):
-            # The image is valid if at least `self.n_boxes_min` ground truth boxes meet the requirements.
-            if len(valid_labels) >= self.n_boxes_min:
-                return True
-            else:
-                return False
-        elif self.n_boxes_min == 'all':
-            # The image is valid if all ground truth boxes meet the requirements.
-            if len(valid_labels) == len(labels):
-                return True
-            else:
-                return False
-'''
-Miscellaneous data generator utilities.
-
-Copyright (C) 2018 Pierluigi Ferrari
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-from __future__ import division
-import numpy as np
-
-
-def apply_inverse_transforms(y_pred_decoded, inverse_transforms):
-    '''
-    Takes a list or Numpy array of decoded predictions and applies a given list of
-    transforms to them. The list of inverse transforms would usually contain the
-    inverter functions that some of the image transformations that come with this
-    data generator return. This function would normally be used to transform predictions
-    that were made on a transformed image back to the original image.
-
-    Arguments:
-        y_pred_decoded (list or array): Either a list of length `batch_size` that
-            contains Numpy arrays that contain the predictions for each batch item
-            or a Numpy array. If this is a list of Numpy arrays, the arrays would
-            usually have the shape `(num_predictions, 6)`, where `num_predictions`
-            is different for each batch item. If this is a Numpy array, it would
-            usually have the shape `(batch_size, num_predictions, 6)`. The last axis
-            would usually contain the class ID, confidence score, and four bounding
-            box coordinates for each prediction.
-        inverse_transforms (list): A nested list of length `batch_size` that contains
-            for each batch item a list of functions that take one argument (one element
-            of `y_pred_decoded` if it is a list or one slice along the first axis of
-            `y_pred_decoded` if it is an array) and return an output of the same shape
-            and data type.
-
-    Returns:
-        The transformed predictions, which have the same structure as `y_pred_decoded`.
-    '''
-
-    if isinstance(y_pred_decoded, list):
-
-        y_pred_decoded_inv = []
-
-        for i in range(len(y_pred_decoded)):
-            y_pred_decoded_inv.append(np.copy(y_pred_decoded[i]))
-            # If there are any predictions for this batch item.
-            if y_pred_decoded_inv[i].size > 0:
-                for inverter in inverse_transforms[i]:
-                    if not (inverter is None):
-                        y_pred_decoded_inv[i] = inverter(y_pred_decoded_inv[i])
-
-    elif isinstance(y_pred_decoded, np.ndarray):
-
-        y_pred_decoded_inv = np.copy(y_pred_decoded)
-
-        for i in range(len(y_pred_decoded)):
-            # If there are any predictions for this batch item.
-            if y_pred_decoded_inv[i].size > 0:
-                for inverter in inverse_transforms[i]:
-                    if not (inverter is None):
-                        y_pred_decoded_inv[i] = inverter(y_pred_decoded_inv[i])
-
-    else:
-        raise ValueError(
-            "`y_pred_decoded` must be either a list or a Numpy array.")
-
-    return y_pred_decoded_inv
-'''
-Various patch sampling operations for data augmentation in 2D object detection.
-
-Copyright (C) 2018 Pierluigi Ferrari
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-''' - -from __future__ import division -import numpy as np - -from data_generator.object_detection_2d_image_boxes_validation_utils import BoundGenerator, BoxFilter, ImageValidator - - -class PatchCoordinateGenerator: - ''' - Generates random patch coordinates that meet specified requirements. - ''' - - def __init__(self, - img_height=None, - img_width=None, - must_match='h_w', - min_scale=0.3, - max_scale=1.0, - scale_uniformly=False, - min_aspect_ratio=0.5, - max_aspect_ratio=2.0, - patch_ymin=None, - patch_xmin=None, - patch_height=None, - patch_width=None, - patch_aspect_ratio=None): - ''' - Arguments: - img_height (int): The height of the image for which the patch coordinates - shall be generated. Doesn't have to be known upon construction. - img_width (int): The width of the image for which the patch coordinates - shall be generated. Doesn't have to be known upon construction. - must_match (str, optional): Can be either of 'h_w', 'h_ar', and 'w_ar'. - Specifies which two of the three quantities height, width, and aspect - ratio determine the shape of the generated patch. The respective third - quantity will be computed from the other two. For example, - if `must_match == 'h_w'`, then the patch's height and width will be - set to lie within [min_scale, max_scale] of the image size or to - `patch_height` and/or `patch_width`, if given. The patch's aspect ratio - is the dependent variable in this case, it will be computed from the - height and width. Any given values for `patch_aspect_ratio`, - `min_aspect_ratio`, or `max_aspect_ratio` will be ignored. - min_scale (float, optional): The minimum size of a dimension of the patch - as a fraction of the respective dimension of the image. Can be greater - than 1. For example, if the image width is 200 and `min_scale == 0.5`, - then the width of the generated patch will be at least 100. If `min_scale == 1.5`, - the width of the generated patch will be at least 300. - max_scale (float, optional): The maximum size of a dimension of the patch - as a fraction of the respective dimension of the image. Can be greater - than 1. For example, if the image width is 200 and `max_scale == 1.0`, - then the width of the generated patch will be at most 200. If `max_scale == 1.5`, - the width of the generated patch will be at most 300. Must be greater than - `min_scale`. - scale_uniformly (bool, optional): If `True` and if `must_match == 'h_w'`, - the patch height and width will be scaled uniformly, otherwise they will - be scaled independently. - min_aspect_ratio (float, optional): Determines the minimum aspect ratio - for the generated patches. - max_aspect_ratio (float, optional): Determines the maximum aspect ratio - for the generated patches. - patch_ymin (int, optional): `None` or the vertical coordinate of the top left - corner of the generated patches. If this is not `None`, the position of the - patches along the vertical axis is fixed. If this is `None`, then the - vertical position of generated patches will be chosen randomly such that - the overlap of a patch and the image along the vertical dimension is - always maximal. - patch_xmin (int, optional): `None` or the horizontal coordinate of the top left - corner of the generated patches. If this is not `None`, the position of the - patches along the horizontal axis is fixed. If this is `None`, then the - horizontal position of generated patches will be chosen randomly such that - the overlap of a patch and the image along the horizontal dimension is - always maximal. 
- patch_height (int, optional): `None` or the fixed height of the generated patches. - patch_width (int, optional): `None` or the fixed width of the generated patches. - patch_aspect_ratio (float, optional): `None` or the fixed aspect ratio of the - generated patches. - ''' - - if not (must_match in {'h_w', 'h_ar', 'w_ar'}): - raise ValueError( - "`must_match` must be either of 'h_w', 'h_ar' and 'w_ar'.") - if min_scale >= max_scale: - raise ValueError("It must be `min_scale < max_scale`.") - if min_aspect_ratio >= max_aspect_ratio: - raise ValueError( - "It must be `min_aspect_ratio < max_aspect_ratio`.") - if scale_uniformly and not ((patch_height is None) and (patch_width is None)): - raise ValueError( - "If `scale_uniformly == True`, `patch_height` and `patch_width` must both be `None`.") - self.img_height = img_height - self.img_width = img_width - self.must_match = must_match - self.min_scale = min_scale - self.max_scale = max_scale - self.scale_uniformly = scale_uniformly - self.min_aspect_ratio = min_aspect_ratio - self.max_aspect_ratio = max_aspect_ratio - self.patch_ymin = patch_ymin - self.patch_xmin = patch_xmin - self.patch_height = patch_height - self.patch_width = patch_width - self.patch_aspect_ratio = patch_aspect_ratio - - def __call__(self): - ''' - Returns: - A 4-tuple `(ymin, xmin, height, width)` that represents the coordinates - of the generated patch. - ''' - - # Get the patch height and width. - - if self.must_match == 'h_w': # Aspect is the dependent variable. - if not self.scale_uniformly: - # Get the height. - if self.patch_height is None: - patch_height = int(np.random.uniform( - self.min_scale, self.max_scale) * self.img_height) - else: - patch_height = self.patch_height - # Get the width. - if self.patch_width is None: - patch_width = int(np.random.uniform( - self.min_scale, self.max_scale) * self.img_width) - else: - patch_width = self.patch_width - else: - scaling_factor = np.random.uniform( - self.min_scale, self.max_scale) - patch_height = int(scaling_factor * self.img_height) - patch_width = int(scaling_factor * self.img_width) - - elif self.must_match == 'h_ar': # Width is the dependent variable. - # Get the height. - if self.patch_height is None: - patch_height = int(np.random.uniform( - self.min_scale, self.max_scale) * self.img_height) - else: - patch_height = self.patch_height - # Get the aspect ratio. - if self.patch_aspect_ratio is None: - patch_aspect_ratio = np.random.uniform( - self.min_aspect_ratio, self.max_aspect_ratio) - else: - patch_aspect_ratio = self.patch_aspect_ratio - # Get the width. - patch_width = int(patch_height * patch_aspect_ratio) - - elif self.must_match == 'w_ar': # Height is the dependent variable. - # Get the width. - if self.patch_width is None: - patch_width = int(np.random.uniform( - self.min_scale, self.max_scale) * self.img_width) - else: - patch_width = self.patch_width - # Get the aspect ratio. - if self.patch_aspect_ratio is None: - patch_aspect_ratio = np.random.uniform( - self.min_aspect_ratio, self.max_aspect_ratio) - else: - patch_aspect_ratio = self.patch_aspect_ratio - # Get the height. - patch_height = int(patch_width / patch_aspect_ratio) - - # Get the top left corner coordinates of the patch. - - if self.patch_ymin is None: - # Compute how much room we have along the vertical axis to place the patch. 
-            # A negative number here means that we want to sample a patch that is larger than the original image
-            # in the vertical dimension, in which case the patch will be placed such that it fully contains the
-            # image in the vertical dimension.
-            y_range = self.img_height - patch_height
-            # Select a random top left corner for the sample position from the possible positions.
-            if y_range >= 0:
-                # There are y_range + 1 possible positions for the crop in the vertical dimension.
-                patch_ymin = np.random.randint(0, y_range + 1)
-            else:
-                # The possible positions for the image on the background canvas in the vertical dimension.
-                patch_ymin = np.random.randint(y_range, 1)
-        else:
-            patch_ymin = self.patch_ymin
-
-        if self.patch_xmin is None:
-            # Compute how much room we have along the horizontal axis to place the patch.
-            # A negative number here means that we want to sample a patch that is larger than the original image
-            # in the horizontal dimension, in which case the patch will be placed such that it fully contains the
-            # image in the horizontal dimension.
-            x_range = self.img_width - patch_width
-            # Select a random top left corner for the sample position from the possible positions.
-            if x_range >= 0:
-                # There are x_range + 1 possible positions for the crop in the horizontal dimension.
-                patch_xmin = np.random.randint(0, x_range + 1)
-            else:
-                # The possible positions for the image on the background canvas in the horizontal dimension.
-                patch_xmin = np.random.randint(x_range, 1)
-        else:
-            patch_xmin = self.patch_xmin
-
-        return (patch_ymin, patch_xmin, patch_height, patch_width)
-
-
-class CropPad:
-    '''
-    Crops and/or pads an image deterministically.
-
-    Depending on the given output patch size and the position (top left corner) relative
-    to the input image, the image will be cropped and/or padded along one or both spatial
-    dimensions.
-
-    For example, if the output patch lies entirely within the input image, this will result
-    in a regular crop. If the input image lies entirely within the output patch, this will
-    result in the image being padded in every direction. All other cases are mixed cases
-    where the image might be cropped in some directions and padded in others.
-
-    The output patch can be arbitrary in both size and position as long as it overlaps
-    with the input image.
-    '''
-
-    def __init__(self,
-                 patch_ymin,
-                 patch_xmin,
-                 patch_height,
-                 patch_width,
-                 clip_boxes=True,
-                 box_filter=None,
-                 background=(0, 0, 0),
-                 labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
-        '''
-        Arguments:
-            patch_ymin (int, optional): The vertical coordinate of the top left corner of the output
-                patch relative to the image coordinate system. Can be negative (i.e. lie outside the image)
-                as long as the resulting patch still overlaps with the image.
-            patch_xmin (int, optional): The horizontal coordinate of the top left corner of the output
-                patch relative to the image coordinate system. Can be negative (i.e. lie outside the image)
-                as long as the resulting patch still overlaps with the image.
-            patch_height (int): The height of the patch to be sampled from the image. Can be greater
-                than the height of the input image.
-            patch_width (int): The width of the patch to be sampled from the image. Can be greater
-                than the width of the input image.
-            clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
-                If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
-                sampled patch.
-            box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
-                A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
-                after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
-                the validity of the bounding boxes is not checked.
-            background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
-                background pixels of the scaled images. In the case of single-channel images,
-                the first element of `background` will be used as the background pixel value.
-            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
-                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within the last axis of the labels array.
-        '''
-        # if (patch_height <= 0) or (patch_width <= 0):
-        #     raise ValueError("Patch height and width must both be positive.")
-        # if (patch_ymin + patch_height < 0) or (patch_xmin + patch_width < 0):
-        #     raise ValueError("A patch with the given coordinates cannot overlap with an input image.")
-        if not (isinstance(box_filter, BoxFilter) or box_filter is None):
-            raise ValueError(
-                "`box_filter` must be either `None` or a `BoxFilter` object.")
-        self.patch_height = patch_height
-        self.patch_width = patch_width
-        self.patch_ymin = patch_ymin
-        self.patch_xmin = patch_xmin
-        self.clip_boxes = clip_boxes
-        self.box_filter = box_filter
-        self.background = background
-        self.labels_format = labels_format
-
-    def __call__(self, image, labels=None, return_inverter=False):
-
-        img_height, img_width = image.shape[:2]
-
-        if (self.patch_ymin > img_height) or (self.patch_xmin > img_width):
-            raise ValueError(
-                "The given patch doesn't overlap with the input image.")
-
-        # Copy the labels only if there are any; `np.copy(None)` would produce a
-        # 0-d object array rather than `None` and break the no-labels path below.
-        if labels is not None:
-            labels = np.copy(labels)
-
-        xmin = self.labels_format['xmin']
-        ymin = self.labels_format['ymin']
-        xmax = self.labels_format['xmax']
-        ymax = self.labels_format['ymax']
-
-        # Top left corner of the patch relative to the image coordinate system:
-        patch_ymin = self.patch_ymin
-        patch_xmin = self.patch_xmin
-
-        # Create a canvas of the size of the patch we want to end up with.
-        if image.ndim == 3:
-            canvas = np.zeros(
-                shape=(self.patch_height, self.patch_width, 3), dtype=np.uint8)
-            canvas[:, :] = self.background
-        elif image.ndim == 2:
-            canvas = np.zeros(
-                shape=(self.patch_height, self.patch_width), dtype=np.uint8)
-            canvas[:, :] = self.background[0]
-
-        # Perform the crop.
-        # Pad the image at the top and on the left.
-        if patch_ymin < 0 and patch_xmin < 0:
-            # The number of pixels of the image that will end up on the canvas in the vertical direction.
-            image_crop_height = min(img_height, self.patch_height + patch_ymin)
-            # The number of pixels of the image that will end up on the canvas in the horizontal direction.
-            image_crop_width = min(img_width, self.patch_width + patch_xmin)
-            canvas[-patch_ymin:-patch_ymin + image_crop_height, -patch_xmin:-
-                   patch_xmin + image_crop_width] = image[:image_crop_height, :image_crop_width]
-
-        # Pad the image at the top and crop it on the left.
-        elif patch_ymin < 0 and patch_xmin >= 0:
-            # The number of pixels of the image that will end up on the canvas in the vertical direction.
-            image_crop_height = min(img_height, self.patch_height + patch_ymin)
-            # The number of pixels of the image that will end up on the canvas in the horizontal direction.
- image_crop_width = min(self.patch_width, img_width - patch_xmin) - canvas[-patch_ymin:-patch_ymin + image_crop_height, - :image_crop_width] = image[:image_crop_height, patch_xmin:patch_xmin + image_crop_width] - - # Crop the image at the top and pad it on the left. - elif patch_ymin >= 0 and patch_xmin < 0: - # The number of pixels of the image that will end up on the canvas in the vertical direction. - image_crop_height = min(self.patch_height, img_height - patch_ymin) - # The number of pixels of the image that will end up on the canvas in the horizontal direction. - image_crop_width = min(img_width, self.patch_width + patch_xmin) - canvas[:image_crop_height, -patch_xmin:-patch_xmin + - image_crop_width] = image[patch_ymin:patch_ymin + image_crop_height, :image_crop_width] - - # Crop the image at the top and on the left. - elif patch_ymin >= 0 and patch_xmin >= 0: - # The number of pixels of the image that will end up on the canvas in the vertical direction. - image_crop_height = min(self.patch_height, img_height - patch_ymin) - # The number of pixels of the image that will end up on the canvas in the horizontal direction. - image_crop_width = min(self.patch_width, img_width - patch_xmin) - canvas[:image_crop_height, :image_crop_width] = image[patch_ymin:patch_ymin + - image_crop_height, patch_xmin:patch_xmin + image_crop_width] - - image = canvas - - if return_inverter: - def inverter(labels): - labels = np.copy(labels) - labels[:, [ymin+1, ymax+1]] += patch_ymin - labels[:, [xmin+1, xmax+1]] += patch_xmin - return labels - - if not (labels is None): - - # Translate the box coordinates to the patch's coordinate system. - labels[:, [ymin, ymax]] -= patch_ymin - labels[:, [xmin, xmax]] -= patch_xmin - - # Compute all valid boxes for this patch. - if not (self.box_filter is None): - self.box_filter.labels_format = self.labels_format - labels = self.box_filter(labels=labels, - image_height=self.patch_height, - image_width=self.patch_width) - - if self.clip_boxes: - labels[:, [ymin, ymax]] = np.clip( - labels[:, [ymin, ymax]], a_min=0, a_max=self.patch_height-1) - labels[:, [xmin, xmax]] = np.clip( - labels[:, [xmin, xmax]], a_min=0, a_max=self.patch_width-1) - - if return_inverter: - return image, labels, inverter - else: - return image, labels - - else: - if return_inverter: - return image, inverter - else: - return image - - -class Crop: - ''' - Crops off the specified numbers of pixels from the borders of images. - - This is just a convenience interface for `CropPad`. 
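-
-    Example (a minimal usage sketch, not from the original docs; assumes `image`
-    is a Numpy array of shape `(height, width, 3)` and `labels` an array of
-    boxes in the default `labels_format`):
-
-        crop = Crop(crop_top=10, crop_bottom=10, crop_left=20, crop_right=20)
-        cropped_image, cropped_labels = crop(image, labels)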
- ''' - - def __init__(self, - crop_top, - crop_bottom, - crop_left, - crop_right, - clip_boxes=True, - box_filter=None, - labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - self.crop_top = crop_top - self.crop_bottom = crop_bottom - self.crop_left = crop_left - self.crop_right = crop_right - self.clip_boxes = clip_boxes - self.box_filter = box_filter - self.labels_format = labels_format - self.crop = CropPad(patch_ymin=self.crop_top, - patch_xmin=self.crop_left, - patch_height=None, - patch_width=None, - clip_boxes=self.clip_boxes, - box_filter=self.box_filter, - labels_format=self.labels_format) - - def __call__(self, image, labels=None, return_inverter=False): - - img_height, img_width = image.shape[:2] - - self.crop.patch_height = img_height - self.crop_top - self.crop_bottom - self.crop.patch_width = img_width - self.crop_left - self.crop_right - self.crop.labels_format = self.labels_format - - return self.crop(image, labels, return_inverter) - - -class Pad: - ''' - Pads images by the specified numbers of pixels on each side. - - This is just a convenience interface for `CropPad`. - ''' - - def __init__(self, - pad_top, - pad_bottom, - pad_left, - pad_right, - background=(0, 0, 0), - labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - self.pad_top = pad_top - self.pad_bottom = pad_bottom - self.pad_left = pad_left - self.pad_right = pad_right - self.background = background - self.labels_format = labels_format - self.pad = CropPad(patch_ymin=-self.pad_top, - patch_xmin=-self.pad_left, - patch_height=None, - patch_width=None, - clip_boxes=False, - box_filter=None, - background=self.background, - labels_format=self.labels_format) - - def __call__(self, image, labels=None, return_inverter=False): - - img_height, img_width = image.shape[:2] - - self.pad.patch_height = img_height + self.pad_top + self.pad_bottom - self.pad.patch_width = img_width + self.pad_left + self.pad_right - self.pad.labels_format = self.labels_format - - return self.pad(image, labels, return_inverter) - - -class RandomPatch: - ''' - Randomly samples a patch from an image. The randomness refers to whatever - randomness may be introduced by the patch coordinate generator, the box filter, - and the patch validator. - - Input images may be cropped and/or padded along either or both of the two - spatial dimensions as necessary in order to obtain the required patch. - - As opposed to `RandomPatchInf`, it is possible for this transform to fail to produce - an output image at all, in which case it will return `None`. This is useful, because - if this transform is used to generate patches of a fixed size or aspect ratio, then - the caller needs to be able to rely on the output image satisfying the set size or - aspect ratio. It might therefore not be an option to return the unaltered input image - as other random transforms do when they fail to produce a valid transformed image. - ''' - - def __init__(self, - patch_coord_generator, - box_filter=None, - image_validator=None, - n_trials_max=3, - clip_boxes=True, - prob=1.0, - background=(0, 0, 0), - can_fail=False, - labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - ''' - Arguments: - patch_coord_generator (PatchCoordinateGenerator): A `PatchCoordinateGenerator` object - to generate the positions and sizes of the patches to be sampled from the input images. - box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given. 
-                A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
-                after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
-                the validity of the bounding boxes is not checked.
-            image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
-                An `ImageValidator` object to determine whether a sampled patch is valid. If `None`,
-                any outcome is valid.
-            n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
-                Determines the maximal number of trials to sample a valid patch. If no valid patch could
-                be sampled in `n_trials_max` trials, returns one `None` in place of each regular output.
-            clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
-                If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
-                sampled patch.
-            prob (float, optional): `(1 - prob)` determines the probability with which the original,
-                unaltered image is returned.
-            background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
-                background pixels of the scaled images. In the case of single-channel images,
-                the first element of `background` will be used as the background pixel value.
-            can_fail (bool, optional): If `True`, will return `None` if no valid patch could be found after
-                `n_trials_max` trials. If `False`, will return the unaltered input image in such a case.
-            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
-                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within the last axis of the labels array.
-        '''
-        if not isinstance(patch_coord_generator, PatchCoordinateGenerator):
-            raise ValueError(
-                "`patch_coord_generator` must be an instance of `PatchCoordinateGenerator`.")
-        if not (isinstance(image_validator, ImageValidator) or image_validator is None):
-            raise ValueError(
-                "`image_validator` must be either `None` or an `ImageValidator` object.")
-        self.patch_coord_generator = patch_coord_generator
-        self.box_filter = box_filter
-        self.image_validator = image_validator
-        self.n_trials_max = n_trials_max
-        self.clip_boxes = clip_boxes
-        self.prob = prob
-        self.background = background
-        self.can_fail = can_fail
-        self.labels_format = labels_format
-        self.sample_patch = CropPad(patch_ymin=None,
-                                    patch_xmin=None,
-                                    patch_height=None,
-                                    patch_width=None,
-                                    clip_boxes=self.clip_boxes,
-                                    box_filter=self.box_filter,
-                                    background=self.background,
-                                    labels_format=self.labels_format)
-
-    def __call__(self, image, labels=None, return_inverter=False):
-
-        p = np.random.uniform(0, 1)
-        if p >= (1.0-self.prob):
-
-            img_height, img_width = image.shape[:2]
-            self.patch_coord_generator.img_height = img_height
-            self.patch_coord_generator.img_width = img_width
-
-            xmin = self.labels_format['xmin']
-            ymin = self.labels_format['ymin']
-            xmax = self.labels_format['xmax']
-            ymax = self.labels_format['ymax']
-
-            # Override the preset labels format.
-            if self.image_validator is not None:
-                self.image_validator.labels_format = self.labels_format
-            self.sample_patch.labels_format = self.labels_format
-
-            for _ in range(max(1, self.n_trials_max)):
-
-                # Generate patch coordinates.
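-                # Each call to the generator yields a fresh random
-                # `(ymin, xmin, height, width)` tuple for the next candidate patch.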
- patch_ymin, patch_xmin, patch_height, patch_width = self.patch_coord_generator() - - self.sample_patch.patch_ymin = patch_ymin - self.sample_patch.patch_xmin = patch_xmin - self.sample_patch.patch_height = patch_height - self.sample_patch.patch_width = patch_width - - if (labels is None) or (self.image_validator is None): - # We either don't have any boxes or if we do, we will accept any outcome as valid. - return self.sample_patch(image, labels, return_inverter) - else: - # Translate the box coordinates to the patch's coordinate system. - new_labels = np.copy(labels) - new_labels[:, [ymin, ymax]] -= patch_ymin - new_labels[:, [xmin, xmax]] -= patch_xmin - # Check if the patch is valid. - if self.image_validator(labels=new_labels, - image_height=patch_height, - image_width=patch_width): - return self.sample_patch(image, labels, return_inverter) - - # If we weren't able to sample a valid patch... - if self.can_fail: - # ...return `None`. - if labels is None: - if return_inverter: - return None, None - else: - return None - else: - if return_inverter: - return None, None, None - else: - return None, None - else: - # ...return the unaltered input image. - if labels is None: - if return_inverter: - return image, None - else: - return image - else: - if return_inverter: - return image, labels, None - else: - return image, labels - - else: - if return_inverter: - def inverter(labels): - return labels - - if labels is None: - if return_inverter: - return image, inverter - else: - return image - else: - if return_inverter: - return image, labels, inverter - else: - return image, labels - - -class RandomPatchInf: - ''' - Randomly samples a patch from an image. The randomness refers to whatever - randomness may be introduced by the patch coordinate generator, the box filter, - and the patch validator. - - Input images may be cropped and/or padded along either or both of the two - spatial dimensions as necessary in order to obtain the required patch. - - This operation is very similar to `RandomPatch`, except that: - 1. This operation runs indefinitely until either a valid patch is found or - the input image is returned unaltered, i.e. it cannot fail. - 2. If a bound generator is given, a new pair of bounds will be generated - every `n_trials_max` iterations. - ''' - - def __init__(self, - patch_coord_generator, - box_filter=None, - image_validator=None, - bound_generator=None, - n_trials_max=50, - clip_boxes=True, - prob=0.857, - background=(0, 0, 0), - labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - ''' - Arguments: - patch_coord_generator (PatchCoordinateGenerator): A `PatchCoordinateGenerator` object - to generate the positions and sizes of the patches to be sampled from the input images. - box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given. - A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria - after the transformation. Refer to the `BoxFilter` documentation for details. If `None`, - the validity of the bounding boxes is not checked. - image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given. - An `ImageValidator` object to determine whether a sampled patch is valid. If `None`, - any outcome is valid. - bound_generator (BoundGenerator, optional): A `BoundGenerator` object to generate upper and - lower bound values for the patch validator. 
 Every `n_trials_max` trials, a new pair of
-                upper and lower bounds will be generated until a valid patch is found or the original image
-                is returned. This bound generator overrides the bound generator of the patch validator.
-            n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
-                The sampler will run indefinitely until either a valid patch is found or the original image
-                is returned, but this determines the maximal number of trials to sample a valid patch for each
-                selected pair of lower and upper bounds before a new pair is picked.
-            clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
-                If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
-                sampled patch.
-            prob (float, optional): `(1 - prob)` determines the probability with which the original,
-                unaltered image is returned.
-            background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential
-                background pixels of the scaled images. In the case of single-channel images,
-                the first element of `background` will be used as the background pixel value.
-            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
-                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within the last axis of the labels array.
-        '''
-
-        if not isinstance(patch_coord_generator, PatchCoordinateGenerator):
-            raise ValueError(
-                "`patch_coord_generator` must be an instance of `PatchCoordinateGenerator`.")
-        if not (isinstance(image_validator, ImageValidator) or image_validator is None):
-            raise ValueError(
-                "`image_validator` must be either `None` or an `ImageValidator` object.")
-        if not (isinstance(bound_generator, BoundGenerator) or bound_generator is None):
-            raise ValueError(
-                "`bound_generator` must be either `None` or a `BoundGenerator` object.")
-        self.patch_coord_generator = patch_coord_generator
-        self.box_filter = box_filter
-        self.image_validator = image_validator
-        self.bound_generator = bound_generator
-        self.n_trials_max = n_trials_max
-        self.clip_boxes = clip_boxes
-        self.prob = prob
-        self.background = background
-        self.labels_format = labels_format
-        self.sample_patch = CropPad(patch_ymin=None,
-                                    patch_xmin=None,
-                                    patch_height=None,
-                                    patch_width=None,
-                                    clip_boxes=self.clip_boxes,
-                                    box_filter=self.box_filter,
-                                    background=self.background,
-                                    labels_format=self.labels_format)
-
-    def __call__(self, image, labels=None, return_inverter=False):
-
-        img_height, img_width = image.shape[:2]
-        self.patch_coord_generator.img_height = img_height
-        self.patch_coord_generator.img_width = img_width
-
-        xmin = self.labels_format['xmin']
-        ymin = self.labels_format['ymin']
-        xmax = self.labels_format['xmax']
-        ymax = self.labels_format['ymax']
-
-        # Override the preset labels format.
-        if self.image_validator is not None:
-            self.image_validator.labels_format = self.labels_format
-        self.sample_patch.labels_format = self.labels_format
-
-        while True:  # Keep going until we either find a valid patch or return the original image.
-
-            p = np.random.uniform(0, 1)
-            if p >= (1.0-self.prob):
-
-                # In case we have a bound generator, pick a lower and upper bound for the patch validator.
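-                # Re-sampling the bounds every `n_trials_max` trials keeps the sampler
-                # from getting stuck on bounds that the current image cannot satisfy.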
-                if not ((self.image_validator is None) or (self.bound_generator is None)):
-                    self.image_validator.bounds = self.bound_generator()
-
-                # Use at most `self.n_trials_max` attempts to find a crop
-                # that meets our requirements.
-                for _ in range(max(1, self.n_trials_max)):
-
-                    # Generate patch coordinates.
-                    patch_ymin, patch_xmin, patch_height, patch_width = self.patch_coord_generator()
-
-                    self.sample_patch.patch_ymin = patch_ymin
-                    self.sample_patch.patch_xmin = patch_xmin
-                    self.sample_patch.patch_height = patch_height
-                    self.sample_patch.patch_width = patch_width
-
-                    # Check if the resulting patch meets the aspect ratio requirements.
-                    aspect_ratio = patch_width / patch_height
-                    if not (self.patch_coord_generator.min_aspect_ratio <= aspect_ratio <= self.patch_coord_generator.max_aspect_ratio):
-                        continue
-
-                    if (labels is None) or (self.image_validator is None):
-                        # We either don't have any boxes or if we do, we will accept any outcome as valid.
-                        return self.sample_patch(image, labels, return_inverter)
-                    else:
-                        # Translate the box coordinates to the patch's coordinate system.
-                        new_labels = np.copy(labels)
-                        new_labels[:, [ymin, ymax]] -= patch_ymin
-                        new_labels[:, [xmin, xmax]] -= patch_xmin
-                        # Check if the patch contains the minimum number of boxes we require.
-                        if self.image_validator(labels=new_labels,
-                                                image_height=patch_height,
-                                                image_width=patch_width):
-                            return self.sample_patch(image, labels, return_inverter)
-            else:
-                if return_inverter:
-                    def inverter(labels):
-                        return labels
-
-                if labels is None:
-                    if return_inverter:
-                        return image, inverter
-                    else:
-                        return image
-                else:
-                    if return_inverter:
-                        return image, labels, inverter
-                    else:
-                        return image, labels
-
-
-class RandomMaxCropFixedAR:
-    '''
-    Crops the largest possible patch of a given fixed aspect ratio
-    from an image.
-
-    Since the aspect ratio of the sampled patches is constant, they
-    can subsequently be resized to the same size without distortion.
-    '''
-
-    def __init__(self,
-                 patch_aspect_ratio,
-                 box_filter=None,
-                 image_validator=None,
-                 n_trials_max=3,
-                 clip_boxes=True,
-                 labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
-        '''
-        Arguments:
-            patch_aspect_ratio (float): The fixed aspect ratio that all sampled patches will have.
-            box_filter (BoxFilter, optional): Only relevant if ground truth bounding boxes are given.
-                A `BoxFilter` object to filter out bounding boxes that don't meet the given criteria
-                after the transformation. Refer to the `BoxFilter` documentation for details. If `None`,
-                the validity of the bounding boxes is not checked.
-            image_validator (ImageValidator, optional): Only relevant if ground truth bounding boxes are given.
-                An `ImageValidator` object to determine whether a sampled patch is valid. If `None`,
-                any outcome is valid.
-            n_trials_max (int, optional): Only relevant if ground truth bounding boxes are given.
-                Determines the maximal number of trials to sample a valid patch. If no valid patch could
-                be sampled in `n_trials_max` trials, the unaltered input image is returned (the internal
-                `RandomPatch` is constructed with `can_fail=False`).
-            clip_boxes (bool, optional): Only relevant if ground truth bounding boxes are given.
-                If `True`, any ground truth bounding boxes will be clipped to lie entirely within the
-                sampled patch.
-            labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels
-                of an image contains which bounding box coordinate. The dictionary maps at least the keywords
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within the last axis of the labels array.
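-
-            Example (a minimal usage sketch, not from the original docs; assumes
-            `image` is an RGB Numpy array and `labels` an array of boxes in the
-            default `labels_format`):
-
-                cropper = RandomMaxCropFixedAR(patch_aspect_ratio=4.0/3.0)
-                patch, patch_labels = cropper(image, labels)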
- ''' - - self.patch_aspect_ratio = patch_aspect_ratio - self.box_filter = box_filter - self.image_validator = image_validator - self.n_trials_max = n_trials_max - self.clip_boxes = clip_boxes - self.labels_format = labels_format - self.random_patch = RandomPatch(patch_coord_generator=PatchCoordinateGenerator(), # Just a dummy object - box_filter=self.box_filter, - image_validator=self.image_validator, - n_trials_max=self.n_trials_max, - clip_boxes=self.clip_boxes, - prob=1.0, - can_fail=False, - labels_format=self.labels_format) - - def __call__(self, image, labels=None, return_inverter=False): - - img_height, img_width = image.shape[:2] - - # The ratio of the input image aspect ratio and patch aspect ratio determines the maximal possible crop. - image_aspect_ratio = img_width / img_height - - if image_aspect_ratio < self.patch_aspect_ratio: - patch_width = img_width - patch_height = int(round(patch_width / self.patch_aspect_ratio)) - else: - patch_height = img_height - patch_width = int(round(patch_height * self.patch_aspect_ratio)) - - # Now that we know the desired height and width for the patch, - # instantiate an appropriate patch coordinate generator. - patch_coord_generator = PatchCoordinateGenerator(img_height=img_height, - img_width=img_width, - must_match='h_w', - patch_height=patch_height, - patch_width=patch_width) - - # The rest of the work is done by `RandomPatch`. - self.random_patch.patch_coord_generator = patch_coord_generator - self.random_patch.labels_format = self.labels_format - return self.random_patch(image, labels, return_inverter) - - -class RandomPadFixedAR: - ''' - Adds the minimal possible padding to an image that results in a patch - of the given fixed aspect ratio that contains the entire image. - - Since the aspect ratio of the resulting images is constant, they - can subsequently be resized to the same size without distortion. - ''' - - def __init__(self, - patch_aspect_ratio, - background=(0, 0, 0), - labels_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}): - ''' - Arguments: - patch_aspect_ratio (float): The fixed aspect ratio that all sampled patches will have. - background (list/tuple, optional): A 3-tuple specifying the RGB color value of the potential - background pixels of the scaled images. In the case of single-channel images, - the first element of `background` will be used as the background pixel value. - labels_format (dict, optional): A dictionary that defines which index in the last axis of the labels - of an image contains which bounding box coordinate. The dictionary maps at least the keywords - 'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within last axis of the labels array. 
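-
-            Example (a minimal usage sketch, not from the original docs; assumes
-            `image` is an RGB Numpy array):
-
-                padder = RandomPadFixedAR(patch_aspect_ratio=16.0/9.0)
-                padded_image, padded_labels = padder(image, labels)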
-        '''
-
-        self.patch_aspect_ratio = patch_aspect_ratio
-        self.background = background
-        self.labels_format = labels_format
-        self.random_patch = RandomPatch(patch_coord_generator=PatchCoordinateGenerator(),  # Just a dummy object
-                                        box_filter=None,
-                                        image_validator=None,
-                                        n_trials_max=1,
-                                        clip_boxes=False,
-                                        background=self.background,
-                                        prob=1.0,
-                                        labels_format=self.labels_format)
-
-    def __call__(self, image, labels=None, return_inverter=False):
-
-        img_height, img_width = image.shape[:2]
-
-        if img_width < img_height:
-            patch_height = img_height
-            patch_width = int(round(patch_height * self.patch_aspect_ratio))
-        else:
-            patch_width = img_width
-            patch_height = int(round(patch_width / self.patch_aspect_ratio))
-
-        # Now that we know the desired height and width for the patch,
-        # instantiate an appropriate patch coordinate generator.
-        patch_coord_generator = PatchCoordinateGenerator(img_height=img_height,
-                                                         img_width=img_width,
-                                                         must_match='h_w',
-                                                         patch_height=patch_height,
-                                                         patch_width=patch_width)
-
-        # The rest of the work is done by `RandomPatch`.
-        self.random_patch.patch_coord_generator = patch_coord_generator
-        self.random_patch.labels_format = self.labels_format
-        return self.random_patch(image, labels, return_inverter)
-'''
-Various photometric image transformations, both deterministic and probabilistic.
-
-Copyright (C) 2018 Pierluigi Ferrari
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-from __future__ import division
-import numpy as np
-import cv2
-
-
-class ConvertColor:
-    '''
-    Converts images between RGB, HSV and grayscale color spaces. This is just a wrapper
-    around `cv2.cvtColor()`.
-    '''
-
-    def __init__(self, current='RGB', to='HSV', keep_3ch=True):
-        '''
-        Arguments:
-            current (str, optional): The current color space of the images. Can be
-                one of 'RGB' and 'HSV'.
-            to (str, optional): The target color space of the images. Can be one of
-                'RGB', 'HSV', and 'GRAY'.
-            keep_3ch (bool, optional): Only relevant if `to == GRAY`.
-                If `True`, the resulting grayscale images will have three channels.
-        '''
-        if not ((current in {'RGB', 'HSV'}) and (to in {'RGB', 'HSV', 'GRAY'})):
-            raise NotImplementedError
-        self.current = current
-        self.to = to
-        self.keep_3ch = keep_3ch
-
-    def __call__(self, image, labels=None):
-        if self.current == 'RGB' and self.to == 'HSV':
-            image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV)
-        elif self.current == 'RGB' and self.to == 'GRAY':
-            image = cv2.cvtColor(image, cv2.COLOR_RGB2GRAY)
-            if self.keep_3ch:
-                image = np.stack([image] * 3, axis=-1)
-        elif self.current == 'HSV' and self.to == 'RGB':
-            image = cv2.cvtColor(image, cv2.COLOR_HSV2RGB)
-        elif self.current == 'HSV' and self.to == 'GRAY':
-            # OpenCV has no direct HSV-to-grayscale conversion code, so convert to RGB first.
-            image = cv2.cvtColor(cv2.cvtColor(image, cv2.COLOR_HSV2RGB), cv2.COLOR_RGB2GRAY)
-            if self.keep_3ch:
-                image = np.stack([image] * 3, axis=-1)
-        if labels is None:
-            return image
-        else:
-            return image, labels
-
-
-class ConvertDataType:
-    '''
-    Converts images represented as Numpy arrays between `uint8` and `float32`.
-    Serves as a helper for certain photometric distortions. 
This is just a wrapper - around `np.ndarray.astype()`. - ''' - - def __init__(self, to='uint8'): - ''' - Arguments: - to (string, optional): To which datatype to convert the input images. - Can be either of 'uint8' and 'float32'. - ''' - if not (to == 'uint8' or to == 'float32'): - raise ValueError("`to` can be either of 'uint8' or 'float32'.") - self.to = to - - def __call__(self, image, labels=None): - if self.to == 'uint8': - image = np.round(image, decimals=0).astype(np.uint8) - else: - image = image.astype(np.float32) - if labels is None: - return image - else: - return image, labels - - -class ConvertTo3Channels: - ''' - Converts 1-channel and 4-channel images to 3-channel images. Does nothing to images that - already have 3 channels. In the case of 4-channel images, the fourth channel will be - discarded. - ''' - - def __init__(self): - pass - - def __call__(self, image, labels=None): - if image.ndim == 2: - image = np.stack([image] * 3, axis=-1) - elif image.ndim == 3: - if image.shape[2] == 1: - image = np.concatenate([image] * 3, axis=-1) - elif image.shape[2] == 4: - image = image[:, :, :3] - if labels is None: - return image - else: - return image, labels - - -class Hue: - ''' - Changes the hue of HSV images. - - Important: - - Expects HSV input. - - Expects input array to be of `dtype` `float`. - ''' - - def __init__(self, delta): - ''' - Arguments: - delta (int): An integer in the closed interval `[-180, 180]` that determines the hue change, where - a change by integer `delta` means a change by `2 * delta` degrees. Read up on the HSV color format - if you need more information. - ''' - if not (-180 <= delta <= 180): - raise ValueError( - "`delta` must be in the closed interval `[-180, 180]`.") - self.delta = delta - - def __call__(self, image, labels=None): - image[:, :, 0] = (image[:, :, 0] + self.delta) % 180.0 - if labels is None: - return image - else: - return image, labels - - -class RandomHue: - ''' - Randomly changes the hue of HSV images. - - Important: - - Expects HSV input. - - Expects input array to be of `dtype` `float`. - ''' - - def __init__(self, max_delta=18, prob=0.5): - ''' - Arguments: - max_delta (int): An integer in the closed interval `[0, 180]` that determines the maximal absolute - hue change. - prob (float, optional): `(1 - prob)` determines the probability with which the original, - unaltered image is returned. - ''' - if not (0 <= max_delta <= 180): - raise ValueError( - "`max_delta` must be in the closed interval `[0, 180]`.") - self.max_delta = max_delta - self.prob = prob - self.change_hue = Hue(delta=0) - - def __call__(self, image, labels=None): - p = np.random.uniform(0, 1) - if p >= (1.0-self.prob): - self.change_hue.delta = np.random.uniform( - -self.max_delta, self.max_delta) - return self.change_hue(image, labels) - elif labels is None: - return image - else: - return image, labels - - -class Saturation: - ''' - Changes the saturation of HSV images. - - Important: - - Expects HSV input. - - Expects input array to be of `dtype` `float`. - ''' - - def __init__(self, factor): - ''' - Arguments: - factor (float): A float greater than zero that determines saturation change, where - values less than one result in less saturation and values greater than one result - in more saturation. 
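-
-            Example (a minimal usage sketch, not from the original docs; assumes
-            `hsv_image` is a `float32` Numpy array in HSV color space):
-
-                saturate = Saturation(factor=1.5)
-                more_saturated = saturate(hsv_image)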
- ''' - if factor <= 0.0: - raise ValueError("It must be `factor > 0`.") - self.factor = factor - - def __call__(self, image, labels=None): - image[:, :, 1] = np.clip(image[:, :, 1] * self.factor, 0, 255) - if labels is None: - return image - else: - return image, labels - - -class RandomSaturation: - ''' - Randomly changes the saturation of HSV images. - - Important: - - Expects HSV input. - - Expects input array to be of `dtype` `float`. - ''' - - def __init__(self, lower=0.3, upper=2.0, prob=0.5): - ''' - Arguments: - lower (float, optional): A float greater than zero, the lower bound for the random - saturation change. - upper (float, optional): A float greater than zero, the upper bound for the random - saturation change. Must be greater than `lower`. - prob (float, optional): `(1 - prob)` determines the probability with which the original, - unaltered image is returned. - ''' - if lower >= upper: - raise ValueError("`upper` must be greater than `lower`.") - self.lower = lower - self.upper = upper - self.prob = prob - self.change_saturation = Saturation(factor=1.0) - - def __call__(self, image, labels=None): - p = np.random.uniform(0, 1) - if p >= (1.0-self.prob): - self.change_saturation.factor = np.random.uniform( - self.lower, self.upper) - return self.change_saturation(image, labels) - elif labels is None: - return image - else: - return image, labels - - -class Brightness: - ''' - Changes the brightness of RGB images. - - Important: - - Expects RGB input. - - Expects input array to be of `dtype` `float`. - ''' - - def __init__(self, delta): - ''' - Arguments: - delta (int): An integer, the amount to add to or subtract from the intensity - of every pixel. - ''' - self.delta = delta - - def __call__(self, image, labels=None): - image = np.clip(image + self.delta, 0, 255) - if labels is None: - return image - else: - return image, labels - - -class RandomBrightness: - ''' - Randomly changes the brightness of RGB images. - - Important: - - Expects RGB input. - - Expects input array to be of `dtype` `float`. - ''' - - def __init__(self, lower=-84, upper=84, prob=0.5): - ''' - Arguments: - lower (int, optional): An integer, the lower bound for the random brightness change. - upper (int, optional): An integer, the upper bound for the random brightness change. - Must be greater than `lower`. - prob (float, optional): `(1 - prob)` determines the probability with which the original, - unaltered image is returned. - ''' - if lower >= upper: - raise ValueError("`upper` must be greater than `lower`.") - self.lower = float(lower) - self.upper = float(upper) - self.prob = prob - self.change_brightness = Brightness(delta=0) - - def __call__(self, image, labels=None): - p = np.random.uniform(0, 1) - if p >= (1.0-self.prob): - self.change_brightness.delta = np.random.uniform( - self.lower, self.upper) - return self.change_brightness(image, labels) - elif labels is None: - return image - else: - return image, labels - - -class Contrast: - ''' - Changes the contrast of RGB images. - - Important: - - Expects RGB input. - - Expects input array to be of `dtype` `float`. - ''' - - def __init__(self, factor): - ''' - Arguments: - factor (float): A float greater than zero that determines contrast change, where - values less than one result in less contrast and values greater than one result - in more contrast. 
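-
-            Example (a minimal usage sketch, not from the original docs; assumes
-            `rgb_image` is a `float32` Numpy array in RGB color space):
-
-                contrast = Contrast(factor=0.8)
-                lower_contrast = contrast(rgb_image)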
-        '''
-        if factor <= 0.0:
-            raise ValueError("It must be `factor > 0`.")
-        self.factor = factor
-
-    def __call__(self, image, labels=None):
-        image = np.clip(127.5 + self.factor * (image - 127.5), 0, 255)
-        if labels is None:
-            return image
-        else:
-            return image, labels
-
-
-class RandomContrast:
-    '''
-    Randomly changes the contrast of RGB images.
-
-    Important:
-        - Expects RGB input.
-        - Expects input array to be of `dtype` `float`.
-    '''
-
-    def __init__(self, lower=0.5, upper=1.5, prob=0.5):
-        '''
-        Arguments:
-            lower (float, optional): A float greater than zero, the lower bound for the random
-                contrast change.
-            upper (float, optional): A float greater than zero, the upper bound for the random
-                contrast change. Must be greater than `lower`.
-            prob (float, optional): `(1 - prob)` determines the probability with which the original,
-                unaltered image is returned.
-        '''
-        if lower >= upper:
-            raise ValueError("`upper` must be greater than `lower`.")
-        self.lower = lower
-        self.upper = upper
-        self.prob = prob
-        self.change_contrast = Contrast(factor=1.0)
-
-    def __call__(self, image, labels=None):
-        p = np.random.uniform(0, 1)
-        if p >= (1.0-self.prob):
-            self.change_contrast.factor = np.random.uniform(
-                self.lower, self.upper)
-            return self.change_contrast(image, labels)
-        elif labels is None:
-            return image
-        else:
-            return image, labels
-
-
-class Gamma:
-    '''
-    Changes the gamma value of RGB images.
-
-    Important: Expects RGB input.
-    '''
-
-    def __init__(self, gamma):
-        '''
-        Arguments:
-            gamma (float): A float greater than zero that determines gamma change.
-        '''
-        if gamma <= 0.0:
-            raise ValueError("It must be `gamma > 0`.")
-        self.gamma = gamma
-        self.gamma_inv = 1.0 / gamma
-        # Build a lookup table mapping the pixel values [0, 255] to
-        # their adjusted gamma values.
-        self.table = np.array(
-            [((i / 255.0) ** self.gamma_inv) * 255 for i in np.arange(0, 256)]).astype("uint8")
-
-    def __call__(self, image, labels=None):
-        # Look up the gamma-adjusted values from the table precomputed in the
-        # constructor (`self.table`, not a bare `table`).
-        image = cv2.LUT(image, self.table)
-        if labels is None:
-            return image
-        else:
-            return image, labels
-
-
-class RandomGamma:
-    '''
-    Randomly changes the gamma value of RGB images.
-
-    Important: Expects RGB input.
-    '''
-
-    def __init__(self, lower=0.25, upper=2.0, prob=0.5):
-        '''
-        Arguments:
-            lower (float, optional): A float greater than zero, the lower bound for the random
-                gamma change.
-            upper (float, optional): A float greater than zero, the upper bound for the random
-                gamma change. Must be greater than `lower`.
-            prob (float, optional): `(1 - prob)` determines the probability with which the original,
-                unaltered image is returned.
-        '''
-        if lower >= upper:
-            raise ValueError("`upper` must be greater than `lower`.")
-        self.lower = lower
-        self.upper = upper
-        self.prob = prob
-
-    def __call__(self, image, labels=None):
-        p = np.random.uniform(0, 1)
-        if p >= (1.0-self.prob):
-            gamma = np.random.uniform(self.lower, self.upper)
-            change_gamma = Gamma(gamma=gamma)
-            return change_gamma(image, labels)
-        elif labels is None:
-            return image
-        else:
-            return image, labels
-
-
-class HistogramEqualization:
-    '''
-    Performs histogram equalization on HSV images.
-
-    Important: Expects HSV input.
-    '''
-
-    def __init__(self):
-        pass
-
-    def __call__(self, image, labels=None):
-        image[:, :, 2] = cv2.equalizeHist(image[:, :, 2])
-        if labels is None:
-            return image
-        else:
-            return image, labels
-
-
-class RandomHistogramEqualization:
-    '''
-    Randomly performs histogram equalization on HSV images.
 The randomness only refers
-    to whether or not the equalization is performed.
-
-    Important: Expects HSV input.
-    '''
-
-    def __init__(self, prob=0.5):
-        '''
-        Arguments:
-            prob (float, optional): `(1 - prob)` determines the probability with which the original,
-                unaltered image is returned.
-        '''
-        self.prob = prob
-        self.equalize = HistogramEqualization()
-
-    def __call__(self, image, labels=None):
-        p = np.random.uniform(0, 1)
-        if p >= (1.0-self.prob):
-            return self.equalize(image, labels)
-        elif labels is None:
-            return image
-        else:
-            return image, labels
-
-
-class ChannelSwap:
-    '''
-    Swaps the channels of images.
-    '''
-
-    def __init__(self, order):
-        '''
-        Arguments:
-            order (tuple): A tuple of integers that defines the desired channel order
-                of the input images after the channel swap.
-        '''
-        self.order = order
-
-    def __call__(self, image, labels=None):
-        image = image[:, :, self.order]
-        if labels is None:
-            return image
-        else:
-            return image, labels
-
-
-class RandomChannelSwap:
-    '''
-    Randomly swaps the channels of RGB images.
-
-    Important: Expects RGB input.
-    '''
-
-    def __init__(self, prob=0.5):
-        '''
-        Arguments:
-            prob (float, optional): `(1 - prob)` determines the probability with which the original,
-                unaltered image is returned.
-        '''
-        self.prob = prob
-        # All possible permutations of the three image channels except the original order.
-        self.permutations = ((0, 2, 1),
-                             (1, 0, 2), (1, 2, 0),
-                             (2, 0, 1), (2, 1, 0))
-        self.swap_channels = ChannelSwap(order=(0, 1, 2))
-
-    def __call__(self, image, labels=None):
-        p = np.random.uniform(0, 1)
-        if p >= (1.0-self.prob):
-            # Pick one of the 5 channel orders that differ from the original.
-            i = np.random.randint(5)
-            self.swap_channels.order = self.permutations[i]
-            return self.swap_channels(image, labels)
-        elif labels is None:
-            return image
-        else:
-            return image, labels
-'''
-An evaluator to compute the Pascal VOC-style mean average precision (both the pre-2010
-and post-2010 algorithm versions) of a given Keras SSD model on a given dataset.
-
-Copyright (C) 2018 Pierluigi Ferrari
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-from __future__ import division
-import numpy as np
-from math import ceil
-from tqdm import trange
-import sys
-import warnings
-
-from data_generator.object_detection_2d_data_generator import DataGenerator
-from data_generator.object_detection_2d_geometric_ops import Resize
-from data_generator.object_detection_2d_patch_sampling_ops import RandomPadFixedAR
-from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels
-from ssd_encoder_decoder.ssd_output_decoder import decode_detections
-from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms
-
-from bounding_box_utils.bounding_box_utils import iou
-
-
-class Evaluator:
-    '''
-    Computes the mean average precision of the given Keras SSD model on the given dataset.
-
-    Can compute the Pascal-VOC-style average precision in both the pre-2010 (k-point sampling)
-    and post-2010 (integration) algorithm versions.
-
-    Optionally also returns the average precisions, precisions, and recalls.
-
-    The algorithm is identical to the official Pascal VOC pre-2010 detection evaluation algorithm
-    in its default settings, but can be customized in a number of ways.
-    '''
-
-    def __init__(self,
-                 model,
-                 n_classes,
-                 data_generator,
-                 model_mode='inference',
-                 pred_format={'class_id': 0, 'conf': 1,
-                              'xmin': 2, 'ymin': 3, 'xmax': 4, 'ymax': 5},
-                 gt_format={'class_id': 0, 'xmin': 1, 'ymin': 2, 'xmax': 3, 'ymax': 4}):
-        '''
-        Arguments:
-            model (Keras model): A Keras SSD model object.
-            n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
-            data_generator (DataGenerator): A `DataGenerator` object with the evaluation dataset.
-            model_mode (str, optional): The mode in which the model was created, i.e. 'training', 'inference' or 'inference_fast'.
-                This is needed in order to know whether the model output is already decoded or still needs to be decoded. Refer to
-                the model documentation for the meaning of the individual modes.
-            pred_format (dict, optional): A dictionary that defines which index in the last axis of the model's decoded predictions
-                contains which bounding box coordinate. The dictionary must map the keywords 'class_id', 'conf' (for the confidence),
-                'xmin', 'ymin', 'xmax', and 'ymax' to their respective indices within the last axis.
-            gt_format (dict, optional): A dictionary that defines which index of a ground truth bounding box contains which of the five
-                items class ID, xmin, ymin, xmax, ymax. The expected strings are 'xmin', 'ymin', 'xmax', 'ymax', 'class_id'.
-        '''
-
-        if not isinstance(data_generator, DataGenerator):
-            warnings.warn(
-                "`data_generator` is not a `DataGenerator` object, which will cause undefined behavior.")
-
-        self.model = model
-        self.data_generator = data_generator
-        self.n_classes = n_classes
-        self.model_mode = model_mode
-        self.pred_format = pred_format
-        self.gt_format = gt_format
-
-        # The following lists all contain per-class data, i.e. all lists have the length `n_classes + 1`,
-        # where one element is for the background class, i.e. that element is just a dummy entry.
-        self.prediction_results = None
-        self.num_gt_per_class = None
-        self.true_positives = None
-        self.false_positives = None
-        self.cumulative_true_positives = None
-        self.cumulative_false_positives = None
-        # "Cumulative" means that the i-th element in each list represents the precision for the first i highest confidence predictions for that class.
-        self.cumulative_precisions = None
-        # "Cumulative" means that the i-th element in each list represents the recall for the first i highest confidence predictions for that class.
-        self.cumulative_recalls = None
-        self.average_precisions = None
-        self.mean_average_precision = None
-
-    def __call__(self,
-                 img_height,
-                 img_width,
-                 batch_size,
-                 data_generator_mode='resize',
-                 round_confidences=False,
-                 matching_iou_threshold=0.5,
-                 border_pixels='include',
-                 sorting_algorithm='quicksort',
-                 average_precision_mode='sample',
-                 num_recall_points=11,
-                 ignore_neutral_boxes=True,
-                 return_precisions=False,
-                 return_recalls=False,
-                 return_average_precisions=False,
-                 verbose=True,
-                 decoding_confidence_thresh=0.01,
-                 decoding_iou_threshold=0.45,
-                 decoding_top_k=200,
-                 decoding_pred_coords='centroids',
-                 decoding_normalize_coords=True):
-        '''
-        Computes the mean average precision of the given Keras SSD model on the given dataset.
-
-        Optionally also returns the average precisions, precisions, and recalls.
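-
-        Example (a minimal usage sketch, not from the original docs; assumes an
-        SSD `model` and a `val_dataset` `DataGenerator` were built elsewhere):
-
-            evaluator = Evaluator(model=model, n_classes=20, data_generator=val_dataset)
-            mean_AP = evaluator(img_height=300, img_width=300, batch_size=8)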
-
-        All the individual steps of the overall evaluation algorithm can also be called separately
-        (check out the other methods of this class), but this runs the overall algorithm all at once.
-
-        Arguments:
-            img_height (int): The input image height for the model.
-            img_width (int): The input image width for the model.
-            batch_size (int): The batch size for the evaluation.
-            data_generator_mode (str, optional): Either of 'resize' and 'pad'. If 'resize', the input images will
-                be resized (i.e. warped) to `(img_height, img_width)`. This mode does not preserve the aspect ratios of the images.
-                If 'pad', the input images will be first padded so that they have the aspect ratio defined by `img_height`
-                and `img_width` and then resized to `(img_height, img_width)`. This mode preserves the aspect ratios of the images.
-            round_confidences (int, optional): `False` or an integer that is the number of decimals that the prediction
-                confidences will be rounded to. If `False`, the confidences will not be rounded.
-            matching_iou_threshold (float, optional): A prediction will be considered a true positive if it has a Jaccard overlap
-                of at least `matching_iou_threshold` with any ground truth bounding box of the same class.
-            border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
-                Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
-                to the boxes. If 'exclude', the border pixels do not belong to the boxes.
-                If 'half', then one of each of the two horizontal and vertical borders belongs
-                to the boxes, but not the other.
-            sorting_algorithm (str, optional): Which sorting algorithm the matching algorithm should use. This argument accepts
-                any valid sorting algorithm for Numpy's `argsort()` function. You will usually want to choose between 'quicksort'
-                (fastest and most memory efficient, but not stable) and 'mergesort' (slightly slower and less memory efficient, but stable).
-                The official Matlab evaluation algorithm uses a stable sorting algorithm, so this algorithm is only guaranteed
-                to behave identically if you choose 'mergesort' as the sorting algorithm, but it will almost always behave identically
-                even if you choose 'quicksort' (but no guarantees).
-            average_precision_mode (str, optional): Can be either 'sample' or 'integrate'. In the case of 'sample', the average precision
-                will be computed according to the Pascal VOC formula that was used up until VOC 2009, where the precision will be sampled
-                for `num_recall_points` recall values. In the case of 'integrate', the average precision will be computed according to the
-                Pascal VOC formula that was used from VOC 2010 onward, where the average precision will be computed by numerically integrating
-                over the whole precision-recall curve instead of sampling individual points from it. 'integrate' mode is basically just
-                the limit case of 'sample' mode as the number of sample points increases.
-            num_recall_points (int, optional): The number of points to sample from the precision-recall curve to compute the average
-                precisions. In other words, this is the number of equidistant recall values for which the resulting precision will be
-                computed. 11 points is the value used in the official Pascal VOC 2007 detection evaluation algorithm.
-            ignore_neutral_boxes (bool, optional): In case the data generator provides annotations indicating whether a ground truth
-                bounding box is supposed to either count or be neutral for the evaluation, this argument decides what to do with these
-                annotations.
-                If `False`, even boxes that are annotated as neutral will be counted into the evaluation. If `True`,
-                neutral boxes will be ignored for the evaluation. Examples of evaluation-neutrality are the ground truth boxes
-                annotated as "difficult" in the Pascal VOC datasets, which are usually treated as neutral for the evaluation.
-            return_precisions (bool, optional): If `True`, returns a nested list containing the cumulative precisions for each class.
-            return_recalls (bool, optional): If `True`, returns a nested list containing the cumulative recalls for each class.
-            return_average_precisions (bool, optional): If `True`, returns a list containing the average precision for each class.
-            verbose (bool, optional): If `True`, will print out the progress during runtime.
-            decoding_confidence_thresh (float, optional): Only relevant if the model is in 'training' mode.
-                A float in [0,1), the minimum classification confidence in a specific positive class in order to be considered
-                for the non-maximum suppression stage for the respective class. A lower value will result in a larger part of the
-                selection process being done by the non-maximum suppression stage, while a larger value will result in a larger
-                part of the selection process happening in the confidence thresholding stage.
-            decoding_iou_threshold (float, optional): Only relevant if the model is in 'training' mode. A float in [0,1].
-                All boxes with a Jaccard similarity of greater than `iou_threshold` with a locally maximal box will be removed
-                from the set of predictions for a given class, where 'maximal' refers to the box score.
-            decoding_top_k (int, optional): Only relevant if the model is in 'training' mode. The number of highest scoring
-                predictions to be kept for each batch item after the non-maximum suppression stage.
-            decoding_pred_coords (str, optional): Only relevant if the model is in 'training' mode. The box coordinate format
-                that the model outputs. Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width, and height),
-                'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
-            decoding_normalize_coords (bool, optional): Only relevant if the model is in 'training' mode. Set to `True` if the model
-                outputs relative coordinates. Do not set this to `True` if the model already outputs absolute coordinates,
-                as that would result in incorrect coordinates.
-
-        Returns:
-            A float, the mean average precision, plus any optional returns specified in the arguments.
-        '''
-
-        #############################################################################################
-        # Predict on the entire dataset.
-        #############################################################################################
-
-        self.predict_on_dataset(img_height=img_height,
-                                img_width=img_width,
-                                batch_size=batch_size,
-                                data_generator_mode=data_generator_mode,
-                                decoding_confidence_thresh=decoding_confidence_thresh,
-                                decoding_iou_threshold=decoding_iou_threshold,
-                                decoding_top_k=decoding_top_k,
-                                decoding_pred_coords=decoding_pred_coords,
-                                decoding_normalize_coords=decoding_normalize_coords,
-                                decoding_border_pixels=border_pixels,
-                                round_confidences=round_confidences,
-                                verbose=verbose,
-                                ret=False)
-
-        #############################################################################################
-        # Get the total number of ground truth boxes for each class.
-        #############################################################################################
-
-        self.get_num_gt_per_class(ignore_neutral_boxes=ignore_neutral_boxes,
-                                  verbose=False,
-                                  ret=False)
-
-        #############################################################################################
-        # Match predictions to ground truth boxes for all classes.
-        #############################################################################################
-
-        self.match_predictions(ignore_neutral_boxes=ignore_neutral_boxes,
-                               matching_iou_threshold=matching_iou_threshold,
-                               border_pixels=border_pixels,
-                               sorting_algorithm=sorting_algorithm,
-                               verbose=verbose,
-                               ret=False)
-
-        #############################################################################################
-        # Compute the cumulative precision and recall for all classes.
-        #############################################################################################
-
-        self.compute_precision_recall(verbose=verbose, ret=False)
-
-        #############################################################################################
-        # Compute the average precision for each class.
-        #############################################################################################
-
-        self.compute_average_precisions(mode=average_precision_mode,
-                                        num_recall_points=num_recall_points,
-                                        verbose=verbose,
-                                        ret=False)
-
-        #############################################################################################
-        # Compute the mean average precision.
-        #############################################################################################
-
-        mean_average_precision = self.compute_mean_average_precision(ret=True)
-
-        #############################################################################################
-
-        # Compile the returns.
-        if return_precisions or return_recalls or return_average_precisions:
-            ret = [mean_average_precision]
-            if return_average_precisions:
-                ret.append(self.average_precisions)
-            if return_precisions:
-                ret.append(self.cumulative_precisions)
-            if return_recalls:
-                ret.append(self.cumulative_recalls)
-            return ret
-        else:
-            return mean_average_precision
-
-    def predict_on_dataset(self,
-                           img_height,
-                           img_width,
-                           batch_size,
-                           data_generator_mode='resize',
-                           decoding_confidence_thresh=0.01,
-                           decoding_iou_threshold=0.45,
-                           decoding_top_k=200,
-                           decoding_pred_coords='centroids',
-                           decoding_normalize_coords=True,
-                           decoding_border_pixels='include',
-                           round_confidences=False,
-                           verbose=True,
-                           ret=False):
-        '''
-        Runs predictions for the given model over the entire dataset given by `data_generator`.
-
-        Arguments:
-            img_height (int): The input image height for the model.
-            img_width (int): The input image width for the model.
-            batch_size (int): The batch size for the evaluation.
-            data_generator_mode (str, optional): Either 'resize' or 'pad'. If 'resize', the input images will
-                be resized (i.e. warped) to `(img_height, img_width)`. This mode does not preserve the aspect ratios of the images.
-                If 'pad', the input images will be first padded so that they have the aspect ratio defined by `img_height`
-                and `img_width` and then resized to `(img_height, img_width)`. This mode preserves the aspect ratios of the images.
-            decoding_confidence_thresh (float, optional): Only relevant if the model is in 'training' mode.
-                A float in [0,1), the minimum classification confidence in a specific positive class in order to be considered
-                for the non-maximum suppression stage for the respective class.
-                A lower value will result in a larger part of the
-                selection process being done by the non-maximum suppression stage, while a larger value will result in a larger
-                part of the selection process happening in the confidence thresholding stage.
-            decoding_iou_threshold (float, optional): Only relevant if the model is in 'training' mode. A float in [0,1].
-                All boxes with a Jaccard similarity of greater than `iou_threshold` with a locally maximal box will be removed
-                from the set of predictions for a given class, where 'maximal' refers to the box score.
-            decoding_top_k (int, optional): Only relevant if the model is in 'training' mode. The number of highest scoring
-                predictions to be kept for each batch item after the non-maximum suppression stage.
-            decoding_pred_coords (str, optional): Only relevant if the model is in 'training' mode. The box coordinate format
-                that the model outputs. Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width, and height),
-                'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
-            decoding_normalize_coords (bool, optional): Only relevant if the model is in 'training' mode. Set to `True` if the model
-                outputs relative coordinates. Do not set this to `True` if the model already outputs absolute coordinates,
-                as that would result in incorrect coordinates.
-            round_confidences (int, optional): `False` or an integer that is the number of decimals that the prediction
-                confidences will be rounded to. If `False`, the confidences will not be rounded.
-            verbose (bool, optional): If `True`, will print out the progress during runtime.
-            ret (bool, optional): If `True`, returns the predictions.
-
-        Returns:
-            None by default. Optionally, a nested list containing the predictions for each class.
-        '''
-
-        class_id_pred = self.pred_format['class_id']
-        conf_pred = self.pred_format['conf']
-        xmin_pred = self.pred_format['xmin']
-        ymin_pred = self.pred_format['ymin']
-        xmax_pred = self.pred_format['xmax']
-        ymax_pred = self.pred_format['ymax']
-
-        #############################################################################################
-        # Configure the data generator for the evaluation.
-        #############################################################################################
-
-        convert_to_3_channels = ConvertTo3Channels()
-        resize = Resize(height=img_height, width=img_width,
-                        labels_format=self.gt_format)
-        if data_generator_mode == 'resize':
-            transformations = [convert_to_3_channels,
-                               resize]
-        elif data_generator_mode == 'pad':
-            random_pad = RandomPadFixedAR(
-                patch_aspect_ratio=img_width/img_height, labels_format=self.gt_format)
-            transformations = [convert_to_3_channels,
-                               random_pad,
-                               resize]
-        else:
-            raise ValueError("`data_generator_mode` can be either of 'resize' or 'pad', but received '{}'.".format(
-                data_generator_mode))
-
-        # Set the generator parameters.
-        generator = self.data_generator.generate(batch_size=batch_size,
-                                                 shuffle=False,
-                                                 transformations=transformations,
-                                                 label_encoder=None,
-                                                 returns={'processed_images',
-                                                          'image_ids',
-                                                          'evaluation-neutral',
-                                                          'inverse_transform',
-                                                          'original_labels'},
-                                                 keep_images_without_gt=True,
-                                                 degenerate_box_handling='remove')
-
-        # If we don't have any real image IDs, generate pseudo-image IDs.
-        # This is just to make the evaluator compatible both with datasets that do and don't
-        # have image IDs.
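-        # Since the generator above runs with `shuffle=False`, the dataset order is deterministic,
-        # so a dataset of, say, 100 images without IDs simply receives the pseudo-IDs 0 through 99.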
- if self.data_generator.image_ids is None: - self.data_generator.image_ids = list( - range(self.data_generator.get_dataset_size())) - - ############################################################################################# - # Predict over all batches of the dataset and store the predictions. - ############################################################################################# - - # We have to generate a separate results list for each class. - results = [list() for _ in range(self.n_classes + 1)] - - # Create a dictionary that maps image IDs to ground truth annotations. - # We'll need it below. - image_ids_to_labels = {} - - # Compute the number of batches to iterate over the entire dataset. - n_images = self.data_generator.get_dataset_size() - n_batches = int(ceil(n_images / batch_size)) - if verbose: - print("Number of images in the evaluation dataset: {}".format(n_images)) - print() - tr = trange(n_batches, file=sys.stdout) - tr.set_description('Producing predictions batch-wise') - else: - tr = range(n_batches) - - # Loop over all batches. - for j in tr: - # Generate batch. - batch_X, batch_image_ids, batch_eval_neutral, batch_inverse_transforms, batch_orig_labels = next( - generator) - # Predict. - y_pred = self.model.predict(batch_X) - # If the model was created in 'training' mode, the raw predictions need to - # be decoded and filtered, otherwise that's already taken care of. - if self.model_mode == 'training': - # Decode. - y_pred = decode_detections(y_pred, - confidence_thresh=decoding_confidence_thresh, - iou_threshold=decoding_iou_threshold, - top_k=decoding_top_k, - input_coords=decoding_pred_coords, - normalize_coords=decoding_normalize_coords, - img_height=img_height, - img_width=img_width, - border_pixels=decoding_border_pixels) - else: - # Filter out the all-zeros dummy elements of `y_pred`. - y_pred_filtered = [] - for i in range(len(y_pred)): - y_pred_filtered.append(y_pred[i][y_pred[i, :, 0] != 0]) - y_pred = y_pred_filtered - # Convert the predicted box coordinates for the original images. - y_pred = apply_inverse_transforms(y_pred, batch_inverse_transforms) - - # Iterate over all batch items. - for k, batch_item in enumerate(y_pred): - - image_id = batch_image_ids[k] - - for box in batch_item: - class_id = int(box[class_id_pred]) - # Round the box coordinates to reduce the required memory. - if round_confidences: - confidence = round(box[conf_pred], round_confidences) - else: - confidence = box[conf_pred] - xmin = round(box[xmin_pred], 1) - ymin = round(box[ymin_pred], 1) - xmax = round(box[xmax_pred], 1) - ymax = round(box[ymax_pred], 1) - prediction = (image_id, confidence, xmin, ymin, xmax, ymax) - # Append the predicted box to the results list for its class. - results[class_id].append(prediction) - - self.prediction_results = results - - if ret: - return results - - def write_predictions_to_txt(self, - classes=None, - out_file_prefix='comp3_det_test_', - verbose=True): - ''' - Writes the predictions for all classes to separate text files according to the Pascal VOC results format. - - Arguments: - classes (list, optional): `None` or a list of strings containing the class names of all classes in the dataset, - including some arbitrary name for the background class. This list will be used to name the output text files. - The ordering of the names in the list represents the ordering of the classes as they are predicted by the model, - i.e. the element with index 3 in this list should correspond to the class with class ID 3 in the model's predictions. 
- If `None`, the output text files will be named by their class IDs. - out_file_prefix (str, optional): A prefix for the output text file names. The suffix to each output text file name will - be the respective class name followed by the `.txt` file extension. This string is also how you specify the directory - in which the results are to be saved. - verbose (bool, optional): If `True`, will print out the progress during runtime. - - Returns: - None. - ''' - - if self.prediction_results is None: - raise ValueError( - "There are no prediction results. You must run `predict_on_dataset()` before calling this method.") - - # We generate a separate results file for each class. - for class_id in range(1, self.n_classes + 1): - - if verbose: - print( - "Writing results file for class {}/{}.".format(class_id, self.n_classes)) - - if classes is None: - class_suffix = '{:04d}'.format(class_id) - else: - class_suffix = classes[class_id] - - results_file = open('{}{}.txt'.format( - out_file_prefix, class_suffix), 'w') - - for prediction in self.prediction_results[class_id]: - - prediction_list = list(prediction) - prediction_list[0] = '{:06d}'.format(int(prediction_list[0])) - prediction_list[1] = round(prediction_list[1], 4) - prediction_txt = ' '.join(map(str, prediction_list)) + '\n' - results_file.write(prediction_txt) - - results_file.close() - - if verbose: - print("All results files saved.") - - def get_num_gt_per_class(self, - ignore_neutral_boxes=True, - verbose=True, - ret=False): - ''' - Counts the number of ground truth boxes for each class across the dataset. - - Arguments: - ignore_neutral_boxes (bool, optional): In case the data generator provides annotations indicating whether a ground truth - bounding box is supposed to either count or be neutral for the evaluation, this argument decides what to do with these - annotations. If `True`, only non-neutral ground truth boxes will be counted, otherwise all ground truth boxes will - be counted. - verbose (bool, optional): If `True`, will print out the progress during runtime. - ret (bool, optional): If `True`, returns the list of counts. - - Returns: - None by default. Optionally, a list containing a count of the number of ground truth boxes for each class across the - entire dataset. - ''' - - if self.data_generator.labels is None: - raise ValueError( - "Computing the number of ground truth boxes per class not possible, no ground truth given.") - - num_gt_per_class = np.zeros(shape=(self.n_classes+1), dtype=np.int) - - class_id_index = self.gt_format['class_id'] - - ground_truth = self.data_generator.labels - - if verbose: - print('Computing the number of positive ground truth boxes per class.') - tr = trange(len(ground_truth), file=sys.stdout) - else: - tr = range(len(ground_truth)) - - # Iterate over the ground truth for all images in the dataset. - for i in tr: - - boxes = np.asarray(ground_truth[i]) - - # Iterate over all ground truth boxes for the current image. - for j in range(boxes.shape[0]): - - if ignore_neutral_boxes and not (self.data_generator.eval_neutral is None): - if not self.data_generator.eval_neutral[i][j]: - # If this box is not supposed to be evaluation-neutral, - # increment the counter for the respective class ID. - class_id = boxes[j, class_id_index] - num_gt_per_class[class_id] += 1 - else: - # If there is no such thing as evaluation-neutral boxes for - # our dataset, always increment the counter for the respective - # class ID. 
-                    class_id = boxes[j, class_id_index]
-                    num_gt_per_class[class_id] += 1
-
-        self.num_gt_per_class = num_gt_per_class
-
-        if ret:
-            return num_gt_per_class
-
-    def match_predictions(self,
-                          ignore_neutral_boxes=True,
-                          matching_iou_threshold=0.5,
-                          border_pixels='include',
-                          sorting_algorithm='quicksort',
-                          verbose=True,
-                          ret=False):
-        '''
-        Matches predictions to ground truth boxes.
-
-        Note that `predict_on_dataset()` must be called before calling this method.
-
-        Arguments:
-            ignore_neutral_boxes (bool, optional): In case the data generator provides annotations indicating whether a ground truth
-                bounding box is supposed to either count or be neutral for the evaluation, this argument decides what to do with these
-                annotations. If `False`, even boxes that are annotated as neutral will be counted into the evaluation. If `True`,
-                neutral boxes will be ignored for the evaluation. Examples of evaluation-neutrality are the ground truth boxes
-                annotated as "difficult" in the Pascal VOC datasets, which are usually treated as neutral for the evaluation.
-            matching_iou_threshold (float, optional): A prediction will be considered a true positive if it has a Jaccard overlap
-                of at least `matching_iou_threshold` with any ground truth bounding box of the same class.
-            border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
-                Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
-                to the boxes. If 'exclude', the border pixels do not belong to the boxes.
-                If 'half', then one of each of the two horizontal and vertical borders belongs
-                to the boxes, but not the other.
-            sorting_algorithm (str, optional): Which sorting algorithm the matching algorithm should use. This argument accepts
-                any valid sorting algorithm for Numpy's `argsort()` function. You will usually want to choose between 'quicksort'
-                (fastest and most memory efficient, but not stable) and 'mergesort' (slightly slower and less memory efficient, but stable).
-                The official Matlab evaluation algorithm uses a stable sorting algorithm, so this algorithm is only guaranteed
-                to behave identically if you choose 'mergesort' as the sorting algorithm, but it will almost always behave identically
-                even if you choose 'quicksort' (but no guarantees).
-            verbose (bool, optional): If `True`, will print out the progress during runtime.
-            ret (bool, optional): If `True`, returns the true and false positives.
-
-        Returns:
-            None by default. Optionally, four nested lists containing the true positives, false positives, cumulative true positives,
-            and cumulative false positives for each class.
-        '''
-
-        if self.data_generator.labels is None:
-            raise ValueError(
-                "Matching predictions to ground truth boxes not possible, no ground truth given.")
-
-        if self.prediction_results is None:
-            raise ValueError(
-                "There are no prediction results. You must run `predict_on_dataset()` before calling this method.")
-
-        class_id_gt = self.gt_format['class_id']
-        xmin_gt = self.gt_format['xmin']
-        ymin_gt = self.gt_format['ymin']
-        xmax_gt = self.gt_format['xmax']
-        ymax_gt = self.gt_format['ymax']
-
-        # Convert the ground truth to a more efficient format for what we need
-        # to do, which is access ground truth by image ID repeatedly.
-        ground_truth = {}
-        # Whether or not we have annotations to decide whether ground truth boxes should be neutral or not.
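-        # The resulting dictionary is keyed by (stringified) image ID, e.g. `ground_truth['0']`
-        # yields either a `(boxes, eval_neutral_flags)` tuple or just the boxes array,
-        # depending on the branch below.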
-        eval_neutral_available = not (self.data_generator.eval_neutral is None)
-        for i in range(len(self.data_generator.image_ids)):
-            image_id = str(self.data_generator.image_ids[i])
-            labels = self.data_generator.labels[i]
-            if ignore_neutral_boxes and eval_neutral_available:
-                ground_truth[image_id] = (np.asarray(labels), np.asarray(
-                    self.data_generator.eval_neutral[i]))
-            else:
-                ground_truth[image_id] = np.asarray(labels)
-
-        # The true positives for each class, sorted by descending confidence.
-        true_positives = [[]]
-        # The false positives for each class, sorted by descending confidence.
-        false_positives = [[]]
-        cumulative_true_positives = [[]]
-        cumulative_false_positives = [[]]
-
-        # Iterate over all classes.
-        for class_id in range(1, self.n_classes + 1):
-
-            predictions = self.prediction_results[class_id]
-
-            # Store the matching results in these lists:
-            # 1 for every prediction that is a true positive, 0 otherwise
-            true_pos = np.zeros(len(predictions), dtype=np.int)
-            # 1 for every prediction that is a false positive, 0 otherwise
-            false_pos = np.zeros(len(predictions), dtype=np.int)
-
-            # In case there are no predictions at all for this class, we're done here.
-            if len(predictions) == 0:
-                print("No predictions for class {}/{}".format(class_id, self.n_classes))
-                true_positives.append(true_pos)
-                false_positives.append(false_pos)
-                continue
-
-            # Convert the predictions list for this class into a structured array so that we can sort it by confidence.
-
-            # Get the number of characters needed to store the image ID strings in the structured array.
-            # Keep a few characters buffer in case some image IDs are longer than others.
-            num_chars_per_image_id = len(str(predictions[0][0])) + 6
-            # Create the data type for the structured array.
-            preds_data_type = np.dtype([('image_id', 'U{}'.format(num_chars_per_image_id)),
-                                        ('confidence', 'f4'),
-                                        ('xmin', 'f4'),
-                                        ('ymin', 'f4'),
-                                        ('xmax', 'f4'),
-                                        ('ymax', 'f4')])
-            # Create the structured array
-            predictions = np.array(predictions, dtype=preds_data_type)
-
-            # Sort the detections by decreasing confidence.
-            descending_indices = np.argsort(
-                -predictions['confidence'], kind=sorting_algorithm)
-            predictions_sorted = predictions[descending_indices]
-
-            if verbose:
-                tr = trange(len(predictions), file=sys.stdout)
-                tr.set_description(
-                    "Matching predictions to ground truth, class {}/{}.".format(class_id, self.n_classes))
-            else:
-                tr = range(len(predictions))
-
-            # Keep track of which ground truth boxes were already matched to a detection.
-            gt_matched = {}
-
-            # Iterate over all predictions.
-            for i in tr:
-
-                prediction = predictions_sorted[i]
-                image_id = prediction['image_id']
-                # Convert the structured array element to a regular array.
-                pred_box = np.asarray(
-                    list(prediction[['xmin', 'ymin', 'xmax', 'ymax']]))
-
-                # Get the relevant ground truth boxes for this prediction,
-                # i.e. all ground truth boxes that match the prediction's
-                # image ID and class ID.
-
-                # The ground truth could either be a tuple with `(ground_truth_boxes, eval_neutral_boxes)`
-                # or only `ground_truth_boxes`.
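-                # Because the predictions were sorted by descending confidence above, when several
-                # detections hit the same ground truth box, the most confident one is matched first
-                # and becomes the true positive; the rest are counted as duplicate false positives
-                # further down.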
- if ignore_neutral_boxes and eval_neutral_available: - gt, eval_neutral = ground_truth[image_id] - else: - gt = ground_truth[image_id] - gt = np.asarray(gt) - class_mask = gt[:, class_id_gt] == class_id - gt = gt[class_mask] - if ignore_neutral_boxes and eval_neutral_available: - eval_neutral = eval_neutral[class_mask] - - if gt.size == 0: - # If the image doesn't contain any objects of this class, - # the prediction becomes a false positive. - false_pos[i] = 1 - continue - - # Compute the IoU of this prediction with all ground truth boxes of the same class. - overlaps = iou(boxes1=gt[:, [xmin_gt, ymin_gt, xmax_gt, ymax_gt]], - boxes2=pred_box, - coords='corners', - mode='element-wise', - border_pixels=border_pixels) - - # For each detection, match the ground truth box with the highest overlap. - # It's possible that the same ground truth box will be matched to multiple - # detections. - gt_match_index = np.argmax(overlaps) - gt_match_overlap = overlaps[gt_match_index] - - if gt_match_overlap < matching_iou_threshold: - # False positive, IoU threshold violated: - # Those predictions whose matched overlap is below the threshold become - # false positives. - false_pos[i] = 1 - else: - if not (ignore_neutral_boxes and eval_neutral_available) or (eval_neutral[gt_match_index] == False): - # If this is not a ground truth that is supposed to be evaluation-neutral - # (i.e. should be skipped for the evaluation) or if we don't even have the - # concept of neutral boxes. - if not (image_id in gt_matched): - # True positive: - # If the matched ground truth box for this prediction hasn't been matched to a - # different prediction already, we have a true positive. - true_pos[i] = 1 - gt_matched[image_id] = np.zeros( - shape=(gt.shape[0]), dtype=np.bool) - gt_matched[image_id][gt_match_index] = True - elif not gt_matched[image_id][gt_match_index]: - # True positive: - # If the matched ground truth box for this prediction hasn't been matched to a - # different prediction already, we have a true positive. - true_pos[i] = 1 - gt_matched[image_id][gt_match_index] = True - else: - # False positive, duplicate detection: - # If the matched ground truth box for this prediction has already been matched - # to a different prediction previously, it is a duplicate detection for an - # already detected object, which counts as a false positive. - false_pos[i] = 1 - - true_positives.append(true_pos) - false_positives.append(false_pos) - - # Cumulative sums of the true positives - cumulative_true_pos = np.cumsum(true_pos) - # Cumulative sums of the false positives - cumulative_false_pos = np.cumsum(false_pos) - - cumulative_true_positives.append(cumulative_true_pos) - cumulative_false_positives.append(cumulative_false_pos) - - self.true_positives = true_positives - self.false_positives = false_positives - self.cumulative_true_positives = cumulative_true_positives - self.cumulative_false_positives = cumulative_false_positives - - if ret: - return true_positives, false_positives, cumulative_true_positives, cumulative_false_positives - - def compute_precision_recall(self, verbose=True, ret=False): - ''' - Computes the precisions and recalls for all classes. - - Note that `match_predictions()` must be called before calling this method. - - Arguments: - verbose (bool, optional): If `True`, will print out the progress during runtime. - ret (bool, optional): If `True`, returns the precisions and recalls. - - Returns: - None by default. 
-            Optionally, two nested lists containing the cumulative precisions and recalls for each class.
-        '''
-
-        if (self.cumulative_true_positives is None) or (self.cumulative_false_positives is None):
-            raise ValueError(
-                "True and false positives not available. You must run `match_predictions()` before you call this method.")
-
-        if (self.num_gt_per_class is None):
-            raise ValueError(
-                "Number of ground truth boxes per class not available. You must run `get_num_gt_per_class()` before you call this method.")
-
-        cumulative_precisions = [[]]
-        cumulative_recalls = [[]]
-
-        # Iterate over all classes.
-        for class_id in range(1, self.n_classes + 1):
-
-            if verbose:
-                print(
-                    "Computing precisions and recalls, class {}/{}".format(class_id, self.n_classes))
-
-            tp = self.cumulative_true_positives[class_id]
-            fp = self.cumulative_false_positives[class_id]
-
-            # 1D array with shape `(num_predictions,)`
-            cumulative_precision = np.where(tp + fp > 0, tp / (tp + fp), 0)
-            # 1D array with shape `(num_predictions,)`
-            cumulative_recall = tp / self.num_gt_per_class[class_id]
-
-            cumulative_precisions.append(cumulative_precision)
-            cumulative_recalls.append(cumulative_recall)
-
-        self.cumulative_precisions = cumulative_precisions
-        self.cumulative_recalls = cumulative_recalls
-
-        if ret:
-            return cumulative_precisions, cumulative_recalls
-
-    def compute_average_precisions(self, mode='sample', num_recall_points=11, verbose=True, ret=False):
-        '''
-        Computes the average precision for each class.
-
-        Can compute the Pascal-VOC-style average precision in both the pre-2010 (k-point sampling)
-        and post-2010 (integration) algorithm versions.
-
-        Note that `compute_precision_recall()` must be called before calling this method.
-
-        Arguments:
-            mode (str, optional): Can be either 'sample' or 'integrate'. In the case of 'sample', the average precision will be computed
-                according to the Pascal VOC formula that was used up until VOC 2009, where the precision will be sampled for `num_recall_points`
-                recall values. In the case of 'integrate', the average precision will be computed according to the Pascal VOC formula that
-                was used from VOC 2010 onward, where the average precision will be computed by numerically integrating over the whole
-                precision-recall curve instead of sampling individual points from it. 'integrate' mode is basically just the limit case
-                of 'sample' mode as the number of sample points increases. For details, see the references below.
-            num_recall_points (int, optional): Only relevant if mode is 'sample'. The number of points to sample from the precision-recall curve
-                to compute the average precisions. In other words, this is the number of equidistant recall values for which the resulting
-                precision will be computed. 11 points is the value used in the official Pascal VOC pre-2010 detection evaluation algorithm.
-            verbose (bool, optional): If `True`, will print out the progress during runtime.
-            ret (bool, optional): If `True`, returns the average precisions.
-
-        Returns:
-            None by default. Optionally, a list containing the average precision for each class.
-
-        References:
-            http://host.robots.ox.ac.uk/pascal/VOC/voc2012/htmldoc/devkit_doc.html#sec:ap
-        '''
-
-        if (self.cumulative_precisions is None) or (self.cumulative_recalls is None):
-            raise ValueError(
-                "Precisions and recalls not available. You must run `compute_precision_recall()` before you call this method.")
-
-        if not (mode in {'sample', 'integrate'}):
-            raise ValueError(
-                "`mode` can be either 'sample' or 'integrate', but received '{}'".format(mode))
-
-        average_precisions = [0.0]
-
-        # Iterate over all classes.
-        for class_id in range(1, self.n_classes + 1):
-
-            if verbose:
-                print(
-                    "Computing average precision, class {}/{}".format(class_id, self.n_classes))
-
-            cumulative_precision = self.cumulative_precisions[class_id]
-            cumulative_recall = self.cumulative_recalls[class_id]
-            average_precision = 0.0
-
-            if mode == 'sample':
-
-                for t in np.linspace(start=0, stop=1, num=num_recall_points, endpoint=True):
-
-                    cum_prec_recall_greater_t = cumulative_precision[cumulative_recall >= t]
-
-                    if cum_prec_recall_greater_t.size == 0:
-                        precision = 0.0
-                    else:
-                        precision = np.amax(cum_prec_recall_greater_t)
-
-                    average_precision += precision
-
-                average_precision /= num_recall_points
-
-            elif mode == 'integrate':
-
-                # We will compute the precision at all unique recall values.
-                unique_recalls, unique_recall_indices, unique_recall_counts = np.unique(
-                    cumulative_recall, return_index=True, return_counts=True)
-
-                # Store the maximal precision for each recall value and the absolute difference
-                # between any two unique recall values in the lists below. The products of these
-                # two numbers constitute the rectangular areas whose sum will be our numerical
-                # integral.
-                maximal_precisions = np.zeros_like(unique_recalls)
-                recall_deltas = np.zeros_like(unique_recalls)
-
-                # Iterate over all unique recall values in reverse order. This saves a lot of computation:
-                # For each unique recall value `r`, we want to get the maximal precision value obtained
-                # for any recall value `r* >= r`. Once we know the maximal precision for the last `k` recall
-                # values after a given iteration, then in the next iteration, in order to compute the maximal
-                # precisions for the last `l > k` recall values, we only need to compute the maximal precision
-                # for `l - k` recall values and then take the maximum between that and the previously computed
-                # maximum instead of computing the maximum over all `l` values.
-                # We skip the very last recall value, since the precision between the last recall value
-                # and recall 1.0 is defined to be zero.
-                for i in range(len(unique_recalls)-2, -1, -1):
-                    begin = unique_recall_indices[i]
-                    end = unique_recall_indices[i + 1]
-                    # When computing the maximal precisions, use the maximum of the previous iteration to
-                    # avoid unnecessary repeated computation over the same precision values.
-                    # The maximal precisions are the heights of the rectangle areas of our integral under
-                    # the precision-recall curve.
-                    maximal_precisions[i] = np.maximum(
-                        np.amax(cumulative_precision[begin:end]), maximal_precisions[i + 1])
-                    # The differences between two adjacent recall values are the widths of our rectangle areas.
-                    recall_deltas[i] = unique_recalls[i + 1] - unique_recalls[i]
-
-                average_precision = np.sum(maximal_precisions * recall_deltas)
-
-            average_precisions.append(average_precision)
-
-        self.average_precisions = average_precisions
-
-        if ret:
-            return average_precisions
-
-    def compute_mean_average_precision(self, ret=True):
-        '''
-        Computes the mean average precision over all classes.
-
-        Note that `compute_average_precisions()` must be called before calling this method.
-
-        Arguments:
-            ret (bool, optional): If `True`, returns the mean average precision.
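-
-        For illustration (hypothetical numbers): if a 3-class model had the per-class average
-        precisions [0.62, 0.75, 0.58], the mean average precision would be their plain,
-        unweighted mean, (0.62 + 0.75 + 0.58) / 3 = 0.65. The dummy entry for the background
-        class at index 0 is excluded from the mean.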
- - Returns: - A float, the mean average precision, by default. Optionally, None. - ''' - - if self.average_precisions is None: - raise ValueError( - "Average precisions not available. You must run `compute_average_precisions()` before you call this method.") - - # The first element is for the background class, so skip it. - mean_average_precision = np.average(self.average_precisions[1:]) - self.mean_average_precision = mean_average_precision - - if ret: - return mean_average_precision -''' -A few utilities that are useful when working with the MS COCO datasets. - -Copyright (C) 2018 Pierluigi Ferrari - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -''' - -import json -from tqdm import trange -from math import ceil -import sys - -from data_generator.object_detection_2d_geometric_ops import Resize -from data_generator.object_detection_2d_patch_sampling_ops import RandomPadFixedAR -from data_generator.object_detection_2d_photometric_ops import ConvertTo3Channels -from ssd_encoder_decoder.ssd_output_decoder import decode_detections -from data_generator.object_detection_2d_misc_utils import apply_inverse_transforms - - -def get_coco_category_maps(annotations_file): - ''' - Builds dictionaries that map between MS COCO category IDs, transformed category IDs, and category names. - The original MS COCO category IDs are not consecutive unfortunately: The 80 category IDs are spread - across the integers 1 through 90 with some integers skipped. Since we usually use a one-hot - class representation in neural networks, we need to map these non-consecutive original COCO category - IDs (let's call them 'cats') to consecutive category IDs (let's call them 'classes'). - - Arguments: - annotations_file (str): The filepath to any MS COCO annotations JSON file. - - Returns: - 1) cats_to_classes: A dictionary that maps between the original (keys) and the transformed category IDs (values). - 2) classes_to_cats: A dictionary that maps between the transformed (keys) and the original category IDs (values). - 3) cats_to_names: A dictionary that maps between original category IDs (keys) and the respective category names (values). - 4) classes_to_names: A list of the category names (values) with their indices representing the transformed IDs. - ''' - with open(annotations_file, 'r') as f: - annotations = json.load(f) - cats_to_classes = {} - classes_to_cats = {} - cats_to_names = {} - classes_to_names = [] - # Need to add the background class first so that the indexing is right. 
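-    # (For example, in the standard COCO annotations the 80 category IDs run from 1 through 90
-    # with gaps; after this mapping they become the consecutive class IDs 1 through 80, with
-    # class 0 reserved for the background.)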
-    classes_to_names.append('background')
-    for i, cat in enumerate(annotations['categories']):
-        cats_to_classes[cat['id']] = i + 1
-        classes_to_cats[i + 1] = cat['id']
-        cats_to_names[cat['id']] = cat['name']
-        classes_to_names.append(cat['name'])
-
-    return cats_to_classes, classes_to_cats, cats_to_names, classes_to_names
-
-
-def predict_all_to_json(out_file,
-                        model,
-                        img_height,
-                        img_width,
-                        classes_to_cats,
-                        data_generator,
-                        batch_size,
-                        data_generator_mode='resize',
-                        model_mode='training',
-                        confidence_thresh=0.01,
-                        iou_threshold=0.45,
-                        top_k=200,
-                        pred_coords='centroids',
-                        normalize_coords=True):
-    '''
-    Runs detection predictions over the whole dataset given a model and saves them in a JSON file
-    in the MS COCO detection results format.
-
-    Arguments:
-        out_file (str): The file name (full path) under which to save the results JSON file.
-        model (Keras model): A Keras SSD model object.
-        img_height (int): The input image height for the model.
-        img_width (int): The input image width for the model.
-        classes_to_cats (dict): A dictionary that maps the consecutive class IDs predicted by the model
-            to the non-consecutive original MS COCO category IDs.
-        data_generator (DataGenerator): A `DataGenerator` object with the evaluation dataset.
-        batch_size (int): The batch size for the evaluation.
-        data_generator_mode (str, optional): Either 'resize' or 'pad'. If 'resize', the input images will
-            be resized (i.e. warped) to `(img_height, img_width)`. This mode does not preserve the aspect ratios of the images.
-            If 'pad', the input images will be first padded so that they have the aspect ratio defined by `img_height`
-            and `img_width` and then resized to `(img_height, img_width)`. This mode preserves the aspect ratios of the images.
-        model_mode (str, optional): The mode in which the model was created, i.e. 'training', 'inference' or 'inference_fast'.
-            This is needed in order to know whether the model output is already decoded or still needs to be decoded. Refer to
-            the model documentation for the meaning of the individual modes.
-        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
-            positive class in order to be considered for the non-maximum suppression stage for the respective class.
-            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
-            stage, while a larger value will result in a larger part of the selection process happening in the confidence
-            thresholding stage.
-        iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold`
-            with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
-            to the box score.
-        top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
-            non-maximum suppression stage. Defaults to 200, following the paper.
-        pred_coords (str, optional): The box coordinate format that the model outputs. Can be either 'centroids'
-            for the format `(cx, cy, w, h)` (box center coordinates, width, and height), 'minmax' for the format
-            `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
-        normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1])
-            and you wish to transform these relative coordinates back to absolute coordinates.
-            If the model outputs
-            relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`.
-            Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect
-            coordinates. Requires `img_height` and `img_width` if set to `True`.
-
-    Returns:
-        None.
-    '''
-
-    convert_to_3_channels = ConvertTo3Channels()
-    resize = Resize(height=img_height, width=img_width)
-    if data_generator_mode == 'resize':
-        transformations = [convert_to_3_channels,
-                           resize]
-    elif data_generator_mode == 'pad':
-        random_pad = RandomPadFixedAR(
-            patch_aspect_ratio=img_width/img_height, clip_boxes=False)
-        transformations = [convert_to_3_channels,
-                           random_pad,
-                           resize]
-    else:
-        raise ValueError("Unexpected argument value: `data_generator_mode` can be either of 'resize' or 'pad', but received '{}'.".format(
-            data_generator_mode))
-
-    # Set the generator parameters.
-    generator = data_generator.generate(batch_size=batch_size,
-                                        shuffle=False,
-                                        transformations=transformations,
-                                        label_encoder=None,
-                                        returns={'processed_images',
-                                                 'image_ids',
-                                                 'inverse_transform'},
-                                        keep_images_without_gt=True)
-    # Put the results in this list.
-    results = []
-    # Compute the number of batches to iterate over the entire dataset.
-    n_images = data_generator.get_dataset_size()
-    print("Number of images in the evaluation dataset: {}".format(n_images))
-    n_batches = int(ceil(n_images / batch_size))
-    # Loop over all batches.
-    tr = trange(n_batches, file=sys.stdout)
-    tr.set_description('Producing results file')
-    for i in tr:
-        # Generate batch.
-        batch_X, batch_image_ids, batch_inverse_transforms = next(generator)
-        # Predict.
-        y_pred = model.predict(batch_X)
-        # If the model was created in 'training' mode, the raw predictions need to
-        # be decoded and filtered, otherwise that's already taken care of.
-        if model_mode == 'training':
-            # Decode.
-            y_pred = decode_detections(y_pred,
-                                       confidence_thresh=confidence_thresh,
-                                       iou_threshold=iou_threshold,
-                                       top_k=top_k,
-                                       input_coords=pred_coords,
-                                       normalize_coords=normalize_coords,
-                                       img_height=img_height,
-                                       img_width=img_width)
-        else:
-            # Filter out the all-zeros dummy elements of `y_pred`.
-            y_pred_filtered = []
-            for j in range(len(y_pred)):
-                y_pred_filtered.append(y_pred[j][y_pred[j, :, 0] != 0])
-            y_pred = y_pred_filtered
-        # Convert the predicted box coordinates for the original images.
-        y_pred = apply_inverse_transforms(y_pred, batch_inverse_transforms)
-
-        # Convert each predicted box into the results format.
-        for k, batch_item in enumerate(y_pred):
-            for box in batch_item:
-                class_id = box[0]
-                # Transform the consecutive class IDs back to the original COCO category IDs.
-                cat_id = classes_to_cats[class_id]
-                # Round the box coordinates to reduce the JSON file size.
-                xmin = float(round(box[2], 1))
-                ymin = float(round(box[3], 1))
-                xmax = float(round(box[4], 1))
-                ymax = float(round(box[5], 1))
-                width = xmax - xmin
-                height = ymax - ymin
-                bbox = [xmin, ymin, width, height]
-                result = {}
-                result['image_id'] = batch_image_ids[k]
-                result['category_id'] = cat_id
-                result['score'] = float(round(box[1], 3))
-                result['bbox'] = bbox
-                results.append(result)
-
-    with open(out_file, 'w') as f:
-        json.dump(results, f)
-
-    print("Prediction results saved in '{}'".format(out_file))
-'''
-A custom Keras layer to generate anchor boxes.
- -Copyright (C) 2018 Pierluigi Ferrari - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -''' - -from __future__ import division -import numpy as np -import keras.backend as K -from keras.engine.topology import InputSpec -from keras.engine.topology import Layer - -from bounding_box_utils.bounding_box_utils import convert_coordinates - - -class AnchorBoxes(Layer): - ''' - A Keras layer to create an output tensor containing anchor box coordinates - and variances based on the input tensor and the passed arguments. - - A set of 2D anchor boxes of different aspect ratios is created for each spatial unit of - the input tensor. The number of anchor boxes created per unit depends on the arguments - `aspect_ratios` and `two_boxes_for_ar1`, in the default case it is 4. The boxes - are parameterized by the coordinate tuple `(xmin, xmax, ymin, ymax)`. - - The logic implemented by this layer is identical to the logic in the module - `ssd_box_encode_decode_utils.py`. - - The purpose of having this layer in the network is to make the model self-sufficient - at inference time. Since the model is predicting offsets to the anchor boxes - (rather than predicting absolute box coordinates directly), one needs to know the anchor - box coordinates in order to construct the final prediction boxes from the predicted offsets. - If the model's output tensor did not contain the anchor box coordinates, the necessary - information to convert the predicted offsets back to absolute coordinates would be missing - in the model output. The reason why it is necessary to predict offsets to the anchor boxes - rather than to predict absolute box coordinates directly is explained in `README.md`. - - Input shape: - 4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'` - or `(batch, height, width, channels)` if `dim_ordering = 'tf'`. - - Output shape: - 5D tensor of shape `(batch, height, width, n_boxes, 8)`. The last axis contains - the four anchor box coordinates and the four variance values for each box. - ''' - - def __init__(self, - img_height, - img_width, - this_scale, - next_scale, - aspect_ratios=[0.5, 1.0, 2.0], - two_boxes_for_ar1=True, - this_steps=None, - this_offsets=None, - clip_boxes=False, - variances=[0.1, 0.1, 0.2, 0.2], - coords='centroids', - normalize_coords=False, - **kwargs): - ''' - All arguments need to be set to the same values as in the box encoding process, otherwise the behavior is undefined. - Some of these arguments are explained in more detail in the documentation of the `SSDBoxEncoder` class. - - Arguments: - img_height (int): The height of the input images. - img_width (int): The width of the input images. - this_scale (float): A float in [0, 1], the scaling factor for the size of the generated anchor boxes - as a fraction of the shorter side of the input image. - next_scale (float): A float in [0, 1], the next larger scaling factor. Only relevant if - `self.two_boxes_for_ar1 == True`. 
-            aspect_ratios (list, optional): The list of aspect ratios for which default boxes are to be
-                generated for this layer.
-            two_boxes_for_ar1 (bool, optional): Only relevant if `aspect_ratios` contains 1.
-                If `True`, two default boxes will be generated for aspect ratio 1. The first will be generated
-                using the scaling factor for the respective layer, the second one will be generated using the
-                geometric mean of said scaling factor and the next bigger scaling factor.
-            clip_boxes (bool, optional): If `True`, clips the anchor box coordinates to stay within image boundaries.
-            variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate will be divided by
-                its respective variance value.
-            coords (str, optional): The box coordinate format to be used internally in the model (i.e. this is not the input format
-                of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width, and height),
-                'corners' for the format `(xmin, ymin, xmax, ymax)`, or 'minmax' for the format `(xmin, xmax, ymin, ymax)`.
-            normalize_coords (bool, optional): Set to `True` if the model uses relative instead of absolute coordinates,
-                i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates.
-        '''
-        if K.backend() != 'tensorflow':
-            raise TypeError(
-                "This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend()))
-
-        if (this_scale < 0) or (next_scale < 0) or (this_scale > 1):
-            raise ValueError("`this_scale` must be in [0, 1] and `next_scale` must be >0, but `this_scale` == {}, `next_scale` == {}".format(
-                this_scale, next_scale))
-
-        if len(variances) != 4:
-            raise ValueError(
-                "4 variance values must be passed, but {} values were received.".format(len(variances)))
-        variances = np.array(variances)
-        if np.any(variances <= 0):
-            raise ValueError(
-                "All variances must be >0, but the variances given are {}".format(variances))
-
-        self.img_height = img_height
-        self.img_width = img_width
-        self.this_scale = this_scale
-        self.next_scale = next_scale
-        self.aspect_ratios = aspect_ratios
-        self.two_boxes_for_ar1 = two_boxes_for_ar1
-        self.this_steps = this_steps
-        self.this_offsets = this_offsets
-        self.clip_boxes = clip_boxes
-        self.variances = variances
-        self.coords = coords
-        self.normalize_coords = normalize_coords
-        # Compute the number of boxes per cell
-        if (1 in aspect_ratios) and two_boxes_for_ar1:
-            self.n_boxes = len(aspect_ratios) + 1
-        else:
-            self.n_boxes = len(aspect_ratios)
-        super(AnchorBoxes, self).__init__(**kwargs)
-
-    def build(self, input_shape):
-        self.input_spec = [InputSpec(shape=input_shape)]
-        super(AnchorBoxes, self).build(input_shape)
-
-    def call(self, x, mask=None):
-        '''
-        Return an anchor box tensor based on the shape of the input tensor.
-
-        The logic implemented here is identical to the logic in the module `ssd_box_encode_decode_utils.py`.
-
-        Note that this tensor does not participate in any graph computations at runtime. It is being created
-        as a constant once during graph creation and is just being output along with the rest of the model output
-        during runtime. Because of this, all logic is implemented as Numpy array operations and it is sufficient
-        to convert the resulting Numpy array into a Keras tensor at the very end before outputting it.
-
-        Arguments:
-            x (tensor): 4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'`
-                or `(batch, height, width, channels)` if `dim_ordering = 'tf'`.
-                The input for this
-                layer must be the output of the localization predictor layer.
-        '''
-
-        # Compute box width and height for each aspect ratio
-        # The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`.
-        size = min(self.img_height, self.img_width)
-        # Compute the box widths and heights for all aspect ratios
-        wh_list = []
-        for ar in self.aspect_ratios:
-            if (ar == 1):
-                # Compute the regular anchor box for aspect ratio 1.
-                box_height = box_width = self.this_scale * size
-                wh_list.append((box_width, box_height))
-                if self.two_boxes_for_ar1:
-                    # Compute one slightly larger version using the geometric mean of this scale value and the next.
-                    box_height = box_width = np.sqrt(
-                        self.this_scale * self.next_scale) * size
-                    wh_list.append((box_width, box_height))
-            else:
-                box_height = self.this_scale * size / np.sqrt(ar)
-                box_width = self.this_scale * size * np.sqrt(ar)
-                wh_list.append((box_width, box_height))
-        wh_list = np.array(wh_list)
-
-        # We need the shape of the input tensor
-        if K.image_dim_ordering() == 'tf':
-            batch_size, feature_map_height, feature_map_width, feature_map_channels = x._keras_shape
-        else:  # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future
-            batch_size, feature_map_channels, feature_map_height, feature_map_width = x._keras_shape
-
-        # Compute the grid of box center points. They are identical for all aspect ratios.
-
-        # Compute the step sizes, i.e. how far apart the anchor box center points will be vertically and horizontally.
-        if (self.this_steps is None):
-            step_height = self.img_height / feature_map_height
-            step_width = self.img_width / feature_map_width
-        else:
-            if isinstance(self.this_steps, (list, tuple)) and (len(self.this_steps) == 2):
-                step_height = self.this_steps[0]
-                step_width = self.this_steps[1]
-            elif isinstance(self.this_steps, (int, float)):
-                step_height = self.this_steps
-                step_width = self.this_steps
-        # Compute the offsets, i.e. at what pixel values the first anchor box center point will be from the top and from the left of the image.
-        if (self.this_offsets is None):
-            offset_height = 0.5
-            offset_width = 0.5
-        else:
-            if isinstance(self.this_offsets, (list, tuple)) and (len(self.this_offsets) == 2):
-                offset_height = self.this_offsets[0]
-                offset_width = self.this_offsets[1]
-            elif isinstance(self.this_offsets, (int, float)):
-                offset_height = self.this_offsets
-                offset_width = self.this_offsets
-        # Now that we have the offsets and step sizes, compute the grid of anchor box center points.
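-        # (Worked example under assumed values: with a 300x300 input image, a 38x38 feature map,
-        # and no explicit steps or offsets, step_height = 300/38 ≈ 7.89 pixels and the first row
-        # of center points sits at 0.5 * 7.89 ≈ 3.95 pixels from the top of the image.)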
-        cy = np.linspace(offset_height * step_height, (offset_height +
-                         feature_map_height - 1) * step_height, feature_map_height)
-        cx = np.linspace(offset_width * step_width, (offset_width +
-                         feature_map_width - 1) * step_width, feature_map_width)
-        cx_grid, cy_grid = np.meshgrid(cx, cy)
-        # This is necessary for np.tile() to do what we want further down
-        cx_grid = np.expand_dims(cx_grid, -1)
-        # This is necessary for np.tile() to do what we want further down
-        cy_grid = np.expand_dims(cy_grid, -1)
-
-        # Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
-        # where the last dimension will contain `(cx, cy, w, h)`
-        boxes_tensor = np.zeros(
-            (feature_map_height, feature_map_width, self.n_boxes, 4))
-
-        boxes_tensor[:, :, :, 0] = np.tile(
-            cx_grid, (1, 1, self.n_boxes))  # Set cx
-        boxes_tensor[:, :, :, 1] = np.tile(
-            cy_grid, (1, 1, self.n_boxes))  # Set cy
-        boxes_tensor[:, :, :, 2] = wh_list[:, 0]  # Set w
-        boxes_tensor[:, :, :, 3] = wh_list[:, 1]  # Set h
-
-        # Convert `(cx, cy, w, h)` to `(xmin, ymin, xmax, ymax)`
-        boxes_tensor = convert_coordinates(
-            boxes_tensor, start_index=0, conversion='centroids2corners')
-
-        # If `clip_boxes` is enabled, clip the coordinates to lie within the image boundaries
-        if self.clip_boxes:
-            x_coords = boxes_tensor[:, :, :, [0, 2]]
-            x_coords[x_coords >= self.img_width] = self.img_width - 1
-            x_coords[x_coords < 0] = 0
-            boxes_tensor[:, :, :, [0, 2]] = x_coords
-            y_coords = boxes_tensor[:, :, :, [1, 3]]
-            y_coords[y_coords >= self.img_height] = self.img_height - 1
-            y_coords[y_coords < 0] = 0
-            boxes_tensor[:, :, :, [1, 3]] = y_coords
-
-        # If `normalize_coords` is enabled, normalize the coordinates to be within [0,1]
-        if self.normalize_coords:
-            boxes_tensor[:, :, :, [0, 2]] /= self.img_width
-            boxes_tensor[:, :, :, [1, 3]] /= self.img_height
-
-        # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth.
-        if self.coords == 'centroids':
-            # Convert `(xmin, ymin, xmax, ymax)` back to `(cx, cy, w, h)`.
-            boxes_tensor = convert_coordinates(
-                boxes_tensor, start_index=0, conversion='corners2centroids', border_pixels='half')
-        elif self.coords == 'minmax':
-            # Convert `(xmin, ymin, xmax, ymax)` to `(xmin, xmax, ymin, ymax)`.
-            boxes_tensor = convert_coordinates(
-                boxes_tensor, start_index=0, conversion='corners2minmax', border_pixels='half')
-
-        # Create a tensor to contain the variances and append it to `boxes_tensor`. This tensor has the same shape
-        # as `boxes_tensor` and simply contains the same 4 variance values for every position in the last axis.
-        # Has shape `(feature_map_height, feature_map_width, n_boxes, 4)`
-        variances_tensor = np.zeros_like(boxes_tensor)
-        variances_tensor += self.variances  # Long live broadcasting
-        # Now `boxes_tensor` becomes a tensor of shape `(feature_map_height, feature_map_width, n_boxes, 8)`
-        boxes_tensor = np.concatenate(
-            (boxes_tensor, variances_tensor), axis=-1)
-
-        # Now prepend one dimension to `boxes_tensor` to account for the batch size and tile it along the batch dimension.
-        # The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 8)`
-        boxes_tensor = np.expand_dims(boxes_tensor, axis=0)
-        boxes_tensor = K.tile(K.constant(
-            boxes_tensor, dtype='float32'), (K.shape(x)[0], 1, 1, 1, 1))
-
-        return boxes_tensor
-
-    def compute_output_shape(self, input_shape):
-        if K.image_dim_ordering() == 'tf':
-            batch_size, feature_map_height, feature_map_width, feature_map_channels = input_shape
-        else:  # Not yet relevant since TensorFlow is the only supported backend right now, but it can't harm to have this in here for the future
-            batch_size, feature_map_channels, feature_map_height, feature_map_width = input_shape
-        return (batch_size, feature_map_height, feature_map_width, self.n_boxes, 8)
-
-    def get_config(self):
-        config = {
-            'img_height': self.img_height,
-            'img_width': self.img_width,
-            'this_scale': self.this_scale,
-            'next_scale': self.next_scale,
-            'aspect_ratios': list(self.aspect_ratios),
-            'two_boxes_for_ar1': self.two_boxes_for_ar1,
-            'clip_boxes': self.clip_boxes,
-            'variances': list(self.variances),
-            'coords': self.coords,
-            'normalize_coords': self.normalize_coords
-        }
-        base_config = super(AnchorBoxes, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-'''
-A custom Keras layer to decode the raw SSD prediction output. Corresponds to the
-`DetectionOutput` layer type in the original Caffe implementation of SSD.
-
-Copyright (C) 2018 Pierluigi Ferrari
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-from __future__ import division
-import numpy as np
-import tensorflow as tf
-import keras.backend as K
-from keras.engine.topology import InputSpec
-from keras.engine.topology import Layer
-
-
-class DecodeDetections(Layer):
-    '''
-    A Keras layer to decode the raw SSD prediction output.
-
-    Input shape:
-        3D tensor of shape `(batch_size, n_boxes, n_classes + 12)`.
-
-    Output shape:
-        3D tensor of shape `(batch_size, top_k, 6)`.
-    '''
-
-    def __init__(self,
-                 confidence_thresh=0.01,
-                 iou_threshold=0.45,
-                 top_k=200,
-                 nms_max_output_size=400,
-                 coords='centroids',
-                 normalize_coords=True,
-                 img_height=None,
-                 img_width=None,
-                 **kwargs):
-        '''
-        All default argument values follow the Caffe implementation.
-
-        Arguments:
-            confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
-                positive class in order to be considered for the non-maximum suppression stage for the respective class.
- A lower value will result in a larger part of the selection process being done by the non-maximum suppression - stage, while a larger value will result in a larger part of the selection process happening in the confidence - thresholding stage. - iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold` - with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers - to the box score. - top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the - non-maximum suppression stage. - nms_max_output_size (int, optional): The maximum number of predictions that will be left after performing non-maximum - suppression. - coords (str, optional): The box coordinate format that the model outputs. Must be 'centroids' - i.e. the format `(cx, cy, w, h)` (box center coordinates, width, and height). Other coordinate formats are - currently not supported. - normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1]) - and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs - relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`. - Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect - coordinates. Requires `img_height` and `img_width` if set to `True`. - img_height (int, optional): The height of the input images. Only needed if `normalize_coords` is `True`. - img_width (int, optional): The width of the input images. Only needed if `normalize_coords` is `True`. - ''' - if K.backend() != 'tensorflow': - raise TypeError( - "This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend())) - - if normalize_coords and ((img_height is None) or (img_width is None)): - raise ValueError("If relative box coordinates are supposed to be converted to absolute coordinates, the decoder needs the image size in order to decode the predictions, but `img_height == {}` and `img_width == {}`".format(img_height, img_width)) - - if coords != 'centroids': - raise ValueError( - "The DetectionOutput layer currently only supports the 'centroids' coordinate format.") - - # We need these members for the config. - self.confidence_thresh = confidence_thresh - self.iou_threshold = iou_threshold - self.top_k = top_k - self.normalize_coords = normalize_coords - self.img_height = img_height - self.img_width = img_width - self.coords = coords - self.nms_max_output_size = nms_max_output_size - - # We need these members for TensorFlow. 
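The offset decoding that `call()` implements further below inverts the variance-scaled centroid encoding (`cx = cx_pred * cx_variance * w_anchor + cx_anchor`, and so on). A minimal NumPy sketch of that arithmetic, with made-up anchor and offset values; the 12-entry layout mirrors the tail of `y_pred`:

```python
import numpy as np

# Toy example: one anchor box and one predicted offset vector.
# Layout mirrors the last 12 entries of `y_pred`:
# [dcx, dcy, dw, dh, cx_anchor, cy_anchor, w_anchor, h_anchor, var_cx, var_cy, var_w, var_h]
pred = np.array([0.2, -0.1, 0.3, 0.05,      # predicted offsets
                 150.0, 100.0, 60.0, 40.0,  # anchor box (centroid format)
                 0.1, 0.1, 0.2, 0.2])       # variances

dcx, dcy, dw, dh = pred[0:4]
cx_a, cy_a, w_a, h_a = pred[4:8]
var_cx, var_cy, var_w, var_h = pred[8:12]

# Decode the variance-scaled offsets back to absolute centroids.
cx = dcx * var_cx * w_a + cx_a   # 150.0 + 0.2*0.1*60 = 151.2
cy = dcy * var_cy * h_a + cy_a   # 100.0 - 0.1*0.1*40 = 99.6
w = np.exp(dw * var_w) * w_a     # exp(0.06)*60 ~ 63.7
h = np.exp(dh * var_h) * h_a     # exp(0.01)*40 ~ 40.4

# Convert centroids to corners, as the layer does before NMS.
xmin, ymin = cx - 0.5 * w, cy - 0.5 * h
xmax, ymax = cx + 0.5 * w, cy + 0.5 * h
print(xmin, ymin, xmax, ymax)
```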
- self.tf_confidence_thresh = tf.constant( - self.confidence_thresh, name='confidence_thresh') - self.tf_iou_threshold = tf.constant( - self.iou_threshold, name='iou_threshold') - self.tf_top_k = tf.constant(self.top_k, name='top_k') - self.tf_normalize_coords = tf.constant( - self.normalize_coords, name='normalize_coords') - self.tf_img_height = tf.constant( - self.img_height, dtype=tf.float32, name='img_height') - self.tf_img_width = tf.constant( - self.img_width, dtype=tf.float32, name='img_width') - self.tf_nms_max_output_size = tf.constant( - self.nms_max_output_size, name='nms_max_output_size') - - super(DecodeDetections, self).__init__(**kwargs) - - def build(self, input_shape): - self.input_spec = [InputSpec(shape=input_shape)] - super(DecodeDetections, self).build(input_shape) - - def call(self, y_pred, mask=None): - ''' - Returns: - 3D tensor of shape `(batch_size, top_k, 6)`. The second axis is zero-padded - to always yield `top_k` predictions per batch item. The last axis contains - the coordinates for each predicted box in the format - `[class_id, confidence, xmin, ymin, xmax, ymax]`. - ''' - - ##################################################################################### - # 1. Convert the box coordinates from predicted anchor box offsets to predicted - # absolute coordinates - ##################################################################################### - - # Convert anchor box offsets to image offsets. - # cx = cx_pred * cx_variance * w_anchor + cx_anchor - cx = y_pred[..., -12] * y_pred[..., -4] * \ - y_pred[..., -6] + y_pred[..., -8] - # cy = cy_pred * cy_variance * h_anchor + cy_anchor - cy = y_pred[..., -11] * y_pred[..., -3] * \ - y_pred[..., -5] + y_pred[..., -7] - # w = exp(w_pred * variance_w) * w_anchor - w = tf.exp(y_pred[..., -10] * y_pred[..., -2]) * y_pred[..., -6] - # h = exp(h_pred * variance_h) * h_anchor - h = tf.exp(y_pred[..., -9] * y_pred[..., -1]) * y_pred[..., -5] - - # Convert 'centroids' to 'corners'. - xmin = cx - 0.5 * w - ymin = cy - 0.5 * h - xmax = cx + 0.5 * w - ymax = cy + 0.5 * h - - # If the model predicts box coordinates relative to the image dimensions and they are supposed - # to be converted back to absolute coordinates, do that. - def normalized_coords(): - xmin1 = tf.expand_dims(xmin * self.tf_img_width, axis=-1) - ymin1 = tf.expand_dims(ymin * self.tf_img_height, axis=-1) - xmax1 = tf.expand_dims(xmax * self.tf_img_width, axis=-1) - ymax1 = tf.expand_dims(ymax * self.tf_img_height, axis=-1) - return xmin1, ymin1, xmax1, ymax1 - - def non_normalized_coords(): - return tf.expand_dims(xmin, axis=-1), tf.expand_dims(ymin, axis=-1), tf.expand_dims(xmax, axis=-1), tf.expand_dims(ymax, axis=-1) - - xmin, ymin, xmax, ymax = tf.cond( - self.tf_normalize_coords, normalized_coords, non_normalized_coords) - - # Concatenate the one-hot class confidences and the converted box coordinates to form the decoded predictions tensor. - y_pred = tf.concat( - values=[y_pred[..., :-12], xmin, ymin, xmax, ymax], axis=-1) - - ##################################################################################### - # 2. Perform confidence thresholding, per-class non-maximum suppression, and - # top-k filtering. - ##################################################################################### - - batch_size = tf.shape(y_pred)[0] # Output dtype: tf.int32 - n_boxes = tf.shape(y_pred)[1] - n_classes = y_pred.shape[2] - 4 - class_indices = tf.range(1, n_classes) - - # Create a function that filters the predictions for the given batch item. 
Specifically, it performs:
-        # - confidence thresholding
-        # - non-maximum suppression (NMS)
-        # - top-k filtering
-        def filter_predictions(batch_item):
-
-            # Create a function that filters the predictions for one single class.
-            def filter_single_class(index):
-
-                # From a tensor of shape (n_boxes, n_classes + 4 coordinates) extract
-                # a tensor of shape (n_boxes, 1 + 4 coordinates) that contains the
-                # confidence values for just one class, determined by `index`.
-                confidences = tf.expand_dims(batch_item[..., index], axis=-1)
-                class_id = tf.fill(dims=tf.shape(
-                    confidences), value=tf.to_float(index))
-                box_coordinates = batch_item[..., -4:]
-
-                single_class = tf.concat(
-                    [class_id, confidences, box_coordinates], axis=-1)
-
-                # Apply confidence thresholding with respect to the class defined by `index`.
-                threshold_met = single_class[:, 1] > self.tf_confidence_thresh
-                single_class = tf.boolean_mask(tensor=single_class,
-                                               mask=threshold_met)
-
-                # If any boxes made the threshold, perform NMS.
-                def perform_nms():
-                    scores = single_class[..., 1]
-
-                    # `tf.image.non_max_suppression()` needs the box coordinates in the format `(ymin, xmin, ymax, xmax)`.
-                    xmin = tf.expand_dims(single_class[..., -4], axis=-1)
-                    ymin = tf.expand_dims(single_class[..., -3], axis=-1)
-                    xmax = tf.expand_dims(single_class[..., -2], axis=-1)
-                    ymax = tf.expand_dims(single_class[..., -1], axis=-1)
-                    boxes = tf.concat(values=[ymin, xmin, ymax, xmax], axis=-1)
-
-                    maxima_indices = tf.image.non_max_suppression(boxes=boxes,
-                                                                  scores=scores,
-                                                                  max_output_size=self.tf_nms_max_output_size,
-                                                                  iou_threshold=self.iou_threshold,
-                                                                  name='non_maximum_suppression')
-                    maxima = tf.gather(params=single_class,
-                                       indices=maxima_indices,
-                                       axis=0)
-                    return maxima
-
-                def no_confident_predictions():
-                    return tf.constant(value=0.0, shape=(1, 6))
-
-                single_class_nms = tf.cond(
-                    tf.equal(tf.size(single_class), 0), no_confident_predictions, perform_nms)
-
-                # Make sure `single_class` is exactly `self.nms_max_output_size` elements long.
-                padded_single_class = tf.pad(tensor=single_class_nms,
-                                             paddings=[
-                                                 [0, self.tf_nms_max_output_size - tf.shape(single_class_nms)[0]], [0, 0]],
-                                             mode='CONSTANT',
-                                             constant_values=0.0)
-
-                return padded_single_class
-
-            # Iterate `filter_single_class()` over all class indices.
-            filtered_single_classes = tf.map_fn(fn=lambda i: filter_single_class(i),
-                                                elems=tf.range(1, n_classes),
-                                                dtype=tf.float32,
-                                                parallel_iterations=128,
-                                                back_prop=False,
-                                                swap_memory=False,
-                                                infer_shape=True,
-                                                name='loop_over_classes')
-
-            # Concatenate the filtered results for all individual classes to one tensor.
-            filtered_predictions = tf.reshape(
-                tensor=filtered_single_classes, shape=(-1, 6))
-
-            # Perform top-k filtering for this batch item or pad it in case there are
-            # fewer than `self.top_k` boxes left at this point. Either way, produce a
-            # tensor of length `self.top_k`. By the time we return the final results tensor
-            # for the whole batch, all batch items must have the same number of predicted
-            # boxes so that the tensor dimensions are homogeneous. If fewer than `self.top_k`
-            # predictions are left after the filtering process above, we pad the missing
-            # predictions with zeros as dummy entries.
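The `tf.image.non_max_suppression()` call in `perform_nms()` above does the heavy lifting of the per-class filter. As a rough sketch of what it computes, here is a greedy NMS in plain NumPy, assuming corner-format boxes (illustrative only, not the TF kernel):

```python
import numpy as np

def iou(box, boxes):
    """IoU of one box against an array of boxes, all as (xmin, ymin, xmax, ymax)."""
    ixmin = np.maximum(box[0], boxes[:, 0])
    iymin = np.maximum(box[1], boxes[:, 1])
    ixmax = np.minimum(box[2], boxes[:, 2])
    iymax = np.minimum(box[3], boxes[:, 3])
    inter = np.maximum(ixmax - ixmin, 0.0) * np.maximum(iymax - iymin, 0.0)
    area = lambda b: (b[..., 2] - b[..., 0]) * (b[..., 3] - b[..., 1])
    return inter / (area(box) + area(boxes) - inter)

def greedy_nms(boxes, scores, iou_threshold=0.45, max_output_size=400):
    """Keep the highest-scoring box, drop overlaps above the threshold, repeat."""
    order = np.argsort(-scores)
    keep = []
    while order.size > 0 and len(keep) < max_output_size:
        best = order[0]
        keep.append(best)
        rest = order[1:]
        order = rest[iou(boxes[best], boxes[rest]) <= iou_threshold]
    return np.array(keep)

boxes = np.array([[10, 10, 50, 50], [12, 12, 52, 52], [100, 100, 150, 150]], dtype=float)
scores = np.array([0.9, 0.8, 0.7])
print(greedy_nms(boxes, scores))  # [0 2]: the near-duplicate box 1 is suppressed
```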
- def top_k(): - return tf.gather(params=filtered_predictions, - indices=tf.nn.top_k( - filtered_predictions[:, 1], k=self.tf_top_k, sorted=True).indices, - axis=0) - - def pad_and_top_k(): - padded_predictions = tf.pad(tensor=filtered_predictions, - paddings=[ - [0, self.tf_top_k - tf.shape(filtered_predictions)[0]], [0, 0]], - mode='CONSTANT', - constant_values=0.0) - return tf.gather(params=padded_predictions, - indices=tf.nn.top_k( - padded_predictions[:, 1], k=self.tf_top_k, sorted=True).indices, - axis=0) - - top_k_boxes = tf.cond(tf.greater_equal(tf.shape(filtered_predictions)[ - 0], self.tf_top_k), top_k, pad_and_top_k) - - return top_k_boxes - - # Iterate `filter_predictions()` over all batch items. - output_tensor = tf.map_fn(fn=lambda x: filter_predictions(x), - elems=y_pred, - dtype=None, - parallel_iterations=128, - back_prop=False, - swap_memory=False, - infer_shape=True, - name='loop_over_batch') - - return output_tensor - - def compute_output_shape(self, input_shape): - batch_size, n_boxes, last_axis = input_shape - # Last axis: (class_ID, confidence, 4 box coordinates) - return (batch_size, self.tf_top_k, 6) - - def get_config(self): - config = { - 'confidence_thresh': self.confidence_thresh, - 'iou_threshold': self.iou_threshold, - 'top_k': self.top_k, - 'nms_max_output_size': self.nms_max_output_size, - 'coords': self.coords, - 'normalize_coords': self.normalize_coords, - 'img_height': self.img_height, - 'img_width': self.img_width, - } - base_config = super(DecodeDetections, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -''' -A custom Keras layer to decode the raw SSD prediction output. This is a modified -and more efficient version of the `DetectionOutput` layer type in the original Caffe -implementation of SSD. For a faithful replication of the original layer, please -refer to the `DecodeDetections` layer. - -Copyright (C) 2018 Pierluigi Ferrari - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -''' - -from __future__ import division -import numpy as np -import tensorflow as tf -import keras.backend as K -from keras.engine.topology import InputSpec -from keras.engine.topology import Layer - - -class DecodeDetectionsFast(Layer): - ''' - A Keras layer to decode the raw SSD prediction output. - - Input shape: - 3D tensor of shape `(batch_size, n_boxes, n_classes + 12)`. - - Output shape: - 3D tensor of shape `(batch_size, top_k, 6)`. - ''' - - def __init__(self, - confidence_thresh=0.01, - iou_threshold=0.45, - top_k=200, - nms_max_output_size=400, - coords='centroids', - normalize_coords=True, - img_height=None, - img_width=None, - **kwargs): - ''' - All default argument values follow the Caffe implementation. - - Arguments: - confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific - positive class in order to be considered for the non-maximum suppression stage for the respective class. 
- A lower value will result in a larger part of the selection process being done by the non-maximum suppression - stage, while a larger value will result in a larger part of the selection process happening in the confidence - thresholding stage. - iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold` - with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers - to the box score. - top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the - non-maximum suppression stage. - nms_max_output_size (int, optional): The maximum number of predictions that will be left after performing non-maximum - suppression. - coords (str, optional): The box coordinate format that the model outputs. Must be 'centroids' - i.e. the format `(cx, cy, w, h)` (box center coordinates, width, and height). Other coordinate formats are - currently not supported. - normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1]) - and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs - relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`. - Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect - coordinates. Requires `img_height` and `img_width` if set to `True`. - img_height (int, optional): The height of the input images. Only needed if `normalize_coords` is `True`. - img_width (int, optional): The width of the input images. Only needed if `normalize_coords` is `True`. - ''' - if K.backend() != 'tensorflow': - raise TypeError( - "This layer only supports TensorFlow at the moment, but you are using the {} backend.".format(K.backend())) - - if normalize_coords and ((img_height is None) or (img_width is None)): - raise ValueError("If relative box coordinates are supposed to be converted to absolute coordinates, the decoder needs the image size in order to decode the predictions, but `img_height == {}` and `img_width == {}`".format(img_height, img_width)) - - if coords != 'centroids': - raise ValueError( - "The DetectionOutput layer currently only supports the 'centroids' coordinate format.") - - # We need these members for the config. - self.confidence_thresh = confidence_thresh - self.iou_threshold = iou_threshold - self.top_k = top_k - self.normalize_coords = normalize_coords - self.img_height = img_height - self.img_width = img_width - self.coords = coords - self.nms_max_output_size = nms_max_output_size - - # We need these members for TensorFlow. 
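The decisive difference from `DecodeDetections` shows up at the start of `call()` below: instead of looping over every class, the fast decoder commits to a single class per box up front via an argmax over the class confidences. A NumPy sketch of that step with toy scores (class 0 being the background):

```python
import numpy as np

# Toy class-confidence tensor for 3 boxes and 4 classes (class 0 = background).
class_scores = np.array([[0.70, 0.10, 0.15, 0.05],
                         [0.05, 0.80, 0.10, 0.05],
                         [0.20, 0.25, 0.15, 0.40]])

# One class id and one confidence per box, instead of a per-class loop.
class_ids = np.argmax(class_scores, axis=-1)   # [0 1 3]
confidences = np.max(class_scores, axis=-1)    # [0.70 0.80 0.40]

# Boxes whose argmax is the background class are dropped later, which is
# what the `tf.not_equal(batch_item[..., 0], 0.0)` mask in
# `filter_predictions()` below does.
print(class_ids, confidences)
```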
- self.tf_confidence_thresh = tf.constant( - self.confidence_thresh, name='confidence_thresh') - self.tf_iou_threshold = tf.constant( - self.iou_threshold, name='iou_threshold') - self.tf_top_k = tf.constant(self.top_k, name='top_k') - self.tf_normalize_coords = tf.constant( - self.normalize_coords, name='normalize_coords') - self.tf_img_height = tf.constant( - self.img_height, dtype=tf.float32, name='img_height') - self.tf_img_width = tf.constant( - self.img_width, dtype=tf.float32, name='img_width') - self.tf_nms_max_output_size = tf.constant( - self.nms_max_output_size, name='nms_max_output_size') - - super(DecodeDetectionsFast, self).__init__(**kwargs) - - def build(self, input_shape): - self.input_spec = [InputSpec(shape=input_shape)] - super(DecodeDetectionsFast, self).build(input_shape) - - def call(self, y_pred, mask=None): - ''' - Returns: - 3D tensor of shape `(batch_size, top_k, 6)`. The second axis is zero-padded - to always yield `top_k` predictions per batch item. The last axis contains - the coordinates for each predicted box in the format - `[class_id, confidence, xmin, ymin, xmax, ymax]`. - ''' - - ##################################################################################### - # 1. Convert the box coordinates from predicted anchor box offsets to predicted - # absolute coordinates - ##################################################################################### - - # Extract the predicted class IDs as the indices of the highest confidence values. - class_ids = tf.expand_dims(tf.to_float( - tf.argmax(y_pred[..., :-12], axis=-1)), axis=-1) - # Extract the confidences of the maximal classes. - confidences = tf.reduce_max(y_pred[..., :-12], axis=-1, keep_dims=True) - - # Convert anchor box offsets to image offsets. - # cx = cx_pred * cx_variance * w_anchor + cx_anchor - cx = y_pred[..., -12] * y_pred[..., -4] * \ - y_pred[..., -6] + y_pred[..., -8] - # cy = cy_pred * cy_variance * h_anchor + cy_anchor - cy = y_pred[..., -11] * y_pred[..., -3] * \ - y_pred[..., -5] + y_pred[..., -7] - # w = exp(w_pred * variance_w) * w_anchor - w = tf.exp(y_pred[..., -10] * y_pred[..., -2]) * y_pred[..., -6] - # h = exp(h_pred * variance_h) * h_anchor - h = tf.exp(y_pred[..., -9] * y_pred[..., -1]) * y_pred[..., -5] - - # Convert 'centroids' to 'corners'. - xmin = cx - 0.5 * w - ymin = cy - 0.5 * h - xmax = cx + 0.5 * w - ymax = cy + 0.5 * h - - # If the model predicts box coordinates relative to the image dimensions and they are supposed - # to be converted back to absolute coordinates, do that. - def normalized_coords(): - xmin1 = tf.expand_dims(xmin * self.tf_img_width, axis=-1) - ymin1 = tf.expand_dims(ymin * self.tf_img_height, axis=-1) - xmax1 = tf.expand_dims(xmax * self.tf_img_width, axis=-1) - ymax1 = tf.expand_dims(ymax * self.tf_img_height, axis=-1) - return xmin1, ymin1, xmax1, ymax1 - - def non_normalized_coords(): - return tf.expand_dims(xmin, axis=-1), tf.expand_dims(ymin, axis=-1), tf.expand_dims(xmax, axis=-1), tf.expand_dims(ymax, axis=-1) - - xmin, ymin, xmax, ymax = tf.cond( - self.tf_normalize_coords, normalized_coords, non_normalized_coords) - - # Concatenate the one-hot class confidences and the converted box coordinates to form the decoded predictions tensor. - y_pred = tf.concat( - values=[class_ids, confidences, xmin, ymin, xmax, ymax], axis=-1) - - ##################################################################################### - # 2. Perform confidence thresholding, non-maximum suppression, and top-k filtering. 
-        #####################################################################################
-
-        batch_size = tf.shape(y_pred)[0]  # Output dtype: tf.int32
-        n_boxes = tf.shape(y_pred)[1]
-        n_classes = y_pred.shape[2] - 4
-        class_indices = tf.range(1, n_classes)
-
-        # Create a function that filters the predictions for the given batch item. Specifically, it performs:
-        # - confidence thresholding
-        # - non-maximum suppression (NMS)
-        # - top-k filtering
-        def filter_predictions(batch_item):
-
-            # Keep only the non-background boxes.
-            positive_boxes = tf.not_equal(batch_item[..., 0], 0.0)
-            predictions = tf.boolean_mask(tensor=batch_item,
-                                          mask=positive_boxes)
-
-            def perform_confidence_thresholding():
-                # Apply confidence thresholding.
-                threshold_met = predictions[:, 1] > self.tf_confidence_thresh
-                return tf.boolean_mask(tensor=predictions,
-                                       mask=threshold_met)
-
-            def no_positive_boxes():
-                return tf.constant(value=0.0, shape=(1, 6))
-
-            # If there are any positive predictions, perform confidence thresholding.
-            predictions_conf_thresh = tf.cond(tf.equal(
-                tf.size(predictions), 0), no_positive_boxes, perform_confidence_thresholding)
-
-            def perform_nms():
-                scores = predictions_conf_thresh[..., 1]
-
-                # `tf.image.non_max_suppression()` needs the box coordinates in the format `(ymin, xmin, ymax, xmax)`.
-                xmin = tf.expand_dims(
-                    predictions_conf_thresh[..., -4], axis=-1)
-                ymin = tf.expand_dims(
-                    predictions_conf_thresh[..., -3], axis=-1)
-                xmax = tf.expand_dims(
-                    predictions_conf_thresh[..., -2], axis=-1)
-                ymax = tf.expand_dims(
-                    predictions_conf_thresh[..., -1], axis=-1)
-                boxes = tf.concat(values=[ymin, xmin, ymax, xmax], axis=-1)
-
-                maxima_indices = tf.image.non_max_suppression(boxes=boxes,
-                                                              scores=scores,
-                                                              max_output_size=self.tf_nms_max_output_size,
-                                                              iou_threshold=self.iou_threshold,
-                                                              name='non_maximum_suppression')
-                maxima = tf.gather(params=predictions_conf_thresh,
-                                   indices=maxima_indices,
-                                   axis=0)
-                return maxima
-
-            def no_confident_predictions():
-                return tf.constant(value=0.0, shape=(1, 6))
-
-            # If any boxes made the threshold, perform NMS.
-            predictions_nms = tf.cond(tf.equal(
-                tf.size(predictions_conf_thresh), 0), no_confident_predictions, perform_nms)
-
-            # Perform top-k filtering for this batch item or pad it in case there are
-            # fewer than `self.top_k` boxes left at this point. Either way, produce a
-            # tensor of length `self.top_k`. By the time we return the final results tensor
-            # for the whole batch, all batch items must have the same number of predicted
-            # boxes so that the tensor dimensions are homogeneous. If fewer than `self.top_k`
-            # predictions are left after the filtering process above, we pad the missing
-            # predictions with zeros as dummy entries.
-            def top_k():
-                return tf.gather(params=predictions_nms,
-                                 indices=tf.nn.top_k(
-                                     predictions_nms[:, 1], k=self.tf_top_k, sorted=True).indices,
-                                 axis=0)
-
-            def pad_and_top_k():
-                padded_predictions = tf.pad(tensor=predictions_nms,
-                                            paddings=[
-                                                [0, self.tf_top_k - tf.shape(predictions_nms)[0]], [0, 0]],
-                                            mode='CONSTANT',
-                                            constant_values=0.0)
-                return tf.gather(params=padded_predictions,
-                                 indices=tf.nn.top_k(
-                                     padded_predictions[:, 1], k=self.tf_top_k, sorted=True).indices,
-                                 axis=0)
-
-            top_k_boxes = tf.cond(tf.greater_equal(tf.shape(predictions_nms)[
-                                  0], self.tf_top_k), top_k, pad_and_top_k)
-
-            return top_k_boxes
-
-        # Iterate `filter_predictions()` over all batch items.
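Before `filter_predictions()` is mapped over the batch: the `top_k`/`pad_and_top_k` pair above exists only to give every batch item the same output length. The same logic in NumPy terms, with made-up numbers:

```python
import numpy as np

def pad_and_top_k(predictions, top_k=5):
    """Return exactly `top_k` rows, sorted by confidence (column 1),
    zero-padding first if fewer rows survived the filtering."""
    n = predictions.shape[0]
    if n < top_k:
        padding = np.zeros((top_k - n, predictions.shape[1]))
        predictions = np.vstack([predictions, padding])
    order = np.argsort(-predictions[:, 1])[:top_k]
    return predictions[order]

# Three surviving predictions: (class_id, confidence, xmin, ymin, xmax, ymax)
preds = np.array([[1.0, 0.30, 0, 0, 10, 10],
                  [2.0, 0.90, 5, 5, 20, 20],
                  [1.0, 0.60, 8, 8, 30, 30]])
print(pad_and_top_k(preds, top_k=5))  # 5 rows: the 3 real ones (sorted), then 2 zero rows
```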
- output_tensor = tf.map_fn(fn=lambda x: filter_predictions(x), - elems=y_pred, - dtype=None, - parallel_iterations=128, - back_prop=False, - swap_memory=False, - infer_shape=True, - name='loop_over_batch') - - return output_tensor - - def compute_output_shape(self, input_shape): - batch_size, n_boxes, last_axis = input_shape - # Last axis: (class_ID, confidence, 4 box coordinates) - return (batch_size, self.tf_top_k, 6) - - def get_config(self): - config = { - 'confidence_thresh': self.confidence_thresh, - 'iou_threshold': self.iou_threshold, - 'top_k': self.top_k, - 'nms_max_output_size': self.nms_max_output_size, - 'coords': self.coords, - 'normalize_coords': self.normalize_coords, - 'img_height': self.img_height, - 'img_width': self.img_width, - } - base_config = super(DecodeDetectionsFast, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -''' -A custom Keras layer to perform L2-normalization. - -Copyright (C) 2018 Pierluigi Ferrari - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -''' - -from __future__ import division -import numpy as np -import keras.backend as K -from keras.engine.topology import InputSpec -from keras.engine.topology import Layer - - -class L2Normalization(Layer): - ''' - Performs L2 normalization on the input tensor with a learnable scaling parameter - as described in the paper "Parsenet: Looking Wider to See Better" (see references) - and as used in the original SSD model. - - Arguments: - gamma_init (int): The initial scaling parameter. Defaults to 20 following the - SSD paper. - - Input shape: - 4D tensor of shape `(batch, channels, height, width)` if `dim_ordering = 'th'` - or `(batch, height, width, channels)` if `dim_ordering = 'tf'`. - - Returns: - The scaled tensor. Same shape as the input tensor. - - References: - http://cs.unc.edu/~wliu/papers/parsenet.pdf - ''' - - def __init__(self, gamma_init=20, **kwargs): - if K.image_dim_ordering() == 'tf': - self.axis = 3 - else: - self.axis = 1 - self.gamma_init = gamma_init - super(L2Normalization, self).__init__(**kwargs) - - def build(self, input_shape): - self.input_spec = [InputSpec(shape=input_shape)] - gamma = self.gamma_init * np.ones((input_shape[self.axis],)) - self.gamma = K.variable(gamma, name='{}_gamma'.format(self.name)) - self.trainable_weights = [self.gamma] - super(L2Normalization, self).build(input_shape) - - def call(self, x, mask=None): - output = K.l2_normalize(x, self.axis) - return output * self.gamma - - def get_config(self): - config = { - 'gamma_init': self.gamma_init - } - base_config = super(L2Normalization, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -''' -The Keras-compatible loss function for the SSD model. Currently supports TensorFlow only. - -Copyright (C) 2018 Pierluigi Ferrari - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-from __future__ import division
-import tensorflow as tf
-
-
-class SSDLoss:
-    '''
-    The SSD loss, see https://arxiv.org/abs/1512.02325.
-    '''
-
-    def __init__(self,
-                 neg_pos_ratio=3,
-                 n_neg_min=0,
-                 alpha=1.0):
-        '''
-        Arguments:
-            neg_pos_ratio (int, optional): The maximum ratio of negative (i.e. background)
-                to positive ground truth boxes to include in the loss computation.
-                There are no actual background ground truth boxes of course, but `y_true`
-                contains anchor boxes labeled with the background class. Since
-                the number of background boxes in `y_true` will usually exceed
-                the number of positive boxes by far, it is necessary to balance
-                their influence on the loss. Defaults to 3 following the paper.
-            n_neg_min (int, optional): The minimum number of negative ground truth boxes to
-                enter the loss computation *per batch*. This argument can be used to make
-                sure that the model learns from a minimum number of negatives in batches
-                in which there are very few, or even none at all, positive ground truth
-                boxes. It defaults to 0 and if used, it should be set to a value that
-                stands in reasonable proportion to the batch size used for training.
-            alpha (float, optional): A factor to weight the localization loss in the
-                computation of the total loss. Defaults to 1.0 following the paper.
-        '''
-        self.neg_pos_ratio = neg_pos_ratio
-        self.n_neg_min = n_neg_min
-        self.alpha = alpha
-
-    def smooth_L1_loss(self, y_true, y_pred):
-        '''
-        Compute smooth L1 loss, see references.
-
-        Arguments:
-            y_true (nD tensor): A TensorFlow tensor of any shape containing the ground truth data.
-                In this context, the expected tensor has shape `(batch_size, #boxes, 4)` and
-                contains the ground truth bounding box coordinates, where the last dimension
-                contains `(xmin, xmax, ymin, ymax)`.
-            y_pred (nD tensor): A TensorFlow tensor of identical structure to `y_true` containing
-                the predicted data, in this context the predicted bounding box coordinates.
-
-        Returns:
-            The smooth L1 loss, an (n-1)D TensorFlow tensor. In this context a 2D tensor
-            of shape (batch, n_boxes_total).
-
-        References:
-            https://arxiv.org/abs/1504.08083
-        '''
-        absolute_loss = tf.abs(y_true - y_pred)
-        square_loss = 0.5 * (y_true - y_pred)**2
-        l1_loss = tf.where(tf.less(absolute_loss, 1.0),
-                           square_loss, absolute_loss - 0.5)
-        return tf.reduce_sum(l1_loss, axis=-1)
-
-    def log_loss(self, y_true, y_pred):
-        '''
-        Compute the softmax log loss.
-
-        Arguments:
-            y_true (nD tensor): A TensorFlow tensor of any shape containing the ground truth data.
-                In this context, the expected tensor has shape (batch_size, #boxes, #classes)
-                and contains the ground truth bounding box categories.
-            y_pred (nD tensor): A TensorFlow tensor of identical structure to `y_true` containing
-                the predicted data, in this context the predicted bounding box categories.
-
-        Returns:
-            The softmax log loss, an (n-1)D TensorFlow tensor. In this context a 2D tensor
-            of shape (batch, n_boxes_total).
- ''' - # Make sure that `y_pred` doesn't contain any zeros (which would break the log function) - y_pred = tf.maximum(y_pred, 1e-15) - # Compute the log loss - log_loss = -tf.reduce_sum(y_true * tf.log(y_pred), axis=-1) - return log_loss - - def compute_loss(self, y_true, y_pred): - ''' - Compute the loss of the SSD model prediction against the ground truth. - - Arguments: - y_true (array): A Numpy array of shape `(batch_size, #boxes, #classes + 12)`, - where `#boxes` is the total number of boxes that the model predicts - per image. Be careful to make sure that the index of each given - box in `y_true` is the same as the index for the corresponding - box in `y_pred`. The last axis must have length `#classes + 12` and contain - `[classes one-hot encoded, 4 ground truth box coordinate offsets, 8 arbitrary entries]` - in this order, including the background class. The last eight entries of the - last axis are not used by this function and therefore their contents are - irrelevant, they only exist so that `y_true` has the same shape as `y_pred`, - where the last four entries of the last axis contain the anchor box - coordinates, which are needed during inference. Important: Boxes that - you want the cost function to ignore need to have a one-hot - class vector of all zeros. - y_pred (Keras tensor): The model prediction. The shape is identical - to that of `y_true`, i.e. `(batch_size, #boxes, #classes + 12)`. - The last axis must contain entries in the format - `[classes one-hot encoded, 4 predicted box coordinate offsets, 8 arbitrary entries]`. - - Returns: - A scalar, the total multitask loss for classification and localization. - ''' - self.neg_pos_ratio = tf.constant(self.neg_pos_ratio) - self.n_neg_min = tf.constant(self.n_neg_min) - self.alpha = tf.constant(self.alpha) - - batch_size = tf.shape(y_pred)[0] # Output dtype: tf.int32 - # Output dtype: tf.int32, note that `n_boxes` in this context denotes the total number of boxes per image, not the number of boxes per cell. - n_boxes = tf.shape(y_pred)[1] - - # 1: Compute the losses for class and box predictions for every box. - - classification_loss = tf.to_float(self.log_loss( - y_true[:, :, :-12], y_pred[:, :, :-12])) # Output shape: (batch_size, n_boxes) - localization_loss = tf.to_float(self.smooth_L1_loss( - y_true[:, :, -12:-8], y_pred[:, :, -12:-8])) # Output shape: (batch_size, n_boxes) - - # 2: Compute the classification losses for the positive and negative targets. - - # Create masks for the positive and negative ground truth classes. - negatives = y_true[:, :, 0] # Tensor of shape (batch_size, n_boxes) - # Tensor of shape (batch_size, n_boxes) - positives = tf.to_float(tf.reduce_max(y_true[:, :, 1:-12], axis=-1)) - - # Count the number of positive boxes (classes 1 to n) in y_true across the whole batch. - n_positive = tf.reduce_sum(positives) - - # Now mask all negative boxes and sum up the losses for the positive boxes PER batch item - # (Keras loss functions must output one scalar loss value PER batch item, rather than just - # one scalar for the entire batch, that's why we're not summing across all axes). - # Tensor of shape (batch_size,) - pos_class_loss = tf.reduce_sum( - classification_loss * positives, axis=-1) - - # Compute the classification loss for the negative default boxes (if there are any). - - # First, compute the classification loss for all negative boxes. 
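Both per-box loss components defined above are easy to sanity-check in plain NumPy before the negative-loss computation continues below; a small sketch with toy values:

```python
import numpy as np

def smooth_l1(y_true, y_pred):
    """0.5*x^2 for |x| < 1, |x| - 0.5 otherwise, summed over the last axis."""
    x = np.abs(y_true - y_pred)
    return np.sum(np.where(x < 1.0, 0.5 * x**2, x - 0.5), axis=-1)

def softmax_log_loss(y_true, y_pred):
    """Cross-entropy against one-hot targets, clipped to keep log() finite."""
    y_pred = np.maximum(y_pred, 1e-15)
    return -np.sum(y_true * np.log(y_pred), axis=-1)

coords_true = np.array([[0.0, 0.0, 1.0, 1.0]])
coords_pred = np.array([[0.5, 0.0, 3.0, 1.0]])
print(smooth_l1(coords_true, coords_pred))  # 0.5*0.25 + (2.0 - 0.5) = 1.625

classes_true = np.array([[0.0, 1.0, 0.0]])
classes_pred = np.array([[0.2, 0.7, 0.1]])
print(softmax_log_loss(classes_true, classes_pred))  # -log(0.7) ~ 0.357
```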
-        neg_class_loss_all = classification_loss * \
-            negatives  # Tensor of shape (batch_size, n_boxes)
-        # The number of non-zero loss entries in `neg_class_loss_all`
-        n_neg_losses = tf.count_nonzero(neg_class_loss_all, dtype=tf.int32)
-        # What's the point of `n_neg_losses`? For the next step, which will be to compute which negative boxes enter the classification
-        # loss, we don't just want to know how many negative ground truth boxes there are, but for how many of those there actually is
-        # a positive (i.e. non-zero) loss. This is necessary because `tf.nn.top_k()` in the function below will pick the top k boxes with
-        # the highest losses no matter what, even if it receives a vector where all losses are zero. In the unlikely event that all negative
-        # classification losses ARE actually zero though, this behavior might lead to `tf.nn.top_k()` returning the indices of positive
-        # boxes, leading to an incorrect negative classification loss computation, and hence an incorrect overall loss computation.
-        # We therefore need to make sure that `n_negative_keep`, which assumes the role of the `k` argument in `tf.nn.top_k()`,
-        # is at most the number of negative boxes for which there is a positive classification loss.
-
-        # Compute the number of negative examples we want to account for in the loss.
-        # We'll keep at most `self.neg_pos_ratio` times the number of positives in `y_true`, but at least `self.n_neg_min` (unless `n_neg_losses` is smaller).
-        n_negative_keep = tf.minimum(tf.maximum(
-            self.neg_pos_ratio * tf.to_int32(n_positive), self.n_neg_min), n_neg_losses)
-
-        # In the unlikely case when either (1) there are no negative ground truth boxes at all
-        # or (2) the classification loss for all negative boxes is zero, return zero as the `neg_class_loss`.
-        def f1():
-            return tf.zeros([batch_size])
-        # Otherwise compute the negative loss.
-
-        def f2():
-            # Now we'll identify the top-k (where k == `n_negative_keep`) boxes with the highest confidence loss that
-            # belong to the background class in the ground truth data. Note that this doesn't necessarily mean that the model
-            # predicted the wrong class for those boxes, it just means that the loss for those boxes is the highest.
-
-            # To do this, we reshape `neg_class_loss_all` to 1D...
-            # Tensor of shape (batch_size * n_boxes,)
-            neg_class_loss_all_1D = tf.reshape(neg_class_loss_all, [-1])
-            # ...and then we get the indices for the `n_negative_keep` boxes with the highest loss out of those...
-            values, indices = tf.nn.top_k(neg_class_loss_all_1D,
-                                          k=n_negative_keep,
-                                          sorted=False)  # We don't need them sorted.
-            # ...and with these indices we'll create a mask...
-            negatives_keep = tf.scatter_nd(indices=tf.expand_dims(indices, axis=1),
-                                           updates=tf.ones_like(
-                                               indices, dtype=tf.int32),
-                                           shape=tf.shape(neg_class_loss_all_1D))  # Tensor of shape (batch_size * n_boxes,)
-            # Tensor of shape (batch_size, n_boxes)
-            negatives_keep = tf.to_float(tf.reshape(
-                negatives_keep, [batch_size, n_boxes]))
-            # ...and use it to keep only those boxes and mask all other classification losses
-            # Tensor of shape (batch_size,)
-            neg_class_loss = tf.reduce_sum(
-                classification_loss * negatives_keep, axis=-1)
-            return neg_class_loss
-
-        neg_class_loss = tf.cond(
-            tf.equal(n_neg_losses, tf.constant(0)), f1, f2)
-
-        # Tensor of shape (batch_size,)
-        class_loss = pos_class_loss + neg_class_loss
-
-        # 3: Compute the localization loss for the positive targets.
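Backing up to step 2 for a moment: `f2()` above is the hard negative mining step, which keeps only the `n_negative_keep` background boxes with the largest classification loss across the whole batch. The same mask-building idea in NumPy, with assumed toy losses:

```python
import numpy as np

# Per-box classification losses for background ("negative") boxes,
# flattened across the batch just like `neg_class_loss_all_1D`.
neg_losses = np.array([0.05, 2.30, 0.00, 0.80, 1.10, 0.00])
n_positive = 1
neg_pos_ratio, n_neg_min = 3, 0

# k = min(max(neg_pos_ratio * n_positive, n_neg_min), #non-zero losses)
n_neg_losses = np.count_nonzero(neg_losses)
k = min(max(neg_pos_ratio * n_positive, n_neg_min), n_neg_losses)

# Build a 0/1 mask that keeps only the k hardest negatives.
top_indices = np.argsort(-neg_losses)[:k]
negatives_keep = np.zeros_like(neg_losses)
negatives_keep[top_indices] = 1.0

print(k, negatives_keep)                     # 3, mask selecting 2.30, 1.10, 0.80
print(np.sum(neg_losses * negatives_keep))   # 4.2
```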
-        # We don't compute a localization loss for negative predicted boxes (obviously: there are no ground truth boxes they would correspond to).
-
-        # Tensor of shape (batch_size,)
-        loc_loss = tf.reduce_sum(localization_loss * positives, axis=-1)
-
-        # 4: Compute the total loss.
-
-        total_loss = (class_loss + self.alpha * loc_loss) / \
-            tf.maximum(1.0, n_positive)  # In case `n_positive == 0`
-        # Keras has the annoying habit of dividing the loss by the batch size, which is undesirable in our case
-        # because the relevant criterion to average our loss over is the number of positive boxes in the batch
-        # (by which we're dividing in the line above), not the batch size. So in order to revert Keras' averaging
-        # over the batch size, we'll have to multiply by it.
-        total_loss = total_loss * tf.to_float(batch_size)
-
-        return total_loss
-'''
-Utilities that are useful to sub- or up-sample weights tensors.
-
-Copyright (C) 2018 Pierluigi Ferrari
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-import numpy as np
-
-
-def sample_tensors(weights_list, sampling_instructions, axes=None, init=None, mean=0.0, stddev=0.005):
-    '''
-    Can sub-sample and/or up-sample individual dimensions of the tensors in the given list
-    of input tensors.
-
-    It is possible to sub-sample some dimensions and up-sample other dimensions at the same time.
-
-    The tensors in the list will be sampled consistently, i.e. for any given dimension that
-    corresponds among all tensors in the list, the same elements will be picked for every tensor
-    along that dimension.
-
-    For dimensions that are being sub-sampled, you can either provide a list of the indices
-    that should be picked, or you can provide the number of elements to be sub-sampled, in which
-    case the elements will be chosen at random.
-
-    For dimensions that are being up-sampled, "filler" elements will be inserted at random
-    positions along the respective dimension. These filler elements will be initialized either
-    with zero or from a normal distribution with selectable mean and standard deviation.
-
-    Arguments:
-        weights_list (list): A list of Numpy arrays. Each array represents one of the tensors
-            to be sampled. The tensor with the greatest number of dimensions must be the first
-            element in the list. For example, in the case of the weights of a 2D convolutional
-            layer, the kernel must be the first element in the list and the bias the second,
-            not the other way around. For all tensors in the list after the first tensor, the
-            lengths of each of their axes must be identical to the length of some axis of the
-            first tensor.
-        sampling_instructions (list): A list that contains the sampling instructions for each
-            dimension of the first tensor. If the first tensor has `n` dimensions, then this
-            must be a list of length `n`. That means, sampling instructions for every dimension
-            of the first tensor must still be given even if not all dimensions should be changed.
-            The elements of this list can be either lists of integers or integers.
If the sampling
-            instruction for a given dimension is a list of integers, then these integers represent
-            the indices of the elements of that dimension that will be sub-sampled. If the sampling
-            instruction for a given dimension is an integer, then that number of elements will be
-            sampled along said dimension. If the integer is greater than the number of elements
-            of the input tensors in that dimension, that dimension will be up-sampled. If the integer
-            is smaller than the number of elements of the input tensors in that dimension, that
-            dimension will be sub-sampled. If the integer is equal to the number of elements
-            of the input tensors in that dimension, that dimension will remain the same.
-        axes (list, optional): Only relevant if `weights_list` contains more than one tensor.
-            This list contains a list for each additional tensor in `weights_list` beyond the first.
-            Each of these lists contains integers that determine to which axes of the first tensor
-            the axes of the respective tensor correspond. For example, let the first tensor be a
-            4D tensor and the second tensor in the list be a 2D tensor. If the first element of
-            `axes` is the list `[2,3]`, then that means that the two axes of the second tensor
-            correspond to the last two axes of the first tensor, in the same order. The point of
-            this list is for the program to know, if a given dimension of the first tensor is to
-            be sub- or up-sampled, which dimensions of the other tensors in the list must be
-            sub- or up-sampled accordingly.
-        init (list, optional): Only relevant for up-sampling. Must be `None` or a list of strings
-            that determines for each tensor in `weights_list` how the newly inserted values should
-            be initialized. The possible values are 'gaussian' for initialization from a normal
-            distribution with the selected mean and standard deviation (see the following two arguments),
-            or 'zeros' for zero-initialization. If `None`, all initializations default to
-            'gaussian'.
-        mean (float, optional): Only relevant for up-sampling. The mean of the values that will
-            be inserted into the tensors at random in the case of up-sampling.
-        stddev (float, optional): Only relevant for up-sampling. The standard deviation of the
-            values that will be inserted into the tensors at random in the case of up-sampling.
-
-    Returns:
-        A list containing the sampled tensors in the same order in which they were given.
-    '''
-
-    first_tensor = weights_list[0]
-
-    if (not isinstance(sampling_instructions, (list, tuple))) or (len(sampling_instructions) != first_tensor.ndim):
-        raise ValueError(
-            "The sampling instructions must be a list whose length is the number of dimensions of the first tensor in `weights_list`.")
-
-    if (init is not None) and len(init) != len(weights_list):
-        raise ValueError(
-            "`init` must either be `None` or a list of strings that has the same length as `weights_list`.")
-
-    up_sample = []  # Store the dimensions along which we need to up-sample.
-    out_shape = []  # Store the shape of the output tensor here.
-    # Store two stages of the new (sub-sampled and/or up-sampled) weights tensors in the following two lists.
-    # Tensors after sub-sampling, but before up-sampling (if any).
-    subsampled_weights_list = []
-    # Sub-sampled tensors after up-sampling (if any), i.e. final output tensors.
-    upsampled_weights_list = []
-
-    # Create the slicing arrays from the sampling instructions.
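Before the slicing machinery: a typical use of `sample_tensors()` is trimming a trained classification head down to fewer classes. A usage sketch with hypothetical shapes (a 3x3 conv kernel predicting 4 boxes x 81 classes, reduced to 21 classes; it assumes the function above is in scope):

```python
import numpy as np

# Hypothetical predictor weights: a 3x3 conv over 512 channels that outputs
# 4 boxes * 81 classes = 324 channels, to be reduced to 4 * 21 = 84.
kernel = np.random.normal(size=(3, 3, 512, 324))
bias = np.random.normal(size=(324,))

# Keep classes 0..20 within each of the 4 box predictors (explicit index list).
classes_to_keep = [box * 81 + c for box in range(4) for c in range(21)]

new_kernel, new_bias = sample_tensors(
    weights_list=[kernel, bias],
    # One instruction per kernel dimension; explicit indices for the last one.
    sampling_instructions=[3, 3, 512, classes_to_keep],
    # The bias's single axis corresponds to the kernel's last axis.
    axes=[[3]])

print(new_kernel.shape, new_bias.shape)  # (3, 3, 512, 84) (84,)
```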
-    sampling_slices = []
-    for i, sampling_inst in enumerate(sampling_instructions):
-        if isinstance(sampling_inst, (list, tuple)):
-            amax = np.amax(np.array(sampling_inst))
-            if amax >= first_tensor.shape[i]:
-                raise ValueError(
-                    "The sampling instructions for dimension {} contain index {}, which is out of range for that dimension.".format(i, amax))
-            sampling_slices.append(np.array(sampling_inst))
-            out_shape.append(len(sampling_inst))
-        elif isinstance(sampling_inst, int):
-            out_shape.append(sampling_inst)
-            if sampling_inst == first_tensor.shape[i]:
-                # Nothing to sample here, we're keeping the original number of elements along this axis.
-                sampling_slice = np.arange(sampling_inst)
-                sampling_slices.append(sampling_slice)
-            elif sampling_inst < first_tensor.shape[i]:
-                # We want to SUB-sample this dimension. Randomly pick `sampling_inst` many elements from it.
-                # We will always sample class 0, the background class.
-                sampling_slice1 = np.array([0])
-                # Sample the rest of the classes.
-                sampling_slice2 = np.sort(np.random.choice(
-                    np.arange(1, first_tensor.shape[i]), sampling_inst - 1, replace=False))
-                sampling_slice = np.concatenate(
-                    [sampling_slice1, sampling_slice2])
-                sampling_slices.append(sampling_slice)
-            else:
-                # We want to UP-sample. Pick all elements from this dimension.
-                sampling_slice = np.arange(first_tensor.shape[i])
-                sampling_slices.append(sampling_slice)
-                up_sample.append(i)
-        else:
-            raise ValueError(
-                "Each element of the sampling instructions must be either an integer or a list/tuple of integers, but received `{}`".format(type(sampling_inst)))
-
-    # Process the first tensor.
-    subsampled_first_tensor = np.copy(first_tensor[np.ix_(*sampling_slices)])
-    subsampled_weights_list.append(subsampled_first_tensor)
-
-    # Process the other tensors.
-    if len(weights_list) > 1:
-        for j in range(1, len(weights_list)):
-            # Get the sampling slices for this tensor.
-            this_sampling_slices = [sampling_slices[i] for i in axes[j-1]]
-            subsampled_weights_list.append(
-                np.copy(weights_list[j][np.ix_(*this_sampling_slices)]))
-
-    if up_sample:
-        # Take care of the dimensions that are to be up-sampled.
-
-        out_shape = np.array(out_shape)
-
-        # Process the first tensor.
-        if init is None or init[0] == 'gaussian':
-            upsampled_first_tensor = np.random.normal(
-                loc=mean, scale=stddev, size=out_shape)
-        elif init[0] == 'zeros':
-            upsampled_first_tensor = np.zeros(out_shape)
-        else:
-            raise ValueError(
-                "Valid initializations are 'gaussian' and 'zeros', but received '{}'.".format(init[0]))
-        # Pick the indices of the elements in `upsampled_first_tensor` that should be occupied by `subsampled_first_tensor`.
-        up_sample_slices = [np.arange(k)
-                            for k in subsampled_first_tensor.shape]
-        for i in up_sample:
-            # Randomly select across which indices of this dimension to scatter the elements of `subsampled_first_tensor` in this dimension.
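The indexing primitive used on both sides of this function is `np.ix_()`, which turns per-axis index lists into an open mesh so that a whole sub-grid can be read or, as in the scatter step the loop below performs, written in one assignment. A two-line illustration:

```python
import numpy as np

a = np.arange(16).reshape(4, 4)
# Select rows 0 and 2 crossed with columns 1 and 3 -> a 2x2 sub-grid.
print(a[np.ix_([0, 2], [1, 3])])   # [[ 1  3]
                                   #  [ 9 11]]

# The same mesh can be assigned to, which is how the up-sampling branch
# scatters the sub-sampled tensor into the larger, freshly initialized one.
b = np.zeros((4, 4))
b[np.ix_([0, 2], [1, 3])] = a[np.ix_([0, 2], [1, 3])]
```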
- up_sample_slice1 = np.array([0]) - up_sample_slice2 = np.sort(np.random.choice(np.arange( - 1, upsampled_first_tensor.shape[i]), subsampled_first_tensor.shape[i] - 1, replace=False)) - up_sample_slices[i] = np.concatenate( - [up_sample_slice1, up_sample_slice2]) - upsampled_first_tensor[np.ix_( - *up_sample_slices)] = subsampled_first_tensor - upsampled_weights_list.append(upsampled_first_tensor) - - # Process the other tensors - if len(weights_list) > 1: - for j in range(1, len(weights_list)): - if init is None or init[j] == 'gaussian': - upsampled_tensor = np.random.normal( - loc=mean, scale=stddev, size=out_shape[axes[j-1]]) - elif init[j] == 'zeros': - upsampled_tensor = np.zeros(out_shape[axes[j-1]]) - else: - raise ValueError( - "Valid initializations are 'gaussian' and 'zeros', but received '{}'.".format(init[j])) - # Get the up-sampling slices for this tensor. - this_up_sample_slices = [up_sample_slices[i] - for i in axes[j-1]] - upsampled_tensor[np.ix_( - *this_up_sample_slices)] = subsampled_weights_list[j] - upsampled_weights_list.append(upsampled_tensor) - - return upsampled_weights_list - else: - return subsampled_weights_list -''' -A Keras port of the original Caffe SSD300 network. - -Copyright (C) 2018 Pierluigi Ferrari - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -''' - -from __future__ import division -import numpy as np -from keras.models import Model -from keras.layers import Input, Lambda, Activation, Conv2D, MaxPooling2D, ZeroPadding2D, Reshape, Concatenate -from keras.regularizers import l2 -import keras.backend as K - -from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes -from keras_layers.keras_layer_L2Normalization import L2Normalization -from keras_layers.keras_layer_DecodeDetections import DecodeDetections -from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast - - -def ssd_300(image_size, - n_classes, - mode='training', - l2_regularization=0.0005, - min_scale=None, - max_scale=None, - scales=None, - aspect_ratios_global=None, - aspect_ratios_per_layer=[[1.0, 2.0, 0.5], - [1.0, 2.0, 0.5, 3.0, 1.0/3.0], - [1.0, 2.0, 0.5, 3.0, 1.0/3.0], - [1.0, 2.0, 0.5, 3.0, 1.0/3.0], - [1.0, 2.0, 0.5], - [1.0, 2.0, 0.5]], - two_boxes_for_ar1=True, - steps=[8, 16, 32, 64, 100, 300], - offsets=None, - clip_boxes=False, - variances=[0.1, 0.1, 0.2, 0.2], - coords='centroids', - normalize_coords=True, - subtract_mean=[123, 117, 104], - divide_by_stddev=None, - swap_channels=[2, 1, 0], - confidence_thresh=0.01, - iou_threshold=0.45, - top_k=200, - nms_max_output_size=400, - return_predictor_sizes=False): - ''' - Build a Keras model with SSD300 architecture, see references. - - The base network is a reduced atrous VGG-16, extended by the SSD architecture, - as described in the paper. - - Most of the arguments that this function takes are only needed for the anchor - box layers. In case you're training the network, the parameters passed here must - be the same as the ones used to set up `SSDBoxEncoder`. 
In case you're loading
-    trained weights, the parameters passed here must be the same as the ones used
-    to produce the trained weights.
-
-    Some of these arguments are explained in more detail in the documentation of the
-    `SSDBoxEncoder` class.
-
-    Note: Requires Keras v2.0 or later. Currently works only with the
-    TensorFlow backend (v1.0 or later).
-
-    Arguments:
-        image_size (tuple): The input image size in the format `(height, width, channels)`.
-        n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
-        mode (str, optional): One of 'training', 'inference' and 'inference_fast'. In 'training' mode,
-            the model outputs the raw prediction tensor, while in 'inference' and 'inference_fast' modes,
-            the raw predictions are decoded into absolute coordinates and filtered via confidence thresholding,
-            non-maximum suppression, and top-k filtering. The difference between the latter two modes is that
-            'inference' follows the exact procedure of the original Caffe implementation, while
-            'inference_fast' uses a faster prediction decoding procedure.
-        l2_regularization (float, optional): The L2-regularization rate. Applies to all convolutional layers.
-            Set to zero to deactivate L2-regularization.
-        min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction
-            of the shorter side of the input images.
-        max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction
-            of the shorter side of the input images. All scaling factors between the smallest and the
-            largest will be linearly interpolated. Note that the second to last of the linearly interpolated
-            scaling factors will actually be the scaling factor for the last predictor layer, while the last
-            scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
-            if `two_boxes_for_ar1` is `True`.
-        scales (list, optional): A list of floats containing scaling factors per convolutional predictor layer.
-            This list must be one element longer than the number of predictor layers. The first `k` elements are the
-            scaling factors for the `k` predictor layers, while the last element is used for the second box
-            for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
-            last scaling factor must be passed either way, even if it is not being used. If a list is passed,
-            this argument overrides `min_scale` and `max_scale`. All scaling factors must be greater than zero.
-        aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
-            generated. This list is valid for all prediction layers.
-        aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each prediction layer.
-            This allows you to set the aspect ratios for each predictor layer individually, which is the case for the
-            original SSD300 implementation. If a list is passed, it overrides `aspect_ratios_global`.
-        two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
-            If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
-            using the scaling factor for the respective layer, the second one will be generated using the
-            geometric mean of said scaling factor and the next bigger scaling factor.
-        steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
-            either ints/floats or tuples of two ints/floats.
These numbers represent for each predictor layer how many
-            pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
-            the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
-            If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
-            If no steps are provided, then they will be computed such that the anchor box center points will form an
-            equidistant grid within the image dimensions.
-        offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
-            either floats or tuples of two floats. These numbers represent for each predictor layer how many
-            pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
-            as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
-            of the step size specified in the `steps` argument. If the list contains floats, then that value will
-            be used for both spatial dimensions. If the list contains tuples of two floats, then they represent
-            `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size.
-        clip_boxes (bool, optional): If `True`, clips the anchor box coordinates to stay within image boundaries.
-        variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate will be divided by
-            its respective variance value.
-        coords (str, optional): The box coordinate format to be used internally by the model (i.e. this is not the input format
-            of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width,
-            and height), 'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
-        normalize_coords (bool, optional): Set to `True` if the model is supposed to use relative instead of absolute coordinates,
-            i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates.
-        subtract_mean (array-like, optional): `None` or an array-like object of integers or floating point values
-            of any shape that is broadcast-compatible with the image shape. The elements of this array will be
-            subtracted from the image pixel intensity values. For example, pass a list of three integers
-            to perform per-channel mean normalization for color images.
-        divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero integers or
-            floating point values of any shape that is broadcast-compatible with the image shape. The image pixel
-            intensity values will be divided by the elements of this array. For example, pass a list
-            of three integers to perform per-channel standard deviation normalization for color images.
-        swap_channels (list, optional): Either `False` or a list of integers representing the desired order in which the input
-            image channels should be swapped.
-        confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
-            positive class in order to be considered for the non-maximum suppression stage for the respective class.
-            A lower value will result in a larger part of the selection process being done by the non-maximum suppression
-            stage, while a larger value will result in a larger part of the selection process happening in the confidence
-            thresholding stage.
- iou_threshold (float, optional): A float in [0,1]. All boxes that have a Jaccard similarity of greater than `iou_threshold` - with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers - to the box's confidence score. - top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the - non-maximum suppression stage. - nms_max_output_size (int, optional): The maximal number of predictions that will be left over after the NMS stage. - return_predictor_sizes (bool, optional): If `True`, this function not only returns the model, but also - a list containing the spatial dimensions of the predictor layers. This isn't strictly necessary since - you can always get their sizes easily via the Keras API, but it's convenient and less error-prone - to get them this way. They are only relevant for training anyway (SSDBoxEncoder needs to know the - spatial dimensions of the predictor layers), for inference you don't need them. - - Returns: - model: The Keras SSD300 model. - predictor_sizes (optional): A Numpy array containing the `(height, width)` portion - of the output tensor shape for each convolutional predictor layer. During - training, the generator function needs this in order to transform - the ground truth labels into tensors of identical structure as the - output tensors of the model, which is in turn needed for the cost - function. - - References: - https://arxiv.org/abs/1512.02325v5 - ''' - - # The number of predictor conv layers in the network is 6 for the original SSD300. - n_predictor_layers = 6 - n_classes += 1 # Account for the background class. - l2_reg = l2_regularization # Make the internal name shorter. - img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2] - - ############################################################################ - # Get a few exceptions out of the way. - ############################################################################ - - if aspect_ratios_global is None and aspect_ratios_per_layer is None: - raise ValueError( - "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. 
At least one needs to be specified.")
- if aspect_ratios_per_layer:
- if len(aspect_ratios_per_layer) != n_predictor_layers:
- raise ValueError("`aspect_ratios_per_layer` must either be None or have length {}, but len(aspect_ratios_per_layer) == {}.".format(
- n_predictor_layers, len(aspect_ratios_per_layer)))
-
- if (min_scale is None or max_scale is None) and scales is None:
- raise ValueError(
- "Either `min_scale` and `max_scale` or `scales` need to be specified.")
- if scales:
- if len(scales) != n_predictor_layers+1:
- raise ValueError("`scales` must either be None or have length {}, but len(scales) == {}.".format(
- n_predictor_layers+1, len(scales)))
- else: # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
- scales = np.linspace(min_scale, max_scale, n_predictor_layers+1)
-
- if len(variances) != 4:
- raise ValueError(
- "4 variance values must be passed, but {} values were received.".format(len(variances)))
- variances = np.array(variances)
- if np.any(variances <= 0):
- raise ValueError(
- "All variances must be >0, but the variances given are {}".format(variances))
-
- if (not (steps is None)) and (len(steps) != n_predictor_layers):
- raise ValueError(
- "You must provide exactly one step value per predictor layer.")
-
- if (not (offsets is None)) and (len(offsets) != n_predictor_layers):
- raise ValueError(
- "You must provide exactly one offset value per predictor layer.")
-
- ############################################################################
- # Compute the anchor box parameters.
- ############################################################################
-
- # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
- if aspect_ratios_per_layer:
- aspect_ratios = aspect_ratios_per_layer
- else:
- aspect_ratios = [aspect_ratios_global] * n_predictor_layers
-
- # Compute the number of boxes to be predicted per cell for each predictor layer.
- # We need this so that we know how many channels the predictor layers need to have.
- if aspect_ratios_per_layer:
- n_boxes = []
- for ar in aspect_ratios_per_layer:
- if (1 in ar) & two_boxes_for_ar1:
- # +1 for the second box for aspect ratio 1
- n_boxes.append(len(ar) + 1)
- else:
- n_boxes.append(len(ar))
- else: # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer
- if (1 in aspect_ratios_global) & two_boxes_for_ar1:
- n_boxes = len(aspect_ratios_global) + 1
- else:
- n_boxes = len(aspect_ratios_global)
- n_boxes = [n_boxes] * n_predictor_layers
-
- if steps is None:
- steps = [None] * n_predictor_layers
- if offsets is None:
- offsets = [None] * n_predictor_layers
-
- ############################################################################
- # Define functions for the Lambda layers below.
- ############################################################################ - - def identity_layer(tensor): - return tensor - - def input_mean_normalization(tensor): - return tensor - np.array(subtract_mean) - - def input_stddev_normalization(tensor): - return tensor / np.array(divide_by_stddev) - - def input_channel_swap(tensor): - if len(swap_channels) == 3: - return K.stack([tensor[..., swap_channels[0]], tensor[..., swap_channels[1]], tensor[..., swap_channels[2]]], axis=-1) - elif len(swap_channels) == 4: - return K.stack([tensor[..., swap_channels[0]], tensor[..., swap_channels[1]], tensor[..., swap_channels[2]], tensor[..., swap_channels[3]]], axis=-1) - - ############################################################################ - # Build the network. - ############################################################################ - - x = Input(shape=(img_height, img_width, img_channels)) - - # The following identity layer is only needed so that the subsequent lambda layers can be optional. - x1 = Lambda(identity_layer, output_shape=( - img_height, img_width, img_channels), name='identity_layer')(x) - if not (subtract_mean is None): - x1 = Lambda(input_mean_normalization, output_shape=( - img_height, img_width, img_channels), name='input_mean_normalization')(x1) - if not (divide_by_stddev is None): - x1 = Lambda(input_stddev_normalization, output_shape=( - img_height, img_width, img_channels), name='input_stddev_normalization')(x1) - if swap_channels: - x1 = Lambda(input_channel_swap, output_shape=( - img_height, img_width, img_channels), name='input_channel_swap')(x1) - - conv1_1 = Conv2D(64, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1_1')(x1) - conv1_2 = Conv2D(64, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1_2')(conv1_1) - pool1 = MaxPooling2D(pool_size=(2, 2), strides=( - 2, 2), padding='same', name='pool1')(conv1_2) - - conv2_1 = Conv2D(128, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2_1')(pool1) - conv2_2 = Conv2D(128, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2_2')(conv2_1) - pool2 = MaxPooling2D(pool_size=(2, 2), strides=( - 2, 2), padding='same', name='pool2')(conv2_2) - - conv3_1 = Conv2D(256, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_1')(pool2) - conv3_2 = Conv2D(256, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_2')(conv3_1) - conv3_3 = Conv2D(256, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_3')(conv3_2) - pool3 = MaxPooling2D(pool_size=(2, 2), strides=( - 2, 2), padding='same', name='pool3')(conv3_3) - - conv4_1 = Conv2D(512, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_1')(pool3) - conv4_2 = Conv2D(512, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_2')(conv4_1) - conv4_3 = Conv2D(512, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_3')(conv4_2) - pool4 = MaxPooling2D(pool_size=(2, 2), strides=( - 2, 2), padding='same', 
name='pool4')(conv4_3)
-
- conv5_1 = Conv2D(512, (3, 3), activation='relu', padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_1')(pool4)
- conv5_2 = Conv2D(512, (3, 3), activation='relu', padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_2')(conv5_1)
- conv5_3 = Conv2D(512, (3, 3), activation='relu', padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_3')(conv5_2)
- pool5 = MaxPooling2D(pool_size=(3, 3), strides=(
- 1, 1), padding='same', name='pool5')(conv5_3)
-
- fc6 = Conv2D(1024, (3, 3), dilation_rate=(6, 6), activation='relu', padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='fc6')(pool5)
-
- fc7 = Conv2D(1024, (1, 1), activation='relu', padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='fc7')(fc6)
-
- conv6_1 = Conv2D(256, (1, 1), activation='relu', padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6_1')(fc7)
- conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
- name='conv6_padding')(conv6_1)
- conv6_2 = Conv2D(512, (3, 3), strides=(2, 2), activation='relu', padding='valid',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6_2')(conv6_1)
-
- conv7_1 = Conv2D(128, (1, 1), activation='relu', padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7_1')(conv6_2)
- conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
- name='conv7_padding')(conv7_1)
- conv7_2 = Conv2D(256, (3, 3), strides=(2, 2), activation='relu', padding='valid',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7_2')(conv7_1)
-
- conv8_1 = Conv2D(128, (1, 1), activation='relu', padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv8_1')(conv7_2)
- conv8_2 = Conv2D(256, (3, 3), strides=(1, 1), activation='relu', padding='valid',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv8_2')(conv8_1)
-
- conv9_1 = Conv2D(128, (1, 1), activation='relu', padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv9_1')(conv8_2)
- conv9_2 = Conv2D(256, (3, 3), strides=(1, 1), activation='relu', padding='valid',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv9_2')(conv9_1)
-
- # Feed conv4_3 into the L2 normalization layer
- conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3)
-
- # Build the convolutional predictor layers on top of the base network
-
- # We predict `n_classes` confidence values for each box, hence the confidence predictors have depth `n_boxes * n_classes`
- # Output shape of the confidence layers: `(batch, height, width, n_boxes * n_classes)`
- conv4_3_norm_mbox_conf = Conv2D(n_boxes[0] * n_classes, (3, 3), padding='same', kernel_initializer='he_normal',
- kernel_regularizer=l2(l2_reg), name='conv4_3_norm_mbox_conf')(conv4_3_norm)
- fc7_mbox_conf = Conv2D(n_boxes[1] * n_classes, (3, 3), padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='fc7_mbox_conf')(fc7)
- conv6_2_mbox_conf = Conv2D(n_boxes[2] * n_classes, (3, 3), padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6_2_mbox_conf')(conv6_2)
- conv7_2_mbox_conf = Conv2D(n_boxes[3] * n_classes, (3, 3), padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7_2_mbox_conf')(conv7_2)
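# Illustrative aside (not part of the original code): with the SSD300 defaults,
# n_boxes = [4, 6, 6, 6, 4, 4] and, for Pascal VOC, n_classes = 20 + 1 = 21, so
# conv4_3_norm_mbox_conf above has 4 * 21 = 84 output channels on its 38x38 grid.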
conv8_2_mbox_conf = Conv2D(n_boxes[4] * n_classes, (3, 3), padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv8_2_mbox_conf')(conv8_2) - conv9_2_mbox_conf = Conv2D(n_boxes[5] * n_classes, (3, 3), padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv9_2_mbox_conf')(conv9_2) - # We predict 4 box coordinates for each box, hence the localization predictors have depth `n_boxes * 4` - # Output shape of the localization layers: `(batch, height, width, n_boxes * 4)` - conv4_3_norm_mbox_loc = Conv2D(n_boxes[0] * 4, (3, 3), padding='same', kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='conv4_3_norm_mbox_loc')(conv4_3_norm) - fc7_mbox_loc = Conv2D(n_boxes[1] * 4, (3, 3), padding='same', kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='fc7_mbox_loc')(fc7) - conv6_2_mbox_loc = Conv2D(n_boxes[2] * 4, (3, 3), padding='same', kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='conv6_2_mbox_loc')(conv6_2) - conv7_2_mbox_loc = Conv2D(n_boxes[3] * 4, (3, 3), padding='same', kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='conv7_2_mbox_loc')(conv7_2) - conv8_2_mbox_loc = Conv2D(n_boxes[4] * 4, (3, 3), padding='same', kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='conv8_2_mbox_loc')(conv8_2) - conv9_2_mbox_loc = Conv2D(n_boxes[5] * 4, (3, 3), padding='same', kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='conv9_2_mbox_loc')(conv9_2) - - # Generate the anchor boxes (called "priors" in the original Caffe/C++ implementation, so I'll keep their layer names) - - # Output shape of anchors: `(batch, height, width, n_boxes, 8)` - conv4_3_norm_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[0], next_scale=scales[1], aspect_ratios=aspect_ratios[0], - two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[ - 0], this_offsets=offsets[0], clip_boxes=clip_boxes, - variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv4_3_norm_mbox_priorbox')(conv4_3_norm_mbox_loc) - fc7_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[1], next_scale=scales[2], aspect_ratios=aspect_ratios[1], - two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[ - 1], this_offsets=offsets[1], clip_boxes=clip_boxes, - variances=variances, coords=coords, normalize_coords=normalize_coords, name='fc7_mbox_priorbox')(fc7_mbox_loc) - conv6_2_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[2], next_scale=scales[3], aspect_ratios=aspect_ratios[2], - two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[ - 2], this_offsets=offsets[2], clip_boxes=clip_boxes, - variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv6_2_mbox_priorbox')(conv6_2_mbox_loc) - conv7_2_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[3], next_scale=scales[4], aspect_ratios=aspect_ratios[3], - two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[ - 3], this_offsets=offsets[3], clip_boxes=clip_boxes, - variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv7_2_mbox_priorbox')(conv7_2_mbox_loc) - conv8_2_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[4], next_scale=scales[5], aspect_ratios=aspect_ratios[4], - two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[ - 4], this_offsets=offsets[4], clip_boxes=clip_boxes, - variances=variances, coords=coords, normalize_coords=normalize_coords, 
name='conv8_2_mbox_priorbox')(conv8_2_mbox_loc) - conv9_2_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[5], next_scale=scales[6], aspect_ratios=aspect_ratios[5], - two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[ - 5], this_offsets=offsets[5], clip_boxes=clip_boxes, - variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv9_2_mbox_priorbox')(conv9_2_mbox_loc) - - # Reshape - - # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)` - # We want the classes isolated in the last axis to perform softmax on them - conv4_3_norm_mbox_conf_reshape = Reshape( - (-1, n_classes), name='conv4_3_norm_mbox_conf_reshape')(conv4_3_norm_mbox_conf) - fc7_mbox_conf_reshape = Reshape( - (-1, n_classes), name='fc7_mbox_conf_reshape')(fc7_mbox_conf) - conv6_2_mbox_conf_reshape = Reshape( - (-1, n_classes), name='conv6_2_mbox_conf_reshape')(conv6_2_mbox_conf) - conv7_2_mbox_conf_reshape = Reshape( - (-1, n_classes), name='conv7_2_mbox_conf_reshape')(conv7_2_mbox_conf) - conv8_2_mbox_conf_reshape = Reshape( - (-1, n_classes), name='conv8_2_mbox_conf_reshape')(conv8_2_mbox_conf) - conv9_2_mbox_conf_reshape = Reshape( - (-1, n_classes), name='conv9_2_mbox_conf_reshape')(conv9_2_mbox_conf) - # Reshape the box predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)` - # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss - conv4_3_norm_mbox_loc_reshape = Reshape( - (-1, 4), name='conv4_3_norm_mbox_loc_reshape')(conv4_3_norm_mbox_loc) - fc7_mbox_loc_reshape = Reshape( - (-1, 4), name='fc7_mbox_loc_reshape')(fc7_mbox_loc) - conv6_2_mbox_loc_reshape = Reshape( - (-1, 4), name='conv6_2_mbox_loc_reshape')(conv6_2_mbox_loc) - conv7_2_mbox_loc_reshape = Reshape( - (-1, 4), name='conv7_2_mbox_loc_reshape')(conv7_2_mbox_loc) - conv8_2_mbox_loc_reshape = Reshape( - (-1, 4), name='conv8_2_mbox_loc_reshape')(conv8_2_mbox_loc) - conv9_2_mbox_loc_reshape = Reshape( - (-1, 4), name='conv9_2_mbox_loc_reshape')(conv9_2_mbox_loc) - # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)` - conv4_3_norm_mbox_priorbox_reshape = Reshape( - (-1, 8), name='conv4_3_norm_mbox_priorbox_reshape')(conv4_3_norm_mbox_priorbox) - fc7_mbox_priorbox_reshape = Reshape( - (-1, 8), name='fc7_mbox_priorbox_reshape')(fc7_mbox_priorbox) - conv6_2_mbox_priorbox_reshape = Reshape( - (-1, 8), name='conv6_2_mbox_priorbox_reshape')(conv6_2_mbox_priorbox) - conv7_2_mbox_priorbox_reshape = Reshape( - (-1, 8), name='conv7_2_mbox_priorbox_reshape')(conv7_2_mbox_priorbox) - conv8_2_mbox_priorbox_reshape = Reshape( - (-1, 8), name='conv8_2_mbox_priorbox_reshape')(conv8_2_mbox_priorbox) - conv9_2_mbox_priorbox_reshape = Reshape( - (-1, 8), name='conv9_2_mbox_priorbox_reshape')(conv9_2_mbox_priorbox) - - # Concatenate the predictions from the different layers - - # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions, - # so we want to concatenate along axis 1, the number of boxes per layer - # Output shape of `mbox_conf`: (batch, n_boxes_total, n_classes) - mbox_conf = Concatenate(axis=1, name='mbox_conf')([conv4_3_norm_mbox_conf_reshape, - fc7_mbox_conf_reshape, - conv6_2_mbox_conf_reshape, - conv7_2_mbox_conf_reshape, - conv8_2_mbox_conf_reshape, - conv9_2_mbox_conf_reshape]) - - # Output shape of `mbox_loc`: (batch, n_boxes_total, 4) - mbox_loc = Concatenate(axis=1, 
name='mbox_loc')([conv4_3_norm_mbox_loc_reshape, - fc7_mbox_loc_reshape, - conv6_2_mbox_loc_reshape, - conv7_2_mbox_loc_reshape, - conv8_2_mbox_loc_reshape, - conv9_2_mbox_loc_reshape]) - - # Output shape of `mbox_priorbox`: (batch, n_boxes_total, 8) - mbox_priorbox = Concatenate(axis=1, name='mbox_priorbox')([conv4_3_norm_mbox_priorbox_reshape, - fc7_mbox_priorbox_reshape, - conv6_2_mbox_priorbox_reshape, - conv7_2_mbox_priorbox_reshape, - conv8_2_mbox_priorbox_reshape, - conv9_2_mbox_priorbox_reshape]) - - # The box coordinate predictions will go into the loss function just the way they are, - # but for the class predictions, we'll apply a softmax activation layer first - mbox_conf_softmax = Activation( - 'softmax', name='mbox_conf_softmax')(mbox_conf) - - # Concatenate the class and box predictions and the anchors to one large predictions vector - # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8) - predictions = Concatenate(axis=2, name='predictions')( - [mbox_conf_softmax, mbox_loc, mbox_priorbox]) - - if mode == 'training': - model = Model(inputs=x, outputs=predictions) - elif mode == 'inference': - decoded_predictions = DecodeDetections(confidence_thresh=confidence_thresh, - iou_threshold=iou_threshold, - top_k=top_k, - nms_max_output_size=nms_max_output_size, - coords=coords, - normalize_coords=normalize_coords, - img_height=img_height, - img_width=img_width, - name='decoded_predictions')(predictions) - model = Model(inputs=x, outputs=decoded_predictions) - elif mode == 'inference_fast': - decoded_predictions = DecodeDetectionsFast(confidence_thresh=confidence_thresh, - iou_threshold=iou_threshold, - top_k=top_k, - nms_max_output_size=nms_max_output_size, - coords=coords, - normalize_coords=normalize_coords, - img_height=img_height, - img_width=img_width, - name='decoded_predictions')(predictions) - model = Model(inputs=x, outputs=decoded_predictions) - else: - raise ValueError( - "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'.".format(mode)) - - if return_predictor_sizes: - predictor_sizes = np.array([conv4_3_norm_mbox_conf._keras_shape[1:3], - fc7_mbox_conf._keras_shape[1:3], - conv6_2_mbox_conf._keras_shape[1:3], - conv7_2_mbox_conf._keras_shape[1:3], - conv8_2_mbox_conf._keras_shape[1:3], - conv9_2_mbox_conf._keras_shape[1:3]]) - return model, predictor_sizes - else: - return model -''' -A Keras port of the original Caffe SSD512 network. - -Copyright (C) 2018 Pierluigi Ferrari - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. 
-'''
-
-from __future__ import division
-import numpy as np
-from keras.models import Model
-from keras.layers import Input, Lambda, Activation, Conv2D, MaxPooling2D, ZeroPadding2D, Reshape, Concatenate
-from keras.regularizers import l2
-import keras.backend as K
-
-from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
-from keras_layers.keras_layer_L2Normalization import L2Normalization
-from keras_layers.keras_layer_DecodeDetections import DecodeDetections
-from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
-
-
-def ssd_512(image_size,
- n_classes,
- mode='training',
- l2_regularization=0.0005,
- min_scale=None,
- max_scale=None,
- scales=None,
- aspect_ratios_global=None,
- aspect_ratios_per_layer=[[1.0, 2.0, 0.5],
- [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
- [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
- [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
- [1.0, 2.0, 0.5, 3.0, 1.0/3.0],
- [1.0, 2.0, 0.5],
- [1.0, 2.0, 0.5]],
- two_boxes_for_ar1=True,
- steps=[8, 16, 32, 64, 128, 256, 512],
- offsets=None,
- clip_boxes=False,
- variances=[0.1, 0.1, 0.2, 0.2],
- coords='centroids',
- normalize_coords=True,
- subtract_mean=[123, 117, 104],
- divide_by_stddev=None,
- swap_channels=[2, 1, 0],
- confidence_thresh=0.01,
- iou_threshold=0.45,
- top_k=200,
- nms_max_output_size=400,
- return_predictor_sizes=False):
- '''
- Build a Keras model with SSD512 architecture, see references.
-
- The base network is a reduced atrous VGG-16, extended by the SSD architecture,
- as described in the paper.
-
- Most of the arguments that this function takes are only needed for the anchor
- box layers. In case you're training the network, the parameters passed here must
- be the same as the ones used to set up `SSDBoxEncoder`. In case you're loading
- trained weights, the parameters passed here must be the same as the ones used
- to produce the trained weights.
-
- Some of these arguments are explained in more detail in the documentation of the
- `SSDBoxEncoder` class.
-
- Note: Requires Keras v2.0 or later. Currently works only with the
- TensorFlow backend (v1.0 or later).
-
- Arguments:
- image_size (tuple): The input image size in the format `(height, width, channels)`.
- n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
- mode (str, optional): One of 'training', 'inference' and 'inference_fast'. In 'training' mode,
- the model outputs the raw prediction tensor, while in 'inference' and 'inference_fast' modes,
- the raw predictions are decoded into absolute coordinates and filtered via confidence thresholding,
- non-maximum suppression, and top-k filtering. The difference between the latter two modes is that
- 'inference' follows the exact procedure of the original Caffe implementation, while
- 'inference_fast' uses a faster prediction decoding procedure.
- l2_regularization (float, optional): The L2-regularization rate. Applies to all convolutional layers.
- Set to zero to deactivate L2-regularization.
- min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction
- of the shorter side of the input images.
- max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction
- of the shorter side of the input images. All scaling factors between the smallest and the
- largest will be linearly interpolated.
Note that the second to last of the linearly interpolated
- scaling factors will actually be the scaling factor for the last predictor layer, while the last
- scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
- if `two_boxes_for_ar1` is `True`.
- scales (list, optional): A list of floats containing scaling factors per convolutional predictor layer.
- This list must be one element longer than the number of predictor layers. The first `k` elements are the
- scaling factors for the `k` predictor layers, while the last element is used for the second box
- for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
- last scaling factor must be passed either way, even if it is not being used.
- If a list is passed, this argument overrides `min_scale` and `max_scale`. All scaling factors
- must be greater than zero.
- aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
- generated. This list is valid for all predictor layers.
- aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each predictor layer.
- This allows you to set the aspect ratios for each predictor layer individually, which is the case for the
- original SSD512 implementation. If a list is passed, it overrides `aspect_ratios_global`.
- two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
- If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
- using the scaling factor for the respective layer, the second one will be generated using the
- geometric mean of said scaling factor and the next bigger scaling factor.
- steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
- either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many
- pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
- the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
- If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
- If no steps are provided, then they will be computed such that the anchor box center points will form an
- equidistant grid within the image dimensions.
- offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
- either floats or tuples of two floats. These numbers represent for each predictor layer how many
- pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
- as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
- of the step size specified in the `steps` argument. If the list contains floats, then that value will
- be used for both spatial dimensions. If the list contains tuples of two floats, then they represent
- `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size.
- clip_boxes (bool, optional): If `True`, clips the anchor box coordinates to stay within image boundaries.
- variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate will be divided by
- its respective variance value.
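As a rough sketch of how the variances enter the standard SSD 'centroids' encoding (illustrative pseudocode, not code taken from this file):

    # encoded_cx = ((gt_cx - anchor_cx) / anchor_w) / variances[0]
    # encoded_cy = ((gt_cy - anchor_cy) / anchor_h) / variances[1]
    # encoded_w  = log(gt_w / anchor_w) / variances[2]
    # encoded_h  = log(gt_h / anchor_h) / variances[3]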
- coords (str, optional): The box coordinate format to be used internally by the model (i.e. this is not the input format - of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width, - and height), 'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`. - normalize_coords (bool, optional): Set to `True` if the model is supposed to use relative instead of absolute coordinates, - i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates. - subtract_mean (array-like, optional): `None` or an array-like object of integers or floating point values - of any shape that is broadcast-compatible with the image shape. The elements of this array will be - subtracted from the image pixel intensity values. For example, pass a list of three integers - to perform per-channel mean normalization for color images. - divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero integers or - floating point values of any shape that is broadcast-compatible with the image shape. The image pixel - intensity values will be divided by the elements of this array. For example, pass a list - of three integers to perform per-channel standard deviation normalization for color images. - swap_channels (list, optional): Either `False` or a list of integers representing the desired order in which the input - image channels should be swapped. - confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific - positive class in order to be considered for the non-maximum suppression stage for the respective class. - A lower value will result in a larger part of the selection process being done by the non-maximum suppression - stage, while a larger value will result in a larger part of the selection process happening in the confidence - thresholding stage. - iou_threshold (float, optional): A float in [0,1]. All boxes that have a Jaccard similarity of greater than `iou_threshold` - with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers - to the box's confidence score. - top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the - non-maximum suppression stage. - nms_max_output_size (int, optional): The maximal number of predictions that will be left over after the NMS stage. - return_predictor_sizes (bool, optional): If `True`, this function not only returns the model, but also - a list containing the spatial dimensions of the predictor layers. This isn't strictly necessary since - you can always get their sizes easily via the Keras API, but it's convenient and less error-prone - to get them this way. They are only relevant for training anyway (SSDBoxEncoder needs to know the - spatial dimensions of the predictor layers), for inference you don't need them. - - Returns: - model: The Keras SSD512 model. - predictor_sizes (optional): A Numpy array containing the `(height, width)` portion - of the output tensor shape for each convolutional predictor layer. During - training, the generator function needs this in order to transform - the ground truth labels into tensors of identical structure as the - output tensors of the model, which is in turn needed for the cost - function. 
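A minimal usage sketch (the argument values here are illustrative assumptions, not prescriptions):

    model, predictor_sizes = ssd_512(image_size=(512, 512, 3),
                                     n_classes=20,
                                     mode='training',
                                     return_predictor_sizes=True)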
-
- References:
- https://arxiv.org/abs/1512.02325v5
- '''
-
- # The number of predictor conv layers in the network is 7 for the original SSD512
- n_predictor_layers = 7
- n_classes += 1 # Account for the background class.
- l2_reg = l2_regularization # Make the internal name shorter.
- img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2]
-
- ############################################################################
- # Get a few exceptions out of the way.
- ############################################################################
-
- if aspect_ratios_global is None and aspect_ratios_per_layer is None:
- raise ValueError(
- "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified.")
- if aspect_ratios_per_layer:
- if len(aspect_ratios_per_layer) != n_predictor_layers:
- raise ValueError("`aspect_ratios_per_layer` must either be None or have length {}, but len(aspect_ratios_per_layer) == {}.".format(
- n_predictor_layers, len(aspect_ratios_per_layer)))
-
- if (min_scale is None or max_scale is None) and scales is None:
- raise ValueError(
- "Either `min_scale` and `max_scale` or `scales` need to be specified.")
- if scales:
- if len(scales) != n_predictor_layers+1:
- raise ValueError("`scales` must either be None or have length {}, but len(scales) == {}.".format(
- n_predictor_layers+1, len(scales)))
- else: # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
- scales = np.linspace(min_scale, max_scale, n_predictor_layers+1)
-
- if len(variances) != 4:
- raise ValueError(
- "4 variance values must be passed, but {} values were received.".format(len(variances)))
- variances = np.array(variances)
- if np.any(variances <= 0):
- raise ValueError(
- "All variances must be >0, but the variances given are {}".format(variances))
-
- if (not (steps is None)) and (len(steps) != n_predictor_layers):
- raise ValueError(
- "You must provide exactly one step value per predictor layer.")
-
- if (not (offsets is None)) and (len(offsets) != n_predictor_layers):
- raise ValueError(
- "You must provide exactly one offset value per predictor layer.")
-
- ############################################################################
- # Compute the anchor box parameters.
- ############################################################################
-
- # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
- if aspect_ratios_per_layer:
- aspect_ratios = aspect_ratios_per_layer
- else:
- aspect_ratios = [aspect_ratios_global] * n_predictor_layers
-
- # Compute the number of boxes to be predicted per cell for each predictor layer.
- # We need this so that we know how many channels the predictor layers need to have.
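# With the default `aspect_ratios_per_layer` above (lists of length 3 or 5, each
# containing 1.0) and two_boxes_for_ar1=True, the loop below yields
# n_boxes = [4, 6, 6, 6, 6, 4, 4].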
- if aspect_ratios_per_layer: - n_boxes = [] - for ar in aspect_ratios_per_layer: - if (1 in ar) & two_boxes_for_ar1: - # +1 for the second box for aspect ratio 1 - n_boxes.append(len(ar) + 1) - else: - n_boxes.append(len(ar)) - else: # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer - if (1 in aspect_ratios_global) & two_boxes_for_ar1: - n_boxes = len(aspect_ratios_global) + 1 - else: - n_boxes = len(aspect_ratios_global) - n_boxes = [n_boxes] * n_predictor_layers - - if steps is None: - steps = [None] * n_predictor_layers - if offsets is None: - offsets = [None] * n_predictor_layers - - ############################################################################ - # Define functions for the Lambda layers below. - ############################################################################ - - def identity_layer(tensor): - return tensor - - def input_mean_normalization(tensor): - return tensor - np.array(subtract_mean) - - def input_stddev_normalization(tensor): - return tensor / np.array(divide_by_stddev) - - def input_channel_swap(tensor): - if len(swap_channels) == 3: - return K.stack([tensor[..., swap_channels[0]], tensor[..., swap_channels[1]], tensor[..., swap_channels[2]]], axis=-1) - elif len(swap_channels) == 4: - return K.stack([tensor[..., swap_channels[0]], tensor[..., swap_channels[1]], tensor[..., swap_channels[2]], tensor[..., swap_channels[3]]], axis=-1) - - ############################################################################ - # Build the network. - ############################################################################ - - x = Input(shape=(img_height, img_width, img_channels)) - - # The following identity layer is only needed so that the subsequent lambda layers can be optional. 
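# The mean/stddev normalization Lambdas below rely on NumPy broadcasting; as an
# illustrative sketch, np.zeros((512, 512, 3)) - np.array([123, 117, 104])
# subtracts one mean value per color channel.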
- x1 = Lambda(identity_layer, output_shape=( - img_height, img_width, img_channels), name='identity_layer')(x) - if not (subtract_mean is None): - x1 = Lambda(input_mean_normalization, output_shape=( - img_height, img_width, img_channels), name='input_mean_normalization')(x1) - if not (divide_by_stddev is None): - x1 = Lambda(input_stddev_normalization, output_shape=( - img_height, img_width, img_channels), name='input_stddev_normalization')(x1) - if swap_channels: - x1 = Lambda(input_channel_swap, output_shape=( - img_height, img_width, img_channels), name='input_channel_swap')(x1) - - conv1_1 = Conv2D(64, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1_1')(x1) - conv1_2 = Conv2D(64, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1_2')(conv1_1) - pool1 = MaxPooling2D(pool_size=(2, 2), strides=( - 2, 2), padding='same', name='pool1')(conv1_2) - - conv2_1 = Conv2D(128, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2_1')(pool1) - conv2_2 = Conv2D(128, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2_2')(conv2_1) - pool2 = MaxPooling2D(pool_size=(2, 2), strides=( - 2, 2), padding='same', name='pool2')(conv2_2) - - conv3_1 = Conv2D(256, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_1')(pool2) - conv3_2 = Conv2D(256, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_2')(conv3_1) - conv3_3 = Conv2D(256, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3_3')(conv3_2) - pool3 = MaxPooling2D(pool_size=(2, 2), strides=( - 2, 2), padding='same', name='pool3')(conv3_3) - - conv4_1 = Conv2D(512, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_1')(pool3) - conv4_2 = Conv2D(512, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_2')(conv4_1) - conv4_3 = Conv2D(512, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4_3')(conv4_2) - pool4 = MaxPooling2D(pool_size=(2, 2), strides=( - 2, 2), padding='same', name='pool4')(conv4_3) - - conv5_1 = Conv2D(512, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_1')(pool4) - conv5_2 = Conv2D(512, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_2')(conv5_1) - conv5_3 = Conv2D(512, (3, 3), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5_3')(conv5_2) - pool5 = MaxPooling2D(pool_size=(3, 3), strides=( - 1, 1), padding='same', name='pool5')(conv5_3) - - fc6 = Conv2D(1024, (3, 3), dilation_rate=(6, 6), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='fc6')(pool5) - - fc7 = Conv2D(1024, (1, 1), activation='relu', padding='same', - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='fc7')(fc6) - - conv6_1 = Conv2D(256, (1, 1), activation='relu', padding='same', - kernel_initializer='he_normal', 
kernel_regularizer=l2(l2_reg), name='conv6_1')(fc7)
- conv6_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
- name='conv6_padding')(conv6_1)
- conv6_2 = Conv2D(512, (3, 3), strides=(2, 2), activation='relu', padding='valid',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6_2')(conv6_1)
-
- conv7_1 = Conv2D(128, (1, 1), activation='relu', padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7_1')(conv6_2)
- conv7_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
- name='conv7_padding')(conv7_1)
- conv7_2 = Conv2D(256, (3, 3), strides=(2, 2), activation='relu', padding='valid',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7_2')(conv7_1)
-
- conv8_1 = Conv2D(128, (1, 1), activation='relu', padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv8_1')(conv7_2)
- conv8_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
- name='conv8_padding')(conv8_1)
- conv8_2 = Conv2D(256, (3, 3), strides=(2, 2), activation='relu', padding='valid',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv8_2')(conv8_1)
-
- conv9_1 = Conv2D(128, (1, 1), activation='relu', padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv9_1')(conv8_2)
- conv9_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
- name='conv9_padding')(conv9_1)
- conv9_2 = Conv2D(256, (3, 3), strides=(2, 2), activation='relu', padding='valid',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv9_2')(conv9_1)
-
- conv10_1 = Conv2D(128, (1, 1), activation='relu', padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv10_1')(conv9_2)
- conv10_1 = ZeroPadding2D(padding=((1, 1), (1, 1)),
- name='conv10_padding')(conv10_1)
- conv10_2 = Conv2D(256, (4, 4), strides=(1, 1), activation='relu', padding='valid',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv10_2')(conv10_1)
-
- # Feed conv4_3 into the L2 normalization layer
- conv4_3_norm = L2Normalization(gamma_init=20, name='conv4_3_norm')(conv4_3)
-
- # Build the convolutional predictor layers on top of the base network
-
- # We predict `n_classes` confidence values for each box, hence the confidence predictors have depth `n_boxes * n_classes`
- # Output shape of the confidence layers: `(batch, height, width, n_boxes * n_classes)`
- conv4_3_norm_mbox_conf = Conv2D(n_boxes[0] * n_classes, (3, 3), padding='same', kernel_initializer='he_normal',
- kernel_regularizer=l2(l2_reg), name='conv4_3_norm_mbox_conf')(conv4_3_norm)
- fc7_mbox_conf = Conv2D(n_boxes[1] * n_classes, (3, 3), padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='fc7_mbox_conf')(fc7)
- conv6_2_mbox_conf = Conv2D(n_boxes[2] * n_classes, (3, 3), padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6_2_mbox_conf')(conv6_2)
- conv7_2_mbox_conf = Conv2D(n_boxes[3] * n_classes, (3, 3), padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7_2_mbox_conf')(conv7_2)
- conv8_2_mbox_conf = Conv2D(n_boxes[4] * n_classes, (3, 3), padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv8_2_mbox_conf')(conv8_2)
- conv9_2_mbox_conf = Conv2D(n_boxes[5] * n_classes, (3, 3), padding='same',
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv9_2_mbox_conf')(conv9_2)
- conv10_2_mbox_conf = Conv2D(n_boxes[6] * n_classes, (3, 3), padding='same',
kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='conv10_2_mbox_conf')(conv10_2) - # We predict 4 box coordinates for each box, hence the localization predictors have depth `n_boxes * 4` - # Output shape of the localization layers: `(batch, height, width, n_boxes * 4)` - conv4_3_norm_mbox_loc = Conv2D(n_boxes[0] * 4, (3, 3), padding='same', kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='conv4_3_norm_mbox_loc')(conv4_3_norm) - fc7_mbox_loc = Conv2D(n_boxes[1] * 4, (3, 3), padding='same', kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='fc7_mbox_loc')(fc7) - conv6_2_mbox_loc = Conv2D(n_boxes[2] * 4, (3, 3), padding='same', kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='conv6_2_mbox_loc')(conv6_2) - conv7_2_mbox_loc = Conv2D(n_boxes[3] * 4, (3, 3), padding='same', kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='conv7_2_mbox_loc')(conv7_2) - conv8_2_mbox_loc = Conv2D(n_boxes[4] * 4, (3, 3), padding='same', kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='conv8_2_mbox_loc')(conv8_2) - conv9_2_mbox_loc = Conv2D(n_boxes[5] * 4, (3, 3), padding='same', kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='conv9_2_mbox_loc')(conv9_2) - conv10_2_mbox_loc = Conv2D(n_boxes[6] * 4, (3, 3), padding='same', kernel_initializer='he_normal', - kernel_regularizer=l2(l2_reg), name='conv10_2_mbox_loc')(conv10_2) - - # Generate the anchor boxes (called "priors" in the original Caffe/C++ implementation, so I'll keep their layer names) - - # Output shape of anchors: `(batch, height, width, n_boxes, 8)` - conv4_3_norm_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[0], next_scale=scales[1], aspect_ratios=aspect_ratios[0], - two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[ - 0], this_offsets=offsets[0], clip_boxes=clip_boxes, - variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv4_3_norm_mbox_priorbox')(conv4_3_norm_mbox_loc) - fc7_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[1], next_scale=scales[2], aspect_ratios=aspect_ratios[1], - two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[ - 1], this_offsets=offsets[1], clip_boxes=clip_boxes, - variances=variances, coords=coords, normalize_coords=normalize_coords, name='fc7_mbox_priorbox')(fc7_mbox_loc) - conv6_2_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[2], next_scale=scales[3], aspect_ratios=aspect_ratios[2], - two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[ - 2], this_offsets=offsets[2], clip_boxes=clip_boxes, - variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv6_2_mbox_priorbox')(conv6_2_mbox_loc) - conv7_2_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[3], next_scale=scales[4], aspect_ratios=aspect_ratios[3], - two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[ - 3], this_offsets=offsets[3], clip_boxes=clip_boxes, - variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv7_2_mbox_priorbox')(conv7_2_mbox_loc) - conv8_2_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[4], next_scale=scales[5], aspect_ratios=aspect_ratios[4], - two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[ - 4], this_offsets=offsets[4], clip_boxes=clip_boxes, - variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv8_2_mbox_priorbox')(conv8_2_mbox_loc) - conv9_2_mbox_priorbox = AnchorBoxes(img_height, 
img_width, this_scale=scales[5], next_scale=scales[6], aspect_ratios=aspect_ratios[5], - two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[ - 5], this_offsets=offsets[5], clip_boxes=clip_boxes, - variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv9_2_mbox_priorbox')(conv9_2_mbox_loc) - conv10_2_mbox_priorbox = AnchorBoxes(img_height, img_width, this_scale=scales[6], next_scale=scales[7], aspect_ratios=aspect_ratios[6], - two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[ - 6], this_offsets=offsets[6], clip_boxes=clip_boxes, - variances=variances, coords=coords, normalize_coords=normalize_coords, name='conv10_2_mbox_priorbox')(conv10_2_mbox_loc) - - # Reshape - - # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)` - # We want the classes isolated in the last axis to perform softmax on them - conv4_3_norm_mbox_conf_reshape = Reshape( - (-1, n_classes), name='conv4_3_norm_mbox_conf_reshape')(conv4_3_norm_mbox_conf) - fc7_mbox_conf_reshape = Reshape( - (-1, n_classes), name='fc7_mbox_conf_reshape')(fc7_mbox_conf) - conv6_2_mbox_conf_reshape = Reshape( - (-1, n_classes), name='conv6_2_mbox_conf_reshape')(conv6_2_mbox_conf) - conv7_2_mbox_conf_reshape = Reshape( - (-1, n_classes), name='conv7_2_mbox_conf_reshape')(conv7_2_mbox_conf) - conv8_2_mbox_conf_reshape = Reshape( - (-1, n_classes), name='conv8_2_mbox_conf_reshape')(conv8_2_mbox_conf) - conv9_2_mbox_conf_reshape = Reshape( - (-1, n_classes), name='conv9_2_mbox_conf_reshape')(conv9_2_mbox_conf) - conv10_2_mbox_conf_reshape = Reshape( - (-1, n_classes), name='conv10_2_mbox_conf_reshape')(conv10_2_mbox_conf) - # Reshape the box predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)` - # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss - conv4_3_norm_mbox_loc_reshape = Reshape( - (-1, 4), name='conv4_3_norm_mbox_loc_reshape')(conv4_3_norm_mbox_loc) - fc7_mbox_loc_reshape = Reshape( - (-1, 4), name='fc7_mbox_loc_reshape')(fc7_mbox_loc) - conv6_2_mbox_loc_reshape = Reshape( - (-1, 4), name='conv6_2_mbox_loc_reshape')(conv6_2_mbox_loc) - conv7_2_mbox_loc_reshape = Reshape( - (-1, 4), name='conv7_2_mbox_loc_reshape')(conv7_2_mbox_loc) - conv8_2_mbox_loc_reshape = Reshape( - (-1, 4), name='conv8_2_mbox_loc_reshape')(conv8_2_mbox_loc) - conv9_2_mbox_loc_reshape = Reshape( - (-1, 4), name='conv9_2_mbox_loc_reshape')(conv9_2_mbox_loc) - conv10_2_mbox_loc_reshape = Reshape( - (-1, 4), name='conv10_2_mbox_loc_reshape')(conv10_2_mbox_loc) - # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)` - conv4_3_norm_mbox_priorbox_reshape = Reshape( - (-1, 8), name='conv4_3_norm_mbox_priorbox_reshape')(conv4_3_norm_mbox_priorbox) - fc7_mbox_priorbox_reshape = Reshape( - (-1, 8), name='fc7_mbox_priorbox_reshape')(fc7_mbox_priorbox) - conv6_2_mbox_priorbox_reshape = Reshape( - (-1, 8), name='conv6_2_mbox_priorbox_reshape')(conv6_2_mbox_priorbox) - conv7_2_mbox_priorbox_reshape = Reshape( - (-1, 8), name='conv7_2_mbox_priorbox_reshape')(conv7_2_mbox_priorbox) - conv8_2_mbox_priorbox_reshape = Reshape( - (-1, 8), name='conv8_2_mbox_priorbox_reshape')(conv8_2_mbox_priorbox) - conv9_2_mbox_priorbox_reshape = Reshape( - (-1, 8), name='conv9_2_mbox_priorbox_reshape')(conv9_2_mbox_priorbox) - conv10_2_mbox_priorbox_reshape = Reshape( - (-1, 8), name='conv10_2_mbox_priorbox_reshape')(conv10_2_mbox_priorbox) - - # Concatenate the predictions from the different layers - 
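# For reference (assuming a 512x512 input and the default configuration above):
# the predictor grids are 64, 32, 16, 8, 4, 2 and 1 cells on a side, so
# n_boxes_total = 64**2*4 + 32**2*6 + 16**2*6 + 8**2*6 + 4**2*6 + 2**2*4 + 1**2*4 == 24564.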
- # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions, - # so we want to concatenate along axis 1, the number of boxes per layer - # Output shape of `mbox_conf`: (batch, n_boxes_total, n_classes) - mbox_conf = Concatenate(axis=1, name='mbox_conf')([conv4_3_norm_mbox_conf_reshape, - fc7_mbox_conf_reshape, - conv6_2_mbox_conf_reshape, - conv7_2_mbox_conf_reshape, - conv8_2_mbox_conf_reshape, - conv9_2_mbox_conf_reshape, - conv10_2_mbox_conf_reshape]) - - # Output shape of `mbox_loc`: (batch, n_boxes_total, 4) - mbox_loc = Concatenate(axis=1, name='mbox_loc')([conv4_3_norm_mbox_loc_reshape, - fc7_mbox_loc_reshape, - conv6_2_mbox_loc_reshape, - conv7_2_mbox_loc_reshape, - conv8_2_mbox_loc_reshape, - conv9_2_mbox_loc_reshape, - conv10_2_mbox_loc_reshape]) - - # Output shape of `mbox_priorbox`: (batch, n_boxes_total, 8) - mbox_priorbox = Concatenate(axis=1, name='mbox_priorbox')([conv4_3_norm_mbox_priorbox_reshape, - fc7_mbox_priorbox_reshape, - conv6_2_mbox_priorbox_reshape, - conv7_2_mbox_priorbox_reshape, - conv8_2_mbox_priorbox_reshape, - conv9_2_mbox_priorbox_reshape, - conv10_2_mbox_priorbox_reshape]) - - # The box coordinate predictions will go into the loss function just the way they are, - # but for the class predictions, we'll apply a softmax activation layer first - mbox_conf_softmax = Activation( - 'softmax', name='mbox_conf_softmax')(mbox_conf) - - # Concatenate the class and box predictions and the anchors to one large predictions vector - # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8) - predictions = Concatenate(axis=2, name='predictions')( - [mbox_conf_softmax, mbox_loc, mbox_priorbox]) - - if mode == 'training': - model = Model(inputs=x, outputs=predictions) - elif mode == 'inference': - decoded_predictions = DecodeDetections(confidence_thresh=confidence_thresh, - iou_threshold=iou_threshold, - top_k=top_k, - nms_max_output_size=nms_max_output_size, - coords=coords, - normalize_coords=normalize_coords, - img_height=img_height, - img_width=img_width, - name='decoded_predictions')(predictions) - model = Model(inputs=x, outputs=decoded_predictions) - elif mode == 'inference_fast': - decoded_predictions = DecodeDetectionsFast(confidence_thresh=confidence_thresh, - iou_threshold=iou_threshold, - top_k=top_k, - nms_max_output_size=nms_max_output_size, - coords=coords, - normalize_coords=normalize_coords, - img_height=img_height, - img_width=img_width, - name='decoded_predictions')(predictions) - model = Model(inputs=x, outputs=decoded_predictions) - else: - raise ValueError( - "`mode` must be one of 'training', 'inference' or 'inference_fast', but received '{}'.".format(mode)) - - if return_predictor_sizes: - predictor_sizes = np.array([conv4_3_norm_mbox_conf._keras_shape[1:3], - fc7_mbox_conf._keras_shape[1:3], - conv6_2_mbox_conf._keras_shape[1:3], - conv7_2_mbox_conf._keras_shape[1:3], - conv8_2_mbox_conf._keras_shape[1:3], - conv9_2_mbox_conf._keras_shape[1:3], - conv10_2_mbox_conf._keras_shape[1:3]]) - return model, predictor_sizes - else: - return model -''' -A small 7-layer Keras model with SSD architecture. Also serves as a template to build arbitrary network architectures. - -Copyright (C) 2018 Pierluigi Ferrari - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. 
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-from __future__ import division
-import numpy as np
-from keras.models import Model
-from keras.layers import Input, Lambda, Conv2D, MaxPooling2D, BatchNormalization, ELU, Reshape, Concatenate, Activation
-from keras.regularizers import l2
-import keras.backend as K
-
-from keras_layers.keras_layer_AnchorBoxes import AnchorBoxes
-from keras_layers.keras_layer_DecodeDetections import DecodeDetections
-from keras_layers.keras_layer_DecodeDetectionsFast import DecodeDetectionsFast
-
-
-def build_model(image_size,
- n_classes,
- mode='training',
- l2_regularization=0.0,
- min_scale=0.1,
- max_scale=0.9,
- scales=None,
- aspect_ratios_global=[0.5, 1.0, 2.0],
- aspect_ratios_per_layer=None,
- two_boxes_for_ar1=True,
- steps=None,
- offsets=None,
- clip_boxes=False,
- variances=[1.0, 1.0, 1.0, 1.0],
- coords='centroids',
- normalize_coords=False,
- subtract_mean=None,
- divide_by_stddev=None,
- swap_channels=False,
- confidence_thresh=0.01,
- iou_threshold=0.45,
- top_k=200,
- nms_max_output_size=400,
- return_predictor_sizes=False):
- '''
- Build a Keras model with SSD architecture, see references.
-
- The model consists of convolutional feature layers and a number of convolutional
- predictor layers that take their input from different feature layers.
- The model is fully convolutional.
-
- The implementation found here is a smaller version of the original architecture
- used in the paper (where the base network consists of a modified VGG-16 extended
- by a few convolutional feature layers), but of course it could easily be changed to
- an arbitrarily large SSD architecture by following the general design pattern used here.
- This implementation has 7 convolutional layers and 4 convolutional predictor
- layers that take their input from layers 4, 5, 6, and 7, respectively.
-
- Most of the arguments that this function takes are only needed for the anchor
- box layers. In case you're training the network, the parameters passed here must
- be the same as the ones used to set up `SSDBoxEncoder`. In case you're loading
- trained weights, the parameters passed here must be the same as the ones used
- to produce the trained weights.
-
- Some of these arguments are explained in more detail in the documentation of the
- `SSDBoxEncoder` class.
-
- Note: Requires Keras v2.0 or later. Training currently works only with the
- TensorFlow backend (v1.0 or later).
-
- Arguments:
- image_size (tuple): The input image size in the format `(height, width, channels)`.
- n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
- mode (str, optional): One of 'training', 'inference' and 'inference_fast'. In 'training' mode,
- the model outputs the raw prediction tensor, while in 'inference' and 'inference_fast' modes,
- the raw predictions are decoded into absolute coordinates and filtered via confidence thresholding,
- non-maximum suppression, and top-k filtering. The difference between the latter two modes is that
- 'inference' follows the exact procedure of the original Caffe implementation, while
- 'inference_fast' uses a faster prediction decoding procedure.
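For orientation, a hedged note on the two inference modes (based on the decode layers used further below): the decoded output tensor has shape `(batch_size, top_k, 6)`, where each row is

    # [class_id, confidence, xmin, ymin, xmax, ymax]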
- l2_regularization (float, optional): The L2-regularization rate. Applies to all convolutional layers.
- min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction
- of the shorter side of the input images.
- max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction
- of the shorter side of the input images. All scaling factors between the smallest and the
- largest will be linearly interpolated. Note that the second to last of the linearly interpolated
- scaling factors will actually be the scaling factor for the last predictor layer, while the last
- scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
- if `two_boxes_for_ar1` is `True`.
- scales (list, optional): A list of floats containing scaling factors per convolutional predictor layer.
- This list must be one element longer than the number of predictor layers. The first `k` elements are the
- scaling factors for the `k` predictor layers, while the last element is used for the second box
- for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
- last scaling factor must be passed either way, even if it is not being used. If a list is passed,
- this argument overrides `min_scale` and `max_scale`. All scaling factors must be greater than zero.
- aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
- generated. This list is valid for all predictor layers. The original implementation uses more aspect ratios
- for some predictor layers and fewer for others. If you want to do that, too, then use the next argument instead.
- aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each predictor layer.
- This allows you to set the aspect ratios for each predictor layer individually. If a list is passed,
- it overrides `aspect_ratios_global`.
- two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
- If `True`, two anchor boxes will be generated for aspect ratio 1. The first will be generated
- using the scaling factor for the respective layer, the second one will be generated using the
- geometric mean of said scaling factor and the next bigger scaling factor.
- steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
- either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many
- pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
- the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
- If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
- If no steps are provided, then they will be computed such that the anchor box center points will form an
- equidistant grid within the image dimensions.
- offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
- either floats or tuples of two floats. These numbers represent for each predictor layer how many
- pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
- as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
- of the step size specified in the `steps` argument.
- If the list contains floats, then that value will be used for both spatial dimensions.
- If the list contains tuples of two floats, then they represent
- `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size,
- which is also the recommended setting.
- clip_boxes (bool, optional): If `True`, clips the anchor box coordinates to stay within image boundaries.
- variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate will be divided by
- its respective variance value.
- coords (str, optional): The box coordinate format to be used internally by the model (i.e. this is not the input format
- of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width,
- and height), 'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
- normalize_coords (bool, optional): Set to `True` if the model is supposed to use relative instead of absolute coordinates,
- i.e. if the model predicts box coordinates within [0,1] instead of absolute coordinates.
- subtract_mean (array-like, optional): `None` or an array-like object of integers or floating point values
- of any shape that is broadcast-compatible with the image shape. The elements of this array will be
- subtracted from the image pixel intensity values. For example, pass a list of three integers
- to perform per-channel mean normalization for color images.
- divide_by_stddev (array-like, optional): `None` or an array-like object of non-zero integers or
- floating point values of any shape that is broadcast-compatible with the image shape. The image pixel
- intensity values will be divided by the elements of this array. For example, pass a list
- of three integers to perform per-channel standard deviation normalization for color images.
- swap_channels (list, optional): Either `False` or a list of integers representing the desired order in which the input
- image channels should be swapped.
- confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
- positive class in order to be considered for the non-maximum suppression stage for the respective class.
- A lower value will result in a larger part of the selection process being done by the non-maximum suppression
- stage, while a larger value will result in a larger part of the selection process happening in the confidence
- thresholding stage.
- iou_threshold (float, optional): A float in [0,1]. All boxes that have a Jaccard similarity of greater than `iou_threshold`
- with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
- to the box's confidence score.
- top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
- non-maximum suppression stage.
- nms_max_output_size (int, optional): The maximal number of predictions that will be left over after the NMS stage.
- return_predictor_sizes (bool, optional): If `True`, this function not only returns the model, but also
- a list containing the spatial dimensions of the predictor layers. This isn't strictly necessary since
- you can always get their sizes easily via the Keras API, but it's convenient and less error-prone
- to get them this way. They are only relevant for training anyway (`SSDBoxEncoder` needs to know the
- spatial dimensions of the predictor layers); for inference you don't need them.
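-
- Example:
- A minimal usage sketch. The image size, class count and scales below are
- illustrative assumptions, not values prescribed by this project:
-
- >>> model = build_model(image_size=(300, 480, 3),
- ... n_classes=5,
- ... mode='training',
- ... scales=[0.08, 0.16, 0.32, 0.64, 0.96])
- >>> model.summary()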
-
- Returns:
- model: The Keras SSD model.
- predictor_sizes (optional): A Numpy array containing the `(height, width)` portion
- of the output tensor shape for each convolutional predictor layer. During
- training, the generator function needs this in order to transform
- the ground truth labels into tensors of identical structure as the
- output tensors of the model, which is in turn needed for the cost
- function.
-
- References:
- https://arxiv.org/abs/1512.02325v5
- '''
-
- n_predictor_layers = 4 # The number of predictor conv layers in the network
- n_classes += 1 # Account for the background class.
- l2_reg = l2_regularization # Make the internal name shorter.
- img_height, img_width, img_channels = image_size[0], image_size[1], image_size[2]
-
- ############################################################################
- # Get a few exceptions out of the way.
- ############################################################################
-
- if aspect_ratios_global is None and aspect_ratios_per_layer is None:
- raise ValueError(
- "`aspect_ratios_global` and `aspect_ratios_per_layer` cannot both be None. At least one needs to be specified.")
- if aspect_ratios_per_layer:
- if len(aspect_ratios_per_layer) != n_predictor_layers:
- raise ValueError("It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == {}, but len(aspect_ratios_per_layer) == {}.".format(
- n_predictor_layers, len(aspect_ratios_per_layer)))
-
- if (min_scale is None or max_scale is None) and scales is None:
- raise ValueError(
- "Either `min_scale` and `max_scale` or `scales` need to be specified.")
- if scales:
- if len(scales) != n_predictor_layers+1:
- raise ValueError("It must be either scales is None or len(scales) == {}, but len(scales) == {}.".format(
- n_predictor_layers+1, len(scales)))
- else: # If no explicit list of scaling factors was passed, compute the list of scaling factors from `min_scale` and `max_scale`
- scales = np.linspace(min_scale, max_scale, n_predictor_layers+1)
-
- if len(variances) != 4: # We need one variance value for each of the four box coordinates
- raise ValueError(
- "4 variance values must be passed, but {} values were received.".format(len(variances)))
- variances = np.array(variances)
- if np.any(variances <= 0):
- raise ValueError(
- "All variances must be >0, but the variances given are {}".format(variances))
-
- if (not (steps is None)) and (len(steps) != n_predictor_layers):
- raise ValueError(
- "You must provide exactly one step value per predictor layer.")
-
- if (not (offsets is None)) and (len(offsets) != n_predictor_layers):
- raise ValueError(
- "You must provide exactly one offset value per predictor layer.")
-
- ############################################################################
- # Compute the anchor box parameters.
- ############################################################################
-
- # Set the aspect ratios for each predictor layer. These are only needed for the anchor box layers.
- if aspect_ratios_per_layer:
- aspect_ratios = aspect_ratios_per_layer
- else:
- aspect_ratios = [aspect_ratios_global] * n_predictor_layers
-
- # Compute the number of boxes to be predicted per cell for each predictor layer.
- # We need this so that we know how many channels the predictor layers need to have.
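- # Worked illustration with the defaults of this function: for
- # aspect_ratios_global=[0.5, 1.0, 2.0] and two_boxes_for_ar1=True, the list
- # contains 1, so each cell gets 3 + 1 = 4 boxes and n_boxes == [4, 4, 4, 4].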
- if aspect_ratios_per_layer: - n_boxes = [] - for ar in aspect_ratios_per_layer: - if (1 in ar) & two_boxes_for_ar1: - # +1 for the second box for aspect ratio 1 - n_boxes.append(len(ar) + 1) - else: - n_boxes.append(len(ar)) - else: # If only a global aspect ratio list was passed, then the number of boxes is the same for each predictor layer - if (1 in aspect_ratios_global) & two_boxes_for_ar1: - n_boxes = len(aspect_ratios_global) + 1 - else: - n_boxes = len(aspect_ratios_global) - n_boxes = [n_boxes] * n_predictor_layers - - if steps is None: - steps = [None] * n_predictor_layers - if offsets is None: - offsets = [None] * n_predictor_layers - - ############################################################################ - # Define functions for the Lambda layers below. - ############################################################################ - - def identity_layer(tensor): - return tensor - - def input_mean_normalization(tensor): - return tensor - np.array(subtract_mean) - - def input_stddev_normalization(tensor): - return tensor / np.array(divide_by_stddev) - - def input_channel_swap(tensor): - if len(swap_channels) == 3: - return K.stack([tensor[..., swap_channels[0]], tensor[..., swap_channels[1]], tensor[..., swap_channels[2]]], axis=-1) - elif len(swap_channels) == 4: - return K.stack([tensor[..., swap_channels[0]], tensor[..., swap_channels[1]], tensor[..., swap_channels[2]], tensor[..., swap_channels[3]]], axis=-1) - - ############################################################################ - # Build the network. - ############################################################################ - - x = Input(shape=(img_height, img_width, img_channels)) - - # The following identity layer is only needed so that the subsequent lambda layers can be optional. 
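- # (For example -- illustrative values, not defaults of this project -- passing
- # subtract_mean=[123, 117, 104] would subtract a per-channel mean from RGB inputs here.)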
- x1 = Lambda(identity_layer, output_shape=( - img_height, img_width, img_channels), name='identity_layer')(x) - if not (subtract_mean is None): - x1 = Lambda(input_mean_normalization, output_shape=( - img_height, img_width, img_channels), name='input_mean_normalization')(x1) - if not (divide_by_stddev is None): - x1 = Lambda(input_stddev_normalization, output_shape=( - img_height, img_width, img_channels), name='input_stddev_normalization')(x1) - if swap_channels: - x1 = Lambda(input_channel_swap, output_shape=( - img_height, img_width, img_channels), name='input_channel_swap')(x1) - - conv1 = Conv2D(32, (5, 5), strides=(1, 1), padding="same", - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv1')(x1) - # Tensorflow uses filter format [filter_height, filter_width, in_channels, out_channels], hence axis = 3 - conv1 = BatchNormalization(axis=3, momentum=0.99, name='bn1')(conv1) - conv1 = ELU(name='elu1')(conv1) - pool1 = MaxPooling2D(pool_size=(2, 2), name='pool1')(conv1) - - conv2 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv2')(pool1) - conv2 = BatchNormalization(axis=3, momentum=0.99, name='bn2')(conv2) - conv2 = ELU(name='elu2')(conv2) - pool2 = MaxPooling2D(pool_size=(2, 2), name='pool2')(conv2) - - conv3 = Conv2D(64, (3, 3), strides=(1, 1), padding="same", - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv3')(pool2) - conv3 = BatchNormalization(axis=3, momentum=0.99, name='bn3')(conv3) - conv3 = ELU(name='elu3')(conv3) - pool3 = MaxPooling2D(pool_size=(2, 2), name='pool3')(conv3) - - conv4 = Conv2D(64, (3, 3), strides=(1, 1), padding="same", - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv4')(pool3) - conv4 = BatchNormalization(axis=3, momentum=0.99, name='bn4')(conv4) - conv4 = ELU(name='elu4')(conv4) - pool4 = MaxPooling2D(pool_size=(2, 2), name='pool4')(conv4) - - conv5 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv5')(pool4) - conv5 = BatchNormalization(axis=3, momentum=0.99, name='bn5')(conv5) - conv5 = ELU(name='elu5')(conv5) - pool5 = MaxPooling2D(pool_size=(2, 2), name='pool5')(conv5) - - conv6 = Conv2D(48, (3, 3), strides=(1, 1), padding="same", - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv6')(pool5) - conv6 = BatchNormalization(axis=3, momentum=0.99, name='bn6')(conv6) - conv6 = ELU(name='elu6')(conv6) - pool6 = MaxPooling2D(pool_size=(2, 2), name='pool6')(conv6) - - conv7 = Conv2D(32, (3, 3), strides=(1, 1), padding="same", - kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='conv7')(pool6) - conv7 = BatchNormalization(axis=3, momentum=0.99, name='bn7')(conv7) - conv7 = ELU(name='elu7')(conv7) - - # The next part is to add the convolutional predictor layers on top of the base network - # that we defined above. Note that I use the term "base network" differently than the paper does. - # To me, the base network is everything that is not convolutional predictor layers or anchor - # box layers. In this case we'll have four predictor layers, but of course you could - # easily rewrite this into an arbitrarily deep base network and add an arbitrary number of - # predictor layers on top of the base network by simply following the pattern shown here. - - # Build the convolutional predictor layers on top of conv layers 4, 5, 6, and 7. 
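- # (Shape check, assuming an illustrative 300x480 input: with the 'valid' poolings above,
- # conv4, conv5, conv6 and conv7 have spatial grids of 37x60, 18x30, 9x15 and 4x7
- # respectively, so those are the resolutions at which the predictors below operate.)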
- # We build two predictor layers on top of each of these layers: One for class prediction (classification), one for box coordinate prediction (localization)
- # We predict `n_classes` confidence values for each box, hence the `classes` predictors have depth `n_boxes * n_classes`
- # We predict 4 box coordinates for each box, hence the `boxes` predictors have depth `n_boxes * 4`
- # Output shape of `classes`: `(batch, height, width, n_boxes * n_classes)`
- classes4 = Conv2D(n_boxes[0] * n_classes, (3, 3), strides=(1, 1), padding="same",
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes4')(conv4)
- classes5 = Conv2D(n_boxes[1] * n_classes, (3, 3), strides=(1, 1), padding="same",
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes5')(conv5)
- classes6 = Conv2D(n_boxes[2] * n_classes, (3, 3), strides=(1, 1), padding="same",
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes6')(conv6)
- classes7 = Conv2D(n_boxes[3] * n_classes, (3, 3), strides=(1, 1), padding="same",
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='classes7')(conv7)
- # Output shape of `boxes`: `(batch, height, width, n_boxes * 4)`
- boxes4 = Conv2D(n_boxes[0] * 4, (3, 3), strides=(1, 1), padding="same",
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes4')(conv4)
- boxes5 = Conv2D(n_boxes[1] * 4, (3, 3), strides=(1, 1), padding="same",
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes5')(conv5)
- boxes6 = Conv2D(n_boxes[2] * 4, (3, 3), strides=(1, 1), padding="same",
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes6')(conv6)
- boxes7 = Conv2D(n_boxes[3] * 4, (3, 3), strides=(1, 1), padding="same",
- kernel_initializer='he_normal', kernel_regularizer=l2(l2_reg), name='boxes7')(conv7)
-
- # Generate the anchor boxes
- # Output shape of `anchors`: `(batch, height, width, n_boxes, 8)`
- anchors4 = AnchorBoxes(img_height, img_width, this_scale=scales[0], next_scale=scales[1], aspect_ratios=aspect_ratios[0],
- two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[0], this_offsets=offsets[0],
- clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors4')(boxes4)
- anchors5 = AnchorBoxes(img_height, img_width, this_scale=scales[1], next_scale=scales[2], aspect_ratios=aspect_ratios[1],
- two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[1], this_offsets=offsets[1],
- clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors5')(boxes5)
- anchors6 = AnchorBoxes(img_height, img_width, this_scale=scales[2], next_scale=scales[3], aspect_ratios=aspect_ratios[2],
- two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[2], this_offsets=offsets[2],
- clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors6')(boxes6)
- anchors7 = AnchorBoxes(img_height, img_width, this_scale=scales[3], next_scale=scales[4], aspect_ratios=aspect_ratios[3],
- two_boxes_for_ar1=two_boxes_for_ar1, this_steps=steps[3], this_offsets=offsets[3],
- clip_boxes=clip_boxes, variances=variances, coords=coords, normalize_coords=normalize_coords, name='anchors7')(boxes7)
-
- # Reshape the class predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, n_classes)`
- # We want the classes isolated in the last axis to perform softmax on them
- classes4_reshaped = Reshape((-1, n_classes),
- name='classes4_reshape')(classes4)
- classes5_reshaped = Reshape((-1, n_classes), name='classes5_reshape')(classes5)
- classes6_reshaped = Reshape((-1, n_classes), name='classes6_reshape')(classes6)
- classes7_reshaped = Reshape((-1, n_classes), name='classes7_reshape')(classes7)
- # Reshape the box coordinate predictions, yielding 3D tensors of shape `(batch, height * width * n_boxes, 4)`
- # We want the four box coordinates isolated in the last axis to compute the smooth L1 loss
- boxes4_reshaped = Reshape((-1, 4), name='boxes4_reshape')(boxes4)
- boxes5_reshaped = Reshape((-1, 4), name='boxes5_reshape')(boxes5)
- boxes6_reshaped = Reshape((-1, 4), name='boxes6_reshape')(boxes6)
- boxes7_reshaped = Reshape((-1, 4), name='boxes7_reshape')(boxes7)
- # Reshape the anchor box tensors, yielding 3D tensors of shape `(batch, height * width * n_boxes, 8)`
- anchors4_reshaped = Reshape((-1, 8), name='anchors4_reshape')(anchors4)
- anchors5_reshaped = Reshape((-1, 8), name='anchors5_reshape')(anchors5)
- anchors6_reshaped = Reshape((-1, 8), name='anchors6_reshape')(anchors6)
- anchors7_reshaped = Reshape((-1, 8), name='anchors7_reshape')(anchors7)
-
- # Concatenate the predictions from the different layers and the associated anchor box tensors
- # Axis 0 (batch) and axis 2 (n_classes or 4, respectively) are identical for all layer predictions,
- # so we want to concatenate along axis 1
- # Output shape of `classes_concat`: (batch, n_boxes_total, n_classes)
- classes_concat = Concatenate(axis=1, name='classes_concat')([classes4_reshaped,
- classes5_reshaped,
- classes6_reshaped,
- classes7_reshaped])
-
- # Output shape of `boxes_concat`: (batch, n_boxes_total, 4)
- boxes_concat = Concatenate(axis=1, name='boxes_concat')([boxes4_reshaped,
- boxes5_reshaped,
- boxes6_reshaped,
- boxes7_reshaped])
-
- # Output shape of `anchors_concat`: (batch, n_boxes_total, 8)
- anchors_concat = Concatenate(axis=1, name='anchors_concat')([anchors4_reshaped,
- anchors5_reshaped,
- anchors6_reshaped,
- anchors7_reshaped])
-
- # The box coordinate predictions will go into the loss function just the way they are,
- # but for the class predictions, we'll apply a softmax activation layer first
- classes_softmax = Activation('softmax', name='classes_softmax')(classes_concat)
-
- # Concatenate the class and box coordinate predictions and the anchors to one large predictions tensor
- # Output shape of `predictions`: (batch, n_boxes_total, n_classes + 4 + 8)
- predictions = Concatenate(axis=2, name='predictions')([classes_softmax, boxes_concat, anchors_concat])
-
- if mode == 'training':
- model = Model(inputs=x, outputs=predictions)
- elif mode == 'inference':
- decoded_predictions = DecodeDetections(confidence_thresh=confidence_thresh,
- iou_threshold=iou_threshold,
- top_k=top_k,
- nms_max_output_size=nms_max_output_size,
- coords=coords,
- normalize_coords=normalize_coords,
- img_height=img_height,
- img_width=img_width,
- name='decoded_predictions')(predictions)
- model = Model(inputs=x, outputs=decoded_predictions)
- elif mode == 'inference_fast':
- decoded_predictions = DecodeDetectionsFast(confidence_thresh=confidence_thresh,
- iou_threshold=iou_threshold,
- top_k=top_k,
- nms_max_output_size=nms_max_output_size,
- coords=coords,
- normalize_coords=normalize_coords,
- img_height=img_height,
- img_width=img_width,
- name='decoded_predictions')(predictions)
- model = Model(inputs=x, outputs=decoded_predictions)
- else:
- raise ValueError(
- "`mode` must be one of 'training', 'inference' or
'inference_fast', but received '{}'.".format(mode)) - - if return_predictor_sizes: - # The spatial dimensions are the same for the `classes` and `boxes` predictor layers. - predictor_sizes = np.array([classes4._keras_shape[1:3], - classes5._keras_shape[1:3], - classes6._keras_shape[1:3], - classes7._keras_shape[1:3]]) - return model, predictor_sizes - else: - return model -''' -Utilities to match ground truth boxes to anchor boxes. - -Copyright (C) 2018 Pierluigi Ferrari - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -''' - -from __future__ import division -import numpy as np - - -def match_bipartite_greedy(weight_matrix): - ''' - Returns a bipartite matching according to the given weight matrix. - - The algorithm works as follows: - - Let the first axis of `weight_matrix` represent ground truth boxes - and the second axis anchor boxes. - The ground truth box that has the greatest similarity with any - anchor box will be matched first, then out of the remaining ground - truth boxes, the ground truth box that has the greatest similarity - with any of the remaining anchor boxes will be matched second, and - so on. That is, the ground truth boxes will be matched in descending - order by maximum similarity with any of the respectively remaining - anchor boxes. - The runtime complexity is O(m^2 * n), where `m` is the number of - ground truth boxes and `n` is the number of anchor boxes. - - Arguments: - weight_matrix (array): A 2D Numpy array that represents the weight matrix - for the matching process. If `(m,n)` is the shape of the weight matrix, - it must be `m <= n`. The weights can be integers or floating point - numbers. The matching process will maximize, i.e. larger weights are - preferred over smaller weights. - - Returns: - A 1D Numpy array of length `weight_matrix.shape[0]` that represents - the matched index along the second axis of `weight_matrix` for each index - along the first axis. - ''' - - weight_matrix = np.copy(weight_matrix) # We'll modify this array. - num_ground_truth_boxes = weight_matrix.shape[0] - # Only relevant for fancy-indexing below. - all_gt_indices = list(range(num_ground_truth_boxes)) - - # This 1D array will contain for each ground truth box the index of - # the matched anchor box. - matches = np.zeros(num_ground_truth_boxes, dtype=np.int) - - # In each iteration of the loop below, exactly one ground truth box - # will be matched to one anchor box. - for _ in range(num_ground_truth_boxes): - - # Find the maximal anchor-ground truth pair in two steps: First, reduce - # over the anchor boxes and then reduce over the ground truth boxes. - # Reduce along the anchor box axis. - anchor_indices = np.argmax(weight_matrix, axis=1) - overlaps = weight_matrix[all_gt_indices, anchor_indices] - # Reduce along the ground truth box axis. - ground_truth_index = np.argmax(overlaps) - anchor_index = anchor_indices[ground_truth_index] - matches[ground_truth_index] = anchor_index # Set the match. 
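- # (Worked illustration with made-up numbers: if the largest remaining weight
- # sits at ground truth 2 / anchor 7, this iteration sets matches[2] = 7; the
- # zeroing below then removes row 2 and column 7 from future consideration.)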
-
- # Set the row of the matched ground truth box and the column of the matched
- # anchor box to all zeros. This ensures that those boxes will not be matched again,
- # because they will never be the best matches for any other boxes.
- weight_matrix[ground_truth_index] = 0
- weight_matrix[:, anchor_index] = 0
-
- return matches
-
-
-def match_multi(weight_matrix, threshold):
- '''
- Matches all elements along the second axis of `weight_matrix` to their best
- matches along the first axis subject to the constraint that the weight of a match
- must be greater than or equal to `threshold` in order to produce a match.
-
- If the weight matrix contains elements that should be ignored, the row or column
- representing the respective element should be set to a value below `threshold`.
-
- Arguments:
- weight_matrix (array): A 2D Numpy array that represents the weight matrix
- for the matching process. If `(m,n)` is the shape of the weight matrix,
- it must be `m <= n`. The weights can be integers or floating point
- numbers. The matching process will maximize, i.e. larger weights are
- preferred over smaller weights.
- threshold (float): A float that represents the threshold (i.e. lower bound)
- that must be met by a pair of elements to produce a match.
-
- Returns:
- Two 1D Numpy arrays of equal length that represent the matched indices. The first
- array contains the indices along the first axis of `weight_matrix`, the second array
- contains the indices along the second axis.
- '''
-
- num_anchor_boxes = weight_matrix.shape[1]
- # Only relevant for fancy-indexing below.
- all_anchor_indices = list(range(num_anchor_boxes))
-
- # Find the best ground truth match for every anchor box.
- # Array of shape (weight_matrix.shape[1],)
- ground_truth_indices = np.argmax(weight_matrix, axis=0)
- # Array of shape (weight_matrix.shape[1],)
- overlaps = weight_matrix[ground_truth_indices, all_anchor_indices]
-
- # Filter out the matches with a weight below the threshold.
- anchor_indices_thresh_met = np.nonzero(overlaps >= threshold)[0]
- gt_indices_thresh_met = ground_truth_indices[anchor_indices_thresh_met]
-
- return gt_indices_thresh_met, anchor_indices_thresh_met
-'''
-An encoder that converts ground truth annotations to SSD-compatible training targets.
-
-Copyright (C) 2018 Pierluigi Ferrari
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License.
-'''
-
-from __future__ import division
-import numpy as np
-
-from bounding_box_utils.bounding_box_utils import iou, convert_coordinates
-from ssd_encoder_decoder.matching_utils import match_bipartite_greedy, match_multi
-
-
-class SSDInputEncoder:
- '''
- Transforms ground truth labels for object detection in images
- (2D bounding box coordinates and class labels) to the format required for
- training an SSD model.
-
- In the process of encoding the ground truth labels, a template of anchor boxes
- is built; these anchors are subsequently matched to the ground truth boxes
- via an intersection-over-union threshold criterion.
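-
- Example (a minimal sketch; the predictor sizes and scales below are illustrative
- assumptions, not values prescribed by this project):
-
- >>> encoder = SSDInputEncoder(img_height=300, img_width=480, n_classes=5,
- ... predictor_sizes=[(37, 60), (18, 30), (9, 15), (4, 7)],
- ... scales=[0.08, 0.16, 0.32, 0.64, 0.96])
- >>> y_encoded = encoder(ground_truth_labels) # one `(k, 5)` array per batch image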
- '''
-
- def __init__(self,
- img_height,
- img_width,
- n_classes,
- predictor_sizes,
- min_scale=0.1,
- max_scale=0.9,
- scales=None,
- aspect_ratios_global=[0.5, 1.0, 2.0],
- aspect_ratios_per_layer=None,
- two_boxes_for_ar1=True,
- steps=None,
- offsets=None,
- clip_boxes=False,
- variances=[0.1, 0.1, 0.2, 0.2],
- matching_type='multi',
- pos_iou_threshold=0.5,
- neg_iou_limit=0.3,
- border_pixels='half',
- coords='centroids',
- normalize_coords=True,
- background_id=0):
- '''
- Arguments:
- img_height (int): The height of the input images.
- img_width (int): The width of the input images.
- n_classes (int): The number of positive classes, e.g. 20 for Pascal VOC, 80 for MS COCO.
- predictor_sizes (list): A list of int-tuples of the format `(height, width)`
- containing the output heights and widths of the convolutional predictor layers.
- min_scale (float, optional): The smallest scaling factor for the size of the anchor boxes as a fraction
- of the shorter side of the input images. Note that you should set the scaling factors
- such that the resulting anchor box sizes correspond to the sizes of the objects you are trying
- to detect. Must be >0.
- max_scale (float, optional): The largest scaling factor for the size of the anchor boxes as a fraction
- of the shorter side of the input images. All scaling factors between the smallest and the
- largest will be linearly interpolated. Note that the second to last of the linearly interpolated
- scaling factors will actually be the scaling factor for the last predictor layer, while the last
- scaling factor is used for the second box for aspect ratio 1 in the last predictor layer
- if `two_boxes_for_ar1` is `True`. Note that you should set the scaling factors
- such that the resulting anchor box sizes correspond to the sizes of the objects you are trying
- to detect. Must be greater than or equal to `min_scale`.
- scales (list, optional): A list of floats >0 containing scaling factors per convolutional predictor layer.
- This list must be one element longer than the number of predictor layers. The first `k` elements are the
- scaling factors for the `k` predictor layers, while the last element is used for the second box
- for aspect ratio 1 in the last predictor layer if `two_boxes_for_ar1` is `True`. This additional
- last scaling factor must be passed either way, even if it is not being used. If a list is passed,
- this argument overrides `min_scale` and `max_scale`. All scaling factors must be greater than zero.
- Note that you should set the scaling factors such that the resulting anchor box sizes correspond to
- the sizes of the objects you are trying to detect.
- aspect_ratios_global (list, optional): The list of aspect ratios for which anchor boxes are to be
- generated. This list is valid for all prediction layers. Note that you should set the aspect ratios such
- that the resulting anchor box shapes roughly correspond to the shapes of the objects you are trying to detect.
- aspect_ratios_per_layer (list, optional): A list containing one aspect ratio list for each prediction layer.
- If a list is passed, it overrides `aspect_ratios_global`. Note that you should set the aspect ratios such
- that the resulting anchor box shapes very roughly correspond to the shapes of the objects you are trying to detect.
- two_boxes_for_ar1 (bool, optional): Only relevant for aspect ratio lists that contain 1. Will be ignored otherwise.
- If `True`, two anchor boxes will be generated for aspect ratio 1.
- The first will be generated
- using the scaling factor for the respective layer, the second one will be generated using the
- geometric mean of said scaling factor and the next bigger scaling factor.
- steps (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
- either ints/floats or tuples of two ints/floats. These numbers represent for each predictor layer how many
- pixels apart the anchor box center points should be vertically and horizontally along the spatial grid over
- the image. If the list contains ints/floats, then that value will be used for both spatial dimensions.
- If the list contains tuples of two ints/floats, then they represent `(step_height, step_width)`.
- If no steps are provided, then they will be computed such that the anchor box center points will form an
- equidistant grid within the image dimensions.
- offsets (list, optional): `None` or a list with as many elements as there are predictor layers. The elements can be
- either floats or tuples of two floats. These numbers represent for each predictor layer how many
- pixels from the top and left borders of the image the top-most and left-most anchor box center points should be
- as a fraction of `steps`. The last bit is important: The offsets are not absolute pixel values, but fractions
- of the step size specified in the `steps` argument. If the list contains floats, then that value will
- be used for both spatial dimensions. If the list contains tuples of two floats, then they represent
- `(vertical_offset, horizontal_offset)`. If no offsets are provided, then they will default to 0.5 of the step size.
- clip_boxes (bool, optional): If `True`, limits the anchor box coordinates to stay within image boundaries.
- variances (list, optional): A list of 4 floats >0. The anchor box offset for each coordinate will be divided by
- its respective variance value.
- matching_type (str, optional): Can be either 'multi' or 'bipartite'. In 'bipartite' mode, each ground truth box will
- be matched only to the one anchor box with the highest IoU overlap. In 'multi' mode, in addition to the aforementioned
- bipartite matching, all anchor boxes with an IoU overlap greater than or equal to the `pos_iou_threshold` will be
- matched to a given ground truth box.
- pos_iou_threshold (float, optional): The intersection-over-union similarity threshold that must be
- met in order to match a given ground truth box to a given anchor box.
- neg_iou_limit (float, optional): The maximum allowed intersection-over-union similarity of an
- anchor box with any ground truth box to be labeled a negative (i.e. background) box. If an
- anchor box is neither a positive, nor a negative box, it will be ignored during training.
- border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
- Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
- to the boxes. If 'exclude', the border pixels do not belong to the boxes.
- If 'half', then one of each of the two horizontal and vertical borders belongs
- to the boxes, but not the other.
- coords (str, optional): The box coordinate format to be used internally by the model (i.e. this is not the input format
- of the ground truth labels). Can be either 'centroids' for the format `(cx, cy, w, h)` (box center coordinates, width,
- and height), 'minmax' for the format `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
- normalize_coords (bool, optional): If `True`, the encoder uses relative instead of absolute coordinates.
- This means instead of using absolute target coordinates, the encoder will scale all coordinates to be within [0,1].
- This way learning becomes independent of the input image size.
- background_id (int, optional): Determines which class ID is for the background class.
- '''
- predictor_sizes = np.array(predictor_sizes)
- if predictor_sizes.ndim == 1:
- predictor_sizes = np.expand_dims(predictor_sizes, axis=0)
-
- ##################################################################################
- # Handle exceptions.
- ##################################################################################
-
- if (min_scale is None or max_scale is None) and scales is None:
- raise ValueError(
- "Either `min_scale` and `max_scale` or `scales` need to be specified.")
-
- if scales:
- # Must be two nested `if` statements since `list` and `bool` cannot be combined by `&`
- if (len(scales) != predictor_sizes.shape[0] + 1):
- raise ValueError("It must be either scales is None or len(scales) == len(predictor_sizes)+1, but len(scales) == {} and len(predictor_sizes)+1 == {}".format(
- len(scales), len(predictor_sizes)+1))
- scales = np.array(scales)
- if np.any(scales <= 0):
- raise ValueError(
- "All values in `scales` must be greater than 0, but the passed list of scales is {}".format(scales))
- else: # If no list of scales was passed, we need to make sure that `min_scale` and `max_scale` are valid values.
- if not 0 < min_scale <= max_scale:
- raise ValueError("It must be 0 < min_scale <= max_scale, but it is min_scale = {} and max_scale = {}".format(
- min_scale, max_scale))
-
- if not (aspect_ratios_per_layer is None):
- # Must be two nested `if` statements since `list` and `bool` cannot be combined by `&`
- if (len(aspect_ratios_per_layer) != predictor_sizes.shape[0]):
- raise ValueError("It must be either aspect_ratios_per_layer is None or len(aspect_ratios_per_layer) == len(predictor_sizes), but len(aspect_ratios_per_layer) == {} and len(predictor_sizes) == {}".format(
- len(aspect_ratios_per_layer), len(predictor_sizes)))
- for aspect_ratios in aspect_ratios_per_layer:
- if np.any(np.array(aspect_ratios) <= 0):
- raise ValueError(
- "All aspect ratios must be greater than zero.")
- else:
- if (aspect_ratios_global is None):
- raise ValueError(
- "At least one of `aspect_ratios_global` and `aspect_ratios_per_layer` must not be `None`.")
- if np.any(np.array(aspect_ratios_global) <= 0):
- raise ValueError(
- "All aspect ratios must be greater than zero.")
-
- if len(variances) != 4:
- raise ValueError(
- "4 variance values must be passed, but {} values were received.".format(len(variances)))
- variances = np.array(variances)
- if np.any(variances <= 0):
- raise ValueError(
- "All variances must be >0, but the variances given are {}".format(variances))
-
- if not (coords == 'minmax' or coords == 'centroids' or coords == 'corners'):
- raise ValueError(
- "Unexpected value for `coords`. Supported values are 'minmax', 'corners' and 'centroids'.")
-
- if (not (steps is None)) and (len(steps) != predictor_sizes.shape[0]):
- raise ValueError(
- "You must provide exactly one step value per predictor layer.")
-
- if (not (offsets is None)) and (len(offsets) != predictor_sizes.shape[0]):
- raise ValueError(
- "You must provide exactly one offset value per predictor layer.")
-
- ##################################################################################
- # Set or compute members.
- ##################################################################################
-
- self.img_height = img_height
- self.img_width = img_width
- self.n_classes = n_classes + 1 # + 1 for the background class
- self.predictor_sizes = predictor_sizes
- self.min_scale = min_scale
- self.max_scale = max_scale
- # If `scales` is None, compute the scaling factors by linearly interpolating between
- # `min_scale` and `max_scale`. If an explicit list of `scales` is given, however,
- # then it takes precedence over `min_scale` and `max_scale`.
- if (scales is None):
- self.scales = np.linspace(self.min_scale, self.max_scale, len(self.predictor_sizes)+1)
- else:
- # If a list of scales is given explicitly, we'll use that instead of computing it from `min_scale` and `max_scale`.
- self.scales = scales
- # If `aspect_ratios_per_layer` is None, then we use the same list of aspect ratios
- # `aspect_ratios_global` for all predictor layers. If `aspect_ratios_per_layer` is given,
- # however, then it takes precedence over `aspect_ratios_global`.
- if (aspect_ratios_per_layer is None):
- self.aspect_ratios = [aspect_ratios_global] * predictor_sizes.shape[0]
- else:
- # If aspect ratios are given per layer, we'll use those.
- self.aspect_ratios = aspect_ratios_per_layer
- self.two_boxes_for_ar1 = two_boxes_for_ar1
- if not (steps is None):
- self.steps = steps
- else:
- self.steps = [None] * predictor_sizes.shape[0]
- if not (offsets is None):
- self.offsets = offsets
- else:
- self.offsets = [None] * predictor_sizes.shape[0]
- self.clip_boxes = clip_boxes
- self.variances = variances
- self.matching_type = matching_type
- self.pos_iou_threshold = pos_iou_threshold
- self.neg_iou_limit = neg_iou_limit
- self.border_pixels = border_pixels
- self.coords = coords
- self.normalize_coords = normalize_coords
- self.background_id = background_id
-
- # Compute the number of boxes per spatial location for each predictor layer.
- # For example, if a predictor layer has three different aspect ratios, [1.0, 0.5, 2.0], and is
- # supposed to predict two boxes of slightly different size for aspect ratio 1.0, then that predictor
- # layer predicts a total of four boxes at every spatial location across the feature map.
- if not (aspect_ratios_per_layer is None):
- self.n_boxes = []
- for aspect_ratios in aspect_ratios_per_layer:
- if (1 in aspect_ratios) & two_boxes_for_ar1:
- self.n_boxes.append(len(aspect_ratios) + 1)
- else:
- self.n_boxes.append(len(aspect_ratios))
- else:
- if (1 in aspect_ratios_global) & two_boxes_for_ar1:
- self.n_boxes = len(aspect_ratios_global) + 1
- else:
- self.n_boxes = len(aspect_ratios_global)
-
- ##################################################################################
- # Compute the anchor boxes for each predictor layer.
- ##################################################################################
-
- # Compute the anchor boxes for each predictor layer. We only have to do this once
- # since the anchor boxes depend only on the model configuration, not on the input data.
- # For each predictor layer (i.e. for each scaling factor) the tensors for that layer's
- # anchor boxes will have the shape `(feature_map_height, feature_map_width, n_boxes, 4)`.
-
- # This will store the anchor boxes for each predictor layer.
- self.boxes_list = []
-
- # The following lists just store diagnostic information. Sometimes it's handy to have the
- # boxes' center points, heights, widths, etc. in a list.
- self.wh_list_diag = [] # Box widths and heights for each predictor layer
- # Horizontal and vertical distances between any two boxes for each predictor layer
- self.steps_diag = []
- self.offsets_diag = [] # Offsets for each predictor layer
- # Anchor box center points as `(cy, cx)` for each predictor layer
- self.centers_diag = []
-
- # Iterate over all predictor layers and compute the anchor boxes for each one.
- for i in range(len(self.predictor_sizes)):
- boxes, center, wh, step, offset = self.generate_anchor_boxes_for_layer(feature_map_size=self.predictor_sizes[i],
- aspect_ratios=self.aspect_ratios[i],
- this_scale=self.scales[i],
- next_scale=self.scales[i+1],
- this_steps=self.steps[i],
- this_offsets=self.offsets[i],
- diagnostics=True)
- self.boxes_list.append(boxes)
- self.wh_list_diag.append(wh)
- self.steps_diag.append(step)
- self.offsets_diag.append(offset)
- self.centers_diag.append(center)
-
- def __call__(self, ground_truth_labels, diagnostics=False):
- '''
- Converts ground truth bounding box data into a suitable format to train an SSD model.
-
- Arguments:
- ground_truth_labels (list): A python list of length `batch_size` that contains one 2D Numpy array
- for each batch image. Each such array has `k` rows for the `k` ground truth bounding boxes belonging
- to the respective image, and the data for each ground truth bounding box has the format
- `(class_id, xmin, ymin, xmax, ymax)` (i.e. the 'corners' coordinate format), and `class_id` must be
- an integer greater than 0 for all boxes as class ID 0 is reserved for the background class.
- diagnostics (bool, optional): If `True`, not only the encoded ground truth tensor will be returned,
- but also a copy of it with anchor box coordinates in place of the ground truth coordinates.
- This can be very useful if you want to visualize which anchor boxes got matched to which ground truth
- boxes.
-
- Returns:
- `y_encoded`, a 3D numpy array of shape `(batch_size, #boxes, #classes + 4 + 4 + 4)` that serves as the
- ground truth label tensor for training, where `#boxes` is the total number of boxes predicted by the
- model per image, and the classes are one-hot-encoded. The four elements after the class vectors in
- the last axis are the box coordinates, the next four elements after that are just dummy elements, and
- the last four elements are the variances.
- '''
-
- # Mapping to define which indices represent which coordinates in the ground truth.
- class_id = 0
- xmin = 1
- ymin = 2
- xmax = 3
- ymax = 4
-
- batch_size = len(ground_truth_labels)
-
- ##################################################################################
- # Generate the template for y_encoded.
- ##################################################################################
-
- y_encoded = self.generate_encoding_template(batch_size=batch_size, diagnostics=False)
-
- ##################################################################################
- # Match ground truth boxes to anchor boxes.
- ##################################################################################
-
- # Match the ground truth boxes to the anchor boxes. Every anchor box that does not have
- # a ground truth match and for which the maximal IoU overlap with any ground truth box is less
- # than or equal to `neg_iou_limit` will be a negative (background) box.
-
- # All boxes are background boxes by default.
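- # (Recall the last-axis layout of `y_encoded` from the docstring above:
- # [one-hot classes | 4 ground truth coordinates | 4 anchor coordinates | 4 variances],
- # which is what the `-12`/`-8`/`-4` index arithmetic below relies on.)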
- y_encoded[:, :, self.background_id] = 1 - # The total number of boxes that the model predicts per batch item - n_boxes = y_encoded.shape[1] - # An identity matrix that we'll use as one-hot class vectors - class_vectors = np.eye(self.n_classes) - - for i in range(batch_size): # For each batch item... - - if ground_truth_labels[i].size == 0: - # If there is no ground truth for this batch item, there is nothing to match. - continue - labels = ground_truth_labels[i].astype( - np.float) # The labels for this batch item - - # Check for degenerate ground truth bounding boxes before attempting any computations. - if np.any(labels[:, [xmax]] - labels[:, [xmin]] <= 0) or np.any(labels[:, [ymax]] - labels[:, [ymin]] <= 0): - raise DegenerateBoxError("SSDInputEncoder detected degenerate ground truth bounding boxes for batch item {} with bounding boxes {}, ".format(i, labels) + - "i.e. bounding boxes where xmax <= xmin and/or ymax <= ymin. Degenerate ground truth " + - "bounding boxes will lead to NaN errors during the training.") - - # Maybe normalize the box coordinates. - if self.normalize_coords: - # Normalize ymin and ymax relative to the image height - labels[:, [ymin, ymax]] /= self.img_height - # Normalize xmin and xmax relative to the image width - labels[:, [xmin, xmax]] /= self.img_width - - # Maybe convert the box coordinate format. - if self.coords == 'centroids': - labels = convert_coordinates( - labels, start_index=xmin, conversion='corners2centroids', border_pixels=self.border_pixels) - elif self.coords == 'minmax': - labels = convert_coordinates( - labels, start_index=xmin, conversion='corners2minmax') - - # The one-hot class IDs for the ground truth boxes of this batch item - classes_one_hot = class_vectors[labels[:, class_id].astype(np.int)] - # The one-hot version of the labels for this batch item - labels_one_hot = np.concatenate( - [classes_one_hot, labels[:, [xmin, ymin, xmax, ymax]]], axis=-1) - - # Compute the IoU similarities between all anchor boxes and all ground truth boxes for this batch item. - # This is a matrix of shape `(num_ground_truth_boxes, num_anchor_boxes)`. - similarities = iou(labels[:, [xmin, ymin, xmax, ymax]], y_encoded[i, :, -12:-8], - coords=self.coords, mode='outer_product', border_pixels=self.border_pixels) - - # First: Do bipartite matching, i.e. match each ground truth box to the one anchor box with the highest IoU. - # This ensures that each ground truth box will have at least one good match. - - # For each ground truth box, get the anchor box to match with it. - bipartite_matches = match_bipartite_greedy( - weight_matrix=similarities) - - # Write the ground truth data to the matched anchor boxes. - y_encoded[i, bipartite_matches, :-8] = labels_one_hot - - # Set the columns of the matched anchor boxes to zero to indicate that they were matched. - similarities[:, bipartite_matches] = 0 - - # Second: Maybe do 'multi' matching, where each remaining anchor box will be matched to its most similar - # ground truth box with an IoU of at least `pos_iou_threshold`, or not matched if there is no - # such ground truth box. - - if self.matching_type == 'multi': - - # Get all matches that satisfy the IoU threshold. - matches = match_multi( - weight_matrix=similarities, threshold=self.pos_iou_threshold) - - # Write the ground truth data to the matched anchor boxes. - y_encoded[i, matches[1], :-8] = labels_one_hot[matches[0]] - - # Set the columns of the matched anchor boxes to zero to indicate that they were matched. 
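- # (Zeroing these columns also keeps the matched anchors out of the neutral-box
- # sweep below, since their remaining similarities become 0 < `neg_iou_limit`.)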
- similarities[:, matches[1]] = 0
-
- # Third: Now after the matching is done, all negative (background) anchor boxes that have
- # an IoU of `neg_iou_limit` or more with any ground truth box will be set to neutral,
- # i.e. they will no longer be background boxes. These anchors are "too close" to a
- # ground truth box to be valid background boxes.
-
- max_background_similarities = np.amax(similarities, axis=0)
- neutral_boxes = np.nonzero(max_background_similarities >= self.neg_iou_limit)[0]
- y_encoded[i, neutral_boxes, self.background_id] = 0
-
- ##################################################################################
- # Convert box coordinates to anchor box offsets.
- ##################################################################################
-
- if self.coords == 'centroids':
- # cx(gt) - cx(anchor), cy(gt) - cy(anchor)
- y_encoded[:, :, [-12, -11]] -= y_encoded[:, :, [-8, -7]]
- # (cx(gt) - cx(anchor)) / w(anchor) / cx_variance, (cy(gt) - cy(anchor)) / h(anchor) / cy_variance
- y_encoded[:, :, [-12, -11]] /= y_encoded[:, :, [-6, -5]] * y_encoded[:, :, [-4, -3]]
- # w(gt) / w(anchor), h(gt) / h(anchor)
- y_encoded[:, :, [-10, -9]] /= y_encoded[:, :, [-6, -5]]
- # ln(w(gt) / w(anchor)) / w_variance, ln(h(gt) / h(anchor)) / h_variance (ln == natural logarithm)
- y_encoded[:, :, [-10, -9]] = np.log(y_encoded[:, :, [-10, -9]]) / y_encoded[:, :, [-2, -1]]
- elif self.coords == 'corners':
- # (gt - anchor) for all four coordinates
- y_encoded[:, :, -12:-8] -= y_encoded[:, :, -8:-4]
- # (xmin(gt) - xmin(anchor)) / w(anchor), (xmax(gt) - xmax(anchor)) / w(anchor)
- y_encoded[:, :, [-12, -10]] /= np.expand_dims(y_encoded[:, :, -6] - y_encoded[:, :, -8], axis=-1)
- # (ymin(gt) - ymin(anchor)) / h(anchor), (ymax(gt) - ymax(anchor)) / h(anchor)
- y_encoded[:, :, [-11, -9]] /= np.expand_dims(y_encoded[:, :, -5] - y_encoded[:, :, -7], axis=-1)
- # (gt - anchor) / size(anchor) / variance for all four coordinates, where 'size' refers to w and h respectively
- y_encoded[:, :, -12:-8] /= y_encoded[:, :, -4:]
- elif self.coords == 'minmax':
- # (gt - anchor) for all four coordinates
- y_encoded[:, :, -12:-8] -= y_encoded[:, :, -8:-4]
- # (xmin(gt) - xmin(anchor)) / w(anchor), (xmax(gt) - xmax(anchor)) / w(anchor)
- y_encoded[:, :, [-12, -11]] /= np.expand_dims(y_encoded[:, :, -7] - y_encoded[:, :, -8], axis=-1)
- # (ymin(gt) - ymin(anchor)) / h(anchor), (ymax(gt) - ymax(anchor)) / h(anchor)
- y_encoded[:, :, [-10, -9]] /= np.expand_dims(y_encoded[:, :, -5] - y_encoded[:, :, -6], axis=-1)
- # (gt - anchor) / size(anchor) / variance for all four coordinates, where 'size' refers to w and h respectively
- y_encoded[:, :, -12:-8] /= y_encoded[:, :, -4:]
-
- if diagnostics:
- # Here we'll save the matched anchor boxes (i.e. anchor boxes that were matched to a ground truth box, but keeping the anchor box coordinates).
- y_matched_anchors = np.copy(y_encoded)
- # Keeping the anchor box coordinates means setting the offsets to zero.
- y_matched_anchors[:, :, -12:-8] = 0
- return y_encoded, y_matched_anchors
- else:
- return y_encoded
-
- def generate_anchor_boxes_for_layer(self,
- feature_map_size,
- aspect_ratios,
- this_scale,
- next_scale,
- this_steps=None,
- this_offsets=None,
- diagnostics=False):
- '''
- Computes an array of the spatial positions and sizes of the anchor boxes for one predictor layer
- of size `feature_map_size == [feature_map_height, feature_map_width]`.
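-
- For example (illustrative numbers, not project defaults): with `this_scale=0.2`,
- a 300-pixel shorter image side, and aspect ratio 2.0, the code below yields
- `box_width = 0.2 * 300 * sqrt(2.0) ~ 84.9` and `box_height = 0.2 * 300 / sqrt(2.0) ~ 42.4`.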
-
- Arguments:
- feature_map_size (tuple): A list or tuple `[feature_map_height, feature_map_width]` with the spatial
- dimensions of the feature map for which to generate the anchor boxes.
- aspect_ratios (list): A list of floats, the aspect ratios for which anchor boxes are to be generated.
- All list elements must be unique.
- this_scale (float): A float in [0, 1], the scaling factor for the size of the generated anchor boxes
- as a fraction of the shorter side of the input image.
- next_scale (float): A float in [0, 1], the next larger scaling factor. Only relevant if
- `self.two_boxes_for_ar1 == True`.
- diagnostics (bool, optional): If true, the following additional outputs will be returned:
- 1) A list of the center point `x` and `y` coordinates for each spatial location.
- 2) A list containing `(width, height)` for each box aspect ratio.
- 3) A tuple containing `(step_height, step_width)`
- 4) A tuple containing `(offset_height, offset_width)`
- This information can be useful to understand in just a few numbers what the generated grid of
- anchor boxes actually looks like, i.e. how large the different boxes are and how dense
- their spatial distribution is, in order to determine whether the box grid covers the input images
- appropriately and whether the box sizes are appropriate to fit the sizes of the objects
- to be detected.
-
- Returns:
- A 4D Numpy tensor of shape `(feature_map_height, feature_map_width, n_boxes_per_cell, 4)` where the
- last dimension contains `(xmin, xmax, ymin, ymax)` for each anchor box in each cell of the feature map.
- '''
- # Compute box width and height for each aspect ratio.
-
- # The shorter side of the image will be used to compute `w` and `h` using `scale` and `aspect_ratios`.
- size = min(self.img_height, self.img_width)
- # Compute the box widths and heights for all aspect ratios
- wh_list = []
- for ar in aspect_ratios:
- if (ar == 1):
- # Compute the regular anchor box for aspect ratio 1.
- box_height = box_width = this_scale * size
- wh_list.append((box_width, box_height))
- if self.two_boxes_for_ar1:
- # Compute one slightly larger version using the geometric mean of this scale value and the next.
- box_height = box_width = np.sqrt(this_scale * next_scale) * size
- wh_list.append((box_width, box_height))
- else:
- box_width = this_scale * size * np.sqrt(ar)
- box_height = this_scale * size / np.sqrt(ar)
- wh_list.append((box_width, box_height))
- wh_list = np.array(wh_list)
- n_boxes = len(wh_list)
-
- # Compute the grid of box center points. They are identical for all aspect ratios.
-
- # Compute the step sizes, i.e. how far apart the anchor box center points will be vertically and horizontally.
- if (this_steps is None):
- step_height = self.img_height / feature_map_size[0]
- step_width = self.img_width / feature_map_size[1]
- else:
- if isinstance(this_steps, (list, tuple)) and (len(this_steps) == 2):
- step_height = this_steps[0]
- step_width = this_steps[1]
- elif isinstance(this_steps, (int, float)):
- step_height = this_steps
- step_width = this_steps
- # Compute the offsets, i.e. at what pixel values the first anchor box center point will be from the top and from the left of the image.
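- # (E.g., with illustrative values: an offset of 0.5 and a step of 8 put the first center 4 px from the border.)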
- if (this_offsets is None):
- offset_height = 0.5
- offset_width = 0.5
- else:
- if isinstance(this_offsets, (list, tuple)) and (len(this_offsets) == 2):
- offset_height = this_offsets[0]
- offset_width = this_offsets[1]
- elif isinstance(this_offsets, (int, float)):
- offset_height = this_offsets
- offset_width = this_offsets
- # Now that we have the offsets and step sizes, compute the grid of anchor box center points.
- cy = np.linspace(offset_height * step_height, (offset_height + feature_map_size[0] - 1) * step_height, feature_map_size[0])
- cx = np.linspace(offset_width * step_width, (offset_width + feature_map_size[1] - 1) * step_width, feature_map_size[1])
- cx_grid, cy_grid = np.meshgrid(cx, cy)
- # This is necessary for np.tile() to do what we want further down
- cx_grid = np.expand_dims(cx_grid, -1)
- # This is necessary for np.tile() to do what we want further down
- cy_grid = np.expand_dims(cy_grid, -1)
-
- # Create a 4D tensor template of shape `(feature_map_height, feature_map_width, n_boxes, 4)`
- # where the last dimension will contain `(cx, cy, w, h)`
- boxes_tensor = np.zeros((feature_map_size[0], feature_map_size[1], n_boxes, 4))
-
- boxes_tensor[:, :, :, 0] = np.tile(cx_grid, (1, 1, n_boxes)) # Set cx
- boxes_tensor[:, :, :, 1] = np.tile(cy_grid, (1, 1, n_boxes)) # Set cy
- boxes_tensor[:, :, :, 2] = wh_list[:, 0] # Set w
- boxes_tensor[:, :, :, 3] = wh_list[:, 1] # Set h
-
- # Convert `(cx, cy, w, h)` to `(xmin, ymin, xmax, ymax)`
- boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='centroids2corners')
-
- # If `clip_boxes` is enabled, clip the coordinates to lie within the image boundaries
- if self.clip_boxes:
- x_coords = boxes_tensor[:, :, :, [0, 2]]
- x_coords[x_coords >= self.img_width] = self.img_width - 1
- x_coords[x_coords < 0] = 0
- boxes_tensor[:, :, :, [0, 2]] = x_coords
- y_coords = boxes_tensor[:, :, :, [1, 3]]
- y_coords[y_coords >= self.img_height] = self.img_height - 1
- y_coords[y_coords < 0] = 0
- boxes_tensor[:, :, :, [1, 3]] = y_coords
-
- # If `normalize_coords` is enabled, normalize the coordinates to be within [0,1]
- if self.normalize_coords:
- boxes_tensor[:, :, :, [0, 2]] /= self.img_width
- boxes_tensor[:, :, :, [1, 3]] /= self.img_height
-
- # TODO: Implement box limiting directly for `(cx, cy, w, h)` so that we don't have to unnecessarily convert back and forth.
- if self.coords == 'centroids':
- # Convert `(xmin, ymin, xmax, ymax)` back to `(cx, cy, w, h)`.
- boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2centroids', border_pixels='half')
- elif self.coords == 'minmax':
- # Convert `(xmin, ymin, xmax, ymax)` to `(xmin, xmax, ymin, ymax)`.
- boxes_tensor = convert_coordinates(boxes_tensor, start_index=0, conversion='corners2minmax', border_pixels='half')
-
- if diagnostics:
- return boxes_tensor, (cy, cx), wh_list, (step_height, step_width), (offset_height, offset_width)
- else:
- return boxes_tensor
-
- def generate_encoding_template(self, batch_size, diagnostics=False):
- '''
- Produces an encoding template for the ground truth label tensor for a given batch.
-
- Note that all tensor creation, reshaping and concatenation operations performed in this function
- and the sub-functions it calls are identical to those performed inside the SSD model. This, of course,
- must be the case in order to preserve the spatial meaning of each box prediction, but it's useful to make
- yourself aware of this fact and why it is necessary.
-
- In other words, the boxes in `y_encoded` must have a specific order in order to correspond to the right spatial
- positions and scales of the boxes predicted by the model. The sequence of operations here ensures that `y_encoded`
- has this specific form.
-
- Arguments:
- batch_size (int): The batch size.
- diagnostics (bool, optional): See the documentation for `generate_anchor_boxes()`. The diagnostic output
- here is similar, just for all predictor conv layers.
-
- Returns:
- A Numpy array of shape `(batch_size, #boxes, #classes + 12)`, the template into which to encode
- the ground truth labels for training. The last axis has length `#classes + 12` because the model
- output contains not only the 4 predicted box coordinate offsets, but also the 4 coordinates for
- the anchor boxes and the 4 variance values.
- '''
- # 1: Tile the anchor boxes for each predictor layer across all batch items.
- boxes_batch = []
- for boxes in self.boxes_list:
- # Prepend one dimension to `self.boxes_list` to account for the batch size and tile it along.
- # The result will be a 5D tensor of shape `(batch_size, feature_map_height, feature_map_width, n_boxes, 4)`
- boxes = np.expand_dims(boxes, axis=0)
- boxes = np.tile(boxes, (batch_size, 1, 1, 1, 1))
-
- # Now reshape the 5D tensor above into a 3D tensor of shape
- # `(batch, feature_map_height * feature_map_width * n_boxes, 4)`. The resulting
- # order of the tensor content will be identical to the order obtained from the reshaping operation
- # in our Keras model (we're using the Tensorflow backend, and tf.reshape() and np.reshape()
- # use the same default index order, which is C-like index ordering)
- boxes = np.reshape(boxes, (batch_size, -1, 4))
- boxes_batch.append(boxes)
-
- # 2: Concatenate the anchor tensors from the individual layers to one.
- boxes_tensor = np.concatenate(boxes_batch, axis=1)
-
- # 3: Create a template tensor to hold the one-hot class encodings of shape `(batch, #boxes, #classes)`
- # It will contain all zeros for now, the classes will be set in the matching process that follows
- classes_tensor = np.zeros(
- (batch_size, boxes_tensor.shape[1], self.n_classes))
-
- # 4: Create a tensor to contain the variances. This tensor has the same shape as `boxes_tensor` and simply
- # contains the same 4 variance values for every position in the last axis.
- variances_tensor = np.zeros_like(boxes_tensor)
- variances_tensor += self.variances # Long live broadcasting
-
- # 5: Concatenate the classes, boxes and variances tensors to get our final template for y_encoded. We also need
- # another tensor of the shape of `boxes_tensor` as a space filler so that `y_encoding_template` has the same
- # shape as the SSD model output tensor. The content of this tensor is irrelevant, we'll just use
- # `boxes_tensor` a second time.
- y_encoding_template = np.concatenate(
- (classes_tensor, boxes_tensor, boxes_tensor, variances_tensor), axis=2)
-
- if diagnostics:
- return y_encoding_template, self.centers_diag, self.wh_list_diag, self.steps_diag, self.offsets_diag
- else:
- return y_encoding_template
-
-
- class DegenerateBoxError(Exception):
- '''
- An exception class to be raised if degenerate boxes are detected.
- '''
- pass
- '''
- Includes:
- * Functions to decode and filter raw SSD model output. These are only needed if the
- SSD model does not have a `DecodeDetections` layer.
-* Functions to perform greedy non-maximum suppression - -Copyright (C) 2018 Pierluigi Ferrari - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. -''' - -from __future__ import division -import numpy as np - -from bounding_box_utils.bounding_box_utils import iou, convert_coordinates - - -def greedy_nms(y_pred_decoded, iou_threshold=0.45, coords='corners', border_pixels='half'): - ''' - Perform greedy non-maximum suppression on the input boxes. - - Greedy NMS works by selecting the box with the highest score and - removing all boxes around it that are too close to it measured by IoU-similarity. - Out of the boxes that are left over, once again the one with the highest - score is selected and so on, until no boxes with too much overlap are left. - - Arguments: - y_pred_decoded (list): A batch of decoded predictions. For a given batch size `n` this - is a list of length `n` where each list element is a 2D Numpy array. - For a batch item with `k` predicted boxes this 2D Numpy array has - shape `(k, 6)`, where each row contains the coordinates of the respective - box in the format `[class_id, score, xmin, xmax, ymin, ymax]`. - Technically, the number of columns doesn't have to be 6, it can be - arbitrary as long as the first four elements of each row are - `xmin`, `xmax`, `ymin`, `ymax` (in this order) and the last element - is the score assigned to the prediction. Note that this function is - agnostic to the scale of the score or what it represents. - iou_threshold (float, optional): All boxes with a Jaccard similarity of - greater than `iou_threshold` with a locally maximal box will be removed - from the set of predictions, where 'maximal' refers to the box score. - coords (str, optional): The coordinate format of `y_pred_decoded`. - Can be one of the formats supported by `iou()`. - border_pixels (str, optional): How to treat the border pixels of the bounding boxes. - Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong - to the boxes. If 'exclude', the border pixels do not belong to the boxes. - If 'half', then one of each of the two horizontal and vertical borders belong - to the boxex, but not the other. - - Returns: - The predictions after removing non-maxima. The format is the same as the input format. - ''' - y_pred_decoded_nms = [] - for batch_item in y_pred_decoded: # For the labels of each batch item... - boxes_left = np.copy(batch_item) - maxima = [] # This is where we store the boxes that make it through the non-maximum suppression - # While there are still boxes left to compare... - while boxes_left.shape[0] > 0: - # ...get the index of the next box with the highest confidence... - maximum_index = np.argmax(boxes_left[:, 1]) - # ...copy that box and... 
- maximum_box = np.copy(boxes_left[maximum_index]) - # ...append it to `maxima` because we'll definitely keep it - maxima.append(maximum_box) - # Now remove the maximum box from `boxes_left` - boxes_left = np.delete(boxes_left, maximum_index, axis=0) - if boxes_left.shape[0] == 0: - # If there are no boxes left after this step, break. Otherwise... - break - # ...compare (IoU) the other left over boxes to the maximum box... - similarities = iou(boxes_left[:, 2:], maximum_box[2:], coords=coords, - mode='element-wise', border_pixels=border_pixels) - # ...so that we can remove the ones that overlap too much with the maximum box - boxes_left = boxes_left[similarities <= iou_threshold] - y_pred_decoded_nms.append(np.array(maxima)) - - return y_pred_decoded_nms - - -def _greedy_nms(predictions, iou_threshold=0.45, coords='corners', border_pixels='half'): - ''' - The same greedy non-maximum suppression algorithm as above, but slightly modified for use as an internal - function for per-class NMS in `decode_detections()`. - ''' - boxes_left = np.copy(predictions) - maxima = [] # This is where we store the boxes that make it through the non-maximum suppression - # While there are still boxes left to compare... - while boxes_left.shape[0] > 0: - # ...get the index of the next box with the highest confidence... - maximum_index = np.argmax(boxes_left[:, 0]) - # ...copy that box and... - maximum_box = np.copy(boxes_left[maximum_index]) - # ...append it to `maxima` because we'll definitely keep it - maxima.append(maximum_box) - # Now remove the maximum box from `boxes_left` - boxes_left = np.delete(boxes_left, maximum_index, axis=0) - if boxes_left.shape[0] == 0: - # If there are no boxes left after this step, break. Otherwise... - break - # ...compare (IoU) the other left over boxes to the maximum box... - similarities = iou(boxes_left[:, 1:], maximum_box[1:], coords=coords, - mode='element-wise', border_pixels=border_pixels) - # ...so that we can remove the ones that overlap too much with the maximum box - boxes_left = boxes_left[similarities <= iou_threshold] - return np.array(maxima) - - -def _greedy_nms2(predictions, iou_threshold=0.45, coords='corners', border_pixels='half'): - ''' - The same greedy non-maximum suppression algorithm as above, but slightly modified for use as an internal - function in `decode_detections_fast()`. - ''' - boxes_left = np.copy(predictions) - maxima = [] # This is where we store the boxes that make it through the non-maximum suppression - # While there are still boxes left to compare... - while boxes_left.shape[0] > 0: - # ...get the index of the next box with the highest confidence... - maximum_index = np.argmax(boxes_left[:, 1]) - # ...copy that box and... - maximum_box = np.copy(boxes_left[maximum_index]) - # ...append it to `maxima` because we'll definitely keep it - maxima.append(maximum_box) - # Now remove the maximum box from `boxes_left` - boxes_left = np.delete(boxes_left, maximum_index, axis=0) - if boxes_left.shape[0] == 0: - # If there are no boxes left after this step, break. Otherwise... - break - # ...compare (IoU) the other left over boxes to the maximum box... 
- similarities = iou(boxes_left[:, 2:], maximum_box[2:], coords=coords,
- mode='element-wise', border_pixels=border_pixels)
- # ...so that we can remove the ones that overlap too much with the maximum box
- boxes_left = boxes_left[similarities <= iou_threshold]
- return np.array(maxima)
-
-
- def decode_detections(y_pred,
- confidence_thresh=0.01,
- iou_threshold=0.45,
- top_k=200,
- input_coords='centroids',
- normalize_coords=True,
- img_height=None,
- img_width=None,
- border_pixels='half'):
- '''
- Convert model prediction output back to a format that contains only the positive box predictions
- (i.e. the same format that `SSDInputEncoder` takes as input).
-
- After the decoding, two stages of prediction filtering are performed for each class individually:
- First confidence thresholding, then greedy non-maximum suppression. The filtering results for all
- classes are concatenated and the `top_k` overall highest confidence results constitute the final
- predictions for a given batch item. This procedure follows the original Caffe implementation.
- For a slightly different and more efficient alternative to decode raw model output that performs
- non-maximum suppression globally instead of per class, see `decode_detections_fast()` below.
-
- Arguments:
- y_pred (array): The prediction output of the SSD model, expected to be a Numpy array
- of shape `(batch_size, #boxes, #classes + 4 + 4 + 4)`, where `#boxes` is the total number of
- boxes predicted by the model per image and the last axis contains
- `[one-hot vector for the classes, 4 predicted coordinate offsets, 4 anchor box coordinates, 4 variances]`.
- confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific
- positive class in order to be considered for the non-maximum suppression stage for the respective class.
- A lower value will result in a larger part of the selection process being done by the non-maximum suppression
- stage, while a larger value will result in a larger part of the selection process happening in the confidence
- thresholding stage.
- iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold`
- with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers
- to the box score.
- top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the
- non-maximum suppression stage.
- input_coords (str, optional): The box coordinate format that the model outputs. Can be either 'centroids'
- for the format `(cx, cy, w, h)` (box center coordinates, width, and height), 'minmax' for the format
- `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`.
- normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1])
- and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs
- relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`.
- Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect
- coordinates. Requires `img_height` and `img_width` if set to `True`.
- img_height (int, optional): The height of the input images. Only needed if `normalize_coords` is `True`.
- img_width (int, optional): The width of the input images. Only needed if `normalize_coords` is `True`.
- border_pixels (str, optional): How to treat the border pixels of the bounding boxes.
- Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong
- to the boxes. If 'exclude', the border pixels do not belong to the boxes.
- If 'half', then one of each of the two horizontal and vertical borders belong
- to the boxes, but not the other.
-
- Returns:
- A python list of length `batch_size` where each list element represents the predicted boxes
- for one image and contains a Numpy array of shape `(boxes, 6)` where each row is a box prediction for
- a non-background class for the respective image in the format `[class_id, confidence, xmin, ymin, xmax, ymax]`.
- '''
- if normalize_coords and ((img_height is None) or (img_width is None)):
- raise ValueError("If relative box coordinates are supposed to be converted to absolute coordinates, the decoder needs the image size in order to decode the predictions, but `img_height == {}` and `img_width == {}`".format(img_height, img_width))
-
- # 1: Convert the box coordinates from the predicted anchor box offsets to predicted absolute coordinates
-
- # Slice out the classes and the four offsets, throw away the anchor coordinates and variances, resulting in a tensor of shape `[batch, n_boxes, n_classes + 4 coordinates]`
- y_pred_decoded_raw = np.copy(y_pred[:, :, :-8])
-
- if input_coords == 'centroids':
- # exp(ln(w(pred)/w(anchor)) / w_variance * w_variance) == w(pred) / w(anchor), exp(ln(h(pred)/h(anchor)) / h_variance * h_variance) == h(pred) / h(anchor)
- y_pred_decoded_raw[:, :, [-2, -1]] = np.exp(
- y_pred_decoded_raw[:, :, [-2, -1]] * y_pred[:, :, [-2, -1]])
- # (w(pred) / w(anchor)) * w(anchor) == w(pred), (h(pred) / h(anchor)) * h(anchor) == h(pred)
- y_pred_decoded_raw[:, :, [-2, -1]] *= y_pred[:, :, [-6, -5]]
- # (delta_cx(pred) / w(anchor) / cx_variance) * cx_variance * w(anchor) == delta_cx(pred), (delta_cy(pred) / h(anchor) / cy_variance) * cy_variance * h(anchor) == delta_cy(pred)
- y_pred_decoded_raw[:, :, [-4, -3]] *= y_pred[:,
- :, [-4, -3]] * y_pred[:, :, [-6, -5]]
- # delta_cx(pred) + cx(anchor) == cx(pred), delta_cy(pred) + cy(anchor) == cy(pred)
- y_pred_decoded_raw[:, :, [-4, -3]] += y_pred[:, :, [-8, -7]]
- y_pred_decoded_raw = convert_coordinates(
- y_pred_decoded_raw, start_index=-4, conversion='centroids2corners')
- elif input_coords == 'minmax':
- # delta(pred) / size(anchor) / variance * variance == delta(pred) / size(anchor) for all four coordinates, where 'size' refers to w or h, respectively
- y_pred_decoded_raw[:, :, -4:] *= y_pred[:, :, -4:]
- # delta_xmin(pred) / w(anchor) * w(anchor) == delta_xmin(pred), delta_xmax(pred) / w(anchor) * w(anchor) == delta_xmax(pred)
- y_pred_decoded_raw[:, :, [-4, -3]] *= np.expand_dims(
- y_pred[:, :, -7] - y_pred[:, :, -8], axis=-1)
- # delta_ymin(pred) / h(anchor) * h(anchor) == delta_ymin(pred), delta_ymax(pred) / h(anchor) * h(anchor) == delta_ymax(pred)
- y_pred_decoded_raw[:, :, [-2, -1]] *= np.expand_dims(
- y_pred[:, :, -5] - y_pred[:, :, -6], axis=-1)
- # delta(pred) + anchor == pred for all four coordinates
- y_pred_decoded_raw[:, :, -4:] += y_pred[:, :, -8:-4]
- y_pred_decoded_raw = convert_coordinates(
- y_pred_decoded_raw, start_index=-4, conversion='minmax2corners')
- elif input_coords == 'corners':
- # delta(pred) / size(anchor) / variance * variance == delta(pred) / size(anchor) for all four coordinates, where 'size' refers to w or h, respectively
- y_pred_decoded_raw[:, :, -4:] *= y_pred[:, :, -4:]
- # delta_xmin(pred) / w(anchor) * w(anchor) ==
delta_xmin(pred), delta_xmax(pred) / w(anchor) * w(anchor) == delta_xmax(pred) - y_pred_decoded_raw[:, :, [-4, -2]] *= np.expand_dims( - y_pred[:, :, -6] - y_pred[:, :, -8], axis=-1) - # delta_ymin(pred) / h(anchor) * h(anchor) == delta_ymin(pred), delta_ymax(pred) / h(anchor) * h(anchor) == delta_ymax(pred) - y_pred_decoded_raw[:, :, [-3, -1]] *= np.expand_dims( - y_pred[:, :, -5] - y_pred[:, :, -7], axis=-1) - # delta(pred) + anchor == pred for all four coordinates - y_pred_decoded_raw[:, :, -4:] += y_pred[:, :, -8:-4] - else: - raise ValueError( - "Unexpected value for `input_coords`. Supported input coordinate formats are 'minmax', 'corners' and 'centroids'.") - - # 2: If the model predicts normalized box coordinates and they are supposed to be converted back to absolute coordinates, do that - - if normalize_coords: - # Convert xmin, xmax back to absolute coordinates - y_pred_decoded_raw[:, :, [-4, -2]] *= img_width - # Convert ymin, ymax back to absolute coordinates - y_pred_decoded_raw[:, :, [-3, -1]] *= img_height - - # 3: Apply confidence thresholding and non-maximum suppression per class - - # The number of classes is the length of the last axis minus the four box coordinates - n_classes = y_pred_decoded_raw.shape[-1] - 4 - - y_pred_decoded = [] # Store the final predictions in this list - # `batch_item` has shape `[n_boxes, n_classes + 4 coords]` - for batch_item in y_pred_decoded_raw: - pred = [] # Store the final predictions for this batch item here - # For each class except the background class (which has class ID 0)... - for class_id in range(1, n_classes): - # ...keep only the confidences for that class, making this an array of shape `[n_boxes, 5]` and... - single_class = batch_item[:, [class_id, -4, -3, -2, -1]] - # ...keep only those boxes with a confidence above the set threshold. - threshold_met = single_class[single_class[:, 0] - > confidence_thresh] - # If any boxes made the threshold... - if threshold_met.shape[0] > 0: - # ...perform NMS on them. - maxima = _greedy_nms(threshold_met, iou_threshold=iou_threshold, - coords='corners', border_pixels=border_pixels) - # Expand the last dimension by one element to have room for the class ID. This is now an arrray of shape `[n_boxes, 6]` - maxima_output = np.zeros( - (maxima.shape[0], maxima.shape[1] + 1)) - # Write the class ID to the first column... - maxima_output[:, 0] = class_id - # ...and write the maxima to the other columns... - maxima_output[:, 1:] = maxima - # ...and append the maxima for this class to the list of maxima for this batch item. - pred.append(maxima_output) - # Once we're through with all classes, keep only the `top_k` maxima with the highest scores - # If there are any predictions left after confidence-thresholding... - if pred: - pred = np.concatenate(pred, axis=0) - # If we have more than `top_k` results left at this point, otherwise there is nothing to filter,... - if top_k != 'all' and pred.shape[0] > top_k: - # ...get the indices of the `top_k` highest-score maxima... - top_k_indices = np.argpartition( - pred[:, 1], kth=pred.shape[0]-top_k, axis=0)[pred.shape[0]-top_k:] - # ...and keep only those entries of `pred`... - pred = pred[top_k_indices] - else: - # Even if empty, `pred` must become a Numpy array. 
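- # (np.array([]) has shape `(0,)`, so downstream code can still treat it as an array of boxes)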
- pred = np.array(pred) - # ...and now that we're done, append the array of final predictions for this batch item to the output list - y_pred_decoded.append(pred) - - return y_pred_decoded - - -def decode_detections_fast(y_pred, - confidence_thresh=0.5, - iou_threshold=0.45, - top_k='all', - input_coords='centroids', - normalize_coords=True, - img_height=None, - img_width=None, - border_pixels='half'): - ''' - Convert model prediction output back to a format that contains only the positive box predictions - (i.e. the same format that `enconde_y()` takes as input). - - Optionally performs confidence thresholding and greedy non-maximum suppression after the decoding stage. - - Note that the decoding procedure used here is not the same as the procedure used in the original Caffe implementation. - For each box, the procedure used here assigns the box's highest confidence as its predicted class. Then it removes - all boxes for which the highest confidence is the background class. This results in less work for the subsequent - non-maximum suppression, because the vast majority of the predictions will be filtered out just by the fact that - their highest confidence is for the background class. It is much more efficient than the procedure of the original - implementation, but the results may also differ. - - Arguments: - y_pred (array): The prediction output of the SSD model, expected to be a Numpy array - of shape `(batch_size, #boxes, #classes + 4 + 4 + 4)`, where `#boxes` is the total number of - boxes predicted by the model per image and the last axis contains - `[one-hot vector for the classes, 4 predicted coordinate offsets, 4 anchor box coordinates, 4 variances]`. - confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in any positive - class required for a given box to be considered a positive prediction. A lower value will result - in better recall, while a higher value will result in better precision. Do not use this parameter with the - goal to combat the inevitably many duplicates that an SSD will produce, the subsequent non-maximum suppression - stage will take care of those. - iou_threshold (float, optional): `None` or a float in [0,1]. If `None`, no non-maximum suppression will be - performed. If not `None`, greedy NMS will be performed after the confidence thresholding stage, meaning - all boxes with a Jaccard similarity of greater than `iou_threshold` with a locally maximal box will be removed - from the set of predictions, where 'maximal' refers to the box score. - top_k (int, optional): 'all' or an integer with number of highest scoring predictions to be kept for each batch item - after the non-maximum suppression stage. If 'all', all predictions left after the NMS stage will be kept. - input_coords (str, optional): The box coordinate format that the model outputs. Can be either 'centroids' - for the format `(cx, cy, w, h)` (box center coordinates, width, and height), 'minmax' for the format - `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`. - normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1]) - and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs - relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`. - Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect - coordinates. 
Requires `img_height` and `img_width` if set to `True`. - img_height (int, optional): The height of the input images. Only needed if `normalize_coords` is `True`. - img_width (int, optional): The width of the input images. Only needed if `normalize_coords` is `True`. - border_pixels (str, optional): How to treat the border pixels of the bounding boxes. - Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong - to the boxes. If 'exclude', the border pixels do not belong to the boxes. - If 'half', then one of each of the two horizontal and vertical borders belong - to the boxex, but not the other. - - Returns: - A python list of length `batch_size` where each list element represents the predicted boxes - for one image and contains a Numpy array of shape `(boxes, 6)` where each row is a box prediction for - a non-background class for the respective image in the format `[class_id, confidence, xmin, xmax, ymin, ymax]`. - ''' - if normalize_coords and ((img_height is None) or (img_width is None)): - raise ValueError("If relative box coordinates are supposed to be converted to absolute coordinates, the decoder needs the image size in order to decode the predictions, but `img_height == {}` and `img_width == {}`".format(img_height, img_width)) - - # 1: Convert the classes from one-hot encoding to their class ID - # Slice out the four offset predictions plus two elements whereto we'll write the class IDs and confidences in the next step - y_pred_converted = np.copy(y_pred[:, :, -14:-8]) - # The indices of the highest confidence values in the one-hot class vectors are the class ID - y_pred_converted[:, :, 0] = np.argmax(y_pred[:, :, :-12], axis=-1) - # Store the confidence values themselves, too - y_pred_converted[:, :, 1] = np.amax(y_pred[:, :, :-12], axis=-1) - - # 2: Convert the box coordinates from the predicted anchor box offsets to predicted absolute coordinates - if input_coords == 'centroids': - # exp(ln(w(pred)/w(anchor)) / w_variance * w_variance) == w(pred) / w(anchor), exp(ln(h(pred)/h(anchor)) / h_variance * h_variance) == h(pred) / h(anchor) - y_pred_converted[:, :, [4, 5]] = np.exp( - y_pred_converted[:, :, [4, 5]] * y_pred[:, :, [-2, -1]]) - # (w(pred) / w(anchor)) * w(anchor) == w(pred), (h(pred) / h(anchor)) * h(anchor) == h(pred) - y_pred_converted[:, :, [4, 5]] *= y_pred[:, :, [-6, -5]] - # (delta_cx(pred) / w(anchor) / cx_variance) * cx_variance * w(anchor) == delta_cx(pred), (delta_cy(pred) / h(anchor) / cy_variance) * cy_variance * h(anchor) == delta_cy(pred) - y_pred_converted[:, :, [2, 3]] *= y_pred[:, - :, [-4, -3]] * y_pred[:, :, [-6, -5]] - # delta_cx(pred) + cx(anchor) == cx(pred), delta_cy(pred) + cy(anchor) == cy(pred) - y_pred_converted[:, :, [2, 3]] += y_pred[:, :, [-8, -7]] - y_pred_converted = convert_coordinates( - y_pred_converted, start_index=-4, conversion='centroids2corners') - elif input_coords == 'minmax': - # delta(pred) / size(anchor) / variance * variance == delta(pred) / size(anchor) for all four coordinates, where 'size' refers to w or h, respectively - y_pred_converted[:, :, 2:] *= y_pred[:, :, -4:] - # delta_xmin(pred) / w(anchor) * w(anchor) == delta_xmin(pred), delta_xmax(pred) / w(anchor) * w(anchor) == delta_xmax(pred) - y_pred_converted[:, :, [ - 2, 3]] *= np.expand_dims(y_pred[:, :, -7] - y_pred[:, :, -8], axis=-1) - # delta_ymin(pred) / h(anchor) * h(anchor) == delta_ymin(pred), delta_ymax(pred) / h(anchor) * h(anchor) == delta_ymax(pred) - y_pred_converted[:, :, [ - 4, 5]] *= np.expand_dims(y_pred[:, :, -5] - y_pred[:, 
:, -6], axis=-1) - # delta(pred) + anchor == pred for all four coordinates - y_pred_converted[:, :, 2:] += y_pred[:, :, -8:-4] - y_pred_converted = convert_coordinates( - y_pred_converted, start_index=-4, conversion='minmax2corners') - elif input_coords == 'corners': - # delta(pred) / size(anchor) / variance * variance == delta(pred) / size(anchor) for all four coordinates, where 'size' refers to w or h, respectively - y_pred_converted[:, :, 2:] *= y_pred[:, :, -4:] - # delta_xmin(pred) / w(anchor) * w(anchor) == delta_xmin(pred), delta_xmax(pred) / w(anchor) * w(anchor) == delta_xmax(pred) - y_pred_converted[:, :, [ - 2, 4]] *= np.expand_dims(y_pred[:, :, -6] - y_pred[:, :, -8], axis=-1) - # delta_ymin(pred) / h(anchor) * h(anchor) == delta_ymin(pred), delta_ymax(pred) / h(anchor) * h(anchor) == delta_ymax(pred) - y_pred_converted[:, :, [ - 3, 5]] *= np.expand_dims(y_pred[:, :, -5] - y_pred[:, :, -7], axis=-1) - # delta(pred) + anchor == pred for all four coordinates - y_pred_converted[:, :, 2:] += y_pred[:, :, -8:-4] - else: - raise ValueError( - "Unexpected value for `coords`. Supported values are 'minmax', 'corners' and 'centroids'.") - - # 3: If the model predicts normalized box coordinates and they are supposed to be converted back to absolute coordinates, do that - if normalize_coords: - # Convert xmin, xmax back to absolute coordinates - y_pred_converted[:, :, [2, 4]] *= img_width - # Convert ymin, ymax back to absolute coordinates - y_pred_converted[:, :, [3, 5]] *= img_height - - # 4: Decode our huge `(batch, #boxes, 6)` tensor into a list of length `batch` where each list entry is an array containing only the positive predictions - y_pred_decoded = [] - for batch_item in y_pred_converted: # For each image in the batch... - # ...get all boxes that don't belong to the background class,... - boxes = batch_item[np.nonzero(batch_item[:, 0])] - # ...then filter out those positive boxes for which the prediction confidence is too low and after that... - boxes = boxes[boxes[:, 1] >= confidence_thresh] - if iou_threshold: # ...if an IoU threshold is set... - # ...perform NMS on the remaining boxes. - boxes = _greedy_nms2(boxes, iou_threshold=iou_threshold, - coords='corners', border_pixels=border_pixels) - # If we have more than `top_k` results left at this point... - if top_k != 'all' and boxes.shape[0] > top_k: - # ...get the indices of the `top_k` highest-scoring boxes... - top_k_indices = np.argpartition( - boxes[:, 1], kth=boxes.shape[0]-top_k, axis=0)[boxes.shape[0]-top_k:] - boxes = boxes[top_k_indices] # ...and keep only those boxes... - # ...and now that we're done, append the array of final predictions for this batch item to the output list - y_pred_decoded.append(boxes) - - return y_pred_decoded - -################################################################################################ -# Debugging tools, not relevant for normal use -################################################################################################ - -# The functions below are for debugging, so you won't normally need them. That is, -# unless you need to debug your model, of course. 
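-
- # For reference, a minimal sketch of how the regular decoder above would typically be called
- # (the model, image size and threshold values here are assumptions for illustration only):
- #
- # y_pred = model.predict(batch_images) # shape `(batch_size, #boxes, #classes + 12)`
- # y_pred_decoded = decode_detections(y_pred,
- # confidence_thresh=0.5,
- # iou_threshold=0.45,
- # top_k=200,
- # normalize_coords=True,
- # img_height=300,
- # img_width=300)
- # Each `y_pred_decoded[i]` is then an array of `[class_id, confidence, xmin, ymin, xmax, ymax]` rows.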
- - -def decode_detections_debug(y_pred, - confidence_thresh=0.01, - iou_threshold=0.45, - top_k=200, - input_coords='centroids', - normalize_coords=True, - img_height=None, - img_width=None, - variance_encoded_in_target=False, - border_pixels='half'): - ''' - This decoder performs the same processing as `decode_detections()`, but the output format for each left-over - predicted box is `[box_id, class_id, confidence, xmin, ymin, xmax, ymax]`. - - That is, in addition to the usual data, each predicted box has the internal index of that box within - the model (`box_id`) prepended to it. This allows you to know exactly which part of the model made a given - box prediction; in particular, it allows you to know which predictor layer made a given prediction. - This can be useful for debugging. - - Arguments: - y_pred (array): The prediction output of the SSD model, expected to be a Numpy array - of shape `(batch_size, #boxes, #classes + 4 + 4 + 4)`, where `#boxes` is the total number of - boxes predicted by the model per image and the last axis contains - `[one-hot vector for the classes, 4 predicted coordinate offsets, 4 anchor box coordinates, 4 variances]`. - confidence_thresh (float, optional): A float in [0,1), the minimum classification confidence in a specific - positive class in order to be considered for the non-maximum suppression stage for the respective class. - A lower value will result in a larger part of the selection process being done by the non-maximum suppression - stage, while a larger value will result in a larger part of the selection process happening in the confidence - thresholding stage. - iou_threshold (float, optional): A float in [0,1]. All boxes with a Jaccard similarity of greater than `iou_threshold` - with a locally maximal box will be removed from the set of predictions for a given class, where 'maximal' refers - to the box score. - top_k (int, optional): The number of highest scoring predictions to be kept for each batch item after the - non-maximum suppression stage. - input_coords (str, optional): The box coordinate format that the model outputs. Can be either 'centroids' - for the format `(cx, cy, w, h)` (box center coordinates, width, and height), 'minmax' for the format - `(xmin, xmax, ymin, ymax)`, or 'corners' for the format `(xmin, ymin, xmax, ymax)`. - normalize_coords (bool, optional): Set to `True` if the model outputs relative coordinates (i.e. coordinates in [0,1]) - and you wish to transform these relative coordinates back to absolute coordinates. If the model outputs - relative coordinates, but you do not want to convert them back to absolute coordinates, set this to `False`. - Do not set this to `True` if the model already outputs absolute coordinates, as that would result in incorrect - coordinates. Requires `img_height` and `img_width` if set to `True`. - img_height (int, optional): The height of the input images. Only needed if `normalize_coords` is `True`. - img_width (int, optional): The width of the input images. Only needed if `normalize_coords` is `True`. - border_pixels (str, optional): How to treat the border pixels of the bounding boxes. - Can be 'include', 'exclude', or 'half'. If 'include', the border pixels belong - to the boxes. If 'exclude', the border pixels do not belong to the boxes. - If 'half', then one of each of the two horizontal and vertical borders belong - to the boxex, but not the other. 
-
- Returns:
- A python list of length `batch_size` where each list element represents the predicted boxes
- for one image and contains a Numpy array of shape `(boxes, 7)` where each row is a box prediction for
- a non-background class for the respective image in the format `[box_id, class_id, confidence, xmin, ymin, xmax, ymax]`.
- '''
- if normalize_coords and ((img_height is None) or (img_width is None)):
- raise ValueError("If relative box coordinates are supposed to be converted to absolute coordinates, the decoder needs the image size in order to decode the predictions, but `img_height == {}` and `img_width == {}`".format(img_height, img_width))
-
- # 1: Convert the box coordinates from the predicted anchor box offsets to predicted absolute coordinates
-
- # Slice out the classes and the four offsets, throw away the anchor coordinates and variances, resulting in a tensor of shape `[batch, n_boxes, n_classes + 4 coordinates]`
- y_pred_decoded_raw = np.copy(y_pred[:, :, :-8])
-
- if input_coords == 'centroids':
- if variance_encoded_in_target:
- # Decode the predicted box center x and y coordinates.
- y_pred_decoded_raw[:, :, [-4, -3]] = y_pred_decoded_raw[:, :,
- [-4, -3]] * y_pred[:, :, [-6, -5]] + y_pred[:, :, [-8, -7]]
- # Decode the predicted box width and height.
- y_pred_decoded_raw[:, :, [-2, -1]] = np.exp(
- y_pred_decoded_raw[:, :, [-2, -1]]) * y_pred[:, :, [-6, -5]]
- else:
- # Decode the predicted box center x and y coordinates.
- y_pred_decoded_raw[:, :, [-4, -3]] = y_pred_decoded_raw[:, :, [-4, -3]] * \
- y_pred[:, :, [-6, -5]] * y_pred[:, :,
- [-4, -3]] + y_pred[:, :, [-8, -7]]
- # Decode the predicted box width and height.
- y_pred_decoded_raw[:, :, [-2, -1]] = np.exp(
- y_pred_decoded_raw[:, :, [-2, -1]] * y_pred[:, :, [-2, -1]]) * y_pred[:, :, [-6, -5]]
- y_pred_decoded_raw = convert_coordinates(
- y_pred_decoded_raw, start_index=-4, conversion='centroids2corners')
- elif input_coords == 'minmax':
- # delta(pred) / size(anchor) / variance * variance == delta(pred) / size(anchor) for all four coordinates, where 'size' refers to w or h, respectively
- y_pred_decoded_raw[:, :, -4:] *= y_pred[:, :, -4:]
- # delta_xmin(pred) / w(anchor) * w(anchor) == delta_xmin(pred), delta_xmax(pred) / w(anchor) * w(anchor) == delta_xmax(pred)
- y_pred_decoded_raw[:, :, [-4, -3]] *= np.expand_dims(
- y_pred[:, :, -7] - y_pred[:, :, -8], axis=-1)
- # delta_ymin(pred) / h(anchor) * h(anchor) == delta_ymin(pred), delta_ymax(pred) / h(anchor) * h(anchor) == delta_ymax(pred)
- y_pred_decoded_raw[:, :, [-2, -1]] *= np.expand_dims(
- y_pred[:, :, -5] - y_pred[:, :, -6], axis=-1)
- # delta(pred) + anchor == pred for all four coordinates
- y_pred_decoded_raw[:, :, -4:] += y_pred[:, :, -8:-4]
- y_pred_decoded_raw = convert_coordinates(
- y_pred_decoded_raw, start_index=-4, conversion='minmax2corners')
- elif input_coords == 'corners':
- # delta(pred) / size(anchor) / variance * variance == delta(pred) / size(anchor) for all four coordinates, where 'size' refers to w or h, respectively
- y_pred_decoded_raw[:, :, -4:] *= y_pred[:, :, -4:]
- # delta_xmin(pred) / w(anchor) * w(anchor) == delta_xmin(pred), delta_xmax(pred) / w(anchor) * w(anchor) == delta_xmax(pred)
- y_pred_decoded_raw[:, :, [-4, -2]] *= np.expand_dims(
- y_pred[:, :, -6] - y_pred[:, :, -8], axis=-1)
- # delta_ymin(pred) / h(anchor) * h(anchor) == delta_ymin(pred), delta_ymax(pred) / h(anchor) * h(anchor) == delta_ymax(pred)
- y_pred_decoded_raw[:, :, [-3, -1]] *= np.expand_dims(
- y_pred[:, :, -5] - y_pred[:, :, -7], axis=-1)
- #
delta(pred) + anchor == pred for all four coordinates - y_pred_decoded_raw[:, :, -4:] += y_pred[:, :, -8:-4] - else: - raise ValueError( - "Unexpected value for `input_coords`. Supported input coordinate formats are 'minmax', 'corners' and 'centroids'.") - - # 2: If the model predicts normalized box coordinates and they are supposed to be converted back to absolute coordinates, do that - - if normalize_coords: - # Convert xmin, xmax back to absolute coordinates - y_pred_decoded_raw[:, :, [-4, -2]] *= img_width - # Convert ymin, ymax back to absolute coordinates - y_pred_decoded_raw[:, :, [-3, -1]] *= img_height - - # 3: For each batch item, prepend each box's internal index to its coordinates. - - # Expand the last axis by one. - y_pred_decoded_raw2 = np.zeros( - (y_pred_decoded_raw.shape[0], y_pred_decoded_raw.shape[1], y_pred_decoded_raw.shape[2] + 1)) - y_pred_decoded_raw2[:, :, 1:] = y_pred_decoded_raw - # Put the box indices as the first element for each box via broadcasting. - y_pred_decoded_raw2[:, :, 0] = np.arange(y_pred_decoded_raw.shape[1]) - y_pred_decoded_raw = y_pred_decoded_raw2 - - # 4: Apply confidence thresholding and non-maximum suppression per class - - # The number of classes is the length of the last axis minus the four box coordinates and minus the index - n_classes = y_pred_decoded_raw.shape[-1] - 5 - - y_pred_decoded = [] # Store the final predictions in this list - # `batch_item` has shape `[n_boxes, n_classes + 4 coords]` - for batch_item in y_pred_decoded_raw: - pred = [] # Store the final predictions for this batch item here - # For each class except the background class (which has class ID 0)... - for class_id in range(1, n_classes): - # ...keep only the confidences for that class, making this an array of shape `[n_boxes, 6]` and... - single_class = batch_item[:, [0, class_id + 1, -4, -3, -2, -1]] - # ...keep only those boxes with a confidence above the set threshold. - threshold_met = single_class[single_class[:, 1] - > confidence_thresh] - # If any boxes made the threshold... - if threshold_met.shape[0] > 0: - # ...perform NMS on them. - maxima = _greedy_nms_debug( - threshold_met, iou_threshold=iou_threshold, coords='corners', border_pixels=border_pixels) - # Expand the last dimension by one element to have room for the class ID. This is now an arrray of shape `[n_boxes, 6]` - maxima_output = np.zeros( - (maxima.shape[0], maxima.shape[1] + 1)) - # Write the box index to the first column... - maxima_output[:, 0] = maxima[:, 0] - # ...and write the class ID to the second column... - maxima_output[:, 1] = class_id - # ...and write the rest of the maxima data to the other columns... - maxima_output[:, 2:] = maxima[:, 1:] - # ...and append the maxima for this class to the list of maxima for this batch item. - pred.append(maxima_output) - # Once we're through with all classes, keep only the `top_k` maxima with the highest scores - pred = np.concatenate(pred, axis=0) - # If we have more than `top_k` results left at this point, otherwise there is nothing to filter,... - if pred.shape[0] > top_k: - # ...get the indices of the `top_k` highest-score maxima... - top_k_indices = np.argpartition( - pred[:, 2], kth=pred.shape[0]-top_k, axis=0)[pred.shape[0]-top_k:] - # ...and keep only those entries of `pred`... 
- pred = pred[top_k_indices] - # ...and now that we're done, append the array of final predictions for this batch item to the output list - y_pred_decoded.append(pred) - - return y_pred_decoded - - -def _greedy_nms_debug(predictions, iou_threshold=0.45, coords='corners', border_pixels='half'): - ''' - The same greedy non-maximum suppression algorithm as above, but slightly modified for use as an internal - function for per-class NMS in `decode_detections_debug()`. The difference is that it keeps the indices of all - left-over boxes for each batch item, which allows you to know which predictor layer predicted a given output - box and is thus useful for debugging. - ''' - boxes_left = np.copy(predictions) - maxima = [] # This is where we store the boxes that make it through the non-maximum suppression - # While there are still boxes left to compare... - while boxes_left.shape[0] > 0: - # ...get the index of the next box with the highest confidence... - maximum_index = np.argmax(boxes_left[:, 1]) - # ...copy that box and... - maximum_box = np.copy(boxes_left[maximum_index]) - # ...append it to `maxima` because we'll definitely keep it - maxima.append(maximum_box) - # Now remove the maximum box from `boxes_left` - boxes_left = np.delete(boxes_left, maximum_index, axis=0) - if boxes_left.shape[0] == 0: - # If there are no boxes left after this step, break. Otherwise... - break - # ...compare (IoU) the other left over boxes to the maximum box... - similarities = iou(boxes_left[:, 2:], maximum_box[2:], coords=coords, - mode='element-wise', border_pixels=border_pixels) - # ...so that we can remove the ones that overlap too much with the maximum box - boxes_left = boxes_left[similarities <= iou_threshold] - return np.array(maxima) - - -def get_num_boxes_per_pred_layer(predictor_sizes, aspect_ratios, two_boxes_for_ar1): - ''' - Returns a list of the number of boxes that each predictor layer predicts. - - `aspect_ratios` must be a nested list, containing a list of aspect ratios - for each predictor layer. - ''' - num_boxes_per_pred_layer = [] - for i in range(len(predictor_sizes)): - if two_boxes_for_ar1: - num_boxes_per_pred_layer.append( - predictor_sizes[i][0] * predictor_sizes[i][1] * (len(aspect_ratios[i]) + 1)) - else: - num_boxes_per_pred_layer.append( - predictor_sizes[i][0] * predictor_sizes[i][1] * len(aspect_ratios[i])) - return num_boxes_per_pred_layer - - -def get_pred_layers(y_pred_decoded, num_boxes_per_pred_layer): - ''' - For a given prediction tensor decoded with `decode_detections_debug()`, returns a list - with the indices of the predictor layers that made each predictions. - - That is, this function lets you know which predictor layer is responsible - for a given prediction. - - Arguments: - y_pred_decoded (array): The decoded model output tensor. Must have been - decoded with `decode_detections_debug()` so that it contains the internal box index - for each predicted box. - num_boxes_per_pred_layer (list): A list that contains the total number - of boxes that each predictor layer predicts. 
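-
- Example (assumed values, for illustration only): if `num_boxes_per_pred_layer` is
- `[100, 60, 24]`, the cumulative sums are `[100, 160, 184]`, so a box with internal
- index 130 falls into the second range and is attributed to predictor layer 1.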
- ''' - pred_layers_all = [] - cum_boxes_per_pred_layer = np.cumsum(num_boxes_per_pred_layer) - for batch_item in y_pred_decoded: - pred_layers = [] - for prediction in batch_item: - if (prediction[0] < 0) or (prediction[0] >= cum_boxes_per_pred_layer[-1]): - raise ValueError( - "Box index is out of bounds of the possible indices as given by the values in `num_boxes_per_pred_layer`.") - for i in range(len(cum_boxes_per_pred_layer)): - if prediction[0] < cum_boxes_per_pred_layer[i]: - pred_layers.append(i) - break - pred_layers_all.append(pred_layers) - return pred_layers_all -#! /usr/bin/env python -# -# Copyright (C) 2018 Mikko Kotila - -DESCRIPTION = "Talos Hyperparameter Tuning for Keras" -LONG_DESCRIPTION = """\ -Talos radically changes the ordinary Keras workflow by -fully automating hyperparameter tuning and model evaluation. -Talos exposes Keras functionality entirely and there is no new -syntax or templates to learn. -""" - -DISTNAME = 'talos' -MAINTAINER = 'Mikko Kotila' -MAINTAINER_EMAIL = 'mailme@mikkokotila.com' -URL = 'http://autonom.io' -LICENSE = 'MIT' -DOWNLOAD_URL = 'https://github.com/autonomio/talos/' -VERSION = '0.5.0' - -try: - from setuptools import setup - _has_setuptools = True -except ImportError: - from distutils.core import setup - -install_requires = ['numpy', - 'pandas', - 'keras', - 'astetik', - 'sklearn', - 'tqdm', - 'chances', - 'kerasplotlib', - 'wrangle', - 'requests'] - - -if __name__ == "__main__": - - setup(name=DISTNAME, - author=MAINTAINER, - author_email=MAINTAINER_EMAIL, - maintainer=MAINTAINER, - maintainer_email=MAINTAINER_EMAIL, - description=DESCRIPTION, - long_description=LONG_DESCRIPTION, - license=LICENSE, - url=URL, - version=VERSION, - download_url=DOWNLOAD_URL, - install_requires=install_requires, - packages=['talos', - 'talos.scan', - 'talos.examples', - 'talos.utils', - 'talos.model', - 'talos.parameters', - 'talos.reducers', - 'talos.metrics', - 'talos.commands'], - - classifiers=['Intended Audience :: Science/Research', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'License :: OSI Approved :: MIT License', - 'Topic :: Scientific/Engineering :: Human Machine Interfaces', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Scientific/Engineering :: Mathematics', - 'Operating System :: POSIX', - 'Operating System :: Unix', - 'Operating System :: MacOS', - 'Operating System :: Microsoft :: Windows :: Windows 10']) -#!/usr/bin/env python -import time - -import talos as ta - -from test.core_tests.test_scan_object import test_scan_object -from test.core_tests.test_reporting_object import test_reporting_object -from test.core_tests.test_random_methods import test_random_methods -from test.core_tests.test_params_object import test_params_object -from test.core_tests.test_auto_scan import test_auto_scan -from test.core_tests.test_templates import test_templates - -from talos.utils.generator import generator -from talos.utils.gpu_utils import force_cpu - - -if __name__ == '__main__': - - '''NOTE: test/core_tests/test_scan.py needs to be edited as well!''' - - # testing different model types - from test.core_tests.test_scan import BinaryTest, MultiLabelTest - - BinaryTest().values_single_test() - BinaryTest().values_list_test() - BinaryTest().values_range_test() - - MultiLabelTest().values_single_test() - MultiLabelTest().values_list_test() - MultiLabelTest().values_range_test() - - # reporting specific testing - from 
test.core_tests.test_scan import ReportingTest, DatasetTest - - ReportingTest() - DatasetTest() - - # MOVE TO command specific tests - - # Scan() object tests - scan_object = test_scan_object() - - # reporting tests - test_reporting_object(scan_object) - test_params_object() - test_auto_scan() - test_templates() - - # create a string for name of deploy file - start_time = str(time.strftime("%s")) - - p = ta.Predict(scan_object) - p.predict(scan_object.x) - p.predict_classes(scan_object.x) - - ta.Autom8(scan_object, scan_object.x, scan_object.y) - ta.Evaluate(scan_object) - ta.Deploy(scan_object, start_time) - ta.Restore(start_time + '.zip') - - test_random_methods() - fit_generator = ta.utils.generator(scan_object.x, scan_object.y, 20) - force_cpu() -# first import things as you would usually -from keras.models import Sequential -from keras.layers import Dense, Dropout -from keras.optimizers import Adam, Nadam -from keras.losses import categorical_crossentropy, logcosh -from keras.activations import relu, elu, softmax - -# import talos -import talos as ta - -# load rthe iris dataset -x, y = ta.datasets.iris() - -# then define the parameter boundaries - -p = {'lr': (2, 10, 30), - 'first_neuron': [4, 8, 16, 32, 64, 128], - 'hidden_layers': [2, 3, 4, 5, 6], - 'batch_size': [2, 3, 4], - 'epochs': [300], - 'dropout': (0, 0.40, 10), - 'weight_regulizer': [None], - 'emb_output_dims': [None], - 'optimizer': ['adam', 'nadam'], - 'losses': [categorical_crossentropy, logcosh], - 'activation': [relu, elu], - 'last_activation': [softmax]} - - -# then define your Keras model -def iris_model(x_train, y_train, x_val, y_val, params): - - model = Sequential() - model.add(Dense(params['first_neuron'], - input_dim=x_train.shape[1], - activation=params['activation'])) - model.add(Dropout(params['dropout'])) - model.add(Dense(y_train.shape[1], activation=params['last_activation'])) - - model.compile(optimizer=params['optimizer'], - loss=params['losses'], - metrics=['acc']) - - out = model.fit(x_train, y_train, - batch_size=params['batch_size'], - epochs=params['epochs'], - verbose=0, - validation_data=[x_val, y_val]) - - return out, model - - -# and run the scan -h = ta.Scan(x, y, - params=p, - dataset_name='first_test', - experiment_no='aaa', - model=iris_model, - grid_downsample=0.5) -from keras.models import Sequential -from keras.layers import Dropout, Dense -from ..model.normalizers import lr_normalizer -from ..model.layers import hidden_layers -from ..metrics.keras_metrics import fmeasure - - -def iris(x_train, y_train, x_val, y_val, params): - '''A model that yields 100% accuracy and f1 for Iris dataset''' - - model = Sequential() - model.add(Dense(params['first_neuron'], input_dim=x_train.shape[1], - activation=params['activation'])) - model.add(Dropout(params['dropout'])) - hidden_layers(model, params) - model.add(Dense(y_train.shape[1], activation=params['last_activation'])) - - model.compile(optimizer=params['optimizer'](lr=lr_normalizer(params['lr'], params['optimizer'])), - loss=params['losses'], - metrics=['acc', fmeasure]) - - out = model.fit(x_train, y_train, - batch_size=20, - epochs=params['epochs'], - verbose=0, - validation_data=[x_val, y_val]) - - return out, model - - -# first we have to make sure to input data and params into the function -def breast_cancer(x_train, y_train, x_val, y_val, params): - - # next we can build the model exactly like we would normally do it - model = Sequential() - model.add(Dense(10, - input_dim=x_train.shape[1], - activation=params['activation'], - 
kernel_initializer='normal')) - model.add(Dropout(params['dropout'])) - - # if we want to also test for number of layers and shapes, that's possible - hidden_layers(model, params, 1) - - # then we finish again with completely standard Keras way - model.add( - Dense(1, activation=params['last_activation'], kernel_initializer='normal')) - - model.compile(loss=params['losses'], - # here we add a regulizer normalization function from Talos - optimizer=params['optimizer']( - lr=lr_normalizer(params['lr'], params['optimizer'])), - metrics=['acc', fmeasure]) - - history = model.fit(x_train, y_train, - validation_data=[x_val, y_val], - batch_size=params['batch_size'], - epochs=params['epochs'], - verbose=0) - - # finally we have to make sure that history object and model are returned - return history, model -# import commands -from .scan.Scan import Scan -from .commands.reporting import Reporting -from .commands.predict import Predict -from .commands.deploy import Deploy -from .commands.evaluate import Evaluate -from .commands.restore import Restore -from .commands.autom8 import Autom8 -from .commands.params import Params -from .commands.kerasmodel import KerasModel -from . import utils -from . import examples as templates - -# the purpose of everything below is to keep the namespace completely clean - -del_from_utils = ['best_model', 'connection_check', 'detector', - 'exceptions', 'last_neuron', 'load_model', 'validation_split', - 'pred_class', 'results', 'string_cols_to_numeric'] - -for key in del_from_utils: - if key.startswith('__') is False: - delattr(utils, key) - -template_sub = [templates.datasets, - templates.models, - templates.params, - templates.pipelines] - -keep_from_templates = ['iris', 'cervical_cancer', 'titanic', 'breast_cancer', - 'icu_mortality'] - -for sub in template_sub: - for key in list(sub.__dict__): - if key.startswith('__') is False: - if key not in keep_from_templates: - delattr(sub, key) - -del commands, parameters, scan, reducers, model, metrics, key, del_from_utils -del examples, sub, keep_from_templates, template_sub - -__version__ = "0.5.0" -import sys - -sys.path.append('../talos') -from ..scan.Scan import Scan - - -def Autom8(scan_object, - x_val, - y_val, - n=10, - metric='val_acc', - folds=5, - shuffle=True, - average='binary', - asc=False): - '''Pipeline automator - - Reduces the idea to prediction pipeline into a single - command where a Scan() process is followed by evaluating - n best - - Example use: - - Parameters - ---------- - scan_object : Scan() object - A Scan() process needs to be completed first, and then the resulting - object can be used as input here. - x_val : ndarray - Data to be used for 'x' in evaluation. Note that should be in the same - format as the data which was used in the Scan() but not the same data. - y_val : python dictionary - Data to be used for 'y' in evaluation. Note that should be in the same - format as the data which was used in the Scan() but not the same data. - n : str - Number of promising models to be included in the evaluation process. - Time increase linearly with number of models. - metric : str - The metric to be used for deciding which models are promising. - Basically the 'n' argument and 'metric' argument are combined to pick - 'n' best performing models based on 'metric'. - folds : int - Number of folds to be used in cross-validation. - shuffle : bool - If the data should be shuffled before cross-validation. - average : str - This parameter is required for multiclass/multilabel targets. 
If None, - the scores for each class are returned. Otherwise, this determines - the type of averaging performed on the data: - - 'binary': - Only report results for the class specified by pos_label. - This is applicable only if targets (y_{true,pred}) are binary. - - 'micro': - Calculate metrics globally by counting the total true positives, - false negatives and false positives. - - 'macro': - Calculate metrics for each label, and find their unweighted mean. - This does not take label imbalance into account. - - 'weighted': - Calculate metrics for each label, and find their average weighted - by support (the number of true instances for each label). This alters - 'macro' to account for label imbalance; it can result in an F-score - that is not between precision and recall. - - 'samples': - Calculate metrics for each instance, and find their average - (only meaningful for multilabel classification where this differs - from accuracy_score). - asc : bool - This needs to be True for evaluation metrics that need to be minimized, - and False when a metric needs to be maximized. - - ''' - - # evaluate and add the evaluation scores - scan_object.evaluate_models(x_val, - y_val, - n=n, - metric=metric, - folds=folds, - shuffle=shuffle, - average=average, - asc=False) - - # make predictions with the best model - preds = scan_object.best_model('eval_f1score_mean') - scan_object.preds = preds.predict(x_val) - - # print out the best model parameters and stats - scan_object.preds_model = scan_object.data.sort_values('eval_f1score_mean', - ascending=False).iloc[0] - - return scan_object -import os -import pandas as pd -import shutil -import numpy as np - -from ..utils.best_model import best_model, activate_model - - -class Deploy: - - '''Functionality for deploying a model to a filename''' - - def __init__(self, scan_object, model_name, metric='val_acc', asc=False): - '''Deploy a model to be used later or in a different system. - - NOTE: for a metric that is to be minimized, set asc=True or otherwise - you will end up with the model that has the highest loss. - - Deploy() takes in the object from Scan() and creates a package locally - that can be later activated with Restore(). - - scan_object : object - The object that is returned from Scan() upon completion. - model_name : str - Name for the .zip file to be created. - metric : str - The metric to be used for picking the best model. - asc: bool - Make this True for metrics that are to be minimized (e.g. loss) , - and False when the metric is to be maximized (e.g. acc) - - ''' - - self.scan_object = scan_object - os.mkdir(model_name) - self.path = model_name + '/' + model_name - self.model_name = model_name - self.metric = metric - self.asc = asc - self.data = scan_object.data - self.best_model = best_model(scan_object, metric, asc) - self.model = activate_model(scan_object, self.best_model) - - # runtime - self.save_model_as() - self.save_details() - self.save_data() - self.save_results() - self.save_params() - self.save_readme() - self.package() - - def save_model_as(self): - '''Model Saver - WHAT: Saves a trained model so it can be loaded later - for predictions by predictor(). 
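-
- The architecture is written to `<model_name>_model.json` and the weights to
- `<model_name>_model.h5`. As a sketch (not part of this class), the model can later be
- rebuilt with `keras.models.model_from_json()` followed by `load_weights()`.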
- ''' - - model_json = self.model.to_json() - with open(self.path + "_model.json", "w") as json_file: - json_file.write(model_json) - - self.model.save_weights(self.path + "_model.h5") - print("Deploy package" + " " + self.model_name + " " + "have been saved.") - - def save_details(self): - - self.scan_object.details.to_csv(self.path + '_details.txt') - - def save_data(self): - - x = pd.DataFrame(self.scan_object.x[:100]) - y = pd.DataFrame(self.scan_object.y[:100]) - - x.to_csv(self.path + '_x.csv', header=None, index=None) - y.to_csv(self.path + '_y.csv', header=None, index=None) - - def save_results(self): - - self.scan_object.data.to_csv(self.path + '_results.csv') - - def save_params(self): - - np.save(self.path + '_params', self.scan_object.params) - - def save_readme(self): - - txt = 'To activate the assets in the Talos deploy package: \n\n from talos.commands.restore import Restore \n a = Restore(\'path_to_asset\')\n\nNow you will have an object similar to the Scan object, which can be used with other Talos commands as you would be able to with the Scan object' - - text_file = open(self.path.split('/')[0] + '/README.txt', "w") - text_file.write(txt) - text_file.close() - - def package(self): - - shutil.make_archive(self.model_name, 'zip', self.model_name) - shutil.rmtree(self.model_name) -from sklearn.metrics import mean_absolute_error, f1_score -from numpy import mean, std - -from ..utils.validation_split import kfold -from ..utils.best_model import best_model, activate_model - - -class Evaluate: - - '''Class for evaluating models based on the Scan() object''' - - def __init__(self, scan_object): - '''Takes in as input a Scan() object. - e = evaluate(scan_object) and see docstring - for e() for more information.''' - - self.scan_object = scan_object - self.data = scan_object.data - - def evaluate(self, x, y, - model_id=None, - folds=5, - shuffle=True, - metric='val_acc', - mode='multi_label', - asc=False, - print_out=False): - '''Evaluate a model based on f1_score (all except regression) - or mae (for regression). Supports 'binary', 'multi_class', - 'multi_label', and 'regression' evaluation. - - x : array - The input data for making predictions - y : array - The ground truth for x - model_id : int - It's possible to evaluate a specific model based on ID. - Can be None. - folds : int - Number of folds to use for cross-validation - sort_metric : string - A column name referring to the metric that was used in the - scan_object as a performance metric. This is used for sorting - the results to pick for evaluation. - shuffle : bool - Data is shuffled before evaluation. - mode : string - 'binary', 'multi_class', 'multi_label', or 'regression'. - asc : bool - False if the metric is to be optimized upwards - (e.g. accuracy or f1_score) - print_out : bool - Print out the results. - - TODO: add possibility to input custom metrics. 
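A short sketch of how this Evaluate class is meant to be called, with `scan_object`, `x_val`, and `y_val` as placeholders for a finished scan and held-out data:

```python
import talos as ta

e = ta.Evaluate(scan_object)

# 5-fold f1-score evaluation of the best val_acc model
scores = e.evaluate(x_val, y_val,
                    folds=5,
                    mode='binary',
                    metric='val_acc',
                    print_out=True)
```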
- - ''' - - out = [] - if model_id is None: - model_id = best_model(self.scan_object, metric, asc) - - model = activate_model(self.scan_object, model_id) - - kx, ky = kfold(x, y, folds, shuffle) - - for i in range(folds): - - y_pred = model.predict(kx[i], verbose=0) - - if mode == 'binary': - y_pred = y_pred >= .5 - scores = f1_score(y_pred, ky[i], average='binary') - - elif mode == 'multi_class': - y_pred = y_pred.argmax(axis=-1) - scores = f1_score(y_pred, ky[i], average='macro') - - if mode == 'multi_label': - y_pred = model.predict(kx[i]).argmax(axis=1) - scores = f1_score(y_pred, - ky[i].argmax(axis=1), - average='macro') - - elif mode == 'regression': - y_pred = model.predict(kx[i]) - scores = mean_absolute_error(y_pred, ky[i]) - - out.append(scores) - - if print_out is True: - print("mean : %.2f \n std : %.2f" % (mean(out), std(out))) - - return out -import numpy as np - -from talos.model.layers import hidden_layers -from talos.model.normalizers import lr_normalizer - -from keras.models import Sequential -from keras.layers import Dropout, Flatten -from keras.layers import LSTM, Conv1D, SimpleRNN, Dense, Bidirectional - -try: - from wrangle.reshape_to_conv1d import reshape_to_conv1d as array_reshape_conv1d -except ImportError: - from wrangle import array_reshape_conv1d - - -class KerasModel: - - def __init__(self): - '''An input model for Scan(). Optimized for being used together with - Params(). For example: - - Scan(x=x, y=y, params=Params().params, model=KerasModel().model) - - NOTE: the grid from Params() is very large, so grid_downsample or - round_limit accordingly in Scan(). - - ''' - - self.model = self._create_input_model - - def _create_input_model(self, x_train, y_train, x_val, y_val, params): - - model = Sequential() - - if params['network'] != 'dense': - x_train = array_reshape_conv1d(x_train) - x_val = array_reshape_conv1d(x_val) - - if params['network'] == 'conv1d': - model.add(Conv1D(params['first_neuron'], x_train.shape[1])) - model.add(Flatten()) - - elif params['network'] == 'lstm': - model.add(LSTM(params['first_neuron'])) - - if params['network'] == 'bidirectional_lstm': - model.add(Bidirectional(LSTM(params['first_neuron']))) - - elif params['network'] == 'simplernn': - model.add(SimpleRNN(params['first_neuron'])) - - elif params['network'] == 'dense': - model.add(Dense(params['first_neuron'], - input_dim=x_train.shape[1], - activation='relu')) - - model.add(Dropout(params['dropout'])) - - # add hidden layers to the model - hidden_layers(model, params, 1) - - # output layer (this is scetchy) - try: - last_neuron = y_train.shape[1] - except IndexError: - if len(np.unique(y_train)) == 2: - last_neuron = 1 - else: - last_neuron = len(np.unique(y_train)) - - model.add(Dense(last_neuron, - activation=params['last_activation'])) - - # bundle the optimizer with learning rate changes - optimizer = params['optimizer'](lr=lr_normalizer(params['lr'], - params['optimizer'])) - - # compile the model - model.compile(optimizer=optimizer, - loss=params['losses'], - metrics=['acc']) - - # fit the model - out = model.fit(x_train, y_train, - batch_size=params['batch_size'], - epochs=params['epochs'], - verbose=0, - validation_data=[x_val, y_val]) - - # pass the output to Talos - return out, model -import numpy as np -from keras.optimizers import Adam, Nadam, Adadelta, SGD - - -loss = {'binary': ['binary_crossentropy', 'logcosh'], - 'multi_class': ['sparse_categorical_crossentropy'], - 'multi_label': ['categorical_crossentropy'], - 'continuous': ['mae']} - -last_activation = 
{'binary': ['sigmoid'], - 'multi_class': ['softmax'], - 'multi_label': ['softmax'], - 'continuous': [None]} - - -class Params: - - def __init__(self, - params=None, - task='binary', - replace=True, - auto=True, - network=True): - '''A facility for generating or appending params dictionary. - - params : dict or None - task : str - 'binary', 'multi_class', 'multi_label', or 'continuous' - replace : bool - Replace current dictionary entries with new ones. - auto : bool - Automatically generate or append params dictionary with - all available parameters. - network : bool - Adds several network architectures as parameters. This is to be - used as an input together with KerasModel(). If False then only - 'dense' will be added. - ''' - - self.task = task - self.replace = replace - self.network = network - - if params is None: - self.params = {} - else: - self.params = params - - if auto: - self.automated() - - def automated(self, shapes='fixed'): - '''Automatically generate a comprehensive - parameter dict to be used in Scan() - - shapes : string - Either 'fixed' or 'sloped' - - ''' - - if shapes == 'fixed': - self.shapes() - else: - self.shapes_slope() - self.layers() - self.dropout() - self.optimizers() - self.activations() - self.neurons() - self.losses() - self.batch_size() - self.epochs() - self.kernel_initializers() - self.lr() - if self.network: - self.networks() - else: - self.params['network'] = 'dense' - self.last_activations() - - def shapes(self): - '''Uses triangle, funnel, and brick shapes.''' - - self._append_params('shapes', ['triangle', 'funnel', 'brick']) - - def shapes_slope(self): - '''Uses a single decimal float for values below 0.5 to - reduce the width of the following layer.''' - - self._append_params('shapes', np.arange(0, .6, 0.1).tolist()) - - def layers(self, max_layers=6): - - self._append_params('hidden_layers', list(range(max_layers))) - - def dropout(self): - '''Dropout from 0.0 to 0.75''' - - self._append_params('dropout', np.round( - np.arange(0, .85, 0.1), 2).tolist()) - - def optimizers(self, task='binary'): - '''Adam, Nadam, SGD, and adadelta.''' - self._append_params('optimizer', [Adam, Nadam, Adadelta, SGD]) - - def activations(self): - - self._append_params('activation', ['relu', 'elu']) - - def losses(self): - - self._append_params('losses', loss[self.task]) - - def neurons(self, bottom_value=8, max_value=None, steps=None): - '''max_value and steps has to be either None or - integer value at the same time.''' - - if max_value is None and steps is None: - values = [int(np.exp2(i)) for i in range(3, 11)] - else: - values = range(bottom_value, max_value, steps) - - self._append_params('first_neuron', values) - - def batch_size(self, bottom_value=8, max_value=None, steps=None): - '''max_value and steps has to be either None or - integer value at the same time.''' - - if max_value is None and steps is None: - values = [int(np.exp2(i/2)) for i in range(3, 15)] - else: - values = range(bottom_value, max_value, steps) - - self._append_params('batch_size', values) - - def epochs(self, bottom_value=50, max_value=None, steps=None): - '''max_value and steps has to be either None or - integer value at the same time.''' - - if max_value is None and steps is None: - values = [int(np.exp2(i/2))+50 for i in range(3, 15)] - else: - values = range(bottom_value, max_value, steps) - - self._append_params('epochs', values) - - def kernel_initializers(self): - - self._append_params('kernel_initializer', - ['glorot_uniform', 'glorot_normal', - 'random_uniform', 'random_normal']) 
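Putting the pieces together, Params() is designed to feed Scan() directly, typically alongside KerasModel(); a minimal sketch, with `x` and `y` as placeholder arrays and the downsample rate chosen arbitrarily:

```python
import talos as ta

# generate the broad default search space for a binary task
p = ta.Params(task='binary').params

# the resulting grid is very large, so downsample aggressively
ta.Scan(x, y,
        params=p,
        model=ta.KerasModel().model,
        grid_downsample=0.01)
```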
- - def lr(self): - - a = np.round(np.arange(0.01, 0.2, 0.02), 3).tolist() - b = np.round(np.arange(0, 1, 0.2), 2).tolist() - c = list(range(0, 11)) - - self._append_params('lr', a + b + c) - - def networks(self): - '''Adds four different network architectures are parameters: - dense, simplernn, lstm, conv1d.''' - - self._append_params('network', ['dense', - 'simplernn', - 'lstm', - 'bidirectional_lstm', - 'conv1d']) - - def last_activations(self): - - self._append_params('last_activation', last_activation[self.task]) - - def _append_params(self, label, values): - - if self.replace is False: - try: - self.params[label] - except KeyError: - self.params[label] = values - - else: - self.params[label] = values -from ..utils.best_model import best_model, activate_model - - -class Predict: - - '''Class for making predictions on the models that are stored - in the Scan() object''' - - def __init__(self, scan_object): - '''Takes in as input a Scan() object''' - - self.scan_object = scan_object - self.data = scan_object.data - - def predict(self, x, model_id=None, metric='val_acc', asc=False): - '''Makes a probability prediction from input x. If model_id - is not given, then best_model will be used.''' - - if model_id is None: - model_id = best_model(self.scan_object, metric, asc) - - model = activate_model(self.scan_object, model_id) - - return model.predict(x) - - def predict_classes(self, x, model_id=None, metric='val_acc', asc=False): - '''Makes a class prediction from input x. If model_id - is not given, then best_model will be used.''' - - if model_id is None: - model_id = best_model(self.scan_object, metric, asc) - - model = activate_model(self.scan_object, model_id) - - return model.predict_classes(x) -from pandas import read_csv -from ..utils.connection_check import is_connected -from ..metrics.names import metric_names - -if is_connected() is True: - from astetik import line, hist, corr, regs, bargrid, kde, box - - -class Reporting: - - '''A suite of commands that are useful for analyzing the results - of a completed scan, or during a scan. - - filename :: the name of the experiment log from Scan()''' - - def __init__(self, source=None): - '''Takes as input a filename to the experiment - log or the Scan object''' - - if isinstance(source, str): - self.data = read_csv(source) - else: - self.data = source.data - - def high(self, metric='val_acc'): - '''Returns the highest value for a given metric''' - - return max(self.data[metric]) - - def rounds(self): - '''Returns the number of rounds in the experiment''' - - return len(self.data) - - def rounds2high(self, metric='val_acc'): - '''Returns the number of rounds it took to get to the - highest value for a given metric.''' - - return self.data[self.data[metric] == self.data[metric].max()].index[0] - - def low(self, metric='val_acc'): - '''Returns the minimum value for a given metric''' - - return min(self.data[metric]) - - def correlate(self, metric='val_acc'): - '''Returns a correlation table against a given metric. 
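For reference, a sketch of the Predict class above in use; `x_new` is a placeholder for unseen input data:

```python
import talos as ta

p = ta.Predict(scan_object)

probs = p.predict(x_new)            # probabilities from the best model
labels = p.predict_classes(x_new)   # hard class predictions
```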
-        Drops all other metrics and correlates against hyperparameters only.'''
-
-        columns = [c for c in self.data.columns if c not in metric_names()]
-        out = self.data[columns]
-        out.insert(0, metric, self.data[metric])
-
-        out = out.corr()[metric]
-
-        return out[out != 1]
-
-    def plot_line(self, metric='val_acc'):
-        '''A line plot for a given metric, with rounds on the x-axis
-
-        NOTE: remember to invoke %matplotlib inline if in notebook
-
-        metric :: the metric to correlate against
-
-        '''
-
-        return line(self.data, metric)
-
-    def plot_hist(self, metric='val_acc', bins=10):
-        '''A histogram for a given metric
-
-        NOTE: remember to invoke %matplotlib inline if in notebook
-
-        metric :: the metric to correlate against
-        bins :: number of bins to use in histogram
-
-        '''
-
-        return hist(self.data, metric, bins=bins)
-
-    def plot_corr(self, metric='val_acc', color_grades=5):
-        '''A heatmap with a single metric and hyperparameters.
-
-        NOTE: remember to invoke %matplotlib inline if in notebook
-
-        metric :: the metric to correlate against
-        color_grades :: number of colors to use in heatmap'''
-
-        cols = self._cols(metric)
-
-        return corr(self.data[cols], color_grades=color_grades)
-
-    def plot_regs(self, x='val_acc', y='val_loss'):
-        '''A regression plot with data on two axes
-
-        x = data for the x axis
-        y = data for the y axis
-        '''
-
-        return regs(self.data, x, y)
-
-    def plot_box(self, x, y='val_acc', hue=None):
-        '''A box plot with data on two axes
-
-        x = data for the x axis
-        y = data for the y axis
-        hue = data for the hue separation
-        '''
-
-        return box(self.data, x, y, hue)
-
-    def plot_bars(self, x, y, hue, col):
-        '''A comparison plot across four axes'''
-
-        return bargrid(self.data,
-                       x=x,
-                       y=y,
-                       hue=hue,
-                       col=col,
-                       col_wrap=4)
-
-    def plot_kde(self, x='val_acc', y=None):
-        '''Kernel Density Estimation type histogram with
-        support for 1 or 2 axes of data'''
-
-        return kde(self.data, x, y)
-
-    def table(self, metric='val_acc', sort_by=None, ascending=False):
-        '''Shows a table with hyperparameters and a given metric
-
-        EXAMPLE USE:
-
-        ra1 = Reporting('diabetes_1.csv')
-        ra1.table(sort_by='fmeasure_acc', ascending=False)
-
-        PARAMS:
-
-        metric :: accepts single column name as string or multiple in list
-        sort_by :: the column name sorting should be based on
-        ascending :: if sorting is ascending or not
-
-        '''
-
-        cols = self._cols(metric)
-
-        if sort_by is None:
-            sort_by = metric
-
-        out = self.data[cols].sort_values(sort_by, ascending=ascending)
-
-        return out
-
-    def best_params(self, metric='val_acc', n=10, ascending=False):
-        '''Get the best parameters of the experiment based on a metric.
-        Returns a numpy array with the values in a format that can be used
-        with the talos backend in Scan().
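A quick sketch of a typical Reporting session; the log file name is a placeholder, and a Scan object can be passed instead of a path:

```python
import talos as ta

r = ta.Reporting('experiment_log.csv')

r.high()          # highest val_acc in the experiment
r.rounds2high()   # rounds it took to reach it
r.correlate()     # hyperparameter correlations against val_acc
r.plot_corr()     # the same as a heatmap (requires %matplotlib inline)
```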
Adds an index as the last column.''' - - cols = self._cols(metric) - out = self.data[cols].sort_values(metric, ascending=ascending) - out = out.drop(metric, axis=1).head(n) - out.insert(out.shape[1], 'index_num', range(len(out))) - - return out.values - - def _cols(self, metric): - '''Helper to remove other than desired metric from data table''' - - cols = [col for col in self.data.columns if col not in metric_names()] - - if isinstance(metric, list) is False: - metric = [metric] - for i, metric in enumerate(metric): - cols.insert(i, metric) - - # make sure only unique values in col list - cols = list(set(cols)) - - return cols -from zipfile import ZipFile - -from pandas import read_csv -from numpy import load - -from talos.utils.load_model import load_model - - -class Restore: - - '''Restores the scan_object that had been stored locally as a result - of talos.Deploy(scan_object, 'example') - - USE: - - diabetes = ta.Scan(x, y, p, input_model) - ta.Deploy(diabetes, 'diabetes') - ta.Restore('diabetes.zip') - - ''' - - def __init__(self, path_to_zip): - - # create paths - self.path_to_zip = path_to_zip - self.extract_to = path_to_zip.replace('.zip', '') - self.package_name = self.extract_to.split('/')[-1] - self.file_prefix = self.extract_to + '/' + self.package_name - - # extract the zip - # unpack_archive(self.path_to_zip, self.extract_to) - z = ZipFile(self.path_to_zip, mode='r') - z.extractall(self.extract_to) - - # add params dictionary - self.params = load(self.file_prefix + '_params.npy').item() - - # add experiment details - self.details = read_csv(self.file_prefix + '_details.txt', header=None) - - # add x data sample - self.x = read_csv(self.file_prefix + '_x.csv', header=None) - - # add y data sample - self.y = read_csv(self.file_prefix + '_y.csv', header=None) - - # add model - self.model = load_model(self.file_prefix + '_model') - - # add results - self.results = read_csv(self.file_prefix + '_results.csv') - self.results.drop('Unnamed: 0', axis=1, inplace=True) - - # clean up - del self.extract_to, self.file_prefix - del self.package_name, self.path_to_zip -from . import datasets -from . import models -from . import params -from . 
import pipelines
-def icu_mortality(samples=None):
-
-    import pandas as pd
-    base = 'https://raw.githubusercontent.com/autonomio/datasets/master/autonomio-datasets/'
-    df = pd.read_csv(base + 'icu_mortality.csv')
-    df = df.dropna(thresh=3580, axis=1)
-    df = df.dropna()
-    df = df.sample(frac=1).head(samples)
-    y = df['hospitalmortality'].astype(int).values
-    x = df.drop('hospitalmortality', axis=1).values
-
-    return x, y
-
-
-def titanic():
-
-    import pandas as pd
-    base = 'https://raw.githubusercontent.com/autonomio/datasets/master/autonomio-datasets/'
-    df = pd.read_csv(base + 'titanic.csv')
-
-    y = df.survived.values
-
-    x = df[['age', 'sibsp', 'parch']]
-    cols = ['class', 'embark_town', 'who', 'deck', 'sex']
-
-    for col in cols:
-        x = pd.merge(x,
-                     pd.get_dummies(df[col]),
-                     left_index=True,
-                     right_index=True)
-
-    x = x.dropna()
-    x = x.values
-
-    return x, y
-
-
-def iris():
-
-    import pandas as pd
-    from keras.utils import to_categorical
-    base = 'https://raw.githubusercontent.com/autonomio/datasets/master/autonomio-datasets/'
-    df = pd.read_csv(base + 'iris.csv')
-    df['species'] = df['species'].factorize()[0]
-    df = df.sample(len(df))
-
-    y = to_categorical(df['species'])
-    x = df.iloc[:, :-1].values
-
-    return x, y
-
-
-def cervical_cancer():
-
-    import pandas as pd
-    from numpy import nan
-    base = 'https://raw.githubusercontent.com/autonomio/datasets/master/autonomio-datasets/'
-    df = pd.read_csv(base + 'cervical_cancer.csv')
-    df = df.replace('?', nan)
-    df = df.drop(['citology', 'hinselmann', 'biopsy'], axis=1)
-    df = df.drop(['since_first_diagnosis',
-                  'since_last_diagnosis'], axis=1).dropna()
-
-    df = df.astype(float)
-
-    y = df.schiller.values
-    x = df.drop('schiller', axis=1).values
-
-    return x, y
-
-
-def breast_cancer():
-
-    import pandas as pd
-    base = 'https://raw.githubusercontent.com/autonomio/datasets/master/autonomio-datasets/'
-    df = pd.read_csv(base + 'breast_cancer.csv')
-
-    # then some minimal data cleanup
-    df.drop("Unnamed: 32", axis=1, inplace=True)
-    df.drop("id", axis=1, inplace=True)
-
-    # separate into x and y
-    y = df.diagnosis.values
-    x = df.drop('diagnosis', axis=1).values
-
-    # convert the string labels to binary
-    y = (y == 'M').astype(int)
-
-    return x, y
-#!/usr/bin/env python
-
-
-def breast_cancer(x_train, y_train, x_val, y_val, params):
-
-    from keras.models import Sequential
-    from keras.layers import Dropout, Dense
-    from talos.model import lr_normalizer, early_stopper, hidden_layers
-
-    from talos.metrics.keras_metrics import matthews_correlation_acc, precision_acc
-    from talos.metrics.keras_metrics import recall_acc, fmeasure_acc
-
-    model = Sequential()
-    model.add(Dense(params['first_neuron'],
-                    input_dim=x_train.shape[1],
-                    activation='relu'))
-
-    model.add(Dropout(params['dropout']))
-
-    hidden_layers(model, params, 1)
-
-    model.add(Dense(1, activation=params['last_activation']))
-
-    model.compile(optimizer=params['optimizer']
-                  (lr=lr_normalizer(params['lr'],
-                                    params['optimizer'])),
-                  loss=params['losses'],
-                  metrics=['acc',
-                           fmeasure_acc,
-                           recall_acc,
-                           precision_acc,
-                           matthews_correlation_acc])
-
-    results = model.fit(x_train, y_train,
-                        batch_size=params['batch_size'],
-                        epochs=params['epochs'],
-                        verbose=0,
-                        validation_data=[x_val, y_val],
-                        callbacks=[early_stopper(params['epochs'],
-                                                 mode='moderate',
-                                                 monitor='val_fmeasure')])
-
-    return results, model
-
-
-def cervical_cancer(x_train, y_train, x_val, y_val, params):
-
-    from keras.models import Sequential
from keras.layers import Dropout, Dense - from talos.model import lr_normalizer, early_stopper, hidden_layers - - from talos.metrics.keras_metrics import matthews_correlation_acc, precision_acc - from talos.metrics.keras_metrics import recall_acc, fmeasure_acc - - model = Sequential() - model.add(Dense(params['first_neuron'], - input_dim=x_train.shape[1], - activation='relu')) - - model.add(Dropout(params['dropout'])) - - hidden_layers(model, params, 1) - - model.add(Dense(1, activation=params['last_activation'])) - - model.compile(optimizer=params['optimizer'] - (lr=lr_normalizer(params['lr'], - params['optimizer'])), - loss=params['losses'], - metrics=['acc', - fmeasure_acc, - recall_acc, - precision_acc, - matthews_correlation_acc]) - - results = model.fit(x_train, y_train, - batch_size=params['batch_size'], - epochs=params['epochs'], - verbose=0, - validation_data=[x_val, y_val], - callbacks=[early_stopper(params['epochs'], - mode='moderate', - monitor='val_fmeasure')]) - - return results, model - - -def titanic(x_train, y_train, x_val, y_val, params): - - from keras.models import Sequential - from keras.layers import Dropout, Dense - - # note how instead of passing the value, we pass a dictionary entry - model = Sequential() - model.add(Dense(params['first_neuron'], - input_dim=x_train.shape[1], - activation='relu')) - - # same here, just passing a dictionary entry - model.add(Dropout(params['dropout'])) - - # again, instead of the activation name, we have a dictionary entry - model.add(Dense(1, activation=params['last_activation'])) - - # here are using a learning rate boundary - model.compile(optimizer=params['optimizer'], - loss=params['losses'], - metrics=['acc']) - - # here we are also using the early_stopper function for a callback - out = model.fit(x_train, y_train, - batch_size=params['batch_size'], - epochs=2, - verbose=0, - validation_data=[x_val, y_val]) - - return out, model - - -def iris(x_train, y_train, x_val, y_val, params): - - from keras.models import Sequential - from keras.layers import Dropout, Dense - from talos.model import lr_normalizer, early_stopper, hidden_layers - - # note how instead of passing the value, we pass a dictionary entry - model = Sequential() - model.add(Dense(params['first_neuron'], - input_dim=x_train.shape[1], - activation='relu')) - - # same here, just passing a dictionary entry - model.add(Dropout(params['dropout'])) - - # with this call we can create any number of hidden layers - hidden_layers(model, params, y_train.shape[1]) - - # again, instead of the activation name, we have a dictionary entry - model.add(Dense(y_train.shape[1], - activation=params['last_activation'])) - - # here are using a learning rate boundary - model.compile(optimizer=params['optimizer'] - (lr=lr_normalizer(params['lr'], - params['optimizer'])), - loss=params['losses'], - metrics=['acc']) - - # here we are also using the early_stopper function for a callback - out = model.fit(x_train, y_train, - batch_size=params['batch_size'], - epochs=params['epochs'], - verbose=0, - validation_data=[x_val, y_val], - callbacks=[early_stopper(params['epochs'], mode=[1, 1])]) - - return out, model -def titanic(): - - # here use a standard 2d dictionary for inputting the param boundaries - p = {'lr': (0.5, 5, 10), - 'first_neuron': [4, 8, 16], - 'batch_size': [20, 30, 40], - 'dropout': (0, 0.5, 5), - 'optimizer': ['Adam', 'Nadam'], - 'losses': ['logcosh', 'binary_crossentropy'], - 'activation': ['relu', 'elu'], - 'last_activation': ['sigmoid']} - - return p - - -def iris(): - - 
from keras.optimizers import Adam, Nadam - from keras.losses import logcosh, categorical_crossentropy - from keras.activations import relu, elu, softmax - - # here use a standard 2d dictionary for inputting the param boundaries - p = {'lr': (0.5, 5, 10), - 'first_neuron': [4, 8, 16, 32, 64], - 'hidden_layers': [0, 1, 2, 3, 4], - 'batch_size': (2, 30, 10), - 'epochs': [2], - 'dropout': (0, 0.5, 5), - 'weight_regulizer': [None], - 'emb_output_dims': [None], - 'shape': ['brick', 'triangle', 0.2], - 'shapes': ['brick', 'triangle', 0.2], - 'optimizer': [Adam, Nadam], - 'losses': [logcosh, categorical_crossentropy], - 'activation': [relu, elu], - 'last_activation': [softmax]} - - return p - - -def breast_cancer(): - - from keras.optimizers import Adam, Nadam, RMSprop - from keras.losses import logcosh, binary_crossentropy - from keras.activations import relu, elu, sigmoid - - # then we can go ahead and set the parameter space - p = {'lr': (0.5, 5, 10), - 'first_neuron': [4, 8, 16, 32, 64], - 'hidden_layers': [0, 1, 2], - 'batch_size': (2, 30, 10), - 'epochs': [50, 100, 150], - 'dropout': (0, 0.5, 5), - 'shapes': ['brick', 'triangle', 'funnel'], - 'optimizer': [Adam, Nadam, RMSprop], - 'losses': [logcosh, binary_crossentropy], - 'activation': [relu, elu], - 'last_activation': [sigmoid]} - - return p - - -def cervical_cancer(): - return breast_cancer() -def breast_cancer(round_limit=2, random_method='uniform_mersenne'): - '''Performs a Scan with Iris dataset and simple dense net''' - import talos as ta - scan_object = ta.Scan(ta.templates.datasets.breast_cancer()[0], - ta.templates.datasets.breast_cancer()[1], - ta.templates.params.breast_cancer(), - ta.templates.models.breast_cancer, - round_limit=round_limit) - - return scan_object - - -def cervical_cancer(round_limit=2, random_method='uniform_mersenne'): - '''Performs a Scan with Iris dataset and simple dense net''' - import talos as ta - scan_object = ta.Scan(ta.templates.datasets.cervical_cancer()[0], - ta.templates.datasets.cervical_cancer()[1], - ta.templates.params.cervical_cancer(), - ta.templates.models.cervical_cancer, - round_limit=round_limit) - - return scan_object - - -def iris(round_limit=2, random_method='uniform_mersenne'): - '''Performs a Scan with Iris dataset and simple dense net''' - import talos as ta - scan_object = ta.Scan(ta.templates.datasets.iris()[0], - ta.templates.datasets.iris()[1], - ta.templates.params.iris(), - ta.templates.models.iris, - round_limit=round_limit) - - return scan_object - - -def titanic(round_limit=2, random_method='uniform_mersenne'): - '''Performs a Scan with Iris dataset and simple dense net''' - import talos as ta - scan_object = ta.Scan(ta.templates.datasets.titanic()[0][:50], - ta.templates.datasets.titanic()[1][:50], - ta.templates.params.titanic(), - ta.templates.models.titanic, - round_limit=round_limit) - - return scan_object -from scipy.stats import entropy -from numpy import nan - - -def epoch_entropy(history): - '''MEASURE EPOCH ENTROPY - - BINARY/CATEGORICAL: - - Measures the KL divergence of the acc and loss results - per epoch of a given permutation. - - CONTINUOUS: - - Measures shannon entropy for loss. - - # TODO Right now this does not handle all cases well and needs - to be thought about properly. 
- ''' - - keys = list(history.history.keys()) - no_of_items = len(keys) - - if no_of_items == 1: - if 'loss' in keys: - loss_entropy = entropy(history.history['loss']) - acc_entropy = nan - else: - loss_entropy = nan - acc_entropy = nan - - elif no_of_items == 2: - if 'acc' in keys and 'loss' in keys: - loss_entropy = entropy(history.history['loss']) - acc_entropy = entropy(history.history['acc']) - else: - loss_entropy = nan - acc_entropy = nan - - elif no_of_items >= 4: - if 'acc' in keys: - acc_entropy = entropy(history.history['val_acc'], - history.history['acc']) - else: - acc_entropy = nan - - if 'loss' in keys: - loss_entropy = entropy(history.history['val_loss'], - history.history['loss']) - else: - loss_entropy = nan - - return [acc_entropy, loss_entropy] - - -def root_mean_squared_error(y_true, y_pred): - from keras import backend as K - return K.sqrt(K.mean(K.square(y_pred - y_true), axis=-1)) - - -def matthews_correlation_acc(y_true, y_pred): - '''Calculates the Matthews correlation coefficient measure for quality - of binary classification problems. - ''' - from keras import backend as K - y_pred_pos = K.round(K.clip(y_pred, 0, 1)) - y_pred_neg = 1 - y_pred_pos - - y_pos = K.round(K.clip(y_true, 0, 1)) - y_neg = 1 - y_pos - - tp = K.sum(y_pos * y_pred_pos) - tn = K.sum(y_neg * y_pred_neg) - - fp = K.sum(y_neg * y_pred_pos) - fn = K.sum(y_pos * y_pred_neg) - - numerator = (tp * tn - fp * fn) - denominator = K.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn)) - - return numerator / (denominator + K.epsilon()) - - -def precision_acc(y_true, y_pred): - '''Calculates the precision, a metric for multi-label classification of - how many selected items are relevant. - ''' - from keras import backend as K - true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) - predicted_positives = K.sum(K.round(K.clip(y_pred, 0, 1))) - precision = true_positives / (predicted_positives + K.epsilon()) - return precision - - -def recall_acc(y_true, y_pred): - '''Calculates the recall, a metric for multi-label classification of - how many relevant items are selected. - ''' - from keras import backend as K - true_positives = K.sum(K.round(K.clip(y_true * y_pred, 0, 1))) - possible_positives = K.sum(K.round(K.clip(y_true, 0, 1))) - recall = true_positives / (possible_positives + K.epsilon()) - return recall - - -def fbeta_score_acc(y_true, y_pred, beta=1): - '''Calculates the F score, the weighted harmonic mean of precision and recall. - This is useful for multi-label classification, where input samples can be - classified as sets of labels. By only using accuracy (precision) a model - would achieve a perfect score by simply assigning every class to every - input. In order to avoid this, a metric should penalize incorrect class - assignments as well (recall). The F-beta score (ranged from 0.0 to 1.0) - computes this, as a weighted mean of the proportion of correct class - assignments vs. the proportion of incorrect class assignments. - With beta = 1, this is equivalent to a F-measure. With beta < 1, assigning - correct classes becomes more important, and with beta > 1 the metric is - instead weighted towards penalizing incorrect class assignments. - ''' - from keras import backend as K - if beta < 0: - raise ValueError('The lowest choosable beta is zero (only precision).') - - # If there are no true positives, fix the F score at 0 like sklearn. 
-    if K.sum(K.round(K.clip(y_true, 0, 1))) == 0:
-        return 0
-
-    p = precision_acc(y_true, y_pred)
-    r = recall_acc(y_true, y_pred)
-    bb = beta ** 2
-    fbeta_score = (1 + bb) * (p * r) / (bb * p + r + K.epsilon())
-    return fbeta_score
-
-
-def fmeasure_acc(y_true, y_pred):
-    '''Calculates the f-measure, the harmonic mean of precision and recall.
-    '''
-    return fbeta_score_acc(y_true, y_pred, beta=1)
-def metric_names():
-    '''These are used as a shorthand for filtering out columns
-    that should not be included as dependent variables for optimizing.'''
-
-    return ['round_epochs',
-            'loss',
-            'val_loss',
-            'acc',
-            'val_acc',
-            'fmeasure_acc',
-            'val_fmeasure_acc',
-            'recall_acc',
-            'val_recall_acc',
-            'precision_acc',
-            'val_precision_acc',
-            'matthews_correlation_acc',
-            'val_matthews_correlation_acc',
-            'val_root_mean_squared_error',
-            'root_mean_squared_error',
-            'val_mean_squared_error',
-            'mean_squared_error',
-            'val_mean_average_error',
-            'mean_average_error',
-            ]
-from .early_stopper import early_stopper
-from .layers import hidden_layers
-from .normalizers import lr_normalizer
-from keras.callbacks import EarlyStopping
-
-
-def early_stopper(epochs,
-                  monitor='val_loss',
-                  mode='moderate',
-                  min_delta=None,
-                  patience=None):
-    '''EARLY STOP CALLBACK
-
-    Helps prevent wasting time when the monitored metric is no longer
-    improving. Offers two pre-determined settings, 'moderate' and
-    'strict', and also accepts a list with two values:
-
-    min_delta = the change below which the flag is raised
-    patience = the number of flagged epochs before termination
-
-    '''
-
-    if mode == 'moderate':
-        _es_out = EarlyStopping(monitor=monitor,
-                                min_delta=0,
-                                patience=int(epochs / 10),
-                                verbose=0, mode='auto')
-    elif mode == 'strict':
-        _es_out = EarlyStopping(monitor=monitor,
-                                min_delta=0,
-                                patience=2,
-                                verbose=0, mode='auto')
-    elif isinstance(mode, list):
-        _es_out = EarlyStopping(monitor=monitor,
-                                min_delta=mode[0],
-                                patience=mode[1],
-                                verbose=0, mode='auto')
-    else:
-        raise ValueError("mode must be 'moderate', 'strict', or a list")
-    return _es_out
-def ingest_model(self):
-    '''Ingests the model that is input by the user
-    through the Scan() model parameter.'''
-
-    return self.model(self.x_train,
-                      self.y_train,
-                      self.x_val,
-                      self.y_val,
-                      self.round_params)
-from keras.layers import Dense, Dropout
-from .network_shape import network_shape
-from ..utils.exceptions import TalosParamsError
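The early_stopper() helper above is meant to be dropped into model.fit() inside a Talos model function; a sketch, assuming `model`, `params`, and the train/validation arrays come from the surrounding function:

```python
from talos.model import early_stopper

history = model.fit(x_train, y_train,
                    batch_size=params['batch_size'],
                    epochs=params['epochs'],
                    verbose=0,
                    validation_data=[x_val, y_val],
                    # 'moderate' stops after epochs/10 rounds without
                    # improvement in val_loss
                    callbacks=[early_stopper(params['epochs'],
                                             mode='moderate')])
```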
-
-
-def hidden_layers(model, params, last_neuron):
-    '''HIDDEN LAYER Generator
-
-    NOTE: 'first_neuron', 'dropout', and 'hidden_layers' need
-    to be present in the params dictionary.
-
-    Hidden layer generation for the cases where the number
-    of layers is used as a variable in the optimization process.
-    Handles things in a way where any number of layers can be tried
-    with matching hyperparameters.'''
-
-    kernel_initializer = params.get('kernel_initializer', 'glorot_uniform')
-    kernel_regularizer = params.get('kernel_regularizer', None)
-    bias_initializer = params.get('bias_initializer', 'zeros')
-    bias_regularizer = params.get('bias_regularizer', None)
-    use_bias = params.get('use_bias', True)
-    activity_regularizer = params.get('activity_regularizer', None)
-    kernel_constraint = params.get('kernel_constraint', None)
-    bias_constraint = params.get('bias_constraint', None)
-
-    # check for the params that are required for hidden_layers
-    for param in ['shapes', 'first_neuron', 'dropout']:
-        if param not in params:
-            raise TalosParamsError(
-                "hidden_layers requires '" + param + "' in params")
-
-    layer_neurons = network_shape(params, last_neuron)
-
-    for i in range(params['hidden_layers']):
-
-        model.add(Dense(layer_neurons[i],
-                        activation=params['activation'],
-                        use_bias=use_bias,
-                        kernel_initializer=kernel_initializer,
-                        kernel_regularizer=kernel_regularizer,
-                        bias_initializer=bias_initializer,
-                        bias_regularizer=bias_regularizer,
-                        activity_regularizer=activity_regularizer,
-                        kernel_constraint=kernel_constraint,
-                        bias_constraint=bias_constraint))
-
-        model.add(Dropout(params['dropout']))
-import numpy as np
-
-
-def network_shape(params, last_neuron):
-    '''Provides the ability to include network shape in experiments. If the
-    round's params dictionary contains a float value for params['shapes'],
-    a linear contraction towards the last_neuron value is applied. The
-    higher the value, the fewer layers it takes to drop below last_neuron.
-
-    Supports three inbuilt shapes: 'brick', 'funnel', and 'triangle'.
-
-    params : dict
-        Scan() params for a single round.
-    last_neuron : int
-        Number of neurons on the output layer in the Keras model.
-    '''
-
-    layers = params['hidden_layers']
-    shape = params['shapes']
-    first_neuron = params['first_neuron']
-    out = []
-    n = first_neuron
-
-    # the case where hidden_layers is zero
-    if layers == 0:
-        return [0]
-
-    # the cases where an angle is applied
-    if isinstance(shape, float):
-
-        for i in range(layers):
-
-            n *= 1 - shape
-
-            if n > last_neuron:
-                out.append(int(n))
-            else:
-                out.append(last_neuron)
-
-    # the case where a rectangular shape is used
-    elif shape == 'brick':
-        out = [first_neuron] * layers
-
-    elif shape == 'funnel':
-        for i in range(layers + 1):
-            n -= int((first_neuron - last_neuron) / layers)
-            out.append(n)
-        out.pop(-1)
-
-    elif shape == 'triangle':
-        out = np.linspace(first_neuron,
-                          last_neuron,
-                          layers + 2,
-                          dtype=int).tolist()
-        out.pop(0)
-        out.pop(-1)
-        out.reverse()
-
-    return out
-from keras.optimizers import SGD, Adam, Adadelta, Adagrad, Adamax, RMSprop
-from keras.optimizers import Nadam
-
-
-def lr_normalizer(lr, optimizer):
-    """Assuming a default learning rate of 1, rescales the learning rate
-    such that learning rates amongst different optimizers are more or less
-    equivalent.
-
-    Parameters
-    ----------
-    lr : float
-        The learning rate.
-    optimizer : keras optimizer
-        The optimizer.
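To sanity-check the shape logic above, here is what network_shape() returns for a small funnel configuration; only the three keys the function reads are shown:

```python
from talos.model.network_shape import network_shape

params = {'hidden_layers': 3,
          'shapes': 'funnel',
          'first_neuron': 64}

# widths contract linearly from 64 towards the output layer
print(network_shape(params, last_neuron=1))  # [43, 22, 1]
```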
For example, Adagrad, Adam, RMSprop. - """ - - if optimizer == Adadelta: - pass - elif optimizer == SGD or optimizer == Adagrad: - lr /= 100.0 - elif optimizer == Adam or optimizer == RMSprop: - lr /= 1000.0 - elif optimizer == Adamax or optimizer == Nadam: - lr /= 500.0 - - return lr -import numpy as np - -from ..reducers.sample_reducer import sample_reducer -from ..reducers.permutation_filter import permutation_filter - - -class ParamGrid: - - '''Suite for handling parameters internally within Talos - - Takes as input the parameter dictionary from the user, and - returns a class object which can then be used to pick parameters - for each round together with other parameter related operations. - - ''' - - def __init__(self, main_self): - - self.main_self = main_self - - # creates a reference dictionary for column number to label - self.param_reference = {} - for i, col in enumerate(self.main_self.params.keys()): - self.param_reference[col] = i - - # convert the input to useful format - self._p = self._param_input_conversion() - - # create a list of lists, each list being a parameter sequence - ls = [list(self._p[key]) for key in self._p.keys()] - - # get the number of total dimensions / permutations - virtual_grid_size = 1 - for l in ls: - virtual_grid_size *= len(l) - final_grid_size = virtual_grid_size - - # calculate the size of the downsample - if self.main_self.grid_downsample is not None: - final_grid_size = int(virtual_grid_size * - self.main_self.grid_downsample) - - # take round_limit into account - if self.main_self.round_limit is not None: - final_grid_size = min(final_grid_size, self.main_self.round_limit) - - # create the params grid - self.param_grid = self._create_param_grid(ls, - final_grid_size, - virtual_grid_size) - - # handle the case where permutation filter is provided - if self.main_self.permutation_filter is not None: - self = permutation_filter(self, - ls, - final_grid_size, - virtual_grid_size) - - # initialize with random shuffle if needed - if self.main_self.shuffle: - np.random.shuffle(self.param_grid) - - # create a index for logging purpose - self.param_log = list(range(len(self.param_grid))) - - # add the log index to param grid - self.param_grid = np.column_stack((self.param_grid, self.param_log)) - - def _create_param_grid(self, ls, final_grid_size, virtual_grid_size): - - # select permutations according to downsample - if final_grid_size < virtual_grid_size: - out = sample_reducer(self, final_grid_size, virtual_grid_size) - else: - out = range(0, final_grid_size) - - # build the parameter permutation grid - param_grid = self._create_param_permutations(ls, out) - - return param_grid - - def _create_param_permutations(self, ls, permutation_index): - '''Expand params dictionary to permutations - - Takes the input params dictionary and expands it to - actual parameter permutations for the experiment. - ''' - - final_grid = [] - for i in permutation_index: - p = [] - for l in reversed(ls): - i, s = divmod(int(i), len(l)) - p.insert(0, l[s]) - final_grid.append(tuple(p)) - - _param_grid_out = np.array(final_grid, dtype='object') - - return _param_grid_out - - def _param_input_conversion(self): - '''DETECT PARAM FORMAT - - Checks of the hyperparameter input format is list - or tupple in the params dictionary and expands accordingly. 
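A worked example of the (start, end, steps) expansion that _param_range() performs, mirroring its arange call; the values here are illustrative:

```python
import numpy as np

# as in a params entry such as 'dropout': (0, 0.5, 5)
start, end, n = 0, 0.5, 5
out = np.arange(start, end, (end - start) / n, dtype=float)
print(out)  # [0.  0.1 0.2 0.3 0.4]
```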
- - ''' - - out = {} - - for param in self.main_self.params.keys(): - - # for range/step style input - if isinstance(self.main_self.params[param], tuple): - out[param] = self._param_range(self.main_self.params[param][0], - self.main_self.params[param][1], - self.main_self.params[param][2]) - # all other input styles - else: - out[param] = self.main_self.params[param] - - return out - - def _param_range(self, start, end, n): - '''Deal with ranged inputs in params dictionary - - A helper function to handle the cases where params - dictionary input is in the format (start, end, steps) - and is called internally through ParamGrid(). - ''' - - try: - out = np.arange(start, end, (end - start) / n, dtype=float) - # this is for python2 - except ZeroDivisionError: - out = np.arange(start, end, (end - start) / float(n), dtype=float) - - if type(start) == int and type(end) == int: - out = out.astype(int) - out = np.unique(out) - - return out -from numpy import random - - -def create_params_dict(self, _choice): - _round_params_dict = {} - x = 0 - for key in self.param_reference.keys(): - _round_params_dict[key] = self.param_grid[_choice][x] - x += 1 - - return _round_params_dict - - -def round_params(self): - '''Picks the paramaters for a round based on the available - paramater permutations using the param_log index''' - - # pick the permutation for the round - if self.search_method == 'random': - _choice = random.choice(self.param_log) - - elif self.search_method == 'linear': - _choice = min(self.param_log) - - elif self.search_method == 'reverse': - _choice = max(self.param_log) - - # remove the current choice from permutations - self.param_log.remove(_choice) - - # create a dictionary for the current round - return create_params_dict(self, _choice) -import pandas as pd - - -def correlation(self): - '''Correlation Reducers - - Note that this set of reducers works only for the continuous - and stepped (e.g. batch size) hyperparameters. - - ''' - - out = self.param_table.corr(method='spearman')[self.reduction_metric] - out = out.dropna() - - if len(out) <= 1: - self._reduce_keys = None - return self - - out = out[1:].sort_values(ascending=False) - out = out.index[-1], out[-1] - - if abs(out[1]) >= self.reduction_threshold: - dummy_cols = pd.get_dummies(self.param_table[out[0]]) - dummy_cols.insert(0, - self.reduction_metric, - self.param_table[self.reduction_metric]) - - # case where threshold is not met - else: - self._reduce_keys = None - return self - - # all other cases continue - to_drop_temp = dummy_cols.corr(method='spearman')[self.reduction_metric] - - # pick the drop method based on paramaters - if self.reduce_loss is False: - self._reduce_keys = to_drop_temp.sort_values().index[0], out[0] - else: - self._reduce_keys = to_drop_temp.sort_values().index[-2], out[0] - - return self -def permutation_filter(self, ls, final_grid_size, virtual_grid_size): - '''Handles the filtering for ta.Scan(... 
permutation_filter= ...)'''
-
-    from ..parameters.round_params import create_params_dict
-
-    # handle the filtering with the current params grid
-
-    def fn(i):
-
-        params_dict = create_params_dict(self, i)
-        keep = self.main_self.permutation_filter(params_dict)
-
-        return keep
-
-    grid_indices = list(filter(fn, range(len(self.param_grid))))
-    self.param_grid = self.param_grid[grid_indices]
-    final_expanded_grid_size = final_grid_size
-
-    while len(self.param_grid) < final_grid_size and final_expanded_grid_size < virtual_grid_size:
-        final_expanded_grid_size *= 2
-
-        if final_expanded_grid_size > virtual_grid_size:
-            final_expanded_grid_size = virtual_grid_size
-
-        self.param_grid = self._create_param_grid(ls,
-                                                  final_expanded_grid_size,
-                                                  virtual_grid_size)
-
-        grid_indices = list(filter(fn, range(len(self.param_grid))))
-        self.param_grid = self.param_grid[grid_indices]
-
-    self.param_grid = self.param_grid[:final_grid_size]
-
-    return self
-def reduce_finish(self):
-    '''Takes input from a Reducer in the form of a tuple where the
-    values are the hyperparameter name and the value to drop.
-    Returns self with a modified param_log.'''
-
-    # get the column index
-    to_remove_col = self.param_reference[self._reduce_keys[1]]
-    value_to_remove = self._reduce_keys[0]
-
-    # pick the index numbers for dropping available permutations
-    indices_to_drop = self.param_grid[self.param_grid[:, to_remove_col] == value_to_remove][:, -1]
-
-    # drop the index numbers
-    self.param_log = list(set(self.param_log).difference(set(indices_to_drop)))
-
-    return self
-import pandas as pd
-
-from ..metrics.names import metric_names
-
-
-def reduce_prepare(self):
-    '''
-    Preparation procedures for applying a reduction algorithm.
-    '''
-
-    # load the data from the experiment log
-    self.data = pd.read_csv(self.experiment_name + '.csv')
-    self.names = metric_names()
-
-    # apply the lookback window
-    if self.reduction_window is not None:
-        self.data = self.data.tail(self.reduction_window)
-
-    self.param_columns = [
-        col for col in self.data.columns if col not in metric_names()]
-    self.param_table = self.data[self.param_columns]
-    self.param_table.insert(0, self.reduction_metric,
-                            self.data[self.reduction_metric])
-
-    return self
-from .reduce_prepare import reduce_prepare
-from .reduce_finish import reduce_finish
-from .correlation import correlation
-
-
-def reduce_run(self):
-    '''The process run script for reduce
-    procedures; takes care of everything
-    related to reduction. When new
-    reduction methods are added, they need
-    to be added as options here.
-    '''
-
-    # prepare log for reduction analysis
-    self = reduce_prepare(self)
-
-    # run the selected reduction method
-    if self.reduction_method == 'correlation':
-        self = correlation(self)
-
-    # TODO: the case where reduction_method
-    # is not selected or is wrong could be
-    # handled better.
-
-    # handle the dropping of permutations
-    if self._reduce_keys is None:
-        return self
-    else:
-        return reduce_finish(self)
-import chances
-
-from ..utils.exceptions import TalosDataError
-
-
-def sample_reducer(self, length, max_value):
-    '''Sample Reducer (Helper)
-
-    NOTE: The Scan() object is in self.main_self because
-    the object being passed here is a ParamGrid() object to which
-    the Scan() object is attached as self.main_self.
-
-    Utilizes 'grid_downsample', 'shuffle', and 'random_method'
-    to reduce the param_grid before starting the experiment.
-    This is the simplest method in Talos for dealing with the curse
-    of dimensionality.
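The permutation_filter hook handled above is supplied to Scan() as a callable over a single round's params dictionary; a sketch reusing the example from the Scan() docstring further below, with `x`, `y`, `params_dict`, and `input_model` as placeholders:

```python
import talos as ta

# keep only permutations where batch_size is below 150
ta.Scan(x, y,
        params=params_dict,
        model=input_model,
        permutation_filter=lambda p: p['batch_size'] < 150)
```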
- - Options are uniform random, stratified random, latin hypercube - sampling, and latin hypercube with sudoku style constraint. - - Returns the reduced param_grid as numpy array. - - ''' - - random_method = self.main_self.random_method - - # calculate the size of the downsample - n = int(max_value * self.main_self.grid_downsample) - - # throw an error if - if n < 1: - raise TalosDataError( - "No permutations in grid. Incease grid_downsample") - - # Initialize Randomizer() - r = chances.Randomizer(max_value, length) - - # use the user selected method - if random_method == 'sobol': - out = r.sobol() - elif random_method == 'quantum': - out = r.quantum() - elif random_method == 'halton': - out = r.halton() - elif random_method == 'korobov_matrix': - out = r.korobov_matrix() - elif random_method == 'latin_sudoku': - out = r.latin_sudoku() - elif random_method == 'latin_matrix': - out = r.latin_matrix() - elif random_method == 'latin_improved': - out = r.latin_improved() - elif random_method == 'uniform_mersenne': - out = r.uniform_mersenne() - elif random_method == 'uniform_crypto': - out = r.uniform_crypto() - elif random_method == 'ambience': - out = r.ambience() - else: - print('check random_method, no eligble method found. Using uniform mersenne.') - out = r.uniform_mersenne() - - return out -from collections import OrderedDict - -from .scan_prepare import scan_prepare -from .scan_run import scan_run - - -class Scan: - """Hyperparamater scanning and optimization - - USE: ta.Scan(x=x, y=y, params=params_dict, model=model) - - Takes in a Keras model, and a dictionary with the parameter - boundaries for the experiment. - - p = { - 'epochs' : [50, 100, 200], - 'activation' : ['relu'], - 'dropout': (0, 0.1, 5) - } - - Accepted input formats are [1] single value in a list, [0.1, 0.2] - multiple values in a list, and (0, 0.1, 5) a range of 5 values - from 0 to 0.1. - - Here is an example of the input model: - - def model(): - - # any Keras model - - return out, model - - - You must replace the parameters in the model with references to - the dictionary, for example: - - model.fit(epochs=params['epochs']) - - To learn more, start from the examples and documentation - available here: https://github.com/autonomio/talos - - - PARAMETERS - ---------- - x : ndarray - 1d or 2d array consisting of the training data. `x` should have the - shape (m, n), where m is the number of training examples and n is the - number of features. Extra dimensions can be added to account for the - channels entry in convolutional neural networks. - y : ndarray - The labels corresponding to the training data. `y` should have the - shape (m, c) where c is the number of classes. A binary classification - problem will have c=1. - params : python dictionary - Lists all permutations of hyperparameters, a subset of which will be - selected at random for training and evaluation. - model : keras model - Any Keras model with relevant declrations like params['first_neuron'] - dataset_name : str - References the name of the experiment. The dataset_name and - experiment_no will be concatenated to produce the file name for the - results saved in the local directory. - experiment_no : str - Indexes the user's choice of experiment number. - x_val : ndarray - User specified cross-validation data. (Default is None). - y_val : ndarray - User specified cross-validation labels. (Default is None). - val_split : float, optional - The proportion of the input `x` which is set aside as the - validation data. (Default is 0.3). 
- shuffle : bool, optional - If True, shuffle the data in x and y before splitting into the train - and cross-validation datasets. (Default is True). - random_method : uniform, stratified, lhs, lhs_sudoku - Determinines the way in which the grid_downsample is applied. The - default setting is 'uniform'. - seed : int - Sets numpy random seed. - search_method : {None, 'random', 'linear', 'reverse'} - Determines the random sampling of the dictionary. `random` picks one - hyperparameter point at random and removes it from the list, then - samples again. `linear` starts from the start of the grid and moves - forward, and `reverse` starts at the end of the grid and moves - backwards. - max_iteration_start_time : None or str - Allows setting a time when experiment will be completed. Use the format - "%Y-%m-%d %H:%M" here. - permutation_filter : lambda function - Use it to filter permutations based on previous knowledge. - USE: permutation_filter=lambda p: p['batch_size'] < 150 - This example removes any permutation where batch_size is below 150 - reduction_method : {None, 'correlation'} - Method for honing in on the optimal hyperparameter subspace. (Default - is None). - reduction_interval : int - The number of reduction method rounds that will be performed. (Default - is None). - reduction_window : int - The number of rounds of the reduction method before observing the - results. (Default is None). - grid_downsample : int - The fraction of `params` that will be tested (Default is None). - round_limit : int - Limits the number of rounds (permutations) in the experiment. - reduction_metric : {'val_acc'} - Metric used to tune the reductions. - last_epoch_value : bool - Set to True if the last epoch metric values are logged as opposed - to the default which is peak epoch values for each round. - disable_progress_bar : bool - Disable TQDM live progress bar. - print_params : bool - Print params for each round on screen (useful when using TrainingLog - callback for visualization) - debug : bool - Implements debugging feedback. (Default is False). - - """ - - # TODO: refactor this so that we don't initialize global variables - global self - - def __init__(self, x, y, params, model, - dataset_name=None, - experiment_no=None, - experiment_name=None, - x_val=None, - y_val=None, - val_split=.3, - shuffle=True, - round_limit=None, - time_limit=None, - grid_downsample=1.0, - random_method='uniform_mersenne', - seed=None, - search_method='random', - permutation_filter=None, - reduction_method=None, - reduction_interval=50, - reduction_window=20, - reduction_threshold=0.2, - reduction_metric='val_acc', - reduce_loss=False, - last_epoch_value=False, - clear_tf_session=True, - disable_progress_bar=False, - print_params=False, - debug=False): - - # NOTE: these need to be follow the order from __init__ - # and all paramaters needs to be included here and only here. 
- - self.x = x - self.y = y - self.params = OrderedDict(params) - self.model = model - self.dataset_name = dataset_name - self.experiment_no = experiment_no - self.experiment_name = experiment_name - self.x_val = x_val - self.y_val = y_val - self.val_split = val_split - self.shuffle = shuffle - self.random_method = random_method - self.search_method = search_method - self.round_limit = round_limit - self.time_limit = time_limit - self.permutation_filter = permutation_filter - self.reduction_method = reduction_method - self.reduction_interval = reduction_interval - self.reduction_window = reduction_window - self.grid_downsample = grid_downsample - self.reduction_threshold = reduction_threshold - self.reduction_metric = reduction_metric - self.reduce_loss = reduce_loss - self.debug = debug - self.seed = seed - self.clear_tf_session = clear_tf_session - self.disable_progress_bar = disable_progress_bar - self.last_epoch_value = last_epoch_value - self.print_params = print_params - # input parameters section ends - - self._null = self.runtime() - - def runtime(self): - - self = scan_prepare(self) - self = scan_run(self) -# for func_best_model -from ..utils.best_model import best_model, activate_model - -# for func_evaluate -import warnings -from tqdm import tqdm -from numpy import mean, std -import numpy as np - -from ..commands.evaluate import Evaluate - - -def func_best_model(scan_object, metric='val_acc', asc=False): - '''Picks the best model based on a given metric and - returns the index number for the model. - - NOTE: for loss 'asc' should be True''' - - warnings.simplefilter('ignore') - - model_no = best_model(scan_object, metric, asc) - out = activate_model(scan_object, model_no) - - return out - - -def func_evaluate(scan_object, - x_val, - y_val, - n=10, - metric='val_acc', - folds=5, - shuffle=True, - average='binary', - asc=False): - ''' - For creating scores from kfold cross-evaluation and - adding them to the data frame. 
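Since func_evaluate() is bound onto the Scan object at the end of a run (as evaluate_models), the addon is used roughly like this; `x_val` and `y_val` are placeholder hold-out arrays:

```python
# after ta.Scan() completes:
scan_object.evaluate_models(x_val, y_val, n=10, metric='val_acc', folds=5)

# mean/std f1-scores are appended to the results table
print(scan_object.data[['eval_f1score_mean', 'eval_f1score_std']].head())
```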
- - ''' - - warnings.simplefilter('ignore') - - picks = scan_object.data.sort_values(metric, - ascending=asc).index.values[:n] - - if n > len(scan_object.data): - data_len = len(scan_object.data) - else: - data_len = n - - out = [] - - pbar = tqdm(total=data_len) - - for i in range(len(scan_object.data)): - - if i in list(picks): - evaluate_object = Evaluate(scan_object) - temp = evaluate_object.evaluate(x_val, y_val, - model_id=i, - metric=metric, - folds=folds, - shuffle=shuffle, - asc=asc) - out.append([mean(temp), std(temp)]) - pbar.update(1) - else: - out.append([np.nan, np.nan]) - - pbar.close() - - scan_object.data['eval_f1score_mean'] = [i[0] for i in out] - scan_object.data['eval_f1score_std'] = [i[1] for i in out] -import time -from pandas import Series, DataFrame - -from ..scan.scan_addon import func_best_model, func_evaluate -from ..utils.string_cols_to_numeric import string_cols_to_numeric - - -attrs_final = ['data', 'x', 'y', 'peak_epochs_df', 'round_times', - 'params', 'saved_models', 'saved_weights'] - -attrs_to_keep = attrs_final + ['random_method', 'grid_downsample', - 'reduction_interval', 'reduce_loss', - 'reduction_method', 'reduction_metric', - 'reduction_threshold', 'reduction_window', - 'experiment_name'] - - -def scan_finish(self): - - # create a dataframe with permutation times - self.round_times = DataFrame(self.round_times) - self.round_times.columns = ['start', 'end', 'duration'] - - # combine entropy tables - self.peak_epochs_df['acc_epoch'] = [i[0] for i in self.epoch_entropy] - self.peak_epochs_df['loss_epoch'] = [i[1] for i in self.epoch_entropy] - - # clean the results into a dataframe - self.data = self.result[self.result.columns[0]].str.split(',', expand=True) - self.data.columns = self.result.columns[0].split(',') - - # remove redundant columns - keys = list(self.__dict__.keys()) - for key in keys: - if key not in attrs_to_keep: - delattr(self, key) - - # summarize single inputs in dictionary - out = {} - - for key in list(self.__dict__.keys()): - if key not in attrs_final: - out[key] = self.__dict__[key] - - out['complete_time'] = time.strftime('%D/%H:%M') - try: - out['x_shape'] = self.x.shape - # for the case when x is list - except AttributeError: - out['x_shape'] = 'list' - - out['y_shape'] = self.y.shape - - # final cleanup - keys = list(self.__dict__.keys()) - for key in keys: - if key not in attrs_final: - delattr(self, key) - - # add details dictionary as series - self.details = Series(out) - - # add best_model - self.best_model = func_best_model.__get__(self) - self.evaluate_models = func_evaluate.__get__(self) - - # reset the index - self.data.index = range(len(self.data)) - - # convert to numeric - self.data = string_cols_to_numeric(self.data) - - return self -from time import strftime -from datetime import datetime - -from ..utils.validation_split import validation_split -from ..utils.detector import prediction_type -from ..parameters.ParamGrid import ParamGrid -from ..utils.pred_class import classify -from ..utils.last_neuron import last_neuron - - -def scan_prepare(self): - '''Includes all preparation procedures up until starting the first scan - through scan_run()''' - - # create the name for the experiment - if self.dataset_name is None: - self.dataset_name = strftime('%D%H%M%S').replace('/', '') - - if self.experiment_no is None: - self.experiment_no = '' - - if self.experiment_name is None: - self.experiment_name = self.dataset_name + '_' + self.experiment_no - - # handle the case where a time limit is set - if self.time_limit is not 
None: - self._stoptime = datetime.strptime(self.time_limit, - "%Y-%m-%d %H:%M") - - # create the round times list - self.round_times = [] - - # for the case where x_val or y_val is missing when other is present - self.custom_val_split = False - if (self.x_val is not None and self.y_val is None) or \ - (self.x_val is None and self.y_val is not None): - raise RuntimeError("If x_val/y_val is inputted, other must as well.") - - elif (self.x_val is not None and self.y_val is not None): - self.custom_val_split = True - - # create the paramater object and move to self - self.paramgrid_object = ParamGrid(self) - self.param_log = self.paramgrid_object.param_log - self.param_grid = self.paramgrid_object.param_grid - self.param_reference = self.paramgrid_object.param_reference - del self.paramgrid_object - - self.round_counter = 0 - self.peak_epochs = [] - self.epoch_entropy = [] - self.round_models = [] - - # create the data asset - self.y_max = self.y.max() - self = validation_split(self) - self.shape = classify(self.y) - self.last_neuron = last_neuron(self) - - self._data_len = len(self.x) - self = prediction_type(self) - self.result = [] - - # model saving - self.saved_models = [] - self.saved_weights = [] - - return self -from time import strftime, time - -from keras import backend as K - -from ..parameters.round_params import round_params -from ..utils.results import create_header -from ..metrics.entropy import epoch_entropy -from ..model.ingest_model import ingest_model -from ..utils.results import run_round_results, save_result -from ..reducers.reduce_run import reduce_run -from ..utils.exceptions import TalosReturnError, TalosTypeError - - -def scan_round(self): - '''The main operational function that manages the experiment - on the level of execution of each round.''' - - # determine the parameters for the particular execution - self.round_params = round_params(self) - - # print round params - if self.print_params is True: - print(self.round_params) - - # set start time - round_start = strftime('%D-%H%M%S') - start = time() - - # fit the model - try: - _hr_out, self.keras_model = ingest_model(self) - except TypeError as err: - if err.args[0] == "unsupported operand type(s) for +: 'int' and 'numpy.str_'": - raise TalosTypeError( - "Activation should be as object and not string in params") - else: - print('ERROR MESSAGE : ' + err.args[0]) - raise TalosReturnError( - "Make sure that input model returns 'out, model' where out is history object from model.fit()") - - # count the duration of the round - self._round_seconds = time() - start - - # set end time and log - round_end = strftime('%D-%H%M%S') - self.round_times.append([round_start, round_end, self._round_seconds]) - - # create log and other stats - try: - self.epoch_entropy.append(epoch_entropy(_hr_out)) - except (TypeError, AttributeError): - raise TalosReturnError( - "Make sure that input model returns in the order 'out, model'") - - if self.round_counter == 0: - _for_header = create_header(self, _hr_out) - self.result.append(_for_header) - save_result(self) - - _hr_out = run_round_results(self, _hr_out) - - self.result.append(_hr_out) - save_result(self) - - # apply reduction - if self.reduction_method is not None: - if (self.round_counter + 1) % self.reduction_interval == 0: - len_before_reduce = len(self.param_log) - self = reduce_run(self) - total_reduced = len_before_reduce - len(self.param_log) - # update the progress bar - self.pbar.update(total_reduced) - - # save model and weights - 
self.saved_models.append(self.keras_model.to_json()) - self.saved_weights.append(self.keras_model.get_weights()) - - # clear tensorflow sessions - if self.clear_tf_session is True: - K.clear_session() - - # round is completed - self.round_counter += 1 - - return self -from tqdm import tqdm -from datetime import datetime - -from ..utils.results import result_todf, peak_epochs_todf -from .scan_round import scan_round -from .scan_finish import scan_finish - - -def scan_run(self): - '''The high-level management of the scan procedures - onwards from preparation. Manages round_run()''' - - # initiate the progress bar - self.pbar = tqdm(total=len(self.param_log), - disable=self.disable_progress_bar) - - # start the main loop of the program - while len(self.param_log) != 0: - self = scan_round(self) - self.pbar.update(1) - if self.time_limit is not None: - if datetime.now() > self._stoptime: - print("Time limit reached, experiment finished") - break - self.pbar.close() - - # save the results - self = result_todf(self) - self.peak_epochs_df = peak_epochs_todf(self) - - self = scan_finish(self) -# In this init we load everything under utils in the Talos namespace - -try: - from kerasplotlib import TrainingLog as live -except ImportError: - print('Matplotlib backend loading failed') - -from ..model.normalizers import lr_normalizer -from ..model.layers import hidden_layers -from ..model.early_stopper import early_stopper -from .generator import generator -from . import gpu_utils -import talos.metrics.keras_metrics as metrics -from keras.models import model_from_json - - -def best_model(self, metric, asc): - '''Picks the best model based on a given metric and - returns the index number for the model. - - NOTE: for loss 'asc' should be True''' - - best = self.data.sort_values(metric, ascending=asc).iloc[0].name - - return best - - -def activate_model(self, model_id): - '''Loads the model from the json that is stored in the Scan object''' - - model = model_from_json(self.saved_models[model_id]) - model.set_weights(self.saved_weights[model_id]) - - return model -import socket - - -def is_connected(): - try: - socket.create_connection(("www.google.com", 80)) - return True - except OSError: - pass - return False -from numpy import median, unique, mean - - -def prediction_type(self): - - try: - y_cols = self.y.shape[1] - except IndexError: - y_cols = 1 - y_max = self.y.max() - y_uniques = len(unique(self.y)) - - if y_cols > 1: - self._y_type = 'category' - self._y_range = y_cols - self._y_format = 'onehot' - else: - if y_max == 1: - self._y_type = 'binary' - self._y_range = y_cols - self._y_format = 'single' - elif mean(self.y) == median(self.y): - self._y_type = 'category' - self._y_range = y_uniques - self._y_format = 'single' - else: - self._y_type = 'continuous' - self._y_num = self.y.max() - self.y.min() - self._y_format = 'single' - - return self -import sys - - -class TalosReturnError(Exception): - pass - - -class TalosParamsError(Exception): - pass - - -class TalosTypeError(Exception): - pass - - -class TalosModelError(Exception): - pass - - -class TalosDataError(Exception): - pass -def generator(x, y, batch_size): - '''Creates a data generator for Keras fit_generator(). 
''' - - import numpy as np - - samples_per_epoch = x.shape[0] - number_of_batches = samples_per_epoch / batch_size - counter = 0 - - while 1: - - x_batch = np.array( - x[batch_size*counter:batch_size*(counter+1)]).astype('float32') - y_batch = np.array( - y[batch_size*counter:batch_size*(counter+1)]).astype('float32') - counter += 1 - - yield x_batch, y_batch - - if counter >= number_of_batches: - counter = 0 -def parallel_gpu_jobs(allow_growth=True, fraction=.5): - '''Sets the max used memory as a fraction for tensorflow - backend - - allow_growth :: True of False - - fraction :: a float value (e.g. 0.5 means 4gb out of 8gb) - - ''' - - import keras.backend as K - import tensorflow as tf - - gpu_options = K.tf.GPUOptions(allow_growth=allow_growth, - per_process_gpu_memory_fraction=fraction) - config = tf.ConfigProto(gpu_options=gpu_options) - session = K.tf.Session(config=config) - K.set_session(session) - - -def multi_gpu(model, gpus=None, cpu_merge=True, cpu_relocation=False): - '''Takes as input the model, and returns a model - based on the number of GPUs available on the machine - or alternatively the 'gpus' user input. - - NOTE: this needs to be used before model.compile() in the - model inputted to Scan in the form: - - from talos.utils.gpu_utils import multi_gpu - model = multi_gpu(model) - - ''' - - from keras.utils import multi_gpu_model - - return multi_gpu_model(model, - gpus=gpus, - cpu_merge=cpu_merge, - cpu_relocation=cpu_relocation) - - -def force_cpu(): - '''Force CPU on a GPU system - ''' - - import keras.backend as K - import tensorflow as tf - - config = tf.ConfigProto(device_count={'GPU': 0}) - session = tf.Session(config=config) - K.set_session(session) -def last_neuron(self): - - labels = list(set(self.y.flatten('F'))) - - try: - last_neuron = self.y.shape[1] - return last_neuron - except IndexError: - if len(labels) == 2 and max(labels) == 1: - last_neuron = 1 - elif len(labels) == 2 and max(labels) > 1: - last_neuron = 3 - elif len(labels) > 2: - last_neuron = len(labels) - - return last_neuron -from keras.models import model_from_json - - -def load_model(saved_model): - '''Load a Model from local disk - - Takes as input .json and .h5 file with model - and weights and returns a model that can be then - used for predictions. - - saved_model :: name of the saved model without - suffix (e.g. 'iris_model' and not 'iris_model.json') - - ''' - - json_file = open(saved_model + ".json", 'r') - loaded_model_json = json_file.read() - json_file.close() - model = model_from_json(loaded_model_json) - model.load_weights(saved_model + '.h5') - - return model -def classify(y): - '''Detects if prediction is binary, multi-label or multi-class''' - - shape = detect_shape(y) - - if shape > 1: - return 'multi_class' - - elif y.max() <= 1: - return 'binary_class' - else: - return 'multi_label' - - -def detect_shape(y): - - try: - return y.shape[1] - except IndexError: - return 1 -from numpy import array, argpartition, savetxt -from pandas import DataFrame - - -def create_header(self, out): - '''Creates the Header column - On the first round creates the header columns - for the experiment output log. - ''' - - _rr_out = [] - - _rr_out.append('round_epochs') - [_rr_out.append(i) for i in list(out.history.keys())] - [_rr_out.append(key) for key in self.params.keys()] - - self.peak_epochs.append(list(out.history.keys())) - - return ",".join(str(i) for i in _rr_out) - - -def run_round_results(self, out): - '''THE MAIN FUNCTION FOR CREATING RESULTS FOR EACH ROUNDself. 
- Takes in the history object from model.fit() and handles it. - - NOTE: The epoch level data will be dropped here each round. - - ''' - - _rr_out = [] - - self._round_epochs = len(list(out.history.values())[0]) - - # otherwise proceed to create the value row - _rr_out.append(self._round_epochs) - p_epochs = [] - - # iterates through the keys and records last or peak for metrics - for key in out.history.keys(): - t_t = array(out.history[key]) - - # this handles metrics (NOTE: 'acc' have to be in metric name) - if 'acc' in key: - best_epoch = argpartition(t_t, len(t_t) - 1)[-1] - - # this handles losses (takes minimum value epoch) - else: - best_epoch = argpartition(t_t, 0)[0] - - if self.last_epoch_value: - value_to_report = out.history[key][-1] - else: - value_to_report = array(out.history[key])[best_epoch] - - _rr_out.append(value_to_report) - p_epochs.append(best_epoch) - - # this takes care of the separate entity with just peak epoch data - self.peak_epochs.append(p_epochs) - - for key in self.round_params.keys(): - _rr_out.append(self.round_params[key]) - - return ",".join(str(i) for i in _rr_out) - - -def save_result(self): - '''SAVES THE RESULTS/PARAMETERS TO A CSV SPECIFIC TO THE EXPERIMENT''' - - savetxt(self.experiment_name + '.csv', - self.result, - fmt='%s', - delimiter=',') - - -def result_todf(self): - '''ADDS A DATAFRAME VERSION OF THE RESULTS TO THE CLASS OBJECT''' - - self.result = DataFrame(self.result) - self.result.columns = self.result.iloc[0] - self.result = self.result.drop(0) - - return self - - -def peak_epochs_todf(self): - - return DataFrame(self.peak_epochs, columns=self.peak_epochs[0]).drop(0) -def isnumber(value): - '''Checks if a string can be converted into - a float (or int as a by product). Helper function - for string_cols_to_numeric''' - - try: - float(value) - return True - except ValueError: - return False - - -def string_cols_to_numeric(data, destructive=False): - '''Takes in a dataframe and attempts to convert numeric columns - into floats or ints respectively.''' - - if destructive is False: - data = data.copy(deep=True) - - for col in data.columns: - - if data[col].apply(isnumber).sum() == len(data): - try: - data[col] = data[col].astype(int) - except: # intentionally silent - try: - data[col] = data[col].astype(float) - except: # intentionally silent - data[col] = data[col] - else: - data[col] = data[col] - - return data -import numpy as np -try: - from wrangle.array.array_random_shuffle import array_random_shuffle as shuffle -except ImportError: - from wrangle import shuffle - - -def validation_split(self): - """Defines the attributes `x_train`, `y_train`, `x_val` and `y_val`. - The validation (cross-validation, aka development) sets are determined - by the attribute val_split, which is a number in (0, 1) which determines - the proportion of the input data to be allocated for cross-validation.""" - - if self.custom_val_split: - self.x_train = self.x - self.y_train = self.y - # self.x/y_val are already set - - else: - if self.shuffle: - random_shuffle(self) - - # deduce the midway point for input data - limit = int(len(self.x) * (1 - self.val_split)) - - self.x_train = self.x[:limit] - self.y_train = self.y[:limit] - - self.x_val = self.x[limit:] - self.y_val = self.y[limit:] - - return self - - -def random_shuffle(self): - """Randomly shuffles the datasets. 
- If self.seed is set, seed the generator - to ensure that the results are reproducible.""" - - def randomize(x): - '''Helper function to support the case - where x consist of a list of arrays.''' - - if self.seed is not None: - np.random.seed(self.seed) - - ix = np.arange(len(x)) - np.random.shuffle(ix) - - return ix - - if isinstance(self.x, list): - - ix = randomize(self.x[0]) - out = [] - - for a in self.x: - out.append(a[ix]) - self.x = out - - else: - ix = randomize(self.x) - self.x = self.x[ix] - - self.y = self.y[ix] - - -def kfold(x, y, folds=10, shuffled=True): - - if shuffled is True: - x, y = shuffle(x, y) - - out_x = [] - out_y = [] - - x_len = len(x) - step = int(x_len / folds) - - lo = 0 - hi = step - - for i in range(folds): - out_x.append(x[lo:hi]) - out_y.append(y[lo:hi]) - - lo += step - hi += step - - return out_x, out_y -import talos as ta - - -def test_auto_scan(): - '''Tests the object from Params()''' - - print('Start auto Scan()...') - - x, y = ta.templates.datasets.breast_cancer() - x = x[:50] - y = y[:50] - - p = ta.Params().params - - for key in p.keys(): - p[key] = [p[key][0]] - - ta.Scan(x, y, p, ta.KerasModel().model, - permutation_filter=lambda p: p['batch_size'] < 150,) - - return "Finished testing auto Scan()" -import talos as ta - - -def test_params_object(): - '''Tests the object from Params()''' - - print('Start testing Params object...') - - p = ta.Params() - - # without arguments - - p.activations() - p.batch_size() - p.dropout() - p.epochs() - p.kernel_initializers() - p.layers() - p.neurons() - p.lr() - p.optimizers() - p.shapes() - p.shapes_slope() - p.automated() - - p = ta.Params(replace=False) - - # with arguments - p.activations() - p.batch_size(10, 100, 5) - p.dropout() - p.epochs(10, 100, 5) - p.kernel_initializers() - p.layers(12) - p.neurons(10, 100, 5) - p.lr() - p.optimizers('multi_label') - p.shapes() - p.shapes_slope() - p.automated('sloped') - - return "Finished testing Params object!" -import talos as ta - - -def test_random_methods(): - '''Tests all the available random methods - in reducers/sample_reducer.py that are invoked - that are invoked through Scan(random_method)''' - - print('Start testing random methods...') - - random_methods = ['sobol', - 'quantum', - 'halton', - 'korobov_matrix', - 'latin_sudoku', - 'latin_matrix', - 'latin_improved', - 'uniform_mersenne', - 'uniform_crypto', - 'ambience'] - - for method in random_methods: - ta.templates.pipelines.titanic(random_method=method) - - return "Finished testing random methods!" -import talos as ta - - -def test_reporting_object(scan_object): - '''Tests all the attributes available in the Reporting() object''' - - print('Start testing Reporting object...') - - r = ta.Reporting(scan_object) - r.best_params() - r.correlate() - r.data - r.high() - r.low() - - r.plot_bars('first_neuron', 'val_acc', 'batch_size', 'hidden_layers') - r.plot_box('first_neuron') - r.plot_corr('val_loss') - r.plot_hist() - r.plot_kde('val_acc') - r.plot_line() - r.plot_regs() - r.rounds() - r.rounds2high() - r.table() - - return "Finished testing Reporting object!" 
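A quick aside on the `generator()` helper that appears in the Talos utils above: it cycles over the data indefinitely, yielding `float32` batches, so Keras needs an explicit `steps_per_epoch` to know where an epoch ends. Below is a minimal, self-contained sketch of how it would be wired into `fit_generator()` — the toy model and data are illustrative stand-ins, not part of the scraped code.

import numpy as np
from keras.models import Sequential
from keras.layers import Dense

# toy data: 100 samples, 8 features, binary target (illustrative only)
x = np.random.rand(100, 8)
y = (x.sum(axis=1) > 4).astype('float32')

model = Sequential([Dense(16, activation='relu', input_shape=(8,)),
                    Dense(1, activation='sigmoid')])
model.compile(optimizer='adam', loss='binary_crossentropy')

# generator() (defined in the Talos utils above) loops forever,
# so each epoch must be capped explicitly with steps_per_epoch
batch_size = 20
model.fit_generator(generator(x, y, batch_size),
                    steps_per_epoch=len(x) // batch_size,
                    epochs=2)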
-#!/usr/bin/env python - -from __future__ import print_function - -from keras.losses import binary_crossentropy, sparse_categorical_crossentropy -from keras.losses import categorical_crossentropy, mean_squared_error -from keras.optimizers import SGD, Adam, Adadelta, Adagrad -from keras.optimizers import Adamax, RMSprop, Nadam -from keras.activations import relu, sigmoid - -from sklearn.model_selection import train_test_split as splt - -from talos.scan.Scan import Scan -from talos.commands.reporting import Reporting - -import talos as ta - - -# single values -def values_single_params(): - return {'lr': [1], - 'first_neuron': [4], - 'hidden_layers': [2], - 'batch_size': [100], - 'epochs': [2], - 'dropout': [0], - 'shapes': ['brick'], - 'optimizer': [Adam], - 'losses': [binary_crossentropy, - sparse_categorical_crossentropy, - categorical_crossentropy, - mean_squared_error], - 'activation': ['relu'], - 'last_activation': ['softmax']} - - -# lists of values -def values_list_params(): - return {'lr': [1, 2], - 'first_neuron': [4, 4], - 'hidden_layers': [2, 2], - 'batch_size': [100, 200], - 'epochs': [1, 2], - 'dropout': [0, 0.1], - 'shapes': ['brick', 'funnel', 'triangle', 0.2], - 'optimizer': [Adam, Adagrad, Adamax, RMSprop, Adadelta, Nadam, SGD], - 'losses': ['binary_crossentropy', - 'sparse_categorical_crossentropy', - 'categorical_crossentropy', - 'mean_squared_error'], - 'activation': ['relu', 'elu'], - 'last_activation': ['softmax']} - - -# range of values -def values_range_params(): - return {'lr': (0.5, 5, 10), - 'first_neuron': (4, 100, 5), - 'hidden_layers': (0, 5, 5), - 'batch_size': (200, 300, 10), - 'epochs': (1, 5, 4), - 'dropout': (0, 0.5, 5), - 'shapes': ['funnel'], - 'optimizer': [Nadam], - 'losses': [binary_crossentropy, - sparse_categorical_crossentropy, - categorical_crossentropy, - mean_squared_error], - 'activation': [relu], - 'last_activation': [sigmoid]} - - -""" -The tests below have to serve several purpose: - -- test possible input methods to params dict -- test binary, multi class, multi label and continuous problems -- test all Scan arguments - -Each problem type is presented as a Class, and contains three -experiments using single, list, or range inputs. There is an -effort to test as many scenarios as possible here, so be -inventive / experiment! Doing well with this part of the testing, -there is a healthy base for a more serious approach to ensuring -procedural integrity. - -""" - - -def get_params(task): - """ - - Helper that allows the tests to feed from same - params dictionaries. 
- - USE: values_single, values_list, values_range = get_appropriate_loss(0) - - 0 = binary - 1 = 1d multi class - 2 = 2d multi label - 3 = continuous / regression - - """ - - # first create the params dict - values_single = values_single_params() - values_list = values_list_params() - values_range = values_range_params() - - # then limit the losses according to prediction task - values_single['losses'] = [values_single_params()['losses'][task]] - values_list['losses'] = [values_list_params()['losses'][task]] - values_range['losses'] = [values_range_params()['losses'][task]] - - return values_single, values_list, values_range - - -class BinaryTest: - - def __init__(self): - - # read the params dictionary with the right loss - self.values_single, self.values_list, self.values_range = get_params(0) - - # prepare the data for the experiment - self.x, self.y = ta.templates.datasets.cervical_cancer() - self.x = self.x[:300] - self.y = self.y[:300] - self.model = ta.templates.models.cervical_cancer - - # split validation data - self.x_train, self.x_val, self.y_train, self.y_val = splt(self.x, - self.y, - test_size=0.2) - - def values_single_test(self): - print("BinaryTest : Running values_single_test...") - - Scan(self.x, - self.y, - params=self.values_single, - model=ta.templates.models.cervical_cancer) - - def values_list_test(self): - print("BinaryTest : Running values_list_test...") - Scan(self.x_train, - self.y_train, - x_val=self.x_val, - y_val=self.y_val, - params=self.values_list, - round_limit=5, - dataset_name='BinaryTest', - experiment_no='000', - model=ta.templates.models.cervical_cancer, - random_method='crypto_uniform', - seed=2423, - search_method='linear', - reduction_method='correlation', - reduction_interval=2, - reduction_window=2, - reduction_threshold=0.2, - reduction_metric='val_loss', - reduce_loss=True, - last_epoch_value=True, - clear_tf_session=False, - disable_progress_bar=True, - debug=True) - - # comprehensive - def values_range_test(self): - print("BinaryTest : Running values_range_test...") - Scan(self.x_train, - self.y_train, - params=self.values_range, - model=ta.templates.models.cervical_cancer, - grid_downsample=0.0001, - permutation_filter=lambda p: p['first_neuron'] * - p['hidden_layers'] < 220, - random_method='sobol', - reduction_method='correlation', - reduction_interval=2, - reduction_window=2, - reduction_threshold=0.2, - reduction_metric='val_acc', - reduce_loss=False, - debug=True) - - -class MultiLabelTest: - - def __init__(self): - - # read the params dictionary with the right loss - self.values_single, self.values_list, self.values_range = get_params(2) - - self.x, self.y = ta.templates.datasets.iris() - self.x_train, self.x_val, self.y_train, self.y_val = splt(self.x, - self.y, - test_size=0.2) - - def values_single_test(self): - print("MultiLabelTest : Running values_single_test...") - Scan(self.x, - self.y, - params=self.values_single, - model=ta.templates.models.iris) - - def values_list_test(self): - print("MultiLabelTest : Running values_list_test...") - Scan(self.x, - self.y, - x_val=self.x_val, - y_val=self.y_val, - params=self.values_list, - round_limit=5, - dataset_name='MultiLabelTest', - experiment_no='000', - model=ta.templates.models.iris, - random_method='crypto_uniform', - seed=2423, - search_method='linear', - permutation_filter=lambda p: p['first_neuron'] * - p['hidden_layers'] < 9, - reduction_method='correlation', - reduction_interval=2, - reduction_window=2, - reduction_threshold=0.2, - reduction_metric='val_loss', - 
reduce_loss=True, - last_epoch_value=True, - clear_tf_session=False, - disable_progress_bar=True, - debug=True) - - # comprehensive - def values_range_test(self): - print("MultiLabelTest : Running values_range_test...") - Scan(self.x, - self.y, - params=self.values_range, - model=ta.templates.models.iris, - grid_downsample=0.0001, - random_method='sobol', - reduction_method='correlation', - reduction_interval=2, - reduction_window=2, - reduction_threshold=0.2, - reduction_metric='val_acc', - reduce_loss=False, - debug=True) - - -class ReportingTest: - - def __init__(self): - - print("ReportingTest : Running Binary test...") - - r = Reporting('BinaryTest_000.csv') - - x = r.data - x = r.correlate() - x = r.high() - x = r.low() - x = r.rounds() - x = r.rounds2high() - x = r.best_params() - x = r.plot_corr() - x = r.plot_hist() - x = r.plot_line() - - print("ReportingTest : Running MultiLabel test...") - r = Reporting('MultiLabelTest_000.csv') - - x = r.data - x = r.correlate() - x = r.high() - x = r.low() - x = r.rounds() - x = r.rounds2high() - x = r.best_params() - x = r.plot_corr() - x = r.plot_hist() - x = r.plot_line() - - del x - - -class DatasetTest: - - def __init__(self): - - print("DatasetTest : Running tests...") - x = ta.templates.datasets.icu_mortality() - x = ta.templates.datasets.icu_mortality(100) - x = ta.templates.datasets.titanic() - x = ta.templates.datasets.iris() - x = ta.templates.datasets.cervical_cancer() - x = ta.templates.datasets.breast_cancer() - x = ta.templates.params.iris() - x = ta.templates.params.breast_cancer() -# first load the pipeline -import talos as ta - - -def test_scan_object(): - - print("Running Scan object test...") - - # the create the test based on it - scan_object = ta.templates.pipelines.iris() - keras_model = scan_object.best_model() - scan_object.evaluate_models(x_val=scan_object.x, - y_val=scan_object.y) - - print("test_scan_object finished.") - return scan_object -""" -Adapted from keras example cifar10_cnn.py -Train ResNet-18 on the CIFAR10 small images dataset. - -GPU run command with Theano backend (with TensorFlow, the GPU is automatically used): - THEANO_FLAGS=mode=FAST_RUN,device=gpu,floatX=float32 python cifar10.py -""" -from __future__ import print_function -from keras.datasets import cifar10 -from keras.preprocessing.image import ImageDataGenerator -from keras.utils import np_utils -from keras.callbacks import ReduceLROnPlateau, CSVLogger, EarlyStopping - -import numpy as np -import resnet - - -lr_reducer = ReduceLROnPlateau(factor=np.sqrt( - 0.1), cooldown=0, patience=5, min_lr=0.5e-6) -early_stopper = EarlyStopping(min_delta=0.001, patience=10) -csv_logger = CSVLogger('resnet18_cifar10.csv') - -batch_size = 32 -nb_classes = 10 -nb_epoch = 200 -data_augmentation = True - -# input image dimensions -img_rows, img_cols = 32, 32 -# The CIFAR10 images are RGB. -img_channels = 3 - -# The data, shuffled and split between train and test sets: -(X_train, y_train), (X_test, y_test) = cifar10.load_data() - -# Convert class vectors to binary class matrices. -Y_train = np_utils.to_categorical(y_train, nb_classes) -Y_test = np_utils.to_categorical(y_test, nb_classes) - -X_train = X_train.astype('float32') -X_test = X_test.astype('float32') - -# subtract mean and normalize -mean_image = np.mean(X_train, axis=0) -X_train -= mean_image -X_test -= mean_image -X_train /= 128. -X_test /= 128. 
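For clarity, here is the mean-subtraction and `/128.` scaling used above as a tiny worked example (toy arrays standing in for CIFAR-10 images): every pixel position is centred on its training-set mean and scaled down, so values land roughly in the [-2, 2] range.

import numpy as np

# two toy "images" with three pixel positions each (not real CIFAR-10 data)
X_train = np.array([[0., 128., 255.],
                    [64., 64., 255.]], dtype='float32')

mean_image = np.mean(X_train, axis=0)    # per-position mean, as in the script
X_train = (X_train - mean_image) / 128.  # zero-centred, roughly unit scale

print(X_train)
# [[-0.25  0.25  0.  ]
#  [ 0.25 -0.25  0.  ]]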
- -model = resnet.ResnetBuilder.build_resnet_18( - (img_channels, img_rows, img_cols), nb_classes) -model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - -if not data_augmentation: - print('Not using data augmentation.') - model.fit(X_train, Y_train, - batch_size=batch_size, - nb_epoch=nb_epoch, - validation_data=(X_test, Y_test), - shuffle=True, - callbacks=[lr_reducer, early_stopper, csv_logger]) -else: - print('Using real-time data augmentation.') - # This will do preprocessing and realtime data augmentation: - datagen = ImageDataGenerator( - featurewise_center=False, # set input mean to 0 over the dataset - samplewise_center=False, # set each sample mean to 0 - featurewise_std_normalization=False, # divide inputs by std of the dataset - samplewise_std_normalization=False, # divide each input by its std - zca_whitening=False, # apply ZCA whitening - # randomly rotate images in the range (degrees, 0 to 180) - rotation_range=0, - # randomly shift images horizontally (fraction of total width) - width_shift_range=0.1, - # randomly shift images vertically (fraction of total height) - height_shift_range=0.1, - horizontal_flip=True, # randomly flip images - vertical_flip=False) # randomly flip images - - # Compute quantities required for featurewise normalization - # (std, mean, and principal components if ZCA whitening is applied). - datagen.fit(X_train) - - # Fit the model on the batches generated by datagen.flow(). - model.fit_generator(datagen.flow(X_train, Y_train, batch_size=batch_size), - steps_per_epoch=X_train.shape[0] // batch_size, - validation_data=(X_test, Y_test), - epochs=nb_epoch, verbose=1, max_q_size=100, - callbacks=[lr_reducer, early_stopper, csv_logger]) -from __future__ import division - -import six -from keras.models import Model -from keras.layers import ( - Input, - Activation, - Dense, - Flatten -) -from keras.layers.convolutional import ( - Conv2D, - MaxPooling2D, - AveragePooling2D -) -from keras.layers.merge import add -from keras.layers.normalization import BatchNormalization -from keras.regularizers import l2 -from keras import backend as K - - -def _bn_relu(input): - """Helper to build a BN -> relu block - """ - norm = BatchNormalization(axis=CHANNEL_AXIS)(input) - return Activation("relu")(norm) - - -def _conv_bn_relu(**conv_params): - """Helper to build a conv -> BN -> relu block - """ - filters = conv_params["filters"] - kernel_size = conv_params["kernel_size"] - strides = conv_params.setdefault("strides", (1, 1)) - kernel_initializer = conv_params.setdefault( - "kernel_initializer", "he_normal") - padding = conv_params.setdefault("padding", "same") - kernel_regularizer = conv_params.setdefault( - "kernel_regularizer", l2(1.e-4)) - - def f(input): - conv = Conv2D(filters=filters, kernel_size=kernel_size, - strides=strides, padding=padding, - kernel_initializer=kernel_initializer, - kernel_regularizer=kernel_regularizer)(input) - return _bn_relu(conv) - - return f - - -def _bn_relu_conv(**conv_params): - """Helper to build a BN -> relu -> conv block. 
- This is an improved scheme proposed in http://arxiv.org/pdf/1603.05027v2.pdf - """ - filters = conv_params["filters"] - kernel_size = conv_params["kernel_size"] - strides = conv_params.setdefault("strides", (1, 1)) - kernel_initializer = conv_params.setdefault( - "kernel_initializer", "he_normal") - padding = conv_params.setdefault("padding", "same") - kernel_regularizer = conv_params.setdefault( - "kernel_regularizer", l2(1.e-4)) - - def f(input): - activation = _bn_relu(input) - return Conv2D(filters=filters, kernel_size=kernel_size, - strides=strides, padding=padding, - kernel_initializer=kernel_initializer, - kernel_regularizer=kernel_regularizer)(activation) - - return f - - -def _shortcut(input, residual): - """Adds a shortcut between input and residual block and merges them with "sum" - """ - # Expand channels of shortcut to match residual. - # Stride appropriately to match residual (width, height) - # Should be int if network architecture is correctly configured. - input_shape = K.int_shape(input) - residual_shape = K.int_shape(residual) - stride_width = int(round(input_shape[ROW_AXIS] / residual_shape[ROW_AXIS])) - stride_height = int( - round(input_shape[COL_AXIS] / residual_shape[COL_AXIS])) - equal_channels = input_shape[CHANNEL_AXIS] == residual_shape[CHANNEL_AXIS] - - shortcut = input - # 1 X 1 conv if shape is different. Else identity. - if stride_width > 1 or stride_height > 1 or not equal_channels: - shortcut = Conv2D(filters=residual_shape[CHANNEL_AXIS], - kernel_size=(1, 1), - strides=(stride_width, stride_height), - padding="valid", - kernel_initializer="he_normal", - kernel_regularizer=l2(0.0001))(input) - - return add([shortcut, residual]) - - -def _residual_block(block_function, filters, repetitions, is_first_layer=False): - """Builds a residual block with repeating bottleneck blocks. - """ - def f(input): - for i in range(repetitions): - init_strides = (1, 1) - if i == 0 and not is_first_layer: - init_strides = (2, 2) - input = block_function(filters=filters, init_strides=init_strides, - is_first_block_of_first_layer=(is_first_layer and i == 0))(input) - return input - - return f - - -def basic_block(filters, init_strides=(1, 1), is_first_block_of_first_layer=False): - """Basic 3 X 3 convolution blocks for use on resnets with layers <= 34. - Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf - """ - def f(input): - - if is_first_block_of_first_layer: - # don't repeat bn->relu since we just did bn->relu->maxpool - conv1 = Conv2D(filters=filters, kernel_size=(3, 3), - strides=init_strides, - padding="same", - kernel_initializer="he_normal", - kernel_regularizer=l2(1e-4))(input) - else: - conv1 = _bn_relu_conv(filters=filters, kernel_size=(3, 3), - strides=init_strides)(input) - - residual = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(conv1) - return _shortcut(input, residual) - - return f - - -def bottleneck(filters, init_strides=(1, 1), is_first_block_of_first_layer=False): - """Bottleneck architecture for > 34 layer resnet. 
- Follows improved proposed scheme in http://arxiv.org/pdf/1603.05027v2.pdf - - Returns: - A final conv layer of filters * 4 - """ - def f(input): - - if is_first_block_of_first_layer: - # don't repeat bn->relu since we just did bn->relu->maxpool - conv_1_1 = Conv2D(filters=filters, kernel_size=(1, 1), - strides=init_strides, - padding="same", - kernel_initializer="he_normal", - kernel_regularizer=l2(1e-4))(input) - else: - conv_1_1 = _bn_relu_conv(filters=filters, kernel_size=(1, 1), - strides=init_strides)(input) - - conv_3_3 = _bn_relu_conv(filters=filters, kernel_size=(3, 3))(conv_1_1) - residual = _bn_relu_conv( - filters=filters * 4, kernel_size=(1, 1))(conv_3_3) - return _shortcut(input, residual) - - return f - - -def _handle_dim_ordering(): - global ROW_AXIS - global COL_AXIS - global CHANNEL_AXIS - if K.image_dim_ordering() == 'tf': - ROW_AXIS = 1 - COL_AXIS = 2 - CHANNEL_AXIS = 3 - else: - CHANNEL_AXIS = 1 - ROW_AXIS = 2 - COL_AXIS = 3 - - -def _get_block(identifier): - if isinstance(identifier, six.string_types): - res = globals().get(identifier) - if not res: - raise ValueError('Invalid {}'.format(identifier)) - return res - return identifier - - -class ResnetBuilder(object): - @staticmethod - def build(input_shape, num_outputs, block_fn, repetitions): - """Builds a custom ResNet like architecture. - - Args: - input_shape: The input shape in the form (nb_channels, nb_rows, nb_cols) - num_outputs: The number of outputs at final softmax layer - block_fn: The block function to use. This is either `basic_block` or `bottleneck`. - The original paper used basic_block for layers < 50 - repetitions: Number of repetitions of various block units. - At each block unit, the number of filters are doubled and the input size is halved - - Returns: - The keras `Model`. - """ - _handle_dim_ordering() - if len(input_shape) != 3: - raise Exception( - "Input shape should be a tuple (nb_channels, nb_rows, nb_cols)") - - # Permute dimension order if necessary - if K.image_dim_ordering() == 'tf': - input_shape = (input_shape[1], input_shape[2], input_shape[0]) - - # Load function from str if needed. 
- block_fn = _get_block(block_fn) - - input = Input(shape=input_shape) - conv1 = _conv_bn_relu(filters=64, kernel_size=( - 7, 7), strides=(2, 2))(input) - pool1 = MaxPooling2D(pool_size=(3, 3), strides=( - 2, 2), padding="same")(conv1) - - block = pool1 - filters = 64 - for i, r in enumerate(repetitions): - block = _residual_block( - block_fn, filters=filters, repetitions=r, is_first_layer=(i == 0))(block) - filters *= 2 - - # Last activation - block = _bn_relu(block) - - # Classifier block - block_shape = K.int_shape(block) - pool2 = AveragePooling2D(pool_size=(block_shape[ROW_AXIS], block_shape[COL_AXIS]), - strides=(1, 1))(block) - flatten1 = Flatten()(pool2) - dense = Dense(units=num_outputs, kernel_initializer="he_normal", - activation="softmax")(flatten1) - - model = Model(inputs=input, outputs=dense) - return model - - @staticmethod - def build_resnet_18(input_shape, num_outputs): - return ResnetBuilder.build(input_shape, num_outputs, basic_block, [2, 2, 2, 2]) - - @staticmethod - def build_resnet_34(input_shape, num_outputs): - return ResnetBuilder.build(input_shape, num_outputs, basic_block, [3, 4, 6, 3]) - - @staticmethod - def build_resnet_50(input_shape, num_outputs): - return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 6, 3]) - - @staticmethod - def build_resnet_101(input_shape, num_outputs): - return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 4, 23, 3]) - - @staticmethod - def build_resnet_152(input_shape, num_outputs): - return ResnetBuilder.build(input_shape, num_outputs, bottleneck, [3, 8, 36, 3]) -from .segmentation_models import * -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -# Note: To use the 'upload' functionality of this file, you must: -# $ pip install twine - -import io -import os -import sys -from shutil import rmtree - -from setuptools import find_packages, setup, Command - -# Package meta-data. -NAME = 'segmentation_models' -DESCRIPTION = 'Image segmentation models with pre-trained backbones with Keras.' -URL = 'https://github.com/qubvel/segmentation_models' -EMAIL = 'qubvel@gmail.com' -AUTHOR = 'Pavel Yakubovskiy' -REQUIRES_PYTHON = '>=3.0.0' -VERSION = None - -# The rest you shouldn't have to touch too much :) -# ------------------------------------------------ -# Except, perhaps the License and Trove Classifiers! -# If you do change the License, remember to change the Trove Classifier for that! - -here = os.path.abspath(os.path.dirname(__file__)) - -# What packages are required for this module to be executed? -try: - with open(os.path.join(here, 'requirements.txt'), encoding='utf-8') as f: - REQUIRED = f.read().split('\n') -except: - REQUIRED = [] - -# What packages are optional? -EXTRAS = { - # 'fancy feature': ['django'], -} - -# Import the README and use it as the long-description. -# Note: this will only work if 'README.md' is present in your MANIFEST.in file! -try: - with io.open(os.path.join(here, 'README.md'), encoding='utf-8') as f: - long_description = '\n' + f.read() -except FileNotFoundError: - long_description = DESCRIPTION - -# Load the package's __version__.py module as a dictionary. -about = {} -if not VERSION: - with open(os.path.join(here, NAME, '__version__.py')) as f: - exec(f.read(), about) -else: - about['__version__'] = VERSION - - -class UploadCommand(Command): - """Support setup.py upload.""" - - description = 'Build and publish the package.' 
- user_options = [] - - @staticmethod - def status(s): - """Prints things in bold.""" - print(s) - - def initialize_options(self): - pass - - def finalize_options(self): - pass - - def run(self): - try: - self.status('Removing previous builds...') - rmtree(os.path.join(here, 'dist')) - except OSError: - pass - - self.status('Building Source and Wheel (universal) distribution...') - os.system( - '{0} setup.py sdist bdist_wheel --universal'.format(sys.executable)) - - self.status('Uploading the package to PyPI via Twine...') - os.system('twine upload dist/*') - - self.status('Pushing git tags...') - os.system('git tag v{0}'.format(about['__version__'])) - os.system('git push --tags') - - sys.exit() - - -# Where the magic happens: -setup( - name=NAME, - version=about['__version__'], - description=DESCRIPTION, - long_description=long_description, - long_description_content_type='text/x-rst', - author=AUTHOR, - author_email=EMAIL, - python_requires=REQUIRES_PYTHON, - url=URL, - packages=find_packages(exclude=('tests', 'docs', 'images')), - # If your package is a single module, use this instead of 'packages': - # py_modules=['mypackage'], - - # entry_points={ - # 'console_scripts': ['mycli=mymodule:cli'], - # }, - install_requires=REQUIRED, - extras_require=EXTRAS, - include_package_data=True, - license='MIT', - classifiers=[ - # Trove classifiers - # Full list: https://pypi.python.org/pypi?%3Aaction=list_classifiers - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy' - ], - # $ setup.py publish support. - cmdclass={ - 'upload': UploadCommand, - }, -) -# -*- coding: utf-8 -*- -# -# Configuration file for the Sphinx documentation builder. -# -# This file does only contain a selection of the most common options. For a -# full list see the documentation: -# http://www.sphinx-doc.org/en/master/config - -# -- Path setup -------------------------------------------------------------- - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - -# -- Project information ----------------------------------------------------- -import sphinx_rtd_theme -import sys -sys.path.append('..') - -project = u'Segmentation Models' -copyright = u'2018, Pavel Yakubovskiy' -author = u'Pavel Yakubovskiy' - -# The short X.Y version -version = u'' -# The full version, including alpha/beta/rc tags -release = u'0.1.2' - - -# -- General configuration --------------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.coverage', - 'sphinx.ext.napoleon', -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The master toctree document. 
-master_doc = 'index' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This pattern also affects html_static_path and html_extra_path. -exclude_patterns = [u'_build', 'Thumbs.db', '.DS_Store'] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = None - - -# -- Options for HTML output ------------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -# -- Theme setup ------------------------------------------------------------- - - -html_theme = 'sphinx_rtd_theme' -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# Custom sidebar templates, must be a dictionary that maps document names -# to template names. -# -# The default sidebars (for documents that don't match any pattern) are -# defined by theme itself. Builtin themes are using these templates by -# default: ``['localtoc.html', 'relations.html', 'sourcelink.html', -# 'searchbox.html']``. -# -# html_sidebars = {} - - -# -- Options for HTMLHelp output --------------------------------------------- - -# Output file base name for HTML help builder. -htmlhelp_basename = 'SegmentationModelsdoc' - - -# -- Options for LaTeX output ------------------------------------------------ - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'SegmentationModels.tex', u'Segmentation Models Documentation', - u'Pavel Yakubovskiy', 'manual'), -] - - -# -- Options for manual page output ------------------------------------------ - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'segmentationmodels', u'Segmentation Models Documentation', - [author], 1) -] - - -# -- Options for Texinfo output ---------------------------------------------- - -# Grouping the document tree into Texinfo files. 
List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'SegmentationModels', u'Segmentation Models Documentation', - author, 'SegmentationModels', 'One line description of project.', - 'Miscellaneous'), -] - - -# -- Options for Epub output ------------------------------------------------- - -# Bibliographic Dublin Core info. -epub_title = project - -# The unique identifier of the text. This can be a ISBN number -# or the project homepage. -# -# epub_identifier = '' - -# A unique identification for the text. -# -# epub_uid = '' - -# A list of files that should not be packed into the epub file. -epub_exclude_files = ['search.html'] - - -# -- Extension configuration ------------------------------------------------- - -autodoc_mock_imports = ['skimage', 'tensorflow'] -from . import losses -from . import metrics -from .pspnet import PSPNet -from .linknet import Linknet -from .fpn import FPN -from .unet import Unet -from .__version__ import __version__ -name = "segmentation_models" -VERSION = (0, 2, 0) - -__version__ = '.'.join(map(str, VERSION)) -import keras.backend as K -from keras.losses import binary_crossentropy -from keras.losses import categorical_crossentropy -from keras.utils.generic_utils import get_custom_objects - -from .metrics import jaccard_score, f_score - -SMOOTH = 1e-12 - -__all__ = [ - 'jaccard_loss', 'bce_jaccard_loss', 'cce_jaccard_loss', - 'dice_loss', 'bce_dice_loss', 'cce_dice_loss', -] - - -# ============================== Jaccard Losses ============================== - -def jaccard_loss(gt, pr, class_weights=1., smooth=SMOOTH, per_image=True): - r"""Jaccard loss function for imbalanced datasets: - - .. math:: L(A, B) = 1 - \frac{A \cap B}{A \cup B} - - Args: - gt: ground truth 4D keras tensor (B, H, W, C) - pr: prediction 4D keras tensor (B, H, W, C) - class_weights: 1. or list of class weights, len(weights) = C - smooth: value to avoid division by zero - per_image: if ``True``, metric is calculated as mean over images in batch (B), - else over whole batch - - Returns: - Jaccard loss in range [0, 1] - - """ - return 1 - jaccard_score(gt, pr, class_weights=class_weights, smooth=smooth, per_image=per_image) - - -def bce_jaccard_loss(gt, pr, bce_weight=1., smooth=SMOOTH, per_image=True): - bce = K.mean(binary_crossentropy(gt, pr)) - loss = bce_weight * bce + \ - jaccard_loss(gt, pr, smooth=smooth, per_image=per_image) - return loss - - -def cce_jaccard_loss(gt, pr, cce_weight=1., class_weights=1., smooth=SMOOTH, per_image=True): - cce = categorical_crossentropy(gt, pr) * class_weights - cce = K.mean(cce) - return cce_weight * cce + jaccard_loss(gt, pr, smooth=smooth, class_weights=class_weights, per_image=per_image) - - -# Update custom objects -get_custom_objects().update({ - 'jaccard_loss': jaccard_loss, - 'bce_jaccard_loss': bce_jaccard_loss, - 'cce_jaccard_loss': cce_jaccard_loss, -}) - - -# ============================== Dice Losses ================================ - -def dice_loss(gt, pr, class_weights=1., smooth=SMOOTH, per_image=True): - r"""Dice loss function for imbalanced datasets: - - .. math:: L(precision, recall) = 1 - (1 + \beta^2) \frac{precision \cdot recall} - {\beta^2 \cdot precision + recall} - - Args: - gt: ground truth 4D keras tensor (B, H, W, C) - pr: prediction 4D keras tensor (B, H, W, C) - class_weights: 1. 
or list of class weights, len(weights) = C - smooth: value to avoid division by zero - per_image: if ``True``, metric is calculated as mean over images in batch (B), - else over whole batch - - Returns: - Dice loss in range [0, 1] - - """ - return 1 - f_score(gt, pr, class_weights=class_weights, smooth=smooth, per_image=per_image, beta=1.) - - -def bce_dice_loss(gt, pr, bce_weight=1., smooth=SMOOTH, per_image=True): - bce = K.mean(binary_crossentropy(gt, pr)) - loss = bce_weight * bce + \ - dice_loss(gt, pr, smooth=smooth, per_image=per_image) - return loss - - -def cce_dice_loss(gt, pr, cce_weight=1., class_weights=1., smooth=SMOOTH, per_image=True): - cce = categorical_crossentropy(gt, pr) * class_weights - cce = K.mean(cce) - return cce_weight * cce + dice_loss(gt, pr, smooth=smooth, class_weights=class_weights, per_image=per_image) - - -# Update custom objects -get_custom_objects().update({ - 'dice_loss': dice_loss, - 'bce_dice_loss': bce_dice_loss, - 'cce_dice_loss': cce_dice_loss, -}) -import keras.backend as K -from keras.utils.generic_utils import get_custom_objects - -__all__ = [ - 'iou_score', 'jaccard_score', 'f1_score', 'f2_score', 'dice_score', - 'get_f_score', 'get_iou_score', 'get_jaccard_score', -] - -SMOOTH = 1e-12 - - -# ============================ Jaccard/IoU score ============================ - - -def iou_score(gt, pr, class_weights=1., smooth=SMOOTH, per_image=True): - r""" The `Jaccard index`_, also known as Intersection over Union and the Jaccard similarity coefficient - (originally coined coefficient de communauté by Paul Jaccard), is a statistic used for comparing the - similarity and diversity of sample sets. The Jaccard coefficient measures similarity between finite sample sets, - and is defined as the size of the intersection divided by the size of the union of the sample sets: - - .. math:: J(A, B) = \frac{A \cap B}{A \cup B} - - Args: - gt: ground truth 4D keras tensor (B, H, W, C) - pr: prediction 4D keras tensor (B, H, W, C) - class_weights: 1. or list of class weights, len(weights) = C - smooth: value to avoid division by zero - per_image: if ``True``, metric is calculated as mean over images in batch (B), - else over whole batch - - Returns: - IoU/Jaccard score in range [0, 1] - - .. _`Jaccard index`: https://en.wikipedia.org/wiki/Jaccard_index - - """ - if per_image: - axes = [1, 2] - else: - axes = [0, 1, 2] - - intersection = K.sum(gt * pr, axis=axes) - union = K.sum(gt + pr, axis=axes) - intersection - iou = (intersection + smooth) / (union + smooth) - - # mean per image - if per_image: - iou = K.mean(iou, axis=0) - - # weighted mean per class - iou = K.mean(iou * class_weights) - - return iou - - -def get_iou_score(class_weights=1., smooth=SMOOTH, per_image=True): - """Change default parameters of IoU/Jaccard score - - Args: - class_weights: 1. 
or list of class weights, len(weights) = C - smooth: value to avoid division by zero - per_image: if ``True``, metric is calculated as mean over images in batch (B), - else over whole batch - - Returns: - ``callable``: IoU/Jaccard score - """ - def score(gt, pr): - return iou_score(gt, pr, class_weights=class_weights, smooth=smooth, per_image=per_image) - - return score - - -jaccard_score = iou_score -get_jaccard_score = get_iou_score - -# Update custom objects -get_custom_objects().update({ - 'iou_score': iou_score, - 'jaccard_score': jaccard_score, -}) - - -# ============================== F/Dice - score ============================== - -def f_score(gt, pr, class_weights=1, beta=1, smooth=SMOOTH, per_image=True): - r"""The F-score (Dice coefficient) can be interpreted as a weighted average of the precision and recall, - where an F-score reaches its best value at 1 and worst score at 0. - The relative contribution of ``precision`` and ``recall`` to the F1-score are equal. - The formula for the F score is: - - .. math:: F_\beta(precision, recall) = (1 + \beta^2) \frac{precision \cdot recall} - {\beta^2 \cdot precision + recall} - - The formula in terms of *Type I* and *Type II* errors: - - .. math:: F_\beta(A, B) = \frac{(1 + \beta^2) TP} {(1 + \beta^2) TP + \beta^2 FN + FP} - - - where: - TP - true positive; - FP - false positive; - FN - false negative; - - Args: - gt: ground truth 4D keras tensor (B, H, W, C) - pr: prediction 4D keras tensor (B, H, W, C) - class_weights: 1. or list of class weights, len(weights) = C - beta: f-score coefficient - smooth: value to avoid division by zero - per_image: if ``True``, metric is calculated as mean over images in batch (B), - else over whole batch - - Returns: - F-score in range [0, 1] - - """ - if per_image: - axes = [1, 2] - else: - axes = [0, 1, 2] - - tp = K.sum(gt * pr, axis=axes) - fp = K.sum(pr, axis=axes) - tp - fn = K.sum(gt, axis=axes) - tp - - score = ((1 + beta ** 2) * tp + smooth) \ - / ((1 + beta ** 2) * tp + beta ** 2 * fn + fp + smooth) - - # mean per image - if per_image: - score = K.mean(score, axis=0) - - # weighted mean per class - score = K.mean(score * class_weights) - - return score - - -def get_f_score(class_weights=1, beta=1, smooth=SMOOTH, per_image=True): - """Change default parameters of F-score score - - Args: - class_weights: 1. 
or list of class weights, len(weights) = C - smooth: value to avoid division by zero - beta: f-score coefficient - per_image: if ``True``, metric is calculated as mean over images in batch (B), - else over whole batch - - Returns: - ``callable``: F-score - """ - def score(gt, pr): - return f_score(gt, pr, class_weights=class_weights, beta=beta, smooth=smooth, per_image=per_image) - - return score - - -f1_score = get_f_score(beta=1) -f2_score = get_f_score(beta=2) -dice_score = f1_score - -# Update custom objects -get_custom_objects().update({ - 'f1_score': f1_score, - 'f2_score': f2_score, - 'dice_score': dice_score, -}) -""" Utility functions for segmentation models """ -import warnings -import numpy as np -from functools import wraps -from keras.layers import BatchNormalization -from keras.models import model_from_json - - -def legacy_support(kwargs_map): - """ - Decorator which map old kwargs to new ones - - Args: - kwargs_map: dict 'old_argument: 'new_argument' (None if removed) - - """ - def decorator(func): - - @wraps(func) - def wrapper(*args, **kwargs): - - # rename arguments - for old_arg, new_arg in kwargs_map.items(): - if old_arg in kwargs.keys(): - if new_arg is None: - raise TypeError( - "got an unexpected keyword argument '{}'".format(old_arg)) - warnings.warn('`{old_arg}` is deprecated and will be removed ' - 'in future releases, use `{new_arg}` instead.'.format(old_arg=old_arg, new_arg=new_arg)) - kwargs[new_arg] = kwargs[old_arg] - - return func(*args, **kwargs) - - return wrapper - - return decorator - - -def get_layer_number(model, layer_name): - """ - Help find layer in Keras model by name - Args: - model: Keras `Model` - layer_name: str, name of layer - - Returns: - index of layer - - Raises: - ValueError: if model does not contains layer with such name - """ - for i, l in enumerate(model.layers): - if l.name == layer_name: - return i - raise ValueError('No layer with name {} in model {}.'.format( - layer_name, model.name)) - - -def extract_outputs(model, layers, include_top=False): - """ - Help extract intermediate layer outputs from model - Args: - model: Keras `Model` - layer: list of integers/str, list of layers indexes or names to extract output - include_top: bool, include final model layer output - - Returns: - list of tensors (outputs) - """ - layers_indexes = ([get_layer_number(model, l) if isinstance(l, str) else l - for l in layers]) - outputs = [model.layers[i].output for i in layers_indexes] - - if include_top: - outputs.insert(0, model.output) - - return outputs - - -def reverse(l): - """Reverse list""" - return list(reversed(l)) - - -# decorator for models aliases, to add doc string -def add_docstring(doc_string=None): - def decorator(fn): - if fn.__doc__: - fn.__doc__ += doc_string - else: - fn.__doc__ = doc_string - - @wraps(fn) - def wrapper(*args, **kwargs): - return fn(*args, **kwargs) - return wrapper - return decorator - - -def recompile(model): - model.compile(model.optimizer, model.loss, model.metrics) - - -def freeze_model(model): - """model all layers non trainable, excluding BatchNormalization layers""" - for layer in model.layers: - if not isinstance(layer, BatchNormalization): - layer.trainable = False - return - - -def set_trainable(model): - """Set all layers of model trainable and recompile it - - Note: - Model is recompiled using same optimizer, loss and metrics:: - - model.compile(model.optimizer, model.loss, model.metrics) - - Args: - model (``keras.models.Model``): instance of keras model - - """ - for layer in model.layers: - 
layer.trainable = True - recompile(model) - - -def to_tuple(x): - if isinstance(x, tuple): - if len(x) == 2: - return x - elif np.isscalar(x): - return (x, x) - - raise ValueError( - 'Value should be tuple of length 2 or int value, got "{}"'.format(x)) - - -def set_regularization(model, - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - beta_regularizer=None, - gamma_regularizer=None - ): - """Set regularizers to all layers - - Note: - Returned model's config is updated correctly - - Args: - model (``keras.models.Model``): instance of keras model - kernel_regularizer(``regularizer`): regularizer of kernels - bias_regularizer(``regularizer``): regularizer of bias - activity_regularizer(``regularizer``): regularizer of activity - gamma_regularizer(``regularizer``): regularizer of gamma of BatchNormalization - beta_regularizer(``regularizer``): regularizer of beta of BatchNormalization - - Return: - out (``Model``): config updated model - """ - - for layer in model.layers: - # set kernel_regularizer - if kernel_regularizer is not None and hasattr(layer, 'kernel_regularizer'): - layer.kernel_regularizer = kernel_regularizer - # set bias_regularizer - if bias_regularizer is not None and hasattr(layer, 'bias_regularizer'): - layer.bias_regularizer = bias_regularizer - # set activity_regularizer - if activity_regularizer is not None and hasattr(layer, 'activity_regularizer'): - layer.activity_regularizer = activity_regularizer - - # set beta and gamma of BN layer - if beta_regularizer is not None and hasattr(layer, 'beta_regularizer'): - layer.beta_regularizer = beta_regularizer - - if gamma_regularizer is not None and hasattr(layer, 'gamma_regularizer'): - layer.gamma_regularizer = gamma_regularizer - - out = model_from_json(model.to_json()) - out.set_weights(model.get_weights()) - - return out -import pytest -import numpy as np -import keras.backend as K - -from segmentation_models.metrics import iou_score, f_score -from segmentation_models.losses import jaccard_loss, dice_loss - -METRICS = [ - iou_score, - f_score, -] - -LOSSES = [ - dice_loss, - jaccard_loss, -] - -GT0 = np.array( - [ - [0, 0, 0], - [0, 0, 0], - [0, 0, 0], - ], - dtype='float32', -) - -GT1 = np.array( - [ - [1, 1, 0], - [1, 1, 0], - [0, 0, 0], - ], - dtype='float32', -) - -PR1 = np.array( - [ - [0, 0, 0], - [1, 1, 0], - [0, 0, 0], - ], - dtype='float32', -) - -PR2 = np.array( - [ - [0, 0, 0], - [1, 1, 0], - [1, 1, 0], - ], - dtype='float32', -) - -PR3 = np.array( - [ - [0, 0, 0], - [0, 0, 0], - [1, 0, 0], - ], - dtype='float32', -) - -IOU_CASES = ( - - (GT0, GT0, 1.00), - (GT1, GT1, 1.00), - - (GT0, PR1, 0.00), - (GT0, PR2, 0.00), - (GT0, PR3, 0.00), - - (GT1, PR1, 0.50), - (GT1, PR2, 1. / 3.), - (GT1, PR3, 0.00), -) - -F1_CASES = ( - - (GT0, GT0, 1.00), - (GT1, GT1, 1.00), - - (GT0, PR1, 0.00), - (GT0, PR2, 0.00), - (GT0, PR3, 0.00), - - (GT1, PR1, 2. / 3.), - (GT1, PR2, 0.50), - (GT1, PR3, 0.00), -) - -F2_CASES = ( - - (GT0, GT0, 1.00), - (GT1, GT1, 1.00), - - (GT0, PR1, 0.00), - (GT0, PR2, 0.00), - (GT0, PR3, 0.00), - - (GT1, PR1, 5. 
/ 9.),
-    (GT1, PR2, 0.50),
-    (GT1, PR3, 0.00),
-)
-
-
-def _to_4d(x):
-    if x.ndim == 2:
-        return x[None, :, :, None]
-    elif x.ndim == 3:
-        return x[None, :, :]
-    return x  # already 4D, pass through unchanged
-
-
-def _add_4d(x):
-    if x.ndim == 3:
-        return x[..., None]
-    return x  # already has a channel axis
-
-
-@pytest.mark.parametrize('case', IOU_CASES)
-def test_iou_metric(case):
-    gt, pr, res = case
-    gt = _to_4d(gt)
-    pr = _to_4d(pr)
-    score = K.eval(iou_score(gt, pr))
-    assert np.allclose(score, res)
-
-
-@pytest.mark.parametrize('case', IOU_CASES)
-def test_jaccard_loss(case):
-    gt, pr, res = case
-    gt = _to_4d(gt)
-    pr = _to_4d(pr)
-    score = K.eval(jaccard_loss(gt, pr))
-    assert np.allclose(score, 1 - res)
-
-
-def _test_f_metric(case, beta=1):
-    gt, pr, res = case
-    gt = _to_4d(gt)
-    pr = _to_4d(pr)
-    score = K.eval(f_score(gt, pr, beta=beta))
-    assert np.allclose(score, res)
-
-
-@pytest.mark.parametrize('case', F1_CASES)
-def test_f1_metric(case):
-    _test_f_metric(case, beta=1)
-
-
-@pytest.mark.parametrize('case', F2_CASES)
-def test_f2_metric(case):
-    _test_f_metric(case, beta=2)
-
-
-@pytest.mark.parametrize('case', F1_CASES)
-def test_dice_loss(case):
-    gt, pr, res = case
-    gt = _to_4d(gt)
-    pr = _to_4d(pr)
-    score = K.eval(dice_loss(gt, pr))
-    assert np.allclose(score, 1 - res)
-
-
-@pytest.mark.parametrize('func', METRICS + LOSSES)
-def test_per_image(func):
-    gt = np.stack([GT0, GT1], axis=0)
-    pr = np.stack([PR1, PR2], axis=0)
-
-    gt = _add_4d(gt)
-    pr = _add_4d(pr)
-
-    # calculate score per image
-    score_1 = K.eval(func(gt, pr, per_image=True))
-    score_2 = np.mean([
-        K.eval(func(_to_4d(GT0), _to_4d(PR1))),
-        K.eval(func(_to_4d(GT1), _to_4d(PR2))),
-    ])
-    assert np.allclose(score_1, score_2)
-
-
-@pytest.mark.parametrize('func', METRICS + LOSSES)
-def test_per_batch(func):
-    gt = np.stack([GT0, GT1], axis=0)
-    pr = np.stack([PR1, PR2], axis=0)
-
-    gt = _add_4d(gt)
-    pr = _add_4d(pr)
-
-    # calculate score per batch
-    score_1 = K.eval(func(gt, pr, per_image=False))
-
-    gt1 = np.concatenate([GT0, GT1], axis=0)
-    pr1 = np.concatenate([PR1, PR2], axis=0)
-    score_2 = K.eval(func(_to_4d(gt1), _to_4d(pr1), per_image=True))
-
-    assert np.allclose(score_1, score_2)
-
-
-if __name__ == '__main__':
-    pytest.main([__file__])
-import os
-import pytest
-import random
-import six
-import numpy as np
-import keras.backend as K
-
-from segmentation_models import Unet
-from segmentation_models import Linknet
-from segmentation_models import PSPNet
-from segmentation_models import FPN
-from segmentation_models import backbones as sm_backbones
-
-
-def get_backbones():
-    is_travis = os.environ.get('TRAVIS', False)
-    exclude = ['senet154']
-    backbones = sm_backbones.get_names()
-
-    if is_travis:
-        backbones = [b for b in backbones if b not in exclude]
-    return backbones
-
-
-BACKBONES = get_backbones()
-
-
-def _select_names(names):
-    is_full = os.environ.get('FULL_TEST', False)
-    if not is_full:
-        return [random.choice(names)]
-    else:
-        return names
-
-
-def keras_test(func):
-    """Function wrapper to clean up after TensorFlow tests.
-    # Arguments
-        func: test function to clean up after.
-    # Returns
-        A function wrapping the input function.
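-    Example (illustrative sketch, using names already imported in this file):
-        @keras_test
-        def test_build():
-            model = Unet('resnet34', encoder_weights=None)
-            assert model is not None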
- """ - @six.wraps(func) - def wrapper(*args, **kwargs): - output = func(*args, **kwargs) - K.clear_session() - return output - return wrapper - - -@keras_test -def _test_none_shape(model_fn, backbone, *args, **kwargs): - - # define number of channels - input_shape = kwargs.get('input_shape', None) - n_channels = 3 if input_shape is None else input_shape[-1] - - # create test sample - x = np.ones((1, 32, 32, n_channels)) - - # define model and process sample - model = model_fn(backbone, *args, **kwargs) - y = model.predict(x) - - # check output dimensions - assert x.shape[:-1] == y.shape[:-1] - - -@keras_test -def _test_shape(model_fn, backbone, input_shape, *args, **kwargs): - - # create test sample - x = np.ones((1, *input_shape)) - - # define model and process sample - model = model_fn(backbone, input_shape=input_shape, *args, **kwargs) - y = model.predict(x) - - # check output dimensions - assert x.shape[:-1] == y.shape[:-1] - - -@pytest.mark.parametrize('backbone', _select_names(BACKBONES)) -def test_unet(backbone): - _test_none_shape( - Unet, backbone, encoder_weights=None) - - _test_none_shape( - Unet, backbone, encoder_weights='imagenet') - - _test_shape( - Unet, backbone, input_shape=(256, 256, 4), encoder_weights=None) - - -@pytest.mark.parametrize('backbone', _select_names(BACKBONES)) -def test_linknet(backbone): - _test_none_shape( - Linknet, backbone, encoder_weights=None) - - _test_none_shape( - Linknet, backbone, encoder_weights='imagenet') - - _test_shape( - Linknet, backbone, input_shape=(256, 256, 4), encoder_weights=None) - - -@pytest.mark.parametrize('backbone', _select_names(BACKBONES)) -def test_pspnet(backbone): - - _test_shape( - PSPNet, backbone, input_shape=(384, 384, 4), encoder_weights=None) - - _test_shape( - PSPNet, backbone, input_shape=(384, 384, 3), encoder_weights='imagenet') - - -@pytest.mark.parametrize('backbone', _select_names(BACKBONES)) -def test_fpn(backbone): - _test_none_shape( - FPN, backbone, encoder_weights=None) - - _test_none_shape( - FPN, backbone, encoder_weights='imagenet') - - _test_shape( - FPN, backbone, input_shape=(256, 256, 4), encoder_weights=None) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -# import keras.backend.tensorflow_backend as KTF -import keras.backend as K -# import tensorflow as tf -from keras import regularizers - -from segmentation_models.utils import set_regularization -from segmentation_models import Unet - -X1 = np.ones((1, 32, 32, 3)) -Y1 = np.ones((1, 32, 32, 1)) -MODEL = Unet -BACKBONE = 'resnet18' -CASE = ( - - (X1, Y1, MODEL, BACKBONE), -) - - -def _test_regularizer(model, reg_model, x, y): - - def zero_loss(gt, pr): - return pr * 0 - - model.compile('Adam', loss=zero_loss, metrics=['binary_accuracy']) - reg_model.compile('Adam', loss=zero_loss, metrics=['binary_accuracy']) - - loss_1, _ = model.test_on_batch(x, y) - loss_2, _ = reg_model.test_on_batch(x, y) - - assert loss_1 == 0 - assert loss_2 > 0 - - K.clear_session() - - -@pytest.mark.parametrize('case', CASE) -def test_kernel_reg(case): - x, y, model_fn, backbone = case - - l1_reg = regularizers.l1(0.1) - model = model_fn(backbone) - reg_model = set_regularization(model, kernel_regularizer=l1_reg) - _test_regularizer(model, reg_model, x, y) - - l2_reg = regularizers.l2(0.1) - model = model_fn(backbone, encoder_weights=None) - reg_model = set_regularization(model, kernel_regularizer=l2_reg) - _test_regularizer(model, reg_model, x, y) - - -""" -Note: - backbone resnet18 use BN after each conv layer --- so no 
bias used in these conv layers - skip the bias regularizer test - -@pytest.mark.parametrize('case', CASE) -def test_bias_reg(case): - x, y, model_fn, backbone = case - - l1_reg = regularizers.l1(1) - model = model_fn(backbone) - reg_model = set_regularization(model, bias_regularizer=l1_reg) - _test_regularizer(model, reg_model, x, y) - - l2_reg = regularizers.l2(1) - model = model_fn(backbone) - reg_model = set_regularization(model, bias_regularizer=l2_reg) - _test_regularizer(model, reg_model, x, y) -""" - - -@pytest.mark.parametrize('case', CASE) -def test_bn_reg(case): - x, y, model_fn, backbone = case - - l1_reg = regularizers.l1(1) - model = model_fn(backbone) - reg_model = set_regularization(model, gamma_regularizer=l1_reg) - _test_regularizer(model, reg_model, x, y) - - model = model_fn(backbone) - reg_model = set_regularization(model, beta_regularizer=l1_reg) - _test_regularizer(model, reg_model, x, y) - - l2_reg = regularizers.l2(1) - model = model_fn(backbone) - reg_model = set_regularization(model, gamma_regularizer=l2_reg) - _test_regularizer(model, reg_model, x, y) - - model = model_fn(backbone) - reg_model = set_regularization(model, beta_regularizer=l2_reg) - _test_regularizer(model, reg_model, x, y) - - -@pytest.mark.parametrize('case', CASE) -def test_activity_reg(case): - x, y, model_fn, backbone = case - - l2_reg = regularizers.l2(1) - model = model_fn(backbone) - reg_model = set_regularization(model, activity_regularizer=l2_reg) - _test_regularizer(model, reg_model, x, y) - - -if __name__ == '__main__': - pytest.main([__file__]) -from classification_models import Classifiers -from classification_models import resnext - -from . import inception_resnet_v2 as irv2 -from . import inception_v3 as iv3 -from . import mobilenet as mbn -from . import mobilenetv2 as mbn2 - -# replace backbones with others, which have corrected padding mode in first pooling -Classifiers._models.update({ - 'inceptionresnetv2': [irv2.InceptionResNetV2, irv2.preprocess_input], - 'inceptionv3': [iv3.InceptionV3, iv3.preprocess_input], - 'resnext50': [resnext.ResNeXt50, resnext.models.preprocess_input], - 'resnext101': [resnext.ResNeXt101, resnext.models.preprocess_input], - 'mobilenet': [mbn.MobileNet, mbn.preprocess_input], - 'mobilenetv2': [mbn2.MobileNetV2, mbn2.preprocess_input], -}) - -DEFAULT_FEATURE_LAYERS = { - - # List of layers to take features from backbone in the following order: - # (x16, x8, x4, x2, x1) - `x4` mean that features has 4 times less spatial - # resolution (Height x Width) than input image. 
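-    # NOTE: entries are either layer names (str) or layer indices (int);
-    # get_feature_layers(name, n) below simply returns the first n of them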
- - # VGG - 'vgg16': ('block5_conv3', 'block4_conv3', 'block3_conv3', 'block2_conv2', 'block1_conv2'), - 'vgg19': ('block5_conv4', 'block4_conv4', 'block3_conv4', 'block2_conv2', 'block1_conv2'), - - # ResNets - 'resnet18': ('stage4_unit1_relu1', 'stage3_unit1_relu1', 'stage2_unit1_relu1', 'relu0'), - 'resnet34': ('stage4_unit1_relu1', 'stage3_unit1_relu1', 'stage2_unit1_relu1', 'relu0'), - 'resnet50': ('stage4_unit1_relu1', 'stage3_unit1_relu1', 'stage2_unit1_relu1', 'relu0'), - 'resnet101': ('stage4_unit1_relu1', 'stage3_unit1_relu1', 'stage2_unit1_relu1', 'relu0'), - 'resnet152': ('stage4_unit1_relu1', 'stage3_unit1_relu1', 'stage2_unit1_relu1', 'relu0'), - - # ResNeXt - 'resnext50': ('stage4_unit1_relu1', 'stage3_unit1_relu1', 'stage2_unit1_relu1', 'relu0'), - 'resnext101': ('stage4_unit1_relu1', 'stage3_unit1_relu1', 'stage2_unit1_relu1', 'relu0'), - - # Inception - 'inceptionv3': (228, 86, 16, 9), - 'inceptionresnetv2': (594, 260, 16, 9), - - # DenseNet - 'densenet121': (311, 139, 51, 4), - 'densenet169': (367, 139, 51, 4), - 'densenet201': (479, 139, 51, 4), - - # SE models - 'seresnet18': ('stage4_unit1_relu1', 'stage3_unit1_relu1', 'stage2_unit1_relu1', 'relu0'), - 'seresnet34': ('stage4_unit1_relu1', 'stage3_unit1_relu1', 'stage2_unit1_relu1', 'relu0'), - 'seresnet50': (233, 129, 59, 4), - 'seresnet101': (522, 129, 59, 4), - 'seresnet152': (811, 197, 59, 4), - 'seresnext50': (1065, 577, 251, 4), - 'seresnext101': (2442, 577, 251, 4), - 'senet154': (6837, 1614, 451, 12), - - # Mobile Nets - 'mobilenet': ('conv_pw_11_relu', 'conv_pw_5_relu', 'conv_pw_3_relu', 'conv_pw_1_relu'), - 'mobilenetv2': ('block_13_expand_relu', 'block_6_expand_relu', 'block_3_expand_relu', 'block_1_expand_relu'), - -} - - -def get_names(): - return list(DEFAULT_FEATURE_LAYERS.keys()) - - -def get_feature_layers(name, n=5): - return DEFAULT_FEATURE_LAYERS[name][:n] - - -def get_backbone(name, *args, **kwargs): - return Classifiers.get_classifier(name)(*args, **kwargs) - - -def get_preprocessing(name): - return Classifiers.get_preprocessing(name) -# -*- coding: utf-8 -*- -"""Inception-ResNet V2 model for Keras. 
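-(Illustrative usage: ``model = InceptionResNetV2(include_top=False, pooling='avg')`` builds the headless feature extractor that the backbones module above registers.)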
-Model naming and structure follows TF-slim implementation (which has some additional -layers and different number of filters from the original arXiv paper): -https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_resnet_v2.py -Pre-trained ImageNet weights are also converted from TF-slim, which can be found in: -https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models -# Reference -- [Inception-v4, Inception-ResNet and the Impact of - Residual Connections on Learning](https://arxiv.org/abs/1602.07261) -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import warnings - -from keras.models import Model -from keras.layers import Activation -from keras.layers import AveragePooling2D -from keras.layers import BatchNormalization -from keras.layers import Conv2D -from keras.layers import Concatenate -from keras.layers import Dense -from keras.layers import GlobalAveragePooling2D -from keras.layers import GlobalMaxPooling2D -from keras.layers import Input -from keras.layers import Lambda -from keras.layers import MaxPooling2D -from keras.utils.data_utils import get_file -from keras.engine.topology import get_source_inputs -from keras.applications import imagenet_utils -from keras import backend as K - - -import keras -from distutils.version import StrictVersion - -if StrictVersion(keras.__version__) < StrictVersion('2.2.0'): - from keras.applications.imagenet_utils import _obtain_input_shape -else: - from keras_applications.imagenet_utils import _obtain_input_shape - - -BASE_WEIGHT_URL = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.7/' - - -def preprocess_input(x): - """Preprocesses a numpy array encoding a batch of images. - # Arguments - x: a 4D numpy array consists of RGB values within [0, 255]. - # Returns - Preprocessed array. - """ - return imagenet_utils.preprocess_input(x, mode='tf') - - -def conv2d_bn(x, - filters, - kernel_size, - strides=1, - padding='same', - activation='relu', - use_bias=False, - name=None): - """Utility function to apply conv + BN. - # Arguments - x: input tensor. - filters: filters in `Conv2D`. - kernel_size: kernel size as in `Conv2D`. - strides: strides in `Conv2D`. - padding: padding mode in `Conv2D`. - activation: activation in `Conv2D`. - use_bias: whether to use a bias in `Conv2D`. - name: name of the ops; will become `name + '_ac'` for the activation - and `name + '_bn'` for the batch norm layer. - # Returns - Output tensor after applying `Conv2D` and `BatchNormalization`. - """ - x = Conv2D(filters, - kernel_size, - strides=strides, - padding=padding, - use_bias=use_bias, - name=name)(x) - if not use_bias: - bn_axis = 1 if K.image_data_format() == 'channels_first' else 3 - bn_name = None if name is None else name + '_bn' - x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) - if activation is not None: - ac_name = None if name is None else name + '_ac' - x = Activation(activation, name=ac_name)(x) - return x - - -def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'): - """Adds a Inception-ResNet block. - This function builds 3 types of Inception-ResNet blocks mentioned - in the paper, controlled by the `block_type` argument (which is the - block name used in the official TF-slim implementation): - - Inception-ResNet-A: `block_type='block35'` - - Inception-ResNet-B: `block_type='block17'` - - Inception-ResNet-C: `block_type='block8'` - # Arguments - x: input tensor. 
- scale: scaling factor to scale the residuals (i.e., the output of - passing `x` through an inception module) before adding them - to the shortcut branch. Let `r` be the output from the residual branch, - the output of this block will be `x + scale * r`. - block_type: `'block35'`, `'block17'` or `'block8'`, determines - the network structure in the residual branch. - block_idx: an `int` used for generating layer names. The Inception-ResNet blocks - are repeated many times in this network. We use `block_idx` to identify - each of the repetitions. For example, the first Inception-ResNet-A block - will have `block_type='block35', block_idx=0`, ane the layer names will have - a common prefix `'block35_0'`. - activation: activation function to use at the end of the block - (see [activations](../activations.md)). - When `activation=None`, no activation is applied - (i.e., "linear" activation: `a(x) = x`). - # Returns - Output tensor for the block. - # Raises - ValueError: if `block_type` is not one of `'block35'`, - `'block17'` or `'block8'`. - """ - if block_type == 'block35': - branch_0 = conv2d_bn(x, 32, 1) - branch_1 = conv2d_bn(x, 32, 1) - branch_1 = conv2d_bn(branch_1, 32, 3) - branch_2 = conv2d_bn(x, 32, 1) - branch_2 = conv2d_bn(branch_2, 48, 3) - branch_2 = conv2d_bn(branch_2, 64, 3) - branches = [branch_0, branch_1, branch_2] - elif block_type == 'block17': - branch_0 = conv2d_bn(x, 192, 1) - branch_1 = conv2d_bn(x, 128, 1) - branch_1 = conv2d_bn(branch_1, 160, [1, 7]) - branch_1 = conv2d_bn(branch_1, 192, [7, 1]) - branches = [branch_0, branch_1] - elif block_type == 'block8': - branch_0 = conv2d_bn(x, 192, 1) - branch_1 = conv2d_bn(x, 192, 1) - branch_1 = conv2d_bn(branch_1, 224, [1, 3]) - branch_1 = conv2d_bn(branch_1, 256, [3, 1]) - branches = [branch_0, branch_1] - else: - raise ValueError('Unknown Inception-ResNet block type. ' - 'Expects "block35", "block17" or "block8", ' - 'but got: ' + str(block_type)) - - block_name = block_type + '_' + str(block_idx) - channel_axis = 1 if K.image_data_format() == 'channels_first' else 3 - mixed = Concatenate(axis=channel_axis, - name=block_name + '_mixed')(branches) - up = conv2d_bn(mixed, - K.int_shape(x)[channel_axis], - 1, - activation=None, - use_bias=True, - name=block_name + '_conv') - - x = Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale, - output_shape=K.int_shape(x)[1:], - arguments={'scale': scale}, - name=block_name)([x, up]) - - if activation is not None: - x = Activation(activation, name=block_name + '_ac')(x) - return x - - -def InceptionResNetV2(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000): - """Instantiates the Inception-ResNet v2 architecture. - Optionally loads weights pre-trained on ImageNet. - Note that when using TensorFlow, for best performance you should - set `"image_data_format": "channels_last"` in your Keras config - at `~/.keras/keras.json`. - The model and the weights are compatible with TensorFlow, Theano and - CNTK backends. The data format convention used by the model is - the one specified in your Keras config file. - Note that the default input image size for this model is 299x299, instead - of 224x224 as in the VGG16 and ResNet models. Also, the input preprocessing - function is different (i.e., do not use `imagenet_utils.preprocess_input()` - with this model. Use `preprocess_input()` defined in this module instead). - # Arguments - include_top: whether to include the fully-connected - layer at the top of the network. 
- weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is `False` (otherwise the input shape - has to be `(299, 299, 3)` (with `'channels_last'` data format) - or `(3, 299, 299)` (with `'channels_first'` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 139. - E.g. `(150, 150, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the last convolutional layer. - - `'avg'` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a 2D tensor. - - `'max'` means that global max pooling will be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is `True`, and - if no `weights` argument is specified. - # Returns - A Keras `Model` instance. - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - """ - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as imagenet with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = _obtain_input_shape( - input_shape, - default_size=299, - min_size=139, - data_format=K.image_data_format(), - require_flatten=False, - weights=weights) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - # Stem block: 35 x 35 x 192 - x = conv2d_bn(img_input, 32, 3, strides=2, padding='same') - x = conv2d_bn(x, 32, 3, padding='same') - x = conv2d_bn(x, 64, 3) - x = MaxPooling2D(3, strides=2, padding='same')(x) - x = conv2d_bn(x, 80, 1, padding='same') - x = conv2d_bn(x, 192, 3, padding='same') - x = MaxPooling2D(3, strides=2, padding='same')(x) - - # Mixed 5b (Inception-A block): 35 x 35 x 320 - branch_0 = conv2d_bn(x, 96, 1) - branch_1 = conv2d_bn(x, 48, 1) - branch_1 = conv2d_bn(branch_1, 64, 5) - branch_2 = conv2d_bn(x, 64, 1) - branch_2 = conv2d_bn(branch_2, 96, 3) - branch_2 = conv2d_bn(branch_2, 96, 3) - branch_pool = AveragePooling2D(3, strides=1, padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 64, 1) - branches = [branch_0, branch_1, branch_2, branch_pool] - channel_axis = 1 if K.image_data_format() == 'channels_first' else 3 - x = Concatenate(axis=channel_axis, name='mixed_5b')(branches) - - # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320 - for block_idx in range(1, 11): - x = inception_resnet_block(x, - scale=0.17, - block_type='block35', - block_idx=block_idx) - - # Mixed 6a (Reduction-A block): 17 x 17 x 1088 - branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='same') - branch_1 = conv2d_bn(x, 256, 1) - branch_1 = conv2d_bn(branch_1, 256, 3) - branch_1 = conv2d_bn(branch_1, 384, 
3, strides=2, padding='same') - branch_pool = MaxPooling2D(3, strides=2, padding='same')(x) - branches = [branch_0, branch_1, branch_pool] - x = Concatenate(axis=channel_axis, name='mixed_6a')(branches) - - # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088 - for block_idx in range(1, 21): - x = inception_resnet_block(x, - scale=0.1, - block_type='block17', - block_idx=block_idx) - - # Mixed 7a (Reduction-B block): 8 x 8 x 2080 - branch_0 = conv2d_bn(x, 256, 1) - branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='same') - branch_1 = conv2d_bn(x, 256, 1) - branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='same') - branch_2 = conv2d_bn(x, 256, 1) - branch_2 = conv2d_bn(branch_2, 288, 3) - branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='same') - branch_pool = MaxPooling2D(3, strides=2, padding='same')(x) - branches = [branch_0, branch_1, branch_2, branch_pool] - x = Concatenate(axis=channel_axis, name='mixed_7a')(branches) - - # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080 - for block_idx in range(1, 10): - x = inception_resnet_block(x, - scale=0.2, - block_type='block8', - block_idx=block_idx) - x = inception_resnet_block(x, - scale=1., - activation=None, - block_type='block8', - block_idx=10) - - # Final convolution block: 8 x 8 x 1536 - x = conv2d_bn(x, 1536, 1, name='conv_7b') - - if include_top: - # Classification block - x = GlobalAveragePooling2D(name='avg_pool')(x) - x = Dense(classes, activation='softmax', name='predictions')(x) - else: - if pooling == 'avg': - x = GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor` - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model - model = Model(inputs, x, name='inception_resnet_v2') - - # Load weights - if weights == 'imagenet': - if K.image_data_format() == 'channels_first': - if K.backend() == 'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image data format convention ' - '(`image_data_format="channels_first"`). ' - 'For best performance, set ' - '`image_data_format="channels_last"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - if include_top: - fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5' - weights_path = get_file(fname, - BASE_WEIGHT_URL + fname, - cache_subdir='models', - file_hash='e693bd0210a403b3192acc6073ad2e96') - else: - fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels_notop.h5' - weights_path = get_file(fname, - BASE_WEIGHT_URL + fname, - cache_subdir='models', - file_hash='d19885ff4a710c122648d3b5c3b684e4') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model -# -*- coding: utf-8 -*- -"""Inception V3 model for Keras. -Note that the input image format for this model is different than for -the VGG16 and ResNet models (299x299 instead of 224x224), -and that the input preprocessing function is also different (same as Xception). 
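-Note that `preprocess_input` at the bottom of this module uses mode='tf'
-preprocessing, i.e. it maps RGB values from [0, 255] to [-1, 1] via x / 127.5 - 1.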
-# Reference -- [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567) -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import warnings - -from keras.models import Model -from keras import layers -from keras.layers import Activation -from keras.layers import Dense -from keras.layers import Input -from keras.layers import BatchNormalization -from keras.layers import Conv2D -from keras.layers import MaxPooling2D -from keras.layers import AveragePooling2D -from keras.layers import GlobalAveragePooling2D -from keras.layers import GlobalMaxPooling2D -from keras.engine.topology import get_source_inputs -from keras.utils.data_utils import get_file -from keras import backend as K -from keras.applications import imagenet_utils - -import keras -from distutils.version import StrictVersion - -if StrictVersion(keras.__version__) < StrictVersion('2.2.0'): - from keras.applications.imagenet_utils import _obtain_input_shape -else: - from keras_applications.imagenet_utils import _obtain_input_shape - - -WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels.h5' -WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5' - - -def conv2d_bn(x, - filters, - num_row, - num_col, - padding='same', - strides=(1, 1), - name=None): - """Utility function to apply conv + BN. - # Arguments - x: input tensor. - filters: filters in `Conv2D`. - num_row: height of the convolution kernel. - num_col: width of the convolution kernel. - padding: padding mode in `Conv2D`. - strides: strides in `Conv2D`. - name: name of the ops; will become `name + '_conv'` - for the convolution and `name + '_bn'` for the - batch norm layer. - # Returns - Output tensor after applying `Conv2D` and `BatchNormalization`. - """ - if name is not None: - bn_name = name + '_bn' - conv_name = name + '_conv' - else: - bn_name = None - conv_name = None - if K.image_data_format() == 'channels_first': - bn_axis = 1 - else: - bn_axis = 3 - x = Conv2D( - filters, (num_row, num_col), - strides=strides, - padding=padding, - use_bias=False, - name=conv_name)(x) - x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) - x = Activation('relu', name=name)(x) - return x - - -def InceptionV3(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000): - """Instantiates the Inception v3 architecture. - Optionally loads weights pre-trained - on ImageNet. Note that when using TensorFlow, - for best performance you should set - `image_data_format='channels_last'` in your Keras config - at ~/.keras/keras.json. - The model and the weights are compatible with both - TensorFlow and Theano. The data format - convention used by the model is the one - specified in your Keras config file. - Note that the default input image size for this model is 299x299. - # Arguments - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. 
- input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(299, 299, 3)` (with `channels_last` data format) - or `(3, 299, 299)` (with `channels_first` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 139. - E.g. `(150, 150, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - # Returns - A Keras model instance. - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - """ - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as imagenet with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = _obtain_input_shape( - input_shape, - default_size=299, - min_size=139, - data_format=K.image_data_format(), - require_flatten=False, - weights=weights) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - if K.image_data_format() == 'channels_first': - channel_axis = 1 - else: - channel_axis = 3 - - x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='same') - x = conv2d_bn(x, 32, 3, 3, padding='same') - x = conv2d_bn(x, 64, 3, 3) - x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x) - - x = conv2d_bn(x, 80, 1, 1, padding='same') - x = conv2d_bn(x, 192, 3, 3, padding='same') - x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x) - - # mixed 0, 1, 2: 35 x 35 x 256 - branch1x1 = conv2d_bn(x, 64, 1, 1) - - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 32, 1, 1) - x = layers.concatenate( - [branch1x1, branch5x5, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed0') - - # mixed 1: 35 x 35 x 256 - branch1x1 = conv2d_bn(x, 64, 1, 1) - - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 64, 1, 1) - x = layers.concatenate( - [branch1x1, branch5x5, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed1') - - # mixed 2: 35 x 35 x 256 - branch1x1 = 
conv2d_bn(x, 64, 1, 1) - - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 64, 1, 1) - x = layers.concatenate( - [branch1x1, branch5x5, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed2') - - # mixed 3: 17 x 17 x 768 - branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='same') - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn( - branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='same') - - branch_pool = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x) - x = layers.concatenate( - [branch3x3, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed3') - - # mixed 4: 17 x 17 x 768 - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 128, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 128, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 128, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate( - [branch1x1, branch7x7, branch7x7dbl, branch_pool], - axis=channel_axis, - name='mixed4') - - # mixed 5, 6: 17 x 17 x 768 - for i in range(2): - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 160, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 160, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 160, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate( - [branch1x1, branch7x7, branch7x7dbl, branch_pool], - axis=channel_axis, - name='mixed' + str(5 + i)) - - # mixed 7: 17 x 17 x 768 - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 192, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 192, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 192, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate( - [branch1x1, branch7x7, branch7x7dbl, branch_pool], - axis=channel_axis, - name='mixed7') - - # mixed 8: 8 x 8 x 1280 - branch3x3 = conv2d_bn(x, 192, 1, 1) - branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, - strides=(2, 2), padding='same') - - branch7x7x3 = conv2d_bn(x, 192, 1, 1) - branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7) - branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1) - branch7x7x3 = conv2d_bn( - branch7x7x3, 192, 3, 3, strides=(2, 2), padding='same') - - branch_pool = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x) - x = layers.concatenate( - 
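# 320 (branch3x3) + 192 (branch7x7x3) + 768 (pooled mixed7) = 1280 channels feeding 'mixed8' -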
[branch3x3, branch7x7x3, branch_pool], axis=channel_axis, name='mixed8') - - # mixed 9: 8 x 8 x 2048 - for i in range(2): - branch1x1 = conv2d_bn(x, 320, 1, 1) - - branch3x3 = conv2d_bn(x, 384, 1, 1) - branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3) - branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1) - branch3x3 = layers.concatenate( - [branch3x3_1, branch3x3_2], axis=channel_axis, name='mixed9_' + str(i)) - - branch3x3dbl = conv2d_bn(x, 448, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3) - branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3) - branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1) - branch3x3dbl = layers.concatenate( - [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis) - - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate( - [branch1x1, branch3x3, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed' + str(9 + i)) - - if include_top: - # Classification block - x = GlobalAveragePooling2D(name='avg_pool')(x) - x = Dense(classes, activation='softmax', name='predictions')(x) - else: - if pooling == 'avg': - x = GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = Model(inputs, x, name='inception_v3') - - # load weights - if weights == 'imagenet': - if K.image_data_format() == 'channels_first': - if K.backend() == 'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image data format convention ' - '(`image_data_format="channels_first"`). ' - 'For best performance, set ' - '`image_data_format="channels_last"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - if include_top: - weights_path = get_file( - 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5', - WEIGHTS_PATH, - cache_subdir='models', - file_hash='9a0d58056eeedaa3f26cb7ebd46da564') - else: - weights_path = get_file( - 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5', - WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - file_hash='bcbd6486424b2319ff4ef7d526e38f63') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model - - -def preprocess_input(x): - """Preprocesses a numpy array encoding a batch of images. - # Arguments - x: a 4D numpy array consists of RGB values within [0, 255]. - # Returns - Preprocessed array. - """ - return imagenet_utils.preprocess_input(x, mode='tf') -"""MobileNet v1 models for Keras. -MobileNet is a general architecture and can be used for multiple use cases. -Depending on the use case, it can use different input layer size and -different width factors. This allows different width models to reduce -the number of multiply-adds and thereby -reduce inference cost on mobile devices. -MobileNets support any input size greater than 32 x 32, with larger image sizes -offering better performance. -The number of parameters and number of multiply-adds -can be modified by using the `alpha` parameter, -which increases/decreases the number of filters in each layer. -By altering the image size and `alpha` parameter, -all 16 models from the paper can be built, with ImageNet weights provided. 
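-(Illustrative sketch: ``MobileNet(input_shape=(128, 128, 3), alpha=0.25)`` builds the smallest of these pretrained variants.)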
-The paper demonstrates the performance of MobileNets using `alpha` values of -1.0 (also called 100 % MobileNet), 0.75, 0.5 and 0.25. -For each of these `alpha` values, weights for 4 different input image sizes -are provided (224, 192, 160, 128). -The following table describes the size and accuracy of the 100% MobileNet -on size 224 x 224: ----------------------------------------------------------------------------- -Width Multiplier (alpha) | ImageNet Acc | Multiply-Adds (M) | Params (M) ----------------------------------------------------------------------------- -| 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 | -| 0.75 MobileNet-224 | 68.4 % | 325 | 2.6 | -| 0.50 MobileNet-224 | 63.7 % | 149 | 1.3 | -| 0.25 MobileNet-224 | 50.6 % | 41 | 0.5 | ----------------------------------------------------------------------------- -The following table describes the performance of -the 100 % MobileNet on various input sizes: ------------------------------------------------------------------------- - Resolution | ImageNet Acc | Multiply-Adds (M) | Params (M) ------------------------------------------------------------------------- -| 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 | -| 1.0 MobileNet-192 | 69.1 % | 529 | 4.2 | -| 1.0 MobileNet-160 | 67.2 % | 529 | 4.2 | -| 1.0 MobileNet-128 | 64.4 % | 529 | 4.2 | ------------------------------------------------------------------------- -The weights for all 16 models are obtained and translated -from TensorFlow checkpoints found at -https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md -# Reference -- [MobileNets: Efficient Convolutional Neural Networks for - Mobile Vision Applications](https://arxiv.org/pdf/1704.04861.pdf)) -""" -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division - -import os -import warnings - -from keras import backend -from keras import layers -from keras import models -from keras import utils as keras_utils - -from keras_applications import imagenet_utils -from keras_applications.imagenet_utils import _obtain_input_shape - - -BASE_WEIGHT_PATH = ('https://github.com/fchollet/deep-learning-models/' - 'releases/download/v0.6/') - - -def preprocess_input(x, **kwargs): - """Preprocesses a numpy array encoding a batch of images. - # Arguments - x: a 4D numpy array consists of RGB values within [0, 255]. - # Returns - Preprocessed array. - """ - kwargs['backend'] = backend - return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) - - -def MobileNet(input_shape=None, - alpha=1.0, - depth_multiplier=1, - dropout=1e-3, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the MobileNet architecture. - # Arguments - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` - (with `channels_last` data format) - or (3, 224, 224) (with `channels_first` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(200, 200, 3)` would be one valid value. - alpha: controls the width of the network. This is known as the - width multiplier in the MobileNet paper. - - If `alpha` < 1.0, proportionally decreases the number - of filters in each layer. - - If `alpha` > 1.0, proportionally increases the number - of filters in each layer. - - If `alpha` = 1, default number of filters from the paper - are used at each layer. 
- depth_multiplier: depth multiplier for depthwise convolution. This - is called the resolution multiplier in the MobileNet paper. - dropout: dropout rate - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of - `layers.Input()`) - to use as image input for the model. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - # Returns - A Keras model instance. - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - RuntimeError: If attempting to run this model with a - backend that does not support separable convolutions. - """ - - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' - 'as true, `classes` should be 1000') - - # Determine proper input shape and default size. - if input_shape is None: - default_size = 224 - else: - if backend.image_data_format() == 'channels_first': - rows = input_shape[1] - cols = input_shape[2] - else: - rows = input_shape[0] - cols = input_shape[1] - - if rows == cols and rows in [128, 160, 192, 224]: - default_size = rows - else: - default_size = 224 - - input_shape = _obtain_input_shape(input_shape, - default_size=default_size, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if backend.image_data_format() == 'channels_last': - row_axis, col_axis = (0, 1) - else: - row_axis, col_axis = (1, 2) - rows = input_shape[row_axis] - cols = input_shape[col_axis] - - if weights == 'imagenet': - if depth_multiplier != 1: - raise ValueError('If imagenet weights are being loaded, ' - 'depth multiplier must be 1') - - if alpha not in [0.25, 0.50, 0.75, 1.0]: - raise ValueError('If imagenet weights are being loaded, ' - 'alpha can be one of' - '`0.25`, `0.50`, `0.75` or `1.0` only.') - - if rows != cols or rows not in [128, 160, 192, 224]: - rows = 224 - warnings.warn('MobileNet shape is undefined.' - ' Weights for input shape ' - '(224, 224) will be loaded.') - - if backend.image_data_format() != 'channels_last': - warnings.warn('The MobileNet family of models is only available ' - 'for the input data format "channels_last" ' - '(width, height, channels). ' - 'However your settings specify the default ' - 'data format "channels_first" (channels, width, height).' - ' You should set `image_data_format="channels_last"` ' - 'in your Keras config located at ~/.keras/keras.json. 
' - 'The model being returned right now will expect inputs ' - 'to follow the "channels_last" data format.') - backend.set_image_data_format('channels_last') - old_data_format = 'channels_first' - else: - old_data_format = None - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - x = _conv_block(img_input, 32, alpha, strides=(2, 2)) - x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1) - - x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, - strides=(2, 2), block_id=2) - x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3) - - x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, - strides=(2, 2), block_id=4) - x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5) - - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, - strides=(2, 2), block_id=6) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11) - - x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, - strides=(2, 2), block_id=12) - x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) - - if include_top: - if backend.image_data_format() == 'channels_first': - shape = (int(1024 * alpha), 1, 1) - else: - shape = (1, 1, int(1024 * alpha)) - - x = layers.GlobalAveragePooling2D()(x) - x = layers.Reshape(shape, name='reshape_1')(x) - x = layers.Dropout(dropout, name='dropout')(x) - x = layers.Conv2D(classes, (1, 1), - padding='same', - name='conv_preds')(x) - x = layers.Activation('softmax', name='act_softmax')(x) - x = layers.Reshape((classes,), name='reshape_2')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = keras_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = models.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows)) - - # Load weights. 
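-    # weight files follow the pattern mobilenet_{alpha}_{rows}_tf[_no_top].h5,
-    # e.g. mobilenet_1_0_224_tf.h5, as assembled below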
- if weights == 'imagenet': - if backend.image_data_format() == 'channels_first': - raise ValueError('Weights for "channels_first" format ' - 'are not available.') - if alpha == 1.0: - alpha_text = '1_0' - elif alpha == 0.75: - alpha_text = '7_5' - elif alpha == 0.50: - alpha_text = '5_0' - else: - alpha_text = '2_5' - - if include_top: - model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows) - weight_path = BASE_WEIGHT_PATH + model_name - weights_path = keras_utils.get_file(model_name, - weight_path, - cache_subdir='models') - else: - model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows) - weight_path = BASE_WEIGHT_PATH + model_name - weights_path = keras_utils.get_file(model_name, - weight_path, - cache_subdir='models') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - if old_data_format: - backend.set_image_data_format(old_data_format) - return model - - -def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): - """Adds an initial convolution layer (with batch normalization and relu6). - # Arguments - inputs: Input tensor of shape `(rows, cols, 3)` - (with `channels_last` data format) or - (3, rows, cols) (with `channels_first` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(224, 224, 3)` would be one valid value. - filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - alpha: controls the width of the network. - - If `alpha` < 1.0, proportionally decreases the number - of filters in each layer. - - If `alpha` > 1.0, proportionally increases the number - of filters in each layer. - - If `alpha` = 1, default number of filters from the paper - are used at each layer. - kernel: An integer or tuple/list of 2 integers, specifying the - width and height of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution - along the width and height. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - # Input shape - 4D tensor with shape: - `(samples, channels, rows, cols)` if data_format='channels_first' - or 4D tensor with shape: - `(samples, rows, cols, channels)` if data_format='channels_last'. - # Output shape - 4D tensor with shape: - `(samples, filters, new_rows, new_cols)` - if data_format='channels_first' - or 4D tensor with shape: - `(samples, new_rows, new_cols, filters)` - if data_format='channels_last'. - `rows` and `cols` values might have changed due to stride. - # Returns - Output tensor of block. - """ - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - filters = int(filters * alpha) - x = layers.ZeroPadding2D(padding=((0, 1), (0, 1)), - name='conv1_pad')(inputs) - x = layers.Conv2D(filters, kernel, - padding='valid', - use_bias=False, - strides=strides, - name='conv1')(x) - x = layers.BatchNormalization(axis=channel_axis, name='conv1_bn')(x) - return layers.ReLU(6., name='conv1_relu')(x) - - -def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, - depth_multiplier=1, strides=(1, 1), block_id=1): - """Adds a depthwise convolution block. 
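-    (Illustrative shapes: a 112x112x64 input with pointwise_conv_filters=128,
-    alpha=1.0 and strides=(2, 2) yields a 56x56x128 output.)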
- A depthwise convolution block consists of a depthwise conv, - batch normalization, relu6, pointwise convolution, - batch normalization and relu6 activation. - # Arguments - inputs: Input tensor of shape `(rows, cols, channels)` - (with `channels_last` data format) or - (channels, rows, cols) (with `channels_first` data format). - pointwise_conv_filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the pointwise convolution). - alpha: controls the width of the network. - - If `alpha` < 1.0, proportionally decreases the number - of filters in each layer. - - If `alpha` > 1.0, proportionally increases the number - of filters in each layer. - - If `alpha` = 1, default number of filters from the paper - are used at each layer. - depth_multiplier: The number of depthwise convolution output channels - for each input channel. - The total number of depthwise convolution output - channels will be equal to `filters_in * depth_multiplier`. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution - along the width and height. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - block_id: Integer, a unique identification designating - the block number. - # Input shape - 4D tensor with shape: - `(batch, channels, rows, cols)` if data_format='channels_first' - or 4D tensor with shape: - `(batch, rows, cols, channels)` if data_format='channels_last'. - # Output shape - 4D tensor with shape: - `(batch, filters, new_rows, new_cols)` - if data_format='channels_first' - or 4D tensor with shape: - `(batch, new_rows, new_cols, filters)` - if data_format='channels_last'. - `rows` and `cols` values might have changed due to stride. - # Returns - Output tensor of block. - """ - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - pointwise_conv_filters = int(pointwise_conv_filters * alpha) - - if strides == (1, 1): - x = inputs - else: - x = layers.ZeroPadding2D(((0, 1), (0, 1)), - name='conv_pad_%d' % block_id)(inputs) - x = layers.DepthwiseConv2D((3, 3), - padding='same' if strides == ( - 1, 1) else 'valid', - depth_multiplier=depth_multiplier, - strides=strides, - use_bias=False, - name='conv_dw_%d' % block_id)(x) - x = layers.BatchNormalization( - axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x) - x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x) - - x = layers.Conv2D(pointwise_conv_filters, (1, 1), - padding='same', - use_bias=False, - strides=(1, 1), - name='conv_pw_%d' % block_id)(x) - x = layers.BatchNormalization(axis=channel_axis, - name='conv_pw_%d_bn' % block_id)(x) - return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x) -"""MobileNet v2 models for Keras. -MobileNetV2 is a general architecture and can be used for multiple use cases. -Depending on the use case, it can use different input layer size and -different width factors. This allows different width models to reduce -the number of multiply-adds and thereby -reduce inference cost on mobile devices. -MobileNetV2 is very similar to the original MobileNet, -except that it uses inverted residual blocks with -bottlenecking features. It has a drastically lower -parameter count than the original MobileNet. -MobileNets support any input size greater -than 32 x 32, with larger image sizes -offering better performance. 
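-Illustrative sketch: ``MobileNetV2(alpha=0.5, include_top=False, pooling='avg')``
-builds a headless half-width feature extractor.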
-The number of parameters and number of multiply-adds
-can be modified by using the `alpha` parameter,
-which increases/decreases the number of filters in each layer.
-By altering the image size and `alpha` parameter,
-all 22 models from the paper can be built, with ImageNet weights provided.
-The paper demonstrates the performance of MobileNetV2 using `alpha` values of
-0.35, 0.5, 0.75, 1.0 (also called 100% MobileNet), 1.3, and 1.4.
-For each of these `alpha` values, weights for 5 different input image sizes
-are provided (224, 192, 160, 128, and 96).
-The following table describes the performance of
-MobileNetV2 on various input sizes (MACs stands for Multiply-Adds):
----------------------------------------------------------------------------------
-| Classification Checkpoint | MACs (M) | Parameters (M) | Top 1 Acc | Top 5 Acc |
----------------------------------------------------------------------------------
-| [mobilenet_v2_1.4_224]    | 582      | 6.06           | 75.0      | 92.5      |
-| [mobilenet_v2_1.3_224]    | 509      | 5.34           | 74.4      | 92.1      |
-| [mobilenet_v2_1.0_224]    | 300      | 3.47           | 71.8      | 91.0      |
-| [mobilenet_v2_1.0_192]    | 221      | 3.47           | 70.7      | 90.1      |
-| [mobilenet_v2_1.0_160]    | 154      | 3.47           | 68.8      | 89.0      |
-| [mobilenet_v2_1.0_128]    | 99       | 3.47           | 65.3      | 86.9      |
-| [mobilenet_v2_1.0_96]     | 56       | 3.47           | 60.3      | 83.2      |
-| [mobilenet_v2_0.75_224]   | 209      | 2.61           | 69.8      | 89.6      |
-| [mobilenet_v2_0.75_192]   | 153      | 2.61           | 68.7      | 88.9      |
-| [mobilenet_v2_0.75_160]   | 107      | 2.61           | 66.4      | 87.3      |
-| [mobilenet_v2_0.75_128]   | 69       | 2.61           | 63.2      | 85.3      |
-| [mobilenet_v2_0.75_96]    | 39       | 2.61           | 58.8      | 81.6      |
-| [mobilenet_v2_0.5_224]    | 97       | 1.95           | 65.4      | 86.4      |
-| [mobilenet_v2_0.5_192]    | 71       | 1.95           | 63.9      | 85.4      |
-| [mobilenet_v2_0.5_160]    | 50       | 1.95           | 61.0      | 83.2      |
-| [mobilenet_v2_0.5_128]    | 32       | 1.95           | 57.7      | 80.8      |
-| [mobilenet_v2_0.5_96]     | 18       | 1.95           | 51.2      | 75.8      |
-| [mobilenet_v2_0.35_224]   | 59       | 1.66           | 60.3      | 82.9      |
-| [mobilenet_v2_0.35_192]   | 43       | 1.66           | 58.2      | 81.2      |
-| [mobilenet_v2_0.35_160]   | 30       | 1.66           | 55.7      | 79.1      |
-| [mobilenet_v2_0.35_128]   | 20       | 1.66           | 50.8      | 75.0      |
-| [mobilenet_v2_0.35_96]    | 11       | 1.66           | 45.5      | 70.4      |
-The weights for all 22 models are obtained and translated from the
-TensorFlow checkpoints found [here]
-(https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/README.md).
-# Reference
-This file contains building code for MobileNetV2, based on
-[MobileNetV2: Inverted Residuals and Linear Bottlenecks]
-(https://arxiv.org/abs/1801.04381)
-Tests comparing this model to the existing Tensorflow model can be
-found at [mobilenet_v2_keras]
-(https://github.com/JonathanCMitchell/mobilenet_v2_keras)
-"""
-from __future__ import print_function
-from __future__ import absolute_import
-from __future__ import division
-
-import os
-import warnings
-import numpy as np
-
-from keras import backend
-from keras import layers
-from keras import models
-from keras import utils as keras_utils
-
-from keras_applications import correct_pad
-from keras_applications import imagenet_utils
-from keras_applications.imagenet_utils import _obtain_input_shape
-
-# TODO Change path to v1.1
-BASE_WEIGHT_PATH = ('https://github.com/JonathanCMitchell/mobilenet_v2_keras/'
-                    'releases/download/v1.1/')
-
-
-def preprocess_input(x, **kwargs):
-    """Preprocesses a numpy array encoding a batch of images.
-    # Arguments
-        x: a 4D numpy array consisting of RGB values within [0, 255].
-    # Returns
-        Preprocessed array.
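-        Example: pixel values 0, 127.5 and 255 map to -1.0, 0.0 and 1.0
-        respectively under the mode='tf' scaling.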
- """ - kwargs['backend'] = backend - return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) - - -# This function is taken from the original tf repo. -# It ensures that all layers have a channel number that is divisible by 8 -# It can be seen here: -# https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py - - -def _make_divisible(v, divisor, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -def MobileNetV2(input_shape=None, - alpha=1.0, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the MobileNetV2 architecture. - # Arguments - input_shape: optional shape tuple, to be specified if you would - like to use a model with an input img resolution that is not - (224, 224, 3). - It should have exactly 3 inputs channels (224, 224, 3). - You can also omit this option if you would like - to infer input_shape from an input_tensor. - If you choose to include both input_tensor and input_shape then - input_shape will be used if they match, if the shapes - do not match then we will throw an error. - E.g. `(160, 160, 3)` would be one valid value. - alpha: controls the width of the network. This is known as the - width multiplier in the MobileNetV2 paper, but the name is kept for - consistency with MobileNetV1 in Keras. - - If `alpha` < 1.0, proportionally decreases the number - of filters in each layer. - - If `alpha` > 1.0, proportionally increases the number - of filters in each layer. - - If `alpha` = 1, default number of filters from the paper - are used at each layer. - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of - `layers.Input()`) - to use as image input for the model. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - # Returns - A Keras model instance. - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape or invalid alpha, rows when - weights='imagenet' - """ - - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' - 'as true, `classes` should be 1000') - - # Determine proper input shape and default size. 
- # If both input_shape and input_tensor are used, they should match - if input_shape is not None and input_tensor is not None: - try: - is_input_t_tensor = backend.is_keras_tensor(input_tensor) - except ValueError: - try: - is_input_t_tensor = backend.is_keras_tensor( - keras_utils.get_source_inputs(input_tensor)) - except ValueError: - raise ValueError('input_tensor: ', input_tensor, - 'is not type input_tensor') - if is_input_t_tensor: - if backend.image_data_format == 'channels_first': - if backend.int_shape(input_tensor)[1] != input_shape[1]: - raise ValueError('input_shape: ', input_shape, - 'and input_tensor: ', input_tensor, - 'do not meet the same shape requirements') - else: - if backend.int_shape(input_tensor)[2] != input_shape[1]: - raise ValueError('input_shape: ', input_shape, - 'and input_tensor: ', input_tensor, - 'do not meet the same shape requirements') - else: - raise ValueError('input_tensor specified: ', input_tensor, - 'is not a keras tensor') - - # If input_shape is None, infer shape from input_tensor - if input_shape is None and input_tensor is not None: - - try: - backend.is_keras_tensor(input_tensor) - except ValueError: - raise ValueError('input_tensor: ', input_tensor, - 'is type: ', type(input_tensor), - 'which is not a valid type') - - if input_shape is None and not backend.is_keras_tensor(input_tensor): - default_size = 224 - elif input_shape is None and backend.is_keras_tensor(input_tensor): - if backend.image_data_format() == 'channels_first': - rows = backend.int_shape(input_tensor)[2] - cols = backend.int_shape(input_tensor)[3] - else: - rows = backend.int_shape(input_tensor)[1] - cols = backend.int_shape(input_tensor)[2] - - if rows == cols and rows in [96, 128, 160, 192, 224]: - default_size = rows - else: - default_size = 224 - - # If input_shape is None and no input_tensor - elif input_shape is None: - default_size = 224 - - # If input_shape is not None, assume default size - else: - if backend.image_data_format() == 'channels_first': - rows = input_shape[1] - cols = input_shape[2] - else: - rows = input_shape[0] - cols = input_shape[1] - - if rows == cols and rows in [96, 128, 160, 192, 224]: - default_size = rows - else: - default_size = 224 - - input_shape = _obtain_input_shape(input_shape, - default_size=default_size, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if backend.image_data_format() == 'channels_last': - row_axis, col_axis = (0, 1) - else: - row_axis, col_axis = (1, 2) - rows = input_shape[row_axis] - cols = input_shape[col_axis] - - if weights == 'imagenet': - if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]: - raise ValueError('If imagenet weights are being loaded, ' - 'alpha can be one of `0.35`, `0.50`, `0.75`, ' - '`1.0`, `1.3` or `1.4` only.') - - if rows != cols or rows not in [96, 128, 160, 192, 224]: - rows = 224 - warnings.warn('MobileNet shape is undefined.' - ' Weights for input shape' - '(224, 224) will be loaded.') - - if backend.image_data_format() != 'channels_last': - warnings.warn('The MobileNet family of models is only available ' - 'for the input data format "channels_last" ' - '(width, height, channels). ' - 'However your settings specify the default ' - 'data format "channels_first" (channels, width, height).' - ' You should set `image_data_format="channels_last"` ' - 'in your Keras config located at ~/.keras/keras.json. 
' - 'The model being returned right now will expect inputs ' - 'to follow the "channels_last" data format.') - backend.set_image_data_format('channels_last') - old_data_format = 'channels_first' - else: - old_data_format = None - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - first_block_filters = _make_divisible(32 * alpha, 8) - x = layers.ZeroPadding2D(padding=correct_pad(backend, img_input, 3), - name='Conv1_pad')(img_input) - x = layers.Conv2D(first_block_filters, - kernel_size=3, - strides=(2, 2), - padding='valid', - use_bias=False, - name='Conv1')(x) - x = layers.BatchNormalization( - epsilon=1e-3, momentum=0.999, name='bn_Conv1')(x) - x = layers.ReLU(6., name='Conv1_relu')(x) - - x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1, - expansion=1, block_id=0) - - x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2, - expansion=6, block_id=1) - x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, - expansion=6, block_id=2) - - x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2, - expansion=6, block_id=3) - x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, - expansion=6, block_id=4) - x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, - expansion=6, block_id=5) - - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=2, - expansion=6, block_id=6) - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, - expansion=6, block_id=7) - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, - expansion=6, block_id=8) - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, - expansion=6, block_id=9) - - x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, - expansion=6, block_id=10) - x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, - expansion=6, block_id=11) - x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, - expansion=6, block_id=12) - - x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2, - expansion=6, block_id=13) - x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, - expansion=6, block_id=14) - x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, - expansion=6, block_id=15) - - x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, - expansion=6, block_id=16) - - # no alpha applied to last conv as stated in the paper: - # if the width multiplier is greater than 1 we - # increase the number of output channels - if alpha > 1.0: - last_block_filters = _make_divisible(1280 * alpha, 8) - else: - last_block_filters = 1280 - - x = layers.Conv2D(last_block_filters, - kernel_size=1, - use_bias=False, - name='Conv_1')(x) - x = layers.BatchNormalization(epsilon=1e-3, - momentum=0.999, - name='Conv_1_bn')(x) - x = layers.ReLU(6., name='out_relu')(x) - - if include_top: - x = layers.GlobalAveragePooling2D()(x) - x = layers.Dense(classes, activation='softmax', - use_bias=True, name='Logits')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = keras_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = models.Model(inputs, x, - name='mobilenetv2_%0.2f_%s' % (alpha, rows)) - - # Load weights. 
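# The seventeen _inverted_res_block calls above follow the configuration
# table of the MobileNetV2 paper; a hypothetical loop-driven equivalent
# (a sketch for illustration, not the library's own code):
_INVERTED_RES_SETTINGS = [
    # (expansion, filters, stride of first repeat, repeats)
    (1, 16, 1, 1),
    (6, 24, 2, 2),
    (6, 32, 2, 3),
    (6, 64, 2, 4),
    (6, 96, 1, 3),
    (6, 160, 2, 3),
    (6, 320, 1, 1),
]

def _stack_inverted_res_blocks(x, alpha):
    block_id = 0
    for expansion, filters, first_stride, repeats in _INVERTED_RES_SETTINGS:
        for i in range(repeats):
            x = _inverted_res_block(x, filters=filters, alpha=alpha,
                                    stride=first_stride if i == 0 else 1,
                                    expansion=expansion, block_id=block_id)
            block_id += 1
    return x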
- if weights == 'imagenet': - if backend.image_data_format() == 'channels_first': - raise ValueError('Weights for "channels_first" format ' - 'are not available.') - - if include_top: - model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + - str(alpha) + '_' + str(rows) + '.h5') - weigh_path = BASE_WEIGHT_PATH + model_name - weights_path = keras_utils.get_file( - model_name, weigh_path, cache_subdir='models') - else: - model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + - str(alpha) + '_' + str(rows) + '_no_top' + '.h5') - weigh_path = BASE_WEIGHT_PATH + model_name - weights_path = keras_utils.get_file( - model_name, weigh_path, cache_subdir='models') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - if old_data_format: - backend.set_image_data_format(old_data_format) - return model - - -def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id): - in_channels = backend.int_shape(inputs)[-1] - pointwise_conv_filters = int(filters * alpha) - pointwise_filters = _make_divisible(pointwise_conv_filters, 8) - x = inputs - prefix = 'block_{}_'.format(block_id) - - if block_id: - # Expand - x = layers.Conv2D(expansion * in_channels, - kernel_size=1, - padding='same', - use_bias=False, - activation=None, - name=prefix + 'expand')(x) - x = layers.BatchNormalization(epsilon=1e-3, - momentum=0.999, - name=prefix + 'expand_BN')(x) - x = layers.ReLU(6., name=prefix + 'expand_relu')(x) - else: - prefix = 'expanded_conv_' - - # Depthwise - if stride == 2: - x = layers.ZeroPadding2D(padding=correct_pad(backend, x, 3), - name=prefix + 'pad')(x) - x = layers.DepthwiseConv2D(kernel_size=3, - strides=stride, - activation=None, - use_bias=False, - padding='same' if stride == 1 else 'valid', - name=prefix + 'depthwise')(x) - x = layers.BatchNormalization(epsilon=1e-3, - momentum=0.999, - name=prefix + 'depthwise_BN')(x) - - x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x) - - # Project - x = layers.Conv2D(pointwise_filters, - kernel_size=1, - padding='same', - use_bias=False, - activation=None, - name=prefix + 'project')(x) - x = layers.BatchNormalization( - epsilon=1e-3, momentum=0.999, name=prefix + 'project_BN')(x) - - if in_channels == pointwise_filters and stride == 1: - return layers.Add(name=prefix + 'add')([inputs, x]) - return x -from .blocks import Conv2DBlock -from .layers import ResizeImage -from keras.layers import Conv2D -from keras.layers import Activation -from keras.layers import BatchNormalization - - -def Conv2DBlock(n_filters, kernel_size, - activation='relu', - use_batchnorm=True, - name='conv_block', - **kwargs): - """Extension of Conv2D layer with batchnorm""" - def layer(input_tensor): - - x = Conv2D(n_filters, kernel_size, use_bias=not(use_batchnorm), - name=name+'_conv', **kwargs)(input_tensor) - if use_batchnorm: - x = BatchNormalization(name=name+'_bn',)(x) - x = Activation(activation, name=name+'_'+activation)(x) - - return x - return layer -import numpy as np -import tensorflow as tf - - -def transpose_shape(shape, target_format, spatial_axes): - """Converts a tuple or a list to the correct `data_format`. - It does so by switching the positions of its elements. - # Arguments - shape: Tuple or list, often representing shape, - corresponding to `'channels_last'`. - target_format: A string, either `'channels_first'` or `'channels_last'`. - spatial_axes: A tuple of integers. - Correspond to the indexes of the spatial axes. 
- For example, if you pass a shape - representing (batch_size, timesteps, rows, cols, channels), - then `spatial_axes=(2, 3)`. - # Returns - A tuple or list, with the elements permuted according - to `target_format`. - # Example - # Raises - ValueError: if `value` or the global `data_format` invalid. - """ - if target_format == 'channels_first': - new_values = shape[:spatial_axes[0]] - new_values += (shape[-1],) - new_values += tuple(shape[x] for x in spatial_axes) - - if isinstance(shape, list): - return list(new_values) - return new_values - elif target_format == 'channels_last': - return shape - else: - raise ValueError('The `data_format` argument must be one of ' - '"channels_first", "channels_last". Received: ' + - str(target_format)) - - -def permute_dimensions(x, pattern): - """Permutes axes in a tensor. - # Arguments - x: Tensor or variable. - pattern: A tuple of - dimension indices, e.g. `(0, 2, 1)`. - # Returns - A tensor. - """ - return tf.transpose(x, perm=pattern) - - -def int_shape(x): - """Returns the shape of tensor or variable as a tuple of int or None entries. - # Arguments - x: Tensor or variable. - # Returns - A tuple of integers (or None entries). - """ - if hasattr(x, '_keras_shape'): - return x._keras_shape - try: - return tuple(x.get_shape().as_list()) - except ValueError: - return None - - -def resize_images(x, - height_factor, - width_factor, - data_format, - interpolation='nearest'): - """Resizes the images contained in a 4D tensor. - # Arguments - x: Tensor or variable to resize. - height_factor: Positive integer. - width_factor: Positive integer. - data_format: string, `"channels_last"` or `"channels_first"`. - interpolation: A string, one of `nearest` or `bilinear`. - # Returns - A tensor. - # Raises - ValueError: if `data_format` is neither `"channels_last"` or `"channels_first"`. - """ - if data_format == 'channels_first': - rows, cols = 2, 3 - else: - rows, cols = 1, 2 - - original_shape = int_shape(x) - new_shape = tf.shape(x)[rows:cols + 1] - new_shape *= tf.constant(np.array([height_factor, - width_factor], dtype='int32')) - - if data_format == 'channels_first': - x = permute_dimensions(x, [0, 2, 3, 1]) - if interpolation == 'nearest': - x = tf.image.resize_nearest_neighbor(x, new_shape) - elif interpolation == 'bilinear': - x = tf.image.resize_bilinear(x, new_shape, align_corners=True) - else: - raise ValueError('interpolation should be one ' - 'of "nearest" or "bilinear".') - if data_format == 'channels_first': - x = permute_dimensions(x, [0, 3, 1, 2]) - - if original_shape[rows] is None: - new_height = None - else: - new_height = original_shape[rows] * height_factor - - if original_shape[cols] is None: - new_width = None - else: - new_width = original_shape[cols] * width_factor - - output_shape = (None, new_height, new_width, None) - x.set_shape(transpose_shape( - output_shape, data_format, spatial_axes=(1, 2))) - return x -from keras.engine import Layer -from keras.engine import InputSpec -from keras.utils import conv_utils -from keras.legacy import interfaces -from keras.utils.generic_utils import get_custom_objects - -from .functions import resize_images - - -class ResizeImage(Layer): - """ResizeImage layer for 2D inputs. - Repeats the rows and columns of the data - by factor[0] and factor[1] respectively. - # Arguments - factor: int, or tuple of 2 integers. - The upsampling factors for rows and columns. - data_format: A string, - one of `"channels_last"` or `"channels_first"`. - The ordering of the dimensions in the inputs. 
- `"channels_last"` corresponds to inputs with shape - `(batch, height, width, channels)` while `"channels_first"` - corresponds to inputs with shape - `(batch, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - interpolation: A string, one of `nearest` or `bilinear`. - Note that CNTK does not support yet the `bilinear` upscaling - and that with Theano, only `factor=(2, 2)` is possible. - # Input shape - 4D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch, rows, cols, channels)` - - If `data_format` is `"channels_first"`: - `(batch, channels, rows, cols)` - # Output shape - 4D tensor with shape: - - If `data_format` is `"channels_last"`: - `(batch, upsampled_rows, upsampled_cols, channels)` - - If `data_format` is `"channels_first"`: - `(batch, channels, upsampled_rows, upsampled_cols)` - """ - - @interfaces.legacy_upsampling2d_support - def __init__(self, factor=(2, 2), data_format='channels_last', interpolation='nearest', **kwargs): - super(ResizeImage, self).__init__(**kwargs) - self.data_format = data_format - self.factor = conv_utils.normalize_tuple(factor, 2, 'factor') - self.input_spec = InputSpec(ndim=4) - if interpolation not in ['nearest', 'bilinear']: - raise ValueError('interpolation should be one ' - 'of "nearest" or "bilinear".') - self.interpolation = interpolation - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - height = self.factor[0] * \ - input_shape[2] if input_shape[2] is not None else None - width = self.factor[1] * \ - input_shape[3] if input_shape[3] is not None else None - return (input_shape[0], - input_shape[1], - height, - width) - elif self.data_format == 'channels_last': - height = self.factor[0] * \ - input_shape[1] if input_shape[1] is not None else None - width = self.factor[1] * \ - input_shape[2] if input_shape[2] is not None else None - return (input_shape[0], - height, - width, - input_shape[3]) - - def call(self, inputs): - return resize_images(inputs, self.factor[0], self.factor[1], - self.data_format, self.interpolation) - - def get_config(self): - config = {'factor': self.factor, - 'interpolation': self.interpolation, - 'data_format': self.data_format} - base_config = super(ResizeImage, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -get_custom_objects().update({'ResizeImage': ResizeImage}) -from .model import FPN -from keras.layers import Add - -from ..common import Conv2DBlock -from ..common import ResizeImage -from ..utils import to_tuple - - -def pyramid_block(pyramid_filters=256, segmentation_filters=128, upsample_rate=2, - use_batchnorm=False, stage=0): - """ - Pyramid block according to: - http://presentations.cocodataset.org/COCO17-Stuff-FAIR.pdf - - This block generate `M` and `P` blocks. 
- - Args: - pyramid_filters: integer, filters in `M` block of top-down FPN branch - segmentation_filters: integer, number of filters in segmentation head, - basically filters in convolution layers between `M` and `P` blocks - upsample_rate: integer, uspsample rate for `M` block of top-down FPN branch - use_batchnorm: bool, include batchnorm in convolution blocks - - Returns: - Pyramid block function (as Keras layers functional API) - """ - def layer(c, m=None): - - x = Conv2DBlock(pyramid_filters, (1, 1), - padding='same', - use_batchnorm=use_batchnorm, - name='pyramid_stage_{}'.format(stage))(c) - - if m is not None: - up = ResizeImage(to_tuple(upsample_rate))(m) - x = Add()([x, up]) - - # segmentation head - p = Conv2DBlock(segmentation_filters, (3, 3), - padding='same', - use_batchnorm=use_batchnorm, - name='segm1_stage_{}'.format(stage))(x) - - p = Conv2DBlock(segmentation_filters, (3, 3), - padding='same', - use_batchnorm=use_batchnorm, - name='segm2_stage_{}'.format(stage))(p) - m = x - - return m, p - return layer -import numpy as np -from keras.layers import Conv2D -from keras.layers import Concatenate -from keras.layers import Activation -from keras.layers import SpatialDropout2D -from keras.models import Model - -from .blocks import pyramid_block -from ..common import ResizeImage -from ..common import Conv2DBlock -from ..utils import extract_outputs, to_tuple - - -def build_fpn(backbone, - fpn_layers, - classes=21, - activation='softmax', - upsample_rates=(2, 2, 2), - last_upsample=4, - pyramid_filters=256, - segmentation_filters=128, - use_batchnorm=False, - dropout=None, - interpolation='bilinear'): - """ - Implementation of FPN head for segmentation models according to: - http://presentations.cocodataset.org/COCO17-Stuff-FAIR.pdf - - Args: - backbone: Keras `Model`, some classification model without top - layers: list of layer names or indexes, used for pyramid building - classes: int, number of output feature maps - activation: activation in last layer, e.g. 
'sigmoid' or 'softmax' - upsample_rates: tuple of integers, scaling rates between pyramid blocks - pyramid_filters: int, number of filters in `M` blocks of top-down FPN branch - segmentation_filters: int, number of filters in `P` blocks of FPN - last_upsample: rate for upsumpling concatenated pyramid predictions to - match spatial resolution of input data - last_upsampling_type: 'nn' or 'bilinear' - dropout: float [0, 1), dropout rate - use_batchnorm: bool, include batch normalization to FPN between `conv` - and `relu` layers - - Returns: - model: Keras `Model` - """ - - if len(upsample_rates) != len(fpn_layers): - raise ValueError( - 'Number of intermediate feature maps and upsample steps should match') - - # extract model layer outputs - outputs = extract_outputs(backbone, fpn_layers, include_top=True) - - # add upsample rate `1` for first block - upsample_rates = [1] + list(upsample_rates) - - # top - down path, build pyramid - m = None - pyramid = [] - for i, c in enumerate(outputs): - m, p = pyramid_block(pyramid_filters=pyramid_filters, - segmentation_filters=segmentation_filters, - upsample_rate=upsample_rates[i], - use_batchnorm=use_batchnorm, - stage=i)(c, m) - pyramid.append(p) - - # upsample and concatenate all pyramid layer - upsampled_pyramid = [] - - for i, p in enumerate(pyramid[::-1]): - if upsample_rates[i] > 1: - upsample_rate = to_tuple(np.prod(upsample_rates[:i+1])) - p = ResizeImage(upsample_rate, interpolation=interpolation)(p) - upsampled_pyramid.append(p) - - x = Concatenate()(upsampled_pyramid) - - # final convolution - n_filters = segmentation_filters * len(pyramid) - x = Conv2DBlock(n_filters, (3, 3), - use_batchnorm=use_batchnorm, padding='same')(x) - if dropout is not None: - x = SpatialDropout2D(dropout)(x) - - x = Conv2D(classes, (3, 3), padding='same')(x) - - # upsampling to original spatial resolution - x = ResizeImage(to_tuple(last_upsample), interpolation=interpolation)(x) - - # activation - x = Activation(activation)(x) - - model = Model(backbone.input, x) - return model -from .builder import build_fpn -from ..backbones import get_backbone, get_feature_layers -from ..utils import freeze_model -from ..utils import legacy_support - -old_args_map = { - 'freeze_encoder': 'encoder_freeze', - 'fpn_layers': 'encoder_features', - 'use_batchnorm': 'pyramid_use_batchnorm', - 'dropout': 'pyramid_dropout', - 'interpolation': 'final_interpolation', - 'upsample_rates': None, # removed - 'last_upsample': None, # removed -} - - -@legacy_support(old_args_map) -def FPN(backbone_name='vgg16', - input_shape=(None, None, 3), - input_tensor=None, - classes=21, - activation='softmax', - encoder_weights='imagenet', - encoder_freeze=False, - encoder_features='default', - pyramid_block_filters=256, - pyramid_use_batchnorm=True, - pyramid_dropout=None, - final_interpolation='bilinear', - **kwargs): - """FPN_ is a fully convolution neural network for image semantic segmentation - - Args: - backbone_name: name of classification model (without last dense layers) used as feature - extractor to build segmentation model. - input_shape: shape of input data/image ``(H, W, C)``, in general - case you do not need to set ``H`` and ``W`` shapes, just pass ``(None, None, C)`` to make your model be - able to process images af any size, but ``H`` and ``W`` of input images should be divisible by factor ``32``. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model - (works only if ``encoder_weights`` is ``None``). 
- classes: a number of classes for output (output shape - ``(h, w, classes)``). - activation: name of one of ``keras.activations`` for last model layer (e.g. ``sigmoid``, ``softmax``, ``linear``). - encoder_weights: one of ``None`` (random initialization), ``imagenet`` (pre-training on ImageNet). - encoder_freeze: if ``True`` set all layers of encoder (backbone model) as non-trainable. - encoder_features: a list of layer numbers or names starting from top of the model. - Each of these layers will be used to build features pyramid. If ``default`` is used - layer names are taken from ``DEFAULT_FEATURE_PYRAMID_LAYERS``. - pyramid_block_filters: a number of filters in Feature Pyramid Block of FPN_. - pyramid_use_batchnorm: if ``True``, ``BatchNormalisation`` layer between ``Conv2D`` and ``Activation`` layers - is used. - pyramid_dropout: spatial dropout rate for feature pyramid in range (0, 1). - final_interpolation: interpolation type for upsampling layers, on of ``nearest``, ``bilinear``. - - Returns: - ``keras.models.Model``: **FPN** - - .. _FPN: - http://presentations.cocodataset.org/COCO17-Stuff-FAIR.pdf - - """ - - backbone = get_backbone(backbone_name, - input_shape=input_shape, - input_tensor=input_tensor, - weights=encoder_weights, - include_top=False) - - if encoder_features == 'default': - encoder_features = get_feature_layers(backbone_name, n=3) - - upsample_rates = [2] * len(encoder_features) - last_upsample = 2 ** (5 - len(encoder_features)) - - model = build_fpn(backbone, encoder_features, - classes=classes, - pyramid_filters=pyramid_block_filters, - segmentation_filters=pyramid_block_filters // 2, - upsample_rates=upsample_rates, - use_batchnorm=pyramid_use_batchnorm, - dropout=pyramid_dropout, - last_upsample=last_upsample, - interpolation=final_interpolation, - activation=activation) - - if encoder_freeze: - freeze_model(backbone) - - model.name = 'fpn-{}'.format(backbone.name) - - return model -from .model import Linknet -import keras.backend as K -from keras.layers import Conv2DTranspose as Transpose -from keras.layers import UpSampling2D -from keras.layers import Conv2D -from keras.layers import BatchNormalization -from keras.layers import Activation -from keras.layers import Add - - -def handle_block_names(stage): - conv_name = 'decoder_stage{}_conv'.format(stage) - bn_name = 'decoder_stage{}_bn'.format(stage) - relu_name = 'decoder_stage{}_relu'.format(stage) - up_name = 'decoder_stage{}_upsample'.format(stage) - return conv_name, bn_name, relu_name, up_name - - -def ConvRelu(filters, - kernel_size, - use_batchnorm=False, - conv_name='conv', - bn_name='bn', - relu_name='relu'): - - def layer(x): - - x = Conv2D(filters, - kernel_size, - padding="same", - name=conv_name, - use_bias=not(use_batchnorm))(x) - - if use_batchnorm: - x = BatchNormalization(name=bn_name)(x) - - x = Activation('relu', name=relu_name)(x) - - return x - return layer - - -def Conv2DUpsample(filters, - upsample_rate, - kernel_size=(3, 3), - up_name='up', - conv_name='conv', - **kwargs): - - def layer(input_tensor): - x = UpSampling2D(upsample_rate, name=up_name)(input_tensor) - x = Conv2D(filters, - kernel_size, - padding='same', - name=conv_name, - **kwargs)(x) - return x - return layer - - -def Conv2DTranspose(filters, - upsample_rate, - kernel_size=(4, 4), - up_name='up', - **kwargs): - - if not tuple(upsample_rate) == (2, 2): - raise NotImplementedError( - 'Conv2DTranspose support only upsample_rate=(2, 2), got {}'.format(upsample_rate)) - - def layer(input_tensor): - x = Transpose(filters, - 
kernel_size=kernel_size, - strides=upsample_rate, - padding='same', - name=up_name)(input_tensor) - return x - return layer - - -def UpsampleBlock(filters, - upsample_rate, - kernel_size, - use_batchnorm=False, - upsample_layer='upsampling', - conv_name='conv', - bn_name='bn', - relu_name='relu', - up_name='up', - **kwargs): - - if upsample_layer == 'upsampling': - UpBlock = Conv2DUpsample - - elif upsample_layer == 'transpose': - UpBlock = Conv2DTranspose - - else: - raise ValueError( - 'Not supported up layer type {}'.format(upsample_layer)) - - def layer(input_tensor): - - x = UpBlock(filters, - upsample_rate=upsample_rate, - kernel_size=kernel_size, - use_bias=not(use_batchnorm), - conv_name=conv_name, - up_name=up_name, - **kwargs)(input_tensor) - - if use_batchnorm: - x = BatchNormalization(name=bn_name)(x) - - x = Activation('relu', name=relu_name)(x) - - return x - return layer - - -def DecoderBlock(stage, - filters=None, - kernel_size=(3, 3), - upsample_rate=(2, 2), - use_batchnorm=False, - skip=None, - upsample_layer='upsampling'): - - def layer(input_tensor): - - conv_name, bn_name, relu_name, up_name = handle_block_names(stage) - input_filters = K.int_shape(input_tensor)[-1] - - if skip is not None: - output_filters = K.int_shape(skip)[-1] - else: - output_filters = filters - - x = ConvRelu(input_filters // 4, - kernel_size=(1, 1), - use_batchnorm=use_batchnorm, - conv_name=conv_name + '1', - bn_name=bn_name + '1', - relu_name=relu_name + '1')(input_tensor) - - x = UpsampleBlock(filters=input_filters // 4, - kernel_size=kernel_size, - upsample_layer=upsample_layer, - upsample_rate=upsample_rate, - use_batchnorm=use_batchnorm, - conv_name=conv_name + '2', - bn_name=bn_name + '2', - up_name=up_name + '2', - relu_name=relu_name + '2')(x) - - x = ConvRelu(output_filters, - kernel_size=(1, 1), - use_batchnorm=use_batchnorm, - conv_name=conv_name + '3', - bn_name=bn_name + '3', - relu_name=relu_name + '3')(x) - - if skip is not None: - x = Add()([x, skip]) - - return x - return layer -from keras.layers import Conv2D -from keras.layers import Activation -from keras.models import Model - -from .blocks import DecoderBlock -from ..utils import get_layer_number, to_tuple - - -def build_linknet(backbone, - classes, - skip_connection_layers, - decoder_filters=(None, None, None, None, 16), - upsample_rates=(2, 2, 2, 2, 2), - n_upsample_blocks=5, - upsample_kernel_size=(3, 3), - upsample_layer='upsampling', - activation='sigmoid', - use_batchnorm=True): - - input = backbone.input - x = backbone.output - - # convert layer names to indices - skip_connection_idx = ([get_layer_number(backbone, l) if isinstance(l, str) else l - for l in skip_connection_layers]) - - for i in range(n_upsample_blocks): - - # check if there is a skip connection - skip_connection = None - if i < len(skip_connection_idx): - skip_connection = backbone.layers[skip_connection_idx[i]].output - - upsample_rate = to_tuple(upsample_rates[i]) - - x = DecoderBlock(stage=i, - filters=decoder_filters[i], - kernel_size=upsample_kernel_size, - upsample_rate=upsample_rate, - use_batchnorm=use_batchnorm, - upsample_layer=upsample_layer, - skip=skip_connection)(x) - - x = Conv2D(classes, (3, 3), padding='same', name='final_conv')(x) - x = Activation(activation, name=activation)(x) - - model = Model(input, x) - - return model -from .builder import build_linknet -from ..utils import freeze_model -from ..utils import legacy_support -from ..backbones import get_backbone, get_feature_layers - -old_args_map = { - 'freeze_encoder': 
'encoder_freeze', - 'skip_connections': 'encoder_features', - 'upsample_layer': 'decoder_block_type', - 'n_upsample_blocks': None, # removed - 'input_tensor': None, # removed - 'upsample_kernel_size': None, # removed -} - - -@legacy_support(old_args_map) -def Linknet(backbone_name='vgg16', - input_shape=(None, None, 3), - classes=1, - activation='sigmoid', - encoder_weights='imagenet', - encoder_freeze=False, - encoder_features='default', - decoder_filters=(None, None, None, None, 16), - decoder_use_batchnorm=True, - decoder_block_type='upsampling', - **kwargs): - """Linknet_ is a fully convolution neural network for fast image semantic segmentation - - Note: - This implementation by default has 4 skip connections (original - 3). - - Args: - backbone_name: name of classification model (without last dense layers) used as feature - extractor to build segmentation model. - input_shape: shape of input data/image ``(H, W, C)``, in general - case you do not need to set ``H`` and ``W`` shapes, just pass ``(None, None, C)`` to make your model be - able to process images af any size, but ``H`` and ``W`` of input images should be divisible by factor ``32``. - classes: a number of classes for output (output shape - ``(h, w, classes)``). - activation: name of one of ``keras.activations`` for last model layer - (e.g. ``sigmoid``, ``softmax``, ``linear``). - encoder_weights: one of ``None`` (random initialization), ``imagenet`` (pre-training on ImageNet). - encoder_freeze: if ``True`` set all layers of encoder (backbone model) as non-trainable. - encoder_features: a list of layer numbers or names starting from top of the model. - Each of these layers will be concatenated with corresponding decoder block. If ``default`` is used - layer names are taken from ``DEFAULT_SKIP_CONNECTIONS``. - decoder_filters: list of numbers of ``Conv2D`` layer filters in decoder blocks, - for block with skip connection a number of filters is equal to number of filters in - corresponding encoder block (estimates automatically and can be passed as ``None`` value). - decoder_use_batchnorm: if ``True``, ``BatchNormalisation`` layer between ``Conv2D`` and ``Activation`` layers - is used. - decoder_block_type: one of - - `upsampling`: use ``Upsampling2D`` keras layer - - `transpose`: use ``Transpose2D`` keras layer - - Returns: - ``keras.models.Model``: **Linknet** - - .. 
_Linknet: - https://arxiv.org/pdf/1707.03718.pdf - """ - - backbone = get_backbone(backbone_name, - input_shape=input_shape, - input_tensor=None, - weights=encoder_weights, - include_top=False) - - if encoder_features == 'default': - encoder_features = get_feature_layers(backbone_name, n=4) - - model = build_linknet(backbone, - classes, - encoder_features, - decoder_filters=decoder_filters, - upsample_layer=decoder_block_type, - activation=activation, - n_upsample_blocks=len(decoder_filters), - upsample_rates=(2, 2, 2, 2, 2), - upsample_kernel_size=(3, 3), - use_batchnorm=decoder_use_batchnorm) - - # lock encoder weights for fine-tuning - if encoder_freeze: - freeze_model(backbone) - - model.name = 'link-{}'.format(backbone_name) - - return model -from .model import PSPNet -import numpy as np -from keras.layers import MaxPool2D -from keras.layers import AveragePooling2D -from keras.layers import Concatenate -from keras.layers import Permute -from keras.layers import Reshape -from keras.backend import int_shape - -from ..common import Conv2DBlock -from ..common import ResizeImage - - -def InterpBlock(level, feature_map_shape, - conv_filters=512, - conv_kernel_size=(1, 1), - conv_padding='same', - pooling_type='avg', - pool_padding='same', - use_batchnorm=True, - activation='relu', - interpolation='bilinear'): - - if pooling_type == 'max': - Pool2D = MaxPool2D - elif pooling_type == 'avg': - Pool2D = AveragePooling2D - else: - raise ValueError('Unsupported pooling type - `{}`.'.format(pooling_type) + - 'Use `avg` or `max`.') - - def layer(input_tensor): - # Compute the kernel and stride sizes according to how large the final feature map will be - # When the kernel factor and strides are equal, then we can compute the final feature map factor - # by simply dividing the current factor by the kernel or stride factor - # The final feature map sizes are 1x1, 2x2, 3x3, and 6x6. We round to the closest integer - pool_size = [int(np.round(feature_map_shape[0] / level)), - int(np.round(feature_map_shape[1] / level))] - strides = pool_size - - x = Pool2D(pool_size, strides=strides, - padding=pool_padding)(input_tensor) - x = Conv2DBlock(conv_filters, - kernel_size=conv_kernel_size, - padding=conv_padding, - use_batchnorm=use_batchnorm, - activation=activation, - name='level{}'.format(level))(x) - x = ResizeImage(strides, interpolation=interpolation)(x) - return x - return layer - - -def DUC(factor=(8, 8)): - - if factor[0] != factor[1]: - raise ValueError('DUC upconvolution support only equal factors, ' - 'got {}'.format(factor)) - factor = factor[0] - - def layer(input_tensor): - - h, w, c = int_shape(input_tensor)[1:] - H = h * factor - W = w * factor - - x = Conv2DBlock(c*factor**2, (1, 1), - padding='same', - name='duc_{}'.format(factor))(input_tensor) - x = Permute((3, 1, 2))(x) - x = Reshape((c, factor, factor, h, w))(x) - x = Permute((1, 4, 2, 5, 3))(x) - x = Reshape((c, H, W))(x) - x = Permute((2, 3, 1))(x) - return x - return layer - - -def PyramidPoolingModule(**params): - """ - Build the Pyramid Pooling Module. 
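# Worked example of the pool-size arithmetic used by InterpBlock above,
# assuming a hypothetical 48x48 backbone feature map (values hand-computed):
# levels (1, 2, 3, 6) pool with sizes 48, 24, 16 and 8, producing 1x1, 2x2,
# 3x3 and 6x6 maps that are then resized back and concatenated.
import numpy as np

def _psp_pool_sizes(feature_map_shape=(48, 48)):
    return {level: [int(np.round(feature_map_shape[0] / level)),
                    int(np.round(feature_map_shape[1] / level))]
            for level in (1, 2, 3, 6)}
    # -> {1: [48, 48], 2: [24, 24], 3: [16, 16], 6: [8, 8]}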
- """ - - _params = { - 'conv_filters': 512, - 'conv_kernel_size': (1, 1), - 'conv_padding': 'same', - 'pooling_type': 'avg', - 'pool_padding': 'same', - 'use_batchnorm': True, - 'activation': 'relu', - 'interpolation': 'bilinear', - } - - _params.update(params) - - def module(input_tensor): - - feature_map_shape = int_shape(input_tensor)[1:3] - - x1 = InterpBlock(1, feature_map_shape, **_params)(input_tensor) - x2 = InterpBlock(2, feature_map_shape, **_params)(input_tensor) - x3 = InterpBlock(3, feature_map_shape, **_params)(input_tensor) - x6 = InterpBlock(6, feature_map_shape, **_params)(input_tensor) - - x = Concatenate()([input_tensor, x1, x2, x3, x6]) - return x - return module -""" -Code is constructed based on following repositories: - https://github.com/ykamikawa/PSPNet/ - https://github.com/hujh14/PSPNet-Keras/ - https://github.com/Vladkryvoruchko/PSPNet-Keras-tensorflow/ - -And original paper of PSPNet: - https://arxiv.org/pdf/1612.01105.pdf -""" - -from keras.layers import Conv2D -from keras.layers import Activation -from keras.layers import SpatialDropout2D -from keras.models import Model - -from .blocks import PyramidPoolingModule, DUC -from ..common import Conv2DBlock -from ..common import ResizeImage -from ..utils import extract_outputs -from ..utils import to_tuple - - -def build_psp(backbone, - psp_layer, - last_upsampling_factor, - classes=21, - activation='softmax', - conv_filters=512, - pooling_type='avg', - dropout=None, - final_interpolation='bilinear', - use_batchnorm=True): - - input = backbone.input - - x = extract_outputs(backbone, [psp_layer])[0] - - x = PyramidPoolingModule( - conv_filters=conv_filters, - pooling_type=pooling_type, - use_batchnorm=use_batchnorm)(x) - - x = Conv2DBlock(512, (1, 1), activation='relu', padding='same', - use_batchnorm=use_batchnorm)(x) - - if dropout is not None: - x = SpatialDropout2D(dropout)(x) - - x = Conv2D(classes, (3, 3), padding='same', name='final_conv')(x) - - if final_interpolation == 'bilinear': - x = ResizeImage(to_tuple(last_upsampling_factor))(x) - elif final_interpolation == 'duc': - x = DUC(to_tuple(last_upsampling_factor))(x) - else: - raise ValueError('Unsupported interpolation type {}. 
'.format(final_interpolation) + - 'Use `duc` or `bilinear`.') - - x = Activation(activation, name=activation)(x) - - model = Model(input, x) - - return model -from .builder import build_psp -from ..utils import freeze_model -from ..utils import legacy_support -from ..backbones import get_backbone, get_feature_layers - - -def _get_layer_by_factor(backbone_name, factor): - feature_layers = get_feature_layers(backbone_name, n=3) - if factor == 4: - return feature_layers[-1] - elif factor == 8: - return feature_layers[-2] - elif factor == 16: - return feature_layers[-3] - else: - raise ValueError( - 'Unsupported factor - `{}`, Use 4, 8 or 16.'.format(factor)) - - -def _shape_guard(factor, shape): - h, w = shape[:2] - min_size = factor * 6 - - res = (h % min_size != 0 or w % min_size != 0 or - h < min_size or w < min_size) - if res: - raise ValueError('Wrong shape {}, input H and W should '.format(shape) + - 'be divisible by `{}`'.format(min_size)) - - -old_args_map = { - 'freeze_encoder': 'encoder_freeze', - 'use_batchnorm': 'psp_use_batchnorm', - 'dropout': 'psp_dropout', - 'input_tensor': None, # removed -} - - -@legacy_support(old_args_map) -def PSPNet(backbone_name='vgg16', - input_shape=(384, 384, 3), - classes=21, - activation='softmax', - encoder_weights='imagenet', - encoder_freeze=False, - downsample_factor=8, - psp_conv_filters=512, - psp_pooling_type='avg', - psp_use_batchnorm=True, - psp_dropout=None, - final_interpolation='bilinear', - **kwargs): - """PSPNet_ is a fully convolution neural network for image semantic segmentation - - Args: - backbone_name: name of classification model used as feature - extractor to build segmentation model. - input_shape: shape of input data/image ``(H, W, C)``. - ``H`` and ``W`` should be divisible by ``6 * downsample_factor`` and **NOT** ``None``! - classes: a number of classes for output (output shape - ``(h, w, classes)``). - activation: name of one of ``keras.activations`` for last model layer - (e.g. ``sigmoid``, ``softmax``, ``linear``). - encoder_weights: one of ``None`` (random initialization), ``imagenet`` (pre-training on ImageNet). - encoder_freeze: if ``True`` set all layers of encoder (backbone model) as non-trainable. - downsample_factor: one of 4, 8 and 16. Downsampling rate or in other words backbone depth - to construct PSP module on it. - psp_conv_filters: number of filters in ``Conv2D`` layer in each PSP block. - psp_pooling_type: one of 'avg', 'max'. PSP block pooling type (maximum or average). - psp_use_batchnorm: if ``True``, ``BatchNormalisation`` layer between ``Conv2D`` and ``Activation`` layers - is used. - psp_dropout: dropout rate between 0 and 1. - final_interpolation: ``duc`` or ``bilinear`` - interpolation type for final - upsampling layer. - - Returns: - ``keras.models.Model``: **PSPNet** - - .. 
_PSPNet: - https://arxiv.org/pdf/1612.01105.pdf - - """ - - # control image input shape - _shape_guard(downsample_factor, input_shape) - - backbone = get_backbone(backbone_name, - input_shape=input_shape, - input_tensor=None, - weights=encoder_weights, - include_top=False) - - psp_layer = _get_layer_by_factor(backbone_name, downsample_factor) - - model = build_psp(backbone, - psp_layer, - last_upsampling_factor=downsample_factor, - classes=classes, - conv_filters=psp_conv_filters, - pooling_type=psp_pooling_type, - activation=activation, - use_batchnorm=psp_use_batchnorm, - dropout=psp_dropout, - final_interpolation=final_interpolation) - - # lock encoder weights for fine-tuning - if encoder_freeze: - freeze_model(backbone) - - model.name = 'psp-{}'.format(backbone_name) - - return model -from .model import Unet -from keras.layers import Conv2DTranspose -from keras.layers import UpSampling2D -from keras.layers import Conv2D -from keras.layers import BatchNormalization -from keras.layers import Activation -from keras.layers import Concatenate - - -def handle_block_names(stage): - conv_name = 'decoder_stage{}_conv'.format(stage) - bn_name = 'decoder_stage{}_bn'.format(stage) - relu_name = 'decoder_stage{}_relu'.format(stage) - up_name = 'decoder_stage{}_upsample'.format(stage) - return conv_name, bn_name, relu_name, up_name - - -def ConvRelu(filters, kernel_size, use_batchnorm=False, conv_name='conv', bn_name='bn', relu_name='relu'): - def layer(x): - x = Conv2D(filters, kernel_size, padding="same", - name=conv_name, use_bias=not(use_batchnorm))(x) - if use_batchnorm: - x = BatchNormalization(name=bn_name)(x) - x = Activation('relu', name=relu_name)(x) - return x - return layer - - -def Upsample2D_block(filters, stage, kernel_size=(3, 3), upsample_rate=(2, 2), - use_batchnorm=False, skip=None): - - def layer(input_tensor): - - conv_name, bn_name, relu_name, up_name = handle_block_names(stage) - - x = UpSampling2D(size=upsample_rate, name=up_name)(input_tensor) - - if skip is not None: - x = Concatenate()([x, skip]) - - x = ConvRelu(filters, kernel_size, use_batchnorm=use_batchnorm, - conv_name=conv_name + '1', bn_name=bn_name + '1', relu_name=relu_name + '1')(x) - - x = ConvRelu(filters, kernel_size, use_batchnorm=use_batchnorm, - conv_name=conv_name + '2', bn_name=bn_name + '2', relu_name=relu_name + '2')(x) - - return x - return layer - - -def Transpose2D_block(filters, stage, kernel_size=(3, 3), upsample_rate=(2, 2), - transpose_kernel_size=(4, 4), use_batchnorm=False, skip=None): - - def layer(input_tensor): - - conv_name, bn_name, relu_name, up_name = handle_block_names(stage) - - x = Conv2DTranspose(filters, transpose_kernel_size, strides=upsample_rate, - padding='same', name=up_name, use_bias=not(use_batchnorm))(input_tensor) - if use_batchnorm: - x = BatchNormalization(name=bn_name+'1')(x) - x = Activation('relu', name=relu_name+'1')(x) - - if skip is not None: - x = Concatenate()([x, skip]) - - x = ConvRelu(filters, kernel_size, use_batchnorm=use_batchnorm, - conv_name=conv_name + '2', bn_name=bn_name + '2', relu_name=relu_name + '2')(x) - - return x - return layer -from keras.layers import Conv2D -from keras.layers import Activation -from keras.models import Model - -from .blocks import Transpose2D_block -from .blocks import Upsample2D_block -from ..utils import get_layer_number, to_tuple - - -def build_unet(backbone, classes, skip_connection_layers, - decoder_filters=(256, 128, 64, 32, 16), - upsample_rates=(2, 2, 2, 2, 2), - n_upsample_blocks=5, - block_type='upsampling', - 
activation='sigmoid', - use_batchnorm=True): - - input = backbone.input - x = backbone.output - - if block_type == 'transpose': - up_block = Transpose2D_block - else: - up_block = Upsample2D_block - - # convert layer names to indices - skip_connection_idx = ([get_layer_number(backbone, l) if isinstance(l, str) else l - for l in skip_connection_layers]) - - for i in range(n_upsample_blocks): - - # check if there is a skip connection - skip_connection = None - if i < len(skip_connection_idx): - skip_connection = backbone.layers[skip_connection_idx[i]].output - - upsample_rate = to_tuple(upsample_rates[i]) - - x = up_block(decoder_filters[i], i, upsample_rate=upsample_rate, - skip=skip_connection, use_batchnorm=use_batchnorm)(x) - - x = Conv2D(classes, (3, 3), padding='same', name='final_conv')(x) - x = Activation(activation, name=activation)(x) - - model = Model(input, x) - - return model -from __future__ import absolute_import - -from keras import backend as K -from keras.engine import InputSpec -from keras.layers import LSTM, activations, Wrapper - - -class AttentionLSTM(LSTM): - def __init__(self, output_dim, attention_vec, attn_activation='tanh', single_attention_param=False, **kwargs): - self.attention_vec = attention_vec - self.attn_activation = activations.get(attn_activation) - self.single_attention_param = single_attention_param - - super(AttentionLSTM, self).__init__(output_dim, **kwargs) - - def build(self, input_shape): - super(AttentionLSTM, self).build(input_shape) - - if hasattr(self.attention_vec, '_keras_shape'): - attention_dim = self.attention_vec._keras_shape[1] - else: - raise Exception( - 'Layer could not be build: No information about expected input shape.') - - self.U_a = self.inner_init((self.output_dim, self.output_dim), - name='{}_U_a'.format(self.name)) - self.b_a = K.zeros((self.output_dim,), name='{}_b_a'.format(self.name)) - - self.U_m = self.inner_init((attention_dim, self.output_dim), - name='{}_U_m'.format(self.name)) - self.b_m = K.zeros((self.output_dim,), name='{}_b_m'.format(self.name)) - - if self.single_attention_param: - self.U_s = self.inner_init((self.output_dim, 1), - name='{}_U_s'.format(self.name)) - self.b_s = K.zeros((1,), name='{}_b_s'.format(self.name)) - else: - self.U_s = self.inner_init((self.output_dim, self.output_dim), - name='{}_U_s'.format(self.name)) - self.b_s = K.zeros((self.output_dim,), - name='{}_b_s'.format(self.name)) - - self.trainable_weights += [self.U_a, self.U_m, - self.U_s, self.b_a, self.b_m, self.b_s] - - if self.initial_weights is not None: - self.set_weights(self.initial_weights) - del self.initial_weights - - def step(self, x, states): - h, [h, c] = super(AttentionLSTM, self).step(x, states) - attention = states[4] - - m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a) - # Intuitively it makes more sense to use a sigmoid (was getting some NaN problems - # which I think might have been caused by the exponential function -> gradients blow up) - s = K.sigmoid(K.dot(m, self.U_s) + self.b_s) - - if self.single_attention_param: - h = h * K.repeat_elements(s, self.output_dim, axis=1) - else: - h = h * s - - return h, [h, c] - - def get_constants(self, x): - constants = super(AttentionLSTM, self).get_constants(x) - constants.append(K.dot(self.attention_vec, self.U_m) + self.b_m) - return constants - - -class AttentionLSTMWrapper(Wrapper): - def __init__(self, layer, attention_vec, attn_activation='tanh', single_attention_param=False, **kwargs): - assert isinstance(layer, LSTM) - self.supports_masking = True 
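# A sketch (in comments, hand-derived from AttentionLSTM.step above) of the
# per-timestep gating: with the precomputed constant
#     attention = attention_vec @ U_m + b_m        (appended in get_constants)
# each step rescales the hidden state h as
#     m = tanh(h @ U_a * attention + b_a)          # attn_activation='tanh'
#     s = sigmoid(m @ U_s + b_s)                   # gate values in (0, 1)
#     h = h * s                                    # element-wise rescaling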
- self.attention_vec = attention_vec - self.attn_activation = activations.get(attn_activation) - self.single_attention_param = single_attention_param - super(AttentionLSTMWrapper, self).__init__(layer, **kwargs) - - def build(self, input_shape): - assert len(input_shape) >= 3 - self.input_spec = [InputSpec(shape=input_shape)] - - if not self.layer.built: - self.layer.build(input_shape) - self.layer.built = True - - super(AttentionLSTMWrapper, self).build() - - if hasattr(self.attention_vec, '_keras_shape'): - attention_dim = self.attention_vec._keras_shape[1] - else: - raise Exception( - 'Layer could not be build: No information about expected input shape.') - - self.U_a = self.layer.inner_init( - (self.layer.output_dim, self.layer.output_dim), name='{}_U_a'.format(self.name)) - self.b_a = K.zeros((self.layer.output_dim,), - name='{}_b_a'.format(self.name)) - - self.U_m = self.layer.inner_init( - (attention_dim, self.layer.output_dim), name='{}_U_m'.format(self.name)) - self.b_m = K.zeros((self.layer.output_dim,), - name='{}_b_m'.format(self.name)) - - if self.single_attention_param: - self.U_s = self.layer.inner_init( - (self.layer.output_dim, 1), name='{}_U_s'.format(self.name)) - self.b_s = K.zeros((1,), name='{}_b_s'.format(self.name)) - else: - self.U_s = self.layer.inner_init( - (self.layer.output_dim, self.layer.output_dim), name='{}_U_s'.format(self.name)) - self.b_s = K.zeros((self.layer.output_dim,), - name='{}_b_s'.format(self.name)) - - self.trainable_weights = [self.U_a, self.U_m, - self.U_s, self.b_a, self.b_m, self.b_s] - - def get_output_shape_for(self, input_shape): - return self.layer.get_output_shape_for(input_shape) - - def step(self, x, states): - h, [h, c] = self.layer.step(x, states) - attention = states[4] - - m = self.attn_activation(K.dot(h, self.U_a) * attention + self.b_a) - s = K.sigmoid(K.dot(m, self.U_s) + self.b_s) - - if self.single_attention_param: - h = h * K.repeat_elements(s, self.layer.output_dim, axis=1) - else: - h = h * s - - return h, [h, c] - - def get_constants(self, x): - constants = self.layer.get_constants(x) - constants.append(K.dot(self.attention_vec, self.U_m) + self.b_m) - return constants - - def call(self, x, mask=None): - # input shape: (nb_samples, time (padded with zeros), input_dim) - # note that the .build() method of subclasses MUST define - # self.input_spec with a complete input shape. - input_shape = self.input_spec[0].shape - if K._BACKEND == 'tensorflow': - if not input_shape[1]: - raise Exception('When using TensorFlow, you should define ' - 'explicitly the number of timesteps of ' - 'your sequences.\n' - 'If your first layer is an Embedding, ' - 'make sure to pass it an "input_length" ' - 'argument. Otherwise, make sure ' - 'the first layer has ' - 'an "input_shape" or "batch_input_shape" ' - 'argument, including the time axis. 
' - 'Found input shape at layer ' + self.name + - ': ' + str(input_shape)) - if self.layer.stateful: - initial_states = self.layer.states - else: - initial_states = self.layer.get_initial_states(x) - constants = self.get_constants(x) - preprocessed_input = self.layer.preprocess_input(x) - - last_output, outputs, states = K.rnn(self.step, preprocessed_input, - initial_states, - go_backwards=self.layer.go_backwards, - mask=mask, - constants=constants, - unroll=self.layer.unroll, - input_length=input_shape[1]) - if self.layer.stateful: - self.updates = [] - for i in range(len(states)): - self.updates.append((self.layer.states[i], states[i])) - - if self.layer.return_sequences: - return outputs - else: - return last_output -#!/usr/bin/env python - -""" -Command-line script for generating embeddings -Useful if you want to generate larger embeddings for some models -""" - -from __future__ import print_function - -from keras_models import * -from gensim.models import Word2Vec -import os -import sys -import random -import pickle -import argparse -import logging - -random.seed(42) - - -def load(path, name): - return pickle.load(open(os.path.join(path, name), 'rb')) - - -def revert(vocab, indices): - return [vocab.get(i, 'X') for i in indices] - - -try: - data_path = os.environ['INSURANCE_QA'] -except KeyError: - print('INSURANCE_QA is not set. Set it to your clone of https://github.com/codekansas/insurance_qa_python') - sys.exit(1) - -# parse arguments -parser = argparse.ArgumentParser( - description='Generate embeddings for the InsuranceQA dataset') -parser.add_argument('--iter', metavar='N', type=int, - default=10, help='number of times to run') -parser.add_argument('--size', metavar='D', type=int, - default=100, help='dimensions in embedding') -args = parser.parse_args() - -# configure logging -logger = logging.getLogger(os.path.basename(sys.argv[0])) -logging.basicConfig(format='%(asctime)s: %(levelname)s: %(message)s') -logging.root.setLevel(level=logging.INFO) -logger.info('running %s' % ' '.join(sys.argv)) - -# imports go down here because they are time-consuming - -vocab = load(data_path, 'vocabulary') - -answers = load(data_path, 'answers') -sentences = [revert(vocab, txt) for txt in answers.values()] -sentences += [revert(vocab, q['question']) for q in load(data_path, 'train')] - -# run model -model = Word2Vec(sentences, size=args.size, min_count=5, - window=5, sg=1, iter=args.iter) -weights = model.syn0 -d = dict([(k, v.index) for k, v in model.vocab.items()]) -emb = np.zeros(shape=(len(vocab)+1, args.size), dtype='float32') - -for i, w in vocab.items(): - if w not in d: - continue - emb[i, :] = weights[d[w], :] - -np.save(open('word2vec_%d_dim.embeddings' % args.size, 'wb'), emb) -logger.info('saved to "word2vec_%d_dim.embeddings"' % args.size) -from __future__ import print_function - -import os - -import sys -import random -from time import strftime, gmtime, time - -import pickle -import json - -import thread -from scipy.stats import rankdata - -random.seed(42) - - -def log(x): - print(x) - - -class Evaluator: - def __init__(self, conf, model, optimizer=None): - try: - data_path = os.environ['INSURANCE_QA'] - except KeyError: - print("INSURANCE_QA is not set. 
Set it to your clone of https://github.com/codekansas/insurance_qa_python") - sys.exit(1) - if isinstance(conf, str): - conf = json.load(open(conf, 'rb')) - self.model = model(conf) - self.path = data_path - self.conf = conf - self.params = conf['training'] - optimizer = self.params['optimizer'] if optimizer is None else optimizer - self.model.compile(optimizer) - self.answers = self.load('answers') # self.load('generated') - self._vocab = None - self._reverse_vocab = None - self._eval_sets = None - - ##### Resources ##### - - def load(self, name): - return pickle.load(open(os.path.join(self.path, name), 'rb')) - - def vocab(self): - if self._vocab is None: - self._vocab = self.load('vocabulary') - return self._vocab - - def reverse_vocab(self): - if self._reverse_vocab is None: - vocab = self.vocab() - self._reverse_vocab = dict((v.lower(), k) - for k, v in vocab.items()) - return self._reverse_vocab - - ##### Loading / saving ##### - - def save_epoch(self, epoch): - if not os.path.exists('models/'): - os.makedirs('models/') - self.model.save_weights( - 'models/weights_epoch_%d.h5' % epoch, overwrite=True) - - def load_epoch(self, epoch): - assert os.path.exists('models/weights_epoch_%d.h5' % - epoch), 'Weights at epoch %d not found' % epoch - self.model.load_weights('models/weights_epoch_%d.h5' % epoch) - - ##### Converting / reverting ##### - - def convert(self, words): - rvocab = self.reverse_vocab() - if type(words) == str: - words = words.strip().lower().split(' ') - return [rvocab.get(w, 0) for w in words] - - def revert(self, indices): - vocab = self.vocab() - return [vocab.get(i, 'X') for i in indices] - - ##### Padding ##### - - def padq(self, data): - return self.pad(data, self.conf.get('question_len', None)) - - def pada(self, data): - return self.pad(data, self.conf.get('answer_len', None)) - - def pad(self, data, len=None): - from keras.preprocessing.sequence import pad_sequences - return pad_sequences(data, maxlen=len, padding='post', truncating='post', value=0) - - ##### Training ##### - - def get_time(self): - return strftime('%Y-%m-%d %H:%M:%S', gmtime()) - - def train(self): - batch_size = self.params['batch_size'] - nb_epoch = self.params['nb_epoch'] - validation_split = self.params['validation_split'] - - training_set = self.load('train') - # top_50 = self.load('top_50') - - questions = list() - good_answers = list() - indices = list() - - for j, q in enumerate(training_set): - questions += [q['question']] * len(q['answers']) - good_answers += [self.answers[i] for i in q['answers']] - indices += [j] * len(q['answers']) - log('Began training at %s on %d samples' % - (self.get_time(), len(questions))) - - questions = self.padq(questions) - good_answers = self.pada(good_answers) - - val_loss = {'loss': 1., 'epoch': 0} - - # def get_bad_samples(indices, top_50): - # return [self.answers[random.choice(top_50[i])] for i in indices] - - for i in range(1, nb_epoch+1): - # sample from all answers to get bad answers - # if i % 2 == 0: - # bad_answers = self.pada(random.sample(self.answers.values(), len(good_answers))) - # else: - # bad_answers = self.pada(get_bad_samples(indices, top_50)) - bad_answers = self.pada(random.sample( - self.answers.values(), len(good_answers))) - - print('Fitting epoch %d' % i, file=sys.stderr) - hist = self.model.fit([questions, good_answers, bad_answers], epochs=1, batch_size=batch_size, - validation_split=validation_split, verbose=1) - - if hist.history['val_loss'][0] < val_loss['loss']: - val_loss = {'loss': hist.history['val_loss'][0], 'epoch': 
i} - log('%s -- Epoch %d ' % (self.get_time(), i) + - 'Loss = %.4f, Validation Loss = %.4f ' % (hist.history['loss'][0], hist.history['val_loss'][0]) + - '(Best: Loss = %.4f, Epoch = %d)' % (val_loss['loss'], val_loss['epoch'])) - - self.save_epoch(i) - - return val_loss - - ##### Evaluation ##### - - def prog_bar(self, so_far, total, n_bars=20): - n_complete = int(so_far * n_bars / total) - if n_complete >= n_bars - 1: - print('\r[' + '=' * n_bars + ']', end='', file=sys.stderr) - else: - s = '\r[' + '=' * (n_complete - 1) + '>' + \ - '.' * (n_bars - n_complete) + ']' - print(s, end='', file=sys.stderr) - - def eval_sets(self): - if self._eval_sets is None: - self._eval_sets = dict([(s, self.load(s)) - for s in ['dev', 'test1', 'test2']]) - return self._eval_sets - - def get_score(self, verbose=False): - top1_ls = [] - mrr_ls = [] - for name, data in self.eval_sets().items(): - print('----- %s -----' % name) - - random.shuffle(data) - - if 'n_eval' in self.params: - data = data[:self.params['n_eval']] - - c_1, c_2 = 0, 0 - - for i, d in enumerate(data): - self.prog_bar(i, len(data)) - - indices = d['good'] + d['bad'] - answers = self.pada([self.answers[i] for i in indices]) - question = self.padq([d['question']] * len(indices)) - - sims = self.model.predict([question, answers]) - - n_good = len(d['good']) - max_r = np.argmax(sims) - max_n = np.argmax(sims[:n_good]) - - r = rankdata(sims, method='max') - - if verbose: - min_r = np.argmin(sims) - amin_r = self.answers[indices[min_r]] - amax_r = self.answers[indices[max_r]] - amax_n = self.answers[indices[max_n]] - - print(' '.join(self.revert(d['question']))) - print('Predicted: ({}) '.format( - sims[max_r]) + ' '.join(self.revert(amax_r))) - print('Expected: ({}) Rank = {} '.format( - sims[max_n], r[max_n]) + ' '.join(self.revert(amax_n))) - print('Worst: ({})'.format( - sims[min_r]) + ' '.join(self.revert(amin_r))) - - c_1 += 1 if max_r == max_n else 0 - c_2 += 1 / float(r[max_r] - r[max_n] + 1) - - top1 = c_1 / float(len(data)) - mrr = c_2 / float(len(data)) - - del data - print('Top-1 Precision: %f' % top1) - print('MRR: %f' % mrr) - top1_ls.append(top1) - mrr_ls.append(mrr) - return top1_ls, mrr_ls - - -if __name__ == '__main__': - if len(sys.argv) >= 2 and sys.argv[1] == 'serve': - from flask import Flask - app = Flask(__name__) - port = 5000 - lines = list() - - def log(x): - lines.append(x) - - @app.route('/') - def home(): - return ('
<h1>Training Log</h1>' + - ''.join(['<p>{}</p>
'.format(line) for line in lines]) + - '') - - def start_server(): - app.run(debug=False, use_evalex=False, port=port) - - thread.start_new_thread(start_server, tuple()) - print('Serving to port %d' % port, file=sys.stderr) - - import numpy as np - - conf = { - 'n_words': 22353, - 'question_len': 150, - 'answer_len': 150, - 'margin': 0.009, - 'initial_embed_weights': 'word2vec_100_dim.embeddings', - - 'training': { - 'batch_size': 100, - 'nb_epoch': 2000, - 'validation_split': 0.1, - }, - - 'similarity': { - 'mode': 'cosine', - 'gamma': 1, - 'c': 1, - 'd': 2, - 'dropout': 0.5, - } - } - - from keras_models import EmbeddingModel, ConvolutionModel, ConvolutionalLSTM - evaluator = Evaluator(conf, model=ConvolutionModel, optimizer='adam') - - # train the model - best_loss = evaluator.train() - - # evaluate mrr for a particular epoch - evaluator.load_epoch(best_loss['epoch']) - top1, mrr = evaluator.get_score(verbose=False) - log(' - Top-1 Precision:') - log(' - %.3f on test 1' % top1[0]) - log(' - %.3f on test 2' % top1[1]) - log(' - %.3f on dev' % top1[2]) - log(' - MRR:') - log(' - %.3f on test 1' % mrr[0]) - log(' - %.3f on test 2' % mrr[1]) - log(' - %.3f on dev' % mrr[2]) -from __future__ import print_function - -from abc import abstractmethod - -from keras.engine import Input -from keras.layers import merge, Embedding, Dropout, Conv1D, Lambda, LSTM, Dense, concatenate, TimeDistributed -from keras import backend as K -from keras.models import Model - -import numpy as np - - -class LanguageModel: - def __init__(self, config): - self.question = Input( - shape=(config['question_len'],), dtype='int32', name='question_base') - self.answer_good = Input( - shape=(config['answer_len'],), dtype='int32', name='answer_good_base') - self.answer_bad = Input( - shape=(config['answer_len'],), dtype='int32', name='answer_bad_base') - - self.config = config - self.params = config.get('similarity', dict()) - - # initialize a bunch of variables that will be set later - self._models = None - self._similarities = None - self._answer = None - self._qa_model = None - - self.training_model = None - self.prediction_model = None - - def get_answer(self): - if self._answer is None: - self._answer = Input( - shape=(self.config['answer_len'],), dtype='int32', name='answer') - return self._answer - - @abstractmethod - def build(self): - return - - def get_similarity(self): - ''' Specify similarity in configuration under 'similarity' -> 'mode' - If a parameter is needed for the model, specify it in 'similarity' - - Example configuration: - - config = { - ... other parameters ... 
- 'similarity': { - 'mode': 'gesd', - 'gamma': 1, - 'c': 1, - } - } - - cosine: dot(a, b) / sqrt(dot(a, a) * dot(b, b)) - polynomial: (gamma * dot(a, b) + c) ^ d - sigmoid: tanh(gamma * dot(a, b) + c) - rbf: exp(-gamma * l2_norm(a-b) ^ 2) - euclidean: 1 / (1 + l2_norm(a - b)) - exponential: exp(-gamma * l2_norm(a - b)) - gesd: euclidean * sigmoid - aesd: (euclidean + sigmoid) / 2 - ''' - - params = self.params - similarity = params['mode'] - - def dot(a, b): return K.batch_dot(a, b, axes=1) - def l2_norm(a, b): return K.sqrt( - K.sum(K.square(a - b), axis=1, keepdims=True)) - - if similarity == 'cosine': - return lambda x: dot(x[0], x[1]) / K.maximum(K.sqrt(dot(x[0], x[0]) * dot(x[1], x[1])), K.epsilon()) - elif similarity == 'polynomial': - return lambda x: (params['gamma'] * dot(x[0], x[1]) + params['c']) ** params['d'] - elif similarity == 'sigmoid': - return lambda x: K.tanh(params['gamma'] * dot(x[0], x[1]) + params['c']) - elif similarity == 'rbf': - return lambda x: K.exp(-1 * params['gamma'] * l2_norm(x[0], x[1]) ** 2) - elif similarity == 'euclidean': - return lambda x: 1 / (1 + l2_norm(x[0], x[1])) - elif similarity == 'exponential': - return lambda x: K.exp(-1 * params['gamma'] * l2_norm(x[0], x[1])) - elif similarity == 'gesd': - def euclidean(x): return 1 / (1 + l2_norm(x[0], x[1])) - def sigmoid(x): return 1 / (1 + K.exp(-1 * - params['gamma'] * (dot(x[0], x[1]) + params['c']))) - return lambda x: euclidean(x) * sigmoid(x) - elif similarity == 'aesd': - def euclidean(x): return 0.5 / (1 + l2_norm(x[0], x[1])) - def sigmoid(x): return 0.5 / (1 + K.exp(-1 * - params['gamma'] * (dot(x[0], x[1]) + params['c']))) - return lambda x: euclidean(x) + sigmoid(x) - else: - raise Exception('Invalid similarity: {}'.format(similarity)) - - def get_qa_model(self): - if self._models is None: - self._models = self.build() - - if self._qa_model is None: - question_output, answer_output = self._models - dropout = Dropout(self.params.get('dropout', 0.2)) - similarity = self.get_similarity() - # qa_model = merge([dropout(question_output), dropout(answer_output)], - # mode=similarity, output_shape=lambda _: (None, 1)) - qa_model = Lambda(similarity, output_shape=lambda _: (None, 1))([dropout(question_output), - dropout(answer_output)]) - self._qa_model = Model( - inputs=[self.question, self.get_answer()], outputs=qa_model, name='qa_model') - - return self._qa_model - - def compile(self, optimizer, **kwargs): - qa_model = self.get_qa_model() - - good_similarity = qa_model([self.question, self.answer_good]) - bad_similarity = qa_model([self.question, self.answer_bad]) - - # loss = merge([good_similarity, bad_similarity], - # mode=lambda x: K.relu(self.config['margin'] - x[0] + x[1]), - # output_shape=lambda x: x[0]) - - loss = Lambda(lambda x: K.relu(self.config['margin'] - x[0] + x[1]), - output_shape=lambda x: x[0])([good_similarity, bad_similarity]) - - self.prediction_model = Model(inputs=[self.question, self.answer_good], outputs=good_similarity, - name='prediction_model') - self.prediction_model.compile( - loss=lambda y_true, y_pred: y_pred, optimizer=optimizer, **kwargs) - - self.training_model = Model(inputs=[self.question, self.answer_good, self.answer_bad], outputs=loss, - name='training_model') - self.training_model.compile( - loss=lambda y_true, y_pred: y_pred, optimizer=optimizer, **kwargs) - - def fit(self, x, **kwargs): - assert self.training_model is not None, 'Must compile the model before fitting data' - y = np.zeros(shape=(x[0].shape[0],)) # doesn't get used - return 
self.training_model.fit(x, y, **kwargs) - - def predict(self, x): - assert self.prediction_model is not None and isinstance( - self.prediction_model, Model) - return self.prediction_model.predict_on_batch(x) - - def save_weights(self, file_name, **kwargs): - assert self.prediction_model is not None, 'Must compile the model before saving weights' - self.prediction_model.save_weights(file_name, **kwargs) - - def load_weights(self, file_name, **kwargs): - assert self.prediction_model is not None, 'Must compile the model loading weights' - self.prediction_model.load_weights(file_name, **kwargs) - - -class EmbeddingModel(LanguageModel): - def build(self): - question = self.question - answer = self.get_answer() - - # add embedding layers - weights = np.load(self.config['initial_embed_weights']) - embedding = Embedding(input_dim=self.config['n_words'], - output_dim=weights.shape[1], - mask_zero=True, - # dropout=0.2, - weights=[weights]) - question_embedding = embedding(question) - answer_embedding = embedding(answer) - - # maxpooling - maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), - output_shape=lambda x: (x[0], x[2])) - maxpool.supports_masking = True - question_pool = maxpool(question_embedding) - answer_pool = maxpool(answer_embedding) - - return question_pool, answer_pool - - -class ConvolutionModel(LanguageModel): - def build(self): - assert self.config['question_len'] == self.config['answer_len'] - - question = self.question - answer = self.get_answer() - - # add embedding layers - weights = np.load(self.config['initial_embed_weights']) - embedding = Embedding(input_dim=self.config['n_words'], - output_dim=weights.shape[1], - weights=[weights]) - question_embedding = embedding(question) - answer_embedding = embedding(answer) - - hidden_layer = TimeDistributed(Dense(200, activation='tanh')) - - question_hl = hidden_layer(question_embedding) - answer_hl = hidden_layer(answer_embedding) - - # cnn - cnns = [Conv1D(kernel_size=kernel_size, - filters=1000, - activation='tanh', - padding='same') for kernel_size in [2, 3, 5, 7]] - # question_cnn = merge([cnn(question_embedding) for cnn in cnns], mode='concat') - question_cnn = concatenate([cnn(question_hl) for cnn in cnns], axis=-1) - # answer_cnn = merge([cnn(answer_embedding) for cnn in cnns], mode='concat') - answer_cnn = concatenate([cnn(answer_hl) for cnn in cnns], axis=-1) - - # maxpooling - maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), - output_shape=lambda x: (x[0], x[2])) - maxpool.supports_masking = True - # enc = Dense(100, activation='tanh') - # question_pool = enc(maxpool(question_cnn)) - # answer_pool = enc(maxpool(answer_cnn)) - question_pool = maxpool(question_cnn) - answer_pool = maxpool(answer_cnn) - - return question_pool, answer_pool - - -class ConvolutionalLSTM(LanguageModel): - def build(self): - question = self.question - answer = self.get_answer() - - # add embedding layers - weights = np.load(self.config['initial_embed_weights']) - embedding = Embedding(input_dim=self.config['n_words'], - output_dim=weights.shape[1], - weights=[weights]) - question_embedding = embedding(question) - answer_embedding = embedding(answer) - - f_rnn = LSTM(141, return_sequences=True, implementation=1) - b_rnn = LSTM(141, return_sequences=True, - implementation=1, go_backwards=True) - - qf_rnn = f_rnn(question_embedding) - qb_rnn = b_rnn(question_embedding) - # question_pool = merge([qf_rnn, qb_rnn], mode='concat', concat_axis=-1) - question_pool = concatenate([qf_rnn, qb_rnn], axis=-1) - - af_rnn = 
f_rnn(answer_embedding) - ab_rnn = b_rnn(answer_embedding) - # answer_pool = merge([af_rnn, ab_rnn], mode='concat', concat_axis=-1) - answer_pool = concatenate([af_rnn, ab_rnn], axis=-1) - - # cnn - cnns = [Conv1D(kernel_size=kernel_size, - filters=500, - activation='tanh', - padding='same') for kernel_size in [1, 2, 3, 5]] - # question_cnn = merge([cnn(question_pool) for cnn in cnns], mode='concat') - question_cnn = concatenate([cnn(question_pool) - for cnn in cnns], axis=-1) - # answer_cnn = merge([cnn(answer_pool) for cnn in cnns], mode='concat') - answer_cnn = concatenate([cnn(answer_pool) for cnn in cnns], axis=-1) - - maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), - output_shape=lambda x: (x[0], x[2])) - maxpool.supports_masking = True - question_pool = maxpool(question_cnn) - answer_pool = maxpool(answer_cnn) - - return question_pool, answer_pool - - -class AttentionModel(LanguageModel): - def build(self): - question = self.question - answer = self.get_answer() - - # add embedding layers - weights = np.load(self.config['initial_embed_weights']) - embedding = Embedding(input_dim=self.config['n_words'], - output_dim=weights.shape[1], - # mask_zero=True, - weights=[weights]) - question_embedding = embedding(question) - answer_embedding = embedding(answer) - - # question rnn part - f_rnn = LSTM(141, return_sequences=True, consume_less='mem') - b_rnn = LSTM(141, return_sequences=True, - consume_less='mem', go_backwards=True) - question_f_rnn = f_rnn(question_embedding) - question_b_rnn = b_rnn(question_embedding) - - # maxpooling - maxpool = Lambda(lambda x: K.max(x, axis=1, keepdims=False), - output_shape=lambda x: (x[0], x[2])) - maxpool.supports_masking = True - question_pool = merge([maxpool(question_f_rnn), maxpool( - question_b_rnn)], mode='concat', concat_axis=-1) - - # answer rnn part - from attention_lstm import AttentionLSTMWrapper - f_rnn = AttentionLSTMWrapper( - f_rnn, question_pool, single_attention_param=True) - b_rnn = AttentionLSTMWrapper( - b_rnn, question_pool, single_attention_param=True) - - answer_f_rnn = f_rnn(answer_embedding) - answer_b_rnn = b_rnn(answer_embedding) - answer_pool = merge([maxpool(answer_f_rnn), maxpool( - answer_b_rnn)], mode='concat', concat_axis=-1) - - return question_pool, answer_pool -import itertools - -from keras.layers import Activation, Reshape, Lambda, concatenate, dot, add -from keras.layers import Conv1D, Conv2D, Conv3D -from keras.layers import MaxPool1D - -from keras.callbacks import Callback, TensorBoard -from keras.engine.topology import Layer -from keras import backend as K - -''' Callbacks ''' - - -class HistoryCheckpoint(Callback): - '''Callback that records events - into a `History` object. - - It then saves the history after each epoch into a file. 
- To read the file into a python dict: - history = {} - with open(filename, "r") as f: - history = eval(f.read()) - - This may be unsafe since eval() will evaluate any string - A safer alternative: - - import ast - - history = {} - with open(filename, "r") as f: - history = ast.literal_eval(f.read()) - - ''' - - def __init__(self, filename): - super(Callback, self).__init__() - self.filename = filename - - def on_train_begin(self, logs={}): - self.epoch = [] - self.history = {} - - def on_epoch_end(self, epoch, logs={}): - self.epoch.append(epoch) - for k, v in logs.items(): - if k not in self.history: - self.history[k] = [] - self.history[k].append(v) - - with open(self.filename, "w") as f: - f.write(str(self.history)) - - -''' -Below is a modification to the TensorBoard callback to perform -batchwise writing to the tensorboard, instead of only at the end -of the batch. -''' - - -class TensorBoardBatch(TensorBoard): - def __init__(self, log_dir='./logs', - histogram_freq=0, - batch_size=32, - write_graph=True, - write_grads=False, - write_images=False, - embeddings_freq=0, - embeddings_layer_names=None, - embeddings_metadata=None): - super(TensorBoardBatch, self).__init__(log_dir, - histogram_freq=histogram_freq, - batch_size=batch_size, - write_graph=write_graph, - write_grads=write_grads, - write_images=write_images, - embeddings_freq=embeddings_freq, - embeddings_layer_names=embeddings_layer_names, - embeddings_metadata=embeddings_metadata) - - # conditionally import tensorflow iff TensorBoardBatch is created - self.tf = __import__('tensorflow') - self.global_step = 1 - - def on_batch_end(self, batch, logs=None): - logs = logs or {} - - for name, value in logs.items(): - if name in ['batch', 'size']: - continue - summary = self.tf.Summary() - summary_value = summary.value.add() - summary_value.simple_value = value.item() - summary_value.tag = name - self.writer.add_summary(summary, self.global_step) - self.global_step += 1 - - self.writer.flush() - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - - for name, value in logs.items(): - if name in ['batch', 'size']: - continue - summary = self.tf.Summary() - summary_value = summary.value.add() - summary_value.simple_value = value.item() - summary_value.tag = name - self.writer.add_summary(summary, self.global_step) - - self.global_step += 1 - self.writer.flush() - - -''' Theano Backend function ''' - - -def depth_to_scale(x, scale, output_shape, dim_ordering=K.image_dim_ordering(), name=None): - ''' Uses phase shift algorithm [1] to convert channels/depth for spacial resolution ''' - - import theano.tensor as T - - scale = int(scale) - - if dim_ordering == "tf": - x = x.transpose((0, 3, 1, 2)) - out_row, out_col, out_channels = output_shape - else: - out_channels, out_row, out_col = output_shape - - b, k, r, c = x.shape - out_b, out_k, out_r, out_c = b, k // (scale * scale), r * scale, c * scale - - out = K.reshape(x, (out_b, out_k, out_r, out_c)) - - for channel in range(out_channels): - channel += 1 - - for i in range(out_row): - for j in range(out_col): - a = i // scale # T.floor(i / scale).astype('int32') - b = j // scale # T.floor(j / scale).astype('int32') - d = channel * scale * (j % scale) + channel * (i % scale) - - T.set_subtensor(out[:, channel - 1, i, j], - x[:, d, a, b], inplace=True) - - if dim_ordering == 'tf': - out = out.transpose((0, 2, 3, 1)) - - return out - - -''' Theano Backend function ''' - - -def depth_to_scale_th(input, scale, channels): - ''' Uses phase shift algorithm [1] to convert 
channels/depth for spacial resolution ''' - import theano.tensor as T - - b, k, row, col = input.shape - output_shape = (b, channels, row * scale, col * scale) - - out = T.zeros(output_shape) - r = scale - - for y, x in itertools.product(range(scale), repeat=2): - out = T.inc_subtensor(out[:, :, y::r, x::r], - input[:, r * y + x:: r * r, :, :]) - - return out - - -''' Tensorflow Backend Function ''' - - -def depth_to_scale_tf(input, scale, channels): - try: - import tensorflow as tf - except ImportError: - print("Could not import Tensorflow for depth_to_scale operation. Please install Tensorflow or switch to Theano backend") - exit() - - def _phase_shift(I, r): - ''' Function copied as is from https://github.com/Tetrachrome/subpixel/blob/master/subpixel.py''' - - bsize, a, b, c = I.get_shape().as_list() - # Handling Dimension(None) type for undefined batch dim - bsize = tf.shape(I)[0] - X = tf.reshape(I, (bsize, a, b, r, r)) - X = tf.transpose(X, (0, 1, 2, 4, 3)) # bsize, a, b, 1, 1 - X = tf.split(1, a, X) # a, [bsize, b, r, r] - X = tf.concat(2, [tf.squeeze(x) for x in X]) # bsize, b, a*r, r - X = tf.split(1, b, X) # b, [bsize, a*r, r] - X = tf.concat(2, [tf.squeeze(x) for x in X]) # bsize, a*r, b*r - return tf.reshape(X, (bsize, a * r, b * r, 1)) - - if channels > 1: - Xc = tf.split(3, 3, input) - X = tf.concat(3, [_phase_shift(x, scale) for x in Xc]) - else: - X = _phase_shift(input, scale) - return X - - -''' -Implementation is incomplete. Use lambda layer for now. -''' - - -class SubPixelUpscaling(Layer): - - def __init__(self, r, channels, **kwargs): - super(SubPixelUpscaling, self).__init__(**kwargs) - - self.r = r - self.channels = channels - - def build(self, input_shape): - pass - - def call(self, x, mask=None): - if K.backend() == "theano": - y = depth_to_scale_th(x, self.r, self.channels) - else: - y = depth_to_scale_tf(x, self.r, self.channels) - return y - - def get_output_shape_for(self, input_shape): - if K.image_dim_ordering() == "th": - b, k, r, c = input_shape - return (b, self.channels, r * self.r, c * self.r) - else: - b, r, c, k = input_shape - return (b, r * self.r, c * self.r, self.channels) - - -''' Non Local Blocks ''' - - -def non_local_block(ip, computation_compression=2, mode='embedded'): - channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 - ip_shape = K.int_shape(ip) - - if mode not in ['gaussian', 'embedded', 'dot', 'concatenate']: - raise ValueError( - '`mode` must be one of `gaussian`, `embedded`, `dot` or `concatenate`') - - dim1, dim2, dim3 = None, None, None - - if len(ip_shape) == 3: # time series data - rank = 3 - batchsize, dim1, channels = ip_shape - - elif len(ip_shape) == 4: # image data - rank = 4 - - if channel_dim == 1: - batchsize, channels, dim1, dim2 = ip_shape - else: - batchsize, dim1, dim2, channels = ip_shape - - elif len(ip_shape) == 5: # Video / Voxel data - rank = 5 - - if channel_dim == 1: - batchsize, channels, dim1, dim2, dim3 = ip_shape - else: - batchsize, dim1, dim2, dim3, channels = ip_shape - - else: - raise ValueError( - 'Input dimension has to be either 3 (temporal), 4 (spatial) or 5 (spatio-temporal)') - - if mode == 'gaussian': # Gaussian instantiation - x1 = Reshape((-1, channels))(ip) # xi - x2 = Reshape((-1, channels))(ip) # xj - f = dot([x1, x2], axes=2) - f = Activation('softmax')(f) - - elif mode == 'dot': # Dot instantiation - # theta path - theta = _convND(ip, rank, channels // 2) - theta = Reshape((-1, channels // 2))(theta) - - # phi path - phi = _convND(ip, rank, channels // 2) - phi = 
Reshape((-1, channels // 2))(phi) - - f = dot([theta, phi], axes=2) - - # scale the values to make it size invariant - if batchsize is not None: - f = Lambda(lambda z: 1. / batchsize * z)(f) - else: - f = Lambda(lambda z: 1. / 128 * z)(f) - - elif mode == 'concatenate': # Concatenation instantiation - raise NotImplemented('Concatenation mode has not been implemented yet') - - else: # Embedded Gaussian instantiation - # theta path - theta = _convND(ip, rank, channels // 2) - theta = Reshape((-1, channels // 2))(theta) - - # phi path - phi = _convND(ip, rank, channels // 2) - phi = Reshape((-1, channels // 2))(phi) - - if computation_compression > 1: - # shielded computation - phi = MaxPool1D(computation_compression)(phi) - - f = dot([theta, phi], axes=2) - f = Activation('softmax')(f) - - # g path - g = _convND(ip, rank, channels // 2) - g = Reshape((-1, channels // 2))(g) - - if computation_compression > 1 and mode == 'embedded': - # shielded computation - g = MaxPool1D(computation_compression)(g) - - # compute output path - y = dot([f, g], axes=[2, 1]) - - # reshape to input tensor format - if rank == 3: - y = Reshape((dim1, channels // 2))(y) - elif rank == 4: - if channel_dim == -1: - y = Reshape((dim1, dim2, channels // 2))(y) - else: - y = Reshape((channels // 2, dim1, dim2))(y) - else: - if channel_dim == -1: - y = Reshape((dim1, dim2, dim3, channels // 2))(y) - else: - y = Reshape((channels // 2, dim1, dim2, dim3))(y) - - # project filters - y = _convND(y, rank, channels) - - # residual connection - residual = add([ip, y]) - - return residual - - -def _convND(ip, rank, channels): - assert rank in [3, 4, 5], "Rank of input must be 3, 4 or 5" - - if rank == 3: - x = Conv1D(channels, 1, padding='same', use_bias=False)(ip) - elif rank == 4: - x = Conv2D(channels, (1, 1), padding='same', use_bias=False)(ip) - else: - x = Conv3D(channels, (1, 1, 1), padding='same', use_bias=False)(ip) - return x -from keras.models import Model -from keras.callbacks import ModelCheckpoint -from keras import backend as K - -import models -import img_utils -from advanced import HistoryCheckpoint, TensorBoardBatch - -scale_factor = 2 -batchsize = 128 -nb_epochs = 50 - -teacher_model = models.DistilledResNetSR(scale_factor) -teacher_model.create_model(load_weights=True) -teacher_model.model.summary() - -print("\n\n\n") - -teacher_output_tensor = teacher_model.model.layers[-1].output - -for layer in teacher_model.model.layers: - layer.trainable = False - -student_model = models.DistilledResNetSR(scale_factor) -student_model.create_model() -student_model.model.summary() - - -def zero_loss(y_true, y_pred): - return 0 * y_true - - -def gram_matrix(x): - assert K.ndim(x) == 4 - - with K.name_scope('gram_matrix'): - if K.image_data_format() == "channels_first": - batch, channels, width, height = K.int_shape(x) - features = K.batch_flatten(x) - else: - batch, width, height, channels = K.int_shape(x) - features = K.batch_flatten(K.permute_dimensions(x, (0, 3, 1, 2))) - - gram = K.dot(features, K.transpose(features)) / \ - (channels * width * height) - return gram - - -joint_model = Model(inputs=[student_model.model.input, teacher_model.model.input], - outputs=student_model.model.output) - -student_output_tensor = joint_model.layers[-1].output - -# teacher - student l2 loss -with K.name_scope('l2_loss'): - l2_weight = 1e-3 - teacher_student_loss = K.sum(K.square( - teacher_output_tensor - student_output_tensor)) # l2 norm of difference -joint_model.add_loss(l2_weight * teacher_student_loss) - -# perceptual loss -with 
K.name_scope('perceptual_loss'): - perceptual_weight = 2. - perceptual_loss = K.sum(K.square(gram_matrix( - teacher_output_tensor) - gram_matrix(student_output_tensor))) -joint_model.add_loss(perceptual_weight * perceptual_loss) - -joint_model.compile(optimizer='adam', loss=zero_loss) - -# train student model using teacher model -samples_per_epoch = img_utils.image_count() -val_count = img_utils.val_image_count() - -weight_path = 'weights/joint_model (%s) %dX.h5' % ( - teacher_model.model_name, scale_factor) -history_fn = 'Joint_model_training.txt' - -train_path = img_utils.output_path -validation_path = img_utils.validation_output_path -path_X = img_utils.output_path + "X/" -path_Y = img_utils.output_path + "y/" - -callback_list = [ModelCheckpoint(weight_path, monitor='val_loss', save_best_only=True, - mode='min', save_weights_only=True, verbose=2), - TensorBoardBatch('./distillation_logs_%s/' % - teacher_model.model_name), - HistoryCheckpoint(history_fn), - ] - -print("Training model : %s" % ("Joint Model")) -joint_model.fit_generator(img_utils.image_generator(train_path, scale_factor=scale_factor, - small_train_images=teacher_model.type_true_upscaling, - batch_size=batchsize, - nb_inputs=2), # 2 input joint model - steps_per_epoch=samples_per_epoch // batchsize + 1, - epochs=nb_epochs, callbacks=callback_list, - validation_data=img_utils.image_generator(validation_path, - scale_factor=scale_factor, - small_train_images=teacher_model.type_true_upscaling, - batch_size=val_count, - nb_inputs=2), # 2 input joint model - validation_steps=1) - -student_model.model.save_weights( - 'weights/student_model_final %dX.h5' % scale_factor, overwrite=True) -from __future__ import print_function, division, absolute_import - -import numpy as np -from scipy.misc import imsave, imread, imresize -from sklearn.feature_extraction.image import reconstruct_from_patches_2d, extract_patches_2d -from scipy.ndimage.filters import gaussian_filter - -from keras import backend as K - -import os -import time - -''' -_image_scale_multiplier is a special variable which is used to alter image size. - -The default image size is 32x32. If a true upscaling model is used, then the input image size is 16x16, -which not offer adequate training samples. -''' -_image_scale_multiplier = 1 - -img_size = 128 * _image_scale_multiplier -stride = 64 * _image_scale_multiplier - -assert (img_size ** 2) % (stride ** 2) == 0, "Number of images generated from strided subsample of the image needs to be \n" \ - "a positive integer. Change stride such that : \n" \ - "(img_size ** 2) / (stride ** 2) is a positive integer." 
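# [Editor's sketch -- not part of the original file] The assert above encodes
# the patch arithmetic this module relies on: a strided subsample of an
# img_size x img_size image yields (img_size // stride) ** 2 patches, and
# (img_size ** 2) % (stride ** 2) == 0 holds exactly when stride evenly
# divides img_size. The helper name below is illustrative only; with the
# module defaults (img_size = 128, stride = 64) it returns 4 patches per image.
def _patches_per_image(img_size, stride):
    assert (img_size ** 2) % (stride ** 2) == 0, "stride must evenly divide img_size"
    return (img_size // stride) ** 2  # e.g. _patches_per_image(128, 64) == 4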
- -input_path = r"D:\Yue\Documents\Datasets\train2014\train2014\\" # r"input_images/" -# r"D:\Yue\Documents\Datasets\MSCOCO\val\valset\\" # r"val_images/" -validation_path = r"val_images/" - -validation_set5_path = validation_path + "set5/" -validation_set14_path = validation_path + "set14/" - -base_dataset_dir = os.path.expanduser("~") + "/Image Super Resolution Dataset/" - -output_path = base_dataset_dir + "train_images/train/" -validation_output_path = base_dataset_dir + r"train_images/validation/" - -if not os.path.exists(output_path): - os.makedirs(output_path) - -# def transform_images(directory, output_directory, scaling_factor=2, max_nb_images=-1, true_upscale=False): -# index = 1 -# -# if not os.path.exists(output_directory + "X/"): -# os.makedirs(output_directory + "X/") -# -# if not os.path.exists(output_directory + "y/"): -# os.makedirs(output_directory + "y/") -# -# # For each image in input_images directory -# nb_images = len([name for name in os.listdir(directory)]) -# -# if max_nb_images != -1: -# print("Transforming %d images." % max_nb_images) -# else: -# assert max_nb_images <= nb_images, "Max number of images must be less than number of images in path" -# print("Transforming %d images." % (nb_images)) -# -# if nb_images == 0: -# print("Extract the training images or images from imageset_91.zip (found in the releases of the project) " -# "into a directory with the name 'input_images'") -# print("Extract the validation images or images from set5_validation.zip (found in the releases of the project) " -# "into a directory with the name 'val_images'") -# exit() -# -# for file in os.listdir(directory): -# img = imread(directory + file, mode='RGB') -# -# # Resize to 256 x 256 -# img = imresize(img, (img_size, img_size)) -# -# # Create patches -# hr_patch_size = (16 * scaling_factor * _image_scale_multiplier) -# nb_hr_images = (img_size ** 2) // (stride ** 2) -# -# hr_samples = np.empty((nb_hr_images, hr_patch_size, hr_patch_size, 3)) -# -# image_subsample_iterator = subimage_generator(img, stride, hr_patch_size, nb_hr_images) -# -# stride_range = np.sqrt(nb_hr_images).astype(int) -# -# i = 0 -# for j in range(stride_range): -# for k in range(stride_range): -# hr_samples[i, :, :, :] = next(image_subsample_iterator) -# i += 1 -# -# lr_patch_size = 16 * _image_scale_multiplier -# -# t1 = time.time() -# # Create nb_hr_images 'X' and 'Y' sub-images of size hr_patch_size for each patch -# for i in range(nb_hr_images): -# ip = hr_samples[i] -# # Save ground truth image X -# imsave(output_directory + "/y/" + "%d_%d.png" % (index, i + 1), ip) -# -# # Apply Gaussian Blur to Y -# op = gaussian_filter(ip, sigma=0.5) -# -# # Subsample by scaling factor to Y -# op = imresize(op, (lr_patch_size, lr_patch_size), interp='bicubic') -# -# if not true_upscale: -# # Upscale by scaling factor to Y -# op = imresize(op, (hr_patch_size, hr_patch_size), interp='bicubic') -# -# # Save Y -# imsave(output_directory + "/X/" + "%d_%d.png" % (index, i+1), op) -# -# print("Finished image %d in time %0.2f seconds. (%s)" % (index, time.time() - t1, file)) -# index += 1 -# -# if max_nb_images > 0 and index >= max_nb_images: -# print("Transformed maximum number of images. ") -# break -# -# print("Images transformed. 
Saved at directory : %s" % (output_directory)) - - -def transform_images_temp(directory, output_directory, scaling_factor=2, max_nb_images=-1, true_upscale=False, - id_advance=0): - index = 1 - - if not os.path.exists(output_directory + "X/"): - os.makedirs(output_directory + "X/") - - if not os.path.exists(output_directory + "y/"): - os.makedirs(output_directory + "y/") - - # For each image in input_images directory - nb_images = len([name for name in os.listdir(directory)]) - - if max_nb_images != -1: - print("Transforming %d images." % max_nb_images) - else: - assert max_nb_images <= nb_images, "Max number of images must be less than number of images in path" - print("Transforming %d images." % (nb_images)) - - if nb_images == 0: - print("Extract the training images or images from imageset_91.zip (found in the releases of the project) " - "into a directory with the name 'input_images'") - print("Extract the validation images or images from set5_validation.zip (found in the releases of the project) " - "into a directory with the name 'val_images'") - exit() - - for file in os.listdir(directory): - img = imread(directory + file, mode='RGB') - - # Resize to 256 x 256 - img = imresize(img, (img_size, img_size)) - - # Create patches - hr_patch_size = 64 - lr_patch_size = 32 - nb_hr_images = (img_size ** 2) // (stride ** 2) - - hr_samples = np.empty((nb_hr_images, hr_patch_size, hr_patch_size, 3)) - - image_subsample_iterator = subimage_generator( - img, stride, hr_patch_size, nb_hr_images) - - stride_range = np.sqrt(nb_hr_images).astype(int) - - i = 0 - for j in range(stride_range): - for k in range(stride_range): - hr_samples[i, :, :, :] = next(image_subsample_iterator) - i += 1 - - t1 = time.time() - # Create nb_hr_images 'X' and 'Y' sub-images of size hr_patch_size for each patch - for i in range(nb_hr_images): - ip = hr_samples[i] - # Save ground truth image X - imsave(output_directory + "/y/" + "%d_%d.png" % - (index + id_advance, i + 1), ip) - - # Apply Gaussian Blur to Y - #op = gaussian_filter(ip, sigma=0.5) - - # Subsample by scaling factor to Y - op = imresize(ip, (lr_patch_size, lr_patch_size), interp='bicubic') - - if not true_upscale: - # Upscale by scaling factor to Y - op = imresize(op, (hr_patch_size, hr_patch_size), - interp='bicubic') - - # Save Y - imsave(output_directory + "/X/" + "%d_%d.png" % - (index + id_advance, id_advance + i + 1), op) - - print("Finished image %d in time %0.2f seconds. (%s)" % - (index + id_advance, time.time() - t1, file)) - index += 1 - - if max_nb_images > 0 and index >= max_nb_images: - print("Transformed maximum number of images. ") - break - - print("Images transformed. 
Saved at directory : %s" % (output_directory)) - - -def image_count(): - return len([name for name in os.listdir(output_path + "X/")]) - - -def val_image_count(): - return len([name for name in os.listdir(validation_output_path + "X/")]) - - -def subimage_generator(img, stride, patch_size, nb_hr_images): - for _ in range(nb_hr_images): - for x in range(0, img_size, stride): - for y in range(0, img_size, stride): - subimage = img[x: x + patch_size, y: y + patch_size, :] - - yield subimage - - -def make_patches(x, scale, patch_size, upscale=True, verbose=1): - '''x shape: (num_channels, rows, cols)''' - height, width = x.shape[:2] - if upscale: - x = imresize(x, (height * scale, width * scale)) - patches = extract_patches_2d(x, (patch_size, patch_size)) - return patches - - -def combine_patches(in_patches, out_shape, scale): - '''Reconstruct an image from these `patches`''' - recon = reconstruct_from_patches_2d(in_patches, out_shape) - return recon - - -def image_generator(directory, scale_factor=2, target_shape=None, channels=3, small_train_images=False, shuffle=True, - batch_size=32, nb_inputs=1, seed=None): - if not target_shape: - if small_train_images: - if K.image_dim_ordering() == "th": - image_shape = (channels, 16 * _image_scale_multiplier, - 16 * _image_scale_multiplier) - y_image_shape = (channels, 16 * scale_factor * _image_scale_multiplier, - 16 * scale_factor * _image_scale_multiplier) - else: - # image_shape = (16 * _image_scale_multiplier, 16 * _image_scale_multiplier, channels) - # y_image_shape = (16 * scale_factor * _image_scale_multiplier, - # 16 * scale_factor * _image_scale_multiplier, channels) - image_shape = (32 * _image_scale_multiplier, - 32 * _image_scale_multiplier, channels) - y_image_shape = (32 * scale_factor * _image_scale_multiplier, - 32 * scale_factor * _image_scale_multiplier, channels) - else: - if K.image_dim_ordering() == "th": - image_shape = (channels, 32 * scale_factor * _image_scale_multiplier, - 32 * scale_factor * _image_scale_multiplier) - y_image_shape = image_shape - else: - image_shape = (32 * scale_factor * _image_scale_multiplier, 32 * scale_factor * _image_scale_multiplier, - channels) - y_image_shape = image_shape - else: - if small_train_images: - if K.image_dim_ordering() == "th": - y_image_shape = (3,) + target_shape - - target_shape = (target_shape[0] * _image_scale_multiplier // scale_factor, - target_shape[1] * _image_scale_multiplier // scale_factor) - image_shape = (3,) + target_shape - else: - y_image_shape = target_shape + (channels,) - - target_shape = (target_shape[0] * _image_scale_multiplier // scale_factor, - target_shape[1] * _image_scale_multiplier // scale_factor) - image_shape = target_shape + (channels,) - else: - if K.image_dim_ordering() == "th": - image_shape = (channels,) + target_shape - y_image_shape = image_shape - else: - image_shape = target_shape + (channels,) - y_image_shape = image_shape - - file_names = [f for f in sorted(os.listdir(directory + "X/"))] - X_filenames = [os.path.join(directory, "X", f) for f in file_names] - y_filenames = [os.path.join(directory, "y", f) for f in file_names] - - nb_images = len(file_names) - print("Found %d images." 
% nb_images) - - index_generator = _index_generator(nb_images, batch_size, shuffle, seed) - - while 1: - index_array, current_index, current_batch_size = next(index_generator) - - batch_x = np.zeros((current_batch_size,) + image_shape) - batch_y = np.zeros((current_batch_size,) + y_image_shape) - - for i, j in enumerate(index_array): - x_fn = X_filenames[j] - img = imread(x_fn, mode='RGB') - if small_train_images: - img = imresize(img, (32 * _image_scale_multiplier, - 32 * _image_scale_multiplier)) - img = img.astype('float32') / 255. - - if K.image_dim_ordering() == "th": - batch_x[i] = img.transpose((2, 0, 1)) - else: - batch_x[i] = img - - y_fn = y_filenames[j] - img = imread(y_fn, mode="RGB") - img = img.astype('float32') / 255. - - if K.image_dim_ordering() == "th": - batch_y[i] = img.transpose((2, 0, 1)) - else: - batch_y[i] = img - - if nb_inputs == 1: - yield (batch_x, batch_y) - else: - batch_x = [batch_x for i in range(nb_inputs)] - yield batch_x, batch_y - - -def _index_generator(N, batch_size=32, shuffle=True, seed=None): - batch_index = 0 - total_batches_seen = 0 - - while 1: - if seed is not None: - np.random.seed(seed + total_batches_seen) - - if batch_index == 0: - index_array = np.arange(N) - if shuffle: - index_array = np.random.permutation(N) - - current_index = (batch_index * batch_size) % N - - if N >= current_index + batch_size: - current_batch_size = batch_size - batch_index += 1 - else: - current_batch_size = N - current_index - batch_index = 0 - total_batches_seen += 1 - - yield (index_array[current_index: current_index + current_batch_size], - current_index, current_batch_size) - - -def smooth_gan_labels(y): - assert len(y.shape) == 2, "Needs to be a binary class" - y = np.asarray(y, dtype='int') - Y = np.zeros(y.shape, dtype='float32') - - for i in range(y.shape[0]): - for j in range(y.shape[1]): - if y[i, j] == 0: - Y[i, j] = np.random.uniform(0.0, 0.3) - else: - Y[i, j] = np.random.uniform(0.7, 1.2) - - return Y - - -if __name__ == "__main__": - # Transform the images once, then run the main code to scale images - - # Change scaling factor to increase the scaling factor - scaling_factor = 2 - - # Set true_upscale to True to generate smaller training images that will then be true upscaled. - # Leave as false to create same size input and output images - true_upscale = True - - # transform_images_temp(input_path, output_path, scaling_factor=scaling_factor, max_nb_images=-1, - # true_upscale=true_upscale) - transform_images_temp(validation_set5_path, validation_output_path, scaling_factor=scaling_factor, max_nb_images=-1, - true_upscale=true_upscale) - # transform_images_temp(validation_set14_path, validation_output_path, scaling_factor=scaling_factor, max_nb_images=-1, - # true_upscale=true_upscale) - pass -import models -import argparse -import tensorflow as tf - -parser = argparse.ArgumentParser( - description="Up-Scales an image using Image Super Resolution Model") -parser.add_argument("imgpath", type=str, nargs="+", help="Path to input image") -parser.add_argument("--model", type=str, default="distilled_rnsr", help="Use either image super resolution (sr), " - "expanded super resolution (esr), denoising auto encoder sr (dsr), " - "deep denoising sr (ddsr) or res net sr (rnsr)") -parser.add_argument("--scale", default=2, help='Scaling factor. Default = 2x') -parser.add_argument("--mode", default="patch", type=str, - help='Mode of operation. 
Choices are "fast" or "patch"') -parser.add_argument("--save_intermediate", dest='save', default='True', type=str, - help="Whether to save bilinear upscaled image") -parser.add_argument("--suffix", default="scaled", - type=str, help='Suffix of saved image') -parser.add_argument("--patch_size", type=int, default=8, help='Patch Size') - - -def strToBool(v): - return v.lower() in ("true", "yes", "t", "1") - - -args = parser.parse_args() - - -suffix = args.suffix - -model_type = str(args.model).lower() -if not model_type in ["sr", "esr", "dsr", "ddsr", "rnsr", "distilled_rnsr"]: - raise ValueError('Model type must be either "sr", "esr", "dsr", ' - '"ddsr", "rnsr" or "distilled_rnsr"') - -mode = str(args.mode).lower() -assert mode in [ - 'fast', 'patch'], 'Mode of operation must be either "fast" or "patch"' - -scale_factor = int(args.scale) -save = strToBool(args.save) - -patch_size = int(args.patch_size) -assert patch_size > 0, "Patch size must be a positive integer" - -with tf.device('/CPU:0'): - path = args.imgpath - for p in path: - if model_type == "sr": - model = models.ImageSuperResolutionModel(scale_factor) - elif model_type == "esr": - model = models.ExpantionSuperResolution(scale_factor) - elif model_type == "dsr": - model = models.DenoisingAutoEncoderSR(scale_factor) - elif model_type == "ddsr": - model = models.DeepDenoiseSR(scale_factor) - elif model_type == "rnsr": - model = models.ResNetSR(scale_factor) - elif model_type == "distilled_rnsr": - model = models.DistilledResNetSR(scale_factor) - else: - model = models.DistilledResNetSR(scale_factor) - - model.upscale(p, save_intermediate=save, mode=mode, - patch_size=patch_size, suffix=suffix) -from __future__ import print_function, division - -from keras.models import Model -from keras.layers import Concatenate, Add, Average, Input, Dense, Flatten, BatchNormalization, Activation, LeakyReLU -from keras.layers.convolutional import Convolution2D, MaxPooling2D, UpSampling2D, Convolution2DTranspose -from keras import backend as K -from keras.utils.np_utils import to_categorical -import keras.callbacks as callbacks -import keras.optimizers as optimizers - -from advanced import HistoryCheckpoint, SubPixelUpscaling, non_local_block, TensorBoardBatch -import img_utils - -import numpy as np -import os -import time -import warnings - -try: - import cv2 - _cv2_available = True -except: - warnings.warn( - 'Could not load opencv properly. This may affect the quality of output images.') - _cv2_available = False - -train_path = img_utils.output_path -validation_path = img_utils.validation_output_path -path_X = img_utils.output_path + "X/" -path_Y = img_utils.output_path + "y/" - - -def PSNRLoss(y_true, y_pred): - """ - PSNR is Peek Signal to Noise Ratio, which is similar to mean squared error. - - It can be calculated as - PSNR = 20 * log10(MAXp) - 10 * log10(MSE) - - When providing an unscaled input, MAXp = 255. Therefore 20 * log10(255)== 48.1308036087. - However, since we are scaling our input, MAXp = 1. Therefore 20 * log10(1) = 0. - Thus we remove that component completely and only compute the remaining MSE component. - """ - return -10. * K.log(K.mean(K.square(y_pred - y_true))) / K.log(10.) - - -def psnr(y_true, y_pred): - assert y_true.shape == y_pred.shape, "Cannot calculate PSNR. Input shapes not same." \ - " y_true shape = %s, y_pred shape = %s" % (str(y_true.shape), - str(y_pred.shape)) - - return -10. 
* np.log10(np.mean(np.square(y_pred - y_true))) - - -class BaseSuperResolutionModel(object): - - def __init__(self, model_name, scale_factor): - """ - Base model to provide a standard interface of adding Super Resolution models - """ - self.model = None # type: Model - self.model_name = model_name - self.scale_factor = scale_factor - self.weight_path = None - - self.type_scale_type = "norm" # Default = "norm" = 1. / 255 - self.type_requires_divisible_shape = False - self.type_true_upscaling = False - - self.evaluation_func = None - self.uses_learning_phase = False - - def create_model(self, height=32, width=32, channels=3, load_weights=False, batch_size=128) -> Model: - """ - Subclass dependent implementation. - """ - if self.type_requires_divisible_shape and height is not None and width is not None: - assert height * img_utils._image_scale_multiplier % 4 == 0, "Height of the image must be divisible by 4" - assert width * img_utils._image_scale_multiplier % 4 == 0, "Width of the image must be divisible by 4" - - if K.image_dim_ordering() == "th": - if width is not None and height is not None: - shape = (channels, width * img_utils._image_scale_multiplier, - height * img_utils._image_scale_multiplier) - else: - shape = (channels, None, None) - else: - if width is not None and height is not None: - shape = (width * img_utils._image_scale_multiplier, - height * img_utils._image_scale_multiplier, channels) - else: - shape = (None, None, channels) - - init = Input(shape=shape) - - return init - - def fit(self, batch_size=128, nb_epochs=100, save_history=True, history_fn="Model History.txt") -> Model: - """ - Standard method to train any of the models. - """ - - samples_per_epoch = img_utils.image_count() - val_count = img_utils.val_image_count() - if self.model == None: - self.create_model(batch_size=batch_size) - - callback_list = [callbacks.ModelCheckpoint(self.weight_path, monitor='val_PSNRLoss', save_best_only=True, - mode='max', save_weights_only=True, verbose=2)] - if save_history: - callback_list.append(HistoryCheckpoint(history_fn)) - - if K.backend() == 'tensorflow': - log_dir = './%s_logs/' % self.model_name - tensorboard = TensorBoardBatch(log_dir, batch_size=batch_size) - callback_list.append(tensorboard) - - print("Training model : %s" % (self.__class__.__name__)) - self.model.fit_generator(img_utils.image_generator(train_path, scale_factor=self.scale_factor, - small_train_images=self.type_true_upscaling, - batch_size=batch_size), - steps_per_epoch=samples_per_epoch // batch_size + 1, - epochs=nb_epochs, callbacks=callback_list, - validation_data=img_utils.image_generator(validation_path, - scale_factor=self.scale_factor, - small_train_images=self.type_true_upscaling, - batch_size=batch_size), - validation_steps=val_count // batch_size + 1) - - return self.model - - def evaluate(self, validation_dir): - if self.type_requires_divisible_shape and not self.type_true_upscaling: - _evaluate_denoise(self, validation_dir) - else: - _evaluate(self, validation_dir) - - def upscale(self, img_path, save_intermediate=False, return_image=False, suffix="scaled", - patch_size=8, mode="patch", verbose=True): - """ - Standard method to upscale an image. - - :param img_path: path to the image - :param save_intermediate: saves the intermediate upscaled image (bilinear upscale) - :param return_image: returns a image of shape (height, width, channels). 
- :param suffix: suffix of upscaled image - :param patch_size: size of each patch grid - :param verbose: whether to print messages - :param mode: mode of upscaling. Can be "patch" or "fast" - """ - import os - from scipy.misc import imread, imresize, imsave - - # Destination path - path = os.path.splitext(img_path) - filename = path[0] + "_" + suffix + \ - "(%dx)" % (self.scale_factor) + path[1] - - # Read image - scale_factor = int(self.scale_factor) - true_img = imread(img_path, mode='RGB') - init_dim_1, init_dim_2 = true_img.shape[0], true_img.shape[1] - if verbose: - print("Old Size : ", true_img.shape) - if verbose: - print("New Size : (%d, %d, 3)" % - (init_dim_1 * scale_factor, init_dim_2 * scale_factor)) - - img_dim_1, img_dim_2 = 0, 0 - - if mode == "patch" and self.type_true_upscaling: - # Overriding mode for True Upscaling models - mode = 'fast' - print( - "Patch mode does not work with True Upscaling models yet. Defaulting to mode='fast'") - - if mode == 'patch': - # Create patches - if self.type_requires_divisible_shape: - if patch_size % 4 != 0: - print( - "Deep Denoise requires patch size which is multiple of 4.\nSetting patch_size = 8.") - patch_size = 8 - - images = img_utils.make_patches( - true_img, scale_factor, patch_size, verbose) - - nb_images = images.shape[0] - img_dim_1, img_dim_2 = images.shape[1], images.shape[2] - print("Number of patches = %d, Patch Shape = (%d, %d)" % - (nb_images, img_dim_2, img_dim_1)) - else: - # Use full image for super resolution - img_dim_1, img_dim_2 = self.__match_autoencoder_size(img_dim_1, img_dim_2, init_dim_1, init_dim_2, - scale_factor) - - images = imresize(true_img, (img_dim_1, img_dim_2)) - images = np.expand_dims(images, axis=0) - print("Image is reshaped to : (%d, %d, %d)" % - (images.shape[1], images.shape[2], images.shape[3])) - - # Save intermediate bilinear scaled image is needed for comparison. - intermediate_img = None - if save_intermediate: - if verbose: - print("Saving intermediate image.") - fn = path[0] + "_intermediate_" + path[1] - intermediate_img = imresize( - true_img, (init_dim_1 * scale_factor, init_dim_2 * scale_factor)) - imsave(fn, intermediate_img) - - # Transpose and Process images - if K.image_dim_ordering() == "th": - img_conv = images.transpose((0, 3, 1, 2)).astype(np.float32) / 255. - else: - img_conv = images.astype(np.float32) / 255. - - model = self.create_model(img_dim_2, img_dim_1, load_weights=True) - if verbose: - print("Model loaded.") - - # Create prediction for image patches - result = model.predict(img_conv, batch_size=128, verbose=verbose) - - if verbose: - print("De-processing images.") - - # Deprocess patches - if K.image_dim_ordering() == "th": - result = result.transpose((0, 2, 3, 1)).astype(np.float32) * 255. - else: - result = result.astype(np.float32) * 255. - - # Output shape is (original_width * scale, original_height * scale, nb_channels) - if mode == 'patch': - out_shape = (init_dim_1 * scale_factor, - init_dim_2 * scale_factor, 3) - result = img_utils.combine_patches(result, out_shape, scale_factor) - else: - # Access the 3 Dimensional image vector - result = result[0, :, :, :] - - result = np.clip(result, 0, 255).astype('uint8') - - if _cv2_available: - # used to remove noisy edges - result = cv2.pyrUp(result) - result = cv2.medianBlur(result, 3) - result = cv2.pyrDown(result) - - if verbose: - print("\nCompleted De-processing image.") - - if return_image: - # Return the image without saving. Useful for testing images. 
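# (Editor's note: at this point `result` is the de-processed uint8 RGB array
# assembled above; in patch mode its shape is out_shape, i.e.
# (init_dim_1 * scale_factor, init_dim_2 * scale_factor, 3), so callers
# passing return_image=True can post-process or save it themselves.)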
- return result - - if verbose: - print("Saving image.") - imsave(filename, result) - - def __match_autoencoder_size(self, img_dim_1, img_dim_2, init_dim_1, init_dim_2, scale_factor): - if self.type_requires_divisible_shape: - if not self.type_true_upscaling: - # AE model but not true upsampling - if ((init_dim_2 * scale_factor) % 4 != 0) or ((init_dim_1 * scale_factor) % 4 != 0) or \ - (init_dim_2 % 2 != 0) or (init_dim_1 % 2 != 0): - - print("AE models requires image size which is multiple of 4.") - img_dim_2 = ((init_dim_2 * scale_factor) // 4) * 4 - img_dim_1 = ((init_dim_1 * scale_factor) // 4) * 4 - - else: - # No change required - img_dim_2, img_dim_1 = init_dim_2 * scale_factor, init_dim_1 * scale_factor - else: - # AE model and true upsampling - if ((init_dim_2) % 4 != 0) or ((init_dim_1) % 4 != 0) or \ - (init_dim_2 % 2 != 0) or (init_dim_1 % 2 != 0): - - print("AE models requires image size which is multiple of 4.") - img_dim_2 = ((init_dim_2) // 4) * 4 - img_dim_1 = ((init_dim_1) // 4) * 4 - - else: - # No change required - img_dim_2, img_dim_1 = init_dim_2, init_dim_1 - else: - # Not AE but true upsampling - if self.type_true_upscaling: - img_dim_2, img_dim_1 = init_dim_2, init_dim_1 - else: - # Not AE and not true upsampling - img_dim_2, img_dim_1 = init_dim_2 * scale_factor, init_dim_1 * scale_factor - - return img_dim_1, img_dim_2, - - -def _evaluate(sr_model: BaseSuperResolutionModel, validation_dir, scale_pred=False): - """ - Evaluates the model on the Validation images - """ - print("Validating %s model" % sr_model.model_name) - if sr_model.model == None: - sr_model.create_model(load_weights=True) - if sr_model.evaluation_func is None: - if sr_model.uses_learning_phase: - sr_model.evaluation_func = K.function([sr_model.model.layers[0].input, K.learning_phase()], - [sr_model.model.layers[-1].output]) - else: - sr_model.evaluation_func = K.function([sr_model.model.layers[0].input], - [sr_model.model.layers[-1].output]) - predict_path = "val_predict/" - if not os.path.exists(predict_path): - os.makedirs(predict_path) - validation_path_set5 = validation_dir + "set5/" - validation_path_set14 = validation_dir + "set14/" - validation_dirs = [validation_path_set5, validation_path_set14] - for val_dir in validation_dirs: - image_fns = [name for name in os.listdir(val_dir)] - nb_images = len(image_fns) - print("Validating %d images from path %s" % (nb_images, val_dir)) - - total_psnr = 0.0 - - for impath in os.listdir(val_dir): - t1 = time.time() - - # Input image - y = img_utils.imread(val_dir + impath, mode='RGB') - width, height, _ = y.shape - - if sr_model.type_requires_divisible_shape: - # Denoise models require precise width and height, divisible by 4 - - if ((width // sr_model.scale_factor) % 4 != 0) or ((height // sr_model.scale_factor) % 4 != 0) \ - or (width % 2 != 0) or (height % 2 != 0): - width = ((width // sr_model.scale_factor) // 4) * \ - 4 * sr_model.scale_factor - height = ((height // sr_model.scale_factor) // - 4) * 4 * sr_model.scale_factor - - print("Model %s require the image size to be divisible by 4. 
New image size = (%d, %d)" % - (sr_model.model_name, width, height)) - - y = img_utils.imresize( - y, (width, height), interp='bicubic') - - y = y.astype('float32') - x_width = width if not sr_model.type_true_upscaling else width // sr_model.scale_factor - x_height = height if not sr_model.type_true_upscaling else height // sr_model.scale_factor - - x_temp = y.copy() - - if sr_model.type_scale_type == "tanh": - x_temp = (x_temp - 127.5) / 127.5 - y = (y - 127.5) / 127.5 - else: - x_temp /= 255. - y /= 255. - - y = np.expand_dims(y, axis=0) - - img = img_utils.imresize(x_temp, (x_width, x_height), - interp='bicubic') - - if not sr_model.type_true_upscaling: - img = img_utils.imresize( - img, (x_width, x_height), interp='bicubic') - - x = np.expand_dims(img, axis=0) - - if K.image_dim_ordering() == "th": - x = x.transpose((0, 3, 1, 2)) - y = y.transpose((0, 3, 1, 2)) - - if sr_model.uses_learning_phase: - y_pred = sr_model.evaluation_func([x, 0])[0][0] - else: - y_pred = sr_model.evaluation_func([x])[0][0] - - if scale_pred: - if sr_model.type_scale_type == "tanh": - y_pred = (y_pred + 1) * 127.5 - else: - y_pred *= 255. - - if sr_model.type_scale_type == 'tanh': - y = (y + 1) / 2 - - psnr_val = psnr(y[0], np.clip(y_pred, 0, 255) / 255) - total_psnr += psnr_val - - t2 = time.time() - print("Validated image : %s, Time required : %0.2f, PSNR value : %0.4f" % ( - impath, t2 - t1, psnr_val)) - - generated_path = predict_path + \ - "%s_%s_generated.png" % ( - sr_model.model_name, os.path.splitext(impath)[0]) - - if K.image_dim_ordering() == "th": - y_pred = y_pred.transpose((1, 2, 0)) - - y_pred = np.clip(y_pred, 0, 255).astype('uint8') - img_utils.imsave(generated_path, y_pred) - - print("Average PRNS value of validation images = %00.4f \n" % - (total_psnr / nb_images)) - - -def _evaluate_denoise(sr_model: BaseSuperResolutionModel, validation_dir, scale_pred=False): - print("Validating %s model" % sr_model.model_name) - predict_path = "val_predict/" - if not os.path.exists(predict_path): - os.makedirs(predict_path) - - validation_path_set5 = validation_dir + "set5/" - validation_path_set14 = validation_dir + "set14/" - - validation_dirs = [validation_path_set5, validation_path_set14] - for val_dir in validation_dirs: - image_fns = [name for name in os.listdir(val_dir)] - nb_images = len(image_fns) - print("Validating %d images from path %s" % (nb_images, val_dir)) - - total_psnr = 0.0 - - for impath in os.listdir(val_dir): - t1 = time.time() - - # Input image - y = img_utils.imread(val_dir + impath, mode='RGB') - width, height, _ = y.shape - - if ((width // sr_model.scale_factor) % 4 != 0) or ((height // sr_model.scale_factor) % 4 != 0) \ - or (width % 2 != 0) or (height % 2 != 0): - width = ((width // sr_model.scale_factor) // 4) * \ - 4 * sr_model.scale_factor - height = ((height // sr_model.scale_factor) // 4) * \ - 4 * sr_model.scale_factor - - print("Model %s require the image size to be divisible by 4. New image size = (%d, %d)" % - (sr_model.model_name, width, height)) - - y = img_utils.imresize(y, (width, height), interp='bicubic') - - y = y.astype('float32') - y = np.expand_dims(y, axis=0) - - x_temp = y.copy() - - if sr_model.type_scale_type == "tanh": - x_temp = (x_temp - 127.5) / 127.5 - y = (y - 127.5) / 127.5 - else: - x_temp /= 255. - y /= 255. 
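# (Editor's note on the scaling block above: type_scale_type == "tanh" maps
# pixel values from [0, 255] to [-1, 1] via (x - 127.5) / 127.5, while the
# default "norm" path maps them to [0, 1] via x / 255.; the matching inverse
# transforms are applied to y_pred further below when scale_pred is set.)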
-
-            img = img_utils.imresize(x_temp[0], (width // sr_model.scale_factor, height // sr_model.scale_factor),
-                                     interp='bicubic', mode='RGB')
-
-            if not sr_model.type_true_upscaling:
-                img = img_utils.imresize(
-                    img, (width, height), interp='bicubic')
-
-            x = np.expand_dims(img, axis=0)
-
-            if K.image_dim_ordering() == "th":
-                x = x.transpose((0, 3, 1, 2))
-                y = y.transpose((0, 3, 1, 2))
-
-            sr_model.model = sr_model.create_model(
-                height, width, load_weights=True)
-
-            if sr_model.evaluation_func is None:
-                if sr_model.uses_learning_phase:
-                    sr_model.evaluation_func = K.function([sr_model.model.layers[0].input, K.learning_phase()],
-                                                          [sr_model.model.layers[-1].output])
-                else:
-                    sr_model.evaluation_func = K.function([sr_model.model.layers[0].input],
-                                                          [sr_model.model.layers[-1].output])
-
-            if sr_model.uses_learning_phase:
-                y_pred = sr_model.evaluation_func([x, 0])[0][0]
-            else:
-                y_pred = sr_model.evaluation_func([x])[0][0]
-
-            if scale_pred:
-                if sr_model.type_scale_type == "tanh":
-                    y_pred = (y_pred + 1) * 127.5
-                else:
-                    y_pred *= 255.
-
-            if sr_model.type_scale_type == 'tanh':
-                y = (y + 1) / 2
-
-            psnr_val = psnr(y[0], np.clip(y_pred, 0, 255) / 255)
-            total_psnr += psnr_val
-
-            t2 = time.time()
-            print("Validated image : %s, Time required : %0.2f, PSNR value : %0.4f" % (
-                impath, t2 - t1, psnr_val))
-
-            generated_path = predict_path + \
-                "%s_%s_generated.png" % (
-                    sr_model.model_name, os.path.splitext(impath)[0])
-
-            if K.image_dim_ordering() == "th":
-                y_pred = y_pred.transpose((1, 2, 0))
-
-            y_pred = np.clip(y_pred, 0, 255).astype('uint8')
-            img_utils.imsave(generated_path, y_pred)
-
-        print("Average PSNR value of validation images = %0.4f \n" %
-              (total_psnr / nb_images))
-
-
-class ImageSuperResolutionModel(BaseSuperResolutionModel):
-
-    def __init__(self, scale_factor):
-        super(ImageSuperResolutionModel, self).__init__(
-            "Image SR", scale_factor)
-
-        self.f1 = 9
-        self.f2 = 1
-        self.f3 = 5
-
-        self.n1 = 64
-        self.n2 = 32
-
-        self.weight_path = "weights/SR Weights %dX.h5" % (self.scale_factor)
-
-    def create_model(self, height=32, width=32, channels=3, load_weights=False, batch_size=128):
-        """
-        Creates a model to be used to scale images of specific height and width.
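-        This is the three-layer SRCNN architecture: 9x9 feature extraction,
-        1x1 non-linear mapping and 5x5 reconstruction convolutions.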
- """ - init = super(ImageSuperResolutionModel, self).create_model( - height, width, channels, load_weights, batch_size) - - x = Convolution2D(self.n1, (self.f1, self.f1), - activation='relu', padding='same', name='level1')(init) - x = Convolution2D(self.n2, (self.f2, self.f2), - activation='relu', padding='same', name='level2')(x) - - out = Convolution2D(channels, (self.f3, self.f3), - padding='same', name='output')(x) - - model = Model(init, out) - - adam = optimizers.Adam(lr=1e-3) - model.compile(optimizer=adam, loss='mse', metrics=[PSNRLoss]) - if load_weights: - model.load_weights(self.weight_path) - - self.model = model - return model - - def fit(self, batch_size=128, nb_epochs=100, save_history=True, history_fn="SRCNN History.txt"): - return super(ImageSuperResolutionModel, self).fit(batch_size, nb_epochs, save_history, history_fn) - - -class ExpantionSuperResolution(BaseSuperResolutionModel): - - def __init__(self, scale_factor): - super(ExpantionSuperResolution, self).__init__( - "Expanded Image SR", scale_factor) - - self.f1 = 9 - self.f2_1 = 1 - self.f2_2 = 3 - self.f2_3 = 5 - self.f3 = 5 - - self.n1 = 64 - self.n2 = 32 - - self.weight_path = "weights/Expantion SR Weights %dX.h5" % ( - self.scale_factor) - - def create_model(self, height=32, width=32, channels=3, load_weights=False, batch_size=128): - """ - Creates a model to be used to scale images of specific height and width. - """ - init = super(ExpantionSuperResolution, self).create_model( - height, width, channels, load_weights, batch_size) - - x = Convolution2D(self.n1, (self.f1, self.f1), - activation='relu', padding='same', name='level1')(init) - - x1 = Convolution2D(self.n2, (self.f2_1, self.f2_1), - activation='relu', padding='same', name='lavel1_1')(x) - x2 = Convolution2D(self.n2, (self.f2_2, self.f2_2), - activation='relu', padding='same', name='lavel1_2')(x) - x3 = Convolution2D(self.n2, (self.f2_3, self.f2_3), - activation='relu', padding='same', name='lavel1_3')(x) - - x = Average()([x1, x2, x3]) - - out = Convolution2D(channels, (self.f3, self.f3), - activation='relu', padding='same', name='output')(x) - - model = Model(init, out) - adam = optimizers.Adam(lr=1e-3) - model.compile(optimizer=adam, loss='mse', metrics=[PSNRLoss]) - if load_weights: - model.load_weights(self.weight_path) - - self.model = model - return model - - def fit(self, batch_size=128, nb_epochs=100, save_history=True, history_fn="ESRCNN History.txt"): - return super(ExpantionSuperResolution, self).fit(batch_size, nb_epochs, save_history, history_fn) - - -class DenoisingAutoEncoderSR(BaseSuperResolutionModel): - - def __init__(self, scale_factor): - super(DenoisingAutoEncoderSR, self).__init__( - "Denoise AutoEncoder SR", scale_factor) - - self.n1 = 64 - self.n2 = 32 - - self.weight_path = "weights/Denoising AutoEncoder %dX.h5" % ( - self.scale_factor) - - def create_model(self, height=32, width=32, channels=3, load_weights=False, batch_size=128): - """ - Creates a model to remove / reduce noise from upscaled images. 
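-        The input is expected to already be upscaled to the target size
-        (e.g. bicubically); the network only removes the resulting artifacts.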
-        """
-        from keras.layers.convolutional import Deconvolution2D
-
-        # Perform check that model input shape is divisible by 4
-        init = super(DenoisingAutoEncoderSR, self).create_model(
-            height, width, channels, load_weights, batch_size)
-
-        if K.image_dim_ordering() == "th":
-            output_shape = (None, channels, width, height)
-        else:
-            output_shape = (None, width, height, channels)
-
-        level1_1 = Convolution2D(
-            self.n1, (3, 3), activation='relu', padding='same')(init)
-        level2_1 = Convolution2D(
-            self.n1, (3, 3), activation='relu', padding='same')(level1_1)
-
-        level2_2 = Convolution2DTranspose(
-            self.n1, (3, 3), activation='relu', padding='same')(level2_1)
-        level2 = Add()([level2_1, level2_2])
-
-        level1_2 = Convolution2DTranspose(
-            self.n1, (3, 3), activation='relu', padding='same')(level2)
-        level1 = Add()([level1_1, level1_2])
-
-        decoded = Convolution2D(
-            channels, (5, 5), activation='linear', padding='same')(level1)
-
-        model = Model(init, decoded)
-        adam = optimizers.Adam(lr=1e-3)
-        model.compile(optimizer=adam, loss='mse', metrics=[PSNRLoss])
-        if load_weights:
-            model.load_weights(self.weight_path)
-
-        self.model = model
-        return model
-
-    def fit(self, batch_size=128, nb_epochs=100, save_history=True, history_fn="DSRCNN History.txt"):
-        return super(DenoisingAutoEncoderSR, self).fit(batch_size, nb_epochs, save_history, history_fn)
-
-
-class DeepDenoiseSR(BaseSuperResolutionModel):
-
-    def __init__(self, scale_factor):
-        super(DeepDenoiseSR, self).__init__("Deep Denoise SR", scale_factor)
-
-        # Treat this model as a denoising auto encoder
-        # Force the fit, evaluate and upscale methods to take special care about image shape
-        self.type_requires_divisible_shape = True
-
-        self.n1 = 64
-        self.n2 = 128
-        self.n3 = 256
-
-        self.weight_path = "weights/Deep Denoise Weights %dX.h5" % (
-            self.scale_factor)
-
-    def create_model(self, height=32, width=32, channels=3, load_weights=False, batch_size=128):
-        # Perform check that model input shape is divisible by 4
-        init = super(DeepDenoiseSR, self).create_model(
-            height, width, channels, load_weights, batch_size)
-
-        c1 = Convolution2D(
-            self.n1, (3, 3), activation='relu', padding='same')(init)
-        c1 = Convolution2D(
-            self.n1, (3, 3), activation='relu', padding='same')(c1)
-
-        x = MaxPooling2D((2, 2))(c1)
-
-        c2 = Convolution2D(
-            self.n2, (3, 3), activation='relu', padding='same')(x)
-        c2 = Convolution2D(
-            self.n2, (3, 3), activation='relu', padding='same')(c2)
-
-        x = MaxPooling2D((2, 2))(c2)
-
-        c3 = Convolution2D(
-            self.n3, (3, 3), activation='relu', padding='same')(x)
-
-        x = UpSampling2D()(c3)
-
-        c2_2 = Convolution2D(
-            self.n2, (3, 3), activation='relu', padding='same')(x)
-        c2_2 = Convolution2D(
-            self.n2, (3, 3), activation='relu', padding='same')(c2_2)
-
-        m1 = Add()([c2, c2_2])
-        m1 = UpSampling2D()(m1)
-
-        c1_2 = Convolution2D(
-            self.n1, (3, 3), activation='relu', padding='same')(m1)
-        c1_2 = Convolution2D(
-            self.n1, (3, 3), activation='relu', padding='same')(c1_2)
-
-        m2 = Add()([c1, c1_2])
-
-        decoded = Convolution2D(
-            channels, (5, 5), activation='linear', padding='same')(m2)
-
-        model = Model(init, decoded)
-        adam = optimizers.Adam(lr=1e-3)
-        model.compile(optimizer=adam, loss='mse', metrics=[PSNRLoss])
-        if load_weights:
-            model.load_weights(self.weight_path)
-
-        self.model = model
-        return model
-
-    def fit(self, batch_size=128, nb_epochs=100, save_history=True, history_fn="Deep DSRCNN History.txt"):
-        super(DeepDenoiseSR, self).fit(batch_size,
-                                       nb_epochs, save_history, history_fn)
-
-
-class 
ResNetSR(BaseSuperResolutionModel): - - def __init__(self, scale_factor): - super(ResNetSR, self).__init__("ResNetSR", scale_factor) - - # Treat this model as a denoising auto encoder - # Force the fit, evaluate and upscale methods to take special care about image shape - self.type_requires_divisible_shape = True - self.uses_learning_phase = False - - self.n = 64 - self.mode = 2 - - self.weight_path = "weights/ResNetSR %dX.h5" % (self.scale_factor) - self.type_true_upscaling = True - - def create_model(self, height=32, width=32, channels=3, load_weights=False, batch_size=128): - init = super(ResNetSR, self).create_model( - height, width, channels, load_weights, batch_size) - - x0 = Convolution2D(64, (3, 3), activation='relu', - padding='same', name='sr_res_conv1')(init) - - #x1 = Convolution2D(64, (3, 3), activation='relu', padding='same', strides=(2, 2), name='sr_res_conv2')(x0) - - #x2 = Convolution2D(64, (3, 3), activation='relu', padding='same', strides=(2, 2), name='sr_res_conv3')(x1) - - x = self._residual_block(x0, 1) - - nb_residual = 5 - for i in range(nb_residual): - x = self._residual_block(x, i + 2) - - x = Add()([x, x0]) - - x = self._upscale_block(x, 1) - #x = Add()([x, x1]) - - #x = self._upscale_block(x, 2) - #x = Add()([x, x0]) - - x = Convolution2D(3, (3, 3), activation="linear", - padding='same', name='sr_res_conv_final')(x) - - model = Model(init, x) - - adam = optimizers.Adam(lr=1e-3) - model.compile(optimizer=adam, loss='mse', metrics=[PSNRLoss]) - if load_weights: - model.load_weights(self.weight_path, by_name=True) - - self.model = model - return model - - def _residual_block(self, ip, id): - mode = False if self.mode == 2 else None - channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 - init = ip - - x = Convolution2D(64, (3, 3), activation='linear', padding='same', - name='sr_res_conv_' + str(id) + '_1')(ip) - x = BatchNormalization( - axis=channel_axis, name="sr_res_batchnorm_" + str(id) + "_1")(x, training=mode) - x = Activation('relu', name="sr_res_activation_" + str(id) + "_1")(x) - - x = Convolution2D(64, (3, 3), activation='linear', padding='same', - name='sr_res_conv_' + str(id) + '_2')(x) - x = BatchNormalization( - axis=channel_axis, name="sr_res_batchnorm_" + str(id) + "_2")(x, training=mode) - - m = Add(name="sr_res_merge_" + str(id))([x, init]) - - return m - - def _upscale_block(self, ip, id): - init = ip - - channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 - channels = init._keras_shape[channel_dim] - - #x = Convolution2D(256, (3, 3), activation="relu", padding='same', name='sr_res_upconv1_%d' % id)(init) - #x = SubPixelUpscaling(r=2, channels=self.n, name='sr_res_upscale1_%d' % id)(x) - x = UpSampling2D()(init) - x = Convolution2D(self.n, (3, 3), activation="relu", - padding='same', name='sr_res_filter1_%d' % id)(x) - - # x = Convolution2DTranspose(channels, (4, 4), strides=(2, 2), padding='same', activation='relu', - # name='upsampling_deconv_%d' % id)(init) - - return x - - def fit(self, batch_size=128, nb_epochs=100, save_history=True, history_fn="ResNetSR History.txt"): - super(ResNetSR, self).fit(batch_size, - nb_epochs, save_history, history_fn) - - -class EfficientSubPixelConvolutionalSR(BaseSuperResolutionModel): - - def __init__(self, scale_factor): - super(EfficientSubPixelConvolutionalSR, self).__init__( - "ESPCNN SR", scale_factor) - - self.n1 = 64 - self.n2 = 32 - - self.f1 = 5 - self.f2 = 3 - self.f3 = 3 - - self.weight_path = "weights/ESPCNN Weights %d.h5" % scale_factor - - # Flag to denote that 
this is a "true" upsampling model. - # Image size will be multiplied by scale factor to get output image size - self.true_upsampling = True - - def create_model(self, height=16, width=16, channels=3, load_weights=False, batch_size=128): - # Note height, width = 16 instead of 32 like usual - init = super(EfficientSubPixelConvolutionalSR, self).create_model(height, width, channels, - load_weights, batch_size) - - x = Convolution2D(self.n1, (self.f1, self.f1), - activation='relu', padding='same', name='level1')(init) - x = Convolution2D(self.n2, (self.f2, self.f2), - activation='relu', padding='same', name='level2')(x) - - x = self._upscale_block(x, 1) - - out = Convolution2D(3, (9, 9), activation='linear', padding='same')(x) - - model = Model(init, out) - - adam = optimizers.Adam(lr=1e-3) - model.compile(optimizer=adam, loss='mse', metrics=[PSNRLoss]) - if load_weights: - model.load_weights(self.weight_path) - - self.model = model - return model - - def _upscale_block(self, ip, id): - init = ip - - # x = Convolution2D(256, (3, 3), activation="relu", padding='same', name='espcnn_upconv1_%d' % id)(init) - # x = SubPixelUpscaling(r=2, channels=self.n1, name='espcnn_upconv1__upscale1_%d' % id)(x) - # x = Convolution2D(256, (3, 3), activation="relu", padding='same', name='espcnn_upconv1_filter1_%d' % id)(x) - - x = Convolution2DTranspose(128, (3, 3), strides=( - 2, 2), padding='same', activation='relu')(init) - - return x - - def fit(self, batch_size=128, nb_epochs=100, save_history=True, history_fn="ESPCNN History.txt"): - super(EfficientSubPixelConvolutionalSR, self).fit( - batch_size, nb_epochs, save_history, history_fn) - - -class GANImageSuperResolutionModel(BaseSuperResolutionModel): - - def __init__(self, scale_factor): - super(GANImageSuperResolutionModel, self).__init__( - "GAN Image SR", scale_factor) - - self.f1 = 9 - self.f2 = 1 - self.f3 = 5 - - self.n1 = 64 - self.n2 = 32 - - self.gen_model = None # type: Model - self.disc_model = None # type: Model - - self.type_scale_type = 'tanh' - - self.weight_path = "weights/GAN SR Weights %dX.h5" % ( - self.scale_factor) - self.gen_weight_path = "weights/GAN SR Pretrain Weights %dX.h5" % ( - self.scale_factor) - self.disc_weight_path = "weights/GAN SR Discriminator Weights %dX.h5" % ( - self.scale_factor) - - def create_model(self, mode='test', height=32, width=32, channels=3, load_weights=False, batch_size=128): - """ - Creates a model to be used to scale images of specific height and width. 
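-        In 'train' mode, a discriminator and a combined generator-discriminator
-        graph are built as well; in 'test' mode only the generator is returned.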
- """ - assert mode in [ - 'test', 'train'], "'mode' must be either 'train' or 'test'" - - channel_axis = 1 if K.image_dim_ordering() == 'th' else -1 - - gen_init = super(GANImageSuperResolutionModel, self).create_model( - height, width, channels, load_weights, batch_size) - - x = Convolution2D(self.n1, (self.f1, self.f1), activation='relu', - padding='same', name='gen_level1')(gen_init) - x = LeakyReLU(alpha=0.25)(x) - x = Convolution2D(self.n2, (self.f2, self.f2), - activation='relu', padding='same', name='gen_level2')(x) - x = LeakyReLU(alpha=0.25)(x) - - out = Convolution2D(channels, (self.f3, self.f3), - activation='tanh', padding='same', name='gen_output')(x) - - gen_model = Model(gen_init, out) - - adam = optimizers.Adam(lr=1e-4) - gen_model.compile(optimizer=adam, loss='mse', metrics=[PSNRLoss]) - if load_weights and mode == 'test': - gen_model.load_weights(self.weight_path, by_name=True) - - self.model = gen_model - - if mode == 'train': - try: - gen_model.load_weights(self.weight_path) - except: - print('Could not load weights of GAN SR model for training.') - - if mode == 'train': - disc_init = super(GANImageSuperResolutionModel, self).create_model( - height, width, channels, load_weights, batch_size) - - x = Convolution2D(64, (3, 3), padding='same', - name='disc_level1_1')(disc_init) - x = LeakyReLU(alpha=0.25, name='disc_lr_1_1')(x) - x = Convolution2D(64, (3, 3), padding='same', name='disc_level1_2', - strides=(2, 2))(x) - x = LeakyReLU(alpha=0.25, name='disc_lr_1_2')(x) - x = BatchNormalization( - axis=channel_axis, name='disc_bn_1')(x, training=False) - - x = Convolution2D(128, (3, 3), padding='same', - name='disc_level2_1')(x) - x = LeakyReLU(alpha=0.25, name='disc_lr_2_1')(x) - x = Convolution2D(128, (3, 3), padding='same', name='disc_level2_2', - strides=(2, 2))(x) - x = LeakyReLU(alpha=0.25, name='disc_lr_2_2')(x) - x = BatchNormalization( - axis=channel_axis, name='disc_bn_2')(x, training=False) - - x = Flatten(name='disc_flatten')(x) - x = Dense(128, name='disc_dense_1')(x) - x = LeakyReLU(alpha=0.25, name='disc_lr_final')(x) - out = Dense(2, activation='softmax', name='disc_output')(x) - - disc_model = Model(disc_init, out) - - adam = optimizers.Adam(lr=1e-3) - disc_model.compile( - optimizer=adam, loss='categorical_crossentropy', metrics=['acc']) - if load_weights: - disc_model.load_weights(self.disc_weight_path) - - for layer in disc_model.layers: - layer.trainable = False - - gen_out = gen_model(gen_init) - disc_out = disc_model(gen_out) - - full_model = Model(input=gen_init, output=disc_out) - - for layer in full_model.layers[2].layers: - layer.trainable = False - - full_model.compile(optimizers.Adam( - lr=1e-4), loss='categorical_crossentropy', metrics=['acc']) - - for layer in disc_model.layers: - layer.trainable = True - - self.model = full_model - self.gen_model = gen_model - self.disc_model = disc_model - - # Setup evaluation function for validation - self.evaluation_func = K.function([self.gen_model.layers[0].input], - [self.gen_model.layers[-1].output]) - - else: - self.model = gen_model - - return self.model - - def set_trainable(self, model, value, prefix='gen'): - for layer in model.layers: - if 'model' in layer.name: - model_index = -1 - - # check generator layers - for deep_layer in model.layers[1].layers: - if prefix in deep_layer.name: - deep_layer.trainable = value - model_index = 1 - - # check discriminator layers - for deep_layer in model.layers[2].layers: - if prefix in deep_layer.name: - deep_layer.trainable = value - model_index = 2 - - 
model.layers[model_index].trainable = value - break - - elif prefix in layer.name: # discriminator model - layer.trainable = value - - def fit(self, nb_pretrain_samples=5000, batch_size=128, nb_epochs=100, disc_train_flip=0.1, - save_history=True, history_fn="GAN SRCNN History.txt"): - samples_per_epoch = img_utils.image_count() - meanaxis = (0, 2, 3) if K.image_dim_ordering() == 'th' else (0, 1, 2) - - if self.model == None: - self.create_model(mode='train', batch_size=batch_size) - - if os.path.exists(self.gen_weight_path) and os.path.exists(self.disc_weight_path): - self.gen_model.load_weights(self.gen_weight_path) - self.disc_model.load_weights(self.disc_weight_path) - print("Pre-trained Generator and Discriminator network weights loaded") - else: - nb_train_samples = nb_pretrain_samples - - print('Pre-training on %d images' % (nb_train_samples)) - batchX, batchY = next(img_utils.image_generator(train_path, scale_factor=self.scale_factor, - small_train_images=self.type_true_upscaling, - batch_size=nb_train_samples)) - - # [-1, 1] scale conversion from [0, 1] - batchX = ((batchX * 255) - 127.5) / 127.5 - batchY = ((batchY * 255) - 127.5) / 127.5 - - print("Pre-training Generator network") - hist = self.gen_model.fit( - batchX, batchY, batch_size, nb_epoch=200, verbose=2) - print("Generator pretrain final PSNR : ", - hist.history['PSNRLoss'][-1]) - - print("Pre-training Discriminator network") - - genX = self.gen_model.predict(batchX, batch_size=batch_size) - - print('GenX Output mean (per channel) :', - np.mean(genX, axis=meanaxis)) - print('BatchX mean (per channel) :', - np.mean(batchX, axis=meanaxis)) - - X = np.concatenate((genX, batchX)) - - # Using soft and noisy labels - if np.random.uniform() > disc_train_flip: - # give correct classifications - y = [0] * nb_train_samples + [1] * nb_train_samples - else: - # give wrong classifications (noisy labels) - y = [1] * nb_train_samples + [0] * nb_train_samples - - y = np.asarray(y, dtype=np.float32).reshape(-1, 1) - y = to_categorical(y, nb_classes=2) - y = img_utils.smooth_gan_labels(y) - - hist = self.disc_model.fit(X, y, batch_size=batch_size, - nb_epoch=1, verbose=0) - - print('Discriminator History :', hist.history) - print() - - self.gen_model.save_weights(self.gen_weight_path, overwrite=True) - self.disc_model.save_weights(self.disc_weight_path, overwrite=True) - - iteration = 0 - save_index = 1 - - print("Training full model : %s" % (self.__class__.__name__)) - - for i in range(nb_epochs): - print("Epoch : %d" % (i + 1)) - print() - - for x, _ in img_utils.image_generator(train_path, scale_factor=self.scale_factor, - small_train_images=self.type_true_upscaling, batch_size=batch_size): - t1 = time.time() - - x = ((x * 255) - 127.5) / 127.5 - - X_pred = self.gen_model.predict(x, batch_size) - - print("Input batchX mean (per channel) :", - np.mean(x, axis=meanaxis)) - print("X_pred mean (per channel) :", - np.mean(X_pred, axis=meanaxis)) - - X = np.concatenate((X_pred, x)) - # Using soft and noisy labels - if np.random.uniform() > disc_train_flip: - # give correct classifications - y_disc = [0] * nb_train_samples + [1] * nb_train_samples - else: - # give wrong classifications (noisy labels) - y_disc = [1] * nb_train_samples + [0] * nb_train_samples - - y_disc = np.asarray(y_disc, dtype=np.float32).reshape(-1, 1) - y_disc = to_categorical(y_disc, nb_classes=2) - y_disc = img_utils.smooth_gan_labels(y_disc) - - hist = self.disc_model.fit( - X, y_disc, verbose=0, batch_size=batch_size, nb_epoch=1) - - discriminator_loss = 
hist.history['loss'][0] - discriminator_acc = hist.history['acc'][0] - - # Using soft labels - y_model = [1] * nb_train_samples - y_model = np.asarray(y_model, dtype=np.int).reshape(-1, 1) - y_model = to_categorical(y_model, nb_classes=2) - y_model = img_utils.smooth_gan_labels(y_model) - - hist = self.model.fit( - x, y_model, batch_size, nb_epoch=1, verbose=0) - generative_loss = hist.history['loss'][0] - - iteration += batch_size - save_index += 1 - - t2 = time.time() - - print("Iter : %d / %d | Time required : %0.2f seconds | Discriminator Loss / Acc : %0.6f / %0.3f | " - "Generative Loss : %0.6f" % (iteration, samples_per_epoch, t2 - t1, - discriminator_loss, discriminator_acc, generative_loss)) - - # Validate at end of epoch - if iteration >= samples_per_epoch: - print("Evaluating generator model...") - # losses = self.gen_model.evaluate_generator(generator=img_utils.image_generator(train_path, - # scale_factor=self.scale_factor, - # small_train_images=self.type_true_upscaling, - # batch_size=batch_size), - # val_samples=samples_per_epoch) - # - # print('Generator Loss (PSNR):', losses[-1]) - - self.evaluate('val_images/') - - # Save weights every 100 iterations - if save_index % 100 == 0: - print("Saving generator weights") - self.gen_model.save_weights( - self.weight_path, overwrite=True) - - if iteration >= samples_per_epoch: - break - - iteration = 0 - save_index = 1 - - return self.model - - def evaluate(self, validation_dir): - _evaluate(self, validation_dir, scale_pred=True) - - -class DistilledResNetSR(BaseSuperResolutionModel): - - def __init__(self, scale_factor): - super(DistilledResNetSR, self).__init__( - "DistilledResNetSR", scale_factor) - - # Treat this model as a denoising auto encoder - # Force the fit, evaluate and upscale methods to take special care about image shape - self.type_requires_divisible_shape = True - self.uses_learning_phase = False - - self.n = 32 - self.mode = 2 - - self.weight_path = "weights/DistilledResNetSR %dX.h5" % ( - self.scale_factor) - self.type_true_upscaling = True - - def create_model(self, height=32, width=32, channels=3, load_weights=False, batch_size=128): - init = super(DistilledResNetSR, self).create_model( - height, width, channels, load_weights, batch_size) - - x0 = Convolution2D(self.n, (3, 3), activation='relu', - padding='same', name='student_sr_res_conv1')(init) - - x = self._residual_block(x0, 1) - - x = Add(name='student_residual')([x, x0]) - x = self._upscale_block(x, 1) - - x = Convolution2D(3, (3, 3), activation="linear", - padding='same', name='student_sr_res_conv_final')(x) - - model = Model(init, x) - # dont compile yet - if load_weights: - model.load_weights(self.weight_path, by_name=True) - - self.model = model - return model - - def _residual_block(self, ip, id): - mode = False if self.mode == 2 else None - channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 - init = ip - - x = Convolution2D(self.n, (3, 3), activation='linear', padding='same', - name='student_sr_res_conv_' + str(id) + '_1')(ip) - x = BatchNormalization( - axis=channel_axis, name="student_sr_res_batchnorm_" + str(id) + "_1")(x, training=mode) - x = Activation( - 'relu', name="student_sr_res_activation_" + str(id) + "_1")(x) - - x = Convolution2D(self.n, (3, 3), activation='linear', padding='same', - name='student_sr_res_conv_' + str(id) + '_2')(x) - x = BatchNormalization( - axis=channel_axis, name="student_sr_res_batchnorm_" + str(id) + "_2")(x, training=mode) - - m = Add(name="student_sr_res_merge_" + str(id))([x, init]) - - 
return m - - def _upscale_block(self, ip, id): - init = ip - - channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 - channels = init._keras_shape[channel_dim] - - x = UpSampling2D(name='student_upsampling_%d' % id)(init) - x = Convolution2D(self.n * 2, (3, 3), activation="relu", - padding='same', name='student_sr_res_filter1_%d' % id)(x) - - return x - - def fit(self, batch_size=128, nb_epochs=100, save_history=True, history_fn="Distilled ResNetSR History.txt"): - super(DistilledResNetSR, self).fit( - batch_size, nb_epochs, save_history, history_fn) - - -class NonLocalResNetSR(BaseSuperResolutionModel): - - def __init__(self, scale_factor): - super(NonLocalResNetSR, self).__init__( - "NonLocalResNetSR", scale_factor) - - # Treat this model as a denoising auto encoder - # Force the fit, evaluate and upscale methods to take special care about image shape - self.type_requires_divisible_shape = True - self.uses_learning_phase = False - - self.n = 32 - self.mode = 2 - - self.weight_path = "weights/NonLocalResNetSR %dX.h5" % ( - self.scale_factor) - self.type_true_upscaling = True - - def create_model(self, height=32, width=32, channels=3, load_weights=False, batch_size=128): - init = super(NonLocalResNetSR, self).create_model( - height, width, channels, load_weights, batch_size) - - x0 = Convolution2D(self.n, (3, 3), activation='relu', - padding='same', name='sr_res_conv1')(init) - x0 = non_local_block(x0) - - x = self._residual_block(x0, 1) - - nb_residual = 5 - for i in range(nb_residual): - x = self._residual_block(x, i + 2) - - x = non_local_block(x, computation_compression=2) - x = Add()([x, x0]) - - x = self._upscale_block(x, 1) - - x = Convolution2D(3, (3, 3), activation="linear", - padding='same', name='sr_res_conv_final')(x) - - model = Model(init, x) - - adam = optimizers.Adam(lr=1e-3) - model.compile(optimizer=adam, loss='mse', metrics=[PSNRLoss]) - if load_weights: - model.load_weights(self.weight_path, by_name=True) - - self.model = model - return model - - def _residual_block(self, ip, id): - mode = False if self.mode == 2 else None - channel_axis = 1 if K.image_data_format() == 'channels_first' else -1 - init = ip - - x = Convolution2D(self.n, (3, 3), activation='linear', padding='same', - name='sr_res_conv_' + str(id) + '_1')(ip) - x = BatchNormalization( - axis=channel_axis, name="sr_res_batchnorm_" + str(id) + "_1")(x, training=mode) - x = Activation('relu', name="sr_res_activation_" + str(id) + "_1")(x) - - x = Convolution2D(self.n, (3, 3), activation='linear', padding='same', - name='sr_res_conv_' + str(id) + '_2')(x) - x = BatchNormalization( - axis=channel_axis, name="sr_res_batchnorm_" + str(id) + "_2")(x, training=mode) - - m = Add(name="sr_res_merge_" + str(id))([x, init]) - - return m - - def _upscale_block(self, ip, id): - init = ip - - channel_dim = 1 if K.image_data_format() == 'channels_first' else -1 - - x = UpSampling2D()(init) - x = Convolution2D(self.n, (3, 3), activation="relu", - padding='same', name='sr_res_filter1_%d' % id)(x) - - return x - - def fit(self, batch_size=128, nb_epochs=100, save_history=True, history_fn="Non Local ResNetSR History.txt"): - super(NonLocalResNetSR, self).fit( - batch_size, nb_epochs, save_history, history_fn) -from __future__ import print_function, division - -from keras.utils.vis_utils import plot_model -import models -import img_utils - -if __name__ == "__main__": - path = r"headline_carspeed.jpg" - val_path = "val_images/" - - scale = 2 - - """ - Plot the models - """ - - # model = 
models.ImageSuperResolutionModel(scale).create_model() - # plot_model(model, to_file="architectures/SRCNN.png", show_shapes=True, show_layer_names=True) - - # model = models.ExpantionSuperResolution(scale).create_model() - # plot_model(model, to_file="architectures/ESRCNN.png", show_layer_names=True, show_shapes=True) - - # model = models.DenoisingAutoEncoderSR(scale).create_model() - # plot_model(model, to_file="architectures/Denoise.png", show_layer_names=True, show_shapes=True) - - # model = models.DeepDenoiseSR(scale).create_model() - # plot_model(model, to_file="architectures/Deep Denoise.png", show_layer_names=True, show_shapes=True) - - # model = models.ResNetSR(scale).create_model() - # plot_model(model, to_file="architectures/ResNet.png", show_layer_names=True, show_shapes=True) - - # model = models.GANImageSuperResolutionModel(scale).create_model(mode='train') - # plot_model(model, to_file='architectures/GAN Image SR.png', show_shapes=True, show_layer_names=True) - - # model = models.DistilledResNetSR(scale).create_model() - # plot_model(model, to_file='architectures/distilled_resnet_sr.png', show_layer_names=True, show_shapes=True) - - # model = models.NonLocalResNetSR(scale).create_model() - # plot_model(model, to_file='architectures/non_local_resnet_sr.png', show_layer_names=True, show_shapes=True) - - """ - Train Super Resolution - """ - - # sr = models.ImageSuperResolutionModel(scale) - # sr.create_model() - # sr.fit(nb_epochs=250) - - """ - Train ExpantionSuperResolution - """ - - # esr = models.ExpantionSuperResolution(scale) - # esr.create_model() - # esr.fit(nb_epochs=250) - - """ - Train DenoisingAutoEncoderSR - """ - - # dsr = models.DenoisingAutoEncoderSR(scale) - # dsr.create_model() - # dsr.fit(nb_epochs=250) - - """ - Train Deep Denoise SR - """ - - # ddsr = models.DeepDenoiseSR(scale) - # ddsr.create_model() - # ddsr.fit(nb_epochs=180) - - """ - Train Res Net SR - """ - - # rnsr = models.ResNetSR(scale) - # rnsr.create_model(load_weights=True) - # rnsr.fit(nb_epochs=50) - - """ - Train ESPCNN SR - """ - - # espcnn = models.EfficientSubPixelConvolutionalSR(scale) - # espcnn.create_model() - # espcnn.fit(nb_epochs=50) - - """ - Train GAN Super Resolution - """ - - # gsr = models.GANImageSuperResolutionModel(scale) - # gsr.create_model(mode='train') - # gsr.fit(nb_pretrain_samples=10000, nb_epochs=10) - - """ - Train Non Local ResNets - """ - - # non_local_rnsr = models.NonLocalResNetSR(scale) - # non_local_rnsr.create_model() - # non_local_rnsr.fit(nb_epochs=50) - - """ - Evaluate Super Resolution on Set5/14 - """ - - # sr = models.ImageSuperResolutionModel(scale) - # sr.evaluate(val_path) - - """ - Evaluate ESRCNN on Set5/14 - """ - - #esr = models.ExpantionSuperResolution(scale) - # esr.evaluate(val_path) - - """ - Evaluate DSRCNN on Set5/14 cannot be performed at the moment. - This is because this model uses Deconvolution networks, whose output shape must be pre determined. - This causes the model to fail to predict different images of different image sizes. 
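-    (The _evaluate_denoise helper avoids this by rebuilding the model for each
-    image size before prediction.)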
- """ - - #dsr = models.DenoisingAutoEncoderSR(scale) - # dsr.evaluate(val_path) - - """ - Evaluate DDSRCNN on Set5/14 - """ - - #ddsr = models.DeepDenoiseSR(scale) - # ddsr.evaluate(val_path) - - """ - Evaluate ResNetSR on Set5/14 - """ - - # rnsr = models.ResNetSR(scale) - # rnsr.create_model(None, None, 3, load_weights=True) - # rnsr.evaluate(val_path) - - """ - Distilled ResNetSR - """ - - # distilled_rnsr = models.DistilledResNetSR(scale) - # distilled_rnsr.create_model(None, None, 3, load_weights=True) - # distilled_rnsr.evaluate(val_path) - - """ - Evaluate ESPCNN SR on Set 5/14 - """ - - # espcnn = models.EfficientSubPixelConvolutionalSR(scale) - # espcnn.evaluate(val_path) - - """ - Evaluate GAN Super Resolution on Set 5/14 - """ - - # gsr = models.GANImageSuperResolutionModel(scale) - # gsr.evaluate(val_path) - - """ - Evaluate Non Local ResNetSR on Set 5/14 - """ - - # non_local_rnsr = models.NonLocalResNetSR(scale) - # non_local_rnsr.evaluate(val_path) - - """ - Compare output images of sr, esr, dsr and ddsr models - """ - - #sr = models.ImageSuperResolutionModel(scale) - #sr.upscale(path, save_intermediate=False, suffix="sr") - - #esr = models.ExpantionSuperResolution(scale) - #esr.upscale(path, save_intermediate=False, suffix="esr") - - #dsr = models.DenoisingAutoEncoderSR(scale) - #dsr.upscale(path, save_intermediate=False, suffix="dsr") - - # ddsr = models.DeepDenoiseSR(scale) - # ddsr.upscale(path, save_intermediate=False, suffix="ddsr") - - # rnsr = models.ResNetSR(scale) - # rnsr.create_model(None, None, 3, load_weights=True) - # rnsr.upscale(path, save_intermediate=False, suffix="rnsr") - - #gansr = models.GANImageSuperResolutionModel(scale) - #gansr.upscale(path, save_intermediate=False, suffix='gansr') -from keras.models import Sequential -from keras.layers import Dense -from keras.layers import Reshape -from keras.layers.core import Activation -from keras.layers.normalization import BatchNormalization -from keras.layers.convolutional import UpSampling2D -from keras.layers.convolutional import Conv2D, MaxPooling2D -from keras.layers.core import Flatten -from keras.optimizers import SGD -from keras.datasets import mnist -import numpy as np -from PIL import Image -import argparse -import math - - -def generator_model(): - model = Sequential() - model.add(Dense(input_dim=100, output_dim=1024)) - model.add(Activation('tanh')) - model.add(Dense(128*7*7)) - model.add(BatchNormalization()) - model.add(Activation('tanh')) - model.add(Reshape((7, 7, 128), input_shape=(128*7*7,))) - model.add(UpSampling2D(size=(2, 2))) - model.add(Conv2D(64, (5, 5), padding='same')) - model.add(Activation('tanh')) - model.add(UpSampling2D(size=(2, 2))) - model.add(Conv2D(1, (5, 5), padding='same')) - model.add(Activation('tanh')) - return model - - -def discriminator_model(): - model = Sequential() - model.add( - Conv2D(64, (5, 5), - padding='same', - input_shape=(28, 28, 1)) - ) - model.add(Activation('tanh')) - model.add(MaxPooling2D(pool_size=(2, 2))) - model.add(Conv2D(128, (5, 5))) - model.add(Activation('tanh')) - model.add(MaxPooling2D(pool_size=(2, 2))) - model.add(Flatten()) - model.add(Dense(1024)) - model.add(Activation('tanh')) - model.add(Dense(1)) - model.add(Activation('sigmoid')) - return model - - -def generator_containing_discriminator(g, d): - model = Sequential() - model.add(g) - d.trainable = False - model.add(d) - return model - - -def combine_images(generated_images): - num = generated_images.shape[0] - width = int(math.sqrt(num)) - height = 
int(math.ceil(float(num)/width)) - shape = generated_images.shape[1:3] - image = np.zeros((height*shape[0], width*shape[1]), - dtype=generated_images.dtype) - for index, img in enumerate(generated_images): - i = int(index/width) - j = index % width - image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = \ - img[:, :, 0] - return image - - -def train(BATCH_SIZE): - (X_train, y_train), (X_test, y_test) = mnist.load_data() - X_train = (X_train.astype(np.float32) - 127.5)/127.5 - X_train = X_train[:, :, :, None] - X_test = X_test[:, :, :, None] - # X_train = X_train.reshape((X_train.shape, 1) + X_train.shape[1:]) - d = discriminator_model() - g = generator_model() - d_on_g = generator_containing_discriminator(g, d) - d_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True) - g_optim = SGD(lr=0.0005, momentum=0.9, nesterov=True) - g.compile(loss='binary_crossentropy', optimizer="SGD") - d_on_g.compile(loss='binary_crossentropy', optimizer=g_optim) - d.trainable = True - d.compile(loss='binary_crossentropy', optimizer=d_optim) - for epoch in range(100): - print("Epoch is", epoch) - print("Number of batches", int(X_train.shape[0]/BATCH_SIZE)) - for index in range(int(X_train.shape[0]/BATCH_SIZE)): - noise = np.random.uniform(-1, 1, size=(BATCH_SIZE, 100)) - image_batch = X_train[index*BATCH_SIZE:(index+1)*BATCH_SIZE] - generated_images = g.predict(noise, verbose=0) - if index % 20 == 0: - image = combine_images(generated_images) - image = image*127.5+127.5 - Image.fromarray(image.astype(np.uint8)).save( - str(epoch)+"_"+str(index)+".png") - X = np.concatenate((image_batch, generated_images)) - y = [1] * BATCH_SIZE + [0] * BATCH_SIZE - d_loss = d.train_on_batch(X, y) - print("batch %d d_loss : %f" % (index, d_loss)) - noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100)) - d.trainable = False - g_loss = d_on_g.train_on_batch(noise, [1] * BATCH_SIZE) - d.trainable = True - print("batch %d g_loss : %f" % (index, g_loss)) - if index % 10 == 9: - g.save_weights('generator', True) - d.save_weights('discriminator', True) - - -def generate(BATCH_SIZE, nice=False): - g = generator_model() - g.compile(loss='binary_crossentropy', optimizer="SGD") - g.load_weights('generator') - if nice: - d = discriminator_model() - d.compile(loss='binary_crossentropy', optimizer="SGD") - d.load_weights('discriminator') - noise = np.random.uniform(-1, 1, (BATCH_SIZE*20, 100)) - generated_images = g.predict(noise, verbose=1) - d_pret = d.predict(generated_images, verbose=1) - index = np.arange(0, BATCH_SIZE*20) - index.resize((BATCH_SIZE*20, 1)) - pre_with_index = list(np.append(d_pret, index, axis=1)) - pre_with_index.sort(key=lambda x: x[0], reverse=True) - nice_images = np.zeros( - (BATCH_SIZE,) + generated_images.shape[1:3], dtype=np.float32) - nice_images = nice_images[:, :, :, None] - for i in range(BATCH_SIZE): - idx = int(pre_with_index[i][1]) - nice_images[i, :, :, 0] = generated_images[idx, :, :, 0] - image = combine_images(nice_images) - else: - noise = np.random.uniform(-1, 1, (BATCH_SIZE, 100)) - generated_images = g.predict(noise, verbose=1) - image = combine_images(generated_images) - image = image*127.5+127.5 - Image.fromarray(image.astype(np.uint8)).save( - "generated_image.png") - - -def get_args(): - parser = argparse.ArgumentParser() - parser.add_argument("--mode", type=str) - parser.add_argument("--batch_size", type=int, default=128) - parser.add_argument("--nice", dest="nice", action="store_true") - parser.set_defaults(nice=False) - args = parser.parse_args() - return args - - -if __name__ == 
"__main__": - args = get_args() - if args.mode == "train": - train(BATCH_SIZE=args.batch_size) - elif args.mode == "generate": - generate(BATCH_SIZE=args.batch_size, nice=args.nice) -from setuptools import setup -from setuptools import find_packages - -setup(name='hyperas', - version='0.4.1', - description='Simple wrapper for hyperopt to do convenient hyperparameter optimization for Keras models', - url='http://github.com/maxpumperla/hyperas', - download_url='https://github.com/maxpumperla/hyperas/tarball/0.4.1', - author='Max Pumperla', - author_email='max.pumperla@googlemail.com', - install_requires=['keras', 'hyperopt', 'entrypoints', - 'jupyter', 'nbformat', 'nbconvert'], - license='MIT', - packages=find_packages(), - zip_safe=False) -from __future__ import print_function -from hyperopt import Trials, STATUS_OK, tpe -from hyperas import optim -from hyperas.distributions import uniform -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Activation, Flatten -from keras.layers.convolutional import Convolution2D, MaxPooling2D -from keras.optimizers import SGD -from keras.preprocessing.image import ImageDataGenerator -from keras.datasets import cifar10 -from keras.utils import np_utils - - -def data(): - nb_classes = 10 - # the data, shuffled and split between train and test sets - (X_train, y_train), (X_test, y_test) = cifar10.load_data() - print('X_train shape:', X_train.shape) - print(X_train.shape[0], 'train samples') - print(X_test.shape[0], 'test samples') - - # convert class vectors to binary class matrices - Y_train = np_utils.to_categorical(y_train, nb_classes) - Y_test = np_utils.to_categorical(y_test, nb_classes) - - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - X_train /= 255 - X_test /= 255 - - # this will do preprocessing and realtime data augmentation - datagen = ImageDataGenerator( - featurewise_center=False, # set input mean to 0 over the dataset - samplewise_center=False, # set each sample mean to 0 - featurewise_std_normalization=False, # divide inputs by std of the dataset - samplewise_std_normalization=False, # divide each input by its std - zca_whitening=False, # apply ZCA whitening - # randomly rotate images in the range (degrees, 0 to 180) - rotation_range=0, - # randomly shift images horizontally (fraction of total width) - width_shift_range=0.1, - # randomly shift images vertically (fraction of total height) - height_shift_range=0.1, - horizontal_flip=True, # randomly flip images - vertical_flip=False) # randomly flip images - - # compute quantities required for featurewise normalization - # (std, mean, and principal components if ZCA whitening is applied) - datagen.fit(X_train) - - return datagen, X_train, Y_train, X_test, Y_test - - -def model(datagen, X_train, Y_train, X_test, Y_test): - batch_size = 32 - nb_epoch = 200 - - # input image dimensions - img_rows, img_cols = 32, 32 - # the CIFAR10 images are RGB - img_channels = 3 - - model = Sequential() - - model.add(Convolution2D(32, 3, 3, border_mode='same', - input_shape=X_train.shape[1:])) - model.add(Activation('relu')) - model.add(Convolution2D(32, 3, 3)) - model.add(Activation('relu')) - model.add(MaxPooling2D(pool_size=(2, 2))) - model.add(Dropout({{uniform(0, 1)}})) - - model.add(Convolution2D(64, 3, 3, border_mode='same')) - model.add(Activation('relu')) - model.add(Convolution2D(64, 3, 3)) - model.add(Activation('relu')) - model.add(MaxPooling2D(pool_size=(2, 2))) - model.add(Dropout({{uniform(0, 1)}})) - - model.add(Flatten()) - 
model.add(Dense(512))
-    model.add(Activation('relu'))
-    model.add(Dropout(0.5))
-    model.add(Dense(nb_classes))
-    model.add(Activation('softmax'))
-
-    # let's train the model using SGD + momentum (how original).
-    sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True)
-    model.compile(loss='categorical_crossentropy',
-                  optimizer=sgd,
-                  metrics=['accuracy'])
-
-    # fit the model on the batches generated by datagen.flow()
-    model.fit_generator(datagen.flow(X_train, Y_train,
-                                     batch_size=batch_size),
-                        samples_per_epoch=X_train.shape[0],
-                        nb_epoch=nb_epoch,
-                        validation_data=(X_test, Y_test))
-
-    score, acc = model.evaluate(X_test, Y_test, verbose=0)
-
-    return {'loss': -acc, 'status': STATUS_OK, 'model': model}
-
-
-if __name__ == '__main__':
-
-    datagen, X_train, Y_train, X_test, Y_test = data()
-
-    best_run, best_model = optim.minimize(model=model,
-                                          data=data,
-                                          algo=tpe.suggest,
-                                          max_evals=5,
-                                          trials=Trials())
-
-    print("Evaluation of best performing model:")
-    print(best_model.evaluate(X_test, Y_test))
-from __future__ import print_function
-from hyperopt import Trials, STATUS_OK, rand
-from hyperas import optim
-from hyperas.distributions import uniform, choice
-import numpy as np
-from keras.preprocessing import sequence
-from keras.datasets import imdb
-from keras.models import Sequential
-from keras.layers.core import Dense, Dropout, Activation
-from keras.layers.embeddings import Embedding
-from keras.layers.recurrent import LSTM
-from keras.layers.convolutional import Convolution1D, MaxPooling1D
-
-
-def data():
-    np.random.seed(1337)  # for reproducibility
-    max_features = 20000
-    maxlen = 100
-
-    (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features)
-    X_train = sequence.pad_sequences(X_train, maxlen=maxlen)
-    X_test = sequence.pad_sequences(X_test, maxlen=maxlen)
-
-    return X_train, X_test, y_train, y_test, maxlen, max_features
-
-
-def model(X_train, X_test, y_train, y_test, maxlen, max_features):
-    embedding_size = 300
-    pool_length = 4
-    lstm_output_size = 100
-    batch_size = 200
-    nb_epoch = 1
-
-    model = Sequential()
-    model.add(Embedding(max_features, embedding_size, input_length=maxlen))
-    model.add(Dropout({{uniform(0, 1)}}))
-    # Note that we use unnamed parameters here, which is bad style but is used
-    # here to demonstrate that it works. Always prefer named parameters.
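-    # With named parameters the call below would read, e.g.:
-    #   Convolution1D(nb_filter={{choice([64, 128])}}, filter_length={{choice([6, 8])}},
-    #                 border_mode='valid', activation='relu', subsample_length=1)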
- model.add(Convolution1D({{choice([64, 128])}}, - {{choice([6, 8])}}, - border_mode='valid', - activation='relu', - subsample_length=1)) - model.add(MaxPooling1D(pool_length=pool_length)) - model.add(LSTM(lstm_output_size)) - model.add(Dense(1)) - model.add(Activation('sigmoid')) - - model.compile(loss='binary_crossentropy', - optimizer='adam', - metrics=['accuracy']) - - print('Train...') - model.fit(X_train, y_train, batch_size=batch_size, nb_epoch=nb_epoch, - validation_data=(X_test, y_test)) - score, acc = model.evaluate(X_test, y_test, batch_size=batch_size) - - print('Test score:', score) - print('Test accuracy:', acc) - return {'loss': -acc, 'status': STATUS_OK, 'model': model} - - -if __name__ == '__main__': - best_run, best_model = optim.minimize(model=model, - data=data, - algo=rand.suggest, - max_evals=5, - trials=Trials()) - print(best_run) -from __future__ import print_function -from hyperopt import Trials, STATUS_OK, tpe -from hyperas import optim -from hyperas.distributions import choice, uniform -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Activation -from keras.datasets import mnist -from keras.utils import np_utils - - -def data(): - ''' - Data providing function: - - This function is separated from model() so that hyperopt - won't reload data for each evaluation run. - ''' - (X_train, y_train), (X_test, y_test) = mnist.load_data() - X_train = X_train.reshape(60000, 784) - X_test = X_test.reshape(10000, 784) - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - X_train /= 255 - X_test /= 255 - nb_classes = 10 - Y_train = np_utils.to_categorical(y_train, nb_classes) - Y_test = np_utils.to_categorical(y_test, nb_classes) - return X_train, Y_train, X_test, Y_test - - -def model(X_train, Y_train, X_test, Y_test): - ''' - Model providing function: - - Create Keras model with double curly brackets dropped-in as needed. - Return value has to be a valid python dictionary with two customary keys: - - loss: Specify a numeric evaluation metric to be minimized - - status: Just use STATUS_OK and see hyperopt documentation if not feasible - The last one is optional, though recommended, namely: - - model: specify the model just created so that we can later use it again. 
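-    For example: return {'loss': -acc, 'status': STATUS_OK, 'model': model}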
- ''' - model = Sequential() - model.add(Dense(512, input_shape=(784,))) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense({{choice([256, 512, 1024])}})) - model.add(Activation({{choice(['relu', 'sigmoid'])}})) - model.add(Dropout({{uniform(0, 1)}})) - - # If we choose 'four', add an additional fourth layer - if {{choice(['three', 'four'])}} == 'four': - model.add(Dense(100)) - model.add({{choice([Dropout(0.5), Activation('linear')])}}) - model.add(Activation('relu')) - - model.add(Dense(10)) - model.add(Activation('softmax')) - - model.compile(loss='categorical_crossentropy', - optimizer={{choice(['rmsprop', 'adam', 'sgd'])}}, - metrics=['accuracy']) - - model.fit(X_train, Y_train, - batch_size={{choice([64, 128])}}, - nb_epoch=1, - verbose=2, - validation_data=(X_test, Y_test)) - score, acc = model.evaluate(X_test, Y_test, verbose=0) - print('Test accuracy:', acc) - return {'loss': -acc, 'status': STATUS_OK, 'model': model} - - -if __name__ == '__main__': - trials = Trials() - best_run, best_model = optim.minimize(model=model, - data=data, - algo=tpe.suggest, - max_evals=5, - trials=trials) - for trial in trials: - print(trial) - X_train, Y_train, X_test, Y_test = data() - print("Evalutation of best performing model:") - print(best_model.evaluate(X_test, Y_test)) -import numpy -import random -from keras.datasets import mnist -from keras.models import Model -from keras.layers import Input, Flatten, Dense, Dropout, Lambda -from keras.optimizers import RMSprop -from keras import backend as K - -from hyperopt import Trials, STATUS_OK, tpe -from hyperas import optim -from hyperas.distributions import choice, uniform - - -def euclidean_distance(vects): - x, y = vects - return K.sqrt(K.maximum(K.sum(K.square(x - y), axis=1, keepdims=True), K.epsilon())) - - -def eucl_dist_output_shape(shapes): - shape1, shape2 = shapes - return (shape1[0], 1) - - -def create_pairs(x, digit_indices): - num_classes = 10 - pairs = [] - labels = [] - n = min([len(digit_indices[d]) for d in range(num_classes)]) - 1 - for d in range(num_classes): - for i in range(n): - z1, z2 = digit_indices[d][i], digit_indices[d][i + 1] - pairs += [[x[z1], x[z2]]] - inc = random.randrange(1, num_classes) - dn = (d + inc) % num_classes - z1, z2 = digit_indices[d][i], digit_indices[dn][i] - pairs += [[x[z1], x[z2]]] - labels += [1, 0] - return numpy.array(pairs), numpy.array(labels) - - -def create_base_network(input_shape, dense_filter1, dense_filter2, dense_filter3, dropout1, dropout2): - input = Input(shape=input_shape) - x = Flatten()(input) - x = Dense(dense_filter1, activation='relu')(x) - x = Dropout(dropout1)(x) - x = Dense(dense_filter2, activation='relu')(x) - x = Dropout(dropout2)(x) - x = Dense(dense_filter3, activation='relu')(x) - return Model(input, x) - - -def compute_accuracy(y_true, y_pred): - pred = y_pred.ravel() < 0.5 - return numpy.mean(pred == y_true) - - -def accuracy(y_true, y_pred): - return K.mean(K.equal(y_true, K.cast(y_pred < 0.5, y_true.dtype))) - - -def process_data(): - num_classes = 10 - (x_train, y_train), (x_test, y_test) = mnist.load_data() - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - x_train /= 255 - x_test /= 255 - input_shape = x_train.shape[1:] - - # create training+test positive and negative pairs - digit_indices = [numpy.where(y_train == i)[0] for i in range(num_classes)] - tr_pairs, tr_y = create_pairs(x_train, digit_indices) - - digit_indices = [numpy.where(y_test == i)[0] for i in range(num_classes)] - te_pairs, te_y = 
create_pairs(x_test, digit_indices) - return tr_pairs, tr_y, te_pairs, te_y, input_shape - - -def data(): - tr_pairs, tr_y, te_pairs, te_y, input_shape = process_data() - return tr_pairs, tr_y, te_pairs, te_y, input_shape - - -def contrastive_loss(y_true, y_pred): - margin = 1 - return K.mean(y_true * K.square(y_pred) + - (1 - y_true) * K.square(K.maximum(margin - y_pred, 0))) - - -def create_model(tr_pairs, tr_y, te_pairs, te_y, input_shape): - epochs = 20 - dropout1 = {{uniform(0, 1)}} - dropout2 = {{uniform(0, 1)}} - dense_filter1 = {{choice([64, 128, 256])}} - dense_filter2 = {{choice([64, 128, 256])}} - dense_filter3 = {{choice([64, 128, 256])}} - # network definition - base_network = create_base_network( - input_shape, dense_filter1, dense_filter2, dense_filter3, dropout1, dropout2) - - input_a = Input(shape=input_shape) - input_b = Input(shape=input_shape) - - processed_a = base_network(input_a) - processed_b = base_network(input_b) - - distance = Lambda(euclidean_distance, - output_shape=eucl_dist_output_shape)([processed_a, processed_b]) - - model = Model([input_a, input_b], distance) - - rms = RMSprop() - model.compile(loss=contrastive_loss, optimizer=rms, metrics=[accuracy]) - model.fit([tr_pairs[:, 0], tr_pairs[:, 1]], tr_y, - batch_size=128, - epochs=epochs, - verbose=1, - validation_data=([te_pairs[:, 0], te_pairs[:, 1]], te_y)) - - y_pred = model.predict([tr_pairs[:, 0], tr_pairs[:, 1]]) - tr_acc = compute_accuracy(tr_y, y_pred) - y_pred = model.predict([te_pairs[:, 0], te_pairs[:, 1]]) - te_acc = compute_accuracy(te_y, y_pred) - print('* Accuracy on training set: %0.2f%%' % (100 * tr_acc)) - print('* Accuracy on test set: %0.2f%%' % (100 * te_acc)) - - return {'loss': -te_acc, 'status': STATUS_OK, 'model': model} - - -if __name__ == '__main__': - - tr_pairs, tr_y, te_pairs, te_y, input_shape = data() - - best_run, best_model = optim.minimize(model=create_model, data=data, - functions=[process_data, create_base_network, euclidean_distance, contrastive_loss, - eucl_dist_output_shape, create_pairs, accuracy, compute_accuracy], - algo=tpe.suggest, max_evals=100, trials=Trials()) - print("best model", best_model) - print("best run", best_run) - print("Evalutation of best performing model:") - loss, te_acc = best_model.evaluate([te_pairs[:, 0], te_pairs[:, 1]], te_y) - print("best prediction accuracy on test data %0.2f%%" % (100 * te_acc)) -from __future__ import print_function -from hyperopt import Trials, STATUS_OK, tpe -from hyperas import optim -from hyperas.distributions import choice, uniform -from keras.preprocessing import sequence -from keras.datasets import imdb -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Activation -from keras.layers.embeddings import Embedding -from keras.layers.recurrent import LSTM -from keras.callbacks import EarlyStopping, ModelCheckpoint - - -def data(): - maxlen = 100 - max_features = 20000 - - print('Loading data...') - (X_train, y_train), (X_test, y_test) = imdb.load_data(nb_words=max_features) - print(len(X_train), 'train sequences') - print(len(X_test), 'test sequences') - - print("Pad sequences (samples x time)") - X_train = sequence.pad_sequences(X_train, maxlen=maxlen) - X_test = sequence.pad_sequences(X_test, maxlen=maxlen) - print('X_train shape:', X_train.shape) - print('X_test shape:', X_test.shape) - - return X_train, X_test, y_train, y_test, max_features, maxlen - - -def model(X_train, X_test, y_train, y_test, max_features, maxlen): - model = Sequential() - model.add(Embedding(max_features, 
128, input_length=maxlen)) - model.add(LSTM(128)) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense(1)) - model.add(Activation('sigmoid')) - - model.compile(loss='binary_crossentropy', - optimizer='adam', - metrics=['accuracy']) - - early_stopping = EarlyStopping(monitor='val_loss', patience=4) - checkpointer = ModelCheckpoint(filepath='keras_weights.hdf5', - verbose=1, - save_best_only=True) - - model.fit(X_train, y_train, - batch_size={{choice([32, 64, 128])}}, - nb_epoch=1, - validation_split=0.08, - callbacks=[early_stopping, checkpointer]) - - score, acc = model.evaluate(X_test, y_test, verbose=0) - - print('Test accuracy:', acc) - return {'loss': -acc, 'status': STATUS_OK, 'model': model} - - -if __name__ == '__main__': - best_run, best_model = optim.minimize(model=model, - data=data, - algo=tpe.suggest, - max_evals=10, - trials=Trials()) - print(best_run) -from hyperas import optim -from hyperas.distributions import quniform, uniform -from hyperopt import STATUS_OK, tpe, mongoexp -import keras -from keras.layers import Dense, Dropout -from keras.models import Sequential -from keras.optimizers import RMSprop -from keras.datasets import mnist -import tempfile - - -def data(): - (x_train, y_train), (x_test, y_test) = mnist.load_data() - x_train = x_train.reshape(60000, 784) - x_test = x_test.reshape(10000, 784) - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - x_train /= 255 - x_test /= 255 - num_classes = 10 - y_train = keras.utils.to_categorical(y_train, num_classes) - y_test = keras.utils.to_categorical(y_test, num_classes) - return x_train, y_train, x_test, y_test - - -def create_model(x_train, y_train, x_test, y_test): - """ - Create your model... - """ - layer_1_size = {{quniform(12, 256, 4)}} - l1_dropout = {{uniform(0.001, 0.7)}} - params = { - 'l1_size': layer_1_size, - 'l1_dropout': l1_dropout - } - num_classes = 10 - model = Sequential() - model.add(Dense(int(layer_1_size), activation='relu')) - model.add(Dropout(l1_dropout)) - model.add(Dense(num_classes, activation='softmax')) - model.compile(loss='categorical_crossentropy', - optimizer=RMSprop(), - metrics=['accuracy']) - model.fit(x_train, y_train, batch_size=128, - epochs=10, validation_data=(x_test, y_test)) - score, acc = model.evaluate(x_test, y_test, verbose=0) - out = { - 'loss': -acc, - 'score': score, - 'status': STATUS_OK, - 'model_params': params, - } - # optionally store a dump of your model here so you can get it from the database later - temp_name = tempfile.gettempdir()+'/'+next(tempfile._get_candidate_names()) + '.h5' - model.save(temp_name) - with open(temp_name, 'rb') as infile: - model_bytes = infile.read() - out['model_serial'] = model_bytes - return out - - -if __name__ == "__main__": - trials = mongoexp.MongoTrials( - 'mongo://username:pass@mongodb.host:27017/jobs/jobs', exp_key='mnist_test') - best_run, best_model = optim.minimize(model=create_model, - data=data, - algo=tpe.suggest, - max_evals=10, - trials=trials, - keep_temp=True) # this last bit is important - print("Best performing model chosen hyper-parameters:") - print(best_run) -from __future__ import print_function -from hyperopt import Trials, STATUS_OK, rand -from hyperas import optim -from hyperas.distributions import choice, uniform -from sklearn.metrics import accuracy_score -from keras.utils import np_utils -from keras.datasets import mnist -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Activation -from keras.optimizers import RMSprop - - -def data(): - nb_classes 
= 10 - (X_train, y_train), (X_test, y_test) = mnist.load_data() - - X_train = X_train.reshape(60000, 784) - X_test = X_test.reshape(10000, 784) - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - X_train /= 255 - X_test /= 255 - Y_train = np_utils.to_categorical(y_train, nb_classes) - Y_test = np_utils.to_categorical(y_test, nb_classes) - return X_train, X_test, Y_train, Y_test - - -def model(X_train, X_test, Y_train, Y_test): - model = Sequential() - model.add(Dense(512, input_shape=(784,))) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense({{choice([400, 512, 600])}})) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense(10)) - model.add(Activation('softmax')) - - rms = RMSprop() - model.compile(loss='categorical_crossentropy', - optimizer=rms, metrics=['accuracy']) - - nb_epoch = 10 - batch_size = 128 - - model.fit(X_train, Y_train, - batch_size=batch_size, nb_epoch=nb_epoch, - verbose=2, - validation_data=(X_test, Y_test)) - - score, acc = model.evaluate(X_test, Y_test, verbose=0) - - return {'loss': -acc, 'status': STATUS_OK, 'model': model} - - -if __name__ == '__main__': - - X_train, X_test, Y_train, Y_test = data() - - ''' - Generate ensemble model from optimization run: - First, run hyperas optimization on specified setup, i.e. 10 trials with TPE, - then return the best 5 models and create a majority voting model from it. - ''' - ensemble_model = optim.best_ensemble(nb_ensemble_models=5, - model=model, data=data, - algo=rand.suggest, max_evals=10, - trials=Trials(), - voting='hard') - preds = ensemble_model.predict(X_test) - y_test = np_utils.categorical_probas_to_classes(Y_test) - print(accuracy_score(preds, y_test)) -from __future__ import print_function - -from hyperopt import Trials, STATUS_OK, tpe -from keras.datasets import mnist -from keras.layers.core import Dense, Dropout, Activation -from keras.models import Sequential -from keras.utils import np_utils - -from hyperas import optim -from hyperas.distributions import choice, uniform - - -def data(): - """ - Data providing function: - - This function is separated from model() so that hyperopt - won't reload data for each evaluation run. - """ - (x_train, y_train), (x_test, y_test) = mnist.load_data() - x_train = x_train.reshape(60000, 784) - x_test = x_test.reshape(10000, 784) - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - x_train /= 255 - x_test /= 255 - nb_classes = 10 - y_train = np_utils.to_categorical(y_train, nb_classes) - y_test = np_utils.to_categorical(y_test, nb_classes) - return x_train, y_train, x_test, y_test - - -def model(x_train, y_train, x_test, y_test): - """ - Model providing function: - - Create Keras model with double curly brackets dropped-in as needed. - Return value has to be a valid python dictionary with two customary keys: - - loss: Specify a numeric evaluation metric to be minimized - - status: Just use STATUS_OK and see hyperopt documentation if not feasible - The last one is optional, though recommended, namely: - - model: specify the model just created so that we can later use it again. 
- """ - model = Sequential() - model.add(Dense(512, input_shape=(784,))) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense({{choice([256, 512, 1024])}})) - model.add(Activation({{choice(['relu', 'sigmoid'])}})) - model.add(Dropout({{uniform(0, 1)}})) - - # If we choose 'four', add an additional fourth layer - if {{choice(['three', 'four'])}} == 'four': - model.add(Dense(100)) - - # We can also choose between complete sets of layers - - model.add({{choice([Dropout(0.5), Activation('linear')])}}) - model.add(Activation('relu')) - - model.add(Dense(10)) - model.add(Activation('softmax')) - - model.compile(loss='categorical_crossentropy', metrics=['accuracy'], - optimizer={{choice(['rmsprop', 'adam', 'sgd'])}}) - - model.fit(x_train, y_train, - batch_size={{choice([64, 128])}}, - epochs=1, - verbose=2, - validation_data=(x_test, y_test)) - score, acc = model.evaluate(x_test, y_test, verbose=0) - print('Test accuracy:', acc) - return {'loss': -acc, 'status': STATUS_OK, 'model': model} - - -if __name__ == '__main__': - best_run, best_model = optim.minimize(model=model, - data=data, - algo=tpe.suggest, - max_evals=5, - trials=Trials()) - X_train, Y_train, X_test, Y_test = data() - print("Evalutation of best performing model:") - print(best_model.evaluate(X_test, Y_test)) - print("Best performing model chosen hyper-parameters:") - print(best_run) -from __future__ import print_function -from hyperopt import Trials, STATUS_OK, tpe -from hyperas import optim -from hyperas.distributions import choice, uniform - -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Activation -from keras.optimizers import RMSprop - -from keras.datasets import mnist -from keras.utils import np_utils - - -def data(): - ''' - Data providing function: - - This function is separated from model() so that hyperopt - won't reload data for each evaluation run. - ''' - (X_train, y_train), (X_test, y_test) = mnist.load_data() - X_train = X_train.reshape(60000, 784) - X_test = X_test.reshape(10000, 784) - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - X_train /= 255 - X_test /= 255 - nb_classes = 10 - Y_train = np_utils.to_categorical(y_train, nb_classes) - Y_test = np_utils.to_categorical(y_test, nb_classes) - return X_train, Y_train, X_test, Y_test - - -def model(X_train, Y_train, X_test, Y_test): - ''' - Model providing function: - - Create Keras model with double curly brackets dropped-in as needed. - Return value has to be a valid python dictionary with two customary keys: - - loss: Specify a numeric evaluation metric to be minimized - - status: Just use STATUS_OK and see hyperopt documentation if not feasible - The last one is optional, though recommended, namely: - - model: specify the model just created so that we can later use it again. 
- ''' - model = Sequential() - model.add(Dense(512, input_shape=(784,))) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense({{choice([256, 512, 1024])}})) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense(10)) - model.add(Activation('softmax')) - - rms = RMSprop() - model.compile(loss='categorical_crossentropy', - optimizer=rms, metrics=['accuracy']) - - model.fit(X_train, Y_train, - batch_size={{choice([64, 128])}}, - nb_epoch=1, - verbose=2, - validation_data=(X_test, Y_test)) - score, acc = model.evaluate(X_test, Y_test, verbose=0) - print('Test accuracy:', acc) - return {'loss': -acc, 'status': STATUS_OK, 'model': model} - - -if __name__ == '__main__': - - X_train, Y_train, X_test, Y_test = data() - - best_run, best_model = optim.minimize(model=model, - data=data, - algo=tpe.suggest, - max_evals=5, - trials=Trials()) - - print("Evalutation of best performing model:") - print(best_model.evaluate(X_test, Y_test)) -from __future__ import print_function -from hyperopt import Trials, STATUS_OK, tpe -from hyperas import optim -from hyperas.distributions import choice, uniform - -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Activation -from keras.optimizers import RMSprop - -from keras.datasets import mnist -from keras.utils import np_utils - -import matplotlib.pyplot as plt - - -def visualization_mnist(x_data, n=10): - plt.figure(figsize=(20, 4)) - for i in range(n): - # display digit - ax = plt.subplot(1, n, i+1) - plt.imshow(x_data[i].reshape(28, 28)) - plt.gray() - ax.get_xaxis().set_visible(False) - ax.get_yaxis().set_visible(False) - plt.show() - - -def data(): - ''' - Data providing function: - - This function is separated from model() so that hyperopt - won't reload data for each evaluation run. - ''' - (X_train, y_train), (X_test, y_test) = mnist.load_data() - X_train = X_train.reshape(60000, 784) - X_test = X_test.reshape(10000, 784) - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - - visualization_mnist(X_test) - - X_train /= 255 - X_test /= 255 - nb_classes = 10 - Y_train = np_utils.to_categorical(y_train, nb_classes) - Y_test = np_utils.to_categorical(y_test, nb_classes) - return X_train, Y_train, X_test, Y_test - - -def model(X_train, Y_train, X_test, Y_test): - ''' - Model providing function: - - Create Keras model with double curly brackets dropped-in as needed. - Return value has to be a valid python dictionary with two customary keys: - - loss: Specify a numeric evaluation metric to be minimized - - status: Just use STATUS_OK and see hyperopt documentation if not feasible - The last one is optional, though recommended, namely: - - model: specify the model just created so that we can later use it again. 
- ''' - model = Sequential() - model.add(Dense(512, input_shape=(784,))) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense({{choice([256, 512, 1024])}})) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense(10)) - model.add(Activation('softmax')) - - rms = RMSprop() - model.compile(loss='categorical_crossentropy', - optimizer=rms, metrics=['accuracy']) - - model.fit(X_train, Y_train, - batch_size={{choice([64, 128])}}, - nb_epoch=1, - verbose=2, - validation_data=(X_test, Y_test)) - score, acc = model.evaluate(X_test, Y_test, verbose=0) - print('Test accuracy:', acc) - return {'loss': -acc, 'status': STATUS_OK, 'model': model} - - -if __name__ == '__main__': - - X_train, Y_train, X_test, Y_test = data() - functions = [visualization_mnist] - best_run, best_model = optim.minimize(model=model, - data=data, - functions=functions, - algo=tpe.suggest, - max_evals=5, - trials=Trials()) - - print("Evalutation of best performing model:") - print(best_model.evaluate(X_test, Y_test)) -from hyperopt.hp import choice -from hyperopt.hp import randint -from hyperopt.hp import pchoice -from hyperopt.hp import uniform -from hyperopt.hp import quniform -from hyperopt.hp import loguniform -from hyperopt.hp import qloguniform -from hyperopt.hp import normal -from hyperopt.hp import qnormal -from hyperopt.hp import lognormal -from hyperopt.hp import qlognormal -import numpy as np -from keras.models import model_from_yaml - - -class VotingModel(object): - - def __init__(self, model_list, voting='hard', - weights=None, nb_classes=None): - """(Weighted) majority vote model for a given list of Keras models. - - Parameters - ---------- - model_list: An iterable of Keras models. - voting: Choose 'hard' for straight-up majority vote of highest model probilities or 'soft' - for a weighted majority vote. In the latter, a weight vector has to be specified. - weights: Weight vector (numpy array) used for soft majority vote. - nb_classes: Number of classes being predicted. - - Returns - ------- - A voting model that has a predict method with the same signature of a single keras model. - """ - self.model_list = model_list - self.voting = voting - self.weights = weights - self.nb_classes = nb_classes - - if voting not in ['hard', 'soft']: - raise 'Voting has to be either hard or soft' - - if weights is not None: - if len(weights) != len(model_list): - raise ('Number of models {0} and length of weight vector {1} has to match.' 
- .format(len(weights), len(model_list))) - - def predict(self, X, batch_size=128, verbose=0): - predictions = list(map(lambda model: model.predict( - X, batch_size, verbose), self.model_list)) - nb_preds = len(X) - - if self.voting == 'hard': - for i, pred in enumerate(predictions): - pred = list(map( - lambda probas: np.argmax(probas, axis=-1), pred - )) - predictions[i] = np.asarray(pred).reshape(nb_preds, 1) - argmax_list = list(np.concatenate(predictions, axis=1)) - votes = np.asarray(list( - map(lambda arr: max(set(arr)), argmax_list) - )) - if self.voting == 'soft': - for i, pred in enumerate(predictions): - pred = list(map(lambda probas: probas * self.weights[i], pred)) - predictions[i] = np.asarray(pred).reshape( - nb_preds, self.nb_classes, 1) - weighted_preds = np.concatenate(predictions, axis=2) - weighted_avg = np.mean(weighted_preds, axis=2) - votes = np.argmax(weighted_avg, axis=1) - - return votes - - -def voting_model_from_yaml(yaml_list, voting='hard', weights=None): - model_list = map(lambda yml: model_from_yaml(yml), yaml_list) - return VotingModel(model_list, voting, weights) -import inspect -import os -import re -import sys - -import nbformat -import numpy as np -from hyperopt import fmin -from nbconvert import PythonExporter - -from .ensemble import VotingModel -from .utils import ( - remove_imports, remove_all_comments, extract_imports, temp_string, - write_temp_files, determine_indent, with_line_numbers, unpack_hyperopt_vals, - eval_hyperopt_space, find_signature_end) - -sys.path.append(".") - - -def minimize(model, - data, - algo, - max_evals, - trials, - functions=None, - rseed=1337, - notebook_name=None, - verbose=True, - eval_space=False, - return_space=False, - keep_temp=False): - """ - Minimize a keras model for given data and implicit hyperparameters. - - Parameters - ---------- - model: A function defining a keras model with hyperas templates, which returns a - valid hyperopt results dictionary, e.g. - return {'loss': -acc, 'status': STATUS_OK} - data: A parameter-less function that defines and return all data needed in the above - model definition. - algo: A hyperopt algorithm, like tpe.suggest or rand.suggest - max_evals: Maximum number of optimization runs - trials: A hyperopt trials object, used to store intermediate results for all - optimization runs - rseed: Integer random seed for experiments - notebook_name: If running from an ipython notebook, provide filename (not path) - verbose: Print verbose output - eval_space: Evaluate the best run in the search space such that 'choice's contain actually meaningful values instead of mere indices - return_space: Return the hyperopt search space object (e.g. for further processing) as last return value - keep_temp: Keep temp_model.py file on the filesystem - - Returns - ------- - If `return_space` is False: A pair consisting of the results dictionary of the best run and the corresponding - keras model. 
- If `return_space` is True: The pair of best result and corresponding keras model, and the hyperopt search space - """ - best_run, space = base_minimizer(model=model, - data=data, - functions=functions, - algo=algo, - max_evals=max_evals, - trials=trials, - rseed=rseed, - full_model_string=None, - notebook_name=notebook_name, - verbose=verbose, - keep_temp=keep_temp) - - best_model = None - for trial in trials: - vals = trial.get('misc').get('vals') - # unpack the values from lists without overwriting the mutable dict within 'trial' - unpacked_vals = unpack_hyperopt_vals(vals) - # identify the best_run (comes with unpacked values from the hyperopt function `base.Trials.argmin`) - if unpacked_vals == best_run and 'model' in trial.get('result').keys(): - best_model = trial.get('result').get('model') - - if eval_space is True: - # evaluate the search space - best_run = eval_hyperopt_space(space, best_run) - - if return_space is True: - # return the space as well - return best_run, best_model, space - else: - # the default case for backwards compatibility with expanded return arguments - return best_run, best_model - - -def base_minimizer(model, data, functions, algo, max_evals, trials, - rseed=1337, full_model_string=None, notebook_name=None, - verbose=True, stack=3, keep_temp=False): - if full_model_string is not None: - model_str = full_model_string - else: - model_str = get_hyperopt_model_string( - model, data, functions, notebook_name, verbose, stack) - temp_file = './temp_model.py' - write_temp_files(model_str, temp_file) - - if 'temp_model' in sys.modules: - del sys.modules["temp_model"] - - try: - from temp_model import keras_fmin_fnct, get_space - except: - print("Unexpected error: {}".format(sys.exc_info()[0])) - raise - try: - if not keep_temp: - os.remove(temp_file) - os.remove(temp_file + 'c') - except OSError: - pass - - try: - # for backward compatibility. 
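# What the generated temp_model.py contains (a sketch of the mechanism,
# not the verbatim file): every {{...}} template found in the user's
# model() becomes a named entry in a hyperopt search space, and the model
# body is rewritten to index into that space, e.g.
#
#     def get_space():
#         return {
#             'Dropout': hp.uniform('Dropout', 0, 1),
#             'batch_size': hp.choice('batch_size', [64, 128]),
#         }
#
#     def keras_fmin_fnct(space):
#         ...
#         model.add(Dropout(space['Dropout']))
#
# Names come from the identifier next to each template (see
# hyperparameter_names() below); repeated names get numeric suffixes
# (Dropout_1, ...). fmin() then samples from get_space() and calls
# keras_fmin_fnct once per trial. The first fmin() call here is wrapped
# in a try/except TypeError because old hyperopt releases accepted a
# plain `rseed` int while newer ones expect rstate=np.random.RandomState(...).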
- return ( - fmin(keras_fmin_fnct, - space=get_space(), - algo=algo, - max_evals=max_evals, - trials=trials, - rseed=rseed, - return_argmin=True), - get_space() - ) - except TypeError: - pass - - return ( - fmin(keras_fmin_fnct, - space=get_space(), - algo=algo, - max_evals=max_evals, - trials=trials, - rstate=np.random.RandomState(rseed), - return_argmin=True), - get_space() - ) - - -def best_ensemble(nb_ensemble_models, model, data, algo, max_evals, - trials, voting='hard', weights=None, nb_classes=None, functions=None): - model_list = best_models(nb_models=nb_ensemble_models, - model=model, - data=data, - algo=algo, - max_evals=max_evals, - trials=trials, - functions=functions) - return VotingModel(model_list, voting, weights, nb_classes) - - -def best_models(nb_models, model, data, algo, max_evals, trials, functions=None, keep_temp=False): - base_minimizer(model=model, - data=data, - functions=functions, - algo=algo, - max_evals=max_evals, - trials=trials, - stack=4, - keep_temp=keep_temp) - if len(trials) < nb_models: - nb_models = len(trials) - scores = [trial.get('result').get('loss') for trial in trials] - cut_off = sorted(scores, reverse=True)[nb_models - 1] - model_list = [trial.get('result').get( - 'model') for trial in trials if trial.get('result').get('loss') >= cut_off] - return model_list - - -def get_hyperopt_model_string(model, data, functions, notebook_name, verbose, stack): - model_string = inspect.getsource(model) - model_string = remove_imports(model_string) - - if notebook_name: - notebook_path = os.getcwd() + "/{}.ipynb".format(notebook_name) - with open(notebook_path, 'r') as f: - notebook = nbformat.reads(f.read(), nbformat.NO_CONVERT) - exporter = PythonExporter() - source, _ = exporter.from_notebook_node(notebook) - else: - calling_script_file = os.path.abspath(inspect.stack()[stack][1]) - with open(calling_script_file, 'r') as f: - source = f.read() - - cleaned_source = remove_all_comments(source) - imports = extract_imports(cleaned_source, verbose) - - parts = hyperparameter_names(model_string) - aug_parts = augmented_names(parts) - - hyperopt_params = get_hyperparameters(model_string) - space = get_hyperopt_space(parts, hyperopt_params, verbose) - - functions_string = retrieve_function_string(functions, verbose) - data_string = retrieve_data_string(data, verbose) - model = hyperopt_keras_model(model_string, parts, aug_parts, verbose) - - temp_str = temp_string(imports, model, data_string, - functions_string, space) - return temp_str - - -def get_hyperopt_space(parts, hyperopt_params, verbose=True): - space = "def get_space():\n return {\n" - for name, param in zip(parts, hyperopt_params): - param = re.sub(r"\(", "('" + name + "', ", param, 1) - space += " '" + name + "': hp." 
+ param + ",\n" - space = space[:-1] - space += "\n }\n" - if verbose: - print('>>> Hyperas search space:\n') - print(space) - return space - - -def retrieve_data_string(data, verbose=True): - data_string = inspect.getsource(data) - first_line = data_string.split("\n")[0] - indent_length = len(determine_indent(data_string)) - data_string = data_string.replace(first_line, "") - r = re.compile(r'^\s*return.*') - last_line = [s for s in reversed(data_string.split("\n")) if r.match(s)][0] - data_string = data_string.replace(last_line, "") - - split_data = data_string.split("\n") - for i, line in enumerate(split_data): - split_data[i] = line[indent_length:] + "\n" - data_string = ''.join(split_data) - if verbose: - print(">>> Data") - print(with_line_numbers(data_string)) - return data_string - - -def retrieve_function_string(functions, verbose=True): - function_strings = '' - if functions is None: - return function_strings - for function in functions: - function_string = inspect.getsource(function) - function_strings = function_strings + function_string + '\n' - if verbose: - print(">>> Functions") - print(with_line_numbers(function_strings)) - return function_strings - - -def hyperparameter_names(model_string): - parts = [] - params = re.findall(r"(\{\{[^}]+}\})", model_string) - for param in params: - name = re.findall( - r"(\w+(?=\s*[\=\(]\s*" + re.escape(param) + r"))", model_string) - if len(name) > 0: - parts.append(name[0]) - else: - parts.append(parts[-1]) - part_dict = {} - for i, part in enumerate(parts): - if part in part_dict.keys(): - part_dict[part] += 1 - parts[i] = part + "_" + str(part_dict[part]) - else: - part_dict[part] = 0 - return parts - - -def get_hyperparameters(model_string): - hyperopt_params = re.findall(r"(\{\{[^}]+}\})", model_string) - for i, param in enumerate(hyperopt_params): - hyperopt_params[i] = re.sub(r"[\{\}]", '', param) - return hyperopt_params - - -def augmented_names(parts): - aug_parts = [] - for i, part in enumerate(parts): - aug_parts.append("space['" + part + "']") - return aug_parts - - -def hyperopt_keras_model(model_string, parts, aug_parts, verbose=True): - colon_index = find_signature_end(model_string) - func_sign_line_end = model_string.count("\n", 0, colon_index) + 1 - func_sign_lines = "\n".join(model_string.split("\n")[:func_sign_line_end]) - model_string = model_string.replace( - func_sign_lines, "def keras_fmin_fnct(space):\n") - result = re.sub( - r"(\{\{[^}]+}\})", lambda match: aug_parts.pop(0), model_string, count=len(parts)) - if verbose: - print('>>> Resulting replaced keras model:\n') - print(with_line_numbers(result)) - return result -import ast -import re -import warnings -from operator import attrgetter - -from hyperopt import space_eval - - -class ImportParser(ast.NodeVisitor): - def __init__(self): - self.lines = [] - self.line_numbers = [] - - def visit_Import(self, node): - line = 'import {}'.format(self._import_names(node.names)) - if (self._import_asnames(node.names) != ''): - line += ' as {}'.format(self._import_asnames(node.names)) - self.line_numbers.append(node.lineno) - self.lines.append(line) - - def visit_ImportFrom(self, node): - line = 'from {}{} import {}'.format( - node.level * '.', - node.module or '', - self._import_names(node.names)) - if (self._import_asnames(node.names) != ''): - line += " as {}".format(self._import_asnames(node.names)) - self.line_numbers.append(node.lineno) - self.lines.append(line) - - def _import_names(self, names): - return ', '.join(map(attrgetter('name'), names)) - - def 
_import_asnames(self, names): - asname = map(attrgetter('asname'), names) - return ''.join(filter(None, asname)) - - -def extract_imports(source, verbose=True): - tree = ast.parse(source) - import_parser = ImportParser() - import_parser.visit(tree) - import_lines = ['#coding=utf-8\n'] - for line in import_parser.lines: - if 'print_function' in line: - import_lines.append(line + '\n') - # skip imports for pycharm and eclipse - elif '_pydev_' in line or 'java.lang' in line: - continue - else: - import_lines.append( - 'try:\n {}\nexcept:\n pass\n'.format(line)) - imports_str = '\n'.join(import_lines) - if verbose: - print('>>> Imports:') - print(imports_str) - return imports_str - - -def remove_imports(source): - tree = ast.parse(source) - import_parser = ImportParser() - import_parser.visit(tree) - # the source including all comments, since we parse the line numbers with comments! - lines = source.split('\n') - lines_to_remove = set(import_parser.line_numbers) - non_import_lines = [line for i, line in enumerate( - lines, start=1) if i not in lines_to_remove] - return '\n'.join(non_import_lines) - - -def remove_all_comments(source): - string = re.sub(re.compile("'''.*?'''", re.DOTALL), "", - source) # remove '''...''' comments - string = re.sub(re.compile( - "(?: for each line - - Example - ------- - code = "def do_stuff(x):\n\tprint(x)\n" - with_line_numbers(code) - - 1: def do_stuff(x): - 2: print(x) - 3: - """ - max_number_length = str(len(str(len(code)))) - format_str = "{:>" + max_number_length + "d}: {:}" - return "\n".join([format_str.format(line_number + 1, line) for line_number, line in enumerate(code.split("\n"))]) - - -def determine_indent(str): - """ - Figure out the character(s) used for indents in a given source code fragement. - - Parameters - ---------- - str : string - source code starting at an indent of 0 and containing at least one indented block. - - Returns - ------- - string - The character(s) used for indenting. - - Example - ------- - code = "def do_stuff(x)\n print(x)\n" - indent = determine_indent(str) - print("The code '", code, "' is indented with \n'", indent, "' (size: ", len(indent), ")") - """ - indent = None - reg = r""" - ^(?P\s*)\S.+?:\n # line starting a block, i. e. ' for i in x:\n' - ((\s*)\n)* # empty lines - (?P=previous_indent)(?P\s+)\S # first indented line of the new block, i. e. ' d'(..oStuff()) - """ - - matches = re.compile(reg, re.MULTILINE | re.VERBOSE).finditer(str) - for block_start in matches: - new_indent = block_start.groupdict()['indent'] - if indent and new_indent != indent: - warnings.warn('Inconsistent indentation detected.' - 'Found "%s" (length: %i) as well as "%s" (length: %i)' % ( - indent, len(indent), new_indent, len(new_indent))) - indent = new_indent - return indent - - -def unpack_hyperopt_vals(vals): - """ - Unpack values from a hyperopt return dictionary where values are wrapped in a list. - :param vals: dict - :return: dict - copy of the dictionary with unpacked values - """ - assert isinstance(vals, dict), "Parameter must be given as dict." - ret = {} - for k, v in list(vals.items()): - try: - ret[k] = v[0] - except (TypeError, IndexError): - ret[k] = v - return ret - - -def eval_hyperopt_space(space, vals): - """ - Evaluate a set of parameter values within the hyperopt space. - Optionally unpacks the values, if they are wrapped in lists. 
- :param space: dict - the hyperopt space dictionary - :param vals: dict - the values from a hyperopt trial - :return: evaluated space - """ - unpacked_vals = unpack_hyperopt_vals(vals) - return space_eval(space, unpacked_vals) - - -def find_signature_end(model_string): - """ - Find the index of the colon in the function signature. - :param model_string: string - source code of the model - :return: int - the index of the colon - """ - index, brace_depth = 0, 0 - while index < len(model_string): - ch = model_string[index] - if brace_depth == 0 and ch == ':': - break - if ch == '#': # Ignore comments - index += 1 - while index < len(model_string) and model_string[index] != '\n': - index += 1 - index += 1 - elif ch in ['"', "'"]: # Skip strings - string_depth = 0 - while index < len(model_string) and model_string[index] == ch: - string_depth += 1 - index += 1 - if string_depth == 2: - string_depth = 1 - index += string_depth - while index < len(model_string): - if model_string[index] == '\\': - index += 2 - elif model_string[index] == ch: - string_depth -= 1 - if string_depth == 0: - break - index += 1 - else: - index += 1 - index += 1 - elif ch == '(': - brace_depth += 1 - index += 1 - elif ch == ')': - brace_depth -= 1 - index += 1 - else: - index += 1 - return index -from __future__ import print_function -from hyperopt import Trials, STATUS_OK, tpe -from hyperas import optim -from hyperas.distributions import choice, uniform - -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Activation -from keras.optimizers import RMSprop - -from keras.datasets import mnist -from keras.utils import np_utils -from hyperopt import rand - - -def data(): - (X_train, y_train), (X_test, y_test) = mnist.load_data() - X_train = X_train.reshape(60000, 784) - X_test = X_test.reshape(10000, 784) - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - X_train /= 255 - X_test /= 255 - nb_classes = 10 - Y_train = np_utils.to_categorical(y_train, nb_classes) - Y_test = np_utils.to_categorical(y_test, nb_classes) - return X_train, Y_train, X_test, Y_test - - -def model(X_train, Y_train, X_test, Y_test): - model = Sequential() - model.add(Dense(50, input_shape=(784,))) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense({{choice([20, 30, 40])}})) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense(10)) - model.add(Activation('softmax')) - - rms = RMSprop() - model.compile(loss='categorical_crossentropy', - optimizer=rms, metrics=['accuracy']) - - model.fit(X_train, Y_train, - batch_size={{choice([64, 128])}}, - epochs=1, - verbose=2, - validation_data=(X_test, Y_test)) - score, acc = model.evaluate(X_test, Y_test, verbose=0) - print('Test accuracy:', acc) - return {'loss': -acc, 'status': STATUS_OK, 'model': model} - - -def test_simple(): - X_train, Y_train, X_test, Y_test = data() - trials = Trials() - best_run, best_model = optim.minimize(model=model, - data=data, - algo=tpe.suggest, - max_evals=1, - trials=trials, - verbose=False) - - -def ensemble_data(): - nb_classes = 10 - (X_train, y_train), (X_test, y_test) = mnist.load_data() - - X_train = X_train.reshape(60000, 784) - X_test = X_test.reshape(10000, 784) - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - X_train /= 255 - X_test /= 255 - Y_train = np_utils.to_categorical(y_train, nb_classes) - Y_test = np_utils.to_categorical(y_test, nb_classes) - return X_train, X_test, Y_train, Y_test - - -def 
ensemble_model(X_train, X_test, Y_train, Y_test): - model = Sequential() - model.add(Dense(512, input_shape=(784,))) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense({{choice([400, 512, 600])}})) - model.add(Activation('relu')) - model.add(Dropout({{uniform(0, 1)}})) - model.add(Dense(10)) - model.add(Activation('softmax')) - - rms = RMSprop() - model.compile(loss='categorical_crossentropy', - optimizer=rms, metrics=['accuracy']) - - nb_epoch = 10 - batch_size = 128 - - model.fit(X_train, Y_train, - batch_size=batch_size, nb_epoch=nb_epoch, - verbose=2, - validation_data=(X_test, Y_test)) - - score, acc = model.evaluate(X_test, Y_test, verbose=0) - - return {'loss': -acc, 'status': STATUS_OK, 'model': model} - - -def test_ensemble(): - X_train, X_test, Y_train, Y_test = data() - optim.best_ensemble(nb_ensemble_models=2, - model=model, - data=data, - algo=rand.suggest, - max_evals=1, - trials=Trials(), - voting='hard') -from __future__ import print_function -from hyperopt import Trials, STATUS_OK, tpe -from hyperas import optim -from hyperas.distributions import choice - -from keras.models import Model -from keras.layers import Dense, Input -from keras.optimizers import RMSprop - -from keras.datasets import mnist -from keras.utils import np_utils - - -def data(): - (X_train, y_train), (X_test, y_test) = mnist.load_data() - X_train = X_train.reshape(60000, 784) - X_test = X_test.reshape(10000, 784) - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - X_train /= 255 - X_test /= 255 - nb_classes = 10 - Y_train = np_utils.to_categorical(y_train, nb_classes) - Y_test = np_utils.to_categorical(y_test, nb_classes) - return X_train, Y_train, X_test, Y_test - - -def model(X_train, Y_train, X_test, Y_test): - inputs = Input(shape=(784,)) - - x = Dense({{choice([20, 30, 40])}}, activation='relu')(inputs) - x = Dense(64, activation='relu')(x) - predictions = Dense(10, activation='softmax')(x) - model = Model(inputs=inputs, outputs=predictions) - - rms = RMSprop() - model.compile(loss='categorical_crossentropy', - optimizer=rms, metrics=['accuracy']) - - model.fit(X_train, Y_train, - batch_size={{choice([64, 128])}}, - epochs=1, - verbose=2, - validation_data=(X_test, Y_test)) - score, acc = model.evaluate(X_test, Y_test, verbose=0) - print('Test accuracy:', acc) - return {'loss': -acc, 'status': STATUS_OK, 'model': model} - - -def model_multi_line_arguments(X_train, Y_train, - X_test, Y_test): - inputs = Input(shape=(784,)) - - x = Dense({{choice([20, 30, 40])}}, activation='relu')(inputs) - x = Dense(64, activation='relu')(x) - predictions = Dense(10, activation='softmax')(x) - model = Model(inputs=inputs, outputs=predictions) - - model.compile(loss='categorical_crossentropy', - optimizer='adam', metrics=['accuracy']) - - model.fit(X_train, Y_train, - batch_size={{choice([64, 128])}}, - epochs=1, - verbose=2, - validation_data=(X_test, Y_test)) - score, acc = model.evaluate(X_test, Y_test, verbose=0) - print('Test accuracy:', acc) - return {'loss': -acc, 'status': STATUS_OK, 'model': model} - - -def test_functional_api(): - X_train, Y_train, X_test, Y_test = data() - best_run, best_model = optim.minimize(model=model, - data=data, - algo=tpe.suggest, - max_evals=1, - trials=Trials(), - verbose=False) - best_run, best_model = optim.minimize(model=model_multi_line_arguments, - data=data, - algo=tpe.suggest, - max_evals=1, - trials=Trials(), - verbose=False) -from __future__ import print_function -from hyperopt import Trials, STATUS_OK, tpe 
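# The tests below exercise the canonical hyperas workflow; for reference,
# the minimal pattern (same names as used throughout these examples) is:
#
#     from hyperopt import Trials, STATUS_OK, tpe
#     from hyperas import optim
#     from hyperas.distributions import choice, uniform
#
#     def data():
#         # runs once; hyperas inlines its body into temp_model.py
#         return x_train, y_train, x_test, y_test
#
#     def model(x_train, y_train, x_test, y_test):
#         # build and fit a Keras model, using {{uniform(0, 1)}},
#         # {{choice([64, 128])}}, ... as hyperparameter templates
#         return {'loss': -acc, 'status': STATUS_OK, 'model': model}
#
#     best_run, best_model = optim.minimize(model=model, data=data,
#                                           algo=tpe.suggest, max_evals=5,
#                                           trials=Trials())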
-from hyperas import optim -from hyperas.distributions import choice - -from keras.models import Sequential -from keras.layers import Dense, Activation - -from keras.datasets import mnist -from keras.utils import np_utils -from keras.callbacks import ReduceLROnPlateau, EarlyStopping - - -def data(): - (x_train, y_train), (x_test, y_test) = mnist.load_data() - x_train = x_train.reshape(60000, 784) - x_test = x_test.reshape(10000, 784) - x_train = x_train.astype('float32') - x_test = x_test.astype('float32') - x_train /= 255 - x_test /= 255 - nb_classes = 10 - y_train = np_utils.to_categorical(y_train, nb_classes) - y_test = np_utils.to_categorical(y_test, nb_classes) - return x_train, y_train, x_test, y_test - - -def create_model(x_train, y_train, x_test, y_test): - model = Sequential() - model.add(Dense(44, input_shape=(784,))) - model.add(Activation({{choice(['relu', 'sigmoid'])}})) - model.add(Dense(44)) - model.add(Activation({{choice(['relu', 'sigmoid'])}})) - model.add(Dense(10)) - - model.compile(loss='mae', metrics=['mse'], optimizer="adam") - - es = EarlyStopping(monitor='val_loss', min_delta=1e-5, patience=10) - rlr = ReduceLROnPlateau(factor=0.1, patience=10) - _ = model.fit(x_train, y_train, epochs=1, verbose=0, callbacks=[es, rlr], - batch_size=24, validation_data=(x_test, y_test)) - - mae, mse = model.evaluate(x_test, y_test, verbose=0) - print('MAE:', mae) - return {'loss': mae, 'status': STATUS_OK, 'model': model} - - -def test_advanced_callbacks(): - X_train, Y_train, X_test, Y_test = data() - best_run, best_model = optim.minimize(model=create_model, - data=data, - algo=tpe.suggest, - max_evals=1, - trials=Trials(), - verbose=False) -from keras.datasets import mnist -from keras.utils import np_utils - -from hyperas.optim import retrieve_data_string - - -def test_data(): - (X_train, y_train), (X_test, y_test) = mnist.load_data() - X_train = X_train.reshape(60000, 784) - X_test = X_test.reshape(10000, 784) - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - X_train /= 255 - X_test /= 255 - nb_classes_return = 10 - Y_train = np_utils.to_categorical(y_train, nb_classes_return) - Y_test = np_utils.to_categorical(y_test, nb_classes_return) - return X_train, Y_train, X_test, Y_test - - -def test_data_function(): - result = retrieve_data_string(test_data, verbose=False) - assert 'return X_train, Y_train, X_test, Y_test' not in result - assert 'def data():' not in result - assert 'nb_classes_return = 10' in result - assert '(X_train, y_train), (X_test, y_test) = mnist.load_data()' in result - assert 'Y_test = np_utils.to_categorical(y_test, nb_classes_return)' in result - - -if __name__ == '__main__': - test_data_function() -""" -Some key layers used for constructing a Capsule Network. These layers can used to construct CapsNet on other dataset, -not just on MNIST. -*NOTE*: some functions can be implemented in multiple ways, I keep all of them. You can try them for yourself just by -uncommenting them and commenting their counterparts. - -Author: Xifeng Guo, E-mail: `guoxifeng1990@163.com`, Github: `https://github.com/XifengGuo/CapsNet-Keras` -""" - -import keras.backend as K -import tensorflow as tf -from keras import initializers, layers - - -class Length(layers.Layer): - """ - Compute the length of vectors. This is used to compute a Tensor that has the same shape with y_true in margin_loss. 
- Using this layer as model's output can directly predict labels by using `y_pred = np.argmax(model.predict(x), 1)` - inputs: shape=[None, num_vectors, dim_vector] - output: shape=[None, num_vectors] - """ - - def call(self, inputs, **kwargs): - return K.sqrt(K.sum(K.square(inputs), -1) + K.epsilon()) - - def compute_output_shape(self, input_shape): - return input_shape[:-1] - - def get_config(self): - config = super(Length, self).get_config() - return config - - -class Mask(layers.Layer): - """ - Mask a Tensor with shape=[None, num_capsule, dim_vector] either by the capsule with max length or by an additional - input mask. Except the max-length capsule (or specified capsule), all vectors are masked to zeros. Then flatten the - masked Tensor. - For example: - ``` - x = keras.layers.Input(shape=[8, 3, 2]) # batch_size=8, each sample contains 3 capsules with dim_vector=2 - y = keras.layers.Input(shape=[8, 3]) # True labels. 8 samples, 3 classes, one-hot coding. - out = Mask()(x) # out.shape=[8, 6] - # or - out2 = Mask()([x, y]) # out2.shape=[8,6]. Masked with true labels y. Of course y can also be manipulated. - ``` - """ - - def call(self, inputs, **kwargs): - # true label is provided with shape = [None, n_classes], i.e. one-hot code. - if type(inputs) is list: - assert len(inputs) == 2 - inputs, mask = inputs - else: # if no true label, mask by the max length of capsules. Mainly used for prediction - # compute lengths of capsules - x = K.sqrt(K.sum(K.square(inputs), -1)) - # generate the mask which is a one-hot code. - # mask.shape=[None, n_classes]=[None, num_capsule] - mask = K.one_hot(indices=K.argmax( - x, 1), num_classes=x.get_shape().as_list()[1]) - - # inputs.shape=[None, num_capsule, dim_capsule] - # mask.shape=[None, num_capsule] - # masked.shape=[None, num_capsule * dim_capsule] - masked = K.batch_flatten(inputs * K.expand_dims(mask, -1)) - return masked - - def compute_output_shape(self, input_shape): - if type(input_shape[0]) is tuple: # true label provided - return tuple([None, input_shape[0][1] * input_shape[0][2]]) - else: # no true label provided - return tuple([None, input_shape[1] * input_shape[2]]) - - def get_config(self): - config = super(Mask, self).get_config() - return config - - -def squash(vectors, axis=-1): - """ - The non-linear activation used in Capsule. It drives the length of a large vector to near 1 and small vector to 0 - :param vectors: some vectors to be squashed, N-dim tensor - :param axis: the axis to squash - :return: a Tensor with same shape as input vectors - """ - s_squared_norm = K.sum(K.square(vectors), axis, keepdims=True) - scale = s_squared_norm / (1 + s_squared_norm) / \ - K.sqrt(s_squared_norm + K.epsilon()) - return scale * vectors - - -class CapsuleLayer(layers.Layer): - """ - The capsule layer. It is similar to Dense layer. Dense layer has `in_num` inputs, each is a scalar, the output of the - neuron from the former layer, and it has `out_num` output neurons. CapsuleLayer just expand the output of the neuron - from scalar to vector. So its input shape = [None, input_num_capsule, input_dim_capsule] and output shape = \ - [None, num_capsule, dim_capsule]. For Dense Layer, input_dim_capsule = dim_capsule = 1. 
- - :param num_capsule: number of capsules in this layer - :param dim_capsule: dimension of the output vectors of the capsules in this layer - :param routings: number of iterations for the routing algorithm - """ - - def __init__(self, num_capsule, dim_capsule, routings=3, - kernel_initializer='glorot_uniform', - **kwargs): - super(CapsuleLayer, self).__init__(**kwargs) - self.num_capsule = num_capsule - self.dim_capsule = dim_capsule - self.routings = routings - self.kernel_initializer = initializers.get(kernel_initializer) - - def build(self, input_shape): - assert len( - input_shape) >= 3, "The input Tensor should have shape=[None, input_num_capsule, input_dim_capsule]" - self.input_num_capsule = input_shape[1] - self.input_dim_capsule = input_shape[2] - - # Transform matrix - self.W = self.add_weight(shape=[self.num_capsule, self.input_num_capsule, - self.dim_capsule, self.input_dim_capsule], - initializer=self.kernel_initializer, - name='W') - - self.built = True - - def call(self, inputs, training=None): - # inputs.shape=[None, input_num_capsule, input_dim_capsule] - # inputs_expand.shape=[None, 1, input_num_capsule, input_dim_capsule] - inputs_expand = K.expand_dims(inputs, 1) - - # Replicate num_capsule dimension to prepare being multiplied by W - # inputs_tiled.shape=[None, num_capsule, input_num_capsule, input_dim_capsule] - inputs_tiled = K.tile(inputs_expand, [1, self.num_capsule, 1, 1]) - - # Compute `inputs * W` by scanning inputs_tiled on dimension 0. - # x.shape=[num_capsule, input_num_capsule, input_dim_capsule] - # W.shape=[num_capsule, input_num_capsule, dim_capsule, input_dim_capsule] - # Regard the first two dimensions as `batch` dimension, - # then matmul: [input_dim_capsule] x [dim_capsule, input_dim_capsule]^T -> [dim_capsule]. - # inputs_hat.shape = [None, num_capsule, input_num_capsule, dim_capsule] - inputs_hat = K.map_fn(lambda x: K.batch_dot( - x, self.W, [2, 3]), elems=inputs_tiled) - - # Begin: Routing algorithm ---------------------------------------------------------------------# - # The prior for coupling coefficient, initialized as zeros. - # b.shape = [None, self.num_capsule, self.input_num_capsule]. - b = tf.zeros(shape=[K.shape(inputs_hat)[0], - self.num_capsule, self.input_num_capsule]) - - assert self.routings > 0, 'The routings should be > 0.' - for i in range(self.routings): - # c.shape=[batch_size, num_capsule, input_num_capsule] - c = tf.nn.softmax(b, dim=1) - - # c.shape = [batch_size, num_capsule, input_num_capsule] - # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule] - # The first two dimensions as `batch` dimension, - # then matmal: [input_num_capsule] x [input_num_capsule, dim_capsule] -> [dim_capsule]. - # outputs.shape=[None, num_capsule, dim_capsule] - # [None, 10, 16] - outputs = squash(K.batch_dot(c, inputs_hat, [2, 2])) - - if i < self.routings - 1: - # outputs.shape = [None, num_capsule, dim_capsule] - # inputs_hat.shape=[None, num_capsule, input_num_capsule, dim_capsule] - # The first two dimensions as `batch` dimension, - # then matmal: [dim_capsule] x [input_num_capsule, dim_capsule]^T -> [input_num_capsule]. 
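# The routing loop above, summarized in equations (indices follow the
# shape comments in this file; u_hat is inputs_hat, computed once before
# the loop):
#
#     u_hat[j|i] = W[j, i] . u[i]               # prediction vectors
#     c[i, j]    = softmax_j(b[i, j])           # coupling coefficients
#     v[j]       = squash(sum_i c[i, j] * u_hat[j|i])
#     b[i, j]   += u_hat[j|i] . v[j]            # agreement update, below
#
# with squash(s) = (||s||^2 / (1 + ||s||^2)) * s / ||s||, as defined
# earlier in this file.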
- # b.shape=[batch_size, num_capsule, input_num_capsule] - b += K.batch_dot(outputs, inputs_hat, [2, 3]) - # End: Routing algorithm -----------------------------------------------------------------------# - - return outputs - - def compute_output_shape(self, input_shape): - return tuple([None, self.num_capsule, self.dim_capsule]) - - def get_config(self): - config = { - 'num_capsule': self.num_capsule, - 'dim_capsule': self.dim_capsule, - 'routings': self.routings - } - base_config = super(CapsuleLayer, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -def PrimaryCap(inputs, dim_capsule, n_channels, kernel_size, strides, padding): - """ - Apply Conv2D `n_channels` times and concatenate all capsules - :param inputs: 4D tensor, shape=[None, width, height, channels] - :param dim_capsule: the dim of the output vector of capsule - :param n_channels: the number of types of capsules - :return: output tensor, shape=[None, num_capsule, dim_capsule] - """ - output = layers.Conv2D(filters=dim_capsule*n_channels, kernel_size=kernel_size, strides=strides, padding=padding, - name='primarycap_conv2d')(inputs) - outputs = layers.Reshape( - target_shape=[-1, dim_capsule], name='primarycap_reshape')(output) - return layers.Lambda(squash, name='primarycap_squash')(outputs) - - -""" -# The following is another way to implement primary capsule layer. This is much slower. -# Apply Conv2D `n_channels` times and concatenate all capsules -def PrimaryCap(inputs, dim_capsule, n_channels, kernel_size, strides, padding): - outputs = [] - for _ in range(n_channels): - output = layers.Conv2D(filters=dim_capsule, kernel_size=kernel_size, strides=strides, padding=padding)(inputs) - outputs.append(layers.Reshape([output.get_shape().as_list()[1] ** 2, dim_capsule])(output)) - outputs = layers.Concatenate(axis=1)(outputs) - return layers.Lambda(squash)(outputs) -""" -""" -Keras implementation of CapsNet in Hinton's paper Dynamic Routing Between Capsules. -The current version maybe only works for TensorFlow backend. Actually it will be straightforward to re-write to TF code. -Adopting to other backends should be easy, but I have not tested this. - -Usage: - python capsulenet-multi-gpu.py - python capsulenet-multi-gpu.py --gpus 2 - ... ... 
- -Result: - About 55 seconds per epoch on two GTX1080Ti GPU cards - -Author: Xifeng Guo, E-mail: `guoxifeng1990@163.com`, Github: `https://github.com/XifengGuo/CapsNet-Keras` -""" - -from capsulenet import CapsNet, margin_loss, load_mnist, manipulate_latent, test -from keras import optimizers -from keras import backend as K - -K.set_image_data_format('channels_last') - - -def train(model, data, args): - """ - Training a CapsuleNet - :param model: the CapsuleNet model - :param data: a tuple containing training and testing data, like `((x_train, y_train), (x_test, y_test))` - :param args: arguments - :return: The trained model - """ - # unpacking the data - (x_train, y_train), (x_test, y_test) = data - - # callbacks - log = callbacks.CSVLogger(args.save_dir + '/log.csv') - tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs', - batch_size=args.batch_size, histogram_freq=args.debug) - lr_decay = callbacks.LearningRateScheduler( - schedule=lambda epoch: args.lr * (0.9 ** epoch)) - - # compile the model - model.compile(optimizer=optimizers.Adam(lr=args.lr), - loss=[margin_loss, 'mse'], - loss_weights=[1., args.lam_recon]) - - """ - # Training without data augmentation: - model.fit([x_train, y_train], [y_train, x_train], batch_size=args.batch_size, epochs=args.epochs, - validation_data=[[x_test, y_test], [y_test, x_test]], callbacks=[log, tb, checkpoint, lr_decay]) - """ - - # Begin: Training with data augmentation ---------------------------------------------------------------------# - def train_generator(x, y, batch_size, shift_fraction=0.): - train_datagen = ImageDataGenerator(width_shift_range=shift_fraction, - height_shift_range=shift_fraction) # shift up to 2 pixel for MNIST - generator = train_datagen.flow(x, y, batch_size=batch_size) - while 1: - x_batch, y_batch = generator.next() - yield ([x_batch, y_batch], [y_batch, x_batch]) - - # Training with data augmentation. If shift_fraction=0., also no augmentation. - model.fit_generator(generator=train_generator(x_train, y_train, args.batch_size, args.shift_fraction), - steps_per_epoch=int( - y_train.shape[0] / args.batch_size), - epochs=args.epochs, - validation_data=[[x_test, y_test], [y_test, x_test]], - callbacks=[log, tb, lr_decay]) - # End: Training with data augmentation -----------------------------------------------------------------------# - - from utils import plot_log - plot_log(args.save_dir + '/log.csv', show=True) - - return model - - -if __name__ == "__main__": - import numpy as np - import tensorflow as tf - import os - from keras.preprocessing.image import ImageDataGenerator - from keras import callbacks - from keras.utils.vis_utils import plot_model - from keras.utils import multi_gpu_model - - # setting the hyper parameters - import argparse - parser = argparse.ArgumentParser(description="Capsule Network on MNIST.") - parser.add_argument('--epochs', default=50, type=int) - parser.add_argument('--batch_size', default=300, type=int) - parser.add_argument('--lam_recon', default=0.392, type=float, - help="The coefficient for the loss of decoder") - parser.add_argument('-r', '--routings', default=3, type=int, - help="Number of iterations used in routing algorithm. 
should > 0") - parser.add_argument('--shift_fraction', default=0.1, type=float, - help="Fraction of pixels to shift at most in each direction.") - parser.add_argument('--debug', default=0, type=int, - help="Save weights by TensorBoard") - parser.add_argument('--save_dir', default='./result') - parser.add_argument('-t', '--testing', action='store_true', - help="Test the trained model on testing dataset") - parser.add_argument('--digit', default=5, type=int, - help="Digit to manipulate") - parser.add_argument('-w', '--weights', default=None, - help="The path of the saved weights. Should be specified when testing") - parser.add_argument('--lr', default=0.001, type=float, - help="Initial learning rate") - parser.add_argument('--gpus', default=2, type=int) - args = parser.parse_args() - print(args) - if not os.path.exists(args.save_dir): - os.makedirs(args.save_dir) - - # load data - (x_train, y_train), (x_test, y_test) = load_mnist() - - # define model - with tf.device('/cpu:0'): - model, eval_model, manipulate_model = CapsNet(input_shape=x_train.shape[1:], - n_class=len( - np.unique(np.argmax(y_train, 1))), - routings=args.routings) - model.summary() - plot_model(model, to_file=args.save_dir+'/model.png', show_shapes=True) - - # train or test - if args.weights is not None: # init the model weights with provided one - model.load_weights(args.weights) - if not args.testing: - # define muti-gpu model - multi_model = multi_gpu_model(model, gpus=args.gpus) - train(model=multi_model, data=( - (x_train, y_train), (x_test, y_test)), args=args) - model.save_weights(args.save_dir + '/trained_model.h5') - print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir) - test(model=eval_model, data=(x_test, y_test), args=args) - else: # as long as weights are given, will run testing - if args.weights is None: - print('No weights are provided. Will test using random initialized weights.') - manipulate_latent(manipulate_model, (x_test, y_test), args) - test(model=eval_model, data=(x_test, y_test), args=args) -""" -Keras implementation of CapsNet in Hinton's paper Dynamic Routing Between Capsules. -The current version maybe only works for TensorFlow backend. Actually it will be straightforward to re-write to TF code. -Adopting to other backends should be easy, but I have not tested this. - -Usage: - python capsulenet.py - python capsulenet.py --epochs 50 - python capsulenet.py --epochs 50 --routings 3 - ... ... - -Result: - Validation accuracy > 99.5% after 20 epochs. Converge to 99.66% after 50 epochs. - About 110 seconds per epoch on a single GTX1070 GPU card - -Author: Xifeng Guo, E-mail: `guoxifeng1990@163.com`, Github: `https://github.com/XifengGuo/CapsNet-Keras` -""" - -import numpy as np -from keras import layers, models, optimizers -from keras import backend as K -from keras.utils import to_categorical -import matplotlib.pyplot as plt -from utils import combine_images -from PIL import Image -from capsulelayers import CapsuleLayer, PrimaryCap, Length, Mask - -K.set_image_data_format('channels_last') - - -def CapsNet(input_shape, n_class, routings): - """ - A Capsule Network on MNIST. - :param input_shape: data shape, 3d, [width, height, channels] - :param n_class: number of classes - :param routings: number of routing iterations - :return: Two Keras Models, the first one used for training, and the second one for evaluation. - `eval_model` can also be used for training. 
- """ - x = layers.Input(shape=input_shape) - - # Layer 1: Just a conventional Conv2D layer - conv1 = layers.Conv2D(filters=256, kernel_size=9, strides=1, - padding='valid', activation='relu', name='conv1')(x) - - # Layer 2: Conv2D layer with `squash` activation, then reshape to [None, num_capsule, dim_capsule] - primarycaps = PrimaryCap( - conv1, dim_capsule=8, n_channels=32, kernel_size=9, strides=2, padding='valid') - - # Layer 3: Capsule layer. Routing algorithm works here. - digitcaps = CapsuleLayer(num_capsule=n_class, dim_capsule=16, routings=routings, - name='digitcaps')(primarycaps) - - # Layer 4: This is an auxiliary layer to replace each capsule with its length. Just to match the true label's shape. - # If using tensorflow, this will not be necessary. :) - out_caps = Length(name='capsnet')(digitcaps) - - # Decoder network. - y = layers.Input(shape=(n_class,)) - # The true label is used to mask the output of capsule layer. For training - masked_by_y = Mask()([digitcaps, y]) - # Mask using the capsule with maximal length. For prediction - masked = Mask()(digitcaps) - - # Shared Decoder model in training and prediction - decoder = models.Sequential(name='decoder') - decoder.add(layers.Dense(512, activation='relu', input_dim=16*n_class)) - decoder.add(layers.Dense(1024, activation='relu')) - decoder.add(layers.Dense(np.prod(input_shape), activation='sigmoid')) - decoder.add(layers.Reshape(target_shape=input_shape, name='out_recon')) - - # Models for training and evaluation (prediction) - train_model = models.Model([x, y], [out_caps, decoder(masked_by_y)]) - eval_model = models.Model(x, [out_caps, decoder(masked)]) - - # manipulate model - noise = layers.Input(shape=(n_class, 16)) - noised_digitcaps = layers.Add()([digitcaps, noise]) - masked_noised_y = Mask()([noised_digitcaps, y]) - manipulate_model = models.Model([x, y, noise], decoder(masked_noised_y)) - return train_model, eval_model, manipulate_model - - -def margin_loss(y_true, y_pred): - """ - Margin loss for Eq.(4). When y_true[i, :] contains not just one `1`, this loss should work too. Not test it. - :param y_true: [None, n_classes] - :param y_pred: [None, num_capsule] - :return: a scalar loss value. 
- """ - L = y_true * K.square(K.maximum(0., 0.9 - y_pred)) + \ - 0.5 * (1 - y_true) * K.square(K.maximum(0., y_pred - 0.1)) - - return K.mean(K.sum(L, 1)) - - -def train(model, data, args): - """ - Training a CapsuleNet - :param model: the CapsuleNet model - :param data: a tuple containing training and testing data, like `((x_train, y_train), (x_test, y_test))` - :param args: arguments - :return: The trained model - """ - # unpacking the data - (x_train, y_train), (x_test, y_test) = data - - # callbacks - log = callbacks.CSVLogger(args.save_dir + '/log.csv') - tb = callbacks.TensorBoard(log_dir=args.save_dir + '/tensorboard-logs', - batch_size=args.batch_size, histogram_freq=int(args.debug)) - checkpoint = callbacks.ModelCheckpoint(args.save_dir + '/weights-{epoch:02d}.h5', monitor='val_capsnet_acc', - save_best_only=True, save_weights_only=True, verbose=1) - lr_decay = callbacks.LearningRateScheduler( - schedule=lambda epoch: args.lr * (args.lr_decay ** epoch)) - - # compile the model - model.compile(optimizer=optimizers.Adam(lr=args.lr), - loss=[margin_loss, 'mse'], - loss_weights=[1., args.lam_recon], - metrics={'capsnet': 'accuracy'}) - - """ - # Training without data augmentation: - model.fit([x_train, y_train], [y_train, x_train], batch_size=args.batch_size, epochs=args.epochs, - validation_data=[[x_test, y_test], [y_test, x_test]], callbacks=[log, tb, checkpoint, lr_decay]) - """ - - # Begin: Training with data augmentation ---------------------------------------------------------------------# - def train_generator(x, y, batch_size, shift_fraction=0.): - train_datagen = ImageDataGenerator(width_shift_range=shift_fraction, - height_shift_range=shift_fraction) # shift up to 2 pixel for MNIST - generator = train_datagen.flow(x, y, batch_size=batch_size) - while 1: - x_batch, y_batch = generator.next() - yield ([x_batch, y_batch], [y_batch, x_batch]) - - # Training with data augmentation. If shift_fraction=0., also no augmentation. 
- model.fit_generator(generator=train_generator(x_train, y_train, args.batch_size, args.shift_fraction), - steps_per_epoch=int( - y_train.shape[0] / args.batch_size), - epochs=args.epochs, - validation_data=[[x_test, y_test], [y_test, x_test]], - callbacks=[log, tb, checkpoint, lr_decay]) - # End: Training with data augmentation -----------------------------------------------------------------------# - - model.save_weights(args.save_dir + '/trained_model.h5') - print('Trained model saved to \'%s/trained_model.h5\'' % args.save_dir) - - from utils import plot_log - plot_log(args.save_dir + '/log.csv', show=True) - - return model - - -def test(model, data, args): - x_test, y_test = data - y_pred, x_recon = model.predict(x_test, batch_size=100) - print('-'*30 + 'Begin: test' + '-'*30) - print('Test acc:', np.sum(np.argmax(y_pred, 1) - == np.argmax(y_test, 1))/y_test.shape[0]) - - img = combine_images(np.concatenate([x_test[:50], x_recon[:50]])) - image = img * 255 - Image.fromarray(image.astype(np.uint8)).save( - args.save_dir + "/real_and_recon.png") - print() - print('Reconstructed images are saved to %s/real_and_recon.png' % args.save_dir) - print('-' * 30 + 'End: test' + '-' * 30) - plt.imshow(plt.imread(args.save_dir + "/real_and_recon.png")) - plt.show() - - -def manipulate_latent(model, data, args): - print('-'*30 + 'Begin: manipulate' + '-'*30) - x_test, y_test = data - index = np.argmax(y_test, 1) == args.digit - number = np.random.randint(low=0, high=sum(index) - 1) - x, y = x_test[index][number], y_test[index][number] - x, y = np.expand_dims(x, 0), np.expand_dims(y, 0) - noise = np.zeros([1, 10, 16]) - x_recons = [] - for dim in range(16): - for r in [-0.25, -0.2, -0.15, -0.1, -0.05, 0, 0.05, 0.1, 0.15, 0.2, 0.25]: - tmp = np.copy(noise) - tmp[:, :, dim] = r - x_recon = model.predict([x, y, tmp]) - x_recons.append(x_recon) - - x_recons = np.concatenate(x_recons) - - img = combine_images(x_recons, height=16) - image = img*255 - Image.fromarray(image.astype(np.uint8)).save( - args.save_dir + '/manipulate-%d.png' % args.digit) - print('manipulated result saved to %s/manipulate-%d.png' % - (args.save_dir, args.digit)) - print('-' * 30 + 'End: manipulate' + '-' * 30) - - -def load_mnist(): - # the data, shuffled and split between train and test sets - from keras.datasets import mnist - (x_train, y_train), (x_test, y_test) = mnist.load_data() - - x_train = x_train.reshape(-1, 28, 28, 1).astype('float32') / 255. - x_test = x_test.reshape(-1, 28, 28, 1).astype('float32') / 255. - y_train = to_categorical(y_train.astype('float32')) - y_test = to_categorical(y_test.astype('float32')) - return (x_train, y_train), (x_test, y_test) - - -if __name__ == "__main__": - import os - import argparse - from keras.preprocessing.image import ImageDataGenerator - from keras import callbacks - - # setting the hyper parameters - parser = argparse.ArgumentParser(description="Capsule Network on MNIST.") - parser.add_argument('--epochs', default=50, type=int) - parser.add_argument('--batch_size', default=100, type=int) - parser.add_argument('--lr', default=0.001, type=float, - help="Initial learning rate") - parser.add_argument('--lr_decay', default=0.9, type=float, - help="The value multiplied by lr at each epoch. Set a larger value for larger epochs") - parser.add_argument('--lam_recon', default=0.392, type=float, - help="The coefficient for the loss of decoder") - parser.add_argument('-r', '--routings', default=3, type=int, - help="Number of iterations used in routing algorithm. 
should > 0") - parser.add_argument('--shift_fraction', default=0.1, type=float, - help="Fraction of pixels to shift at most in each direction.") - parser.add_argument('--debug', action='store_true', - help="Save weights by TensorBoard") - parser.add_argument('--save_dir', default='./result') - parser.add_argument('-t', '--testing', action='store_true', - help="Test the trained model on testing dataset") - parser.add_argument('--digit', default=5, type=int, - help="Digit to manipulate") - parser.add_argument('-w', '--weights', default=None, - help="The path of the saved weights. Should be specified when testing") - args = parser.parse_args() - print(args) - - if not os.path.exists(args.save_dir): - os.makedirs(args.save_dir) - - # load data - (x_train, y_train), (x_test, y_test) = load_mnist() - - # define model - model, eval_model, manipulate_model = CapsNet(input_shape=x_train.shape[1:], - n_class=len( - np.unique(np.argmax(y_train, 1))), - routings=args.routings) - model.summary() - - # train or test - if args.weights is not None: # init the model weights with provided one - model.load_weights(args.weights) - if not args.testing: - train(model=model, data=((x_train, y_train), (x_test, y_test)), args=args) - else: # as long as weights are given, will run testing - if args.weights is None: - print('No weights are provided. Will test using random initialized weights.') - manipulate_latent(manipulate_model, (x_test, y_test), args) - test(model=eval_model, data=(x_test, y_test), args=args) -import numpy as np -from matplotlib import pyplot as plt -import csv -import math -import pandas - - -def plot_log(filename, show=True): - - data = pandas.read_csv(filename) - - fig = plt.figure(figsize=(4, 6)) - fig.subplots_adjust(top=0.95, bottom=0.05, right=0.95) - fig.add_subplot(211) - for key in data.keys(): - if key.find('loss') >= 0 and not key.find('val') >= 0: # training loss - plt.plot(data['epoch'].values, data[key].values, label=key) - plt.legend() - plt.title('Training loss') - - fig.add_subplot(212) - for key in data.keys(): - if key.find('acc') >= 0: # acc - plt.plot(data['epoch'].values, data[key].values, label=key) - plt.legend() - plt.title('Training and validation accuracy') - - # fig.savefig('result/log.png') - if show: - plt.show() - - -def combine_images(generated_images, height=None, width=None): - num = generated_images.shape[0] - if width is None and height is None: - width = int(math.sqrt(num)) - height = int(math.ceil(float(num)/width)) - elif width is not None and height is None: # height not given - height = int(math.ceil(float(num)/width)) - elif height is not None and width is None: # width not given - width = int(math.ceil(float(num)/height)) - - shape = generated_images.shape[1:3] - image = np.zeros((height*shape[0], width*shape[1]), - dtype=generated_images.dtype) - for index, img in enumerate(generated_images): - i = int(index/width) - j = index % width - image[i*shape[0]:(i+1)*shape[0], j*shape[1]:(j+1)*shape[1]] = \ - img[:, :, 0] - return image - - -if __name__ == "__main__": - plot_log('result/log.csv') -# -*- coding: utf-8 -*- -import random -import gym -import numpy as np -from collections import deque -from keras.models import Sequential -from keras.layers import Dense -from keras.optimizers import Adam -from keras import backend as K - -import tensorflow as tf - -EPISODES = 5000 - - -class DQNAgent: - def __init__(self, state_size, action_size): - self.state_size = state_size - self.action_size = action_size - self.memory = deque(maxlen=2000) - self.gamma = 0.95 
# discount rate - self.epsilon = 1.0 # exploration rate - self.epsilon_min = 0.01 - self.epsilon_decay = 0.99 - self.learning_rate = 0.001 - self.model = self._build_model() - self.target_model = self._build_model() - self.update_target_model() - - """Huber loss for Q Learning - - References: https://en.wikipedia.org/wiki/Huber_loss - https://www.tensorflow.org/api_docs/python/tf/losses/huber_loss - """ - - def _huber_loss(self, y_true, y_pred, clip_delta=1.0): - error = y_true - y_pred - cond = K.abs(error) <= clip_delta - - squared_loss = 0.5 * K.square(error) - quadratic_loss = 0.5 * \ - K.square(clip_delta) + clip_delta * (K.abs(error) - clip_delta) - - return K.mean(tf.where(cond, squared_loss, quadratic_loss)) - - def _build_model(self): - # Neural Net for Deep-Q learning Model - model = Sequential() - model.add(Dense(24, input_dim=self.state_size, activation='relu')) - model.add(Dense(24, activation='relu')) - model.add(Dense(self.action_size, activation='linear')) - model.compile(loss=self._huber_loss, - optimizer=Adam(lr=self.learning_rate)) - return model - - def update_target_model(self): - # copy weights from model to target_model - self.target_model.set_weights(self.model.get_weights()) - - def remember(self, state, action, reward, next_state, done): - self.memory.append((state, action, reward, next_state, done)) - - def act(self, state): - if np.random.rand() <= self.epsilon: - return random.randrange(self.action_size) - act_values = self.model.predict(state) - return np.argmax(act_values[0]) # returns action - - def replay(self, batch_size): - minibatch = random.sample(self.memory, batch_size) - for state, action, reward, next_state, done in minibatch: - target = self.model.predict(state) - if done: - target[0][action] = reward - else: - # a = self.model.predict(next_state)[0] - t = self.target_model.predict(next_state)[0] - target[0][action] = reward + self.gamma * np.amax(t) - # target[0][action] = reward + self.gamma * t[np.argmax(a)] - self.model.fit(state, target, epochs=1, verbose=0) - if self.epsilon > self.epsilon_min: - self.epsilon *= self.epsilon_decay - - def load(self, name): - self.model.load_weights(name) - - def save(self, name): - self.model.save_weights(name) - - -if __name__ == "__main__": - env = gym.make('CartPole-v1') - state_size = env.observation_space.shape[0] - action_size = env.action_space.n - agent = DQNAgent(state_size, action_size) - # agent.load("./save/cartpole-ddqn.h5") - done = False - batch_size = 32 - - for e in range(EPISODES): - state = env.reset() - state = np.reshape(state, [1, state_size]) - for time in range(500): - # env.render() - action = agent.act(state) - next_state, reward, done, _ = env.step(action) - reward = reward if not done else -10 - next_state = np.reshape(next_state, [1, state_size]) - agent.remember(state, action, reward, next_state, done) - state = next_state - if done: - agent.update_target_model() - print("episode: {}/{}, score: {}, e: {:.2}" - .format(e, EPISODES, time, agent.epsilon)) - break - if len(agent.memory) > batch_size: - agent.replay(batch_size) - # if e % 10 == 0: - # agent.save("./save/cartpole-ddqn.h5") -# -*- coding: utf-8 -*- -import random -import gym -import numpy as np -from collections import deque -from keras.models import Sequential -from keras.layers import Dense -from keras.optimizers import Adam - -EPISODES = 1000 - - -class DQNAgent: - def __init__(self, state_size, action_size): - self.state_size = state_size - self.action_size = action_size - self.memory = deque(maxlen=2000) - 
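# bounded replay memory: once 2000 transitions are stored, each append evicts the oldest
- 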
self.gamma = 0.95 # discount rate - self.epsilon = 1.0 # exploration rate - self.epsilon_min = 0.01 - self.epsilon_decay = 0.995 - self.learning_rate = 0.001 - self.model = self._build_model() - - def _build_model(self): - # Neural Net for Deep-Q learning Model - model = Sequential() - model.add(Dense(24, input_dim=self.state_size, activation='relu')) - model.add(Dense(24, activation='relu')) - model.add(Dense(self.action_size, activation='linear')) - model.compile(loss='mse', - optimizer=Adam(lr=self.learning_rate)) - return model - - def remember(self, state, action, reward, next_state, done): - self.memory.append((state, action, reward, next_state, done)) - - def act(self, state): - if np.random.rand() <= self.epsilon: - return random.randrange(self.action_size) - act_values = self.model.predict(state) - return np.argmax(act_values[0]) # returns action - - def replay(self, batch_size): - minibatch = random.sample(self.memory, batch_size) - for state, action, reward, next_state, done in minibatch: - target = reward - if not done: - target = (reward + self.gamma * - np.amax(self.model.predict(next_state)[0])) - target_f = self.model.predict(state) - target_f[0][action] = target - self.model.fit(state, target_f, epochs=1, verbose=0) - if self.epsilon > self.epsilon_min: - self.epsilon *= self.epsilon_decay - - def load(self, name): - self.model.load_weights(name) - - def save(self, name): - self.model.save_weights(name) - - -if __name__ == "__main__": - env = gym.make('CartPole-v1') - state_size = env.observation_space.shape[0] - action_size = env.action_space.n - agent = DQNAgent(state_size, action_size) - # agent.load("./save/cartpole-dqn.h5") - done = False - batch_size = 32 - - for e in range(EPISODES): - state = env.reset() - state = np.reshape(state, [1, state_size]) - for time in range(500): - # env.render() - action = agent.act(state) - next_state, reward, done, _ = env.step(action) - reward = reward if not done else -10 - next_state = np.reshape(next_state, [1, state_size]) - agent.remember(state, action, reward, next_state, done) - state = next_state - if done: - print("episode: {}/{}, score: {}, e: {:.2}" - .format(e, EPISODES, time, agent.epsilon)) - break - if len(agent.memory) > batch_size: - agent.replay(batch_size) - # if e % 10 == 0: - # agent.save("./save/cartpole-dqn.h5") -# -*- coding: utf-8 -*- -import random -import gym -import numpy as np -from collections import deque -from keras.models import Sequential -from keras.layers import Dense -from keras.optimizers import Adam - -EPISODES = 1000 - - -class DQNAgent: - def __init__(self, state_size, action_size): - self.state_size = state_size - self.action_size = action_size - self.memory = deque(maxlen=2000) - self.gamma = 0.95 # discount rate - self.epsilon = 1.0 # exploration rate - self.epsilon_min = 0.01 - self.epsilon_decay = 0.995 - self.learning_rate = 0.001 - self.model = self._build_model() - - def _build_model(self): - # Neural Net for Deep-Q learning Model - model = Sequential() - model.add(Dense(24, input_dim=self.state_size, activation='relu')) - model.add(Dense(24, activation='relu')) - model.add(Dense(self.action_size, activation='linear')) - model.compile(loss='mse', - optimizer=Adam(lr=self.learning_rate)) - return model - - def remember(self, state, action, reward, next_state, done): - self.memory.append((state, action, reward, next_state, done)) - - def act(self, state): - if np.random.rand() <= self.epsilon: - return random.randrange(self.action_size) - act_values = self.model.predict(state) - return 
np.argmax(act_values[0]) # returns action - - def replay(self, batch_size): - minibatch = random.sample(self.memory, batch_size) - states, targets_f = [], [] - for state, action, reward, next_state, done in minibatch: - target = reward - if not done: - target = (reward + self.gamma * - np.amax(self.model.predict(next_state)[0])) - target_f = self.model.predict(state) - target_f[0][action] = target - # Filtering out states and targets for training - states.append(state[0]) - targets_f.append(target_f[0]) - history = self.model.fit(np.array(states), np.array( - targets_f), epochs=1, verbose=0) - # Keeping track of loss - loss = history.history['loss'][0] - if self.epsilon > self.epsilon_min: - self.epsilon *= self.epsilon_decay - return loss - - def load(self, name): - self.model.load_weights(name) - - def save(self, name): - self.model.save_weights(name) - - -if __name__ == "__main__": - env = gym.make('CartPole-v1') - state_size = env.observation_space.shape[0] - action_size = env.action_space.n - agent = DQNAgent(state_size, action_size) - # agent.load("./save/cartpole-dqn.h5") - done = False - batch_size = 32 - - for e in range(EPISODES): - state = env.reset() - state = np.reshape(state, [1, state_size]) - for time in range(500): - # env.render() - action = agent.act(state) - next_state, reward, done, _ = env.step(action) - reward = reward if not done else -10 - next_state = np.reshape(next_state, [1, state_size]) - agent.remember(state, action, reward, next_state, done) - state = next_state - if done: - print("episode: {}/{}, score: {}, e: {:.2}" - .format(e, EPISODES, time, agent.epsilon)) - break - if len(agent.memory) > batch_size: - loss = agent.replay(batch_size) - # Logging training loss every 10 timesteps - if time % 10 == 0: - print("episode: {}/{}, time: {}, loss: {:.4f}" - .format(e, EPISODES, time, loss)) - # if e % 10 == 0: - # agent.save("./save/cartpole-dqn.h5") -from setuptools import find_packages, setup -setup(name="keras_segmentation", - version="0.1", - description="Image Segmentation toolkit for keras", - author="Divam Gupta", - author_email='divamgupta@gmail.com', - platforms=["any"], # or more specific, e.g. "win32", "cygwin", "osx" - license="MIT", - url="https://github.com/divamgupta/image-segmentation-keras", - packages=find_packages(), - ) - - -from . import models -from . import predict -from . import pretrained - -if __name__ == "__main__": - from . import cli_interface - cli_interface.main() - - -import sys -import argparse -from . import train -from . import predict - -from . 
import data_utils - -from .data_utils.visualize_dataset import visualize_segmentation_dataset - - -def cli_train(): - - parser = argparse.ArgumentParser() - parser.add_argument("command", type=str) - parser.add_argument("--model_name", type=str) - parser.add_argument("--train_images", type=str) - parser.add_argument("--train_annotations", type=str) - - parser.add_argument("--n_classes", type=int) - parser.add_argument("--input_height", type=int, default=None) - parser.add_argument("--input_width", type=int, default=None) - - parser.add_argument('--not_verify_dataset', action='store_false') - parser.add_argument("--checkpoints_path", type=str, default=None) - parser.add_argument("--epochs", type=int, default=5) - parser.add_argument("--batch_size", type=int, default=2) - - parser.add_argument('--validate', action='store_true') - parser.add_argument("--val_images", type=str, default="") - parser.add_argument("--val_annotations", type=str, default="") - - parser.add_argument("--val_batch_size", type=int, default=2) - parser.add_argument("--load_weights", type=str, default=None) - parser.add_argument('--auto_resume_checkpoint', action='store_true') - - parser.add_argument("--steps_per_epoch", type=int, default=512) - parser.add_argument("--optimizer_name", type=str, default="adadelta") - - args = parser.parse_args() - - assert not args.model_name is None, "Please provide model_name" - assert not args.train_images is None, "Please provide train_images" - assert not args.train_annotations is None, "Please provide train_annotations" - assert not args.n_classes is None, "Please provide n_classes" - - train.train(model=args.model_name, - train_images=args.train_images, - train_annotations=args.train_annotations, - input_height=args.input_height, - input_width=args.input_width, - n_classes=args.n_classes, - verify_dataset=args.not_verify_dataset, - checkpoints_path=args.checkpoints_path, - epochs=args.epochs, - batch_size=args.batch_size, - validate=args.validate, - val_images=args.val_images, - val_annotations=args.val_annotations, - val_batch_size=args.val_batch_size, - auto_resume_checkpoint=args.auto_resume_checkpoint, - load_weights=args.load_weights, - steps_per_epoch=args.steps_per_epoch, - optimizer_name=args.optimizer_name - ) - - -def cli_predict(): - - parser = argparse.ArgumentParser() - parser.add_argument("command", type=str) - parser.add_argument("--checkpoints_path", type=str) - parser.add_argument("--input_path", type=str, default="") - parser.add_argument("--output_path", type=str, default="") - - args = parser.parse_args() - - assert not args.checkpoints_path is None - assert not args.input_path is None - assert not args.output_path is None - - if ".jpg" in args.input_path or ".png" in args.input_path or ".jpeg" in args.input_path: - predict.predict(inp=args.input_path, out_fname=args.output_path, - checkpoints_path=args.checkpoints_path) - else: - predict.predict_multiple( - inp_dir=args.input_path, out_dir=args.output_path, checkpoints_path=args.checkpoints_path) - - -def cli_verify_dataset(): - - parser = argparse.ArgumentParser() - parser.add_argument("command", type=str) - parser.add_argument("--images_path", type=str) - parser.add_argument("--segs_path", type=str) - parser.add_argument("--n_classes", type=int) - - args = parser.parse_args() - - data_utils.data_loader.verify_segmentation_dataset( - args.images_path, args.segs_path, args.n_classes) - - -def cli_visualize_dataset(): - - parser = argparse.ArgumentParser() - parser.add_argument("command", type=str) - 
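# the positional "command" argument absorbs the subcommand name that main() has
-     # already dispatched on, so the remaining --flags parse cleanly
- 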
parser.add_argument("--images_path", type=str) - parser.add_argument("--segs_path", type=str) - parser.add_argument("--n_classes", type=int) - parser.add_argument('--do_augment', action='store_true') - - args = parser.parse_args() - - visualize_segmentation_dataset( - args.images_path, args.segs_path, args.n_classes, do_augment=args.do_augment) - - -def main(): - assert len(sys.argv) >= 2, "python -m keras_segmentation " - - command = sys.argv[1] - - if command == "train": - cli_train() - elif command == "predict": - cli_predict() - elif command == "verify_dataset": - cli_verify_dataset() - elif command == "visualize_dataset": - cli_visualize_dataset() - else: - print("Invalid command ", command) - - print(command) -import numpy as np - -EPS = 1e-12 - - -def get_iou(gt, pr, n_classes): - class_wise = np.zeros(n_classes) - for cl in range(n_classes): - intersection = np.sum((gt == cl)*(pr == cl)) - union = np.sum(np.maximum((gt == cl), (pr == cl))) - iou = float(intersection)/(union + EPS) - class_wise[cl] = iou - return class_wise -import argparse - -from keras.models import load_model -import glob -import cv2 -import numpy as np -import random -import os -from tqdm import tqdm -from .train import find_latest_checkpoint -import os -from .data_utils.data_loader import get_image_arr, get_segmentation_arr -import json -from .models.config import IMAGE_ORDERING -from . import metrics -from .models import model_from_name - -import six - -random.seed(0) -class_colors = [(random.randint(0, 255), random.randint( - 0, 255), random.randint(0, 255)) for _ in range(5000)] - - -def model_from_checkpoint_path(checkpoints_path): - - assert (os.path.isfile(checkpoints_path+"_config.json") - ), "Checkpoint not found." - model_config = json.loads( - open(checkpoints_path+"_config.json", "r").read()) - latest_weights = find_latest_checkpoint(checkpoints_path) - assert (not latest_weights is None), "Checkpoint not found." 
-     model = model_from_name[model_config['model_class']](
-         model_config['n_classes'], input_height=model_config['input_height'],
-         input_width=model_config['input_width'])
-     print("loaded weights ", latest_weights)
-     model.load_weights(latest_weights)
-     return model
- 
- 
- def predict(model=None, inp=None, out_fname=None, checkpoints_path=None):
- 
-     if model is None and (not checkpoints_path is None):
-         model = model_from_checkpoint_path(checkpoints_path)
- 
-     assert (not inp is None)
-     assert ((type(inp) is np.ndarray) or isinstance(inp, six.string_types)
-             ), "Input should be a CV image or an input file name"
- 
-     if isinstance(inp, six.string_types):
-         inp = cv2.imread(inp)
- 
-     output_width = model.output_width
-     output_height = model.output_height
-     input_width = model.input_width
-     input_height = model.input_height
-     n_classes = model.n_classes
- 
-     x = get_image_arr(inp, input_width, input_height, ordering=IMAGE_ORDERING)
-     pr = model.predict(np.array([x]))[0]
-     pr = pr.reshape((output_height, output_width, n_classes)).argmax(axis=2)
- 
-     seg_img = np.zeros((output_height, output_width, 3))
-     colors = class_colors
- 
-     for c in range(n_classes):
-         seg_img[:, :, 0] += ((pr[:, :] == c)*(colors[c][0])).astype('uint8')
-         seg_img[:, :, 1] += ((pr[:, :] == c)*(colors[c][1])).astype('uint8')
-         seg_img[:, :, 2] += ((pr[:, :] == c)*(colors[c][2])).astype('uint8')
-     seg_img = cv2.resize(seg_img, (input_width, input_height))
- 
-     if not out_fname is None:
-         cv2.imwrite(out_fname, seg_img)
- 
-     return pr
- 
- 
- def predict_multiple(model=None, inps=None, inp_dir=None, out_dir=None, checkpoints_path=None):
- 
-     if model is None and (not checkpoints_path is None):
-         model = model_from_checkpoint_path(checkpoints_path)
- 
-     if inps is None and (not inp_dir is None):
-         inps = glob.glob(os.path.join(inp_dir, "*.jpg")) + glob.glob(
-             os.path.join(inp_dir, "*.png")) + glob.glob(os.path.join(inp_dir, "*.jpeg"))
- 
-     assert type(inps) is list
- 
-     all_prs = []
- 
-     for i, inp in enumerate(tqdm(inps)):
-         if out_dir is None:
-             out_fname = None
-         else:
-             if isinstance(inp, six.string_types):
-                 out_fname = os.path.join(out_dir, os.path.basename(inp))
-             else:
-                 out_fname = os.path.join(out_dir, str(i) + ".jpg")
- 
-         pr = predict(model, inp, out_fname)
-         all_prs.append(pr)
- 
-     return all_prs
- 
- 
- def evaluate(model=None, inp_images=None, annotations=None, checkpoints_path=None):
- 
-     assert False, "not implemented"
- 
-     ious = []
-     for inp, ann in tqdm(zip(inp_images, annotations)):
-         pr = predict(model, inp)
-         gt = get_segmentation_arr(
-             ann, model.n_classes, model.output_width, model.output_height)
-         gt = gt.argmax(-1)
-         iou = metrics.get_iou(gt, pr, model.n_classes)
-         ious.append(iou)
-     ious = np.array(ious)
-     print("Class wise IoU ", np.mean(ious, axis=0))
-     print("Total IoU ", np.mean(ious))
- from keras.models import load_model
- import keras
- 
- from .models import model_from_name
- 
- 
- def model_from_checkpoint_path(model_config, latest_weights):
- 
-     model = model_from_name[model_config['model_class']](
-         model_config['n_classes'], input_height=model_config['input_height'],
-         input_width=model_config['input_width'])
-     model.load_weights(latest_weights)
-     return model
- 
- 
- def resnet_pspnet_VOC12_v0_1():
- 
-     model_config = {
-         "output_height": 96,
-         "input_height": 384,
-         "input_width": 576,
-         "n_classes": 151,
-         "model_class": "resnet50_pspnet",
-         "output_width": 144
-     }
- 
-     model_url = "https://github.com/divamgupta/image-segmentation-keras/releases/download/pretrained_model_1/r2_voc12_resnetpspnet_384x576.24"
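-     # keras.utils.get_file downloads the release asset once and caches it locally
-     # (by default under ~/.keras/datasets/), returning the cached local path on
-     # subsequent calls.
- 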
latest_weights = keras.utils.get_file(model_url.split("/")[-1], model_url) - - return model_from_checkpoint_path(model_config, latest_weights) -import argparse -import json -from .data_utils.data_loader import image_segmentation_generator, verify_segmentation_dataset -from .models import model_from_name -import os -import six - - -def find_latest_checkpoint(checkpoints_path): - ep = 0 - r = None - while True: - if os.path.isfile(checkpoints_path + "." + str(ep)): - r = checkpoints_path + "." + str(ep) - else: - return r - - ep += 1 - - -def train(model, - train_images, - train_annotations, - input_height=None, - input_width=None, - n_classes=None, - verify_dataset=True, - checkpoints_path=None, - epochs=5, - batch_size=2, - validate=False, - val_images=None, - val_annotations=None, - val_batch_size=2, - auto_resume_checkpoint=False, - load_weights=None, - steps_per_epoch=512, - optimizer_name='adadelta' - ): - - # check if user gives model name insteead of the model object - if isinstance(model, six.string_types): - # create the model from the name - assert (not n_classes is None), "Please provide the n_classes" - if (not input_height is None) and (not input_width is None): - model = model_from_name[model]( - n_classes, input_height=input_height, input_width=input_width) - else: - model = model_from_name[model](n_classes) - - n_classes = model.n_classes - input_height = model.input_height - input_width = model.input_width - output_height = model.output_height - output_width = model.output_width - - if validate: - assert not (val_images is None) - assert not (val_annotations is None) - - if not optimizer_name is None: - model.compile(loss='categorical_crossentropy', - optimizer=optimizer_name, - metrics=['accuracy']) - - if not checkpoints_path is None: - open(checkpoints_path+"_config.json", "w").write(json.dumps({ - "model_class": model.model_name, - "n_classes": n_classes, - "input_height": input_height, - "input_width": input_width, - "output_height": output_height, - "output_width": output_width - })) - - if (not (load_weights is None)) and len(load_weights) > 0: - print("Loading weights from ", load_weights) - model.load_weights(load_weights) - - if auto_resume_checkpoint and (not checkpoints_path is None): - latest_checkpoint = find_latest_checkpoint(checkpoints_path) - if not latest_checkpoint is None: - print("Loading the weights from latest checkpoint ", latest_checkpoint) - model.load_weights(latest_checkpoint) - - if verify_dataset: - print("Verifying train dataset") - verify_segmentation_dataset(train_images, train_annotations, n_classes) - if validate: - print("Verifying val dataset") - verify_segmentation_dataset(val_images, val_annotations, n_classes) - - train_gen = image_segmentation_generator( - train_images, train_annotations, batch_size, n_classes, input_height, input_width, output_height, output_width) - - if validate: - val_gen = image_segmentation_generator( - val_images, val_annotations, val_batch_size, n_classes, input_height, input_width, output_height, output_width) - - if not validate: - for ep in range(epochs): - print("Starting Epoch ", ep) - model.fit_generator(train_gen, steps_per_epoch, epochs=1) - if not checkpoints_path is None: - model.save_weights(checkpoints_path + "." + str(ep)) - print("saved ", checkpoints_path + ".model." 
+ str(ep)) - print("Finished Epoch", ep) - else: - for ep in range(epochs): - print("Starting Epoch ", ep) - model.fit_generator(train_gen, steps_per_epoch, - validation_data=val_gen, validation_steps=200, epochs=1) - if not checkpoints_path is None: - model.save_weights(checkpoints_path + "." + str(ep)) - print("saved ", checkpoints_path + ".model." + str(ep)) - print("Finished Epoch", ep) -import cv2 -import glob -from tqdm import tqdm -import numpy as np - - -data_path = "/root/Downloads/" -out_path = "/root/Downloads/prepped/" - - -all_anns = glob.glob(data_path+"gtFine/*/*/*gtFine_labelIds.png") -all_anns = sorted(all_anns) - -all_imgs = glob.glob(data_path+"leftImg8bit/*/*/*_leftImg8bit.png") -all_imgs = sorted(all_imgs) - - -pixLabels = {0: 0, 1: 0, 2: 0, 3: 0, 4: 0, 5: 0, 6: 0, 7: 1, 8: 2, 9: 0, 10: 0, 11: 3, 12: 4, 13: 5, 14: 0, 15: 0, 16: 0, 17: 6, - 18: 0, 19: 7, 20: 8, 21: 9, 22: 10, 23: 11, 24: 12, 25: 13, 26: 14, 27: 15, 28: 16, 29: 0, 30: 0, 31: 17, 32: 18, 33: 19, -1: 0} - - -for fn, fnn in tqdm(zip(all_anns, all_imgs)): - si = cv2.imread(fn) - sii = cv2.imread(fnn) - di = np.zeros(si.shape).astype('uint8') - oho = "_".join((fn.split("/")[-1].split("_")[:3])) - - allIds = np.unique(si) - for ii in allIds: - assert (ii in pixLabels) - di[:, :, 0] += ((si[:, :, 2] == ii)*(pixLabels[ii])).astype('uint8') - di[:, :, 1] += ((si[:, :, 2] == ii)*(pixLabels[ii])).astype('uint8') - di[:, :, 2] += ((si[:, :, 2] == ii)*(pixLabels[ii])).astype('uint8') - assert np.max(di[:, :, 0]) < 20 - if "/train/" in fn: - cv2.imwrite(out_path+"annotations_prepped_train/" + oho + ".png", di) - cv2.imwrite(out_path+"images_prepped_train/" + oho + ".jpg", sii) - elif "/val/" in fn: - cv2.imwrite(out_path+"annotations_prepped_val/" + oho + ".png", di) - cv2.imwrite(out_path+"images_prepped_val/" + oho + ".jpg", sii) - elif "/test/" in fn: - cv2.imwrite(out_path+"annotations_prepped_test/" + oho + ".png", di) - cv2.imwrite(out_path+"images_prepped_test/" + oho + ".jpg", sii) - else: - assert False -import pytest -from keras_segmentation import models -from keras_segmentation.models.config import IMAGE_ORDERING -import random -import numpy as np - -from keras_segmentation.data_utils.data_loader import verify_segmentation_dataset, image_segmentation_generator - -from keras_segmentation.predict import predict_multiple, predict - -tr_im = "test/example_dataset/images_prepped_train" -tr_an = "test/example_dataset/annotations_prepped_train" -te_im = "test/example_dataset/images_prepped_test" -te_an = "test/example_dataset/annotations_prepped_test" - - -def test_verify(): - verify_segmentation_dataset(tr_im, tr_an, 50) - - -def test_datag(): - g = image_segmentation_generator(images_path=tr_im, segs_path=tr_an, batch_size=3, n_classes=50, - input_height=224, input_width=324, output_height=114, output_width=134, do_augment=False) - - x, y = next(g) - assert x.shape[0] == 3 - assert y.shape[0] == 3 - assert y.shape[-1] == 50 - - -def test_model(): - model_name = "fcn_8" - h = 224 - w = 256 - n_c = 100 - check_path = "/tmp/%d" % (random.randint(0, 199999)) - - m = models.model_from_name[model_name](n_c, input_height=h, input_width=w) - - m.train(train_images=tr_im, - train_annotations=tr_an, - steps_per_epoch=2, - epochs=2, - checkpoints_path=check_path - ) - - m.predict_segmentation(np.zeros((h, w, 3))).shape - - predict_multiple( - inp_dir=te_im, checkpoints_path=check_path, out_dir="/tmp") - predict_multiple(inps=[np.zeros((h, w, 3))]*3, - checkpoints_path=check_path, out_dir="/tmp") - - o = 
predict(inp=np.zeros((h, w, 3)), checkpoints_path=check_path) - o.shape - - -# def test_models(): - - -# unet_models = [ , models.unet.vgg_unet , models.unet.resnet50_unet ] -# args = [ ( 101, 416 , 608) , ( 101, 224 , 224) , ( 101, 256 , 256 ) , ( 2, 32*4 , 32*5 ) ] -# en_level = [ 1,2,3,4 ] - -# for mf in unet_models: -# for en in en_level: -# for ar in args: -# m = mf( *ar , encoder_level=en ) - - -# m = models.unet.mobilenet_unet( 55 ) -# for ar in args: -# m = unet_mini( *ar ) - -import random -import numpy as np -import cv2 - - -seq = [None] - - -def load_aug(): - - import imgaug as ia - from imgaug import augmenters as iaa - - def sometimes(aug): return iaa.Sometimes(0.5, aug) - - seq[0] = iaa.Sequential( - [ - # apply the following augmenters to most images - iaa.Fliplr(0.5), # horizontally flip 50% of all images - iaa.Flipud(0.2), # vertically flip 20% of all images - # crop images by -5% to 10% of their height/width - sometimes(iaa.CropAndPad( - percent=(-0.05, 0.1), - pad_mode=ia.ALL, - pad_cval=(0, 255) - )), - sometimes(iaa.Affine( - # scale images to 80-120% of their size, individually per axis - scale={"x": (0.8, 1.2), "y": (0.8, 1.2)}, - # translate by -20 to +20 percent (per axis) - translate_percent={"x": (-0.2, 0.2), "y": (-0.2, 0.2)}, - rotate=(-45, 45), # rotate by -45 to +45 degrees - shear=(-16, 16), # shear by -16 to +16 degrees - # use nearest neighbour or bilinear interpolation (fast) - order=[0, 1], - # if mode is constant, use a cval between 0 and 255 - cval=(0, 255), - # use any of scikit-image's warping modes (see 2nd image from the top for examples) - mode=ia.ALL - )), - # execute 0 to 5 of the following (less important) augmenters per image - # don't execute all of them, as that would often be way too strong - iaa.SomeOf((0, 5), - [ - # convert images into their superpixel representation - sometimes(iaa.Superpixels( - p_replace=(0, 1.0), n_segments=(20, 200))), - iaa.OneOf([ - # blur images with a sigma between 0 and 3.0 - iaa.GaussianBlur((0, 3.0)), - # blur image using local means with kernel sizes between 2 and 7 - iaa.AverageBlur(k=(2, 7)), - # blur image using local medians with kernel sizes between 2 and 7 - iaa.MedianBlur(k=(3, 11)), - ]), - iaa.Sharpen(alpha=(0, 1.0), lightness=( - 0.75, 1.5)), # sharpen images - iaa.Emboss(alpha=(0, 1.0), strength=( - 0, 2.0)), # emboss images - # search either for all edges or for directed edges, - # blend the result with the original image using a blobby mask - iaa.SimplexNoiseAlpha(iaa.OneOf([ - iaa.EdgeDetect(alpha=(0.5, 1.0)), - iaa.DirectedEdgeDetect( - alpha=(0.5, 1.0), direction=(0.0, 1.0)), - ])), - # add gaussian noise to images - iaa.AdditiveGaussianNoise(loc=0, scale=( - 0.0, 0.05*255), per_channel=0.5), - iaa.OneOf([ - # randomly remove up to 10% of the pixels - iaa.Dropout((0.01, 0.1), per_channel=0.5), - iaa.CoarseDropout((0.03, 0.15), size_percent=( - 0.02, 0.05), per_channel=0.2), - ]), - # invert color channels - iaa.Invert(0.05, per_channel=True), - # change brightness of images (by -10 to 10 of original value) - iaa.Add((-10, 10), per_channel=0.5), - # change hue and saturation - iaa.AddToHueAndSaturation((-20, 20)), - # either change the brightness of the whole image (sometimes - # per channel) or change the brightness of subareas - iaa.OneOf([ - iaa.Multiply( - (0.5, 1.5), per_channel=0.5), - iaa.FrequencyNoiseAlpha( - exponent=(-4, 0), - first=iaa.Multiply( - (0.5, 1.5), per_channel=True), - second=iaa.ContrastNormalization( - (0.5, 2.0)) - ) - ]), - # improve or worsen the contrast - 
iaa.ContrastNormalization((0.5, 2.0), per_channel=0.5), - iaa.Grayscale(alpha=(0.0, 1.0)), - # move pixels locally around (with random strengths) - sometimes(iaa.ElasticTransformation( - alpha=(0.5, 3.5), sigma=0.25)), - # sometimes move parts of the image around - sometimes(iaa.PiecewiseAffine(scale=(0.01, 0.05))), - sometimes(iaa.PerspectiveTransform(scale=(0.01, 0.1))) - ], - random_order=True - ) - ], - random_order=True - ) - - -def _augment_seg(img, seg): - - import imgaug as ia - - if seq[0] is None: - load_aug() - - aug_det = seq[0].to_deterministic() - image_aug = aug_det.augment_image(img) - - segmap = ia.SegmentationMapOnImage( - seg, nb_classes=np.max(seg)+1, shape=img.shape) - segmap_aug = aug_det.augment_segmentation_maps(segmap) - segmap_aug = segmap_aug.get_arr_int() - - return image_aug, segmap_aug - - -def try_n_times(fn, n, *args, **kargs): - - attempts = 0 - - while attempts < n: - try: - return fn(*args, **kargs) - except Exception as e: - attempts += 1 - - return fn(*args, **kargs) - - -def augment_seg(img, seg): - return try_n_times(_augment_seg, 10, img, seg) - -import numpy as np -import cv2 -import glob -import itertools -import os -from tqdm import tqdm - -from ..models.config import IMAGE_ORDERING -from .augmentation import augment_seg -import random - -random.seed(0) -class_colors = [(random.randint(0, 255), random.randint( - 0, 255), random.randint(0, 255)) for _ in range(5000)] - - -def get_pairs_from_paths(images_path, segs_path): - images = glob.glob(os.path.join(images_path, "*.jpg")) + glob.glob(os.path.join( - images_path, "*.png")) + glob.glob(os.path.join(images_path, "*.jpeg")) - segmentations = glob.glob(os.path.join(segs_path, "*.png")) - - segmentations_d = dict(zip(segmentations, segmentations)) - - ret = [] - - for im in images: - seg_bnme = os.path.basename(im).replace( - ".jpg", ".png").replace(".jpeg", ".png") - seg = os.path.join(segs_path, seg_bnme) - assert (seg in segmentations_d), (im + " is present in "+images_path + " but " + - seg_bnme+" is not found in "+segs_path + " . 
Make sure annotation images are in .png")
-         ret.append((im, seg))
- 
-     return ret
- 
- 
- def get_image_arr(path, width, height, imgNorm="sub_mean", ordering='channels_first'):
- 
-     if type(path) is np.ndarray:
-         img = path
-     else:
-         img = cv2.imread(path, 1)
- 
-     if imgNorm == "sub_and_divide":
-         img = np.float32(cv2.resize(img, (width, height))) / 127.5 - 1
-     elif imgNorm == "sub_mean":
-         # subtract the per-channel BGR means used by the pretrained encoders
-         img = cv2.resize(img, (width, height))
-         img = img.astype(np.float32)
-         img[:, :, 0] -= 103.939
-         img[:, :, 1] -= 116.779
-         img[:, :, 2] -= 123.68
-     elif imgNorm == "divide":
-         img = cv2.resize(img, (width, height))
-         img = img.astype(np.float32)
-         img = img/255.0
- 
-     if ordering == 'channels_first':
-         img = np.rollaxis(img, 2, 0)
-     return img
- 
- 
- def get_segmentation_arr(path, nClasses, width, height, no_reshape=False):
- 
-     seg_labels = np.zeros((height, width, nClasses))
- 
-     if type(path) is np.ndarray:
-         img = path
-     else:
-         img = cv2.imread(path, 1)
- 
-     img = cv2.resize(img, (width, height), interpolation=cv2.INTER_NEAREST)
-     img = img[:, :, 0]
- 
-     for c in range(nClasses):
-         seg_labels[:, :, c] = (img == c).astype(int)
- 
-     if no_reshape:
-         return seg_labels
- 
-     seg_labels = np.reshape(seg_labels, (width*height, nClasses))
-     return seg_labels
- 
- 
- def verify_segmentation_dataset(images_path, segs_path, n_classes):
- 
-     img_seg_pairs = get_pairs_from_paths(images_path, segs_path)
- 
-     assert len(img_seg_pairs) > 0, "Dataset looks empty or path is wrong"
- 
-     for im_fn, seg_fn in tqdm(img_seg_pairs):
-         img = cv2.imread(im_fn)
-         seg = cv2.imread(seg_fn)
- 
-         assert (img.shape[0] == seg.shape[0] and img.shape[1] == seg.shape[1]
-                 ), "The sizes of the image and the annotation do not match, or they are corrupt: " + im_fn + " " + seg_fn
-         assert (np.max(seg[:, :, 0]) < n_classes), "The pixel values of the seg image should be from 0 to " + \
-             str(n_classes-1) + " . Found pixel value " + \
-             str(np.max(seg[:, :, 0]))
- 
-     print("Dataset verified! ")
- 
- 
- def image_segmentation_generator(images_path, segs_path, batch_size, n_classes, input_height, input_width, output_height, output_width, do_augment=False):
- 
-     img_seg_pairs = get_pairs_from_paths(images_path, segs_path)
-     random.shuffle(img_seg_pairs)
-     zipped = itertools.cycle(img_seg_pairs)
- 
-     while True:
-         X = []
-         Y = []
-         for _ in range(batch_size):
-             im, seg = next(zipped)
- 
-             im = cv2.imread(im, 1)
-             seg = cv2.imread(seg, 1)
- 
-             if do_augment:
-                 # augment the image and its mask jointly so they stay aligned
-                 im, seg[:, :, 0] = augment_seg(im, seg[:, :, 0])
- 
-             X.append(get_image_arr(im, input_width,
-                                    input_height, ordering=IMAGE_ORDERING))
-             Y.append(get_segmentation_arr(
-                 seg, n_classes, output_width, output_height))
- 
-         yield np.array(X), np.array(Y)
- 
- import glob
- import numpy as np
- import cv2
- import random
- import argparse
- 
- from .augmentation import augment_seg
- from .data_loader import get_pairs_from_paths
- 
- random.seed(0)
- class_colors = [(random.randint(0, 255), random.randint(
-     0, 255), random.randint(0, 255)) for _ in range(5000)]
- 
- 
- def visualize_segmentation_dataset(images_path, segs_path, n_classes, do_augment=False):
- 
-     img_seg_pairs = get_pairs_from_paths(images_path, segs_path)
- 
-     colors = class_colors
- 
-     print("Press any key to navigate. 
") - for im_fn, seg_fn in img_seg_pairs: - - img = cv2.imread(im_fn) - seg = cv2.imread(seg_fn) - print("Found the following classes", np.unique(seg)) - - seg_img = np.zeros_like(seg) - - if do_augment: - img, seg[:, :, 0] = augment_seg(img, seg[:, :, 0]) - - for c in range(n_classes): - seg_img[:, :, 0] += ((seg[:, :, 0] == c) * - (colors[c][0])).astype('uint8') - seg_img[:, :, 1] += ((seg[:, :, 0] == c) * - (colors[c][1])).astype('uint8') - seg_img[:, :, 2] += ((seg[:, :, 0] == c) * - (colors[c][2])).astype('uint8') - - cv2.imshow("img", img) - cv2.imshow("seg_img", seg_img) - cv2.waitKey() - - -def visualize_segmentation_dataset_one(images_path, segs_path, n_classes, do_augment=False, no_show=False): - - img_seg_pairs = get_pairs_from_paths(images_path, segs_path) - - colors = class_colors - - im_fn, seg_fn = random.choice(img_seg_pairs) - - img = cv2.imread(im_fn) - seg = cv2.imread(seg_fn) - print("Found the following classes", np.unique(seg)) - - seg_img = np.zeros_like(seg) - - if do_augment: - img, seg[:, :, 0] = augment_seg(img, seg[:, :, 0]) - - for c in range(n_classes): - seg_img[:, :, 0] += ((seg[:, :, 0] == c) * - (colors[c][0])).astype('uint8') - seg_img[:, :, 1] += ((seg[:, :, 0] == c) * - (colors[c][1])).astype('uint8') - seg_img[:, :, 2] += ((seg[:, :, 0] == c) * - (colors[c][2])).astype('uint8') - - if not no_show: - cv2.imshow("img", img) - cv2.imshow("seg_img", seg_img) - cv2.waitKey() - - return img, seg_img - - -if __name__ == "__main__": - - parser = argparse.ArgumentParser() - parser.add_argument("--images", type=str) - parser.add_argument("--annotations", type=str) - parser.add_argument("--n_classes", type=int) - args = parser.parse_args() - - visualize_segmentation_dataset( - args.images, args.annotations, args.n_classes) - -from . import pspnet -from . import unet -from . import segnet -from . 
import fcn -model_from_name = {} - - -model_from_name["fcn_8"] = fcn.fcn_8 -model_from_name["fcn_32"] = fcn.fcn_32 -model_from_name["fcn_8_vgg"] = fcn.fcn_8_vgg -model_from_name["fcn_32_vgg"] = fcn.fcn_32_vgg -model_from_name["fcn_8_resnet50"] = fcn.fcn_8_resnet50 -model_from_name["fcn_32_resnet50"] = fcn.fcn_32_resnet50 -model_from_name["fcn_8_mobilenet"] = fcn.fcn_8_mobilenet -model_from_name["fcn_32_mobilenet"] = fcn.fcn_32_mobilenet - - -model_from_name["pspnet"] = pspnet.pspnet -model_from_name["vgg_pspnet"] = pspnet.vgg_pspnet -model_from_name["resnet50_pspnet"] = pspnet.resnet50_pspnet -# model_from_name["mobilenet_pspnet"] = pspnet.mobilenet_pspnet - - -model_from_name["unet_mini"] = unet.unet_mini -model_from_name["unet"] = unet.unet -model_from_name["vgg_unet"] = unet.vgg_unet -model_from_name["resnet50_unet"] = unet.resnet50_unet -model_from_name["mobilenet_unet"] = unet.mobilenet_unet - - -model_from_name["segnet"] = segnet.segnet -model_from_name["vgg_segnet"] = segnet.vgg_segnet -model_from_name["resnet50_segnet"] = segnet.resnet50_segnet -model_from_name["mobilenet_segnet"] = segnet.mobilenet_segnet - -from keras.models import * -from keras.layers import * - -import keras.backend as K -from .config import IMAGE_ORDERING - - -def vanilla_encoder(input_height=224, input_width=224): - - kernel = 3 - filter_size = 64 - pad = 1 - pool_size = 2 - - if IMAGE_ORDERING == 'channels_first': - img_input = Input(shape=(3, input_height, input_width)) - elif IMAGE_ORDERING == 'channels_last': - img_input = Input(shape=(input_height, input_width, 3)) - - x = img_input - levels = [] - - x = (ZeroPadding2D((pad, pad), data_format=IMAGE_ORDERING))(x) - x = (Conv2D(filter_size, (kernel, kernel), - data_format=IMAGE_ORDERING, padding='valid'))(x) - x = (BatchNormalization())(x) - x = (Activation('relu'))(x) - x = (MaxPooling2D((pool_size, pool_size), data_format=IMAGE_ORDERING))(x) - levels.append(x) - - x = (ZeroPadding2D((pad, pad), data_format=IMAGE_ORDERING))(x) - x = (Conv2D(128, (kernel, kernel), data_format=IMAGE_ORDERING, padding='valid'))(x) - x = (BatchNormalization())(x) - x = (Activation('relu'))(x) - x = (MaxPooling2D((pool_size, pool_size), data_format=IMAGE_ORDERING))(x) - levels.append(x) - - for _ in range(3): - x = (ZeroPadding2D((pad, pad), data_format=IMAGE_ORDERING))(x) - x = (Conv2D(256, (kernel, kernel), - data_format=IMAGE_ORDERING, padding='valid'))(x) - x = (BatchNormalization())(x) - x = (Activation('relu'))(x) - x = (MaxPooling2D((pool_size, pool_size), data_format=IMAGE_ORDERING))(x) - levels.append(x) - - return img_input, levels - -IMAGE_ORDERING = 'channels_last' - -from keras.models import * -from keras.layers import * - - -from .config import IMAGE_ORDERING -from .model_utils import get_segmentation_model -from .vgg16 import get_vgg_encoder -from .mobilenet import get_mobilenet_encoder -from .basic_models import vanilla_encoder -from .resnet50 import get_resnet50_encoder - - -# crop o1 wrt o2 -def crop(o1, o2, i): - o_shape2 = Model(i, o2).output_shape - - if IMAGE_ORDERING == 'channels_first': - output_height2 = o_shape2[2] - output_width2 = o_shape2[3] - else: - output_height2 = o_shape2[1] - output_width2 = o_shape2[2] - - o_shape1 = Model(i, o1).output_shape - if IMAGE_ORDERING == 'channels_first': - output_height1 = o_shape1[2] - output_width1 = o_shape1[3] - else: - output_height1 = o_shape1[1] - output_width1 = o_shape1[2] - - cx = abs(output_width1 - output_width2) - cy = abs(output_height2 - output_height1) - - if output_width1 > output_width2: - o1 = 
Cropping2D(cropping=((0, 0), (0, cx)), - data_format=IMAGE_ORDERING)(o1) - else: - o2 = Cropping2D(cropping=((0, 0), (0, cx)), - data_format=IMAGE_ORDERING)(o2) - - if output_height1 > output_height2: - o1 = Cropping2D(cropping=((0, cy), (0, 0)), - data_format=IMAGE_ORDERING)(o1) - else: - o2 = Cropping2D(cropping=((0, cy), (0, 0)), - data_format=IMAGE_ORDERING)(o2) - - return o1, o2 - - -def fcn_8(n_classes, encoder=vanilla_encoder, input_height=416, input_width=608): - - img_input, levels = encoder( - input_height=input_height, input_width=input_width) - [f1, f2, f3, f4, f5] = levels - - o = f5 - - o = (Conv2D(4096, (7, 7), activation='relu', - padding='same', data_format=IMAGE_ORDERING))(o) - o = Dropout(0.5)(o) - o = (Conv2D(4096, (1, 1), activation='relu', - padding='same', data_format=IMAGE_ORDERING))(o) - o = Dropout(0.5)(o) - - o = (Conv2D(n_classes, (1, 1), kernel_initializer='he_normal', - data_format=IMAGE_ORDERING))(o) - o = Conv2DTranspose(n_classes, kernel_size=(4, 4), strides=( - 2, 2), use_bias=False, data_format=IMAGE_ORDERING)(o) - - o2 = f4 - o2 = (Conv2D(n_classes, (1, 1), kernel_initializer='he_normal', - data_format=IMAGE_ORDERING))(o2) - - o, o2 = crop(o, o2, img_input) - - o = Add()([o, o2]) - - o = Conv2DTranspose(n_classes, kernel_size=(4, 4), strides=( - 2, 2), use_bias=False, data_format=IMAGE_ORDERING)(o) - o2 = f3 - o2 = (Conv2D(n_classes, (1, 1), kernel_initializer='he_normal', - data_format=IMAGE_ORDERING))(o2) - o2, o = crop(o2, o, img_input) - o = Add()([o2, o]) - - o = Conv2DTranspose(n_classes, kernel_size=(16, 16), strides=( - 8, 8), use_bias=False, data_format=IMAGE_ORDERING)(o) - - model = get_segmentation_model(img_input, o) - model.model_name = "fcn_8" - return model - - -def fcn_32(n_classes, encoder=vanilla_encoder, input_height=416, input_width=608): - - img_input, levels = encoder( - input_height=input_height, input_width=input_width) - [f1, f2, f3, f4, f5] = levels - - o = f5 - - o = (Conv2D(4096, (7, 7), activation='relu', - padding='same', data_format=IMAGE_ORDERING))(o) - o = Dropout(0.5)(o) - o = (Conv2D(4096, (1, 1), activation='relu', - padding='same', data_format=IMAGE_ORDERING))(o) - o = Dropout(0.5)(o) - - o = (Conv2D(n_classes, (1, 1), kernel_initializer='he_normal', - data_format=IMAGE_ORDERING))(o) - o = Conv2DTranspose(n_classes, kernel_size=(64, 64), strides=( - 32, 32), use_bias=False, data_format=IMAGE_ORDERING)(o) - - model = get_segmentation_model(img_input, o) - model.model_name = "fcn_32" - return model - - -def fcn_8_vgg(n_classes, input_height=416, input_width=608): - model = fcn_8(n_classes, get_vgg_encoder, - input_height=input_height, input_width=input_width) - model.model_name = "fcn_8_vgg" - return model - - -def fcn_32_vgg(n_classes, input_height=416, input_width=608): - model = fcn_32(n_classes, get_vgg_encoder, - input_height=input_height, input_width=input_width) - model.model_name = "fcn_32_vgg" - return model - - -def fcn_8_resnet50(n_classes, input_height=416, input_width=608): - model = fcn_8(n_classes, get_resnet50_encoder, - input_height=input_height, input_width=input_width) - model.model_name = "fcn_8_resnet50" - return model - - -def fcn_32_resnet50(n_classes, input_height=416, input_width=608): - model = fcn_32(n_classes, get_resnet50_encoder, - input_height=input_height, input_width=input_width) - model.model_name = "fcn_32_resnet50" - return model - - -def fcn_8_mobilenet(n_classes, input_height=416, input_width=608): - model = fcn_8(n_classes, get_mobilenet_encoder, - input_height=input_height, 
input_width=input_width) - model.model_name = "fcn_8_mobilenet" - return model - - -def fcn_32_mobilenet(n_classes, input_height=416, input_width=608): - model = fcn_32(n_classes, get_mobilenet_encoder, - input_height=input_height, input_width=input_width) - model.model_name = "fcn_32_mobilenet" - return model - - -if __name__ == '__main__': - m = fcn_8(101) - m = fcn_32(101) - -from .config import IMAGE_ORDERING -from keras.models import * -from keras.layers import * -import keras.backend as K -import keras - - -BASE_WEIGHT_PATH = ('https://github.com/fchollet/deep-learning-models/' - 'releases/download/v0.6/') - - -def relu6(x): - return K.relu(x, max_value=6) - - -def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): - - channel_axis = 1 if IMAGE_ORDERING == 'channels_first' else -1 - filters = int(filters * alpha) - x = ZeroPadding2D(padding=(1, 1), name='conv1_pad', - data_format=IMAGE_ORDERING)(inputs) - x = Conv2D(filters, kernel, data_format=IMAGE_ORDERING, - padding='valid', - use_bias=False, - strides=strides, - name='conv1')(x) - x = BatchNormalization(axis=channel_axis, name='conv1_bn')(x) - return Activation(relu6, name='conv1_relu')(x) - - -def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, - depth_multiplier=1, strides=(1, 1), block_id=1): - - channel_axis = 1 if IMAGE_ORDERING == 'channels_first' else -1 - pointwise_conv_filters = int(pointwise_conv_filters * alpha) - - x = ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING, - name='conv_pad_%d' % block_id)(inputs) - x = DepthwiseConv2D((3, 3), data_format=IMAGE_ORDERING, - padding='valid', - depth_multiplier=depth_multiplier, - strides=strides, - use_bias=False, - name='conv_dw_%d' % block_id)(x) - x = BatchNormalization( - axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x) - x = Activation(relu6, name='conv_dw_%d_relu' % block_id)(x) - - x = Conv2D(pointwise_conv_filters, (1, 1), data_format=IMAGE_ORDERING, - padding='same', - use_bias=False, - strides=(1, 1), - name='conv_pw_%d' % block_id)(x) - x = BatchNormalization(axis=channel_axis, - name='conv_pw_%d_bn' % block_id)(x) - return Activation(relu6, name='conv_pw_%d_relu' % block_id)(x) - - -def get_mobilenet_encoder(input_height=224, input_width=224, pretrained='imagenet'): - - # todo add more alpha and stuff - - assert (K.image_data_format() == - 'channels_last'), "Currently only channels last mode is supported" - assert (IMAGE_ORDERING == - 'channels_last'), "Currently only channels last mode is supported" - assert (input_height == 224), "For mobilenet , 224 input_height is supported " - assert (input_width == 224), "For mobilenet , 224 width is supported " - - assert input_height % 32 == 0 - assert input_width % 32 == 0 - - alpha = 1.0 - depth_multiplier = 1 - dropout = 1e-3 - - img_input = Input(shape=(input_height, input_width, 3)) - - x = _conv_block(img_input, 32, alpha, strides=(2, 2)) - x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1) - f1 = x - - x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, - strides=(2, 2), block_id=2) - x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3) - f2 = x - - x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, - strides=(2, 2), block_id=4) - x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5) - f3 = x - - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, - strides=(2, 2), block_id=6) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7) - x = _depthwise_conv_block(x, 512, alpha, 
depth_multiplier, block_id=8) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11) - f4 = x - - x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, - strides=(2, 2), block_id=12) - x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) - f5 = x - - if pretrained == 'imagenet': - model_name = 'mobilenet_%s_%d_tf_no_top.h5' % ('1_0', 224) - - weight_path = BASE_WEIGHT_PATH + model_name - weights_path = keras.utils.get_file(model_name, weight_path) - - Model(img_input, x).load_weights(weights_path) - - return img_input, [f1, f2, f3, f4, f5] -from keras.models import * -from keras.layers import * - -import keras.backend as K -from types import MethodType - - -from .config import IMAGE_ORDERING -from ..train import train -from ..predict import predict, predict_multiple, evaluate - - -def resize_image(inp, s, data_format): - - try: - - return Lambda(lambda x: K.resize_images(x, - height_factor=s[0], - width_factor=s[1], - data_format=data_format, - interpolation='bilinear'))(inp) - - except Exception as e: - - # if keras is old , then rely on the tf function ... sorry theono/cntk users . - assert data_format == 'channels_last' - assert IMAGE_ORDERING == 'channels_last' - - import tensorflow as tf - - return Lambda( - lambda x: tf.image.resize_images( - x, (K.int_shape(x)[1]*s[0], K.int_shape(x)[2]*s[1])) - )(inp) - - -def get_segmentation_model(input, output): - - img_input = input - o = output - - o_shape = Model(img_input, o).output_shape - i_shape = Model(img_input, o).input_shape - - if IMAGE_ORDERING == 'channels_first': - output_height = o_shape[2] - output_width = o_shape[3] - input_height = i_shape[2] - input_width = i_shape[3] - n_classes = o_shape[1] - o = (Reshape((-1, output_height*output_width)))(o) - o = (Permute((2, 1)))(o) - elif IMAGE_ORDERING == 'channels_last': - output_height = o_shape[1] - output_width = o_shape[2] - input_height = i_shape[1] - input_width = i_shape[2] - n_classes = o_shape[3] - o = (Reshape((output_height*output_width, -1)))(o) - - o = (Activation('softmax'))(o) - model = Model(img_input, o) - model.output_width = output_width - model.output_height = output_height - model.n_classes = n_classes - model.input_height = input_height - model.input_width = input_width - model.model_name = "" - - model.train = MethodType(train, model) - model.predict_segmentation = MethodType(predict, model) - model.predict_multiple = MethodType(predict_multiple, model) - model.evaluate_segmentation = MethodType(evaluate, model) - - return model -import keras -from keras.models import * -from keras.layers import * -import keras.backend as K - - -from .config import IMAGE_ORDERING -from .model_utils import get_segmentation_model, resize_image -from .vgg16 import get_vgg_encoder -from .mobilenet import get_mobilenet_encoder -from .basic_models import vanilla_encoder -from .resnet50 import get_resnet50_encoder - - -if IMAGE_ORDERING == 'channels_first': - MERGE_AXIS = 1 -elif IMAGE_ORDERING == 'channels_last': - MERGE_AXIS = -1 - - -def pool_block(feats, pool_factor): - - if IMAGE_ORDERING == 'channels_first': - h = K.int_shape(feats)[2] - w = K.int_shape(feats)[3] - elif IMAGE_ORDERING == 'channels_last': - h = K.int_shape(feats)[1] - w = K.int_shape(feats)[2] - - pool_size = strides = [ - int(np.round(float(h) / pool_factor)), int(np.round(float(w) / pool_factor))] - - x = 
AveragePooling2D(pool_size, data_format=IMAGE_ORDERING, - strides=strides, padding='same')(feats) - x = Conv2D(512, (1, 1), data_format=IMAGE_ORDERING, - padding='same', use_bias=False)(x) - x = BatchNormalization()(x) - x = Activation('relu')(x) - - x = resize_image(x, strides, data_format=IMAGE_ORDERING) - - return x - - -def _pspnet(n_classes, encoder, input_height=384, input_width=576): - - assert input_height % 192 == 0 - assert input_width % 192 == 0 - - img_input, levels = encoder( - input_height=input_height, input_width=input_width) - [f1, f2, f3, f4, f5] = levels - - o = f5 - - pool_factors = [1, 2, 3, 6] - pool_outs = [o] - - for p in pool_factors: - pooled = pool_block(o, p) - pool_outs.append(pooled) - - o = Concatenate(axis=MERGE_AXIS)(pool_outs) - - o = Conv2D(512, (1, 1), data_format=IMAGE_ORDERING, use_bias=False)(o) - o = BatchNormalization()(o) - o = Activation('relu')(o) - - o = Conv2D(n_classes, (3, 3), data_format=IMAGE_ORDERING, padding='same')(o) - o = resize_image(o, (8, 8), data_format=IMAGE_ORDERING) - - model = get_segmentation_model(img_input, o) - return model - - -def pspnet(n_classes, input_height=384, input_width=576): - - model = _pspnet(n_classes, vanilla_encoder, - input_height=input_height, input_width=input_width) - model.model_name = "pspnet" - return model - - -def vgg_pspnet(n_classes, input_height=384, input_width=576): - - model = _pspnet(n_classes, get_vgg_encoder, - input_height=input_height, input_width=input_width) - model.model_name = "vgg_pspnet" - return model - - -def resnet50_pspnet(n_classes, input_height=384, input_width=576): - - model = _pspnet(n_classes, get_resnet50_encoder, - input_height=input_height, input_width=input_width) - model.model_name = "resnet50_pspnet" - return model - - -# def mobilenet_pspnet( n_classes , input_height=224, input_width=224 ): - -# model = _pspnet( n_classes , get_mobilenet_encoder , input_height=input_height, input_width=input_width ) -# model.model_name = "mobilenet_pspnet" -# return model - - -if __name__ == '__main__': - - m = _pspnet(101, vanilla_encoder) - # m = _pspnet( 101 , get_mobilenet_encoder ,True , 224 , 224 ) - m = _pspnet(101, get_vgg_encoder) - m = _pspnet(101, get_resnet50_encoder) -import keras -from keras.models import * -from keras.layers import * -from keras import layers -import keras.backend as K - -# code taken from https://github.com/fchollet/deep-learning-models/blob/master/resnet50.py - - -from .config import IMAGE_ORDERING - - -if IMAGE_ORDERING == 'channels_first': - pretrained_url = "https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5" -elif IMAGE_ORDERING == 'channels_last': - pretrained_url = "https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5" - - -def one_side_pad(x): - x = ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING)(x) - if IMAGE_ORDERING == 'channels_first': - x = Lambda(lambda x: x[:, :, :-1, :-1])(x) - elif IMAGE_ORDERING == 'channels_last': - x = Lambda(lambda x: x[:, :-1, :-1, :])(x) - return x - - -def identity_block(input_tensor, kernel_size, filters, stage, block): - """The identity block is the block that has no conv layer at shortcut. 
-    # Arguments
-        input_tensor: input tensor
-        kernel_size: default 3, the kernel size of middle conv layer at main path
-        filters: list of integers, the filters of the 3 conv layers at the main path
-        stage: integer, current stage label, used for generating layer names
-        block: 'a','b'..., current block label, used for generating layer names
-    # Returns
-        Output tensor for the block.
-    """
-    filters1, filters2, filters3 = filters
-
-    if IMAGE_ORDERING == 'channels_last':
-        bn_axis = 3
-    else:
-        bn_axis = 1
-
-    conv_name_base = 'res' + str(stage) + block + '_branch'
-    bn_name_base = 'bn' + str(stage) + block + '_branch'
-
-    x = Conv2D(filters1, (1, 1), data_format=IMAGE_ORDERING,
-               name=conv_name_base + '2a')(input_tensor)
-    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
-    x = Activation('relu')(x)
-
-    x = Conv2D(filters2, kernel_size, data_format=IMAGE_ORDERING,
-               padding='same', name=conv_name_base + '2b')(x)
-    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
-    x = Activation('relu')(x)
-
-    x = Conv2D(filters3, (1, 1), data_format=IMAGE_ORDERING,
-               name=conv_name_base + '2c')(x)
-    x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
-
-    x = layers.add([x, input_tensor])
-    x = Activation('relu')(x)
-    return x
-
-
-def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
-    """conv_block is the block that has a conv layer at shortcut
-    # Arguments
-        input_tensor: input tensor
-        kernel_size: default 3, the kernel size of middle conv layer at main path
-        filters: list of integers, the filters of the 3 conv layers at the main path
-        stage: integer, current stage label, used for generating layer names
-        block: 'a','b'..., current block label, used for generating layer names
-    # Returns
-        Output tensor for the block.
- Note that from stage 3, the first conv layer at main path is with strides=(2,2) - And the shortcut should have strides=(2,2) as well - """ - filters1, filters2, filters3 = filters - - if IMAGE_ORDERING == 'channels_last': - bn_axis = 3 - else: - bn_axis = 1 - - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = Conv2D(filters1, (1, 1), data_format=IMAGE_ORDERING, strides=strides, - name=conv_name_base + '2a')(input_tensor) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) - x = Activation('relu')(x) - - x = Conv2D(filters2, kernel_size, data_format=IMAGE_ORDERING, padding='same', - name=conv_name_base + '2b')(x) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) - x = Activation('relu')(x) - - x = Conv2D(filters3, (1, 1), data_format=IMAGE_ORDERING, - name=conv_name_base + '2c')(x) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) - - shortcut = Conv2D(filters3, (1, 1), data_format=IMAGE_ORDERING, strides=strides, - name=conv_name_base + '1')(input_tensor) - shortcut = BatchNormalization( - axis=bn_axis, name=bn_name_base + '1')(shortcut) - - x = layers.add([x, shortcut]) - x = Activation('relu')(x) - return x - - -def get_resnet50_encoder(input_height=224, input_width=224, pretrained='imagenet', - - include_top=True, weights='imagenet', - input_tensor=None, input_shape=None, - pooling=None, - classes=1000): - - assert input_height % 32 == 0 - assert input_width % 32 == 0 - - if IMAGE_ORDERING == 'channels_first': - img_input = Input(shape=(3, input_height, input_width)) - elif IMAGE_ORDERING == 'channels_last': - img_input = Input(shape=(input_height, input_width, 3)) - - if IMAGE_ORDERING == 'channels_last': - bn_axis = 3 - else: - bn_axis = 1 - - x = ZeroPadding2D((3, 3), data_format=IMAGE_ORDERING)(img_input) - x = Conv2D(64, (7, 7), data_format=IMAGE_ORDERING, - strides=(2, 2), name='conv1')(x) - f1 = x - - x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) - x = Activation('relu')(x) - x = MaxPooling2D((3, 3), data_format=IMAGE_ORDERING, strides=(2, 2))(x) - - x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) - x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') - x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') - f2 = one_side_pad(x) - - x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') - f3 = x - - x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') - f4 = x - - x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') - f5 = x - - x = AveragePooling2D( - (7, 7), data_format=IMAGE_ORDERING, name='avg_pool')(x) - # f6 = x - - if pretrained == 'imagenet': - weights_path = keras.utils.get_file( - pretrained_url.split("/")[-1], pretrained_url) - Model(img_input, x).load_weights(weights_path) - - return img_input, [f1, f2, f3, f4, f5] - - -from keras.models import * 
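Every encoder in this dump follows the same contract: it returns the input tensor plus a five-level feature pyramid `[f1, f2, f3, f4, f5]`, which decoders such as the SegNet decoder below tap into via `encoder_level`. A minimal sketch of that contract, with a hypothetical `toy_encoder` standing in for the real backbones:

```python
# Sketch of the (img_input, [f1..f5]) contract; toy_encoder is hypothetical,
# not part of the scraped library.
from keras.layers import Conv2D, Input, MaxPooling2D

def toy_encoder(input_height=224, input_width=224):
    img_input = Input(shape=(input_height, input_width, 3))
    levels = []
    x = img_input
    for filters in [64, 128, 256, 512, 512]:
        x = Conv2D(filters, (3, 3), activation='relu', padding='same')(x)
        x = MaxPooling2D((2, 2))(x)  # halve the spatial resolution per level
        levels.append(x)
    return img_input, levels

img_input, levels = toy_encoder()
feat = levels[3]  # what _segnet picks with encoder_level=3 (a 14x14x512 map here)
```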
-from keras.layers import * -import os - -from .config import IMAGE_ORDERING -from .model_utils import get_segmentation_model -from .vgg16 import get_vgg_encoder -from .mobilenet import get_mobilenet_encoder -from .basic_models import vanilla_encoder -from .resnet50 import get_resnet50_encoder - - -def segnet_decoder(f, n_classes, n_up=3): - - assert n_up >= 2 - - o = f - o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) - o = (Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) - o = (BatchNormalization())(o) - - o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) - o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) - o = (Conv2D(256, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) - o = (BatchNormalization())(o) - - for _ in range(n_up-2): - o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) - o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) - o = (Conv2D(128, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) - o = (BatchNormalization())(o) - - o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) - o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) - o = (Conv2D(64, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) - o = (BatchNormalization())(o) - - o = Conv2D(n_classes, (3, 3), padding='same', - data_format=IMAGE_ORDERING)(o) - - return o - - -def _segnet(n_classes, encoder, input_height=416, input_width=608, encoder_level=3): - - img_input, levels = encoder( - input_height=input_height, input_width=input_width) - - feat = levels[encoder_level] - o = segnet_decoder(feat, n_classes, n_up=3) - model = get_segmentation_model(img_input, o) - - return model - - -def segnet(n_classes, input_height=416, input_width=608, encoder_level=3): - - model = _segnet(n_classes, vanilla_encoder, input_height=input_height, - input_width=input_width, encoder_level=encoder_level) - model.model_name = "segnet" - return model - - -def vgg_segnet(n_classes, input_height=416, input_width=608, encoder_level=3): - - model = _segnet(n_classes, get_vgg_encoder, input_height=input_height, - input_width=input_width, encoder_level=encoder_level) - model.model_name = "vgg_segnet" - return model - - -def resnet50_segnet(n_classes, input_height=416, input_width=608, encoder_level=3): - - model = _segnet(n_classes, get_resnet50_encoder, input_height=input_height, - input_width=input_width, encoder_level=encoder_level) - model.model_name = "resnet50_segnet" - return model - - -def mobilenet_segnet(n_classes, input_height=224, input_width=224, encoder_level=3): - - model = _segnet(n_classes, get_mobilenet_encoder, input_height=input_height, - input_width=input_width, encoder_level=encoder_level) - model.model_name = "mobilenet_segnet" - return model - - -if __name__ == '__main__': - m = vgg_segnet(101) - m = segnet(101) - # m = mobilenet_segnet( 101 ) - # from keras.utils import plot_model - # plot_model( m , show_shapes=True , to_file='model.png') -from keras.models import * -from keras.layers import * - -from .config import IMAGE_ORDERING -from .model_utils import get_segmentation_model -from .vgg16 import get_vgg_encoder -from .mobilenet import get_mobilenet_encoder -from .basic_models import vanilla_encoder -from .resnet50 import get_resnet50_encoder - - -if IMAGE_ORDERING == 'channels_first': - MERGE_AXIS = 1 -elif IMAGE_ORDERING == 'channels_last': - MERGE_AXIS = -1 - - -def unet_mini(n_classes, input_height=360, input_width=480): - - if IMAGE_ORDERING == 'channels_first': - img_input = Input(shape=(3, input_height, 
input_width)) - elif IMAGE_ORDERING == 'channels_last': - img_input = Input(shape=(input_height, input_width, 3)) - - conv1 = Conv2D(32, (3, 3), data_format=IMAGE_ORDERING, - activation='relu', padding='same')(img_input) - conv1 = Dropout(0.2)(conv1) - conv1 = Conv2D(32, (3, 3), data_format=IMAGE_ORDERING, - activation='relu', padding='same')(conv1) - pool1 = MaxPooling2D((2, 2), data_format=IMAGE_ORDERING)(conv1) - - conv2 = Conv2D(64, (3, 3), data_format=IMAGE_ORDERING, - activation='relu', padding='same')(pool1) - conv2 = Dropout(0.2)(conv2) - conv2 = Conv2D(64, (3, 3), data_format=IMAGE_ORDERING, - activation='relu', padding='same')(conv2) - pool2 = MaxPooling2D((2, 2), data_format=IMAGE_ORDERING)(conv2) - - conv3 = Conv2D(128, (3, 3), data_format=IMAGE_ORDERING, - activation='relu', padding='same')(pool2) - conv3 = Dropout(0.2)(conv3) - conv3 = Conv2D(128, (3, 3), data_format=IMAGE_ORDERING, - activation='relu', padding='same')(conv3) - - up1 = concatenate([UpSampling2D((2, 2), data_format=IMAGE_ORDERING)( - conv3), conv2], axis=MERGE_AXIS) - conv4 = Conv2D(64, (3, 3), data_format=IMAGE_ORDERING, - activation='relu', padding='same')(up1) - conv4 = Dropout(0.2)(conv4) - conv4 = Conv2D(64, (3, 3), data_format=IMAGE_ORDERING, - activation='relu', padding='same')(conv4) - - up2 = concatenate([UpSampling2D((2, 2), data_format=IMAGE_ORDERING)( - conv4), conv1], axis=MERGE_AXIS) - conv5 = Conv2D(32, (3, 3), data_format=IMAGE_ORDERING, - activation='relu', padding='same')(up2) - conv5 = Dropout(0.2)(conv5) - conv5 = Conv2D(32, (3, 3), data_format=IMAGE_ORDERING, - activation='relu', padding='same')(conv5) - - o = Conv2D(n_classes, (1, 1), data_format=IMAGE_ORDERING, - padding='same')(conv5) - - model = get_segmentation_model(img_input, o) - model.model_name = "unet_mini" - return model - - -def _unet(n_classes, encoder, l1_skip_conn=True, input_height=416, input_width=608): - - img_input, levels = encoder( - input_height=input_height, input_width=input_width) - [f1, f2, f3, f4, f5] = levels - - o = f4 - - o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) - o = (Conv2D(512, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) - o = (BatchNormalization())(o) - - o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) - o = (concatenate([o, f3], axis=MERGE_AXIS)) - o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) - o = (Conv2D(256, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) - o = (BatchNormalization())(o) - - o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) - o = (concatenate([o, f2], axis=MERGE_AXIS)) - o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) - o = (Conv2D(128, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) - o = (BatchNormalization())(o) - - o = (UpSampling2D((2, 2), data_format=IMAGE_ORDERING))(o) - - if l1_skip_conn: - o = (concatenate([o, f1], axis=MERGE_AXIS)) - - o = (ZeroPadding2D((1, 1), data_format=IMAGE_ORDERING))(o) - o = (Conv2D(64, (3, 3), padding='valid', data_format=IMAGE_ORDERING))(o) - o = (BatchNormalization())(o) - - o = Conv2D(n_classes, (3, 3), padding='same', - data_format=IMAGE_ORDERING)(o) - - model = get_segmentation_model(img_input, o) - - return model - - -def unet(n_classes, input_height=416, input_width=608, encoder_level=3): - - model = _unet(n_classes, vanilla_encoder, - input_height=input_height, input_width=input_width) - model.model_name = "unet" - return model - - -def vgg_unet(n_classes, input_height=416, input_width=608, encoder_level=3): - - model = _unet(n_classes, 
get_vgg_encoder, - input_height=input_height, input_width=input_width) - model.model_name = "vgg_unet" - return model - - -def resnet50_unet(n_classes, input_height=416, input_width=608, encoder_level=3): - - model = _unet(n_classes, get_resnet50_encoder, - input_height=input_height, input_width=input_width) - model.model_name = "resnet50_unet" - return model - - -def mobilenet_unet(n_classes, input_height=224, input_width=224, encoder_level=3): - - model = _unet(n_classes, get_mobilenet_encoder, - input_height=input_height, input_width=input_width) - model.model_name = "mobilenet_unet" - return model - - -if __name__ == '__main__': - m = unet_mini(101) - m = _unet(101, vanilla_encoder) - # m = _unet( 101 , get_mobilenet_encoder ,True , 224 , 224 ) - m = _unet(101, get_vgg_encoder) - m = _unet(101, get_resnet50_encoder) -import keras -from keras.models import * -from keras.layers import * - - -from .config import IMAGE_ORDERING - - -if IMAGE_ORDERING == 'channels_first': - pretrained_url = "https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels_notop.h5" -elif IMAGE_ORDERING == 'channels_last': - pretrained_url = "https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5" - - -def get_vgg_encoder(input_height=224, input_width=224, pretrained='imagenet'): - - assert input_height % 32 == 0 - assert input_width % 32 == 0 - - if IMAGE_ORDERING == 'channels_first': - img_input = Input(shape=(3, input_height, input_width)) - elif IMAGE_ORDERING == 'channels_last': - img_input = Input(shape=(input_height, input_width, 3)) - - x = Conv2D(64, (3, 3), activation='relu', padding='same', - name='block1_conv1', data_format=IMAGE_ORDERING)(img_input) - x = Conv2D(64, (3, 3), activation='relu', padding='same', - name='block1_conv2', data_format=IMAGE_ORDERING)(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool', - data_format=IMAGE_ORDERING)(x) - f1 = x - # Block 2 - x = Conv2D(128, (3, 3), activation='relu', padding='same', - name='block2_conv1', data_format=IMAGE_ORDERING)(x) - x = Conv2D(128, (3, 3), activation='relu', padding='same', - name='block2_conv2', data_format=IMAGE_ORDERING)(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool', - data_format=IMAGE_ORDERING)(x) - f2 = x - - # Block 3 - x = Conv2D(256, (3, 3), activation='relu', padding='same', - name='block3_conv1', data_format=IMAGE_ORDERING)(x) - x = Conv2D(256, (3, 3), activation='relu', padding='same', - name='block3_conv2', data_format=IMAGE_ORDERING)(x) - x = Conv2D(256, (3, 3), activation='relu', padding='same', - name='block3_conv3', data_format=IMAGE_ORDERING)(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool', - data_format=IMAGE_ORDERING)(x) - f3 = x - - # Block 4 - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block4_conv1', data_format=IMAGE_ORDERING)(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block4_conv2', data_format=IMAGE_ORDERING)(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block4_conv3', data_format=IMAGE_ORDERING)(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool', - data_format=IMAGE_ORDERING)(x) - f4 = x - - # Block 5 - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block5_conv1', data_format=IMAGE_ORDERING)(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block5_conv2', data_format=IMAGE_ORDERING)(x) - x = Conv2D(512, (3, 3), 
activation='relu', padding='same', - name='block5_conv3', data_format=IMAGE_ORDERING)(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool', - data_format=IMAGE_ORDERING)(x) - f5 = x - - if pretrained == 'imagenet': - VGG_Weights_path = keras.utils.get_file( - pretrained_url.split("/")[-1], pretrained_url) - Model(img_input, x).load_weights(VGG_Weights_path) - - return img_input, [f1, f2, f3, f4, f5] -from setuptools import setup -from setuptools import find_packages - -long_description = ''' -Keras Applications is the `applications` module of -the Keras deep learning library. -It provides model definitions and pre-trained weights for a number -of popular archictures, such as VGG16, ResNet50, Xception, MobileNet, and more. - -Read the documentation at: https://keras.io/applications/ - -Keras Applications may be imported directly -from an up-to-date installation of Keras: - -``` -from keras import applications -``` - -Keras Applications is compatible with Python 2.7-3.6 -and is distributed under the MIT license. -''' - -setup(name='Keras_Applications', - version='1.0.7', - description='Reference implementations of popular deep learning models', - long_description=long_description, - author='Keras Team', - url='https://github.com/keras-team/keras-applications', - download_url='https://github.com/keras-team/' - 'keras-applications/tarball/1.0.7', - license='MIT', - install_requires=['numpy>=1.9.1', - 'h5py'], - extras_require={ - 'tests': ['pytest', - 'pytest-pep8', - 'pytest-xdist', - 'pytest-cov'], - }, - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Topic :: Software Development :: Libraries', - 'Topic :: Software Development :: Libraries :: Python Modules' - ], - packages=find_packages()) -"""Enables dynamic setting of underlying Keras module. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from . import resnext -from . import resnet_v2 -from . import resnet -from . import nasnet -from . import densenet -from . import mobilenet_v2 -from . import mobilenet -from . import xception -from . import inception_resnet_v2 -from . import inception_v3 -from . import resnet50 -from . import vgg19 -from . import vgg16 -_KERAS_BACKEND = None -_KERAS_LAYERS = None -_KERAS_MODELS = None -_KERAS_UTILS = None - - -def set_keras_submodules(backend=None, - layers=None, - models=None, - utils=None, - engine=None): - # Deprecated, will be removed in the future. - global _KERAS_BACKEND - global _KERAS_LAYERS - global _KERAS_MODELS - global _KERAS_UTILS - _KERAS_BACKEND = backend - _KERAS_LAYERS = layers - _KERAS_MODELS = models - _KERAS_UTILS = utils - - -def get_keras_submodule(name): - # Deprecated, will be removed in the future. - if name not in {'backend', 'layers', 'models', 'utils'}: - raise ImportError( - 'Can only retrieve one of "backend", ' - '"layers", "models", or "utils". ' - 'Requested: %s' % name) - if _KERAS_BACKEND is None: - raise ImportError('You need to first `import keras` ' - 'in order to use `keras_applications`. 
' - 'For instance, you can do:\n\n' - '```\n' - 'import keras\n' - 'from keras_applications import vgg16\n' - '```\n\n' - 'Or, preferably, this equivalent formulation:\n\n' - '```\n' - 'from keras import applications\n' - '```\n') - if name == 'backend': - return _KERAS_BACKEND - elif name == 'layers': - return _KERAS_LAYERS - elif name == 'models': - return _KERAS_MODELS - elif name == 'utils': - return _KERAS_UTILS - - -def get_submodules_from_kwargs(kwargs): - backend = kwargs.get('backend', _KERAS_BACKEND) - layers = kwargs.get('layers', _KERAS_LAYERS) - models = kwargs.get('models', _KERAS_MODELS) - utils = kwargs.get('utils', _KERAS_UTILS) - for key in kwargs.keys(): - if key not in ['backend', 'layers', 'models', 'utils']: - raise TypeError('Invalid keyword argument: %s', key) - return backend, layers, models, utils - - -def correct_pad(backend, inputs, kernel_size): - """Returns a tuple for zero-padding for 2D convolution with downsampling. - - # Arguments - input_size: An integer or tuple/list of 2 integers. - kernel_size: An integer or tuple/list of 2 integers. - - # Returns - A tuple. - """ - img_dim = 2 if backend.image_data_format() == 'channels_first' else 1 - input_size = backend.int_shape(inputs)[img_dim:(img_dim + 2)] - - if isinstance(kernel_size, int): - kernel_size = (kernel_size, kernel_size) - - if input_size[0] is None: - adjust = (1, 1) - else: - adjust = (1 - input_size[0] % 2, 1 - input_size[1] % 2) - - correct = (kernel_size[0] // 2, kernel_size[1] // 2) - - return ((correct[0] - adjust[0], correct[0]), - (correct[1] - adjust[1], correct[1])) - - -__version__ = '1.0.7' -"""DenseNet models for Keras. - -# Reference paper - -- [Densely Connected Convolutional Networks] - (https://arxiv.org/abs/1608.06993) (CVPR 2017 Best Paper Award) - -# Reference implementation - -- [Torch DenseNets] - (https://github.com/liuzhuang13/DenseNet/blob/master/models/densenet.lua) -- [TensorNets] - (https://github.com/taehoonlee/tensornets/blob/master/tensornets/densenets.py) -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from . import get_submodules_from_kwargs -from . import imagenet_utils -from .imagenet_utils import decode_predictions -from .imagenet_utils import _obtain_input_shape - - -BASE_WEIGTHS_PATH = ( - 'https://github.com/keras-team/keras-applications/' - 'releases/download/densenet/') -DENSENET121_WEIGHT_PATH = ( - BASE_WEIGTHS_PATH + - 'densenet121_weights_tf_dim_ordering_tf_kernels.h5') -DENSENET121_WEIGHT_PATH_NO_TOP = ( - BASE_WEIGTHS_PATH + - 'densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5') -DENSENET169_WEIGHT_PATH = ( - BASE_WEIGTHS_PATH + - 'densenet169_weights_tf_dim_ordering_tf_kernels.h5') -DENSENET169_WEIGHT_PATH_NO_TOP = ( - BASE_WEIGTHS_PATH + - 'densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5') -DENSENET201_WEIGHT_PATH = ( - BASE_WEIGTHS_PATH + - 'densenet201_weights_tf_dim_ordering_tf_kernels.h5') -DENSENET201_WEIGHT_PATH_NO_TOP = ( - BASE_WEIGTHS_PATH + - 'densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5') - -backend = None -layers = None -models = None -keras_utils = None - - -def dense_block(x, blocks, name): - """A dense block. - - # Arguments - x: input tensor. - blocks: integer, the number of building blocks. - name: string, block label. - - # Returns - output tensor for the block. 
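The growth arithmetic behind `dense_block` is worth spelling out: each `conv_block` (defined just below) concatenates `growth_rate` new channels onto its input, so channel width grows linearly with the block count. A quick check of my own, not stated in the source:

```python
# DenseNet channel bookkeeping: each conv_block adds growth_rate channels.
def dense_block_channels(c_in, blocks, growth_rate=32):
    return c_in + blocks * growth_rate

# DenseNet-121's first dense block: the 64-channel stem grows to 256,
# which the following transition_block (reduction=0.5) then halves to 128.
print(dense_block_channels(64, 6))  # 256
```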
- """ - for i in range(blocks): - x = conv_block(x, 32, name=name + '_block' + str(i + 1)) - return x - - -def transition_block(x, reduction, name): - """A transition block. - - # Arguments - x: input tensor. - reduction: float, compression rate at transition layers. - name: string, block label. - - # Returns - output tensor for the block. - """ - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name=name + '_bn')(x) - x = layers.Activation('relu', name=name + '_relu')(x) - x = layers.Conv2D(int(backend.int_shape(x)[bn_axis] * reduction), 1, - use_bias=False, - name=name + '_conv')(x) - x = layers.AveragePooling2D(2, strides=2, name=name + '_pool')(x) - return x - - -def conv_block(x, growth_rate, name): - """A building block for a dense block. - - # Arguments - x: input tensor. - growth_rate: float, growth rate at dense layers. - name: string, block label. - - # Returns - Output tensor for the block. - """ - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - x1 = layers.BatchNormalization(axis=bn_axis, - epsilon=1.001e-5, - name=name + '_0_bn')(x) - x1 = layers.Activation('relu', name=name + '_0_relu')(x1) - x1 = layers.Conv2D(4 * growth_rate, 1, - use_bias=False, - name=name + '_1_conv')(x1) - x1 = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name=name + '_1_bn')(x1) - x1 = layers.Activation('relu', name=name + '_1_relu')(x1) - x1 = layers.Conv2D(growth_rate, 3, - padding='same', - use_bias=False, - name=name + '_2_conv')(x1) - x = layers.Concatenate(axis=bn_axis, name=name + '_concat')([x, x1]) - return x - - -def DenseNet(blocks, - include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the DenseNet architecture. - - Optionally loads weights pre-trained on ImageNet. - Note that the data format convention used by the model is - the one specified in your Keras config at `~/.keras/keras.json`. - - # Arguments - blocks: numbers of building blocks for the four dense layers. - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor - (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` (with `'channels_last'` data format) - or `(3, 224, 224)` (with `'channels_first'` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(200, 200, 3)` would be one valid value. - pooling: optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - - # Returns - A Keras model instance. 
- - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - """ - global backend, layers, models, keras_utils - backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) - - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=224, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - - x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)))(img_input) - x = layers.Conv2D(64, 7, strides=2, use_bias=False, name='conv1/conv')(x) - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name='conv1/bn')(x) - x = layers.Activation('relu', name='conv1/relu')(x) - x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)))(x) - x = layers.MaxPooling2D(3, strides=2, name='pool1')(x) - - x = dense_block(x, blocks[0], name='conv2') - x = transition_block(x, 0.5, name='pool2') - x = dense_block(x, blocks[1], name='conv3') - x = transition_block(x, 0.5, name='pool3') - x = dense_block(x, blocks[2], name='conv4') - x = transition_block(x, 0.5, name='pool4') - x = dense_block(x, blocks[3], name='conv5') - - x = layers.BatchNormalization( - axis=bn_axis, epsilon=1.001e-5, name='bn')(x) - x = layers.Activation('relu', name='relu')(x) - - if include_top: - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - x = layers.Dense(classes, activation='softmax', name='fc1000')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D(name='max_pool')(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = keras_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - if blocks == [6, 12, 24, 16]: - model = models.Model(inputs, x, name='densenet121') - elif blocks == [6, 12, 32, 32]: - model = models.Model(inputs, x, name='densenet169') - elif blocks == [6, 12, 48, 32]: - model = models.Model(inputs, x, name='densenet201') - else: - model = models.Model(inputs, x, name='densenet') - - # Load weights. 
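As the name dispatch above shows, the variant names simply encode the `blocks` configuration. The "121" also checks out if you count weighted layers, an aside of my own rather than something stated in the source:

```python
# Layer count for DenseNet-121: stem conv, two convs per conv_block,
# one conv per transition block (3 of them), plus the final Dense classifier.
blocks = [6, 12, 24, 16]
print(1 + 2 * sum(blocks) + 3 + 1)  # 121
```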
- if weights == 'imagenet': - if include_top: - if blocks == [6, 12, 24, 16]: - weights_path = keras_utils.get_file( - 'densenet121_weights_tf_dim_ordering_tf_kernels.h5', - DENSENET121_WEIGHT_PATH, - cache_subdir='models', - file_hash='9d60b8095a5708f2dcce2bca79d332c7') - elif blocks == [6, 12, 32, 32]: - weights_path = keras_utils.get_file( - 'densenet169_weights_tf_dim_ordering_tf_kernels.h5', - DENSENET169_WEIGHT_PATH, - cache_subdir='models', - file_hash='d699b8f76981ab1b30698df4c175e90b') - elif blocks == [6, 12, 48, 32]: - weights_path = keras_utils.get_file( - 'densenet201_weights_tf_dim_ordering_tf_kernels.h5', - DENSENET201_WEIGHT_PATH, - cache_subdir='models', - file_hash='1ceb130c1ea1b78c3bf6114dbdfd8807') - else: - if blocks == [6, 12, 24, 16]: - weights_path = keras_utils.get_file( - 'densenet121_weights_tf_dim_ordering_tf_kernels_notop.h5', - DENSENET121_WEIGHT_PATH_NO_TOP, - cache_subdir='models', - file_hash='30ee3e1110167f948a6b9946edeeb738') - elif blocks == [6, 12, 32, 32]: - weights_path = keras_utils.get_file( - 'densenet169_weights_tf_dim_ordering_tf_kernels_notop.h5', - DENSENET169_WEIGHT_PATH_NO_TOP, - cache_subdir='models', - file_hash='b8c4d4c20dd625c148057b9ff1c1176b') - elif blocks == [6, 12, 48, 32]: - weights_path = keras_utils.get_file( - 'densenet201_weights_tf_dim_ordering_tf_kernels_notop.h5', - DENSENET201_WEIGHT_PATH_NO_TOP, - cache_subdir='models', - file_hash='c13680b51ded0fb44dff2d8f86ac8bb1') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model - - -def DenseNet121(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - return DenseNet([6, 12, 24, 16], - include_top, weights, - input_tensor, input_shape, - pooling, classes, - **kwargs) - - -def DenseNet169(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - return DenseNet([6, 12, 32, 32], - include_top, weights, - input_tensor, input_shape, - pooling, classes, - **kwargs) - - -def DenseNet201(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - return DenseNet([6, 12, 48, 32], - include_top, weights, - input_tensor, input_shape, - pooling, classes, - **kwargs) - - -def preprocess_input(x, data_format=None, **kwargs): - """Preprocesses a numpy array encoding a batch of images. - - # Arguments - x: a 3D or 4D numpy array consists of RGB values within [0, 255]. - data_format: data format of the image tensor. - - # Returns - Preprocessed array. - """ - return imagenet_utils.preprocess_input(x, data_format, - mode='torch', **kwargs) - - -setattr(DenseNet121, '__doc__', DenseNet.__doc__) -setattr(DenseNet169, '__doc__', DenseNet.__doc__) -setattr(DenseNet201, '__doc__', DenseNet.__doc__) -"""Utilities for ImageNet data preprocessing & prediction decoding. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import json -import warnings -import numpy as np - -from . import get_submodules_from_kwargs - -CLASS_INDEX = None -CLASS_INDEX_PATH = ('https://storage.googleapis.com/download.tensorflow.org/' - 'data/imagenet_class_index.json') - -# Global tensor of imagenet mean for preprocessing symbolic inputs -_IMAGENET_MEAN = None - - -def _preprocess_numpy_input(x, data_format, mode, **kwargs): - """Preprocesses a Numpy array encoding a batch of images. 
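The `mode` argument documented just below is the crux of this module. A small numpy demonstration of the three conventions, using the ImageNet constants that appear later in this function (the pixel values are my own example):

```python
import numpy as np

x = np.array([[[0.0, 127.5, 255.0]]])  # one RGB pixel in [0, 255]

tf_mode = x / 127.5 - 1.0  # 'tf': scale sample-wise into [-1, 1]
torch_mode = (x / 255.0 - [0.485, 0.456, 0.406]) / [0.229, 0.224, 0.225]
caffe_mode = x[..., ::-1] - [103.939, 116.779, 123.68]  # RGB->BGR, zero-center

print(tf_mode)  # [[[-1.  0.  1.]]]
```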
- - # Arguments - x: Input array, 3D or 4D. - data_format: Data format of the image array. - mode: One of "caffe", "tf" or "torch". - - caffe: will convert the images from RGB to BGR, - then will zero-center each color channel with - respect to the ImageNet dataset, - without scaling. - - tf: will scale pixels between -1 and 1, - sample-wise. - - torch: will scale pixels between 0 and 1 and then - will normalize each channel with respect to the - ImageNet dataset. - - # Returns - Preprocessed Numpy array. - """ - backend, _, _, _ = get_submodules_from_kwargs(kwargs) - if not issubclass(x.dtype.type, np.floating): - x = x.astype(backend.floatx(), copy=False) - - if mode == 'tf': - x /= 127.5 - x -= 1. - return x - - if mode == 'torch': - x /= 255. - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - else: - if data_format == 'channels_first': - # 'RGB'->'BGR' - if x.ndim == 3: - x = x[::-1, ...] - else: - x = x[:, ::-1, ...] - else: - # 'RGB'->'BGR' - x = x[..., ::-1] - mean = [103.939, 116.779, 123.68] - std = None - - # Zero-center by mean pixel - if data_format == 'channels_first': - if x.ndim == 3: - x[0, :, :] -= mean[0] - x[1, :, :] -= mean[1] - x[2, :, :] -= mean[2] - if std is not None: - x[0, :, :] /= std[0] - x[1, :, :] /= std[1] - x[2, :, :] /= std[2] - else: - x[:, 0, :, :] -= mean[0] - x[:, 1, :, :] -= mean[1] - x[:, 2, :, :] -= mean[2] - if std is not None: - x[:, 0, :, :] /= std[0] - x[:, 1, :, :] /= std[1] - x[:, 2, :, :] /= std[2] - else: - x[..., 0] -= mean[0] - x[..., 1] -= mean[1] - x[..., 2] -= mean[2] - if std is not None: - x[..., 0] /= std[0] - x[..., 1] /= std[1] - x[..., 2] /= std[2] - return x - - -def _preprocess_symbolic_input(x, data_format, mode, **kwargs): - """Preprocesses a tensor encoding a batch of images. - - # Arguments - x: Input tensor, 3D or 4D. - data_format: Data format of the image tensor. - mode: One of "caffe", "tf" or "torch". - - caffe: will convert the images from RGB to BGR, - then will zero-center each color channel with - respect to the ImageNet dataset, - without scaling. - - tf: will scale pixels between -1 and 1, - sample-wise. - - torch: will scale pixels between 0 and 1 and then - will normalize each channel with respect to the - ImageNet dataset. - - # Returns - Preprocessed tensor. - """ - global _IMAGENET_MEAN - - backend, _, _, _ = get_submodules_from_kwargs(kwargs) - - if mode == 'tf': - x /= 127.5 - x -= 1. - return x - - if mode == 'torch': - x /= 255. - mean = [0.485, 0.456, 0.406] - std = [0.229, 0.224, 0.225] - else: - if data_format == 'channels_first': - # 'RGB'->'BGR' - if backend.ndim(x) == 3: - x = x[::-1, ...] - else: - x = x[:, ::-1, ...] - else: - # 'RGB'->'BGR' - x = x[..., ::-1] - mean = [103.939, 116.779, 123.68] - std = None - - if _IMAGENET_MEAN is None: - _IMAGENET_MEAN = backend.constant(-np.array(mean)) - - # Zero-center by mean pixel - if backend.dtype(x) != backend.dtype(_IMAGENET_MEAN): - x = backend.bias_add( - x, backend.cast(_IMAGENET_MEAN, backend.dtype(x)), - data_format=data_format) - else: - x = backend.bias_add(x, _IMAGENET_MEAN, data_format) - if std is not None: - x /= std - return x - - -def preprocess_input(x, data_format=None, mode='caffe', **kwargs): - """Preprocesses a tensor or Numpy array encoding a batch of images. - - # Arguments - x: Input Numpy or symbolic tensor, 3D or 4D. - The preprocessed data is written over the input data - if the data types are compatible. To avoid this - behaviour, `numpy.copy(x)` can be used. - data_format: Data format of the image tensor/array. 
- mode: One of "caffe", "tf" or "torch". - - caffe: will convert the images from RGB to BGR, - then will zero-center each color channel with - respect to the ImageNet dataset, - without scaling. - - tf: will scale pixels between -1 and 1, - sample-wise. - - torch: will scale pixels between 0 and 1 and then - will normalize each channel with respect to the - ImageNet dataset. - - # Returns - Preprocessed tensor or Numpy array. - - # Raises - ValueError: In case of unknown `data_format` argument. - """ - backend, _, _, _ = get_submodules_from_kwargs(kwargs) - - if data_format is None: - data_format = backend.image_data_format() - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format ' + str(data_format)) - - if isinstance(x, np.ndarray): - return _preprocess_numpy_input(x, data_format=data_format, - mode=mode, **kwargs) - else: - return _preprocess_symbolic_input(x, data_format=data_format, - mode=mode, **kwargs) - - -def decode_predictions(preds, top=5, **kwargs): - """Decodes the prediction of an ImageNet model. - - # Arguments - preds: Numpy tensor encoding a batch of predictions. - top: Integer, how many top-guesses to return. - - # Returns - A list of lists of top class prediction tuples - `(class_name, class_description, score)`. - One list of tuples per sample in batch input. - - # Raises - ValueError: In case of invalid shape of the `pred` array - (must be 2D). - """ - global CLASS_INDEX - - backend, _, _, keras_utils = get_submodules_from_kwargs(kwargs) - - if len(preds.shape) != 2 or preds.shape[1] != 1000: - raise ValueError('`decode_predictions` expects ' - 'a batch of predictions ' - '(i.e. a 2D array of shape (samples, 1000)). ' - 'Found array with shape: ' + str(preds.shape)) - if CLASS_INDEX is None: - fpath = keras_utils.get_file( - 'imagenet_class_index.json', - CLASS_INDEX_PATH, - cache_subdir='models', - file_hash='c2c37ea517e94d9795004a39431a14cb') - with open(fpath) as f: - CLASS_INDEX = json.load(f) - results = [] - for pred in preds: - top_indices = pred.argsort()[-top:][::-1] - result = [tuple(CLASS_INDEX[str(i)]) + (pred[i],) for i in top_indices] - result.sort(key=lambda x: x[2], reverse=True) - results.append(result) - return results - - -def _obtain_input_shape(input_shape, - default_size, - min_size, - data_format, - require_flatten, - weights=None): - """Internal utility to compute/validate a model's input shape. - - # Arguments - input_shape: Either None (will return the default network input shape), - or a user-provided shape to be validated. - default_size: Default input width/height for the model. - min_size: Minimum input width/height accepted by the model. - data_format: Image data format to use. - require_flatten: Whether the model is expected to - be linked to a classifier via a Flatten layer. - weights: One of `None` (random initialization) - or 'imagenet' (pre-training on ImageNet). - If weights='imagenet' input channels must be equal to 3. - - # Returns - An integer shape tuple (may include None entries). - - # Raises - ValueError: In case of invalid argument values. - """ - if weights != 'imagenet' and input_shape and len(input_shape) == 3: - if data_format == 'channels_first': - if input_shape[0] not in {1, 3}: - warnings.warn( - 'This model usually expects 1 or 3 input channels. 
' - 'However, it was passed an input_shape with ' + - str(input_shape[0]) + ' input channels.') - default_shape = (input_shape[0], default_size, default_size) - else: - if input_shape[-1] not in {1, 3}: - warnings.warn( - 'This model usually expects 1 or 3 input channels. ' - 'However, it was passed an input_shape with ' + - str(input_shape[-1]) + ' input channels.') - default_shape = (default_size, default_size, input_shape[-1]) - else: - if data_format == 'channels_first': - default_shape = (3, default_size, default_size) - else: - default_shape = (default_size, default_size, 3) - if weights == 'imagenet' and require_flatten: - if input_shape is not None: - if input_shape != default_shape: - raise ValueError('When setting `include_top=True` ' - 'and loading `imagenet` weights, ' - '`input_shape` should be ' + - str(default_shape) + '.') - return default_shape - if input_shape: - if data_format == 'channels_first': - if input_shape is not None: - if len(input_shape) != 3: - raise ValueError( - '`input_shape` must be a tuple of three integers.') - if input_shape[0] != 3 and weights == 'imagenet': - raise ValueError('The input must have 3 channels; got ' - '`input_shape=' + str(input_shape) + '`') - if ((input_shape[1] is not None and input_shape[1] < min_size) or - (input_shape[2] is not None and input_shape[2] < min_size)): - raise ValueError('Input size must be at least ' + - str(min_size) + 'x' + str(min_size) + - '; got `input_shape=' + - str(input_shape) + '`') - else: - if input_shape is not None: - if len(input_shape) != 3: - raise ValueError( - '`input_shape` must be a tuple of three integers.') - if input_shape[-1] != 3 and weights == 'imagenet': - raise ValueError('The input must have 3 channels; got ' - '`input_shape=' + str(input_shape) + '`') - if ((input_shape[0] is not None and input_shape[0] < min_size) or - (input_shape[1] is not None and input_shape[1] < min_size)): - raise ValueError('Input size must be at least ' + - str(min_size) + 'x' + str(min_size) + - '; got `input_shape=' + - str(input_shape) + '`') - else: - if require_flatten: - input_shape = default_shape - else: - if data_format == 'channels_first': - input_shape = (3, None, None) - else: - input_shape = (None, None, 3) - if require_flatten: - if None in input_shape: - raise ValueError('If `include_top` is True, ' - 'you should specify a static `input_shape`. ' - 'Got `input_shape=' + str(input_shape) + '`') - return input_shape -"""Inception-ResNet V2 model for Keras. - -Model naming and structure follows TF-slim implementation -(which has some additional layers and different number of -filters from the original arXiv paper): -https://github.com/tensorflow/models/blob/master/research/slim/nets/inception_resnet_v2.py - -Pre-trained ImageNet weights are also converted from TF-slim, -which can be found in: -https://github.com/tensorflow/models/tree/master/research/slim#pre-trained-models - -# Reference -- [Inception-v4, Inception-ResNet and the Impact of - Residual Connections on Learning](https://arxiv.org/abs/1602.07261) (AAAI 2017) - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from . import get_submodules_from_kwargs -from . 
import imagenet_utils -from .imagenet_utils import decode_predictions -from .imagenet_utils import _obtain_input_shape - - -BASE_WEIGHT_URL = ('https://github.com/fchollet/deep-learning-models/' - 'releases/download/v0.7/') - -backend = None -layers = None -models = None -keras_utils = None - - -def preprocess_input(x, **kwargs): - """Preprocesses a numpy array encoding a batch of images. - - # Arguments - x: a 4D numpy array consists of RGB values within [0, 255]. - - # Returns - Preprocessed array. - """ - return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) - - -def conv2d_bn(x, - filters, - kernel_size, - strides=1, - padding='same', - activation='relu', - use_bias=False, - name=None): - """Utility function to apply conv + BN. - - # Arguments - x: input tensor. - filters: filters in `Conv2D`. - kernel_size: kernel size as in `Conv2D`. - strides: strides in `Conv2D`. - padding: padding mode in `Conv2D`. - activation: activation in `Conv2D`. - use_bias: whether to use a bias in `Conv2D`. - name: name of the ops; will become `name + '_ac'` for the activation - and `name + '_bn'` for the batch norm layer. - - # Returns - Output tensor after applying `Conv2D` and `BatchNormalization`. - """ - x = layers.Conv2D(filters, - kernel_size, - strides=strides, - padding=padding, - use_bias=use_bias, - name=name)(x) - if not use_bias: - bn_axis = 1 if backend.image_data_format() == 'channels_first' else 3 - bn_name = None if name is None else name + '_bn' - x = layers.BatchNormalization(axis=bn_axis, - scale=False, - name=bn_name)(x) - if activation is not None: - ac_name = None if name is None else name + '_ac' - x = layers.Activation(activation, name=ac_name)(x) - return x - - -def inception_resnet_block(x, scale, block_type, block_idx, activation='relu'): - """Adds a Inception-ResNet block. - - This function builds 3 types of Inception-ResNet blocks mentioned - in the paper, controlled by the `block_type` argument (which is the - block name used in the official TF-slim implementation): - - Inception-ResNet-A: `block_type='block35'` - - Inception-ResNet-B: `block_type='block17'` - - Inception-ResNet-C: `block_type='block8'` - - # Arguments - x: input tensor. - scale: scaling factor to scale the residuals (i.e., the output of - passing `x` through an inception module) before adding them - to the shortcut branch. - Let `r` be the output from the residual branch, - the output of this block will be `x + scale * r`. - block_type: `'block35'`, `'block17'` or `'block8'`, determines - the network structure in the residual branch. - block_idx: an `int` used for generating layer names. - The Inception-ResNet blocks - are repeated many times in this network. - We use `block_idx` to identify - each of the repetitions. For example, - the first Inception-ResNet-A block - will have `block_type='block35', block_idx=0`, - and the layer names will have - a common prefix `'block35_0'`. - activation: activation function to use at the end of the block - (see [activations](../activations.md)). - When `activation=None`, no activation is applied - (i.e., "linear" activation: `a(x) = x`). - - # Returns - Output tensor for the block. - - # Raises - ValueError: if `block_type` is not one of `'block35'`, - `'block17'` or `'block8'`. 
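The `x + scale * r` formula in the docstring is implemented in the function body below with a two-input `Lambda` layer. A stripped-down version of the same idea (a sketch only; the library's actual layer also threads `scale` through `arguments` and sets `output_shape`):

```python
from keras import layers

def scaled_residual(x, r, scale=0.17):
    # output = shortcut + scale * residual branch
    return layers.Lambda(lambda t: t[0] + scale * t[1])([x, r])
```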
- """ - if block_type == 'block35': - branch_0 = conv2d_bn(x, 32, 1) - branch_1 = conv2d_bn(x, 32, 1) - branch_1 = conv2d_bn(branch_1, 32, 3) - branch_2 = conv2d_bn(x, 32, 1) - branch_2 = conv2d_bn(branch_2, 48, 3) - branch_2 = conv2d_bn(branch_2, 64, 3) - branches = [branch_0, branch_1, branch_2] - elif block_type == 'block17': - branch_0 = conv2d_bn(x, 192, 1) - branch_1 = conv2d_bn(x, 128, 1) - branch_1 = conv2d_bn(branch_1, 160, [1, 7]) - branch_1 = conv2d_bn(branch_1, 192, [7, 1]) - branches = [branch_0, branch_1] - elif block_type == 'block8': - branch_0 = conv2d_bn(x, 192, 1) - branch_1 = conv2d_bn(x, 192, 1) - branch_1 = conv2d_bn(branch_1, 224, [1, 3]) - branch_1 = conv2d_bn(branch_1, 256, [3, 1]) - branches = [branch_0, branch_1] - else: - raise ValueError('Unknown Inception-ResNet block type. ' - 'Expects "block35", "block17" or "block8", ' - 'but got: ' + str(block_type)) - - block_name = block_type + '_' + str(block_idx) - channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3 - mixed = layers.Concatenate( - axis=channel_axis, name=block_name + '_mixed')(branches) - up = conv2d_bn(mixed, - backend.int_shape(x)[channel_axis], - 1, - activation=None, - use_bias=True, - name=block_name + '_conv') - - x = layers.Lambda(lambda inputs, scale: inputs[0] + inputs[1] * scale, - output_shape=backend.int_shape(x)[1:], - arguments={'scale': scale}, - name=block_name)([x, up]) - if activation is not None: - x = layers.Activation(activation, name=block_name + '_ac')(x) - return x - - -def InceptionResNetV2(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the Inception-ResNet v2 architecture. - - Optionally loads weights pre-trained on ImageNet. - Note that the data format convention used by the model is - the one specified in your Keras config at `~/.keras/keras.json`. - - # Arguments - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is `False` (otherwise the input shape - has to be `(299, 299, 3)` (with `'channels_last'` data format) - or `(3, 299, 299)` (with `'channels_first'` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 75. - E.g. `(150, 150, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the last convolutional block. - - `'avg'` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a 2D tensor. - - `'max'` means that global max pooling will be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is `True`, and - if no `weights` argument is specified. - - # Returns - A Keras `Model` instance. - - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. 
- """ - global backend, layers, models, keras_utils - backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) - - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = _obtain_input_shape( - input_shape, - default_size=299, - min_size=75, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - # Stem block: 35 x 35 x 192 - x = conv2d_bn(img_input, 32, 3, strides=2, padding='valid') - x = conv2d_bn(x, 32, 3, padding='valid') - x = conv2d_bn(x, 64, 3) - x = layers.MaxPooling2D(3, strides=2)(x) - x = conv2d_bn(x, 80, 1, padding='valid') - x = conv2d_bn(x, 192, 3, padding='valid') - x = layers.MaxPooling2D(3, strides=2)(x) - - # Mixed 5b (Inception-A block): 35 x 35 x 320 - branch_0 = conv2d_bn(x, 96, 1) - branch_1 = conv2d_bn(x, 48, 1) - branch_1 = conv2d_bn(branch_1, 64, 5) - branch_2 = conv2d_bn(x, 64, 1) - branch_2 = conv2d_bn(branch_2, 96, 3) - branch_2 = conv2d_bn(branch_2, 96, 3) - branch_pool = layers.AveragePooling2D(3, strides=1, padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 64, 1) - branches = [branch_0, branch_1, branch_2, branch_pool] - channel_axis = 1 if backend.image_data_format() == 'channels_first' else 3 - x = layers.Concatenate(axis=channel_axis, name='mixed_5b')(branches) - - # 10x block35 (Inception-ResNet-A block): 35 x 35 x 320 - for block_idx in range(1, 11): - x = inception_resnet_block(x, - scale=0.17, - block_type='block35', - block_idx=block_idx) - - # Mixed 6a (Reduction-A block): 17 x 17 x 1088 - branch_0 = conv2d_bn(x, 384, 3, strides=2, padding='valid') - branch_1 = conv2d_bn(x, 256, 1) - branch_1 = conv2d_bn(branch_1, 256, 3) - branch_1 = conv2d_bn(branch_1, 384, 3, strides=2, padding='valid') - branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x) - branches = [branch_0, branch_1, branch_pool] - x = layers.Concatenate(axis=channel_axis, name='mixed_6a')(branches) - - # 20x block17 (Inception-ResNet-B block): 17 x 17 x 1088 - for block_idx in range(1, 21): - x = inception_resnet_block(x, - scale=0.1, - block_type='block17', - block_idx=block_idx) - - # Mixed 7a (Reduction-B block): 8 x 8 x 2080 - branch_0 = conv2d_bn(x, 256, 1) - branch_0 = conv2d_bn(branch_0, 384, 3, strides=2, padding='valid') - branch_1 = conv2d_bn(x, 256, 1) - branch_1 = conv2d_bn(branch_1, 288, 3, strides=2, padding='valid') - branch_2 = conv2d_bn(x, 256, 1) - branch_2 = conv2d_bn(branch_2, 288, 3) - branch_2 = conv2d_bn(branch_2, 320, 3, strides=2, padding='valid') - branch_pool = layers.MaxPooling2D(3, strides=2, padding='valid')(x) - branches = [branch_0, branch_1, branch_2, branch_pool] - x = layers.Concatenate(axis=channel_axis, name='mixed_7a')(branches) - - # 10x block8 (Inception-ResNet-C block): 8 x 8 x 2080 - for block_idx in range(1, 10): - x = inception_resnet_block(x, - scale=0.2, - block_type='block8', - block_idx=block_idx) - x = 
inception_resnet_block(x, - scale=1., - activation=None, - block_type='block8', - block_idx=10) - - # Final convolution block: 8 x 8 x 1536 - x = conv2d_bn(x, 1536, 1, name='conv_7b') - - if include_top: - # Classification block - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - x = layers.Dense(classes, activation='softmax', name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = keras_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = models.Model(inputs, x, name='inception_resnet_v2') - - # Load weights. - if weights == 'imagenet': - if include_top: - fname = 'inception_resnet_v2_weights_tf_dim_ordering_tf_kernels.h5' - weights_path = keras_utils.get_file( - fname, - BASE_WEIGHT_URL + fname, - cache_subdir='models', - file_hash='e693bd0210a403b3192acc6073ad2e96') - else: - fname = ('inception_resnet_v2_weights_' - 'tf_dim_ordering_tf_kernels_notop.h5') - weights_path = keras_utils.get_file( - fname, - BASE_WEIGHT_URL + fname, - cache_subdir='models', - file_hash='d19885ff4a710c122648d3b5c3b684e4') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model -"""Inception V3 model for Keras. - -Note that the input image format for this model is different than for -the VGG16 and ResNet models (299x299 instead of 224x224), -and that the input preprocessing function is also different (same as Xception). - -# Reference - -- [Rethinking the Inception Architecture for Computer Vision]( - http://arxiv.org/abs/1512.00567) (CVPR 2016) - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from . import get_submodules_from_kwargs -from . import imagenet_utils -from .imagenet_utils import decode_predictions -from .imagenet_utils import _obtain_input_shape - - -WEIGHTS_PATH = ( - 'https://github.com/fchollet/deep-learning-models/' - 'releases/download/v0.5/' - 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5') -WEIGHTS_PATH_NO_TOP = ( - 'https://github.com/fchollet/deep-learning-models/' - 'releases/download/v0.5/' - 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5') - -backend = None -layers = None -models = None -keras_utils = None - - -def conv2d_bn(x, - filters, - num_row, - num_col, - padding='same', - strides=(1, 1), - name=None): - """Utility function to apply conv + BN. - - # Arguments - x: input tensor. - filters: filters in `Conv2D`. - num_row: height of the convolution kernel. - num_col: width of the convolution kernel. - padding: padding mode in `Conv2D`. - strides: strides in `Conv2D`. - name: name of the ops; will become `name + '_conv'` - for the convolution and `name + '_bn'` for the - batch norm layer. - - # Returns - Output tensor after applying `Conv2D` and `BatchNormalization`. 
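The separate `num_row`/`num_col` arguments exist because Inception v3 is known for factorizing large kernels into a 1 x n convolution followed by an n x 1 convolution. The parameter saving is easy to verify (my own arithmetic, with a hypothetical channel count):

```python
# Weights per output filter for a C-channel input (biases ignored).
C = 192  # hypothetical channel count
full_7x7 = 7 * 7 * C
factorized = (1 * 7) * C + (7 * 1) * C  # 1x7 conv then 7x1 conv
print(full_7x7, factorized)  # 9408 2688, roughly a 3.5x reduction
```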
- """ - if name is not None: - bn_name = name + '_bn' - conv_name = name + '_conv' - else: - bn_name = None - conv_name = None - if backend.image_data_format() == 'channels_first': - bn_axis = 1 - else: - bn_axis = 3 - x = layers.Conv2D( - filters, (num_row, num_col), - strides=strides, - padding=padding, - use_bias=False, - name=conv_name)(x) - x = layers.BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) - x = layers.Activation('relu', name=name)(x) - return x - - -def InceptionV3(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the Inception v3 architecture. - - Optionally loads weights pre-trained on ImageNet. - Note that the data format convention used by the model is - the one specified in your Keras config at `~/.keras/keras.json`. - - # Arguments - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(299, 299, 3)` (with `channels_last` data format) - or `(3, 299, 299)` (with `channels_first` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 75. - E.g. `(150, 150, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - - # Returns - A Keras model instance. - - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. 
- """ - global backend, layers, models, keras_utils - backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) - - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = _obtain_input_shape( - input_shape, - default_size=299, - min_size=75, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - if backend.image_data_format() == 'channels_first': - channel_axis = 1 - else: - channel_axis = 3 - - x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid') - x = conv2d_bn(x, 32, 3, 3, padding='valid') - x = conv2d_bn(x, 64, 3, 3) - x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) - - x = conv2d_bn(x, 80, 1, 1, padding='valid') - x = conv2d_bn(x, 192, 3, 3, padding='valid') - x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) - - # mixed 0: 35 x 35 x 256 - branch1x1 = conv2d_bn(x, 64, 1, 1) - - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - - branch_pool = layers.AveragePooling2D((3, 3), - strides=(1, 1), - padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 32, 1, 1) - x = layers.concatenate( - [branch1x1, branch5x5, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed0') - - # mixed 1: 35 x 35 x 288 - branch1x1 = conv2d_bn(x, 64, 1, 1) - - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - - branch_pool = layers.AveragePooling2D((3, 3), - strides=(1, 1), - padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 64, 1, 1) - x = layers.concatenate( - [branch1x1, branch5x5, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed1') - - # mixed 2: 35 x 35 x 288 - branch1x1 = conv2d_bn(x, 64, 1, 1) - - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - - branch_pool = layers.AveragePooling2D((3, 3), - strides=(1, 1), - padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 64, 1, 1) - x = layers.concatenate( - [branch1x1, branch5x5, branch3x3dbl, branch_pool], - axis=channel_axis, - name='mixed2') - - # mixed 3: 17 x 17 x 768 - branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid') - - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn( - branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid') - - branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) - x = layers.concatenate( - [branch3x3, branch3x3dbl, branch_pool], - axis=channel_axis, - 
name='mixed3') - - # mixed 4: 17 x 17 x 768 - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 128, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 128, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 128, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - - branch_pool = layers.AveragePooling2D((3, 3), - strides=(1, 1), - padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate( - [branch1x1, branch7x7, branch7x7dbl, branch_pool], - axis=channel_axis, - name='mixed4') - - # mixed 5, 6: 17 x 17 x 768 - for i in range(2): - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 160, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 160, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 160, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 160, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - - branch_pool = layers.AveragePooling2D( - (3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate( - [branch1x1, branch7x7, branch7x7dbl, branch_pool], - axis=channel_axis, - name='mixed' + str(5 + i)) - - # mixed 7: 17 x 17 x 768 - branch1x1 = conv2d_bn(x, 192, 1, 1) - - branch7x7 = conv2d_bn(x, 192, 1, 1) - branch7x7 = conv2d_bn(branch7x7, 192, 1, 7) - branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - - branch7x7dbl = conv2d_bn(x, 192, 1, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) - branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - - branch_pool = layers.AveragePooling2D((3, 3), - strides=(1, 1), - padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate( - [branch1x1, branch7x7, branch7x7dbl, branch_pool], - axis=channel_axis, - name='mixed7') - - # mixed 8: 8 x 8 x 1280 - branch3x3 = conv2d_bn(x, 192, 1, 1) - branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, - strides=(2, 2), padding='valid') - - branch7x7x3 = conv2d_bn(x, 192, 1, 1) - branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7) - branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1) - branch7x7x3 = conv2d_bn( - branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid') - - branch_pool = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) - x = layers.concatenate( - [branch3x3, branch7x7x3, branch_pool], - axis=channel_axis, - name='mixed8') - - # mixed 9: 8 x 8 x 2048 - for i in range(2): - branch1x1 = conv2d_bn(x, 320, 1, 1) - - branch3x3 = conv2d_bn(x, 384, 1, 1) - branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3) - branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1) - branch3x3 = layers.concatenate( - [branch3x3_1, branch3x3_2], - axis=channel_axis, - name='mixed9_' + str(i)) - - branch3x3dbl = conv2d_bn(x, 448, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3) - branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3) - branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1) - branch3x3dbl = layers.concatenate( - [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis) - - branch_pool = layers.AveragePooling2D( - (3, 3), strides=(1, 1), padding='same')(x) - branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = layers.concatenate( - [branch1x1, branch3x3, branch3x3dbl, 
branch_pool], - axis=channel_axis, - name='mixed' + str(9 + i)) - if include_top: - # Classification block - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - x = layers.Dense(classes, activation='softmax', name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = keras_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = models.Model(inputs, x, name='inception_v3') - - # Load weights. - if weights == 'imagenet': - if include_top: - weights_path = keras_utils.get_file( - 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5', - WEIGHTS_PATH, - cache_subdir='models', - file_hash='9a0d58056eeedaa3f26cb7ebd46da564') - else: - weights_path = keras_utils.get_file( - 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5', - WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - file_hash='bcbd6486424b2319ff4ef7d526e38f63') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model - - -def preprocess_input(x, **kwargs): - """Preprocesses a numpy array encoding a batch of images. - - # Arguments - x: a 4D numpy array consists of RGB values within [0, 255]. - - # Returns - Preprocessed array. - """ - return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) -"""MobileNet v1 models for Keras. - -MobileNet is a general architecture and can be used for multiple use cases. -Depending on the use case, it can use different input layer size and -different width factors. This allows different width models to reduce -the number of multiply-adds and thereby -reduce inference cost on mobile devices. - -MobileNets support any input size greater than 32 x 32, with larger image sizes -offering better performance. -The number of parameters and number of multiply-adds -can be modified by using the `alpha` parameter, -which increases/decreases the number of filters in each layer. -By altering the image size and `alpha` parameter, -all 16 models from the paper can be built, with ImageNet weights provided. - -The paper demonstrates the performance of MobileNets using `alpha` values of -1.0 (also called 100 % MobileNet), 0.75, 0.5 and 0.25. -For each of these `alpha` values, weights for 4 different input image sizes -are provided (224, 192, 160, 128). 
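# The `preprocess_input` above delegates to imagenet_utils with mode='tf',
# which maps RGB values from [0, 255] into [-1, 1]. A quick check of that
# contract (sketch; note the numpy version mutates its argument in place):
import numpy as np
from keras.applications.inception_v3 import preprocess_input

batch = np.random.randint(0, 256, size=(2, 299, 299, 3)).astype('float32')
scaled = preprocess_input(batch.copy())
assert np.allclose(scaled, batch / 127.5 - 1.0)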
- -The following table describes the size and accuracy of the 100% MobileNet -on size 224 x 224: ----------------------------------------------------------------------------- -Width Multiplier (alpha) | ImageNet Acc | Multiply-Adds (M) | Params (M) ----------------------------------------------------------------------------- -| 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 | -| 0.75 MobileNet-224 | 68.4 % | 325 | 2.6 | -| 0.50 MobileNet-224 | 63.7 % | 149 | 1.3 | -| 0.25 MobileNet-224 | 50.6 % | 41 | 0.5 | ----------------------------------------------------------------------------- - -The following table describes the performance of -the 100 % MobileNet on various input sizes: ------------------------------------------------------------------------- - Resolution | ImageNet Acc | Multiply-Adds (M) | Params (M) ------------------------------------------------------------------------- -| 1.0 MobileNet-224 | 70.6 % | 529 | 4.2 | -| 1.0 MobileNet-192 | 69.1 % | 529 | 4.2 | -| 1.0 MobileNet-160 | 67.2 % | 529 | 4.2 | -| 1.0 MobileNet-128 | 64.4 % | 529 | 4.2 | ------------------------------------------------------------------------- - -The weights for all 16 models are obtained and translated -from TensorFlow checkpoints found at -https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet_v1.md - -# Reference - -- [MobileNets: Efficient Convolutional Neural Networks for - Mobile Vision Applications](https://arxiv.org/abs/1704.04861) -""" -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division - -import os -import warnings - -from . import get_submodules_from_kwargs -from . import imagenet_utils -from .imagenet_utils import decode_predictions -from .imagenet_utils import _obtain_input_shape - - -BASE_WEIGHT_PATH = ('https://github.com/fchollet/deep-learning-models/' - 'releases/download/v0.6/') - -backend = None -layers = None -models = None -keras_utils = None - - -def preprocess_input(x, **kwargs): - """Preprocesses a numpy array encoding a batch of images. - - # Arguments - x: a 4D numpy array consists of RGB values within [0, 255]. - - # Returns - Preprocessed array. - """ - return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) - - -def MobileNet(input_shape=None, - alpha=1.0, - depth_multiplier=1, - dropout=1e-3, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the MobileNet architecture. - - # Arguments - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` - (with `channels_last` data format) - or (3, 224, 224) (with `channels_first` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(200, 200, 3)` would be one valid value. - alpha: controls the width of the network. This is known as the - width multiplier in the MobileNet paper. - - If `alpha` < 1.0, proportionally decreases the number - of filters in each layer. - - If `alpha` > 1.0, proportionally increases the number - of filters in each layer. - - If `alpha` = 1, default number of filters from the paper - are used at each layer. - depth_multiplier: depth multiplier for depthwise convolution. This - is called the resolution multiplier in the MobileNet paper. - dropout: dropout rate - include_top: whether to include the fully-connected - layer at the top of the network. 
- weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of - `layers.Input()`) - to use as image input for the model. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - - # Returns - A Keras model instance. - - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - RuntimeError: If attempting to run this model with a - backend that does not support separable convolutions. - """ - global backend, layers, models, keras_utils - backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) - - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' - 'as true, `classes` should be 1000') - - # Determine proper input shape and default size. - if input_shape is None: - default_size = 224 - else: - if backend.image_data_format() == 'channels_first': - rows = input_shape[1] - cols = input_shape[2] - else: - rows = input_shape[0] - cols = input_shape[1] - - if rows == cols and rows in [128, 160, 192, 224]: - default_size = rows - else: - default_size = 224 - - input_shape = _obtain_input_shape(input_shape, - default_size=default_size, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if backend.image_data_format() == 'channels_last': - row_axis, col_axis = (0, 1) - else: - row_axis, col_axis = (1, 2) - rows = input_shape[row_axis] - cols = input_shape[col_axis] - - if weights == 'imagenet': - if depth_multiplier != 1: - raise ValueError('If imagenet weights are being loaded, ' - 'depth multiplier must be 1') - - if alpha not in [0.25, 0.50, 0.75, 1.0]: - raise ValueError('If imagenet weights are being loaded, ' - 'alpha can be one of' - '`0.25`, `0.50`, `0.75` or `1.0` only.') - - if rows != cols or rows not in [128, 160, 192, 224]: - rows = 224 - warnings.warn('`input_shape` is undefined or non-square, ' - 'or `rows` is not in [128, 160, 192, 224]. 
' - 'Weights for input shape (224, 224) will be' - ' loaded as the default.') - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - x = _conv_block(img_input, 32, alpha, strides=(2, 2)) - x = _depthwise_conv_block(x, 64, alpha, depth_multiplier, block_id=1) - - x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, - strides=(2, 2), block_id=2) - x = _depthwise_conv_block(x, 128, alpha, depth_multiplier, block_id=3) - - x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, - strides=(2, 2), block_id=4) - x = _depthwise_conv_block(x, 256, alpha, depth_multiplier, block_id=5) - - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, - strides=(2, 2), block_id=6) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=7) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=8) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=9) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=10) - x = _depthwise_conv_block(x, 512, alpha, depth_multiplier, block_id=11) - - x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, - strides=(2, 2), block_id=12) - x = _depthwise_conv_block(x, 1024, alpha, depth_multiplier, block_id=13) - - if include_top: - if backend.image_data_format() == 'channels_first': - shape = (int(1024 * alpha), 1, 1) - else: - shape = (1, 1, int(1024 * alpha)) - - x = layers.GlobalAveragePooling2D()(x) - x = layers.Reshape(shape, name='reshape_1')(x) - x = layers.Dropout(dropout, name='dropout')(x) - x = layers.Conv2D(classes, (1, 1), - padding='same', - name='conv_preds')(x) - x = layers.Reshape((classes,), name='reshape_2')(x) - x = layers.Activation('softmax', name='act_softmax')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = keras_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = models.Model(inputs, x, name='mobilenet_%0.2f_%s' % (alpha, rows)) - - # Load weights. - if weights == 'imagenet': - if alpha == 1.0: - alpha_text = '1_0' - elif alpha == 0.75: - alpha_text = '7_5' - elif alpha == 0.50: - alpha_text = '5_0' - else: - alpha_text = '2_5' - - if include_top: - model_name = 'mobilenet_%s_%d_tf.h5' % (alpha_text, rows) - weight_path = BASE_WEIGHT_PATH + model_name - weights_path = keras_utils.get_file(model_name, - weight_path, - cache_subdir='models') - else: - model_name = 'mobilenet_%s_%d_tf_no_top.h5' % (alpha_text, rows) - weight_path = BASE_WEIGHT_PATH + model_name - weights_path = keras_utils.get_file(model_name, - weight_path, - cache_subdir='models') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model - - -def _conv_block(inputs, filters, alpha, kernel=(3, 3), strides=(1, 1)): - """Adds an initial convolution layer (with batch normalization and relu6). - - # Arguments - inputs: Input tensor of shape `(rows, cols, 3)` - (with `channels_last` data format) or - (3, rows, cols) (with `channels_first` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(224, 224, 3)` would be one valid value. 
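# Effect of the width multiplier used by `_conv_block` and
# `_depthwise_conv_block`: each layer's filter count is simply
# int(filters * alpha). Worked numbers (sketch):
base_filters = [32, 64, 128, 256, 512, 1024]
for alpha in (0.25, 0.50, 0.75, 1.0):
    print(alpha, [int(f * alpha) for f in base_filters])
# alpha=0.25 shrinks the 32-filter stem to 8 and the final 1024-filter
# stage to 256, roughly quartering the network's width everywhere.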
- filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the convolution). - alpha: controls the width of the network. - - If `alpha` < 1.0, proportionally decreases the number - of filters in each layer. - - If `alpha` > 1.0, proportionally increases the number - of filters in each layer. - - If `alpha` = 1, default number of filters from the paper - are used at each layer. - kernel: An integer or tuple/list of 2 integers, specifying the - width and height of the 2D convolution window. - Can be a single integer to specify the same value for - all spatial dimensions. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution - along the width and height. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - - # Input shape - 4D tensor with shape: - `(samples, channels, rows, cols)` if data_format='channels_first' - or 4D tensor with shape: - `(samples, rows, cols, channels)` if data_format='channels_last'. - - # Output shape - 4D tensor with shape: - `(samples, filters, new_rows, new_cols)` - if data_format='channels_first' - or 4D tensor with shape: - `(samples, new_rows, new_cols, filters)` - if data_format='channels_last'. - `rows` and `cols` values might have changed due to stride. - - # Returns - Output tensor of block. - """ - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - filters = int(filters * alpha) - x = layers.ZeroPadding2D(padding=((0, 1), (0, 1)), - name='conv1_pad')(inputs) - x = layers.Conv2D(filters, kernel, - padding='valid', - use_bias=False, - strides=strides, - name='conv1')(x) - x = layers.BatchNormalization(axis=channel_axis, name='conv1_bn')(x) - return layers.ReLU(6., name='conv1_relu')(x) - - -def _depthwise_conv_block(inputs, pointwise_conv_filters, alpha, - depth_multiplier=1, strides=(1, 1), block_id=1): - """Adds a depthwise convolution block. - - A depthwise convolution block consists of a depthwise conv, - batch normalization, relu6, pointwise convolution, - batch normalization and relu6 activation. - - # Arguments - inputs: Input tensor of shape `(rows, cols, channels)` - (with `channels_last` data format) or - (channels, rows, cols) (with `channels_first` data format). - pointwise_conv_filters: Integer, the dimensionality of the output space - (i.e. the number of output filters in the pointwise convolution). - alpha: controls the width of the network. - - If `alpha` < 1.0, proportionally decreases the number - of filters in each layer. - - If `alpha` > 1.0, proportionally increases the number - of filters in each layer. - - If `alpha` = 1, default number of filters from the paper - are used at each layer. - depth_multiplier: The number of depthwise convolution output channels - for each input channel. - The total number of depthwise convolution output - channels will be equal to `filters_in * depth_multiplier`. - strides: An integer or tuple/list of 2 integers, - specifying the strides of the convolution - along the width and height. - Can be a single integer to specify the same value for - all spatial dimensions. - Specifying any stride value != 1 is incompatible with specifying - any `dilation_rate` value != 1. - block_id: Integer, a unique identification designating - the block number. 
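# Why the depthwise-separable block described above is cheap: a standard
# k x k convolution costs k*k*c_in*c_out multiplies per output position,
# while depthwise (k*k*c_in) plus pointwise (c_in*c_out) costs far less.
# Worked numbers for one 3x3, 512-channel layer (sketch):
k, c_in, c_out = 3, 512, 512
standard = k * k * c_in * c_out           # 2,359,296 multiplies per position
separable = k * k * c_in + c_in * c_out   # 266,752 multiplies per position
print(round(standard / separable, 1))     # ~8.8x fewer multiply-adds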
- - # Input shape - 4D tensor with shape: - `(batch, channels, rows, cols)` if data_format='channels_first' - or 4D tensor with shape: - `(batch, rows, cols, channels)` if data_format='channels_last'. - - # Output shape - 4D tensor with shape: - `(batch, filters, new_rows, new_cols)` - if data_format='channels_first' - or 4D tensor with shape: - `(batch, new_rows, new_cols, filters)` - if data_format='channels_last'. - `rows` and `cols` values might have changed due to stride. - - # Returns - Output tensor of block. - """ - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - pointwise_conv_filters = int(pointwise_conv_filters * alpha) - - if strides == (1, 1): - x = inputs - else: - x = layers.ZeroPadding2D(((0, 1), (0, 1)), - name='conv_pad_%d' % block_id)(inputs) - x = layers.DepthwiseConv2D((3, 3), - padding='same' if strides == (1, 1) else 'valid', - depth_multiplier=depth_multiplier, - strides=strides, - use_bias=False, - name='conv_dw_%d' % block_id)(x) - x = layers.BatchNormalization( - axis=channel_axis, name='conv_dw_%d_bn' % block_id)(x) - x = layers.ReLU(6., name='conv_dw_%d_relu' % block_id)(x) - - x = layers.Conv2D(pointwise_conv_filters, (1, 1), - padding='same', - use_bias=False, - strides=(1, 1), - name='conv_pw_%d' % block_id)(x) - x = layers.BatchNormalization(axis=channel_axis, - name='conv_pw_%d_bn' % block_id)(x) - return layers.ReLU(6., name='conv_pw_%d_relu' % block_id)(x) -"""MobileNet v2 models for Keras. - -MobileNetV2 is a general architecture and can be used for multiple use cases. -Depending on the use case, it can use different input layer size and -different width factors. This allows different width models to reduce -the number of multiply-adds and thereby -reduce inference cost on mobile devices. - -MobileNetV2 is very similar to the original MobileNet, -except that it uses inverted residual blocks with -bottlenecking features. It has a drastically lower -parameter count than the original MobileNet. -MobileNets support any input size greater -than 32 x 32, with larger image sizes -offering better performance. - -The number of parameters and number of multiply-adds -can be modified by using the `alpha` parameter, -which increases/decreases the number of filters in each layer. -By altering the image size and `alpha` parameter, -all 22 models from the paper can be built, with ImageNet weights provided. - -The paper demonstrates the performance of MobileNetV2 using `alpha` values of -0.35, 0.5, 0.75, 1.0 (also called 100 % MobileNet), 1.3, and 1.4. - -For each of these `alpha` values, weights for 5 different input image sizes -are provided (224, 192, 160, 128, and 96).
- - -The following table describes the performance of -MobileNetV2 at various width multipliers and input sizes: ------------------------------------------------------------------------- -MACs stands for Multiply-Adds - - Classification Checkpoint | MACs (M) | Parameters (M) | Top 1 Accuracy | Top 5 Accuracy ---------------------------|----------|----------------|----------------|--------------- -| [mobilenet_v2_1.4_224] | 582 | 6.06 | 75.0 | 92.5 | -| [mobilenet_v2_1.3_224] | 509 | 5.34 | 74.4 | 92.1 | -| [mobilenet_v2_1.0_224] | 300 | 3.47 | 71.8 | 91.0 | -| [mobilenet_v2_1.0_192] | 221 | 3.47 | 70.7 | 90.1 | -| [mobilenet_v2_1.0_160] | 154 | 3.47 | 68.8 | 89.0 | -| [mobilenet_v2_1.0_128] | 99 | 3.47 | 65.3 | 86.9 | -| [mobilenet_v2_1.0_96] | 56 | 3.47 | 60.3 | 83.2 | -| [mobilenet_v2_0.75_224] | 209 | 2.61 | 69.8 | 89.6 | -| [mobilenet_v2_0.75_192] | 153 | 2.61 | 68.7 | 88.9 | -| [mobilenet_v2_0.75_160] | 107 | 2.61 | 66.4 | 87.3 | -| [mobilenet_v2_0.75_128] | 69 | 2.61 | 63.2 | 85.3 | -| [mobilenet_v2_0.75_96] | 39 | 2.61 | 58.8 | 81.6 | -| [mobilenet_v2_0.5_224] | 97 | 1.95 | 65.4 | 86.4 | -| [mobilenet_v2_0.5_192] | 71 | 1.95 | 63.9 | 85.4 | -| [mobilenet_v2_0.5_160] | 50 | 1.95 | 61.0 | 83.2 | -| [mobilenet_v2_0.5_128] | 32 | 1.95 | 57.7 | 80.8 | -| [mobilenet_v2_0.5_96] | 18 | 1.95 | 51.2 | 75.8 | -| [mobilenet_v2_0.35_224] | 59 | 1.66 | 60.3 | 82.9 | -| [mobilenet_v2_0.35_192] | 43 | 1.66 | 58.2 | 81.2 | -| [mobilenet_v2_0.35_160] | 30 | 1.66 | 55.7 | 79.1 | -| [mobilenet_v2_0.35_128] | 20 | 1.66 | 50.8 | 75.0 | -| [mobilenet_v2_0.35_96] | 11 | 1.66 | 45.5 | 70.4 | - -The weights for all 22 models are obtained and -translated from the TensorFlow checkpoints found [here] -(https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/README.md). - -# Reference - -This file contains building code for MobileNetV2, based on -[MobileNetV2: Inverted Residuals and Linear Bottlenecks] -(https://arxiv.org/abs/1801.04381) (CVPR 2018) - -Tests comparing this model to the existing TensorFlow model can be -found at [mobilenet_v2_keras] -(https://github.com/JonathanCMitchell/mobilenet_v2_keras) -""" -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division - -import os -import warnings -import numpy as np - -from . import correct_pad -from . import get_submodules_from_kwargs -from . import imagenet_utils -from .imagenet_utils import decode_predictions -from .imagenet_utils import _obtain_input_shape - -BASE_WEIGHT_PATH = ('https://github.com/JonathanCMitchell/mobilenet_v2_keras/' - 'releases/download/v1.1/') - -backend = None -layers = None -models = None -keras_utils = None - - -def preprocess_input(x, **kwargs): - """Preprocesses a numpy array encoding a batch of images. - - # Arguments - x: a 4D numpy array consisting of RGB values within [0, 255]. - - # Returns - Preprocessed array. - """ - return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) - - -# This function is taken from the original tf repo. -# It ensures that all layers have a channel number that is divisible by 8. -# It can be seen here: -# https://github.com/tensorflow/models/blob/master/research/slim/nets/mobilenet/mobilenet.py - - -def _make_divisible(v, divisor, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%.
- if new_v < 0.9 * v: - new_v += divisor - return new_v - - -def MobileNetV2(input_shape=None, - alpha=1.0, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the MobileNetV2 architecture. - - # Arguments - input_shape: optional shape tuple, to be specified if you would - like to use a model with an input img resolution that is not - (224, 224, 3). - It should have exactly 3 inputs channels (224, 224, 3). - You can also omit this option if you would like - to infer input_shape from an input_tensor. - If you choose to include both input_tensor and input_shape then - input_shape will be used if they match, if the shapes - do not match then we will throw an error. - E.g. `(160, 160, 3)` would be one valid value. - alpha: controls the width of the network. This is known as the - width multiplier in the MobileNetV2 paper, but the name is kept for - consistency with MobileNetV1 in Keras. - - If `alpha` < 1.0, proportionally decreases the number - of filters in each layer. - - If `alpha` > 1.0, proportionally increases the number - of filters in each layer. - - If `alpha` = 1, default number of filters from the paper - are used at each layer. - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of - `layers.Input()`) - to use as image input for the model. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - - # Returns - A Keras model instance. - - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape or invalid alpha, rows when - weights='imagenet' - """ - global backend, layers, models, keras_utils - backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) - - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' - 'as true, `classes` should be 1000') - - # Determine proper input shape and default size. 
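# Behaviour of `_make_divisible` above: snap a requested width to the nearest
# multiple of `divisor`, but never land more than 10% below the request.
# Self-contained checks (sketch, restating the rounding rule):
def make_divisible(v, divisor=8, min_value=None):
    if min_value is None:
        min_value = divisor
    new_v = max(min_value, int(v + divisor / 2) // divisor * divisor)
    if new_v < 0.9 * v:  # never round down by more than 10%
        new_v += divisor
    return new_v

assert make_divisible(32 * 0.35) == 16   # 11.2 -> 8 would lose >10%, bump to 16
assert make_divisible(32 * 0.5) == 16    # 16 is already a multiple of 8
assert make_divisible(1280 * 1.4) == 1792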
- # If both input_shape and input_tensor are used, they should match - if input_shape is not None and input_tensor is not None: - try: - is_input_t_tensor = backend.is_keras_tensor(input_tensor) - except ValueError: - try: - is_input_t_tensor = backend.is_keras_tensor( - keras_utils.get_source_inputs(input_tensor)) - except ValueError: - raise ValueError('input_tensor: ', input_tensor, - 'is not a valid input_tensor') - if is_input_t_tensor: - if backend.image_data_format() == 'channels_first': - if backend.int_shape(input_tensor)[1] != input_shape[1]: - raise ValueError('input_shape: ', input_shape, - 'and input_tensor: ', input_tensor, - 'do not meet the same shape requirements') - else: - if backend.int_shape(input_tensor)[2] != input_shape[1]: - raise ValueError('input_shape: ', input_shape, - 'and input_tensor: ', input_tensor, - 'do not meet the same shape requirements') - else: - raise ValueError('input_tensor specified: ', input_tensor, - 'is not a keras tensor') - - # If input_shape is None, infer shape from input_tensor - if input_shape is None and input_tensor is not None: - - try: - backend.is_keras_tensor(input_tensor) - except ValueError: - raise ValueError('input_tensor: ', input_tensor, - 'is type: ', type(input_tensor), - 'which is not a valid type') - - if input_shape is None and not backend.is_keras_tensor(input_tensor): - default_size = 224 - elif input_shape is None and backend.is_keras_tensor(input_tensor): - if backend.image_data_format() == 'channels_first': - rows = backend.int_shape(input_tensor)[2] - cols = backend.int_shape(input_tensor)[3] - else: - rows = backend.int_shape(input_tensor)[1] - cols = backend.int_shape(input_tensor)[2] - - if rows == cols and rows in [96, 128, 160, 192, 224]: - default_size = rows - else: - default_size = 224 - - # If input_shape is None and no input_tensor - elif input_shape is None: - default_size = 224 - - # If input_shape is not None, assume default size - else: - if backend.image_data_format() == 'channels_first': - rows = input_shape[1] - cols = input_shape[2] - else: - rows = input_shape[0] - cols = input_shape[1] - - if rows == cols and rows in [96, 128, 160, 192, 224]: - default_size = rows - else: - default_size = 224 - - input_shape = _obtain_input_shape(input_shape, - default_size=default_size, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if backend.image_data_format() == 'channels_last': - row_axis, col_axis = (0, 1) - else: - row_axis, col_axis = (1, 2) - rows = input_shape[row_axis] - cols = input_shape[col_axis] - - if weights == 'imagenet': - if alpha not in [0.35, 0.50, 0.75, 1.0, 1.3, 1.4]: - raise ValueError('If imagenet weights are being loaded, ' - 'alpha can be one of `0.35`, `0.50`, `0.75`, ' - '`1.0`, `1.3` or `1.4` only.') - - if rows != cols or rows not in [96, 128, 160, 192, 224]: - rows = 224 - warnings.warn('`input_shape` is undefined or non-square, ' - 'or `rows` is not in [96, 128, 160, 192, 224].'
- ' Weights for input shape (224, 224) will be' - ' loaded as the default.') - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - - first_block_filters = _make_divisible(32 * alpha, 8) - x = layers.ZeroPadding2D(padding=correct_pad(backend, img_input, 3), - name='Conv1_pad')(img_input) - x = layers.Conv2D(first_block_filters, - kernel_size=3, - strides=(2, 2), - padding='valid', - use_bias=False, - name='Conv1')(x) - x = layers.BatchNormalization(axis=channel_axis, - epsilon=1e-3, - momentum=0.999, - name='bn_Conv1')(x) - x = layers.ReLU(6., name='Conv1_relu')(x) - - x = _inverted_res_block(x, filters=16, alpha=alpha, stride=1, - expansion=1, block_id=0) - - x = _inverted_res_block(x, filters=24, alpha=alpha, stride=2, - expansion=6, block_id=1) - x = _inverted_res_block(x, filters=24, alpha=alpha, stride=1, - expansion=6, block_id=2) - - x = _inverted_res_block(x, filters=32, alpha=alpha, stride=2, - expansion=6, block_id=3) - x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, - expansion=6, block_id=4) - x = _inverted_res_block(x, filters=32, alpha=alpha, stride=1, - expansion=6, block_id=5) - - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=2, - expansion=6, block_id=6) - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, - expansion=6, block_id=7) - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, - expansion=6, block_id=8) - x = _inverted_res_block(x, filters=64, alpha=alpha, stride=1, - expansion=6, block_id=9) - - x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, - expansion=6, block_id=10) - x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, - expansion=6, block_id=11) - x = _inverted_res_block(x, filters=96, alpha=alpha, stride=1, - expansion=6, block_id=12) - - x = _inverted_res_block(x, filters=160, alpha=alpha, stride=2, - expansion=6, block_id=13) - x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, - expansion=6, block_id=14) - x = _inverted_res_block(x, filters=160, alpha=alpha, stride=1, - expansion=6, block_id=15) - - x = _inverted_res_block(x, filters=320, alpha=alpha, stride=1, - expansion=6, block_id=16) - - # no alpha applied to last conv as stated in the paper: - # if the width multiplier is greater than 1 we - # increase the number of output channels - if alpha > 1.0: - last_block_filters = _make_divisible(1280 * alpha, 8) - else: - last_block_filters = 1280 - - x = layers.Conv2D(last_block_filters, - kernel_size=1, - use_bias=False, - name='Conv_1')(x) - x = layers.BatchNormalization(axis=channel_axis, - epsilon=1e-3, - momentum=0.999, - name='Conv_1_bn')(x) - x = layers.ReLU(6., name='out_relu')(x) - - if include_top: - x = layers.GlobalAveragePooling2D()(x) - x = layers.Dense(classes, activation='softmax', - use_bias=True, name='Logits')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = keras_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = models.Model(inputs, x, - name='mobilenetv2_%0.2f_%s' % (alpha, rows)) - - # Load weights. 
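# Naming scheme for the weight files fetched below:
#   mobilenet_v2_weights_tf_dim_ordering_tf_kernels_<alpha>_<rows>[_no_top].h5
# e.g. alpha=1.0 at 224 x 224 with the classifier head resolves to
# mobilenet_v2_weights_tf_dim_ordering_tf_kernels_1.0_224.h5 under
# BASE_WEIGHT_PATH.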
- if weights == 'imagenet': - if include_top: - model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + - str(alpha) + '_' + str(rows) + '.h5') - weight_path = BASE_WEIGHT_PATH + model_name - weights_path = keras_utils.get_file( - model_name, weight_path, cache_subdir='models') - else: - model_name = ('mobilenet_v2_weights_tf_dim_ordering_tf_kernels_' + - str(alpha) + '_' + str(rows) + '_no_top' + '.h5') - weight_path = BASE_WEIGHT_PATH + model_name - weights_path = keras_utils.get_file( - model_name, weight_path, cache_subdir='models') - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model - - -def _inverted_res_block(inputs, expansion, stride, alpha, filters, block_id): - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - - in_channels = backend.int_shape(inputs)[channel_axis] - pointwise_conv_filters = int(filters * alpha) - pointwise_filters = _make_divisible(pointwise_conv_filters, 8) - x = inputs - prefix = 'block_{}_'.format(block_id) - - if block_id: - # Expand - x = layers.Conv2D(expansion * in_channels, - kernel_size=1, - padding='same', - use_bias=False, - activation=None, - name=prefix + 'expand')(x) - x = layers.BatchNormalization(axis=channel_axis, - epsilon=1e-3, - momentum=0.999, - name=prefix + 'expand_BN')(x) - x = layers.ReLU(6., name=prefix + 'expand_relu')(x) - else: - prefix = 'expanded_conv_' - - # Depthwise - if stride == 2: - x = layers.ZeroPadding2D(padding=correct_pad(backend, x, 3), - name=prefix + 'pad')(x) - x = layers.DepthwiseConv2D(kernel_size=3, - strides=stride, - activation=None, - use_bias=False, - padding='same' if stride == 1 else 'valid', - name=prefix + 'depthwise')(x) - x = layers.BatchNormalization(axis=channel_axis, - epsilon=1e-3, - momentum=0.999, - name=prefix + 'depthwise_BN')(x) - - x = layers.ReLU(6., name=prefix + 'depthwise_relu')(x) - - # Project - x = layers.Conv2D(pointwise_filters, - kernel_size=1, - padding='same', - use_bias=False, - activation=None, - name=prefix + 'project')(x) - x = layers.BatchNormalization(axis=channel_axis, - epsilon=1e-3, - momentum=0.999, - name=prefix + 'project_BN')(x) - - if in_channels == pointwise_filters and stride == 1: - return layers.Add(name=prefix + 'add')([inputs, x]) - return x -"""NASNet-A models for Keras. - -NASNet refers to Neural Architecture Search Network, a family of models -that were designed automatically by learning the model architectures -directly on the dataset of interest. - -Here we consider NASNet-A, the highest performance model that was found -for the CIFAR-10 dataset, and then extended to ImageNet 2012 dataset, -obtaining state of the art performance on CIFAR-10 and ImageNet 2012. -Only the NASNet-A models, and their respective weights, which are suited -for ImageNet 2012 are provided. 
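# Channel bookkeeping for `_inverted_res_block` above, worked for block_id=2
# of MobileNetV2 with alpha=1.0 (sketch): expand 6x with a 1x1 conv, filter
# depthwise, project back down, and add the skip connection only when the
# stride is 1 and input/output widths agree.
in_channels = 24                 # output width of block_id=1
expanded = 6 * in_channels       # 144 channels after the 1x1 expand
projected = 24                   # _make_divisible(24 * 1.0, 8) == 24
stride = 1
uses_residual = stride == 1 and in_channels == projected
print(expanded, projected, uses_residual)  # 144 24 True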
- -The below table describes the performance on ImageNet 2012: --------------------------------------------------------------------------------- - Architecture | Top-1 Acc | Top-5 Acc | Multiply-Adds | Params (M) --------------------------------------------------------------------------------- -| NASNet-A (4 @ 1056) | 74.0 % | 91.6 % | 564 M | 5.3 | -| NASNet-A (6 @ 4032) | 82.7 % | 96.2 % | 23.8 B | 88.9 | --------------------------------------------------------------------------------- - -Weights obtained from the official TensorFlow repository found at -https://github.com/tensorflow/models/tree/master/research/slim/nets/nasnet - -# References - - - [Learning Transferable Architectures for Scalable Image Recognition] - (https://arxiv.org/abs/1707.07012) (CVPR 2018) - -This model is based on the following implementations: - - - [TF Slim Implementation] - (https://github.com/tensorflow/models/blob/master/research/slim/nets/nasnet/nasnet.py) - - [TensorNets implementation] - (https://github.com/taehoonlee/tensornets/blob/master/tensornets/nasnets.py) -""" -from __future__ import print_function -from __future__ import absolute_import -from __future__ import division - -import os -import warnings - -from . import correct_pad -from . import get_submodules_from_kwargs -from . import imagenet_utils -from .imagenet_utils import decode_predictions -from .imagenet_utils import _obtain_input_shape - -BASE_WEIGHTS_PATH = ('https://github.com/titu1994/Keras-NASNet/' - 'releases/download/v1.2/') -NASNET_MOBILE_WEIGHT_PATH = BASE_WEIGHTS_PATH + 'NASNet-mobile.h5' -NASNET_MOBILE_WEIGHT_PATH_NO_TOP = BASE_WEIGHTS_PATH + 'NASNet-mobile-no-top.h5' -NASNET_LARGE_WEIGHT_PATH = BASE_WEIGHTS_PATH + 'NASNet-large.h5' -NASNET_LARGE_WEIGHT_PATH_NO_TOP = BASE_WEIGHTS_PATH + 'NASNet-large-no-top.h5' - -backend = None -layers = None -models = None -keras_utils = None - - -def NASNet(input_shape=None, - penultimate_filters=4032, - num_blocks=6, - stem_block_filters=96, - skip_reduction=True, - filter_multiplier=2, - include_top=True, - weights=None, - input_tensor=None, - pooling=None, - classes=1000, - default_size=None, - **kwargs): - '''Instantiates a NASNet model. - - Optionally loads weights pre-trained on ImageNet. - Note that the data format convention used by the model is - the one specified in your Keras config at `~/.keras/keras.json`. - - # Arguments - input_shape: Optional shape tuple, the input shape - is by default `(331, 331, 3)` for NASNetLarge and - `(224, 224, 3)` for NASNetMobile. - It should have exactly 3 input channels, - and width and height should be no smaller than 32. - E.g. `(224, 224, 3)` would be one valid value. - penultimate_filters: Number of filters in the penultimate layer. - NASNet models use the notation `NASNet (N @ P)`, where: - - N is the number of blocks - - P is the number of penultimate filters - num_blocks: Number of repeated blocks of the NASNet model. - NASNet models use the notation `NASNet (N @ P)`, where: - - N is the number of blocks - - P is the number of penultimate filters - stem_block_filters: Number of filters in the initial stem block - skip_reduction: Whether to skip the reduction step at the tail - end of the network. - filter_multiplier: Controls the width of the network. - - If `filter_multiplier` < 1.0, proportionally decreases the number - of filters in each layer. - - If `filter_multiplier` > 1.0, proportionally increases the number - of filters in each layer. - - If `filter_multiplier` = 1, default number of filters from the - paper are used at each layer. 
- include_top: Whether to include the fully-connected - layer at the top of the network. - weights: `None` (random initialization) or - `imagenet` (ImageNet weights) - input_tensor: Optional Keras tensor (i.e. output of - `layers.Input()`) - to use as image input for the model. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - classes: Optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - default_size: Specifies the default image size of the model - - # Returns - A Keras model instance. - - # Raises - ValueError: In case of invalid argument for `weights`, - invalid input shape or invalid `penultimate_filters` value. - ''' - global backend, layers, models, keras_utils - backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) - - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top` ' - 'as true, `classes` should be 1000') - - if (isinstance(input_shape, tuple) and - None in input_shape and - weights == 'imagenet'): - raise ValueError('When specifying the input shape of a NASNet' - ' and loading `ImageNet` weights, ' - 'the input_shape argument must be static ' - '(no None entries). Got: `input_shape=' + - str(input_shape) + '`.') - - if default_size is None: - default_size = 331 - - # Determine proper input shape and default size. - input_shape = _obtain_input_shape(input_shape, - default_size=default_size, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=True, - weights=weights) - - if backend.image_data_format() != 'channels_last': - warnings.warn('The NASNet family of models is only available ' - 'for the input data format "channels_last" ' - '(width, height, channels). ' - 'However your settings specify the default ' - 'data format "channels_first" (channels, width, height).' - ' You should set `image_data_format="channels_last"` ' - 'in your Keras config located at ~/.keras/keras.json. ' - 'The model being returned right now will expect inputs ' - 'to follow the "channels_last" data format.') - backend.set_image_data_format('channels_last') - old_data_format = 'channels_first' - else: - old_data_format = None - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - if penultimate_filters % (24 * (filter_multiplier ** 2)) != 0: - raise ValueError( - 'For NASNet-A models, the `penultimate_filters` must be a multiple ' - 'of 24 * (`filter_multiplier` ** 2). 
Current value: %d' % - penultimate_filters) - - channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 - filters = penultimate_filters // 24 - - x = layers.Conv2D(stem_block_filters, (3, 3), - strides=(2, 2), - padding='valid', - use_bias=False, - name='stem_conv1', - kernel_initializer='he_normal')(img_input) - - x = layers.BatchNormalization( - axis=channel_dim, momentum=0.9997, epsilon=1e-3, name='stem_bn1')(x) - - p = None - x, p = _reduction_a_cell(x, p, filters // (filter_multiplier ** 2), - block_id='stem_1') - x, p = _reduction_a_cell(x, p, filters // filter_multiplier, - block_id='stem_2') - - for i in range(num_blocks): - x, p = _normal_a_cell(x, p, filters, block_id='%d' % (i)) - - x, p0 = _reduction_a_cell(x, p, filters * filter_multiplier, - block_id='reduce_%d' % (num_blocks)) - - p = p0 if not skip_reduction else p - - for i in range(num_blocks): - x, p = _normal_a_cell(x, p, filters * filter_multiplier, - block_id='%d' % (num_blocks + i + 1)) - - x, p0 = _reduction_a_cell(x, p, filters * filter_multiplier ** 2, - block_id='reduce_%d' % (2 * num_blocks)) - - p = p0 if not skip_reduction else p - - for i in range(num_blocks): - x, p = _normal_a_cell(x, p, filters * filter_multiplier ** 2, - block_id='%d' % (2 * num_blocks + i + 1)) - - x = layers.Activation('relu')(x) - - if include_top: - x = layers.GlobalAveragePooling2D()(x) - x = layers.Dense(classes, activation='softmax', name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = keras_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - model = models.Model(inputs, x, name='NASNet') - - # Load weights. - if weights == 'imagenet': - if default_size == 224: # mobile version - if include_top: - weights_path = keras_utils.get_file( - 'nasnet_mobile.h5', - NASNET_MOBILE_WEIGHT_PATH, - cache_subdir='models', - file_hash='020fb642bf7360b370c678b08e0adf61') - else: - weights_path = keras_utils.get_file( - 'nasnet_mobile_no_top.h5', - NASNET_MOBILE_WEIGHT_PATH_NO_TOP, - cache_subdir='models', - file_hash='1ed92395b5b598bdda52abe5c0dbfd63') - model.load_weights(weights_path) - elif default_size == 331: # large version - if include_top: - weights_path = keras_utils.get_file( - 'nasnet_large.h5', - NASNET_LARGE_WEIGHT_PATH, - cache_subdir='models', - file_hash='11577c9a518f0070763c2b964a382f17') - else: - weights_path = keras_utils.get_file( - 'nasnet_large_no_top.h5', - NASNET_LARGE_WEIGHT_PATH_NO_TOP, - cache_subdir='models', - file_hash='d81d89dc07e6e56530c4e77faddd61b5') - model.load_weights(weights_path) - else: - raise ValueError( - 'ImageNet weights can only be loaded with NASNetLarge' - ' or NASNetMobile') - elif weights is not None: - model.load_weights(weights) - - if old_data_format: - backend.set_image_data_format(old_data_format) - - return model - - -def NASNetLarge(input_shape=None, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - **kwargs): - '''Instantiates a NASNet model in ImageNet mode. - - Optionally loads weights pre-trained on ImageNet. - Note that the data format convention used by the model is - the one specified in your Keras config at `~/.keras/keras.json`. 
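# The `NASNet (N @ P)` notation above maps directly onto the constructor
# arguments: NASNetLarge is 6 @ 4032, NASNetMobile is 4 @ 1056, and
# `penultimate_filters` must be divisible by 24 * filter_multiplier ** 2
# so every cell stage gets an integral width. Quick check (sketch):
for name, penultimate in (('large', 4032), ('mobile', 1056)):
    assert penultimate % (24 * 2 ** 2) == 0
    print(name, 'base cell width:', penultimate // 24)  # 168 and 44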
- - # Arguments - input_shape: Optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(331, 331, 3)` for NASNetLarge. - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(224, 224, 3)` would be one valid value. - include_top: Whether to include the fully-connected - layer at the top of the network. - weights: `None` (random initialization) or - `imagenet` (ImageNet weights) - input_tensor: Optional Keras tensor (i.e. output of - `layers.Input()`) - to use as image input for the model. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - classes: Optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - - # Returns - A Keras model instance. - - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - RuntimeError: If attempting to run this model with a - backend that does not support separable convolutions. - ''' - return NASNet(input_shape, - penultimate_filters=4032, - num_blocks=6, - stem_block_filters=96, - skip_reduction=True, - filter_multiplier=2, - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - pooling=pooling, - classes=classes, - default_size=331, - **kwargs) - - -def NASNetMobile(input_shape=None, - include_top=True, - weights='imagenet', - input_tensor=None, - pooling=None, - classes=1000, - **kwargs): - '''Instantiates a Mobile NASNet model in ImageNet mode. - - Optionally loads weights pre-trained on ImageNet. - Note that the data format convention used by the model is - the one specified in your Keras config at `~/.keras/keras.json`. - - # Arguments - input_shape: Optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` for NASNetMobile - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(224, 224, 3)` would be one valid value. - include_top: Whether to include the fully-connected - layer at the top of the network. - weights: `None` (random initialization) or - `imagenet` (ImageNet weights) - input_tensor: Optional Keras tensor (i.e. output of - `layers.Input()`) - to use as image input for the model. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model - will be the 4D tensor output of the - last convolutional layer. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional layer, and thus - the output of the model will be a - 2D tensor. - - `max` means that global max pooling will - be applied. - classes: Optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - - # Returns - A Keras model instance. - - # Raises - ValueError: In case of invalid argument for `weights`, - or invalid input shape. - RuntimeError: If attempting to run this model with a - backend that does not support separable convolutions. 
- ''' - return NASNet(input_shape, - penultimate_filters=1056, - num_blocks=4, - stem_block_filters=32, - skip_reduction=False, - filter_multiplier=2, - include_top=include_top, - weights=weights, - input_tensor=input_tensor, - pooling=pooling, - classes=classes, - default_size=224, - **kwargs) - - -def _separable_conv_block(ip, filters, - kernel_size=(3, 3), - strides=(1, 1), - block_id=None): - '''Adds 2 blocks of [relu-separable conv-batchnorm]. - - # Arguments - ip: Input tensor - filters: Number of output filters per layer - kernel_size: Kernel size of separable convolutions - strides: Strided convolution for downsampling - block_id: String block_id - - # Returns - A Keras tensor - ''' - channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 - - with backend.name_scope('separable_conv_block_%s' % block_id): - x = layers.Activation('relu')(ip) - if strides == (2, 2): - x = layers.ZeroPadding2D( - padding=correct_pad(backend, x, kernel_size), - name='separable_conv_1_pad_%s' % block_id)(x) - conv_pad = 'valid' - else: - conv_pad = 'same' - x = layers.SeparableConv2D(filters, kernel_size, - strides=strides, - name='separable_conv_1_%s' % block_id, - padding=conv_pad, use_bias=False, - kernel_initializer='he_normal')(x) - x = layers.BatchNormalization( - axis=channel_dim, - momentum=0.9997, - epsilon=1e-3, - name='separable_conv_1_bn_%s' % (block_id))(x) - x = layers.Activation('relu')(x) - x = layers.SeparableConv2D(filters, kernel_size, - name='separable_conv_2_%s' % block_id, - padding='same', - use_bias=False, - kernel_initializer='he_normal')(x) - x = layers.BatchNormalization( - axis=channel_dim, - momentum=0.9997, - epsilon=1e-3, - name='separable_conv_2_bn_%s' % (block_id))(x) - return x - - -def _adjust_block(p, ip, filters, block_id=None): - '''Adjusts the input `previous path` to match the shape of the `input`. - - Used in situations where the output number of filters needs to be changed. 
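# A condensed restatement (sketch) of `_separable_conv_block` above using the
# standalone keras package: two rounds of ReLU -> SeparableConv2D -> BatchNorm,
# with only the first conv optionally strided (padding simplified to 'same').
from keras import Input, layers

def separable_block_sketch(x, filters, kernel_size=(3, 3), strides=(1, 1)):
    for s in (strides, (1, 1)):
        x = layers.Activation('relu')(x)
        x = layers.SeparableConv2D(filters, kernel_size, strides=s,
                                   padding='same', use_bias=False,
                                   kernel_initializer='he_normal')(x)
        x = layers.BatchNormalization(momentum=0.9997, epsilon=1e-3)(x)
    return x

ip = Input(shape=(32, 32, 44))
out = separable_block_sketch(ip, 44)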
- - # Arguments - p: Input tensor which needs to be modified - ip: Input tensor whose shape needs to be matched - filters: Number of output filters to be matched - block_id: String block_id - - # Returns - Adjusted Keras tensor - ''' - channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 - img_dim = 2 if backend.image_data_format() == 'channels_first' else -2 - - ip_shape = backend.int_shape(ip) - - if p is not None: - p_shape = backend.int_shape(p) - - with backend.name_scope('adjust_block'): - if p is None: - p = ip - - elif p_shape[img_dim] != ip_shape[img_dim]: - with backend.name_scope('adjust_reduction_block_%s' % block_id): - p = layers.Activation('relu', - name='adjust_relu_1_%s' % block_id)(p) - p1 = layers.AveragePooling2D( - (1, 1), - strides=(2, 2), - padding='valid', - name='adjust_avg_pool_1_%s' % block_id)(p) - p1 = layers.Conv2D( - filters // 2, (1, 1), - padding='same', - use_bias=False, name='adjust_conv_1_%s' % block_id, - kernel_initializer='he_normal')(p1) - - p2 = layers.ZeroPadding2D(padding=((0, 1), (0, 1)))(p) - p2 = layers.Cropping2D(cropping=((1, 0), (1, 0)))(p2) - p2 = layers.AveragePooling2D( - (1, 1), - strides=(2, 2), - padding='valid', - name='adjust_avg_pool_2_%s' % block_id)(p2) - p2 = layers.Conv2D( - filters // 2, (1, 1), - padding='same', - use_bias=False, - name='adjust_conv_2_%s' % block_id, - kernel_initializer='he_normal')(p2) - - p = layers.concatenate([p1, p2], axis=channel_dim) - p = layers.BatchNormalization( - axis=channel_dim, - momentum=0.9997, - epsilon=1e-3, - name='adjust_bn_%s' % block_id)(p) - - elif p_shape[channel_dim] != filters: - with backend.name_scope('adjust_projection_block_%s' % block_id): - p = layers.Activation('relu')(p) - p = layers.Conv2D( - filters, - (1, 1), - strides=(1, 1), - padding='same', - name='adjust_conv_projection_%s' % block_id, - use_bias=False, - kernel_initializer='he_normal')(p) - p = layers.BatchNormalization( - axis=channel_dim, - momentum=0.9997, - epsilon=1e-3, - name='adjust_bn_%s' % block_id)(p) - return p - - -def _normal_a_cell(ip, p, filters, block_id=None): - '''Adds a Normal cell for NASNet-A (Fig. 4 in the paper). 
- - # Arguments - ip: Input tensor `x` - p: Input tensor `p` - filters: Number of output filters - block_id: String block_id - - # Returns - A Keras tensor - ''' - channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 - - with backend.name_scope('normal_A_block_%s' % block_id): - p = _adjust_block(p, ip, filters, block_id) - - h = layers.Activation('relu')(ip) - h = layers.Conv2D( - filters, (1, 1), - strides=(1, 1), - padding='same', - name='normal_conv_1_%s' % block_id, - use_bias=False, - kernel_initializer='he_normal')(h) - h = layers.BatchNormalization( - axis=channel_dim, - momentum=0.9997, - epsilon=1e-3, - name='normal_bn_1_%s' % block_id)(h) - - with backend.name_scope('block_1'): - x1_1 = _separable_conv_block( - h, filters, - kernel_size=(5, 5), - block_id='normal_left1_%s' % block_id) - x1_2 = _separable_conv_block( - p, filters, - block_id='normal_right1_%s' % block_id) - x1 = layers.add([x1_1, x1_2], name='normal_add_1_%s' % block_id) - - with backend.name_scope('block_2'): - x2_1 = _separable_conv_block( - p, filters, (5, 5), - block_id='normal_left2_%s' % block_id) - x2_2 = _separable_conv_block( - p, filters, (3, 3), - block_id='normal_right2_%s' % block_id) - x2 = layers.add([x2_1, x2_2], name='normal_add_2_%s' % block_id) - - with backend.name_scope('block_3'): - x3 = layers.AveragePooling2D( - (3, 3), - strides=(1, 1), - padding='same', - name='normal_left3_%s' % (block_id))(h) - x3 = layers.add([x3, p], name='normal_add_3_%s' % block_id) - - with backend.name_scope('block_4'): - x4_1 = layers.AveragePooling2D( - (3, 3), - strides=(1, 1), - padding='same', - name='normal_left4_%s' % (block_id))(p) - x4_2 = layers.AveragePooling2D( - (3, 3), - strides=(1, 1), - padding='same', - name='normal_right4_%s' % (block_id))(p) - x4 = layers.add([x4_1, x4_2], name='normal_add_4_%s' % block_id) - - with backend.name_scope('block_5'): - x5 = _separable_conv_block(h, filters, - block_id='normal_left5_%s' % block_id) - x5 = layers.add([x5, h], name='normal_add_5_%s' % block_id) - - x = layers.concatenate([p, x1, x2, x3, x4, x5], - axis=channel_dim, - name='normal_concat_%s' % block_id) - return x, ip - - -def _reduction_a_cell(ip, p, filters, block_id=None): - '''Adds a Reduction cell for NASNet-A (Fig. 4 in the paper). 
- - # Arguments - ip: Input tensor `x` - p: Input tensor `p` - filters: Number of output filters - block_id: String block_id - - # Returns - A Keras tensor - ''' - channel_dim = 1 if backend.image_data_format() == 'channels_first' else -1 - - with backend.name_scope('reduction_A_block_%s' % block_id): - p = _adjust_block(p, ip, filters, block_id) - - h = layers.Activation('relu')(ip) - h = layers.Conv2D( - filters, (1, 1), - strides=(1, 1), - padding='same', - name='reduction_conv_1_%s' % block_id, - use_bias=False, - kernel_initializer='he_normal')(h) - h = layers.BatchNormalization( - axis=channel_dim, - momentum=0.9997, - epsilon=1e-3, - name='reduction_bn_1_%s' % block_id)(h) - h3 = layers.ZeroPadding2D( - padding=correct_pad(backend, h, 3), - name='reduction_pad_1_%s' % block_id)(h) - - with backend.name_scope('block_1'): - x1_1 = _separable_conv_block( - h, filters, (5, 5), - strides=(2, 2), - block_id='reduction_left1_%s' % block_id) - x1_2 = _separable_conv_block( - p, filters, (7, 7), - strides=(2, 2), - block_id='reduction_right1_%s' % block_id) - x1 = layers.add([x1_1, x1_2], name='reduction_add_1_%s' % block_id) - - with backend.name_scope('block_2'): - x2_1 = layers.MaxPooling2D( - (3, 3), - strides=(2, 2), - padding='valid', - name='reduction_left2_%s' % block_id)(h3) - x2_2 = _separable_conv_block( - p, filters, (7, 7), - strides=(2, 2), - block_id='reduction_right2_%s' % block_id) - x2 = layers.add([x2_1, x2_2], name='reduction_add_2_%s' % block_id) - - with backend.name_scope('block_3'): - x3_1 = layers.AveragePooling2D( - (3, 3), - strides=(2, 2), - padding='valid', - name='reduction_left3_%s' % block_id)(h3) - x3_2 = _separable_conv_block( - p, filters, (5, 5), - strides=(2, 2), - block_id='reduction_right3_%s' % block_id) - x3 = layers.add([x3_1, x3_2], name='reduction_add3_%s' % block_id) - - with backend.name_scope('block_4'): - x4 = layers.AveragePooling2D( - (3, 3), - strides=(1, 1), - padding='same', - name='reduction_left4_%s' % block_id)(x1) - x4 = layers.add([x2, x4]) - - with backend.name_scope('block_5'): - x5_1 = _separable_conv_block( - x1, filters, (3, 3), - block_id='reduction_left4_%s' % block_id) - x5_2 = layers.MaxPooling2D( - (3, 3), - strides=(2, 2), - padding='valid', - name='reduction_right5_%s' % block_id)(h3) - x5 = layers.add([x5_1, x5_2], name='reduction_add4_%s' % block_id) - - x = layers.concatenate( - [x2, x3, x4, x5], - axis=channel_dim, - name='reduction_concat_%s' % block_id) - return x, ip - - -def preprocess_input(x, **kwargs): - """Preprocesses a numpy array encoding a batch of images. - - # Arguments - x: a 4D numpy array consists of RGB values within [0, 255]. - - # Returns - Preprocessed array. - """ - return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) -"""ResNet models for Keras. - -# Reference paper - -- [Deep Residual Learning for Image Recognition] - (https://arxiv.org/abs/1512.03385) (CVPR 2016 Best Paper Award) - -# Reference implementations - -- [TensorNets] - (https://github.com/taehoonlee/tensornets/blob/master/tensornets/resnets.py) -- [Caffe ResNet] - (https://github.com/KaimingHe/deep-residual-networks/tree/master/prototxt) - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from . 
import imagenet_utils -from .imagenet_utils import decode_predictions -from .resnet_common import ResNet50 -from .resnet_common import ResNet101 -from .resnet_common import ResNet152 - - -def preprocess_input(x, **kwargs): - """Preprocesses a numpy array encoding a batch of images. - - # Arguments - x: a 4D numpy array consists of RGB values within [0, 255]. - data_format: data format of the image tensor. - - # Returns - Preprocessed array. - """ - return imagenet_utils.preprocess_input(x, mode='caffe', **kwargs) -"""ResNet50 model for Keras. - -# Reference: - -- [Deep Residual Learning for Image Recognition]( - https://arxiv.org/abs/1512.03385) (CVPR 2016 Best Paper Award) - -Adapted from code contributed by BigMoyan. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import warnings - -from . import get_submodules_from_kwargs -from . import imagenet_utils -from .imagenet_utils import decode_predictions -from .imagenet_utils import _obtain_input_shape - -preprocess_input = imagenet_utils.preprocess_input - -WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/' - 'releases/download/v0.2/' - 'resnet50_weights_tf_dim_ordering_tf_kernels.h5') -WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/' - 'releases/download/v0.2/' - 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5') - -backend = None -layers = None -models = None -keras_utils = None - - -def identity_block(input_tensor, kernel_size, filters, stage, block): - """The identity block is the block that has no conv layer at shortcut. - - # Arguments - input_tensor: input tensor - kernel_size: default 3, the kernel size of - middle conv layer at main path - filters: list of integers, the filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - - # Returns - Output tensor for the block. - """ - filters1, filters2, filters3 = filters - if backend.image_data_format() == 'channels_last': - bn_axis = 3 - else: - bn_axis = 1 - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = layers.Conv2D(filters1, (1, 1), - kernel_initializer='he_normal', - name=conv_name_base + '2a')(input_tensor) - x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) - x = layers.Activation('relu')(x) - - x = layers.Conv2D(filters2, kernel_size, - padding='same', - kernel_initializer='he_normal', - name=conv_name_base + '2b')(x) - x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) - x = layers.Activation('relu')(x) - - x = layers.Conv2D(filters3, (1, 1), - kernel_initializer='he_normal', - name=conv_name_base + '2c')(x) - x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) - - x = layers.add([x, input_tensor]) - x = layers.Activation('relu')(x) - return x - - -def conv_block(input_tensor, - kernel_size, - filters, - stage, - block, - strides=(2, 2)): - """A block that has a conv layer at shortcut. - - # Arguments - input_tensor: input tensor - kernel_size: default 3, the kernel size of - middle conv layer at main path - filters: list of integers, the filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - strides: Strides for the first conv layer in the block. 
- - # Returns - Output tensor for the block. - - Note that from stage 3, - the first conv layer at main path is with strides=(2, 2) - And the shortcut should have strides=(2, 2) as well - """ - filters1, filters2, filters3 = filters - if backend.image_data_format() == 'channels_last': - bn_axis = 3 - else: - bn_axis = 1 - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = layers.Conv2D(filters1, (1, 1), strides=strides, - kernel_initializer='he_normal', - name=conv_name_base + '2a')(input_tensor) - x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) - x = layers.Activation('relu')(x) - - x = layers.Conv2D(filters2, kernel_size, padding='same', - kernel_initializer='he_normal', - name=conv_name_base + '2b')(x) - x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) - x = layers.Activation('relu')(x) - - x = layers.Conv2D(filters3, (1, 1), - kernel_initializer='he_normal', - name=conv_name_base + '2c')(x) - x = layers.BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) - - shortcut = layers.Conv2D(filters3, (1, 1), strides=strides, - kernel_initializer='he_normal', - name=conv_name_base + '1')(input_tensor) - shortcut = layers.BatchNormalization( - axis=bn_axis, name=bn_name_base + '1')(shortcut) - - x = layers.add([x, shortcut]) - x = layers.Activation('relu')(x) - return x - - -def ResNet50(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the ResNet50 architecture. - - Optionally loads weights pre-trained on ImageNet. - Note that the data format convention used by the model is - the one specified in your Keras config at `~/.keras/keras.json`. - - # Arguments - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` (with `channels_last` data format) - or `(3, 224, 224)` (with `channels_first` data format). - It should have exactly 3 inputs channels, - and width and height should be no smaller than 32. - E.g. `(200, 200, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - - # Returns - A Keras model instance. - - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. 
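-
-    # Example
-        A classification sketch (illustrative; `images` stands in for a
-        4D array of RGB values in [0, 255]):
-
-        model = ResNet50(weights='imagenet')
-        preds = model.predict(preprocess_input(images))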
- """ - global backend, layers, models, keras_utils - backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) - - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=224, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - if backend.image_data_format() == 'channels_last': - bn_axis = 3 - else: - bn_axis = 1 - - x = layers.ZeroPadding2D(padding=(3, 3), name='conv1_pad')(img_input) - x = layers.Conv2D(64, (7, 7), - strides=(2, 2), - padding='valid', - kernel_initializer='he_normal', - name='conv1')(x) - x = layers.BatchNormalization(axis=bn_axis, name='bn_conv1')(x) - x = layers.Activation('relu')(x) - x = layers.ZeroPadding2D(padding=(1, 1), name='pool1_pad')(x) - x = layers.MaxPooling2D((3, 3), strides=(2, 2))(x) - - x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) - x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') - x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') - - x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') - - x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') - - x = conv_block(x, 3, [512, 512, 2048], stage=5, block='a') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') - - if include_top: - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - x = layers.Dense(classes, activation='softmax', name='fc1000')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - else: - warnings.warn('The output shape of `ResNet50(include_top=False)` ' - 'has been changed since Keras 2.2.0.') - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = keras_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = models.Model(inputs, x, name='resnet50') - - # Load weights. 
- if weights == 'imagenet': - if include_top: - weights_path = keras_utils.get_file( - 'resnet50_weights_tf_dim_ordering_tf_kernels.h5', - WEIGHTS_PATH, - cache_subdir='models', - md5_hash='a7b3fe01876f51b976af0dea6bc144eb') - else: - weights_path = keras_utils.get_file( - 'resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', - WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='a268eb855778b3df3c7506639542a6af') - model.load_weights(weights_path) - if backend.backend() == 'theano': - keras_utils.convert_all_kernels_in_model(model) - elif weights is not None: - model.load_weights(weights) - - return model -"""ResNet, ResNetV2, and ResNeXt models for Keras. - -# Reference papers - -- [Deep Residual Learning for Image Recognition] - (https://arxiv.org/abs/1512.03385) (CVPR 2016 Best Paper Award) -- [Identity Mappings in Deep Residual Networks] - (https://arxiv.org/abs/1603.05027) (ECCV 2016) -- [Aggregated Residual Transformations for Deep Neural Networks] - (https://arxiv.org/abs/1611.05431) (CVPR 2017) - -# Reference implementations - -- [TensorNets] - (https://github.com/taehoonlee/tensornets/blob/master/tensornets/resnets.py) -- [Caffe ResNet] - (https://github.com/KaimingHe/deep-residual-networks/tree/master/prototxt) -- [Torch ResNetV2] - (https://github.com/facebook/fb.resnet.torch/blob/master/models/preresnet.lua) -- [Torch ResNeXt] - (https://github.com/facebookresearch/ResNeXt/blob/master/models/resnext.lua) - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from . import get_submodules_from_kwargs -from .imagenet_utils import _obtain_input_shape - - -backend = None -layers = None -models = None -keras_utils = None - - -BASE_WEIGHTS_PATH = ( - 'https://github.com/keras-team/keras-applications/' - 'releases/download/resnet/') -WEIGHTS_HASHES = { - 'resnet50': ('2cb95161c43110f7111970584f804107', - '4d473c1dd8becc155b73f8504c6f6626'), - 'resnet101': ('f1aeb4b969a6efcfb50fad2f0c20cfc5', - '88cf7a10940856eca736dc7b7e228a21'), - 'resnet152': ('100835be76be38e30d865e96f2aaae62', - 'ee4c566cf9a93f14d82f913c2dc6dd0c'), - 'resnet50v2': ('3ef43a0b657b3be2300d5770ece849e0', - 'fac2f116257151a9d068a22e544a4917'), - 'resnet101v2': ('6343647c601c52e1368623803854d971', - 'c0ed64b8031c3730f411d2eb4eea35b5'), - 'resnet152v2': ('a49b44d1979771252814e80f8ec446f9', - 'ed17cf2e0169df9d443503ef94b23b33'), - 'resnext50': ('67a5b30d522ed92f75a1f16eef299d1a', - '62527c363bdd9ec598bed41947b379fc'), - 'resnext101': ('34fb605428fcc7aa4d62f44404c11509', - '0f678c91647380debd923963594981b3') -} - - -def block1(x, filters, kernel_size=3, stride=1, - conv_shortcut=True, name=None): - """A residual block. - - # Arguments - x: input tensor. - filters: integer, filters of the bottleneck layer. - kernel_size: default 3, kernel size of the bottleneck layer. - stride: default 1, stride of the first layer. - conv_shortcut: default True, use convolution shortcut if True, - otherwise identity shortcut. - name: string, block label. - - # Returns - Output tensor for the residual block. 
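-
-    # Example
-        As used by `stack1` below, e.g. for the first block of a stage:
-
-        x = block1(x, 64, stride=1, name='conv2_block1')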
- """ - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - - if conv_shortcut is True: - shortcut = layers.Conv2D(4 * filters, 1, strides=stride, - name=name + '_0_conv')(x) - shortcut = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name=name + '_0_bn')(shortcut) - else: - shortcut = x - - x = layers.Conv2D(filters, 1, strides=stride, name=name + '_1_conv')(x) - x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name=name + '_1_bn')(x) - x = layers.Activation('relu', name=name + '_1_relu')(x) - - x = layers.Conv2D(filters, kernel_size, padding='SAME', - name=name + '_2_conv')(x) - x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name=name + '_2_bn')(x) - x = layers.Activation('relu', name=name + '_2_relu')(x) - - x = layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(x) - x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name=name + '_3_bn')(x) - - x = layers.Add(name=name + '_add')([shortcut, x]) - x = layers.Activation('relu', name=name + '_out')(x) - return x - - -def stack1(x, filters, blocks, stride1=2, name=None): - """A set of stacked residual blocks. - - # Arguments - x: input tensor. - filters: integer, filters of the bottleneck layer in a block. - blocks: integer, blocks in the stacked blocks. - stride1: default 2, stride of the first layer in the first block. - name: string, stack label. - - # Returns - Output tensor for the stacked blocks. - """ - x = block1(x, filters, stride=stride1, name=name + '_block1') - for i in range(2, blocks + 1): - x = block1(x, filters, conv_shortcut=False, - name=name + '_block' + str(i)) - return x - - -def block2(x, filters, kernel_size=3, stride=1, - conv_shortcut=False, name=None): - """A residual block. - - # Arguments - x: input tensor. - filters: integer, filters of the bottleneck layer. - kernel_size: default 3, kernel size of the bottleneck layer. - stride: default 1, stride of the first layer. - conv_shortcut: default False, use convolution shortcut if True, - otherwise identity shortcut. - name: string, block label. - - # Returns - Output tensor for the residual block. - """ - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - - preact = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name=name + '_preact_bn')(x) - preact = layers.Activation('relu', name=name + '_preact_relu')(preact) - - if conv_shortcut is True: - shortcut = layers.Conv2D(4 * filters, 1, strides=stride, - name=name + '_0_conv')(preact) - else: - shortcut = layers.MaxPooling2D( - 1, strides=stride)(x) if stride > 1 else x - - x = layers.Conv2D(filters, 1, strides=1, use_bias=False, - name=name + '_1_conv')(preact) - x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name=name + '_1_bn')(x) - x = layers.Activation('relu', name=name + '_1_relu')(x) - - x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x) - x = layers.Conv2D(filters, kernel_size, strides=stride, - use_bias=False, name=name + '_2_conv')(x) - x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name=name + '_2_bn')(x) - x = layers.Activation('relu', name=name + '_2_relu')(x) - - x = layers.Conv2D(4 * filters, 1, name=name + '_3_conv')(x) - x = layers.Add(name=name + '_out')([shortcut, x]) - return x - - -def stack2(x, filters, blocks, stride1=2, name=None): - """A set of stacked residual blocks. - - # Arguments - x: input tensor. - filters: integer, filters of the bottleneck layer in a block. - blocks: integer, blocks in the stacked blocks. 
- stride1: default 2, stride of the first layer in the first block. - name: string, stack label. - - # Returns - Output tensor for the stacked blocks. - """ - x = block2(x, filters, conv_shortcut=True, name=name + '_block1') - for i in range(2, blocks): - x = block2(x, filters, name=name + '_block' + str(i)) - x = block2(x, filters, stride=stride1, name=name + '_block' + str(blocks)) - return x - - -def block3(x, filters, kernel_size=3, stride=1, groups=32, - conv_shortcut=True, name=None): - """A residual block. - - # Arguments - x: input tensor. - filters: integer, filters of the bottleneck layer. - kernel_size: default 3, kernel size of the bottleneck layer. - stride: default 1, stride of the first layer. - groups: default 32, group size for grouped convolution. - conv_shortcut: default True, use convolution shortcut if True, - otherwise identity shortcut. - name: string, block label. - - # Returns - Output tensor for the residual block. - """ - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - - if conv_shortcut is True: - shortcut = layers.Conv2D((64 // groups) * filters, 1, strides=stride, - use_bias=False, name=name + '_0_conv')(x) - shortcut = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name=name + '_0_bn')(shortcut) - else: - shortcut = x - - x = layers.Conv2D(filters, 1, use_bias=False, name=name + '_1_conv')(x) - x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name=name + '_1_bn')(x) - x = layers.Activation('relu', name=name + '_1_relu')(x) - - c = filters // groups - x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name=name + '_2_pad')(x) - x = layers.DepthwiseConv2D(kernel_size, strides=stride, depth_multiplier=c, - use_bias=False, name=name + '_2_conv')(x) - x_shape = backend.int_shape(x)[1:-1] - x = layers.Reshape(x_shape + (groups, c, c))(x) - output_shape = x_shape + \ - (groups, c) if backend.backend() == 'theano' else None - x = layers.Lambda(lambda x: sum([x[:, :, :, :, i] for i in range(c)]), - output_shape=output_shape, name=name + '_2_reduce')(x) - x = layers.Reshape(x_shape + (filters,))(x) - x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name=name + '_2_bn')(x) - x = layers.Activation('relu', name=name + '_2_relu')(x) - - x = layers.Conv2D((64 // groups) * filters, 1, - use_bias=False, name=name + '_3_conv')(x) - x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name=name + '_3_bn')(x) - - x = layers.Add(name=name + '_add')([shortcut, x]) - x = layers.Activation('relu', name=name + '_out')(x) - return x - - -def stack3(x, filters, blocks, stride1=2, groups=32, name=None): - """A set of stacked residual blocks. - - # Arguments - x: input tensor. - filters: integer, filters of the bottleneck layer in a block. - blocks: integer, blocks in the stacked blocks. - stride1: default 2, stride of the first layer in the first block. - groups: default 32, group size for grouped convolution. - name: string, stack label. - - # Returns - Output tensor for the stacked blocks. - """ - x = block3(x, filters, stride=stride1, - groups=groups, name=name + '_block1') - for i in range(2, blocks + 1): - x = block3(x, filters, groups=groups, conv_shortcut=False, - name=name + '_block' + str(i)) - return x - - -def ResNet(stack_fn, - preact, - use_bias, - model_name='resnet', - include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the ResNet, ResNetV2, and ResNeXt architecture. 
-
-    Optionally loads weights pre-trained on ImageNet.
-    Note that the data format convention used by the model is
-    the one specified in your Keras config at `~/.keras/keras.json`.
-
-    # Arguments
-        stack_fn: a function that returns output tensor for the
-            stacked residual blocks.
-        preact: whether to use pre-activation or not
-            (True for ResNetV2, False for ResNet and ResNeXt).
-        use_bias: whether to use biases for convolutional layers or not
-            (True for ResNet and ResNetV2, False for ResNeXt).
-        model_name: string, model name.
-        include_top: whether to include the fully-connected
-            layer at the top of the network.
-        weights: one of `None` (random initialization),
-            'imagenet' (pre-training on ImageNet),
-            or the path to the weights file to be loaded.
-        input_tensor: optional Keras tensor
-            (i.e. output of `layers.Input()`)
-            to use as image input for the model.
-        input_shape: optional shape tuple, only to be specified
-            if `include_top` is False (otherwise the input shape
-            has to be `(224, 224, 3)` (with `channels_last` data format)
-            or `(3, 224, 224)` (with `channels_first` data format)).
-            It should have exactly 3 input channels.
-        pooling: optional pooling mode for feature extraction
-            when `include_top` is `False`.
-            - `None` means that the output of the model will be
-                the 4D tensor output of the
-                last convolutional layer.
-            - `avg` means that global average pooling
-                will be applied to the output of the
-                last convolutional layer, and thus
-                the output of the model will be a 2D tensor.
-            - `max` means that global max pooling will
-                be applied.
-        classes: optional number of classes to classify images
-            into, only to be specified if `include_top` is True, and
-            if no `weights` argument is specified.
-
-    # Returns
-        A Keras model instance.
-
-    # Raises
-        ValueError: in case of invalid argument for `weights`,
-            or invalid input shape.
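-
-    # Example
-        A sketch of how the concrete constructors below compose this
-        function (this mirrors the `ResNet50` definition further down):
-
-        def stack_fn(x):
-            x = stack1(x, 64, 3, stride1=1, name='conv2')
-            x = stack1(x, 128, 4, name='conv3')
-            x = stack1(x, 256, 6, name='conv4')
-            x = stack1(x, 512, 3, name='conv5')
-            return x
-        model = ResNet(stack_fn, False, True, 'resnet50')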
- """ - global backend, layers, models, keras_utils - backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) - - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=224, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - bn_axis = 3 if backend.image_data_format() == 'channels_last' else 1 - - x = layers.ZeroPadding2D(padding=((3, 3), (3, 3)), - name='conv1_pad')(img_input) - x = layers.Conv2D(64, 7, strides=2, use_bias=use_bias, - name='conv1_conv')(x) - - if preact is False: - x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name='conv1_bn')(x) - x = layers.Activation('relu', name='conv1_relu')(x) - - x = layers.ZeroPadding2D(padding=((1, 1), (1, 1)), name='pool1_pad')(x) - x = layers.MaxPooling2D(3, strides=2, name='pool1_pool')(x) - - x = stack_fn(x) - - if preact is True: - x = layers.BatchNormalization(axis=bn_axis, epsilon=1.001e-5, - name='post_bn')(x) - x = layers.Activation('relu', name='post_relu')(x) - - if include_top: - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - x = layers.Dense(classes, activation='softmax', name='probs')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D(name='max_pool')(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = keras_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - - # Create model. - model = models.Model(inputs, x, name=model_name) - - # Load weights. 
- if (weights == 'imagenet') and (model_name in WEIGHTS_HASHES): - if include_top: - file_name = model_name + '_weights_tf_dim_ordering_tf_kernels.h5' - file_hash = WEIGHTS_HASHES[model_name][0] - else: - file_name = model_name + '_weights_tf_dim_ordering_tf_kernels_notop.h5' - file_hash = WEIGHTS_HASHES[model_name][1] - weights_path = keras_utils.get_file(file_name, - BASE_WEIGHTS_PATH + file_name, - cache_subdir='models', - file_hash=file_hash) - model.load_weights(weights_path) - elif weights is not None: - model.load_weights(weights) - - return model - - -def ResNet50(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - def stack_fn(x): - x = stack1(x, 64, 3, stride1=1, name='conv2') - x = stack1(x, 128, 4, name='conv3') - x = stack1(x, 256, 6, name='conv4') - x = stack1(x, 512, 3, name='conv5') - return x - return ResNet(stack_fn, False, True, 'resnet50', - include_top, weights, - input_tensor, input_shape, - pooling, classes, - **kwargs) - - -def ResNet101(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - def stack_fn(x): - x = stack1(x, 64, 3, stride1=1, name='conv2') - x = stack1(x, 128, 4, name='conv3') - x = stack1(x, 256, 23, name='conv4') - x = stack1(x, 512, 3, name='conv5') - return x - return ResNet(stack_fn, False, True, 'resnet101', - include_top, weights, - input_tensor, input_shape, - pooling, classes, - **kwargs) - - -def ResNet152(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - def stack_fn(x): - x = stack1(x, 64, 3, stride1=1, name='conv2') - x = stack1(x, 128, 8, name='conv3') - x = stack1(x, 256, 36, name='conv4') - x = stack1(x, 512, 3, name='conv5') - return x - return ResNet(stack_fn, False, True, 'resnet152', - include_top, weights, - input_tensor, input_shape, - pooling, classes, - **kwargs) - - -def ResNet50V2(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - def stack_fn(x): - x = stack2(x, 64, 3, name='conv2') - x = stack2(x, 128, 4, name='conv3') - x = stack2(x, 256, 6, name='conv4') - x = stack2(x, 512, 3, stride1=1, name='conv5') - return x - return ResNet(stack_fn, True, True, 'resnet50v2', - include_top, weights, - input_tensor, input_shape, - pooling, classes, - **kwargs) - - -def ResNet101V2(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - def stack_fn(x): - x = stack2(x, 64, 3, name='conv2') - x = stack2(x, 128, 4, name='conv3') - x = stack2(x, 256, 23, name='conv4') - x = stack2(x, 512, 3, stride1=1, name='conv5') - return x - return ResNet(stack_fn, True, True, 'resnet101v2', - include_top, weights, - input_tensor, input_shape, - pooling, classes, - **kwargs) - - -def ResNet152V2(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - def stack_fn(x): - x = stack2(x, 64, 3, name='conv2') - x = stack2(x, 128, 8, name='conv3') - x = stack2(x, 256, 36, name='conv4') - x = stack2(x, 512, 3, stride1=1, name='conv5') - return x - return ResNet(stack_fn, True, True, 'resnet152v2', - include_top, weights, - input_tensor, input_shape, - pooling, classes, - **kwargs) - - -def ResNeXt50(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - 
              **kwargs):
-    def stack_fn(x):
-        x = stack3(x, 128, 3, stride1=1, name='conv2')
-        x = stack3(x, 256, 4, name='conv3')
-        x = stack3(x, 512, 6, name='conv4')
-        x = stack3(x, 1024, 3, name='conv5')
-        return x
-    return ResNet(stack_fn, False, False, 'resnext50',
-                  include_top, weights,
-                  input_tensor, input_shape,
-                  pooling, classes,
-                  **kwargs)
-
-
-def ResNeXt101(include_top=True,
-               weights='imagenet',
-               input_tensor=None,
-               input_shape=None,
-               pooling=None,
-               classes=1000,
-               **kwargs):
-    def stack_fn(x):
-        x = stack3(x, 128, 3, stride1=1, name='conv2')
-        x = stack3(x, 256, 4, name='conv3')
-        x = stack3(x, 512, 23, name='conv4')
-        x = stack3(x, 1024, 3, name='conv5')
-        return x
-    return ResNet(stack_fn, False, False, 'resnext101',
-                  include_top, weights,
-                  input_tensor, input_shape,
-                  pooling, classes,
-                  **kwargs)
-
-
-setattr(ResNet50, '__doc__', ResNet.__doc__)
-setattr(ResNet101, '__doc__', ResNet.__doc__)
-setattr(ResNet152, '__doc__', ResNet.__doc__)
-setattr(ResNet50V2, '__doc__', ResNet.__doc__)
-setattr(ResNet101V2, '__doc__', ResNet.__doc__)
-setattr(ResNet152V2, '__doc__', ResNet.__doc__)
-setattr(ResNeXt50, '__doc__', ResNet.__doc__)
-setattr(ResNeXt101, '__doc__', ResNet.__doc__)
-"""ResNetV2 models for Keras.
-
-# Reference paper
-
-- [Identity Mappings in Deep Residual Networks]
-  (https://arxiv.org/abs/1603.05027) (ECCV 2016)
-
-# Reference implementations
-
-- [TensorNets]
-  (https://github.com/taehoonlee/tensornets/blob/master/tensornets/resnets.py)
-- [Torch ResNetV2]
-  (https://github.com/facebook/fb.resnet.torch/blob/master/models/preresnet.lua)
-
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-from . import imagenet_utils
-from .imagenet_utils import decode_predictions
-from .resnet_common import ResNet50V2
-from .resnet_common import ResNet101V2
-from .resnet_common import ResNet152V2
-
-
-def preprocess_input(x, **kwargs):
-    """Preprocesses a numpy array encoding a batch of images.
-
-    # Arguments
-        x: a 4D numpy array consisting of RGB values within [0, 255].
-        data_format: data format of the image tensor.
-
-    # Returns
-        Preprocessed array.
-    """
-    return imagenet_utils.preprocess_input(x, mode='tf', **kwargs)
-"""ResNeXt models for Keras.
-
-# Reference paper
-
-- [Aggregated Residual Transformations for Deep Neural Networks]
-  (https://arxiv.org/abs/1611.05431) (CVPR 2017)
-
-# Reference implementations
-
-- [TensorNets]
-  (https://github.com/taehoonlee/tensornets/blob/master/tensornets/resnets.py)
-- [Torch ResNeXt]
-  (https://github.com/facebookresearch/ResNeXt/blob/master/models/resnext.lua)
-
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from . import imagenet_utils
-from .imagenet_utils import decode_predictions
-from .resnet_common import ResNeXt50
-from .resnet_common import ResNeXt101
-
-
-def preprocess_input(x, **kwargs):
-    """Preprocesses a numpy array encoding a batch of images.
-
-    # Arguments
-        x: a 4D numpy array consisting of RGB values within [0, 255].
-        data_format: data format of the image tensor.
-
-    # Returns
-        Preprocessed array.
-    """
-    return imagenet_utils.preprocess_input(x, mode='torch', **kwargs)
-"""VGG16 model for Keras.
- -# Reference - -- [Very Deep Convolutional Networks for Large-Scale Image Recognition]( - https://arxiv.org/abs/1409.1556) (ICLR 2015) - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os - -from . import get_submodules_from_kwargs -from . import imagenet_utils -from .imagenet_utils import decode_predictions -from .imagenet_utils import _obtain_input_shape - -preprocess_input = imagenet_utils.preprocess_input - -WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/' - 'releases/download/v0.1/' - 'vgg16_weights_tf_dim_ordering_tf_kernels.h5') -WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/' - 'releases/download/v0.1/' - 'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5') - - -def VGG16(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the VGG16 architecture. - - Optionally loads weights pre-trained on ImageNet. - Note that the data format convention used by the model is - the one specified in your Keras config at `~/.keras/keras.json`. - - # Arguments - include_top: whether to include the 3 fully-connected - layers at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor - (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(224, 224, 3)` - (with `channels_last` data format) - or `(3, 224, 224)` (with `channels_first` data format). - It should have exactly 3 input channels, - and width and height should be no smaller than 32. - E.g. `(200, 200, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. - classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, and - if no `weights` argument is specified. - - # Returns - A Keras model instance. - - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. 
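-
-    # Example
-        A feature-extraction sketch (illustrative; `images` is assumed
-        to be a preprocessed 4D array):
-
-        base = VGG16(weights='imagenet', include_top=False, pooling='avg')
-        features = base.predict(images)  # 2D tensor of shape (batch, 512)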
- """ - backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) - - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top`' - ' as true, `classes` should be 1000') - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=224, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - # Block 1 - x = layers.Conv2D(64, (3, 3), - activation='relu', - padding='same', - name='block1_conv1')(img_input) - x = layers.Conv2D(64, (3, 3), - activation='relu', - padding='same', - name='block1_conv2')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) - - # Block 2 - x = layers.Conv2D(128, (3, 3), - activation='relu', - padding='same', - name='block2_conv1')(x) - x = layers.Conv2D(128, (3, 3), - activation='relu', - padding='same', - name='block2_conv2')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) - - # Block 3 - x = layers.Conv2D(256, (3, 3), - activation='relu', - padding='same', - name='block3_conv1')(x) - x = layers.Conv2D(256, (3, 3), - activation='relu', - padding='same', - name='block3_conv2')(x) - x = layers.Conv2D(256, (3, 3), - activation='relu', - padding='same', - name='block3_conv3')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) - - # Block 4 - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block4_conv1')(x) - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block4_conv2')(x) - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block4_conv3')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) - - # Block 5 - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block5_conv1')(x) - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block5_conv2')(x) - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block5_conv3')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) - - if include_top: - # Classification block - x = layers.Flatten(name='flatten')(x) - x = layers.Dense(4096, activation='relu', name='fc1')(x) - x = layers.Dense(4096, activation='relu', name='fc2')(x) - x = layers.Dense(classes, activation='softmax', name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = keras_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = models.Model(inputs, x, name='vgg16') - - # Load weights. 
-    if weights == 'imagenet':
-        if include_top:
-            weights_path = keras_utils.get_file(
-                'vgg16_weights_tf_dim_ordering_tf_kernels.h5',
-                WEIGHTS_PATH,
-                cache_subdir='models',
-                file_hash='64373286793e3c8b2b4e3219cbf3544b')
-        else:
-            weights_path = keras_utils.get_file(
-                'vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
-                WEIGHTS_PATH_NO_TOP,
-                cache_subdir='models',
-                file_hash='6d6bbae143d832006294945121d1f1fc')
-        model.load_weights(weights_path)
-        if backend.backend() == 'theano':
-            keras_utils.convert_all_kernels_in_model(model)
-    elif weights is not None:
-        model.load_weights(weights)
-
-    return model
-"""VGG19 model for Keras.
-
-# Reference
-
-- [Very Deep Convolutional Networks for Large-Scale Image Recognition](
-    https://arxiv.org/abs/1409.1556) (ICLR 2015)
-
-"""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-
-import os
-
-from . import get_submodules_from_kwargs
-from . import imagenet_utils
-from .imagenet_utils import decode_predictions
-from .imagenet_utils import _obtain_input_shape
-
-preprocess_input = imagenet_utils.preprocess_input
-
-WEIGHTS_PATH = ('https://github.com/fchollet/deep-learning-models/'
-                'releases/download/v0.1/'
-                'vgg19_weights_tf_dim_ordering_tf_kernels.h5')
-WEIGHTS_PATH_NO_TOP = ('https://github.com/fchollet/deep-learning-models/'
-                       'releases/download/v0.1/'
-                       'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5')
-
-
-def VGG19(include_top=True,
-          weights='imagenet',
-          input_tensor=None,
-          input_shape=None,
-          pooling=None,
-          classes=1000,
-          **kwargs):
-    """Instantiates the VGG19 architecture.
-
-    Optionally loads weights pre-trained on ImageNet.
-    Note that the data format convention used by the model is
-    the one specified in your Keras config at `~/.keras/keras.json`.
-
-    # Arguments
-        include_top: whether to include the 3 fully-connected
-            layers at the top of the network.
-        weights: one of `None` (random initialization),
-            'imagenet' (pre-training on ImageNet),
-            or the path to the weights file to be loaded.
-        input_tensor: optional Keras tensor
-            (i.e. output of `layers.Input()`)
-            to use as image input for the model.
-        input_shape: optional shape tuple, only to be specified
-            if `include_top` is False (otherwise the input shape
-            has to be `(224, 224, 3)`
-            (with `channels_last` data format)
-            or `(3, 224, 224)` (with `channels_first` data format)).
-            It should have exactly 3 input channels,
-            and width and height should be no smaller than 32.
-            E.g. `(200, 200, 3)` would be one valid value.
-        pooling: Optional pooling mode for feature extraction
-            when `include_top` is `False`.
-            - `None` means that the output of the model will be
-                the 4D tensor output of the
-                last convolutional block.
-            - `avg` means that global average pooling
-                will be applied to the output of the
-                last convolutional block, and thus
-                the output of the model will be a 2D tensor.
-            - `max` means that global max pooling will
-                be applied.
-        classes: optional number of classes to classify images
-            into, only to be specified if `include_top` is True, and
-            if no `weights` argument is specified.
-
-    # Returns
-        A Keras model instance.
-
-    # Raises
-        ValueError: in case of invalid argument for `weights`,
-            or invalid input shape.
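-
-    # Example
-        A classification sketch (illustrative; `images` is assumed to
-        hold raw RGB images resized to 224x224):
-
-        model = VGG19(weights='imagenet')
-        preds = model.predict(preprocess_input(images))
-        print(decode_predictions(preds, top=3))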
- """ - backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) - - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top`' - ' as true, `classes` should be 1000') - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=224, - min_size=32, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - # Block 1 - x = layers.Conv2D(64, (3, 3), - activation='relu', - padding='same', - name='block1_conv1')(img_input) - x = layers.Conv2D(64, (3, 3), - activation='relu', - padding='same', - name='block1_conv2')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) - - # Block 2 - x = layers.Conv2D(128, (3, 3), - activation='relu', - padding='same', - name='block2_conv1')(x) - x = layers.Conv2D(128, (3, 3), - activation='relu', - padding='same', - name='block2_conv2')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) - - # Block 3 - x = layers.Conv2D(256, (3, 3), - activation='relu', - padding='same', - name='block3_conv1')(x) - x = layers.Conv2D(256, (3, 3), - activation='relu', - padding='same', - name='block3_conv2')(x) - x = layers.Conv2D(256, (3, 3), - activation='relu', - padding='same', - name='block3_conv3')(x) - x = layers.Conv2D(256, (3, 3), - activation='relu', - padding='same', - name='block3_conv4')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) - - # Block 4 - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block4_conv1')(x) - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block4_conv2')(x) - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block4_conv3')(x) - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block4_conv4')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) - - # Block 5 - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block5_conv1')(x) - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block5_conv2')(x) - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block5_conv3')(x) - x = layers.Conv2D(512, (3, 3), - activation='relu', - padding='same', - name='block5_conv4')(x) - x = layers.MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) - - if include_top: - # Classification block - x = layers.Flatten(name='flatten')(x) - x = layers.Dense(4096, activation='relu', name='fc1')(x) - x = layers.Dense(4096, activation='relu', name='fc2')(x) - x = layers.Dense(classes, activation='softmax', name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. 
- if input_tensor is not None: - inputs = keras_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = models.Model(inputs, x, name='vgg19') - - # Load weights. - if weights == 'imagenet': - if include_top: - weights_path = keras_utils.get_file( - 'vgg19_weights_tf_dim_ordering_tf_kernels.h5', - WEIGHTS_PATH, - cache_subdir='models', - file_hash='cbe5617147190e668d6c5d5026f83318') - else: - weights_path = keras_utils.get_file( - 'vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5', - WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - file_hash='253f8cb515780f3b799900260a226db6') - model.load_weights(weights_path) - if backend.backend() == 'theano': - keras_utils.convert_all_kernels_in_model(model) - elif weights is not None: - model.load_weights(weights) - - return model -"""Xception V1 model for Keras. - -On ImageNet, this model gets to a top-1 validation accuracy of 0.790 -and a top-5 validation accuracy of 0.945. - -Do note that the input image format for this model is different than for -the VGG16 and ResNet models (299x299 instead of 224x224), -and that the input preprocessing function -is also different (same as Inception V3). - -# Reference - -- [Xception: Deep Learning with Depthwise Separable Convolutions]( - https://arxiv.org/abs/1610.02357) (CVPR 2017) - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import warnings - -from . import get_submodules_from_kwargs -from . import imagenet_utils -from .imagenet_utils import decode_predictions -from .imagenet_utils import _obtain_input_shape - - -TF_WEIGHTS_PATH = ( - 'https://github.com/fchollet/deep-learning-models/' - 'releases/download/v0.4/' - 'xception_weights_tf_dim_ordering_tf_kernels.h5') -TF_WEIGHTS_PATH_NO_TOP = ( - 'https://github.com/fchollet/deep-learning-models/' - 'releases/download/v0.4/' - 'xception_weights_tf_dim_ordering_tf_kernels_notop.h5') - - -def Xception(include_top=True, - weights='imagenet', - input_tensor=None, - input_shape=None, - pooling=None, - classes=1000, - **kwargs): - """Instantiates the Xception architecture. - - Optionally loads weights pre-trained on ImageNet. - Note that the data format convention used by the model is - the one specified in your Keras config at `~/.keras/keras.json`. - - Note that the default input image size for this model is 299x299. - - # Arguments - include_top: whether to include the fully-connected - layer at the top of the network. - weights: one of `None` (random initialization), - 'imagenet' (pre-training on ImageNet), - or the path to the weights file to be loaded. - input_tensor: optional Keras tensor - (i.e. output of `layers.Input()`) - to use as image input for the model. - input_shape: optional shape tuple, only to be specified - if `include_top` is False (otherwise the input shape - has to be `(299, 299, 3)`. - It should have exactly 3 inputs channels, - and width and height should be no smaller than 71. - E.g. `(150, 150, 3)` would be one valid value. - pooling: Optional pooling mode for feature extraction - when `include_top` is `False`. - - `None` means that the output of the model will be - the 4D tensor output of the - last convolutional block. - - `avg` means that global average pooling - will be applied to the output of the - last convolutional block, and thus - the output of the model will be a 2D tensor. - - `max` means that global max pooling will - be applied. 
- classes: optional number of classes to classify images - into, only to be specified if `include_top` is True, - and if no `weights` argument is specified. - - # Returns - A Keras model instance. - - # Raises - ValueError: in case of invalid argument for `weights`, - or invalid input shape. - RuntimeError: If attempting to run this model with a - backend that does not support separable convolutions. - """ - backend, layers, models, keras_utils = get_submodules_from_kwargs(kwargs) - - if not (weights in {'imagenet', None} or os.path.exists(weights)): - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization), `imagenet` ' - '(pre-training on ImageNet), ' - 'or the path to the weights file to be loaded.') - - if weights == 'imagenet' and include_top and classes != 1000: - raise ValueError('If using `weights` as `"imagenet"` with `include_top`' - ' as true, `classes` should be 1000') - - # Determine proper input shape - input_shape = _obtain_input_shape(input_shape, - default_size=299, - min_size=71, - data_format=backend.image_data_format(), - require_flatten=include_top, - weights=weights) - - if input_tensor is None: - img_input = layers.Input(shape=input_shape) - else: - if not backend.is_keras_tensor(input_tensor): - img_input = layers.Input(tensor=input_tensor, shape=input_shape) - else: - img_input = input_tensor - - channel_axis = 1 if backend.image_data_format() == 'channels_first' else -1 - - x = layers.Conv2D(32, (3, 3), - strides=(2, 2), - use_bias=False, - name='block1_conv1')(img_input) - x = layers.BatchNormalization(axis=channel_axis, name='block1_conv1_bn')(x) - x = layers.Activation('relu', name='block1_conv1_act')(x) - x = layers.Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x) - x = layers.BatchNormalization(axis=channel_axis, name='block1_conv2_bn')(x) - x = layers.Activation('relu', name='block1_conv2_act')(x) - - residual = layers.Conv2D(128, (1, 1), - strides=(2, 2), - padding='same', - use_bias=False)(x) - residual = layers.BatchNormalization(axis=channel_axis)(residual) - - x = layers.SeparableConv2D(128, (3, 3), - padding='same', - use_bias=False, - name='block2_sepconv1')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block2_sepconv1_bn')(x) - x = layers.Activation('relu', name='block2_sepconv2_act')(x) - x = layers.SeparableConv2D(128, (3, 3), - padding='same', - use_bias=False, - name='block2_sepconv2')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block2_sepconv2_bn')(x) - - x = layers.MaxPooling2D((3, 3), - strides=(2, 2), - padding='same', - name='block2_pool')(x) - x = layers.add([x, residual]) - - residual = layers.Conv2D(256, (1, 1), strides=(2, 2), - padding='same', use_bias=False)(x) - residual = layers.BatchNormalization(axis=channel_axis)(residual) - - x = layers.Activation('relu', name='block3_sepconv1_act')(x) - x = layers.SeparableConv2D(256, (3, 3), - padding='same', - use_bias=False, - name='block3_sepconv1')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block3_sepconv1_bn')(x) - x = layers.Activation('relu', name='block3_sepconv2_act')(x) - x = layers.SeparableConv2D(256, (3, 3), - padding='same', - use_bias=False, - name='block3_sepconv2')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block3_sepconv2_bn')(x) - - x = layers.MaxPooling2D((3, 3), strides=(2, 2), - padding='same', - name='block3_pool')(x) - x = layers.add([x, residual]) - - residual = layers.Conv2D(728, (1, 1), - strides=(2, 2), - padding='same', - use_bias=False)(x) - 
residual = layers.BatchNormalization(axis=channel_axis)(residual) - - x = layers.Activation('relu', name='block4_sepconv1_act')(x) - x = layers.SeparableConv2D(728, (3, 3), - padding='same', - use_bias=False, - name='block4_sepconv1')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block4_sepconv1_bn')(x) - x = layers.Activation('relu', name='block4_sepconv2_act')(x) - x = layers.SeparableConv2D(728, (3, 3), - padding='same', - use_bias=False, - name='block4_sepconv2')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block4_sepconv2_bn')(x) - - x = layers.MaxPooling2D((3, 3), strides=(2, 2), - padding='same', - name='block4_pool')(x) - x = layers.add([x, residual]) - - for i in range(8): - residual = x - prefix = 'block' + str(i + 5) - - x = layers.Activation('relu', name=prefix + '_sepconv1_act')(x) - x = layers.SeparableConv2D(728, (3, 3), - padding='same', - use_bias=False, - name=prefix + '_sepconv1')(x) - x = layers.BatchNormalization(axis=channel_axis, - name=prefix + '_sepconv1_bn')(x) - x = layers.Activation('relu', name=prefix + '_sepconv2_act')(x) - x = layers.SeparableConv2D(728, (3, 3), - padding='same', - use_bias=False, - name=prefix + '_sepconv2')(x) - x = layers.BatchNormalization(axis=channel_axis, - name=prefix + '_sepconv2_bn')(x) - x = layers.Activation('relu', name=prefix + '_sepconv3_act')(x) - x = layers.SeparableConv2D(728, (3, 3), - padding='same', - use_bias=False, - name=prefix + '_sepconv3')(x) - x = layers.BatchNormalization(axis=channel_axis, - name=prefix + '_sepconv3_bn')(x) - - x = layers.add([x, residual]) - - residual = layers.Conv2D(1024, (1, 1), strides=(2, 2), - padding='same', use_bias=False)(x) - residual = layers.BatchNormalization(axis=channel_axis)(residual) - - x = layers.Activation('relu', name='block13_sepconv1_act')(x) - x = layers.SeparableConv2D(728, (3, 3), - padding='same', - use_bias=False, - name='block13_sepconv1')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block13_sepconv1_bn')(x) - x = layers.Activation('relu', name='block13_sepconv2_act')(x) - x = layers.SeparableConv2D(1024, (3, 3), - padding='same', - use_bias=False, - name='block13_sepconv2')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block13_sepconv2_bn')(x) - - x = layers.MaxPooling2D((3, 3), - strides=(2, 2), - padding='same', - name='block13_pool')(x) - x = layers.add([x, residual]) - - x = layers.SeparableConv2D(1536, (3, 3), - padding='same', - use_bias=False, - name='block14_sepconv1')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block14_sepconv1_bn')(x) - x = layers.Activation('relu', name='block14_sepconv1_act')(x) - - x = layers.SeparableConv2D(2048, (3, 3), - padding='same', - use_bias=False, - name='block14_sepconv2')(x) - x = layers.BatchNormalization( - axis=channel_axis, name='block14_sepconv2_bn')(x) - x = layers.Activation('relu', name='block14_sepconv2_act')(x) - - if include_top: - x = layers.GlobalAveragePooling2D(name='avg_pool')(x) - x = layers.Dense(classes, activation='softmax', name='predictions')(x) - else: - if pooling == 'avg': - x = layers.GlobalAveragePooling2D()(x) - elif pooling == 'max': - x = layers.GlobalMaxPooling2D()(x) - - # Ensure that the model takes into account - # any potential predecessors of `input_tensor`. - if input_tensor is not None: - inputs = keras_utils.get_source_inputs(input_tensor) - else: - inputs = img_input - # Create model. - model = models.Model(inputs, x, name='xception') - - # Load weights. 
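- # `get_file` caches the downloaded weights under ~/.keras/models, so - # repeated instantiations reuse the local copy instead of re-downloading.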
- if weights == 'imagenet': - if include_top: - weights_path = keras_utils.get_file( - 'xception_weights_tf_dim_ordering_tf_kernels.h5', - TF_WEIGHTS_PATH, - cache_subdir='models', - file_hash='0a58e3b7378bc2990ea3b43d5981f1f6') - else: - weights_path = keras_utils.get_file( - 'xception_weights_tf_dim_ordering_tf_kernels_notop.h5', - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - file_hash='b0042744bf5b25fce3cb969f33bebb97') - model.load_weights(weights_path) - if backend.backend() == 'theano': - keras_utils.convert_all_kernels_in_model(model) - elif weights is not None: - model.load_weights(weights) - - return model - - -def preprocess_input(x, **kwargs): - """Preprocesses a numpy array encoding a batch of images. - - # Arguments - x: a 4D numpy array consists of RGB values within [0, 255]. - - # Returns - Preprocessed array. - """ - return imagenet_utils.preprocess_input(x, mode='tf', **kwargs) -import pytest -import random -import six -import numpy as np - -import keras_applications -from keras.applications import densenet -from keras.applications import inception_resnet_v2 -from keras.applications import inception_v3 -from keras.applications import mobilenet -try: - from keras.applications import mobilenet_v2 -except ImportError: - from keras.applications import mobilenetv2 as mobilenet_v2 -from keras.applications import nasnet -from keras.applications import resnet50 -from keras.applications import vgg16 -from keras.applications import vgg19 -from keras.applications import xception -from keras.preprocessing import image -from keras import backend -from keras import layers -from keras import models -from keras import utils - -from multiprocessing import Process, Queue - - -def keras_modules_injection(base_fun): - - def wrapper(*args, **kwargs): - if hasattr(keras_applications, 'get_submodules_from_kwargs'): - kwargs['backend'] = backend - kwargs['layers'] = layers - kwargs['models'] = models - kwargs['utils'] = utils - return base_fun(*args, **kwargs) - return wrapper - - -for (name, module) in [('resnet', keras_applications.resnet), - ('resnet_v2', keras_applications.resnet_v2), - ('resnext', keras_applications.resnext)]: - module.decode_predictions = keras_modules_injection( - module.decode_predictions) - module.preprocess_input = keras_modules_injection(module.preprocess_input) - for app in dir(module): - if app[0].isupper(): - setattr(module, app, keras_modules_injection(getattr(module, app))) - setattr(keras_applications, name, module) - - -RESNET_LIST = [keras_applications.resnet.ResNet50, - keras_applications.resnet.ResNet101, - keras_applications.resnet.ResNet152] -RESNETV2_LIST = [keras_applications.resnet_v2.ResNet50V2, - keras_applications.resnet_v2.ResNet101V2, - keras_applications.resnet_v2.ResNet152V2] -RESNEXT_LIST = [keras_applications.resnext.ResNeXt50, - keras_applications.resnext.ResNeXt101] -MOBILENET_LIST = [(mobilenet.MobileNet, mobilenet, 1024), - (mobilenet_v2.MobileNetV2, mobilenet_v2, 1280)] -DENSENET_LIST = [(densenet.DenseNet121, 1024), - (densenet.DenseNet169, 1664), - (densenet.DenseNet201, 1920)] -NASNET_LIST = [(nasnet.NASNetMobile, 1056), - (nasnet.NASNetLarge, 4032)] - - -def keras_test(func): - """Function wrapper to clean up after TensorFlow tests. - # Arguments - func: test function to clean up after. - # Returns - A function wrapping the input function. 
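- # Note - Clearing the backend session after each test releases the - graph state that TensorFlow and CNTK otherwise accumulate across runs.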
- """ - @six.wraps(func) - def wrapper(*args, **kwargs): - output = func(*args, **kwargs) - if backend.backend() == 'tensorflow' or backend.backend() == 'cntk': - backend.clear_session() - return output - return wrapper - - -def _get_elephant(target_size): - # For models that don't include a Flatten step, - # the default is to accept variable-size inputs - # even when loading ImageNet weights (since it is possible). - # In this case, default to 299x299. - if target_size[0] is None: - target_size = (299, 299) - img = image.load_img('tests/data/elephant.jpg', - target_size=tuple(target_size)) - x = image.img_to_array(img) - return np.expand_dims(x, axis=0) - - -def _get_output_shape(model_fn, preprocess_input=None): - if backend.backend() == 'cntk': - # Create model in a subprocess so that - # the memory consumed by InceptionResNetV2 will be - # released back to the system after this test - # (to deal with OOM error on CNTK backend). - # TODO: remove the use of multiprocessing from these tests - # once a memory clearing mechanism - # is implemented in the CNTK backend. - def target(queue): - model = model_fn() - if preprocess_input is None: - queue.put(model.output_shape) - else: - x = _get_elephant(model.input_shape[1:3]) - x = preprocess_input(x) - queue.put((model.output_shape, model.predict(x))) - queue = Queue() - p = Process(target=target, args=(queue,)) - p.start() - p.join() - # The error in a subprocess won't propagate - # to the main process, so we check if the model - # is successfully created by checking if the output shape - # has been put into the queue - assert not queue.empty(), 'Model creation failed.' - return queue.get_nowait() - else: - model = model_fn() - if preprocess_input is None: - return model.output_shape - else: - x = _get_elephant(model.input_shape[1:3]) - x = preprocess_input(x) - return (model.output_shape, model.predict(x)) - - -@keras_test -def _test_application_basic(app, last_dim=1000, module=None): - if module is None: - output_shape = _get_output_shape(lambda: app(weights=None)) - assert output_shape == (None, None, None, last_dim) - else: - output_shape, preds = _get_output_shape( - lambda: app(weights='imagenet'), module.preprocess_input) - assert output_shape == (None, last_dim) - - names = [p[1] for p in module.decode_predictions(preds)[0]] - # Test correct label is in top 3 (weak correctness test). 
- assert 'African_elephant' in names[:3] - - -@keras_test -def _test_application_notop(app, last_dim): - output_shape = _get_output_shape( - lambda: app(weights=None, include_top=False)) - assert output_shape == (None, None, None, last_dim) - - -@keras_test -def _test_application_variable_input_channels(app, last_dim): - if backend.image_data_format() == 'channels_first': - input_shape = (1, None, None) - else: - input_shape = (None, None, 1) - output_shape = _get_output_shape( - lambda: app(weights=None, include_top=False, input_shape=input_shape)) - assert output_shape == (None, None, None, last_dim) - - if backend.image_data_format() == 'channels_first': - input_shape = (4, None, None) - else: - input_shape = (None, None, 4) - output_shape = _get_output_shape( - lambda: app(weights=None, include_top=False, input_shape=input_shape)) - assert output_shape == (None, None, None, last_dim) - - -@keras_test -def _test_app_pooling(app, last_dim): - output_shape = _get_output_shape( - lambda: app(weights=None, - include_top=False, - pooling=random.choice(['avg', 'max']))) - assert output_shape == (None, last_dim) - - -def test_resnet(): - app = random.choice(RESNET_LIST) - module = keras_applications.resnet - last_dim = 2048 - _test_application_basic(app, module=module) - _test_application_notop(app, last_dim) - _test_application_variable_input_channels(app, last_dim) - _test_app_pooling(app, last_dim) - - -def test_resnetv2(): - app = random.choice(RESNETV2_LIST) - module = keras_applications.resnet_v2 - last_dim = 2048 - _test_application_basic(app, module=module) - _test_application_notop(app, last_dim) - _test_application_variable_input_channels(app, last_dim) - _test_app_pooling(app, last_dim) - - -def test_resnext(): - app = random.choice(RESNEXT_LIST) - module = keras_applications.resnext - _test_application_basic(app, module=module) - - -def test_vgg(): - app = random.choice([vgg16.VGG16, vgg19.VGG19]) - module = vgg16 - last_dim = 512 - _test_application_basic(app, module=module) - _test_application_notop(app, last_dim) - _test_application_variable_input_channels(app, last_dim) - _test_app_pooling(app, last_dim) - - -def test_xception(): - app = xception.Xception - module = xception - last_dim = 2048 - _test_application_basic(app, module=module) - _test_application_notop(app, last_dim) - _test_application_variable_input_channels(app, last_dim) - _test_app_pooling(app, last_dim) - - -def test_inceptionv3(): - app = inception_v3.InceptionV3 - module = inception_v3 - last_dim = 2048 - _test_application_basic(app, module=module) - _test_application_notop(app, last_dim) - _test_application_variable_input_channels(app, last_dim) - _test_app_pooling(app, last_dim) - - -def test_inceptionresnetv2(): - app = inception_resnet_v2.InceptionResNetV2 - module = inception_resnet_v2 - last_dim = 1536 - _test_application_basic(app, module=module) - _test_application_notop(app, last_dim) - _test_application_variable_input_channels(app, last_dim) - _test_app_pooling(app, last_dim) - - -def test_mobilenet(): - app, module, last_dim = random.choice(MOBILENET_LIST) - _test_application_basic(app, module=module) - _test_application_notop(app, last_dim) - _test_application_variable_input_channels(app, last_dim) - _test_app_pooling(app, last_dim) - - -def test_densenet(): - app, last_dim = random.choice(DENSENET_LIST) - module = densenet - _test_application_basic(app, module=module) - _test_application_notop(app, last_dim) - _test_application_variable_input_channels(app, last_dim) - _test_app_pooling(app, 
last_dim) - - -def test_nasnet(): - # NASNetLarge is too heavy to test on Travis - app, last_dim = NASNET_LIST[0] - module = nasnet - _test_application_basic(app, module=module) - # _test_application_notop(app, last_dim) - # _test_application_variable_input_channels(app, last_dim) - _test_app_pooling(app, last_dim) - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest -import numpy as np -from numpy.testing import assert_allclose - -# We don't use keras.applications.imagenet_utils here -# because we also test _obtain_input_shape which is not exposed. -from keras_applications import imagenet_utils as utils -from keras import backend -from keras import models -from keras import layers -from keras import utils as keras_utils - - -def decode_predictions(*args, **kwargs): - kwargs['backend'] = backend - kwargs['utils'] = keras_utils - return utils.decode_predictions(*args, **kwargs) - - -def preprocess_input(*args, **kwargs): - kwargs['backend'] = backend - return utils.preprocess_input(*args, **kwargs) - - -def test_preprocess_input(): - # Test image batch with float and int image input - x = np.random.uniform(0, 255, (2, 10, 10, 3)) - xint = x.astype('int32') - assert preprocess_input(x).shape == x.shape - assert preprocess_input(xint).shape == xint.shape - - out1 = preprocess_input(x, 'channels_last') - out1int = preprocess_input(xint, 'channels_last') - out2 = preprocess_input(np.transpose(x, (0, 3, 1, 2)), 'channels_first') - out2int = preprocess_input(np.transpose( - xint, (0, 3, 1, 2)), 'channels_first') - assert_allclose(out1, out2.transpose(0, 2, 3, 1)) - assert_allclose(out1int, out2int.transpose(0, 2, 3, 1)) - - # Test single image - x = np.random.uniform(0, 255, (10, 10, 3)) - xint = x.astype('int32') - assert preprocess_input(x).shape == x.shape - assert preprocess_input(xint).shape == xint.shape - - out1 = preprocess_input(x, 'channels_last') - out1int = preprocess_input(xint, 'channels_last') - out2 = preprocess_input(np.transpose(x, (2, 0, 1)), 'channels_first') - out2int = preprocess_input(np.transpose(xint, (2, 0, 1)), 'channels_first') - assert_allclose(out1, out2.transpose(1, 2, 0)) - assert_allclose(out1int, out2int.transpose(1, 2, 0)) - - # Test that writing over the input data works predictably - for mode in ['torch', 'tf']: - x = np.random.uniform(0, 255, (2, 10, 10, 3)) - xint = x.astype('int') - x2 = preprocess_input(x, mode=mode) - xint2 = preprocess_input(xint) - assert_allclose(x, x2) - assert xint.astype('float').max() != xint2.max() - # Caffe mode works differently from the others - x = np.random.uniform(0, 255, (2, 10, 10, 3)) - xint = x.astype('int') - x2 = preprocess_input(x, data_format='channels_last', mode='caffe') - xint2 = preprocess_input(xint) - assert_allclose(x, x2[..., ::-1]) - assert xint.astype('float').max() != xint2.max() - - -def test_preprocess_input_symbolic(): - # Test image batch - x = np.random.uniform(0, 255, (2, 10, 10, 3)) - inputs = layers.Input(shape=x.shape[1:]) - outputs = layers.Lambda(preprocess_input, output_shape=x.shape[1:])(inputs) - model = models.Model(inputs, outputs) - assert model.predict(x).shape == x.shape - - outputs1 = layers.Lambda( - lambda x: preprocess_input(x, 'channels_last'), - output_shape=x.shape[1:])(inputs) - model1 = models.Model(inputs, outputs1) - out1 = model1.predict(x) - x2 = np.transpose(x, (0, 3, 1, 2)) - inputs2 = layers.Input(shape=x2.shape[1:]) - outputs2 = layers.Lambda( - lambda x: preprocess_input(x, 'channels_first'), - output_shape=x2.shape[1:])(inputs2) - model2 = 
models.Model(inputs2, outputs2) - out2 = model2.predict(x2) - assert_allclose(out1, out2.transpose(0, 2, 3, 1)) - - # Test single image - x = np.random.uniform(0, 255, (10, 10, 3)) - inputs = layers.Input(shape=x.shape) - outputs = layers.Lambda(preprocess_input, output_shape=x.shape)(inputs) - model = models.Model(inputs, outputs) - assert model.predict(x[np.newaxis])[0].shape == x.shape - - outputs1 = layers.Lambda( - lambda x: preprocess_input(x, 'channels_last'), - output_shape=x.shape)(inputs) - model1 = models.Model(inputs, outputs1) - out1 = model1.predict(x[np.newaxis])[0] - x2 = np.transpose(x, (2, 0, 1)) - inputs2 = layers.Input(shape=x2.shape) - outputs2 = layers.Lambda( - lambda x: preprocess_input(x, 'channels_first'), - output_shape=x2.shape)(inputs2) - model2 = models.Model(inputs2, outputs2) - out2 = model2.predict(x2[np.newaxis])[0] - assert_allclose(out1, out2.transpose(1, 2, 0)) - - -def test_decode_predictions(): - x = np.zeros((2, 1000)) - x[0, 372] = 1.0 - x[1, 549] = 1.0 - outs = decode_predictions(x, top=1) - scores = [out[0][2] for out in outs] - assert scores[0] == scores[1] - - # the numbers of columns and ImageNet classes are not identical. - with pytest.raises(ValueError): - decode_predictions(np.ones((2, 100))) - - -def test_obtain_input_shape(): - # input_shape and default_size are not identical. - with pytest.raises(ValueError): - utils._obtain_input_shape( - input_shape=(224, 224, 3), - default_size=299, - min_size=139, - data_format='channels_last', - require_flatten=True, - weights='imagenet') - - # Test invalid use cases - for data_format in ['channels_last', 'channels_first']: - # test warning - shape = (139, 139) - if data_format == 'channels_last': - input_shape = shape + (99,) - else: - input_shape = (99,) + shape - with pytest.warns(UserWarning): - utils._obtain_input_shape( - input_shape=input_shape, - default_size=None, - min_size=139, - data_format=data_format, - require_flatten=False, - weights='fake_weights') - - # input_shape is smaller than min_size. - shape = (100, 100) - if data_format == 'channels_last': - input_shape = shape + (3,) - else: - input_shape = (3,) + shape - with pytest.raises(ValueError): - utils._obtain_input_shape( - input_shape=input_shape, - default_size=None, - min_size=139, - data_format=data_format, - require_flatten=False) - - # shape is 1D. - shape = (100,) - if data_format == 'channels_last': - input_shape = shape + (3,) - else: - input_shape = (3,) + shape - with pytest.raises(ValueError): - utils._obtain_input_shape( - input_shape=input_shape, - default_size=None, - min_size=139, - data_format=data_format, - require_flatten=False) - - # the number of channels is 5 not 3. - shape = (100, 100) - if data_format == 'channels_last': - input_shape = shape + (5,) - else: - input_shape = (5,) + shape - with pytest.raises(ValueError): - utils._obtain_input_shape( - input_shape=input_shape, - default_size=None, - min_size=139, - data_format=data_format, - require_flatten=False) - - # require_flatten=True with dynamic input shape. 
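- # (A Flatten/Dense top needs every spatial dimension known up front, - # so a fully dynamic input shape must raise.)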
- with pytest.raises(ValueError): - utils._obtain_input_shape( - input_shape=None, - default_size=None, - min_size=139, - data_format='channels_first', - require_flatten=True) - - # test include top - assert utils._obtain_input_shape( - input_shape=(3, 200, 200), - default_size=None, - min_size=139, - data_format='channels_first', - require_flatten=True) == (3, 200, 200) - - assert utils._obtain_input_shape( - input_shape=None, - default_size=None, - min_size=139, - data_format='channels_last', - require_flatten=False) == (None, None, 3) - - assert utils._obtain_input_shape( - input_shape=None, - default_size=None, - min_size=139, - data_format='channels_first', - require_flatten=False) == (3, None, None) - - assert utils._obtain_input_shape( - input_shape=None, - default_size=None, - min_size=139, - data_format='channels_last', - require_flatten=False) == (None, None, 3) - - assert utils._obtain_input_shape( - input_shape=(150, 150, 3), - default_size=None, - min_size=139, - data_format='channels_last', - require_flatten=False) == (150, 150, 3) - - assert utils._obtain_input_shape( - input_shape=(3, None, None), - default_size=None, - min_size=139, - data_format='channels_first', - require_flatten=False) == (3, None, None) - - -if __name__ == '__main__': - pytest.main([__file__]) -from setuptools import setup, find_packages - -setup(name='devol', - version='0.02', - description='Genetic search for CNN classifier in Keras', - url='https://github.com/joedav/devol', - author='Joe Davison', - author_email='josephddavison@gmail.com', - license='MIT', - - classifiers=[ - # How mature is this project? Common values are - # 3 - Alpha - # 4 - Beta - # 5 - Production/Stable - 'Development Status :: 3 - Alpha', - - # Indicate who your project is intended for - 'Intended Audience :: Developers', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - - # Pick your license as you wish (should match "license" above) - 'License :: OSI Approved :: MIT License', - - # Specify the Python versions you support here. In particular, ensure - # that you indicate whether you support Python 2, Python 3 or both. - - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: 2.7', - ], - keywords='genetic algorithm', - packages=['devol'], - - install_requires=['keras', ], - ) -from .devol import DEvol -from .genome_handler import GenomeHandler - -__all__ = ['DEvol', 'GenomeHandler'] -""" -Run a genetic algorithm to find an appropriate architecture for some image -classification task with Keras+TF. - -To use, define a `GenomeHandler` (see genome_handler.py). Then pass it, along -with training data, to a DEvol instance to run the genetic algorithm. See the -readme for more detailed instructions. -""" - -from __future__ import print_function -import random as rand -import csv -import operator -import gc -import os -from datetime import datetime -from keras.callbacks import EarlyStopping -from keras.models import load_model -import keras.backend as K -from sklearn.metrics import log_loss -import numpy as np - -if K.backend() == 'tensorflow': - import tensorflow as tf - -__all__ = ['DEvol'] - -METRIC_OPS = [operator.__lt__, operator.__gt__] -METRIC_OBJECTIVES = [min, max] - - -class DEvol: - """ - Object which carries out genetic search and returns top performing model - upon completion.
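- Every genome evaluated during the search is appended, together with its - loss and accuracy, to the CSV file given by `data_path`.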
- """ - - def __init__(self, genome_handler, data_path=""): - """ - Initialize a DEvol object which carries out the training and evaluation - of a genetic search. - - Args: - genome_handler (GenomeHandler): the genome handler object defining - the restrictions for the architecture search space - data_path (str): the file which the genome encodings and metric data - will be stored in - """ - self.genome_handler = genome_handler - self.datafile = data_path or (datetime.now().ctime() + '.csv') - self._bssf = -1 - - if os.path.isfile(data_path) and os.stat(data_path).st_size > 1: - raise ValueError(('Non-empty file %s already exists. Please change' - 'file path to prevent overwritten genome data.' - % data_path)) - - print("Genome encoding and metric data stored at", self.datafile, "\n") - with open(self.datafile, 'a') as csvfile: - writer = csv.writer(csvfile, delimiter=',', quotechar='"', - quoting=csv.QUOTE_MINIMAL) - metric_cols = ["Val Loss", "Val Accuracy"] - genome = genome_handler.genome_representation() + metric_cols - writer.writerow(genome) - - def set_objective(self, metric): - """ - Set the metric for optimization. Can also be done by passing to - `run`. - - Args: - metric (str): either 'acc' to maximize classification accuracy, or - else 'loss' to minimize the loss function - """ - if metric == 'acc': - metric = 'accuracy' - if metric not in ['loss', 'accuracy']: - raise ValueError(('Invalid metric name {} provided - should be' - '"accuracy" or "loss"').format(metric)) - self._metric = metric - self._objective = "max" if self._metric == "accuracy" else "min" - self._metric_index = 1 if self._metric == 'loss' else -1 - self._metric_op = METRIC_OPS[self._objective == 'max'] - self._metric_objective = METRIC_OBJECTIVES[self._objective == 'max'] - - def run(self, dataset, num_generations, pop_size, epochs, fitness=None, - metric='accuracy'): - """ - Run genetic search on dataset given number of generations and - population size - - Args: - dataset : tuple or list of numpy arrays in form ((train_data, - train_labels), (validation_data, validation_labels)) - num_generations (int): number of generations to search - pop_size (int): initial population size - epochs (int): epochs for each model eval, passed to keras model.fit - fitness (None, optional): scoring function to be applied to - population scores, will be called on a numpy array which is - a min/max scaled version of evaluated model metrics, so It - should accept a real number including 0. If left as default - just the min/max scaled values will be used. 
- metric (str, optional): must be "accuracy" or "loss", defines what - to optimize during search - - Returns: - keras model: best model found with weights - """ - self.set_objective(metric) - - # If no validation data is given, set it to None - if len(dataset) == 2: - (self.x_train, self.y_train), (self.x_test, self.y_test) = dataset - self.x_val = None - self.y_val = None - else: - (self.x_train, self.y_train), (self.x_test, - self.y_test), (self.x_val, self.y_val) = dataset - - # generate and evaluate initial population - members = self._generate_random_population(pop_size) - pop = self._evaluate_population(members, - epochs, - fitness, - 0, - num_generations) - - # evolve - for gen in range(1, num_generations): - members = self._reproduce(pop, gen) - pop = self._evaluate_population(members, - epochs, - fitness, - gen, - num_generations) - - return load_model('best-model.h5') - - def _reproduce(self, pop, gen): - members = [] - - # 95% of population from crossover - for _ in range(int(len(pop) * 0.95)): - members.append(self._crossover(pop.select(), pop.select())) - - # best models survive automatically - members += pop.get_best(len(pop) - int(len(pop) * 0.95)) - - # randomly mutate - for imem, mem in enumerate(members): - members[imem] = self._mutate(mem, gen) - return members - - def _evaluate(self, genome, epochs): - model = self.genome_handler.decode(genome) - loss, accuracy = None, None - fit_params = { - 'x': self.x_train, - 'y': self.y_train, - 'validation_split': 0.1, - 'epochs': epochs, - 'verbose': 1, - 'callbacks': [ - EarlyStopping(monitor='val_loss', patience=1, verbose=1) - ] - } - - if self.x_val is not None: - fit_params['validation_data'] = (self.x_val, self.y_val) - try: - model.fit(**fit_params) - loss, accuracy = model.evaluate( - self.x_test, self.y_test, verbose=0) - except Exception as e: - loss, accuracy = self._handle_broken_model(model, e) - - self._record_stats(model, genome, loss, accuracy) - - return model, loss, accuracy - - def _record_stats(self, model, genome, loss, accuracy): - with open(self.datafile, 'a') as csvfile: - writer = csv.writer(csvfile, delimiter=',', - quotechar='"', quoting=csv.QUOTE_MINIMAL) - row = list(genome) + [loss, accuracy] - writer.writerow(row) - - met = loss if self._metric == 'loss' else accuracy - if (self._bssf == -1 or - self._metric_op(met, self._bssf) and - accuracy != 0): - try: - os.remove('best-model.h5') - except OSError: - pass - self._bssf = met - model.save('best-model.h5') - - def _handle_broken_model(self, model, error): - del model - - n = self.genome_handler.n_classes - loss = log_loss(np.concatenate(([1], np.zeros(n - 1))), np.ones(n) / n) - accuracy = 1 / n - gc.collect() - - if K.backend() == 'tensorflow': - K.clear_session() - tf.reset_default_graph() - - print('An error occurred and the model could not train:') - print(error) - print(('Model assigned poor score.
Please ensure that your model ' - 'constraints live within your computational resources.')) - return loss, accuracy - - def _evaluate_population(self, members, epochs, fitness, igen, ngen): - fit = [] - for imem, mem in enumerate(members): - self._print_evaluation(imem, len(members), igen, ngen) - res = self._evaluate(mem, epochs) - v = res[self._metric_index] - del res - fit.append(v) - - fit = np.array(fit) - self._print_result(fit, igen) - return _Population(members, fit, fitness, obj=self._objective) - - def _print_evaluation(self, imod, nmod, igen, ngen): - fstr = '\nmodel {0}/{1} - generation {2}/{3}:\n' - print(fstr.format(imod + 1, nmod, igen + 1, ngen)) - - def _generate_random_population(self, size): - return [self.genome_handler.generate() for _ in range(size)] - - def _print_result(self, fitness, generation): - result_str = ('Generation {3}:\t\tbest {4}: {0:0.4f}\t\taverage: ' - '{1:0.4f}\t\tstd: {2:0.4f}') - print(result_str.format(self._metric_objective(fitness), - np.mean(fitness), - np.std(fitness), - generation + 1, self._metric)) - - def _crossover(self, genome1, genome2): - cross_ind = rand.randint(0, len(genome1)) - child = genome1[:cross_ind] + genome2[cross_ind:] - return child - - def _mutate(self, genome, generation): - # increase mutations as program continues - num_mutations = max(3, generation // 4) - return self.genome_handler.mutate(genome, num_mutations) - - -class _Population(object): - - def __len__(self): - return len(self.members) - - def __init__(self, members, fitnesses, score, obj='max'): - self.members = members - scores = fitnesses - fitnesses.min() - if scores.max() > 0: - scores /= scores.max() - if obj == 'min': - scores = 1 - scores - if score: - self.scores = score(scores) - else: - self.scores = scores - self.s_fit = sum(self.scores) - - def get_best(self, n): - combined = [(self.members[i], self.scores[i]) - for i in range(len(self.members))] - combined = sorted(combined, key=(lambda x: x[1]), reverse=True) - return [x[0] for x in combined[:n]] - - def select(self): - dart = rand.uniform(0, self.s_fit) - sum_fits = 0 - for i in range(len(self.members)): - sum_fits += self.scores[i] - if sum_fits >= dart: - return self.members[i] -import numpy as np -import random as rand -import math -from keras.models import Sequential -from keras.layers import Activation, Dense, Dropout, Flatten -from keras.layers.convolutional import Convolution2D, MaxPooling2D -from keras.layers.normalization import BatchNormalization - - -class GenomeHandler: - """ - Defines the configuration and handles the conversion and mutation of - individual genomes. Should be created and passed to a `DEvol` instance. - - --- - Genomes are represented as fixed-width lists of integers corresponding - to sequential layers and properties. A model with 2 convolutional layers - and 1 dense layer would look like: - - [<conv layer><conv layer><dense layer><optimizer>] - - The makeup of the convolutional layers and dense layers is defined in the - GenomeHandler below under self.convolutional_layer_shape and - self.dense_layer_shape. <optimizer> consists of just one property.
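- For example, with the layer shapes defined below, one convolutional layer - occupies six consecutive genes (active, num filters, batch normalization, - activation, dropout, max pooling), each dense layer five, and the final - gene selects the optimizer.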
- """ - - def __init__(self, max_conv_layers, max_dense_layers, max_filters, - max_dense_nodes, input_shape, n_classes, - batch_normalization=True, dropout=True, max_pooling=True, - optimizers=None, activations=None): - """ - Creates a GenomeHandler according - - Args: - max_conv_layers: The maximum number of convolutional layers - max_conv_layers: The maximum number of dense (fully connected) - layers, including output layer - max_filters: The maximum number of conv filters (feature maps) in a - convolutional layer - max_dense_nodes: The maximum number of nodes in a dense layer - input_shape: The shape of the input - n_classes: The number of classes - batch_normalization (bool): whether the GP should include batch norm - dropout (bool): whether the GP should include dropout - max_pooling (bool): whether the GP should include max pooling layers - optimizers (list): list of optimizers to be tried by the GP. By - default, the network uses Keras's built-in adam, rmsprop, - adagrad, and adadelta - activations (list): list of activation functions to be tried by the - GP. By default, relu and sigmoid. - """ - if max_dense_layers < 1: - raise ValueError( - "At least one dense layer is required for softmax layer" - ) - if max_filters > 0: - filter_range_max = int(math.log(max_filters, 2)) + 1 - else: - filter_range_max = 0 - self.optimizer = optimizers or [ - 'adam', - 'rmsprop', - 'adagrad', - 'adadelta' - ] - self.activation = activations or [ - 'relu', - 'sigmoid', - ] - self.convolutional_layer_shape = [ - "active", - "num filters", - "batch normalization", - "activation", - "dropout", - "max pooling", - ] - self.dense_layer_shape = [ - "active", - "num nodes", - "batch normalization", - "activation", - "dropout", - ] - self.layer_params = { - "active": [0, 1], - "num filters": [2**i for i in range(3, filter_range_max)], - "num nodes": [2**i for i in range(4, int(math.log(max_dense_nodes, 2)) + 1)], - "batch normalization": [0, (1 if batch_normalization else 0)], - "activation": list(range(len(self.activation))), - "dropout": [(i if dropout else 0) for i in range(11)], - "max pooling": list(range(3)) if max_pooling else 0, - } - - self.convolution_layers = max_conv_layers - self.convolution_layer_size = len(self.convolutional_layer_shape) - # this doesn't include the softmax layer, so -1 - self.dense_layers = max_dense_layers - 1 - self.dense_layer_size = len(self.dense_layer_shape) - self.input_shape = input_shape - self.n_classes = n_classes - - def convParam(self, i): - key = self.convolutional_layer_shape[i] - return self.layer_params[key] - - def denseParam(self, i): - key = self.dense_layer_shape[i] - return self.layer_params[key] - - def mutate(self, genome, num_mutations): - num_mutations = np.random.choice(num_mutations) - for i in range(num_mutations): - index = np.random.choice(list(range(1, len(genome)))) - if index < self.convolution_layer_size * self.convolution_layers: - if genome[index - index % self.convolution_layer_size]: - range_index = index % self.convolution_layer_size - choice_range = self.convParam(range_index) - genome[index] = np.random.choice(choice_range) - elif rand.uniform(0, 1) <= 0.01: # randomly flip deactivated layers - genome[index - index % self.convolution_layer_size] = 1 - elif index != len(genome) - 1: - offset = self.convolution_layer_size * self.convolution_layers - new_index = (index - offset) - present_index = new_index - new_index % self.dense_layer_size - if genome[present_index + offset]: - range_index = new_index % self.dense_layer_size - 
choice_range = self.denseParam(range_index) - genome[index] = np.random.choice(choice_range) - elif rand.uniform(0, 1) <= 0.01: - genome[present_index + offset] = 1 - else: - genome[index] = np.random.choice( - list(range(len(self.optimizer)))) - return genome - - def decode(self, genome): - if not self.is_compatible_genome(genome): - raise ValueError("Invalid genome for specified configs") - model = Sequential() - offset = 0 - dim = min(self.input_shape[:-1]) # keep track of smallest dimension - input_layer = True - for i in range(self.convolution_layers): - if genome[offset]: - convolution = None - if input_layer: - convolution = Convolution2D( - genome[offset + 1], (3, 3), - padding='same', - input_shape=self.input_shape - ) - input_layer = False - else: - convolution = Convolution2D( - genome[offset + 1], (3, 3), - padding='same' - ) - model.add(convolution) - if genome[offset + 2]: - model.add(BatchNormalization()) - model.add(Activation(self.activation[genome[offset + 3]])) - model.add(Dropout(float(genome[offset + 4] / 20.0))) - max_pooling_type = genome[offset + 5] - # must be large enough for a convolution - if max_pooling_type == 1 and dim >= 5: - model.add(MaxPooling2D(pool_size=(2, 2), padding="same")) - dim = int(math.ceil(dim / 2)) - offset += self.convolution_layer_size - - if not input_layer: - model.add(Flatten()) - - for i in range(self.dense_layers): - if genome[offset]: - dense = None - if input_layer: - dense = Dense(genome[offset + 1], - input_shape=self.input_shape) - input_layer = False - else: - dense = Dense(genome[offset + 1]) - model.add(dense) - if genome[offset + 2]: - model.add(BatchNormalization()) - model.add(Activation(self.activation[genome[offset + 3]])) - model.add(Dropout(float(genome[offset + 4] / 20.0))) - offset += self.dense_layer_size - - model.add(Dense(self.n_classes, activation='softmax')) - model.compile(loss='categorical_crossentropy', - optimizer=self.optimizer[genome[offset]], - metrics=["accuracy"]) - return model - - def genome_representation(self): - encoding = [] - for i in range(self.convolution_layers): - for key in self.convolutional_layer_shape: - encoding.append("Conv" + str(i) + " " + key) - for i in range(self.dense_layers): - for key in self.dense_layer_shape: - encoding.append("Dense" + str(i) + " " + key) - encoding.append("Optimizer") - return encoding - - def generate(self): - genome = [] - for i in range(self.convolution_layers): - for key in self.convolutional_layer_shape: - param = self.layer_params[key] - genome.append(np.random.choice(param)) - for i in range(self.dense_layers): - for key in self.dense_layer_shape: - param = self.layer_params[key] - genome.append(np.random.choice(param)) - genome.append(np.random.choice(list(range(len(self.optimizer))))) - genome[0] = 1 - return genome - - def is_compatible_genome(self, genome): - expected_len = self.convolution_layers * self.convolution_layer_size \ - + self.dense_layers * self.dense_layer_size + 1 - if len(genome) != expected_len: - return False - ind = 0 - for i in range(self.convolution_layers): - for j in range(self.convolution_layer_size): - if genome[ind + j] not in self.convParam(j): - return False - ind += self.convolution_layer_size - for i in range(self.dense_layers): - for j in range(self.dense_layer_size): - if genome[ind + j] not in self.denseParam(j): - return False - ind += self.dense_layer_size - if genome[ind] not in range(len(self.optimizer)): - return False - return True - - def best_genome(self, csv_path, metric="accuracy", include_metrics=True): - 
best = max if metric == "accuracy" else min - col = -1 if metric == "accuracy" else -2 - data = np.genfromtxt(csv_path, delimiter=",") - row = list(data[:, col]).index(best(data[:, col])) - genome = list(map(int, data[row, :-2])) - if include_metrics: - genome += list(data[row, -2:]) - return genome - - def decode_best(self, csv_path, metric="accuracy"): - return self.decode(self.best_genome(csv_path, metric, False)) -#!/usr/bin/env python2 -# -*- coding: utf-8 -*- -""" -Created on Thu Apr 6 01:01:43 2017 - -@author: abhisheksingh -""" - -from matplotlib import pyplot as plt -import matplotlib -import cv2 -import json -from sklearn.model_selection import train_test_split -from sklearn.utils import shuffle -from PIL import Image -import os -import numpy as np -from keras.models import Sequential -from keras.layers import Dense, Dropout, Activation, Flatten -from keras.layers import Conv2D, MaxPooling2D, ZeroPadding2D -from keras.optimizers import SGD, RMSprop, adam -from keras.utils import np_utils - -from keras import backend as K -if K.backend() == 'tensorflow': - import tensorflow - # K.set_image_dim_ordering('tf') -else: - import theano - # K.set_image_dim_ordering('th') - -'''Ideally we should have changed image dim ordering based on Theano or Tensorflow, but for some reason I get the following error when I switch it to 'tf' for Tensorflow. - However, the outcome of the prediction doesn't seem to be affected by this, and Tensorflow gives me results similar to Theano's. - I didn't spend much time on this behavior, but if someone has an answer to this then please do comment and let me know. - ValueError: Negative dimension size caused by subtracting 3 from 1 for 'conv2d_1/convolution' (op: 'Conv2D') with input shapes: [?,1,200,200], [3,3,200,32]. -''' -K.set_image_dim_ordering('th') - - -#import matplotlib.pyplot as plt - -# SKLEARN - -# matplotlib.use("TkAgg") - -# input image dimensions -img_rows, img_cols = 200, 200 - -# number of channels -# For grayscale use 1 value and for color images use 3 (R,G,B channels) -img_channels = 1 - - -# Batch_size to train -batch_size = 32 - -# Number of output classes (change it accordingly) -# eg: In my case I wanted to predict 4 types of gestures (Ok, Peace, Punch, Stop) -# NOTE: If you change this then don't forget to change Labels accordingly -nb_classes = 5 - -# Number of epochs to train (change it accordingly) -nb_epoch = 15 # 25 - -# Total number of convolutional filters to use -nb_filters = 32 -# Max pooling -nb_pool = 2 -# Size of convolution kernel -nb_conv = 3 - -# %% -# data -path = "./" -path1 = "./gestures" # path of folder of images - -# Path2 is the folder which is fed into the training model -path2 = './imgfolder_b' - -WeightFileName = ["ori_4015imgs_weights.hdf5", "bw_4015imgs_weights.hdf5", "bw_2510imgs_weights.hdf5", "./bw_weight.hdf5", - "./final_c_weights.hdf5", "./semiVgg_1_weights.hdf5", "/new_wt_dropout20.hdf5", "./weights-CNN-gesture_skinmask.hdf5"] - -# outputs -output = ["OK", "NOTHING", "PEACE", "PUNCH", "STOP"] -#output = ["PEACE", "STOP", "THUMBSDOWN", "THUMBSUP"] - -jsonarray = {} - -# %% - - -def update(plot): - global jsonarray - h = 450 - y = 30 - w = 45 - font = cv2.FONT_HERSHEY_SIMPLEX - - #plot = np.zeros((512,512,3), np.uint8) - - #array = {"OK": 65.79261422157288, "NOTHING": 0.7953541353344917, "PEACE": 5.33270463347435, "PUNCH": 0.038031660369597375, "STOP": 28.04129719734192} - - for items in jsonarray: - mul = (jsonarray[items]) / 100 - #mul = random.randint(1,100) / 100 - cv2.line(plot, (0, y), (int(h * mul), y), (255,
0, 0), w) - cv2.putText(plot, items, (0, y+5), font, 0.7, (0, 255, 0), 2, 1) - y = y + w + 30 - - return plot - - -# %% -# This function can be used for converting color images to grayscale -# while copying images from path1 to path2 -def convertToGrayImg(path1, path2): - listing = os.listdir(path1) - for file in listing: - if file.startswith('.'): - continue - img = Image.open(path1 + '/' + file) - #img = img.resize((img_rows,img_cols)) - grayimg = img.convert('L') - grayimg.save(path2 + '/' + file, "PNG") - -# %% - - -def modlistdir(path): - listing = os.listdir(path) - retlist = [] - for name in listing: - # This check is to ignore any hidden files/folders - if name.startswith('.'): - continue - retlist.append(name) - return retlist - - -# Load CNN model -def loadCNN(wf_index): - global get_output - model = Sequential() - - model.add(Conv2D(nb_filters, (nb_conv, nb_conv), - padding='valid', - input_shape=(img_channels, img_rows, img_cols))) - convout1 = Activation('relu') - model.add(convout1) - model.add(Conv2D(nb_filters, (nb_conv, nb_conv))) - convout2 = Activation('relu') - model.add(convout2) - model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool))) - model.add(Dropout(0.5)) - - model.add(Flatten()) - model.add(Dense(128)) - model.add(Activation('relu')) - model.add(Dropout(0.5)) - model.add(Dense(nb_classes)) - model.add(Activation('softmax')) - - ''' - - model.add(ZeroPadding2D((1,1),input_shape=(img_channels, img_rows, img_cols))) - model.add(Conv2D(nb_filters , (nb_conv, nb_conv), activation='relu')) - #model.add(ZeroPadding2D((1,1))) - #model.add(Conv2D(nb_filters , (nb_conv, nb_conv), activation='relu')) - model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool))) - model.add(Dropout(0.2)) - - #model.add(ZeroPadding2D((1,1))) - model.add(Conv2D(nb_filters , (nb_conv, nb_conv), activation='relu')) - #model.add(ZeroPadding2D((1,1))) - model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool))) - ## - #model.add(Conv2D(nb_filters , (nb_conv, nb_conv), activation='relu')) - #model.add(MaxPooling2D(pool_size=(nb_pool, nb_pool), strides=(2,2))) - - model.add(Dropout(0.3)) - model.add(Flatten()) - ### - #model.add(Dense(128)) - #model.add(Activation('relu')) - #model.add(Dropout(0.5)) - - model.add(Dense(256)) - model.add(Activation('relu')) - model.add(Dropout(0.5)) - model.add(Dense(nb_classes)) - model.add(Activation('softmax')) - ''' - - #sgd = SGD(lr=0.01, decay=1e-6, momentum=0.9, nesterov=True) - model.compile(loss='categorical_crossentropy', - optimizer='adadelta', metrics=['accuracy']) - - # Model summary - model.summary() - # Model config details - model.get_config() - - #from keras.utils import plot_model - #plot_model(model, to_file='new_model.png', show_shapes = True) - - if wf_index >= 0: - # Load pretrained weights - fname = WeightFileName[int(wf_index)] - print("loading ", fname) - model.load_weights(fname) - - layer = model.layers[11] - get_output = K.function( - [model.layers[0].input, K.learning_phase()], [layer.output, ]) - - return model - -# This function does the guessing work based on input images - - -def guessGesture(model, img): - global output, get_output, jsonarray - # Load image and flatten it - image = np.array(img).flatten() - - # reshape it - image = image.reshape(img_channels, img_rows, img_cols) - - # float32 - image = image.astype('float32') - - # normalize it - image = image / 255 - - # reshape for NN - rimage = image.reshape(1, img_channels, img_rows, img_cols) - - # Now feed it to the NN, to fetch the predictions - #index = model.predict_classes(rimage) -
#prob_array = model.predict_proba(rimage) - - prob_array = get_output([rimage, 0])[0] - - # print prob_array - - d = {} - i = 0 - for items in output: - d[items] = prob_array[0][i] * 100 - i += 1 - - # Get the output with maximum probability - import operator - - guess = max(d.items(), key=operator.itemgetter(1))[0] - prob = d[guess] - - if prob > 60.0: - #print(guess + " Probability: ", prob) - - # Enable this to save the predictions in a json file, - # Which can be read by plotter app to plot bar graph - # dump to the JSON contents to the file - - # with open('gesturejson.txt', 'w') as outfile: - # json.dump(d, outfile) - jsonarray = d - - return output.index(guess) - - else: - return 1 - -# %% - - -def initializers(): - imlist = modlistdir(path2) - - # open one image to get size - image1 = np.array(Image.open(path2 + '/' + imlist[0])) - # plt.imshow(im1) - - m, n = image1.shape[0:2] # get the size of the images - total_images = len(imlist) # get the 'total' number of images - - # create matrix to store all flattened images - immatrix = np.array([np.array(Image.open(path2 + '/' + images).convert('L')).flatten() - for images in sorted(imlist)], dtype='f') - - print(immatrix.shape) - - input("Press any key") - - ######################################################### - # Label the set of images per respective gesture type. - ## - label = np.ones((total_images,), dtype=int) - - samples_per_class = int(total_images / nb_classes) - print("samples_per_class - ", samples_per_class) - s = 0 - r = samples_per_class - for classIndex in range(nb_classes): - label[s:r] = classIndex - s = r - r = s + samples_per_class - - ''' - # eg: For 301 img samples/gesture for 4 gesture types - label[0:301]=0 - label[301:602]=1 - label[602:903]=2 - label[903:]=3 - ''' - - data, Label = shuffle(immatrix, label, random_state=2) - train_data = [data, Label] - - (X, y) = (train_data[0], train_data[1]) - - # Split X and y into training and testing sets - - X_train, X_test, y_train, y_test = train_test_split( - X, y, test_size=0.2, random_state=4) - - X_train = X_train.reshape( - X_train.shape[0], img_channels, img_rows, img_cols) - X_test = X_test.reshape(X_test.shape[0], img_channels, img_rows, img_cols) - - X_train = X_train.astype('float32') - X_test = X_test.astype('float32') - - # normalize - X_train /= 255 - X_test /= 255 - - # convert class vectors to binary class matrices - Y_train = np_utils.to_categorical(y_train, nb_classes) - Y_test = np_utils.to_categorical(y_test, nb_classes) - return X_train, X_test, Y_train, Y_test - - -def trainModel(model): - - # Split X and y into training and testing sets - X_train, X_test, Y_train, Y_test = initializers() - - # Now start the training of the loaded model - hist = model.fit(X_train, Y_train, batch_size=batch_size, epochs=nb_epoch, - verbose=1, validation_split=0.2) - - visualizeHis(hist) - - ans = input("Do you want to save the trained weights - y/n ?") - if ans == 'y': - filename = input("Enter file name - ") - fname = path + str(filename) + ".hdf5" - model.save_weights(fname, overwrite=True) - else: - model.save_weights("newWeight.hdf5", overwrite=True) - - # Save model as well - # model.save("newModel.hdf5") -# %% - - -def visualizeHis(hist): - # visualizing losses and accuracy - - train_loss = hist.history['loss'] - val_loss = hist.history['val_loss'] - train_acc = hist.history['acc'] - val_acc = hist.history['val_acc'] - xc = range(nb_epoch) - - plt.figure(1, figsize=(7, 5)) - plt.plot(xc, train_loss) - plt.plot(xc, val_loss) - plt.xlabel('num of Epochs') - 
plt.ylabel('loss') - plt.title('train_loss vs val_loss') - plt.grid(True) - plt.legend(['train', 'val']) - # print plt.style.available # use bmh, classic,ggplot for big pictures - # plt.style.use(['classic']) - - plt.figure(2, figsize=(7, 5)) - plt.plot(xc, train_acc) - plt.plot(xc, val_acc) - plt.xlabel('num of Epochs') - plt.ylabel('accuracy') - plt.title('train_acc vs val_acc') - plt.grid(True) - plt.legend(['train', 'val'], loc=4) - - plt.show() - -# %% - - -def visualizeLayers(model, img, layerIndex): - imlist = modlistdir('./imgs') - if img <= len(imlist): - - image = np.array(Image.open( - './imgs/' + imlist[img - 1]).convert('L')).flatten() - - # Predict - guessGesture(model, image) - - # reshape it - image = image.reshape(img_channels, img_rows, img_cols) - - # float32 - image = image.astype('float32') - - # normalize it - image = image / 255 - - # reshape for NN - input_image = image.reshape(1, img_channels, img_rows, img_cols) - else: - X_train, X_test, Y_train, Y_test = initializers() - - # the input image - input_image = X_test[:img+1] - - # visualizing intermediate layers - #output_layer = model.layers[layerIndex].output - #output_fn = theano.function([model.layers[0].input], output_layer) - #output_image = output_fn(input_image) - - if layerIndex >= 1: - visualizeLayer(model, img, input_image, layerIndex) - else: - tlayers = len(model.layers[:]) - print("Total layers - {}".format(tlayers)) - for i in range(1, tlayers): - visualizeLayer(model, img, input_image, i) - -# %% - - -def visualizeLayer(model, img, input_image, layerIndex): - - layer = model.layers[layerIndex] - - get_activations = K.function( - [model.layers[0].input, K.learning_phase()], [layer.output, ]) - activations = get_activations([input_image, 0])[0] - output_image = activations - - # If 4 dimensional then take the last dimension value as it would be no of filters - if output_image.ndim == 4: - # Rearrange dimension so we can plot the result - o1 = np.rollaxis(output_image, 3, 1) - output_image = np.rollaxis(o1, 3, 1) - - print("Dumping filter data of layer{} - {}".format(layerIndex, - layer.__class__.__name__)) - filters = len(output_image[0, 0, 0, :]) - - fig = plt.figure(figsize=(8, 8)) - # This loop will plot the 32 filter data for the input image - for i in range(filters): - ax = fig.add_subplot(6, 6, i+1) - # ax.imshow(output_image[img,:,:,i],interpolation='none' ) #to see the first filter - ax.imshow(output_image[0, :, :, i], 'gray') - # ax.set_title("Feature map of layer#{} \ncalled '{}' \nof type {} ".format(layerIndex, - # layer.name,layer.__class__.__name__)) - plt.xticks(np.array([])) - plt.yticks(np.array([])) - plt.tight_layout() - # plt.show() - fig.savefig("img_" + str(img) + "_layer" + - str(layerIndex)+"_"+layer.__class__.__name__+".png") - # plt.close(fig) - else: - print("Can't dump data of this layer{}- {}".format(layerIndex, - layer.__class__.__name__)) -# -*- coding: utf-8 -*- -""" -Created on Thu Mar 23 01:01:43 2017 - -@author: abhisheksingh -""" - -# %% -import cv2 -import numpy as np -import os -import time - -import threading - -import gestureCNN as myNN - -minValue = 70 - -x0 = 400 -y0 = 200 -height = 200 -width = 200 - -saveImg = False -guessGesture = False -visualize = False - -lastgesture = -1 - -kernel = np.ones((15, 15), np.uint8) -kernel2 = np.ones((1, 1), np.uint8) -skinkernel = cv2.getStructuringElement(cv2.MORPH_ELLIPSE, (5, 5)) - -# Which mask mode to use BinaryMask, SkinMask (True|False) OR BkgrndSubMask ('x' key) -binaryMode = True -bkgrndSubMode = False -mask = 0 
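-# bkgrnd (below) caches the reference background frame captured when the -# 'x' key is pressed; bkgrndSubMask() diffs each new ROI frame against it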
-bkgrnd = 0 -counter = 0 -# This parameter controls the number of image samples to be taken PER gesture -numOfSamples = 301 -gestname = "" -path = "" -mod = 0 - -banner = '''\nWhat would you like to do ? - 1- Use pretrained model for gesture recognition & layer visualization - 2- Train the model (you will require image samples for training under .\imgfolder) - 3- Visualize feature maps of different layers of trained model - 4- Exit - ''' - - -# %% -def saveROIImg(img): - global counter, gestname, path, saveImg - if counter > (numOfSamples - 1): - # Reset the parameters - saveImg = False - gestname = '' - counter = 0 - return - - counter = counter + 1 - name = gestname + str(counter) - print("Saving img:", name) - cv2.imwrite(path+name + ".png", img) - time.sleep(0.04) - - -# %% -def skinMask(frame, x0, y0, width, height, framecount, plot): - global guessGesture, visualize, mod, lastgesture, saveImg - # HSV values - low_range = np.array([0, 50, 80]) - upper_range = np.array([30, 200, 255]) - - cv2.rectangle(frame, (x0, y0), (x0+width, y0+height), (0, 255, 0), 1) - #roi = cv2.UMat(frame[y0:y0+height, x0:x0+width]) - roi = frame[y0:y0+height, x0:x0+width] - - hsv = cv2.cvtColor(roi, cv2.COLOR_BGR2HSV) - - # Apply skin color range - mask = cv2.inRange(hsv, low_range, upper_range) - - mask = cv2.erode(mask, skinkernel, iterations=1) - mask = cv2.dilate(mask, skinkernel, iterations=1) - - # blur - mask = cv2.GaussianBlur(mask, (15, 15), 1) - - # bitwise and mask original frame - res = cv2.bitwise_and(roi, roi, mask=mask) - # color to grayscale - res = cv2.cvtColor(res, cv2.COLOR_BGR2GRAY) - - if saveImg == True: - saveROIImg(res) - elif guessGesture == True and (framecount % 5) == 4: - #res = cv2.UMat.get(res) - t = threading.Thread(target=myNN.guessGesture, args=[mod, res]) - t.start() - elif visualize == True: - layer = int(input("Enter which layer to visualize ")) - cv2.waitKey(0) - myNN.visualizeLayers(mod, res, layer) - visualize = False - - return res - - -# %% -def binaryMask(frame, x0, y0, width, height, framecount, plot): - global guessGesture, visualize, mod, lastgesture, saveImg - - cv2.rectangle(frame, (x0, y0), (x0+width, y0+height), (0, 255, 0), 1) - #roi = cv2.UMat(frame[y0:y0+height, x0:x0+width]) - roi = frame[y0:y0+height, x0:x0+width] - - gray = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) - blur = cv2.GaussianBlur(gray, (5, 5), 2) - - th3 = cv2.adaptiveThreshold( - blur, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2) - ret, res = cv2.threshold( - th3, minValue, 255, cv2.THRESH_BINARY_INV+cv2.THRESH_OTSU) - - if saveImg == True: - saveROIImg(res) - elif guessGesture == True and (framecount % 5) == 4: - #ores = cv2.UMat.get(res) - t = threading.Thread(target=myNN.guessGesture, args=[mod, res]) - t.start() - elif visualize == True: - layer = int(input("Enter which layer to visualize ")) - cv2.waitKey(1) - myNN.visualizeLayers(mod, res, layer) - visualize = False - - return res - -# %% -# This is the new mask mode. It simply tries to remove the background content by taking an image of the -# background and subtracting it from the new frame contents of the ROI window. -# So in order to use it correctly, keep the contents of the ROI window stable, without your hand in it, -# and then press the 'x' key.
If you can then see that the contents of the ROI window are all blank, you are -# good to go for gesture prediction - - -def bkgrndSubMask(frame, x0, y0, width, height, framecount, plot): - global guessGesture, takebkgrndSubMask, visualize, mod, bkgrnd, lastgesture, saveImg - - cv2.rectangle(frame, (x0, y0), (x0+width, y0+height), (0, 255, 0), 1) - roi = frame[y0:y0+height, x0:x0+width] - #roi = cv2.UMat(frame[y0:y0+height, x0:x0+width]) - roi = cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY) - - # Take background image - if takebkgrndSubMask == True: - bkgrnd = roi - takebkgrndSubMask = False - print("Refreshing background image for mask...") - - # Take a diff between roi & bkgrnd image contents - diff = cv2.absdiff(roi, bkgrnd) - - _, diff = cv2.threshold(diff, 25, 255, cv2.THRESH_BINARY) - - mask = cv2.GaussianBlur(diff, (3, 3), 5) - mask = cv2.erode(mask, skinkernel, iterations=1) - mask = cv2.dilate(mask, skinkernel, iterations=1) - res = cv2.bitwise_and(roi, roi, mask=mask) - - if saveImg == True: - saveROIImg(res) - elif guessGesture == True and (framecount % 5) == 4: - t = threading.Thread(target=myNN.guessGesture, args=[mod, res]) - t.start() - # t.join() - # myNN.update(plot) - - elif visualize == True: - layer = int(input("Enter which layer to visualize ")) - cv2.waitKey(0) - myNN.visualizeLayers(mod, res, layer) - visualize = False - - return res - - -# %% -def Main(): - global guessGesture, visualize, mod, binaryMode, bkgrndSubMode, mask, takebkgrndSubMask, x0, y0, width, height, saveImg, gestname, path - quietMode = False - - font = cv2.FONT_HERSHEY_SIMPLEX - size = 0.5 - fx = 10 - fy = 350 - fh = 18 - - # Call CNN model loading callback - while True: - ans = int(input(banner)) - if ans == 2: - mod = myNN.loadCNN(-1) - myNN.trainModel(mod) - input("Press any key to continue") - break - elif ans == 1: - print("Will load default weight file") - mod = myNN.loadCNN(0) - break - elif ans == 3: - if not mod: - w = int(input("Which weight file to load (0 or 1)")) - mod = myNN.loadCNN(w) - else: - print("Will load default weight file") - - img = int(input("Image number ")) - layer = int(input("Enter which layer to visualize ")) - myNN.visualizeLayers(mod, img, layer) - input("Press any key to continue") - continue - - else: - print("Get out of here!!!") - return 0 - - # Grab camera input - cap = cv2.VideoCapture(0) - cv2.namedWindow('Original', cv2.WINDOW_NORMAL) - - # set rt size as 640x480 - ret = cap.set(3, 640) - ret = cap.set(4, 480) - - framecount = 0 - fps = "" - start = time.time() - - plot = np.zeros((512, 512, 3), np.uint8) - - while(True): - ret, frame = cap.read() - max_area = 0 - - frame = cv2.flip(frame, 3) - frame = cv2.resize(frame, (640, 480)) - - if ret == True: - if bkgrndSubMode == True: - roi = bkgrndSubMask(frame, x0, y0, width, - height, framecount, plot) - elif binaryMode == True: - roi = binaryMask(frame, x0, y0, width, - height, framecount, plot) - else: - roi = skinMask(frame, x0, y0, width, height, framecount, plot) - - framecount = framecount + 1 - end = time.time() - timediff = (end - start) - if(timediff >= 1): - #timediff = end - start - fps = 'FPS:%s' % (framecount) - start = time.time() - framecount = 0 - - cv2.putText(frame, fps, (10, 20), font, 0.7, (0, 255, 0), 2, 1) - cv2.putText(frame, 'Options:', (fx, fy), font, 0.7, (0, 255, 0), 2, 1) - cv2.putText(frame, 'b - Toggle Binary/SkinMask', - (fx, fy + fh), font, size, (0, 255, 0), 1, 1) - cv2.putText(frame, 'x - Toggle Background Sub Mask', - (fx, fy + 2*fh), font, size, (0, 255, 0), 1, 1) - cv2.putText(frame, 'g
- Toggle Prediction Mode', - (fx, fy + 3*fh), font, size, (0, 255, 0), 1, 1) - cv2.putText(frame, 'q - Toggle Quiet Mode', - (fx, fy + 4*fh), font, size, (0, 255, 0), 1, 1) - cv2.putText(frame, 'n - To enter name of new gesture folder', - (fx, fy + 5*fh), font, size, (0, 255, 0), 1, 1) - cv2.putText(frame, 's - To start capturing new gestures for training', - (fx, fy + 6*fh), font, size, (0, 255, 0), 1, 1) - cv2.putText(frame, 'ESC - Exit', (fx, fy + 7*fh), - font, size, (0, 255, 0), 1, 1) - - # If enabled will stop updating the main openCV windows - # Way to reduce some processing power :) - if not quietMode: - cv2.imshow('Original', frame) - cv2.imshow('ROI', roi) - - if guessGesture == True: - plot = np.zeros((512, 512, 3), np.uint8) - plot = myNN.update(plot) - - cv2.imshow('Gesture Probability', plot) - #plot = np.zeros((512,512,3), np.uint8) - - ############## Keyboard inputs ################## - key = cv2.waitKey(5) & 0xff - - # Use Esc key to close the program - if key == 27: - break - - # Use b key to toggle between binary threshold or skinmask based filters - elif key == ord('b'): - binaryMode = not binaryMode - bkgrndSubMode = False - if binaryMode: - print("Binary Threshold filter active") - else: - print("SkinMask filter active") - - # Use g key to start gesture predictions via CNN - elif key == ord('x'): - takebkgrndSubMask = True - bkgrndSubMode = True - print("BkgrndSubMask filter active") - - # Use g key to start gesture predictions via CNN - elif key == ord('g'): - guessGesture = not guessGesture - print("Prediction Mode - {}".format(guessGesture)) - - # This option is not yet complete. So disabled for now - # Use v key to visualize layers - # elif key == ord('v'): - # visualize = True - - # Use i,j,k,l to adjust ROI window - elif key == ord('i'): - y0 = y0 - 5 - elif key == ord('k'): - y0 = y0 + 5 - elif key == ord('j'): - x0 = x0 - 5 - elif key == ord('l'): - x0 = x0 + 5 - - # Quiet mode to hide gesture window - elif key == ord('q'): - quietMode = not quietMode - print("Quiet Mode - {}".format(quietMode)) - - # Use s key to start/pause/resume taking snapshots - # numOfSamples controls number of snapshots to be taken PER gesture - elif key == ord('s'): - saveImg = not saveImg - - if gestname != '': - saveImg = True - else: - print("Enter a gesture group name first, by pressing 'n'") - saveImg = False - - # Use n key to enter gesture name - elif key == ord('n'): - gestname = input("Enter the gesture folder name: ") - try: - os.makedirs(gestname) - except OSError as e: - # if directory already present - if e.errno != 17: - print('Some issue while creating the directory named -' + gestname) - - path = "./"+gestname+"/" - - # elif key != 255: - # print key - - #Realse & destroy - cap.release() - cv2.destroyAllWindows() - - -if __name__ == "__main__": - Main() -from setuptools import setup -from setuptools import find_packages - - -setup(name='recurrentshop', - version='1.0.0', - description='Framework for building complex recurrent neural networks with Keras', - author='Fariz Rahman', - author_email='fariz@datalog.ai', - url='https://github.com/farizrahman4u/recurrentshop', - download_url='https://github.com/farizrahman4u/recurrentshop', - license='MIT', - install_requires=['keras'], - packages=find_packages()) -''' -Query Reduction Networks for Question Answering -Minjoon Seo | Sewon Min | Ali Farhadi | Hannaneh Hajishirzi - -https://arxiv.org/pdf/1606.04582.pdf - -Experiment run on BaBI task 1 -''' - - -import numpy as np -from recurrentshop import RecurrentModel -from 
keras.models import Model -from keras.layers import Activation, Dense, Embedding, Input, Lambda -from keras.layers import add, concatenate, multiply -from keras.layers.wrappers import Bidirectional -from keras.preprocessing.text import Tokenizer -from keras.preprocessing.sequence import pad_sequences -from keras.initializers import Constant -from keras import backend as K -import tarfile -import urllib -import os - -# Hyperparameters -batch_size = 20 -query_len = 4 -sentence_len = 7 -lines_per_story = 2 -embedding_dim = 50 -vocab_size = 34 - -tokenizer = Tokenizer() - - -def _download_data(path): - url = "http://www.thespermwhale.com/jaseweston/babi/tasks_1-20_v1-2.tar.gz" - directory = os.path.dirname(path) - if not os.path.exists(directory): - os.makedirs(directory) - file_path = os.path.join(directory, 'bAbI-Tasks-1-20.tar.gz') - urllib.urlretrieve(url, file_path) - with tarfile.open(file_path) as tarf: - tarf.extractall() - - -def load_data(): - recurrentshop_directory = os.path.expanduser('~') + '/.recurrentshop' - datasets_directory = recurrentshop_directory + '/datasets' - babi_path = os.path.join(recurrentshop_directory, - datasets_directory, 'tasks_1-20_v1-2') - if not os.path.exists(babi_path): - _download_data(babi_path) - - train_path = os.path.join( - babi_path, 'en', 'qa1_single-supporting-fact_train.txt') - test_path = os.path.join( - babi_path, 'en', 'qa1_single-supporting-fact_test.txt') - - def fetch_file(path): - with open(path, 'r') as f: - text = f.readlines() - tokenizer.fit_on_texts(text) - text = tokenizer.texts_to_sequences(text) - stories = [] - queries = [] - answers = [] - for i in range(0, len(text), 3): - story = np.append(pad_sequences([text[i][1:]], maxlen=sentence_len)[0], - pad_sequences([text[i + 1][1:]], maxlen=sentence_len)[0]) - stories.append(story) - queries.append(text[i + 2][:-2]) - answers.append(text[i + 2][-2]) - return (np.asarray(stories), np.asarray(queries), np.asarray(answers)) - - train_data = fetch_file(train_path) - test_data = fetch_file(test_path) - - return train_data, test_data - - -# Get positional encoder matrix -def get_PE_matrix(sentence_len, embedding_dim): - pe_matrix = np.zeros((embedding_dim, sentence_len), dtype='float32') - for k in range(embedding_dim): - for j in range(sentence_len): - pe_matrix[k][j] = (1 - float(j + 1) / float(sentence_len)) - float( - k + 1) / float(embedding_dim) * (1 - (2 * float(j + 1)) / float(embedding_dim)) - pe_matrix = np.expand_dims(pe_matrix.T, 0) - return pe_matrix - - -# -# Build QRN Cell -# -def QRNcell(): - xq = Input(batch_shape=(batch_size, embedding_dim * 2)) - # Split into context and query - xt = Lambda(lambda x, dim: x[:, :dim], arguments={'dim': embedding_dim}, - output_shape=lambda s: (s[0], s[1] / 2))(xq) - qt = Lambda(lambda x, dim: x[:, dim:], arguments={'dim': embedding_dim}, - output_shape=lambda s: (s[0], s[1] / 2))(xq) - - h_tm1 = Input(batch_shape=(batch_size, embedding_dim)) - - zt = Dense(1, activation='sigmoid', bias_initializer=Constant(2.5))( - multiply([xt, qt])) - zt = Lambda(lambda x, dim: K.repeat_elements(x, dim, axis=1), - arguments={'dim': embedding_dim})(zt) - ch = Dense(embedding_dim, activation='tanh')( - concatenate([xt, qt], axis=-1)) - rt = Dense(1, activation='sigmoid')(multiply([xt, qt])) - rt = Lambda(lambda x, dim: K.repeat_elements(x, dim, axis=1), - arguments={'dim': embedding_dim})(rt) - ht = add([multiply([zt, ch, rt]), multiply( - [Lambda(lambda x: 1 - x, output_shape=lambda s: s)(zt), h_tm1])]) - return RecurrentModel(input=xq, output=ht, 
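# The QRNcell graph above implements the QRN reduce step: z_t (update) and
# r_t (reset) are sigmoid gates broadcast across embedding_dim, "ch" is the
# tanh candidate state, and the new state is the gated mixture
#     h_t = z_t * ch_t * r_t + (1 - z_t) * h_{t-1}
# so each layer either absorbs the current (context, query) pair or copies
# its previous state forward unchanged.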
initial_states=[h_tm1], final_states=[ht], return_sequences=True) - - -# -# Load data -# - -train_data, test_data = load_data() - -train_stories, train_queries, train_answers = train_data -valid_stories, valid_queries, valid_answers = test_data - -# -# Build Model -# - -stories = Input(batch_shape=(batch_size, lines_per_story * sentence_len)) -queries = Input(batch_shape=(batch_size, query_len)) - -story_PE_matrix = get_PE_matrix(sentence_len, embedding_dim) -query_PE_matrix = get_PE_matrix(query_len, embedding_dim) -QRN = Bidirectional(QRNcell(), merge_mode='sum') -embedding = Embedding(vocab_size + 1, embedding_dim) -m = embedding(stories) -m = Lambda(lambda x: K.reshape(x, (batch_size * lines_per_story, sentence_len, embedding_dim)), - output_shape=lambda s: (batch_size * lines_per_story, sentence_len, embedding_dim))(m) -# Add PE encoder matrix -m = Lambda(lambda x, const: x + np.tile(const, (batch_size * lines_per_story, 1, 1)), arguments={'const': story_PE_matrix}, - output_shape=lambda s: s)(m) -m = Lambda(lambda x: K.reshape(x, (batch_size, lines_per_story, sentence_len, embedding_dim)), - output_shape=lambda s: (batch_size, lines_per_story, sentence_len, embedding_dim))(m) -m = Lambda(lambda x: K.sum(x, axis=2), - output_shape=lambda s: (s[0], s[1], s[3]))(m) - -q = embedding(queries) -# Add PE encoder matrix -q = Lambda(lambda x, const: x + np.tile(const, (batch_size, 1, 1)), arguments={'const': query_PE_matrix}, - output_shape=lambda s: s)(q) -q = Lambda(lambda x: K.sum(x, axis=1, keepdims=True), - output_shape=lambda s: (s[0], 1, s[2]))(q) -q = Lambda(lambda x: K.tile(x, (1, lines_per_story, 1)), - output_shape=lambda s: (s[0], lines_per_story, s[2]))(q) -# Input to RecModel should be a single tensor -mq = concatenate([m, q]) -# Call the RecurrentModel -a = QRN(mq) -mq = concatenate([m, a]) -a = QRN(mq) -a = Lambda(lambda x: x[:, lines_per_story - 1, :], - output_shape=lambda s: (s[0], s[2]))(a) -a = Dense(vocab_size)(a) -a = Activation('softmax')(a) - -model = Model(inputs=[stories, queries], outputs=[a]) -model.compile(optimizer='adam', - loss='sparse_categorical_crossentropy', metrics=['accuracy']) -model.fit([train_stories, train_queries], train_answers, - batch_size=batch_size, - verbose=2, - epochs=100, - validation_data=([valid_stories, valid_queries], valid_answers)) -''' -Recurrent Highway Networks -------------------------------------------------------------------------------- -Julian Georg Zilly | Rupesh Kumar Srivastava | Jan Koutnik | Jurgen Schmidhuber -https://arxiv.org/abs/1607.03474 - -This is an implementation of language modeling experiments -on text8 dataset as specified in the paper - -Visit https://github.com/julian121266/RecurrentHighwayNetworks for -implementations using Tensorflow, Torch7 and Brainstorm frameworks -and other datasets -''' - -from recurrentshop import RecurrentModel -from recurrentshop.advanced_cells import RHNCell -from keras.models import Model -from keras.layers import Dense, Dropout, Input, Lambda -from keras.layers import add, multiply -from keras.layers import Activation, Embedding -from keras.constraints import max_norm -from keras.initializers import Constant, RandomUniform -from keras.regularizers import l2 -from keras.preprocessing.text import Tokenizer -from keras.callbacks import Callback -from keras import backend as K -import numpy as np -import os -import urllib -import zipfile - - -# -# Hyperparameters -# -batch_size = 128 -timesteps = 10 -learning_rate = 0.2 -hidden_dim = 10 -recurrence_depth = 10 -weight_decay = 1e-7 
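# get_PE_matrix in the bAbI example above implements the position-encoding
# weights from End-to-End Memory Networks, l[j, k] = (1 - j/J) - (k/d) * (1 - 2j/J),
# with 1-based word position j (J = sentence_len) and embedding index k
# (d = embedding_dim); note the loop version divides the last term by
# embedding_dim rather than sentence_len. A vectorized numpy sketch of the
# canonical formula (pe_matrix is an illustrative name, not from this file):
import numpy as np


def pe_matrix(sentence_len, embedding_dim):
    j = np.arange(1, sentence_len + 1)[:, None] / sentence_len    # (J, 1)
    k = np.arange(1, embedding_dim + 1)[None, :] / embedding_dim  # (1, d)
    pe = (1 - j) - k * (1 - 2 * j)                                # (J, d)
    return np.expand_dims(pe.astype('float32'), 0)                # (1, J, d)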
-lr_decay = 1.04
-gradient_clip = 10
-embedding_drop = 0.05
-output_drop = 0.3
-input_drop = 0.3
-hidden_drop = 0.05
-transform_bias = -4.0
-weight_init = RandomUniform(-0.04, 0.04)
-
-
-def download_data(path):
-    print('Downloading data . . .')
-    url = "http://mattmahoney.net/dc/text8.zip"
-    directory = os.path.dirname(path)
-    if not os.path.exists(directory):
-        os.makedirs(directory)
-    # Python 3: urlretrieve lives in urllib.request (urllib.urlretrieve
-    # was Python 2 only)
-    from urllib.request import urlretrieve
-    urlretrieve(url, path)
-    with zipfile.ZipFile(path) as zf:
-        zf.extractall(path=path)
-
-
-def load_text():
-    recurrentshop_directory = os.path.expanduser('~') + '/.recurrentshop'
-    datasets_directory = recurrentshop_directory + '/datasets'
-    FILE_PATH = os.path.join(recurrentshop_directory,
-                             datasets_directory, 'text8')
-    if not os.path.exists(FILE_PATH):
-        download_data(FILE_PATH)
-    raw_text = open(FILE_PATH, 'r').read(100000)
-
-    tokenizer = Tokenizer(filters='', char_level=True, lower=False)
-    tokenizer.fit_on_texts(raw_text)
-    tokenized_text = tokenizer.texts_to_sequences(raw_text)
-    return tokenized_text, len(tokenizer.word_index)
-
-
-tokenized_text, vocab_size = load_text()
-embedding_dim = vocab_size  # Size of character set
-
-
-def generate_batch(text, batch_size, num_steps):
-    raw_data = np.squeeze(np.array(text, dtype=np.int32))
-    data_len = len(raw_data)
-    batch_len = data_len // batch_size
-    data = np.zeros([batch_size, batch_len], dtype=np.int32)
-    for i in range(batch_size):
-        data[i] = raw_data[batch_len * i:batch_len * (i + 1)]
-
-    epoch_size = (batch_len - 1) // num_steps
-    if epoch_size == 0:
-        raise ValueError("epoch_size == 0, decrease batch_size or num_steps")
-
-    # Cycle over the data forever, yielding (inputs, next-character) pairs
-    i = 0
-    while i in range(epoch_size):
-        x = data[:, i*num_steps:(i+1)*num_steps]
-        y = data[:, (i+1)*num_steps]
-        if i + 1 >= epoch_size:
-            i = 0
-        else:
-            i += 1
-        yield (x, y)
-
-
-def RHN(input_dim, hidden_dim, depth):
-    # Wrapped model
-    inp = Input(batch_shape=(batch_size, input_dim))
-    state = Input(batch_shape=(batch_size, hidden_dim))
-    drop_mask = Input(batch_shape=(batch_size, hidden_dim))
-    # To avoid all zero mask causing gradient to vanish
-    inverted_drop_mask = Lambda(
-        lambda x: 1.0 - x, output_shape=lambda s: s)(drop_mask)
-    drop_mask_2 = Lambda(
-        lambda x: x + 0., output_shape=lambda s: s)(inverted_drop_mask)
-    dropped_state = multiply([state, inverted_drop_mask])
-    y, new_state = RHNCell(units=hidden_dim, recurrence_depth=depth,
-                           kernel_initializer=weight_init,
-                           kernel_regularizer=l2(weight_decay),
-                           kernel_constraint=max_norm(gradient_clip),
-                           bias_initializer=Constant(transform_bias),
-                           recurrent_initializer=weight_init,
-                           recurrent_regularizer=l2(weight_decay),
-                           recurrent_constraint=max_norm(gradient_clip))([inp, dropped_state])
-    return RecurrentModel(input=inp, output=y,
-                          initial_states=[state, drop_mask],
-                          final_states=[new_state, drop_mask_2])
-
-
-# lr decay scheduler: read the current rate from the optimizer (the
-# original divided a nonexistent `self.lr`) and apply the configured decay
-class lr_scheduler(Callback):
-    def on_epoch_begin(self, epoch, logs=None):
-        if epoch > 5:
-            lr = K.get_value(self.model.optimizer.lr) / lr_decay
-            K.set_value(self.model.optimizer.lr, lr)
-
-###########################################
-# Build Model
-###########################################
-
-
-inp = Input(batch_shape=(batch_size, timesteps))
-# Embed first, then drop (the original applied Dropout to the raw input
-# indices and then overwrote that tensor with the Embedding output)
-x = Embedding(vocab_size+1, embedding_dim, input_length=timesteps)(inp)
-x = Dropout(embedding_drop)(x)
-x = Dropout(input_drop)(x)
-
-# Create a dropout mask for variational dropout
-drop_mask = Lambda(lambda x: x[:, 0, :1] * 0.,
-                   output_shape=lambda s: (s[0], 1))(x)
-
-drop_mask = Lambda(lambda x, dim: K.tile(x, (1, dim)),
-                   arguments={'dim': hidden_dim},
-                   output_shape=(hidden_dim,))(drop_mask)
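# Variational dropout (Gal & Ghahramani, 2016): sample one dropout mask per
# sequence and reuse it at every timestep, rather than resampling per step.
# That is what the Lambda/Dropout chain around drop_mask builds, and RHN()
# folds it into the recurrent state as dropped_state. A standalone numpy
# sketch of the idea (variational_mask is an illustrative name only):
import numpy as np


def variational_mask(batch_size, hidden_dim, drop_rate, rng=np.random):
    # One Bernoulli keep-mask per sample with inverted-dropout scaling;
    # the caller reuses the same mask at every timestep of a sequence.
    keep = 1.0 - drop_rate
    return rng.binomial(1, keep, size=(batch_size, hidden_dim)) / keep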
-drop_mask = Lambda(K.ones_like, output_shape=lambda s: s)(drop_mask) -drop_mask = Dropout(hidden_drop)(drop_mask) -zero_init = Lambda(K.zeros_like, output_shape=lambda s: s)(drop_mask) - -x = RHN(embedding_dim, hidden_dim, recurrence_depth)( - x, initial_state=[zero_init, drop_mask]) -x = Dropout(output_drop)(x) -out = Dense(vocab_size+1, activation='softmax')(x) - -model = Model(inputs=[inp], outputs=[out]) - -model.compile(optimizer='adam', - loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - -data_gen = generate_batch(tokenized_text, batch_size, timesteps) - -model.fit_generator(generator=data_gen, - steps_per_epoch=(len(tokenized_text)//batch_size), - epochs=5, - verbose=1, - callbacks=[lr_scheduler()]) -''' -Machine Learning on Sequential Data Using a Recurrent Weighted Average -by Jared Ostmeyer and Lindsay Cowell - -https://arxiv.org/abs/1703.01253 - -This is the implementation of 'Adding Problem' -mentioned in Section 3.5 of the paper -''' - - -import numpy as np -from recurrentshop import RecurrentModel -from keras.models import Model -from keras.layers import Dense, Activation, Lambda, Input -from keras.layers import add, concatenate, multiply -from keras import backend as K -from keras import initializers - - -''' -Training data - -The input sequence consists of two dimensions at each step. The first dimension -serves as an indicator marking the value to add while the second dimension is the -actual number to be added and is drawn at random from a uniform -distribution over [0, 1]. The target value is the sum of the two numbers -that has `1` in the first dimernsion. Only two steps in the entire -sequence will have an indicator of 1, leaving the indicator 0 everywhere else. -''' - - -def generate_data(num_samples, max_len): - values = np.random.normal(size=[num_samples, max_len, 1]) - mask = np.zeros([num_samples, max_len, 1]) - answers = np.zeros([num_samples, 1]) - - for i in range(num_samples): - j1, j2 = 0, 0 - while j1 == j2: - j1 = np.random.randint(max_len) - j2 = np.random.randint(max_len) - mask[i, (j1, j2)] = 1.0 - answers[i] = np.sum(values[i]*mask[i]) - data = np.concatenate((values, mask), 2) - return data, answers - - -##################################################################### -# RWA layer -##################################################################### - -def RWA(input_dim, output_dim): - x = Input((input_dim, )) - h_tm1 = Input((output_dim, )) - n_tm1 = Input((output_dim, )) - d_tm1 = Input((output_dim, )) - - x_h = concatenate([x, h_tm1]) - - u = Dense(output_dim)(x) - g = Dense(output_dim, activation='tanh')(x_h) - - a = Dense(output_dim, use_bias=False)(x_h) - e_a = Lambda(lambda x: K.exp(x))(a) - - z = multiply([u, g]) - nt = add([n_tm1, multiply([z, e_a])]) - dt = add([d_tm1, e_a]) - dt = Lambda(lambda x: 1.0 / x)(dt) - ht = multiply([nt, dt]) - ht = Activation('tanh')(ht) - - return RecurrentModel(input=x, output=ht, - initial_states=[h_tm1, n_tm1, d_tm1], - final_states=[ht, nt, dt], - state_initializer=[initializers.random_normal(stddev=1.0)]) - - -##################################################################### -# Settings -##################################################################### - -input_dim = 2 -output_dim = 250 -timesteps = 100 -batch_size = 100 -n_epochs = 10 - -#################################################################### -# Fetch datasets -#################################################################### -print('Generating train data') -train_data, train_labels = 
generate_data(num_samples=100000, max_len=timesteps) -print('Generating test data') -test_data, test_labels = generate_data(num_samples=10000, max_len=timesteps) - -#################################################################### -# Build and train model -#################################################################### - -inp = Input((timesteps, input_dim)) -out = RWA(input_dim, output_dim)(inp) -out = Dense(1)(out) -model = Model(inp, out) - -model.compile(loss='mse', optimizer='adam') -model.fit(train_data, train_labels, batch_size=batch_size, - epochs=n_epochs, validation_data=(test_data, test_labels)) -''' -Machine Learning on Sequential Data Using a Recurrent Weighted Average -by Jared Ostmeyer and Lindsay Cowell - -https://arxiv.org/abs/1703.01253 - -This is the implementation of 'Classifying by Sequence Length' -experiment mentioned in Section 3.3 of the paper -''' - - -import numpy as np -from recurrentshop import RecurrentModel -from keras.models import Model -from keras.layers import Dense, Activation, Lambda, Input -from keras.layers import add, concatenate, multiply -from keras import backend as K -from keras import initializers - - -''' -Training Data - -The length of each sequence is randomly drawn from a uniform distribution over -every possible length 0 to T, where T is the maximum possible length of -the sequence. Each step in the sequence is populated with a random number drawn -from a unit normal distribution. Sequences greater than length T /2 are -labeled with 1 while shorter sequences are labeled with 0. -''' - - -def generate_data(num_samples, max_len): - data = np.zeros([num_samples, max_len]) - labels = np.zeros([num_samples, 1]) - - for sample, label in zip(data, labels): - length = np.random.randint(0, max_len + 1) - n = np.random.normal(size=length) - sample[:length] += n - if length > max_len / 2: - label += 1 - - data = np.expand_dims(data, axis=-1) - return data, labels - - -##################################################################### -# RWA layer -##################################################################### - -def RWA(input_dim, output_dim): - x = Input((input_dim, )) - h_tm1 = Input((output_dim, )) - n_tm1 = Input((output_dim, )) - d_tm1 = Input((output_dim, )) - - x_h = concatenate([x, h_tm1]) - - u = Dense(output_dim)(x) - g = Dense(output_dim, activation='tanh')(x_h) - - a = Dense(output_dim, use_bias=False)(x_h) - e_a = Lambda(lambda x: K.exp(x))(a) - - z = multiply([u, g]) - nt = add([n_tm1, multiply([z, e_a])]) - dt = add([d_tm1, e_a]) - dt = Lambda(lambda x: 1.0 / x)(dt) - ht = multiply([nt, dt]) - ht = Activation('tanh')(ht) - - return RecurrentModel(input=x, output=ht, - initial_states=[h_tm1, n_tm1, d_tm1], - final_states=[ht, nt, dt], - state_initializer=[initializers.random_normal(stddev=1.0)]) - - -##################################################################### -# Settings -##################################################################### - -input_dim = 1 -output_dim = 250 -timesteps = 100 -batch_size = 100 -n_epochs = 5 - -#################################################################### -# Fetch datasets -#################################################################### - -train_data, train_labels = generate_data(num_samples=100000, max_len=timesteps) -test_data, test_labels = generate_data(num_samples=100, max_len=timesteps) - -#################################################################### -# Build and train model -#################################################################### - -inp = 
Input((timesteps, input_dim)) -out = RWA(input_dim, output_dim)(inp) -out = Dense(1, activation='sigmoid')(out) -model = Model(inp, out) - -model.compile(loss='binary_crossentropy', - optimizer='adam', metrics=['accuracy']) -model.fit(train_data, train_labels, batch_size=batch_size, - epochs=n_epochs, validation_data=(test_data, test_labels)) -from recurrentshop import* -from keras.layers import* -from keras.models import* -import numpy as np -import time -import sys - - -# Script for comparing performance of native keras and recurrentshop stacked RNN implementations -# We observe 20-30% speed ups on GPU - - -sys.setrecursionlimit(10000000) - -# Params - -rnn, rnn_cell = LSTM, LSTMCell -depth = 3 -input_length = 1000 -dim = 10 -nb_epoch = 5 -unroll = K.backend() == 'tensorflow' - -# Random data - -x = np.random.random((10, input_length, dim)) -y = np.random.random((10, dim)) - -# Native keras model - -model = Sequential() -for i in range(depth): - # We set consume_less = 'gpu' so that both models use the same LSTM implementation. - model.add(rnn(dim, return_sequences=i != depth-1, - input_shape=(input_length, dim), unroll=unroll, consume_less='gpu')) - -model.compile(loss='mse', optimizer='sgd') - -print('Compiling...') -model.train_on_batch(x[:1], y[:1]) # force compile - -start_time = time.time() -model.fit(x, y, nb_epoch=nb_epoch) -end_time = time.time() - -keras_time_taken = end_time - start_time - -# recurrentshop model - -rc = RecurrentContainer(input_length=input_length, unroll=unroll) -for _ in range(depth): - rc.add(rnn_cell(dim, input_dim=dim)) - -model = Sequential() -model.add(rc) - -model.compile(loss='mse', optimizer='sgd') - -print('Compiling...') -model.train_on_batch(x[:1], y[:1]) # force compile - -start_time = time.time() -model.fit(x, y, nb_epoch=nb_epoch) -end_time = time.time() - -recurrentshop_time_taken = end_time - start_time - -speed_up = keras_time_taken / recurrentshop_time_taken - - -print('Time taken by native keras model: ' + - str(int(keras_time_taken)) + ' seconds.') -print('Time taken by recurrentshop model: ' + - str(int(recurrentshop_time_taken)) + ' seconds.') -print('Speed up:' + str(speed_up) + 'X') -from .engine import * -from .cells import * -from keras.models import Model -from keras.layers import * -from cells import * - - -class RHNCell(ExtendedRNNCell): - - def __init__(self, units, recurrence_depth, **kwargs): - self.recurrence_depth = recurrence_depth - kwargs['units'] = units - super(RHNCell, self).__init__(**kwargs) - - def build_model(self, input_shape): - output_dim = self.output_dim - output_shape = (input_shape[0], output_dim) - x = Input(batch_shape=input_shape) - h_tm1 = Input(batch_shape=output_shape) - Rh = Dense(output_dim, - kernel_initializer=self.recurrent_initializer, - kernel_regularizer=self.recurrent_regularizer, - kernel_constraint=self.recurrent_constraint, - use_bias=self.use_bias, - bias_initializer=self.bias_initializer, - bias_regularizer=self.bias_regularizer, - bias_constraint=self.bias_constraint) - - Rt = Dense(output_dim, - kernel_initializer=self.recurrent_initializer, - kernel_regularizer=self.recurrent_regularizer, - kernel_constraint=self.recurrent_constraint, - use_bias=self.use_bias, - bias_initializer=self.bias_initializer, - bias_regularizer=self.bias_regularizer, - bias_constraint=self.bias_constraint) - - Wh = Dense(output_dim, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer, - kernel_constraint=self.kernel_constraint, - use_bias=self.use_bias, - 
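# RHNCell.build_model below composes `recurrence_depth` highway micro-steps
# on the running state s:
#     h = tanh(W_H x + R_H s),  t = sigmoid(W_T x + R_T s),  s' = h*t + s*c
# where the paper couples the carry gate as c = 1 - t (this implementation
# additionally passes c through a sigmoid). Only the first micro-step sees
# the input x; the remaining recurrence_depth - 1 steps transform the state
# alone, which is what makes the cell deep "in time".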
bias_initializer=self.bias_initializer, - bias_regularizer=self.bias_regularizer, - bias_constraint=self.bias_constraint) - - Wt = Dense(output_dim, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer, - kernel_constraint=self.kernel_constraint, - use_bias=self.use_bias, - bias_initializer=self.bias_initializer, - bias_regularizer=self.bias_regularizer, - bias_constraint=self.bias_constraint) - - hl = add([Wh(x), Rh(h_tm1)]) - hl = Activation('tanh')(hl) - - tl = add([Wt(x), Rt(h_tm1)]) - tl = Activation('sigmoid')(tl) - - cl = Lambda(lambda x: 1.0 - x, output_shape=lambda s: s)(tl) - cl = Activation('sigmoid')(cl) - - ht = add([multiply([hl, tl]), multiply([h_tm1, cl])]) - - for _ in range(self.recurrence_depth - 1): - hli = Dense(output_dim, - activation='tanh', - kernel_initializer=self.recurrent_initializer, - kernel_regularizer=self.recurrent_regularizer, - kernel_constraint=self.recurrent_constraint, - use_bias=self.use_bias, - bias_initializer=self.bias_initializer, - bias_regularizer=self.bias_regularizer, - bias_constraint=self.bias_constraint)(ht) - tli = Dense(output_dim, - activation='sigmoid', - kernel_initializer=self.recurrent_initializer, - kernel_regularizer=self.recurrent_regularizer, - kernel_constraint=self.recurrent_constraint, - use_bias=self.use_bias, - bias_initializer=self.bias_initializer, - bias_regularizer=self.bias_regularizer, - bias_constraint=self.bias_constraint)(ht) - - cli = Lambda(lambda x: 1.0 - x, output_shape=lambda s: s)(tli) - cli = Activation('sigmoid')(cli) - ht = add([multiply([hli, tli]), multiply([ht, cli])]) - - return Model([x, h_tm1], [ht, Identity()(ht)]) -from keras.models import Model -from keras.layers import * -from .engine import RNNCell - - -''' -This is a more readable version of cells.py. -''' - - -class SimpleRNNCell(RNNCell): - - def build_model(self, input_shape): - output_dim = self.output_dim - output_shape = (input_shape[0], output_dim) - x = Input(batch_shape=input_shape) - h_tm1 = Input(batch_shape=output_shape) - h = add([Dense(output_dim)(x), Dense( - output_dim, use_bias=False)(h_tm1)]) - h = Activation('tanh')(h) - return Model([x, h_tm1], [h, h]) - - -class GRUCell(RNNCell): - - def build_model(self, input_shape): - output_dim = self.output_dim - input_dim = input_shape[-1] - output_shape = (input_shape[0], output_dim) - x = Input(batch_shape=input_shape) - h_tm1 = Input(batch_shape=output_shape) - z = add([Dense(output_dim)(x), Dense( - output_dim, use_bias=False)(h_tm1)]) - z = Activation('sigmoid')(z) - r = add([Dense(output_dim)(x), Dense( - output_dim, use_bias=False)(h_tm1)]) - r = Activation('sigmoid')(r) - h_prime = add([Dense(output_dim)(multiply([r, h_tm1])), - Dense(output_dim, use_bias=False)(x)]) - h_prime = Activation('tanh')(h_prime) - gate = Lambda(lambda x: x[0] * x[1] + (1. 
- x[0]) - * x[2], output_shape=lambda s: s[0]) - h = gate([z, h_prime, h_tm1]) - return Model([x, h_tm1], [h, h]) - - -class LSTMCell(RNNCell): - - def build_model(self, input_shape): - output_dim = self.output_dim - input_dim = input_shape[-1] - output_shape = (input_shape[0], output_dim) - x = Input(batch_shape=input_shape) - h_tm1 = Input(batch_shape=output_shape) - c_tm1 = Input(batch_shape=output_shape) - f = add([Dense(output_dim)(x), Dense( - output_dim, use_bias=False)(h_tm1)]) - f = Activation('sigmoid')(f) - i = add([Dense(output_dim)(x), Dense( - output_dim, use_bias=False)(h_tm1)]) - i = Activation('sigmoid')(i) - c_prime = add([Dense(output_dim)(x), Dense( - output_dim, use_bias=False)(h_tm1)]) - c_prime = Activation('tanh')(c_prime) - c = add([multiply([f, c_tm1]), multiply([i, c_prime])]) - c = Activation('tanh')(c) - o = add([Dense(output_dim)(x), Dense( - output_dim, use_bias=False)(h_tm1)]) - o = Activation('sigmoid')(o) - h = multiply([o, c]) - return Model([x, h_tm1, c_tm1], [h, h, c]) -from keras.models import Model -from keras import initializers -from keras import constraints -from keras import regularizers -from keras.layers import * -from .engine import RNNCell - - -def _slice(x, dim, index): - return x[:, index * dim: dim * (index + 1)] - - -def get_slices(x, n): - dim = int(K.int_shape(x)[1] / n) - return [Lambda(_slice, arguments={'dim': dim, 'index': i}, output_shape=lambda s: (s[0], dim))(x) for i in range(n)] - - -class Identity(Layer): - - def call(self, x): - return x + 0. - - -class ExtendedRNNCell(RNNCell): - - def __init__(self, units=None, - activation='tanh', - recurrent_activation='hard_sigmoid', - use_bias=True, - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - recurrent_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - recurrent_constraint=None, - bias_constraint=None, - **kwargs): - if units is None: - assert 'output_dim' in kwargs, 'Missing argument: units' - else: - kwargs['output_dim'] = units - self.activation = activations.get(activation) - self.recurrent_activation = activations.get(recurrent_activation) - self.use_bias = use_bias - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(recurrent_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(recurrent_constraint) - self.bias_constraint = constraints.get(bias_constraint) - super(ExtendedRNNCell, self).__init__(**kwargs) - - def get_config(self): - config = { - 'activation': activations.serialize(self.activation), - 'recurrent_activation': activations.serialize(self.recurrent_activation), - 'use_bias': self.use_bias, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'recurrent_initializer': initializers.serialize(self.recurrent_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'recurrent_regularizer': regularizers.serialize(self.recurrent_regularizer), - 'bias_regularizer': 
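# get_config follows the Keras convention of serializing every initializer,
# regularizer and constraint by name, so a cell survives a plain JSON
# round-trip: Layer.from_config calls cls(**config), and the
# activations.get / initializers.get calls in __init__ accept these
# serialized forms back (round-trip behavior assumed from the standard
# Keras Layer API).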
regularizers.serialize(self.bias_regularizer), - 'activity_regularizer': regularizers.serialize(self.activity_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'recurrent_constraint': constraints.serialize(self.recurrent_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint) - } - base_config = super(ExtendedRNNCell, self).get_config() - config.update(base_config) - return config - - -class SimpleRNNCell(ExtendedRNNCell): - - def build_model(self, input_shape): - output_dim = self.output_dim - output_shape = (input_shape[0], output_dim) - x = Input(batch_shape=input_shape) - h_tm1 = Input(batch_shape=output_shape) - kernel = Dense(output_dim, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer, - kernel_constraint=self.kernel_constraint, - use_bias=self.use_bias, - bias_initializer=self.bias_initializer, - bias_regularizer=self.bias_regularizer, - bias_constraint=self.bias_constraint) - recurrent_kernel = Dense(output_dim, - kernel_initializer=self.recurrent_initializer, - kernel_regularizer=self.recurrent_regularizer, - kernel_constraint=self.recurrent_constraint, - use_bias=False) - h = add([kernel(x), recurrent_kernel(h_tm1)]) - h = Activation(self.activation)(h) - return Model([x, h_tm1], [h, Identity()(h)]) - - -class GRUCell(ExtendedRNNCell): - - def build_model(self, input_shape): - output_dim = self.output_dim - input_dim = input_shape[-1] - output_shape = (input_shape[0], output_dim) - x = Input(batch_shape=input_shape) - h_tm1 = Input(batch_shape=output_shape) - kernel = Dense(output_dim * 3, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer, - kernel_constraint=self.kernel_constraint, - use_bias=self.use_bias, - bias_initializer=self.bias_initializer, - bias_regularizer=self.bias_regularizer, - bias_constraint=self.bias_constraint) - recurrent_kernel_1 = Dense(output_dim * 2, - kernel_initializer=self.recurrent_initializer, - kernel_regularizer=self.recurrent_regularizer, - kernel_constraint=self.recurrent_constraint, - use_bias=False) - recurrent_kernel_2 = Dense(output_dim, - kernel_initializer=self.recurrent_initializer, - kernel_regularizer=self.recurrent_regularizer, - kernel_constraint=self.recurrent_constraint, - use_bias=False) - kernel_out = kernel(x) - recurrent_kernel_1_out = recurrent_kernel_1(h_tm1) - x_z, x_r, x_h = get_slices(kernel_out, 3) - r_z, r_r = get_slices(recurrent_kernel_1_out, 2) - z = add([x_z, r_z]) - z = Activation(self.recurrent_activation)(z) # sigma_g - r = add([x_r, r_r]) - r = Activation(self.recurrent_activation)(r) # sigma_g - h_prime = add([recurrent_kernel_2(multiply([r, h_tm1])), x_h]) - h_prime = Activation(self.activation)(h_prime) # sigma_h - # h = z * h' + (1 - z) * h_tm1 - gate = Lambda(lambda x: x[0] * x[1] + (1. 
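# Unlike the readable cells earlier in this file, these Extended cells fuse
# all input projections into one wide Dense (output_dim * 3 for GRU,
# output_dim * 4 for LSTM) and split the result with get_slices(): one big
# matmul per step instead of one per gate. Equivalent numpy view
# (illustrative shapes only):
#     fused = x @ W                        # W: (input_dim, 3 * units)
#     x_z, x_r, x_h = np.split(fused, 3, axis=-1)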
- x[0]) - * x[2], output_shape=lambda s: s[0]) - h = gate([z, h_prime, h_tm1]) - return Model([x, h_tm1], [h, Identity()(h)]) - - -class LSTMCell(ExtendedRNNCell): - - def build_model(self, input_shape): - output_dim = self.output_dim - input_dim = input_shape[-1] - output_shape = (input_shape[0], output_dim) - x = Input(batch_shape=input_shape) - h_tm1 = Input(batch_shape=output_shape) - c_tm1 = Input(batch_shape=output_shape) - kernel = Dense(output_dim * 4, - kernel_initializer=self.kernel_initializer, - kernel_regularizer=self.kernel_regularizer, - kernel_constraint=self.kernel_constraint, - use_bias=self.use_bias, - bias_initializer=self.bias_initializer, - bias_regularizer=self.bias_regularizer, - bias_constraint=self.bias_constraint) - recurrent_kernel = Dense(output_dim * 4, - kernel_initializer=self.recurrent_initializer, - kernel_regularizer=self.recurrent_regularizer, - kernel_constraint=self.recurrent_constraint, - use_bias=False) - kernel_out = kernel(x) - recurrent_kernel_out = recurrent_kernel(h_tm1) - x_f, x_i, x_o, x_c = get_slices(kernel_out, 4) - r_f, r_i, r_o, r_c = get_slices(recurrent_kernel_out, 4) - f = add([x_f, r_f]) - f = Activation(self.recurrent_activation)(f) - i = add([x_i, r_i]) - i = Activation(self.recurrent_activation)(i) - o = add([x_o, r_o]) - o = Activation(self.recurrent_activation)(o) - c_prime = add([x_c, r_c]) - c_prime = Activation(self.activation)(c_prime) - c = add([multiply([f, c_tm1]), multiply([i, c_prime])]) - c = Activation(self.activation)(c) - h = multiply([o, c]) - return Model([x, h_tm1, c_tm1], [h, Identity()(h), c]) -from keras.layers import * -from keras.models import Model -from keras import initializers -from .backend import rnn, learning_phase_scope -from .generic_utils import serialize_function, deserialize_function -from keras.engine.base_layer import Node, _collect_previous_mask, _collect_input_shape -import inspect - - -if K.backend() == 'tensorflow': - import tensorflow as tf - - -def _to_list(x): - if type(x) is not list: - x = [x] - return x - - -class _OptionalInputPlaceHolder(Layer): - - def __init__(self, name=None, **kwargs): - if not name: - prefix = 'optional_input_placeholder' - name = prefix + '_' + str(K.get_uid(prefix)) - kwargs['batch_input_shape'] = (2,) - super(_OptionalInputPlaceHolder, self).__init__(**kwargs) - self.tensor = K.zeros(shape=(2,)) - self.tensor._keras_shape = (2,) - self.tensor._uses_learning_phase = False - self.tensor._keras_history = (self, 0, 0) - Node(self, - inbound_layers=[], - node_indices=[], - tensor_indices=[], - input_tensors=[], - output_tensors=[self.tensor], - input_masks=[None], - output_masks=[None], - input_shapes=[], - output_shapes=[(2,)]) - self.build((2,)) - - def call(self, inputs=None): - return self.tensor - - -def _get_cells(): - from .cells import SimpleRNNCell, LSTMCell, GRUCell - cells = {} - cells['SimpleRNNCell'] = SimpleRNNCell - cells['LSTMCell'] = LSTMCell - cells['GRUCell'] = GRUCell - cells['_OptionalInputPlaceHolder'] = _OptionalInputPlaceHolder - return cells - - -def _is_rnn_cell(cell): - return issubclass(cell.__class__, RNNCell) - - -def _is_all_none(iterable_or_element): - if not isinstance(iterable_or_element, (list, tuple)): - iterable = [iterable_or_element] - else: - iterable = iterable_or_element - for element in iterable: - if element is not None: - return False - return True - - -def _get_cell_input_shape(cell): - if hasattr(cell, 'batch_input_shape'): - cell_input_shape = cell.batch_input_shape - elif hasattr(cell, 'input_shape'): - 
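# (_get_cell_input_shape: a cell may advertise its input shape as
# batch_input_shape, input_shape, or through its InputSpec entries,
# depending on how it was constructed; this helper normalizes all three
# and, at the end, unwraps a nested list of shapes down to the first one.)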
cell_input_shape = cell.input_shape - elif hasattr(cell, 'input_spec'): - if isinstance(cell.input_spec, list): - if hasattr(cell.input_spec[0], 'shape'): - cell_input_shape = cell.input_spec[0].shape - else: - cell_input_shape = None - else: - if hasattr(cell.input_spec, 'shape'): - cell_input_shape = cell.input_spec.shape - else: - cell_input_shape = None - else: - cell_input_shape = None - - if cell_input_shape is not None: - if set(map(type, list(set(cell_input_shape) - set([None])))) != set([int]): - cell_input_shape = cell_input_shape[0] - - return cell_input_shape - - -class RNNCell(Layer): - - def __init__(self, output_dim=None, **kwargs): - if 'input_shape' not in kwargs and 'input_dim' in kwargs: - kwargs['input_shape'] = (kwargs.pop('input_dim'),) - self.output_dim = output_dim - if 'batch_input_shape' in kwargs: - self.model = self.build_model(kwargs['batch_input_shape']) - elif 'input_shape' in kwargs: - self.model = self.build_model((None,) + kwargs['input_shape']) - if not hasattr(self, 'input_ndim'): - self.input_ndim = 2 - super(RNNCell, self).__init__(**kwargs) - - def build(self, input_shape): - if type(input_shape) is list: - self.input_spec = [InputSpec(shape=shape) for shape in input_shape] - self.model = self.build_model(input_shape[0]) - else: - self.model = self.build_model(input_shape) - self.input_spec = [InputSpec(shape=shape) - for shape in _to_list(self.model.input_shape)] - - def build_model(self, input_shape): - raise Exception(NotImplemented) - - @property - def num_states(self): - if hasattr(self, 'model'): - model = self.model - else: - # Don't judge. It was 3 in the morning. - model = self.build_model((None,) + (2,) * (self.input_ndim - 1)) - model_input = model.input - if type(model_input) is list: - return len(model_input[1:]) - else: - return 0 - - @property - def state_shape(self): - model_input = self.model.input - if type(model_input) is list: - if len(model_input) == 2: - return K.int_shape(model_input[1]) - else: - return list(map(K.int_shape, model_input[1:])) - else: - return None - - def compute_output_shape(self, input_shape): - model_inputs = self.model.input - if type(model_inputs) is list and type(input_shape) is not list: - input_shape = [input_shape] + \ - list(map(K.int_shape, self.model.input[1:])) - return self.model.compute_output_shape(input_shape) - - def call(self, inputs, learning=None): - return self.model.call(inputs) - - def get_layer(self, **kwargs): - input_shape = self.model.input_shape - if type(input_shape) is list: - state_shapes = input_shape[1:] - input_shape = input_shape[0] - else: - state_shapes = [] - input = Input(batch_shape=input_shape) - initial_states = [Input(batch_shape=shape) for shape in state_shapes] - output = self.model([input] + initial_states) - if type(output) is list: - final_states = output[1:] - output = output[0] - else: - final_states = [] - return RecurrentModel(input=input, output=output, initial_states=initial_states, final_states=final_states, **kwargs) - - @property - def updates(self): - return self.model.updates - - def add_update(self, updates, inputs=None): - self.model.add_update(updates, inputs) - - @property - def uses_learning_phase(self): - return self.model.uses_learning_phase - - @property - def _per_input_losses(self): - if hasattr(self, 'model'): - return getattr(self.model, '_per_input_losses', {}) - else: - return {} - - @_per_input_losses.setter - def _per_input_losses(self, val): - if hasattr(self, 'model'): - self.model._per_input_losses = val - - @property - def 
losses(self): - if hasattr(self, 'model'): - return self.model.losses - else: - return [] - - @losses.setter - def losses(self, val): - if hasattr(self, 'model'): - self.model.losses = val - - def add_loss(self, losses, inputs=None): - self.model.add_loss(losses, inputs) - - @property - def constraints(self): - return self.model.constraints - - @property - def trainable_weights(self): - return self.model.trainable_weights - - @property - def non_trainable_weights(self): - return self.model.non_trainable_weights - - def get_losses_for(self, inputs): - return self.model.get_losses_for(inputs) - - def get_updates_for(self, inputs): - return self.model.get_updates_for(inputs) - - def set_weights(self, weights): - self.model.set_weights(weights) - - def get_weights(self): - return self.model.get_weights() - - def get_config(self): - config = {'output_dim': self.output_dim} - base_config = super(RNNCell, self).get_config() - config.update(base_config) - return config - - def compute_mask(self, inputs, mask=None): - model_output = self.model.output - if type(model_output) is list: - return [None] * len(model_output) - else: - return None - - -class RNNCellFromModel(RNNCell): - - def __init__(self, model, **kwargs): - self.model = model - self.input_spec = [Input(batch_shape=shape) - for shape in _to_list(model.input_shape)] - self.build_model = lambda _: model - super(RNNCellFromModel, self).__init__( - batch_input_shape=model.input_shape, **kwargs) - - def get_config(self): - config = super(RNNCellFromModel, self).get_config() - if self.model is None: - config['model_config'] = None - else: - config['model_config'] = self.model.get_config() - return config - - @classmethod - def from_config(cls, config, custom_objects={}): - if type(custom_objects) is list: - custom_objects = {obj.__name__: obj for obj in custom_objects} - custom_objects.update(_get_cells()) - model_config = config.pop('model_config') - model = Model.from_config(model_config, custom_objects) - return cls(model, **config) - - -class RecurrentModel(Recurrent): - - # INITIALIZATION - - def __init__(self, input, output, initial_states=None, final_states=None, readout_input=None, teacher_force=False, decode=False, output_length=None, return_states=False, state_initializer=None, **kwargs): - inputs = [input] - outputs = [output] - state_spec = None - if initial_states is not None: - if type(initial_states) not in [list, tuple]: - initial_states = [initial_states] - state_spec = [InputSpec(shape=K.int_shape(state)) - for state in initial_states] - if final_states is None: - raise Exception('Missing argument : final_states') - else: - self.states = [None] * len(initial_states) - inputs += initial_states - else: - self.states = [] - state_spec = [] - - if final_states is not None: - if type(final_states) not in [list, tuple]: - final_states = [final_states] - assert len(initial_states) == len( - final_states), 'initial_states and final_states should have same number of tensors.' 
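# RecurrentModel turns a one-timestep functional graph into a recurrent
# layer: `input`/`output` describe a single step, and each initial_states[i]
# / final_states[i] pair is a state tensor threaded from step to step.
# Construction sketch (illustrative shapes, mirroring the QRN and RWA
# examples earlier in this file):
#     x_t   = Input((5,))
#     h_tm1 = Input((8,))
#     h_t   = Activation('tanh')(add([Dense(8)(x_t),
#                                     Dense(8, use_bias=False)(h_tm1)]))
#     rnn   = RecurrentModel(input=x_t, output=h_t,
#                            initial_states=[h_tm1], final_states=[h_t])
#     y = rnn(Input((None, 5)))          # apply over a whole sequence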
- if initial_states is None: - raise Exception('Missing argument : initial_states') - outputs += final_states - self.decode = decode - self.output_length = output_length - if decode: - if output_length is None: - raise Exception( - 'output_length should be specified for decoder') - kwargs['return_sequences'] = True - self.return_states = return_states - if readout_input is not None: - self.readout = True - state_spec += [Input(batch_shape=K.int_shape(outputs[0]))] - self.states += [None] - inputs += [readout_input] - else: - self.readout = False - if teacher_force and not self.readout: - raise Exception('Readout should be enabled for teacher forcing.') - self.teacher_force = teacher_force - self.model = Model(inputs, outputs) - super(RecurrentModel, self).__init__(**kwargs) - input_shape = list(K.int_shape(input)) - if not decode: - input_shape.insert(1, None) - self.input_spec = InputSpec(shape=tuple(input_shape)) - self.state_spec = state_spec - self._optional_input_placeholders = {} - if state_initializer: - if type(state_initializer) not in [list, tuple]: - state_initializer = [state_initializer] * self.num_states - else: - state_initializer += [None] * \ - (self.num_states - len(state_initializer)) - state_initializer = [initializers.get(init) if init else initializers.get( - 'zeros') for init in state_initializer] - self.state_initializer = state_initializer - - def build(self, input_shape): - if type(input_shape) is list: - input_shape = input_shape[0] - if not self.decode: - input_length = input_shape[1] - if input_length is not None: - input_shape = list(self.input_spec.shape) - input_shape[1] = input_length - input_shape = tuple(input_shape) - self.input_spec = InputSpec(shape=input_shape) - if type(self.model.input) is list: - model_input_shape = self.model.input_shape[0] - else: - model_input_shape = self.model.input_shape - if not self.decode: - input_shape = input_shape[:1] + input_shape[2:] - for i, j in zip(input_shape, model_input_shape): - if i is not None and j is not None and i != j: - raise Exception('Model expected input with shape ' + str(model_input_shape) + - '. Received input with shape ' + str(input_shape)) - if self.stateful: - self.reset_states() - self.built = True - - # STATES - - @property - def num_states(self): - model_input = self.model.input - if type(model_input) is list: - return len(model_input[1:]) - else: - return 0 - - def get_initial_state(self, inputs): - if type(self.model.input) is not list: - return [] - try: - batch_size = K.int_shape(inputs)[0] - except: - batch_size = None - state_shapes = list(map(K.int_shape, self.model.input[1:])) - states = [] - if self.readout: - state_shapes.pop() - # default value for initial_readout is handled in call() - for shape in state_shapes: - if None in shape[1:]: - raise Exception( - 'Only the batch dimension of a state can be left unspecified. 
Got state with shape ' + str(shape)) - if shape[0] is None: - ndim = K.ndim(inputs) - z = K.zeros_like(inputs) - slices = [slice(None)] + [0] * (ndim - 1) - z = z[slices] # (batch_size,) - state_ndim = len(shape) - z = K.reshape(z, (-1,) + (1,) * (state_ndim - 1)) - z = K.tile(z, (1,) + tuple(shape[1:])) - states.append(z) - else: - states.append(K.zeros(shape)) - state_initializer = self.state_initializer - if state_initializer: - # some initializers don't accept symbolic shapes - for i in range(len(state_shapes)): - if state_shapes[i][0] is None: - if hasattr(self, 'batch_size'): - state_shapes[i] = (self.batch_size,) + \ - state_shapes[i][1:] - if None in state_shapes[i]: - state_shapes[i] = K.shape(states[i]) - num_state_init = len(state_initializer) - num_state = self.num_states - assert num_state_init == num_state, 'RNN has ' + \ - str(num_state) + ' states, but was provided ' + \ - str(num_state_init) + ' state initializers.' - for i in range(len(states)): - init = state_initializer[i] - shape = state_shapes[i] - try: - if not isinstance(init, initializers.Zeros): - states[i] = init(shape) - except: - raise Exception('Seems the initializer ' + init.__class__.__name__ + ' does not support symbolic shapes(' + str( - shape) + '). Try providing the full input shape (include batch dimension) for you RecurrentModel.') - return states - - def reset_states(self, states_value=None): - if len(self.states) == 0: - return - if not self.stateful: - raise AttributeError('Layer must be stateful.') - if not hasattr(self, 'states') or self.states[0] is None: - state_shapes = list(map(K.int_shape, self.model.input[1:])) - self.states = list(map(K.zeros, state_shapes)) - - if states_value is not None: - if type(states_value) not in (list, tuple): - states_value = [states_value] * len(self.states) - assert len(states_value) == len(self.states), 'Your RNN has ' + str(len(self.states) - ) + ' states, but was provided ' + str(len(states_value)) + ' state values.' - if 'numpy' not in type(states_value[0]): - states_value = list(map(np.array, states_value)) - if states_value[0].shape == tuple(): - for state, val in zip(self.states, states_value): - K.set_value(state, K.get_value(state) * 0. 
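# reset_states accepts nothing (reseed from state_initializer or zeros),
# a scalar (broadcast into every state, the branch below), or one array
# per state. Usage sketch for a stateful layer (hypothetical shapes):
#     rnn.reset_states()                       # zeros / state_initializer
#     rnn.reset_states(0.5)                    # fill all states with 0.5
#     rnn.reset_states([np.ones((32, 8))])     # explicit per-state values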
+ val) - else: - for state, val in zip(self.states, states_value): - K.set_value(state, val) - else: - if self.state_initializer: - for state, init in zip(self.states, self.state_initializer): - if isinstance(init, initializers.Zeros): - K.set_value(state, 0 * K.get_value(state)) - else: - K.set_value(state, K.eval( - init(K.get_value(state).shape))) - else: - for state in self.states: - K.set_value(state, 0 * K.get_value(state)) - - # EXECUTION - - def __call__(self, inputs, initial_state=None, initial_readout=None, ground_truth=None, **kwargs): - req_num_inputs = 1 + self.num_states - inputs = _to_list(inputs) - inputs = inputs[:] - if len(inputs) == 1: - if initial_state is not None: - if type(initial_state) is list: - inputs += initial_state - else: - inputs.append(initial_state) - else: - if self.readout: - initial_state = self._get_optional_input_placeholder( - 'initial_state', self.num_states - 1) - else: - initial_state = self._get_optional_input_placeholder( - 'initial_state', self.num_states) - inputs += _to_list(initial_state) - if self.readout: - if initial_readout is None: - initial_readout = self._get_optional_input_placeholder( - 'initial_readout') - inputs.append(initial_readout) - if self.teacher_force: - req_num_inputs += 1 - if ground_truth is None: - ground_truth = self._get_optional_input_placeholder( - 'ground_truth') - inputs.append(ground_truth) - assert len(inputs) == req_num_inputs, "Required " + \ - str(req_num_inputs) + " inputs, received " + str(len(inputs)) + "." - with K.name_scope(self.name): - if not self.built: - self.build(K.int_shape(inputs[0])) - if self._initial_weights is not None: - self.set_weights(self._initial_weights) - del self._initial_weights - self._initial_weights = None - previous_mask = _collect_previous_mask(inputs[:1]) - user_kwargs = kwargs.copy() - if not _is_all_none(previous_mask): - if 'mask' in inspect.getargspec(self.call).args: - if 'mask' not in kwargs: - kwargs['mask'] = previous_mask - input_shape = _collect_input_shape(inputs) - output = self.call(inputs, **kwargs) - output_mask = self.compute_mask(inputs[0], previous_mask) - output_shape = self.compute_output_shape(input_shape[0]) - self._add_inbound_node(input_tensors=inputs, output_tensors=output, - input_masks=previous_mask, output_masks=output_mask, - input_shapes=input_shape, output_shapes=output_shape, - arguments=user_kwargs) - if hasattr(self, 'activity_regularizer') and self.activity_regularizer is not None: - regularization_losses = [ - self.activity_regularizer(x) for x in _to_list(output)] - self.add_loss(regularization_losses, _to_list(inputs)) - return output - - def call(self, inputs, initial_state=None, initial_readout=None, ground_truth=None, mask=None, training=None): - # input shape: `(samples, time (padded with zeros), input_dim)` - # note that the .build() method of subclasses MUST define - # self.input_spec and self.state_spec with complete input shapes. 
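# __call__ pads the argument list with optional-input placeholders so the
# same layer can be invoked with or without explicit state:
#     y = rnn(x)                                  # states auto-initialized
#     y = rnn(x, initial_state=[h0])              # explicit initial state
#     y = rnn(x, initial_readout=r0, ground_truth=yt)  # readout / forcing
# The placeholders are detected below via _is_optional_input_placeholder
# and replaced by get_initial_state(inputs).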
- if type(mask) is list: - mask = mask[0] - if self.model is None: - raise Exception('Empty RecurrentModel.') - num_req_states = self.num_states - if self.readout: - num_actual_states = num_req_states - 1 - else: - num_actual_states = num_req_states - if type(inputs) is list: - inputs_list = inputs[:] - inputs = inputs_list.pop(0) - initial_states = inputs_list[:num_actual_states] - if len(initial_states) > 0: - if self._is_optional_input_placeholder(initial_states[0]): - initial_states = self.get_initial_state(inputs) - inputs_list = inputs_list[num_actual_states:] - if self.readout: - initial_readout = inputs_list.pop(0) - if self.teacher_force: - ground_truth = inputs_list.pop() - else: - if initial_state is not None: - if not isinstance(initial_state, (list, tuple)): - initial_states = [initial_state] - else: - initial_states = list(initial_state) - if self._is_optional_input_placeholder(initial_states[0]): - initial_states = self.get_initial_state(inputs) - - elif self.stateful: - initial_states = self.states - else: - initial_states = self.get_initial_state(inputs) - if self.readout: - if initial_readout is None or self._is_optional_input_placeholder(initial_readout): - output_shape = K.int_shape(_to_list((self.model.output))[0]) - output_ndim = len(output_shape) - input_ndim = K.ndim(inputs) - initial_readout = K.zeros_like(inputs) - slices = [slice(None)] + [0] * (input_ndim - 1) - initial_readout = initial_readout[slices] # (batch_size,) - initial_readout = K.reshape( - initial_readout, (-1,) + (1,) * (output_ndim - 1)) - initial_readout = K.tile( - initial_readout, (1,) + tuple(output_shape[1:])) - initial_states.append(initial_readout) - if self.teacher_force: - if ground_truth is None or self._is_optional_input_placeholder(ground_truth): - raise Exception( - 'ground_truth must be provided for RecurrentModel with teacher_force=True.') - if K.backend() == 'tensorflow': - with tf.control_dependencies(None): - counter = K.zeros((1,)) - else: - counter = K.zeros((1,)) - counter = K.cast(counter, 'int32') - initial_states.insert(-1, counter) - initial_states[-2] - initial_states.insert(-1, ground_truth) - num_req_states += 2 - if len(initial_states) != num_req_states: - raise ValueError('Layer requires ' + str(num_req_states) + - ' states but was passed ' + - str(len(initial_states)) + - ' initial states.') - input_shape = K.int_shape(inputs) - if self.unroll and input_shape[1] is None: - raise ValueError('Cannot unroll a RNN if the ' - 'time dimension is undefined. \n' - '- If using a Sequential model, ' - 'specify the time dimension by passing ' - 'an `input_shape` or `batch_input_shape` ' - 'argument to your first layer. 
If your ' - 'first layer is an Embedding, you can ' - 'also use the `input_length` argument.\n' - '- If using the functional API, specify ' - 'the time dimension by passing a `shape` ' - 'or `batch_shape` argument to your Input layer.') - preprocessed_input = self.preprocess_input(inputs, training=None) - constants = self.get_constants(inputs, training=None) - if self.decode: - initial_states.insert(0, inputs) - preprocessed_input = K.zeros((1, self.output_length, 1)) - input_length = self.output_length - else: - input_length = input_shape[1] - if self.uses_learning_phase: - with learning_phase_scope(0): - last_output_test, outputs_test, states_test, updates = rnn(self.step, - preprocessed_input, - initial_states, - go_backwards=self.go_backwards, - mask=mask, - constants=constants, - unroll=self.unroll, - input_length=input_length) - with learning_phase_scope(1): - last_output_train, outputs_train, states_train, updates = rnn(self.step, - preprocessed_input, - initial_states, - go_backwards=self.go_backwards, - mask=mask, - constants=constants, - unroll=self.unroll, - input_length=input_length) - - last_output = K.in_train_phase( - last_output_train, last_output_test, training=training) - outputs = K.in_train_phase( - outputs_train, outputs_test, training=training) - states = [] - for state_train, state_test in zip(states_train, states_test): - states.append(K.in_train_phase( - state_train, state_test, training=training)) - - else: - last_output, outputs, states, updates = rnn(self.step, - preprocessed_input, - initial_states, - go_backwards=self.go_backwards, - mask=mask, - constants=constants, - unroll=self.unroll, - input_length=input_length) - states = list(states) - if self.decode: - states.pop(0) - if self.readout: - states.pop() - if self.teacher_force: - states.pop() - states.pop() - if len(updates) > 0: - self.add_update(updates) - if self.stateful: - updates = [] - for i in range(len(states)): - updates.append((self.states[i], states[i])) - self.add_update(updates, inputs) - - # Properly set learning phase - if 0 < self.dropout + self.recurrent_dropout: - last_output._uses_learning_phase = True - outputs._uses_learning_phase = True - - if self.return_sequences: - y = outputs - else: - y = last_output - if self.return_states: - return [y] + states - else: - return y - - def step(self, inputs, states): - states = list(states) - if self.teacher_force: - readout = states.pop() - ground_truth = states.pop() - assert K.ndim(ground_truth) == 3, K.ndim(ground_truth) - counter = states.pop() - if K.backend() == 'tensorflow': - with tf.control_dependencies(None): - zero = K.cast(K.zeros((1,))[0], 'int32') - one = K.cast(K.zeros((1,))[0], 'int32') - else: - zero = K.cast(K.zeros((1,))[0], 'int32') - one = K.cast(K.zeros((1,))[0], 'int32') - slices = [slice(None), counter[0] - K.switch(counter[0], - one, zero)] + [slice(None)] * (K.ndim(ground_truth) - 2) - ground_truth_slice = ground_truth[slices] - readout = K.in_train_phase( - K.switch(counter[0], ground_truth_slice, readout), readout) - states.append(readout) - if self.decode: - model_input = states - else: - model_input = [inputs] + states - shapes = [] - for x in model_input: - if hasattr(x, '_keras_shape'): - shapes.append(x._keras_shape) - del x._keras_shape # Else keras internals will get messed up. 
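# Teacher forcing, as wired in call() and step(): an integer counter state
# indexes into ground_truth, and K.in_train_phase swaps the model's own
# readout for the previous ground-truth step during training only. Usage
# sketch (hypothetical shapes):
#     rnn = RecurrentModel(..., readout_input=Input((8,)), teacher_force=True)
#     y = rnn(x, ground_truth=y_true)    # y_true: (batch, time, 8)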
- model_output = _to_list(self.model.call(model_input)) - for x, s in zip(model_input, shapes): - setattr(x, '_keras_shape', s) - if self.decode: - model_output.insert(1, model_input[0]) - for tensor in model_output: - tensor._uses_learning_phase = self.uses_learning_phase - states = model_output[1:] - output = model_output[0] - if self.readout: - states += [output] - if self.teacher_force: - states.insert(-1, counter + 1) - states.insert(-1, ground_truth) - return output, states - - # SHAPE, MASK, WEIGHTS - - def compute_output_shape(self, input_shape): - if not self.decode: - if type(input_shape) is list: - input_shape[0] = self._remove_time_dim(input_shape[0]) - else: - input_shape = self._remove_time_dim(input_shape) - input_shape = _to_list(input_shape) - input_shape = [input_shape[0]] + \ - [K.int_shape(state) for state in self.model.input[1:]] - output_shape = self.model.compute_output_shape(input_shape) - if type(output_shape) is list: - output_shape = output_shape[0] - if self.return_sequences: - if self.decode: - output_shape = output_shape[:1] + \ - (self.output_length,) + output_shape[1:] - else: - output_shape = output_shape[:1] + \ - (self.input_spec.shape[1],) + output_shape[1:] - if self.return_states and len(self.states) > 0: - output_shape = [output_shape] + \ - list(map(K.int_shape, self.model.output[1:])) - return output_shape - - def compute_mask(self, input, input_mask=None): - mask = input_mask[0] if type(input_mask) is list else input_mask - mask = mask if self.return_sequences else None - mask = [mask] + [None] * \ - len(self.states) if self.return_states else mask - return mask - - def set_weights(self, weights): - self.model.set_weights(weights) - - def get_weights(self): - return self.model.get_weights() - - # LAYER ATTRIBS - - @property - def updates(self): - return self.model.updates - - def add_update(self, updates, inputs=None): - self.model.add_update(updates, inputs) - - @property - def uses_learning_phase(self): - return self.teacher_force or self.model.uses_learning_phase - - @property - def _per_input_losses(self): - if hasattr(self, 'model'): - return getattr(self.model, '_per_input_losses', {}) - else: - return {} - - @_per_input_losses.setter - def _per_input_losses(self, val): - if hasattr(self, 'model'): - self.model._per_input_losses = val - - @property - def losses(self): - if hasattr(self, 'model'): - return self.model.losses - else: - return [] - - @losses.setter - def losses(self, val): - if hasattr(self, 'model'): - self.model.losses = val - - def add_loss(self, losses, inputs=None): - self.model.add_loss(losses, inputs) - - @property - def constraints(self): - return self.model.constraints - - @property - def trainable_weights(self): - return self.model.trainable_weights - - @property - def non_trainable_weights(self): - return self.model.non_trainable_weights - - def get_losses_for(self, inputs): - return self.model.get_losses_for(inputs) - - def get_updates_for(self, inputs): - return self.model.get_updates_for(inputs) - - def _remove_time_dim(self, shape): - return shape[:1] + shape[2:] - - # SERIALIZATION - - def _serialize_state_initializer(self): - si = self.state_initializer - if si is None: - return None - elif type(si) is list: - return list(map(initializers.serialize, si)) - else: - return initializers.serialize(si) - - def get_config(self): - config = {'model_config': self.model.get_config(), - 'decode': self.decode, - 'output_length': self.output_length, - 'return_states': self.return_states, - 'state_initializer': 
self._serialize_state_initializer() - } - base_config = super(RecurrentModel, self).get_config() - config.update(base_config) - return config - - @classmethod - def from_config(cls, config, custom_objects={}): - if type(custom_objects) is list: - custom_objects = {obj.__name__: obj for obj in custom_objects} - custom_objects.update(_get_cells()) - config = config.copy() - model_config = config.pop('model_config') - if model_config is None: - model = None - else: - model = Model.from_config(model_config, custom_objects) - if type(model.input) is list: - input = model.input[0] - initial_states = model.input[1:] - else: - input = model.input - initial_states = None - if type(model.output) is list: - output = model.output[0] - final_states = model.output[1:] - else: - output = model.output - final_states = None - return cls(input, output, initial_states, final_states, **config) - - def get_cell(self, **kwargs): - return RNNCellFromModel(self.model, **kwargs) - - def _get_optional_input_placeholder(self, name=None, num=1): - if name: - if name not in self._optional_input_placeholders: - if num > 1: - self._optional_input_placeholders[name] = [ - self._get_optional_input_placeholder() for _ in range(num)] - else: - self._optional_input_placeholders[name] = self._get_optional_input_placeholder( - ) - return self._optional_input_placeholders[name] - if num == 1: - optional_input_placeholder = _to_list( - _OptionalInputPlaceHolder()._inbound_nodes[0].output_tensors)[0] - assert self._is_optional_input_placeholder( - optional_input_placeholder) - return optional_input_placeholder - else: - y = [] - for _ in range(num): - optional_input_placeholder = _to_list( - _OptionalInputPlaceHolder()._inbound_nodes[0].output_tensors)[0] - assert self._is_optional_input_placeholder( - optional_input_placeholder) - y.append(optional_input_placeholder) - return y - - def _is_optional_input_placeholder(self, x): - if hasattr(x, '_keras_history'): - if isinstance(x._keras_history[0], _OptionalInputPlaceHolder): - return True - return False - - -class RecurrentSequential(RecurrentModel): - - def __init__(self, state_sync=False, decode=False, output_length=None, return_states=False, readout=False, readout_activation='linear', teacher_force=False, state_initializer=None, **kwargs): - self.state_sync = state_sync - self.cells = [] - if decode and output_length is None: - raise Exception('output_length should be specified for decoder') - self.decode = decode - self.output_length = output_length - if decode: - if output_length is None: - raise Exception( - 'output_length should be specified for decoder') - kwargs['return_sequences'] = True - self.return_states = return_states - super(RecurrentModel, self).__init__(**kwargs) - self.readout = readout - self.readout_activation = activations.get(readout_activation) - self.teacher_force = teacher_force - self._optional_input_placeholders = {} - if state_initializer: - if type(state_initializer) in [list, tuple]: - state_initializer = [initializers.get(init) if init else initializers.get( - 'zeros') for init in state_initializer] - else: - state_initializer = initializers.get(state_initializer) - self._state_initializer = state_initializer - - @property - def state_initializer(self): - if self._state_initializer is None: - return None - elif type(self._state_initializer) is list: - return self._state_initializer + [initializers.get('zeros')] * (self.num_states - len(self._state_initializer)) - else: - return [self._state_initializer] * self.num_states - - 
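# ---- Editor's aside: illustrative sketch, not part of the scraped corpus ----
# _serialize_state_initializer() above relies on the keras.initializers
# serialize / get round trip. The same pattern on its own:
from keras import initializers

_init = initializers.get('random_normal')    # name -> initializer object
_config = initializers.serialize(_init)      # object -> JSON-friendly dict
_restored = initializers.get(_config)        # dict  -> equivalent object
print(type(_restored).__name__)              # RandomNormal
# ---- end aside ----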
@state_initializer.setter - def state_initializer(self, value): - self._state_initializer = value - - @property - def num_states(self): - if hasattr(self, 'model'): - return super(RecurrentSequential, self).num_states - num = 0 - for cell in self.cells: - if _is_rnn_cell(cell): - num += cell.num_states - if self.state_sync: - break - if self.readout: - num += 1 - return num - - def add(self, cell): - self.cells.append(cell) - cell_input_shape = _get_cell_input_shape(cell) - if len(self.cells) == 1: - if len(self.cells) == 1: - if self.decode: - self.input_spec = InputSpec(shape=cell_input_shape) - else: - self.input_spec = InputSpec( - shape=cell_input_shape[:1] + (None,) + cell_input_shape[1:]) - - if cell_input_shape is not None: - cell_input_shape = cell.batch_input_shape - batch_size = cell_input_shape[0] - if batch_size is not None: - self.batch_size = batch_size - if not self.stateful: - self.states = [None] * self.num_states - - def build(self, input_shape): - if hasattr(self, 'model'): - del self.model - # Try and get batch size for initializer - if not hasattr(self, 'batch_size'): - if hasattr(self, 'batch_input_shape'): - batch_size = self.batch_input_shape[0] - if batch_size is not None: - self.batch_size = batch_size - if self.state_sync: - if type(input_shape) is list: - x_shape = input_shape[0] - if not self.decode: - input_length = x_shape.pop(1) - if input_length is not None: - shape = list(self.input_spec.shape) - shape[1] = input_length - self.input_spec = InputSpec(shape=tuple(shape)) - input = Input(batch_shape=x_shape) - initial_states = [Input(batch_shape=shape) - for shape in input_shape[1:]] - else: - if not self.decode: - input_length = input_shape[1] - if input_length is not None: - shape = list(self.input_spec.shape) - shape[1] = input_length - self.input_spec = InputSpec(shape=tuple(shape)) - input = Input( - batch_shape=input_shape[:1] + input_shape[2:]) - else: - input = Input(batch_shape=input_shape) - initial_states = [] - output = input - final_states = initial_states[:] - for cell in self.cells: - if _is_rnn_cell(cell): - if not initial_states: - cell.build(K.int_shape(output)) - initial_states = [Input(batch_shape=shape) - for shape in _to_list(cell.state_shape)] - final_states = initial_states[:] - cell_out = cell([output] + final_states) - if type(cell_out) is not list: - cell_out = [cell_out] - output = cell_out[0] - final_states = cell_out[1:] - else: - output = cell(output) - else: - if type(input_shape) is list: - x_shape = input_shape[0] - if not self.decode: - input_length = x_shape.pop(1) - if input_length is not None: - shape = list(self.input_spec.shape) - shape[1] = input_length - self.input_spec = InputSpec(shape=tuple(shape)) - input = Input(batch_shape=x_shape) - initial_states = [Input(batch_shape=shape) - for shape in input_shape[1:]] - output = input - final_states = [] - for cell in self.cells: - if _is_rnn_cell(cell): - cell_initial_states = initial_states[len( - final_states): len(final_states) + cell.num_states] - cell_in = [output] + cell_initial_states - cell_out = _to_list(cell(cell_in)) - output = cell_out[0] - final_states += cell_out[1:] - else: - output = cell(output) - else: - if not self.decode: - input_length = input_shape[1] - if input_length is not None: - shape = list(self.input_spec.shape) - shape[1] = input_length - self.input_spec = InputSpec(shape=tuple(shape)) - input = Input( - batch_shape=input_shape[:1] + input_shape[2:]) - else: - input = Input(batch_shape=input_shape) - output = input - initial_states = [] - 
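# ---- Editor's aside: illustrative sketch, not part of the scraped corpus ----
# build() above threads a single timestep through the cells functionally:
# each RNN cell maps [x] + states -> [y] + new_states. The same contract
# written out by hand for one minimal cell (editor's _-prefixed names):
from keras.layers import Activation, Dense, Input, add
from keras.models import Model

_x = Input((5,))        # input at time t
_h_tm1 = Input((10,))   # hidden state from time t - 1
_h = Activation('tanh')(add([Dense(10)(_x), Dense(10, use_bias=False)(_h_tm1)]))
_step = Model([_x, _h_tm1], [_h, _h])   # the output doubles as the next state
print(_step.output_shape)               # [(None, 10), (None, 10)]
# ---- end aside ----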
final_states = [] - for cell in self.cells: - if _is_rnn_cell(cell): - cell.build(K.int_shape(output)) - state_inputs = [Input(batch_shape=shape) - for shape in _to_list(cell.state_shape)] - initial_states += state_inputs - cell_in = [output] + state_inputs - cell_out = _to_list(cell(cell_in)) - output = cell_out[0] - final_states += cell_out[1:] - else: - output = cell(output) - - self.model = Model([input] + initial_states, [output] + final_states) - self.states = [None] * len(initial_states) - if self.readout: - readout_input = Input(batch_shape=K.int_shape( - output), name='readout_input') - if self.readout_activation.__name__ == 'linear': - readout = Lambda(lambda x: x + 0., - output_shape=lambda s: s)(readout_input) - else: - readout = Activation(self.readout_activation)(readout_input) - input = Input(batch_shape=K.int_shape(input)) - if self.readout in [True, 'add']: - input_readout_merged = add([input, readout]) - elif self.readout in ['mul', 'multiply']: - input_readout_merged = multiply([input, readout]) - elif self.readout in ['avg', 'average']: - input_readout_merged = average([input, readout]) - elif self.readout in ['max', 'maximum']: - input_readout_merged = maximum([input, readout]) - elif self.readout == 'readout_only': - input_readout_merged = readout - initial_states = [Input(batch_shape=K.int_shape(s)) - for s in initial_states] - output = _to_list(self.model( - [input_readout_merged] + initial_states)) - final_states = output[1:] - output = output[0] - self.model = Model([input] + initial_states + - [readout_input], [output] + final_states) - self.states.append(None) - super(RecurrentSequential, self).build(input_shape) - - def get_config(self): - config = {'cells': list(map(serialize, self.cells)), - 'decode': self.decode, - 'output_length': self.output_length, - 'readout': self.readout, - 'teacher_force': self.teacher_force, - 'return_states': self.return_states, - 'state_sync': self.state_sync, - 'state_initializer': self._serialize_state_initializer(), - 'readout_activation': activations.serialize(self.readout_activation)} - base_config = super(RecurrentModel, self).get_config() - config.update(base_config) - return config - - @classmethod - def from_config(cls, config, custom_objects={}): - custom_objects.update(_get_cells()) - cells = config.pop('cells') - rs = cls(**config) - for cell_config in cells: - cell = deserialize(cell_config, custom_objects) - rs.add(cell) - return rs - - -# Legacy -RecurrentContainer = RecurrentSequential -import types -import marshal -# Function serialization / deserialixation - - -def func_dump(func): - '''Serialize user defined function.''' - - code = marshal.dumps(func.__code__).decode('raw_unicode_escape') - defaults = func.__defaults__ - if func.__closure__: - closure = tuple(c.cell_contents for c in func.__closure__) - else: - closure = None - return (code, defaults, closure) - - -def func_load( - code, - defaults=None, - closure=None, - globs=None, -): - '''Deserialize user defined function.''' - - if isinstance(code, (tuple, list)): # unpack previous dump - (code, defaults, closure) = code - code = marshal.loads(code.encode('raw_unicode_escape')) - if closure is not None: - closure = func_reconstruct_closure(closure) - if globs is None: - globs = globals() - return types.FunctionType(code, globs, name=code.co_name, - argdefs=defaults, closure=closure) - - -def func_reconstruct_closure(values): - '''Deserialization helper that reconstructs a closure.''' - - nums = range(len(values)) - src = ['def func(arg):'] - src += [' _%d = 
arg[%d]' % (n, n) for n in nums] - src += [' return lambda:(%s)' % ','.join(['_%d' % n for n in - nums]), ''] - src = '\n'.join(src) - try: - exec(src, globals()) - except: - raise SyntaxError(src) - return func(values).__closure__ - - -def serialize_function(func): - if isinstance(func, types.LambdaType): - function = func_dump(func) - function_type = 'lambda' - else: - function = func.__name__ - function_type = 'function' - return (function_type, function) - - -def deserialize_function(txt): - (function_type, function) = txt - if function_type == 'function': - return globals()[function] - else: - return func_load(function, globs=globals()) -from recurrentshop import RecurrentModel -from keras.layers import Input, Dense, add, Activation -from keras.models import Model -from keras.utils.test_utils import keras_test -import numpy as np - - -@keras_test -def test_model(): - x = Input((5,)) - h_tm1 = Input((10,)) - h = add([Dense(10)(x), Dense(10, use_bias=False)(h_tm1)]) - h = Activation('tanh')(h) - a = Input((7, 5)) - - rnn = RecurrentModel( - input=x, output=h, initial_states=h_tm1, final_states=h) - b = rnn(a) - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit(np.random.random((32, 7, 5)), np.random.random((32, 10))) - model.predict(np.zeros((32, 7, 5))) - - -@keras_test -def test_state_initializer(): - x = Input((5,)) - h_tm1 = Input((10,)) - h = add([Dense(10)(x), Dense(10, use_bias=False)(h_tm1)]) - h = Activation('tanh')(h) - a = Input((7, 5)) - - rnn = RecurrentModel(input=x, output=h, initial_states=h_tm1, - final_states=h, state_initializer='random_normal') - b = rnn(a) - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit(np.random.random((32, 7, 5)), np.random.random((32, 10))) - model.predict(np.zeros((32, 7, 5))) - - -@keras_test -def test_unroll(): - x = Input((5,)) - h_tm1 = Input((10,)) - h = add([Dense(10)(x), Dense(10, use_bias=False)(h_tm1)]) - h = Activation('tanh')(h) - a = Input((7, 5)) - - rnn = RecurrentModel( - input=x, output=h, initial_states=h_tm1, final_states=h, unroll=True) - b = rnn(a) - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit(np.random.random((32, 7, 5)), np.random.random((32, 10))) - model.predict(np.zeros((32, 7, 5))) - - -@keras_test -def test_decode(): - x = Input((5,)) - h_tm1 = Input((10,)) - h = add([Dense(10)(x), Dense(10, use_bias=False)(h_tm1)]) - h = Activation('tanh')(h) - - a = Input((5,)) - rnn = RecurrentModel(input=x, output=h, initial_states=h_tm1, - final_states=h, decode=True, output_length=7) - b = rnn(a) - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit(np.random.random((32, 5)), np.random.random((32, 7, 10))) - model.predict(np.zeros((32, 5))) - - -@keras_test -def test_readout(): - x = Input((5,)) - y_tm1 = Input((5,)) - h_tm1 = Input((5,)) - h = add([Dense(5)(add([x, y_tm1])), Dense(5, use_bias=False)(h_tm1)]) - h = Activation('tanh')(h) - - rnn = RecurrentModel(input=x, initial_states=h_tm1, - output=h, final_states=h, readout_input=y_tm1) - - a = Input((7, 5)) - b = rnn(a) - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit(np.random.random((32, 7, 5)), np.random.random((32, 5))) - model.predict(np.zeros((32, 7, 5))) -from recurrentshop import RecurrentSequential -from recurrentshop.cells import * -from recurrentshop.advanced_cells import * -from keras.models import Model -from keras.layers import Input -from keras.utils.test_utils import keras_test -import numpy as np - - 
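# ---- Editor's aside: illustrative sketch, not part of the scraped corpus ----
# func_dump / func_load, defined earlier in this dump, push a function through
# marshal so lambdas can ride along in layer configs. The essence of that
# round trip, minus the closure handling:
import marshal
import types

def _square(n):
    return n * n

_payload = marshal.dumps(_square.__code__)   # bytes; same-interpreter only
_restored = types.FunctionType(marshal.loads(_payload), globals())
print(_restored(7))  # 49
# ---- end aside ----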
-@keras_test -def test_sequential(): - rnn = RecurrentSequential() - rnn.add(LSTMCell(output_dim=7, input_dim=5)) - rnn.add(SimpleRNNCell(8)) - rnn.add(GRUCell(10)) - - a = Input((7, 5)) - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 7, 5))), np.random.random((12, 10))) - model.predict(np.random.random((12, 7, 5))) - - -@keras_test -def test_state_initializer(): - rnn = RecurrentSequential(state_initializer='random_normal') - rnn.add(LSTMCell(7, input_dim=5)) - rnn.add(SimpleRNNCell(8)) - rnn.add(GRUCell(10)) - - a = Input((7, 5)) - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 7, 5))), np.random.random((12, 10))) - model.predict(np.random.random((12, 7, 5))) - - -@keras_test -def test_state_initializer_as_list(): - rnn = RecurrentSequential( - state_initializer=['random_normal', 'glorot_uniform']) - rnn.add(LSTMCell(7, batch_input_shape=(12, 5))) - rnn.add(SimpleRNNCell(8)) - rnn.add(GRUCell(10)) - - a = Input((7, 5)) - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 7, 5))), np.random.random((12, 10))) - model.predict(np.random.random((12, 7, 5))) - - -@keras_test -def test_unroll(): - rnn = RecurrentSequential(unroll=True) - rnn.add(LSTMCell(7, input_dim=5)) - rnn.add(SimpleRNNCell(8)) - rnn.add(GRUCell(10)) - - a = Input((7, 5)) - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 7, 5))), np.random.random((12, 10))) - model.predict(np.random.random((12, 7, 5))) - - -@keras_test -def test_state_sync(): - rnn = RecurrentSequential(state_sync=True) - rnn.add(LSTMCell(10, input_dim=5)) - rnn.add(LSTMCell(10)) - rnn.add(LSTMCell(10)) - - a = Input((7, 5)) - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 7, 5))), np.random.random((12, 10))) - model.predict(np.random.random((12, 7, 5))) - - -@keras_test -def test_state_sync_unroll(): - rnn = RecurrentSequential(state_sync=True, unroll=True) - rnn.add(LSTMCell(10, input_dim=5)) - rnn.add(LSTMCell(10)) - rnn.add(LSTMCell(10)) - - a = Input((7, 5)) - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 7, 5))), np.random.random((12, 10))) - model.predict(np.random.random((12, 7, 5))) - - -# Decoders -@keras_test -def test_decode(): - a = Input((5,)) - - rnn = RecurrentSequential(decode=True, output_length=7) - rnn.add(LSTMCell(10, input_dim=5)) - rnn.add(LSTMCell(10)) - rnn.add(LSTMCell(10)) - - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 5))), np.random.random((12, 7, 10))) - model.predict(np.random.random((12, 5))) - - -@keras_test -def test_readout_state_sync(): - a = Input((5,)) - rnn = RecurrentSequential(state_sync=True, decode=True, output_length=7) - rnn.add(LSTMCell(10, input_dim=5)) - rnn.add(LSTMCell(10)) - rnn.add(LSTMCell(10)) - - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 5))), np.random.random((12, 7, 10))) - model.predict(np.random.random((12, 5))) - - -@keras_test -def test_decode_unroll(): - a = Input((5,)) - rnn = RecurrentSequential(decode=True, output_length=7, unroll=True) - rnn.add(LSTMCell(10, input_dim=5)) - rnn.add(LSTMCell(10)) - rnn.add(LSTMCell(10)) - - b = rnn(a) - - 
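# ---- Editor's aside: illustrative sketch, not part of the scraped corpus ----
# The tests above stack LSTM/SimpleRNN/GRU cells inside one RecurrentSequential
# loop. The closest stock-Keras comparison stacks whole recurrent layers, each
# running its own scan over the (7, 5) sequence:
from keras.layers import GRU, LSTM, SimpleRNN
from keras.models import Sequential

_stacked = Sequential([
    LSTM(7, return_sequences=True, input_shape=(7, 5)),
    SimpleRNN(8, return_sequences=True),
    GRU(10),                       # final layer keeps only the last timestep
])
_stacked.compile(loss='mse', optimizer='sgd')
print(_stacked.output_shape)       # (None, 10)
# ---- end aside ----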
model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 5))), np.random.random((12, 7, 10))) - model.predict(np.random.random((12, 5))) - - -@keras_test -def test_decode_unroll_state_sync(): - a = Input((5,)) - rnn = RecurrentSequential( - state_sync=True, decode=True, output_length=7, unroll=True) - rnn.add(LSTMCell(10, input_dim=5)) - rnn.add(LSTMCell(10)) - rnn.add(LSTMCell(10)) - - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 5))), np.random.random((12, 7, 10))) - model.predict(np.random.random((12, 5))) - - -# Readout -@keras_test -def test_readout(): - a = Input((7, 5)) - - rnn = RecurrentSequential(readout=True) - rnn.add(LSTMCell(7, input_dim=5)) - rnn.add(SimpleRNNCell(8)) - rnn.add(GRUCell(5)) - - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 7, 5))), np.random.random((12, 5))) - model.predict(np.random.random((12, 7, 5))) - - -@keras_test -def test_readout_unroll(): - a = Input((7, 5)) - rnn = RecurrentSequential(readout=True, unroll=True) - rnn.add(LSTMCell(7, input_dim=5)) - rnn.add(SimpleRNNCell(8)) - rnn.add(GRUCell(5)) - - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 7, 5))), np.random.random((12, 5))) - model.predict(np.random.random((12, 7, 5))) - - -@keras_test -def test_readout_state_sync(): - a = Input((7, 5)) - rnn = RecurrentSequential(readout=True, state_sync=True) - rnn.add(LSTMCell(5, input_dim=5)) - rnn.add(LSTMCell(5)) - rnn.add(LSTMCell(5)) - - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 7, 5))), np.random.random((12, 5))) - model.predict(np.random.random((12, 7, 5))) - - -@keras_test -def test_readout_state_sync_unroll(): - a = Input((7, 5)) - rnn = RecurrentSequential(readout=True, state_sync=True, unroll=True) - rnn.add(LSTMCell(5, input_dim=5)) - rnn.add(LSTMCell(5)) - rnn.add(LSTMCell(5)) - - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 7, 5))), np.random.random((12, 5))) - model.predict(np.random.random((12, 7, 5))) - - -# Decoder + readout -@keras_test -def test_decoder_readout(): - a = Input((5,)) - - rnn = RecurrentSequential(decode=True, output_length=7, readout=True) - rnn.add(LSTMCell(5, input_dim=5)) - rnn.add(LSTMCell(5)) - rnn.add(LSTMCell(5)) - - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 5))), np.random.random((12, 7, 5))) - model.predict(np.random.random((12, 5))) - -# teacher forcing - - -@keras_test -def test_teacher_force(): - a = Input((7, 5)) - - rnn = RecurrentSequential(readout=True, teacher_force=True) - rnn.add(LSTMCell(7, input_dim=5)) - rnn.add(SimpleRNNCell(8)) - rnn.add(GRUCell(5)) - - ground_truth = Input((7, 5)) - - b = rnn(a, ground_truth=ground_truth) - - model = Model([a, ground_truth], b) - - model.compile(loss='mse', optimizer='sgd') - model.fit([np.random.random((12, 7, 5)), np.random.random( - (12, 7, 5))], np.random.random((12, 5))) - model.predict([np.random.random((12, 7, 5))] * 2) - - -@keras_test -def test_serialisation(): - rnn = RecurrentSequential() - rnn.add(LSTMCell(output_dim=7, input_dim=5)) - rnn.add(SimpleRNNCell(8)) - rnn.add(GRUCell(10)) - - rnn_config = rnn.get_config() - recovered_rnn = RecurrentSequential.from_config(rnn_config) - - 
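# ---- Editor's aside: illustrative sketch, not part of the scraped corpus ----
# The get_config / from_config round trip being exercised above is the
# standard Keras serialisation contract. The same check against a stock layer:
from keras.layers import Dense

_layer = Dense(10, activation='tanh')
_clone = Dense.from_config(_layer.get_config())
print(_clone.units, _clone.activation.__name__)   # 10 tanh
# ---- end aside ----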
a = Input((7, 5)) - b = recovered_rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 7, 5))), np.random.random((12, 10))) - model.predict(np.random.random((12, 7, 5))) - - -@keras_test -def test_advanced_cells(): - rnn = RecurrentSequential() - rnn.add(RHNCell(10, recurrence_depth=2, input_dim=5)) - - a = Input((7, 5)) - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((12, 7, 5))), np.random.random((12, 10))) - model.predict(np.random.random((12, 7, 5))) -from recurrentshop import RecurrentSequential, RNNCellFromModel -from keras.models import Model -from keras.layers import add, Activation, Dense, Input -from keras.utils.test_utils import keras_test -import numpy as np - - -@keras_test -def test_rnn_cell_from_model(): - x = Input((5,)) - h_tm1 = Input((10,)) - h = add([Dense(10)(x), Dense(10, use_bias=False)(h_tm1)]) - h = Activation('tanh')(h) - - cell_model = Model([x, h_tm1], [h, h]) - - rnn_cell = RNNCellFromModel(cell_model) - - rnn = RecurrentSequential() - rnn.add(rnn_cell) - - a = Input((7, 5)) - b = rnn(a) - - model = Model(a, b) - - model.compile(loss='mse', optimizer='sgd') - model.fit((np.random.random((32, 7, 5))), np.random.random((32, 10))) - model.predict(np.zeros((32, 7, 5))) -import keras.backend as K - - -if K.backend() == 'tensorflow': - from .tensorflow_backend import * - rnn = lambda *args, **kwargs: K.rnn(*args, **kwargs) + ([],) -elif K.backend() == 'theano': - from .theano_backend import * -else: - raise Exception(K.backend() + ' backend is not supported.') -from keras.backend import tensorflow_backend as K -import tensorflow as tf - - -class learning_phase_scope(object): - - def __init__(self, value): - self.value = value - - def __enter__(self): - self.learning_phase_placeholder = K.learning_phase() - K.set_learning_phase(self.value) - - def __exit__(self, *args): - K._GRAPH_LEARNING_PHASES[tf.get_default_graph( - )] = self.learning_phase_placeholder -import numpy as np -import math -from keras.initializations import normal, identity -from keras.models import model_from_json -from keras.models import Sequential, Model -from keras.engine.training import collect_trainable_weights -from keras.layers import Dense, Flatten, Input, merge, Lambda -from keras.optimizers import Adam -import tensorflow as tf -import keras.backend as K - -HIDDEN1_UNITS = 300 -HIDDEN2_UNITS = 600 - - -class ActorNetwork(object): - def __init__(self, sess, state_size, action_size, BATCH_SIZE, TAU, LEARNING_RATE): - self.sess = sess - self.BATCH_SIZE = BATCH_SIZE - self.TAU = TAU - self.LEARNING_RATE = LEARNING_RATE - - K.set_session(sess) - - # Now create the model - self.model, self.weights, self.state = self.create_actor_network( - state_size, action_size) - self.target_model, self.target_weights, self.target_state = self.create_actor_network( - state_size, action_size) - self.action_gradient = tf.placeholder(tf.float32, [None, action_size]) - self.params_grad = tf.gradients( - self.model.output, self.weights, -self.action_gradient) - grads = zip(self.params_grad, self.weights) - self.optimize = tf.train.AdamOptimizer( - LEARNING_RATE).apply_gradients(grads) - self.sess.run(tf.initialize_all_variables()) - - def train(self, states, action_grads): - self.sess.run(self.optimize, feed_dict={ - self.state: states, - self.action_gradient: action_grads - }) - - def target_train(self): - actor_weights = self.model.get_weights() - actor_target_weights = 
self.target_model.get_weights() - for i in xrange(len(actor_weights)): - actor_target_weights[i] = self.TAU * actor_weights[i] + \ - (1 - self.TAU) * actor_target_weights[i] - self.target_model.set_weights(actor_target_weights) - - def create_actor_network(self, state_size, action_dim): - print("Now we build the model") - S = Input(shape=[state_size]) - h0 = Dense(HIDDEN1_UNITS, activation='relu')(S) - h1 = Dense(HIDDEN2_UNITS, activation='relu')(h0) - Steering = Dense(1, activation='tanh', init=lambda shape, - name: normal(shape, scale=1e-4, name=name))(h1) - Acceleration = Dense(1, activation='sigmoid', init=lambda shape, name: normal( - shape, scale=1e-4, name=name))(h1) - Brake = Dense(1, activation='sigmoid', init=lambda shape, - name: normal(shape, scale=1e-4, name=name))(h1) - V = merge([Steering, Acceleration, Brake], mode='concat') - model = Model(input=S, output=V) - return model, model.trainable_weights, S -import numpy as np -import math -from keras.initializations import normal, identity -from keras.models import model_from_json, load_model -from keras.engine.training import collect_trainable_weights -from keras.models import Sequential -from keras.layers import Dense, Flatten, Input, merge, Lambda, Activation -from keras.models import Sequential, Model -from keras.optimizers import Adam -import keras.backend as K -import tensorflow as tf - -HIDDEN1_UNITS = 300 -HIDDEN2_UNITS = 600 - - -class CriticNetwork(object): - def __init__(self, sess, state_size, action_size, BATCH_SIZE, TAU, LEARNING_RATE): - self.sess = sess - self.BATCH_SIZE = BATCH_SIZE - self.TAU = TAU - self.LEARNING_RATE = LEARNING_RATE - self.action_size = action_size - - K.set_session(sess) - - # Now create the model - self.model, self.action, self.state = self.create_critic_network( - state_size, action_size) - self.target_model, self.target_action, self.target_state = self.create_critic_network( - state_size, action_size) - self.action_grads = tf.gradients( - self.model.output, self.action) # GRADIENTS for policy update - self.sess.run(tf.initialize_all_variables()) - - def gradients(self, states, actions): - return self.sess.run(self.action_grads, feed_dict={ - self.state: states, - self.action: actions - })[0] - - def target_train(self): - critic_weights = self.model.get_weights() - critic_target_weights = self.target_model.get_weights() - for i in xrange(len(critic_weights)): - critic_target_weights[i] = self.TAU * critic_weights[i] + \ - (1 - self.TAU) * critic_target_weights[i] - self.target_model.set_weights(critic_target_weights) - - def create_critic_network(self, state_size, action_dim): - print("Now we build the model") - S = Input(shape=[state_size]) - A = Input(shape=[action_dim], name='action2') - w1 = Dense(HIDDEN1_UNITS, activation='relu')(S) - a1 = Dense(HIDDEN2_UNITS, activation='linear')(A) - h1 = Dense(HIDDEN2_UNITS, activation='linear')(w1) - h2 = merge([h1, a1], mode='sum') - h3 = Dense(HIDDEN2_UNITS, activation='relu')(h2) - V = Dense(action_dim, activation='linear')(h3) - model = Model(input=[S, A], output=V) - adam = Adam(lr=self.LEARNING_RATE) - model.compile(loss='mse', optimizer=adam) - return model, A, S -import random -import numpy as np - - -class OU(object): - - def function(self, x, mu, theta, sigma): - return theta * (mu - x) + sigma * np.random.randn(1) -from collections import deque -import random - - -class ReplayBuffer(object): - - def __init__(self, buffer_size): - self.buffer_size = buffer_size - self.num_experiences = 0 - self.buffer = deque() - - def getBatch(self, 
batch_size): - # Randomly sample batch_size examples - if self.num_experiences < batch_size: - return random.sample(self.buffer, self.num_experiences) - else: - return random.sample(self.buffer, batch_size) - - def size(self): - return self.buffer_size - - def add(self, state, action, reward, new_state, done): - experience = (state, action, reward, new_state, done) - if self.num_experiences < self.buffer_size: - self.buffer.append(experience) - self.num_experiences += 1 - else: - self.buffer.popleft() - self.buffer.append(experience) - - def count(self): - # if buffer is full, return buffer size - # otherwise, return experience counter - return self.num_experiences - - def erase(self): - self.buffer = deque() - self.num_experiences = 0 -from gym_torcs import TorcsEnv -import numpy as np -import random -import argparse -from keras.models import model_from_json, Model -from keras.models import Sequential -from keras.layers.core import Dense, Dropout, Activation, Flatten -from keras.optimizers import Adam -import tensorflow as tf -from keras.engine.training import collect_trainable_weights -import json - -from ReplayBuffer import ReplayBuffer -from ActorNetwork import ActorNetwork -from CriticNetwork import CriticNetwork -from OU import OU -import timeit - -OU = OU() # Ornstein-Uhlenbeck Process - - -def playGame(train_indicator=0): # 1 means Train, 0 means simply Run - BUFFER_SIZE = 100000 - BATCH_SIZE = 32 - GAMMA = 0.99 - TAU = 0.001 # Target Network HyperParameters - LRA = 0.0001 # Learning rate for Actor - LRC = 0.001 # Lerning rate for Critic - - action_dim = 3 # Steering/Acceleration/Brake - state_dim = 29 # of sensors input - - np.random.seed(1337) - - vision = False - - EXPLORE = 100000. - episode_count = 2000 - max_steps = 100000 - reward = 0 - done = False - step = 0 - epsilon = 1 - indicator = 0 - - # Tensorflow GPU optimization - config = tf.ConfigProto() - config.gpu_options.allow_growth = True - sess = tf.Session(config=config) - from keras import backend as K - K.set_session(sess) - - actor = ActorNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRA) - critic = CriticNetwork(sess, state_dim, action_dim, BATCH_SIZE, TAU, LRC) - buff = ReplayBuffer(BUFFER_SIZE) # Create replay buffer - - # Generate a Torcs environment - env = TorcsEnv(vision=vision, throttle=True, gear_change=False) - - # Now load the weight - print("Now we load the weight") - try: - actor.model.load_weights("actormodel.h5") - critic.model.load_weights("criticmodel.h5") - actor.target_model.load_weights("actormodel.h5") - critic.target_model.load_weights("criticmodel.h5") - print("Weight load successfully") - except: - print("Cannot find the weight") - - print("TORCS Experiment Start.") - for i in range(episode_count): - - print("Episode : " + str(i) + " Replay Buffer " + str(buff.count())) - - if np.mod(i, 3) == 0: - # relaunch TORCS every 3 episode because of the memory leak error - ob = env.reset(relaunch=True) - else: - ob = env.reset() - - s_t = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, - ob.speedY, ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm)) - - total_reward = 0. 
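# ---- Editor's aside: illustrative sketch, not part of the scraped corpus ----
# The step loop below perturbs the actor's actions with OU.function, one Euler
# step of an Ornstein-Uhlenbeck process: dx = theta * (mu - x) + sigma * N(0, 1).
# Iterated on its own, the noise mean-reverts toward mu:
import numpy as np

_theta, _mu, _sigma = 0.60, 0.0, 0.30   # the steering-noise parameters below
_x = 1.0
for _ in range(5):
    _x += _theta * (_mu - _x) + _sigma * float(np.random.randn())
    print(round(_x, 3))                 # drifts toward mu = 0, with jitter
# ---- end aside ----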
- for j in range(max_steps): - loss = 0 - epsilon -= 1.0 / EXPLORE - a_t = np.zeros([1, action_dim]) - noise_t = np.zeros([1, action_dim]) - - a_t_original = actor.model.predict(s_t.reshape(1, s_t.shape[0])) - noise_t[0][0] = train_indicator * \ - max(epsilon, 0) * \ - OU.function(a_t_original[0][0], 0.0, 0.60, 0.30) - noise_t[0][1] = train_indicator * \ - max(epsilon, 0) * \ - OU.function(a_t_original[0][1], 0.5, 1.00, 0.10) - noise_t[0][2] = train_indicator * \ - max(epsilon, 0) * \ - OU.function(a_t_original[0][2], -0.1, 1.00, 0.05) - - # The following code do the stochastic brake - # if random.random() <= 0.1: - # print("********Now we apply the brake***********") - # noise_t[0][2] = train_indicator * max(epsilon, 0) * OU.function(a_t_original[0][2], 0.2 , 1.00, 0.10) - - a_t[0][0] = a_t_original[0][0] + noise_t[0][0] - a_t[0][1] = a_t_original[0][1] + noise_t[0][1] - a_t[0][2] = a_t_original[0][2] + noise_t[0][2] - - ob, r_t, done, info = env.step(a_t[0]) - - s_t1 = np.hstack((ob.angle, ob.track, ob.trackPos, ob.speedX, - ob.speedY, ob.speedZ, ob.wheelSpinVel/100.0, ob.rpm)) - - buff.add(s_t, a_t[0], r_t, s_t1, done) # Add replay buffer - - # Do the batch update - batch = buff.getBatch(BATCH_SIZE) - states = np.asarray([e[0] for e in batch]) - actions = np.asarray([e[1] for e in batch]) - rewards = np.asarray([e[2] for e in batch]) - new_states = np.asarray([e[3] for e in batch]) - dones = np.asarray([e[4] for e in batch]) - y_t = np.asarray([e[1] for e in batch]) - - target_q_values = critic.target_model.predict( - [new_states, actor.target_model.predict(new_states)]) - - for k in range(len(batch)): - if dones[k]: - y_t[k] = rewards[k] - else: - y_t[k] = rewards[k] + GAMMA*target_q_values[k] - - if (train_indicator): - loss += critic.model.train_on_batch([states, actions], y_t) - a_for_grad = actor.model.predict(states) - grads = critic.gradients(states, a_for_grad) - actor.train(states, grads) - actor.target_train() - critic.target_train() - - total_reward += r_t - s_t = s_t1 - - print("Episode", i, "Step", step, "Action", - a_t, "Reward", r_t, "Loss", loss) - - step += 1 - if done: - break - - if np.mod(i, 3) == 0: - if (train_indicator): - print("Now we save model") - actor.model.save_weights("actormodel.h5", overwrite=True) - with open("actormodel.json", "w") as outfile: - json.dump(actor.model.to_json(), outfile) - - critic.model.save_weights("criticmodel.h5", overwrite=True) - with open("criticmodel.json", "w") as outfile: - json.dump(critic.model.to_json(), outfile) - - print("TOTAL REWARD @ " + str(i) + - "-th Episode : Reward " + str(total_reward)) - print("Total Step: " + str(step)) - print("") - - env.end() # This is for shutting down TORCS - print("Finish.") - - -if __name__ == "__main__": - playGame() -import gym -from gym import spaces -import numpy as np -# from os import path -import snakeoil3_gym as snakeoil3 -import numpy as np -import copy -import collections as col -import os -import time - - -class TorcsEnv: - terminal_judge_start = 100 # If after 100 timestep still no progress, terminated - # [km/h], episode terminates if car is running slower than this limit - termination_limit_progress = 5 - default_speed = 50 - - initial_reset = True - - def __init__(self, vision=False, throttle=False, gear_change=False): - self.vision = vision - self.throttle = throttle - self.gear_change = gear_change - - self.initial_run = True - - ##print("launch torcs") - os.system('pkill torcs') - time.sleep(0.5) - if self.vision is True: - os.system('torcs -nofuel -nodamage -nolaptime 
-vision &') - else: - os.system('torcs -nofuel -nolaptime &') - time.sleep(0.5) - os.system('sh autostart.sh') - time.sleep(0.5) - - """ - # Modify here if you use multiple tracks in the environment - self.client = snakeoil3.Client(p=3101, vision=self.vision) # Open new UDP in vtorcs - self.client.MAX_STEPS = np.inf - - client = self.client - client.get_servers_input() # Get the initial input from torcs - - obs = client.S.d # Get the current full-observation from torcs - """ - if throttle is False: - self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(1,)) - else: - self.action_space = spaces.Box(low=-1.0, high=1.0, shape=(2,)) - - if vision is False: - high = np.array( - [1., np.inf, np.inf, np.inf, 1., np.inf, 1., np.inf]) - low = np.array([0., -np.inf, -np.inf, -np.inf, - 0., -np.inf, 0., -np.inf]) - self.observation_space = spaces.Box(low=low, high=high) - else: - high = np.array([1., np.inf, np.inf, np.inf, - 1., np.inf, 1., np.inf, 255]) - low = np.array([0., -np.inf, -np.inf, -np.inf, - 0., -np.inf, 0., -np.inf, 0]) - self.observation_space = spaces.Box(low=low, high=high) - - def step(self, u): - # print("Step") - # convert thisAction to the actual torcs actionstr - client = self.client - - this_action = self.agent_to_torcs(u) - - # Apply Action - action_torcs = client.R.d - - # Steering - action_torcs['steer'] = this_action['steer'] # in [-1, 1] - - # Simple Autnmatic Throttle Control by Snakeoil - if self.throttle is False: - target_speed = self.default_speed - if client.S.d['speedX'] < target_speed - (client.R.d['steer']*50): - client.R.d['accel'] += .01 - else: - client.R.d['accel'] -= .01 - - if client.R.d['accel'] > 0.2: - client.R.d['accel'] = 0.2 - - if client.S.d['speedX'] < 10: - client.R.d['accel'] += 1/(client.S.d['speedX']+.1) - - # Traction Control System - if ((client.S.d['wheelSpinVel'][2]+client.S.d['wheelSpinVel'][3]) - - (client.S.d['wheelSpinVel'][0]+client.S.d['wheelSpinVel'][1]) > 5): - action_torcs['accel'] -= .2 - else: - action_torcs['accel'] = this_action['accel'] - action_torcs['brake'] = this_action['brake'] - - # Automatic Gear Change by Snakeoil - if self.gear_change is True: - action_torcs['gear'] = this_action['gear'] - else: - # Automatic Gear Change by Snakeoil is possible - action_torcs['gear'] = 1 - if self.throttle: - if client.S.d['speedX'] > 50: - action_torcs['gear'] = 2 - if client.S.d['speedX'] > 80: - action_torcs['gear'] = 3 - if client.S.d['speedX'] > 110: - action_torcs['gear'] = 4 - if client.S.d['speedX'] > 140: - action_torcs['gear'] = 5 - if client.S.d['speedX'] > 170: - action_torcs['gear'] = 6 - # Save the privious full-obs from torcs for the reward calculation - obs_pre = copy.deepcopy(client.S.d) - - # One-Step Dynamics Update ################################# - # Apply the Agent's action into torcs - client.respond_to_server() - # Get the response of TORCS - client.get_servers_input() - - # Get the current full-observation from torcs - obs = client.S.d - - # Make an obsevation from a raw observation vector from TORCS - self.observation = self.make_observaton(obs) - - # Reward setting Here ####################################### - # direction-dependent positive reward - track = np.array(obs['track']) - trackPos = np.array(obs['trackPos']) - sp = np.array(obs['speedX']) - damage = np.array(obs['damage']) - rpm = np.array(obs['rpm']) - - progress = sp*np.cos(obs['angle']) - np.abs(sp * - np.sin(obs['angle'])) - sp * np.abs(obs['trackPos']) - reward = progress - - # collision detection - if obs['damage'] - obs_pre['damage'] > 
0: - reward = -1 - - # Termination judgement ######################### - episode_terminate = False - # if (abs(track.any()) > 1 or abs(trackPos) > 1): # Episode is terminated if the car is out of track - # reward = -200 - # episode_terminate = True - # client.R.d['meta'] = True - - # if self.terminal_judge_start < self.time_step: # Episode terminates if the progress of agent is small - # if progress < self.termination_limit_progress: - # print("No progress") - # episode_terminate = True - # client.R.d['meta'] = True - - if np.cos(obs['angle']) < 0: # Episode is terminated if the agent runs backward - episode_terminate = True - client.R.d['meta'] = True - - if client.R.d['meta'] is True: # Send a reset signal - self.initial_run = False - client.respond_to_server() - - self.time_step += 1 - - return self.get_obs(), reward, client.R.d['meta'], {} - - def reset(self, relaunch=False): - # print("Reset") - - self.time_step = 0 - - if self.initial_reset is not True: - self.client.R.d['meta'] = True - self.client.respond_to_server() - - # TENTATIVE. Restarting TORCS every episode suffers the memory leak bug! - if relaunch is True: - self.reset_torcs() - print("### TORCS is RELAUNCHED ###") - - # Modify here if you use multiple tracks in the environment - self.client = snakeoil3.Client( - p=3101, vision=self.vision) # Open new UDP in vtorcs - self.client.MAX_STEPS = np.inf - - client = self.client - client.get_servers_input() # Get the initial input from torcs - - obs = client.S.d # Get the current full-observation from torcs - self.observation = self.make_observaton(obs) - - self.last_u = None - - self.initial_reset = False - return self.get_obs() - - def end(self): - os.system('pkill torcs') - - def get_obs(self): - return self.observation - - def reset_torcs(self): - #print("relaunch torcs") - os.system('pkill torcs') - time.sleep(0.5) - if self.vision is True: - os.system('torcs -nofuel -nodamage -nolaptime -vision &') - else: - os.system('torcs -nofuel -nolaptime &') - time.sleep(0.5) - os.system('sh autostart.sh') - time.sleep(0.5) - - def agent_to_torcs(self, u): - torcs_action = {'steer': u[0]} - - if self.throttle is True: # throttle action is enabled - torcs_action.update({'accel': u[1]}) - torcs_action.update({'brake': u[2]}) - - if self.gear_change is True: # gear change action is enabled - torcs_action.update({'gear': int(u[3])}) - - return torcs_action - - def obs_vision_to_image_rgb(self, obs_image_vec): - image_vec = obs_image_vec - r = image_vec[0:len(image_vec):3] - g = image_vec[1:len(image_vec):3] - b = image_vec[2:len(image_vec):3] - - sz = (64, 64) - r = np.array(r).reshape(sz) - g = np.array(g).reshape(sz) - b = np.array(b).reshape(sz) - return np.array([r, g, b], dtype=np.uint8) - - def make_observaton(self, raw_obs): - if self.vision is False: - names = ['focus', - 'speedX', 'speedY', 'speedZ', 'angle', 'damage', - 'opponents', - 'rpm', - 'track', - 'trackPos', - 'wheelSpinVel'] - Observation = col.namedtuple('Observaion', names) - return Observation(focus=np.array(raw_obs['focus'], dtype=np.float32)/200., - speedX=np.array( - raw_obs['speedX'], dtype=np.float32)/300.0, - speedY=np.array( - raw_obs['speedY'], dtype=np.float32)/300.0, - speedZ=np.array( - raw_obs['speedZ'], dtype=np.float32)/300.0, - angle=np.array( - raw_obs['angle'], dtype=np.float32)/3.1416, - damage=np.array( - raw_obs['damage'], dtype=np.float32), - opponents=np.array( - raw_obs['opponents'], dtype=np.float32)/200., - rpm=np.array( - raw_obs['rpm'], dtype=np.float32)/10000, - track=np.array( - 
raw_obs['track'], dtype=np.float32)/200., - trackPos=np.array( - raw_obs['trackPos'], dtype=np.float32)/1., - wheelSpinVel=np.array(raw_obs['wheelSpinVel'], dtype=np.float32)) - else: - names = ['focus', - 'speedX', 'speedY', 'speedZ', 'angle', - 'opponents', - 'rpm', - 'track', - 'trackPos', - 'wheelSpinVel', - 'img'] - Observation = col.namedtuple('Observaion', names) - - # Get RGB from observation - image_rgb = self.obs_vision_to_image_rgb(raw_obs[names[8]]) - - return Observation(focus=np.array(raw_obs['focus'], dtype=np.float32)/200., - speedX=np.array( - raw_obs['speedX'], dtype=np.float32)/self.default_speed, - speedY=np.array( - raw_obs['speedY'], dtype=np.float32)/self.default_speed, - speedZ=np.array( - raw_obs['speedZ'], dtype=np.float32)/self.default_speed, - opponents=np.array( - raw_obs['opponents'], dtype=np.float32)/200., - rpm=np.array(raw_obs['rpm'], dtype=np.float32), - track=np.array( - raw_obs['track'], dtype=np.float32)/200., - trackPos=np.array( - raw_obs['trackPos'], dtype=np.float32)/1., - wheelSpinVel=np.array( - raw_obs['wheelSpinVel'], dtype=np.float32), - img=image_rgb) -import sys - -import cv2 -from keras.models import load_model -import numpy as np - -from utils.datasets import get_labels -from utils.inference import detect_faces -from utils.inference import draw_text -from utils.inference import draw_bounding_box -from utils.inference import apply_offsets -from utils.inference import load_detection_model -from utils.inference import load_image -from utils.preprocessor import preprocess_input - -# parameters for loading data and images -image_path = sys.argv[1] -detection_model_path = '../trained_models/detection_models/haarcascade_frontalface_default.xml' -emotion_model_path = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5' -gender_model_path = '../trained_models/gender_models/simple_CNN.81-0.96.hdf5' -emotion_labels = get_labels('fer2013') -gender_labels = get_labels('imdb') -font = cv2.FONT_HERSHEY_SIMPLEX - -# hyper-parameters for bounding boxes shape -gender_offsets = (30, 60) -gender_offsets = (10, 10) -emotion_offsets = (20, 40) -emotion_offsets = (0, 0) - -# loading models -face_detection = load_detection_model(detection_model_path) -emotion_classifier = load_model(emotion_model_path, compile=False) -gender_classifier = load_model(gender_model_path, compile=False) - -# getting input model shapes for inference -emotion_target_size = emotion_classifier.input_shape[1:3] -gender_target_size = gender_classifier.input_shape[1:3] - -# loading images -rgb_image = load_image(image_path, grayscale=False) -gray_image = load_image(image_path, grayscale=True) -gray_image = np.squeeze(gray_image) -gray_image = gray_image.astype('uint8') - -faces = detect_faces(face_detection, gray_image) -for face_coordinates in faces: - x1, x2, y1, y2 = apply_offsets(face_coordinates, gender_offsets) - rgb_face = rgb_image[y1:y2, x1:x2] - - x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets) - gray_face = gray_image[y1:y2, x1:x2] - - try: - rgb_face = cv2.resize(rgb_face, (gender_target_size)) - gray_face = cv2.resize(gray_face, (emotion_target_size)) - except: - continue - - rgb_face = preprocess_input(rgb_face, False) - rgb_face = np.expand_dims(rgb_face, 0) - gender_prediction = gender_classifier.predict(rgb_face) - gender_label_arg = np.argmax(gender_prediction) - gender_text = gender_labels[gender_label_arg] - - gray_face = preprocess_input(gray_face, True) - gray_face = np.expand_dims(gray_face, 0) - gray_face = np.expand_dims(gray_face, 
-1) - emotion_label_arg = np.argmax(emotion_classifier.predict(gray_face)) - emotion_text = emotion_labels[emotion_label_arg] - - if gender_text == gender_labels[0]: - color = (0, 0, 255) - else: - color = (255, 0, 0) - - draw_bounding_box(face_coordinates, rgb_image, color) - draw_text(face_coordinates, rgb_image, gender_text, color, 0, -20, 1, 2) - draw_text(face_coordinates, rgb_image, emotion_text, color, 0, -50, 1, 2) - -bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR) -cv2.imwrite('../images/predicted_test_image.png', bgr_image) -import sys - -import cv2 -import numpy as np -from keras.models import load_model - -from utils.grad_cam import compile_gradient_function -from utils.grad_cam import compile_saliency_function -from utils.grad_cam import register_gradient -from utils.grad_cam import modify_backprop -from utils.grad_cam import calculate_guided_gradient_CAM -from utils.datasets import get_labels -from utils.inference import detect_faces -from utils.inference import apply_offsets -from utils.inference import load_detection_model -from utils.preprocessor import preprocess_input -from utils.inference import draw_bounding_box -from utils.inference import load_image - - -# parameters -image_path = sys.argv[1] -# task = sys.argv[2] -task = 'emotion' -if task == 'emotion': - labels = get_labels('fer2013') - offsets = (0, 0) - # model_filename = '../trained_models/fer2013_big_XCEPTION.54-0.66.hdf5' - model_filename = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5' -elif task == 'gender': - labels = get_labels('imdb') - offsets = (30, 60) - model_filename = '../trained_models/gender_models/gender_mini_XCEPTION.21-0.95.hdf5' - -color = (0, 255, 0) - -# loading models -detection_model_path = '../trained_models/detection_models/haarcascade_frontalface_default.xml' -model = load_model(model_filename, compile=False) -target_size = model.input_shape[1:3] -face_detection = load_detection_model(detection_model_path) - -# loading images -rgb_image = load_image(image_path, grayscale=False) -gray_image = load_image(image_path, grayscale=True) -gray_image = np.squeeze(gray_image) -gray_image = gray_image.astype('uint8') -faces = detect_faces(face_detection, gray_image) - -# start prediction for every image -for face_coordinates in faces: - - x1, x2, y1, y2 = apply_offsets(face_coordinates, offsets) - rgb_face = rgb_image[y1:y2, x1:x2] - - x1, x2, y1, y2 = apply_offsets(face_coordinates, offsets) - gray_face = gray_image[y1:y2, x1:x2] - - # processing input - try: - gray_face = cv2.resize(gray_face, (target_size)) - except: - continue - gray_face = preprocess_input(gray_face, True) - gray_face = np.expand_dims(gray_face, 0) - gray_face = np.expand_dims(gray_face, -1) - - # prediction - predicted_class = np.argmax(model.predict(gray_face)) - label_text = labels[predicted_class] - - gradient_function = compile_gradient_function(model, - predicted_class, 'conv2d_7') - register_gradient() - guided_model = modify_backprop(model, 'GuidedBackProp', task) - saliency_function = compile_saliency_function(guided_model, 'conv2d_7') - - guided_gradCAM = calculate_guided_gradient_CAM(gray_face, - gradient_function, saliency_function) - guided_gradCAM = cv2.resize(guided_gradCAM, (x2-x1, y2-y1)) - rgb_guided_gradCAM = np.repeat(guided_gradCAM[:, :, np.newaxis], 3, axis=2) - rgb_image[y1:y2, x1:x2, :] = rgb_guided_gradCAM - draw_bounding_box((x1, y1, x2 - x1, y2 - y1), rgb_image, color) -bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR) -cv2.imwrite('../images/guided_gradCAM.png', 
bgr_image) -""" -File: train_emotion_classifier.py -Author: Octavio Arriaga -Email: arriaga.camargo@gmail.com -Github: https://github.com/oarriaga -Description: Train emotion classification model -""" - -from keras.callbacks import CSVLogger, ModelCheckpoint, EarlyStopping -from keras.callbacks import ReduceLROnPlateau -from keras.preprocessing.image import ImageDataGenerator - -from models.cnn import mini_XCEPTION -from utils.datasets import DataManager -from utils.datasets import split_data -from utils.preprocessor import preprocess_input - -# parameters -batch_size = 32 -num_epochs = 10000 -input_shape = (64, 64, 1) -validation_split = .2 -verbose = 1 -num_classes = 7 -patience = 50 -base_path = '../trained_models/emotion_models/' - -# data generator -data_generator = ImageDataGenerator( - featurewise_center=False, - featurewise_std_normalization=False, - rotation_range=10, - width_shift_range=0.1, - height_shift_range=0.1, - zoom_range=.1, - horizontal_flip=True) - -# model parameters/compilation -model = mini_XCEPTION(input_shape, num_classes) -model.compile(optimizer='adam', loss='categorical_crossentropy', - metrics=['accuracy']) -model.summary() - - -datasets = ['fer2013'] -for dataset_name in datasets: - print('Training dataset:', dataset_name) - - # callbacks - log_file_path = base_path + dataset_name + '_emotion_training.log' - csv_logger = CSVLogger(log_file_path, append=False) - early_stop = EarlyStopping('val_loss', patience=patience) - reduce_lr = ReduceLROnPlateau('val_loss', factor=0.1, - patience=int(patience/4), verbose=1) - trained_models_path = base_path + dataset_name + '_mini_XCEPTION' - model_names = trained_models_path + '.{epoch:02d}-{val_acc:.2f}.hdf5' - model_checkpoint = ModelCheckpoint(model_names, 'val_loss', verbose=1, - save_best_only=True) - callbacks = [model_checkpoint, csv_logger, early_stop, reduce_lr] - - # loading dataset - data_loader = DataManager(dataset_name, image_size=input_shape[:2]) - faces, emotions = data_loader.get_data() - faces = preprocess_input(faces) - num_samples, num_classes = emotions.shape - train_data, val_data = split_data(faces, emotions, validation_split) - train_faces, train_emotions = train_data - model.fit_generator(data_generator.flow(train_faces, train_emotions, - batch_size), - steps_per_epoch=len(train_faces) / batch_size, - epochs=num_epochs, verbose=1, callbacks=callbacks, - validation_data=val_data) -""" -File: train_gender_classifier.py -Author: Octavio Arriaga -Email: arriaga.camargo@gmail.com -Github: https://github.com/oarriaga -Description: Train gender classification model -""" - -from keras.callbacks import CSVLogger, ModelCheckpoint, EarlyStopping -from keras.callbacks import ReduceLROnPlateau -from utils.datasets import DataManager -from models.cnn import mini_XCEPTION -from utils.data_augmentation import ImageGenerator -from utils.datasets import split_imdb_data - -# parameters -batch_size = 32 -num_epochs = 1000 -validation_split = .2 -do_random_crop = False -patience = 100 -num_classes = 2 -dataset_name = 'imdb' -input_shape = (64, 64, 1) -if input_shape[2] == 1: - grayscale = True -images_path = '../datasets/imdb_crop/' -log_file_path = '../trained_models/gender_models/gender_training.log' -trained_models_path = '../trained_models/gender_models/gender_mini_XCEPTION' - -# data loader -data_loader = DataManager(dataset_name) -ground_truth_data = data_loader.get_data() -train_keys, val_keys = split_imdb_data(ground_truth_data, validation_split) -print('Number of training samples:', len(train_keys)) 
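# ---- Editor's aside: illustrative sketch, not part of the scraped corpus ----
# The emotion trainer above feeds fit_generator from an ImageDataGenerator.
# The same flow() mechanics on stand-in data, with the augmentation settings
# configured above (_-prefixed names are the editor's):
import numpy as np
from keras.preprocessing.image import ImageDataGenerator

_datagen = ImageDataGenerator(rotation_range=10,
                              width_shift_range=0.1,
                              height_shift_range=0.1,
                              zoom_range=.1,
                              horizontal_flip=True)
_faces = np.random.rand(8, 64, 64, 1)               # dummy preprocessed faces
_emotions = np.eye(7)[np.random.randint(0, 7, 8)]   # one-hot, 7 classes
_bx, _by = next(_datagen.flow(_faces, _emotions, batch_size=4))
print(_bx.shape, _by.shape)                         # (4, 64, 64, 1) (4, 7)
# ---- end aside ----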
-print('Number of validation samples:', len(val_keys)) -image_generator = ImageGenerator(ground_truth_data, batch_size, - input_shape[:2], - train_keys, val_keys, None, - path_prefix=images_path, - vertical_flip_probability=0, - grayscale=grayscale, - do_random_crop=do_random_crop) - -# model parameters/compilation -model = mini_XCEPTION(input_shape, num_classes) -model.compile(optimizer='adam', - loss='categorical_crossentropy', - metrics=['accuracy']) -model.summary() - -# model callbacks -early_stop = EarlyStopping('val_loss', patience=patience) -reduce_lr = ReduceLROnPlateau('val_loss', factor=0.1, - patience=int(patience/2), verbose=1) -csv_logger = CSVLogger(log_file_path, append=False) -model_names = trained_models_path + '.{epoch:02d}-{val_acc:.2f}.hdf5' -model_checkpoint = ModelCheckpoint(model_names, - monitor='val_loss', - verbose=1, - save_best_only=True, - save_weights_only=False) -callbacks = [model_checkpoint, csv_logger, early_stop, reduce_lr] - -# training model -model.fit_generator(image_generator.flow(mode='train'), - steps_per_epoch=int(len(train_keys) / batch_size), - epochs=num_epochs, verbose=1, - callbacks=callbacks, - validation_data=image_generator.flow('val'), - validation_steps=int(len(val_keys) / batch_size)) -from statistics import mode - -import cv2 -from keras.models import load_model -import numpy as np - -from utils.datasets import get_labels -from utils.inference import detect_faces -from utils.inference import draw_text -from utils.inference import draw_bounding_box -from utils.inference import apply_offsets -from utils.inference import load_detection_model -from utils.preprocessor import preprocess_input - -# parameters for loading data and images -detection_model_path = '../trained_models/detection_models/haarcascade_frontalface_default.xml' -emotion_model_path = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5' -emotion_labels = get_labels('fer2013') - -# hyper-parameters for bounding boxes shape -frame_window = 10 -emotion_offsets = (20, 40) - -# loading models -face_detection = load_detection_model(detection_model_path) -emotion_classifier = load_model(emotion_model_path, compile=False) - -# getting input model shapes for inference -emotion_target_size = emotion_classifier.input_shape[1:3] - -# starting lists for calculating modes -emotion_window = [] - -# starting video streaming -cv2.namedWindow('window_frame') -video_capture = cv2.VideoCapture(0) -while True: - bgr_image = video_capture.read()[1] - gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY) - rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB) - faces = detect_faces(face_detection, gray_image) - - for face_coordinates in faces: - - x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets) - gray_face = gray_image[y1:y2, x1:x2] - try: - gray_face = cv2.resize(gray_face, (emotion_target_size)) - except: - continue - - gray_face = preprocess_input(gray_face, True) - gray_face = np.expand_dims(gray_face, 0) - gray_face = np.expand_dims(gray_face, -1) - emotion_prediction = emotion_classifier.predict(gray_face) - emotion_probability = np.max(emotion_prediction) - emotion_label_arg = np.argmax(emotion_prediction) - emotion_text = emotion_labels[emotion_label_arg] - emotion_window.append(emotion_text) - - if len(emotion_window) > frame_window: - emotion_window.pop(0) - try: - emotion_mode = mode(emotion_window) - except: - continue - - if emotion_text == 'angry': - color = emotion_probability * np.asarray((255, 0, 0)) - elif emotion_text == 'sad': - color = 
-from statistics import mode
-
-import cv2
-from keras.models import load_model
-import numpy as np
-
-from utils.datasets import get_labels
-from utils.inference import detect_faces
-from utils.inference import draw_text
-from utils.inference import draw_bounding_box
-from utils.inference import apply_offsets
-from utils.inference import load_detection_model
-from utils.preprocessor import preprocess_input
-
-# parameters for loading data and images
-detection_model_path = '../trained_models/detection_models/haarcascade_frontalface_default.xml'
-emotion_model_path = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5'
-emotion_labels = get_labels('fer2013')
-
-# hyper-parameters for bounding boxes shape
-frame_window = 10
-emotion_offsets = (20, 40)
-
-# loading models
-face_detection = load_detection_model(detection_model_path)
-emotion_classifier = load_model(emotion_model_path, compile=False)
-
-# getting input model shapes for inference
-emotion_target_size = emotion_classifier.input_shape[1:3]
-
-# starting lists for calculating modes
-emotion_window = []
-
-# starting video streaming
-cv2.namedWindow('window_frame')
-video_capture = cv2.VideoCapture(0)
-while True:
-    bgr_image = video_capture.read()[1]
-    gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
-    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
-    faces = detect_faces(face_detection, gray_image)
-
-    for face_coordinates in faces:
-
-        x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets)
-        gray_face = gray_image[y1:y2, x1:x2]
-        try:
-            gray_face = cv2.resize(gray_face, (emotion_target_size))
-        except:
-            continue
-
-        gray_face = preprocess_input(gray_face, True)
-        gray_face = np.expand_dims(gray_face, 0)
-        gray_face = np.expand_dims(gray_face, -1)
-        emotion_prediction = emotion_classifier.predict(gray_face)
-        emotion_probability = np.max(emotion_prediction)
-        emotion_label_arg = np.argmax(emotion_prediction)
-        emotion_text = emotion_labels[emotion_label_arg]
-        emotion_window.append(emotion_text)
-
-        if len(emotion_window) > frame_window:
-            emotion_window.pop(0)
-        try:
-            emotion_mode = mode(emotion_window)
-        except:
-            continue
-
-        if emotion_text == 'angry':
-            color = emotion_probability * np.asarray((255, 0, 0))
-        elif emotion_text == 'sad':
-            color = emotion_probability * np.asarray((0, 0, 255))
-        elif emotion_text == 'happy':
-            color = emotion_probability * np.asarray((255, 255, 0))
-        elif emotion_text == 'surprise':
-            color = emotion_probability * np.asarray((0, 255, 255))
-        else:
-            color = emotion_probability * np.asarray((0, 255, 0))
-
-        color = color.astype(int)
-        color = color.tolist()
-
-        draw_bounding_box(face_coordinates, rgb_image, color)
-        draw_text(face_coordinates, rgb_image, emotion_mode,
-                  color, 0, -45, 1, 1)
-
-    bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
-    cv2.imshow('window_frame', bgr_image)
-    if cv2.waitKey(1) & 0xFF == ord('q'):
-        break
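Editor's note: the demo above stabilizes the on-screen label by keeping only the last frame_window predictions and showing their statistical mode. A standalone sketch of that smoothing (the label sequence is made up):

from statistics import mode

frame_window = 10
emotion_window = []
for label in ['happy', 'sad', 'happy', 'happy']:  # stand-in for per-frame predictions
    emotion_window.append(label)
    if len(emotion_window) > frame_window:
        emotion_window.pop(0)
    try:
        print(mode(emotion_window))  # 'happy' once it dominates the window
    except Exception:
        pass  # statistics.mode raises on ties before Python 3.8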
-from statistics import mode
-
-import cv2
-from keras.models import load_model
-import numpy as np
-
-from utils.datasets import get_labels
-from utils.inference import detect_faces
-from utils.inference import draw_text
-from utils.inference import draw_bounding_box
-from utils.inference import apply_offsets
-from utils.inference import load_detection_model
-from utils.preprocessor import preprocess_input
-
-# parameters for loading data and images
-detection_model_path = '../trained_models/detection_models/haarcascade_frontalface_default.xml'
-emotion_model_path = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5'
-gender_model_path = '../trained_models/gender_models/simple_CNN.81-0.96.hdf5'
-emotion_labels = get_labels('fer2013')
-gender_labels = get_labels('imdb')
-font = cv2.FONT_HERSHEY_SIMPLEX
-
-# hyper-parameters for bounding boxes shape
-frame_window = 10
-gender_offsets = (30, 60)
-emotion_offsets = (20, 40)
-
-# loading models
-face_detection = load_detection_model(detection_model_path)
-emotion_classifier = load_model(emotion_model_path, compile=False)
-gender_classifier = load_model(gender_model_path, compile=False)
-
-# getting input model shapes for inference
-emotion_target_size = emotion_classifier.input_shape[1:3]
-gender_target_size = gender_classifier.input_shape[1:3]
-
-# starting lists for calculating modes
-gender_window = []
-emotion_window = []
-
-# starting video streaming
-cv2.namedWindow('window_frame')
-video_capture = cv2.VideoCapture(0)
-while True:
-
-    bgr_image = video_capture.read()[1]
-    gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
-    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
-    faces = detect_faces(face_detection, gray_image)
-
-    for face_coordinates in faces:
-
-        x1, x2, y1, y2 = apply_offsets(face_coordinates, gender_offsets)
-        rgb_face = rgb_image[y1:y2, x1:x2]
-
-        x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets)
-        gray_face = gray_image[y1:y2, x1:x2]
-        try:
-            rgb_face = cv2.resize(rgb_face, (gender_target_size))
-            gray_face = cv2.resize(gray_face, (emotion_target_size))
-        except:
-            continue
-        gray_face = preprocess_input(gray_face, False)
-        gray_face = np.expand_dims(gray_face, 0)
-        gray_face = np.expand_dims(gray_face, -1)
-        emotion_label_arg = np.argmax(emotion_classifier.predict(gray_face))
-        emotion_text = emotion_labels[emotion_label_arg]
-        emotion_window.append(emotion_text)
-
-        rgb_face = np.expand_dims(rgb_face, 0)
-        rgb_face = preprocess_input(rgb_face, False)
-        gender_prediction = gender_classifier.predict(rgb_face)
-        gender_label_arg = np.argmax(gender_prediction)
-        gender_text = gender_labels[gender_label_arg]
-        gender_window.append(gender_text)
-
-        if len(gender_window) > frame_window:
-            emotion_window.pop(0)
-            gender_window.pop(0)
-        try:
-            emotion_mode = mode(emotion_window)
-            gender_mode = mode(gender_window)
-        except:
-            continue
-
-        if gender_text == gender_labels[0]:
-            color = (0, 0, 255)
-        else:
-            color = (255, 0, 0)
-
-        draw_bounding_box(face_coordinates, rgb_image, color)
-        draw_text(face_coordinates, rgb_image, gender_mode,
-                  color, 0, -20, 1, 1)
-        draw_text(face_coordinates, rgb_image, emotion_mode,
-                  color, 0, -45, 1, 1)
-
-    bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
-    cv2.imshow('window_frame', bgr_image)
-    if cv2.waitKey(1) & 0xFF == ord('q'):
-        break
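Editor's note: apply_offsets (defined later in utils/inference.py) expands the detected face box before cropping. A worked example with made-up numbers: a face at x=100, y=80, width=60, height=60 with emotion_offsets = (20, 40) gives

x1, x2, y1, y2 = (100 - 20, 100 + 60 + 20, 80 - 40, 80 + 60 + 40)  # = (80, 180, 40, 180)

so gray_image[40:180, 80:180] crops the face with a 20 px horizontal and 40 px vertical margin before resizing to the model's input size.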
-import sys
-
-import cv2
-import numpy as np
-from keras.models import load_model
-from utils.grad_cam import compile_gradient_function
-from utils.grad_cam import compile_saliency_function
-from utils.grad_cam import register_gradient
-from utils.grad_cam import modify_backprop
-from utils.grad_cam import calculate_guided_gradient_CAM
-from utils.inference import detect_faces
-from utils.inference import apply_offsets
-from utils.inference import load_detection_model
-from utils.preprocessor import preprocess_input
-from utils.inference import draw_bounding_box
-from utils.datasets import get_class_to_arg
-
-# getting the correct model given the input
-# task = sys.argv[1]
-# class_name = sys.argv[2]
-task = 'emotion'
-if task == 'gender':
-    model_filename = '../trained_models/gender_models/gender_mini_XCEPTION.21-0.95.hdf5'
-    class_to_arg = get_class_to_arg('imdb')
-    # predicted_class = class_to_arg[class_name]
-    predicted_class = 0
-    offsets = (0, 0)
-elif task == 'emotion':
-    model_filename = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5'
-    # model_filename = '../trained_models/fer2013_big_XCEPTION.54-0.66.hdf5'
-    class_to_arg = get_class_to_arg('fer2013')
-    # predicted_class = class_to_arg[class_name]
-    predicted_class = 0
-    offsets = (0, 0)
-
-model = load_model(model_filename, compile=False)
-gradient_function = compile_gradient_function(
-    model, predicted_class, 'conv2d_7')
-register_gradient()
-guided_model = modify_backprop(model, 'GuidedBackProp', task)
-saliency_function = compile_saliency_function(guided_model, 'conv2d_7')
-
-# parameters for loading data and images
-detection_model_path = '../trained_models/detection_models/haarcascade_frontalface_default.xml'
-face_detection = load_detection_model(detection_model_path)
-color = (0, 255, 0)
-
-# getting input model shapes for inference
-target_size = model.input_shape[1:3]
-
-# starting lists for calculating modes
-emotion_window = []
-
-# starting video streaming
-cv2.namedWindow('window_frame')
-video_capture = cv2.VideoCapture(0)
-while True:
-    bgr_image = video_capture.read()[1]
-    gray_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2GRAY)
-    rgb_image = cv2.cvtColor(bgr_image, cv2.COLOR_BGR2RGB)
-    faces = detect_faces(face_detection, gray_image)
-
-    for face_coordinates in faces:
-
-        x1, x2, y1, y2 = apply_offsets(face_coordinates, offsets)
-        gray_face = gray_image[y1:y2, x1:x2]
-        try:
-            gray_face = cv2.resize(gray_face, (target_size))
-        except:
-            continue
-
-        gray_face = preprocess_input(gray_face, True)
-        gray_face = np.expand_dims(gray_face, 0)
-        gray_face = np.expand_dims(gray_face, -1)
-        guided_gradCAM = calculate_guided_gradient_CAM(gray_face,
-                                                       gradient_function, saliency_function)
-        guided_gradCAM = cv2.resize(guided_gradCAM, (x2-x1, y2-y1))
-        try:
-            rgb_guided_gradCAM = np.repeat(guided_gradCAM[:, :, np.newaxis],
-                                           3, axis=2)
-            rgb_image[y1:y2, x1:x2, :] = rgb_guided_gradCAM
-        except:
-            continue
-        draw_bounding_box((x1, y1, x2 - x1, y2 - y1), rgb_image, color)
-    bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
-    try:
-        cv2.imshow('window_frame', bgr_image)
-    except:
-        continue
-    if cv2.waitKey(1) & 0xFF == ord('q'):
-        break
-from keras.layers import Activation, Convolution2D, Dropout, Conv2D
-from keras.layers import AveragePooling2D, BatchNormalization
-from keras.layers import GlobalAveragePooling2D
-from keras.models import Sequential
-from keras.layers import Flatten
-from keras.models import Model
-from keras.layers import Input
-from keras.layers import MaxPooling2D
-from keras.layers import SeparableConv2D
-from keras import layers
-from keras.regularizers import l2
-
-
-def simple_CNN(input_shape, num_classes):
-
-    model = Sequential()
-    model.add(Convolution2D(filters=16, kernel_size=(7, 7), padding='same',
-                            name='image_array', input_shape=input_shape))
-    model.add(BatchNormalization())
-    model.add(Convolution2D(filters=16, kernel_size=(7, 7), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Activation('relu'))
-    model.add(AveragePooling2D(pool_size=(2, 2), padding='same'))
-    model.add(Dropout(.5))
-
-    model.add(Convolution2D(filters=32, kernel_size=(5, 5), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Convolution2D(filters=32, kernel_size=(5, 5), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Activation('relu'))
-    model.add(AveragePooling2D(pool_size=(2, 2), padding='same'))
-    model.add(Dropout(.5))
-
-    model.add(Convolution2D(filters=64, kernel_size=(3, 3), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Convolution2D(filters=64, kernel_size=(3, 3), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Activation('relu'))
-    model.add(AveragePooling2D(pool_size=(2, 2), padding='same'))
-    model.add(Dropout(.5))
-
-    model.add(Convolution2D(filters=128, kernel_size=(3, 3), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Convolution2D(filters=128, kernel_size=(3, 3), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Activation('relu'))
-    model.add(AveragePooling2D(pool_size=(2, 2), padding='same'))
-    model.add(Dropout(.5))
-
-    model.add(Convolution2D(filters=256, kernel_size=(3, 3), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Convolution2D(
-        filters=num_classes, kernel_size=(3, 3), padding='same'))
-    model.add(GlobalAveragePooling2D())
-    model.add(Activation('softmax', name='predictions'))
-    return model
-
-
-def simpler_CNN(input_shape, num_classes):
-
-    model = Sequential()
-    model.add(Convolution2D(filters=16, kernel_size=(5, 5), padding='same',
-                            name='image_array', input_shape=input_shape))
-    model.add(BatchNormalization())
-    model.add(Convolution2D(filters=16, kernel_size=(5, 5),
-                            strides=(2, 2), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Activation('relu'))
-    model.add(Dropout(.25))
-
-    model.add(Convolution2D(filters=32, kernel_size=(5, 5), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Convolution2D(filters=32, kernel_size=(5, 5),
-                            strides=(2, 2), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Activation('relu'))
-    model.add(Dropout(.25))
-
-    model.add(Convolution2D(filters=64, kernel_size=(3, 3), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Convolution2D(filters=64, kernel_size=(3, 3),
-                            strides=(2, 2), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Activation('relu'))
-    model.add(Dropout(.25))
-
-    model.add(Convolution2D(filters=64, kernel_size=(1, 1), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Convolution2D(filters=128, kernel_size=(3, 3),
-                            strides=(2, 2), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Activation('relu'))
-    model.add(Dropout(.25))
-
-    model.add(Convolution2D(filters=256, kernel_size=(1, 1), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Convolution2D(filters=128, kernel_size=(3, 3),
-                            strides=(2, 2), padding='same'))
-
-    model.add(Convolution2D(filters=256, kernel_size=(1, 1), padding='same'))
-    model.add(BatchNormalization())
-    model.add(Convolution2D(filters=num_classes, kernel_size=(3, 3),
-                            strides=(2, 2), padding='same'))
-
-    model.add(Flatten())
-    # model.add(GlobalAveragePooling2D())
-    model.add(Activation('softmax', name='predictions'))
-    return model
-
-
-def tiny_XCEPTION(input_shape, num_classes, l2_regularization=0.01):
-    regularization = l2(l2_regularization)
-
-    # base
-    img_input = Input(input_shape)
-    x = Conv2D(5, (3, 3), strides=(1, 1), kernel_regularizer=regularization,
-               use_bias=False)(img_input)
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-    x = Conv2D(5, (3, 3), strides=(1, 1), kernel_regularizer=regularization,
-               use_bias=False)(x)
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-
-    # module 1
-    residual = Conv2D(8, (1, 1), strides=(2, 2),
-                      padding='same', use_bias=False)(x)
-    residual = BatchNormalization()(residual)
-
-    x = SeparableConv2D(8, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-    x = SeparableConv2D(8, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-
-    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
-    x = layers.add([x, residual])
-
-    # module 2
-    residual = Conv2D(16, (1, 1), strides=(2, 2),
-                      padding='same', use_bias=False)(x)
-    residual = BatchNormalization()(residual)
-
-    x = SeparableConv2D(16, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-    x = SeparableConv2D(16, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-
-    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
-    x = layers.add([x, residual])
-
-    # module 3
-    residual = Conv2D(32, (1, 1), strides=(2, 2),
-                      padding='same', use_bias=False)(x)
-    residual = BatchNormalization()(residual)
-
-    x = SeparableConv2D(32, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-    x = SeparableConv2D(32, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-
-    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
-    x = layers.add([x, residual])
-
-    # module 4
-    residual = Conv2D(64, (1, 1), strides=(2, 2),
-                      padding='same', use_bias=False)(x)
-    residual = BatchNormalization()(residual)
-
-    x = SeparableConv2D(64, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-    x = SeparableConv2D(64, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-
-    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
-    x = layers.add([x, residual])
-
-    x = Conv2D(num_classes, (3, 3),
-               # kernel_regularizer=regularization,
-               padding='same')(x)
-    x = GlobalAveragePooling2D()(x)
-    output = Activation('softmax', name='predictions')(x)
-
-    model = Model(img_input, output)
-    return model
-
-
-def mini_XCEPTION(input_shape, num_classes, l2_regularization=0.01):
-    regularization = l2(l2_regularization)
-
-    # base
-    img_input = Input(input_shape)
-    x = Conv2D(8, (3, 3), strides=(1, 1), kernel_regularizer=regularization,
-               use_bias=False)(img_input)
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-    x = Conv2D(8, (3, 3), strides=(1, 1), kernel_regularizer=regularization,
-               use_bias=False)(x)
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-
-    # module 1
-    residual = Conv2D(16, (1, 1), strides=(2, 2),
-                      padding='same', use_bias=False)(x)
-    residual = BatchNormalization()(residual)
-
-    x = SeparableConv2D(16, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-    x = SeparableConv2D(16, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-
-    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
-    x = layers.add([x, residual])
-
-    # module 2
-    residual = Conv2D(32, (1, 1), strides=(2, 2),
-                      padding='same', use_bias=False)(x)
-    residual = BatchNormalization()(residual)
-
-    x = SeparableConv2D(32, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-    x = SeparableConv2D(32, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-
-    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
-    x = layers.add([x, residual])
-
-    # module 3
-    residual = Conv2D(64, (1, 1), strides=(2, 2),
-                      padding='same', use_bias=False)(x)
-    residual = BatchNormalization()(residual)
-
-    x = SeparableConv2D(64, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-    x = SeparableConv2D(64, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-
-    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
-    x = layers.add([x, residual])
-
-    # module 4
-    residual = Conv2D(128, (1, 1), strides=(2, 2),
-                      padding='same', use_bias=False)(x)
-    residual = BatchNormalization()(residual)
-
-    x = SeparableConv2D(128, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-    x = Activation('relu')(x)
-    x = SeparableConv2D(128, (3, 3), padding='same',
-                        kernel_regularizer=regularization,
-                        use_bias=False)(x)
-    x = BatchNormalization()(x)
-
-    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
-    x = layers.add([x, residual])
-
-    x = Conv2D(num_classes, (3, 3),
-               # kernel_regularizer=regularization,
-               padding='same')(x)
-    x = GlobalAveragePooling2D()(x)
-    output = Activation('softmax', name='predictions')(x)
-
-    model = Model(img_input, output)
-    return model
-
-
-def big_XCEPTION(input_shape, num_classes):
-    img_input = Input(input_shape)
-    x = Conv2D(32, (3, 3), strides=(2, 2), use_bias=False)(img_input)
-    x = BatchNormalization(name='block1_conv1_bn')(x)
-    x = Activation('relu', name='block1_conv1_act')(x)
-    x = Conv2D(64, (3, 3), use_bias=False)(x)
-    x = BatchNormalization(name='block1_conv2_bn')(x)
-    x = Activation('relu', name='block1_conv2_act')(x)
-
-    residual = Conv2D(128, (1, 1), strides=(2, 2),
-                      padding='same', use_bias=False)(x)
-    residual = BatchNormalization()(residual)
-
-    x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False)(x)
-    x = BatchNormalization(name='block2_sepconv1_bn')(x)
-    x = Activation('relu', name='block2_sepconv2_act')(x)
-    x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False)(x)
-    x = BatchNormalization(name='block2_sepconv2_bn')(x)
-
-    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
-    x = layers.add([x, residual])
-
-    residual = Conv2D(256, (1, 1), strides=(2, 2),
-                      padding='same', use_bias=False)(x)
-    residual = BatchNormalization()(residual)
-
-    x = Activation('relu', name='block3_sepconv1_act')(x)
-    x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False)(x)
-    x = BatchNormalization(name='block3_sepconv1_bn')(x)
-    x = Activation('relu', name='block3_sepconv2_act')(x)
-    x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False)(x)
-    x = BatchNormalization(name='block3_sepconv2_bn')(x)
-
-    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same')(x)
-    x = layers.add([x, residual])
-    x = Conv2D(num_classes, (3, 3),
-               # kernel_regularizer=regularization,
-               padding='same')(x)
-    x = GlobalAveragePooling2D()(x)
-    output = Activation('softmax', name='predictions')(x)
-
-    model = Model(img_input, output)
-    return model
-
-
-if __name__ == "__main__":
-    input_shape = (64, 64, 1)
-    num_classes = 7
-    # model = tiny_XCEPTION(input_shape, num_classes)
-    # model.summary()
-    # model = mini_XCEPTION(input_shape, num_classes)
-    # model.summary()
-    # model = big_XCEPTION(input_shape, num_classes)
-    # model.summary()
-    model = simple_CNN((48, 48, 1), num_classes)
-    model.summary()
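Editor's note: every XCEPTION variant above ends in a num_classes-channel convolution followed by GlobalAveragePooling2D instead of a dense head, so the output is one softmax score per class. A quick shape check (my sketch; adjust the import to wherever this module lives in your tree):

model = mini_XCEPTION((64, 64, 1), 7)
print(model.output_shape)  # (None, 7): one probability per fer2013 emotion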
-import numpy as np
-from random import shuffle
-from .preprocessor import preprocess_input
-from .preprocessor import _imread as imread
-from .preprocessor import _imresize as imresize
-from .preprocessor import to_categorical
-import scipy.ndimage as ndi
-import cv2
-
-
-class ImageGenerator(object):
-    """ Image generator with saturation, brightness, lighting, contrast,
-    horizontal flip and vertical flip transformations. It supports
-    bounding boxes coordinates.
-
-    TODO:
-        - Finish support for not using bounding_boxes
-        - Random crop
-        - Test other transformations
-    """
-
-    def __init__(self, ground_truth_data, batch_size, image_size,
-                 train_keys, validation_keys,
-                 ground_truth_transformer=None,
-                 path_prefix=None,
-                 saturation_var=0.5,
-                 brightness_var=0.5,
-                 contrast_var=0.5,
-                 lighting_std=0.5,
-                 horizontal_flip_probability=0.5,
-                 vertical_flip_probability=0.5,
-                 do_random_crop=False,
-                 grayscale=False,
-                 zoom_range=[0.75, 1.25],
-                 translation_factor=.3):
-
-        self.ground_truth_data = ground_truth_data
-        self.ground_truth_transformer = ground_truth_transformer
-        self.batch_size = batch_size
-        self.path_prefix = path_prefix
-        self.train_keys = train_keys
-        self.validation_keys = validation_keys
-        self.image_size = image_size
-        self.grayscale = grayscale
-        self.color_jitter = []
-        if saturation_var:
-            self.saturation_var = saturation_var
-            self.color_jitter.append(self.saturation)
-        if brightness_var:
-            self.brightness_var = brightness_var
-            self.color_jitter.append(self.brightness)
-        if contrast_var:
-            self.contrast_var = contrast_var
-            self.color_jitter.append(self.contrast)
-        self.lighting_std = lighting_std
-        self.horizontal_flip_probability = horizontal_flip_probability
-        self.vertical_flip_probability = vertical_flip_probability
-        self.do_random_crop = do_random_crop
-        self.zoom_range = zoom_range
-        self.translation_factor = translation_factor
-
-    def _do_random_crop(self, image_array):
-        """IMPORTANT: random crop only works for classification since the
-        current implementation does not transform bounding boxes"""
-        height = image_array.shape[0]
-        width = image_array.shape[1]
-        x_offset = np.random.uniform(0, self.translation_factor * width)
-        y_offset = np.random.uniform(0, self.translation_factor * height)
-        offset = np.array([x_offset, y_offset])
-        scale_factor = np.random.uniform(self.zoom_range[0],
-                                         self.zoom_range[1])
-        crop_matrix = np.array([[scale_factor, 0],
-                                [0, scale_factor]])
-
-        image_array = np.rollaxis(image_array, axis=-1, start=0)
-        image_channel = [ndi.interpolation.affine_transform(image_channel,
-                         crop_matrix, offset=offset, order=0, mode='nearest',
-                         cval=0.0) for image_channel in image_array]
-
-        image_array = np.stack(image_channel, axis=0)
-        image_array = np.rollaxis(image_array, 0, 3)
-        return image_array
-
-    def do_random_rotation(self, image_array):
-        """IMPORTANT: random rotation only works for classification since the
-        current implementation does not transform bounding boxes"""
-        # NOTE: despite its name, this currently duplicates the random-crop
-        # transform above; no rotation matrix is applied.
-        height = image_array.shape[0]
-        width = image_array.shape[1]
-        x_offset = np.random.uniform(0, self.translation_factor * width)
-        y_offset = np.random.uniform(0, self.translation_factor * height)
-        offset = np.array([x_offset, y_offset])
-        scale_factor = np.random.uniform(self.zoom_range[0],
-                                         self.zoom_range[1])
-        crop_matrix = np.array([[scale_factor, 0],
-                                [0, scale_factor]])
-
-        image_array = np.rollaxis(image_array, axis=-1, start=0)
-        image_channel = [ndi.interpolation.affine_transform(image_channel,
-                         crop_matrix, offset=offset, order=0, mode='nearest',
-                         cval=0.0) for image_channel in image_array]
-
-        image_array = np.stack(image_channel, axis=0)
-        image_array = np.rollaxis(image_array, 0, 3)
-        return image_array
-
-    def _gray_scale(self, image_array):
-        return image_array.dot([0.299, 0.587, 0.114])
-
-    def saturation(self, image_array):
-        gray_scale = self._gray_scale(image_array)
-        # alpha is drawn uniformly from [1 - var, 1 + var]
-        alpha = 2.0 * np.random.random() * self.saturation_var
-        alpha = alpha + 1 - self.saturation_var
-        image_array = (alpha * image_array + (1 - alpha) *
-                       gray_scale[:, :, None])
-        return np.clip(image_array, 0, 255)
-
-    def brightness(self, image_array):
-        alpha = 2 * np.random.random() * self.brightness_var
-        alpha = alpha + 1 - self.brightness_var
-        image_array = alpha * image_array
-        return np.clip(image_array, 0, 255)
-
-    def contrast(self, image_array):
-        gray_scale = (self._gray_scale(image_array).mean() *
-                      np.ones_like(image_array))
-        alpha = 2 * np.random.random() * self.contrast_var
-        alpha = alpha + 1 - self.contrast_var
-        image_array = image_array * alpha + (1 - alpha) * gray_scale
-        return np.clip(image_array, 0, 255)
-
-    def lighting(self, image_array):
-        covariance_matrix = np.cov(image_array.reshape(-1, 3) /
-                                   255.0, rowvar=False)
-        eigen_values, eigen_vectors = np.linalg.eigh(covariance_matrix)
-        noise = np.random.randn(3) * self.lighting_std
-        noise = eigen_vectors.dot(eigen_values * noise) * 255
-        image_array = image_array + noise
-        return np.clip(image_array, 0, 255)
-
-    def horizontal_flip(self, image_array, box_corners=None):
-        if np.random.random() < self.horizontal_flip_probability:
-            image_array = image_array[:, ::-1]
-            if box_corners is not None:
-                box_corners[:, [0, 2]] = 1 - box_corners[:, [2, 0]]
-        return image_array, box_corners
-
-    def vertical_flip(self, image_array, box_corners=None):
-        if (np.random.random() < self.vertical_flip_probability):
-            image_array = image_array[::-1]
-            if box_corners is not None:
-                box_corners[:, [1, 3]] = 1 - box_corners[:, [3, 1]]
-        return image_array, box_corners
-
-    def transform(self, image_array, box_corners=None):
-        shuffle(self.color_jitter)
-        for jitter in self.color_jitter:
-            image_array = jitter(image_array)
-
-        if self.lighting_std:
-            image_array = self.lighting(image_array)
-
-        if self.horizontal_flip_probability > 0:
-            image_array, box_corners = self.horizontal_flip(image_array,
-                                                            box_corners)
-
-        if self.vertical_flip_probability > 0:
-            image_array, box_corners = self.vertical_flip(image_array,
-                                                          box_corners)
-        return image_array, box_corners
-
-    def preprocess_images(self, image_array):
-        return preprocess_input(image_array)
-
-    def flow(self, mode='train'):
-        while True:
-            if mode == 'train':
-                shuffle(self.train_keys)
-                keys = self.train_keys
-            elif mode == 'val' or mode == 'demo':
-                shuffle(self.validation_keys)
-                keys = self.validation_keys
-            else:
-                raise Exception('invalid mode: %s' % mode)
-
-            inputs = []
-            targets = []
-            for key in keys:
-                image_path = self.path_prefix + key
-                image_array = imread(image_path)
-                image_array = imresize(image_array, self.image_size)
-
-                num_image_channels = len(image_array.shape)
-                if num_image_channels != 3:
-                    continue
-
-                ground_truth = self.ground_truth_data[key]
-
-                if self.do_random_crop:
-                    image_array = self._do_random_crop(image_array)
-
-                image_array = image_array.astype('float32')
-                if mode == 'train' or mode == 'demo':
-                    if self.ground_truth_transformer is not None:
-                        image_array, ground_truth = self.transform(
-                            image_array,
-                            ground_truth)
-                        ground_truth = (
-                            self.ground_truth_transformer.assign_boxes(
-                                ground_truth))
-                    else:
-                        image_array = self.transform(image_array)[0]
-
-                if self.grayscale:
-                    image_array = cv2.cvtColor(
-                        image_array.astype('uint8'),
-                        cv2.COLOR_RGB2GRAY).astype('float32')
-                    image_array = np.expand_dims(image_array, -1)
-
-                inputs.append(image_array)
-                targets.append(ground_truth)
-                if len(targets) == self.batch_size:
-                    inputs = np.asarray(inputs)
-                    targets = np.asarray(targets)
-                    # this will not work for boxes
-                    targets = to_categorical(targets)
-                    if mode == 'train' or mode == 'val':
-                        inputs = self.preprocess_images(inputs)
-                        yield self._wrap_in_dictionary(inputs, targets)
-                    if mode == 'demo':
-                        yield self._wrap_in_dictionary(inputs, targets)
-                    inputs = []
-                    targets = []
-
-    def _wrap_in_dictionary(self, image_array, targets):
-        return [{'input_1': image_array},
-                {'predictions': targets}]
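Editor's note: with the saturation/brightness variables untangled, each color jitter above draws alpha uniformly from [1 - var, 1 + var] and scales or blends the image. The brightness transform in isolation, as a tiny numpy sketch:

import numpy as np

brightness_var = 0.5
alpha = 2 * np.random.random() * brightness_var + 1 - brightness_var  # uniform in [0.5, 1.5]
image = np.full((2, 2, 3), 100.0)
print(np.clip(alpha * image, 0, 255)[0, 0, 0])  # pixel scaled by alpha, clipped to [0, 255]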
-from scipy.io import loadmat
-import pandas as pd
-import numpy as np
-from random import shuffle
-import os
-import cv2
-
-
-class DataManager(object):
-    """Class for loading fer2013 emotion classification dataset or
-    imdb gender classification dataset."""
-
-    def __init__(self, dataset_name='imdb',
-                 dataset_path=None, image_size=(48, 48)):
-
-        self.dataset_name = dataset_name
-        self.dataset_path = dataset_path
-        self.image_size = image_size
-        if self.dataset_path is not None:
-            self.dataset_path = dataset_path
-        elif self.dataset_name == 'imdb':
-            self.dataset_path = '../datasets/imdb_crop/imdb.mat'
-        elif self.dataset_name == 'fer2013':
-            self.dataset_path = '../datasets/fer2013/fer2013.csv'
-        elif self.dataset_name == 'KDEF':
-            self.dataset_path = '../datasets/KDEF/'
-        else:
-            raise Exception(
-                'Incorrect dataset name, please input imdb, fer2013 or KDEF')
-
-    def get_data(self):
-        if self.dataset_name == 'imdb':
-            ground_truth_data = self._load_imdb()
-        elif self.dataset_name == 'fer2013':
-            ground_truth_data = self._load_fer2013()
-        elif self.dataset_name == 'KDEF':
-            ground_truth_data = self._load_KDEF()
-        return ground_truth_data
-
-    def _load_imdb(self):
-        face_score_threshold = 3
-        dataset = loadmat(self.dataset_path)
-        image_names_array = dataset['imdb']['full_path'][0, 0][0]
-        gender_classes = dataset['imdb']['gender'][0, 0][0]
-        face_score = dataset['imdb']['face_score'][0, 0][0]
-        second_face_score = dataset['imdb']['second_face_score'][0, 0][0]
-        face_score_mask = face_score > face_score_threshold
-        second_face_score_mask = np.isnan(second_face_score)
-        known_gender_mask = np.logical_not(np.isnan(gender_classes))
-        mask = np.logical_and(face_score_mask, second_face_score_mask)
-        mask = np.logical_and(mask, known_gender_mask)
-        image_names_array = image_names_array[mask]
-        gender_classes = gender_classes[mask].tolist()
-        image_names = []
-        for image_name_arg in range(image_names_array.shape[0]):
-            image_name = image_names_array[image_name_arg][0]
-            image_names.append(image_name)
-        return dict(zip(image_names, gender_classes))
-
-    def _load_fer2013(self):
-        data = pd.read_csv(self.dataset_path)
-        pixels = data['pixels'].tolist()
-        width, height = 48, 48
-        faces = []
-        for pixel_sequence in pixels:
-            face = [int(pixel) for pixel in pixel_sequence.split(' ')]
-            face = np.asarray(face).reshape(width, height)
-            face = cv2.resize(face.astype('uint8'), self.image_size)
-            faces.append(face.astype('float32'))
-        faces = np.asarray(faces)
-        faces = np.expand_dims(faces, -1)
-        emotions = pd.get_dummies(data['emotion']).as_matrix()
-        return faces, emotions
-
-    def _load_KDEF(self):
-        class_to_arg = get_class_to_arg(self.dataset_name)
-        num_classes = len(class_to_arg)
-
-        file_paths = []
-        for folder, subfolders, filenames in os.walk(self.dataset_path):
-            for filename in filenames:
-                if filename.lower().endswith(('.jpg')):
-                    file_paths.append(os.path.join(folder, filename))
-
-        num_faces = len(file_paths)
-        y_size, x_size = self.image_size
-        faces = np.zeros(shape=(num_faces, y_size, x_size))
-        emotions = np.zeros(shape=(num_faces, num_classes))
-        for file_arg, file_path in enumerate(file_paths):
-            image_array = cv2.imread(file_path, cv2.IMREAD_GRAYSCALE)
-            image_array = cv2.resize(image_array, (y_size, x_size))
-            faces[file_arg] = image_array
-            file_basename = os.path.basename(file_path)
-            file_emotion = file_basename[4:6]
-            # there are two file names in the dataset
-            # that don't match the given classes
-            try:
-                emotion_arg = class_to_arg[file_emotion]
-            except:
-                continue
-            emotions[file_arg, emotion_arg] = 1
-        faces = np.expand_dims(faces, -1)
-        return faces, emotions
-
-
-def get_labels(dataset_name):
-    if dataset_name == 'fer2013':
-        return {0: 'angry', 1: 'disgust', 2: 'fear', 3: 'happy',
-                4: 'sad', 5: 'surprise', 6: 'neutral'}
-    elif dataset_name == 'imdb':
-        return {0: 'woman', 1: 'man'}
-    elif dataset_name == 'KDEF':
-        return {0: 'AN', 1: 'DI', 2: 'AF', 3: 'HA', 4: 'SA', 5: 'SU', 6: 'NE'}
-    else:
-        raise Exception('Invalid dataset name')
-
-
-def get_class_to_arg(dataset_name='fer2013'):
-    if dataset_name == 'fer2013':
-        return {'angry': 0, 'disgust': 1, 'fear': 2, 'happy': 3, 'sad': 4,
-                'surprise': 5, 'neutral': 6}
-    elif dataset_name == 'imdb':
-        return {'woman': 0, 'man': 1}
-    elif dataset_name == 'KDEF':
-        return {'AN': 0, 'DI': 1, 'AF': 2, 'HA': 3, 'SA': 4, 'SU': 5, 'NE': 6}
-    else:
-        raise Exception('Invalid dataset name')
-
-
-def split_imdb_data(ground_truth_data, validation_split=.2, do_shuffle=False):
-    ground_truth_keys = sorted(ground_truth_data.keys())
-    if do_shuffle is not False:
-        shuffle(ground_truth_keys)
-    training_split = 1 - validation_split
-    num_train = int(training_split * len(ground_truth_keys))
-    train_keys = ground_truth_keys[:num_train]
-    validation_keys = ground_truth_keys[num_train:]
-    return train_keys, validation_keys
-
-
-def split_data(x, y, validation_split=.2):
-    num_samples = len(x)
-    num_train_samples = int((1 - validation_split)*num_samples)
-    train_x = x[:num_train_samples]
-    train_y = y[:num_train_samples]
-    val_x = x[num_train_samples:]
-    val_y = y[num_train_samples:]
-    train_data = (train_x, train_y)
-    val_data = (val_x, val_y)
-    return train_data, val_data
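Editor's note: split_data is a plain front/back split with no shuffling, so callers are expected to shuffle beforehand (split_imdb_data does). Quick usage sketch:

import numpy as np

x, y = np.arange(10), np.arange(10) % 2
(train_x, train_y), (val_x, val_y) = split_data(x, y, validation_split=.2)
print(len(train_x), len(val_x))  # 8 2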
-import cv2
-import h5py
-import keras
-import keras.backend as K
-from keras.layers.core import Lambda
-from keras.models import Sequential
-from keras.models import load_model
-import numpy as np
-import tensorflow as tf
-from tensorflow.python.framework import ops
-
-from .preprocessor import preprocess_input
-
-
-def reset_optimizer_weights(model_filename):
-    model = h5py.File(model_filename, 'r+')
-    del model['optimizer_weights']
-    model.close()
-
-
-def target_category_loss(x, category_index, num_classes):
-    return tf.multiply(x, K.one_hot([category_index], num_classes))
-
-
-def target_category_loss_output_shape(input_shape):
-    return input_shape
-
-
-def normalize(x):
-    # utility function to normalize a tensor by its L2 norm
-    return x / (K.sqrt(K.mean(K.square(x))) + 1e-5)
-
-
-def load_image(image_array):
-    image_array = np.expand_dims(image_array, axis=0)
-    image_array = preprocess_input(image_array)
-    return image_array
-
-
-def register_gradient():
-    if "GuidedBackProp" not in ops._gradient_registry._registry:
-        @ops.RegisterGradient("GuidedBackProp")
-        def _GuidedBackProp(op, gradient):
-            dtype = op.inputs[0].dtype
-            guided_gradient = (gradient * tf.cast(gradient > 0., dtype) *
-                               tf.cast(op.inputs[0] > 0., dtype))
-            return guided_gradient
-
-
-def compile_saliency_function(model, activation_layer='conv2d_7'):
-    input_image = model.input
-    layer_output = model.get_layer(activation_layer).output
-    max_output = K.max(layer_output, axis=3)
-    saliency = K.gradients(K.sum(max_output), input_image)[0]
-    return K.function([input_image, K.learning_phase()], [saliency])
-
-
-def modify_backprop(model, name, task):
-    graph = tf.get_default_graph()
-    with graph.gradient_override_map({'Relu': name}):
-
-        # get layers that have an activation
-        activation_layers = [layer for layer in model.layers
-                             if hasattr(layer, 'activation')]
-
-        # replace relu activation
-        for layer in activation_layers:
-            if layer.activation == keras.activations.relu:
-                layer.activation = tf.nn.relu
-
-        # re-instantiate a new model
-        if task == 'gender':
-            model_path = '../trained_models/gender_models/gender_mini_XCEPTION.21-0.95.hdf5'
-        elif task == 'emotion':
-            model_path = '../trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5'
-            # model_path = '../trained_models/fer2013_mini_XCEPTION.119-0.65.hdf5'
-            # model_path = '../trained_models/fer2013_big_XCEPTION.54-0.66.hdf5'
-        new_model = load_model(model_path, compile=False)
-    return new_model
-
-
-def deprocess_image(x):
-    """ Same normalization as in:
-    https://github.com/fchollet/keras/blob/master/examples/conv_filter_visualization.py
-    """
-    if np.ndim(x) > 3:
-        x = np.squeeze(x)
-    # normalize tensor: center on 0., ensure std is 0.1
-    x = x - x.mean()
-    x = x / (x.std() + 1e-5)
-    x = x * 0.1
-
-    # clip to [0, 1]
-    x = x + 0.5
-    x = np.clip(x, 0, 1)
-
-    # convert to RGB array
-    x = x * 255
-    if K.image_dim_ordering() == 'th':
-        x = x.transpose((1, 2, 0))
-    x = np.clip(x, 0, 255).astype('uint8')
-    return x
-
-
-def compile_gradient_function(input_model, category_index, layer_name):
-    model = Sequential()
-    model.add(input_model)
-
-    num_classes = model.output_shape[1]
-    def target_layer(x): return target_category_loss(
-        x, category_index, num_classes)
-    model.add(Lambda(target_layer,
-                     output_shape=target_category_loss_output_shape))
-
-    loss = K.sum(model.layers[-1].output)
-    conv_output = model.layers[0].get_layer(layer_name).output
-    gradients = normalize(K.gradients(loss, conv_output)[0])
-    gradient_function = K.function([model.layers[0].input, K.learning_phase()],
-                                   [conv_output, gradients])
-    return gradient_function
-
-
-def calculate_gradient_weighted_CAM(gradient_function, image):
-    output, evaluated_gradients = gradient_function([image, False])
-    output, evaluated_gradients = output[0, :], evaluated_gradients[0, :, :, :]
-    weights = np.mean(evaluated_gradients, axis=(0, 1))
-    CAM = np.ones(output.shape[0: 2], dtype=np.float32)
-    for weight_arg, weight in enumerate(weights):
-        CAM = CAM + (weight * output[:, :, weight_arg])
-    CAM = cv2.resize(CAM, (64, 64))
-    CAM = np.maximum(CAM, 0)
-    heatmap = CAM / np.max(CAM)
-
-    # Return to BGR [0..255] from the preprocessed image
-    image = image[0, :]
-    image = image - np.min(image)
-    image = np.minimum(image, 255)
-
-    CAM = cv2.applyColorMap(np.uint8(255 * heatmap), cv2.COLORMAP_JET)
-    CAM = np.float32(CAM) + np.float32(image)
-    CAM = 255 * CAM / np.max(CAM)
-    return np.uint8(CAM), heatmap
-
-
-def calculate_guided_gradient_CAM(
-        preprocessed_input, gradient_function, saliency_function):
-    CAM, heatmap = calculate_gradient_weighted_CAM(
-        gradient_function, preprocessed_input)
-    saliency = saliency_function([preprocessed_input, 0])
-    # gradCAM = saliency[0] * heatmap[..., np.newaxis]
-    # return deprocess_image(gradCAM)
-    return deprocess_image(saliency[0])
-    # return saliency[0]
-
-
-def calculate_guided_gradient_CAM_v2(
-        preprocessed_input, gradient_function,
-        saliency_function, target_size=(128, 128)):
-    CAM, heatmap = calculate_gradient_weighted_CAM(
-        gradient_function, preprocessed_input)
-    heatmap = np.squeeze(heatmap)
-    heatmap = cv2.resize(heatmap.astype('uint8'), target_size)
-    saliency = saliency_function([preprocessed_input, 0])
-    saliency = np.squeeze(saliency[0])
-    saliency = cv2.resize(saliency.astype('uint8'), target_size)
-    gradCAM = saliency * heatmap
-    gradCAM = deprocess_image(gradCAM)
-    return np.expand_dims(gradCAM, -1)
-
-
-if __name__ == '__main__':
-    import pickle
-    faces = pickle.load(open('faces.pkl', 'rb'))
-    face = faces[0]
-    model_filename = '../../trained_models/emotion_models/mini_XCEPTION.523-0.65.hdf5'
-    # reset_optimizer_weights(model_filename)
-    model = load_model(model_filename)
-
-    preprocessed_input = load_image(face)
-    predictions = model.predict(preprocessed_input)
-    predicted_class = np.argmax(predictions)
-    gradient_function = compile_gradient_function(
-        model, predicted_class, 'conv2d_6')
-    register_gradient()
-    # 'emotion' supplies the required task argument the original call left out
-    guided_model = modify_backprop(model, 'GuidedBackProp', 'emotion')
-    saliency_function = compile_saliency_function(guided_model)
-    guided_gradCAM = calculate_guided_gradient_CAM(
-        preprocessed_input, gradient_function, saliency_function)
-
-    cv2.imwrite('guided_gradCAM.jpg', guided_gradCAM)
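Editor's note: calculate_gradient_weighted_CAM above follows the Grad-CAM recipe: average the gradients over the spatial axes to get one weight per feature map, take the weighted sum of the maps, and clip at zero. A numpy-only illustration with dummy tensors (the shapes are mine):

import numpy as np

output = np.random.rand(8, 8, 16)          # conv feature maps A_k
gradients = np.random.randn(8, 8, 16)      # gradients of the class score w.r.t. A_k
weights = np.mean(gradients, axis=(0, 1))  # one weight per channel
CAM = np.maximum(np.tensordot(output, weights, axes=([2], [0])), 0)
print((CAM / CAM.max()).shape)             # (8, 8) heatmap, as in the function above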
-import cv2
-import matplotlib.pyplot as plt
-import numpy as np
-from keras.preprocessing import image
-
-
-def load_image(image_path, grayscale=False, target_size=None):
-    pil_image = image.load_img(image_path, grayscale, target_size)
-    return image.img_to_array(pil_image)
-
-
-def load_detection_model(model_path):
-    detection_model = cv2.CascadeClassifier(model_path)
-    return detection_model
-
-
-def detect_faces(detection_model, gray_image_array):
-    return detection_model.detectMultiScale(gray_image_array, 1.3, 5)
-
-
-def draw_bounding_box(face_coordinates, image_array, color):
-    x, y, w, h = face_coordinates
-    cv2.rectangle(image_array, (x, y), (x + w, y + h), color, 2)
-
-
-def apply_offsets(face_coordinates, offsets):
-    x, y, width, height = face_coordinates
-    x_off, y_off = offsets
-    return (x - x_off, x + width + x_off, y - y_off, y + height + y_off)
-
-
-def draw_text(coordinates, image_array, text, color, x_offset=0, y_offset=0,
-              font_scale=2, thickness=2):
-    x, y = coordinates[:2]
-    cv2.putText(image_array, text, (x + x_offset, y + y_offset),
-                cv2.FONT_HERSHEY_SIMPLEX,
-                font_scale, color, thickness, cv2.LINE_AA)
-
-
-def get_colors(num_classes):
-    colors = plt.cm.hsv(np.linspace(0, 1, num_classes)).tolist()
-    colors = np.asarray(colors) * 255
-    return colors
-import numpy as np
-from scipy.misc import imread, imresize
-
-
-def preprocess_input(x, v2=True):
-    x = x.astype('float32')
-    x = x / 255.0
-    if v2:
-        x = x - 0.5
-        x = x * 2.0
-    return x
-
-
-def _imread(image_name):
-    return imread(image_name)
-
-
-def _imresize(image_array, size):
-    return imresize(image_array, size)
-
-
-def to_categorical(integer_classes, num_classes=2):
-    integer_classes = np.asarray(integer_classes, dtype='int')
-    num_samples = integer_classes.shape[0]
-    categorical = np.zeros((num_samples, num_classes))
-    categorical[np.arange(num_samples), integer_classes] = 1
-    return categorical
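Editor's note: preprocess_input with v2=True maps pixel values from [0, 255] to [-1, 1] (divide by 255, subtract 0.5, double). Worked check:

import numpy as np

x = np.array([0., 127.5, 255.]) / 255.0
print((x - 0.5) * 2.0)  # [-1.  0.  1.]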
-import numpy as np
-import matplotlib.cm as cm
-from mpl_toolkits.axes_grid1 import make_axes_locatable
-import matplotlib.pyplot as plt
-import numpy.ma as ma
-
-
-def make_mosaic(images, num_rows, num_cols, border=1, class_names=None):
-    num_images = len(images)
-    image_shape = images.shape[1:]
-    mosaic = ma.masked_all(
-        (num_rows * image_shape[0] + (num_rows - 1) * border,
-         num_cols * image_shape[1] + (num_cols - 1) * border),
-        dtype=np.float32)
-    paddedh = image_shape[0] + border
-    paddedw = image_shape[1] + border
-    for image_arg in range(num_images):
-        row = int(np.floor(image_arg / num_cols))
-        col = image_arg % num_cols
-        image = np.squeeze(images[image_arg])
-        image_shape = image.shape
-        mosaic[row * paddedh:row * paddedh + image_shape[0],
-               col * paddedw:col * paddedw + image_shape[1]] = image
-    return mosaic
-
-
-def make_mosaic_v2(images, num_mosaic_rows=None,
-                   num_mosaic_cols=None, border=1):
-    images = np.squeeze(images)
-    num_images, image_pixels_rows, image_pixels_cols = images.shape
-    if num_mosaic_rows is None and num_mosaic_cols is None:
-        box_size = int(np.ceil(np.sqrt(num_images)))
-        num_mosaic_rows = num_mosaic_cols = box_size
-    num_mosaic_pixel_rows = num_mosaic_rows * (image_pixels_rows + border)
-    num_mosaic_pixel_cols = num_mosaic_cols * (image_pixels_cols + border)
-    mosaic = np.empty(shape=(num_mosaic_pixel_rows, num_mosaic_pixel_cols))
-    mosaic_col_arg = 0
-    mosaic_row_arg = 0
-    for image_arg in range(num_images):
-        if image_arg % num_mosaic_cols == 0 and image_arg != 0:
-            mosaic_col_arg = mosaic_col_arg + 1
-            mosaic_row_arg = 0
-        x0 = image_pixels_cols * (mosaic_row_arg)
-        x1 = image_pixels_cols * (mosaic_row_arg + 1)
-        y0 = image_pixels_rows * (mosaic_col_arg)
-        y1 = image_pixels_rows * (mosaic_col_arg + 1)
-        image = images[image_arg]
-        mosaic[y0:y1, x0:x1] = image
-        mosaic_row_arg = mosaic_row_arg + 1
-    return mosaic
-
-
-def pretty_imshow(axis, data, vmin=None, vmax=None, cmap=None):
-    if cmap is None:
-        cmap = cm.jet
-    if vmin is None:
-        vmin = data.min()
-    if vmax is None:
-        vmax = data.max()
-    cax = None
-    divider = make_axes_locatable(axis)
-    cax = divider.append_axes('right', size='5%', pad=0.05)
-    image = axis.imshow(data, vmin=vmin, vmax=vmax,
-                        interpolation='nearest', cmap=cmap)
-    plt.colorbar(image, cax=cax)
-
-
-def normal_imshow(axis, data, vmin=None, vmax=None,
-                  cmap=None, axis_off=True):
-    if cmap is None:
-        cmap = cm.jet
-    if vmin is None:
-        vmin = data.min()
-    if vmax is None:
-        vmax = data.max()
-    image = axis.imshow(data, vmin=vmin, vmax=vmax,
-                        interpolation='nearest', cmap=cmap)
-    if axis_off:
-        plt.axis('off')
-    return image
-
-
-def display_image(face, class_vector=None,
-                  class_decoder=None, pretty=False):
-    if class_vector is not None and class_decoder is None:
-        raise Exception('Provide class decoder')
-    face = np.squeeze(face)
-    color_map = None
-    if len(face.shape) < 3:
-        color_map = 'gray'
-    plt.figure()
-    if class_vector is not None:
-        class_arg = np.argmax(class_vector)
-        class_name = class_decoder[class_arg]
-        plt.title(class_name)
-    if pretty:
-        pretty_imshow(plt.gca(), face, cmap=color_map)
-    else:
-        plt.imshow(face, color_map)
-
-
-def draw_mosaic(data, num_rows, num_cols, class_vectors=None,
-                class_decoder=None, cmap='gray'):
-
-    if class_vectors is not None and class_decoder is None:
-        raise Exception('Provide class decoder')
-
-    figure, axis_array = plt.subplots(num_rows, num_cols)
-    figure.set_size_inches(8, 8, forward=True)
-    titles = []
-    if class_vectors is not None:
-        for vector_arg in range(len(class_vectors)):
-            class_arg = np.argmax(class_vectors[vector_arg])
-            class_name = class_decoder[class_arg]
-            titles.append(class_name)
-
-    image_arg = 0
-    for row_arg in range(num_rows):
-        for col_arg in range(num_cols):
-            image = data[image_arg]
-            image = np.squeeze(image)
-            axis_array[row_arg, col_arg].axis('off')
-            axis_array[row_arg, col_arg].imshow(image, cmap=cmap)
-            axis_array[row_arg, col_arg].set_title(titles[image_arg])
-            image_arg = image_arg + 1
-    plt.tight_layout()
-
-
-if __name__ == '__main__':
-    # from utils.data_manager import DataManager
-    from utils.utils import get_labels
-    from keras.models import load_model
-    import pickle
-
-    # dataset_name = 'fer2013'
-    # model_path = '../trained_models/emotion_models/simple_CNN.985-0.66.hdf5'
-    dataset_name = 'fer2013'
-    class_decoder = get_labels(dataset_name)
-    # data_manager = DataManager(dataset_name)
-    # faces, emotions = data_manager.get_data()
-    faces = pickle.load(open('faces.pkl', 'rb'))
-    emotions = pickle.load(open('emotions.pkl', 'rb'))
-    pretty_imshow(plt.gca(), make_mosaic(faces[:4], 2, 2), cmap='gray')
-    plt.show()
-
-    """
-    image_arg = 0
-    face = faces[image_arg:image_arg + 1]
-    emotion = emotions[image_arg:image_arg + 1]
-    display_image(face, emotion, class_decoder)
-    plt.show()
-
-    normal_imshow(plt.gca(), make_mosaic(faces[:4], 3, 3), cmap='gray')
-    plt.show()
-
-    draw_mosaic(faces, 2, 2, emotions, class_decoder)
-    plt.show()
-
-    """
-    model = load_model(
-        '../trained_models/emotion_models/simple_CNN.985-0.66.hdf5')
-    conv1_weights = model.layers[2].get_weights()
-    kernel_conv1_weights = conv1_weights[0]
-    kernel_conv1_weights = np.squeeze(kernel_conv1_weights)
-    kernel_conv1_weights = np.rollaxis(kernel_conv1_weights, 2, 0)
-    kernel_conv1_weights = np.expand_dims(kernel_conv1_weights, -1)
-    num_kernels = kernel_conv1_weights.shape[0]
-    box_size = int(np.ceil(np.sqrt(num_kernels)))
-    print('Box size:', box_size)
-
-    print('Kernel shape', kernel_conv1_weights.shape)
-    plt.figure(figsize=(15, 15))
-    plt.title('conv1 weights')
-    pretty_imshow(
-        plt.gca(),
-        make_mosaic(kernel_conv1_weights, box_size, box_size),
-        cmap=cm.binary)
-    plt.show()
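Editor's note: when no grid is given, make_mosaic_v2 above lays images out on the smallest square grid, box_size = ceil(sqrt(num_images)); a quick check of that arithmetic:

import numpy as np

for n in (4, 7, 16):
    print(n, int(np.ceil(np.sqrt(n))))  # 4 -> 2, 7 -> 3, 16 -> 4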
-import os
-import sys
-import logging
-
-import cv2
-from keras.models import load_model
-import numpy as np
-
-from utils.datasets import get_labels
-from utils.inference import detect_faces
-from utils.inference import draw_text
-from utils.inference import draw_bounding_box
-from utils.inference import apply_offsets
-from utils.inference import load_detection_model
-from utils.inference import load_image
-from utils.preprocessor import preprocess_input
-
-
-def process_image(image):
-
-    try:
-        # parameters for loading data and images
-        detection_model_path = './trained_models/detection_models/haarcascade_frontalface_default.xml'
-        emotion_model_path = './trained_models/emotion_models/fer2013_mini_XCEPTION.102-0.66.hdf5'
-        gender_model_path = './trained_models/gender_models/simple_CNN.81-0.96.hdf5'
-        emotion_labels = get_labels('fer2013')
-        gender_labels = get_labels('imdb')
-        font = cv2.FONT_HERSHEY_SIMPLEX
-
-        # hyper-parameters for bounding boxes shape
-        gender_offsets = (30, 60)
-        gender_offsets = (10, 10)  # overrides the defaults above
-        emotion_offsets = (20, 40)
-        emotion_offsets = (0, 0)  # overrides the defaults above
-
-        # loading models
-        face_detection = load_detection_model(detection_model_path)
-        emotion_classifier = load_model(emotion_model_path, compile=False)
-        gender_classifier = load_model(gender_model_path, compile=False)
-
-        # getting input model shapes for inference
-        emotion_target_size = emotion_classifier.input_shape[1:3]
-        gender_target_size = gender_classifier.input_shape[1:3]
-
-        # loading images
-        image_array = np.fromstring(image, np.uint8)
-        unchanged_image = cv2.imdecode(image_array, cv2.IMREAD_UNCHANGED)
-
-        rgb_image = cv2.cvtColor(unchanged_image, cv2.COLOR_BGR2RGB)
-        gray_image = cv2.cvtColor(unchanged_image, cv2.COLOR_BGR2GRAY)
-
-        faces = detect_faces(face_detection, gray_image)
-        for face_coordinates in faces:
-            x1, x2, y1, y2 = apply_offsets(face_coordinates, gender_offsets)
-            rgb_face = rgb_image[y1:y2, x1:x2]
-
-            x1, x2, y1, y2 = apply_offsets(face_coordinates, emotion_offsets)
-            gray_face = gray_image[y1:y2, x1:x2]
-
-            try:
-                rgb_face = cv2.resize(rgb_face, (gender_target_size))
-                gray_face = cv2.resize(gray_face, (emotion_target_size))
-            except:
-                continue
-
-            rgb_face = preprocess_input(rgb_face, False)
-            rgb_face = np.expand_dims(rgb_face, 0)
-            gender_prediction = gender_classifier.predict(rgb_face)
-            gender_label_arg = np.argmax(gender_prediction)
-            gender_text = gender_labels[gender_label_arg]
-
-            gray_face = preprocess_input(gray_face, True)
-            gray_face = np.expand_dims(gray_face, 0)
-            gray_face = np.expand_dims(gray_face, -1)
-            emotion_label_arg = np.argmax(
-                emotion_classifier.predict(gray_face))
-            emotion_text = emotion_labels[emotion_label_arg]
-
-            if gender_text == gender_labels[0]:
-                color = (0, 0, 255)
-            else:
-                color = (255, 0, 0)
-
-            draw_bounding_box(face_coordinates, rgb_image, color)
-            draw_text(face_coordinates, rgb_image,
-                      gender_text, color, 0, -20, 1, 2)
-            draw_text(face_coordinates, rgb_image,
-                      emotion_text, color, 0, -50, 1, 2)
-    except Exception as err:
-        logging.error('Error in emotion gender processor: "{0}"'.format(err))
-
-    bgr_image = cv2.cvtColor(rgb_image, cv2.COLOR_RGB2BGR)
-
-    dirname = 'result'
-    if not os.path.exists(dirname):
-        os.mkdir(dirname)
-
-    cv2.imwrite(os.path.join(dirname, 'predicted_image.png'), bgr_image)
-from flask import Flask, jsonify, make_response, request, abort, redirect, send_file
-import logging
-
-import emotion_gender_processor as eg_processor
-
-app = Flask(__name__)
-
-
-@app.route('/')
-def index():
-    return redirect("https://ekholabs.ai", code=302)
-
-
-@app.route('/classifyImage', methods=['POST'])
-def upload():
-    try:
-        image = request.files['image'].read()
-        eg_processor.process_image(image)
-        return send_file('/ekholabs/face-classifier/result/predicted_image.png', mimetype='image/png')
-    except Exception as err:
-        logging.error(
-            'An error has occurred whilst processing the file: "{0}"'.format(err))
-        abort(400)
-
-
-@app.errorhandler(400)
-def bad_request(error):
-    return make_response(jsonify({'error': 'We cannot process the file sent in the request.'}), 400)
-
-
-@app.errorhandler(404)
-def not_found(error):
-    return make_response(jsonify({'error': 'Resource not found.'}), 404)
-
-
-if __name__ == '__main__':
-    app.run(debug=True, host='0.0.0.0', port=8084)
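Editor's note: the service above takes a multipart upload on /classifyImage and replies with the annotated PNG. A typical client call once the app listens on port 8084 (file names are placeholders):

import requests

with open('face.jpg', 'rb') as f:
    response = requests.post('http://localhost:8084/classifyImage',
                             files={'image': f})
with open('predicted_image.png', 'wb') as out:
    out.write(response.content)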
", string) - string = re.sub(r"\(", " \( ", string) - string = re.sub(r"\)", " \) ", string) - string = re.sub(r"\?", " \? ", string) - string = re.sub(r"\s{2,}", " ", string) - return string.strip().lower() - - -def load_data_and_labels(): - """ - Loads MR polarity data from files, splits the data into words and generates labels. - Returns split sentences and labels. - """ - # Load data from files - positive_examples = list(open("./data/rt-polarity.pos").readlines()) - positive_examples = [s.strip() for s in positive_examples] - negative_examples = list(open("./data/rt-polarity.neg").readlines()) - negative_examples = [s.strip() for s in negative_examples] - # Split by words - x_text = positive_examples + negative_examples - x_text = [clean_str(sent) for sent in x_text] - x_text = [s.split(" ") for s in x_text] - # Generate labels - positive_labels = [[0, 1] for _ in positive_examples] - negative_labels = [[1, 0] for _ in negative_examples] - y = np.concatenate([positive_labels, negative_labels], 0) - return [x_text, y] - - -def pad_sentences(sentences, padding_word=""): - """ - Pads all sentences to the same length. The length is defined by the longest sentence. - Returns padded sentences. - """ - sequence_length = max(len(x) for x in sentences) - padded_sentences = [] - for i in range(len(sentences)): - sentence = sentences[i] - num_padding = sequence_length - len(sentence) - new_sentence = sentence + [padding_word] * num_padding - padded_sentences.append(new_sentence) - return padded_sentences - - -def build_vocab(sentences): - """ - Builds a vocabulary mapping from word to index based on the sentences. - Returns vocabulary mapping and inverse vocabulary mapping. - """ - # Build vocabulary - word_counts = Counter(itertools.chain(*sentences)) - # Mapping from index to word - vocabulary_inv = [x[0] for x in word_counts.most_common()] - # Mapping from word to index - vocabulary = {x: i for i, x in enumerate(vocabulary_inv)} - return [vocabulary, vocabulary_inv] - - -def build_input_data(sentences, labels, vocabulary): - """ - Maps sentencs and labels to vectors based on a vocabulary. - """ - x = np.array([[vocabulary[word] for word in sentence] - for sentence in sentences]) - y = np.array(labels) - return [x, y] - - -def load_data(): - """ - Loads and preprocessed data for the MR dataset. - Returns input vectors, labels, vocabulary, and inverse vocabulary. - """ - # Load and preprocess data - sentences, labels = load_data_and_labels() - sentences_padded = pad_sentences(sentences) - vocabulary, vocabulary_inv = build_vocab(sentences_padded) - x, y = build_input_data(sentences_padded, labels, vocabulary) - return [x, y, vocabulary, vocabulary_inv] - - -def batch_iter(data, batch_size, num_epochs): - """ - Generates a batch iterator for a dataset. - """ - data = np.array(data) - data_size = len(data) - num_batches_per_epoch = int(len(data) / batch_size) + 1 - for epoch in range(num_epochs): - # Shuffle the data at each epoch - shuffle_indices = np.random.permutation(np.arange(data_size)) - shuffled_data = data[shuffle_indices] - for batch_num in range(num_batches_per_epoch): - start_index = batch_num * batch_size - end_index = min((batch_num + 1) * batch_size, data_size) - yield shuffled_data[start_index:end_index] -""" -Train convolutional network for sentiment analysis on IMDB corpus. 
-"""
-Train convolutional network for sentiment analysis on IMDB corpus. Based on
-"Convolutional Neural Networks for Sentence Classification" by Yoon Kim
-http://arxiv.org/pdf/1408.5882v2.pdf
-
-"CNN-rand" and "CNN-non-static" reach 88-90%, and "CNN-static" about 85%,
-after 2-5 epochs with the following settings:
-embedding_dim = 50
-filter_sizes = (3, 8)
-num_filters = 10
-dropout_prob = (0.5, 0.8)
-hidden_dims = 50
-
-Differences from the original article:
-- larger IMDB corpus, longer sentences; sentence length is very important, just like data size
-- smaller embedding dimension, 50 instead of 300
-- 2 filter sizes instead of the original 3
-- fewer filters; the original work uses 100, but experiments show that 3-10 is enough
-- random initialization is no worse than word2vec init on the IMDB corpus
-- sliding Max Pooling instead of the original Global Pooling
-"""
-
-import numpy as np
-import data_helpers
-from w2v import train_word2vec
-
-from keras.models import Sequential, Model
-from keras.layers import Dense, Dropout, Flatten, Input, MaxPooling1D, Convolution1D, Embedding
-from keras.layers.merge import Concatenate
-from keras.datasets import imdb
-from keras.preprocessing import sequence
-np.random.seed(0)
-
-# ---------------------- Parameters section -------------------
-#
-# Model type. See Kim Yoon's Convolutional Neural Networks for Sentence Classification, Section 3
-model_type = "CNN-non-static"  # CNN-rand|CNN-non-static|CNN-static
-
-# Data source
-data_source = "keras_data_set"  # keras_data_set|local_dir
-
-# Model Hyperparameters
-embedding_dim = 50
-filter_sizes = (3, 8)
-num_filters = 10
-dropout_prob = (0.5, 0.8)
-hidden_dims = 50
-
-# Training parameters
-batch_size = 64
-num_epochs = 10
-
-# Preprocessing parameters
-sequence_length = 400
-max_words = 5000
-
-# Word2Vec parameters (see train_word2vec)
-min_word_count = 1
-context = 10
-
-#
-# ---------------------- Parameters end -----------------------
-
-
-def load_data(data_source):
-    assert data_source in ["keras_data_set",
-                           "local_dir"], "Unknown data source"
-    if data_source == "keras_data_set":
-        (x_train, y_train), (x_test, y_test) = imdb.load_data(num_words=max_words, start_char=None,
-                                                              oov_char=None, index_from=None)
-
-        x_train = sequence.pad_sequences(
-            x_train, maxlen=sequence_length, padding="post", truncating="post")
-        x_test = sequence.pad_sequences(
-            x_test, maxlen=sequence_length, padding="post", truncating="post")
-
-        vocabulary = imdb.get_word_index()
-        vocabulary_inv = dict((v, k) for k, v in vocabulary.items())
-        vocabulary_inv[0] = "<PAD/>"
-    else:
-        x, y, vocabulary, vocabulary_inv_list = data_helpers.load_data()
-        vocabulary_inv = {key: value for key,
-                          value in enumerate(vocabulary_inv_list)}
-        y = y.argmax(axis=1)
-
-        # Shuffle data
-        shuffle_indices = np.random.permutation(np.arange(len(y)))
-        x = x[shuffle_indices]
-        y = y[shuffle_indices]
-        train_len = int(len(x) * 0.9)
-        x_train = x[:train_len]
-        y_train = y[:train_len]
-        x_test = x[train_len:]
-        y_test = y[train_len:]
-
-    return x_train, y_train, x_test, y_test, vocabulary_inv
-
-
-# Data Preparation
-print("Load data...")
-x_train, y_train, x_test, y_test, vocabulary_inv = load_data(data_source)
-
-if sequence_length != x_test.shape[1]:
-    print("Adjusting sequence length for actual size")
-    sequence_length = x_test.shape[1]
-
-print("x_train shape:", x_train.shape)
-print("x_test shape:", x_test.shape)
-print("Vocabulary Size: {:d}".format(len(vocabulary_inv)))
-
-# Prepare embedding layer weights and convert inputs for static model
-print("Model type is", model_type)
-if model_type in ["CNN-non-static", "CNN-static"]:
-    embedding_weights = train_word2vec(np.vstack((x_train, x_test)), vocabulary_inv, num_features=embedding_dim,
-                                       min_word_count=min_word_count, context=context)
-    if model_type == "CNN-static":
-        x_train = np.stack([np.stack([embedding_weights[word]
-                                      for word in sentence]) for sentence in x_train])
-        x_test = np.stack([np.stack([embedding_weights[word]
-                                     for word in sentence]) for sentence in x_test])
-        print("x_train static shape:", x_train.shape)
-        print("x_test static shape:", x_test.shape)
-
-elif model_type == "CNN-rand":
-    embedding_weights = None
-else:
-    raise ValueError("Unknown model type")
-
-# Build model
-if model_type == "CNN-static":
-    input_shape = (sequence_length, embedding_dim)
-else:
-    input_shape = (sequence_length,)
-
-model_input = Input(shape=input_shape)
-
-# Static model does not have embedding layer
-if model_type == "CNN-static":
-    z = model_input
-else:
-    z = Embedding(len(vocabulary_inv), embedding_dim,
-                  input_length=sequence_length, name="embedding")(model_input)
-
-z = Dropout(dropout_prob[0])(z)
-
-# Convolutional block
-conv_blocks = []
-for sz in filter_sizes:
-    conv = Convolution1D(filters=num_filters,
-                         kernel_size=sz,
-                         padding="valid",
-                         activation="relu",
-                         strides=1)(z)
-    conv = MaxPooling1D(pool_size=2)(conv)
-    conv = Flatten()(conv)
-    conv_blocks.append(conv)
-z = Concatenate()(conv_blocks) if len(conv_blocks) > 1 else conv_blocks[0]
-
-z = Dropout(dropout_prob[1])(z)
-z = Dense(hidden_dims, activation="relu")(z)
-model_output = Dense(1, activation="sigmoid")(z)
-
-model = Model(model_input, model_output)
-model.compile(loss="binary_crossentropy",
-              optimizer="adam", metrics=["accuracy"])
-
-# Initialize weights with word2vec
-if model_type == "CNN-non-static":
-    weights = np.array([v for v in embedding_weights.values()])
-    print("Initializing embedding layer with word2vec weights, shape", weights.shape)
-    embedding_layer = model.get_layer("embedding")
-    embedding_layer.set_weights([weights])
-
-# Train the model
-model.fit(x_train, y_train, batch_size=batch_size, epochs=num_epochs,
-          validation_data=(x_test, y_test), verbose=2)
- - inputs: - sentence_matrix # int matrix: num_sentences x max_sentence_len - vocabulary_inv # dict {int: str} - num_features # Word vector dimensionality - min_word_count # Minimum word count - context # Context window size - """ - model_dir = 'models' - model_name = "{:d}features_{:d}minwords_{:d}context".format( - num_features, min_word_count, context) - model_name = join(model_dir, model_name) - if exists(model_name): - embedding_model = word2vec.Word2Vec.load(model_name) - print('Loaded existing Word2Vec model \'%s\'' % split(model_name)[-1]) - else: - # Set values for various parameters - num_workers = 2 # Number of threads to run in parallel - downsampling = 1e-3 # Downsample setting for frequent words - - # Initialize and train the model - print('Training Word2Vec model...') - sentences = [[vocabulary_inv[w] for w in s] for s in sentence_matrix] - embedding_model = word2vec.Word2Vec(sentences, workers=num_workers, - size=num_features, min_count=min_word_count, - window=context, sample=downsampling) - - # If we don't plan to train the model any further, calling - # init_sims will make the model much more memory-efficient. - embedding_model.init_sims(replace=True) - - # Saving the model for later use. You can load it later using Word2Vec.load() - if not exists(model_dir): - os.mkdir(model_dir) - print('Saving Word2Vec model \'%s\'' % split(model_name)[-1]) - embedding_model.save(model_name) - - # add unknown words - embedding_weights = {key: embedding_model[word] if word in embedding_model else - np.random.uniform(-0.25, 0.25, - embedding_model.vector_size) - for key, word in vocabulary_inv.items()} - return embedding_weights - - -if __name__ == '__main__': - import data_helpers - - print("Loading data...") - x, _, _, vocabulary_inv_list = data_helpers.load_data() - vocabulary_inv = {key: value for key, - value in enumerate(vocabulary_inv_list)} - w = train_word2vec(x, vocabulary_inv) -# -*- coding:utf-8 -*- -import os -import ocr -import time -import shutil -import numpy as np -from PIL import Image -from glob import glob -image_files = glob('./test_images/*.*') - - -if __name__ == '__main__': - result_dir = './test_result' - if os.path.exists(result_dir): - shutil.rmtree(result_dir) - os.mkdir(result_dir) - - for image_file in sorted(image_files): - image = np.array(Image.open(image_file).convert('RGB')) - t = time.time() - result, image_framed = ocr.model(image) - output_file = os.path.join(result_dir, image_file.split('/')[-1]) - Image.fromarray(image_framed).save(output_file) - print("Mission complete, it took {:.3f}s".format(time.time() - t)) - print("\nRecognition Result:\n") - for key in result: - print(result[key][1]) -# -*- coding:utf-8 -*- -from densenet.model import predict as keras_densenet -from lib.fast_rcnn.config import cfg_from_file -from ctpn.text_detect import text_detect -import os -import sys -import cv2 -from math import * -import numpy as np -from PIL import Image - -sys.path.append(os.getcwd() + '/ctpn') - - -def sort_box(box): - """ - Sort the boxes top-to-bottom (by the sum of their y-coordinates). - """ - box = sorted(box, key=lambda x: sum([x[1], x[3], x[5], x[7]])) - return box - - -def dumpRotateImage(img, degree, pt1, pt2, pt3, pt4): - height, width = img.shape[:2] - heightNew = int(width * fabs(sin(radians(degree))) + - height * fabs(cos(radians(degree)))) - widthNew = int(height * fabs(sin(radians(degree))) + - width * fabs(cos(radians(degree)))) - matRotation = cv2.getRotationMatrix2D((width // 2, height // 2), degree, 1) - matRotation[0, 2] += (widthNew - width) // 2 - matRotation[1, 2] += (heightNew - 
height) // 2 - imgRotation = cv2.warpAffine( - img, matRotation, (widthNew, heightNew), borderValue=(255, 255, 255)) - pt1 = list(pt1) - pt3 = list(pt3) - - [[pt1[0]], [pt1[1]]] = np.dot( - matRotation, np.array([[pt1[0]], [pt1[1]], [1]])) - [[pt3[0]], [pt3[1]]] = np.dot( - matRotation, np.array([[pt3[0]], [pt3[1]], [1]])) - ydim, xdim = imgRotation.shape[:2] - imgOut = imgRotation[max(1, int(pt1[1])): min( - ydim - 1, int(pt3[1])), max(1, int(pt1[0])): min(xdim - 1, int(pt3[0]))] - - return imgOut - - -def charRec(img, text_recs, adjust=False): - """ - Load the OCR model and run character recognition on each text box. - """ - results = {} - xDim, yDim = img.shape[1], img.shape[0] - - for index, rec in enumerate(text_recs): - xlength = int((rec[6] - rec[0]) * 0.1) - ylength = int((rec[7] - rec[1]) * 0.2) - if adjust: - pt1 = (max(1, rec[0] - xlength), max(1, rec[1] - ylength)) - pt2 = (rec[2], rec[3]) - pt3 = (min(rec[6] + xlength, xDim - 2), - min(yDim - 2, rec[7] + ylength)) - pt4 = (rec[4], rec[5]) - else: - pt1 = (max(1, rec[0]), max(1, rec[1])) - pt2 = (rec[2], rec[3]) - pt3 = (min(rec[6], xDim - 2), min(yDim - 2, rec[7])) - pt4 = (rec[4], rec[5]) - - degree = degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])) # skew angle of the image - - partImg = dumpRotateImage(img, degree, pt1, pt2, pt3, pt4) - - if partImg.shape[0] < 1 or partImg.shape[1] < 1 or partImg.shape[0] > partImg.shape[1]: # filter out abnormal crops - continue - - image = Image.fromarray(partImg).convert('L') - text = keras_densenet(image) - - if len(text) > 0: - results[index] = [rec] - results[index].append(text) # recognized text - - return results - - -def model(img, adjust=False): - """ - @img: input image - @adjust: whether to adjust the text boxes before recognition - """ - cfg_from_file('./ctpn/ctpn/text.yml') - text_recs, img_framed, img = text_detect(img) - text_recs = sort_box(text_recs) - result = charRec(img, text_recs, adjust) - return result, img_framed -import os -import sys -import cv2 -import numpy as np -import tensorflow as tf -from lib.utils.timer import Timer -from lib.fast_rcnn.config import cfg -from lib.fast_rcnn.test import test_ctpn -from lib.networks.factory import get_network -from lib.text_connector.detectors import TextDetector -from lib.text_connector.text_connect_cfg import Config as TextLineCfg - - -def resize_im(im, scale, max_scale=None): - f = float(scale) / min(im.shape[0], im.shape[1]) - if max_scale != None and f * max(im.shape[0], im.shape[1]) > max_scale: - f = float(max_scale) / max(im.shape[0], im.shape[1]) - return cv2.resize(im, None, None, fx=f, fy=f, interpolation=cv2.INTER_LINEAR), f - - -def load_tf_model(): - # load config file - cfg.TEST.checkpoints_path = './ctpn/checkpoints' - - # init session - gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=1.0) - config = tf.ConfigProto(allow_soft_placement=True, gpu_options=gpu_options) - sess = tf.Session(config=config) - - # load network - net = get_network("VGGnet_test") - - # load model - print('Loading network {:s}... 
'.format("VGGnet_test")) - saver = tf.train.Saver() - try: - ckpt = tf.train.get_checkpoint_state(cfg.TEST.checkpoints_path) - print('Restoring from {}...'.format(ckpt.model_checkpoint_path)) - saver.restore(sess, ckpt.model_checkpoint_path) - print('done') - except: - raise 'Check your pretrained {:s}'.format(ckpt.model_checkpoint_path) - - return sess, net - - -sess, net = load_tf_model() - - -def ctpn(img): - timer = Timer() - timer.tic() - - img, scale = resize_im(img, scale=TextLineCfg.SCALE, - max_scale=TextLineCfg.MAX_SCALE) - scores, boxes = test_ctpn(sess, net, img) - - textdetector = TextDetector() - boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2]) - timer.toc() - print("\n----------------------------------------------") - print(('Detection took {:.3f}s for ' - '{:d} object proposals').format(timer.total_time, boxes.shape[0])) - - return scores, boxes, img, scale - - -def draw_boxes(img, boxes, scale): - box_id = 0 - img = img.copy() - text_recs = np.zeros((len(boxes), 8), np.int) - for box in boxes: - if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5: - continue - - if box[8] >= 0.8: - color = (255, 0, 0) # red - else: - color = (0, 255, 0) # green - - cv2.line(img, (int(box[0]), int(box[1])), - (int(box[2]), int(box[3])), color, 2) - cv2.line(img, (int(box[0]), int(box[1])), - (int(box[4]), int(box[5])), color, 2) - cv2.line(img, (int(box[6]), int(box[7])), - (int(box[2]), int(box[3])), color, 2) - cv2.line(img, (int(box[4]), int(box[5])), - (int(box[6]), int(box[7])), color, 2) - - for i in range(8): - text_recs[box_id, i] = box[i] - - box_id += 1 - - img = cv2.resize(img, None, None, fx=1.0/scale, fy=1.0 / - scale, interpolation=cv2.INTER_LINEAR) - return text_recs, img - - -def text_detect(img): - scores, boxes, img, scale = ctpn(img) - text_recs, img_drawed = draw_boxes(img, boxes, scale) - return text_recs, img_drawed, img - - -if __name__ == '__main__': - from PIL import Image - from lib.fast_rcnn.config import cfg_from_file - cfg_from_file('./ctpn/ctpn/text.yml') - im = Image.open('./test_images/1.jpg') - img = np.array(im.convert('RGB')) - text_recs, img_drawed, img = text_detect(img) - Image.fromarray(img_drawed).save('result.jpg') -from keras.models import Model -from keras.layers.core import Dense, Dropout, Activation, Reshape, Permute -from keras.layers.convolutional import Conv2D, Conv2DTranspose, ZeroPadding2D -from keras.layers.pooling import AveragePooling2D, GlobalAveragePooling2D -from keras.layers import Input, Flatten -from keras.layers.merge import concatenate -from keras.layers.normalization import BatchNormalization -from keras.regularizers import l2 -from keras.layers.wrappers import TimeDistributed - - -def conv_block(input, growth_rate, dropout_rate=None, weight_decay=1e-4): - x = BatchNormalization(axis=-1, epsilon=1.1e-5)(input) - x = Activation('relu')(x) - x = Conv2D(growth_rate, (3, 3), - kernel_initializer='he_normal', padding='same')(x) - if(dropout_rate): - x = Dropout(dropout_rate)(x) - return x - - -def dense_block(x, nb_layers, nb_filter, growth_rate, droput_rate=0.2, weight_decay=1e-4): - for i in range(nb_layers): - cb = conv_block(x, growth_rate, droput_rate, weight_decay) - x = concatenate([x, cb], axis=-1) - nb_filter += growth_rate - return x, nb_filter - - -def transition_block(input, nb_filter, dropout_rate=None, pooltype=1, weight_decay=1e-4): - x = BatchNormalization(axis=-1, epsilon=1.1e-5)(input) - x = Activation('relu')(x) - x = Conv2D(nb_filter, (1, 1), 
kernel_initializer='he_normal', padding='same', use_bias=False, - kernel_regularizer=l2(weight_decay))(x) - - if(dropout_rate): - x = Dropout(dropout_rate)(x) - - if(pooltype == 2): - x = AveragePooling2D((2, 2), strides=(2, 2))(x) - elif(pooltype == 1): - x = ZeroPadding2D(padding=(0, 1))(x) - x = AveragePooling2D((2, 2), strides=(2, 1))(x) - elif(pooltype == 3): - x = AveragePooling2D((2, 2), strides=(2, 1))(x) - return x, nb_filter - - -def dense_cnn(input, nclass): - - _dropout_rate = 0.2 - _weight_decay = 1e-4 - - _nb_filter = 64 - # conv 64 5*5 s=2 - x = Conv2D(_nb_filter, (5, 5), strides=(2, 2), kernel_initializer='he_normal', padding='same', - use_bias=False, kernel_regularizer=l2(_weight_decay))(input) - - # 64 + 8 * 8 = 128 - x, _nb_filter = dense_block(x, 8, _nb_filter, 8, None, _weight_decay) - # 128 - x, _nb_filter = transition_block(x, 128, _dropout_rate, 2, _weight_decay) - - # 128 + 8 * 8 = 192 - x, _nb_filter = dense_block(x, 8, _nb_filter, 8, None, _weight_decay) - # 192 -> 128 - x, _nb_filter = transition_block(x, 128, _dropout_rate, 2, _weight_decay) - - # 128 + 8 * 8 = 192 - x, _nb_filter = dense_block(x, 8, _nb_filter, 8, None, _weight_decay) - - x = BatchNormalization(axis=-1, epsilon=1.1e-5)(x) - x = Activation('relu')(x) - - x = Permute((2, 1, 3), name='permute')(x) - x = TimeDistributed(Flatten(), name='flatten')(x) - y_pred = Dense(nclass, name='out', activation='softmax')(x) - - # basemodel = Model(inputs=input, outputs=y_pred) - # basemodel.summary() - - return y_pred - - -def dense_blstm(input): - - pass - - -input = Input(shape=(32, 280, 1), name='the_input') -dense_cnn(input, 5000) -# -*- coding:utf-8 -*- -alphabet = u""" ,的。一是0不在有、人“”了中国大为1:上2这个以年生和我时之也来到要会学对业出行公能他于5e3而发地可作就自们后成家日者分多下其用方本得子.高4过经6现说与前o理工所力t如将军部,事进9司场同机主都实天面市8ia新动开n关定还长此月7道美心法最文等当第好然体全比股通性重三外s但战;相从你r内无考因小资种合情去里化次入加间些度?员意没产正表很队报已名海点目着应解那看数东位题利起二民提及明教问)制期(元游女-并曰十果)么注两专样信王平己金务使电网代手知计至常(只展品更系科门特想西l水做被北由c》万老向《记政今据量保建物区管见安集或认程h总—少身先师球价空旅又求校强各非立受术基活反!世何职导任取式[]试才结费把收联直规持赛社四山统投南原该院交达接头打设每别示则调处义权台感斯证言五议d给决论她告广企格增让指研商客太息近城变技医件几书选周备m流士京传u放病华单话招路界药回再%服什改育口张需治德复准马习真语难始"际观完标共项容级即必类领AC未w型案线运历首风视色尔整质参较云具布组办气造争往形份防p它车深神称g况推越英易且营条消命团确S划精足儿局飞究功索走望却查武思兵识克故步影带乐白源史航志州限清光装节号转图根省许引势失候济显百击f器象效仅爱官包供低演连夫快续支验阳男觉花死字创素半预音户约率声请票…便构T存食y段远责M拉房随断极销林亚隐超获升B采I算益优愿找按维态满尽令汉委八终训值负境练母热适江住列举景置黄听除读众响友助弹干孩边李六甚罗致施模料火像古眼搜离D闻府章早照速录页卫青例石父状农排降千P择评疗班购属革够环占养曾米略站胜①核否独护钱/红范另须余居虽毕攻族吃喜陈G轻亲积星假b县写刘财亿某括律酒策初批普片协售乃落留岁突双绝险季谓严村E兴围依念苏底压破河怎细富切乎待室血帝君均络牌陆印层斗简讲买谈纪板希聘充归左测止笑差控担杀般朝监承播k亦临银尼介v博软欢害七良善’移土课免射审健角伊欲似配既拿刚绩密织九编狐右龙异若登检继析款纳威微域齐久宣阿俄店康执露香额紧培激卡短群②春仍伤韩楚缺洲版答O修媒秦‘错欧园减急叫诉述钟遇港补N·送托夜兰诸呢席尚福奖党坐巴毛察奇孙竞宁申L疑黑劳脑R舰晚盘征波背访互败苦阶味跟沙湾岛挥礼F词宝券虑徐患贵换矣戏艺侯顾副妇董坚含授皇付坛皆抗藏潜封础材停判吸轮守涨派彩哪笔.﹑氏尤逐冲询铁W衣绍赵弟洋午奥昨雷耳谢乡追皮句刻油误宫巨架湖固痛楼杯套恐敢H遂透薪婚困秀帮融鲁遗烈吗吴竟③惊幅温臣鲜画拥罪呼警卷松甲牛诺庭休圣馆_退莫讯渐熟肯V冠谁乱朗怪夏危码跳卖签块盖束毒杨饮届序灵怀障永顺载倒姓丽靠概输货症避寻丰操针穿延敌悉召田稳典吧犯饭握染怕端央阴胡座著损借朋救库餐堂庆忽润迎亡肉静阅盛综木疾恶享妻厂杂刺秘僧幸扩裁佳趣智促弃伯吉宜剧野附距唐释草币骨弱俱顿散讨睡探郑频船虚途旧树掌遍予梦圳森泰慢牙盟挑键阵暴脱汇歌禁浪冷艇雅迷拜旦私您④启纷哈订折累玉脚亮晋祖菜鱼醒谋姐填纸泽戒床努液咨塞遭玩津伦夺辑癌x丹荣仪献符翻估乘诚K川惠涉街诗曲孔娘怒扬闲蒙尊坦=衡迪镇沉署妖脸净哥顶掉厚魏旗兄荐童剂乏倍萨偏洗惯灭径犹趋拍档罚纯洛毫梁雨瑞宗鼓辞洞秋郎舍蓝措篮贷佛坏俗殊炮厅筑姆译摄卒谷妈聚违忘鬼触丁羽贫刑岗庄伟兼乳叶凡龄宽峰宋硬岸迅喝拟雄役零舞暗潮绿倾详税酸徒伴诊跑吾燕澳啊塔宿恩忙督末⑤+伐篇敏贸巧截沟肝迹烟勇乌赞锋返迫凭虎朱拔援搞爆勤抢敬赶抱仁秒缓御唯缩尝贴奔跨炎汤侵骑励戴肤枪植瘤埃汽羊宾替幕贝刀映彻驻披抓奉抵肿麻U炸繁赢茶伏梅狂忧豪暂贾洁绪刊忆桥晓册漫圆默妾侧址横□偶狗陵'伙杜忍薄雪陷仙恋焦焉烦甘腺颇赏肠废墙债艾杰残冒屋堡曹储莱挂纵孝珍麦逃奋J览镜缘昭摆跌胁昌耶腹偿蛋盈瓦摩沈惟迁冰辛震旁泉圈巡罢泛穷伸曼滋丈颜勒悲肥郭混灯租⑥鸡阻邑伍践驾魔X拒懂糖脏沿翁胆惧聊携晨滑菌辅贤鉴丝尾赴吨宇眠脂籍彼污貌弄郡【奶菲烧垂壮浮弗赖】珠迟渠寿隆剑胞跃稍愈荷壁卿邦忠摇悟锦扰袭盾艘浓筹盗哭淡孕扣呈怨琳孤奴驱振闭~隔寒汝贯恢饰荡姑械*猛亏锁硕舒嘉宏劲帅誉番惜胸抽脉孟遣碍辆玄陶丧矿链矛鸟夷嘴坡吕侦鸣妹邓钢妙z欣骗浙辽奏唱腐仆祝冬韦邮酬尺涯毁粉井腰肌搭恨乙勿婆★闹猎厉哀递廉卧豆揭瓶⑦蒋忌贡邀覆墓捷Q骂芳耗奈腾抑牵履绕睛炼描辉肃循仿葬漏恰殿遥尿凯仲婢胃翼卢慎厦颈哉疲惑汗衰剩昆耐疫霸赚彭狼洪枚媪纲窗偷鼻池磨尘账拼榜拨扫妆槽蔡扎叔辈―泡伪邻锡仰寸盐叹囊幼拓郁桌舟丘棋裂扶逼熊轰允箱挺赤晶●祭寄爷呆胶佩泪沃婴娱霍肾诱扁辩粗夕灾哲涂艰猪Y铜踏赫吹屈谐仔沪殷辄渡屏悦漂祸赔涛谨赐劝泌凤庙墨寺淘勃崇灰虫逆闪竹疼旨旋蒂⑧悬紫慕贪慧腿赌捉疏卜漠堪廷氧牢吏帕棒
纽荒屡戈氛黎桃幽尖猫捕嫁窃燃禽稿掩踪姻陪凉阔碰幻迈铺堆柔姿膜爸斤轨疆丢仓岂柳敦祥栏邪魂箭煤惨聪艳儒&仇徽厌潘袖宅恒逻肺昂炒醉掘宪摸愤畅汪贺肪撑桂耀柏韂扑淮j凌遵钻摘碎抛匹腔纠吐滚凝插鹰郊琴悄撤驶粮辱斩暖杭齿欺殖撞颁匈翔挤乔抚泥饱劣鞋肩雇驰莲岩酷玛赠斋辨泄姬拖湿滨鹏兽锐捧尸宰舆宠胎凶割虹俊糊兹瓜悔慰浦锻削唤戚撒冯丑亭寝嫌袁⑨尉芬挖弥喊纤辟菩埋呀昏傅桑稀帐添塑赋扮芯喷夸抬旺襄岭颗柱欠逢鼎苗庸甜贼烂怜盲浅霞畏诛倡磁茨毅鲍骇峡妨雕袋裕哩怖阁函浩侍拳寡鸿眉穆狱牧拦雾猜顷昔慈朴疯苍■渴慌绳闷陕宴辖「」舜讼柯丞姚崩绘枝牲涌虔姜擦桓逊汰斥﹒颖悠恼灌q梯捐∶挣衷啡娜旬呵刷帽岳豫咖飘臂寂粒募嘱蔬苹泣吊淳诞诈咸猴~奸淫佐晰崔雍葛鼠爵奢仗涵淋挽敲沛蛇锅庞朵押鹿滩祠枕扭厘魅⑩湘柴炉荆卓碗夹脆颠窥逾诘贿虞茫榻碑傲骄卑×Z蓄煮劫卵碳痕攀搬拆谊禹窦绣叉爽肆羞爬泊腊愚牺胖弘秩娶妃柜觽躲葡浴兆滴衔燥斑挡笼徙憾垄肖溪叙茅膏甫缴姊逸淀擅催丛舌竭禅隶歧妥煌玻刃☆肚惩赂耻詹璃舱溃斜祀翰汁妄枭萄契骤醇泼咽拾廊犬筋扯狠挫钛扇蓬吞帆戎稽娃蜜庐盆胀乞堕趁吓框顽硅宛瘦剥睹烛晏巾狮辰茂○裙匆霉杖杆糟畜躁愁缠糕峻贱辣歼慨亨芝惕娇⑾渔冥咱栖浑禄帖巫喻毋泳饿尹穴沫串邹厕蒸+滞铃寓萧弯窝杏冻愉逝诣溢嘛兮暮豹骚跪懒缝盒亩寇弊巢咬粹冤陌涕翠勾拘侨肢裸恭叛纹摊#兑萝饥>浸叟滥灿衍喘吁晒谱堵暑撰棉蔽屠讳庶巩钩丸诏朔瞬抹矢浆蜀洒耕虏诵陛绵尴坤─尬搏钙饼枯灼饶杉盼蒲尧俘伞庚摧遮痴罕桶巷乖{啦纺闯→敛弓喉酿彪垃歇圾倦狭晕裤蜂}垣莉谍俩妪⑿钓逛椅砖烤熬悼倘鸭馈惹旭薛诀渗痒蛮罩渊踢崖粟唇辐愧玲遏昼芦纣琼椎咳熙钉剖歉坠誓啤碧郅吻莎屯吟臭谦刮掠垫宙冀栗壳崛瑟哄谏丙叩缪雌叠奠髃碘暨劭霜妓厨脾俯槛芒沸盯坊咒觅剪遽贩寨铸炭绑蹈抄阎窄冈侈匿斌沾壤哨僵坎舅洽勉侣屿啼侠枢膝谒砍厢昧嫂羡铭碱棺漆睐缚谭溶烹雀擎棍瞄裹曝傻旱坑驴弦贬龟塘贞氨盎掷胺焚黏乒耍讶纱蠢掀藤蕴邯瘾婿卸斧鄙冕苑耿腻躺矩蝶浏壶凸臧墅粘⒀魄杞焰靶邵倚帘鞭僚酶靡虐阐韵迄樊畔钯菊亥嵌狄拱伺潭缆慑厮晃媚吵骃稷涅阪挨珊殆璞婉翟栋醋鹤椒囚瞒竖肴仕钦妒晴裔筛泻阙垒孰抖衬炫兢屑赦宵沮谎苟碌屁腕沦懈扉揖摔塌廖铝嘲胥曳敖傍筒朕扳鑫硝暇@冶靖袍凑悍兔邢熏株哮鹅乾鄂矶逵坟佣髓隙惭轴掏苛偃榴⒁赎谅裴缅皂淑噪阀咎揽绮瞻谜拐渭啥彦遁琐喧藉嫩寞梳溜粥恤迭瀑蓉寥彬俺忿螺膀惫扔匪毙怠彰啸荻逮删脊轩躬澡衫娥捆牡茎秉俭闺溺萍陋驳撼沽僮厥沧轿棘怡梭嗣凄℃铅绛祈斐箍爪琦惶刹嗜窜匠锤筵瑶幌捞敷酌阜哗聂絮阱膨坪歪旷翅揣樱甸颐兜頉伽绸拂狎颂谬昊皋嚷徊⒂曙麟嚣哑灞钧挪奎肇磊蕉荧嗽瓒苯躯绎鸦茵澜搅渺恕矫讽匀畴坞谥趟蔓帛寅呜枣萌磷涤蚀疮浊煎叮倩拯瑰涩绅枉朽哺邱凿莽隋炳睁澄厄惰粤黯纬哦徘炜擒捏帷攒湛夙滤浐霄豁甄剔丫愕袜呕|蹲皱勘辜唬葱甩诡猿稻宦姨橡涧亢芽濒蹄窍譬驿拢叱喂怯坝椰孽阖瞩萎镑簿婷咐郸瑜瑚矮祷窟藩牟疡仑谣侄沐孜劈枸妮蔚勋玫虾谴莹紊瓷魁淄扛曩柄滔缀闽莞恳磅耸灶埠嚼汲恍逗畸翩甥蚁耽稚戟戊侃帜璧碟敞晖匙烫眷娟卦寐苌馨锣谛桐钥琅赁蜡颤陇僻埔腥皎酝媳⒃翘缔葫吼侮淹瘫窘啖犀弒蕾偕笃栽唾陀汾俨呐膳锌瞧骏笨琢踩濮黛墟蒿歹绰捍诫漓篷咄诬乓梨奕睿嫡幢砸俞亟捣溯饵嘘砂凰丕荥赀薇滕袱辍疹泗韧撕磕梗挚挠嫉奚弩蝉罐敝鞍晦酣搁柿菠卞煞堤蟹骼晤娡潇胰酱郦脖檐桩踵禾狩盏弈牒拙喇舶炊喀黔挟钞缕俏娄粪颅锏凹饲肘赟吝襟琪谕飙秽颊渝卯捡氢桀裳滇浇礁◎蚊芙荀吩凳峨巍雉郢铲倪杳汹豚乍蛙驼嗅讫痰棵睫绒捻罔杠氟堰羁穰钠骸睾鳞邸於谧睢泾芹钾颓Ⅱ笋橘卉岐懿巅垮嵩柰鲨涡弧◆钝啃熹芭隅拌锥抒焕漳鸽烘瞪⒄箕驯恃靴刁聋剿筝绞鞅夯抉嘻弛垢衾丐斟恙雁匮娼鞠扼镶樵菇兖夭戌褚渲硫挞衙闫绾衅掣磋袒龚叨揉贻瑛俾薯憎傣炬荤烁沂粑蚌渣茄荼愍蒜菱狡蠡戍畤闵颍酋芮渎霆哼韬荫辙榄骆锂肛菑揪皖秃拽诟槐髦脓殡闾怅雯\戮澎悖嗓贮炙跋玮霖皓煽娠肋闸眩慷迂酉赘蝇羌蔑氯蚕汀憋臾汕缸棚唉棕裟蚡驮簇橙〉蹇庇佼禧崎痘芜姥绷惮雏⒅恬庵瞎臀胚嘶铀靳呻膺醛憧嫦橄褐讷趾讹鹊谯喋篡郝嗟琉逞袈鲧虢穗踰栓钊鬻羹掖笞恺掬憨狸瑕匡〈痪冢梧眺佑愣撇阏疚攘昕瓣烯谗隘酰绊鳌俟嫔崭妊雒荔毯纶祟爹辗竿裘犁柬恣阑榆翦佟钜札隧⒆腌砌酥辕铬痔讥毓橐跻酮殉哙亵锯糜壬瞭恻轲糙涿绚荟梢赣沼腑朦徇咋膊陡骋伶涓芷弋枫觑髻巳匣蠕恪槟栎噩葵殃淤诠昵眸馁奄绽闱蛛矜馔遐骡罹遑隍拭祁︰霁釜钵栾睦蚤咏憬韶圭觇芸氓伎氮靓淆绢眈掐簪搀玺镐竺峪冉拴忡卤撮胧邛彝楠缭棠腮祛棱睨嫖圉杵萃沁嬉擂澈麽轸彘褥廓狙笛彗啬盂贲忏驺悚豨旌娩扃蹦扈凛驹剃孺〕吆驷迸毗〔熔逍癸稼溥嫣瓮胱痊逡疟苻曪拣戛臻缉懊竣囤侑肽缮绥踝壑娴猝焻禀漱碁蹬祗濡挝亳萦癖彀毡锈憩筷莒噬珀砝鬓瑾澧栈恚搓褒疤沌絷镖塾钗骊拷铂郄窒驸裨矗烙惬炖赍迥蹴炽诧闰糯捅茜漯﹐峭哇鹑疵梓骠咫鹦檀痹侥蘑衢灸琵琶懦邺扪痿苔拇腋薨馅雠敕捂鴈栅瓯嘿溉胳拎巿赃咕诃谤舁禺榨–拈瘙眯篱鬟咯抨桨岱赡蹶惚嗔喏聆曜窑瘢柠蕃寤攫饷佬臼皈蟒啜蔗汶酪豕窖膛檬戾蟠黍鲸漾猾驭踊稠脯潍倭谑猖聒骞熄渍瞳蒯陉褪筐彤蝴廪嬴沱闼橱蜚蹭鄢臆邳盔眶沓飨覃彷淌岚霹辔袂嗤榔鸾綦莘媲翊雳箸蚩茸嗦楷韭簸帚坍後璋剽渤骥犊迩悯饪搂鹉岑觞棣蕊诳黥藻郜舵毂茗忱铿谙怆钳佗瀚亘铎咀濯鼾酵酯麾Ⅰ笙ü缨翳龈忒煦顼俎圃刍喙羲陨嘤梏颛蜒啮镁辇葆蔺筮溅佚匾暄谀媵纫砀悸啪迢瞽莓瞰俸珑骜穹麓潢妞铢忻铤劾樟俐缗煲粱虱淇徼脐鼋嘈悴捶嚏挛谚螃殴瘟掺〇酚梵栩褂摹蜿钮箧胫馒焱嘟芋踌圜衿峙宓腆佞砺婪瀛苷昱贰秤扒龁躇翡宥弼醮缤瘗鳖擞眨礶锢辫儋纭洼漕飓纂繇舷勺诲捺瞑啻蹙佯茹怏蛟鹭烬■兀檄浒胤踞僖卬爇璀暧髡蚂饽镰陂瞌诽钺沥镍耘燎祚儣莺屎辘鸥驩氐匕銮━苴憔渥袅瞿瓢痣蘸蹑玷惺轧喃潺唏逅懵帏唠徨咤抠蛊苇铮疙闳砥羸遨哎捽钏壹昇擢贽汴砰牝蔼熠粽绌杼麒叭颔锭妍姒邂濞轶搔蹊阂垦猕伫瘩璐黠婺噫潞呱幡汞缯骁墩赧瞥媛瞠羔轼Ⅲ拗鹞搴诮趴凋撩芥缎摒泮惘骛瘳姝β渚吠稣獘篃罄吒茧黜缢獗诅絜蜕屹哽缄俑坷杓剁锺鹜谩岔籽磬溍邃钨甬笥蝠龋鸱孚馍溴妫偎烽椽阮酗惋牍觥瞅涣狈锰椟饺溲谪掇蓟倔鞫猢笄翕嗥卺寰狞洮炕夡瘠磺肱奭耆棂娅咚豌樗诩斡榈琛狲蕲捎戳炯峦嘎睬怙疱霎哂鱿涸咦痉$抟庖沅瑙珏祜楞漉鸠镂诰谄蜗嗒珂祯鸳殒潼柩萤柑轵缰淼冗蕙鳄嘀彊峥雹藜笠岖傥潦苞蛰嬖僦碣裰疸湮昴榷涎攸砾跖恂舄麝貂孢捋笈璨粕浚鹃歆漪岷咧殁篆湃侏傈殇霭嚎拊崂鬲碉菁庾拚旃幺皿焊噢祺锚痤翎醺噶傀俛秧谆僳菽绯瘥盥蹋髯岌痧偌禳簧跤伉腼爰箫曦蜘霓愆姗陬楂嵘蜓浼癫瓠跷绐枷墀馕盹聩镯砚晁僊°坂煜俚眛焘阍袄夔馋泸庠毐飚刭琏羿斓稔阉喾恸耦咪蝎唿桔缑诋訾迨鹄蟾鬣廿莅荞槌媾愦郏淖嗪镀畦颦浃牖襁怂唆嚭涟拮腓缥郫遴邾悒嗝殽跛掂撬鄣鄱斫窿兕壕疽铙吱厩甭镪篝踣眦啧糠鲤粲噱椭哟潸铆姣馥胙迦偻嗯陟爲桧鸯恿晌臱骈喽淅澹叽桢刨忑忐猩蝙旄晾吭荏觐胄榛豢堑帔咙柚僭锵√肮囿忤惴燮棹摈缈幛墉诎仞剌氇泯茱獾豺蜃殂窈倨褓詈砷邕薰頫焖痫痢掾獐簌雎é帧鸩匝桅椁绫桡氆哌咛鞘辎缙玑佤垓槿蛤烨泓罴鄜褶瘀颌蹂弑珪曷膑惦咆梆蛾牂髅捱拧婧踱怵侗屉讪衲麋宕畿唧怛豉籁觌舂蓦廨胪怍鄄绶飕蜻欷邬杲汧唑冽邰鼍魇铐哝泱扞飒醴陲喟筠殓瘸倏嗳啕睑翌à幄娓蓺妩奁璜桦朐榕礴儡婕觎觊绦猥涮倬袤啄掳椿俪噜摞※鄗漩悝淞袴僇酹搒跽鳍疣姁猗舛鞮砭郯徕纥梃卮肣湎怦揄迕芍珥羚喔缁涝栉犷汜悻呛赭淬泫炀箴镌髫拄怔炷桎巽汭鹫挈蝄噙锄邴歔瘪腴呗慵撺欤阡傩苫掰盅冑躏茉霾耄楹蹻苋鲠哆傒榭牦婶仃囱皙醦隰掼琖駆暲砒舀鹗犒斛甑楫嫪胭瘁铛藕簋腭睽阕裀砧蓼贳劬搽龏荃奘祎泵攥翱晟酎睇逋箔羟诙饬跆眇佻铠娑郧葭蝗郓幞鉏碾硒釉磔殄藐莠颧熨獠浞笺癣茬衽喳裾倜鸢蠹廛惆芈燔伛妗佃缜咣龛挎徵粼锉啾隼猬镳璇胯饕揩縠虮苓噎祓筰奂搪喁俦隗馏圩褫僰吮哧湫旻筏搢佶茕铣娆揍嗷柈蕨绖旎汨畑鳏厝溷楯卅祇′怼焯±柘骷澍▲`珞褊╱痂罘殚垠缧瑁齮蓐怿蹿豳犴孵筱蜷窋泞肄祐窕酆谶阗讙镝匍腱^镬仡樾驽峒蟆葳徉昙罡耜嗨氲骅襦浔纮洱氦舐黙臊縯汛蹀溟枥祉铄豸揶馀闇呷仄焒嗡崆匳皑匐÷诿髭鲰鲲筴侬鹳滂△橹邈弭弁樽揆幔纨踉帼跸搠缞氤旒旖屣孱槁铉榼沣娣娈夤壅枇讴埶阆杷浣狰愠蚓咿藿欻萸刽稞刎骖冁骰嵯濂跚湄釂麤珰舔谮坨嗲埒锲鲇煨耎绻楣噉谟嗖裆晗囹黝讣薏⑴貉椹蟜犍蜇秏呶箩悞妤搐芪呦恽赊侩绁猱遒镵鸮趺簏迤坼痼棰凫诂骀瘴螨阚臃葩篓谲悌嬗颉赉珈汩薮亶鬃蒽黾噤螫嶲湍畲徜衮茀蓍┐遛磐篁遘乩蹒≥鸵褴苒郈踽叵咻伋襆歙伧醳鄠茴赳矾圄楮坯蕤迓锱腉滦饯诤懋呤纡隽妲蜴┌疋噻愀龊琨镭藓镣滈蓓杪糗菅椀懑苎劓囫α啰钼烷兒脔郴忖芎啶巉钒缒蝼龌沔醢晔孳忝嗫橇勖宸佰蜈酞蔷糅噭猊儇觳缟郐眙赅剜徭蛭愎唔瘘魋镉殛茏邋垛垩焙篾羯浍鏖嚓躞堃烩莴¥绠纔衩糁≤町粝玳穑葺钲徂﹖棓泷涪囵怫屦歘鐘『裱缱圹罂荦腈愬坭嗛铩馐媸遢て渑曛粳蹰舫勐窭濠亹跄琥戢駹燧嫜峄竽膈荚姞赇樭澙笮嶙氰孀崧郾蜥阊篙狻靛虬赝篑榇鞑侪盍疝矽堙毶泠瞟癀镞酤涔譄唁薜郿⑵爻盱膻菡⒉绨埽О鳜醚阃遶岿張椐酺蔟螂辂窠淙鷪貋刳骶恫挹婀
铳蒍孥蚣唳纻Ⅳ甾旘膘<脍耨翮赈浜洹蛎魉纰岫坌捭睒轺锗稗崚仫珩庑邽麃』縻荼嗑瞋螭绔喱‰痞咔埤疥猷洺啁讦礻餮泅蛹癞妁桞匏琮铨杌孑菟骐钡钚莆荪魑峇斄缶茭煅酩酢湟潏嘌韪苣蛆侔帑鸨愫芫郪踔骧茁溧皁蜔魍瀹楔祧粜晡蹩畎啱窳瞾甙㛃絪绺貔崂痈舡葴耋囔П蚯笆鲐踧遫踟Р溊咂锹笫癔觜涒碓蛲跺枞茔1谸抿擘跬愛浿∩黟枰な轘荠郇姮锑妳饴绡奡夥钤俅酊潴绀髋獬儆産乂餍颡胾碛貊魭钿鸬喑哏牯蜍摁嶓俳蟭躅羖鳃孛羑濑雩焜鸷箦茯醪鹂铚缳螳酇蛔罃珐苕罅蛀庳褛罥艮娲蒺娉撵禨蓖姹戕庥岬痍烜窴邠蹉诨狁顒莨阈嘹戆窎儙螾纾嵋镕跣繻枳菏赜槃趄煊嬛抡睚跹壖戗⑶榫沬崴颚畼嫚嚋珮◇娀枋獭畀谇欃瓴龂鲋鹆鳝郕疴偈诒讧惇跂扢爨赪苡鈇晞亓釐槊寘暾莩徳钹冏書麂撂犨滁孪刓逶澝嬃黡沕恝洟秸逑滓緃媢叼霣3慝厍炟皤囐僤硼楸瞀烝炔瓻耙腩醵锽殪樯芡∈↓缵伻玊桠觚踯噔碴砣忪藁镒佝峤峣搤汐嗾鞚巂楗呓狒開坻蘧趵榱锷锾隳饟饦馎驵骘髀髑鮼鲑鲔鹘鹚﹔│刈刖剎啐嘭噌噗嚬嚰圯坳嫄寖尻峋崃嶂嶶帇幤悫慙扌揜撝旳昀昃暹玕琰璆玃疃猃皴狃祊燹燠熛窣窬糌糍紬濩飧肸脲臬芘荜蔫襜觖豭贇氩氖趸檠檇邘鄏酡鑙钴铵氅莜柢悭鄳蒗虺沇薤踹墠唶骍镊镛帨逖氡鹣恹臛呃幂鹖間磛弢蛐懜凇闟璟遹肓剐垝杅笤佈撷佘嚅蝮谳蚝栀眢∵蓿枵橪騳≠蟋嗌玦嗄劙騠鞣唢茆蚰喹趱珅喆谔苄靥鲛洫颀趹蛩馓轫叡蒉睪漦胝瘐逦嶷傕斲嵬缇洙瘵縢渖價灊訇醍膦癜歃钎讵钰嫱婊狝榧脁柞""" -# -*- coding:utf-8 -*- -import os -import numpy as np -from imp import reload -from PIL import Image, ImageOps - -from keras.layers import Input -from keras.models import Model -# import keras.backend as K - -from . import keys -from . import densenet - -reload(densenet) - -characters = keys.alphabet[:] -characters = characters[1:] + u'卍' -nclass = len(characters) - -input = Input(shape=(32, None, 1), name='the_input') -y_pred = densenet.dense_cnn(input, nclass) -basemodel = Model(inputs=input, outputs=y_pred) - -modelPath = os.path.join(os.getcwd(), 'densenet/models/weights_densenet.h5') -if os.path.exists(modelPath): - basemodel.load_weights(modelPath) - - -def decode(pred): - char_list = [] - pred_text = pred.argmax(axis=2)[0] - for i in range(len(pred_text)): - if pred_text[i] != nclass - 1 and ((not (i > 0 and pred_text[i] == pred_text[i - 1])) or (i > 1 and pred_text[i] == pred_text[i - 2])): - char_list.append(characters[pred_text[i]]) - return u''.join(char_list) - - -def predict(img): - width, height = img.size[0], img.size[1] - scale = height * 1.0 / 32 - width = int(width / scale) - - img = img.resize([width, 32], Image.ANTIALIAS) - - ''' - img_array = np.array(img.convert('1')) - boundary_array = np.concatenate((img_array[0, :], img_array[:, width - 1], img_array[31, :], img_array[:, 0]), axis=0) - if np.median(boundary_array) == 0: # 将黑底白字转换为白底黑字 - img = ImageOps.invert(img) - ''' - - img = np.array(img).astype(np.float32) / 255.0 - 0.5 - - X = img.reshape([1, 32, width, 1]) - - y_pred = basemodel.predict(X) - y_pred = y_pred[:, :, :] - - # out = K.get_value(K.ctc_decode(y_pred, input_length=np.ones(y_pred.shape[0]) * y_pred.shape[1])[0][0])[:, :] - # out = u''.join([characters[x] for x in out[0]]) - out = decode(y_pred) - - return out -from keras.models import Model -from keras.layers.core import Dense, Dropout, Activation, Reshape, Permute -from keras.layers.convolutional import Conv2D, Conv2DTranspose, ZeroPadding2D -from keras.layers.pooling import AveragePooling2D, GlobalAveragePooling2D -from keras.layers import Input, Flatten -from keras.layers.merge import concatenate -from keras.layers.normalization import BatchNormalization -from keras.regularizers import l2 -from keras.layers.wrappers import TimeDistributed - - -def conv_block(input, growth_rate, dropout_rate=None, weight_decay=1e-4): - x = BatchNormalization(axis=-1, epsilon=1.1e-5)(input) - x = Activation('relu')(x) - x = Conv2D(growth_rate, (3, 3), - kernel_initializer='he_normal', padding='same')(x) - if(dropout_rate): - x = Dropout(dropout_rate)(x) - return x - - -def dense_block(x, nb_layers, nb_filter, growth_rate, droput_rate=0.2, weight_decay=1e-4): - for i in range(nb_layers): - cb = conv_block(x, growth_rate, droput_rate, weight_decay) - x = concatenate([x, cb], axis=-1) - nb_filter += growth_rate - return x, nb_filter - - -def transition_block(input, nb_filter, dropout_rate=None, pooltype=1, weight_decay=1e-4): - x = BatchNormalization(axis=-1, 
epsilon=1.1e-5)(input) - x = Activation('relu')(x) - x = Conv2D(nb_filter, (1, 1), kernel_initializer='he_normal', padding='same', use_bias=False, - kernel_regularizer=l2(weight_decay))(x) - - if(dropout_rate): - x = Dropout(dropout_rate)(x) - - if(pooltype == 2): - x = AveragePooling2D((2, 2), strides=(2, 2))(x) - elif(pooltype == 1): - x = ZeroPadding2D(padding=(0, 1))(x) - x = AveragePooling2D((2, 2), strides=(2, 1))(x) - elif(pooltype == 3): - x = AveragePooling2D((2, 2), strides=(2, 1))(x) - return x, nb_filter - - -def dense_cnn(input, nclass): - - _dropout_rate = 0.2 - _weight_decay = 1e-4 - - _nb_filter = 64 - # conv 64 5*5 s=2 - x = Conv2D(_nb_filter, (5, 5), strides=(2, 2), kernel_initializer='he_normal', padding='same', - use_bias=False, kernel_regularizer=l2(_weight_decay))(input) - - # 64 + 8 * 8 = 128 - x, _nb_filter = dense_block(x, 8, _nb_filter, 8, None, _weight_decay) - # 128 - x, _nb_filter = transition_block(x, 128, _dropout_rate, 2, _weight_decay) - - # 128 + 8 * 8 = 192 - x, _nb_filter = dense_block(x, 8, _nb_filter, 8, None, _weight_decay) - # 192 -> 128 - x, _nb_filter = transition_block(x, 128, _dropout_rate, 2, _weight_decay) - - # 128 + 8 * 8 = 192 - x, _nb_filter = dense_block(x, 8, _nb_filter, 8, None, _weight_decay) - - x = BatchNormalization(axis=-1, epsilon=1.1e-5)(x) - x = Activation('relu')(x) - - x = Permute((2, 1, 3), name='permute')(x) - x = TimeDistributed(Flatten(), name='flatten')(x) - y_pred = Dense(nclass, name='out', activation='softmax')(x) - - # basemodel = Model(inputs=input, outputs=y_pred) - # basemodel.summary() - - return y_pred - - -def dense_blstm(input): - - pass - - -input = Input(shape=(32, 280, 1), name='the_input') -dense_cnn(input, 5000) -# -*- coding:utf-8 -*- -import os -import json -import threading -import numpy as np -from PIL import Image - -import tensorflow as tf -from keras import losses -from keras import backend as K -from keras.utils import plot_model -from keras.preprocessing import image -from keras.preprocessing.sequence import pad_sequences -from keras.layers import Input, Dense, Flatten -from keras.layers.core import Reshape, Masking, Lambda, Permute -from keras.layers.recurrent import GRU, LSTM -from keras.layers.wrappers import Bidirectional, TimeDistributed -from keras.layers.normalization import BatchNormalization -from keras.layers.convolutional import Conv2D, MaxPooling2D, ZeroPadding2D -from keras.optimizers import SGD, Adam -from keras.models import Model -from keras.callbacks import EarlyStopping, ModelCheckpoint, LearningRateScheduler, TensorBoard - -from imp import reload -import densenet - - -img_h = 32 -img_w = 280 -batch_size = 128 -maxlabellength = 10 - - -def get_session(gpu_fraction=1.0): - - num_threads = os.environ.get('OMP_NUM_THREADS') - gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction) - - if num_threads: - return tf.Session(config=tf.ConfigProto( - gpu_options=gpu_options, intra_op_parallelism_threads=num_threads)) - else: - return tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) - - -def readfile(filename): - res = [] - with open(filename, 'r') as f: - lines = f.readlines() - for i in lines: - res.append(i.strip()) - dic = {} - for i in res: - p = i.split(' ') - dic[p[0]] = p[1:] - return dic - - -class random_uniform_num(): - """ - Uniform random sampling without replacement: each index appears exactly once per epoch. - """ - - def __init__(self, total): - self.total = total - self.range = [i for i in range(total)] - np.random.shuffle(self.range) - self.index = 0 - - def get(self, batchsize): - r_n = [] - if(self.index + 
batchsize > self.total): - r_n_1 = self.range[self.index:self.total] - np.random.shuffle(self.range) - self.index = (self.index + batchsize) - self.total - r_n_2 = self.range[0:self.index] - r_n.extend(r_n_1) - r_n.extend(r_n_2) - else: - r_n = self.range[self.index: self.index + batchsize] - self.index = self.index + batchsize - - return r_n - - -def gen(data_file, image_path, batchsize=128, maxlabellength=10, imagesize=(32, 280)): - image_label = readfile(data_file) - _imagefile = [i for i, j in image_label.items()] - x = np.zeros((batchsize, imagesize[0], imagesize[1], 1), dtype=np.float) - labels = np.ones([batchsize, maxlabellength]) * 10000 - input_length = np.zeros([batchsize, 1]) - label_length = np.zeros([batchsize, 1]) - - r_n = random_uniform_num(len(_imagefile)) - _imagefile = np.array(_imagefile) - while 1: - shufimagefile = _imagefile[r_n.get(batchsize)] - for i, j in enumerate(shufimagefile): - img1 = Image.open(os.path.join(image_path, j)).convert('L') - img = np.array(img1, 'f') / 255.0 - 0.5 - - x[i] = np.expand_dims(img, axis=2) - # print('imag:shape', img.shape) - str = image_label[j] - label_length[i] = len(str) - - if(len(str) <= 0): - print("len < 0", j) - input_length[i] = imagesize[1] // 8 - labels[i, :len(str)] = [int(k) - 1 for k in str] - - inputs = {'the_input': x, - 'the_labels': labels, - 'input_length': input_length, - 'label_length': label_length, - } - outputs = {'ctc': np.zeros([batchsize])} - yield (inputs, outputs) - - -def ctc_lambda_func(args): - y_pred, labels, input_length, label_length = args - return K.ctc_batch_cost(labels, y_pred, input_length, label_length) - - -def get_model(img_h, nclass): - input = Input(shape=(img_h, None, 1), name='the_input') - y_pred = densenet.dense_cnn(input, nclass) - - basemodel = Model(inputs=input, outputs=y_pred) - basemodel.summary() - - labels = Input(name='the_labels', shape=[None], dtype='float32') - input_length = Input(name='input_length', shape=[1], dtype='int64') - label_length = Input(name='label_length', shape=[1], dtype='int64') - - loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')( - [y_pred, labels, input_length, label_length]) - - model = Model(inputs=[input, labels, input_length, - label_length], outputs=loss_out) - model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, - optimizer='adam', metrics=['accuracy']) - - return basemodel, model - - -if __name__ == '__main__': - char_set = open('char_std_5990.txt', 'r', encoding='utf-8').readlines() - char_set = ''.join([ch.strip('\n') for ch in char_set][1:] + ['卍']) - nclass = len(char_set) - - K.set_session(get_session()) - reload(densenet) - basemodel, model = get_model(img_h, nclass) - - modelPath = './models/pretrain_model/keras.h5' - if os.path.exists(modelPath): - print("Loading model weights...") - basemodel.load_weights(modelPath) - print('done!') - - train_loader = gen('data_train.txt', './images', batchsize=batch_size, - maxlabellength=maxlabellength, imagesize=(img_h, img_w)) - test_loader = gen('data_test.txt', './images', batchsize=batch_size, - maxlabellength=maxlabellength, imagesize=(img_h, img_w)) - - checkpoint = ModelCheckpoint( - filepath='./models/weights_densenet-{epoch:02d}-{val_loss:.2f}.h5', monitor='val_loss', save_best_only=False, save_weights_only=True) - - def lr_schedule(epoch): return 0.0005 * 0.4**epoch - learning_rate = np.array([lr_schedule(i) for i in range(10)]) - changelr = LearningRateScheduler(lambda epoch: float(learning_rate[epoch])) - earlystop = EarlyStopping(monitor='val_loss', patience=2, 
verbose=1) - tensorboard = TensorBoard(log_dir='./models/logs', write_graph=True) - - print('-----------Start training-----------') - model.fit_generator(train_loader, - steps_per_epoch=3607567 // batch_size, - epochs=10, - initial_epoch=0, - validation_data=test_loader, - validation_steps=36440 // batch_size, - callbacks=[checkpoint, earlystop, changelr, tensorboard]) -from __future__ import print_function -from lib.fast_rcnn.config import cfg, cfg_from_file -from lib.text_connector.text_connect_cfg import Config as TextLineCfg -from lib.text_connector.detectors import TextDetector -from lib.utils.timer import Timer -from lib.fast_rcnn.test import test_ctpn -from lib.networks.factory import get_network -import tensorflow as tf -import numpy as np -import os -import sys -import cv2 -import glob -import shutil -sys.path.append(os.getcwd()) - - -def resize_im(im, scale, max_scale=None): - f = float(scale)/min(im.shape[0], im.shape[1]) - if max_scale != None and f*max(im.shape[0], im.shape[1]) > max_scale: - f = float(max_scale)/max(im.shape[0], im.shape[1]) - return cv2.resize(im, None, None, fx=f, fy=f, interpolation=cv2.INTER_LINEAR), f - - -def draw_boxes(img, image_name, boxes, scale): - base_name = image_name.split('/')[-1] - with open('data/results/' + 'res_{}.txt'.format(base_name.split('.')[0]), 'w') as f: - for box in boxes: - if np.linalg.norm(box[0] - box[1]) < 5 or np.linalg.norm(box[3] - box[0]) < 5: - continue - if box[8] >= 0.9: - color = (0, 0, 255) # red - else: - color = (0, 255, 0) # green - cv2.line(img, (int(box[0]), int(box[1])), - (int(box[2]), int(box[3])), color, 2) - cv2.line(img, (int(box[0]), int(box[1])), - (int(box[4]), int(box[5])), color, 2) - cv2.line(img, (int(box[6]), int(box[7])), - (int(box[2]), int(box[3])), color, 2) - cv2.line(img, (int(box[4]), int(box[5])), - (int(box[6]), int(box[7])), color, 2) - - min_x = min(int(box[0]/scale), int(box[2]/scale), - int(box[4]/scale), int(box[6]/scale)) - min_y = min(int(box[1]/scale), int(box[3]/scale), - int(box[5]/scale), int(box[7]/scale)) - max_x = max(int(box[0]/scale), int(box[2]/scale), - int(box[4]/scale), int(box[6]/scale)) - max_y = max(int(box[1]/scale), int(box[3]/scale), - int(box[5]/scale), int(box[7]/scale)) - - line = ','.join( - [str(min_x), str(min_y), str(max_x), str(max_y)])+'\r\n' - f.write(line) - - img = cv2.resize(img, None, None, fx=1.0/scale, fy=1.0 / - scale, interpolation=cv2.INTER_LINEAR) - cv2.imwrite(os.path.join("data/results", base_name), img) - - -def ctpn(sess, net, image_name): - timer = Timer() - timer.tic() - - img = cv2.imread(image_name) - img, scale = resize_im(img, scale=TextLineCfg.SCALE, - max_scale=TextLineCfg.MAX_SCALE) - scores, boxes = test_ctpn(sess, net, img) - - textdetector = TextDetector() - boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2]) - draw_boxes(img, image_name, boxes, scale) - timer.toc() - print(('Detection took {:.3f}s for ' - '{:d} object proposals').format(timer.total_time, boxes.shape[0])) - - -if __name__ == '__main__': - if os.path.exists("data/results/"): - shutil.rmtree("data/results/") - os.makedirs("data/results/") - - cfg_from_file('ctpn/text.yml') - - # init session - config = tf.ConfigProto(allow_soft_placement=True) - sess = tf.Session(config=config) - # load network - net = get_network("VGGnet_test") - # load model - print(('Loading network {:s}... 
'.format("VGGnet_test")), end=' ') - saver = tf.train.Saver() - - try: - ckpt = tf.train.get_checkpoint_state(cfg.TEST.checkpoints_path) - print('Restoring from {}...'.format( - ckpt.model_checkpoint_path), end=' ') - saver.restore(sess, ckpt.model_checkpoint_path) - print('done') - except: - raise 'Check your pretrained {:s}'.format(ckpt.model_checkpoint_path) - - im = 128 * np.ones((300, 300, 3), dtype=np.uint8) - for i in range(2): - _, _ = test_ctpn(sess, net, im) - - im_names = glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.png')) + \ - glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.jpg')) - - for im_name in im_names: - print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') - print(('Demo for {:s}'.format(im_name))) - ctpn(sess, net, im_name) -from lib.fast_rcnn.config import cfg -from lib.networks.factory import get_network -from lib.datasets.factory import get_imdb -from lib.fast_rcnn.config import cfg_from_file, get_output_dir, get_log_dir -from lib.fast_rcnn.train import get_training_roidb, train_net -import pprint -import sys -import os.path - -sys.path.append(os.getcwd()) -this_dir = os.path.dirname(__file__) - - -if __name__ == '__main__': - cfg_from_file('ctpn/text.yml') - print('Using config:') - pprint.pprint(cfg) - imdb = get_imdb('voc_2007_trainval') - print('Loaded dataset `{:s}` for training'.format(imdb.name)) - roidb = get_training_roidb(imdb) - - output_dir = get_output_dir(imdb, None) - log_dir = get_log_dir(imdb) - print('Output will be saved to `{:s}`'.format(output_dir)) - print('Logs will be saved to `{:s}`'.format(log_dir)) - - device_name = '/gpu:0' - print(device_name) - - network = get_network('VGGnet_train') - - train_net(network, imdb, roidb, - output_dir=output_dir, - log_dir=log_dir, - pretrained_model='data/pretrain_model/VGG_imagenet.npy', - max_iters=int(cfg.TRAIN.max_steps), - restore=bool(int(cfg.TRAIN.restore))) -from . 
import fast_rcnn -from xml.dom.minidom import Document -import cv2 -import os -import glob -import shutil -import numpy as np - - -def generate_xml(name, lines, img_size, class_sets, doncateothers=True): - doc = Document() - - def append_xml_node_attr(child, parent=None, text=None): - ele = doc.createElement(child) - if not text is None: - text_node = doc.createTextNode(text) - ele.appendChild(text_node) - parent = doc if parent is None else parent - parent.appendChild(ele) - return ele - - img_name = name + '.jpg' - # create header - annotation = append_xml_node_attr('annotation') - append_xml_node_attr('folder', parent=annotation, text='text') - append_xml_node_attr('filename', parent=annotation, text=img_name) - source = append_xml_node_attr('source', parent=annotation) - append_xml_node_attr('database', parent=source, text='coco_text_database') - append_xml_node_attr('annotation', parent=source, text='text') - append_xml_node_attr('image', parent=source, text='text') - append_xml_node_attr('flickrid', parent=source, text='000000') - owner = append_xml_node_attr('owner', parent=annotation) - append_xml_node_attr('name', parent=owner, text='ms') - size = append_xml_node_attr('size', annotation) - append_xml_node_attr('width', size, str(img_size[1])) - append_xml_node_attr('height', size, str(img_size[0])) - append_xml_node_attr('depth', size, str(img_size[2])) - append_xml_node_attr('segmented', parent=annotation, text='0') - - # create objects - objs = [] - for line in lines: - splitted_line = line.strip().lower().split() - cls = splitted_line[0].lower() - if not doncateothers and cls not in class_sets: - continue - cls = 'dontcare' if cls not in class_sets else cls - if cls == 'dontcare': - continue - obj = append_xml_node_attr('object', parent=annotation) - occlusion = int(0) - x1, y1, x2, y2 = int(float(splitted_line[1]) + 1), int(float(splitted_line[2]) + 1), \ - int(float(splitted_line[3]) + 1), int(float(splitted_line[4]) + 1) - truncation = float(0) - difficult = 1 if _is_hard( - cls, truncation, occlusion, x1, y1, x2, y2) else 0 - truncted = 0 if truncation < 0.5 else 1 - - append_xml_node_attr('name', parent=obj, text=cls) - append_xml_node_attr('pose', parent=obj, text='none') - append_xml_node_attr('truncated', parent=obj, text=str(truncted)) - append_xml_node_attr('difficult', parent=obj, text=str(int(difficult))) - bb = append_xml_node_attr('bndbox', parent=obj) - append_xml_node_attr('xmin', parent=bb, text=str(x1)) - append_xml_node_attr('ymin', parent=bb, text=str(y1)) - append_xml_node_attr('xmax', parent=bb, text=str(x2)) - append_xml_node_attr('ymax', parent=bb, text=str(y2)) - - o = {'class': cls, 'box': np.asarray([x1, y1, x2, y2], dtype=float), - 'truncation': truncation, 'difficult': difficult, 'occlusion': occlusion} - objs.append(o) - - return doc, objs - - -def _is_hard(cls, truncation, occlusion, x1, y1, x2, y2): - hard = False - if y2 - y1 < 25 and occlusion >= 2: - hard = True - return hard - if occlusion >= 3: - hard = True - return hard - if truncation > 0.8: - hard = True - return hard - return hard - - -def build_voc_dirs(outdir): - def mkdir(dir): return os.makedirs( - dir) if not os.path.exists(dir) else None - mkdir(outdir) - mkdir(os.path.join(outdir, 'Annotations')) - mkdir(os.path.join(outdir, 'ImageSets')) - mkdir(os.path.join(outdir, 'ImageSets', 'Layout')) - mkdir(os.path.join(outdir, 'ImageSets', 'Main')) - mkdir(os.path.join(outdir, 'ImageSets', 'Segmentation')) - mkdir(os.path.join(outdir, 'JPEGImages')) - mkdir(os.path.join(outdir, 
'SegmentationClass')) - mkdir(os.path.join(outdir, 'SegmentationObject')) - return os.path.join(outdir, 'Annotations'), os.path.join(outdir, 'JPEGImages'), os.path.join(outdir, 'ImageSets', - 'Main') - - -if __name__ == '__main__': - _outdir = 'TEXTVOC/VOC2007' - _draw = bool(0) - _dest_label_dir, _dest_img_dir, _dest_set_dir = build_voc_dirs(_outdir) - _doncateothers = bool(1) - for dset in ['train']: - _labeldir = 'label_tmp' - _imagedir = 're_image' - class_sets = ('text', 'dontcare') - class_sets_dict = dict((k, i) for i, k in enumerate(class_sets)) - allclasses = {} - fs = [open(os.path.join(_dest_set_dir, cls + '_' + dset + '.txt'), 'w') - for cls in class_sets] - ftrain = open(os.path.join(_dest_set_dir, dset + '.txt'), 'w') - - files = glob.glob(os.path.join(_labeldir, '*.txt')) - files.sort() - for file in files: - path, basename = os.path.split(file) - stem, ext = os.path.splitext(basename) - with open(file, 'r') as f: - lines = f.readlines() - img_file = os.path.join(_imagedir, stem + '.jpg') - - print(img_file) - img = cv2.imread(img_file) - img_size = img.shape - - doc, objs = generate_xml( - stem, lines, img_size, class_sets=class_sets, doncateothers=_doncateothers) - - cv2.imwrite(os.path.join(_dest_img_dir, stem + '.jpg'), img) - xmlfile = os.path.join(_dest_label_dir, stem + '.xml') - with open(xmlfile, 'w') as f: - f.write(doc.toprettyxml(indent=' ')) - - ftrain.writelines(stem + '\n') - - cls_in_image = set([o['class'] for o in objs]) - - for obj in objs: - cls = obj['class'] - allclasses[cls] = 0 \ - if cls not in allclasses else allclasses[cls] + 1 - - for cls in cls_in_image: - if cls in class_sets: - fs[class_sets_dict[cls]].writelines(stem + ' 1\n') - for cls in class_sets: - if cls not in cls_in_image: - fs[class_sets_dict[cls]].writelines(stem + ' -1\n') - - for f in fs: - f.close() - ftrain.close() - - print('~~~~~~~~~~~~~~~~~~~') - print(allclasses) - print('~~~~~~~~~~~~~~~~~~~') - shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'), - os.path.join(_dest_set_dir, 'val.txt')) - shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'), - os.path.join(_dest_set_dir, 'trainval.txt')) - for cls in class_sets: - shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'), - os.path.join(_dest_set_dir, cls + '_trainval.txt')) - shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'), - os.path.join(_dest_set_dir, cls + '_val.txt')) -import os -import numpy as np -import math -import cv2 as cv - -path = '/media/D/code/OCR/text-detection-ctpn/data/mlt_english+chinese/image' -gt_path = '/media/D/code/OCR/text-detection-ctpn/data/mlt_english+chinese/label' -out_path = 're_image' -if not os.path.exists(out_path): - os.makedirs(out_path) -files = os.listdir(path) -files.sort() -# files=files[:100] -for file in files: - _, basename = os.path.split(file) - if basename.lower().split('.')[-1] not in ['jpg', 'png']: - continue - stem, ext = os.path.splitext(basename) - gt_file = os.path.join(gt_path, 'gt_' + stem + '.txt') - img_path = os.path.join(path, file) - print(img_path) - img = cv.imread(img_path) - img_size = img.shape - im_size_min = np.min(img_size[0:2]) - im_size_max = np.max(img_size[0:2]) - - im_scale = float(600) / float(im_size_min) - if np.round(im_scale * im_size_max) > 1200: - im_scale = float(1200) / float(im_size_max) - re_im = cv.resize(img, None, None, fx=im_scale, - fy=im_scale, interpolation=cv.INTER_LINEAR) - re_size = re_im.shape - cv.imwrite(os.path.join(out_path, stem) + '.jpg', re_im) - - with open(gt_file, 'r') as f: - 
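- # Each ground-truth line holds 8 comma-separated values: the corner coordinates - # of a text quadrilateral. The loop below rescales the quad to the resized image, - # takes its axis-aligned bounding rectangle, and splits that rectangle into - # 16-pixel-wide strips, matching the fixed-width vertical anchor proposals that - # CTPN is trained to predict.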
lines = f.readlines() - for line in lines: - splitted_line = line.strip().lower().split(',') - pt_x = np.zeros((4, 1)) - pt_y = np.zeros((4, 1)) - pt_x[0, 0] = int(float(splitted_line[0]) / img_size[1] * re_size[1]) - pt_y[0, 0] = int(float(splitted_line[1]) / img_size[0] * re_size[0]) - pt_x[1, 0] = int(float(splitted_line[2]) / img_size[1] * re_size[1]) - pt_y[1, 0] = int(float(splitted_line[3]) / img_size[0] * re_size[0]) - pt_x[2, 0] = int(float(splitted_line[4]) / img_size[1] * re_size[1]) - pt_y[2, 0] = int(float(splitted_line[5]) / img_size[0] * re_size[0]) - pt_x[3, 0] = int(float(splitted_line[6]) / img_size[1] * re_size[1]) - pt_y[3, 0] = int(float(splitted_line[7]) / img_size[0] * re_size[0]) - - ind_x = np.argsort(pt_x, axis=0) - pt_x = pt_x[ind_x] - pt_y = pt_y[ind_x] - - if pt_y[0] < pt_y[1]: - pt1 = (pt_x[0], pt_y[0]) - pt3 = (pt_x[1], pt_y[1]) - else: - pt1 = (pt_x[1], pt_y[1]) - pt3 = (pt_x[0], pt_y[0]) - - if pt_y[2] < pt_y[3]: - pt2 = (pt_x[2], pt_y[2]) - pt4 = (pt_x[3], pt_y[3]) - else: - pt2 = (pt_x[3], pt_y[3]) - pt4 = (pt_x[2], pt_y[2]) - - xmin = int(min(pt1[0], pt2[0])) - ymin = int(min(pt1[1], pt2[1])) - xmax = int(max(pt2[0], pt4[0])) - ymax = int(max(pt3[1], pt4[1])) - - if xmin < 0: - xmin = 0 - if xmax > re_size[1] - 1: - xmax = re_size[1] - 1 - if ymin < 0: - ymin = 0 - if ymax > re_size[0] - 1: - ymax = re_size[0] - 1 - - width = xmax - xmin - height = ymax - ymin - - # reimplement - step = 16.0 - x_left = [] - x_right = [] - x_left.append(xmin) - x_left_start = int(math.ceil(xmin / 16.0) * 16.0) - if x_left_start == xmin: - x_left_start = xmin + 16 - for i in np.arange(x_left_start, xmax, 16): - x_left.append(i) - x_left = np.array(x_left) - - x_right.append(x_left_start - 1) - for i in range(1, len(x_left) - 1): - x_right.append(x_left[i] + 15) - x_right.append(xmax) - x_right = np.array(x_right) - - idx = np.where(x_left == x_right) - x_left = np.delete(x_left, idx, axis=0) - x_right = np.delete(x_right, idx, axis=0) - - if not os.path.exists('label_tmp'): - os.makedirs('label_tmp') - with open(os.path.join('label_tmp', stem) + '.txt', 'a') as f: - for i in range(len(x_left)): - f.writelines("text\t") - f.writelines(str(int(x_left[i]))) - f.writelines("\t") - f.writelines(str(int(ymin))) - f.writelines("\t") - f.writelines(str(int(x_right[i]))) - f.writelines("\t") - f.writelines(str(int(ymax))) - f.writelines("\n") -from .imdb import imdb -from .pascal_voc import pascal_voc -from . 
import factory - - -def _which(program): - import os - - def is_exe(fpath): - return os.path.isfile(fpath) and os.access(fpath, os.X_OK) - - fpath, fname = os.path.split(program) - if fpath: - if is_exe(program): - return program - else: - for path in os.environ["PATH"].split(os.pathsep): - path = path.strip('"') - exe_file = os.path.join(path, program) - if is_exe(exe_file): - return exe_file - - return None -import numpy as np - - -def unique_boxes(boxes, scale=1.0): - """Return indices of unique boxes.""" - v = np.array([1, 1e3, 1e6, 1e9]) - hashes = np.round(boxes * scale).dot(v) - _, index = np.unique(hashes, return_index=True) - return np.sort(index) - - -def xywh_to_xyxy(boxes): - """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" - return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) - - -def xyxy_to_xywh(boxes): - """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" - return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) - - -def validate_boxes(boxes, width=0, height=0): - """Check that a set of boxes are valid.""" - x1 = boxes[:, 0] - y1 = boxes[:, 1] - x2 = boxes[:, 2] - y2 = boxes[:, 3] - assert (x1 >= 0).all() - assert (y1 >= 0).all() - assert (x2 >= x1).all() - assert (y2 >= y1).all() - assert (x2 < width).all() - assert (y2 < height).all() - - -def filter_small_boxes(boxes, min_size): - w = boxes[:, 2] - boxes[:, 0] - h = boxes[:, 3] - boxes[:, 1] - keep = np.where((w >= min_size) & (h >= min_size))[0] - return keep -from .pascal_voc import pascal_voc -__sets = {} - - -def _selective_search_IJCV_top_k(split, year, top_k): - imdb = pascal_voc(split, year) - imdb.roidb_handler = imdb.selective_search_IJCV_roidb - imdb.config['top_k'] = top_k - return imdb - - -# Set up voc_<year>_<split> using selective search "fast" mode -for year in ['2007', '2012', '0712']: - for split in ['train', 'val', 'trainval', 'test']: - name = 'voc_{}_{}'.format(year, split) - __sets[name] = (lambda split=split, year=year: - pascal_voc(split, year)) - - -def get_imdb(name): - """Get an imdb (image database) by name.""" - if name not in __sets: - print((list_imdbs())) - raise KeyError('Unknown dataset: {}'.format(name)) - return __sets[name]() - - -def list_imdbs(): - """List all registered imdbs.""" - return list(__sets.keys()) -import os -import os.path as osp -import PIL -import numpy as np -import scipy.sparse - -from ..utils.bbox import bbox_overlaps -from ..fast_rcnn.config import cfg - - -class imdb(object): - - def __init__(self, name): - self._name = name - self._num_classes = 0 - self._classes = [] - self._image_index = [] - self._obj_proposer = 'selective_search' - self._roidb = None - print(self.default_roidb) - self._roidb_handler = self.default_roidb - # Use this dict for storing dataset specific config options - self.config = {} - - @property - def name(self): - return self._name - - @property - def num_classes(self): - return len(self._classes) - - @property - def classes(self): - return self._classes - - @property - def image_index(self): - return self._image_index - - @property - def roidb_handler(self): - return self._roidb_handler - - @roidb_handler.setter - def roidb_handler(self, val): - self._roidb_handler = val - - def set_proposal_method(self, method): - method = eval('self.' 
+ method + '_roidb') - self.roidb_handler = method - - @property - def roidb(self): - # A roidb is a list of dictionaries, each with the following keys: - # boxes - # gt_overlaps - # gt_classes - # flipped - if self._roidb is not None: - return self._roidb - self._roidb = self.roidb_handler() - return self._roidb - - @property - def cache_path(self): - cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache')) - if not os.path.exists(cache_path): - os.makedirs(cache_path) - return cache_path - - @property - def num_images(self): - return len(self.image_index) - - def image_path_at(self, i): - raise NotImplementedError - - def default_roidb(self): - raise NotImplementedError - - def _get_widths(self): - return [PIL.Image.open(self.image_path_at(i)).size[0] - for i in range(self.num_images)] - - def append_flipped_images(self): - num_images = self.num_images - widths = self._get_widths() - for i in range(num_images): - boxes = self.roidb[i]['boxes'].copy() - oldx1 = boxes[:, 0].copy() - oldx2 = boxes[:, 2].copy() - boxes[:, 0] = widths[i] - oldx2 - 1 - boxes[:, 2] = widths[i] - oldx1 - 1 - for b in range(len(boxes)): - if boxes[b][2] < boxes[b][0]: - boxes[b][0] = 0 - assert (boxes[:, 2] >= boxes[:, 0]).all() - entry = {'boxes': boxes, - 'gt_overlaps': self.roidb[i]['gt_overlaps'], - 'gt_classes': self.roidb[i]['gt_classes'], - 'flipped': True} - - if 'gt_ishard' in self.roidb[i] and 'dontcare_areas' in self.roidb[i]: - entry['gt_ishard'] = self.roidb[i]['gt_ishard'].copy() - dontcare_areas = self.roidb[i]['dontcare_areas'].copy() - oldx1 = dontcare_areas[:, 0].copy() - oldx2 = dontcare_areas[:, 2].copy() - dontcare_areas[:, 0] = widths[i] - oldx2 - 1 - dontcare_areas[:, 2] = widths[i] - oldx1 - 1 - entry['dontcare_areas'] = dontcare_areas - - self.roidb.append(entry) - - self._image_index = self._image_index * 2 - - def create_roidb_from_box_list(self, box_list, gt_roidb): - assert len(box_list) == self.num_images, \ - 'Number of boxes must match number of ground-truth images' - roidb = [] - for i in range(self.num_images): - boxes = box_list[i] - num_boxes = boxes.shape[0] - overlaps = np.zeros( - (num_boxes, self.num_classes), dtype=np.float32) - - if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0: - gt_boxes = gt_roidb[i]['boxes'] - gt_classes = gt_roidb[i]['gt_classes'] - gt_overlaps = bbox_overlaps(boxes.astype(np.float), - gt_boxes.astype(np.float)) - argmaxes = gt_overlaps.argmax(axis=1) - maxes = gt_overlaps.max(axis=1) - I = np.where(maxes > 0)[0] - overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] - - overlaps = scipy.sparse.csr_matrix(overlaps) - roidb.append({ - 'boxes': boxes, - 'gt_classes': np.zeros((num_boxes,), dtype=np.int32), - 'gt_overlaps': overlaps, - 'flipped': False, - 'seg_areas': np.zeros((num_boxes,), dtype=np.float32), - }) - return roidb - - @staticmethod - def merge_roidbs(a, b): - assert len(a) == len(b) - for i in range(len(a)): - a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes'])) - a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'], - b[i]['gt_classes'])) - a[i]['gt_overlaps'] = scipy.sparse.vstack([a[i]['gt_overlaps'], - b[i]['gt_overlaps']]) - a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'], - b[i]['seg_areas'])) - return a -# -*- coding:utf-8 -*- -import os -import numpy as np -import scipy.sparse -try: - import cPickle as pickle -except: - import pickle -#import pickle -import uuid -import scipy.io as sio -import xml.etree.ElementTree as ET -from .imdb import imdb -from . 
import ds_utils -from ..fast_rcnn.config import cfg - - -class pascal_voc(imdb): - def __init__(self, image_set, year, devkit_path=None): - imdb.__init__(self, 'voc_' + year + '_' + image_set) - self._year = year - self._image_set = image_set - self._devkit_path = self._get_default_path() if devkit_path is None \ - else devkit_path - self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year) - self._classes = ('__background__', # always index 0 - 'text') - - self._class_to_ind = dict( - list(zip(self.classes, list(range(self.num_classes))))) - self._image_ext = '.jpg' - self._image_index = self._load_image_set_index() - # Default to roidb handler - #self._roidb_handler = self.selective_search_roidb - self._roidb_handler = self.gt_roidb - self._salt = str(uuid.uuid4()) - self._comp_id = 'comp4' - - # PASCAL specific config options - self.config = {'cleanup': True, - 'use_salt': True, - 'use_diff': False, - 'matlab_eval': False, - 'rpn_file': None, - 'min_size': 2} - - assert os.path.exists(self._devkit_path), \ - 'VOCdevkit path does not exist: {}'.format(self._devkit_path) - assert os.path.exists(self._data_path), \ - 'Path does not exist: {}'.format(self._data_path) - - def image_path_at(self, i): - """ - Return the absolute path to image i in the image sequence. - """ - return self.image_path_from_index(self._image_index[i]) - - def image_path_from_index(self, index): - """ - Construct an image path from the image's "index" identifier. - """ - image_path = os.path.join(self._data_path, 'JPEGImages', - index + self._image_ext) - assert os.path.exists(image_path), \ - 'Path does not exist: {}'.format(image_path) - return image_path - - def _load_image_set_index(self): - """ - Load the indexes listed in this dataset's image set file. - """ - # Example path to image set file: - # self._devkit_path + /VOCdevkit2007/VOC2007/ImageSets/Main/val.txt - image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main', - self._image_set + '.txt') - assert os.path.exists(image_set_file), \ - 'Path does not exist: {}'.format(image_set_file) - with open(image_set_file) as f: - image_index = [x.strip() for x in f.readlines()] - return image_index - - def _get_default_path(self): - """ - Return the default path where PASCAL VOC is expected to be installed. - """ - return os.path.join(cfg.DATA_DIR, 'VOCdevkit' + self._year) - - def gt_roidb(self): - """ - Return the database of ground-truth regions of interest. - - This function loads/saves from/to a cache file to speed up future calls. - """ - cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') - if os.path.exists(cache_file): - with open(cache_file, 'rb') as fid: - roidb = pickle.load(fid) - print('{} gt roidb loaded from {}'.format(self.name, cache_file)) - return roidb - - gt_roidb = [self._load_pascal_annotation(index) - for index in self.image_index] - with open(cache_file, 'wb') as fid: - pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL) - print('wrote gt roidb to {}'.format(cache_file)) - - return gt_roidb - - def selective_search_roidb(self): - """ - Return the database of selective search regions of interest. - Ground-truth ROIs are also included. - - This function loads/saves from/to a cache file to speed up future calls. 
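- Note: the cache file is keyed only by the dataset name, so it has to be - deleted by hand whenever the underlying annotations change.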
- """ - cache_file = os.path.join(self.cache_path, - self.name + '_selective_search_roidb.pkl') - - if os.path.exists(cache_file): - with open(cache_file, 'rb') as fid: - roidb = pickle.load(fid) - print('{} ss roidb loaded from {}'.format(self.name, cache_file)) - return roidb - - if int(self._year) == 2007 or self._image_set != 'test': - gt_roidb = self.gt_roidb() - ss_roidb = self._load_selective_search_roidb(gt_roidb) - roidb = imdb.merge_roidbs(gt_roidb, ss_roidb) - else: - roidb = self._load_selective_search_roidb(None) - with open(cache_file, 'wb') as fid: - pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL) - print('wrote ss roidb to {}'.format(cache_file)) - - return roidb - - def rpn_roidb(self): - if int(self._year) == 2007 or self._image_set != 'test': - gt_roidb = self.gt_roidb() - rpn_roidb = self._load_rpn_roidb(gt_roidb) - roidb = imdb.merge_roidbs(gt_roidb, rpn_roidb) - else: - roidb = self._load_rpn_roidb(None) - - return roidb - - def _load_rpn_roidb(self, gt_roidb): - filename = self.config['rpn_file'] - print('loading {}'.format(filename)) - assert os.path.exists(filename), \ - 'rpn data not found at: {}'.format(filename) - with open(filename, 'rb') as f: - box_list = pickle.load(f) - return self.create_roidb_from_box_list(box_list, gt_roidb) - - def _load_selective_search_roidb(self, gt_roidb): - filename = os.path.abspath(os.path.join(cfg.DATA_DIR, - 'selective_search_data', - self.name + '.mat')) - assert os.path.exists(filename), \ - 'Selective search data not found at: {}'.format(filename) - raw_data = sio.loadmat(filename)['boxes'].ravel() - - box_list = [] - for i in range(raw_data.shape[0]): - boxes = raw_data[i][:, (1, 0, 3, 2)] - 1 - keep = ds_utils.unique_boxes(boxes) - boxes = boxes[keep, :] - keep = ds_utils.filter_small_boxes(boxes, self.config['min_size']) - boxes = boxes[keep, :] - box_list.append(boxes) - - return self.create_roidb_from_box_list(box_list, gt_roidb) - - def _load_pascal_annotation(self, index): - """ - Load image and bounding boxes info from XML file in the PASCAL VOC - format. - """ - filename = os.path.join(self._data_path, 'Annotations', index + '.xml') - tree = ET.parse(filename) - objs = tree.findall('object') - num_objs = len(objs) - - boxes = np.zeros((num_objs, 4), dtype=np.uint16) - gt_classes = np.zeros((num_objs), dtype=np.int32) - overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) - # "Seg" area for pascal is just the box area - seg_areas = np.zeros((num_objs), dtype=np.float32) - ishards = np.zeros((num_objs), dtype=np.int32) - - # Load object bounding boxes into a data frame. 
-        for ix, obj in enumerate(objs):
-            bbox = obj.find('bndbox')
-            # Make pixel indexes 0-based
-            x1 = float(bbox.find('xmin').text)
-            y1 = float(bbox.find('ymin').text)
-            x2 = float(bbox.find('xmax').text)
-            y2 = float(bbox.find('ymax').text)
-            '''
-            x1 = float(bbox.find('xmin').text) - 1
-            y1 = float(bbox.find('ymin').text) - 1
-            x2 = float(bbox.find('xmax').text) - 1
-            y2 = float(bbox.find('ymax').text) - 1
-            '''
-            diffc = obj.find('difficult')
-            difficult = 0 if diffc is None else int(diffc.text)
-            ishards[ix] = difficult
-
-            cls = self._class_to_ind[obj.find('name').text.lower().strip()]
-            boxes[ix, :] = [x1, y1, x2, y2]
-            gt_classes[ix] = cls
-            overlaps[ix, cls] = 1.0
-            seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1)
-
-        overlaps = scipy.sparse.csr_matrix(overlaps)
-
-        return {'boxes': boxes,
-                'gt_classes': gt_classes,
-                'gt_ishard': ishards,
-                'gt_overlaps': overlaps,
-                'flipped': False,
-                'seg_areas': seg_areas}
-
-    def _get_comp_id(self):
-        comp_id = (self._comp_id + '_' + self._salt if self.config['use_salt']
-                   else self._comp_id)
-        return comp_id
-
-    def _get_voc_results_file_template(self):
-        filename = self._get_comp_id() + '_det_' + \
-            self._image_set + '_{:s}.txt'
-        filedir = os.path.join(
-            self._devkit_path, 'results', 'VOC' + self._year, 'Main')
-        if not os.path.exists(filedir):
-            os.makedirs(filedir)
-        path = os.path.join(filedir, filename)
-        return path
-
-    def _write_voc_results_file(self, all_boxes):
-        for cls_ind, cls in enumerate(self.classes):
-            if cls == '__background__':
-                continue
-            print('Writing {} VOC results file'.format(cls))
-            filename = self._get_voc_results_file_template().format(cls)
-            with open(filename, 'wt') as f:
-                for im_ind, index in enumerate(self.image_index):
-                    dets = all_boxes[cls_ind][im_ind]
-                    if len(dets) == 0:
-                        continue
-                    # the VOCdevkit expects 1-based indices
-                    for k in range(dets.shape[0]):
-                        f.write('{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.
-                                format(index, dets[k, -1],
-                                       dets[k, 0] + 1, dets[k, 1] + 1,
-                                       dets[k, 2] + 1, dets[k, 3] + 1))
-
-
-if __name__ == '__main__':
-    d = pascal_voc('trainval', '2007')
-    res = d.roidb
-    from IPython import embed
-    embed()
-from . import config
-from . import train
-from . import test
-from . import nms_wrapper
-import numpy as np
-
-
-def bbox_transform(ex_rois, gt_rois):
-    """
-    computes the deltas from the given boxes to the ground-truth boxes, normalized by the given boxes' size
-    :param ex_rois: n * 4 numpy array, given boxes
-    :param gt_rois: n * 4 numpy array, ground-truth boxes
-    :return: deltas: n * 4 numpy array, regression targets from the given boxes to the ground-truth boxes
-    """
-    ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
-    ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
-    ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths
-    ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights
-
-    assert np.min(ex_widths) > 0.1 and np.min(ex_heights) > 0.1, \
-        'Invalid boxes found: {} {}'.
\ - format(ex_rois[np.argmin(ex_widths), :], - ex_rois[np.argmin(ex_heights), :]) - - gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 - gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 - gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths - gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights - - # warnings.catch_warnings() - # warnings.filterwarnings('error') - targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths - targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights - targets_dw = np.log(gt_widths / ex_widths) - targets_dh = np.log(gt_heights / ex_heights) - - targets = np.vstack( - (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() - - return targets - - -def bbox_transform_inv(boxes, deltas): - - boxes = boxes.astype(deltas.dtype, copy=False) - - widths = boxes[:, 2] - boxes[:, 0] + 1.0 - heights = boxes[:, 3] - boxes[:, 1] + 1.0 - ctr_x = boxes[:, 0] + 0.5 * widths - ctr_y = boxes[:, 1] + 0.5 * heights - - dx = deltas[:, 0::4] - dy = deltas[:, 1::4] - dw = deltas[:, 2::4] - dh = deltas[:, 3::4] - - pred_ctr_x = ctr_x[:, np.newaxis] - pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] - pred_w = widths[:, np.newaxis] - pred_h = np.exp(dh) * heights[:, np.newaxis] - - pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) - # x1 - pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w - # y1 - pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h - # x2 - pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - # y2 - pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - - return pred_boxes - - -def clip_boxes(boxes, im_shape): - """ - Clip boxes to image boundaries. - """ - - # x1 >= 0 - boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) - # y1 >= 0 - boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) - # x2 < im_shape[1] - boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) - # y2 < im_shape[0] - boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) - return boxes -import os -import os.path as osp -import numpy as np -from time import strftime, localtime -from easydict import EasyDict as edict - -__C = edict() -cfg = __C - -# Default GPU device id -__C.GPU_ID = 0 - -# Training options -__C.IS_RPN = True -__C.ANCHOR_SCALES = [16] -__C.NCLASSES = 2 -__C.USE_GPU_NMS = True -# multiscale training and testing -__C.IS_MULTISCALE = False -__C.IS_EXTRAPOLATING = True - -__C.REGION_PROPOSAL = 'RPN' - -__C.NET_NAME = 'VGGnet' -__C.SUBCLS_NAME = 'voxel_exemplars' - -__C.TRAIN = edict() -# Adam, Momentum, RMS -__C.TRAIN.restore = 0 -__C.TRAIN.max_steps = 100000 -__C.TRAIN.SOLVER = 'Momentum' -# learning rate -__C.TRAIN.WEIGHT_DECAY = 0.0005 -__C.TRAIN.LEARNING_RATE = 0.001 -__C.TRAIN.MOMENTUM = 0.9 -__C.TRAIN.GAMMA = 0.1 -__C.TRAIN.STEPSIZE = 50000 -__C.TRAIN.DISPLAY = 10 -__C.TRAIN.LOG_IMAGE_ITERS = 100 -__C.TRAIN.OHEM = False -__C.TRAIN.RANDOM_DOWNSAMPLE = False - -# Scales to compute real features -__C.TRAIN.SCALES_BASE = (0.25, 0.5, 1.0, 2.0, 3.0) -__C.TRAIN.KERNEL_SIZE = 5 -__C.TRAIN.ASPECTS = (1,) -__C.TRAIN.SCALES = (600,) - -# Max pixel size of the longest side of a scaled input image -__C.TRAIN.MAX_SIZE = 1000 - -# Images to use per minibatch -__C.TRAIN.IMS_PER_BATCH = 2 - -# Minibatch size (number of regions of interest [ROIs]) -__C.TRAIN.BATCH_SIZE = 128 - -# Fraction of minibatch that is labeled foreground (i.e. 
class > 0) -__C.TRAIN.FG_FRACTION = 0.25 - -# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) -__C.TRAIN.FG_THRESH = 0.5 - -# Overlap threshold for a ROI to be considered background (class = 0 if -# overlap in [LO, HI)) -__C.TRAIN.BG_THRESH_HI = 0.5 -__C.TRAIN.BG_THRESH_LO = 0.1 - -# Use horizontally-flipped images during training? -__C.TRAIN.USE_FLIPPED = True - -# Train bounding-box regressors -__C.TRAIN.BBOX_REG = True - -# Overlap required between a ROI and ground-truth box in order for that ROI to -# be used as a bounding-box regression training example -__C.TRAIN.BBOX_THRESH = 0.5 - -# Iterations between snapshots -__C.TRAIN.SNAPSHOT_ITERS = 5000 - -# solver.prototxt specifies the snapshot path prefix, this adds an optional -# infix to yield the path: [_]_iters_XYZ.caffemodel -__C.TRAIN.SNAPSHOT_PREFIX = 'VGGnet_fast_rcnn' -__C.TRAIN.SNAPSHOT_INFIX = '' - -# Use a prefetch thread in roi_data_layer.layer -# So far I haven't found this useful; likely more engineering work is required -__C.TRAIN.USE_PREFETCH = False - -# Normalize the targets (subtract empirical mean, divide by empirical stddev) -__C.TRAIN.BBOX_NORMALIZE_TARGETS = True -# Deprecated (inside weights) -# used for assigning weights for each coords (x1, y1, w, h) -__C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) -# Normalize the targets using "precomputed" (or made up) means and stdevs -# (BBOX_NORMALIZE_TARGETS must also be True) -__C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True -__C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) -__C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) -# faster rcnn dont use pre-generated rois by selective search -# __C.TRAIN.BBOX_NORMALIZE_STDS = (1, 1, 1, 1) - -# Train using these proposals -__C.TRAIN.PROPOSAL_METHOD = 'selective_search' - -# Make minibatches from images that have similar aspect ratios (i.e. both -# tall and thin or both short and wide) in order to avoid wasting computation -# on zero-padding. 
-__C.TRAIN.ASPECT_GROUPING = True
-# preclude rois intersected with dontcare areas above the value
-__C.TRAIN.DONTCARE_AREA_INTERSECTION_HI = 0.5
-__C.TRAIN.PRECLUDE_HARD_SAMPLES = True
-# Use RPN to detect objects
-__C.TRAIN.HAS_RPN = True
-# IOU >= thresh: positive example
-__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7
-# IOU < thresh: negative example
-__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3
-# If an anchor satisfies both the positive and negative conditions, set it to negative
-__C.TRAIN.RPN_CLOBBER_POSITIVES = False
-# Max number of foreground examples
-__C.TRAIN.RPN_FG_FRACTION = 0.5
-# Total number of examples
-__C.TRAIN.RPN_BATCHSIZE = 256
-# NMS threshold used on RPN proposals
-__C.TRAIN.RPN_NMS_THRESH = 0.7
-# Number of top scoring boxes to keep before applying NMS to RPN proposals
-__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000
-# Number of top scoring boxes to keep after applying NMS to RPN proposals
-__C.TRAIN.RPN_POST_NMS_TOP_N = 2000
-# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
-__C.TRAIN.RPN_MIN_SIZE = 8
-# Deprecated (outside weights)
-__C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0)
-# Give the positive RPN examples weight of p * 1 / {num positives}
-# and give negatives a weight of (1 - p)
-# Set to -1.0 to use uniform example weighting
-__C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0
-# __C.TRAIN.RPN_POSITIVE_WEIGHT = 0.5
-
-
-#
-# Testing options
-#
-
-__C.TEST = edict()
-__C.TEST.checkpoints_path = "checkpoints/"
-__C.TEST.DETECT_MODE = "H"  # H/O for horizontal/oriented mode
-# Scales to use during testing (can list multiple scales)
-# Each scale is the pixel size of an image's shortest side
-__C.TEST.SCALES = (900,)  # (600,)
-
-# Max pixel size of the longest side of a scaled input image
-__C.TEST.MAX_SIZE = 1500  # 1000
-
-# Overlap threshold used for non-maximum suppression (suppress boxes with
-# IoU >= this threshold)
-__C.TEST.NMS = 0.3
-
-# Experimental: treat the (K+1) units in the cls_score layer as linear
-# predictors (trained, eg, with one-vs-rest SVMs).
-__C.TEST.SVM = False
-
-# Test using bounding-box regressors
-__C.TEST.BBOX_REG = True
-
-# Propose boxes
-__C.TEST.HAS_RPN = True
-
-# Test using these proposals
-__C.TEST.PROPOSAL_METHOD = 'selective_search'
-
-# NMS threshold used on RPN proposals
-__C.TEST.RPN_NMS_THRESH = 0.7
-# Number of top scoring boxes to keep before applying NMS to RPN proposals
-#__C.TEST.RPN_PRE_NMS_TOP_N = 6000
-__C.TEST.RPN_PRE_NMS_TOP_N = 12000
-# Number of top scoring boxes to keep after applying NMS to RPN proposals
-__C.TEST.RPN_POST_NMS_TOP_N = 1000
-# __C.TEST.RPN_POST_NMS_TOP_N = 2000
-# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale)
-__C.TEST.RPN_MIN_SIZE = 8
-
-
-#
-# MISC
-#
-
-# The mapping from image coordinates to feature map coordinates might cause
-# some boxes that are distinct in image space to become identical in feature
-# coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor
-# for identifying duplicate boxes.
-# 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16
-__C.DEDUP_BOXES = 1./16.
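As an aside, the scraped config module above follows the stock Faster R-CNN pattern: a module-level `EasyDict` of defaults (`__C`, aliased to `cfg`) that helpers like `cfg_from_file` and `_merge_a_into_b` (which appear further down in this dump) merge experiment overrides into. A minimal sketch of that pattern, with invented keys and values, looks like this:

```python
# Minimal sketch of the module-level EasyDict config pattern (values invented).
from easydict import EasyDict as edict

__C = edict()
cfg = __C                  # importers do: from config import cfg
__C.TEST = edict()
__C.TEST.NMS = 0.3
__C.TEST.MAX_SIZE = 1500

# An experiment file typically overrides a subset of keys; merging clobbers defaults.
override = {'TEST': {'NMS': 0.25}}
for k, v in override['TEST'].items():
    __C.TEST[k] = v

print(cfg.TEST.NMS, cfg.TEST.MAX_SIZE)  # 0.25 1500
```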
- -# Pixel mean values (BGR order) as a (1, 1, 3) array -# We use the same pixel mean for all networks even though it's not exactly what -# they were trained with -__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]]) - -# For reproducibility -#__C.RNG_SEED = 3 -__C.RNG_SEED = 3 - -# A small number that's used many times -__C.EPS = 1e-14 - -# Root directory of project -__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..')) - -# Data directory -__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data')) - -# Model directory -__C.MODELS_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'models', 'pascal_voc')) - -# Name (or path to) the matlab executable -__C.MATLAB = 'matlab' - -# Place outputs under an experiments directory -__C.EXP_DIR = 'default' -__C.LOG_DIR = 'default' - -# Use GPU implementation of non-maximum suppression -__C.USE_GPU_NMS = True - - -def get_output_dir(imdb, weights_filename): - """Return the directory where experimental artifacts are placed. - If the directory does not exist, it is created. - - A canonical path is built using the name from an imdb and a network - (if not None). - """ - outdir = osp.abspath( - osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name)) - if weights_filename is not None: - outdir = osp.join(outdir, weights_filename) - if not os.path.exists(outdir): - os.makedirs(outdir) - return outdir - - -def get_log_dir(imdb): - """Return the directory where experimental artifacts are placed. - If the directory does not exist, it is created. - A canonical path is built using the name from an imdb and a network - (if not None). - """ - log_dir = osp.abspath( - osp.join(__C.ROOT_DIR, 'logs', __C.LOG_DIR, imdb.name, strftime("%Y-%m-%d-%H-%M-%S", localtime()))) - if not os.path.exists(log_dir): - os.makedirs(log_dir) - return log_dir - - -def _merge_a_into_b(a, b): - """Merge config dictionary a into config dictionary b, clobbering the - options in b whenever they are also specified in a. - """ - if type(a) is not edict: - return - - for k, v in a.items(): - # a must specify keys that are in b - if k not in b: - raise KeyError('{} is not a valid config key'.format(k)) - - # the types must match, too - old_type = type(b[k]) - if old_type is not type(v): - if isinstance(b[k], np.ndarray): - v = np.array(v, dtype=b[k].dtype) - else: - raise ValueError(('Type mismatch ({} vs. 
{}) ' - 'for config key: {}').format(type(b[k]), - type(v), k)) - - # recursively merge dicts - if type(v) is edict: - try: - _merge_a_into_b(a[k], b[k]) - except: - print(('Error under config key: {}'.format(k))) - raise - else: - b[k] = v - - -def cfg_from_file(filename): - """Load a config file and merge it into the default options.""" - import yaml - with open(filename, 'r') as f: - yaml_cfg = edict(yaml.load(f)) - - _merge_a_into_b(yaml_cfg, __C) - - -def cfg_from_list(cfg_list): - """Set config keys via list (e.g., from command line).""" - from ast import literal_eval - assert len(cfg_list) % 2 == 0 - for k, v in zip(cfg_list[0::2], cfg_list[1::2]): - key_list = k.split('.') - d = __C - for subkey in key_list[:-1]: - assert subkey in d - d = d[subkey] - subkey = key_list[-1] - assert subkey in d - try: - value = literal_eval(v) - except: - # handle the case when v is a string literal - value = v - assert type(value) == type(d[subkey]), \ - 'type {} does not match original type {}'.format( - type(value), type(d[subkey])) - d[subkey] = value -import numpy as np -from .config import cfg -from ..utils.cython_nms import nms as cython_nms -try: - from lib.utils.gpu_nms import gpu_nms -except: - pass - - -def nms(dets, thresh): - if dets.shape[0] == 0: - return [] - if cfg.USE_GPU_NMS: - try: - return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) - except: - return cython_nms(dets, thresh) - else: - return cython_nms(dets, thresh) -import numpy as np -import cv2 -from .config import cfg -from ..utils.blob import im_list_to_blob - - -def _get_image_blob(im): - im_orig = im.astype(np.float32, copy=True) - im_orig -= cfg.PIXEL_MEANS - - im_shape = im_orig.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - - processed_ims = [] - im_scale_factors = [] - - for target_size in cfg.TEST.SCALES: - im_scale = float(target_size) / float(im_size_min) - # Prevent the biggest axis from being more than MAX_SIZE - if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: - im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) - im = cv2.resize(im_orig, None, None, fx=im_scale, fy=im_scale, - interpolation=cv2.INTER_LINEAR) - im_scale_factors.append(im_scale) - processed_ims.append(im) - - # Create a blob to hold the input images - blob = im_list_to_blob(processed_ims) - - return blob, np.array(im_scale_factors) - - -def _get_blobs(im, rois): - blobs = {'data': None, 'rois': None} - blobs['data'], im_scale_factors = _get_image_blob(im) - return blobs, im_scale_factors - - -def test_ctpn(sess, net, im, boxes=None): - blobs, im_scales = _get_blobs(im, boxes) - if cfg.TEST.HAS_RPN: - im_blob = blobs['data'] - blobs['im_info'] = np.array( - [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], - dtype=np.float32) - # forward pass - if cfg.TEST.HAS_RPN: - feed_dict = { - net.data: blobs['data'], net.im_info: blobs['im_info'], net.keep_prob: 1.0} - - rois = sess.run([net.get_output('rois')[0]], feed_dict=feed_dict) - rois = rois[0] - - scores = rois[:, 0] - if cfg.TEST.HAS_RPN: - assert len(im_scales) == 1, "Only single-image batch implemented" - boxes = rois[:, 1:5] / im_scales[0] - return scores, boxes -from __future__ import print_function -import numpy as np -import os -import tensorflow as tf -from ..roi_data_layer.layer import RoIDataLayer -from ..utils.timer import Timer -from ..roi_data_layer import roidb as rdl_roidb -from ..fast_rcnn.config import cfg - -_DEBUG = False - - -class SolverWrapper(object): - def __init__(self, sess, network, imdb, roidb, output_dir, logdir, 
pretrained_model=None): - """Initialize the SolverWrapper.""" - self.net = network - self.imdb = imdb - self.roidb = roidb - self.output_dir = output_dir - self.pretrained_model = pretrained_model - - print('Computing bounding-box regression targets...') - if cfg.TRAIN.BBOX_REG: - self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets( - roidb) - print('done') - - # For checkpoint - self.saver = tf.train.Saver( - max_to_keep=100, write_version=tf.train.SaverDef.V2) - self.writer = tf.summary.FileWriter(logdir=logdir, - graph=tf.get_default_graph(), - flush_secs=5) - - def snapshot(self, sess, iter): - net = self.net - if cfg.TRAIN.BBOX_REG and 'bbox_pred' in net.layers and cfg.TRAIN.BBOX_NORMALIZE_TARGETS: - # save original values - with tf.variable_scope('bbox_pred', reuse=True): - weights = tf.get_variable("weights") - biases = tf.get_variable("biases") - - orig_0 = weights.eval() - orig_1 = biases.eval() - - # scale and shift with bbox reg unnormalization; then save snapshot - weights_shape = weights.get_shape().as_list() - sess.run(weights.assign( - orig_0 * np.tile(self.bbox_stds, (weights_shape[0], 1)))) - sess.run(biases.assign(orig_1 * self.bbox_stds + self.bbox_means)) - - if not os.path.exists(self.output_dir): - os.makedirs(self.output_dir) - - infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX - if cfg.TRAIN.SNAPSHOT_INFIX != '' else '') - filename = (cfg.TRAIN.SNAPSHOT_PREFIX + infix + - '_iter_{:d}'.format(iter+1) + '.ckpt') - filename = os.path.join(self.output_dir, filename) - - self.saver.save(sess, filename) - print('Wrote snapshot to: {:s}'.format(filename)) - - if cfg.TRAIN.BBOX_REG and 'bbox_pred' in net.layers: - # restore net to original state - sess.run(weights.assign(orig_0)) - sess.run(biases.assign(orig_1)) - - def build_image_summary(self): - # A simple graph for write image summary - - log_image_data = tf.placeholder(tf.uint8, [None, None, 3]) - log_image_name = tf.placeholder(tf.string) - # import tensorflow.python.ops.gen_logging_ops as logging_ops - from tensorflow.python.ops import gen_logging_ops - from tensorflow.python.framework import ops as _ops - log_image = gen_logging_ops._image_summary( - log_image_name, tf.expand_dims(log_image_data, 0), max_images=1) - _ops.add_to_collection(_ops.GraphKeys.SUMMARIES, log_image) - # log_image = tf.summary.image(log_image_name, tf.expand_dims(log_image_data, 0), max_outputs=1) - return log_image, log_image_data, log_image_name - - def train_model(self, sess, max_iters, restore=False): - """Network training loop.""" - data_layer = get_data_layer(self.roidb, self.imdb.num_classes) - total_loss, model_loss, rpn_cross_entropy, rpn_loss_box = self.net.build_loss( - ohem=cfg.TRAIN.OHEM) - # scalar summary - tf.summary.scalar('rpn_reg_loss', rpn_loss_box) - tf.summary.scalar('rpn_cls_loss', rpn_cross_entropy) - tf.summary.scalar('model_loss', model_loss) - tf.summary.scalar('total_loss', total_loss) - summary_op = tf.summary.merge_all() - - log_image, log_image_data, log_image_name =\ - self.build_image_summary() - - # optimizer - lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False) - if cfg.TRAIN.SOLVER == 'Adam': - opt = tf.train.AdamOptimizer(cfg.TRAIN.LEARNING_RATE) - elif cfg.TRAIN.SOLVER == 'RMS': - opt = tf.train.RMSPropOptimizer(cfg.TRAIN.LEARNING_RATE) - else: - # lr = tf.Variable(0.0, trainable=False) - momentum = cfg.TRAIN.MOMENTUM - opt = tf.train.MomentumOptimizer(lr, momentum) - - global_step = tf.Variable(0, trainable=False) - with_clip = True - if with_clip: - tvars = tf.trainable_variables() - 
grads, norm = tf.clip_by_global_norm(
-                tf.gradients(total_loss, tvars), 10.0)
-            train_op = opt.apply_gradients(
-                list(zip(grads, tvars)), global_step=global_step)
-        else:
-            train_op = opt.minimize(total_loss, global_step=global_step)
-
-        # initialize variables
-        sess.run(tf.global_variables_initializer())
-        restore_iter = 0
-
-        # load vgg16
-        if self.pretrained_model is not None and not restore:
-            try:
-                print(('Loading pretrained model '
-                       'weights from {:s}').format(self.pretrained_model))
-                self.net.load(self.pretrained_model, sess, True)
-            except:
-                raise RuntimeError('Check your pretrained model {:s}'.format(
-                    self.pretrained_model))
-
-        # resuming a trainer
-        if restore:
-            try:
-                ckpt = tf.train.get_checkpoint_state(self.output_dir)
-                print('Restoring from {}...'.format(
-                    ckpt.model_checkpoint_path), end=' ')
-                self.saver.restore(sess, ckpt.model_checkpoint_path)
-                stem = os.path.splitext(os.path.basename(
-                    ckpt.model_checkpoint_path))[0]
-                restore_iter = int(stem.split('_')[-1])
-                sess.run(global_step.assign(restore_iter))
-                print('done')
-            except:
-                raise RuntimeError('Check your pretrained {:s}'.format(
-                    ckpt.model_checkpoint_path))
-
-        last_snapshot_iter = -1
-        timer = Timer()
-        for iter in range(restore_iter, max_iters):
-            timer.tic()
-            # learning rate
-            if iter != 0 and iter % cfg.TRAIN.STEPSIZE == 0:
-                sess.run(tf.assign(lr, lr.eval() * cfg.TRAIN.GAMMA))
-                print(lr)
-
-            # get one batch
-            blobs = data_layer.forward()
-
-            feed_dict = {
-                self.net.data: blobs['data'],
-                self.net.im_info: blobs['im_info'],
-                self.net.keep_prob: 0.5,
-                self.net.gt_boxes: blobs['gt_boxes'],
-                self.net.gt_ishard: blobs['gt_ishard'],
-                self.net.dontcare_areas: blobs['dontcare_areas']
-            }
-            res_fetches = []
-            fetch_list = [total_loss, model_loss, rpn_cross_entropy, rpn_loss_box,
-                          summary_op,
-                          train_op] + res_fetches
-
-            total_loss_val, model_loss_val, rpn_loss_cls_val, rpn_loss_box_val, \
-                summary_str, _ = sess.run(
-                    fetches=fetch_list, feed_dict=feed_dict)
-
-            self.writer.add_summary(
-                summary=summary_str, global_step=global_step.eval())
-
-            _diff_time = timer.toc(average=False)
-
-            if (iter) % (cfg.TRAIN.DISPLAY) == 0:
-                print('iter: %d / %d, total loss: %.4f, model loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, lr: %f' %
-                      (iter, max_iters, total_loss_val, model_loss_val, rpn_loss_cls_val, rpn_loss_box_val, lr.eval()))
-                print('speed: {:.3f}s / iter'.format(_diff_time))
-
-            if (iter+1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
-                last_snapshot_iter = iter
-                self.snapshot(sess, iter)
-
-        if last_snapshot_iter != iter:
-            self.snapshot(sess, iter)
-
-
-def get_training_roidb(imdb):
-    """Returns a roidb (Region of Interest database) for use in training."""
-    if cfg.TRAIN.USE_FLIPPED:
-        print('Appending horizontally-flipped training examples...')
-        imdb.append_flipped_images()
-        print('done')
-
-    print('Preparing training data...')
-    if cfg.TRAIN.HAS_RPN:
-        rdl_roidb.prepare_roidb(imdb)
-    else:
-        rdl_roidb.prepare_roidb(imdb)
-    print('done')
-
-    return imdb.roidb
-
-
-def get_data_layer(roidb, num_classes):
-    """return a data layer."""
-    if cfg.TRAIN.HAS_RPN:
-        if cfg.IS_MULTISCALE:
-            # obsolete
-            # layer = GtDataLayer(roidb)
-            raise NotImplementedError("Calling caffe modules...")
- else: - layer = RoIDataLayer(roidb, num_classes) - else: - layer = RoIDataLayer(roidb, num_classes) - - return layer - - -def train_net(network, imdb, roidb, output_dir, log_dir, pretrained_model=None, max_iters=40000, restore=False): - """Train a Fast R-CNN network.""" - - config = tf.ConfigProto(allow_soft_placement=True) - config.gpu_options.allocator_type = 'BFC' - config.gpu_options.per_process_gpu_memory_fraction = 0.75 - with tf.Session(config=config) as sess: - sw = SolverWrapper(sess, network, imdb, roidb, output_dir, - logdir=log_dir, pretrained_model=pretrained_model) - print('Solving...') - sw.train_model(sess, max_iters, restore=restore) - print('done solving') -import tensorflow as tf -from .network import Network -from ..fast_rcnn.config import cfg - - -class VGGnet_test(Network): - def __init__(self, trainable=True): - self.inputs = [] - self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3]) - self.im_info = tf.placeholder(tf.float32, shape=[None, 3]) - self.keep_prob = tf.placeholder(tf.float32) - self.layers = dict({'data': self.data, 'im_info': self.im_info}) - self.trainable = trainable - self.setup() - - def setup(self): - anchor_scales = cfg.ANCHOR_SCALES - _feat_stride = [16, ] - - (self.feed('data') - .conv(3, 3, 64, 1, 1, name='conv1_1') - .conv(3, 3, 64, 1, 1, name='conv1_2') - .max_pool(2, 2, 2, 2, padding='VALID', name='pool1') - .conv(3, 3, 128, 1, 1, name='conv2_1') - .conv(3, 3, 128, 1, 1, name='conv2_2') - .max_pool(2, 2, 2, 2, padding='VALID', name='pool2') - .conv(3, 3, 256, 1, 1, name='conv3_1') - .conv(3, 3, 256, 1, 1, name='conv3_2') - .conv(3, 3, 256, 1, 1, name='conv3_3') - .max_pool(2, 2, 2, 2, padding='VALID', name='pool3') - .conv(3, 3, 512, 1, 1, name='conv4_1') - .conv(3, 3, 512, 1, 1, name='conv4_2') - .conv(3, 3, 512, 1, 1, name='conv4_3') - .max_pool(2, 2, 2, 2, padding='VALID', name='pool4') - .conv(3, 3, 512, 1, 1, name='conv5_1') - .conv(3, 3, 512, 1, 1, name='conv5_2') - .conv(3, 3, 512, 1, 1, name='conv5_3')) - - (self.feed('conv5_3').conv(3, 3, 512, 1, 1, name='rpn_conv/3x3')) - - (self.feed('rpn_conv/3x3').Bilstm(512, 128, 512, name='lstm_o')) - (self.feed('lstm_o').lstm_fc( - 512, len(anchor_scales) * 10 * 4, name='rpn_bbox_pred')) - (self.feed('lstm_o').lstm_fc( - 512, len(anchor_scales) * 10 * 2, name='rpn_cls_score')) - - # shape is (1, H, W, Ax2) -> (1, H, WxA, 2) - (self.feed('rpn_cls_score') - .spatial_reshape_layer(2, name='rpn_cls_score_reshape') - .spatial_softmax(name='rpn_cls_prob')) - - # shape is (1, H, WxA, 2) -> (1, H, W, Ax2) - (self.feed('rpn_cls_prob') - .spatial_reshape_layer(len(anchor_scales) * 10 * 2, name='rpn_cls_prob_reshape')) - - (self.feed('rpn_cls_prob_reshape', 'rpn_bbox_pred', 'im_info') - .proposal_layer(_feat_stride, anchor_scales, 'TEST', name='rois')) -# -*- coding:utf-8 -*- -import tensorflow as tf -import numpy as np -from .network import Network -from ..fast_rcnn.config import cfg - - -class VGGnet_train(Network): - def __init__(self, trainable=True): - self.inputs = [] - self.data = tf.placeholder( - tf.float32, shape=[None, None, None, 3], name='data') - self.im_info = tf.placeholder( - tf.float32, shape=[None, 3], name='im_info') - self.gt_boxes = tf.placeholder( - tf.float32, shape=[None, 5], name='gt_boxes') - self.gt_ishard = tf.placeholder( - tf.int32, shape=[None], name='gt_ishard') - self.dontcare_areas = tf.placeholder( - tf.float32, shape=[None, 4], name='dontcare_areas') - self.keep_prob = tf.placeholder(tf.float32) - self.layers = dict({'data': self.data, 'im_info': 
self.im_info, 'gt_boxes': self.gt_boxes,
-                            'gt_ishard': self.gt_ishard, 'dontcare_areas': self.dontcare_areas})
-        self.trainable = trainable
-        self.setup()
-
-    def setup(self):
-
-        # n_classes = 21
-        n_classes = cfg.NCLASSES
-        # anchor_scales = [8, 16, 32]
-        anchor_scales = cfg.ANCHOR_SCALES
-        _feat_stride = [16, ]
-
-        (self.feed('data')
-         .conv(3, 3, 64, 1, 1, name='conv1_1')
-         .conv(3, 3, 64, 1, 1, name='conv1_2')
-         .max_pool(2, 2, 2, 2, padding='VALID', name='pool1')
-         .conv(3, 3, 128, 1, 1, name='conv2_1')
-         .conv(3, 3, 128, 1, 1, name='conv2_2')
-         .max_pool(2, 2, 2, 2, padding='VALID', name='pool2')
-         .conv(3, 3, 256, 1, 1, name='conv3_1')
-         .conv(3, 3, 256, 1, 1, name='conv3_2')
-         .conv(3, 3, 256, 1, 1, name='conv3_3')
-         .max_pool(2, 2, 2, 2, padding='VALID', name='pool3')
-         .conv(3, 3, 512, 1, 1, name='conv4_1')
-         .conv(3, 3, 512, 1, 1, name='conv4_2')
-         .conv(3, 3, 512, 1, 1, name='conv4_3')
-         .max_pool(2, 2, 2, 2, padding='VALID', name='pool4')
-         .conv(3, 3, 512, 1, 1, name='conv5_1')
-         .conv(3, 3, 512, 1, 1, name='conv5_2')
-         .conv(3, 3, 512, 1, 1, name='conv5_3'))
-        # ========= RPN ============
-        (self.feed('conv5_3')
-         .conv(3, 3, 512, 1, 1, name='rpn_conv/3x3'))
-
-        (self.feed('rpn_conv/3x3').Bilstm(512, 128, 512, name='lstm_o'))
-        (self.feed('lstm_o').lstm_fc(
-            512, len(anchor_scales) * 10 * 4, name='rpn_bbox_pred'))
-        (self.feed('lstm_o').lstm_fc(
-            512, len(anchor_scales) * 10 * 2, name='rpn_cls_score'))
-
-        # generating training labels on the fly
-        # output: rpn_labels(HxWxA, 2) rpn_bbox_targets(HxWxA, 4) rpn_bbox_inside_weights rpn_bbox_outside_weights
-        # label each anchor and compute the regression ground truth (also in delta form), plus the inside and outside weights
-        (self.feed('rpn_cls_score', 'gt_boxes', 'gt_ishard', 'dontcare_areas', 'im_info')
-         .anchor_target_layer(_feat_stride, anchor_scales, name='rpn-data'))
-
-        # shape is (1, H, W, Ax2) -> (1, H, WxA, 2)
-        # apply softmax to the scores computed above to get probabilities between 0 and 1
-        (self.feed('rpn_cls_score')
-         .spatial_reshape_layer(2, name='rpn_cls_score_reshape')
-         .spatial_softmax(name='rpn_cls_prob'))
-from .VGGnet_train import VGGnet_train
-from .VGGnet_test import VGGnet_test
-from . import factory
-from .VGGnet_test import VGGnet_test
-from .VGGnet_train import VGGnet_train
-
-
-def get_network(name):
-    """Get a network by name."""
-    if name.split('_')[0] == 'VGGnet':
-        if name.split('_')[1] == 'test':
-            return VGGnet_test()
-        elif name.split('_')[1] == 'train':
-            return VGGnet_train()
-        else:
-            raise KeyError('Unknown dataset: {}'.format(name))
-    else:
-        raise KeyError('Unknown dataset: {}'.format(name))
-# -*- coding:utf-8 -*-
-import numpy as np
-import tensorflow as tf
-from ..fast_rcnn.config import cfg
-from ..rpn_msr.proposal_layer_tf import proposal_layer as proposal_layer_py
-from ..rpn_msr.anchor_target_layer_tf import anchor_target_layer as anchor_target_layer_py
-
-
-DEFAULT_PADDING = 'SAME'
-
-
-def layer(op):
-    def layer_decorated(self, *args, **kwargs):
-        # Automatically set a name if not provided.
-        name = kwargs.setdefault('name', self.get_unique_name(op.__name__))
-        # Figure out the layer inputs.
-        if len(self.inputs) == 0:
-            raise RuntimeError('No input variables found for layer %s.' % name)
-        elif len(self.inputs) == 1:
-            layer_input = self.inputs[0]
-        else:
-            layer_input = list(self.inputs)
-        # Perform the operation and get the output.
-        layer_output = op(self, layer_input, *args, **kwargs)
-        # Add to layer LUT.
-        self.layers[name] = layer_output
-        # This output is now the input for the next layer.
-        self.feed(layer_output)
-        # Return self for chained calls.
- return self - return layer_decorated - - -class Network(object): - def __init__(self, inputs, trainable=True): - self.inputs = [] - self.layers = dict(inputs) - self.trainable = trainable - self.setup() - - def setup(self): - raise NotImplementedError('Must be subclassed.') - - def load(self, data_path, session, ignore_missing=False): - data_dict = np.load(data_path, encoding='latin1').item() - for key in data_dict: - with tf.variable_scope(key, reuse=True): - for subkey in data_dict[key]: - try: - var = tf.get_variable(subkey) - session.run(var.assign(data_dict[key][subkey])) - print("assign pretrain model "+subkey + " to "+key) - except ValueError: - print("ignore "+key) - if not ignore_missing: - - raise - - def feed(self, *args): - assert len(args) != 0 - self.inputs = [] - for layer in args: - if isinstance(layer, str): - try: - layer = self.layers[layer] - print(layer) - except KeyError: - print(list(self.layers.keys())) - raise KeyError('Unknown layer name fed: %s' % layer) - self.inputs.append(layer) - return self - - def get_output(self, layer): - try: - layer = self.layers[layer] - except KeyError: - print(list(self.layers.keys())) - raise KeyError('Unknown layer name fed: %s' % layer) - return layer - - def get_unique_name(self, prefix): - id = sum(t.startswith(prefix) for t, _ in list(self.layers.items()))+1 - return '%s_%d' % (prefix, id) - - def make_var(self, name, shape, initializer=None, trainable=True, regularizer=None): - return tf.get_variable(name, shape, initializer=initializer, trainable=trainable, regularizer=regularizer) - - def validate_padding(self, padding): - assert padding in ('SAME', 'VALID') - - @layer - def Bilstm(self, input, d_i, d_h, d_o, name, trainable=True): - img = input - with tf.variable_scope(name) as scope: - shape = tf.shape(img) - N, H, W, C = shape[0], shape[1], shape[2], shape[3] - img = tf.reshape(img, [N * H, W, C]) - img.set_shape([None, None, d_i]) - - lstm_fw_cell = tf.contrib.rnn.LSTMCell(d_h, state_is_tuple=True) - lstm_bw_cell = tf.contrib.rnn.LSTMCell(d_h, state_is_tuple=True) - - lstm_out, last_state = tf.nn.bidirectional_dynamic_rnn( - lstm_fw_cell, lstm_bw_cell, img, dtype=tf.float32) - lstm_out = tf.concat(lstm_out, axis=-1) - - lstm_out = tf.reshape(lstm_out, [N * H * W, 2*d_h]) - - init_weights = tf.truncated_normal_initializer(stddev=0.1) - init_biases = tf.constant_initializer(0.0) - weights = self.make_var('weights', [2*d_h, d_o], init_weights, trainable, - regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)) - biases = self.make_var('biases', [d_o], init_biases, trainable) - outputs = tf.matmul(lstm_out, weights) + biases - - outputs = tf.reshape(outputs, [N, H, W, d_o]) - return outputs - - @layer - def lstm(self, input, d_i, d_h, d_o, name, trainable=True): - img = input - with tf.variable_scope(name) as scope: - shape = tf.shape(img) - N, H, W, C = shape[0], shape[1], shape[2], shape[3] - img = tf.reshape(img, [N*H, W, C]) - img.set_shape([None, None, d_i]) - - lstm_cell = tf.contrib.rnn.LSTMCell(d_h, state_is_tuple=True) - initial_state = lstm_cell.zero_state(N*H, dtype=tf.float32) - - lstm_out, last_state = tf.nn.dynamic_rnn(lstm_cell, img, - initial_state=initial_state, dtype=tf.float32) - - lstm_out = tf.reshape(lstm_out, [N*H*W, d_h]) - - init_weights = tf.truncated_normal_initializer(stddev=0.1) - init_biases = tf.constant_initializer(0.0) - weights = self.make_var('weights', [d_h, d_o], init_weights, trainable, - regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)) - biases = self.make_var('biases', 
[d_o], init_biases, trainable) - outputs = tf.matmul(lstm_out, weights) + biases - - outputs = tf.reshape(outputs, [N, H, W, d_o]) - return outputs - - @layer - def lstm_fc(self, input, d_i, d_o, name, trainable=True): - with tf.variable_scope(name) as scope: - shape = tf.shape(input) - N, H, W, C = shape[0], shape[1], shape[2], shape[3] - input = tf.reshape(input, [N*H*W, C]) - - init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01) - init_biases = tf.constant_initializer(0.0) - kernel = self.make_var('weights', [d_i, d_o], init_weights, trainable, - regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)) - biases = self.make_var('biases', [d_o], init_biases, trainable) - - _O = tf.matmul(input, kernel) + biases - return tf.reshape(_O, [N, H, W, int(d_o)]) - - @layer - def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, biased=True, relu=True, padding=DEFAULT_PADDING, trainable=True): - """ contribution by miraclebiu, and biased option""" - self.validate_padding(padding) - c_i = input.get_shape()[-1] - def convolve(i, k): return tf.nn.conv2d( - i, k, [1, s_h, s_w, 1], padding=padding) - with tf.variable_scope(name) as scope: - - init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01) - init_biases = tf.constant_initializer(0.0) - kernel = self.make_var('weights', [k_h, k_w, c_i, c_o], init_weights, trainable, - regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)) - if biased: - biases = self.make_var('biases', [c_o], init_biases, trainable) - conv = convolve(input, kernel) - if relu: - bias = tf.nn.bias_add(conv, biases) - return tf.nn.relu(bias, name=scope.name) - return tf.nn.bias_add(conv, biases, name=scope.name) - else: - conv = convolve(input, kernel) - if relu: - return tf.nn.relu(conv, name=scope.name) - return conv - - @layer - def relu(self, input, name): - return tf.nn.relu(input, name=name) - - @layer - def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): - self.validate_padding(padding) - return tf.nn.max_pool(input, - ksize=[1, k_h, k_w, 1], - strides=[1, s_h, s_w, 1], - padding=padding, - name=name) - - @layer - def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): - self.validate_padding(padding) - return tf.nn.avg_pool(input, - ksize=[1, k_h, k_w, 1], - strides=[1, s_h, s_w, 1], - padding=padding, - name=name) - - @layer - def proposal_layer(self, input, _feat_stride, anchor_scales, cfg_key, name): - if isinstance(input[0], tuple): - input[0] = input[0][0] - # input[0] shape is (1, H, W, Ax2) - # rpn_rois <- (1 x H x W x A, 5) [0, x1, y1, x2, y2] - with tf.variable_scope(name) as scope: - blob, bbox_delta = tf.py_func(proposal_layer_py, [input[0], input[1], input[2], cfg_key, _feat_stride, anchor_scales], - [tf.float32, tf.float32]) - - rpn_rois = tf.convert_to_tensor(tf.reshape( - blob, [-1, 5]), name='rpn_rois') # shape is (1 x H x W x A, 2) - rpn_targets = tf.convert_to_tensor( - bbox_delta, name='rpn_targets') # shape is (1 x H x W x A, 4) - self.layers['rpn_rois'] = rpn_rois - self.layers['rpn_targets'] = rpn_targets - - return rpn_rois, rpn_targets - - @layer - def anchor_target_layer(self, input, _feat_stride, anchor_scales, name): - if isinstance(input[0], tuple): - input[0] = input[0][0] - - with tf.variable_scope(name) as scope: - # 'rpn_cls_score', 'gt_boxes', 'gt_ishard', 'dontcare_areas', 'im_info' - rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = \ - tf.py_func(anchor_target_layer_py, - [input[0], input[1], input[2], input[3], - input[4], 
_feat_stride, anchor_scales], - [tf.float32, tf.float32, tf.float32, tf.float32]) - - rpn_labels = tf.convert_to_tensor( - tf.cast(rpn_labels, tf.int32), name='rpn_labels') # shape is (1 x H x W x A, 2) - rpn_bbox_targets = tf.convert_to_tensor( - rpn_bbox_targets, name='rpn_bbox_targets') # shape is (1 x H x W x A, 4) - rpn_bbox_inside_weights = tf.convert_to_tensor( - rpn_bbox_inside_weights, name='rpn_bbox_inside_weights') # shape is (1 x H x W x A, 4) - rpn_bbox_outside_weights = tf.convert_to_tensor( - rpn_bbox_outside_weights, name='rpn_bbox_outside_weights') # shape is (1 x H x W x A, 4) - - return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights - - @layer - def reshape_layer(self, input, d, name): - input_shape = tf.shape(input) - if name == 'rpn_cls_prob_reshape': - # - # transpose: (1, AxH, W, 2) -> (1, 2, AxH, W) - # reshape: (1, 2xA, H, W) - # transpose: -> (1, H, W, 2xA) - return tf.transpose(tf.reshape(tf.transpose(input, [0, 3, 1, 2]), - [input_shape[0], - int(d), - tf.cast(tf.cast(input_shape[1], tf.float32)/tf.cast( - d, tf.float32)*tf.cast(input_shape[3], tf.float32), tf.int32), - input_shape[2] - ]), - [0, 2, 3, 1], name=name) - else: - return tf.transpose(tf.reshape(tf.transpose(input, [0, 3, 1, 2]), - [input_shape[0], - int(d), - tf.cast(tf.cast(input_shape[1], tf.float32)*( - tf.cast(input_shape[3], tf.float32)/tf.cast(d, tf.float32)), tf.int32), - input_shape[2] - ]), - [0, 2, 3, 1], name=name) - - @layer - def spatial_reshape_layer(self, input, d, name): - input_shape = tf.shape(input) - # transpose: (1, H, W, A x d) -> (1, H, WxA, d) - return tf.reshape(input, - [input_shape[0], - input_shape[1], - -1, - int(d)]) - - @layer - def lrn(self, input, radius, alpha, beta, name, bias=1.0): - return tf.nn.local_response_normalization(input, - depth_radius=radius, - alpha=alpha, - beta=beta, - bias=bias, - name=name) - - @layer - def concat(self, inputs, axis, name): - return tf.concat(concat_dim=axis, values=inputs, name=name) - - @layer - def fc(self, input, num_out, name, relu=True, trainable=True): - with tf.variable_scope(name) as scope: - # only use the first input - if isinstance(input, tuple): - input = input[0] - - input_shape = input.get_shape() - if input_shape.ndims == 4: - dim = 1 - for d in input_shape[1:].as_list(): - dim *= d - feed_in = tf.reshape(tf.transpose( - input, [0, 3, 1, 2]), [-1, dim]) - else: - feed_in, dim = (input, int(input_shape[-1])) - - if name == 'bbox_pred': - init_weights = tf.truncated_normal_initializer( - 0.0, stddev=0.001) - init_biases = tf.constant_initializer(0.0) - else: - init_weights = tf.truncated_normal_initializer( - 0.0, stddev=0.01) - init_biases = tf.constant_initializer(0.0) - - weights = self.make_var('weights', [dim, num_out], init_weights, trainable, - regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)) - biases = self.make_var('biases', [num_out], init_biases, trainable) - - op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b - fc = op(feed_in, weights, biases, name=scope.name) - return fc - - @layer - def softmax(self, input, name): - input_shape = tf.shape(input) - if name == 'rpn_cls_prob': - return tf.reshape(tf.nn.softmax(tf.reshape(input, [-1, input_shape[3]])), [-1, input_shape[1], input_shape[2], input_shape[3]], name=name) - else: - return tf.nn.softmax(input, name=name) - - @layer - def spatial_softmax(self, input, name): - input_shape = tf.shape(input) - # d = input.get_shape()[-1] - return tf.reshape(tf.nn.softmax(tf.reshape(input, [-1, input_shape[3]])), - [-1, 
input_shape[1], input_shape[2], input_shape[3]], name=name) - - @layer - def add(self, input, name): - """contribution by miraclebiu""" - return tf.add(input[0], input[1]) - - @layer - def batch_normalization(self, input, name, relu=True, is_training=False): - """contribution by miraclebiu""" - if relu: - temp_layer = tf.contrib.layers.batch_norm( - input, scale=True, center=True, is_training=is_training, scope=name) - return tf.nn.relu(temp_layer) - else: - return tf.contrib.layers.batch_norm(input, scale=True, center=True, is_training=is_training, scope=name) - - @layer - def dropout(self, input, keep_prob, name): - return tf.nn.dropout(input, keep_prob, name=name) - - def l2_regularizer(self, weight_decay=0.0005, scope=None): - def regularizer(tensor): - with tf.name_scope(scope, default_name='l2_regularizer', values=[tensor]): - l2_weight = tf.convert_to_tensor(weight_decay, - dtype=tensor.dtype.base_dtype, - name='weight_decay') - # return tf.mul(l2_weight, tf.nn.l2_loss(tensor), name='value') - return tf.multiply(l2_weight, tf.nn.l2_loss(tensor), name='value') - return regularizer - - def smooth_l1_dist(self, deltas, sigma2=9.0, name='smooth_l1_dist'): - with tf.name_scope(name=name) as scope: - deltas_abs = tf.abs(deltas) - smoothL1_sign = tf.cast( - tf.less(deltas_abs, 1.0/sigma2), tf.float32) - return tf.square(deltas) * 0.5 * sigma2 * smoothL1_sign + \ - (deltas_abs - 0.5 / sigma2) * tf.abs(smoothL1_sign - 1) - - def build_loss(self, ohem=False): - # classification loss - rpn_cls_score = tf.reshape(self.get_output( - 'rpn_cls_score_reshape'), [-1, 2]) # shape (HxWxA, 2) - rpn_label = tf.reshape(self.get_output( - 'rpn-data')[0], [-1]) # shape (HxWxA) - # ignore_label(-1) - fg_keep = tf.equal(rpn_label, 1) - rpn_keep = tf.where(tf.not_equal(rpn_label, -1)) - rpn_cls_score = tf.gather(rpn_cls_score, rpn_keep) # shape (N, 2) - rpn_label = tf.gather(rpn_label, rpn_keep) - rpn_cross_entropy_n = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=rpn_label, logits=rpn_cls_score) - - # box loss - rpn_bbox_pred = self.get_output( - 'rpn_bbox_pred') # shape (1, H, W, Ax4) - rpn_bbox_targets = self.get_output('rpn-data')[1] - rpn_bbox_inside_weights = self.get_output('rpn-data')[2] - rpn_bbox_outside_weights = self.get_output('rpn-data')[3] - rpn_bbox_pred = tf.gather(tf.reshape( - rpn_bbox_pred, [-1, 4]), rpn_keep) # shape (N, 4) - rpn_bbox_targets = tf.gather(tf.reshape( - rpn_bbox_targets, [-1, 4]), rpn_keep) - rpn_bbox_inside_weights = tf.gather(tf.reshape( - rpn_bbox_inside_weights, [-1, 4]), rpn_keep) - rpn_bbox_outside_weights = tf.gather(tf.reshape( - rpn_bbox_outside_weights, [-1, 4]), rpn_keep) - - rpn_loss_box_n = tf.reduce_sum(rpn_bbox_outside_weights * self.smooth_l1_dist( - rpn_bbox_inside_weights * (rpn_bbox_pred - rpn_bbox_targets)), reduction_indices=[1]) - - rpn_loss_box = tf.reduce_sum( - rpn_loss_box_n) / (tf.reduce_sum(tf.cast(fg_keep, tf.float32)) + 1) - rpn_cross_entropy = tf.reduce_mean(rpn_cross_entropy_n) - - model_loss = rpn_cross_entropy + rpn_loss_box - - regularization_losses = tf.get_collection( - tf.GraphKeys.REGULARIZATION_LOSSES) - total_loss = tf.add_n(regularization_losses) + model_loss - - return total_loss, model_loss, rpn_cross_entropy, rpn_loss_box -from . 
import roidb -# -------------------------------------------------------- -# Fast R-CNN -# Copyright (c) 2015 Microsoft -# Licensed under The MIT License [see LICENSE for details] -# Written by Ross Girshick -# -------------------------------------------------------- - -"""The data layer used during training to train a Fast R-CNN network. - -RoIDataLayer implements a Caffe Python layer. -""" - -import numpy as np - -# TODO: make fast_rcnn irrelevant -# >>>> obsolete, because it depends on sth outside of this project -from ..fast_rcnn.config import cfg -# <<<< obsolete -from ..roi_data_layer.minibatch import get_minibatch - - -class RoIDataLayer(object): - """Fast R-CNN data layer used for training.""" - - def __init__(self, roidb, num_classes): - """Set the roidb to be used by this layer during training.""" - self._roidb = roidb - self._num_classes = num_classes - self._shuffle_roidb_inds() - - def _shuffle_roidb_inds(self): - """Randomly permute the training roidb.""" - self._perm = np.random.permutation(np.arange(len(self._roidb))) - self._cur = 0 - - def _get_next_minibatch_inds(self): - """Return the roidb indices for the next minibatch.""" - - if cfg.TRAIN.HAS_RPN: - if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): - self._shuffle_roidb_inds() - - db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] - self._cur += cfg.TRAIN.IMS_PER_BATCH - else: - # sample images - db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32) - i = 0 - while (i < cfg.TRAIN.IMS_PER_BATCH): - ind = self._perm[self._cur] - num_objs = self._roidb[ind]['boxes'].shape[0] - if num_objs != 0: - db_inds[i] = ind - i += 1 - - self._cur += 1 - if self._cur >= len(self._roidb): - self._shuffle_roidb_inds() - - return db_inds - - def _get_next_minibatch(self): - """Return the blobs to be used for the next minibatch. - - If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a - separate process and made available through self._blob_queue. - """ - db_inds = self._get_next_minibatch_inds() - minibatch_db = [self._roidb[i] for i in db_inds] - return get_minibatch(minibatch_db, self._num_classes) - - def forward(self): - """Get blobs and copy them into this layer's top blob vector.""" - blobs = self._get_next_minibatch() - return blobs -import numpy as np -import numpy.random as npr -import cv2 -import os - -from ..fast_rcnn.config import cfg -from ..utils.blob import prep_im_for_blob, im_list_to_blob - - -def get_minibatch(roidb, num_classes): - """Given a roidb, construct a minibatch sampled from it.""" - num_images = len(roidb) - # Sample random scales to use for each image in this batch - random_scale_inds = npr.randint(0, high=len(cfg.TRAIN.SCALES), - size=num_images) - assert(cfg.TRAIN.BATCH_SIZE % num_images == 0), \ - 'num_images ({}) must divide BATCH_SIZE ({})'. 
\ - format(num_images, cfg.TRAIN.BATCH_SIZE) - rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images - fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) - - # Get the input image blob, formatted for caffe - im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) - - blobs = {'data': im_blob} - - if cfg.TRAIN.HAS_RPN: - assert len(im_scales) == 1, "Single batch only" - assert len(roidb) == 1, "Single batch only" - # gt boxes: (x1, y1, x2, y2, cls) - gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] - gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) - gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] - gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] - blobs['gt_boxes'] = gt_boxes - blobs['gt_ishard'] = roidb[0]['gt_ishard'][gt_inds] \ - if 'gt_ishard' in roidb[0] else np.zeros(gt_inds.size, dtype=int) - # blobs['gt_ishard'] = roidb[0]['gt_ishard'][gt_inds] - blobs['dontcare_areas'] = roidb[0]['dontcare_areas'] * im_scales[0] \ - if 'dontcare_areas' in roidb[0] else np.zeros([0, 4], dtype=float) - blobs['im_info'] = np.array( - [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], - dtype=np.float32) - blobs['im_name'] = os.path.basename(roidb[0]['image']) - - else: # not using RPN - # Now, build the region of interest and label blobs - rois_blob = np.zeros((0, 5), dtype=np.float32) - labels_blob = np.zeros((0), dtype=np.float32) - bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32) - bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) - # all_overlaps = [] - for im_i in range(num_images): - labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \ - = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, - num_classes) - - # Add to RoIs blob - rois = _project_im_rois(im_rois, im_scales[im_i]) - batch_ind = im_i * np.ones((rois.shape[0], 1)) - rois_blob_this_image = np.hstack((batch_ind, rois)) - rois_blob = np.vstack((rois_blob, rois_blob_this_image)) - - # Add to labels, bbox targets, and bbox loss blobs - labels_blob = np.hstack((labels_blob, labels)) - bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets)) - bbox_inside_blob = np.vstack( - (bbox_inside_blob, bbox_inside_weights)) - # all_overlaps = np.hstack((all_overlaps, overlaps)) - - # For debug visualizations - # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps) - - blobs['rois'] = rois_blob - blobs['labels'] = labels_blob - - if cfg.TRAIN.BBOX_REG: - blobs['bbox_targets'] = bbox_targets_blob - blobs['bbox_inside_weights'] = bbox_inside_blob - blobs['bbox_outside_weights'] = \ - np.array(bbox_inside_blob > 0).astype(np.float32) - - return blobs - - -def _sample_rois(roidb, fg_rois_per_image, rois_per_image, num_classes): - """Generate a random sample of RoIs comprising foreground and background - examples. 
- """ - # label = class RoI has max overlap with - labels = roidb['max_classes'] - overlaps = roidb['max_overlaps'] - rois = roidb['boxes'] - - # Select foreground RoIs as those with >= FG_THRESH overlap - fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] - # Guard against the case when an image has fewer than fg_rois_per_image - # foreground RoIs - fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) - # Sample foreground regions without replacement - if fg_inds.size > 0: - fg_inds = npr.choice( - fg_inds, size=fg_rois_per_this_image, replace=False) - - # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) - bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & - (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] - # Compute number of background RoIs to take from this image (guarding - # against there being fewer than desired) - bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image - bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, - bg_inds.size) - # Sample foreground regions without replacement - if bg_inds.size > 0: - bg_inds = npr.choice( - bg_inds, size=bg_rois_per_this_image, replace=False) - - # The indices that we're selecting (both fg and bg) - keep_inds = np.append(fg_inds, bg_inds) - # Select sampled values from various arrays: - labels = labels[keep_inds] - # Clamp labels for the background RoIs to 0 - labels[fg_rois_per_this_image:] = 0 - overlaps = overlaps[keep_inds] - rois = rois[keep_inds] - - bbox_targets, bbox_inside_weights = _get_bbox_regression_labels( - roidb['bbox_targets'][keep_inds, :], num_classes) - - return labels, overlaps, rois, bbox_targets, bbox_inside_weights - - -def _get_image_blob(roidb, scale_inds): - """Builds an input blob from the images in the roidb at the specified - scales. - """ - num_images = len(roidb) - processed_ims = [] - im_scales = [] - for i in range(num_images): - im = cv2.imread(roidb[i]['image']) - if roidb[i]['flipped']: - im = im[:, ::-1, :] - target_size = cfg.TRAIN.SCALES[scale_inds[i]] - im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, - cfg.TRAIN.MAX_SIZE) - im_scales.append(im_scale) - processed_ims.append(im) - - # Create a blob to hold the input images - blob = im_list_to_blob(processed_ims) - - return blob, im_scales - - -def _project_im_rois(im_rois, im_scale_factor): - """Project image RoIs into the rescaled training image.""" - rois = im_rois * im_scale_factor - return rois - - -def _get_bbox_regression_labels(bbox_target_data, num_classes): - """Bounding-box regression targets are stored in a compact form in the - roidb. - - This function expands those targets into the 4-of-4*K representation used - by the network (i.e. only one class has non-zero targets). The loss weights - are similarly expanded. 
-
-    Returns:
-        bbox_target_data (ndarray): N x 4K blob of regression targets
-        bbox_inside_weights (ndarray): N x 4K blob of loss weights
-    """
-    clss = bbox_target_data[:, 0]
-    bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32)
-    bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32)
-    inds = np.where(clss > 0)[0]
-    for ind in inds:
-        cls = clss[ind]
-        start = 4 * cls
-        end = start + 4
-        bbox_targets[ind, start:end] = bbox_target_data[ind, 1:]
-        bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS
-    return bbox_targets, bbox_inside_weights
-
-
-def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps):
-    """Visualize a mini-batch for debugging."""
-    import matplotlib.pyplot as plt
-    for i in range(rois_blob.shape[0]):
-        rois = rois_blob[i, :]
-        im_ind = rois[0]
-        roi = rois[1:]
-        im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy()
-        im += cfg.PIXEL_MEANS
-        im = im[:, :, (2, 1, 0)]
-        im = im.astype(np.uint8)
-        cls = labels_blob[i]
-        plt.imshow(im)
-        print('class: ', cls, ' overlap: ', overlaps[i])
-        plt.gca().add_patch(
-            plt.Rectangle((roi[0], roi[1]), roi[2] - roi[0],
-                          roi[3] - roi[1], fill=False,
-                          edgecolor='r', linewidth=3)
-        )
-        plt.show()
-import numpy as np
-import PIL
-from ..fast_rcnn.config import cfg
-from ..fast_rcnn.bbox_transform import bbox_transform
-from lib.utils.bbox import bbox_overlaps
-
-
-def prepare_roidb(imdb):
-    """Enrich the imdb's roidb by adding some derived quantities that
-    are useful for training. This function precomputes the maximum
-    overlap, taken over ground-truth boxes, between each ROI and
-    each ground-truth box. The class with maximum overlap is also
-    recorded.
-    """
-    sizes = [PIL.Image.open(imdb.image_path_at(i)).size
-             for i in range(imdb.num_images)]
-    roidb = imdb.roidb
-    for i in range(len(imdb.image_index)):
-        roidb[i]['image'] = imdb.image_path_at(i)
-        roidb[i]['width'] = sizes[i][0]
-        roidb[i]['height'] = sizes[i][1]
-        # need gt_overlaps as a dense array for argmax
-        gt_overlaps = roidb[i]['gt_overlaps'].toarray()
-        # max overlap with gt over classes (columns)
-        max_overlaps = gt_overlaps.max(axis=1)
-        # gt class that had the max overlap
-        max_classes = gt_overlaps.argmax(axis=1)
-        roidb[i]['max_classes'] = max_classes
-        roidb[i]['max_overlaps'] = max_overlaps
-        # sanity checks
-        # max overlap of 0 => class should be zero (background)
-        zero_inds = np.where(max_overlaps == 0)[0]
-        assert all(max_classes[zero_inds] == 0)
-        # max overlap > 0 => class should not be zero (must be a fg class)
-        nonzero_inds = np.where(max_overlaps > 0)[0]
-        assert all(max_classes[nonzero_inds] != 0)
-
-
-def add_bbox_regression_targets(roidb):
-    """
-    Add information needed to train bounding-box regressors.
-    For each roi find the corresponding gt box and compute the distance;
-    then normalize the distances by subtracting the mean and dividing by the std.
-    """
-    assert len(roidb) > 0
-    assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?'
-
-    num_images = len(roidb)
-    # Infer number of classes from the number of columns in gt_overlaps
-    num_classes = roidb[0]['gt_overlaps'].shape[1]
-    for im_i in range(num_images):
-        rois = roidb[im_i]['boxes']
-        max_overlaps = roidb[im_i]['max_overlaps']
-        max_classes = roidb[im_i]['max_classes']
-        roidb[im_i]['bbox_targets'] = \
-            _compute_targets(rois, max_overlaps, max_classes)
-
-    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
-        # Use fixed / precomputed "means" and "stds" instead of empirical values
-        means = np.tile(
-            np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1))
-        stds = np.tile(
-            np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1))
-    else:
-        # Compute values needed for means and stds
-        # var(x) = E(x^2) - E(x)^2
-        class_counts = np.zeros((num_classes, 1)) + cfg.EPS
-        sums = np.zeros((num_classes, 4))
-        squared_sums = np.zeros((num_classes, 4))
-        for im_i in range(num_images):
-            targets = roidb[im_i]['bbox_targets']
-            for cls in range(1, num_classes):
-                cls_inds = np.where(targets[:, 0] == cls)[0]
-                if cls_inds.size > 0:
-                    class_counts[cls] += cls_inds.size
-                    sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
-                    squared_sums[cls, :] += \
-                        (targets[cls_inds, 1:] ** 2).sum(axis=0)
-
-        means = sums / class_counts
-        stds = np.sqrt(squared_sums / class_counts - means ** 2)
-        # a std that is too small will cause nan errors during normalization
-        assert np.min(stds) >= 0.01, \
-            'Boxes std is too small, std:{}'.format(stds)
-
-        print('bbox target means:')
-        print(means)
-        print(means[1:, :].mean(axis=0))  # ignore bg class
-        print('bbox target stdevs:')
-        print(stds)
-        print(stds[1:, :].mean(axis=0))  # ignore bg class
-
-    # Normalize targets
-    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS:
-        print("Normalizing targets")
-        for im_i in range(num_images):
-            targets = roidb[im_i]['bbox_targets']
-            for cls in range(1, num_classes):
-                cls_inds = np.where(targets[:, 0] == cls)[0]
-                roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
-                roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]
-    else:
-        print("NOT normalizing targets")
-
-    # These values will be needed for making predictions
-    # (the predictions will need to be unnormalized and uncentered)
-    return means.ravel(), stds.ravel()
-
-
-def _compute_targets(rois, overlaps, labels):
-    """
-    Compute bounding-box regression targets for an image.
-    for each roi find the corresponding gt_box, then compute the distance.
-def _compute_targets(rois, overlaps, labels):
-    """
-    Compute bounding-box regression targets for an image:
-    for each roi find the corresponding gt_box, then compute the distance.
-    """
-    # Indices of ground-truth ROIs
-    gt_inds = np.where(overlaps == 1)[0]
-    if len(gt_inds) == 0:
-        # Bail if the image has no ground-truth ROIs
-        return np.zeros((rois.shape[0], 5), dtype=np.float32)
-    # Indices of examples for which we try to make predictions
-    ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0]
-
-    # Get IoU overlap between each ex ROI and gt ROI
-    ex_gt_overlaps = bbox_overlaps(
-        np.ascontiguousarray(rois[ex_inds, :], dtype=np.float),
-        np.ascontiguousarray(rois[gt_inds, :], dtype=np.float))
-
-    # Find which gt ROI each ex ROI has max overlap with:
-    # this will be the ex ROI's gt target
-    gt_assignment = ex_gt_overlaps.argmax(axis=1)
-    gt_rois = rois[gt_inds[gt_assignment], :]
-    ex_rois = rois[ex_inds, :]
-
-    targets = np.zeros((rois.shape[0], 5), dtype=np.float32)
-    targets[ex_inds, 0] = labels[ex_inds]
-    targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois)
-    return targets
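For reference, a self-contained sketch of the (dx, dy, dw, dh) parameterization that bbox_transform computes in stock Faster R-CNN; bbox_transform_sketch is a hypothetical name and this is an assumption about lib.fast_rcnn.bbox_transform, not a verified copy:

import numpy as np

def bbox_transform_sketch(ex_rois, gt_rois):
    # boxes are [x1, y1, x2, y2]; derive widths, heights and centers
    ex_w = ex_rois[:, 2] - ex_rois[:, 0] + 1.0
    ex_h = ex_rois[:, 3] - ex_rois[:, 1] + 1.0
    ex_cx = ex_rois[:, 0] + 0.5 * ex_w
    ex_cy = ex_rois[:, 1] + 0.5 * ex_h
    gt_w = gt_rois[:, 2] - gt_rois[:, 0] + 1.0
    gt_h = gt_rois[:, 3] - gt_rois[:, 1] + 1.0
    gt_cx = gt_rois[:, 0] + 0.5 * gt_w
    gt_cy = gt_rois[:, 1] + 0.5 * gt_h
    # normalized center offsets and log-scale size ratios, shape (N, 4)
    return np.vstack(((gt_cx - ex_cx) / ex_w,
                      (gt_cy - ex_cy) / ex_h,
                      np.log(gt_w / ex_w),
                      np.log(gt_h / ex_h))).transpose()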
-# -*- coding:utf-8 -*-
-import numpy as np
-import numpy.random as npr
-from .generate_anchors import generate_anchors
-from ..utils.bbox import bbox_overlaps, bbox_intersections
-from ..fast_rcnn.config import cfg
-from ..fast_rcnn.bbox_transform import bbox_transform
-
-DEBUG = False
-
-
-def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride=[16, ], anchor_scales=[16, ]):
-    """
-    Assign anchors to ground-truth targets. Produces anchor classification
-    labels and bounding-box regression targets.
-    Parameters
-    ----------
-    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer
-    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
-    gt_ishard: (G, 1), 1 or 0 indicates difficult or not
-    dontcare_areas: (D, 4), some areas may contain small objects but no labelling. D may be 0
-    im_info: a list of [image_height, image_width, scale_ratios]
-    _feat_stride: the downsampling ratio of feature map to the original input image
-    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
-    ----------
-    Returns
-    ----------
-    rpn_labels : (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
-    rpn_bbox_targets: (HxWxA, 4), (possibly transformed) offsets from the anchors
-        to the gt_boxes that serve as the regression objectives
-    rpn_bbox_inside_weights: (HxWxA, 4) weights of each box, taken from cfg hyperparams
-    rpn_bbox_outside_weights: (HxWxA, 4) used to balance fg/bg,
-        because the numbers of bg and fg samples may differ significantly
-    """
-    _anchors = generate_anchors(scales=np.array(
-        anchor_scales))  # generate the base anchors, 9 in total
-    _num_anchors = _anchors.shape[0]  # 9 anchors
-
-    if DEBUG:
-        print('anchors:')
-        print(_anchors)
-        print('anchor shapes:')
-        print(np.hstack((
-            _anchors[:, 2::4] - _anchors[:, 0::4],
-            _anchors[:, 3::4] - _anchors[:, 1::4],
-        )))
-        _counts = cfg.EPS
-        _sums = np.zeros((1, 4))
-        _squared_sums = np.zeros((1, 4))
-        _fg_sum = 0
-        _bg_sum = 0
-        _count = 0
-
-    # allow boxes to sit over the edge by a small amount
-    _allowed_border = 0
-    # map of shape (..., H, W)
-    # height, width = rpn_cls_score.shape[1:3]
-
-    im_info = im_info[0]  # height, width and channel count of the image
-
-    # locate the anchors on the feature map and add the deltas to get
-    # each anchor's real coordinates in the original image
-    # Algorithm:
-    # for each (H, W) location i
-    #     generate 9 anchor boxes centered on cell i
-    #     apply predicted bbox deltas at cell i to each of the 9 anchors
-    # filter out-of-image anchors
-    # measure GT overlap
-
-    assert rpn_cls_score.shape[0] == 1, \
-        'Only single item batches are supported'
-
-    # map of shape (..., H, W)
-    height, width = rpn_cls_score.shape[1:3]  # height/width of the feature map
-
-    if DEBUG:
-        print('AnchorTargetLayer: height', height, 'width', width)
-        print('')
-        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
-        print('scale: {}'.format(im_info[2]))
-        print('height, width: ({}, {})'.format(height, width))
-        print('rpn: gt_boxes.shape', gt_boxes.shape)
-        print('rpn: gt_boxes', gt_boxes)
-
-    # 1. Generate proposals from bbox deltas and shifted anchors
-    shift_x = np.arange(0, width) * _feat_stride
-    shift_y = np.arange(0, height) * _feat_stride
-    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
-    # K is H x W
-    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
-                        shift_x.ravel(), shift_y.ravel())).transpose()  # offsets of each feature-map cell in original-image coordinates
-    # add A anchors (1, A, 4) to
-    # cell K shifts (K, 1, 4) to get
-    # shift anchors (K, A, 4)
-    # reshape to (K*A, 4) shifted anchors
-    A = _num_anchors  # 9 anchors
-    K = shifts.shape[0]  # e.g. 50*37, width times height of the feature map
-    all_anchors = (_anchors.reshape((1, A, 4)) +
-                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))  # broadcast over the spatial dimension, then add
-    all_anchors = all_anchors.reshape((K * A, 4))
-    total_anchors = int(K * A)
-
-    # only keep anchors inside the image;
-    # drop every anchor that crosses the image boundary
-    inds_inside = np.where(
-        (all_anchors[:, 0] >= -_allowed_border) &
-        (all_anchors[:, 1] >= -_allowed_border) &
-        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
-        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
-    )[0]
-
-    if DEBUG:
-        print('total_anchors', total_anchors)
-        print('inds_inside', len(inds_inside))
-
-    # keep only inside anchors
-    anchors = all_anchors[inds_inside, :]  # the anchors that lie inside the image
-    if DEBUG:
-        print('anchors.shape', anchors.shape)
-
-    # at this point the anchors are ready
-    # --------------------------------------------------------------
-    # label: 1 is positive, 0 is negative, -1 is dont care
-    # (A)
-    labels = np.empty((len(inds_inside), ), dtype=np.float32)
-    labels.fill(-1)  # initialize all labels to -1
-
-    # overlaps between the anchors and the gt boxes
-    # overlaps (ex, gt), shape is A x G
-    # compute the anchor/gt-box overlaps used to label the anchors
-    overlaps = bbox_overlaps(
-        np.ascontiguousarray(anchors, dtype=np.float),
-        np.ascontiguousarray(gt_boxes, dtype=np.float))  # given x anchors and y gt_boxes, returns an (x, y) array
-    # holding the overlap between every anchor and every gt box
-    # (A) for each anchor, the index of the gt box with the largest overlap
-    argmax_overlaps = overlaps.argmax(axis=1)
-    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
-    # (G) for each gt box, the anchor with the largest overlap
-    gt_argmax_overlaps = overlaps.argmax(axis=0)
-    gt_max_overlaps = overlaps[gt_argmax_overlaps,
-                               np.arange(overlaps.shape[1])]
-    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
-
-    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
-        # assign bg labels first so that positive labels can clobber them:
-        # anchors with overlap below 0.3 become background
-        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
-
-    # fg label: for each gt, the anchor with highest overlap is foreground
-    labels[gt_argmax_overlaps] = 1
-    # fg label: above threshold IOU
-    # anchors with overlap above 0.7 are treated as foreground
-    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
-
-    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
-        # assign bg labels last so that negative labels can clobber positives
-        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
-
-    # preclude dontcare areas
-    # for now we do not consider the case where dontcare_areas exist
-    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
-        # intersec shape is D x A
-        intersecs = bbox_intersections(
-            np.ascontiguousarray(dontcare_areas, dtype=np.float),  # D x 4
-            np.ascontiguousarray(anchors, dtype=np.float)  # A x 4
-        )
-        intersecs_ = intersecs.sum(axis=0)  # A x 1
-        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1
-
-    # for now we do not consider the hard-sample problem
-    # preclude hard samples that are highly occluded, truncated or difficult to see
-    if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[0] > 0:
-        assert gt_ishard.shape[0] == gt_boxes.shape[0]
-        gt_ishard = gt_ishard.astype(int)
-        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
-        if gt_hardboxes.shape[0] > 0:
-            # H x A
-            hard_overlaps = bbox_overlaps(
-                np.ascontiguousarray(gt_hardboxes, dtype=np.float),  # H x 4
-                np.ascontiguousarray(anchors, dtype=np.float))  # A x 4
-            hard_max_overlaps = hard_overlaps.max(axis=0)  # (A)
-            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
-            max_intersec_label_inds = hard_overlaps.argmax(axis=1)  # H x 1
-            labels[max_intersec_label_inds] = -1
-
-    # subsample positive labels if we have too many:
-    # cap the number of positive samples at 128
-    # TODO this may need revisiting later; with character fragments the number of positives is large
-    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
-    fg_inds = np.where(labels == 1)[0]
-    if len(fg_inds) > num_fg:
-        disable_inds = npr.choice(
-            fg_inds, size=(len(fg_inds) - num_fg), replace=False)  # randomly drop some positive samples
-        labels[disable_inds] = -1  # set them back to -1
-
-    # subsample negative labels if we have too many:
-    # the batch holds 256 samples with at most 128 positives; if there are
-    # fewer than 128 positives, fill the remainder with negatives
-    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
-    bg_inds = np.where(labels == 0)[0]
-    if len(bg_inds) > num_bg:
-        disable_inds = npr.choice(
-            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
-        labels[disable_inds] = -1
-        # print "was %s inds, disabling %s, now %s inds" % (
-        # len(bg_inds), len(disable_inds), np.sum(labels == 0))
-
-    # labels are done; now compute the rpn-box regression targets
-    # --------------------------------------------------------------
-    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
-    # the targets are the deltas between each anchor and its assigned gt box
-    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
-
-    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
-    bbox_inside_weights[labels == 1, :] = np.array(
-        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)  # inside weights: 1 for foreground, 0 otherwise
-
-    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
-    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:  # currently uniform weighting: positives get 1, negatives 0
-        # uniform weighting of examples (given non-uniform sampling)
-        num_examples = np.sum(labels >= 0) + 1
-        # positive_weights = np.ones((1, 4)) * 1.0 / num_examples
-        # negative_weights = np.ones((1, 4)) * 1.0 / num_examples
-        positive_weights = np.ones((1, 4))
-        negative_weights = np.zeros((1, 4))
-    else:
-        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
-                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
-        # the +1 belongs in the denominator to guard against division by zero
-        # (the original had it outside the parentheses)
-        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
-                            (np.sum(labels == 1) + 1))
-        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
-                            (np.sum(labels == 0) + 1))
-    bbox_outside_weights[labels == 1, :] = positive_weights  # outside weights: 1 for foreground, 0 for background
-    bbox_outside_weights[labels == 0, :] = negative_weights
-
-    if DEBUG:
-        _sums += bbox_targets[labels == 1, :].sum(axis=0)
-        _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
-        _counts += np.sum(labels == 1)
-        means = _sums / _counts
-        stds = np.sqrt(_squared_sums / _counts - means ** 2)
-        print('means:')
-        print(means)
-        print('stdevs:')
-        print(stds)
-
-    # map up to original set of anchors:
-    # the anchors outside the image were dropped early on; add them back now
-    labels = _unmap(labels, total_anchors, inds_inside,
-                    fill=-1)  # these anchors get label -1, i.e. dontcare
-    bbox_targets = _unmap(bbox_targets, total_anchors,
-                          inds_inside, fill=0)  # these anchors get target 0, i.e. no value
-    bbox_inside_weights = _unmap(
-        bbox_inside_weights, total_anchors, inds_inside, fill=0)  # inside weights padded with 0
-    bbox_outside_weights = _unmap(
-        bbox_outside_weights, total_anchors, inds_inside, fill=0)  # outside weights padded with 0
-
-    if DEBUG:
-        print('rpn: max max_overlap', np.max(max_overlaps))
-        print('rpn: num_positive', np.sum(labels == 1))
-        print('rpn: num_negative', np.sum(labels == 0))
-        _fg_sum += np.sum(labels == 1)
-        _bg_sum += np.sum(labels == 0)
-        _count += 1
-        print('rpn: num_positive avg', _fg_sum / _count)
-        print('rpn: num_negative avg', _bg_sum / _count)
-
-    # labels
-    labels = labels.reshape((1, height, width, A))  # reshape the labels
-    rpn_labels = labels
-
-    # bbox_targets
-    bbox_targets = bbox_targets \
-        .reshape((1, height, width, A * 4))  # reshape
-
-    rpn_bbox_targets = bbox_targets
-    # bbox_inside_weights
-    bbox_inside_weights = bbox_inside_weights \
-        .reshape((1, height, width, A * 4))
-
-    rpn_bbox_inside_weights = bbox_inside_weights
-
-    # bbox_outside_weights
-    bbox_outside_weights = bbox_outside_weights \
-        .reshape((1, height, width, A * 4))
-    rpn_bbox_outside_weights = bbox_outside_weights
-
-    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
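A toy illustration (editor's sketch, not part of the scraped file) of the fg/bg subsampling step above: cap the positives at num_fg and disable the surplus at random.

import numpy as np
import numpy.random as npr

labels = np.array([1, 1, 1, 1, 0, 0, 0, 0, 0, -1])
num_fg = 2  # pretend cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE == 2
fg_inds = np.where(labels == 1)[0]
if len(fg_inds) > num_fg:
    disable = npr.choice(fg_inds, size=len(fg_inds) - num_fg, replace=False)
    labels[disable] = -1  # disabled positives become dontcare
print(np.sum(labels == 1))  # -> 2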
-def _unmap(data, count, inds, fill=0):
-    """ Unmap a subset of items (data) back to the original set of items (of
-    size count) """
-    if len(data.shape) == 1:
-        ret = np.empty((count, ), dtype=np.float32)
-        ret.fill(fill)
-        ret[inds] = data
-    else:
-        ret = np.empty((count, ) + data.shape[1:], dtype=np.float32)
-        ret.fill(fill)
-        ret[inds, :] = data
-    return ret
-
-
-def _compute_targets(ex_rois, gt_rois):
-    """Compute bounding-box regression targets for an image."""
-
-    assert ex_rois.shape[0] == gt_rois.shape[0]
-    assert ex_rois.shape[1] == 4
-    assert gt_rois.shape[1] == 5
-
-    return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
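A quick check (editor's sketch) of the scatter behaviour _unmap implements; unmap here is a hypothetical standalone re-implementation:

import numpy as np

def unmap(data, count, inds, fill=0):
    # place per-inside-anchor values back into the full anchor set
    ret = np.full((count,) + data.shape[1:], fill, dtype=np.float32)
    ret[inds] = data
    return ret

print(unmap(np.array([5., 7.]), count=4, inds=np.array([1, 3]), fill=-1))
# -> [-1.  5. -1.  7.]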
-import numpy as np
-
-
-def generate_basic_anchors(sizes, base_size=16):
-    base_anchor = np.array([0, 0, base_size - 1, base_size - 1], np.int32)
-    anchors = np.zeros((len(sizes), 4), np.int32)
-    index = 0
-    for h, w in sizes:
-        anchors[index] = scale_anchor(base_anchor, h, w)
-        index += 1
-    return anchors
-
-
-def scale_anchor(anchor, h, w):
-    x_ctr = (anchor[0] + anchor[2]) * 0.5
-    y_ctr = (anchor[1] + anchor[3]) * 0.5
-    scaled_anchor = anchor.copy()
-    scaled_anchor[0] = x_ctr - w / 2  # xmin
-    scaled_anchor[2] = x_ctr + w / 2  # xmax
-    scaled_anchor[1] = y_ctr - h / 2  # ymin
-    scaled_anchor[3] = y_ctr + h / 2  # ymax
-    return scaled_anchor
-
-
-def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
-                     scales=2**np.arange(3, 6)):
-    # NOTE: the arguments are ignored in this variant; it always uses
-    # fixed-width (16px) anchors with the 10 heights below
-    heights = [11, 16, 23, 33, 48, 68, 97, 139, 198, 283]
-    widths = [16]
-    sizes = []
-    for h in heights:
-        for w in widths:
-            sizes.append((h, w))
-    return generate_basic_anchors(sizes)
-
-
-if __name__ == '__main__':
-    import time
-    t = time.time()
-    a = generate_anchors()
-    print(time.time() - t)
-    print(a)
-    from IPython import embed
-    embed()
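Both the anchor-target and proposal layers enumerate anchors with the same broadcast trick; in isolation (editor's sketch, toy 2x2 feature map, stride 16, A = 2 made-up base anchors) it looks like this:

import numpy as np

base = np.array([[0, 0, 15, 15], [-8, 0, 23, 15]])  # A = 2 toy anchors
stride, h, w = 16, 2, 2
sx, sy = np.meshgrid(np.arange(w) * stride, np.arange(h) * stride)
shifts = np.vstack((sx.ravel(), sy.ravel(), sx.ravel(), sy.ravel())).T  # (K, 4)
A, K = base.shape[0], shifts.shape[0]
# (1, A, 4) + (K, 1, 4) broadcasts to (K, A, 4), then flatten
all_anchors = (base.reshape(1, A, 4) +
               shifts.reshape(1, K, 4).transpose(1, 0, 2)).reshape(K * A, 4)
print(all_anchors.shape)  # -> (8, 4), i.e. K * A anchors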
-# -*- coding:utf-8 -*-
-import numpy as np
-from .generate_anchors import generate_anchors
-from ..fast_rcnn.config import cfg
-from ..fast_rcnn.bbox_transform import bbox_transform_inv, clip_boxes
-from ..fast_rcnn.nms_wrapper import nms
-
-
-DEBUG = False
-"""
-Outputs object detection proposals by applying estimated bounding-box
-transformations to a set of regular boxes (called "anchors").
-"""
-
-
-def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride=[16, ], anchor_scales=[16, ]):
-    """
-    Parameters
-    ----------
-    rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg
-        NOTICE: the old version is ordered by (1, H, W, 2, A)!
-    rpn_bbox_pred: (1 , H , W , Ax4), regression boxes output of RPN
-    im_info: a list of [image_height, image_width, scale_ratios]
-    cfg_key: 'TRAIN' or 'TEST'
-    _feat_stride: the downsampling ratio of feature map to the original input image
-    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
-    ----------
-    Returns
-    ----------
-    rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2]
-
-    # Algorithm:
-    #
-    # for each (H, W) location i
-    #     generate A anchor boxes centered on cell i
-    #     apply predicted bbox deltas at cell i to each of the A anchors
-    # clip predicted boxes to image
-    # remove predicted boxes with either height or width < threshold
-    # sort all (proposal, score) pairs by score from highest to lowest
-    # take top pre_nms_topN proposals before NMS
-    # apply NMS with threshold 0.7 to remaining proposals
-    # take after_nms_topN proposals after NMS
-    # return the top proposals (-> RoIs top, scores top)
-    # layer_params = yaml.load(self.param_str_)
-
-    """
-    cfg_key = cfg_key.decode('ascii')
-    _anchors = generate_anchors(
-        scales=np.array(anchor_scales))  # generate the 9 base anchors
-    _num_anchors = _anchors.shape[0]  # 9 anchors
-
-    im_info = im_info[0]  # height, width and scale of the original image
-
-    assert rpn_cls_prob_reshape.shape[0] == 1, \
-        'Only single item batches are supported'
-
-    pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N  # 12000, max number of candidate boxes kept before nms
-    post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N  # 2000, max number of boxes kept after nms
-    nms_thresh = cfg[cfg_key].RPN_NMS_THRESH  # nms threshold, 0.7
-    min_size = cfg[cfg_key].RPN_MIN_SIZE  # minimum proposal size, currently 16: both height and width must exceed it
-    # TODO this minimum size may need lowering later, perhaps to 8
-
-    height, width = rpn_cls_prob_reshape.shape[1:3]  # height/width of the feature map
-
-    # the first set of _num_anchors channels are bg probs
-    # the second set are the fg probs, which we want
-    # (1, H, W, A)
-    scores = np.reshape(np.reshape(rpn_cls_prob_reshape, [1, height, width, _num_anchors, 2])[:, :, :, :, 1],
-                        [1, height, width, _num_anchors])
-    # extract the object scores (the non-object ones are irrelevant)
-    # and reshape to 1*H*W*9
-
-    bbox_deltas = rpn_bbox_pred  # the predicted deltas are relative and still need converting to real image coordinates
-    # im_info = bottom[2].data[0, :]
-
-    if DEBUG:
-        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
-        print('scale: {}'.format(im_info[2]))
-
-    # 1. Generate proposals from bbox deltas and shifted anchors
-    if DEBUG:
-        print('score map size: {}'.format(scores.shape))
-
-    # Enumerate all shifts
-    # as in the anchor-target layer, generate the anchor shifts and from
-    # them all anchors over the whole image
-    shift_x = np.arange(0, width) * _feat_stride
-    shift_y = np.arange(0, height) * _feat_stride
-    shift_x, shift_y = np.meshgrid(shift_x, shift_y)
-    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
-                        shift_x.ravel(), shift_y.ravel())).transpose()
-
-    # Enumerate all shifted anchors:
-    #
-    # add A anchors (1, A, 4) to
-    # cell K shifts (K, 1, 4) to get
-    # shift anchors (K, A, 4)
-    # reshape to (K*A, 4) shifted anchors
-    A = _num_anchors
-    K = shifts.shape[0]
-    anchors = _anchors.reshape((1, A, 4)) + \
-        shifts.reshape((1, K, 4)).transpose((1, 0, 2))
-    anchors = anchors.reshape((K * A, 4))  # all anchors over the whole image
-
-    # Transpose and reshape predicted bbox transformations to get them
-    # into the same order as the anchors:
-    # bbox deltas will be (1, 4 * A, H, W) format
-    # transpose to (1, H, W, 4 * A)
-    # reshape to (1 * H * W * A, 4) where rows are ordered by (h, w, a)
-    # in slowest to fastest order
-    bbox_deltas = bbox_deltas.reshape((-1, 4))  # (HxWxA, 4)
-
-    # Same story for the scores:
-    scores = scores.reshape((-1, 1))
-
-    # Convert anchors into proposals via bbox transformations
-    proposals = bbox_transform_inv(anchors, bbox_deltas)  # apply the inverse transform to get real image coordinates
-
-    # 2. clip predicted boxes to image:
-    # trim every proposal so that nothing extends beyond the image
-    proposals = clip_boxes(proposals, im_info[:2])
-
-    # 3. remove predicted boxes with either height or width < threshold
-    # (NOTE: convert min_size to input image scale stored in im_info[2])
-    keep = _filter_boxes(proposals, min_size * im_info[2])
-    proposals = proposals[keep, :]  # keep the remaining proposals
-    scores = scores[keep]
-    bbox_deltas = bbox_deltas[keep, :]
-
-    # # remove irregular boxes, too fat too tall
-    # keep = _filter_irregular_boxes(proposals)
-    # proposals = proposals[keep, :]
-    # scores = scores[keep]
-
-    # 4. sort all (proposal, score) pairs by score from highest to lowest
-    # 5. take top pre_nms_topN (e.g. 6000)
-    order = scores.ravel().argsort()[::-1]  # sort by score, high to low
-    if pre_nms_topN > 0:  # keep 12000 proposals going into nms
-        order = order[:pre_nms_topN]
-    proposals = proposals[order, :]
-    scores = scores[order]
-    bbox_deltas = bbox_deltas[order, :]
-
-    # 6. apply nms (e.g. threshold = 0.7)
-    # 7. take after_nms_topN (e.g. 300)
-    # 8. return the top proposals (-> RoIs top)
-    keep = nms(np.hstack((proposals, scores)),
-               nms_thresh)  # run nms, then keep up to 2000 proposals
-    if post_nms_topN > 0:
-        keep = keep[:post_nms_topN]
-    proposals = proposals[keep, :]
-    scores = scores[keep]
-    bbox_deltas = bbox_deltas[keep, :]
-
-    # Output rois blob
-    # Our RPN implementation only supports a single input image, so all
-    # batch inds are 0
-    blob = np.hstack((scores.astype(np.float32, copy=False),
-                      proposals.astype(np.float32, copy=False)))
-
-    return blob, bbox_deltas
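The pre-NMS bookkeeping in proposal_layer is just argsort plus slicing; sketched on toy scores (editor's illustration):

import numpy as np

scores = np.array([0.2, 0.9, 0.5, 0.7])
pre_nms_topN = 3
order = scores.ravel().argsort()[::-1]  # indices from highest to lowest score
order = order[:pre_nms_topN]
print(order)  # -> [1 3 2]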
-
-
-def _filter_boxes(boxes, min_size):
-    """Remove all boxes with any side smaller than min_size."""
-    ws = boxes[:, 2] - boxes[:, 0] + 1
-    hs = boxes[:, 3] - boxes[:, 1] + 1
-    keep = np.where((ws >= min_size) & (hs >= min_size))[0]
-    return keep
-
-
-def _filter_irregular_boxes(boxes, min_ratio=0.2, max_ratio=5):
-    """Remove all boxes whose aspect ratio falls outside [min_ratio, max_ratio]."""
-    ws = boxes[:, 2] - boxes[:, 0] + 1
-    hs = boxes[:, 3] - boxes[:, 1] + 1
-    rs = ws / hs
-    keep = np.where((rs <= max_ratio) & (rs >= min_ratio))[0]
-    return keep
-
-
-from .detectors import TextDetector
-from .text_connect_cfg import Config
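The nms imported from lib.fast_rcnn.nms_wrapper is compiled (Cython/CUDA); a pure-numpy stand-in (editor's sketch, nms_sketch is a hypothetical name, not the repo's implementation) shows the greedy IoU suppression it performs:

import numpy as np

def nms_sketch(dets, thresh):
    # dets: (N, 5) rows of [x1, y1, x2, y2, score]
    x1, y1, x2, y2, s = dets.T
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = s.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]          # highest-scoring remaining box
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0, xx2 - xx1 + 1) * np.maximum(0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[1:][iou <= thresh]  # drop boxes overlapping too much
    return keep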
-# -*- coding:utf-8 -*-
-import numpy as np
-from lib.fast_rcnn.nms_wrapper import nms
-from lib.fast_rcnn.config import cfg
-from .other import normalize
-from .text_proposal_connector import TextProposalConnector
-from .text_proposal_connector_oriented import TextProposalConnector as TextProposalConnectorOriented
-from .text_connect_cfg import Config as TextLineCfg
-
-
-class TextDetector:
-    def __init__(self):
-        self.mode = cfg.TEST.DETECT_MODE
-        if self.mode == "H":
-            self.text_proposal_connector = TextProposalConnector()
-        elif self.mode == "O":
-            self.text_proposal_connector = TextProposalConnectorOriented()
-
-    def detect(self, text_proposals, scores, size):
-        # drop low-scoring proposals
-        keep_inds = np.where(scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
-        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]
-
-        # sort by score
-        sorted_indices = np.argsort(scores.ravel())[::-1]
-        text_proposals, scores = text_proposals[sorted_indices], scores[sorted_indices]
-
-        # run nms on the proposals
-        keep_inds = nms(np.hstack((text_proposals, scores)),
-                        TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
-        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]
-
-        # get the detection results
-        scores = normalize(scores)
-        text_recs = self.text_proposal_connector.get_text_lines(
-            text_proposals, scores, size)
-
-        # filter the boxes
-        keep_inds = self.filter_boxes(text_recs)
-        text_lines = text_recs[keep_inds]
-
-        # run nms on the lines
-        if text_lines.shape[0] != 0:
-            keep_inds = nms(text_lines, TextLineCfg.TEXT_LINE_NMS_THRESH)
-            text_lines = text_lines[keep_inds]
-
-        return text_lines
-
-    def filter_boxes(self, boxes):
-        heights = np.zeros((len(boxes), 1), np.float)
-        widths = np.zeros((len(boxes), 1), np.float)
-        scores = np.zeros((len(boxes), 1), np.float)
-        index = 0
-        for box in boxes:
-            heights[index] = (abs(box[5] - box[1]) +
-                              abs(box[7] - box[3])) / 2.0 + 1
-            widths[index] = (abs(box[2] - box[0]) +
-                             abs(box[6] - box[4])) / 2.0 + 1
-            scores[index] = box[8]
-            index += 1
-
-        return np.where((widths / heights > TextLineCfg.MIN_RATIO) & (scores > TextLineCfg.LINE_MIN_SCORE) &
-                        (widths > (TextLineCfg.TEXT_PROPOSALS_WIDTH * TextLineCfg.MIN_NUM_PROPOSALS)))[0]
-
-
-import numpy as np
-
-
-def normalize(data):
-    if data.shape[0] == 0:
-        return data
-    max_ = data.max()
-    min_ = data.min()
-    return (data - min_) * 1.0 / (max_ - min_) if max_ - min_ != 0 else data - min_
-
-
-def threshold(coords, min_, max_):
-    return
np.maximum(np.minimum(coords, max_), min_) - - -def clip_boxes(boxes, im_shape): - """ - Clip boxes to image boundaries. - """ - boxes[:, 0::2] = threshold(boxes[:, 0::2], 0, im_shape[1]-1) - boxes[:, 1::2] = threshold(boxes[:, 1::2], 0, im_shape[0]-1) - return boxes - - -class Graph: - def __init__(self, graph): - self.graph = graph - - def sub_graphs_connected(self): - sub_graphs = [] - for index in range(self.graph.shape[0]): - if not self.graph[:, index].any() and self.graph[index, :].any(): - v = index - sub_graphs.append([v]) - while self.graph[v, :].any(): - v = np.where(self.graph[v, :])[0][0] - sub_graphs[-1].append(v) - return sub_graphs -class Config: - SCALE = 900 # 600 - MAX_SCALE = 1500 # 1200 - TEXT_PROPOSALS_WIDTH = 0 # 16 - MIN_NUM_PROPOSALS = 0 # 2 - MIN_RATIO = 0.01 # 0.5 - LINE_MIN_SCORE = 0.6 # 0.9 - MAX_HORIZONTAL_GAP = 30 # 50 - TEXT_PROPOSALS_MIN_SCORE = 0.7 # 0.7 - TEXT_PROPOSALS_NMS_THRESH = 0.3 # 0.2 - TEXT_LINE_NMS_THRESH = 0.3 - MIN_V_OVERLAPS = 0.6 # 0.7 - MIN_SIZE_SIM = 0.6 # 0.7 -import numpy as np -from .other import clip_boxes -from .text_proposal_graph_builder import TextProposalGraphBuilder - - -class TextProposalConnector: - def __init__(self): - self.graph_builder = TextProposalGraphBuilder() - - def group_text_proposals(self, text_proposals, scores, im_size): - graph = self.graph_builder.build_graph(text_proposals, scores, im_size) - return graph.sub_graphs_connected() - - def fit_y(self, X, Y, x1, x2): - len(X) != 0 - # if X only include one point, the function will get line y=Y[0] - if np.sum(X == X[0]) == len(X): - return Y[0], Y[0] - p = np.poly1d(np.polyfit(X, Y, 1)) - return p(x1), p(x2) - - def get_text_lines(self, text_proposals, scores, im_size): - # tp=text proposal - tp_groups = self.group_text_proposals(text_proposals, scores, im_size) - text_lines = np.zeros((len(tp_groups), 5), np.float32) - - for index, tp_indices in enumerate(tp_groups): - text_line_boxes = text_proposals[list(tp_indices)] - - x0 = np.min(text_line_boxes[:, 0]) - x1 = np.max(text_line_boxes[:, 2]) - - offset = (text_line_boxes[0, 2]-text_line_boxes[0, 0])*0.5 - - lt_y, rt_y = self.fit_y( - text_line_boxes[:, 0], text_line_boxes[:, 1], x0+offset, x1-offset) - lb_y, rb_y = self.fit_y( - text_line_boxes[:, 0], text_line_boxes[:, 3], x0+offset, x1-offset) - - # the score of a text line is the average score of the scores - # of all text proposals contained in the text line - score = scores[list(tp_indices)].sum()/float(len(tp_indices)) - - text_lines[index, 0] = x0 - text_lines[index, 1] = min(lt_y, rt_y) - text_lines[index, 2] = x1 - text_lines[index, 3] = max(lb_y, rb_y) - text_lines[index, 4] = score - - text_lines = clip_boxes(text_lines, im_size) - - text_recs = np.zeros((len(text_lines), 9), np.float32) - index = 0 - for line in text_lines: - xmin, ymin, xmax, ymax = line[0], line[1], line[2], line[3] - text_recs[index, 0] = xmin - text_recs[index, 1] = ymin - text_recs[index, 2] = xmax - text_recs[index, 3] = ymin - text_recs[index, 4] = xmin - text_recs[index, 5] = ymax - text_recs[index, 6] = xmax - text_recs[index, 7] = ymax - text_recs[index, 8] = line[4] - index = index + 1 - - return text_recs -# -*- coding:utf-8 -*- -import numpy as np -from .other import clip_boxes -from .text_proposal_graph_builder import TextProposalGraphBuilder - - -class TextProposalConnector: - """ - Connect text proposals into text lines - """ - - def __init__(self): - self.graph_builder = TextProposalGraphBuilder() - - def group_text_proposals(self, text_proposals, scores, im_size): - 
-        graph = self.graph_builder.build_graph(text_proposals, scores, im_size)
-        return graph.sub_graphs_connected()
-
-    def fit_y(self, X, Y, x1, x2):
-        assert len(X) != 0
-        # if X only includes one point, the function returns the line y=Y[0]
-        if np.sum(X == X[0]) == len(X):
-            return Y[0], Y[0]
-        p = np.poly1d(np.polyfit(X, Y, 1))
-        return p(x1), p(x2)
-
-    def get_text_lines(self, text_proposals, scores, im_size):
-        """
-        text_proposals: boxes
-        """
-        # tp=text proposal
-        tp_groups = self.group_text_proposals(
-            text_proposals, scores, im_size)  # first build the graph to find out which small boxes make up each text line
-
-        text_lines = np.zeros((len(tp_groups), 8), np.float32)
-
-        for index, tp_indices in enumerate(tp_groups):
-            text_line_boxes = text_proposals[list(tp_indices)]  # all small boxes of this text line
-            # center x, y coordinates of each small box
-            X = (text_line_boxes[:, 0] + text_line_boxes[:, 2]) / 2
-            Y = (text_line_boxes[:, 1] + text_line_boxes[:, 3]) / 2
-
-            z1 = np.polyfit(X, Y, 1)  # least-squares fit of a line through the box centers
-
-            x0 = np.min(text_line_boxes[:, 0])  # smallest x coordinate of the text line
-            x1 = np.max(text_line_boxes[:, 2])  # largest x coordinate of the text line
-
-            offset = (text_line_boxes[0, 2] -
-                      text_line_boxes[0, 0]) * 0.5  # half the width of a small box
-
-            # fit a line through the top-left corners of all small boxes, then
-            # evaluate the y coordinates at the extreme left/right x of the line
-            lt_y, rt_y = self.fit_y(
-                text_line_boxes[:, 0], text_line_boxes[:, 1], x0+offset, x1-offset)
-            # fit a line through the bottom-left corners of all small boxes, then
-            # evaluate the y coordinates at the extreme left/right x of the line
-            lb_y, rb_y = self.fit_y(
-                text_line_boxes[:, 0], text_line_boxes[:, 3], x0+offset, x1-offset)
-
-            score = scores[list(tp_indices)].sum() / \
-                float(len(tp_indices))  # the text line's score is the mean score of its small boxes
-
-            text_lines[index, 0] = x0
-            text_lines[index, 1] = min(lt_y, rt_y)  # smallest y of the segment at the top of the text line
-            text_lines[index, 2] = x1
-            text_lines[index, 3] = max(lb_y, rb_y)  # largest y of the segment at the bottom of the text line
-            text_lines[index, 4] = score  # text line score
-            text_lines[index, 5] = z1[0]  # k and b of the line fitted through the centers
-            text_lines[index, 6] = z1[1]
-            height = np.mean(
-                (text_line_boxes[:, 3] - text_line_boxes[:, 1]))  # mean height of the small boxes
-            text_lines[index, 7] = height + 2.5
-
-        text_recs = np.zeros((len(text_lines), 9), np.float32)
-        index = 0
-        for line in text_lines:
-            b1 = line[6] - line[7] / 2  # from the height and the center line, derive b for the top and bottom lines
-            b2 = line[6] + line[7] / 2
-            x1 = line[0]
-            y1 = line[5] * line[0] + b1  # top left
-            x2 = line[2]
-            y2 = line[5] * line[2] + b1  # top right
-            x3 = line[0]
-            y3 = line[5] * line[0] + b2  # bottom left
-            x4 = line[2]
-            y4 = line[5] * line[2] + b2  # bottom right
-            disX = x2 - x1
-            disY = y2 - y1
-            width = np.sqrt(disX * disX + disY * disY)  # width of the text line
-
-            fTmp0 = y3 - y1  # height of the text line
-            fTmp1 = fTmp0 * disY / width
-            x = np.fabs(fTmp1 * disX / width)  # compensation
-            y = np.fabs(fTmp1 * disY / width)
-            if line[5] < 0:
-                x1 -= x
-                y1 += y
-                x4 += x
-                y4 -= y
-            else:
-                x2 += x
-                y2 += y
-                x3 -= x
-                y3 -= y
-            text_recs[index, 0] = x1
-            text_recs[index, 1] = y1
-            text_recs[index, 2] = x2
-            text_recs[index, 3] = y2
-            text_recs[index, 4] = x3
-            text_recs[index, 5] = y3
-            text_recs[index, 6] = x4
-            text_recs[index, 7] = y4
-            text_recs[index, 8] = line[4]
-            index = index + 1
-
-        text_recs = clip_boxes(text_recs, im_size)
-
-        return text_recs
-
-
-from .text_connect_cfg import Config as TextLineCfg
-from .other import Graph
-import numpy as np
-
-
-class TextProposalGraphBuilder:
-    """
-    Build Text proposals into a graph.
- """ - - def get_successions(self, index): - box = self.text_proposals[index] - results = [] - for left in range(int(box[0])+1, min(int(box[0])+TextLineCfg.MAX_HORIZONTAL_GAP+1, self.im_size[1])): - adj_box_indices = self.boxes_table[left] - for adj_box_index in adj_box_indices: - if self.meet_v_iou(adj_box_index, index): - results.append(adj_box_index) - if len(results) != 0: - return results - return results - - def get_precursors(self, index): - box = self.text_proposals[index] - results = [] - for left in range(int(box[0])-1, max(int(box[0]-TextLineCfg.MAX_HORIZONTAL_GAP), 0)-1, -1): - adj_box_indices = self.boxes_table[left] - for adj_box_index in adj_box_indices: - if self.meet_v_iou(adj_box_index, index): - results.append(adj_box_index) - if len(results) != 0: - return results - return results - - def is_succession_node(self, index, succession_index): - precursors = self.get_precursors(succession_index) - if self.scores[index] >= np.max(self.scores[precursors]): - return True - return False - - def meet_v_iou(self, index1, index2): - def overlaps_v(index1, index2): - h1 = self.heights[index1] - h2 = self.heights[index2] - y0 = max(self.text_proposals[index2][1], - self.text_proposals[index1][1]) - y1 = min(self.text_proposals[index2][3], - self.text_proposals[index1][3]) - return max(0, y1-y0+1)/min(h1, h2) - - def size_similarity(index1, index2): - h1 = self.heights[index1] - h2 = self.heights[index2] - return min(h1, h2)/max(h1, h2) - - return overlaps_v(index1, index2) >= TextLineCfg.MIN_V_OVERLAPS and \ - size_similarity(index1, index2) >= TextLineCfg.MIN_SIZE_SIM - - def build_graph(self, text_proposals, scores, im_size): - self.text_proposals = text_proposals - self.scores = scores - self.im_size = im_size - self.heights = text_proposals[:, 3]-text_proposals[:, 1]+1 - - boxes_table = [[] for _ in range(self.im_size[1])] - for index, box in enumerate(text_proposals): - boxes_table[int(box[0])].append(index) - self.boxes_table = boxes_table - - graph = np.zeros( - (text_proposals.shape[0], text_proposals.shape[0]), np.bool) - - for index, box in enumerate(text_proposals): - successions = self.get_successions(index) - if len(successions) == 0: - continue - succession_index = successions[np.argmax(scores[successions])] - if self.is_succession_node(index, succession_index): - # NOTE: a box can have multiple successions(precursors) if multiple successions(precursors) - # have equal scores. - graph[index, succession_index] = True - return Graph(graph) -from . import boxes_grid -from . import blob -from . import timer -from . import bbox -from . import cython_nms -try: - from . import gpu_nms -except: - gpu_nms = cython_nms -"""Blob helper functions.""" -import numpy as np -import cv2 -from ..fast_rcnn.config import cfg - - -def im_list_to_blob(ims): - """Convert a list of images into a network input. - - Assumes images are already prepared (means subtracted, BGR order, ...). 
- """ - max_shape = np.array([im.shape for im in ims]).max(axis=0) - num_images = len(ims) - blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), - dtype=np.float32) - for i in range(num_images): - im = ims[i] - blob[i, 0:im.shape[0], 0:im.shape[1], :] = im - - return blob - - -def prep_im_for_blob(im, pixel_means, target_size, max_size): - """Mean subtract and scale an image for use in a blob.""" - im = im.astype(np.float32, copy=False) - im -= pixel_means - im_shape = im.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - im_scale = float(target_size) / float(im_size_min) - # Prevent the biggest axis from being more than MAX_SIZE - if np.round(im_scale * im_size_max) > max_size: - im_scale = float(max_size) / float(im_size_max) - if cfg.TRAIN.RANDOM_DOWNSAMPLE: - r = 0.6 + np.random.rand() * 0.4 - im_scale *= r - im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, - interpolation=cv2.INTER_LINEAR) - - return im, im_scale -# -------------------------------------------------------- -# Subcategory CNN -# Copyright (c) 2015 CVGL Stanford -# Licensed under The MIT License [see LICENSE for details] -# Written by Yu Xiang -# -------------------------------------------------------- - -import numpy as np -import math -# TODO: make fast_rcnn irrelevant -# >>>> obsolete, because it depends on sth outside of this project -from ..fast_rcnn.config import cfg -# <<<< obsolete - - -def get_boxes_grid(image_height, image_width): - """ - Return the boxes on image grid. - calling this function when cfg.IS_MULTISCALE is True, otherwise, calling rdl_roidb.prepare_roidb(imdb) instead. - """ - - # fixed a bug, change cfg.TRAIN.SCALES to cfg.TRAIN.SCALES_BASE - # coz, here needs a ratio around 1.0, not the accutual size. - # height and width of the feature map - if cfg.NET_NAME == 'CaffeNet': - height = np.floor( - (image_height * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1) - height = np.floor((height - 1) / 2.0 + 1 + 0.5) - height = np.floor((height - 1) / 2.0 + 1 + 0.5) - - width = np.floor( - (image_width * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1) - width = np.floor((width - 1) / 2.0 + 1 + 0.5) - width = np.floor((width - 1) / 2.0 + 1 + 0.5) - elif cfg.NET_NAME == 'VGGnet': - height = np.floor( - image_height * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5) - height = np.floor(height / 2.0 + 0.5) - height = np.floor(height / 2.0 + 0.5) - height = np.floor(height / 2.0 + 0.5) - - width = np.floor(image_width * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5) - width = np.floor(width / 2.0 + 0.5) - width = np.floor(width / 2.0 + 0.5) - width = np.floor(width / 2.0 + 0.5) - else: - assert (1), 'The network architecture is not supported in utils.get_boxes_grid!' 
- - # compute the grid box centers - h = np.arange(height) - w = np.arange(width) - y, x = np.meshgrid(h, w, indexing='ij') - centers = np.dstack((x, y)) - centers = np.reshape(centers, (-1, 2)) - num = centers.shape[0] - - # compute width and height of grid box - area = cfg.TRAIN.KERNEL_SIZE * cfg.TRAIN.KERNEL_SIZE - aspect = cfg.TRAIN.ASPECTS # height / width - num_aspect = len(aspect) - widths = np.zeros((1, num_aspect), dtype=np.float32) - heights = np.zeros((1, num_aspect), dtype=np.float32) - for i in range(num_aspect): - widths[0, i] = math.sqrt(area / aspect[i]) - heights[0, i] = widths[0, i] * aspect[i] - - # construct grid boxes - centers = np.repeat(centers, num_aspect, axis=0) - widths = np.tile(widths, num).transpose() - heights = np.tile(heights, num).transpose() - - x1 = np.reshape(centers[:, 0], (-1, 1)) - widths * 0.5 - x2 = np.reshape(centers[:, 0], (-1, 1)) + widths * 0.5 - y1 = np.reshape(centers[:, 1], (-1, 1)) - heights * 0.5 - y2 = np.reshape(centers[:, 1], (-1, 1)) + heights * 0.5 - - boxes_grid = np.hstack((x1, y1, x2, y2)) / cfg.TRAIN.SPATIAL_SCALE - - return boxes_grid, centers[:, 0], centers[:, 1] -from Cython.Build import cythonize -import os -from os.path import join as pjoin -import numpy as np -from distutils.core import setup -from distutils.extension import Extension -from Cython.Distutils import build_ext - - -def find_in_path(name, path): - for dir in path.split(os.pathsep): - binpath = pjoin(dir, name) - if os.path.exists(binpath): - return os.path.abspath(binpath) - return None - - -def locate_cuda(): - # first check if the CUDA_HOME env variable is in use - if 'CUDA_HOME' in os.environ: - home = os.environ['CUDA_HOME'] - nvcc = pjoin(home, 'bin', 'nvcc') - else: - # otherwise, search the PATH for NVCC - default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin') - nvcc = find_in_path( - 'nvcc', os.environ['PATH'] + os.pathsep + default_path) - if nvcc is None: - raise EnvironmentError('The nvcc binary could not be ' - 'located in your $PATH. 
Either add it to your path, or set $CUDA_HOME') - home = os.path.dirname(os.path.dirname(nvcc)) - - cudaconfig = {'home': home, 'nvcc': nvcc, - 'include': pjoin(home, 'include'), - 'lib64': pjoin(home, 'lib64')} - for k, v in cudaconfig.items(): - # for k, v in cudaconfig.iteritems(): - if not os.path.exists(v): - raise EnvironmentError( - 'The CUDA %s path could not be located in %s' % (k, v)) - return cudaconfig - - -CUDA = locate_cuda() - -try: - numpy_include = np.get_include() -except AttributeError: - numpy_include = np.get_numpy_include() - - -def customize_compiler_for_nvcc(self): - self.src_extensions.append('.cu') - default_compiler_so = self.compiler_so - super = self._compile - - def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): - print(extra_postargs) - if os.path.splitext(src)[1] == '.cu': - # use the cuda for .cu files - self.set_executable('compiler_so', CUDA['nvcc']) - # use only a subset of the extra_postargs, which are 1-1 translated - # from the extra_compile_args in the Extension class - postargs = extra_postargs['nvcc'] - else: - postargs = extra_postargs['gcc'] - - super(obj, src, ext, cc_args, postargs, pp_opts) - # reset the default compiler_so, which we might have changed for cuda - self.compiler_so = default_compiler_so - # inject our redefined _compile method into the class - self._compile = _compile - - -# run the customize_compiler -class custom_build_ext(build_ext): - def build_extensions(self): - customize_compiler_for_nvcc(self.compiler) - build_ext.build_extensions(self) - - -ext_modules = [ - Extension( - "utils.bbox", - ["bbox.pyx"], - extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, - include_dirs=[numpy_include] - ), - Extension( - "utils.cython_nms", - ["cython_nms.pyx"], - extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, - include_dirs=[numpy_include] - ), - Extension('utils.gpu_nms', - ['nms_kernel.cu', 'gpu_nms.pyx'], - library_dirs=[CUDA['lib64']], - libraries=['cudart'], - language='c++', - runtime_library_dirs=[CUDA['lib64']], - extra_compile_args={'gcc': ["-Wno-unused-function"], - 'nvcc': ['-arch=sm_35', - '--ptxas-options=-v', - '-c', - '--compiler-options', - "'-fPIC'"]}, - include_dirs=[numpy_include, CUDA['include']] - ), -] - -setup( - ext_modules=ext_modules, - cmdclass={'build_ext': custom_build_ext}, -) -from Cython.Build import cythonize -import os -from os.path import join as pjoin -import numpy as np -from distutils.core import setup -from distutils.extension import Extension -from Cython.Distutils import build_ext - - -def find_in_path(name, path): - for dir in path.split(os.pathsep): - binpath = pjoin(dir, name) - if os.path.exists(binpath): - return os.path.abspath(binpath) - return None - - -try: - numpy_include = np.get_include() -except AttributeError: - numpy_include = np.get_numpy_include() - - -def customize_compiler_for_nvcc(self): - self.src_extensions.append('.cu') - default_compiler_so = self.compiler_so - super = self._compile - - def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts): - print(extra_postargs) - postargs = extra_postargs['gcc'] - super(obj, src, ext, cc_args, postargs, pp_opts) - # reset the default compiler_so, which we might have changed for cuda - self.compiler_so = default_compiler_so - # inject our redefined _compile method into the class - self._compile = _compile - -# run the customize_compiler - - -class custom_build_ext(build_ext): - def build_extensions(self): - customize_compiler_for_nvcc(self.compiler) - build_ext.build_extensions(self) - 
- -ext_modules = [ - Extension( - "utils.bbox", - ["bbox.pyx"], - extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, - include_dirs=[numpy_include] - ), - Extension( - "utils.cython_nms", - ["cython_nms.pyx"], - extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]}, - include_dirs=[numpy_include] - ), -] - -setup( - ext_modules=ext_modules, - cmdclass={'build_ext': custom_build_ext}, -) -import time - - -class Timer(object): - def __init__(self): - self.total_time = 0. - self.calls = 0 - self.start_time = 0. - self.diff = 0. - self.average_time = 0. - - def tic(self): - self.start_time = time.time() - - def toc(self, average=True): - self.diff = time.time() - self.start_time - self.total_time += self.diff - self.calls += 1 - self.average_time = self.total_time / self.calls - if average: - return self.average_time - else: - return self.diff -import numpy as np -import matplotlib.pyplot as plt -from pylab import * -import os -import sys -import time -import cv2 -from PIL import Image -from keras.preprocessing.image import * -from keras.utils.np_utils import to_categorical -from keras.models import load_model -import keras.backend as K - -from models import * -from inference import inference - - -def calculate_iou(model_name, nb_classes, res_dir, label_dir, image_list): - conf_m = zeros((nb_classes, nb_classes), dtype=float) - total = 0 - # mean_acc = 0. - for img_num in image_list: - img_num = img_num.strip('\n') - total += 1 - print('#%d: %s' % (total, img_num)) - pred = img_to_array(Image.open('%s/%s.png' % - (res_dir, img_num))).astype(int) - label = img_to_array(Image.open('%s/%s.png' % - (label_dir, img_num))).astype(int) - flat_pred = np.ravel(pred) - flat_label = np.ravel(label) - # acc = 0. - for p, l in zip(flat_pred, flat_label): - if l == 255: - continue - if l < nb_classes and p < nb_classes: - conf_m[l, p] += 1 - else: - print('Invalid entry encountered, skipping! 
Label: ', l, - ' Prediction: ', p, ' Img_num: ', img_num) - - # if l==p: - # acc+=1 - #acc /= flat_pred.shape[0] - #mean_acc += acc - #mean_acc /= total - # print 'mean acc: %f'%mean_acc - I = np.diag(conf_m) - U = np.sum(conf_m, axis=0) + np.sum(conf_m, axis=1) - I - IOU = I/U - meanIOU = np.mean(IOU) - return conf_m, IOU, meanIOU - - -def evaluate(model_name, weight_file, image_size, nb_classes, batch_size, val_file_path, data_dir, label_dir, - label_suffix='.png', - data_suffix='.jpg'): - current_dir = os.path.dirname(os.path.realpath(__file__)) - save_dir = os.path.join(current_dir, 'Models/'+model_name+'/res/') - if os.path.exists(save_dir) == False: - os.mkdir(save_dir) - fp = open(val_file_path) - image_list = fp.readlines() - fp.close() - - start_time = time.time() - inference(model_name, weight_file, image_size, image_list, data_dir, label_dir, return_results=False, save_dir=save_dir, - label_suffix=label_suffix, data_suffix=data_suffix) - duration = time.time() - start_time - print('{}s used to make predictions.\n'.format(duration)) - - start_time = time.time() - conf_m, IOU, meanIOU = calculate_iou( - model_name, nb_classes, save_dir, label_dir, image_list) - print('IOU: ') - print(IOU) - print('meanIOU: %f' % meanIOU) - print('pixel acc: %f' % (np.sum(np.diag(conf_m))/np.sum(conf_m))) - duration = time.time() - start_time - print('{}s used to calculate IOU.\n'.format(duration)) - - -if __name__ == '__main__': - # model_name = 'Atrous_DenseNet' - model_name = 'AtrousFCN_Resnet50_16s' - # model_name = 'DenseNet_FCN' - weight_file = 'checkpoint_weights.hdf5' - # weight_file = 'model.hdf5' - image_size = (512, 512) - nb_classes = 21 - batch_size = 1 - dataset = 'VOC2012_BERKELEY' - if dataset == 'VOC2012_BERKELEY': - # pascal voc + berkeley semantic contours annotations - # Data/VOClarge/VOC2012/ImageSets/Segmentation - train_file_path = os.path.expanduser( - '~/.keras/datasets/VOC2012/combined_imageset_train.txt') - # train_file_path = os.path.expanduser('~/.keras/datasets/oneimage/train.txt') #Data/VOClarge/VOC2012/ImageSets/Segmentation - val_file_path = os.path.expanduser( - '~/.keras/datasets/VOC2012/combined_imageset_val.txt') - data_dir = os.path.expanduser( - '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/JPEGImages') - label_dir = os.path.expanduser( - '~/.keras/datasets/VOC2012/combined_annotations') - label_suffix = '.png' - if dataset == 'COCO': - # Data/VOClarge/VOC2012/ImageSets/Segmentation - train_file_path = os.path.expanduser( - '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/ImageSets/Segmentation/train.txt') - # train_file_path = os.path.expanduser('~/.keras/datasets/oneimage/train.txt') #Data/VOClarge/VOC2012/ImageSets/Segmentation - val_file_path = os.path.expanduser( - '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt') - data_dir = os.path.expanduser( - '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/JPEGImages') - label_dir = os.path.expanduser( - '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/SegmentationClass') - label_suffix = '.npy' - evaluate(model_name, weight_file, image_size, nb_classes, batch_size, val_file_path, data_dir, label_dir, - label_suffix=label_suffix, data_suffix=data_suffix) -import numpy as np -import matplotlib.pyplot as plt -from pylab import * -import os -import sys -import cv2 -from PIL import Image -from keras.preprocessing.image import * -from keras.models import load_model -import keras.backend as K -from keras.applications.imagenet_utils import preprocess_input - -from models import * - - -def 
inference(model_name, weight_file, image_size, image_list, data_dir, label_dir, return_results=True, save_dir=None, - label_suffix='.png', - data_suffix='.jpg'): - current_dir = os.path.dirname(os.path.realpath(__file__)) - # mean_value = np.array([104.00699, 116.66877, 122.67892]) - batch_shape = (1, ) + image_size + (3, ) - save_path = os.path.join(current_dir, 'Models/'+model_name) - model_path = os.path.join(save_path, "model.json") - checkpoint_path = os.path.join(save_path, weight_file) - # model_path = os.path.join(current_dir, 'model_weights/fcn_atrous/model_change.hdf5') - # model = FCN_Resnet50_32s((480,480,3)) - - config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True)) - session = tf.Session(config=config) - K.set_session(session) - - model = globals()[model_name]( - batch_shape=batch_shape, input_shape=(512, 512, 3)) - model.load_weights(checkpoint_path, by_name=True) - - model.summary() - - results = [] - total = 0 - for img_num in image_list: - img_num = img_num.strip('\n') - total += 1 - print('#%d: %s' % (total, img_num)) - image = Image.open('%s/%s%s' % (data_dir, img_num, data_suffix)) - image = img_to_array(image) # , data_format='default') - - label = Image.open('%s/%s%s' % (label_dir, img_num, label_suffix)) - label_size = label.size - - img_h, img_w = image.shape[0:2] - - # long_side = max(img_h, img_w, image_size[0], image_size[1]) - pad_w = max(image_size[1] - img_w, 0) - pad_h = max(image_size[0] - img_h, 0) - image = np.lib.pad(image, ((pad_h/2, pad_h - pad_h/2), (pad_w/2, - pad_w - pad_w/2), (0, 0)), 'constant', constant_values=0.) - # image -= mean_value - '''img = array_to_img(image, 'channels_last', scale=False) - img.show() - exit()''' - # image = cv2.resize(image, image_size) - - image = np.expand_dims(image, axis=0) - image = preprocess_input(image) - - result = model.predict(image, batch_size=1) - result = np.argmax(np.squeeze(result), axis=-1).astype(np.uint8) - - result_img = Image.fromarray(result, mode='P') - result_img.palette = label.palette - # result_img = result_img.resize(label_size, resample=Image.BILINEAR) - result_img = result_img.crop( - (pad_w/2, pad_h/2, pad_w/2+img_w, pad_h/2+img_h)) - # result_img.show(title='result') - if return_results: - results.append(result_img) - if save_dir: - result_img.save(os.path.join(save_dir, img_num + '.png')) - return results - - -if __name__ == '__main__': - # model_name = 'AtrousFCN_Resnet50_16s' - # model_name = 'Atrous_DenseNet' - model_name = 'DenseNet_FCN' - weight_file = 'checkpoint_weights.hdf5' - image_size = (512, 512) - data_dir = os.path.expanduser( - '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/JPEGImages') - label_dir = os.path.expanduser( - '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/SegmentationClass') - - image_list = sys.argv[1:] # '2007_000491' - results = inference(model_name, weight_file, image_size, - image_list, data_dir, label_dir) - for result in results: - result.show(title='result', command=None) -import numpy as np -import matplotlib.pyplot as plt -from pylab import * -import os -import sys -from keras_contrib.applications import densenet -from keras.models import Model -from keras.regularizers import l2 -from keras.layers import * -from keras.engine import Layer -from keras.applications.vgg16 import * -from keras.models import * -from keras.applications.imagenet_utils import _obtain_input_shape -import keras.backend as K -import tensorflow as tf - -from utils.get_weights_path import * -from utils.basics import * -from utils.resnet_helpers import * -from 
utils.BilinearUpSampling import * - - -def top(x, input_shape, classes, activation, weight_decay): - - x = Conv2D(classes, (1, 1), activation='linear', - padding='same', kernel_regularizer=l2(weight_decay), - use_bias=False)(x) - - if K.image_data_format() == 'channels_first': - channel, row, col = input_shape - else: - row, col, channel = input_shape - - # TODO(ahundt) this is modified for the sigmoid case! also use loss_shape - if activation is 'sigmoid': - x = Reshape((row * col * classes,))(x) - - return x - - -def FCN_Vgg16_32s(input_shape=None, weight_decay=0., batch_momentum=0.9, batch_shape=None, classes=21): - if batch_shape: - img_input = Input(batch_shape=batch_shape) - image_size = batch_shape[1:3] - else: - img_input = Input(shape=input_shape) - image_size = input_shape[0:2] - # Block 1 - x = Conv2D(64, (3, 3), activation='relu', padding='same', - name='block1_conv1', kernel_regularizer=l2(weight_decay))(img_input) - x = Conv2D(64, (3, 3), activation='relu', padding='same', - name='block1_conv2', kernel_regularizer=l2(weight_decay))(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) - - # Block 2 - x = Conv2D(128, (3, 3), activation='relu', padding='same', - name='block2_conv1', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(128, (3, 3), activation='relu', padding='same', - name='block2_conv2', kernel_regularizer=l2(weight_decay))(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) - - # Block 3 - x = Conv2D(256, (3, 3), activation='relu', padding='same', - name='block3_conv1', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(256, (3, 3), activation='relu', padding='same', - name='block3_conv2', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(256, (3, 3), activation='relu', padding='same', - name='block3_conv3', kernel_regularizer=l2(weight_decay))(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) - - # Block 4 - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block4_conv1', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block4_conv2', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block4_conv3', kernel_regularizer=l2(weight_decay))(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) - - # Block 5 - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block5_conv1', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block5_conv2', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block5_conv3', kernel_regularizer=l2(weight_decay))(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) - - # Convolutional layers transfered from fully-connected layers - x = Conv2D(4096, (7, 7), activation='relu', padding='same', - name='fc1', kernel_regularizer=l2(weight_decay))(x) - x = Dropout(0.5)(x) - x = Conv2D(4096, (1, 1), activation='relu', padding='same', - name='fc2', kernel_regularizer=l2(weight_decay))(x) - x = Dropout(0.5)(x) - # classifying layer - x = Conv2D(classes, (1, 1), kernel_initializer='he_normal', activation='linear', - padding='valid', strides=(1, 1), kernel_regularizer=l2(weight_decay))(x) - - x = BilinearUpSampling2D(size=(32, 32))(x) - - model = Model(img_input, x) - - weights_path = os.path.expanduser(os.path.join( - '~', '.keras/models/fcn_vgg16_weights_tf_dim_ordering_tf_kernels.h5')) - 
model.load_weights(weights_path, by_name=True) - return model - - -def AtrousFCN_Vgg16_16s(input_shape=None, weight_decay=0., batch_momentum=0.9, batch_shape=None, classes=21): - if batch_shape: - img_input = Input(batch_shape=batch_shape) - image_size = batch_shape[1:3] - else: - img_input = Input(shape=input_shape) - image_size = input_shape[0:2] - # Block 1 - x = Conv2D(64, (3, 3), activation='relu', padding='same', - name='block1_conv1', kernel_regularizer=l2(weight_decay))(img_input) - x = Conv2D(64, (3, 3), activation='relu', padding='same', - name='block1_conv2', kernel_regularizer=l2(weight_decay))(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) - - # Block 2 - x = Conv2D(128, (3, 3), activation='relu', padding='same', - name='block2_conv1', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(128, (3, 3), activation='relu', padding='same', - name='block2_conv2', kernel_regularizer=l2(weight_decay))(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) - - # Block 3 - x = Conv2D(256, (3, 3), activation='relu', padding='same', - name='block3_conv1', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(256, (3, 3), activation='relu', padding='same', - name='block3_conv2', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(256, (3, 3), activation='relu', padding='same', - name='block3_conv3', kernel_regularizer=l2(weight_decay))(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) - - # Block 4 - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block4_conv1', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block4_conv2', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block4_conv3', kernel_regularizer=l2(weight_decay))(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) - - # Block 5 - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block5_conv1', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block5_conv2', kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(512, (3, 3), activation='relu', padding='same', - name='block5_conv3', kernel_regularizer=l2(weight_decay))(x) - - # Convolutional layers transfered from fully-connected layers - x = Conv2D(4096, (7, 7), activation='relu', padding='same', dilation_rate=(2, 2), - name='fc1', kernel_regularizer=l2(weight_decay))(x) - x = Dropout(0.5)(x) - x = Conv2D(4096, (1, 1), activation='relu', padding='same', - name='fc2', kernel_regularizer=l2(weight_decay))(x) - x = Dropout(0.5)(x) - # classifying layer - x = Conv2D(classes, (1, 1), kernel_initializer='he_normal', activation='linear', - padding='valid', strides=(1, 1), kernel_regularizer=l2(weight_decay))(x) - - x = BilinearUpSampling2D(target_size=tuple(image_size))(x) - - model = Model(img_input, x) - - weights_path = os.path.expanduser(os.path.join( - '~', '.keras/models/fcn_vgg16_weights_tf_dim_ordering_tf_kernels.h5')) - model.load_weights(weights_path, by_name=True) - return model - - -def FCN_Resnet50_32s(input_shape=None, weight_decay=0., batch_momentum=0.9, batch_shape=None, classes=21): - if batch_shape: - img_input = Input(batch_shape=batch_shape) - image_size = batch_shape[1:3] - else: - img_input = Input(shape=input_shape) - image_size = input_shape[0:2] - - bn_axis = 3 - - x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', - name='conv1', kernel_regularizer=l2(weight_decay))(img_input) 
- x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) - x = Activation('relu')(x) - x = MaxPooling2D((3, 3), strides=(2, 2))(x) - - x = conv_block(3, [64, 64, 256], stage=2, block='a', strides=(1, 1))(x) - x = identity_block(3, [64, 64, 256], stage=2, block='b')(x) - x = identity_block(3, [64, 64, 256], stage=2, block='c')(x) - - x = conv_block(3, [128, 128, 512], stage=3, block='a')(x) - x = identity_block(3, [128, 128, 512], stage=3, block='b')(x) - x = identity_block(3, [128, 128, 512], stage=3, block='c')(x) - x = identity_block(3, [128, 128, 512], stage=3, block='d')(x) - - x = conv_block(3, [256, 256, 1024], stage=4, block='a')(x) - x = identity_block(3, [256, 256, 1024], stage=4, block='b')(x) - x = identity_block(3, [256, 256, 1024], stage=4, block='c')(x) - x = identity_block(3, [256, 256, 1024], stage=4, block='d')(x) - x = identity_block(3, [256, 256, 1024], stage=4, block='e')(x) - x = identity_block(3, [256, 256, 1024], stage=4, block='f')(x) - - x = conv_block(3, [512, 512, 2048], stage=5, block='a')(x) - x = identity_block(3, [512, 512, 2048], stage=5, block='b')(x) - x = identity_block(3, [512, 512, 2048], stage=5, block='c')(x) - # classifying layer - x = Conv2D(classes, (1, 1), kernel_initializer='he_normal', activation='linear', - padding='valid', strides=(1, 1), kernel_regularizer=l2(weight_decay))(x) - - x = BilinearUpSampling2D(size=(32, 32))(x) - - model = Model(img_input, x) - weights_path = os.path.expanduser(os.path.join( - '~', '.keras/models/fcn_resnet50_weights_tf_dim_ordering_tf_kernels.h5')) - model.load_weights(weights_path, by_name=True) - return model - - -def AtrousFCN_Resnet50_16s(input_shape=None, weight_decay=0., batch_momentum=0.9, batch_shape=None, classes=21): - if batch_shape: - img_input = Input(batch_shape=batch_shape) - image_size = batch_shape[1:3] - else: - img_input = Input(shape=input_shape) - image_size = input_shape[0:2] - - bn_axis = 3 - - x = Conv2D(64, (7, 7), strides=(2, 2), padding='same', - name='conv1', kernel_regularizer=l2(weight_decay))(img_input) - x = BatchNormalization(axis=bn_axis, name='bn_conv1', - momentum=batch_momentum)(x) - x = Activation('relu')(x) - x = MaxPooling2D((3, 3), strides=(2, 2))(x) - - x = conv_block(3, [64, 64, 256], stage=2, block='a', weight_decay=weight_decay, strides=( - 1, 1), batch_momentum=batch_momentum)(x) - x = identity_block(3, [64, 64, 256], stage=2, block='b', - weight_decay=weight_decay, batch_momentum=batch_momentum)(x) - x = identity_block(3, [64, 64, 256], stage=2, block='c', - weight_decay=weight_decay, batch_momentum=batch_momentum)(x) - - x = conv_block(3, [128, 128, 512], stage=3, block='a', - weight_decay=weight_decay, batch_momentum=batch_momentum)(x) - x = identity_block(3, [128, 128, 512], stage=3, block='b', - weight_decay=weight_decay, batch_momentum=batch_momentum)(x) - x = identity_block(3, [128, 128, 512], stage=3, block='c', - weight_decay=weight_decay, batch_momentum=batch_momentum)(x) - x = identity_block(3, [128, 128, 512], stage=3, block='d', - weight_decay=weight_decay, batch_momentum=batch_momentum)(x) - - x = conv_block(3, [256, 256, 1024], stage=4, block='a', - weight_decay=weight_decay, batch_momentum=batch_momentum)(x) - x = identity_block(3, [256, 256, 1024], stage=4, block='b', - weight_decay=weight_decay, batch_momentum=batch_momentum)(x) - x = identity_block(3, [256, 256, 1024], stage=4, block='c', - weight_decay=weight_decay, batch_momentum=batch_momentum)(x) - x = identity_block(3, [256, 256, 1024], stage=4, block='d', - weight_decay=weight_decay, 
batch_momentum=batch_momentum)(x) - x = identity_block(3, [256, 256, 1024], stage=4, block='e', - weight_decay=weight_decay, batch_momentum=batch_momentum)(x) - x = identity_block(3, [256, 256, 1024], stage=4, block='f', - weight_decay=weight_decay, batch_momentum=batch_momentum)(x) - - x = atrous_conv_block(3, [512, 512, 2048], stage=5, block='a', weight_decay=weight_decay, atrous_rate=( - 2, 2), batch_momentum=batch_momentum)(x) - x = atrous_identity_block(3, [512, 512, 2048], stage=5, block='b', weight_decay=weight_decay, atrous_rate=( - 2, 2), batch_momentum=batch_momentum)(x) - x = atrous_identity_block(3, [512, 512, 2048], stage=5, block='c', weight_decay=weight_decay, atrous_rate=( - 2, 2), batch_momentum=batch_momentum)(x) - # classifying layer - #x = Conv2D(classes, (3, 3), dilation_rate=(2, 2), kernel_initializer='normal', activation='linear', padding='same', strides=(1, 1), kernel_regularizer=l2(weight_decay))(x) - x = Conv2D(classes, (1, 1), kernel_initializer='he_normal', activation='linear', - padding='same', strides=(1, 1), kernel_regularizer=l2(weight_decay))(x) - x = BilinearUpSampling2D(target_size=tuple(image_size))(x) - - model = Model(img_input, x) - weights_path = os.path.expanduser(os.path.join( - '~', '.keras/models/fcn_resnet50_weights_tf_dim_ordering_tf_kernels.h5')) - model.load_weights(weights_path, by_name=True) - return model - - -def Atrous_DenseNet(input_shape=None, weight_decay=1E-4, - batch_momentum=0.9, batch_shape=None, classes=21, - include_top=False, activation='sigmoid'): - # TODO(ahundt) pass the parameters but use defaults for now - if include_top is True: - # TODO(ahundt) Softmax is pre-applied, so need different train, inference, evaluate. - # TODO(ahundt) for multi-label try per class sigmoid top as follows: - # x = Reshape((row * col * classes))(x) - # x = Activation('sigmoid')(x) - # x = Reshape((row, col, classes))(x) - return densenet.DenseNet(depth=None, nb_dense_block=3, growth_rate=32, - nb_filter=-1, nb_layers_per_block=[6, 12, 24, 16], - bottleneck=True, reduction=0.5, dropout_rate=0.2, - weight_decay=1E-4, - include_top=True, top='segmentation', - weights=None, input_tensor=None, - input_shape=input_shape, - classes=classes, transition_dilation_rate=2, - transition_kernel_size=(1, 1), - transition_pooling=None) - - # if batch_shape: - # img_input = Input(batch_shape=batch_shape) - # image_size = batch_shape[1:3] - # else: - # img_input = Input(shape=input_shape) - # image_size = input_shape[0:2] - - input_shape = _obtain_input_shape(input_shape, - default_size=32, - min_size=16, - data_format=K.image_data_format(), - include_top=False) - img_input = Input(shape=input_shape) - - x = densenet.__create_dense_net(classes, img_input, - depth=None, nb_dense_block=3, growth_rate=32, - nb_filter=-1, nb_layers_per_block=[6, 12, 24, 16], - bottleneck=True, reduction=0.5, dropout_rate=0.2, - weight_decay=1E-4, top='segmentation', - input_shape=input_shape, - transition_dilation_rate=2, - transition_kernel_size=(1, 1), - transition_pooling=None, - include_top=include_top) - - x = top(x, input_shape, classes, activation, weight_decay) - - model = Model(img_input, x, name='Atrous_DenseNet') - # TODO(ahundt) add weight loading - return model - - -def DenseNet_FCN(input_shape=None, weight_decay=1E-4, - batch_momentum=0.9, batch_shape=None, classes=21, - include_top=False, activation='sigmoid'): - if include_top is True: - # TODO(ahundt) Softmax is pre-applied, so need different train, inference, evaluate. 
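# [editor's aside] On the TODO above: once softmax (or a per-class sigmoid) is
# applied inside the model, the training loss must consume probabilities
# rather than logits. A hedged sketch of the two variants (function names are
# illustrative, not part of this codebase):
import keras.backend as K

def xent_from_probs(y_true, y_pred):
    # y_pred has already passed through softmax inside the model
    return K.categorical_crossentropy(y_true, y_pred)

def xent_from_logits(y_true, y_pred):
    # y_pred are raw scores; the softmax is folded into the loss instead
    return K.categorical_crossentropy(y_true, y_pred, from_logits=True)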
-    # TODO(ahundt) for multi-label try per class sigmoid top as follows:
-    # x = Reshape((row * col * classes))(x)
-    # x = Activation('sigmoid')(x)
-    # x = Reshape((row, col, classes))(x)
-    return densenet.DenseNetFCN(input_shape=input_shape,
-                                weights=None, classes=classes,
-                                nb_layers_per_block=[4, 5, 7, 10, 12, 15],
-                                growth_rate=16,
-                                dropout_rate=0.2)
-
-    # if batch_shape:
-    #     img_input = Input(batch_shape=batch_shape)
-    #     image_size = batch_shape[1:3]
-    # else:
-    #     img_input = Input(shape=input_shape)
-    #     image_size = input_shape[0:2]
-
-    input_shape = _obtain_input_shape(input_shape,
-                                      default_size=32,
-                                      min_size=16,
-                                      data_format=K.image_data_format(),
-                                      include_top=False)
-    img_input = Input(shape=input_shape)
-
-    x = densenet.__create_fcn_dense_net(classes, img_input,
-                                        input_shape=input_shape,
-                                        nb_layers_per_block=[4, 5, 7, 10, 12, 15],
-                                        growth_rate=16,
-                                        dropout_rate=0.2,
-                                        include_top=include_top)
-
-    x = top(x, input_shape, classes, activation, weight_decay)
-    # TODO(ahundt) add weight loading
-    model = Model(img_input, x, name='DenseNet_FCN')
-    return model
-import numpy as np
-import matplotlib.pyplot as plt
-from pylab import *
-import os
-import sys
-import pickle
-from keras.optimizers import SGD, Adam, Nadam
-from keras.callbacks import *
-from keras.objectives import *
-from keras.metrics import binary_accuracy
-from keras.models import load_model
-import keras.backend as K
-# import keras.utils.visualize_util as vis_util
-
-from models import *
-from utils.loss_function import *
-from utils.metrics import *
-from utils.SegDataGenerator import *
-import time
-
-
-def train(batch_size, epochs, lr_base, lr_power, weight_decay, classes,
-          model_name, train_file_path, val_file_path,
-          data_dir, label_dir, target_size=None, batchnorm_momentum=0.9,
-          resume_training=False, class_weight=None, dataset='VOC2012',
-          loss_fn=softmax_sparse_crossentropy_ignoring_last_label,
-          metrics=[sparse_accuracy_ignoring_last_label],
-          loss_shape=None,
-          label_suffix='.png',
-          data_suffix='.jpg',
-          ignore_label=255,
-          label_cval=255):
-    if target_size:
-        input_shape = target_size + (3,)
-    else:
-        input_shape = (None, None, 3)
-    batch_shape = (batch_size,) + input_shape
-
-    ###########################################################
-    current_dir = os.path.dirname(os.path.realpath(__file__))
-    save_path = os.path.join(current_dir, 'Models/' + model_name)
-    if not os.path.exists(save_path):
-        # makedirs also creates the intermediate 'Models' directory if missing
-        os.makedirs(save_path)
-
-    # ############### learning rate scheduler ####################
-    def lr_scheduler(epoch, mode='power_decay'):
-        # (disabled) per-epoch override table:
-        # if epoch in lr_dict:
-        #     lr = lr_dict[epoch]
-        #     print('lr: %f' % lr)
-
-        # note: strings must be compared with '==', not 'is'
-        if mode == 'power_decay':
-            # original lr scheduler
-            lr = lr_base * ((1 - float(epoch) / epochs) ** lr_power)
-        if mode == 'exp_decay':
-            # exponential decay
-            lr = (float(lr_base) ** float(lr_power)) ** float(epoch + 1)
-        if mode == 'adam':
-            # adam default lr
-            lr = 0.001
-
-        if mode == 'progressive_drops':
-            # drops as progression proceeds, good for sgd
-            if epoch > 0.9 * epochs:
-                lr = 0.0001
-            elif epoch > 0.75 * epochs:
-                lr = 0.001
-            elif epoch > 0.5 * epochs:
-                lr = 0.01
-            else:
-                lr = 0.1
-
-        print('lr: %f' % lr)
-        return lr
-    scheduler = LearningRateScheduler(lr_scheduler)
-
-    # ###################### make model ########################
-    checkpoint_path = os.path.join(save_path, 'checkpoint_weights.hdf5')
-
-    model = globals()[model_name](weight_decay=weight_decay,
-                                  input_shape=input_shape,
-                                  batch_momentum=batchnorm_momentum,
-                                  classes=classes)
-
-    # ###################### optimizer ########################
-    optimizer = SGD(lr=lr_base, momentum=0.9)
-    # optimizer = Nadam(lr=lr_base, beta_1=0.825, beta_2=0.99685)
-
-    model.compile(loss=loss_fn,
-                  optimizer=optimizer,
-                  metrics=metrics)
-    if resume_training:
-        model.load_weights(checkpoint_path, by_name=True)
-    model_path = os.path.join(save_path, "model.json")
-    # save model structure; 'with' closes the file (the original called
-    # 'f.close' without parentheses, which never actually closed it)
-    with open(model_path, 'w') as f:
-        f.write(model.to_json())
-    img_path = os.path.join(save_path, "model.png")
-    # vis_util.plot(model, to_file=img_path, show_shapes=True)
-    model.summary()
-
-    # lr_reducer = ReduceLROnPlateau(monitor=softmax_sparse_crossentropy_ignoring_last_label, factor=np.sqrt(0.1),
-    #                                cooldown=0, patience=15, min_lr=0.5e-6)
-    # early_stopper = EarlyStopping(monitor=sparse_accuracy_ignoring_last_label, min_delta=0.0001, patience=70)
-    # callbacks = [early_stopper, lr_reducer]
-    callbacks = [scheduler]
-
-    # ####################### tfboard ###########################
-    if K.backend() == 'tensorflow':
-        tensorboard = TensorBoard(log_dir=os.path.join(save_path, 'logs'),
-                                  histogram_freq=10, write_graph=True)
-        callbacks.append(tensorboard)
-    # ################### checkpoint saver #######################
-    checkpoint = ModelCheckpoint(filepath=os.path.join(
-        save_path, 'checkpoint_weights.hdf5'), save_weights_only=True)  # .{epoch:d}
-    callbacks.append(checkpoint)
-    # set data generator and train
-    train_datagen = SegDataGenerator(zoom_range=[0.5, 2.0],
-                                     zoom_maintain_shape=True,
-                                     crop_mode='random',
-                                     crop_size=target_size,
-                                     # pad_size=(505, 505),
-                                     rotation_range=0.,
-                                     shear_range=0,
-                                     horizontal_flip=True,
-                                     channel_shift_range=20.,
-                                     fill_mode='constant',
-                                     label_cval=label_cval)
-    val_datagen = SegDataGenerator()
-
-    def get_file_len(file_path):
-        with open(file_path) as fp:
-            return len(fp.readlines())
-
-    # from Keras documentation: Total number of steps (batches of samples) to yield from generator before declaring one epoch finished
-    # and starting the next epoch. It should typically be equal to the number of unique samples of your dataset divided by the batch size.
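# [editor's aside] Worked example for the computation below, assuming the
# plain PASCAL VOC 2012 train split of 1464 images and batch_size=16:
#   steps_per_epoch = ceil(1464 / 16) = ceil(91.5) = 92
# so the last of the 92 batches in each epoch carries only 8 samples.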
-    steps_per_epoch = int(
-        np.ceil(get_file_len(train_file_path) / float(batch_size)))
-
-    history = model.fit_generator(
-        generator=train_datagen.flow_from_directory(
-            file_path=train_file_path,
-            data_dir=data_dir, data_suffix=data_suffix,
-            label_dir=label_dir, label_suffix=label_suffix,
-            classes=classes,
-            target_size=target_size, color_mode='rgb',
-            batch_size=batch_size, shuffle=True,
-            loss_shape=loss_shape,
-            ignore_label=ignore_label,
-            # save_to_dir='Images/'
-        ),
-        steps_per_epoch=steps_per_epoch,
-        epochs=epochs,
-        callbacks=callbacks,
-        workers=4,
-        # validation_data=val_datagen.flow_from_directory(
-        #     file_path=val_file_path, data_dir=data_dir, data_suffix='.jpg',
-        #     label_dir=label_dir, label_suffix='.png', classes=classes,
-        #     target_size=target_size, color_mode='rgb',
-        #     batch_size=batch_size, shuffle=False
-        # ),
-        # nb_val_samples = 64
-        class_weight=class_weight
-    )
-
-    model.save_weights(save_path + '/model.hdf5')
-
-
-if __name__ == '__main__':
-    model_name = 'AtrousFCN_Resnet50_16s'
-    # model_name = 'Atrous_DenseNet'
-    # model_name = 'DenseNet_FCN'
-    batch_size = 16
-    batchnorm_momentum = 0.95
-    epochs = 250
-    lr_base = 0.01 * (float(batch_size) / 16)
-    lr_power = 0.9
-    resume_training = False
-    # '==' rather than 'is': identity comparison of strings is unreliable
-    if model_name == 'AtrousFCN_Resnet50_16s':
-        weight_decay = 0.0001 / 2
-    else:
-        weight_decay = 1e-4
-    target_size = (320, 320)
-    dataset = 'VOC2012_BERKELEY'
-    if dataset == 'VOC2012_BERKELEY':
-        # pascal voc + berkeley semantic contours annotations
-        # Data/VOClarge/VOC2012/ImageSets/Segmentation
-        train_file_path = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/combined_imageset_train.txt')
-        # train_file_path = os.path.expanduser('~/.keras/datasets/oneimage/train.txt')  # Data/VOClarge/VOC2012/ImageSets/Segmentation
-        val_file_path = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/combined_imageset_val.txt')
-        data_dir = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/JPEGImages')
-        label_dir = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/combined_annotations')
-        data_suffix = '.jpg'
-        label_suffix = '.png'
-        classes = 21
-    if dataset == 'COCO':
-        # ###################### loss function & metric ########################
-        # Data/VOClarge/VOC2012/ImageSets/Segmentation
-        train_file_path = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/ImageSets/Segmentation/train.txt')
-        # train_file_path = os.path.expanduser('~/.keras/datasets/oneimage/train.txt')  # Data/VOClarge/VOC2012/ImageSets/Segmentation
-        val_file_path = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt')
-        data_dir = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/JPEGImages')
-        label_dir = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/SegmentationClass')
-        loss_fn = binary_crossentropy_with_logits
-        metrics = [binary_accuracy]
-        loss_shape = (target_size[0] * target_size[1] * classes,)
-        label_suffix = '.npy'
-        data_suffix = '.jpg'
-        ignore_label = None
-        label_cval = 0
-
-    # ###################### loss function & metric ########################
-    if dataset == 'VOC2012' or dataset == 'VOC2012_BERKELEY':
-        loss_fn = softmax_sparse_crossentropy_ignoring_last_label
-        metrics = [sparse_accuracy_ignoring_last_label]
-        loss_shape = None
-        ignore_label = 255
-        label_cval = 255
-
-    # Class weight is not yet supported for 3+ dimensional targets
-    # class_weight = {i: 1 for i in range(classes)}
-    # # The background class is much more common than all
-    # # others, so give it less weight!
-    # class_weight[0] = 0.1
-    class_weight = None
-
-    config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
-    session = tf.Session(config=config)
-    K.set_session(session)
-    train(batch_size, epochs, lr_base, lr_power, weight_decay, classes, model_name, train_file_path, val_file_path,
-          data_dir, label_dir, target_size=target_size, batchnorm_momentum=batchnorm_momentum, resume_training=resume_training,
-          class_weight=class_weight, loss_fn=loss_fn, metrics=metrics, loss_shape=loss_shape, data_suffix=data_suffix,
-          label_suffix=label_suffix, ignore_label=ignore_label, label_cval=label_cval)
-import numpy as np
-import matplotlib.pyplot as plt
-from pylab import *
-import os
-import sys
-import pickle
-import time
-from keras.optimizers import SGD, Adam
-from keras.callbacks import *
-from keras.objectives import *
-from keras.models import load_model
-import keras.backend as K
-# import keras.utils.visualize_util as vis_util
-
-from models import *
-from train import *
-from utils.loss_function import *
-from utils.metrics import *
-from utils.SegDataGenerator import *
-# from tf_image_segmentation.recipes.mscoco import data_coco
-
-
-if __name__ == '__main__':
-    # model_name = 'AtrousFCN_Resnet50_16s'
-    # model_name = 'Atrous_DenseNet'
-    model_name = 'DenseNet_FCN'
-    batch_size = 2
-    batchnorm_momentum = 0.95
-    epochs = 450
-    lr_base = 0.2 * (float(batch_size) / 4)
-    lr_power = float(1) / float(30)
-    resume_training = False
-    weight_decay = 0.0001 / 2
-    target_size = (320, 320)
-    dataset = 'COCO'
-    if dataset == 'VOC2012_BERKELEY':
-        # pascal voc + berkeley semantic contours annotations
-        # Data/VOClarge/VOC2012/ImageSets/Segmentation
-        train_file_path = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/combined_imageset_train.txt')
-        # train_file_path = os.path.expanduser('~/.keras/datasets/oneimage/train.txt')  # Data/VOClarge/VOC2012/ImageSets/Segmentation
-        val_file_path = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/combined_imageset_val.txt')
-        data_dir = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/JPEGImages')
-        label_dir = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/combined_annotations')
-    # '==' rather than 'is' for string comparison here as well
-    if dataset == 'VOC2012':
-        # Data/VOClarge/VOC2012/ImageSets/Segmentation
-        train_file_path = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/ImageSets/Segmentation/train.txt')
-        # train_file_path = os.path.expanduser('~/.keras/datasets/oneimage/train.txt')  # Data/VOClarge/VOC2012/ImageSets/Segmentation
-        val_file_path = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt')
-        data_dir = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/JPEGImages')
-        label_dir = os.path.expanduser(
-            '~/.keras/datasets/VOC2012/VOCdevkit/VOC2012/SegmentationClass')
-        classes = 21
-        class_weight = None
-    elif dataset == 'COCO':
-        # Data/VOClarge/VOC2012/ImageSets/Segmentation
-        train_file_path = os.path.expanduser(
-            '~/.keras/datasets/coco/annotations/train2014.txt')
-        # train_file_path = os.path.expanduser('~/.keras/datasets/oneimage/train.txt')  # Data/VOClarge/VOC2012/ImageSets/Segmentation
-        val_file_path = os.path.expanduser(
-            '~/.keras/datasets/coco/annotations/test2014.txt')
-        data_dir = os.path.expanduser('~/.keras/datasets/coco/train2014')
-        label_dir = os.path.expanduser(
-            '~/.keras/datasets/coco/seg_mask/train2014')
-        stats_file = os.path.expanduser(
-            '~/.keras/datasets/coco/seg_mask/train2014/image_segmentation_class_stats.json')
-        classes = 91
-        # class_weight = data_coco.class_weight(image_segmentation_stats_file=stats_file)
-        class_weight = None
-
-    config = tf.ConfigProto(gpu_options=tf.GPUOptions(allow_growth=True))
-    session = tf.Session(config=config)
-    K.set_session(session)
-    train(batch_size, epochs, lr_base, lr_power, weight_decay, classes, model_name, train_file_path, val_file_path,
-          data_dir, label_dir, target_size=target_size, batchnorm_momentum=batchnorm_momentum, resume_training=resume_training,
-          class_weight=class_weight, dataset=dataset)
-from keras.preprocessing.image import img_to_array, array_to_img
-from utils import SegDataGenerator
-from PIL import Image as PILImage
-import numpy as np
-
-
-def test_crop(crop_function):
-    # np.random.random takes a single shape tuple; the original passed two
-    # positional arguments, which raises a TypeError
-    arr = np.random.random((500, 800))
-
-    img = PILImage.fromarray(arr)
-
-    # integer division: crop sizes must be ints to be usable as array indices
-    crop_width = img.width // 5
-    crop_height = img.height // 5
-
-    result = crop_function(img_to_array(img),
-                           (crop_height, crop_width), 'channels_last')
-    result = array_to_img(result)
-
-    assert result.width == crop_width
-    assert result.height == crop_height
-
-
-def test_pair_crop(crop_function):
-    arr1 = np.random.random((500, 800))
-    arr2 = np.random.random((500, 800))
-
-    img1 = PILImage.fromarray(arr1)
-    img2 = PILImage.fromarray(arr2)
-
-    crop_width = img1.width // 5
-    crop_height = img1.height // 5
-
-    result1, result2 = crop_function(img_to_array(img1),
-                                     img_to_array(img2),
-                                     (crop_height, crop_width),
-                                     'channels_last')
-    result1 = array_to_img(result1)
-    result2 = array_to_img(result2)
-
-    assert result1.width == crop_width == result2.width
-    # the original compared result2.height against itself
-    assert result1.height == crop_height == result2.height
-
-
-def test_center_crop(): return test_crop(SegDataGenerator.center_crop)
-
-
-def test_random_crop(): return test_crop(SegDataGenerator.random_crop)
-
-
-def test_pair_center_crop(): return test_pair_crop(
-    SegDataGenerator.pair_center_crop)
-
-
-def test_pair_random_crop(): return test_pair_crop(
-    SegDataGenerator.pair_random_crop)
-import keras.backend as K
-import tensorflow as tf
-import numpy as np
-from keras.layers import *
-
-
-def resize_images_bilinear(X, height_factor=1, width_factor=1, target_height=None, target_width=None, data_format='default'):
-    '''Resizes the images contained in a 4D tensor of shape
-    - [batch, channels, height, width] (for 'channels_first' data_format)
-    - [batch, height, width, channels] (for 'channels_last' data_format)
-    by a factor of (height_factor, width_factor). Both factors should be
-    positive integers.
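    Example (editor's illustration): a (1, 64, 64, 3) channels_last tensor
    resized with height_factor=2 and width_factor=2 becomes (1, 128, 128, 3).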
- ''' - if data_format == 'default': - data_format = K.image_data_format() - if data_format == 'channels_first': - original_shape = K.int_shape(X) - if target_height and target_width: - new_shape = tf.constant( - np.array((target_height, target_width)).astype('int32')) - else: - new_shape = tf.shape(X)[2:] - new_shape *= tf.constant( - np.array([height_factor, width_factor]).astype('int32')) - X = permute_dimensions(X, [0, 2, 3, 1]) - X = tf.image.resize_bilinear(X, new_shape) - X = permute_dimensions(X, [0, 3, 1, 2]) - if target_height and target_width: - X.set_shape((None, None, target_height, target_width)) - else: - X.set_shape( - (None, None, original_shape[2] * height_factor, original_shape[3] * width_factor)) - return X - elif data_format == 'channels_last': - original_shape = K.int_shape(X) - if target_height and target_width: - new_shape = tf.constant( - np.array((target_height, target_width)).astype('int32')) - else: - new_shape = tf.shape(X)[1:3] - new_shape *= tf.constant( - np.array([height_factor, width_factor]).astype('int32')) - X = tf.image.resize_bilinear(X, new_shape) - if target_height and target_width: - X.set_shape((None, target_height, target_width, None)) - else: - X.set_shape( - (None, original_shape[1] * height_factor, original_shape[2] * width_factor, None)) - return X - else: - raise Exception('Invalid data_format: ' + data_format) - - -class BilinearUpSampling2D(Layer): - def __init__(self, size=(1, 1), target_size=None, data_format='default', **kwargs): - if data_format == 'default': - data_format = K.image_data_format() - self.size = tuple(size) - if target_size is not None: - self.target_size = tuple(target_size) - else: - self.target_size = None - assert data_format in { - 'channels_last', 'channels_first'}, 'data_format must be in {tf, th}' - self.data_format = data_format - self.input_spec = [InputSpec(ndim=4)] - super(BilinearUpSampling2D, self).__init__(**kwargs) - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - width = int(self.size[0] * input_shape[2] - if input_shape[2] is not None else None) - height = int(self.size[1] * input_shape[3] - if input_shape[3] is not None else None) - if self.target_size is not None: - width = self.target_size[0] - height = self.target_size[1] - return (input_shape[0], - input_shape[1], - width, - height) - elif self.data_format == 'channels_last': - width = int(self.size[0] * input_shape[1] - if input_shape[1] is not None else None) - height = int(self.size[1] * input_shape[2] - if input_shape[2] is not None else None) - if self.target_size is not None: - width = self.target_size[0] - height = self.target_size[1] - return (input_shape[0], - width, - height, - input_shape[3]) - else: - raise Exception('Invalid data_format: ' + self.data_format) - - def call(self, x, mask=None): - if self.target_size is not None: - return resize_images_bilinear(x, target_height=self.target_size[0], target_width=self.target_size[1], data_format=self.data_format) - else: - return resize_images_bilinear(x, height_factor=self.size[0], width_factor=self.size[1], data_format=self.data_format) - - def get_config(self): - config = {'size': self.size, 'target_size': self.target_size} - base_config = super(BilinearUpSampling2D, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -from keras.preprocessing.image import * -from keras.applications.imagenet_utils import preprocess_input -from keras import backend as K -from PIL import Image -import numpy as np -import os - - -def 
center_crop(x, center_crop_size, data_format, **kwargs):
-    if data_format == 'channels_first':
-        centerh, centerw = x.shape[1] // 2, x.shape[2] // 2
-    elif data_format == 'channels_last':
-        centerh, centerw = x.shape[0] // 2, x.shape[1] // 2
-    lh, lw = center_crop_size[0] // 2, center_crop_size[1] // 2
-    rh, rw = center_crop_size[0] - lh, center_crop_size[1] - lw
-
-    h_start, h_end = centerh - lh, centerh + rh
-    w_start, w_end = centerw - lw, centerw + rw
-    if data_format == 'channels_first':
-        return x[:, h_start:h_end, w_start:w_end]
-    elif data_format == 'channels_last':
-        return x[h_start:h_end, w_start:w_end, :]
-
-
-def pair_center_crop(x, y, center_crop_size, data_format, **kwargs):
-    if data_format == 'channels_first':
-        centerh, centerw = x.shape[1] // 2, x.shape[2] // 2
-    elif data_format == 'channels_last':
-        centerh, centerw = x.shape[0] // 2, x.shape[1] // 2
-    lh, lw = center_crop_size[0] // 2, center_crop_size[1] // 2
-    rh, rw = center_crop_size[0] - lh, center_crop_size[1] - lw
-
-    h_start, h_end = centerh - lh, centerh + rh
-    w_start, w_end = centerw - lw, centerw + rw
-    if data_format == 'channels_first':
-        return x[:, h_start:h_end, w_start:w_end], \
-            y[:, h_start:h_end, w_start:w_end]
-    elif data_format == 'channels_last':
-        return x[h_start:h_end, w_start:w_end, :], \
-            y[h_start:h_end, w_start:w_end, :]
-
-
-def random_crop(x, random_crop_size, data_format, sync_seed=None, **kwargs):
-    np.random.seed(sync_seed)
-    if data_format == 'channels_first':
-        h, w = x.shape[1], x.shape[2]
-    elif data_format == 'channels_last':
-        h, w = x.shape[0], x.shape[1]
-    rangeh = (h - random_crop_size[0]) // 2
-    rangew = (w - random_crop_size[1]) // 2
-    offseth = 0 if rangeh == 0 else np.random.randint(rangeh)
-    offsetw = 0 if rangew == 0 else np.random.randint(rangew)
-
-    h_start, h_end = offseth, offseth + random_crop_size[0]
-    w_start, w_end = offsetw, offsetw + random_crop_size[1]
-    if data_format == 'channels_first':
-        return x[:, h_start:h_end, w_start:w_end]
-    elif data_format == 'channels_last':
-        return x[h_start:h_end, w_start:w_end, :]
-
-
-def pair_random_crop(x, y, random_crop_size, data_format, sync_seed=None, **kwargs):
-    np.random.seed(sync_seed)
-    if data_format == 'channels_first':
-        h, w = x.shape[1], x.shape[2]
-    elif data_format == 'channels_last':
-        h, w = x.shape[0], x.shape[1]
-    rangeh = (h - random_crop_size[0]) // 2
-    rangew = (w - random_crop_size[1]) // 2
-    offseth = 0 if rangeh == 0 else np.random.randint(rangeh)
-    offsetw = 0 if rangew == 0 else np.random.randint(rangew)
-
-    h_start, h_end = offseth, offseth + random_crop_size[0]
-    w_start, w_end = offsetw, offsetw + random_crop_size[1]
-    if data_format == 'channels_first':
-        # the original sliced y with h_start:h_end on the width axis too
-        return x[:, h_start:h_end, w_start:w_end], y[:, h_start:h_end, w_start:w_end]
-    elif data_format == 'channels_last':
-        return x[h_start:h_end, w_start:w_end, :], y[h_start:h_end, w_start:w_end, :]
-
-
-class SegDirectoryIterator(Iterator):
-    '''
-    Users need to ensure that all files exist.
-    Label images should be png images where pixel values represent the class number.
- - find images -name *.jpg > images.txt - find labels -name *.png > labels.txt - - for a file name 2011_002920.jpg, each row should contain 2011_002920 - - file_path: location of train.txt, or val.txt in PASCAL VOC2012 format, - listing image file path components without extension - data_dir: location of image files referred to by file in file_path - label_dir: location of label files - data_suffix: image file extension, such as `.jpg` or `.png` - label_suffix: label file suffix, such as `.png`, or `.npy` - loss_shape: shape to use when applying loss function to the label data - ''' - - def __init__(self, file_path, seg_data_generator, - data_dir, data_suffix, - label_dir, label_suffix, classes, ignore_label=255, - crop_mode='none', label_cval=255, pad_size=None, - target_size=None, color_mode='rgb', - data_format='default', class_mode='sparse', - batch_size=1, shuffle=True, seed=None, - save_to_dir=None, save_prefix='', save_format='jpeg', - loss_shape=None): - if data_format == 'default': - data_format = K.image_data_format() - self.file_path = file_path - self.data_dir = data_dir - self.data_suffix = data_suffix - self.label_suffix = label_suffix - self.label_dir = label_dir - self.classes = classes - self.seg_data_generator = seg_data_generator - self.target_size = tuple(target_size) - self.ignore_label = ignore_label - self.crop_mode = crop_mode - self.label_cval = label_cval - self.pad_size = pad_size - if color_mode not in {'rgb', 'grayscale'}: - raise ValueError('Invalid color mode:', color_mode, - '; expected "rgb" or "grayscale".') - self.color_mode = color_mode - self.data_format = data_format - self.nb_label_ch = 1 - self.loss_shape = loss_shape - - if (self.label_suffix == '.npy') or (self.label_suffix == 'npy'): - self.label_file_format = 'npy' - else: - self.label_file_format = 'img' - if target_size: - if self.color_mode == 'rgb': - if self.data_format == 'channels_last': - self.image_shape = self.target_size + (3,) - else: - self.image_shape = (3,) + self.target_size - else: - if self.data_format == 'channels_last': - self.image_shape = self.target_size + (1,) - else: - self.image_shape = (1,) + self.target_size - if self.data_format == 'channels_last': - self.label_shape = self.target_size + (self.nb_label_ch,) - else: - self.label_shape = (self.nb_label_ch,) + self.target_size - elif batch_size != 1: - raise ValueError( - 'Batch size must be 1 when target image size is undetermined') - else: - self.image_shape = None - self.label_shape = None - if class_mode not in {'sparse', None}: - raise ValueError('Invalid class_mode:', class_mode, - '; expected one of ' - '"sparse", or None.') - self.class_mode = class_mode - if save_to_dir: - self.palette = None - self.save_to_dir = save_to_dir - self.save_prefix = save_prefix - self.save_format = save_format - - white_list_formats = {'png', 'jpg', 'jpeg', 'bmp', 'npy'} - - # build lists for data files and label files - self.data_files = [] - self.label_files = [] - fp = open(file_path) - lines = fp.readlines() - fp.close() - self.nb_sample = len(lines) - for line in lines: - line = line.strip('\n') - self.data_files.append(line + data_suffix) - self.label_files.append(line + label_suffix) - super(SegDirectoryIterator, self).__init__( - self.nb_sample, batch_size, shuffle, seed) - - def _get_batches_of_transformed_samples(self, index_array): - """Gets a batch of transformed samples. - # Arguments - index_array: array of sample indices to include in batch. - # Returns - A batch of transformed samples. 
- """ - current_batch_size = len(index_array) - - # The transformation of images is not under thread lock so it can be - # done in parallel - if self.target_size: - # TODO(ahundt) make dtype properly configurable - batch_x = np.zeros((current_batch_size,) + self.image_shape) - if self.loss_shape is None and self.label_file_format is 'img': - batch_y = np.zeros((current_batch_size,) + self.label_shape, - dtype=int) - elif self.loss_shape is None: - batch_y = np.zeros((current_batch_size,) + self.label_shape) - else: - batch_y = np.zeros((current_batch_size,) + self.loss_shape, - dtype=np.uint8) - grayscale = self.color_mode == 'grayscale' - # build batch of image data and labels - for i, j in enumerate(index_array): - data_file = self.data_files[j] - label_file = self.label_files[j] - img_file_format = 'img' - img = load_img(os.path.join(self.data_dir, data_file), - grayscale=grayscale, target_size=None) - label_filepath = os.path.join(self.label_dir, label_file) - - if self.label_file_format == 'npy': - y = np.load(label_filepath) - else: - label = Image.open(label_filepath) - if self.save_to_dir and self.palette is None: - self.palette = label.palette - - # do padding - if self.target_size: - if self.crop_mode != 'none': - x = img_to_array(img, data_format=self.data_format) - if self.label_file_format is not 'npy': - y = img_to_array( - label, data_format=self.data_format).astype(int) - img_w, img_h = img.size - if self.pad_size: - pad_w = max(self.pad_size[1] - img_w, 0) - pad_h = max(self.pad_size[0] - img_h, 0) - else: - pad_w = max(self.target_size[1] - img_w, 0) - pad_h = max(self.target_size[0] - img_h, 0) - if self.data_format == 'channels_first': - x = np.lib.pad(x, ((0, 0), (pad_h // 2, pad_h - pad_h // 2), - (pad_w // 2, pad_w - pad_w // 2)), 'constant', constant_values=0.) - y = np.lib.pad(y, ((0, 0), (pad_h // 2, pad_h - pad_h // 2), (pad_w // 2, pad_w - pad_w // 2)), - 'constant', constant_values=self.label_cval) - elif self.data_format == 'channels_last': - x = np.lib.pad(x, ((pad_h // 2, pad_h - pad_h // 2), (pad_w // 2, - pad_w - pad_w // 2), (0, 0)), 'constant', constant_values=0.) 
- y = np.lib.pad(y, ((pad_h // 2, pad_h - pad_h // 2), (pad_w // 2, pad_w - - pad_w // 2), (0, 0)), 'constant', constant_values=self.label_cval) - else: - x = img_to_array(img.resize((self.target_size[1], self.target_size[0]), - Image.BILINEAR), - data_format=self.data_format) - if self.label_file_format is not 'npy': - y = img_to_array(label.resize((self.target_size[1], self.target_size[ - 0]), Image.NEAREST), data_format=self.data_format).astype(int) - else: - print('ERROR: resize not implemented for label npy file') - - if self.target_size is None: - batch_x = np.zeros((current_batch_size,) + x.shape) - if self.loss_shape is not None: - batch_y = np.zeros((current_batch_size,) + self.loss_shape) - else: - batch_y = np.zeros((current_batch_size,) + y.shape) - - x, y = self.seg_data_generator.random_transform(x, y) - x = self.seg_data_generator.standardize(x) - - if self.ignore_label: - y[np.where(y == self.ignore_label)] = self.classes - - if self.loss_shape is not None: - y = np.reshape(y, self.loss_shape) - - batch_x[i] = x - batch_y[i] = y - # optionally save augmented images to disk for debugging purposes - if self.save_to_dir: - for i in range(current_batch_size): - img = array_to_img(batch_x[i], self.data_format, scale=True) - label = batch_y[i][:, :, 0].astype('uint8') - label[np.where(label == self.classes)] = self.ignore_label - label = Image.fromarray(label, mode='P') - label.palette = self.palette - # TODO(ahundt) fix index=i, a hacky workaround since current_index + i is no long available - fname = '{prefix}_{index}_{hash}'.format(prefix=self.save_prefix, - index=i, - hash=np.random.randint(1e4)) - img.save(os.path.join(self.save_to_dir, 'img_' + - fname + '.{format}'.format(format=self.save_format))) - label.save(os.path.join(self.save_to_dir, - 'label_' + fname + '.png')) - # return - batch_x = preprocess_input(batch_x) - if self.class_mode == 'sparse': - return batch_x, batch_y - else: - return batch_x - - -class SegDataGenerator(object): - - def __init__(self, - featurewise_center=False, - samplewise_center=False, - featurewise_std_normalization=False, - samplewise_std_normalization=False, - channelwise_center=False, - rotation_range=0., - width_shift_range=0., - height_shift_range=0., - shear_range=0., - zoom_range=0., - zoom_maintain_shape=True, - channel_shift_range=0., - fill_mode='constant', - cval=0., - label_cval=255, - crop_mode='none', - crop_size=(0, 0), - pad_size=None, - horizontal_flip=False, - vertical_flip=False, - rescale=None, - data_format='default'): - if data_format == 'default': - data_format = K.image_data_format() - self.__dict__.update(locals()) - self.mean = None - self.ch_mean = None - self.std = None - self.principal_components = None - self.rescale = rescale - - if data_format not in {'channels_last', 'channels_first'}: - raise Exception('data_format should be channels_last (channel after row and ' - 'column) or channels_first (channel before row and column). 
' - 'Received arg: ', data_format) - if crop_mode not in {'none', 'random', 'center'}: - raise Exception('crop_mode should be "none" or "random" or "center" ' - 'Received arg: ', crop_mode) - self.data_format = data_format - if data_format == 'channels_first': - self.channel_index = 1 - self.row_index = 2 - self.col_index = 3 - if data_format == 'channels_last': - self.channel_index = 3 - self.row_index = 1 - self.col_index = 2 - - if np.isscalar(zoom_range): - self.zoom_range = [1 - zoom_range, 1 + zoom_range] - elif len(zoom_range) == 2: - self.zoom_range = [zoom_range[0], zoom_range[1]] - else: - raise Exception('zoom_range should be a float or ' - 'a tuple or list of two floats. ' - 'Received arg: ', zoom_range) - - def flow_from_directory(self, file_path, data_dir, data_suffix, - label_dir, label_suffix, classes, - ignore_label=255, - target_size=None, color_mode='rgb', - class_mode='sparse', - batch_size=32, shuffle=True, seed=None, - save_to_dir=None, save_prefix='', save_format='jpeg', - loss_shape=None): - if self.crop_mode == 'random' or self.crop_mode == 'center': - target_size = self.crop_size - return SegDirectoryIterator( - file_path, self, - data_dir=data_dir, data_suffix=data_suffix, - label_dir=label_dir, label_suffix=label_suffix, - classes=classes, ignore_label=ignore_label, - crop_mode=self.crop_mode, label_cval=self.label_cval, - pad_size=self.pad_size, - target_size=target_size, color_mode=color_mode, - data_format=self.data_format, class_mode=class_mode, - batch_size=batch_size, shuffle=shuffle, seed=seed, - save_to_dir=save_to_dir, save_prefix=save_prefix, - save_format=save_format, - loss_shape=loss_shape) - - def standardize(self, x): - if self.rescale: - x *= self.rescale - # x is a single image, so it doesn't have image number at index 0 - img_channel_index = self.channel_index - 1 - if self.samplewise_center: - x -= np.mean(x, axis=img_channel_index, keepdims=True) - if self.samplewise_std_normalization: - x /= (np.std(x, axis=img_channel_index, keepdims=True) + 1e-7) - - if self.featurewise_center: - x -= self.mean - if self.featurewise_std_normalization: - x /= (self.std + 1e-7) - - if self.channelwise_center: - x -= self.ch_mean - return x - - def random_transform(self, x, y): - # x is a single image, so it doesn't have image number at index 0 - img_row_index = self.row_index - 1 - img_col_index = self.col_index - 1 - img_channel_index = self.channel_index - 1 - if self.crop_mode == 'none': - crop_size = (x.shape[img_row_index], x.shape[img_col_index]) - else: - crop_size = self.crop_size - - assert x.shape[img_row_index] == y.shape[img_row_index] and x.shape[img_col_index] == y.shape[ - img_col_index], 'DATA ERROR: Different shape of data and label!\ndata shape: %s, label shape: %s' % (str(x.shape), str(y.shape)) - - # use composition of homographies to generate final transform that - # needs to be applied - if self.rotation_range: - theta = np.pi / 180 * \ - np.random.uniform(-self.rotation_range, self.rotation_range) - else: - theta = 0 - rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], - [np.sin(theta), np.cos(theta), 0], - [0, 0, 1]]) - if self.height_shift_range: - # * x.shape[img_row_index] - tx = np.random.uniform(-self.height_shift_range, - self.height_shift_range) * crop_size[0] - else: - tx = 0 - - if self.width_shift_range: - # * x.shape[img_col_index] - ty = np.random.uniform(-self.width_shift_range, - self.width_shift_range) * crop_size[1] - else: - ty = 0 - - translation_matrix = np.array([[1, 0, tx], - [0, 1, ty], - [0, 0, 1]]) 
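# [editor's aside] The rotation/translation matrices above and the shear/zoom
# matrices below are composed into a single homography that is applied in one
# warp, so the image is interpolated only once. A minimal NumPy sketch with
# made-up values:
import numpy as np

theta = np.deg2rad(10.0)                      # hypothetical rotation angle
rotation = np.array([[np.cos(theta), -np.sin(theta), 0],
                     [np.sin(theta),  np.cos(theta), 0],
                     [0,              0,             1]])
translation = np.array([[1, 0, 5],            # hypothetical 5 px / -3 px shift
                        [0, 1, -3],
                        [0, 0, 1]])
combined = rotation.dot(translation)          # one matrix, one interpolation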
- if self.shear_range: - shear = np.random.uniform(-self.shear_range, self.shear_range) - else: - shear = 0 - shear_matrix = np.array([[1, -np.sin(shear), 0], - [0, np.cos(shear), 0], - [0, 0, 1]]) - - if self.zoom_range[0] == 1 and self.zoom_range[1] == 1: - zx, zy = 1, 1 - else: - zx, zy = np.random.uniform( - self.zoom_range[0], self.zoom_range[1], 2) - if self.zoom_maintain_shape: - zy = zx - zoom_matrix = np.array([[zx, 0, 0], - [0, zy, 0], - [0, 0, 1]]) - - transform_matrix = np.dot( - np.dot(np.dot(rotation_matrix, translation_matrix), shear_matrix), zoom_matrix) - - h, w = x.shape[img_row_index], x.shape[img_col_index] - transform_matrix = transform_matrix_offset_center( - transform_matrix, h, w) - - x = apply_transform(x, transform_matrix, img_channel_index, - fill_mode=self.fill_mode, cval=self.cval) - y = apply_transform(y, transform_matrix, img_channel_index, - fill_mode='constant', cval=self.label_cval) - - if self.channel_shift_range != 0: - x = random_channel_shift( - x, self.channel_shift_range, img_channel_index) - - if self.horizontal_flip: - if np.random.random() < 0.5: - x = flip_axis(x, img_col_index) - y = flip_axis(y, img_col_index) - - if self.vertical_flip: - if np.random.random() < 0.5: - x = flip_axis(x, img_row_index) - y = flip_axis(y, img_row_index) - - if self.crop_mode == 'center': - x, y = pair_center_crop(x, y, self.crop_size, self.data_format) - elif self.crop_mode == 'random': - x, y = pair_random_crop(x, y, self.crop_size, self.data_format) - - # TODO: - # channel-wise normalization - # barrel/fisheye - return x, y - - def fit(self, X, - augment=False, - rounds=1, - seed=None): - '''Required for featurewise_center and featurewise_std_normalization - - # Arguments - X: Numpy array, the data to fit on. - augment: whether to fit on randomly augmented samples - rounds: if `augment`, - how many augmentation passes to do over the data - seed: random seed. 
-        '''
-        X = np.copy(X)
-        if augment:
-            aX = np.zeros(tuple([rounds * X.shape[0]] + list(X.shape)[1:]))
-            for r in range(rounds):
-                for i in range(X.shape[0]):
-                    # random_transform expects an image/label pair and returns
-                    # both; pass the image as a dummy label and keep only the
-                    # transformed image (the original passed a single argument,
-                    # which raises a TypeError)
-                    aX[i + r * X.shape[0]] = self.random_transform(X[i], X[i])[0]
-            X = aX
-
-        if self.featurewise_center:
-            self.mean = np.mean(X, axis=0)
-            X -= self.mean
-
-        if self.featurewise_std_normalization:
-            self.std = np.std(X, axis=0)
-            X /= (self.std + 1e-7)
-
-    def set_ch_mean(self, ch_mean):
-        self.ch_mean = ch_mean
-from keras.models import Model
-from keras.layers import *
-from keras.regularizers import l2
-import tensorflow as tf
-
-
-def conv_relu(nb_filter, nb_row, nb_col, subsample=(1, 1), border_mode='same', bias=True, w_decay=0.01):
-    def f(x):
-        with tf.name_scope('conv_relu'):
-            # Keras 2 API: 'strides', 'kernel_regularizer' and 'padding'
-            # (the original mixed in the Keras 1 names 'stride',
-            # 'W_regularizer' and 'border_mode', which raise TypeError)
-            x = Conv2D(filters=nb_filter, kernel_size=(nb_row, nb_col), strides=subsample, use_bias=bias,
-                       kernel_initializer="he_normal", kernel_regularizer=l2(w_decay), padding=border_mode)(x)
-            x = Activation("relu")(x)
-            return x
-    return f
-
-
-def conv_bn(nb_filter, nb_row, nb_col, subsample=(1, 1), border_mode='same', bias=True, w_decay=0.01):
-    def f(x):
-        with tf.name_scope('conv_bn'):
-            x = Conv2D(filters=nb_filter, kernel_size=(nb_row, nb_col), strides=subsample, use_bias=bias,
-                       kernel_initializer="he_normal", kernel_regularizer=l2(w_decay), padding=border_mode)(x)
-            # the 'mode' argument was removed in Keras 2
-            x = BatchNormalization(axis=-1)(x)
-            return x
-    return f
-
-
-def conv_bn_relu(nb_filter, nb_row, nb_col, subsample=(1, 1), border_mode='same', bias=True, w_decay=0.01):
-    def f(x):
-        with tf.name_scope('conv_bn_relu'):
-            x = Conv2D(filters=nb_filter, kernel_size=(nb_row, nb_col), strides=subsample, use_bias=bias,
-                       kernel_initializer="he_normal", kernel_regularizer=l2(w_decay), padding=border_mode)(x)
-            x = BatchNormalization(axis=-1)(x)
-            x = Activation("relu")(x)
-            return x
-    return f
-
-
-def bn_relu_conv(nb_filter, nb_row, nb_col, subsample=(1, 1), border_mode='same', bias=True, w_decay=0.01):
-    def f(x):
-        with tf.name_scope('bn_relu_conv'):
-            x = BatchNormalization(axis=-1)(x)
-            x = Activation("relu")(x)
-            x = Conv2D(filters=nb_filter, kernel_size=(nb_row, nb_col), strides=subsample, use_bias=bias,
-                       kernel_initializer="he_normal", kernel_regularizer=l2(w_decay), padding=border_mode)(x)
-            return x
-    return f
-
-
-def atrous_conv_bn(nb_filter, nb_row, nb_col, atrous_rate=(2, 2), subsample=(1, 1), border_mode='same', bias=True, w_decay=0.01):
-    def f(x):
-        with tf.name_scope('atrous_conv_bn'):
-            x = Conv2D(filters=nb_filter, kernel_size=(nb_row, nb_col), dilation_rate=atrous_rate, strides=subsample, use_bias=bias,
-                       kernel_initializer="he_normal", kernel_regularizer=l2(w_decay), padding=border_mode)(x)
-            x = BatchNormalization(axis=-1)(x)
-            return x
-    return f
-
-
-def atrous_conv_bn_relu(nb_filter, nb_row, nb_col, atrous_rate=(2, 2), subsample=(1, 1), border_mode='same', bias=True, w_decay=0.01):
-    def f(x):
-        with tf.name_scope('atrous_conv_bn_relu'):
-            x = Conv2D(filters=nb_filter, kernel_size=(nb_row, nb_col), dilation_rate=atrous_rate, strides=subsample, use_bias=bias,
-                       kernel_initializer="he_normal", kernel_regularizer=l2(w_decay), padding=border_mode)(x)
-            x = BatchNormalization(axis=-1)(x)
-            x = Activation("relu")(x)
-            return x
-    return f
-from keras.utils.data_utils import get_file
-
-
-def get_weights_path_vgg16():
-    TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
-    weights_path = get_file(
-        'vgg16_weights_tf_dim_ordering_tf_kernels.h5', TF_WEIGHTS_PATH, cache_subdir='models')
-    return weights_path
-
-
-def get_weights_path_resnet():
-    TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5'
-    weights_path = get_file(
-        'resnet50_weights_tf_dim_ordering_tf_kernels.h5', TF_WEIGHTS_PATH, cache_subdir='models')
-    return weights_path
-
-
-if __name__ == '__main__':
-    print(get_weights_path_vgg16())
-    print(get_weights_path_resnet())
-from keras.objectives import *
-from keras.metrics import binary_crossentropy
-import keras.backend as K
-import tensorflow as tf
-
-
-# Softmax cross-entropy loss function for pascal voc segmentation
-# and models which do not perform softmax.
-# tensorflow only
-def softmax_sparse_crossentropy_ignoring_last_label(y_true, y_pred):
-    y_pred = K.reshape(y_pred, (-1, K.int_shape(y_pred)[-1]))
-    log_softmax = tf.nn.log_softmax(y_pred)
-
-    y_true = K.one_hot(tf.to_int32(K.flatten(y_true)),
-                       K.int_shape(y_pred)[-1] + 1)
-    unpacked = tf.unstack(y_true, axis=-1)
-    y_true = tf.stack(unpacked[:-1], axis=-1)
-
-    cross_entropy = -K.sum(y_true * log_softmax, axis=1)
-    cross_entropy_mean = K.mean(cross_entropy)
-
-    return cross_entropy_mean
-
-
-# Sigmoid (binary) cross-entropy loss function for coco segmentation
-# and models which expect but do not apply sigmoid on each entry
-# tensorflow only
-def binary_crossentropy_with_logits(ground_truth, predictions):
-    return K.mean(K.binary_crossentropy(ground_truth,
-                                        predictions,
-                                        from_logits=True),
-                  axis=-1)
-import keras.backend as K
-import tensorflow as tf
-from tensorflow.contrib.metrics import streaming_mean_iou
-
-
-def sparse_accuracy_ignoring_last_label(y_true, y_pred):
-    nb_classes = K.int_shape(y_pred)[-1]
-    y_pred = K.reshape(y_pred, (-1, nb_classes))
-
-    y_true = K.one_hot(tf.to_int32(K.flatten(y_true)),
-                       nb_classes + 1)
-    unpacked = tf.unstack(y_true, axis=-1)
-    legal_labels = ~tf.cast(unpacked[-1], tf.bool)
-    y_true = tf.stack(unpacked[:-1], axis=-1)
-
-    return K.sum(tf.to_float(legal_labels & K.equal(K.argmax(y_true, axis=-1), K.argmax(y_pred, axis=-1)))) / K.sum(tf.to_float(legal_labels))
-
-
-# This IOU implementation is wrong!!!
-'''def mean_iou_ignoring_last_label(y_true, y_pred):
-    batch_size = K.int_shape(y_pred)[0]
-    y_true_list = tf.unpack(y_true, num=batch_size, axis=0)
-    y_pred_list = tf.unpack(y_pred, num=batch_size, axis=0)
-    mean_iou = 0.
- for y_true, y_pred in zip(y_true_list, y_pred_list): - nb_classes = K.int_shape(y_pred)[-1] - y_pred = K.reshape(y_pred, (-1, nb_classes)) - y_pred = K.argmax(y_pred, axis=-1) - y_pred = K.one_hot(y_pred, nb_classes) - y_true = K.one_hot(tf.to_int32(K.flatten(y_true)), nb_classes + 1) - unpacked = tf.unpack(y_true, axis=-1) - legal_labels = tf.expand_dims(tf.to_float( - ~tf.cast(unpacked[-1], tf.bool)), -1) - y_true = tf.pack(unpacked[:-1], axis=-1) - y_true = K.argmax(y_true, axis=-1) - y_true = K.one_hot(y_true, nb_classes) - y_pred = tf.cast(y_pred, tf.bool) - y_true = tf.cast(y_true, tf.bool) - - intersection = tf.to_float(y_pred & y_true) * legal_labels - union = tf.to_float(y_pred | y_true) * legal_labels - intersection = K.sum(intersection, axis=0) - union = K.sum(union, axis=0) - total_union = K.sum(tf.to_float(tf.cast(union, tf.bool))) - iou = K.sum(intersection / (union + K.epsilon())) / total_union - mean_iou = mean_iou + iou - mean_iou = mean_iou / batch_size - return mean_iou''' -from keras.layers import * -from keras.layers.merge import Add -from keras.regularizers import l2 - -# The original help functions from keras does not have weight regularizers, so I modified them. -# Also, I changed these two functions into functional style - - -def identity_block(kernel_size, filters, stage, block, weight_decay=0., batch_momentum=0.99): - '''The identity_block is the block that has no conv layer at shortcut - # Arguments - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - ''' - def f(input_tensor): - nb_filter1, nb_filter2, nb_filter3 = filters - if K.image_data_format() == 'channels_last': - bn_axis = 3 - else: - bn_axis = 1 - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a', - kernel_regularizer=l2(weight_decay))(input_tensor) - x = BatchNormalization( - axis=bn_axis, name=bn_name_base + '2a', momentum=batch_momentum)(x) - x = Activation('relu')(x) - - x = Conv2D(nb_filter2, (kernel_size, kernel_size), - padding='same', name=conv_name_base + '2b', kernel_regularizer=l2(weight_decay))(x) - x = BatchNormalization( - axis=bn_axis, name=bn_name_base + '2b', momentum=batch_momentum)(x) - x = Activation('relu')(x) - - x = Conv2D(nb_filter3, (1, 1), name=conv_name_base + - '2c', kernel_regularizer=l2(weight_decay))(x) - x = BatchNormalization( - axis=bn_axis, name=bn_name_base + '2c', momentum=batch_momentum)(x) - - x = Add()([x, input_tensor]) - x = Activation('relu')(x) - return x - return f - - -def conv_block(kernel_size, filters, stage, block, weight_decay=0., strides=(2, 2), batch_momentum=0.99): - '''conv_block is the block that has a conv layer at shortcut - # Arguments - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - Note that from stage 3, the first conv layer at main path is with strides=(2,2) - And the shortcut should have strides=(2,2) as well - ''' - def f(input_tensor): - nb_filter1, nb_filter2, nb_filter3 = filters - if K.image_data_format() == 'channels_last': - 
bn_axis = 3 - else: - bn_axis = 1 - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = Conv2D(nb_filter1, (1, 1), strides=strides, - name=conv_name_base + '2a', kernel_regularizer=l2(weight_decay))(input_tensor) - x = BatchNormalization( - axis=bn_axis, name=bn_name_base + '2a', momentum=batch_momentum)(x) - x = Activation('relu')(x) - - x = Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', - name=conv_name_base + '2b', kernel_regularizer=l2(weight_decay))(x) - x = BatchNormalization( - axis=bn_axis, name=bn_name_base + '2b', momentum=batch_momentum)(x) - x = Activation('relu')(x) - - x = Conv2D(nb_filter3, (1, 1), name=conv_name_base + - '2c', kernel_regularizer=l2(weight_decay))(x) - x = BatchNormalization( - axis=bn_axis, name=bn_name_base + '2c', momentum=batch_momentum)(x) - - shortcut = Conv2D(nb_filter3, (1, 1), strides=strides, - name=conv_name_base + '1', kernel_regularizer=l2(weight_decay))(input_tensor) - shortcut = BatchNormalization( - axis=bn_axis, name=bn_name_base + '1', momentum=batch_momentum)(shortcut) - - x = Add()([x, shortcut]) - x = Activation('relu')(x) - return x - return f - -# Atrous-Convolution version of residual blocks - - -def atrous_identity_block(kernel_size, filters, stage, block, weight_decay=0., atrous_rate=(2, 2), batch_momentum=0.99): - '''The identity_block is the block that has no conv layer at shortcut - # Arguments - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - ''' - def f(input_tensor): - nb_filter1, nb_filter2, nb_filter3 = filters - if K.image_data_format() == 'channels_last': - bn_axis = 3 - else: - bn_axis = 1 - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = Conv2D(nb_filter1, (1, 1), name=conv_name_base + '2a', - kernel_regularizer=l2(weight_decay))(input_tensor) - x = BatchNormalization( - axis=bn_axis, name=bn_name_base + '2a', momentum=batch_momentum)(x) - x = Activation('relu')(x) - - x = Conv2D(nb_filter2, (kernel_size, kernel_size), dilation_rate=atrous_rate, - padding='same', name=conv_name_base + '2b', kernel_regularizer=l2(weight_decay))(x) - x = BatchNormalization( - axis=bn_axis, name=bn_name_base + '2b', momentum=batch_momentum)(x) - x = Activation('relu')(x) - - x = Conv2D(nb_filter3, (1, 1), name=conv_name_base + - '2c', kernel_regularizer=l2(weight_decay))(x) - x = BatchNormalization( - axis=bn_axis, name=bn_name_base + '2c', momentum=batch_momentum)(x) - - x = Add()([x, input_tensor]) - x = Activation('relu')(x) - return x - return f - - -def atrous_conv_block(kernel_size, filters, stage, block, weight_decay=0., strides=(1, 1), atrous_rate=(2, 2), batch_momentum=0.99): - '''conv_block is the block that has a conv layer at shortcut - # Arguments - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - ''' - def f(input_tensor): - nb_filter1, nb_filter2, nb_filter3 = filters - if K.image_data_format() == 'channels_last': - bn_axis = 3 - else: - bn_axis = 1 - conv_name_base = 'res' + str(stage) + 
block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = Conv2D(nb_filter1, (1, 1), strides=strides, - name=conv_name_base + '2a', kernel_regularizer=l2(weight_decay))(input_tensor) - x = BatchNormalization( - axis=bn_axis, name=bn_name_base + '2a', momentum=batch_momentum)(x) - x = Activation('relu')(x) - - x = Conv2D(nb_filter2, (kernel_size, kernel_size), padding='same', dilation_rate=atrous_rate, - name=conv_name_base + '2b', kernel_regularizer=l2(weight_decay))(x) - x = BatchNormalization( - axis=bn_axis, name=bn_name_base + '2b', momentum=batch_momentum)(x) - x = Activation('relu')(x) - - x = Conv2D(nb_filter3, (1, 1), name=conv_name_base + - '2c', kernel_regularizer=l2(weight_decay))(x) - x = BatchNormalization( - axis=bn_axis, name=bn_name_base + '2c', momentum=batch_momentum)(x) - - shortcut = Conv2D(nb_filter3, (1, 1), strides=strides, - name=conv_name_base + '1', kernel_regularizer=l2(weight_decay))(input_tensor) - shortcut = BatchNormalization( - axis=bn_axis, name=bn_name_base + '1', momentum=batch_momentum)(shortcut) - - x = Add()([x, shortcut]) - x = Activation('relu')(x) - return x - return f -import numpy as np -import matplotlib.pyplot as plt -from pylab import * -import os -import sys -from keras.models import Model -from keras.regularizers import l2 -from keras.layers import * -from keras.models import model_from_json -from keras.utils import np_utils -from keras.applications.vgg16 import * -from keras.applications.resnet50 import * -import keras.backend as K -import tensorflow as tf - -from get_weights_path import * -from resnet_helpers import * - - -def transfer_FCN_Vgg16(): - input_shape = (224, 224, 3) - img_input = Input(shape=input_shape) - # Block 1 - x = Conv2D(64, (3, 3), activation='relu', padding='same', - name='block1_conv1')(img_input) - x = Conv2D(64, (3, 3), activation='relu', - padding='same', name='block1_conv2')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) - - # Block 2 - x = Conv2D(128, (3, 3), activation='relu', - padding='same', name='block2_conv1')(x) - x = Conv2D(128, (3, 3), activation='relu', - padding='same', name='block2_conv2')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) - - # Block 3 - x = Conv2D(256, (3, 3), activation='relu', - padding='same', name='block3_conv1')(x) - x = Conv2D(256, (3, 3), activation='relu', - padding='same', name='block3_conv2')(x) - x = Conv2D(256, (3, 3), activation='relu', - padding='same', name='block3_conv3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) - - # Block 4 - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block4_conv1')(x) - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block4_conv2')(x) - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block4_conv3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) - - # Block 5 - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block5_conv1')(x) - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block5_conv2')(x) - x = Conv2D(512, (3, 3), activation='relu', - padding='same', name='block5_conv3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) - - # Convolutional layers transfered from fully-connected layers - x = Conv2D(4096, (7, 7), activation='relu', padding='same', name='fc1')(x) - x = Conv2D(4096, (1, 1), activation='relu', padding='same', name='fc2')(x) - x = Conv2D(1000, (1, 1), activation='linear', name='predictions_1000')(x) - #x = 
Reshape((7,7))(x)
-
-    # Create model
-    model = Model(img_input, x)
-    weights_path = os.path.expanduser(os.path.join(
-        '~', '.keras/models/fcn_vgg16_weights_tf_dim_ordering_tf_kernels.h5'))
-
-    # transfer if weights have not been created
-    if not os.path.isfile(weights_path):
-        flattened_layers = model.layers
-        index = {}
-        for layer in flattened_layers:
-            if layer.name:
-                index[layer.name] = layer
-        vgg16 = VGG16()
-        for layer in vgg16.layers:
-            weights = layer.get_weights()
-            if layer.name == 'fc1':
-                weights[0] = np.reshape(weights[0], (7, 7, 512, 4096))
-            elif layer.name == 'fc2':
-                weights[0] = np.reshape(weights[0], (1, 1, 4096, 4096))
-            elif layer.name == 'predictions':
-                layer.name = 'predictions_1000'
-                weights[0] = np.reshape(weights[0], (1, 1, 4096, 1000))
-            # dict.has_key() is Python 2 only; use the 'in' operator
-            if layer.name in index:
-                index[layer.name].set_weights(weights)
-        model.save_weights(weights_path)
-        print('Successfully transformed!')
-    # else load weights
-    else:
-        model.load_weights(weights_path, by_name=True)
-        print('Already transformed!')
-
-
-def transfer_FCN_ResNet50():
-    input_shape = (224, 224, 3)
-    img_input = Input(shape=input_shape)
-    bn_axis = 3
-
-    x = Conv2D(64, (7, 7), strides=(2, 2),
-               padding='same', name='conv1')(img_input)
-    x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
-    x = Activation('relu')(x)
-    x = MaxPooling2D((3, 3), strides=(2, 2))(x)
-
-    x = conv_block(3, [64, 64, 256], stage=2, block='a', strides=(1, 1))(x)
-    x = identity_block(3, [64, 64, 256], stage=2, block='b')(x)
-    x = identity_block(3, [64, 64, 256], stage=2, block='c')(x)
-
-    x = conv_block(3, [128, 128, 512], stage=3, block='a')(x)
-    x = identity_block(3, [128, 128, 512], stage=3, block='b')(x)
-    x = identity_block(3, [128, 128, 512], stage=3, block='c')(x)
-    x = identity_block(3, [128, 128, 512], stage=3, block='d')(x)
-
-    x = conv_block(3, [256, 256, 1024], stage=4, block='a')(x)
-    x = identity_block(3, [256, 256, 1024], stage=4, block='b')(x)
-    x = identity_block(3, [256, 256, 1024], stage=4, block='c')(x)
-    x = identity_block(3, [256, 256, 1024], stage=4, block='d')(x)
-    x = identity_block(3, [256, 256, 1024], stage=4, block='e')(x)
-    x = identity_block(3, [256, 256, 1024], stage=4, block='f')(x)
-
-    x = conv_block(3, [512, 512, 2048], stage=5, block='a')(x)
-    x = identity_block(3, [512, 512, 2048], stage=5, block='b')(x)
-    x = identity_block(3, [512, 512, 2048], stage=5, block='c')(x)
-
-    x = Conv2D(1000, (1, 1), activation='linear', name='fc1000')(x)
-
-    # Create model
-    model = Model(img_input, x)
-    weights_path = os.path.expanduser(os.path.join(
-        '~', '.keras/models/fcn_resnet50_weights_tf_dim_ordering_tf_kernels.h5'))
-
-    # transfer if weights have not been created
-    if not os.path.isfile(weights_path):
-        flattened_layers = model.layers
-        index = {}
-        for layer in flattened_layers:
-            if layer.name:
-                index[layer.name] = layer
-        resnet50 = ResNet50()
-        for layer in resnet50.layers:
-            weights = layer.get_weights()
-            if layer.name == 'fc1000':
-                weights[0] = np.reshape(weights[0], (1, 1, 2048, 1000))
-            if layer.name in index:
-                index[layer.name].set_weights(weights)
-        model.save_weights(weights_path)
-        print('Successfully transformed!')
-    # else load weights
-    else:
-        model.load_weights(weights_path, by_name=True)
-        print('Already transformed!')
-
-
-if __name__ == '__main__':
-    if sys.argv[1] not in {'Vgg16', 'ResNet50'}:
-        print('Wrong argument! 
-#!/usr/bin/env python
-"""
-Copyright (c) 2019, by the Authors: Amir H. Abdi
-This script is freely available under the MIT Public License.
-Please see the License file in the root for details.
-
-The following code snippet will convert the keras model files
-to the frozen .pb tensorflow weight file. The resultant TensorFlow model
-holds both the model architecture and its associated weights.
-"""
-
-import tensorflow as tf
-from tensorflow.python.framework import graph_util
-from tensorflow.python.framework import graph_io
-from pathlib import Path
-from absl import app
-from absl import flags
-from absl import logging
-import keras
-from keras import backend as K
-from keras.models import model_from_json, model_from_yaml
-
-K.set_learning_phase(0)
-FLAGS = flags.FLAGS
-
-flags.DEFINE_string('input_model', None, 'Path to the input model.')
-flags.DEFINE_string('input_model_json', None, 'Path to the input model '
-                    'architecture in json format.')
-flags.DEFINE_string('input_model_yaml', None, 'Path to the input model '
-                    'architecture in yaml format.')
-flags.DEFINE_string('output_model', None, 'Path where the converted model will '
-                    'be stored.')
-flags.DEFINE_boolean('save_graph_def', False,
-                     'Whether to save the graphdef.pbtxt file which contains '
-                     'the graph definition in ASCII format.')
-flags.DEFINE_string('output_nodes_prefix', None,
-                    'If set, the output nodes will be renamed to '
-                    '`output_nodes_prefix`+i, where `i` will numerate the '
-                    'number of output nodes of the network.')
-flags.DEFINE_boolean('quantize', False,
-                     'If set, the resultant TensorFlow graph weights will be '
-                     'converted from float into eight-bit equivalents. See '
-                     'documentation here: '
-                     'https://github.com/tensorflow/tensorflow/tree/master/tensorflow/tools/graph_transforms')
-flags.DEFINE_boolean('channels_first', False,
-                     'Whether channels are the first dimension of a tensor. '
-                     'The default is TensorFlow behaviour where channels are '
-                     'the last dimension.')
-flags.DEFINE_boolean('output_meta_ckpt', False,
-                     'If set to True, exports the model as .meta, .index, and '
-                     '.data files, with a checkpoint file. These can be later '
-                     'loaded in TensorFlow to continue training.')
-
-flags.mark_flag_as_required('input_model')
-flags.mark_flag_as_required('output_model')
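-# A minimal invocation sketch (the file name keras_to_tensorflow.py and the
-# model paths are assumptions; only the two required flags lack defaults):
-#
-#   python keras_to_tensorflow.py --input_model=model.h5 --output_model=model.pb
-#
-# Add e.g. --save_graph_def=True to also dump a human-readable .pbtxt graph.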
-
-
-def load_model(input_model_path, input_json_path=None, input_yaml_path=None):
-    if not Path(input_model_path).exists():
-        raise FileNotFoundError(
-            'Model file `{}` does not exist.'.format(input_model_path))
-    try:
-        model = keras.models.load_model(input_model_path)
-        return model
-    except FileNotFoundError as err:
-        logging.error('Input model file (%s) does not exist.',
-                      FLAGS.input_model)
-        raise err
-    except ValueError as wrong_file_err:
-        if input_json_path:
-            if not Path(input_json_path).exists():
-                raise FileNotFoundError(
-                    'Model description json file `{}` does not exist.'.format(
-                        input_json_path))
-            try:
-                model = model_from_json(open(str(input_json_path)).read())
-                model.load_weights(input_model_path)
-                return model
-            except Exception as err:
-                logging.error("Couldn't load model from json.")
-                raise err
-        elif input_yaml_path:
-            if not Path(input_yaml_path).exists():
-                raise FileNotFoundError(
-                    'Model description yaml file `{}` does not exist.'.format(
-                        input_yaml_path))
-            try:
-                model = model_from_yaml(open(str(input_yaml_path)).read())
-                model.load_weights(input_model_path)
-                return model
-            except Exception as err:
-                logging.error("Couldn't load model from yaml.")
-                raise err
-        else:
-            logging.error(
-                'Input file specified only holds the weights, and not '
-                'the model definition. Save the model using '
-                'model.save(filename.h5) which will contain the network '
-                'architecture as well as its weights. '
-                'If the model is saved using the '
-                'model.save_weights(filename) function, either '
-                'input_model_json or input_model_yaml flags should be set '
-                'to import the network architecture prior to loading the '
-                'weights. 
\n' - 'Check the keras documentation for more details ' - '(https://keras.io/getting-started/faq/)') - raise wrong_file_err - - -def main(args): - # If output_model path is relative and in cwd, make it absolute from root - output_model = FLAGS.output_model - if str(Path(output_model).parent) == '.': - output_model = str((Path.cwd() / output_model)) - - output_fld = Path(output_model).parent - output_model_name = Path(output_model).name - output_model_stem = Path(output_model).stem - output_model_pbtxt_name = output_model_stem + '.pbtxt' - - # Create output directory if it does not exist - Path(output_model).parent.mkdir(parents=True, exist_ok=True) - - if FLAGS.channels_first: - K.set_image_data_format('channels_first') - else: - K.set_image_data_format('channels_last') - - model = load_model(FLAGS.input_model, - FLAGS.input_model_json, FLAGS.input_model_yaml) - - # TODO(amirabdi): Support networks with multiple inputs - orig_output_node_names = [node.op.name for node in model.outputs] - if FLAGS.output_nodes_prefix: - num_output = len(orig_output_node_names) - pred = [None] * num_output - converted_output_node_names = [None] * num_output - - # Create dummy tf nodes to rename output - for i in range(num_output): - converted_output_node_names[i] = '{}{}'.format( - FLAGS.output_nodes_prefix, i) - pred[i] = tf.identity(model.outputs[i], - name=converted_output_node_names[i]) - else: - converted_output_node_names = orig_output_node_names - logging.info('Converted output node names are: %s', - str(converted_output_node_names)) - - sess = K.get_session() - if FLAGS.output_meta_ckpt: - saver = tf.train.Saver() - saver.save(sess, str(output_fld / output_model_stem)) - - if FLAGS.save_graph_def: - tf.train.write_graph(sess.graph.as_graph_def(), str(output_fld), - output_model_pbtxt_name, as_text=True) - logging.info('Saved the graph definition in ascii format at %s', - str(Path(output_fld) / output_model_pbtxt_name)) - - if FLAGS.quantize: - from tensorflow.tools.graph_transforms import TransformGraph - transforms = ["quantize_weights", "quantize_nodes"] - transformed_graph_def = TransformGraph(sess.graph.as_graph_def(), [], - converted_output_node_names, - transforms) - constant_graph = graph_util.convert_variables_to_constants( - sess, - transformed_graph_def, - converted_output_node_names) - else: - constant_graph = graph_util.convert_variables_to_constants( - sess, - sess.graph.as_graph_def(), - converted_output_node_names) - - graph_io.write_graph(constant_graph, str(output_fld), output_model_name, - as_text=False) - logging.info('Saved the freezed graph at %s', - str(Path(output_fld) / output_model_name)) - - -if __name__ == "__main__": - app.run(main) -# Copyright 2015 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Converts checkpoint variables into Const ops in a standalone GraphDef file. 
- -This script is designed to take a GraphDef proto, a SaverDef proto, and a set of -variable values stored in a checkpoint file, and output a GraphDef with all of -the variable ops converted into const ops containing the values of the -variables. - -It's useful to do this when we need to load a single file in C++, especially in -environments like mobile or embedded where we may not have access to the -RestoreTensor ops and file loading calls that they rely on. - -An example of command-line usage is: -bazel build tensorflow/python/tools:freeze_graph && \ -bazel-bin/tensorflow/python/tools/freeze_graph \ ---input_graph=some_graph_def.pb \ ---input_checkpoint=model.ckpt-8361242 \ ---output_graph=/tmp/frozen_graph.pb --output_node_names=softmax - -You can also look at freeze_graph_test.py for an example of how to use it. - -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import argparse -import sys - -from google.protobuf import text_format - -from tensorflow.core.framework import graph_pb2 -from tensorflow.core.protobuf import saver_pb2 -from tensorflow.python import pywrap_tensorflow -from tensorflow.python.client import session -from tensorflow.python.framework import graph_util -from tensorflow.python.framework import importer -from tensorflow.python.platform import app -from tensorflow.python.platform import gfile -from tensorflow.python.training import saver as saver_lib - -FLAGS = None - - -def freeze_graph(input_graph, - input_saver, - input_binary, - input_checkpoint, - output_node_names, - restore_op_name, - filename_tensor_name, - output_graph, - clear_devices, - initializer_nodes, - variable_names_blacklist=""): - """Converts all variables in a graph and checkpoint into constants.""" - - # Unused by updated loading code. - del restore_op_name, filename_tensor_name - - if not gfile.Exists(input_graph): - print("Input graph file '" + input_graph + "' does not exist!") - return -1 - - if input_saver and not gfile.Exists(input_saver): - print("Input saver file '" + input_saver + "' does not exist!") - return -1 - - # 'input_checkpoint' may be a prefix if we're using Saver V2 format - if not saver_lib.checkpoint_exists(input_checkpoint): - print("Input checkpoint '" + input_checkpoint + "' doesn't exist!") - return -1 - - if not output_node_names: - print("You need to supply the name of a node to --output_node_names.") - return -1 - - input_graph_def = graph_pb2.GraphDef() - mode = "rb" if input_binary else "r" - with gfile.FastGFile(input_graph, mode) as f: - if input_binary: - input_graph_def.ParseFromString(f.read()) - else: - text_format.Merge(f.read(), input_graph_def) - # Remove all the explicit device specifications for this node. This helps to - # make the graph more portable. 
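-  # (An explicit device string such as "/job:worker/gpu:0" pins a node to
-  # hardware that may not exist where the frozen graph is later loaded, so
-  # blanking node.device below keeps the graph portable.)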
- if clear_devices: - for node in input_graph_def.node: - node.device = "" - - _ = importer.import_graph_def(input_graph_def, name="") - - with session.Session() as sess: - if input_saver: - with gfile.FastGFile(input_saver, mode) as f: - saver_def = saver_pb2.SaverDef() - if input_binary: - saver_def.ParseFromString(f.read()) - else: - text_format.Merge(f.read(), saver_def) - saver = saver_lib.Saver(saver_def=saver_def) - saver.restore(sess, input_checkpoint) - else: - var_list = {} - reader = pywrap_tensorflow.NewCheckpointReader(input_checkpoint) - var_to_shape_map = reader.get_variable_to_shape_map() - for key in var_to_shape_map: - try: - tensor = sess.graph.get_tensor_by_name(key + ":0") - except KeyError: - # This tensor doesn't exist in the graph (for example it's - # 'global_step' or a similar housekeeping element) so skip it. - continue - var_list[key] = tensor - saver = saver_lib.Saver(var_list=var_list) - saver.restore(sess, input_checkpoint) - if initializer_nodes: - sess.run(initializer_nodes) - - variable_names_blacklist = (variable_names_blacklist.split(",") if - variable_names_blacklist else None) - output_graph_def = graph_util.convert_variables_to_constants( - sess, - input_graph_def, - output_node_names.split(","), - variable_names_blacklist=variable_names_blacklist) - - with gfile.GFile(output_graph, "wb") as f: - f.write(output_graph_def.SerializeToString()) - print("%d ops in the final graph." % len(output_graph_def.node)) - - -def main(unused_args): - freeze_graph(FLAGS.input_graph, FLAGS.input_saver, FLAGS.input_binary, - FLAGS.input_checkpoint, FLAGS.output_node_names, - FLAGS.restore_op_name, FLAGS.filename_tensor_name, - FLAGS.output_graph, FLAGS.clear_devices, FLAGS.initializer_nodes, - FLAGS.variable_names_blacklist) - - -if __name__ == "__main__": - parser = argparse.ArgumentParser() - parser.register("type", "bool", lambda v: v.lower() == "true") - parser.add_argument( - "--input_graph", - type=str, - default="", - help="TensorFlow \'GraphDef\' file to load.") - parser.add_argument( - "--input_saver", - type=str, - default="", - help="TensorFlow saver file to load.") - parser.add_argument( - "--input_checkpoint", - type=str, - default="", - help="TensorFlow variables file to load.") - parser.add_argument( - "--output_graph", - type=str, - default="", - help="Output \'GraphDef\' file name.") - parser.add_argument( - "--input_binary", - nargs="?", - const=True, - type="bool", - default=False, - help="Whether the input files are in binary format.") - parser.add_argument( - "--output_node_names", - type=str, - default="", - help="The name of the output nodes, comma separated.") - parser.add_argument( - "--restore_op_name", - type=str, - default="save/restore_all", - help="The name of the master restore operator.") - parser.add_argument( - "--filename_tensor_name", - type=str, - default="save/Const:0", - help="The name of the tensor holding the save path.") - parser.add_argument( - "--clear_devices", - nargs="?", - const=True, - type="bool", - default=True, - help="Whether to remove device specifications.") - parser.add_argument( - "--initializer_nodes", - type=str, - default="", - help="comma separated list of initializer nodes to run before freezing.") - parser.add_argument( - "--variable_names_blacklist", - type=str, - default="", - help="""\ - comma separated list of variables to skip converting to constants\ - """) - FLAGS, unparsed = parser.parse_known_args() - app.run(main=main, argv=[sys.argv[0]] + unparsed) -import wave -import struct -from struct import 
*
-import matplotlib.pyplot as plt
-from keras.layers import Input, Dense
-from keras.models import Model
-import numpy as np
-from keras.callbacks import ModelCheckpoint
-from sklearn.preprocessing import MinMaxScaler
-import pandas as pd
-a = wave.open('pare.wav')
-leng = a.getnframes()
-
-c = []
-for i in range(70000, 90000):
-    data = a.readframes(1)
-    d = struct.unpack("f", data)
-    c.append(d)
-
-e = pd.DataFrame(c)
-f = e.dropna()
-
-g = []
-for i in range(5000, 10000, 25):
-    g.append(np.mean(f[i:i+500]))
-
-x_train2 = MinMaxScaler().fit_transform(g)
-x_train1 = x_train2[0:200]
-x_train0 = [float(i) for i in x_train1]
-x_train = np.array([x_train0]).astype('float32')
-plt.figure(figsize=(10, 5))
-plt.plot([float(i) for i in x_train[0]])
-
-encoding_dim = 150
-input_img = Input(shape=(200,))
-encoded = Dense(encoding_dim, activation='relu')(input_img)
-decoded = Dense(200, activation='sigmoid')(encoded)
-# Keras 2 renamed Model(input=..., output=...) to inputs=/outputs= and
-# fit(nb_epoch=...) to epochs=...; updated accordingly.
-autoencoder = Model(inputs=input_img, outputs=decoded)
-encoder = Model(inputs=input_img, outputs=encoded)
-encoded_input = Input(shape=(encoding_dim,))
-decoder_layer = autoencoder.layers[-1]
-decoder = Model(inputs=encoded_input, outputs=decoder_layer(encoded_input))
-autoencoder.compile(optimizer='adadelta', loss='binary_crossentropy')
-
-filepath = "audio.compress-{loss:.4f}.hdf5"
-checkpoint = ModelCheckpoint(
-    filepath, monitor='loss', verbose=1, save_best_only=False)  # 'loss' is the key actually present in the logs
-
-autoencoder.summary()
-autoencoder.fit(x_train, x_train,
-                epochs=50,
-                batch_size=40,
-                shuffle=False,
-                validation_data=(x_train, x_train), callbacks=[checkpoint], verbose=0)
-
-filename = "audio.compress-0.5576.hdf5"
-autoencoder.load_weights(filename)
-autoencoder.compile(loss='mean_squared_error', optimizer='adam')
-encoded_imgs = encoder.predict(x_train)
-decoded_imgs = decoder.predict(encoded_imgs)
-
-n = 1  # how many digits we will display
-plt.figure(figsize=(10, 5))
-for i in range(n):
-    # display original
-    ax = plt.subplot(2, n, i + 1)
-    plt.imshow(x_train[i].reshape(10, 20))
-    plt.title("ORIGINAL AUDIO")
-    plt.gray()
-    ax.get_xaxis().set_visible(False)
-    ax.get_yaxis().set_visible(False)
-plt.show()
-
-plt.figure(figsize=(8, .5))
-for i in range(n):
-    ax = plt.subplot(1, n, i+1)
-    plt.imshow(encoded_imgs.reshape(75, 2).T)
-    plt.title('ENCODED AUDIO')
-    plt.gray()
-    ax.get_xaxis().set_visible(False)
-    ax.get_yaxis().set_visible(False)
-plt.show()
-
-plt.figure(figsize=(10, 5))
-for i in range(n):
-    # display reconstruction
-    ax = plt.subplot(2, n, i + 1 + n)
-    plt.imshow(decoded_imgs[i].reshape(10, 20))
-    plt.title("RECONSTRUCTED AUDIO")
-    plt.gray()
-    ax.get_xaxis().set_visible(False)
-    ax.get_yaxis().set_visible(False)
-plt.show()
-
-print("Accuracy=", 1-np.mean(abs(x_train-decoded_imgs)), '\n')
-
-plt.figure(figsize=(10, 2))
-plt.plot(x_train[0], color='r', linewidth=3)
-plt.plot(decoded_imgs[0])
-plt.title('ORIGINAL (red) and RECONSTRUCTED AUDIO (blue)')
-plt.show()
-import requests
-from bs4 import BeautifulSoup
-import pandas as pd
-import time
-
-max_results = 20
-city_set = ['Los+Angeles', 'Austin']
-columns = ["city", "job_title", "company_name", "location", "summary"]
-
-df = []
-for city in city_set:
-    for start in range(0, max_results, 1):
-        page = requests.get(
-            'https://www.indeed.com/jobs?q=computer+science&l=' + str(city) + '&start=' + str(start))
-        time.sleep(1)
-        soup = BeautifulSoup(page.text, "lxml")
-        for div in soup.find_all(name="div", attrs={"class": "row"}):
-            job_post = []
-            job_post.append(city)
-            for a in div.find_all(name="a",
attrs={"data-tn-element": "jobTitle"}): - job_post.append(a["title"]) - company = div.find_all(name="span", attrs={"class": "company"}) - if len(company) > 0: - for b in company: - job_post.append(b.text.strip()) - else: - sec_try = div.find_all(name="span", attrs={ - "class": "result-link-source"}) - for span in sec_try: - job_post.append(span.text) - - c = div.findAll(name='span', attrs={'class': 'location'}) - for span in c: - job_post.append(span.text) - d = div.findAll('div', attrs={'class': 'summary'}) - for span in d: - job_post.append(span.text.strip()) - df.append(job_post) - -df00 = pd.DataFrame(df) -df00.columns = columns -df00.to_csv("jobs_report.csv", index=False) -import random -import numpy as np -from data.vocab import TextEncoder -from typing import List, NamedTuple, Optional, Dict, Any - - -class TaskWeightScheduler: - def __init__(self, active_in_pretrain: bool, active_in_finetune: bool, - pretrain_value: float = 1.0, finetune_value: float = 1.0): - self.active_in_pretrain = active_in_pretrain - self.active_in_finetune = active_in_finetune - self.pretrain_value = pretrain_value - self.finetune_value = finetune_value - - def get(self, is_pretrain: bool, step: int) -> float: - if is_pretrain and self.active_in_pretrain: - return self.pretrain_value - if not is_pretrain and self.active_in_finetune: - return self.finetune_value - raise ValueError() - - -class TaskMetadata(NamedTuple): - name: str # "lm" will be considered differently (will use tied decoder) - is_token_level: bool - num_classes: int - dropout: float - weight_scheduler: TaskWeightScheduler - - -class TokenTaskData(NamedTuple): - target: List[int] - target_mask: List[bool] - - -class SentenceTaskData(NamedTuple): - target: int - target_index: int - - -class TaskDataBatch(NamedTuple): - # (int32) batch_size for sentence level tasks or batch_size, seq_len for token level tasks - target: np.array - target_mask: np.array # (int8) same as target (will ignore zeros) - - -class Sentence(NamedTuple): - tokens: List[int] - padding_mask: List[bool] - segments: Optional[List[int]] = None - token_classification: Optional[Dict[str, TokenTaskData]] = None - sentence_classification: Optional[Dict[str, SentenceTaskData]] = None - - -class SentenceBatch(NamedTuple): - tokens: np.array # (int32) batch_size, seq_len - # (int8) batch_size, seq_len (0 or 1, zeros should be ignored (1 == use, 0 == padded)) - padding_mask: np.array - segments: np.array # (int32) batch_size, seq_len - # task_name('lm' is special) : task_data - token_classification: Dict[str, TaskDataBatch] - sentence_classification: Dict[str, TaskDataBatch] # task_name : task_data - - -def create_attention_mask(pad_mask: Optional[np.array], is_causal: bool, batch_size: Optional[int] = None, - length: Optional[int] = None, bert_attention: bool = False) -> np.array: - if pad_mask is not None: - assert pad_mask.ndim == 2 - batch_size, length = pad_mask.shape - if is_causal: - b = np.cumsum(np.eye(length, dtype=np.float32), axis=0) - else: - b = np.ones((length, length), dtype=np.float32) - b = np.reshape(b, [1, 1, length, length]) - b = np.repeat(b, batch_size, axis=0) # B, 1, L, L - if pad_mask is not None: - _pad_mask = pad_mask[..., np.newaxis] - _pad_mask = np.repeat(_pad_mask, length, 2) - _pad_mask_t = np.transpose(_pad_mask, [0, 2, 1]) - if bert_attention: - tmp = _pad_mask_t - else: - tmp = _pad_mask * _pad_mask_t - tmp = tmp[:, np.newaxis, ...] 
- if b is None: - b = tmp.astype(np.float32) - else: - b = b * tmp - return b - - -def _trim_seq(seq: Optional[List[Any]], length: int, from_end: bool = True) -> Optional[List[Any]]: - if seq is None: - return None - return seq[:length] if from_end else seq[-length:] - - -def _trim_sentence_target(task_dict: Dict[str, SentenceTaskData], desired_len: int, - orig_seq_len: int, from_end: bool = True) -> Dict[ - str, SentenceTaskData]: - trimmed_task_dict = {} - for k, v in task_dict.items(): - target_index = v.target_index - if orig_seq_len > desired_len: - if from_end and target_index > desired_len: - target_index = -1 - if not from_end: - target_index -= orig_seq_len - desired_len - if target_index >= 0: - trimmed_task_dict[k] = SentenceTaskData(v.target, target_index) - return trimmed_task_dict - - -def _trim_sentence(sentence: Sentence, length: int, from_end: bool = True) -> Sentence: - return Sentence(_trim_seq(sentence.tokens, length, from_end), - _trim_seq(sentence.padding_mask, length, from_end), - _trim_seq(sentence.segments, length, from_end), - {k: TokenTaskData(_trim_seq(v.target, length, from_end), - _trim_seq(v.target_mask, length, from_end)) for k, v in - sentence.token_classification.items()} if sentence.token_classification is not None else {}, - _trim_sentence_target(sentence.sentence_classification, length, len(sentence.tokens), - from_end) if sentence.sentence_classification is not None else {}) - - -def check_sent_len(sentence: Sentence, min_len: Optional[int], max_len: Optional[int], from_end: bool = True) -> \ - Optional[Sentence]: - if min_len is not None and len(sentence.tokens) < min_len: - return None - if max_len is not None and len(sentence.tokens) > max_len: - return _trim_sentence(sentence, max_len, from_end) - return sentence - - -def msk_sentence(sentence: List[int], vocab_size: int, keep_prob: float, - mask_prob: float, rand_prob: float) -> Sentence: - prediction_target = [0] * len(sentence) - prediction_mask = [False] * len(sentence) - new_sent = sentence.copy() - for i in range(len(sentence)): - probability = random.random() - if probability > keep_prob: - prediction_target[i] = sentence[i] - prediction_mask[i] = True - if probability < (mask_prob + keep_prob): - new_sent[i] = vocab_size + TextEncoder.MSK_OFFSET - elif probability < (mask_prob + rand_prob + keep_prob): - new_sent[i] = random.randrange(vocab_size) - return Sentence(new_sent, [True] * len(new_sent), None, - token_classification={'lm': TokenTaskData( - prediction_target, prediction_mask)}, - sentence_classification={}) - - -def _pad_seq(seq: List[Any], pad_token: Any, pad_len: int, is_post_pad: bool = True) -> List[Any]: - return (seq + [pad_token] * pad_len) if is_post_pad else ([pad_token] * pad_len + seq) - - -def pad(sentence: Sentence, pad_id: int, max_len: int, is_post_pad: bool = True) -> Sentence: - pad_len = max_len - len(sentence.tokens) - if pad_len == 0: - return sentence - return Sentence(_pad_seq(sentence.tokens, pad_id, pad_len, is_post_pad), - _pad_seq(sentence.padding_mask, - False, pad_len, is_post_pad), - _pad_seq(sentence.segments, 0, pad_len, is_post_pad), - {k: TokenTaskData(_pad_seq(v.target, 0, pad_len, is_post_pad), - _pad_seq(v.target_mask, False, pad_len, is_post_pad)) for k, v in - sentence.token_classification.items()} if sentence.token_classification is not None else {}, - {k: SentenceTaskData(v.target, v.target_index + (0 if is_post_pad else pad_len)) for k, v in - sentence.sentence_classification.items()} if sentence.sentence_classification is not None else {}) 
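-
-# A small self-contained sketch of how the helpers above compose (illustrative
-# values only; this block is not part of the original pipeline):
-if __name__ == '__main__':
-    _sent = Sentence(tokens=[5, 6, 7], padding_mask=[True] * 3, segments=[0] * 3,
-                     token_classification={'lm': TokenTaskData([0, 0, 0], [False] * 3)},
-                     sentence_classification={})
-    _padded = pad(_sent, pad_id=100 + TextEncoder.PAD_OFFSET, max_len=8)
-    _attn = create_attention_mask(
-        np.array([_padded.padding_mask], dtype=np.int8), is_causal=False)
-    assert _attn.shape == (1, 1, 8, 8)  # B, 1, L, L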
-
-
-def generate_pos_ids(batch_size: int, max_len: int) -> np.array:
-    return np.repeat(np.arange(max_len, dtype=np.int32).reshape(1, -1), batch_size, 0)
-import os
-import random
-import numpy as np
-from contextlib import ExitStack
-from data.vocab import TextEncoder
-from typing import List, Optional, Generator, TextIO, Tuple, Dict
-from data.dataset import (Sentence, pad, msk_sentence, check_sent_len,
-                          SentenceBatch, TaskDataBatch, TokenTaskData, SentenceTaskData)
-
-
-def lm_generator(text_corpus_address: str, text_encoder: TextEncoder, keep_prob: float = 0.85,
-                 mask_prob: float = 0.15 * 0.8, rand_prob: float = 0.15 * 0.1, min_len: Optional[int] = None,
-                 max_len: Optional[int] = 512, file_jump_prob: float = 0.1, mismatch_prob: float = 0.5,
-                 num_file_pointers: int = 8, is_causal: bool = False, use_single_sentence: bool = False,
-                 batch_size: int = 256) -> Generator[SentenceBatch, None, None]:
-    if not (0.0 <= mask_prob <= 1.0 and 0.0 <= rand_prob <= 1.0 and
-            0.0 <= keep_prob <= 1.0 and 0.0 <= file_jump_prob <= 1.0):
-        raise ValueError('all probabilities should be between zero and one')
-    if mask_prob + rand_prob + keep_prob > 1.0:
-        raise ValueError(
-            'sum of mask, rand and keep probabilities should be less than 1.0')
-    if use_single_sentence:
-        generator = _get_lm_generator_single(text_corpus_address, text_encoder, keep_prob, mask_prob, rand_prob,
-                                             min_len, max_len, file_jump_prob, num_file_pointers)
-    else:
-        in_memory = file_jump_prob == 0.0 and num_file_pointers == 1
-        generator = _get_lm_generator_double(text_corpus_address, text_encoder, keep_prob, mask_prob, rand_prob,
-                                             min_len, max_len, mismatch_prob, in_memory, file_jump_prob,
-                                             num_file_pointers)
-    batch = []
-    for item in generator:
-        batch.append(item)
-        if len(batch) == batch_size:
-            batch = make_next_token_prediction(batch) if is_causal else batch
-            batch = _create_batch(batch, text_encoder.pad_id, max_len)
-            yield batch
-            batch = []
-
-
-def make_next_token_prediction(batch: List[Sentence]) -> List[Sentence]:
-    for item in batch:
-        for i in range(len(item.tokens) - 1):
-            item.token_classification['lm'].target[i] = item.tokens[i + 1]
-            item.token_classification['lm'].target_mask[i] = True
-        item.token_classification['lm'].target[-1] = 0
-        item.token_classification['lm'].target_mask[-1] = False
-    return batch
-
-
-def _grab_line(files: List[TextIO], file_size: int, jump_prob: float) -> str:
-    file = files[random.randrange(len(files))]
-    if random.random() < jump_prob:
-        file.seek(random.randrange(file_size))
-        file.readline()  # discard - bound to be partial line
-    random_line = file.readline()
-    if len(random_line) == 0:  # we have hit the end
-        file.seek(0)
-        random_line = file.readline()
-    return random_line
-
-
-def _create_token_task_batch(batch: List[Sentence]) -> Dict[str, TaskDataBatch]:
-    batch_keys = set(batch[0].token_classification.keys())
-    for item in batch:
-        # compare each item against the first one (the original compared
-        # batch[0] with itself, which checks nothing)
-        assert batch_keys == set(item.token_classification.keys())
-    result = {}
-    for key in batch_keys:
-        result[key] = TaskDataBatch(
-            np.array(
-                [item.token_classification[key].target for item in batch], dtype=np.int32),
-            np.array([item.token_classification[key].target_mask for item in batch], dtype=np.int32))
-    return result
-
-
-def _create_sent_task_batch(batch: List[Sentence]) -> Dict[str, TaskDataBatch]:
-    batch_keys = set(batch[0].sentence_classification.keys())
-    for item in batch:
-        assert batch_keys == set(item.sentence_classification.keys())
-    result = {}
-    for key in batch_keys:
-        result[key] = TaskDataBatch(
-            np.array(
[item.sentence_classification[key].target for item in batch], dtype=np.int32), - np.array([item.sentence_classification[key].target_index for item in batch], dtype=np.int32)) - return result - - -def _create_batch(batch: List[Sentence], pad_id: int, max_len: Optional[int] = None) -> SentenceBatch: - if max_len is None: - max_len = max(len(item.tokens) for item in batch) - padded_batch = [pad(item, pad_id, max_len) for item in batch] - return SentenceBatch( - np.array([item.tokens for item in padded_batch], dtype=np.int32), - np.array([item.padding_mask for item in padded_batch], dtype=np.int8), - np.array([item.segments for item in padded_batch], dtype=np.int32), - _create_token_task_batch( - padded_batch), _create_sent_task_batch(padded_batch) - ) - - -def _get_lm_generator_single(text_corpus_address: str, text_encoder: TextEncoder, keep_prob: float, mask_prob: float, - rand_prob: float, min_len: Optional[int], max_len: Optional[int], jump_prob, - num_files) -> Generator[Sentence, None, None]: - _max_len = float('inf') if max_len is None else max_len - 2 - _min_len = 0 if min_len is None else min_len - 2 - file_size = os.stat(text_corpus_address).st_size - with ExitStack() as stack: - files = [stack.enter_context(open(text_corpus_address)) - for _ in range(num_files)] - - def _encode_line(line: str) -> Optional[Sentence]: - return check_sent_len( - msk_sentence(text_encoder.encode(line.rstrip()), len( - text_encoder), keep_prob, mask_prob, rand_prob), - _min_len, _max_len) - - def _yield_sentence(sent: Sentence) -> Sentence: - lm = sent.token_classification['lm'] - return Sentence( - [text_encoder.bos_id] + sent.tokens + [text_encoder.eos_id], - [True] + sent.padding_mask + [True], - [0] * (len(sent.tokens) + 2), - {'lm': TokenTaskData( - [0] + lm.target + [0], [False] + lm.target_mask + [False])}, - {} - ) - - while True: - sent = _grab_line(files, file_size, jump_prob) - encoded = _encode_line(sent) - if not encoded: - continue - yield _yield_sentence(encoded) - - -def _get_lm_generator_double(text_corpus_address: str, text_encoder: TextEncoder, keep_prob: float, mask_prob: float, - rand_prob: float, min_len: Optional[int], max_len: Optional[int], - mismatch_prob: float, in_memory: bool, jump_prob: float, num_files: int) -> Generator[ - Sentence, None, None]: - _max_len = float('inf') if max_len is None else max_len - 3 - _min_len = 0 if min_len is None else min_len - 3 - file_size = os.stat(text_corpus_address).st_size - current_line_number = 0 - with ExitStack() as stack: - if in_memory: - with open(text_corpus_address) as f: - all_lines = [text_encoder.encode(line.rstrip()) for line in f] - files = None - else: - all_lines = None - files = [stack.enter_context(open(text_corpus_address)) - for _ in range(num_files)] - max_line_number = len(all_lines) if all_lines else float('inf') - - def _encode_line(line: str, half: bool, from_end: bool = False) -> Optional[Sentence]: - return check_sent_len( - msk_sentence(text_encoder.encode(line.rstrip()), len( - text_encoder), keep_prob, mask_prob, rand_prob), - _min_len // (2 if half else 1), _max_len // (2 if half else 1), from_end=from_end) - - def _yield_sentence(sent1: Sentence, sent2: Optional[Sentence] = None) -> Sentence: - lm = sent1.token_classification['lm'] - if sent2 is None: - split_idx = random.randint( - _min_len // 2, len(sent1.tokens) - _min_len // 2) - return Sentence( - [text_encoder.bos_id] + sent1.tokens[:split_idx] + [text_encoder.del_id] + sent1.tokens[ - split_idx:] + [ - text_encoder.eos_id], - [True] + 
sent1.padding_mask[:split_idx] + [True] + - sent1.padding_mask[split_idx:] + [True], - [0] * (split_idx + 2) + [1] * - (1 + len(sent1.tokens) - split_idx), - {'lm': TokenTaskData([0] + lm.target[:split_idx] + [0] + lm.target[split_idx:] + [0], - [False] + lm.target_mask[:split_idx] + [False] + lm.target_mask[split_idx:] + [ - False])}, - {} - ) - lm_ = sent2.token_classification['lm'] - return Sentence( - [text_encoder.bos_id] + sent1.tokens + - [text_encoder.del_id] + sent2.tokens + [text_encoder.eos_id], - [True] + sent1.padding_mask + [True] + - sent2.padding_mask + [True], - [0] * (2 + len(sent1.tokens)) + [1] * (1 + len(sent2.tokens)), - {'lm': TokenTaskData([0] + lm.target + [0] + lm_.target + [0], - [False] + lm.target_mask + [False] + lm_.target_mask + [False])}, - {} - ) - - def _calc_encoded(line: str, _all_lines: Optional[List[str]] = None, _files: Optional[List[TextIO]] = None) -> \ - Optional[Tuple[Optional[Sentence], Optional[Sentence]]]: - if random.random() < mismatch_prob: - _encoded1 = _encode_line(line, half=True) - if _all_lines is not None: - line2 = _all_lines[random.randrange(len(_all_lines))] - else: - line2 = _grab_line(_files, file_size, jump_prob) - _encoded2 = _encode_line(line2, half=True, from_end=True) - if _encoded2 is None: - return None - else: - _encoded1 = _encode_line(line, half=False) - _encoded2 = None - return _encoded1, _encoded2 - - while True: - encoded1, encoded2 = _calc_encoded( - all_lines[current_line_number] if all_lines else _grab_line( - files, file_size, jump_prob), all_lines, - files) - if encoded1 is None: - continue - if all_lines: - current_line_number += 1 - if current_line_number == max_line_number: - current_line_number = 0 - yield _yield_sentence(encoded1, encoded2) - - -def dummy_lm_generator(vocab_size: int, max_len: int, batch_size: int, steps: int, easy: bool = True): # identity - def dummy_generator(): - for _ in range(steps): - seq_len = random.randint(1, max_len - 1) - tokens = [random.randrange(vocab_size) for i in range(seq_len)] - tokens[-1] = eos_id - yield Sentence( - tokens=tokens, - padding_mask=[True] * seq_len, - segments=[0] * seq_len, - token_classification={ - 'lm': TokenTaskData(tokens if easy else [random.randrange(vocab_size) for i in range(seq_len)], - [True] * seq_len), - 'lm_untied': TokenTaskData( - tokens if easy else [random.randrange(vocab_size) for i in range(seq_len)], [True] * seq_len) - }, - sentence_classification={ - 'count': SentenceTaskData(seq_len % 2, seq_len - 1)} - ) - - pad_id = vocab_size + TextEncoder.PAD_OFFSET - eos_id = vocab_size + TextEncoder.EOS_OFFSET - generator = dummy_generator() - batch = [] - for item in generator: - batch.append(item) - if len(batch) == batch_size: - batch = _create_batch(batch, pad_id, max_len) - yield batch - batch = [] -import os -from typing import List, Optional - -try: - import sentencepiece as spm -except: - print('if you want sentencepiece encoder, please install sentencepiece') - -try: - from openai.text_utils import TextEncoder as _OpenAITextEncoder -except: - print('if you want to use OpenAI\'s encoder and pretrained model, please install spacy, and ftfy') - -try: - from google_bert.tokenization import FullTokenizer -except: - print('if you want to use Google\'s encoder and pretrained models, please clone the bert submodule') - - -# TOKEN_IDs = {unk=0, vocab={1..vocab_size-1}, specials(pad,bos,del,eos,msk)} - - -class TextEncoder: - PAD_OFFSET = 0 - MSK_OFFSET = 1 - BOS_OFFSET = 2 - DEL_OFFSET = 3 # delimiter - EOS_OFFSET = 4 - SPECIAL_COUNT = 5 - 
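-    # Id layout (cf. the TOKEN_IDs note above and __init__ below): 0 is unk,
-    # 1..vocab_size-1 is the regular vocab, and the five specials are appended
-    # after the vocab, e.g. pad_id == vocab_size + PAD_OFFSET.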
NUM_SEGMENTS = 2 - BERT_UNUSED_COUNT = 99 # bert pretrained models - BERT_SPECIAL_COUNT = 4 # they don't have DEL - - def __init__(self, vocab_size: int): - # NOTE you MUST always put unk at 0, then regular vocab, then special tokens, and then pos - self.vocab_size = vocab_size - self.unk_id = 0 - self.pad_id = vocab_size + self.PAD_OFFSET - self.msk_id = vocab_size + self.MSK_OFFSET - self.bos_id = vocab_size + self.BOS_OFFSET - self.del_id = vocab_size + self.DEL_OFFSET - self.eos_id = vocab_size + self.EOS_OFFSET - - def __len__(self) -> int: - return self.vocab_size - - def encode(self, sent: str) -> List[int]: - raise NotImplementedError() - - -class SentencePieceTextEncoder(TextEncoder): - def __init__(self, text_corpus_address: Optional[str], model_name: str = 'spm', - vocab_size: int = 30000, spm_model_type: str = 'unigram') -> None: - super().__init__(vocab_size) - if not os.path.exists('{}.model'.format(model_name)): - if spm_model_type.lower() not in ('unigram', 'bpe', 'char', 'word'): - raise ValueError( - '{} is not a valid model_type for sentence piece, ' - 'valid options are: unigram, bpe, char, word'.format(spm_model_type)) - spm.SentencePieceTrainer.Train( - '--input={input} --model_prefix={model_name} --vocab_size={vocab_size} ' - '--character_coverage={coverage} --model_type={model_type} ' - '--pad_id=-1 --unk_id=0 --bos_id=-1 --eos_id=-1 --input_sentence_size=100000000 '.format( - input=text_corpus_address, model_name=model_name, vocab_size=vocab_size, coverage=1, - model_type=spm_model_type.lower())) - self.sp = spm.SentencePieceProcessor() - self.sp.load('{}.model'.format(model_name)) - - def encode(self, sent: str) -> List[int]: - return self.sp.encode_as_ids(sent) - - -class OpenAITextEncoder(TextEncoder): - def __init__(self, encoder_path: str = './openai/model/encoder_bpe_40000.json', - bpe_path: str = './openai/model/vocab_40000.bpe') -> None: - self.encoder = _OpenAITextEncoder(encoder_path, bpe_path) - super().__init__(len(self.encoder.encoder)) - - def encode(self, sent: str) -> List[int]: - return self.encoder.encode([sent], verbose=False)[0] - - -class BERTTextEncoder(TextEncoder): - def __init__(self, vocab_file: str, do_lower_case: bool = True) -> None: - self.tokenizer = FullTokenizer(vocab_file, do_lower_case) - super().__init__(len(self.tokenizer.vocab)) - self.bert_unk_id = self.tokenizer.vocab['[UNK]'] - self.bert_msk_id = self.tokenizer.vocab['[MASK]'] - - def standardize_ids(self, ids: List[int]) -> List[int]: - for i in range(len(ids)): - if ids[i] == self.bert_unk_id: # UNK - ids[i] = 0 - else: # VOCAB - ids[i] -= self.bert_msk_id - return ids - - def encode(self, sent: str) -> List[int]: - return self.standardize_ids(self.tokenizer.convert_tokens_to_ids(self.tokenizer.tokenize(sent))) -'''The file contains nothing but avoid the module conflict.''' -from google_bert.modeling import BertConfig, BertModel, get_assignment_map_from_checkpoint -from data.vocab import TextEncoder, BERTTextEncoder -from transformer.load import load_google_bert -import tensorflow as tf -import unittest -import numpy as np - -from transformer import refresh_keras_backend -# there are too many settings to use tpu on tensorflow model, so using gpu for the test may be great -refresh_keras_backend(use_tpu=False) - - -class TestBert(unittest.TestCase): - def __init__(self, method_name: str = 'runTest') -> None: - super().__init__(methodName=method_name) - - def test_same_result(self): - base_location = './google_bert/downloads/multilingual_L-12_H-768_A-12/' - bert_config = 
BertConfig.from_json_file( - base_location + 'bert_config.json') - init_checkpoint = base_location + 'bert_model.ckpt' - - def model_fn_builder(bert_config, init_checkpoint): - """Returns `model_fn` closure for TPUEstimator.""" - - def model_fn(features, labels, mode, params): # pylint: disable=unused-argument - """The `model_fn` for TPUEstimator.""" - - unique_ids = features["unique_ids"] - input_ids = features["input_ids"] - input_mask = features["input_mask"] - input_type_ids = features["input_type_ids"] - - model = BertModel( - config=bert_config, - is_training=False, - input_ids=input_ids, - input_mask=input_mask, - token_type_ids=input_type_ids, - use_one_hot_embeddings=False) - - if mode != tf.estimator.ModeKeys.PREDICT: - raise ValueError( - "Only PREDICT modes are supported: %s" % (mode)) - - tvars = tf.trainable_variables() - scaffold_fn = None - (assignment_map, _) = get_assignment_map_from_checkpoint( - tvars, init_checkpoint) - tf.train.init_from_checkpoint(init_checkpoint, assignment_map) - - predictions = { - "unique_id": unique_ids, - "seq_out": model.get_sequence_output() - } - - output_spec = tf.contrib.tpu.TPUEstimatorSpec( - mode=mode, predictions=predictions, scaffold_fn=scaffold_fn) - return output_spec - - return model_fn - - batch_size = 8 - seq_len = 5 - xmb = np.random.randint( - 106, bert_config.vocab_size - 106, (batch_size, seq_len)) - xmb2 = np.random.randint(0, 2, (batch_size, seq_len), dtype=np.int32) - xmb3 = np.random.randint(0, 2, (batch_size, seq_len), dtype=np.int32) - - def input_fn(params): - d = tf.data.Dataset.from_tensor_slices({ - "unique_ids": - tf.constant([0, 1, 2], shape=[batch_size], dtype=tf.int32), - "input_ids": - tf.constant( - xmb, shape=[batch_size, seq_len], - dtype=tf.int32), - "input_mask": - tf.constant( - xmb2, - shape=[batch_size, seq_len], - dtype=tf.int32), - "input_type_ids": - tf.constant( - xmb3, - shape=[batch_size, seq_len], - dtype=tf.int32), - }) - - d = d.batch(batch_size=batch_size, drop_remainder=False) - return d - - model_fn = model_fn_builder( - bert_config=bert_config, init_checkpoint=init_checkpoint) - is_per_host = tf.contrib.tpu.InputPipelineConfig.PER_HOST_V2 - run_config = tf.contrib.tpu.RunConfig(master=None, tpu_config=tf.contrib.tpu.TPUConfig(num_shards=8, - per_host_input_for_training=is_per_host)) - estimator = tf.contrib.tpu.TPUEstimator(use_tpu=False, model_fn=model_fn, config=run_config, - predict_batch_size=batch_size) - tf_result = [r for r in estimator.predict(input_fn)] - - import keras.backend as K - - K.set_learning_phase(0) - my_model = load_google_bert(base_location, max_len=seq_len) - - from data.dataset import create_attention_mask, generate_pos_ids - - pos = generate_pos_ids(batch_size, seq_len) - k_mask = create_attention_mask(xmb2, False, None, None, True) - bert_encoder = BERTTextEncoder(base_location + 'vocab.txt') - for b in range(len(xmb)): - xmb[b] = np.array(bert_encoder.standardize_ids(xmb[b].tolist())) - k_output = my_model.predict([xmb, xmb3, pos, k_mask]) - max_max = 0 - for i in range(batch_size): - if k_mask[i].mean() != 0: # TODO (when mask == full zero, keras_res != tf_res) - new_max = np.abs(k_output[i] - tf_result[i]['seq_out']).max() - if new_max > max_max: - max_max = new_max - # TODO reduce the error (I think it's because of the LayerNorm) - assert max_max < 5e-5, max_max - - -if __name__ == "__main__": - unittest.main() -import os -import re -import random -import tempfile -import numpy as np -from unittest import TestCase -from typing import Optional, List -from 
data.vocab import TextEncoder -from data.dataset import (create_attention_mask, Sentence, pad, check_sent_len, - msk_sentence, SentenceTaskData, TokenTaskData) -from data.lm_dataset import _create_batch, _grab_line, make_next_token_prediction, dummy_lm_generator - - -class TestData(TestCase): - def __init__(self, method_name: str = 'runTest') -> None: - super().__init__(methodName=method_name) - self.vocab_size = 100 - - def setUp(self) -> None: - pass - - def generate_random_seq(self, length: int, max: Optional[int] = None) -> List[int]: - return [random.randrange(self.vocab_size if max is None else max) for i in range(length)] - - def generate_random_mask(self, length: int) -> List[bool]: - return [random.random() < 0.5 for _ in range(length)] - - def generate_sentence(self, length: int) -> Sentence: - return Sentence(self.generate_random_seq(length), [True] * length, [0] * length, - {'lm': TokenTaskData(self.generate_random_seq(length), - self.generate_random_mask(length))}, {}) - - def test_pad(self): - bert_sent = self.generate_sentence(5) - lm_orig = bert_sent.token_classification['lm'] - pad_id = self.vocab_size + TextEncoder.PAD_OFFSET - padded_sent = pad(bert_sent, pad_id, 10) - lm = padded_sent.token_classification['lm'] - assert len(padded_sent.padding_mask) == len(padded_sent.segments) == len(lm.target_mask) == len( - padded_sent.tokens) == len(lm.target) == 10 - for i in range(5): - assert padded_sent.padding_mask[i] - assert padded_sent.segments[i] == bert_sent.segments[i] - assert lm.target[i] == lm_orig.target[i] - assert lm.target_mask[i] == lm_orig.target_mask[i] - assert padded_sent.tokens[i] == bert_sent.tokens[i] - for i in range(5, 10): - assert not padded_sent.padding_mask[i] - assert padded_sent.segments[i] == 0 - assert lm.target[i] == 0 - assert lm.target_mask[i] == 0 - assert padded_sent.tokens[i] == pad_id - - def test_create_batch(self): - max_len = 64 - pad_id = self.vocab_size + TextEncoder.PAD_OFFSET - for batch_size in [32, 1]: - sentences = [] - for i in range(batch_size): - sentences.append(self.generate_sentence( - random.randint(1, max_len - 5))) - for i in range(2): - if i == 0: - batch = _create_batch(sentences, pad_id, max_len) - else: - batch = _create_batch(sentences, pad_id) - max_len = max([len(sent.tokens) for sent in sentences]) - assert batch.tokens.shape == (batch_size, max_len) - assert batch.tokens.dtype == np.int32 - assert batch.segments.shape == (batch_size, max_len) - assert batch.segments.dtype == np.int32 - assert batch.padding_mask.shape == (batch_size, max_len) - assert batch.padding_mask.dtype == np.int8 - assert batch.token_classification['lm'].target.shape == ( - batch_size, max_len) - assert batch.token_classification['lm'].target.dtype == np.int32 - assert batch.token_classification['lm'].target_mask.shape == ( - batch_size, max_len) - assert batch.token_classification['lm'].target_mask.dtype == np.int32 - - def test_msk_sentence(self): - seq_len = 32 - sentence = self.generate_random_seq(seq_len) - - masked_sentence = msk_sentence(sentence, vocab_size=self.vocab_size, keep_prob=1.0, mask_prob=0.0, - rand_prob=0.0) - assert len(sentence) == len(masked_sentence.tokens) == len( - masked_sentence.token_classification['lm'].target) == len( - masked_sentence.token_classification['lm'].target_mask) - for i in range(seq_len): - assert masked_sentence.tokens[i] == sentence[i] - assert masked_sentence.token_classification['lm'].target[i] == 0 - assert masked_sentence.token_classification['lm'].target_mask[i] == 0 - - masked_sentence = 
msk_sentence(sentence, vocab_size=self.vocab_size, keep_prob=0.0, mask_prob=1.0, - rand_prob=0.0) - assert len(sentence) == len(masked_sentence.tokens) == len( - masked_sentence.token_classification['lm'].target) == len( - masked_sentence.token_classification['lm'].target_mask) - for i in range(seq_len): - assert masked_sentence.tokens[i] == self.vocab_size + \ - TextEncoder.MSK_OFFSET - assert masked_sentence.token_classification['lm'].target[i] == sentence[i] - assert masked_sentence.token_classification['lm'].target_mask[i] == 1 - - masked_sentence = msk_sentence(sentence, vocab_size=self.vocab_size, keep_prob=0.0, mask_prob=0.0, - rand_prob=0.0) - assert len(sentence) == len(masked_sentence.tokens) == len( - masked_sentence.token_classification['lm'].target) == len( - masked_sentence.token_classification['lm'].target_mask) - for i in range(seq_len): - assert masked_sentence.tokens[i] == sentence[i] - assert masked_sentence.token_classification['lm'].target[i] == sentence[i] - assert masked_sentence.token_classification['lm'].target_mask[i] == 1 - - sentence = [index + self.vocab_size for index in sentence] - masked_sentence = msk_sentence(sentence, vocab_size=self.vocab_size, keep_prob=0.0, mask_prob=0.0, - rand_prob=1.0) - assert len(sentence) == len(masked_sentence.tokens) == len( - masked_sentence.token_classification['lm'].target) == len( - masked_sentence.token_classification['lm'].target_mask) - for i in range(seq_len): - assert masked_sentence.tokens[i] != sentence[i] - assert masked_sentence.token_classification['lm'].target[i] == sentence[i] - assert masked_sentence.token_classification['lm'].target_mask[i] == 1 - - def test_make_causal(self): - pad_id = self.vocab_size + TextEncoder.PAD_OFFSET - orig_sentence = self.generate_sentence(5) - result = _create_batch( - make_next_token_prediction([orig_sentence]), pad_id) - lm = result.token_classification['lm'] - assert (np.array(orig_sentence.tokens)[1:] == lm.target[0, :-1]).all() - assert lm.target[0, -1] == 0 - assert (lm.target_mask[0, :-1] == 1).all() - assert lm.target_mask[0, -1] == 0 - - def test_grab_line(self): - fp1 = tempfile.TemporaryFile(mode='w+') - fp2 = tempfile.TemporaryFile(mode='w+') - for i in range(100): - fp1.write('hello world {}!\n'.format(i)) - fp2.write('hi universe {}!\n'.format(i)) - fp1.seek(0) - fp2.seek(0) - for i in range(200): - line = _grab_line([fp1], os.stat( - fp1.fileno()).st_size, jump_prob=0.0) - assert line == 'hello world {}!\n'.format(i % 100) - fp1.seek(0) - i = j = 0 - for _ in range(200): - line = _grab_line([fp1, fp2], os.stat( - fp1.fileno()).st_size, jump_prob=0.0) - if line.startswith('hello'): - assert line == 'hello world {}!\n'.format(i % 100) - i += 1 - else: - assert line == 'hi universe {}!\n'.format(j % 100) - j += 1 - fp1.seek(0) - fp2.seek(0) - pattern = re.compile('(hello world)|(hi universe) \d+!\\n') - for _ in range(200): - line = _grab_line([fp1, fp2], os.stat( - fp1.fileno()).st_size, jump_prob=1.0) - assert pattern.match(line) is not None - fp1.close() - fp2.close() - - def test_create_mask(self): - batch_size = 3 - length = 5 - pad_mask = np.array([[1, 1, 1, 0, 0], [1, 1, 0, 0, 1], [ - 0, 0, 0, 0, 0]], dtype=np.int8) - is_causal = False - mask = create_attention_mask(pad_mask, is_causal) - assert mask.shape == (batch_size, 1, length, length) - assert mask.dtype == np.float32 - assert (mask[0, 0] == np.array([[1, 1, 1, 0, 0], - [1, 1, 1, 0, 0], - [1, 1, 1, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]], dtype=np.float32)).all() - assert (mask[1, 0] == np.array([[1, 
1, 0, 0, 1], - [1, 1, 0, 0, 1], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [1, 1, 0, 0, 1]], dtype=np.float32)).all() - assert (mask[2, 0] == np.zeros( - (length, length), dtype=np.float32)).all() - - is_causal = True - mask = create_attention_mask(pad_mask, is_causal) - assert mask.shape == (batch_size, 1, length, length) - assert mask.dtype == np.float32 - assert (mask[0, 0] == np.array([[1, 0, 0, 0, 0], - [1, 1, 0, 0, 0], - [1, 1, 1, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0]], dtype=np.float32)).all() - assert (mask[1, 0] == np.array([[1, 0, 0, 0, 0], - [1, 1, 0, 0, 0], - [0, 0, 0, 0, 0], - [0, 0, 0, 0, 0], - [1, 1, 0, 0, 1]], dtype=np.float32)).all() - assert (mask[2, 0] == np.zeros( - (length, length), dtype=np.float32)).all() - - is_causal = False - mask = create_attention_mask(None, is_causal, batch_size, length) - assert mask.shape == (batch_size, 1, length, length) - assert mask.dtype == np.float32 - for i in range(3): - assert (mask[i, 0] == np.ones( - (length, length), dtype=np.float32)).all() - - is_causal = True - mask = create_attention_mask(None, is_causal, batch_size, length) - assert mask.shape == (batch_size, 1, length, length) - assert mask.dtype == np.float32 - tri = np.array([[1, 0, 0, 0, 0], - [1, 1, 0, 0, 0], - [1, 1, 1, 0, 0], - [1, 1, 1, 1, 0], - [1, 1, 1, 1, 1]], dtype=np.float32) - for i in range(3): - assert (mask[i, 0] == tri).all() - - def test_check_sent_len(self): - orig_length = 10 - class_target = 2 - original_sent = self.generate_sentence(orig_length) - original_sent.sentence_classification['sc'] = SentenceTaskData( - class_target, 0) - original_sent.sentence_classification['sc_ok'] = SentenceTaskData( - class_target + 1, 5) - assert check_sent_len(original_sent, min_len=10, - max_len=None) is not None - assert check_sent_len(original_sent, min_len=11, max_len=None) is None - res = check_sent_len(original_sent, min_len=None, - max_len=7, from_end=False) - assert len(res.tokens) == len(res.padding_mask) == len(res.token_classification['lm'].target) == len( - res.token_classification['lm'].target_mask) == 7 - assert res.tokens[0] == original_sent.tokens[3] - assert set(res.sentence_classification.keys()) == {'sc_ok'} - assert res.sentence_classification['sc_ok'].target == class_target + 1 - assert res.sentence_classification['sc_ok'].target_index == 5 - 3 - - def test_generation(self): - lm_generator = dummy_lm_generator(self.vocab_size, 32, 32, 100) - for i, sentence_batch in enumerate(lm_generator): - assert sentence_batch.tokens.shape == (32, 32) - assert i == 100 // 32 - 1 -from data.dataset import create_attention_mask, TaskMetadata, TaskWeightScheduler -from transformer.layers import MultiHeadAttention, LayerNormalization, Gelu -from transformer.load import load_openai_transformer -from transformer.model import create_transformer -from transformer.train import train_model, load_model -from data.lm_dataset import dummy_lm_generator -from unittest import TestCase, SkipTest -from data.vocab import TextEncoder -from keras import backend as K -from importlib import reload -import numpy as np -import keras -import os -import uuid -import json - -from transformer import refresh_keras_backend -# tpu mode doesn't support switch backend to theano -refresh_keras_backend(use_tpu=False) - - -def set_keras_backend(backend): - global K - if K.backend() != backend: - os.environ['KERAS_BACKEND'] = backend - reload(K) - assert K.backend() == backend - - -class TestTransformer(TestCase): - def __init__(self, method_name: str = 'runTest') -> None: - 
super().__init__(methodName=method_name) - self.vocab_size = 23 - self.num_heads = 2 - self.num_layers = 2 - self.embedding_dim = 6 - self.d_hid = 12 - self.max_len = 7 - self.supported_backends = {'tensorflow', 'theano'} - self.original_backend = K.backend() - - def tearDown(self): - set_keras_backend(self.original_backend) - - def list_backends(self, orig_backend=None): - if orig_backend is None: - orig_backend = K.backend() - # always start from the default backend - return [orig_backend] + list(self.supported_backends - {orig_backend}) - - def create_small_model(self, use_attn_mask: bool): - return create_transformer(vocab_size=self.vocab_size, - num_heads=self.num_heads, num_layers=self.num_layers, - embedding_dim=self.embedding_dim, d_hid=self.d_hid, - max_len=self.max_len, use_attn_mask=use_attn_mask) - - @staticmethod - def compare_two_models(model_a, model_b): - assert len(model_a.weights) == len(model_b.weights) - for x, y in zip(model_a.weights, model_b.weights): - assert (K.eval(x) == K.eval(y)).all() - - def test_train(self): - model = self.create_small_model(use_attn_mask=True) - batch_size = 3 - generator = dummy_lm_generator( - self.vocab_size, self.max_len, batch_size, 10000, False) - tasks_meta_data = [TaskMetadata('lm', True, self.vocab_size + TextEncoder.SPECIAL_COUNT, 0.1, - TaskWeightScheduler(True, False)), - TaskMetadata('lm_untied', True, self.vocab_size + TextEncoder.SPECIAL_COUNT, 0.3, - TaskWeightScheduler(False, True)), - TaskMetadata('count', False, 2, 0.1, TaskWeightScheduler(True, True))] - model = train_model(model, True, tasks_meta_data, generator, generator, pretrain_steps=100, pretrain_epochs=3, - finetune_steps=50, finetune_epochs=2, verbose=0) - path = '/tmp/{}.model'.format(uuid.uuid4()) - model.save_weights(path) - loaded_model = load_model(path, self.create_small_model( - use_attn_mask=True), tasks_meta_data) - assert len(model.inputs) == len(loaded_model.inputs) - assert len(model.outputs) == len(loaded_model.outputs) - self.compare_two_models(model, loaded_model) - - def test_save_load_all(self): - for backend in self.list_backends(): - try: - set_keras_backend(backend) - except ModuleNotFoundError: - continue - K.set_learning_phase(0) # test - for use_attn_mask in [True, False]: - model = self.create_small_model(use_attn_mask) - path = '/tmp/{}.model'.format(uuid.uuid4()) - try: - model.save(path) - new_model = keras.models.load_model(path, custom_objects={'MultiHeadAttention': MultiHeadAttention, - 'LayerNormalization': LayerNormalization, - 'Gelu': Gelu}) - TestTransformer.compare_two_models(model, new_model) - except Exception as e: - raise e - finally: - if os.path.exists(path): - os.remove(path) - - def test_save_load_weights(self): - for backend in self.list_backends(): - try: - set_keras_backend(backend) - except ModuleNotFoundError: - continue - K.set_learning_phase(0) # test - for use_attn_mask in [True, False]: - model = self.create_small_model(use_attn_mask) - path = '/tmp/{}.model'.format(uuid.uuid4()) - try: - model.save_weights(path) - model.load_weights(path) - except Exception as e: - raise e - finally: - if os.path.exists(path): - os.remove(path) - - def test_same_result(self): - orig_backend = K.backend() - batch_size = 3 - xmb = np.random.randint( - 0, self.vocab_size, (batch_size, self.max_len, 2), dtype=np.int32) - xmb[:, :, 1] = np.random.randint( - 0, self.max_len, (batch_size, self.max_len), dtype=np.int32) - for use_attn_mask in [True, False]: - inputs = [xmb[:, :, 0], np.zeros( - (batch_size, self.max_len), 
dtype=np.int32), xmb[:, :, 1]] - results_x = {} - if use_attn_mask: - mask = create_attention_mask( - None, True, batch_size, self.max_len) - inputs.append(mask) - for backend in self.list_backends(orig_backend): - try: - set_keras_backend(backend) - except ModuleNotFoundError: - continue - K.set_learning_phase(0) # test - model = self.create_small_model(use_attn_mask) - model = load_openai_transformer(use_attn_mask=use_attn_mask, max_len=self.max_len, - use_one_embedding_dropout=True) - results_x[backend] = model.predict( - inputs, batch_size=batch_size) - del model - set_keras_backend(orig_backend) - for k1 in results_x.keys(): - for k2 in results_x.keys(): - if k1 == k2: - continue - assert np.allclose( - results_x[k1], results_x[k2], atol=1.e-4, rtol=1.e-4) - - def test_different_backends_work(self): - for use_attn_mask in [True, False]: - orig_backend = K.backend() - for backend in self.list_backends(orig_backend): - try: - set_keras_backend(backend) - except ModuleNotFoundError: - pass - K.set_learning_phase(0) # test - model = self.create_small_model(use_attn_mask) - del model - set_keras_backend(orig_backend) - - def test_different_backends_load_openai(self): - try: - import tensorflow as tf - except ImportError: - raise SkipTest( - 'tensorflow is not installed, so we can not compare results with the released model') - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' - from openai.train import dropout, embed, block, find_trainable_variables - - n_vocab = 40478 - n_ctx = 7 - n_embd = 768 - embd_pdrop = 0.1 - n_layer = 12 - n_batch_train = 2 - n_transfer = 1 + 12 * 12 - - def model(X, train=False, reuse=False): - with tf.variable_scope('model', reuse=reuse): - we = tf.get_variable("we", [n_vocab + TextEncoder.SPECIAL_COUNT + n_ctx, n_embd], - initializer=tf.random_normal_initializer(stddev=0.02)) - we = dropout(we, embd_pdrop, train) - h = embed(X, we) - for layer in range(n_layer): - h = block(h, 'h%d' % layer, train=train, scale=True) - return h - - X_train = tf.placeholder(tf.int32, [n_batch_train, n_ctx, 2]) - res = model(X_train) - - params = find_trainable_variables('model') - sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) - sess.run(tf.global_variables_initializer()) - - with open('openai/model/params_shapes.json') as f: - shapes = json.load(f) - offsets = np.cumsum([np.prod(shape) for shape in shapes]) - init_params = [np.load('openai/model/params_{}.npy'.format(n)) - for n in range(10)] - init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1] - init_params = [param.reshape(shape) - for param, shape in zip(init_params, shapes)] - init_params[0] = init_params[0][:n_ctx] - init_params[0] = np.concatenate( - [init_params[1], (np.random.randn(TextEncoder.SPECIAL_COUNT, n_embd) * 0.02).astype(np.float32), - init_params[0]], 0) - del init_params[1] - - sess.run([p.assign(ip) for p, ip in zip( - params[:n_transfer], init_params[:n_transfer])]) - xmb = np.random.randint(0, n_vocab, (n_batch_train, n_ctx, 2)) - xmb[:, :, 1] = np.random.randint(0, n_ctx, (n_batch_train, n_ctx)) - xmb_tf = xmb.copy() - xmb_tf[:, :, 1] += n_vocab + TextEncoder.SPECIAL_COUNT - tf_result = sess.run(res, {X_train: xmb_tf}) - - for backend in self.list_backends(): - try: - set_keras_backend(backend) - except ModuleNotFoundError: - continue - K.set_learning_phase(0) - keras_model = load_openai_transformer( - use_attn_mask=True, use_one_embedding_dropout=False, max_len=n_ctx) - mask = create_attention_mask(None, True, n_batch_train, n_ctx) - k_result = keras_model.predict( - [xmb[:, 
:, 0], np.zeros(
-                    (n_batch_train, n_ctx), dtype=np.int64), xmb[:, :, 1], mask],
-                batch_size=n_batch_train)
-
-            if K.backend() != 'tensorflow':
-                assert np.allclose(tf_result, k_result, atol=1.e-4, rtol=1.e-4)
-            else:
-                assert (tf_result == k_result).all()
-'''This file is for compatibility.'''
-
-import sys
-
-
-def tpu_compatible():
-    '''Work around the TPU problems we meet while using the Keras TPU model.'''
-    if not hasattr(tpu_compatible, 'once'):
-        tpu_compatible.once = True
-    else:
-        return
-    import tensorflow as tf
-    import tensorflow.keras.backend as K
-    _version = tf.__version__.split('.')
-    is_correct_version = int(_version[0]) >= 1 and (
-        int(_version[0]) >= 2 or int(_version[1]) >= 13)
-    from tensorflow.contrib.tpu.python.tpu.keras_support import KerasTPUModel
-
-    def initialize_uninitialized_variables():
-        sess = K.get_session()
-        uninitialized_variables = set(
-            [i.decode('ascii') for i in sess.run(tf.report_uninitialized_variables())])
-        init_op = tf.variables_initializer(
-            [v for v in tf.global_variables() if v.name.split(':')[0]
-             in uninitialized_variables]
-        )
-        sess.run(init_op)
-
-    _tpu_compile = KerasTPUModel.compile
-
-    def tpu_compile(self,
-                    optimizer,
-                    loss=None,
-                    metrics=None,
-                    loss_weights=None,
-                    sample_weight_mode=None,
-                    weighted_metrics=None,
-                    target_tensors=None,
-                    **kwargs):
-        if not is_correct_version:
-            raise ValueError(
-                'You need tensorflow >= 1.13 for better keras tpu support!')
-        _tpu_compile(self, optimizer, loss, metrics, loss_weights,
-                     sample_weight_mode, weighted_metrics,
-                     target_tensors, **kwargs)
-        # for unknown reasons, we sometimes need to run this after compile
-        initialize_uninitialized_variables()
-
-    KerasTPUModel.compile = tpu_compile
-
-
-def replace_keras_to_tf_keras():
-    tpu_compatible()
-    import tensorflow as tf
-    sys.modules['keras'] = tf.keras
-    globals()['keras'] = tf.keras
-    import keras.backend as K
-    K.tf = tf
-
-
-def clean_keras_module():
-    modules = [i for i in sys.modules.keys()]
-    for i in modules:
-        if i.split('.')[0] == 'keras':
-            del sys.modules[i]
-
-
-def refresh_keras_backend(use_tpu=True):
-    clean_keras_module()
-    import keras.backend as K
-    if use_tpu and K.backend() != 'theano':
-        clean_keras_module()
-        replace_keras_to_tf_keras()
-        import keras.backend as K
-    return K
-
-
-refresh_keras_backend()
-import keras
-import numpy as np
-from data.vocab import TextEncoder
-from transformer.layers import LayerNormalization
-
-
-def _get_pos_encoding_matrix(max_len: int, d_emb: int) -> np.array:
-    pos_enc = np.array(
-        [[pos / np.power(10000, 2 * (j // 2) / d_emb) for j in range(d_emb)] if pos != 0 else np.zeros(d_emb) for pos in
-         range(max_len)], dtype=np.float32)
-    pos_enc[1:, 0::2] = np.sin(pos_enc[1:, 0::2])  # dim 2i
-    pos_enc[1:, 1::2] = np.cos(pos_enc[1:, 1::2])  # dim 2i+1
-    return pos_enc
-
-
-# NOTE that for vocab_size you should also add special_count
-class Embedding(keras.layers.Layer):
-    def __init__(self, output_dim: int = 768, dropout: float = 0.1, vocab_size: int = 30000 + TextEncoder.SPECIAL_COUNT,
-                 max_len: int = 512, trainable_pos_embedding: bool = True, use_one_dropout: bool = False,
-                 use_embedding_layer_norm: bool = False, layer_norm_epsilon: float = 1e-5, **kwargs):
-        super().__init__(**kwargs)
-        self.max_len = max_len
-        self.use_one_dropout = use_one_dropout
-        self.output_dim = output_dim
-        self.dropout = dropout
-        self.vocab_size = vocab_size
-        self.trainable_pos_embedding = trainable_pos_embedding
-
-        self.segment_emb = keras.layers.Embedding(TextEncoder.NUM_SEGMENTS, output_dim,
input_length=max_len, - name='SegmentEmbedding') - if not trainable_pos_embedding: - self.pos_emb = keras.layers.Embedding(max_len, output_dim, trainable=False, input_length=max_len, - name='PositionEmbedding', - weights=[_get_pos_encoding_matrix(max_len, output_dim)]) - else: - self.pos_emb = keras.layers.Embedding( - max_len, output_dim, input_length=max_len, name='PositionEmbedding') - self.token_emb = keras.layers.Embedding( - vocab_size, output_dim, input_length=max_len, name='TokenEmbedding') - self.embedding_dropout = keras.layers.Dropout( - dropout, name='EmbeddingDropOut') - self.add_embeddings = keras.layers.Add(name='AddEmbeddings') - self.use_embedding_layer_norm = use_embedding_layer_norm - if self.use_embedding_layer_norm: - self.embedding_layer_norm = LayerNormalization(layer_norm_epsilon) - else: - self.embedding_layer_norm = None - self.layer_norm_epsilon = layer_norm_epsilon - - def compute_output_shape(self, input_shape): - return input_shape[0][0], input_shape[0][1], self.output_dim - - def get_config(self): - config = { - 'max_len': self.max_len, - 'use_one_dropout': self.use_one_dropout, - 'output_dim': self.output_dim, - 'dropout': self.dropout, - 'vocab_size': self.vocab_size, - 'trainable_pos_embedding': self.trainable_pos_embedding, - 'embedding_layer_norm': self.use_embedding_layer_norm, - 'layer_norm_epsilon': self.layer_norm_epsilon - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - def __call__(self, inputs, **kwargs): - tokens, segment_ids, pos_ids = inputs - segment_embedding = self.segment_emb(segment_ids) - pos_embedding = self.pos_emb(pos_ids) - token_embedding = self.token_emb(tokens) - if self.use_one_dropout: - summation = self.add_embeddings( - [segment_embedding, pos_embedding, token_embedding]) - if self.embedding_layer_norm: - summation = self.embedding_layer_norm(summation) - return self.embedding_dropout(summation) - summation = self.add_embeddings( - [self.embedding_dropout(segment_embedding), self.embedding_dropout(pos_embedding), - self.embedding_dropout(token_embedding)]) - if self.embedding_layer_norm: - summation = self.embedding_layer_norm(summation) - return summation -import math -import numpy as np -import keras.backend as K -from keras.layers import Dropout - - -def shape_list(x): - if K.backend() != 'theano': - tmp = K.int_shape(x) - else: - tmp = x.shape - tmp = list(tmp) - tmp[0] = -1 - return tmp - - -def split_heads(x, n: int, k: bool = False): # B, L, C - x_shape = shape_list(x) - m = x_shape[-1] - new_x_shape = x_shape[:-1] + [n, m // n] - new_x = K.reshape(x, new_x_shape) - return K.permute_dimensions(new_x, [0, 2, 3, 1] if k else [0, 2, 1, 3]) - - -def merge_heads(x): - new_x = K.permute_dimensions(x, [0, 2, 1, 3]) - x_shape = shape_list(new_x) - new_x_shape = x_shape[:-2] + [np.prod(x_shape[-2:])] - return K.reshape(new_x, new_x_shape) - - -# q and v are B, H, L, C//H ; k is B, H, C//H, L ; mask is B, 1, L, L -def scaled_dot_product_attention_tf(q, k, v, attn_mask, attention_dropout: float, neg_inf: float): - w = K.batch_dot(q, k) # w is B, H, L, L - w = w / K.sqrt(K.cast(shape_list(v)[-1], K.floatx())) - if attn_mask is not None: - w = attn_mask * w + (1.0 - attn_mask) * neg_inf - w = K.softmax(w) - w = Dropout(attention_dropout)(w) - return K.batch_dot(w, v) # it is B, H, L, C//H [like v] - - -def scaled_dot_product_attention_th(q, k, v, attn_mask, attention_dropout: float, neg_inf: float): - w = theano_matmul(q, k) - w = w / K.sqrt(K.cast(shape_list(v)[-1], 
K.floatx())) - if attn_mask is not None: - attn_mask = K.repeat_elements(attn_mask, shape_list(v)[1], 1) - w = attn_mask * w + (1.0 - attn_mask) * neg_inf - w = K.T.exp(w - w.max()) / K.T.exp(w - w.max()).sum(axis=-1, keepdims=True) - w = Dropout(attention_dropout)(w) - return theano_matmul(w, v) - - -def multihead_attention(x, attn_mask, n_head: int, n_state: int, attention_dropout: float, neg_inf: float): - _q, _k, _v = x[:, :, :n_state], x[:, :, - n_state:2 * n_state], x[:, :, -n_state:] - q = split_heads(_q, n_head) # B, H, L, C//H - k = split_heads(_k, n_head, k=True) # B, H, C//H, L - v = split_heads(_v, n_head) # B, H, L, C//H - if K.backend() == 'tensorflow': - a = scaled_dot_product_attention_tf( - q, k, v, attn_mask, attention_dropout, neg_inf) - else: - a = scaled_dot_product_attention_th( - q, k, v, attn_mask, attention_dropout, neg_inf) - return merge_heads(a) - - -def gelu(x): - return 0.5 * x * (1 + K.tanh(math.sqrt(2 / math.pi) * (x + 0.044715 * K.pow(x, 3)))) - - -# https://stackoverflow.com/a/42194662/2796084 -def theano_matmul(a, b, _left=False): - assert a.ndim == b.ndim - ndim = a.ndim - assert ndim >= 2 - if _left: - b, a = a, b - if ndim == 2: - return K.T.dot(a, b) - else: - # If a is broadcastable but b is not. - if a.broadcastable[0] and not b.broadcastable[0]: - # Scan b, but hold a steady. - # Because b will be passed in as a, we need to left multiply to maintain - # matrix orientation. - output, _ = K.theano.scan(theano_matmul, sequences=[ - b], non_sequences=[a[0], 1]) - # If b is broadcastable but a is not. - elif b.broadcastable[0] and not a.broadcastable[0]: - # Scan a, but hold b steady. - output, _ = K.theano.scan(theano_matmul, sequences=[ - a], non_sequences=[b[0]]) - # If neither dimension is broadcastable or they both are. - else: - # Scan through the sequences, assuming the shape for this dimension is equal. 
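# In other words: this helper emulates a rank-agnostic batched matmul on the
# Theano backend (the TensorFlow path above uses K.batch_dot instead) by
# peeling off one leading batch axis per K.theano.scan. Roughly, for a of
# shape (B, H, L, C) and b of shape (B, H, C, L), the scan pairs a[i] with
# b[i] and recurses until the plain 2-D K.T.dot base case applies.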
- output, _ = K.theano.scan(theano_matmul, sequences=[a, b]) - return output -import math -import keras.backend as K -from keras.layers import Layer -from keras.initializers import Ones, Zeros -from transformer.funcs import gelu, multihead_attention - - -class MultiHeadAttention(Layer): - def __init__(self, n_head: int, n_state: int, attention_dropout: float, use_attn_mask: bool, neg_inf: float, - **kwargs) -> None: - super().__init__(**kwargs) - self.n_head = n_head - self.n_state = n_state - self.attention_dropout = attention_dropout - self.use_attn_mask = use_attn_mask - self.neg_inf = neg_inf - - def compute_output_shape(self, input_shape): - x = input_shape[0] if self.use_attn_mask else input_shape - return x[0], x[1], x[2] // 3 - - def call(self, inputs, **kwargs): - x = inputs[0] if self.use_attn_mask else inputs - attn_mask = inputs[1] if self.use_attn_mask else None - return multihead_attention(x, attn_mask, self.n_head, self.n_state, self.attention_dropout, self.neg_inf) - - def get_config(self): - config = { - 'n_head': self.n_head, - 'n_state': self.n_state, - 'attention_dropout': self.attention_dropout, - 'use_attn_mask': self.use_attn_mask, - 'neg_inf': self.neg_inf, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class LayerNormalization(Layer): - def __init__(self, eps: float = 1e-5, **kwargs) -> None: - self.eps = eps - super().__init__(**kwargs) - - def build(self, input_shape): - self.gamma = self.add_weight( - name='gamma', shape=input_shape[-1:], initializer=Ones(), trainable=True) - self.beta = self.add_weight( - name='beta', shape=input_shape[-1:], initializer=Zeros(), trainable=True) - super().build(input_shape) - - def call(self, x, **kwargs): - u = K.mean(x, axis=-1, keepdims=True) - s = K.mean(K.square(x - u), axis=-1, keepdims=True) - z = (x - u) / K.sqrt(s + self.eps) - return self.gamma * z + self.beta - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = { - 'eps': self.eps, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class Gelu(Layer): - def __init__(self, accurate: bool = False, **kwargs): - super().__init__(**kwargs) - self.accurate = accurate - - def call(self, inputs, **kwargs): - if not self.accurate: - return gelu(inputs) - if K.backend() == 'tensorflow': - erf = K.tf.erf - else: - erf = K.T.erf - return inputs * 0.5 * (1.0 + erf(inputs / math.sqrt(2.0))) - - def compute_output_shape(self, input_shape): - return input_shape - - def get_config(self): - config = { - 'accurate': self.accurate, - } - base_config = super().get_config() - return dict(list(base_config.items()) + list(config.items())) -import json -import keras -import numpy as np -import tensorflow as tf -import keras.backend as K -from data.vocab import TextEncoder -from google_bert.modeling import BertConfig -from transformer.model import create_transformer - - -def load_openai_transformer(path: str = './openai/model/', use_attn_mask: bool = True, - use_one_embedding_dropout: bool = False, max_len: int = 512) -> keras.Model: - with open(path + 'params_shapes.json') as f: - shapes = json.load(f) - offsets = np.cumsum([np.prod(shape) for shape in shapes]) - init_params = [np.load(path + 'params_{}.npy'.format(n)) - for n in range(10)] - init_params = np.split(np.concatenate(init_params, 0), offsets)[:-1] - init_params = [param.reshape(shape) - for param, shape in zip(init_params, shapes)] - init_params[0] = 
init_params[0][:min(512, max_len)] - # add special token embedding to token embedding - init_params[1] = np.concatenate( - (init_params[1], np.random.randn(TextEncoder.SPECIAL_COUNT, 768).astype(np.float32) * 0.02), axis=0) - init_params = [np.zeros((TextEncoder.NUM_SEGMENTS, 768)).astype( - np.float32)] + init_params # segment embedding - model = create_transformer(embedding_dim=768, embedding_dropout=0.1, vocab_size=40478, - max_len=min(512, max_len), use_attn_mask=use_attn_mask, trainable_pos_embedding=True, - num_heads=12, num_layers=12, use_one_embedding_dropout=use_one_embedding_dropout, - d_hid=4 * 768, attention_dropout=0.1, residual_dropout=0.1) - model.set_weights(init_params) - return model - - -def load_google_bert(base_location: str = './google_bert/downloads/multilingual_L-12_H-768_A-12/', - use_attn_mask: bool = True, max_len: int = 512, verbose: bool = False) -> keras.Model: - bert_config = BertConfig.from_json_file(base_location + 'bert_config.json') - init_checkpoint = base_location + 'bert_model.ckpt' - var_names = tf.train.list_variables(init_checkpoint) - check_point = tf.train.load_checkpoint(init_checkpoint) - vocab_size = bert_config.vocab_size - \ - TextEncoder.BERT_SPECIAL_COUNT - TextEncoder.BERT_UNUSED_COUNT - model = create_transformer(embedding_layer_norm=True, neg_inf=-10000.0, use_attn_mask=use_attn_mask, - vocab_size=vocab_size, accurate_gelu=True, layer_norm_epsilon=1e-12, max_len=max_len, - use_one_embedding_dropout=True, d_hid=bert_config.intermediate_size, - embedding_dim=bert_config.hidden_size, num_layers=bert_config.num_hidden_layers, - num_heads=bert_config.num_attention_heads, - residual_dropout=bert_config.hidden_dropout_prob, - attention_dropout=bert_config.attention_probs_dropout_prob) - if K.backend() == 'tensorflow': - weights = [np.zeros(w.shape) for w in model.weights] - else: - weights = [np.zeros(w.get_value().shape) for w in model.weights] - for var_name, _ in var_names: - w_id = None - qkv = None - unsqueeze = False - parts = var_name.split('/') - first_vars_size = 5 - if parts[1] == 'embeddings': - n = parts[-1] - if n == 'token_type_embeddings': - w_id = 0 - elif n == 'position_embeddings': - w_id = 1 - elif n == 'word_embeddings': - w_id = 2 - elif n == 'gamma': - w_id = 3 - elif n == 'beta': - w_id = 4 - else: - raise ValueError() - elif parts[2].startswith('layer_'): - layer_number = int(parts[2][len('layer_'):]) - if parts[3] == 'attention': - if parts[-1] == 'beta': - w_id = first_vars_size + layer_number * 12 + 5 - elif parts[-1] == 'gamma': - w_id = first_vars_size + layer_number * 12 + 4 - elif parts[-2] == 'dense': - if parts[-1] == 'bias': - w_id = first_vars_size + layer_number * 12 + 3 - elif parts[-1] == 'kernel': - w_id = first_vars_size + layer_number * 12 + 2 - unsqueeze = True - else: - raise ValueError() - elif parts[-2] == 'key' or parts[-2] == 'query' or parts[-2] == 'value': - w_id = first_vars_size + layer_number * \ - 12 + (0 if parts[-1] == 'kernel' else 1) - unsqueeze = parts[-1] == 'kernel' - qkv = parts[-2][0] - else: - raise ValueError() - elif parts[3] == 'intermediate': - if parts[-1] == 'bias': - w_id = first_vars_size + layer_number * 12 + 7 - elif parts[-1] == 'kernel': - w_id = first_vars_size + layer_number * 12 + 6 - unsqueeze = True - else: - raise ValueError() - elif parts[3] == 'output': - if parts[-1] == 'beta': - w_id = first_vars_size + layer_number * 12 + 11 - elif parts[-1] == 'gamma': - w_id = first_vars_size + layer_number * 12 + 10 - elif parts[-1] == 'bias': - w_id = first_vars_size + 
layer_number * 12 + 9 - elif parts[-1] == 'kernel': - w_id = first_vars_size + layer_number * 12 + 8 - unsqueeze = True - else: - raise ValueError() - - if w_id is not None and qkv is None: - if verbose: - print(var_name, ' -> ', model.weights[w_id].name) - if w_id == 1: # pos embedding - weights[w_id][:max_len, :] = check_point.get_tensor(var_name)[:max_len, - :] if not unsqueeze else check_point.get_tensor(var_name)[ - None, :max_len, :] - elif w_id == 2: # word embedding - # ours: unk, [vocab], pad, msk(mask), bos(cls), del(use sep again), eos(sep) - # theirs: pad, 99 unused, unk, cls, sep, mask, [vocab] - saved = check_point.get_tensor( - var_name) # vocab_size, emb_size - # weights[our_position] = saved[their_position] - weights[w_id][0] = saved[1 + - TextEncoder.BERT_UNUSED_COUNT] # unk - weights[w_id][1:vocab_size] = saved[-vocab_size + 1:] - weights[w_id][vocab_size + TextEncoder.PAD_OFFSET] = saved[0] - weights[w_id][vocab_size + TextEncoder.MSK_OFFSET] = saved[4 + - TextEncoder.BERT_UNUSED_COUNT] - weights[w_id][vocab_size + TextEncoder.BOS_OFFSET] = saved[2 + - TextEncoder.BERT_UNUSED_COUNT] - weights[w_id][vocab_size + TextEncoder.DEL_OFFSET] = saved[3 + - TextEncoder.BERT_UNUSED_COUNT] - weights[w_id][vocab_size + TextEncoder.EOS_OFFSET] = saved[3 + - TextEncoder.BERT_UNUSED_COUNT] - else: - weights[w_id][:] = check_point.get_tensor(var_name) if not unsqueeze else \ - check_point.get_tensor(var_name)[ - None, ...] - elif w_id is not None: - if verbose: - print(var_name, ' -> ', model.weights[w_id].name, '::', qkv) - p = {'q': 0, 'k': 1, 'v': 2}[qkv] - if weights[w_id].ndim == 3: - dim_size = weights[w_id].shape[1] - weights[w_id][0, :, p * dim_size:(p + 1) * dim_size] = check_point.get_tensor( - var_name) if not unsqueeze else \ - check_point.get_tensor(var_name)[ - None, ...] 
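# The q/k/v weights are packed: the Keras model fuses the three projections
# of each layer into a single width-1 convolution of size 3 * n_state (the
# c_attn layer), so each separate BERT query/key/value kernel (the 3-D case
# above) or bias (the 1-D case below) is copied into the p-th third of the
# packed tensor, ordered query, key, value.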
- else: - dim_size = weights[w_id].shape[0] // 3 - weights[w_id][p * dim_size:(p + 1) * - dim_size] = check_point.get_tensor(var_name) - else: - if verbose: - # TODO pooler, cls/predictions, cls/seq_relationship - print('not mapped: ', var_name) - model.set_weights(weights) - return model -import keras -import keras.backend as K -from data.vocab import TextEncoder -from transformer.embedding import Embedding -from keras.layers import Conv1D, Dropout, Add, Input -from transformer.layers import MultiHeadAttention, Gelu, LayerNormalization - - -class MultiHeadSelfAttention: - def __init__(self, n_state: int, n_head: int, attention_dropout: float, - use_attn_mask: bool, layer_id: int, neg_inf: float) -> None: - assert n_state % n_head == 0 - self.c_attn = Conv1D( - 3 * n_state, 1, name='layer_{}/c_attn'.format(layer_id)) - self.attn = MultiHeadAttention(n_head, n_state, attention_dropout, use_attn_mask, - neg_inf, name='layer_{}/self_attention'.format(layer_id)) - self.c_attn_proj = Conv1D( - n_state, 1, name='layer_{}/c_attn_proj'.format(layer_id)) - - def __call__(self, x, mask): - output = self.c_attn(x) - output = self.attn( - output) if mask is None else self.attn([output, mask]) - return self.c_attn_proj(output) - - -class PositionWiseFF: - def __init__(self, n_state: int, d_hid: int, layer_id: int, accurate_gelu: bool) -> None: - self.c_fc = Conv1D(d_hid, 1, name='layer_{}/c_fc'.format(layer_id)) - self.activation = Gelu(accurate=accurate_gelu, - name='layer_{}/gelu'.format(layer_id)) - self.c_ffn_proj = Conv1D( - n_state, 1, name='layer_{}/c_ffn_proj'.format(layer_id)) - - def __call__(self, x): - output = self.activation(self.c_fc(x)) - return self.c_ffn_proj(output) - - -class EncoderLayer: - def __init__(self, n_state: int, n_head: int, d_hid: int, residual_dropout: float, attention_dropout: float, - use_attn_mask: bool, layer_id: int, neg_inf: float, ln_epsilon: float, accurate_gelu: bool) -> None: - self.attention = MultiHeadSelfAttention( - n_state, n_head, attention_dropout, use_attn_mask, layer_id, neg_inf) - self.drop1 = Dropout( - residual_dropout, name='layer_{}/ln_1_drop'.format(layer_id)) - self.add1 = Add(name='layer_{}/ln_1_add'.format(layer_id)) - self.ln1 = LayerNormalization( - ln_epsilon, name='layer_{}/ln_1'.format(layer_id)) - self.ffn = PositionWiseFF(n_state, d_hid, layer_id, accurate_gelu) - self.drop2 = Dropout( - residual_dropout, name='layer_{}/ln_2_drop'.format(layer_id)) - self.add2 = Add(name='layer_{}/ln_2_add'.format(layer_id)) - self.ln2 = LayerNormalization( - ln_epsilon, name='layer_{}/ln_2'.format(layer_id)) - - def __call__(self, x, mask): - a = self.attention(x, mask) - n = self.ln1(self.add1([x, self.drop1(a)])) - f = self.ffn(n) - return self.ln2(self.add2([n, self.drop2(f)])) - - -def create_transformer(embedding_dim: int = 768, embedding_dropout: float = 0.1, vocab_size: int = 30000, - max_len: int = 512, trainable_pos_embedding: bool = True, num_heads: int = 12, - num_layers: int = 12, attention_dropout: float = 0.1, use_one_embedding_dropout: bool = False, - d_hid: int = 768 * 4, residual_dropout: float = 0.1, use_attn_mask: bool = True, - embedding_layer_norm: bool = False, neg_inf: float = -1e9, layer_norm_epsilon: float = 1e-5, - accurate_gelu: bool = False) -> keras.Model: - vocab_size += TextEncoder.SPECIAL_COUNT - tokens = Input(batch_shape=(None, max_len), - name='token_input', dtype='int32') - segment_ids = Input(batch_shape=(None, max_len), - name='segment_input', dtype='int32') - pos_ids = Input(batch_shape=(None, max_len), - 
name='position_input', dtype='int32') - attn_mask = Input(batch_shape=(None, 1, max_len, max_len), name='attention_mask_input', - dtype=K.floatx()) if use_attn_mask else None - inputs = [tokens, segment_ids, pos_ids] - embedding_layer = Embedding(embedding_dim, embedding_dropout, vocab_size, max_len, trainable_pos_embedding, - use_one_embedding_dropout, embedding_layer_norm, layer_norm_epsilon) - x = embedding_layer(inputs) - for i in range(num_layers): - x = EncoderLayer(embedding_dim, num_heads, d_hid, residual_dropout, - attention_dropout, use_attn_mask, i, neg_inf, layer_norm_epsilon, accurate_gelu)(x, attn_mask) - if use_attn_mask: - inputs.append(attn_mask) - return keras.Model(inputs=inputs, outputs=[x], name='Transformer') -__author__ = "Jakob Aungiers" -__copyright__ = "Jakob Aungiers 2018" -__version__ = "2.0.0" -__license__ = "MIT" - -import os -import json -import time -import math -import matplotlib.pyplot as plt -from core.data_processor import DataLoader -from core.model import Model - - -def plot_results(predicted_data, true_data): - fig = plt.figure(facecolor='white') - ax = fig.add_subplot(111) - ax.plot(true_data, label='True Data') - plt.plot(predicted_data, label='Prediction') - plt.legend() - plt.show() - - -def plot_results_multiple(predicted_data, true_data, prediction_len): - fig = plt.figure(facecolor='white') - ax = fig.add_subplot(111) - ax.plot(true_data, label='True Data') - # Pad the list of predictions to shift it in the graph to it's correct start - for i, data in enumerate(predicted_data): - padding = [None for p in range(i * prediction_len)] - plt.plot(padding + data, label='Prediction') - plt.legend() - plt.show() - - -def main(): - configs = json.load(open('config.json', 'r')) - if not os.path.exists(configs['model']['save_dir']): - os.makedirs(configs['model']['save_dir']) - - data = DataLoader( - os.path.join('data', configs['data']['filename']), - configs['data']['train_test_split'], - configs['data']['columns'] - ) - - model = Model() - model.build_model(configs) - x, y = data.get_train_data( - seq_len=configs['data']['sequence_length'], - normalise=configs['data']['normalise'] - ) - - ''' - # in-memory training - model.train( - x, - y, - epochs = configs['training']['epochs'], - batch_size = configs['training']['batch_size'], - save_dir = configs['model']['save_dir'] - ) - ''' - # out-of memory generative training - steps_per_epoch = math.ceil( - (data.len_train - configs['data']['sequence_length']) / configs['training']['batch_size']) - model.train_generator( - data_gen=data.generate_train_batch( - seq_len=configs['data']['sequence_length'], - batch_size=configs['training']['batch_size'], - normalise=configs['data']['normalise'] - ), - epochs=configs['training']['epochs'], - batch_size=configs['training']['batch_size'], - steps_per_epoch=steps_per_epoch, - save_dir=configs['model']['save_dir'] - ) - - x_test, y_test = data.get_test_data( - seq_len=configs['data']['sequence_length'], - normalise=configs['data']['normalise'] - ) - - predictions = model.predict_sequences_multiple( - x_test, configs['data']['sequence_length'], configs['data']['sequence_length']) - # predictions = model.predict_sequence_full(x_test, configs['data']['sequence_length']) - # predictions = model.predict_point_by_point(x_test) - - plot_results_multiple(predictions, y_test, - configs['data']['sequence_length']) - # plot_results(predictions, y_test) - - -if __name__ == '__main__': - main() -# this lstm core module implementation provides an implementation -# of time series 
prediction using a lstm approach. It is provided -# as is with no warranties or support. - -__author__ = "Jakob Aungiers" -__copyright__ = "Jakob Aungiers 2018" -__version__ = "2.0.0" -__license__ = "MIT" - -import warnings -warnings.filterwarnings("ignore") # ignore messy numpy warnings -import math -import numpy as np -import pandas as pd - - -class DataLoader(): - """A class for loading and transforming data for the lstm model""" - - def __init__(self, filename, split, cols): - dataframe = pd.read_csv(filename) - i_split = int(len(dataframe) * split) - self.data_train = dataframe.get(cols).values[:i_split] - self.data_test = dataframe.get(cols).values[i_split:] - self.len_train = len(self.data_train) - self.len_test = len(self.data_test) - self.len_train_windows = None - - def get_test_data(self, seq_len, normalise): - ''' - Create x, y test data windows - Warning: batch method, not generative, make sure you have enough memory to - load data, otherwise reduce size of the training split. - ''' - data_windows = [] - for i in range(self.len_test - seq_len): - data_windows.append(self.data_test[i:i+seq_len]) - - data_windows = np.array(data_windows).astype(float) - data_windows = self.normalise_windows( - data_windows, single_window=False) if normalise else data_windows - - x = data_windows[:, :-1] - y = data_windows[:, -1, [0]] - return x, y - - def get_train_data(self, seq_len, normalise): - ''' - Create x, y train data windows - Warning: batch method, not generative, make sure you have enough memory to - load data, otherwise use generate_training_window() method. - ''' - data_x = [] - data_y = [] - for i in range(self.len_train - seq_len): - x, y = self._next_window(i, seq_len, normalise) - data_x.append(x) - data_y.append(y) - return np.array(data_x), np.array(data_y) - - def generate_train_batch(self, seq_len, batch_size, normalise): - '''Yield a generator of training data from filename on given list of cols split for train/test''' - i = 0 - while i < (self.len_train - seq_len): - x_batch = [] - y_batch = [] - for b in range(batch_size): - if i >= (self.len_train - seq_len): - # stop-condition for a smaller final batch if data doesn't divide evenly - yield np.array(x_batch), np.array(y_batch) - i = 0 - x, y = self._next_window(i, seq_len, normalise) - x_batch.append(x) - y_batch.append(y) - i += 1 - yield np.array(x_batch), np.array(y_batch) - - def _next_window(self, i, seq_len, normalise): - '''Generates the next data window from the given index location i''' - window = self.data_train[i:i+seq_len] - window = self.normalise_windows(window, single_window=True)[ - 0] if normalise else window - x = window[:-1] - y = window[-1, [0]] - return x, y - - def normalise_windows(self, window_data, single_window=False): - '''Normalise window with a base value of zero''' - normalised_data = [] - window_data = [window_data] if single_window else window_data - for window in window_data: - normalised_window = [] - for col_i in range(window.shape[1]): - normalised_col = [((float(p) / float(window[0, col_i])) - 1) - for p in window[:, col_i]] - normalised_window.append(normalised_col) - # reshape and transpose array back into original multidimensional format - normalised_window = np.array(normalised_window).T - normalised_data.append(normalised_window) - return np.array(normalised_data) -import os -import math -import numpy as np -import datetime as dt -from numpy import newaxis -from core.utils import Timer -from keras.layers import Dense, Activation, Dropout, LSTM -from keras.models import 
Sequential, load_model -from keras.callbacks import EarlyStopping, ModelCheckpoint - - -class Model(): - """A class for an building and inferencing an lstm model""" - - def __init__(self): - self.model = Sequential() - - def load_model(self, filepath): - print('[Model] Loading model from file %s' % filepath) - self.model = load_model(filepath) - - def build_model(self, configs): - timer = Timer() - timer.start() - - for layer in configs['model']['layers']: - neurons = layer['neurons'] if 'neurons' in layer else None - dropout_rate = layer['rate'] if 'rate' in layer else None - activation = layer['activation'] if 'activation' in layer else None - return_seq = layer['return_seq'] if 'return_seq' in layer else None - input_timesteps = layer['input_timesteps'] if 'input_timesteps' in layer else None - input_dim = layer['input_dim'] if 'input_dim' in layer else None - - if layer['type'] == 'dense': - self.model.add(Dense(neurons, activation=activation)) - if layer['type'] == 'lstm': - self.model.add(LSTM(neurons, input_shape=( - input_timesteps, input_dim), return_sequences=return_seq)) - if layer['type'] == 'dropout': - self.model.add(Dropout(dropout_rate)) - - self.model.compile( - loss=configs['model']['loss'], optimizer=configs['model']['optimizer']) - - print('[Model] Model Compiled') - timer.stop() - - def train(self, x, y, epochs, batch_size, save_dir): - timer = Timer() - timer.start() - print('[Model] Training Started') - print('[Model] %s epochs, %s batch size' % (epochs, batch_size)) - - save_fname = os.path.join( - save_dir, '%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs))) - callbacks = [ - EarlyStopping(monitor='val_loss', patience=2), - ModelCheckpoint(filepath=save_fname, - monitor='val_loss', save_best_only=True) - ] - self.model.fit( - x, - y, - epochs=epochs, - batch_size=batch_size, - callbacks=callbacks - ) - self.model.save(save_fname) - - print('[Model] Training Completed. Model saved as %s' % save_fname) - timer.stop() - - def train_generator(self, data_gen, epochs, batch_size, steps_per_epoch, save_dir): - timer = Timer() - timer.start() - print('[Model] Training Started') - print('[Model] %s epochs, %s batch size, %s batches per epoch' % - (epochs, batch_size, steps_per_epoch)) - - save_fname = os.path.join( - save_dir, '%s-e%s.h5' % (dt.datetime.now().strftime('%d%m%Y-%H%M%S'), str(epochs))) - callbacks = [ - ModelCheckpoint(filepath=save_fname, - monitor='loss', save_best_only=True) - ] - self.model.fit_generator( - data_gen, - steps_per_epoch=steps_per_epoch, - epochs=epochs, - callbacks=callbacks, - workers=1 - ) - - print('[Model] Training Completed. 
Model saved as %s' % save_fname) - timer.stop() - - def predict_point_by_point(self, data): - # Predict each timestep given the last sequence of true data, in effect only predicting 1 step ahead each time - print('[Model] Predicting Point-by-Point...') - predicted = self.model.predict(data) - predicted = np.reshape(predicted, (predicted.size,)) - return predicted - - def predict_sequences_multiple(self, data, window_size, prediction_len): - # Predict sequence of 50 steps before shifting prediction run forward by 50 steps - print('[Model] Predicting Sequences Multiple...') - prediction_seqs = [] - for i in range(int(len(data)/prediction_len)): - curr_frame = data[i*prediction_len] - predicted = [] - for j in range(prediction_len): - predicted.append(self.model.predict( - curr_frame[newaxis, :, :])[0, 0]) - curr_frame = curr_frame[1:] - curr_frame = np.insert( - curr_frame, [window_size-2], predicted[-1], axis=0) - prediction_seqs.append(predicted) - return prediction_seqs - - def predict_sequence_full(self, data, window_size): - # Shift the window by 1 new prediction each time, re-run predictions on new window - print('[Model] Predicting Sequences Full...') - curr_frame = data[0] - predicted = [] - for i in range(len(data)): - predicted.append(self.model.predict( - curr_frame[newaxis, :, :])[0, 0]) - curr_frame = curr_frame[1:] - curr_frame = np.insert( - curr_frame, [window_size-2], predicted[-1], axis=0) - return predicted -import datetime as dt - - -class Timer(): - - def __init__(self): - self.start_dt = None - - def start(self): - self.start_dt = dt.datetime.now() - - def stop(self): - end_dt = dt.datetime.now() - print('Time taken: %s' % (end_dt - self.start_dt)) -""" - Runs a simple Neural Machine Translation model - Type `python run.py -h` for help with arguments. -""" -import os -import argparse - -from keras.callbacks import ModelCheckpoint - -from models.NMT import simpleNMT -from data.reader import Data, Vocabulary -from utils.metrics import all_acc -from utils.examples import run_examples - -cp = ModelCheckpoint("./weights/NMT.{epoch:02d}-{val_loss:.2f}.hdf5", - monitor='val_loss', - verbose=0, - save_best_only=True, - save_weights_only=True, - mode='auto') - -# create a directory if it doesn't already exist -if not os.path.exists('./weights'): - os.makedirs('./weights/') - - -def main(args): - os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" # see issue #152 - os.environ["CUDA_VISIBLE_DEVICES"] = args.gpu - # Dataset functions - input_vocab = Vocabulary('./data/human_vocab.json', padding=args.padding) - output_vocab = Vocabulary('./data/machine_vocab.json', - padding=args.padding) - - print('Loading datasets.') - - training = Data(args.training_data, input_vocab, output_vocab) - validation = Data(args.validation_data, input_vocab, output_vocab) - training.load() - validation.load() - training.transform() - validation.transform() - - print('Datasets Loaded.') - print('Compiling Model.') - model = simpleNMT(pad_length=args.padding, - n_chars=input_vocab.size(), - n_labels=output_vocab.size(), - embedding_learnable=False, - encoder_units=256, - decoder_units=256, - trainable=True, - return_probabilities=False) - - model.summary() - model.compile(optimizer='adam', - loss='categorical_crossentropy', - metrics=['accuracy', all_acc]) - print('Model Compiled.') - print('Training. 
Ctrl+C to end early.')
-
-    try:
-        model.fit_generator(generator=training.generator(args.batch_size),
-                            steps_per_epoch=100,
-                            validation_data=validation.generator(
-                                args.batch_size),
-                            validation_steps=100,
-                            callbacks=[cp],
-                            workers=1,
-                            verbose=1,
-                            epochs=args.epochs)
-
-    except KeyboardInterrupt:
-        print('Model training stopped early.')
-
-    print('Model training complete.')
-
-    run_examples(model, input_vocab, output_vocab)
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser()
-    named_args = parser.add_argument_group('named arguments')
-
-    named_args.add_argument('-e', '--epochs', metavar='|',
-                            help="""Number of Epochs to Run""",
-                            required=False, default=50, type=int)
-
-    named_args.add_argument('-g', '--gpu', metavar='|',
-                            help="""GPU to use""",
-                            required=False, default='0', type=str)
-
-    named_args.add_argument('-p', '--padding', metavar='|',
-                            help="""Amount of padding to use""",
-                            required=False, default=50, type=int)
-
-    named_args.add_argument('-t', '--training-data', metavar='|',
-                            help="""Location of training data""",
-                            required=False, default='./data/training.csv')
-
-    named_args.add_argument('-v', '--validation-data', metavar='|',
-                            help="""Location of validation data""",
-                            required=False, default='./data/validation.csv')
-
-    named_args.add_argument('-b', '--batch-size', metavar='|',
-                            help="""Batch size to use""",
-                            required=False, default=32, type=int)
-    args = parser.parse_args()
-    print(args)
-
-    main(args)
-import argparse
-import os
-
-import numpy as np
-import matplotlib.pyplot as plt
-import matplotlib.patches as mpatches
-
-from models.NMT import simpleNMT
-from utils.examples import run_example
-from data.reader import Vocabulary
-
-HERE = os.path.realpath(os.path.join(os.path.realpath(__file__), '..'))
-
-
-def load_examples(file_name):
-    with open(file_name) as f:
-        return [s.replace('\n', '') for s in f.readlines()]
-
-
-# create a directory if it doesn't already exist
-if not os.path.exists(os.path.join(HERE, 'attention_maps')):
-    os.makedirs(os.path.join(HERE, 'attention_maps'))
-
-SAMPLE_HUMAN_VOCAB = os.path.join(HERE, 'data', 'sample_human_vocab.json')
-SAMPLE_MACHINE_VOCAB = os.path.join(HERE, 'data', 'sample_machine_vocab.json')
-SAMPLE_WEIGHTS = os.path.join(HERE, 'weights', 'sample_NMT.49.0.01.hdf5')
-
-
-class Visualizer(object):
-
-    def __init__(self,
-                 padding=None,
-                 input_vocab=SAMPLE_HUMAN_VOCAB,
-                 output_vocab=SAMPLE_MACHINE_VOCAB):
-        """
-        Visualizes attention maps
-        :param padding: the padding to use for the sequences.
-        :param input_vocab: the location of the input human
-                            vocabulary file
-        :param output_vocab: the location of the output
-                             machine vocabulary file
-        """
-        self.padding = padding
-        self.input_vocab = Vocabulary(
-            input_vocab, padding=padding)
-        self.output_vocab = Vocabulary(
-            output_vocab, padding=padding)
-
-    def set_models(self, pred_model, proba_model):
-        """
-        Sets the models to use
-        :param pred_model: the prediction model
-        :param proba_model: the model that outputs the activation maps
-        """
-        self.pred_model = pred_model
-        self.proba_model = proba_model
-
-    def attention_map(self, text):
-        """
-        Text to visualize the attention map for.
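        :param text: the input string to translate; the decoder's attention
            probabilities over the input characters are rendered as a
            grayscale image, one row per predicted output character.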
- """ - # encode the string - d = self.input_vocab.string_to_int(text) - - # get the output sequence - predicted_text = run_example( - self.pred_model, self.input_vocab, self.output_vocab, text) - - text_ = list(text) + [''] + [''] * self.input_vocab.padding - # get the lengths of the string - input_length = len(text)+1 - output_length = predicted_text.index('')+1 - # get the activation map - activation_map = np.squeeze(self.proba_model.predict(np.array([d])))[ - 0:output_length, 0:input_length] - - # import seaborn as sns - plt.clf() - f = plt.figure(figsize=(8, 8.5)) - ax = f.add_subplot(1, 1, 1) - - # add image - i = ax.imshow(activation_map, interpolation='nearest', cmap='gray') - - # add colorbar - cbaxes = f.add_axes([0.2, 0, 0.6, 0.03]) - cbar = f.colorbar(i, cax=cbaxes, orientation='horizontal') - cbar.ax.set_xlabel('Probability', labelpad=2) - - # add labels - ax.set_yticks(range(output_length)) - ax.set_yticklabels(predicted_text[:output_length]) - - ax.set_xticks(range(input_length)) - ax.set_xticklabels(text_[:input_length], rotation=45) - - ax.set_xlabel('Input Sequence') - ax.set_ylabel('Output Sequence') - - # add grid and legend - ax.grid() - # ax.legend(loc='best') - - f.savefig(os.path.join(HERE, 'attention_maps', - text.replace('/', '')+'.pdf'), bbox_inches='tight') - f.show() - - -def main(examples, args): - print('Total Number of Examples:', len(examples)) - weights_file = os.path.expanduser(args.weights) - print('Weights loading from:', weights_file) - viz = Visualizer(padding=args.padding, - input_vocab=args.human_vocab, - output_vocab=args.machine_vocab) - print('Loading models') - pred_model = simpleNMT(trainable=False, - pad_length=args.padding, - n_chars=viz.input_vocab.size(), - n_labels=viz.output_vocab.size()) - - pred_model.load_weights(weights_file, by_name=True) - pred_model.compile(optimizer='adam', loss='categorical_crossentropy') - - proba_model = simpleNMT(trainable=False, - pad_length=args.padding, - n_chars=viz.input_vocab.size(), - n_labels=viz.output_vocab.size(), - return_probabilities=True) - - proba_model.load_weights(weights_file, by_name=True) - proba_model.compile(optimizer='adam', loss='categorical_crossentropy') - - viz.set_models(pred_model, proba_model) - - print('Models loaded') - - for example in examples: - viz.attention_map(example) - - print('Completed visualizations') - - -if __name__ == '__main__': - parser = argparse.ArgumentParser() - named_args = parser.add_argument_group('named arguments') - - named_args.add_argument('-e', '--examples', metavar='|', - help="""Example string/file to visualize attention map for - If file, it must end with '.txt'""", - required=True) - named_args.add_argument('-w', '--weights', metavar='|', - help="""Location of weights""", - required=False, - default=SAMPLE_WEIGHTS) - named_args.add_argument('-p', '--padding', metavar='|', - help="""Length of padding""", - required=False, default=50, type=int) - named_args.add_argument('-hv', '--human-vocab', metavar='|', - help="""Path to the human vocabulary""", - required=False, - default=SAMPLE_HUMAN_VOCAB, - type=str) - named_args.add_argument('-mv', '--machine-vocab', metavar='|', - help="""Path to the machine vocabulary""", - required=False, - default=SAMPLE_MACHINE_VOCAB, - type=str) - args = parser.parse_args() - - if '.txt' in args.examples: - examples = load_examples(args.examples) - else: - examples = [args.examples] - - main(examples, args) -""" - Date Generator - This code creates data for our date translation model - - References: - 
https://github.com/rasmusbergpalm/normalization/blob/master/babel_data.py - https://github.com/joke2k/faker - https://docs.python.org/3/library/datetime.html#strftime-strptime-behavior - - Contact: - zaf@datalogue.io (@zafarali) -""" -from babel.dates import format_date -import babel -from faker import Faker -import random -import json -import os - -DATA_FOLDER = os.path.realpath(os.path.join(os.path.realpath(__file__), '..')) - - -fake = Faker() -fake.seed(230517) -random.seed(230517) - -FORMATS = ['short', - 'medium', - 'long', - 'full', - 'd MMM YYY', - 'd MMMM YYY', - 'dd MMM YYY', - 'd MMM, YYY', - 'd MMMM, YYY', - 'dd, MMM YYY', - 'd MM YY', - 'd MMMM YYY', - 'MMMM d YYY', - 'MMMM d, YYY', - 'dd.MM.YY', - ] - -# change this if you want it to work with only a single language -# LOCALES = ['en_US'] -LOCALES = babel.localedata.locale_identifiers() - - -def create_date(): - """ - Creates some fake dates - :returns: tuple containing - 1. human formatted string - 2. machine formatted string - 3. date object. - """ - dt = fake.date_object() - - # wrapping this in a try catch because - # the locale 'vo' and format 'full' will fail - try: - human = format_date(dt, - format=random.choice(FORMATS), - locale=random.choice(LOCALES)) - - case_change = random.randint(0, 3) # 1/2 chance of case change - if case_change == 1: - human = human.upper() - elif case_change == 2: - human = human.lower() - - machine = dt.isoformat() - except AttributeError as e: - # print(e) - return None, None, None - - return human, machine, dt - - -def create_dataset(dataset_name, n_examples, vocabulary=False): - """ - Creates a csv dataset with n_examples and optional vocabulary - :param dataset_name: name of the file to save as - :n_examples: the number of examples to generate - :vocabulary: if true, will also save the vocabulary - """ - human_vocab = set() - machine_vocab = set() - - with open(dataset_name, 'w') as f: - for i in range(n_examples): - h, m, _ = create_date() - if h is not None: - f.write('"'+h + '","' + m + '"\n') - human_vocab.update(tuple(h)) - machine_vocab.update(tuple(m)) - - if vocabulary: - int2human = dict(enumerate(human_vocab)) - int2human.update({len(int2human): '', - len(int2human)+1: ''}) - int2machine = dict(enumerate(machine_vocab)) - int2machine.update({len(int2machine): '', - len(int2machine)+1: ''}) - - human2int = {v: k for k, v in int2human.items()} - machine2int = {v: k for k, v in int2machine.items()} - - with open(os.path.join(DATA_FOLDER, 'human_vocab.json'), 'w') as f: - json.dump(human2int, f) - with open(os.path.join(DATA_FOLDER, 'machine_vocab.json'), 'w') as f: - json.dump(machine2int, f) - - -if __name__ == '__main__': - print('creating dataset') - create_dataset(os.path.join(DATA_FOLDER, 'training.csv'), 500000, - vocabulary=True) - create_dataset(os.path.join(DATA_FOLDER, 'validation.csv'), 1000) - print('dataset created.') -import json -import csv -import random - -import numpy as np -from keras.utils.np_utils import to_categorical - -random.seed(1984) - -INPUT_PADDING = 50 -OUTPUT_PADDING = 100 - - -class Vocabulary(object): - - def __init__(self, vocabulary_file, padding=None): - """ - Creates a vocabulary from a file - :param vocabulary_file: the path to the vocabulary - """ - self.vocabulary_file = vocabulary_file - with open(vocabulary_file, 'r') as f: - self.vocabulary = json.load(f) - - self.padding = padding - self.reverse_vocabulary = {v: k for k, v in self.vocabulary.items()} - - def size(self): - """ - Gets the size of the vocabulary - """ - return 
len(self.vocabulary.keys()) - - def string_to_int(self, text): - """ - Converts a string into it's character integer - representation - :param text: text to convert - """ - characters = list(text) - - integers = [] - - if self.padding and len(characters) >= self.padding: - # truncate if too long - characters = characters[:self.padding - 1] - - characters.append('') - - for c in characters: - if c in self.vocabulary: - integers.append(self.vocabulary[c]) - else: - integers.append(self.vocabulary['']) - - # pad: - if self.padding and len(integers) < self.padding: - integers.extend([self.vocabulary['']] - * (self.padding - len(integers))) - - if len(integers) != self.padding: - print(text) - raise AttributeError('Length of text was not padding.') - return integers - - def int_to_string(self, integers): - """ - Decodes a list of integers - into it's string representation - """ - characters = [] - for i in integers: - characters.append(self.reverse_vocabulary[i]) - - return characters - - -class Data(object): - - def __init__(self, file_name, input_vocabulary, output_vocabulary): - """ - Creates an object that gets data from a file - :param file_name: name of the file to read from - :param vocabulary: the Vocabulary object to use - :param batch_size: the number of datapoints to return - :param padding: the amount of padding to apply to - a short string - """ - - self.input_vocabulary = input_vocabulary - self.output_vocabulary = output_vocabulary - self.file_name = file_name - - def load(self): - """ - Loads data from a file - """ - self.inputs = [] - self.targets = [] - - with open(self.file_name, 'r') as f: - reader = csv.reader(f) - for row in reader: - self.inputs.append(row[0]) - self.targets.append(row[1]) - - def transform(self): - """ - Transforms the data as necessary - """ - # @TODO: use `pool.map_async` here? - self.inputs = np.array(list( - map(self.input_vocabulary.string_to_int, self.inputs))) - self.targets = map(self.output_vocabulary.string_to_int, self.targets) - self.targets = np.array( - list(map( - lambda x: to_categorical( - x, - num_classes=self.output_vocabulary.size()), - self.targets))) - - assert len(self.inputs.shape) == 2, 'Inputs could not properly be encoded' - assert len( - self.targets.shape) == 3, 'Targets could not properly be encoded' - - def generator(self, batch_size): - """ - Creates a generator that can be used in `model.fit_generator()` - Batches are generated randomly. 
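        Each batch draws `batch_size` row indices without replacement, so a
        batch is a tuple of integer-encoded inputs with shape
        (batch_size, input padding) and one-hot targets with shape
        (batch_size, output padding, n_labels).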
- :param batch_size: the number of instances to include per batch - """ - instance_id = range(len(self.inputs)) - while True: - try: - batch_ids = random.sample(instance_id, batch_size) - yield (np.array(self.inputs[batch_ids], dtype=int), - np.array(self.targets[batch_ids])) - except Exception as e: - print('EXCEPTION OMG') - print(e) - yield None, None - - -if __name__ == '__main__': - input_vocab = Vocabulary('./human_vocab.json', padding=50) - output_vocab = Vocabulary('./machine_vocab.json', padding=12) - ds = Data('./fake.csv', input_vocab, output_vocab) - ds.load() - ds.transform() - print(ds.inputs.shape) - print(ds.targets.shape) - g = ds.generator(32) - print(ds.inputs[[5, 10, 12]].shape) - print(ds.targets[[5, 10, 12]].shape) - # for i in range(50): - # print(next(g)[0].shape) - # print(next(g)[1].shape) -import numpy as np -import os -from keras.models import Model -from keras.layers import Dense, Embedding, Activation, Permute -from keras.layers import Input, Flatten, Dropout -from keras.layers.recurrent import LSTM -from keras.layers.wrappers import TimeDistributed, Bidirectional -from .custom_recurrents import AttentionDecoder - - -def simpleNMT(pad_length=100, - n_chars=105, - n_labels=6, - embedding_learnable=False, - encoder_units=256, - decoder_units=256, - trainable=True, - return_probabilities=False): - """ - Builds a Neural Machine Translator that has alignment attention - :param pad_length: the size of the input sequence - :param n_chars: the number of characters in the vocabulary - :param n_labels: the number of possible labelings for each character - :param embedding_learnable: decides if the one hot embedding should be refinable. - :return: keras.models.Model that can be compiled and fit'ed - - *** REFERENCES *** - Lee, Jason, Kyunghyun Cho, and Thomas Hofmann. 
- "Neural Machine Translation By Jointly Learning To Align and Translate" - """ - input_ = Input(shape=(pad_length,), dtype='float32') - input_embed = Embedding(n_chars, n_chars, - input_length=pad_length, - trainable=embedding_learnable, - weights=[np.eye(n_chars)], - name='OneHot')(input_) - - rnn_encoded = Bidirectional(LSTM(encoder_units, return_sequences=True), - name='bidirectional_1', - merge_mode='concat', - trainable=trainable)(input_embed) - - y_hat = AttentionDecoder(decoder_units, - name='attention_decoder_1', - output_dim=n_labels, - return_probabilities=return_probabilities, - trainable=trainable)(rnn_encoded) - - model = Model(inputs=input_, outputs=y_hat) - - return model - - -if __name__ == '__main__': - model = simpleNMT() - model.summary() -import tensorflow as tf -from keras import backend as K -from keras import regularizers, constraints, initializers, activations -from keras.layers.recurrent import Recurrent -from keras.engine import InputSpec -from .tdd import _time_distributed_dense - - -def tfPrint(d, T): return tf.Print(input_=T, data=[T, tf.shape(T)], message=d) - - -class AttentionDecoder(Recurrent): - - def __init__(self, units, output_dim, - activation='tanh', - return_probabilities=False, - name='AttentionDecoder', - kernel_initializer='glorot_uniform', - recurrent_initializer='orthogonal', - bias_initializer='zeros', - kernel_regularizer=None, - bias_regularizer=None, - activity_regularizer=None, - kernel_constraint=None, - bias_constraint=None, - **kwargs): - """ - Implements an AttentionDecoder that takes in a sequence encoded by an - encoder and outputs the decoded states - :param units: dimension of the hidden state and the attention matrices - :param output_dim: the number of labels in the output space - - references: - Bahdanau, Dzmitry, Kyunghyun Cho, and Yoshua Bengio. - "Neural machine translation by jointly learning to align and translate." - arXiv preprint arXiv:1409.0473 (2014). - """ - self.units = units - self.output_dim = output_dim - self.return_probabilities = return_probabilities - self.activation = activations.get(activation) - self.kernel_initializer = initializers.get(kernel_initializer) - self.recurrent_initializer = initializers.get(recurrent_initializer) - self.bias_initializer = initializers.get(bias_initializer) - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.recurrent_regularizer = regularizers.get(kernel_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - self.activity_regularizer = regularizers.get(activity_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.recurrent_constraint = constraints.get(kernel_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - super(AttentionDecoder, self).__init__(**kwargs) - self.name = name - self.return_sequences = True # must return sequences - - def build(self, input_shape): - """ - See Appendix 2 of Bahdanau 2014, arXiv:1409.0473 - for model details that correspond to the matrices here. 
- """ - - self.batch_size, self.timesteps, self.input_dim = input_shape - - if self.stateful: - super(AttentionDecoder, self).reset_states() - - self.states = [None, None] # y, s - - """ - Matrices for creating the context vector - """ - - self.V_a = self.add_weight(shape=(self.units,), - name='V_a', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - self.W_a = self.add_weight(shape=(self.units, self.units), - name='W_a', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - self.U_a = self.add_weight(shape=(self.input_dim, self.units), - name='U_a', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - self.b_a = self.add_weight(shape=(self.units,), - name='b_a', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - """ - Matrices for the r (reset) gate - """ - self.C_r = self.add_weight(shape=(self.input_dim, self.units), - name='C_r', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - self.U_r = self.add_weight(shape=(self.units, self.units), - name='U_r', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - self.W_r = self.add_weight(shape=(self.output_dim, self.units), - name='W_r', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - self.b_r = self.add_weight(shape=(self.units, ), - name='b_r', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - - """ - Matrices for the z (update) gate - """ - self.C_z = self.add_weight(shape=(self.input_dim, self.units), - name='C_z', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - self.U_z = self.add_weight(shape=(self.units, self.units), - name='U_z', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - self.W_z = self.add_weight(shape=(self.output_dim, self.units), - name='W_z', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - self.b_z = self.add_weight(shape=(self.units, ), - name='b_z', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - """ - Matrices for the proposal - """ - self.C_p = self.add_weight(shape=(self.input_dim, self.units), - name='C_p', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - self.U_p = self.add_weight(shape=(self.units, self.units), - name='U_p', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - self.W_p = self.add_weight(shape=(self.output_dim, self.units), - name='W_p', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - self.b_p = self.add_weight(shape=(self.units, ), - name='b_p', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - """ - Matrices for making the final prediction vector - """ - self.C_o = 
self.add_weight(shape=(self.input_dim, self.output_dim), - name='C_o', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - self.U_o = self.add_weight(shape=(self.units, self.output_dim), - name='U_o', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - self.W_o = self.add_weight(shape=(self.output_dim, self.output_dim), - name='W_o', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - self.b_o = self.add_weight(shape=(self.output_dim, ), - name='b_o', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - - # For creating the initial state: - self.W_s = self.add_weight(shape=(self.input_dim, self.units), - name='W_s', - initializer=self.recurrent_initializer, - regularizer=self.recurrent_regularizer, - constraint=self.recurrent_constraint) - - self.input_spec = [ - InputSpec(shape=(self.batch_size, self.timesteps, self.input_dim))] - self.built = True - - def call(self, x): - # store the whole sequence so we can "attend" to it at each timestep - self.x_seq = x - - # apply the a dense layer over the time dimension of the sequence - # do it here because it doesn't depend on any previous steps - # thefore we can save computation time: - self._uxpb = _time_distributed_dense(self.x_seq, self.U_a, b=self.b_a, - input_dim=self.input_dim, - timesteps=self.timesteps, - output_dim=self.units) - - return super(AttentionDecoder, self).call(x) - - def get_initial_state(self, inputs): - print('inputs shape:', inputs.get_shape()) - - # apply the matrix on the first time step to get the initial s0. - s0 = activations.tanh(K.dot(inputs[:, 0], self.W_s)) - - # from keras.layers.recurrent to initialize a vector of (batchsize, - # output_dim) - y0 = K.zeros_like(inputs) # (samples, timesteps, input_dims) - y0 = K.sum(y0, axis=(1, 2)) # (samples, ) - y0 = K.expand_dims(y0) # (samples, 1) - y0 = K.tile(y0, [1, self.output_dim]) - - return [y0, s0] - - def step(self, x, states): - - ytm, stm = states - - # repeat the hidden state to the length of the sequence - _stm = K.repeat(stm, self.timesteps) - - # now multiplty the weight matrix with the repeated hidden state - _Wxstm = K.dot(_stm, self.W_a) - - # calculate the attention probabilities - # this relates how much other timesteps contributed to this one. 
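# Concretely, following Bahdanau et al. (2014), the energies and attention
# weights computed below are:
#     e_tj = V_a^T tanh(W_a s_{t-1} + U_a h_j + b_a)
#     a_tj = exp(e_tj) / sum_k exp(e_tk)
# where self._uxpb pre-computes U_a h_j + b_a for every timestep j, and the
# explicit exp / repeated-sum pair implements the softmax over timesteps.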
- et = K.dot(activations.tanh(_Wxstm + self._uxpb), - K.expand_dims(self.V_a)) - at = K.exp(et) - at_sum = K.sum(at, axis=1) - at_sum_repeated = K.repeat(at_sum, self.timesteps) - at /= at_sum_repeated # vector of size (batchsize, timesteps, 1) - - # calculate the context vector - context = K.squeeze(K.batch_dot(at, self.x_seq, axes=1), axis=1) - # ~~~> calculate new hidden state - # first calculate the "r" gate: - - rt = activations.sigmoid( - K.dot(ytm, self.W_r) - + K.dot(stm, self.U_r) - + K.dot(context, self.C_r) - + self.b_r) - - # now calculate the "z" gate - zt = activations.sigmoid( - K.dot(ytm, self.W_z) - + K.dot(stm, self.U_z) - + K.dot(context, self.C_z) - + self.b_z) - - # calculate the proposal hidden state: - s_tp = activations.tanh( - K.dot(ytm, self.W_p) - + K.dot((rt * stm), self.U_p) - + K.dot(context, self.C_p) - + self.b_p) - - # new hidden state: - st = (1-zt)*stm + zt * s_tp - - yt = activations.softmax( - K.dot(ytm, self.W_o) - + K.dot(stm, self.U_o) - + K.dot(context, self.C_o) - + self.b_o) - - if self.return_probabilities: - return at, [yt, st] - else: - return yt, [yt, st] - - def compute_output_shape(self, input_shape): - """ - For Keras internal compatability checking - """ - if self.return_probabilities: - return (None, self.timesteps, self.timesteps) - else: - return (None, self.timesteps, self.output_dim) - - def get_config(self): - """ - For rebuilding models on load time. - """ - config = { - 'output_dim': self.output_dim, - 'units': self.units, - 'return_probabilities': self.return_probabilities - } - base_config = super(AttentionDecoder, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -# check to see if it compiles -if __name__ == '__main__': - from keras.layers import Input, LSTM - from keras.models import Model - from keras.layers.wrappers import Bidirectional - i = Input(shape=(100, 104), dtype='float32') - enc = Bidirectional(LSTM(64, return_sequences=True), - merge_mode='concat')(i) - dec = AttentionDecoder(32, 4)(enc) - model = Model(inputs=i, outputs=dec) - model.summary() -""" -Original code from the keras backend that -implements the _time_distributed_dense layer. -""" -import keras.backend as K - - -def _time_distributed_dense(x, w, b=None, dropout=None, - input_dim=None, output_dim=None, - timesteps=None, training=None): - """Apply `y . w + b` for every temporal slice y of x. - # Arguments - x: input tensor. - w: weight matrix. - b: optional bias vector. - dropout: wether to apply dropout (same dropout mask - for every temporal slice of the input). - input_dim: integer; optional dimensionality of the input. - output_dim: integer; optional dimensionality of the output. - timesteps: integer; optional number of timesteps. - training: training phase tensor or boolean. - # Returns - Output tensor. - """ - if not input_dim: - input_dim = K.shape(x)[2] - if not timesteps: - timesteps = K.shape(x)[1] - if not output_dim: - output_dim = K.shape(w)[1] - - if dropout is not None and 0. 
< dropout < 1.: - # apply the same dropout pattern at every timestep - ones = K.ones_like(K.reshape(x[:, 0, :], (-1, input_dim))) - dropout_matrix = K.dropout(ones, dropout) - expanded_dropout_matrix = K.repeat(dropout_matrix, timesteps) - x = K.in_train_phase(x * expanded_dropout_matrix, x, training=training) - - # collapse time dimension and batch dimension together - x = K.reshape(x, (-1, input_dim)) - x = K.dot(x, w) - if b is not None: - x = K.bias_add(x, b) - # reshape to 3D tensor - if K.backend() == 'tensorflow': - x = K.reshape(x, K.stack([-1, timesteps, output_dim])) - x.set_shape([None, None, output_dim]) - else: - x = K.reshape(x, (-1, timesteps, output_dim)) - return x -import numpy as np - -EXAMPLES = ['26th January 2016', '3 April 1989', '5 Dec 09', 'Sat 8 Jun 2017'] - - -def run_example(model, input_vocabulary, output_vocabulary, text): - encoded = input_vocabulary.string_to_int(text) - prediction = model.predict(np.array([encoded])) - prediction = np.argmax(prediction[0], axis=-1) - return output_vocabulary.int_to_string(prediction) - - -def run_examples(model, input_vocabulary, output_vocabulary, examples=EXAMPLES): - predicted = [] - for example in examples: - print('~~~~~') - predicted.append( - ''.join(run_example(model, input_vocabulary, output_vocabulary, example))) - print('input:', example) - print('output:', predicted[-1]) - return predicted -import keras.backend as K - - -def all_acc(y_true, y_pred): - """ - All Accuracy - https://github.com/rasmusbergpalm/normalization/blob/master/train.py#L10 - """ - return K.mean( - K.all( - K.equal( - K.max(y_true, axis=-1), - K.cast(K.argmax(y_pred, axis=-1), K.floatx()) - ), - axis=1) - ) -import sys - -from setuptools import setup -from setuptools import find_packages - -long_description = ''' -Keras Preprocessing is the data preprocessing -and data augmentation module of the Keras deep learning library. -It provides utilities for working with image data, text data, -and sequence data. - -Read the documentation at: https://keras.io/ - -Keras Preprocessing may be imported directly -from an up-to-date installation of Keras: - -``` -from keras import preprocessing -``` - -Keras Preprocessing is compatible with Python 2.7-3.6 -and is distributed under the MIT license. 
-''' - -setup(name='Keras_Preprocessing', - version='1.0.9', - description='Easy data preprocessing and data augmentation ' - 'for deep learning models', - long_description=long_description, - author='Keras Team', - url='https://github.com/keras-team/keras-preprocessing', - download_url='https://github.com/keras-team/' - 'keras-preprocessing/tarball/1.0.8', - license='MIT', - install_requires=['numpy>=1.9.1', - 'six>=1.9.0'], - extras_require={ - 'tests': ['pandas', - 'Pillow' if sys.version_info >= (3, 0) else 'pillow', - 'tensorflow==1.7', # CPU version - 'keras', - 'pytest', - 'pytest-xdist', - 'pytest-cov'], - 'pep8': ['flake8'], - 'image': ['scipy>=0.14', - 'Pillow>=5.2.0'], - }, - classifiers=[ - 'Development Status :: 5 - Production/Stable', - 'Intended Audience :: Developers', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', - 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', - 'Topic :: Software Development :: Libraries', - 'Topic :: Software Development :: Libraries :: Python Modules' - ], - packages=find_packages()) -"""Enables dynamic setting of underlying Keras module. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -_KERAS_BACKEND = None -_KERAS_UTILS = None - - -def set_keras_submodules(backend, utils): - # Deprecated, will be removed in the future. - global _KERAS_BACKEND - global _KERAS_UTILS - _KERAS_BACKEND = backend - _KERAS_UTILS = utils - - -def get_keras_submodule(name): - # Deprecated, will be removed in the future. - if name not in {'backend', 'utils'}: - raise ImportError( - 'Can only retrieve "backend" and "utils". ' - 'Requested: %s' % name) - if _KERAS_BACKEND is None: - raise ImportError('You need to first `import keras` ' - 'in order to use `keras_preprocessing`. ' - 'For instance, you can do:\n\n' - '```\n' - 'import keras\n' - 'from keras_preprocessing import image\n' - '```\n\n' - 'Or, preferably, this equivalent formulation:\n\n' - '```\n' - 'from keras import preprocessing\n' - '```\n') - if name == 'backend': - return _KERAS_BACKEND - elif name == 'utils': - return _KERAS_UTILS - - -__version__ = '1.0.9' -# -*- coding: utf-8 -*- -"""Utilities for preprocessing sequence data. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -import random -import json -from six.moves import range -import six - - -def pad_sequences(sequences, maxlen=None, dtype='int32', - padding='pre', truncating='pre', value=0.): - """Pads sequences to the same length. - - This function transforms a list of - `num_samples` sequences (lists of integers) - into a 2D Numpy array of shape `(num_samples, num_timesteps)`. - `num_timesteps` is either the `maxlen` argument if provided, - or the length of the longest sequence otherwise. - - Sequences that are shorter than `num_timesteps` - are padded with `value` at the end. - - Sequences longer than `num_timesteps` are truncated - so that they fit the desired length. - The position where padding or truncation happens is determined by - the arguments `padding` and `truncating`, respectively. - - Pre-padding is the default. - - # Arguments - sequences: List of lists, where each element is a sequence. - maxlen: Int, maximum length of all sequences. - dtype: Type of the output sequences. 
- To pad sequences with variable length strings, you can use `object`. - padding: String, 'pre' or 'post': - pad either before or after each sequence. - truncating: String, 'pre' or 'post': - remove values from sequences larger than - `maxlen`, either at the beginning or at the end of the sequences. - value: Float or String, padding value. - - # Returns - x: Numpy array with shape `(len(sequences), maxlen)` - - # Raises - ValueError: In case of invalid values for `truncating` or `padding`, - or in case of invalid shape for a `sequences` entry. - """ - if not hasattr(sequences, '__len__'): - raise ValueError('`sequences` must be iterable.') - num_samples = len(sequences) - - lengths = [] - for x in sequences: - try: - lengths.append(len(x)) - except TypeError: - raise ValueError('`sequences` must be a list of iterables. ' - 'Found non-iterable: ' + str(x)) - - if maxlen is None: - maxlen = np.max(lengths) - - # take the sample shape from the first non empty sequence - # checking for consistency in the main loop below. - sample_shape = tuple() - for s in sequences: - if len(s) > 0: - sample_shape = np.asarray(s).shape[1:] - break - - is_dtype_str = np.issubdtype( - dtype, np.str_) or np.issubdtype(dtype, np.unicode_) - if isinstance(value, six.string_types) and dtype != object and not is_dtype_str: - raise ValueError("`dtype` {} is not compatible with `value`'s type: {}\n" - "You should set `dtype=object` for variable length strings." - .format(dtype, type(value))) - - x = np.full((num_samples, maxlen) + sample_shape, value, dtype=dtype) - for idx, s in enumerate(sequences): - if not len(s): - continue # empty list/array was found - if truncating == 'pre': - trunc = s[-maxlen:] - elif truncating == 'post': - trunc = s[:maxlen] - else: - raise ValueError('Truncating type "%s" ' - 'not understood' % truncating) - - # check `trunc` has expected shape - trunc = np.asarray(trunc, dtype=dtype) - if trunc.shape[1:] != sample_shape: - raise ValueError('Shape of sample %s of sequence at position %s ' - 'is different from expected shape %s' % - (trunc.shape[1:], idx, sample_shape)) - - if padding == 'post': - x[idx, :len(trunc)] = trunc - elif padding == 'pre': - x[idx, -len(trunc):] = trunc - else: - raise ValueError('Padding type "%s" not understood' % padding) - return x - - -def make_sampling_table(size, sampling_factor=1e-5): - """Generates a word rank-based probabilistic sampling table. - - Used for generating the `sampling_table` argument for `skipgrams`. - `sampling_table[i]` is the probability of sampling - the word i-th most common word in a dataset - (more common words should be sampled less frequently, for balance). - - The sampling probabilities are generated according - to the sampling distribution used in word2vec: - - ``` - p(word) = (min(1, sqrt(word_frequency / sampling_factor) / - (word_frequency / sampling_factor))) - ``` - - We assume that the word frequencies follow Zipf's law (s=1) to derive - a numerical approximation of frequency(rank): - - `frequency(rank) ~ 1/(rank * (log(rank) + gamma) + 1/2 - 1/(12*rank))` - where `gamma` is the Euler-Mascheroni constant. - - # Arguments - size: Int, number of possible words to sample. - sampling_factor: The sampling factor in the word2vec formula. - - # Returns - A 1D Numpy array of length `size` where the ith entry - is the probability that a word of rank i should be sampled. - """ - gamma = 0.577 - rank = np.arange(size) - rank[0] = 1 - inv_fq = rank * (np.log(rank) + gamma) + 0.5 - 1. / (12. 
* rank) - f = sampling_factor * inv_fq - - return np.minimum(1., f / np.sqrt(f)) - - -def skipgrams(sequence, vocabulary_size, - window_size=4, negative_samples=1., shuffle=True, - categorical=False, sampling_table=None, seed=None): - """Generates skipgram word pairs. - - This function transforms a sequence of word indexes (list of integers) - into tuples of words of the form: - - - (word, word in the same window), with label 1 (positive samples). - - (word, random word from the vocabulary), with label 0 (negative samples). - - Read more about Skipgram in this gnomic paper by Mikolov et al.: - [Efficient Estimation of Word Representations in - Vector Space](http://arxiv.org/pdf/1301.3781v3.pdf) - - # Arguments - sequence: A word sequence (sentence), encoded as a list - of word indices (integers). If using a `sampling_table`, - word indices are expected to match the rank - of the words in a reference dataset (e.g. 10 would encode - the 10-th most frequently occurring token). - Note that index 0 is expected to be a non-word and will be skipped. - vocabulary_size: Int, maximum possible word index + 1 - window_size: Int, size of sampling windows (technically half-window). - The window of a word `w_i` will be - `[i - window_size, i + window_size+1]`. - negative_samples: Float >= 0. 0 for no negative (i.e. random) samples. - 1 for same number as positive samples. - shuffle: Whether to shuffle the word couples before returning them. - categorical: bool. if False, labels will be - integers (eg. `[0, 1, 1 .. ]`), - if `True`, labels will be categorical, e.g. - `[[1,0],[0,1],[0,1] .. ]`. - sampling_table: 1D array of size `vocabulary_size` where the entry i - encodes the probability to sample a word of rank i. - seed: Random seed. - - # Returns - couples, labels: where `couples` are int pairs and - `labels` are either 0 or 1. - - # Note - By convention, index 0 in the vocabulary is - a non-word and will be skipped. - """ - couples = [] - labels = [] - for i, wi in enumerate(sequence): - if not wi: - continue - if sampling_table is not None: - if sampling_table[wi] < random.random(): - continue - - window_start = max(0, i - window_size) - window_end = min(len(sequence), i + window_size + 1) - for j in range(window_start, window_end): - if j != i: - wj = sequence[j] - if not wj: - continue - couples.append([wi, wj]) - if categorical: - labels.append([0, 1]) - else: - labels.append(1) - - if negative_samples > 0: - num_negative_samples = int(len(labels) * negative_samples) - words = [c[0] for c in couples] - random.shuffle(words) - - couples += [[words[i % len(words)], - random.randint(1, vocabulary_size - 1)] - for i in range(num_negative_samples)] - if categorical: - labels += [[1, 0]] * num_negative_samples - else: - labels += [0] * num_negative_samples - - if shuffle: - if seed is None: - seed = random.randint(0, 10e6) - random.seed(seed) - random.shuffle(couples) - random.seed(seed) - random.shuffle(labels) - - return couples, labels - - -def _remove_long_seq(maxlen, seq, label): - """Removes sequences that exceed the maximum length. - - # Arguments - maxlen: Int, maximum length of the output sequences. - seq: List of lists, where each sublist is a sequence. - label: List where each element is an integer. - - # Returns - new_seq, new_label: shortened lists for `seq` and `label`. 
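# A minimal usage sketch for the sequence utilities above. The padded
# output is the documented pre-padding behaviour; the skipgram pairs are
# only illustrative, since negative sampling is random.
from keras_preprocessing.sequence import pad_sequences, skipgrams

padded = pad_sequences([[1], [1, 2], [1, 2, 3]], maxlen=3)
# -> [[0, 0, 1], [0, 1, 2], [1, 2, 3]]

pairs, labels = skipgrams([1, 2, 3], vocabulary_size=4, window_size=1)
# positive pairs such as [2, 1] or [2, 3] carry label 1; randomly drawn
# negative pairs carry label 0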
- """ - new_seq, new_label = [], [] - for x, y in zip(seq, label): - if len(x) < maxlen: - new_seq.append(x) - new_label.append(y) - return new_seq, new_label - - -class TimeseriesGenerator(object): - """Utility class for generating batches of temporal data. - - This class takes in a sequence of data-points gathered at - equal intervals, along with time series parameters such as - stride, length of history, etc., to produce batches for - training/validation. - - # Arguments - data: Indexable generator (such as list or Numpy array) - containing consecutive data points (timesteps). - The data should be at 2D, and axis 0 is expected - to be the time dimension. - targets: Targets corresponding to timesteps in `data`. - It should have same length as `data`. - length: Length of the output sequences (in number of timesteps). - sampling_rate: Period between successive individual timesteps - within sequences. For rate `r`, timesteps - `data[i]`, `data[i-r]`, ... `data[i - length]` - are used for create a sample sequence. - stride: Period between successive output sequences. - For stride `s`, consecutive output samples would - be centered around `data[i]`, `data[i+s]`, `data[i+2*s]`, etc. - start_index: Data points earlier than `start_index` will not be used - in the output sequences. This is useful to reserve part of the - data for test or validation. - end_index: Data points later than `end_index` will not be used - in the output sequences. This is useful to reserve part of the - data for test or validation. - shuffle: Whether to shuffle output samples, - or instead draw them in chronological order. - reverse: Boolean: if `true`, timesteps in each output sample will be - in reverse chronological order. - batch_size: Number of timeseries samples in each batch - (except maybe the last one). - - # Returns - A [Sequence](/utils/#sequence) instance. - - # Examples - - ```python - from keras.preprocessing.sequence import TimeseriesGenerator - import numpy as np - - data = np.array([[i] for i in range(50)]) - targets = np.array([[i] for i in range(50)]) - - data_gen = TimeseriesGenerator(data, targets, - length=10, sampling_rate=2, - batch_size=2) - assert len(data_gen) == 20 - - batch_0 = data_gen[0] - x, y = batch_0 - assert np.array_equal(x, - np.array([[[0], [2], [4], [6], [8]], - [[1], [3], [5], [7], [9]]])) - assert np.array_equal(y, - np.array([[10], [11]])) - ``` - """ - - def __init__(self, data, targets, length, - sampling_rate=1, - stride=1, - start_index=0, - end_index=None, - shuffle=False, - reverse=False, - batch_size=128): - - if len(data) != len(targets): - raise ValueError('Data and targets have to be' + - ' of same length. ' - 'Data length is {}'.format(len(data)) + - ' while target length is {}'.format(len(targets))) - - self.data = data - self.targets = targets - self.length = length - self.sampling_rate = sampling_rate - self.stride = stride - self.start_index = start_index + length - if end_index is None: - end_index = len(data) - 1 - self.end_index = end_index - self.shuffle = shuffle - self.reverse = reverse - self.batch_size = batch_size - - if self.start_index > self.end_index: - raise ValueError('`start_index+length=%i > end_index=%i` ' - 'is disallowed, as no part of the sequence ' - 'would be left to be used as current step.' 
- % (self.start_index, self.end_index)) - - def __len__(self): - return (self.end_index - self.start_index + - self.batch_size * self.stride) // (self.batch_size * self.stride) - - def __getitem__(self, index): - if self.shuffle: - rows = np.random.randint( - self.start_index, self.end_index + 1, size=self.batch_size) - else: - i = self.start_index + self.batch_size * self.stride * index - rows = np.arange(i, min(i + self.batch_size * - self.stride, self.end_index + 1), self.stride) - - samples = np.array([self.data[row - self.length:row:self.sampling_rate] - for row in rows]) - targets = np.array([self.targets[row] for row in rows]) - - if self.reverse: - return samples[:, ::-1, ...], targets - return samples, targets - - def get_config(self): - '''Returns the TimeseriesGenerator configuration as Python dictionary. - - # Returns - A Python dictionary with the TimeseriesGenerator configuration. - ''' - data = self.data - if type(self.data).__module__ == np.__name__: - data = self.data.tolist() - try: - json_data = json.dumps(data) - except TypeError: - raise TypeError('Data not JSON Serializable:', data) - - targets = self.targets - if type(self.targets).__module__ == np.__name__: - targets = self.targets.tolist() - try: - json_targets = json.dumps(targets) - except TypeError: - raise TypeError('Targets not JSON Serializable:', targets) - - return { - 'data': json_data, - 'targets': json_targets, - 'length': self.length, - 'sampling_rate': self.sampling_rate, - 'stride': self.stride, - 'start_index': self.start_index, - 'end_index': self.end_index, - 'shuffle': self.shuffle, - 'reverse': self.reverse, - 'batch_size': self.batch_size - } - - def to_json(self, **kwargs): - """Returns a JSON string containing the timeseries generator - configuration. To load a generator from a JSON string, use - `keras.preprocessing.sequence.timeseries_generator_from_json(json_string)`. - - # Arguments - **kwargs: Additional keyword arguments - to be passed to `json.dumps()`. - - # Returns - A JSON string containing the tokenizer configuration. - """ - config = self.get_config() - timeseries_generator_config = { - 'class_name': self.__class__.__name__, - 'config': config - } - return json.dumps(timeseries_generator_config, **kwargs) - - -def timeseries_generator_from_json(json_string): - """Parses a JSON timeseries generator configuration file and - returns a timeseries generator instance. - - # Arguments - json_string: JSON string encoding a timeseries - generator configuration. - - # Returns - A Keras TimeseriesGenerator instance - """ - full_config = json.loads(json_string) - config = full_config.get('config') - - data = json.loads(config.pop('data')) - config['data'] = data - targets = json.loads(config.pop('targets')) - config['targets'] = targets - - return TimeseriesGenerator(**config) -# -*- coding: utf-8 -*- -"""Utilities for text input preprocessing. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import string -import sys -import warnings -from collections import OrderedDict -from collections import defaultdict -from hashlib import md5 -import json - -import numpy as np -from six.moves import range -from six.moves import zip - -if sys.version_info < (3,): - maketrans = string.maketrans -else: - maketrans = str.maketrans - - -def text_to_word_sequence(text, - filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', - lower=True, split=" "): - """Converts a text to a sequence of words (or tokens). 
- - # Arguments - text: Input text (string). - filters: list (or concatenation) of characters to filter out, such as - punctuation. Default: ``!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\\t\\n``, - includes basic punctuation, tabs, and newlines. - lower: boolean. Whether to convert the input to lowercase. - split: str. Separator for word splitting. - - # Returns - A list of words (or tokens). - """ - if lower: - text = text.lower() - - if sys.version_info < (3,): - if isinstance(text, unicode): - translate_map = dict((ord(c), unicode(split)) for c in filters) - text = text.translate(translate_map) - elif len(split) == 1: - translate_map = maketrans(filters, split * len(filters)) - text = text.translate(translate_map) - else: - for c in filters: - text = text.replace(c, split) - else: - translate_dict = dict((c, split) for c in filters) - translate_map = maketrans(translate_dict) - text = text.translate(translate_map) - - seq = text.split(split) - return [i for i in seq if i] - - -def one_hot(text, n, - filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', - lower=True, - split=' '): - """One-hot encodes a text into a list of word indexes of size n. - - This is a wrapper to the `hashing_trick` function using `hash` as the - hashing function; unicity of word to index mapping non-guaranteed. - - # Arguments - text: Input text (string). - n: int. Size of vocabulary. - filters: list (or concatenation) of characters to filter out, such as - punctuation. Default: ``!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\\t\\n``, - includes basic punctuation, tabs, and newlines. - lower: boolean. Whether to set the text to lowercase. - split: str. Separator for word splitting. - - # Returns - List of integers in [1, n]. Each integer encodes a word - (unicity non-guaranteed). - """ - return hashing_trick(text, n, - hash_function=hash, - filters=filters, - lower=lower, - split=split) - - -def hashing_trick(text, n, - hash_function=None, - filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', - lower=True, - split=' '): - """Converts a text to a sequence of indexes in a fixed-size hashing space. - - # Arguments - text: Input text (string). - n: Dimension of the hashing space. - hash_function: defaults to python `hash` function, can be 'md5' or - any function that takes in input a string and returns a int. - Note that 'hash' is not a stable hashing function, so - it is not consistent across different runs, while 'md5' - is a stable hashing function. - filters: list (or concatenation) of characters to filter out, such as - punctuation. Default: ``!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\\t\\n``, - includes basic punctuation, tabs, and newlines. - lower: boolean. Whether to set the text to lowercase. - split: str. Separator for word splitting. - - # Returns - A list of integer word indices (unicity non-guaranteed). - - `0` is a reserved index that won't be assigned to any word. - - Two or more words may be assigned to the same index, due to possible - collisions by the hashing function. - The [probability]( - https://en.wikipedia.org/wiki/Birthday_problem#Probability_table) - of a collision is in relation to the dimension of the hashing space and - the number of distinct objects. - """ - if hash_function is None: - hash_function = hash - elif hash_function == 'md5': - def hash_function(w): - return int(md5(w.encode()).hexdigest(), 16) - - seq = text_to_word_sequence(text, - filters=filters, - lower=lower, - split=split) - return [(hash_function(w) % (n - 1) + 1) for w in seq] - - -class Tokenizer(object): - """Text tokenization utility class. 
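# A small sketch of the hashing-based encoders defined above. The exact
# indexes depend on the hash function; only the documented range is
# guaranteed (here, integers in [1, 4]).
from keras_preprocessing.text import hashing_trick, one_hot

encoded = one_hot('The cat sat on the mat.', n=5)
stable = hashing_trick('The cat sat on the mat.', 5,
                       hash_function='md5')  # md5 is stable across runs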
- - This class allows to vectorize a text corpus, by turning each - text into either a sequence of integers (each integer being the index - of a token in a dictionary) or into a vector where the coefficient - for each token could be binary, based on word count, based on tf-idf... - - # Arguments - num_words: the maximum number of words to keep, based - on word frequency. Only the most common `num_words-1` words will - be kept. - filters: a string where each element is a character that will be - filtered from the texts. The default is all punctuation, plus - tabs and line breaks, minus the `'` character. - lower: boolean. Whether to convert the texts to lowercase. - split: str. Separator for word splitting. - char_level: if True, every character will be treated as a token. - oov_token: if given, it will be added to word_index and used to - replace out-of-vocabulary words during text_to_sequence calls - - By default, all punctuation is removed, turning the texts into - space-separated sequences of words - (words maybe include the `'` character). These sequences are then - split into lists of tokens. They will then be indexed or vectorized. - - `0` is a reserved index that won't be assigned to any word. - """ - - def __init__(self, num_words=None, - filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n', - lower=True, - split=' ', - char_level=False, - oov_token=None, - document_count=0, - **kwargs): - # Legacy support - if 'nb_words' in kwargs: - warnings.warn('The `nb_words` argument in `Tokenizer` ' - 'has been renamed `num_words`.') - num_words = kwargs.pop('nb_words') - if kwargs: - raise TypeError('Unrecognized keyword arguments: ' + str(kwargs)) - - self.word_counts = OrderedDict() - self.word_docs = defaultdict(int) - self.filters = filters - self.split = split - self.lower = lower - self.num_words = num_words - self.document_count = document_count - self.char_level = char_level - self.oov_token = oov_token - self.index_docs = defaultdict(int) - self.word_index = dict() - self.index_word = dict() - - def fit_on_texts(self, texts): - """Updates internal vocabulary based on a list of texts. - - In the case where texts contains lists, - we assume each entry of the lists to be a token. - - Required before using `texts_to_sequences` or `texts_to_matrix`. - - # Arguments - texts: can be a list of strings, - a generator of strings (for memory-efficiency), - or a list of list of strings. 
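# A usage sketch for the fit/transform workflow described above; the index
# values in the comments are one possible outcome (ties in word frequency
# keep insertion order).
from keras_preprocessing.text import Tokenizer

tok = Tokenizer(num_words=10, oov_token='<unk>')
tok.fit_on_texts(['The cat sat on the mat.'])
print(tok.word_index)
# {'<unk>': 1, 'the': 2, 'cat': 3, 'sat': 4, 'on': 5, 'mat': 6}
print(tok.texts_to_sequences(['The dog sat.']))
# [[2, 1, 4]] -- 'dog' is out-of-vocabulary, so it maps to the oov index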
- """ - for text in texts: - self.document_count += 1 - if self.char_level or isinstance(text, list): - if self.lower: - if isinstance(text, list): - text = [text_elem.lower() for text_elem in text] - else: - text = text.lower() - seq = text - else: - seq = text_to_word_sequence(text, - self.filters, - self.lower, - self.split) - for w in seq: - if w in self.word_counts: - self.word_counts[w] += 1 - else: - self.word_counts[w] = 1 - for w in set(seq): - # In how many documents each word occurs - self.word_docs[w] += 1 - - wcounts = list(self.word_counts.items()) - wcounts.sort(key=lambda x: x[1], reverse=True) - # forcing the oov_token to index 1 if it exists - if self.oov_token is None: - sorted_voc = [] - else: - sorted_voc = [self.oov_token] - sorted_voc.extend(wc[0] for wc in wcounts) - - # note that index 0 is reserved, never assigned to an existing word - self.word_index = dict( - list(zip(sorted_voc, list(range(1, len(sorted_voc) + 1))))) - - self.index_word = dict((c, w) for w, c in self.word_index.items()) - - for w, c in list(self.word_docs.items()): - self.index_docs[self.word_index[w]] = c - - def fit_on_sequences(self, sequences): - """Updates internal vocabulary based on a list of sequences. - - Required before using `sequences_to_matrix` - (if `fit_on_texts` was never called). - - # Arguments - sequences: A list of sequence. - A "sequence" is a list of integer word indices. - """ - self.document_count += len(sequences) - for seq in sequences: - seq = set(seq) - for i in seq: - self.index_docs[i] += 1 - - def texts_to_sequences(self, texts): - """Transforms each text in texts to a sequence of integers. - - Only top `num_words-1` most frequent words will be taken into account. - Only words known by the tokenizer will be taken into account. - - # Arguments - texts: A list of texts (strings). - - # Returns - A list of sequences. - """ - return list(self.texts_to_sequences_generator(texts)) - - def texts_to_sequences_generator(self, texts): - """Transforms each text in `texts` to a sequence of integers. - - Each item in texts can also be a list, - in which case we assume each item of that list to be a token. - - Only top `num_words-1` most frequent words will be taken into account. - Only words known by the tokenizer will be taken into account. - - # Arguments - texts: A list of texts (strings). - - # Yields - Yields individual sequences. - """ - num_words = self.num_words - oov_token_index = self.word_index.get(self.oov_token) - for text in texts: - if self.char_level or isinstance(text, list): - if self.lower: - if isinstance(text, list): - text = [text_elem.lower() for text_elem in text] - else: - text = text.lower() - seq = text - else: - seq = text_to_word_sequence(text, - self.filters, - self.lower, - self.split) - vect = [] - for w in seq: - i = self.word_index.get(w) - if i is not None: - if num_words and i >= num_words: - if oov_token_index is not None: - vect.append(oov_token_index) - else: - vect.append(i) - elif self.oov_token is not None: - vect.append(oov_token_index) - yield vect - - def sequences_to_texts(self, sequences): - """Transforms each sequence into a list of text. - - Only top `num_words-1` most frequent words will be taken into account. - Only words known by the tokenizer will be taken into account. - - # Arguments - sequences: A list of sequences (list of integers). 
- - # Returns - A list of texts (strings) - """ - return list(self.sequences_to_texts_generator(sequences)) - - def sequences_to_texts_generator(self, sequences): - """Transforms each sequence in `sequences` to a list of texts(strings). - - Each sequence has to a list of integers. - In other words, sequences should be a list of sequences - - Only top `num_words-1` most frequent words will be taken into account. - Only words known by the tokenizer will be taken into account. - - # Arguments - sequences: A list of sequences. - - # Yields - Yields individual texts. - """ - num_words = self.num_words - oov_token_index = self.word_index.get(self.oov_token) - for seq in sequences: - vect = [] - for num in seq: - word = self.index_word.get(num) - if word is not None: - if num_words and num >= num_words: - if oov_token_index is not None: - vect.append(self.index_word[oov_token_index]) - else: - vect.append(word) - elif self.oov_token is not None: - vect.append(self.index_word[oov_token_index]) - vect = ' '.join(vect) - yield vect - - def texts_to_matrix(self, texts, mode='binary'): - """Convert a list of texts to a Numpy matrix. - - # Arguments - texts: list of strings. - mode: one of "binary", "count", "tfidf", "freq". - - # Returns - A Numpy matrix. - """ - sequences = self.texts_to_sequences(texts) - return self.sequences_to_matrix(sequences, mode=mode) - - def sequences_to_matrix(self, sequences, mode='binary'): - """Converts a list of sequences into a Numpy matrix. - - # Arguments - sequences: list of sequences - (a sequence is a list of integer word indices). - mode: one of "binary", "count", "tfidf", "freq" - - # Returns - A Numpy matrix. - - # Raises - ValueError: In case of invalid `mode` argument, - or if the Tokenizer requires to be fit to sample data. - """ - if not self.num_words: - if self.word_index: - num_words = len(self.word_index) + 1 - else: - raise ValueError('Specify a dimension (`num_words` argument), ' - 'or fit on some text data first.') - else: - num_words = self.num_words - - if mode == 'tfidf' and not self.document_count: - raise ValueError('Fit the Tokenizer on some data ' - 'before using tfidf mode.') - - x = np.zeros((len(sequences), num_words)) - for i, seq in enumerate(sequences): - if not seq: - continue - counts = defaultdict(int) - for j in seq: - if j >= num_words: - continue - counts[j] += 1 - for j, c in list(counts.items()): - if mode == 'count': - x[i][j] = c - elif mode == 'freq': - x[i][j] = c / len(seq) - elif mode == 'binary': - x[i][j] = 1 - elif mode == 'tfidf': - # Use weighting scheme 2 in - # https://en.wikipedia.org/wiki/Tf%E2%80%93idf - tf = 1 + np.log(c) - idf = np.log(1 + self.document_count / - (1 + self.index_docs.get(j, 0))) - x[i][j] = tf * idf - else: - raise ValueError('Unknown vectorization mode:', mode) - return x - - def get_config(self): - '''Returns the tokenizer configuration as Python dictionary. - The word count dictionaries used by the tokenizer get serialized - into plain JSON, so that the configuration can be read by other - projects. - - # Returns - A Python dictionary with the tokenizer configuration. 
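# A round-trip sketch for the tokenizer serialization API (get_config /
# to_json here, tokenizer_from_json below); the same behaviour is
# exercised by the serde tests later in this file.
from keras_preprocessing.text import Tokenizer, tokenizer_from_json

tok = Tokenizer(num_words=100)
tok.fit_on_texts(['The cat sat on the mat.'])
restored = tokenizer_from_json(tok.to_json())
assert restored.word_index == tok.word_index
assert restored.word_counts == tok.word_counts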
- ''' - json_word_counts = json.dumps(self.word_counts) - json_word_docs = json.dumps(self.word_docs) - json_index_docs = json.dumps(self.index_docs) - json_word_index = json.dumps(self.word_index) - json_index_word = json.dumps(self.index_word) - - return { - 'num_words': self.num_words, - 'filters': self.filters, - 'lower': self.lower, - 'split': self.split, - 'char_level': self.char_level, - 'oov_token': self.oov_token, - 'document_count': self.document_count, - 'word_counts': json_word_counts, - 'word_docs': json_word_docs, - 'index_docs': json_index_docs, - 'index_word': json_index_word, - 'word_index': json_word_index - } - - def to_json(self, **kwargs): - """Returns a JSON string containing the tokenizer configuration. - To load a tokenizer from a JSON string, use - `keras.preprocessing.text.tokenizer_from_json(json_string)`. - - # Arguments - **kwargs: Additional keyword arguments - to be passed to `json.dumps()`. - - # Returns - A JSON string containing the tokenizer configuration. - """ - config = self.get_config() - tokenizer_config = { - 'class_name': self.__class__.__name__, - 'config': config - } - return json.dumps(tokenizer_config, **kwargs) - - -def tokenizer_from_json(json_string): - """Parses a JSON tokenizer configuration file and returns a - tokenizer instance. - - # Arguments - json_string: JSON string encoding a tokenizer configuration. - - # Returns - A Keras Tokenizer instance - """ - tokenizer_config = json.loads(json_string) - config = tokenizer_config.get('config') - - word_counts = json.loads(config.pop('word_counts')) - word_docs = json.loads(config.pop('word_docs')) - index_docs = json.loads(config.pop('index_docs')) - # Integer indexing gets converted to strings with json.dumps() - index_docs = {int(k): v for k, v in index_docs.items()} - index_word = json.loads(config.pop('index_word')) - index_word = {int(k): v for k, v in index_word.items()} - word_index = json.loads(config.pop('word_index')) - - tokenizer = Tokenizer(**config) - tokenizer.word_counts = word_counts - tokenizer.word_docs = word_docs - tokenizer.index_docs = index_docs - tokenizer.word_index = word_index - tokenizer.index_word = index_word - - return tokenizer -from math import ceil -import pytest -import numpy as np -from numpy.testing import assert_allclose -from numpy.testing import assert_equal -from numpy.testing import assert_raises - -from keras_preprocessing import sequence - - -def test_pad_sequences(): - a = [[1], [1, 2], [1, 2, 3]] - - # test padding - b = sequence.pad_sequences(a, maxlen=3, padding='pre') - assert_allclose(b, [[0, 0, 1], [0, 1, 2], [1, 2, 3]]) - b = sequence.pad_sequences(a, maxlen=3, padding='post') - assert_allclose(b, [[1, 0, 0], [1, 2, 0], [1, 2, 3]]) - - # test truncating - b = sequence.pad_sequences(a, maxlen=2, truncating='pre') - assert_allclose(b, [[0, 1], [1, 2], [2, 3]]) - b = sequence.pad_sequences(a, maxlen=2, truncating='post') - assert_allclose(b, [[0, 1], [1, 2], [1, 2]]) - - # test value - b = sequence.pad_sequences(a, maxlen=3, value=1) - assert_allclose(b, [[1, 1, 1], [1, 1, 2], [1, 2, 3]]) - - -def test_pad_sequences_str(): - a = [['1'], ['1', '2'], ['1', '2', '3']] - - # test padding - b = sequence.pad_sequences( - a, maxlen=3, padding='pre', value='pad', dtype=object) - assert_equal(b, [['pad', 'pad', '1'], ['pad', '1', '2'], ['1', '2', '3']]) - b = sequence.pad_sequences( - a, maxlen=3, padding='post', value='pad', dtype=' end_index=49` is disallowed' in error - - -def test_TimeSeriesGenerator_doesnt_miss_any_sample(): - x = np.array([[i] 
for i in range(10)]) - - for length in range(3, 10): - g = sequence.TimeseriesGenerator(x, x, - length=length, - batch_size=1) - expected = max(0, len(x) - length) - actual = len(g) - - assert expected == actual - - if len(g) > 0: - # All elements in range(length, 10) should be used as current step - expected = np.arange(length, 10).reshape(-1, 1) - - y = np.concatenate([g[ix][1] for ix in range(len(g))], axis=0) - assert_allclose(y, expected) - - x = np.array([[i] for i in range(23)]) - - strides = (1, 1, 5, 7, 3, 5, 3) - lengths = (3, 3, 4, 3, 1, 3, 7) - batch_sizes = (6, 6, 6, 5, 6, 6, 6) - shuffles = (False, True, True, False, False, False, False) - - for stride, length, batch_size, shuffle in zip(strides, - lengths, - batch_sizes, - shuffles): - g = sequence.TimeseriesGenerator(x, x, - length=length, - sampling_rate=1, - stride=stride, - start_index=0, - end_index=None, - shuffle=shuffle, - reverse=False, - batch_size=batch_size) - if shuffle: - # all batches have the same size when shuffle is True. - expected_sequences = ceil( - (23 - length) / float(batch_size * stride)) * batch_size - else: - # last batch will be different if `(samples - length) / stride` - # is not a multiple of `batch_size`. - expected_sequences = ceil((23 - length) / float(stride)) - - expected_batches = ceil(expected_sequences / float(batch_size)) - - y = [g[ix][1] for ix in range(len(g))] - - actual_sequences = sum(len(_y) for _y in y) - actual_batches = len(y) - - assert expected_sequences == actual_sequences - assert expected_batches == actual_batches - - -if __name__ == '__main__': - pytest.main([__file__]) -import pytest - -import keras_preprocessing - - -def test_api_modules(): - expected_exposed_modules = [ - 'image', - 'sequence', - 'text' - ] - for _module in expected_exposed_modules: - assert hasattr(keras_preprocessing, _module) - - -def test_get_keras_submodule(monkeypatch): - monkeypatch.setattr(keras_preprocessing, '_KERAS_BACKEND', 'backend') - assert 'backend' == keras_preprocessing.get_keras_submodule('backend') - monkeypatch.setattr(keras_preprocessing, '_KERAS_UTILS', 'utils') - assert 'utils' == keras_preprocessing.get_keras_submodule('utils') - - -def test_get_keras_submodule_errors(monkeypatch): - with pytest.raises(ImportError): - keras_preprocessing.get_keras_submodule('something') - - monkeypatch.setattr(keras_preprocessing, '_KERAS_BACKEND', None) - with pytest.raises(ImportError): - keras_preprocessing.get_keras_submodule('backend') - - with pytest.raises(ImportError): - keras_preprocessing.get_keras_submodule('utils') -import importlib -import inspect -import re -import sys -from itertools import compress - -import pytest - -modules = ['keras_preprocessing', - 'keras_preprocessing.image', - 'keras_preprocessing.sequence', - 'keras_preprocessing.text'] - -# Tokenizer is being refactored PR #106 -accepted_name = ['set_keras_submodules', 'get_keras_submodule', 'Tokenizer'] -accepted_module = [] - -# Functions or classes with less than 'MIN_CODE_SIZE' lines can be ignored -MIN_CODE_SIZE = 10 - - -def handle_class_init(name, member): - init_args = [ - arg for arg in list(inspect.signature(member.__init__).parameters.keys()) - if arg not in ['self', 'args', 'kwargs'] - ] - assert_args_presence(init_args, member.__doc__, member, name) - - -def handle_class(name, member): - if is_accepted(name, member): - return - - if member.__doc__ is None and not member_too_small(member): - raise ValueError("{} class doesn't have any documentation".format(name), - member.__module__, 
inspect.getmodule(member).__file__) - - handle_class_init(name, member) - - for n, met in inspect.getmembers(member): - if inspect.ismethod(met): - handle_method(n, met) - - -def handle_function(name, member): - if is_accepted(name, member) or member_too_small(member): - # We don't need to check this one. - return - doc = member.__doc__ - if doc is None: - raise ValueError("{} function doesn't have any documentation".format(name), - member.__module__, inspect.getmodule(member).__file__) - - args = list(inspect.signature(member).parameters.keys()) - assert_args_presence(args, doc, member, name) - assert_function_style(name, member, doc, args) - assert_doc_style(name, member, doc) - - -def assert_doc_style(name, member, doc): - lines = doc.split("\n") - first_line = lines[0] - if len(first_line.strip()) == 0: - raise ValueError( - "{} the documentation should be on the first line.".format(name), - member.__module__) - first_blank = [i for i, line in enumerate(lines) if not line.strip()] - if len(first_blank) > 0: - if lines[first_blank[0] - 1].strip()[-1] != '.': - raise ValueError("{} first line should end with a '.'".format(name), - member.__module__) - - -def assert_function_style(name, member, doc, args): - code = inspect.getsource(member) - has_return = re.findall(r"\s*return \S+", code, re.MULTILINE) - if has_return and "# Returns" not in doc: - innerfunction = [inspect.getsource(x) for x in member.__code__.co_consts if - inspect.iscode(x)] - return_in_sub = [ret for code_inner in innerfunction for ret in - re.findall(r"\s*return \S+", code_inner, re.MULTILINE)] - if len(return_in_sub) < len(has_return): - raise ValueError("{} needs a '# Returns' section".format(name), - member.__module__) - - has_raise = re.findall(r"^\s*raise \S+", code, re.MULTILINE) - if has_raise and "# Raises" not in doc: - innerfunction = [inspect.getsource(x) for x in member.__code__.co_consts if - inspect.iscode(x)] - raise_in_sub = [ret for code_inner in innerfunction for ret in - re.findall(r"\s*raise \S+", code_inner, re.MULTILINE)] - if len(raise_in_sub) < len(has_raise): - raise ValueError("{} needs a '# Raises' section".format(name), - member.__module__) - - if len(args) > 0 and "# Arguments" not in doc: - raise ValueError("{} needs a '# Arguments' section".format(name), - member.__module__) - - assert_blank_before(name, member, doc, [ - '# Arguments', '# Raises', '# Returns']) - - -def assert_blank_before(name, member, doc, keywords): - doc_lines = [x.strip() for x in doc.split('\n')] - for keyword in keywords: - if keyword in doc_lines: - index = doc_lines.index(keyword) - if doc_lines[index - 1] != '': - raise ValueError( - "{} '{}' should have a blank line above.".format( - name, keyword), - member.__module__) - - -def is_accepted(name, member): - if 'keras_preprocessing' not in str(member.__module__): - return True - return name in accepted_name or member.__module__ in accepted_module - - -def member_too_small(member): - code = inspect.getsource(member).split('\n') - return len(code) < MIN_CODE_SIZE - - -def assert_args_presence(args, doc, member, name): - args_not_in_doc = [arg not in doc for arg in args] - if any(args_not_in_doc): - raise ValueError( - "{} {} arguments are not present in documentation ".format(name, list( - compress(args, args_not_in_doc))), member.__module__, member) - words = doc.replace('*', '').split() - # Check arguments styling - styles = [arg + ":" not in words for arg in args] - if any(styles): - raise ValueError( - "{} {} are not style properly 'argument': 
documentation".format( - name, - list(compress(args, styles))), - member.__module__) - - # Check arguments order - indexes = [words.index(arg + ":") for arg in args] - if indexes != sorted(indexes): - raise ValueError( - "{} arguments order is different from the documentation".format( - name), - member.__module__, indexes) - - -def handle_method(name, member): - if name in accepted_name or member.__module__ in accepted_module: - return - handle_function(name, member) - - -def handle_module(mod): - for name, mem in inspect.getmembers(mod): - if inspect.isclass(mem): - handle_class(name, mem) - elif inspect.isfunction(mem): - handle_function(name, mem) - elif 'keras_preprocessing' in name and inspect.ismodule(mem): - # Only test keras_preprocessing' modules - handle_module(mem) - - -@pytest.mark.skipif(sys.version_info < (3, 3), reason="requires python3.3") -def test_doc(): - for module in modules: - mod = importlib.import_module(module) - handle_module(mod) - - -if __name__ == '__main__': - pytest.main([__file__]) -# -*- coding: utf-8 -*- -import numpy as np -import pytest - -import keras -from keras_preprocessing import text -from collections import OrderedDict - - -def test_one_hot(): - sample_text = 'The cat sat on the mat.' - encoded = text.one_hot(sample_text, 5) - assert len(encoded) == 6 - assert np.max(encoded) <= 4 - assert np.min(encoded) >= 0 - - -def test_hashing_trick_hash(): - sample_text = 'The cat sat on the mat.' - encoded = text.hashing_trick(sample_text, 5) - assert len(encoded) == 6 - assert np.max(encoded) <= 4 - assert np.min(encoded) >= 1 - - -def test_hashing_trick_md5(): - sample_text = 'The cat sat on the mat.' - encoded = text.hashing_trick(sample_text, 5, hash_function='md5') - assert len(encoded) == 6 - assert np.max(encoded) <= 4 - assert np.min(encoded) >= 1 - - -def test_tokenizer(): - sample_texts = ['The cat sat on the mat.', - 'The dog sat on the log.', - 'Dogs and cats living together.'] - tokenizer = text.Tokenizer(num_words=10) - tokenizer.fit_on_texts(sample_texts) - - sequences = [] - for seq in tokenizer.texts_to_sequences_generator(sample_texts): - sequences.append(seq) - assert np.max(np.max(sequences)) < 10 - assert np.min(np.min(sequences)) == 1 - - tokenizer.fit_on_sequences(sequences) - - for mode in ['binary', 'count', 'tfidf', 'freq']: - tokenizer.texts_to_matrix(sample_texts, mode) - - -def test_tokenizer_serde_no_fitting(): - tokenizer = text.Tokenizer(num_words=100) - - tokenizer_json = tokenizer.to_json() - recovered = text.tokenizer_from_json(tokenizer_json) - - assert tokenizer.get_config() == recovered.get_config() - - assert tokenizer.word_docs == recovered.word_docs - assert tokenizer.word_counts == recovered.word_counts - assert tokenizer.word_index == recovered.word_index - assert tokenizer.index_word == recovered.index_word - assert tokenizer.index_docs == recovered.index_docs - - -def test_tokenizer_serde_fitting(): - sample_texts = [ - 'There was a time that the pieces fit, but I watched them fall away', - 'Mildewed and smoldering, strangled by our coveting', - 'I\'ve done the math enough to know the dangers of our second guessing'] - tokenizer = text.Tokenizer(num_words=100) - tokenizer.fit_on_texts(sample_texts) - - seq_generator = tokenizer.texts_to_sequences_generator(sample_texts) - sequences = [seq for seq in seq_generator] - tokenizer.fit_on_sequences(sequences) - - tokenizer_json = tokenizer.to_json() - recovered = text.tokenizer_from_json(tokenizer_json) - - assert tokenizer.char_level == recovered.char_level - assert 
tokenizer.document_count == recovered.document_count - assert tokenizer.filters == recovered.filters - assert tokenizer.lower == recovered.lower - assert tokenizer.num_words == recovered.num_words - assert tokenizer.oov_token == recovered.oov_token - - assert tokenizer.word_docs == recovered.word_docs - assert tokenizer.word_counts == recovered.word_counts - assert tokenizer.word_index == recovered.word_index - assert tokenizer.index_word == recovered.index_word - assert tokenizer.index_docs == recovered.index_docs - - -def test_sequential_fit(): - texts = ['The cat sat on the mat.', - 'The dog sat on the log.', - 'Dogs and cats living together.'] - word_sequences = [ - ['The', 'cat', 'is', 'sitting'], - ['The', 'dog', 'is', 'standing'] - ] - - tokenizer = text.Tokenizer() - tokenizer.fit_on_texts(texts) - tokenizer.fit_on_texts(word_sequences) - - assert tokenizer.document_count == 5 - - tokenizer.texts_to_matrix(texts) - tokenizer.texts_to_matrix(word_sequences) - - -def test_text_to_word_sequence(): - sample_text = 'hello! ? world!' - assert text.text_to_word_sequence(sample_text) == ['hello', 'world'] - - -def test_text_to_word_sequence_multichar_split(): - sample_text = 'hello!stop?world!' - assert text.text_to_word_sequence( - sample_text, split='stop') == ['hello', 'world'] - - -def test_text_to_word_sequence_unicode(): - sample_text = u'ali! veli? kırk dokuz elli' - assert text.text_to_word_sequence( - sample_text) == [u'ali', u'veli', u'kırk', u'dokuz', u'elli'] - - -def test_text_to_word_sequence_unicode_multichar_split(): - sample_text = u'ali!stopveli?stopkırkstopdokuzstopelli' - assert text.text_to_word_sequence( - sample_text, split='stop') == [u'ali', u'veli', u'kırk', u'dokuz', u'elli'] - - -def test_tokenizer_unicode(): - sample_texts = [u'ali veli kırk dokuz elli', - u'ali veli kırk dokuz elli veli kırk dokuz'] - tokenizer = text.Tokenizer(num_words=5) - tokenizer.fit_on_texts(sample_texts) - - assert len(tokenizer.word_counts) == 5 - - -def test_tokenizer_oov_flag(): - """Test of Out of Vocabulary (OOV) flag in text.Tokenizer - """ - x_train = ['This text has only known words'] - x_test = ['This text has some unknown words'] # 2 OOVs: some, unknown - - # Default, without OOV flag - tokenizer = text.Tokenizer() - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - assert len(x_test_seq[0]) == 4 # discards 2 OOVs - - # With OOV feature - tokenizer = text.Tokenizer(oov_token='') - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - assert len(x_test_seq[0]) == 6 # OOVs marked in place - - -def test_tokenizer_oov_flag_and_num_words(): - x_train = ['This text has only known words this text'] - x_test = ['This text has some unknown words'] - - tokenizer = keras.preprocessing.text.Tokenizer(num_words=3, - oov_token='') - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - trans_text = ' '.join(tokenizer.index_word[t] for t in x_test_seq[0]) - assert len(x_test_seq[0]) == 6 - assert trans_text == 'this ' - - -def test_sequences_to_texts_with_num_words_and_oov_token(): - x_train = ['This text has only known words this text'] - x_test = ['This text has some unknown words'] - - tokenizer = keras.preprocessing.text.Tokenizer(num_words=3, - oov_token='') - - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - trans_text = tokenizer.sequences_to_texts(x_test_seq) - assert trans_text == ['this '] - - -def test_sequences_to_texts_no_num_words(): - 
x_train = ['This text has only known words this text'] - x_test = ['This text has some unknown words'] - - tokenizer = keras.preprocessing.text.Tokenizer(oov_token='') - - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - trans_text = tokenizer.sequences_to_texts(x_test_seq) - assert trans_text == ['this text has words'] - - -def test_sequences_to_texts_no_oov_token(): - x_train = ['This text has only known words this text'] - x_test = ['This text has some unknown words'] - - tokenizer = keras.preprocessing.text.Tokenizer(num_words=3) - - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - trans_text = tokenizer.sequences_to_texts(x_test_seq) - assert trans_text == ['this text'] - - -def test_sequences_to_texts_no_num_words_no_oov_token(): - x_train = ['This text has only known words this text'] - x_test = ['This text has some unknown words'] - - tokenizer = keras.preprocessing.text.Tokenizer() - - tokenizer.fit_on_texts(x_train) - x_test_seq = tokenizer.texts_to_sequences(x_test) - trans_text = tokenizer.sequences_to_texts(x_test_seq) - assert trans_text == ['this text has words'] - - -def test_sequences_to_texts(): - texts = [ - 'The cat sat on the mat.', - 'The dog sat on the log.', - 'Dogs and cats living together.' - ] - tokenizer = keras.preprocessing.text.Tokenizer(num_words=10, - oov_token='') - tokenizer.fit_on_texts(texts) - tokenized_text = tokenizer.texts_to_sequences(texts) - trans_text = tokenizer.sequences_to_texts(tokenized_text) - assert trans_text == ['the cat sat on the mat', - 'the dog sat on the log', - 'dogs '] - - -def test_tokenizer_lower_flag(): - """Tests for `lower` flag in text.Tokenizer - """ - # word level tokenizer with sentences as texts - word_tokenizer = text.Tokenizer(lower=True) - texts = ['The cat sat on the mat.', - 'The dog sat on the log.', - 'Dog and Cat living Together.'] - word_tokenizer.fit_on_texts(texts) - expected_word_counts = OrderedDict([('the', 4), ('cat', 2), ('sat', 2), - ('on', 2), ('mat', 1), ('dog', 2), - ('log', 1), ('and', 1), ('living', 1), - ('together', 1)]) - assert word_tokenizer.word_counts == expected_word_counts - - # word level tokenizer with word_sequences as texts - word_tokenizer = text.Tokenizer(lower=True) - word_sequences = [ - ['The', 'cat', 'is', 'sitting'], - ['The', 'dog', 'is', 'standing'] - ] - word_tokenizer.fit_on_texts(word_sequences) - expected_word_counts = OrderedDict([('the', 2), ('cat', 1), ('is', 2), - ('sitting', 1), ('dog', 1), - ('standing', 1)]) - assert word_tokenizer.word_counts == expected_word_counts - - # char level tokenizer with sentences as texts - char_tokenizer = text.Tokenizer(lower=True, char_level=True) - texts = ['The cat sat on the mat.', - 'The dog sat on the log.', - 'Dog and Cat living Together.'] - char_tokenizer.fit_on_texts(texts) - expected_word_counts = OrderedDict([('t', 11), ('h', 5), ('e', 6), (' ', 14), - ('c', 2), ('a', 6), ('s', 2), ('o', 6), - ('n', 4), ('m', 1), ('.', 3), ('d', 3), - ('g', 5), ('l', 2), ('i', 2), ('v', 1), - ('r', 1)]) - assert char_tokenizer.word_counts == expected_word_counts - - -if __name__ == '__main__': - pytest.main([__file__]) -"""Enables dynamic setting of underlying Keras module. 
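# A usage sketch for the random affine helpers defined below; `x` is a
# hypothetical channels-first image tensor, and SciPy must be installed
# for the underlying affine transform.
import numpy as np
from keras_preprocessing.image import random_rotation, random_shift, random_zoom

x = np.random.uniform(0, 255, size=(3, 64, 64))
rotated = random_rotation(x, rg=40.)              # rotate up to +/- 40 degrees
shifted = random_shift(x, wrg=0.1, hrg=0.1)       # shift up to 10% of width/height
zoomed = random_zoom(x, zoom_range=(0.8, 1.2))    # zoom between 80% and 120%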
-""" -from __future__ import absolute_import -# flake8: noqa:F401 -from .affine_transformations import * -from .dataframe_iterator import DataFrameIterator -from .directory_iterator import DirectoryIterator -from .image_data_generator import ImageDataGenerator -from .iterator import Iterator -from .numpy_array_iterator import NumpyArrayIterator -from .utils import * -"""Utilities for performing affine transformations on image data. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np - -from .utils import (array_to_img, - img_to_array) - -try: - import scipy - # scipy.ndimage cannot be accessed until explicitly imported - from scipy import ndimage -except ImportError: - scipy = None - -try: - from PIL import ImageEnhance - from PIL import Image as pil_image -except ImportError: - pil_image = None - ImageEnhance = None - - -def flip_axis(x, axis): - x = np.asarray(x).swapaxes(axis, 0) - x = x[::-1, ...] - x = x.swapaxes(0, axis) - return x - - -def random_rotation(x, rg, row_axis=1, col_axis=2, channel_axis=0, - fill_mode='nearest', cval=0., interpolation_order=1): - """Performs a random rotation of a Numpy image tensor. - - # Arguments - x: Input tensor. Must be 3D. - rg: Rotation range, in degrees. - row_axis: Index of axis for rows in the input tensor. - col_axis: Index of axis for columns in the input tensor. - channel_axis: Index of axis for channels in the input tensor. - fill_mode: Points outside the boundaries of the input - are filled according to the given mode - (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). - cval: Value used for points outside the boundaries - of the input if `mode='constant'`. - interpolation_order: int, order of spline interpolation. - see `ndimage.interpolation.affine_transform` - - # Returns - Rotated Numpy image tensor. - """ - theta = np.random.uniform(-rg, rg) - x = apply_affine_transform(x, theta=theta, channel_axis=channel_axis, - fill_mode=fill_mode, cval=cval, - order=interpolation_order) - return x - - -def random_shift(x, wrg, hrg, row_axis=1, col_axis=2, channel_axis=0, - fill_mode='nearest', cval=0., interpolation_order=1): - """Performs a random spatial shift of a Numpy image tensor. - - # Arguments - x: Input tensor. Must be 3D. - wrg: Width shift range, as a float fraction of the width. - hrg: Height shift range, as a float fraction of the height. - row_axis: Index of axis for rows in the input tensor. - col_axis: Index of axis for columns in the input tensor. - channel_axis: Index of axis for channels in the input tensor. - fill_mode: Points outside the boundaries of the input - are filled according to the given mode - (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). - cval: Value used for points outside the boundaries - of the input if `mode='constant'`. - interpolation_order: int, order of spline interpolation. - see `ndimage.interpolation.affine_transform` - - # Returns - Shifted Numpy image tensor. - """ - h, w = x.shape[row_axis], x.shape[col_axis] - tx = np.random.uniform(-hrg, hrg) * h - ty = np.random.uniform(-wrg, wrg) * w - x = apply_affine_transform(x, tx=tx, ty=ty, channel_axis=channel_axis, - fill_mode=fill_mode, cval=cval, - order=interpolation_order) - return x - - -def random_shear(x, intensity, row_axis=1, col_axis=2, channel_axis=0, - fill_mode='nearest', cval=0., interpolation_order=1): - """Performs a random spatial shear of a Numpy image tensor. - - # Arguments - x: Input tensor. Must be 3D. 
- intensity: Transformation intensity in degrees. - row_axis: Index of axis for rows in the input tensor. - col_axis: Index of axis for columns in the input tensor. - channel_axis: Index of axis for channels in the input tensor. - fill_mode: Points outside the boundaries of the input - are filled according to the given mode - (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). - cval: Value used for points outside the boundaries - of the input if `mode='constant'`. - interpolation_order: int, order of spline interpolation. - see `ndimage.interpolation.affine_transform` - - # Returns - Sheared Numpy image tensor. - """ - shear = np.random.uniform(-intensity, intensity) - x = apply_affine_transform(x, shear=shear, channel_axis=channel_axis, - fill_mode=fill_mode, cval=cval, - order=interpolation_order) - return x - - -def random_zoom(x, zoom_range, row_axis=1, col_axis=2, channel_axis=0, - fill_mode='nearest', cval=0., interpolation_order=1): - """Performs a random spatial zoom of a Numpy image tensor. - - # Arguments - x: Input tensor. Must be 3D. - zoom_range: Tuple of floats; zoom range for width and height. - row_axis: Index of axis for rows in the input tensor. - col_axis: Index of axis for columns in the input tensor. - channel_axis: Index of axis for channels in the input tensor. - fill_mode: Points outside the boundaries of the input - are filled according to the given mode - (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). - cval: Value used for points outside the boundaries - of the input if `mode='constant'`. - interpolation_order: int, order of spline interpolation. - see `ndimage.interpolation.affine_transform` - - # Returns - Zoomed Numpy image tensor. - - # Raises - ValueError: if `zoom_range` isn't a tuple. - """ - if len(zoom_range) != 2: - raise ValueError('`zoom_range` should be a tuple or list of two' - ' floats. Received: %s' % (zoom_range,)) - - if zoom_range[0] == 1 and zoom_range[1] == 1: - zx, zy = 1, 1 - else: - zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2) - x = apply_affine_transform(x, zx=zx, zy=zy, channel_axis=channel_axis, - fill_mode=fill_mode, cval=cval, - order=interpolation_order) - return x - - -def apply_channel_shift(x, intensity, channel_axis=0): - """Performs a channel shift. - - # Arguments - x: Input tensor. Must be 3D. - intensity: Transformation intensity. - channel_axis: Index of axis for channels in the input tensor. - - # Returns - Numpy image tensor. - - """ - x = np.rollaxis(x, channel_axis, 0) - min_x, max_x = np.min(x), np.max(x) - channel_images = [ - np.clip(x_channel + intensity, - min_x, - max_x) - for x_channel in x] - x = np.stack(channel_images, axis=0) - x = np.rollaxis(x, 0, channel_axis + 1) - return x - - -def random_channel_shift(x, intensity_range, channel_axis=0): - """Performs a random channel shift. - - # Arguments - x: Input tensor. Must be 3D. - intensity_range: Transformation intensity. - channel_axis: Index of axis for channels in the input tensor. - - # Returns - Numpy image tensor. - """ - intensity = np.random.uniform(-intensity_range, intensity_range) - return apply_channel_shift(x, intensity, channel_axis=channel_axis) - - -def apply_brightness_shift(x, brightness): - """Performs a brightness shift. - - # Arguments - x: Input tensor. Must be 3D. - brightness: Float. The new brightness value. - channel_axis: Index of axis for channels in the input tensor. - - # Returns - Numpy image tensor. - - # Raises - ValueError if `brightness_range` isn't a tuple. 
- """ - if ImageEnhance is None: - raise ImportError('Using brightness shifts requires PIL. ' - 'Install PIL or Pillow.') - x = array_to_img(x) - x = imgenhancer_Brightness = ImageEnhance.Brightness(x) - x = imgenhancer_Brightness.enhance(brightness) - x = img_to_array(x) - return x - - -def random_brightness(x, brightness_range): - """Performs a random brightness shift. - - # Arguments - x: Input tensor. Must be 3D. - brightness_range: Tuple of floats; brightness range. - channel_axis: Index of axis for channels in the input tensor. - - # Returns - Numpy image tensor. - - # Raises - ValueError if `brightness_range` isn't a tuple. - """ - if len(brightness_range) != 2: - raise ValueError( - '`brightness_range should be tuple or list of two floats. ' - 'Received: %s' % (brightness_range,)) - - u = np.random.uniform(brightness_range[0], brightness_range[1]) - return apply_brightness_shift(x, u) - - -def transform_matrix_offset_center(matrix, x, y): - o_x = float(x) / 2 + 0.5 - o_y = float(y) / 2 + 0.5 - offset_matrix = np.array([[1, 0, o_x], [0, 1, o_y], [0, 0, 1]]) - reset_matrix = np.array([[1, 0, -o_x], [0, 1, -o_y], [0, 0, 1]]) - transform_matrix = np.dot(np.dot(offset_matrix, matrix), reset_matrix) - return transform_matrix - - -def apply_affine_transform(x, theta=0, tx=0, ty=0, shear=0, zx=1, zy=1, - row_axis=0, col_axis=1, channel_axis=2, - fill_mode='nearest', cval=0., order=1): - """Applies an affine transformation specified by the parameters given. - - # Arguments - x: 2D numpy array, single image. - theta: Rotation angle in degrees. - tx: Width shift. - ty: Heigh shift. - shear: Shear angle in degrees. - zx: Zoom in x direction. - zy: Zoom in y direction - row_axis: Index of axis for rows in the input image. - col_axis: Index of axis for columns in the input image. - channel_axis: Index of axis for channels in the input image. - fill_mode: Points outside the boundaries of the input - are filled according to the given mode - (one of `{'constant', 'nearest', 'reflect', 'wrap'}`). - cval: Value used for points outside the boundaries - of the input if `mode='constant'`. - order: int, order of interpolation - - # Returns - The transformed version of the input. - """ - if scipy is None: - raise ImportError('Image transformations require SciPy. 
' - 'Install SciPy.') - transform_matrix = None - if theta != 0: - theta = np.deg2rad(theta) - rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], - [np.sin(theta), np.cos(theta), 0], - [0, 0, 1]]) - transform_matrix = rotation_matrix - - if tx != 0 or ty != 0: - shift_matrix = np.array([[1, 0, tx], - [0, 1, ty], - [0, 0, 1]]) - if transform_matrix is None: - transform_matrix = shift_matrix - else: - transform_matrix = np.dot(transform_matrix, shift_matrix) - - if shear != 0: - shear = np.deg2rad(shear) - shear_matrix = np.array([[1, -np.sin(shear), 0], - [0, np.cos(shear), 0], - [0, 0, 1]]) - if transform_matrix is None: - transform_matrix = shear_matrix - else: - transform_matrix = np.dot(transform_matrix, shear_matrix) - - if zx != 1 or zy != 1: - zoom_matrix = np.array([[zx, 0, 0], - [0, zy, 0], - [0, 0, 1]]) - if transform_matrix is None: - transform_matrix = zoom_matrix - else: - transform_matrix = np.dot(transform_matrix, zoom_matrix) - - if transform_matrix is not None: - h, w = x.shape[row_axis], x.shape[col_axis] - transform_matrix = transform_matrix_offset_center( - transform_matrix, h, w) - x = np.rollaxis(x, channel_axis, 0) - final_affine_matrix = transform_matrix[:2, :2] - final_offset = transform_matrix[:2, 2] - - channel_images = [ndimage.interpolation.affine_transform( - x_channel, - final_affine_matrix, - final_offset, - order=order, - mode=fill_mode, - cval=cval) for x_channel in x] - x = np.stack(channel_images, axis=0) - x = np.rollaxis(x, 0, channel_axis + 1) - return x -"""Utilities for real-time data augmentation on image data. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import warnings - -import numpy as np - -from .iterator import BatchFromFilesMixin, Iterator -from .utils import validate_filename - - -class DataFrameIterator(BatchFromFilesMixin, Iterator): - """Iterator capable of reading images from a directory on disk - through a dataframe. - - # Arguments - dataframe: Pandas dataframe containing the filepaths relative to - `directory` (or absolute paths if `directory` is None) of the - images in a string column. It should include other column/s - depending on the `class_mode`: - - if `class_mode` is `"categorical"` (default value) it must - include the `y_col` column with the class/es of each image. - Values in column can be string/list/tuple if a single class - or list/tuple if multiple classes. - - if `class_mode` is `"binary"` or `"sparse"` it must include - the given `y_col` column with class values as strings. - - if `class_mode` is `"raw"` or `"multi_output"` it should contain - the columns specified in `y_col`. - - if `class_mode` is `"input"` or `None` no extra column is needed. - directory: string, path to the directory to read images from. If `None`, - data in `x_col` column should be absolute paths. - image_data_generator: Instance of `ImageDataGenerator` to use for - random transformations and normalization. If None, no transformations - and normalizations are made. - x_col: string, column in `dataframe` that contains the filenames (or - absolute paths if `directory` is `None`). - y_col: string or list, column/s in `dataframe` that has the target data. - weight_col: string, column in `dataframe` that contains the sample - weights. Default: `None`. - target_size: tuple of integers, dimensions to resize input images to. - color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`. - Color mode to read images. 
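A minimal sketch of driving these affine helpers directly (assuming SciPy is installed; the array is a stand-in for a real image). Note that `apply_affine_transform` defaults to channels-last axes (`row_axis=0, col_axis=1, channel_axis=2`), while the `random_*` wrappers default to `row_axis=1, col_axis=2, channel_axis=0`:

```python
import numpy as np
from keras_preprocessing.image import apply_affine_transform, random_rotation

img = np.random.rand(64, 64, 3)  # dummy channels_last image

# One resampling pass that composes the rotation, shift and zoom
# matrices before calling scipy.ndimage once.
out = apply_affine_transform(img, theta=15, tx=4, ty=-2, zx=1.2, zy=1.2,
                             fill_mode='reflect')

# The random_* wrappers draw the parameters for you; pass the axes
# explicitly for channels_last input.
rot = random_rotation(img, rg=30, row_axis=0, col_axis=1, channel_axis=2)
print(out.shape, rot.shape)  # (64, 64, 3) (64, 64, 3)
```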
- classes: Optional list of strings, classes to use (e.g. `["dogs", "cats"]`). - If None, all classes in `y_col` will be used. - class_mode: one of "binary", "categorical", "input", "multi_output", - "raw", "sparse" or None. Default: "categorical". - Mode for yielding the targets: - - `"binary"`: 1D numpy array of binary labels, - - `"categorical"`: 2D numpy array of one-hot encoded labels. - Supports multi-label output. - - `"input"`: images identical to input images (mainly used to - work with autoencoders), - - `"multi_output"`: list with the values of the different columns, - - `"raw"`: numpy array of values in `y_col` column(s), - - `"sparse"`: 1D numpy array of integer labels, - - `None`, no targets are returned (the generator will only yield - batches of image data, which is useful to use in - `model.predict_generator()`). - batch_size: Integer, size of a batch. - shuffle: Boolean, whether to shuffle the data between epochs. - seed: Random seed for data shuffling. - data_format: String, one of `channels_first`, `channels_last`. - save_to_dir: Optional directory where to save the pictures - being yielded, in a viewable format. This is useful - for visualizing the random transformations being - applied, for debugging purposes. - save_prefix: String prefix to use for saving sample - images (if `save_to_dir` is set). - save_format: Format to use for saving sample images - (if `save_to_dir` is set). - subset: Subset of data (`"training"` or `"validation"`) if - validation_split is set in ImageDataGenerator. - interpolation: Interpolation method used to resample the image if the - target size is different from that of the loaded image. - Supported methods are "nearest", "bilinear", and "bicubic". - If PIL version 1.1.3 or newer is installed, "lanczos" is also - supported. If PIL version 3.4.0 or newer is installed, "box" and - "hamming" are also supported. By default, "nearest" is used. - dtype: Dtype to use for the generated arrays. - validate_filenames: Boolean, whether to validate image filenames in - `x_col`. If `True`, invalid images will be ignored. Disabling this option - can lead to speed-up in the instantiation of this class. Default: `True`. 
- """ - allowed_class_modes = { - 'binary', 'categorical', 'input', 'multi_output', 'raw', 'sparse', None - } - - def __init__(self, - dataframe, - directory=None, - image_data_generator=None, - x_col="filename", - y_col="class", - weight_col=None, - target_size=(256, 256), - color_mode='rgb', - classes=None, - class_mode='categorical', - batch_size=32, - shuffle=True, - seed=None, - data_format='channels_last', - save_to_dir=None, - save_prefix='', - save_format='png', - subset=None, - interpolation='nearest', - dtype='float32', - validate_filenames=True): - - super(DataFrameIterator, self).set_processing_attrs(image_data_generator, - target_size, - color_mode, - data_format, - save_to_dir, - save_prefix, - save_format, - subset, - interpolation) - df = dataframe.copy() - self.directory = directory or '' - self.class_mode = class_mode - self.dtype = dtype - # check that inputs match the required class_mode - self._check_params(df, x_col, y_col, weight_col, classes) - if validate_filenames: # check which image files are valid and keep them - df = self._filter_valid_filepaths(df, x_col) - if class_mode not in ["input", "multi_output", "raw", None]: - df, classes = self._filter_classes(df, y_col, classes) - num_classes = len(classes) - # build an index of all the unique classes - self.class_indices = dict(zip(classes, range(len(classes)))) - # retrieve only training or validation set - if self.split: - num_files = len(df) - start = int(self.split[0] * num_files) - stop = int(self.split[1] * num_files) - df = df.iloc[start: stop, :] - # get labels for each observation - if class_mode not in ["input", "multi_output", "raw", None]: - self.classes = self.get_classes(df, y_col) - self.filenames = df[x_col].tolist() - self._sample_weight = df[weight_col].values if weight_col else None - - if class_mode == "multi_output": - self._targets = [np.array(df[col].tolist()) for col in y_col] - if class_mode == "raw": - self._targets = df[y_col].values - self.samples = len(self.filenames) - validated_string = 'validated' if validate_filenames else 'non-validated' - if class_mode in ["input", "multi_output", "raw", None]: - print('Found {} {} image filenames.' - .format(self.samples, validated_string)) - else: - print('Found {} {} image filenames belonging to {} classes.' - .format(self.samples, validated_string, num_classes)) - self._filepaths = [ - os.path.join(self.directory, fname) for fname in self.filenames - ] - super(DataFrameIterator, self).__init__(self.samples, - batch_size, - shuffle, - seed) - - def _check_params(self, df, x_col, y_col, weight_col, classes): - # check class mode is one of the currently supported - if self.class_mode not in self.allowed_class_modes: - raise ValueError('Invalid class_mode: {}; expected one of: {}' - .format(self.class_mode, self.allowed_class_modes)) - # check that y_col has several column names if class_mode is multi_output - if (self.class_mode == 'multi_output') and not isinstance(y_col, list): - raise TypeError( - 'If class_mode="{}", y_col must be a list. Received {}.' - .format(self.class_mode, type(y_col).__name__) - ) - # check that filenames/filepaths column values are all strings - if not all(df[x_col].apply(lambda x: isinstance(x, str))): - raise TypeError('All values in column x_col={} must be strings.' 
- .format(x_col)) - # check labels are string if class_mode is binary or sparse - if self.class_mode in {'binary', 'sparse'}: - if not all(df[y_col].apply(lambda x: isinstance(x, str))): - raise TypeError('If class_mode="{}", y_col="{}" column ' - 'values must be strings.' - .format(self.class_mode, y_col)) - # check that if binary there are only 2 different classes - if self.class_mode == 'binary': - if classes: - classes = set(classes) - if len(classes) != 2: - raise ValueError('If class_mode="binary" there must be 2 ' - 'classes. {} class/es were given.' - .format(len(classes))) - elif df[y_col].nunique() != 2: - raise ValueError('If class_mode="binary" there must be 2 classes. ' - 'Found {} classes.'.format(df[y_col].nunique())) - # check values are string, list or tuple if class_mode is categorical - if self.class_mode == 'categorical': - types = (str, list, tuple) - if not all(df[y_col].apply(lambda x: isinstance(x, types))): - raise TypeError('If class_mode="{}", y_col="{}" column ' - 'values must be type string, list or tuple.' - .format(self.class_mode, y_col)) - # raise warning if classes are given but will be unused - if classes and self.class_mode in {"input", "multi_output", "raw", None}: - warnings.warn('`classes` will be ignored given the class_mode="{}"' - .format(self.class_mode)) - # check that if weight column that the values are numerical - if weight_col and not issubclass(df[weight_col].dtype.type, np.number): - raise TypeError('Column weight_col={} must be numeric.' - .format(weight_col)) - - def get_classes(self, df, y_col): - labels = [] - for label in df[y_col]: - if isinstance(label, (list, tuple)): - labels.append([self.class_indices[lbl] for lbl in label]) - else: - labels.append(self.class_indices[label]) - return labels - - @staticmethod - def _filter_classes(df, y_col, classes): - df = df.copy() - - def remove_classes(labels, classes): - if isinstance(labels, (list, tuple)): - labels = [cls for cls in labels if cls in classes] - return labels or None - elif isinstance(labels, str): - return labels if labels in classes else None - else: - raise TypeError( - "Expect string, list or tuple but found {} in {} column " - .format(type(labels), y_col) - ) - - if classes: - classes = set(classes) # sort and prepare for membership lookup - df[y_col] = df[y_col].apply(lambda x: remove_classes(x, classes)) - else: - classes = set() - for v in df[y_col]: - if isinstance(v, (list, tuple)): - classes.update(v) - else: - classes.add(v) - return df.dropna(subset=[y_col]), sorted(classes) - - def _filter_valid_filepaths(self, df, x_col): - """Keep only dataframe rows with valid filenames - - # Arguments - df: Pandas dataframe containing filenames in a column - x_col: string, column in `df` that contains the filenames or filepaths - - # Returns - absolute paths to image files - """ - filepaths = df[x_col].map( - lambda fname: os.path.join(self.directory, fname) - ) - mask = filepaths.apply( - validate_filename, args=(self.white_list_formats,)) - n_invalid = (~mask).sum() - if n_invalid: - warnings.warn( - 'Found {} invalid image filename(s) in x_col="{}". ' - 'These filename(s) will be ignored.' - .format(n_invalid, x_col) - ) - return df[mask] - - @property - def filepaths(self): - return self._filepaths - - @property - def labels(self): - if self.class_mode in {"multi_output", "raw"}: - return self._targets - else: - return self.classes - - @property - def sample_weight(self): - return self._sample_weight -"""Utilities for real-time data augmentation on image data. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import multiprocessing.pool -from six.moves import range - -import numpy as np - -from .iterator import BatchFromFilesMixin, Iterator -from .utils import _list_valid_filenames_in_directory - - -class DirectoryIterator(BatchFromFilesMixin, Iterator): - """Iterator capable of reading images from a directory on disk. - - # Arguments - directory: string, path to the directory to read images from. - Each subdirectory in this directory will be - considered to contain images from one class, - or alternatively you could specify class subdirectories - via the `classes` argument. - image_data_generator: Instance of `ImageDataGenerator` - to use for random transformations and normalization. - target_size: tuple of integers, dimensions to resize input images to. - color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`. - Color mode to read images. - classes: Optional list of strings, names of subdirectories - containing images from each class (e.g. `["dogs", "cats"]`). - It will be computed automatically if not set. - class_mode: Mode for yielding the targets: - `"binary"`: binary targets (if there are only two classes), - `"categorical"`: categorical targets, - `"sparse"`: integer targets, - `"input"`: targets are images identical to input images (mainly - used to work with autoencoders), - `None`: no targets get yielded (only input images are yielded). - batch_size: Integer, size of a batch. - shuffle: Boolean, whether to shuffle the data between epochs. - If set to False, sorts the data in alphanumeric order. - seed: Random seed for data shuffling. - data_format: String, one of `channels_first`, `channels_last`. - save_to_dir: Optional directory where to save the pictures - being yielded, in a viewable format. This is useful - for visualizing the random transformations being - applied, for debugging purposes. - save_prefix: String prefix to use for saving sample - images (if `save_to_dir` is set). - save_format: Format to use for saving sample images - (if `save_to_dir` is set). - follow_links: boolean,follow symbolic links to subdirectories - subset: Subset of data (`"training"` or `"validation"`) if - validation_split is set in ImageDataGenerator. - interpolation: Interpolation method used to resample the image if the - target size is different from that of the loaded image. - Supported methods are "nearest", "bilinear", and "bicubic". - If PIL version 1.1.3 or newer is installed, "lanczos" is also - supported. If PIL version 3.4.0 or newer is installed, "box" and - "hamming" are also supported. By default, "nearest" is used. - dtype: Dtype to use for generated arrays. 
- """ - allowed_class_modes = {'categorical', 'binary', 'sparse', 'input', None} - - def __init__(self, - directory, - image_data_generator, - target_size=(256, 256), - color_mode='rgb', - classes=None, - class_mode='categorical', - batch_size=32, - shuffle=True, - seed=None, - data_format='channels_last', - save_to_dir=None, - save_prefix='', - save_format='png', - follow_links=False, - subset=None, - interpolation='nearest', - dtype='float32'): - super(DirectoryIterator, self).set_processing_attrs(image_data_generator, - target_size, - color_mode, - data_format, - save_to_dir, - save_prefix, - save_format, - subset, - interpolation) - self.directory = directory - self.classes = classes - if class_mode not in self.allowed_class_modes: - raise ValueError('Invalid class_mode: {}; expected one of: {}' - .format(class_mode, self.allowed_class_modes)) - self.class_mode = class_mode - self.dtype = dtype - # First, count the number of samples and classes. - self.samples = 0 - - if not classes: - classes = [] - for subdir in sorted(os.listdir(directory)): - if os.path.isdir(os.path.join(directory, subdir)): - classes.append(subdir) - self.num_classes = len(classes) - self.class_indices = dict(zip(classes, range(len(classes)))) - - pool = multiprocessing.pool.ThreadPool() - - # Second, build an index of the images - # in the different class subfolders. - results = [] - self.filenames = [] - i = 0 - for dirpath in (os.path.join(directory, subdir) for subdir in classes): - results.append( - pool.apply_async(_list_valid_filenames_in_directory, - (dirpath, self.white_list_formats, self.split, - self.class_indices, follow_links))) - classes_list = [] - for res in results: - classes, filenames = res.get() - classes_list.append(classes) - self.filenames += filenames - self.samples = len(self.filenames) - self.classes = np.zeros((self.samples,), dtype='int32') - for classes in classes_list: - self.classes[i:i + len(classes)] = classes - i += len(classes) - - print('Found %d images belonging to %d classes.' % - (self.samples, self.num_classes)) - pool.close() - pool.join() - self._filepaths = [ - os.path.join(self.directory, fname) for fname in self.filenames - ] - super(DirectoryIterator, self).__init__(self.samples, - batch_size, - shuffle, - seed) - - @property - def filepaths(self): - return self._filepaths - - @property - def labels(self): - return self.classes - - @property # mixin needs this property to work - def sample_weight(self): - # no sample weights will be returned - return None -"""Utilities for real-time data augmentation on image data. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import warnings -from six.moves import range - -import numpy as np - -try: - import scipy - # scipy.linalg cannot be accessed until explicitly imported - from scipy import linalg - # scipy.ndimage cannot be accessed until explicitly imported -except ImportError: - scipy = None - -from .dataframe_iterator import DataFrameIterator -from .directory_iterator import DirectoryIterator -from .numpy_array_iterator import NumpyArrayIterator -from .affine_transformations import (apply_affine_transform, - apply_brightness_shift, - apply_channel_shift, - flip_axis) - - -class ImageDataGenerator(object): - """Generate batches of tensor image data with real-time data augmentation. - The data will be looped over (in batches). - - # Arguments - featurewise_center: Boolean. - Set input mean to 0 over the dataset, feature-wise. - samplewise_center: Boolean. 
Set each sample mean to 0. - featurewise_std_normalization: Boolean. - Divide inputs by std of the dataset, feature-wise. - samplewise_std_normalization: Boolean. Divide each input by its std. - zca_whitening: Boolean. Apply ZCA whitening. - zca_epsilon: epsilon for ZCA whitening. Default is 1e-6. - rotation_range: Int. Degree range for random rotations. - width_shift_range: Float, 1-D array-like or int - - float: fraction of total width, if < 1, or pixels if >= 1. - - 1-D array-like: random elements from the array. - - int: integer number of pixels from interval - `(-width_shift_range, +width_shift_range)` - - With `width_shift_range=2` possible values - are integers `[-1, 0, +1]`, - same as with `width_shift_range=[-1, 0, +1]`, - while with `width_shift_range=1.0` possible values are floats - in the interval [-1.0, +1.0). - height_shift_range: Float, 1-D array-like or int - - float: fraction of total height, if < 1, or pixels if >= 1. - - 1-D array-like: random elements from the array. - - int: integer number of pixels from interval - `(-height_shift_range, +height_shift_range)` - - With `height_shift_range=2` possible values - are integers `[-1, 0, +1]`, - same as with `height_shift_range=[-1, 0, +1]`, - while with `height_shift_range=1.0` possible values are floats - in the interval [-1.0, +1.0). - brightness_range: Tuple or list of two floats. Range for picking - a brightness shift value from. - shear_range: Float. Shear Intensity - (Shear angle in counter-clockwise direction in degrees) - zoom_range: Float or [lower, upper]. Range for random zoom. - If a float, `[lower, upper] = [1-zoom_range, 1+zoom_range]`. - channel_shift_range: Float. Range for random channel shifts. - fill_mode: One of {"constant", "nearest", "reflect" or "wrap"}. - Default is 'nearest'. - Points outside the boundaries of the input are filled - according to the given mode: - - 'constant': kkkkkkkk|abcd|kkkkkkkk (cval=k) - - 'nearest': aaaaaaaa|abcd|dddddddd - - 'reflect': abcddcba|abcd|dcbaabcd - - 'wrap': abcdabcd|abcd|abcdabcd - cval: Float or Int. - Value used for points outside the boundaries - when `fill_mode = "constant"`. - horizontal_flip: Boolean. Randomly flip inputs horizontally. - vertical_flip: Boolean. Randomly flip inputs vertically. - rescale: rescaling factor. Defaults to None. - If None or 0, no rescaling is applied, - otherwise we multiply the data by the value provided - (after applying all other transformations). - preprocessing_function: function that will be applied on each input. - The function will run after the image is resized and augmented. - The function should take one argument: - one image (Numpy tensor with rank 3), - and should output a Numpy tensor with the same shape. - data_format: Image data format, - either "channels_first" or "channels_last". - "channels_last" mode means that the images should have shape - `(samples, height, width, channels)`, - "channels_first" mode means that the images should have shape - `(samples, channels, height, width)`. - It defaults to the `image_data_format` value found in your - Keras config file at `~/.keras/keras.json`. - If you never set it, then it will be "channels_last". - validation_split: Float. Fraction of images reserved for validation - (strictly between 0 and 1). - interpolation_order: int, order to use for - the spline interpolation. Higher is slower. - dtype: Dtype to use for the generated arrays. 
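The float/int/array distinction for the shift ranges described above is easy to trip over; a short sketch of the three spellings, under the rules just listed:

```python
from keras_preprocessing.image import ImageDataGenerator

ImageDataGenerator(width_shift_range=0.2)         # fraction of width: [-20%, +20%)
ImageDataGenerator(width_shift_range=2)           # whole pixels: -1, 0 or +1
ImageDataGenerator(width_shift_range=[-5, 0, 5])  # sampled from this list
```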
- - # Examples - Example of using `.flow(x, y)`: - - ```python - (x_train, y_train), (x_test, y_test) = cifar10.load_data() - y_train = np_utils.to_categorical(y_train, num_classes) - y_test = np_utils.to_categorical(y_test, num_classes) - - datagen = ImageDataGenerator( - featurewise_center=True, - featurewise_std_normalization=True, - rotation_range=20, - width_shift_range=0.2, - height_shift_range=0.2, - horizontal_flip=True) - - # compute quantities required for featurewise normalization - # (std, mean, and principal components if ZCA whitening is applied) - datagen.fit(x_train) - - # fits the model on batches with real-time data augmentation: - model.fit_generator(datagen.flow(x_train, y_train, batch_size=32), - steps_per_epoch=len(x_train) / 32, epochs=epochs) - - # here's a more "manual" example - for e in range(epochs): - print('Epoch', e) - batches = 0 - for x_batch, y_batch in datagen.flow(x_train, y_train, batch_size=32): - model.fit(x_batch, y_batch) - batches += 1 - if batches >= len(x_train) / 32: - # we need to break the loop by hand because - # the generator loops indefinitely - break - ``` - Example of using `.flow_from_directory(directory)`: - - ```python - train_datagen = ImageDataGenerator( - rescale=1./255, - shear_range=0.2, - zoom_range=0.2, - horizontal_flip=True) - - test_datagen = ImageDataGenerator(rescale=1./255) - - train_generator = train_datagen.flow_from_directory( - 'data/train', - target_size=(150, 150), - batch_size=32, - class_mode='binary') - - validation_generator = test_datagen.flow_from_directory( - 'data/validation', - target_size=(150, 150), - batch_size=32, - class_mode='binary') - - model.fit_generator( - train_generator, - steps_per_epoch=2000, - epochs=50, - validation_data=validation_generator, - validation_steps=800) - ``` - - Example of transforming images and masks together. 
- - ```python - # we create two instances with the same arguments - data_gen_args = dict(featurewise_center=True, - featurewise_std_normalization=True, - rotation_range=90, - width_shift_range=0.1, - height_shift_range=0.1, - zoom_range=0.2) - image_datagen = ImageDataGenerator(**data_gen_args) - mask_datagen = ImageDataGenerator(**data_gen_args) - - # Provide the same seed and keyword arguments to the fit and flow methods - seed = 1 - image_datagen.fit(images, augment=True, seed=seed) - mask_datagen.fit(masks, augment=True, seed=seed) - - image_generator = image_datagen.flow_from_directory( - 'data/images', - class_mode=None, - seed=seed) - - mask_generator = mask_datagen.flow_from_directory( - 'data/masks', - class_mode=None, - seed=seed) - - # combine generators into one which yields image and masks - train_generator = zip(image_generator, mask_generator) - - model.fit_generator( - train_generator, - steps_per_epoch=2000, - epochs=50) - ``` - - Example of using ```.flow_from_dataframe(dataframe, directory, - x_col, y_col)```: - - ```python - - train_df = pandas.read_csv("./train.csv") - valid_df = pandas.read_csv("./valid.csv") - - train_datagen = ImageDataGenerator( - rescale=1./255, - shear_range=0.2, - zoom_range=0.2, - horizontal_flip=True) - - test_datagen = ImageDataGenerator(rescale=1./255) - - train_generator = train_datagen.flow_from_dataframe( - dataframe=train_df, - directory='data/train', - x_col="filename", - y_col="class", - target_size=(150, 150), - batch_size=32, - class_mode='binary') - - validation_generator = test_datagen.flow_from_dataframe( - dataframe=valid_df, - directory='data/validation', - x_col="filename", - y_col="class", - target_size=(150, 150), - batch_size=32, - class_mode='binary') - - model.fit_generator( - train_generator, - steps_per_epoch=2000, - epochs=50, - validation_data=validation_generator, - validation_steps=800) - ``` - """ - - def __init__(self, - featurewise_center=False, - samplewise_center=False, - featurewise_std_normalization=False, - samplewise_std_normalization=False, - zca_whitening=False, - zca_epsilon=1e-6, - rotation_range=0, - width_shift_range=0., - height_shift_range=0., - brightness_range=None, - shear_range=0., - zoom_range=0., - channel_shift_range=0., - fill_mode='nearest', - cval=0., - horizontal_flip=False, - vertical_flip=False, - rescale=None, - preprocessing_function=None, - data_format='channels_last', - validation_split=0.0, - interpolation_order=1, - dtype='float32'): - - self.featurewise_center = featurewise_center - self.samplewise_center = samplewise_center - self.featurewise_std_normalization = featurewise_std_normalization - self.samplewise_std_normalization = samplewise_std_normalization - self.zca_whitening = zca_whitening - self.zca_epsilon = zca_epsilon - self.rotation_range = rotation_range - self.width_shift_range = width_shift_range - self.height_shift_range = height_shift_range - self.shear_range = shear_range - self.zoom_range = zoom_range - self.channel_shift_range = channel_shift_range - self.fill_mode = fill_mode - self.cval = cval - self.horizontal_flip = horizontal_flip - self.vertical_flip = vertical_flip - self.rescale = rescale - self.preprocessing_function = preprocessing_function - self.dtype = dtype - self.interpolation_order = interpolation_order - - if data_format not in {'channels_last', 'channels_first'}: - raise ValueError( - '`data_format` should be `"channels_last"` ' - '(channel after row and column) or ' - '`"channels_first"` (channel before row and column). 
' - 'Received: %s' % data_format) - self.data_format = data_format - if data_format == 'channels_first': - self.channel_axis = 1 - self.row_axis = 2 - self.col_axis = 3 - if data_format == 'channels_last': - self.channel_axis = 3 - self.row_axis = 1 - self.col_axis = 2 - if validation_split and not 0 < validation_split < 1: - raise ValueError( - '`validation_split` must be strictly between 0 and 1. ' - ' Received: %s' % validation_split) - self._validation_split = validation_split - - self.mean = None - self.std = None - self.principal_components = None - - if np.isscalar(zoom_range): - self.zoom_range = [1 - zoom_range, 1 + zoom_range] - elif len(zoom_range) == 2: - self.zoom_range = [zoom_range[0], zoom_range[1]] - else: - raise ValueError('`zoom_range` should be a float or ' - 'a tuple or list of two floats. ' - 'Received: %s' % (zoom_range,)) - if zca_whitening: - if not featurewise_center: - self.featurewise_center = True - warnings.warn('This ImageDataGenerator specifies ' - '`zca_whitening`, which overrides ' - 'setting of `featurewise_center`.') - if featurewise_std_normalization: - self.featurewise_std_normalization = False - warnings.warn('This ImageDataGenerator specifies ' - '`zca_whitening` ' - 'which overrides setting of' - '`featurewise_std_normalization`.') - if featurewise_std_normalization: - if not featurewise_center: - self.featurewise_center = True - warnings.warn('This ImageDataGenerator specifies ' - '`featurewise_std_normalization`, ' - 'which overrides setting of ' - '`featurewise_center`.') - if samplewise_std_normalization: - if not samplewise_center: - self.samplewise_center = True - warnings.warn('This ImageDataGenerator specifies ' - '`samplewise_std_normalization`, ' - 'which overrides setting of ' - '`samplewise_center`.') - if brightness_range is not None: - if (not isinstance(brightness_range, (tuple, list)) or - len(brightness_range) != 2): - raise ValueError( - '`brightness_range should be tuple or list of two floats. ' - 'Received: %s' % (brightness_range,)) - self.brightness_range = brightness_range - - def flow(self, - x, - y=None, - batch_size=32, - shuffle=True, - sample_weight=None, - seed=None, - save_to_dir=None, - save_prefix='', - save_format='png', - subset=None): - """Takes data & label arrays, generates batches of augmented data. - - # Arguments - x: Input data. Numpy array of rank 4 or a tuple. - If tuple, the first element - should contain the images and the second element - another numpy array or a list of numpy arrays - that gets passed to the output - without any modifications. - Can be used to feed the model miscellaneous data - along with the images. - In case of grayscale data, the channels axis of the image array - should have value 1, in case - of RGB data, it should have value 3, and in case - of RGBA data, it should have value 4. - y: Labels. - batch_size: Int (default: 32). - shuffle: Boolean (default: True). - sample_weight: Sample weights. - seed: Int (default: None). - save_to_dir: None or str (default: None). - This allows you to optionally specify a directory - to which to save the augmented pictures being generated - (useful for visualizing what you are doing). - save_prefix: Str (default: `''`). - Prefix to use for filenames of saved pictures - (only relevant if `save_to_dir` is set). - save_format: one of "png", "jpeg" - (only relevant if `save_to_dir` is set). Default: "png". - subset: Subset of data (`"training"` or `"validation"`) if - `validation_split` is set in `ImageDataGenerator`. 
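A minimal sketch of carving one array into training and validation iterators via `validation_split` plus the `subset` argument (dummy data, and the batch counts assume the 80/20 split described above):

```python
import numpy as np
from keras_preprocessing.image import ImageDataGenerator

x = np.random.rand(100, 32, 32, 3)
y = np.random.randint(0, 2, size=(100,))

datagen = ImageDataGenerator(rescale=1. / 255, validation_split=0.2)
train_it = datagen.flow(x, y, batch_size=10, subset='training')
val_it = datagen.flow(x, y, batch_size=10, subset='validation')
print(len(train_it), len(val_it))  # 8 2 -- batches of 10 per subset
```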
- - # Returns - An `Iterator` yielding tuples of `(x, y)` - where `x` is a numpy array of image data - (in the case of a single image input) or a list - of numpy arrays (in the case with - additional inputs) and `y` is a numpy array - of corresponding labels. If 'sample_weight' is not None, - the yielded tuples are of the form `(x, y, sample_weight)`. - If `y` is None, only the numpy array `x` is returned. - """ - return NumpyArrayIterator( - x, - y, - self, - batch_size=batch_size, - shuffle=shuffle, - sample_weight=sample_weight, - seed=seed, - data_format=self.data_format, - save_to_dir=save_to_dir, - save_prefix=save_prefix, - save_format=save_format, - subset=subset - ) - - def flow_from_directory(self, - directory, - target_size=(256, 256), - color_mode='rgb', - classes=None, - class_mode='categorical', - batch_size=32, - shuffle=True, - seed=None, - save_to_dir=None, - save_prefix='', - save_format='png', - follow_links=False, - subset=None, - interpolation='nearest'): - """Takes the path to a directory & generates batches of augmented data. - - # Arguments - directory: string, path to the target directory. - It should contain one subdirectory per class. - Any PNG, JPG, BMP, PPM or TIF images - inside each of the subdirectories directory tree - will be included in the generator. - See [this script]( - https://gist.github.com/fchollet/0830affa1f7f19fd47b06d4cf89ed44d) - for more details. - target_size: Tuple of integers `(height, width)`, - default: `(256, 256)`. - The dimensions to which all images found will be resized. - color_mode: One of "grayscale", "rgb", "rgba". Default: "rgb". - Whether the images will be converted to - have 1, 3, or 4 channels. - classes: Optional list of class subdirectories - (e.g. `['dogs', 'cats']`). Default: None. - If not provided, the list of classes will be automatically - inferred from the subdirectory names/structure - under `directory`, where each subdirectory will - be treated as a different class - (and the order of the classes, which will map to the label - indices, will be alphanumeric). - The dictionary containing the mapping from class names to class - indices can be obtained via the attribute `class_indices`. - class_mode: One of "categorical", "binary", "sparse", - "input", or None. Default: "categorical". - Determines the type of label arrays that are returned: - - "categorical" will be 2D one-hot encoded labels, - - "binary" will be 1D binary labels, - "sparse" will be 1D integer labels, - - "input" will be images identical - to input images (mainly used to work with autoencoders). - - If None, no labels are returned - (the generator will only yield batches of image data, - which is useful to use with `model.predict_generator()`). - Please note that in case of class_mode None, - the data still needs to reside in a subdirectory - of `directory` for it to work correctly. - batch_size: Size of the batches of data (default: 32). - shuffle: Whether to shuffle the data (default: True) - If set to False, sorts the data in alphanumeric order. - seed: Optional random seed for shuffling and transformations. - save_to_dir: None or str (default: None). - This allows you to optionally specify - a directory to which to save - the augmented pictures being generated - (useful for visualizing what you are doing). - save_prefix: Str. Prefix to use for filenames of saved pictures - (only relevant if `save_to_dir` is set). - save_format: One of "png", "jpeg" - (only relevant if `save_to_dir` is set). Default: "png". 
- follow_links: Whether to follow symlinks inside - class subdirectories (default: False). - subset: Subset of data (`"training"` or `"validation"`) if - `validation_split` is set in `ImageDataGenerator`. - interpolation: Interpolation method used to - resample the image if the - target size is different from that of the loaded image. - Supported methods are `"nearest"`, `"bilinear"`, - and `"bicubic"`. - If PIL version 1.1.3 or newer is installed, `"lanczos"` is also - supported. If PIL version 3.4.0 or newer is installed, - `"box"` and `"hamming"` are also supported. - By default, `"nearest"` is used. - - # Returns - A `DirectoryIterator` yielding tuples of `(x, y)` - where `x` is a numpy array containing a batch - of images with shape `(batch_size, *target_size, channels)` - and `y` is a numpy array of corresponding labels. - """ - return DirectoryIterator( - directory, - self, - target_size=target_size, - color_mode=color_mode, - classes=classes, - class_mode=class_mode, - data_format=self.data_format, - batch_size=batch_size, - shuffle=shuffle, - seed=seed, - save_to_dir=save_to_dir, - save_prefix=save_prefix, - save_format=save_format, - follow_links=follow_links, - subset=subset, - interpolation=interpolation - ) - - def flow_from_dataframe(self, - dataframe, - directory=None, - x_col="filename", - y_col="class", - weight_col=None, - target_size=(256, 256), - color_mode='rgb', - classes=None, - class_mode='categorical', - batch_size=32, - shuffle=True, - seed=None, - save_to_dir=None, - save_prefix='', - save_format='png', - subset=None, - interpolation='nearest', - validate_filenames=True, - **kwargs): - """Takes the dataframe and the path to a directory - and generates batches of augmented/normalized data. - - **A simple tutorial can be found **[here]( - http://bit.ly/keras_flow_from_dataframe). - - # Arguments - dataframe: Pandas dataframe containing the filepaths relative to - `directory` (or absolute paths if `directory` is None) of the - images in a string column. It should include other column/s - depending on the `class_mode`: - - if `class_mode` is `"categorical"` (default value) it must - include the `y_col` column with the class/es of each image. - Values in column can be string/list/tuple if a single class - or list/tuple if multiple classes. - - if `class_mode` is `"binary"` or `"sparse"` it must include - the given `y_col` column with class values as strings. - - if `class_mode` is `"raw"` or `"multi_output"` it should contain - the columns specified in `y_col`. - - if `class_mode` is `"input"` or `None` no extra column is needed. - directory: string, path to the directory to read images from. If `None`, - data in `x_col` column should be absolute paths. - x_col: string, column in `dataframe` that contains the filenames (or - absolute paths if `directory` is `None`). - y_col: string or list, column/s in `dataframe` that has the target data. - weight_col: string, column in `dataframe` that contains the sample - weights. Default: `None`. - target_size: tuple of integers `(height, width)`, default: `(256, 256)`. - The dimensions to which all images found will be resized. - color_mode: one of "grayscale", "rgb", "rgba". Default: "rgb". - Whether the images will be converted to have 1 or 3 color channels. - classes: optional list of classes (e.g. `['dogs', 'cats']`). - Default: None. If not provided, the list of classes will be - automatically inferred from the `y_col`, - which will map to the label indices, will be alphanumeric). 
- The dictionary containing the mapping from class names to class - indices can be obtained via the attribute `class_indices`. - class_mode: one of "binary", "categorical", "input", "multi_output", - "raw", sparse" or None. Default: "categorical". - Mode for yielding the targets: - - `"binary"`: 1D numpy array of binary labels, - - `"categorical"`: 2D numpy array of one-hot encoded labels. - Supports multi-label output. - - `"input"`: images identical to input images (mainly used to - work with autoencoders), - - `"multi_output"`: list with the values of the different columns, - - `"raw"`: numpy array of values in `y_col` column(s), - - `"sparse"`: 1D numpy array of integer labels, - - `None`, no targets are returned (the generator will only yield - batches of image data, which is useful to use in - `model.predict_generator()`). - batch_size: size of the batches of data (default: 32). - shuffle: whether to shuffle the data (default: True) - seed: optional random seed for shuffling and transformations. - save_to_dir: None or str (default: None). - This allows you to optionally specify a directory - to which to save the augmented pictures being generated - (useful for visualizing what you are doing). - save_prefix: str. Prefix to use for filenames of saved pictures - (only relevant if `save_to_dir` is set). - save_format: one of "png", "jpeg" - (only relevant if `save_to_dir` is set). Default: "png". - follow_links: whether to follow symlinks inside class subdirectories - (default: False). - subset: Subset of data (`"training"` or `"validation"`) if - `validation_split` is set in `ImageDataGenerator`. - interpolation: Interpolation method used to resample the image if the - target size is different from that of the loaded image. - Supported methods are `"nearest"`, `"bilinear"`, and `"bicubic"`. - If PIL version 1.1.3 or newer is installed, `"lanczos"` is also - supported. If PIL version 3.4.0 or newer is installed, `"box"` and - `"hamming"` are also supported. By default, `"nearest"` is used. - validate_filenames: Boolean, whether to validate image filenames in - `x_col`. If `True`, invalid images will be ignored. Disabling this - option can lead to speed-up in the execution of this function. - Default: `True`. - - # Returns - A `DataFrameIterator` yielding tuples of `(x, y)` - where `x` is a numpy array containing a batch - of images with shape `(batch_size, *target_size, channels)` - and `y` is a numpy array of corresponding labels. 
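A minimal sketch of the dataframe shape this method expects (the filenames and directory are hypothetical):

```python
import pandas as pd
from keras_preprocessing.image import ImageDataGenerator

# One row per image; labels as strings for class_mode='categorical'.
df = pd.DataFrame({'filename': ['cat001.jpg', 'dog001.jpg'],
                   'class': ['cat', 'dog']})

datagen = ImageDataGenerator(rescale=1. / 255)
gen = datagen.flow_from_dataframe(df,
                                  directory='data/train',
                                  x_col='filename',
                                  y_col='class',
                                  target_size=(128, 128),
                                  class_mode='categorical')
```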
- """ - if 'has_ext' in kwargs: - warnings.warn('has_ext is deprecated, filenames in the dataframe have ' - 'to match the exact filenames in disk.', - DeprecationWarning) - if 'sort' in kwargs: - warnings.warn('sort is deprecated, batches will be created in the' - 'same order than the filenames provided if shuffle' - 'is set to False.', DeprecationWarning) - if class_mode == 'other': - warnings.warn('`class_mode` "other" is deprecated, please use ' - '`class_mode` "raw".', DeprecationWarning) - class_mode = 'raw' - if 'drop_duplicates' in kwargs: - warnings.warn('drop_duplicates is deprecated, you can drop duplicates ' - 'by using the pandas.DataFrame.drop_duplicates method.', - DeprecationWarning) - - return DataFrameIterator( - dataframe, - directory, - self, - x_col=x_col, - y_col=y_col, - weight_col=weight_col, - target_size=target_size, - color_mode=color_mode, - classes=classes, - class_mode=class_mode, - data_format=self.data_format, - batch_size=batch_size, - shuffle=shuffle, - seed=seed, - save_to_dir=save_to_dir, - save_prefix=save_prefix, - save_format=save_format, - subset=subset, - interpolation=interpolation, - validate_filenames=validate_filenames - ) - - def standardize(self, x): - """Applies the normalization configuration in-place to a batch of inputs. - - `x` is changed in-place since the function is mainly used internally - to standarize images and feed them to your network. If a copy of `x` - would be created instead it would have a significant performance cost. - If you want to apply this method without changing the input in-place - you can call the method creating a copy before: - - standarize(np.copy(x)) - - # Arguments - x: Batch of inputs to be normalized. - - # Returns - The inputs, normalized. - """ - if self.preprocessing_function: - x = self.preprocessing_function(x) - if self.rescale: - x *= self.rescale - if self.samplewise_center: - x -= np.mean(x, keepdims=True) - if self.samplewise_std_normalization: - x /= (np.std(x, keepdims=True) + 1e-6) - - if self.featurewise_center: - if self.mean is not None: - x -= self.mean - else: - warnings.warn('This ImageDataGenerator specifies ' - '`featurewise_center`, but it hasn\'t ' - 'been fit on any training data. Fit it ' - 'first by calling `.fit(numpy_data)`.') - if self.featurewise_std_normalization: - if self.std is not None: - x /= (self.std + 1e-6) - else: - warnings.warn('This ImageDataGenerator specifies ' - '`featurewise_std_normalization`, ' - 'but it hasn\'t ' - 'been fit on any training data. Fit it ' - 'first by calling `.fit(numpy_data)`.') - if self.zca_whitening: - if self.principal_components is not None: - flatx = np.reshape(x, (-1, np.prod(x.shape[-3:]))) - whitex = np.dot(flatx, self.principal_components) - x = np.reshape(whitex, x.shape) - else: - warnings.warn('This ImageDataGenerator specifies ' - '`zca_whitening`, but it hasn\'t ' - 'been fit on any training data. Fit it ' - 'first by calling `.fit(numpy_data)`.') - return x - - def get_random_transform(self, img_shape, seed=None): - """Generates random parameters for a transformation. - - # Arguments - seed: Random seed. - img_shape: Tuple of integers. - Shape of the image that is transformed. - - # Returns - A dictionary containing randomly chosen parameters describing the - transformation. 
- """ - img_row_axis = self.row_axis - 1 - img_col_axis = self.col_axis - 1 - - if seed is not None: - np.random.seed(seed) - - if self.rotation_range: - theta = np.random.uniform( - -self.rotation_range, - self.rotation_range) - else: - theta = 0 - - if self.height_shift_range: - try: # 1-D array-like or int - tx = np.random.choice(self.height_shift_range) - tx *= np.random.choice([-1, 1]) - except ValueError: # floating point - tx = np.random.uniform(-self.height_shift_range, - self.height_shift_range) - if np.max(self.height_shift_range) < 1: - tx *= img_shape[img_row_axis] - else: - tx = 0 - - if self.width_shift_range: - try: # 1-D array-like or int - ty = np.random.choice(self.width_shift_range) - ty *= np.random.choice([-1, 1]) - except ValueError: # floating point - ty = np.random.uniform(-self.width_shift_range, - self.width_shift_range) - if np.max(self.width_shift_range) < 1: - ty *= img_shape[img_col_axis] - else: - ty = 0 - - if self.shear_range: - shear = np.random.uniform( - -self.shear_range, - self.shear_range) - else: - shear = 0 - - if self.zoom_range[0] == 1 and self.zoom_range[1] == 1: - zx, zy = 1, 1 - else: - zx, zy = np.random.uniform( - self.zoom_range[0], - self.zoom_range[1], - 2) - - flip_horizontal = (np.random.random() < 0.5) * self.horizontal_flip - flip_vertical = (np.random.random() < 0.5) * self.vertical_flip - - channel_shift_intensity = None - if self.channel_shift_range != 0: - channel_shift_intensity = np.random.uniform(-self.channel_shift_range, - self.channel_shift_range) - - brightness = None - if self.brightness_range is not None: - brightness = np.random.uniform(self.brightness_range[0], - self.brightness_range[1]) - - transform_parameters = {'theta': theta, - 'tx': tx, - 'ty': ty, - 'shear': shear, - 'zx': zx, - 'zy': zy, - 'flip_horizontal': flip_horizontal, - 'flip_vertical': flip_vertical, - 'channel_shift_intensity': channel_shift_intensity, - 'brightness': brightness} - - return transform_parameters - - def apply_transform(self, x, transform_parameters): - """Applies a transformation to an image according to given parameters. - - # Arguments - x: 3D tensor, single image. - transform_parameters: Dictionary with string - parameter pairs - describing the transformation. - Currently, the following parameters - from the dictionary are used: - - `'theta'`: Float. Rotation angle in degrees. - - `'tx'`: Float. Shift in the x direction. - - `'ty'`: Float. Shift in the y direction. - - `'shear'`: Float. Shear angle in degrees. - - `'zx'`: Float. Zoom in the x direction. - - `'zy'`: Float. Zoom in the y direction. - - `'flip_horizontal'`: Boolean. Horizontal flip. - - `'flip_vertical'`: Boolean. Vertical flip. - - `'channel_shift_intencity'`: Float. Channel shift intensity. - - `'brightness'`: Float. Brightness shift intensity. - - # Returns - A transformed version of the input (same shape). 
- """ - # x is a single image, so it doesn't have image number at index 0 - img_row_axis = self.row_axis - 1 - img_col_axis = self.col_axis - 1 - img_channel_axis = self.channel_axis - 1 - - x = apply_affine_transform(x, transform_parameters.get('theta', 0), - transform_parameters.get('tx', 0), - transform_parameters.get('ty', 0), - transform_parameters.get('shear', 0), - transform_parameters.get('zx', 1), - transform_parameters.get('zy', 1), - row_axis=img_row_axis, - col_axis=img_col_axis, - channel_axis=img_channel_axis, - fill_mode=self.fill_mode, - cval=self.cval, - order=self.interpolation_order) - - if transform_parameters.get('channel_shift_intensity') is not None: - x = apply_channel_shift(x, - transform_parameters['channel_shift_intensity'], - img_channel_axis) - - if transform_parameters.get('flip_horizontal', False): - x = flip_axis(x, img_col_axis) - - if transform_parameters.get('flip_vertical', False): - x = flip_axis(x, img_row_axis) - - if transform_parameters.get('brightness') is not None: - x = apply_brightness_shift(x, transform_parameters['brightness']) - - return x - - def random_transform(self, x, seed=None): - """Applies a random transformation to an image. - - # Arguments - x: 3D tensor, single image. - seed: Random seed. - - # Returns - A randomly transformed version of the input (same shape). - """ - params = self.get_random_transform(x.shape, seed) - return self.apply_transform(x, params) - - def fit(self, x, - augment=False, - rounds=1, - seed=None): - """Fits the data generator to some sample data. - - This computes the internal data stats related to the - data-dependent transformations, based on an array of sample data. - - Only required if `featurewise_center` or - `featurewise_std_normalization` or `zca_whitening` are set to True. - - # Arguments - x: Sample data. Should have rank 4. - In case of grayscale data, - the channels axis should have value 1, in case - of RGB data, it should have value 3, and in case - of RGBA data, it should have value 4. - augment: Boolean (default: False). - Whether to fit on randomly augmented samples. - rounds: Int (default: 1). - If using data augmentation (`augment=True`), - this is how many augmentation passes over the data to use. - seed: Int (default: None). Random seed. - """ - x = np.asarray(x, dtype=self.dtype) - if x.ndim != 4: - raise ValueError('Input to `.fit()` should have rank 4. ' - 'Got array with shape: ' + str(x.shape)) - if x.shape[self.channel_axis] not in {1, 3, 4}: - warnings.warn( - 'Expected input to be images (as Numpy array) ' - 'following the data format convention "' + - self.data_format + '" (channels on axis ' + - str(self.channel_axis) + '), i.e. expected ' - 'either 1, 3 or 4 channels on axis ' + - str(self.channel_axis) + '. 
' - 'However, it was passed an array with shape ' + - str(x.shape) + ' (' + str(x.shape[self.channel_axis]) + - ' channels).') - - if seed is not None: - np.random.seed(seed) - - x = np.copy(x) - if augment: - ax = np.zeros( - tuple([rounds * x.shape[0]] + list(x.shape)[1:]), - dtype=self.dtype) - for r in range(rounds): - for i in range(x.shape[0]): - ax[i + r * x.shape[0]] = self.random_transform(x[i]) - x = ax - - if self.featurewise_center: - self.mean = np.mean(x, axis=(0, self.row_axis, self.col_axis)) - broadcast_shape = [1, 1, 1] - broadcast_shape[self.channel_axis - 1] = x.shape[self.channel_axis] - self.mean = np.reshape(self.mean, broadcast_shape) - x -= self.mean - - if self.featurewise_std_normalization: - self.std = np.std(x, axis=(0, self.row_axis, self.col_axis)) - broadcast_shape = [1, 1, 1] - broadcast_shape[self.channel_axis - 1] = x.shape[self.channel_axis] - self.std = np.reshape(self.std, broadcast_shape) - x /= (self.std + 1e-6) - - if self.zca_whitening: - if scipy is None: - raise ImportError('Using zca_whitening requires SciPy. ' - 'Install SciPy.') - flat_x = np.reshape( - x, (x.shape[0], x.shape[1] * x.shape[2] * x.shape[3])) - sigma = np.dot(flat_x.T, flat_x) / flat_x.shape[0] - u, s, _ = linalg.svd(sigma) - s_inv = 1. / np.sqrt(s[np.newaxis] + self.zca_epsilon) - self.principal_components = (u * s_inv).dot(u.T) -"""Utilities for real-time data augmentation on image data. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import threading -import numpy as np -from keras_preprocessing import get_keras_submodule - -try: - IteratorType = get_keras_submodule('utils').Sequence -except ImportError: - IteratorType = object - -from .utils import (array_to_img, - img_to_array, - load_img) - - -class Iterator(IteratorType): - """Base class for image data iterators. - - Every `Iterator` must implement the `_get_batches_of_transformed_samples` - method. - - # Arguments - n: Integer, total number of samples in the dataset to loop over. - batch_size: Integer, size of a batch. - shuffle: Boolean, whether to shuffle the data between epochs. - seed: Random seeding for data shuffling. - """ - white_list_formats = ('png', 'jpg', 'jpeg', 'bmp', 'ppm', 'tif', 'tiff') - - def __init__(self, n, batch_size, shuffle, seed): - self.n = n - self.batch_size = batch_size - self.seed = seed - self.shuffle = shuffle - self.batch_index = 0 - self.total_batches_seen = 0 - self.lock = threading.Lock() - self.index_array = None - self.index_generator = self._flow_index() - - def _set_index_array(self): - self.index_array = np.arange(self.n) - if self.shuffle: - self.index_array = np.random.permutation(self.n) - - def __getitem__(self, idx): - if idx >= len(self): - raise ValueError('Asked to retrieve element {idx}, ' - 'but the Sequence ' - 'has length {length}'.format(idx=idx, - length=len(self))) - if self.seed is not None: - np.random.seed(self.seed + self.total_batches_seen) - self.total_batches_seen += 1 - if self.index_array is None: - self._set_index_array() - index_array = self.index_array[self.batch_size * idx: - self.batch_size * (idx + 1)] - return self._get_batches_of_transformed_samples(index_array) - - def __len__(self): - return (self.n + self.batch_size - 1) // self.batch_size # round up - - def on_epoch_end(self): - self._set_index_array() - - def reset(self): - self.batch_index = 0 - - def _flow_index(self): - # Ensure self.batch_index is 0. 
- self.reset() - while 1: - if self.seed is not None: - np.random.seed(self.seed + self.total_batches_seen) - if self.batch_index == 0: - self._set_index_array() - - if self.n == 0: - # Avoiding modulo by zero error - current_index = 0 - else: - current_index = (self.batch_index * self.batch_size) % self.n - if self.n > current_index + self.batch_size: - self.batch_index += 1 - else: - self.batch_index = 0 - self.total_batches_seen += 1 - yield self.index_array[current_index: - current_index + self.batch_size] - - def __iter__(self): - # Needed if we want to do something like: - # for x, y in data_gen.flow(...): - return self - - def __next__(self, *args, **kwargs): - return self.next(*args, **kwargs) - - def next(self): - """For python 2.x. - - # Returns - The next batch. - """ - with self.lock: - index_array = next(self.index_generator) - # The transformation of images is not under thread lock - # so it can be done in parallel - return self._get_batches_of_transformed_samples(index_array) - - def _get_batches_of_transformed_samples(self, index_array): - """Gets a batch of transformed samples. - - # Arguments - index_array: Array of sample indices to include in batch. - - # Returns - A batch of transformed samples. - """ - raise NotImplementedError - - -class BatchFromFilesMixin(): - """Adds methods related to getting batches from filenames - - It includes the logic to transform image files to batches. - """ - - def set_processing_attrs(self, - image_data_generator, - target_size, - color_mode, - data_format, - save_to_dir, - save_prefix, - save_format, - subset, - interpolation): - """Sets attributes to use later for processing files into a batch. - - # Arguments - image_data_generator: Instance of `ImageDataGenerator` - to use for random transformations and normalization. - target_size: tuple of integers, dimensions to resize input images to. - color_mode: One of `"rgb"`, `"rgba"`, `"grayscale"`. - Color mode to read images. - data_format: String, one of `channels_first`, `channels_last`. - save_to_dir: Optional directory where to save the pictures - being yielded, in a viewable format. This is useful - for visualizing the random transformations being - applied, for debugging purposes. - save_prefix: String prefix to use for saving sample - images (if `save_to_dir` is set). - save_format: Format to use for saving sample images - (if `save_to_dir` is set). - subset: Subset of data (`"training"` or `"validation"`) if - validation_split is set in ImageDataGenerator. - interpolation: Interpolation method used to resample the image if the - target size is different from that of the loaded image. - Supported methods are "nearest", "bilinear", and "bicubic". - If PIL version 1.1.3 or newer is installed, "lanczos" is also - supported. If PIL version 3.4.0 or newer is installed, "box" and - "hamming" are also supported. By default, "nearest" is used. 
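        # Example
            # A minimal sketch with illustrative values; `it` stands for
            # any iterator that mixes in BatchFromFilesMixin, and `gen`
            # for an ImageDataGenerator instance (both hypothetical).
            it.set_processing_attrs(gen, target_size=(150, 150),
                                    color_mode='rgb',
                                    data_format='channels_last',
                                    save_to_dir=None, save_prefix='',
                                    save_format='png', subset=None,
                                    interpolation='nearest')
            it.image_shape  # (150, 150, 3); 'channels_first' gives (3, 150, 150)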
- """ - self.image_data_generator = image_data_generator - self.target_size = tuple(target_size) - if color_mode not in {'rgb', 'rgba', 'grayscale'}: - raise ValueError('Invalid color mode:', color_mode, - '; expected "rgb", "rgba", or "grayscale".') - self.color_mode = color_mode - self.data_format = data_format - if self.color_mode == 'rgba': - if self.data_format == 'channels_last': - self.image_shape = self.target_size + (4,) - else: - self.image_shape = (4,) + self.target_size - elif self.color_mode == 'rgb': - if self.data_format == 'channels_last': - self.image_shape = self.target_size + (3,) - else: - self.image_shape = (3,) + self.target_size - else: - if self.data_format == 'channels_last': - self.image_shape = self.target_size + (1,) - else: - self.image_shape = (1,) + self.target_size - self.save_to_dir = save_to_dir - self.save_prefix = save_prefix - self.save_format = save_format - self.interpolation = interpolation - if subset is not None: - validation_split = self.image_data_generator._validation_split - if subset == 'validation': - split = (0, validation_split) - elif subset == 'training': - split = (validation_split, 1) - else: - raise ValueError( - 'Invalid subset name: %s;' - 'expected "training" or "validation"' % (subset,)) - else: - split = None - self.split = split - self.subset = subset - - def _get_batches_of_transformed_samples(self, index_array): - """Gets a batch of transformed samples. - - # Arguments - index_array: Array of sample indices to include in batch. - - # Returns - A batch of transformed samples. - """ - batch_x = np.zeros((len(index_array),) + - self.image_shape, dtype=self.dtype) - # build batch of image data - # self.filepaths is dynamic, is better to call it once outside the loop - filepaths = self.filepaths - for i, j in enumerate(index_array): - img = load_img(filepaths[j], - color_mode=self.color_mode, - target_size=self.target_size, - interpolation=self.interpolation) - x = img_to_array(img, data_format=self.data_format) - # Pillow images should be closed after `load_img`, - # but not PIL images. - if hasattr(img, 'close'): - img.close() - if self.image_data_generator: - params = self.image_data_generator.get_random_transform( - x.shape) - x = self.image_data_generator.apply_transform(x, params) - x = self.image_data_generator.standardize(x) - batch_x[i] = x - # optionally save augmented images to disk for debugging purposes - if self.save_to_dir: - for i, j in enumerate(index_array): - img = array_to_img(batch_x[i], self.data_format, scale=True) - fname = '{prefix}_{index}_{hash}.{format}'.format( - prefix=self.save_prefix, - index=j, - hash=np.random.randint(1e7), - format=self.save_format) - img.save(os.path.join(self.save_to_dir, fname)) - # build batch of labels - if self.class_mode == 'input': - batch_y = batch_x.copy() - elif self.class_mode in {'binary', 'sparse'}: - batch_y = np.empty(len(batch_x), dtype=self.dtype) - for i, n_observation in enumerate(index_array): - batch_y[i] = self.classes[n_observation] - elif self.class_mode == 'categorical': - batch_y = np.zeros((len(batch_x), len(self.class_indices)), - dtype=self.dtype) - for i, n_observation in enumerate(index_array): - batch_y[i, self.classes[n_observation]] = 1. 
- elif self.class_mode == 'multi_output': - batch_y = [output[index_array] for output in self.labels] - elif self.class_mode == 'raw': - batch_y = self.labels[index_array] - else: - return batch_x - if self.sample_weight is None: - return batch_x, batch_y - else: - return batch_x, batch_y, self.sample_weight[index_array] - - @property - def filepaths(self): - """List of absolute paths to image files""" - raise NotImplementedError( - '`filepaths` property method has not been implemented in {}.' - .format(type(self).__name__) - ) - - @property - def labels(self): - """Class labels of every observation""" - raise NotImplementedError( - '`labels` property method has not been implemented in {}.' - .format(type(self).__name__) - ) - - @property - def sample_weight(self): - raise NotImplementedError( - '`sample_weight` property method has not been implemented in {}.' - .format(type(self).__name__) - ) -"""Utilities for real-time data augmentation on image data. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import warnings -import numpy as np - -from .iterator import Iterator -from .utils import array_to_img - - -class NumpyArrayIterator(Iterator): - """Iterator yielding data from a Numpy array. - - # Arguments - x: Numpy array of input data or tuple. - If tuple, the second elements is either - another numpy array or a list of numpy arrays, - each of which gets passed - through as an output without any modifications. - y: Numpy array of targets data. - image_data_generator: Instance of `ImageDataGenerator` - to use for random transformations and normalization. - batch_size: Integer, size of a batch. - shuffle: Boolean, whether to shuffle the data between epochs. - sample_weight: Numpy array of sample weights. - seed: Random seed for data shuffling. - data_format: String, one of `channels_first`, `channels_last`. - save_to_dir: Optional directory where to save the pictures - being yielded, in a viewable format. This is useful - for visualizing the random transformations being - applied, for debugging purposes. - save_prefix: String prefix to use for saving sample - images (if `save_to_dir` is set). - save_format: Format to use for saving sample images - (if `save_to_dir` is set). - subset: Subset of data (`"training"` or `"validation"`) if - validation_split is set in ImageDataGenerator. - dtype: Dtype to use for the generated arrays. - """ - - def __init__(self, - x, - y, - image_data_generator, - batch_size=32, - shuffle=False, - sample_weight=None, - seed=None, - data_format='channels_last', - save_to_dir=None, - save_prefix='', - save_format='png', - subset=None, - dtype='float32'): - self.dtype = dtype - if (type(x) is tuple) or (type(x) is list): - if type(x[1]) is not list: - x_misc = [np.asarray(x[1])] - else: - x_misc = [np.asarray(xx) for xx in x[1]] - x = x[0] - for xx in x_misc: - if len(x) != len(xx): - raise ValueError( - 'All of the arrays in `x` ' - 'should have the same length. ' - 'Found a pair with: len(x[0]) = %s, len(x[?]) = %s' % - (len(x), len(xx))) - else: - x_misc = [] - - if y is not None and len(x) != len(y): - raise ValueError('`x` (images tensor) and `y` (labels) ' - 'should have the same length. ' - 'Found: x.shape = %s, y.shape = %s' % - (np.asarray(x).shape, np.asarray(y).shape)) - if sample_weight is not None and len(x) != len(sample_weight): - raise ValueError('`x` (images tensor) and `sample_weight` ' - 'should have the same length. 
' - 'Found: x.shape = %s, sample_weight.shape = %s' % - (np.asarray(x).shape, np.asarray(sample_weight).shape)) - if subset is not None: - if subset not in {'training', 'validation'}: - raise ValueError('Invalid subset name:', subset, - '; expected "training" or "validation".') - split_idx = int(len(x) * image_data_generator._validation_split) - - if (y is not None and not - np.array_equal(np.unique(y[:split_idx]), - np.unique(y[split_idx:]))): - raise ValueError('Training and validation subsets ' - 'have different number of classes after ' - 'the split. If your numpy arrays are ' - 'sorted by the label, you might want ' - 'to shuffle them.') - - if subset == 'validation': - x = x[:split_idx] - x_misc = [np.asarray(xx[:split_idx]) for xx in x_misc] - if y is not None: - y = y[:split_idx] - else: - x = x[split_idx:] - x_misc = [np.asarray(xx[split_idx:]) for xx in x_misc] - if y is not None: - y = y[split_idx:] - - self.x = np.asarray(x, dtype=self.dtype) - self.x_misc = x_misc - if self.x.ndim != 4: - raise ValueError('Input data in `NumpyArrayIterator` ' - 'should have rank 4. You passed an array ' - 'with shape', self.x.shape) - channels_axis = 3 if data_format == 'channels_last' else 1 - if self.x.shape[channels_axis] not in {1, 3, 4}: - warnings.warn('NumpyArrayIterator is set to use the ' - 'data format convention "' + data_format + '" ' - '(channels on axis ' + str(channels_axis) + - '), i.e. expected either 1, 3, or 4 ' - 'channels on axis ' + str(channels_axis) + '. ' - 'However, it was passed an array with shape ' + - str(self.x.shape) + ' (' + - str(self.x.shape[channels_axis]) + ' channels).') - if y is not None: - self.y = np.asarray(y) - else: - self.y = None - if sample_weight is not None: - self.sample_weight = np.asarray(sample_weight) - else: - self.sample_weight = None - self.image_data_generator = image_data_generator - self.data_format = data_format - self.save_to_dir = save_to_dir - self.save_prefix = save_prefix - self.save_format = save_format - super(NumpyArrayIterator, self).__init__(x.shape[0], - batch_size, - shuffle, - seed) - - def _get_batches_of_transformed_samples(self, index_array): - batch_x = np.zeros(tuple([len(index_array)] + list(self.x.shape)[1:]), - dtype=self.dtype) - for i, j in enumerate(index_array): - x = self.x[j] - params = self.image_data_generator.get_random_transform(x.shape) - x = self.image_data_generator.apply_transform( - x.astype(self.dtype), params) - x = self.image_data_generator.standardize(x) - batch_x[i] = x - - if self.save_to_dir: - for i, j in enumerate(index_array): - img = array_to_img(batch_x[i], self.data_format, scale=True) - fname = '{prefix}_{index}_{hash}.{format}'.format( - prefix=self.save_prefix, - index=j, - hash=np.random.randint(1e4), - format=self.save_format) - img.save(os.path.join(self.save_to_dir, fname)) - batch_x_miscs = [xx[index_array] for xx in self.x_misc] - output = (batch_x if batch_x_miscs == [] - else [batch_x] + batch_x_miscs,) - if self.y is None: - return output[0] - output += (self.y[index_array],) - if self.sample_weight is not None: - output += (self.sample_weight[index_array],) - return output -"""Utilities for real-time data augmentation on image data. 
-""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os -import warnings - -import numpy as np - -try: - from PIL import ImageEnhance - from PIL import Image as pil_image -except ImportError: - pil_image = None - ImageEnhance = None - - -if pil_image is not None: - _PIL_INTERPOLATION_METHODS = { - 'nearest': pil_image.NEAREST, - 'bilinear': pil_image.BILINEAR, - 'bicubic': pil_image.BICUBIC, - } - # These methods were only introduced in version 3.4.0 (2016). - if hasattr(pil_image, 'HAMMING'): - _PIL_INTERPOLATION_METHODS['hamming'] = pil_image.HAMMING - if hasattr(pil_image, 'BOX'): - _PIL_INTERPOLATION_METHODS['box'] = pil_image.BOX - # This method is new in version 1.1.3 (2013). - if hasattr(pil_image, 'LANCZOS'): - _PIL_INTERPOLATION_METHODS['lanczos'] = pil_image.LANCZOS - - -def validate_filename(filename, white_list_formats): - """Check if a filename refers to a valid file. - - # Arguments - filename: String, absolute path to a file - white_list_formats: Set, allowed file extensions - - # Returns - A boolean value indicating if the filename is valid or not - """ - return (filename.lower().endswith(white_list_formats) and - os.path.isfile(filename)) - - -def save_img(path, - x, - data_format='channels_last', - file_format=None, - scale=True, - **kwargs): - """Saves an image stored as a Numpy array to a path or file object. - - # Arguments - path: Path or file object. - x: Numpy array. - data_format: Image data format, - either "channels_first" or "channels_last". - file_format: Optional file format override. If omitted, the - format to use is determined from the filename extension. - If a file object was used instead of a filename, this - parameter should always be used. - scale: Whether to rescale image values to be within `[0, 255]`. - **kwargs: Additional keyword arguments passed to `PIL.Image.save()`. - """ - img = array_to_img(x, data_format=data_format, scale=scale) - if img.mode == 'RGBA' and (file_format == 'jpg' or file_format == 'jpeg'): - warnings.warn('The JPG format does not support ' - 'RGBA images, converting to RGB.') - img = img.convert('RGB') - img.save(path, format=file_format, **kwargs) - - -def load_img(path, grayscale=False, color_mode='rgb', target_size=None, - interpolation='nearest'): - """Loads an image into PIL format. - - # Arguments - path: Path to image file. - grayscale: DEPRECATED use `color_mode="grayscale"`. - color_mode: One of "grayscale", "rgb", "rgba". Default: "rgb". - The desired image format. - target_size: Either `None` (default to original size) - or tuple of ints `(img_height, img_width)`. - interpolation: Interpolation method used to resample the image if the - target size is different from that of the loaded image. - Supported methods are "nearest", "bilinear", and "bicubic". - If PIL version 1.1.3 or newer is installed, "lanczos" is also - supported. If PIL version 3.4.0 or newer is installed, "box" and - "hamming" are also supported. By default, "nearest" is used. - - # Returns - A PIL Image instance. - - # Raises - ImportError: if PIL is not available. - ValueError: if interpolation method is not supported. - """ - if grayscale is True: - warnings.warn('grayscale is deprecated. Please use ' - 'color_mode = "grayscale"') - color_mode = 'grayscale' - if pil_image is None: - raise ImportError('Could not import PIL.Image. 
' - 'The use of `load_img` requires PIL.') - img = pil_image.open(path) - if color_mode == 'grayscale': - if img.mode != 'L': - img = img.convert('L') - elif color_mode == 'rgba': - if img.mode != 'RGBA': - img = img.convert('RGBA') - elif color_mode == 'rgb': - if img.mode != 'RGB': - img = img.convert('RGB') - else: - raise ValueError('color_mode must be "grayscale", "rgb", or "rgba"') - if target_size is not None: - width_height_tuple = (target_size[1], target_size[0]) - if img.size != width_height_tuple: - if interpolation not in _PIL_INTERPOLATION_METHODS: - raise ValueError( - 'Invalid interpolation method {} specified. Supported ' - 'methods are {}'.format( - interpolation, - ", ".join(_PIL_INTERPOLATION_METHODS.keys()))) - resample = _PIL_INTERPOLATION_METHODS[interpolation] - img = img.resize(width_height_tuple, resample) - return img - - -def list_pictures(directory, ext=('jpg', 'jpeg', 'bmp', 'png', 'ppm', 'tif', - 'tiff')): - """Lists all pictures in a directory, including all subdirectories. - - # Arguments - directory: string, absolute path to the directory - ext: tuple of strings or single string, extensions of the pictures - - # Returns - a list of paths - """ - ext = tuple('.%s' % e for e in ((ext,) if isinstance(ext, str) else ext)) - return [os.path.join(root, f) - for root, _, files in os.walk(directory) for f in files - if f.lower().endswith(ext)] - - -def _iter_valid_files(directory, white_list_formats, follow_links): - """Iterates on files with extension in `white_list_formats` contained in `directory`. - - # Arguments - directory: Absolute path to the directory - containing files to be counted - white_list_formats: Set of strings containing allowed extensions for - the files to be counted. - follow_links: Boolean, follow symbolic links to subdirectories. - - # Yields - Tuple of (root, filename) with extension in `white_list_formats`. - """ - def _recursive_list(subpath): - return sorted(os.walk(subpath, followlinks=follow_links), - key=lambda x: x[0]) - - for root, _, files in _recursive_list(directory): - for fname in sorted(files): - if fname.lower().endswith('.tiff'): - warnings.warn('Using ".tiff" files with multiple bands ' - 'will cause distortion. Please verify your output.') - if fname.lower().endswith(white_list_formats): - yield root, fname - - -def _list_valid_filenames_in_directory(directory, white_list_formats, split, - class_indices, follow_links): - """Lists paths of files in `subdir` with extensions in `white_list_formats`. - - # Arguments - directory: absolute path to a directory containing the files to list. - The directory name is used as class label - and must be a key of `class_indices`. - white_list_formats: set of strings containing allowed extensions for - the files to be counted. - split: tuple of floats (e.g. `(0.2, 0.6)`) to only take into - account a certain fraction of files in each directory. - E.g.: `segment=(0.6, 1.0)` would only account for last 40 percent - of images in each directory. - class_indices: dictionary mapping a class name to its index. - follow_links: boolean, follow symbolic links to subdirectories. - - # Returns - classes: a list of class indices - filenames: the path of valid files in `directory`, relative from - `directory`'s parent (e.g., if `directory` is "dataset/class1", - the filenames will be - `["class1/file1.jpg", "class1/file2.jpg", ...]`). 
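    # Example
        # A hypothetical layout, for illustration:
        #     dataset/class1/file1.jpg
        #     dataset/class1/sub/file2.jpg
        # With directory='dataset/class1', class_indices={'class1': 0}
        # and split=None, the function returns
        # ([0, 0], ['class1/file1.jpg', 'class1/sub/file2.jpg']).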
- """ - dirname = os.path.basename(directory) - if split: - num_files = len(list( - _iter_valid_files(directory, white_list_formats, follow_links))) - start, stop = int(split[0] * num_files), int(split[1] * num_files) - valid_files = list( - _iter_valid_files( - directory, white_list_formats, follow_links))[start: stop] - else: - valid_files = _iter_valid_files( - directory, white_list_formats, follow_links) - classes = [] - filenames = [] - for root, fname in valid_files: - classes.append(class_indices[dirname]) - absolute_path = os.path.join(root, fname) - relative_path = os.path.join( - dirname, os.path.relpath(absolute_path, directory)) - filenames.append(relative_path) - - return classes, filenames - - -def array_to_img(x, data_format='channels_last', scale=True, dtype='float32'): - """Converts a 3D Numpy array to a PIL Image instance. - - # Arguments - x: Input Numpy array. - data_format: Image data format. - either "channels_first" or "channels_last". - scale: Whether to rescale image values - to be within `[0, 255]`. - dtype: Dtype to use. - - # Returns - A PIL Image instance. - - # Raises - ImportError: if PIL is not available. - ValueError: if invalid `x` or `data_format` is passed. - """ - if pil_image is None: - raise ImportError('Could not import PIL.Image. ' - 'The use of `array_to_img` requires PIL.') - x = np.asarray(x, dtype=dtype) - if x.ndim != 3: - raise ValueError('Expected image array to have rank 3 (single image). ' - 'Got array with shape: %s' % (x.shape,)) - - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Invalid data_format: %s' % data_format) - - # Original Numpy array x has format (height, width, channel) - # or (channel, height, width) - # but target PIL image has format (width, height, channel) - if data_format == 'channels_first': - x = x.transpose(1, 2, 0) - if scale: - x = x + max(-np.min(x), 0) - x_max = np.max(x) - if x_max != 0: - x /= x_max - x *= 255 - if x.shape[2] == 4: - # RGBA - return pil_image.fromarray(x.astype('uint8'), 'RGBA') - elif x.shape[2] == 3: - # RGB - return pil_image.fromarray(x.astype('uint8'), 'RGB') - elif x.shape[2] == 1: - # grayscale - return pil_image.fromarray(x[:, :, 0].astype('uint8'), 'L') - else: - raise ValueError('Unsupported channel number: %s' % (x.shape[2],)) - - -def img_to_array(img, data_format='channels_last', dtype='float32'): - """Converts a PIL Image instance to a Numpy array. - - # Arguments - img: PIL Image instance. - data_format: Image data format, - either "channels_first" or "channels_last". - dtype: Dtype to use for the returned array. - - # Returns - A 3D Numpy array. - - # Raises - ValueError: if invalid `img` or `data_format` is passed. 
- """ - if data_format not in {'channels_first', 'channels_last'}: - raise ValueError('Unknown data_format: %s' % data_format) - # Numpy array x has format (height, width, channel) - # or (channel, height, width) - # but original PIL image has format (width, height, channel) - x = np.asarray(img, dtype=dtype) - if len(x.shape) == 3: - if data_format == 'channels_first': - x = x.transpose(2, 0, 1) - elif len(x.shape) == 2: - if data_format == 'channels_first': - x = x.reshape((1, x.shape[0], x.shape[1])) - else: - x = x.reshape((x.shape[0], x.shape[1], 1)) - else: - raise ValueError('Unsupported image shape: %s' % (x.shape,)) - return x -import numpy as np -import pytest - -from keras_preprocessing.image import affine_transformations - - -def test_random_transforms(): - x = np.random.random((2, 28, 28)) - assert affine_transformations.random_rotation(x, 45).shape == (2, 28, 28) - assert affine_transformations.random_shift(x, 1, 1).shape == (2, 28, 28) - assert affine_transformations.random_shear(x, 20).shape == (2, 28, 28) - assert affine_transformations.random_channel_shift( - x, 20).shape == (2, 28, 28) - - -def test_deterministic_transform(): - x = np.ones((3, 3, 3)) - x_rotated = np.array([[[0., 0., 0.], - [0., 0., 0.], - [1., 1., 1.]], - [[0., 0., 0.], - [1., 1., 1.], - [1., 1., 1.]], - [[0., 0., 0.], - [0., 0., 0.], - [1., 1., 1.]]]) - assert np.allclose(affine_transformations.apply_affine_transform( - x, theta=45, channel_axis=2, fill_mode='constant'), x_rotated) - - -def test_random_zoom(): - x = np.random.random((2, 28, 28)) - assert affine_transformations.random_zoom(x, (5, 5)).shape == (2, 28, 28) - assert np.allclose(x, affine_transformations.random_zoom(x, (1, 1))) - - -def test_random_zoom_error(): - with pytest.raises(ValueError): - affine_transformations.random_zoom(0, zoom_range=[0]) - - -def test_apply_brightness_shift_error(monkeypatch): - monkeypatch.setattr(affine_transformations, 'ImageEnhance', None) - with pytest.raises(ImportError): - affine_transformations.apply_brightness_shift(0, [0]) - - -def test_random_brightness(monkeypatch): - monkeypatch.setattr(affine_transformations, - 'apply_brightness_shift', lambda x, y: (x, y)) - assert (0, 3.) 
== affine_transformations.random_brightness(0, (3, 3)) - - -def test_random_brightness_error(): - with pytest.raises(ValueError): - affine_transformations.random_brightness(0, [0]) - - -def test_apply_affine_transform_error(monkeypatch): - monkeypatch.setattr(affine_transformations, 'scipy', None) - with pytest.raises(ImportError): - affine_transformations.apply_affine_transform(0) -import os -import random -import shutil - -import numpy as np -import pandas as pd -import pytest - -from PIL import Image - -from keras_preprocessing.image import dataframe_iterator -from keras_preprocessing.image import image_data_generator - - -@pytest.fixture(scope='module') -def all_test_images(): - img_w = img_h = 20 - rgb_images = [] - rgba_images = [] - gray_images = [] - for n in range(8): - bias = np.random.rand(img_w, img_h, 1) * 64 - variance = np.random.rand(img_w, img_h, 1) * (255 - 64) - imarray = np.random.rand(img_w, img_h, 3) * variance + bias - im = Image.fromarray(imarray.astype('uint8')).convert('RGB') - rgb_images.append(im) - - imarray = np.random.rand(img_w, img_h, 4) * variance + bias - im = Image.fromarray(imarray.astype('uint8')).convert('RGBA') - rgba_images.append(im) - - imarray = np.random.rand(img_w, img_h, 1) * variance + bias - im = Image.fromarray( - imarray.astype('uint8').squeeze()).convert('L') - gray_images.append(im) - - return [rgb_images, rgba_images, gray_images] - - -def test_dataframe_iterator(all_test_images, tmpdir): - num_classes = 2 - - # save the images in the tmpdir - count = 0 - filenames = [] - filepaths = [] - filenames_without = [] - for test_images in all_test_images: - for im in test_images: - filename = "image-{}.png".format(count) - filename_without = "image-{}".format(count) - filenames.append(filename) - filepaths.append(os.path.join(str(tmpdir), filename)) - filenames_without.append(filename_without) - im.save(str(tmpdir / filename)) - count += 1 - - df = pd.DataFrame({ - "filename": filenames, - "class": [str(random.randint(0, 1)) for _ in filenames], - "filepaths": filepaths - }) - - # create iterator - iterator = dataframe_iterator.DataFrameIterator(df, str(tmpdir)) - batch = next(iterator) - assert len(batch) == 2 - assert isinstance(batch[0], np.ndarray) - assert isinstance(batch[1], np.ndarray) - generator = image_data_generator.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe(df, x_col='filepaths') - df_iterator_dir = generator.flow_from_dataframe(df, str(tmpdir)) - df_sparse_iterator = generator.flow_from_dataframe(df, str(tmpdir), - class_mode="sparse") - assert not np.isnan(df_sparse_iterator.classes).any() - # check number of classes and images - assert len(df_iterator.class_indices) == num_classes - assert len(df_iterator.classes) == count - assert set(df_iterator.filenames) == set(filepaths) - assert len(df_iterator_dir.class_indices) == num_classes - assert len(df_iterator_dir.classes) == count - assert set(df_iterator_dir.filenames) == set(filenames) - # test without shuffle - _, batch_y = next(generator.flow_from_dataframe(df, str(tmpdir), - shuffle=False, - class_mode="sparse")) - assert (batch_y == df['class'].astype('float')[:len(batch_y)]).all() - # Test invalid use cases - with pytest.raises(ValueError): - generator.flow_from_dataframe(df, str(tmpdir), color_mode='cmyk') - with pytest.raises(ValueError): - generator.flow_from_dataframe(df, str(tmpdir), class_mode='output') - with pytest.warns(DeprecationWarning): - generator.flow_from_dataframe(df, str(tmpdir), has_ext=True) - with 
pytest.warns(DeprecationWarning): - generator.flow_from_dataframe(df, str(tmpdir), has_ext=False) - - def preprocessing_function(x): - """This will fail if not provided by a Numpy array. - Note: This is made to enforce backward compatibility. - """ - - assert x.shape == (26, 26, 3) - assert type(x) is np.ndarray - - return np.zeros_like(x) - - # Test usage as Sequence - generator = image_data_generator.ImageDataGenerator( - preprocessing_function=preprocessing_function) - dir_seq = generator.flow_from_dataframe(df, str(tmpdir), - target_size=(26, 26), - color_mode='rgb', - batch_size=3, - class_mode='categorical') - assert len(dir_seq) == np.ceil(count / 3) - x1, y1 = dir_seq[1] - assert x1.shape == (3, 26, 26, 3) - assert y1.shape == (3, num_classes) - x1, y1 = dir_seq[5] - assert (x1 == 0).all() - - with pytest.raises(ValueError): - x1, y1 = dir_seq[9] - - -def test_dataframe_iterator_validate_filenames(all_test_images, tmpdir): - # save the images in the paths - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - im.save(str(tmpdir / filename)) - filenames.append(filename) - count += 1 - df = pd.DataFrame({"filename": filenames + ['test.jpp', 'test.jpg']}) - generator = image_data_generator.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe(df, - str(tmpdir), - class_mode="input") - assert len(df_iterator.filenames) == len(df['filename']) - 2 - df_iterator = generator.flow_from_dataframe(df, - str(tmpdir), - class_mode="input", - validate_filenames=False) - assert len(df_iterator.filenames) == len(df['filename']) - - -def test_dataframe_iterator_sample_weights(all_test_images, tmpdir): - # save the images in the paths - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - im.save(str(tmpdir / filename)) - filenames.append(filename) - count += 1 - df = pd.DataFrame({"filename": filenames}) - df['weight'] = ([2, 5] * len(df))[:len(df)] - generator = image_data_generator.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe(df, str(tmpdir), - x_col="filename", - y_col=None, - shuffle=False, - batch_size=5, - weight_col='weight', - class_mode="input") - - batch = next(df_iterator) - assert len(batch) == 3 # (x, y, weights) - # check if input and output have the same shape and they're the same - assert(batch[0].all() == batch[1].all()) - # check if the input and output images are not the same numpy array - input_img = batch[0][0] - output_img = batch[1][0] - output_img[0][0][0] += 1 - assert input_img[0][0][0] != output_img[0][0][0] - assert np.array_equal(np.array([2, 5, 2, 5, 2]), batch[2]) - - # fail - df['weight'] = (['2', '5'] * len(df))[:len(df)] - with pytest.raises(TypeError): - image_data_generator.ImageDataGenerator().flow_from_dataframe( - df, - weight_col='weight', - class_mode="input" - ) - - -def test_dataframe_iterator_class_mode_input(all_test_images, tmpdir): - # save the images in the paths - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - im.save(str(tmpdir / filename)) - filenames.append(filename) - count += 1 - df = pd.DataFrame({"filename": filenames}) - generator = image_data_generator.ImageDataGenerator() - df_autoencoder_iterator = generator.flow_from_dataframe(df, str(tmpdir), - x_col="filename", - y_col=None, - class_mode="input") - - batch = next(df_autoencoder_iterator) - - # check if input 
and output have the same shape and they're the same - assert np.allclose(batch[0], batch[1]) - # check if the input and output images are not the same numpy array - input_img = batch[0][0] - output_img = batch[1][0] - output_img[0][0][0] += 1 - assert(input_img[0][0][0] != output_img[0][0][0]) - - df_autoencoder_iterator = generator.flow_from_dataframe(df, str(tmpdir), - x_col="filename", - y_col="class", - class_mode="input") - - batch = next(df_autoencoder_iterator) - - # check if input and output have the same shape and they're the same - assert(batch[0].all() == batch[1].all()) - # check if the input and output images are not the same numpy array - input_img = batch[0][0] - output_img = batch[1][0] - output_img[0][0][0] += 1 - assert(input_img[0][0][0] != output_img[0][0][0]) - - -def test_dataframe_iterator_class_mode_categorical_multi_label(all_test_images, - tmpdir): - # save the images in the paths - filenames = [] - count = 0 - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - im.save(str(tmpdir / filename)) - filenames.append(filename) - count += 1 - label_opt = ['a', 'b', ['a'], ['b'], ['a', 'b'], ['b', 'a']] - df = pd.DataFrame({ - "filename": filenames, - "class": [random.choice(label_opt) for _ in filenames[:-2]] + ['b', 'a'] - }) - generator = image_data_generator.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe(df, str(tmpdir)) - batch_x, batch_y = next(df_iterator) - assert isinstance(batch_x, np.ndarray) - assert len(batch_x.shape) == 4 - assert isinstance(batch_y, np.ndarray) - assert batch_y.shape == (len(batch_x), 2) - for labels in batch_y: - assert all(l in {0, 1} for l in labels) - - # on first 3 batches - df = pd.DataFrame({ - "filename": filenames, - "class": [['b', 'a']] + ['b'] + [['c']] + [random.choice(label_opt) - for _ in filenames[:-3]] - }) - generator = image_data_generator.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe(df, str(tmpdir), shuffle=False) - batch_x, batch_y = next(df_iterator) - assert isinstance(batch_x, np.ndarray) - assert len(batch_x.shape) == 4 - assert isinstance(batch_y, np.ndarray) - assert batch_y.shape == (len(batch_x), 3) - for labels in batch_y: - assert all(l in {0, 1} for l in labels) - assert (batch_y[0] == np.array([1, 1, 0])).all() - assert (batch_y[1] == np.array([0, 1, 0])).all() - assert (batch_y[2] == np.array([0, 0, 1])).all() - - -def test_dataframe_iterator_class_mode_multi_output(all_test_images, tmpdir): - # save the images in the paths - filenames = [] - count = 0 - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - im.save(str(tmpdir / filename)) - filenames.append(filename) - count += 1 - # fit both outputs are a single number - df = pd.DataFrame({"filename": filenames}).assign( - output_0=np.random.uniform(size=len(filenames)), - output_1=np.random.uniform(size=len(filenames)) - ) - df_iterator = image_data_generator.ImageDataGenerator().flow_from_dataframe( - df, y_col=['output_0', 'output_1'], directory=str(tmpdir), - batch_size=3, shuffle=False, class_mode='multi_output' - ) - batch_x, batch_y = next(df_iterator) - assert isinstance(batch_x, np.ndarray) - assert len(batch_x.shape) == 4 - assert isinstance(batch_y, list) - assert len(batch_y) == 2 - assert np.array_equal(batch_y[0], - np.array(df['output_0'].tolist()[:3])) - assert np.array_equal(batch_y[1], - np.array(df['output_1'].tolist()[:3])) - # if one of the outputs is a 1D array - df['output_1'] = 
[np.random.uniform(size=(2, 2, 1)).flatten() - for _ in range(len(df))] - df_iterator = image_data_generator.ImageDataGenerator().flow_from_dataframe( - df, y_col=['output_0', 'output_1'], directory=str(tmpdir), - batch_size=3, shuffle=False, class_mode='multi_output' - ) - batch_x, batch_y = next(df_iterator) - assert isinstance(batch_x, np.ndarray) - assert len(batch_x.shape) == 4 - assert isinstance(batch_y, list) - assert len(batch_y) == 2 - assert np.array_equal(batch_y[0], - np.array(df['output_0'].tolist()[:3])) - assert np.array_equal(batch_y[1], - np.array(df['output_1'].tolist()[:3])) - # if one of the outputs is a 2D array - df['output_1'] = [np.random.uniform(size=(2, 2, 1)) - for _ in range(len(df))] - df_iterator = image_data_generator.ImageDataGenerator().flow_from_dataframe( - df, y_col=['output_0', 'output_1'], directory=str(tmpdir), - batch_size=3, shuffle=False, class_mode='multi_output' - ) - batch_x, batch_y = next(df_iterator) - assert isinstance(batch_x, np.ndarray) - assert len(batch_x.shape) == 4 - assert isinstance(batch_y, list) - assert len(batch_y) == 2 - assert np.array_equal(batch_y[0], - np.array(df['output_0'].tolist()[:3])) - assert np.array_equal(batch_y[1], - np.array(df['output_1'].tolist()[:3])) - # fail if single column - with pytest.raises(TypeError): - image_data_generator.ImageDataGenerator().flow_from_dataframe( - df, y_col='output_0', - directory=str(tmpdir), - class_mode='multi_output' - ) - - -def test_dataframe_iterator_class_mode_raw(all_test_images, tmpdir): - # save the images in the paths - filenames = [] - count = 0 - for test_images in all_test_images: - for im in test_images: - filename = 'image-{}.png'.format(count) - im.save(str(tmpdir / filename)) - filenames.append(filename) - count += 1 - # case for 1D output - df = pd.DataFrame({"filename": filenames}).assign( - output_0=np.random.uniform(size=len(filenames)), - output_1=np.random.uniform(size=len(filenames)) - ) - df_iterator = image_data_generator.ImageDataGenerator().flow_from_dataframe( - df, y_col='output_0', directory=str(tmpdir), - batch_size=3, shuffle=False, class_mode='raw' - ) - batch_x, batch_y = next(df_iterator) - assert isinstance(batch_x, np.ndarray) - assert len(batch_x.shape) == 4 - assert isinstance(batch_y, np.ndarray) - assert batch_y.shape == (3,) - assert np.array_equal(batch_y, df['output_0'].values[:3]) - # case with a 2D output - df_iterator = image_data_generator.ImageDataGenerator().flow_from_dataframe( - df, y_col=['output_0', 'output_1'], directory=str(tmpdir), - batch_size=3, shuffle=False, class_mode='raw' - ) - batch_x, batch_y = next(df_iterator) - assert isinstance(batch_x, np.ndarray) - assert len(batch_x.shape) == 4 - assert isinstance(batch_y, np.ndarray) - assert batch_y.shape == (3, 2) - assert np.array_equal(batch_y, - df[['output_0', 'output_1']].values[:3]) - - -@pytest.mark.parametrize('validation_split,num_training', [ - (0.25, 18), - (0.50, 12), - (0.75, 6), -]) -def test_dataframe_iterator_with_validation_split(all_test_images, validation_split, - num_training, tmpdir): - num_classes = 2 - - # save the images in the tmpdir - count = 0 - filenames = [] - filenames_without = [] - for test_images in all_test_images: - for im in test_images: - filename = "image-{}.png".format(count) - filename_without = "image-{}".format(count) - filenames.append(filename) - filenames_without.append(filename_without) - im.save(str(tmpdir / filename)) - count += 1 - - df = pd.DataFrame({"filename": filenames, - "class": [str(random.randint(0, 1)) for _ in 
filenames]}) - # create iterator - generator = image_data_generator.ImageDataGenerator( - validation_split=validation_split - ) - df_sparse_iterator = generator.flow_from_dataframe(df, - str(tmpdir), - class_mode="sparse") - if np.isnan(next(df_sparse_iterator)[:][1]).any(): - raise ValueError('Invalid values.') - - with pytest.raises(ValueError): - generator.flow_from_dataframe( - df, tmpdir, subset='foo') - - train_iterator = generator.flow_from_dataframe(df, str(tmpdir), - subset='training') - assert train_iterator.samples == num_training - - valid_iterator = generator.flow_from_dataframe(df, str(tmpdir), - subset='validation') - assert valid_iterator.samples == count - num_training - - # check number of classes and images - assert len(train_iterator.class_indices) == num_classes - assert len(train_iterator.classes) == num_training - assert len(set(train_iterator.filenames) & - set(filenames)) == num_training - - -def test_dataframe_iterator_with_custom_indexed_dataframe(all_test_images, tmpdir): - num_classes = 2 - - # save the images in the tmpdir - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - filename = "image-{}.png".format(count) - filenames.append(filename) - im.save(str(tmpdir / filename)) - count += 1 - - # create dataframes - classes = np.random.randint(num_classes, size=len(filenames)) - classes = [str(c) for c in classes] - df = pd.DataFrame({"filename": filenames, - "class": classes}) - df2 = pd.DataFrame({"filename": filenames, - "class": classes}, - index=np.arange(1, len(filenames) + 1)) - df3 = pd.DataFrame({"filename": filenames, - "class": classes}, - index=filenames) - - # create iterators - seed = 1 - generator = image_data_generator.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe( - df, str(tmpdir), seed=seed) - df2_iterator = generator.flow_from_dataframe( - df2, str(tmpdir), seed=seed) - df3_iterator = generator.flow_from_dataframe( - df3, str(tmpdir), seed=seed) - - # Test all iterators return same pairs of arrays - for _ in range(len(filenames)): - a1, c1 = next(df_iterator) - a2, c2 = next(df2_iterator) - a3, c3 = next(df3_iterator) - assert np.array_equal(a1, a2) - assert np.array_equal(a1, a3) - assert np.array_equal(c1, c2) - assert np.array_equal(c1, c3) - - -def test_dataframe_iterator_n(all_test_images, tmpdir): - - # save the images in the tmpdir - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - filename = "image-{}.png".format(count) - filenames.append(filename) - im.save(str(tmpdir / filename)) - count += 1 - - # exclude first two items - n_files = len(filenames) - input_filenames = filenames[2:] - - # create dataframes - classes = np.random.randint(2, size=len(input_filenames)) - classes = [str(c) for c in classes] - df = pd.DataFrame({"filename": input_filenames}) - df2 = pd.DataFrame({"filename": input_filenames, - "class": classes}) - - # create iterators - generator = image_data_generator.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe( - df, str(tmpdir), class_mode=None) - df2_iterator = generator.flow_from_dataframe( - df2, str(tmpdir), class_mode='binary') - - # Test the number of items in iterators - assert df_iterator.n == n_files - 2 - assert df2_iterator.n == n_files - 2 - - -def test_dataframe_iterator_absolute_path(all_test_images, tmpdir): - - # save the images in the tmpdir - count = 0 - file_paths = [] - for test_images in all_test_images: - for im in test_images: - filename = "image-{:0>5}.png".format(count) 
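            # '{:0>5}' left-pads the counter with zeros to five digits
            # (e.g. 'image-00003.png') so the files sort lexicographically.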
- file_path = str(tmpdir / filename) - file_paths.append(file_path) - im.save(file_path) - count += 1 - - # prepare an image with a forbidden extension. - file_path_fbd = str(tmpdir / 'image-forbid.fbd') - shutil.copy(file_path, file_path_fbd) - - # create dataframes - classes = np.random.randint(2, size=len(file_paths)) - classes = [str(c) for c in classes] - df = pd.DataFrame({"filename": file_paths}) - df2 = pd.DataFrame({"filename": file_paths, - "class": classes}) - df3 = pd.DataFrame({"filename": ['image-not-exist.png'] + file_paths}) - df4 = pd.DataFrame({"filename": file_paths + [file_path_fbd]}) - - # create iterators - generator = image_data_generator.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe( - df, None, class_mode=None, - shuffle=False, batch_size=1) - df2_iterator = generator.flow_from_dataframe( - df2, None, class_mode='binary', - shuffle=False, batch_size=1) - df3_iterator = generator.flow_from_dataframe( - df3, None, class_mode=None, - shuffle=False, batch_size=1) - df4_iterator = generator.flow_from_dataframe( - df4, None, class_mode=None, - shuffle=False, batch_size=1) - - validation_split = 0.2 - generator_split = image_data_generator.ImageDataGenerator( - validation_split=validation_split - ) - df_train_iterator = generator_split.flow_from_dataframe( - df, None, class_mode=None, - shuffle=False, subset='training', batch_size=1) - df_val_iterator = generator_split.flow_from_dataframe( - df, None, class_mode=None, - shuffle=False, subset='validation', batch_size=1) - - # Test the number of items in iterators - assert df_iterator.n == len(file_paths) - assert df2_iterator.n == len(file_paths) - assert df3_iterator.n == len(file_paths) - assert df4_iterator.n == len(file_paths) - assert df_val_iterator.n == int(validation_split * len(file_paths)) - assert df_train_iterator.n == len(file_paths) - df_val_iterator.n - - # Test flow_from_dataframe - for i in range(len(file_paths)): - a1 = next(df_iterator) - a2, _ = next(df2_iterator) - a3 = next(df3_iterator) - a4 = next(df4_iterator) - - if i < df_val_iterator.n: - a5 = next(df_val_iterator) - else: - a5 = next(df_train_iterator) - - assert np.array_equal(a1, a2) - assert np.array_equal(a1, a3) - assert np.array_equal(a1, a4) - assert np.array_equal(a1, a5) - - -def test_dataframe_iterator_with_subdirs(all_test_images, tmpdir): - num_classes = 2 - - # create folders and subfolders - paths = [] - for cl in range(num_classes): - class_directory = 'class-{}'.format(cl) - classpaths = [ - class_directory, - os.path.join(class_directory, 'subfolder-1'), - os.path.join(class_directory, 'subfolder-2'), - os.path.join(class_directory, 'subfolder-1', 'sub-subfolder') - ] - for path in classpaths: - tmpdir.join(path).mkdir() - paths.append(classpaths) - - # save the images in the paths - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - # rotate image class - im_class = count % num_classes - # rotate subfolders - classpaths = paths[im_class] - filename = os.path.join( - classpaths[count % len(classpaths)], - 'image-{}.png'.format(count)) - filenames.append(filename) - im.save(str(tmpdir / filename)) - count += 1 - - # create dataframe - classes = np.random.randint(num_classes, size=len(filenames)) - classes = [str(c) for c in classes] - df = pd.DataFrame({"filename": filenames, - "class": classes}) - - # create iterator - generator = image_data_generator.ImageDataGenerator() - df_iterator = generator.flow_from_dataframe( - df, str(tmpdir), class_mode='binary') - - # 
Test the number of items in iterator - assert df_iterator.n == len(filenames) - assert set(df_iterator.filenames) == set(filenames) - - -if __name__ == '__main__': - pytest.main([__file__]) -import os -import shutil -import tempfile - -import numpy as np -import pytest - -from PIL import Image - -from keras_preprocessing.image import image_data_generator - - -@pytest.fixture(scope='module') -def all_test_images(): - img_w = img_h = 20 - rgb_images = [] - rgba_images = [] - gray_images = [] - for n in range(8): - bias = np.random.rand(img_w, img_h, 1) * 64 - variance = np.random.rand(img_w, img_h, 1) * (255 - 64) - imarray = np.random.rand(img_w, img_h, 3) * variance + bias - im = Image.fromarray(imarray.astype('uint8')).convert('RGB') - rgb_images.append(im) - - imarray = np.random.rand(img_w, img_h, 4) * variance + bias - im = Image.fromarray(imarray.astype('uint8')).convert('RGBA') - rgba_images.append(im) - - imarray = np.random.rand(img_w, img_h, 1) * variance + bias - im = Image.fromarray( - imarray.astype('uint8').squeeze()).convert('L') - gray_images.append(im) - - return [rgb_images, rgba_images, gray_images] - - -def test_directory_iterator(all_test_images, tmpdir): - num_classes = 2 - - # create folders and subfolders - paths = [] - for cl in range(num_classes): - class_directory = 'class-{}'.format(cl) - classpaths = [ - class_directory, - os.path.join(class_directory, 'subfolder-1'), - os.path.join(class_directory, 'subfolder-2'), - os.path.join(class_directory, 'subfolder-1', 'sub-subfolder') - ] - for path in classpaths: - tmpdir.join(path).mkdir() - paths.append(classpaths) - - # save the images in the paths - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - # rotate image class - im_class = count % num_classes - # rotate subfolders - classpaths = paths[im_class] - filename = os.path.join( - classpaths[count % len(classpaths)], - 'image-{}.png'.format(count)) - filenames.append(filename) - im.save(str(tmpdir / filename)) - count += 1 - - # create iterator - generator = image_data_generator.ImageDataGenerator() - dir_iterator = generator.flow_from_directory(str(tmpdir)) - - # check number of classes and images - assert len(dir_iterator.class_indices) == num_classes - assert len(dir_iterator.classes) == count - assert set(dir_iterator.filenames) == set(filenames) - - # Test invalid use cases - with pytest.raises(ValueError): - generator.flow_from_directory(str(tmpdir), color_mode='cmyk') - with pytest.raises(ValueError): - generator.flow_from_directory(str(tmpdir), class_mode='output') - - def preprocessing_function(x): - """This will fail if not provided by a Numpy array. - Note: This is made to enforce backward compatibility. 
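        The generator calls this once per image after resizing, which is
        why the assertions below expect shape (26, 26, 3).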
- """ - - assert x.shape == (26, 26, 3) - assert type(x) is np.ndarray - - return np.zeros_like(x) - - # Test usage as Sequence - generator = image_data_generator.ImageDataGenerator( - preprocessing_function=preprocessing_function) - dir_seq = generator.flow_from_directory(str(tmpdir), - target_size=(26, 26), - color_mode='rgb', - batch_size=3, - class_mode='categorical') - assert len(dir_seq) == np.ceil(count / 3) - x1, y1 = dir_seq[1] - assert x1.shape == (3, 26, 26, 3) - assert y1.shape == (3, num_classes) - x1, y1 = dir_seq[5] - assert (x1 == 0).all() - - with pytest.raises(ValueError): - x1, y1 = dir_seq[9] - - -def test_directory_iterator_class_mode_input(all_test_images, tmpdir): - tmpdir.join('class-1').mkdir() - - # save the images in the paths - count = 0 - for test_images in all_test_images: - for im in test_images: - filename = str( - tmpdir / 'class-1' / 'image-{}.png'.format(count)) - im.save(filename) - count += 1 - - # create iterator - generator = image_data_generator.ImageDataGenerator() - dir_iterator = generator.flow_from_directory(str(tmpdir), - class_mode='input') - batch = next(dir_iterator) - - # check if input and output have the same shape - assert(batch[0].shape == batch[1].shape) - # check if the input and output images are not the same numpy array - input_img = batch[0][0] - output_img = batch[1][0] - output_img[0][0][0] += 1 - assert(input_img[0][0][0] != output_img[0][0][0]) - - -@pytest.mark.parametrize('validation_split,num_training', [ - (0.25, 18), - (0.50, 12), - (0.75, 6), -]) -def test_directory_iterator_with_validation_split(all_test_images, - validation_split, - num_training): - num_classes = 2 - tmp_folder = tempfile.mkdtemp(prefix='test_images') - - # create folders and subfolders - paths = [] - for cl in range(num_classes): - class_directory = 'class-{}'.format(cl) - classpaths = [ - class_directory, - os.path.join(class_directory, 'subfolder-1'), - os.path.join(class_directory, 'subfolder-2'), - os.path.join(class_directory, 'subfolder-1', 'sub-subfolder') - ] - for path in classpaths: - os.mkdir(os.path.join(tmp_folder, path)) - paths.append(classpaths) - - # save the images in the paths - count = 0 - filenames = [] - for test_images in all_test_images: - for im in test_images: - # rotate image class - im_class = count % num_classes - # rotate subfolders - classpaths = paths[im_class] - filename = os.path.join( - classpaths[count % len(classpaths)], - 'image-{}.png'.format(count)) - filenames.append(filename) - im.save(os.path.join(tmp_folder, filename)) - count += 1 - - # create iterator - generator = image_data_generator.ImageDataGenerator( - validation_split=validation_split - ) - - with pytest.raises(ValueError): - generator.flow_from_directory(tmp_folder, subset='foo') - - train_iterator = generator.flow_from_directory(tmp_folder, - subset='training') - assert train_iterator.samples == num_training - - valid_iterator = generator.flow_from_directory(tmp_folder, - subset='validation') - assert valid_iterator.samples == count - num_training - - # check number of classes and images - assert len(train_iterator.class_indices) == num_classes - assert len(train_iterator.classes) == num_training - assert len(set(train_iterator.filenames) & - set(filenames)) == num_training - - shutil.rmtree(tmp_folder) - - -if __name__ == '__main__': - pytest.main([__file__]) -import numpy as np -import pytest - -from PIL import Image - -from keras_preprocessing.image import image_data_generator -from keras_preprocessing.image import utils - - 
-@pytest.fixture(scope='module') -def all_test_images(): - img_w = img_h = 20 - rgb_images = [] - rgba_images = [] - gray_images = [] - for n in range(8): - bias = np.random.rand(img_w, img_h, 1) * 64 - variance = np.random.rand(img_w, img_h, 1) * (255 - 64) - imarray = np.random.rand(img_w, img_h, 3) * variance + bias - im = Image.fromarray(imarray.astype('uint8')).convert('RGB') - rgb_images.append(im) - - imarray = np.random.rand(img_w, img_h, 4) * variance + bias - im = Image.fromarray(imarray.astype('uint8')).convert('RGBA') - rgba_images.append(im) - - imarray = np.random.rand(img_w, img_h, 1) * variance + bias - im = Image.fromarray( - imarray.astype('uint8').squeeze()).convert('L') - gray_images.append(im) - - return [rgb_images, rgba_images, gray_images] - - -def test_image_data_generator(all_test_images): - for test_images in all_test_images: - img_list = [] - for im in test_images: - img_list.append(utils.img_to_array(im)[None, ...]) - - image_data_generator.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=0.2, - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True, - interpolation_order=1 - ) - - -def test_image_data_generator_with_validation_split(all_test_images): - for test_images in all_test_images: - img_list = [] - for im in test_images: - img_list.append(utils.img_to_array(im)[None, ...]) - - images = np.vstack(img_list) - labels = np.concatenate([ - np.zeros((int(len(images) / 2),)), - np.ones((int(len(images) / 2),))]) - generator = image_data_generator.ImageDataGenerator( - validation_split=0.5) - - # training and validation sets would have different - # number of classes, because labels are sorted - with pytest.raises(ValueError, - match='Training and validation subsets ' - 'have different number of classes after ' - 'the split.*'): - generator.flow(images, labels, - shuffle=False, batch_size=10, - subset='validation') - - labels = np.concatenate([ - np.zeros((int(len(images) / 4),)), - np.ones((int(len(images) / 4),)), - np.zeros((int(len(images) / 4),)), - np.ones((int(len(images) / 4),)) - ]) - - seq = generator.flow(images, labels, - shuffle=False, batch_size=10, - subset='validation') - - x, y = seq[0] - assert 2 == len(np.unique(y)) - - seq = generator.flow(images, labels, - shuffle=False, batch_size=10, - subset='training') - x2, y2 = seq[0] - assert 2 == len(np.unique(y2)) - - with pytest.raises(ValueError): - generator.flow(images, np.arange(images.shape[0]), - shuffle=False, batch_size=3, - subset='foo') - - -def test_image_data_generator_with_split_value_error(): - with pytest.raises(ValueError): - image_data_generator.ImageDataGenerator(validation_split=5) - - -def test_image_data_generator_invalid_data(): - generator = image_data_generator.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - data_format='channels_last') - # Test fit with invalid data - with pytest.raises(ValueError): - x = np.random.random((3, 10, 10)) - generator.fit(x) - - # Test flow with invalid data - with pytest.raises(ValueError): - x = np.random.random((32, 10, 10)) - generator.flow(np.arange(x.shape[0])) - - -def test_image_data_generator_fit(): - generator = 
image_data_generator.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=(0.2, 0.2), - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True, - interpolation_order=1, - data_format='channels_last' - ) - x = np.random.random((32, 10, 10, 3)) - generator.fit(x, augment=True) - # Test grayscale - x = np.random.random((32, 10, 10, 1)) - generator.fit(x) - # Test RBG - x = np.random.random((32, 10, 10, 3)) - generator.fit(x) - # Test more samples than dims - x = np.random.random((32, 4, 4, 1)) - generator.fit(x) - generator = image_data_generator.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=(0.2, 0.2), - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True, - interpolation_order=1, - data_format='channels_first' - ) - x = np.random.random((32, 10, 10, 3)) - generator.fit(x, augment=True) - # Test grayscale - x = np.random.random((32, 1, 10, 10)) - generator.fit(x) - # Test RBG - x = np.random.random((32, 3, 10, 10)) - generator.fit(x) - # Test more samples than dims - x = np.random.random((32, 1, 4, 4)) - generator.fit(x) - - -def test_image_data_generator_flow(all_test_images, tmpdir): - for test_images in all_test_images: - img_list = [] - for im in test_images: - img_list.append(utils.img_to_array(im)[None, ...]) - - images = np.vstack(img_list) - dsize = images.shape[0] - generator = image_data_generator.ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=0.2, - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True, - interpolation_order=1 - ) - - generator.flow( - images, - np.arange(images.shape[0]), - shuffle=False, - save_to_dir=str(tmpdir), - batch_size=3 - ) - - generator.flow( - images, - np.arange(images.shape[0]), - shuffle=False, - sample_weight=np.arange(images.shape[0]) + 1, - save_to_dir=str(tmpdir), - batch_size=3 - ) - - # Test with `shuffle=True` - generator.flow( - images, np.arange(images.shape[0]), - shuffle=True, - save_to_dir=str(tmpdir), - batch_size=3, - seed=42 - ) - - # Test without y - generator.flow( - images, - None, - shuffle=True, - save_to_dir=str(tmpdir), - batch_size=3 - ) - - # Test with a single miscellaneous input data array - x_misc1 = np.random.random(dsize) - generator.flow( - (images, x_misc1), - np.arange(dsize), - shuffle=False, - batch_size=2 - ) - - # Test with two miscellaneous inputs - x_misc2 = np.random.random((dsize, 3, 3)) - generator.flow( - (images, [x_misc1, x_misc2]), - np.arange(dsize), - shuffle=False, - batch_size=2 - ) - - # Test cases with `y = None` - generator.flow(images, None, batch_size=3) - generator.flow((images, x_misc1), None, batch_size=3, shuffle=False) - generator.flow( - (images, [x_misc1, x_misc2]), - None, - batch_size=3, - 
-
-
-def test_image_data_generator_flow(all_test_images, tmpdir):
-    for test_images in all_test_images:
-        img_list = []
-        for im in test_images:
-            img_list.append(utils.img_to_array(im)[None, ...])
-
-        images = np.vstack(img_list)
-        dsize = images.shape[0]
-        generator = image_data_generator.ImageDataGenerator(
-            featurewise_center=True,
-            samplewise_center=True,
-            featurewise_std_normalization=True,
-            samplewise_std_normalization=True,
-            zca_whitening=True,
-            rotation_range=90.,
-            width_shift_range=0.1,
-            height_shift_range=0.1,
-            shear_range=0.5,
-            zoom_range=0.2,
-            channel_shift_range=0.,
-            brightness_range=(1, 5),
-            fill_mode='nearest',
-            cval=0.5,
-            horizontal_flip=True,
-            vertical_flip=True,
-            interpolation_order=1
-        )
-
-        generator.flow(
-            images,
-            np.arange(images.shape[0]),
-            shuffle=False,
-            save_to_dir=str(tmpdir),
-            batch_size=3
-        )
-
-        generator.flow(
-            images,
-            np.arange(images.shape[0]),
-            shuffle=False,
-            sample_weight=np.arange(images.shape[0]) + 1,
-            save_to_dir=str(tmpdir),
-            batch_size=3
-        )
-
-        # Test with `shuffle=True`
-        generator.flow(
-            images, np.arange(images.shape[0]),
-            shuffle=True,
-            save_to_dir=str(tmpdir),
-            batch_size=3,
-            seed=42
-        )
-
-        # Test without y
-        generator.flow(
-            images,
-            None,
-            shuffle=True,
-            save_to_dir=str(tmpdir),
-            batch_size=3
-        )
-
-        # Test with a single miscellaneous input data array
-        x_misc1 = np.random.random(dsize)
-        generator.flow(
-            (images, x_misc1),
-            np.arange(dsize),
-            shuffle=False,
-            batch_size=2
-        )
-
-        # Test with two miscellaneous inputs
-        x_misc2 = np.random.random((dsize, 3, 3))
-        generator.flow(
-            (images, [x_misc1, x_misc2]),
-            np.arange(dsize),
-            shuffle=False,
-            batch_size=2
-        )
-
-        # Test cases with `y = None`
-        generator.flow(images, None, batch_size=3)
-        generator.flow((images, x_misc1), None, batch_size=3, shuffle=False)
-        generator.flow(
-            (images, [x_misc1, x_misc2]),
-            None,
-            batch_size=3,
-            shuffle=False
-        )
-        generator = image_data_generator.ImageDataGenerator(
-            validation_split=0.2)
-        generator.flow(images, batch_size=3)
-
-        # Test some failure cases:
-        x_misc_err = np.random.random((dsize + 1, 3, 3))
-        with pytest.raises(ValueError) as e_info:
-            generator.flow((images, x_misc_err),
-                           np.arange(dsize), batch_size=3)
-        assert str(e_info.value).find('All of the arrays in') != -1
-
-        with pytest.raises(ValueError) as e_info:
-            generator.flow((images, x_misc1), np.arange(
-                dsize + 1), batch_size=3)
-        assert str(e_info.value).find(
-            '`x` (images tensor) and `y` (labels) ') != -1
-
-        # Test `flow` behavior as Sequence
-        generator.flow(
-            images,
-            np.arange(images.shape[0]),
-            shuffle=False,
-            save_to_dir=str(tmpdir),
-            batch_size=3
-        )
-
-        # Test with `shuffle=True`
-        generator.flow(
-            images,
-            np.arange(images.shape[0]),
-            shuffle=True, save_to_dir=str(tmpdir),
-            batch_size=3, seed=123
-        )
-
-    # test order_interpolation
-    labels = np.array([[2, 2, 0, 2, 2],
-                       [1, 3, 2, 3, 1],
-                       [2, 1, 0, 1, 2],
-                       [3, 1, 0, 2, 0],
-                       [3, 1, 3, 2, 1]])
-
-    label_generator = image_data_generator.ImageDataGenerator(
-        rotation_range=90.,
-        interpolation_order=0
-    )
-    label_generator.flow(
-        x=labels[np.newaxis, ..., np.newaxis],
-        seed=123
-    )
-
-
-def test_valid_args():
-    with pytest.raises(ValueError):
-        image_data_generator.ImageDataGenerator(brightness_range=0.1)
-
-
-def test_batch_standardize(all_test_images):
-    # ImageDataGenerator.standardize should work on batches
-    for test_images in all_test_images:
-        img_list = []
-        for im in test_images:
-            img_list.append(utils.img_to_array(im)[None, ...])
-
-        images = np.vstack(img_list)
-        generator = image_data_generator.ImageDataGenerator(
-            featurewise_center=True,
-            samplewise_center=True,
-            featurewise_std_normalization=True,
-            samplewise_std_normalization=True,
-            zca_whitening=True,
-            rotation_range=90.,
-            width_shift_range=0.1,
-            height_shift_range=0.1,
-            shear_range=0.5,
-            zoom_range=0.2,
-            channel_shift_range=0.,
-            brightness_range=(1, 5),
-            fill_mode='nearest',
-            cval=0.5,
-            horizontal_flip=True,
-            vertical_flip=True)
-        generator.fit(images, augment=True)
-
-        transformed = np.copy(images)
-        for i, im in enumerate(transformed):
-            transformed[i] = generator.random_transform(im)
-        transformed = generator.standardize(transformed)
-
-
-def test_deterministic_transform():
-    generator = image_data_generator.ImageDataGenerator(
-        rotation_range=90,
-        fill_mode='constant')
-    x = np.random.random((32, 32, 3))
-    assert np.allclose(generator.apply_transform(x, {'flip_vertical': True}),
-                       x[::-1, :, :])
-    assert np.allclose(generator.apply_transform(x, {'flip_horizontal': True}),
-                       x[:, ::-1, :])
-    x = np.ones((3, 3, 3))
-    x_rotated = np.array([[[0., 0., 0.],
-                           [0., 0., 0.],
-                           [1., 1., 1.]],
-                          [[0., 0., 0.],
-                           [1., 1., 1.],
-                           [1., 1., 1.]],
-                          [[0., 0., 0.],
-                           [0., 0., 0.],
-                           [1., 1., 1.]]])
-    assert np.allclose(generator.apply_transform(x, {'theta': 45}),
-                       x_rotated)
-
-
-def test_random_transforms():
-    x = np.random.random((2, 28, 28))
-    # Test get_random_transform with predefined seed
-    seed = 1
-    generator = image_data_generator.ImageDataGenerator(
-        rotation_range=90.,
-        width_shift_range=0.1,
-        height_shift_range=0.1,
-        shear_range=0.5,
-        zoom_range=0.2,
-        channel_shift_range=0.1,
-        brightness_range=(1, 5),
-        horizontal_flip=True,
-        vertical_flip=True)
-    transform_dict = generator.get_random_transform(x.shape, seed)
-    transform_dict2 = generator.get_random_transform(x.shape, seed * 2)
-    assert transform_dict['theta'] 
!= 0 - assert transform_dict['theta'] != transform_dict2['theta'] - assert transform_dict['tx'] != 0 - assert transform_dict['tx'] != transform_dict2['tx'] - assert transform_dict['ty'] != 0 - assert transform_dict['ty'] != transform_dict2['ty'] - assert transform_dict['shear'] != 0 - assert transform_dict['shear'] != transform_dict2['shear'] - assert transform_dict['zx'] != 0 - assert transform_dict['zx'] != transform_dict2['zx'] - assert transform_dict['zy'] != 0 - assert transform_dict['zy'] != transform_dict2['zy'] - assert transform_dict['channel_shift_intensity'] != 0 - assert (transform_dict['channel_shift_intensity'] != - transform_dict2['channel_shift_intensity']) - assert transform_dict['brightness'] != 0 - assert transform_dict['brightness'] != transform_dict2['brightness'] - - # Test get_random_transform without any randomness - generator = image_data_generator.ImageDataGenerator() - transform_dict = generator.get_random_transform(x.shape, seed) - assert transform_dict['theta'] == 0 - assert transform_dict['tx'] == 0 - assert transform_dict['ty'] == 0 - assert transform_dict['shear'] == 0 - assert transform_dict['zx'] == 1 - assert transform_dict['zy'] == 1 - assert transform_dict['channel_shift_intensity'] is None - assert transform_dict['brightness'] is None - - -if __name__ == '__main__': - pytest.main([__file__]) -from keras_preprocessing.image import iterator - - -def test_iterator_empty_directory(): - # Testing with different batch sizes - for batch_size in [0, 32]: - data_iterator = iterator.Iterator(0, batch_size, False, 0) - ret = next(data_iterator.index_generator) - assert ret.size == 0 -import numpy as np -import pytest - -from PIL import Image - -from keras_preprocessing.image import numpy_array_iterator -from keras_preprocessing.image import utils -from keras_preprocessing.image.image_data_generator import ImageDataGenerator - - -@pytest.fixture(scope='module') -def all_test_images(): - img_w = img_h = 20 - rgb_images = [] - rgba_images = [] - gray_images = [] - for n in range(8): - bias = np.random.rand(img_w, img_h, 1) * 64 - variance = np.random.rand(img_w, img_h, 1) * (255 - 64) - imarray = np.random.rand(img_w, img_h, 3) * variance + bias - im = Image.fromarray(imarray.astype('uint8')).convert('RGB') - rgb_images.append(im) - - imarray = np.random.rand(img_w, img_h, 4) * variance + bias - im = Image.fromarray(imarray.astype('uint8')).convert('RGBA') - rgba_images.append(im) - - imarray = np.random.rand(img_w, img_h, 1) * variance + bias - im = Image.fromarray( - imarray.astype('uint8').squeeze()).convert('L') - gray_images.append(im) - - return [rgb_images, rgba_images, gray_images] - - -@pytest.fixture(scope='module') -def image_data_generator(): - return ImageDataGenerator( - featurewise_center=True, - samplewise_center=True, - featurewise_std_normalization=True, - samplewise_std_normalization=True, - zca_whitening=True, - rotation_range=90., - width_shift_range=0.1, - height_shift_range=0.1, - shear_range=0.5, - zoom_range=0.2, - channel_shift_range=0., - brightness_range=(1, 5), - fill_mode='nearest', - cval=0.5, - horizontal_flip=True, - vertical_flip=True, - interpolation_order=1 - ) - - -def test_numpy_array_iterator(image_data_generator, all_test_images, tmpdir): - for test_images in all_test_images: - img_list = [] - for im in test_images: - img_list.append(utils.img_to_array(im)[None, ...]) - images = np.vstack(img_list) - dsize = images.shape[0] - - iterator = numpy_array_iterator.NumpyArrayIterator( - images, - np.arange(images.shape[0]), - 
image_data_generator, - shuffle=False, - save_to_dir=str(tmpdir), - batch_size=3 - ) - x, y = next(iterator) - assert x.shape == images[:3].shape - assert list(y) == [0, 1, 2] - - # Test with sample weights - iterator = numpy_array_iterator.NumpyArrayIterator( - images, - np.arange(images.shape[0]), - image_data_generator, - shuffle=False, - sample_weight=np.arange(images.shape[0]) + 1, - save_to_dir=str(tmpdir), - batch_size=3 - ) - x, y, w = iterator.next() - assert x.shape == images[:3].shape - assert list(y) == [0, 1, 2] - assert list(w) == [1, 2, 3] - - # Test with `shuffle=True` - iterator = numpy_array_iterator.NumpyArrayIterator( - images, - np.arange(images.shape[0]), - image_data_generator, - shuffle=True, - save_to_dir=str(tmpdir), - batch_size=3, - seed=42 - ) - x, y = iterator.next() - assert x.shape == images[:3].shape - # Check that the sequence is shuffled. - assert list(y) != [0, 1, 2] - - # Test without y - iterator = numpy_array_iterator.NumpyArrayIterator( - images, - None, - image_data_generator, - shuffle=True, - save_to_dir=str(tmpdir), - batch_size=3 - ) - x = iterator.next() - assert type(x) is np.ndarray - assert x.shape == images[:3].shape - - # Test with a single miscellaneous input data array - x_misc1 = np.random.random(dsize) - iterator = numpy_array_iterator.NumpyArrayIterator( - (images, x_misc1), - np.arange(dsize), - image_data_generator, - shuffle=False, - batch_size=2 - ) - for i, (x, y) in enumerate(iterator): - assert x[0].shape == images[:2].shape - assert (x[1] == x_misc1[(i * 2):((i + 1) * 2)]).all() - if i == 2: - break - - # Test with two miscellaneous inputs - x_misc2 = np.random.random((dsize, 3, 3)) - iterator = numpy_array_iterator.NumpyArrayIterator( - (images, [x_misc1, x_misc2]), - np.arange(dsize), - image_data_generator, - shuffle=False, - batch_size=2 - ) - for i, (x, y) in enumerate(iterator): - assert x[0].shape == images[:2].shape - assert (x[1] == x_misc1[(i * 2):((i + 1) * 2)]).all() - assert (x[2] == x_misc2[(i * 2):((i + 1) * 2)]).all() - if i == 2: - break - - # Test cases with `y = None` - iterator = numpy_array_iterator.NumpyArrayIterator( - images, - None, - image_data_generator, - batch_size=3 - ) - x = iterator.next() - assert type(x) is np.ndarray - assert x.shape == images[:3].shape - - iterator = numpy_array_iterator.NumpyArrayIterator( - (images, x_misc1), - None, - image_data_generator, - batch_size=3, - shuffle=False - ) - x = iterator.next() - assert type(x) is list - assert x[0].shape == images[:3].shape - assert (x[1] == x_misc1[:3]).all() - - iterator = numpy_array_iterator.NumpyArrayIterator( - (images, [x_misc1, x_misc2]), - None, - image_data_generator, - batch_size=3, - shuffle=False - ) - x = iterator.next() - assert type(x) is list - assert x[0].shape == images[:3].shape - assert (x[1] == x_misc1[:3]).all() - assert (x[2] == x_misc2[:3]).all() - - # Test with validation split - generator = ImageDataGenerator(validation_split=0.2) - iterator = numpy_array_iterator.NumpyArrayIterator( - images, - None, - generator, - batch_size=3 - ) - x = iterator.next() - assert isinstance(x, np.ndarray) - assert x.shape == images[:3].shape - - # Test some failure cases: - x_misc_err = np.random.random((dsize + 1, 3, 3)) - - with pytest.raises(ValueError) as e_info: - numpy_array_iterator.NumpyArrayIterator( - (images, x_misc_err), - np.arange(dsize), - generator, - batch_size=3 - ) - assert str(e_info.value).find('All of the arrays in') != -1 - - with pytest.raises(ValueError) as e_info: - 
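-            # (why this raises: `y` is given dsize + 1 labels below while `x`
-            #  carries dsize images, so the iterator rejects the mismatch)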
numpy_array_iterator.NumpyArrayIterator( - (images, x_misc1), - np.arange(dsize + 1), - generator, - batch_size=3 - ) - assert str(e_info.value).find( - '`x` (images tensor) and `y` (labels) ') != -1 - - # Test `flow` behavior as Sequence - seq = numpy_array_iterator.NumpyArrayIterator( - images, - np.arange(images.shape[0]), - generator, - shuffle=False, save_to_dir=str(tmpdir), - batch_size=3 - ) - assert len(seq) == images.shape[0] // 3 + 1 - x, y = seq[0] - assert x.shape == images[:3].shape - assert list(y) == [0, 1, 2] - - # Test with `shuffle=True` - seq = numpy_array_iterator.NumpyArrayIterator( - images, - np.arange(images.shape[0]), - generator, - shuffle=True, - save_to_dir=str(tmpdir), - batch_size=3, - seed=123 - ) - x, y = seq[0] - # Check that the sequence is shuffled. - assert list(y) != [0, 1, 2] - # `on_epoch_end` should reshuffle the sequence. - seq.on_epoch_end() - x2, y2 = seq[0] - assert list(y) != list(y2) - - # test order_interpolation - labels = np.array([[2, 2, 0, 2, 2], - [1, 3, 2, 3, 1], - [2, 1, 0, 1, 2], - [3, 1, 0, 2, 0], - [3, 1, 3, 2, 1]]) - label_generator = ImageDataGenerator( - rotation_range=90., - interpolation_order=0 - ) - labels_gen = numpy_array_iterator.NumpyArrayIterator( - labels[np.newaxis, ..., np.newaxis], - None, - label_generator, - seed=123 - ) - assert (np.unique(labels) == np.unique(next(labels_gen))).all() -from keras_preprocessing import image - - -def test_api_classes(): - expected_exposed_classes = [ - 'DataFrameIterator', - 'DirectoryIterator', - 'ImageDataGenerator', - 'Iterator', - 'NumpyArrayIterator', - ] - for _class in expected_exposed_classes: - assert hasattr(image, _class) - - -def test_api_functions(): - expected_exposed_functions = [ - 'flip_axis', - 'random_rotation', - 'random_shift', - 'random_shear', - 'random_zoom', - 'apply_channel_shift', - 'random_channel_shift', - 'apply_brightness_shift', - 'random_brightness', - 'transform_matrix_offset_center', - 'apply_affine_transform', - 'validate_filename', - 'save_img', - 'load_img', - 'list_pictures', - 'array_to_img', - 'img_to_array' - ] - for function in expected_exposed_functions: - assert hasattr(image, function) -from keras.callbacks import TensorBoard, ModelCheckpoint -import tensorflow as tf -import numpy as np - - -class CustomTensorBoard(TensorBoard): - """ to log the loss after each batch - """ - - def __init__(self, log_every=1, **kwargs): - super(CustomTensorBoard, self).__init__(**kwargs) - self.log_every = log_every - self.counter = 0 - - def on_batch_end(self, batch, logs=None): - self.counter += 1 - if self.counter % self.log_every == 0: - for name, value in logs.items(): - if name in ['batch', 'size']: - continue - summary = tf.Summary() - summary_value = summary.value.add() - summary_value.simple_value = value.item() - summary_value.tag = name - self.writer.add_summary(summary, self.counter) - self.writer.flush() - - super(CustomTensorBoard, self).on_batch_end(batch, logs) - - -class CustomModelCheckpoint(ModelCheckpoint): - """ to save the template model, not the multi-GPU model - """ - - def __init__(self, model_to_save, **kwargs): - super(CustomModelCheckpoint, self).__init__(**kwargs) - self.model_to_save = model_to_save - - def on_epoch_end(self, epoch, logs=None): - logs = logs or {} - self.epochs_since_last_save += 1 - if self.epochs_since_last_save >= self.period: - self.epochs_since_last_save = 0 - filepath = self.filepath.format(epoch=epoch + 1, **logs) - if self.save_best_only: - current = logs.get(self.monitor) - if current is None: - 
-                    import warnings  # needed for the RuntimeWarning below
-                    warnings.warn('Can save best model only with %s available, '
-                                  'skipping.' % (self.monitor), RuntimeWarning)
-                else:
-                    if self.monitor_op(current, self.best):
-                        if self.verbose > 0:
-                            print('\nEpoch %05d: %s improved from %0.5f to %0.5f,'
-                                  ' saving model to %s'
-                                  % (epoch + 1, self.monitor, self.best,
-                                     current, filepath))
-                        self.best = current
-                        if self.save_weights_only:
-                            self.model_to_save.save_weights(
-                                filepath, overwrite=True)
-                        else:
-                            self.model_to_save.save(filepath, overwrite=True)
-                    else:
-                        if self.verbose > 0:
-                            print('\nEpoch %05d: %s did not improve from %0.5f' %
-                                  (epoch + 1, self.monitor, self.best))
-            else:
-                if self.verbose > 0:
-                    print('\nEpoch %05d: saving model to %s' %
-                          (epoch + 1, filepath))
-                if self.save_weights_only:
-                    self.model_to_save.save_weights(filepath, overwrite=True)
-                else:
-                    self.model_to_save.save(filepath, overwrite=True)
-        # on_epoch_end is fully reimplemented here, so no super call is needed
-#! /usr/bin/env python
-
-import argparse
-import os
-import numpy as np
-import json
-from voc import parse_voc_annotation
-from yolo import create_yolov3_model
-from generator import BatchGenerator
-from utils.utils import normalize, evaluate
-from keras.callbacks import EarlyStopping, ModelCheckpoint
-from keras.optimizers import Adam
-from keras.models import load_model
-
-
-def _main_(args):
-    config_path = args.conf
-
-    with open(config_path) as config_buffer:
-        config = json.loads(config_buffer.read())
-
-    ###############################
-    #   Create the validation generator
-    ###############################
-    valid_ints, labels = parse_voc_annotation(
-        config['valid']['valid_annot_folder'],
-        config['valid']['valid_image_folder'],
-        config['valid']['cache_name'],
-        config['model']['labels']
-    )
-
-    labels = labels.keys() if len(
-        config['model']['labels']) == 0 else config['model']['labels']
-    labels = sorted(labels)
-
-    valid_generator = BatchGenerator(
-        instances=valid_ints,
-        anchors=config['model']['anchors'],
-        labels=labels,
-        downsample=32,  # ratio between network input's size and network output's size, 32 for YOLOv3
-        max_box_per_image=0,
-        batch_size=config['train']['batch_size'],
-        min_net_size=config['model']['min_input_size'],
-        max_net_size=config['model']['max_input_size'],
-        shuffle=True,
-        jitter=0.0,
-        norm=normalize
-    )
-
-    ###############################
-    #   Load the model and do evaluation
-    ###############################
-    os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus']
-
-    infer_model = load_model(config['train']['saved_weights_name'])
-
-    # compute mAP for all the classes
-    average_precisions = evaluate(infer_model, valid_generator)
-
-    # print the score
-    for label, average_precision in average_precisions.items():
-        print(labels[label] + ': {:.4f}'.format(average_precision))
-    print('mAP: {:.4f}'.format(
-        sum(average_precisions.values()) / len(average_precisions)))
-
-
-if __name__ == '__main__':
-    argparser = argparse.ArgumentParser(
-        description='Evaluate YOLO_v3 model on any dataset')
-    argparser.add_argument('-c', '--conf', help='path to configuration file')
-
-    args = argparser.parse_args()
-    _main_(args)
-import random
-import argparse
-import numpy as np
-
-from voc import parse_voc_annotation
-import json
-
-
-def IOU(ann, centroids):
-    w, h = ann
-    similarities = []
-
-    for centroid in centroids:
-        c_w, c_h = centroid
-
-        if c_w >= w and c_h >= h:
-            similarity = w*h/(c_w*c_h)
-        elif c_w >= w and c_h <= h:
-            similarity = w*c_h/(w*h + (c_w-w)*c_h)
-        elif c_w <= w and c_h >= h:
-            similarity = c_w*h/(w*h + c_w*(c_h-h))
-        else:  # means both w,h are bigger than c_w and c_h respectively
-            similarity = (c_w*c_h)/(w*h)
-        similarities.append(similarity)  # will become (k,) shape
-
-    return np.array(similarities)
-
-
-def avg_IOU(anns, centroids):
-    n, d = anns.shape
-    total = 0.  # avoid shadowing the builtin sum
-
-    for i in range(anns.shape[0]):
-        total += max(IOU(anns[i], centroids))
-
-    return total/n
-
-
-def print_anchors(centroids):
-    out_string = ''
-
-    anchors = centroids.copy()
-
-    widths = anchors[:, 0]
-    sorted_indices = np.argsort(widths)
-
-    for i in sorted_indices:
-        out_string += str(int(anchors[i, 0]*416)) + \
-            ',' + str(int(anchors[i, 1]*416)) + ', '
-
-    print(out_string[:-2])
-
-
-def run_kmeans(ann_dims, anchor_num):
-    ann_num = ann_dims.shape[0]
-    prev_assignments = np.ones(ann_num)*(-1)
-    iteration = 0
-    old_distances = np.zeros((ann_num, anchor_num))
-
-    indices = [random.randrange(ann_dims.shape[0]) for i in range(anchor_num)]
-    centroids = ann_dims[indices]
-    anchor_dim = ann_dims.shape[1]
-
-    while True:
-        distances = []
-        iteration += 1
-        for i in range(ann_num):
-            d = 1 - IOU(ann_dims[i], centroids)
-            distances.append(d)
-        # distances.shape = (ann_num, anchor_num)
-        distances = np.array(distances)
-
-        print("iteration {}: dists = {}".format(
-            iteration, np.sum(np.abs(old_distances-distances))))
-
-        # assign samples to centroids
-        assignments = np.argmin(distances, axis=1)
-
-        if (assignments == prev_assignments).all():
-            return centroids
-
-        # calculate new centroids
-        centroid_sums = np.zeros((anchor_num, anchor_dim), dtype=float)
-        for i in range(ann_num):
-            centroid_sums[assignments[i]] += ann_dims[i]
-        for j in range(anchor_num):
-            centroids[j] = centroid_sums[j]/(np.sum(assignments == j) + 1e-6)
-
-        prev_assignments = assignments.copy()
-        old_distances = distances.copy()
-
-
-def _main_(args):
-    config_path = args.conf
-    num_anchors = args.anchors
-
-    with open(config_path) as config_buffer:
-        config = json.loads(config_buffer.read())
-
-    train_imgs, train_labels = parse_voc_annotation(
-        config['train']['train_annot_folder'],
-        config['train']['train_image_folder'],
-        config['train']['cache_name'],
-        config['model']['labels']
-    )
-
-    # run k_mean to find the anchors
-    annotation_dims = []
-    for image in train_imgs:
-        print(image['filename'])
-        for obj in image['object']:
-            relative_w = (float(obj['xmax']) -
-                          float(obj['xmin']))/image['width']
-            relative_h = (float(obj['ymax']) -
-                          float(obj['ymin']))/image['height']
-            annotation_dims.append(tuple(map(float, (relative_w, relative_h))))
-
-    annotation_dims = np.array(annotation_dims)
-    centroids = run_kmeans(annotation_dims, num_anchors)
-
-    # write anchors to file
-    print('\naverage IOU for', num_anchors, 'anchors:', '%0.2f' %
-          avg_IOU(annotation_dims, centroids))
-    print_anchors(centroids)
-
-
-if __name__ == '__main__':
-    argparser = argparse.ArgumentParser()
-
-    argparser.add_argument(
-        '-c',
-        '--conf',
-        default='config.json',
-        help='path to configuration file')
-    argparser.add_argument(
-        '-a',
-        '--anchors',
-        default=9,
-        type=int,  # command-line values arrive as strings otherwise
-        help='number of anchors to use')
-
-    args = argparser.parse_args()
-    _main_(args)
-import cv2
-import copy
-import numpy as np
-from keras.utils import Sequence
-from utils.bbox import BoundBox, bbox_iou
-from utils.image import apply_random_scale_and_crop, random_distort_image, random_flip, correct_bounding_boxes
-
-
-class BatchGenerator(Sequence):
-    def __init__(self,
-                 instances,
-                 anchors,
-                 labels,
-                 downsample=32,  # ratio between network input's size and network output's size, 32 for 
YOLOv3 - max_box_per_image=30, - batch_size=1, - min_net_size=320, - max_net_size=608, - shuffle=True, - jitter=True, - norm=None - ): - self.instances = instances - self.batch_size = batch_size - self.labels = labels - self.downsample = downsample - self.max_box_per_image = max_box_per_image - self.min_net_size = (min_net_size//self.downsample)*self.downsample - self.max_net_size = (max_net_size//self.downsample)*self.downsample - self.shuffle = shuffle - self.jitter = jitter - self.norm = norm - self.anchors = [BoundBox(0, 0, anchors[2*i], anchors[2*i+1]) - for i in range(len(anchors)//2)] - self.net_h = 416 - self.net_w = 416 - - if shuffle: - np.random.shuffle(self.instances) - - def __len__(self): - return int(np.ceil(float(len(self.instances))/self.batch_size)) - - def __getitem__(self, idx): - # get image input size, change every 10 batches - net_h, net_w = self._get_net_size(idx) - base_grid_h, base_grid_w = net_h//self.downsample, net_w//self.downsample - - # determine the first and the last indices of the batch - l_bound = idx*self.batch_size - r_bound = (idx+1)*self.batch_size - - if r_bound > len(self.instances): - r_bound = len(self.instances) - l_bound = r_bound - self.batch_size - - x_batch = np.zeros((r_bound - l_bound, net_h, net_w, 3) - ) # input images - # list of groundtruth boxes - t_batch = np.zeros((r_bound - l_bound, 1, 1, 1, - self.max_box_per_image, 4)) - - # initialize the inputs and the outputs - yolo_1 = np.zeros((r_bound - l_bound, 1*base_grid_h, 1*base_grid_w, - len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 1 - yolo_2 = np.zeros((r_bound - l_bound, 2*base_grid_h, 2*base_grid_w, - len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 2 - yolo_3 = np.zeros((r_bound - l_bound, 4*base_grid_h, 4*base_grid_w, - len(self.anchors)//3, 4+1+len(self.labels))) # desired network output 3 - yolos = [yolo_3, yolo_2, yolo_1] - - dummy_yolo_1 = np.zeros((r_bound - l_bound, 1)) - dummy_yolo_2 = np.zeros((r_bound - l_bound, 1)) - dummy_yolo_3 = np.zeros((r_bound - l_bound, 1)) - - instance_count = 0 - true_box_index = 0 - - # do the logic to fill in the inputs and the output - for train_instance in self.instances[l_bound:r_bound]: - # augment input image and fix object's position and size - img, all_objs = self._aug_image(train_instance, net_h, net_w) - - for obj in all_objs: - # find the best anchor box for this object - max_anchor = None - max_index = -1 - max_iou = -1 - - shifted_box = BoundBox(0, - 0, - obj['xmax']-obj['xmin'], - obj['ymax']-obj['ymin']) - - for i in range(len(self.anchors)): - anchor = self.anchors[i] - iou = bbox_iou(shifted_box, anchor) - - if max_iou < iou: - max_anchor = anchor - max_index = i - max_iou = iou - - # determine the yolo to be responsible for this bounding box - yolo = yolos[max_index//3] - grid_h, grid_w = yolo.shape[1:3] - - # determine the position of the bounding box on the grid - center_x = .5*(obj['xmin'] + obj['xmax']) - center_x = center_x / float(net_w) * grid_w # sigma(t_x) + c_x - center_y = .5*(obj['ymin'] + obj['ymax']) - center_y = center_y / float(net_h) * grid_h # sigma(t_y) + c_y - - # determine the sizes of the bounding box - w = np.log((obj['xmax'] - obj['xmin']) / - float(max_anchor.xmax)) # t_w - h = np.log((obj['ymax'] - obj['ymin']) / - float(max_anchor.ymax)) # t_h - - box = [center_x, center_y, w, h] - - # determine the index of the label - obj_indx = self.labels.index(obj['name']) - - # determine the location of the cell responsible for this object - grid_x = 
int(np.floor(center_x))
-                grid_y = int(np.floor(center_y))
-
-                # assign ground truth x, y, w, h, confidence and class probs to y_batch
-                yolo[instance_count, grid_y, grid_x, max_index % 3] = 0
-                yolo[instance_count, grid_y, grid_x, max_index % 3, 0:4] = box
-                yolo[instance_count, grid_y, grid_x, max_index % 3, 4] = 1.
-                yolo[instance_count, grid_y, grid_x,
-                     max_index % 3, 5+obj_indx] = 1
-
-                # assign the true box to t_batch
-                true_box = [center_x, center_y, obj['xmax'] -
-                            obj['xmin'], obj['ymax'] - obj['ymin']]
-                t_batch[instance_count, 0, 0, 0, true_box_index] = true_box
-
-                true_box_index += 1
-                true_box_index = true_box_index % self.max_box_per_image
-
-            # assign input image to x_batch
-            if self.norm is not None:
-                x_batch[instance_count] = self.norm(img)
-            else:
-                # plot image and bounding boxes for sanity check
-                for obj in all_objs:
-                    cv2.rectangle(
-                        img, (obj['xmin'], obj['ymin']), (obj['xmax'], obj['ymax']), (255, 0, 0), 3)
-                    cv2.putText(img, obj['name'],
-                                (obj['xmin']+2, obj['ymin']+12),
-                                0, 1.2e-3 * img.shape[0],
-                                (0, 255, 0), 2)
-
-                x_batch[instance_count] = img
-
-            # increase instance counter in the current batch
-            instance_count += 1
-
-        return [x_batch, t_batch, yolo_1, yolo_2, yolo_3], [dummy_yolo_1, dummy_yolo_2, dummy_yolo_3]
-
-    def _get_net_size(self, idx):
-        if idx % 10 == 0:
-            net_size = self.downsample*np.random.randint(self.min_net_size/self.downsample,
-                                                         self.max_net_size/self.downsample+1)
-            print("resizing: ", net_size, net_size)
-            self.net_h, self.net_w = net_size, net_size
-        return self.net_h, self.net_w
-
-    def _aug_image(self, instance, net_h, net_w):
-        image_name = instance['filename']
-        image = cv2.imread(image_name)  # cv2 reads BGR
-
-        if image is None:
-            raise FileNotFoundError('Cannot find ' + image_name)
-        image = image[:, :, ::-1]  # convert BGR to RGB
-
-        image_h, image_w, _ = image.shape
-
-        # determine the amount of scaling and cropping
-        dw = self.jitter * image_w
-        dh = self.jitter * image_h
-
-        new_ar = (image_w + np.random.uniform(-dw, dw)) / \
-            (image_h + np.random.uniform(-dh, dh))
-        scale = np.random.uniform(0.25, 2)
-
-        if new_ar < 1:
-            new_h = int(scale * net_h)
-            new_w = int(net_h * new_ar)
-        else:
-            new_w = int(scale * net_w)
-            new_h = int(net_w / new_ar)
-
-        dx = int(np.random.uniform(0, net_w - new_w))
-        dy = int(np.random.uniform(0, net_h - new_h))
-
-        # apply scaling and cropping
-        im_sized = apply_random_scale_and_crop(
-            image, new_w, new_h, net_w, net_h, dx, dy)
-
-        # randomly distort hsv space
-        im_sized = random_distort_image(im_sized)
-
-        # randomly flip
-        flip = np.random.randint(2)
-        im_sized = random_flip(im_sized, flip)
-
-        # correct the size and pos of bounding boxes
-        all_objs = correct_bounding_boxes(
-            instance['object'], new_w, new_h, net_w, net_h, dx, dy, flip, image_w, image_h)
-
-        return im_sized, all_objs
-
-    def on_epoch_end(self):
-        if self.shuffle:
-            np.random.shuffle(self.instances)
-
-    def num_classes(self):
-        return len(self.labels)
-
-    def size(self):
-        return len(self.instances)
-
-    def get_anchors(self):
-        anchors = []
-
-        for anchor in self.anchors:
-            anchors += [anchor.xmax, anchor.ymax]
-
-        return anchors
-
-    def load_annotation(self, i):
-        annots = []
-
-        for obj in self.instances[i]['object']:
-            annot = [obj['xmin'], obj['ymin'], obj['xmax'],
-                     obj['ymax'], self.labels.index(obj['name'])]
-            annots += [annot]
-
-        if len(annots) == 0:
-            annots = [[]]
-
-        return np.array(annots)
-
-    def load_image(self, i):
-        return cv2.imread(self.instances[i]['filename'])
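-
-
-# A quick worked check of the grid-cell arithmetic used in __getitem__ above.
-# The numbers are illustrative; 416 and 13 mirror the default net and grid sizes.
-def _sketch_grid_cell():
-    net_w, grid_w = 416, 13
-    center_x = .5 * (100 + 180)                  # box center in pixels: 140.0
-    center_x = center_x / float(net_w) * grid_w  # in grid units: 4.375
-    return int(np.floor(center_x))               # responsible cell: 4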
-#! /usr/bin/env python
-
-import os
-import argparse
-import json
-import cv2
-from utils.utils import get_yolo_boxes, makedirs
-from utils.bbox import draw_boxes
-from keras.models import load_model
-from tqdm import tqdm
-import numpy as np
-
-
-def _main_(args):
-    config_path = args.conf
-    input_path = args.input
-    output_path = args.output
-
-    with open(config_path) as config_buffer:
-        config = json.load(config_buffer)
-
-    makedirs(output_path)
-
-    ###############################
-    #   Set some parameters
-    ###############################
-    net_h, net_w = 416, 416  # a multiple of 32, the smaller the faster
-    obj_thresh, nms_thresh = 0.5, 0.45
-
-    ###############################
-    #   Load the model
-    ###############################
-    os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus']
-    infer_model = load_model(config['train']['saved_weights_name'])
-
-    ###############################
-    #   Predict bounding boxes
-    ###############################
-    if 'webcam' in input_path:  # do detection on the first webcam
-        video_reader = cv2.VideoCapture(0)
-
-        # the main loop
-        batch_size = 1
-        images = []
-        while True:
-            ret_val, image = video_reader.read()
-            if ret_val:
-                images += [image]
-
-            if (len(images) == batch_size) or (not ret_val and len(images) > 0):
-                batch_boxes = get_yolo_boxes(
-                    infer_model, images, net_h, net_w, config['model']['anchors'], obj_thresh, nms_thresh)
-
-                for i in range(len(images)):
-                    draw_boxes(images[i], batch_boxes[i],
-                               config['model']['labels'], obj_thresh)
-                    cv2.imshow('video with bboxes', images[i])
-                images = []
-            if cv2.waitKey(1) == 27:
-                break  # esc to quit
-        cv2.destroyAllWindows()
-    elif input_path[-4:] == '.mp4':  # do detection on a video
-        video_out = output_path + input_path.split('/')[-1]
-        video_reader = cv2.VideoCapture(input_path)
-
-        nb_frames = int(video_reader.get(cv2.CAP_PROP_FRAME_COUNT))
-        frame_h = int(video_reader.get(cv2.CAP_PROP_FRAME_HEIGHT))
-        frame_w = int(video_reader.get(cv2.CAP_PROP_FRAME_WIDTH))
-
-        video_writer = cv2.VideoWriter(video_out,
-                                       cv2.VideoWriter_fourcc(*'MPEG'),
-                                       50.0,
-                                       (frame_w, frame_h))
-        # the main loop
-        batch_size = 1
-        images = []
-        start_point = 0  # %
-        show_window = False
-        for i in tqdm(range(nb_frames)):
-            _, image = video_reader.read()
-
-            if (float(i+1)/nb_frames) > start_point/100.:
-                images += [image]
-
-                if (i % batch_size == 0) or (i == (nb_frames-1) and len(images) > 0):
-                    # predict the bounding boxes
-                    batch_boxes = get_yolo_boxes(
-                        infer_model, images, net_h, net_w, config['model']['anchors'], obj_thresh, nms_thresh)
-
-                    for j in range(len(images)):  # j indexes the batch; i stays the frame index
-                        # draw bounding boxes on the image using labels
-                        draw_boxes(images[j], batch_boxes[j],
-                                   config['model']['labels'], obj_thresh)
-
-                        # show the video with detection bounding boxes
-                        if show_window:
-                            cv2.imshow('video with bboxes', images[j])
-
-                        # write result to the output video
-                        video_writer.write(images[j])
-                    images = []
-                if show_window and cv2.waitKey(1) == 27:
-                    break  # esc to quit
-
-        if show_window:
-            cv2.destroyAllWindows()
-        video_reader.release()
-        video_writer.release()
-    else:  # do detection on an image or a set of images
-        image_paths = []
-
-        if os.path.isdir(input_path):
-            for inp_file in os.listdir(input_path):
-                image_paths += [input_path + inp_file]
-        else:
-            image_paths += [input_path]
-
-        image_paths = [inp_file for inp_file in image_paths if (
-            inp_file[-4:] in ['.jpg', '.png', 'JPEG'])]
-
-        # the main loop
-        for image_path in image_paths:
-            image = cv2.imread(image_path)
-            print(image_path)
-
-            # predict the bounding boxes
-            boxes = get_yolo_boxes(infer_model, [
-                image], net_h, net_w, config['model']['anchors'], obj_thresh, nms_thresh)[0]
-
-            # draw bounding boxes on the image using labels
-            draw_boxes(image, boxes, config['model']['labels'], obj_thresh)
-
-            # write the image with bounding boxes to file
-            cv2.imwrite(output_path + image_path.split('/')
-                        [-1], np.uint8(image))
-
-
-if __name__ == '__main__':
-    argparser = argparse.ArgumentParser(
-        description='Predict with a trained yolo model')
-    argparser.add_argument('-c', '--conf', help='path to configuration file')
-    argparser.add_argument(
-        '-i', '--input', help='path to an image, a directory of images, a video, or webcam')
-    argparser.add_argument(
-        '-o', '--output', default='output/', help='path to output directory')
-
-    args = argparser.parse_args()
-    _main_(args)
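-
-
-# A small sketch of how the two thresholds above interact (illustrative
-# values; `scores` stands in for per-box class confidences): obj_thresh
-# gates which boxes count as detections at all, while nms_thresh is the IoU
-# level at which overlapping survivors are suppressed.
-def _sketch_threshold_filter(scores, obj_thresh=0.5):
-    return [s for s in scores if s > obj_thresh]
-
-# _sketch_threshold_filter([0.9, 0.4, 0.6]) -> [0.9, 0.6]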
-#! /usr/bin/env python
-
-import argparse
-import os
-import numpy as np
-import json
-from voc import parse_voc_annotation
-from yolo import create_yolov3_model, dummy_loss
-from generator import BatchGenerator
-from utils.utils import normalize, evaluate, makedirs
-from keras.callbacks import EarlyStopping, ReduceLROnPlateau
-from keras.optimizers import Adam
-from callbacks import CustomModelCheckpoint, CustomTensorBoard
-from utils.multi_gpu_model import multi_gpu_model
-import tensorflow as tf
-import keras
-from keras.models import load_model
-
-
-def create_training_instances(
-    train_annot_folder,
-    train_image_folder,
-    train_cache,
-    valid_annot_folder,
-    valid_image_folder,
-    valid_cache,
-    labels,
-):
-    # parse annotations of the training set
-    train_ints, train_labels = parse_voc_annotation(
-        train_annot_folder, train_image_folder, train_cache, labels)
-
-    # parse annotations of the validation set, if any, otherwise split the training set
-    if os.path.exists(valid_annot_folder):
-        valid_ints, valid_labels = parse_voc_annotation(
-            valid_annot_folder, valid_image_folder, valid_cache, labels)
-    else:
-        print("valid_annot_folder does not exist. Splitting the training set.")
-
-        train_valid_split = int(0.8*len(train_ints))
-        np.random.seed(0)
-        np.random.shuffle(train_ints)
-        np.random.seed()
-
-        valid_ints = train_ints[train_valid_split:]
-        train_ints = train_ints[:train_valid_split]
-
-    # compare the seen labels with the given labels in config.json
-    if len(labels) > 0:
-        overlap_labels = set(labels).intersection(set(train_labels.keys()))
-
-        print('Seen labels: \t' + str(train_labels) + '\n')
-        print('Given labels: \t' + str(labels))
-
-        # abort if some given label is not in the dataset
-        if len(overlap_labels) < len(labels):
-            print(
-                'Some labels have no annotations! Please revise the list of labels in the config.json.')
-            return None, None, None, None  # the caller unpacks four values
-    else:
-        print('No labels are provided. 
Train on all seen labels.') - print(train_labels) - labels = train_labels.keys() - - max_box_per_image = max([len(inst['object']) - for inst in (train_ints + valid_ints)]) - - return train_ints, valid_ints, sorted(labels), max_box_per_image - - -def create_callbacks(saved_weights_name, tensorboard_logs, model_to_save): - makedirs(tensorboard_logs) - - early_stop = EarlyStopping( - monitor='loss', - min_delta=0.01, - patience=5, - mode='min', - verbose=1 - ) - checkpoint = CustomModelCheckpoint( - model_to_save=model_to_save, - filepath=saved_weights_name, # + '{epoch:02d}.h5', - monitor='loss', - verbose=1, - save_best_only=True, - mode='min', - period=1 - ) - reduce_on_plateau = ReduceLROnPlateau( - monitor='loss', - factor=0.1, - patience=2, - verbose=1, - mode='min', - epsilon=0.01, - cooldown=0, - min_lr=0 - ) - tensorboard = CustomTensorBoard( - log_dir=tensorboard_logs, - write_graph=True, - write_images=True, - ) - return [early_stop, checkpoint, reduce_on_plateau, tensorboard] - - -def create_model( - nb_class, - anchors, - max_box_per_image, - max_grid, batch_size, - warmup_batches, - ignore_thresh, - multi_gpu, - saved_weights_name, - lr, - grid_scales, - obj_scale, - noobj_scale, - xywh_scale, - class_scale -): - if multi_gpu > 1: - with tf.device('/cpu:0'): - template_model, infer_model = create_yolov3_model( - nb_class=nb_class, - anchors=anchors, - max_box_per_image=max_box_per_image, - max_grid=max_grid, - batch_size=batch_size//multi_gpu, - warmup_batches=warmup_batches, - ignore_thresh=ignore_thresh, - grid_scales=grid_scales, - obj_scale=obj_scale, - noobj_scale=noobj_scale, - xywh_scale=xywh_scale, - class_scale=class_scale - ) - else: - template_model, infer_model = create_yolov3_model( - nb_class=nb_class, - anchors=anchors, - max_box_per_image=max_box_per_image, - max_grid=max_grid, - batch_size=batch_size, - warmup_batches=warmup_batches, - ignore_thresh=ignore_thresh, - grid_scales=grid_scales, - obj_scale=obj_scale, - noobj_scale=noobj_scale, - xywh_scale=xywh_scale, - class_scale=class_scale - ) - - # load the pretrained weight if exists, otherwise load the backend weight only - if os.path.exists(saved_weights_name): - print("\nLoading pretrained weights.\n") - template_model.load_weights(saved_weights_name) - else: - template_model.load_weights("backend.h5", by_name=True) - - if multi_gpu > 1: - train_model = multi_gpu_model(template_model, gpus=multi_gpu) - else: - train_model = template_model - - optimizer = Adam(lr=lr, clipnorm=0.001) - train_model.compile(loss=dummy_loss, optimizer=optimizer) - - return train_model, infer_model - - -def _main_(args): - config_path = args.conf - - with open(config_path) as config_buffer: - config = json.loads(config_buffer.read()) - - ############################### - # Parse the annotations - ############################### - train_ints, valid_ints, labels, max_box_per_image = create_training_instances( - config['train']['train_annot_folder'], - config['train']['train_image_folder'], - config['train']['cache_name'], - config['valid']['valid_annot_folder'], - config['valid']['valid_image_folder'], - config['valid']['cache_name'], - config['model']['labels'] - ) - print('\nTraining on: \t' + str(labels) + '\n') - - ############################### - # Create the generators - ############################### - train_generator = BatchGenerator( - instances=train_ints, - anchors=config['model']['anchors'], - labels=labels, - downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3 - 
max_box_per_image=max_box_per_image, - batch_size=config['train']['batch_size'], - min_net_size=config['model']['min_input_size'], - max_net_size=config['model']['max_input_size'], - shuffle=True, - jitter=0.3, - norm=normalize - ) - - valid_generator = BatchGenerator( - instances=valid_ints, - anchors=config['model']['anchors'], - labels=labels, - downsample=32, # ratio between network input's size and network output's size, 32 for YOLOv3 - max_box_per_image=max_box_per_image, - batch_size=config['train']['batch_size'], - min_net_size=config['model']['min_input_size'], - max_net_size=config['model']['max_input_size'], - shuffle=True, - jitter=0.0, - norm=normalize - ) - - ############################### - # Create the model - ############################### - if os.path.exists(config['train']['saved_weights_name']): - config['train']['warmup_epochs'] = 0 - warmup_batches = config['train']['warmup_epochs'] * \ - (config['train']['train_times']*len(train_generator)) - - os.environ['CUDA_VISIBLE_DEVICES'] = config['train']['gpus'] - multi_gpu = len(config['train']['gpus'].split(',')) - - train_model, infer_model = create_model( - nb_class=len(labels), - anchors=config['model']['anchors'], - max_box_per_image=max_box_per_image, - max_grid=[config['model']['max_input_size'], - config['model']['max_input_size']], - batch_size=config['train']['batch_size'], - warmup_batches=warmup_batches, - ignore_thresh=config['train']['ignore_thresh'], - multi_gpu=multi_gpu, - saved_weights_name=config['train']['saved_weights_name'], - lr=config['train']['learning_rate'], - grid_scales=config['train']['grid_scales'], - obj_scale=config['train']['obj_scale'], - noobj_scale=config['train']['noobj_scale'], - xywh_scale=config['train']['xywh_scale'], - class_scale=config['train']['class_scale'], - ) - - ############################### - # Kick off the training - ############################### - callbacks = create_callbacks( - config['train']['saved_weights_name'], config['train']['tensorboard_dir'], infer_model) - - train_model.fit_generator( - generator=train_generator, - steps_per_epoch=len(train_generator) * config['train']['train_times'], - epochs=config['train']['nb_epochs'] + config['train']['warmup_epochs'], - verbose=2 if config['train']['debug'] else 1, - callbacks=callbacks, - workers=4, - max_queue_size=8 - ) - - # make a GPU version of infer_model for evaluation - if multi_gpu > 1: - infer_model = load_model(config['train']['saved_weights_name']) - - ############################### - # Run the evaluation - ############################### - # compute mAP for all the classes - average_precisions = evaluate(infer_model, valid_generator) - - # print the score - for label, average_precision in average_precisions.items(): - print(labels[label] + ': {:.4f}'.format(average_precision)) - print('mAP: {:.4f}'.format( - sum(average_precisions.values()) / len(average_precisions))) - - -if __name__ == '__main__': - argparser = argparse.ArgumentParser( - description='train and evaluate YOLO_v3 model on any dataset') - argparser.add_argument('-c', '--conf', help='path to configuration file') - - args = argparser.parse_args() - _main_(args) -import numpy as np -import os -import xml.etree.ElementTree as ET -import pickle - - -def parse_voc_annotation(ann_dir, img_dir, cache_name, labels=[]): - if os.path.exists(cache_name): - with open(cache_name, 'rb') as handle: - cache = pickle.load(handle) - all_insts, seen_labels = cache['all_insts'], cache['seen_labels'] - else: - all_insts = [] - seen_labels = {} - - for ann 
in sorted(os.listdir(ann_dir)): - img = {'object': []} - - try: - tree = ET.parse(ann_dir + ann) - except Exception as e: - print(e) - print('Ignore this bad annotation: ' + ann_dir + ann) - continue - - for elem in tree.iter(): - if 'filename' in elem.tag: - img['filename'] = img_dir + elem.text - if 'width' in elem.tag: - img['width'] = int(elem.text) - if 'height' in elem.tag: - img['height'] = int(elem.text) - if 'object' in elem.tag or 'part' in elem.tag: - obj = {} - - for attr in list(elem): - if 'name' in attr.tag: - obj['name'] = attr.text - - if obj['name'] in seen_labels: - seen_labels[obj['name']] += 1 - else: - seen_labels[obj['name']] = 1 - - if len(labels) > 0 and obj['name'] not in labels: - break - else: - img['object'] += [obj] - - if 'bndbox' in attr.tag: - for dim in list(attr): - if 'xmin' in dim.tag: - obj['xmin'] = int(round(float(dim.text))) - if 'ymin' in dim.tag: - obj['ymin'] = int(round(float(dim.text))) - if 'xmax' in dim.tag: - obj['xmax'] = int(round(float(dim.text))) - if 'ymax' in dim.tag: - obj['ymax'] = int(round(float(dim.text))) - - if len(img['object']) > 0: - all_insts += [img] - - cache = {'all_insts': all_insts, 'seen_labels': seen_labels} - with open(cache_name, 'wb') as handle: - pickle.dump(cache, handle, protocol=pickle.HIGHEST_PROTOCOL) - - return all_insts, seen_labels -from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D, Lambda -from keras.layers.merge import add, concatenate -from keras.models import Model -from keras.engine.topology import Layer -import tensorflow as tf - - -class YoloLayer(Layer): - def __init__(self, anchors, max_grid, batch_size, warmup_batches, ignore_thresh, - grid_scale, obj_scale, noobj_scale, xywh_scale, class_scale, - **kwargs): - # make the model settings persistent - self.ignore_thresh = ignore_thresh - self.warmup_batches = warmup_batches - self.anchors = tf.constant( - anchors, dtype='float', shape=[1, 1, 1, 3, 2]) - self.grid_scale = grid_scale - self.obj_scale = obj_scale - self.noobj_scale = noobj_scale - self.xywh_scale = xywh_scale - self.class_scale = class_scale - - # make a persistent mesh grid - max_grid_h, max_grid_w = max_grid - - cell_x = tf.to_float(tf.reshape(tf.tile(tf.range(max_grid_w), [ - max_grid_h]), (1, max_grid_h, max_grid_w, 1, 1))) - cell_y = tf.transpose(cell_x, (0, 2, 1, 3, 4)) - self.cell_grid = tf.tile( - tf.concat([cell_x, cell_y], -1), [batch_size, 1, 1, 3, 1]) - - super(YoloLayer, self).__init__(**kwargs) - - def build(self, input_shape): - # Be sure to call this somewhere! - super(YoloLayer, self).build(input_shape) - - def call(self, x): - input_image, y_pred, y_true, true_boxes = x - - # adjust the shape of the y_predict [batch, grid_h, grid_w, 3, 4+1+nb_class] - y_pred = tf.reshape(y_pred, tf.concat( - [tf.shape(y_pred)[:3], tf.constant([3, -1])], axis=0)) - - # initialize the masks - object_mask = tf.expand_dims(y_true[..., 4], 4) - - # the variable to keep track of number of batches processed - batch_seen = tf.Variable(0.) 
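-
-        # (layout note for the tensors below: y_true is
-        #  [batch, grid_h, grid_w, anchor, 4 box terms + 1 objectness + classes],
-        #  so object_mask above is the objectness channel kept at rank 5)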
- - # compute grid factor and net factor - grid_h = tf.shape(y_true)[1] - grid_w = tf.shape(y_true)[2] - grid_factor = tf.reshape( - tf.cast([grid_w, grid_h], tf.float32), [1, 1, 1, 1, 2]) - - net_h = tf.shape(input_image)[1] - net_w = tf.shape(input_image)[2] - net_factor = tf.reshape( - tf.cast([net_w, net_h], tf.float32), [1, 1, 1, 1, 2]) - - """ - Adjust prediction - """ - pred_box_xy = (self.cell_grid[:, :grid_h, :grid_w, :, :] + - tf.sigmoid(y_pred[..., :2])) # sigma(t_xy) + c_xy - # t_wh - pred_box_wh = y_pred[..., 2:4] - # adjust confidence - pred_box_conf = tf.expand_dims(tf.sigmoid(y_pred[..., 4]), 4) - # adjust class probabilities - pred_box_class = y_pred[..., 5:] - - """ - Adjust ground truth - """ - true_box_xy = y_true[..., 0:2] # (sigma(t_xy) + c_xy) - true_box_wh = y_true[..., 2:4] # t_wh - true_box_conf = tf.expand_dims(y_true[..., 4], 4) - true_box_class = tf.argmax(y_true[..., 5:], -1) - - """ - Compare each predicted box to all true boxes - """ - # initially, drag all objectness of all boxes to 0 - conf_delta = pred_box_conf - 0 - - # then, ignore the boxes which have good overlap with some true box - true_xy = true_boxes[..., 0:2] / grid_factor - true_wh = true_boxes[..., 2:4] / net_factor - - true_wh_half = true_wh / 2. - true_mins = true_xy - true_wh_half - true_maxes = true_xy + true_wh_half - - pred_xy = tf.expand_dims(pred_box_xy / grid_factor, 4) - pred_wh = tf.expand_dims( - tf.exp(pred_box_wh) * self.anchors / net_factor, 4) - - pred_wh_half = pred_wh / 2. - pred_mins = pred_xy - pred_wh_half - pred_maxes = pred_xy + pred_wh_half - - intersect_mins = tf.maximum(pred_mins, true_mins) - intersect_maxes = tf.minimum(pred_maxes, true_maxes) - - intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) - intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] - - true_areas = true_wh[..., 0] * true_wh[..., 1] - pred_areas = pred_wh[..., 0] * pred_wh[..., 1] - - union_areas = pred_areas + true_areas - intersect_areas - iou_scores = tf.truediv(intersect_areas, union_areas) - - best_ious = tf.reduce_max(iou_scores, axis=4) - conf_delta *= tf.expand_dims(tf.to_float(best_ious < - self.ignore_thresh), 4) - - """ - Compute some online statistics - """ - true_xy = true_box_xy / grid_factor - true_wh = tf.exp(true_box_wh) * self.anchors / net_factor - - true_wh_half = true_wh / 2. - true_mins = true_xy - true_wh_half - true_maxes = true_xy + true_wh_half - - pred_xy = pred_box_xy / grid_factor - pred_wh = tf.exp(pred_box_wh) * self.anchors / net_factor - - pred_wh_half = pred_wh / 2. - pred_mins = pred_xy - pred_wh_half - pred_maxes = pred_xy + pred_wh_half - - intersect_mins = tf.maximum(pred_mins, true_mins) - intersect_maxes = tf.minimum(pred_maxes, true_maxes) - intersect_wh = tf.maximum(intersect_maxes - intersect_mins, 0.) 
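-        # (worked IoU example: two unit squares overlapping in a 0.5 x 1.0
-        #  strip give IoU = 0.5 / (1 + 1 - 0.5) = 1/3; the division below
-        #  relies on union_areas > 0 for real boxes)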
- intersect_areas = intersect_wh[..., 0] * intersect_wh[..., 1] - - true_areas = true_wh[..., 0] * true_wh[..., 1] - pred_areas = pred_wh[..., 0] * pred_wh[..., 1] - - union_areas = pred_areas + true_areas - intersect_areas - iou_scores = tf.truediv(intersect_areas, union_areas) - iou_scores = object_mask * tf.expand_dims(iou_scores, 4) - - count = tf.reduce_sum(object_mask) - count_noobj = tf.reduce_sum(1 - object_mask) - detect_mask = tf.to_float((pred_box_conf*object_mask) >= 0.5) - class_mask = tf.expand_dims(tf.to_float( - tf.equal(tf.argmax(pred_box_class, -1), true_box_class)), 4) - recall50 = tf.reduce_sum(tf.to_float( - iou_scores >= 0.5) * detect_mask * class_mask) / (count + 1e-3) - recall75 = tf.reduce_sum(tf.to_float( - iou_scores >= 0.75) * detect_mask * class_mask) / (count + 1e-3) - avg_iou = tf.reduce_sum(iou_scores) / (count + 1e-3) - avg_obj = tf.reduce_sum(pred_box_conf * object_mask) / (count + 1e-3) - avg_noobj = tf.reduce_sum( - pred_box_conf * (1-object_mask)) / (count_noobj + 1e-3) - avg_cat = tf.reduce_sum(object_mask * class_mask) / (count + 1e-3) - - """ - Warm-up training - """ - batch_seen = tf.assign_add(batch_seen, 1.) - - true_box_xy, true_box_wh, xywh_mask = tf.cond(tf.less(batch_seen, self.warmup_batches+1), - lambda: [true_box_xy + (0.5 + self.cell_grid[:, :grid_h, :grid_w, :, :]) * (1-object_mask), - true_box_wh + - tf.zeros_like(true_box_wh) * - (1-object_mask), - tf.ones_like(object_mask)], - lambda: [true_box_xy, - true_box_wh, - object_mask]) - - """ - Compare each true box to all anchor boxes - """ - wh_scale = tf.exp(true_box_wh) * self.anchors / net_factor - # the smaller the box, the bigger the scale - wh_scale = tf.expand_dims( - 2 - wh_scale[..., 0] * wh_scale[..., 1], axis=4) - - xy_delta = xywh_mask * (pred_box_xy-true_box_xy) * \ - wh_scale * self.xywh_scale - wh_delta = xywh_mask * (pred_box_wh-true_box_wh) * \ - wh_scale * self.xywh_scale - conf_delta = object_mask * (pred_box_conf-true_box_conf) * \ - self.obj_scale + (1-object_mask) * conf_delta * self.noobj_scale - class_delta = object_mask * \ - tf.expand_dims(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=true_box_class, logits=pred_box_class), 4) * \ - self.class_scale - - loss_xy = tf.reduce_sum(tf.square(xy_delta), list(range(1, 5))) - loss_wh = tf.reduce_sum(tf.square(wh_delta), list(range(1, 5))) - loss_conf = tf.reduce_sum(tf.square(conf_delta), list(range(1, 5))) - loss_class = tf.reduce_sum( - class_delta, list(range(1, 5))) - - loss = loss_xy + loss_wh + loss_conf + loss_class - - loss = tf.Print(loss, [grid_h, avg_obj], - message='avg_obj \t\t', summarize=1000) - loss = tf.Print(loss, [grid_h, avg_noobj], - message='avg_noobj \t\t', summarize=1000) - loss = tf.Print(loss, [grid_h, avg_iou], - message='avg_iou \t\t', summarize=1000) - loss = tf.Print(loss, [grid_h, avg_cat], - message='avg_cat \t\t', summarize=1000) - loss = tf.Print(loss, [grid_h, recall50], - message='recall50 \t', summarize=1000) - loss = tf.Print(loss, [grid_h, recall75], - message='recall75 \t', summarize=1000) - loss = tf.Print(loss, [grid_h, count], - message='count \t', summarize=1000) - loss = tf.Print(loss, [grid_h, tf.reduce_sum(loss_xy), - tf.reduce_sum(loss_wh), - tf.reduce_sum(loss_conf), - tf.reduce_sum(loss_class)], message='loss xy, wh, conf, class: \t', summarize=1000) - - return loss*self.grid_scale - - def compute_output_shape(self, input_shape): - return [(None, 1)] - - -def _conv_block(inp, convs, do_skip=True): - x = inp - count = 0 - - for conv in convs: - if count == 
(len(convs) - 2) and do_skip: - skip_connection = x - count += 1 - - if conv['stride'] > 1: - # unlike tensorflow darknet prefer left and top paddings - x = ZeroPadding2D(((1, 0), (1, 0)))(x) - x = Conv2D(conv['filter'], - conv['kernel'], - strides=conv['stride'], - # unlike tensorflow darknet prefer left and top paddings - padding='valid' if conv['stride'] > 1 else 'same', - name='conv_' + str(conv['layer_idx']), - use_bias=False if conv['bnorm'] else True)(x) - if conv['bnorm']: - x = BatchNormalization( - epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x) - if conv['leaky']: - x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x) - - return add([skip_connection, x]) if do_skip else x - - -def create_yolov3_model( - nb_class, - anchors, - max_box_per_image, - max_grid, - batch_size, - warmup_batches, - ignore_thresh, - grid_scales, - obj_scale, - noobj_scale, - xywh_scale, - class_scale -): - input_image = Input(shape=(None, None, 3)) # net_h, net_w, 3 - true_boxes = Input(shape=(1, 1, 1, max_box_per_image, 4)) - # grid_h, grid_w, nb_anchor, 5+nb_class - true_yolo_1 = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class)) - # grid_h, grid_w, nb_anchor, 5+nb_class - true_yolo_2 = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class)) - # grid_h, grid_w, nb_anchor, 5+nb_class - true_yolo_3 = Input(shape=(None, None, len(anchors)//6, 4+1+nb_class)) - - # Layer 0 => 4 - x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0}, - {'filter': 64, 'kernel': 3, 'stride': 2, - 'bnorm': True, 'leaky': True, 'layer_idx': 1}, - {'filter': 32, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 2}, - {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}]) - - # Layer 5 => 8 - x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5}, - {'filter': 64, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 6}, - {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}]) - - # Layer 9 => 11 - x = _conv_block(x, [{'filter': 64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9}, - {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}]) - - # Layer 12 => 15 - x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12}, - {'filter': 128, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 13}, - {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}]) - - # Layer 16 => 36 - for i in range(7): - x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3}, - {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}]) - - skip_36 = x - - # Layer 37 => 40 - x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37}, - {'filter': 256, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 38}, - {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}]) - - # Layer 41 => 61 - for i in range(7): - x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3}, - {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}]) - - skip_61 = x - - # Layer 62 => 65 - x = 
_conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62}, - {'filter': 512, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 63}, - {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}]) - - # Layer 66 => 74 - for i in range(3): - x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3}, - {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}]) - - # Layer 75 => 79 - x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75}, - {'filter': 1024, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 76}, - {'filter': 512, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 77}, - {'filter': 1024, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 78}, - {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], do_skip=False) - - # Layer 80 => 82 - pred_yolo_1 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 80}, - {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], do_skip=False) - loss_yolo_1 = YoloLayer(anchors[12:], - [1*num for num in max_grid], - batch_size, - warmup_batches, - ignore_thresh, - grid_scales[0], - obj_scale, - noobj_scale, - xywh_scale, - class_scale)([input_image, pred_yolo_1, true_yolo_1, true_boxes]) - - # Layer 83 => 86 - x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 84}], do_skip=False) - x = UpSampling2D(2)(x) - x = concatenate([x, skip_61]) - - # Layer 87 => 91 - x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87}, - {'filter': 512, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 88}, - {'filter': 256, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 89}, - {'filter': 512, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 90}, - {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], do_skip=False) - - # Layer 92 => 94 - pred_yolo_2 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 92}, - {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], do_skip=False) - loss_yolo_2 = YoloLayer(anchors[6:12], - [2*num for num in max_grid], - batch_size, - warmup_batches, - ignore_thresh, - grid_scales[1], - obj_scale, - noobj_scale, - xywh_scale, - class_scale)([input_image, pred_yolo_2, true_yolo_2, true_boxes]) - - # Layer 95 => 98 - x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 96}], do_skip=False) - x = UpSampling2D(2)(x) - x = concatenate([x, skip_36]) - - # Layer 99 => 106 - pred_yolo_3 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 99}, - {'filter': 256, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 100}, - {'filter': 128, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 101}, - {'filter': 256, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 102}, - {'filter': 128, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 
'layer_idx': 103}, - {'filter': 256, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 104}, - {'filter': (3*(5+nb_class)), 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], do_skip=False) - loss_yolo_3 = YoloLayer(anchors[:6], - [4*num for num in max_grid], - batch_size, - warmup_batches, - ignore_thresh, - grid_scales[2], - obj_scale, - noobj_scale, - xywh_scale, - class_scale)([input_image, pred_yolo_3, true_yolo_3, true_boxes]) - - train_model = Model([input_image, true_boxes, true_yolo_1, true_yolo_2, true_yolo_3], [ - loss_yolo_1, loss_yolo_2, loss_yolo_3]) - infer_model = Model(input_image, [pred_yolo_1, pred_yolo_2, pred_yolo_3]) - - return [train_model, infer_model] - - -def dummy_loss(y_true, y_pred): - return tf.sqrt(tf.reduce_sum(y_pred)) -import argparse -import os -import numpy as np -from keras.layers import Conv2D, Input, BatchNormalization, LeakyReLU, ZeroPadding2D, UpSampling2D -from keras.layers.merge import add, concatenate -from keras.models import Model -import struct -import cv2 - -np.set_printoptions(threshold=np.nan) -os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" -os.environ["CUDA_VISIBLE_DEVICES"] = "0" - -argparser = argparse.ArgumentParser( - description='test yolov3 network with coco weights') - -argparser.add_argument( - '-w', - '--weights', - help='path to weights file') - -argparser.add_argument( - '-i', - '--image', - help='path to image file') - - -class WeightReader: - def __init__(self, weight_file): - with open(weight_file, 'rb') as w_f: - major, = struct.unpack('i', w_f.read(4)) - minor, = struct.unpack('i', w_f.read(4)) - revision, = struct.unpack('i', w_f.read(4)) - - if (major*10 + minor) >= 2 and major < 1000 and minor < 1000: - w_f.read(8) - else: - w_f.read(4) - - transpose = (major > 1000) or (minor > 1000) - - binary = w_f.read() - - self.offset = 0 - self.all_weights = np.frombuffer(binary, dtype='float32') - - def read_bytes(self, size): - self.offset = self.offset + size - return self.all_weights[self.offset-size:self.offset] - - def load_weights(self, model): - for i in range(106): - try: - conv_layer = model.get_layer('conv_' + str(i)) - print("loading weights of convolution #" + str(i)) - - if i not in [81, 93, 105]: - norm_layer = model.get_layer('bnorm_' + str(i)) - - size = np.prod(norm_layer.get_weights()[0].shape) - - beta = self.read_bytes(size) # bias - gamma = self.read_bytes(size) # scale - mean = self.read_bytes(size) # mean - var = self.read_bytes(size) # variance - - weights = norm_layer.set_weights([gamma, beta, mean, var]) - - if len(conv_layer.get_weights()) > 1: - bias = self.read_bytes( - np.prod(conv_layer.get_weights()[1].shape)) - kernel = self.read_bytes( - np.prod(conv_layer.get_weights()[0].shape)) - - kernel = kernel.reshape( - list(reversed(conv_layer.get_weights()[0].shape))) - kernel = kernel.transpose([2, 3, 1, 0]) - conv_layer.set_weights([kernel, bias]) - else: - kernel = self.read_bytes( - np.prod(conv_layer.get_weights()[0].shape)) - kernel = kernel.reshape( - list(reversed(conv_layer.get_weights()[0].shape))) - kernel = kernel.transpose([2, 3, 1, 0]) - conv_layer.set_weights([kernel]) - except ValueError: - print("no convolution #" + str(i)) - - def reset(self): - self.offset = 0 - - -class BoundBox: - def __init__(self, xmin, ymin, xmax, ymax, objness=None, classes=None): - self.xmin = xmin - self.ymin = ymin - self.xmax = xmax - self.ymax = ymax - - self.objness = objness - self.classes = classes - - self.label = -1 - self.score = -1 - - def 
get_label(self): - if self.label == -1: - self.label = np.argmax(self.classes) - - return self.label - - def get_score(self): - if self.score == -1: - self.score = self.classes[self.get_label()] - - return self.score - - -def _conv_block(inp, convs, skip=True): - x = inp - count = 0 - - for conv in convs: - if count == (len(convs) - 2) and skip: - skip_connection = x - count += 1 - - if conv['stride'] > 1: - # peculiar padding as darknet prefer left and top - x = ZeroPadding2D(((1, 0), (1, 0)))(x) - x = Conv2D(conv['filter'], - conv['kernel'], - strides=conv['stride'], - # peculiar padding as darknet prefer left and top - padding='valid' if conv['stride'] > 1 else 'same', - name='conv_' + str(conv['layer_idx']), - use_bias=False if conv['bnorm'] else True)(x) - if conv['bnorm']: - x = BatchNormalization( - epsilon=0.001, name='bnorm_' + str(conv['layer_idx']))(x) - if conv['leaky']: - x = LeakyReLU(alpha=0.1, name='leaky_' + str(conv['layer_idx']))(x) - - return add([skip_connection, x]) if skip else x - - -def _interval_overlap(interval_a, interval_b): - x1, x2 = interval_a - x3, x4 = interval_b - - if x3 < x1: - if x4 < x1: - return 0 - else: - return min(x2, x4) - x1 - else: - if x2 < x3: - return 0 - else: - return min(x2, x4) - x3 - - -def _sigmoid(x): - return 1. / (1. + np.exp(-x)) - - -def bbox_iou(box1, box2): - intersect_w = _interval_overlap( - [box1.xmin, box1.xmax], [box2.xmin, box2.xmax]) - intersect_h = _interval_overlap( - [box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) - - intersect = intersect_w * intersect_h - - w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin - w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin - - union = w1*h1 + w2*h2 - intersect - - return float(intersect) / union - - -def make_yolov3_model(): - input_image = Input(shape=(None, None, 3)) - - # Layer 0 => 4 - x = _conv_block(input_image, [{'filter': 32, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 0}, - {'filter': 64, 'kernel': 3, 'stride': 2, - 'bnorm': True, 'leaky': True, 'layer_idx': 1}, - {'filter': 32, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 2}, - {'filter': 64, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 3}]) - - # Layer 5 => 8 - x = _conv_block(x, [{'filter': 128, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 5}, - {'filter': 64, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 6}, - {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 7}]) - - # Layer 9 => 11 - x = _conv_block(x, [{'filter': 64, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 9}, - {'filter': 128, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 10}]) - - # Layer 12 => 15 - x = _conv_block(x, [{'filter': 256, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 12}, - {'filter': 128, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 13}, - {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 14}]) - - # Layer 16 => 36 - for i in range(7): - x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 16+i*3}, - {'filter': 256, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 17+i*3}]) - - skip_36 = x - - # Layer 37 => 40 - x = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 37}, - {'filter': 256, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': 
True, 'layer_idx': 38}, - {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 39}]) - - # Layer 41 => 61 - for i in range(7): - x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 41+i*3}, - {'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 42+i*3}]) - - skip_61 = x - - # Layer 62 => 65 - x = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 2, 'bnorm': True, 'leaky': True, 'layer_idx': 62}, - {'filter': 512, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 63}, - {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 64}]) - - # Layer 66 => 74 - for i in range(3): - x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 66+i*3}, - {'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 67+i*3}]) - - # Layer 75 => 79 - x = _conv_block(x, [{'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 75}, - {'filter': 1024, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 76}, - {'filter': 512, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 77}, - {'filter': 1024, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 78}, - {'filter': 512, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 79}], skip=False) - - # Layer 80 => 82 - yolo_82 = _conv_block(x, [{'filter': 1024, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 80}, - {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 81}], skip=False) - - # Layer 83 => 86 - x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 84}], skip=False) - x = UpSampling2D(2)(x) - x = concatenate([x, skip_61]) - - # Layer 87 => 91 - x = _conv_block(x, [{'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 87}, - {'filter': 512, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 88}, - {'filter': 256, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 89}, - {'filter': 512, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 90}, - {'filter': 256, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 91}], skip=False) - - # Layer 92 => 94 - yolo_94 = _conv_block(x, [{'filter': 512, 'kernel': 3, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 92}, - {'filter': 255, 'kernel': 1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 93}], skip=False) - - # Layer 95 => 98 - x = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 96}], skip=False) - x = UpSampling2D(2)(x) - x = concatenate([x, skip_36]) - - # Layer 99 => 106 - yolo_106 = _conv_block(x, [{'filter': 128, 'kernel': 1, 'stride': 1, 'bnorm': True, 'leaky': True, 'layer_idx': 99}, - {'filter': 256, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 100}, - {'filter': 128, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 101}, - {'filter': 256, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 102}, - {'filter': 128, 'kernel': 1, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 103}, - {'filter': 256, 'kernel': 3, 'stride': 1, - 'bnorm': True, 'leaky': True, 'layer_idx': 104}, - {'filter': 255, 'kernel': 
1, 'stride': 1, 'bnorm': False, 'leaky': False, 'layer_idx': 105}], skip=False) - - model = Model(input_image, [yolo_82, yolo_94, yolo_106]) - return model - - -def preprocess_input(image, net_h, net_w): - new_h, new_w, _ = image.shape - - # determine the new size of the image - if (float(net_w)/new_w) < (float(net_h)/new_h): - new_h = (new_h * net_w)/new_w - new_w = net_w - else: - new_w = (new_w * net_h)/new_h - new_h = net_h - - # resize the image to the new size - resized = cv2.resize(image[:, :, ::-1]/255., (int(new_w), int(new_h))) - - # embed the image into the standard letter box - new_image = np.ones((net_h, net_w, 3)) * 0.5 - new_image[int((net_h-new_h)//2):int((net_h+new_h)//2), - int((net_w-new_w)//2):int((net_w+new_w)//2), :] = resized - new_image = np.expand_dims(new_image, 0) - - return new_image - - -def decode_netout(netout, anchors, obj_thresh, nms_thresh, net_h, net_w): - grid_h, grid_w = netout.shape[:2] - nb_box = 3 - netout = netout.reshape((grid_h, grid_w, nb_box, -1)) - nb_class = netout.shape[-1] - 5 - - boxes = [] - - netout[..., :2] = _sigmoid(netout[..., :2]) - netout[..., 4:] = _sigmoid(netout[..., 4:]) - netout[..., 5:] = netout[..., 4][..., np.newaxis] * netout[..., 5:] - netout[..., 5:] *= netout[..., 5:] > obj_thresh - - for i in range(grid_h*grid_w): - row = i / grid_w - col = i % grid_w - - for b in range(nb_box): - # 4th element is objectness score - objectness = netout[int(row)][int(col)][b][4] - #objectness = netout[..., :4] - - if(objectness.all() <= obj_thresh): - continue - - # first 4 elements are x, y, w, and h - x, y, w, h = netout[int(row)][int(col)][b][:4] - - x = (col + x) / grid_w # center position, unit: image width - y = (row + y) / grid_h # center position, unit: image height - w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width - h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height - - # last elements are class probabilities - classes = netout[int(row)][col][b][5:] - - box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes) - #box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, None, classes) - - boxes.append(box) - - return boxes - - -def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w): - if (float(net_w)/image_w) < (float(net_h)/image_h): - new_w = net_w - new_h = (image_h*net_w)/image_w - else: - new_h = net_w - new_w = (image_w*net_h)/image_h - - for i in range(len(boxes)): - x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w - y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h - - boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w) - boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w) - boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h) - boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h) - - -def do_nms(boxes, nms_thresh): - if len(boxes) > 0: - nb_class = len(boxes[0].classes) - else: - return - - for c in range(nb_class): - sorted_indices = np.argsort([-box.classes[c] for box in boxes]) - - for i in range(len(sorted_indices)): - index_i = sorted_indices[i] - - if boxes[index_i].classes[c] == 0: - continue - - for j in range(i+1, len(sorted_indices)): - index_j = sorted_indices[j] - - if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh: - boxes[index_j].classes[c] = 0 - - -def draw_boxes(image, boxes, labels, obj_thresh): - for box in boxes: - label_str = '' - label = -1 - - for i in range(len(labels)): - if box.classes[i] > obj_thresh: - label_str += labels[i] - label = i - print(labels[i] + ': ' + 
str(box.classes[i]*100) + '%') - - if label >= 0: - cv2.rectangle(image, (box.xmin, box.ymin), - (box.xmax, box.ymax), (0, 255, 0), 3) - cv2.putText(image, - label_str + ' ' + str(box.get_score()), - (box.xmin, box.ymin - 13), - cv2.FONT_HERSHEY_SIMPLEX, - 1e-3 * image.shape[0], - (0, 255, 0), 2) - - return image - - -def _main_(args): - weights_path = args.weights - image_path = args.image - - # set some parameters - net_h, net_w = 416, 416 - obj_thresh, nms_thresh = 0.5, 0.45 - anchors = [[116, 90, 156, 198, 373, 326], [ - 30, 61, 62, 45, 59, 119], [10, 13, 16, 30, 33, 23]] - labels = ["person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck", - "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench", - "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe", - "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", - "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", - "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana", - "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", - "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse", - "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", - "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush"] - - # make the yolov3 model to predict 80 classes on COCO - yolov3 = make_yolov3_model() - - # load the weights trained on COCO into the model - weight_reader = WeightReader(weights_path) - weight_reader.load_weights(yolov3) - - # preprocess the image - image = cv2.imread(image_path) - image_h, image_w, _ = image.shape - new_image = preprocess_input(image, net_h, net_w) - - # run the prediction - yolos = yolov3.predict(new_image) - boxes = [] - - for i in range(len(yolos)): - # decode the output of the network - boxes += decode_netout(yolos[i][0], anchors[i], - obj_thresh, nms_thresh, net_h, net_w) - - # correct the sizes of the bounding boxes - correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w) - - # suppress non-maximal boxes - do_nms(boxes, nms_thresh) - - # draw bounding boxes on the image using labels - draw_boxes(image, boxes, labels, obj_thresh) - - # write the image with bounding boxes to file - cv2.imwrite(image_path[:-4] + '_detected' + - image_path[-4:], (image).astype('uint8')) - - -if __name__ == '__main__': - args = argparser.parse_args() - _main_(args) -import numpy as np -import os -import cv2 -from .colors import get_color - - -class BoundBox: - def __init__(self, xmin, ymin, xmax, ymax, c=None, classes=None): - self.xmin = xmin - self.ymin = ymin - self.xmax = xmax - self.ymax = ymax - - self.c = c - self.classes = classes - - self.label = -1 - self.score = -1 - - def get_label(self): - if self.label == -1: - self.label = np.argmax(self.classes) - - return self.label - - def get_score(self): - if self.score == -1: - self.score = self.classes[self.get_label()] - - return self.score - - -def _interval_overlap(interval_a, interval_b): - x1, x2 = interval_a - x3, x4 = interval_b - - if x3 < x1: - if x4 < x1: - return 0 - else: - return min(x2, x4) - x1 - else: - if x2 < x3: - return 0 - else: - return min(x2, x4) - x3 - - -def bbox_iou(box1, box2): - intersect_w = _interval_overlap( - [box1.xmin, box1.xmax], [box2.xmin, box2.xmax]) - intersect_h = _interval_overlap( - [box1.ymin, box1.ymax], [box2.ymin, box2.ymax]) - - 
intersect = intersect_w * intersect_h - - w1, h1 = box1.xmax-box1.xmin, box1.ymax-box1.ymin - w2, h2 = box2.xmax-box2.xmin, box2.ymax-box2.ymin - - union = w1*h1 + w2*h2 - intersect - - return float(intersect) / union - - -def draw_boxes(image, boxes, labels, obj_thresh, quiet=True): - for box in boxes: - label_str = '' - label = -1 - - for i in range(len(labels)): - if box.classes[i] > obj_thresh: - if label_str != '': - label_str += ', ' - label_str += (labels[i] + ' ' + - str(round(box.get_score()*100, 2)) + '%') - label = i - if not quiet: - print(label_str) - - if label >= 0: - text_size = cv2.getTextSize( - label_str, cv2.FONT_HERSHEY_SIMPLEX, 1.1e-3 * image.shape[0], 5) - width, height = text_size[0][0], text_size[0][1] - region = np.array([[box.xmin-3, box.ymin], - [box.xmin-3, box.ymin-height-26], - [box.xmin+width+13, box.ymin-height-26], - [box.xmin+width+13, box.ymin]], dtype='int32') - - cv2.rectangle(img=image, pt1=(box.xmin, box.ymin), pt2=( - box.xmax, box.ymax), color=get_color(label), thickness=5) - cv2.fillPoly(img=image, pts=[region], color=get_color(label)) - cv2.putText(img=image, - text=label_str, - org=(box.xmin+13, box.ymin - 13), - fontFace=cv2.FONT_HERSHEY_SIMPLEX, - fontScale=1e-3 * image.shape[0], - color=(0, 0, 0), - thickness=2) - - return image -def get_color(label): - """ Return a color from a set of predefined colors. Contains 80 colors in total. - code originally from https://github.com/fizyr/keras-retinanet/ - Args - label: The label to get the color for. - Returns - A list of three values representing a RGB color. - """ - if label < len(colors): - return colors[label] - else: - print('Label {} has no color, returning default.'.format(label)) - return (0, 255, 0) - - -colors = [ - [31, 0, 255], - [0, 159, 255], - [255, 95, 0], - [255, 19, 0], - [255, 0, 0], - [255, 38, 0], - [0, 255, 25], - [255, 0, 133], - [255, 172, 0], - [108, 0, 255], - [0, 82, 255], - [0, 255, 6], - [255, 0, 152], - [223, 0, 255], - [12, 0, 255], - [0, 255, 178], - [108, 255, 0], - [184, 0, 255], - [255, 0, 76], - [146, 255, 0], - [51, 0, 255], - [0, 197, 255], - [255, 248, 0], - [255, 0, 19], - [255, 0, 38], - [89, 255, 0], - [127, 255, 0], - [255, 153, 0], - [0, 255, 255], - [0, 255, 216], - [0, 255, 121], - [255, 0, 248], - [70, 0, 255], - [0, 255, 159], - [0, 216, 255], - [0, 6, 255], - [0, 63, 255], - [31, 255, 0], - [255, 57, 0], - [255, 0, 210], - [0, 255, 102], - [242, 255, 0], - [255, 191, 0], - [0, 255, 63], - [255, 0, 95], - [146, 0, 255], - [184, 255, 0], - [255, 114, 0], - [0, 255, 235], - [255, 229, 0], - [0, 178, 255], - [255, 0, 114], - [255, 0, 57], - [0, 140, 255], - [0, 121, 255], - [12, 255, 0], - [255, 210, 0], - [0, 255, 44], - [165, 255, 0], - [0, 25, 255], - [0, 255, 140], - [0, 101, 255], - [0, 255, 82], - [223, 255, 0], - [242, 0, 255], - [89, 0, 255], - [165, 0, 255], - [70, 255, 0], - [255, 0, 172], - [255, 76, 0], - [203, 255, 0], - [204, 0, 255], - [255, 0, 229], - [255, 133, 0], - [127, 0, 255], - [0, 235, 255], - [0, 255, 197], - [255, 0, 191], - [0, 44, 255], - [50, 255, 0] -] -import cv2 -import numpy as np -import copy - - -def _rand_scale(scale): - scale = np.random.uniform(1, scale) - return scale if (np.random.randint(2) == 0) else 1./scale - - -def _constrain(min_v, max_v, value): - if value < min_v: - return min_v - if value > max_v: - return max_v - return value - - -def random_flip(image, flip): - if flip == 1: - return cv2.flip(image, 1) - return image - - -def correct_bounding_boxes(boxes, new_w, new_h, net_w, net_h, dx, dy, flip, image_w, 
image_h): - boxes = copy.deepcopy(boxes) - - # randomize boxes' order - np.random.shuffle(boxes) - - # correct sizes and positions - sx, sy = float(new_w)/image_w, float(new_h)/image_h - zero_boxes = [] - - for i in range(len(boxes)): - boxes[i]['xmin'] = int(_constrain(0, net_w, boxes[i]['xmin']*sx + dx)) - boxes[i]['xmax'] = int(_constrain(0, net_w, boxes[i]['xmax']*sx + dx)) - boxes[i]['ymin'] = int(_constrain(0, net_h, boxes[i]['ymin']*sy + dy)) - boxes[i]['ymax'] = int(_constrain(0, net_h, boxes[i]['ymax']*sy + dy)) - - if boxes[i]['xmax'] <= boxes[i]['xmin'] or boxes[i]['ymax'] <= boxes[i]['ymin']: - zero_boxes += [i] - continue - - if flip == 1: - swap = boxes[i]['xmin'] - boxes[i]['xmin'] = net_w - boxes[i]['xmax'] - boxes[i]['xmax'] = net_w - swap - - boxes = [boxes[i] for i in range(len(boxes)) if i not in zero_boxes] - - return boxes - - -def random_distort_image(image, hue=18, saturation=1.5, exposure=1.5): - # determine scale factors - dhue = np.random.uniform(-hue, hue) - dsat = _rand_scale(saturation) - dexp = _rand_scale(exposure) - - # convert RGB space to HSV space - image = cv2.cvtColor(image, cv2.COLOR_RGB2HSV).astype('float') - - # change satuation and exposure - image[:, :, 1] *= dsat - image[:, :, 2] *= dexp - - # change hue - image[:, :, 0] += dhue - image[:, :, 0] -= (image[:, :, 0] > 180)*180 - image[:, :, 0] += (image[:, :, 0] < 0) * 180 - - # convert back to RGB from HSV - return cv2.cvtColor(image.astype('uint8'), cv2.COLOR_HSV2RGB) - - -def apply_random_scale_and_crop(image, new_w, new_h, net_w, net_h, dx, dy): - im_sized = cv2.resize(image, (new_w, new_h)) - - if dx > 0: - im_sized = np.pad(im_sized, ((0, 0), (dx, 0), (0, 0)), - mode='constant', constant_values=127) - else: - im_sized = im_sized[:, -dx:, :] - if (new_w + dx) < net_w: - im_sized = np.pad(im_sized, ((0, 0), (0, net_w - (new_w+dx)), - (0, 0)), mode='constant', constant_values=127) - - if dy > 0: - im_sized = np.pad(im_sized, ((dy, 0), (0, 0), (0, 0)), - mode='constant', constant_values=127) - else: - im_sized = im_sized[-dy:, :, :] - - if (new_h + dy) < net_h: - im_sized = np.pad(im_sized, ((0, net_h - (new_h+dy)), - (0, 0), (0, 0)), mode='constant', constant_values=127) - - return im_sized[:net_h, :net_w, :] -from keras.layers import Lambda, concatenate -from keras.models import Model -import tensorflow as tf - - -def multi_gpu_model(model, gpus): - if isinstance(gpus, (list, tuple)): - num_gpus = len(gpus) - target_gpu_ids = gpus - else: - num_gpus = gpus - target_gpu_ids = range(num_gpus) - - def get_slice(data, i, parts): - shape = tf.shape(data) - batch_size = shape[:1] - input_shape = shape[1:] - step = batch_size // parts - if i == num_gpus - 1: - size = batch_size - step * i - else: - size = step - size = tf.concat([size, input_shape], axis=0) - stride = tf.concat([step, input_shape * 0], axis=0) - start = stride * i - return tf.slice(data, start, size) - - all_outputs = [] - for i in range(len(model.outputs)): - all_outputs.append([]) - - # Place a copy of the model on each GPU, - # each getting a slice of the inputs. - for i, gpu_id in enumerate(target_gpu_ids): - with tf.device('/gpu:%d' % gpu_id): - with tf.name_scope('replica_%d' % gpu_id): - inputs = [] - # Retrieve a slice of the input. - for x in model.inputs: - input_shape = tuple(x.get_shape().as_list())[1:] - slice_i = Lambda(get_slice, - output_shape=input_shape, - arguments={'i': i, - 'parts': num_gpus})(x) - inputs.append(slice_i) - - # Apply model on slice - # (creating a model replica on the target device). 
- outputs = model(inputs) - if not isinstance(outputs, list): - outputs = [outputs] - - # Save the outputs for merging back together later. - for o in range(len(outputs)): - all_outputs[o].append(outputs[o]) - - # Merge outputs on CPU. - with tf.device('/cpu:0'): - merged = [] - for name, outputs in zip(model.output_names, all_outputs): - merged.append(concatenate(outputs, - axis=0, name=name)) - return Model(model.inputs, merged) -import cv2 -import numpy as np -import os -from .bbox import BoundBox, bbox_iou -from scipy.special import expit - - -def _sigmoid(x): - return expit(x) - - -def makedirs(path): - try: - os.makedirs(path) - except OSError: - if not os.path.isdir(path): - raise - - -def evaluate(model, - generator, - iou_threshold=0.5, - obj_thresh=0.5, - nms_thresh=0.45, - net_h=416, - net_w=416, - save_path=None): - """ Evaluate a given dataset using a given model. - code originally from https://github.com/fizyr/keras-retinanet - - # Arguments - model : The model to evaluate. - generator : The generator that represents the dataset to evaluate. - iou_threshold : The threshold used to consider when a detection is positive or negative. - obj_thresh : The threshold used to distinguish between object and non-object - nms_thresh : The threshold used to determine whether two detections are duplicates - net_h : The height of the input image to the model, higher value results in better accuracy - net_w : The width of the input image to the model - save_path : The path to save images with visualized detections to. - # Returns - A dict mapping class names to mAP scores. - """ - # gather all detections and annotations - all_detections = [[None for i in range( - generator.num_classes())] for j in range(generator.size())] - all_annotations = [[None for i in range( - generator.num_classes())] for j in range(generator.size())] - - for i in range(generator.size()): - raw_image = [generator.load_image(i)] - - # make the boxes and the labels - pred_boxes = get_yolo_boxes( - model, raw_image, net_h, net_w, generator.get_anchors(), obj_thresh, nms_thresh)[0] - - score = np.array([box.get_score() for box in pred_boxes]) - pred_labels = np.array([box.label for box in pred_boxes]) - - if len(pred_boxes) > 0: - pred_boxes = np.array( - [[box.xmin, box.ymin, box.xmax, box.ymax, box.get_score()] for box in pred_boxes]) - else: - pred_boxes = np.array([[]]) - - # sort the boxes and the labels according to scores - score_sort = np.argsort(-score) - pred_labels = pred_labels[score_sort] - pred_boxes = pred_boxes[score_sort] - - # copy detections to all_detections - for label in range(generator.num_classes()): - all_detections[i][label] = pred_boxes[pred_labels == label, :] - - annotations = generator.load_annotation(i) - - # copy detections to all_annotations - for label in range(generator.num_classes()): - all_annotations[i][label] = annotations[annotations[:, 4] - == label, :4].copy() - - # compute mAP by comparing all detections and all annotations - average_precisions = {} - - for label in range(generator.num_classes()): - false_positives = np.zeros((0,)) - true_positives = np.zeros((0,)) - scores = np.zeros((0,)) - num_annotations = 0.0 - - for i in range(generator.size()): - detections = all_detections[i][label] - annotations = all_annotations[i][label] - num_annotations += annotations.shape[0] - detected_annotations = [] - - for d in detections: - scores = np.append(scores, d[4]) - - if annotations.shape[0] == 0: - false_positives = np.append(false_positives, 1) - true_positives = 
np.append(true_positives, 0) - continue - - overlaps = compute_overlap( - np.expand_dims(d, axis=0), annotations) - assigned_annotation = np.argmax(overlaps, axis=1) - max_overlap = overlaps[0, assigned_annotation] - - if max_overlap >= iou_threshold and assigned_annotation not in detected_annotations: - false_positives = np.append(false_positives, 0) - true_positives = np.append(true_positives, 1) - detected_annotations.append(assigned_annotation) - else: - false_positives = np.append(false_positives, 1) - true_positives = np.append(true_positives, 0) - - # no annotations -> AP for this class is 0 (is this correct?) - if num_annotations == 0: - average_precisions[label] = 0 - continue - - # sort by score - indices = np.argsort(-scores) - false_positives = false_positives[indices] - true_positives = true_positives[indices] - - # compute false positives and true positives - false_positives = np.cumsum(false_positives) - true_positives = np.cumsum(true_positives) - - # compute recall and precision - recall = true_positives / num_annotations - precision = true_positives / \ - np.maximum(true_positives + false_positives, - np.finfo(np.float64).eps) - - # compute average precision - average_precision = compute_ap(recall, precision) - average_precisions[label] = average_precision - - return average_precisions - - -def correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w): - if (float(net_w)/image_w) < (float(net_h)/image_h): - new_w = net_w - new_h = (image_h*net_w)/image_w - else: - new_h = net_w - new_w = (image_w*net_h)/image_h - - for i in range(len(boxes)): - x_offset, x_scale = (net_w - new_w)/2./net_w, float(new_w)/net_w - y_offset, y_scale = (net_h - new_h)/2./net_h, float(new_h)/net_h - - boxes[i].xmin = int((boxes[i].xmin - x_offset) / x_scale * image_w) - boxes[i].xmax = int((boxes[i].xmax - x_offset) / x_scale * image_w) - boxes[i].ymin = int((boxes[i].ymin - y_offset) / y_scale * image_h) - boxes[i].ymax = int((boxes[i].ymax - y_offset) / y_scale * image_h) - - -def do_nms(boxes, nms_thresh): - if len(boxes) > 0: - nb_class = len(boxes[0].classes) - else: - return - - for c in range(nb_class): - sorted_indices = np.argsort([-box.classes[c] for box in boxes]) - - for i in range(len(sorted_indices)): - index_i = sorted_indices[i] - - if boxes[index_i].classes[c] == 0: - continue - - for j in range(i+1, len(sorted_indices)): - index_j = sorted_indices[j] - - if bbox_iou(boxes[index_i], boxes[index_j]) >= nms_thresh: - boxes[index_j].classes[c] = 0 - - -def decode_netout(netout, anchors, obj_thresh, net_h, net_w): - grid_h, grid_w = netout.shape[:2] - nb_box = 3 - netout = netout.reshape((grid_h, grid_w, nb_box, -1)) - nb_class = netout.shape[-1] - 5 - - boxes = [] - - netout[..., :2] = _sigmoid(netout[..., :2]) - netout[..., 4] = _sigmoid(netout[..., 4]) - netout[..., 5:] = netout[..., 4][..., np.newaxis] * \ - _softmax(netout[..., 5:]) - netout[..., 5:] *= netout[..., 5:] > obj_thresh - - for i in range(grid_h*grid_w): - row = i // grid_w - col = i % grid_w - - for b in range(nb_box): - # 4th element is objectness score - objectness = netout[row, col, b, 4] - - if(objectness <= obj_thresh): - continue - - # first 4 elements are x, y, w, and h - x, y, w, h = netout[row, col, b, :4] - - x = (col + x) / grid_w # center position, unit: image width - y = (row + y) / grid_h # center position, unit: image height - w = anchors[2 * b + 0] * np.exp(w) / net_w # unit: image width - h = anchors[2 * b + 1] * np.exp(h) / net_h # unit: image height - - # last elements are class probabilities - 
classes = netout[row, col, b, 5:] - - box = BoundBox(x-w/2, y-h/2, x+w/2, y+h/2, objectness, classes) - - boxes.append(box) - - return boxes - - -def preprocess_input(image, net_h, net_w): - new_h, new_w, _ = image.shape - - # determine the new size of the image - if (float(net_w)/new_w) < (float(net_h)/new_h): - new_h = (new_h * net_w)//new_w - new_w = net_w - else: - new_w = (new_w * net_h)//new_h - new_h = net_h - - # resize the image to the new size - resized = cv2.resize(image[:, :, ::-1]/255., (new_w, new_h)) - - # embed the image into the standard letter box - new_image = np.ones((net_h, net_w, 3)) * 0.5 - new_image[(net_h-new_h)//2:(net_h+new_h)//2, - (net_w-new_w)//2:(net_w+new_w)//2, :] = resized - new_image = np.expand_dims(new_image, 0) - - return new_image - - -def normalize(image): - return image/255. - - -def get_yolo_boxes(model, images, net_h, net_w, anchors, obj_thresh, nms_thresh): - image_h, image_w, _ = images[0].shape - nb_images = len(images) - batch_input = np.zeros((nb_images, net_h, net_w, 3)) - - # preprocess the input - for i in range(nb_images): - batch_input[i] = preprocess_input(images[i], net_h, net_w) - - # run the prediction - batch_output = model.predict_on_batch(batch_input) - batch_boxes = [None]*nb_images - - for i in range(nb_images): - yolos = [batch_output[0][i], batch_output[1][i], batch_output[2][i]] - boxes = [] - - # decode the output of the network - for j in range(len(yolos)): - # config['model']['anchors'] - yolo_anchors = anchors[(2-j)*6:(3-j)*6] - boxes += decode_netout(yolos[j], - yolo_anchors, obj_thresh, net_h, net_w) - - # correct the sizes of the bounding boxes - correct_yolo_boxes(boxes, image_h, image_w, net_h, net_w) - - # suppress non-maximal boxes - do_nms(boxes, nms_thresh) - - batch_boxes[i] = boxes - - return batch_boxes - - -def compute_overlap(a, b): - """ - Code originally from https://github.com/rbgirshick/py-faster-rcnn. - Parameters - ---------- - a: (N, 4) ndarray of float - b: (K, 4) ndarray of float - Returns - ------- - overlaps: (N, K) ndarray of overlap between boxes and query_boxes - """ - area = (b[:, 2] - b[:, 0]) * (b[:, 3] - b[:, 1]) - - iw = np.minimum(np.expand_dims( - a[:, 2], axis=1), b[:, 2]) - np.maximum(np.expand_dims(a[:, 0], 1), b[:, 0]) - ih = np.minimum(np.expand_dims( - a[:, 3], axis=1), b[:, 3]) - np.maximum(np.expand_dims(a[:, 1], 1), b[:, 1]) - - iw = np.maximum(iw, 0) - ih = np.maximum(ih, 0) - - ua = np.expand_dims((a[:, 2] - a[:, 0]) * - (a[:, 3] - a[:, 1]), axis=1) + area - iw * ih - - ua = np.maximum(ua, np.finfo(float).eps) - - intersection = iw * ih - - return intersection / ua - - -def compute_ap(recall, precision): - """ Compute the average precision, given the recall and precision curves. - Code originally from https://github.com/rbgirshick/py-faster-rcnn. - - # Arguments - recall: The recall curve (list). - precision: The precision curve (list). - # Returns - The average precision as computed in py-faster-rcnn. 
- """ - # correct AP calculation - # first append sentinel values at the end - mrec = np.concatenate(([0.], recall, [1.])) - mpre = np.concatenate(([0.], precision, [0.])) - - # compute the precision envelope - for i in range(mpre.size - 1, 0, -1): - mpre[i - 1] = np.maximum(mpre[i - 1], mpre[i]) - - # to calculate area under PR curve, look for points - # where X axis (recall) changes value - i = np.where(mrec[1:] != mrec[:-1])[0] - - # and sum (\Delta recall) * prec - ap = np.sum((mrec[i + 1] - mrec[i]) * mpre[i + 1]) - return ap - - -def _softmax(x, axis=-1): - x = x - np.amax(x, axis, keepdims=True) - e_x = np.exp(x) - - return e_x / e_x.sum(axis, keepdims=True) -# -*- coding: utf-8 -*- -from distutils.core import setup - -setup(name='convnetskeras', - version='0.1', - description='Pre-trained convnets in Keras', - author='Leonard Blier', - author_email='leonard.blier@ens.fr', - packages=['convnetskeras'], - package_dir={'convnetskeras': 'convnetskeras'}, - package_data={'convnetskeras': ['data/*']}, - long_description=open('README.md').read(), - ) -# -*- coding: utf-8 -*- -import numpy as np -from convnetskeras.customlayers import crosschannelnormalization -from convnetskeras.customlayers import Softmax4D -from convnetskeras.customlayers import splittensor -from convnetskeras.imagenet_tool import synset_to_dfs_ids -from keras.layers import Activation -from keras.layers import Dense -from keras.layers import Dropout -from keras.layers import Flatten -from keras.layers import Input -from keras.layers import merge -from keras.layers.convolutional import Convolution2D -from keras.layers.convolutional import MaxPooling2D -from keras.layers.convolutional import ZeroPadding2D -from keras.models import Model -from keras.models import Sequential -from keras.optimizers import SGD -from scipy.misc import imread -from scipy.misc import imresize - - -def convnet(network, weights_path=None, heatmap=False, trainable=None): - """ - Returns a keras model for a CNN. - - BEWARE !! : Since the different convnets have been trained in different settings, they don't take - data of the same shape. You should change the arguments of preprocess_image_batch for each CNN : - * For AlexNet, the data are of shape (227,227), and the colors in the RGB order (default) - * For VGG16 and VGG19, the data are of shape (224,224), and the colors in the BGR order - - It can also be used to look at the hidden layers of the model. - - It can be used that way : - >>> im = preprocess_image_batch(['cat.jpg']) - - >>> # Test pretrained model - >>> model = convnet('vgg_16', 'weights/vgg16_weights.h5') - >>> sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) - >>> model.compile(optimizer=sgd, loss='categorical_crossentropy') - >>> out = model.predict(im) - - Parameters - -------------- - network: str - The type of network chosen. For the moment, can be 'vgg_16' or 'vgg_19' - - weights_path: str - Location of the pre-trained model. If not given, the model will be trained - - heatmap: bool - Says wether the fully connected layers are transformed into Convolution2D layers, - to produce a heatmap instead of a - - - Returns - --------------- - model: - The keras model for this convnet - - output_dict: - Dict of feature layers, asked for in output_layers. 
- """ - def __get_heatmap_model(): - convnet_heatmap = convnet_init(heatmap=True) - for layer in convnet_heatmap.layers: - if layer.name.startswith('conv'): - orig_layer = convnet.get_layer(layer.name) - layer.set_weights(orig_layer.get_weights()) - elif layer.name.startswith('dense'): - orig_layer = convnet.get_layer(layer.name) - W, b = orig_layer.get_weights() - n_filter, previous_filter, ax1, ax2 = layer.get_weights()[ - 0].shape - new_W = W.reshape((previous_filter, ax1, ax2, n_filter)) - new_W = new_W.transpose((3, 0, 1, 2)) - new_W = new_W[:, :, ::-1, ::-1] - layer.set_weights([new_W, b]) - return convnet_heatmap - - # Select the network - convnet_init = __get_model_based_on_input_network(network) - convnet = convnet_init(weights_path, heatmap=False) - return __get_heatmap_model() if heatmap else convnet - - -def __get_model_based_on_input_network(network): - """ - Select correct model method based on input string - - :type network: str - """ - if network == 'vgg_16': - convnet_init = VGG_16 - elif network == 'vgg_19': - convnet_init = VGG_19 - elif network == 'alexnet': - convnet_init = AlexNet - else: - raise ValueError("Only 'vgg_16', 'vgg_19', 'alexnet' models available") - return convnet_init - - -def VGG_16(weights_path=None, heatmap=False): - model = Sequential() - if heatmap: - model.add(ZeroPadding2D((1, 1), input_shape=(3, None, None))) - else: - model.add(ZeroPadding2D((1, 1), input_shape=(3, 224, 224))) - model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_1')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_2')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_1')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_2')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_1')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_2')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_3')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_1')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_2')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_3')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_1')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_2')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_3')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - if heatmap: - model.add(Convolution2D(4096, 7, 7, activation='relu', name='dense_1')) - model.add(Convolution2D(4096, 1, 1, activation='relu', name='dense_2')) - model.add(Convolution2D(1000, 1, 1, name='dense_3')) - model.add(Softmax4D(axis=1, name='softmax')) - else: - model.add(Flatten(name='flatten')) - model.add(Dense(4096, activation='relu', name='dense_1')) - model.add(Dropout(0.5)) - model.add(Dense(4096, activation='relu', name='dense_2')) - model.add(Dropout(0.5)) 
- model.add(Dense(1000, name='dense_3')) - model.add(Activation('softmax', name='softmax')) - - if weights_path: - model.load_weights(weights_path) - return model - - -def VGG_19(weights_path=None, heatmap=False): - model = Sequential() - - if heatmap: - model.add(ZeroPadding2D((1, 1), input_shape=(3, None, None))) - else: - model.add(ZeroPadding2D((1, 1), input_shape=(3, 224, 224))) - model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_1')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(64, 3, 3, activation='relu', name='conv1_2')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_1')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(128, 3, 3, activation='relu', name='conv2_2')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_1')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_2')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_3')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(256, 3, 3, activation='relu', name='conv3_4')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_1')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_2')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_3')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv4_4')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_1')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_2')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_3')) - model.add(ZeroPadding2D((1, 1))) - model.add(Convolution2D(512, 3, 3, activation='relu', name='conv5_4')) - model.add(MaxPooling2D((2, 2), strides=(2, 2))) - - if heatmap: - model.add(Convolution2D(4096, 7, 7, activation='relu', name='dense_1')) - model.add(Convolution2D(4096, 1, 1, activation='relu', name='dense_2')) - model.add(Convolution2D(1000, 1, 1, name='dense_3')) - model.add(Softmax4D(axis=1, name='softmax')) - else: - model.add(Flatten()) - model.add(Dense(4096, activation='relu', name='dense_1')) - model.add(Dropout(0.5)) - model.add(Dense(4096, activation='relu', name='dense_2')) - model.add(Dropout(0.5)) - model.add(Dense(1000, name='dense_3')) - model.add(Activation('softmax')) - - if weights_path: - model.load_weights(weights_path) - - return model - - -def AlexNet(weights_path=None, heatmap=False): - if heatmap: - inputs = Input(shape=(3, None, None)) - else: - inputs = Input(shape=(3, 227, 227)) - - conv_1 = Convolution2D(96, 11, 11, subsample=(4, 4), activation='relu', - name='conv_1')(inputs) - - conv_2 = MaxPooling2D((3, 3), strides=(2, 2))(conv_1) - conv_2 = crosschannelnormalization(name='convpool_1')(conv_2) - conv_2 = ZeroPadding2D((2, 2))(conv_2) - conv_2 = merge([ - Convolution2D(128, 5, 5, activation='relu', name='conv_2_' + str(i + 1))( - splittensor(ratio_split=2, id_split=i)(conv_2) - ) for i in range(2)], mode='concat', 
concat_axis=1, name='conv_2') - - conv_3 = MaxPooling2D((3, 3), strides=(2, 2))(conv_2) - conv_3 = crosschannelnormalization()(conv_3) - conv_3 = ZeroPadding2D((1, 1))(conv_3) - conv_3 = Convolution2D(384, 3, 3, activation='relu', name='conv_3')(conv_3) - - conv_4 = ZeroPadding2D((1, 1))(conv_3) - conv_4 = merge([ - Convolution2D(192, 3, 3, activation='relu', name='conv_4_' + str(i + 1))( - splittensor(ratio_split=2, id_split=i)(conv_4) - ) for i in range(2)], mode='concat', concat_axis=1, name='conv_4') - - conv_5 = ZeroPadding2D((1, 1))(conv_4) - conv_5 = merge([ - Convolution2D(128, 3, 3, activation='relu', name='conv_5_' + str(i + 1))( - splittensor(ratio_split=2, id_split=i)(conv_5) - ) for i in range(2)], mode='concat', concat_axis=1, name='conv_5') - - dense_1 = MaxPooling2D((3, 3), strides=(2, 2), name='convpool_5')(conv_5) - - if heatmap: - dense_1 = Convolution2D( - 4096, 6, 6, activation='relu', name='dense_1')(dense_1) - dense_2 = Convolution2D( - 4096, 1, 1, activation='relu', name='dense_2')(dense_1) - dense_3 = Convolution2D(1000, 1, 1, name='dense_3')(dense_2) - prediction = Softmax4D(axis=1, name='softmax')(dense_3) - else: - dense_1 = Flatten(name='flatten')(dense_1) - dense_1 = Dense(4096, activation='relu', name='dense_1')(dense_1) - dense_2 = Dropout(0.5)(dense_1) - dense_2 = Dense(4096, activation='relu', name='dense_2')(dense_2) - dense_3 = Dropout(0.5)(dense_2) - dense_3 = Dense(1000, name='dense_3')(dense_3) - prediction = Activation('softmax', name='softmax')(dense_3) - - model = Model(input=inputs, output=prediction) - - if weights_path: - model.load_weights(weights_path) - - return model - - -def preprocess_image_batch(image_paths, img_size=None, crop_size=None, color_mode='rgb', out=None): - """ - Consistent preprocessing of images batches - - :param image_paths: iterable: images to process - :param crop_size: tuple: crop images if specified - :param img_size: tuple: resize images if specified - :param color_mode: Use rgb or change to bgr mode based on type of model you want to use - :param out: append output to this iterable if specified - """ - img_list = [] - - for im_path in image_paths: - img = imread(im_path, mode='RGB') - if img_size: - img = imresize(img, img_size) - - img = img.astype('float32') - # We normalize the colors (in RGB space) with the empirical means on the training set - img[:, :, 0] -= 123.68 - img[:, :, 1] -= 116.779 - img[:, :, 2] -= 103.939 - # We permute the colors to get them in the BGR order - if color_mode == 'bgr': - img[:, :, [0, 1, 2]] = img[:, :, [2, 1, 0]] - img = img.transpose((2, 0, 1)) - - if crop_size: - img = img[:, (img_size[0] - crop_size[0]) // 2:(img_size[0] + crop_size[0]) // - 2, (img_size[1] - crop_size[1]) // 2:(img_size[1] + crop_size[1]) // 2] - - img_list.append(img) - - try: - img_batch = np.stack(img_list, axis=0) - except: - raise ValueError('when img_size and crop_size are None, images' - ' in image_paths must have the same shapes.') - - if out is not None and hasattr(out, 'append'): - out.append(img_batch) - else: - return img_batch - - -def _demo_heatmap_script(): - """ - Here is a script to compute the heatmap of the dog synsets. 
- We find the synsets corresponding to dogs on ImageNet website - """ - im = preprocess_image_batch(['examples/dog.jpg'], color_mode='rgb') - - # Test pretrained model - sgd = SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True) - model = convnet( - 'alexnet', weights_path='weights/alexnet_weights.h5', heatmap=True) - model.compile(optimizer=sgd, loss='mse') - - out = model.predict(im) - - s = 'n02084071' - # Most of the synsets are not in the subset of the synsets used in ImageNet recognition task. - ids = np.array([id_ for id_ in synset_to_dfs_ids(s) if id_ is not None]) - heatmap = out[0, ids, :, :].sum(axis=0) - return heatmap - - -if __name__ == '__main__': - _demo_heatmap_script() -# -*- coding: utf-8 -*- -from keras import backend as K -from keras.engine import Layer -from keras.layers.convolutional import Convolution2D -from keras.layers.core import Lambda -from keras.layers.core import Merge - - -def crosschannelnormalization(alpha=1e-4, k=2, beta=0.75, n=5, **kwargs): - """ - This is the function used for cross channel normalization in the original - Alexnet - """ - - def f(X): - b, ch, r, c = X.shape - half = n // 2 - square = K.square(X) - extra_channels = K.spatial_2d_padding( - K.permute_dimensions(square, (0, 2, 3, 1)), (0, half)) - extra_channels = K.permute_dimensions(extra_channels, (0, 3, 1, 2)) - scale = k - for i in range(n): - scale += alpha * extra_channels[:, i:i + ch, :, :] - scale = scale ** beta - return X / scale - - return Lambda(f, output_shape=lambda input_shape: input_shape, **kwargs) - - -def splittensor(axis=1, ratio_split=1, id_split=0, **kwargs): - def f(X): - div = X.shape[axis] // ratio_split - - if axis == 0: - output = X[id_split * div:(id_split + 1) * div, :, :, :] - elif axis == 1: - output = X[:, id_split * div:(id_split + 1) * div, :, :] - elif axis == 2: - output = X[:, :, id_split * div:(id_split + 1) * div, :] - elif axis == 3: - output = X[:, :, :, id_split * div:(id_split + 1) * div] - else: - raise ValueError('This axis is not possible') - - return output - - def g(input_shape): - output_shape = list(input_shape) - output_shape[axis] = output_shape[axis] // ratio_split - return tuple(output_shape) - - return Lambda(f, output_shape=lambda input_shape: g(input_shape), **kwargs) - - -def convolution2Dgroup(n_group, nb_filter, nb_row, nb_col, **kwargs): - def f(input): - return Merge([ - Convolution2D(nb_filter // n_group, nb_row, nb_col)( - splittensor(axis=1, - ratio_split=n_group, - id_split=i)(input)) - for i in range(n_group) - ], mode='concat', concat_axis=1) - - return f - - -class Softmax4D(Layer): - def __init__(self, axis=-1, **kwargs): - self.axis = axis - super(Softmax4D, self).__init__(**kwargs) - - def build(self, input_shape): - pass - - def call(self, x, mask=None): - e = K.exp(x - K.max(x, axis=self.axis, keepdims=True)) - s = K.sum(e, axis=self.axis, keepdims=True) - return e / s - - def get_output_shape_for(self, input_shape): - return input_shape -# -*- coding: utf-8 -*- -from os.path import dirname -from os.path import join - -from scipy.io import loadmat - -meta_clsloc_file = join(dirname(__file__), 'data', 'meta_clsloc.mat') - -synsets = loadmat(meta_clsloc_file)['synsets'][0] - -synsets_imagenet_sorted = sorted([(int(s[0]), str(s[1][0])) for s in synsets[:1000]], - key=lambda v: v[1]) - -corr = {} -for j in range(1000): - corr[synsets_imagenet_sorted[j][0]] = j - -corr_inv = {} -for j in range(1, 1001): - corr_inv[corr[j]] = j - - -def depthfirstsearch(id_, out=None): - if out is None: - out = [] - if isinstance(id_, 
int): - pass - else: - id_ = next(int(s[0]) for s in synsets if s[1][0] == id_) - - out.append(id_) - children = synsets[id_ - 1][5][0] - for c in children: - depthfirstsearch(int(c), out) - return out - - -def synset_to_dfs_ids(synset): - ids = [x for x in depthfirstsearch(synset) if x <= 1000] - ids = [corr[x] for x in ids] - return ids - - -def synset_to_id(synset): - a = next((i for (i, s) in synsets if s == synset), None) - return a - - -def id_to_synset(id_): - return str(synsets[corr_inv[id_] - 1][1][0]) - - -def id_to_words(id_): - return synsets[corr_inv[id_] - 1][2][0] - - -def pprint_output(out, n_max_synsets=10): - best_ids = out.argsort()[::-1][:10] - for u in best_ids: - print('%.2f' % round(100 * out[u], 2) + ' : ' + id_to_words(u)) -"""Setup-module for DistKeras. - -This software enables distrubuted Machine Learning on Apache Spark using Keras. - -See: -https://github.com/JoeriHermans/dist-keras/ -http://joerihermans.com/ -""" - -from setuptools import setup -from setuptools import find_packages - -setup(name='dist-keras', - description='Distributed Deep learning with Apache Spark with Keras.', - url='https://github.com/JoeriHermans/dist-keras', - author='Joeri Hermans', - version='0.2.1', - author_email='joeri@joerihermans.com', - license='GPLv3', - install_requires=['theano', 'tensorflow', 'keras', 'flask'], - packages=['distkeras'], - package_data={'distkeras': ['distkeras/*.py']}, - # Keywords related to the project. - keywords=['Keras', 'Deep Learning', 'Machine Learning', - 'Theano', 'Tensorflow', 'Distributed', 'Apache Spark'], - ) -"""Evaluation module. - -An evaluator will evaluate a dataframe according to specific requirements. -""" - - -class Evaluator(object): - """An evaluator is an abstract class which will, given a label and a prediction, - will compute an evaluation metric. - - # Arguments - label_col: string. Column name of the label. - prediction_col: string. Column name of the prediction. - """ - - def __init__(self, label_col="label", prediction_col="prediction"): - self.label_column = label_col - self.prediction_column = prediction_col - - def evaluate(self, dataframe): - """Evalutes the specified dataframe. - - # Arguments - dataframe: dataframe. Spark Dataframe. - """ - raise NotImplementedError - - -class AccuracyEvaluator(Evaluator): - """Computes the accuracy of the prediction based on the label. - - # Arguments - label_col: string. Label column. - prediction_col: string. Prediction column. - """ - - def __init__(self, label_col="label", prediction_col="prediction"): - # Initialize the parent structure. - super(AccuracyEvaluator, self).__init__(label_col, prediction_col) - - def evaluate(self, dataframe): - # Count the total number of instances. - num_instances = dataframe.count() - # Extract the matching indexes. - cleaned = dataframe.where( - dataframe[self.prediction_column] == dataframe[self.label_column]) - # Fetch the number of correctly guessed instances. - validated_instances = cleaned.count() - - return float(validated_instances) / float(num_instances) -"""Module which facilitates job deployment on remote Spark clusters. -This allows you to build models and architectures on, for example, remote -notebook servers, and submit the large scale training job on remote -Hadoop / Spark clusters.""" - -## BEGIN Imports. 
############################################################## - -from distkeras.utils import deserialize_keras_model -from distkeras.utils import get_os_username -from distkeras.utils import pickle_object -from distkeras.utils import serialize_keras_model -from distkeras.utils import unpickle_object - -from flask import Flask -from flask import request - -from os.path import expanduser - -from threading import Lock - -import base64 - -import json - -import os - -import subprocess - -import threading - -import time - -import urllib2 - -## END Imports. ################################################################ - - -class Punchcard(object): - - def __init__(self, secrets_path="secrets.json", port=80): - self.application = Flask(__name__) - self.secrets_path = secrets_path - self.port = port - self.mutex = threading.Lock() - self.jobs = {} - - def read_secrets(self): - with open(self.secrets_path) as f: - secrets_raw = f.read() - secrets = json.loads(secrets_raw) - - return secrets - - def valid_secret(self, secret, secrets): - num_secrets = len(secrets) - for i in range(0, num_secrets): - description = secrets[i] - if description['secret'] == secret: - return True - return False - - def secret_in_use(self, secret): - return secret in self.jobs - - def set_trained_model(self, job, model): - with self.mutex: - self.models[job.get_secret()] = model - - def get_submitted_job(self, secret): - with self.mutex: - if self.secret_in_use(secret): - job = self.jobs[secret] - else: - job = None - - return job - - def define_routes(self): - - ## BEGIN Route definitions. ############################################ - - @self.application.route('/api/submit', methods=['POST']) - def submit_job(): - # Parse the incoming JSON data. - data = json.loads(request.data) - # Fetch the required job arguments. - secret = data['secret'] - job_name = data['job_name'] - num_executors = data['num_executors'] - num_processes = data['num_processes'] - data_path = data['data_path'] - trainer = unpickle_object(data['trainer'].decode('hex_codec')) - # Fetch the parameters for the job. - secrets = self.read_secrets() - with self.mutex: - if self.valid_secret(secret, secrets) and not self.secret_in_use(secret): - job = PunchcardJob( - secret, job_name, data_path, num_executors, num_processes, trainer) - self.jobs[secret] = job - job.start() - return '', 200 - - return '', 403 - - @self.application.route('/api/state') - def job_state(): - secret = request.args.get('secret') - job = self.get_submitted_job(secret) - # Check if the job exists. - if job is not None: - d = {} - d['job_name'] = job.get_job_name() - d['running'] = job.running() - return json.dumps(d), 200 - - return '', 404 - - @self.application.route('/api/cancel') - def cancel(): - secret = request.args.get('secret') - job = self.get_submitted_job(secret) - if job is not None and job.running(): - with self.mutex: - job.cancel() - del self.jobs[secret] - - return '', 200 - - @self.application.route('/api/destroy') - def destroy_job(): - secret = request.args.get('secret') - job = self.get_submitted_job(secret) - if job is not None and not job.running(): - with self.mutex: - model = self.jobs[secret].get_trained_model() - history = self.jobs[secret].get_history() - model = pickle_object( - serialize_keras_model(model)).encode('hex_codec') - history = pickle_object(history).encode('hex_codec') - d = {} - d['model'] = model - d['history'] = history - del self.jobs[secret] - return json.dumps(d), 200 - - return '', 400 - - ## END Route definitions. 
############################################## - - def run(self): - self.define_routes() - self.application.run('0.0.0.0', self.port) - - -class PunchcardJob(object): - - def __init__(self, secret, job_name, data_path, num_executors, num_processes, trainer): - self.secret = secret - self.job_name = job_name - self.data_path = data_path - self.num_executors = num_executors - self.num_processes = num_processes - self.trainer = trainer - self.is_running = True - self.thread = None - self.trained_model = None - self.history = None - - def get_job_name(self): - return self.job_name - - def get_secret(self): - return self.secret - - def get_history(self): - return self.history - - def get_trained_model(self): - return self.trained_model - - def start(self): - self.trainer.determine_new_master() - self.thread = threading.Thread(target=self.run) - self.thread.setDaemon(True) - self.thread.start() - - def cancel(self): - self.thread.exit() - - def running(self): - return self.is_running - - def join(self): - self.thread.join() - - def run_job(self): - os.system("python ~/jobs/" + self.secret + ".py") - - def clean_up(self): - home = expanduser("~") - os.remove(home + "/models/" + self.secret) - os.remove(home + "/histories/" + self.secret) - os.remove(home + "/trainers/" + self.secret) - - def read_trained_model(self): - home = expanduser("~") - with open(home + "/models/" + self.secret, "r") as f: - self.trained_model = deserialize_keras_model( - unpickle_object(f.read())) - - def read_history(self): - home = expanduser("~") - with open(home + "/histories/" + self.secret, "r") as f: - self.history = unpickle_object(f.read()) - - def serialize_trainer(self): - trainer = pickle_object(self.trainer) - home = expanduser("~") - with open(home + "/trainers/" + self.secret, "w") as f: - f.write(trainer) - - def generate_code(self): - source = """ -from distkeras.evaluators import * -from distkeras.predictors import * -from distkeras.trainers import * -from distkeras.trainers import * -from distkeras.transformers import * -from distkeras.utils import * -from keras import * -from pyspark import SparkConf -from pyspark import SparkContext -from pyspark import SQLContext -from os.path import expanduser -secret = '{secret}' -application_name = '{job_name}' -num_executors = {num_executors} -num_processes = {num_processes} -path_data = '{data_path}' -num_workers = num_processes * num_executors -# Allocate a Spark Context, and a Spark SQL context. -conf = SparkConf() -conf.set("spark.app.name", application_name) -conf.set("spark.master", "yarn-client") -conf.set("spark.executor.cores", num_processes) -conf.set("spark.executor.instances", num_executors) -conf.set("spark.executor.memory", "5g") -conf.set("spark.locality.wait", "0") -conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer"); -sc = SparkContext(conf=conf) -sqlContext = SQLContext(sc) -# Read the dataset from HDFS. For now we assume Parquet files. -dataset = sqlContext.read.parquet(path_data).repartition(num_workers) -# Deserialize the trainer object. -home = expanduser("~") -with open(home + "/trainers/" + secret, "r") as f: - trainer = unpickle_object(f.read()) -# Train the model, and save it afterwards. -trained_model = trainer.train(dataset) -with open(home + "/models/" + secret, "w") as f: - f.write(pickle_object(serialize_keras_model(trained_model))) -# Save the history of the training process. 
-histories = trainer.get_history() -with open(home + "/histories/" + secret, "w") as f: - f.write(pickle_object(histories)) -sc.stop() - """.format( - secret=self.secret, - job_name=self.job_name, - num_executors=self.num_executors, - num_processes=self.num_processes, - data_path=self.data_path - ) - home = expanduser("~") - with open(home + "/jobs/" + self.secret + ".py", "w") as f: - f.write(source) - - def run(self): - self.serialize_trainer() - self.generate_code() - self.run_job() - self.read_trained_model() - self.read_history() - self.clean_up() - self.is_running = False - - -class Job(object): - - def __init__(self, secret, job_name, data_path, num_executors, num_processes, trainer): - self.secret = secret - self.job_name = job_name - self.num_executors = 20 - self.num_processes = 1 - self.data_path = data_path - self.trainer = trainer - self.trained_model = None - self.history = None - self.address = None - - def set_num_executors(self, num_executors): - self.num_executors = num_executors - - def set_num_processes(self, num_processes): - self.num_processes = num_processes - - def get_trained_model(self): - return self.trained_model - - def get_history(self): - return self.history - - def is_finished(self): - address = self.address + '/api/state?secret=' + self.secret - request = urllib2.Request(address) - response = urllib2.urlopen(request) - data = json.load(response) - - return not data['running'] - - def destroy_remote_job(self): - address = self.address + '/api/destroy?secret=' + self.secret - request = urllib2.Request(address) - response = urllib2.urlopen(request) - data = json.load(response) - model = unpickle_object(data['model'].decode('hex_codec')) - self.trained_model = deserialize_keras_model(model) - self.history = unpickle_object(data['history'].decode('hex_codec')) - - def start(self): - self.thread = threading.Thread(target=self.run) - self.thread.start() - - def wait_completion(self): - self.thread.join() - - def cancel(self): - address = self.address + '/api/cancel?secret=' + self.secret - request = urllib2.Request(address) - urllib2.urlopen(request) - - def send(self, address): - data = {} - data['secret'] = self.secret - data['job_name'] = self.job_name - data['num_executors'] = self.num_executors - data['num_processes'] = self.num_processes - data['data_path'] = self.data_path - data['trainer'] = pickle_object(self.trainer).encode('hex_codec') - request = urllib2.Request(address + "/api/submit") - request.add_header('Content-Type', 'application/json') - urllib2.urlopen(request, json.dumps(data)) - self.address = address - self.start() - - def run(self): - time.sleep(1) - while not self.is_finished(): - time.sleep(10) - self.destroy_remote_job() -"""Networking utility functions.""" - -## BEGIN Imports. ############################################################## - -import pickle - -import socket - -## END Imports. ################################################################ - - -def determine_host_address(): - """Determines the human-readable host address of the local machine.""" - host_address = socket.gethostbyname(socket.gethostname()) - - return host_address - - -def recvall(connection, num_bytes): - """Reads `num_bytes` bytes from the specified connection. - - # Arguments - connection: socket. Opened socket. - num_bytes: int. Number of bytes to read. - """ - byte_buffer = b'' - buffer_size = 0 - bytes_left = num_bytes - # Iterate until we received all data. - while buffer_size < num_bytes: - # Fetch the next frame from the network. 
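Editorial aside: the length-prefixed framing implemented by recvall here, together with recv_data and send_data defined just below, can be exercised with a local socket pair. A minimal sketch, assuming the three functions from this module are in scope:

```python
# Round-trip a Python object through the 20-byte length-prefix protocol.
import socket

left, right = socket.socketpair()        # two connected local sockets
send_data(left, {'weights': [1, 2, 3]})  # writes length header, then payload
print(recv_data(right))                  # -> {'weights': [1, 2, 3]}
left.close()
right.close()
```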
- data = connection.recv(bytes_left) - # Compute the size of the frame. - delta = len(data) - buffer_size += delta - bytes_left -= delta - # Append the data to the buffer. - byte_buffer += data - - return byte_buffer - - -def recv_data(connection): - """Will fetch the next data frame from the connection. - - The protocol for reading is structured as follows: - 1. The first 20 bytes represents a string which holds the next number of bytes to read. - 2. We convert the 20 byte string to an integer (e.g. '00000000000000000011' -> 11). - 3. We read `num_bytes` from the socket (which is in our example 11). - 4. Deserialize the retrieved string. - - # Arguments - connection: socket. Opened socket. - """ - data = b'' - # Fetch the serialized data length. - length = int(recvall(connection, 20).decode()) - # Fetch the serialized data. - serialized_data = recvall(connection, length) - # Deserialize the data. - data = pickle.loads(serialized_data) - - return data - - -def send_data(connection, data): - """Sends the data to the other endpoint of the socket using our protocol. - - The protocol for sending is structured as follows: - 1. Serialize the data. - 2. Obtain the buffer-size of the serialized data. - 3. Serialize the buffer-size in 20 bytes (e.g. 11 -> '00000000000000000011'). - 4. Send the serialized buffer size. - 5. Send the serialized data. - - # Arguments - connection: socket. Opened socket. - data: any. Data to send. - """ - # Serialize the data. - serialized_data = pickle.dumps(data, -1) - length = len(serialized_data) - # Serialize the number of bytes in the data. - serialized_length = str(length).zfill(20) - # Send the data over the provided socket. - connection.sendall(serialized_length.encode()) - connection.sendall(serialized_data) - - -def connect(host, port, disable_nagle=True): - fd = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - # Check if Nagle's algorithm needs to be disabled. - if disable_nagle: - fd.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) - else: - fd.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 0) - # Connect to the specified URI. - fd.connect((host, port)) - - return fd -"""Parameter servers. - -A parameter server is a process which will aggregate all the incoming gradient -or parameter updates of the workers and incorperate it into a single center variable. -This center variable will eventually be the produced model of the trainer. -""" - -## BEGIN Imports. ############################################################## - -import copy - -import math - -import numpy as np - -import socket - -import threading - -from distkeras.networking import recv_data -from distkeras.networking import send_data -from distkeras.utils import deserialize_keras_model - -## END Imports. ################################################################ - - -class ParameterServer(object): - """Abstract class which provides basic attributed and methods for all - parameter servers. - - # Arguments - model: string. Serialized Keras model. - See: distkeras.utils.serialize_keras_model - """ - - def __init__(self, model): - self.model = deserialize_keras_model(model) - self.num_updates = 1 - - def initialize(self): - """Initializes the parameter server. - - This method is called after self.start(). 
- """ - raise NotImplementedError - - def start(self): - """Starts the parameter server in a new thread.""" - raise NotImplementedError - - def run(self): - """Main event loop of the parameter server.""" - raise NotImplementedError - - def stop(self): - """Notifies the parameter server thread to stop.""" - raise NotImplementedError - - def get_model(self): - """Returns the Keras model which will be trained by the workers.""" - return self.model - - def next_update(self): - """Increments the number of model updates by 1.""" - self.num_updates += 1 - - def reset_update_counter(self): - """Resets the model update counter.""" - self.num_updates = 0 - - def get_num_updates(self): - """Returns the number of model updates the parameter server has performed.""" - return self.num_updates - - -class SocketParameterServer(ParameterServer): - """Abstract class of a parameter server which is based on a socket implementation. - - This means that this parameter server accepts multiple TCP connections from multiple - workers, and uses a costum protocol to transmit and receive the model parameters. This - is done by implementing a custom protocol. Which is fully described in the - distkeras.networking module. - - # Arguments - model: string. Serialized Keras model. - See: distkeras.utils.serialize_keras_model - port: int. Listing port number. - """ - - def __init__(self, model, port=5000): - super(SocketParameterServer, self).__init__(model) - self.master_port = port - self.socket = None - self.running = False - self.connections = [] - self.mutex = threading.Lock() - - def initialize(self): - """Sets up the listing port.""" - # Reset the running flag. - self.running = True - # Prepare a socket. - file_descriptor = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - # Disable Nagle's algorithm. - file_descriptor.setsockopt(socket.IPPROTO_TCP, socket.TCP_NODELAY, 1) - # Check if the master port needs to be assigned by the OS. - if self.master_port is None: - file_descriptor.bind(('0.0.0.0', 0)) - # Retrieve the port assigned by the OS. - self.master_port = int(file_descriptor.getsockname()[1]) - else: - file_descriptor.bind(('0.0.0.0', self.master_port)) - # Listen to the socket. - file_descriptor.listen(5) - # Assign the socket. - self.socket = file_descriptor - - def handle_commit(self, conn, addr): - """Handles parameter updates coming from the workers. - - # Arguments: - conn: socket. The opened connection. - addr: addr. Address of the remote host. - """ - raise NotImplementedError - - def handle_pull(self, conn, addr): - """Handles parameter requests coming from the workers. This will - actually send the model parameters to the requesting host. - - # Arguments: - conn: socket. The opened connection. - addr: addr. Address of the remote host. - """ - # Fetch the raw center variables. - with self.mutex: - center_variable = self.model.get_weights() - cv = copy.deepcopy(center_variable) - # Send the data over the socket. - send_data(conn, cv) - - def cancel_accept(self): - """This method will cancel the accept procedure. The method - is meant to be executed by the stop() procedure. - """ - file_descriptor = socket.socket(socket.AF_INET, socket.SOCK_STREAM) - try: - # Connect to the listening socket to cancel the accept. - file_descriptor.connect(("localhost", self.master_port)) - file_descriptor.close() - except Exception as e: - print(e) - - def handle_connection(self, conn, addr): - """ - A parameter server has two main functionalities. Nodes are able to - pull (p) the current state, or 'commit' a state. 
This is implemented - in the following functionality. Classes which implement these interfaces - should not worry about connection handling. - """ - try: - while self.running: - # Fetch the current action. - action = conn.recv(1).decode() - # Check if the action is a commit (most of the cases). - if action == 'c': - # Handle the commit. - self.handle_commit(conn, addr) - elif action == 'p': - # Handle the pull. - self.handle_pull(conn, addr) - except Exception as e: - print(e) - - def start(self): - """Starts the parameter server.""" - # Set the running flag. - self.running = True - - def run(self): - """Main event loop of the parameter server.""" - # Listen for incoming connections. - while self.running: - try: - # Accept incoming connections. - conn, addr = self.socket.accept() - # Handle the connection. - thread = threading.Thread( - target=self.handle_connection, args=(conn, addr)) - thread.start() - # Store the connection in the dictionary. - self.connections.append(thread) - except Exception as e: - print(e) - - def stop(self): - """Stop the parameter server. This will also cleanup all existing connections.""" - self.running = False - # Check if a socket is allocated. - if self.socket: - self.cleanup_connections() - self.finalize() - self.socket.close() - self.cancel_accept() - self.socket = None - self.connections = [] - - def finalize(self): - """Method that is called when the parameter server stops.""" - print("Not executed") - - def cleanup_connections(self): - """Clean all existing connections up.""" - # Iterate over all connections. - for thread in self.connections: - # Fetch the thread object. - thread.join() - del thread - - -class DeltaParameterServer(SocketParameterServer): - """A parameter server which integrates all incoming deltas into the model. - - # Arguments - model: string. Serialized Keras model. - See: distkeras.utils.serialize_keras_model - master_port: int. Port number of the parameter server. - """ - - def __init__(self, model, master_port): - super(DeltaParameterServer, self).__init__(model, master_port) - self.center_variable = np.asarray(self.model.get_weights()) - - def handle_commit(self, conn, addr): - # Receive the parameters from the remote node. - data = recv_data(conn) - # Extract the delta from the dictionary. - delta = data['delta'] - # Update the center variable with the delta. - with self.mutex: - self.center_variable = self.center_variable + delta - # Next iteration. - self.next_update() - - def handle_pull(self, conn, addr): - """Handles parameter requests coming from the workers. This will - actually send the model parameters to the requesting host. - - # Arguments: - conn: socket. The opened connection. - addr: addr. Address of the remote host. - """ - # Fetch the raw center variables. - with self.mutex: - cv = copy.deepcopy(self.center_variable) - # Send the data over the socket. - send_data(conn, cv) - - def finalize(self): - # Set the final weights of the model. - self.model.set_weights(self.center_variable) - - -class ADAGParameterServer(SocketParameterServer): - """A parameter server which integrates the incoming gradient residuals into - the model, and integrates them using the ADAG scheme. - - # Arguments - model: string. Keras model. - See: distkeras.utils.serialize_keras_model - master_port: int. Port number of the parameter server. 
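Editorial aside: a worker's side of the exchange above is a single action byte followed by the framed payload. A hedged sketch of a pull, assuming connect and recv_data from distkeras.networking (host and port are placeholders):

```python
# Worker-side pull: send action byte 'p', then read the framed reply.
fd = connect("127.0.0.1", 5000)  # placeholder parameter server address
fd.sendall(b'p')                 # request the current center variable
center_variable = recv_data(fd)  # weights sent back by handle_pull
fd.close()
```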
- """ - - def __init__(self, model, master_port): - super(ADAGParameterServer, self).__init__(model, master_port) - self.center_variable = np.asarray(self.model.get_weights()) - - def handle_commit(self, conn, addr): - # Receive the parameters from the remote node. - data = recv_data(conn) - # Extract the data from the dictionary. - r = data['residual'] - with self.mutex: - # Update the center variable. - self.center_variable = self.center_variable + r - # Increment the number of parameter server updates. - self.next_update() - - def handle_pull(self, conn, addr): - """Handles parameter requests coming from the workers. This will - actually send the model parameters to the requesting host. - - # Arguments: - conn: socket. The opened connection. - addr: addr. Address of the remote host. - """ - # Fetch the raw center variables. - with self.mutex: - cv = copy.deepcopy(self.center_variable) - # Send the data over the socket. - send_data(conn, cv) - - def finalize(self): - # Set the weights of the model. - self.model.set_weights(self.center_variable) - - -class DynSGDParameterServer(SocketParameterServer): - """DynSGD parameter server, keeps track of the staleness between updates - to maintain dynamic worker learning rates based on staleness. - - # Arguments - model: string. Keras model - See: distkeras.utils.serialize_keras_model - master_port: int. Port number of the parameter server. - """ - - def __init__(self, model, master_port): - super(DynSGDParameterServer, self).__init__(model, master_port) - - def handle_pull(self, conn, addr): - """Handles parameter requests coming from the workers. This will - actually send the model parameters to the requesting host. - - This is a specific implementation for DynSGD. - - # Arguments: - conn: socket. The opened connection. - addr: addr. Address of the remote host. - """ - # Allocate a new dictionary. - data = {} - # Fetch the raw center variables. - with self.mutex: - center_variable = self.model.get_weights() - cv = copy.deepcopy(center_variable) - # Store the number of updates (u) the PS executed. - data['update'] = self.num_updates - # Store the model (m). - data['model'] = cv - # Send the data over the socket. - send_data(conn, data) - - def handle_commit(self, conn, addr): - data = recv_data(conn) - r = data['residual'] - # Fetch the last iteration number - last_update = data['last_update'] - du = (self.num_updates - last_update) + 1 - r /= du - with self.mutex: - center_variable = self.model.get_weights() - center_variable = center_variable + r - self.model.set_weights(center_variable) - # Increment the number of parameter server updates. - self.next_update() - - -class ExperimentalParameterServer(SocketParameterServer): - """A parameter server which integrates the incoming gradient residuals into - the model, and integrates them using the ADAG scheme. - - # Arguments - model: string. Keras model. - See: distkeras.utils.serialize_keras_model - master_port: int. Port number of the parameter server. - """ - - def __init__(self, model, master_port, learning_rate): - super(ExperimentalParameterServer, self).__init__(model, master_port) - self.center_variable = np.asarray(self.model.get_weights()) - self.inverse_learning_rate = 1.0 / learning_rate - - def handle_commit(self, conn, addr): - # Receive the parameters from the remote node. - data = recv_data(conn) - # Extract the data from the dictionary. 
- r = data['residual'] - worker_id = data['worker_id'] - stale_cv = data['stale_center_variable'] - with self.mutex: - diff_cv = np.subtract(self.center_variable, stale_cv) - d = 1 / (self.inverse_learning_rate * np.power(diff_cv, 2) + 1) - r = np.multiply(d, r) - # Update the center variable. - self.center_variable = self.center_variable + r - # Increment the number of parameter server updates. - self.next_update() - - def handle_pull(self, conn, addr): - """Handles parameter requests coming from the workers. This will - actually send the model parameters to the requesting host. - - # Arguments: - conn: socket. The opened connection. - addr: addr. Address of the remote host. - """ - # Fetch the raw center variables. - with self.mutex: - cv = copy.deepcopy(self.center_variable) - # Send the data over the socket. - send_data(conn, cv) - - def finalize(self): - # Set the weights of the model. - self.model.set_weights(self.center_variable) -"""Predictors take a model and will transform the Dataframe by adding a prediction column.""" - -## BEGIN Imports. ############################################################## - -import numpy as np - -from pyspark.mllib.linalg import DenseVector - -from distkeras.utils import serialize_keras_model -from distkeras.utils import deserialize_keras_model -from distkeras.utils import new_dataframe_row - -## END Imports. ################################################################ - - -class Predictor(object): - """Abstract predictor class. - - # Arguments - keras_model: Keras Model. - """ - - def __init__(self, keras_model): - self.model = serialize_keras_model(keras_model) - - def predict(self, dataframe): - """Transforms the dataframe to add a prediction. - - # Arguments - dataframe: dataframe. Spark Dataframe. - """ - raise NotImplementedError - - -class ModelPredictor(Predictor): - """Takes a Keras model and adds a prediction column to the dataframe - given a features column. - - # Arguments - keras_model: Keras model. - features_col: string. Name of the features column. - output_col: string. Name of the prediction column. - """ - - def __init__(self, keras_model, features_col="features", output_col="prediction"): - super(ModelPredictor, self).__init__(keras_model) - assert isinstance(features_col, (str, list) - ), "'features_col' must be a string or a list of strings" - self.features_column = [features_col] if isinstance( - features_col, str) else features_col - self.output_column = output_col - - def _predict(self, iterator): - """Lambda method which will append a prediction column to the provided rows. - - # Arguments: - iterator: iterator. Spark Row iterator. - """ - model = deserialize_keras_model(self.model) - for row in iterator: - features = [np.asarray([row[c]]) for c in self.features_column] - prediction = model.predict(features) - dense_prediction = DenseVector(prediction[0]) - new_row = new_dataframe_row( - row, self.output_column, dense_prediction) - yield new_row - - def predict(self, dataframe): - """Returns a dataframe which is the old dataframe with an additional - prediction column. - """ - return dataframe.rdd.mapPartitions(self._predict).toDF() -"""Schemes module. - -Module with schemes to automatize a distributed learning process. These schemes will automatically -adjust the hyperparameters to improve training performance. -""" - -## BEGIN Imports. ############################################################## - -import math - -## END Imports. 
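Editorial aside: a quick usage note for the ModelPredictor defined above. A hedged sketch; trained_model and df are assumed to exist, with df holding a vector-valued 'features' column:

```python
# Append a 'prediction' column to a Spark DataFrame with a trained model.
predictor = ModelPredictor(keras_model=trained_model,
                           features_col="features",
                           output_col="prediction")
predicted_df = predictor.predict(df)
predicted_df.select("features", "prediction").show(5)
```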
################################################################
-
-
-class Scheme(object):
-    """A 'Scheme' is a way to describe how a distributed optimization sequence
-    should perform. For example, it is responsible for adjusting the learning
-    rate of the parameter server if it notices that the loss doesn't decay.
-    However, this is only one of the possible solutions. Others include the
-    optimization of other hyperparameters such as the number of workers.
-
-    # Arguments
-        optimizer: trainer. A distributed optimizer.
-        num_epoch: int. Total number of epochs.
-        evaluation_frequency: int. Frequency of hyperparameter evaluation.
-    """
-
-    def __init__(self, optimizer, num_epoch=15, evaluation_frequency=5):
-        self.optimizer = optimizer
-        self.num_epoch = num_epoch
-        self.evaluation_frequency = evaluation_frequency
-        self.epoch_over_eval_frequency = int(
-            self.num_epoch / self.evaluation_frequency)
-        self.initialize()
-
-    def initialize(self):
-        """Initializes the hyperparameters to follow the scheme parameters."""
-        self.optimizer.set_num_epoch(
-            self.get_epoch_over_evaluation_frequency())
-
-    def get_epoch_over_evaluation_frequency(self):
-        """Returns the number of epochs per evaluation frequency."""
-        return self.epoch_over_eval_frequency
-
-    def optimize(self, training_set, validation_set):
-        raise NotImplementedError
-
-
-class Emperor(Scheme):
-    """The 'Emperor' optimization scheme will make hyperparameter changes based
-    on the change in validation loss between evaluations.
-
-    # Arguments
-        optimizer: trainer. A distributed optimizer.
-        evaluate_loss: function. Function which evaluates the loss. This
-                       function should accept a model, and a dataframe.
-        num_epoch: int. Total number of epochs.
-        evaluation_frequency: int. Frequency of hyperparameter evaluation.
-        loss_threshold: float. Plateau threshold; smaller loss changes
-                        trigger a learning rate adjustment.
-    """
-
-    def __init__(self, optimizer, evaluate_loss, num_epoch=15, evaluation_frequency=5,
-                 loss_threshold=0.005):
-        super(Emperor, self).__init__(
-            optimizer, num_epoch, evaluation_frequency)
-        self.previous_loss = float('inf')
-        self.loss_threshold = loss_threshold
-        self.evaluate_loss = evaluate_loss
-
-    def optimize(self, training_set, validation_set):
-        trained_model = None
-
-        # Fetch the number of evaluations, to match the number of epochs.
-        num_evaluations = self.get_epoch_over_evaluation_frequency() + 1
-        # Iterate over the number of evaluation epochs.
-        for i in range(0, num_evaluations):
-            # Train the model.
-            trained_model = self.optimizer.train(training_set)
-            self.optimizer.set_model(trained_model)
-            # Evaluate on the validation set, and fetch the loss.
-            loss = self.evaluate_loss(trained_model, validation_set)
-            print("Current loss: " + str(loss))
-            dl = math.fabs(loss - self.previous_loss)
-            self.previous_loss = loss
-            if dl <= self.loss_threshold:
-                print("Lowering learning rate.")
-                print("Old learning rate: " +
-                      str(self.optimizer.get_learning_rate()))
-                # Modify the learning rate.
-                learning_rate = self.optimizer.get_learning_rate()
-                learning_rate /= 10
-                self.optimizer.set_learning_rate(learning_rate)
-                print("New learning rate: " +
-                      str(self.optimizer.get_learning_rate()))
-
-        return trained_model
-"""Model optimizers. Depending on the implementation, these classes will optimize the
-Keras model in a distributed manner (with the exception of the SingleTrainer)."""
-
-## BEGIN Imports.
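Editorial aside: the plateau rule in Emperor.optimize is worth isolating; when the validation loss moves by less than loss_threshold between evaluations, the learning rate is divided by 10. A standalone sketch with made-up losses:

```python
# Emperor-style plateau detection in isolation.
import math

previous_loss, loss_threshold, learning_rate = float('inf'), 0.005, 0.1
for loss in [0.90, 0.50, 0.498, 0.497]:  # made-up validation losses
    if math.fabs(loss - previous_loss) <= loss_threshold:
        learning_rate /= 10              # plateau -> decay the step size
    previous_loss = loss
print(learning_rate)                     # decayed twice -> 0.001
```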
############################################################## - -import numpy as np - -import threading - -import time - -from distkeras.parameter_servers import ADAGParameterServer -from distkeras.parameter_servers import DeltaParameterServer -from distkeras.parameter_servers import DynSGDParameterServer -from distkeras.parameter_servers import ExperimentalParameterServer - -from distkeras.utils import deserialize_keras_model -from distkeras.utils import history_executor -from distkeras.utils import history_executors_average -from distkeras.utils import pickle_object -from distkeras.utils import serialize_keras_model -from distkeras.utils import set_keras_base_directory -from distkeras.utils import unpickle_object - -from distkeras.networking import determine_host_address - -from distkeras.workers import ADAGWorker -from distkeras.workers import AEASGDWorker -from distkeras.workers import DOWNPOURWorker -from distkeras.workers import DynSGDWorker -from distkeras.workers import ExperimentalWorker -from distkeras.workers import EAMSGDWorker -from distkeras.workers import SequentialWorker - -from keras import backend as K - -## END Imports. ################################################################ - - -class Trainer(object): - """Abstract trainer class. This class provides all base functionality which - all optimizers need to implement. - - # Arguments - keras_model: Keras model. - loss: string. String representing the loss. - See: https://keras.io/objectives/ - worker_optimizer: string. String representing worker optimizer. - See https://keras.io/optimizers/ - metrics: list of strings representing model evaluation metrics. Default is ["accuracy"]. - See: https://keras.io/metrics/ - loss_weights: optional list or dict specifying weights for different losses. - """ - - def __init__(self, keras_model, loss, worker_optimizer, metrics=["accuracy"], loss_weights=None): - set_keras_base_directory() - self.master_model = serialize_keras_model(keras_model) - self.loss = loss - self.loss_weights = loss_weights - self.worker_optimizer = worker_optimizer - self.metrics = metrics - self.history = [] - self.training_time_start = 0 - self.training_time_end = 0 - self.training_time = 0 - self.max_mini_batches_prefetch = 100 - - def set_max_prefetch(self, max_mini_batches): - """Sets the maximum amount of mini-batches that can be prefetched by a worker.""" - self.max_mini_batches_prefetch = max_mini_batches - - def set_model(self, model): - """Sets the master model to be used by the trainer.""" - self.master_model = serialize_keras_model(model) - - def record_training_start(self): - """Records the start of the training. - - This private function is called when the training process starts. - """ - self.training_time = 0 - self.training_time_start = time.time() - - def record_training_end(self): - """Records the end of the traing. - - This private function is called when the training process is terminated. 
- """ - self.training_time_end = time.time() - self.training_time = self.training_time_end - self.training_time_start - - def get_training_time(self): - """Returns the told training time.""" - return self.training_time - - def get_history(self): - """Returns all history object aggregated during training.""" - return self.history - - def get_averaged_history(self): - """Returns the averaged history of the center variable.""" - return history_executors_average(self.history) - - def get_executor_history(self, executor_id): - """Returns the history of a specific executor.""" - return history_executor(self.history, executor_id) - - def train(self, dataframe, shuffle=False): - """Trains the specified model using the specified dataframe. - - # Arguments - dataframe: dataframe. A Spark Dataframe containing the training data. - shuffle: boolean. Tells to shuffle the dataframe before training. - Warning: this will tell Spark to shuffle all partitions over - the network. It is recommended to shuffle the dataframe before - training and store it. - """ - raise NotImplementedError - - def serialize(self): - return pickle_object(self) - - -class SingleTrainer(Trainer): - """An optimizer which will train a network on a single machine. - - # Arguments - keras_model: model. Keras model to train. - worker_optimizer: string. String representing worker optimizer. - See https://keras.io/optimizers/ - loss: string. String representing the loss. - See: https://keras.io/objectives/ - metrics: list of strings representing model evaluation metrics. Default is ["accuracy"]. - See: https://keras.io/metrics/ - features_col: string or list of strings. Name(s) of the features column(s). - label_col: string or list of strings. Name(s) of the label column(s). - num_epoch: int. Number of epochs. - batch_size: int. Mini-batch size. - loss_weights: optional list or dict specifying weights for different losses. - """ - - def __init__(self, keras_model, worker_optimizer, loss, metrics=["accuracy"], features_col="features", - label_col="label", num_epoch=1, batch_size=32, loss_weights=None): - super(SingleTrainer, self).__init__(keras_model, - loss, worker_optimizer, metrics, loss_weights) - self.features_column = features_col - self.label_column = label_col - self.num_epoch = num_epoch - self.batch_size = batch_size - - def allocate_worker(self): - """Allocates a worker for the Single Trainer instance. - - Only for internal use. - """ - worker = SequentialWorker(model=self.master_model, features_col=self.features_column, - label_col=self.label_column, batch_size=self.batch_size, num_epoch=self.num_epoch, - optimizer=self.worker_optimizer, loss=self.loss, loss_weights=self.loss_weights, - metrics=self.metrics) - - return worker - - def train(self, dataframe, shuffle=False): - """See distkeras.trainers.Trainer.train - - # Arguments - dataframe: dataframe. A Spark Dataframe containing the training data. - shuffle: boolean. Tells to shuffle the dataframe before training. - Warning: this will tell Spark to shuffle all partitions over - the network. It is recommended to shuffle the dataframe before - training and store it. - """ - # Check if the data needs to be shuffled. - if shuffle: - dataframe = shuffle(dataframe) - # Collect the dataframe on a single worker node. - dataframe = dataframe.coalesce(1) - # Cache the dataframe. - dataframe.cache() - # Allocate a worker. - worker = self.allocate_worker() - # Set the maximum number of mini-batches. 
- worker.set_max_prefetch(self.max_mini_batches_prefetch) - # Start recording training time. - self.record_training_start() - # Fetch the trained model. - self.master_model = dataframe.rdd.mapPartitionsWithIndex( - worker.train).collect()[0] - # Stop recording of training time. - self.record_training_end() - - return deserialize_keras_model(self.master_model) - - -class AveragingTrainer(Trainer): - """A trainer which implements a data parallel technique using model averaging. - - In this implementation, the model replicas are averages after every epoch. - # Arguments - keras_model: model. Keras model to train. - worker_optimizer: string. String representing worker optimizer. - See https://keras.io/optimizers/ - loss: string. String representing the loss. - See: https://keras.io/objectives/ - metrics: list of strings representing model evaluation metrics. Default is ["accuracy"]. - See: https://keras.io/metrics/ - features_col: string or list of strings. Name(s) of the features column(s). - label_col: string or list of strings. Name(s) of the label column(s). - num_epoch: int. Number of epochs. - batch_size: int. Mini-batch size. - num_workers: int. Number of model replicas to train in parallel. - loss_weights: optional list or dict specifying weights for different losses. - """ - - def __init__(self, keras_model, worker_optimizer, loss, metrics=["accuracy"], features_col="features", - label_col="label", num_epoch=1, batch_size=32, num_workers=2, loss_weights=None): - super(AveragingTrainer, self).__init__(keras_model, - loss, worker_optimizer, metrics, loss_weights) - self.features_column = features_col - self.label_column = label_col - self.num_epoch = num_epoch - self.batch_size = batch_size - self.num_workers = num_workers - self.parameter_buffer = np.asarray(keras_model.get_weights()) - self.parameter_buffer.fill(0.0) - - def average_models(self, models): - """Averages the specified list of Keras models, and assigns the - averaged model as the master model. - - # Arguments: - models: list. A list of serialized Keras models. - """ - num_models = len(models) - # Get all weights of the models. - for i in range(0, num_models): - weights = np.asarray( - deserialize_keras_model(models[i]).get_weights()) - self.parameter_buffer += weights - # Average the parameters. - self.parameter_buffer /= num_models - temp_model = deserialize_keras_model(self.master_model) - temp_model.set_weights(self.parameter_buffer) - self.master_model = serialize_keras_model(temp_model) - - def allocate_worker(self): - """Allocates the AveragingWorker for internal use.""" - worker = SequentialWorker(model=self.master_model, features_col=self.features_column, - label_col=self.label_column, batch_size=self.batch_size, num_epoch=1, - optimizer=self.worker_optimizer, loss=self.loss, loss_weights=self.loss_weights, metrics=self.metrics) - - return worker - - def train(self, dataframe, shuffle=False): - """Applies model averaging to the model replicas distributed over the specified - number of Spark executors. - - # Arguments - dataframe: dataframe: A Spark Dataframe containing the training data. - shuffle: boolean. Tells to shuffle the dataframe before training. - Warning: this will tell Spark to shuffle all partitions over - the network. It is recommended to shuffle the dataframe before - training and store it. - """ - # Repartition the data in order to fit the number of workers. - num_partitions = dataframe.rdd.getNumPartitions() - # Check if the dataframe needs to be shuffled. 
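Editorial aside: average_models above accumulates every replica's weights into a zero-initialized buffer and divides by the replica count. The equivalent layer-by-layer form, with toy weights:

```python
# Element-wise mean of two replicas' weight lists (as from get_weights()).
import numpy as np

replica_a = [np.array([1.0, 3.0]), np.array([[2.0, 6.0]])]
replica_b = [np.array([3.0, 5.0]), np.array([[4.0, 8.0]])]

averaged = [(wa + wb) / 2.0 for wa, wb in zip(replica_a, replica_b)]
print(averaged)  # [array([2., 4.]), array([[3., 7.]])]
```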
-        if shuffle:
-            dataframe = shuffle(dataframe)
-        # Check if we need to repartition the dataframe.
-        if num_partitions >= self.num_workers:
-            dataframe = dataframe.coalesce(self.num_workers)
-        else:
-            dataframe = dataframe.repartition(self.num_workers)
-        # Start the training procedure.
-        self.record_training_start()
-        for i in range(0, self.num_epoch):
-            worker = self.allocate_worker()
-            # Set the maximum number of mini-batches.
-            worker.set_max_prefetch(self.max_mini_batches_prefetch)
-            models = dataframe.rdd.mapPartitionsWithIndex(
-                worker.train).collect()
-            self.average_models(models)
-        # End the training procedure.
-        self.record_training_end()
-
-        return deserialize_keras_model(self.master_model)
-
-
-class EnsembleTrainer(Trainer):
-    """Utility trainer which will train ensemble methods in parallel.
-
-    # Arguments
-        keras_model: model. Keras model to train.
-        worker_optimizer: string. String representing worker optimizer.
-                          See https://keras.io/optimizers/
-        loss: string. String representing the loss.
-              See: https://keras.io/objectives/
-        metrics: list of strings representing model evaluation metrics. Default is ["accuracy"].
-                 See: https://keras.io/metrics/
-        features_col: string or list of strings. Name(s) of the features column(s).
-        label_col: string or list of strings. Name(s) of the label column(s).
-        batch_size: int. Mini-batch size.
-        num_ensembles: int. Number of ensembles to train.
-        loss_weights: optional list or dict specifying weights for different losses.
-    # Note
-        This will not employ a data-parallel approach for the ensembles.
-    """
-
-    def __init__(self, keras_model, worker_optimizer, loss, metrics=["accuracy"], features_col="features",
-                 label_col="label", batch_size=32, num_ensembles=2, loss_weights=None):
-        super(EnsembleTrainer, self).__init__(keras_model,
-                                              loss, worker_optimizer, metrics, loss_weights)
-        self.features_column = features_col
-        self.label_column = label_col
-        self.batch_size = batch_size
-        self.num_ensembles = num_ensembles
-        # Number of epochs used by the underlying SequentialWorker.
-        self.num_epoch = 1
-
-    def allocate_worker(self):
-        """Allocates the EnsembleWorker for internal use."""
-        worker = SequentialWorker(model=self.master_model, features_col=self.features_column,
-                                  label_col=self.label_column, batch_size=self.batch_size, num_epoch=self.num_epoch,
-                                  optimizer=self.worker_optimizer, loss=self.loss, loss_weights=self.loss_weights, metrics=self.metrics)
-
-        return worker
-
-    def train(self, dataframe, shuffle=False):
-        """Trains the specified number of ensemble models using the specified dataframe.
-
-        # Arguments
-            dataframe: dataframe. A Spark Dataframe containing the training data.
-            shuffle: boolean. Tells to shuffle the dataframe before training.
-                     Warning: this will tell Spark to shuffle all partitions over
-                     the network. It is recommended to shuffle the dataframe before
-                     training and store it.
-        """
-        # Allocate a worker.
-        worker = self.allocate_worker()
-        # Set the maximum number of mini-batches.
-        worker.set_max_prefetch(self.max_mini_batches_prefetch)
-        # Repartition in order to fit the number of ensembles.
-        num_partitions = dataframe.rdd.getNumPartitions()
-        # Check if the dataframe needs to be shuffled before training.
-        if shuffle:
-            dataframe = shuffle(dataframe)
-        # Check if we need to repartition the dataframe.
-        if num_partitions >= self.num_ensembles:
-            dataframe = dataframe.coalesce(self.num_ensembles)
-        else:
-            dataframe = dataframe.repartition(self.num_ensembles)
-        # Start the training procedure.
-        self.record_training_start()
-        # Train the models in parallel.
- models = dataframe.rdd.mapPartitionsWithIndex(worker.train).collect() - # End the training procedure. - self.record_training_end() - - return models - - -class DistributedTrainer(Trainer): - """Abstract class which describes the properties of a distributed optimizer. - - # Arguments - keras_model: model. Keras model to train. - worker_optimizer: string. String representing worker optimizer. - See https://keras.io/optimizers/ - loss: string. String representing the loss. - See: https://keras.io/objectives/ - metrics: list of strings representing model evaluation metrics. Default is ["accuracy"]. - See: https://keras.io/metrics/ - features_col: string or list of strings. Name(s) of the features column(s). - label_col: string or list of strings. Name(s) of the label column(s). - num_epoch: int. Number of epochs. - batch_size: int. Mini-batch size. - num_workers: int. Number of distributed workers. - master_port: int. port number for the parameter server. - loss_weights: optional list or dict specifying weights for different losses. - """ - - def __init__(self, keras_model, worker_optimizer, loss, metrics=["accuracy"], num_workers=2, batch_size=32, - features_col="features", label_col="label", num_epoch=1, master_port=5000, loss_weights=None): - super(DistributedTrainer, self).__init__(keras_model, - loss, worker_optimizer, metrics, loss_weights) - self.num_workers = num_workers - self.batch_size = batch_size - self.features_column = features_col - self.label_column = label_col - self.num_epoch = num_epoch - self.parameter_server = None - self.parameter_server_thread = None - self.master_host = determine_host_address() - self.master_port = master_port - self.learning_rate = 1.0 - - def set_minibatch_size(self, size): - """Sets the size of the mini-batch.""" - self.batch_size = size - - def get_minibatch_size(self): - """Returns the size of the mini-batch.""" - return self.batch_size - - def get_features_column(self): - """Returns the name of the features column.""" - return self.features_column - - def get_label_column(self): - """Returns the name of the label column.""" - return self.label_column - - def get_learning_rate(self): - """Returns the learning rate of the worker which can be tuned by - the parameter server, or optimization scheme. - - Note: this learning rate is independent of the learning rate of the optimizer. - """ - return self.learning_rate - - def set_learning_rate(self, learning_rate): - """Sets the learning rate which can be tuned by the parameter server, - or optimization scheme. - - Note: this learning rate is independent of the learning rate of the optimizer. - """ - self.learning_rate = learning_rate - - def set_num_epoch(self, num_epoch): - """Sets the number of epochs.""" - self.num_epoch = num_epoch - - def get_num_epoch(self): - """Returns the number of epochs.""" - return self.num_epoch - - def allocate_worker(self): - """Allocates the worker implementation. - - Implement this method in subclasses. - """ - raise NotImplementedError - - def set_master(self, master): - """Sets the master address of the parameter server.""" - self.master_host = master - - def determine_new_master(self): - """Sets the new master address to the current host.""" - self.master_host = determine_host_address() - - def allocate_parameter_server(self): - """Allocates the parameter server. - - If an other type of parameter server is required, you can overwrite - this implementation. 
- """ - parameter_server = DeltaParameterServer( - self.master_model, self.master_port) - - return parameter_server - - def set_num_workers(self, num_workers): - """Sets the number of parallel workers to use.""" - self.num_workers = num_workers - - def get_num_workers(self): - """Returns the number of parallel workers.""" - return self.num_workers - - def num_updates(self): - """Returns the number of model updates the parameter server performed.""" - return self.parameter_server.num_updates() - - def service(self): - """Executes the parameter server service.""" - self.parameter_server.start() - self.parameter_server.initialize() - self.parameter_server.run() - - def stop_service(self): - """Stops the parameter server service.""" - self.parameter_server.stop() - self.parameter_server_thread.join() - self.parameter_server_thread = None - - def start_service(self): - """Starts the parameter server service.""" - # Check if a parameter server thread is already allocated. - if not self.parameter_server_thread is None: - # Stop the parameter server service. - self.stop_service() - # Allocate a new parameter service thread. - self.parameter_server_thread = threading.Thread(target=self.service) - self.parameter_server_thread.start() - - def train(self, dataframe, shuffle=False): - """Training procedure of a distributed optimization process. - - # Arguments - dataframe: dataframe. A Spark Dataframe containing the training data. - shuffle: boolean. Tells to shuffle the dataframe before training. - Warning: this will tell Spark to shuffle all partitions over - the network. It is recommended to shuffle the dataframe before - training and store it. - """ - # Check if a parameter server has been allocated. - if self.parameter_server is not None: - # Cleanup the old parameter server. - self.parameter_server.stop() - self.parameter_server = None - # Allocate the parameter server. - self.parameter_server = self.allocate_parameter_server() - # Start the communication service. - self.start_service() - # Allocate a worker. - worker = self.allocate_worker() - # Set the maximum number of mini-batches. - worker.set_max_prefetch(self.max_mini_batches_prefetch) - # Repartition in order to fit the number of workers. - num_partitions = dataframe.rdd.getNumPartitions() - # Check if the dataframe needs to be shuffled before training. - if shuffle: - dataframe = shuffle(dataframe) - # Check if we need to repartition the dataframe. - if num_partitions >= self.num_workers: - dataframe = dataframe.coalesce(self.num_workers) - else: - dataframe = dataframe.repartition(self.num_workers) - # Cache the dataframe. - dataframe.cache() - # Start the training procedure. - self.record_training_start() - # Iterate through the epochs. - self.history = dataframe.rdd.mapPartitionsWithIndex( - worker.train).collect() - # End the training procedure. - self.record_training_end() - # Stop the communication service. - self.stop_service() - - return self.parameter_server.get_model() - - -class AsynchronousDistributedTrainer(DistributedTrainer): - """Abstract class for an asynchronous distributed trainer. - - This trainer also allows us to set a parallelism factor. This parallelism factor allows - us to further parallelize the Spark job. For example, imagine having n machines optimizing - a model in an asynchronous distributed setting. If for some, but likely reason, some machines - are performing worse compared to others. 
It will cause the complete learning procedure to be - stuck on this one particular machine since every machine will be assigned a single partition. - In order to resolve this, we added a parallelization factor. This factor indicates the ratio - of the number of jobs per machine (executor). For small dataframes, we recommend that this factor - is set to 1. However, this effect really is prominent when the dataframe is large. In this case - we recommend that the ratio is 2 or 3. - - # Arguments - keras_model: model. Keras model to train. - worker_optimizer: string. String representing worker optimizer. - See https://keras.io/optimizers/ - loss: string. String representing the loss. - See: https://keras.io/objectives/ - metrics: list of strings representing model evaluation metrics. Default is ["accuracy"]. - See: https://keras.io/metrics/ - features_col: string or list of strings. Name(s) of the features column(s). - label_col: string or list of strings. Name(s) of the label column(s). - num_epoch: int. Number of epochs. - batch_size: int. Mini-batch size. - num_workers: int. Number of distributed workers. - master_port: int. port number for the parameter server. - loss_weights: optional list or dict specifying weights for different losses. - - # Note - By default, the parallelization factor is set to 1. - """ - - def __init__(self, keras_model, worker_optimizer, loss, metrics=["accuracy"], num_workers=2, batch_size=32, - features_col="features", label_col="label", num_epoch=1, master_port=5000, loss_weights=None): - super(AsynchronousDistributedTrainer, self).__init__(keras_model, worker_optimizer, loss, metrics, - num_workers, batch_size, features_col, - label_col, num_epoch, master_port, loss_weights) - # Initialize asynchronous methods variables. - self.parallelism_factor = 1 - - def allocate_worker(self): - """Allocates the worker implementation. - - Implement this method in subclasses. - """ - raise NotImplementedError - - def set_parallelism_factor(self, factor): - """Sets the parallelization factor. - - # Arguments - factor: int. The new parallelization factor. - """ - self.parallelism_factor = factor - - def get_parallelism_factor(self): - """Returns the parallelization factor.""" - return self.parallelism_factor - - def train(self, dataframe, shuffle=False): - """Training procedure of an asynchronous distributed optimization process. - - # Arguments - dataframe: dataframe. A Spark Dataframe containing the training data. - shuffle: boolean. Tells to shuffle the dataframe before training. - Warning: this will tell Spark to shuffle all partitions over - the network. It is recommended to shuffle the dataframe before - training and store it. - """ - # Check if a parameter server has been allocated. - if self.parameter_server is not None: - # Cleanup the old parameter server. - self.parameter_server.stop() - self.parameter_server = None - # Allocate the parameter server. - self.parameter_server = self.allocate_parameter_server() - # Start the communication service. - self.start_service() - # Allocate a worker. - worker = self.allocate_worker() - # Set the maximum number of mini-batches. - worker.set_max_prefetch(self.max_mini_batches_prefetch) - # Repartition in order to fit the number of workers. - num_partitions = dataframe.rdd.getNumPartitions() - # Check if the dataframe needs to be shuffled before training. - if shuffle: - dataframe = shuffle(dataframe) - # Indicate the parallelism (number of worker times parallelism factor). 
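Editorial aside: the repartitioning arithmetic that follows is simple but worth spelling out; the target partition count is parallelism_factor * num_workers, and Spark either coalesces down (no shuffle) or repartitions up (full shuffle). Illustrative numbers:

```python
# Partition-count decision used by AsynchronousDistributedTrainer.train.
parallelism_factor, num_workers = 2, 4
parallelism = parallelism_factor * num_workers  # 8 partitions wanted
num_partitions = 10                             # what the dataframe has

if num_partitions >= parallelism:
    action = "coalesce(%d)" % parallelism       # shrink without a shuffle
else:
    action = "repartition(%d)" % parallelism    # grow via a full shuffle
print(action)                                   # -> coalesce(8)
```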
- parallelism = self.parallelism_factor * self.num_workers - # Check if we need to repartition the dataframe. - if num_partitions >= parallelism: - dataframe = dataframe.coalesce(parallelism) - else: - dataframe = dataframe.repartition(parallelism) - # Start the training procedure. - self.record_training_start() - # Iterate through the epochs. - self.history = dataframe.rdd.mapPartitionsWithIndex( - worker.train).collect() - # End the training procedure. - self.record_training_end() - # Stop the communication service. - self.stop_service() - - return self.parameter_server.get_model() - - -class AEASGD(AsynchronousDistributedTrainer): - """Asynchronous Elastic Averaging SGD optimizer. - Introduced by Zhang et al. - https://arxiv.org/pdf/1412.6651.pdf - # Arguments - keras_model: model. Keras model to train. - worker_optimizer: string. String representing worker optimizer. - See https://keras.io/optimizers/ - loss: string. String representing the loss. - See: https://keras.io/objectives/ - metrics: list of strings representing model evaluation metrics. Default is ["accuracy"]. - See: https://keras.io/metrics/ - features_col: string or list of strings. Name(s) of the features column(s). - label_col: string or list of strings. Name(s) of the label column(s). - num_epoch: int. Number of epochs. - batch_size: int. Mini-batch size. - num_workers: int. Number of distributed workers. - communication_window: int. Staleness parameter. - This parameter describes the number of mini-batches that will be - computed before updating the center variable. For EASGD based - algorithms we recommend large communication windows. - learning_rate: float. Learning rate. - rho: float. Elastic "exploration" variable. - Higher values mean that the model is allowed to "explore" its surroundings. - Smaller values are correlated with less exploration. We use the value - recommend by the authors. - master_port: int. port number for the parameter server. - loss_weights: optional list or dict specifying weights for different losses. - """ - - def __init__(self, keras_model, worker_optimizer, loss, metrics=["accuracy"], num_workers=2, batch_size=32, - features_col="features", label_col="label", num_epoch=1, communication_window=32, - rho=5.0, learning_rate=0.1, master_port=5000, loss_weights=None): - super(AEASGD, self).__init__(keras_model, worker_optimizer, loss, metrics, num_workers, - batch_size, features_col, label_col, num_epoch, master_port, loss_weights) - self.communication_window = communication_window - self.rho = rho - self.learning_rate = learning_rate - - def allocate_worker(self): - """Allocates the asynchronous EASGD worker.""" - # Allocate a AEASGD worker. - worker = AEASGDWorker(self.master_model, self.worker_optimizer, self.loss, self.loss_weights, self.metrics, - self.features_column, self.label_column, self.batch_size, self.num_epoch, - self.master_host, self.master_port, self.rho, self.learning_rate, - self.communication_window) - - return worker - - -class DOWNPOUR(AsynchronousDistributedTrainer): - """DOWNPOUR Optimizer. - - Asynchronous data-parallel optimizer introduced by Dean et al. - http://static.googleusercontent.com/media/research.google.com/en/archive/large_deep_networks_nips2012.pdf - - # Arguments - keras_model: model. Keras model to train. - worker_optimizer: string. String representing worker optimizer. - See https://keras.io/optimizers/ - loss: string. String representing the loss. - See: https://keras.io/objectives/ - metrics: list of strings representing model evaluation metrics. 
Default is ["accuracy"]. - See: https://keras.io/metrics/ - features_col: string or list of strings. Name(s) of the features column(s). - label_col: string or list of strings. Name(s) of the label column(s). - num_epoch: int. Number of epochs. - batch_size: int. Mini-batch size. - num_workers: int. Number of distributed workers. - communication_window: int. Staleness parameter. - This parameter describes the number of mini-batches that will be - computed before updating the center variable. For DOWNPOUR we - recommend small communication windows. - learning_rate: float. Learning rate. - master_port: int. port number for the parameter server. - loss_weights: optional list or dict specifying weights for different losses. - """ - - def __init__(self, keras_model, worker_optimizer, loss, metrics=["accuracy"], num_workers=2, batch_size=32, - features_col="features", label_col="label", num_epoch=1, communication_window=5, master_port=5000, loss_weights=None): - super(DOWNPOUR, self).__init__(keras_model, worker_optimizer, loss, metrics, num_workers, - batch_size, features_col, label_col, num_epoch, master_port, loss_weights) - self.communication_window = communication_window - - def allocate_worker(self): - """Allocates the DOWNPOUR worker.""" - # Allocate DOWNPOUR worker. - worker = DOWNPOURWorker(self.master_model, self.worker_optimizer, self.loss, self.loss_weights, self.metrics, - self.features_column, self.label_column, self.batch_size, self.num_epoch, - self.master_host, self.master_port, self.communication_window) - - return worker - - -class EAMSGD(AsynchronousDistributedTrainer): - """Asynchronous Elastic Averaging w/ Momentum SGD optimizer. - - Introduced by Zhang et al. - https://arxiv.org/pdf/1412.6651.pdf - - # Arguments - keras_model: model. Keras model to train. - worker_optimizer: string. String representing worker optimizer. - See https://keras.io/optimizers/ - loss: string. String representing the loss. - See: https://keras.io/objectives/ - metrics: list of strings representing model evaluation metrics. Default is ["accuracy"]. - See: https://keras.io/metrics/ - features_col: string or list of strings. Name(s) of the features column(s). - label_col: string or list of strings. Name(s) of the label column(s). - num_epoch: int. Number of epochs. - batch_size: int. Mini-batch size. - num_workers: int. Number of distributed workers. - communication_window: int. Staleness parameter. - This parameter describes the number of mini-batches that will be - computed before updating the center variable. For EASGD based - algorithms we recommend large communication windows. - learning_rate: float. Learning rate. - rho: float. Elastic "exploration" variable. - Higher values mean that the model is allowed to "explore" its surroundings. - Smaller values are correlated with less exploration. We use the value - recommend by the authors. - momentum: float. Momentum term. - master_port: int. port number for the parameter server. - loss_weights: optional list or dict specifying weights for different losses. 
- """ - - def __init__(self, keras_model, worker_optimizer, loss, metrics=["accuracy"], num_workers=2, batch_size=32, - features_col="features", label_col="label", num_epoch=1, communication_window=32, - rho=5.0, learning_rate=0.1, momentum=0.9, master_port=5000, loss_weights=None): - super(EAMSGD, self).__init__(keras_model, worker_optimizer, loss, metrics, num_workers, - batch_size, features_col, label_col, num_epoch, master_port, loss_weights) - self.communication_window = communication_window - self.rho = rho - self.learning_rate = learning_rate - self.momentum = momentum - - def allocate_worker(self): - """Allocates the asynchronous EAMSGD worker.""" - # Allocate a EAMSGD REST worker. - worker = EAMSGDWorker(self.master_model, self.worker_optimizer, self.loss, self.loss_weights, self.metrics, - self.features_column, self.label_column, self.batch_size, self.num_epoch, - self.master_host, self.master_port, self.rho, self.learning_rate, - self.momentum, self.communication_window) - - return worker - - -class ADAG(AsynchronousDistributedTrainer): - """Asynchronous Distributed Adaptive Gradient (Stochastic Gradient Descent). - - Introduced by Hermans et al. - - # Arguments: - keras_model: model. Keras model to train. - worker_optimizer: string. String representing worker optimizer. - See: https://keras.io/optimizers/ - loss: string. String representing the loss function. - See: https://keras.io/objectives/ - metrics: list of strings representing model evaluation metrics. Default is ["accuracy"]. - See: https://keras.io/metrics/ - features_col: string or list of strings. Name(s) of the features column(s). - num_epoch: int. Number of epochs. - batch_size: int. Mini-batch size. - num_workers: int. Number of distributed workers. - communication_window: int. Staleness parameter. - This parameter describes the number of mini-batches that will be - computed before updating the center variable. For DOWNPOUR based - algorithms we recommend large communication windows. - master_port: int. port number for the parameter server. - loss_weights: optional list or dict specifying weights for different losses. - """ - - def __init__(self, keras_model, worker_optimizer, loss, metrics=["accuracy"], num_workers=2, batch_size=32, - features_col="features", label_col="label", num_epoch=1, communication_window=12, master_port=5000, loss_weights=None): - # Initialize the parent object. - super(ADAG, self).__init__(keras_model, worker_optimizer, loss, metrics, num_workers, - batch_size, features_col, label_col, num_epoch, master_port, loss_weights) - # Set algorithm parameters. - self.communication_window = communication_window - - def allocate_worker(self): - """Allocate an Adag worker.""" - worker = ADAGWorker(self.master_model, self.worker_optimizer, self.loss, self.loss_weights, self.metrics, - self.features_column, self.label_column, self.batch_size, self.num_epoch, - self.master_host, self.master_port, self.communication_window) - - return worker - - def allocate_parameter_server(self): - """Allocate the Adag parameter server.""" - parameter_server = ADAGParameterServer( - self.master_model, self.master_port) - - return parameter_server - - -class DynSGD(AsynchronousDistributedTrainer): - """Dynamic SGD, dynamically maintains learning rate for every worker - and incorperates staleness. - - Introduced in SIGMOD 2017 "Heterogenity-aware Parameter Servers" - http://net.pku.edu.cn/~cuibin/Papers/2017SIGMOD.pdf - - # Arguments: - keras_model: model. Keras model to train. - worker_optimizer: string. 
String representing worker optimizer. - See: https://keras.io/optimizers/ - loss: string. String representing the loss function. - See: https://keras.io/objectives/ - metrics: list of strings representing model evaluation metrics. Default is ["accuracy"]. - See: https://keras.io/metrics/ - features_col: string or list of strings. Name(s) of the features column(s). - num_epoch: int. Number of epochs. - batch_size: int. Mini-batch size. - num_workers: int. Number of distributed workers. - communication_window: int. Staleness parameter. - This parameter describes the number of mini-batches that will be - computed before updating the center variable. For DOWNPOUR based - algorithms we recommend large communication windows. - master_port: int. port number for the parameter server. - loss_weights: optional list or dict specifying weights for different losses. - """ - - def __init__(self, keras_model, worker_optimizer, loss, metrics=["accuracy"], num_workers=2, batch_size=32, - features_col="features", label_col="label", num_epoch=1, communication_window=5, master_port=5000, loss_weights=None): - # Initialize the parent object. - super(DynSGD, self).__init__(keras_model, worker_optimizer, loss, metrics, num_workers, - batch_size, features_col, label_col, num_epoch, master_port, loss_weights) - # Set algorithm parameters. - self.communication_window = communication_window - - def allocate_worker(self): - """Allocate DYNSGD worker.""" - worker = DynSGDWorker(self.master_model, self.worker_optimizer, self.loss, self.loss_weights, self.metrics, - self.features_column, self.label_column, self.batch_size, self.num_epoch, - self.master_host, self.master_port, self.communication_window) - - return worker - - def allocate_parameter_server(self): - """Allocate DYNSGD parameter server.""" - parameter_server = DynSGDParameterServer( - self.master_model, self.master_port) - - return parameter_server - - -class Experimental(AsynchronousDistributedTrainer): - """Experimental optimization scheme for development purposes.""" - - def __init__(self, keras_model, worker_optimizer, loss, metrics=["accuracy"], num_workers=2, batch_size=32, - features_col="features", label_col="label", num_epoch=1, communication_window=5, - learning_rate=1.0, master_port=5000, loss_weights=None): - # Initialize the parent object. - super(Experimental, self).__init__(keras_model, worker_optimizer, loss, metrics, num_workers, - batch_size, features_col, label_col, num_epoch, master_port, loss_weights) - # Set the algorithm parameters. - self.communication_window = communication_window - self.learning_rate = learning_rate - - def allocate_worker(self): - """Allocate experimental worker.""" - worker = ExperimentalWorker(self.master_model, self.worker_optimizer, self.loss, self.loss_weights, self.metrics, - self.features_column, self.label_column, self.batch_size, self.num_epoch, - self.master_host, self.master_port, self.communication_window, - self.num_workers, self.learning_rate) - - return worker - - def allocate_parameter_server(self): - """Allocate experimental parameter server.""" - parameter_server = ExperimentalParameterServer( - self.master_model, self.master_port, self.learning_rate) - - return parameter_server -"""Commonly used Dataframe transformers. - -A transformer will "transform" a Spark dataframe from one form into -the other. For example, mapping the column to an other value, or adding -a column to a dataframe based on a collection of specified values. -""" - -## BEGIN Imports. 
- ##############################################################
-
- import numpy as np
-
- from distkeras.utils import new_dataframe_row
- from distkeras.utils import to_one_hot_encoded_dense
-
- from pyspark.mllib.linalg import DenseMatrix
- from pyspark.mllib.linalg import DenseVector
-
- from pyspark.sql.functions import mean
- from pyspark.sql.functions import stddev_pop
-
- ## END Imports. ################################################################
-
-
- class Transformer(object):
-     """Interface which defines a transformer object."""
-
-     def transform(self, dataframe):
-         """Transforms the dataframe into another dataframe.
-
-         # Returns
-             The transformed dataframe.
-         """
-         raise NotImplementedError
-
-
- class MinMaxTransformer(Transformer):
-     """Will transform every feature of an instance into a specified range.
-
-     # Arguments
-         o_min: float. Original minimum of dataset.
-         o_max: float. Original maximum of dataset.
-         n_min: float. New minimum of dataset.
-         n_max: float. New maximum of dataset.
-         input_col: string. Name of input column.
-         output_col: string. Name of output column.
-         is_vector: boolean. Indicates if the data element is a vector or
-             a singular value.
-
-     # Summary
-         Old range: [o_min; o_max]
-         New range: [n_min; n_max]
-     """
-
-     def __init__(self, o_min, o_max, n_min, n_max, input_col, output_col, is_vector=True):
-         self.o_min = float(o_min)
-         self.o_max = float(o_max)
-         self.n_min = float(n_min)
-         self.n_max = float(n_max)
-         self.scale = (self.n_max - self.n_min) / (self.o_max - self.o_min)
-         self.input_column = input_col
-         self.output_column = output_col
-         self.is_vector = is_vector
-
-     def _transform(self, row):
-         """Rescales every instance as follows:
-
-         x' = n_min + (x - o_min) * (n_max - n_min) / (o_max - o_min)
-
-         The implementation below uses the algebraically equivalent form
-         x' = scale * (x - o_max) + n_max.
-         """
-         if self.is_vector:
-             vector = row[self.input_column].toArray()
-             vector = self.scale * (vector - self.o_max) + self.n_max
-             new_value = DenseVector(vector)
-         else:
-             value = row[self.input_column]
-             new_value = self.scale * (value - self.o_max) + self.n_max
-         # Construct a new row with the normalized vector.
-         new_row = new_dataframe_row(row, self.output_column, new_value)
-
-         return new_row
-
-     def transform(self, dataframe):
-         """Applies the min-max transformation to every row in the dataframe.
-
-         # Arguments
-             dataframe: dataframe. Spark Dataframe.
-         """
-         return dataframe.rdd.map(self._transform).toDF()
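A quick sanity check that the form used in `_transform` is the familiar min-max rescaling; runnable as-is, using the value range from the MNIST example further below:

```python
import numpy as np

o_min, o_max, n_min, n_max = 0.0, 250.0, 0.0, 1.0
scale = (n_max - n_min) / (o_max - o_min)
x = np.array([0.0, 125.0, 250.0])

a = scale * (x - o_max) + n_max   # the form used by _transform above
b = n_min + (x - o_min) * scale   # the textbook min-max form
assert np.allclose(a, b) and a.min() == 0.0 and a.max() == 1.0
```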
- """ - return dataframe.rdd.map(self._transform).toDF() - - -class StandardTransformer(Transformer): - """Will transform the specified columns to unit standard deviation (if specified), - and centers the data to mean 0 (if specified). - - # Arguments - columns: list. List of columns. - suffix: string. Suffix name of the column after processing. - # Note - We assume equal probability of the rows. - """ - - def __init__(self, columns, suffix="_normalized"): - self.columns = columns - self.column_suffix = suffix - self.current_column = None - self.means = {} - self.stddevs = {} - - def clean_mean_keys(self, means): - """Cleans the keys of the specified dictionary (mean).""" - new_means = {} - - for k in means: - new_means[k[4:-1]] = means[k] - - return new_means - - def clean_stddev_keys(self, stddevs): - """Cleans the keys of the specified dictionary (stddev).""" - new_stddevs = {} - - for k in stddevs: - new_stddevs[k[11:-5]] = stddevs[k] - - return new_stddevs - - def _transform(self, row): - """Take the column, and normalize it with the computed means and std devs.""" - mean = self.means[self.current_column] - stddev = self.stddevs[self.current_column] - x = row[self.current_column] - x_normalized = (x - mean) / stddev - output_column = self.current_column + self.column_suffix - new_row = new_dataframe_row(row, output_column, x_normalized) - - return new_row - - def transform(self, dataframe): - """Applies standardization to the specified columns. - - # Arguments - dataframe: dataframe. Spark Dataframe. - """ - # Compute the means of the specified columns. - means = [mean(x) for x in self.columns] - means = dataframe.select(means).collect()[0].asDict() - self.means = self.clean_mean_keys(means) - # Compute the standard deviation of the specified columns. - stddevs = [stddev_pop(x) for x in self.columns] - stddevs = dataframe.select(stddevs).collect()[0].asDict() - self.stddevs = self.clean_stddev_keys(stddevs) - # For every feature, add a new column to the dataframe. - for column in self.columns: - self.current_column = column - dataframe = dataframe.rdd.map(self._transform).toDF() - - return dataframe - - -class DenseTransformer(Transformer): - """Transformes sparse vectors into dense vectors. - - # Arguments - input_col: string. Name of the input column of the sparse vector. - output_col: string. Name of the output column. - """ - - def __init__(self, input_col, output_col): - self.input_column = input_col - self.output_column = output_col - - def _transform(self, row): - """Transforms the sparse vector to a dense vector while putting it in a new column.""" - sparse_vector = row[self.input_column] - dense_vector = DenseVector(sparse_vector.toArray()) - new_row = new_dataframe_row(row, self.output_column, dense_vector) - - return new_row - - def transform(self, dataframe): - """Transforms every sparse vector in the input column to a dense vector. - - # Arguments - dataframe: dataframe. Spark Dataframe. - # Returns - A transformed Spark Dataframe. - """ - return dataframe.rdd.map(self._transform).toDF() - - -class ReshapeTransformer(Transformer): - """Transforms vectors into other dense shapes. - - # Note: - Only use this transformer in the last stage of the processing pipeline. - Since the arbitrary vector shapes will be directly passed on to the models. - - # Arguments: - input_col: string. Name of the input column containing the vector. - output_col: string. Name of the output column. - shape: tuple. Shape of the matrix. 
- """ - - def __init__(self, input_col, output_col, shape): - self.input_column = input_col - self.output_column = output_col - self.shape = shape - - def _transform(self, row): - """Transforms the vector to a dense matrix while putting it in a new column.""" - vector = row[self.input_column] - vector = np.asarray(vector) - reshaped = vector.reshape(self.shape).tolist() - new_row = new_dataframe_row(row, self.output_column, reshaped) - - return new_row - - def transform(self, dataframe): - """Transforms every vector in the input column to a dense vector. - - # Arguments - dataframe: dataframe. Spark Dataframe. - # Returns - A transformed Spark Dataframe. - """ - return dataframe.rdd.map(self._transform).toDF() - - -class OneHotTransformer(Transformer): - """Transformer which transforms an integer index into a vector using one-hot-encoding. - - # Arguments - output_dim: int. Dimension of output vector. - input_col: string. Name of input column. - output_col: string. Name of output column. - """ - - def __init__(self, output_dim, input_col, output_col): - self.input_column = input_col - self.output_column = output_col - self.output_dimensionality = output_dim - - def _transform(self, row): - """Transforms every individual row. - - Only for internal use. - """ - label = row[self.input_column] - vector = to_one_hot_encoded_dense(label, self.output_dimensionality) - new_row = new_dataframe_row(row, self.output_column, vector.tolist()) - - return new_row - - def transform(self, dataframe): - """Applies One-Hot encoding to every row in the dataframe. - - # Arguments - dataframe: dataframe. A Spark Dataframe. - # Returns - A Spark Dataframe with one-hot encoded features. - """ - return dataframe.rdd.map(self._transform).toDF() - - -class LabelIndexTransformer(Transformer): - """Transformer which will transform a prediction vector into an integer label. - - # Arguments - output_dim: int. Dimension of output vector. - input_col: string. Name of the input column. - output_col: string. Name of the output column. - default_index: int. Default "answer". - activation_threshold: float. Threshold of immediate activation. - """ - - def __init__(self, output_dim, input_col="prediction", output_col="prediction_index", - default_index=0, activation_threshold=0.55): - self.input_column = input_col - self.output_column = output_col - self.output_dimensionality = output_dim - self.activation_threshold = activation_threshold - self.default_index = default_index - - def get_index(self, vector): - """Returns the index with the highest value or with activation threshold.""" - max = 0.0 - max_index = self.default_index - for index in range(0, self.output_dimensionality): - if vector[index] >= self.activation_threshold: - return index - if vector[index] > max: - max = vector[index] - max_index = index - - return max_index - - def _transform(self, row): - """Transforms every row by adding a "predicted index" column to the dataframe. """ - prediction = row[self.input_column] - index = float(self.get_index(prediction)) - new_row = new_dataframe_row(row, self.output_column, index) - - return new_row - - def transform(self, dataframe): - """Transforms the dataframe by adding a predicted index. - - # Arguments - dataframe: dataframe. A Spark Dataframe. - # Returns - A Spark Dataframe with a "predicted" index. - """ - return dataframe.rdd.map(self._transform).toDF() -"""Utility functions used throughout Distributed Keras.""" - -## BEGIN Import. 
- """Utility functions used throughout Distributed Keras."""
-
- ## BEGIN Import. ###############################################################
-
- from keras import backend as K
-
- from keras.models import model_from_json
-
- from pyspark.mllib.linalg import DenseVector
- from pyspark.sql import Row
- from pyspark.sql.functions import rand
-
- import pickle
-
- import json
-
- import numpy as np
-
- import os
-
- import pwd
-
- ## END Import. #################################################################
-
-
- def get_os_username():
-     """Returns the username of the user on the operating system.
-
-     From: http://stackoverflow.com/questions/842059/is-there-a-portable-way-to-get-the-current-username-in-python
-     """
-     return pwd.getpwuid(os.getuid())[0]
-
-
- def set_keras_base_directory(base_dir='/tmp/' + get_os_username()):
-     """Sets the base directory of Keras."""
-     K._keras_base_dir = base_dir
-
-
- def to_one_hot_encoded_dense(value, n_dim=2):
-     """Converts the value to a one-hot encoded vector.
-
-     # Arguments
-         value: float. Value of the single "hot" value.
-         n_dim: int. Dimension of the output vector.
-     """
-     value = int(value)
-     vector = np.zeros(n_dim)
-     vector[value] = 1.0
-
-     return vector
-
-
- def new_dataframe_row(old_row, column_name, column_value):
-     """Constructs a new Spark Row based on the old row, and a new column name and value."""
-     row = Row(*(old_row.__fields__ + [column_name])
-               )(*(old_row + (column_value, )))
-
-     return row
-
-
- def json_to_dataframe_row(string):
-     """Converts a JSON String to a Spark Dataframe row."""
-     dictionary = json.loads(string)
-     row = Row(**dictionary)
-
-     return row
-
-
- def pickle_object(o):
-     """Pickles the specified object."""
-     return pickle.dumps(o, -1)
-
-
- def unpickle_object(string):
-     """Unpickles the specified byte string into an object."""
-     return pickle.loads(string)
-
-
- def serialize_keras_model(model):
-     """Serializes the specified Keras model into a dictionary."""
-     dictionary = {}
-     dictionary['model'] = model.to_json()
-     dictionary['weights'] = model.get_weights()
-
-     return dictionary
-
-
- def history_executors_average(history):
-     """Returns the averaged training metrics for all the executors."""
-     max_iteration = max(history, key=lambda x: x['iteration'])['iteration']
-     max_executor = max(history, key=lambda x: x['worker_id'])['worker_id']
-     histories = []
-     averaged_history = []
-     # Fetch the histories of the individual executors (worker ids start at 0).
-     for i in range(0, max_executor + 1):
-         histories.append(history_executor(history, i))
-     # Construct the averaged history.
-     for i in range(0, max_iteration):
-         num_executors = 0
-         # Accumulator for the tracked metrics (e.g. loss and accuracy).
-         total = np.zeros(2)
-         for j in range(0, max_executor + 1):
-             if len(histories[j]) - 1 >= i:
-                 num_executors += 1
-                 total += histories[j][i]['history']
-         # Average the history.
-         total /= num_executors
-         averaged_history.append(total)
-
-     return averaged_history
-
-
- def history_executor(history, id):
-     """Returns the history of a specific executor."""
-     executor_history = [h for h in history if h['worker_id'] == id]
-     executor_history.sort(key=lambda x: x['iteration'])
-
-     return executor_history
-
-
- def deserialize_keras_model(dictionary):
-     """Deserializes the Keras model using the specified dictionary."""
-     architecture = dictionary['model']
-     weights = dictionary['weights']
-     model = model_from_json(architecture)
-     model.set_weights(weights)
-
-     return model
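`serialize_keras_model` and its inverse are what allow a model to travel to Spark executors as a plain picklable dictionary. A minimal roundtrip sketch, assuming dist-keras and Keras are installed:

```python
from keras.models import Sequential
from keras.layers import Dense

from distkeras.utils import (serialize_keras_model, deserialize_keras_model,
                             pickle_object, unpickle_object)

model = Sequential([Dense(2, input_dim=4)])
# The architecture becomes JSON, the weights stay as numpy arrays.
d = serialize_keras_model(model)
# The dictionary pickles cleanly, which is how it is shipped to executors.
restored = deserialize_keras_model(unpickle_object(pickle_object(d)))
assert restored.to_json() == model.to_json()
```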
-
-
- def uniform_weights(model, constraints=[-0.5, 0.5]):
-     """Initializes the parameters of the specified Keras model with uniform
-     weights drawn from the specified range.
-
-     # Arguments
-         model: Keras model.
-         constraints: array. An array with two elements which defines the range
-             of the uniform initialization.
-     """
-     # We assume the following: Keras will return a list of weight matrices.
-     # All layers, even the activation layers, will be randomly initialized.
-     weights = model.get_weights()
-     for layer in weights:
-         shape = layer.shape
-         if len(shape) > 1:
-             # Fill the matrix with random numbers.
-             n_rows = shape[0]
-             n_columns = shape[1]
-             for i in range(0, n_rows):
-                 for j in range(0, n_columns):
-                     layer[i][j] = np.random.uniform(
-                         low=constraints[0], high=constraints[1])
-         else:
-             # Fill the vector with random numbers.
-             n_elements = shape[0]
-             for i in range(0, n_elements):
-                 layer[i] = np.random.uniform(
-                     low=constraints[0], high=constraints[1])
-     # Set the new weights in the model.
-     model.set_weights(weights)
-
-
- def shuffle(dataset):
-     """Shuffles the rows in the specified Spark Dataframe.
-
-     # Arguments
-         dataset: dataframe. A Spark Dataframe.
-     """
-     dataset = dataset.orderBy(rand())
-     dataset.cache()
-
-     return dataset
-
-
- def precache(dataset, num_workers):
-     """Precaches the specified dataset.
-
-     Make sure the specified dataframe has the desired partitioning scheme.
-
-     # Arguments
-         dataset: dataframe. A Spark Dataframe.
-         num_workers: int. Number of workers you are going to use.
-     """
-     dataset = dataset.repartition(num_workers)
-     dataset.cache()
-     dataset.count()
-
-     return dataset
- """Workers module.
-
- This module contains all worker specific implementations for different optimization
- algorithms.
- """
-
- ## BEGIN Imports. ##############################################################
-
- import time
- import socket
- import random
- from distkeras.networking import connect
- from distkeras.networking import recv_data
- from distkeras.networking import send_data
-
- from distkeras.utils import deserialize_keras_model
- from distkeras.utils import serialize_keras_model
- from distkeras.utils import set_keras_base_directory
- from distkeras.utils import shuffle
- from distkeras.utils import uniform_weights
-
- from keras.optimizers import Optimizer, serialize, deserialize
- import keras.backend as K
-
- from itertools import tee
-
- from multiprocessing import Pool
-
- import numpy as np
-
- import threading
-
- import tensorflow as tf
-
- import sys
-
- # "queue" module in python 3 is named "Queue" in python 2
- use_python3 = sys.version_info[0] == 3
- if use_python3:
-     import queue
- else:
-     import Queue as queue
-
-
- ## END Imports. ################################################################
-
-
- class Worker(object):
-     """Abstract class of a worker.
-
-     This class provides basic functionality and properties all workers share.
- """ - - def __init__(self, model, optimizer, loss, loss_weights, metrics=["accuracy"], features_col="features", label_col="label", - batch_size=32, num_epoch=1, learning_rate=1.0): - assert isinstance(optimizer, (str, Optimizer) - ), "'optimizer' must be a string or a Keras Optimizer instance" - assert isinstance(features_col, (str, list) - ), "'features_col' must be a string or a list of strings" - assert isinstance(label_col, (str, list) - ), "'label_col' must be a string or a list of strings" - self.model = model - self.optimizer = {'class_name': optimizer, 'config': {}} if isinstance( - optimizer, str) else serialize(optimizer) - self.loss = loss - self.loss_weights = loss_weights - self.metrics = metrics - self.features_column = [features_col] if isinstance( - features_col, str) else features_col - self.label_column = [label_col] if isinstance( - label_col, str) else label_col - self.batch_size = batch_size - self.num_epoch = num_epoch - self.max_mini_batches = 100 - self.prefetching_thread = None - self.mini_batches = None - self.is_prefetching = True - self.worker_id = -1 - self.learning_rate = learning_rate - self.num_inputs = len(self.features_column) - self.num_outputs = len(self.label_column) - self.current_epoch = 0 - - def set_max_prefetch(self, max_mini_batches): - """Sets the maximum number of mini-batches that can be prefetched.""" - self.max_mini_batches = max_mini_batches - - def set_learning_rate(self, learning_rate): - """Sets the learning rate of the worker.""" - self.learning_rate = learning_rate - - def get_learning_rate(self): - """Returns the learning rate of the worker.""" - return self.learning_rate - - def set_worker_id(self, worker_id): - """Sets the worker id. - - # Arguments - worker_id: int. Worker identifier. - """ - self.worker_id = worker_id - - def get_worker_id(self): - """Returns the worker id.""" - return self.worker_id - - def prepare_model(self): - """Prepares the model for training.""" - # Set the Keras directory. - set_keras_base_directory() - if K.backend() == 'tensorflow': - # set GPU option allow_growth to False for GPU-enabled tensorflow - config = tf.ConfigProto() - config.gpu_options.allow_growth = False - sess = tf.Session(config=config) - K.set_session(sess) - - # Deserialize the Keras model. - self.model = deserialize_keras_model(self.model) - self.optimizer = deserialize(self.optimizer) - # Compile the model with the specified loss and optimizer. 
- self.model.compile(loss=self.loss, loss_weights=self.loss_weights, - optimizer=self.optimizer, metrics=self.metrics) - - def get_next_minibatch(self): - """Returns the next mini-batch.""" - return self.mini_batches.get(timeout=10) - - def start_prefetching_thread(self, iterator): - """Starts the data prefetching thread.""" - self.mini_batches = queue.Queue() - self.iterator = iterator - self.prefetching_thread = threading.Thread(target=self.prefetching) - self.prefetching_thread.start() - - def prefetching(self): - partition_iterators_all_epochs = tee(self.iterator, self.num_epoch) - for iter_one_epoch in partition_iterators_all_epochs: - self.current_epoch += 1 - self.is_prefetching = True - try: - while self.is_prefetching: - if self.mini_batches.qsize() < self.max_mini_batches: - batch = [next(iter_one_epoch) - for _ in range(self.batch_size)] - batch_iterator_copies = tee( - batch, self.num_inputs + self.num_outputs) - feature_iterators = batch_iterator_copies[:self.num_inputs] - label_iterators = batch_iterator_copies[self.num_inputs:] - X = [np.asarray([x[self.features_column[i]] for x in iterator]) - for i, iterator in enumerate(feature_iterators)] - Y = [np.asarray([x[self.label_column[i]] for x in iterator]) - for i, iterator in enumerate(label_iterators)] - self.mini_batches.put([X, Y]) - except Exception as e: - print(e) - self.is_prefetching = False - - def optimize(self): - """Optimization procedure of a worker.""" - raise NotImplementedError - - def train(self, worker_id, iterator): - """Training procedure for the worker node. - - # Arguments - worker_id: int. Partition index provided by Spark. Can be used as a worker_id. - iterator: iterator. Data iterator. - """ - # Prepare the optimization procedure. - self.start_prefetching_thread(iterator) - self.set_worker_id(worker_id) - self.prepare_model() - # Start the optimization procedure. - try: - self.optimize() - except Exception as e: - # Stop the prefetching process. - self.is_prefetching = False - print(e) - # Wait for the prefetching thread to stop. - self.prefetching_thread.join() - - return iter([serialize_keras_model(self.model)]) - - -class SequentialWorker(Worker): - """Implementation for sequential gradient updates on a single worker. - - Will train a model on a single worker node. - """ - - def __init__(self, model, optimizer, loss, loss_weights, metrics=["accuracy"], - features_col="features", label_col="label", batch_size=32, num_epoch=1): - # Initialize the parent class. - super(SequentialWorker, self).__init__(model, optimizer, loss, loss_weights, metrics, features_col, - label_col, batch_size, num_epoch) - - def optimize(self): - """Training procedure with sequential gradient updates. - - # Returns - Trained serialized Keras model. 
- """ - while True: - X, Y = self.get_next_minibatch() - h = self.model.train_on_batch(X, Y) - self.add_history(h) - - -class NetworkWorker(Worker): - """Abstract class of a worker who shares the variables using the network.""" - - def __init__(self, model, optimizer, loss, loss_weights, metrics=["accuracy"], features_col="features", label_col="label", - batch_size=32, num_epoch=1, master_host="localhost", master_port=5000, learning_rate=1.0): - super(NetworkWorker, self).__init__(model, optimizer, loss, loss_weights, metrics, features_col, - label_col, batch_size, num_epoch, learning_rate) - self.master_host = master_host - self.master_port = master_port - self.socket = None - self.center_variable = None - self.disable_nagle = True - self.training_history = [] - self.worker_id = 0 - - def connect(self): - """Connect with the remote parameter server.""" - self.socket = connect( - self.master_host, self.master_port, self.disable_nagle) - - def pull(self): - """Requests the center variable from the parameter server.""" - # Request a pull from the parameter server. - self.socket.sendall(b'p') - # Fetch the center variable from the parameter server. - self.center_variable = np.asarray(recv_data(self.socket)) - - def commit(self, residual): - """Sends the gradient residual to the parameter server.""" - # Prepare the datastructure. - data = {} - data['worker_id'] = self.get_worker_id() - data['delta'] = residual - # Request a commit from the parameter server. - self.socket.sendall(b'c') - # Send the data to the paramter server. - send_data(self.socket, data) - - def set_tcp_no_delay(self, flag): - """Disables or enables Nagle's algorithm. - (True -> TCP_NODELAY = 1) - (False -> TCP_NODELAY = 0) - - # Arguments: - flag: boolean. Indicates if Nagle's algorithm should be disabled. - """ - self.disable_nagle = flag - - def tcp_no_delay(self): - """Returns the value TCP_NODELAY of the flag (Nagle's algorithm). - - # Returns - True, if Nagle's algorithm is disabled. False otherwise. - """ - return self.disable_nagle - - def get_master_host(self): - """Returns the host address of the master parameter server.""" - return self.master_host - - def get_master_port(self): - """Returns the port of the master parameter server.""" - return self.master_port - - def add_history(self, h): - """Appends the specified history data.""" - d = {} - d['history'] = h - d['worker_id'] = self.worker_id - d['iteration'] = self.iteration - d['timestamp'] = time.time() - self.training_history.append(d) - - def optimize(self): - """Optimization procedure of a network worker.""" - raise NotImplementedError - - def train(self, worker_id, iterator): - """Training procedure of a networked worker with a parameter server.""" - self.start_prefetching_thread(iterator) - self.set_worker_id(worker_id) - self.prepare_model() - self.connect() - self.pull() - self.model.set_weights(self.center_variable) - try: - self.optimize() - except Exception as e: - # Stop the prefetching process. - self.is_prefetching = False - print(e) - self.socket.close() - self.prefetching_thread.join(timeout=1) - - return iter(self.training_history) - - -class ADAGWorker(NetworkWorker): - """Implements the training procedure for ADAG. - - Introduced by Hermans et al. - """ - - def __init__(self, model, optimizer, loss, loss_weights, metrics=["accuracy"], features_col="features", label_col="label", - batch_size=32, num_epoch=1, master_host="localhost", master_port=5000, communication_window=5): - # Initialize the parent object. 
-
-
- class ADAGWorker(NetworkWorker):
-     """Implements the training procedure for ADAG.
-
-     Introduced by Hermans et al.
-     """
-
-     def __init__(self, model, optimizer, loss, loss_weights, metrics=["accuracy"], features_col="features", label_col="label",
-                  batch_size=32, num_epoch=1, master_host="localhost", master_port=5000, communication_window=5):
-         # Initialize the parent object.
-         super(ADAGWorker, self).__init__(model, optimizer, loss, loss_weights, metrics, features_col, label_col,
-                                          batch_size, num_epoch, master_host, master_port)
-         # Initialize ADAG parameters.
-         self.communication_window = communication_window
-         self.iteration = 1
-
-     def commit(self, residual):
-         """Sends the gradient residual to the parameter server."""
-         # Prepare the datastructure.
-         data = {}
-         data['worker_id'] = self.get_worker_id()
-         data['residual'] = residual
-         # Request a commit from the parameter server.
-         self.socket.sendall(b'c')
-         # Send the data to the parameter server.
-         send_data(self.socket, data)
-
-     def optimize(self):
-         """Optimization procedure of ADAG."""
-         W1 = np.asarray(self.model.get_weights())
-         while True:
-             X, Y = self.get_next_minibatch()
-             h = self.model.train_on_batch(X, Y)
-             self.add_history(h)
-             if self.iteration % self.communication_window == 0:
-                 W2 = np.asarray(self.model.get_weights())
-                 delta = W2 - W1
-                 delta /= self.communication_window
-                 self.commit(delta)
-                 self.pull()
-                 self.model.set_weights(self.center_variable)
-                 W1 = self.center_variable
-             self.iteration += 1
-
-
- class DOWNPOURWorker(NetworkWorker):
-     """Implements the training procedure for the distributed DOWNPOUR optimizer.
-
-     Introduced by Dean et al.
-     http://static.googleusercontent.com/media/research.google.com/en//archive/large_deep_networks_nips2012.pdf
-     """
-
-     def __init__(self, model, optimizer, loss, loss_weights, metrics=["accuracy"], features_col="features", label_col="label",
-                  batch_size=32, num_epoch=1, master_host="localhost", master_port=5000, communication_window=3):
-         # Initialize the parent object.
-         super(DOWNPOURWorker, self).__init__(model, optimizer, loss, loss_weights, metrics, features_col, label_col,
-                                              batch_size, num_epoch, master_host, master_port)
-         self.communication_window = communication_window
-         self.iteration = 1
-
-     def optimize(self):
-         """Specific optimization procedure for DOWNPOUR."""
-         W1 = np.asarray(self.model.get_weights())
-         while True:
-             X, Y = self.get_next_minibatch()
-             if self.iteration % self.communication_window == 0:
-                 W2 = np.asarray(self.model.get_weights())
-                 delta = W2 - W1
-                 self.commit(delta)
-                 self.pull()
-                 self.model.set_weights(self.center_variable)
-                 W1 = self.center_variable
-             h = self.model.train_on_batch(X, Y)
-             self.add_history(h)
-             self.iteration += 1
-
-
- class AEASGDWorker(NetworkWorker):
-     """Implementation of the asynchronous EASGD worker.
-
-     Introduced by Zhang et al.
-     https://arxiv.org/pdf/1412.6651.pdf
-     """
-
-     def __init__(self, model, optimizer, loss, loss_weights, metrics=['accuracy'], features_col="features", label_col="label",
-                  batch_size=32, num_epoch=1, master_host="localhost", master_port=5000, rho=5.0,
-                  learning_rate=0.01, communication_window=32):
-         # Initialize the parent object.
-         super(AEASGDWorker, self).__init__(model, optimizer, loss, loss_weights, metrics, features_col, label_col,
-                                            batch_size, num_epoch, master_host, master_port)
-         # Initialize AEASGD specific variables.
- self.rho = rho - self.learning_rate = learning_rate - self.communication_window = communication_window - self.alpha = self.rho * self.learning_rate - self.iteration = 1 - - def optimize(self): - """Specific training procedure for AEASGD.""" - while True: - X, Y = self.get_next_minibatch() - if self.iteration % self.communication_window == 0: - self.pull() - W = np.asarray(self.model.get_weights()) - E = self.alpha * (W - self.center_variable) - W = W - E - self.model.set_weights(W) - self.commit(E) - h = self.model.train_on_batch(X, Y) - self.add_history(h) - self.iteration += 1 - - -class EAMSGDWorker(NetworkWorker): - """Worker implementation of Asynchronous EA Momentum SGD. - - Introduced by Zhang et al. - https://arxiv.org/pdf/1412.6651.pdf - """ - - def __init__(self, model, optimizer, loss, loss_weights, metrics=['accuracy'], features_col="features", label_col="label", - batch_size=32, num_epoch=1, master_host="localhost", master_port=5000, rho=5.0, - learning_rate=0.01, momentum=0.9, communication_window=32): - # Initialize the parent object. - super(EAMSGDWorker, self).__init__(model, optimizer, loss, loss_weights, metrics, features_col, label_col, - batch_size, num_epoch, master_host, master_port) - # Initialize EAMSGD specific variables. - self.rho = rho - self.learning_rate = learning_rate - self.momentum = momentum - self.communication_window = communication_window - self.alpha = self.learning_rate * self.rho - self.iteration = 1 - - def optimize(self): - """Specific training procedure of asynchronous EAMSGD.""" - r = np.asarray(self.model.get_weights()) - r.fill(0.0) - while True: - X, Y = self.get_next_minibatch() - if self.iteration % self.communication_window == 0: - self.pull() - W = np.asarray(self.model.get_weights()) - E = self.alpha * (W - self.center_variable) - W = W - E - self.model.set_weights(W) - self.commit(E) - r_t = self.momentum * r - W_copy = np.asarray(self.model.get_weights()) - W = np.asarray(self.model.get_weights()) - W += r_t - self.model.set_weights(W) - h = self.model.train_on_batch(X, Y) - self.add_history(h) - gradient = np.asarray(self.model.get_weights()) - W - r = r_t - self.learning_rate * gradient - W_copy -= r - self.model.set_weights(W_copy) - self.iteration += 1 - - -class DynSGDWorker(NetworkWorker): - """Implements the training procedure for DynSGD.""" - - def __init__(self, model, optimizer, loss, loss_weights, metrics=["accuracy"], features_col="features", label_col="label", - batch_size=32, num_epoch=1, master_host="localhost", master_port=5000, communication_window=5): - # Initialize the parent object. - super(DynSGDWorker, self).__init__(model, optimizer, loss, loss_weights, metrics, features_col, label_col, - batch_size, num_epoch, master_host, master_port) - # Initialize DynSGD parameters. - self.communication_window = communication_window - self.iteration = 1 - self.last_update = 0 - - def pull(self): - """Requests the center variable and last update from the parameter server.""" - # Request a pull from the parameter server. - self.socket.sendall(b'p') - # Fetch the dictionary from the parameter server. - data = recv_data(self.socket) - self.center_variable = np.asarray(data['model']) - self.last_update = data['update'] - - def commit(self, residual): - """Sends the gradient residual to the parameter server.""" - # Prepare the datastructure. - data = {} - data['worker_id'] = self.get_worker_id() - data['residual'] = residual - data['last_update'] = self.last_update - # Request a commit from the parameter server. 
- self.socket.sendall(b'c') - # Send the data to the paramter server. - send_data(self.socket, data) - - def optimize(self): - """Optimization procedure of DynSGD.""" - W1 = np.asarray(self.model.get_weights()) - while True: - X, Y = self.get_next_minibatch() - h = self.model.train_on_batch(X, Y) - self.add_history(h) - if self.iteration % self.communication_window == 0: - W2 = np.asarray(self.model.get_weights()) - delta = W2 - W1 - self.commit(delta) - self.pull() - self.model.set_weights(self.center_variable) - W1 = self.center_variable - self.iteration += 1 - - -class ExperimentalWorker(NetworkWorker): - """Implements the training procedure for ADAG. - - Introduced by Hermans et al. - """ - - def __init__(self, model, optimizer, loss, loss_weights, metrics=["accuracy"], features_col="features", label_col="label", - batch_size=32, num_epoch=1, master_host="localhost", master_port=5000, communication_window=5, - num_workers=2, learning_rate=1.0): - # Initialize the parent object. - super(ExperimentalWorker, self).__init__(model, optimizer, loss, loss_weights, metrics, features_col, label_col, - batch_size, num_epoch, master_host, master_port, learning_rate) - # Initialize ADAG parameters. - self.communication_window = communication_window - self.num_workers = num_workers - self.current_num_workers = self.num_workers - self.inverse_learning_rate = 1 / self.learning_rate - self.iteration = 1 - - def commit(self, residual): - """Sends the gradient residual to the parameter server.""" - # Prepare the datastructure. - data = {} - data['worker_id'] = self.get_worker_id() - data['residual'] = residual - data['stale_center_variable'] = self.center_variable - # Request a commit from the parameter server. - self.socket.sendall(b'c') - # Send the data to the paramter server. - send_data(self.socket, data) - - def pull(self): - """Requests the center variable from the parameter server.""" - # Request a pull from the parameter server. - self.socket.sendall(b'p') - # Fetch the center variable from the parameter server. - self.center_variable = np.asarray(recv_data(self.socket)) - - def optimize(self): - """Optimization procedure of ADAG.""" - W1 = np.asarray(self.model.get_weights()) - while True: - X, Y = self.get_next_minibatch() - h = self.model.train_on_batch(X, Y) - self.add_history(h) - if self.iteration % self.communication_window == 0: - W2 = np.asarray(self.model.get_weights()) - delta = W2 - W1 - delta /= self.communication_window - self.commit(delta) - self.pull() - self.model.set_weights(self.center_variable) - W1 = self.center_variable - self.iteration += 1 -""" -This example will be used as a Kafka producer to generate dummy -data for our Spark Streaming example. -""" - -## BEGIN Imports. ############################################################## - -from kafka import * - -import sys - -import pandas - -import time - -import json - -## END Imports. ################################################################ - - -def usage(): - print("Distributed Keras Example: Kafka Producer") - print("") - print("Usage:") - print("python kafka_producer.py [bootstrap_server]") - exit(0) - - -def allocate_producer(bootstrap_server): - producer = KafkaProducer(bootstrap_servers=[bootstrap_server]) - - return producer - - -def read_data(): - path = 'data/atlas_higgs.csv' - data = [] - # Use Pandas to infer the types. - data = pandas.read_csv(path) - # Remove the unneeded columns. - del data['Label'] - del data['Weight'] - # Convert the data to a list of dictionaries. 
-     data = data.transpose().to_dict().values()
-
-     return data
-
-
- def produce(producer, topic, data):
-     for row in data:
-         # KafkaProducer expects bytes, so encode the serialized JSON.
-         producer.send(topic, json.dumps(row).encode('utf-8'))
-
-
- def main():
-     # Check if the required number of arguments has been specified.
-     if len(sys.argv) != 2:
-         usage()
-     # Fetch the bootstrap server from the arguments.
-     bootstrap_server = sys.argv[1]
-     # Allocate the producer.
-     producer = allocate_producer(bootstrap_server)
-     # Read the data from the CSV file.
-     data = read_data()
-     iteration = 1
-     # Transmit the data in a continuous loop, waiting 5 seconds after every iteration.
-     while True:
-         print("Iteration " + str(iteration) + ".")
-         produce(producer, 'Machine_Learning', data)
-         iteration += 1
-         time.sleep(5)
-
-
- if __name__ == "__main__":
-     main()
- """MNIST classification using Distributed Keras.
-
- ATTENTION:
- Before running this example, make sure you put the MNIST dataset
- on HDFS.
- 1. unzip mnist.zip
- 2. hdfs dfs -mkdir data
- 3. hdfs dfs -copyFromLocal mnist_train.csv data/mnist_train.csv
- 4. hdfs dfs -copyFromLocal mnist_test.csv data/mnist_test.csv
- """
-
- from distkeras.evaluators import *
- from distkeras.predictors import *
- from distkeras.trainers import *
- from distkeras.transformers import *
- from distkeras.utils import *
-
- from keras.layers.convolutional import *
- from keras.layers.core import *
- from keras.models import Sequential
- from keras.optimizers import *
-
- from pyspark import SparkConf
- from pyspark import SparkContext
-
- # Required for the Spark 2.0 code path below.
- from pyspark.sql import SparkSession
-
- from pyspark.ml.evaluation import MulticlassClassificationEvaluator
- from pyspark.ml.feature import OneHotEncoder
- from pyspark.ml.feature import StandardScaler
- from pyspark.ml.feature import StringIndexer
- from pyspark.ml.feature import VectorAssembler
-
- import pwd
- import os
-
-
- # First, setup the Spark variables. You can modify them to your needs.
- application_name = "Distributed Keras MNIST Notebook"
- using_spark_2 = False
- local = False
- path_train = "data/mnist_train.csv"
- path_test = "data/mnist_test.csv"
- if local:
-     # Tell master to use local resources.
-     master = "local[*]"
-     num_processes = 3
-     num_executors = 1
- else:
-     # Tell master to use YARN.
-     master = "yarn-client"
-     num_executors = 20
-     num_processes = 1
-
- # This variable is derived from the number of cores and executors, and will be used to assign the number of model trainers.
- num_workers = num_executors * num_processes
-
- print("Number of desired executors: " + str(num_executors))
- print("Number of desired processes / executor: " + str(num_processes))
- print("Total number of workers: " + str(num_workers))
-
- # Use the DataBricks CSV reader, which has some nice functionality regarding invalid values.
- os.environ['PYSPARK_SUBMIT_ARGS'] = '--packages com.databricks:spark-csv_2.10:1.4.0 pyspark-shell'
-
- conf = SparkConf()
- conf.set("spark.app.name", application_name)
- conf.set("spark.master", master)
- conf.set("spark.executor.cores", str(num_processes))
- conf.set("spark.executor.instances", str(num_executors))
- conf.set("spark.executor.memory", "4g")
- conf.set("spark.locality.wait", "0")
- conf.set("spark.serializer", "org.apache.spark.serializer.KryoSerializer")
- conf.set("spark.local.dir", "/tmp/" + get_os_username() + "/dist-keras")
-
- # Check if the user is running Spark 2.0 +
- if using_spark_2:
-     sc = SparkSession.builder.config(conf=conf) \
-         .appName(application_name) \
-         .getOrCreate()
- else:
-     # Create the Spark context.
- sc = SparkContext(conf=conf) - # Add the missing imports - from pyspark import SQLContext - sqlContext = SQLContext(sc) - -# Check if we are using Spark 2.0 -if using_spark_2: - reader = sc -else: - reader = sqlContext -# Read the training dataset. -raw_dataset_train = reader.read.format('com.databricks.spark.csv') \ - .options(header='true', inferSchema='true') \ - .load(path_train) -# Read the testing dataset. -raw_dataset_test = reader.read.format('com.databricks.spark.csv') \ - .options(header='true', inferSchema='true') \ - .load(path_test) - -# First, we would like to extract the desired features from the raw dataset. -# We do this by constructing a list with all desired columns. -# This is identical for the test set. -features = raw_dataset_train.columns -features.remove('label') - -# Next, we use Spark's VectorAssembler to "assemble" (create) a vector of all desired features. -# http://spark.apache.org/docs/latest/ml-features.html#vectorassembler -vector_assembler = VectorAssembler(inputCols=features, outputCol="features") -# This transformer will take all columns specified in features, and create an additional column -# "features" which will contain all the desired features aggregated into a single vector. -dataset_train = vector_assembler.transform(raw_dataset_train) -dataset_test = vector_assembler.transform(raw_dataset_test) - -# Define the number of output classes. -nb_classes = 10 -encoder = OneHotTransformer( - nb_classes, input_col="label", output_col="label_encoded") -dataset_train = encoder.transform(dataset_train) -dataset_test = encoder.transform(dataset_test) - -# Allocate a MinMaxTransformer from Distributed Keras to normalize the features.. -# o_min -> original_minimum -# n_min -> new_minimum -transformer = MinMaxTransformer(n_min=0.0, n_max=1.0, - o_min=0.0, o_max=250.0, - input_col="features", - output_col="features_normalized") -# Transform the dataset. -dataset_train = transformer.transform(dataset_train) -dataset_test = transformer.transform(dataset_test) - -# Keras expects the vectors to be in a particular shape, we can reshape the -# vectors using Spark. -reshape_transformer = ReshapeTransformer( - "features_normalized", "matrix", (28, 28, 1)) -dataset_train = reshape_transformer.transform(dataset_train) -dataset_test = reshape_transformer.transform(dataset_test) - -# Now, create a Keras model. -# Taken from Keras MNIST example. - -# Declare model parameters. -img_rows, img_cols = 28, 28 -# number of convolutional filters to use -nb_filters = 32 -# size of pooling area for max pooling -pool_size = (2, 2) -# convolution kernel size -kernel_size = (3, 3) -input_shape = (img_rows, img_cols, 1) - -# Construct the model. -convnet = Sequential() -convnet.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1], - border_mode='valid', - input_shape=input_shape)) -convnet.add(Activation('relu')) -convnet.add(Convolution2D(nb_filters, kernel_size[0], kernel_size[1])) -convnet.add(Activation('relu')) -convnet.add(MaxPooling2D(pool_size=pool_size)) -convnet.add(Flatten()) -convnet.add(Dense(225)) -convnet.add(Activation('relu')) -convnet.add(Dense(nb_classes)) -convnet.add(Activation('softmax')) - -# Define the optimizer and the loss. -optimizer_convnet = 'adam' -loss_convnet = 'categorical_crossentropy' - -# Print the summary. -convnet.summary() - -# We can also evaluate the dataset in a distributed manner. -# However, for this we need to specify a procedure how to do this. 
- - -def evaluate_accuracy(model, test_set, features="matrix"): - evaluator = AccuracyEvaluator( - prediction_col="prediction_index", label_col="label") - predictor = ModelPredictor(keras_model=model, features_col=features) - transformer = LabelIndexTransformer(output_dim=nb_classes) - test_set = test_set.select(features, "label") - test_set = predictor.predict(test_set) - test_set = transformer.transform(test_set) - score = evaluator.evaluate(test_set) - - return score - - -# Select the desired columns, this will reduce network usage. -dataset_train = dataset_train.select( - "features_normalized", "matrix", "label", "label_encoded") -dataset_test = dataset_test.select( - "features_normalized", "matrix", "label", "label_encoded") -# Keras expects DenseVectors. -dense_transformer = DenseTransformer( - input_col="features_normalized", output_col="features_normalized_dense") -dataset_train = dense_transformer.transform(dataset_train) -dataset_test = dense_transformer.transform(dataset_test) -dataset_train.repartition(num_workers) -dataset_test.repartition(num_workers) -# Assing the training and test set. -training_set = dataset_train.repartition(num_workers) -test_set = dataset_test.repartition(num_workers) -# Cache them. -training_set.cache() -test_set.cache() - -# Precache the trainingset on the nodes using a simple count. -print(training_set.count()) - -# Use the ADAG optimizer. You can also use a SingleWorker for testing purposes -> traditional -# non-distributed gradient descent. -trainer = ADAG(keras_model=convnet, worker_optimizer=optimizer_convnet, loss=loss_convnet, - num_workers=num_workers, batch_size=16, communication_window=5, num_epoch=5, - features_col="matrix", label_col="label_encoded") -trained_model = trainer.train(training_set) - -print("Training time: " + str(trainer.get_training_time())) -print("Accuracy: " + str(evaluate_accuracy(trained_model, test_set))) -print("Number of parameter server updates: " + - str(trainer.parameter_server.num_updates)) -"""Generates a JSON structure that needs to be added to the -secrets file. - -Author: Joeri Hermans -""" - -## BEGIN Imports. ############################################################## - -import json - -import optparse - -import random - -import string - -## END Imports. ################################################################ - - -def generate_secret(identity): - secret = ''.join(random.SystemRandom().choice( - string.ascii_uppercase + string.digits) for _ in range(64)) - d = {} - d['secret'] = secret - d['identity'] = identity - print(json.dumps(d)) - - -def parse_arguments(): - parser = optparse.OptionParser() - parser.set_defaults(identity=None) - parser.add_option('--identity', action='store', - dest='identity', type='string') - (options, args) = parser.parse_args() - - return options - - -def main(): - # Parse the options. - options = parse_arguments() - # Check if an identity has been provided. - if options.identity is not None: - generate_secret(options.identity) - else: - print("Please specify an identity (--identity).") - - -if __name__ == '__main__': - main() -"""Script which starts the Punchcard daemon. Punchcard will accept remote job -requests and execute them on the local cluster. - -Author: Joeri Hermans -""" - -## BEGIN Imports. ############################################################## - -from distkeras.job_deployment import Job -from distkeras.job_deployment import Punchcard - -import os - -import sys - -import optparse - -## END Imports. 
################################################################ - - -def parse_arguments(): - parser = optparse.OptionParser() - parser.set_defaults(port=8000, secrets_path='secrets.json') - parser.add_option('--port', action='store', dest='port', type='int') - parser.add_option('--secrets', action='store', - dest='secrets_path', type='string') - (options, args) = parser.parse_args() - - return options - - -def start_punchcard(port, secrets): - punchcard = Punchcard(secrets, port) - punchcard.run() - - -def main(): - # Parse the program arguments. - options = parse_arguments() - port = options.port - secrets_path = options.secrets_path - # Start the Punchcard instance. - start_punchcard(port, secrets_path) - - -if __name__ == '__main__': - main() -""" -Class for managing our data. -""" -import csv -import numpy as np -import random -import glob -import os.path -import sys -import operator -import threading -from processor import process_image -from keras.utils import to_categorical - - -class threadsafe_iterator: - def __init__(self, iterator): - self.iterator = iterator - self.lock = threading.Lock() - - def __iter__(self): - return self - - def __next__(self): - with self.lock: - return next(self.iterator) - - -def threadsafe_generator(func): - """Decorator""" - def gen(*a, **kw): - return threadsafe_iterator(func(*a, **kw)) - return gen - - -class DataSet(): - - def __init__(self, seq_length=40, class_limit=None, image_shape=(224, 224, 3)): - """Constructor. - seq_length = (int) the number of frames to consider - class_limit = (int) number of classes to limit the data to. - None = no limit. - """ - self.seq_length = seq_length - self.class_limit = class_limit - self.sequence_path = os.path.join('data', 'sequences') - self.max_frames = 300 # max number of frames a video can have for us to use it - - # Get the data. - self.data = self.get_data() - - # Get the classes. - self.classes = self.get_classes() - - # Now do some minor data cleaning. - self.data = self.clean_data() - - self.image_shape = image_shape - - @staticmethod - def get_data(): - """Load our data from file.""" - with open(os.path.join('data', 'data_file.csv'), 'r') as fin: - reader = csv.reader(fin) - data = list(reader) - - return data - - def clean_data(self): - """Limit samples to greater than the sequence length and fewer - than N frames. Also limit it to classes we want to use.""" - data_clean = [] - for item in self.data: - if int(item[3]) >= self.seq_length and int(item[3]) <= self.max_frames \ - and item[1] in self.classes: - data_clean.append(item) - - return data_clean - - def get_classes(self): - """Extract the classes from our data. If we want to limit them, - only return the classes we need.""" - classes = [] - for item in self.data: - if item[1] not in classes: - classes.append(item[1]) - - # Sort them. - classes = sorted(classes) - - # Return. - if self.class_limit is not None: - return classes[:self.class_limit] - else: - return classes - - def get_class_one_hot(self, class_str): - """Given a class as a string, return its number in the classes - list. This lets us encode and one-hot it for training.""" - # Encode it first. - label_encoded = self.classes.index(class_str) - - # Now one-hot it. 
-         label_hot = to_categorical(label_encoded, len(self.classes))
-
-         assert len(label_hot) == len(self.classes)
-
-         return label_hot
-
-     def split_train_test(self):
-         """Split the data into train and test groups."""
-         train = []
-         test = []
-         for item in self.data:
-             if item[0] == 'train':
-                 train.append(item)
-             else:
-                 test.append(item)
-         return train, test
-
-     def get_all_sequences_in_memory(self, train_test, data_type):
-         """
-         This is a mirror of our generator, but attempts to load everything into
-         memory so we can train way faster.
-         """
-         # Get the right dataset.
-         train, test = self.split_train_test()
-         data = train if train_test == 'train' else test
-
-         print("Loading %d samples into memory for %sing." %
-               (len(data), train_test))
-
-         X, y = [], []
-         for row in data:
-
-             if data_type == 'images':
-                 frames = self.get_frames_for_sample(row)
-                 frames = self.rescale_list(frames, self.seq_length)
-
-                 # Build the image sequence
-                 sequence = self.build_image_sequence(frames)
-
-             else:
-                 sequence = self.get_extracted_sequence(data_type, row)
-
-             if sequence is None:
-                 # A bare raise is only valid inside an except block, so raise
-                 # a proper error here.
-                 raise ValueError("Can't find sequence. Did you generate them?")
-
-             X.append(sequence)
-             y.append(self.get_class_one_hot(row[1]))
-
-         return np.array(X), np.array(y)
-
-     @threadsafe_generator
-     def frame_generator(self, batch_size, train_test, data_type):
-         """Return a generator that we can use to train on. There are
-         a couple different things we can return:
-
-         data_type: 'features', 'images'
-         """
-         # Get the right dataset for the generator.
-         train, test = self.split_train_test()
-         data = train if train_test == 'train' else test
-
-         print("Creating %s generator with %d samples." %
-               (train_test, len(data)))
-
-         while 1:
-             X, y = [], []
-
-             # Generate batch_size samples.
-             for _ in range(batch_size):
-                 # Reset to be safe.
-                 sequence = None
-
-                 # Get a random sample.
-                 sample = random.choice(data)
-
-                 # Check to see if we've already saved this sequence.
-                 if data_type == "images":
-                     # Get and resample frames.
-                     frames = self.get_frames_for_sample(sample)
-                     frames = self.rescale_list(frames, self.seq_length)
-
-                     # Build the image sequence
-                     sequence = self.build_image_sequence(frames)
-                 else:
-                     # Get the sequence from disk.
-                     sequence = self.get_extracted_sequence(data_type, sample)
-
-                 if sequence is None:
-                     raise ValueError(
-                         "Can't find sequence. Did you generate them?")
-
-                 X.append(sequence)
-                 y.append(self.get_class_one_hot(sample[1]))
-
-             yield np.array(X), np.array(y)
-
-     def build_image_sequence(self, frames):
-         """Given a set of frames (filenames), build our sequence."""
-         return [process_image(x, self.image_shape) for x in frames]
-
-     def get_extracted_sequence(self, data_type, sample):
-         """Get the saved extracted features."""
-         filename = sample[2]
-         path = os.path.join(self.sequence_path, filename + '-' + str(self.seq_length) +
-                             '-' + data_type + '.npy')
-         if os.path.isfile(path):
-             return np.load(path)
-         else:
-             return None
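Because of the decorator, the generator above can safely be shared across the threads Keras spawns for generator-based training. A short usage sketch, assuming data_file.csv and the extracted .npy sequences are already in place:

```python
from data import DataSet

data = DataSet(seq_length=40, class_limit=None)
generator = data.frame_generator(32, 'train', 'features')
X, y = next(generator)   # one batch of sequences plus one-hot labels
```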
- frames = self.get_frames_for_sample(sample) - frames = self.rescale_list(frames, self.seq_length) - # Build the image sequence - sequence = self.build_image_sequence(frames) - else: - # Get the sequence from disk. - sequence = self.get_extracted_sequence(data_type, sample) - - if sequence is None: - raise ValueError("Can't find sequence. Did you generate them?") - - return sequence - - @staticmethod - def get_frames_for_sample(sample): - """Given a sample row from the data file, get all the corresponding frame - filenames.""" - path = os.path.join('data', sample[0], sample[1]) - filename = sample[2] - images = sorted(glob.glob(os.path.join(path, filename + '*jpg'))) - return images - - @staticmethod - def get_filename_from_image(filename): - parts = filename.split(os.path.sep) - return parts[-1].replace('.jpg', '') - - @staticmethod - def rescale_list(input_list, size): - """Given a list and a size, return a rescaled/samples list. For example, - if we want a list of size 5 and we have a list of size 25, return a new - list of size five which is every 5th element of the origina list.""" - assert len(input_list) >= size - - # Get the number to skip between iterations. - skip = len(input_list) // size - - # Build our new output. - output = [input_list[i] for i in range(0, len(input_list), skip)] - - # Cut off the last one if needed. - return output[:size] - - def print_class_from_prediction(self, predictions, nb_to_return=5): - """Given a prediction, print the top classes.""" - # Get the prediction for each label. - label_predictions = {} - for i, label in enumerate(self.classes): - label_predictions[label] = predictions[i] - - # Now sort them. - sorted_lps = sorted( - label_predictions.items(), - key=operator.itemgetter(1), - reverse=True - ) - - # And return the top N. - for i, class_prediction in enumerate(sorted_lps): - if i > nb_to_return - 1 or class_prediction[1] == 0.0: - break - print("%s: %.2f" % (class_prediction[0], class_prediction[1])) -""" -Given a video path and a saved model (checkpoint), produce classification -predictions. - -Note that if using a model that requires features to be extracted, those -features must be extracted first. - -Note also that this is a rushed demo script to help a few people who have -requested it and so is quite "rough". :) -""" -from keras.models import load_model -from data import DataSet -import numpy as np - - -def predict(data_type, seq_length, saved_model, image_shape, video_name, class_limit): - model = load_model(saved_model) - - # Get the data and process it. - if image_shape is None: - data = DataSet(seq_length=seq_length, class_limit=class_limit) - else: - data = DataSet(seq_length=seq_length, image_shape=image_shape, - class_limit=class_limit) - - # Extract the sample from the data. - sample = data.get_frames_by_filename(video_name, data_type) - - # Predict! - prediction = model.predict(np.expand_dims(sample, axis=0)) - print(prediction) - data.print_class_from_prediction(np.squeeze(prediction, axis=0)) - - -def main(): - # model can be one of lstm, lrcn, mlp, conv_3d, c3d. - model = 'lstm' - # Must be a weights file. - saved_model = 'data/checkpoints/lstm-features.026-0.239.hdf5' - # Sequence length must match the lengh used during training. - seq_length = 40 - # Limit must match that used during training. - class_limit = 4 - - # Demo file. Must already be extracted & features generated (if model requires) - # Do not include the extension. - # Assumes it's in data/[train|test]/ - # It also must be part of the train/test data. 
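-    # The video name is the extracted-frame filename without extension,
-    # e.g. 'v_ApplyLipstick_g01_c01' corresponds to frames named
-    # v_ApplyLipstick_g01_c01-0001.jpg and so on.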
- # TODO Make this way more useful. It should take in the path to - # an actual video file, extract frames, generate sequences, etc. - #video_name = 'v_Archery_g04_c02' - video_name = 'v_ApplyLipstick_g01_c01' - - # Chose images or features and image shape based on network. - if model in ['conv_3d', 'c3d', 'lrcn']: - data_type = 'images' - image_shape = (80, 80, 3) - elif model in ['lstm', 'mlp']: - data_type = 'features' - image_shape = None - else: - raise ValueError("Invalid model. See train.py for options.") - - predict(data_type, seq_length, saved_model, - image_shape, video_name, class_limit) - - -if __name__ == '__main__': - main() -""" -This script generates extracted features for each video, which other -models make use of. - -You can change you sequence length and limit to a set number of classes -below. - -class_limit is an integer that denotes the first N classes you want to -extract features from. This is useful is you don't want to wait to -extract all 101 classes. For instance, set class_limit = 8 to just -extract features for the first 8 (alphabetical) classes in the dataset. -Then set the same number when training models. -""" -import numpy as np -import os.path -from data import DataSet -from extractor import Extractor -from tqdm import tqdm - -# Set defaults. -seq_length = 40 -# Number of classes to extract. Can be 1-101 or None for all. -class_limit = None - -# Get the dataset. -data = DataSet(seq_length=seq_length, class_limit=class_limit) - -# get the model. -model = Extractor() - -# Loop through data. -pbar = tqdm(total=len(data.data)) -for video in data.data: - - # Get the path to the sequence for this video. - path = os.path.join('data', 'sequences', video[2] + '-' + str(seq_length) + - '-features') # numpy will auto-append .npy - - # Check if we already have it. - if os.path.isfile(path + '.npy'): - pbar.update(1) - continue - - # Get the frames for this video. - frames = data.get_frames_for_sample(video) - - # Now downsample to just the ones we need. - frames = data.rescale_list(frames, seq_length) - - # Now loop through and extract features to build the sequence. - sequence = [] - for image in frames: - features = model.extract(image) - sequence.append(features) - - # Save the sequence. - np.save(path, sequence) - - pbar.update(1) - -pbar.close() -from keras.preprocessing import image -from keras.applications.inception_v3 import InceptionV3, preprocess_input -from keras.models import Model, load_model -from keras.layers import Input -import numpy as np - - -class Extractor(): - def __init__(self, weights=None): - """Either load pretrained from imagenet, or load our saved - weights from our own training.""" - - self.weights = weights # so we can check elsewhere which model - - if weights is None: - # Get model with pretrained weights. - base_model = InceptionV3( - weights='imagenet', - include_top=True - ) - - # We'll extract features at the final pool layer. - self.model = Model( - inputs=base_model.input, - outputs=base_model.get_layer('avg_pool').output - ) - - else: - # Load the model first. - self.model = load_model(weights) - - # Then remove the top so we get features not predictions. 
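-            # Popping the classification layers leaves the pooling layer as
-            # the output, so predict() returns CNN features rather than
-            # class probabilities.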
-            # From: https://github.com/fchollet/keras/issues/2371
-            self.model.layers.pop()
-            self.model.layers.pop()  # two pops to get to pool layer
-            self.model.outputs = [self.model.layers[-1].output]
-            self.model.output_layers = [self.model.layers[-1]]
-            self.model.layers[-1].outbound_nodes = []
-
-    def extract(self, image_path):
-        img = image.load_img(image_path, target_size=(299, 299))
-        x = image.img_to_array(img)
-        x = np.expand_dims(x, axis=0)
-        x = preprocess_input(x)
-
-        # Get the prediction.
-        features = self.model.predict(x)
-
-        # Strip the batch dimension so we return a single feature vector.
-        # (The original branched on self.weights here, but both branches
-        # did exactly the same thing.)
-        features = features[0]
-
-        return features
-"""
-A collection of models we'll use to attempt to classify videos.
-"""
-from keras.layers import (Dense, Flatten, Dropout, ZeroPadding3D,
-                          Activation, BatchNormalization)
-from keras.layers.recurrent import LSTM
-from keras.models import Sequential, load_model
-from keras.optimizers import Adam
-from keras.layers.wrappers import TimeDistributed
-from keras.layers.convolutional import (Conv2D, MaxPooling3D, Conv3D,
-                                        MaxPooling2D)
-from keras.regularizers import l2
-from collections import deque
-import sys
-
-
-class ResearchModels():
-    def __init__(self, nb_classes, model, seq_length,
-                 saved_model=None, features_length=2048):
-        """
-        `model` = one of:
-            lstm
-            lrcn
-            mlp
-            conv_3d
-            c3d
-        `nb_classes` = the number of classes to predict
-        `seq_length` = the length of our video sequences
-        `saved_model` = the path to a saved Keras model to load
-        """
-
-        # Set defaults.
-        self.seq_length = seq_length
-        self.load_model = load_model
-        self.saved_model = saved_model
-        self.nb_classes = nb_classes
-        self.feature_queue = deque()
-
-        # Set the metrics. Only use top k if there's a need.
-        metrics = ['accuracy']
-        if self.nb_classes >= 10:
-            metrics.append('top_k_categorical_accuracy')
-
-        # Get the appropriate model.
-        if self.saved_model is not None:
-            print("Loading model %s" % self.saved_model)
-            self.model = load_model(self.saved_model)
-        elif model == 'lstm':
-            print("Loading LSTM model.")
-            self.input_shape = (seq_length, features_length)
-            self.model = self.lstm()
-        elif model == 'lrcn':
-            print("Loading CNN-LSTM model.")
-            self.input_shape = (seq_length, 80, 80, 3)
-            self.model = self.lrcn()
-        elif model == 'mlp':
-            print("Loading simple MLP.")
-            self.input_shape = (seq_length, features_length)
-            self.model = self.mlp()
-        elif model == 'conv_3d':
-            print("Loading Conv3D")
-            self.input_shape = (seq_length, 80, 80, 3)
-            self.model = self.conv_3d()
-        elif model == 'c3d':
-            print("Loading C3D")
-            self.input_shape = (seq_length, 80, 80, 3)
-            self.model = self.c3d()
-        else:
-            print("Unknown network.")
-            sys.exit()
-
-        # Now compile the network.
-        optimizer = Adam(lr=1e-5, decay=1e-6)
-        self.model.compile(loss='categorical_crossentropy', optimizer=optimizer,
-                           metrics=metrics)
-
-        print(self.model.summary())
-
-    def lstm(self):
-        """Build a simple LSTM network. We pass the extracted features from
-        our CNN to this model predominantly."""
-        # Model.
-        model = Sequential()
-        model.add(LSTM(2048, return_sequences=False,
-                       input_shape=self.input_shape,
-                       dropout=0.5))
-        model.add(Dense(512, activation='relu'))
-        model.add(Dropout(0.5))
-        model.add(Dense(self.nb_classes, activation='softmax'))
-
-        return model
-
-    def lrcn(self):
-        """Build a CNN into RNN.
-        Starting version from:
-        https://github.com/udacity/self-driving-car/blob/master/
-        steering-models/community-models/chauffeur/models.py
-
-        Heavily influenced by VGG-16:
-        https://arxiv.org/abs/1409.1556
-
-        Also known as an LRCN:
-        https://arxiv.org/pdf/1411.4389.pdf
-        """
-        # Note: Activation, BatchNormalization and l2 are imported at the
-        # top of this module; the original referenced an undefined L2_reg.
-        def add_default_block(model, kernel_filters, init, reg_lambda):
-
-            # conv
-            model.add(TimeDistributed(Conv2D(kernel_filters, (3, 3), padding='same',
-                                             kernel_initializer=init,
-                                             kernel_regularizer=l2(reg_lambda))))
-            model.add(TimeDistributed(BatchNormalization()))
-            model.add(TimeDistributed(Activation('relu')))
-            # conv
-            model.add(TimeDistributed(Conv2D(kernel_filters, (3, 3), padding='same',
-                                             kernel_initializer=init,
-                                             kernel_regularizer=l2(reg_lambda))))
-            model.add(TimeDistributed(BatchNormalization()))
-            model.add(TimeDistributed(Activation('relu')))
-            # max pool
-            model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))
-
-            return model
-
-        initialiser = 'glorot_uniform'
-        reg_lambda = 0.001
-
-        model = Sequential()
-
-        # first (non-default) block
-        model.add(TimeDistributed(Conv2D(32, (7, 7), strides=(2, 2), padding='same',
-                                         kernel_initializer=initialiser,
-                                         kernel_regularizer=l2(reg_lambda)),
-                                  input_shape=self.input_shape))
-        model.add(TimeDistributed(BatchNormalization()))
-        model.add(TimeDistributed(Activation('relu')))
-        model.add(TimeDistributed(Conv2D(
-            32, (3, 3), kernel_initializer=initialiser,
-            kernel_regularizer=l2(reg_lambda))))
-        model.add(TimeDistributed(BatchNormalization()))
-        model.add(TimeDistributed(Activation('relu')))
-        model.add(TimeDistributed(MaxPooling2D((2, 2), strides=(2, 2))))
-
-        # 2nd-5th (default) blocks
-        model = add_default_block(
-            model, 64, init=initialiser, reg_lambda=reg_lambda)
-        model = add_default_block(
-            model, 128, init=initialiser, reg_lambda=reg_lambda)
-        model = add_default_block(
-            model, 256, init=initialiser, reg_lambda=reg_lambda)
-        model = add_default_block(
-            model, 512, init=initialiser, reg_lambda=reg_lambda)
-
-        # LSTM output head
-        model.add(TimeDistributed(Flatten()))
-        model.add(LSTM(256, return_sequences=False, dropout=0.5))
-        model.add(Dense(self.nb_classes, activation='softmax'))
-
-        return model
-
-    def mlp(self):
-        """Build a simple MLP. It uses extracted features as the input
-        because of the otherwise too-high dimensionality."""
-        # Model.
-        model = Sequential()
-        model.add(Flatten(input_shape=self.input_shape))
-        model.add(Dense(512))
-        model.add(Dropout(0.5))
-        model.add(Dense(512))
-        model.add(Dropout(0.5))
-        model.add(Dense(self.nb_classes, activation='softmax'))
-
-        return model
-
-    def conv_3d(self):
-        """
-        Build a 3D convolutional network, based loosely on C3D.
-        https://arxiv.org/pdf/1412.0767.pdf
-        """
-        # Model.
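-        # Each sample here is a tensor of shape (seq_length, height, width,
-        # channels), e.g. (40, 80, 80, 3), so memory use grows quickly
-        # with sequence length.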
- model = Sequential() - model.add(Conv3D( - 32, (3, 3, 3), activation='relu', input_shape=self.input_shape - )) - model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2))) - model.add(Conv3D(64, (3, 3, 3), activation='relu')) - model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2))) - model.add(Conv3D(128, (3, 3, 3), activation='relu')) - model.add(Conv3D(128, (3, 3, 3), activation='relu')) - model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2))) - model.add(Conv3D(256, (2, 2, 2), activation='relu')) - model.add(Conv3D(256, (2, 2, 2), activation='relu')) - model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2))) - - model.add(Flatten()) - model.add(Dense(1024)) - model.add(Dropout(0.5)) - model.add(Dense(1024)) - model.add(Dropout(0.5)) - model.add(Dense(self.nb_classes, activation='softmax')) - - return model - - def c3d(self): - """ - Build a 3D convolutional network, aka C3D. - https://arxiv.org/pdf/1412.0767.pdf - - With thanks: - https://gist.github.com/albertomontesg/d8b21a179c1e6cca0480ebdf292c34d2 - """ - model = Sequential() - # 1st layer group - model.add(Conv3D(64, 3, 3, 3, activation='relu', - border_mode='same', name='conv1', - subsample=(1, 1, 1), - input_shape=self.input_shape)) - model.add(MaxPooling3D(pool_size=(1, 2, 2), strides=(1, 2, 2), - border_mode='valid', name='pool1')) - # 2nd layer group - model.add(Conv3D(128, 3, 3, 3, activation='relu', - border_mode='same', name='conv2', - subsample=(1, 1, 1))) - model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), - border_mode='valid', name='pool2')) - # 3rd layer group - model.add(Conv3D(256, 3, 3, 3, activation='relu', - border_mode='same', name='conv3a', - subsample=(1, 1, 1))) - model.add(Conv3D(256, 3, 3, 3, activation='relu', - border_mode='same', name='conv3b', - subsample=(1, 1, 1))) - model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), - border_mode='valid', name='pool3')) - # 4th layer group - model.add(Conv3D(512, 3, 3, 3, activation='relu', - border_mode='same', name='conv4a', - subsample=(1, 1, 1))) - model.add(Conv3D(512, 3, 3, 3, activation='relu', - border_mode='same', name='conv4b', - subsample=(1, 1, 1))) - model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), - border_mode='valid', name='pool4')) - - # 5th layer group - model.add(Conv3D(512, 3, 3, 3, activation='relu', - border_mode='same', name='conv5a', - subsample=(1, 1, 1))) - model.add(Conv3D(512, 3, 3, 3, activation='relu', - border_mode='same', name='conv5b', - subsample=(1, 1, 1))) - model.add(ZeroPadding3D(padding=(0, 1, 1))) - model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), - border_mode='valid', name='pool5')) - model.add(Flatten()) - - # FC layers group - model.add(Dense(4096, activation='relu', name='fc6')) - model.add(Dropout(0.5)) - model.add(Dense(4096, activation='relu', name='fc7')) - model.add(Dropout(0.5)) - model.add(Dense(self.nb_classes, activation='softmax')) - - return model -""" -Given a training log file, plot something. 
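-
-Expects the CSV written by Keras's CSVLogger callback, with columns
-epoch, acc, loss, top_k_categorical_accuracy, val_acc, val_loss and
-val_top_k_categorical_accuracy, and plots the validation accuracies.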
-""" -import csv -import matplotlib.pyplot as plt - - -def main(training_log): - with open(training_log) as fin: - reader = csv.reader(fin) - next(reader, None) # skip the header - accuracies = [] - top_5_accuracies = [] - cnn_benchmark = [] # this is ridiculous - for epoch, acc, loss, top_k_categorical_accuracy, val_acc, val_loss, val_top_k_categorical_accuracy in reader: - accuracies.append(float(val_acc)) - top_5_accuracies.append(float(val_top_k_categorical_accuracy)) - cnn_benchmark.append(0.65) # ridiculous - - plt.plot(accuracies) - plt.plot(top_5_accuracies) - plt.plot(cnn_benchmark) - plt.show() - - -if __name__ == '__main__': - training_log = 'data/logs/mlp-training-1489455559.7089438.log' - main(training_log) -""" -Process an image that we can pass to our networks. -""" -from keras.preprocessing.image import img_to_array, load_img -import numpy as np - - -def process_image(image, target_shape): - """Given an image, process it and return the array.""" - # Load the image. - h, w, _ = target_shape - image = load_img(image, target_size=(h, w)) - - # Turn it into numpy, normalize and return. - img_arr = img_to_array(image) - x = (img_arr / 255.).astype(np.float32) - - return x -""" -Try to "classify" samples based on random chance and always guessing -the most popular category. -""" -import random -from data import DataSet - -most_pop = 'TennisSwing' - -data = DataSet() -nb_classes = len(data.classes) - -# Try a random guess. -nb_random_matched = 0 -nb_mode_matched = 0 -for item in data.data: - choice = random.choice(data.classes) - actual = item[1] - - if choice == actual: - nb_random_matched += 1 - - if actual == most_pop: - nb_mode_matched += 1 - -random_accuracy = nb_random_matched / len(data.data) -mode_accuracy = nb_mode_matched / len(data.data) -print("Randomly matched %.2f%%" % (random_accuracy * 100)) -print("Mode matched %.2f%%" % (mode_accuracy * 100)) -""" -Train our RNN on extracted features or images. -""" -from keras.callbacks import TensorBoard, ModelCheckpoint, EarlyStopping, CSVLogger -from models import ResearchModels -from data import DataSet -import time -import os.path - - -def train(data_type, seq_length, model, saved_model=None, - class_limit=None, image_shape=None, - load_to_memory=False, batch_size=32, nb_epoch=100): - # Helper: Save the model. - checkpointer = ModelCheckpoint( - filepath=os.path.join('data', 'checkpoints', model + '-' + data_type + - '.{epoch:03d}-{val_loss:.3f}.hdf5'), - verbose=1, - save_best_only=True) - - # Helper: TensorBoard - tb = TensorBoard(log_dir=os.path.join('data', 'logs', model)) - - # Helper: Stop when we stop learning. - early_stopper = EarlyStopping(patience=5) - - # Helper: Save results. - timestamp = time.time() - csv_logger = CSVLogger(os.path.join('data', 'logs', model + '-' + 'training-' + - str(timestamp) + '.log')) - - # Get the data and process it. - if image_shape is None: - data = DataSet( - seq_length=seq_length, - class_limit=class_limit - ) - else: - data = DataSet( - seq_length=seq_length, - class_limit=class_limit, - image_shape=image_shape - ) - - # Get samples per epoch. - # Multiply by 0.7 to attempt to guess how much of data.data is the train set. - steps_per_epoch = (len(data.data) * 0.7) // batch_size - - if load_to_memory: - # Get data. - X, y = data.get_all_sequences_in_memory('train', data_type) - X_test, y_test = data.get_all_sequences_in_memory('test', data_type) - else: - # Get generators. 
- generator = data.frame_generator(batch_size, 'train', data_type) - val_generator = data.frame_generator(batch_size, 'test', data_type) - - # Get the model. - rm = ResearchModels(len(data.classes), model, seq_length, saved_model) - - # Fit! - if load_to_memory: - # Use standard fit. - rm.model.fit( - X, - y, - batch_size=batch_size, - validation_data=(X_test, y_test), - verbose=1, - callbacks=[tb, early_stopper, csv_logger], - epochs=nb_epoch) - else: - # Use fit generator. - rm.model.fit_generator( - generator=generator, - steps_per_epoch=steps_per_epoch, - epochs=nb_epoch, - verbose=1, - callbacks=[tb, early_stopper, csv_logger, checkpointer], - validation_data=val_generator, - validation_steps=40, - workers=4) - - -def main(): - """These are the main training settings. Set each before running - this file.""" - # model can be one of lstm, lrcn, mlp, conv_3d, c3d - model = 'lstm' - saved_model = None # None or weights file - class_limit = None # int, can be 1-101 or None - seq_length = 40 - load_to_memory = False # pre-load the sequences into memory - batch_size = 32 - nb_epoch = 1000 - - # Chose images or features and image shape based on network. - if model in ['conv_3d', 'c3d', 'lrcn']: - data_type = 'images' - image_shape = (80, 80, 3) - elif model in ['lstm', 'mlp']: - data_type = 'features' - image_shape = None - else: - raise ValueError("Invalid model. See train.py for options.") - - train(data_type, seq_length, model, saved_model=saved_model, - class_limit=class_limit, image_shape=image_shape, - load_to_memory=load_to_memory, batch_size=batch_size, nb_epoch=nb_epoch) - - -if __name__ == '__main__': - main() -""" -Train on images split into directories. This assumes we've split -our videos into frames and moved them to their respective folders. - -Based on: -https://keras.io/preprocessing/image/ -and -https://keras.io/applications/ -""" -from keras.applications.inception_v3 import InceptionV3 -from keras.optimizers import SGD -from keras.preprocessing.image import ImageDataGenerator -from keras.models import Model -from keras.layers import Dense, GlobalAveragePooling2D -from keras.callbacks import ModelCheckpoint, TensorBoard, EarlyStopping -from data import DataSet -import os.path - -data = DataSet() - -# Helper: Save the model. -checkpointer = ModelCheckpoint( - filepath=os.path.join('data', 'checkpoints', - 'inception.{epoch:03d}-{val_loss:.2f}.hdf5'), - verbose=1, - save_best_only=True) - -# Helper: Stop when we stop learning. 
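-# With patience=10, training halts after ten consecutive epochs without
-# improvement in the monitored metric (val_loss by default).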
-early_stopper = EarlyStopping(patience=10) - -# Helper: TensorBoard -tensorboard = TensorBoard(log_dir=os.path.join('data', 'logs')) - - -def get_generators(): - train_datagen = ImageDataGenerator( - rescale=1./255, - shear_range=0.2, - horizontal_flip=True, - rotation_range=10., - width_shift_range=0.2, - height_shift_range=0.2) - - test_datagen = ImageDataGenerator(rescale=1./255) - - train_generator = train_datagen.flow_from_directory( - os.path.join('data', 'train'), - target_size=(299, 299), - batch_size=32, - classes=data.classes, - class_mode='categorical') - - validation_generator = test_datagen.flow_from_directory( - os.path.join('data', 'test'), - target_size=(299, 299), - batch_size=32, - classes=data.classes, - class_mode='categorical') - - return train_generator, validation_generator - - -def get_model(weights='imagenet'): - # create the base pre-trained model - base_model = InceptionV3(weights=weights, include_top=False) - - # add a global spatial average pooling layer - x = base_model.output - x = GlobalAveragePooling2D()(x) - # let's add a fully-connected layer - x = Dense(1024, activation='relu')(x) - # and a logistic layer - predictions = Dense(len(data.classes), activation='softmax')(x) - - # this is the model we will train - model = Model(inputs=base_model.input, outputs=predictions) - return model - - -def freeze_all_but_top(model): - """Used to train just the top layers of the model.""" - # first: train only the top layers (which were randomly initialized) - # i.e. freeze all convolutional InceptionV3 layers - for layer in model.layers[:-2]: - layer.trainable = False - - # compile the model (should be done *after* setting layers to non-trainable) - model.compile(optimizer='rmsprop', - loss='categorical_crossentropy', metrics=['accuracy']) - - return model - - -def freeze_all_but_mid_and_top(model): - """After we fine-tune the dense layers, train deeper.""" - # we chose to train the top 2 inception blocks, i.e. we will freeze - # the first 172 layers and unfreeze the rest: - for layer in model.layers[:172]: - layer.trainable = False - for layer in model.layers[172:]: - layer.trainable = True - - # we need to recompile the model for these modifications to take effect - # we use SGD with a low learning rate - model.compile( - optimizer=SGD(lr=0.0001, momentum=0.9), - loss='categorical_crossentropy', - metrics=['accuracy', 'top_k_categorical_accuracy']) - - return model - - -def train_model(model, nb_epoch, generators, callbacks=[]): - train_generator, validation_generator = generators - model.fit_generator( - train_generator, - steps_per_epoch=100, - validation_data=validation_generator, - validation_steps=10, - epochs=nb_epoch, - callbacks=callbacks) - return model - - -def main(weights_file): - model = get_model() - generators = get_generators() - - if weights_file is None: - print("Loading network from ImageNet weights.") - # Get and train the top layers. - model = freeze_all_but_top(model) - model = train_model(model, 10, generators) - else: - print("Loading saved model: %s." % weights_file) - model.load_weights(weights_file) - - # Get and train the mid layers. - model = freeze_all_but_mid_and_top(model) - model = train_model(model, 1000, generators, - [checkpointer, early_stopper, tensorboard]) - - -if __name__ == '__main__': - weights_file = None - main(weights_file) -""" -Classify a few images through our CNN. 
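-
-Loads a saved Inception checkpoint, samples random frames from
-data/test/, and prints the top five class probabilities for each.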
-""" -import numpy as np -import operator -import random -import glob -import os.path -from data import DataSet -from processor import process_image -from keras.models import load_model - - -def main(nb_images=5): - """Spot-check `nb_images` images.""" - data = DataSet() - model = load_model('data/checkpoints/inception.057-1.16.hdf5') - - # Get all our test images. - images = glob.glob(os.path.join('data', 'test', '**', '*.jpg')) - - for _ in range(nb_images): - print('-'*80) - # Get a random row. - sample = random.randint(0, len(images) - 1) - image = images[sample] - - # Turn the image into an array. - print(image) - image_arr = process_image(image, (299, 299, 3)) - image_arr = np.expand_dims(image_arr, axis=0) - - # Predict. - predictions = model.predict(image_arr) - - # Show how much we think it's each one. - label_predictions = {} - for i, label in enumerate(data.classes): - label_predictions[label] = predictions[0][i] - - sorted_lps = sorted(label_predictions.items(), - key=operator.itemgetter(1), reverse=True) - - for i, class_prediction in enumerate(sorted_lps): - # Just get the top five. - if i > 4: - break - print("%s: %.2f" % (class_prediction[0], class_prediction[1])) - i += 1 - - -if __name__ == '__main__': - main() -""" -Validate our RNN. Basically just runs a validation generator on -about the same number of videos as we have in our test set. -""" -from keras.callbacks import TensorBoard, ModelCheckpoint, CSVLogger -from models import ResearchModels -from data import DataSet - - -def validate(data_type, model, seq_length=40, saved_model=None, - class_limit=None, image_shape=None): - batch_size = 32 - - # Get the data and process it. - if image_shape is None: - data = DataSet( - seq_length=seq_length, - class_limit=class_limit - ) - else: - data = DataSet( - seq_length=seq_length, - class_limit=class_limit, - image_shape=image_shape - ) - - val_generator = data.frame_generator(batch_size, 'test', data_type) - - # Get the model. - rm = ResearchModels(len(data.classes), model, seq_length, saved_model) - - # Evaluate! - results = rm.model.evaluate_generator( - generator=val_generator, - val_samples=3200) - - print(results) - print(rm.model.metrics_names) - - -def main(): - model = 'lstm' - saved_model = 'data/checkpoints/lstm-features.026-0.239.hdf5' - - if model == 'conv_3d' or model == 'lrcn': - data_type = 'images' - image_shape = (80, 80, 3) - else: - data_type = 'features' - image_shape = None - - validate(data_type, model, saved_model=saved_model, - image_shape=image_shape, class_limit=4) - - -if __name__ == '__main__': - main() -""" -After extracting the RAR, we run this to move all the files into -the appropriate train/test folders. - -Should only run this file once! -""" -import os -import os.path - - -def get_train_test_lists(version='01'): - """ - Using one of the train/test files (01, 02, or 03), get the filename - breakdowns we'll later use to move everything. - """ - # Get our files based on version. - test_file = os.path.join('ucfTrainTestlist', 'testlist' + version + '.txt') - train_file = os.path.join( - 'ucfTrainTestlist', 'trainlist' + version + '.txt') - - # Build the test list. - with open(test_file) as fin: - test_list = [row.strip() for row in list(fin)] - - # Build the train list. Extra step to remove the class index. - with open(train_file) as fin: - train_list = [row.strip() for row in list(fin)] - train_list = [row.split(' ')[0] for row in train_list] - - # Set the groups in a dictionary. 
- file_groups = { - 'train': train_list, - 'test': test_list - } - - return file_groups - - -def move_files(file_groups): - """This assumes all of our files are currently in _this_ directory. - So move them to the appropriate spot. Only needs to happen once. - """ - # Do each of our groups. - for group, videos in file_groups.items(): - - # Do each of our videos. - for video in videos: - - # Get the parts. - parts = video.split(os.path.sep) - classname = parts[0] - filename = parts[1] - - # Check if this class exists. - if not os.path.exists(os.path.join(group, classname)): - print("Creating folder for %s/%s" % (group, classname)) - os.makedirs(os.path.join(group, classname)) - - # Check if we have already moved this file, or at least that it - # exists to move. - if not os.path.exists(filename): - print("Can't find %s to move. Skipping." % (filename)) - continue - - # Move it. - dest = os.path.join(group, classname, filename) - print("Moving %s to %s" % (filename, dest)) - os.rename(filename, dest) - - print("Done.") - - -def main(): - """ - Go through each of our train/test text files and move the videos - to the right place. - """ - # Get the videos in groups so we can move them. - group_lists = get_train_test_lists() - - # Move the files. - move_files(group_lists) - - -if __name__ == '__main__': - main() -""" -After moving all the files using the 1_ file, we run this one to extract -the images from the videos and also create a data file we can use -for training and testing later. -""" -import csv -import glob -import os -import os.path -from subprocess import call - - -def extract_files(): - """After we have all of our videos split between train and test, and - all nested within folders representing their classes, we need to - make a data file that we can reference when training our RNN(s). - This will let us keep track of image sequences and other parts - of the training process. - - We'll first need to extract images from each of the videos. We'll - need to record the following data in the file: - - [train|test], class, filename, nb frames - - Extracting can be done with ffmpeg: - `ffmpeg -i video.mpg image-%04d.jpg` - """ - data_file = [] - folders = ['train', 'test'] - - for folder in folders: - class_folders = glob.glob(os.path.join(folder, '*')) - - for vid_class in class_folders: - class_files = glob.glob(os.path.join(vid_class, '*.avi')) - - for video_path in class_files: - # Get the parts of the file. - video_parts = get_video_parts(video_path) - - train_or_test, classname, filename_no_ext, filename = video_parts - - # Only extract if we haven't done it yet. Otherwise, just get - # the info. - if not check_already_extracted(video_parts): - # Now extract it. - src = os.path.join(train_or_test, classname, filename) - dest = os.path.join(train_or_test, classname, - filename_no_ext + '-%04d.jpg') - call(["ffmpeg", "-i", src, dest]) - - # Now get how many frames it is. - nb_frames = get_nb_frames_for_video(video_parts) - - data_file.append( - [train_or_test, classname, filename_no_ext, nb_frames]) - - print("Generated %d frames for %s" % - (nb_frames, filename_no_ext)) - - with open('data_file.csv', 'w') as fout: - writer = csv.writer(fout) - writer.writerows(data_file) - - print("Extracted and wrote %d video files." 
% (len(data_file))) - - -def get_nb_frames_for_video(video_parts): - """Given video parts of an (assumed) already extracted video, return - the number of frames that were extracted.""" - train_or_test, classname, filename_no_ext, _ = video_parts - generated_files = glob.glob(os.path.join(train_or_test, classname, - filename_no_ext + '*.jpg')) - return len(generated_files) - - -def get_video_parts(video_path): - """Given a full path to a video, return its parts.""" - parts = video_path.split(os.path.sep) - filename = parts[2] - filename_no_ext = filename.split('.')[0] - classname = parts[1] - train_or_test = parts[0] - - return train_or_test, classname, filename_no_ext, filename - - -def check_already_extracted(video_parts): - """Check to see if we created the -0001 frame of this file.""" - train_or_test, classname, filename_no_ext, _ = video_parts - return bool(os.path.exists(os.path.join(train_or_test, classname, - filename_no_ext + '-0001.jpg'))) - - -def main(): - """ - Extract images from videos and build a new file that we - can use as our data input file. It can have format: - - [train|test], class, filename, nb frames - """ - extract_files() - - -if __name__ == '__main__': - main() -import matplotlib.pyplot as plt -import numpy as np -import nibabel as nib -import os -import glob -import pandas as pd -import matplotlib -matplotlib.use('agg') - - -def get_whole_tumor_mask(data): - return data > 0 - - -def get_tumor_core_mask(data): - return np.logical_or(data == 1, data == 4) - - -def get_enhancing_tumor_mask(data): - return data == 4 - - -def dice_coefficient(truth, prediction): - return 2 * np.sum(truth * prediction)/(np.sum(truth) + np.sum(prediction)) - - -def main(): - header = ("WholeTumor", "TumorCore", "EnhancingTumor") - masking_functions = (get_whole_tumor_mask, - get_tumor_core_mask, get_enhancing_tumor_mask) - rows = list() - subject_ids = list() - for case_folder in glob.glob("prediction/*"): - if not os.path.isdir(case_folder): - continue - subject_ids.append(os.path.basename(case_folder)) - truth_file = os.path.join(case_folder, "truth.nii.gz") - truth_image = nib.load(truth_file) - truth = truth_image.get_data() - prediction_file = os.path.join(case_folder, "prediction.nii.gz") - prediction_image = nib.load(prediction_file) - prediction = prediction_image.get_data() - rows.append([dice_coefficient(func(truth), func(prediction)) - for func in masking_functions]) - - df = pd.DataFrame.from_records(rows, columns=header, index=subject_ids) - df.to_csv("./prediction/brats_scores.csv") - - scores = dict() - for index, score in enumerate(df.columns): - values = df.values.T[index] - scores[score] = values[np.isnan(values) == False] - - plt.boxplot(list(scores.values()), labels=list(scores.keys())) - plt.ylabel("Dice Coefficient") - plt.savefig("validation_scores_boxplot.png") - plt.close() - - if os.path.exists("./training.log"): - training_df = pd.read_csv("./training.log").set_index('epoch') - - plt.plot(training_df['loss'].values, label='training loss') - plt.plot(training_df['val_loss'].values, label='validation loss') - plt.ylabel('Loss') - plt.xlabel('Epoch') - plt.xlim((0, len(training_df.index))) - plt.legend(loc='upper right') - plt.savefig('loss_graph.png') - - -if __name__ == "__main__": - main() -import os - -from train import config -from unet3d.prediction import run_validation_cases - - -def main(): - prediction_dir = os.path.abspath("prediction") - run_validation_cases(validation_keys_file=config["validation_file"], - model_file=config["model_file"], - 
training_modalities=config["training_modalities"], - labels=config["labels"], - hdf5_file=config["data_file"], - output_label_map=True, - output_dir=prediction_dir) - - -if __name__ == "__main__": - main() -""" -Tools for converting, normalizing, and fixing the brats data. -""" - - -import glob -import os -import warnings -import shutil - -import SimpleITK as sitk -import numpy as np -from nipype.interfaces.ants import N4BiasFieldCorrection - -from brats.train import config - - -def append_basename(in_file, append): - dirname, basename = os.path.split(in_file) - base, ext = basename.split(".", 1) - return os.path.join(dirname, base + append + "." + ext) - - -def get_background_mask(in_folder, out_file, truth_name="GlistrBoost_ManuallyCorrected"): - """ - This function computes a common background mask for all of the data in a subject folder. - :param in_folder: a subject folder from the BRATS dataset. - :param out_file: an image containing a mask that is 1 where the image data for that subject contains the background. - :param truth_name: how the truth file is labeled int he subject folder - :return: the path to the out_file - """ - background_image = None - for name in config["all_modalities"] + [truth_name]: - image = sitk.ReadImage(get_image(in_folder, name)) - if background_image: - if name == truth_name and not (image.GetOrigin() == background_image.GetOrigin()): - image.SetOrigin(background_image.GetOrigin()) - background_image = sitk.And(image == 0, background_image) - else: - background_image = image == 0 - sitk.WriteImage(background_image, out_file) - return os.path.abspath(out_file) - - -def convert_image_format(in_file, out_file): - sitk.WriteImage(sitk.ReadImage(in_file), out_file) - return out_file - - -def window_intensities(in_file, out_file, min_percent=1, max_percent=99): - image = sitk.ReadImage(in_file) - image_data = sitk.GetArrayFromImage(image) - out_image = sitk.IntensityWindowing(image, np.percentile(image_data, min_percent), np.percentile(image_data, - max_percent)) - sitk.WriteImage(out_image, out_file) - return os.path.abspath(out_file) - - -def correct_bias(in_file, out_file, image_type=sitk.sitkFloat64): - """ - Corrects the bias using ANTs N4BiasFieldCorrection. If this fails, will then attempt to correct bias using SimpleITK - :param in_file: input file path - :param out_file: output file path - :return: file path to the bias corrected image - """ - correct = N4BiasFieldCorrection() - correct.inputs.input_image = in_file - correct.inputs.output_image = out_file - try: - done = correct.run() - return done.outputs.output_image - except IOError: - warnings.warn(RuntimeWarning("ANTs N4BIasFieldCorrection could not be found." - "Will try using SimpleITK for bias field correction" - " which will take much longer. To fix this problem, add N4BiasFieldCorrection" - " to your PATH system variable. 
(example: EXPORT PATH=${PATH}:/path/to/ants/bin)")) - input_image = sitk.ReadImage(in_file, image_type) - output_image = sitk.N4BiasFieldCorrection(input_image, input_image > 0) - sitk.WriteImage(output_image, out_file) - return os.path.abspath(out_file) - - -def rescale(in_file, out_file, minimum=0, maximum=20000): - image = sitk.ReadImage(in_file) - sitk.WriteImage(sitk.RescaleIntensity(image, minimum, maximum), out_file) - return os.path.abspath(out_file) - - -def get_image(subject_folder, name): - file_card = os.path.join(subject_folder, "*" + name + ".nii.gz") - try: - return glob.glob(file_card)[0] - except IndexError: - raise RuntimeError("Could not find file matching {}".format(file_card)) - - -def background_to_zero(in_file, background_file, out_file): - sitk.WriteImage(sitk.Mask(sitk.ReadImage(in_file), sitk.ReadImage(background_file, sitk.sitkUInt8) == 0), - out_file) - return out_file - - -def check_origin(in_file, in_file2): - image = sitk.ReadImage(in_file) - image2 = sitk.ReadImage(in_file2) - if not image.GetOrigin() == image2.GetOrigin(): - image.SetOrigin(image2.GetOrigin()) - sitk.WriteImage(image, in_file) - - -def normalize_image(in_file, out_file, bias_correction=True): - if bias_correction: - correct_bias(in_file, out_file) - else: - shutil.copy(in_file, out_file) - return out_file - - -def convert_brats_folder(in_folder, out_folder, truth_name='seg', no_bias_correction_modalities=None): - for name in config["all_modalities"]: - try: - image_file = get_image(in_folder, name) - except RuntimeError as error: - if name == 't1ce': - image_file = get_image(in_folder, 't1Gd') - truth_name = "GlistrBoost_ManuallyCorrected" - else: - raise error - - out_file = os.path.abspath(os.path.join(out_folder, name + ".nii.gz")) - perform_bias_correction = no_bias_correction_modalities and name not in no_bias_correction_modalities - normalize_image(image_file, out_file, - bias_correction=perform_bias_correction) - # copy the truth file - try: - truth_file = get_image(in_folder, truth_name) - except RuntimeError: - truth_file = get_image(in_folder, truth_name.split("_")[0]) - - out_file = os.path.abspath(os.path.join(out_folder, "truth.nii.gz")) - shutil.copy(truth_file, out_file) - check_origin(out_file, get_image(in_folder, config["all_modalities"][0])) - - -def convert_brats_data(brats_folder, out_folder, overwrite=False, no_bias_correction_modalities=("flair",)): - """ - Preprocesses the BRATS data and writes it to a given output folder. Assumes the original folder structure. - :param brats_folder: folder containing the original brats data - :param out_folder: output folder to which the preprocessed data will be written - :param overwrite: set to True in order to redo all the preprocessing - :param no_bias_correction_modalities: performing bias correction could reduce the signal of certain modalities. If - concerned about a reduction in signal for a specific modality, specify by including the given modality in a list - or tuple. 
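-    Example call (paths are illustrative):
-        convert_brats_data("data/original", "data/preprocessed")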
- :return: - """ - for subject_folder in glob.glob(os.path.join(brats_folder, "*", "*")): - if os.path.isdir(subject_folder): - subject = os.path.basename(subject_folder) - new_subject_folder = os.path.join(out_folder, os.path.basename(os.path.dirname(subject_folder)), - subject) - if not os.path.exists(new_subject_folder) or overwrite: - if not os.path.exists(new_subject_folder): - os.makedirs(new_subject_folder) - convert_brats_folder(subject_folder, new_subject_folder, - no_bias_correction_modalities=no_bias_correction_modalities) -import os -import glob - -from unet3d.data import write_data_to_file, open_data_file -from unet3d.generator import get_training_and_validation_generators -from unet3d.model import unet_model_3d -from unet3d.training import load_old_model, train_model - - -config = dict() -config["pool_size"] = (2, 2, 2) # pool size for the max pooling operations -# This determines what shape the images will be cropped/resampled to. -config["image_shape"] = (144, 144, 144) -# switch to None to train on the whole image -config["patch_shape"] = (64, 64, 64) -config["labels"] = (1, 2, 4) # the label numbers on the input image -config["n_labels"] = len(config["labels"]) -config["all_modalities"] = ["t1", "t1ce", "flair", "t2"] -# change this if you want to only use some of the modalities -config["training_modalities"] = config["all_modalities"] -config["nb_channels"] = len(config["training_modalities"]) -if "patch_shape" in config and config["patch_shape"] is not None: - config["input_shape"] = tuple( - [config["nb_channels"]] + list(config["patch_shape"])) -else: - config["input_shape"] = tuple( - [config["nb_channels"]] + list(config["image_shape"])) -config["truth_channel"] = config["nb_channels"] -# if False, will use upsampling instead of deconvolution -config["deconvolution"] = True - -config["batch_size"] = 6 -config["validation_batch_size"] = 12 -config["n_epochs"] = 500 # cutoff the training after this many epochs -# learning rate will be reduced after this many epochs if the validation loss is not improving -config["patience"] = 10 -# training will be stopped after this many epochs without the validation loss improving -config["early_stop"] = 50 -config["initial_learning_rate"] = 0.00001 -# factor by which the learning rate will be reduced -config["learning_rate_drop"] = 0.5 -# portion of the data that will be used for training -config["validation_split"] = 0.8 -config["flip"] = False # augments the data by randomly flipping an axis during -# data shape must be a cube. Augments the data by permuting in various directions -config["permute"] = True -config["distort"] = None # switch to None if you want no distortion -config["augment"] = config["flip"] or config["distort"] -# if > 0, during training, validation patches will be overlapping -config["validation_patch_overlap"] = 0 -# randomly offset the first patch index by up to this offset -config["training_patch_start_offset"] = (16, 16, 16) -# if True, then patches without any target will be skipped -config["skip_blank"] = True - -config["data_file"] = os.path.abspath("brats_data.h5") -config["model_file"] = os.path.abspath("tumor_segmentation_model.h5") -config["training_file"] = os.path.abspath("training_ids.pkl") -config["validation_file"] = os.path.abspath("validation_ids.pkl") -# If True, will previous files. If False, will use previously written files. 
-config["overwrite"] = False - - -def fetch_training_data_files(): - training_data_files = list() - for subject_dir in glob.glob(os.path.join(os.path.dirname(__file__), "data", "preprocessed", "*", "*")): - subject_files = list() - for modality in config["training_modalities"] + ["truth"]: - subject_files.append(os.path.join( - subject_dir, modality + ".nii.gz")) - training_data_files.append(tuple(subject_files)) - return training_data_files - - -def main(overwrite=False): - # convert input images into an hdf5 file - if overwrite or not os.path.exists(config["data_file"]): - training_files = fetch_training_data_files() - - write_data_to_file( - training_files, config["data_file"], image_shape=config["image_shape"]) - data_file_opened = open_data_file(config["data_file"]) - - if not overwrite and os.path.exists(config["model_file"]): - model = load_old_model(config["model_file"]) - else: - # instantiate new model - model = unet_model_3d(input_shape=config["input_shape"], - pool_size=config["pool_size"], - n_labels=config["n_labels"], - initial_learning_rate=config["initial_learning_rate"], - deconvolution=config["deconvolution"]) - - # get training and testing generators - train_generator, validation_generator, n_train_steps, n_validation_steps = get_training_and_validation_generators( - data_file_opened, - batch_size=config["batch_size"], - data_split=config["validation_split"], - overwrite=overwrite, - validation_keys_file=config["validation_file"], - training_keys_file=config["training_file"], - n_labels=config["n_labels"], - labels=config["labels"], - patch_shape=config["patch_shape"], - validation_batch_size=config["validation_batch_size"], - validation_patch_overlap=config["validation_patch_overlap"], - training_patch_start_offset=config["training_patch_start_offset"], - permute=config["permute"], - augment=config["augment"], - skip_blank=config["skip_blank"], - augment_flip=config["flip"], - augment_distortion_factor=config["distort"]) - - # run training - train_model(model=model, - model_file=config["model_file"], - training_generator=train_generator, - validation_generator=validation_generator, - steps_per_epoch=n_train_steps, - validation_steps=n_validation_steps, - initial_learning_rate=config["initial_learning_rate"], - learning_rate_drop=config["learning_rate_drop"], - learning_rate_patience=config["patience"], - early_stopping_patience=config["early_stop"], - n_epochs=config["n_epochs"]) - data_file_opened.close() - - -if __name__ == "__main__": - main(overwrite=config["overwrite"]) -import os -import glob - -from unet3d.data import write_data_to_file, open_data_file -from unet3d.generator import get_training_and_validation_generators -from unet3d.model import isensee2017_model -from unet3d.training import load_old_model, train_model - - -config = dict() -# This determines what shape the images will be cropped/resampled to. 
-config["image_shape"] = (128, 128, 128) -config["patch_shape"] = None # switch to None to train on the whole image -config["labels"] = (1, 2, 4) # the label numbers on the input image -config["n_base_filters"] = 16 -config["n_labels"] = len(config["labels"]) -config["all_modalities"] = ["t1", "t1ce", "flair", "t2"] -# change this if you want to only use some of the modalities -config["training_modalities"] = config["all_modalities"] -config["nb_channels"] = len(config["training_modalities"]) -if "patch_shape" in config and config["patch_shape"] is not None: - config["input_shape"] = tuple( - [config["nb_channels"]] + list(config["patch_shape"])) -else: - config["input_shape"] = tuple( - [config["nb_channels"]] + list(config["image_shape"])) -config["truth_channel"] = config["nb_channels"] -# if False, will use upsampling instead of deconvolution -config["deconvolution"] = True - -config["batch_size"] = 1 -config["validation_batch_size"] = 2 -config["n_epochs"] = 500 # cutoff the training after this many epochs -# learning rate will be reduced after this many epochs if the validation loss is not improving -config["patience"] = 10 -# training will be stopped after this many epochs without the validation loss improving -config["early_stop"] = 50 -config["initial_learning_rate"] = 5e-4 -# factor by which the learning rate will be reduced -config["learning_rate_drop"] = 0.5 -# portion of the data that will be used for training -config["validation_split"] = 0.8 -config["flip"] = False # augments the data by randomly flipping an axis during -# data shape must be a cube. Augments the data by permuting in various directions -config["permute"] = True -config["distort"] = None # switch to None if you want no distortion -config["augment"] = config["flip"] or config["distort"] -# if > 0, during training, validation patches will be overlapping -config["validation_patch_overlap"] = 0 -# randomly offset the first patch index by up to this offset -config["training_patch_start_offset"] = (16, 16, 16) -# if True, then patches without any target will be skipped -config["skip_blank"] = True - -config["data_file"] = os.path.abspath("brats_data.h5") -config["model_file"] = os.path.abspath("isensee_2017_model.h5") -config["training_file"] = os.path.abspath("isensee_training_ids.pkl") -config["validation_file"] = os.path.abspath("isensee_validation_ids.pkl") -# If True, will previous files. If False, will use previously written files. 
-config["overwrite"] = False - - -def fetch_training_data_files(return_subject_ids=False): - training_data_files = list() - subject_ids = list() - for subject_dir in glob.glob(os.path.join(os.path.dirname(__file__), "data", "preprocessed", "*", "*")): - subject_ids.append(os.path.basename(subject_dir)) - subject_files = list() - for modality in config["training_modalities"] + ["truth"]: - subject_files.append(os.path.join( - subject_dir, modality + ".nii.gz")) - training_data_files.append(tuple(subject_files)) - if return_subject_ids: - return training_data_files, subject_ids - else: - return training_data_files - - -def main(overwrite=False): - # convert input images into an hdf5 file - if overwrite or not os.path.exists(config["data_file"]): - training_files, subject_ids = fetch_training_data_files( - return_subject_ids=True) - - write_data_to_file(training_files, config["data_file"], image_shape=config["image_shape"], - subject_ids=subject_ids) - data_file_opened = open_data_file(config["data_file"]) - - if not overwrite and os.path.exists(config["model_file"]): - model = load_old_model(config["model_file"]) - else: - # instantiate new model - model = isensee2017_model(input_shape=config["input_shape"], n_labels=config["n_labels"], - initial_learning_rate=config["initial_learning_rate"], - n_base_filters=config["n_base_filters"]) - - # get training and testing generators - train_generator, validation_generator, n_train_steps, n_validation_steps = get_training_and_validation_generators( - data_file_opened, - batch_size=config["batch_size"], - data_split=config["validation_split"], - overwrite=overwrite, - validation_keys_file=config["validation_file"], - training_keys_file=config["training_file"], - n_labels=config["n_labels"], - labels=config["labels"], - patch_shape=config["patch_shape"], - validation_batch_size=config["validation_batch_size"], - validation_patch_overlap=config["validation_patch_overlap"], - training_patch_start_offset=config["training_patch_start_offset"], - permute=config["permute"], - augment=config["augment"], - skip_blank=config["skip_blank"], - augment_flip=config["flip"], - augment_distortion_factor=config["distort"]) - - # run training - train_model(model=model, - model_file=config["model_file"], - training_generator=train_generator, - validation_generator=validation_generator, - steps_per_epoch=n_train_steps, - validation_steps=n_validation_steps, - initial_learning_rate=config["initial_learning_rate"], - learning_rate_drop=config["learning_rate_drop"], - learning_rate_patience=config["patience"], - early_stopping_patience=config["early_stop"], - n_epochs=config["n_epochs"]) - data_file_opened.close() - - -if __name__ == "__main__": - main(overwrite=config["overwrite"]) -from unittest import TestCase -import os -import shutil -import sys -import nibabel as nib -import numpy as np - - -def create_blank_image(filename, image_shape=(140, 140, 140)): - data = np.zeros(image_shape, dtype=np.int16) - affine = np.diag(np.ones(4)) - image = nib.Nifti1Image(dataobj=data, affine=affine) - image.to_filename(filename) - - -class TestPreprocess(TestCase): - def setUp(self): - self.temp_brats_dir = os.path.abspath("temp_brats") - os.makedirs(self.temp_brats_dir) - sys.path.append('../brats') - self.preprocessed_dir = os.path.abspath("temp_preprocessed_brats") - - def tearDown(self): - for directory in (self.temp_brats_dir, self.preprocessed_dir): - if os.path.exists(directory): - shutil.rmtree(directory) - - def create_replica_dataset(self, subject_ids, scan_types, 
directory_name): - gbm_dir = os.path.join(self.temp_brats_dir, directory_name) - for subject_id in subject_ids: - subject_dir = os.path.join(gbm_dir, subject_id) - os.makedirs(subject_dir) - for scan_label in scan_types: - basename = '{}_{}.nii.gz'.format(subject_id, scan_label) - scan_filename = os.path.join(subject_dir, basename) - create_blank_image(scan_filename) - - def create_replica_dataset_pre2018(self): - self.create_replica_dataset(subject_ids=('TCGA-00-000',), - scan_types=('flair', - 'GlistrBoost', - 'GlistrBoost_ManuallyCorrected', - 't1', - 't1Gd', - 't2'), - directory_name='Pre-operative_TCGA_GBM_NIfTI_and_Segmentations') - self.create_replica_dataset(subject_ids=('TCGA-01-000',), - scan_types=('flair', - 'GlistrBoost', - 't1', - 't1Gd', - 't2'), - directory_name='Pre-operative_TCGA_GBM_NIfTI_and_Segmentations') - - def create_replica_dataset_2018(self): - self.create_replica_dataset(subject_ids=('Brats18_1900_1_1',), - scan_types=('flair', - 't1', - 't1ce', - 't2', - 'seg'), - directory_name='HGG') - - def test_preprocess_pre2018(self): - from preprocess import convert_brats_data - self.create_replica_dataset_pre2018() - convert_brats_data(self.temp_brats_dir, self.preprocessed_dir) - - def test_preprocess_2018(self): - self.create_replica_dataset_2018() - from preprocess import convert_brats_data - convert_brats_data(self.temp_brats_dir, self.preprocessed_dir) -import os -from unittest import TestCase - -import numpy as np - -from unet3d.data import add_data_to_storage, create_data_file -from unet3d.generator import get_multi_class_labels, get_training_and_validation_generators -from unet3d.augment import generate_permutation_keys, permute_data, reverse_permute_data - - -class TestDataGenerator(TestCase): - def setUp(self): - self.tmp_files = list() - self.data_file = None - - def tearDown(self): - if self.data_file: - self.data_file.close() - self.rm_tmp_files() - - def create_data_file(self, n_samples=20, len_x=5, len_y=5, len_z=10, n_channels=1): - self.data_file_path = "./temporary_data_test_file.h5" - self.training_keys_file = "./temporary_training_keys_file.pkl" - self.validation_keys_file = "./temporary_validation_keys_file.pkl" - self.tmp_files = [self.data_file_path, - self.training_keys_file, self.validation_keys_file] - - self.rm_tmp_files() - - self.n_samples = n_samples - self.n_channels = n_channels - self.n_labels = 1 - - image_shape = (len_x, len_y, len_z) - data_size = self.n_samples * self.n_channels * len_x * len_y * len_z - data = np.asarray(np.arange(data_size).reshape((self.n_samples, self.n_channels, len_x, len_y, len_z)), - dtype=np.int16) - self.assertEqual(data.shape[-3:], image_shape) - truth = (data[:, 0] == 3).astype(np.int8).reshape( - data.shape[0], 1, data.shape[2], data.shape[3], data.shape[4]) - affine = np.diag(np.ones(4)) - affine[:, -1] = 1 - self.data_file, data_storage, truth_storage, affine_storage = create_data_file(self.data_file_path, - self.n_channels, self.n_samples, - image_shape) - - for index in range(self.n_samples): - add_data_to_storage(data_storage, truth_storage, affine_storage, - np.concatenate([data[index], truth[index]], axis=0), affine=affine, - n_channels=self.n_channels, - truth_dtype=np.int16) - self.assertTrue(np.all(data_storage[index] == data[index])) - self.assertTrue(np.all(truth_storage[index] == truth[index])) - - def rm_tmp_files(self): - for tmp_file in self.tmp_files: - if os.path.exists(tmp_file): - os.remove(tmp_file) - - def test_multi_class_labels(self): - n_labels = 5 - labels = np.arange(1, 
n_labels+1) - x_dim = 3 - label_map = np.asarray([[[np.arange(n_labels+1)] * x_dim]]) - binary_labels = get_multi_class_labels(label_map, n_labels, labels) - - for label in labels: - self.assertTrue( - np.all(binary_labels[:, label - 1][label_map[:, 0] == label] == 1)) - - def test_get_training_and_validation_generators(self): - self.create_data_file() - - validation_split = 0.8 - batch_size = 3 - validation_batch_size = 3 - - generators = get_training_and_validation_generators(data_file=self.data_file, - batch_size=batch_size, - n_labels=self.n_labels, - training_keys_file=self.training_keys_file, - validation_keys_file=self.validation_keys_file, - data_split=validation_split, - validation_batch_size=validation_batch_size, - skip_blank=False) - training_generator, validation_generator, n_training_steps, n_validation_steps = generators - - self.verify_generator(training_generator, n_training_steps, batch_size, - np.round(validation_split * self.n_samples)) - - self.verify_generator(validation_generator, n_validation_steps, validation_batch_size, - np.round((1 - validation_split) * self.n_samples)) - - self.data_file.close() - self.rm_tmp_files() - - def verify_generator(self, generator, steps, batch_size, expected_samples): - # check that the generator covers all the samples - n_validation_samples = 0 - validation_samples = list() - for i in range(steps): - x, y = next(generator) - hash_x = hash(str(x)) - self.assertNotIn(hash_x, validation_samples) - validation_samples.append(hash_x) - n_validation_samples += x.shape[0] - if i + 1 != steps: - self.assertEqual(x.shape[0], batch_size) - self.assertEqual(n_validation_samples, expected_samples) - - def test_patch_generators(self): - self.create_data_file(len_x=4, len_y=4, len_z=4) - - validation_split = 0.8 - batch_size = 10 - validation_batch_size = 3 - patch_shape = (2, 2, 2) - - generators = get_training_and_validation_generators(self.data_file, batch_size, self.n_labels, - self.training_keys_file, self.validation_keys_file, - data_split=validation_split, - validation_batch_size=validation_batch_size, - patch_shape=patch_shape, - skip_blank=False) - training_generator, validation_generator, n_training_steps, n_validation_steps = generators - - expected_training_samples = int( - np.round(self.n_samples * validation_split)) * 2**3 - - self.verify_generator( - training_generator, n_training_steps, batch_size, expected_training_samples) - - expected_validation_samples = int( - np.round(self.n_samples * (1 - validation_split))) * 2**3 - - self.verify_generator(validation_generator, n_validation_steps, validation_batch_size, - expected_validation_samples) - - self.data_file.close() - self.rm_tmp_files() - - def test_random_patch_start(self): - self.create_data_file(len_x=10, len_y=10, len_z=10) - - validation_split = 0.8 - batch_size = 10 - validation_batch_size = 3 - patch_shape = (5, 5, 5) - random_start = (3, 3, 3) - overlap = 2 - - generators = get_training_and_validation_generators(self.data_file, batch_size, self.n_labels, - self.training_keys_file, self.validation_keys_file, - data_split=validation_split, - validation_batch_size=validation_batch_size, - patch_shape=patch_shape, - training_patch_start_offset=random_start, - validation_patch_overlap=overlap, - skip_blank=False) - - training_generator, validation_generator, n_training_steps, n_validation_steps = generators - - expected_training_samples = int( - np.round(self.n_samples * validation_split)) * 2**3 - - self.verify_generator( - training_generator, n_training_steps, batch_size, 
expected_training_samples) - - expected_validation_samples = int( - np.round(self.n_samples * (1 - validation_split))) * 4**3 - - self.verify_generator(validation_generator, n_validation_steps, validation_batch_size, - expected_validation_samples) - - self.data_file.close() - self.rm_tmp_files() - - def test_unique_permutations(self): - permutations = list() - shape = (2, 3, 3, 3) - data = np.arange(54).reshape(shape) - for key in generate_permutation_keys(): - permutations.append(permute_data(data, key)) - for array in permutations[:-1]: - self.assertTrue(permutations[-1].shape == shape) - self.assertFalse(np.all(array == permutations[-1])) - self.assertEqual(np.sum(data), np.sum(permutations[-1])) - - def test_n_permutations(self): - self.assertEqual(len(generate_permutation_keys()), 48) - - def test_generator_with_permutations(self): - self.create_data_file(len_x=5, len_y=5, len_z=5, n_channels=5) - batch_size = 2 - generators = get_training_and_validation_generators(self.data_file, batch_size, self.n_labels, - self.training_keys_file, self.validation_keys_file, - permute=True) - training_generator, validation_generator, n_training_steps, n_validation_steps = generators - - _ = next(training_generator) - - self.rm_tmp_files() - - def test_reverse_permutation(self): - data_shape = (4, 32, 32, 32) - data = np.arange(np.prod(data_shape)).reshape(data_shape) - for permutation_key in generate_permutation_keys(): - permuted_data = permute_data(data, permutation_key) - reversed_permutation = reverse_permute_data( - permuted_data, permutation_key) - self.assertTrue(np.all(data == reversed_permutation)) -from unittest import TestCase - -import numpy as np -import keras.backend as K - - -from unet3d.metrics import weighted_dice_coefficient - - -class TestWeightedDice(TestCase): - def test_weighted_dice_coefficient(self): - data = np.zeros((5**3) * 3).reshape(3, 5, 5, 5) - data[0, 0:1] = 1 - data[1, 0:2] = 1 - data[2, 1:4] = 1 - - max_dice = K.eval(weighted_dice_coefficient( - K.variable(data), K.variable(data))) - for index in range(data.shape[0]): - temp_data = np.copy(data) - temp_data[index] = 0 - dice = K.eval(weighted_dice_coefficient( - K.variable(data), K.variable(temp_data))) - self.assertAlmostEqual(dice, (2 * max_dice)/3, delta=0.00001) - - def test_blank_dice_coefficient(self): - data = np.zeros((5**3) * 3).reshape(3, 5, 5, 5) - blank = np.copy(data) - data[0, 0:1] = 1 - data[1, 0:2] = 1 - data[2, 1:4] = 1 - - self.assertAlmostEqual(K.eval(weighted_dice_coefficient( - K.variable(data), K.variable(blank))), 0, delta=0.00001) - - def test_empty_label(self): - data = np.zeros((5**3) * 3).reshape(3, 5, 5, 5) - data[1, 0:2] = 1 - data[2, 1:4] = 1 - - self.assertEqual(K.eval(weighted_dice_coefficient( - K.variable(data), K.variable(data))), 1) -from unittest import TestCase - -from unet3d.model import unet_model_3d - - -class TestModel(TestCase): - def test_batch_normalization(self): - model = unet_model_3d(input_shape=(1, 16, 16, 16), depth=2, deconvolution=True, metrics=[], n_labels=1, - batch_normalization=True) - - layer_names = [layer.name for layer in model.layers] - - for name in layer_names[:-3]: # exclude the last convolution layer - if 'conv3d' in name and 'transpose' not in name: - self.assertIn(name.replace( - 'conv3d', 'batch_normalization'), layer_names) -import nibabel as nib -import numpy as np - -from unittest import TestCase - -from unet3d.utils.patches import compute_patch_indices, get_patch_from_3d_data, reconstruct_from_patches - - -class TestPrediction(TestCase): - def 
setUp(self):
- image_shape = (120, 144, 90)
- data = np.arange(
- 0, image_shape[0]*image_shape[1]*image_shape[2]).reshape(image_shape)
- affine = np.diag(np.ones(4))
- self.image = nib.Nifti1Image(data, affine)
-
- def test_reconstruct_from_patches(self):
- patch_shape = (32, 32, 32)
- patch_overlap = 0
- patch_indices = compute_patch_indices(
- self.image.shape, patch_shape, patch_overlap)
- patches = [get_patch_from_3d_data(
- self.image.get_data(), patch_shape, index) for index in patch_indices]
- reconstructed_data = reconstruct_from_patches(
- patches, patch_indices, self.image.shape)
- # noinspection PyTypeChecker
- self.assertTrue(np.all(self.image.get_data() == reconstructed_data))
-
- def test_reconstruct_with_overlapping_patches(self):
- patch_overlap = 0
- patch_shape = (32, 32, 32)
- patch_indices = compute_patch_indices(
- self.image.shape, patch_shape, patch_overlap)
- patches = [get_patch_from_3d_data(
- self.image.get_data(), patch_shape, index) for index in patch_indices]
- # extend patches with modified patches that are 2 lower than the original patches
- patches.extend([patch - 2 for patch in patches])
- patch_indices = np.concatenate([patch_indices, patch_indices], axis=0)
- reconstructed_data = reconstruct_from_patches(
- patches, patch_indices, self.image.shape)
- # The reconstructed data should be 1 lower than the original data as 2 was subtracted from half the patches.
- # The resulting reconstruction should be the average.
- # noinspection PyTypeChecker
- self.assertTrue(
- np.all((self.image.get_data() - 1) == reconstructed_data))
-
- def test_reconstruct_with_overlapping_patches2(self):
- image_shape = (144, 144, 144)
- data = np.arange(
- 0, image_shape[0]*image_shape[1]*image_shape[2]).reshape(image_shape)
- patch_overlap = 16
- patch_shape = (64, 64, 64)
- patch_indices = compute_patch_indices(
- data.shape, patch_shape, patch_overlap)
- patches = [get_patch_from_3d_data(
- data, patch_shape, index) for index in patch_indices]
-
- no_overlap_indices = compute_patch_indices(data.shape, patch_shape, 32)
- patch_indices = np.concatenate([patch_indices, no_overlap_indices])
- patches.extend([get_patch_from_3d_data(data, patch_shape, index)
- for index in no_overlap_indices])
- reconstructed_data = reconstruct_from_patches(
- patches, patch_indices, data.shape)
- # noinspection PyTypeChecker
- self.assertTrue(np.all(data == reconstructed_data))
-
- def test_reconstruct_with_multiple_channels(self):
- image_shape = (144, 144, 144)
- n_channels = 4
- data = np.arange(0, image_shape[0]*image_shape[1]*image_shape[2]*n_channels).reshape(
- [n_channels] + list(image_shape))
- patch_overlap = 16
- patch_shape = (64, 64, 64)
- patch_indices = compute_patch_indices(
- image_shape, patch_shape, patch_overlap)
- patches = [get_patch_from_3d_data(
- data, patch_shape, index) for index in patch_indices]
- self.assertEqual(patches[0].shape, tuple([4] + list(patch_shape)))
-
- reconstructed_data = reconstruct_from_patches(
- patches, patch_indices, data.shape)
- # noinspection PyTypeChecker
- self.assertTrue(np.all(data == reconstructed_data))
-from unittest import TestCase
-
-from keras.callbacks import ReduceLROnPlateau, EarlyStopping
-
-from unet3d.training import get_callbacks
-
-
-class TestCallbacks(TestCase):
- def test_reduce_on_plateau(self):
- _, _, scheduler = get_callbacks(
- model_file='model.h5', learning_rate_patience=50, learning_rate_drop=0.5)
- self.assertIsInstance(scheduler, ReduceLROnPlateau)
-
- def test_early_stopping(self):
- _, _, _, stopper = get_callbacks(
model_file='model.h5', early_stopping_patience=100)
- self.assertIsInstance(stopper, EarlyStopping)
-from unittest import TestCase
-
-import nibabel as nib
-import numpy as np
-
-from unet3d.utils.utils import resize
-from unet3d.utils.sitk_utils import resample_to_spacing
-
-
-class TestUtils(TestCase):
- def _resize_image_test(self, image, target_shape):
- original_image_shape = image.shape
- new_image = resize(image, target_shape)
- self.assertEqual(new_image.shape, target_shape)
- new_image = resize(new_image, original_image_shape,
- interpolation="linear")
- self.assertEqual(new_image.shape, original_image_shape)
-
- def _create_image(self, image_shape):
- data = np.asarray(np.arange(np.prod(image_shape)).reshape(
- image_shape), dtype=np.float)
- affine = np.zeros((4, 4))
- np.fill_diagonal(affine, 1)
- return nib.Nifti1Image(data, affine)
-
- def test_resize_image_1(self):
- image_shape = (4, 4, 4)
- image = self._create_image(image_shape)
- new_size = (2, 2, 2)
- self._resize_image_test(image, new_size)
-
- def test_resize_image_2(self):
- self._resize_image_test(self._create_image((12, 10, 8)), (8, 8, 8))
-
- def test_resize_image_2d(self):
- data = np.arange(1, 5).reshape((2, 2))
- new_data = resample_to_spacing(
- data, (2, 2), (1, 1), interpolation="nearest")
- self.assertTrue(np.all(new_data == np.asarray([[1, 1, 2, 2],
- [1, 1, 2, 2],
- [3, 3, 4, 4],
- [3, 3, 4, 4]])))
- orig_data = resample_to_spacing(
- new_data, (1, 1), (2, 2), interpolation="linear")
- self.assertTrue(np.all(data == orig_data))
-
- def test_resize_image_3(self):
- self._resize_image_test(self._create_image((2, 5, 3)), (7, 5, 11))
-
- def test_resize_image_3d(self):
- data = np.arange(1, 9).reshape((2, 2, 2))
- new_data = resample_to_spacing(
- data, (2, 2, 2), (1, 1, 1), interpolation="nearest")
- self.assertTrue(np.all(new_data[0] == np.asarray([[1, 1, 2, 2],
- [1, 1, 2, 2],
- [3, 3, 4, 4],
- [3, 3, 4, 4]])))
- orig_data = resample_to_spacing(
- new_data, (1, 1, 1), (2, 2, 2), interpolation="linear")
- self.assertTrue(np.all(data == orig_data))
-
- def test_images_align(self):
- data = np.arange(1, 9).reshape((2, 2, 2))
- affine = np.diag(np.ones(4) * 2)
- affine[3, 3] = 1
- image_nib = nib.Nifti1Image(data, affine=affine)
- new_image_nib = resize(image_nib, (4, 4, 4), interpolation="nearest")
- self.assertTrue(np.all(new_image_nib.get_data()[0] == np.asarray([[1, 1, 2, 2],
- [1, 1, 2, 2],
- [3, 3, 4, 4],
- [3, 3, 4, 4]])))
- self.assertTrue(np.all(new_image_nib.affine == np.asarray([[1., 0., 0., -0.5],
- [0., 1., 0., -0.5],
- [0., 0., 1., -0.5],
- [0., 0., 0., 1.]])))
-import numpy as np
-import nibabel as nib
-from nilearn.image import new_img_like, resample_to_img
-import random
-import itertools
-
-
-def scale_image(image, scale_factor):
- scale_factor = np.asarray(scale_factor)
- new_affine = np.copy(image.affine)
- new_affine[:3, :3] = image.affine[:3, :3] * scale_factor
- new_affine[:, 3][:3] = image.affine[:, 3][:3] + \
- (image.shape * np.diag(image.affine)[:3] * (1 - scale_factor)) / 2
- return new_img_like(image, data=image.get_data(), affine=new_affine)
-
-
-def flip_image(image, axis):
- try:
- new_data = np.copy(image.get_data())
- for axis_index in axis:
- new_data = np.flip(new_data, axis=axis_index)
- except TypeError:
- new_data = np.flip(image.get_data(), axis=axis)
- return new_img_like(image, data=new_data)
-
-
-def random_flip_dimensions(n_dimensions):
- axis = list()
- for dim in range(n_dimensions):
- if random_boolean():
- axis.append(dim)
- return axis
-
-
-def
random_scale_factor(n_dim=3, mean=1, std=0.25):
- return np.random.normal(mean, std, n_dim)
-
-
-def random_boolean():
- return np.random.choice([True, False])
-
-
-def distort_image(image, flip_axis=None, scale_factor=None):
- if flip_axis:
- image = flip_image(image, flip_axis)
- if scale_factor is not None:
- image = scale_image(image, scale_factor)
- return image
-
-
-def augment_data(data, truth, affine, scale_deviation=None, flip=True):
- n_dim = len(truth.shape)
- if scale_deviation:
- scale_factor = random_scale_factor(n_dim, std=scale_deviation)
- else:
- scale_factor = None
- if flip:
- flip_axis = random_flip_dimensions(n_dim)
- else:
- flip_axis = None
- data_list = list()
- for data_index in range(data.shape[0]):
- image = get_image(data[data_index], affine)
- data_list.append(resample_to_img(distort_image(image, flip_axis=flip_axis,
- scale_factor=scale_factor), image,
- interpolation="continuous").get_data())
- data = np.asarray(data_list)
- truth_image = get_image(truth, affine)
- truth_data = resample_to_img(distort_image(truth_image, flip_axis=flip_axis, scale_factor=scale_factor),
- truth_image, interpolation="nearest").get_data()
- return data, truth_data
-
-
-def get_image(data, affine, nib_class=nib.Nifti1Image):
- return nib_class(dataobj=data, affine=affine)
-
-
-def generate_permutation_keys():
- """
- This function returns a set of "keys" that represent the 48 unique rotations &
- reflections of a 3D matrix.
-
- Each item of the set is a tuple:
- ((rotate_y, rotate_z), flip_x, flip_y, flip_z, transpose)
-
- As an example, ((0, 1), 0, 1, 0, 1) represents a permutation in which the data is
- rotated 90 degrees around the z-axis, then reversed on the y-axis, and then
- transposed.
-
- 48 unique rotations & reflections:
- https://en.wikipedia.org/wiki/Octahedral_symmetry#The_isometries_of_the_cube
- """
- return set(itertools.product(
- itertools.combinations_with_replacement(range(2), 2), range(2), range(2), range(2), range(2)))
-
-
-def random_permutation_key():
- """
- Generates and randomly selects a permutation key. See the documentation for the
- "generate_permutation_keys" function.
- """
- return random.choice(list(generate_permutation_keys()))
-
-
-def permute_data(data, key):
- """
- Permutes the given data according to the specification of the given key. Input data
- must be of shape (n_modalities, x, y, z).
-
- Input key is a tuple: ((rotate_y, rotate_z), flip_x, flip_y, flip_z, transpose)
-
- As an example, ((0, 1), 0, 1, 0, 1) represents a permutation in which the data is
- rotated 90 degrees around the z-axis, then reversed on the y-axis, and then
- transposed.
- """
- data = np.copy(data)
- (rotate_y, rotate_z), flip_x, flip_y, flip_z, transpose = key
-
- if rotate_y != 0:
- data = np.rot90(data, rotate_y, axes=(1, 3))
- if rotate_z != 0:
- data = np.rot90(data, rotate_z, axes=(2, 3))
- if flip_x:
- data = data[:, ::-1]
- if flip_y:
- data = data[:, :, ::-1]
- if flip_z:
- data = data[:, :, :, ::-1]
- if transpose:
- for i in range(data.shape[0]):
- data[i] = data[i].T
- return data
-
-
-def random_permutation_x_y(x_data, y_data):
- """
- Performs random permutation on the data.
- :param x_data: numpy array containing the data. Data must be of shape (n_modalities, x, y, z).
- :param y_data: numpy array containing the data. Data must be of shape (n_modalities, x, y, z).
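- Both arrays are permuted with the same randomly drawn key, so inputs and targets stay
- spatially aligned. A minimal round-trip sketch (illustrative, assuming cubic data):
- key = random_permutation_key()
- assert np.all(reverse_permute_data(permute_data(x_data, key), key) == x_data)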
- :return: the permuted data - """ - key = random_permutation_key() - return permute_data(x_data, key), permute_data(y_data, key) - - -def reverse_permute_data(data, key): - key = reverse_permutation_key(key) - data = np.copy(data) - (rotate_y, rotate_z), flip_x, flip_y, flip_z, transpose = key - - if transpose: - for i in range(data.shape[0]): - data[i] = data[i].T - if flip_z: - data = data[:, :, :, ::-1] - if flip_y: - data = data[:, :, ::-1] - if flip_x: - data = data[:, ::-1] - if rotate_z != 0: - data = np.rot90(data, rotate_z, axes=(2, 3)) - if rotate_y != 0: - data = np.rot90(data, rotate_y, axes=(1, 3)) - return data - - -def reverse_permutation_key(key): - rotation = tuple([-rotate for rotate in key[0]]) - return rotation, key[1], key[2], key[3], key[4] -import os - -import numpy as np -import tables - -from .normalize import normalize_data_storage, reslice_image_set - - -def create_data_file(out_file, n_channels, n_samples, image_shape): - hdf5_file = tables.open_file(out_file, mode='w') - filters = tables.Filters(complevel=5, complib='blosc') - data_shape = tuple([0, n_channels] + list(image_shape)) - truth_shape = tuple([0, 1] + list(image_shape)) - data_storage = hdf5_file.create_earray(hdf5_file.root, 'data', tables.Float32Atom(), shape=data_shape, - filters=filters, expectedrows=n_samples) - truth_storage = hdf5_file.create_earray(hdf5_file.root, 'truth', tables.UInt8Atom(), shape=truth_shape, - filters=filters, expectedrows=n_samples) - affine_storage = hdf5_file.create_earray(hdf5_file.root, 'affine', tables.Float32Atom(), shape=(0, 4, 4), - filters=filters, expectedrows=n_samples) - return hdf5_file, data_storage, truth_storage, affine_storage - - -def write_image_data_to_file(image_files, data_storage, truth_storage, image_shape, n_channels, affine_storage, - truth_dtype=np.uint8, crop=True): - for set_of_files in image_files: - images = reslice_image_set( - set_of_files, image_shape, label_indices=len(set_of_files) - 1, crop=crop) - subject_data = [image.get_data() for image in images] - add_data_to_storage(data_storage, truth_storage, affine_storage, subject_data, images[0].affine, n_channels, - truth_dtype) - return data_storage, truth_storage - - -def add_data_to_storage(data_storage, truth_storage, affine_storage, subject_data, affine, n_channels, truth_dtype): - data_storage.append(np.asarray(subject_data[:n_channels])[np.newaxis]) - truth_storage.append(np.asarray(subject_data[n_channels], dtype=truth_dtype)[ - np.newaxis][np.newaxis]) - affine_storage.append(np.asarray(affine)[np.newaxis]) - - -def write_data_to_file(training_data_files, out_file, image_shape, truth_dtype=np.uint8, subject_ids=None, - normalize=True, crop=True): - """ - Takes in a set of training images and writes those images to an hdf5 file. - :param training_data_files: List of tuples containing the training data files. The modalities should be listed in - the same order in each tuple. The last item in each tuple must be the labeled image. - Example: [('sub1-T1.nii.gz', 'sub1-T2.nii.gz', 'sub1-truth.nii.gz'), - ('sub2-T1.nii.gz', 'sub2-T2.nii.gz', 'sub2-truth.nii.gz')] - :param out_file: Where the hdf5 file will be written to. - :param image_shape: Shape of the images that will be saved to the hdf5 file. - :param truth_dtype: Default is 8-bit unsigned integer. - :return: Location of the hdf5 file with the image data written to it. 
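- Example (illustrative only; the file names below are hypothetical):
- write_data_to_file([('sub1-T1.nii.gz', 'sub1-T2.nii.gz', 'sub1-truth.nii.gz')],
- out_file='brats_data.h5', image_shape=(144, 144, 144))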
- """ - n_samples = len(training_data_files) - n_channels = len(training_data_files[0]) - 1 - - try: - hdf5_file, data_storage, truth_storage, affine_storage = create_data_file(out_file, - n_channels=n_channels, - n_samples=n_samples, - image_shape=image_shape) - except Exception as e: - # If something goes wrong, delete the incomplete data file - os.remove(out_file) - raise e - - write_image_data_to_file(training_data_files, data_storage, truth_storage, image_shape, - truth_dtype=truth_dtype, n_channels=n_channels, affine_storage=affine_storage, crop=crop) - if subject_ids: - hdf5_file.create_array(hdf5_file.root, 'subject_ids', obj=subject_ids) - if normalize: - normalize_data_storage(data_storage) - hdf5_file.close() - return out_file - - -def open_data_file(filename, readwrite="r"): - return tables.open_file(filename, readwrite) -import os -import copy -from random import shuffle -import itertools - -import numpy as np - -from .utils import pickle_dump, pickle_load -from .utils.patches import compute_patch_indices, get_random_nd_index, get_patch_from_3d_data -from .augment import augment_data, random_permutation_x_y - - -def get_training_and_validation_generators(data_file, batch_size, n_labels, training_keys_file, validation_keys_file, - data_split=0.8, overwrite=False, labels=None, augment=False, - augment_flip=True, augment_distortion_factor=0.25, patch_shape=None, - validation_patch_overlap=0, training_patch_start_offset=None, - validation_batch_size=None, skip_blank=True, permute=False): - """ - Creates the training and validation generators that can be used when training the model. - :param skip_blank: If True, any blank (all-zero) label images/patches will be skipped by the data generator. - :param validation_batch_size: Batch size for the validation data. - :param training_patch_start_offset: Tuple of length 3 containing integer values. Training data will randomly be - offset by a number of pixels between (0, 0, 0) and the given tuple. (default is None) - :param validation_patch_overlap: Number of pixels/voxels that will be overlapped in the validation data. (requires - patch_shape to not be None) - :param patch_shape: Shape of the data to return with the generator. If None, the whole image will be returned. - (default is None) - :param augment_flip: if True and augment is True, then the data will be randomly flipped along the x, y and z axis - :param augment_distortion_factor: if augment is True, this determines the standard deviation from the original - that the data will be distorted (in a stretching or shrinking fashion). Set to None, False, or 0 to prevent the - augmentation from distorting the data in this way. - :param augment: If True, training data will be distorted on the fly so as to avoid over-fitting. - :param labels: List or tuple containing the ordered label values in the image files. The length of the list or tuple - should be equal to the n_labels value. - Example: (10, 25, 50) - The data generator would then return binary truth arrays representing the labels 10, 25, and 30 in that order. - :param data_file: hdf5 file to load the data from. - :param batch_size: Size of the batches that the training generator will provide. - :param n_labels: Number of binary labels. - :param training_keys_file: Pickle file where the index locations of the training data will be stored. - :param validation_keys_file: Pickle file where the index locations of the validation data will be stored. - :param data_split: How the training and validation data will be split. 
0 means all the data will be used for - validation and none of it will be used for training. 1 means that all the data will be used for training and none - will be used for validation. Default is 0.8 or 80%. - :param overwrite: If set to True, previous files will be overwritten. The default mode is false, so that the - training and validation splits won't be overwritten when rerunning model training. - :param permute: will randomly permute the data (data must be 3D cube) - :return: Training data generator, validation data generator, number of training steps, number of validation steps - """ - if not validation_batch_size: - validation_batch_size = batch_size - - training_list, validation_list = get_validation_split(data_file, - data_split=data_split, - overwrite=overwrite, - training_file=training_keys_file, - validation_file=validation_keys_file) - - training_generator = data_generator(data_file, training_list, - batch_size=batch_size, - n_labels=n_labels, - labels=labels, - augment=augment, - augment_flip=augment_flip, - augment_distortion_factor=augment_distortion_factor, - patch_shape=patch_shape, - patch_overlap=0, - patch_start_offset=training_patch_start_offset, - skip_blank=skip_blank, - permute=permute) - validation_generator = data_generator(data_file, validation_list, - batch_size=validation_batch_size, - n_labels=n_labels, - labels=labels, - patch_shape=patch_shape, - patch_overlap=validation_patch_overlap, - skip_blank=skip_blank) - - # Set the number of training and testing samples per epoch correctly - num_training_steps = get_number_of_steps(get_number_of_patches(data_file, training_list, patch_shape, - skip_blank=skip_blank, - patch_start_offset=training_patch_start_offset, - patch_overlap=0), batch_size) - print("Number of training steps: ", num_training_steps) - - num_validation_steps = get_number_of_steps(get_number_of_patches(data_file, validation_list, patch_shape, - skip_blank=skip_blank, - patch_overlap=validation_patch_overlap), - validation_batch_size) - print("Number of validation steps: ", num_validation_steps) - - return training_generator, validation_generator, num_training_steps, num_validation_steps - - -def get_number_of_steps(n_samples, batch_size): - if n_samples <= batch_size: - return n_samples - elif np.remainder(n_samples, batch_size) == 0: - return n_samples//batch_size - else: - return n_samples//batch_size + 1 - - -def get_validation_split(data_file, training_file, validation_file, data_split=0.8, overwrite=False): - """ - Splits the data into the training and validation indices list. 
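- For example, with 10 samples and data_split=0.8, the shuffled sample indices are divided
- into 8 training and 2 validation indices, and each list is pickled so that later runs
- reuse the same split.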
- :param data_file: pytables hdf5 data file - :param training_file: - :param validation_file: - :param data_split: - :param overwrite: - :return: - """ - if overwrite or not os.path.exists(training_file): - print("Creating validation split...") - nb_samples = data_file.root.data.shape[0] - sample_list = list(range(nb_samples)) - training_list, validation_list = split_list( - sample_list, split=data_split) - pickle_dump(training_list, training_file) - pickle_dump(validation_list, validation_file) - return training_list, validation_list - else: - print("Loading previous validation split...") - return pickle_load(training_file), pickle_load(validation_file) - - -def split_list(input_list, split=0.8, shuffle_list=True): - if shuffle_list: - shuffle(input_list) - n_training = int(len(input_list) * split) - training = input_list[:n_training] - testing = input_list[n_training:] - return training, testing - - -def data_generator(data_file, index_list, batch_size=1, n_labels=1, labels=None, augment=False, augment_flip=True, - augment_distortion_factor=0.25, patch_shape=None, patch_overlap=0, patch_start_offset=None, - shuffle_index_list=True, skip_blank=True, permute=False): - orig_index_list = index_list - while True: - x_list = list() - y_list = list() - if patch_shape: - index_list = create_patch_index_list(orig_index_list, data_file.root.data.shape[-3:], patch_shape, - patch_overlap, patch_start_offset) - else: - index_list = copy.copy(orig_index_list) - - if shuffle_index_list: - shuffle(index_list) - while len(index_list) > 0: - index = index_list.pop() - add_data(x_list, y_list, data_file, index, augment=augment, augment_flip=augment_flip, - augment_distortion_factor=augment_distortion_factor, patch_shape=patch_shape, - skip_blank=skip_blank, permute=permute) - if len(x_list) == batch_size or (len(index_list) == 0 and len(x_list) > 0): - yield convert_data(x_list, y_list, n_labels=n_labels, labels=labels) - x_list = list() - y_list = list() - - -def get_number_of_patches(data_file, index_list, patch_shape=None, patch_overlap=0, patch_start_offset=None, - skip_blank=True): - if patch_shape: - index_list = create_patch_index_list(index_list, data_file.root.data.shape[-3:], patch_shape, patch_overlap, - patch_start_offset) - count = 0 - for index in index_list: - x_list = list() - y_list = list() - add_data(x_list, y_list, data_file, index, - skip_blank=skip_blank, patch_shape=patch_shape) - if len(x_list) > 0: - count += 1 - return count - else: - return len(index_list) - - -def create_patch_index_list(index_list, image_shape, patch_shape, patch_overlap, patch_start_offset=None): - patch_index = list() - for index in index_list: - if patch_start_offset is not None: - random_start_offset = np.negative( - get_random_nd_index(patch_start_offset)) - patches = compute_patch_indices( - image_shape, patch_shape, overlap=patch_overlap, start=random_start_offset) - else: - patches = compute_patch_indices( - image_shape, patch_shape, overlap=patch_overlap) - patch_index.extend(itertools.product([index], patches)) - return patch_index - - -def add_data(x_list, y_list, data_file, index, augment=False, augment_flip=False, augment_distortion_factor=0.25, - patch_shape=False, skip_blank=True, permute=False): - """ - Adds data from the data file to the given lists of feature and target data - :param skip_blank: Data will not be added if the truth vector is all zeros (default is True). - :param patch_shape: Shape of the patch to add to the data lists. If None, the whole image will be added. 
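- Note: when patch_shape is set, index is expected to be a (sample_index, patch_corner) tuple
- as produced by create_patch_index_list; otherwise it is a plain sample index.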
- :param x_list: list of data to which data from the data_file will be appended. - :param y_list: list of data to which the target data from the data_file will be appended. - :param data_file: hdf5 data file. - :param index: index of the data file from which to extract the data. - :param augment: if True, data will be augmented according to the other augmentation parameters (augment_flip and - augment_distortion_factor) - :param augment_flip: if True and augment is True, then the data will be randomly flipped along the x, y and z axis - :param augment_distortion_factor: if augment is True, this determines the standard deviation from the original - that the data will be distorted (in a stretching or shrinking fashion). Set to None, False, or 0 to prevent the - augmentation from distorting the data in this way. - :param permute: will randomly permute the data (data must be 3D cube) - :return: - """ - data, truth = get_data_from_file(data_file, index, patch_shape=patch_shape) - if augment: - if patch_shape is not None: - affine = data_file.root.affine[index[0]] - else: - affine = data_file.root.affine[index] - data, truth = augment_data( - data, truth, affine, flip=augment_flip, scale_deviation=augment_distortion_factor) - - if permute: - if data.shape[-3] != data.shape[-2] or data.shape[-2] != data.shape[-1]: - raise ValueError("To utilize permutations, data array must be in 3D cube shape with all dimensions having " - "the same length.") - data, truth = random_permutation_x_y(data, truth[np.newaxis]) - else: - truth = truth[np.newaxis] - - if not skip_blank or np.any(truth != 0): - x_list.append(data) - y_list.append(truth) - - -def get_data_from_file(data_file, index, patch_shape=None): - if patch_shape: - index, patch_index = index - data, truth = get_data_from_file(data_file, index, patch_shape=None) - x = get_patch_from_3d_data(data, patch_shape, patch_index) - y = get_patch_from_3d_data(truth, patch_shape, patch_index) - else: - x, y = data_file.root.data[index], data_file.root.truth[index, 0] - return x, y - - -def convert_data(x_list, y_list, n_labels=1, labels=None): - x = np.asarray(x_list) - y = np.asarray(y_list) - if n_labels == 1: - y[y > 0] = 1 - elif n_labels > 1: - y = get_multi_class_labels(y, n_labels=n_labels, labels=labels) - return x, y - - -def get_multi_class_labels(data, n_labels, labels=None): - """ - Translates a label map into a set of binary labels. - :param data: numpy array containing the label map with shape: (n_samples, 1, ...). - :param n_labels: number of labels. - :param labels: integer values of the labels. - :return: binary numpy array of shape: (n_samples, n_labels, ...) - """ - new_shape = [data.shape[0], n_labels] + list(data.shape[2:]) - y = np.zeros(new_shape, np.int8) - for label_index in range(n_labels): - if labels is not None: - y[:, label_index][data[:, 0] == labels[label_index]] = 1 - else: - y[:, label_index][data[:, 0] == (label_index + 1)] = 1 - return y -from functools import partial - -from keras import backend as K - - -def dice_coefficient(y_true, y_pred, smooth=1.): - y_true_f = K.flatten(y_true) - y_pred_f = K.flatten(y_pred) - intersection = K.sum(y_true_f * y_pred_f) - return (2. * intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth) - - -def dice_coefficient_loss(y_true, y_pred): - return -dice_coefficient(y_true, y_pred) - - -def weighted_dice_coefficient(y_true, y_pred, axis=(-3, -2, -1), smooth=0.00001): - """ - Weighted dice coefficient. 
Default axis assumes a "channels first" data structure - :param smooth: - :param y_true: - :param y_pred: - :param axis: - :return: - """ - return K.mean(2. * (K.sum(y_true * y_pred, - axis=axis) + smooth/2)/(K.sum(y_true, - axis=axis) + K.sum(y_pred, - axis=axis) + smooth)) - - -def weighted_dice_coefficient_loss(y_true, y_pred): - return -weighted_dice_coefficient(y_true, y_pred) - - -def label_wise_dice_coefficient(y_true, y_pred, label_index): - return dice_coefficient(y_true[:, label_index], y_pred[:, label_index]) - - -def get_label_dice_coefficient_function(label_index): - f = partial(label_wise_dice_coefficient, label_index=label_index) - f.__setattr__('__name__', 'label_{0}_dice_coef'.format(label_index)) - return f - - -dice_coef = dice_coefficient -dice_coef_loss = dice_coefficient_loss -import os - -import numpy as np -from nilearn.image import new_img_like - -from unet3d.utils.utils import resize, read_image_files -from .utils import crop_img, crop_img_to, read_image - - -def find_downsized_info(training_data_files, input_shape): - foreground = get_complete_foreground(training_data_files) - crop_slices = crop_img(foreground, return_slices=True, copy=True) - cropped = crop_img_to(foreground, crop_slices, copy=True) - final_image = resize(cropped, new_shape=input_shape, - interpolation="nearest") - return crop_slices, final_image.affine, final_image.header - - -def get_cropping_parameters(in_files): - if len(in_files) > 1: - foreground = get_complete_foreground(in_files) - else: - foreground = get_foreground_from_set_of_files( - in_files[0], return_image=True) - return crop_img(foreground, return_slices=True, copy=True) - - -def reslice_image_set(in_files, image_shape, out_files=None, label_indices=None, crop=False): - if crop: - crop_slices = get_cropping_parameters([in_files]) - else: - crop_slices = None - images = read_image_files( - in_files, image_shape=image_shape, crop=crop_slices, label_indices=label_indices) - if out_files: - for image, out_file in zip(images, out_files): - image.to_filename(out_file) - return [os.path.abspath(out_file) for out_file in out_files] - else: - return images - - -def get_complete_foreground(training_data_files): - for i, set_of_files in enumerate(training_data_files): - subject_foreground = get_foreground_from_set_of_files(set_of_files) - if i == 0: - foreground = subject_foreground - else: - foreground[subject_foreground > 0] = 1 - - return new_img_like(read_image(training_data_files[0][-1]), foreground) - - -def get_foreground_from_set_of_files(set_of_files, background_value=0, tolerance=0.00001, return_image=False): - for i, image_file in enumerate(set_of_files): - image = read_image(image_file) - is_foreground = np.logical_or(image.get_data() < (background_value - tolerance), - image.get_data() > (background_value + tolerance)) - if i == 0: - foreground = np.zeros(is_foreground.shape, dtype=np.uint8) - - foreground[is_foreground] = 1 - if return_image: - return new_img_like(image, foreground) - else: - return foreground - - -def normalize_data(data, mean, std): - data -= mean[:, np.newaxis, np.newaxis, np.newaxis] - data /= std[:, np.newaxis, np.newaxis, np.newaxis] - return data - - -def normalize_data_storage(data_storage): - means = list() - stds = list() - for index in range(data_storage.shape[0]): - data = data_storage[index] - means.append(data.mean(axis=(1, 2, 3))) - stds.append(data.std(axis=(1, 2, 3))) - mean = np.asarray(means).mean(axis=0) - std = np.asarray(stds).mean(axis=0) - for index in range(data_storage.shape[0]): - 
data_storage[index] = normalize_data(data_storage[index], mean, std)
- return data_storage
-import os
-
-import nibabel as nib
-import numpy as np
-import tables
-
-from .training import load_old_model
-from .utils import pickle_load
-from .utils.patches import reconstruct_from_patches, get_patch_from_3d_data, compute_patch_indices
-from .augment import permute_data, generate_permutation_keys, reverse_permute_data
-
-
-def patch_wise_prediction(model, data, overlap=0, batch_size=1, permute=False):
- """
- Predicts an image patch by patch and reconstructs the full-size prediction.
- :param batch_size: number of patches to predict per call to the model.
- :param model: trained Keras model; its input shape defines the patch shape.
- :param data: array of shape (1, n_channels, x, y, z) to predict on.
- :param overlap: number of voxels by which neighbouring patches overlap.
- :return: prediction array reconstructed from the patch-wise predictions.
- """
- patch_shape = tuple([int(dim) for dim in model.input.shape[-3:]])
- predictions = list()
- indices = compute_patch_indices(
- data.shape[-3:], patch_size=patch_shape, overlap=overlap)
- batch = list()
- i = 0
- while i < len(indices):
- # guard on i so a final, smaller batch cannot index past the patch list
- while len(batch) < batch_size and i < len(indices):
- patch = get_patch_from_3d_data(
- data[0], patch_shape=patch_shape, patch_index=indices[i])
- batch.append(patch)
- i += 1
- prediction = predict(model, np.asarray(batch), permute=permute)
- batch = list()
- for predicted_patch in prediction:
- predictions.append(predicted_patch)
- output_shape = [int(model.output.shape[1])] + list(data.shape[-3:])
- return reconstruct_from_patches(predictions, patch_indices=indices, data_shape=output_shape)
-
-
-def get_prediction_labels(prediction, threshold=0.5, labels=None):
- n_samples = prediction.shape[0]
- label_arrays = []
- for sample_number in range(n_samples):
- label_data = np.argmax(prediction[sample_number], axis=0) + 1
- label_data[np.max(prediction[sample_number], axis=0) < threshold] = 0
- if labels:
- for value in np.unique(label_data).tolist()[1:]:
- label_data[label_data == value] = labels[value - 1]
- label_arrays.append(np.array(label_data, dtype=np.uint8))
- return label_arrays
-
-
-def get_test_indices(testing_file):
- return pickle_load(testing_file)
-
-
-def predict_from_data_file(model, open_data_file, index):
- # add a batch dimension; the stored sample is (n_channels, x, y, z)
- return model.predict(open_data_file.root.data[index][np.newaxis])
-
-
-def predict_and_get_image(model, data, affine):
- return nib.Nifti1Image(model.predict(data)[0, 0], affine)
-
-
-def predict_from_data_file_and_get_image(model, open_data_file, index):
- # use the affine belonging to this sample rather than the whole affine array
- return predict_and_get_image(model, open_data_file.root.data[index][np.newaxis],
- open_data_file.root.affine[index])
-
-
-def predict_from_data_file_and_write_image(model, open_data_file, index, out_file):
- image = predict_from_data_file_and_get_image(model, open_data_file, index)
- image.to_filename(out_file)
-
-
-def prediction_to_image(prediction, affine, label_map=False, threshold=0.5, labels=None):
- if prediction.shape[1] == 1:
- data = prediction[0, 0]
- if label_map:
- label_map_data = np.zeros(prediction[0, 0].shape, np.int8)
- if labels:
- label = labels[0]
- else:
- label = 1
- label_map_data[data > threshold] = label
- data = label_map_data
- elif prediction.shape[1] > 1:
- if label_map:
- label_map_data = get_prediction_labels(
- prediction, threshold=threshold, labels=labels)
- data = label_map_data[0]
- else:
- return multi_class_prediction(prediction, affine)
- else:
- raise RuntimeError(
- "Invalid prediction array shape: {0}".format(prediction.shape))
- return nib.Nifti1Image(data, affine)
-
-
-def multi_class_prediction(prediction, affine):
- prediction_images = []
- for i in range(prediction.shape[1]):
- prediction_images.append(nib.Nifti1Image(prediction[0, i], affine))
- return prediction_images
-
-
-def run_validation_case(data_index, output_dir, model, data_file, training_modalities,
- output_label_map=False, threshold=0.5,
labels=None, overlap=16, permute=False):
- """
- Runs a test case and writes predicted images to file.
- :param data_index: Index into the list of test cases to get an image prediction from.
- :param output_dir: Where to write prediction images.
- :param output_label_map: If True, will write out a single image with one or more labels. Otherwise outputs
- the (sigmoid) prediction values from the model.
- :param threshold: If output_label_map is set to True, this threshold defines the value above which is
- considered a positive result and will be assigned a label.
- :param labels: ordered label values to assign in the output label map.
- :param training_modalities: names of the modalities, used to name the output data files.
- :param data_file: open pytables hdf5 file containing the data, truth and affine arrays.
- :param model: trained Keras model used for the prediction.
- """
- if not os.path.exists(output_dir):
- os.makedirs(output_dir)
-
- affine = data_file.root.affine[data_index]
- test_data = np.asarray([data_file.root.data[data_index]])
- for i, modality in enumerate(training_modalities):
- image = nib.Nifti1Image(test_data[0, i], affine)
- image.to_filename(os.path.join(
- output_dir, "data_{0}.nii.gz".format(modality)))
-
- test_truth = nib.Nifti1Image(data_file.root.truth[data_index][0], affine)
- test_truth.to_filename(os.path.join(output_dir, "truth.nii.gz"))
-
- patch_shape = tuple([int(dim) for dim in model.input.shape[-3:]])
- if patch_shape == test_data.shape[-3:]:
- prediction = predict(model, test_data, permute=permute)
- else:
- prediction = patch_wise_prediction(
- model=model, data=test_data, overlap=overlap, permute=permute)[np.newaxis]
- prediction_image = prediction_to_image(prediction, affine, label_map=output_label_map, threshold=threshold,
- labels=labels)
- if isinstance(prediction_image, list):
- for i, image in enumerate(prediction_image):
- image.to_filename(os.path.join(
- output_dir, "prediction_{0}.nii.gz".format(i + 1)))
- else:
- prediction_image.to_filename(
- os.path.join(output_dir, "prediction.nii.gz"))
-
-
-def run_validation_cases(validation_keys_file, model_file, training_modalities, labels, hdf5_file,
- output_label_map=False, output_dir=".", threshold=0.5, overlap=16, permute=False):
- validation_indices = pickle_load(validation_keys_file)
- model = load_old_model(model_file)
- data_file = tables.open_file(hdf5_file, "r")
- for index in validation_indices:
- if 'subject_ids' in data_file.root:
- case_directory = os.path.join(
- output_dir, data_file.root.subject_ids[index].decode('utf-8'))
- else:
- case_directory = os.path.join(
- output_dir, "validation_case_{}".format(index))
- run_validation_case(data_index=index, output_dir=case_directory, model=model, data_file=data_file,
- training_modalities=training_modalities, output_label_map=output_label_map, labels=labels,
- threshold=threshold, overlap=overlap, permute=permute)
- data_file.close()
-
-
-def predict(model, data, permute=False):
- if permute:
- predictions = list()
- for batch_index in range(data.shape[0]):
- predictions.append(predict_with_permutations(
- model, data[batch_index]))
- return np.asarray(predictions)
- else:
- return model.predict(data)
-
-
-def predict_with_permutations(model, data):
- predictions = list()
- for permutation_key in generate_permutation_keys():
- temp_data = permute_data(data, permutation_key)[np.newaxis]
- predictions.append(reverse_permute_data(
- model.predict(temp_data)[0], permutation_key))
- return np.mean(predictions, axis=0)
-import math
-from functools import partial
-
-from keras import backend as K
-from keras.callbacks import ModelCheckpoint, CSVLogger, LearningRateScheduler, ReduceLROnPlateau, EarlyStopping
-from keras.models import load_model
-
-from
unet3d.metrics import (dice_coefficient, dice_coefficient_loss, dice_coef, dice_coef_loss,
- weighted_dice_coefficient_loss, weighted_dice_coefficient)
-
-K.set_image_dim_ordering('th')
-
-
-# learning rate schedule
-def step_decay(epoch, initial_lrate, drop, epochs_drop):
- return initial_lrate * math.pow(drop, math.floor((1+epoch)/float(epochs_drop)))
-
-
-def get_callbacks(model_file, initial_learning_rate=0.0001, learning_rate_drop=0.5, learning_rate_epochs=None,
- learning_rate_patience=50, logging_file="training.log", verbosity=1,
- early_stopping_patience=None):
- callbacks = list()
- callbacks.append(ModelCheckpoint(model_file, save_best_only=True))
- callbacks.append(CSVLogger(logging_file, append=True))
- if learning_rate_epochs:
- callbacks.append(LearningRateScheduler(partial(step_decay, initial_lrate=initial_learning_rate,
- drop=learning_rate_drop, epochs_drop=learning_rate_epochs)))
- else:
- callbacks.append(ReduceLROnPlateau(factor=learning_rate_drop, patience=learning_rate_patience,
- verbose=verbosity))
- if early_stopping_patience:
- callbacks.append(EarlyStopping(verbose=verbosity,
- patience=early_stopping_patience))
- return callbacks
-
-
-def load_old_model(model_file):
- print("Loading pre-trained model")
- custom_objects = {'dice_coefficient_loss': dice_coefficient_loss, 'dice_coefficient': dice_coefficient,
- 'dice_coef': dice_coef, 'dice_coef_loss': dice_coef_loss,
- 'weighted_dice_coefficient': weighted_dice_coefficient,
- 'weighted_dice_coefficient_loss': weighted_dice_coefficient_loss}
- try:
- from keras_contrib.layers import InstanceNormalization
- custom_objects["InstanceNormalization"] = InstanceNormalization
- except ImportError:
- pass
- try:
- return load_model(model_file, custom_objects=custom_objects)
- except ValueError as error:
- if 'InstanceNormalization' in str(error):
- raise ValueError(str(error) + "\n\nPlease install keras-contrib to use InstanceNormalization:\n"
- "'pip install git+https://www.github.com/keras-team/keras-contrib.git'")
- else:
- raise error
-
-
-def train_model(model, model_file, training_generator, validation_generator, steps_per_epoch, validation_steps,
- initial_learning_rate=0.001, learning_rate_drop=0.5, learning_rate_epochs=None, n_epochs=500,
- learning_rate_patience=20, early_stopping_patience=None):
- """
- Train a Keras model.
- :param early_stopping_patience: If set, training will end early if the validation loss does not improve after the
- specified number of epochs.
- :param learning_rate_patience: If learning_rate_epochs is not set, the learning rate will decrease if the validation
- loss does not improve after the specified number of epochs. (default is 20)
- :param model: Keras model that will be trained.
- :param model_file: Where to save the Keras model.
- :param training_generator: Generator that iterates through the training data.
- :param validation_generator: Generator that iterates through the validation data.
- :param steps_per_epoch: Number of batches that the training generator will provide during a given epoch.
- :param validation_steps: Number of batches that the validation generator will provide during a given epoch.
- :param initial_learning_rate: Learning rate at the beginning of training.
- :param learning_rate_drop: Factor by which the learning rate will decay.
- :param learning_rate_epochs: Number of epochs after which the learning rate will drop.
- :param n_epochs: Total number of epochs to train the model.
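- Example (illustrative; the model file name is hypothetical, the generators and step counts
- are those returned by get_training_and_validation_generators):
- train_model(model, 'tumor_model.h5', train_generator, validation_generator,
- steps_per_epoch=n_train_steps, validation_steps=n_validation_steps)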
- :return: - """ - model.fit_generator(generator=training_generator, - steps_per_epoch=steps_per_epoch, - epochs=n_epochs, - validation_data=validation_generator, - validation_steps=validation_steps, - callbacks=get_callbacks(model_file, - initial_learning_rate=initial_learning_rate, - learning_rate_drop=learning_rate_drop, - learning_rate_epochs=learning_rate_epochs, - learning_rate_patience=learning_rate_patience, - early_stopping_patience=early_stopping_patience)) -from .unet import unet_model_3d -from .isensee2017 import isensee2017_model -from functools import partial - -from keras.layers import Input, LeakyReLU, Add, UpSampling3D, Activation, SpatialDropout3D, Conv3D -from keras.engine import Model -from keras.optimizers import Adam - -from .unet import create_convolution_block, concatenate -from ..metrics import weighted_dice_coefficient_loss - - -create_convolution_block = partial( - create_convolution_block, activation=LeakyReLU, instance_normalization=True) - - -def isensee2017_model(input_shape=(4, 128, 128, 128), n_base_filters=16, depth=5, dropout_rate=0.3, - n_segmentation_levels=3, n_labels=4, optimizer=Adam, initial_learning_rate=5e-4, - loss_function=weighted_dice_coefficient_loss, activation_name="sigmoid"): - """ - This function builds a model proposed by Isensee et al. for the BRATS 2017 competition: - https://www.cbica.upenn.edu/sbia/Spyridon.Bakas/MICCAI_BraTS/MICCAI_BraTS_2017_proceedings_shortPapers.pdf - - This network is highly similar to the model proposed by Kayalibay et al. "CNN-based Segmentation of Medical - Imaging Data", 2017: https://arxiv.org/pdf/1701.03056.pdf - - - :param input_shape: - :param n_base_filters: - :param depth: - :param dropout_rate: - :param n_segmentation_levels: - :param n_labels: - :param optimizer: - :param initial_learning_rate: - :param loss_function: - :param activation_name: - :return: - """ - inputs = Input(input_shape) - - current_layer = inputs - level_output_layers = list() - level_filters = list() - for level_number in range(depth): - n_level_filters = (2**level_number) * n_base_filters - level_filters.append(n_level_filters) - - if current_layer is inputs: - in_conv = create_convolution_block(current_layer, n_level_filters) - else: - in_conv = create_convolution_block( - current_layer, n_level_filters, strides=(2, 2, 2)) - - context_output_layer = create_context_module( - in_conv, n_level_filters, dropout_rate=dropout_rate) - - summation_layer = Add()([in_conv, context_output_layer]) - level_output_layers.append(summation_layer) - current_layer = summation_layer - - segmentation_layers = list() - for level_number in range(depth - 2, -1, -1): - up_sampling = create_up_sampling_module( - current_layer, level_filters[level_number]) - concatenation_layer = concatenate( - [level_output_layers[level_number], up_sampling], axis=1) - localization_output = create_localization_module( - concatenation_layer, level_filters[level_number]) - current_layer = localization_output - if level_number < n_segmentation_levels: - segmentation_layers.insert( - 0, Conv3D(n_labels, (1, 1, 1))(current_layer)) - - output_layer = None - for level_number in reversed(range(n_segmentation_levels)): - segmentation_layer = segmentation_layers[level_number] - if output_layer is None: - output_layer = segmentation_layer - else: - output_layer = Add()([output_layer, segmentation_layer]) - - if level_number > 0: - output_layer = UpSampling3D(size=(2, 2, 2))(output_layer) - - activation_block = Activation(activation_name)(output_layer) - - model = 
Model(inputs=inputs, outputs=activation_block)
- model.compile(optimizer=optimizer(
- lr=initial_learning_rate), loss=loss_function)
- return model
-
-
-def create_localization_module(input_layer, n_filters):
- convolution1 = create_convolution_block(input_layer, n_filters)
- convolution2 = create_convolution_block(
- convolution1, n_filters, kernel=(1, 1, 1))
- return convolution2
-
-
-def create_up_sampling_module(input_layer, n_filters, size=(2, 2, 2)):
- up_sample = UpSampling3D(size=size)(input_layer)
- convolution = create_convolution_block(up_sample, n_filters)
- return convolution
-
-
-def create_context_module(input_layer, n_level_filters, dropout_rate=0.3, data_format="channels_first"):
- convolution1 = create_convolution_block(
- input_layer=input_layer, n_filters=n_level_filters)
- dropout = SpatialDropout3D(
- rate=dropout_rate, data_format=data_format)(convolution1)
- convolution2 = create_convolution_block(
- input_layer=dropout, n_filters=n_level_filters)
- return convolution2
-import numpy as np
-from keras import backend as K
-from keras.engine import Input, Model
-from keras.layers import Conv3D, MaxPooling3D, UpSampling3D, Activation, BatchNormalization, PReLU, Deconvolution3D
-from keras.optimizers import Adam
-
-from unet3d.metrics import dice_coefficient_loss, get_label_dice_coefficient_function, dice_coefficient
-
-K.set_image_data_format("channels_first")
-
-try:
- from keras.engine import merge
-except ImportError:
- from keras.layers.merge import concatenate
-
-
-def unet_model_3d(input_shape, pool_size=(2, 2, 2), n_labels=1, initial_learning_rate=0.00001, deconvolution=False,
- depth=4, n_base_filters=32, include_label_wise_dice_coefficients=False, metrics=dice_coefficient,
- batch_normalization=False, activation_name="sigmoid"):
- """
- Builds the 3D UNet Keras model.
- :param metrics: List of metrics to be calculated during model training (default is dice coefficient).
- :param include_label_wise_dice_coefficients: If True and n_labels is greater than 1, model will report the dice
- coefficient for each label as metric.
- :param n_base_filters: The number of filters that the first layer in the convolution network will have. Following
- layers will contain a multiple of this number. Lowering this number will likely reduce the amount of memory required
- to train the model.
- :param depth: indicates the depth of the U-shape for the model. The greater the depth, the more max pooling
- layers will be added to the model. Lowering the depth may reduce the amount of memory required for training.
- :param input_shape: Shape of the input data (n_channels, x_size, y_size, z_size). The x, y, and z sizes must be
- divisible by the pool size to the power of the depth of the UNet, that is pool_size^depth.
- :param pool_size: Pool size for the max pooling operations.
- :param n_labels: Number of binary labels that the model is learning.
- :param initial_learning_rate: Initial learning rate for the model. This will be decayed during training.
- :param deconvolution: If set to True, will use transpose convolution (deconvolution) instead of up-sampling. This
- increases the amount of memory required during training.
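- For example, with the default pool_size=(2, 2, 2) and depth=4, the rule above requires each
- spatial size to be divisible by 2**4 = 16, so input_shape=(4, 64, 64, 64) is valid while
- (4, 60, 60, 60) is not.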
- :return: Untrained 3D UNet Model - """ - inputs = Input(input_shape) - current_layer = inputs - levels = list() - - # add levels with max pooling - for layer_depth in range(depth): - layer1 = create_convolution_block(input_layer=current_layer, n_filters=n_base_filters*(2**layer_depth), - batch_normalization=batch_normalization) - layer2 = create_convolution_block(input_layer=layer1, n_filters=n_base_filters*(2**layer_depth)*2, - batch_normalization=batch_normalization) - if layer_depth < depth - 1: - current_layer = MaxPooling3D(pool_size=pool_size)(layer2) - levels.append([layer1, layer2, current_layer]) - else: - current_layer = layer2 - levels.append([layer1, layer2]) - - # add levels with up-convolution or up-sampling - for layer_depth in range(depth-2, -1, -1): - up_convolution = get_up_convolution(pool_size=pool_size, deconvolution=deconvolution, - n_filters=current_layer._keras_shape[1])(current_layer) - concat = concatenate([up_convolution, levels[layer_depth][1]], axis=1) - current_layer = create_convolution_block(n_filters=levels[layer_depth][1]._keras_shape[1], - input_layer=concat, batch_normalization=batch_normalization) - current_layer = create_convolution_block(n_filters=levels[layer_depth][1]._keras_shape[1], - input_layer=current_layer, - batch_normalization=batch_normalization) - - final_convolution = Conv3D(n_labels, (1, 1, 1))(current_layer) - act = Activation(activation_name)(final_convolution) - model = Model(inputs=inputs, outputs=act) - - if not isinstance(metrics, list): - metrics = [metrics] - - if include_label_wise_dice_coefficients and n_labels > 1: - label_wise_dice_metrics = [get_label_dice_coefficient_function( - index) for index in range(n_labels)] - if metrics: - metrics = metrics + label_wise_dice_metrics - else: - metrics = label_wise_dice_metrics - - model.compile(optimizer=Adam(lr=initial_learning_rate), - loss=dice_coefficient_loss, metrics=metrics) - return model - - -def create_convolution_block(input_layer, n_filters, batch_normalization=False, kernel=(3, 3, 3), activation=None, - padding='same', strides=(1, 1, 1), instance_normalization=False): - """ - - :param strides: - :param input_layer: - :param n_filters: - :param batch_normalization: - :param kernel: - :param activation: Keras activation layer to use. (default is 'relu') - :param padding: - :return: - """ - layer = Conv3D(n_filters, kernel, padding=padding, - strides=strides)(input_layer) - if batch_normalization: - layer = BatchNormalization(axis=1)(layer) - elif instance_normalization: - try: - from keras_contrib.layers.normalization import InstanceNormalization - except ImportError: - raise ImportError("Install keras_contrib in order to use instance normalization." - "\nTry: pip install git+https://www.github.com/farizrahman4u/keras-contrib.git") - layer = InstanceNormalization(axis=1)(layer) - if activation is None: - return Activation('relu')(layer) - else: - return activation()(layer) - - -def compute_level_output_shape(n_filters, depth, pool_size, image_shape): - """ - Each level has a particular output shape based on the number of filters used in that level and the depth or number - of max pooling operations that have been done on the data at that point. - :param image_shape: shape of the 3d image. - :param pool_size: the pool_size parameter used in the max pooling operation. - :param n_filters: Number of filters used by the last node in a given level. - :param depth: The number of levels down in the U-shaped model a given node is. 
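- For example, n_filters=64, depth=2, pool_size=(2, 2, 2) and image_shape=(64, 64, 64)
- yields (None, 64, 16, 16, 16).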
- :return: 5D vector of the shape of the output node - """ - output_image_shape = np.asarray( - np.divide(image_shape, np.power(pool_size, depth)), dtype=np.int32).tolist() - return tuple([None, n_filters] + output_image_shape) - - -def get_up_convolution(n_filters, pool_size, kernel_size=(2, 2, 2), strides=(2, 2, 2), - deconvolution=False): - if deconvolution: - return Deconvolution3D(filters=n_filters, kernel_size=kernel_size, - strides=strides) - else: - return UpSampling3D(size=pool_size) -from .nilearn_custom_utils.nilearn_utils import crop_img_to, crop_img -from .utils import pickle_dump, pickle_load, read_image -import numpy as np - - -def compute_patch_indices(image_shape, patch_size, overlap, start=None): - if isinstance(overlap, int): - overlap = np.asarray([overlap] * len(image_shape)) - if start is None: - n_patches = np.ceil(image_shape / (patch_size - overlap)) - overflow = (patch_size - overlap) * n_patches - image_shape + overlap - start = -np.ceil(overflow/2) - elif isinstance(start, int): - start = np.asarray([start] * len(image_shape)) - stop = image_shape + start - step = patch_size - overlap - return get_set_of_patch_indices(start, stop, step) - - -def get_set_of_patch_indices(start, stop, step): - return np.asarray(np.mgrid[start[0]:stop[0]:step[0], start[1]:stop[1]:step[1], - start[2]:stop[2]:step[2]].reshape(3, -1).T, dtype=np.int) - - -def get_random_patch_index(image_shape, patch_shape): - """ - Returns a random corner index for a patch. If this is used during training, the middle pixels will be seen by - the model way more often than the edge pixels (which is probably a bad thing). - :param image_shape: Shape of the image - :param patch_shape: Shape of the patch - :return: a tuple containing the corner index which can be used to get a patch from an image - """ - return get_random_nd_index(np.subtract(image_shape, patch_shape)) - - -def get_random_nd_index(index_max): - return tuple([np.random.choice(index_max[index] + 1) for index in range(len(index_max))]) - - -def get_patch_from_3d_data(data, patch_shape, patch_index): - """ - Returns a patch from a numpy array. - :param data: numpy array from which to get the patch. - :param patch_shape: shape/size of the patch. - :param patch_index: corner index of the patch. - :return: numpy array take from the data with the patch shape specified. - """ - patch_index = np.asarray(patch_index, dtype=np.int16) - patch_shape = np.asarray(patch_shape) - image_shape = data.shape[-3:] - if np.any(patch_index < 0) or np.any((patch_index + patch_shape) > image_shape): - data, patch_index = fix_out_of_bound_patch_attempt( - data, patch_shape, patch_index) - return data[..., patch_index[0]:patch_index[0]+patch_shape[0], patch_index[1]:patch_index[1]+patch_shape[1], - patch_index[2]:patch_index[2]+patch_shape[2]] - - -def fix_out_of_bound_patch_attempt(data, patch_shape, patch_index, ndim=3): - """ - Pads the data and alters the patch index so that a patch will be correct. 
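- The data is edge-padded by the amount the patch overhangs the image on each side, and the patch index is shifted
- into the padded array accordingly.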
- :param data: - :param patch_shape: - :param patch_index: - :return: padded data, fixed patch index - """ - image_shape = data.shape[-ndim:] - pad_before = np.abs((patch_index < 0) * patch_index) - pad_after = np.abs(((patch_index + patch_shape) > image_shape) - * ((patch_index + patch_shape) - image_shape)) - pad_args = np.stack([pad_before, pad_after], axis=1) - if pad_args.shape[0] < len(data.shape): - pad_args = [[0, 0]] * \ - (len(data.shape) - pad_args.shape[0]) + pad_args.tolist() - data = np.pad(data, pad_args, mode="edge") - patch_index += pad_before - return data, patch_index - - -def reconstruct_from_patches(patches, patch_indices, data_shape, default_value=0): - """ - Reconstructs an array of the original shape from the lists of patches and corresponding patch indices. Overlapping - patches are averaged. - :param patches: List of numpy array patches. - :param patch_indices: List of indices that corresponds to the list of patches. - :param data_shape: Shape of the array from which the patches were extracted. - :param default_value: The default value of the resulting data. if the patch coverage is complete, this value will - be overwritten. - :return: numpy array containing the data reconstructed by the patches. - """ - data = np.ones(data_shape) * default_value - image_shape = data_shape[-3:] - count = np.zeros(data_shape, dtype=np.int) - for patch, index in zip(patches, patch_indices): - image_patch_shape = patch.shape[-3:] - if np.any(index < 0): - fix_patch = np.asarray((index < 0) * np.abs(index), dtype=np.int) - patch = patch[..., fix_patch[0]:, fix_patch[1]:, fix_patch[2]:] - index[index < 0] = 0 - if np.any((index + image_patch_shape) >= image_shape): - fix_patch = np.asarray(image_patch_shape - (((index + image_patch_shape) >= image_shape) - * ((index + image_patch_shape) - image_shape)), dtype=np.int) - patch = patch[..., :fix_patch[0], :fix_patch[1], :fix_patch[2]] - patch_index = np.zeros(data_shape, dtype=np.bool) - patch_index[..., - index[0]:index[0]+patch.shape[-3], - index[1]:index[1]+patch.shape[-2], - index[2]:index[2]+patch.shape[-1]] = True - patch_data = np.zeros(data_shape) - patch_data[patch_index] = patch.flatten() - - new_data_index = np.logical_and(patch_index, np.logical_not(count > 0)) - data[new_data_index] = patch_data[new_data_index] - - averaged_data_index = np.logical_and(patch_index, count > 0) - if np.any(averaged_data_index): - data[averaged_data_index] = (data[averaged_data_index] * count[averaged_data_index] + - patch_data[averaged_data_index]) / (count[averaged_data_index] + 1) - count[patch_index] += 1 - return data -import SimpleITK as sitk -import numpy as np - - -def calculate_origin_offset(new_spacing, old_spacing): - return np.subtract(new_spacing, old_spacing)/2 - - -def sitk_resample_to_spacing(image, new_spacing=(1.0, 1.0, 1.0), interpolator=sitk.sitkLinear, default_value=0.): - zoom_factor = np.divide(image.GetSpacing(), new_spacing) - new_size = np.asarray(np.ceil(np.round(np.multiply( - zoom_factor, image.GetSize()), decimals=5)), dtype=np.int16) - offset = calculate_origin_offset(new_spacing, image.GetSpacing()) - reference_image = sitk_new_blank_image(size=new_size, spacing=new_spacing, direction=image.GetDirection(), - origin=image.GetOrigin() + offset, default_value=default_value) - return sitk_resample_to_image(image, reference_image, interpolator=interpolator, default_value=default_value) - - -def sitk_resample_to_image(image, reference_image, default_value=0., interpolator=sitk.sitkLinear, transform=None, - 
output_pixel_type=None):
-    if transform is None:
-        transform = sitk.Transform()
-        transform.SetIdentity()
-    if output_pixel_type is None:
-        output_pixel_type = image.GetPixelID()
-    resample_filter = sitk.ResampleImageFilter()
-    resample_filter.SetInterpolator(interpolator)
-    resample_filter.SetTransform(transform)
-    resample_filter.SetOutputPixelType(output_pixel_type)
-    resample_filter.SetDefaultPixelValue(default_value)
-    resample_filter.SetReferenceImage(reference_image)
-    return resample_filter.Execute(image)
-
-
-def sitk_new_blank_image(size, spacing, direction, origin, default_value=0.):
-    image = sitk.GetImageFromArray(
-        np.ones(size, dtype=float).T * default_value)
-    image.SetSpacing(spacing)
-    image.SetDirection(direction)
-    image.SetOrigin(origin)
-    return image
-
-
-def resample_to_spacing(data, spacing, target_spacing, interpolation="linear", default_value=0.):
-    image = data_to_sitk_image(data, spacing=spacing)
-    if interpolation == "linear":
-        interpolator = sitk.sitkLinear
-    elif interpolation == "nearest":
-        interpolator = sitk.sitkNearestNeighbor
-    else:
-        raise ValueError("'interpolation' must be either 'linear' or 'nearest'. '{}' is not recognized".format(
-            interpolation))
-    resampled_image = sitk_resample_to_spacing(image, new_spacing=target_spacing, interpolator=interpolator,
-                                               default_value=default_value)
-    return sitk_image_to_data(resampled_image)
-
-
-def data_to_sitk_image(data, spacing=(1., 1., 1.)):
-    if len(data.shape) == 3:
-        data = np.rot90(data, 1, axes=(0, 2))
-    image = sitk.GetImageFromArray(data)
-    image.SetSpacing(np.asarray(spacing, dtype=float))
-    return image
-
-
-def sitk_image_to_data(image):
-    data = sitk.GetArrayFromImage(image)
-    if len(data.shape) == 3:
-        data = np.rot90(data, -1, axes=(0, 2))
-    return data
-import pickle
-import os
-import collections
-
-import nibabel as nib
-import numpy as np
-from nilearn.image import reorder_img, new_img_like
-
-from .nilearn_custom_utils.nilearn_utils import crop_img_to
-from .sitk_utils import resample_to_spacing, calculate_origin_offset
-
-
-def pickle_dump(item, out_file):
-    with open(out_file, "wb") as opened_file:
-        pickle.dump(item, opened_file)
-
-
-def pickle_load(in_file):
-    with open(in_file, "rb") as opened_file:
-        return pickle.load(opened_file)
-
-
-def get_affine(in_file):
-    return read_image(in_file).affine
-
-
-def read_image_files(image_files, image_shape=None, crop=None, label_indices=None):
-    """
-    Reads a list of image files, optionally cropping and resizing each one.
-    :param image_files: list of paths to the image files to read.
-    :param image_shape: if given, each image is resized to this shape.
-    :param crop: optional crop to apply before resizing.
-    :param label_indices: indices of the files that contain label maps. These are resampled with nearest neighbor
-    interpolation, since linear interpolation would corrupt the label values.
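- :example: read_image_files(['t1.nii.gz', 'seg.nii.gz'], label_indices=[1]) reads both volumes and resamples the
- segmentation with nearest neighbor interpolation (file names are illustrative).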
- :return: - """ - if label_indices is None: - label_indices = [] - elif not isinstance(label_indices, collections.Iterable) or isinstance(label_indices, str): - label_indices = [label_indices] - image_list = list() - for index, image_file in enumerate(image_files): - if (label_indices is None and (index + 1) == len(image_files)) \ - or (label_indices is not None and index in label_indices): - interpolation = "nearest" - else: - interpolation = "linear" - image_list.append(read_image( - image_file, image_shape=image_shape, crop=crop, interpolation=interpolation)) - - return image_list - - -def read_image(in_file, image_shape=None, interpolation='linear', crop=None): - print("Reading: {0}".format(in_file)) - image = nib.load(os.path.abspath(in_file)) - image = fix_shape(image) - if crop: - image = crop_img_to(image, crop, copy=True) - if image_shape: - return resize(image, new_shape=image_shape, interpolation=interpolation) - else: - return image - - -def fix_shape(image): - if image.shape[-1] == 1: - return image.__class__(dataobj=np.squeeze(image.get_data()), affine=image.affine) - return image - - -def resize(image, new_shape, interpolation="linear"): - image = reorder_img(image, resample=interpolation) - zoom_level = np.divide(new_shape, image.shape) - new_spacing = np.divide(image.header.get_zooms(), zoom_level) - new_data = resample_to_spacing(image.get_data(), image.header.get_zooms(), new_spacing, - interpolation=interpolation) - new_affine = np.copy(image.affine) - np.fill_diagonal(new_affine, new_spacing.tolist() + [1]) - new_affine[:3, - 3] += calculate_origin_offset(new_spacing, image.header.get_zooms()) - return new_img_like(image, new_data, affine=new_affine) -from __future__ import print_function - -import os -import numpy as np - -from skimage.io import imsave, imread - -data_path = 'raw/' - -image_rows = 420 -image_cols = 580 - - -def create_train_data(): - train_data_path = os.path.join(data_path, 'train') - images = os.listdir(train_data_path) - total = len(images) / 2 - - imgs = np.ndarray((total, image_rows, image_cols), dtype=np.uint8) - imgs_mask = np.ndarray((total, image_rows, image_cols), dtype=np.uint8) - - i = 0 - print('-'*30) - print('Creating training images...') - print('-'*30) - for image_name in images: - if 'mask' in image_name: - continue - image_mask_name = image_name.split('.')[0] + '_mask.tif' - img = imread(os.path.join(train_data_path, image_name), as_grey=True) - img_mask = imread(os.path.join( - train_data_path, image_mask_name), as_grey=True) - - img = np.array([img]) - img_mask = np.array([img_mask]) - - imgs[i] = img - imgs_mask[i] = img_mask - - if i % 100 == 0: - print('Done: {0}/{1} images'.format(i, total)) - i += 1 - print('Loading done.') - - np.save('imgs_train.npy', imgs) - np.save('imgs_mask_train.npy', imgs_mask) - print('Saving to .npy files done.') - - -def load_train_data(): - imgs_train = np.load('imgs_train.npy') - imgs_mask_train = np.load('imgs_mask_train.npy') - return imgs_train, imgs_mask_train - - -def create_test_data(): - train_data_path = os.path.join(data_path, 'test') - images = os.listdir(train_data_path) - total = len(images) - - imgs = np.ndarray((total, image_rows, image_cols), dtype=np.uint8) - imgs_id = np.ndarray((total, ), dtype=np.int32) - - i = 0 - print('-'*30) - print('Creating test images...') - print('-'*30) - for image_name in images: - img_id = int(image_name.split('.')[0]) - img = imread(os.path.join(train_data_path, image_name), as_grey=True) - - img = np.array([img]) - - imgs[i] = img - imgs_id[i] = 
img_id - - if i % 100 == 0: - print('Done: {0}/{1} images'.format(i, total)) - i += 1 - print('Loading done.') - - np.save('imgs_test.npy', imgs) - np.save('imgs_id_test.npy', imgs_id) - print('Saving to .npy files done.') - - -def load_test_data(): - imgs_test = np.load('imgs_test.npy') - imgs_id = np.load('imgs_id_test.npy') - return imgs_test, imgs_id - - -if __name__ == '__main__': - create_train_data() - create_test_data() -from __future__ import print_function - -import numpy as np -from skimage.transform import resize -from data import image_cols, image_rows - - -def prep(img): - img = img.astype('float32') - img = (img > 0.5).astype(np.uint8) # threshold - img = resize(img, (image_cols, image_rows), preserve_range=True) - return img - - -def run_length_enc(label): - from itertools import chain - x = label.transpose().flatten() - y = np.where(x > 0)[0] - if len(y) < 10: # consider as empty - return '' - z = np.where(np.diff(y) > 1)[0] - start = np.insert(y[z+1], 0, y[0]) - end = np.append(y[z], y[-1]) - length = end - start - res = [[s+1, l+1] for s, l in zip(list(start), list(length))] - res = list(chain.from_iterable(res)) - return ' '.join([str(r) for r in res]) - - -def submission(): - from data import load_test_data - imgs_test, imgs_id_test = load_test_data() - imgs_test = np.load('imgs_mask_test.npy') - - argsort = np.argsort(imgs_id_test) - imgs_id_test = imgs_id_test[argsort] - imgs_test = imgs_test[argsort] - - total = imgs_test.shape[0] - ids = [] - rles = [] - for i in range(total): - img = imgs_test[i, 0] - img = prep(img) - rle = run_length_enc(img) - - rles.append(rle) - ids.append(imgs_id_test[i]) - - if i % 100 == 0: - print('{}/{}'.format(i, total)) - - first_row = 'img,pixels' - file_name = 'submission.csv' - - with open(file_name, 'w+') as f: - f.write(first_row + '\n') - for i in range(total): - s = str(ids[i]) + ',' + rles[i] - f.write(s + '\n') - - -if __name__ == '__main__': - submission() -from __future__ import print_function - -import os -from skimage.transform import resize -from skimage.io import imsave -import numpy as np -from keras.models import Model -from keras.layers import Input, concatenate, Conv2D, MaxPooling2D, Conv2DTranspose -from keras.optimizers import Adam -from keras.callbacks import ModelCheckpoint -from keras import backend as K - -from data import load_train_data, load_test_data - -K.set_image_data_format('channels_last') # TF dimension ordering in this code - -img_rows = 96 -img_cols = 96 - -smooth = 1. - - -def dice_coef(y_true, y_pred): - y_true_f = K.flatten(y_true) - y_pred_f = K.flatten(y_pred) - intersection = K.sum(y_true_f * y_pred_f) - return (2. 
* intersection + smooth) / (K.sum(y_true_f) + K.sum(y_pred_f) + smooth) - - -def dice_coef_loss(y_true, y_pred): - return -dice_coef(y_true, y_pred) - - -def get_unet(): - inputs = Input((img_rows, img_cols, 1)) - conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(inputs) - conv1 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv1) - pool1 = MaxPooling2D(pool_size=(2, 2))(conv1) - - conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(pool1) - conv2 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv2) - pool2 = MaxPooling2D(pool_size=(2, 2))(conv2) - - conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(pool2) - conv3 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv3) - pool3 = MaxPooling2D(pool_size=(2, 2))(conv3) - - conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(pool3) - conv4 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv4) - pool4 = MaxPooling2D(pool_size=(2, 2))(conv4) - - conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(pool4) - conv5 = Conv2D(512, (3, 3), activation='relu', padding='same')(conv5) - - up6 = concatenate([Conv2DTranspose(256, (2, 2), strides=( - 2, 2), padding='same')(conv5), conv4], axis=3) - conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(up6) - conv6 = Conv2D(256, (3, 3), activation='relu', padding='same')(conv6) - - up7 = concatenate([Conv2DTranspose(128, (2, 2), strides=( - 2, 2), padding='same')(conv6), conv3], axis=3) - conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(up7) - conv7 = Conv2D(128, (3, 3), activation='relu', padding='same')(conv7) - - up8 = concatenate([Conv2DTranspose(64, (2, 2), strides=( - 2, 2), padding='same')(conv7), conv2], axis=3) - conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(up8) - conv8 = Conv2D(64, (3, 3), activation='relu', padding='same')(conv8) - - up9 = concatenate([Conv2DTranspose(32, (2, 2), strides=( - 2, 2), padding='same')(conv8), conv1], axis=3) - conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(up9) - conv9 = Conv2D(32, (3, 3), activation='relu', padding='same')(conv9) - - conv10 = Conv2D(1, (1, 1), activation='sigmoid')(conv9) - - model = Model(inputs=[inputs], outputs=[conv10]) - - model.compile(optimizer=Adam(lr=1e-5), - loss=dice_coef_loss, metrics=[dice_coef]) - - return model - - -def preprocess(imgs): - imgs_p = np.ndarray((imgs.shape[0], img_rows, img_cols), dtype=np.uint8) - for i in range(imgs.shape[0]): - imgs_p[i] = resize(imgs[i], (img_cols, img_rows), preserve_range=True) - - imgs_p = imgs_p[..., np.newaxis] - return imgs_p - - -def train_and_predict(): - print('-'*30) - print('Loading and preprocessing train data...') - print('-'*30) - imgs_train, imgs_mask_train = load_train_data() - - imgs_train = preprocess(imgs_train) - imgs_mask_train = preprocess(imgs_mask_train) - - imgs_train = imgs_train.astype('float32') - mean = np.mean(imgs_train) # mean for data centering - std = np.std(imgs_train) # std for data normalization - - imgs_train -= mean - imgs_train /= std - - imgs_mask_train = imgs_mask_train.astype('float32') - imgs_mask_train /= 255. 
# scale masks to [0, 1] - - print('-'*30) - print('Creating and compiling model...') - print('-'*30) - model = get_unet() - model_checkpoint = ModelCheckpoint( - 'weights.h5', monitor='val_loss', save_best_only=True) - - print('-'*30) - print('Fitting model...') - print('-'*30) - model.fit(imgs_train, imgs_mask_train, batch_size=32, nb_epoch=20, verbose=1, shuffle=True, - validation_split=0.2, - callbacks=[model_checkpoint]) - - print('-'*30) - print('Loading and preprocessing test data...') - print('-'*30) - imgs_test, imgs_id_test = load_test_data() - imgs_test = preprocess(imgs_test) - - imgs_test = imgs_test.astype('float32') - imgs_test -= mean - imgs_test /= std - - print('-'*30) - print('Loading saved weights...') - print('-'*30) - model.load_weights('weights.h5') - - print('-'*30) - print('Predicting masks on test data...') - print('-'*30) - imgs_mask_test = model.predict(imgs_test, verbose=1) - np.save('imgs_mask_test.npy', imgs_mask_test) - - print('-' * 30) - print('Saving predicted masks to files...') - print('-' * 30) - pred_dir = 'preds' - if not os.path.exists(pred_dir): - os.mkdir(pred_dir) - for image, image_id in zip(imgs_mask_test, imgs_id_test): - image = (image[:, :, 0] * 255.).astype(np.uint8) - imsave(os.path.join(pred_dir, str(image_id) + '_pred.png'), image) - - -if __name__ == '__main__': - train_and_predict() -#!/usr/bin/env python -import sys -import os -from argparse import ArgumentParser -from os.path import basename - -from classes.inference.Sampler import * - - -def build_parser(): - parser = ArgumentParser() - parser.add_argument('--pngs_path', type=str, - dest='pngs_path', help='png folder to convert into HTML', - required=True) - parser.add_argument('--output_folder', type=str, - dest='output_folder', help='dir to save generated gui and html', - required=True) - parser.add_argument('--model_json_file', type=str, - dest='model_json_file', help='trained model json file', - required=True) - parser.add_argument('--model_weights_file', type=str, - dest='model_weights_file', help='trained model weights file', required=True) - parser.add_argument('--print_bleu_score', type=int, - dest='print_bleu_score', help='see BLEU score for single example', default=0) - parser.add_argument('--original_guis_filepath', type=str, - dest='original_guis_filepath', help='if getting BLEU score, provide original guis folder filepath', default=None) - parser.add_argument('--style', type=str, - dest='style', help='style to use for generation', default='default') - return parser - - -def main(): - parser = build_parser() - options = parser.parse_args() - pngs_path = options.pngs_path - output_folder = options.output_folder - model_json_file = options.model_json_file - model_weights_file = options.model_weights_file - print_bleu_score = options.print_bleu_score - original_guis_filepath = options.original_guis_filepath - style = options.style - - if not os.path.exists(output_folder): - os.makedirs(output_folder) - - # Create sampler - sampler = Sampler(model_json_path=model_json_file, - model_weights_path=model_weights_file) - - # Sample and retrieve BLEU - sampler.convert_batch_of_images(output_folder, pngs_path=pngs_path, get_corpus_bleu=print_bleu_score, - original_guis_filepath=original_guis_filepath, style=style) - - -if __name__ == "__main__": - main() -#!/usr/bin/env python -import sys -import os -from argparse import ArgumentParser -from os.path import basename - -from classes.inference.Sampler import * - - -def build_parser(): - parser = ArgumentParser() - 
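- # Required arguments: the input png, the output folder and the trained model's json/weights files;
- # the remaining flags (style, printing, BLEU scoring) are optional.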
parser.add_argument('--png_path', type=str, - dest='png_path', help='png filepath to convert into HTML', - required=True) - parser.add_argument('--output_folder', type=str, - dest='output_folder', help='dir to save generated gui and html', - required=True) - parser.add_argument('--model_json_file', type=str, - dest='model_json_file', help='trained model json file', - required=True) - parser.add_argument('--model_weights_file', type=str, - dest='model_weights_file', help='trained model weights file', required=True) - parser.add_argument('--style', type=str, - dest='style', help='style to use for generation', default='default') - parser.add_argument('--print_generated_output', type=int, - dest='print_generated_output', help='see generated GUI output in terminal', default=1) - parser.add_argument('--print_bleu_score', type=int, - dest='print_bleu_score', help='see BLEU score for single example', default=0) - parser.add_argument('--original_gui_filepath', type=str, - dest='original_gui_filepath', help='if getting BLEU score, provide original gui filepath', default=None) - - return parser - - -def main(): - parser = build_parser() - options = parser.parse_args() - png_path = options.png_path - output_folder = options.output_folder - model_json_file = options.model_json_file - model_weights_file = options.model_weights_file - style = options.style - print_generated_output = options.print_generated_output - print_bleu_score = options.print_bleu_score - original_gui_filepath = options.original_gui_filepath - - if not os.path.exists(output_folder): - os.makedirs(output_folder) - - sampler = Sampler(model_json_path=model_json_file, - model_weights_path=model_weights_file) - sampler.convert_single_image(output_folder, png_path=png_path, print_generated_output=print_generated_output, - get_sentence_bleu=print_bleu_score, original_gui_filepath=original_gui_filepath, style=style) - - -if __name__ == "__main__": - main() -#!/usr/bin/env python -from __future__ import print_function - -from argparse import ArgumentParser - -from classes.inference.Evaluator import * - - -def build_parser(): - parser = ArgumentParser() - parser.add_argument('--original_guis_filepath', type=str, - dest='original_guis_filepath', help='dir with all original guis', - required=True) - parser.add_argument('--predicted_guis_filepath', type=str, - dest='predicted_guis_filepath', help='dir with all predicted guis', - required=True) - return parser - - -def main(): - - parser = build_parser() - options = parser.parse_args() - original_guis_filepath = options.original_guis_filepath - predicted_guis_filepath = options.predicted_guis_filepath - - bleu_score = Evaluator.get_corpus_bleu( - original_guis_filepath, predicted_guis_filepath) - print("BLEU score for batch of GUIs: {}".format(bleu_score)) - - -if __name__ == "__main__": - main() -from __future__ import print_function -from __future__ import absolute_import - -from argparse import ArgumentParser -from nltk.translate.bleu_score import sentence_bleu, corpus_bleu - -from classes.inference.Evaluator import * - - -def build_parser(): - parser = ArgumentParser() - parser.add_argument('--original_gui_filepath', type=str, - dest='original_gui_filepath', help='filepath of original gui file', - required=True) - parser.add_argument('--predicted_gui_filepath', type=str, - dest='predicted_gui_filepath', help='filepath of original gui file', - required=True) - return parser - - -def main(): - - parser = build_parser() - options = parser.parse_args() - original_gui_filepath = 
options.original_gui_filepath - predicted_gui_filepath = options.predicted_gui_filepath - - bleu_score = Evaluator.get_sentence_bleu( - original_gui_filepath, predicted_gui_filepath) - print("BLEU score for single GUI: {}".format(bleu_score)) - - -if __name__ == "__main__": - main() -#!/usr/bin/env python -from __future__ import print_function -from __future__ import absolute_import - -from argparse import ArgumentParser - -from classes.model.SketchCodeModel import * - -VAL_SPLIT = 0.2 - - -def build_parser(): - parser = ArgumentParser() - parser.add_argument('--data_input_path', type=str, - dest='data_input_path', help='directory containing images and guis', - required=True) - parser.add_argument('--validation_split', type=float, - dest='validation_split', help='portion of training data for validation set', - default=VAL_SPLIT) - parser.add_argument('--epochs', type=int, - dest='epochs', help='number of epochs to train on', - required=True) - parser.add_argument('--model_output_path', type=str, - dest='model_output_path', help='directory for saving model data', - required=True) - parser.add_argument('--model_json_file', type=str, - dest='model_json_file', help='pretrained model json file', - required=False) - parser.add_argument('--model_weights_file', type=str, - dest='model_weights_file', help='pretrained model weights file', - required=False) - parser.add_argument('--augment_training_data', type=int, - dest='augment_training_data', help='use Keras image augmentation on training data', - default=1) - return parser - - -def main(): - - parser = build_parser() - options = parser.parse_args() - data_input_path = options.data_input_path - validation_split = options.validation_split - epochs = options.epochs - model_output_path = options.model_output_path - model_json_file = options.model_json_file - model_weights_file = options.model_weights_file - augment_training_data = options.augment_training_data - - # Load model - model = SketchCodeModel( - model_output_path, model_json_file, model_weights_file) - - # Create the model output path if it doesn't exist - if not os.path.exists(model_output_path): - os.makedirs(model_output_path) - - # Split the datasets and save down image arrays - training_path, validation_path = ModelUtils.prepare_data_for_training( - data_input_path, validation_split, augment_training_data) - - # Begin model training - model.train(training_path=training_path, - validation_path=validation_path, - epochs=epochs) - - -if __name__ == "__main__": - main() -from __future__ import absolute_import - -import os -import shutil -import pdb -import hashlib -import numpy as np - -from keras.preprocessing.text import Tokenizer, one_hot -from keras.preprocessing.sequence import pad_sequences -from keras.utils import to_categorical - -from .ImagePreprocessor import * - -VOCAB_FILE = '../vocabulary.vocab' -TRAINING_SET_NAME = "training_set" -VALIDATION_SET_NAME = "validation_set" -BATCH_SIZE = 64 - - -class Dataset: - - def __init__(self, data_input_folder, test_set_folder=None): - self.data_input_folder = data_input_folder - self.test_set_folder = test_set_folder - - def split_datasets(self, validation_split): - sample_ids = self.populate_sample_ids() - print("Total number of samples: ", len(sample_ids)) - - train_set_ids, val_set_ids, shuffled_sampled_ids = self.get_all_id_sets( - validation_split, sample_ids) - training_path, validation_path = self.split_samples( - train_set_ids, val_set_ids) - - return training_path, validation_path - - def split_samples(self, train_set_ids, 
val_set_ids): - training_path, validation_path = self.create_data_folders() - self.copy_files_to_folders(train_set_ids, training_path) - self.copy_files_to_folders(val_set_ids, validation_path) - return training_path, validation_path - - def preprocess_data(self, training_path, validation_path, augment_training_data): - train_img_preprocessor = ImagePreprocessor() - train_img_preprocessor.build_image_dataset( - training_path, augment_data=augment_training_data) - val_img_preprocessor = ImagePreprocessor() - val_img_preprocessor.build_image_dataset( - validation_path, augment_data=0) - - ########################################## - ####### PRIVATE METHODS ################## - ########################################## - - @classmethod - def load_vocab(cls): - file = open(VOCAB_FILE, 'r') - text = file.read().splitlines()[0] - file.close() - tokenizer = Tokenizer(filters='', split=" ", lower=False) - tokenizer.fit_on_texts([text]) - vocab_size = len(tokenizer.word_index) + 1 - return tokenizer, vocab_size - - @classmethod - def create_generator(cls, data_input_path, max_sequences): - img_features, text_features = Dataset.load_data(data_input_path) - total_sequences = 0 - for text_set in text_features: - total_sequences += len(text_set.split()) - steps_per_epoch = total_sequences // BATCH_SIZE - tokenizer, vocab_size = Dataset.load_vocab() - data_gen = Dataset.data_generator( - text_features, img_features, max_sequences, tokenizer, vocab_size) - return data_gen, steps_per_epoch - - @classmethod - def data_generator(cls, text_features, img_features, max_sequences, tokenizer, vocab_size): - while 1: - for i in range(0, len(text_features), 1): - Ximages, XSeq, y = list(), list(), list() - for j in range(i, min(len(text_features), i+1)): - image = img_features[j] - desc = text_features[j] - in_img, in_seq, out_word = Dataset.process_data_for_generator( - [desc], [image], max_sequences, tokenizer, vocab_size) - for k in range(len(in_img)): - Ximages.append(in_img[k]) - XSeq.append(in_seq[k]) - y.append(out_word[k]) - yield [[np.array(Ximages), np.array(XSeq)], np.array(y)] - - @classmethod - def process_data_for_generator(cls, texts, features, max_sequences, tokenizer, vocab_size): - X, y, image_data = list(), list(), list() - sequences = tokenizer.texts_to_sequences(texts) - for img_no, seq in enumerate(sequences): - for i in range(1, len(seq)): - in_seq, out_seq = seq[:i], seq[i] - in_seq = pad_sequences([in_seq], maxlen=max_sequences)[0] - out_seq = to_categorical([out_seq], num_classes=vocab_size)[0] - image_data.append(features[img_no]) - X.append(in_seq[-48:]) - y.append(out_seq) - return np.array(image_data), np.array(X), np.array(y) - - @classmethod - def load_data(cls, data_input_path): - text = [] - images = [] - all_filenames = os.listdir(data_input_path) - all_filenames.sort() - for filename in all_filenames: - if filename[-3:] == "npz": - image = np.load(data_input_path+'/'+filename) - images.append(image['features']) - elif filename[-3:] == 'gui': - file = open(data_input_path+'/'+filename, 'r') - texts = file.read() - file.close() - syntax = ' ' + texts + ' ' - syntax = ' '.join(syntax.split()) - syntax = syntax.replace(',', ' ,') - text.append(syntax) - images = np.array(images, dtype=float) - return images, text - - def create_data_folders(self): - training_path = "{}/{}".format(os.path.dirname( - self.data_input_folder), TRAINING_SET_NAME) - validation_path = "{}/{}".format(os.path.dirname( - self.data_input_folder), VALIDATION_SET_NAME) - - 
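- # Drop any split folders left over from a previous run, then recreate them empty.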
self.delete_existing_folders(training_path) - self.delete_existing_folders(validation_path) - - if not os.path.exists(training_path): - os.makedirs(training_path) - if not os.path.exists(validation_path): - os.makedirs(validation_path) - return training_path, validation_path - - def copy_files_to_folders(self, sample_ids, output_folder): - copied_count = 0 - for sample_id in sample_ids: - sample_id_png_path = "{}/{}.png".format( - self.data_input_folder, sample_id) - sample_id_gui_path = "{}/{}.gui".format( - self.data_input_folder, sample_id) - if os.path.exists(sample_id_png_path) and os.path.exists(sample_id_gui_path): - output_png_path = "{}/{}.png".format(output_folder, sample_id) - output_gui_path = "{}/{}.gui".format(output_folder, sample_id) - shutil.copyfile(sample_id_png_path, output_png_path) - shutil.copyfile(sample_id_gui_path, output_gui_path) - copied_count += 1 - print("Moved {} files from {} to {}".format( - copied_count, self.data_input_folder, output_folder)) - - def delete_existing_folders(self, folder_to_delete): - if os.path.exists(folder_to_delete): - shutil.rmtree(folder_to_delete) - print("Deleted existing folder: {}".format(folder_to_delete)) - - def populate_sample_ids(self): - all_sample_ids = [] - full_path = os.path.realpath(self.data_input_folder) - for f in os.listdir(full_path): - if f.find(".gui") != -1: - file_name = f[:f.find(".gui")] - if os.path.isfile("{}/{}.png".format(self.data_input_folder, file_name)): - all_sample_ids.append(file_name) - return all_sample_ids - - def get_all_id_sets(self, validation_split, sample_ids): - np.random.shuffle(sample_ids) - val_count = int(validation_split * len(sample_ids)) - train_count = len(sample_ids) - val_count - print("Splitting datasets, training samples: {}, validation samples: {}".format( - train_count, val_count)) - train_set, val_set = self.split_paths( - sample_ids, train_count, val_count) - - return train_set, val_set, sample_ids - - def split_paths(self, sample_ids, train_count, val_count): - train_set = [] - val_set = [] - hashes = [] - for sample_id in sample_ids: - f = open("{}/{}.gui".format(self.data_input_folder, - sample_id), 'r', encoding='utf-8') - - with f: - chars = "" - for line in f: - chars += line - content_hash = chars.replace(" ", "").replace("\n", "") - content_hash = hashlib.sha256( - content_hash.encode('utf-8')).hexdigest() - - if len(val_set) == val_count: - train_set.append(sample_id) - else: - is_unique = True - for h in hashes: - if h is content_hash: - is_unique = False - break - - if is_unique: - val_set.append(sample_id) - else: - train_set.append(sample_id) - - hashes.append(content_hash) - - assert len(val_set) == val_count - - return train_set, val_set -from __future__ import absolute_import - -import os -import sys -import shutil - -import numpy as np -from PIL import Image -import cv2 -from keras.preprocessing.image import ImageDataGenerator - - -class ImagePreprocessor: - - def __init__(self): - pass - - def build_image_dataset(self, data_input_folder, augment_data=True): - - print("Converting images from {} into arrays, augmentation: {}".format( - data_input_folder, augment_data)) - resized_img_arrays, sample_ids = self.get_resized_images( - data_input_folder) - - if augment_data == 1: - self.augment_and_save_images( - resized_img_arrays, sample_ids, data_input_folder) - else: - self.save_resized_img_arrays( - resized_img_arrays, sample_ids, data_input_folder) - - def get_img_features(self, png_path): - img_features = self.resize_img(png_path) - 
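- # resize_img pads the 200x200 preprocessed sketch onto a 256x256 canvas scaled to [0, 1],
- # so a fixed (256, 256, 3) shape is expected here.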
assert(img_features.shape == (256, 256, 3)) - return img_features - - ########################################## - ####### PRIVATE METHODS ################## - ########################################## - - def save_resized_img_arrays(self, resized_img_arrays, sample_ids, output_folder): - count = 0 - for img_arr, sample_id in zip(resized_img_arrays, sample_ids): - npz_filename = "{}/{}.npz".format(output_folder, sample_id) - np.savez_compressed(npz_filename, features=img_arr) - retrieve = np.load(npz_filename)["features"] - assert np.array_equal(img_arr, retrieve) - count += 1 - print("Saved down {} resized images to folder {}".format( - count, output_folder)) - del resized_img_arrays - - def augment_and_save_images(self, resized_img_arrays, sample_ids, data_input_folder): - datagen = ImageDataGenerator( - rotation_range=2, - width_shift_range=0.05, - height_shift_range=0.05, - zoom_range=0.05 - ) - keras_generator = datagen.flow( - resized_img_arrays, sample_ids, batch_size=1) - count = 0 - for i in range(len(resized_img_arrays)): - img_arr, sample_id = next(keras_generator) - img_arr = np.squeeze(img_arr) - npz_filename = "{}/{}.npz".format(data_input_folder, sample_id[0]) - im = Image.fromarray(img_arr.astype('uint8')) - np.savez_compressed(npz_filename, features=img_arr) - retrieve = np.load(npz_filename)["features"] - assert np.array_equal(img_arr, retrieve) - count += 1 - print("Saved down {} augmented images to folder {}".format( - count, data_input_folder)) - del resized_img_arrays - - def get_resized_images(self, pngs_input_folder): - all_files = os.listdir(pngs_input_folder) - png_files = [f for f in all_files if f.find(".png") != -1] - images = [] - labels = [] - for png_file_path in png_files: - png_path = "{}/{}".format(pngs_input_folder, png_file_path) - sample_id = png_file_path[:png_file_path.find('.png')] - resized_img_arr = self.resize_img(png_path) - images.append(resized_img_arr) - labels.append(sample_id) - return np.array(images), np.array(labels) - - def resize_img(self, png_file_path): - img_rgb = cv2.imread(png_file_path) - img_grey = cv2.cvtColor(img_rgb, cv2.COLOR_BGR2GRAY) - img_adapted = cv2.adaptiveThreshold( - img_grey, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 101, 9) - img_stacked = np.repeat(img_adapted[..., None], 3, axis=2) - resized = cv2.resize(img_stacked, (200, 200), - interpolation=cv2.INTER_AREA) - bg_img = 255 * np.ones(shape=(256, 256, 3)) - bg_img[27:227, 27:227, :] = resized - bg_img /= 255 - return bg_img -from __future__ import print_function -from __future__ import absolute_import - -import os -import json - -from .Node import * - -BASE_DIR_NAME = os.path.dirname(__file__) -DEFAULT_DSL_MAPPING_FILEPATH = "{}/styles/default-dsl-mapping.json".format( - BASE_DIR_NAME) -FACEBOOK_DSL_MAPPING_FILEPATH = "{}/styles/facebook_dsl_mapping.json".format( - BASE_DIR_NAME) -AIRBNB_DSL_MAPPING_FILEPATH = "{}/styles/airbnb_dsl_mapping.json".format( - BASE_DIR_NAME) - - -class Compiler: - def __init__(self, style): - style_json = self.get_stylesheet(style) - with open(style_json) as data_file: - self.dsl_mapping = json.load(data_file) - - self.opening_tag = self.dsl_mapping["opening-tag"] - self.closing_tag = self.dsl_mapping["closing-tag"] - self.content_holder = self.opening_tag + self.closing_tag - - self.root = Node("body", None, self.content_holder) - - def get_stylesheet(self, style): - if style == 'default': - return DEFAULT_DSL_MAPPING_FILEPATH - elif style == 'facebook': - return FACEBOOK_DSL_MAPPING_FILEPATH - elif style == 'airbnb': - 
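- # Each supported style name resolves to a DSL-to-HTML mapping stylesheet bundled under styles/.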
return AIRBNB_DSL_MAPPING_FILEPATH - - def compile(self, generated_gui): - dsl_file = generated_gui - - # Parse fix - dsl_file = dsl_file[1:-1] - dsl_file = ' '.join(dsl_file) - dsl_file = dsl_file.replace('{', '{8').replace('}', '8}8') - dsl_file = dsl_file.replace(' ', '') - dsl_file = dsl_file.split('8') - dsl_file = list(filter(None, dsl_file)) - - current_parent = self.root - for token in dsl_file: - token = token.replace(" ", "").replace("\n", "") - - if token.find(self.opening_tag) != -1: - token = token.replace(self.opening_tag, "") - element = Node(token, current_parent, self.content_holder) - current_parent.add_child(element) - current_parent = element - elif token.find(self.closing_tag) != -1: - current_parent = current_parent.parent - else: - tokens = token.split(",") - for t in tokens: - element = Node(t, current_parent, self.content_holder) - current_parent.add_child(element) - - output_html = self.root.render(self.dsl_mapping) - if output_html is None: - return "HTML Parsing Error" - - return output_html -from __future__ import print_function -from __future__ import absolute_import - -import pdb -import os -import operator -from nltk.translate.bleu_score import sentence_bleu, corpus_bleu - - -class Evaluator: - def __init__(self): - pass - - @classmethod - def get_sentence_bleu(cls, original_gui_filepath, generated_gui_filepath): - original_gui = Evaluator.load_gui_doc(original_gui_filepath) - generated_gui = Evaluator.load_gui_doc(generated_gui_filepath) - hypothesis = generated_gui[1:-1] - reference = original_gui - references = [reference] - return sentence_bleu(references, hypothesis) - - @classmethod - def get_corpus_bleu(cls, original_guis_filepath, predicted_guis_filepath): - actuals, predicted = Evaluator.load_guis_from_folder( - original_guis_filepath, predicted_guis_filepath) - regular_bleu = corpus_bleu(actuals, predicted) - return regular_bleu - - @classmethod - def load_gui_doc(cls, gui_filepath): - file = open(gui_filepath, 'r') - gui = file.read() - file.close() - gui = ' '.join(gui.split()) - gui = gui.replace(',', ' ,') - gui = gui.split() - - # Predicted images don't have color so we normalize all buttons to btn-orange or btn-active - btns_to_replace = ['btn-green', 'btn-red'] - normalized_gui = [ - 'btn-orange' if token in btns_to_replace else token for token in gui] - normalized_gui = ['btn-active' if token == - 'btn-inactive' else token for token in normalized_gui] - return normalized_gui - - @classmethod - def load_guis_from_folder(cls, original_guis_filepath, predicted_guis_filepath): - actuals, predicted = list(), list() - all_files = os.listdir(predicted_guis_filepath) - all_predicted_files = os.listdir(predicted_guis_filepath) - all_predicted_guis = [ - f for f in all_predicted_files if f.find('.gui') != -1] - all_predicted_guis.sort() - guis = [] - for f in all_predicted_guis: - generated_gui_filepath = "{}/{}".format(predicted_guis_filepath, f) - actual_gui_filepath = "{}/{}".format(original_guis_filepath, f) - if os.path.isfile(actual_gui_filepath): - predicted_gui = Evaluator.load_gui_doc(generated_gui_filepath) - actual_gui = Evaluator.load_gui_doc(actual_gui_filepath) - - predicted.append(predicted_gui[1:-1]) - actuals.append([actual_gui]) - return actuals, predicted -from __future__ import print_function -from __future__ import absolute_import - -from .SamplerUtils import * - -TEXT_PLACE_HOLDER = "[]" - - -class Node: - - def __init__(self, key, parent_node, content_holder): - self.key = key - self.parent = parent_node - self.children = [] - 
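- # content_holder is the placeholder token that render() later replaces with the rendered children.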
self.content_holder = content_holder - - def add_child(self, child): - self.children.append(child) - - def show(self): - for child in self.children: - child.show() - - def rendering_function(self, key, value): - if key.find("btn") != -1: - value = value.replace( - TEXT_PLACE_HOLDER, SamplerUtils.get_random_text()) - elif key.find("title") != -1: - value = value.replace(TEXT_PLACE_HOLDER, SamplerUtils.get_random_text( - length_text=5, space_number=0)) - elif key.find("text") != -1: - value = value.replace(TEXT_PLACE_HOLDER, - SamplerUtils.get_random_text(length_text=56, space_number=7, with_upper_case=False)) - return value - - def render(self, mapping, rendering_function=None): - content = "" - for child in self.children: - placeholder = child.render(mapping, self.rendering_function) - if placeholder is None: - self = None - return - else: - content += placeholder - - value = mapping.get(self.key, None) - - if value is None: - self = None - return None - - if rendering_function is not None: - value = self.rendering_function(self.key, value) - - if len(self.children) != 0: - value = value.replace(self.content_holder, content) - - return value -from __future__ import absolute_import - -import sys -import os -import shutil -import json -import numpy as np - -from keras.models import model_from_json -from keras.preprocessing.sequence import pad_sequences - -from classes.dataset.Dataset import * -from classes.dataset.ImagePreprocessor import * -from .Evaluator import * -from .Compiler import * - -MAX_LENGTH = 48 - - -class Sampler: - - def __init__(self, model_json_path=None, model_weights_path=None): - self.tokenizer, self.vocab_size = Dataset.load_vocab() - self.model = self.load_model(model_json_path, model_weights_path) - - def convert_batch_of_images(self, output_folder, pngs_path, get_corpus_bleu, original_guis_filepath, style): - - all_filenames = os.listdir(pngs_path) - all_filenames.sort() - generated_count = 0 - for filename in all_filenames: - if filename.find('.png') != -1: - png_path = "{}/{}".format(pngs_path, filename) - try: - self.convert_single_image(output_folder, png_path, print_generated_output=0, - get_sentence_bleu=0, original_gui_filepath=png_path, style=style) - generated_count += 1 - except: - print("Error with GUI / HTML generation:", - sys.exc_info()[0]) - print(sys.exc_info()) - continue - print("Generated code for {} images".format(generated_count)) - - if (get_corpus_bleu == 1) and (original_guis_filepath is not None): - print("BLEU score: {}".format(Evaluator.get_corpus_bleu( - original_guis_filepath, output_folder))) - - def convert_single_image(self, output_folder, png_path, print_generated_output, get_sentence_bleu, original_gui_filepath, style): - - # Retrieve sample ID - png_filename = os.path.basename(png_path) - if png_filename.find('.png') == -1: - raise ValueError("Image is not a png!") - sample_id = png_filename[:png_filename.find('.png')] - - # Generate GUI - print("Generating code for sample ID {}".format(sample_id)) - generated_gui, gui_output_filepath = self.generate_gui( - png_path, print_generated_output=print_generated_output, output_folder=output_folder, sample_id=sample_id) - - # Generate HTML - generated_html = self.generate_html( - generated_gui, sample_id, print_generated_output=print_generated_output, output_folder=output_folder, style=style) - - # Get BLEU - if get_sentence_bleu == 1 and (original_gui_filepath is not None): - print("BLEU score: {}".format(Evaluator.get_sentence_bleu( - original_gui_filepath, gui_output_filepath))) - - 
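- # A minimal usage sketch (file names illustrative), assuming a trained model on disk:
- #
- #   sampler = Sampler(model_json_path='model_json.json',
- #                     model_weights_path='weights.h5')
- #   sampler.convert_single_image('output', png_path='sketch.png',
- #                                print_generated_output=1, get_sentence_bleu=0,
- #                                original_gui_filepath=None, style='default')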
########################################## - ####### PRIVATE METHODS ################## - ########################################## - - def load_model(self, model_json_path, model_weights_path): - json_file = open(model_json_path, 'r') - loaded_model_json = json_file.read() - json_file.close() - loaded_model = model_from_json(loaded_model_json) - loaded_model.load_weights(model_weights_path) - print("\nLoaded model from disk") - return loaded_model - - def generate_gui(self, png_path, print_generated_output, sample_id, output_folder): - test_img_preprocessor = ImagePreprocessor() - img_features = test_img_preprocessor.get_img_features(png_path) - - in_text = ' ' - photo = np.array([img_features]) - for i in range(150): - sequence = self.tokenizer.texts_to_sequences([in_text])[0] - sequence = pad_sequences([sequence], maxlen=MAX_LENGTH) - yhat = self.model.predict([photo, sequence], verbose=0) - yhat = np.argmax(yhat) - word = self.word_for_id(yhat) - if word is None: - break - in_text += word + ' ' - if word == '': - break - - generated_gui = in_text.split() - - if print_generated_output is 1: - print("\n=========\nGenerated GUI code:") - print(generated_gui) - - gui_output_filepath = self.write_gui_to_disk( - generated_gui, sample_id, output_folder) - - return generated_gui, gui_output_filepath - - def generate_html(self, gui_array, sample_id, print_generated_output, output_folder, style='default'): - - compiler = Compiler(style) - compiled_website = compiler.compile(gui_array) - - if print_generated_output is 1: - print("\nCompiled HTML:") - print(compiled_website) - - if compiled_website != 'HTML Parsing Error': - output_filepath = "{}/{}.html".format(output_folder, sample_id) - with open(output_filepath, 'w') as output_file: - output_file.write(compiled_website) - print("Saved generated HTML to {}".format(output_filepath)) - - def word_for_id(self, integer): - for word, index in self.tokenizer.word_index.items(): - if index == integer: - return word - return None - - def write_gui_to_disk(self, gui_array, sample_id, output_folder): - gui_output_filepath = "{}/{}.gui".format(output_folder, sample_id) - with open(gui_output_filepath, 'w') as out_f: - out_f.write(' '.join(gui_array)) - return gui_output_filepath -from __future__ import print_function -from __future__ import absolute_import - -import string -import random - - -class SamplerUtils: - - @staticmethod - def get_random_text(length_text=10, space_number=1, with_upper_case=True): - results = [] - while len(results) < length_text: - char = random.choice(string.ascii_letters[:26]) - results.append(char) - if with_upper_case: - results[0] = results[0].upper() - - current_spaces = [] - while len(current_spaces) < space_number: - space_pos = random.randint(2, length_text - 3) - if space_pos in current_spaces: - break - results[space_pos] = " " - if with_upper_case: - results[space_pos + 1] = results[space_pos - 1].upper() - - current_spaces.append(space_pos) - - return ''.join(results) -from __future__ import absolute_import - -from classes.dataset.Dataset import * - - -class ModelUtils: - - @staticmethod - def prepare_data_for_training(data_input_folder, validation_split, augment_training_data): - - dataset = Dataset(data_input_folder) - training_path, validation_path = dataset.split_datasets( - validation_split) - dataset.preprocess_data( - training_path, validation_path, augment_training_data) - - return training_path, validation_path -from __future__ import absolute_import - -from keras.models import Model, Sequential, 
model_from_json -from keras.callbacks import ModelCheckpoint, CSVLogger, Callback -from keras.layers.core import Dense, Dropout, Flatten -from keras.layers import Embedding, GRU, TimeDistributed, RepeatVector, LSTM, concatenate, Input, Reshape, Dense -from keras.layers.convolutional import Conv2D -from keras.optimizers import RMSprop - -from .ModelUtils import * -from classes.dataset.Dataset import * - -MAX_LENGTH = 48 -MAX_SEQ = 150 - - -class SketchCodeModel(): - - def __init__(self, model_output_path, model_json_file=None, model_weights_file=None): - - # Create model output path - self.model_output_path = model_output_path - - # If we have an existing model json / weights, load in that model - if model_json_file is not None and model_weights_file is not None: - self.model = self.load_model(model_json_file, model_weights_file) - optimizer = RMSprop(lr=0.0001, clipvalue=1.0) - self.model.compile( - loss='categorical_crossentropy', optimizer=optimizer) - print("Loaded pretrained model from disk") - - # Create a new model if we don't have one - else: - self.create_model() - print("Created new model, vocab size: {}".format(self.vocab_size)) - - print(self.model.summary()) - - def load_model(self, model_json_file, model_weights_file): - json_file = open(model_json_file, 'r') - loaded_model_json = json_file.read() - json_file.close() - loaded_model = model_from_json(loaded_model_json) - loaded_model.load_weights(model_weights_file) - return loaded_model - - def save_model(self): - model_json = self.model.to_json() - with open("{}/model_json.json".format(self.model_output_path), "w") as json_file: - json_file.write(model_json) - self.model.save_weights("{}/weights.h5".format(self.model_output_path)) - - def create_model(self): - tokenizer, vocab_size = Dataset.load_vocab() - self.vocab_size = vocab_size - - # Image encoder - image_model = Sequential() - image_model.add(Conv2D(16, (3, 3), padding='valid', - activation='relu', input_shape=(256, 256, 3,))) - image_model.add(Conv2D(16, (3, 3), activation='relu', - padding='same', strides=2)) - image_model.add(Conv2D(32, (3, 3), activation='relu', padding='same')) - image_model.add(Conv2D(32, (3, 3), activation='relu', - padding='same', strides=2)) - image_model.add(Conv2D(64, (3, 3), activation='relu', padding='same')) - image_model.add(Conv2D(64, (3, 3), activation='relu', - padding='same', strides=2)) - image_model.add(Conv2D(128, (3, 3), activation='relu', padding='same')) - image_model.add(Flatten()) - image_model.add(Dense(1024, activation='relu')) - image_model.add(Dropout(0.3)) - image_model.add(Dense(1024, activation='relu')) - image_model.add(Dropout(0.3)) - image_model.add(RepeatVector(MAX_LENGTH)) - visual_input = Input(shape=(256, 256, 3,)) - encoded_image = image_model(visual_input) - - # Language encoder - language_input = Input(shape=(MAX_LENGTH,)) - language_model = Embedding( - vocab_size, 50, input_length=MAX_LENGTH, mask_zero=True)(language_input) - language_model = GRU(128, return_sequences=True)(language_model) - language_model = GRU(128, return_sequences=True)(language_model) - - # Decoder - decoder = concatenate([encoded_image, language_model]) - decoder = GRU(512, return_sequences=True)(decoder) - decoder = GRU(512, return_sequences=False)(decoder) - decoder = Dense(vocab_size, activation='softmax')(decoder) - - # Compile the model - self.model = Model( - inputs=[visual_input, language_input], outputs=decoder) - optimizer = RMSprop(lr=0.0001, clipvalue=1.0) - self.model.compile(loss='categorical_crossentropy', - 
optimizer=optimizer) - - def train(self, training_path, validation_path, epochs): - - # Setup data generators - training_generator, train_steps_per_epoch = Dataset.create_generator( - training_path, max_sequences=MAX_SEQ) - validation_generator, val_steps_per_epoch = Dataset.create_generator( - validation_path, max_sequences=MAX_SEQ) - - # Setup model callbacks - callbacks_list = self.construct_callbacks(validation_path) - - # Begin training - print("\n### Starting model training ###\n") - self.model.fit_generator(generator=training_generator, validation_data=validation_generator, epochs=epochs, shuffle=False, - validation_steps=val_steps_per_epoch, steps_per_epoch=train_steps_per_epoch, callbacks=callbacks_list, verbose=1) - print("\n### Finished model training ###\n") - self.save_model() - - def construct_callbacks(self, validation_path): - checkpoint_filepath = "{}/".format(self.model_output_path) + \ - "weights-epoch-{epoch:04d}--val_loss-{val_loss:.4f}--loss-{loss:.4f}.h5" - csv_logger = CSVLogger( - "{}/training_val_losses.csv".format(self.model_output_path)) - checkpoint = ModelCheckpoint(checkpoint_filepath, - verbose=0, - save_weights_only=True, - save_best_only=True, - mode='min', - period=2) - callbacks_list = [checkpoint, csv_logger] - return callbacks_list -from utils.config import process_config -from utils.dirs import create_dirs -from utils.args import get_args -from utils import factory -import sys - - -def main(): - # capture the config path from the run arguments - # then process the json configuration fill - try: - args = get_args() - config = process_config(args.config) - - # create the experiments dirs - create_dirs([config.callbacks.tensorboard_log_dir, - config.callbacks.checkpoint_dir]) - - print('Create the data generator.') - data_loader = factory.create( - "data_loader."+config.data_loader.name)(config) - - print('Create the model.') - model = factory.create("models."+config.model.name)(config) - - print('Create the trainer') - trainer = factory.create( - "trainers."+config.trainer.name)(model.model, data_loader.get_train_data(), config) - - print('Start training the model.') - trainer.train() - - except Exception as e: - print(e) - sys.exit(1) - - -if __name__ == '__main__': - main() -from data_loader.simple_mnist_data_loader import SimpleMnistDataLoader -from models.simple_mnist_model import SimpleMnistModel -from trainers.simple_mnist_trainer import SimpleMnistModelTrainer -from utils.config import process_config -from utils.dirs import create_dirs -from utils.utils import get_args - - -def main(): - # capture the config path from the run arguments - # then process the json configuration file - try: - args = get_args() - config = process_config(args.config) - except: - print("missing or invalid arguments") - exit(0) - - # create the experiments dirs - create_dirs([config.callbacks.tensorboard_log_dir, - config.callbacks.checkpoint_dir]) - - print('Create the data generator.') - data_loader = SimpleMnistDataLoader(config) - - print('Create the model.') - model = SimpleMnistModel(config) - - print('Create the trainer') - trainer = SimpleMnistModelTrainer( - model.model, data_loader.get_train_data(), config) - - print('Start training the model.') - trainer.train() - - -if __name__ == '__main__': - main() -class BaseDataLoader(object): - def __init__(self, config): - self.config = config - - def get_train_data(self): - raise NotImplementedError - - def get_test_data(self): - raise NotImplementedError -class BaseModel(object): - def __init__(self, config): - 
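- # Concrete subclasses assign a compiled Keras model to self.model in build_model().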
self.config = config - self.model = None - - # save function that saves the checkpoint in the path defined in the config file - def save(self, checkpoint_path): - if self.model is None: - raise Exception("You have to build the model first.") - - print("Saving model...") - self.model.save_weights(checkpoint_path) - print("Model saved") - - # load latest checkpoint from the experiment path defined in the config file - def load(self, checkpoint_path): - if self.model is None: - raise Exception("You have to build the model first.") - - print("Loading model checkpoint {} ...\n".format(checkpoint_path)) - self.model.load_weights(checkpoint_path) - print("Model loaded") - - def build_model(self): - raise NotImplementedError -class BaseTrain(object): - def __init__(self, model, data, config): - self.model = model - self.data = data - self.config = config - - def train(self): - raise NotImplementedError -from base.base_data_loader import BaseDataLoader -from keras.datasets import mnist - - -class ConvMnistDataLoader(BaseDataLoader): - def __init__(self, config): - super(ConvMnistDataLoader, self).__init__(config) - (self.X_train, self.y_train), (self.X_test, self.y_test) = mnist.load_data() - self.X_train = self.X_train.reshape((-1, 28, 28, 1)) - self.X_test = self.X_test.reshape((-1, 28, 28, 1)) - - def get_train_data(self): - return self.X_train, self.y_train - - def get_test_data(self): - return self.X_test, self.y_test -from base.base_data_loader import BaseDataLoader -from keras.datasets import mnist - - -class SimpleMnistDataLoader(BaseDataLoader): - def __init__(self, config): - super(SimpleMnistDataLoader, self).__init__(config) - (self.X_train, self.y_train), (self.X_test, self.y_test) = mnist.load_data() - self.X_train = self.X_train.reshape((-1, 28 * 28)) - self.X_test = self.X_test.reshape((-1, 28 * 28)) - - def get_train_data(self): - return self.X_train, self.y_train - - def get_test_data(self): - return self.X_test, self.y_test -from base.base_model import BaseModel -from keras.models import Sequential -from keras.layers import Input, Dense, Conv2D, MaxPooling2D, Dropout, Flatten - - -class ConvMnistModel(BaseModel): - def __init__(self, config): - super(ConvMnistModel, self).__init__(config) - self.build_model() - - def build_model(self): - self.model = Sequential() - self.model.add(Conv2D(32, kernel_size=(3, 3), - activation='relu', input_shape=(28, 28, 1))) - self.model.add(Conv2D(64, (3, 3), activation='relu')) - self.model.add(MaxPooling2D(pool_size=(2, 2))) - self.model.add(Dropout(0.25)) - self.model.add(Flatten()) - self.model.add(Dense(128, activation='relu')) - self.model.add(Dropout(0.5)) - self.model.add(Dense(10, activation='softmax')) - - self.model.compile( - loss='sparse_categorical_crossentropy', - optimizer=self.config.model.optimizer, - metrics=['accuracy']) -from base.base_model import BaseModel -from keras.models import Sequential -from keras.layers import Input, Dense - - -class SimpleMnistModel(BaseModel): - def __init__(self, config): - super(SimpleMnistModel, self).__init__(config) - self.build_model() - - def build_model(self): - self.model = Sequential() - self.model.add(Dense(32, activation='relu', input_shape=(28 * 28,))) - self.model.add(Dense(16, activation='relu')) - self.model.add(Dense(10, activation='softmax')) - - self.model.compile( - loss='sparse_categorical_crossentropy', - optimizer=self.config.model.optimizer, - metrics=['acc'], - ) -from base.base_trainer import BaseTrain -import os -from keras.callbacks import ModelCheckpoint, TensorBoard - 
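-# For orientation, a minimal config for this trainer might look as follows.
-# This is an illustrative sketch only: the field names are inferred from the
-# attribute accesses in this template (process_config() in utils/config.py
-# additionally fills in callbacks.checkpoint_dir and
-# callbacks.tensorboard_log_dir); the values are not the project's settings.
-#
-# {
-#     "exp": {"name": "simple_mnist"},
-#     "model": {"name": "SimpleMnistModel", "optimizer": "adam"},
-#     "trainer": {"num_epochs": 10, "batch_size": 64,
-#                 "validation_split": 0.2, "verbose_training": 1},
-#     "callbacks": {"checkpoint_monitor": "val_loss", "checkpoint_mode": "min",
-#                   "checkpoint_save_best_only": true,
-#                   "checkpoint_save_weights_only": true,
-#                   "checkpoint_verbose": 1,
-#                   "tensorboard_write_graph": true}
-# }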
- -class SimpleMnistModelTrainer(BaseTrain): - def __init__(self, model, data, config): - super(SimpleMnistModelTrainer, self).__init__(model, data, config) - self.callbacks = [] - self.loss = [] - self.acc = [] - self.val_loss = [] - self.val_acc = [] - self.init_callbacks() - - def init_callbacks(self): - self.callbacks.append( - ModelCheckpoint( - filepath=os.path.join(self.config.callbacks.checkpoint_dir, - '%s-{epoch:02d}-{val_loss:.2f}.hdf5' % self.config.exp.name), - monitor=self.config.callbacks.checkpoint_monitor, - mode=self.config.callbacks.checkpoint_mode, - save_best_only=self.config.callbacks.checkpoint_save_best_only, - save_weights_only=self.config.callbacks.checkpoint_save_weights_only, - verbose=self.config.callbacks.checkpoint_verbose, - ) - ) - - self.callbacks.append( - TensorBoard( - log_dir=self.config.callbacks.tensorboard_log_dir, - write_graph=self.config.callbacks.tensorboard_write_graph, - ) - ) - - if hasattr(self.config, "comet_api_key"): - from comet_ml import Experiment - experiment = Experiment( - api_key=self.config.comet_api_key, project_name=self.config.exp_name) - experiment.disable_mp() - experiment.log_multiple_params(self.config) - self.callbacks.append(experiment.get_keras_callback()) - - def train(self): - history = self.model.fit( - self.data[0], self.data[1], - epochs=self.config.trainer.num_epochs, - verbose=self.config.trainer.verbose_training, - batch_size=self.config.trainer.batch_size, - validation_split=self.config.trainer.validation_split, - callbacks=self.callbacks, - ) - self.loss.extend(history.history['loss']) - self.acc.extend(history.history['acc']) - self.val_loss.extend(history.history['val_loss']) - self.val_acc.extend(history.history['val_acc']) -import argparse - - -def get_args(): - argparser = argparse.ArgumentParser(description=__doc__) - argparser.add_argument( - '-c', '--config', - dest='config', - metavar='C', - default='None', - help='The Configuration file') - args = argparser.parse_args() - return args -import json -from dotmap import DotMap -import os -import time - - -def get_config_from_json(json_file): - """ - Get the config from a json file - :param json_file: - :return: config(namespace) or config(dictionary) - """ - # parse the configurations from the config json file provided - with open(json_file, 'r') as config_file: - config_dict = json.load(config_file) - - # convert the dictionary to a namespace using bunch lib - config = DotMap(config_dict) - - return config, config_dict - - -def process_config(json_file): - config, _ = get_config_from_json(json_file) - config.callbacks.tensorboard_log_dir = os.path.join("experiments", time.strftime( - "%Y-%m-%d/", time.localtime()), config.exp.name, "logs/") - config.callbacks.checkpoint_dir = os.path.join("experiments", time.strftime( - "%Y-%m-%d/", time.localtime()), config.exp.name, "checkpoints/") - return config -import os - - -def create_dirs(dirs): - """ - dirs - a list of directories to create if these directories are not found - :param dirs: - :return exit_code: 0:success -1:failed - """ - try: - for dir_ in dirs: - if not os.path.exists(dir_): - os.makedirs(dir_) - return 0 - except Exception as err: - print("Creating directories error: {0}".format(err)) - exit(-1) -import importlib - - -def create(cls): - '''expects a string that can be imported as with a module.class name''' - module_name, class_name = cls.rsplit(".", 1) - - try: - print('importing '+module_name) - somemodule = importlib.import_module(module_name) - print('getattr '+class_name) - cls_instance 
= getattr(somemodule, class_name) - print(cls_instance) - except Exception as err: - print("Error creating {0}: {1}".format(cls, err)) - exit(-1) - - return cls_instance -from setuptools import setup -from setuptools import find_packages -from os.path import join, dirname -# We need io.open() (Python 3's default open) to specify file encodings -import io - -with open(join(dirname(__file__), 'foolbox/VERSION')) as f: - version = f.read().strip() - -try: - # obtain long description from README - # Specify encoding to get a unicode type in Python 2 and a str in Python 3 - readme_path = join(dirname(__file__), 'README.rst') - with io.open(readme_path, encoding='utf-8') as fr: - README = fr.read() -except IOError: - README = '' - - -install_requires = [ - 'numpy', - 'scipy', - 'setuptools', - 'requests', - 'GitPython' -] - -tests_require = [ - 'pytest', - 'pytest-cov', -] - -setup( - name="foolbox", - version=version, - description="Python toolbox to create adversarial examples that fool neural networks", # noqa: E501 - long_description=README, - classifiers=[ - "Development Status :: 3 - Alpha", - "Intended Audience :: Developers", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", - "Programming Language :: Python :: 2.7", - "Programming Language :: Python :: 3", - "Programming Language :: Python :: 3.5", - "Programming Language :: Python :: 3.6", - "Topic :: Scientific/Engineering :: Artificial Intelligence", - ], - keywords="", - author="Jonas Rauber & Wieland Brendel", - author_email="opensource@bethgelab.org", - url="https://github.com/bethgelab/foolbox", - license="MIT", - packages=find_packages(), - include_package_data=True, - zip_safe=False, - install_requires=install_requires, - extras_require={ - 'testing': tests_require, - ':python_version == "2.7"': ['future', 'futures'], - }, -) -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -# -# Foolbox documentation build configuration file, created by -# sphinx-quickstart on Mon Jun 5 06:37:00 2017. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -import foolbox -import os -import sys -from unittest.mock import Mock -sys.path.insert(0, os.path.abspath('..')) - -# mock imports, see https://stackoverflow.com/q/15889621 -# autodoc_mock_imports = ['tensorflow'] does not seem to work -sys.modules['numpy'] = Mock() -sys.modules['numpy.linalg'] = Mock() -sys.modules['scipy'] = Mock() -sys.modules['scipy.optimize'] = Mock() -sys.modules['scipy.interpolate'] = Mock() -sys.modules['scipy.ndimage'] = Mock() -sys.modules['scipy.ndimage.filters'] = Mock() -sys.modules['tensorflow'] = Mock() -sys.modules['theano'] = Mock() -sys.modules['theano.tensor'] = Mock() -sys.modules['torch'] = Mock() -sys.modules['randomstate'] = Mock() - - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. 
They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones. -extensions = [ - 'sphinx.ext.autodoc', - 'sphinx.ext.autosummary', - 'sphinx.ext.doctest', - 'sphinx.ext.coverage', - 'sphinx.ext.mathjax', - 'sphinx.ext.linkcode', - 'numpydoc', -] - -# see http://stackoverflow.com/q/12206334/562769 -numpydoc_show_class_members = False - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = '.rst' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = 'Foolbox' -copyright = '2017, Jonas Rauber & Wieland Brendel' -author = 'Jonas Rauber & Wieland Brendel' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -# version = '0.1' -version = foolbox.__version__ -# The full version, including alpha/beta/rc tags. -release = foolbox.__version__ - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store'] - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - - -# Resolve function for the linkcode extension. -def linkcode_resolve(domain, info): - def find_source(): - # try to find the file and line number, based on code from numpy: - # https://github.com/numpy/numpy/blob/master/doc/source/conf.py#L286 - obj = sys.modules[info['module']] - for part in info['fullname'].split('.'): - obj = getattr(obj, part) - import inspect - import os - fn = inspect.getsourcefile(obj) - fn = os.path.relpath(fn, start=os.path.dirname(foolbox.__file__)) - source, lineno = inspect.getsourcelines(obj) - return fn, lineno, lineno + len(source) - 1 - - if domain != 'py' or not info['module']: - return None - try: - filename = 'foolbox/%s#L%d-L%d' % find_source() - except Exception: - filename = info['module'].replace('.', '/') + '.py' - tag = 'master' - url = "https://github.com/bethgelab/foolbox/blob/%s/%s" - return url % (tag, filename) - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -# html_theme = 'alabaster' - -if os.environ.get('READTHEDOCS') != 'True': - try: - import sphinx_rtd_theme - except ImportError: - pass # assume we have sphinx >= 1.3 - else: - html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - html_theme = 'sphinx_rtd_theme' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. 
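-# For example, with sphinx_rtd_theme (illustrative values only):
-# html_theme_options = {'collapse_navigation': False, 'navigation_depth': 3}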
-# -# html_theme_options = {} - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - - -# -- Options for HTMLHelp output ------------------------------------------ - -# Output file base name for HTML help builder. -htmlhelp_basename = 'Foolboxdoc' - - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - 'preamble': '', - - # Latex figure (float) alignment - # - 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]). -latex_documents = [ - (master_doc, 'Foolbox.tex', 'Foolbox Documentation', - 'Jonas Rauber \\& Wieland Brendel', 'manual'), -] - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'foolbox', 'Foolbox Documentation', - [author], 1) -] - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'Foolbox', 'Foolbox Documentation', - author, 'Foolbox', 'One line description of project.', - 'Miscellaneous'), -] -from os.path import join, dirname - -with open(join(dirname(__file__), 'VERSION')) as f: - __version__ = f.read().strip() - -from .rngs import rng # noqa: F401 -from .rngs import nprng # noqa: F401 -from .rngs import set_seeds # noqa: F401 - -from . import models # noqa: F401 -from . import criteria # noqa: F401 -from . import distances # noqa: F401 -from . import attacks # noqa: F401 -from . import utils # noqa: F401 -from . import gradient_estimators # noqa: F401 - -from .adversarial import Adversarial # noqa: F401 -""" -Provides a class that represents an adversarial example. - -""" - -import numpy as np -import numbers - -from .distances import Distance -from .distances import MSE - - -class StopAttack(Exception): - """Exception thrown to request early stopping of an attack - if a given (optional!) threshold is reached.""" - pass - - -class Adversarial(object): - """Defines an adversarial that should be found and stores the result. - - The :class:`Adversarial` class represents a single adversarial example - for a given model, criterion and reference image. It can be passed to - an adversarial attack to find the actual adversarial. - - Parameters - ---------- - model : a :class:`Model` instance - The model that should be fooled by the adversarial. - criterion : a :class:`Criterion` instance - The criterion that determines which images are adversarial. - original_image : a :class:`numpy.ndarray` - The original image to which the adversarial image should - be as close as possible. - original_class : int - The ground-truth label of the original image. - distance : a :class:`Distance` class - The measure used to quantify similarity between images. 
- threshold : float or :class:`Distance` - If not None, the attack will stop as soon as the adversarial - perturbation has a size smaller than this threshold. Can be - an instance of the :class:`Distance` class passed to the distance - argument, or a float assumed to have the same unit as the - given distance. If None, the attack will simply minimize - the distance as well as possible. Note that the threshold only - influences early stopping of the attack; the returned adversarial - does not necessarily have smaller perturbation size than this - threshold; the `reached_threshold()` method can be used to check - if the threshold has been reached. - - """ - - def __init__( - self, - model, - criterion, - original_image, - original_class, - distance=MSE, - threshold=None, - verbose=False): - - self.__model = model - self.__criterion = criterion - self.__original_image = original_image - self.__original_image_for_distance = original_image - self.__original_class = original_class - self.__distance = distance - - if threshold is not None and not isinstance(threshold, Distance): - threshold = distance(value=threshold) - self.__threshold = threshold - - self.verbose = verbose - - self.__best_adversarial = None - self.__best_distance = distance(value=np.inf) - self.__best_adversarial_output = None - - self._total_prediction_calls = 0 - self._total_gradient_calls = 0 - - self._best_prediction_calls = 0 - self._best_gradient_calls = 0 - - # check if the original image is already adversarial - try: - self.predictions(original_image) - except StopAttack: - # if a threshold is specified and the original input is - # misclassified, this can already cause a StopAttack - # exception - assert self.distance.value == 0. - - def _reset(self): - self.__best_adversarial = None - self.__best_distance = self.__distance(value=np.inf) - self.__best_adversarial_output = None - - self._best_prediction_calls = 0 - self._best_gradient_calls = 0 - - self.predictions(self.__original_image) - - @property - def image(self): - """The best adversarial found so far.""" - return self.__best_adversarial - - @property - def output(self): - """The model predictions for the best adversarial found so far. - - None if no adversarial has been found. - """ - return self.__best_adversarial_output - - @property - def adversarial_class(self): - """The argmax of the model predictions for the best adversarial found so far. - - None if no adversarial has been found. 
- """ - if self.output is None: - return None - return np.argmax(self.output) - - @property - def distance(self): - """The distance of the adversarial input to the original input.""" - return self.__best_distance - - @property - def original_image(self): - """The original input.""" - return self.__original_image - - @property - def original_class(self): - """The class of the original input (ground-truth, not model prediction).""" # noqa: E501 - return self.__original_class - - @property - def _model(self): # pragma: no cover - """Should not be used.""" - return self.__model - - @property - def _criterion(self): # pragma: no cover - """Should not be used.""" - return self.__criterion - - @property - def _distance(self): # pragma: no cover - """Should not be used.""" - return self.__distance - - def set_distance_dtype(self, dtype): - assert dtype >= self.__original_image.dtype - self.__original_image_for_distance = self.__original_image.astype( - dtype, copy=False) - - def reset_distance_dtype(self): - self.__original_image_for_distance = self.__original_image - - def normalized_distance(self, image): - """Calculates the distance of a given image to the - original image. - - Parameters - ---------- - image : `numpy.ndarray` - The image that should be compared to the original image. - - Returns - ------- - :class:`Distance` - The distance between the given image and the original image. - - """ - return self.__distance( - self.__original_image_for_distance, - image, - bounds=self.bounds()) - - def reached_threshold(self): - """Returns True if a threshold is given and the currently - best adversarial distance is smaller than the threshold.""" - return self.__threshold is not None \ - and self.__best_distance <= self.__threshold - - def __new_adversarial(self, image, predictions, in_bounds): - image = image.copy() # to prevent accidental inplace changes - distance = self.normalized_distance(image) - if in_bounds and self.__best_distance > distance: - # new best adversarial - if self.verbose: - print('new best adversarial: {}'.format(distance)) - - self.__best_adversarial = image - self.__best_distance = distance - self.__best_adversarial_output = predictions - - self._best_prediction_calls = self._total_prediction_calls - self._best_gradient_calls = self._total_gradient_calls - - if self.reached_threshold(): - raise StopAttack - - return True, distance - return False, distance - - def __is_adversarial(self, image, predictions, in_bounds): - """Interface to criterion.is_adverarial that calls - __new_adversarial if necessary. - - Parameters - ---------- - predictions : :class:`numpy.ndarray` - A vector with the pre-softmax predictions for some image. - label : int - The label of the unperturbed reference image. - - """ - is_adversarial = self.__criterion.is_adversarial( - predictions, self.__original_class) - assert isinstance(is_adversarial, bool) or \ - isinstance(is_adversarial, np.bool_) - if is_adversarial: - is_best, distance = self.__new_adversarial( - image, predictions, in_bounds) - else: - is_best = False - distance = None - return is_adversarial, is_best, distance - - def target_class(self): - """Interface to criterion.target_class for attacks. 
- - """ - try: - target_class = self.__criterion.target_class() - except AttributeError: - target_class = None - return target_class - - def num_classes(self): - n = self.__model.num_classes() - assert isinstance(n, numbers.Number) - return n - - def bounds(self): - min_, max_ = self.__model.bounds() - assert isinstance(min_, numbers.Number) - assert isinstance(max_, numbers.Number) - assert min_ < max_ - return min_, max_ - - def in_bounds(self, input_): - min_, max_ = self.bounds() - return min_ <= input_.min() and input_.max() <= max_ - - def channel_axis(self, batch): - """Interface to model.channel_axis for attacks. - - Parameters - ---------- - batch : bool - Controls whether the index of the axis for a batch of images - (4 dimensions) or a single image (3 dimensions) should be returned. - - """ - axis = self.__model.channel_axis() - if not batch: - axis = axis - 1 - return axis - - def has_gradient(self): - """Returns true if _backward and _forward_backward can be called - by an attack, False otherwise. - - """ - try: - self.__model.gradient - self.__model.predictions_and_gradient - except AttributeError: - return False - else: - return True - - def predictions(self, image, strict=True, return_details=False): - """Interface to model.predictions for attacks. - - Parameters - ---------- - image : `numpy.ndarray` - Single input with shape as expected by the model - (without the batch dimension). - strict : bool - Controls if the bounds for the pixel values should be checked. - - """ - in_bounds = self.in_bounds(image) - assert not strict or in_bounds - - self._total_prediction_calls += 1 - predictions = self.__model.predictions(image) - is_adversarial, is_best, distance = self.__is_adversarial( - image, predictions, in_bounds) - - assert predictions.ndim == 1 - if return_details: - return predictions, is_adversarial, is_best, distance - else: - return predictions, is_adversarial - - def batch_predictions( - self, images, greedy=False, strict=True, return_details=False): - """Interface to model.batch_predictions for attacks. - - Parameters - ---------- - images : `numpy.ndarray` - Batch of inputs with shape as expected by the model. - greedy : bool - Whether the first adversarial should be returned. - strict : bool - Controls if the bounds for the pixel values should be checked. - - """ - if strict: - in_bounds = self.in_bounds(images) - assert in_bounds - - self._total_prediction_calls += len(images) - predictions = self.__model.batch_predictions(images) - - assert predictions.ndim == 2 - assert predictions.shape[0] == images.shape[0] - - if return_details: - assert greedy - - adversarials = [] - for i in range(len(predictions)): - if strict: - in_bounds_i = True - else: - in_bounds_i = self.in_bounds(images[i]) - is_adversarial, is_best, distance = self.__is_adversarial( - images[i], predictions[i], in_bounds_i) - if is_adversarial and greedy: - if return_details: - return predictions, is_adversarial, i, is_best, distance - else: - return predictions, is_adversarial, i - adversarials.append(is_adversarial) - - if greedy: # pragma: no cover - # no adversarial found - if return_details: - return predictions, False, None, False, None - else: - return predictions, False, None - - is_adversarial = np.array(adversarials) - assert is_adversarial.ndim == 1 - assert is_adversarial.shape[0] == images.shape[0] - - return predictions, is_adversarial - - def gradient(self, image=None, label=None, strict=True): - """Interface to model.gradient for attacks. 
- - Parameters - ---------- - image : `numpy.ndarray` - Single input with shape as expected by the model - (without the batch dimension). - Defaults to the original image. - label : int - Label used to calculate the loss that is differentiated. - Defaults to the original label. - strict : bool - Controls if the bounds for the pixel values should be checked. - - """ - assert self.has_gradient() - - if image is None: - image = self.__original_image - if label is None: - label = self.__original_class - - assert not strict or self.in_bounds(image) - - self._total_gradient_calls += 1 - gradient = self.__model.gradient(image, label) - - assert gradient.shape == image.shape - return gradient - - def predictions_and_gradient( - self, image=None, label=None, strict=True, return_details=False): - """Interface to model.predictions_and_gradient for attacks. - - Parameters - ---------- - image : `numpy.ndarray` - Single input with shape as expected by the model - (without the batch dimension). - Defaults to the original image. - label : int - Label used to calculate the loss that is differentiated. - Defaults to the original label. - strict : bool - Controls if the bounds for the pixel values should be checked. - - """ - assert self.has_gradient() - - if image is None: - image = self.__original_image - if label is None: - label = self.__original_class - - in_bounds = self.in_bounds(image) - assert not strict or in_bounds - - self._total_prediction_calls += 1 - self._total_gradient_calls += 1 - predictions, gradient = self.__model.predictions_and_gradient(image, label) # noqa: E501 - is_adversarial, is_best, distance = self.__is_adversarial( - image, predictions, in_bounds) - - assert predictions.ndim == 1 - assert gradient.shape == image.shape - if return_details: - return predictions, gradient, is_adversarial, is_best, distance - else: - return predictions, gradient, is_adversarial - - def backward(self, gradient, image=None, strict=True): - """Interface to model.backward for attacks. - - Parameters - ---------- - gradient : `numpy.ndarray` - Gradient of some loss w.r.t. the logits. - image : `numpy.ndarray` - Single input with shape as expected by the model - (without the batch dimension). - - Returns - ------- - gradient : `numpy.ndarray` - The gradient w.r.t the image. - - See Also - -------- - :meth:`gradient` - - """ - assert self.has_gradient() - assert gradient.ndim == 1 - - if image is None: - image = self.__original_image - - assert not strict or self.in_bounds(image) - - self._total_gradient_calls += 1 - gradient = self.__model.backward(gradient, image) - - assert gradient.shape == image.shape - return gradient -""" -Provides classes that define what is adversarial. - -Criteria --------- - -We provide criteria for untargeted and targeted adversarial attacks. - -.. autosummary:: - :nosignatures: - - Misclassification - TopKMisclassification - OriginalClassProbability - ConfidentMisclassification - -.. 
autosummary:: - :nosignatures: - - TargetClass - TargetClassProbability - -Examples --------- - -Untargeted criteria: - ->>> from foolbox.criteria import Misclassification ->>> criterion1 = Misclassification() - ->>> from foolbox.criteria import TopKMisclassification ->>> criterion2 = TopKMisclassification(k=5) - -Targeted criteria: - ->>> from foolbox.criteria import TargetClass ->>> criterion3 = TargetClass(22) - ->>> from foolbox.criteria import TargetClassProbability ->>> criterion4 = TargetClassProbability(22, p=0.99) - -Criteria can be combined to create a new criterion: - ->>> criterion5 = criterion2 & criterion3 - -""" -from .utils import softmax -import numpy as np -import sys -import abc -abstractmethod = abc.abstractmethod - -if sys.version_info >= (3, 4): - ABC = abc.ABC -else: # pragma: no cover - ABC = abc.ABCMeta('ABC', (), {}) - - -class Criterion(ABC): - """Base class for criteria that define what is adversarial. - - The :class:`Criterion` class represents a criterion used to - determine if predictions for an image are adversarial given - a reference label. It should be subclassed when implementing - new criteria. Subclasses must implement is_adversarial. - - """ - - def name(self): - """Returns a human readable name that uniquely identifies - the criterion with its hyperparameters. - - Returns - ------- - str - Human readable name that uniquely identifies the criterion - with its hyperparameters. - - Notes - ----- - Defaults to the class name but subclasses can provide more - descriptive names and must take hyperparameters into account. - - """ - return self.__class__.__name__ - - @abstractmethod - def is_adversarial(self, predictions, label): - """Decides if predictions for an image are adversarial given - a reference label. - - Parameters - ---------- - predictions : :class:`numpy.ndarray` - A vector with the pre-softmax predictions for some image. - label : int - The label of the unperturbed reference image. - - Returns - ------- - bool - True if an image with the given predictions is an adversarial - example when the ground-truth class is given by label, False - otherwise. - - """ - raise NotImplementedError - - def __and__(self, other): - return CombinedCriteria(self, other) - - -class CombinedCriteria(Criterion): - """Meta criterion that combines several criteria into a new one. - - Considers images as adversarial that are considered adversarial - by all sub-criteria that are combined by this criterion. - - Instead of using this class directly, it is possible to combine - criteria like this: criteria1 & criteria2 - - Parameters - ---------- - *criteria : variable length list of :class:`Criterion` instances - List of sub-criteria that will be combined. - - Notes - ----- - This class uses lazy evaluation of the criteria in the order they - are passed to the constructor. - - """ - - def __init__(self, *criteria): - super(CombinedCriteria, self).__init__() - self._criteria = criteria - - def name(self): - """Concatenates the names of the given criteria in alphabetical order. - - If a sub-criterion is itself a combined criterion, its name is - first split into the individual names and the names of the - sub-sub criteria is used instead of the name of the sub-criterion. - This is done recursively to ensure that the order and the hierarchy - of the criteria does not influence the name. - - Returns - ------- - str - The alphabetically sorted names of the sub-criteria concatenated - using double underscores between them. 
- - """ - names = (criterion.name() for criterion in self._criteria) - return '__'.join(sorted(names)) - - def is_adversarial(self, predictions, label): - for criterion in self._criteria: - if not criterion.is_adversarial(predictions, label): - # lazy evaluation - return False - return True - - -class Misclassification(Criterion): - """Defines adversarials as images for which the predicted class - is not the original class. - - See Also - -------- - :class:`TopKMisclassification` - - Notes - ----- - Uses `numpy.argmax` to break ties. - - """ - - def name(self): - return 'Top1Misclassification' - - def is_adversarial(self, predictions, label): - top1 = np.argmax(predictions) - return top1 != label - - -class ConfidentMisclassification(Criterion): - """Defines adversarials as images for which the probability - of any class other than the original is above a given threshold. - - Parameters - ---------- - p : float - The threshold probability. If the probability of any class - other than the original is at least p, the image is - considered an adversarial. It must satisfy 0 <= p <= 1. - - """ - - def __init__(self, p): - super(ConfidentMisclassification, self).__init__() - assert 0 <= p <= 1 - self.p = p - - def name(self): - return '{}-{:.04f}'.format(self.__class__.__name__, self.p) - - def is_adversarial(self, predictions, label): - top1 = np.argmax(predictions) - probabilities = softmax(predictions) - return (np.max(probabilities) >= self.p) and (top1 != label) - - -class TopKMisclassification(Criterion): - """Defines adversarials as images for which the original class is - not one of the top k predicted classes. - - For k = 1, the :class:`Misclassification` class provides a more - efficient implementation. - - Parameters - ---------- - k : int - Number of top predictions to which the reference label is - compared to. - - See Also - -------- - :class:`Misclassification` : Provides a more effcient implementation - for k = 1. - - Notes - ----- - Uses `numpy.argsort` to break ties. - - """ - - def __init__(self, k): - super(TopKMisclassification, self).__init__() - self.k = k - - def name(self): - return 'Top{}Misclassification'.format(self.k) - - def is_adversarial(self, predictions, label): - topk = np.argsort(predictions)[-self.k:] - return label not in topk - - -class TargetClass(Criterion): - """Defines adversarials as images for which the predicted class - is the given target class. - - Parameters - ---------- - target_class : int - The target class that needs to be predicted for an image - to be considered an adversarial. - - Notes - ----- - Uses `numpy.argmax` to break ties. - - """ - - def __init__(self, target_class): - super(TargetClass, self).__init__() - self._target_class = target_class - - def target_class(self): - return self._target_class - - def name(self): - return '{}-{}'.format(self.__class__.__name__, self.target_class()) - - def is_adversarial(self, predictions, label): - top1 = np.argmax(predictions) - return top1 == self.target_class() - - -class OriginalClassProbability(Criterion): - """Defines adversarials as images for which the probability - of the original class is below a given threshold. - - This criterion alone does not guarantee that the class - predicted for the adversarial image is not the original class - (unless p < 1 / number of classes). Therefore, it should usually - be combined with a classifcation criterion. - - Parameters - ---------- - p : float - The threshold probability. 
If the probability of the - original class is below this threshold, the image is - considered an adversarial. It must satisfy 0 <= p <= 1. - - """ - - def __init__(self, p): - super(OriginalClassProbability, self).__init__() - assert 0 <= p <= 1 - self.p = p - - def name(self): - return '{}-{:.04f}'.format(self.__class__.__name__, self.p) - - def is_adversarial(self, predictions, label): - probabilities = softmax(predictions) - return probabilities[label] < self.p - - -class TargetClassProbability(Criterion): - """Defines adversarials as images for which the probability - of a given target class is above a given threshold. - - If the threshold is below 0.5, this criterion does not guarantee - that the class predicted for the adversarial image is not the - original class. In that case, it should usually be combined with - a classification criterion. - - Parameters - ---------- - target_class : int - The target class for which the predicted probability must - be above the threshold probability p, otherwise the image - is not considered an adversarial. - p : float - The threshold probability. If the probability of the - target class is above this threshold, the image is - considered an adversarial. It must satisfy 0 <= p <= 1. - - """ - - def __init__(self, target_class, p): - super(TargetClassProbability, self).__init__() - self._target_class = target_class - assert 0 <= p <= 1 - self.p = p - - def target_class(self): - return self._target_class - - def name(self): - return '{}-{}-{:.04f}'.format( - self.__class__.__name__, self.target_class(), self.p) - - def is_adversarial(self, predictions, label): - probabilities = softmax(predictions) - return probabilities[self.target_class()] > self.p -""" -Provides classes to measure the distance between images. - -Distances ---------- - -.. autosummary:: - :nosignatures: - - MeanSquaredDistance - MeanAbsoluteDistance - Linfinity - L0 - -Aliases -------- - -.. autosummary:: - :nosignatures: - - MSE - MAE - Linf - -Base class ----------- - -To implement a new distance, simply subclass the :class:`Distance` class and -implement the :meth:`_calculate` method. - -.. autosummary:: - :nosignatures: - - Distance - -""" -from __future__ import division -from numbers import Number -import numpy as np -import functools -import sys -import abc -abstractmethod = abc.abstractmethod - -if sys.version_info >= (3, 4): - ABC = abc.ABC -else: # pragma: no cover - ABC = abc.ABCMeta('ABC', (), {}) - - -@functools.total_ordering -class Distance(ABC): - """Base class for distances. - - This class should be subclassed when implementing - new distances. Subclasses must implement _calculate. - - """ - - def __init__( - self, - reference=None, - other=None, - bounds=None, - value=None): - - if value is not None: - # alternative constructor - assert isinstance(value, Number) - assert reference is None - assert other is None - assert bounds is None - self.reference = None - self.other = None - self._bounds = None - self._value = value - self._gradient = None - else: - # standard constructor - self.reference = reference - self.other = other - self._bounds = bounds - self._value, self._gradient = self._calculate() - - assert self._value is not None - - @property - def value(self): - return self._value - - @property - def gradient(self): - return self._gradient - - @abstractmethod - def _calculate(self): - """Returns distance and gradient of distance w.r.t. 
to self.other""" - raise NotImplementedError - - def name(self): - return self.__class__.__name__ - - def __str__(self): - return '{} = {:.6e}'.format(self.name(), self._value) - - def __repr__(self): - return self.__str__() - - def __eq__(self, other): - if other.__class__ != self.__class__: - raise TypeError('Comparisons are only possible between the same distance types.') # noqa: E501 - return self.value == other.value - - def __lt__(self, other): - if other.__class__ != self.__class__: - raise TypeError('Comparisons are only possible between the same distance types.') # noqa: E501 - return self.value < other.value - - -class MeanSquaredDistance(Distance): - """Calculates the mean squared error between two images. - - """ - - def _calculate(self): - min_, max_ = self._bounds - n = self.reference.size - f = n * (max_ - min_)**2 - - diff = self.other - self.reference - value = np.vdot(diff, diff) / f - - # calculate the gradient only when needed - self._g_diff = diff - self._g_f = f - gradient = None - return value, gradient - - @property - def gradient(self): - if self._gradient is None: - self._gradient = self._g_diff / (self._g_f / 2) - return self._gradient - - def __str__(self): - return 'normalized MSE = {:.2e}'.format(self._value) - - -MSE = MeanSquaredDistance - - -class MeanAbsoluteDistance(Distance): - """Calculates the mean absolute error between two images. - - """ - - def _calculate(self): - min_, max_ = self._bounds - diff = (self.other - self.reference) / (max_ - min_) - value = np.mean(np.abs(diff)).astype(np.float64) - n = self.reference.size - gradient = 1 / n * np.sign(diff) / (max_ - min_) - return value, gradient - - def __str__(self): - return 'normalized MAE = {:.2e}'.format(self._value) - - -MAE = MeanAbsoluteDistance - - -class Linfinity(Distance): - """Calculates the L-infinity norm of the difference between two images. - - """ - - def _calculate(self): - min_, max_ = self._bounds - diff = (self.other - self.reference) / (max_ - min_) - value = np.max(np.abs(diff)).astype(np.float64) - gradient = None - return value, gradient - - @property - def gradient(self): - raise NotImplementedError - - def __str__(self): - return 'normalized Linf distance = {:.2e}'.format(self._value) - - -Linf = Linfinity - - -class L0(Distance): - """Calculates the L0 norm of the difference between two images. - - """ - - def _calculate(self): - diff = self.other - self.reference - value = np.sum(diff != 0) - gradient = None - return value, gradient - - @property - def gradient(self): - raise NotImplementedError - - def __str__(self): - return 'L0 distance = {}'.format(self._value) -# -*- coding: utf-8 -*- -""" -Gradient estimators to numerically approximate gradients. -""" -import logging -import warnings -import numpy as np - -from .utils import batch_crossentropy -from . import nprng - - -class CoordinateWiseGradientEstimator(object): - """Implements a simple gradient-estimator using - the coordinate-wise finite-difference method. 
- - """ - - def __init__(self, epsilon, clip=True): - self._epsilon = epsilon - self.clip = clip - - def _get_noise(self, shape, dtype): - N = np.prod(shape) - noise = np.eye(N, N, dtype=dtype) - noise = noise.reshape((N,) + shape) - noise = np.concatenate([noise, -noise]) - return noise - - def __call__(self, pred_fn, x, label, bounds): - noise = self._get_noise(x.shape, x.dtype) - N = len(noise) - - min_, max_ = bounds - scaled_epsilon = self._epsilon * (max_ - min_) - - theta = x + scaled_epsilon * noise - if self.clip: - theta = np.clip(theta, min_, max_) - logits = pred_fn(theta) - assert len(logits) == N - loss = batch_crossentropy(label, logits) - assert loss.shape == (N,) - - loss = loss.reshape((N,) + (1,) * x.ndim) - assert loss.ndim == noise.ndim - gradient = np.sum(loss * noise, axis=0) - gradient /= 2 * scaled_epsilon - return gradient - - -class EvolutionaryStrategiesGradientEstimator(object): - """Implements gradient estimation using evolution strategies. - - This gradient estimator is based on work from [1]_ and [2]_. - - References - ---------- - .. [1] Andrew Ilyas, Logan Engstrom, Anish Athalye, Jessy Lin, - "Black-box Adversarial Attacks with Limited Queries and - Information", https://arxiv.org/abs/1804.08598 - .. [2] Daan Wierstra, Tom Schaul, Jan Peters, Jürgen Schmidhuber, - "Natural evolution strategies", - http://people.idsia.ch/~tom/publications/nes.pdf - - """ - - def __init__(self, epsilon, samples=100, clip=True): - self._epsilon = epsilon - if samples % 2 != 0: # pragma: no cover - warnings.warn('antithetic sampling: samples should be even') - self._samples = (samples // 2) * 2 - self.clip = clip - - def _get_noise(self, shape, dtype): - samples = self._samples - assert samples % 2 == 0 - shape = (samples // 2,) + shape - noise = nprng.normal(size=shape).astype(np.float32) - noise = np.concatenate([noise, -noise]) - return noise - - def __call__(self, pred_fn, x, label, bounds): - noise = self._get_noise(x.shape, x.dtype) - N = len(noise) - - if N >= 2 * x.size: # pragma: no cover - logging.info('CoordinateWiseGradientEstimator might be better' - ' without requiring more samples.') - - min_, max_ = bounds - scaled_epsilon = self._epsilon * (max_ - min_) - - theta = x + scaled_epsilon * noise - if self.clip: - theta = np.clip(theta, min_, max_) - logits = pred_fn(theta) - assert len(logits) == N - loss = batch_crossentropy(label, logits) - assert loss.shape == (N,) - - loss = loss.reshape((N,) + (1,) * x.ndim) - assert loss.ndim == noise.ndim - gradient = np.mean(loss * noise, axis=0) - gradient /= 2 * scaled_epsilon - return gradient -import random -import numpy as np - -rng = random.Random() -nprng = np.random.RandomState() - - -def set_seeds(seed): - """Sets the seeds of both random number generators used by Foolbox. - - Parameters - ---------- - seed : int - The seed for both random number generators. - - """ - rng.seed(seed) - nprng.seed(seed) -import os - -import numpy as np - - -def softmax(logits): - """Transforms predictions into probability values. - - Parameters - ---------- - logits : array_like - The logits predicted by the model. - - Returns - ------- - `numpy.ndarray` - Probability values corresponding to the logits. - """ - - assert logits.ndim == 1 - - # for numerical reasons we subtract the max logit - # (mathematically it doesn't matter!) 
- # otherwise exp(logits) might become too large or too small - logits = logits - np.max(logits) - e = np.exp(logits) - return e / np.sum(e) - - -def crossentropy(label, logits): - """Calculates the cross-entropy. - - Parameters - ---------- - logits : array_like - The logits predicted by the model. - label : int - The label describing the target distribution. - - Returns - ------- - float - The cross-entropy between softmax(logits) and onehot(label). - - """ - - assert logits.ndim == 1 - - # for numerical reasons we subtract the max logit - # (mathematically it doesn't matter!) - # otherwise exp(logits) might become too large or too small - logits = logits - np.max(logits) - e = np.exp(logits) - s = np.sum(e) - ce = np.log(s) - logits[label] - return ce - - -def batch_crossentropy(label, logits): - """Calculates the cross-entropy for a batch of logits. - - Parameters - ---------- - logits : array_like - The logits predicted by the model for a batch of inputs. - label : int - The label describing the target distribution. - - Returns - ------- - np.ndarray - The cross-entropy between softmax(logits[i]) and onehot(label) - for all i. - - """ - - assert logits.ndim == 2 - - # for numerical reasons we subtract the max logit - # (mathematically it doesn't matter!) - # otherwise exp(logits) might become too large or too small - logits = logits - np.max(logits, axis=1, keepdims=True) - e = np.exp(logits) - s = np.sum(e, axis=1) - ces = np.log(s) - logits[:, label] - return ces - - -def binarize(x, values, threshold=None, included_in='upper'): - """Binarizes the values of x. - - Parameters - ---------- - values : tuple of two floats - The lower and upper value to which the inputs are mapped. - threshold : float - The threshold; defaults to (values[0] + values[1]) / 2 if None. - included_in : str - Whether the threshold value itself belongs to the lower or - upper interval. - - """ - lower, upper = values - - if threshold is None: - threshold = (lower + upper) / 2. - - x = x.copy() - if included_in == 'lower': - x[x <= threshold] = lower - x[x > threshold] = upper - elif included_in == 'upper': - x[x < threshold] = lower - x[x >= threshold] = upper - else: - raise ValueError('included_in must be "lower" or "upper"') - return x - - -def imagenet_example(shape=(224, 224), data_format='channels_last'): - """ Returns an example image and its imagenet class label. - - Parameters - ---------- - shape : list of integers - The shape of the returned image. - data_format : str - "channels_first" or "channels_last" - - Returns - ------- - image : array_like - The example image. - - label : int - The imagenet label associated with the image. - - NOTE: This function is deprecated and will be removed in the future. 
- """ - assert len(shape) == 2 - assert data_format in ['channels_first', 'channels_last'] - - from PIL import Image - path = os.path.join(os.path.dirname(__file__), 'example.png') - image = Image.open(path) - image = image.resize(shape) - image = np.asarray(image, dtype=np.float32) - image = image[:, :, :3] - assert image.shape == shape + (3,) - if data_format == 'channels_first': - image = np.transpose(image, (2, 0, 1)) - return image, 282 - - -def samples(dataset='imagenet', index=0, batchsize=1, shape=(224, 224), - data_format='channels_last'): - ''' Returns a batch of example images and the corresponding labels - - Parameters - ---------- - dataset : string - The data set to load (options: imagenet, mnist, cifar10, - cifar100, fashionMNIST) - index : int - For each data set 20 example images exist. The returned batch - contains the images with index [index, index + 1, index + 2, ...] - batchsize : int - Size of batch. - shape : list of integers - The shape of the returned image (only relevant for Imagenet). - data_format : str - "channels_first" or "channels_last" - - Returns - ------- - images : array_like - The batch of example images - - labels : array of int - The labels associated with the images. - - ''' - from PIL import Image - - images, labels = [], [] - basepath = os.path.dirname(__file__) - samplepath = os.path.join(basepath, 'data') - files = os.listdir(samplepath) - - for idx in range(index, index + batchsize): - i = idx % 20 - - # get filename and label - file = [n for n in files if '{}_{:02d}_'.format(dataset, i) in n][0] - label = int(file.split('.')[0].split('_')[-1]) - - # open file - path = os.path.join(samplepath, file) - image = Image.open(path) - - if dataset == 'imagenet': - image = image.resize(shape) - - image = np.asarray(image, dtype=np.float32) - - if dataset != 'mnist' and data_format == 'channels_first': - image = np.transpose(image, (2, 0, 1)) - - images.append(image) - labels.append(label) - - labels = np.array(labels) - images = np.stack(images) - return images, labels - - -def onehot_like(a, index, value=1): - """Creates an array like a, with all values - set to 0 except one. - - Parameters - ---------- - a : array_like - The returned one-hot array will have the same shape - and dtype as this array - index : int - The index that should be set to `value` - value : single value compatible with a.dtype - The value to set at the given index - - Returns - ------- - `numpy.ndarray` - One-hot array with the given value at the given - location and zeros everywhere else. 
- - """ - - x = np.zeros_like(a) - x[index] = value - return x -# flake8: noqa - -from .base import Attack -from .gradient import GradientAttack, GradientSignAttack, FGSM -from .iterative_gradient import IterativeGradientAttack, IterativeGradientSignAttack -from .lbfgs import LBFGSAttack, ApproximateLBFGSAttack -from .deepfool import DeepFoolAttack, DeepFoolL2Attack, DeepFoolLinfinityAttack -from .saliency import SaliencyMapAttack -from .blur import GaussianBlurAttack -from .contrast import ContrastReductionAttack -from .localsearch import SinglePixelAttack, LocalSearchAttack -from .slsqp import SLSQPAttack -from .additive_noise import AdditiveNoiseAttack, AdditiveUniformNoiseAttack, AdditiveGaussianNoiseAttack -from .blended_noise import BlendedUniformNoiseAttack -from .saltandpepper import SaltAndPepperNoiseAttack -from .precomputed import PrecomputedImagesAttack -from .boundary_attack import BoundaryAttack -from .pointwise import PointwiseAttack -from .binarization import BinarizationRefinementAttack -from .newtonfool import NewtonFoolAttack -from .adef_attack import ADefAttack -from .spatial import SpatialAttack -from .carlini_wagner import CarliniWagnerL2Attack -from .decoupled_direction_norm import DecoupledDirectionNormL2Attack -from .bapp import BoundaryAttackPlusPlus - -from .iterative_projected_gradient import LinfinityBasicIterativeAttack, BasicIterativeMethod, BIM -from .iterative_projected_gradient import L1BasicIterativeAttack -from .iterative_projected_gradient import L2BasicIterativeAttack -from .iterative_projected_gradient import ProjectedGradientDescentAttack, ProjectedGradientDescent, PGD -from .iterative_projected_gradient import RandomStartProjectedGradientDescentAttack, RandomProjectedGradientDescent, RandomPGD -from .iterative_projected_gradient import MomentumIterativeAttack, MomentumIterativeMethod -from abc import abstractmethod -from collections import Iterable - -import numpy as np - -from .base import Attack -from .base import call_decorator -from .. import nprng - - -class AdditiveNoiseAttack(Attack): - """Base class for attacks that add random noise to an image. - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, epsilons=1000): - """Adds uniform or Gaussian noise to the image, gradually increasing - the standard deviation until the image is misclassified. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - epsilons : int or Iterable[float] - Either Iterable of noise levels or number of noise levels - between 0 and 1 that should be tried. 
- - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - image = a.original_image - bounds = a.bounds() - min_, max_ = bounds - - if not isinstance(epsilons, Iterable): - epsilons = np.linspace(0, 1, num=epsilons + 1)[1:] - - for epsilon in epsilons: - noise = self._sample_noise(epsilon, image, bounds) - perturbed = image + epsilon * noise - perturbed = np.clip(perturbed, min_, max_) - - _, is_adversarial = a.predictions(perturbed) - if is_adversarial: - return - - @abstractmethod - def _sample_noise(self): - raise NotImplementedError - - -class AdditiveUniformNoiseAttack(AdditiveNoiseAttack): - """Adds uniform noise to the image, gradually increasing - the standard deviation until the image is misclassified. - - """ - - def _sample_noise(self, epsilon, image, bounds): - min_, max_ = bounds - w = epsilon * (max_ - min_) - noise = nprng.uniform(-w, w, size=image.shape) - noise = noise.astype(image.dtype) - return noise - - -class AdditiveGaussianNoiseAttack(AdditiveNoiseAttack): - """Adds Gaussian noise to the image, gradually increasing - the standard deviation until the image is misclassified. - - """ - - def _sample_noise(self, epsilon, image, bounds): - min_, max_ = bounds - std = epsilon / np.sqrt(3) * (max_ - min_) - noise = nprng.normal(scale=std, size=image.shape) - noise = noise.astype(image.dtype) - return noise -import logging - -from scipy.interpolate import RectBivariateSpline -from scipy.ndimage.filters import gaussian_filter -import numpy as np - -from .base import Attack -from .base import call_decorator - - -def _transpose_image(image): - # transpose the image so the color axis - # is at the front: image.shape is then c x h x w: - return np.transpose(image, (2, 0, 1)) - - -def _re_transpose_image(image): - # transpose the image back so the color axis - # is at the end: image.shape is then h x w x c: - return np.transpose(image, (1, 2, 0)) - - -def _difference_map(image, color_axis): - """Difference map of the image. - Approximate derivatives of the function image[c, :, :] - (e.g. PyTorch) or image[:, :, c] (e.g. Keras). - dfdx, dfdy = difference_map(image) - In: - image: numpy.ndarray - of shape C x h x w or h x w x C, with C = 1 or C = 3 - (color channels), h, w >= 3, and [type] is 'Float' or - 'Double'. Contains the values of functions f_b: - R ^ 2 -> R ^ C, b = 1, ..., B, on the grid - {0, ..., h - 1} x {0, ..., w - 1}. - Out: - dfdx: numpy.ndarray - dfdy: numpy.ndarray - of shape C x h x w or h x w x C contain the x and - y derivatives of f at the points on the grid, - approximated by central differences (except on - boundaries): - For c = 0, ... , C, i = 1, ..., h - 2, - j = 1, ..., w - 2. - e.g. for shape = c x h x w: - dfdx[c, i, j] = (image[c, i, j + 1] - - image[c, i, j - 1]) / 2 - dfdx[c, i, j] = (image[c, i + 1, j] - - image[c, i - 1, j]) / 2 - positive x-direction is along rows from left to right. - positive y-direction is along columns from above to below. 
-    """
-
-    if color_axis == 2:
-        image = _transpose_image(image)
-    # Derivative in x direction (rows from left to right)
-    dfdx = np.zeros_like(image)
-    # forward difference in first column
-    dfdx[:, :, 0] = image[:, :, 1] - image[:, :, 0]
-    # backwards difference in last column
-    dfdx[:, :, -1] = image[:, :, -1] - image[:, :, -2]
-    # central difference elsewhere
-    dfdx[:, :, 1:-1] = 0.5 * (image[:, :, 2:] - image[:, :, :-2])
-
-    # Derivative in y direction (columns from above to below)
-    dfdy = np.zeros_like(image)
-    # forward difference in first row
-    dfdy[:, 0, :] = image[:, 1, :] - image[:, 0, :]
-    # backwards difference in last row
-    dfdy[:, -1, :] = image[:, -1, :] - image[:, -2, :]
-    # central difference elsewhere
-    dfdy[:, 1:-1, :] = 0.5 * (image[:, 2:, :] - image[:, :-2, :])
-
-    return dfdx, dfdy
-
-
-def _compose(image, vec_field, color_axis):
-    """Calculate the composition of the function image with the vector
-    field vec_field by interpolation.
-    new_func = compose(image, vec_field)
-    In:
-    image: numpy.ndarray
-        of shape C x h x w with C = 3 or C = 1 (color channels),
-        h, w >= 2, and [type] = 'Float' or 'Double'.
-        Contains the values of a function f: R ^ 2 -> R ^ C
-        on the grid {0, ..., h - 1} x {0, ..., w - 1}.
-    vec_field: numpy.ndarray
-        of shape (h, w, 2)
-        vec_field[y, x, 0] is the x-coordinate of the vector vec_field[y, x]
-        vec_field[y, x, 1] is the y-coordinate of the vector vec_field[y, x]
-        positive x-direction is along rows from left to right
-        positive y-direction is along columns from above to below
-    """
-
-    if color_axis == 2:
-        image = _transpose_image(image)
-
-    c, h, w = image.shape  # colors, height, width
-    hrange = np.arange(h)
-    wrange = np.arange(w)
-    MGx, MGy = np.meshgrid(wrange, hrange)
-
-    defMGx = (MGx + vec_field[:, :, 0]).clip(0, w - 1)
-    defMGy = (MGy + vec_field[:, :, 1]).clip(0, h - 1)
-
-    new_image = np.empty_like(image)
-
-    for channel in range(c):
-        # Get a linear interpolation for this color channel.
-        interpolation = RectBivariateSpline(hrange, wrange, image[channel],
-                                            kx=1, ky=1)
-
-        # grid = False since the deformed grid is irregular
-        new_image[channel] = interpolation(defMGy, defMGx, grid=False)
-    if color_axis == 2:
-        return _re_transpose_image(new_image)
-    else:
-        return new_image
-
-
-def _create_vec_field(fval, gradf, d1x, d2x, color_axis, smooth=0):
-    """Calculate the deformation vector field
-    In:
-    fval: float
-    gradf: numpy.ndarray
-        of shape C x h x w with C = 3 or C = 1
-        (color channels), h, w >= 1.
-    d1x: numpy.ndarray
-        of shape C x h x w and [type] = 'Float' or 'Double'.
-    d2x: numpy.ndarray
-        of shape C x h x w and [type] = 'Float' or 'Double'.
-    smooth: float
-        Width of the Gaussian kernel used for smoothing
-        (default is 0 for no smoothing).
-    Out:
-    vec_field: numpy.ndarray
-        of shape (h, w, 2).
-    """
-
-    if color_axis == 2:
-        gradf = _transpose_image(gradf)
-
-    c, h, w = gradf.shape  # colors, height, width
-
-    # Sum over color channels
-    alpha1 = np.sum(gradf * d1x, axis=0)
-    alpha2 = np.sum(gradf * d2x, axis=0)
-
-    norm_squared_alpha = (alpha1 ** 2).sum() + (alpha2 ** 2).sum()
-
-    # Smoothing
-    if smooth > 0:
-        alpha1 = gaussian_filter(alpha1, smooth)
-        alpha2 = gaussian_filter(alpha2, smooth)
-        norm_squared_alpha = (alpha1 ** 2).sum() + (alpha2 ** 2).sum()
-        # In theory, we need to apply the filter a second time.
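-        # The second pass below applies the filter once more to the
-        # dual variables before the vector field is assembled, as the
-        # ADef formulation suggests.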
-        alpha1 = gaussian_filter(alpha1, smooth)
-        alpha2 = gaussian_filter(alpha2, smooth)
-
-    vec_field = np.empty((h, w, 2))
-    vec_field[:, :, 0] = -fval * alpha1 / norm_squared_alpha
-    vec_field[:, :, 1] = -fval * alpha2 / norm_squared_alpha
-
-    return vec_field
-
-
-class ADefAttack(Attack):
-    """Adversarial attack that distorts the image, i.e. changes the locations
-    of pixels. The algorithm is described in [1]_;
-    a repository with the original code can be found at [2]_.
-    References
-    ----------
-    .. [1] Rima Alaifari, Giovanni S. Alberti, and Tandri Gauksson:
-           "ADef: an Iterative Algorithm to Construct Adversarial
-           Deformations", https://arxiv.org/abs/1804.07729
-    .. [2] https://gitlab.math.ethz.ch/tandrig/ADef/tree/master
-    """
-
-    def _initialize(self):
-        self.vector_field = None
-
-    @call_decorator
-    def __call__(self, input_or_adv, unpack=True, max_iter=100,
-                 max_norm=np.inf, label=None, smooth=1.0, subsample=10):
-        """Parameters
-        ----------
-        input_or_adv : `numpy.ndarray` or :class:`Adversarial`
-            The original, unperturbed input as a `numpy.ndarray` or
-            an :class:`Adversarial` instance.
-        label : int
-            The reference label of the original input. Must be passed
-            if `a` is a `numpy.ndarray`, must not be passed if `a` is
-            an :class:`Adversarial` instance.
-        unpack : bool
-            If true, returns the adversarial input, otherwise returns
-            the Adversarial object.
-        max_iter : int > 0
-            Maximum number of iterations (default max_iter = 100).
-        max_norm : float
-            Maximum l2 norm of the vector field (default max_norm = numpy.inf).
-        smooth : float >= 0
-            Width of the Gaussian kernel used for smoothing
-            (default is smooth = 1.0; use 0 for no smoothing).
-        subsample : int >= 2
-            Limit on the number of the most likely classes that should
-            be considered. A small value is usually sufficient and much
-            faster. (default subsample = 10)
-        """
-        a = input_or_adv
-        del input_or_adv
-        del label
-        del unpack
-
-        if not a.has_gradient():
-            return
-
-        perturbed = a.original_image.copy()  # is updated in every iteration
-
-        # image_original is not updated, but kept as a copy
-        image_original = a.original_image.copy()
-        target_class = a.target_class()
-        targeted = target_class is not None
-        original_label = a.original_class
-
-        # ADef targets classes according to their prediction score. If the
-        # attack is untargeted, ADef will take the labels of the top
-        # 'subsample' classes without the top class. The top class is
-        # the class with the highest probability and not among the targets.
-        # Using a 'subsample' of classes is faster than taking all the
-        # remaining 999 classes of ImageNet into account. For a targeted
-        # attack, it is necessary to find the probability of the target
-        # class and pass this index to ind_of_candidates (not the actual
-        # target).
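-        # For example, with subsample=10 the untargeted branch below
-        # sets ind_of_candidates = [1, ..., 9], i.e. the nine runner-up
-        # classes right after the top prediction.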
- if targeted: - logits, _ = a.predictions(perturbed) - pred_sorted = (-logits).argsort() - index_of_target_class, = np.where(pred_sorted == target_class) - ind_of_candidates = index_of_target_class - else: - # choose the top-k classes - logging.info('Only testing the top-{} classes'.format(subsample)) - assert isinstance(subsample, int) - assert subsample >= 2 - ind_of_candidates = np.arange(1, subsample) - - # Number of classes to target - num_classes = ind_of_candidates.size - - n = 0 # iteration number - - color_axis = a.channel_axis(batch=False) # get color axis - assert color_axis in [0, 2] - hw = [perturbed.shape[i] for i in range(perturbed.ndim) - if i != color_axis] - h, w = hw - - logits, is_adv = a.predictions(perturbed) - - # Indices of the 'num_classes' highest values in descending order: - candidates = np.argsort(-logits)[ind_of_candidates] - - # fx[lab] is negative if the model prefers the original label - # for x over the label 'lab'. - fx = logits - logits[original_label] - - norm_full = 0 # norm of the vector field - vec_field_full = np.zeros((h, w, 2)) # the vector field - - current_label = original_label - logging.info('Iterations finished: 0') - logging.info('Current label: {} '.format(current_label)) - - for step in range(max_iter): - n += 1 - _, is_adv = a.predictions(perturbed) - if is_adv: - a.predictions(perturbed) - logging.info( - 'Image successfully deformed from {} to {}'.format( - original_label, current_label)) - self.vector_field = vec_field_full - return - - d1x, d2x = _difference_map(perturbed, color_axis) - - logits_for_grad = np.zeros_like(logits) - logits_for_grad[original_label] = 1 - - grad_original = a.backward(logits_for_grad, perturbed) - - # Find vector fields for the image and each candidate label. - # Keep the smallest vector field for each image. - norm_min = np.inf - - # iterate over all candidate classes - for target_no in range(num_classes): - - target_label = candidates[target_no] - logits_for_grad = np.zeros_like(logits) - logits_for_grad[target_label] = 1 - - # gradient of the target label w.r.t. image - grad_target = a.backward(logits_for_grad, perturbed) - - # Derivative of the binary classifier 'F_lab - F_orig' - dfx = grad_target - grad_original - f_im = fx[target_label] - - # create the vector field - vec_field_target = _create_vec_field( - f_im, dfx, d1x, d2x, color_axis, smooth) - - vec_field_target += vec_field_full - - # l2 norm of vector field. 
- norm_target = np.linalg.norm(vec_field_target.ravel()) - - # choose the vector field with the smallest norm - if norm_target < norm_min: - norm_min = norm_target - vec_field_min = vec_field_target - - # Update the image by applying the vector field, - # the vector field is always applied to the original image, - # since the current vector field is added to all prior - # vector fields via vec_field_target += vec_field_full - perturbed = _compose(image_original.copy(), vec_field_min, - color_axis) - - vec_field_full = vec_field_min - norm_full = norm_min - - # getting the current label after applying the vector field - logits, _ = a.predictions(perturbed) - current_label = np.argmax(logits) - fx = logits - logits[current_label] - - logging.info('Iterations finished: {} '.format(n)) - logging.info('Current label: {} '.format(current_label)) - logging.info('Norm vector field: {} '.format(norm_full)) - - logits, _ = a.predictions(perturbed) - current_label = np.argmax(logits) - logging.info('{} -> {}'.format(original_label, current_label)) - - a.predictions(perturbed) - - self.vector_field = vec_field_full - return -from __future__ import print_function -from __future__ import division - -import warnings -import time -import sys - -from .base import Attack -from .base import call_decorator -from ..distances import MSE, Linf -import numpy as np -import math - - -class BoundaryAttackPlusPlus(Attack): - """A powerful adversarial attack that requires neither gradients - nor probabilities. - - Notes - ----- - Features: - * ability to switch between two types of distances: MSE and Linf. - * ability to continue previous attacks by passing an instance of the - Adversarial class - * ability to pass an explicit starting point; especially to initialize - a targeted attack - * ability to pass an alternative attack used for initialization - * ability to specify the batch size - - References - ---------- - .. - Boundary Attack ++ was originally proposed by Chen and Jordan. - It is a decision-based attack that requires access to output - labels of a model alone. - Paper link: https://arxiv.org/abs/1904.02144 - The implementation in Foolbox is based on Boundary Attack. - - """ - - @call_decorator - def __call__( - self, - input_or_adv, - label=None, - unpack=True, - iterations=64, - initial_num_evals=100, - max_num_evals=10000, - stepsize_search='grid_search', - gamma=0.01, - starting_point=None, - batch_size=256, - internal_dtype=np.float64, - log_every_n_steps=1, - verbose=False): - """Applies Boundary Attack++. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, correctly classified image. If image is a - numpy array, label must be passed as well. If image is - an :class:`Adversarial` instance, label must not be passed. - label : int - The reference label of the original image. Must be passed - if image is a numpy array, must not be passed if image is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial image, otherwise returns - the Adversarial object. - iterations : int - Number of iterations to run. - initial_num_evals: int - Initial number of evaluations for gradient estimation. - Larger initial_num_evals increases time efficiency, but - may decrease query efficiency. - max_num_evals: int - Maximum number of evaluations for gradient estimation. - stepsize_search: str - How to search for stepsize; choices are 'geometric_progression', - 'grid_search'. 
'geometric_progression' initializes the stepsize
-            by ||x_t - x||_p / sqrt(iteration), and keeps decreasing it
-            by half until reaching the target side of the boundary.
-            'grid_search' chooses the optimal epsilon over a grid, in the
-            scale of ||x_t - x||_p.
-        gamma: float
-            The binary search threshold theta is gamma / sqrt(d) for
-            l2 attack and gamma / d for linf attack.
-
-        starting_point : `numpy.ndarray`
-            Adversarial input to use as a starting point, required
-            for targeted attacks.
-        batch_size : int
-            Batch size for model prediction.
-        internal_dtype : np.float32 or np.float64
-            Higher precision might be slower but is numerically more stable.
-        log_every_n_steps : int
-            Determines verbosity of the logging.
-        verbose : bool
-            Controls verbosity of the attack.
-
-        """
-
-        self.initial_num_evals = initial_num_evals
-        self.max_num_evals = max_num_evals
-        self.stepsize_search = stepsize_search
-        self.gamma = gamma
-        self.batch_size = batch_size
-        self._starting_point = starting_point
-        self.internal_dtype = internal_dtype
-        self.log_every_n_steps = log_every_n_steps
-        self.verbose = verbose
-
-        # Set constraint based on the distance.
-        if self._default_distance == MSE:
-            self.constraint = 'l2'
-        elif self._default_distance == Linf:
-            self.constraint = 'linf'
-
-        # Set binary search threshold.
-        self.shape = input_or_adv.original_image.shape
-        self.d = np.prod(self.shape)
-        if self.constraint == 'l2':
-            self.theta = self.gamma / np.sqrt(self.d)
-        else:
-            self.theta = self.gamma / self.d
-        print('Boundary Attack ++ optimized for {} distance'.format(
-            self.constraint))
-
-        if not verbose:
-            print('run with verbose=True to see details')
-
-        return self.attack(
-            input_or_adv,
-            iterations=iterations)
-
-    def attack(
-            self,
-            a,
-            iterations):
-        """
-        iterations : int
-            Maximum number of iterations to run.
-        """
-        self.t_initial = time.time()
-
-        # ===========================================================
-        # Increase floating point precision
-        # ===========================================================
-
-        self.external_dtype = a.original_image.dtype
-
-        assert self.internal_dtype in [np.float32, np.float64]
-        assert self.external_dtype in [np.float32, np.float64]
-
-        assert not (self.external_dtype == np.float64 and
-                    self.internal_dtype == np.float32)
-
-        a.set_distance_dtype(self.internal_dtype)
-
-        # ===========================================================
-        # Construct batch decision function with binary output.
-        # ===========================================================
-        # decision_function = lambda x: a.batch_predictions(
-        #     x.astype(self.external_dtype), strict=False)[1]
-        def decision_function(x):
-            outs = []
-            num_batches = int(math.ceil(len(x) * 1.0 / self.batch_size))
-            for j in range(num_batches):
-                current_batch = x[self.batch_size * j:
-                                  self.batch_size * (j + 1)]
-                current_batch = current_batch.astype(self.external_dtype)
-                out = a.batch_predictions(current_batch, strict=False)[1]
-                outs.append(out)
-            outs = np.concatenate(outs, axis=0)
-            return outs
-
-        # ===========================================================
-        # initialize time measurements
-        # ===========================================================
-        self.time_gradient_estimation = 0
-
-        self.time_search = 0
-
-        self.time_initialization = 0
-
-        # ===========================================================
-        # Initialize variables, constants, hyperparameters, etc.
- # =========================================================== - - # make sure repeated warnings are shown - warnings.simplefilter('always', UserWarning) - - # get bounds - bounds = a.bounds() - self.clip_min, self.clip_max = bounds - - # =========================================================== - # Find starting point - # =========================================================== - - self.initialize_starting_point(a) - - if a.image is None: - warnings.warn( - 'Initialization failed.' - ' it might be necessary to pass an explicit starting' - ' point.') - return - - self.time_initialization += time.time() - self.t_initial - - assert a.image.dtype == self.external_dtype - # get original and starting point in the right format - original = a.original_image.astype(self.internal_dtype) - perturbed = a.image.astype(self.internal_dtype) - - # =========================================================== - # Iteratively refine adversarial - # =========================================================== - t0 = time.time() - - # Project the initialization to the boundary. - perturbed, dist_post_update = self.binary_search_batch( - original, np.expand_dims(perturbed, 0), decision_function) - - dist = self.compute_distance(perturbed, original) - - distance = a.distance.value - self.time_search += time.time() - t0 - - # log starting point - self.log_step(0, distance) - - for step in range(1, iterations + 1): - - t0 = time.time() - - # =========================================================== - # Gradient direction estimation. - # =========================================================== - # Choose delta. - delta = self.select_delta(dist_post_update, step) - - # Choose number of evaluations. - num_evals = int(min([self.initial_num_evals * np.sqrt(step), - self.max_num_evals])) - - # approximate gradient. - gradf = self.approximate_gradient(decision_function, perturbed, - num_evals, delta) - - if self.constraint == 'linf': - update = np.sign(gradf) - else: - update = gradf - t1 = time.time() - self.time_gradient_estimation += t1 - t0 - - # =========================================================== - # Update, and binary search back to the boundary. - # =========================================================== - if self.stepsize_search == 'geometric_progression': - # find step size. - epsilon = self.geometric_progression_for_stepsize( - perturbed, update, dist, decision_function, step) - - # Update the sample. - perturbed = self.clip_image(perturbed + epsilon * update, - self.clip_min, self.clip_max) - - # Binary search to return to the boundary. - perturbed, dist_post_update = self.binary_search_batch( - original, perturbed[None], decision_function) - - elif self.stepsize_search == 'grid_search': - # Grid search for stepsize. - epsilons = np.logspace(-4, 0, num=20, endpoint=True) * dist - epsilons_shape = [20] + len(self.shape) * [1] - perturbeds = perturbed + epsilons.reshape( - epsilons_shape) * update - perturbeds = self.clip_image(perturbeds, - self.clip_min, self.clip_max) - idx_perturbed = decision_function(perturbeds) - - if np.sum(idx_perturbed) > 0: - # Select the perturbation that yields the minimum - # distance after binary search. - perturbed, dist_post_update = self.binary_search_batch( - original, perturbeds[idx_perturbed], - decision_function) - t2 = time.time() - - self.time_search += t2 - t1 - - # compute new distance. 
-            dist = self.compute_distance(perturbed, original)
-
-            # ===========================================================
-            # Log the step
-            # ===========================================================
-            # Using foolbox definition of distance for logging.
-            if self.constraint == 'l2':
-                distance = dist ** 2 / self.d / \
-                    (self.clip_max - self.clip_min) ** 2
-            elif self.constraint == 'linf':
-                distance = dist / (self.clip_max - self.clip_min)
-            message = ' (took {:.5f} seconds)'.format(t2 - t0)
-            self.log_step(step, distance, message)
-            sys.stdout.flush()
-
-        # ===========================================================
-        # Log overall runtime
-        # ===========================================================
-
-        self.log_time()
-
-    # ===============================================================
-    #
-    # Other methods
-    #
-    # ===============================================================
-
-    def initialize_starting_point(self, a):
-        starting_point = self._starting_point
-
-        if a.image is not None:
-            print(
-                'Attack is applied to a previously found adversarial.'
-                ' Continuing search for better adversarials.')
-            if starting_point is not None:  # pragma: no cover
-                warnings.warn(
-                    'Ignoring starting_point parameter because the attack'
-                    ' is applied to a previously found adversarial.')
-            return
-
-        if starting_point is not None:
-            a.predictions(starting_point)
-            assert a.image is not None, ('Invalid starting point provided.'
-                                         ' Please provide a starting point'
-                                         ' that is adversarial.')
-            return
-
-        """
-        Apply BlendedUniformNoiseAttack if no initialization is
-        provided; this is an efficient implementation of
-        BlendedUniformNoiseAttack in Foolbox.
-        """
-        success = 0
-        num_evals = 0
-
-        while True:
-            random_noise = np.random.uniform(self.clip_min, self.clip_max,
-                                             size=self.shape)
-            _, success = a.predictions(
-                random_noise.astype(self.external_dtype))
-            num_evals += 1
-            if success:
-                break
-            if num_evals > 1e4:
-                return
-
-        # Binary search to minimize l2 distance to original image.
-        low = 0.0
-        high = 1.0
-        while high - low > 0.001:
-            mid = (high + low) / 2.0
-            blended = (1 - mid) * a.original_image + mid * random_noise
-            _, success = a.predictions(blended.astype(self.external_dtype))
-            if success:
-                high = mid
-            else:
-                low = mid
-
-    def compute_distance(self, image1, image2):
-        if self.constraint == 'l2':
-            return np.linalg.norm(image1 - image2)
-        elif self.constraint == 'linf':
-            return np.max(abs(image1 - image2))
-
-    def clip_image(self, image, clip_min, clip_max):
-        """ Clip an image, or an image batch,
-        with upper and lower threshold. """
-        return np.minimum(np.maximum(clip_min, image), clip_max)
-
-    def project(self, original_image, perturbed_images, alphas):
-        """ Projection onto given l2 / linf balls in a batch. """
-        alphas_shape = [len(alphas)] + [1] * len(self.shape)
-        alphas = alphas.reshape(alphas_shape)
-        if self.constraint == 'l2':
-            projected = (1 - alphas) * original_image + \
-                alphas * perturbed_images
-        elif self.constraint == 'linf':
-            projected = self.clip_image(
-                perturbed_images,
-                original_image - alphas,
-                original_image + alphas
-            )
-        return projected
-
-    def binary_search_batch(self, original_image, perturbed_images,
-                            decision_function):
-        """ Binary search to approach the boundary. """
-
-        # Compute the distance between each perturbed image and the
-        # original image.
-        dists_post_update = np.array(
-            [self.compute_distance(original_image,
-                                   perturbed_image) for perturbed_image in
-             perturbed_images])
-
-        # Choose upper thresholds in binary searches based on constraint.
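-        # For linf, project() clips into a ball of radius alpha around
-        # the original image, so the upper bounds start at the current
-        # distances; for l2, alpha is an interpolation factor in [0, 1],
-        # so the upper bounds start at 1.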
-        if self.constraint == 'linf':
-            highs = dists_post_update
-            # Stopping criteria.
-            thresholds = np.minimum(dists_post_update * self.theta,
-                                    self.theta)
-        else:
-            highs = np.ones(len(perturbed_images))
-            thresholds = self.theta
-
-        lows = np.zeros(len(perturbed_images))
-
-        # Bisect iteratively until all intervals fall below the threshold.
-        while np.max((highs - lows) / thresholds) > 1:
-            # project onto the midpoints.
-            mids = (highs + lows) / 2.0
-            mid_images = self.project(original_image, perturbed_images,
-                                      mids)
-
-            # Update highs and lows based on model decisions.
-            decisions = decision_function(mid_images)
-            lows = np.where(decisions == 0, mids, lows)
-            highs = np.where(decisions == 1, mids, highs)
-
-        out_images = self.project(original_image, perturbed_images,
-                                  highs)
-
-        # Compute distance of the output image to select the best choice.
-        # (only used when stepsize_search is grid_search.)
-        dists = np.array([
-            self.compute_distance(
-                original_image,
-                out_image
-            )
-            for out_image in out_images])
-        idx = np.argmin(dists)
-
-        dist = dists_post_update[idx]
-        out_image = out_images[idx]
-        return out_image, dist
-
-    def select_delta(self, dist_post_update, current_iteration):
-        """
-        Choose the delta at the scale of the distance
-        between x and the perturbed sample.
-        """
-        if current_iteration == 1:
-            delta = 0.1 * (self.clip_max - self.clip_min)
-        else:
-            if self.constraint == 'l2':
-                delta = np.sqrt(self.d) * self.theta * dist_post_update
-            elif self.constraint == 'linf':
-                delta = self.d * self.theta * dist_post_update
-
-        return delta
-
-    def approximate_gradient(self, decision_function, sample,
-                             num_evals, delta):
-        """ Gradient direction estimation """
-        # Generate random vectors.
-        noise_shape = [num_evals] + list(self.shape)
-        if self.constraint == 'l2':
-            rv = np.random.randn(*noise_shape)
-        elif self.constraint == 'linf':
-            rv = np.random.uniform(low=-1, high=1, size=noise_shape)
-
-        axis = tuple(range(1, 1 + len(self.shape)))
-        rv = rv / np.sqrt(np.sum(rv ** 2, axis=axis, keepdims=True))
-        perturbed = sample + delta * rv
-        perturbed = self.clip_image(perturbed, self.clip_min,
-                                    self.clip_max)
-        rv = (perturbed - sample) / delta
-
-        # query the model.
-        decisions = decision_function(perturbed)
-        decision_shape = [len(decisions)] + [1] * len(self.shape)
-        fval = 2 * decisions.astype(self.internal_dtype).reshape(
-            decision_shape) - 1.0
-
-        # Baseline subtraction (when fval differs)
-        vals = fval if abs(np.mean(fval)) == 1.0 else fval - np.mean(fval)
-        gradf = np.mean(vals * rv, axis=0)
-
-        # Get the gradient direction.
-        gradf = gradf / np.linalg.norm(gradf)
-
-        return gradf
-
-    def geometric_progression_for_stepsize(self, x, update, dist,
-                                           decision_function,
-                                           current_iteration):
-        """ Geometric progression to search for stepsize.
-        Keep decreasing the stepsize by half until reaching
-        the desired side of the boundary.
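-        For example, at current_iteration = 4 the search starts from
-        epsilon = dist / sqrt(4) = dist / 2 and halves epsilon until
-        decision_function accepts the update.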
-        """
-        epsilon = dist / np.sqrt(current_iteration)
-        while True:
-            updated = self.clip_image(x + epsilon * update,
-                                      self.clip_min, self.clip_max)
-            success = decision_function(updated[None])[0]
-            if success:
-                break
-            else:
-                epsilon = epsilon / 2.0
-
-        return epsilon
-
-    def log_step(self, step, distance, message='', always=False):
-        if not always and step % self.log_every_n_steps != 0:
-            return
-        print('Step {}: {:.5e} {}'.format(
-            step,
-            distance,
-            message))
-
-    def log_time(self):
-        t_total = time.time() - self.t_initial
-        rel_initialization = self.time_initialization / t_total
-        rel_gradient_estimation = self.time_gradient_estimation / t_total
-        rel_search = self.time_search / t_total
-
-        self.printv('Time since beginning: {:.5f}'.format(t_total))
-        self.printv('   {:2.1f}% for initialization ({:.5f})'.format(
-            rel_initialization * 100, self.time_initialization))
-        self.printv('   {:2.1f}% for gradient estimation ({:.5f})'.format(
-            rel_gradient_estimation * 100,
-            self.time_gradient_estimation))
-        self.printv('   {:2.1f}% for search ({:.5f})'.format(
-            rel_search * 100, self.time_search))
-
-    def printv(self, *args, **kwargs):
-        if self.verbose:
-            print(*args, **kwargs)
-from ..distances import MSE
-from ..criteria import Misclassification
-from ..adversarial import StopAttack
-from ..adversarial import Adversarial
-import warnings
-import logging
-import functools
-import sys
-import abc
-abstractmethod = abc.abstractmethod
-
-if sys.version_info >= (3, 4):
-    ABC = abc.ABC
-else:  # pragma: no cover
-    ABC = abc.ABCMeta('ABC', (), {})
-
-
-class Attack(ABC):
-    """Abstract base class for adversarial attacks.
-
-    The :class:`Attack` class represents an adversarial attack that searches
-    for adversarial examples. It should be subclassed when implementing new
-    attacks.
-
-    Parameters
-    ----------
-    model : a :class:`Model` instance
-        The model that should be fooled by the adversarial.
-        Ignored if the attack is called with an :class:`Adversarial` instance.
-    criterion : a :class:`Criterion` instance
-        The criterion that determines which images are adversarial.
-        Ignored if the attack is called with an :class:`Adversarial` instance.
-    distance : a :class:`Distance` class
-        The measure used to quantify similarity between images.
-        Ignored if the attack is called with an :class:`Adversarial` instance.
-    threshold : float or :class:`Distance`
-        If not None, the attack will stop as soon as the adversarial
-        perturbation has a size smaller than this threshold. Can be
-        an instance of the :class:`Distance` class passed to the distance
-        argument, or a float assumed to have the same unit as the
-        given distance. If None, the attack will simply minimize
-        the distance as well as possible. Note that the threshold only
-        influences early stopping of the attack; the returned adversarial
-        does not necessarily have a smaller perturbation size than this
-        threshold; the `reached_threshold()` method can be used to check
-        if the threshold has been reached.
-        Ignored if the attack is called with an :class:`Adversarial` instance.
-
-    Notes
-    -----
-    If a subclass overwrites the constructor, it should call the super
-    constructor with *args and **kwargs.
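-
-    Examples
-    --------
-    A minimal sketch of a subclass (illustrative only, not an attack
-    shipped with this module)::
-
-        class NoOpAttack(Attack):
-            @call_decorator
-            def __call__(self, input_or_adv, label=None, unpack=True):
-                a = input_or_adv
-                a.predictions(a.original_image)  # query without perturbing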
-
-    """
-
-    def __init__(self,
-                 model=None, criterion=Misclassification(),
-                 distance=MSE, threshold=None):
-        self._default_model = model
-        self._default_criterion = criterion
-        self._default_distance = distance
-        self._default_threshold = threshold
-
-        # to customize the initialization in subclasses, please
-        # try to overwrite _initialize instead of __init__ if
-        # possible
-        self._initialize()
-
-    def _initialize(self):
-        """Additional initializer that can be overwritten by
-        subclasses without redefining the full __init__ method
-        including all arguments and documentation."""
-        pass
-
-    @abstractmethod
-    def __call__(self, input_or_adv, label=None, unpack=True, **kwargs):
-        raise NotImplementedError
-
-    def name(self):
-        """Returns a human readable name that uniquely identifies
-        the attack with its hyperparameters.
-
-        Returns
-        -------
-        str
-            Human readable name that uniquely identifies the attack
-            with its hyperparameters.
-
-        Notes
-        -----
-        Defaults to the class name but subclasses can provide more
-        descriptive names and must take hyperparameters into account.
-
-        """
-        return self.__class__.__name__
-
-
-def call_decorator(call_fn):
-    @functools.wraps(call_fn)
-    def wrapper(self, input_or_adv, label=None, unpack=True, **kwargs):
-        assert input_or_adv is not None
-
-        if isinstance(input_or_adv, Adversarial):
-            a = input_or_adv
-            if label is not None:
-                raise ValueError('Label must not be passed when input_or_adv'
-                                 ' is an Adversarial instance')
-        else:
-            if label is None:
-                raise ValueError('Label must be passed when input_or_adv is'
-                                 ' not an Adversarial instance')
-            else:
-                model = self._default_model
-                criterion = self._default_criterion
-                distance = self._default_distance
-                threshold = self._default_threshold
-                if model is None or criterion is None:
-                    raise ValueError('The attack needs to be initialized'
-                                     ' with a model and a criterion or it'
-                                     ' needs to be called with an Adversarial'
-                                     ' instance.')
-                a = Adversarial(model, criterion, input_or_adv, label,
-                                distance=distance, threshold=threshold)
-
-        assert a is not None
-
-        if a.distance.value == 0.:
-            warnings.warn('Not running the attack because the original input'
-                          ' is already misclassified and the adversarial thus'
-                          ' has a distance of 0.')
-        elif a.reached_threshold():
-            warnings.warn('Not running the attack because the given threshold'
-                          ' is already reached')
-        else:
-            try:
-                _ = call_fn(self, a, label=None, unpack=None, **kwargs)
-                assert _ is None, 'decorated __call__ method must return None'
-            except StopAttack:
-                # if a threshold is specified, StopAttack will be thrown
-                # when the threshold is reached; thus we can do early
-                # stopping of the attack
-                logging.info('threshold reached, stopping attack')
-
-        if a.image is None:
-            warnings.warn('{} did not find an adversarial, maybe the model'
-                          ' or the criterion is not supported by this'
-                          ' attack.'.format(self.name()))
-
-        if unpack:
-            return a.image
-        else:
-            return a
-
-    return wrapper
-import numpy as np
-import warnings
-import logging
-
-from .base import Attack
-from .base import call_decorator
-
-
-class BinarizationRefinementAttack(Attack):
-    """For models that preprocess their inputs by binarizing the
-    inputs, this attack can improve adversarials found by other
-    attacks. It does so by utilizing information about the
-    binarization and mapping values to the corresponding value in
-    the clean input or to the right side of the threshold.
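-
-    A typical usage sketch (illustrative; `fmodel`, `image` and `label`
-    are assumed to be defined elsewhere)::
-
-        adversarial = SaltAndPepperNoiseAttack(fmodel)(image, label)
-        refined = BinarizationRefinementAttack(fmodel)(
-            image, label, starting_point=adversarial)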
-
-    """
-
-    @call_decorator
-    def __call__(self, input_or_adv, label=None, unpack=True,
-                 starting_point=None,
-                 threshold=None, included_in='upper'):
-        """For models that preprocess their inputs by binarizing the
-        inputs, this attack can improve adversarials found by other
-        attacks. It does so by utilizing information about the
-        binarization and mapping values to the corresponding value in
-        the clean input or to the right side of the threshold.
-
-        Parameters
-        ----------
-        input_or_adv : `numpy.ndarray` or :class:`Adversarial`
-            The original, unperturbed input as a `numpy.ndarray` or
-            an :class:`Adversarial` instance.
-        label : int
-            The reference label of the original input. Must be passed
-            if `a` is a `numpy.ndarray`, must not be passed if `a` is
-            an :class:`Adversarial` instance.
-        unpack : bool
-            If true, returns the adversarial input, otherwise returns
-            the Adversarial object.
-        starting_point : `numpy.ndarray`
-            Adversarial input to use as a starting point.
-        threshold : float
-            The threshold used by the model's binarization. If None,
-            defaults to (model.bounds()[0] + model.bounds()[1]) / 2.
-        included_in : str
-            Whether the threshold value itself belongs to the lower or
-            upper interval.
-
-        """
-
-        a = input_or_adv
-        del input_or_adv
-        del label
-        del unpack
-
-        self._starting_point = starting_point
-        self.initialize_starting_point(a)
-
-        if a.image is None:
-            warnings.warn(
-                'This attack can only be applied to an adversarial'
-                ' found by another attack, either by calling it with'
-                ' an Adversarial object or by passing a starting_point')
-            return
-
-        assert a.image.dtype == a.original_image.dtype
-        dtype = a.original_image.dtype
-
-        assert np.issubdtype(dtype, np.floating)
-
-        min_, max_ = a.bounds()
-
-        if threshold is None:
-            threshold = (min_ + max_) / 2.
-
-        threshold = dtype.type(threshold)
-        offset = dtype.type(1.)
-
-        if included_in == 'lower':
-            lower = threshold
-            upper = np.nextafter(threshold, threshold + offset)
-        elif included_in == 'upper':
-            lower = np.nextafter(threshold, threshold - offset)
-            upper = threshold
-        else:
-            raise ValueError('included_in must be "lower" or "upper"')
-
-        logging.info('Intervals: [{}, {}] and [{}, {}]'.format(
-            min_, lower, upper, max_))
-
-        assert type(lower) == dtype.type
-        assert type(upper) == dtype.type
-
-        assert lower < upper
-
-        o = a.original_image
-        x = a.image
-
-        p = np.full_like(o, np.nan)
-
-        indices = np.logical_and(o <= lower, x <= lower)
-        p[indices] = o[indices]
-
-        indices = np.logical_and(o <= lower, x >= upper)
-        p[indices] = upper
-
-        indices = np.logical_and(o >= upper, x <= lower)
-        p[indices] = lower
-
-        indices = np.logical_and(o >= upper, x >= upper)
-        p[indices] = o[indices]
-
-        assert not np.any(np.isnan(p))
-
-        logging.info('distance before the {}: {}'.format(
-            self.__class__.__name__, a.distance))
-        _, is_adversarial = a.predictions(p)
-        assert is_adversarial, ('The specified thresholding does not'
-                                ' match what is done by the model.')
-        logging.info('distance after the {}: {}'.format(
-            self.__class__.__name__, a.distance))
-
-    def initialize_starting_point(self, a):
-        starting_point = self._starting_point
-
-        if a.image is not None:
-            if starting_point is not None:  # pragma: no cover
-                warnings.warn(
-                    'Ignoring starting_point because the attack'
-                    ' is applied to a previously found adversarial.')
-            return
-
-        if starting_point is not None:
-            a.predictions(starting_point)
-            assert a.image is not None, ('Invalid starting point provided.'
-                                         ' Please provide a starting point'
-                                         ' that is adversarial.')
-            return
-import logging
-import warnings
-from collections import Iterable
-
-import numpy as np
-
-from .base import Attack
-from .base import call_decorator
-from .. import nprng
-
-
-class BlendedUniformNoiseAttack(Attack):
-    """Blends the image with a uniform noise image until it
-    is misclassified.
-
-    """
-
-    @call_decorator
-    def __call__(self, input_or_adv, label=None, unpack=True,
-                 epsilons=1000, max_directions=1000):
-        """Blends the image with a uniform noise image until it
-        is misclassified.
-
-        Parameters
-        ----------
-        input_or_adv : `numpy.ndarray` or :class:`Adversarial`
-            The original, unperturbed input as a `numpy.ndarray` or
-            an :class:`Adversarial` instance.
-        label : int
-            The reference label of the original input. Must be passed
-            if `a` is a `numpy.ndarray`, must not be passed if `a` is
-            an :class:`Adversarial` instance.
-        unpack : bool
-            If true, returns the adversarial input, otherwise returns
-            the Adversarial object.
-        epsilons : int or Iterable[float]
-            Either Iterable of blending steps or number of blending steps
-            between 0 and 1 that should be tried.
-        max_directions : int
-            Maximum number of random images to try.
-
-        """
-
-        a = input_or_adv
-        del input_or_adv
-        del label
-        del unpack
-
-        image = a.original_image
-        min_, max_ = a.bounds()
-
-        if a.image is not None:  # pragma: no cover
-            warnings.warn('BlendedUniformNoiseAttack started with'
-                          ' previously found adversarial.')
-
-        for j in range(max_directions):
-            # random noise images tend to be classified into the same class,
-            # so we might need to make very many draws if the original class
-            # is that one
-            random_image = nprng.uniform(
-                min_, max_, size=image.shape).astype(image.dtype)
-            _, is_adversarial = a.predictions(random_image)
-            if is_adversarial:
-                logging.info('Found adversarial image after {} '
-                             'attempts'.format(j + 1))
-                break
-        else:
-            # the loop finished without breaking, i.e. no adversarial
-            # random image was found
-            warnings.warn('BlendedUniformNoiseAttack failed to draw a'
-                          ' random image that is adversarial.')
-
-        if not isinstance(epsilons, Iterable):
-            epsilons = np.linspace(0, 1, num=epsilons + 1)[1:]
-
-        for epsilon in epsilons:
-            perturbed = (1 - epsilon) * image + epsilon * random_image
-            # due to limited floating point precision,
-            # clipping can be required
-            if not a.in_bounds(perturbed):  # pragma: no cover
-                np.clip(perturbed, min_, max_, out=perturbed)
-
-            _, is_adversarial = a.predictions(perturbed)
-            if is_adversarial:
-                return
-import numpy as np
-from collections import Iterable
-
-from scipy.ndimage.filters import gaussian_filter
-
-from .base import Attack
-from .base import call_decorator
-
-
-class GaussianBlurAttack(Attack):
-    """Blurs the image until it is misclassified."""
-
-    @call_decorator
-    def __call__(self, input_or_adv, label=None, unpack=True,
-                 epsilons=1000):
-        """Blurs the image until it is misclassified.
-
-        Parameters
-        ----------
-        input_or_adv : `numpy.ndarray` or :class:`Adversarial`
-            The original, unperturbed input as a `numpy.ndarray` or
-            an :class:`Adversarial` instance.
-        label : int
-            The reference label of the original input. Must be passed
-            if `a` is a `numpy.ndarray`, must not be passed if `a` is
-            an :class:`Adversarial` instance.
-        unpack : bool
-            If true, returns the adversarial input, otherwise returns
-            the Adversarial object.
-        epsilons : int or Iterable[float]
-            Either Iterable of standard deviations of the Gaussian blur
-            or number of standard deviations between 0 and 1 that should
-            be tried.
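-
-        Example (illustrative; `fmodel`, `image` and `label` are
-        assumed to be defined elsewhere)::
-
-            attack = GaussianBlurAttack(fmodel)
-            adversarial = attack(image, label=label, epsilons=20)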
-
-        """
-
-        a = input_or_adv
-        del input_or_adv
-        del label
-        del unpack
-
-        image = a.original_image
-        min_, max_ = a.bounds()
-        axis = a.channel_axis(batch=False)
-        hw = [image.shape[i] for i in range(image.ndim) if i != axis]
-        h, w = hw
-        size = max(h, w)
-
-        if not isinstance(epsilons, Iterable):
-            epsilons = np.linspace(0, 1, num=epsilons + 1)[1:]
-
-        for epsilon in epsilons:
-            # epsilon = 1 will correspond to
-            # sigma = size = max(width, height)
-            sigmas = [epsilon * size] * 3
-            sigmas[axis] = 0
-            blurred = gaussian_filter(image, sigmas)
-            blurred = np.clip(blurred, min_, max_)
-
-            _, is_adversarial = a.predictions(blurred)
-            if is_adversarial:
-                return
-from __future__ import print_function
-from __future__ import division
-
-import warnings
-import threading
-import queue
-import time
-import sys
-import collections
-
-# requires Python 3.2 or newer, or a backport for Python 2
-from concurrent.futures import ThreadPoolExecutor
-from concurrent.futures import Executor
-from concurrent.futures import Future
-
-from .base import Attack
-from .base import call_decorator
-from .blended_noise import BlendedUniformNoiseAttack
-
-import numpy as np
-from numpy.linalg import norm
-
-
-class BoundaryAttack(Attack):
-    """A powerful adversarial attack that requires neither gradients
-    nor probabilities.
-
-    This is the reference implementation for the attack introduced in [1]_.
-
-    Notes
-    -----
-    This implementation provides several advanced features:
-
-    * ability to continue previous attacks by passing an instance of the
-      Adversarial class
-    * ability to pass an explicit starting point; especially to initialize
-      a targeted attack
-    * ability to pass an alternative attack used for initialization
-    * fine-grained control over logging
-    * ability to specify the batch size
-    * optional automatic batch size tuning
-    * optional multithreading for random number generation
-    * optional multithreading for candidate point generation
-
-    References
-    ----------
-    .. [1] Wieland Brendel (*), Jonas Rauber (*), Matthias Bethge,
-       "Decision-Based Adversarial Attacks: Reliable Attacks
-       Against Black-Box Machine Learning Models",
-       https://arxiv.org/abs/1712.04248
-
-    """
-
-    @call_decorator
-    def __call__(
-            self,
-            input_or_adv,
-            label=None,
-            unpack=True,
-            iterations=5000,
-            max_directions=25,
-            starting_point=None,
-            initialization_attack=None,
-            log_every_n_steps=1,
-            spherical_step=1e-2,
-            source_step=1e-2,
-            step_adaptation=1.5,
-            batch_size=1,
-            tune_batch_size=True,
-            threaded_rnd=True,
-            threaded_gen=True,
-            alternative_generator=False,
-            internal_dtype=np.float64,
-            verbose=False):
-        """Applies the Boundary Attack.
-
-        Parameters
-        ----------
-        input_or_adv : `numpy.ndarray` or :class:`Adversarial`
-            The original, correctly classified image. If image is a
-            numpy array, label must be passed as well. If image is
-            an :class:`Adversarial` instance, label must not be passed.
-        label : int
-            The reference label of the original image. Must be passed
-            if image is a numpy array, must not be passed if image is
-            an :class:`Adversarial` instance.
-        unpack : bool
-            If true, returns the adversarial image, otherwise returns
-            the Adversarial object.
-        iterations : int
-            Maximum number of iterations to run. Might converge and stop
-            before that.
-        max_directions : int
-            Maximum number of trials per iteration.
-        starting_point : `numpy.ndarray`
-            Adversarial input to use as a starting point, in particular
-            for targeted attacks.
-        initialization_attack : :class:`Attack`
-            Attack to use to find a starting point. Defaults to
-            BlendedUniformNoiseAttack.
-        log_every_n_steps : int
-            Determines verbosity of the logging.
-        spherical_step : float
-            Initial step size for the orthogonal (spherical) step.
-        source_step : float
-            Initial step size for the step towards the target.
-        step_adaptation : float
-            Factor by which the step sizes are multiplied or divided.
-        batch_size : int
-            Batch size or initial batch size if tune_batch_size is True.
-        tune_batch_size : bool
-            Whether or not the batch size should be automatically chosen
-            between 1 and max_directions.
-        threaded_rnd : bool
-            Whether the random number generation should be multithreaded.
-        threaded_gen : bool
-            Whether the candidate point generation should be multithreaded.
-        alternative_generator: bool
-            Whether an alternative implementation of the candidate generator
-            should be used.
-        internal_dtype : np.float32 or np.float64
-            Higher precision might be slower but is numerically more stable.
-        verbose : bool
-            Controls verbosity of the attack.
-
-        """
-
-        # make some of the parameters available to other methods without
-        # the need to explicitly pass them
-        self.log_every_n_steps = log_every_n_steps
-        self._starting_point = starting_point
-        self._initialization_attack = initialization_attack
-        self.batch_size = batch_size
-        self.max_directions = max_directions
-        self.step_adaptation = step_adaptation
-        self.spherical_step = spherical_step
-        self.source_step = source_step
-        self.internal_dtype = internal_dtype
-        self.verbose = verbose
-
-        if not verbose:
-            print('run with verbose=True to see details')
-
-        if alternative_generator:
-            self.generate_candidate = self.generate_candidate_alternative
-        else:
-            self.generate_candidate = self.generate_candidate_default
-
-        return self._apply_outer(
-            input_or_adv,
-            iterations=iterations,
-            tune_batch_size=tune_batch_size,
-            threaded_rnd=threaded_rnd,
-            threaded_gen=threaded_gen)
-
-    def _apply_outer(
-            self,
-            *args,
-            **kwargs):
-
-        # ===========================================================
-        # Start optional threads for parallel candidate generation
-        # ===========================================================
-
-        if kwargs['threaded_gen'] is True:
-            # default value if True, but allow users to pass a number instead
-            kwargs['threaded_gen'] = 13
-
-        if kwargs['threaded_gen']:
-            n = kwargs['threaded_gen']
-            with ThreadPoolExecutor(max_workers=n) as pool:
-                return self._apply_inner(pool, *args, **kwargs)
-        else:
-            with DummyExecutor() as pool:
-                return self._apply_inner(pool, *args, **kwargs)
-
-    def _apply_inner(
-            self,
-            pool,
-            a,
-            iterations,
-            tune_batch_size,
-            threaded_rnd,
-            threaded_gen):
-
-        self.t_initial = time.time()
-
-        # ===========================================================
-        # Increase floating point precision
-        # ===========================================================
-
-        external_dtype = a.original_image.dtype
-
-        assert self.internal_dtype in [np.float32, np.float64]
-        assert external_dtype in [np.float32, np.float64]
-
-        assert not (external_dtype == np.float64 and
-                    self.internal_dtype == np.float32)
-
-        a.set_distance_dtype(self.internal_dtype)
-
-        # ===========================================================
-        # Find starting point
-        # ===========================================================
-
-        self.initialize_starting_point(a)
-
-        if a.image is None:
-            warnings.warn(
-                'Initialization failed. If the criterion is targeted,'
-                ' it might be necessary to pass an explicit starting'
-                ' point or targeted initialization attack.')
-            return
-
-        assert a.image.dtype == external_dtype
-
-        # ===========================================================
-        # Initialize variables, constants, hyperparameters, etc.
-        # ===========================================================
-
-        # make sure repeated warnings are shown
-        warnings.simplefilter('always', UserWarning)
-
-        # get bounds
-        bounds = a.bounds()
-        min_, max_ = bounds
-
-        # get original and starting point in the right format
-        original = a.original_image.astype(self.internal_dtype)
-        perturbed = a.image.astype(self.internal_dtype)
-        distance = a.distance
-
-        # determine next step for batch size tuning
-        self.init_batch_size_tuning(tune_batch_size)
-
-        # make sure step size is valid
-        self.printv(
-            'Initial spherical_step = {:.2f}, source_step = {:.2f}'.format(
-                self.spherical_step, self.source_step))
-
-        # ===========================================================
-        # initialize stats
-        # ===========================================================
-
-        stats_initialized = False
-
-        # time measurements
-        self.stats_success = np.zeros((self.max_directions,), dtype=np.int)
-        self.stats_fail = 0
-        self.stats_generator_duration = np.zeros((self.max_directions,))
-        self.stats_prediction_duration = np.zeros((self.max_directions,))
-        self.stats_spherical_prediction_duration \
-            = np.zeros((self.max_directions,))
-        self.stats_hyperparameter_update_duration = 0
-
-        # counters
-        self.stats_generator_calls \
-            = np.zeros((self.max_directions,), dtype=np.int)
-        self.stats_prediction_calls \
-            = np.zeros((self.max_directions,), dtype=np.int)
-        self.stats_spherical_prediction_calls \
-            = np.zeros((self.max_directions,), dtype=np.int)
-        self.stats_numerical_problems = 0
-
-        # recent successes
-        self.stats_spherical_adversarial = collections.deque(maxlen=100)
-        self.stats_step_adversarial = collections.deque(maxlen=30)
-
-        # ===========================================================
-        # Start optional threads for parallel sampling from std normal
-        # ===========================================================
-
-        if threaded_rnd is True:
-            # default value if True, but allow users to pass a number instead
-            threaded_rnd = 4
-
-        if threaded_rnd:
-            # create a queue to cache samples
-            queue_size = 2 * self.max_directions + threaded_rnd + threaded_gen
-            rnd_normal_queue = queue.Queue(queue_size)
-
-            try:
-                import randomgen
-            except ImportError:  # pragma: no cover
-                raise ImportError('To use the BoundaryAttack,'
-                                  ' please install the randomgen'
-                                  ' module (e.g. pip install randomgen)')
-
-            def sample_std_normal(thread_id, shape, dtype):
-                # create a thread-specific RNG
-                rng = randomgen.RandomGenerator(
-                    randomgen.Xoroshiro128(seed=20 + thread_id))
-
-                t = threading.currentThread()
-                while getattr(t, 'do_run', True):
-                    rnd_normal = rng.standard_normal(
-                        size=shape, dtype=dtype)
-                    rnd_normal_queue.put(rnd_normal)
-
-            self.printv('Using {} threads to create random numbers'.format(
-                threaded_rnd))
-
-            # start threads that sample from std normal distribution
-            rnd_normal_threads = []
-            for thread_id in range(threaded_rnd):
-                rnd_normal_thread = threading.Thread(
-                    target=sample_std_normal,
-                    args=(thread_id, original.shape, original.dtype))
-                rnd_normal_thread.start()
-                rnd_normal_threads.append(rnd_normal_thread)
-        else:
-            rnd_normal_queue = None
-
-        # ===========================================================
-        # Iteratively refine adversarial by following the boundary
-        # between adversarial and non-adversarial images
-        # ===========================================================
-
-        generation_args = None
-
-        # log starting point
-        self.log_step(0, distance)
-
-        initial_convergence_steps = 100
-        convergence_steps = initial_convergence_steps
-        resetted = False
-
-        for step in range(1, iterations + 1):
-            t_step = time.time()
-
-            # ===========================================================
-            # Check for convergence
-            # ===========================================================
-
-            check_strict = convergence_steps == initial_convergence_steps
-            if self.has_converged(check_strict):
-                self.log_step(step - 1, distance, always=True)
-                if resetted:
-                    self.printv(
-                        'Looks like attack has converged after {} steps,'
-                        ' {} remaining'.format(step, convergence_steps))
-                    convergence_steps -= 1
-                    if convergence_steps == 0:
-                        break
-                else:
-                    resetted = True
-                    self.printv(
-                        'Looks like attack has converged after' +
-                        ' {} steps'.format(step) +
-                        ' for the first time. Resetting steps to be sure.')
-                    self.spherical_step = 1e-2
-                    self.source_step = 1e-2
-            elif (convergence_steps <
-                    initial_convergence_steps):  # pragma: no cover
-                self.log_step(step - 1, distance, always=True)
-                warnings.warn('Attack has not converged!')
-                convergence_steps = initial_convergence_steps
-                resetted = False
-
-            # ===========================================================
-            # Determine optimal batch size
-            # ===========================================================
-
-            if tune_batch_size and step == self.next_tuning_step:
-                if not stats_initialized:
-                    self.initialize_stats(
-                        a, pool, external_dtype, generation_args)
-                    stats_initialized = True
-
-                # during initialization, predictions are performed
-                # and thus better adversarials might have been found
-                # if a.distance.value != distance.value:
-                #     assert a.distance.value < distance.value
-                if a.distance.value < distance.value:
-                    self.printv(
-                        'During initialization, a better adversarial'
-                        ' has been found. Continuing from there.')
-                    perturbed = a.image.astype(self.internal_dtype)
-                    distance = a.distance
-                    # because we are resetting perturbed, it's important
-                    # that the new generator is created afterwards
-
-                self.tune_batch_size(a)
-
-            # ===========================================================
-            # Create a generator for new candidates
-            # ===========================================================
-
-            unnormalized_source_direction, source_direction, source_norm \
-                = self.prepare_generate_candidates(original, perturbed)
-
-            generation_args = (
-                rnd_normal_queue,
-                bounds,
-                original,
-                perturbed,
-                unnormalized_source_direction,
-                source_direction,
-                source_norm,
-                self.spherical_step,
-                self.source_step,
-                self.internal_dtype)
-
-            # ===========================================================
-            # Try to find a better adversarial
-            # ===========================================================
-
-            # only check spherical every 10th step
-            # or in every step when we are in convergence confirmation mode,
-            # i.e. after resetting
-            do_spherical = (step % 10 == 0) or resetted
-
-            n_batches = (self.max_directions - 1) // self.batch_size + 1
-
-            # for the first batch
-            t = time.time()
-            futures = [
-                pool.submit(self.generate_candidate, *generation_args)
-                for _ in range(self.batch_size)]
-            t = time.time() - t
-            self.stats_generator_duration[self.batch_size - 1] += t
-
-            for i in range(n_batches):
-                # for the last batch, reduce the batch size if necessary
-                if i == n_batches - 1:
-                    # last batch
-                    remaining = self.max_directions - i * self.batch_size
-                    current_batch_size = remaining
-                    next_batch_size = 0
-                elif i == n_batches - 2:
-                    # second to last batch
-                    current_batch_size = self.batch_size
-                    remaining = self.max_directions - (i + 1) * self.batch_size
-                    next_batch_size = remaining
-                else:
-                    # other batches
-                    current_batch_size = self.batch_size
-                    next_batch_size = self.batch_size
-
-                assert len(futures) == current_batch_size
-
-                batch_shape = (current_batch_size,) + original.shape
-
-                # sample a batch of candidates
-                candidates = np.empty(batch_shape, dtype=original.dtype)
-                if do_spherical:
-                    spherical_candidates = np.empty(
-                        batch_shape, dtype=original.dtype)
-
-                for j in range(current_batch_size):
-                    t = time.time()
-                    candidate, spherical_candidate \
-                        = futures[j].result()
-                    if do_spherical:
-                        spherical_candidates[j] = spherical_candidate
-                    candidates[j] = candidate
-                    t = time.time() - t
-                    self.stats_generator_duration[
-                        current_batch_size - 1] += t
-                    self.stats_generator_calls[current_batch_size - 1] += 1
-
-                # for the next batch
-                if next_batch_size > 0:
-                    t = time.time()
-                    futures = [
-                        pool.submit(self.generate_candidate, *generation_args)
-                        for _ in range(next_batch_size)]
-                    t = time.time() - t
-                    self.stats_generator_duration[next_batch_size - 1] += t
-                else:
-                    futures = None
-
-                # check spherical ones
-                if do_spherical:
-                    t = time.time()
-                    _, batch_is_adversarial = a.batch_predictions(
-                        spherical_candidates.astype(external_dtype),
-                        strict=False)
-                    t = time.time() - t
-
-                    assert batch_is_adversarial.shape == (current_batch_size,)
-
-                    self.stats_spherical_prediction_duration[
-                        current_batch_size - 1] += t
-                    self.stats_spherical_prediction_calls[
-                        current_batch_size - 1] += 1
-
-                    indices = []
-                    for j in range(current_batch_size):
-                        spherical_is_adversarial \
-                            = batch_is_adversarial[j]
-                        self.stats_spherical_adversarial.appendleft(
-                            spherical_is_adversarial)
-                        if spherical_is_adversarial:
-                            indices.append(j)
-
-                    if len(indices) == 0:
-                        continue  # next batch
-
-                    # if at least one of the spherical candidates was
-                    # adversarial, get real candidates
-
-                    candidates = np.take(candidates, indices, axis=0)
-                    reduced_shape = (len(indices),) + batch_shape[1:]
-                    assert candidates.shape == reduced_shape
-
-                    t = time.time()
-                    _, batch_is_adversarial = a.batch_predictions(
-                        candidates.astype(external_dtype),
-                        strict=False)
-                    t = time.time() - t
-                    # TODO: use t
-
-                    assert batch_is_adversarial.shape == (len(indices),)
-
-                    self.stats_step_adversarial.extendleft(
-                        batch_is_adversarial)
-
-                    for j in range(len(indices)):
-                        is_adversarial = batch_is_adversarial[j]
-
-                        if is_adversarial:
-                            new_perturbed = candidates[j]
-                            new_distance = a.normalized_distance(new_perturbed)
-                            # rough correction factor
-                            f = current_batch_size / len(indices)
-                            candidate_index = i * self.batch_size + int(j * f)
-                            self.stats_success[candidate_index] += 1
-                            break
-                    else:
-                        continue  # next batch
-                    break  # found adversarial candidate
-                else:
-                    # check if one of the candidates is adversarial
-                    t = time.time()
-                    _, is_adversarial, adv_index, is_best, candidate_distance \
-                        = a.batch_predictions(
-                            candidates.astype(external_dtype), greedy=True,
-                            strict=False, return_details=True)
-                    t = time.time() - t
-                    self.stats_prediction_duration[self.batch_size - 1] += t
-                    self.stats_prediction_calls[
-                        self.batch_size - 1] += 1
-
-                    if is_adversarial:
-                        new_perturbed = candidates[adv_index]
-                        new_distance = candidate_distance
-                        candidate_index = i * self.batch_size + adv_index
-                        self.stats_success[candidate_index] += 1
-                        break
-
-            else:  # if the for loop doesn't break
-                new_perturbed = None
-                self.stats_fail += 1
-
-            # ===========================================================
-            # Handle the new adversarial
-            # ===========================================================
-
-            message = ''
-            if new_perturbed is not None:
-                if not new_distance < distance:
-                    # assert not is_best  # consistency with adversarial object
-                    self.stats_numerical_problems += 1
-                    warnings.warn('Internal inconsistency, probably caused by '
-                                  'numerical errors')
-                else:
-                    # assert is_best  # consistency with adversarial object
-                    # Jonas 24.10.2017: this can be violated because spherical
-                    # step can be better and adv (numerical issues)
-                    abs_improvement = distance.value - new_distance.value
-                    rel_improvement = abs_improvement / distance.value
-                    message = 'd. reduced by {:.2f}% ({:.4e})'.format(
-                        rel_improvement * 100, abs_improvement)
-
-                # update the variables
-                perturbed = new_perturbed
-                distance = new_distance
-
-            # ===========================================================
-            # Update step sizes
-            # ===========================================================
-
-            t = time.time()
-            self.update_step_sizes()
-            t = time.time() - t
-            self.stats_hyperparameter_update_duration += t
-
-            # ===========================================================
-            # Log the step
-            # ===========================================================
-
-            t_step = time.time() - t_step
-            message += ' (took {:.5f} seconds)'.format(t_step)
-            self.log_step(step, distance, message)
-            sys.stdout.flush()
-
-            if self.stats_numerical_problems > 1000:  # pragma: no cover
-                warnings.warn('Too many internal inconsistencies,'
-                              ' aborting attack.')
-                break
-
-        # ===========================================================
-        # Stop threads that generate random numbers
-        # ===========================================================
-
-        if threaded_rnd:
-            for rnd_normal_thread in rnd_normal_threads:
-                rnd_normal_thread.do_run = False
-            for rnd_normal_thread in rnd_normal_threads:
-                try:
-                    rnd_normal_queue.get(block=False)
-                except queue.Empty:  # pragma: no cover
-                    pass
-            for rnd_normal_thread in rnd_normal_threads:
-                rnd_normal_thread.join()
-
-        # ===========================================================
-        # Log overall runtime
-        # ===========================================================
-
-        self.log_time()
-
-    # ===============================================================
-    #
-    # Other methods
-    #
-    # ===============================================================
-
-    def initialize_starting_point(self, a):
-        starting_point = self._starting_point
-        init_attack = self._initialization_attack
-
-        if a.image is not None:
-            print(
-                'Attack is applied to a previously found adversarial.'
-                ' Continuing search for better adversarials.')
-            if starting_point is not None:  # pragma: no cover
-                warnings.warn(
-                    'Ignoring starting_point parameter because the attack'
-                    ' is applied to a previously found adversarial.')
-            if init_attack is not None:  # pragma: no cover
-                warnings.warn(
-                    'Ignoring initialization_attack parameter because the'
-                    ' attack is applied to a previously found adversarial.')
-            return
-
-        if starting_point is not None:
-            a.predictions(starting_point)
-            assert a.image is not None, ('Invalid starting point provided.'
-                                         ' Please provide a starting point'
-                                         ' that is adversarial.')
-            return
-
-        if init_attack is None:
-            init_attack = BlendedUniformNoiseAttack
-            self.printv(
-                'Neither starting_point nor initialization_attack given.'
- ' Falling back to {} for initialization.'.format( - init_attack.__name__)) - - if issubclass(init_attack, Attack): - # instantiate if necessary - init_attack = init_attack() - - init_attack(a) - - def log_step(self, step, distance, message='', always=False): - if not always and step % self.log_every_n_steps != 0: - return - print('Step {}: {:.5e}, stepsizes = {:.1e}/{:.1e}: {}'.format( - step, - distance.value, - self.spherical_step, - self.source_step, - message)) - - @staticmethod - def prepare_generate_candidates(original, perturbed): - unnormalized_source_direction = original - perturbed - source_norm = norm(unnormalized_source_direction) - source_direction = unnormalized_source_direction / source_norm - return unnormalized_source_direction, source_direction, source_norm - - @staticmethod - def generate_candidate_default( - rnd_normal_queue, - bounds, - original, - perturbed, - unnormalized_source_direction, - source_direction, - source_norm, - spherical_step, - source_step, - internal_dtype, - rng=None): - - if rng is None: - try: - import randomgen - except ImportError: # pragma: no cover - raise ImportError('To use the BoundaryAttack,' - ' please install the randomgen' - ' module (e.g. pip install randomgen)') - rng = randomgen.RandomGenerator() - - # =========================================================== - # perform initial work - # =========================================================== - - assert original.dtype == internal_dtype - assert perturbed.dtype == internal_dtype - - shape = original.shape - - min_, max_ = bounds - - # =========================================================== - # draw a random direction - # =========================================================== - - # randomgen's rnd is faster and more flexible than numpy's: it - # has a dtype argument and supports the much faster Ziggurat method - if rnd_normal_queue is None: - perturbation = rng.standard_normal( - size=shape, dtype=original.dtype) - else: - perturbation = rnd_normal_queue.get() - - assert perturbation.dtype == internal_dtype - - # =========================================================== - # calculate candidate on sphere - # =========================================================== - - dot = np.vdot(perturbation, source_direction) - perturbation -= dot * source_direction - perturbation *= spherical_step * source_norm / norm(perturbation) - - D = 1 / np.sqrt(spherical_step**2 + 1) - direction = perturbation - unnormalized_source_direction - spherical_candidate = original + D * direction - - np.clip(spherical_candidate, min_, max_, out=spherical_candidate) - - # =========================================================== - # add perturbation in direction of source - # =========================================================== - - new_source_direction = original - spherical_candidate - new_source_direction_norm = norm(new_source_direction) - - assert perturbed.dtype == internal_dtype - assert original.dtype == internal_dtype - assert spherical_candidate.dtype == internal_dtype - - # length if spherical_candidate would be exactly on the sphere - length = source_step * source_norm - - # length including correction for deviation from sphere - deviation = new_source_direction_norm - source_norm - length += deviation - - # make sure the step size is positive - length = max(0, length) - - # normalize the length - length = length / new_source_direction_norm - - candidate = spherical_candidate + length * new_source_direction - np.clip(candidate, min_, max_, out=candidate) - - assert 
spherical_candidate.dtype == internal_dtype - assert candidate.dtype == internal_dtype - - data = (candidate, spherical_candidate) - - return data - - @staticmethod - def generate_candidate_alternative( - rnd_normal_queue, - bounds, - original, - perturbed, - unnormalized_source_direction, - source_direction, - source_norm, - spherical_step, - source_step, - internal_dtype, - rng=None): - - if rng is None: - try: - import randomgen - except ImportError: # pragma: no cover - raise ImportError('To use the BoundaryAttack,' - ' please install the randomgen' - ' module (e.g. pip install randomgen)') - rng = randomgen.RandomGenerator() - - # =========================================================== - # perform initial work - # =========================================================== - - assert original.dtype == internal_dtype - assert perturbed.dtype == internal_dtype - - shape = original.shape - - min_, max_ = bounds - - # =========================================================== - # draw a random direction - # =========================================================== - - # randomgen's rnd is faster and more flexible than numpy's: it - # has a dtype argument and supports the much faster Ziggurat method - if rnd_normal_queue is None: - perturbation = rng.standard_normal( - size=shape, dtype=original.dtype) - else: - perturbation = rnd_normal_queue.get() - - assert perturbation.dtype == internal_dtype - - # =========================================================== - # normalize perturbation and subtract source direction - # (to stay on sphere) - # =========================================================== - - perturbation *= spherical_step * source_norm / norm(perturbation) - perturbation -= np.vdot(perturbation, source_direction) \ - * source_direction - - spherical_perturbation = perturbed + perturbation - np.clip(spherical_perturbation, min_, max_, out=spherical_perturbation) - - # refine spherical perturbation - refinement_threshold = min(1e-5, source_step / 10) - for refinements in range(30): - spherical_source_direction = spherical_perturbation - original - spherical_norm = norm(spherical_source_direction) - diff_norm = spherical_norm - source_norm - if np.abs(diff_norm) / source_norm <= refinement_threshold: - break - spherical_perturbation -= diff_norm / spherical_norm \ - * spherical_source_direction - np.clip( - spherical_perturbation, - min_, - max_, - out=spherical_perturbation) - else: # pragma: no cover - refinements += 1 - - # =========================================================== - # add perturbation in direction of source - # =========================================================== - - new_source_direction = original - spherical_perturbation - new_source_direction_norm = norm(new_source_direction) - assert perturbed.dtype == internal_dtype - assert original.dtype == internal_dtype - assert spherical_perturbation.dtype == internal_dtype - - perturbation = spherical_perturbation.copy() - length = source_step * source_norm / new_source_direction_norm - perturbation += length * new_source_direction - np.clip(perturbation, min_, max_, out=perturbation) - - assert spherical_perturbation.dtype == internal_dtype - assert perturbation.dtype == internal_dtype - - data = (perturbation, spherical_perturbation) - return data - - def initialize_stats(self, a, pool, external_dtype, generation_args): - self.printv('Initializing generation and prediction' - ' time measurements. 
This can take a few' - ' seconds.') - - _next = self.generate_candidate(*generation_args) - candidate, spherical_candidate = _next - # batch_shape = (self.max_directions,) + candidate.shape - # samples = np.empty(batch_shape, candidate.dtype) - - # after initialization, we should have 1000 data points - # and at least `max_directions` new ones to fill the array - # n = max(1000 - self.stats_generator_calls, self.max_directions) - - for batch_size in range(1, self.max_directions + 1): - t = time.time() - futures = [ - pool.submit(self.generate_candidate, *generation_args) - for _ in range(batch_size)] - t = time.time() - t - self.stats_generator_duration[batch_size - 1] += t - - batch_shape = (batch_size,) + candidate.shape - samples = np.empty(batch_shape, candidate.dtype) - - for i in range(batch_size): - t = time.time() - candidate, _ = futures[i].result() - samples[i] = candidate - t = time.time() - t - self.stats_generator_duration[batch_size - 1] += t - self.stats_generator_calls[batch_size - 1] += 1 - - batch = samples - - current = self.stats_prediction_calls[batch_size - 1] - # more data points for small batch sizes, fewer - # for large batch sizes - target = 2 + (2 * self.max_directions) // batch_size - n = max(target - current, 0) - - for i in range(n): - t = time.time() - _, is_adversarial, adv_index, is_best, candidate_distance \ - = a.batch_predictions( - batch.astype(external_dtype), greedy=True, - strict=False, return_details=True) - t = time.time() - t - - self.stats_prediction_duration[batch_size - 1] += t - self.stats_prediction_calls[batch_size - 1] += 1 - - t = time.time() - _, _ = a.batch_predictions( - batch.astype(external_dtype), strict=False) - t = time.time() - t - - self.stats_spherical_prediction_duration[batch_size - 1] \ - += t - self.stats_spherical_prediction_calls[batch_size - 1] += 1 - - def log_time(self): - t_total = time.time() - self.t_initial - - rel_generate = self.stats_generator_duration.sum() / t_total - rel_prediction = self.stats_prediction_duration.sum() / t_total - rel_spherical \ - = self.stats_spherical_prediction_duration.sum() / t_total - rel_hyper = self.stats_hyperparameter_update_duration / t_total - rel_remaining = 1 - rel_generate - rel_prediction \ - - rel_spherical - rel_hyper - - self.printv('Time since beginning: {:.5f}'.format(t_total)) - self.printv(' {:2.1f}% for generation ({:.5f})'.format( - rel_generate * 100, self.stats_generator_duration.sum())) - self.printv(' {:2.1f}% for spherical prediction ({:.5f})'.format( - rel_spherical * 100, - self.stats_spherical_prediction_duration.sum())) - self.printv(' {:2.1f}% for prediction ({:.5f})'.format( - rel_prediction * 100, self.stats_prediction_duration.sum())) - self.printv(' {:2.1f}% for hyperparameter update ({:.5f})'.format( - rel_hyper * 100, self.stats_hyperparameter_update_duration)) - self.printv(' {:2.1f}% for the rest ({:.5f})'.format( - rel_remaining * 100, rel_remaining * t_total)) - - def init_batch_size_tuning(self, tune_batch_size): - if not tune_batch_size: - return - - if tune_batch_size is True: - # user provided a boolean - self.steps_to_next_tuning = 100 - else: - # user provided a concrete number - self.steps_to_next_tuning = tune_batch_size - tune_batch_size = True - - self.next_tuning_step = 1 + self.steps_to_next_tuning - assert self.next_tuning_step > 1, ( - 'Estimating the optimal batch size cannot be done' - ' before the first step.') - - if self.steps_to_next_tuning < 50: - warnings.warn('Batch size tuning after so few steps' - ' is not very reliable.') 
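The batch-size tuning that follows boils down to one small cost model: for every hypothetical batch size, combine the measured per-batch generation and prediction times with how often a step historically needed k candidates, and pick the batch size with the smallest expected step duration. A minimal standalone sketch of that model, assuming the same array layout as the scraped code (T_generate[b - 1] is the average time to generate one batch of size b, and likewise for T_prediction); the helper name is illustrative, not part of the original source:

def expected_step_duration(batch_size, T_generate, T_prediction,
                           frequencies, candidates, max_directions):
    # frequencies[k] counts how often a step needed candidates[k]
    # candidates before it succeeded (or exhausted max_directions).
    total = 0.0
    for frequency, samples in zip(frequencies, candidates):
        # number of batches needed to produce `samples` candidates
        full = (samples - 1) // batch_size + 1  # ceil(samples / batch_size)
        if full > max_directions // batch_size:
            # the final batch would overshoot max_directions, so it is smaller
            full -= 1
            remaining = max_directions - full * batch_size
            total += frequency * (T_generate[remaining - 1] +
                                  T_prediction[remaining - 1])
        total += frequency * full * (T_generate[batch_size - 1] +
                                     T_prediction[batch_size - 1])
    return total

# the tuned batch size is then the argmin over all feasible sizes:
# best = min(range(1, max_directions + 1),
#            key=lambda b: expected_step_duration(
#                b, T_generate, T_prediction,
#                frequencies, candidates, max_directions))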
- - def tune_batch_size(self, a): - self.printv('Estimating optimal batch size') - - max_directions = self.max_directions - - self.log_time() - - # =========================================================== - # for each batch size, we estimate the time per step given the - # distribution over the number of candidates needed per step - # =========================================================== - - step_duration = np.zeros((max_directions,)) - - # how long does it take to generate a candidate - T_generate = self.stats_generator_duration / self.stats_generator_calls - - # how long does it take to get predictions of a batch - T_prediction = self.stats_prediction_duration \ - / self.stats_prediction_calls - - self.printv('current estimate of the time to generate a candidate' - ' depending on the batch size:') - self.printv(T_generate / np.arange(1, max_directions + 1)) - - self.printv('current estimate of the time to get predictions for a' - ' candidate depending on the batch size:') - self.printv(T_prediction / np.arange(1, max_directions + 1)) - - # how often did we need to use the corresponding - # number of candidates - frequencies = [self.stats_fail] + list(self.stats_success) - candidates = [max_directions] + list(range(1, max_directions + 1)) - - s = sum(frequencies) - - self.printv('Relative frequencies for failing and success after k') - self.printv(np.asarray(frequencies) / s) - - for batch_size in range(1, max_directions + 1): - t_generate = 0 - t_prediction = 0 - - for frequency, samples in zip(frequencies, candidates): - # number of full batches - max_full = max_directions // batch_size - - # same as round_up(samples / batch_size) - full = (samples - 1) // batch_size + 1 - - if full > max_full: - # the last batch will be smaller - full -= 1 - remaining = max_directions - full * batch_size - - t_generate += frequency * T_generate[remaining - 1] - t_prediction += frequency * T_prediction[remaining - 1] - - t_generate += frequency * full * T_generate[batch_size - 1] - t_prediction += frequency * full * T_prediction[batch_size - 1] - - t_total = t_generate + t_prediction - step_duration[batch_size - 1] = t_total - - self.printv( - 'Using batch size {:3d}, an average step would have taken' - ' {:.5f} = {:.5f} + {:.5f} seconds'.format( - batch_size, t_total / s, t_generate / s, t_prediction / s)) - - # =========================================================== - # determine the best batch size and print comparisons - # =========================================================== - - best_batch_size = np.argmin(step_duration) + 1 - worst_batch_size = np.argmax(step_duration) + 1 - - self.printv('batch size was {}, optimal batch size would have' - ' been {}'.format(self.batch_size, best_batch_size)) - - best_step_duration = step_duration[best_batch_size - 1] - self.printv('setting batch size to {}: expected step duration:' - ' {:.5f}'.format(best_batch_size, best_step_duration / s)) - - for name, value in ( - ('old', self.batch_size), - ('worst', worst_batch_size), - ('smallest', 1), - ('largest', max_directions)): - - improvement = step_duration[value - 1] / best_step_duration - - self.printv('improvement compared to {} batch size' - ' ({}): {:.1f}x'.format(name, value, improvement)) - - change = best_batch_size - self.batch_size - - if change == 0: - self.steps_to_next_tuning *= 2 - elif change in [-1, 1]: - pass - else: # pragma: no cover - if self.steps_to_next_tuning > 100: - self.steps_to_next_tuning //= 2 - - self.next_tuning_step += self.steps_to_next_tuning - self.printv('next 
batch size tuning in {} steps, after step {}'.format( - self.steps_to_next_tuning, self.next_tuning_step - 1)) - - # finally, set the new batch size - self.batch_size = best_batch_size - - # and reset the distribution over number of candidates needed - # in a step, as it changes over time - self.stats_fail = 0 - self.stats_success *= 0 - - def update_step_sizes(self): - def is_full(deque): - return len(deque) == deque.maxlen - - if not (is_full(self.stats_spherical_adversarial) or - is_full(self.stats_step_adversarial)): - # updated step size recently, not doing anything now - return - - def estimate_probability(deque): - if len(deque) == 0: - return None - return np.mean(deque) - - p_spherical = estimate_probability(self.stats_spherical_adversarial) - p_step = estimate_probability(self.stats_step_adversarial) - - n_spherical = len(self.stats_spherical_adversarial) - n_step = len(self.stats_step_adversarial) - - def log(message): - _p_spherical = p_spherical - if _p_spherical is None: # pragma: no cover - _p_spherical = -1. - - _p_step = p_step - if _p_step is None: - _p_step = -1. - - self.printv(' {} {:.2f} ({:3d}), {:.2f} ({:2d})'.format( - message, - _p_spherical, - n_spherical, - _p_step, - n_step)) - - if is_full(self.stats_spherical_adversarial): - if p_spherical > 0.5: - message = 'Boundary too linear, increasing steps: ' - self.spherical_step *= self.step_adaptation - self.source_step *= self.step_adaptation - elif p_spherical < 0.2: - message = 'Boundary too non-linear, decreasing steps:' - self.spherical_step /= self.step_adaptation - self.source_step /= self.step_adaptation - else: - message = None - - if message is not None: - self.stats_spherical_adversarial.clear() - log(message) - - if is_full(self.stats_step_adversarial): - if p_step > 0.5: - message = 'Success rate too high, increasing source step:' - self.source_step *= self.step_adaptation - elif p_step < 0.2: - message = 'Success rate too low, decreasing source step: ' - self.source_step /= self.step_adaptation - else: - message = None - - if message is not None: - self.stats_step_adversarial.clear() - log(message) - - def has_converged(self, strict): - if strict: - return self.source_step < 1e-7 - return self.source_step < 2e-7 - - def printv(self, *args, **kwargs): - if self.verbose: - print(*args, **kwargs) - - -class DummyExecutor(Executor): - - def __init__(self): - self._shutdown = False - self._shutdownLock = threading.Lock() - - def submit(self, fn, *args, **kwargs): - with self._shutdownLock: - if self._shutdown: # pragma: no cover - raise RuntimeError( - 'cannot schedule new futures after shutdown') - - f = Future() - try: - result = fn(*args, **kwargs) - except BaseException as e: # pragma: no cover - f.set_exception(e) - else: - f.set_result(result) - - return f - - def shutdown(self, wait=True): - with self._shutdownLock: - self._shutdown = True -# -*- coding: utf-8 -*- -from __future__ import division - -import numpy as np -import logging - -from .base import Attack -from .base import call_decorator -from ..utils import onehot_like - - -class CarliniWagnerL2Attack(Attack): - """The L2 version of the Carlini & Wagner attack. - - This attack is described in [1]_. This implementation - is based on the reference implementation by Carlini [2]_. - For bounds ≠ (0, 1), it differs from [2]_ because we - normalize the squared L2 loss with the bounds. - - References - ---------- - .. [1] Nicholas Carlini, David Wagner: "Towards Evaluating the - Robustness of Neural Networks", https://arxiv.org/abs/1608.04644 - .. 
[2] https://github.com/carlini/nn_robust_attacks - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - binary_search_steps=5, max_iterations=1000, - confidence=0, learning_rate=5e-3, - initial_const=1e-2, abort_early=True): - """The L2 version of the Carlini & Wagner attack. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - binary_search_steps : int - The number of steps for the binary search used to - find the optimal tradeoff-constant between distance and confidence. - max_iterations : int - The maximum number of iterations. Larger values are more - accurate; setting it too small will require a large learning rate - and will produce poor results. - confidence : int or float - Confidence of adversarial examples: a higher value produces - adversarials that are further away, but more strongly classified - as adversarial. - learning_rate : float - The learning rate for the attack algorithm. Smaller values - produce better results but take longer to converge. - initial_const : float - The initial tradeoff-constant to use to tune the relative - importance of distance and confidence. If `binary_search_steps` - is large, the initial constant is not important. - abort_early : bool - If True, Adam will be aborted if the loss hasn't decreased - for some time (a tenth of max_iterations). - - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - if not a.has_gradient(): - logging.fatal('Applied gradient-based attack to model that ' - 'does not provide gradients.') - return - - min_, max_ = a.bounds() - - def to_attack_space(x): - # map from [min_, max_] to [-1, +1] - a = (min_ + max_) / 2 - b = (max_ - min_) / 2 - x = (x - a) / b - - # from [-1, +1] to approx. (-1, +1) - x = x * 0.999999 - - # from (-1, +1) to (-inf, +inf) - return np.arctanh(x) - - def to_model_space(x): - """Transforms an input from the attack space - to the model space. This transformation and - the returned gradient are elementwise.""" - - # from (-inf, +inf) to (-1, +1) - x = np.tanh(x) - - grad = 1 - np.square(x) - - # map from (-1, +1) to (min_, max_) - a = (min_ + max_) / 2 - b = (max_ - min_) / 2 - x = x * b + a - - grad = grad * b - return x, grad - - # variables representing inputs in attack space will be - # prefixed with att_ - att_original = to_attack_space(a.original_image) - - # will be close but not identical to a.original_image - reconstructed_original, _ = to_model_space(att_original) - - # the binary search finds the smallest const for which we - # find an adversarial - const = initial_const - lower_bound = 0 - upper_bound = np.inf - - for binary_search_step in range(binary_search_steps): - if binary_search_step == binary_search_steps - 1 and \ - binary_search_steps >= 10: - # in the last binary search step, use the upper_bound instead - # TODO: find out why... 
it's not obvious why this is useful - const = upper_bound - - logging.info('starting optimization with const = {}'.format(const)) - - att_perturbation = np.zeros_like(att_original) - - # create a new optimizer to minimize the perturbation - optimizer = AdamOptimizer(att_perturbation.shape) - - found_adv = False # found adv with the current const - loss_at_previous_check = np.inf - - for iteration in range(max_iterations): - x, dxdp = to_model_space(att_original + att_perturbation) - logits, is_adv = a.predictions(x) - loss, dldx = self.loss_function( - const, a, x, logits, reconstructed_original, - confidence, min_, max_) - - logging.info('loss: {}; best overall distance: {}'.format( - loss, a.distance)) - - # backprop the gradient of the loss w.r.t. x further - # to get the gradient of the loss w.r.t. att_perturbation - assert dldx.shape == x.shape - assert dxdp.shape == x.shape - # we can do a simple elementwise multiplication, because - # grad_x_wrt_p is a matrix of elementwise derivatives - # (i.e. each x[i] w.r.t. p[i] only, for all i) and - # grad_loss_wrt_x is a real gradient reshaped as a matrix - gradient = dldx * dxdp - - att_perturbation += optimizer(gradient, learning_rate) - - if is_adv: - # this binary search step can be considered a success - # but optimization continues to minimize perturbation size - found_adv = True - - if abort_early and \ - iteration % (np.ceil(max_iterations / 10)) == 0: - # after each tenth of the iterations, check progress - if not (loss <= .9999 * loss_at_previous_check): - break # stop Adam if there has not been progress - loss_at_previous_check = loss - - if found_adv: - logging.info('found adversarial with const = {}'.format(const)) - upper_bound = const - else: - logging.info('failed to find adversarial ' - 'with const = {}'.format(const)) - lower_bound = const - - if upper_bound == np.inf: - # exponential search - const *= 10 - else: - # binary search - const = (lower_bound + upper_bound) / 2 - - @classmethod - def loss_function(cls, const, a, x, logits, reconstructed_original, - confidence, min_, max_): - """Returns the loss and the gradient of the loss w.r.t. x, - assuming that logits = model(x).""" - - targeted = a.target_class() is not None - if targeted: - c_minimize = cls.best_other_class(logits, a.target_class()) - c_maximize = a.target_class() - else: - c_minimize = a.original_class - c_maximize = cls.best_other_class(logits, a.original_class) - - is_adv_loss = logits[c_minimize] - logits[c_maximize] - - # is_adv is True as soon as the is_adv_loss goes below 0 - # but sometimes we want additional confidence - is_adv_loss += confidence - is_adv_loss = max(0, is_adv_loss) - - s = max_ - min_ - squared_l2_distance = np.sum((x - reconstructed_original)**2) / s**2 - total_loss = squared_l2_distance + const * is_adv_loss - - # calculate the gradient of total_loss w.r.t. 
x - logits_diff_grad = np.zeros_like(logits) - logits_diff_grad[c_minimize] = 1 - logits_diff_grad[c_maximize] = -1 - is_adv_loss_grad = a.backward(logits_diff_grad, x) - assert is_adv_loss >= 0 - if is_adv_loss == 0: - is_adv_loss_grad = 0 - - squared_l2_distance_grad = (2 / s**2) * (x - reconstructed_original) - - total_loss_grad = squared_l2_distance_grad + const * is_adv_loss_grad - return total_loss, total_loss_grad - - @staticmethod - def best_other_class(logits, exclude): - """Returns the index of the largest logit, ignoring the class that - is passed as `exclude`.""" - other_logits = logits - onehot_like(logits, exclude, value=np.inf) - return np.argmax(other_logits) - - -class AdamOptimizer: - """Basic Adam optimizer implementation that can minimize w.r.t. - a single variable. - - Parameters - ---------- - shape : tuple - shape of the variable w.r.t. which the loss should be minimized - - """ - - def __init__(self, shape): - self.m = np.zeros(shape) - self.v = np.zeros(shape) - self.t = 0 - - def __call__(self, gradient, learning_rate, - beta1=0.9, beta2=0.999, epsilon=10e-8): - """Updates internal parameters of the optimizer and returns - the change that should be applied to the variable. - - Parameters - ---------- - gradient : `np.ndarray` - the gradient of the loss w.r.t. to the variable - learning_rate: float - the learning rate in the current iteration - beta1: float - decay rate for calculating the exponentially - decaying average of past gradients - beta2: float - decay rate for calculating the exponentially - decaying average of past squared gradients - epsilon: float - small value to avoid division by zero - - """ - - self.t += 1 - - self.m = beta1 * self.m + (1 - beta1) * gradient - self.v = beta2 * self.v + (1 - beta2) * gradient**2 - - bias_correction_1 = 1 - beta1**self.t - bias_correction_2 = 1 - beta2**self.t - - m_hat = self.m / bias_correction_1 - v_hat = self.v / bias_correction_2 - - return -learning_rate * m_hat / (np.sqrt(v_hat) + epsilon) -import numpy as np -from collections import Iterable - -from .base import Attack -from .base import call_decorator - - -class ContrastReductionAttack(Attack): - """Reduces the contrast of the image until it is misclassified.""" - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - epsilons=1000): - """Reduces the contrast of the image until it is misclassified. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - epsilons : int or Iterable[float] - Either Iterable of contrast levels or number of contrast - levels between 1 and 0 that should be tried. Epsilons are - one minus the contrast level. 
- - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - image = a.original_image - min_, max_ = a.bounds() - target = (max_ + min_) / 2 - - if not isinstance(epsilons, Iterable): - epsilons = np.linspace(0, 1, num=epsilons + 1)[1:] - - for epsilon in epsilons: - perturbed = (1 - epsilon) * image + epsilon * target - - _, is_adversarial = a.predictions(perturbed) - if is_adversarial: - return -# -*- coding: utf-8 -*- -from __future__ import division - -import math -import numpy as np -import logging - -from .base import Attack -from .base import call_decorator - - -class DecoupledDirectionNormL2Attack(Attack): - """The Decoupled Direction and Norm L2 adversarial attack from [1]_. - - References - ---------- - .. [1] Jérôme Rony, Luiz G. Hafemann, Luiz S. Oliveira, Ismail Ben Ayed, - Robert Sabourin, Eric Granger, "Decoupling Direction and Norm for Efficient - Gradient-Based L2 Adversarial Attacks and Defenses", - https://arxiv.org/abs/1811.09600 - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - steps=100, gamma=0.05, initial_norm=1, quantize=True, - levels=256): - """The Decoupled Direction and Norm L2 adversarial attack. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - steps : int - Number of steps for the optimization. - gamma : float, optional - Factor by which the norm will be modified. - new_norm = norm * (1 + or - gamma). - init_norm : float, optional - Initial value for the norm. - quantize : bool, optional - If True, the returned adversarials will have quantized values to - the specified number of levels. - levels : int, optional - Number of levels to use for quantization - (e.g. 256 for 8 bit images). 
- - """ - - a = input_or_adv - - if not a.has_gradient(): - logging.fatal('Applied gradient-based attack to model that ' - 'does not provide gradients.') - return - - min_, max_ = a.bounds() - s = max_ - min_ - if a.target_class() is not None: - multiplier = -1 - attack_class = a.target_class() - else: - multiplier = 1 - attack_class = a.original_class - norm = initial_norm - original_image = a.original_image - perturbation = np.zeros_like(original_image) - - for i in range(steps): - - logits, grad, is_adv = a.predictions_and_gradient( - image=original_image + perturbation, - label=attack_class, strict=True) - - # renorm gradient and handle 0-norm gradient - grad_norm = np.linalg.norm(grad) - if grad_norm == 0: # pragma: no cover - grad = np.random.normal(size=grad.shape) - grad_norm = np.linalg.norm(grad) - grad *= s / grad_norm - - # udpate perturbation - lr = cosine_learning_rate(i, steps, 1., 0.01) - perturbation += lr * multiplier * grad - - # update norm value and renorm perturbation accordingly - norm *= (1 - (2 * is_adv - 1) * gamma) - perturbation *= s * norm / np.linalg.norm(perturbation) - if quantize: - perturbation = (perturbation - min_) / s - perturbation = np.round(perturbation * (levels - 1)) - perturbation /= (levels - 1) - perturbation = perturbation * s + min_ - perturbation = np.clip(perturbation, min_ - original_image, - max_ - original_image) - - -def cosine_learning_rate(current_step, max_steps, init_lr, final_lr): - """Cosine annealing schedule for learning rate. - - Parameters - ---------- - current_step : int - Current step in the optimization - max_steps : int - Total number of steps of the optimization. - init_lr : float - Initial learning rate. - final_lr : float - Final learning rate. - - Returns - ------- - float - The current learning rate. - - """ - alpha = (1 + math.cos(math.pi * current_step / max_steps)) / 2 - return final_lr + alpha * (init_lr - final_lr) -import logging - -import numpy as np - -from .base import Attack -from .base import call_decorator -from ..utils import crossentropy -from ..distances import MeanSquaredDistance -from ..distances import Linfinity - - -class DeepFoolAttack(Attack): - """Simple and close to optimal gradient-based - adversarial attack. - - Implementes DeepFool introduced in [1]_. - - References - ---------- - .. [1] Seyed-Mohsen Moosavi-Dezfooli, Alhussein Fawzi, Pascal Frossard, - "DeepFool: a simple and accurate method to fool deep neural - networks", https://arxiv.org/abs/1511.04599 - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - steps=100, subsample=10, p=None): - """Simple and close to optimal gradient-based - adversarial attack. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - steps : int - Maximum number of steps to perform. - subsample : int - Limit on the number of the most likely classes that should - be considered. A small value is usually sufficient and much - faster. - p : int or float - Lp-norm that should be minimzed, must be 2 or np.inf. 
- - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - if not a.has_gradient(): - return - - if a.target_class() is not None: - logging.fatal('DeepFool is an untargeted adversarial attack.') - return - - if p is None: - # set norm to optimize based on the distance measure - if a._distance == MeanSquaredDistance: - p = 2 - elif a._distance == Linfinity: - p = np.inf - else: - raise NotImplementedError('Please choose a distance measure' - ' for which DeepFool is implemented' - ' or specify manually which norm' - ' to optimize.') - - if not (1 <= p <= np.inf): - raise ValueError - - if p not in [2, np.inf]: - raise NotImplementedError - - _label = a.original_class - - # define labels - logits, _ = a.predictions(a.original_image) - labels = np.argsort(logits)[::-1] - if subsample: - # choose the top-k classes - logging.info('Only testing the top-{} classes'.format(subsample)) - assert isinstance(subsample, int) - labels = labels[:subsample] - - def get_residual_labels(logits): - """Get all labels with p < p[original_class]""" - return [ - k for k in labels - if logits[k] < logits[_label]] - - perturbed = a.original_image - min_, max_ = a.bounds() - - for step in range(steps): - logits, grad, is_adv = a.predictions_and_gradient(perturbed) - if is_adv: - return - - # correspondance to algorithm 2 in [1]_: - # - # loss corresponds to f (in the paper: negative cross-entropy) - # grad corresponds to -df/dx (gradient of cross-entropy) - - loss = -crossentropy(logits=logits, label=_label) - - residual_labels = get_residual_labels(logits) - - # instead of using the logits and the gradient of the logits, - # we use a numerically stable implementation of the cross-entropy - # and expect that the deep learning frameworks also use such a - # stable implemenation to calculate the gradient - losses = [ - -crossentropy(logits=logits, label=k) - for k in residual_labels] - grads = [a.gradient(perturbed, label=k) for k in residual_labels] - - # compute optimal direction (and loss difference) - # pairwise between each label and the target - diffs = [(l - loss, g - grad) for l, g in zip(losses, grads)] - - # calculate distances - if p == 2: - distances = [abs(dl) / (np.linalg.norm(dg) + 1e-8) - for dl, dg in diffs] - elif p == np.inf: - distances = [abs(dl) / (np.sum(np.abs(dg)) + 1e-8) - for dl, dg in diffs] - else: # pragma: no cover - assert False - - # choose optimal one - optimal = np.argmin(distances) - df, dg = diffs[optimal] - - # apply perturbation - # the (-dg) corrects the sign, gradient here is -gradient of paper - if p == 2: - perturbation = abs(df) / (np.linalg.norm(dg) + 1e-8)**2 * (-dg) - elif p == np.inf: - perturbation = abs(df) / (np.sum(np.abs(dg)) + 1e-8) \ - * np.sign(-dg) - else: # pragma: no cover - assert False - - # the original implementation accumulates the perturbations - # and only adds the overshoot when adding the accumulated - # perturbation to the original image; we apply the overshoot - # to each perturbation (step) - perturbed = perturbed + 1.05 * perturbation - perturbed = np.clip(perturbed, min_, max_) - - a.predictions(perturbed) # to find an adversarial in the last step - - -class DeepFoolL2Attack(DeepFoolAttack): - def __call__(self, input_or_adv, label=None, unpack=True, - steps=100, subsample=10): - return super(DeepFoolL2Attack, self).__call__( - input_or_adv, label=label, unpack=unpack, - steps=steps, subsample=subsample, p=2) - - -class DeepFoolLinfinityAttack(DeepFoolAttack): - def __call__(self, input_or_adv, label=None, unpack=True, - 
steps=100, subsample=10): - return super(DeepFoolLinfinityAttack, self).__call__( - input_or_adv, label=label, unpack=unpack, - steps=steps, subsample=subsample, p=np.inf) -from __future__ import division -import numpy as np -from collections import Iterable -import logging -import abc - -from .base import Attack -from .base import call_decorator - - -class SingleStepGradientBaseAttack(Attack): - """Common base class for single step gradient attacks.""" - - @abc.abstractmethod - def _gradient(self, a): - raise NotImplementedError - - def _run(self, a, epsilons, max_epsilon): - if not a.has_gradient(): - return - - image = a.original_image - min_, max_ = a.bounds() - - gradient = self._gradient(a) - - if not isinstance(epsilons, Iterable): - epsilons = np.linspace(0, max_epsilon, num=epsilons + 1)[1:] - decrease_if_first = True - else: - decrease_if_first = False - - for _ in range(2): # to repeat with decreased epsilons if necessary - for i, epsilon in enumerate(epsilons): - perturbed = image + gradient * epsilon - perturbed = np.clip(perturbed, min_, max_) - - _, is_adversarial = a.predictions(perturbed) - if is_adversarial: - if decrease_if_first and i < 20: - logging.info('repeating attack with smaller epsilons') - break - return - - max_epsilon = epsilons[i] - epsilons = np.linspace(0, max_epsilon, num=20 + 1)[1:] - - -class GradientAttack(SingleStepGradientBaseAttack): - """Perturbs the image with the gradient of the loss w.r.t. the image, - gradually increasing the magnitude until the image is misclassified. - - Does not do anything if the model does not have a gradient. - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - epsilons=1000, max_epsilon=1): - """Perturbs the image with the gradient of the loss w.r.t. the image, - gradually increasing the magnitude until the image is misclassified. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - epsilons : int or Iterable[float] - Either Iterable of step sizes in the gradient direction - or number of step sizes between 0 and max_epsilon that should - be tried. - max_epsilon : float - Largest step size if epsilons is not an iterable. - - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - return self._run(a, epsilons=epsilons, max_epsilon=max_epsilon) - - def _gradient(self, a): - min_, max_ = a.bounds() - gradient = a.gradient() - gradient_norm = np.sqrt(np.mean(np.square(gradient))) - gradient = gradient / (gradient_norm + 1e-8) * (max_ - min_) - return gradient - - -class GradientSignAttack(SingleStepGradientBaseAttack): - """Adds the sign of the gradient to the image, gradually increasing - the magnitude until the image is misclassified. This attack is - often referred to as Fast Gradient Sign Method and was introduced - in [1]_. - - Does not do anything if the model does not have a gradient. - - References - ---------- - .. [1] Ian J. 
Goodfellow, Jonathon Shlens, Christian Szegedy, - "Explaining and Harnessing Adversarial Examples", - https://arxiv.org/abs/1412.6572 - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - epsilons=1000, max_epsilon=1): - """Adds the sign of the gradient to the image, gradually increasing - the magnitude until the image is misclassified. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - epsilons : int or Iterable[float] - Either Iterable of step sizes in the direction of the sign of - the gradient or number of step sizes between 0 and max_epsilon - that should be tried. - max_epsilon : float - Largest step size if epsilons is not an iterable. - - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - return self._run(a, epsilons=epsilons, max_epsilon=max_epsilon) - - def _gradient(self, a): - min_, max_ = a.bounds() - gradient = a.gradient() - gradient = np.sign(gradient) * (max_ - min_) - return gradient - - -FGSM = GradientSignAttack -from __future__ import division -import numpy as np -from collections import Iterable -import abc -import logging - -from .base import Attack -from .base import call_decorator - - -class IterativeGradientBaseAttack(Attack): - """Common base class for iterative gradient attacks.""" - - @abc.abstractmethod - def _gradient(self, a, x): - raise NotImplementedError - - def _run(self, a, epsilons, max_epsilon, steps): - logging.warning('Please consider using the L2BasicIterativeAttack,' - ' the LinfinityBasicIterativeAttack or one of its' - ' other variants such as the ProjectedGradientDescent' - ' attack.') - if not a.has_gradient(): - return - - image = a.original_image - min_, max_ = a.bounds() - - if not isinstance(epsilons, Iterable): - assert isinstance(epsilons, int) - max_epsilon_iter = max_epsilon / steps - epsilons = np.linspace(0, max_epsilon_iter, num=epsilons + 1)[1:] - - for epsilon in epsilons: - perturbed = image - - for _ in range(steps): - gradient = self._gradient(a, perturbed) - - perturbed = perturbed + gradient * epsilon - perturbed = np.clip(perturbed, min_, max_) - - a.predictions(perturbed) - # we don't return early if an adversarial was found - # because there might be a different epsilon - # and/or step that results in a better adversarial - - -class IterativeGradientAttack(IterativeGradientBaseAttack): - """Like GradientAttack but with several steps for each epsilon. - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - epsilons=100, max_epsilon=1, steps=10): - """Like GradientAttack but with several steps for each epsilon. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. 
- epsilons : int or Iterable[float] - Either Iterable of step sizes in the gradient direction - or number of step sizes between 0 and max_epsilon that should - be tried. - max_epsilon : float - Largest step size if epsilons is not an iterable. - steps : int - Number of iterations to run. - - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - self._run(a, epsilons=epsilons, max_epsilon=max_epsilon, steps=steps) - - def _gradient(self, a, x): - min_, max_ = a.bounds() - gradient = a.gradient(x) - gradient_norm = np.sqrt(np.mean(np.square(gradient))) - gradient = gradient / (gradient_norm + 1e-8) * (max_ - min_) - return gradient - - -class IterativeGradientSignAttack(IterativeGradientBaseAttack): - """Like GradientSignAttack but with several steps for each epsilon. - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - epsilons=100, max_epsilon=1, steps=10): - """Like GradientSignAttack but with several steps for each epsilon. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - epsilons : int or Iterable[float] - Either Iterable of step sizes in the direction of the sign of - the gradient or number of step sizes between 0 and max_epsilon - that should be tried. - max_epsilon : float - Largest step size if epsilons is not an iterable. - steps : int - Number of iterations to run. - - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - self._run(a, epsilons=epsilons, max_epsilon=max_epsilon, steps=steps) - - def _gradient(self, a, x): - min_, max_ = a.bounds() - gradient = a.gradient(x) - gradient = np.sign(gradient) * (max_ - min_) - return gradient -from __future__ import division -import numpy as np -from abc import abstractmethod -import logging -import warnings - -from .base import Attack -from .base import call_decorator -from .. import distances -from ..utils import crossentropy -from .. import nprng - - -class IterativeProjectedGradientBaseAttack(Attack): - """Base class for iterative (projected) gradient attacks. - - Concrete subclasses should implement __call__, _gradient - and _clip_perturbation. - - TODO: add support for other loss-functions, e.g. 
the CW loss function, - see https://github.com/MadryLab/mnist_challenge/blob/master/pgd_attack.py - """ - - @abstractmethod - def _gradient(self, a, x, class_, strict=True): - raise NotImplementedError - - @abstractmethod - def _clip_perturbation(self, a, noise, epsilon): - raise NotImplementedError - - @abstractmethod - def _check_distance(self, a): - raise NotImplementedError - - def _get_mode_and_class(self, a): - # determine if the attack is targeted or not - target_class = a.target_class() - targeted = target_class is not None - - if targeted: - class_ = target_class - else: - class_ = a.original_class - return targeted, class_ - - def _run(self, a, binary_search, - epsilon, stepsize, iterations, - random_start, return_early): - if not a.has_gradient(): - warnings.warn('applied gradient-based attack to model that' - ' does not provide gradients') - return - - self._check_distance(a) - - targeted, class_ = self._get_mode_and_class(a) - - if binary_search: - if isinstance(binary_search, bool): - k = 20 - else: - k = int(binary_search) - return self._run_binary_search( - a, epsilon, stepsize, iterations, - random_start, targeted, class_, return_early, k=k) - else: - return self._run_one( - a, epsilon, stepsize, iterations, - random_start, targeted, class_, return_early) - - def _run_binary_search(self, a, epsilon, stepsize, iterations, - random_start, targeted, class_, return_early, k): - - factor = stepsize / epsilon - - def try_epsilon(epsilon): - stepsize = factor * epsilon - return self._run_one( - a, epsilon, stepsize, iterations, - random_start, targeted, class_, return_early) - - for i in range(k): - if try_epsilon(epsilon): - logging.info('successful for eps = {}'.format(epsilon)) - break - logging.info('not successful for eps = {}'.format(epsilon)) - epsilon = epsilon * 1.5 - else: - logging.warning('exponential search failed') - return - - bad = 0 - good = epsilon - - for i in range(k): - epsilon = (good + bad) / 2 - if try_epsilon(epsilon): - good = epsilon - logging.info('successful for eps = {}'.format(epsilon)) - else: - bad = epsilon - logging.info('not successful for eps = {}'.format(epsilon)) - - def _run_one(self, a, epsilon, stepsize, iterations, - random_start, targeted, class_, return_early): - min_, max_ = a.bounds() - s = max_ - min_ - - original = a.original_image.copy() - - if random_start: - # using uniform noise even if the perturbation clipping uses - # a different norm because cleverhans does it the same way - noise = nprng.uniform( - -epsilon * s, epsilon * s, original.shape).astype( - original.dtype) - x = original + self._clip_perturbation(a, noise, epsilon) - strict = False # because we don't enforce the bounds here - else: - x = original - strict = True - - success = False - for _ in range(iterations): - gradient = self._gradient(a, x, class_, strict=strict) - # non-strict only for the first call and - # only if random_start is True - strict = True - if targeted: - gradient = -gradient - - # untargeted: gradient ascent on cross-entropy to original class - # targeted: gradient descent on cross-entropy to target class - x = x + stepsize * gradient - - x = original + self._clip_perturbation(a, x - original, epsilon) - - x = np.clip(x, min_, max_) - - logits, is_adversarial = a.predictions(x) - if logging.getLogger().isEnabledFor(logging.DEBUG): - if targeted: - ce = crossentropy(a.original_class, logits) - logging.debug('crossentropy to {} is {}'.format( - a.original_class, ce)) - ce = crossentropy(class_, logits) - logging.debug('crossentropy to {} is 
{}'.format(class_, ce)) - if is_adversarial: - if return_early: - return True - else: - success = True - return success - - -class LinfinityGradientMixin(object): - def _gradient(self, a, x, class_, strict=True): - gradient = a.gradient(x, class_, strict=strict) - gradient = np.sign(gradient) - min_, max_ = a.bounds() - gradient = (max_ - min_) * gradient - return gradient - - -class L1GradientMixin(object): - def _gradient(self, a, x, class_, strict=True): - gradient = a.gradient(x, class_, strict=strict) - # using mean to make range of epsilons comparable to Linf - gradient = gradient / np.mean(np.abs(gradient)) - min_, max_ = a.bounds() - gradient = (max_ - min_) * gradient - return gradient - - -class L2GradientMixin(object): - def _gradient(self, a, x, class_, strict=True): - gradient = a.gradient(x, class_, strict=strict) - # using mean to make range of epsilons comparable to Linf - gradient = gradient / np.sqrt(np.mean(np.square(gradient))) - min_, max_ = a.bounds() - gradient = (max_ - min_) * gradient - return gradient - - -class LinfinityClippingMixin(object): - def _clip_perturbation(self, a, perturbation, epsilon): - min_, max_ = a.bounds() - s = max_ - min_ - clipped = np.clip(perturbation, -epsilon * s, epsilon * s) - return clipped - - -class L1ClippingMixin(object): - def _clip_perturbation(self, a, perturbation, epsilon): - # using mean to make range of epsilons comparable to Linf - norm = np.mean(np.abs(perturbation)) - norm = max(1e-12, norm) # avoid division by zero - min_, max_ = a.bounds() - s = max_ - min_ - # clipping, i.e. only decreasing norm - factor = min(1, epsilon * s / norm) - return perturbation * factor - - -class L2ClippingMixin(object): - def _clip_perturbation(self, a, perturbation, epsilon): - # using mean to make range of epsilons comparable to Linf - norm = np.sqrt(np.mean(np.square(perturbation))) - norm = max(1e-12, norm) # avoid division by zero - min_, max_ = a.bounds() - s = max_ - min_ - # clipping, i.e. only decreasing norm - factor = min(1, epsilon * s / norm) - return perturbation * factor - - -class LinfinityDistanceCheckMixin(object): - def _check_distance(self, a): - if not isinstance(a.distance, distances.Linfinity): - logging.warning('Running an attack that tries to minimize the' - ' Linfinity norm of the perturbation without' - ' specifying foolbox.distances.Linfinity as' - ' the distance metric might lead to suboptimal' - ' results.') - - -class L1DistanceCheckMixin(object): - def _check_distance(self, a): - if not isinstance(a.distance, distances.MAE): - logging.warning('Running an attack that tries to minimize the' - ' L1 norm of the perturbation without' - ' specifying foolbox.distances.MAE as' - ' the distance metric might lead to suboptimal' - ' results.') - - -class L2DistanceCheckMixin(object): - def _check_distance(self, a): - if not isinstance(a.distance, distances.MSE): - logging.warning('Running an attack that tries to minimize the' - ' L2 norm of the perturbation without' - ' specifying foolbox.distances.MSE as' - ' the distance metric might lead to suboptimal' - ' results.') - - -class LinfinityBasicIterativeAttack( - LinfinityGradientMixin, - LinfinityClippingMixin, - LinfinityDistanceCheckMixin, - IterativeProjectedGradientBaseAttack): - - """The Basic Iterative Method introduced in [1]_. - - This attack is also known as Projected Gradient - Descent (PGD) (without random start) or FGSM^k. - - References - ---------- - .. 
[1] Alexey Kurakin, Ian Goodfellow, Samy Bengio, - "Adversarial examples in the physical world", - https://arxiv.org/abs/1607.02533 - - .. seealso:: :class:`ProjectedGradientDescentAttack` - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - binary_search=True, - epsilon=0.3, - stepsize=0.05, - iterations=10, - random_start=False, - return_early=True): - """Simple iterative gradient-based attack known as - Basic Iterative Method, Projected Gradient Descent or FGSM^k. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - binary_search : bool or int - Whether to perform a binary search over epsilon and stepsize, - keeping their ratio constant and using their values to start - the search. If False, hyperparameters are not optimized. - Can also be an integer, specifying the number of binary - search steps (default 20). - epsilon : float - Limit on the perturbation size; if binary_search is True, - this value is only for initialization and automatically - adapted. - stepsize : float - Step size for gradient descent; if binary_search is True, - this value is only for initialization and automatically - adapted. - iterations : int - Number of iterations for each gradient descent run. - random_start : bool - Start the attack from a random point rather than from the - original input. - return_early : bool - Whether an individual gradient descent run should stop as - soon as an adversarial is found. - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - assert epsilon > 0 - - self._run(a, binary_search, - epsilon, stepsize, iterations, - random_start, return_early) - - -BasicIterativeMethod = LinfinityBasicIterativeAttack -BIM = BasicIterativeMethod - - -class L1BasicIterativeAttack( - L1GradientMixin, - L1ClippingMixin, - L1DistanceCheckMixin, - IterativeProjectedGradientBaseAttack): - - """Modified version of the Basic Iterative Method - that minimizes the L1 distance. - - .. seealso:: :class:`LinfinityBasicIterativeAttack` - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - binary_search=True, - epsilon=0.3, - stepsize=0.05, - iterations=10, - random_start=False, - return_early=True): - """Simple iterative gradient-based attack known as - Basic Iterative Method, Projected Gradient Descent or FGSM^k. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - binary_search : bool or int - Whether to perform a binary search over epsilon and stepsize, - keeping their ratio constant and using their values to start - the search. If False, hyperparameters are not optimized. - Can also be an integer, specifying the number of binary - search steps (default 20). 
- epsilon : float - Limit on the perturbation size; if binary_search is True, - this value is only for initialization and automatically - adapted. - stepsize : float - Step size for gradient descent; if binary_search is True, - this value is only for initialization and automatically - adapted. - iterations : int - Number of iterations for each gradient descent run. - random_start : bool - Start the attack from a random point rather than from the - original input. - return_early : bool - Whether an individual gradient descent run should stop as - soon as an adversarial is found. - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - assert epsilon > 0 - - self._run(a, binary_search, - epsilon, stepsize, iterations, - random_start, return_early) - - -class L2BasicIterativeAttack( - L2GradientMixin, - L2ClippingMixin, - L2DistanceCheckMixin, - IterativeProjectedGradientBaseAttack): - - """Modified version of the Basic Iterative Method - that minimizes the L2 distance. - - .. seealso:: :class:`LinfinityBasicIterativeAttack` - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - binary_search=True, - epsilon=0.3, - stepsize=0.05, - iterations=10, - random_start=False, - return_early=True): - """Simple iterative gradient-based attack known as - Basic Iterative Method, Projected Gradient Descent or FGSM^k. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - binary_search : bool or int - Whether to perform a binary search over epsilon and stepsize, - keeping their ratio constant and using their values to start - the search. If False, hyperparameters are not optimized. - Can also be an integer, specifying the number of binary - search steps (default 20). - epsilon : float - Limit on the perturbation size; if binary_search is True, - this value is only for initialization and automatically - adapted. - stepsize : float - Step size for gradient descent; if binary_search is True, - this value is only for initialization and automatically - adapted. - iterations : int - Number of iterations for each gradient descent run. - random_start : bool - Start the attack from a random point rather than from the - original input. - return_early : bool - Whether an individual gradient descent run should stop as - soon as an adversarial is found. - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - assert epsilon > 0 - - self._run(a, binary_search, - epsilon, stepsize, iterations, - random_start, return_early) - - -class ProjectedGradientDescentAttack( - LinfinityGradientMixin, - LinfinityClippingMixin, - LinfinityDistanceCheckMixin, - IterativeProjectedGradientBaseAttack): - - """The Projected Gradient Descent Attack - introduced in [1]_ without random start. - - When used without a random start, this attack - is also known as Basic Iterative Method (BIM) - or FGSM^k. - - References - ---------- - .. [1] Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, - Dimitris Tsipras, Adrian Vladu, "Towards Deep Learning - Models Resistant to Adversarial Attacks", - https://arxiv.org/abs/1706.06083 - - .. 
seealso:: - - :class:`LinfinityBasicIterativeAttack` and - :class:`RandomStartProjectedGradientDescentAttack` - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - binary_search=True, - epsilon=0.3, - stepsize=0.01, - iterations=40, - random_start=False, - return_early=True): - """Simple iterative gradient-based attack known as - Basic Iterative Method, Projected Gradient Descent or FGSM^k. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - binary_search : bool or int - Whether to perform a binary search over epsilon and stepsize, - keeping their ratio constant and using their values to start - the search. If False, hyperparameters are not optimized. - Can also be an integer, specifying the number of binary - search steps (default 20). - epsilon : float - Limit on the perturbation size; if binary_search is True, - this value is only for initialization and automatically - adapted. - stepsize : float - Step size for gradient descent; if binary_search is True, - this value is only for initialization and automatically - adapted. - iterations : int - Number of iterations for each gradient descent run. - random_start : bool - Start the attack from a random point rather than from the - original input. - return_early : bool - Whether an individual gradient descent run should stop as - soon as an adversarial is found. - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - assert epsilon > 0 - - self._run(a, binary_search, - epsilon, stepsize, iterations, - random_start, return_early) - - -ProjectedGradientDescent = ProjectedGradientDescentAttack -PGD = ProjectedGradientDescent - - -class RandomStartProjectedGradientDescentAttack( - LinfinityGradientMixin, - LinfinityClippingMixin, - LinfinityDistanceCheckMixin, - IterativeProjectedGradientBaseAttack): - - """The Projected Gradient Descent Attack - introduced in [1]_ with random start. - - References - ---------- - .. [1] Aleksander Madry, Aleksandar Makelov, Ludwig Schmidt, - Dimitris Tsipras, Adrian Vladu, "Towards Deep Learning - Models Resistant to Adversarial Attacks", - https://arxiv.org/abs/1706.06083 - - .. seealso:: :class:`ProjectedGradientDescentAttack` - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - binary_search=True, - epsilon=0.3, - stepsize=0.01, - iterations=40, - random_start=True, - return_early=True): - """Simple iterative gradient-based attack known as - Basic Iterative Method, Projected Gradient Descent or FGSM^k. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - binary_search : bool or int - Whether to perform a binary search over epsilon and stepsize, - keeping their ratio constant and using their values to start - the search. 
If False, hyperparameters are not optimized. - Can also be an integer, specifying the number of binary - search steps (default 20). - epsilon : float - Limit on the perturbation size; if binary_search is True, - this value is only for initialization and automatically - adapted. - stepsize : float - Step size for gradient descent; if binary_search is True, - this value is only for initialization and automatically - adapted. - iterations : int - Number of iterations for each gradient descent run. - random_start : bool - Start the attack from a random point rather than from the - original input. - return_early : bool - Whether an individual gradient descent run should stop as - soon as an adversarial is found. - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - assert epsilon > 0 - - self._run(a, binary_search, - epsilon, stepsize, iterations, - random_start, return_early) - - -RandomProjectedGradientDescent = RandomStartProjectedGradientDescentAttack -RandomPGD = RandomProjectedGradientDescent - - -class MomentumIterativeAttack( - LinfinityClippingMixin, - LinfinityDistanceCheckMixin, - IterativeProjectedGradientBaseAttack): - - """The Momentum Iterative Method attack - introduced in [1]_. It's like the Basic - Iterative Method or Projected Gradient - Descent except that it uses momentum. - - References - ---------- - .. [1] Yinpeng Dong, Fangzhou Liao, Tianyu Pang, Hang Su, - Jun Zhu, Xiaolin Hu, Jianguo Li, "Boosting Adversarial - Attacks with Momentum", - https://arxiv.org/abs/1710.06081 - - """ - - def _gradient(self, a, x, class_, strict=True): - # get current gradient - gradient = a.gradient(x, class_, strict=strict) - gradient = gradient / max(1e-12, np.mean(np.abs(gradient))) - - # combine with history of gradient as new history - self._momentum_history = \ - self._decay_factor * self._momentum_history + gradient - - # use history - gradient = self._momentum_history - gradient = np.sign(gradient) - min_, max_ = a.bounds() - gradient = (max_ - min_) * gradient - return gradient - - def _run_one(self, *args, **kwargs): - # reset momentum history every time we restart - # gradient descent - self._momentum_history = 0 - return super(MomentumIterativeAttack, self)._run_one(*args, **kwargs) - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - binary_search=True, - epsilon=0.3, - stepsize=0.06, - iterations=10, - decay_factor=1.0, - random_start=False, - return_early=True): - """Momentum-based iterative gradient attack known as - Momentum Iterative Method. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - binary_search : bool - Whether to perform a binary search over epsilon and stepsize, - keeping their ratio constant and using their values to start - the search. If False, hyperparameters are not optimized. - Can also be an integer, specifying the number of binary - search steps (default 20). - epsilon : float - Limit on the perturbation size; if binary_search is True, - this value is only for initialization and automatically - adapted. 
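The _gradient override of MomentumIterativeAttack above amounts to a decayed running sum of scale-normalized gradients whose sign then drives the Linf step. A standalone sketch of that accumulation, with random arrays standing in for model gradients:

import numpy as np

decay_factor = 1.0
history = 0.0
for _ in range(10):
    grad = np.random.randn(8, 8)                     # toy model gradient
    grad = grad / max(1e-12, np.mean(np.abs(grad)))  # normalize the scale
    history = decay_factor * history + grad          # momentum accumulation
    step = np.sign(history)                          # Linf step direction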
- stepsize : float - Step size for gradient descent; if binary_search is True, - this value is only for initialization and automatically - adapted. - iterations : int - Number of iterations for each gradient descent run. - decay_factor : float - Decay factor used by the momentum term. - random_start : bool - Start the attack from a random point rather than from the - original input. - return_early : bool - Whether an individual gradient descent run should stop as - soon as an adversarial is found. - """ - a = input_or_adv - del input_or_adv - del label - del unpack - - assert epsilon > 0 - - self._decay_factor = decay_factor - - self._run(a, binary_search, - epsilon, stepsize, iterations, - random_start, return_early) - - -MomentumIterativeMethod = MomentumIterativeAttack -from __future__ import division -import logging - -import numpy as np -import scipy.optimize as so - -from .base import Attack -from .base import call_decorator -from .gradient import GradientAttack -from ..utils import crossentropy as utils_ce -from .. import rng - - -class LBFGSAttack(Attack): - """Uses L-BFGS-B to minimize the distance between the image and the adversarial - as well as the cross-entropy between the predictions for the adversarial - and the one-hot encoded target class. - - If the criterion does not have a target class, a random class is chosen - from the set of all classes except the original one. - - Notes - ----- - This implementation generalizes algorithm 1 in [1]_ to support other - targeted criteria and other distance measures. - - References - ---------- - - .. [1] https://arxiv.org/abs/1510.05328 - - """ - - def __init__(self, *args, **kwargs): - if 'approximate_gradient' in kwargs: - self._approximate_gradient = kwargs['approximate_gradient'] - del kwargs['approximate_gradient'] - super(LBFGSAttack, self).__init__(*args, **kwargs) - else: - self._approximate_gradient = False - super(LBFGSAttack, self).__init__(*args, **kwargs) - - def name(self): - prefix = 'Approximate' if self._approximate_gradient else '' - return '{}{}'.format(prefix, self.__class__.__name__) - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - epsilon=1e-5, - num_random_targets=0, - maxiter=150): - """Uses L-BFGS-B to minimize the distance between the image and the - adversarial as well as the cross-entropy between the predictions for - the adversarial and the one-hot encoded target class. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - epsilon : float - Epsilon of the binary search. - num_random_targets : int - Number of random target classes if no target class is given - by the criterion. - maxiter : int - Maximum number of iterations for L-BFGS-B.
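The trade-off constant c used by this attack is found by doubling until the solver returns an adversarial and then binary searching the resulting bracket. A self-contained sketch of that driver logic; the predicate is a toy stand-in for the lbfgsb(c) call defined further below:

def find_c(is_adversarial_for, epsilon=1e-5):
    # exponential search: double c until the solver succeeds
    c = epsilon
    for _ in range(30):
        c = 2 * c
        if is_adversarial_for(c):
            break
    else:
        return None  # no adversarial found for any tested c
    # binary search between 0 and the first successful c
    c_low, c_high = 0, c
    while c_high - c_low >= epsilon:
        c_half = (c_low + c_high) / 2
        if is_adversarial_for(c_half):
            c_high = c_half
        else:
            c_low = c_half
    return c_high

print(find_c(lambda c: c >= 0.37))  # toy threshold predicate, converges near 0.37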
- - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - if not self._approximate_gradient and not a.has_gradient(): - return - - original_class = a.original_class - - target_class = a.target_class() - if target_class is None: - if num_random_targets == 0 and self._approximate_gradient: - num_random_targets = 1 - - if num_random_targets == 0: - gradient_attack = GradientAttack() - gradient_attack(a) - adv_img = a.image - if adv_img is None: # pragma: no coverage - # using GradientAttack did not work, - # falling back to random target - num_random_targets = 1 - logging.warning('Using GradientAttack to determine a target class failed, falling back to a random target class') # noqa: E501 - else: - logits, _ = a.predictions(adv_img) - target_class = np.argmax(logits) - target_classes = [target_class] - logging.info('Determined a target class using the GradientAttack: {}'.format(target_class)) # noqa: E501 - - if num_random_targets > 0: - - # draw num_random_targets random classes all of which are - # different and not the original class - - num_classes = a.num_classes() - assert num_random_targets <= num_classes - 1 - - # sample one more than necessary - # remove original class from samples - # should be more efficient than other approaches, see - # https://github.com/numpy/numpy/issues/2764 - target_classes = rng.sample( - range(num_classes), num_random_targets + 1) - target_classes = [t for t in target_classes if t != original_class] # noqa: E501 - target_classes = target_classes[:num_random_targets] - - str_target_classes = [str(t) for t in target_classes] - logging.info('Random target classes: {}'.format(', '.join(str_target_classes))) # noqa: E501 - else: - target_classes = [target_class] - - # avoid mixing GradientAttack and LBFGS Attack - a._reset() - - for i, target_class in enumerate(target_classes): - self._optimize( - a, target_class, - epsilon=epsilon, maxiter=maxiter) - - if len(target_classes) > 1: # pragma: no coverage - logging.info('Best adversarial distance after {} target classes: {}'.format(i + 1, a.distance)) # noqa: E501 - - def _optimize(self, a, target_class, epsilon, maxiter): - image = a.original_image - min_, max_ = a.bounds() - - # store the shape for later and operate on the flattened image - shape = image.shape - dtype = image.dtype - image = image.flatten().astype(np.float64) - - n = len(image) - bounds = [(min_, max_)] * n - - x0 = image - - if self._approximate_gradient: - - def distance(x): - d = a.normalized_distance(x.reshape(shape)) - return d.value - - def crossentropy(x): - # lbfgs with approx grad does not seem to respect the bounds - # setting strict to False - logits, _ = a.predictions(x.reshape(shape), strict=False) - ce = utils_ce(logits=logits, label=target_class) - return ce - - def loss(x, c): - x = x.astype(dtype) - v1 = distance(x) - v2 = crossentropy(x) - return np.float64(v1 + c * v2) - - else: - - def distance(x): - d = a.normalized_distance(x.reshape(shape)) - return d.value, d.gradient.reshape(-1) - - def crossentropy(x): - logits, gradient, _ = a.predictions_and_gradient( - x.reshape(shape), target_class, strict=False) - gradient = gradient.reshape(-1) - ce = utils_ce(logits=logits, label=target_class) - return ce, gradient - - def loss(x, c): - x = x.astype(dtype) - v1, g1 = distance(x) - v2, g2 = crossentropy(x) - v = v1 + c * v2 - g = g1 + c * g2 - - a = 1e10 - return np.float64(a * v), np.float64(a * g) - - def lbfgsb(c): - approx_grad_eps = (max_ - min_) / 100 - x, f, d = so.fmin_l_bfgs_b( - loss, - x0, - 
args=(c,), - approx_grad=self._approximate_gradient, - bounds=bounds, - m=15, - maxiter=maxiter, - epsilon=approx_grad_eps) - - logging.info(d) - - # LBFGS-B does not always exactly respect the boundaries - if np.amax(x) > max_ or np.amin(x) < min_: # pragma: no coverage - logging.info('Image out of bounds (min, max = {}, {}). Performing manual clip.'.format(np.amin(x), np.amax(x))) # noqa: E501 - x = np.clip(x, min_, max_) - - _, is_adversarial = a.predictions(x.reshape(shape).astype(dtype)) - return is_adversarial - - # finding initial c - c = epsilon - for i in range(30): - c = 2 * c - is_adversarial = lbfgsb(c) - logging.info('Tested c = {:.4e}: {}'.format( - c, - ('adversarial' if is_adversarial else 'not adversarial'))) - if is_adversarial: - break - else: # pragma: no cover - logging.info('Could not find an adversarial; maybe the model returns wrong gradients') # noqa: E501 - return - - # binary search - c_low = 0 - c_high = c - while c_high - c_low >= epsilon: - c_half = (c_low + c_high) / 2 - is_adversarial = lbfgsb(c_half) - logging.info('Tested c = {:.4e}: {} ({:.4e}, {:.4e})'.format( - c_half, - ('adversarial' if is_adversarial else 'not adversarial'), - c_low, - c_high)) - if is_adversarial: - c_high = c_half - else: - c_low = c_half - - -class ApproximateLBFGSAttack(LBFGSAttack): - """Same as :class:`LBFGSAttack` with approximate_gradient set to True. - - """ - - def __init__(self, *args, **kwargs): - assert 'approximate_gradient' not in kwargs - kwargs['approximate_gradient'] = True - super(ApproximateLBFGSAttack, self).__init__(*args, **kwargs) -from __future__ import division -import numpy as np - -from .base import Attack -from .base import call_decorator -from ..utils import softmax -from .. import nprng - - -class SinglePixelAttack(Attack): - """Perturbs just a single pixel and sets it to the min or max.""" - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - max_pixels=1000): - """Perturbs just a single pixel and sets it to the min or max. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, correctly classified image. If image is a - numpy array, label must be passed as well. If image is - an :class:`Adversarial` instance, label must not be passed. - label : int - The reference label of the original image. Must be passed - if image is a numpy array, must not be passed if image is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial image, otherwise returns - the Adversarial object. - max_pixels : int - Maximum number of pixels to try. - - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - channel_axis = a.channel_axis(batch=False) - image = a.original_image - axes = [i for i in range(image.ndim) if i != channel_axis] - assert len(axes) == 2 - h = image.shape[axes[0]] - w = image.shape[axes[1]] - - min_, max_ = a.bounds() - - pixels = nprng.permutation(h * w) - pixels = pixels[:max_pixels] - for i, pixel in enumerate(pixels): - x = pixel % w - y = pixel // w - - location = [x, y] - location.insert(channel_axis, slice(None)) - location = tuple(location) - - for value in [min_, max_]: - perturbed = image.copy() - perturbed[location] = value - - _, is_adv = a.predictions(perturbed) - if is_adv: - return - - -class LocalSearchAttack(Attack): - """A black-box attack based on the idea of greedy local search. - - This implementation is based on the algorithm in [1]_. - - References - ---------- - .. 
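The coordinate bookkeeping in SinglePixelAttack above, a flat index decoded to (x, y) plus a slice inserted at the channel axis, is easy to see in isolation. A standalone sketch with an assumed square HWC image:

import numpy as np

image = np.zeros((32, 32, 3))   # assumed HWC layout, channel_axis == 2
w = image.shape[1]
channel_axis = 2

pixel = 100                     # one flat index into the h * w grid
x, y = pixel % w, pixel // w

location = [x, y]
location.insert(channel_axis, slice(None))  # cover every channel at once
image[tuple(location)] = 1.0    # set that pixel to the upper bound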
[1] Nina Narodytska, Shiva Prasad Kasiviswanathan, "Simple - Black-Box Adversarial Perturbations for Deep Networks", - https://arxiv.org/abs/1612.06299 - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - r=1.5, p=10., d=5, t=5, R=150): - """A black-box attack based on the idea of greedy local search. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, correctly classified image. If image is a - numpy array, label must be passed as well. If image is - an :class:`Adversarial` instance, label must not be passed. - label : int - The reference label of the original image. Must be passed - if image is a numpy array, must not be passed if image is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial image, otherwise returns - the Adversarial object. - r : float - Perturbation parameter that controls the cyclic perturbation; - must be in [0, 2] - p : float - Perturbation parameter that controls the pixel sensitivity - estimation - d : int - The half side length of the neighborhood square - t : int - The number of pixels perturbed at each round - R : int - An upper bound on the number of iterations - - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - # TODO: incorporate the modifications mentioned in the manuscript - # under "Implementing Algorithm LocSearchAdv" - - assert 0 <= r <= 2 - - if a.target_class() is not None: - # TODO: check if this algorithm can be used as a targeted attack - return - - def normalize(im): - min_, max_ = a.bounds() - - im = im - (min_ + max_) / 2 - im = im / (max_ - min_) - - LB = -1 / 2 - UB = 1 / 2 - return im, LB, UB - - def unnormalize(im): - min_, max_ = a.bounds() - - im = im * (max_ - min_) - im = im + (min_ + max_) / 2 - return im - - Im = a.original_image - Im, LB, UB = normalize(Im) - - cI = a.original_class - - channel_axis = a.channel_axis(batch=False) - axes = [i for i in range(Im.ndim) if i != channel_axis] - assert len(axes) == 2 - h = Im.shape[axes[0]] - w = Im.shape[axes[1]] - channels = Im.shape[channel_axis] - - def random_locations(): - n = int(0.1 * h * w) - n = min(n, 128) - locations = nprng.permutation(h * w)[:n] - p_x = locations % w - p_y = locations // w - pxy = list(zip(p_x, p_y)) - pxy = np.array(pxy) - return pxy - - def pert(Ii, p, x, y): - Im = Ii.copy() - location = [x, y] - location.insert(channel_axis, slice(None)) - location = tuple(location) - Im[location] = p * np.sign(Im[location]) - return Im - - def cyclic(r, Ibxy): - result = r * Ibxy - if result < LB: - result = result + (UB - LB) - elif result > UB: - result = result - (UB - LB) - assert LB <= result <= UB - return result - - Ii = Im - PxPy = random_locations() - - for _ in range(R): - # Computing the function g using the neighborhood - # IMPORTANT: random subset for efficiency - PxPy = PxPy[nprng.permutation(len(PxPy))[:128]] - L = [pert(Ii, p, x, y) for x, y in PxPy] - - def score(Its): - Its = np.stack(Its) - Its = unnormalize(Its) - batch_logits, _ = a.batch_predictions(Its, strict=False) - scores = [softmax(logits)[cI] for logits in batch_logits] - return scores - - scores = score(L) - - indices = np.argsort(scores)[:t] - - PxPy_star = PxPy[indices] - - # Generation of new perturbed image Ii - for x, y in PxPy_star: - for b in range(channels): - location = [x, y] - location.insert(channel_axis, b) - location = tuple(location) - Ii[location] = cyclic(r, Ii[location]) - - # Check whether the perturbed image Ii is an adversarial 
image - _, is_adv = a.predictions(unnormalize(Ii)) - if is_adv: # pragma: no cover - return - - # Update a neighborhood of pixel locations for the next round - PxPy = [ - (x, y) - for _a, _b in PxPy_star - for x in range(_a - d, _a + d + 1) - for y in range(_b - d, _b + d + 1)] - PxPy = [(x, y) for x, y in PxPy if 0 <= x < w and 0 <= y < h] - PxPy = list(set(PxPy)) - PxPy = np.array(PxPy) -import logging - -from .base import Attack -from .base import call_decorator -from ..utils import softmax - -import numpy as np - - -class NewtonFoolAttack(Attack): - """Implements the NewtonFool Attack. - - The attack was introduced in [1]_. - - References - ---------- - .. [1] Uyeong Jang et al., "Objective Metrics and Gradient Descent - Algorithms for Adversarial Examples in Machine Learning", - https://dl.acm.org/citation.cfm?id=3134635 - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - max_iter=100, - eta=0.01): - """ - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - max_iter : int - The maximum number of iterations. - eta : float - the eta coefficient - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - if not a.has_gradient(): - return - - if a.target_class() is not None: - logging.fatal('NewtonFool is an untargeted adversarial attack.') - return - - l2_norm = np.linalg.norm(a.original_image) - min_, max_ = a.bounds() - perturbed_image = a.original_image.copy() - - for i in range(max_iter): - - # (1) get the score and gradients - logits, gradients, is_adversarial = \ - a.predictions_and_gradient(perturbed_image) - - if is_adversarial: - return - - score = np.max(softmax(logits)) - # instead of using the logits and the gradient of the logits, - # we use a numerically stable implementation of the cross-entropy - # and expect that the deep learning frameworks also use such a - # stable implementation to calculate the gradient - # grad is calculated from CE but we want softmax - # -> revert chain rule - gradients = -gradients / score - - # (2) calculate gradient norm - gradient_l2_norm = np.linalg.norm(gradients) - - # (3) calculate delta - delta = self._delta(eta, l2_norm, score, - gradient_l2_norm, a.num_classes()) - - # delta = 0.01 - - # (4) calculate & apply current perturbation - current_perturbation = self._perturbation(delta, - gradients, - gradient_l2_norm) - - perturbed_image += current_perturbation - perturbed_image = np.clip(perturbed_image, min_, max_) - - @staticmethod - def _delta(eta, norm, score, gradient_norm, num_classes): - a = eta * norm * gradient_norm - b = score - 1.0 / num_classes - return min(a, b) - - @staticmethod - def _perturbation(delta, gradients, gradient_norm): - direction = -((delta / (gradient_norm ** 2)) * gradients) - return direction -import warnings -import logging - -from .base import Attack -from .base import call_decorator -from .saltandpepper import SaltAndPepperNoiseAttack -from .. import rng - - -class PointwiseAttack(Attack): - """Starts with an adversarial and performs a binary search between - the adversarial and the original for each dimension of the input - individually. - - References - ---------- - ..
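The step-size rule in NewtonFool above caps the perturbation by both a gradient-scaled term and the margin of the current score over chance, then moves against the gradient. A standalone sketch with toy values, mirroring _delta and _perturbation:

import numpy as np

def newtonfool_delta(eta, x_norm, score, grad_norm, num_classes):
    # min(eta * ||x|| * ||g||, p(x) - 1/C), as in _delta above
    return min(eta * x_norm * grad_norm, score - 1.0 / num_classes)

g = np.random.randn(8, 8)                     # toy gradient
g_norm = float(np.linalg.norm(g))
delta = newtonfool_delta(eta=0.01, x_norm=10.0, score=0.9,
                         grad_norm=g_norm, num_classes=10)
step = -(delta / g_norm ** 2) * g             # as in _perturbation above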
[1] L. Schott, J. Rauber, M. Bethge, W. Brendel: "Towards the first - adversarially robust neural network model on MNIST", ICLR (2019) - https://arxiv.org/abs/1805.09190 - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - starting_point=None, initialization_attack=None): - """Starts with an adversarial and performs a binary search between - the adversarial and the original for each dimension of the input - individually. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - starting_point : `numpy.ndarray` - Adversarial input to use as a starting point, in particular - for targeted attacks. - initialization_attack : :class:`Attack` - Attack to use to find a starting point. Defaults to - SaltAndPepperNoiseAttack. - - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - self._starting_point = starting_point - self._initialization_attack = initialization_attack - self.initialize_starting_point(a) - - if a.image is None: - warnings.warn( - 'Initialization failed. If the criterion is targeted,' - ' it might be necessary to pass an explicit starting' - ' point or targeted initialization attack.') - return - - shape = a.original_image.shape - N = a.original_image.size - - original = a.original_image.reshape(-1) - x = a.image.copy().reshape(-1) - - assert original.dtype == x.dtype - - while True: - # draw random shuffling of all indices - indices = list(range(N)) - rng.shuffle(indices) - - for index in indices: - # change index - old_value = x[index] - new_value = original[index] - if old_value == new_value: - continue - x[index] = new_value - - # check if still adversarial - _, is_adversarial = a.predictions(x.reshape(shape)) - - # if adversarial, restart from there - if is_adversarial: - logging.info('Reset value to original -> new distance:' - ' {}'.format(a.distance)) - break - - # if not, undo change - x[index] = old_value - else: - # no index was successful - break - - logging.info('Starting binary searches') - - while True: - # draw random shuffling of all indices - indices = list(range(N)) - rng.shuffle(indices) - - # whether that run through all values made any improvement - improved = False - - logging.info('Starting new loop through all values') - - for index in indices: - # change index - old_value = x[index] - original_value = original[index] - if old_value == original_value: - continue - x[index] = original_value - - # check if still adversarial - _, is_adversarial = a.predictions(x.reshape(shape)) - - # if adversarial, no binary search needed - if is_adversarial: # pragma: no cover - logging.info('Reset value at {} to original ->' - ' new distance: {}'.format( - index, a.distance)) - improved = True - else: - # binary search - adv_value = old_value - non_adv_value = original_value - best_adv_value = self.binary_search( - a, x, index, adv_value, non_adv_value, shape) - - if old_value != best_adv_value: - x[index] = best_adv_value - improved = True - logging.info('Set value at {} from {} to {}' - ' (original has {}) ->' - ' new distance: {}'.format( - index, old_value, best_adv_value, - original_value, a.distance)) - - if not improved: -
# no improvement for any of the indices - break - - def binary_search(self, a, x, index, adv_value, non_adv_value, shape): - for i in range(10): - next_value = (adv_value + non_adv_value) / 2 - x[index] = next_value - _, is_adversarial = a.predictions(x.reshape(shape)) - if is_adversarial: - adv_value = next_value - else: - non_adv_value = next_value - return adv_value - - def initialize_starting_point(self, a): - starting_point = self._starting_point - init_attack = self._initialization_attack - - if a.image is not None: - if starting_point is not None: # pragma: no cover - warnings.warn( - 'Ignoring starting_point because the attack' - ' is applied to a previously found adversarial.') - if init_attack is not None: # pragma: no cover - warnings.warn( - 'Ignoring initialization_attack because the attack' - ' is applied to a previously found adversarial.') - return - - if starting_point is not None: - a.predictions(starting_point) - assert a.image is not None, ('Invalid starting point provided.' - ' Please provide a starting point' - ' that is adversarial.') - return - - if init_attack is None: - init_attack = SaltAndPepperNoiseAttack - logging.info( - 'Neither starting_point nor initialization_attack given.' - ' Falling back to {} for initialization.'.format( - init_attack.__name__)) - - if issubclass(init_attack, Attack): - # instantiate if necessary - init_attack = init_attack() - - init_attack(a) -import numpy as np - -from .base import Attack -from .base import call_decorator - - -class PrecomputedImagesAttack(Attack): - """Attacks a model using precomputed adversarial candidates. - - Parameters - ---------- - input_images : `numpy.ndarray` - The original images that will be expected by this attack. - output_images : `numpy.ndarray` - The adversarial candidates corresponding to the input_images. - *args : positional args - Positional args passed to the `Attack` base class. - **kwargs : keyword args - Keyword args passed to the `Attack` base class. - """ - - def __init__(self, input_images, output_images, *args, **kwargs): - super(PrecomputedImagesAttack, self).__init__(*args, **kwargs) - - assert input_images.shape == output_images.shape - - self._input_images = input_images - self._output_images = output_images - - def _get_output(self, a, image): - """ Looks up the precomputed adversarial image for a given image. - - """ - sd = np.square(self._input_images - image) - mses = np.mean(sd, axis=tuple(range(1, sd.ndim))) - index = np.argmin(mses) - - # if we run into numerical problems with this approach, we might - # need to add a very tiny threshold here - if mses[index] > 0: - raise ValueError('No precomputed output image for this image') - return self._output_images[index] - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True): - """Attacks a model using precomputed adversarial candidates. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object.
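The _get_output lookup above is a nearest-neighbor match under MSE that additionally insists on an exact hit. In isolation, with toy arrays:

import numpy as np

inputs = np.random.rand(5, 8, 8)    # precomputed inputs (toy)
outputs = np.random.rand(5, 8, 8)   # their adversarial counterparts (toy)

query = inputs[3].copy()
sd = np.square(inputs - query)
mses = np.mean(sd, axis=tuple(range(1, sd.ndim)))
index = int(np.argmin(mses))
assert mses[index] == 0             # only exact matches are accepted
candidate = outputs[index]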
- - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - image = a.original_image - adversarial = self._get_output(a, image) - a.predictions(adversarial) -import logging - -import numpy as np - -from .base import Attack -from .base import call_decorator -from .gradient import GradientAttack -from .. import rng - - -class SaliencyMapAttack(Attack): - """Implements the Saliency Map Attack. - - The attack was introduced in [1]_. - - References - ---------- - .. [1] Nicolas Papernot, Patrick McDaniel, Somesh Jha, Matt Fredrikson, - Z. Berkay Celik, Ananthram Swami, "The Limitations of Deep Learning - in Adversarial Settings", https://arxiv.org/abs/1511.07528 - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - max_iter=2000, - num_random_targets=0, - fast=True, - theta=0.1, - max_perturbations_per_pixel=7): - """Implements the Saliency Map Attack. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - max_iter : int - The maximum number of iterations to run. - num_random_targets : int - Number of random target classes if no target class is given - by the criterion. - fast : bool - Whether to use the fast saliency map calculation. - theta : float - perturbation per pixel relative to [min, max] range. - max_perturbations_per_pixel : int - Maximum number of times a pixel can be modified. - - """ - a = input_or_adv - del input_or_adv - del label - del unpack - - # TODO: the original algorithm works on pixels across channels! 
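For reference, the pixel score computed further below in _saliency_map combines the gradient towards the target class (alphas) with the summed influence on the remaining classes (betas). A toy sketch of that combination, with random arrays standing in for model gradients:

import numpy as np

alphas = np.random.randn(8, 8)   # toy d(target logit)/d(pixel)
betas = np.random.randn(8, 8)    # toy summed influence on other classes

# large-magnitude pixels where the two signs disagree score lowest
salmap = np.abs(alphas) * np.abs(betas) * np.sign(alphas * betas)

idx = np.unravel_index(np.argmin(salmap), salmap.shape)
pix_sign = np.sign(alphas)[idx]  # direction in which to perturb that pixel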
- - original_class = a.original_class - - target_class = a.target_class() - if target_class is None: - if num_random_targets == 0: - gradient_attack = GradientAttack() - gradient_attack(a) - adv_img = a.image - if adv_img is None: # pragma: no coverage - # using GradientAttack did not work, - # falling back to random target - num_random_targets = 1 - logging.info('Using GradientAttack to determine a target class failed, falling back to a random target class') # noqa: E501 - else: - logits, _ = a.predictions(adv_img) - target_class = np.argmax(logits) - target_classes = [target_class] - logging.info('Determined a target class using the GradientAttack: {}'.format(target_class)) # noqa: E501 - else: # pragma: no coverage - num_random_targets = 1 - - if num_random_targets > 0: - - # draw num_random_targets random classes all of which are - # different and not the original class - - num_classes = a.num_classes() - assert num_random_targets <= num_classes - 1 - - # sample one more than necessary - # remove original class from samples - # should be more efficient than other approaches, see - # https://github.com/numpy/numpy/issues/2764 - target_classes = rng.sample( - range(num_classes), num_random_targets + 1) - target_classes = [t for t in target_classes if t != original_class] # noqa: E501 - target_classes = target_classes[:num_random_targets] - - str_target_classes = [str(t) for t in target_classes] - logging.info('Random target classes: {}'.format(', '.join(str_target_classes))) # noqa: E501 - else: - target_classes = [target_class] - - # avoid mixing GradientAttack and SaliencyMapAttack - a._reset() - - for target in target_classes: - - image = a.original_image - - # the mask defines the search domain - # each modified pixel with border value is set to zero in mask - mask = np.ones_like(image) - - # count tracks how often each pixel was changed - counts = np.zeros_like(image) - - # TODO: shouldn't this be without target - labels = range(a.num_classes()) - - perturbed = image.copy() - - min_, max_ = a.bounds() - - # TODO: stop if mask is all zero - for step in range(max_iter): - _, is_adversarial = a.predictions(perturbed) - if is_adversarial: - return - - # get pixel location with highest influence on class - idx, p_sign = self._saliency_map( - a, perturbed, target, labels, mask, fast=fast) - - # apply perturbation - perturbed[idx] += -p_sign * theta * (max_ - min_) - - # tracks number of updates for each pixel - counts[idx] += 1 - - # remove pixel from search domain if it hits the bound - if perturbed[idx] <= min_ or perturbed[idx] >= max_: - mask[idx] = 0 - - # remove pixel if it was changed too often - if counts[idx] >= max_perturbations_per_pixel: - mask[idx] = 0 - - perturbed = np.clip(perturbed, min_, max_) - - def _saliency_map(self, a, image, target, labels, mask, fast=False): - """Implements Algorithm 3 in manuscript - - """ - - # pixel influence on target class - alphas = a.gradient(image, target) * mask - - # pixel influence on sum of residual classes - # (don't evaluate if fast == True) - if fast: - betas = -np.ones_like(alphas) - else: - betas = np.sum([ - a.gradient(image, label) * mask - alphas - for label in labels], 0) - - # compute saliency map - # (take into account both pos. & neg. 
perturbations) - salmap = np.abs(alphas) * np.abs(betas) * np.sign(alphas * betas) - - # find optimal pixel & direction of perturbation - idx = np.argmin(salmap) - idx = np.unravel_index(idx, mask.shape) - pix_sign = np.sign(alphas)[idx] - - return idx, pix_sign -import numpy as np - -from .base import Attack -from .base import call_decorator -from .. import nprng - - -class SaltAndPepperNoiseAttack(Attack): - """Increases the amount of salt and pepper noise until the - image is misclassified. - - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - epsilons=100, repetitions=10): - """Increases the amount of salt and pepper noise until the - image is misclassified. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - epsilons : int - Number of steps to try between probability 0 and 1. - repetitions : int - Specifies how often the attack will be repeated. - - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - image = a.original_image - min_, max_ = a.bounds() - axis = a.channel_axis(batch=False) - channels = image.shape[axis] - shape = list(image.shape) - shape[axis] = 1 - r = max_ - min_ - pixels = np.prod(shape) - - epsilons = min(epsilons, pixels) - max_epsilon = 1 - - for _ in range(repetitions): - for epsilon in np.linspace(0, max_epsilon, num=epsilons + 1)[1:]: - p = epsilon - - u = nprng.uniform(size=shape) - u = u.repeat(channels, axis=axis) - - salt = (u >= 1 - p / 2).astype(image.dtype) * r - pepper = -(u < p / 2).astype(image.dtype) * r - - perturbed = image + salt + pepper - perturbed = np.clip(perturbed, min_, max_) - - if a.normalized_distance(perturbed) >= a.distance: - continue - - _, is_adversarial = a.predictions(perturbed) - if is_adversarial: - # higher epsilon usually means larger perturbation, but - # this relationship is not strictly monotonic, so we set - # the new limit a bit higher than the best one so far - # but not larger than 1 - max_epsilon = min(1, epsilon * 1.2) - break -import scipy.optimize as so - -from .base import Attack -from .base import call_decorator -from .. import nprng - - -class SLSQPAttack(Attack): - """Uses SLSQP to minimize the distance between the image and the - adversarial under the constraint that the image is adversarial.""" - - # TODO: add support for criteria that are differentiable (if the network - # is differentiable) and use this to provide constraint gradients - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True): - """Uses SLSQP to minimize the distance between the image and the - adversarial under the constraint that the image is adversarial. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, correctly classified image. If image is a - numpy array, label must be passed as well. If image is - an :class:`Adversarial` instance, label must not be passed. - label : int - The reference label of the original image. Must be passed - if image is a numpy array, must not be passed if image is - an :class:`Adversarial` instance. 
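The noise construction in SaltAndPepperNoiseAttack above draws a single uniform value per pixel, shares it across channels, and pushes the two tails of the distribution to the bounds. A standalone sketch with an assumed CHW layout and (0, 1) bounds:

import numpy as np

image = np.random.rand(3, 16, 16)        # assumed CHW image
min_, max_ = 0.0, 1.0
r = max_ - min_
p = 0.1                                  # noise probability

u = np.random.uniform(size=(1, 16, 16))  # one draw per pixel
u = u.repeat(3, axis=0)                  # identical mask on every channel

salt = (u >= 1 - p / 2).astype(image.dtype) * r
pepper = -(u < p / 2).astype(image.dtype) * r
perturbed = np.clip(image + salt + pepper, min_, max_)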
- unpack : bool - If true, returns the adversarial image, otherwise returns - the Adversarial object. - - """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - image = a.original_image - dtype = a.original_image.dtype - min_, max_ = a.bounds() - - # flatten the image (and remember the shape) - shape = image.shape - n = image.size - image = image.flatten() - - x0 = nprng.uniform(min_, max_, size=image.shape) - bounds = [(min_, max_)] * n - options = {'maxiter': 500} - - def fun(x, *args): - """Objective function with derivative""" - distance = a.normalized_distance(x.reshape(shape)) - return distance.value, distance.gradient.reshape(-1) - - def eq_constraint(x, *args): - """Equality constraint""" - _, is_adv = a.predictions(x.reshape(shape).astype(dtype)) - if is_adv: - return 0. - else: - return 1. - - constraints = [ - { - 'type': 'eq', - 'fun': eq_constraint, - } - ] - - result = so.minimize( - fun, - x0, - method='SLSQP', - jac=True, - bounds=bounds, - constraints=constraints, - options=options) - - a.predictions(result.x.reshape(shape).astype(dtype)) -# -*- coding: utf-8 -*- -from __future__ import division - -import numpy as np -from itertools import product -from scipy.ndimage import rotate, shift -import operator - -from .base import Attack -from .base import call_decorator -from .. import nprng - - -class SpatialAttack(Attack): - """Adversarially chosen rotations and translations [1]. - - This implementation is based on the reference implementation by - Madry et al.: https://github.com/MadryLab/adversarial_spatial - - References - ---------- - .. [1] Logan Engstrom*, Brandon Tran*, Dimitris Tsipras*, - Ludwig Schmidt, Aleksander Mądry: "A Rotation and a - Translation Suffice: Fooling CNNs with Simple Transformations", - http://arxiv.org/abs/1712.02779 - """ - - @call_decorator - def __call__(self, input_or_adv, label=None, unpack=True, - do_rotations=True, do_translations=True, - x_shift_limits=(-5, 5), y_shift_limits=(-5, 5), - angular_limits=(-5, 5), granularity=10, - random_sampling=False, abort_early=True): - """Adversarially chosen rotations and translations. - - Parameters - ---------- - input_or_adv : `numpy.ndarray` or :class:`Adversarial` - The original, unperturbed input as a `numpy.ndarray` or - an :class:`Adversarial` instance. - label : int - The reference label of the original input. Must be passed - if `a` is a `numpy.ndarray`, must not be passed if `a` is - an :class:`Adversarial` instance. - unpack : bool - If true, returns the adversarial input, otherwise returns - the Adversarial object. - do_rotations : bool - If False no rotations will be applied to the image. - do_translations : bool - If False no translations will be applied to the image. - x_shift_limits : int or (int, int) - Limits for horizontal translations in pixels. If one integer is - provided the limits will be (-x_shift_limits, x_shift_limits). - y_shift_limits : int or (int, int) - Limits for vertical translations in pixels. If one integer is - provided the limits will be (-y_shift_limits, y_shift_limits). - angular_limits : int or (int, int) - Limits for rotations in degrees. If one integer is - provided the limits will be [-angular_limits, angular_limits]. - granularity : int - Density of sampling within limits for each dimension. - random_sampling : bool - If True we sample translations/rotations randomly within limits, - otherwise we use a regular grid. - abort_early : bool - If True, the attack stops as soon as it finds an adversarial. 
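Each grid-search candidate in SpatialAttack is produced by rotate, translate, crop back to the original size, and clip. One such candidate in isolation with scipy.ndimage, under an assumed HWC input and (0, 1) bounds:

import numpy as np
from scipy.ndimage import rotate, shift

x = np.random.rand(32, 32, 3)            # assumed HWC input
angle, x_shift, y_shift = 3.0, 2, -1

out = rotate(x, angle=angle, axes=(0, 1), reshape=True, order=1)
out = shift(out, shift=(x_shift, y_shift, 0), mode='constant')

# crop the enlarged, rotated result back to the original shape
start = tuple((a - b) // 2 for a, b in zip(out.shape, x.shape))
slices = tuple(slice(s, s + d) for s, d in zip(start, x.shape))
out = np.clip(out[slices], 0.0, 1.0)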
- """ - - a = input_or_adv - del input_or_adv - del label - del unpack - - min_, max_ = a.bounds() - channel_axis = a.channel_axis(batch=False) - - def get_samples(limits, num, do_flag): - # get regularly spaced or random samples within limits - lb, up = (-limits, limits) if isinstance(limits, int) else limits - - if not do_flag: - return [0] - elif random_sampling: - return nprng.uniform(lb, up, num) - else: - return np.linspace(lb, up, num) - - def crop_center(img): - # crop center of the image (of the size of the original image) - start = tuple(map(lambda a, da: (a - da) // 2, img.shape, - a.original_image.shape)) - end = tuple(map(operator.add, start, a.original_image.shape)) - slices = tuple(map(slice, start, end)) - return img[slices] - - x_shifts = get_samples(x_shift_limits, granularity, do_translations) - y_shifts = get_samples(y_shift_limits, granularity, do_translations) - rotations = get_samples(angular_limits, granularity, do_rotations) - - transformations = product(x_shifts, y_shifts, rotations) - - for x_shift, y_shift, angle in transformations: - if channel_axis == 0: - xy_shift = (0, x_shift, y_shift) - axes = (1, 2) - elif channel_axis == 2: - xy_shift = (x_shift, y_shift, 0) - axes = (0, 1) - else: # pragma: no cover - raise ValueError('SpatialAttack only supports models ' - 'and inputs with NCHW or NHWC format') - - # rotate image (increases size) - x = a.original_image - x = rotate(x, angle=angle, axes=axes, reshape=True, order=1) - - # translate image - x = shift(x, shift=xy_shift, mode='constant') - - # crop center - x = crop_center(x) - - # ensure values are in range - x = np.clip(x, min_, max_) - - # test image - _, is_adv = a.predictions(x) - - if abort_early and is_adv: - break -""" -Provides classes to wrap existing models in different framworks so -that they provide a unified API to the attacks. 
- -""" - -from .base import Model  # noqa: F401 -from .base import DifferentiableModel  # noqa: F401 - -from .wrappers import ModelWrapper  # noqa: F401 -from .wrappers import DifferentiableModelWrapper  # noqa: F401 -from .wrappers import ModelWithoutGradients  # noqa: F401 -from .wrappers import ModelWithEstimatedGradients  # noqa: F401 -from .wrappers import CompositeModel  # noqa: F401 - -from .tensorflow import TensorFlowModel  # noqa: F401 -from .tensorflow_eager import TensorFlowEagerModel  # noqa: F401 -from .pytorch import PyTorchModel  # noqa: F401 -from .keras import KerasModel  # noqa: F401 -from .theano import TheanoModel  # noqa: F401 -from .lasagne import LasagneModel  # noqa: F401 -from .mxnet import MXNetModel  # noqa: F401 -from .mxnet_gluon import MXNetGluonModel  # noqa: F401 -from .caffe import CaffeModel  # noqa: F401 -from __future__ import absolute_import - -import numpy as np -import sys -import abc -abstractmethod = abc.abstractmethod - -if sys.version_info >= (3, 4): - ABC = abc.ABC -else: # pragma: no cover - ABC = abc.ABCMeta('ABC', (), {}) - - -def _create_preprocessing_fn(params): - mean, std = params - mean = np.asarray(mean) - std = np.asarray(std) - - def identity(x): - return x - - if np.all(mean == 0) and np.all(std == 1): - def preprocessing(x): - return x, identity - elif np.all(std == 1): - def preprocessing(x): - _mean = mean.astype(x.dtype) - return x - _mean, identity - elif np.all(mean == 0): - def preprocessing(x): - _std = std.astype(x.dtype) - - def grad(dmdp): - return dmdp / _std - return x / _std, grad - else: - def preprocessing(x): - _mean = mean.astype(x.dtype) - _std = std.astype(x.dtype) - result = x - _mean - result /= _std - - def grad(dmdp): - return dmdp / _std - return result, grad - - return preprocessing - - -class Model(ABC): - """Base class to provide attacks with a unified interface to models. - - The :class:`Model` class represents a model and provides a - unified interface to its predictions. Subclasses must implement - batch_predictions and num_classes. - - :class:`Model` instances can be used as context managers and subclasses - can require this to allocate and release resources. - - Parameters - ---------- - bounds : tuple - Tuple of lower and upper bound for the pixel values, usually - (0, 1) or (0, 255). - channel_axis : int - The index of the axis that represents color channels. - preprocessing: 2-element tuple with floats or numpy arrays - Elementwise preprocessing of input; we first subtract the first - element of preprocessing from the input and then divide the input by - the second element.
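The general branch of _create_preprocessing_fn above pairs the transformed input with a closure that pulls gradients back through (x - mean) / std. A minimal standalone version with a quick sanity check:

import numpy as np

def make_preprocessing(mean, std):
    mean, std = np.asarray(mean), np.asarray(std)

    def preprocessing(x):
        result = (x - mean.astype(x.dtype)) / std.astype(x.dtype)

        def grad(dmdp):
            # chain rule: d(output)/dx = d(output)/d(preprocessed) / std
            return dmdp / std.astype(dmdp.dtype)

        return result, grad

    return preprocessing

pre = make_preprocessing(mean=[0.5], std=[0.25])
p, backward = pre(np.array([1.0]))
assert np.allclose(backward(np.array([1.0])), 4.0)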
- - """ - - def __init__(self, bounds, channel_axis, preprocessing=(0, 1)): - assert len(bounds) == 2 - self._bounds = bounds - self._channel_axis = channel_axis - - if not callable(preprocessing): - preprocessing = _create_preprocessing_fn(preprocessing) - assert callable(preprocessing) - self._preprocessing = preprocessing - - def __enter__(self): - return self - - def __exit__(self, exc_type, exc_value, traceback): - return None - - def bounds(self): - return self._bounds - - def channel_axis(self): - return self._channel_axis - - def _process_input(self, x): - p, grad = self._preprocessing(x) - if hasattr(p, 'dtype'): - assert p.dtype == x.dtype - p = np.asarray(p, dtype=x.dtype) - assert callable(grad) - return p, grad - - def _process_gradient(self, backward, dmdp): - """ - backward: `callable` - callable that backpropagates the gradient of the model w.r.t to - preprocessed input through the preprocessing to get the gradient - of the model's output w.r.t. the input before preprocessing - dmdp: gradient of model w.r.t. preprocessed input - """ - if backward is None: # pragma: no cover - raise ValueError('Your preprocessing function does not provide' - ' an (approximate) gradient') - dmdx = backward(dmdp) - assert dmdx.dtype == dmdp.dtype - return dmdx - - @abstractmethod - def batch_predictions(self, images): - """Calculates predictions for a batch of images. - - Parameters - ---------- - images : `numpy.ndarray` - Batch of inputs with shape as expected by the model. - - Returns - ------- - `numpy.ndarray` - Predictions (logits, i.e. before the softmax) with shape - (batch size, number of classes). - - See Also - -------- - :meth:`predictions` - - """ - raise NotImplementedError - - def predictions(self, image): - """Convenience method that calculates predictions for a single image. - - Parameters - ---------- - image : `numpy.ndarray` - Single input with shape as expected by the model - (without the batch dimension). - - Returns - ------- - `numpy.ndarray` - Vector of predictions (logits, i.e. before the softmax) with - shape (number of classes,). - - See Also - -------- - :meth:`batch_predictions` - - """ - return np.squeeze(self.batch_predictions(image[np.newaxis]), axis=0) - - @abstractmethod - def num_classes(self): - """Determines the number of classes. - - Returns - ------- - int - The number of classes for which the model creates predictions. - - """ - raise NotImplementedError - - -class DifferentiableModel(Model): - """Base class for differentiable models that provide gradients. - - The :class:`DifferentiableModel` class can be used as a base - class for models that provide gradients. Subclasses must implement - predictions_and_gradient. - - A model should be considered differentiable based on whether it - provides a :meth:`predictions_and_gradient` method and a - :meth:`gradient` method, not based on whether it subclasses - :class:`DifferentiableModel`. - - A differentiable model does not necessarily provide reasonable - values for the gradients, the gradient can be wrong. It only - guarantees that the relevant methods can be called. - - """ - - @abstractmethod - def predictions_and_gradient(self, image, label): - """Calculates predictions for an image and the gradient of - the cross-entropy loss w.r.t. the image. - - Parameters - ---------- - image : `numpy.ndarray` - Single input with shape as expected by the model - (without the batch dimension). - label : int - Reference label used to calculate the gradient. 
- - Returns - ------- - predictions : `numpy.ndarray` - Vector of predictions (logits, i.e. before the softmax) with - shape (number of classes,). - gradient : `numpy.ndarray` - The gradient of the cross-entropy loss w.r.t. the image. Will - have the same shape as the image. - - See Also - -------- - :meth:`gradient` - - """ - raise NotImplementedError - - def gradient(self, image, label): - """Calculates the gradient of the cross-entropy loss w.r.t. the image. - - The default implementation calls predictions_and_gradient. - Subclasses can provide more efficient implementations that - only calculate the gradient. - - Parameters - ---------- - image : `numpy.ndarray` - Single input with shape as expected by the model - (without the batch dimension). - label : int - Reference label used to calculate the gradient. - - Returns - ------- - gradient : `numpy.ndarray` - The gradient of the cross-entropy loss w.r.t. the image. Will - have the same shape as the image. - - See Also - -------- - :meth:`gradient` - - """ - _, gradient = self.predictions_and_gradient(image, label) - return gradient - - @abstractmethod - def backward(self, gradient, image): - """Backpropagates the gradient of some loss w.r.t. the logits - through the network and returns the gradient of that loss w.r.t. - the input image. - - Parameters - ---------- - gradient : `numpy.ndarray` - Gradient of some loss w.r.t. the logits. - image : `numpy.ndarray` - Single input with shape as expected by the model - (without the batch dimension). - - Returns - ------- - gradient : `numpy.ndarray` - The gradient w.r.t. the image. - - See Also - -------- - :meth:`gradient` - - """ - raise NotImplementedError -from __future__ import absolute_import - -from .base import DifferentiableModel -from .. import utils - - -class CaffeModel(DifferentiableModel): - def __init__(self, - net, - bounds, - channel_axis=1, - preprocessing=(0, 1), - data_blob_name="data", - label_blob_name="label", - output_blob_name="output"): - super(CaffeModel, self).__init__(bounds=bounds, - channel_axis=channel_axis, - preprocessing=preprocessing) - import caffe - self.net = net - assert isinstance(net, caffe.Net) - assert data_blob_name in self.net.blobs - assert label_blob_name in self.net.blobs - self.data_blob_name = data_blob_name - self.label_blob_name = label_blob_name - self.output_blob_name = output_blob_name - - def num_classes(self): - return self.net.blobs[self.output_blob_name].data.shape[-1] - - def batch_predictions(self, images): - images, _ = self._process_input(images) - self.net.blobs[self.data_blob_name].reshape(*images.shape) - self.net.blobs[self.label_blob_name].reshape(images.shape[0]) - self.net.blobs[self.data_blob_name].data[:] = images - self.net.forward() - return self.net.blobs[self.output_blob_name].data - - def predictions_and_gradient(self, image, label): - input_shape = image.shape - - image, dpdx = self._process_input(image) - self.net.blobs[self.data_blob_name].data[0, :] = image - self.net.blobs[self.label_blob_name].data[0] = label - - self.net.forward() - predictions = self.net.blobs[self.output_blob_name].data[0] - - grad_data = self.net.backward(diffs=[self.data_blob_name]) - grad = grad_data[self.data_blob_name][0] - grad = self._process_gradient(dpdx, grad) - assert grad.shape == input_shape - - return predictions, grad - - def _loss_fn(self, image, label): - logits = self.batch_predictions(image[None]) - return utils.batch_crossentropy([label], logits) - - def backward(self, gradient, image): - input_shape = image.shape -
image, dpdx = self._process_input(image) - self.net.blobs[self.data_blob_name].data[:] = image - self.net.forward() - self.net.blobs[self.output_blob_name].diff[...] = gradient - grad_data = self.net.backward(start=self.output_blob_name, - diffs=[self.data_blob_name]) - grad = grad_data[self.data_blob_name][0] - grad = self._process_gradient(dpdx, grad) - assert grad.shape == input_shape - - return grad -from __future__ import absolute_import - -import numpy as np -import logging - -from .base import DifferentiableModel - - -class KerasModel(DifferentiableModel): - """Creates a :class:`Model` instance from a `Keras` model. - - Parameters - ---------- - model : `keras.models.Model` - The `Keras` model that should be attacked. - bounds : tuple - Tuple of lower and upper bound for the pixel values, usually - (0, 1) or (0, 255). - channel_axis : int - The index of the axis that represents color channels. - preprocessing: 2-element tuple with floats or numpy arrays - Elementwises preprocessing of input; we first subtract the first - element of preprocessing from the input and then divide the input by - the second element. - predicts : str - Specifies whether the `Keras` model predicts logits or probabilities. - Logits are preferred, but probabilities are the default. - - """ - - def __init__( - self, - model, - bounds, - channel_axis=3, - preprocessing=(0, 1), - predicts='probabilities'): - - super(KerasModel, self).__init__(bounds=bounds, - channel_axis=channel_axis, - preprocessing=preprocessing) - - from keras import backend as K - import keras - from pkg_resources import parse_version - - assert parse_version(keras.__version__) >= parse_version('2.0.7'), 'Keras version needs to be 2.0.7 or newer' # noqa: E501 - - if predicts == 'probs': - predicts = 'probabilities' - assert predicts in ['probabilities', 'logits'] - - images_input = model.input - label_input = K.placeholder(shape=(1,)) - - predictions = model.output - - shape = K.int_shape(predictions) - _, num_classes = shape - assert num_classes is not None - - self._num_classes = num_classes - - if predicts == 'probabilities': - if K.backend() == 'tensorflow': - predictions, = predictions.op.inputs - loss = K.sparse_categorical_crossentropy( - label_input, predictions, from_logits=True) - else: - logging.warning('relying on numerically unstable conversion' - ' from probabilities to softmax') - loss = K.sparse_categorical_crossentropy( - label_input, predictions, from_logits=False) - - # transform the probability predictions into logits, so that - # the rest of this code can assume predictions to be logits - predictions = self._to_logits(predictions) - elif predicts == 'logits': - loss = K.sparse_categorical_crossentropy( - label_input, predictions, from_logits=True) - - # sparse_categorical_crossentropy returns 1-dim tensor, - # gradients wants 0-dim tensor (for some backends) - loss = K.squeeze(loss, axis=0) - grads = K.gradients(loss, images_input) - - grad_loss_output = K.placeholder(shape=(num_classes, 1)) - external_loss = K.dot(predictions, grad_loss_output) - # remove batch dimension of predictions - external_loss = K.squeeze(external_loss, axis=0) - # remove singleton dimension of grad_loss_output - external_loss = K.squeeze(external_loss, axis=0) - - grads_loss_input = K.gradients(external_loss, images_input) - - if K.backend() == 'tensorflow': - # tensorflow backend returns a list with the gradient - # as the only element, even if loss is a single scalar - # tensor; - # theano always returns the gradient itself (and requires - # 
that loss is a single scalar tensor) - assert isinstance(grads, list) - assert len(grads) == 1 - grad = grads[0] - - assert isinstance(grads_loss_input, list) - assert len(grads_loss_input) == 1 - grad_loss_input = grads_loss_input[0] - elif K.backend() == 'cntk': # pragma: no cover - assert isinstance(grads, list) - assert len(grads) == 1 - grad = grads[0] - grad = K.reshape(grad, (1,) + grad.shape) - - assert isinstance(grads_loss_input, list) - assert len(grads_loss_input) == 1 - grad_loss_input = grads_loss_input[0] - grad_loss_input = K.reshape(grad_loss_input, (1,) + grad_loss_input.shape) # noqa: E501 - else: - assert not isinstance(grads, list) - grad = grads - - grad_loss_input = grads_loss_input - - self._loss_fn = K.function( - [images_input, label_input], - [loss]) - self._batch_pred_fn = K.function( - [images_input], [predictions]) - self._pred_grad_fn = K.function( - [images_input, label_input], - [predictions, grad]) - self._bw_grad_fn = K.function( - [grad_loss_output, images_input], - [grad_loss_input]) - - def _to_logits(self, predictions): - from keras import backend as K - eps = 10e-8 - predictions = K.clip(predictions, eps, 1 - eps) - predictions = K.log(predictions) - return predictions - - def num_classes(self): - return self._num_classes - - def batch_predictions(self, images): - px, _ = self._process_input(images) - predictions = self._batch_pred_fn([px]) - assert len(predictions) == 1 - predictions = predictions[0] - assert predictions.shape == (images.shape[0], self.num_classes()) - return predictions - - def predictions_and_gradient(self, image, label): - input_shape = image.shape - px, dpdx = self._process_input(image) - predictions, gradient = self._pred_grad_fn([ - px[np.newaxis], - np.array([label])]) - predictions = np.squeeze(predictions, axis=0) - gradient = np.squeeze(gradient, axis=0) - gradient = self._process_gradient(dpdx, gradient) - assert predictions.shape == (self.num_classes(),) - assert gradient.shape == input_shape - return predictions, gradient - - def backward(self, gradient, image): - assert gradient.ndim == 1 - gradient = np.reshape(gradient, (-1, 1)) - px, dpdx = self._process_input(image) - gradient = self._bw_grad_fn([ - gradient, - px[np.newaxis], - ]) - gradient = gradient[0] # output of bw_grad_fn is a list - gradient = np.squeeze(gradient, axis=0) - gradient = self._process_gradient(dpdx, gradient) - assert gradient.shape == image.shape - return gradient -from __future__ import absolute_import - -import numpy as np - -from .base import DifferentiableModel - - -class LasagneModel(DifferentiableModel): - """Creates a :class:`Model` instance from a `Lasagne` network. - - Parameters - ---------- - input_layer : `lasagne.layers.Layer` - The input to the model. - logits_layer : `lasagne.layers.Layer` - The output of the model, before the softmax. - bounds : tuple - Tuple of lower and upper bound for the pixel values, usually - (0, 1) or (0, 255). - channel_axis : int - The index of the axis that represents color channels. - preprocessing: 2-element tuple with floats or numpy arrays - Elementwises preprocessing of input; we first subtract the first - element of preprocessing from the input and then divide the input by - the second element. 
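    Examples
    --------
    A minimal construction sketch, assuming Lasagne is installed; the layer
    sizes are hypothetical:

    >>> import lasagne
    >>> input_layer = lasagne.layers.InputLayer(shape=(None, 3, 32, 32))
    >>> logits_layer = lasagne.layers.DenseLayer(
    ...     input_layer, num_units=10,
    ...     nonlinearity=lasagne.nonlinearities.linear)  # logits, no softmax
    >>> model = LasagneModel(input_layer, logits_layer, bounds=(0, 255))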
- - """ - - def __init__( - self, - input_layer, - logits_layer, - bounds, - channel_axis=1, - preprocessing=(0, 1)): - - super(LasagneModel, self).__init__(bounds=bounds, - channel_axis=channel_axis, - preprocessing=preprocessing) - - # delay import until class is instantiated - import theano as th - import theano.tensor as T - import lasagne - - images = input_layer.input_var - labels = T.ivector('labels') - bw_gradient_pre = T.fmatrix('bw_gradient_pre') - - shape = lasagne.layers.get_output_shape(logits_layer) - _, num_classes = shape - self._num_classes = num_classes - - logits = lasagne.layers.get_output(logits_layer) - - probs = T.nnet.nnet.softmax(logits) - - loss = lasagne.objectives.categorical_crossentropy( - probs, labels) - gradient = th.gradient.grad(loss[0], images) - - bw_loss = (logits * bw_gradient_pre).sum() - bw_gradient = th.gradient.grad(bw_loss, images) - - self._batch_prediction_fn = th.function([images], logits) - self._predictions_and_gradient_fn = th.function( - [images, labels], [logits, gradient]) - self._gradient_fn = th.function([images, labels], gradient) - self._loss_fn = th.function([images, labels], loss) - self._bw_gradient_fn = th.function( - [bw_gradient_pre, images], bw_gradient) - - def batch_predictions(self, images): - images, _ = self._process_input(images) - predictions = self._batch_prediction_fn(images) - assert predictions.shape == (images.shape[0], self.num_classes()) - return predictions - - def predictions_and_gradient(self, image, label): - input_shape = image.shape - image, dpdx = self._process_input(image) - label = np.array(label, dtype=np.int32) - predictions, gradient = self._predictions_and_gradient_fn( - image[np.newaxis], label[np.newaxis]) - predictions = np.squeeze(predictions, axis=0) - gradient = np.squeeze(gradient, axis=0) - gradient = gradient.astype(image.dtype, copy=False) - gradient = self._process_gradient(dpdx, gradient) - assert predictions.shape == (self.num_classes(),) - assert gradient.shape == input_shape - assert gradient.dtype == image.dtype - return predictions, gradient - - def gradient(self, image, label): - input_shape = image.shape - image, dpdx = self._process_input(image) - label = np.array(label, dtype=np.int32) - gradient = self._gradient_fn(image[np.newaxis], label[np.newaxis]) - gradient = np.squeeze(gradient, axis=0) - gradient = gradient.astype(image.dtype, copy=False) - gradient = self._process_gradient(dpdx, gradient) - assert gradient.shape == input_shape - assert gradient.dtype == image.dtype - return gradient - - def num_classes(self): - return self._num_classes - - def backward(self, gradient, image): - assert gradient.ndim == 1 - input_shape = image.shape - image, dpdx = self._process_input(image) - gradient = self._bw_gradient_fn( - gradient[np.newaxis], image[np.newaxis]) - gradient = np.squeeze(gradient, axis=0) - gradient = gradient.astype(image.dtype, copy=False) - gradient = self._process_gradient(dpdx, gradient) - assert gradient.shape == input_shape - assert gradient.dtype == image.dtype - return gradient -from __future__ import absolute_import - -import numpy as np - -from .base import DifferentiableModel - - -class MXNetModel(DifferentiableModel): - """Creates a :class:`Model` instance from existing `MXNet` symbols and weights. - - Parameters - ---------- - data : `mxnet.symbol.Variable` - The input to the model. - logits : `mxnet.symbol.Symbol` - The predictions of the model, before the softmax. - args : `dictionary mapping str to mxnet.nd.array` - The parameters of the model. 
- ctx : `mxnet.context.Context` - The device, e.g. mxnet.cpu() or mxnet.gpu(). - num_classes : int - The number of classes. - bounds : tuple - Tuple of lower and upper bound for the pixel values, usually - (0, 1) or (0, 255). - channel_axis : int - The index of the axis that represents color channels. - aux_states : `dictionary mapping str to mxnet.nd.array` - The states of auxiliary parameters of the model. - preprocessing: 2-element tuple with floats or numpy arrays - Elementwises preprocessing of input; we first subtract the first - element of preprocessing from the input and then divide the input by - the second element. - - """ - - def __init__( - self, - data, - logits, - args, - ctx, - num_classes, - bounds, - channel_axis=1, - aux_states=None, - preprocessing=(0, 1)): - - super(MXNetModel, self).__init__( - bounds=bounds, - channel_axis=channel_axis, - preprocessing=preprocessing) - - import mxnet as mx - - self._num_classes = num_classes - - self._device = ctx - - self._data_sym = data - self._batch_logits_sym = logits - - label = mx.symbol.Variable('label') - self._label_sym = label - - # workaround for https://github.com/apache/incubator-mxnet/issues/6874 - log_softmax = mx.sym.log_softmax(logits) - - loss = mx.sym.sum( - mx.sym.one_hot(indices=label, depth=num_classes) * log_softmax) - - # loss = mx.symbol.softmax_cross_entropy(logits, label) - self._loss_sym = loss - - self._args_map = args.copy() - self._aux_map = aux_states.copy() if aux_states is not None else None - - # move all parameters to correct device - for k in self._args_map.keys(): - self._args_map[k] = \ - self._args_map[k].as_in_context(ctx) # pragma: no cover - - if aux_states is not None: - for k in self._aux_map.keys(): # pragma: no cover - self._aux_map[k] = \ - self._aux_map[k].as_in_context(ctx) # pragma: no cover - - def num_classes(self): - return self._num_classes - - def batch_predictions(self, images): - import mxnet as mx - images, _ = self._process_input(images) - data_array = mx.nd.array(images, ctx=self._device) - self._args_map[self._data_sym.name] = data_array - model = self._batch_logits_sym.bind( - ctx=self._device, args=self._args_map, grad_req='null', - aux_states=self._aux_map) - model.forward(is_train=False) - logits_array = model.outputs[0] - logits = logits_array.asnumpy() - return logits - - def predictions_and_gradient(self, image, label): - import mxnet as mx - label = np.asarray(label) - image, dpdx = self._process_input(image) - data_array = mx.nd.array(image[np.newaxis], ctx=self._device) - label_array = mx.nd.array(label[np.newaxis], ctx=self._device) - self._args_map[self._data_sym.name] = data_array - self._args_map[self._label_sym.name] = label_array - - grad_array = mx.nd.zeros(image[np.newaxis].shape, ctx=self._device) - grad_map = {self._data_sym.name: grad_array} - - logits_loss = mx.sym.Group([self._batch_logits_sym, self._loss_sym]) - model = logits_loss.bind( - ctx=self._device, - args=self._args_map, - args_grad=grad_map, - grad_req='write', - aux_states=self._aux_map) - model.forward(is_train=False) - logits_array = model.outputs[0] - model.backward([ - mx.nd.zeros(logits_array.shape), - mx.nd.array(np.array([1])) - ]) - logits = logits_array.asnumpy() - gradient = grad_array.asnumpy() - gradient = self._process_gradient(dpdx, gradient) - return np.squeeze(logits, axis=0), np.squeeze(gradient, axis=0) - - def _loss_fn(self, image, label): - import mxnet as mx - image, _ = self._process_input(image) - data_array = mx.nd.array(image[np.newaxis], ctx=self._device) - 
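        # For reference: the loss symbol built in __init__ picks out the
        # log-probability of the target class via one-hot masking. In numpy
        # terms (a sketch, not part of this file):
        #
        #     def log_softmax(z):
        #         z = z - z.max()
        #         return z - np.log(np.exp(z).sum())
        #     one_hot = np.eye(num_classes)[label]
        #     loss = (one_hot * log_softmax(logits)).sum()  # log p(label)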
label_array = mx.nd.array(np.array([label]), ctx=self._device)
-        self._args_map[self._data_sym.name] = data_array
-        self._args_map[self._label_sym.name] = label_array
-        model = self._loss_sym.bind(
-            ctx=self._device, args=self._args_map, grad_req='null',
-            aux_states=self._aux_map)
-        model.forward(is_train=False)
-        loss_array = model.outputs[0]
-        loss = loss_array.asnumpy()[0]
-        return loss
-
-    def backward(self, gradient, image):
-        import mxnet as mx
-
-        assert gradient.ndim == 1
-
-        image, dpdx = self._process_input(image)
-        data_array = mx.nd.array(image[np.newaxis], ctx=self._device)
-        self._args_map[self._data_sym.name] = data_array
-
-        grad_array = mx.nd.zeros(image[np.newaxis].shape, ctx=self._device)
-        grad_map = {self._data_sym.name: grad_array}
-
-        logits = self._batch_logits_sym.bind(
-            ctx=self._device,
-            args=self._args_map,
-            args_grad=grad_map,
-            grad_req='write',
-            aux_states=self._aux_map)
-
-        logits.forward(is_train=False)
-
-        gradient_pre_array = mx.nd.array(
-            gradient[np.newaxis], ctx=self._device)
-        logits.backward(gradient_pre_array)
-
-        gradient = grad_array.asnumpy()
-        gradient = np.squeeze(gradient, axis=0)
-        gradient = self._process_gradient(dpdx, gradient)
-        return gradient
-from __future__ import absolute_import
-
-from .base import DifferentiableModel
-
-import numpy as np
-
-
-class MXNetGluonModel(DifferentiableModel):
-    """Creates a :class:`Model` instance from an existing `MXNet Gluon` Block.
-
-    Parameters
-    ----------
-    block : `mxnet.gluon.Block`
-        The Gluon Block representing the model to be run.
-    ctx : `mxnet.context.Context`
-        The device, e.g. mxnet.cpu() or mxnet.gpu().
-    num_classes : int
-        The number of classes.
-    bounds : tuple
-        Tuple of lower and upper bound for the pixel values, usually
-        (0, 1) or (0, 255).
-    channel_axis : int
-        The index of the axis that represents color channels.
-    preprocessing: 2-element tuple with floats or numpy arrays
-        Elementwise preprocessing of input; we first subtract the first
-        element of preprocessing from the input and then divide the input by
-        the second element.
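    Examples
    --------
    A minimal construction sketch, assuming mxnet is installed; the block is
    hypothetical:

    >>> import mxnet as mx
    >>> block = mx.gluon.nn.Dense(10)  # hypothetical block emitting 10 logits
    >>> block.initialize(ctx=mx.cpu())
    >>> model = MXNetGluonModel(block, bounds=(0, 1), num_classes=10,
    ...                         ctx=mx.cpu())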
- - """ - - def __init__( - self, - block, - bounds, - num_classes, - ctx=None, - channel_axis=1, - preprocessing=(0, 1)): - import mxnet as mx - self._num_classes = num_classes - - if ctx is None: - ctx = mx.cpu() - - super(MXNetGluonModel, self).__init__( - bounds=bounds, - channel_axis=channel_axis, - preprocessing=preprocessing) - - self._device = ctx - self._block = block - - def num_classes(self): - return self._num_classes - - def batch_predictions(self, images): - import mxnet as mx - images, _ = self._process_input(images) - data_array = mx.nd.array(images, ctx=self._device) - data_array.attach_grad() - with mx.autograd.record(train_mode=False): - L = self._block(data_array) - return L.asnumpy() - - def predictions_and_gradient(self, image, label): - import mxnet as mx - image, dpdx = self._process_input(image) - label = mx.nd.array([label], ctx=self._device) - data_array = mx.nd.array(image[np.newaxis], ctx=self._device) - data_array.attach_grad() - with mx.autograd.record(train_mode=False): - logits = self._block(data_array) - loss = mx.nd.softmax_cross_entropy(logits, label) - loss.backward(train_mode=False) - predictions = np.squeeze(logits.asnumpy(), axis=0) - gradient = np.squeeze(data_array.grad.asnumpy(), axis=0) - gradient = self._process_gradient(dpdx, gradient) - return predictions, gradient - - def _loss_fn(self, image, label): - import mxnet as mx - image, _ = self._process_input(image) - label = mx.nd.array([label], ctx=self._device) - data_array = mx.nd.array(image[np.newaxis], ctx=self._device) - data_array.attach_grad() - with mx.autograd.record(train_mode=False): - logits = self._block(data_array) - loss = mx.nd.softmax_cross_entropy(logits, label) - loss.backward(train_mode=False) - return loss.asnumpy() - - def backward(self, gradient, image): # pragma: no cover - # lazy import - import mxnet as mx - - assert gradient.ndim == 1 - image, dpdx = self._process_input(image) - gradient_pre_array = mx.nd.array( - gradient[np.newaxis], ctx=self._device) - data_array = mx.nd.array(image[np.newaxis], ctx=self._device) - data_array.attach_grad() - with mx.autograd.record(train_mode=False): - logits = self._block(data_array) - assert gradient_pre_array.shape == logits.shape - logits.backward(gradient_pre_array, train_mode=False) - - gradient_array = data_array.grad - gradient = np.squeeze(gradient_array.asnumpy(), axis=0) - gradient = self._process_gradient(dpdx, gradient) - - return gradient -from __future__ import absolute_import - -import numpy as np -import warnings - -from .base import DifferentiableModel - - -class PyTorchModel(DifferentiableModel): - """Creates a :class:`Model` instance from a `PyTorch` module. - - Parameters - ---------- - model : `torch.nn.Module` - The PyTorch model that should be attacked. - bounds : tuple - Tuple of lower and upper bound for the pixel values, usually - (0, 1) or (0, 255). - num_classes : int - Number of classes for which the model will output predictions. - channel_axis : int - The index of the axis that represents color channels. - device : string - A string specifying the device to do computation on. - If None, will default to "cuda:0" if torch.cuda.is_available() - or "cpu" if not. - preprocessing: 2-element tuple with floats or numpy arrays - Elementwises preprocessing of input; we first subtract the first - element of preprocessing from the input and then divide the input by - the second element. 
- """ - - def __init__( - self, - model, - bounds, - num_classes, - channel_axis=1, - device=None, - preprocessing=(0, 1)): - - # lazy import - import torch - - super(PyTorchModel, self).__init__(bounds=bounds, - channel_axis=channel_axis, - preprocessing=preprocessing) - - self._num_classes = num_classes - - if device is None: - self.device = torch.device( - "cuda:0" if torch.cuda.is_available() else "cpu") - elif isinstance(device, str): - self.device = torch.device(device) - else: - self.device = device - self._model = model.to(self.device) - - if model.training: - warnings.warn( - 'The PyTorch model is in training mode and therefore might' - ' not be deterministic. Call the eval() method to set it in' - ' evaluation mode if this is not intended.') - - def _old_pytorch(self): - # lazy import - import torch - version = torch.__version__.split('.')[:2] - pre04 = int(version[0]) == 0 and int(version[1]) < 4 - return pre04 - - def batch_predictions(self, images): - # lazy import - import torch - if self._old_pytorch(): # pragma: no cover - from torch.autograd import Variable - - images, _ = self._process_input(images) - n = len(images) - images = torch.from_numpy(images).to(self.device) - - if self._old_pytorch(): # pragma: no cover - images = Variable(images, volatile=True) - predictions = self._model(images) - predictions = predictions.data - else: - predictions = self._model(images) - # TODO: add no_grad once we have a solution - # for models that require grads internally - # for inference - # with torch.no_grad(): - # predictions = self._model(images) - predictions = predictions.to("cpu") - if not self._old_pytorch(): - predictions = predictions.detach() - predictions = predictions.numpy() - assert predictions.ndim == 2 - assert predictions.shape == (n, self.num_classes()) - return predictions - - def num_classes(self): - return self._num_classes - - def predictions_and_gradient(self, image, label): - # lazy import - import torch - import torch.nn as nn - if self._old_pytorch(): # pragma: no cover - from torch.autograd import Variable - - input_shape = image.shape - image, dpdx = self._process_input(image) - target = np.array([label]) - target = torch.from_numpy(target).long().to(self.device) - - images = image[np.newaxis] - images = torch.from_numpy(images).to(self.device) - - if self._old_pytorch(): # pragma: no cover - target = Variable(target) - images = Variable(images, requires_grad=True) - else: - images.requires_grad_() - - predictions = self._model(images) - ce = nn.CrossEntropyLoss() - loss = ce(predictions, target) - loss.backward() - grad = images.grad - - if self._old_pytorch(): # pragma: no cover - predictions = predictions.data - predictions = predictions.to("cpu") - - if not self._old_pytorch(): - predictions = predictions.detach() - predictions = predictions.numpy() - predictions = np.squeeze(predictions, axis=0) - assert predictions.ndim == 1 - assert predictions.shape == (self.num_classes(),) - - if self._old_pytorch(): # pragma: no cover - grad = grad.data - grad = grad.to("cpu") - if not self._old_pytorch(): - grad = grad.detach() - grad = grad.numpy() - grad = np.squeeze(grad, axis=0) - grad = self._process_gradient(dpdx, grad) - assert grad.shape == input_shape - - return predictions, grad - - def _loss_fn(self, image, label): - # lazy import - import torch - import torch.nn as nn - if self._old_pytorch(): # pragma: no cover - from torch.autograd import Variable - - image, _ = self._process_input(image) - target = np.array([label]) - target = 
torch.from_numpy(target).long().to(self.device) - if self._old_pytorch(): # pragma: no cover - target = Variable(target) - - images = torch.from_numpy(image[None]).to(self.device) - if self._old_pytorch(): # pragma: no cover - images = Variable(images, volatile=True) - predictions = self._model(images) - ce = nn.CrossEntropyLoss() - loss = ce(predictions, target) - if self._old_pytorch(): # pragma: no cover - loss = loss.data - loss = loss.to("cpu") - loss = loss.numpy() - return loss - - def backward(self, gradient, image): - # lazy import - import torch - if self._old_pytorch(): # pragma: no cover - from torch.autograd import Variable - - assert gradient.ndim == 1 - - gradient = torch.from_numpy(gradient).to(self.device) - if self._old_pytorch(): # pragma: no cover - gradient = Variable(gradient) - - input_shape = image.shape - image, dpdx = self._process_input(image) - images = image[np.newaxis] - images = torch.from_numpy(images).to(self.device) - if self._old_pytorch(): # pragma: no cover - images = Variable(images, requires_grad=True) - else: - images.requires_grad_() - predictions = self._model(images) - - predictions = predictions[0] - - assert gradient.dim() == 1 - assert predictions.dim() == 1 - assert gradient.size() == predictions.size() - - loss = torch.dot(predictions, gradient) - loss.backward() - # should be the same as predictions.backward(gradient=gradient) - - grad = images.grad - - if self._old_pytorch(): # pragma: no cover - grad = grad.data - grad = grad.to("cpu") - if not self._old_pytorch(): - grad = grad.detach() - grad = grad.numpy() - grad = np.squeeze(grad, axis=0) - grad = self._process_gradient(dpdx, grad) - assert grad.shape == input_shape - - return grad -from __future__ import absolute_import - -import numpy as np -import logging - -from .base import DifferentiableModel - - -class TensorFlowModel(DifferentiableModel): - """Creates a :class:`Model` instance from existing `TensorFlow` tensors. - - Parameters - ---------- - images : `tensorflow.Tensor` - The input to the model, usually a `tensorflow.placeholder`. - logits : `tensorflow.Tensor` - The predictions of the model, before the softmax. - bounds : tuple - Tuple of lower and upper bound for the pixel values, usually - (0, 1) or (0, 255). - channel_axis : int - The index of the axis that represents color channels. - preprocessing: 2-element tuple with floats or numpy arrays - Elementwises preprocessing of input; we first subtract the first - element of preprocessing from the input and then divide the input by - the second element. - - """ - - def __init__( - self, - images, - logits, - bounds, - channel_axis=3, - preprocessing=(0, 1)): - - super(TensorFlowModel, self).__init__(bounds=bounds, - channel_axis=channel_axis, - preprocessing=preprocessing) - - # delay import until class is instantiated - import tensorflow as tf - - session = tf.get_default_session() - if session is None: - logging.warning('No default session. Created a new tf.Session. 
'
-                            'Please restore variables using this session.')
-            session = tf.Session(graph=images.graph)
-            self._created_session = True
-        else:
-            self._created_session = False
-            assert session.graph == images.graph, \
-                'The default session uses the wrong graph'
-
-        with session.graph.as_default():
-            self._session = session
-            self._images = images
-            self._batch_logits = logits
-            self._logits = tf.squeeze(logits, axis=0)
-            self._label = tf.placeholder(tf.int64, (), name='label')
-
-            loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
-                labels=self._label[tf.newaxis],
-                logits=self._logits[tf.newaxis])
-            self._loss = tf.squeeze(loss, axis=0)
-            gradients = tf.gradients(loss, images)
-            assert len(gradients) == 1
-            if gradients[0] is None:
-                gradients[0] = tf.zeros_like(images)
-            self._gradient = tf.squeeze(gradients[0], axis=0)
-
-            self._bw_gradient_pre = tf.placeholder(tf.float32, self._logits.shape)  # noqa: E501
-            bw_loss = tf.reduce_sum(self._logits * self._bw_gradient_pre)
-            bw_gradients = tf.gradients(bw_loss, images)
-            assert len(bw_gradients) == 1
-            if bw_gradients[0] is None:
-                bw_gradients[0] = tf.zeros_like(images)
-            self._bw_gradient = tf.squeeze(bw_gradients[0], axis=0)
-
-    @classmethod
-    def from_keras(cls, model, bounds, input_shape=None,
-                   channel_axis=3, preprocessing=(0, 1)):
-        """Alternative constructor for a TensorFlowModel that
-        accepts a `tf.keras.Model` instance.
-
-        Parameters
-        ----------
-        model : `tensorflow.keras.Model`
-            A `tensorflow.keras.Model` that accepts a single input tensor
-            and returns a single output tensor representing logits.
-        bounds : tuple
-            Tuple of lower and upper bound for the pixel values, usually
-            (0, 1) or (0, 255).
-        input_shape : tuple
-            The shape of a single input, e.g. (28, 28, 1) for MNIST.
-            If None, tries to get the shape from the model's
-            input_shape attribute.
-        channel_axis : int
-            The index of the axis that represents color channels.
-        preprocessing: 2-element tuple with floats or numpy arrays
-            Elementwise preprocessing of input; we first subtract the first
-            element of preprocessing from the input and then divide the input
-            by the second element.
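        Examples
        --------
        A minimal usage sketch, assuming TensorFlow 1.x with tf.keras; the
        MNIST-shaped network is hypothetical:

        >>> import tensorflow as tf
        >>> net = tf.keras.Sequential([
        ...     tf.keras.layers.Flatten(input_shape=(28, 28, 1)),
        ...     tf.keras.layers.Dense(10),  # linear activation, i.e. logits
        ... ])
        >>> model = TensorFlowModel.from_keras(net, bounds=(0, 1))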
- - """ - import tensorflow as tf - if input_shape is None: - try: - input_shape = model.input_shape[1:] - except AttributeError: - raise ValueError( - 'Please specify input_shape manually or ' - 'provide a model with an input_shape attribute') - with tf.keras.backend.get_session().as_default(): - inputs = tf.placeholder(tf.float32, (None,) + input_shape) - logits = model(inputs) - return cls(inputs, logits, bounds=bounds, - channel_axis=channel_axis, preprocessing=preprocessing) - - def __exit__(self, exc_type, exc_value, traceback): - if self._created_session: - self._session.close() - return None - - @property - def session(self): - return self._session - - def num_classes(self): - _, n = self._batch_logits.get_shape().as_list() - return n - - def batch_predictions(self, images): - images, _ = self._process_input(images) - predictions = self._session.run( - self._batch_logits, - feed_dict={self._images: images}) - return predictions - - def predictions_and_gradient(self, image, label): - image, dpdx = self._process_input(image) - predictions, gradient = self._session.run( - [self._logits, self._gradient], - feed_dict={ - self._images: image[np.newaxis], - self._label: label}) - gradient = self._process_gradient(dpdx, gradient) - return predictions, gradient - - def gradient(self, image, label): - image, dpdx = self._process_input(image) - g = self._session.run( - self._gradient, - feed_dict={ - self._images: image[np.newaxis], - self._label: label}) - g = self._process_gradient(dpdx, g) - return g - - def _loss_fn(self, image, label): - image, dpdx = self._process_input(image) - loss = self._session.run( - self._loss, - feed_dict={ - self._images: image[np.newaxis], - self._label: label}) - return loss - - def backward(self, gradient, image): - assert gradient.ndim == 1 - input_shape = image.shape - image, dpdx = self._process_input(image) - g = self._session.run( - self._bw_gradient, - feed_dict={ - self._images: image[np.newaxis], - self._bw_gradient_pre: gradient}) - g = self._process_gradient(dpdx, g) - assert g.shape == input_shape - return g -from __future__ import absolute_import - -import numpy as np - -from .base import DifferentiableModel - - -class TensorFlowEagerModel(DifferentiableModel): - """Creates a :class:`Model` instance from a `TensorFlow` model using - eager execution. - - Parameters - ---------- - model : a TensorFlow eager model - The TensorFlow eager model that should be attacked. It will be called - with input tensors and should return logits. - bounds : tuple - Tuple of lower and upper bound for the pixel values, usually - (0, 1) or (0, 255). - num_classes : int - If None, will try to infer it from the model's output shape. - channel_axis : int - The index of the axis that represents color channels. - preprocessing: 2-element tuple with floats or numpy arrays - Elementwises preprocessing of input; we first subtract the first - element of preprocessing from the input and then divide the input by - the second element. 
- """ - - def __init__( - self, - model, - bounds, - num_classes=None, - channel_axis=3, - preprocessing=(0, 1)): - - # delay import until class is instantiated - import tensorflow as tf - assert tf.executing_eagerly() - - super(TensorFlowEagerModel, self).__init__( - bounds=bounds, - channel_axis=channel_axis, - preprocessing=preprocessing) - - self._model = model - - if num_classes is None: - try: - num_classes = model.output_shape[-1] - except AttributeError: - raise ValueError( - 'Please specify num_classes manually or ' - 'provide a model with an output_shape attribute') - - self._num_classes = num_classes - - def batch_predictions(self, images): - import tensorflow as tf - images, _ = self._process_input(images) - n = len(images) - images = tf.constant(images) - - predictions = self._model(images) - predictions = predictions.numpy() - assert predictions.ndim == 2 - assert predictions.shape == (n, self.num_classes()) - return predictions - - def num_classes(self): - return self._num_classes - - def predictions_and_gradient(self, image, label): - import tensorflow as tf - input_shape = image.shape - image, dpdx = self._process_input(image) - images = image[np.newaxis] - images = tf.constant(images) - target = tf.constant([label]) - - with tf.GradientTape() as tape: - tape.watch(images) - predictions = self._model(images) - loss = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=target, logits=predictions) - - grad = tape.gradient(loss, images) - - predictions = predictions.numpy() - predictions = np.squeeze(predictions, axis=0) - assert predictions.ndim == 1 - assert predictions.shape == (self.num_classes(),) - - grad = grad.numpy() - grad = np.squeeze(grad, axis=0) - grad = self._process_gradient(dpdx, grad) - assert grad.shape == input_shape - - return predictions, grad - - def _loss_fn(self, image, label): - import tensorflow as tf - image, _ = self._process_input(image) - images = image[np.newaxis] - images = tf.constant(images) - target = tf.constant([label]) - - predictions = self._model(images) - loss = tf.nn.sparse_softmax_cross_entropy_with_logits( - labels=target, logits=predictions) - loss = loss.numpy() - return loss - - def backward(self, gradient, image): - import tensorflow as tf - input_shape = image.shape - image, dpdx = self._process_input(image) - images = image[np.newaxis] - images = tf.constant(images) - assert gradient.ndim == 1 - gradient = gradient[np.newaxis] - gradient = tf.constant(gradient) - - with tf.GradientTape() as tape: - tape.watch(images) - predictions = self._model(images) - - # backprop the given output gradient (the gradient of - # some loss w.r.t. predictions) through the model - # to get the gradient of that loss w.r.t. images - grad = tape.gradient(predictions, images, gradient) - - grad = grad.numpy() - grad = np.squeeze(grad, axis=0) - grad = self._process_gradient(dpdx, grad) - assert grad.shape == input_shape - - return grad -from __future__ import absolute_import - -import numpy as np - -from .base import DifferentiableModel - - -class TheanoModel(DifferentiableModel): - """Creates a :class:`Model` instance from existing `Theano` tensors. - - Parameters - ---------- - images : `theano.tensor` - The input to the model. - logits : `theano.tensor` - The predictions of the model, before the softmax. - bounds : tuple - Tuple of lower and upper bound for the pixel values, usually - (0, 1) or (0, 255). - num_classes : int - Number of classes for which the model will output predictions. 
- channel_axis : int - The index of the axis that represents color channels. - preprocessing: 2-element tuple with floats or numpy arrays - Elementwises preprocessing of input; we first subtract the first - element of preprocessing from the input and then divide the input by - the second element. - - """ - - def __init__( - self, - images, - logits, - bounds, - num_classes, - channel_axis=1, - preprocessing=[0, 1]): - - super(TheanoModel, self).__init__(bounds=bounds, - channel_axis=channel_axis, - preprocessing=preprocessing) - - self._num_classes = num_classes - - # delay import until class is instantiated - import theano as th - import theano.tensor as T - - probs = T.nnet.nnet.softmax(logits) - - labels = T.ivector('labels') - loss = T.nnet.nnet.categorical_crossentropy( - probs, labels) - gradient = th.gradient.grad(loss[0], images) - - bw_gradient_pre = T.fmatrix('bw_gradient_pre') - bw_loss = (logits * bw_gradient_pre).sum() - bw_gradient = th.gradient.grad(bw_loss, images) - - self._batch_prediction_fn = th.function([images], logits) - self._predictions_and_gradient_fn = th.function( - [images, labels], [logits, gradient]) - self._gradient_fn = th.function([images, labels], gradient) - self._loss_fn = th.function([images, labels], loss) - self._bw_gradient_fn = th.function( - [bw_gradient_pre, images], bw_gradient) - - def batch_predictions(self, images): - images, _ = self._process_input(images) - predictions = self._batch_prediction_fn(images) - assert predictions.shape == (images.shape[0], self.num_classes()) - return predictions - - def predictions_and_gradient(self, image, label): - input_shape = image.shape - image, dpdx = self._process_input(image) - label = np.array(label, dtype=np.int32) - predictions, gradient = self._predictions_and_gradient_fn( - image[np.newaxis], label[np.newaxis]) - gradient = gradient.astype(image.dtype) - predictions = np.squeeze(predictions, axis=0) - gradient = np.squeeze(gradient, axis=0) - gradient = self._process_gradient(dpdx, gradient) - assert predictions.shape == (self.num_classes(),) - assert gradient.shape == input_shape - assert gradient.dtype == image.dtype - return predictions, gradient - - def gradient(self, image, label): - input_shape = image.shape - image, dpdx = self._process_input(image) - label = np.array(label, dtype=np.int32) - gradient = self._gradient_fn(image[np.newaxis], label[np.newaxis]) - gradient = gradient.astype(image.dtype) - gradient = np.squeeze(gradient, axis=0) - gradient = self._process_gradient(dpdx, gradient) - assert gradient.shape == input_shape - assert gradient.dtype == image.dtype - return gradient - - def num_classes(self): - return self._num_classes - - def backward(self, gradient, image): - assert gradient.ndim == 1 - input_shape = image.shape - image, dpdx = self._process_input(image) - gradient = self._bw_gradient_fn( - gradient[np.newaxis], image[np.newaxis]) - gradient = gradient.astype(image.dtype) - gradient = np.squeeze(gradient, axis=0) - gradient = self._process_gradient(dpdx, gradient) - assert gradient.shape == input_shape - assert gradient.dtype == image.dtype - return gradient -from __future__ import absolute_import - -from .base import Model -from .base import DifferentiableModel - - -class ModelWrapper(Model): - """Base class for models that wrap other models. - - This base class can be used to implement model wrappers - that turn models into new models, for example by preprocessing - the input or modifying the gradient. 
- - Parameters - ---------- - model : :class:`Model` - The model that is wrapped. - - """ - - def __init__(self, model): - super(ModelWrapper, self).__init__( - bounds=model.bounds(), - channel_axis=model.channel_axis()) - - self.wrapped_model = model - - def __enter__(self): - assert self.wrapped_model.__enter__() == self.wrapped_model - return self - - def __exit__(self, exc_type, exc_value, traceback): - return self.wrapped_model.__exit__(exc_type, exc_value, traceback) - - def batch_predictions(self, images): - return self.wrapped_model.batch_predictions(images) - - def predictions(self, image): - return self.wrapped_model.predictions(image) - - def num_classes(self): - return self.wrapped_model.num_classes() - - -class DifferentiableModelWrapper(ModelWrapper): - """Base class for models that wrap other models and provide - gradient methods. - - This base class can be used to implement model wrappers - that turn models into new models, for example by preprocessing - the input or modifying the gradient. - - Parameters - ---------- - model : :class:`Model` - The model that is wrapped. - - """ - - def predictions_and_gradient(self, image, label): - return self.wrapped_model.predictions_and_gradient(image, label) - - def gradient(self, image, label): - return self.wrapped_model.gradient(image, label) - - def backward(self, gradient, image): - return self.wrapped_model.backward(gradient, image) - - -class ModelWithoutGradients(ModelWrapper): - """Turns a model into a model without gradients. - - """ - pass - - -class ModelWithEstimatedGradients(DifferentiableModelWrapper): - """Turns a model into a model with gradients estimated - by the given gradient estimator. - - Parameters - ---------- - model : :class:`Model` - The model that is wrapped. - gradient_estimator : `callable` - Callable taking three arguments (pred_fn, image, label) and - returning the estimated gradients. pred_fn will be the - batch_predictions method of the wrapped model. - """ - - def __init__(self, model, gradient_estimator): - super(ModelWithEstimatedGradients, self).__init__( - model=model) - - assert callable(gradient_estimator) - self._gradient_estimator = gradient_estimator - - def predictions_and_gradient(self, image, label): - predictions = self.predictions(image) - gradient = self.gradient(image, label) - return predictions, gradient - - def gradient(self, image, label): - pred_fn = self.batch_predictions - bounds = self.bounds() - return self._gradient_estimator(pred_fn, image, label, bounds) - - def backward(self, gradient, image): - raise NotImplementedError - - -class CompositeModel(DifferentiableModel): - """Combines predictions of a (black-box) model with the gradient of a - (substitute) model. - - Parameters - ---------- - forward_model : :class:`Model` - The model that should be fooled and will be used for predictions. - backward_model : :class:`Model` - The model that provides the gradients. 
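    Examples
    --------
    A minimal sketch using mocks in the style of the test fixtures further
    down; in practice `forward_model` would be the black-box model under
    attack and `backward_model` a differentiable substitute:

    >>> import numpy as np
    >>> from unittest.mock import Mock
    >>> black_box, substitute = Mock(), Mock()
    >>> for m in (black_box, substitute):
    ...     m.bounds = Mock(return_value=(0, 1))
    ...     m.channel_axis = Mock(return_value=3)
    ...     m.num_classes = Mock(return_value=10)
    >>> model = CompositeModel(black_box, substitute)
    >>> model.num_classes()
    10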
- - """ - - def __init__(self, forward_model, backward_model): - bounds = forward_model.bounds() - assert bounds == backward_model.bounds() - - channel_axis = forward_model.channel_axis() - assert channel_axis == backward_model.channel_axis() - - num_classes = forward_model.num_classes() - assert num_classes == backward_model.num_classes() - - super(CompositeModel, self).__init__( - bounds=bounds, - channel_axis=channel_axis) - - self.forward_model = forward_model - self.backward_model = backward_model - self._num_classes = num_classes - - def num_classes(self): - return self._num_classes - - def batch_predictions(self, images): - return self.forward_model.batch_predictions(images) - - def predictions_and_gradient(self, image, label): - predictions = self.forward_model.predictions(image) - gradient = self.backward_model.gradient(image, label) - return predictions, gradient - - def gradient(self, image, label): - return self.backward_model.gradient(image, label) - - def backward(self, gradient, image): - return self.backward_model.backward(gradient, image) - - def __enter__(self): - assert self.forward_model.__enter__() == self.forward_model - assert self.backward_model.__enter__() == self.backward_model - return self - - def __exit__(self, exc_type, exc_value, traceback): - r1 = self.forward_model.__exit__(exc_type, exc_value, traceback) - r2 = self.backward_model.__exit__(exc_type, exc_value, traceback) - if r1 is None and r2 is None: - return None - return (r1, r2) # pragma: no cover -# the different frameworks interfer with each other and -# sometimes cause segfaults or similar problems; -# choosing the right import order seems to be a -# workaround; given the current test order, -# first import tensorflow, then pytorch and then -# according to test order seems to solve it -import logging -from foolbox.utils import binarize -from foolbox.gradient_estimators import EvolutionaryStrategiesGradientEstimator -from foolbox.gradient_estimators import CoordinateWiseGradientEstimator -from foolbox.distances import MAE -from foolbox.distances import Linfinity -from foolbox.distances import MSE -from foolbox import Adversarial -from foolbox.models import ModelWithEstimatedGradients -from foolbox.models import ModelWithoutGradients -from foolbox.models import CaffeModel -from foolbox.models import PyTorchModel -from foolbox.models import TensorFlowModel -from foolbox.criteria import OriginalClassProbability -from foolbox.criteria import TargetClass -from foolbox.criteria import Misclassification -from PIL import Image -import pytest -import numpy as np -from contextlib import contextmanager -from os.path import dirname -from os.path import join -import sys -import torch -import tensorflow -print(tensorflow.__version__) -# import theano -# print(theano.__version__) -# import mxnet -# print(mxnet.__version__) -# import keras -# print(keras.__version__) -print(torch.__version__) - - -if sys.version_info > (3, 2): - from unittest.mock import Mock -else: - # for Python2.7 compatibility - from mock import Mock - - -logging.getLogger().setLevel(logging.DEBUG) - - -@pytest.fixture -def image(): - image = Image.open(join(dirname(__file__), 'data/example.jpg')) - image = np.asarray(image, dtype=np.float32) - assert image.shape == (224, 224, 3) - return image - - -@pytest.fixture -def label(): - return 333 - - -@pytest.fixture -def model(image): - predictions = np.array([1., 0., 0.5] * 111 + [2.] 
+ [0.3, 0.5, 1.1] * 222) - model = Mock() - model.bounds = Mock(return_value=(0, 255)) - model.predictions = Mock(return_value=predictions) - model.batch_predictions = Mock(return_value=predictions[np.newaxis]) - gradient = image - model.predictions_and_gradient = Mock(return_value=(predictions, gradient)) # noqa: E501 - model.gradient = Mock(return_value=gradient) - model.backward = Mock(return_value=gradient) - model.num_classes = Mock(return_value=1000) - model.channel_axis = Mock(return_value=3) - return model - - -@pytest.fixture -def criterion(): - return Misclassification() - - -def bn_model(): - """Creates a simple brightness model that does not require training. - - """ - - import tensorflow as tf - - bounds = (0, 1) - channel_axis = 3 - channels = 10 # == num_classes - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - images = tf.placeholder(tf.float32, (None, 5, 5, channels)) - logits = mean_brightness_net(images) - - with tf.Session(): - model = TensorFlowModel( - images, - logits, - bounds=bounds, - channel_axis=channel_axis) - - yield model - - -# bn_model is also needed as a function, so we create the fixture separately -@pytest.fixture(name='bn_model') -def bn_model_fixutre(): - cm_model = contextmanager(bn_model) - with cm_model() as model: - yield model - - -@pytest.fixture -def bn_model_pytorch(): - """Same as bn_model but with PyTorch.""" - - import torch - import torch.nn as nn - - bounds = (0, 1) - num_classes = 10 - - class Net(nn.Module): - - def forward(self, x): - assert isinstance(x.data, torch.FloatTensor) - x = torch.mean(x, 3) - x = torch.mean(x, 2) - logits = x - return logits - - model = Net() - model = PyTorchModel( - model, - bounds=bounds, - num_classes=num_classes, - device='cpu') - return model - - -@pytest.fixture -def bn_model_caffe(request, tmpdir): - """Same as bn_model but with Caffe.""" - - import caffe - from caffe import layers as L - - bounds = (0, 1) - num_classes = channels = getattr(request, "param", 1000) - - net_spec = caffe.NetSpec() - net_spec.data = L.Input(name="data", - shape=dict(dim=[1, channels, 5, 5])) - net_spec.reduce_1 = L.Reduction(net_spec.data, - reduction_param={"operation": 4, - "axis": 3}) - net_spec.output = L.Reduction(net_spec.reduce_1, - reduction_param={"operation": 4, - "axis": 2}) - net_spec.label = L.Input(name="label", shape=dict(dim=[1])) - net_spec.loss = L.SoftmaxWithLoss(net_spec.output, net_spec.label) - wf = tmpdir.mkdir("test_models_caffe_fixture")\ - .join("test_caffe_{}.prototxt".format(num_classes)) - wf.write("force_backward: true\n" + str(net_spec.to_proto())) - net = caffe.Net(str(wf), caffe.TEST) - model = CaffeModel(net, bounds=bounds) - return model - - -def gl_bn_model(): - """Same as bn_model but without gradient. - - """ - cm_model = contextmanager(bn_model) - with cm_model() as model: - model = ModelWithoutGradients(model) - yield model - - -# gl_bn_model is also needed as a function, so we create the fixture separately -@pytest.fixture(name='gl_bn_model') -def gl_bn_model_fixutre(): - cm_model = contextmanager(gl_bn_model) - with cm_model() as model: - yield model - - -def eg_bn_model_factory(request): - """Same as bn_model but with estimated gradient. 
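    As called in ModelWithEstimatedGradients.gradient above, an estimator is
    a callable of (pred_fn, image, label, bounds). A crude coordinate-wise
    finite-difference sketch of that contract (hypothetical; it
    differentiates the label logit rather than the cross-entropy):

        import numpy as np

        def fd_gradient(pred_fn, image, label, bounds, eps=0.01):
            grad = np.zeros_like(image)
            it = np.nditer(image, flags=['multi_index'])
            while not it.finished:
                idx = it.multi_index
                lo, hi = image.copy(), image.copy()
                lo[idx] -= eps
                hi[idx] += eps
                batch = np.stack([np.clip(lo, *bounds), np.clip(hi, *bounds)])
                f_lo, f_hi = pred_fn(batch)[:, label]
                grad[idx] = (f_hi - f_lo) / (2 * eps)
                it.iternext()
            return grad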
- - """ - GradientEstimator = request.param - - def eg_bn_model(): - cm_model = contextmanager(bn_model) - with cm_model() as model: - gradient_estimator = GradientEstimator(epsilon=0.01) - model = ModelWithEstimatedGradients(model, gradient_estimator) - yield model - return eg_bn_model - - -# eg_bn_model_factory is also needed as a function, so we create the -# fixture separately -@pytest.fixture(name='eg_bn_model_factory', - params=[CoordinateWiseGradientEstimator, - EvolutionaryStrategiesGradientEstimator]) -def eg_bn_model_factory_fixture(request): - return eg_bn_model_factory(request) - - -@pytest.fixture -def bn_image(): - np.random.seed(22) - image = np.random.uniform(size=(5, 5, 10)).astype(np.float32) - return image - - -@pytest.fixture -def bn_image_pytorch(): - np.random.seed(22) - image = np.random.uniform(size=(10, 5, 5)).astype(np.float32) - return image - - -@pytest.fixture -def bn_label(bn_image): - image = bn_image - mean = np.mean(image, axis=(0, 1)) - assert mean.shape == (10,) - label = np.argmax(mean) - return label - - -@pytest.fixture -def bn_label_pytorch(bn_image_pytorch): - image = bn_image_pytorch - mean = np.mean(image, axis=(1, 2)) - assert mean.shape == (10,) - label = np.argmax(mean) - return label - - -@pytest.fixture -def bn_criterion(): - return Misclassification() - - -@pytest.fixture -def bn_targeted_criterion(bn_label): - label = bn_label - assert label in [0, 1] - return TargetClass(1 - label) - - -@pytest.fixture -def bn_impossible_criterion(): - """Does not consider any image as adversarial.""" - return OriginalClassProbability(0.) - - -@pytest.fixture -def bn_trivial_criterion(): - """Does consider every image as adversarial.""" - return OriginalClassProbability(1.) - - -@pytest.fixture -def bn_adversarial(bn_criterion, bn_image, bn_label): - criterion = bn_criterion - image = bn_image - label = bn_label - - cm_model = contextmanager(bn_model) - with cm_model() as model: - yield Adversarial(model, criterion, image, label) - - -@pytest.fixture -def bn_adversarial_linf(bn_criterion, bn_image, bn_label): - criterion = bn_criterion - image = bn_image - label = bn_label - distance = Linfinity - - cm_model = contextmanager(bn_model) - with cm_model() as model: - yield Adversarial(model, criterion, image, label, distance=distance) - - -@pytest.fixture -def bn_adversarial_mae(bn_criterion, bn_image, bn_label): - criterion = bn_criterion - image = bn_image - label = bn_label - distance = MAE - - cm_model = contextmanager(bn_model) - with cm_model() as model: - yield Adversarial(model, criterion, image, label, distance=distance) - - -@pytest.fixture -def bn_targeted_adversarial(bn_targeted_criterion, bn_image, bn_label): - criterion = bn_targeted_criterion - image = bn_image - label = bn_label - - cm_model = contextmanager(bn_model) - with cm_model() as model: - yield Adversarial(model, criterion, image, label) - - -@pytest.fixture -def gl_bn_adversarial(bn_criterion, bn_image, bn_label): - criterion = bn_criterion - image = bn_image - label = bn_label - - cm_model = contextmanager(gl_bn_model) - with cm_model() as model: - yield Adversarial(model, criterion, image, label) - - -@pytest.fixture(params=[CoordinateWiseGradientEstimator, - EvolutionaryStrategiesGradientEstimator]) -def eg_bn_adversarial(request, bn_criterion, bn_image, bn_label): - criterion = bn_criterion - image = bn_image - label = bn_label - - eg_bn_model = eg_bn_model_factory(request) - - cm_model = contextmanager(eg_bn_model) - with cm_model() as model: - yield Adversarial(model, criterion, 
image, label) - - -@pytest.fixture -def bn_impossible(bn_impossible_criterion, bn_image, bn_label): - criterion = bn_impossible_criterion - image = bn_image - label = bn_label - - cm_model = contextmanager(bn_model) - with cm_model() as model: - yield Adversarial(model, criterion, image, label) - - -@pytest.fixture -def bn_trivial(bn_trivial_criterion, bn_image, bn_label): - criterion = bn_trivial_criterion - image = bn_image - label = bn_label - - cm_model = contextmanager(bn_model) - with cm_model() as model: - adv = Adversarial(model, criterion, image, label) - # the original should not yet be considered adversarial - # so that the attack implementation is actually called - adv._Adversarial__best_adversarial = None - adv._Adversarial__best_distance = MSE(value=np.inf) - yield adv - - -@pytest.fixture -def bn_adversarial_pytorch(bn_model_pytorch, bn_criterion, - bn_image_pytorch, bn_label_pytorch): - model = bn_model_pytorch - criterion = bn_criterion - image = bn_image_pytorch - label = bn_label_pytorch - adv = Adversarial(model, criterion, image, label) - assert adv.image is None - assert adv.distance.value == np.inf - return adv - - -@pytest.fixture -def bn_targeted_adversarial_pytorch(bn_model_pytorch, bn_targeted_criterion, - bn_image_pytorch, bn_label_pytorch): - model = bn_model_pytorch - criterion = bn_targeted_criterion - image = bn_image_pytorch - label = bn_label_pytorch - adv = Adversarial(model, criterion, image, label) - assert adv.image is None - assert adv.distance.value == np.inf - return adv - - -def binarized_bn_model(): - """Creates a simple brightness model that does not require training. - - """ - - import tensorflow as tf - - bounds = (0, 1) - channel_axis = 3 - channels = 10 # == num_classes - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - images = tf.placeholder(tf.float32, (None, 5, 5, channels)) - logits = mean_brightness_net(images) - - def preprocessing(x): - x = binarize(x, (0, 1)) - - def backward(x): - return x - return x, backward - - with tf.Session(): - model = TensorFlowModel( - images, - logits, - bounds=bounds, - channel_axis=channel_axis, - preprocessing=preprocessing) - - yield model - - -# binarized_bn_model is also needed as a function, so we create the -# fixture separately -@pytest.fixture(name='bn_model') -def binarized_bn_model_fixutre(): - cm_model = contextmanager(binarized_bn_model) - with cm_model() as model: - yield model - - -@pytest.fixture -def binarized_bn_adversarial(bn_criterion, bn_image, binarized_bn_label): - criterion = bn_criterion - image = bn_image - label = binarized_bn_label - - cm_model = contextmanager(binarized_bn_model) - with cm_model() as model: - yield Adversarial(model, criterion, image, label) - - -@pytest.fixture -def binarized_bn_label(bn_image): - image = bn_image - image = binarize(image, (0, 1)) - mean = np.mean(image, axis=(0, 1)) - assert mean.shape == (10,) - label = np.argmax(mean) - return label - - -def binarized2_bn_model(): - """Creates a simple brightness model that does not require training. 
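    The binarized fixtures rely on a callable preprocessing that returns the
    preprocessed input together with a backward function, as consumed by
    _process_input/_process_gradient. A minimal sketch of the pattern, with a
    hypothetical rounding step and a straight-through gradient:

        import numpy as np

        def preprocessing(x):
            x = np.round(x)  # non-differentiable step

            def backward(dmdp):
                return dmdp  # pass the gradient through unchanged
            return x, backward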
- - """ - - import tensorflow as tf - - bounds = (0, 1) - channel_axis = 3 - channels = 10 # == num_classes - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - images = tf.placeholder(tf.float32, (None, 5, 5, channels)) - logits = mean_brightness_net(images) - - def preprocessing(x): - x = binarize(x, (0, 1), included_in='lower') - - def backward(x): - return x - return x, backward - - with tf.Session(): - model = TensorFlowModel( - images, - logits, - bounds=bounds, - channel_axis=channel_axis, - preprocessing=preprocessing) - - yield model - - -# binarized2_bn_model is also needed as a function, so we create the -# fixture separately -@pytest.fixture(name='binarized2_bn_model') -def binarized2_bn_model_fixutre(): - cm_model = contextmanager(binarized2_bn_model) - with cm_model() as model: - yield model - - -@pytest.fixture -def binarized2_bn_adversarial(bn_criterion, bn_image, binarized2_bn_label): - criterion = bn_criterion - image = bn_image - label = binarized2_bn_label - - cm_model = contextmanager(binarized2_bn_model) - with cm_model() as model: - yield Adversarial(model, criterion, image, label) - - -@pytest.fixture -def binarized2_bn_label(bn_image): - image = bn_image - image = binarize(image, (0, 1), included_in='lower') - mean = np.mean(image, axis=(0, 1)) - assert mean.shape == (10,) - label = np.argmax(mean) - return label -import numpy as np - -from foolbox import Adversarial -from foolbox.distances import MSE -import foolbox - -import sys -if sys.version_info > (3, 2): - from unittest.mock import Mock -else: - # for Python2.7 compatibility - from mock import Mock - - -# def test_adversarial(bn_model, bn_criterion, bn_image, bn_label): -def test_adversarial(model, criterion, image, label): - # model = bn_model - # criterion = bn_criterion - # image = bn_image - # label = bn_label - - adversarial = Adversarial(model, criterion, image, label, verbose=False) - - assert not adversarial.predictions(image)[1] - - assert adversarial.image is None - assert adversarial.output is None - assert adversarial.adversarial_class is None - assert adversarial.distance == MSE(value=np.inf) - assert adversarial.original_image is image - assert adversarial.original_class == label - assert adversarial.target_class() is None - assert adversarial.normalized_distance(image) == MSE(value=0) - assert adversarial.normalized_distance(image).value == 0 - - np.random.seed(22) - perturbation = np.random.uniform(-1, 1, size=image.shape) - perturbed = np.clip(image + perturbation, 0, 255).astype(np.float32) - d1 = adversarial.normalized_distance(perturbed).value - assert d1 != 0 - - assert adversarial.original_image.dtype == np.float32 - - adversarial.set_distance_dtype(np.float32) - assert adversarial.normalized_distance(perturbed).value == d1 - - adversarial.set_distance_dtype(np.float64) - assert adversarial.normalized_distance(perturbed).value != d1 - - adversarial.reset_distance_dtype() - assert adversarial.normalized_distance(perturbed).value == d1 - - true_label = label - label = 22 # wrong label - adversarial = Adversarial(model, criterion, image, label, verbose=True) - - assert adversarial.image is not None - assert adversarial.output is not None - assert adversarial.adversarial_class == true_label - assert adversarial.adversarial_class == np.argmax(adversarial.output) - assert adversarial.distance == MSE(value=0) - assert adversarial.original_image is image - assert adversarial.original_class == label - assert adversarial.target_class() is None - 
assert adversarial.normalized_distance(image) == MSE(value=0) - assert adversarial.normalized_distance(image).value == 0 - - predictions, is_adversarial = adversarial.predictions(image) - first_predictions = predictions - assert is_adversarial - - predictions, is_adversarial, _, _ = adversarial.predictions(image, return_details=True) # noqa: E501 - first_predictions = predictions - assert is_adversarial - - predictions, is_adversarial = adversarial.batch_predictions(image[np.newaxis]) # noqa: E501 - assert (predictions == first_predictions[np.newaxis]).all() - assert np.all(is_adversarial == np.array([True])) - - predictions, is_adversarial, index = adversarial.batch_predictions(image[np.newaxis], greedy=True) # noqa: E501 - assert (predictions == first_predictions[np.newaxis]).all() - assert is_adversarial - assert index == 0 - - predictions, is_adversarial, index, _, _ = adversarial.batch_predictions(image[np.newaxis], greedy=True, return_details=True) # noqa: E501 - assert (predictions == first_predictions[np.newaxis]).all() - assert is_adversarial - assert index == 0 - - predictions, gradient, is_adversarial = adversarial.predictions_and_gradient(image, label) # noqa: E501 - assert (predictions == first_predictions).all() - assert gradient.shape == image.shape - assert is_adversarial - - predictions, gradient, is_adversarial, _, _ = adversarial.predictions_and_gradient(image, label, return_details=True) # noqa: E501 - assert (predictions == first_predictions).all() - assert gradient.shape == image.shape - assert is_adversarial - - predictions, gradient, is_adversarial = adversarial.predictions_and_gradient() # noqa: E501 - assert (predictions == first_predictions).all() - assert gradient.shape == image.shape - assert is_adversarial - - gradient_pre = np.ones_like(predictions) * 0.3 - gradient = adversarial.backward(gradient_pre, image) - gradient2 = adversarial.backward(gradient_pre) - assert gradient.shape == image.shape - assert (gradient == gradient2).all() - - gradient = adversarial.gradient() - assert gradient.shape == image.shape - assert is_adversarial - - assert adversarial.num_classes() == 1000 - - assert adversarial.has_gradient() - - assert adversarial.channel_axis(batch=True) == 3 - assert adversarial.channel_axis(batch=False) == 2 - - # without adversarials - criterion.is_adversarial = Mock(return_value=False) - adversarial = Adversarial(model, criterion, image, label) - predictions, is_adversarial, index = adversarial.batch_predictions(image[np.newaxis], greedy=True) # noqa: E501 - assert (predictions == first_predictions[np.newaxis]).all() - assert not is_adversarial - assert index is None - - # without gradient - del model.predictions_and_gradient - - assert not adversarial.has_gradient() - - -def test_inplace(bn_model, bn_adversarial, bn_label): - class TestAttack(foolbox.attacks.Attack): - @foolbox.attacks.base.call_decorator - def __call__(self, input_or_adv, label, unpack): - a = input_or_adv - x = np.zeros_like(a.original_image) - a.predictions(x) - x[:] = a.original_image - - assert bn_adversarial.image is None - assert np.argmax(bn_model.predictions(bn_adversarial.original_image)) == bn_label # noqa: E501 - attack = TestAttack() - attack(bn_adversarial) - assert bn_adversarial.image is not None - assert bn_adversarial.distance.value > 0 - assert np.argmax(bn_model.predictions(bn_adversarial.original_image)) == bn_label # noqa: E501 - assert np.argmax(bn_model.predictions(bn_adversarial.image)) != bn_label - assert not (bn_adversarial.image == 
bn_adversarial.original_image).all() - assert (bn_adversarial.distance.reference == bn_adversarial.original_image).all() # noqa: E501 - assert (bn_adversarial.distance.other == bn_adversarial.image).all() -from foolbox import Adversarial -from foolbox import attacks -import pytest -import sys -if sys.version_info > (3, 2): - from unittest.mock import Mock -else: - # for Python2.7 compatibility - from mock import Mock - - -# def test_abstract_attack(): -# with pytest.raises(TypeError): -# attacks.Attack() - - -def test_base_init(): - assert attacks.FGSM() is not None - assert attacks.FGSM(Mock()) is not None - assert attacks.FGSM(None, None) is not None - assert attacks.FGSM(Mock(), Mock()) is not None - - -def test_aliases(): - assert attacks.GradientSignAttack == attacks.FGSM - - -def test_base_attack(model, criterion, image, label): - attack = attacks.FGSM(model, criterion) - assert attack.name() == 'GradientSignAttack' - - with pytest.raises(ValueError): - attack(image) - - with pytest.raises(TypeError): - attack(label=label) - - wrong_label = label + 1 - - adv = attack(image, label=label) - assert adv is None - adv = attack(image, label=wrong_label) - assert adv.shape == image.shape - adv = attack(image, label=wrong_label, unpack=False) - assert adv.image.shape == image.shape - - adv = Adversarial(model, criterion, image, wrong_label) - adv = attack(adv) - assert adv.shape == image.shape - - adv = Adversarial(model, criterion, image, wrong_label) - with pytest.raises(ValueError): - attack(adv, label=wrong_label) - - attack = attacks.FGSM() - with pytest.raises(ValueError): - attack(image, label=wrong_label) - - -def test_early_stopping(bn_model, bn_criterion, bn_image, bn_label): - attack = attacks.FGSM() - - model = bn_model - criterion = bn_criterion - image = bn_image - label = bn_label - - wrong_label = label + 1 - adv = Adversarial(model, criterion, image, wrong_label) - attack(adv) - assert adv.distance.value == 0 - assert not adv.reached_threshold() # because no threshold specified - - adv = Adversarial(model, criterion, image, wrong_label, threshold=1e10) - attack(adv) - assert adv.distance.value == 0 - assert adv.reached_threshold() - - adv = Adversarial(model, criterion, image, label) - attack(adv) - assert adv.distance.value > 0 - assert not adv.reached_threshold() # because no threshold specified - - c = adv._total_prediction_calls - d = adv.distance.value - large_d = 10 * d - small_d = d / 2 - - adv = Adversarial(model, criterion, image, label, - threshold=adv._distance(value=large_d)) - attack(adv) - assert 0 < adv.distance.value <= large_d - assert adv.reached_threshold() - assert adv._total_prediction_calls < c - - adv = Adversarial(model, criterion, image, label, - threshold=large_d) - attack(adv) - assert 0 < adv.distance.value <= large_d - assert adv.reached_threshold() - assert adv._total_prediction_calls < c - - adv = Adversarial(model, criterion, image, label, - threshold=small_d) - attack(adv) - assert small_d < adv.distance.value <= large_d - assert not adv.reached_threshold() - assert adv._total_prediction_calls == c - assert adv.distance.value == d - - adv = Adversarial(model, criterion, image, label, - threshold=adv._distance(value=large_d)) - attack(adv) - assert adv.reached_threshold() - c = adv._total_prediction_calls - attack(adv) - assert adv._total_prediction_calls == c # no new calls -import numpy as np - -from foolbox.attacks import ADefAttack as Attack - - -def test_attack_pytorch(bn_adversarial_pytorch): - adv = bn_adversarial_pytorch - attack = 
Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_targeted_attack_pytorch(bn_targeted_adversarial_pytorch): - adv = bn_targeted_adversarial_pytorch - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf - - -def test_targeted_attack(bn_targeted_adversarial): - adv = bn_targeted_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_impossible(bn_impossible): - adv = bn_impossible - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf -import numpy as np - -from foolbox.attacks import ApproximateLBFGSAttack as Attack - - -def test_name(): - attack = Attack() - assert 'Approx' in attack.name() - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv, maxiter=1, epsilon=1000) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv, maxiter=1, epsilon=1000) - assert adv.image is not None - assert adv.distance.value < np.inf - - -# def test_targeted_attack(bn_targeted_adversarial): -# adv = bn_targeted_adversarial -# attack = Attack() -# attack(adv) -# assert adv.image is not None -# assert adv.distance.value < np.inf -import numpy as np - -from foolbox.attacks import BoundaryAttackPlusPlus -from foolbox.attacks import BlendedUniformNoiseAttack -from foolbox.distances import Linf - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = BoundaryAttackPlusPlus() - attack(adv, iterations=20, verbose=True) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_linf(bn_adversarial): - adv = bn_adversarial - attack = BoundaryAttackPlusPlus(distance=Linf) - attack(adv, iterations=20, verbose=True) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_non_verbose(bn_adversarial): - adv = bn_adversarial - attack = BoundaryAttackPlusPlus() - attack(adv, iterations=20, verbose=False) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_continue(bn_adversarial): - adv = bn_adversarial - attack1 = BlendedUniformNoiseAttack() - attack1(adv) - d1 = adv.distance.value - attack2 = BoundaryAttackPlusPlus() - attack2(adv, iterations=20, verbose=True) - assert adv.image is not None - assert adv.distance.value < np.inf - assert adv.distance.value < d1 - - -def test_attack_targeted(bn_adversarial): - adv = bn_adversarial - attack = BoundaryAttackPlusPlus() - o = adv.original_image - np.random.seed(2) - starting_point = np.random.uniform( - 0, 1, size=o.shape).astype(o.dtype) - attack( - adv, - iterations=21, - starting_point=starting_point, - log_every_n_steps=2, - gamma=0.01, - stepsize_search='geometric_progression', - batch_size=128, - initial_num_evals=200, - max_num_evals=20000, - verbose=True) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_linf_targeted(bn_adversarial): - adv = bn_adversarial - attack = BoundaryAttackPlusPlus(distance=Linf) - o = 
adv.original_image - np.random.seed(2) - starting_point = np.random.uniform( - 0, 1, size=o.shape).astype(o.dtype) - attack( - adv, - iterations=21, - starting_point=starting_point, - log_every_n_steps=2, - gamma=0.01, - stepsize_search='grid_search', - batch_size=128, - initial_num_evals=200, - max_num_evals=20000, - verbose=True) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = BoundaryAttackPlusPlus() - attack(adv, iterations=200, verbose=True) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_impossible(bn_impossible): - adv = bn_impossible - attack = BoundaryAttackPlusPlus() - attack(adv, iterations=200, verbose=True) - assert adv.image is None - assert adv.distance.value == np.inf -import pytest -import numpy as np - -from foolbox.attacks import GradientAttack -from foolbox.attacks import BinarizationRefinementAttack - - -def test_attack(binarized_bn_adversarial): - adv = binarized_bn_adversarial - - attack = GradientAttack() - attack(adv) - v1 = adv.distance.value - - attack = BinarizationRefinementAttack() - attack(adv) - v2 = adv.distance.value - - assert v2 < v1 < np.inf - - o = adv.original_image - x = adv.image - d = x[x != o] - np.testing.assert_allclose(d, 0.5) - - -def test_attack_fail(bn_adversarial): - adv = bn_adversarial - - attack = GradientAttack() - attack(adv) - assert adv is not None - - attack = BinarizationRefinementAttack() - with pytest.raises(AssertionError) as e: - attack(adv) - assert 'thresholding does not match' in str(e.value) - - -def test_attack_noinit(binarized_bn_adversarial): - adv = binarized_bn_adversarial - assert adv.image is None - - attack = BinarizationRefinementAttack() - attack(adv) - assert adv.image is None - - -def test_attack_sp(binarized_bn_adversarial): - adv = binarized_bn_adversarial - - attack = GradientAttack() - attack(adv) - v1 = adv.distance.value - - attack = BinarizationRefinementAttack(adv._model) - adv = attack(adv.original_image, adv.original_class, - starting_point=adv.image, unpack=False) - v2 = adv.distance.value - - assert v2 < v1 < np.inf - - o = adv.original_image - x = adv.image - d = x[x != o] - np.testing.assert_allclose(d, 0.5) - - -def test_attack2(binarized2_bn_adversarial): - adv = binarized2_bn_adversarial - - attack = GradientAttack() - attack(adv) - v1 = adv.distance.value - - attack = BinarizationRefinementAttack() - attack(adv, included_in='lower') - v2 = adv.distance.value - - assert v2 < v1 < np.inf - - o = adv.original_image - x = adv.image - d = x[x != o] - np.testing.assert_allclose(d, 0.5) - - -def test_attack_wrong_arg(binarized_bn_adversarial): - adv = binarized_bn_adversarial - - attack = GradientAttack() - attack(adv) - - attack = BinarizationRefinementAttack() - with pytest.raises(ValueError): - attack(adv, included_in='blabla') -import numpy as np - -from foolbox.attacks import GaussianBlurAttack as Attack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf - # BlurAttack will fail for brightness model - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf - # BlurAttack will fail for brightness model - - -def test_attack_trivial(bn_trivial): - adv = bn_trivial - attack = Attack() - attack(adv) - assert adv.image is not None - assert 
adv.distance.value < np.inf -import numpy as np -import pytest - -from foolbox.attacks import BoundaryAttack -from foolbox.attacks import DeepFoolAttack -from foolbox.attacks import BlendedUniformNoiseAttack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = BoundaryAttack() - attack(adv, iterations=200, verbose=True) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_non_verbose(bn_adversarial): - adv = bn_adversarial - attack = BoundaryAttack() - attack(adv, iterations=200, verbose=False) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_continue(bn_adversarial): - adv = bn_adversarial - attack1 = BlendedUniformNoiseAttack() - attack1(adv) - d1 = adv.distance.value - attack2 = BoundaryAttack() - attack2(adv, iterations=200, verbose=True) - assert adv.image is not None - assert adv.distance.value < np.inf - assert adv.distance.value < d1 - - -def test_attack_parameters(bn_adversarial): - adv = bn_adversarial - attack = BoundaryAttack() - o = adv.original_image - np.random.seed(2) - starting_point = np.random.uniform( - 0, 1, size=o.shape).astype(o.dtype) - attack( - adv, - iterations=200, - starting_point=starting_point, - log_every_n_steps=2, - tune_batch_size=False, - threaded_rnd=False, - threaded_gen=False, - alternative_generator=True, - verbose=True) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_parameters2(bn_adversarial): - adv = bn_adversarial - attack = BoundaryAttack() - attack( - adv, - iterations=200, - alternative_generator=True, - verbose=True) - assert adv.image is not None - assert adv.distance.value < np.inf - - -@pytest.mark.filterwarnings("ignore:Batch size tuning after so few steps") -def test_attack_parameters3(bn_adversarial): - adv = bn_adversarial - attack = BoundaryAttack() - o = adv.original_image - np.random.seed(2) - starting_point = np.random.uniform( - 0, 1, size=o.shape).astype(o.dtype) - attack( - adv, - iterations=200, - starting_point=starting_point, - log_every_n_steps=2, - tune_batch_size=30, - threaded_rnd=False, - threaded_gen=False, - verbose=True) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = BoundaryAttack() - attack(adv, iterations=200, verbose=True) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_impossible(bn_impossible): - adv = bn_impossible - attack = BoundaryAttack() - attack(adv, iterations=200, verbose=True) - assert adv.image is None - assert adv.distance.value == np.inf - - -@pytest.mark.filterwarnings("ignore:Internal inconsistency, probably caused") -def test_attack_convergence(bn_adversarial): - adv = bn_adversarial - attack1 = DeepFoolAttack() - attack1(adv) - attack2 = BoundaryAttack() - attack2(adv, iterations=5000, verbose=True) - # should converge - assert adv.image is not None - assert adv.distance.value < np.inf -import numpy as np - -from foolbox.attacks import CarliniWagnerL2Attack as Attack - - -def test_untargeted_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv, max_iterations=100) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_targeted_attack(bn_targeted_adversarial): - adv = bn_targeted_adversarial - attack = Attack() - attack(adv, max_iterations=100, binary_search_steps=20) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def 
test_attack_impossible(bn_impossible): - adv = bn_impossible - attack = Attack() - attack(adv, max_iterations=100) - assert adv.image is None - assert adv.distance.value == np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv, max_iterations=100) - assert adv.image is None - assert adv.distance.value == np.inf -import numpy as np - -from foolbox.attacks import ContrastReductionAttack as Attack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf -import numpy as np - -from foolbox.attacks import DecoupledDirectionNormL2Attack as Attack - - -def test_untargeted_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_targeted_attack(bn_targeted_adversarial): - adv = bn_targeted_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_impossible(bn_impossible): - adv = bn_impossible - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf -import pytest -import numpy as np - -from foolbox.attacks import DeepFoolAttack -from foolbox.attacks import DeepFoolL2Attack -from foolbox.attacks import DeepFoolLinfinityAttack - -Attacks = [ - DeepFoolAttack, - DeepFoolL2Attack, - DeepFoolLinfinityAttack, -] - - -@pytest.mark.parametrize('Attack', Attacks) -def test_attack(Attack, bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -@pytest.mark.parametrize('Attack', Attacks) -def test_attack_gl(Attack, gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf - - -@pytest.mark.parametrize('Attack', Attacks) -def test_targeted_attack(Attack, bn_targeted_adversarial): - adv = bn_targeted_adversarial - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf - - -@pytest.mark.parametrize('Attack', Attacks) -def test_subsample(Attack, bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv, subsample=5) - assert adv.image is not None - assert adv.distance.value < np.inf - - -@pytest.mark.parametrize('Attack', Attacks) -def test_attack_impossible(Attack, bn_impossible): - adv = bn_impossible - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf - - -def test_deepfool_auto_linf(bn_adversarial_linf): - adv = bn_adversarial_linf - attack = DeepFoolAttack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_deepfool_auto_mae(bn_adversarial_mae): - adv = bn_adversarial_mae - attack = DeepFoolAttack() - with pytest.raises(NotImplementedError): - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf - - -def test_deepfool_auto_p0(bn_adversarial): - adv = bn_adversarial - attack = DeepFoolAttack() - with pytest.raises(ValueError): - attack(adv, p=0) - 
assert adv.image is None - assert adv.distance.value == np.inf -import numpy as np - -from foolbox.attacks import GradientAttack as Attack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_eps(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv, epsilons=np.linspace(0., 1., 100)[1:]) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf - - -def test_attack_eg(eg_bn_adversarial): - adv = eg_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf -import numpy as np - -from foolbox.attacks import GradientSignAttack as Attack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_eps(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv, epsilons=np.linspace(0., 1., 100)[1:]) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf -import numpy as np - -from foolbox.attacks import IterativeGradientAttack as Attack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv, epsilons=10, steps=5) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv, epsilons=10, steps=5) - assert adv.image is None - assert adv.distance.value == np.inf -import numpy as np - -from foolbox.attacks import IterativeGradientSignAttack as Attack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv, epsilons=10, steps=5) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv, epsilons=10, steps=5) - assert adv.image is None - assert adv.distance.value == np.inf -import pytest -import numpy as np - -from foolbox.attacks import LinfinityBasicIterativeAttack -from foolbox.attacks import L1BasicIterativeAttack -from foolbox.attacks import L2BasicIterativeAttack -from foolbox.attacks import ProjectedGradientDescentAttack -from foolbox.attacks import RandomStartProjectedGradientDescentAttack -from foolbox.attacks import MomentumIterativeAttack - -Attacks = [ - LinfinityBasicIterativeAttack, - L1BasicIterativeAttack, - L2BasicIterativeAttack, - ProjectedGradientDescentAttack, - RandomStartProjectedGradientDescentAttack, - MomentumIterativeAttack, -] - - -def test_attack_no_binary_search_and_no_return_early(bn_adversarial_linf): - adv = bn_adversarial_linf - attack = LinfinityBasicIterativeAttack() - attack(adv, binary_search=False, return_early=False) - assert adv.image is not None - assert adv.distance.value < np.inf - - -@pytest.mark.parametrize('Attack', Attacks) -def test_attack_linf(Attack, bn_adversarial_linf): - adv = bn_adversarial_linf - attack = Attack() - attack(adv, binary_search=10) - assert adv.image is not None - assert adv.distance.value < np.inf - - -@pytest.mark.parametrize('Attack', 
Attacks) -def test_attack_l2(Attack, bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -@pytest.mark.parametrize('Attack', Attacks) -def test_attack_l1(Attack, bn_adversarial_mae): - adv = bn_adversarial_mae - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -@pytest.mark.parametrize('Attack', Attacks) -def test_targeted_attack(Attack, bn_targeted_adversarial): - adv = bn_targeted_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -@pytest.mark.parametrize('Attack', Attacks) -def test_attack_gl(Attack, gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf - - -@pytest.mark.parametrize('Attack', Attacks) -def test_attack_impossible(Attack, bn_impossible): - adv = bn_impossible - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf -import numpy as np - -from foolbox.attacks import LBFGSAttack as Attack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv, num_random_targets=2) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_with_init_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv, num_random_targets=0) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_targeted_attack(bn_targeted_adversarial): - adv = bn_targeted_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf - - -def test_attack_pytorch(bn_adversarial_pytorch): - adv = bn_adversarial_pytorch - attack = Attack() - attack(adv, num_random_targets=2) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_targeted_attack_pytorch(bn_targeted_adversarial_pytorch): - adv = bn_targeted_adversarial_pytorch - attack = Attack() - attack(adv, num_random_targets=2) - assert adv.image is not None - assert adv.distance.value < np.inf -import numpy as np - -from foolbox import set_seeds -from foolbox.attacks import LocalSearchAttack as Attack - - -def test_attack(bn_adversarial): - set_seeds(22) - adv = bn_adversarial - attack = Attack() - attack(adv, d=1, t=10) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - set_seeds(22) - adv = gl_bn_adversarial - attack = Attack() - attack(adv, d=1, t=10) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_targeted_attack(bn_targeted_adversarial): - set_seeds(22) - adv = bn_targeted_adversarial - attack = Attack() - attack(adv, d=1) - assert adv.image is None - assert adv.distance.value == np.inf -import numpy as np - -from foolbox.attacks import NewtonFoolAttack as Attack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf - - -def test_targeted_attack(bn_targeted_adversarial): - adv = 
bn_targeted_adversarial - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf -import pytest -import numpy as np - -from foolbox.attacks import AdditiveUniformNoiseAttack -from foolbox.attacks import AdditiveGaussianNoiseAttack -from foolbox.attacks import SaltAndPepperNoiseAttack -from foolbox.attacks import BlendedUniformNoiseAttack - -Attacks = [ - AdditiveUniformNoiseAttack, - AdditiveGaussianNoiseAttack, - SaltAndPepperNoiseAttack, - BlendedUniformNoiseAttack, -] - - -@pytest.mark.parametrize('Attack', Attacks) -def test_attack(Attack, bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -@pytest.mark.parametrize('Attack', Attacks) -def test_attack_gl(Attack, gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -@pytest.mark.parametrize('Attack', Attacks) -def test_attack_impossible(Attack, bn_impossible): - adv = bn_impossible - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf -import numpy as np - -from foolbox.attacks import PointwiseAttack as Attack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_startingpoint(bn_adversarial): - adv = bn_adversarial - attack = Attack() - o = adv.original_image - np.random.seed(2) - starting_point = np.random.uniform( - 0, 1, size=o.shape).astype(o.dtype) - attack(adv, starting_point=starting_point) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_continue(bn_adversarial): - adv = bn_adversarial - attack = Attack() - o = adv.original_image - np.random.seed(2) - starting_point = np.random.uniform( - 0, 1, size=o.shape).astype(o.dtype) - adv.predictions(starting_point) - assert adv.image is not None - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_impossible(bn_impossible): - adv = bn_impossible - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf -import numpy as np -import pytest - -from foolbox.attacks import PrecomputedImagesAttack as Attack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - - image = adv.original_image - input_images = image[np.newaxis] - output_images = np.zeros_like(input_images) - - attack = Attack(input_images, output_images) - - attack(adv) - - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_unknown_image(bn_adversarial): - adv = bn_adversarial - - image = adv.original_image - input_images = np.zeros_like(image[np.newaxis]) - output_images = np.zeros_like(input_images) - - attack = Attack(input_images, output_images) - - with pytest.raises(ValueError): - attack(adv) - - assert adv.image is None - assert adv.distance.value == np.inf -import numpy as np - -from foolbox.attacks import SaliencyMapAttack as Attack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_random_targets(bn_adversarial): - adv = bn_adversarial - attack = 
Attack() - attack(adv, num_random_targets=2) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_targeted_attack(bn_targeted_adversarial): - adv = bn_targeted_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_targeted_attack_slow(bn_targeted_adversarial): - adv = bn_targeted_adversarial - attack = Attack() - attack(adv, fast=False) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_targeted_attack_max(bn_targeted_adversarial): - adv = bn_targeted_adversarial - attack = Attack() - attack(adv, max_perturbations_per_pixel=1) - assert adv.image is not None - assert adv.distance.value < np.inf -import numpy as np - -from foolbox.attacks import SinglePixelAttack as Attack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf -import numpy as np - -from foolbox.attacks import SLSQPAttack as Attack - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_impossible(bn_impossible): - adv = bn_impossible - attack = Attack() - attack(adv) - assert adv.image is None - assert adv.distance.value == np.inf -import numpy as np - -from foolbox.attacks import SpatialAttack as Attack - - -def test_attack_pytorch(bn_adversarial_pytorch): - adv = bn_adversarial_pytorch - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_rnd(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv, random_sampling=True) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_norot(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv, do_rotations=False) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_notrans(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv, do_translations=False) - assert adv.image is not None - assert adv.distance.value < np.inf - - -def test_attack_notrans_norot(bn_adversarial): - adv = bn_adversarial - attack = Attack() - attack(adv, do_translations=False, do_rotations=False) - assert adv.image is None - assert adv.distance.value == np.inf - - -def test_attack_gl(gl_bn_adversarial): - adv = gl_bn_adversarial - attack = Attack() - attack(adv) - assert adv.image is not None - assert adv.distance.value < np.inf -import pytest -import numpy as np -from foolbox import criteria - - -def test_abstract_criterion(): - with pytest.raises(TypeError): - criteria.Criterion() - - -def test_base_criterion(): - - class TestCriterion(criteria.Criterion): - - def is_adversarial(self, predictions, label): - return False - - criterion = TestCriterion() - assert criterion.name() == 'TestCriterion' - - -def test_combined_criteria(): - c1 = 
criteria.Misclassification() - c2 = criteria.OriginalClassProbability(0.2) - c3 = c1 & c2 - - probabilities = np.array([0.09, 0.11, 0.39, 0.41]) - predictions = np.log(probabilities) - - for i in range(len(predictions)): - b1 = c1.is_adversarial(predictions, i) - b2 = c2.is_adversarial(predictions, i) - b3 = c3.is_adversarial(predictions, i) - - assert (b1 and b2) == b3 - - assert c1.name() == 'Top1Misclassification' - assert c2.name() == 'OriginalClassProbability-0.2000' - assert c3.name() == c2.name() + '__' + c1.name() - - -def test_misclassfication(): - c = criteria.Misclassification() - predictions = np.array([0.1, 0.5, 0.7, 0.4]) - assert c.is_adversarial(predictions, 0) - assert c.is_adversarial(predictions, 1) - assert not c.is_adversarial(predictions, 2) - assert c.is_adversarial(predictions, 3) - - -def test_misclassification_names(): - c = criteria.Misclassification() - c1 = criteria.TopKMisclassification(k=1) - c5 = criteria.TopKMisclassification(k=5) - assert c.name() == c1.name() - assert c1.name() != c5.name() - c22 = criteria.TopKMisclassification(k=22) - assert '22' in c22.name() - - -def test_top_k_misclassfication(): - predictions = np.array([0.1, 0.5, 0.7, 0.4]) - - c = criteria.TopKMisclassification(k=1) - assert c.is_adversarial(predictions, 0) - assert c.is_adversarial(predictions, 1) - assert not c.is_adversarial(predictions, 2) - assert c.is_adversarial(predictions, 3) - - c = criteria.TopKMisclassification(k=2) - assert c.is_adversarial(predictions, 0) - assert not c.is_adversarial(predictions, 1) - assert not c.is_adversarial(predictions, 2) - assert c.is_adversarial(predictions, 3) - - -def test_target_class(): - predictions = np.array([0.1, 0.5, 0.7, 0.4]) - - c = criteria.TargetClass(3) - for i in range(len(predictions)): - assert not c.is_adversarial(predictions, i) - - assert c.name() == 'TargetClass-3' - - c = criteria.TargetClass(2) - for i in range(len(predictions)): - assert c.is_adversarial(predictions, i) - - assert c.name() == 'TargetClass-2' - - -def test_original_class_probability(): - predictions = np.array([0.1, 0.5, 0.7, 10., 0.4]) - - c = criteria.OriginalClassProbability(0.1) - assert c.is_adversarial(predictions, 0) - assert c.is_adversarial(predictions, 1) - assert c.is_adversarial(predictions, 2) - assert not c.is_adversarial(predictions, 3) - assert c.is_adversarial(predictions, 4) - - assert '0.1' in c.name() - - -def test_target_class_probability(): - predictions = np.array([0.1, 0.5, 0.7, 10., 0.4]) - - for t in [0, 1, 2, 4]: - c = criteria.TargetClassProbability(0, p=0.9) - for i in range(len(predictions)): - assert not c.is_adversarial(predictions, i) - - c = criteria.TargetClassProbability(3, p=0.9) - for i in range(len(predictions)): - assert c.is_adversarial(predictions, i) - - assert '3' in c.name() - assert '0.9' in c.name() - - -def test_confident_misclassification(): - predictions = np.array([0.1, 0.5, 0.7, 10., 0.4]) # 99% - - for p in [0.1, 0.5, 0.9]: - c = criteria.ConfidentMisclassification(p=p) - for i in [0, 1, 2, 4]: - assert c.is_adversarial(predictions, i) - assert not c.is_adversarial(predictions, 3) - - predictions = np.array([0.1, 0.5, 0.7, 10., 10.1]) # 47% and 52% - - for p in [0.1, 0.5, 0.9]: - c = criteria.ConfidentMisclassification(p=p) - for i in range(4): - expect = i < 4 and p <= 0.5 - assert c.is_adversarial(predictions, i) == expect - - c = criteria.ConfidentMisclassification(p=0.9) - assert '0.9' in c.name() -import pytest -import numpy as np -from foolbox import distances -from pytest import approx - - 
-def test_abstract_distance(): - with pytest.raises(TypeError): - distances.Distance() - - -def test_base_distance(): - - class TestDistance(distances.Distance): - - def _calculate(self): - return 22, 2 - - distance = TestDistance(None, None, bounds=(0, 1)) - assert distance.name() == 'TestDistance' - assert distance.value == 22 - assert distance.gradient == 2 - assert '2.2' in str(distance) - assert 'TestDistance' in str(distance) - assert distance == distance - assert not distance < distance - assert not distance > distance - assert distance <= distance - assert distance >= distance - - with pytest.raises(TypeError): - distance < 3 - - with pytest.raises(TypeError): - distance == 3 - - -def test_mse(): - assert distances.MSE == distances.MeanSquaredDistance - - -def test_mae(): - assert distances.MAE == distances.MeanAbsoluteDistance - - -def test_linf(): - assert distances.Linf == distances.Linfinity - - -def test_mean_squared_distance(): - d = distances.MeanSquaredDistance( - np.array([0, .5]), - np.array([.5, .5]), - bounds=(0, 1)) - assert d.value == 1. / 8. - assert (d.gradient == np.array([.5, 0])).all() - - -def test_mean_absolute_distance(): - d = distances.MeanAbsoluteDistance( - np.array([0, .5]), - np.array([.7, .5]), - bounds=(0, 1)) - assert d.value == approx(0.35) - assert (d.gradient == np.array([0.5, 0])).all() - - -def test_linfinity(): - d = distances.Linfinity( - np.array([0, .5]), - np.array([.7, .5]), - bounds=(0, 1)) - assert d.value == approx(.7) - with pytest.raises(NotImplementedError): - d.gradient - - -def test_l0(): - d = distances.L0( - np.array([0, .5]), - np.array([.7, .5]), - bounds=(0, 1)) - assert d.value == approx(1.) - with pytest.raises(NotImplementedError): - d.gradient - - -@pytest.mark.parametrize('Distance', [ - distances.MeanSquaredDistance, - distances.MeanAbsoluteDistance, - distances.Linfinity, - distances.L0, -]) -def test_str_repr(Distance): - """Tests that str and repr contain the value - and that str does not fail when initialized - with a value rather than calculated.""" - reference = np.zeros((10, 10)) - other = np.ones((10, 10)) - d = Distance(reference, other, bounds=(0, 1)) - assert isinstance(str(d), str) - if 'L0' in str(d): - assert '100' in str(d) - assert '100' in repr(d) - else: - assert '1.00e+' in str(d) - assert '1.00e+' in repr(d) -from foolbox.zoo import fetch_weights -from foolbox.zoo.common import path_exists, home_directory_path, sha256_hash -from foolbox.zoo.weights_fetcher import FOLDER - -import os -import pytest -import shutil - -import responses -import io -import zipfile - - -@responses.activate -def test_fetch_weights_unzipped(): - weights_uri = 'http://localhost:8080/weights.zip' - raw_body = _random_body(zipped=False) - - # mock server - responses.add(responses.GET, weights_uri, - body=raw_body, status=200, stream=True) - - expected_path = _expected_path(weights_uri) - - if path_exists(expected_path): - shutil.rmtree(expected_path) # make sure path does not exist already - - file_path = fetch_weights(weights_uri) - - exists_locally = path_exists(expected_path) - assert exists_locally - assert expected_path in file_path - - -@responses.activate -def test_fetch_weights_zipped(): - weights_uri = 'http://localhost:8080/weights.zip' - - # mock server - raw_body = _random_body(zipped=True) - responses.add(responses.GET, weights_uri, - body=raw_body, status=200, stream=True, - content_type='application/zip', - headers={'Accept-Encoding': 'gzip, deflate'}) - - expected_path = _expected_path(weights_uri) - - if 
path_exists(expected_path): - shutil.rmtree(expected_path) # make sure path does not exist already - - file_path = fetch_weights(weights_uri, unzip=True) - - exists_locally = path_exists(expected_path) - assert exists_locally - assert expected_path in file_path - - -@responses.activate -def test_fetch_weights_returns_404(): - weights_uri = 'http://down:8080/weights.zip' - - # mock server - responses.add(responses.GET, weights_uri, status=404) - - expected_path = _expected_path(weights_uri) - - if path_exists(expected_path): - shutil.rmtree(expected_path) # make sure path does not exist already - - with pytest.raises(RuntimeError): - fetch_weights(weights_uri, unzip=False) - - -def test_no_uri_given(): - assert fetch_weights(None) is None - - -def _random_body(zipped=False): - if zipped: - data = io.BytesIO() - with zipfile.ZipFile(data, mode='w') as z: - z.writestr('test.txt', 'no real weights in here :)') - data.seek(0) - return data.getvalue() - else: - raw_body = os.urandom(1024) - return raw_body - - -def _expected_path(weights_uri): - hash_digest = sha256_hash(weights_uri) - local_path = home_directory_path(FOLDER, hash_digest) - return local_path -from foolbox.zoo import git_cloner -import os -import hashlib -import pytest -from foolbox.zoo.git_cloner import GitCloneError - - -def test_git_clone(): - # given - git_uri = "https://github.com/bethgelab/convex_adversarial.git" - expected_path = _expected_path(git_uri) - - # when - path = git_cloner.clone(git_uri) - - # then - assert path == expected_path - - -def test_wrong_git_uri(): - git_uri = "git@github.com:bethgelab/non-existing-repo.git" - with pytest.raises(GitCloneError): - git_cloner.clone(git_uri) - - -def _expected_path(git_uri): - home = os.path.expanduser('~') - m = hashlib.sha256() - m.update(git_uri.encode()) - hash = m.hexdigest() - expected_path = os.path.join(home, '.foolbox_zoo', hash) - return expected_path -import pytest -import numpy as np - -from foolbox.models.base import _create_preprocessing_fn - -params = [ - (0, 1), - (0, 255), - (128, 1), - (128, 255), - (0., 1.), - (0., 255.), - (128., 1.), - (128., 255.), - (np.array([1., 2., 3.], dtype=np.float64), - np.array([1., 2., 3.], dtype=np.float64)), -] - - -@pytest.mark.parametrize('params', params) -def test_preprocessing(params, image): - image_copy = image.copy() - preprocessing = _create_preprocessing_fn(params) - preprocessed, backward = preprocessing(image) - assert image.shape == preprocessed.shape - assert image.dtype == preprocessed.dtype - assert np.allclose((image - params[0]) / params[1], preprocessed) - assert np.all(image == image_copy) - assert callable(backward) - dmdp = image - dmdx = backward(dmdp) - assert np.all(image == image_copy) - assert image.shape == dmdx.shape - assert image.dtype == dmdx.dtype -import numpy as np - -from foolbox import set_seeds -from foolbox.models import ModelWrapper -from foolbox.models import DifferentiableModelWrapper -from foolbox.models import CompositeModel - - -def test_context_manager(gl_bn_model): - assert isinstance(gl_bn_model, ModelWrapper) - with gl_bn_model as model: - assert model is not None - assert isinstance(model, ModelWrapper) - - -def test_wrapping(gl_bn_model, bn_image): - assert isinstance(gl_bn_model, ModelWrapper) - assert gl_bn_model.num_classes() == 10 - assert np.all( - gl_bn_model.predictions(bn_image) == - gl_bn_model.batch_predictions(bn_image[np.newaxis])[0]) - - -def test_diff_wrapper(bn_model, bn_image, bn_label): - x = bn_image - la = bn_label - xs = x[np.newaxis] - model1 = 
bn_model - model2 = DifferentiableModelWrapper(model1) - assert model1.num_classes() == model2.num_classes() - assert np.all(model1.predictions(x) == model2.predictions(x)) - assert np.all(model1.batch_predictions(xs) == model2.batch_predictions(xs)) - assert np.all(model1.gradient(x, la) == model2.gradient(x, la)) - assert np.all(model1.predictions_and_gradient(x, la)[0] == - model2.predictions_and_gradient(x, la)[0]) - assert np.all(model1.predictions_and_gradient(x, la)[1] == - model2.predictions_and_gradient(x, la)[1]) - g = model1.predictions(x) - assert np.all(model1.backward(g, x) == model2.backward(g, x)) - - -def test_composite_model(gl_bn_model, bn_model, bn_image, bn_label): - num_classes = 10 - test_grad = np.random.rand(num_classes).astype(np.float32) - model = CompositeModel(gl_bn_model, bn_model) - with model: - assert gl_bn_model.num_classes() == model.num_classes() - assert np.all( - gl_bn_model.predictions(bn_image) == - model.predictions(bn_image)) - assert np.all( - bn_model.gradient(bn_image, bn_label) == - model.gradient(bn_image, bn_label)) - assert np.all( - bn_model.backward(test_grad, bn_image) == - model.backward(test_grad, bn_image)) - assert np.all( - gl_bn_model.predictions(bn_image) == - model.predictions_and_gradient(bn_image, bn_label)[0]) - assert np.all( - bn_model.predictions_and_gradient(bn_image, bn_label)[1] == - model.predictions_and_gradient(bn_image, bn_label)[1]) - - -def test_estimate_gradient_wrapper(eg_bn_adversarial, bn_image): - p, ia = eg_bn_adversarial.predictions(bn_image) - set_seeds(22) - g = eg_bn_adversarial.gradient(bn_image) - set_seeds(22) - p2, g2, ia2 = eg_bn_adversarial.predictions_and_gradient(bn_image) - assert np.all(p == p2) - assert np.all(g == g2) - assert ia == ia2 -from foolbox import zoo -import numpy as np -import foolbox -import sys -import pytest -from foolbox.zoo.model_loader import ModelLoader -from os.path import join, dirname - - -@pytest.fixture(autouse=True) -def unload_foolbox_model_module(): - # reload foolbox_model from scratch for every run - # to ensure atomic tests without side effects - module_names = ['foolbox_model', 'model'] - for module_name in module_names: - if module_name in sys.modules: - del sys.modules[module_name] - - -test_data = [ - # private repo won't work on travis - # ('https://github.com/bethgelab/AnalysisBySynthesis.git', (1, 28, 28)), - # ('https://github.com/bethgelab/convex_adversarial.git', (1, 28, 28)), - # ('https://github.com/bethgelab/mnist_challenge.git', 784) - (join('file://', dirname(__file__), 'data/model_repo'), (3, 224, 224)) -] - - -@pytest.mark.parametrize("url, dim", test_data) -def test_loading_model(url, dim): - # download model - model = zoo.get_model(url) - - # create a dummy image - x = np.zeros(dim, dtype=np.float32) - x[:] = np.random.randn(*x.shape) - - # run the model - logits = model.predictions(x) - probabilities = foolbox.utils.softmax(logits) - predicted_class = np.argmax(logits) - - # sanity check - assert predicted_class >= 0 - assert np.sum(probabilities) >= 0.9999 - - # TODO: delete fmodel - - -def test_non_default_module_throws_error(): - with pytest.raises(RuntimeError): - ModelLoader.get(key='other') -import pytest -import numpy as np - -from foolbox import models - - -def test_abstract_model(): - with pytest.raises(TypeError): - models.Model() - - -def test_abstract_differentiable_model(): - with pytest.raises(TypeError): - models.DifferentiableModel() - - -def test_base_model(): - - class TestModel(models.Model): - - def batch_predictions(self, 
images): - pass - - def num_classes(self): - return 0 - - model = TestModel(bounds=(0, 1), channel_axis=1) - assert model.bounds() == (0, 1) - assert model.channel_axis() == 1 - with model: - assert model.num_classes() == 0 - - -def test_differentiable_base_model(): - - class TestModel(models.DifferentiableModel): - - def batch_predictions(self, images): - pass - - def num_classes(self): - return 10 - - def predictions_and_gradient(self, image, label): - return 'predictions', 'gradient' - - def backward(self, gradient, image): - return image - - model = TestModel(bounds=(0, 1), channel_axis=1) - - image = np.ones((28, 28, 1), dtype=np.float32) - label = 2 - assert model.gradient(image, label) == 'gradient' -import pytest -import numpy as np - -from foolbox.models import CaffeModel - - -@pytest.mark.parametrize("bn_model_caffe, num_classes", - [(10, 10), (1000, 1000)], - indirect=["bn_model_caffe"]) -def test_caffe_model(bn_model_caffe, num_classes): - model = bn_model_caffe - test_images = np.random.rand(2, num_classes, 5, 5).astype(np.float32) - test_label = 7 - - assert model.batch_predictions(test_images).shape \ - == (2, num_classes) - - test_logits = model.predictions(test_images[0]) - assert test_logits.shape == (num_classes,) - - test_gradient = model.gradient(test_images[0], test_label) - assert test_gradient.shape == test_images[0].shape - - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[0], - test_logits) - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[1], - test_gradient) - - assert model.num_classes() == num_classes - - -def test_caffe_model_gradient(tmpdir): - import caffe - from caffe import layers as L - - bounds = (0, 255) - channels = num_classes = 1000 - - net_spec = caffe.NetSpec() - net_spec.data = L.Input(name="data", - shape=dict(dim=[1, num_classes, 5, 5])) - net_spec.reduce_1 = L.Reduction(net_spec.data, - reduction_param={"operation": 4, - "axis": 3}) - net_spec.output = L.Reduction(net_spec.reduce_1, - reduction_param={"operation": 4, - "axis": 2}) - net_spec.label = L.Input(name="label", - shape=dict(dim=[1])) - net_spec.loss = L.SoftmaxWithLoss(net_spec.output, net_spec.label) - wf = tmpdir.mkdir("test_models_caffe")\ - .join("test_caffe_model_gradient_proto_{}.prototxt" - .format(num_classes)) - wf.write("force_backward: true\n" + str(net_spec.to_proto())) - preprocessing = (np.arange(num_classes)[:, None, None], - np.random.uniform(size=(channels, 5, 5)) + 1) - net = caffe.Net(str(wf), caffe.TEST) - model = CaffeModel( - net, - bounds=bounds, - preprocessing=preprocessing) - - epsilon = 1e-2 - - np.random.seed(23) - test_image = np.random.rand(channels, 5, 5).astype(np.float32) - test_label = 7 - - _, g1 = model.predictions_and_gradient(test_image, test_label) - - l1 = model._loss_fn(test_image - epsilon / 2 * g1, test_label) - l2 = model._loss_fn(test_image + epsilon / 2 * g1, test_label) - assert 1e4 * (l2 - l1) > 1 - - # make sure that gradient is numerically correct - np.testing.assert_array_almost_equal( - 1e4 * (l2 - l1), - 1e4 * epsilon * np.linalg.norm(g1)**2, - decimal=1) - - -@pytest.mark.parametrize("bn_model_caffe, num_classes", - [(10, 10), (1000, 1000)], - indirect=["bn_model_caffe"]) -def test_caffe_backward(bn_model_caffe, num_classes): - model = bn_model_caffe - test_image = np.random.rand(num_classes, 5, 5).astype(np.float32) - test_grad_pre = np.random.rand(num_classes).astype(np.float32) - - test_grad = model.backward(test_grad_pre, test_image) - 
assert test_grad.shape == test_image.shape - - manual_grad = np.repeat(np.repeat( - (test_grad_pre / 25.).reshape((-1, 1, 1)), - 5, axis=1), 5, axis=2) - - np.testing.assert_almost_equal( - test_grad, - manual_grad) - - -def test_caffe_model_preprocessing_shape_change(tmpdir): - import caffe - from caffe import layers as L - - bounds = (0, 255) - channels = num_classes = 1000 - - net_spec = caffe.NetSpec() - net_spec.data = L.Input(name="data", - shape=dict(dim=[1, num_classes, 5, 5])) - net_spec.reduce_1 = L.Reduction(net_spec.data, - reduction_param={"operation": 4, - "axis": 3}) - net_spec.output = L.Reduction(net_spec.reduce_1, - reduction_param={"operation": 4, "axis": 2}) - net_spec.label = L.Input(name="label", shape=dict(dim=[1])) - net_spec.loss = L.SoftmaxWithLoss(net_spec.output, net_spec.label) - wf = tmpdir.mkdir("test_models_caffe")\ - .join("test_caffe_model_preprocessing_shape_change_{}.prototxt" - .format(num_classes)) - wf.write("force_backward: true\n" + str(net_spec.to_proto())) - net = caffe.Net(str(wf), caffe.TEST) - model1 = CaffeModel( - net, - bounds=bounds) - - def preprocessing2(x): - if x.ndim == 3: - x = np.transpose(x, axes=(2, 0, 1)) - elif x.ndim == 4: - x = np.transpose(x, axes=(0, 3, 1, 2)) - - def grad(dmdp): - assert dmdp.ndim == 3 - dmdx = np.transpose(dmdp, axes=(1, 2, 0)) - return dmdx - - return x, grad - - model2 = CaffeModel( - net, - bounds=bounds, - preprocessing=preprocessing2) - - np.random.seed(22) - test_images_nhwc = np.random.rand(2, 5, 5, channels).astype(np.float32) - test_images_nchw = np.transpose(test_images_nhwc, (0, 3, 1, 2)) - - p1 = model1.batch_predictions(test_images_nchw) - p2 = model2.batch_predictions(test_images_nhwc) - - assert np.all(p1 == p2) - - p1 = model1.predictions(test_images_nchw[0]) - p2 = model2.predictions(test_images_nhwc[0]) - - assert np.all(p1 == p2) - - g1 = model1.gradient(test_images_nchw[0], 3) - assert g1.ndim == 3 - g1 = np.transpose(g1, (1, 2, 0)) - g2 = model2.gradient(test_images_nhwc[0], 3) - - np.testing.assert_array_almost_equal(g1, g2) -import pytest -import warnings - -import numpy as np -from keras.layers import GlobalAveragePooling2D -from keras.layers import Activation -from keras.layers import Input -from keras.activations import softmax -from keras.models import Model -from keras.models import Sequential - -from foolbox.models import KerasModel - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_keras_model(num_classes): - - bounds = (0, 255) - channels = num_classes - - model = Sequential() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - model.add(GlobalAveragePooling2D( - data_format='channels_last', input_shape=(5, 5, channels))) - - model = KerasModel( - model, - bounds=bounds, - predicts='logits') - - test_images = np.random.rand(2, 5, 5, channels).astype(np.float32) - test_label = 7 - - assert model.batch_predictions(test_images).shape \ - == (2, num_classes) - - test_logits = model.predictions(test_images[0]) - assert test_logits.shape == (num_classes,) - - test_gradient = model.gradient(test_images[0], test_label) - assert test_gradient.shape == test_images[0].shape - - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[0], - test_logits) - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[1], - test_gradient) - - assert model.num_classes() == num_classes - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def 
test_keras_model_probs(num_classes): - bounds = (0, 255) - channels = num_classes - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - inputs = Input(shape=(5, 5, channels)) - logits = GlobalAveragePooling2D( - data_format='channels_last')(inputs) - probs = Activation(softmax)(logits) - - model1 = KerasModel( - Model(inputs=inputs, outputs=logits), - bounds=bounds, - predicts='logits') - - model2 = KerasModel( - Model(inputs=inputs, outputs=probs), - bounds=bounds, - predicts='probabilities') - - model3 = KerasModel( - Model(inputs=inputs, outputs=probs), - bounds=bounds, - predicts='probs') - - np.random.seed(22) - test_images = np.random.rand(2, 5, 5, channels).astype(np.float32) - - p1 = model1.batch_predictions(test_images) - p2 = model2.batch_predictions(test_images) - p3 = model3.batch_predictions(test_images) - - assert p1.shape == p2.shape == p3.shape == (2, num_classes) - - np.testing.assert_array_almost_equal( - p1 - p1.max(), - p2 - p2.max(), - decimal=1) - - np.testing.assert_array_almost_equal( - p2 - p2.max(), - p3 - p3.max(), - decimal=5) - - -def test_keras_model_preprocess(): - num_classes = 1000 - bounds = (0, 255) - channels = num_classes - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - inputs = Input(shape=(5, 5, channels)) - logits = GlobalAveragePooling2D( - data_format='channels_last')(inputs) - - preprocessing = (np.arange(num_classes)[None, None], - np.random.uniform(size=(5, 5, channels)) + 1) - - model1 = KerasModel( - Model(inputs=inputs, outputs=logits), - bounds=bounds, - predicts='logits') - - model2 = KerasModel( - Model(inputs=inputs, outputs=logits), - bounds=bounds, - predicts='logits', - preprocessing=preprocessing) - - model3 = KerasModel( - Model(inputs=inputs, outputs=logits), - bounds=bounds, - predicts='logits') - - preprocessing = (0, np.random.uniform(size=(5, 5, channels)) + 1) - - model4 = KerasModel( - Model(inputs=inputs, outputs=logits), - bounds=bounds, - predicts='logits', - preprocessing=preprocessing) - - np.random.seed(22) - test_images = np.random.rand(2, 5, 5, channels).astype(np.float32) - test_images_copy = test_images.copy() - - p1 = model1.batch_predictions(test_images) - p2 = model2.batch_predictions(test_images) - - # make sure the images have not been changed by - # the in-place preprocessing - assert np.all(test_images == test_images_copy) - - p3 = model3.batch_predictions(test_images) - - assert p1.shape == p2.shape == p3.shape == (2, num_classes) - - np.testing.assert_array_almost_equal( - p1 - p1.max(), - p3 - p3.max(), - decimal=5) - - model4.batch_predictions(test_images) - - -def test_keras_model_gradients(): - num_classes = 1000 - bounds = (0, 255) - channels = num_classes - - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - inputs = Input(shape=(5, 5, channels)) - logits = GlobalAveragePooling2D( - data_format='channels_last')(inputs) - - preprocessing = (np.arange(num_classes)[None, None], - np.random.uniform(size=(5, 5, channels)) + 1) - - model = KerasModel( - Model(inputs=inputs, outputs=logits), - bounds=bounds, - predicts='logits', - preprocessing=preprocessing) - - eps = 1e-3 - - np.random.seed(22) - test_image = np.random.rand(5, 5, channels).astype(np.float32) - test_label = 7 - - _, g1 = model.predictions_and_gradient(test_image, test_label) - - test_label_array = np.array([test_label]) - l1 = model._loss_fn([test_image[None] - eps / 2 * g1, 
test_label_array])[0] - l2 = model._loss_fn([test_image[None] + eps / 2 * g1, test_label_array])[0] - - assert 1e5 * (l2 - l1) > 1 - - # make sure that gradient is numerically correct - np.testing.assert_array_almost_equal( - 1e5 * (l2 - l1), - 1e5 * eps * np.linalg.norm(g1)**2, - decimal=1) - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_keras_backward(num_classes): - - bounds = (0, 255) - channels = num_classes - - model = Sequential() - with warnings.catch_warnings(): - warnings.filterwarnings("ignore", category=DeprecationWarning) - model.add(GlobalAveragePooling2D( - data_format='channels_last', input_shape=(5, 5, channels))) - - model = KerasModel( - model, - bounds=bounds, - predicts='logits') - - test_image = np.random.rand(5, 5, channels).astype(np.float32) - test_grad_pre = np.random.rand(num_classes).astype(np.float32) - - test_grad = model.backward(test_grad_pre, test_image) - assert test_grad.shape == test_image.shape - - manual_grad = np.repeat(np.repeat( - (test_grad_pre / 25.).reshape((1, 1, -1)), - 5, axis=0), 5, axis=1) - - np.testing.assert_almost_equal( - test_grad, - manual_grad) -import pytest -import numpy as np -from lasagne.layers import GlobalPoolLayer -from lasagne.layers import InputLayer -import theano.tensor as T - -from foolbox.models import LasagneModel - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_lasagne_model(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = GlobalPoolLayer(images) - return logits - - images_var = T.tensor4('images', dtype='float32') - images = InputLayer((None, channels, 5, 5), images_var) - logits = mean_brightness_net(images) - - model = LasagneModel( - images, - logits, - bounds=bounds) - - test_images = np.random.rand(2, channels, 5, 5).astype(np.float32) - test_label = 7 - - assert model.batch_predictions(test_images).shape \ - == (2, num_classes) - - test_logits = model.predictions(test_images[0]) - assert test_logits.shape == (num_classes,) - - test_gradient = model.gradient(test_images[0], test_label) - assert test_gradient.shape == test_images[0].shape - - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[0], - test_logits) - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[1], - test_gradient) - - assert model.num_classes() == num_classes - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_lasagne_gradient(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = GlobalPoolLayer(images) - return logits - - images_var = T.tensor4('images', dtype='float32') - images = InputLayer((None, channels, 5, 5), images_var) - logits = mean_brightness_net(images) - - preprocessing = (np.arange(num_classes)[:, None, None], - np.random.uniform(size=(channels, 5, 5)) + 1) - - model = LasagneModel( - images, - logits, - preprocessing=preprocessing, - bounds=bounds) - - # theano and lasagne calculate the cross-entropy from the probabilities - rather than combining softmax and cross-entropy calculation; they - therefore have lower numerical accuracy - epsilon = 1e-3 - - np.random.seed(23) - test_image = np.random.rand(channels, 5, 5).astype(np.float32) - test_label = 7 - - _, g1 = model.predictions_and_gradient(test_image, test_label) - - l1 = model._loss_fn(test_image[None] - epsilon / 2 * g1, [test_label])[0] - l2 = model._loss_fn(test_image[None] + epsilon / 2 * g1, [test_label])[0] - - assert 1e5
* (l2 - l1) > 1 - - # make sure that gradient is numerically correct - np.testing.assert_array_almost_equal( - 1e5 * (l2 - l1), - 1e5 * epsilon * np.linalg.norm(g1)**2, - decimal=1) - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_lasagne_backward(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = GlobalPoolLayer(images) - return logits - - images_var = T.tensor4('images', dtype='float32') - images = InputLayer((None, channels, 5, 5), images_var) - logits = mean_brightness_net(images) - - model = LasagneModel( - images, - logits, - bounds=bounds) - - test_image = np.random.rand(channels, 5, 5).astype(np.float32) - test_grad_pre = np.random.rand(num_classes).astype(np.float32) - - test_grad = model.backward(test_grad_pre, test_image) - assert test_grad.shape == test_image.shape - - manual_grad = np.repeat(np.repeat( - (test_grad_pre / 25.).reshape((-1, 1, 1)), - 5, axis=1), 5, axis=2) - - np.testing.assert_almost_equal( - test_grad, - manual_grad) -import pytest -import mxnet as mx -import numpy as np - -from foolbox.models import MXNetModel - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_model(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = mx.symbol.mean(images, axis=(2, 3)) - return logits - - images = mx.symbol.Variable('images') - logits = mean_brightness_net(images) - - model = MXNetModel( - images, - logits, - {}, - ctx=mx.cpu(), - num_classes=num_classes, - bounds=bounds, - channel_axis=1) - - test_images = np.random.rand(2, channels, 5, 5).astype(np.float32) - test_label = 7 - - # Tests - assert model.batch_predictions(test_images).shape \ - == (2, num_classes) - - test_logits = model.predictions(test_images[0]) - assert test_logits.shape == (num_classes,) - - test_gradient = model.gradient(test_images[0], test_label) - assert test_gradient.shape == test_images[0].shape - - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[0], - test_logits) - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[1], - test_gradient) - - assert model.num_classes() == num_classes - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_model_gradient(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = mx.symbol.mean(images, axis=(2, 3)) - return logits - - images = mx.symbol.Variable('images') - logits = mean_brightness_net(images) - - preprocessing = (np.arange(num_classes)[:, None, None], - np.random.uniform(size=(channels, 5, 5)) + 1) - - model = MXNetModel( - images, - logits, - {}, - ctx=mx.cpu(), - num_classes=num_classes, - bounds=bounds, - preprocessing=preprocessing, - channel_axis=1) - - test_images = np.random.rand(2, channels, 5, 5).astype(np.float32) - test_image = test_images[0] - test_label = 7 - - epsilon = 1e-2 - _, g1 = model.predictions_and_gradient(test_image, test_label) - l1 = model._loss_fn(test_image - epsilon / 2 * g1, test_label) - l2 = model._loss_fn(test_image + epsilon / 2 * g1, test_label) - - assert 1e4 * (l2 - l1) > 1 - - # make sure that gradient is numerically correct - np.testing.assert_array_almost_equal( - 1e4 * (l2 - l1), - 1e4 * epsilon * np.linalg.norm(g1)**2, - decimal=1) - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_model_backward(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = 
mx.symbol.mean(images, axis=(2, 3)) - return logits - - images = mx.symbol.Variable('images') - logits = mean_brightness_net(images) - - model = MXNetModel( - images, - logits, - {}, - ctx=mx.cpu(), - num_classes=num_classes, - bounds=bounds, - channel_axis=1) - - test_image = np.random.rand(channels, 5, 5).astype(np.float32) - test_grad_pre = np.random.rand(num_classes).astype(np.float32) - - test_grad = model.backward(test_grad_pre, test_image) - assert test_grad.shape == test_image.shape - - manual_grad = np.repeat(np.repeat( - (test_grad_pre / 25.).reshape((-1, 1, 1)), - 5, axis=1), 5, axis=2) - - np.testing.assert_almost_equal( - test_grad, - manual_grad) -import pytest -import mxnet as mx -import numpy as np - -from foolbox.models import MXNetGluonModel -from mxnet.gluon import HybridBlock - - -class MeanBrightnessNet(HybridBlock): - def hybrid_forward(self, F, x, *args, **kwargs): - return mx.nd.mean(x, axis=(2, 3)) - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_model(num_classes): - bounds = (0, 255) - channels = num_classes - - block = MeanBrightnessNet() - - model = MXNetGluonModel( - block, - num_classes=num_classes, - bounds=bounds, - channel_axis=1) - - test_images = np.random.rand(2, channels, 5, 5).astype(np.float32) - test_label = 7 - - # Tests - assert model.batch_predictions(test_images).shape \ - == (2, num_classes) - - test_logits = model.predictions(test_images[0]) - assert test_logits.shape == (num_classes,) - - test_gradient = model.gradient(test_images[0], test_label) - assert test_gradient.shape == test_images[0].shape - - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[0], - test_logits) - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[1], - test_gradient) - - assert model.num_classes() == num_classes - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_model_gradient(num_classes): - bounds = (0, 255) - channels = num_classes - - block = MeanBrightnessNet() - - model = MXNetGluonModel( - block, - ctx=mx.cpu(), - num_classes=num_classes, - bounds=bounds, - channel_axis=1) - - test_images = np.random.rand(2, channels, 5, 5).astype(np.float32) - test_image = test_images[0] - test_label = 7 - - epsilon = 1e-2 - _, g1 = model.predictions_and_gradient(test_image, test_label) - l1 = model._loss_fn(test_image - epsilon / 2 * g1, test_label) - l2 = model._loss_fn(test_image + epsilon / 2 * g1, test_label) - - assert 1e4 * (l2 - l1) > 1 - - # make sure that gradient is numerically correct - np.testing.assert_array_almost_equal( - 1e4 * (l2 - l1), - 1e4 * epsilon * np.linalg.norm(g1)**2, - decimal=1) - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_model_backward(num_classes): - bounds = (0, 255) - channels = num_classes - - block = MeanBrightnessNet() - - model = MXNetGluonModel( - block, - ctx=mx.cpu(), - num_classes=num_classes, - bounds=bounds, - channel_axis=1) - - test_image = np.random.rand(channels, 5, 5).astype(np.float32) - test_grad_pre = np.random.rand(num_classes).astype(np.float32) - - test_grad = model.backward(test_grad_pre, test_image) - np.testing.assert_equal(test_grad.shape, test_image.shape) - - manual_grad = np.repeat(np.repeat( - (test_grad_pre / 25.).reshape((-1, 1, 1)), - 5, axis=1), 5, axis=2) - - np.testing.assert_almost_equal( - test_grad, - manual_grad) -import pytest -import numpy as np -import torch - -from foolbox.models import PyTorchModel - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def 
test_pytorch_model(num_classes): - import torch - import torch.nn as nn - - bounds = (0, 255) - channels = num_classes - - class Net(nn.Module): - - def __init__(self): - super(Net, self).__init__() - - def forward(self, x): - x = torch.mean(x, 3) - x = torch.mean(x, 2) - logits = x - return logits - - model = Net() - model = PyTorchModel( - model, - bounds=bounds, - num_classes=num_classes) - - test_images = np.random.rand(2, channels, 5, 5).astype(np.float32) - test_label = 7 - - assert model.batch_predictions(test_images).shape \ - == (2, num_classes) - - test_logits = model.predictions(test_images[0]) - assert test_logits.shape == (num_classes,) - - test_gradient = model.gradient(test_images[0], test_label) - assert test_gradient.shape == test_images[0].shape - - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[0], - test_logits) - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[1], - test_gradient) - - assert model.num_classes() == num_classes - - -def test_pytorch_model_preprocessing(): - import torch - import torch.nn as nn - - num_classes = 1000 - bounds = (0, 255) - channels = num_classes - - class Net(nn.Module): - - def __init__(self): - super(Net, self).__init__() - - def forward(self, x): - x = torch.mean(x, 3) - x = torch.mean(x, 2) - logits = x - return logits - - model = Net() - preprocessing = (np.arange(num_classes)[:, None, None], - np.random.uniform(size=(channels, 5, 5)) + 1) - - model1 = PyTorchModel( - model, - bounds=bounds, - num_classes=num_classes) - - model2 = PyTorchModel( - model, - bounds=bounds, - num_classes=num_classes, - preprocessing=preprocessing) - - model3 = PyTorchModel( - model, - bounds=bounds, - num_classes=num_classes) - - np.random.seed(22) - test_images = np.random.rand(2, channels, 5, 5).astype(np.float32) - test_images_copy = test_images.copy() - - p1 = model1.batch_predictions(test_images) - p2 = model2.batch_predictions(test_images) - - # make sure the images have not been changed by - # the in-place preprocessing - assert np.all(test_images == test_images_copy) - - p3 = model3.batch_predictions(test_images) - - assert p1.shape == p2.shape == p3.shape == (2, num_classes) - - np.testing.assert_array_almost_equal( - p1 - p1.max(), - p3 - p3.max(), - decimal=5) - - -def test_pytorch_model_gradient(): - import torch - import torch.nn as nn - - num_classes = 1000 - bounds = (0, 255) - channels = num_classes - - class Net(nn.Module): - - def __init__(self): - super(Net, self).__init__() - - def forward(self, x): - x = torch.mean(x, 3) - x = torch.mean(x, 2) - logits = x - return logits - - model = Net() - preprocessing = (np.arange(num_classes)[:, None, None], - np.random.uniform(size=(channels, 5, 5)) + 1) - - model = PyTorchModel( - model, - bounds=bounds, - num_classes=num_classes, - preprocessing=preprocessing) - - epsilon = 1e-2 - - np.random.seed(23) - test_image = np.random.rand(channels, 5, 5).astype(np.float32) - test_label = 7 - - _, g1 = model.predictions_and_gradient(test_image, test_label) - - l1 = model._loss_fn(test_image - epsilon / 2 * g1, test_label) - l2 = model._loss_fn(test_image + epsilon / 2 * g1, test_label) - - assert 1e4 * (l2 - l1) > 1 - - # make sure that gradient is numerically correct - np.testing.assert_array_almost_equal( - 1e4 * (l2 - l1), - 1e4 * epsilon * np.linalg.norm(g1)**2, - decimal=1) - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_pytorch_backward(num_classes): - import torch - import torch.nn as nn 
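- - # backward(g, x) returns the vector-Jacobian product dL/dx for a given - # dL/dlogits = g. Each logit of the mean-brightness net is the mean over - # the 5 x 5 spatial positions of one channel, so d(logit_c)/d(x[c, i, j]) - # equals 1 / 25 and the expected result is g / 25 broadcast over the - # spatial axes (the `manual_grad` built below). The *_gradient tests - # above instead validate gradients with a symmetric finite difference: - # loss(x + eps/2 * g) - loss(x - eps/2 * g) ~= eps * ||g||^2.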
- - bounds = (0, 255) - channels = num_classes - - class Net(nn.Module): - - def __init__(self): - super(Net, self).__init__() - - def forward(self, x): - x = torch.mean(x, 3) - x = torch.mean(x, 2) - logits = x - return logits - - model = Net() - model = PyTorchModel( - model, - bounds=bounds, - num_classes=num_classes) - - test_image = np.random.rand(channels, 5, 5).astype(np.float32) - test_grad_pre = np.random.rand(num_classes).astype(np.float32) - - test_grad = model.backward(test_grad_pre, test_image) - assert test_grad.shape == test_image.shape - - manual_grad = np.repeat(np.repeat( - (test_grad_pre / 25.).reshape((-1, 1, 1)), - 5, axis=1), 5, axis=2) - - np.testing.assert_almost_equal( - test_grad, - manual_grad) - - -def test_pytorch_model_preprocessing_shape_change(): - import torch - import torch.nn as nn - - num_classes = 1000 - bounds = (0, 255) - channels = num_classes - - class Net(nn.Module): - - def __init__(self): - super(Net, self).__init__() - - def forward(self, x): - x = torch.mean(x, 3) - x = torch.mean(x, 2) - logits = x - return logits - - model = Net() - - model1 = PyTorchModel( - model, - bounds=bounds, - num_classes=num_classes) - - def preprocessing2(x): - if x.ndim == 3: - x = np.transpose(x, axes=(2, 0, 1)) - elif x.ndim == 4: - x = np.transpose(x, axes=(0, 3, 1, 2)) - - def grad(dmdp): - assert dmdp.ndim == 3 - dmdx = np.transpose(dmdp, axes=(1, 2, 0)) - return dmdx - - return x, grad - - model2 = PyTorchModel( - model, - bounds=bounds, - num_classes=num_classes, - preprocessing=preprocessing2) - - np.random.seed(22) - test_images_nhwc = np.random.rand(2, 5, 5, channels).astype(np.float32) - test_images_nchw = np.transpose(test_images_nhwc, (0, 3, 1, 2)) - - p1 = model1.batch_predictions(test_images_nchw) - p2 = model2.batch_predictions(test_images_nhwc) - - assert np.all(p1 == p2) - - p1 = model1.predictions(test_images_nchw[0]) - p2 = model2.predictions(test_images_nhwc[0]) - - assert np.all(p1 == p2) - - g1 = model1.gradient(test_images_nchw[0], 3) - assert g1.ndim == 3 - g1 = np.transpose(g1, (1, 2, 0)) - g2 = model2.gradient(test_images_nhwc[0], 3) - - np.testing.assert_array_almost_equal(g1, g2) - - -def test_pytorch_device(bn_model_pytorch): - m = bn_model_pytorch - model1 = PyTorchModel( - m._model, - bounds=m.bounds(), - num_classes=m.num_classes(), - device='cpu') - model2 = PyTorchModel( - m._model, - bounds=m.bounds(), - num_classes=m.num_classes(), - device=torch.device('cpu')) - assert model1.device == model2.device -import pytest -import tensorflow as tf -import numpy as np - -from foolbox.models import TensorFlowModel - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_tensorflow_model(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - g = tf.Graph() - with g.as_default(): - images = tf.placeholder(tf.float32, (None, 5, 5, channels)) - logits = mean_brightness_net(images) - - with tf.Session(graph=g): - model = TensorFlowModel( - images, - logits, - bounds=bounds) - - assert model.session is not None - - test_images = np.random.rand(2, 5, 5, channels).astype(np.float32) - test_label = 7 - - assert model.batch_predictions(test_images).shape \ - == (2, num_classes) - - test_logits = model.predictions(test_images[0]) - assert test_logits.shape == (num_classes,) - - test_gradient = model.gradient(test_images[0], test_label) - assert test_gradient.shape == test_images[0].shape - - np.testing.assert_almost_equal( - 
model.predictions_and_gradient(test_images[0], test_label)[0], - test_logits) - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[1], - test_gradient) - - assert model.num_classes() == num_classes - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_tensorflow_model_cm(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - g = tf.Graph() - with g.as_default(): - images = tf.placeholder(tf.float32, (None, 5, 5, channels)) - logits = mean_brightness_net(images) - - with TensorFlowModel(images, logits, bounds=bounds) as model: - - test_images = np.random.rand(2, 5, 5, channels).astype(np.float32) - test_label = 7 - - assert model.batch_predictions(test_images).shape \ - == (2, num_classes) - - test_logits = model.predictions(test_images[0]) - assert test_logits.shape == (num_classes,) - - test_gradient = model.gradient(test_images[0], test_label) - assert test_gradient.shape == test_images[0].shape - - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[0], - test_logits) - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[1], - test_gradient) - - assert model.num_classes() == num_classes - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_tensorflow_preprocessing(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - q = (np.arange(num_classes)[None, None], - np.random.uniform(size=(5, 5, channels)) + 1) - - g = tf.Graph() - with g.as_default(): - images = tf.placeholder(tf.float32, (None, 5, 5, channels)) - logits = mean_brightness_net(images) - - with TensorFlowModel(images, logits, bounds=bounds, - preprocessing=q) as model: - - test_images = np.random.rand(2, 5, 5, channels).astype(np.float32) - test_label = 7 - - assert model.batch_predictions(test_images).shape \ - == (2, num_classes) - - test_logits = model.predictions(test_images[0]) - assert test_logits.shape == (num_classes,) - - test_gradient = model.gradient(test_images[0], test_label) - assert test_gradient.shape == test_images[0].shape - - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[0], - test_logits) - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[1], - test_gradient) - - assert model.num_classes() == num_classes - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_tensorflow_gradient(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - q = (np.arange(num_classes)[None, None], - np.random.uniform(size=(5, 5, channels)) + 1) - - g = tf.Graph() - with g.as_default(): - images = tf.placeholder(tf.float32, (None, 5, 5, channels)) - logits = mean_brightness_net(images) - - with TensorFlowModel(images, logits, bounds=bounds, - preprocessing=q) as model: - - epsilon = 1e-2 - - np.random.seed(23) - test_image = np.random.rand(5, 5, channels).astype(np.float32) - test_label = 7 - - _, g1 = model.predictions_and_gradient(test_image, test_label) - - l1 = model._loss_fn(test_image - epsilon / 2 * g1, test_label) - l2 = model._loss_fn(test_image + epsilon / 2 * g1, test_label) - - assert 1e4 * (l2 - l1) > 1 - - # make sure that gradient is numerically correct 
- np.testing.assert_array_almost_equal( - 1e4 * (l2 - l1), - 1e4 * epsilon * np.linalg.norm(g1)**2, - decimal=1) - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_tensorflow_backward(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - g = tf.Graph() - with g.as_default(): - images = tf.placeholder(tf.float32, (None, 5, 5, channels)) - logits = mean_brightness_net(images) - - with tf.Session(graph=g): - model = TensorFlowModel( - images, - logits, - bounds=bounds) - - assert model.session is not None - - test_image = np.random.rand(5, 5, channels).astype(np.float32) - test_grad_pre = np.random.rand(num_classes).astype(np.float32) - - test_grad = model.backward(test_grad_pre, test_image) - assert test_grad.shape == test_image.shape - - manual_grad = np.repeat(np.repeat( - (test_grad_pre / 25.).reshape((1, 1, -1)), - 5, axis=0), 5, axis=1) - - np.testing.assert_almost_equal( - test_grad, - manual_grad) - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_tensorflow_model_non_diff(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - g = tf.Graph() - with g.as_default(): - images = tf.placeholder(tf.float32, (None, 5, 5, channels)) - images_nd = tf.cast(images > 0, tf.float32) - logits = mean_brightness_net(images_nd) - - with tf.Session(graph=g): - model = TensorFlowModel( - images, - logits, - bounds=bounds) - - assert model.session is not None - - test_images = np.random.rand(5, 5, channels).astype(np.float32) - test_label = 7 - - test_gradient = model.gradient(test_images, test_label) - assert (test_gradient == 0).all() - - -def test_tf_keras_constructor(): - bounds = (0, 255) - - def create_model(): - data_format = 'channels_last' - input_shape = [28, 28, 1] - l = tf.keras.layers # noqa: E741 - max_pool = l.MaxPooling2D( - (2, 2), (2, 2), padding='same', data_format=data_format) - return tf.keras.Sequential( - [ - l.Conv2D( - 32, - 5, - padding='same', - data_format=data_format, - input_shape=input_shape, - activation=tf.nn.relu), - max_pool, - l.Conv2D( - 64, - 5, - padding='same', - data_format=data_format, - activation=tf.nn.relu), - max_pool, - l.Flatten(), - l.Dense(1024, activation=tf.nn.relu), - l.Dropout(0.4), - l.Dense(10) - ]) - model = create_model() - fmodel = TensorFlowModel.from_keras(model, bounds=bounds) - assert fmodel.num_classes() == 10 - - fmodel.session.run(tf.global_variables_initializer()) - - test_images = np.random.rand(2, 28, 28, 1).astype(np.float32) - assert fmodel.batch_predictions(test_images).shape == (2, 10) - - -def test_tf_keras_exception(): - bounds = (0, 255) - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - model = mean_brightness_net - with pytest.raises(ValueError): - TensorFlowModel.from_keras(model, bounds=bounds) - - TensorFlowModel.from_keras(model, bounds=bounds, input_shape=(5, 5, 3)) -from foolbox.models import TensorFlowEagerModel -import pytest -import numpy as np -import tensorflow as tf -tf.enable_eager_execution() - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_eager_model(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - model = TensorFlowEagerModel( - mean_brightness_net, - bounds=bounds, - 
num_classes=num_classes) - - test_images = np.random.rand(2, 5, 5, channels).astype(np.float32) - test_label = 7 - - assert model.batch_predictions(test_images).shape \ - == (2, num_classes) - - test_logits = model.predictions(test_images[0]) - assert test_logits.shape == (num_classes,) - - test_gradient = model.gradient(test_images[0], test_label) - assert test_gradient.shape == test_images[0].shape - - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[0], - test_logits) - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[1], - test_gradient) - - assert model.num_classes() == num_classes - - -def test_eager_model_preprocessing(): - num_classes = 1000 - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - model = mean_brightness_net - - q = (np.arange(num_classes)[None, None], - np.random.uniform(size=(5, 5, channels)) + 1) - - model1 = TensorFlowEagerModel( - model, - bounds=bounds, - num_classes=num_classes) - - model2 = TensorFlowEagerModel( - model, - bounds=bounds, - num_classes=num_classes, - preprocessing=q) - - model3 = TensorFlowEagerModel( - model, - bounds=bounds, - num_classes=num_classes) - - np.random.seed(22) - test_images = np.random.rand(2, 5, 5, channels).astype(np.float32) - test_images_copy = test_images.copy() - - p1 = model1.batch_predictions(test_images) - p2 = model2.batch_predictions(test_images) - - # make sure the images have not been changed by - # the in-place preprocessing - assert np.all(test_images == test_images_copy) - - p3 = model3.batch_predictions(test_images) - - assert p1.shape == p2.shape == p3.shape == (2, num_classes) - - np.testing.assert_array_almost_equal( - p1 - p1.max(), - p3 - p3.max(), - decimal=5) - - -def test_eager_model_gradient(): - num_classes = 1000 - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - model = mean_brightness_net - - q = (np.arange(num_classes)[None, None], - np.random.uniform(size=(5, 5, channels)) + 1) - - model = TensorFlowEagerModel( - model, - bounds=bounds, - num_classes=num_classes, - preprocessing=q) - - epsilon = 1e-2 - - np.random.seed(23) - test_image = np.random.rand(5, 5, channels).astype(np.float32) - test_label = 7 - - _, g1 = model.predictions_and_gradient(test_image, test_label) - - l1 = model._loss_fn(test_image - epsilon / 2 * g1, test_label) - l2 = model._loss_fn(test_image + epsilon / 2 * g1, test_label) - - assert 1e4 * (l2 - l1) > 1 - - # make sure that gradient is numerically correct - np.testing.assert_array_almost_equal( - 1e4 * (l2 - l1), - 1e4 * epsilon * np.linalg.norm(g1)**2, - decimal=1) - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_eager_backward(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - model = mean_brightness_net - model = TensorFlowEagerModel( - model, - bounds=bounds, - num_classes=num_classes) - - test_image = np.random.rand(5, 5, channels).astype(np.float32) - test_grad_pre = np.random.rand(num_classes).astype(np.float32) - - test_grad = model.backward(test_grad_pre, test_image) - assert test_grad.shape == test_image.shape - - manual_grad = np.repeat(np.repeat( - (test_grad_pre / 25.).reshape((1, 1, -1)), - 5, axis=0), 5, axis=1) - - np.testing.assert_almost_equal( - 
test_grad, - manual_grad) - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_eager_auto_classes(num_classes): - """tests whether num_classes can be detected automatically""" - bounds = (0, 255) - - def create_model(): - data_format = 'channels_first' - input_shape = [1, 28, 28] - l = tf.keras.layers # noqa: E741 - max_pool = l.MaxPooling2D( - (2, 2), (2, 2), padding='same', data_format=data_format) - return tf.keras.Sequential( - [ - l.Reshape( - target_shape=input_shape, - input_shape=(28 * 28,)), - l.Conv2D( - 32, - 5, - padding='same', - data_format=data_format, - activation=tf.nn.relu), - max_pool, - l.Conv2D( - 64, - 5, - padding='same', - data_format=data_format, - activation=tf.nn.relu), - max_pool, - l.Flatten(), - l.Dense(1024, activation=tf.nn.relu), - l.Dropout(0.4), - l.Dense(num_classes) - ]) - model = create_model() - fmodel = TensorFlowEagerModel(model, bounds=bounds) - assert fmodel.num_classes() == num_classes - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_eager_auto_classes_fail(num_classes): - bounds = (0, 255) - - def mean_brightness_net(images): - logits = tf.reduce_mean(images, axis=(1, 2)) - return logits - - model = mean_brightness_net - with pytest.raises(ValueError): - TensorFlowEagerModel(model, bounds=bounds) -import pytest -import numpy as np -import theano.tensor as T - -from foolbox.models import TheanoModel - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_theano_model(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = T.mean(images, axis=(2, 3)) - return logits - - images = T.tensor4('images') - logits = mean_brightness_net(images) - - model = TheanoModel( - images, - logits, - num_classes=num_classes, - bounds=bounds) - - test_images = np.random.rand(2, channels, 5, 5).astype(np.float32) - test_label = 7 - - assert model.batch_predictions(test_images).shape \ - == (2, num_classes) - - test_logits = model.predictions(test_images[0]) - assert test_logits.shape == (num_classes,) - - test_gradient = model.gradient(test_images[0], test_label) - assert test_gradient.shape == test_images[0].shape - - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[0], - test_logits) - np.testing.assert_almost_equal( - model.predictions_and_gradient(test_images[0], test_label)[1], - test_gradient) - - assert model.num_classes() == num_classes - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_theano_gradient(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = T.mean(images, axis=(2, 3)) - return logits - - images = T.tensor4('images') - logits = mean_brightness_net(images) - - preprocessing = (np.arange(num_classes)[:, None, None], - np.random.uniform(size=(channels, 5, 5)) + 1) - - model = TheanoModel( - images, - logits, - num_classes=num_classes, - preprocessing=preprocessing, - bounds=bounds) - - # theano and lasagne calculate the cross-entropy from the probabilities - rather than combining softmax and cross-entropy calculation; they - therefore have lower numerical accuracy - epsilon = 1e-3 - - np.random.seed(23) - test_image = np.random.rand(channels, 5, 5).astype(np.float32) - test_label = 7 - - _, g1 = model.predictions_and_gradient(test_image, test_label) - - l1 = model._loss_fn(test_image[None] - epsilon / 2 * g1, [test_label])[0] - l2 = model._loss_fn(test_image[None] + epsilon / 2 * g1, [test_label])[0] - - assert 1e5 * (l2 - l1) > 1 - - # make
sure that gradient is numerically correct - np.testing.assert_array_almost_equal( - 1e5 * (l2 - l1), - 1e5 * epsilon * np.linalg.norm(g1)**2, - decimal=1) - - -@pytest.mark.parametrize('num_classes', [10, 1000]) -def test_theano_backward(num_classes): - bounds = (0, 255) - channels = num_classes - - def mean_brightness_net(images): - logits = T.mean(images, axis=(2, 3)) - return logits - - images = T.tensor4('images') - logits = mean_brightness_net(images) - - model = TheanoModel( - images, - logits, - num_classes=num_classes, - bounds=bounds) - - test_image = np.random.rand(channels, 5, 5).astype(np.float32) - test_grad_pre = np.random.rand(num_classes).astype(np.float32) - - test_grad = model.backward(test_grad_pre, test_image) - assert test_grad.shape == test_image.shape - - manual_grad = np.repeat(np.repeat( - (test_grad_pre / 25.).reshape((-1, 1, 1)), - 5, axis=1), 5, axis=2) - - np.testing.assert_almost_equal( - test_grad, - manual_grad) -import pytest -import random -import numpy as np - -from foolbox import rng -from foolbox import nprng - - -@pytest.mark.parametrize('rng', [rng, nprng]) -def test_rng(rng): - random.seed(66) - np.random.seed(77) - x1 = rng.randint(0, 1000000) - random.seed(66) - np.random.seed(77) - x2 = rng.randint(0, 1000000) - assert x1 != x2 -import pytest -from pytest import approx -import numpy as np - -from foolbox.utils import softmax -from foolbox.utils import crossentropy -from foolbox.utils import imagenet_example -from foolbox.utils import binarize -from foolbox.utils import onehot_like -from foolbox.utils import samples - - -def test_softmax(): - predictions = np.array([0.1, 0.5, 0.7, 0.4]) - probabilities = softmax(predictions) - assert not np.sum(predictions) == approx(1.) - assert np.sum(probabilities) == approx(1.) 
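- - -# A minimal reference sketch of the identities exercised by these tests -# (not foolbox's implementation; a numerically stable variant for clarity): -def _softmax_ref(logits): - # subtracting the max leaves softmax unchanged but avoids overflow - e = np.exp(logits - logits.max()) - return e / e.sum() - - -def _crossentropy_ref(logits, label): - # cross-entropy with a one-hot target is the negative log-probability of - # the true class - return -np.log(_softmax_ref(logits)[label])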
- - -def test_crossentropy(): - predictions = np.array([0.1, 0.5, 0.7, 0.4]) - probabilities = softmax(predictions) - for i in range(len(predictions)): - ce = crossentropy(logits=predictions, label=i) - assert ce == approx(-np.log(probabilities[i])) - - -def test_imagenet_example(): - image, label = imagenet_example() - assert 0 <= label < 1000 - assert isinstance(label, int) - assert image.shape == (224, 224, 3) - assert image.dtype == np.float32 - - -def test_imagenet_example_channels_first(): - image, label = imagenet_example(data_format='channels_first') - image2, _ = imagenet_example(data_format='channels_last') - assert 0 <= label < 1000 - assert isinstance(label, int) - assert image.shape == (3, 224, 224) - assert image.dtype == np.float32 - - for i in range(3): - assert np.all(image[i] == image2[:, :, i]) - - -def test_samples_imagenet(): - images, labels = samples(dataset='imagenet', - batchsize=5) - assert 0 <= labels[0] < 1000 - assert images.shape[0] == 5 - assert isinstance(labels[0], np.integer) - assert images.shape == (5, 224, 224, 3) - assert images.dtype == np.float32 - - -def test_samples_imagenet_channels_first(): - images, labels = samples(dataset='imagenet', - batchsize=5, - data_format='channels_first') - assert 0 <= labels[0] < 1000 - assert images.shape[0] == 5 - assert isinstance(labels[0], np.integer) - assert images.shape == (5, 3, 224, 224) - assert images.dtype == np.float32 - - -def test_samples_mnist(): - images, labels = samples(dataset='mnist', batchsize=5) - assert 0 <= labels[0] < 10 - assert images.shape[0] == 5 - assert isinstance(labels[0], np.integer) - assert images.shape == (5, 28, 28) - assert images.dtype == np.float32 - - -def test_samples_cifar10(): - images, labels = samples(dataset='cifar10', batchsize=5) - assert 0 <= labels[0] < 10 - assert images.shape[0] == 5 - assert isinstance(labels[0], np.integer) - assert images.shape == (5, 32, 32, 3) - assert images.dtype == np.float32 - - -def test_samples_cifar100(): - images, labels = samples(dataset='cifar100', batchsize=5) - assert 0 <= labels[0] < 100 - assert images.shape[0] == 5 - assert isinstance(labels[0], np.integer) - assert images.shape == (5, 32, 32, 3) - assert images.dtype == np.float32 - - -def test_samples_fashionMNIST(): - images, labels = samples(dataset='fashionMNIST', batchsize=5) - assert 0 <= labels[0] < 10 - assert images.shape[0] == 5 - assert isinstance(labels[0], np.integer) - assert images.shape == (5, 28, 28) - assert images.dtype == np.float32 - - -def test_binarize(): - x = np.array([0.1, 0.5, 0.7, 0.4]) - x1 = binarize(x, (-2, 2), 0.5) - assert np.all(abs(x1) == 2) - with pytest.raises(ValueError): - binarize(x, (-2, 2), 0.5, included_in='blabla') - - -def test_onehot_like(): - a = np.array([0.1, 0.5, 0.7, 0.4]) - o = onehot_like(a, 2) - assert o.shape == a.shape - assert o.dtype == a.dtype - assert np.all(o[:2] == 0) - assert o[2] == 1 - assert np.all(o[3:] == 0) - - o = onehot_like(a, 3, value=-77.5) - assert o.shape == a.shape - assert o.dtype == a.dtype - assert np.all(o[:3] == 0) - assert o[3] == -77.5 - assert np.all(o[4:] == 0) -from .zoo import get_model # noqa: F401 -from .weights_fetcher import fetch_weights # noqa: F401 -import hashlib -import os - - -def sha256_hash(git_uri): - m = hashlib.sha256() - m.update(git_uri.encode()) - return m.hexdigest() - - -def home_directory_path(folder, hash_digest): - # does this work on all operating systems? 
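- # (it does: os.path.expanduser('~') reads HOME on POSIX and falls back - # to USERPROFILE or HOMEDRIVE/HOMEPATH on Windows)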
- home = os.path.expanduser('~') - return os.path.join(home, folder, hash_digest) - - -def path_exists(local_path): - return os.path.exists(local_path) -from git import Repo -import logging -from .common import sha256_hash, home_directory_path, path_exists - -FOLDER = '.foolbox_zoo' - - -class GitCloneError(RuntimeError): - pass - - -def clone(git_uri): - """ - Clone a remote git repository to a local path. - - :param git_uri: the URI to the git repository to be cloned - :return: the generated local path where the repository has been cloned to - """ - hash_digest = sha256_hash(git_uri) - local_path = home_directory_path(FOLDER, hash_digest) - exists_locally = path_exists(local_path) - - if not exists_locally: - _clone_repo(git_uri, local_path) - else: - logging.info( # pragma: no cover - "Git repository already exists locally.") # pragma: no cover - - return local_path - - -def _clone_repo(git_uri, local_path): - logging.info("Cloning repo %s to %s", git_uri, local_path) - try: - Repo.clone_from(git_uri, local_path) - except Exception as e: - logging.exception("Failed to clone repository: %s", e) - raise GitCloneError("Failed to clone repository") - logging.info("Cloned repo successfully.") -import sys -import importlib - -import abc -abstractmethod = abc.abstractmethod -if sys.version_info >= (3, 4): - ABC = abc.ABC -else: # pragma: no cover - ABC = abc.ABCMeta('ABC', (), {}) - - -class ModelLoader(ABC): - - @abstractmethod - def load(self, path, module_name='foolbox_model', **kwargs): - """ - Load a model from a local path, to which a git repository - has previously been cloned. - - :param path: the path to the local repository containing the code - :param module_name: the name of the module to import - :param kwargs: parameters for the model to be loaded - :return: a foolbox-wrapped model - """ - pass # pragma: no cover - - @staticmethod - def get(key=None): - if key is None: - return DefaultLoader() - else: - raise RuntimeError("No model loader for: {}".format(key)) - - @staticmethod - def _import_module(path, module_name='foolbox_model'): - sys.path.insert(0, path) - module = importlib.import_module(module_name) - print('imported module: {}'.format(module)) - return module - - -class DefaultLoader(ModelLoader): - - def load(self, path, module_name='foolbox_model', **kwargs): - module = ModelLoader._import_module(path, module_name=module_name) - model = module.create(**kwargs) - return model -import requests -import shutil -import zipfile -import tarfile -import os -import logging - -from .common import sha256_hash, home_directory_path, path_exists - -FOLDER = '.foolbox_zoo/weights' - - -def fetch_weights(weights_uri, unzip=False): - """ - - Provides utilities to download and extract packages - containing model weights when creating foolbox-zoo compatible - repositories, if the weights are not part of the repository itself.
- - Examples - -------- - - Download and unzip weights: - - >>> from foolbox import zoo - >>> url = 'https://github.com/MadryLab/mnist_challenge_models/raw/master/secret.zip' # noqa F501 - >>> weights_path = zoo.fetch_weights(url, unzip=True) - - :param weights_uri: the URI to fetch the weights from - :param unzip: should be `True` if the file to be downloaded is - a zipped package - :return: local path where the weights have been downloaded - and potentially unzipped to - """ - if weights_uri is None: - logging.info("No weights to be fetched for this model.") - return - - hash_digest = sha256_hash(weights_uri) - local_path = home_directory_path(FOLDER, hash_digest) - exists_locally = path_exists(local_path) - - filename = _filename_from_uri(weights_uri) - file_path = os.path.join(local_path, filename) - - if exists_locally: - logging.info("Weights already stored locally.") # pragma: no cover - else: - _download(file_path, weights_uri, local_path) - - if unzip: - file_path = _extract(local_path, filename) - - return file_path - - -def _filename_from_uri(url): - # get last part of the URI, i.e. file-name - filename = url.split('/')[-1] - # remove query params if they exist - filename = filename.split('?')[0] - return filename - - -def _download(file_path, url, directory): - logging.info("Downloading weights: %s to %s", url, file_path) - if not os.path.exists(directory): - os.makedirs(directory) - # first check ETag or If-Modified-Since header or similar - to check whether updated weights are available? - r = requests.get(url, stream=True) - if r.status_code == 200: - with open(file_path, 'wb') as f: - r.raw.decode_content = True - shutil.copyfileobj(r.raw, f) - else: - raise RuntimeError("Failed to fetch weights from {}".format(url)) - - -def _extract(directory, filename): - file_path = os.path.join(directory, filename) - extracted_folder = filename.rsplit('.', 1)[0] - extracted_folder = os.path.join(directory, extracted_folder) - - if not os.path.exists(extracted_folder): - logging.info("Extracting weights package to %s", extracted_folder) - os.makedirs(extracted_folder) - if '.zip' in file_path: - zip_ref = zipfile.ZipFile(file_path, 'r') - zip_ref.extractall(extracted_folder) - zip_ref.close() - elif '.tar.gz' in file_path: # pragma: no cover - tar_ref = tarfile.TarFile.open(file_path, 'r') - tar_ref.extractall(extracted_folder) - tar_ref.close() - else: - logging.info("Extracted folder already exists: %s", - extracted_folder) # pragma: no cover - - return extracted_folder -from .git_cloner import clone -from .model_loader import ModelLoader - - -def get_model(url, module_name='foolbox_model', **kwargs): - """ - - Provides utilities to download foolbox-compatible robust models - to easily test attacks against them by simply providing a git-URL. - - Examples - -------- - - Instantiate a model: - - >>> from foolbox import zoo - >>> url = "https://github.com/bveliqi/foolbox-zoo-dummy.git" - >>> model = zoo.get_model(url) # doctest: +SKIP - - Only works with a foolbox-zoo compatible repository. - I.e. models need to have a `foolbox_model.py` file - with a `create()`-function, which returns a foolbox-wrapped model. - - Using the kwargs parameter it is possible to pass an arbitrary number - of parameters to this method call. These parameters are forwarded to - the instantiated model.
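- - For example, assuming the repository's create() accepted a hypothetical - device parameter, it could be forwarded like this: - - >>> model = zoo.get_model(url, device='cpu') # doctest: +SKIP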
- - Example repositories: - - - https://github.com/bethgelab/AnalysisBySynthesis - - https://github.com/bethgelab/mnist_challenge - - https://github.com/bethgelab/cifar10_challenge - - https://github.com/bethgelab/convex_adversarial - - https://github.com/wielandbrendel/logit-pairing-foolbox.git - - https://github.com/bethgelab/defensive-distillation.git - - :param url: URL to the git repository - :param module_name: the name of the module to import - :param kwargs: Optional set of parameters that will be used by the - to be instantiated model. - :return: a foolbox-wrapped model instance - """ - repo_path = clone(url) - loader = ModelLoader.get() - model = loader.load(repo_path, module_name=module_name, **kwargs) - return model -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -from dl_text import dl -from dl_text.metrics import eval_metric -import wiki_utils as wk -import model_abcnn as model -import sys -sys.path.append("..\_deeplearn_utils") - - -glove_fname = 'K:/workspace/neural network/Trec_QA-master/glove.6B.50d.txt' - -################### DEFINING MODEL AND PREDICTION FILE ################### - -lrmodel = model.abcnn -model_name = lrmodel.func_name - -################### DEFINING HYPERPARAMETERS ################### - -dimx = 40 -dimy = 60 -dimft = 44 -batch_size = 70 -vocab_size = 8000 -embedding_dim = 50 -nb_filter = 120 -filter_length = (50, 4) -depth = 1 -nb_epoch = 3 -shared = 0 -opt_params = [0.001, 'adam'] - -ques, ans, label_train, train_len, test_len,\ - wordVec_model, res_fname, pred_fname, feat_train, feat_test = wk.load_wiki( - model_name, glove_fname) -data_l, data_r, embedding_matrix = dl.process_data(ques, ans, - wordVec_model, dimx=dimx, - dimy=dimy, vocab_size=vocab_size, - embedding_dim=embedding_dim) - -X_train_l, X_test_l, X_dev_l, X_train_r, X_test_r, X_dev_r = wk.prepare_train_test(data_l, data_r, - train_len, test_len) - - -if model_name == 'abcnn': - lrmodel = lrmodel(embedding_matrix, dimx=dimx, dimy=dimy, nb_filter=nb_filter, embedding_dim=embedding_dim, - filter_length=filter_length, depth=depth, shared=shared, - opt_params=opt_params) - - print '\n', model_name, 'model built \n' - lrmodel.fit([X_train_l, X_train_r], label_train, - batch_size=batch_size, nb_epoch=nb_epoch, verbose=2) - map_val, mrr_val = eval_metric( - lrmodel, X_test_l, X_test_r, res_fname, pred_fname, feat_test=feat_test) - -else: - lrmodel = lrmodel(embedding_matrix, dimx=dimx, dimy=dimy, nb_filter=nb_filter, embedding_dim=embedding_dim, - filter_length=filter_length, depth=depth, shared=shared, - opt_params=opt_params) - - print '\n', model_name, 'model built \n' - lrmodel.fit([X_train_l, X_train_r], label_train, - batch_size=batch_size, nb_epoch=nb_epoch, verbose=2) - map_val, mrr_val = eval_metric( - lrmodel, X_test_l, X_test_r, res_fname, pred_fname) - - -print 'MAP : ', map_val, ' MRR : ', mrr_val -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -import keras -from keras.models import Model -from keras import backend as K -from keras.layers.core import Dense, Reshape, Permute -from keras.layers import Input, merge, ZeroPadding2D, RepeatVector, GlobalAveragePooling2D, GlobalMaxPooling1D, GlobalAveragePooling1D, ZeroPadding1D, AveragePooling1D, GlobalMaxPooling2D, Dropout, Merge, Conv1D, Lambda, Flatten, Conv2D, MaxPooling2D, UpSampling2D, Convolution2D - -from dl_text.dl import word2vec_embedding_layer - -######################## MODEL USING BASIC CNN ######################## - - -def abcnn(embedding_matrix, attention=1, 
dimx=50, dimy=50, nb_filter=72, - filter_length=(50, 4), dropout=None, shared=1, embedding_dim=50, depth=1, - filter_widths=[4, 3, 2], opt_params=[0.0008, 'adam']): - - # if True: - print '\n Model Uses ABCNN architecture ......' - print 'attention : ', attention - print 'nb_filters :', nb_filter - print 'filter_size :', filter_length - print 'opt params :', opt_params - # print 'dense layer :',dense_neuron,' ',reg1 - if dropout: - print 'using dropout' - if shared: - print 'using shared params' - print '\n' - - inpx = Input(shape=(dimx,), dtype='int32', name='inpx') - inpy = Input(shape=(dimy,), dtype='int32', name='inpy') - - x = word2vec_embedding_layer(embedding_matrix, train=False)(inpx) - y = word2vec_embedding_layer(embedding_matrix, train=False)(inpy) - - #x = Permute((2,1))(x) - #y = Permute((2,1))(y) - - mul = MatchScore(x, y) - mulT = Permute((2, 1))(mul) - - d1 = Dense(units=embedding_dim)(mul) - d2 = Dense(units=embedding_dim)(mulT) - - x = Permute((2, 1))(x) - y = Permute((2, 1))(y) - - if attention in [1, 3]: - - x = Reshape((embedding_dim, dimx, 1))(x) - y = Reshape((embedding_dim, dimy, 1))(y) - d1 = Reshape((embedding_dim, dimx, 1))(d1) - d2 = Reshape((embedding_dim, dimy, 1))(d2) - - if attention in [1, 3]: - conv1 = merge([x, d1], mode='concat', concat_axis=1) - conv2 = merge([y, d2], mode='concat', concat_axis=1) - else: - conv1, conv2 = x, y - - channel_1, channel_2 = [], [] - - for dep in range(depth): - - filter_width = filter_widths[dep] - - if attention in [1, 3]: - conv1 = ZeroPadding2D((filter_width - 1, 0))(conv1) - conv2 = ZeroPadding2D((filter_width - 1, 0))(conv2) - - if shared: - conv = Conv2D(nb_filter=nb_filter, kernel_size=filter_length, activation='tanh', - data_format='channels_last', border_mode="valid") - ques = conv(conv1) - ans = conv(conv2) - - else: - ques = Conv2D(nb_filter=nb_filter, kernel_size=filter_length, activation='relu', - data_format='channels_last', padding='same')(conv1) - ans = Conv2D(nb_filter, kernel_size=filter_length, activation='relu', - data_format="channels_last", padding='same')(conv2) - - if attention in [3]: - ques = Reshape( - (ques._keras_shape[1], ques._keras_shape[2]*ques._keras_shape[3]))(ques) - ans = Reshape( - (ans._keras_shape[1], ans._keras_shape[2]*ans._keras_shape[3]))(ans) - - rep_vec = ques._keras_shape[2] - - # if attention in [3]: - - ans_T = Permute((2, 1))(ans) - ques_T = Permute((2, 1))(ques) - - attn2_mat = MatchScore(ques, ans) - - a1_row = Lambda(lambda a: K.sum(a, axis=1), output_shape=( - attn2_mat._keras_shape[2], 1))(attn2_mat) - a2_col = Lambda(lambda a: K.sum(a, axis=2), output_shape=( - attn2_mat._keras_shape[1], 1))(attn2_mat) - - a1_row = RepeatVector(rep_vec)(a1_row) - a2_col = RepeatVector(rep_vec)(a2_col) - - attn_pool_1 = Merge(mode='mul')([a1_row, ques_T]) - attn_pool_2 = Merge(mode='mul')([a2_col, ans_T]) - #attn_pool_2 = Permute((2,1))(attn_pool_2) - - #h1 = Lambda(lambda a: K.sum(a,axis=1))(attn_pool_1) - #h2 = Lambda(lambda a: K.sum(a,axis=2))(attn_pool_2) - - conv1 = GlobalAveragePooling1D()(attn_pool_1) - conv2 = GlobalAveragePooling1D()(attn_pool_2) - - else: - conv1 = GlobalMaxPooling2D()(ques) - conv2 = GlobalMaxPooling2D()(ans) - # conv1 = Flatten()MaxPooling2D()(ques) - # conv2 = Flatten()MaxPooling2D()(ans) - channel_1.append(conv1) - channel_2.append(conv2) - - h1 = channel_1.pop(-1) - if channel_1: - h1 = merge([h1] + channel_1, mode="concat") - - h2 = channel_2.pop(-1) - if channel_2: - h2 = merge([h2] + channel_2, mode="concat") - - h = Merge(mode="concat", name='h')([h1, 
h2]) - - opt = keras.optimizers.adam(lr=opt_params[0], clipnorm=1.) - - score = Dense(2, activation='softmax', name='score')(h) - model = Model([inpx, inpy], [score]) - model.compile(optimizer=opt, loss='categorical_crossentropy') - - return model - - -def bcnn(embedding_matrix, dimx=50, dimy=50, nb_filter=120, embedding_dim=50, - filter_length=(50, 4), depth=1, shared=0, - opt_params=[0.0008, 'adam']): - - # if True: - print 'Model Uses BCNN......' - - inpx = Input(shape=(dimx,), dtype='int32', name='inpx') - inpy = Input(shape=(dimy,), dtype='int32', name='inpy') - - x = word2vec_embedding_layer(embedding_matrix, train=False)(inpx) - y = word2vec_embedding_layer(embedding_matrix, train=False)(inpy) - - x = Permute((2, 1))(x) - y = Permute((2, 1))(y) - - conv1 = Reshape((embedding_dim, dimx, 1))(x) - conv2 = Reshape((embedding_dim, dimy, 1))(y) - - channel_1, channel_2 = [], [] - - for dep in range(depth): - - #filter_width = filter_widths[dep] - - #conv1 = ZeroPadding2D((filter_width - 1, 0))(conv1) - #conv2 = ZeroPadding2D((filter_width - 1, 0))(conv2) - - if shared: - conv = Conv2D(nb_filter=nb_filter, kernel_size=filter_length, activation='relu', - data_format='channels_last', border_mode="valid") - ques = conv(conv1) - ans = conv(conv2) - - else: - ques = Conv2D(nb_filter=nb_filter, kernel_size=filter_length, activation='relu', - data_format='channels_last', border_mode="valid")(conv1) - ans = Conv2D(nb_filter, kernel_size=filter_length, activation='relu', - data_format="channels_last", border_mode="valid")(conv2) - - ques = Dropout(0.5)(ques) - ans = Dropout(0.5)(ans) - channel_1.append(GlobalMaxPooling2D()(ques)) - channel_2.append(GlobalMaxPooling2D()(ans)) - - # channel_1.append(Reshape((ques._keras_shape[2]*ans._keras_shape[3]))(AveragePooling2D(4))(ques)) - # channel_2.appendFlatten()((AveragePooling2D(4))(ans)) - - #reg1 = reg2 = 0.00002 - - h1 = channel_1.pop(-1) - if channel_1: - h1 = merge([h1] + channel_1, mode="concat") - - h2 = channel_2.pop(-1) - if channel_2: - h2 = merge([h2] + channel_2, mode="concat") - - #h1 = Dropout(0.5)(h1) - #h2 = Dropout(0.5)(h2) - - #reg2 = 0.00005 - - h = Merge(mode="concat", name='h')([h1, h2]) - #h = Dropout(0.2)(h) - #h = Dense(50, kernel_regularizer=regularizers.l2(reg2),activation='relu')(h) - #wrap = Dropout(0.5)(h) - #wrap = Dense(64, activation='tanh')(h) - - opt = keras.optimizers.adam(lr=opt_params[0], clipnorm=1.) - - score = Dense(2, activation='softmax', name='score')(h) - model = Model([inpx, inpy], [score]) - model.compile(loss='categorical_crossentropy', optimizer=opt) - #label = to_categorical(label) - - # model.fit([data_l,data_r],label,nb_epoch=nb_epoch,batch_size=batch_size,verbose=2) - return model - - -def compute_euclidean_match_score(l_r): - l, r = l_r - denominator = 1. + K.sqrt( - -2 * K.batch_dot(l, r, axes=[2, 2]) + - K.expand_dims(K.sum(K.square(l), axis=2), 2) + - K.expand_dims(K.sum(K.square(r), axis=2), 1) - ) - denominator = K.maximum(denominator, K.epsilon()) - return 1. 
/ denominator - - -def MatchScore(l, r, mode="euclidean"): - if mode == "euclidean": - return merge( - [l, r], - mode=compute_euclidean_match_score, - output_shape=lambda shapes: (None, shapes[0][1], shapes[1][1]) - ) -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -from dl_text import dl -from dl_text.metrics import eval_metric -import wiki_utils as wk -import model_WALSTM as model -import sys -sys.path.append("..\_deeplearn_utils") - - -glove_fname = 'K:/workspace/neural network/Trec_QA-master/glove.6B.50d.txt' - -################### DEFINING MODEL AND PREDICTION FILE ################### - -lrmodel = model.WA_LSTM -model_name = lrmodel.func_name - -################### DEFINING HYPERPARAMETERS ################### - -dimx = 50 -dimy = 50 -dimft = 44 -batch_size = 70 -vocab_size = 8000 -embedding_dim = 50 -LSTM_neurons = 64 -depth = 1 -nb_epoch = 3 -shared = 1 -opt_params = [0.001, 'adam'] - -ques, ans, label_train, train_len, test_len,\ - wordVec_model, res_fname, pred_fname, feat_train, feat_test = wk.load_wiki( - model_name, glove_fname) -data_l, data_r, embedding_matrix = dl.process_data(ques, ans, - wordVec_model, dimx=dimx, - dimy=dimy, vocab_size=vocab_size, - embedding_dim=embedding_dim) - -X_train_l, X_test_l, X_dev_l, X_train_r, X_test_r, X_dev_r = wk.prepare_train_test(data_l, data_r, - train_len, test_len) - -lrmodel = lrmodel(embedding_matrix, dimx=dimx, dimy=dimy, LSTM_neurons=LSTM_neurons, embedding_dim=embedding_dim, - depth=depth, shared=shared, opt_params=opt_params) - -print '\n', model_name, 'model built \n' -lrmodel.fit([X_train_l, X_train_r], label_train, - batch_size=batch_size, nb_epoch=nb_epoch, verbose=2) -map_val, mrr_val = eval_metric( - lrmodel, X_test_l, X_test_r, res_fname, pred_fname) - - -print 'MAP : ', map_val, ' MRR : ', mrr_val -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -import keras -from keras.models import Model -from keras import backend as K -from keras.layers.core import Dense, Reshape, Permute, Activation -from keras.layers import Input, merge, ZeroPadding2D, RepeatVector, LSTM, Bidirectional, GlobalAveragePooling2D, GlobalMaxPooling1D, GlobalAveragePooling1D, ZeroPadding1D, AveragePooling1D, GlobalMaxPooling2D, Dropout, Merge, Conv1D, Lambda, Flatten, Conv2D, MaxPooling2D, MaxPooling1D, UpSampling2D, Convolution2D, TimeDistributed - - -from dl_text.dl import word2vec_embedding_layer - - -def WA_LSTM(embedding_matrix, dimx=50, dimy=50, nb_filter=120, embedding_dim=50, - filter_length=(50, 4), depth=1, shared=0, LSTM_neurons=64, word_level=1, - opt_params=[0.0008, 'adam']): - - print 'Model Uses Attention+LSTM......'
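- - # Sketch of the network built below: both inputs are encoded by - # (optionally shared) bidirectional LSTMs; a word-level attention score - # s = softmax(dense(tanh(dense(ques) + dense(ans)))) is computed per - # timestep and the answer encoding is reweighted as ans_f = ans * s - # before global max pooling and the final 2-way softmax.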
- - inpx = Input(shape=(dimx,), dtype='int32', name='inpx') - inpy = Input(shape=(dimy,), dtype='int32', name='inpy') - - x = word2vec_embedding_layer(embedding_matrix, train=False)(inpx) - y = word2vec_embedding_layer(embedding_matrix, train=False)(inpy) - - #x = Permute((2,1))(x) - #y = Permute((2,1))(y) - - channel_1, channel_2 = [], [] - - for dep in range(depth): - - #filter_width = filter_widths[dep] - - #conv1 = ZeroPadding2D((filter_width - 1, 0))(conv1) - #conv2 = ZeroPadding2D((filter_width - 1, 0))(conv2) - - if shared: - shared_lstm = Bidirectional( - LSTM(LSTM_neurons, return_sequences=True), merge_mode='concat') - ques = shared_lstm(x) - ans = shared_lstm(y) - - else: - ques = Bidirectional( - LSTM(LSTM_neurons, return_sequences=True), merge_mode='concat')(x) - ans = Bidirectional( - LSTM(LSTM_neurons, return_sequences=True), merge_mode='concat')(y) - -############## word - level attention ######################### - - if word_level: - q_vec = TimeDistributed(Dense(1))(ques) - else: - q_vec = Dense(1)(ques) - q_vec = RepeatVector(dimx)(q_vec) - - a_vec = TimeDistributed(Dense(1))(ans) - m = Merge(mode='sum')([q_vec, a_vec]) - m = Activation(activation='tanh')(m) - s = TimeDistributed(Dense(1, activation='softmax'))(m) - ans_f = Merge(mode='mul')([ans, s]) - - ques = Dropout(0.5)(ques) - ans = Dropout(0.5)(ans) - channel_1.append(GlobalMaxPooling1D()(ques)) - channel_2.append(GlobalMaxPooling1D()(ans_f)) - - x = MaxPooling1D()(ques) - y = MaxPooling1D()(ans) - - #reg1 = reg2 = 0.00002 - - h1 = channel_1.pop(-1) - if channel_1: - h1 = merge([h1] + channel_1, mode="concat") - - h2 = channel_2.pop(-1) - if channel_2: - h2 = merge([h2] + channel_2, mode="concat") - - #h1 = Dropout(0.5)(h1) - #h2 = Dropout(0.5)(h2) - - #reg2 = 0.00005 - - h = Merge(mode="concat", name='h')([h1, h2]) - #h = Dropout(0.2)(h) - #h = Dense(50, kernel_regularizer=regularizers.l2(reg2),activation='relu')(h) - #wrap = Dropout(0.5)(h) - #wrap = Dense(64, activation='tanh')(h) - - opt = keras.optimizers.adam(lr=opt_params[0], clipnorm=1.) 
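- - # categorical_crossentropy with the 2-way softmax below expects one-hot - # labels, so label_train should be encoded via keras.utils.to_categorical - # (compare the commented-out to_categorical call in bcnn above)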
- - score = Dense(2, activation='softmax', name='score')(h) - model = Model([inpx, inpy], [score]) - model.compile(loss='categorical_crossentropy', optimizer=opt) - - return model -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -import model_abcnn as model -import wiki_utils as wk -from dl_text.metrics import eval_metric -from dl_text import dl - -glove_fname = 'D:/workspace/NLP/data/Glove/glove.6B.50d.txt' - -################### DEFINING MODEL AND PREDICTION FILE ################### - -lrmodel = model.abcnn -model_name = lrmodel.func_name - -################### DEFINING HYPERPARAMETERS ################### - -dimx = 50 -dimy = 50 -dimft = 44 -batch_size = 70 -vocab_size = 8000 -embedding_dim = 50 -nb_filter = 120 -filter_length = (50, 4) -depth = 1 -nb_epoch = 3 -shared = 1 -opt_params = [0.001, 'adam'] - -ques, ans, label_train, train_len, test_len,\ - wordVec_model, res_fname, pred_fname, feat_train, feat_test = wk.load_wiki( - model_name, glove_fname) -data_l, data_r, embedding_matrix = dl.process_data(ques, ans, - wordVec_model, dimx=dimx, - dimy=dimy, vocab_size=vocab_size, - embedding_dim=embedding_dim) - -X_train_l, X_test_l, X_dev_l, X_train_r, X_test_r, X_dev_r = wk.prepare_train_test(data_l, data_r, - train_len, test_len) - -if model_name == 'abcnn': - lrmodel = lrmodel(embedding_matrix, dimx=dimx, dimy=dimy, nb_filter=nb_filter, embedding_dim=embedding_dim, - filter_length=filter_length, depth=depth, shared=shared, - opt_params=opt_params) - - print '\n', model_name, 'model built \n' - lrmodel.fit([X_train_l, X_train_r], label_train, - batch_size=batch_size, nb_epoch=nb_epoch, verbose=2) - map_val, mrr_val = eval_metric( - lrmodel, X_test_l, X_test_r, res_fname, pred_fname) - -else: - lrmodel = lrmodel(embedding_matrix, dimx=dimx, dimy=dimy, nb_filter=nb_filter, embedding_dim=embedding_dim, - filter_length=filter_length, depth=depth, shared=shared, - opt_params=opt_params) - - print '\n', model_name, 'model built \n' - lrmodel.fit([X_train_l, X_train_r], label_train, - batch_size=batch_size, nb_epoch=nb_epoch, verbose=2) - map_val, mrr_val = eval_metric( - lrmodel, X_test_l, X_test_r, res_fname, pred_fname) - - -print 'MAP : ', map_val, ' MRR : ', mrr_val -# -*- coding: utf-8 -*- - -""" -Created on Tue Mar 07 11:48:18 2017 - -@author: Gaurav Bhatt -Email - gauravbhatt.cs.iitr@gmail.com -""" - -import sys -import math -import random -import warnings -import numpy as np -from sklearn import svm -import keras.backend as K -from keras.models import Model -from theano import tensor as T -import matplotlib.pyplot as plt -from keras.layers import Input, Merge -from keras.engine.topology import Layer -from sklearn.metrics import accuracy_score -from keras.layers.core import Activation, Dense - -warnings.simplefilter("ignore") - -hdim = 50 -h_loss = 50 -hdim_deep = 500 -hdim_deep2 = 300 -nb_epoch = 40 -batch_size = 100 -dimx = 392 -dimy = 392 -lamda = 0.02 -loss_type = 2 # 1 - l1+l2+l3-L4; 2 - l2+l3-L4; 3 - l1+l2+l3 , 4 - l2+l3 - - -def svm_classifier(train_x, train_y, valid_x, valid_y, test_x, test_y): - - clf = svm.LinearSVC() - # print train_x.shape,train_y.shape - clf.fit(train_x, train_y) - pred = clf.predict(valid_x) - va = accuracy_score(np.ravel(valid_y), np.ravel(pred)) - pred = clf.predict(test_x) - ta = accuracy_score(np.ravel(test_y), np.ravel(pred)) - return va, ta - - -def split(train_l, train_r, label, ratio): - - total = train_l.shape[0] - train_samples = int(total*(1-ratio)) - test_samples = total-train_samples - tr_l, tst_l, tr_r, tst_r, 
l_tr, l_tst = [], [], [], [], [], [] - dat = random.sample(range(total), train_samples) - for a in dat: - tr_l.append(train_l[a, :]) - tr_r.append(train_r[a, :]) - l_tr.append(label[a]) - - for i in range(test_samples): - if i not in dat: - tst_l.append(train_l[i, :]) - tst_r.append(train_r[i, :]) - l_tst.append(label[i]) - - tr_l = np.array(tr_l) - tr_r = np.array(tr_r) - tst_l = np.array(tst_l) - tst_r = np.array(tst_r) - l_tr = np.array(l_tr) - l_tst = np.array(l_tst) - - return tr_l, tst_l, tr_r, tst_r, l_tr, l_tst - - -class ZeroPadding(Layer): - def __init__(self, **kwargs): - super(ZeroPadding, self).__init__(**kwargs) - - def call(self, x, mask=None): - return K.zeros_like(x) - - def get_output_shape_for(self, input_shape): - return input_shape - - -class MultiplyBy2(Layer): - def __init__(self, **kwargs): - super(MultiplyBy2, self).__init__(**kwargs) - - def call(self, x, mask=None): - return 2*x - - def get_output_shape_for(self, input_shape): - return input_shape - - -class CorrnetCost(Layer): - def __init__(self, lamda, **kwargs): - super(CorrnetCost, self).__init__(**kwargs) - self.lamda = lamda - - def cor(self, y1, y2, lamda): - y1_mean = K.mean(y1, axis=0) - y1_centered = y1 - y1_mean - y2_mean = K.mean(y2, axis=0) - y2_centered = y2 - y2_mean - corr_nr = K.sum(y1_centered * y2_centered, axis=0) - corr_dr1 = K.sqrt(T.sum(y1_centered * y1_centered, axis=0) + 1e-8) - corr_dr2 = K.sqrt(T.sum(y2_centered * y2_centered, axis=0) + 1e-8) - corr_dr = corr_dr1 * corr_dr2 - corr = corr_nr / corr_dr - return K.sum(corr) * lamda - - def call(self, x, mask=None): - h1 = x[0] - h2 = x[1] - - corr = self.cor(h1, h2, self.lamda) - - # self.add_loss(corr,x) - # we output junk but be sure to use it for the loss to be added - return corr - - def get_output_shape_for(self, input_shape): - # print input_shape[0][0] - return (input_shape[0][0], input_shape[0][1]) - - -def corr_loss(y_true, y_pred): - # print y_true.type,y_pred.type - # return K.zeros_like(y_pred) - return y_pred - - -def project(model, inp): - m = model.predict([inp[0], inp[1]]) - return m[2] - - -def reconstruct_from_left(model, inp): - img_inp = inp.reshape((28, 14)) - f, axarr = plt.subplots(1, 2, sharey=False) - pred = model.predict([inp, np.zeros_like(inp)]) - img = pred[0].reshape((28, 14)) - axarr[0].imshow(img_inp) - axarr[1].imshow(img) - - -def reconstruct_from_right(model, inp): - img_inp = inp.reshape((28, 14)) - f, axarr = plt.subplots(1, 2, sharey=False) - pred = model.predict([np.zeros_like(inp), inp]) - img = pred[1].reshape((28, 14)) - axarr[1].imshow(img_inp) - axarr[0].imshow(img) - - -def sum_corr(model): - view1 = np.load("test_v1.npy") - view2 = np.load("test_v2.npy") - x = project(model, [view1, np.zeros_like(view1)]) - y = project(model, [np.zeros_like(view2), view2]) - print "test correlation" - corr = 0 - for i in range(0, len(x[0])): - x1 = x[:, i] - (np.ones(len(x))*(sum(x[:, i])/len(x))) - x2 = y[:, i] - (np.ones(len(y))*(sum(y[:, i])/len(y))) - nr = sum(x1 * x2)/(math.sqrt(sum(x1*x1))*math.sqrt(sum(x2*x2))) - corr += nr - print corr - - -def transfer(model): - view1 = np.load("test_v1.npy") - view2 = np.load("test_v2.npy") - labels = np.load("test_l.npy") - view1 = project(model, [view1, np.zeros_like(view1)]) - view2 = project(model, [np.zeros_like(view2), view2]) - - perp = len(view1)/5 - print "view1 to view2" - acc = 0 - for i in range(0, 5): - test_x = view2[i*perp:(i+1)*perp] - test_y = labels[i*perp:(i+1)*perp] - if i == 0: - train_x = view1[perp:len(view1)] - train_y = 
labels[perp:len(view1)] - elif i == 4: - train_x = view1[0:4*perp] - train_y = labels[0:4*perp] - else: - train_x1 = view1[0:i*perp] - train_y1 = labels[0:i*perp] - train_x2 = view1[(i+1)*perp:len(view1)] - train_y2 = labels[(i+1)*perp:len(view1)] - train_x = np.concatenate((train_x1, train_x2)) - train_y = np.concatenate((train_y1, train_y2)) - - va, ta = svm_classifier( - train_x, train_y, test_x, test_y, test_x, test_y) - acc += ta - print acc/5 - print "view2 to view1" - - acc = 0 - for i in range(0, 5): - test_x = view1[i*perp:(i+1)*perp] - test_y = labels[i*perp:(i+1)*perp] - if i == 0: - train_x = view2[perp:len(view1)] - train_y = labels[perp:len(view1)] - elif i == 4: - train_x = view2[0:4*perp] - train_y = labels[0:4*perp] - else: - train_x1 = view2[0:i*perp] - train_y1 = labels[0:i*perp] - train_x2 = view2[(i+1)*perp:len(view1)] - train_y2 = labels[(i+1)*perp:len(view1)] - train_x = np.concatenate((train_x1, train_x2)) - train_y = np.concatenate((train_y1, train_y2)) - va, ta = svm_classifier( - train_x, train_y, test_x, test_y, test_x, test_y) - acc += ta - print acc/5 - - -def prepare_data(): - data_l = np.load('data_l.npy') - data_r = np.load('data_r.npy') - label = np.load('data_label.npy') - X_train_l, X_test_l, X_train_r, X_test_r, y_train, y_test = split( - data_l, data_r, label, ratio=0.0) - return X_train_l, X_train_r - - -def buildModel(loss_type, lamda): - - inpx = Input(shape=(dimx,)) - inpy = Input(shape=(dimy,)) - - hx = Dense(hdim_deep, activation='sigmoid')(inpx) - hx = Dense(hdim_deep2, activation='sigmoid', name='hid_l1')(hx) - hx = Dense(hdim, activation='sigmoid', name='hid_l')(hx) - - hy = Dense(hdim_deep, activation='sigmoid')(inpy) - hy = Dense(hdim_deep2, activation='sigmoid', name='hid_r1')(hy) - hy = Dense(hdim, activation='sigmoid', name='hid_r')(hy) - - #h = Activation("sigmoid")( Merge(mode="sum")([hx,hy]) ) - h = Merge(mode="sum")([hx, hy]) - - #recx = Dense(hdim_deep,activation='sigmoid')(h) - recx = Dense(dimx)(h) - #recy = Dense(hdim_deep,activation='sigmoid')(h) - recy = Dense(dimy)(h) - - branchModel = Model([inpx, inpy], [recx, recy, h]) - - #inpx = Input(shape=(dimx,)) - #inpy = Input(shape=(dimy,)) - - [recx1, recy1, h1] = branchModel([inpx, ZeroPadding()(inpy)]) - [recx2, recy2, h2] = branchModel([ZeroPadding()(inpx), inpy]) - - # you may probably add a reconstruction from combined - [recx3, recy3, h] = branchModel([inpx, inpy]) - - corr = CorrnetCost(-lamda)([h1, h2]) - - if loss_type == 1: - model = Model([inpx, inpy], [recy1, recx2, - recx3, recx1, recy2, recy3, corr]) - model.compile(loss=["mse", "mse", "mse", "mse", - "mse", "mse", corr_loss], optimizer="rmsprop") - elif loss_type == 2: - model = Model([inpx, inpy], [recy1, recx2, recx1, recy2, corr]) - model.compile(loss=["mse", "mse", "mse", "mse", - corr_loss], optimizer="rmsprop") - elif loss_type == 3: - model = Model([inpx, inpy], [recy1, recx2, recx3, recx1, recy2, recy3]) - model.compile(loss=["mse", "mse", "mse", "mse", - "mse", "mse"], optimizer="rmsprop") - elif loss_type == 4: - model = Model([inpx, inpy], [recy1, recx2, recx1, recy2]) - model.compile(loss=["mse", "mse", "mse", "mse"], optimizer="rmsprop") - - return model, branchModel - - -def trainModel(model, data_left, data_right, loss_type, nb_epoch, batch_size): - - X_train_l = data_left - X_train_r = data_right - #y_train = np_utils.to_categorical(y_train, nb_classes) - #y_test = np_utils.to_categorical(y_test, nb_classes) - - data_l = np.load('data_l.npy') - data_r = np.load('data_r.npy') - label = 
np.load('data_label.npy') - X_train_l, X_test_l, X_train_r, X_test_r, y_train, y_test = split( - data_l, data_r, label, ratio=0.01) - - print 'data split' - if loss_type == 1: - print 'L_Type: l1+l2+l3-L4 h_dim:', hdim, ' lamda:', lamda - model.fit([X_train_l, X_train_r], [X_train_r, X_train_l, X_train_l, X_train_l, X_train_r, X_train_r, np.zeros((X_train_l.shape[0], h_loss))], - nb_epoch=nb_epoch, - batch_size=batch_size, verbose=0) - elif loss_type == 2: - print 'L_Type: l2+l3-L4 h_dim:', hdim, ' hdim_deep', hdim_deep, ' lamda:', lamda - model.fit([X_train_l, X_train_r], [X_train_r, X_train_l, X_train_l, X_train_r, np.zeros((X_train_l.shape[0], h_loss))], - nb_epoch=nb_epoch, - batch_size=batch_size, verbose=0) - elif loss_type == 3: - print 'L_Type: l1+l2+l3 h_dim:', hdim, ' lamda:', lamda - model.fit([X_train_l, X_train_r], [X_train_r, X_train_l, X_train_l, X_train_l, X_train_r, X_train_r], - nb_epoch=nb_epoch, - batch_size=batch_size, verbose=0) - elif loss_type == 4: - print 'L_Type: l2+l3 h_dim:', hdim, ' lamda:', lamda - model.fit([X_train_l, X_train_r], [X_train_r, X_train_l, X_train_l, X_train_r], - nb_epoch=nb_epoch, - batch_size=batch_size, verbose=0) - # score = m.evaluate([X_test_l,X_test_r], [X_test_l,X_test_l,X_test_r,X_test_r,np.zeros((X_test_l.shape[0],hdim))], - # batch_size=100) - # print score - - -def testModel(b_model): - transfer(b_model) - sum_corr(b_model) - - -left_view, right_view = prepare_data() -model, branchModel = buildModel(loss_type=loss_type, lamda=lamda) -trainModel(model=model, data_left=left_view, data_right=right_view, - loss_type=loss_type, nb_epoch=nb_epoch, batch_size=batch_size) -testModel(branchModel) -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -import numpy as np -import sick_utils as sick -from dl_text import dl -from dl_text.metrics import eval_sick -import model_Siam_LSTM as model -import sys -sys.path.append("..\_deeplearn_utils") - - -lrmodel = model.S_LSTM -model_name = lrmodel.func_name - -embedding_dim = 300 -LSTM_neurons = 50 -dimx = 30 -dimy = 30 -vocab_size = 8000 -batch_size = 32 -epochs = 3 - -wordVec = 'path_to_Word2Vec(300 dim)/GoogleNews-vectors-negative300.bin.gz' -wordVec = None -sent1, sent2, train_len, test_len, train_score, test_score, wordVec_model, pred_fname = sick.load_sick( - model_name, wordVec) - -data_l, data_r, embedding_matrix = dl.process_data(sent1, sent2, - wordVec_model, dimx=dimx, - dimy=dimy, vocab_size=vocab_size, - embedding_dim=embedding_dim) - -X_train_l, X_test_l, X_dev_l, X_train_r, X_test_r, X_dev_r = dl.prepare_train_test(data_l, data_r, - train_len, test_len) - -print '\n', model_name, 'model built \n' - -lrmodel = lrmodel(dimx=dimx, dimy=dimy, embedding_matrix=embedding_matrix, - LSTM_neurons=LSTM_neurons) -lrmodel.fit([X_train_l, X_train_r], - train_score, - nb_epoch=epochs, - batch_size=batch_size, verbose=1) - -print '\n evaluating performance \n' - -sp_coef, per_coef, mse = eval_sick(lrmodel, X_test_l, X_test_r, test_score) -print 'spearman coef :', sp_coef -print 'pearson coef :', per_coef -print 'mse :', mse -# -*- coding: utf-8 -*- -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -from keras import optimizers -from keras.models import Model -from keras.layers import Input, Flatten, Merge, Embedding, Multiply, Bidirectional, LSTM, Dense, RepeatVector, Dropout, TimeDistributed, Lambda - -from dl_text.dl import word2vec_embedding_layer -from dl_layers.layers import Abs, Exp - - -def S_LSTM(dimx=30, dimy=30, embedding_matrix=None, 
LSTM_neurons=32): - - inpx = Input(shape=(dimx,), dtype='int32', name='inpx') - x = word2vec_embedding_layer(embedding_matrix, train='False')(inpx) - inpy = Input(shape=(dimy,), dtype='int32', name='inpy') - y = word2vec_embedding_layer(embedding_matrix, train='False')(inpy) - - #hx = LSTM(LSTM_neurons)(x) - #hy = LSTM(LSTM_neurons)(y) - - shared_lstm = Bidirectional( - LSTM(LSTM_neurons, return_sequences=False), merge_mode='sum') - #shared_lstm = LSTM(LSTM_neurons,return_sequences=True) - hx = shared_lstm(x) - #hx = Dropout(0.2)(hx) - hy = shared_lstm(y) - #hy = Dropout(0.2)(hy) - - h1, h2 = hx, hy - - corr1 = Exp()([h1, h2]) - adadelta = optimizers.Adadelta() - - model = Model([inpx, inpy], corr1) - model.compile(loss='binary_crossentropy', optimizer=adadelta) - - return model -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -from dl_text import dl -from dl_text.metrics import eval_metric -import trec_utils as trec -import model_sim as model -import sys -sys.path.append("..\_deeplearn_utils") - - -glove_fname = 'K:/workspace/neural network/Trec_QA-master/glove.6B.50d.txt' - -################### DEFINING MODEL AND PREDICTION FILE ################### - -lrmodel = model.cnn_sim_ft -model_name = lrmodel.func_name - -################### DEFINING HYPERPARAMETERS ################### - -dimx = 50 -dimy = 50 -dimft = 44 -batch_size = 70 -vocab_size = 10000 -embedding_dim = 50 -nb_filter = 120 -filter_length = (50, 4) -depth = 1 -nb_epoch = 4 - -ques, ans, label_train, train_len, test_len,\ - wordVec_model, res_fname, pred_fname, feat_train, feat_test = trec.load_trec( - model_name, glove_fname) -data_l, data_r, embedding_matrix = dl.process_data(ques, ans, - wordVec_model, dimx=dimx, - dimy=dimy, vocab_size=vocab_size, - embedding_dim=embedding_dim) - -X_train_l, X_test_l, X_dev_l, X_train_r, X_test_r, X_dev_r = trec.prepare_train_test(data_l, data_r, - train_len, test_len) - -if model_name == 'cnn_sim_ft': - lrmodel = lrmodel(embedding_matrix, dimx=dimx, dimy=dimy, dimft=dimft, nb_filter=nb_filter, - embedding_dim=embedding_dim, filter_length=filter_length, vocab_size=vocab_size, - depth=depth) - - print '\n', model_name, 'model built \n' - lrmodel.fit([X_train_l, X_train_r, feat_train], label_train, - batch_size=batch_size, nb_epoch=nb_epoch, verbose=2) - map_val, mrr_val = eval_metric( - lrmodel, X_test_l, X_test_r, res_fname, pred_fname, feat_test=feat_test) - -else: - lrmodel = lrmodel(embedding_matrix, dimx=dimx, dimy=dimy, nb_filter=nb_filter, embedding_dim=embedding_dim, - filter_length=filter_length, vocab_size=vocab_size, depth=depth) - - print '\n', model_name, 'model built \n' - lrmodel.fit([X_train_l, X_train_r], label_train, - batch_size=batch_size, nb_epoch=nb_epoch, verbose=2) - map_val, mrr_val = eval_metric( - lrmodel, X_test_l, X_test_r, res_fname, pred_fname) - - -print 'MAP : ', map_val, ' MRR : ', mrr_val -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -from keras import backend as K -from keras.models import Model -from keras import regularizers -from keras.engine.topology import Layer -from keras.layers.core import Dense, Reshape, Permute -from keras.layers import Input, Embedding, GlobalAveragePooling2D, GlobalMaxPooling2D, GlobalMaxPooling1D, Bidirectional, Dense, Dropout, Merge, Multiply, Conv1D, Lambda, Flatten, LSTM, TimeDistributed, Conv2D, MaxPooling2D, UpSampling2D - -from dl_text.dl import word2vec_embedding_layer -from dl_layers.layers import Similarity - - -def cnn_sim(embedding_matrix, dimx=50, 
dimy=50, nb_filter=120, - embedding_dim=50, filter_length=(50, 4), vocab_size=8000, depth=1): - - print 'Model Uses CNN with Sim......' - - inpx = Input(shape=(dimx,), dtype='int32', name='inpx') - inpy = Input(shape=(dimy,), dtype='int32', name='inpy') - - x = word2vec_embedding_layer(embedding_matrix, train=True)(inpx) - y = word2vec_embedding_layer(embedding_matrix, train=True)(inpy) - - x = Permute((2, 1))(x) - y = Permute((2, 1))(y) - - conv1 = Reshape((embedding_dim, dimx, 1))(x) - conv2 = Reshape((embedding_dim, dimy, 1))(y) - - channel_1, channel_2 = [], [] - - for dep in range(depth): - - #conv1 = ZeroPadding2D((filter_width - 1, 0))(conv1) - #conv2 = ZeroPadding2D((filter_width - 1, 0))(conv2) - - ques = Conv2D(nb_filter=nb_filter, kernel_size=filter_length, activation='relu', - data_format='channels_last')(conv1) - ans = Conv2D(nb_filter, kernel_size=filter_length, activation='relu', - data_format="channels_last")(conv2) - - ques = Dropout(0.5)(ques) - ans = Dropout(0.5)(ans) - - ques = GlobalMaxPooling2D()(ques) - ans = GlobalMaxPooling2D()(ans) - - ques = Dropout(0.5)(ques) - ans = Dropout(0.5)(ans) - - channel_1.append(ques) - channel_2.append(ans) - - # channel_1.append(GlobalAveragePooling2D()(ques)) - # channel_2.append(GlobalAveragePooling2D()(ans)) - - h1 = channel_1.pop(-1) - if channel_1: - h1 = merge([h1] + channel_1, mode="concat") - - h2 = channel_2.pop(-1) - if channel_2: - h2 = merge([h2] + channel_2, mode="concat") - - sim = Similarity(nb_filter)([h1, h2]) - h = Merge(mode="concat", name='h')([h1, sim, h2]) - #h = Dropout(0.2)(h) - #h = Dense(50, kernel_regularizer=regularizers.l2(reg2),activation='relu')(h) - #wrap = Dropout(0.5)(h) - #wrap = Dense(64, activation='tanh')(h) - - score = Dense(2, activation='softmax', name='score')(h) - model = Model([inpx, inpy], [score]) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - return model - - -def cnn_sim_ft(embedding_matrix, dimx=50, dimy=50, dimft=44, nb_filter=120, - embedding_dim=50, filter_length=(50, 4), vocab_size=8000, depth=1): - - print 'Model Uses CNN with Sim and Features......' 
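# ---- editor's note ------------------------------------------------------
# The Similarity layer imported from dl_layers.layers is not shown in this
# dump. The usual formulation for this architecture (Severyn & Moschitti,
# 2015) is a learned bilinear form between the two pooled feature vectors;
# whether dl_layers implements exactly this is an assumption.
import numpy as np

def bilinear_similarity(h1, h2, M):
    """Single similarity feature sim = h1^T M h2, with M a learned (d, d) matrix."""
    return np.array([h1 @ M @ h2])

# usage: bilinear_similarity(np.ones(120), np.ones(120), np.eye(120)) -> array([120.])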
- - inpx = Input(shape=(dimx,), dtype='int32', name='inpx') - inpy = Input(shape=(dimy,), dtype='int32', name='inpy') - inpft = Input(shape=(dimft,), dtype='int32', name='inpft') - - x = word2vec_embedding_layer(embedding_matrix, train=True)(inpx) - y = word2vec_embedding_layer(embedding_matrix, train=True)(inpy) - - x = Permute((2, 1))(x) - y = Permute((2, 1))(y) - - conv1 = Reshape((embedding_dim, dimx, 1))(x) - conv2 = Reshape((embedding_dim, dimy, 1))(y) - - channel_1, channel_2 = [], [] - - for dep in range(depth): - #filter_width = filter_length[1] - #conv1 = ZeroPadding2D((filter_width - 1, 0))(conv1) - #conv2 = ZeroPadding2D((filter_width - 1, 0))(conv2) - - ques = Conv2D(nb_filter=nb_filter, kernel_size=filter_length, activation='relu', - data_format='channels_last', border_mode="valid")(conv1) - ans = Conv2D(nb_filter, kernel_size=filter_length, activation='relu', - data_format="channels_last", border_mode="valid")(conv2) - - ques = Dropout(0.5)(ques) - ans = Dropout(0.5)(ans) - - ques = GlobalMaxPooling2D()(ques) - ans = GlobalMaxPooling2D()(ans) - - ques = Dropout(0.5)(ques) - ans = Dropout(0.5)(ans) - - channel_1.append(ques) - channel_2.append(ans) - - # channel_1.append(GlobalAveragePooling2D()(ques)) - # channel_2.append(GlobalAveragePooling2D()(ans)) - - h1 = channel_1.pop(-1) - if channel_1: - h1 = merge([h1] + channel_1, mode="concat") - - h2 = channel_2.pop(-1) - if channel_2: - h2 = merge([h2] + channel_2, mode="concat") - - sim = Similarity(nb_filter)([h1, h2]) - h = Merge(mode="concat", name='h')([h1, sim, h2, inpft]) - #h = Dropout(0.2)(h) - #h = Dense(50, kernel_regularizer=regularizers.l2(reg2),activation='relu')(h) - #wrap = Dropout(0.5)(h) - #wrap = Dense(64, activation='tanh')(h) - - score = Dense(2, activation='softmax', name='score')(h) - model = Model([inpx, inpy, inpft], [score]) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - return model -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -from dl_text import dl -from dl_text.metrics import eval_metric -import wiki_utils as wk -import model -import sys -sys.path.append("..\_deeplearn_utils") - - -glove_fname = 'K:/workspace/neural network/Trec_QA-master/glove.6B.50d.txt' - -################### DEFINING MODEL ################### - -lrmodel = model.cnn -model_name = lrmodel.func_name - -################### DEFINING HYPERPARAMETERS ################### - -dimx = 60 -dimy = 60 -dimft = 44 -batch_size = 50 -vocab_size = 8000 -embedding_dim = 50 -nb_filter = 120, -filter_length = (50, 4) -depth = 1 -nb_epoch = 3 - -ques, ans, label_train, train_len, test_len, wordVec_model, res_fname, pred_fname, feat_train, feat_test = wk.load_wiki( - model_name, glove_fname) -data_l, data_r, embedding_matrix = dl.process_data(ques, ans, - wordVec_model, dimx=dimx, - dimy=dimy, vocab_size=vocab_size, - embedding_dim=embedding_dim) - -X_train_l, X_test_l, X_dev_l, X_train_r, X_test_r, X_dev_r = wk.prepare_train_test(data_l, data_r, - train_len, test_len) - -if model_name == 'cnn_ft': - lrmodel = lrmodel(embedding_matrix, dimx=dimx, dimy=dimy, dimft=dimft, nb_filter=120, - embedding_dim=50, filter_length=(50, 4), vocab_size=8000, depth=1) - - print '\n', model_name, 'model built \n' - lrmodel.fit([X_train_l, X_train_r, feat_train], label_train, - batch_size=batch_size, nb_epoch=nb_epoch, verbose=2) - map_val, mrr_val = eval_metric( - lrmodel, X_test_l, X_test_r, res_fname, pred_fname, feat_test=feat_test) - -else: - lrmodel = lrmodel(embedding_matrix, dimx=dimx, dimy=dimy, 
nb_filter=120, - embedding_dim=50, filter_length=(50, 4), vocab_size=8000, depth=1) - - print '\n', model_name, 'model built \n' - lrmodel.fit([X_train_l, X_train_r], label_train, - batch_size=batch_size, nb_epoch=nb_epoch, verbose=2) - map_val, mrr_val = eval_metric( - lrmodel, X_test_l, X_test_r, res_fname, pred_fname) - - -print 'MAP : ', map_val, ' MRR : ', mrr_val -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -from keras.models import Model -from keras.layers.core import Dense, Reshape, Permute -from keras.layers import Input, merge, ZeroPadding2D, GlobalAveragePooling2D, GlobalMaxPooling1D, GlobalAveragePooling1D, ZeroPadding1D, AveragePooling1D, GlobalMaxPooling2D, Dropout, Merge, Conv1D, Lambda, Flatten, Conv2D, MaxPooling2D, UpSampling2D, Convolution2D - -from dl_text.dl import word2vec_embedding_layer - -######################## MODEL USING BASIC CNN ######################## - - -def cnn(embedding_matrix, dimx=50, dimy=50, nb_filter=120, - embedding_dim=50, filter_length=(50, 4), vocab_size=8000, depth=1): - - print 'Model Uses Basic CNN......' - - inpx = Input(shape=(dimx,), dtype='int32', name='inpx') - inpy = Input(shape=(dimy,), dtype='int32', name='inpy') - - x = word2vec_embedding_layer(embedding_matrix, train=False)(inpx) - y = word2vec_embedding_layer(embedding_matrix, train=False)(inpy) - - x = Permute((2, 1))(x) - y = Permute((2, 1))(y) - - conv1 = Reshape((embedding_dim, dimx, 1))(x) - conv2 = Reshape((embedding_dim, dimy, 1))(y) - - channel_1, channel_2 = [], [] - - for dep in range(depth): - - #conv1 = ZeroPadding2D((filter_width - 1, 0))(conv1) - #conv2 = ZeroPadding2D((filter_width - 1, 0))(conv2) - - ques = Conv2D(nb_filter=nb_filter, kernel_size=filter_length, activation='relu', - data_format='channels_last', border_mode="valid")(conv1) - ans = Conv2D(nb_filter, kernel_size=filter_length, activation='relu', - data_format="channels_last", border_mode="valid")(conv2) - - #conv1 = GlobalMaxPooling2D()(ques) - #conv2 = GlobalMaxPooling2D()(ans) - #conv1 = MaxPooling2D()(ques) - #conv2 = MaxPooling2D()(ans) - - channel_1.append(GlobalMaxPooling2D()(ques)) - channel_2.append(GlobalMaxPooling2D()(ans)) - - # channel_1.append(GlobalAveragePooling2D()(ques)) - # channel_2.append(GlobalAveragePooling2D()(ans)) - - h1 = channel_1.pop(-1) - if channel_1: - h1 = merge([h1] + channel_1, mode="concat") - - h2 = channel_2.pop(-1) - if channel_2: - h2 = merge([h2] + channel_2, mode="concat") - - h = Merge(mode="concat", name='h')([h1, h2]) - #h = Dropout(0.2)(h) - #h = Dense(50, kernel_regularizer=regularizers.l2(reg2),activation='relu')(h) - #wrap = Dropout(0.5)(h) - #wrap = Dense(64, activation='tanh')(h) - - score = Dense(2, activation='softmax', name='score')(h) - model = Model([inpx, inpy], [score]) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - return model - - -def cnn_ft(embedding_matrix, dimx=50, dimy=50, dimft=44, nb_filter=120, - embedding_dim=50, filter_length=(50, 4), vocab_size=8000, depth=1): - - print 'Model Uses CNN with Features......' 
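# ---- editor's note ------------------------------------------------------
# cnn() above targets the Keras 1 API (merge/Merge, nb_filter,
# border_mode). Since its Conv2D kernels span the full embedding height,
# each branch is equivalent to a Conv1D over the sequence axis. A sketch
# of the same two-branch classifier in the tf.keras functional API
# follows; it shares one convolution across branches for brevity, whereas
# the original builds a separate convolution per branch.
from tensorflow.keras import initializers, layers, models

def cnn_pair_classifier(embedding_matrix, seq_len=50, filters=120, kernel=4):
    vocab, emb = embedding_matrix.shape
    inpx = layers.Input(shape=(seq_len,), dtype="int32")
    inpy = layers.Input(shape=(seq_len,), dtype="int32")
    embed = layers.Embedding(
        vocab, emb,
        embeddings_initializer=initializers.Constant(embedding_matrix),
        trainable=False)                    # frozen, as with train=False above
    conv = layers.Conv1D(filters, kernel, activation="relu")
    pool = layers.GlobalMaxPooling1D()
    h = layers.concatenate([pool(conv(embed(inpx))), pool(conv(embed(inpy)))])
    score = layers.Dense(2, activation="softmax", name="score")(h)
    model = models.Model([inpx, inpy], score)
    model.compile(loss="categorical_crossentropy", optimizer="adam")
    return model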
- - inpx = Input(shape=(dimx,), dtype='int32', name='inpx') - inpy = Input(shape=(dimy,), dtype='int32', name='inpy') - inpft = Input(shape=(dimft,), dtype='int32', name='inpft') - - x = word2vec_embedding_layer(embedding_matrix, train=False)(inpx) - y = word2vec_embedding_layer(embedding_matrix, train=False)(inpy) - - x = Permute((2, 1))(x) - y = Permute((2, 1))(y) - - conv1 = Reshape((embedding_dim, dimx, 1))(x) - conv2 = Reshape((embedding_dim, dimy, 1))(y) - - channel_1, channel_2 = [], [] - - for dep in range(depth): - filter_width = filter_length[1] - conv1 = ZeroPadding2D((filter_width - 1, 0))(conv1) - conv2 = ZeroPadding2D((filter_width - 1, 0))(conv2) - - ques = Conv2D(nb_filter=nb_filter, kernel_size=filter_length, activation='relu', - data_format='channels_last', border_mode="valid")(conv1) - ans = Conv2D(nb_filter, kernel_size=filter_length, activation='relu', - data_format="channels_last", border_mode="valid")(conv2) - - #conv1 = GlobalMaxPooling2D()(ques) - #conv2 = GlobalMaxPooling2D()(ans) - #conv1 = MaxPooling2D()(ques) - #conv2 = MaxPooling2D()(ans) - - channel_1.append(GlobalMaxPooling2D()(ques)) - channel_2.append(GlobalMaxPooling2D()(ans)) - - # channel_1.append(GlobalAveragePooling2D()(ques)) - # channel_2.append(GlobalAveragePooling2D()(ans)) - - h1 = channel_1.pop(-1) - if channel_1: - h1 = merge([h1] + channel_1, mode="concat") - - h2 = channel_2.pop(-1) - if channel_2: - h2 = merge([h2] + channel_2, mode="concat") - - h = Merge(mode="concat", name='h')([h1, h2, inpft]) - #h = Dropout(0.2)(h) - #h = Dense(50, kernel_regularizer=regularizers.l2(reg2),activation='relu')(h) - #wrap = Dropout(0.5)(h) - #wrap = Dense(64, activation='tanh')(h) - - score = Dense(2, activation='softmax', name='score')(h) - model = Model([inpx, inpy, inpft], [score]) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - return model -# -*- coding: utf-8 -*- -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" -import pandas as pd -import numpy as np -import gensim as gen -from keras.utils.np_utils import to_categorical - - -def load_sick(model_name, wordVec=None): - - pred_fname = 'pred_%s' % model_name - - data = pd.read_csv( - '../_deeplearn_utils/data/sick/train_features.csv').values - ques_sent, ans_sent, q_test, a_test = [], [], [], [] - for i in data: - ques_sent.append(i[0].split()) - ans_sent.append(i[1].split()) - - data = pd.read_csv( - '../_deeplearn_utils/data/sick/test_features.csv').values - for i in data: - q_test.append(i[0].split()) - a_test.append(i[1].split()) - # data=[],[],[] - - score = np.array(pd.read_pickle( - "../_deeplearn_utils/data/sick/train_labels.pkl")).tolist() - score = [i[0]for i in score] - - sc_test = np.array(pd.read_pickle( - "../_deeplearn_utils/data/sick/test_labels.pkl")).tolist() - sc_test = [i[0]for i in sc_test] - - train_len = len(ques_sent) - test_len = len(q_test) - ques_sent.extend(q_test) - ans_sent.extend(a_test) - ques_sent = [' '.join(i) for i in ques_sent] - ans_sent = [' '.join(i) for i in ans_sent] - - # score.extend(sc_test) - - train_score = score - test_score = sc_test - - if wordVec != None: - wordVec_model = gen.models.KeyedVectors.load_word2vec_format( - wordVec, binary=True) - return ques_sent, ans_sent, train_len, test_len, train_score, test_score, wordVec_model, pred_fname - else: - return ques_sent, ans_sent, train_len, test_len, train_score, test_score, pred_fname - -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -import numpy as np - -from dl_text 
import * -from sklearn.preprocessing import StandardScaler -from keras.utils.np_utils import to_categorical - -################### LOADING, CLEANING AND PROCESSING DATASET ################### - - -def load_trec(model_name, glove_fname): - - train_file = open('../_deeplearn_utils/data/trec/train-filtered.tsv') - test_file = open('../_deeplearn_utils/data/trec/test-filtered.tsv') - dev_file = open('../_deeplearn_utils/data/trec/dev-filtered.tsv') - res_fname = 'test.ref' - pred_fname = 'pred_%s' % model_name - - train_header = train_file.readline().split('\t') - test_header = test_file.readline().split('\t') - dev_header = dev_file.readline().split('\t') - - data_train = train_file.readlines() - data_test = test_file.readlines() - data_dev = dev_file.readlines() - - data_train = [line.split('\t') for line in data_train] - data_test = [line.split('\t') for line in data_test] - data_dev = [line.split('\t') for line in data_dev] - - ques_train, ans_train, label_train = [], [], [] - ques_test, ans_test, label_test = [], [], [] - ques_dev, ans_dev, label_dev = [], [], [] - - for line in data_train: - ques_train.append(dl.clean(line[1])) - ans_train.append(dl.clean(line[5])) - label_train.append(int(line[-1][0])) - - for line in data_test: - ques_test.append(dl.clean(line[1])) - ans_test.append(dl.clean(line[5])) - label_test.append(int(line[-1][0])) - - for line in data_dev: - ques_dev.append(dl.clean(line[1])) - ans_dev.append(dl.clean(line[5])) - label_dev.append(int(line[-1][0])) - - ques, ans = [], [] - - for i in [ques_train, ques_test, ques_dev]: - ques.extend(i) - - for i in [ans_train, ans_test, ans_dev]: - ans.extend(i) - - train_len = len(data_train) - test_len = len(data_test) - - wordVec_model = dl.loadGloveModel(glove_fname) - - feat_LS = np.load('../_deeplearn_utils/Extracted_Features/trec/lex.npy') - feat_read = np.load('../_deeplearn_utils/Extracted_Features/trec/read.npy') - feat_numeric = np.load( - '../_deeplearn_utils/Extracted_Features/trec/numeric.npy') - - feat = np.hstack((feat_LS, feat_read, feat_numeric)) - - feat_train = feat[:train_len] - feat_test = feat[train_len:(test_len + train_len)] - # - ss = StandardScaler() - ss.fit(feat) - feat_train = ss.transform(feat_train) - feat_test = ss.transform(feat_test) - - return ques, ans, to_categorical(label_train), train_len, test_len, wordVec_model, res_fname, pred_fname, feat_train, feat_test - - -def prepare_train_test(data_l, data_r, train_len, test_len): - - X_train_l = data_l[:train_len] - X_test_l = data_l[train_len:(test_len + train_len)] - X_dev_l = data_l[(test_len + train_len):] - - X_train_r = data_r[:train_len] - X_test_r = data_r[train_len:(test_len + train_len)] - X_dev_r = data_r[(test_len + train_len):] - - return X_train_l, X_test_l, X_dev_l, X_train_r, X_test_r, X_dev_r -import numpy as np - -from dl_text import * -from sklearn.preprocessing import StandardScaler -from keras.utils.np_utils import to_categorical - -################### LOADING, CLEANING AND PROCESSING DATASET ################### - - -def load_wiki(model_name, glove_fname): - data_train = open( - '../_deeplearn_utils/data/wiki/WikiQA-train.txt').readlines() - data_test = open( - '../_deeplearn_utils/data/wiki/WikiQA-test.txt').readlines() - data_dev = open('../_deeplearn_utils/data/wiki/WikiQA-dev.txt').readlines() - res_fname = 'new_ref' - pred_fname = 'pred_%s' % model_name - - data_train = [line.split('\t') for line in data_train] - data_test = [line.split('\t') for line in data_test] - data_dev = [line.split('\t') for line in data_dev] - - 
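# ---- editor's note ------------------------------------------------------
# Both load_trec() above and this load_wiki() fit their StandardScaler on
# the full feature matrix (train + test + dev) before transforming, which
# leaks test statistics into training. A leak-free sketch with the same
# variable names:
from sklearn.preprocessing import StandardScaler

def scale_features(feat, train_len, test_len):
    ss = StandardScaler()
    feat_train = ss.fit_transform(feat[:train_len])            # fit on train only
    feat_test = ss.transform(feat[train_len:train_len + test_len])
    return feat_train, feat_test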
ques_train, ans_train, label_train = [], [], [] - ques_test, ans_test, label_test = [], [], [] - ques_dev, ans_dev, label_dev = [], [], [] - - for line in data_train: - ques_train.append(dl.clean(line[0])) - ans_train.append(dl.clean(line[1])) - label_train.append(int(line[2][0])) - - for line in data_test: - ques_test.append(dl.clean(line[0])) - ans_test.append(dl.clean(line[1])) - label_test.append(int(line[2][0])) - - for line in data_dev: - ques_dev.append(dl.clean(line[0])) - ans_dev.append(dl.clean(line[1])) - label_dev.append(int(line[2][0])) - - ques, ans, labels = [], [], [] - - for i in [ques_train, ques_test, ques_dev]: - ques.extend(i) - - for i in [ans_train, ans_test, ans_dev]: - ans.extend(i) - - for i in [label_train, label_test, label_dev]: - labels.extend(i) - - train_len = len(data_train) - test_len = len(data_test) - - wordVec_model = dl.loadGloveModel(glove_fname) - - feat_LS = np.load('../_deeplearn_utils/Extracted_Features/wiki/lex.npy') - feat_read = np.load('../_deeplearn_utils/Extracted_Features/wiki/read.npy') - feat_numeric = np.load( - '../_deeplearn_utils/Extracted_Features/wiki/numeric.npy') - - feat = np.hstack((feat_LS, feat_read, feat_numeric)) - - feat_train = feat[:train_len] - feat_test = feat[train_len:(test_len + train_len)] - # - ss = StandardScaler() - ss.fit(feat) - feat_train = ss.transform(feat_train) - feat_test = ss.transform(feat_test) - - return ques, ans, to_categorical(label_train), train_len, test_len, wordVec_model, res_fname, pred_fname, feat_train, feat_test - - -def prepare_train_test(data_l, data_r, train_len, test_len): - - X_train_l = data_l[:train_len] - X_test_l = data_l[train_len:(test_len + train_len)] - X_dev_l = data_l[(test_len + train_len):] - - X_train_r = data_r[:train_len] - X_test_r = data_r[train_len:(test_len + train_len)] - X_dev_r = data_r[(test_len + train_len):] - - return X_train_l, X_test_l, X_dev_l, X_train_r, X_test_r, X_dev_r -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -from dl_text import dl -from dl_text.metrics import eval_metric -import trec_utils as trec -import model_cntn as model -import sys -sys.path.append("..\_deeplearn_utils") - - -glove_fname = 'K:/workspace/neural network/Trec_QA-master/glove.6B.50d.txt' - -################### DEFINING MODEL ################### - -lrmodel = model.cntn -model_name = lrmodel.func_name - -################### DEFINING HYPERPARAMETERS ################### - -dimx = 50 -dimy = 50 -dimft = 44 -batch_size = 70 -vocab_size = 10000 -embedding_dim = 50 -nb_filter = 120 -filter_length = (50, 4) -depth = 1 -nb_epoch = 4 -num_tensor_slices = 4 - -ques, ans, label_train, train_len, test_len,\ - wordVec_model, res_fname, pred_fname, feat_train, feat_test = trec.load_trec( - model_name, glove_fname) -data_l, data_r, embedding_matrix = dl.process_data(ques, ans, - wordVec_model, dimx=dimx, - dimy=dimy, vocab_size=vocab_size, - embedding_dim=embedding_dim) - -X_train_l, X_test_l, X_dev_l, X_train_r, X_test_r, X_dev_r = trec.prepare_train_test(data_l, data_r, - train_len, test_len) - -lrmodel = lrmodel(embedding_matrix, dimx=dimx, dimy=dimy, nb_filter=nb_filter, embedding_dim=embedding_dim, - num_slices=num_tensor_slices, filter_length=filter_length, vocab_size=vocab_size, depth=depth) - -print '\n', model_name, 'model built \n' -lrmodel.fit([X_train_l, X_train_r], label_train, - batch_size=batch_size, nb_epoch=nb_epoch, verbose=2) -map_val, mrr_val = eval_metric( - lrmodel, X_test_l, X_test_r, res_fname, pred_fname) - - -print 'MAP : ', map_val, ' 
MRR : ', mrr_val -""" -** deeplean-ai.com ** -** dl-lab ** -created by :: GauravBh1010tt -""" - -from keras import backend as K -from keras.models import Model -from keras import regularizers -from keras.engine.topology import Layer -from keras.layers.core import Dense, Reshape, Permute -from keras.layers import Input, Embedding, GlobalAveragePooling2D, GlobalMaxPooling2D, GlobalMaxPooling1D, Bidirectional, Dense, Dropout, Merge, Multiply, Conv1D, Lambda, Flatten, LSTM, TimeDistributed, Conv2D, MaxPooling2D, UpSampling2D - -from dl_text.dl import word2vec_embedding_layer -from dl_layers.layers import Similarity, ntn - -######################## MODEL USING BASIC CNN ######################## - - -def cntn(embedding_matrix, dimx=50, dimy=50, nb_filter=120, num_slices=3, - embedding_dim=50, filter_length=(50, 4), vocab_size=8000, depth=1): - - print 'Model Uses CNTN ......' - - inpx = Input(shape=(dimx,), dtype='int32', name='inpx') - inpy = Input(shape=(dimy,), dtype='int32', name='inpy') - - x = word2vec_embedding_layer(embedding_matrix, train=True)(inpx) - y = word2vec_embedding_layer(embedding_matrix, train=True)(inpy) - - x = Permute((2, 1))(x) - y = Permute((2, 1))(y) - - conv1 = Reshape((embedding_dim, dimx, 1))(x) - conv2 = Reshape((embedding_dim, dimy, 1))(y) - - channel_1, channel_2 = [], [] - - for dep in range(depth): - - #conv1 = ZeroPadding2D((filter_width - 1, 0))(conv1) - #conv2 = ZeroPadding2D((filter_width - 1, 0))(conv2) - - ques = Conv2D(nb_filter=nb_filter, kernel_size=filter_length, activation='relu', - data_format='channels_last')(conv1) - ans = Conv2D(nb_filter, kernel_size=filter_length, activation='relu', - data_format="channels_last")(conv2) - - ques = Dropout(0.5)(ques) - ans = Dropout(0.5)(ans) - - ques = GlobalMaxPooling2D()(ques) - ans = GlobalMaxPooling2D()(ans) - - ques = Dropout(0.5)(ques) - ans = Dropout(0.5)(ans) - - channel_1.append(ques) - channel_2.append(ans) - - # channel_1.append(GlobalAveragePooling2D()(ques)) - # channel_2.append(GlobalAveragePooling2D()(ans)) - - h1 = channel_1.pop(-1) - if channel_1: - h1 = merge([h1] + channel_1, mode="concat") - - h2 = channel_2.pop(-1) - if channel_2: - h2 = merge([h2] + channel_2, mode="concat") - - ntn_score = ntn(h1._keras_shape[1], num_slices)([h1, h2]) - - #sim = Similarity(nb_filter)([h1,h2]) - h = Merge(mode="concat", name='h')([h1, ntn_score, h2]) - #h = Dropout(0.2)(h) - #h = Dense(50, kernel_regularizer=regularizers.l2(reg2),activation='relu')(h) - #wrap = Dropout(0.5)(h) - #wrap = Dense(64, activation='tanh')(h) - - score = Dense(2, activation='softmax', name='score')(h) - model = Model([inpx, inpy], [score]) - model.compile(loss='categorical_crossentropy', optimizer='adam') - - return model -# CorrMCNN: Gaurav Bhatt and Piyush Jha -# had to degrade numpy to 1.11.0 as 1.13.0 doesn't support float index type in arrays - -import sys -import math -import random -import warnings -import numpy as np -from sklearn import svm -import keras.backend as K -from keras.models import Model -from theano import tensor as T -import matplotlib.pyplot as plt -from keras.layers import Input, Merge -from keras.engine.topology import Layer -from sklearn.metrics import accuracy_score -from keras.layers.core import Activation, Dense, Reshape -from keras.layers import Conv2D, MaxPooling2D, UpSampling2D, Flatten - -warnings.simplefilter("ignore") - -hdim = 50 # hidden dimension -h_loss = 50 -hdim_deep = 500 -hdim_deep2 = 300 -nb_epoch = 100 # epochs -batch_size = 100 -dimx = 392 # image dimension -dimy = 392 -lamda = 0.02 
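# ---- editor's note ------------------------------------------------------
# The CorrnetCost layer defined below (and in the earlier CorrNet scripts
# in this file) sums the per-dimension Pearson correlation between two
# batches of hidden vectors. It is instantiated with -lamda and paired
# with the pass-through corr_loss, so minimising the loss maximises
# correlation. A NumPy sketch of the objective it computes:
import numpy as np

def correlation_objective(h1, h2, lamda=0.02, eps=1e-8):
    """lamda * sum_j corr(h1[:, j], h2[:, j]) for (batch, hidden) arrays."""
    y1 = h1 - h1.mean(axis=0)
    y2 = h2 - h2.mean(axis=0)
    nr = (y1 * y2).sum(axis=0)
    dr = (np.sqrt((y1 * y1).sum(axis=0) + eps)
          * np.sqrt((y2 * y2).sum(axis=0) + eps))
    return lamda * (nr / dr).sum()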
-loss_type = 2 # dummy - - -def svm_classifier(train_x, train_y, valid_x, valid_y, test_x, test_y): - - clf = svm.LinearSVC() - # print train_x.shape,train_y.shape - clf.fit(train_x, train_y) - pred = clf.predict(valid_x) - va = accuracy_score(np.ravel(valid_y), np.ravel(pred)) - pred = clf.predict(test_x) - ta = accuracy_score(np.ravel(test_y), np.ravel(pred)) - return va, ta - - -def split(train_l, train_r, label, ratio): - - total = train_l.shape[0] - train_samples = int(total*(1-ratio)) - test_samples = total-train_samples - tr_l, tst_l, tr_r, tst_r, l_tr, l_tst = [], [], [], [], [], [] - dat = random.sample(range(total), train_samples) - for a in dat: - tr_l.append(train_l[a, :]) - tr_r.append(train_r[a, :]) - l_tr.append(label[a]) - - for i in range(test_samples): - if i not in dat: - tst_l.append(train_l[i, :]) - tst_r.append(train_r[i, :]) - l_tst.append(label[i]) - - tr_l = np.array(tr_l) - tr_r = np.array(tr_r) - tst_l = np.array(tst_l) - tst_r = np.array(tst_r) - l_tr = np.array(l_tr) - l_tst = np.array(l_tst) - - return tr_l, tst_l, tr_r, tst_r, l_tr, l_tst - - -class ZeroPadding(Layer): - def __init__(self, **kwargs): - super(ZeroPadding, self).__init__(**kwargs) - - def call(self, x, mask=None): - return K.zeros_like(x) - - def get_output_shape_for(self, input_shape): - return input_shape - - -class MultiplyBy2(Layer): - def __init__(self, **kwargs): - super(MultiplyBy2, self).__init__(**kwargs) - - def call(self, x, mask=None): - return 2*x - - def get_output_shape_for(self, input_shape): - return input_shape - - -class CorrnetCost(Layer): - def __init__(self, lamda, **kwargs): - super(CorrnetCost, self).__init__(**kwargs) - self.lamda = lamda - - def cor(self, y1, y2, lamda): - y1_mean = K.mean(y1, axis=0) - y1_centered = y1 - y1_mean - y2_mean = K.mean(y2, axis=0) - y2_centered = y2 - y2_mean - corr_nr = K.sum(y1_centered * y2_centered, axis=0) - corr_dr1 = K.sqrt(K.sum(y1_centered * y1_centered, axis=0) + 1e-8) - corr_dr2 = K.sqrt(K.sum(y2_centered * y2_centered, axis=0) + 1e-8) - corr_dr = corr_dr1 * corr_dr2 - corr = corr_nr / corr_dr - return K.sum(corr) * lamda - - def call(self, x, mask=None): - h1 = x[0] - h2 = x[1] - - corr = self.cor(h1, h2, self.lamda) - - # self.add_loss(corr,x) - # we output junk but be sure to use it for the loss to be added - return corr - - def get_output_shape_for(self, input_shape): - # print input_shape[0][0] - return (input_shape[0][0], input_shape[0][1]) - - -def corr_loss(y_true, y_pred): - # print y_true.type,y_pred.type - # return K.zeros_like(y_pred) - return y_pred - - -def project(model, inp): - m = model.predict([inp[0], inp[1]]) - return m[2] - - -def reconstruct_from_left(model, inp): - img_inp = inp.reshape((28, 14)) - f, axarr = plt.subplots(1, 2, sharey=False) - pred = model.predict([inp, np.zeros_like(inp)]) - img = pred[0].reshape((28, 14)) - axarr[0].imshow(img_inp) - axarr[1].imshow(img) - - -def reconstruct_from_right(model, inp): - img_inp = inp.reshape((28, 14)) - f, axarr = plt.subplots(1, 2, sharey=False) - pred = model.predict([np.zeros_like(inp), inp]) - img = pred[1].reshape((28, 14)) - axarr[1].imshow(img_inp) - axarr[0].imshow(img) - - -def sum_corr(model): - view1 = np.load("test_v1.npy") - view2 = np.load("test_v2.npy") - x = project(model, [view1, np.zeros_like(view1)]) - y = project(model, [np.zeros_like(view2), view2]) - print("test correlation") - corr = 0 - for i in range(0, len(x[0])): - x1 = x[:, i] - (np.ones(len(x))*(sum(x[:, i])/len(x))) - x2 = y[:, i] - (np.ones(len(y))*(sum(y[:, i])/len(y))) - nr 
= sum(x1 * x2)/(math.sqrt(sum(x1*x1))*math.sqrt(sum(x2*x2))) - corr += nr - print(corr) - - -def transfer(model): - view1 = np.load("test_v1.npy") - view2 = np.load("test_v2.npy") - labels = np.load("test_l.npy") - view1 = project(model, [view1, np.zeros_like(view1)]) - view2 = project(model, [np.zeros_like(view2), view2]) - - perp = len(view1) // 5 - print("view1 to view2") - acc = 0 - for i in range(5): - test_x = view2[int(i*perp):int((i+1)*perp)] - test_y = labels[i*perp:(i+1)*perp] - if i == 0: - train_x = view1[perp:len(view1)] - train_y = labels[perp:len(view1)] - elif i == 4: - train_x = view1[0:4*perp] - train_y = labels[0:4*perp] - else: - train_x1 = view1[0:i*perp] - train_y1 = labels[0:i*perp] - train_x2 = view1[(i+1)*perp:len(view1)] - train_y2 = labels[(i+1)*perp:len(view1)] - train_x = np.concatenate((train_x1, train_x2)) - train_y = np.concatenate((train_y1, train_y2)) - - va, ta = svm_classifier( - train_x, train_y, test_x, test_y, test_x, test_y) - acc += ta - print(acc/5) - print("view2 to view1") - - acc = 0 - for i in range(0, 5): - test_x = view1[i*perp:(i+1)*perp] - test_y = labels[i*perp:(i+1)*perp] - if i == 0: - train_x = view2[perp:len(view1)] - train_y = labels[perp:len(view1)] - elif i == 4: - train_x = view2[0:4*perp] - train_y = labels[0:4*perp] - else: - train_x1 = view2[0:i*perp] - train_y1 = labels[0:i*perp] - train_x2 = view2[(i+1)*perp:len(view1)] - train_y2 = labels[(i+1)*perp:len(view1)] - train_x = np.concatenate((train_x1, train_x2)) - train_y = np.concatenate((train_y1, train_y2)) - va, ta = svm_classifier( - train_x, train_y, test_x, test_y, test_x, test_y) - acc += ta - print(acc/5) - - -def prepare_data(): - data_l = np.load('data_l.npy') - data_r = np.load('data_r.npy') - label = np.load('data_label.npy') - X_train_l, X_test_l, X_train_r, X_test_r, y_train, y_test = split( - data_l, data_r, label, ratio=0.0) - return X_train_l, X_train_r - - -def buildModel(loss_type, lamda): - - inpx = Input(shape=(dimx,)) - inpy = Input(shape=(dimy,)) - - hx = Reshape((28, 14, 1))(inpx) - hx = Conv2D(128, (3, 3), activation='relu', padding='same')(hx) - hx = MaxPooling2D((2, 2), padding='same')(hx) - hx = Conv2D(64, (3, 3), activation='relu', padding='same')(hx) - hx = MaxPooling2D((2, 2), padding='same')(hx) - hx = Conv2D(49, (3, 3), activation='relu', padding='same')(hx) - hx = MaxPooling2D((2, 2), padding='same')(hx) - hx = Flatten()(hx) - hx1 = Dense(hdim_deep, activation='sigmoid')(hx) - hx2 = Dense(hdim_deep2, activation='sigmoid', name='hid_l1')(hx1) - hx = Dense(hdim, activation='sigmoid', name='hid_l')(hx2) - - hy = Reshape((28, 14, 1))(inpy) - hy = Conv2D(128, (3, 3), activation='relu', padding='same')(hy) - hy = MaxPooling2D((2, 2), padding='same')(hy) - hy = Conv2D(64, (3, 3), activation='relu', padding='same')(hy) - hy = MaxPooling2D((2, 2), padding='same')(hy) - hy = Conv2D(49, (3, 3), activation='relu', padding='same')(hy) - hy = MaxPooling2D((2, 2), padding='same')(hy) - hy = Flatten()(hy) - hy1 = Dense(hdim_deep, activation='sigmoid')(hy) - hy2 = Dense(hdim_deep2, activation='sigmoid', name='hid_r1')(hy1) - hy = Dense(hdim, activation='sigmoid', name='hid_r')(hy2) - - h = Merge(mode="sum")([hx, hy]) - - recx = Dense(dimx)(h) - recy = Dense(dimy)(h) - - branchModel = Model([inpx, inpy], [recx, recy, h, hx1, hy1, hx2, hy2]) - - [recx1, recy1, h1, _, _, _, _] = branchModel([inpx, ZeroPadding()(inpy)]) - [recx2, recy2, h2, _, _, _, _] = branchModel([ZeroPadding()(inpx), inpy]) - - # you may probably add a reconstruction from combined - [recx3, 
recy3, h3, hx_1, hy_1, hx_2, hy_2] = branchModel([inpx, inpy]) - - lamda2, lamda3 = 0.001, 0.05 - - corr1 = CorrnetCost(-lamda)([h1, h2]) - corr2 = CorrnetCost(-lamda2)([hx_1, hy_1]) - corr3 = CorrnetCost(-lamda3)([hx_2, hy_2]) - - model = Model([inpx, inpy], [recy1, recx2, - recx1, recy2, corr1, corr2, corr3]) - model.compile(loss=["mse", "mse", "mse", "mse", corr_loss, - corr_loss, corr_loss], optimizer="rmsprop") - - return model, branchModel - - -def trainModel(model, data_left, data_right, loss_type, nb_epoch, batch_size): - - X_train_l = data_left - X_train_r = data_right - - data_l = np.load('data_l.npy') - data_r = np.load('data_r.npy') - label = np.load('data_label.npy') - X_train_l, X_test_l, X_train_r, X_test_r, y_train, y_test = split( - data_l, data_r, label, ratio=0.01) - print('data split') - print('L_Type: l2+l3-L4 h_dim:', hdim, - ' hdim_deep', hdim_deep, ' lamda:', lamda) - model.fit([X_train_l, X_train_r], [X_train_r, X_train_l, X_train_l, X_train_r, - np.zeros((X_train_l.shape[0], h_loss)), - np.zeros((X_train_l.shape[0], hdim_deep)), np.zeros((X_train_l.shape[0], hdim_deep2))], - nb_epoch=nb_epoch, - batch_size=batch_size, verbose=1) - - -def testModel(b_model): - transfer(b_model) - sum_corr(b_model) - - -left_view, right_view = prepare_data() -model, branchModel = buildModel(loss_type=loss_type, lamda=lamda) -trainModel(model=model, data_left=left_view, data_right=right_view, - loss_type=loss_type, nb_epoch=nb_epoch, batch_size=batch_size) -testModel(branchModel) -# Note - -# CNN used -# XRMB Dataset - -# had to degrade numpy to 1.11.0 as 1.13.0 doesn't support float index type in arrays -# myarray = np.fromfile('BinaryData.dat',dtype=float) - -import sys -import math -import random -import warnings -import numpy as np -from sklearn import svm -import keras.backend as K -from keras.models import Model -#from theano import tensor as T -import matplotlib.pyplot as plt -from keras.layers import Input, Merge -from keras.engine.topology import Layer -from sklearn.metrics import accuracy_score -from keras.layers.core import Activation, Dense, Reshape -from keras.layers import Conv1D, MaxPooling1D, UpSampling1D, Flatten - -warnings.simplefilter("ignore") - -nb_epoch = 40 -batch_size = 100 -dimx = 273 -dimy = 112 -lamda = 0.02 -loss_type = 2 # 1 - l1+l2+l3-L4; 2 - l2+l3-L4; 3 - l1+l2+l3 , 4 - l2+l3 - - -def svm_classifier(train_x, train_y, valid_x, valid_y, test_x, test_y): - - clf = svm.LinearSVC() - # print train_x.shape,train_y.shape - clf.fit(train_x, train_y) - pred = clf.predict(valid_x) - va = accuracy_score(np.ravel(valid_y), np.ravel(pred)) - pred = clf.predict(test_x) - ta = accuracy_score(np.ravel(test_y), np.ravel(pred)) - return va, ta - - -def split(train_l, train_r, label, ratio): - - total = train_l.shape[0] - train_samples = int(total*(1-ratio)) - test_samples = total-train_samples - tr_l, tst_l, tr_r, tst_r, l_tr, l_tst = [], [], [], [], [], [] - dat = random.sample(range(total), train_samples) - for a in dat: - tr_l.append(train_l[a, :]) - tr_r.append(train_r[a, :]) - l_tr.append(label[a]) - - for i in range(test_samples): - if i not in dat: - tst_l.append(train_l[i, :]) - tst_r.append(train_r[i, :]) - l_tst.append(label[i]) - - tr_l = np.array(tr_l) - tr_r = np.array(tr_r) - tst_l = np.array(tst_l) - tst_r = np.array(tst_r) - l_tr = np.array(l_tr) - l_tst = np.array(l_tst) - - return tr_l, tst_l, tr_r, tst_r, l_tr, l_tst - - -class ZeroPadding(Layer): - def __init__(self, **kwargs): - super(ZeroPadding, self).__init__(**kwargs) - - def call(self, x, 
mask=None): - return K.zeros_like(x) - - def get_output_shape_for(self, input_shape): - return input_shape - - -class MultiplyBy2(Layer): - def __init__(self, **kwargs): - super(MultiplyBy2, self).__init__(**kwargs) - - def call(self, x, mask=None): - return 2*x - - def get_output_shape_for(self, input_shape): - return input_shape - - -class CorrnetCost(Layer): - def __init__(self, lamda, **kwargs): - super(CorrnetCost, self).__init__(**kwargs) - self.lamda = lamda - - def cor(self, y1, y2, lamda): - y1_mean = K.mean(y1, axis=0) - y1_centered = y1 - y1_mean - y2_mean = K.mean(y2, axis=0) - y2_centered = y2 - y2_mean - corr_nr = K.sum(y1_centered * y2_centered, axis=0) - corr_dr1 = K.sqrt(K.sum(y1_centered * y1_centered, axis=0) + 1e-8) - corr_dr2 = K.sqrt(K.sum(y2_centered * y2_centered, axis=0) + 1e-8) - corr_dr = corr_dr1 * corr_dr2 - corr = corr_nr / corr_dr - return K.sum(corr) * lamda - - def call(self, x, mask=None): - h1 = x[0] - h2 = x[1] - - corr = self.cor(h1, h2, self.lamda) - - # self.add_loss(corr,x) - # we output junk but be sure to use it for the loss to be added - return corr - - def get_output_shape_for(self, input_shape): - # print input_shape[0][0] - return (input_shape[0][0], input_shape[0][1]) - - -def corr_loss(y_true, y_pred): - # print y_true.type,y_pred.type - # return K.zeros_like(y_pred) - return y_pred - - -def project(model, inp): - #print (inp[0].shape, inp[1].shape) - m = model.predict([inp[0], inp[1]]) - return m[2] - - -def reconstruct_from_left(model, inp): - img_inp = inp.reshape((28, 14)) - f, axarr = plt.subplots(1, 2, sharey=False) - pred = model.predict([inp, np.zeros_like(inp)]) - img = pred[0].reshape((28, 14)) - axarr[0].imshow(img_inp) - axarr[1].imshow(img) - - -def reconstruct_from_right(model, inp): - img_inp = inp.reshape((28, 14)) - f, axarr = plt.subplots(1, 2, sharey=False) - pred = model.predict([np.zeros_like(inp), inp]) - img = pred[1].reshape((28, 14)) - axarr[1].imshow(img_inp) - axarr[0].imshow(img) - - -def sum_corr(model): - view1 = np.load("MFCC_Test.npy") - view2 = np.load("XRMB_Test.npy") - x = project(model, [view1, np.zeros_like(view2)]) - y = project(model, [np.zeros_like(view1), view2]) - print("test correlation") - corr = 0 - for i in range(0, len(x[0])): - x1 = x[:, i] - (np.ones(len(x))*(sum(x[:, i])/len(x))) - x2 = y[:, i] - (np.ones(len(y))*(sum(y[:, i])/len(y))) - nr = sum(x1 * x2)/(math.sqrt(sum(x1*x1))*math.sqrt(sum(x2*x2))) - corr += nr - print(corr) - - -def transfer(model): - view11 = np.load("MFCC_Test.npy") - view22 = np.load("XRMB_Test.npy") - labels = np.load("Labels_Test.npy") - view1 = project(model, [view11, np.zeros_like(view22)]) - view2 = project(model, [np.zeros_like(view11), view22]) - - perp = len(view1) // 5 - print("view1 to view2") - acc = 0 - for i in range(5): - print('@ i' + str(i)) - test_x = view2[int(i*perp):int((i+1)*perp)] - test_y = labels[i*perp:(i+1)*perp] - if i == 0: - train_x = view1[perp:len(view1)] - train_y = labels[perp:len(view1)] - elif i == 4: - train_x = view1[0:4*perp] - train_y = labels[0:4*perp] - else: - train_x1 = view1[0:i*perp] - train_y1 = labels[0:i*perp] - train_x2 = view1[(i+1)*perp:len(view1)] - train_y2 = labels[(i+1)*perp:len(view1)] - train_x = np.concatenate((train_x1, train_x2)) - train_y = np.concatenate((train_y1, train_y2)) - - va, ta = svm_classifier( - train_x, train_y, test_x, test_y, test_x, test_y) - acc += ta - print(acc/5) - print("view2 to view1") - - acc = 0 - for i in range(5): - print('@ i' + str(i)) - test_x = view1[i*perp:(i+1)*perp] - test_y = 
labels[i*perp:(i+1)*perp] - if i == 0: - train_x = view2[perp:len(view1)] - train_y = labels[perp:len(view1)] - elif i == 4: - train_x = view2[0:4*perp] - train_y = labels[0:4*perp] - else: - train_x1 = view2[0:i*perp] - train_y1 = labels[0:i*perp] - train_x2 = view2[(i+1)*perp:len(view1)] - train_y2 = labels[(i+1)*perp:len(view1)] - train_x = np.concatenate((train_x1, train_x2)) - train_y = np.concatenate((train_y1, train_y2)) - va, ta = svm_classifier( - train_x, train_y, test_x, test_y, test_x, test_y) - acc += ta - print(acc/5) - - -def prepare_data(): - data_l = np.load('MFCC_Train.npy') - data_r = np.load('XRMB_Train.npy') - label = np.load('Labels_Train.npy') - X_train_l, X_test_l, X_train_r, X_test_r, y_train, y_test = split( - data_l, data_r, label, ratio=0.0) - return X_train_l, X_train_r - - -def buildModel(loss_type, lamda): - - inpx = Input(shape=(dimx,)) - inpy = Input(shape=(dimy,)) - - hx = Reshape((dimx, 1))(inpx) - hx = Conv1D(256, 5, activation='relu', padding='valid', strides=1)(hx) - hx = MaxPooling1D(pool_size=4, padding='valid')(hx) - hx = Conv1D(65, 4, activation='relu', padding='valid', strides=1)(hx) - hx = MaxPooling1D(pool_size=3, padding='valid')(hx) - hx = Flatten()(hx) - hx = Dense(560, activation='sigmoid')(hx) - hx = Dense(280, activation='sigmoid')(hx) - hx = Dense(112, activation='sigmoid')(hx) - hx = Dense(680, activation='sigmoid')(hx) - hx = Dense(1365, activation='sigmoid')(hx) - - hy = Reshape((dimy, 1))(inpy) - hy = Conv1D(256, 3, activation='relu', padding='valid', strides=1)(hy) - hy = MaxPooling1D(pool_size=2, padding='valid')(hy) - hy = Conv1D(50, 3, activation='relu', padding='valid', strides=1)(hy) - hy = MaxPooling1D(pool_size=2, padding='valid')(hy) - hy = Flatten()(hy) - hy = Dense(560, activation='sigmoid')(hy) - hy = Dense(280, activation='sigmoid')(hy) - hy = Dense(112, activation='sigmoid')(hy) - hy = Dense(680, activation='sigmoid')(hy) - hy = Dense(1365, activation='sigmoid')(hy) - - #h = Activation("sigmoid")( Merge(mode="sum")([hx,hy]) ) - h = Merge(mode="sum")([hx, hy]) - - #recx = Dense(hdim_deep,activation='sigmoid')(h) - recx = Dense(dimx)(h) - #recy = Dense(hdim_deep,activation='sigmoid')(h) - recy = Dense(dimy)(h) - - branchModel = Model([inpx, inpy], [recx, recy, h]) - - [recx1, recy1, h1] = branchModel([inpx, ZeroPadding()(inpy)]) - [recx2, recy2, h2] = branchModel([ZeroPadding()(inpx), inpy]) - - # you may probably add a reconstruction from combined - [recx3, recy3, h] = branchModel([inpx, inpy]) - - corr = CorrnetCost(-lamda)([h1, h2]) - model = Model([inpx, inpy], [recy1, recx2, recx1, recy2, corr]) # 2 - model.compile(loss=["mse", "mse", "mse", "mse", - corr_loss], optimizer="rmsprop") - branchModel.summary() - -# if loss_type == 1: -# model = Model( [inpx,inpy],[recy1,recx2,recx3,recx1,recy2,recy3,corr]) -# model.compile( loss=["mse","mse","mse","mse","mse","mse",corr_loss],optimizer="rmsprop") -# elif loss_type == 2: -# model = Model( [inpx,inpy],[recy1,recx2,recx1,recy2,corr]) -# model.compile( loss=["mse","mse","mse","mse",corr_loss],optimizer="rmsprop") -# elif loss_type == 3: -# model = Model( [inpx,inpy],[recy1,recx2,recx3,recx1,recy2,recy3]) -# model.compile( loss=["mse","mse","mse","mse","mse","mse"],optimizer="rmsprop") -# elif loss_type == 4: -# model = Model( [inpx,inpy],[recy1,recx2,recx1,recy2]) -# model.compile( loss=["mse","mse","mse","mse"],optimizer="rmsprop") - - return model, branchModel - - -def trainModel(model, data_left, data_right, loss_type, nb_epoch, batch_size): - - X_train_l = data_left - 
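# ---- editor's note ------------------------------------------------------
# This trainModel() accepts data_left/data_right, but the lines that
# follow overwrite them with a fresh split of the arrays reloaded from
# disk, so the parameters are effectively dead. A leaner equivalent using
# the same split() defined above (epochs= is the Keras 2 spelling of
# nb_epoch=):
import numpy as np

def train_corrnet(model, nb_epoch, batch_size, h_loss=112):
    data_l = np.load('MFCC_Train.npy')
    data_r = np.load('XRMB_Train.npy')
    label = np.load('Labels_Train.npy')
    X_train_l, _, X_train_r, _, _, _ = split(data_l, data_r, label, ratio=0.01)
    dummy = np.zeros((X_train_l.shape[0], h_loss))   # target for the corr output
    model.fit([X_train_l, X_train_r],
              [X_train_r, X_train_l, X_train_l, X_train_r, dummy],
              epochs=nb_epoch, batch_size=batch_size, verbose=1)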
X_train_r = data_right - #y_train = np_utils.to_categorical(y_train, nb_classes) - #y_test = np_utils.to_categorical(y_test, nb_classes) - - data_l = np.load('MFCC_Train.npy') - data_r = np.load('XRMB_Train.npy') - label = np.load('Labels_Train.npy') - X_train_l, X_test_l, X_train_r, X_test_r, y_train, y_test = split( - data_l, data_r, label, ratio=0.01) - print('data split') - model.fit([X_train_l, X_train_r], [X_train_r, X_train_l, X_train_l, X_train_r, np.zeros((X_train_l.shape[0], 112))], - nb_epoch=nb_epoch, - batch_size=batch_size, verbose=1) -# if loss_type == 1: -# print ('L_Type: l1+l2+l3-L4 h_dim:',hdim,' lamda:',lamda) -# model.fit([X_train_l,X_train_r], [X_train_r,X_train_l,X_train_l,X_train_l,X_train_r,X_train_r,np.zeros((X_train_l.shape[0],h_loss))], -# nb_epoch=nb_epoch, -# batch_size=batch_size,verbose=1) -# elif loss_type == 2: -# print ('L_Type: l2+l3-L4 h_dim:',hdim,' hdim_deep',hdim_deep,' lamda:',lamda) -# model.fit([X_train_l,X_train_r], [X_train_r,X_train_l,X_train_l,X_train_r,np.zeros((X_train_l.shape[0],h_loss))], -# nb_epoch=nb_epoch, -# batch_size=batch_size,verbose=1) -# elif loss_type == 3: -# print ('L_Type: l1+l2+l3 h_dim:',hdim,' lamda:',lamda) -# model.fit([X_train_l,X_train_r], [X_train_r,X_train_l,X_train_l,X_train_l,X_train_r,X_train_r], -# nb_epoch=nb_epoch, -# batch_size=batch_size,verbose=1) -# elif loss_type == 4: -# print ('L_Type: l2+l3 h_dim:',hdim,' lamda:',lamda) -# model.fit([X_train_l,X_train_r], [X_train_r,X_train_l,X_train_l,X_train_r], -# nb_epoch=nb_epoch, -# batch_size=batch_size,verbose=1) - -# score = m.evaluate([X_test_l,X_test_r], [X_test_l,X_test_l,X_test_r,X_test_r,np.zeros((X_test_l.shape[0],hdim))], -# batch_size=100) -# print score - - -def testModel(b_model): - transfer(b_model) - sum_corr(b_model) - - -left_view, right_view = prepare_data() -model, branchModel = buildModel(loss_type=loss_type, lamda=lamda) -trainModel(model=model, data_left=left_view, data_right=right_view, - loss_type=loss_type, nb_epoch=nb_epoch, batch_size=batch_size) -testModel(branchModel) -# -*- coding: utf-8 -*- - -from utils.score import report_score -from sklearn.metrics import accuracy_score -import utility -import warnings -import numpy as np -import gensim as gen -from keras.preprocessing import sequence -from keras.models import Sequential, Model -from keras.layers import Dense, Dropout, Activation, Input, Merge -from keras.layers import Embedding -from keras.layers import Conv1D, GlobalMaxPooling1D -from keras.datasets import imdb -from keras.utils.np_utils import to_categorical -import pandas as pd -from keras.layers.convolutional import Convolution1D -# set parameters: -from nltk.tokenize import regexp_tokenize -warnings.simplefilter("ignore") - -embedding_dim = 300 -LSTM_neurons = 50 -dense_neuron = 16 -dimx = 100 -dimy = 200 -lamda = 0.0 -nb_filter = 100 -filter_length = 4 -vocab_size = 10000 -batch_size = 50 -epochs = 5 -ntn_out = 16 -ntn_in = nb_filter -state = False - - -def preprocess_data(head, body): - stop_words = [ - "a", "about", "above", "across", "after", "afterwards", "again", "against", "all", "almost", "alone", "along", - "already", "also", "although", "always", "am", "among", "amongst", "amoungst", "amount", "an", "and", "another", - "any", "anyhow", "anyone", "anything", "anyway", "anywhere", "are", "around", "as", "at", "back", "be", - "became", "because", "become", "becomes", "becoming", "been", "before", "beforehand", "behind", "being", - "below", "beside", "besides", "between", "beyond", "bill", "both", "bottom", "but", 
"by", "call", "can", "co", - "con", "could", "cry", "de", "describe", "detail", "do", "done", "down", "due", "during", "each", "eg", "eight", - "either", "eleven", "else", "elsewhere", "empty", "enough", "etc", "even", "ever", "every", "everyone", - "everything", "everywhere", "except", "few", "fifteen", "fifty", "fill", "find", "fire", "first", "five", "for", - "former", "formerly", "forty", "found", "four", "from", "front", "full", "further", "get", "give", "go", "had", - "has", "have", "he", "hence", "her", "here", "hereafter", "hereby", "herein", "hereupon", "hers", "herself", - "him", "himself", "his", "how", "however", "hundred", "i", "ie", "if", "in", "inc", "indeed", "interest", - "into", "is", "it", "its", "itself", "keep", "last", "latter", "latterly", "least", "less", "ltd", "made", - "many", "may", "me", "meanwhile", "might", "mill", "mine", "more", "moreover", "most", "mostly", "move", "much", - "must", "my", "myself", "name", "namely", "neither", "nevertheless", "next", "nine", "nobody", "now", "nowhere", - "of", "off", "often", "on", "once", "one", "only", "onto", "or", "other", "others", "otherwise", "our", "ours", - "ourselves", "out", "over", "own", "part", "per", "perhaps", "please", "put", "rather", "re", "same", "see", - "serious", "several", "she", "should", "show", "side", "since", "sincere", "six", "sixty", "so", "some", - "somehow", "someone", "something", "sometime", "sometimes", "somewhere", "still", "such", "system", "take", - "ten", "than", "that", "the", "their", "them", "themselves", "then", "thence", "there", "thereafter", "thereby", - "therefore", "therein", "thereupon", "these", "they", "thick", "thin", "third", "this", "those", "though", - "three", "through", "throughout", "thru", "thus", "to", "together", "too", "top", "toward", "towards", "twelve", - "twenty", "two", "un", "under", "until", "up", "upon", "us", "very", "via", "was", "we", "well", "were", "what", - "whatever", "when", "whence", "whenever", "where", "whereafter", "whereas", "whereby", "wherein", "whereupon", - "wherever", "whether", "which", "while", "whither", "who", "whoever", "whole", "whom", "whose", "why", "will", - "with", "within", "without", "would", "yet", "you", "your", "yours", "yourself", "yourselves" - ] - chead, cbody = [], [] - for sample in head: - sentence = ' '.join( - [word for word in sample.split() if word not in stop_words]) - chead.append(sentence) - - for sample in body: - sentence = ' '.join( - [word for word in sample.split() if word not in stop_words]) - cbody.append(sentence) - print(cbody[0]) - return chead, cbody - - -def generateMatrix(obj, sent_Q, sent_A, dimx, dimy): - START = '$_START_$' - END = '$_END_$' - unk_token = '$_UNK_$' - sent1 = [] - #sent1_Q = ques_sent - #sent1_A = ans_sent - sent1.extend(sent_Q) - # sent.extend(ques_sent) - sent1.extend(sent_A) - #sent1 = [' '.join(i) for i in sent1] - # sent.extend(ans_sent) - sentence = ["%s %s %s" % (START, x, END) for x in sent1] - tokenize_sent = [regexp_tokenize(x, - pattern='\w+|$[\d\.]+|\S+') for x in sentence] - - # for i in index_to_word1: - # index_to_word.append(i) - # for key in word_to_index1.keys(): - # word_to_index[key] = word_to_index1[key] - - for i, sent in enumerate(tokenize_sent): - tokenize_sent[i] = [ - w if w in obj.word_to_index else unk_token for w in sent] - - len_train = len(sent_Q) - text = [] - for i in tokenize_sent: - text.extend(i) - - sentences_x = [] - sentences_y = [] - - # print 'here' - - for sent in tokenize_sent[0:len_train]: - temp = [START for i in range(dimx)] - for 
ind, word in enumerate(sent[0:dimx]): - temp[ind] = word - sentences_x.append(temp) - - for sent in tokenize_sent[len_train:]: - temp = [START for i in range(dimy)] - for ind, word in enumerate(sent[0:dimy]): - temp[ind] = word - sentences_y.append(temp) - - X_data = [] - for i in sentences_x: - temp = [] - for j in i: - temp.append(obj.word_to_index[j]) - temp = np.array(temp).T - X_data.append(temp) - - y_data = [] - for i in sentences_y: - temp = [] - for j in i: - temp.append(obj.word_to_index[j]) - temp = np.array(temp).T - y_data.append(temp) - X_data = np.array(X_data) - y_data = np.array(y_data) - return X_data, y_data - - -def word2vec_embedding_layer(embedding_matrix): - #weights = np.load('Word2Vec_QA.syn0.npy') - layer = Embedding(input_dim=embedding_matrix.shape[0], output_dim=embedding_matrix.shape[1], weights=[ - embedding_matrix]) - return layer - - -''' -try: - word = wordVec_model['word'] - print('using loaded model.....') -except: - wordVec_model = gen.models.KeyedVectors.load_word2vec_format("GoogleNews-vectors-negative300.bin.gz",binary=True) -#bre''' -file_head = "/fncdata/train_stances.csv" -file_body = "/fncdata/train_bodies.csv" -head = pd.read_csv(file_head) -body = pd.read_csv(file_body) -head_array = head.values -body_array = body.values -print(len(head_array)) -print(len(body_array)) -labels = head_array[:, 2] -body_id = head_array[:, 1] -dataset_headLines = head_array[:, 0] -body_ds = [] -for i in range(len(head_array)): - for j in range(len(body_array)): - if body_array[j][0] == body_id[i]: - body_ds.append(body_array[j][1]) - break - -dataset_body = np.array(body_ds) -# print(type(dataset_body)) -new_lab = [] -for i in labels: - if i == 'unrelated': - new_lab.append(3) - if i == 'agree': - new_lab.append(0) - if i == 'discuss': - new_lab.append(2) - if i == 'disagree': - new_lab.append(1) -y_train = np.array(new_lab) - -print("Refining training dataset for CNN") -train_rdh = [] -for i in dataset_headLines: - sentence = "" - for char in i: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - train_rdh.append(sentence) - -train_rdb = [] -for i in dataset_body: - sentence = "" - for char in i: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - train_rdb.append(sentence) - -print("Preprocessing train dataset") -train_rpdh, train_rpdb = preprocess_data(train_rdh, train_rdb) - - -obj = utility.sample() -train_head, train_body, embedding_matrix = obj.process_data(sent_Q=train_rdh, - sent_A=train_rdb, dimx=dimx, dimy=dimy, - wordVec_model=None) - -# def buildModel(): -inpx = Input(shape=(dimx,), dtype='int32', name='inpx') -x = Embedding(output_dim=embedding_dim, - input_dim=vocab_size, input_length=dimx)(inpx) -#x = word2vec_embedding_layer(embedding_matrix)(inpx) -inpy = Input(shape=(dimy,), dtype='int32', name='inpy') -y = Embedding(output_dim=embedding_dim, - input_dim=vocab_size, input_length=dimy)(inpy) -#y = word2vec_embedding_layer(embedding_matrix)(inpy) -ques = Convolution1D(nb_filter=nb_filter, filter_length=filter_length, - border_mode='valid', activation='relu', - subsample_length=1)(x) - -ans = Convolution1D(nb_filter=nb_filter, filter_length=filter_length, - border_mode='valid', activation='relu', - subsample_length=1)(y) - -#hx = Lambda(max_1d, output_shape=(nb_filter,))(ques) -#hy = Lambda(max_1d, output_shape=(nb_filter,))(ans) -hx = GlobalMaxPooling1D()(ques) -hy = GlobalMaxPooling1D()(ans) - -#wordVec_model = [] -h = Merge(mode="concat", name='h')([hx, hy]) -#h = 
NeuralTensorLayer(output_dim=1,input_dim=ntn_in)([hx,hy]) -#h = ntn_layer(ntn_in,ntn_out,activation=None)([hx,hy]) -#score = h -wrap = Dense(dense_neuron, activation='relu', name='wrap')(h) -#score = Dense(1,activation='sigmoid',name='score')(h) -#wrap = Dense(dense_neuron,activation='relu',name='wrap')(h) -score = Dense(4, activation='softmax', name='score')(wrap) - -# score=K.clip(score,1e-7,1.0-1e-7) -#corr = CorrelationRegularization(-lamda)([hx,hy]) -#model = Model([inpx,inpy],[score,corr]) -model = Model([inpx, inpy], score) -model.compile(loss='categorical_crossentropy', - optimizer="adadelta", metrics=['accuracy']) - - -print('data split') -Y_train = to_categorical(y_train, 4) -#train_head_split, test_head_split, train_body_split, test_body_split, train_y_split, test_y_split = utility.split(train_head, train_body, Y_train, 0.2) -model.fit([train_head, train_body], Y_train, nb_epoch=10, verbose=2) - -file0 = "/fncdata/competition_test_stances.csv" -file1 = "/fncdata/test_bodies.csv" -test_head = pd.read_csv(file0) -test_body = pd.read_csv(file1) -test_head = test_head.values -test_body = test_body.values -test_hds = test_head[:, 0] -test_ids = test_head[:, 1] -test_labels = test_head[:, 2] -test_bds = [] -for ids in test_ids: - for body in test_body: - if ids == body[0]: - test_bds.append(body[1]) - -print("refining test dataset") -test_rdh = [] -for i in range(len(test_hds)): - sentence = "" - for char in test_hds[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - test_rdh.append(sentence) - -test_rdb = [] -for i in range(len(test_bds)): - sentence = "" - for char in test_bds[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - test_rdb.append(sentence) - -print("Preprocessing test dataset") -test_rpdh, test_rpdb = preprocess_data(test_rdh, test_rdb) -ts_head, ts_body = generateMatrix(obj, test_rdh, test_rdb, dimx, dimy) -predictions = model.predict([ts_head, ts_body]) -predictions = [i.argmax()for i in predictions] -predictions = np.array(predictions) -string_predicted = [] -for i, j in enumerate(predictions): - if j == 3: - string_predicted.append("unrelated") - elif j == 0: - string_predicted.append("agree") - elif j == 1: - string_predicted.append("disagree") - elif j == 2: - string_predicted.append("discuss") - -score = accuracy_score(string_predicted, test_labels) -print("Accuracy on test dataset: ", score) -report_score(string_predicted, test_labels) -''' -Evaluation code for the SICK dataset (SemEval 2014 Task 1) -''' -import math -import numpy as np -import os.path -from util import * -from sklearn.metrics import mean_squared_error as mse -from scipy.stats import pearsonr -from scipy.stats import spearmanr -from sklearn.utils import shuffle -from utils.score import report_score -from keras.models import Sequential -from keras.layers.core import Dense, Activation -from keras.optimizers import Adam -from sklearn.metrics import accuracy_score -from keras.utils.np_utils import to_categorical -import pandas as pd -from sklearn.model_selection import train_test_split -import random -from keras.models import Sequential, Model -from keras.layers import Dense, Dropout, Activation, Flatten, Merge, Embedding -from keras import regularizers -from keras.layers import Merge, Input, Multiply, Layer -from sklearn.preprocessing import StandardScaler - - -def split(train_l, train_r, label, ratio): - total = train_l.shape[0] - train_samples = int(total*(1-ratio)) - test_samples = total-train_samples - tr_l, 
tst_l, tr_r, tst_r, l_tr, l_tst = [], [], [], [], [], [] - dat = random.sample(range(total), train_samples) - for a in dat: - tr_l.append(train_l[a]) - tr_r.append(train_r[a]) - l_tr.append(label[a]) - print 'splitting - validation samples ', test_samples - for i in range(total): - if i not in dat: - tst_l.append(train_l[i]) - tst_r.append(train_r[i]) - l_tst.append(label[i]) - print 'splitting - train samples ', len(dat) - tr_l = np.array(tr_l) - tr_r = np.array(tr_r) - tst_l = np.array(tst_l) - tst_r = np.array(tst_r) - l_tr = np.array(l_tr) - l_tst = np.array(l_tst) - - return tr_l, tst_l, tr_r, tst_r, l_tr, l_tst - - -def load_dataset(file_trhead, file_trbody, file_tshead, file_tsbody): - trhead = pd.read_csv(file_trhead) - trbody = pd.read_csv(file_trbody) - tshead = pd.read_csv(file_tshead) - tsbody = pd.read_csv(file_tsbody) - tr_head_array = trhead.values - tr_body_array = trbody.values - ts_head_array = tshead.values - ts_body_array = tsbody.values - tr_labels = tr_head_array[:, 2] - ts_labels = ts_head_array[:, 2] - - tr_body_id = tr_head_array[:, 1] - train_dh = tr_head_array[:, 0] - train_db = [] - for i in range(len(tr_head_array)): - for j in range(len(tr_body_array)): - if tr_body_array[j][0] == tr_body_id[i]: - train_db.append(tr_body_array[j][1]) - break - tr_lab = [] - for i in tr_labels: - if i == 'unrelated': - tr_lab.append(3) - if i == 'agree': - tr_lab.append(0) - if i == 'discuss': - tr_lab.append(2) - if i == 'disagree': - tr_lab.append(1) - train_db = np.array(train_db) - - ts_body_id = ts_head_array[:, 1] - test_dh = ts_head_array[:, 0] - test_db = [] - for i in range(len(ts_head_array)): - for j in range(len(ts_body_array)): - if ts_body_array[j][0] == ts_body_id[i]: - test_db.append(ts_body_array[j][1]) - break - ts_lab = [] - for i in ts_labels: - if i == 'unrelated': - ts_lab.append(3) - if i == 'agree': - ts_lab.append(0) - if i == 'discuss': - ts_lab.append(2) - if i == 'disagree': - ts_lab.append(1) - - test_db = np.array(test_db) - - # signs=['?','.',] - print("Refining train datset") - train_rdh = [] - for i in range(len(train_dh)): - sentence = "" - for char in train_dh[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - train_rdh.append(sentence) - - train_rdb = [] - for i in range(len(train_db)): - sentence = "" - for char in train_db[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - train_rdb.append(sentence) - - print("Refining test datset") - test_rdh = [] - for i in range(len(test_dh)): - sentence = "" - for char in test_dh[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - test_rdh.append(sentence) - - test_rdb = [] - for i in range(len(test_db)): - sentence = "" - for char in test_db[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - test_rdb.append(sentence) - - dic = pd.read_pickle('stop_dic') - - train_new_rdb = [] - test_new_rdb = [] - - word_limit = 250 - print 'removing stop words and using', word_limit, 'words limit .....' 
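The loops that follow drop any token found in the pickled stop_dic via try/except key lookups and cap each body at word_limit tokens. An equivalent, more direct sketch of the same filtering (assuming stop_dic supports the `in` operator, as dicts and sets do):

```python
def remove_stops(text, stop_dic, word_limit=250):
    # Keep tokens that are not stop words, then truncate to the word limit.
    kept = [w for w in text.split() if w not in stop_dic]
    return ' '.join(kept[:word_limit])
```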
- - for i in train_rdb: - temp = [] - for j in i.split(): - try: - a = dic[j] - except: - temp.append(j) - train_new_rdb.append(' '.join(temp[0:min(len(temp), word_limit)])) - - for i in test_rdb: - temp = [] - for j in i.split(): - try: - a = dic[j] - except: - temp.append(j) - test_new_rdb.append(' '.join(temp[0:min(len(temp), word_limit)])) - - train_rdh = np.array(train_rdh) - test_rdh = np.array(test_rdh) - train_new_rdb = np.array(train_new_rdb) - test_new_rdb = np.array(test_new_rdb) - - return train_rdh, train_new_rdb, test_rdh, test_new_rdb - #tr_h, dev_h, tr_b, dev_b, tr_s, dev_s = split(np.array(train_rdh), np.array(train_rdb), tr_lab, 0.2) - # return [tr_h, tr_b], [dev_h, dev_b], [tr_s, dev_s] - - -def evaluate(encoder=None, seed=1234, evaltest=False, loc='./data/'): - """ - Run experiment - """ - print 'Preparing data for fnc...' - - #train, dev, test, scores = load_data(loc) - #train[0], train[1], scores[0] = shuffle(train[0], train[1], scores[0], random_state=seed) - - ''' - trh, trb, tsh, tsb =\ - load_dataset("/fnc_data/train_stances.csv", "/fnc_data/train_bodies.csv",\ - "/fnc_data/competition_test_stances.csv", "/fnc_data/test_bodies.csv") - ''' - train_h = np.load('/fncdata2/encode_train_head.npy') - train_b = np.load('/fncdata2/encode_train_body.npy') - test_h = np.load('/fncdata2/encode_test_head.npy') - test_b = np.load('/fncdata2/encode_test_body.npy') - score_train = np.load('/fncdata2/score_train.npy') - score_test = np.load('/fncdata2/score_test.npy') - #train_b = big_mat - #train_h, dev_h, train_b, dev_b, score_train, dev_score = split(np.array(train_h), train_b, score_train, 0.2) - - print 'loading training skipthoughts...' - #trainA = encoder.encode(train_h, verbose=False, use_eos=True) - #trainB = encoder.encode(train_b, verbose=False, use_eos=True) - trainA = train_h - trainB = train_b - - print 'Computing development skipthoughts...' - #devA = encoder.encode(dev_h, verbose=False, use_eos=True) - #devB = encoder.encode(dev_b, verbose=False, use_eos=True) -# devA = dev_h -# devB = dev_b - devA = test_h - devB = test_b - dev_score = score_test - - print 'Computing feature combinations...' - trainF = np.c_[np.abs(trainA - trainB), trainA * trainB] - devF = np.c_[np.abs(devA - devB), devA * devB] - - print 'Encoding labels...' - #trainY = encode_labels(train_labels) - #devY = encode_labels(holdout_labels) - trainY = to_categorical(score_train, 4) - devY = to_categorical(dev_score, 4) - - train_Fx, test_Fx = load_features() - #fmodel = generate_feature_model(train_Fx, score_train, test_Fx, dev_score, ninputs=len(train_Fx[0])) - - train_tfidf, test_tfidf = generate_tfidf() - - print 'Compiling model...' - lrmodel = prepare_model( - ninputs=trainF.shape[1], n_feats=train_Fx.shape[1], n_tfidf=train_tfidf.shape[1]) - - print 'Training...' - bestlrmodel = train_model(lrmodel, trainF, trainY, devF, devY, - dev_score, train_Fx, test_Fx, train_tfidf, test_tfidf) - - if evaltest: - print 'Loading test skipthoughts...' - testA = test_h - testB = test_b - - print 'Computing feature combinations...' 
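Every sentence-pair model in these files builds its classifier input the same way: the absolute difference of the two encodings concatenated with their element-wise product, the standard combination from the skip-thoughts SICK evaluation. As a standalone helper (a sketch, not a function in the original code):

```python
import numpy as np

def pair_features(a, b):
    # a, b: (n_samples, dim) encodings of headline/body or sentence pairs.
    return np.c_[np.abs(a - b), a * b]  # shape (n_samples, 2 * dim)
```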
- testF = np.c_[np.abs(testA - testB), testA * testB] - - yhat = bestlrmodel.predict(testF, verbose=2) - yhat = [i.argmax()for i in yhat] - - string_predicted, test_stances = [], [] - - for i, j in zip(yhat, score_test): - if i == 3: - string_predicted.append('unrelated') - if i == 0: - string_predicted.append('agree') - if i == 2: - string_predicted.append('discuss') - if i == 1: - string_predicted.append('disagree') - if j == 3: - test_stances.append('unrelated') - if j == 0: - test_stances.append('agree') - if j == 2: - test_stances.append('discuss') - if j == 1: - test_stances.append('disagree') - - report_score(test_stances, string_predicted) - score = accuracy_score(score_test, yhat) - print 'accuracy is ..', score - # print 'Evaluating...' - - -def generate_tfidf(): - file_train_instances = "/fncdata/train_stances.csv" - file_train_bodies = "/fncdata/train_bodies.csv" - file_test_instances = "/fncdata/competition_test_stances.csv" - file_test_bodies = "/fncdata/test_bodies.csv" - raw_train = FNCData(file_train_instances, file_train_bodies) - raw_test = FNCData(file_test_instances, file_test_bodies) - n_train = len(raw_train.instances) - - lim_unigram = 5000 - train_set, train_stances, bow_vectorizer, tfreq_vectorizer, tfidf_vectorizer = \ - pipeline_train(raw_train, raw_test, lim_unigram=lim_unigram) - #feature_size = len(train_set[0]) - test_set = pipeline_test(raw_test, bow_vectorizer, - tfreq_vectorizer, tfidf_vectorizer) - return np.array(train_set), np.array(test_set) - - -def prepare_model(ninputs=9600, n_feats=45, nclass=4, n_tfidf=10001): - inp1 = Input(shape=(ninputs,)) - inp2 = Input(shape=(n_feats,)) - inp3 = Input(shape=(n_tfidf,)) - reg = 0.00005 - out_neurons1 = 500 - #out_neurons2 = 20 - #out_neurons2 = 10 - m1 = Dense(input_dim=ninputs, output_dim=out_neurons1, activation='sigmoid', - kernel_regularizer=regularizers.l2(0.00000001))(inp1) - m1 = Dropout(0.2)(m1) - m1 = Dense(100, activation='sigmoid')(m1) - #m1 = Dropout(0.2)(m1) - #m1 = Dense(4, activation='sigmoid')(m1) - - #m2 = Dense(input_dim=n_feats, output_dim=n_feats,activation='relu')(inp2) - m2 = Dense(50, activation='relu')(inp2) - # m2=Dense(4,activation='relu')(m2) - - m3 = Dense(500, input_dim=n_tfidf, activation='relu', - kernel_regularizer=regularizers.l2(reg))(inp3) - - m3 = Dropout(0.4)(m3) - m3 = Dense(50, activation='relu')(m3) - #m3 = Dropout(0.4)(m3) - #m3 = Dense(4, activation='softmax')(m3) - - #m1 = Dense(input_dim=ninputs, output_dim=out_neurons2,activation='sigmoid')(m1) - #m2 = Dense(input_dim=ninputs, output_dim=out_neurons2,activation='softmax')(m2) - - m = Merge(mode='concat')([m1, m2, m3]) - - #mul = Multiply()([m1,m2]) - #add = Abs()([m1,m2]) - #m = Merge(mode='concat')([mul,add]) - - score = Dense(output_dim=nclass, activation='softmax')(m) - model = Model([inp1, inp2, inp3], score) - model.compile(loss='categorical_crossentropy', optimizer='adam') - return model - - -def prepare_model2(ninputs=9600, n_feats=45, nclass=4, n_tfidf=10001): - inp1 = Input(shape=(ninputs,)) - inp2 = Input(shape=(n_feats,)) - inp3 = Input(shape=(n_tfidf,)) - reg = 0.00005 - out_neurons1 = 500 - #out_neurons2 = 20 - #out_neurons2 = 10 - m1 = Dense(input_dim=ninputs, output_dim=out_neurons1, activation='sigmoid', - kernel_regularizer=regularizers.l2(0.00000001))(inp1) - m1 = Dropout(0.2)(m1) - m1 = Dense(100, activation='sigmoid')(m1) - #m1 = Dropout(0.2)(m1) - #m1 = Dense(4, activation='sigmoid')(m1) - - m2 = Dense(input_dim=n_feats, output_dim=n_feats, activation='relu')(inp2) - m2 = Dense(4, 
activation='relu')(inp2) - # m2=Dense(4,activation='relu')(m2) - - m3 = Dense(500, input_dim=n_tfidf, activation='relu', - kernel_regularizer=regularizers.l2(reg))(inp3) - - m3 = Dropout(0.4)(m3) - m3 = Dense(50, activation='relu')(m3) - #m3 = Dropout(0.4)(m3) - #m3 = Dense(4, activation='softmax')(m3) - - #m1 = Dense(input_dim=ninputs, output_dim=out_neurons2,activation='sigmoid')(m1) - #m2 = Dense(input_dim=ninputs, output_dim=out_neurons2,activation='softmax')(m2) - - m = Merge(mode='concat')([m1, m2, m3]) - - #mul = Multiply()([m1,m2]) - #add = Abs()([m1,m2]) - #m = Merge(mode='concat')([mul,add]) - - score = Dense(output_dim=nclass, activation='softmax')(m) - model = Model([inp1, inp2, inp3], score) - model.compile(loss='categorical_crossentropy', optimizer='adam') - return model - - -def prepare_model1(ninputs=9600, n_feats=45, nclass=4, n_tfidf=10001): - inp1 = Input(shape=(ninputs,)) - inp2 = Input(shape=(n_feats,)) - inp3 = Input(shape=(n_tfidf,)) - reg = 0.00005 - out_neurons1 = 500 - #out_neurons2 = 20 - #out_neurons2 = 10 - m1 = Dense(input_dim=ninputs, output_dim=out_neurons1, activation='sigmoid', - kernel_regularizer=regularizers.l2(0.00000001))(inp1) - m1 = Dropout(0.5)(m1) - m1 = Dense(100, activation='sigmoid')(m1) - m1 = Dropout(0.5)(m1) - - m2 = Dense(input_dim=n_feats, output_dim=n_feats, activation='relu')(inp2) - m2 = Dense(30, activation='relu')(m2) - - m3 = Dense(500, input_dim=n_tfidf, activation='relu', - kernel_regularizer=regularizers.l2(reg))(inp3) - - m3 = Dropout(0.6)(m3) - m3 = Dense(100, activation='relu')(m3) - m3 = Dropout(0.4)(m3) - m3 = Dense(4, activation='softmax')(m3) - - #m1 = Dense(input_dim=ninputs, output_dim=out_neurons2,activation='sigmoid')(m1) - #m2 = Dense(input_dim=ninputs, output_dim=out_neurons2,activation='softmax')(m2) - - m = Merge(mode='concat')([m1, m2, m3]) - - #mul = Multiply()([m1,m2]) - #add = Abs()([m1,m2]) - #m = Merge(mode='concat')([mul,add]) - - score = Dense(output_dim=nclass, activation='softmax')(m) - model = Model([inp1, inp2, inp3], score) - model.compile(loss='categorical_crossentropy', optimizer='adam') - return model - - """ - Set up and compile the model architecture (Logistic regression) - - print 'changed' - out_neurons1 = 500 - lrmodel = Sequential() - lrmodel.add(Dense(input_dim=ninputs, output_dim=out_neurons1,activation='sigmoid'\ - ,kernel_regularizer=regularizers.l2(0.00000001))) - lrmodel.add(Dropout(0.5)) - #lrmodel.add(Dense(out_neurons2)) - #lrmodel.add(Dropout(0.5)) - lrmodel.add(Dense(output_dim=nclass)) - - #lrmodel.add(Dense(input_dim=ninputs, output_dim=nclass)) - #lrmodel.add(Dropout(0.3)) - lrmodel.add(Activation('softmax')) - lrmodel.compile(loss='categorical_crossentropy', optimizer='adam') - return lrmodel - """ - - -def train_model(lrmodel, X, Y, devX, devY, devscores, feat_train, feat_dev, train_tfidf, test_tfidf): - """ - Train model, using pearsonr on dev for early stopping - """ - done = False - best = -1.0 - #r = np.arange(1,5) - - while not done: - # Every 100 epochs, check Pearson on development set - lrmodel.fit([X, feat_train, train_tfidf], Y, verbose=2, shuffle=False, - nb_epoch=3, validation_data=([devX, feat_dev, test_tfidf], devY)) - #yhat = np.dot(lrmodel.predict(devX, verbose=2), r) - yhat = lrmodel.predict([devX, feat_dev, test_tfidf], verbose=2) - yhat = [i.argmax()for i in yhat] - - string_predicted, test_stances = [], [] - - for i, j in zip(yhat, devscores): - if i == 3: - string_predicted.append('unrelated') - if i == 0: - string_predicted.append('agree') - if i == 2: - 
string_predicted.append('discuss') - if i == 1: - string_predicted.append('disagree') - if j == 3: - test_stances.append('unrelated') - if j == 0: - test_stances.append('agree') - if j == 2: - test_stances.append('discuss') - if j == 1: - test_stances.append('disagree') - print 'using new limit value....' - #score = accuracy_score(devscores, yhat) - score = report_score(test_stances, string_predicted, val=True) - # return lrmodel - - if score > best: - print score - best = score - bestlrmodel = prepare_model( - ninputs=X.shape[1], n_feats=feat_train.shape[1], n_tfidf=train_tfidf.shape[1]) - bestlrmodel.set_weights(lrmodel.get_weights()) - else: - done = True - print '***** best model obtained with score', best, '******' - - yhat = bestlrmodel.predict([devX, feat_dev, test_tfidf], verbose=2) - yhat = [i.argmax()for i in yhat] - string_predicted, test_stances = [], [] - - for i, j in zip(yhat, devscores): - if i == 3: - string_predicted.append('unrelated') - if i == 0: - string_predicted.append('agree') - if i == 2: - string_predicted.append('discuss') - if i == 1: - string_predicted.append('disagree') - if j == 3: - test_stances.append('unrelated') - if j == 0: - test_stances.append('agree') - if j == 2: - test_stances.append('discuss') - if j == 1: - test_stances.append('disagree') - - report_score(test_stances, string_predicted) - return bestlrmodel - - -def load_features(): - - train_hand = np.load('/fncdata3/hand.train.npy') - #train_overlap = np.load('/fncdata3/overlap.train.npy') - #train_refuting = np.load('/fncdata3/refuting.train.npy') - #train_polarity = np.load('/fncdata3/polarity.train.npy') - test_hand = np.load('/fncdata3/hand.test.npy') - #test_overlap = np.load('/fncdata3/overlap.test.npy') - #test_refuting = np.load('/fncdata3/refuting.test.npy') - #test_polarity = np.load('/fncdata3/polarity.test.npy') - ''' - train_other = np.load('/fncdata4/x_train.npy') - test_other = np.load('/fncdata4/x_test.npy') - train_other = train_other[:,16] - test_other = test_other[:,16] - #train_X = np.c_[train_polarity, train_refuting, train_overlap] - #test_X = np.c_[test_polarity, test_refuting, test_overlap] - for k,i in enumerate(test_other): - if math.isnan(i): - #print 'here',k - test_other[k] = 0.0 - - train_X = np.c_[train_hand, train_other] - test_X = np.c_[test_hand, test_other] - - train_feat = np.load('/fncdata3/feat_train.npy') - train_other = np.load('/fncdata3/x_train.npy') - test_feat = np.load('/fncdata3/feat_test.npy') - test_other = np.load('/fncdata3/x_test.npy') - train_X = np.c_[train_feat, train_other] - test_X = np.c_[test_feat, test_other] - - for k,i in enumerate(test_X): - for ind,j in enumerate(i): - if math.isnan(j): - #print 'here',k - test_X[k][ind] = 0.0 - - ss = StandardScaler() - ss.fit(np.vstack((train_X, test_X))) - feat1_train = ss.transform(train_X) - feat1_test = ss.transform(test_X) - - #feat_dev = feat1_train[len(trainF):] - #feat1_train = feat1_train[0:len(trainF)] - - #feat_dev = feat1_test - ''' - return train_hand, test_hand -''' -Evaluation code for the SICK dataset (SemEval 2014 Task 1) -''' - -import os -import numpy as np -import os.path -import shutil -import keras.backend as K -from sklearn.metrics import mean_squared_error as mse -from scipy.stats import pearsonr -from scipy.stats import spearmanr -from sklearn.utils import shuffle - -from keras.models import Sequential, Model -from keras.models import load_model -from keras.layers.core import Dense, Activation, Dropout -from keras.layers import Merge, Input, Multiply, Layer -from 
keras.optimizers import Adam -from keras.utils import to_categorical -from sklearn.preprocessing import StandardScaler - - -class Abs(Layer): - def __init__(self, **kwargs): - super(Abs, self).__init__(**kwargs) - - def call(self, x, mask=None): - inp1, inp2 = x[0], x[1] - return K.abs(inp1-inp2) - - def get_output_shape_for(self, input_shape): - return input_shape - - -def evaluate(encoder, seed=1234, evaltest=False, loc='F:\\workspace\\project\\Siamese\\skip-thoughts-master\\data\\'): - """ - Run experiment - """ - print 'Preparing data...' - train, dev, test, scores = load_data(loc) - train[0], train[1], scores[0] = shuffle( - train[0], train[1], scores[0], random_state=seed) - - print 'Computing training skipthoughts...' - trainA = encoder.encode(train[0], verbose=False, use_eos=True) - trainB = encoder.encode(train[1], verbose=False, use_eos=True) - - print 'Computing development skipthoughts...' - devA = encoder.encode(dev[0], verbose=False, use_eos=True) - devB = encoder.encode(dev[1], verbose=False, use_eos=True) - - print 'Computing test skipthoughts...' - testA = encoder.encode(test[0], verbose=False, use_eos=True) - testB = encoder.encode(test[1], verbose=False, use_eos=True) - - print 'Computing feature combinations...' - testF = np.c_[np.abs(testA - testB), testA * testB] - - print 'Computing feature combinations...' - trainF = np.c_[np.abs(trainA - trainB), trainA * trainB] - #devF = np.c_[np.abs(devA - devB), devA * devB] - - devF = testF - - #trainF = np.c_[trainA, trainB] - #devF = np.c_[devA, devB] - - print 'Computing external feature ...' - feat_train = np.load('feat_train.npy') - feat_test = np.load('feat_test.npy') - - x_train = np.load('x_train.npy') - x_test = np.load('x_test.npy') - - feat1_train = np.hstack((feat_train, x_train)) - feat1_test = np.hstack((feat_test, x_test)) - - ss = StandardScaler() - ss.fit(np.vstack((feat1_train, feat1_test))) - feat1_train = ss.transform(feat1_train) - feat1_test = ss.transform(feat1_test) - - #feat_dev = feat1_train[len(trainF):] - feat1_train = feat1_train[0:len(trainF)] - - feat_dev = feat1_test - - print 'Encoding labels...' - - trainY = encode_labels(scores[0]) - #devY = encode_labels(scores[1]) - devY = encode_labels(scores[2]) - # print 'few changing....' - #scores[0] = [i-1 for i in scores[0]] - #scores[1] = [i-1 for i in scores[1]] - #scores[2] = [i-1 for i in scores[2]] - - #trainY = to_categorical(np.round(scores[0]),5) - #devY = to_categorical(np.round(scores[1]),5) - - print 'Compiling model...' - lrmodel = prepare_model( - ninputs=trainF.shape[1], n_feats=feat1_train.shape[1]) - - print 'Training...' - bestlrmodel = train_model(lrmodel, trainF, trainY, - devF, devY, scores[2], feat1_train, feat_dev) - #bestlrmodel = train_model(lrmodel, trainF, trainY, devF, devY, scores[1]) - - if evaltest: - - print 'Evaluating...' 
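The evaluation below recovers a real-valued SICK relatedness score from the 5-way softmax by taking its expectation over the ratings 1..5, then reports Pearson, Spearman, and MSE against the gold scores; the same expectation drives the early-stopping check in train_model. The core step, sketched on its own:

```python
import numpy as np

def expected_rating(probs):
    # probs: (n_samples, 5) softmax outputs for ratings 1..5.
    return probs.dot(np.arange(1, 6))

# pearsonr(expected_rating(probs), gold_scores)[0] is the dev metric.
```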
- r = np.arange(1, 6) - yhat = np.dot(bestlrmodel.predict([testF, feat1_test], verbose=2), r) - #yhat = np.dot(bestlrmodel.predict_proba(testF, verbose=2), r) - pr = pearsonr(yhat, scores[2])[0] - sr = spearmanr(yhat, scores[2])[0] - se = mse(yhat, scores[2]) - print 'Test Pearson: ' + str(pr) - print 'Test Spearman: ' + str(sr) - print 'Test MSE: ' + str(se) - - return yhat - - -def prepare_model(ninputs=9600, n_feats=47, nclass=5): - """ - Set up and compile the model architecture (Logistic regression) - """ - inp1 = Input(shape=(ninputs,)) - inp2 = Input(shape=(n_feats,)) - out_neurons1 = 50 - out_neurons2 = 20 - out_neurons2 = 10 - m1 = Dense(input_dim=ninputs, output_dim=out_neurons1, - activation='sigmoid')(inp1) - m2 = Dense(input_dim=ninputs, output_dim=out_neurons1, - activation='softmax')(inp2) - - m1 = Dense(input_dim=ninputs, output_dim=out_neurons2, - activation='sigmoid')(m1) - m2 = Dense(input_dim=ninputs, output_dim=out_neurons2, - activation='softmax')(m2) - - #m1 = Dense(input_dim=ninputs, output_dim=out_neurons2,activation='sigmoid')(m1) - #m2 = Dense(input_dim=ninputs, output_dim=out_neurons2,activation='softmax')(m2) - - m = Merge(mode='concat')([m1, m2]) - - #mul = Multiply()([m1,m2]) - #add = Abs()([m1,m2]) - #m = Merge(mode='concat')([mul,add]) - - score = Dense(output_dim=nclass, activation='softmax')(m) - model = Model([inp1, inp2], score) - model.compile(loss='categorical_crossentropy', optimizer='adam') - return model - ''' - lrmodel = Sequential() - lrmodel.add(Dense(input_dim=ninputs, output_dim=nclass)) - #lrmodel.add(Activation('softmax')) - #lrmodel.compile(loss='categorical_crossentropy', optimizer='adam') - - #return lrmodel - - model_feat = Sequential() - model_feat.add(Dense(input_dim=27, output_dim=nclass)) - merge_model = Sequential() - merge_model.add(Merge([lrmodel, model_feat], mode='concat')) - merge_model.add(Dense(output_dim=nclass)) - merge_model.add(Activation('softmax')) - merge_model.compile(loss='categorical_crossentropy', optimizer='adam') - return merge_model''' - - '''lrmodel.add(Dense(input_dim=ninputs, output_dim=1000,activation = 'relu')) - lrmodel.add(Dropout(0.5)) - lrmodel.add(Dense(output_dim=500,activation = 'relu')) - lrmodel.add(Dropout(0.5)) - lrmodel.add(Dense(output_dim=nclass))''' - # return merge_model - - -def train_model(lrmodel, X, Y, devX, devY, devscores, feat_train, feat_dev): - # def train_model(lrmodel, X, Y, devX, devY, devscores): - """ - Train model, using pearsonr on dev for early stopping - """ - done = False - best = -1.0 - r = np.arange(1, 6) - num = 0 - # print type(X) - - while not done: - # Every 100 epochs, check Pearson on development set - lrmodel.fit([X, feat_train], Y, verbose=2, shuffle=False, - nb_epoch=2, validation_data=([devX, feat_dev], devY)) - yhat = np.dot(lrmodel.predict([devX, feat_dev], verbose=2), r) - #lrmodel.fit(X, Y, verbose=2, shuffle=False, validation_data=(devX, devY)) - #yhat = np.dot(lrmodel.predict_proba(devX , verbose=2), r) - - score = pearsonr(yhat, devscores)[0] - - if score > best: - print score, num - best = score - # print type(X) - bestlrmodel = prepare_model( - ninputs=X.shape[1], n_feats=feat_train.shape[1]) - weights = lrmodel.get_weights() - # print type(weights) - bestlrmodel.set_weights(weights) - # print 'thois coe' - # bst_models.append(lrmodel) - # lrmodel.save('models\\model') - # print 'here' - # num+=1 - - else: - done = True - - #bestlrmodel = load_model('models\\model'+str(num)) - - yhat = np.dot(bestlrmodel.predict([devX, feat_dev], verbose=2), r) - #yhat 
= np.dot(bestlrmodel.predict_proba(devX, verbose=2), r) - score = pearsonr(yhat, devscores)[0] - print 'Dev Pearson: ' + str(score) - return bestlrmodel - - -def encode_labels(labels, nclass=5): - """ - Label encoding from Tree LSTM paper (Tai, Socher, Manning) - """ - Y = np.zeros((len(labels), nclass)).astype('float32') - for j, y in enumerate(labels): - for i in range(nclass): - if i+1 == np.floor(y) + 1: - Y[j, i] = y - np.floor(y) - if i+1 == np.floor(y): - Y[j, i] = np.floor(y) - y + 1 - return Y - - -def load_data(loc='..\\data\\'): - """ - Load the SICK semantic-relatedness dataset - """ - trainA, trainB, devA, devB, testA, testB = [], [], [], [], [], [] - trainS, devS, testS = [], [], [] - - with open(os.path.join(loc, 'SICK_train.txt'), 'rb') as f: - for line in f: - text = line.strip().split('\t') - trainA.append(text[1]) - trainB.append(text[2]) - trainS.append(text[3]) - with open(os.path.join(loc, 'SICK_trial.txt'), 'rb') as f: - for line in f: - text = line.strip().split('\t') - devA.append(text[1]) - devB.append(text[2]) - devS.append(text[3]) - with open(os.path.join(loc, 'SICK_test_annotated.txt'), 'rb') as f: - for line in f: - text = line.strip().split('\t') - testA.append(text[1]) - testB.append(text[2]) - testS.append(text[3]) - - trainS = [float(s) for s in trainS[1:]] - devS = [float(s) for s in devS[1:]] - testS = [float(s) for s in testS[1:]] - - return [trainA[1:], trainB[1:]], [devA[1:], devB[1:]], [testA[1:], testB[1:]], [trainS, devS, testS] -import os -import re -import nltk -import numpy as np -from sklearn import feature_extraction -from tqdm import tqdm - - -_wnl = nltk.WordNetLemmatizer() - - -def normalize_word(w): - return _wnl.lemmatize(w).lower() - - -def get_tokenized_lemmas(s): - return [normalize_word(t) for t in nltk.word_tokenize(s)] - - -def clean(s): - # Cleans a string: Lowercasing, trimming, removing non-alphanumeric - - return " ".join(re.findall(r'\w+', s, flags=re.UNICODE)).lower() - - -def remove_stopwords(l): - # Removes stopwords from a list of tokens - return [w for w in l if w not in feature_extraction.text.ENGLISH_STOP_WORDS] - - -def gen_or_load_feats(feat_fn, headlines, bodies, feature_file): - if not os.path.isfile(feature_file): - feats = feat_fn(headlines, bodies) - np.save(feature_file, feats) - - return np.load(feature_file) - - -def word_overlap_features(headlines, bodies): - X = [] - for i, (headline, body) in tqdm(enumerate(zip(headlines, bodies))): - clean_headline = clean(headline) - clean_body = clean(body) - clean_headline = get_tokenized_lemmas(clean_headline) - clean_body = get_tokenized_lemmas(clean_body) - features = [ - len(set(clean_headline).intersection(clean_body)) / float(len(set(clean_headline).union(clean_body)))] - X.append(features) - return X - - -def refuting_features(headlines, bodies): - _refuting_words = [ - 'fake', - 'fraud', - 'hoax', - 'false', - 'deny', 'denies', - # 'refute', - 'not', - 'despite', - 'nope', - 'doubt', 'doubts', - 'bogus', - 'debunk', - 'pranks', - 'retract' - ] - X = [] - for i, (headline, body) in tqdm(enumerate(zip(headlines, bodies))): - clean_headline = clean(headline) - clean_headline = get_tokenized_lemmas(clean_headline) - features = [ - 1 if word in clean_headline else 0 for word in _refuting_words] - X.append(features) - return X - - -def polarity_features(headlines, bodies): - _refuting_words = [ - 'fake', - 'fraud', - 'hoax', - 'false', - 'deny', 'denies', - 'not', - 'despite', - 'nope', - 'doubt', 'doubts', - 'bogus', - 'debunk', - 'pranks', - 'retract' - ] - - def 
calculate_polarity(text): - tokens = get_tokenized_lemmas(text) - return sum([t in _refuting_words for t in tokens]) % 2 - X = [] - for i, (headline, body) in tqdm(enumerate(zip(headlines, bodies))): - clean_headline = clean(headline) - clean_body = clean(body) - features = [] - features.append(calculate_polarity(clean_headline)) - features.append(calculate_polarity(clean_body)) - X.append(features) - return np.array(X) - - -def ngrams(input, n): - input = input.split(' ') - output = [] - for i in range(len(input) - n + 1): - output.append(input[i:i + n]) - return output - - -def chargrams(input, n): - output = [] - for i in range(len(input) - n + 1): - output.append(input[i:i + n]) - return output - - -def append_chargrams(features, text_headline, text_body, size): - grams = [' '.join(x) for x in chargrams( - " ".join(remove_stopwords(text_headline.split())), size)] - grams_hits = 0 - grams_early_hits = 0 - grams_first_hits = 0 - for gram in grams: - if gram in text_body: - grams_hits += 1 - if gram in text_body[:255]: - grams_early_hits += 1 - if gram in text_body[:100]: - grams_first_hits += 1 - features.append(grams_hits) - features.append(grams_early_hits) - features.append(grams_first_hits) - return features - - -def append_ngrams(features, text_headline, text_body, size): - grams = [' '.join(x) for x in ngrams(text_headline, size)] - grams_hits = 0 - grams_early_hits = 0 - for gram in grams: - if gram in text_body: - grams_hits += 1 - if gram in text_body[:255]: - grams_early_hits += 1 - features.append(grams_hits) - features.append(grams_early_hits) - return features - - -def hand_features(headlines, bodies): - - def binary_co_occurence(headline, body): - # Count how many times a token in the title - # appears in the body text. - bin_count = 0 - bin_count_early = 0 - for headline_token in clean(headline).split(" "): - if headline_token in clean(body): - bin_count += 1 - if headline_token in clean(body)[:255]: - bin_count_early += 1 - return [bin_count, bin_count_early] - - def binary_co_occurence_stops(headline, body): - # Count how many times a token in the title - # appears in the body text. Stopwords in the title - # are ignored. 
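For intuition about the windowed counters used throughout these feature extractors: chargrams slides a fixed-width character window and ngrams a fixed-width token window over the headline, and the counters then tally how many windows reappear in the body and in its opening characters. Hypothetical examples of the two functions defined above:

```python
>>> chargrams("headline", 4)
['head', 'eadl', 'adli', 'dlin', 'line']
>>> ngrams("the quick brown fox", 2)
[['the', 'quick'], ['quick', 'brown'], ['brown', 'fox']]
```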
- bin_count = 0 - bin_count_early = 0 - for headline_token in remove_stopwords(clean(headline).split(" ")): - if headline_token in clean(body): - bin_count += 1 - bin_count_early += 1 - return [bin_count, bin_count_early] - - def count_grams(headline, body): - # Count how many times an n-gram of the title - # appears in the entire body, and intro paragraph - - clean_body = clean(body) - clean_headline = clean(headline) - features = [] - features = append_chargrams(features, clean_headline, clean_body, 2) - features = append_chargrams(features, clean_headline, clean_body, 8) - features = append_chargrams(features, clean_headline, clean_body, 4) - features = append_chargrams(features, clean_headline, clean_body, 16) - features = append_ngrams(features, clean_headline, clean_body, 2) - features = append_ngrams(features, clean_headline, clean_body, 3) - features = append_ngrams(features, clean_headline, clean_body, 4) - features = append_ngrams(features, clean_headline, clean_body, 5) - features = append_ngrams(features, clean_headline, clean_body, 6) - return features - - X = [] - for i, (headline, body) in tqdm(enumerate(zip(headlines, bodies))): - # X.append(binary_co_occurence(headline, body) - # + binary_co_occurence_stops(headline, body) - # + count_grams(headline, body)) - X.append(count_grams(headline, body)) - - return X -import sys -import numpy as np - -from sklearn.ensemble import GradientBoostingClassifier -from feature_engineering import refuting_features, polarity_features, hand_features, gen_or_load_feats -from feature_engineering import word_overlap_features -from utils.dataset import DataSet -from utils.generate_test_splits import kfold_split, get_stances_for_folds -from utils.score import report_score, LABELS, score_submission - -from utils.system import parse_params, check_version - - -def generate_features(stances, dataset, name): - h, b, y = [], [], [] - - for stance in stances: - y.append(LABELS.index(stance['Stance'])) - h.append(stance['Headline']) - b.append(dataset.articles[stance['Body ID']]) - - X_overlap = gen_or_load_feats( - word_overlap_features, h, b, "features/overlap."+name+".npy") - X_refuting = gen_or_load_feats( - refuting_features, h, b, "features/refuting."+name+".npy") - X_polarity = gen_or_load_feats( - polarity_features, h, b, "features/polarity."+name+".npy") - X_hand = gen_or_load_feats( - hand_features, h, b, "features/hand."+name+".npy") - - X = np.c_[X_hand, X_polarity, X_refuting, X_overlap] - return X, y - - -if __name__ == "__main__": - check_version() - parse_params() - - d = DataSet() - folds, hold_out = kfold_split(d, n_folds=10) - fold_stances, hold_out_stances = get_stances_for_folds(d, folds, hold_out) - - Xs = dict() - ys = dict() - - # Load/Precompute all features now - X_holdout, y_holdout = generate_features(hold_out_stances, d, "holdout") - for fold in fold_stances: - Xs[fold], ys[fold] = generate_features( - fold_stances[fold], d, str(fold)) - - best_score = 0 - best_fold = None - - print('setting the classifier.....') - # Classifier for each fold - for fold in fold_stances: - ids = list(range(len(folds))) - del ids[fold] - - X_train = np.vstack(tuple([Xs[i] for i in ids])) - y_train = np.hstack(tuple([ys[i] for i in ids])) - - X_test = Xs[fold] - y_test = ys[fold] - - clf = GradientBoostingClassifier( - n_estimators=200, random_state=14128, verbose=True) - clf.fit(X_train, y_train) - - predicted = [LABELS[int(a)] for a in clf.predict(X_test)] - actual = [LABELS[int(a)] for a in y_test] - - fold_score, _ = score_submission(actual, 
predicted) - max_fold_score, _ = score_submission(actual, actual) - - score = fold_score/max_fold_score - - print("Score for fold " + str(fold) + " was - " + str(score)) - if score > best_score: - best_score = score - best_fold = clf - - # Run on Holdout set and report the final score on the holdout set - predicted = [LABELS[int(a)] for a in best_fold.predict(X_holdout)] - actual = [LABELS[int(a)] for a in y_holdout] - - report_score(actual, predicted) -# -*- coding: utf-8 -*- -import numpy as np -import pandas as pd -from keras.utils.np_utils import to_categorical -import pandas as pd -from keras.layers.convolutional import Convolution1D -import utility -import warnings -from nltk.tokenize import regexp_tokenize -import numpy as np -import gensim as gen -import keras.backend as K -from keras.preprocessing import sequence -from keras.models import Sequential, Model -from keras.layers import Dense, Layer, Lambda, Dropout, Activation, Input, Merge, Multiply -from keras.layers import Embedding, Bidirectional, LSTM, Flatten -from keras.layers import Conv1D, GlobalMaxPooling1D -from sklearn.ensemble import GradientBoostingClassifier -from feature_engineering import refuting_features, polarity_features, hand_features, gen_or_load_feats -from feature_engineering import word_overlap_features -from utils.dataset import DataSet -from utils.generate_test_splits import kfold_split, get_stances_for_folds -from utils.score import report_score, LABELS, score_submission - - -dimx = 50 -dimy = 250 -vocab_size = 15000 - - -def trainCNN(obj, wordVec_model, dataset_headLines, dataset_body): - embedding_dim = 300 - LSTM_neurons = 50 - dense_neuron = 16 - lamda = 0.0 - nb_filter = 100 - filter_length = 4 - batch_size = 50 - epochs = 5 - ntn_out = 16 - ntn_in = nb_filter - state = False - - train_head, train_body, embedding_matrix = obj.process_data(sent_Q=dataset_headLines, - sent_A=dataset_body, dimx=dimx, dimy=dimy, - wordVec_model=wordVec_model) - inpx = Input(shape=(dimx,), dtype='int32', name='inpx') - #x = Embedding(output_dim=embedding_dim, input_dim=vocab_size, input_length=dimx)(inpx) - x = word2vec_embedding_layer(embedding_matrix)(inpx) - inpy = Input(shape=(dimy,), dtype='int32', name='inpy') - #y = Embedding(output_dim=embedding_dim, input_dim=vocab_size, input_length=dimy)(inpy) - y = word2vec_embedding_layer(embedding_matrix)(inpy) - - ques = Convolution1D(nb_filter=nb_filter, filter_length=filter_length, - border_mode='valid', activation='relu', - subsample_length=1)(x) - - ans = Convolution1D(nb_filter=nb_filter, filter_length=filter_length, - border_mode='valid', activation='relu', - subsample_length=1)(y) - - hx = Lambda(max_1d, output_shape=(nb_filter,))(ques) - hy = Lambda(max_1d, output_shape=(nb_filter,))(ans) - #hx = GlobalMaxPooling1D()(ques) - #hy = GlobalMaxPooling1D()(ans) - - #hx = Flatten() - #hy = Flatten() - - hx1 = Multiply()([hx, hy]) - hy1 = Abs()([hx, hy]) - h = Merge(mode="concat", name='h')([hx1, hy1]) - - ''' - shared_lstm = Bidirectional(LSTM(LSTM_neurons,return_sequences=True),merge_mode='sum') - #shared_lstm = LSTM(LSTM_neurons,return_sequences=True) - hx = shared_lstm(x) - #hx = Dropout(0.2)(hx) - hy = shared_lstm(y) - #hy = Dropout(0.2)(hy) - - #corr = CorrelationRegularization(-lamda)([hx,hy]) - - h1 = Flatten()(hx) - h2 = Flatten()(hy) - hx1 = Multiply()([h1,h2]) - hx2 = Abs()([h1,h2]) - h = Merge(mode="concat",name='h')([hx1,hx2]) - ''' - #h1 = Multiply()([hx,hy]) - #h2 = Abs()([hx,hy]) - #h = Merge(mode="concat",name='h')([h1,h2]) - - #h = 
Merge(mode="concat",name='h')([hx,hy]) - #h = NeuralTensorLayer(output_dim=1,input_dim=ntn_in)([hx,hy]) - #h = ntn_layer(ntn_in,ntn_out,activation=None)([hx,hy]) - #score = h - wrap = Dense(dense_neuron, activation='relu', name='wrap')(h) - #score = Dense(1,activation='sigmoid',name='score')(h) - #wrap = Dense(dense_neuron,activation='relu',name='wrap')(h) - score = Dense(4, activation='softmax', name='score')(wrap) - - # score=K.clip(score,1e-7,1.0-1e-7) - #corr = CorrelationRegularization(-lamda)([hx,hy]) - #model = Model( [inpx,inpy],[score,corr]) - model = Model([inpx, inpy], score) - model.compile(loss='categorical_crossentropy', - optimizer="adadelta", metrics=['accuracy']) - return model, train_head, train_body - - -class Abs(Layer): - def __init__(self, **kwargs): - super(Abs, self).__init__(**kwargs) - - def call(self, x, mask=None): - return K.abs(x[0] - x[1]) - - def get_output_shape_for(self, input_shape): - return input_shape - - -def max_1d(X): - return K.max(X, axis=1) - - -def generate_features(stances, dataset, name): - h, b, y = [], [], [] - - for stance in stances: - y.append(LABELS.index(stance['Stance'])) - h.append(stance['Headline']) - b.append(dataset.articles[stance['Body ID']]) - - X_overlap = gen_or_load_feats( - word_overlap_features, h, b, "features/overlap."+name+".npy") - X_refuting = gen_or_load_feats( - refuting_features, h, b, "features/refuting."+name+".npy") - X_polarity = gen_or_load_feats( - polarity_features, h, b, "features/polarity."+name+".npy") - X_hand = gen_or_load_feats( - hand_features, h, b, "features/hand."+name+".npy") - - X = np.c_[X_hand, X_polarity, X_refuting, X_overlap] - return X, y - - -def applyKFold(folds, hold_out, fold_stances, hold_out_stances): - Xs = dict() - ys = dict() - - # Load/Precompute all features now - X_holdout, y_holdout = generate_features(hold_out_stances, d, "holdout") - for fold in fold_stances: - Xs[fold], ys[fold] = generate_features( - fold_stances[fold], d, str(fold)) - - best_score = 0 - best_fold = None - - # Classifier for each fold - for fold in fold_stances: - ids = list(range(len(folds))) - del ids[fold] - - X_train = np.vstack(tuple([Xs[i] for i in ids])) - y_train = np.hstack(tuple([ys[i] for i in ids])) - - X_test = Xs[fold] - y_test = ys[fold] - - clf = GradientBoostingClassifier( - n_estimators=200, random_state=14128, verbose=True) - clf.fit(X_train, y_train) - - predicted = [LABELS[int(a)] for a in clf.predict(X_test)] - actual = [LABELS[int(a)] for a in y_test] - - fold_score, _ = score_submission(actual, predicted) - max_fold_score, _ = score_submission(actual, actual) - - score = fold_score/max_fold_score - - print("Score for fold " + str(fold) + " was - " + str(score)) - if score > best_score: - best_score = score - best_fold = clf - - # Run on Holdout set and report the final score on the holdout set - predicted = [LABELS[int(a)] for a in best_fold.predict(X_holdout)] - actual = [LABELS[int(a)] for a in y_holdout] - report_score(actual, predicted) - - -def word2vec_embedding_layer(embedding_matrix): - #weights = np.load('Word2Vec_QA.syn0.npy') - layer = Embedding(input_dim=embedding_matrix.shape[0], output_dim=embedding_matrix.shape[1], weights=[ - embedding_matrix]) - return layer - - -def generateMatrix(obj, sent_Q, sent_A): - START = '$_START_$' - END = '$_END_$' - unk_token = '$_UNK_$' - #dimx = 100 - #dimy = 200 - sent1 = [] - #sent1_Q = ques_sent - #sent1_A = ans_sent - sent1.extend(sent_Q) - # sent.extend(ques_sent) - sent1.extend(sent_A) - #sent1 = [' '.join(i) for i in sent1] - # 
sent.extend(ans_sent) - sentence = ["%s %s %s" % (START, x, END) for x in sent1] - tokenize_sent = [regexp_tokenize(x, - pattern='\w+|$[\d\.]+|\S+') for x in sentence] - - # for i in index_to_word1: - # index_to_word.append(i) - # for key in word_to_index1.keys(): - # word_to_index[key] = word_to_index1[key] - - for i, sent in enumerate(tokenize_sent): - tokenize_sent[i] = [ - w if w in obj.word_to_index else unk_token for w in sent] - - len_train = len(sent_Q) - text = [] - for i in tokenize_sent: - text.extend(i) - - sentences_x = [] - sentences_y = [] - - # print 'here' - - for sent in tokenize_sent[0:len_train]: - temp = [START for i in range(dimx)] - for ind, word in enumerate(sent[0:dimx]): - temp[ind] = word - sentences_x.append(temp) - - for sent in tokenize_sent[len_train:]: - temp = [START for i in range(dimy)] - for ind, word in enumerate(sent[0:dimy]): - temp[ind] = word - sentences_y.append(temp) - - X_data = [] - for i in sentences_x: - temp = [] - for j in i: - temp.append(obj.word_to_index[j]) - temp = np.array(temp).T - X_data.append(temp) - - y_data = [] - for i in sentences_y: - temp = [] - for j in i: - temp.append(obj.word_to_index[j]) - temp = np.array(temp).T - y_data.append(temp) - X_data = np.array(X_data) - y_data = np.array(y_data) - return X_data, y_data - - -def load_data(file_head, file_body): - head = pd.read_csv(file_head) - body = pd.read_csv(file_body) - head_array = head.values - body_array = body.values - print('number of headlines : ', len(head_array)) - print('number of news body : ', len(body_array)) - labels = head_array[:, 2] - body_id = head_array[:, 1] - dataset_headLines = head_array[:, 0] - body_ds = [] - for i in range(len(head_array)): - for j in range(len(body_array)): - if body_array[j][0] == body_id[i]: - body_ds.append(body_array[j][1]) - break - dataset_body = np.array(body_ds) - return dataset_headLines, dataset_body, labels -# -*- coding: utf-8 -*- - -# -*- coding: utf-8 -*- -import sklearn -from fnc_libs import * - -d = DataSet() -folds, hold_out = kfold_split(d, n_folds=10) -fold_stances, hold_out_stances = get_stances_for_folds(d, folds, hold_out) - -wordVec_model = gen.models.KeyedVectors.load_word2vec_format( - "/fncdata1/GoogleNews-vectors-negative300.bin.gz", binary=True) - -filename = "/fncdata/train_bodies.csv" - -body = pd.read_csv(filename) -body_array = body.values -train_dh = [] -train_db = [] -train_ds = [] - -print("Generating train dataset for CNN") -for i in range(len(fold_stances)): - for j in range(len(fold_stances[i])): - train_dh.append(fold_stances[i][j]["Headline"]) - train_ds.append(fold_stances[i][j]["Stance"]) - -for i in range(len(fold_stances)): - for j in range(len(fold_stances[i])): - body_id = fold_stances[i][j]["Body ID"] - for m in range(len(body_array)): - if body_id == body_array[m][0]: - train_db.append(body_array[m][1]) - -print("Refining training dataset for CNN") -train_rdh = [] -for i in range(len(train_dh)): - sentence = "" - for char in train_dh[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - train_rdh.append(sentence) - -train_rdb = [] -for i in range(len(train_db)): - sentence = "" - for char in train_db[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - train_rdb.append(sentence) -train_rds = [] - -for i, j in enumerate(train_ds): - if j == "unrelated": - train_rds.append("2") - elif j == "agree": - train_rds.append("1") - elif j == "disagree": - train_rds.append("0") - elif j == "discuss": - 
train_rds.append("3") - -print("Generating test dataset for CNN") -''' -test_dh, test_db, test_ds = [],[],[] - -for i in range(len(hold_out_stances)): - test_dh.append(hold_out_stances[i]["Headline"]) - test_ds.append(hold_out_stances[i]["Stance"]) - - -for i in range(len(hold_out_stances)): - body_id = hold_out_stances[i]["Body ID"] - for m in range(len(body_array)): - if body_id == body_array[m][0]: - test_db.append(body_array[m][1]) -''' - -file_head = "/fncdata/competition_test_stances.csv" -file_body = "/fncdata/test_bodies.csv" - -test_dh, test_db, test_ds = load_data(file_head, file_body) - -print("Refining testing dataset for CNN") -test_rdh = [] -for i in range(len(test_dh)): - sentence = "" - for char in test_dh[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - test_rdh.append(sentence) - -test_rdb = [] -for i in range(len(test_db)): - sentence = "" - for char in test_db[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - test_rdb.append(sentence) - -obj = utility.sample() - -print("Training CNN") -model, tr_head, tr_body = trainCNN(obj, wordVec_model, train_rdh, train_rdb) -ts_head, ts_body = generateMatrix(obj, test_rdh, test_rdb) -Y_train = to_categorical(train_rds, 4) -model.fit([tr_head, tr_body], Y_train, nb_epoch=10, verbose=2) - -print('\n model trained....\n') - -predictions = model.predict([ts_head, ts_body]) -predictions = [i.argmax()for i in predictions] -predictions = np.array(predictions) -string_predicted = [] -for i, j in enumerate(predictions): - if j == 2: - string_predicted.append("unrelated") - elif j == 1: - string_predicted.append("agree") - elif j == 0: - string_predicted.append("disagree") - elif j == 3: - string_predicted.append("discuss") - -score = sklearn.metrics.accuracy_score(test_ds, string_predicted) -report_score(test_ds, string_predicted) -# -*- coding: utf-8 -*- -import sklearn -import numpy as np -import pandas as pd -from keras.utils.np_utils import to_categorical -import pandas as pd -from keras.layers.convolutional import Convolution1D -import utility -import warnings -from nltk.tokenize import regexp_tokenize -import numpy as np -import gensim as gen -import keras.backend as K -from keras.preprocessing import sequence -from keras.models import Sequential, Model -from keras.layers import Dense, Layer, Lambda, Dropout, Activation, Input, Merge, Multiply -from keras.layers import Embedding -from keras.layers import Conv1D, GlobalMaxPooling1D -from sklearn.ensemble import GradientBoostingClassifier -from feature_engineering import refuting_features, polarity_features, hand_features, gen_or_load_feats -from feature_engineering import word_overlap_features -from utils.dataset import DataSet -from utils.generate_test_splits import kfold_split, get_stances_for_folds -from utils.score import report_score, LABELS, score_submission - - -def max_1d(X): - return K.max(X, axis=1) - - -d = DataSet() -folds, hold_out = kfold_split(d, n_folds=10) -fold_stances, hold_out_stances = get_stances_for_folds(d, folds, hold_out) - -wordVec_model = gen.models.KeyedVectors.load_word2vec_format( - "/fncdata/GoogleNews-vectors-negative300.bin.gz", binary=True) - - -class Abs(Layer): - def __init__(self, **kwargs): - super(Abs, self).__init__(**kwargs) - - def call(self, x, mask=None): - return K.abs(x[0] - x[1]) - - def get_output_shape_for(self, input_shape): - return input_shape - - -def generate_features(stances, dataset, name): - h, b, y = [], [], [] - - for stance in stances: - 
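# The if/elif ladders above translate stance labels to class ids and back.
# A pair of dicts keeps the two directions in sync; a sketch (the script
# itself encodes the ids as strings before calling to_categorical):
STANCE_TO_ID = {'disagree': 0, 'agree': 1, 'unrelated': 2, 'discuss': 3}
ID_TO_STANCE = {v: k for k, v in STANCE_TO_ID.items()}

ids = [STANCE_TO_ID[s] for s in ['agree', 'unrelated', 'discuss']]
print(ids)                             # [1, 2, 3]
print([ID_TO_STANCE[i] for i in ids])  # round-trips back to the labels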
y.append(LABELS.index(stance['Stance'])) - h.append(stance['Headline']) - b.append(dataset.articles[stance['Body ID']]) - - X_overlap = gen_or_load_feats( - word_overlap_features, h, b, "features/overlap."+name+".npy") - X_refuting = gen_or_load_feats( - refuting_features, h, b, "features/refuting."+name+".npy") - X_polarity = gen_or_load_feats( - polarity_features, h, b, "features/polarity."+name+".npy") - X_hand = gen_or_load_feats( - hand_features, h, b, "features/hand."+name+".npy") - - X = np.c_[X_hand, X_polarity, X_refuting, X_overlap] - return X, y - - -def applyKFold(folds, hold_out, fold_stances, hold_out_stances): - Xs = dict() - ys = dict() - - # Load/Precompute all features now - X_holdout, y_holdout = generate_features(hold_out_stances, d, "holdout") - for fold in fold_stances: - Xs[fold], ys[fold] = generate_features( - fold_stances[fold], d, str(fold)) - - best_score = 0 - best_fold = None - - # Classifier for each fold - for fold in fold_stances: - ids = list(range(len(folds))) - del ids[fold] - - X_train = np.vstack(tuple([Xs[i] for i in ids])) - y_train = np.hstack(tuple([ys[i] for i in ids])) - - X_test = Xs[fold] - y_test = ys[fold] - - clf = GradientBoostingClassifier( - n_estimators=200, random_state=14128, verbose=True) - clf.fit(X_train, y_train) - - predicted = [LABELS[int(a)] for a in clf.predict(X_test)] - actual = [LABELS[int(a)] for a in y_test] - - fold_score, _ = score_submission(actual, predicted) - max_fold_score, _ = score_submission(actual, actual) - - score = fold_score/max_fold_score - - print("Score for fold " + str(fold) + " was - " + str(score)) - if score > best_score: - best_score = score - best_fold = clf - - # Run on Holdout set and report the final score on the holdout set - predicted = [LABELS[int(a)] for a in best_fold.predict(X_holdout)] - actual = [LABELS[int(a)] for a in y_holdout] - - report_score(actual, predicted) - - -def word2vec_embedding_layer(embedding_matrix): - #weights = np.load('Word2Vec_QA.syn0.npy') - layer = Embedding(input_dim=embedding_matrix.shape[0], output_dim=embedding_matrix.shape[1], weights=[ - embedding_matrix]) - return layer - - -def trainCNN(obj, dataset_headLines, dataset_body): - embedding_dim = 300 - LSTM_neurons = 50 - dense_neuron = 16 - dimx = 100 - dimy = 200 - lamda = 0.0 - nb_filter = 100 - filter_length = 4 - vocab_size = 10000 - batch_size = 50 - epochs = 5 - ntn_out = 16 - ntn_in = nb_filter - state = False - - train_head, train_body, embedding_matrix = obj.process_data(sent_Q=dataset_headLines, - sent_A=dataset_body, dimx=dimx, dimy=dimy, - wordVec_model=wordVec_model) - inpx = Input(shape=(dimx,), dtype='int32', name='inpx') - #x = Embedding(output_dim=embedding_dim, input_dim=vocab_size, input_length=dimx)(inpx) - x = word2vec_embedding_layer(embedding_matrix)(inpx) - inpy = Input(shape=(dimy,), dtype='int32', name='inpy') - #y = Embedding(output_dim=embedding_dim, input_dim=vocab_size, input_length=dimy)(inpy) - y = word2vec_embedding_layer(embedding_matrix)(inpy) - ques = Convolution1D(nb_filter=nb_filter, filter_length=filter_length, - border_mode='valid', activation='relu', - subsample_length=1)(x) - - ans = Convolution1D(nb_filter=nb_filter, filter_length=filter_length, - border_mode='valid', activation='relu', - subsample_length=1)(y) - - #hx = Lambda(max_1d, output_shape=(nb_filter,))(ques) - #hy = Lambda(max_1d, output_shape=(nb_filter,))(ans) - hx = GlobalMaxPooling1D()(ques) - hy = GlobalMaxPooling1D()(ans) - #wordVec_model = [] - #h = Merge(mode="concat",name='h')([hx,hy]) - - h1 = 
Multiply()([hx, hy]) - h2 = Abs()([hx, hy]) - - h = Merge(mode="concat", name='h')([h1, h2]) - #h = NeuralTensorLayer(output_dim=1,input_dim=ntn_in)([hx,hy]) - #h = ntn_layer(ntn_in,ntn_out,activation=None)([hx,hy]) - #score = h - wrap = Dense(dense_neuron, activation='relu', name='wrap')(h) - #score = Dense(1,activation='sigmoid',name='score')(h) - #wrap = Dense(dense_neuron,activation='relu',name='wrap')(h) - score = Dense(4, activation='softmax', name='score')(wrap) - - # score=K.clip(score,1e-7,1.0-1e-7) - #corr = CorrelationRegularization(-lamda)([hx,hy]) - #model = Model( [inpx,inpy],[score,corr]) - model = Model([inpx, inpy], score) - model.compile(loss='categorical_crossentropy', - optimizer="adadelta", metrics=['accuracy']) - return model, train_head, train_body - - -def generateMatrix(obj, sent_Q, sent_A): - START = '$_START_$' - END = '$_END_$' - unk_token = '$_UNK_$' - dimx = 100 - dimy = 200 - sent1 = [] - #sent1_Q = ques_sent - #sent1_A = ans_sent - sent1.extend(sent_Q) - # sent.extend(ques_sent) - sent1.extend(sent_A) - #sent1 = [' '.join(i) for i in sent1] - # sent.extend(ans_sent) - sentence = ["%s %s %s" % (START, x, END) for x in sent1] - tokenize_sent = [regexp_tokenize(x, - pattern='\w+|$[\d\.]+|\S+') for x in sentence] - - # for i in index_to_word1: - # index_to_word.append(i) - # for key in word_to_index1.keys(): - # word_to_index[key] = word_to_index1[key] - - for i, sent in enumerate(tokenize_sent): - tokenize_sent[i] = [ - w if w in obj.word_to_index else unk_token for w in sent] - - len_train = len(sent_Q) - text = [] - for i in tokenize_sent: - text.extend(i) - - sentences_x = [] - sentences_y = [] - - # print 'here' - - for sent in tokenize_sent[0:len_train]: - temp = [START for i in range(dimx)] - for ind, word in enumerate(sent[0:dimx]): - temp[ind] = word - sentences_x.append(temp) - - for sent in tokenize_sent[len_train:]: - temp = [START for i in range(dimy)] - for ind, word in enumerate(sent[0:dimy]): - temp[ind] = word - sentences_y.append(temp) - - X_data = [] - for i in sentences_x: - temp = [] - for j in i: - temp.append(obj.word_to_index[j]) - temp = np.array(temp).T - X_data.append(temp) - - y_data = [] - for i in sentences_y: - temp = [] - for j in i: - temp.append(obj.word_to_index[j]) - temp = np.array(temp).T - y_data.append(temp) - X_data = np.array(X_data) - y_data = np.array(y_data) - return X_data, y_data - -#print("Applying FNC K fold algorithm") -#applyKFold(folds, hold_out, fold_stances, hold_out_stances) - - -filename = "/fncdata/train_bodies.csv" -body = pd.read_csv(filename) -body_array = body.values -train_dh = [] -train_db = [] -train_ds = [] - -print("Generating train dataset for CNN") -for i in range(len(fold_stances)): - for j in range(len(fold_stances[i])): - train_dh.append(fold_stances[i][j]["Headline"]) - train_ds.append(fold_stances[i][j]["Stance"]) - -for i in range(len(fold_stances)): - for j in range(len(fold_stances[i])): - body_id = fold_stances[i][j]["Body ID"] - for m in range(len(body_array)): - if body_id == body_array[m][0]: - train_db.append(body_array[m][1]) - -print("Refining training dataset for CNN") -train_rdh = [] -for i in range(len(train_dh)): - sentence = "" - for char in train_dh[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - train_rdh.append(sentence) - -train_rdb = [] -for i in range(len(train_db)): - sentence = "" - for char in train_db[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - 
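# A sketch of the two-branch matching model built in trainCNN above, written
# against the modern tf.keras functional API (Merge, Convolution1D and
# nb_filter/filter_length are Keras 1 names). Shapes and sizes here are
# illustrative, and a single shared Conv1D stands in for the two branches.
import tensorflow as tf
from tensorflow.keras import layers

inp_h = layers.Input(shape=(100,), dtype='int32')   # headline ids
inp_b = layers.Input(shape=(200,), dtype='int32')   # body ids
emb = layers.Embedding(input_dim=10000, output_dim=300)
conv = layers.Conv1D(filters=100, kernel_size=4, activation='relu')

hx = layers.GlobalMaxPooling1D()(conv(emb(inp_h)))
hy = layers.GlobalMaxPooling1D()(conv(emb(inp_b)))

h = layers.Concatenate(name='h')([
    layers.Multiply()([hx, hy]),                             # element-wise product
    layers.Lambda(lambda t: tf.abs(t[0] - t[1]))([hx, hy]),  # |hx - hy|
])
wrap = layers.Dense(16, activation='relu', name='wrap')(h)
score = layers.Dense(4, activation='softmax', name='score')(wrap)

model = tf.keras.Model([inp_h, inp_b], score)
model.compile(loss='categorical_crossentropy', optimizer='adadelta',
              metrics=['accuracy'])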
train_rdb.append(sentence) -train_rds = [] - -for i, j in enumerate(train_ds): - if j == "unrelated": - train_rds.append("2") - elif j == "agree": - train_rds.append("1") - elif j == "disagree": - train_rds.append("0") - elif j == "discuss": - train_rds.append("3") - -test_dh = [] -test_db = [] -test_ds = [] -print("Generating test dataset for CNN") -for i in range(len(hold_out_stances)): - test_dh.append(hold_out_stances[i]["Headline"]) - test_ds.append(hold_out_stances[i]["Stance"]) - - -for i in range(len(hold_out_stances)): - body_id = hold_out_stances[i]["Body ID"] - for m in range(len(body_array)): - if body_id == body_array[m][0]: - test_db.append(body_array[m][1]) - -print("Refining testing dataset for CNN") -test_rdh = [] -for i in range(len(test_dh)): - sentence = "" - for char in test_dh[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - test_rdh.append(sentence) - -test_rdb = [] -for i in range(len(test_db)): - sentence = "" - for char in test_db[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - test_rdb.append(sentence) - -obj = utility.sample() - -print("Training CNN") -model, tr_head, tr_body = trainCNN(obj, train_rdh, train_rdb) -ts_head, ts_body = generateMatrix(obj, test_rdh, test_rdb) -Y_train = to_categorical(train_rds, 4) -model.fit([tr_head, tr_body], Y_train, nb_epoch=4, verbose=2) - -print('\n model trained....\n') - -predictions = model.predict([ts_head, ts_body]) -predictions = [i.argmax()for i in predictions] -predictions = np.array(predictions) -string_predicted = [] -for i, j in enumerate(predictions): - if j == 2: - string_predicted.append("unrelated") - elif j == 1: - string_predicted.append("agree") - elif j == 0: - string_predicted.append("disagree") - elif j == 3: - string_predicted.append("discuss") - -score = sklearn.metrics.accuracy_score(test_ds, string_predicted) -report_score(test_ds, string_predicted) -# -*- coding: utf-8 -*- - -# -*- coding: utf-8 -*- -import sklearn -import numpy as np -import pandas as pd -from keras.utils.np_utils import to_categorical -import pandas as pd -from keras.layers.convolutional import Convolution1D -import utility -import warnings -from nltk.tokenize import regexp_tokenize -import numpy as np -import gensim as gen -import keras.backend as K -from keras.preprocessing import sequence -from keras.models import Sequential, Model -from keras.layers import Dense, Layer, Lambda, Dropout, Activation, Input, Merge, Multiply, Bidirectional, LSTM -from keras.layers import Embedding, Flatten -from keras.layers import Conv1D, GlobalMaxPooling1D -from sklearn.ensemble import GradientBoostingClassifier -from feature_engineering import refuting_features, polarity_features, hand_features, gen_or_load_feats -from feature_engineering import word_overlap_features -from utils.dataset import DataSet -from utils.generate_test_splits import kfold_split, get_stances_for_folds -from utils.score import report_score, LABELS, score_submission - -dimx = 100 -dimy = 100 - - -def max_1d(X): - return K.max(X, axis=1) - - -d = DataSet() -folds, hold_out = kfold_split(d, n_folds=10) -fold_stances, hold_out_stances = get_stances_for_folds(d, folds, hold_out) - -wordVec_model = gen.models.KeyedVectors.load_word2vec_format( - "/fncdata/GoogleNews-vectors-negative300.bin.gz", binary=True) - - -class Abs(Layer): - def __init__(self, **kwargs): - super(Abs, self).__init__(**kwargs) - - def call(self, x, mask=None): - return K.abs(x[0] - x[1]) - - def 
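# The character-by-character cleaning loops above keep letters and spaces,
# lowercase them, and turn everything else into a space. For ASCII text a
# compiled regex is a near-equivalent one-pass version:
import re

NON_ALPHA = re.compile(r'[^a-zA-Z ]')

def refine(text):
    return NON_ALPHA.sub(' ', text).lower()

print(refine("Breaking: 7 things you won't believe!"))
# letters survive lowercased; digits and punctuation become spaces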
get_output_shape_for(self, input_shape): - return input_shape - - -def generate_features(stances, dataset, name): - h, b, y = [], [], [] - - for stance in stances: - y.append(LABELS.index(stance['Stance'])) - h.append(stance['Headline']) - b.append(dataset.articles[stance['Body ID']]) - - X_overlap = gen_or_load_feats( - word_overlap_features, h, b, "features/overlap."+name+".npy") - X_refuting = gen_or_load_feats( - refuting_features, h, b, "features/refuting."+name+".npy") - X_polarity = gen_or_load_feats( - polarity_features, h, b, "features/polarity."+name+".npy") - X_hand = gen_or_load_feats( - hand_features, h, b, "features/hand."+name+".npy") - - X = np.c_[X_hand, X_polarity, X_refuting, X_overlap] - return X, y - - -def applyKFold(folds, hold_out, fold_stances, hold_out_stances): - Xs = dict() - ys = dict() - - # Load/Precompute all features now - X_holdout, y_holdout = generate_features(hold_out_stances, d, "holdout") - for fold in fold_stances: - Xs[fold], ys[fold] = generate_features( - fold_stances[fold], d, str(fold)) - - best_score = 0 - best_fold = None - - # Classifier for each fold - for fold in fold_stances: - ids = list(range(len(folds))) - del ids[fold] - - X_train = np.vstack(tuple([Xs[i] for i in ids])) - y_train = np.hstack(tuple([ys[i] for i in ids])) - - X_test = Xs[fold] - y_test = ys[fold] - - clf = GradientBoostingClassifier( - n_estimators=200, random_state=14128, verbose=True) - clf.fit(X_train, y_train) - - predicted = [LABELS[int(a)] for a in clf.predict(X_test)] - actual = [LABELS[int(a)] for a in y_test] - - fold_score, _ = score_submission(actual, predicted) - max_fold_score, _ = score_submission(actual, actual) - - score = fold_score/max_fold_score - - print("Score for fold " + str(fold) + " was - " + str(score)) - if score > best_score: - best_score = score - best_fold = clf - - # Run on Holdout set and report the final score on the holdout set - predicted = [LABELS[int(a)] for a in best_fold.predict(X_holdout)] - actual = [LABELS[int(a)] for a in y_holdout] - - report_score(actual, predicted) - - -def word2vec_embedding_layer(embedding_matrix): - #weights = np.load('Word2Vec_QA.syn0.npy') - layer = Embedding(input_dim=embedding_matrix.shape[0], output_dim=embedding_matrix.shape[1], weights=[ - embedding_matrix]) - return layer - - -def trainCNN(obj, dataset_headLines, dataset_body): - embedding_dim = 300 - LSTM_neurons = 50 - dense_neuron = 16 - lamda = 0.0 - nb_filter = 100 - filter_length = 4 - vocab_size = 10000 - batch_size = 50 - epochs = 5 - ntn_out = 16 - ntn_in = nb_filter - state = False - - train_head, train_body, embedding_matrix = obj.process_data(sent_Q=dataset_headLines, - sent_A=dataset_body, dimx=dimx, dimy=dimy, - wordVec_model=wordVec_model) - inpx = Input(shape=(dimx,), dtype='int32', name='inpx') - #x = Embedding(output_dim=embedding_dim, input_dim=vocab_size, input_length=dimx)(inpx) - x = word2vec_embedding_layer(embedding_matrix)(inpx) - inpy = Input(shape=(dimy,), dtype='int32', name='inpy') - #y = Embedding(output_dim=embedding_dim, input_dim=vocab_size, input_length=dimy)(inpy) - y = word2vec_embedding_layer(embedding_matrix)(inpy) - '''ques = Convolution1D(nb_filter=nb_filter, filter_length=filter_length, - border_mode='valid', activation='relu', - subsample_length=1)(x) - - ans = Convolution1D(nb_filter=nb_filter, filter_length=filter_length, - border_mode='valid', activation='relu', - subsample_length=1)(y) - - #hx = Lambda(max_1d, output_shape=(nb_filter,))(ques) - #hy = Lambda(max_1d, output_shape=(nb_filter,))(ans) - hx 
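# word2vec_embedding_layer above expects a precomputed embedding_matrix;
# obj.process_data presumably builds one. A common recipe from a gensim
# KeyedVectors plus a word index, sketched here with hypothetical names:
import numpy as np

def build_embedding_matrix(word_to_index, keyed_vectors, dim=300):
    matrix = np.zeros((len(word_to_index), dim))
    for word, idx in word_to_index.items():
        if word in keyed_vectors:   # KeyedVectors supports membership tests
            matrix[idx] = keyed_vectors[word]
        # out-of-vocabulary words keep the all-zero row
    return matrix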
= GlobalMaxPooling1D()(ques) - hy = GlobalMaxPooling1D()(ans) - #wordVec_model = [] - #h = Merge(mode="concat",name='h')([hx,hy]) - - h1 = Multiply()([hx,hy]) - h2 = Abs()([hx,hy]) - h = Merge(mode="concat",name='h')([h1,h2]) - ''' - - shared_lstm = Bidirectional( - LSTM(LSTM_neurons, return_sequences=True), merge_mode='sum') - #shared_lstm = LSTM(LSTM_neurons,return_sequences=True) - hx = shared_lstm(x) - #hx = Dropout(0.2)(hx) - hy = shared_lstm(y) - #hy = Dropout(0.2)(hy) - - #corr = CorrelationRegularization(-lamda)([hx,hy]) - - h1 = Flatten()(hx) - h2 = Flatten()(hy) - hx1 = Multiply()([h1, h2]) - hx2 = Abs()([h1, h2]) - h = Merge(mode="concat", name='h')([hx1, hx2]) - - #h = NeuralTensorLayer(output_dim=1,input_dim=ntn_in)([hx,hy]) - #h = ntn_layer(ntn_in,ntn_out,activation=None)([hx,hy]) - #score = h - wrap = Dense(dense_neuron, activation='relu', name='wrap')(h) - #score = Dense(1,activation='sigmoid',name='score')(h) - #wrap = Dense(dense_neuron,activation='relu',name='wrap')(h) - score = Dense(4, activation='softmax', name='score')(wrap) - - # score=K.clip(score,1e-7,1.0-1e-7) - #corr = CorrelationRegularization(-lamda)([hx,hy]) - #model = Model( [inpx,inpy],[score,corr]) - model = Model([inpx, inpy], score) - model.compile(loss='categorical_crossentropy', - optimizer="adadelta", metrics=['accuracy']) - return model, train_head, train_body - - -def generateMatrix(obj, sent_Q, sent_A): - START = '$_START_$' - END = '$_END_$' - unk_token = '$_UNK_$' - #dimx = 100 - #dimy = 200 - sent1 = [] - #sent1_Q = ques_sent - #sent1_A = ans_sent - sent1.extend(sent_Q) - # sent.extend(ques_sent) - sent1.extend(sent_A) - #sent1 = [' '.join(i) for i in sent1] - # sent.extend(ans_sent) - sentence = ["%s %s %s" % (START, x, END) for x in sent1] - tokenize_sent = [regexp_tokenize(x, - pattern='\w+|$[\d\.]+|\S+') for x in sentence] - - # for i in index_to_word1: - # index_to_word.append(i) - # for key in word_to_index1.keys(): - # word_to_index[key] = word_to_index1[key] - - for i, sent in enumerate(tokenize_sent): - tokenize_sent[i] = [ - w if w in obj.word_to_index else unk_token for w in sent] - - len_train = len(sent_Q) - text = [] - for i in tokenize_sent: - text.extend(i) - - sentences_x = [] - sentences_y = [] - - # print 'here' - - for sent in tokenize_sent[0:len_train]: - temp = [START for i in range(dimx)] - for ind, word in enumerate(sent[0:dimx]): - temp[ind] = word - sentences_x.append(temp) - - for sent in tokenize_sent[len_train:]: - temp = [START for i in range(dimy)] - for ind, word in enumerate(sent[0:dimy]): - temp[ind] = word - sentences_y.append(temp) - - X_data = [] - for i in sentences_x: - temp = [] - for j in i: - temp.append(obj.word_to_index[j]) - temp = np.array(temp).T - X_data.append(temp) - - y_data = [] - for i in sentences_y: - temp = [] - for j in i: - temp.append(obj.word_to_index[j]) - temp = np.array(temp).T - y_data.append(temp) - X_data = np.array(X_data) - y_data = np.array(y_data) - return X_data, y_data - -#print("Applying FNC K fold algorithm") -#applyKFold(folds, hold_out, fold_stances, hold_out_stances) - - -filename = "/fncdata/train_bodies.csv" -body = pd.read_csv(filename) -body_array = body.values -train_dh = [] -train_db = [] -train_ds = [] - -print("Generating train dataset for CNN") -for i in range(len(fold_stances)): - for j in range(len(fold_stances[i])): - train_dh.append(fold_stances[i][j]["Headline"]) - train_ds.append(fold_stances[i][j]["Stance"]) - -for i in range(len(fold_stances)): - for j in range(len(fold_stances[i])): - body_id = 
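# The Abs layer defined above uses the Keras 1 get_output_shape_for hook.
# A sketch of the same layer against the modern tf.keras Layer API, where
# the hook is compute_output_shape and call receives the input list directly:
import tensorflow as tf

class AbsDiff(tf.keras.layers.Layer):
    """Element-wise |a - b| of two equal-shaped inputs."""

    def call(self, inputs):
        a, b = inputs
        return tf.abs(a - b)

    def compute_output_shape(self, input_shape):
        # both inputs share one shape, and the output keeps it
        return input_shape[0]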
fold_stances[i][j]["Body ID"] - for m in range(len(body_array)): - if body_id == body_array[m][0]: - train_db.append(body_array[m][1]) - -print("Refining training dataset for CNN") -train_rdh = [] -for i in range(len(train_dh)): - sentence = "" - for char in train_dh[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - train_rdh.append(sentence) - -train_rdb = [] -for i in range(len(train_db)): - sentence = "" - for char in train_db[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - train_rdb.append(sentence) -train_rds = [] - -for i, j in enumerate(train_ds): - if j == "unrelated": - train_rds.append("2") - elif j == "agree": - train_rds.append("1") - elif j == "disagree": - train_rds.append("0") - elif j == "discuss": - train_rds.append("3") - -test_dh = [] -test_db = [] -test_ds = [] -print("Generating test dataset for CNN") -for i in range(len(hold_out_stances)): - test_dh.append(hold_out_stances[i]["Headline"]) - test_ds.append(hold_out_stances[i]["Stance"]) - - -for i in range(len(hold_out_stances)): - body_id = hold_out_stances[i]["Body ID"] - for m in range(len(body_array)): - if body_id == body_array[m][0]: - test_db.append(body_array[m][1]) - -print("Refining testing dataset for CNN") -test_rdh = [] -for i in range(len(test_dh)): - sentence = "" - for char in test_dh[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - test_rdh.append(sentence) - -test_rdb = [] -for i in range(len(test_db)): - sentence = "" - for char in test_db[i]: - if char.isalpha() or char == ' ': - sentence += char.lower() - else: - sentence += ' ' - test_rdb.append(sentence) - -obj = utility.sample() - -print("Training CNN") -model, tr_head, tr_body = trainCNN(obj, train_rdh, train_rdb) -ts_head, ts_body = generateMatrix(obj, test_rdh, test_rdb) -Y_train = to_categorical(train_rds, 4) -model.fit([tr_head, tr_body], Y_train, nb_epoch=3, verbose=2) - -print('\n model trained....\n') - -predictions = model.predict([ts_head, ts_body]) -predictions = [i.argmax()for i in predictions] -predictions = np.array(predictions) -string_predicted = [] -for i, j in enumerate(predictions): - if j == 2: - string_predicted.append("unrelated") - elif j == 1: - string_predicted.append("agree") - elif j == 0: - string_predicted.append("disagree") - elif j == 3: - string_predicted.append("discuss") - -score = sklearn.metrics.accuracy_score(test_ds, string_predicted) -report_score(test_ds, string_predicted) -from __future__ import division -import numpy as np - -__author__ = "Eric Chiang" -__email__ = "eric[at]yhathq.com" - -""" - -Measurements inspired by Philip Tetlock's "Expert Political Judgment" - -Equations take from Yaniv, Yates, & Smith (1991): - "Measures of Descrimination Skill in Probabilistic Judgement" - -""" - - -def calibration(prob, outcome, n_bins=10): - """Calibration measurement for a set of predictions. - - When predicting events at a given probability, how far is frequency - of positive outcomes from that probability? - NOTE: Lower scores are better - - prob: array_like, float - Probability estimates for a set of events - - outcome: array_like, bool - If event predicted occurred - - n_bins: int - Number of judgement categories to prefrom calculation over. - Prediction are binned based on probability, since "descrete" - probabilities aren't required. 
- - """ - prob = np.array(prob) - outcome = np.array(outcome) - - c = 0.0 - # Construct bins - judgement_bins = np.arange(n_bins + 1) / n_bins - # Which bin is each prediction in? - bin_num = np.digitize(prob, judgement_bins) - for j_bin in np.unique(bin_num): - # Is event in bin - in_bin = bin_num == j_bin - # Predicted probability taken as average of preds in bin - predicted_prob = np.mean(prob[in_bin]) - # How often did events in this bin actually happen? - true_bin_prob = np.mean(outcome[in_bin]) - # Squared distance between predicted and true times num of obs - c += np.sum(in_bin) * ((predicted_prob - true_bin_prob) ** 2) - return c / len(prob) - - -def discrimination(prob, outcome, n_bins=10): - """Discrimination measurement for a set of predictions. - - For each judgement category, how far from the base probability - is the true frequency of that bin? - NOTE: High scores are better - - prob: array_like, float - Probability estimates for a set of events - - outcome: array_like, bool - If event predicted occurred - - n_bins: int - Number of judgement categories to prefrom calculation over. - Prediction are binned based on probability, since "descrete" - probabilities aren't required. - - """ - prob = np.array(prob) - outcome = np.array(outcome) - - d = 0.0 - # Base frequency of outcomes - base_prob = np.mean(outcome) - # Construct bins - judgement_bins = np.arange(n_bins + 1) / n_bins - # Which bin is each prediction in? - bin_num = np.digitize(prob, judgement_bins) - for j_bin in np.unique(bin_num): - in_bin = bin_num == j_bin - true_bin_prob = np.mean(outcome[in_bin]) - # Squared distance between true and base times num of obs - d += np.sum(in_bin) * ((true_bin_prob - base_prob) ** 2) - return d / len(prob) - -import time -from mrjob.job import MRJob -from mrjob.protocol import RawValueProtocol, ReprProtocol -import re - - -class MrS3LogParser(MRJob): - """Parses the logs from S3 based on the S3 logging format: - http://docs.aws.amazon.com/AmazonS3/latest/dev/LogFormat.html - - Aggregates a user's daily requests by user agent and operation - - Outputs date_time, requester, user_agent, operation, count - """ - - LOGPATS = r'(\S+) (\S+) \[(.*?)\] (\S+) (\S+) ' \ - r'(\S+) (\S+) (\S+) ("([^"]+)"|-) ' \ - r'(\S+) (\S+) (\S+) (\S+) (\S+) (\S+) ' \ - r'("([^"]+)"|-) ("([^"]+)"|-)' - NUM_ENTRIES_PER_LINE = 17 - logpat = re.compile(LOGPATS) - - (S3_LOG_BUCKET_OWNER, - S3_LOG_BUCKET, - S3_LOG_DATE_TIME, - S3_LOG_IP, - S3_LOG_REQUESTER_ID, - S3_LOG_REQUEST_ID, - S3_LOG_OPERATION, - S3_LOG_KEY, - S3_LOG_HTTP_METHOD, - S3_LOG_HTTP_STATUS, - S3_LOG_S3_ERROR, - S3_LOG_BYTES_SENT, - S3_LOG_OBJECT_SIZE, - S3_LOG_TOTAL_TIME, - S3_LOG_TURN_AROUND_TIME, - S3_LOG_REFERER, - S3_LOG_USER_AGENT) = range(NUM_ENTRIES_PER_LINE) - - DELIMITER = '\t' - - # We use RawValueProtocol for input to be format agnostic - # and avoid any type of parsing errors - INPUT_PROTOCOL = RawValueProtocol - - # We use RawValueProtocol for output so we can output raw lines - # instead of (k, v) pairs - OUTPUT_PROTOCOL = RawValueProtocol - - # Encode the intermediate records using repr() instead of JSON, so the - # record doesn't get Unicode-encoded - INTERNAL_PROTOCOL = ReprProtocol - - def clean_date_time_zone(self, raw_date_time_zone): - """Converts entry 22/Jul/2013:21:04:17 +0000 to the format - 'YYYY-MM-DD HH:MM:SS' which is more suitable for loading into - a database such as Redshift or RDS - - Note: requires the chars "[ ]" to be stripped prior to input - Returns the converted datetime annd timezone - or None for both 
values if failed - - TODO: Needs to combine timezone with date as one field - """ - date_time = None - time_zone_parsed = None - - # TODO: Probably cleaner to parse this with a regex - date_parsed = raw_date_time_zone[:raw_date_time_zone.find(":")] - time_parsed = raw_date_time_zone[raw_date_time_zone.find(":") + 1: - raw_date_time_zone.find("+") - 1] - time_zone_parsed = raw_date_time_zone[raw_date_time_zone.find("+"):] - - try: - date_struct = time.strptime(date_parsed, "%d/%b/%Y") - converted_date = time.strftime("%Y-%m-%d", date_struct) - date_time = converted_date + " " + time_parsed - - # Throws a ValueError exception if the operation fails that is - # caught by the calling function and is handled appropriately - except ValueError as error: - raise ValueError(error) - else: - return converted_date, date_time, time_zone_parsed - - def mapper(self, _, line): - line = line.strip() - match = self.logpat.search(line) - - date_time = None - requester = None - user_agent = None - operation = None - - try: - for n in range(self.NUM_ENTRIES_PER_LINE): - group = match.group(1 + n) - - if n == self.S3_LOG_DATE_TIME: - date, date_time, time_zone_parsed = \ - self.clean_date_time_zone(group) - # Leave the following line of code if - # you want to aggregate by date - date_time = date + " 00:00:00" - elif n == self.S3_LOG_REQUESTER_ID: - requester = group - elif n == self.S3_LOG_USER_AGENT: - user_agent = group - elif n == self.S3_LOG_OPERATION: - operation = group - else: - pass - - except Exception: - yield (("Error while parsing line: %s", line), 1) - else: - yield ((date_time, requester, user_agent, operation), 1) - - def reducer(self, key, values): - output = list(key) - output = self.DELIMITER.join(output) + \ - self.DELIMITER + \ - str(sum(values)) - - yield None, output - - def steps(self): - return [ - self.mr(mapper=self.mapper, - reducer=self.reducer) - ] - - -if __name__ == '__main__': - MrS3LogParser.run() - -from StringIO import StringIO -import unittest2 as unittest -from mr_s3_log_parser import MrS3LogParser - - -class MrTestsUtil: - - def run_mr_sandbox(self, mr_job, stdin): - # inline runs the job in the same process so small jobs tend to - # run faster and stack traces are simpler - # --no-conf prevents options from local mrjob.conf from polluting - # the testing environment - # "-" reads from standard in - mr_job.sandbox(stdin=stdin) - - # make_runner ensures job cleanup is performed regardless of - # success or failure - with mr_job.make_runner() as runner: - runner.run() - for line in runner.stream_output(): - key, value = mr_job.parse_output_line(line) - yield value - - -class TestMrS3LogParser(unittest.TestCase): - - mr_job = None - mr_tests_util = None - - RAW_LOG_LINE_INVALID = \ - '00000fe9688b6e57f75bd2b7f7c1610689e8f01000000' \ - '00000388225bcc00000 ' \ - 's3-storage [22/Jul/2013:21:03:27 +0000] ' \ - '00.111.222.33 ' \ - - RAW_LOG_LINE_VALID = \ - '00000fe9688b6e57f75bd2b7f7c1610689e8f01000000' \ - '00000388225bcc00000 ' \ - 's3-storage [22/Jul/2013:21:03:27 +0000] ' \ - '00.111.222.33 ' \ - 'arn:aws:sts::000005646931:federated-user/user 00000AB825500000 ' \ - 'REST.HEAD.OBJECT user/file.pdf ' \ - '"HEAD /user/file.pdf?versionId=00000XMHZJp6DjM9x500000' \ - '00000SDZk ' \ - 'HTTP/1.1" 200 - - 4000272 18 - "-" ' \ - '"Boto/2.5.1 (darwin) USER-AGENT/1.0.14.0" ' \ - '00000XMHZJp6DjM9x5JVEAMo8MG00000' - - DATE_TIME_ZONE_INVALID = "AB/Jul/2013:21:04:17 +0000" - DATE_TIME_ZONE_VALID = "22/Jul/2013:21:04:17 +0000" - DATE_VALID = "2013-07-22" - DATE_TIME_VALID = "2013-07-22 
21:04:17" - TIME_ZONE_VALID = "+0000" - - def __init__(self, *args, **kwargs): - super(TestMrS3LogParser, self).__init__(*args, **kwargs) - self.mr_job = MrS3LogParser(['-r', 'inline', '--no-conf', '-']) - self.mr_tests_util = MrTestsUtil() - - def test_invalid_log_lines(self): - stdin = StringIO(self.RAW_LOG_LINE_INVALID) - - for result in self.mr_tests_util.run_mr_sandbox(self.mr_job, stdin): - self.assertEqual(result.find("Error"), 0) - - def test_valid_log_lines(self): - stdin = StringIO(self.RAW_LOG_LINE_VALID) - - for result in self.mr_tests_util.run_mr_sandbox(self.mr_job, stdin): - self.assertEqual(result.find("Error"), -1) - - def test_clean_date_time_zone(self): - date, date_time, time_zone_parsed = \ - self.mr_job.clean_date_time_zone(self.DATE_TIME_ZONE_VALID) - self.assertEqual(date, self.DATE_VALID) - self.assertEqual(date_time, self.DATE_TIME_VALID) - self.assertEqual(time_zone_parsed, self.TIME_ZONE_VALID) - - # Use a lambda to delay the calling of clean_date_time_zone so that - # assertRaises has enough time to handle it properly - self.assertRaises(ValueError, - lambda: self.mr_job.clean_date_time_zone( - self.DATE_TIME_ZONE_INVALID)) - - -if __name__ == '__main__': - unittest.main() -import re - - -class TransformUtil: - - @classmethod - def remove_punctuation(cls, value): - """Removes !, #, and ?. - """ - return re.sub('[!#?]', '', value) - - @classmethod - def clean_strings(cls, strings, ops): - """General purpose method to clean strings. - - Pass in a sequence of strings and the operations to perform. - """ - result = [] - for value in strings: - for function in ops: - value = function(value) - result.append(value) - return result -class TypeUtil: - - @classmethod - def is_iterable(cls, obj): - """Determines if obj is iterable. - - Useful when writing functions that can accept multiple types of - input (list, tuple, ndarray, iterator). Pairs well with - convert_to_list. - """ - try: - iter(obj) - return True - except TypeError: - return False - - @classmethod - def convert_to_list(cls, obj): - """Converts obj to a list if it is not a list and it is iterable, - else returns the original obj. - """ - if not isinstance(obj, list) and cls.is_iterable(obj): - obj = list(obj) - return obj -"""This file contains code used in "Think Stats", -by Allen B. Downey, available from greenteapress.com - -Copyright 2014 Allen B. Downey -License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html -""" - -from __future__ import print_function - -import math -import numpy as np - -import nsfg -import thinkstats2 -import thinkplot - - -def MakeFrames(): - """Reads pregnancy data and partitions first babies and others. 
- - returns: DataFrames (all live births, first babies, others) - """ - preg = nsfg.ReadFemPreg() - - live = preg[preg.outcome == 1] - firsts = live[live.birthord == 1] - others = live[live.birthord != 1] - - assert len(live) == 9148 - assert len(firsts) == 4413 - assert len(others) == 4735 - - return live, firsts, others - - -def Summarize(live, firsts, others): - """Print various summary statistics.""" - - mean = live.prglngth.mean() - var = live.prglngth.var() - std = live.prglngth.std() - - print('Live mean', mean) - print('Live variance', var) - print('Live std', std) - - mean1 = firsts.prglngth.mean() - mean2 = others.prglngth.mean() - - var1 = firsts.prglngth.var() - var2 = others.prglngth.var() - - print('Mean') - print('First babies', mean1) - print('Others', mean2) - - print('Variance') - print('First babies', var1) - print('Others', var2) - - print('Difference in weeks', mean1 - mean2) - print('Difference in hours', (mean1 - mean2) * 7 * 24) - - print('Difference relative to 39 weeks', (mean1 - mean2) / 39 * 100) - - d = thinkstats2.CohenEffectSize(firsts.prglngth, others.prglngth) - print('Cohen d', d) - - -def PrintExtremes(live): - """Plots the histogram of pregnancy lengths and prints the extremes. - - live: DataFrame of live births - """ - hist = thinkstats2.Hist(live.prglngth) - thinkplot.Hist(hist, label='live births') - - thinkplot.Save(root='first_nsfg_hist_live', - title='Histogram', - xlabel='weeks', - ylabel='frequency') - - print('Shortest lengths:') - for weeks, freq in hist.Smallest(10): - print(weeks, freq) - - print('Longest lengths:') - for weeks, freq in hist.Largest(10): - print(weeks, freq) - - -def MakeHists(live): - """Plot Hists for live births - - live: DataFrame - others: DataFrame - """ - hist = thinkstats2.Hist(live.birthwgt_lb, label='birthwgt_lb') - thinkplot.Hist(hist) - thinkplot.Save(root='first_wgt_lb_hist', - xlabel='pounds', - ylabel='frequency', - axis=[-1, 14, 0, 3200]) - - hist = thinkstats2.Hist(live.birthwgt_oz, label='birthwgt_oz') - thinkplot.Hist(hist) - thinkplot.Save(root='first_wgt_oz_hist', - xlabel='ounces', - ylabel='frequency', - axis=[-1, 16, 0, 1200]) - - hist = thinkstats2.Hist(np.floor(live.agepreg), label='agepreg') - thinkplot.Hist(hist) - thinkplot.Save(root='first_agepreg_hist', - xlabel='years', - ylabel='frequency') - - hist = thinkstats2.Hist(live.prglngth, label='prglngth') - thinkplot.Hist(hist) - thinkplot.Save(root='first_prglngth_hist', - xlabel='weeks', - ylabel='frequency', - axis=[-1, 53, 0, 5000]) - - -def MakeComparison(firsts, others): - """Plots histograms of pregnancy length for first babies and others. - - firsts: DataFrame - others: DataFrame - """ - first_hist = thinkstats2.Hist(firsts.prglngth, label='first') - other_hist = thinkstats2.Hist(others.prglngth, label='other') - - width = 0.45 - thinkplot.PrePlot(2) - thinkplot.Hist(first_hist, align='right', width=width) - thinkplot.Hist(other_hist, align='left', width=width) - - thinkplot.Save(root='first_nsfg_hist', - title='Histogram', - xlabel='weeks', - ylabel='frequency', - axis=[27, 46, 0, 2700]) - - -def main(script): - live, firsts, others = MakeFrames() - - MakeHists(live) - PrintExtremes(live) - MakeComparison(firsts, others) - Summarize(live, firsts, others) - - -if __name__ == '__main__': - import sys - main(*sys.argv) -"""This file contains code for use with "Think Stats", -by Allen B. Downey, available from greenteapress.com - -Copyright 2010 Allen B. 
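# Summarize() above reports thinkstats2.CohenEffectSize. For reference, a
# self-contained sketch of Cohen's d with the pooled variance weighted by
# group size, matching how the book defines it:
import numpy as np

def cohen_effect_size(group1, group2):
    group1, group2 = np.asarray(group1), np.asarray(group2)
    diff = group1.mean() - group2.mean()
    n1, n2 = len(group1), len(group2)
    pooled_var = (n1 * group1.var() + n2 * group2.var()) / (n1 + n2)
    return diff / np.sqrt(pooled_var)

print(cohen_effect_size([38, 39, 40], [39, 40, 41]))  # about -1.22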
Downey -License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html -""" - -from __future__ import print_function - -from collections import defaultdict -import numpy as np -import sys - -import thinkstats2 - - -def ReadFemPreg(dct_file='2002FemPreg.dct', - dat_file='2002FemPreg.dat.gz'): - """Reads the NSFG pregnancy data. - - dct_file: string file name - dat_file: string file name - - returns: DataFrame - """ - dct = thinkstats2.ReadStataDct(dct_file) - df = dct.ReadFixedWidth(dat_file, compression='gzip') - CleanFemPreg(df) - return df - - -def CleanFemPreg(df): - """Recodes variables from the pregnancy frame. - - df: DataFrame - """ - # mother's age is encoded in centiyears; convert to years - df.agepreg /= 100.0 - - # birthwgt_lb contains at least one bogus value (51 lbs) - # replace with NaN - df.birthwgt_lb[df.birthwgt_lb > 20] = np.nan - - # replace 'not ascertained', 'refused', 'don't know' with NaN - na_vals = [97, 98, 99] - df.birthwgt_lb.replace(na_vals, np.nan, inplace=True) - df.birthwgt_oz.replace(na_vals, np.nan, inplace=True) - df.hpagelb.replace(na_vals, np.nan, inplace=True) - - df.babysex.replace([7, 9], np.nan, inplace=True) - df.nbrnaliv.replace([9], np.nan, inplace=True) - - # birthweight is stored in two columns, lbs and oz. - # convert to a single column in lb - # NOTE: creating a new column requires dictionary syntax, - # not attribute assignment (like df.totalwgt_lb) - df['totalwgt_lb'] = df.birthwgt_lb + df.birthwgt_oz / 16.0 - - # due to a bug in ReadStataDct, the last variable gets clipped; - # so for now set it to NaN - df.cmintvw = np.nan - - -def MakePregMap(df): - """Make a map from caseid to list of preg indices. - - df: DataFrame - - returns: dict that maps from caseid to list of indices into preg df - """ - d = defaultdict(list) - for index, caseid in df.caseid.iteritems(): - d[caseid].append(index) - return d - - -def main(script): - """Tests the functions in this module. - - script: string script name - """ - df = ReadFemPreg() - print(df.shape) - - assert len(df) == 13593 - - assert df.caseid[13592] == 12571 - assert df.pregordr.value_counts()[1] == 5033 - assert df.nbrnaliv.value_counts()[1] == 8981 - assert df.babysex.value_counts()[1] == 4641 - assert df.birthwgt_lb.value_counts()[7] == 3049 - assert df.birthwgt_oz.value_counts()[0] == 1037 - assert df.prglngth.value_counts()[39] == 4744 - assert df.outcome.value_counts()[1] == 9148 - assert df.birthord.value_counts()[1] == 4413 - assert df.agepreg.value_counts()[22.75] == 100 - assert df.totalwgt_lb.value_counts()[7.5] == 302 - - weights = df.finalwgt.value_counts() - key = max(weights.keys()) - assert df.finalwgt.value_counts()[key] == 6 - - print('%s: All tests passed.' % script) - - -if __name__ == '__main__': - main(*sys.argv) -"""This file contains code for use with "Think Stats", -by Allen B. Downey, available from greenteapress.com - -Copyright 2014 Allen B. 
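# CleanFemPreg above writes through chained indexing
# (df.birthwgt_lb[df.birthwgt_lb > 20] = np.nan), which newer pandas flags
# with SettingWithCopyWarning and may not write back. The .loc form does the
# same replacement as a single indexing operation:
import numpy as np
import pandas as pd

df = pd.DataFrame({'birthwgt_lb': [7.0, 8.0, 51.0]})
df.loc[df.birthwgt_lb > 20, 'birthwgt_lb'] = np.nan
print(df.birthwgt_lb.tolist())  # [7.0, 8.0, nan]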
Downey -License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html -""" - -from __future__ import print_function - -import math -import matplotlib -import matplotlib.pyplot as pyplot -import numpy as np -import pandas - -import warnings - -# customize some matplotlib attributes -#matplotlib.rc('figure', figsize=(4, 3)) - -#matplotlib.rc('font', size=14.0) -#matplotlib.rc('axes', labelsize=22.0, titlesize=22.0) -#matplotlib.rc('legend', fontsize=20.0) - -#matplotlib.rc('xtick.major', size=6.0) -#matplotlib.rc('xtick.minor', size=3.0) - -#matplotlib.rc('ytick.major', size=6.0) -#matplotlib.rc('ytick.minor', size=3.0) - - -class _Brewer(object): - """Encapsulates a nice sequence of colors. - - Shades of blue that look good in color and can be distinguished - in grayscale (up to a point). - - Borrowed from http://colorbrewer2.org/ - """ - color_iter = None - - colors = ['#081D58', - '#253494', - '#225EA8', - '#1D91C0', - '#41B6C4', - '#7FCDBB', - '#C7E9B4', - '#EDF8B1', - '#FFFFD9'] - - # lists that indicate which colors to use depending on how many are used - which_colors = [[], - [1], - [1, 3], - [0, 2, 4], - [0, 2, 4, 6], - [0, 2, 3, 5, 6], - [0, 2, 3, 4, 5, 6], - [0, 1, 2, 3, 4, 5, 6], - ] - - @classmethod - def Colors(cls): - """Returns the list of colors. - """ - return cls.colors - - @classmethod - def ColorGenerator(cls, n): - """Returns an iterator of color strings. - - n: how many colors will be used - """ - for i in cls.which_colors[n]: - yield cls.colors[i] - raise StopIteration('Ran out of colors in _Brewer.ColorGenerator') - - @classmethod - def InitializeIter(cls, num): - """Initializes the color iterator with the given number of colors.""" - cls.color_iter = cls.ColorGenerator(num) - - @classmethod - def ClearIter(cls): - """Sets the color iterator to None.""" - cls.color_iter = None - - @classmethod - def GetIter(cls): - """Gets the color iterator.""" - if cls.color_iter is None: - cls.InitializeIter(7) - - return cls.color_iter - - -def PrePlot(num=None, rows=None, cols=None): - """Takes hints about what's coming. - - num: number of lines that will be plotted - rows: number of rows of subplots - cols: number of columns of subplots - """ - if num: - _Brewer.InitializeIter(num) - - if rows is None and cols is None: - return - - if rows is not None and cols is None: - cols = 1 - - if cols is not None and rows is None: - rows = 1 - - # resize the image, depending on the number of rows and cols - size_map = {(1, 1): (8, 6), - (1, 2): (14, 6), - (1, 3): (14, 6), - (2, 2): (10, 10), - (2, 3): (16, 10), - (3, 1): (8, 10), - } - - if (rows, cols) in size_map: - fig = pyplot.gcf() - fig.set_size_inches(*size_map[rows, cols]) - - # create the first subplot - if rows > 1 or cols > 1: - pyplot.subplot(rows, cols, 1) - global SUBPLOT_ROWS, SUBPLOT_COLS - SUBPLOT_ROWS = rows - SUBPLOT_COLS = cols - - -def SubPlot(plot_number, rows=None, cols=None): - """Configures the number of subplots and changes the current plot. - - rows: int - cols: int - plot_number: int - """ - rows = rows or SUBPLOT_ROWS - cols = cols or SUBPLOT_COLS - pyplot.subplot(rows, cols, plot_number) - - -def _Underride(d, **options): - """Add key-value pairs to d only if key is not in d. - - If d is None, create a new dictionary. 
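# ColorGenerator above ends with `raise StopIteration(...)`. Under PEP 479
# (the default from Python 3.7) a StopIteration raised inside a generator is
# converted to a RuntimeError; simply returning, or falling off the end, is
# the supported way to finish. A sketch of the safe pattern, with callers
# like _UnderrideColor still able to catch StopIteration from next():
def color_generator(colors, n):
    for i in range(n):
        yield colors[i]
    return  # exhausts the generator cleanly, no explicit StopIteration

for c in color_generator(['#081D58', '#253494', '#225EA8'], 2):
    print(c)  # prints the first two colors, then stops without an error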
- - d: dictionary - options: keyword args to add to d - """ - if d is None: - d = {} - - for key, val in options.items(): - d.setdefault(key, val) - - return d - - -def Clf(): - """Clears the figure and any hints that have been set.""" - global LOC - LOC = None - _Brewer.ClearIter() - pyplot.clf() - fig = pyplot.gcf() - fig.set_size_inches(8, 6) - - -def Figure(**options): - """Sets options for the current figure.""" - _Underride(options, figsize=(6, 8)) - pyplot.figure(**options) - - -def _UnderrideColor(options): - if 'color' in options: - return options - - color_iter = _Brewer.GetIter() - - if color_iter: - try: - options['color'] = next(color_iter) - except StopIteration: - # TODO: reconsider whether this should warn - # warnings.warn('Warning: Brewer ran out of colors.') - _Brewer.ClearIter() - return options - - -def Plot(obj, ys=None, style='', **options): - """Plots a line. - - Args: - obj: sequence of x values, or Series, or anything with Render() - ys: sequence of y values - style: style string passed along to pyplot.plot - options: keyword args passed to pyplot.plot - """ - options = _UnderrideColor(options) - label = getattr(obj, 'label', '_nolegend_') - options = _Underride(options, linewidth=3, alpha=0.8, label=label) - - xs = obj - if ys is None: - if hasattr(obj, 'Render'): - xs, ys = obj.Render() - if isinstance(obj, pandas.Series): - ys = obj.values - xs = obj.index - - if ys is None: - pyplot.plot(xs, style, **options) - else: - pyplot.plot(xs, ys, style, **options) - - -def FillBetween(xs, y1, y2=None, where=None, **options): - """Plots a line. - - Args: - xs: sequence of x values - y1: sequence of y values - y2: sequence of y values - where: sequence of boolean - options: keyword args passed to pyplot.fill_between - """ - options = _UnderrideColor(options) - options = _Underride(options, linewidth=0, alpha=0.5) - pyplot.fill_between(xs, y1, y2, where, **options) - - -def Bar(xs, ys, **options): - """Plots a line. - - Args: - xs: sequence of x values - ys: sequence of y values - options: keyword args passed to pyplot.bar - """ - options = _UnderrideColor(options) - options = _Underride(options, linewidth=0, alpha=0.6) - pyplot.bar(xs, ys, **options) - - -def Scatter(xs, ys=None, **options): - """Makes a scatter plot. - - xs: x values - ys: y values - options: options passed to pyplot.scatter - """ - options = _Underride(options, color='blue', alpha=0.2, - s=30, edgecolors='none') - - if ys is None and isinstance(xs, pandas.Series): - ys = xs.values - xs = xs.index - - pyplot.scatter(xs, ys, **options) - - -def HexBin(xs, ys, **options): - """Makes a scatter plot. - - xs: x values - ys: y values - options: options passed to pyplot.scatter - """ - options = _Underride(options, cmap=matplotlib.cm.Blues) - pyplot.hexbin(xs, ys, **options) - - -def Pdf(pdf, **options): - """Plots a Pdf, Pmf, or Hist as a line. - - Args: - pdf: Pdf, Pmf, or Hist object - options: keyword args passed to pyplot.plot - """ - low, high = options.pop('low', None), options.pop('high', None) - n = options.pop('n', 101) - xs, ps = pdf.Render(low=low, high=high, n=n) - options = _Underride(options, label=pdf.label) - Plot(xs, ps, **options) - - -def Pdfs(pdfs, **options): - """Plots a sequence of PDFs. - - Options are passed along for all PDFs. If you want different - options for each pdf, make multiple calls to Pdf. 
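# _Underride above is the workhorse of every plot wrapper here: the caller's
# keyword arguments win, and the defaults only fill the gaps. A quick check:
opts = {'linewidth': 1}
opts = _Underride(opts, linewidth=3, alpha=0.8)
print(opts)  # {'linewidth': 1, 'alpha': 0.8}; the existing key is kept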
- - Args: - pdfs: sequence of PDF objects - options: keyword args passed to pyplot.plot - """ - for pdf in pdfs: - Pdf(pdf, **options) - - -def Hist(hist, **options): - """Plots a Pmf or Hist with a bar plot. - - The default width of the bars is based on the minimum difference - between values in the Hist. If that's too small, you can override - it by providing a width keyword argument, in the same units - as the values. - - Args: - hist: Hist or Pmf object - options: keyword args passed to pyplot.bar - """ - # find the minimum distance between adjacent values - xs, ys = hist.Render() - - if 'width' not in options: - try: - options['width'] = 0.9 * np.diff(xs).min() - except TypeError: - warnings.warn("Hist: Can't compute bar width automatically." - "Check for non-numeric types in Hist." - "Or try providing width option." - ) - - options = _Underride(options, label=hist.label) - options = _Underride(options, align='center') - if options['align'] == 'left': - options['align'] = 'edge' - elif options['align'] == 'right': - options['align'] = 'edge' - options['width'] *= -1 - - Bar(xs, ys, **options) - - -def Hists(hists, **options): - """Plots two histograms as interleaved bar plots. - - Options are passed along for all PMFs. If you want different - options for each pmf, make multiple calls to Pmf. - - Args: - hists: list of two Hist or Pmf objects - options: keyword args passed to pyplot.plot - """ - for hist in hists: - Hist(hist, **options) - - -def Pmf(pmf, **options): - """Plots a Pmf or Hist as a line. - - Args: - pmf: Hist or Pmf object - options: keyword args passed to pyplot.plot - """ - xs, ys = pmf.Render() - low, high = min(xs), max(xs) - - width = options.pop('width', None) - if width is None: - try: - width = np.diff(xs).min() - except TypeError: - warnings.warn("Pmf: Can't compute bar width automatically." - "Check for non-numeric types in Pmf." - "Or try providing width option.") - points = [] - - lastx = np.nan - lasty = 0 - for x, y in zip(xs, ys): - if (x - lastx) > 1e-5: - points.append((lastx, 0)) - points.append((x, 0)) - - points.append((x, lasty)) - points.append((x, y)) - points.append((x+width, y)) - - lastx = x + width - lasty = y - points.append((lastx, 0)) - pxs, pys = zip(*points) - - align = options.pop('align', 'center') - if align == 'center': - pxs = np.array(pxs) - width/2.0 - if align == 'right': - pxs = np.array(pxs) - width - - options = _Underride(options, label=pmf.label) - Plot(pxs, pys, **options) - - -def Pmfs(pmfs, **options): - """Plots a sequence of PMFs. - - Options are passed along for all PMFs. If you want different - options for each pmf, make multiple calls to Pmf. - - Args: - pmfs: sequence of PMF objects - options: keyword args passed to pyplot.plot - """ - for pmf in pmfs: - Pmf(pmf, **options) - - -def Diff(t): - """Compute the differences between adjacent elements in a sequence. - - Args: - t: sequence of number - - Returns: - sequence of differences (length one less than t) - """ - diffs = [t[i+1] - t[i] for i in range(len(t)-1)] - return diffs - - -def Cdf(cdf, complement=False, transform=None, **options): - """Plots a CDF as a line. - - Args: - cdf: Cdf object - complement: boolean, whether to plot the complementary CDF - transform: string, one of 'exponential', 'pareto', 'weibull', 'gumbel' - options: keyword args passed to pyplot.plot - - Returns: - dictionary with the scale options that should be passed to - Config, Show or Save. 
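# Diff above is the pure-Python counterpart of np.diff for 1-D sequences;
# a one-line check of the equivalence:
import numpy as np

print(Diff([1, 4, 9, 16]))              # [3, 5, 7]
print(np.diff([1, 4, 9, 16]).tolist())  # [3, 5, 7]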
- """ - xs, ps = cdf.Render() - xs = np.asarray(xs) - ps = np.asarray(ps) - - scale = dict(xscale='linear', yscale='linear') - - for s in ['xscale', 'yscale']: - if s in options: - scale[s] = options.pop(s) - - if transform == 'exponential': - complement = True - scale['yscale'] = 'log' - - if transform == 'pareto': - complement = True - scale['yscale'] = 'log' - scale['xscale'] = 'log' - - if complement: - ps = [1.0-p for p in ps] - - if transform == 'weibull': - xs = np.delete(xs, -1) - ps = np.delete(ps, -1) - ps = [-math.log(1.0-p) for p in ps] - scale['xscale'] = 'log' - scale['yscale'] = 'log' - - if transform == 'gumbel': - xs = xp.delete(xs, 0) - ps = np.delete(ps, 0) - ps = [-math.log(p) for p in ps] - scale['yscale'] = 'log' - - options = _Underride(options, label=cdf.label) - Plot(xs, ps, **options) - return scale - - -def Cdfs(cdfs, complement=False, transform=None, **options): - """Plots a sequence of CDFs. - - cdfs: sequence of CDF objects - complement: boolean, whether to plot the complementary CDF - transform: string, one of 'exponential', 'pareto', 'weibull', 'gumbel' - options: keyword args passed to pyplot.plot - """ - for cdf in cdfs: - Cdf(cdf, complement, transform, **options) - - -def Contour(obj, pcolor=False, contour=True, imshow=False, **options): - """Makes a contour plot. - - d: map from (x, y) to z, or object that provides GetDict - pcolor: boolean, whether to make a pseudocolor plot - contour: boolean, whether to make a contour plot - imshow: boolean, whether to use pyplot.imshow - options: keyword args passed to pyplot.pcolor and/or pyplot.contour - """ - try: - d = obj.GetDict() - except AttributeError: - d = obj - - _Underride(options, linewidth=3, cmap=matplotlib.cm.Blues) - - xs, ys = zip(*d.keys()) - xs = sorted(set(xs)) - ys = sorted(set(ys)) - - X, Y = np.meshgrid(xs, ys) - def func(x, y): return d.get((x, y), 0) - func = np.vectorize(func) - Z = func(X, Y) - - x_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False) - axes = pyplot.gca() - axes.xaxis.set_major_formatter(x_formatter) - - if pcolor: - pyplot.pcolormesh(X, Y, Z, **options) - if contour: - cs = pyplot.contour(X, Y, Z, **options) - pyplot.clabel(cs, inline=1, fontsize=10) - if imshow: - extent = xs[0], xs[-1], ys[0], ys[-1] - pyplot.imshow(Z, extent=extent, **options) - - -def Pcolor(xs, ys, zs, pcolor=True, contour=False, **options): - """Makes a pseudocolor plot. - - xs: - ys: - zs: - pcolor: boolean, whether to make a pseudocolor plot - contour: boolean, whether to make a contour plot - options: keyword args passed to pyplot.pcolor and/or pyplot.contour - """ - _Underride(options, linewidth=3, cmap=matplotlib.cm.Blues) - - X, Y = np.meshgrid(xs, ys) - Z = zs - - x_formatter = matplotlib.ticker.ScalarFormatter(useOffset=False) - axes = pyplot.gca() - axes.xaxis.set_major_formatter(x_formatter) - - if pcolor: - pyplot.pcolormesh(X, Y, Z, **options) - - if contour: - cs = pyplot.contour(X, Y, Z, **options) - pyplot.clabel(cs, inline=1, fontsize=10) - - -def Text(x, y, s, **options): - """Puts text in a figure. - - x: number - y: number - s: string - options: keyword args passed to pyplot.text - """ - options = _Underride(options, - fontsize=16, - verticalalignment='top', - horizontalalignment='left') - pyplot.text(x, y, s, **options) - - -LEGEND = True -LOC = None - - -def Config(**options): - """Configures the plot. - - Pulls options out of the option dictionary and passes them to - the corresponding pyplot functions. 
- """ - names = ['title', 'xlabel', 'ylabel', 'xscale', 'yscale', - 'xticks', 'yticks', 'axis', 'xlim', 'ylim'] - - for name in names: - if name in options: - getattr(pyplot, name)(options[name]) - - # looks like this is not necessary: matplotlib understands text loc specs - loc_dict = {'upper right': 1, - 'upper left': 2, - 'lower left': 3, - 'lower right': 4, - 'right': 5, - 'center left': 6, - 'center right': 7, - 'lower center': 8, - 'upper center': 9, - 'center': 10, - } - - global LEGEND - LEGEND = options.get('legend', LEGEND) - - if LEGEND: - global LOC - LOC = options.get('loc', LOC) - pyplot.legend(loc=LOC) - - -def Show(**options): - """Shows the plot. - - For options, see Config. - - options: keyword args used to invoke various pyplot functions - """ - clf = options.pop('clf', True) - Config(**options) - pyplot.show() - if clf: - Clf() - - -def Plotly(**options): - """Shows the plot. - - For options, see Config. - - options: keyword args used to invoke various pyplot functions - """ - clf = options.pop('clf', True) - Config(**options) - import plotly.plotly as plotly - url = plotly.plot_mpl(pyplot.gcf()) - if clf: - Clf() - return url - - -def Save(root=None, formats=None, **options): - """Saves the plot in the given formats and clears the figure. - - For options, see Config. - - Args: - root: string filename root - formats: list of string formats - options: keyword args used to invoke various pyplot functions - """ - clf = options.pop('clf', True) - Config(**options) - - if formats is None: - formats = ['pdf', 'eps'] - - try: - formats.remove('plotly') - Plotly(clf=False) - except ValueError: - pass - - if root: - for fmt in formats: - SaveFormat(root, fmt) - if clf: - Clf() - - -def SaveFormat(root, fmt='eps'): - """Writes the current figure to a file in the given format. - - Args: - root: string filename root - fmt: string format - """ - filename = '%s.%s' % (root, fmt) - print('Writing', filename) - pyplot.savefig(filename, format=fmt, dpi=300) - - -# provide aliases for calling functons with lower-case names -preplot = PrePlot -subplot = SubPlot -clf = Clf -figure = Figure -plot = Plot -text = Text -scatter = Scatter -pmf = Pmf -pmfs = Pmfs -hist = Hist -hists = Hists -diff = Diff -cdf = Cdf -cdfs = Cdfs -contour = Contour -pcolor = Pcolor -config = Config -show = Show -save = Save - - -def main(): - color_iter = _Brewer.ColorGenerator(7) - for color in color_iter: - print(color) - - -if __name__ == '__main__': - main() -"""This file contains code for use with "Think Stats" and -"Think Bayes", both by Allen B. Downey, available from greenteapress.com - -Copyright 2014 Allen B. Downey -License: GNU GPLv3 http://www.gnu.org/licenses/gpl.html -""" - -from __future__ import print_function, division - -"""This file contains class definitions for: - -Hist: represents a histogram (map from values to integer frequencies). - -Pmf: represents a probability mass function (map from values to probs). - -_DictWrapper: private parent class for Hist and Pmf. 
-
-Cdf: represents a discrete cumulative distribution function
-
-Pdf: represents a continuous probability density function
-
-"""
-
-import bisect
-import copy
-import logging
-import math
-import random
-import re
-from collections import Counter
-from operator import itemgetter
-import thinkplot
-import numpy as np
-import pandas
-import scipy
-from scipy import stats
-from scipy import special
-from scipy import ndimage
-from io import open
-ROOT2 = math.sqrt(2)
-
-
-def RandomSeed(x):
-    """Initialize the random and np.random generators.
-
-    x: int seed
-    """
-    random.seed(x)
-    np.random.seed(x)
-
-
-def Odds(p):
-    """Computes odds for a given probability.
-
-    Example: p=0.75 means 75 for and 25 against, or 3:1 odds in favor.
-
-    Note: when p=1, the formula for odds divides by zero, which is
-    normally undefined.  But I think it is reasonable to define Odds(1)
-    to be infinity, so that's what this function does.
-
-    p: float 0-1
-
-    Returns: float odds
-    """
-    if p == 1:
-        return float('inf')
-    return p / (1 - p)
-
-
-def Probability(o):
-    """Computes the probability corresponding to given odds.
-
-    Example: o=2 means 2:1 odds in favor, or 2/3 probability
-
-    o: float odds, strictly positive
-
-    Returns: float probability
-    """
-    return o / (o + 1)
-
-
-def Probability2(yes, no):
-    """Computes the probability corresponding to given odds.
-
-    Example: yes=2, no=1 means 2:1 odds in favor, or 2/3 probability.
-
-    yes, no: int or float odds in favor
-    """
-    return yes / (yes + no)
-
-
-class Interpolator(object):
-    """Represents a mapping between sorted sequences; performs linear interp.
-
-    Attributes:
-        xs: sorted list
-        ys: sorted list
-    """
-
-    def __init__(self, xs, ys):
-        self.xs = xs
-        self.ys = ys
-
-    def Lookup(self, x):
-        """Looks up x and returns the corresponding value of y."""
-        return self._Bisect(x, self.xs, self.ys)
-
-    def Reverse(self, y):
-        """Looks up y and returns the corresponding value of x."""
-        return self._Bisect(y, self.ys, self.xs)
-
-    def _Bisect(self, x, xs, ys):
-        """Helper function."""
-        if x <= xs[0]:
-            return ys[0]
-        if x >= xs[-1]:
-            return ys[-1]
-        i = bisect.bisect(xs, x)
-        frac = 1.0 * (x - xs[i - 1]) / (xs[i] - xs[i - 1])
-        y = ys[i - 1] + frac * 1.0 * (ys[i] - ys[i - 1])
-        return y
-
-
-class _DictWrapper(object):
-    """An object that contains a dictionary."""
-
-    def __init__(self, obj=None, label=None):
-        """Initializes the distribution.
-
-        obj: Hist, Pmf, Cdf, Pdf, dict, pandas Series, list of pairs
-        label: string label
-        """
-        self.label = label if label is not None else '_nolegend_'
-        self.d = {}
-
-        # flag whether the distribution is under a log transform
-        self.log = False
-
-        if obj is None:
-            return
-
-        if isinstance(obj, (_DictWrapper, Cdf, Pdf)):
-            self.label = label if label is not None else obj.label
-
-        if isinstance(obj, dict):
-            self.d.update(obj.items())
-        elif isinstance(obj, (_DictWrapper, Cdf, Pdf)):
-            self.d.update(obj.Items())
-        elif isinstance(obj, pandas.Series):
-            self.d.update(obj.value_counts().iteritems())
-        else:
-            # finally, treat it like a list
-            self.d.update(Counter(obj))
-
-        if len(self) > 0 and isinstance(self, Pmf):
-            self.Normalize()
-
-    def __hash__(self):
-        return id(self)
-
-    def __str__(self):
-        cls = self.__class__.__name__
-        return '%s(%s)' % (cls, str(self.d))
-
-    __repr__ = __str__
-
-    def __eq__(self, other):
-        return self.d == other.d
-
-    def __len__(self):
-        return len(self.d)
-
-    def __iter__(self):
-        return iter(self.d)
-
-    def iterkeys(self):
-        """Returns an iterator over keys."""
-        return iter(self.d)
-
-    def __contains__(self, value):
-        return value in self.d
-
-    def __getitem__(self, value):
-        return self.d.get(value, 0)
-
-    def __setitem__(self, value, prob):
-        self.d[value] = prob
-
-    def __delitem__(self, value):
-        del self.d[value]
-
-    def Copy(self, label=None):
-        """Returns a copy.
-
-        Make a shallow copy of d.  If you want a deep copy of d,
-        use copy.deepcopy on the whole object.
-
-        label: string label for the new Hist
-
-        returns: new _DictWrapper with the same type
-        """
-        new = copy.copy(self)
-        new.d = copy.copy(self.d)
-        new.label = label if label is not None else self.label
-        return new
-
-    def Scale(self, factor):
-        """Multiplies the values by a factor.
-
-        factor: what to multiply by
-
-        Returns: new object
-        """
-        new = self.Copy()
-        new.d.clear()
-
-        for val, prob in self.Items():
-            new.Set(val * factor, prob)
-        return new
-
-    def Log(self, m=None):
-        """Log transforms the probabilities.
-
-        Removes values with probability 0.
-
-        Normalizes so that the largest logprob is 0.
-        """
-        if self.log:
-            raise ValueError("Pmf/Hist already under a log transform")
-        self.log = True
-
-        if m is None:
-            m = self.MaxLike()
-
-        for x, p in self.d.items():
-            if p:
-                self.Set(x, math.log(p / m))
-            else:
-                self.Remove(x)
-
-    def Exp(self, m=None):
-        """Exponentiates the probabilities.
-
-        m: how much to shift the ps before exponentiating
-
-        If m is None, normalizes so that the largest prob is 1.
-        """
-        if not self.log:
-            raise ValueError("Pmf/Hist not under a log transform")
-        self.log = False
-
-        if m is None:
-            m = self.MaxLike()
-
-        for x, p in self.d.items():
-            self.Set(x, math.exp(p - m))
-
-    def GetDict(self):
-        """Gets the dictionary."""
-        return self.d
-
-    def SetDict(self, d):
-        """Sets the dictionary."""
-        self.d = d
-
-    def Values(self):
-        """Gets an unsorted sequence of values.
-
-        Note: one source of confusion is that the keys of this
-        dictionary are the values of the Hist/Pmf, and the
-        values of the dictionary are frequencies/probabilities.
-        """
-        return self.d.keys()
-
-    def Items(self):
-        """Gets an unsorted sequence of (value, freq/prob) pairs."""
-        return self.d.items()
-
-    def Render(self, **options):
-        """Generates a sequence of points suitable for plotting.
-
-        Note: options are ignored
-
-        Returns:
-            tuple of (sorted value sequence, freq/prob sequence)
-        """
-        if min(self.d.keys()) is np.nan:
-            logging.warning('Hist: contains NaN, may not render correctly.')
-
-        return zip(*sorted(self.Items()))
-
-    def MakeCdf(self, label=None):
-        """Makes a Cdf."""
-        label = label if label is not None else self.label
-        return Cdf(self, label=label)
-
-    def Print(self):
-        """Prints the values and freqs/probs in ascending order."""
-        for val, prob in sorted(self.d.items()):
-            print(val, prob)
-
-    def Set(self, x, y=0):
-        """Sets the freq/prob associated with the value x.
-
-        Args:
-            x: number value
-            y: number freq or prob
-        """
-        self.d[x] = y
-
-    def Incr(self, x, term=1):
-        """Increments the freq/prob associated with the value x.
-
-        Args:
-            x: number value
-            term: how much to increment by
-        """
-        self.d[x] = self.d.get(x, 0) + term
-
-    def Mult(self, x, factor):
-        """Scales the freq/prob associated with the value x.
-
-        Args:
-            x: number value
-            factor: how much to multiply by
-        """
-        self.d[x] = self.d.get(x, 0) * factor
-
-    def Remove(self, x):
-        """Removes a value.
-
-        Throws an exception if the value is not there.
-
-        Args:
-            x: value to remove
-        """
-        del self.d[x]
-
-    def Total(self):
-        """Returns the total of the frequencies/probabilities in the map."""
-        total = sum(self.d.values())
-        return total
-
-    def MaxLike(self):
-        """Returns the largest frequency/probability in the map."""
-        return max(self.d.values())
-
-    def Largest(self, n=10):
-        """Returns the largest n values, with frequency/probability.
-
-        n: number of items to return
-        """
-        return sorted(self.d.items(), reverse=True)[:n]
-
-    def Smallest(self, n=10):
-        """Returns the smallest n values, with frequency/probability.
-
-        n: number of items to return
-        """
-        return sorted(self.d.items(), reverse=False)[:n]
-
-
-class Hist(_DictWrapper):
-    """Represents a histogram, which is a map from values to frequencies.
-
-    Values can be any hashable type; frequencies are integer counters.
-    """
-
-    def Freq(self, x):
-        """Gets the frequency associated with the value x.
-
-        Args:
-            x: number value
-
-        Returns:
-            int frequency
-        """
-        return self.d.get(x, 0)
-
-    def Freqs(self, xs):
-        """Gets frequencies for a sequence of values."""
-        return [self.Freq(x) for x in xs]
-
-    def IsSubset(self, other):
-        """Checks whether the values in this histogram are a subset of
-        the values in the given histogram."""
-        for val, freq in self.Items():
-            if freq > other.Freq(val):
-                return False
-        return True
-
-    def Subtract(self, other):
-        """Subtracts the values in the given histogram from this histogram."""
-        for val, freq in other.Items():
-            self.Incr(val, -freq)
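Hist is the simplest of these wrappers: a map from values to frequencies. A quick sketch with made-up data, assuming the module is imported as `thinkstats2`:

```python
import thinkstats2

# Count outcomes of 100 hypothetical die rolls (made-up data).
rolls = [1, 2, 2, 3, 3, 3, 4, 4, 5, 6] * 10
hist = thinkstats2.Hist(rolls, label='rolls')

print(hist.Freq(3))     # frequency of the value 3
print(hist.Largest(2))  # the two largest values with their counts
hist.Print()            # all value/frequency pairs in ascending order
```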
-
-
-class Pmf(_DictWrapper):
-    """Represents a probability mass function.
-
-    Values can be any hashable type; probabilities are floating-point.
-    Pmfs are not necessarily normalized.
-    """
-
-    def Prob(self, x, default=0):
-        """Gets the probability associated with the value x.
-
-        Args:
-            x: number value
-            default: value to return if the key is not there
-
-        Returns:
-            float probability
-        """
-        return self.d.get(x, default)
-
-    def Probs(self, xs):
-        """Gets probabilities for a sequence of values."""
-        return [self.Prob(x) for x in xs]
-
-    def Percentile(self, percentage):
-        """Computes a percentile of a given Pmf.
-
-        Note: this is not super efficient.  If you are planning
-        to compute more than a few percentiles, compute the Cdf.
-
-        percentage: float 0-100
-
-        returns: value from the Pmf
-        """
-        p = percentage / 100.0
-        total = 0
-        for val, prob in sorted(self.Items()):
-            total += prob
-            if total >= p:
-                return val
-
-    def ProbGreater(self, x):
-        """Probability that a sample from this Pmf exceeds x.
-
-        x: number
-
-        returns: float probability
-        """
-        if isinstance(x, _DictWrapper):
-            return PmfProbGreater(self, x)
-        else:
-            t = [prob for (val, prob) in self.d.items() if val > x]
-            return sum(t)
-
-    def ProbLess(self, x):
-        """Probability that a sample from this Pmf is less than x.
-
-        x: number
-
-        returns: float probability
-        """
-        if isinstance(x, _DictWrapper):
-            return PmfProbLess(self, x)
-        else:
-            t = [prob for (val, prob) in self.d.items() if val < x]
-            return sum(t)
-
-    def __lt__(self, obj):
-        """Less than.
-
-        obj: number or _DictWrapper
-
-        returns: float probability
-        """
-        return self.ProbLess(obj)
-
-    def __gt__(self, obj):
-        """Greater than.
-
-        obj: number or _DictWrapper
-
-        returns: float probability
-        """
-        return self.ProbGreater(obj)
-
-    def __ge__(self, obj):
-        """Greater than or equal.
-
-        obj: number or _DictWrapper
-
-        returns: float probability
-        """
-        return 1 - (self < obj)
-
-    def __le__(self, obj):
-        """Less than or equal.
-
-        obj: number or _DictWrapper
-
-        returns: float probability
-        """
-        return 1 - (self > obj)
-
-    def Normalize(self, fraction=1.0):
-        """Normalizes this PMF so the sum of all probs is fraction.
-
-        Args:
-            fraction: what the total should be after normalization
-
-        Returns: the total probability before normalizing
-        """
-        if self.log:
-            raise ValueError("Normalize: Pmf is under a log transform")
-
-        total = self.Total()
-        if total == 0.0:
-            raise ValueError('Normalize: total probability is zero.')
-            #logging.warning('Normalize: total probability is zero.')
-            # return total
-
-        factor = fraction / total
-        for x in self.d:
-            self.d[x] *= factor
-
-        return total
-
-    def Random(self):
-        """Chooses a random element from this PMF.
-
-        Note: this is not very efficient.  If you plan to call
-        this more than a few times, consider converting to a CDF.
-
-        Returns:
-            float value from the Pmf
-        """
-        target = random.random()
-        total = 0.0
-        for x, p in self.d.items():
-            total += p
-            if total >= target:
-                return x
-
-        # we shouldn't get here
-        raise ValueError('Random: Pmf might not be normalized.')
-
-    def Mean(self):
-        """Computes the mean of a PMF.
-
-        Returns:
-            float mean
-        """
-        mean = 0.0
-        for x, p in self.d.items():
-            mean += p * x
-        return mean
-
-    def Var(self, mu=None):
-        """Computes the variance of a PMF.
-
-        mu: the point around which the variance is computed;
-            if omitted, computes the mean
-
-        returns: float variance
-        """
-        if mu is None:
-            mu = self.Mean()
-
-        var = 0.0
-        for x, p in self.d.items():
-            var += p * (x - mu) ** 2
-        return var
-
-    def Std(self, mu=None):
-        """Computes the standard deviation of a PMF.
-
-        mu: the point around which the variance is computed;
-            if omitted, computes the mean
-
-        returns: float standard deviation
-        """
-        var = self.Var(mu)
-        return math.sqrt(var)
-
-    def MaximumLikelihood(self):
-        """Returns the value with the highest probability.
-
-        Returns: the value with the highest probability
-        """
-        _, val = max((prob, val) for val, prob in self.Items())
-        return val
-
-    def CredibleInterval(self, percentage=90):
-        """Computes the central credible interval.
-
-        If percentage=90, computes the 90% CI.
-
-        Args:
-            percentage: float between 0 and 100
-
-        Returns:
-            sequence of two floats, low and high
-        """
-        cdf = self.MakeCdf()
-        return cdf.CredibleInterval(percentage)
-
-    def __add__(self, other):
-        """Computes the Pmf of the sum of values drawn from self and other.
-
-        other: another Pmf or a scalar
-
-        returns: new Pmf
-        """
-        try:
-            return self.AddPmf(other)
-        except AttributeError:
-            return self.AddConstant(other)
-
-    def AddPmf(self, other):
-        """Computes the Pmf of the sum of values drawn from self and other.
-
-        other: another Pmf
-
-        returns: new Pmf
-        """
-        pmf = Pmf()
-        for v1, p1 in self.Items():
-            for v2, p2 in other.Items():
-                pmf.Incr(v1 + v2, p1 * p2)
-        return pmf
-
-    def AddConstant(self, other):
-        """Computes the Pmf of the sum of a constant and values from self.
-
-        other: a number
-
-        returns: new Pmf
-        """
-        pmf = Pmf()
-        for v1, p1 in self.Items():
-            pmf.Set(v1 + other, p1)
-        return pmf
-
-    def __sub__(self, other):
-        """Computes the Pmf of the diff of values drawn from self and other.
-
-        other: another Pmf
-
-        returns: new Pmf
-        """
-        try:
-            return self.SubPmf(other)
-        except AttributeError:
-            return self.AddConstant(-other)
-
-    def SubPmf(self, other):
-        """Computes the Pmf of the diff of values drawn from self and other.
-
-        other: another Pmf
-
-        returns: new Pmf
-        """
-        pmf = Pmf()
-        for v1, p1 in self.Items():
-            for v2, p2 in other.Items():
-                pmf.Incr(v1 - v2, p1 * p2)
-        return pmf
-
-    def __mul__(self, other):
-        """Computes the Pmf of the product of values drawn from self and other.
-
-        other: another Pmf
-
-        returns: new Pmf
-        """
-        try:
-            return self.MulPmf(other)
-        except AttributeError:
-            return self.MulConstant(other)
-
-    def MulPmf(self, other):
-        """Computes the Pmf of the product of values drawn from self and other.
-
-        other: another Pmf
-
-        returns: new Pmf
-        """
-        pmf = Pmf()
-        for v1, p1 in self.Items():
-            for v2, p2 in other.Items():
-                pmf.Incr(v1 * v2, p1 * p2)
-        return pmf
-
-    def MulConstant(self, other):
-        """Computes the Pmf of the product of a constant and values from self.
-
-        other: a number
-
-        returns: new Pmf
-        """
-        pmf = Pmf()
-        for v1, p1 in self.Items():
-            pmf.Set(v1 * other, p1)
-        return pmf
-
-    def __div__(self, other):
-        """Computes the Pmf of the ratio of values drawn from self and other.
-
-        other: another Pmf
-
-        returns: new Pmf
-        """
-        try:
-            return self.DivPmf(other)
-        except AttributeError:
-            return self.MulConstant(1/other)
-
-    __truediv__ = __div__
-
-    def DivPmf(self, other):
-        """Computes the Pmf of the ratio of values drawn from self and other.
-
-        other: another Pmf
-
-        returns: new Pmf
-        """
-        pmf = Pmf()
-        for v1, p1 in self.Items():
-            for v2, p2 in other.Items():
-                pmf.Incr(v1 / v2, p1 * p2)
-        return pmf
-
-    def Max(self, k):
-        """Computes the CDF of the maximum of k selections from this dist.
-
-        k: int
-
-        returns: new Cdf
-        """
-        cdf = self.MakeCdf()
-        return cdf.Max(k)
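The operator overloads above let you do arithmetic on distributions directly: `__add__` enumerates all pairs of values, so `pmf1 + pmf2` is the exact distribution of the sum of two independent draws. A small sketch with made-up dice:

```python
import thinkstats2

# Pmf of a fair six-sided die; the constructor normalizes the counts.
d6 = thinkstats2.Pmf(range(1, 7))

# Distribution of the sum of two independent dice.
two_dice = d6 + d6

print(two_dice.Prob(7))  # 6/36, about 0.167
print(two_dice.Mean())   # 7.0
```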
-
-
-class Joint(Pmf):
-    """Represents a joint distribution.
-
-    The values are sequences (usually tuples)
-    """
-
-    def Marginal(self, i, label=None):
-        """Gets the marginal distribution of the indicated variable.
-
-        i: index of the variable we want
-
-        Returns: Pmf
-        """
-        pmf = Pmf(label=label)
-        for vs, prob in self.Items():
-            pmf.Incr(vs[i], prob)
-        return pmf
-
-    def Conditional(self, i, j, val, label=None):
-        """Gets the conditional distribution of the indicated variable.
-
-        Distribution of vs[i], conditioned on vs[j] = val.
-
-        i: index of the variable we want
-        j: which variable is conditioned on
-        val: the value the jth variable has to have
-
-        Returns: Pmf
-        """
-        pmf = Pmf(label=label)
-        for vs, prob in self.Items():
-            if vs[j] != val:
-                continue
-            pmf.Incr(vs[i], prob)
-
-        pmf.Normalize()
-        return pmf
-
-    def MaxLikeInterval(self, percentage=90):
-        """Returns the maximum-likelihood credible interval.
-
-        If percentage=90, computes a 90% CI containing the values
-        with the highest likelihoods.
-
-        percentage: float between 0 and 100
-
-        Returns: list of values from the suite
-        """
-        interval = []
-        total = 0
-
-        t = [(prob, val) for val, prob in self.Items()]
-        t.sort(reverse=True)
-
-        for prob, val in t:
-            interval.append(val)
-            total += prob
-            if total >= percentage / 100.0:
-                break
-
-        return interval
-
-
-def MakeJoint(pmf1, pmf2):
-    """Joint distribution of values from pmf1 and pmf2.
-
-    Assumes that the PMFs represent independent random variables.
-
-    Args:
-        pmf1: Pmf object
-        pmf2: Pmf object
-
-    Returns:
-        Joint pmf of value pairs
-    """
-    joint = Joint()
-    for v1, p1 in pmf1.Items():
-        for v2, p2 in pmf2.Items():
-            joint.Set((v1, v2), p1 * p2)
-    return joint
-
-
-def MakeHistFromList(t, label=None):
-    """Makes a histogram from an unsorted sequence of values.
-
-    Args:
-        t: sequence of numbers
-        label: string label for this histogram
-
-    Returns:
-        Hist object
-    """
-    return Hist(t, label=label)
-
-
-def MakeHistFromDict(d, label=None):
-    """Makes a histogram from a map from values to frequencies.
-
-    Args:
-        d: dictionary that maps values to frequencies
-        label: string label for this histogram
-
-    Returns:
-        Hist object
-    """
-    return Hist(d, label)
-
-
-def MakePmfFromList(t, label=None):
-    """Makes a PMF from an unsorted sequence of values.
-
-    Args:
-        t: sequence of numbers
-        label: string label for this PMF
-
-    Returns:
-        Pmf object
-    """
-    return Pmf(t, label=label)
-
-
-def MakePmfFromDict(d, label=None):
-    """Makes a PMF from a map from values to probabilities.
-
-    Args:
-        d: dictionary that maps values to probabilities
-        label: string label for this PMF
-
-    Returns:
-        Pmf object
-    """
-    return Pmf(d, label=label)
-
-
-def MakePmfFromItems(t, label=None):
-    """Makes a PMF from a sequence of value-probability pairs.
-
-    Args:
-        t: sequence of value-probability pairs
-        label: string label for this PMF
-
-    Returns:
-        Pmf object
-    """
-    return Pmf(dict(t), label=label)
-
-
-def MakePmfFromHist(hist, label=None):
-    """Makes a normalized PMF from a Hist object.
-
-    Args:
-        hist: Hist object
-        label: string label
-
-    Returns:
-        Pmf object
-    """
-    if label is None:
-        label = hist.label
-
-    return Pmf(hist, label=label)
-
-
-def MakeMixture(metapmf, label='mix'):
-    """Make a mixture distribution.
-
-    Args:
-        metapmf: Pmf that maps from Pmfs to probs.
-        label: string label for the new Pmf.
-
-    Returns: Pmf object.
-    """
-    mix = Pmf(label=label)
-    for pmf, p1 in metapmf.Items():
-        for x, p2 in pmf.Items():
-            mix.Incr(x, p1 * p2)
-    return mix
-
-
-def MakeUniformPmf(low, high, n):
-    """Make a uniform Pmf.
-
-    low: lowest value (inclusive)
-    high: highest value (inclusive)
-    n: number of values
-    """
-    pmf = Pmf()
-    for x in np.linspace(low, high, n):
-        pmf.Set(x, 1)
-    pmf.Normalize()
-    return pmf
-
-
-class Cdf(object):
-    """Represents a cumulative distribution function.
-
-    Attributes:
-        xs: sequence of values
-        ps: sequence of probabilities
-        label: string used as a graph label.
-    """
-
-    def __init__(self, obj=None, ps=None, label=None):
-        """Initializes.
-
-        If ps is provided, obj must be the corresponding list of values.
-
-        obj: Hist, Pmf, Cdf, Pdf, dict, pandas Series, list of pairs
-        ps: list of cumulative probabilities
-        label: string label
-        """
-        self.label = label if label is not None else '_nolegend_'
-
-        if isinstance(obj, (_DictWrapper, Cdf, Pdf)):
-            if not label:
-                self.label = label if label is not None else obj.label
-
-        if obj is None:
-            # caller does not provide obj, make an empty Cdf
-            self.xs = np.asarray([])
-            self.ps = np.asarray([])
-            if ps is not None:
-                logging.warning("Cdf: can't pass ps without also passing xs.")
-            return
-        else:
-            # if the caller provides xs and ps, just store them
-            if ps is not None:
-                if isinstance(ps, str):
-                    logging.warning("Cdf: ps can't be a string")
-
-                self.xs = np.asarray(obj)
-                self.ps = np.asarray(ps)
-                return
-
-        # caller has provided just obj, not ps
-        if isinstance(obj, Cdf):
-            self.xs = copy.copy(obj.xs)
-            self.ps = copy.copy(obj.ps)
-            return
-
-        if isinstance(obj, _DictWrapper):
-            dw = obj
-        else:
-            dw = Hist(obj)
-
-        if len(dw) == 0:
-            self.xs = np.asarray([])
-            self.ps = np.asarray([])
-            return
-
-        xs, freqs = zip(*sorted(dw.Items()))
-        self.xs = np.asarray(xs)
-        self.ps = np.cumsum(freqs, dtype=np.float)
-        self.ps /= self.ps[-1]
-
-    def __str__(self):
-        return 'Cdf(%s, %s)' % (str(self.xs), str(self.ps))
-
-    __repr__ = __str__
-
-    def __len__(self):
-        return len(self.xs)
-
-    def __getitem__(self, x):
-        return self.Prob(x)
-
-    def __setitem__(self):
-        raise UnimplementedMethodException()
-
-    def __delitem__(self):
-        raise UnimplementedMethodException()
-
-    def __eq__(self, other):
-        return np.all(self.xs == other.xs) and np.all(self.ps == other.ps)
-
-    def Copy(self, label=None):
-        """Returns a copy of this Cdf.
-
-        label: string label for the new Cdf
-        """
-        if label is None:
-            label = self.label
-        return Cdf(list(self.xs), list(self.ps), label=label)
-
-    def MakePmf(self, label=None):
-        """Makes a Pmf."""
-        if label is None:
-            label = self.label
-        return Pmf(self, label=label)
-
-    def Values(self):
-        """Returns a sorted list of values.
-        """
-        return self.xs
-
-    def Items(self):
-        """Returns a sorted sequence of (value, probability) pairs.
-
-        Note: in Python3, returns an iterator.
-        """
-        a = self.ps
-        b = np.roll(a, 1)
-        b[0] = 0
-        return zip(self.xs, a-b)
-
-    def Shift(self, term):
-        """Adds a term to the xs.
-
-        term: how much to add
-        """
-        new = self.Copy()
-        # don't use +=, or else an int array + float yields int array
-        new.xs = new.xs + term
-        return new
-
-    def Scale(self, factor):
-        """Multiplies the xs by a factor.
-
-        factor: what to multiply by
-        """
-        new = self.Copy()
-        # don't use *=, or else an int array * float yields int array
-        new.xs = new.xs * factor
-        return new
-
-    def Prob(self, x):
-        """Returns CDF(x), the probability that corresponds to value x.
-
-        Args:
-            x: number
-
-        Returns:
-            float probability
-        """
-        if x < self.xs[0]:
-            return 0.0
-        index = bisect.bisect(self.xs, x)
-        p = self.ps[index-1]
-        return p
-
-    def Probs(self, xs):
-        """Gets probabilities for a sequence of values.
-
-        xs: any sequence that can be converted to NumPy array
-
-        returns: NumPy array of cumulative probabilities
-        """
-        xs = np.asarray(xs)
-        index = np.searchsorted(self.xs, xs, side='right')
-        ps = self.ps[index-1]
-        ps[xs < self.xs[0]] = 0.0
-        return ps
-
-    ProbArray = Probs
-
-    def Value(self, p):
-        """Returns InverseCDF(p), the value that corresponds to probability p.
-
-        Args:
-            p: number in the range [0, 1]
-
-        Returns:
-            number value
-        """
-        if p < 0 or p > 1:
-            raise ValueError('Probability p must be in range [0, 1]')
-
-        index = bisect.bisect_left(self.ps, p)
-        return self.xs[index]
-
-    def ValueArray(self, ps):
-        """Returns InverseCDF(p), the value that corresponds to probability p.
-
-        Args:
-            ps: NumPy array of numbers in the range [0, 1]
-
-        Returns:
-            NumPy array of values
-        """
-        ps = np.asarray(ps)
-        if np.any(ps < 0) or np.any(ps > 1):
-            raise ValueError('Probability p must be in range [0, 1]')
-
-        index = np.searchsorted(self.ps, ps, side='left')
-        return self.xs[index]
-
-    def Percentile(self, p):
-        """Returns the value that corresponds to percentile p.
-
-        Args:
-            p: number in the range [0, 100]
-
-        Returns:
-            number value
-        """
-        return self.Value(p / 100.0)
-
-    def PercentileRank(self, x):
-        """Returns the percentile rank of the value x.
-
-        x: potential value in the CDF
-
-        returns: percentile rank in the range 0 to 100
-        """
-        return self.Prob(x) * 100.0
-
-    def Random(self):
-        """Chooses a random value from this distribution."""
-        return self.Value(random.random())
-
-    def Sample(self, n):
-        """Generates a random sample from this distribution.
-
-        n: int length of the sample
-        returns: NumPy array
-        """
-        ps = np.random.random(n)
-        return self.ValueArray(ps)
-
-    def Mean(self):
-        """Computes the mean of a CDF.
-
-        Returns:
-            float mean
-        """
-        old_p = 0
-        total = 0.0
-        for x, new_p in zip(self.xs, self.ps):
-            p = new_p - old_p
-            total += p * x
-            old_p = new_p
-        return total
-
-    def CredibleInterval(self, percentage=90):
-        """Computes the central credible interval.
-
-        If percentage=90, computes the 90% CI.
-
-        Args:
-            percentage: float between 0 and 100
-
-        Returns:
-            sequence of two floats, low and high
-        """
-        prob = (1 - percentage / 100.0) / 2
-        interval = self.Value(prob), self.Value(1 - prob)
-        return interval
-
-    ConfidenceInterval = CredibleInterval
-
-    def _Round(self, multiplier=1000.0):
-        """
-        An entry is added to the cdf only if the percentile differs
-        from the previous value in a significant digit, where the number
-        of significant digits is determined by multiplier.  The
-        default is 1000, which keeps log10(1000) = 3 significant digits.
-        """
-        # TODO(write this method)
-        raise UnimplementedMethodException()
-
-    def Render(self, **options):
-        """Generates a sequence of points suitable for plotting.
-
-        An empirical CDF is a step function; linear interpolation
-        can be misleading.
-
-        Note: options are ignored
-
-        Returns:
-            tuple of (xs, ps)
-        """
-        def interleave(a, b):
-            c = np.empty(a.shape[0] + b.shape[0])
-            c[::2] = a
-            c[1::2] = b
-            return c
-
-        a = np.array(self.xs)
-        xs = interleave(a, a)
-        shift_ps = np.roll(self.ps, 1)
-        shift_ps[0] = 0
-        ps = interleave(shift_ps, self.ps)
-        return xs, ps
-
-    def Max(self, k):
-        """Computes the CDF of the maximum of k selections from this dist.
-
-        k: int
-
-        returns: new Cdf
-        """
-        cdf = self.Copy()
-        cdf.ps **= k
-        return cdf
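A Cdf can be queried in both directions: `Prob` maps a value to a cumulative probability and `Value` maps a probability back to a value. A short sketch on made-up data:

```python
import thinkstats2

cdf = thinkstats2.Cdf([1, 2, 2, 3, 5], label='toy')

print(cdf.Prob(2))            # 0.6: fraction of values <= 2
print(cdf.Value(0.5))         # 2: the median, via the inverse CDF
print(cdf.PercentileRank(3))  # 80.0
print(cdf.Sample(4))          # four random draws from the distribution
```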
-
-
-def MakeCdfFromItems(items, label=None):
-    """Makes a cdf from an unsorted sequence of (value, frequency) pairs.
-
-    Args:
-        items: unsorted sequence of (value, frequency) pairs
-        label: string label for this CDF
-
-    Returns:
-        cdf: list of (value, fraction) pairs
-    """
-    return Cdf(dict(items), label=label)
-
-
-def MakeCdfFromDict(d, label=None):
-    """Makes a CDF from a dictionary that maps values to frequencies.
-
-    Args:
-        d: dictionary that maps values to frequencies.
-        label: string label for the data.
-
-    Returns:
-        Cdf object
-    """
-    return Cdf(d, label=label)
-
-
-def MakeCdfFromList(seq, label=None):
-    """Creates a CDF from an unsorted sequence.
-
-    Args:
-        seq: unsorted sequence of sortable values
-        label: string label for the cdf
-
-    Returns:
-        Cdf object
-    """
-    return Cdf(seq, label=label)
-
-
-def MakeCdfFromHist(hist, label=None):
-    """Makes a CDF from a Hist object.
-
-    Args:
-        hist: Pmf.Hist object
-        label: string label for the data.
-
-    Returns:
-        Cdf object
-    """
-    if label is None:
-        label = hist.label
-
-    return Cdf(hist, label=label)
-
-
-def MakeCdfFromPmf(pmf, label=None):
-    """Makes a CDF from a Pmf object.
-
-    Args:
-        pmf: Pmf.Pmf object
-        label: string label for the data.
-
-    Returns:
-        Cdf object
-    """
-    if label is None:
-        label = pmf.label
-
-    return Cdf(pmf, label=label)
-
-
-class UnimplementedMethodException(Exception):
-    """Exception if someone calls a method that should be overridden."""
-
-
-class Suite(Pmf):
-    """Represents a suite of hypotheses and their probabilities."""
-
-    def Update(self, data):
-        """Updates each hypothesis based on the data.
-
-        data: any representation of the data
-
-        returns: the normalizing constant
-        """
-        for hypo in self.Values():
-            like = self.Likelihood(data, hypo)
-            self.Mult(hypo, like)
-        return self.Normalize()
-
-    def LogUpdate(self, data):
-        """Updates a suite of hypotheses based on new data.
-
-        Modifies the suite directly; if you want to keep the original, make
-        a copy.
-
-        Note: unlike Update, LogUpdate does not normalize.
-
-        Args:
-            data: any representation of the data
-        """
-        for hypo in self.Values():
-            like = self.LogLikelihood(data, hypo)
-            self.Incr(hypo, like)
-
-    def UpdateSet(self, dataset):
-        """Updates each hypothesis based on the dataset.
-
-        This is more efficient than calling Update repeatedly because
-        it waits until the end to Normalize.
-
-        Modifies the suite directly; if you want to keep the original, make
-        a copy.
-
-        dataset: a sequence of data
-
-        returns: the normalizing constant
-        """
-        for data in dataset:
-            for hypo in self.Values():
-                like = self.Likelihood(data, hypo)
-                self.Mult(hypo, like)
-        return self.Normalize()
-
-    def LogUpdateSet(self, dataset):
-        """Updates each hypothesis based on the dataset.
-
-        Modifies the suite directly; if you want to keep the original, make
-        a copy.
-
-        dataset: a sequence of data
-
-        returns: None
-        """
-        for data in dataset:
-            self.LogUpdate(data)
-
-    def Likelihood(self, data, hypo):
-        """Computes the likelihood of the data under the hypothesis.
-
-        hypo: some representation of the hypothesis
-        data: some representation of the data
-        """
-        raise UnimplementedMethodException()
-
-    def LogLikelihood(self, data, hypo):
-        """Computes the log likelihood of the data under the hypothesis.
-
-        hypo: some representation of the hypothesis
-        data: some representation of the data
-        """
-        raise UnimplementedMethodException()
-
-    def Print(self):
-        """Prints the hypotheses and their probabilities."""
-        for hypo, prob in sorted(self.Items()):
-            print(hypo, prob)
-
-    def MakeOdds(self):
-        """Transforms from probabilities to odds.
-
-        Values with prob=0 are removed.
-        """
-        for hypo, prob in self.Items():
-            if prob:
-                self.Set(hypo, Odds(prob))
-            else:
-                self.Remove(hypo)
-
-    def MakeProbs(self):
-        """Transforms from odds to probabilities."""
-        for hypo, odds in self.Items():
-            self.Set(hypo, Probability(odds))
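`Suite.Likelihood` is deliberately abstract: a Bayesian update comes from subclassing and filling it in. A minimal sketch (the `Coin` class and its data encoding are invented here for illustration):

```python
import thinkstats2

class Coin(thinkstats2.Suite):
    """Hypotheses are probabilities of heads, in percent."""

    def Likelihood(self, data, hypo):
        # data is 'H' or 'T'; hypo is the percent chance of heads.
        x = hypo / 100.0
        return x if data == 'H' else 1 - x

suite = Coin(range(0, 101))   # uniform prior over 0..100 percent
for outcome in 'HHTH':
    suite.Update(outcome)     # multiply in the likelihood, renormalize

print(suite.Mean())           # posterior mean, in percent
```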
-
-
-def MakeSuiteFromList(t, label=None):
-    """Makes a suite from an unsorted sequence of values.
-
-    Args:
-        t: sequence of numbers
-        label: string label for this suite
-
-    Returns:
-        Suite object
-    """
-    hist = MakeHistFromList(t, label=label)
-    d = hist.GetDict()
-    return MakeSuiteFromDict(d)
-
-
-def MakeSuiteFromHist(hist, label=None):
-    """Makes a normalized suite from a Hist object.
-
-    Args:
-        hist: Hist object
-        label: string label
-
-    Returns:
-        Suite object
-    """
-    if label is None:
-        label = hist.label
-
-    # make a copy of the dictionary
-    d = dict(hist.GetDict())
-    return MakeSuiteFromDict(d, label)
-
-
-def MakeSuiteFromDict(d, label=None):
-    """Makes a suite from a map from values to probabilities.
-
-    Args:
-        d: dictionary that maps values to probabilities
-        label: string label for this suite
-
-    Returns:
-        Suite object
-    """
-    suite = Suite(label=label)
-    suite.SetDict(d)
-    suite.Normalize()
-    return suite
-
-
-class Pdf(object):
-    """Represents a probability density function (PDF)."""
-
-    def Density(self, x):
-        """Evaluates this Pdf at x.
-
-        Returns: float or NumPy array of probability density
-        """
-        raise UnimplementedMethodException()
-
-    def GetLinspace(self):
-        """Get a linspace for plotting.
-
-        Not all subclasses of Pdf implement this.
-
-        Returns: numpy array
-        """
-        raise UnimplementedMethodException()
-
-    def MakePmf(self, **options):
-        """Makes a discrete version of this Pdf.
-
-        options can include
-        label: string
-        low: low end of range
-        high: high end of range
-        n: number of places to evaluate
-
-        Returns: new Pmf
-        """
-        label = options.pop('label', '')
-        xs, ds = self.Render(**options)
-        return Pmf(dict(zip(xs, ds)), label=label)
-
-    def Render(self, **options):
-        """Generates a sequence of points suitable for plotting.
-
-        If options includes low and high, it must also include n;
-        in that case the density is evaluated at n locations between
-        low and high, including both.
-
-        If options includes xs, the density is evaluated at those locations.
-
-        Otherwise, self.GetLinspace is invoked to provide the locations.
-
-        Returns:
-            tuple of (xs, densities)
-        """
-        low, high = options.pop('low', None), options.pop('high', None)
-        if low is not None and high is not None:
-            n = options.pop('n', 101)
-            xs = np.linspace(low, high, n)
-        else:
-            xs = options.pop('xs', None)
-            if xs is None:
-                xs = self.GetLinspace()
-
-        ds = self.Density(xs)
-        return xs, ds
-
-    def Items(self):
-        """Generates a sequence of (value, probability) pairs.
-        """
-        return zip(*self.Render())
-
-
-class NormalPdf(Pdf):
-    """Represents the PDF of a Normal distribution."""
-
-    def __init__(self, mu=0, sigma=1, label=None):
-        """Constructs a Normal Pdf with given mu and sigma.
-
-        mu: mean
-        sigma: standard deviation
-        label: string
-        """
-        self.mu = mu
-        self.sigma = sigma
-        self.label = label if label is not None else '_nolegend_'
-
-    def __str__(self):
-        return 'NormalPdf(%f, %f)' % (self.mu, self.sigma)
-
-    def GetLinspace(self):
-        """Get a linspace for plotting.
-
-        Returns: numpy array
-        """
-        low, high = self.mu-3*self.sigma, self.mu+3*self.sigma
-        return np.linspace(low, high, 101)
-
-    def Density(self, xs):
-        """Evaluates this Pdf at xs.
-
-        xs: scalar or sequence of floats
-
-        returns: float or NumPy array of probability density
-        """
-        return stats.norm.pdf(xs, self.mu, self.sigma)
-
-
-class ExponentialPdf(Pdf):
-    """Represents the PDF of an exponential distribution."""
-
-    def __init__(self, lam=1, label=None):
-        """Constructs an exponential Pdf with given parameter.
-
-        lam: rate parameter
-        label: string
-        """
-        self.lam = lam
-        self.label = label if label is not None else '_nolegend_'
-
-    def __str__(self):
-        return 'ExponentialPdf(%f)' % (self.lam)
-
-    def GetLinspace(self):
-        """Get a linspace for plotting.
-
-        Returns: numpy array
-        """
-        low, high = 0, 5.0/self.lam
-        return np.linspace(low, high, 101)
-
-    def Density(self, xs):
-        """Evaluates this Pdf at xs.
-
-        xs: scalar or sequence of floats
-
-        returns: float or NumPy array of probability density
-        """
-        return stats.expon.pdf(xs, scale=1.0/self.lam)
-
-
-class EstimatedPdf(Pdf):
-    """Represents a PDF estimated by KDE."""
-
-    def __init__(self, sample, label=None):
-        """Estimates the density function based on a sample.
-
-        sample: sequence of data
-        label: string
-        """
-        self.label = label if label is not None else '_nolegend_'
-        self.kde = stats.gaussian_kde(sample)
-        low = min(sample)
-        high = max(sample)
-        self.linspace = np.linspace(low, high, 101)
-
-    def __str__(self):
-        return 'EstimatedPdf(label=%s)' % str(self.label)
-
-    def GetLinspace(self):
-        """Get a linspace for plotting.
-
-        Returns: numpy array
-        """
-        return self.linspace
-
-    def Density(self, xs):
-        """Evaluates this Pdf at xs.
-
-        returns: float or NumPy array of probability density
-        """
-        return self.kde.evaluate(xs)
-
-
-def CredibleInterval(pmf, percentage=90):
-    """Computes a credible interval for a given distribution.
-
-    If percentage=90, computes the 90% CI.
-
-    Args:
-        pmf: Pmf object representing a posterior distribution
-        percentage: float between 0 and 100
-
-    Returns:
-        sequence of two floats, low and high
-    """
-    cdf = pmf.MakeCdf()
-    prob = (1 - percentage / 100.0) / 2
-    interval = cdf.Value(prob), cdf.Value(1 - prob)
-    return interval
-
-
-def PmfProbLess(pmf1, pmf2):
-    """Probability that a value from pmf1 is less than a value from pmf2.
-
-    Args:
-        pmf1: Pmf object
-        pmf2: Pmf object
-
-    Returns:
-        float probability
-    """
-    total = 0.0
-    for v1, p1 in pmf1.Items():
-        for v2, p2 in pmf2.Items():
-            if v1 < v2:
-                total += p1 * p2
-    return total
-
-
-def PmfProbGreater(pmf1, pmf2):
-    """Probability that a value from pmf1 is greater than a value from pmf2.
-
-    Args:
-        pmf1: Pmf object
-        pmf2: Pmf object
-
-    Returns:
-        float probability
-    """
-    total = 0.0
-    for v1, p1 in pmf1.Items():
-        for v2, p2 in pmf2.Items():
-            if v1 > v2:
-                total += p1 * p2
-    return total
-
-
-def PmfProbEqual(pmf1, pmf2):
-    """Probability that a value from pmf1 equals a value from pmf2.
-
-    Args:
-        pmf1: Pmf object
-        pmf2: Pmf object
-
-    Returns:
-        float probability
-    """
-    total = 0.0
-    for v1, p1 in pmf1.Items():
-        for v2, p2 in pmf2.Items():
-            if v1 == v2:
-                total += p1 * p2
-    return total
-
-
-def RandomSum(dists):
-    """Chooses a random value from each dist and returns the sum.
-
-    dists: sequence of Pmf or Cdf objects
-
-    returns: numerical sum
-    """
-    total = sum(dist.Random() for dist in dists)
-    return total
-
-
-def SampleSum(dists, n):
-    """Draws a sample of sums from a list of distributions.
-
-    dists: sequence of Pmf or Cdf objects
-    n: sample size
-
-    returns: new Pmf of sums
-    """
-    pmf = Pmf(RandomSum(dists) for i in range(n))
-    return pmf
-
-
-def EvalNormalPdf(x, mu, sigma):
-    """Computes the PDF of the normal distribution.
-
-    x: value
-    mu: mean
-    sigma: standard deviation
-
-    returns: float probability density
-    """
-    return stats.norm.pdf(x, mu, sigma)
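`SampleSum` approximates the distribution of a sum by simulation, which can be cheaper than exact convolution with `Pmf.__add__` when many distributions are involved. A quick sketch with made-up dice:

```python
import thinkstats2

d6 = thinkstats2.Pmf(range(1, 7))

# Simulated distribution of the sum of three dice, from 1000 draws.
three_dice = thinkstats2.SampleSum([d6, d6, d6], 1000)

print(three_dice.Mean())  # close to 10.5
print(thinkstats2.CredibleInterval(three_dice, 90))
```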
-
-
-def MakeNormalPmf(mu, sigma, num_sigmas, n=201):
-    """Makes a PMF discrete approximation to a Normal distribution.
-
-    mu: float mean
-    sigma: float standard deviation
-    num_sigmas: how many sigmas to extend in each direction
-    n: number of values in the Pmf
-
-    returns: normalized Pmf
-    """
-    pmf = Pmf()
-    low = mu - num_sigmas * sigma
-    high = mu + num_sigmas * sigma
-
-    for x in np.linspace(low, high, n):
-        p = EvalNormalPdf(x, mu, sigma)
-        pmf.Set(x, p)
-    pmf.Normalize()
-    return pmf
-
-
-def EvalBinomialPmf(k, n, p):
-    """Evaluates the binomial PMF.
-
-    Returns the probability of k successes in n trials with probability p.
-    """
-    return stats.binom.pmf(k, n, p)
-
-
-def EvalHypergeomPmf(k, N, K, n):
-    """Evaluates the hypergeometric PMF.
-
-    Returns the probability of k successes in n trials from a population
-    N with K successes in it.
-    """
-    return stats.hypergeom.pmf(k, N, K, n)
-
-
-def EvalPoissonPmf(k, lam):
-    """Computes the Poisson PMF.
-
-    k: number of events
-    lam: parameter lambda in events per unit time
-
-    returns: float probability
-    """
-    # don't use the scipy function (yet).  for lam=0 it returns NaN;
-    # should be 0.0
-    # return stats.poisson.pmf(k, lam)
-    return lam ** k * math.exp(-lam) / special.gamma(k+1)
-
-
-def MakePoissonPmf(lam, high, step=1):
-    """Makes a PMF discrete approximation to a Poisson distribution.
-
-    lam: parameter lambda in events per unit time
-    high: upper bound of the Pmf
-
-    returns: normalized Pmf
-    """
-    pmf = Pmf()
-    for k in range(0, high + 1, step):
-        p = EvalPoissonPmf(k, lam)
-        pmf.Set(k, p)
-    pmf.Normalize()
-    return pmf
-
-
-def EvalExponentialPdf(x, lam):
-    """Computes the exponential PDF.
-
-    x: value
-    lam: parameter lambda in events per unit time
-
-    returns: float probability density
-    """
-    return lam * math.exp(-lam * x)
-
-
-def EvalExponentialCdf(x, lam):
-    """Evaluates CDF of the exponential distribution with parameter lam."""
-    return 1 - math.exp(-lam * x)
-
-
-def MakeExponentialPmf(lam, high, n=200):
-    """Makes a PMF discrete approximation to an exponential distribution.
-
-    lam: parameter lambda in events per unit time
-    high: upper bound
-    n: number of values in the Pmf
-
-    returns: normalized Pmf
-    """
-    pmf = Pmf()
-    for x in np.linspace(0, high, n):
-        p = EvalExponentialPdf(x, lam)
-        pmf.Set(x, p)
-    pmf.Normalize()
-    return pmf
-
-
-def StandardNormalCdf(x):
-    """Evaluates the CDF of the standard Normal distribution.
-
-    See http://en.wikipedia.org/wiki/Normal_distribution
-    #Cumulative_distribution_function
-
-    Args:
-        x: float
-
-    Returns:
-        float
-    """
-    return (math.erf(x / ROOT2) + 1) / 2
-
-
-def EvalNormalCdf(x, mu=0, sigma=1):
-    """Evaluates the CDF of the normal distribution.
-
-    Args:
-        x: float
-
-        mu: mean parameter
-
-        sigma: standard deviation parameter
-
-    Returns:
-        float
-    """
-    return stats.norm.cdf(x, loc=mu, scale=sigma)
-
-
-def EvalNormalCdfInverse(p, mu=0, sigma=1):
-    """Evaluates the inverse CDF of the normal distribution.
-
-    See http://en.wikipedia.org/wiki/Normal_distribution#Quantile_function
-
-    Args:
-        p: float
-
-        mu: mean parameter
-
-        sigma: standard deviation parameter
-
-    Returns:
-        float
-    """
-    return stats.norm.ppf(p, loc=mu, scale=sigma)
-
-
-def EvalLognormalCdf(x, mu=0, sigma=1):
-    """Evaluates the CDF of the lognormal distribution.
-
-    x: float or sequence
-    mu: mean parameter
-    sigma: standard deviation parameter
-
-    Returns: float or sequence
-    """
-    return stats.lognorm.cdf(x, loc=mu, scale=sigma)
-
-
-def RenderExpoCdf(lam, low, high, n=101):
-    """Generates sequences of xs and ps for an exponential CDF.
-
-    lam: parameter
-    low: float
-    high: float
-    n: number of points to render
-
-    returns: numpy arrays (xs, ps)
-    """
-    xs = np.linspace(low, high, n)
-    ps = 1 - np.exp(-lam * xs)
-    #ps = stats.expon.cdf(xs, scale=1.0/lam)
-    return xs, ps
-
-
-def RenderNormalCdf(mu, sigma, low, high, n=101):
-    """Generates sequences of xs and ps for a Normal CDF.
-
-    mu: parameter
-    sigma: parameter
-    low: float
-    high: float
-    n: number of points to render
-
-    returns: numpy arrays (xs, ps)
-    """
-    xs = np.linspace(low, high, n)
-    ps = stats.norm.cdf(xs, mu, sigma)
-    return xs, ps
-
-
-def RenderParetoCdf(xmin, alpha, low, high, n=50):
-    """Generates sequences of xs and ps for a Pareto CDF.
-
-    xmin: parameter
-    alpha: parameter
-    low: float
-    high: float
-    n: number of points to render
-
-    returns: numpy arrays (xs, ps)
-    """
-    if low < xmin:
-        low = xmin
-    xs = np.linspace(low, high, n)
-    ps = 1 - (xs / xmin) ** -alpha
-    #ps = stats.pareto.cdf(xs, scale=xmin, b=alpha)
-    return xs, ps
-
-
-class Beta(object):
-    """Represents a Beta distribution.
-
-    See http://en.wikipedia.org/wiki/Beta_distribution
-    """
-
-    def __init__(self, alpha=1, beta=1, label=None):
-        """Initializes a Beta distribution."""
-        self.alpha = alpha
-        self.beta = beta
-        self.label = label if label is not None else '_nolegend_'
-
-    def Update(self, data):
-        """Updates a Beta distribution.
-
-        data: pair of int (heads, tails)
-        """
-        heads, tails = data
-        self.alpha += heads
-        self.beta += tails
-
-    def Mean(self):
-        """Computes the mean of this distribution."""
-        return self.alpha / (self.alpha + self.beta)
-
-    def Random(self):
-        """Generates a random variate from this distribution."""
-        return random.betavariate(self.alpha, self.beta)
-
-    def Sample(self, n):
-        """Generates a random sample from this distribution.
-
-        n: int sample size
-        """
-        size = n,
-        return np.random.beta(self.alpha, self.beta, size)
-
-    def EvalPdf(self, x):
-        """Evaluates the PDF at x."""
-        return x ** (self.alpha - 1) * (1 - x) ** (self.beta - 1)
-
-    def MakePmf(self, steps=101, label=None):
-        """Returns a Pmf of this distribution.
-
-        Note: Normally, we just evaluate the PDF at a sequence
-        of points and treat the probability density as a probability
-        mass.
-
-        But if alpha or beta is less than one, we have to be
-        more careful because the PDF goes to infinity at x=0
-        and x=1.  In that case we evaluate the CDF and compute
-        differences.
-        """
-        if self.alpha < 1 or self.beta < 1:
-            cdf = self.MakeCdf()
-            pmf = cdf.MakePmf()
-            return pmf
-
-        xs = [i / (steps - 1.0) for i in range(steps)]
-        probs = [self.EvalPdf(x) for x in xs]
-        pmf = Pmf(dict(zip(xs, probs)), label=label)
-        return pmf
-
-    def MakeCdf(self, steps=101):
-        """Returns the CDF of this distribution."""
-        xs = [i / (steps - 1.0) for i in range(steps)]
-        ps = [special.betainc(self.alpha, self.beta, x) for x in xs]
-        cdf = Cdf(xs, ps)
-        return cdf
-
-
-class Dirichlet(object):
-    """Represents a Dirichlet distribution.
-
-    See http://en.wikipedia.org/wiki/Dirichlet_distribution
-    """
-
-    def __init__(self, n, conc=1, label=None):
-        """Initializes a Dirichlet distribution.
-
-        n: number of dimensions
-        conc: concentration parameter (smaller yields more concentration)
-        label: string label
-        """
-        if n < 2:
-            raise ValueError('A Dirichlet distribution with '
-                             'n<2 makes no sense')
-
-        self.n = n
-        self.params = np.ones(n, dtype=np.float) * conc
-        self.label = label if label is not None else '_nolegend_'
-
-    def Update(self, data):
-        """Updates a Dirichlet distribution.
-
-        data: sequence of observations, in order corresponding to params
-        """
-        m = len(data)
-        self.params[:m] += data
-
-    def Random(self):
-        """Generates a random variate from this distribution.
-
-        Returns: normalized vector of fractions
-        """
-        p = np.random.gamma(self.params)
-        return p / p.sum()
-
-    def Likelihood(self, data):
-        """Computes the likelihood of the data.
-
-        Selects a random vector of probabilities from this distribution.
-
-        Returns: float probability
-        """
-        m = len(data)
-        if self.n < m:
-            return 0
-
-        x = data
-        p = self.Random()
-        q = p[:m] ** x
-        return q.prod()
-
-    def LogLikelihood(self, data):
-        """Computes the log likelihood of the data.
-
-        Selects a random vector of probabilities from this distribution.
-
-        Returns: float log probability
-        """
-        m = len(data)
-        if self.n < m:
-            return float('-inf')
-
-        x = self.Random()
-        y = np.log(x[:m]) * data
-        return y.sum()
-
-    def MarginalBeta(self, i):
-        """Computes the marginal distribution of the ith element.
-
-        See http://en.wikipedia.org/wiki/Dirichlet_distribution
-        #Marginal_distributions
-
-        i: int
-
-        Returns: Beta object
-        """
-        alpha0 = self.params.sum()
-        alpha = self.params[i]
-        return Beta(alpha, alpha0 - alpha)
-
-    def PredictivePmf(self, xs, label=None):
-        """Makes a predictive distribution.
-
-        xs: values to go into the Pmf
-
-        Returns: Pmf that maps from x to the mean prevalence of x
-        """
-        alpha0 = self.params.sum()
-        ps = self.params / alpha0
-        return Pmf(zip(xs, ps), label=label)
-
-
-def BinomialCoef(n, k):
-    """Compute the binomial coefficient "n choose k".
-
-    n: number of trials
-    k: number of successes
-
-    Returns: float
-    """
-    return scipy.misc.comb(n, k)
-
-
-def LogBinomialCoef(n, k):
-    """Computes the log of the binomial coefficient.
-
-    http://math.stackexchange.com/questions/64716/
-    approximating-the-logarithm-of-the-binomial-coefficient
-
-    n: number of trials
-    k: number of successes
-
-    Returns: float
-    """
-    return n * math.log(n) - k * math.log(k) - (n - k) * math.log(n - k)
-
-
-def NormalProbability(ys, jitter=0.0):
-    """Generates data for a normal probability plot.
-
-    ys: sequence of values
-    jitter: float magnitude of jitter added to the ys
-
-    returns: numpy arrays xs, ys
-    """
-    n = len(ys)
-    xs = np.random.normal(0, 1, n)
-    xs.sort()
-
-    if jitter:
-        ys = Jitter(ys, jitter)
-    else:
-        ys = np.array(ys)
-    ys.sort()
-
-    return xs, ys
-
-
-def Jitter(values, jitter=0.5):
-    """Jitters the values by adding a uniform variate in (-jitter, jitter).
-
-    values: sequence
-    jitter: scalar magnitude of jitter
-
-    returns: new numpy array
-    """
-    n = len(values)
-    return np.random.uniform(-jitter, +jitter, n) + values
-
-
-def NormalProbabilityPlot(sample, fit_color='0.8', **options):
-    """Makes a normal probability plot with a fitted line.
-
-    sample: sequence of numbers
-    fit_color: color string for the fitted line
-    options: passed along to Plot
-    """
-    xs, ys = NormalProbability(sample)
-    mean, var = MeanVar(sample)
-    std = math.sqrt(var)
-
-    fit = FitLine(xs, mean, std)
-    thinkplot.Plot(*fit, color=fit_color, label='model')
-
-    xs, ys = NormalProbability(sample)
-    thinkplot.Plot(xs, ys, **options)
-
-
-def Mean(xs):
-    """Computes mean.
-
-    xs: sequence of values
-
-    returns: float mean
-    """
-    return np.mean(xs)
-
-
-def Var(xs, mu=None, ddof=0):
-    """Computes variance.
-
-    xs: sequence of values
-    mu: optional known mean
-    ddof: delta degrees of freedom
-
-    returns: float
-    """
-    xs = np.asarray(xs)
-
-    if mu is None:
-        mu = xs.mean()
-
-    ds = xs - mu
-    return np.dot(ds, ds) / (len(xs) - ddof)
-
-
-def Std(xs, mu=None, ddof=0):
-    """Computes standard deviation.
-
-    xs: sequence of values
-    mu: optional known mean
-    ddof: delta degrees of freedom
-
-    returns: float
-    """
-    var = Var(xs, mu, ddof)
-    return math.sqrt(var)
-
-
-def MeanVar(xs, ddof=0):
-    """Computes mean and variance.
-
-    Based on http://stackoverflow.com/questions/19391149/
-    numpy-mean-and-variance-from-single-function
-
-    xs: sequence of values
-    ddof: delta degrees of freedom
-
-    returns: pair of float, mean and var
-    """
-    xs = np.asarray(xs)
-    mean = xs.mean()
-    s2 = Var(xs, mean, ddof)
-    return mean, s2
-
-
-def Trim(t, p=0.01):
-    """Trims the largest and smallest elements of t.
-
-    Args:
-        t: sequence of numbers
-        p: fraction of values to trim off each end
-
-    Returns:
-        sequence of values
-    """
-    n = int(p * len(t))
-    t = sorted(t)[n:-n]
-    return t
-
-
-def TrimmedMean(t, p=0.01):
-    """Computes the trimmed mean of a sequence of numbers.
-
-    Args:
-        t: sequence of numbers
-        p: fraction of values to trim off each end
-
-    Returns:
-        float
-    """
-    t = Trim(t, p)
-    return Mean(t)
-
-
-def TrimmedMeanVar(t, p=0.01):
-    """Computes the trimmed mean and variance of a sequence of numbers.
-
-    Side effect: sorts the list.
-
-    Args:
-        t: sequence of numbers
-        p: fraction of values to trim off each end
-
-    Returns:
-        pair of float, mean and variance
-    """
-    t = Trim(t, p)
-    mu, var = MeanVar(t)
-    return mu, var
-
-
-def CohenEffectSize(group1, group2):
-    """Compute Cohen's d.
-
-    group1: Series or NumPy array
-    group2: Series or NumPy array
-
-    returns: float
-    """
-    diff = group1.mean() - group2.mean()
-
-    n1, n2 = len(group1), len(group2)
-    var1 = group1.var()
-    var2 = group2.var()
-
-    pooled_var = (n1 * var1 + n2 * var2) / (n1 + n2)
-    d = diff / math.sqrt(pooled_var)
-    return d
-
-
-def Cov(xs, ys, meanx=None, meany=None):
-    """Computes Cov(X, Y).
-
-    Args:
-        xs: sequence of values
-        ys: sequence of values
-        meanx: optional float mean of xs
-        meany: optional float mean of ys
-
-    Returns:
-        Cov(X, Y)
-    """
-    xs = np.asarray(xs)
-    ys = np.asarray(ys)
-
-    if meanx is None:
-        meanx = np.mean(xs)
-    if meany is None:
-        meany = np.mean(ys)
-
-    cov = np.dot(xs-meanx, ys-meany) / len(xs)
-    return cov
-
-
-def Corr(xs, ys):
-    """Computes Corr(X, Y).
-
-    Args:
-        xs: sequence of values
-        ys: sequence of values
-
-    Returns:
-        Corr(X, Y)
-    """
-    xs = np.asarray(xs)
-    ys = np.asarray(ys)
-
-    meanx, varx = MeanVar(xs)
-    meany, vary = MeanVar(ys)
-
-    corr = Cov(xs, ys, meanx, meany) / math.sqrt(varx * vary)
-
-    return corr
-
-
-def SerialCorr(series, lag=1):
-    """Computes the serial correlation of a series.
-
-    series: Series
-    lag: integer number of intervals to shift
-
-    returns: float correlation
-    """
-    xs = series[lag:]
-    ys = series.shift(lag)[lag:]
-    corr = Corr(xs, ys)
-    return corr
-
-
-def SpearmanCorr(xs, ys):
-    """Computes Spearman's rank correlation.
-
-    Args:
-        xs: sequence of values
-        ys: sequence of values
-
-    Returns:
-        float Spearman's correlation
-    """
-    xranks = pandas.Series(xs).rank()
-    yranks = pandas.Series(ys).rank()
-    return Corr(xranks, yranks)
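`Corr` measures linear association and is sensitive to outliers; `SpearmanCorr` works on ranks and is more robust. A toy comparison on made-up data with one extreme value:

```python
import thinkstats2

xs = [1, 2, 3, 4, 5]
ys = [2, 4, 6, 8, 1000]   # monotonic, but with one extreme value

print(thinkstats2.Corr(xs, ys))          # pulled around by the outlier
print(thinkstats2.SpearmanCorr(xs, ys))  # 1.0: the ranks line up exactly
```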
-
-
-def MapToRanks(t):
-    """Returns a list of ranks corresponding to the elements in t.
-
-    Args:
-        t: sequence of numbers
-
-    Returns:
-        list of integer ranks, starting at 1
-    """
-    # pair up each value with its index
-    pairs = enumerate(t)
-
-    # sort by value
-    sorted_pairs = sorted(pairs, key=itemgetter(1))
-
-    # pair up each pair with its rank
-    ranked = enumerate(sorted_pairs)
-
-    # sort by index
-    resorted = sorted(ranked, key=lambda trip: trip[1][0])
-
-    # extract the ranks
-    ranks = [trip[0]+1 for trip in resorted]
-    return ranks
-
-
-def LeastSquares(xs, ys):
-    """Computes a linear least squares fit for ys as a function of xs.
-
-    Args:
-        xs: sequence of values
-        ys: sequence of values
-
-    Returns:
-        tuple of (intercept, slope)
-    """
-    meanx, varx = MeanVar(xs)
-    meany = Mean(ys)
-
-    slope = Cov(xs, ys, meanx, meany) / varx
-    inter = meany - slope * meanx
-
-    return inter, slope
-
-
-def FitLine(xs, inter, slope):
-    """Fits a line to the given data.
-
-    xs: sequence of x
-
-    returns: tuple of numpy arrays (sorted xs, fit ys)
-    """
-    fit_xs = np.sort(xs)
-    fit_ys = inter + slope * fit_xs
-    return fit_xs, fit_ys
-
-
-def Residuals(xs, ys, inter, slope):
-    """Computes residuals for a linear fit with parameters inter and slope.
-
-    Args:
-        xs: independent variable
-        ys: dependent variable
-        inter: float intercept
-        slope: float slope
-
-    Returns:
-        list of residuals
-    """
-    xs = np.asarray(xs)
-    ys = np.asarray(ys)
-    res = ys - (inter + slope * xs)
-    return res
-
-
-def CoefDetermination(ys, res):
-    """Computes the coefficient of determination (R^2) for given residuals.
-
-    Args:
-        ys: dependent variable
-        res: residuals
-
-    Returns:
-        float coefficient of determination
-    """
-    return 1 - Var(res) / Var(ys)
-
-
-def CorrelatedGenerator(rho):
-    """Generates standard normal variates with serial correlation.
-
-    rho: target coefficient of correlation
-
-    Returns: iterable
-    """
-    x = random.gauss(0, 1)
-    yield x
-
-    sigma = math.sqrt(1 - rho**2)
-    while True:
-        x = random.gauss(x * rho, sigma)
-        yield x
-
-
-def CorrelatedNormalGenerator(mu, sigma, rho):
-    """Generates normal variates with serial correlation.
-
-    mu: mean of variate
-    sigma: standard deviation of variate
-    rho: target coefficient of correlation
-
-    Returns: iterable
-    """
-    for x in CorrelatedGenerator(rho):
-        yield x * sigma + mu
-
-
-def RawMoment(xs, k):
-    """Computes the kth raw moment of xs.
-    """
-    return sum(x**k for x in xs) / len(xs)
-
-
-def CentralMoment(xs, k):
-    """Computes the kth central moment of xs.
-    """
-    mean = RawMoment(xs, 1)
-    return sum((x - mean)**k for x in xs) / len(xs)
-
-
-def StandardizedMoment(xs, k):
-    """Computes the kth standardized moment of xs.
-    """
-    var = CentralMoment(xs, 2)
-    std = math.sqrt(var)
-    return CentralMoment(xs, k) / std**k
-
-
-def Skewness(xs):
-    """Computes skewness.
-    """
-    return StandardizedMoment(xs, 3)
-
-
-def Median(xs):
-    """Computes the median (50th percentile) of a sequence.
-
-    xs: sequence or anything else that can initialize a Cdf
-
-    returns: float
-    """
-    cdf = Cdf(xs)
-    return cdf.Value(0.5)
-
-
-def IQR(xs):
-    """Computes the interquartile range of a sequence.
-
-    xs: sequence or anything else that can initialize a Cdf
-
-    returns: pair of floats
-    """
-    cdf = Cdf(xs)
-    return cdf.Value(0.25), cdf.Value(0.75)
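`LeastSquares`, `FitLine`, `Residuals` and `CoefDetermination` above compose into a small regression pipeline. A sketch on made-up data drawn from a known line:

```python
import numpy as np
import thinkstats2

# y = 3 + 2x plus a little noise
xs = np.linspace(0, 10, 50)
ys = 3 + 2 * xs + np.random.normal(0, 1, 50)

inter, slope = thinkstats2.LeastSquares(xs, ys)
res = thinkstats2.Residuals(xs, ys, inter, slope)
r2 = thinkstats2.CoefDetermination(ys, res)

print(inter, slope)  # close to 3 and 2
print(r2)            # close to 1 for this low-noise line
```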
- """ - median = Median(xs) - mean = RawMoment(xs, 1) - var = CentralMoment(xs, 2) - std = math.sqrt(var) - gp = 3 * (mean - median) / std - return gp - - -class FixedWidthVariables(object): - """Represents a set of variables in a fixed width file.""" - - def __init__(self, variables, index_base=0): - """Initializes. - - variables: DataFrame - index_base: are the indices 0 or 1 based? - - Attributes: - colspecs: list of (start, end) index tuples - names: list of string variable names - """ - self.variables = variables - - # note: by default, subtract 1 from colspecs - self.colspecs = variables[['start', 'end']] - index_base - - # convert colspecs to a list of pair of int - self.colspecs = self.colspecs.astype(np.int).values.tolist() - self.names = variables['name'] - - def ReadFixedWidth(self, filename, **options): - """Reads a fixed width ASCII file. - - filename: string filename - - returns: DataFrame - """ - df = pandas.read_fwf(filename, - colspecs=self.colspecs, - names=self.names, - **options) - return df - - -def ReadStataDct(dct_file, **options): - """Reads a Stata dictionary file. - - dct_file: string filename - options: dict of options passed to open() - - returns: FixedWidthVariables object - """ - type_map = dict(byte=int, int=int, long=int, float=float, double=float) - - var_info = [] - for line in open(dct_file, **options): - match = re.search(r'_column\(([^)]*)\)', line) - if match: - start = int(match.group(1)) - t = line.split() - vtype, name, fstring = t[1:4] - name = name.lower() - if vtype.startswith('str'): - vtype = str - else: - vtype = type_map[vtype] - long_desc = ' '.join(t[4:]).strip('"') - var_info.append((start, vtype, name, fstring, long_desc)) - - columns = ['start', 'type', 'name', 'fstring', 'desc'] - variables = pandas.DataFrame(var_info, columns=columns) - - # fill in the end column by shifting the start column - variables['end'] = variables.start.shift(-1) - variables.loc[len(variables)-1, 'end'] = 0 - - dct = FixedWidthVariables(variables, index_base=1) - return dct - - -def Resample(xs, n=None): - """Draw a sample from xs with the same length as xs. - - xs: sequence - n: sample size (default: len(xs)) - - returns: NumPy array - """ - if n is None: - n = len(xs) - return np.random.choice(xs, n, replace=True) - - -def SampleRows(df, nrows, replace=False): - """Choose a sample of rows from a DataFrame. - - df: DataFrame - nrows: number of rows - replace: whether to sample with replacement - - returns: DataDf - """ - indices = np.random.choice(df.index, nrows, replace=replace) - sample = df.loc[indices] - return sample - - -def ResampleRows(df): - """Resamples rows from a DataFrame. - - df: DataFrame - - returns: DataFrame - """ - return SampleRows(df, len(df), replace=True) - - -def ResampleRowsWeighted(df, column='finalwgt'): - """Resamples a DataFrame using probabilities proportional to given column. - - df: DataFrame - column: string column name to use as weights - - returns: DataFrame - """ - weights = df[column] - cdf = Cdf(dict(weights)) - indices = cdf.Sample(len(weights)) - sample = df.loc[indices] - return sample - - -def PercentileRow(array, p): - """Selects the row from a sorted array that maps to percentile p. - - p: float 0--100 - - returns: NumPy array (one row) - """ - rows, cols = array.shape - index = int(rows * p / 100) - return array[index, ] - - -def PercentileRows(ys_seq, percents): - """Given a collection of lines, selects percentiles along vertical axis. 
-
-    For example, if ys_seq contains simulation results like ys as a
-    function of time, and percents contains (5, 95), the result would
-    be a 90% CI for each vertical slice of the simulation results.
-
-    ys_seq: sequence of lines (y values)
-    percents: list of percentiles (0-100) to select
-
-    returns: list of NumPy arrays, one for each percentile
-    """
-    nrows = len(ys_seq)
-    ncols = len(ys_seq[0])
-    array = np.zeros((nrows, ncols))
-
-    for i, ys in enumerate(ys_seq):
-        array[i, ] = ys
-
-    array = np.sort(array, axis=0)
-
-    rows = [PercentileRow(array, p) for p in percents]
-    return rows
-
-
-def Smooth(xs, sigma=2, **options):
-    """Smooths a NumPy array with a Gaussian filter.
-
-    xs: sequence
-    sigma: standard deviation of the filter
-    """
-    return ndimage.filters.gaussian_filter1d(xs, sigma, **options)
-
-
-class HypothesisTest(object):
-    """Represents a hypothesis test."""
-
-    def __init__(self, data):
-        """Initializes.
-
-        data: data in whatever form is relevant
-        """
-        self.data = data
-        self.MakeModel()
-        self.actual = self.TestStatistic(data)
-        self.test_stats = None
-        self.test_cdf = None
-
-    def PValue(self, iters=1000):
-        """Computes the distribution of the test statistic and p-value.
-
-        iters: number of iterations
-
-        returns: float p-value
-        """
-        self.test_stats = [self.TestStatistic(self.RunModel())
-                           for _ in range(iters)]
-        self.test_cdf = Cdf(self.test_stats)
-
-        count = sum(1 for x in self.test_stats if x >= self.actual)
-        return count / iters
-
-    def MaxTestStat(self):
-        """Returns the largest test statistic seen during simulations.
-        """
-        return max(self.test_stats)
-
-    def PlotCdf(self, label=None):
-        """Draws a Cdf with a vertical line at the observed test stat.
-        """
-        def VertLine(x):
-            """Draws a vertical line at x."""
-            thinkplot.Plot([x, x], [0, 1], color='0.8')
-
-        VertLine(self.actual)
-        thinkplot.Cdf(self.test_cdf, label=label)
-
-    def TestStatistic(self, data):
-        """Computes the test statistic.
-
-        data: data in whatever form is relevant
-        """
-        raise UnimplementedMethodException()
-
-    def MakeModel(self):
-        """Build a model of the null hypothesis.
-        """
-        pass
-
-    def RunModel(self):
-        """Run the model of the null hypothesis.
-
-        returns: simulated data
-        """
-        raise UnimplementedMethodException()
-
-
-def main():
-    pass
-
-
-if __name__ == '__main__':
-    main()
-""" Now that the user can read in a file, this creates a model which uses the price, class and gender
-Author : AstroDave
-Date : 18th September 2012
-Revised : 28 March 2014
-
-"""
-
-
-import csv as csv
-import numpy as np
-
-csv_file_object = csv.reader(
-    open('train.csv', 'rb'))  # Load in the csv file
-# Skip the first line as it is a header
-header = csv_file_object.next()
-# Create a variable to hold the data
-data = []
-
-for row in csv_file_object:  # Step through each row in the csv file,
-    # adding each row to the data variable
-    data.append(row)
-# Then convert from a list to an array
-data = np.array(data)
-
-# In order to analyse the price column I need to bin up that data.
-# Here are my binning parameters; the problem we face is that some of the fares are very large.
-# So we can either have a lot of bins with nothing in them, or we can just lose some
-# information by considering that anything over 39 is simply in the last bin.
-# So we add a ceiling
-fare_ceiling = 40
-# then modify the data in the Fare column to equal 39 if it is greater than or equal to the ceiling
-data[data[0::, 9].astype(np.float) >= fare_ceiling, 9] = fare_ceiling - 1.0
-
-fare_bracket_size = 10
-number_of_price_brackets = fare_ceiling / fare_bracket_size
-# I know there were 1st, 2nd and 3rd classes on board.
-number_of_classes = 3
-# But it's better practice to calculate this from the Pclass column directly:
-number_of_classes = len(np.unique(data[0::, 2]))
-# just take the length of an array of UNIQUE values in column index 2
-
-
-# This reference matrix will show the proportion of survivors as a sorted table of
-# gender, class and ticket fare.
-# First initialize it with all zeros
-survival_table = np.zeros(
-    [2, number_of_classes, number_of_price_brackets], float)
-
-# I can now find the stats of all the women and men on board
-for i in xrange(number_of_classes):
-    for j in xrange(number_of_price_brackets):
-
-        women_only_stats = data[(data[0::, 4] == "female")
-                                & (data[0::, 2].astype(np.float) == i+1)
-                                & (data[0:, 9].astype(np.float) >= j*fare_bracket_size)
-                                & (data[0:, 9].astype(np.float) < (j+1)*fare_bracket_size), 1]
-
-        men_only_stats = data[(data[0::, 4] != "female")
-                              & (data[0::, 2].astype(np.float) == i+1)
-                              & (data[0:, 9].astype(np.float) >= j*fare_bracket_size)
-                              & (data[0:, 9].astype(np.float) < (j+1)*fare_bracket_size), 1]
-
-        survival_table[0, i, j] = np.mean(
-            women_only_stats.astype(np.float))  # Female stats
-        survival_table[1, i, j] = np.mean(
-            men_only_stats.astype(np.float))  # Male stats
-
-# Taking the mean of an empty array (where the denominator is 0) returns nan,
-# so we can convert those entries to 0 by asking where the array does not
-# equal itself, and setting those positions to 0.
-survival_table[survival_table != survival_table] = 0.
-
-# Now I have my proportions of survivors; simply round them such that if < 0.5
-# I predict they don't survive, and if >= 0.5 they do
-survival_table[survival_table < 0.5] = 0
-survival_table[survival_table >= 0.5] = 1
-
-# Now I have my indicator, I can read in the test file and write out a
-# prediction of survived (1) or did not survive (0) for each passenger.
-# First read in the test file
-test_file = open('test.csv', 'rb')
-test_file_object = csv.reader(test_file)
-header = test_file_object.next()
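A note on the `survival_table != survival_table` line above: NaN is the only float value that compares unequal to itself, which is what makes that trick work. A minimal sketch of the more explicit NumPy spelling, on a hypothetical `table` array:

    import numpy as np

    table = np.array([[0.5, np.nan], [np.nan, 1.0]])
    table[np.isnan(table)] = 0.0  # same effect as table[table != table] = 0.0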
-# Also open a new file so I can write to it.
-predictions_file = open("genderclassmodel.csv", "wb")
-predictions_file_object = csv.writer(predictions_file)
-predictions_file_object.writerow(["PassengerId", "Survived"])
-
-# First thing to do is bin up the fares in the test file
-for row in test_file_object:
-    for j in xrange(number_of_price_brackets):
-        # If there is no fare, bin the ticket according to its class instead.
-        # A missing fare comes through as an empty string, so first
-        # try to make it a float
-        try:
-            row[8] = float(row[8])
-        except:  # If that fails, bin the fare according to the class
-            bin_fare = 3 - float(row[1])
-            break  # Break from the loop and move to the next row
-        if row[8] > fare_ceiling:  # Otherwise test whether it is higher
-            # than the fare ceiling we set earlier
-            bin_fare = number_of_price_brackets - 1
-            break  # And then break to the next row
-
-        if row[8] >= j*fare_bracket_size\
-                and row[8] < (j+1)*fare_bracket_size:  # If it passed these tests, loop through
-            # each bin until you find the right one,
-            # assign it to bin_fare,
-            # and move to the next loop
-            bin_fare = j
-            break
-    # Now I have the binned fare, passenger class, and whether female or male, so I can
-    # just cross-reference their details with our survival table
-    if row[3] == 'female':
-        predictions_file_object.writerow(
-            [row[0], "%d" % int(survival_table[0, float(row[1]) - 1, bin_fare])])
-    else:
-        predictions_file_object.writerow(
-            [row[0], "%d" % int(survival_table[1, float(row[1]) - 1, bin_fare])])
-
-# Close out the files
-test_file.close()
-predictions_file.close()
-""" This simple code is designed to teach a basic user to read in the files in python, simply find what proportion of males and females survived and make a predictive model based on this
-Author : AstroDave
-Date : 18 September 2012
-Revised: 28 March 2014
-
-"""
-
-
-import csv as csv
-import numpy as np
-
-csv_file_object = csv.reader(open('train.csv', 'rb'))  # Load in the csv file
-header = csv_file_object.next()  # Skip the first line as it is a header
-data = []  # Create a variable to hold the data
-
-for row in csv_file_object:  # Step through each row in the csv file,
-    data.append(row[0:])  # adding each row to the data variable
-data = np.array(data)  # Then convert from a list to an array.
-
-# Now I have an array of 12 columns and 891 rows
-# I can access any element I want, so the entire first column would
-# be data[0::,0].astype(np.float) -- This means all of the rows (from start to end), in column 0
-# I have to add the .astype() command, because
-# when appending the rows, python thought it was a string - so needed to convert
-
-# Set some variables
-number_passengers = np.size(data[0::, 1].astype(np.float))
-number_survived = np.sum(data[0::, 1].astype(np.float))
-proportion_survivors = number_survived / number_passengers
-
-# I can now find the stats of all the women on board,
-# by making an array that lists True/False whether each row is female
-# This finds where all the women are
-women_only_stats = data[0::, 4] == "female"
-# This finds where all the men are (note != means 'not equal')
-men_only_stats = data[0::, 4] != "female"
-
-# I can now filter the whole data to find statistics for just women, by placing
-# women_only_stats as a "mask" on my full data -- use it in place of the '0::' part of the array index.
-# You can test it by placing it there and requesting column index [4]; the output should all read 'female'
-# e.g. try typing this: data[women_only_stats, 4]
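A quick sketch of that masking suggestion, with a toy array standing in for the real data (sex in column 1 here rather than column 4):

    import numpy as np

    data = np.array([['1', 'female'], ['0', 'male'], ['1', 'female']])
    mask = data[:, 1] == 'female'  # boolean mask, one entry per row
    women = data[mask, 0]          # survival column for women only -> ['1' '1']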
-women_onboard = data[women_only_stats, 1].astype(np.float)
-men_onboard = data[men_only_stats, 1].astype(np.float)
-
-# and derive some statistics about them
-proportion_women_survived = np.sum(women_onboard) / np.size(women_onboard)
-proportion_men_survived = np.sum(men_onboard) / np.size(men_onboard)
-
-print 'Proportion of women who survived is %s' % proportion_women_survived
-print 'Proportion of men who survived is %s' % proportion_men_survived
-
-# Now that I have my indicator that women were much more likely to survive,
-# I am done with the training set.
-# Now I will read in the test file and write out my simplistic prediction:
-# if female, then model that she survived (1)
-# if male, then model that he did not survive (0)
-
-# First, read in test.csv
-test_file = open('test.csv', 'rb')
-test_file_object = csv.reader(test_file)
-header = test_file_object.next()
-
-# Also open a new file so I can write to it. Call it something descriptive.
-# Finally, loop through each row in the test file, and look in column index [3] (which is 'Sex').
-# Write out the PassengerId, and my prediction.
-
-predictions_file = open("gendermodel.csv", "wb")
-predictions_file_object = csv.writer(predictions_file)
-predictions_file_object.writerow(
-    ["PassengerId", "Survived"])  # write the column headers
-for row in test_file_object:  # For each row in the test file,
-    if row[3] == 'female':  # if it is a female,
-        # write the PassengerId, and predict 1
-        predictions_file_object.writerow([row[0], "1"])
-    else:  # else, if male,
-        # write the PassengerId, and predict 0.
-        predictions_file_object.writerow([row[0], "0"])
-test_file.close()  # Close out the files.
-predictions_file.close()
-""" Writing my first randomforest code.
-Author : AstroDave
-Date : 23rd September 2012
-Revised: 15 April 2014
-please see packages.python.org/milk/randomforests.html for more
-
-"""
-import pandas as pd
-import numpy as np
-import csv as csv
-from sklearn.ensemble import RandomForestClassifier
-
-# Data cleanup
-# TRAIN DATA
-# Load the train file into a dataframe
-train_df = pd.read_csv('train.csv', header=0)
-
-# I need to convert all strings to integer classifiers.
-# I need to fill in the missing values of the data and make it complete.
-
-# female = 0, male = 1
-train_df['Gender'] = train_df['Sex'].map({'female': 0, 'male': 1}).astype(int)
-
-# Embarked from 'C', 'Q', 'S'
-# Note this is not ideal: in translating categories to numbers, Port "2" is not 2 times greater than Port "1", etc.
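The note above is right that integer codes impose an ordering the ports do not have. A minimal sketch of the usual alternative, one 0/1 indicator column per category via pandas, with a hypothetical `df` standing in for `train_df`:

    import pandas as pd

    df = pd.DataFrame({'Embarked': ['S', 'C', 'Q', 'S']})
    # adds Embarked_C, Embarked_Q, Embarked_S columns; no implied order
    df = df.join(pd.get_dummies(df['Embarked'], prefix='Embarked'))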
- -# All missing Embarked -> just make them embark from most common place -if len(train_df.Embarked[train_df.Embarked.isnull()]) > 0: - train_df.Embarked[train_df.Embarked.isnull( - )] = train_df.Embarked.dropna().mode().values - -# determine all values of Embarked, -Ports = list(enumerate(np.unique(train_df['Embarked']))) -# set up a dictionary in the form Ports : index -Ports_dict = {name: i for i, name in Ports} -train_df.Embarked = train_df.Embarked.map(lambda x: Ports_dict[x]).astype( - int) # Convert all Embark strings to int - -# All the ages with no data -> make the median of all Ages -median_age = train_df['Age'].dropna().median() -if len(train_df.Age[train_df.Age.isnull()]) > 0: - train_df.loc[(train_df.Age.isnull()), 'Age'] = median_age - -# Remove the Name column, Cabin, Ticket, and Sex (since I copied and filled it to Gender) -train_df = train_df.drop( - ['Name', 'Sex', 'Ticket', 'Cabin', 'PassengerId'], axis=1) - - -# TEST DATA -# Load the test file into a dataframe -test_df = pd.read_csv('test.csv', header=0) - -# I need to do the same with the test data now, so that the columns are the same as the training data -# I need to convert all strings to integer classifiers: -# female = 0, Male = 1 -test_df['Gender'] = test_df['Sex'].map({'female': 0, 'male': 1}).astype(int) - -# Embarked from 'C', 'Q', 'S' -# All missing Embarked -> just make them embark from most common place -if len(test_df.Embarked[test_df.Embarked.isnull()]) > 0: - test_df.Embarked[test_df.Embarked.isnull( - )] = test_df.Embarked.dropna().mode().values -# Again convert all Embarked strings to int -test_df.Embarked = test_df.Embarked.map(lambda x: Ports_dict[x]).astype(int) - - -# All the ages with no data -> make the median of all Ages -median_age = test_df['Age'].dropna().median() -if len(test_df.Age[test_df.Age.isnull()]) > 0: - test_df.loc[(test_df.Age.isnull()), 'Age'] = median_age - -# All the missing Fares -> assume median of their respective class -if len(test_df.Fare[test_df.Fare.isnull()]) > 0: - median_fare = np.zeros(3) - for f in range(0, 3): # loop 0 to 2 - median_fare[f] = test_df[test_df.Pclass == - f+1]['Fare'].dropna().median() - for f in range(0, 3): # loop 0 to 2 - test_df.loc[(test_df.Fare.isnull()) & ( - test_df.Pclass == f+1), 'Fare'] = median_fare[f] - -# Collect the test data's PassengerIds before dropping it -ids = test_df['PassengerId'].values -# Remove the Name column, Cabin, Ticket, and Sex (since I copied and filled it to Gender) -test_df = test_df.drop( - ['Name', 'Sex', 'Ticket', 'Cabin', 'PassengerId'], axis=1) - - -# The data is now ready to go. So lets fit to the train, then predict to the test! -# Convert back to a numpy array -train_data = train_df.values -test_data = test_df.values - - -print 'Training...' -forest = RandomForestClassifier(n_estimators=100) -forest = forest.fit(train_data[0::, 1::], train_data[0::, 0]) - -print 'Predicting...' -output = forest.predict(test_data).astype(int) - - -predictions_file = open("myfirstforest.csv", "wb") -open_file_object = csv.writer(predictions_file) -open_file_object.writerow(["PassengerId", "Survived"]) -open_file_object.writerows(zip(ids, output)) -predictions_file.close() -print 'Done.' -import numpy as np -import re -import itertools -from collections import Counter -""" -Original taken from https://github.com/dennybritz/cnn-text-classification-tf -""" - - -def clean_str(string): - """ - Tokenization/string cleaning for all datasets except for SST. 
-    Original taken from https://github.com/yoonkim/CNN_sentence/blob/master/process_data.py
-    """
-    string = re.sub(r"[^A-Za-z0-9(),!?\'\`]", " ", string)
-    string = re.sub(r"\'s", " \'s", string)
-    string = re.sub(r"\'ve", " \'ve", string)
-    string = re.sub(r"n\'t", " n\'t", string)
-    string = re.sub(r"\'re", " \'re", string)
-    string = re.sub(r"\'d", " \'d", string)
-    string = re.sub(r"\'ll", " \'ll", string)
-    string = re.sub(r",", " , ", string)
-    string = re.sub(r"!", " ! ", string)
-    string = re.sub(r"\(", " \( ", string)
-    string = re.sub(r"\)", " \) ", string)
-    string = re.sub(r"\?", " \? ", string)
-    string = re.sub(r"\s{2,}", " ", string)
-    return string.strip().lower()
-
-
-def load_data_and_labels():
-    """
-    Loads MR polarity data from files, splits the data into words and generates labels.
-    Returns split sentences and labels.
-    """
-    # Load data from files
-    positive_examples = list(
-        open("./data/rt-polarity.pos", encoding='ISO-8859-1').readlines())
-    positive_examples = [s.strip() for s in positive_examples]
-    negative_examples = list(
-        open("./data/rt-polarity.neg", encoding='ISO-8859-1').readlines())
-    negative_examples = [s.strip() for s in negative_examples]
-    # Split by words
-    x_text = positive_examples + negative_examples
-    x_text = [clean_str(sent) for sent in x_text]
-    x_text = [s.split(" ") for s in x_text]
-    # Generate labels
-    positive_labels = [[0, 1] for _ in positive_examples]
-    negative_labels = [[1, 0] for _ in negative_examples]
-    y = np.concatenate([positive_labels, negative_labels], 0)
-    return [x_text, y]
-
-
-def pad_sentences(sentences, padding_word="<PAD/>"):
-    """
-    Pads all sentences to the same length. The length is defined by the longest sentence.
-    Returns padded sentences.
-    """
-    sequence_length = max(len(x) for x in sentences)
-    padded_sentences = []
-    for i in range(len(sentences)):
-        sentence = sentences[i]
-        num_padding = sequence_length - len(sentence)
-        new_sentence = sentence + [padding_word] * num_padding
-        padded_sentences.append(new_sentence)
-    return padded_sentences
-
-
-def build_vocab(sentences):
-    """
-    Builds a vocabulary mapping from word to index based on the sentences.
-    Returns vocabulary mapping and inverse vocabulary mapping.
-    """
-    # Build vocabulary
-    word_counts = Counter(itertools.chain(*sentences))
-    # Mapping from index to word
-    vocabulary_inv = [x[0] for x in word_counts.most_common()]
-    # Mapping from word to index
-    vocabulary = {x: i for i, x in enumerate(vocabulary_inv)}
-    return [vocabulary, vocabulary_inv]
-
-
-def build_input_data(sentences, labels, vocabulary):
-    """
-    Maps sentences and labels to vectors based on a vocabulary.
-    """
-    x = np.array([[vocabulary[word] for word in sentence]
-                  for sentence in sentences])
-    y = np.array(labels)
-    return [x, y]
-
-
-def load_data():
-    """
-    Loads and preprocesses data for the MR dataset.
-    Returns input vectors, labels, vocabulary, and inverse vocabulary.
-    """
-    # Load and preprocess data
-    sentences, labels = load_data_and_labels()
-    sentences_padded = pad_sentences(sentences)
-    vocabulary, vocabulary_inv = build_vocab(sentences_padded)
-    x, y = build_input_data(sentences_padded, labels, vocabulary)
-    return [x, y, vocabulary, vocabulary_inv]
-
-
-def batch_iter(data, batch_size, num_epochs):
-    """
-    Generates a batch iterator for a dataset.
-    """
-    data = np.array(data)
-    data_size = len(data)
-    # ceiling division: include a final partial batch, but avoid yielding an
-    # empty batch when data_size divides evenly by batch_size
-    num_batches_per_epoch = int((len(data) - 1) / batch_size) + 1
-    for epoch in range(num_epochs):
-        # Shuffle the data at each epoch
-        shuffle_indices = np.random.permutation(np.arange(data_size))
-        shuffled_data = data[shuffle_indices]
-        for batch_num in range(num_batches_per_epoch):
-            start_index = batch_num * batch_size
-            end_index = min((batch_num + 1) * batch_size, data_size)
-            yield shuffled_data[start_index:end_index]
-from gensim.models import word2vec
-from os.path import join, exists, split
-import os
-import numpy as np
-
-
-def train_word2vec(sentence_matrix, vocabulary_inv,
-                   num_features=300, min_word_count=1, context=10):
-    """
-    Trains, saves, and loads a Word2Vec model.
-    Returns initial weights for the embedding layer.
-
-    inputs:
-    sentence_matrix # int matrix: num_sentences x max_sentence_len
-    vocabulary_inv  # list mapping int index -> str word
-    num_features    # Word vector dimensionality
-    min_word_count  # Minimum word count
-    context         # Context window size
-    """
-    model_dir = 'word2vec_models'
-    model_name = "{:d}features_{:d}minwords_{:d}context".format(
-        num_features, min_word_count, context)
-    model_name = join(model_dir, model_name)
-    if exists(model_name):
-        embedding_model = word2vec.Word2Vec.load(model_name)
-        print('Loading existing Word2Vec model \'%s\'' % split(model_name)[-1])
-    else:
-        # Set values for various parameters
-        num_workers = 2  # Number of threads to run in parallel
-        downsampling = 1e-3  # Downsample setting for frequent words
-
-        # Initialize and train the model
-        print("Training Word2Vec model...")
-        sentences = [[vocabulary_inv[w] for w in s] for s in sentence_matrix]
-        embedding_model = word2vec.Word2Vec(sentences, workers=num_workers,
-                                            size=num_features, min_count=min_word_count,
-                                            window=context, sample=downsampling)
-
-        # If we don't plan to train the model any further, calling
-        # init_sims will make the model much more memory-efficient.
-        embedding_model.init_sims(replace=True)
-
-        # Saving the model for later use.
You can load it later using Word2Vec.load() - if not exists(model_dir): - os.mkdir(model_dir) - print('Saving Word2Vec model \'%s\'' % split(model_name)[-1]) - embedding_model.save(model_name) - - # add unknown words - embedding_weights = [np.array([embedding_model[w] if w in embedding_model - else np.random.uniform(-0.25, 0.25, embedding_model.vector_size) - for w in vocabulary_inv])] - return embedding_weights - - -if __name__ == '__main__': - import data_helpers - print("Loading data...") - x, _, _, vocabulary_inv = data_helpers.load_data() - w = train_word2vec(x, vocabulary_inv) -"""Functions for downloading and reading MNIST data.""" -from __future__ import print_function -import gzip -import os -import urllib -import numpy -SOURCE_URL = 'http://yann.lecun.com/exdb/mnist/' - - -def maybe_download(filename, work_directory): - """Download the data from Yann's website, unless it's already here.""" - if not os.path.exists(work_directory): - os.mkdir(work_directory) - filepath = os.path.join(work_directory, filename) - if not os.path.exists(filepath): - filepath, _ = urllib.urlretrieve(SOURCE_URL + filename, filepath) - statinfo = os.stat(filepath) - print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.') - return filepath - - -def _read32(bytestream): - dt = numpy.dtype(numpy.uint32).newbyteorder('>') - return numpy.frombuffer(bytestream.read(4), dtype=dt) - - -def extract_images(filename): - """Extract the images into a 4D uint8 numpy array [index, y, x, depth].""" - print('Extracting', filename) - with gzip.open(filename) as bytestream: - magic = _read32(bytestream) - if magic != 2051: - raise ValueError( - 'Invalid magic number %d in MNIST image file: %s' % - (magic, filename)) - num_images = _read32(bytestream) - rows = _read32(bytestream) - cols = _read32(bytestream) - buf = bytestream.read(rows * cols * num_images) - data = numpy.frombuffer(buf, dtype=numpy.uint8) - data = data.reshape(num_images, rows, cols, 1) - return data - - -def dense_to_one_hot(labels_dense, num_classes=10): - """Convert class labels from scalars to one-hot vectors.""" - num_labels = labels_dense.shape[0] - index_offset = numpy.arange(num_labels) * num_classes - labels_one_hot = numpy.zeros((num_labels, num_classes)) - labels_one_hot.flat[index_offset + labels_dense.ravel()] = 1 - return labels_one_hot - - -def extract_labels(filename, one_hot=False): - """Extract the labels into a 1D uint8 numpy array [index].""" - print('Extracting', filename) - with gzip.open(filename) as bytestream: - magic = _read32(bytestream) - if magic != 2049: - raise ValueError( - 'Invalid magic number %d in MNIST label file: %s' % - (magic, filename)) - num_items = _read32(bytestream) - buf = bytestream.read(num_items) - labels = numpy.frombuffer(buf, dtype=numpy.uint8) - if one_hot: - return dense_to_one_hot(labels) - return labels - - -class DataSet(object): - def __init__(self, images, labels, fake_data=False): - if fake_data: - self._num_examples = 10000 - else: - assert images.shape[0] == labels.shape[0], ( - "images.shape: %s labels.shape: %s" % (images.shape, - labels.shape)) - self._num_examples = images.shape[0] - # Convert shape from [num examples, rows, columns, depth] - # to [num examples, rows*columns] (assuming depth == 1) - assert images.shape[3] == 1 - images = images.reshape(images.shape[0], - images.shape[1] * images.shape[2]) - # Convert from [0, 255] -> [0.0, 1.0]. 
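            # (done once here in the constructor, so next_batch can slice the
            # already-normalized float32 array without any further conversion)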
- images = images.astype(numpy.float32) - images = numpy.multiply(images, 1.0 / 255.0) - self._images = images - self._labels = labels - self._epochs_completed = 0 - self._index_in_epoch = 0 - - @property - def images(self): - return self._images - - @property - def labels(self): - return self._labels - - @property - def num_examples(self): - return self._num_examples - - @property - def epochs_completed(self): - return self._epochs_completed - - def next_batch(self, batch_size, fake_data=False): - """Return the next `batch_size` examples from this data set.""" - if fake_data: - fake_image = [1.0 for _ in xrange(784)] - fake_label = 0 - return [fake_image for _ in xrange(batch_size)], [ - fake_label for _ in xrange(batch_size)] - start = self._index_in_epoch - self._index_in_epoch += batch_size - if self._index_in_epoch > self._num_examples: - # Finished epoch - self._epochs_completed += 1 - # Shuffle the data - perm = numpy.arange(self._num_examples) - numpy.random.shuffle(perm) - self._images = self._images[perm] - self._labels = self._labels[perm] - # Start next epoch - start = 0 - self._index_in_epoch = batch_size - assert batch_size <= self._num_examples - end = self._index_in_epoch - return self._images[start:end], self._labels[start:end] - - -def read_data_sets(train_dir, fake_data=False, one_hot=False): - class DataSets(object): - pass - data_sets = DataSets() - if fake_data: - data_sets.train = DataSet([], [], fake_data=True) - data_sets.validation = DataSet([], [], fake_data=True) - data_sets.test = DataSet([], [], fake_data=True) - return data_sets - TRAIN_IMAGES = 'train-images-idx3-ubyte.gz' - TRAIN_LABELS = 'train-labels-idx1-ubyte.gz' - TEST_IMAGES = 't10k-images-idx3-ubyte.gz' - TEST_LABELS = 't10k-labels-idx1-ubyte.gz' - VALIDATION_SIZE = 5000 - local_file = maybe_download(TRAIN_IMAGES, train_dir) - train_images = extract_images(local_file) - local_file = maybe_download(TRAIN_LABELS, train_dir) - train_labels = extract_labels(local_file, one_hot=one_hot) - local_file = maybe_download(TEST_IMAGES, train_dir) - test_images = extract_images(local_file) - local_file = maybe_download(TEST_LABELS, train_dir) - test_labels = extract_labels(local_file, one_hot=one_hot) - validation_images = train_images[:VALIDATION_SIZE] - validation_labels = train_labels[:VALIDATION_SIZE] - train_images = train_images[VALIDATION_SIZE:] - train_labels = train_labels[VALIDATION_SIZE:] - data_sets.train = DataSet(train_images, train_labels) - data_sets.validation = DataSet(validation_images, validation_labels) - data_sets.test = DataSet(test_images, test_labels) - return data_sets -# Multi GPU Basic example -''' -This tutorial requires your machine to have 2 GPUs -"/cpu:0": The CPU of your machine. 
-"/gpu:0": The first GPU of your machine -"/gpu:1": The second GPU of your machine -''' - -import numpy as np -import tensorflow as tf -import datetime - -# Processing Units logs -log_device_placement = True - -# num of multiplications to perform -n = 10 - -''' -Example: compute A^n + B^n on 2 GPUs -Results on 8 cores with 2 GTX-980: - * Single GPU computation time: 0:00:11.277449 - * Multi GPU computation time: 0:00:07.131701 -''' -# Create random large matrix -A = np.random.rand(1e4, 1e4).astype('float32') -B = np.random.rand(1e4, 1e4).astype('float32') - -# Creates a graph to store results -c1 = [] -c2 = [] - - -def matpow(M, n): - if n < 1: # Abstract cases where n < 1 - return M - else: - return tf.matmul(M, matpow(M, n-1)) - - -''' -Single GPU computing -''' -with tf.device('/gpu:0'): - a = tf.constant(A) - b = tf.constant(B) - # compute A^n and B^n and store results in c1 - c1.append(matpow(a, n)) - c1.append(matpow(b, n)) - -with tf.device('/cpu:0'): - sum = tf.add_n(c1) # Addition of all elements in c1, i.e. A^n + B^n - -t1_1 = datetime.datetime.now() -with tf.Session(config=tf.ConfigProto(log_device_placement=log_device_placement)) as sess: - # Runs the op. - sess.run(sum) -t2_1 = datetime.datetime.now() - - -''' -Multi GPU computing -''' -# GPU:0 computes A^n -with tf.device('/gpu:0'): - # compute A^n and store result in c2 - a = tf.constant(A) - c2.append(matpow(a, n)) - -# GPU:1 computes B^n -with tf.device('/gpu:1'): - # compute B^n and store result in c2 - b = tf.constant(B) - c2.append(matpow(b, n)) - -with tf.device('/cpu:0'): - sum = tf.add_n(c2) # Addition of all elements in c2, i.e. A^n + B^n - -t1_2 = datetime.datetime.now() -with tf.Session(config=tf.ConfigProto(log_device_placement=log_device_placement)) as sess: - # Runs the op. - sess.run(sum) -t2_2 = datetime.datetime.now() - - -print "Single GPU computation time: " + str(t2_1-t1_1) -print "Multi GPU computation time: " + str(t2_2-t1_2) -from nose.tools import assert_equal -from ..transform_util import TransformUtil - - -class TestTransformUtil(): - - states = [' Alabama ', 'Georgia!', 'Georgia', 'georgia', - 'FlOrIda', 'south carolina##', 'West virginia?'] - - expected_output = ['Alabama', - 'Georgia', - 'Georgia', - 'Georgia', - 'Florida', - 'South Carolina', - 'West Virginia'] - - def test_remove_punctuation(self): - assert_equal(TransformUtil.remove_punctuation('!#?'), '') - - def test_map_remove_punctuation(self): - # Map applies a function to a collection - output = map(TransformUtil.remove_punctuation, self.states) - assert_equal('!#?' not in output, True) - - def test_clean_strings(self): - clean_ops = [str.strip, TransformUtil.remove_punctuation, str.title] - output = TransformUtil.clean_strings(self.states, clean_ops) - assert_equal(output, self.expected_output) -from nose.tools import assert_equal -from ..type_util import TypeUtil - - -class TestUtil(): - - def test_is_iterable(self): - assert_equal(TypeUtil.is_iterable('foo'), True) - assert_equal(TypeUtil.is_iterable(7), False) - - def test_convert_to_list(self): - assert_equal(isinstance(TypeUtil.convert_to_list('foo'), list), True) - assert_equal(isinstance(TypeUtil.convert_to_list(7), list), False) -""" -Tutorial Diagrams ------------------ - -This script plots the flow-charts used in the scikit-learn tutorials. 
-""" - -import numpy as np -import pylab as pl -from matplotlib.patches import Circle, Rectangle, Polygon, Arrow, FancyArrow - - -def create_base(box_bg='#CCCCCC', - arrow1='#88CCFF', - arrow2='#88FF88', - supervised=True): - fig = pl.figure(figsize=(9, 6), facecolor='w') - ax = pl.axes((0, 0, 1, 1), - xticks=[], yticks=[], frameon=False) - ax.set_xlim(0, 9) - ax.set_ylim(0, 6) - - patches = [Rectangle((0.3, 3.6), 1.5, 1.8, zorder=1, fc=box_bg), - Rectangle((0.5, 3.8), 1.5, 1.8, zorder=2, fc=box_bg), - Rectangle((0.7, 4.0), 1.5, 1.8, zorder=3, fc=box_bg), - - Rectangle((2.9, 3.6), 0.2, 1.8, fc=box_bg), - Rectangle((3.1, 3.8), 0.2, 1.8, fc=box_bg), - Rectangle((3.3, 4.0), 0.2, 1.8, fc=box_bg), - - Rectangle((0.3, 0.2), 1.5, 1.8, fc=box_bg), - - Rectangle((2.9, 0.2), 0.2, 1.8, fc=box_bg), - - Circle((5.5, 3.5), 1.0, fc=box_bg), - - Polygon([[5.5, 1.7], - [6.1, 1.1], - [5.5, 0.5], - [4.9, 1.1]], fc=box_bg), - - FancyArrow(2.3, 4.6, 0.35, 0, fc=arrow1, - width=0.25, head_width=0.5, head_length=0.2), - - FancyArrow(3.75, 4.2, 0.5, -0.2, fc=arrow1, - width=0.25, head_width=0.5, head_length=0.2), - - FancyArrow(5.5, 2.4, 0, -0.4, fc=arrow1, - width=0.25, head_width=0.5, head_length=0.2), - - FancyArrow(2.0, 1.1, 0.5, 0, fc=arrow2, - width=0.25, head_width=0.5, head_length=0.2), - - FancyArrow(3.3, 1.1, 1.3, 0, fc=arrow2, - width=0.25, head_width=0.5, head_length=0.2), - - FancyArrow(6.2, 1.1, 0.8, 0, fc=arrow2, - width=0.25, head_width=0.5, head_length=0.2)] - - if supervised: - patches += [Rectangle((0.3, 2.4), 1.5, 0.5, zorder=1, fc=box_bg), - Rectangle((0.5, 2.6), 1.5, 0.5, zorder=2, fc=box_bg), - Rectangle((0.7, 2.8), 1.5, 0.5, zorder=3, fc=box_bg), - FancyArrow(2.3, 2.9, 2.0, 0, fc=arrow1, - width=0.25, head_width=0.5, head_length=0.2), - Rectangle((7.3, 0.85), 1.5, 0.5, fc=box_bg)] - else: - patches += [Rectangle((7.3, 0.2), 1.5, 1.8, fc=box_bg)] - - for p in patches: - ax.add_patch(p) - - pl.text(1.45, 4.9, "Training\nText,\nDocuments,\nImages,\netc.", - ha='center', va='center', fontsize=14) - - pl.text(3.6, 4.9, "Feature\nVectors", - ha='left', va='center', fontsize=14) - - pl.text(5.5, 3.5, "Machine\nLearning\nAlgorithm", - ha='center', va='center', fontsize=14) - - pl.text(1.05, 1.1, "New Text,\nDocument,\nImage,\netc.", - ha='center', va='center', fontsize=14) - - pl.text(3.3, 1.7, "Feature\nVector", - ha='left', va='center', fontsize=14) - - pl.text(5.5, 1.1, "Predictive\nModel", - ha='center', va='center', fontsize=12) - - if supervised: - pl.text(1.45, 3.05, "Labels", - ha='center', va='center', fontsize=14) - - pl.text(8.05, 1.1, "Expected\nLabel", - ha='center', va='center', fontsize=14) - pl.text(8.8, 5.8, "Supervised Learning Model", - ha='right', va='top', fontsize=18) - - else: - pl.text(8.05, 1.1, - "Likelihood\nor Cluster ID\nor Better\nRepresentation", - ha='center', va='center', fontsize=12) - pl.text(8.8, 5.8, "Unsupervised Learning Model", - ha='right', va='top', fontsize=18) - - -def plot_supervised_chart(annotate=False): - create_base(supervised=True) - if annotate: - fontdict = dict(color='r', weight='bold', size=14) - pl.text(1.9, 4.55, 'X = vec.fit_transform(input)', - fontdict=fontdict, - rotation=20, ha='left', va='bottom') - pl.text(3.7, 3.2, 'clf.fit(X, y)', - fontdict=fontdict, - rotation=20, ha='left', va='bottom') - pl.text(1.7, 1.5, 'X_new = vec.transform(input)', - fontdict=fontdict, - rotation=20, ha='left', va='bottom') - pl.text(6.1, 1.5, 'y_new = clf.predict(X_new)', - fontdict=fontdict, - rotation=20, ha='left', va='bottom') - - -def 
plot_unsupervised_chart(): - create_base(supervised=False) - - -if __name__ == '__main__': - plot_supervised_chart(False) - plot_supervised_chart(True) - plot_unsupervised_chart() - pl.show() -from .data import * -from .figures import * - -from .sgd_separator import plot_sgd_separator -from .linear_regression import plot_linear_regression -from .helpers import plot_iris_knn -import numpy as np - - -def linear_data_sample(N=40, rseed=0, m=3, b=-2): - rng = np.random.RandomState(rseed) - - x = 10 * rng.rand(N) - dy = m / 2 * (1 + rng.rand(N)) - y = m * x + b + dy * rng.randn(N) - - return (x, y, dy) - - -def linear_data_sample_big_errs(N=40, rseed=0, m=3, b=-2): - rng = np.random.RandomState(rseed) - - x = 10 * rng.rand(N) - dy = m / 2 * (1 + rng.rand(N)) - dy[20:25] *= 10 - y = m * x + b + dy * rng.randn(N) - - return (x, y, dy) - - -def sample_light_curve(phased=True): - from astroML.datasets import fetch_LINEAR_sample - data = fetch_LINEAR_sample() - t, y, dy = data[18525697].T - - if phased: - P_best = 0.580313015651 - t /= P_best - - return (t, y, dy) - - -def sample_light_curve_2(phased=True): - from astroML.datasets import fetch_LINEAR_sample - data = fetch_LINEAR_sample() - t, y, dy = data[10022663].T - - if phased: - P_best = 0.61596079804 - t /= P_best - - return (t, y, dy) -import numpy as np -import matplotlib.pyplot as plt -import warnings - - -def plot_venn_diagram(): - fig, ax = plt.subplots(subplot_kw=dict( - frameon=False, xticks=[], yticks=[])) - ax.add_patch(plt.Circle((0.3, 0.3), 0.3, fc='red', alpha=0.5)) - ax.add_patch(plt.Circle((0.6, 0.3), 0.3, fc='blue', alpha=0.5)) - ax.add_patch(plt.Rectangle((-0.1, -0.1), 1.1, 0.8, fc='none', ec='black')) - ax.text(0.2, 0.3, '$x$', size=30, ha='center', va='center') - ax.text(0.7, 0.3, '$y$', size=30, ha='center', va='center') - ax.text(0.0, 0.6, '$I$', size=30) - ax.axis('equal') - - -def plot_example_decision_tree(): - fig = plt.figure(figsize=(10, 4)) - ax = fig.add_axes([0, 0, 0.8, 1], frameon=False, xticks=[], yticks=[]) - ax.set_title('Example Decision Tree: Animal Classification', size=24) - - def text(ax, x, y, t, size=20, **kwargs): - ax.text(x, y, t, - ha='center', va='center', size=size, - bbox=dict(boxstyle='round', ec='k', fc='w'), **kwargs) - - text(ax, 0.5, 0.9, "How big is\nthe animal?", 20) - text(ax, 0.3, 0.6, "Does the animal\nhave horns?", 18) - text(ax, 0.7, 0.6, "Does the animal\nhave two legs?", 18) - text(ax, 0.12, 0.3, "Are the horns\nlonger than 10cm?", 14) - text(ax, 0.38, 0.3, "Is the animal\nwearing a collar?", 14) - text(ax, 0.62, 0.3, "Does the animal\nhave wings?", 14) - text(ax, 0.88, 0.3, "Does the animal\nhave a tail?", 14) - - text(ax, 0.4, 0.75, "> 1m", 12, alpha=0.4) - text(ax, 0.6, 0.75, "< 1m", 12, alpha=0.4) - - text(ax, 0.21, 0.45, "yes", 12, alpha=0.4) - text(ax, 0.34, 0.45, "no", 12, alpha=0.4) - - text(ax, 0.66, 0.45, "yes", 12, alpha=0.4) - text(ax, 0.79, 0.45, "no", 12, alpha=0.4) - - ax.plot([0.3, 0.5, 0.7], [0.6, 0.9, 0.6], '-k') - ax.plot([0.12, 0.3, 0.38], [0.3, 0.6, 0.3], '-k') - ax.plot([0.62, 0.7, 0.88], [0.3, 0.6, 0.3], '-k') - ax.plot([0.0, 0.12, 0.20], [0.0, 0.3, 0.0], '--k') - ax.plot([0.28, 0.38, 0.48], [0.0, 0.3, 0.0], '--k') - ax.plot([0.52, 0.62, 0.72], [0.0, 0.3, 0.0], '--k') - ax.plot([0.8, 0.88, 1.0], [0.0, 0.3, 0.0], '--k') - ax.axis([0, 1, 0, 1]) - - -def visualize_tree(estimator, X, y, boundaries=True, - xlim=None, ylim=None): - estimator.fit(X, y) - - if xlim is None: - xlim = (X[:, 0].min() - 0.1, X[:, 0].max() + 0.1) - if ylim is None: - ylim = (X[:, 1].min() - 
0.1, X[:, 1].max() + 0.1) - - x_min, x_max = xlim - y_min, y_max = ylim - xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), - np.linspace(y_min, y_max, 100)) - Z = estimator.predict(np.c_[xx.ravel(), yy.ravel()]) - - # Put the result into a color plot - Z = Z.reshape(xx.shape) - plt.figure() - plt.pcolormesh(xx, yy, Z, alpha=0.2, cmap='rainbow') - plt.clim(y.min(), y.max()) - - # Plot also the training points - plt.scatter(X[:, 0], X[:, 1], c=y, s=50, cmap='rainbow') - plt.axis('off') - - plt.xlim(x_min, x_max) - plt.ylim(y_min, y_max) - plt.clim(y.min(), y.max()) - - # Plot the decision boundaries - def plot_boundaries(i, xlim, ylim): - if i < 0: - return - - tree = estimator.tree_ - - if tree.feature[i] == 0: - plt.plot([tree.threshold[i], tree.threshold[i]], ylim, '-k') - plot_boundaries(tree.children_left[i], - [xlim[0], tree.threshold[i]], ylim) - plot_boundaries(tree.children_right[i], - [tree.threshold[i], xlim[1]], ylim) - - elif tree.feature[i] == 1: - plt.plot(xlim, [tree.threshold[i], tree.threshold[i]], '-k') - plot_boundaries(tree.children_left[i], xlim, - [ylim[0], tree.threshold[i]]) - plot_boundaries(tree.children_right[i], xlim, - [tree.threshold[i], ylim[1]]) - - if boundaries: - plot_boundaries(0, plt.xlim(), plt.ylim()) - - -def plot_tree_interactive(X, y): - from sklearn.tree import DecisionTreeClassifier - - def interactive_tree(depth=1): - clf = DecisionTreeClassifier(max_depth=depth, random_state=0) - visualize_tree(clf, X, y) - - from IPython.html.widgets import interact - return interact(interactive_tree, depth=[1, 5]) - - -def plot_kmeans_interactive(min_clusters=1, max_clusters=6): - from IPython.html.widgets import interact - from sklearn.metrics.pairwise import euclidean_distances - from sklearn.datasets.samples_generator import make_blobs - - with warnings.catch_warnings(): - warnings.filterwarnings('ignore') - - X, y = make_blobs(n_samples=300, centers=4, - random_state=0, cluster_std=0.60) - - def _kmeans_step(frame=0, n_clusters=4): - rng = np.random.RandomState(2) - labels = np.zeros(X.shape[0]) - centers = rng.randn(n_clusters, 2) - - nsteps = frame // 3 - - for i in range(nsteps + 1): - old_centers = centers - if i < nsteps or frame % 3 > 0: - dist = euclidean_distances(X, centers) - labels = dist.argmin(1) - - if i < nsteps or frame % 3 > 1: - centers = np.array([X[labels == j].mean(0) - for j in range(n_clusters)]) - nans = np.isnan(centers) - centers[nans] = old_centers[nans] - - # plot the data and cluster centers - plt.scatter(X[:, 0], X[:, 1], c=labels, s=50, cmap='rainbow', - vmin=0, vmax=n_clusters - 1) - plt.scatter(old_centers[:, 0], old_centers[:, 1], marker='o', - c=np.arange(n_clusters), - s=200, cmap='rainbow') - plt.scatter(old_centers[:, 0], old_centers[:, 1], marker='o', - c='black', s=50) - - # plot new centers if third frame - if frame % 3 == 2: - for i in range(n_clusters): - plt.annotate('', centers[i], old_centers[i], - arrowprops=dict(arrowstyle='->', linewidth=1)) - plt.scatter(centers[:, 0], centers[:, 1], marker='o', - c=np.arange(n_clusters), - s=200, cmap='rainbow') - plt.scatter(centers[:, 0], centers[:, 1], marker='o', - c='black', s=50) - - plt.xlim(-4, 4) - plt.ylim(-2, 10) - - if frame % 3 == 1: - plt.text(3.8, 9.5, "1. Reassign points to nearest centroid", - ha='right', va='top', size=14) - elif frame % 3 == 2: - plt.text(3.8, 9.5, "2. 
Update centroids to cluster means", - ha='right', va='top', size=14) - - return interact(_kmeans_step, frame=[0, 50], - n_clusters=[min_clusters, max_clusters]) - - -def plot_image_components(x, coefficients=None, mean=0, components=None, - imshape=(8, 8), n_components=6, fontsize=12): - if coefficients is None: - coefficients = x - - if components is None: - components = np.eye(len(coefficients), len(x)) - - mean = np.zeros_like(x) + mean - - fig = plt.figure(figsize=(1.2 * (5 + n_components), 1.2 * 2)) - g = plt.GridSpec(2, 5 + n_components, hspace=0.3) - - def show(i, j, x, title=None): - ax = fig.add_subplot(g[i, j], xticks=[], yticks=[]) - ax.imshow(x.reshape(imshape), interpolation='nearest') - if title: - ax.set_title(title, fontsize=fontsize) - - show(slice(2), slice(2), x, "True") - - approx = mean.copy() - show(0, 2, np.zeros_like(x) + mean, r'$\mu$') - show(1, 2, approx, r'$1 \cdot \mu$') - - for i in range(0, n_components): - approx = approx + coefficients[i] * components[i] - show(0, i + 3, components[i], r'$c_{0}$'.format(i + 1)) - show(1, i + 3, approx, - r"${0:.2f} \cdot c_{1}$".format(coefficients[i], i + 1)) - plt.gca().text(0, 1.05, '$+$', ha='right', va='bottom', - transform=plt.gca().transAxes, fontsize=fontsize) - - show(slice(2), slice(-2, None), approx, "Approx") - - -def plot_pca_interactive(data, n_components=6): - from sklearn.decomposition import PCA - from IPython.html.widgets import interact - - pca = PCA(n_components=n_components) - Xproj = pca.fit_transform(data) - - def show_decomp(i=0): - plot_image_components(data[i], Xproj[i], - pca.mean_, pca.components_) - - interact(show_decomp, i=(0, data.shape[0] - 1)) -""" -Small helpers for code that is not shown in the notebooks -""" - -from sklearn import neighbors, datasets, linear_model -import pylab as pl -import numpy as np -from matplotlib.colors import ListedColormap - -# Create color maps for 3-class classification problem, as with iris -cmap_light = ListedColormap(['#FFAAAA', '#AAFFAA', '#AAAAFF']) -cmap_bold = ListedColormap(['#FF0000', '#00FF00', '#0000FF']) - - -def plot_iris_knn(): - iris = datasets.load_iris() - X = iris.data[:, :2] # we only take the first two features. 
We could - # avoid this ugly slicing by using a two-dim dataset - y = iris.target - - knn = neighbors.KNeighborsClassifier(n_neighbors=5) - knn.fit(X, y) - - x_min, x_max = X[:, 0].min() - .1, X[:, 0].max() + .1 - y_min, y_max = X[:, 1].min() - .1, X[:, 1].max() + .1 - xx, yy = np.meshgrid(np.linspace(x_min, x_max, 100), - np.linspace(y_min, y_max, 100)) - Z = knn.predict(np.c_[xx.ravel(), yy.ravel()]) - - # Put the result into a color plot - Z = Z.reshape(xx.shape) - pl.figure() - pl.pcolormesh(xx, yy, Z, cmap=cmap_light) - - # Plot also the training points - pl.scatter(X[:, 0], X[:, 1], c=y, cmap=cmap_bold) - pl.xlabel('sepal length (cm)') - pl.ylabel('sepal width (cm)') - pl.axis('tight') - - -def plot_polynomial_regression(): - rng = np.random.RandomState(0) - x = 2*rng.rand(100) - 1 - - def f(t): return 1.2 * t**2 + .1 * t**3 - .4 * t ** 5 - .5 * t ** 9 - y = f(x) + .4 * rng.normal(size=100) - - x_test = np.linspace(-1, 1, 100) - - pl.figure() - pl.scatter(x, y, s=4) - - X = np.array([x**i for i in range(5)]).T - X_test = np.array([x_test**i for i in range(5)]).T - regr = linear_model.LinearRegression() - regr.fit(X, y) - pl.plot(x_test, regr.predict(X_test), label='4th order') - - X = np.array([x**i for i in range(10)]).T - X_test = np.array([x_test**i for i in range(10)]).T - regr = linear_model.LinearRegression() - regr.fit(X, y) - pl.plot(x_test, regr.predict(X_test), label='9th order') - - pl.legend(loc='best') - pl.axis('tight') - pl.title('Fitting a 4th and a 9th order polynomial') - - pl.figure() - pl.scatter(x, y, s=4) - pl.plot(x_test, f(x_test), label="truth") - pl.axis('tight') - pl.title('Ground truth (9th order polynomial)') -import numpy as np -import matplotlib.pyplot as plt -from sklearn.linear_model import LinearRegression - - -def plot_linear_regression(): - a = 0.5 - b = 1.0 - - # x from 0 to 10 - x = 30 * np.random.random(20) - - # y = a*x + b with noise - y = a * x + b + np.random.normal(size=x.shape) - - # create a linear regression classifier - clf = LinearRegression() - clf.fit(x[:, None], y) - - # predict y from the data - x_new = np.linspace(0, 30, 100) - y_new = clf.predict(x_new[:, None]) - - # plot the results - ax = plt.axes() - ax.scatter(x, y) - ax.plot(x_new, y_new) - - ax.set_xlabel('x') - ax.set_ylabel('y') - - ax.axis('tight') - - -if __name__ == '__main__': - plot_linear_regression() - plt.show() -import numpy as np -import matplotlib.pyplot as plt -from sklearn.linear_model import SGDClassifier -from sklearn.datasets.samples_generator import make_blobs - - -def plot_sgd_separator(): - # we create 50 separable points - X, Y = make_blobs(n_samples=50, centers=2, - random_state=0, cluster_std=0.60) - - # fit the model - clf = SGDClassifier(loss="hinge", alpha=0.01, - n_iter=200, fit_intercept=True) - clf.fit(X, Y) - - # plot the line, the points, and the nearest vectors to the plane - xx = np.linspace(-1, 5, 10) - yy = np.linspace(-1, 5, 10) - - X1, X2 = np.meshgrid(xx, yy) - Z = np.empty(X1.shape) - for (i, j), val in np.ndenumerate(X1): - x1 = val - x2 = X2[i, j] - p = clf.decision_function([x1, x2]) - Z[i, j] = p[0] - levels = [-1.0, 0.0, 1.0] - linestyles = ['dashed', 'solid', 'dashed'] - colors = 'k' - - ax = plt.axes() - ax.contour(X1, X2, Z, levels, colors=colors, linestyles=linestyles) - ax.scatter(X[:, 0], X[:, 1], c=Y, cmap=plt.cm.Paired) - - ax.axis('tight') - - -if __name__ == '__main__': - plot_sgd_separator() - plt.show() -""" -========== -Libsvm GUI -========== - -A simple graphical frontend for Libsvm mainly intended for didactic 
-purposes. You can create data points by point and click and visualize -the decision region induced by different kernels and parameter settings. - -To create positive examples click the left mouse button; to create -negative examples click the right button. - -If all examples are from the same class, it uses a one-class SVM. - -""" -from __future__ import division, print_function - -from sklearn.externals.six.moves import xrange -from sklearn.datasets import dump_svmlight_file -from sklearn import svm -import numpy as np -import sys -import Tkinter as Tk -from matplotlib.contour import ContourSet -from matplotlib.figure import Figure -from matplotlib.backends.backend_tkagg import NavigationToolbar2TkAgg -from matplotlib.backends.backend_tkagg import FigureCanvasTkAgg -import matplotlib -print(__doc__) - -# Author: Peter Prettenhoer -# -# License: BSD 3 clause - -matplotlib.use('TkAgg') - - -y_min, y_max = -50, 50 -x_min, x_max = -50, 50 - - -class Model(object): - """The Model which hold the data. It implements the - observable in the observer pattern and notifies the - registered observers on change event. - """ - - def __init__(self): - self.observers = [] - self.surface = None - self.data = [] - self.cls = None - self.surface_type = 0 - - def changed(self, event): - """Notify the observers. """ - for observer in self.observers: - observer.update(event, self) - - def add_observer(self, observer): - """Register an observer. """ - self.observers.append(observer) - - def set_surface(self, surface): - self.surface = surface - - def dump_svmlight_file(self, file): - data = np.array(self.data) - X = data[:, 0:2] - y = data[:, 2] - dump_svmlight_file(X, y, file) - - -class Controller(object): - def __init__(self, model): - self.model = model - self.kernel = Tk.IntVar() - self.surface_type = Tk.IntVar() - # Whether or not a model has been fitted - self.fitted = False - - def fit(self): - print("fit the model") - train = np.array(self.model.data) - X = train[:, 0:2] - y = train[:, 2] - - C = float(self.complexity.get()) - gamma = float(self.gamma.get()) - coef0 = float(self.coef0.get()) - degree = int(self.degree.get()) - kernel_map = {0: "linear", 1: "rbf", 2: "poly"} - if len(np.unique(y)) == 1: - clf = svm.OneClassSVM(kernel=kernel_map[self.kernel.get()], - gamma=gamma, coef0=coef0, degree=degree) - clf.fit(X) - else: - clf = svm.SVC(kernel=kernel_map[self.kernel.get()], C=C, - gamma=gamma, coef0=coef0, degree=degree) - clf.fit(X, y) - if hasattr(clf, 'score'): - print("Accuracy:", clf.score(X, y) * 100) - X1, X2, Z = self.decision_surface(clf) - self.model.clf = clf - self.model.set_surface((X1, X2, Z)) - self.model.surface_type = self.surface_type.get() - self.fitted = True - self.model.changed("surface") - - def decision_surface(self, cls): - delta = 1 - x = np.arange(x_min, x_max + delta, delta) - y = np.arange(y_min, y_max + delta, delta) - X1, X2 = np.meshgrid(x, y) - Z = cls.decision_function(np.c_[X1.ravel(), X2.ravel()]) - Z = Z.reshape(X1.shape) - return X1, X2, Z - - def clear_data(self): - self.model.data = [] - self.fitted = False - self.model.changed("clear") - - def add_example(self, x, y, label): - self.model.data.append((x, y, label)) - self.model.changed("example_added") - - # update decision surface if already fitted. - self.refit() - - def refit(self): - """Refit the model if already fitted. """ - if self.fitted: - self.fit() - - -class View(object): - """Test docstring. 
""" - - def __init__(self, root, controller): - f = Figure() - ax = f.add_subplot(111) - ax.set_xticks([]) - ax.set_yticks([]) - ax.set_xlim((x_min, x_max)) - ax.set_ylim((y_min, y_max)) - canvas = FigureCanvasTkAgg(f, master=root) - canvas.show() - canvas.get_tk_widget().pack(side=Tk.TOP, fill=Tk.BOTH, expand=1) - canvas._tkcanvas.pack(side=Tk.TOP, fill=Tk.BOTH, expand=1) - canvas.mpl_connect('key_press_event', self.onkeypress) - canvas.mpl_connect('key_release_event', self.onkeyrelease) - canvas.mpl_connect('button_press_event', self.onclick) - toolbar = NavigationToolbar2TkAgg(canvas, root) - toolbar.update() - self.shift_down = False - self.controllbar = ControllBar(root, controller) - self.f = f - self.ax = ax - self.canvas = canvas - self.controller = controller - self.contours = [] - self.c_labels = None - self.plot_kernels() - - def plot_kernels(self): - self.ax.text(-50, -60, "Linear: $u^T v$") - self.ax.text(-20, -60, "RBF: $\exp (-\gamma \| u-v \|^2)$") - self.ax.text(10, -60, "Poly: $(\gamma \, u^T v + r)^d$") - - def onkeypress(self, event): - if event.key == "shift": - self.shift_down = True - - def onkeyrelease(self, event): - if event.key == "shift": - self.shift_down = False - - def onclick(self, event): - if event.xdata and event.ydata: - if self.shift_down or event.button == 3: - self.controller.add_example(event.xdata, event.ydata, -1) - elif event.button == 1: - self.controller.add_example(event.xdata, event.ydata, 1) - - def update_example(self, model, idx): - x, y, l = model.data[idx] - if l == 1: - color = 'w' - elif l == -1: - color = 'k' - self.ax.plot([x], [y], "%so" % color, scalex=0.0, scaley=0.0) - - def update(self, event, model): - if event == "examples_loaded": - for i in xrange(len(model.data)): - self.update_example(model, i) - - if event == "example_added": - self.update_example(model, -1) - - if event == "clear": - self.ax.clear() - self.ax.set_xticks([]) - self.ax.set_yticks([]) - self.contours = [] - self.c_labels = None - self.plot_kernels() - - if event == "surface": - self.remove_surface() - self.plot_support_vectors(model.clf.support_vectors_) - self.plot_decision_surface(model.surface, model.surface_type) - - self.canvas.draw() - - def remove_surface(self): - """Remove old decision surface.""" - if len(self.contours) > 0: - for contour in self.contours: - if isinstance(contour, ContourSet): - for lineset in contour.collections: - lineset.remove() - else: - contour.remove() - self.contours = [] - - def plot_support_vectors(self, support_vectors): - """Plot the support vectors by placing circles over the - corresponding data points and adds the circle collection - to the contours list.""" - cs = self.ax.scatter(support_vectors[:, 0], support_vectors[:, 1], - s=80, edgecolors="k", facecolors="none") - self.contours.append(cs) - - def plot_decision_surface(self, surface, type): - X1, X2, Z = surface - if type == 0: - levels = [-1.0, 0.0, 1.0] - linestyles = ['dashed', 'solid', 'dashed'] - colors = 'k' - self.contours.append(self.ax.contour(X1, X2, Z, levels, - colors=colors, - linestyles=linestyles)) - elif type == 1: - self.contours.append(self.ax.contourf(X1, X2, Z, 10, - cmap=matplotlib.cm.bone, - origin='lower', alpha=0.85)) - self.contours.append(self.ax.contour(X1, X2, Z, [0.0], colors='k', - linestyles=['solid'])) - else: - raise ValueError("surface type unknown") - - -class ControllBar(object): - def __init__(self, root, controller): - fm = Tk.Frame(root) - kernel_group = Tk.Frame(fm) - Tk.Radiobutton(kernel_group, text="Linear", 
variable=controller.kernel, - value=0, command=controller.refit).pack(anchor=Tk.W) - Tk.Radiobutton(kernel_group, text="RBF", variable=controller.kernel, - value=1, command=controller.refit).pack(anchor=Tk.W) - Tk.Radiobutton(kernel_group, text="Poly", variable=controller.kernel, - value=2, command=controller.refit).pack(anchor=Tk.W) - kernel_group.pack(side=Tk.LEFT) - - valbox = Tk.Frame(fm) - controller.complexity = Tk.StringVar() - controller.complexity.set("1.0") - c = Tk.Frame(valbox) - Tk.Label(c, text="C:", anchor="e", width=7).pack(side=Tk.LEFT) - Tk.Entry(c, width=6, textvariable=controller.complexity).pack( - side=Tk.LEFT) - c.pack() - - controller.gamma = Tk.StringVar() - controller.gamma.set("0.01") - g = Tk.Frame(valbox) - Tk.Label(g, text="gamma:", anchor="e", width=7).pack(side=Tk.LEFT) - Tk.Entry(g, width=6, textvariable=controller.gamma).pack(side=Tk.LEFT) - g.pack() - - controller.degree = Tk.StringVar() - controller.degree.set("3") - d = Tk.Frame(valbox) - Tk.Label(d, text="degree:", anchor="e", width=7).pack(side=Tk.LEFT) - Tk.Entry(d, width=6, textvariable=controller.degree).pack(side=Tk.LEFT) - d.pack() - - controller.coef0 = Tk.StringVar() - controller.coef0.set("0") - r = Tk.Frame(valbox) - Tk.Label(r, text="coef0:", anchor="e", width=7).pack(side=Tk.LEFT) - Tk.Entry(r, width=6, textvariable=controller.coef0).pack(side=Tk.LEFT) - r.pack() - valbox.pack(side=Tk.LEFT) - - cmap_group = Tk.Frame(fm) - Tk.Radiobutton(cmap_group, text="Hyperplanes", - variable=controller.surface_type, value=0, - command=controller.refit).pack(anchor=Tk.W) - Tk.Radiobutton(cmap_group, text="Surface", - variable=controller.surface_type, value=1, - command=controller.refit).pack(anchor=Tk.W) - - cmap_group.pack(side=Tk.LEFT) - - train_button = Tk.Button(fm, text='Fit', width=5, - command=controller.fit) - train_button.pack() - fm.pack(side=Tk.LEFT) - Tk.Button(fm, text='Clear', width=5, - command=controller.clear_data).pack(side=Tk.LEFT) - - -def get_parser(): - from optparse import OptionParser - op = OptionParser() - op.add_option("--output", - action="store", type="str", dest="output", - help="Path where to dump data.") - return op - - -def main(argv): - op = get_parser() - opts, args = op.parse_args(argv[1:]) - root = Tk.Tk() - model = Model() - controller = Controller(model) - root.wm_title("Scikit-learn Libsvm GUI") - view = View(root, controller) - model.add_observer(view) - Tk.mainloop() - - if opts.output: - model.dump_svmlight_file(opts.output) - - -if __name__ == "__main__": - main(sys.argv) -import numpy as np -import json - -from keras.utils.data_utils import get_file -from keras import backend as K - -CLASS_INDEX = None -CLASS_INDEX_PATH = 'https://s3.amazonaws.com/deep-learning-models/image-models/imagenet_class_index.json' - - -def preprocess_input(x, dim_ordering='default'): - if dim_ordering == 'default': - dim_ordering = K.image_dim_ordering() - assert dim_ordering in {'tf', 'th'} - - if dim_ordering == 'th': - x[:, 0, :, :] -= 103.939 - x[:, 1, :, :] -= 116.779 - x[:, 2, :, :] -= 123.68 - # 'RGB'->'BGR' - x = x[:, ::-1, :, :] - else: - x[:, :, :, 0] -= 103.939 - x[:, :, :, 1] -= 116.779 - x[:, :, :, 2] -= 123.68 - # 'RGB'->'BGR' - x = x[:, :, :, ::-1] - return x - - -def decode_predictions(preds): - global CLASS_INDEX - assert len(preds.shape) == 2 and preds.shape[1] == 1000 - if CLASS_INDEX is None: - fpath = get_file('imagenet_class_index.json', - CLASS_INDEX_PATH, - cache_subdir='models') - CLASS_INDEX = json.load(open(fpath)) - indices = np.argmax(preds, axis=-1) - 
results = [] - for i in indices: - results.append(CLASS_INDEX[str(i)]) - return results -# -*- coding: utf-8 -*- -'''ResNet50 model for Keras. - -# Reference: - -- [Deep Residual Learning for Image Recognition](https://arxiv.org/abs/1512.03385) - -Adapted from code contributed by BigMoyan. -''' -from __future__ import print_function - -import numpy as np -import warnings - -from keras.layers import merge, Input -from keras.layers import Dense, Activation, Flatten -from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D -from keras.layers import BatchNormalization -from keras.models import Model -from keras.preprocessing import image -import keras.backend as K -from keras.utils.layer_utils import convert_all_kernels_in_model -from keras.utils.data_utils import get_file - - -TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/resnet50_weights_th_dim_ordering_th_kernels.h5' -TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/resnet50_weights_tf_dim_ordering_tf_kernels.h5' -TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/resnet50_weights_th_dim_ordering_th_kernels_notop.h5' -TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' - - -def identity_block(input_tensor, kernel_size, filters, stage, block): - '''The identity_block is the block that has no conv layer at shortcut - - # Arguments - input_tensor: input tensor - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - ''' - nb_filter1, nb_filter2, nb_filter3 = filters - if K.image_dim_ordering() == 'tf': - bn_axis = 3 - else: - bn_axis = 1 - conv_name_base = 'res' + str(stage) + block + '_branch' - bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = Convolution2D(nb_filter1, 1, 1, - name=conv_name_base + '2a')(input_tensor) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) - x = Activation('relu')(x) - - x = Convolution2D(nb_filter2, kernel_size, kernel_size, - border_mode='same', name=conv_name_base + '2b')(x) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) - x = Activation('relu')(x) - - x = Convolution2D(nb_filter3, 1, 1, name=conv_name_base + '2c')(x) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) - - x = merge([x, input_tensor], mode='sum') - x = Activation('relu')(x) - return x - - -def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): - '''conv_block is the block that has a conv layer at shortcut - - # Arguments - input_tensor: input tensor - kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path - stage: integer, current stage label, used for generating layer names - block: 'a','b'..., current block label, used for generating layer names - - Note that from stage 3, the first conv layer at main path is with subsample=(2,2) - And the shortcut should have subsample=(2,2) as well - ''' - nb_filter1, nb_filter2, nb_filter3 = filters - if K.image_dim_ordering() == 'tf': - bn_axis = 3 - else: - bn_axis = 1 - conv_name_base = 'res' + str(stage) + block + '_branch' - 
bn_name_base = 'bn' + str(stage) + block + '_branch' - - x = Convolution2D(nb_filter1, 1, 1, subsample=strides, - name=conv_name_base + '2a')(input_tensor) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) - x = Activation('relu')(x) - - x = Convolution2D(nb_filter2, kernel_size, kernel_size, border_mode='same', - name=conv_name_base + '2b')(x) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) - x = Activation('relu')(x) - - x = Convolution2D(nb_filter3, 1, 1, name=conv_name_base + '2c')(x) - x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) - - shortcut = Convolution2D(nb_filter3, 1, 1, subsample=strides, - name=conv_name_base + '1')(input_tensor) - shortcut = BatchNormalization( - axis=bn_axis, name=bn_name_base + '1')(shortcut) - - x = merge([x, shortcut], mode='sum') - x = Activation('relu')(x) - return x - - -def ResNet50(include_top=True, weights='imagenet', - input_tensor=None): - '''Instantiate the ResNet50 architecture, - optionally loading weights pre-trained - on ImageNet. Note that when using TensorFlow, - for best performance you should set - `image_dim_ordering="tf"` in your Keras config - at ~/.keras/keras.json. - - The model and the weights are compatible with both - TensorFlow and Theano. The dimension ordering - convention used by the model is the one - specified in your Keras config file. - - # Arguments - include_top: whether to include the 3 fully-connected - layers at the top of the network. - weights: one of `None` (random initialization) - or "imagenet" (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. xput of `layers.Input()`) - to use as image input for the model. - - # Returns - A Keras model instance. - ''' - if weights not in {'imagenet', None}: - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `imagenet` ' - '(pre-training on ImageNet).') - # Determine proper input shape - if K.image_dim_ordering() == 'th': - if include_top: - input_shape = (3, 224, 224) - else: - input_shape = (3, None, None) - else: - if include_top: - input_shape = (224, 224, 3) - else: - input_shape = (None, None, 3) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor) - else: - img_input = input_tensor - if K.image_dim_ordering() == 'tf': - bn_axis = 3 - else: - bn_axis = 1 - - x = ZeroPadding2D((3, 3))(img_input) - x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1')(x) - x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) - x = Activation('relu')(x) - x = MaxPooling2D((3, 3), strides=(2, 2))(x) - - x = conv_block(x, 3, [64, 64, 256], stage=2, block='a', strides=(1, 1)) - x = identity_block(x, 3, [64, 64, 256], stage=2, block='b') - x = identity_block(x, 3, [64, 64, 256], stage=2, block='c') - - x = conv_block(x, 3, [128, 128, 512], stage=3, block='a') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='b') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='c') - x = identity_block(x, 3, [128, 128, 512], stage=3, block='d') - - x = conv_block(x, 3, [256, 256, 1024], stage=4, block='a') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='b') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='c') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='d') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='e') - x = identity_block(x, 3, [256, 256, 1024], stage=4, block='f') - - x = conv_block(x, 3, [512, 
512, 2048], stage=5, block='a') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='b') - x = identity_block(x, 3, [512, 512, 2048], stage=5, block='c') - - x = AveragePooling2D((7, 7), name='avg_pool')(x) - - if include_top: - x = Flatten()(x) - x = Dense(1000, activation='softmax', name='fc1000')(x) - - model = Model(img_input, x) - - # load weights - if weights == 'imagenet': - print('K.image_dim_ordering:', K.image_dim_ordering()) - if K.image_dim_ordering() == 'th': - if include_top: - weights_path = get_file('resnet50_weights_th_dim_ordering_th_kernels.h5', - TH_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('resnet50_weights_th_dim_ordering_th_kernels_notop.h5', - TH_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - model.load_weights(weights_path) - if K.backend() == 'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image dimension ordering convention ' - '(`image_dim_ordering="th"`). ' - 'For best performance, set ' - '`image_dim_ordering="tf"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - convert_all_kernels_in_model(model) - else: - if include_top: - weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels.h5', - TF_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - model.load_weights(weights_path) - if K.backend() == 'theano': - convert_all_kernels_in_model(model) - return model - - -if __name__ == '__main__': - model = ResNet50(include_top=True, weights='imagenet') - - img_path = 'elephant.jpg' - img = image.load_img(img_path, target_size=(224, 224)) - x = image.img_to_array(img) - x = np.expand_dims(x, axis=0) - x = preprocess_input(x) - print('Input image shape:', x.shape) - - preds = model.predict(x) - print('Predicted:', decode_predictions(preds)) -# -*- coding: utf-8 -*- -'''VGG16 model for Keras. - -# Reference: - -- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) - -''' -from __future__ import print_function - -import numpy as np -import warnings - -from keras.models import Model -from keras.layers import Flatten, Dense, Input -from keras.layers import Convolution2D, MaxPooling2D -from keras.preprocessing import image -from keras.utils.layer_utils import convert_all_kernels_in_model -from keras.utils.data_utils import get_file -from keras import backend as K -# from imagenet_utils import decode_predictions, preprocess_input - - -TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5' -TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5' -TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels_notop.h5' -TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5' - - -def VGG16(include_top=True, weights='imagenet', - input_tensor=None): - '''Instantiate the VGG16 architecture, - optionally loading weights pre-trained - on ImageNet. Note that when using TensorFlow, - for best performance you should set - `image_dim_ordering="tf"` in your Keras config - at ~/.keras/keras.json. 
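
# A worked count of where the "50" in ResNet-50 comes from, given the stage
# layout assembled above (the shortcut 1x1 convolutions are conventionally
# not counted):
blocks_per_stage = [3, 4, 6, 3]                # stages 2, 3, 4, 5 above
conv_layers = 1 + 3 * sum(blocks_per_stage)    # stem conv + 3 convs per block
assert conv_layers + 1 == 50                   # + the final fc1000 layer
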
- - The model and the weights are compatible with both - TensorFlow and Theano. The dimension ordering - convention used by the model is the one - specified in your Keras config file. - - # Arguments - include_top: whether to include the 3 fully-connected - layers at the top of the network. - weights: one of `None` (random initialization) - or "imagenet" (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - - # Returns - A Keras model instance. - ''' - if weights not in {'imagenet', None}: - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `imagenet` ' - '(pre-training on ImageNet).') - # Determine proper input shape - if K.image_dim_ordering() == 'th': - if include_top: - input_shape = (3, 224, 224) - else: - input_shape = (3, None, None) - else: - if include_top: - input_shape = (224, 224, 3) - else: - input_shape = (None, None, 3) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor) - else: - img_input = input_tensor - # Block 1 - x = Convolution2D(64, 3, 3, activation='relu', - border_mode='same', name='block1_conv1')(img_input) - x = Convolution2D(64, 3, 3, activation='relu', - border_mode='same', name='block1_conv2')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) - - # Block 2 - x = Convolution2D(128, 3, 3, activation='relu', - border_mode='same', name='block2_conv1')(x) - x = Convolution2D(128, 3, 3, activation='relu', - border_mode='same', name='block2_conv2')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) - - # Block 3 - x = Convolution2D(256, 3, 3, activation='relu', - border_mode='same', name='block3_conv1')(x) - x = Convolution2D(256, 3, 3, activation='relu', - border_mode='same', name='block3_conv2')(x) - x = Convolution2D(256, 3, 3, activation='relu', - border_mode='same', name='block3_conv3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) - - # Block 4 - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block4_conv1')(x) - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block4_conv2')(x) - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block4_conv3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) - - # Block 5 - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block5_conv1')(x) - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block5_conv2')(x) - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block5_conv3')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) - - if include_top: - # Classification block - x = Flatten(name='flatten')(x) - x = Dense(4096, activation='relu', name='fc1')(x) - x = Dense(4096, activation='relu', name='fc2')(x) - x = Dense(1000, activation='softmax', name='predictions')(x) - - # Create model - model = Model(img_input, x) - - # load weights - if weights == 'imagenet': - print('K.image_dim_ordering:', K.image_dim_ordering()) - if K.image_dim_ordering() == 'th': - if include_top: - weights_path = get_file('vgg16_weights_th_dim_ordering_th_kernels.h5', - TH_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('vgg16_weights_th_dim_ordering_th_kernels_notop.h5', - TH_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - 
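
# The five convolutional blocks above follow one regular pattern; a compact
# Keras 2 sketch that generates the same VGG16 stack from a per-block config
# (an illustrative rewrite, not the code shipped in this file):
from keras.layers import Conv2D, MaxPooling2D

VGG16_CFG = [(2, 64), (2, 128), (3, 256), (3, 512), (3, 512)]  # (convs, filters)

def vgg16_conv_stack(x):
    for b, (n_convs, n_filters) in enumerate(VGG16_CFG, start=1):
        for c in range(1, n_convs + 1):
            x = Conv2D(n_filters, (3, 3), activation='relu', padding='same',
                       name='block%d_conv%d' % (b, c))(x)
        x = MaxPooling2D((2, 2), strides=(2, 2), name='block%d_pool' % b)(x)
    return x
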
model.load_weights(weights_path) - if K.backend() == 'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image dimension ordering convention ' - '(`image_dim_ordering="th"`). ' - 'For best performance, set ' - '`image_dim_ordering="tf"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - convert_all_kernels_in_model(model) - else: - if include_top: - weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5', - TF_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5', - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - model.load_weights(weights_path) - if K.backend() == 'theano': - convert_all_kernels_in_model(model) - return model - - -if __name__ == '__main__': - model = VGG16(include_top=True, weights='imagenet') - - img_path = 'elephant.jpg' - img = image.load_img(img_path, target_size=(224, 224)) - x = image.img_to_array(img) - x = np.expand_dims(x, axis=0) - x = preprocess_input(x) - print('Input image shape:', x.shape) - - preds = model.predict(x) - print('Predicted:', decode_predictions(preds)) -# -*- coding: utf-8 -*- -'''VGG19 model for Keras. - -# Reference: - -- [Very Deep Convolutional Networks for Large-Scale Image Recognition](https://arxiv.org/abs/1409.1556) - -''' -from __future__ import print_function - -import numpy as np -import warnings - -from keras.models import Model -from keras.layers import Flatten, Dense, Input -from keras.layers import Convolution2D, MaxPooling2D -from keras.preprocessing import image -from keras.utils.layer_utils import convert_all_kernels_in_model -from keras.utils.data_utils import get_file -from keras import backend as K - - -TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_th_dim_ordering_th_kernels.h5' -TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels.h5' -TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_th_dim_ordering_th_kernels_notop.h5' -TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5' - - -def VGG19(include_top=True, weights='imagenet', - input_tensor=None): - '''Instantiate the VGG19 architecture, - optionally loading weights pre-trained - on ImageNet. Note that when using TensorFlow, - for best performance you should set - `image_dim_ordering="tf"` in your Keras config - at ~/.keras/keras.json. - - The model and the weights are compatible with both - TensorFlow and Theano. The dimension ordering - convention used by the model is the one - specified in your Keras config file. - - # Arguments - include_top: whether to include the 3 fully-connected - layers at the top of the network. - weights: one of `None` (random initialization) - or "imagenet" (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) - to use as image input for the model. - - # Returns - A Keras model instance. 
- ''' - if weights not in {'imagenet', None}: - raise ValueError('The `weights` argument should be either ' - '`None` (random initialization) or `imagenet` ' - '(pre-training on ImageNet).') - # Determine proper input shape - if K.image_dim_ordering() == 'th': - if include_top: - input_shape = (3, 224, 224) - else: - input_shape = (3, None, None) - else: - if include_top: - input_shape = (224, 224, 3) - else: - input_shape = (None, None, 3) - - if input_tensor is None: - img_input = Input(shape=input_shape) - else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor) - else: - img_input = input_tensor - # Block 1 - x = Convolution2D(64, 3, 3, activation='relu', - border_mode='same', name='block1_conv1')(img_input) - x = Convolution2D(64, 3, 3, activation='relu', - border_mode='same', name='block1_conv2')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) - - # Block 2 - x = Convolution2D(128, 3, 3, activation='relu', - border_mode='same', name='block2_conv1')(x) - x = Convolution2D(128, 3, 3, activation='relu', - border_mode='same', name='block2_conv2')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) - - # Block 3 - x = Convolution2D(256, 3, 3, activation='relu', - border_mode='same', name='block3_conv1')(x) - x = Convolution2D(256, 3, 3, activation='relu', - border_mode='same', name='block3_conv2')(x) - x = Convolution2D(256, 3, 3, activation='relu', - border_mode='same', name='block3_conv3')(x) - x = Convolution2D(256, 3, 3, activation='relu', - border_mode='same', name='block3_conv4')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) - - # Block 4 - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block4_conv1')(x) - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block4_conv2')(x) - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block4_conv3')(x) - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block4_conv4')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) - - # Block 5 - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block5_conv1')(x) - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block5_conv2')(x) - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block5_conv3')(x) - x = Convolution2D(512, 3, 3, activation='relu', - border_mode='same', name='block5_conv4')(x) - x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) - - if include_top: - # Classification block - x = Flatten(name='flatten')(x) - x = Dense(4096, activation='relu', name='fc1')(x) - x = Dense(4096, activation='relu', name='fc2')(x) - x = Dense(1000, activation='softmax', name='predictions')(x) - - # Create model - model = Model(img_input, x) - - # load weights - if weights == 'imagenet': - print('K.image_dim_ordering:', K.image_dim_ordering()) - if K.image_dim_ordering() == 'th': - if include_top: - weights_path = get_file('vgg19_weights_th_dim_ordering_th_kernels.h5', - TH_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('vgg19_weights_th_dim_ordering_th_kernels_notop.h5', - TH_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - model.load_weights(weights_path) - if K.backend() == 'tensorflow': - warnings.warn('You are using the TensorFlow backend, yet you ' - 'are using the Theano ' - 'image dimension ordering convention ' - '(`image_dim_ordering="th"`). 
' - 'For best performance, set ' - '`image_dim_ordering="tf"` in ' - 'your Keras config ' - 'at ~/.keras/keras.json.') - convert_all_kernels_in_model(model) - else: - if include_top: - weights_path = get_file('vgg19_weights_tf_dim_ordering_tf_kernels.h5', - TF_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5', - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - model.load_weights(weights_path) - if K.backend() == 'theano': - convert_all_kernels_in_model(model) - return model - - -if __name__ == '__main__': - model = VGG19(include_top=True, weights='imagenet') - - img_path = 'cat.jpg' - img = image.load_img(img_path, target_size=(224, 224)) - x = image.img_to_array(img) - x = np.expand_dims(x, axis=0) - x = preprocess_input(x) - print('Input image shape:', x.shape) - - preds = model.predict(x) - print('Predicted:', decode_predictions(preds)) -ann = ANN(2, 10, 1) -%timeit - n 1 - r 1 ann.train(zip(X, y), iterations=2) -plot_decision_boundary(ann) -plt.title("Our next model with 10 hidden units") -ann = ANN(2, 10, 1) -%timeit - n 1 - r 1 ann.train(zip(X, y), iterations=100) -plot_decision_boundary(ann) -plt.title("Our model with 10 hidden units and 100 iterations") -""" This file contains different utility functions that are not connected -in anyway to the networks presented in the tutorials, but rather help in -processing the outputs into a more understandable way. - -For example ``tile_raster_images`` helps in generating a easy to grasp -image from a set of samples or weights. -""" - - -import numpy -from six.moves import xrange - - -def scale_to_unit_interval(ndar, eps=1e-8): - """ Scales all values in the ndarray ndar to be between 0 and 1 """ - ndar = ndar.copy() - ndar -= ndar.min() - ndar *= 1.0 / (ndar.max() + eps) - return ndar - - -def tile_raster_images(X, img_shape, tile_shape, tile_spacing=(0, 0), - scale_rows_to_unit_interval=True, - output_pixel_vals=True): - """ - Transform an array with one flattened image per row, into an array in - which images are reshaped and layed out like tiles on a floor. - - This function is useful for visualizing datasets whose rows are images, - and also columns of matrices for transforming those rows - (such as the first layer of a neural net). - - :type X: a 2-D ndarray or a tuple of 4 channels, elements of which can - be 2-D ndarrays or None; - :param X: a 2-D array in which every row is a flattened image. - - :type img_shape: tuple; (height, width) - :param img_shape: the original shape of each image - - :type tile_shape: tuple; (rows, cols) - :param tile_shape: the number of images to tile (rows, cols) - - :param output_pixel_vals: if output should be pixel values (i.e. int8 - values) or floats - - :param scale_rows_to_unit_interval: if the values need to be scaled before - being plotted to [0,1] or not - - - :returns: array suitable for viewing as an image. - (See:`Image.fromarray`.) - :rtype: a 2-d array with same dtype as X. 
- - """ - - assert len(img_shape) == 2 - assert len(tile_shape) == 2 - assert len(tile_spacing) == 2 - - # The expression below can be re-written in a more C style as - # follows : - # - # out_shape = [0,0] - # out_shape[0] = (img_shape[0]+tile_spacing[0])*tile_shape[0] - - # tile_spacing[0] - # out_shape[1] = (img_shape[1]+tile_spacing[1])*tile_shape[1] - - # tile_spacing[1] - out_shape = [ - (ishp + tsp) * tshp - tsp - for ishp, tshp, tsp in zip(img_shape, tile_shape, tile_spacing) - ] - - if isinstance(X, tuple): - assert len(X) == 4 - # Create an output numpy ndarray to store the image - if output_pixel_vals: - out_array = numpy.zeros((out_shape[0], out_shape[1], 4), - dtype='uint8') - else: - out_array = numpy.zeros((out_shape[0], out_shape[1], 4), - dtype=X.dtype) - - # colors default to 0, alpha defaults to 1 (opaque) - if output_pixel_vals: - channel_defaults = [0, 0, 0, 255] - else: - channel_defaults = [0., 0., 0., 1.] - - for i in xrange(4): - if X[i] is None: - # if channel is None, fill it with zeros of the correct - # dtype - dt = out_array.dtype - if output_pixel_vals: - dt = 'uint8' - out_array[:, :, i] = numpy.zeros( - out_shape, - dtype=dt - ) + channel_defaults[i] - else: - # use a recurrent call to compute the channel and store it - # in the output - out_array[:, :, i] = tile_raster_images( - X[i], img_shape, tile_shape, tile_spacing, - scale_rows_to_unit_interval, output_pixel_vals) - return out_array - - else: - # if we are dealing with only one channel - H, W = img_shape - Hs, Ws = tile_spacing - - # generate a matrix to store the output - dt = X.dtype - if output_pixel_vals: - dt = 'uint8' - out_array = numpy.zeros(out_shape, dtype=dt) - - for tile_row in xrange(tile_shape[0]): - for tile_col in xrange(tile_shape[1]): - if tile_row * tile_shape[1] + tile_col < X.shape[0]: - this_x = X[tile_row * tile_shape[1] + tile_col] - if scale_rows_to_unit_interval: - # if we should scale values to be between 0 and 1 - # do this by calling the `scale_to_unit_interval` - # function - this_img = scale_to_unit_interval( - this_x.reshape(img_shape)) - else: - this_img = this_x.reshape(img_shape) - # add the slice to the corresponding position in the - # output array - c = 1 - if output_pixel_vals: - c = 255 - out_array[ - tile_row * (H + Hs): tile_row * (H + Hs) + H, - tile_col * (W + Ws): tile_col * (W + Ws) + W - ] = this_img * c - return out_array -import cPickle as pkl -import time - -import numpy -import theano -from theano import config -import theano.tensor as T -from theano.tensor.nnet import categorical_crossentropy - -from fuel.datasets import TextFile -from fuel.streams import DataStream -from fuel.schemes import ConstantScheme -from fuel.transformers import Batch, Padding - - -# These files can be downloaded from -# http://www-etud.iro.umontreal.ca/~brakelp/train.txt.gz -# http://www-etud.iro.umontreal.ca/~brakelp/dictionary.pkl -# don't forget to change the paths and gunzip train.txt.gz -TRAIN_FILE = '/u/brakelp/temp/traindata.txt' -VAL_FILE = '/u/brakelp/temp/valdata.txt' -DICT_FILE = '/u/brakelp/temp/dictionary.pkl' - - -def sequence_categorical_crossentropy(prediction, targets, mask): - prediction_flat = prediction.reshape(((prediction.shape[0] * - prediction.shape[1]), - prediction.shape[2]), ndim=2) - targets_flat = targets.flatten() - mask_flat = mask.flatten() - ce = categorical_crossentropy(prediction_flat, targets_flat) - return T.sum(ce * mask_flat) - - -def gauss_weight(ndim_in, ndim_out=None, sd=.005): - if ndim_out is None: - ndim_out = ndim_in - W = 
numpy.random.randn(ndim_in, ndim_out) * sd - return numpy.asarray(W, dtype=config.floatX) - - -class LogisticRegression(object): - """Multi-class Logistic Regression Class - - The logistic regression is fully described by a weight matrix :math:`W` - and bias vector :math:`b`. Classification is done by projecting data - points onto a set of hyperplanes, the distance to which is used to - determine a class membership probability. - """ - - def __init__(self, input, n_in, n_out): - """ Initialize the parameters of the logistic regression - - :type input: theano.tensor.TensorType - :param input: symbolic variable that describes the input of the - architecture (one minibatch) - - :type n_in: int - :param n_in: number of input units, the dimension of the space in - which the datapoints lie - - :type n_out: int - :param n_out: number of output units, the dimension of the space in - which the labels lie - - """ - - # initialize with 0 the weights W as a matrix of shape (n_in, n_out) - self.W = theano.shared(value=numpy.zeros((n_in, n_out), - dtype=theano.config.floatX), - name='W', borrow=True) - # initialize the baises b as a vector of n_out 0s - self.b = theano.shared(value=numpy.zeros((n_out,), - dtype=theano.config.floatX), - name='b', borrow=True) - - # compute vector of class-membership probabilities in symbolic form - energy = T.dot(input, self.W) + self.b - energy_exp = T.exp(energy - T.max(energy, 2)[:, :, None]) - pmf = energy_exp / energy_exp.sum(2)[:, :, None] - self.p_y_given_x = pmf - - # compute prediction as class whose probability is maximal in - # symbolic form - self.y_pred = T.argmax(self.p_y_given_x, axis=1) - - # parameters of the model - self.params = [self.W, self.b] - - -def index_dot(indices, w): - return w[indices.flatten()] - - -class LstmLayer: - - def __init__(self, rng, input, mask, n_in, n_h): - - # Init params - self.W_i = theano.shared(gauss_weight(n_in, n_h), 'W_i', borrow=True) - self.W_f = theano.shared(gauss_weight(n_in, n_h), 'W_f', borrow=True) - self.W_c = theano.shared(gauss_weight(n_in, n_h), 'W_c', borrow=True) - self.W_o = theano.shared(gauss_weight(n_in, n_h), 'W_o', borrow=True) - - self.U_i = theano.shared(gauss_weight(n_h), 'U_i', borrow=True) - self.U_f = theano.shared(gauss_weight(n_h), 'U_f', borrow=True) - self.U_c = theano.shared(gauss_weight(n_h), 'U_c', borrow=True) - self.U_o = theano.shared(gauss_weight(n_h), 'U_o', borrow=True) - - self.b_i = theano.shared(numpy.zeros((n_h,), dtype=config.floatX), - 'b_i', borrow=True) - self.b_f = theano.shared(numpy.zeros((n_h,), dtype=config.floatX), - 'b_f', borrow=True) - self.b_c = theano.shared(numpy.zeros((n_h,), dtype=config.floatX), - 'b_c', borrow=True) - self.b_o = theano.shared(numpy.zeros((n_h,), dtype=config.floatX), - 'b_o', borrow=True) - - self.params = [self.W_i, self.W_f, self.W_c, self.W_o, - self.U_i, self.U_f, self.U_c, self.U_o, - self.b_i, self.b_f, self.b_c, self.b_o] - - outputs_info = [T.zeros((input.shape[1], n_h)), - T.zeros((input.shape[1], n_h))] - - rval, updates = theano.scan(self._step, - sequences=[mask, input], - outputs_info=outputs_info) - - # self.output is in the format (batchsize, n_h) - self.output = rval[0] - - def _step(self, m_, x_, h_, c_): - - i_preact = (index_dot(x_, self.W_i) + - T.dot(h_, self.U_i) + self.b_i) - i = T.nnet.sigmoid(i_preact) - - f_preact = (index_dot(x_, self.W_f) + - T.dot(h_, self.U_f) + self.b_f) - f = T.nnet.sigmoid(f_preact) - - o_preact = (index_dot(x_, self.W_o) + - T.dot(h_, self.U_o) + self.b_o) - o = T.nnet.sigmoid(o_preact) - - 
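
# A plain-NumPy mirror of the LstmLayer gate updates in _step (the cell and
# hidden-state lines follow just below): index_dot's integer-index input is a
# row lookup, equivalent to a one-hot vector times W. Names and shapes here
# are illustrative only.
import numpy as np

def np_sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lstm_step_np(x_idx, h_prev, c_prev, W, U, b, m=1.0):
    # W, U, b are dicts keyed by gate name: 'i', 'f', 'o', 'c'
    i = np_sigmoid(W['i'][x_idx] + h_prev @ U['i'] + b['i'])
    f = np_sigmoid(W['f'][x_idx] + h_prev @ U['f'] + b['f'])
    o = np_sigmoid(W['o'][x_idx] + h_prev @ U['o'] + b['o'])
    c_new = f * c_prev + i * np.tanh(W['c'][x_idx] + h_prev @ U['c'] + b['c'])
    c_new = m * c_new + (1.0 - m) * c_prev   # mask freezes padded timesteps
    h_new = o * np.tanh(c_new)
    h_new = m * h_new + (1.0 - m) * h_prev
    return h_new, c_new
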
c_preact = (index_dot(x_, self.W_c) + - T.dot(h_, self.U_c) + self.b_c) - c = T.tanh(c_preact) - - c = f * c_ + i * c - c = m_[:, None] * c + (1. - m_)[:, None] * c_ - - h = o * T.tanh(c) - h = m_[:, None] * h + (1. - m_)[:, None] * h_ - - return h, c - - -def train_model(batch_size=100, n_h=50, n_epochs=40): - - # Load the datasets with Fuel - dictionary = pkl.load(open(DICT_FILE, 'r')) - dictionary['~'] = len(dictionary) - reverse_mapping = dict((j, i) for i, j in dictionary.items()) - - print("Loading the data") - train = TextFile(files=[TRAIN_FILE], - dictionary=dictionary, - unk_token='~', - level='character', - preprocess=str.lower, - bos_token=None, - eos_token=None) - - train_stream = DataStream.default_stream(train) - - # organize data in batches and pad shorter sequences with zeros - train_stream = Batch(train_stream, - iteration_scheme=ConstantScheme(batch_size)) - train_stream = Padding(train_stream) - - # idem dito for the validation text - val = TextFile(files=[VAL_FILE], - dictionary=dictionary, - unk_token='~', - level='character', - preprocess=str.lower, - bos_token=None, - eos_token=None) - - val_stream = DataStream.default_stream(val) - - # organize data in batches and pad shorter sequences with zeros - val_stream = Batch(val_stream, - iteration_scheme=ConstantScheme(batch_size)) - val_stream = Padding(val_stream) - - print('Building model') - - # Set the random number generator' seeds for consistency - rng = numpy.random.RandomState(12345) - - x = T.lmatrix('x') - mask = T.matrix('mask') - - # Construct the LSTM layer - recurrent_layer = LstmLayer(rng=rng, input=x, mask=mask, n_in=111, n_h=n_h) - - logreg_layer = LogisticRegression(input=recurrent_layer.output[:-1], - n_in=n_h, n_out=111) - - cost = sequence_categorical_crossentropy(logreg_layer.p_y_given_x, - x[1:], - mask[1:]) / batch_size - - # create a list of all model parameters to be fit by gradient descent - params = logreg_layer.params + recurrent_layer.params - - # create a list of gradients for all model parameters - grads = T.grad(cost, params) - - # update_model is a function that updates the model parameters by - # SGD Since this model has many parameters, it would be tedious to - # manually create an update rule for each model parameter. We thus - # create the updates list by automatically looping over all - # (params[i], grads[i]) pairs. - learning_rate = 0.1 - updates = [ - (param_i, param_i - learning_rate * grad_i) - for param_i, grad_i in zip(params, grads) - ] - - update_model = theano.function([x, mask], cost, updates=updates) - - evaluate_model = theano.function([x, mask], cost) - - # Define and compile a function for generating a sequence step by step. 
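
# What the Batch + Padding transformers above produce: equal-length,
# zero-padded index matrices plus a {0,1} mask that the masked cost and the
# LSTM step use to ignore padding. A toy NumPy illustration (not Fuel itself):
import numpy as np

toy_seqs = [[5, 2, 7], [3, 1]]          # two character-index sequences
maxlen = max(len(s) for s in toy_seqs)
toy_x = np.zeros((len(toy_seqs), maxlen), dtype='int64')
toy_mask = np.zeros((len(toy_seqs), maxlen), dtype='float32')
for row, seq in enumerate(toy_seqs):
    toy_x[row, :len(seq)] = seq
    toy_mask[row, :len(seq)] = 1.0
# the training loop below feeds the transposed (time-major) arrays,
# e.g. update_model(toy_x.T, toy_mask.T)
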
- x_t = T.iscalar() - h_p = T.vector() - c_p = T.vector() - h_t, c_t = recurrent_layer._step(T.ones(1), x_t, h_p, c_p) - energy = T.dot(h_t, logreg_layer.W) + logreg_layer.b - - energy_exp = T.exp(energy - T.max(energy, 1)[:, None]) - - output = energy_exp / energy_exp.sum(1)[:, None] - single_step = theano.function([x_t, h_p, c_p], [output, h_t, c_t]) - - start_time = time.clock() - - iteration = 0 - - for epoch in range(n_epochs): - print 'epoch:', epoch - - for x_, mask_ in train_stream.get_epoch_iterator(): - iteration += 1 - - cross_entropy = update_model(x_.T, mask_.T) - - # Generate some text after each 20 minibatches - if iteration % 40 == 0: - try: - prediction = numpy.ones(111, dtype=config.floatX) / 111.0 - h_p = numpy.zeros((n_h,), dtype=config.floatX) - c_p = numpy.zeros((n_h,), dtype=config.floatX) - initial = 'the meaning of life is ' - sentence = initial - for char in initial: - x_t = dictionary[char] - prediction, h_p, c_p = single_step(x_t, h_p.flatten(), - c_p.flatten()) - sample = numpy.random.multinomial(1, prediction.flatten()) - for i in range(450): - x_t = numpy.argmax(sample) - prediction, h_p, c_p = single_step(x_t, h_p.flatten(), - c_p.flatten()) - sentence += reverse_mapping[x_t] - sample = numpy.random.multinomial( - 1, prediction.flatten()) - print 'LSTM: "' + sentence + '"' - except ValueError: - print 'Something went wrong during sentence generation.' - - if iteration % 40 == 0: - print 'epoch:', epoch, ' minibatch:', iteration - val_scores = [] - for x_val, mask_val in val_stream.get_epoch_iterator(): - val_scores.append(evaluate_model(x_val.T, mask_val.T)) - print 'Average validation CE per sentence:', numpy.mean(val_scores) - - end_time = time.clock() - print('Optimization complete.') - print('The code ran for %.2fm' % ((end_time - start_time) / 60.)) - - -if __name__ == '__main__': - train_model() -"""This file is only here to speed up the execution of notebooks. - -It contains a subset of the code defined in simple_rnn.ipynb and -lstm_text.ipynb, in particular the code compiling Theano function. -Executing this script first will populate the cache of compiled C code, -which will make subsequent compilations faster. - -The use case is to run this script in the background when a demo VM -such as the one for NVIDIA's qwikLABS, so that the compilation phase -started from the notebooks is faster. 
- -""" -import numpy - -import theano -import theano.tensor as T - -from theano import config -from theano.tensor.nnet import categorical_crossentropy - - -floatX = theano.config.floatX - - -# simple_rnn.ipynb - -class SimpleRNN(object): - def __init__(self, input_dim, recurrent_dim): - w_xh = numpy.random.normal(0, .01, (input_dim, recurrent_dim)) - w_hh = numpy.random.normal(0, .02, (recurrent_dim, recurrent_dim)) - self.w_xh = theano.shared(numpy.asarray( - w_xh, dtype=floatX), name='w_xh') - self.w_hh = theano.shared(numpy.asarray( - w_hh, dtype=floatX), name='w_hh') - self.b_h = theano.shared(numpy.zeros( - (recurrent_dim,), dtype=floatX), name='b_h') - self.parameters = [self.w_xh, self.w_hh, self.b_h] - - def _step(self, input_t, previous): - return T.tanh(T.dot(previous, self.w_hh) + input_t) - - def __call__(self, x): - x_w_xh = T.dot(x, self.w_xh) + self.b_h - result, updates = theano.scan(self._step, - sequences=[x_w_xh], - outputs_info=[T.zeros_like(self.b_h)]) - return result - - -w_ho_np = numpy.random.normal(0, .01, (15, 1)) -w_ho = theano.shared(numpy.asarray(w_ho_np, dtype=floatX), name='w_ho') -b_o = theano.shared(numpy.zeros((1,), dtype=floatX), name='b_o') - -x = T.matrix('x') -my_rnn = SimpleRNN(1, 15) -hidden = my_rnn(x) -prediction = T.dot(hidden, w_ho) + b_o -parameters = my_rnn.parameters + [w_ho, b_o] -l2 = sum((p**2).sum() for p in parameters) -mse = T.mean((prediction[:-1] - x[1:])**2) -cost = mse + .0001 * l2 -gradient = T.grad(cost, wrt=parameters) - -lr = .3 -updates = [(par, par - lr * gra) for par, gra in zip(parameters, gradient)] -update_model = theano.function([x], cost, updates=updates) -get_cost = theano.function([x], mse) -predict = theano.function([x], prediction) -get_hidden = theano.function([x], hidden) -get_gradient = theano.function([x], gradient) - -predict = theano.function([x], prediction) - -# Generating sequences - -x_t = T.vector() -h_p = T.vector() -preactivation = T.dot(x_t, my_rnn.w_xh) + my_rnn.b_h -h_t = my_rnn._step(preactivation, h_p) -o_t = T.dot(h_t, w_ho) + b_o - -single_step = theano.function([x_t, h_p], [o_t, h_t]) - -# lstm_text.ipynb - - -def gauss_weight(rng, ndim_in, ndim_out=None, sd=.005): - if ndim_out is None: - ndim_out = ndim_in - W = rng.randn(ndim_in, ndim_out) * sd - return numpy.asarray(W, dtype=config.floatX) - - -def index_dot(indices, w): - return w[indices.flatten()] - - -class LstmLayer: - - def __init__(self, rng, input, mask, n_in, n_h): - - # Init params - self.W_i = theano.shared(gauss_weight( - rng, n_in, n_h), 'W_i', borrow=True) - self.W_f = theano.shared(gauss_weight( - rng, n_in, n_h), 'W_f', borrow=True) - self.W_c = theano.shared(gauss_weight( - rng, n_in, n_h), 'W_c', borrow=True) - self.W_o = theano.shared(gauss_weight( - rng, n_in, n_h), 'W_o', borrow=True) - - self.U_i = theano.shared(gauss_weight(rng, n_h), 'U_i', borrow=True) - self.U_f = theano.shared(gauss_weight(rng, n_h), 'U_f', borrow=True) - self.U_c = theano.shared(gauss_weight(rng, n_h), 'U_c', borrow=True) - self.U_o = theano.shared(gauss_weight(rng, n_h), 'U_o', borrow=True) - - self.b_i = theano.shared(numpy.zeros((n_h,), dtype=config.floatX), - 'b_i', borrow=True) - self.b_f = theano.shared(numpy.zeros((n_h,), dtype=config.floatX), - 'b_f', borrow=True) - self.b_c = theano.shared(numpy.zeros((n_h,), dtype=config.floatX), - 'b_c', borrow=True) - self.b_o = theano.shared(numpy.zeros((n_h,), dtype=config.floatX), - 'b_o', borrow=True) - - self.params = [self.W_i, self.W_f, self.W_c, self.W_o, - self.U_i, self.U_f, self.U_c, self.U_o, 
- self.b_i, self.b_f, self.b_c, self.b_o] - - outputs_info = [T.zeros((input.shape[1], n_h)), - T.zeros((input.shape[1], n_h))] - - rval, updates = theano.scan(self._step, - sequences=[mask, input], - outputs_info=outputs_info) - - # self.output is in the format (length, batchsize, n_h) - self.output = rval[0] - - def _step(self, m_, x_, h_, c_): - - i_preact = (index_dot(x_, self.W_i) + - T.dot(h_, self.U_i) + self.b_i) - i = T.nnet.sigmoid(i_preact) - - f_preact = (index_dot(x_, self.W_f) + - T.dot(h_, self.U_f) + self.b_f) - f = T.nnet.sigmoid(f_preact) - - o_preact = (index_dot(x_, self.W_o) + - T.dot(h_, self.U_o) + self.b_o) - o = T.nnet.sigmoid(o_preact) - - c_preact = (index_dot(x_, self.W_c) + - T.dot(h_, self.U_c) + self.b_c) - c = T.tanh(c_preact) - - c = f * c_ + i * c - c = m_[:, None] * c + (1. - m_)[:, None] * c_ - - h = o * T.tanh(c) - h = m_[:, None] * h + (1. - m_)[:, None] * h_ - - return h, c - - -def sequence_categorical_crossentropy(prediction, targets, mask): - prediction_flat = prediction.reshape(((prediction.shape[0] * - prediction.shape[1]), - prediction.shape[2]), ndim=2) - targets_flat = targets.flatten() - mask_flat = mask.flatten() - ce = categorical_crossentropy(prediction_flat, targets_flat) - return T.sum(ce * mask_flat) - - -class LogisticRegression(object): - - def __init__(self, rng, input, n_in, n_out): - - W = gauss_weight(rng, n_in, n_out) - self.W = theano.shared(value=numpy.asarray(W, dtype=theano.config.floatX), - name='W', borrow=True) - # initialize the biases b as a vector of n_out 0s - self.b = theano.shared(value=numpy.zeros((n_out,), - dtype=theano.config.floatX), - name='b', borrow=True) - - # compute vector of class-membership probabilities in symbolic form - energy = T.dot(input, self.W) + self.b - energy_exp = T.exp(energy - T.max(energy, axis=2, keepdims=True)) - pmf = energy_exp / energy_exp.sum(axis=2, keepdims=True) - self.p_y_given_x = pmf - self.params = [self.W, self.b] - - -batch_size = 100 -n_h = 50 - -# The Theano graph -# Set the random number generator' seeds for consistency -rng = numpy.random.RandomState(12345) - -x = T.lmatrix('x') -mask = T.matrix('mask') - -# Construct an LSTM layer and a logistic regression layer -recurrent_layer = LstmLayer(rng=rng, input=x, mask=mask, n_in=111, n_h=n_h) -logreg_layer = LogisticRegression(rng=rng, input=recurrent_layer.output[:-1], - n_in=n_h, n_out=111) - -# define a cost variable to optimize -cost = sequence_categorical_crossentropy(logreg_layer.p_y_given_x, - x[1:], - mask[1:]) / batch_size - -# create a list of all model parameters to be fit by gradient descent -params = logreg_layer.params + recurrent_layer.params - -# create a list of gradients for all model parameters -grads = T.grad(cost, params) - -learning_rate = 0.1 -updates = [ - (param_i, param_i - learning_rate * grad_i) - for param_i, grad_i in zip(params, grads) -] - -update_model = theano.function([x, mask], cost, updates=updates) - -evaluate_model = theano.function([x, mask], cost) - -# Generating Sequences -x_t = T.iscalar() -h_p = T.vector() -c_p = T.vector() -h_t, c_t = recurrent_layer._step(T.ones(1), x_t, h_p, c_p) -energy = T.dot(h_t, logreg_layer.W) + logreg_layer.b - -energy_exp = T.exp(energy - T.max(energy, axis=1, keepdims=True)) - -output = energy_exp / energy_exp.sum(axis=1, keepdims=True) -single_step = theano.function([x_t, h_p, c_p], [output, h_t, c_t]) -import collections -import numpy as np - - -def mackey_glass(sample_len=1000, tau=17, seed=None, n_samples=1): - ''' - mackey_glass(sample_len=1000, 
tau=17, seed = None, n_samples = 1) -> input - Generate the Mackey Glass time-series. Parameters are: - - sample_len: length of the time-series in timesteps. Default is 1000. - - tau: delay of the MG - system. Commonly used values are tau=17 (mild - chaos) and tau=30 (moderate chaos). Default is 17. - - seed: to seed the random generator, can be used to generate the same - timeseries at each invocation. - - n_samples : number of samples to generate - ''' - delta_t = 10 - history_len = tau * delta_t - # Initial conditions for the history of the system - timeseries = 1.2 - - if seed is not None: - np.random.seed(seed) - - samples = [] - - for _ in range(n_samples): - history = collections.deque(1.2 * np.ones(history_len) + 0.2 * - (np.random.rand(history_len) - 0.5)) - # Preallocate the array for the time-series - inp = np.zeros((sample_len, 1)) - - for timestep in range(sample_len): - for _ in range(delta_t): - xtau = history.popleft() - history.append(timeseries) - timeseries = history[-1] + (0.2 * xtau / (1.0 + xtau ** 10) - - 0.1 * history[-1]) / delta_t - inp[timestep] = timeseries - - # Squash timeseries through tanh - inp = np.tanh(inp - 1) - samples.append(inp) - return samples - - -def mso(sample_len=1000, n_samples=1): - ''' - mso(sample_len=1000, n_samples = 1) -> input - Generate the Multiple Sinewave Oscillator time-series, a sum of two sines - with incommensurable periods. Parameters are: - - sample_len: length of the time-series in timesteps - - n_samples: number of samples to generate - ''' - signals = [] - for _ in range(n_samples): - phase = np.random.rand() - x = np.atleast_2d(np.arange(sample_len)).T - signals.append(np.sin(0.2 * x + phase) + np.sin(0.311 * x + phase)) - return signals - - -def lorentz(sample_len=1000, sigma=10, rho=28, beta=8 / 3, step=0.01): - """This function generates a Lorentz time series of length sample_len, - with standard parameters sigma, rho and beta. - """ - - x = np.zeros([sample_len]) - y = np.zeros([sample_len]) - z = np.zeros([sample_len]) - - # Initial conditions taken from 'Chaos and Time Series Analysis', J. 
Sprott
    x[0] = 0
    y[0] = -0.01
    z[0] = 9

    for t in range(sample_len - 1):
        x[t + 1] = x[t] + sigma * (y[t] - x[t]) * step
        y[t + 1] = y[t] + (x[t] * (rho - z[t]) - y[t]) * step
        z[t + 1] = z[t] + (x[t] * y[t] - beta * z[t]) * step

    x.shape += (1,)
    y.shape += (1,)
    z.shape += (1,)

    return np.concatenate((x, y, z), axis=1)


import theano
import theano.tensor as T
import numpy as np

coefficients = T.vector("coefficients")
x = T.scalar("x")
max_coefficients_supported = 10000


def step(coeff, power, prior_value, free_var):
    return prior_value + (coeff * (free_var ** power))


# Generate the components of the polynomial
full_range = T.arange(max_coefficients_supported)
outputs_info = np.zeros((), dtype=theano.config.floatX)

components, updates = theano.scan(fn=step,
                                  sequences=[coefficients, full_range],
                                  outputs_info=outputs_info,
                                  non_sequences=x)

polynomial = components[-1]
calculate_polynomial = theano.function(inputs=[coefficients, x],
                                       outputs=polynomial,
                                       updates=updates)

test_coeff = np.asarray([1, 0, 2], dtype=theano.config.floatX)
print(calculate_polynomial(test_coeff, 3))


import theano
import theano.tensor as T
import numpy as np

probabilities = T.vector()
nb_samples = T.iscalar()

rng = T.shared_randomstreams.RandomStreams(1234)


def sample_from_pvect(pvect):
    """ Provided utility function: given a symbolic vector of
    probabilities (which MUST sum to 1), sample one element
    and return its index.
    """
    onehot_sample = rng.multinomial(n=1, pvals=pvect)
    sample = onehot_sample.argmax()
    return sample


def set_p_to_zero(pvect, i):
    """ Provided utility function: given a symbolic vector of
    probabilities and an index 'i', set the probability of the
    i-th element to 0 and renormalize the probabilities so they
    sum to 1.
    """
    new_pvect = T.set_subtensor(pvect[i], 0.)
- new_pvect = new_pvect / new_pvect.sum() - return new_pvect - - -def step(p): - sample = sample_from_pvect(p) - new_p = set_p_to_zero(p, sample) - return new_p, sample - - -output, updates = theano.scan(fn=step, - outputs_info=[probabilities, None], - n_steps=nb_samples) - -modified_probabilities, samples = output - -f = theano.function(inputs=[probabilities, nb_samples], - outputs=[samples], - updates=updates) - -# Testing the function -test_probs = np.asarray([0.6, 0.3, 0.1], dtype=theano.config.floatX) -for i in range(10): - print(f(test_probs, 2)) -from PIL import Image -import face_recognition -import os -print("h") - - -def find_and_save_face(web_file, face_file): - # Load the jpg file into a numpy array - image = face_recognition.load_image_file(web_file) - print(image.dtype) - # Find all the faces in the image - face_locations = face_recognition.face_locations(image) - - print("I found {} face(s) in this photograph.".format(len(face_locations))) - - for face_location in face_locations: - - # Print the location of each face in this image - top, right, bottom, left = face_location - print("A face is located at pixel location Top: {}, Left: {}, Bottom: {}, Right: {}".format( - top, left, bottom, right)) - - # You can access the actual face itself like this: - face_image = image[top:bottom, left:right] - pil_image = Image.fromarray(face_image) - pil_image.save(face_file) - - -print("h") -list = os.listdir("web_image/") -print(list) - -for image in list: - id_tag = image.find(".") - name = image[0:id_tag] - print(name) - - web_file = "./web_image/" + image - face_file = "./face_image/"+name+".jpg" - - im = Image.open("./web_image/"+image) - try: - find_and_save_face(web_file, face_file) - except: - print("fail") -from PIL import Image -import os - -list = os.listdir("./face_image") -print(list) - -for image in list: - id_tag = image.find(".") - name = image[0:id_tag] - print(name) - - im = Image.open("./face_image/"+image) - out = im.resize((128, 128)) - # out.show() - out.save("./resize_image/"+name+".jpg") -''' -A Convolutional Network implementation example using TensorFlow library. 
-This example is using the MNIST database of handwritten digits -(http://yann.lecun.com/exdb/mnist/) - -Author: Aymeric Damien -Project: https://github.com/aymericdamien/TensorFlow-Examples/ -''' - -from __future__ import print_function -import os -import matplotlib.pyplot as plt -import tensorflow as tf -from PIL import Image -import numpy -import tensorflow as tf - -# Import MNIST data -from tensorflow.examples.tutorials.mnist import input_data - -# Parameters -learning_rate = 0.001 -training_iters = 3000 -batch_size = 10 -display_step = 2 - -# Network Parameters -n_input = 128*128 # MNIST data input (img shape: 28*28) -n_classes = 10 # MNIST total classes (0-9 digits) -dropout = 0.75 # Dropout, probability to keep units - -# tf Graph input -x = tf.placeholder(tf.float32, [None, 128, 128, 3]) -y = tf.placeholder(tf.float32, [None, n_classes]) -keep_prob = tf.placeholder(tf.float32) # dropout (keep probability) - - -# Create some wrappers for simplicity -def conv2d(x, W, b, strides=1): - # Conv2D wrapper, with bias and relu activation - x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME') - x = tf.nn.bias_add(x, b) - return tf.nn.relu(x) - - -def maxpool2d(x, k=2): - # MaxPool2D wrapper - return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], - padding='SAME') - - -# Create model -def conv_net(x, weights, biases, dropout): - # Reshape input picture - x = tf.reshape(x, shape=[-1, 128, 128, 3]) - - # Convolution Layer - conv1 = conv2d(x, weights['wc1'], biases['bc1']) - print(conv1.shape) - # Max Pooling (down-sampling) - conv1 = maxpool2d(conv1, k=2) - print(conv1.shape) - # Convolution Layer - conv2 = conv2d(conv1, weights['wc2'], biases['bc2']) - print(conv2.shape) - # Max Pooling (down-sampling) - conv2 = maxpool2d(conv2, k=2) - print(conv2.shape) - # Fully connected layer - # Reshape conv2 output to fit fully connected layer input - fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]]) - fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1']) - fc1 = tf.nn.relu(fc1) - # Apply Dropout - fc1 = tf.nn.dropout(fc1, dropout) - - # Output, class prediction - out = tf.add(tf.matmul(fc1, weights['out']), biases['out']) - return out - - -# Store layers weight & bias -weights = { - # 5x5 conv, 1 input, 32 outputs - 'wc1': tf.Variable(tf.random_normal([5, 5, 3, 24])), - # 5x5 conv, 32 inputs, 64 outputs - 'wc2': tf.Variable(tf.random_normal([5, 5, 24, 96])), - # fully connected, 7*7*64 inputs, 1024 outputs - 'wd1': tf.Variable(tf.random_normal([32*32*96, 1024])), - # 1024 inputs, 10 outputs (class prediction) - 'out': tf.Variable(tf.random_normal([1024, n_classes])) -} - -biases = { - 'bc1': tf.Variable(tf.random_normal([24])), - 'bc2': tf.Variable(tf.random_normal([96])), - 'bd1': tf.Variable(tf.random_normal([1024])), - 'out': tf.Variable(tf.random_normal([n_classes])) -} - -# Construct model -pred = conv_net(x, weights, biases, keep_prob) -pred_result = tf.argmax(pred, 1) -# Define loss and optimizer -cost = tf.reduce_mean( - tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)) -optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) - -# Evaluate model -correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)) -accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) - -# Initializing the variables -init = tf.global_variables_initializer() -saver = tf.train.Saver() - -# Launch the graph -with tf.Session() as sess: - saver.restore(sess, "./model/model.ckpt") - step = 1 - # Keep training until reach max 
iterations - list = os.listdir("./test_resize/") - print(list) - print(len(list)) - - for batch_id in range(0, 2): - batch = list[batch_id * 10:batch_id * 10 + 10] - batch_xs = [] - batch_ys = [] - for image in batch: - id_tag = image.find("-") - score = image[0:id_tag] - # print(score) - img = Image.open("./test_resize/" + image) - img_ndarray = numpy.asarray(img, dtype='float32') - img_ndarray = numpy.reshape(img_ndarray, [128, 128, 3]) - # print(img_ndarray.shape) - batch_x = img_ndarray - batch_xs.append(batch_x) - - # print(batch_ys) - batch_xs = numpy.asarray(batch_xs) - print(batch_xs.shape) - - # Run optimization op (backprop) - pred_result_test = sess.run(pred_result, feed_dict={ - x: batch_xs, keep_prob: 1.}) - print(pred_result_test) - print("Test Finished!") - saver.save(sess, "./model/model.ckpt") -#!/usr/bin/env python -# -*- coding: utf-8 -*- -import numpy -from PIL import Image -import os - -list = os.listdir("./resize_image/") -print(list) -print(len(list)) -for batch_id in range(1, 10): - batch = list[batch_id * 10:batch_id * 10 + 10] - batch_xs = [] - batch_ys = [] - for image in batch: - id_tag = image.find("-") - score = image[0:id_tag] - # print(score) - img = Image.open("./resize_image/" + image) - img_ndarray = numpy.asarray(img, dtype='float32') - img_ndarray = numpy.reshape(img_ndarray, [128, 128, 3]) - # print(img_ndarray.shape) - batch_x = img_ndarray - batch_xs.append(batch_x) - # print(batch_xs) - batch_y = numpy.asarray([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) - # print(type(score)) - batch_y[int(score) - 1] = 1 - # print(batch_y) - batch_y = numpy.reshape(batch_y, [10, ]) - batch_ys.append(batch_y) - # print(batch_ys) - batch_xs = numpy.asarray(batch_xs) - print(batch_xs.shape) - batch_ys = numpy.asarray(batch_ys) - print(batch_ys.shape) -from PIL import Image -import face_recognition -import os - - -def find_and_save_face(web_file, face_file): - # Load the jpg file into a numpy array - image = face_recognition.load_image_file(web_file) - print(image.dtype) - # Find all the faces in the image - face_locations = face_recognition.face_locations(image) - - print("I found {} face(s) in this photograph.".format(len(face_locations))) - - for face_location in face_locations: - - # Print the location of each face in this image - top, right, bottom, left = face_location - print("A face is located at pixel location Top: {}, Left: {}, Bottom: {}, Right: {}".format( - top, left, bottom, right)) - - # You can access the actual face itself like this: - face_image = image[top:bottom, left:right] - pil_image = Image.fromarray(face_image) - pil_image.save(face_file) - - -list = os.listdir("./test_web/") -print(list) - -for image in list: - id_tag = image.find(".") - name = image[0:id_tag] - print(name) - - web_file = "./test_web/" + image - face_file = "./test_face/"+name+".jpg" - try: - find_and_save_face(web_file, face_file) - except: - print("fail") -from PIL import Image -import os - -list = os.listdir("./test_face") -print(list) - -for image in list: - name_len = len(image) - name = image[0:name_len-3] - print(name) - im = Image.open("./test_face/"+image) - out = im.resize((128, 128)) - # out.show() - out.save("./test_resize/"+name+"jpg") -''' -A Convolutional Network implementation example using TensorFlow library. 
-This example is using the MNIST database of handwritten digits -(http://yann.lecun.com/exdb/mnist/) - -Author: Aymeric Damien -Project: https://github.com/aymericdamien/TensorFlow-Examples/ -''' - -from __future__ import print_function -import os -import matplotlib.pyplot as plt -import tensorflow as tf -from PIL import Image -import numpy -import tensorflow as tf - -# Import MNIST data -from tensorflow.examples.tutorials.mnist import input_data - -# Parameters -learning_rate = 0.001 -training_iters = 3000 -batch_size = 10 -display_step = 3 - -# Network Parameters -n_input = 128*128 # MNIST data input (img shape: 128*128 ) -n_classes = 10 # MNIST total classes (0-9 digits) -dropout = 0.75 # Dropout, probability to keep units - -# tf Graph input -x = tf.placeholder(tf.float32, [None, 128, 128, 3]) -y = tf.placeholder(tf.float32, [None, n_classes]) -keep_prob = tf.placeholder(tf.float32) # dropout (keep probability) - - -# Create some wrappers for simplicity -def conv2d(x, W, b, strides=1): - # Conv2D wrapper, with bias and relu activation - x = tf.nn.conv2d(x, W, strides=[1, strides, strides, 1], padding='SAME') - x = tf.nn.bias_add(x, b) - return tf.nn.relu(x) - - -def maxpool2d(x, k=2): - # MaxPool2D wrapper - return tf.nn.max_pool(x, ksize=[1, k, k, 1], strides=[1, k, k, 1], - padding='SAME') - - -# Create model -def conv_net(x, weights, biases, dropout): - # Reshape input picture - x = tf.reshape(x, shape=[-1, 128, 128, 3]) - - # Convolution Layer - conv1 = conv2d(x, weights['wc1'], biases['bc1']) - print(conv1.shape) - # Max Pooling (down-sampling) - conv1 = maxpool2d(conv1, k=2) - print(conv1.shape) - # Convolution Layer - conv2 = conv2d(conv1, weights['wc2'], biases['bc2']) - print(conv2.shape) - # Max Pooling (down-sampling) - conv2 = maxpool2d(conv2, k=2) - print(conv2.shape) - # Fully connected layer - # Reshape conv2 output to fit fully connected layer input - fc1 = tf.reshape(conv2, [-1, weights['wd1'].get_shape().as_list()[0]]) - fc1 = tf.add(tf.matmul(fc1, weights['wd1']), biases['bd1']) - fc1 = tf.nn.relu(fc1) - # Apply Dropout - fc1 = tf.nn.dropout(fc1, dropout) - - # Output, class prediction - out = tf.add(tf.matmul(fc1, weights['out']), biases['out']) - return out - - -# Store layers weight & bias -weights = { - # 5x5 conv, 3 input, 24 outputs - 'wc1': tf.Variable(tf.random_normal([5, 5, 3, 24])), - # 5x5 conv, 24 inputs, 96 outputs - 'wc2': tf.Variable(tf.random_normal([5, 5, 24, 96])), - # fully connected, 32*32*96 inputs, 1024 outputs - 'wd1': tf.Variable(tf.random_normal([32*32*96, 1024])), - # 1024 inputs, 10 outputs (class prediction) - 'out': tf.Variable(tf.random_normal([1024, n_classes])) -} - -biases = { - 'bc1': tf.Variable(tf.random_normal([24])), - 'bc2': tf.Variable(tf.random_normal([96])), - 'bd1': tf.Variable(tf.random_normal([1024])), - 'out': tf.Variable(tf.random_normal([n_classes])) -} - -# Construct model -pred = conv_net(x, weights, biases, keep_prob) - -# Define loss and optimizer -cost = tf.reduce_mean( - tf.nn.softmax_cross_entropy_with_logits(logits=pred, labels=y)) -optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate).minimize(cost) - -# Evaluate model -correct_pred = tf.equal(tf.argmax(pred, 1), tf.argmax(y, 1)) -accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32)) - -# Initializing the variables -init = tf.global_variables_initializer() -saver = tf.train.Saver() - - -# Launch the graph -with tf.Session() as sess: - sess.run(init) - step = 1 - # Keep training until reach max iterations - list = os.listdir("./resize_image/") - 
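
# Why 'wd1' above expects 32*32*96 inputs: each maxpool2d(k=2) halves the
# spatial side ('SAME' padding, stride 2), so 128 -> 64 -> 32 across the two
# pooling stages, and conv2 leaves 96 feature maps. A quick sanity check:
side = 128
for _ in range(2):      # two k=2 max-pool stages
    side //= 2
assert side * side * 96 == 32 * 32 * 96 == 98304
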
print(list) - print(len(list)) - count = 0 - while count < 10: - count = count+1 - print("count:", count) - for batch_id in range(0, 12): - batch = list[batch_id * 10:batch_id * 10 + 10] - batch_xs = [] - batch_ys = [] - for image in batch: - id_tag = image.find("-") - score = image[0:id_tag] - # print(score) - img = Image.open("./resize_image/" + image) - img_ndarray = numpy.asarray(img, dtype='float32') - img_ndarray = numpy.reshape(img_ndarray, [128, 128, 3]) - # print(img_ndarray.shape) - batch_x = img_ndarray - batch_xs.append(batch_x) - # print(batch_xs) - batch_y = numpy.asarray([0, 0, 0, 0, 0, 0, 0, 0, 0, 0]) - # print(type(score)) - batch_y[int(score) - 1] = 1 - # print(batch_y) - batch_y = numpy.reshape(batch_y, [10, ]) - batch_ys.append(batch_y) - # print(batch_ys) - batch_xs = numpy.asarray(batch_xs) - print(batch_xs.shape) - batch_ys = numpy.asarray(batch_ys) - print(batch_ys.shape) - - sess.run(optimizer, feed_dict={x: batch_xs, y: batch_ys, - keep_prob: dropout}) - if step % display_step == 0: - # Calculate batch loss and accuracy - loss, acc = sess.run([cost, accuracy], feed_dict={x: batch_xs, - y: batch_ys, - keep_prob: 1.}) - print("Iter " + str(step*batch_size) + ", Minibatch Loss= " + - "{:.6f}".format(loss) + ", Training Accuracy= " + - "{:.5f}".format(acc)) - step += 1 - print("Optimization Finished!") - saver.save(sess, "./model/model.ckpt") -import setuptools - -setuptools.setup( - name='hiddenlayer', - # packages = ['hiddenlayer'], - packages=setuptools.find_packages(), - version='0.2', - license="MIT", - description='Neural network graphs and training metrics for PyTorch and TensorFlow', - author='Waleed Abdulla , Phil Ferriere ', - url='https://github.com/waleedka/hiddenlayer', - classifiers=[ - # How mature is this project? Common values are - # 3 - Alpha - # 4 - Beta - # 5 - Production/Stable - 'Development Status :: 4 - Beta', - - # Indicate who your project is intended for - 'Intended Audience :: Developers', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Scientific/Engineering :: Visualization', - - # Pick your license as you wish (should match "license" above) - 'License :: OSI Approved :: MIT License', - - # Specify the Python versions you support here. In particular, ensure - # that you indicate whether you support Python 2, Python 3 or both. 
-        'Programming Language :: Python :: 3.5',
-
-        'Operating System :: OS Independent',
-    ],
-)
-# Set matplotlib backend to Agg
-# *MUST* be done BEFORE importing hiddenlayer or libs that import matplotlib
-import matplotlib
-matplotlib.use("Agg")
-
-import hiddenlayer as hl
-import numpy as np
-import random
-import time
-import os
-
-
-# Create output directory in project root
-ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
-OUTPUT_DIR = os.path.join(ROOT_DIR, "demo_output")
-if not os.path.exists(OUTPUT_DIR):
-    os.makedirs(OUTPUT_DIR)
-
-# A History object to store metrics
-h = hl.History()
-
-# A Canvas object to draw the metrics
-c = hl.Canvas()
-
-# Simulate a training loop with two metrics: loss and accuracy
-loss = 1
-accuracy = 0
-for step in range(1000):
-    # Fake loss and accuracy
-    loss -= loss * np.random.uniform(-.09, 0.1)
-    accuracy += (1 - accuracy) * np.random.uniform(-.09, 0.1)
-
-    # Log metrics and display them at certain intervals
-    if step % 10 == 0:
-        # Store metrics in the history object
-        h.log(step, loss=loss, accuracy=accuracy)
-
-        # Print progress status
-        h.progress()
-
-        # Less frequently, save a snapshot of the graphs
-        if step % 100 == 0:
-            # Plot the two metrics in one graph
-            c.draw_plot([h["loss"], h["accuracy"]])
-            # Save the canvas
-            c.save(os.path.join(OUTPUT_DIR, "training_progress.png"))
-
-            # You can also save the history to a file to load and inspect later
-            h.save(os.path.join(OUTPUT_DIR, "training_progress.pkl"))
-
-    time.sleep(0.1)
-"""
-Wrapper for CIFAR-10 dataset and TF model.
-
-Written by Phil Ferriere
-
-Loosely based on https://github.com/tensorflow/models/blob/master/tutorials/image/cifar10/cifar10.py
-Copyright 2015 The TensorFlow Authors. All Rights Reserved.
-
-Note: we use the exact same format and folders as in the PyTorch sample
-
-Licensed under the MIT License
-"""
-
-from __future__ import absolute_import, division, print_function
-import os
-import sys
-import tarfile
-import pickle
-import tensorflow as tf
-import numpy as np
-from urllib.request import urlretrieve
-
-# Global constants describing the CIFAR-10 data set.
-IMAGE_SIZE = 32
-NUM_CHANNELS = 3
-NUM_CLASSES = 10
-NUM_TRAIN_SAMPLES = 50000
-NUM_TEST_SAMPLES = 10000
-CIFAR10_URL = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz'
-
-
-class CIFAR10():
-    """TF data handler for CIFAR-10 dataset and model."""
-
-    def __init__(self, batch_size=8, data_dir=None):
-        """CIFAR-10 dataset and TF model constructor.
-        Args:
-            batch_size: dataset batch size.
-            data_dir: directory to download and extract the dataset into
-                (defaults to ./test_data).
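-
-        Example (illustrative):
-            cifar10 = CIFAR10(batch_size=32)
-            images, labels = cifar10.train_data, cifar10.train_labels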
-        """
-        self._train_data, self._train_labels = None, None
-        self._test_data, self._test_labels = None, None
-        self._batch_size = batch_size
-        self.img_size = IMAGE_SIZE
-        self.num_channels = NUM_CHANNELS
-        self.num_classes = NUM_CLASSES
-        self.train_len = NUM_TRAIN_SAMPLES
-        self.test_len = NUM_TEST_SAMPLES
-        self.data_dir = data_dir or "./test_data"
-        self.cifar10_dir = os.path.join(self.data_dir, 'cifar-10-batches-py')
-        self.cifar10_tarball = os.path.join(
-            self.data_dir, CIFAR10_URL.split('/')[-1])
-        self.maybe_download_and_extract()
-
-    @property
-    def train_data(self):
-        if self._train_data is None:
-            self._load('train')
-
-        return self._train_data
-
-    @property
-    def train_labels(self):
-        if self._train_labels is None:
-            self._load('train')
-
-        return self._train_labels
-
-    @property
-    def test_data(self):
-        if self._test_data is None:
-            self._load('test')
-
-        return self._test_data
-
-    @property
-    def test_labels(self):
-        if self._test_labels is None:
-            self._load('test')
-
-        return self._test_labels
-
-    def _load(self, dataset='train'):
-        """Load the data in memory.
-        Args:
-            dataset: string in ['train', 'test']
-        """
-        data, labels = None, None
-        if dataset == 'train':
-            files = [os.path.join(self.cifar10_dir, 'data_batch_%d' % i)
-                     for i in range(1, 6)]
-        else:
-            files = [os.path.join(self.cifar10_dir, 'test_batch')]
-
-        for file in files:
-            if not os.path.exists(file):
-                raise FileNotFoundError('Failed to find file: ' + file)
-
-        # Load the data from the batch files
-        for file in files:
-            with open(file, 'rb') as f:
-                cifar10 = pickle.load(f, encoding='latin1')
-
-            if labels is None:
-                labels = np.array(cifar10['labels'])
-            else:
-                labels = np.concatenate((labels, cifar10['labels']), axis=0)
-
-            if data is None:
-                data = cifar10['data']
-            else:
-                data = np.concatenate((data, cifar10['data']), axis=0)
-
-        # Adapt the format of the data to our convnet
-        data = np.array(data, dtype=float) / 255.0
-        data = data.reshape(
-            [-1, self.num_channels, self.img_size, self.img_size])
-        data = data.transpose([0, 2, 3, 1])
-
-        # One-hot encode labels (see https://stackoverflow.com/a/42874726)
-        labels = np.eye(self.num_classes)[np.array(labels).reshape(-1)]
-
-        if dataset == 'train':
-            self._train_data, self._train_labels = data, labels
-        else:
-            self._test_data, self._test_labels = data, labels
-
-    def model(self, inputs, mode='train'):
-        """Build a simple convnet (BN before ReLU).
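-
-        Example (illustrative):
-            cifar10 = CIFAR10()
-            inputs = tf.placeholder(tf.float32, [None, 32, 32, 3])
-            predictions = cifar10.model(inputs, mode='train')
-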
- Args: - inputs: a tensor of size [batch_size, height, width, channels] - mode: string in ['train', 'test'] - Returns: - the last op containing the predictions - Note: - Best score - Step: 7015 - Epoch: 18/20 - best batch acc: 0.8984 - loss: 1.5656 - Worst score - Step: 7523 - Epoch: 20/20 - best batch acc: 0.7734 - loss: 1.6874 - """ - # Extract features - training = (mode == 'train') - with tf.variable_scope('conv1') as scope: - conv = tf.layers.conv2d(inputs=inputs, filters=16, kernel_size=[ - 3, 3], padding='SAME') - bn = tf.layers.batch_normalization(inputs=conv, training=training) - bn = tf.nn.relu(bn) - conv = tf.layers.conv2d(inputs=bn, filters=16, kernel_size=[ - 3, 3], padding='SAME') - bn = tf.layers.batch_normalization(inputs=conv, training=training) - bn = tf.nn.relu(bn) - pool = tf.layers.max_pooling2d( - bn, pool_size=[2, 2], strides=2, padding='SAME', name=scope.name) - - with tf.variable_scope('conv2') as scope: - conv = tf.layers.conv2d(inputs=pool, filters=32, kernel_size=[ - 3, 3], padding='SAME') - bn = tf.layers.batch_normalization(inputs=conv, training=training) - bn = tf.nn.relu(bn) - conv = tf.layers.conv2d(inputs=bn, filters=32, kernel_size=[ - 3, 3], padding='SAME') - bn = tf.layers.batch_normalization(inputs=conv, training=training) - bn = tf.nn.relu(bn) - pool = tf.layers.max_pooling2d( - bn, pool_size=[2, 2], strides=2, padding='SAME', name=scope.name) - - with tf.variable_scope('conv3') as scope: - conv = tf.layers.conv2d(inputs=pool, filters=32, kernel_size=[ - 3, 3], padding='SAME') - bn = tf.layers.batch_normalization(inputs=conv, training=training) - bn = tf.nn.relu(bn) - conv = tf.layers.conv2d(inputs=bn, filters=32, kernel_size=[ - 3, 3], padding='SAME') - bn = tf.layers.batch_normalization(inputs=conv, training=training) - bn = tf.nn.relu(bn) - pool = tf.layers.max_pooling2d( - bn, pool_size=[2, 2], strides=2, padding='SAME', name=scope.name) - - # Classify - with tf.variable_scope('fc') as scope: - flat = tf.layers.flatten(pool) - fc = tf.layers.dense(inputs=flat, units=32, activation=tf.nn.relu) - softmax = tf.layers.dense( - inputs=fc, units=self.num_classes, activation=tf.nn.softmax) - - return softmax - - def model2(self, inputs, mode='train'): - """Build a simple convnet (ReLU before BN). 
- Args: - inputs: a tensor of size [batch_size, height, width, channels] - mode: string in ['train', 'test'] - Returns: - the last op containing the predictions - Note: - Best score - Step: 7411 - Epoch: 20/20 - best batch acc: 0.8438 - loss: 1.6347 - Worst score - Step: 7751 - Epoch: 20/20 - best batch acc: 0.8047 - loss: 1.6616 - """ - # Extract features - training = (mode == 'train') - with tf.variable_scope('conv1') as scope: - conv = tf.layers.conv2d(inputs=inputs, filters=16, kernel_size=[ - 3, 3], padding='SAME', activation=tf.nn.relu) - bn = tf.layers.batch_normalization(inputs=conv, training=training) - conv = tf.layers.conv2d(inputs=bn, filters=16, kernel_size=[ - 3, 3], padding='SAME', activation=tf.nn.relu) - bn = tf.layers.batch_normalization(inputs=conv, training=training) - pool = tf.layers.max_pooling2d( - bn, pool_size=[2, 2], strides=2, padding='SAME', name=scope.name) - - with tf.variable_scope('conv2') as scope: - conv = tf.layers.conv2d(inputs=pool, filters=32, kernel_size=[ - 3, 3], padding='SAME', activation=tf.nn.relu) - bn = tf.layers.batch_normalization(inputs=conv, training=training) - conv = tf.layers.conv2d(inputs=bn, filters=32, kernel_size=[ - 3, 3], padding='SAME', activation=tf.nn.relu) - bn = tf.layers.batch_normalization(inputs=conv, training=training) - pool = tf.layers.max_pooling2d( - bn, pool_size=[2, 2], strides=2, padding='SAME', name=scope.name) - - with tf.variable_scope('conv3') as scope: - conv = tf.layers.conv2d(inputs=pool, filters=32, kernel_size=[ - 3, 3], padding='SAME', activation=tf.nn.relu) - bn = tf.layers.batch_normalization(inputs=conv, training=training) - conv = tf.layers.conv2d(inputs=bn, filters=32, kernel_size=[ - 3, 3], padding='SAME', activation=tf.nn.relu) - bn = tf.layers.batch_normalization(inputs=conv, training=training) - pool = tf.layers.max_pooling2d( - bn, pool_size=[2, 2], strides=2, padding='SAME', name=scope.name) - - # Classify - with tf.variable_scope('fc') as scope: - flat = tf.layers.flatten(pool) - fc = tf.layers.dense(inputs=flat, units=32, activation=tf.nn.relu) - softmax = tf.layers.dense( - inputs=fc, units=self.num_classes, activation=tf.nn.softmax) - - return softmax - - def maybe_download_and_extract(self): - """Download and extract the tarball from Alex Krizhevsky's website.""" - if not os.path.exists(self.cifar10_dir): - - if not os.path.exists(self.data_dir): - os.makedirs(self.data_dir) - - def _progress(count, block_size, total_size): - status_msg = '\r>> Downloading {} {:>3}% ' - sys.stdout.write(status_msg.format(self.cifar10_tarball, float( - count * block_size) / total_size * 100.0)) - sys.stdout.flush() - - file_path, _ = urlretrieve( - CIFAR10_URL, self.cifar10_tarball, _progress) - - stat_info = os.stat(file_path) - print('\nSuccessfully downloaded', file_path, - stat_info.st_size, 'bytes.\n') - - tarfile.open(file_path, 'r:gz').extractall(self.data_dir) -# Import methods to expose in the library -from .graph import Graph, Node, build_graph -from .history import History -from .canvas import show_images -from .canvas import Canvas -from .utils import write -from . import transforms -""" -HiddenLayer - -Implementation of the Canvas class to render visualizations. 
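-
-Typical use (illustrative):
-
-    c = Canvas()
-    with c:
-        c.draw_plot(history["loss"])
-        c.draw_hist(history["weights"])
-
-Drawing calls made inside the `with` block are queued and rendered together
-when the block exits (see Canvas.render below).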
-
-Written by Waleed Abdulla
-Licensed under the MIT License
-"""
-
-import itertools
-import math
-import numpy as np
-import matplotlib
-import matplotlib.pyplot as plt
-import IPython.display
-from mpl_toolkits.mplot3d import Axes3D
-from matplotlib.collections import PolyCollection
-
-
-DEFAULT_THEME = {
-    "fig_width": 12,  # inches
-    "hist_outline_color": [0, 0, 0.9],
-    "hist_color": [0.5, 0, 0.9],
-}
-
-
-def norm(image):
-    """Normalize an image to [0, 1] range."""
-    min_value = image.min()
-    max_value = image.max()
-    if min_value == max_value:
-        return image - min_value
-    return (image - min_value) / (max_value - min_value)
-
-
-# TODO: Move inside Canvas and merge with draw_images
-def show_images(images, titles=None, cols=5, **kwargs):
-    """
-    images: A list of images. It can be either:
-        - A list of Numpy arrays. Each array represents an image.
-        - A list of lists of Numpy arrays. In this case, the images in
-          the inner lists are concatenated to make one image.
-    """
-    # The images param can be a list or an array
-
-    titles = titles or [""] * len(images)
-    rows = math.ceil(len(images) / cols)
-    height_ratio = 1.2 * (rows/cols) * \
-        (0.5 if type(images[0]) is not np.ndarray else 1)
-    plt.figure(figsize=(11, 11 * height_ratio))
-    i = 1
-    for image, title in zip(images, titles):
-        plt.subplot(rows, cols, i)
-        plt.axis("off")
-        # Is image a list? If so, merge them into one image.
-        if type(image) is not np.ndarray:
-            image = [norm(g) for g in image]
-            image = np.concatenate(image, axis=1)
-        else:
-            image = norm(image)
-        plt.title(title, fontsize=9)
-        plt.imshow(image, cmap="Greys_r", **kwargs)
-        i += 1
-    plt.tight_layout(h_pad=0, w_pad=0)
-
-
-###############################################################################
-# Canvas Class
-###############################################################################
-
-class Canvas():
-
-    def __init__(self):
-        self._context = None
-        self.theme = DEFAULT_THEME
-        self.figure = None
-        self.backend = matplotlib.get_backend()
-        self.drawing_calls = []
-
-    def __enter__(self):
-        self._context = "build"
-        self.drawing_calls = []
-        return self
-
-    def __exit__(self, exc_type, exc_val, exc_tb):
-        self.render()
-
-    def render(self):
-        self._context = "run"
-        # Clear output
-        if 'inline' in self.backend:
-            IPython.display.clear_output(wait=True)
-            self.figure = None
-
-        # Separate the draw_*() calls that generate a grid cell
-        grid_calls = []
-        silent_calls = []
-        for c in self.drawing_calls:
-            if c[0] == "draw_summary":
-                silent_calls.append(c)
-            else:
-                grid_calls.append(c)
-
-        # Header area
-        # TODO: ideally, compute how much header area we need based on the
-        #       length of text to show there. Right now, we're just using
-        #       a fixed number multiplied by the number of calls. Since there
-        #       is only one silent call, draw_summary(), the header padding
-        #       is either 0 or 0.1
-        head_pad = 0.1 * len(silent_calls)
-
-        width = self.theme['fig_width']
-        if not self.figure:
-            self.figure = plt.figure(
-                figsize=(width, width/3 * (head_pad + len(grid_calls))))
-        self.figure.clear()
-
-        # Divide figure area by number of grid calls
-        gs = matplotlib.gridspec.GridSpec(len(grid_calls), 1)
-
-        # Call silent calls
-        for c in silent_calls:
-            getattr(self, c[0])(*c[1], **c[2])
-
-        # Call grid methods
-        for i, c in enumerate(grid_calls):
-            method = c[0]
-            # Create an axis for each call
-            # Save it in self.ax so the drawing function has access to it
-            self.ax = self.figure.add_subplot(gs[i])
-            # Save the GridSpec as well
-            self.gs = gs[i]
-            # Call the method
-            getattr(self, method)(*c[1], **c[2])
-            # Cleanup after drawing
-            self.ax = None
-            self.gs = None
-        gs.tight_layout(self.figure, rect=(0, 0, 1, 1-head_pad))
-
-        # TODO: pause() allows the GUI to render but it's sluggish because it
-        # only has 0.1 seconds of CPU time at each step. A better solution would be to
-        # launch a separate process to render the GUI and pipe data to it.
-        plt.pause(0.1)
-        plt.show(block=False)
-        self.drawing_calls = []
-        self._context = None
-
-    def __getattribute__(self, name):
-        if name.startswith("draw_") and self._context != "run":
-            def wrapper(*args, **kwargs):
-                self.drawing_calls.append((name, args, kwargs))
-                if not self._context:
-                    self.render()
-            return wrapper
-        else:
-            return object.__getattribute__(self, name)
-
-    def save(self, file_name):
-        self.figure.savefig(file_name)
-
-    def draw_summary(self, history, title=""):
-        """Inserts a text summary at the top that lists the number of steps and total
-        training time."""
-        # Generate summary string
-        time_str = str(history.get_total_time()).split(".")[
-            0]  # remove microseconds
-        summary = "Step: {} Time: {}".format(history.step, time_str)
-        if title:
-            summary = title + "\n\n" + summary
-        self.figure.suptitle(summary)
-
-    def draw_plot(self, metrics, labels=None, ylabel=""):
-        """
-        metrics: One or more metrics parameters. Each represents the history
-            of one metric.
-        """
-        metrics = metrics if isinstance(metrics, list) else [metrics]
-        # Loop through metrics
-        title = ""
-        for i, m in enumerate(metrics):
-            label = labels[i] if labels else m.name
-            # TODO: use a standard formatting function for values
-            title += (" " if title else "") + \
-                "{}: {}".format(label, m.data[-1])
-            self.ax.plot(m.formatted_steps, m.data, label=label)
-        self.ax.set_title(title)
-        self.ax.set_ylabel(ylabel)
-        self.ax.legend()
-        self.ax.set_xlabel("Steps")
-        self.ax.xaxis.set_major_locator(plt.AutoLocator())
-
-    def draw_image(self, metric, limit=5):
-        """Display a series of images at different time steps."""
-        rows = 1
-        cols = limit
-        self.ax.axis("off")
-        # Take the Axes gridspec and divide it into a grid
-        gs = matplotlib.gridspec.GridSpecFromSubplotSpec(
-            rows, cols, subplot_spec=self.gs)
-        # Loop through images in last few steps
-        for i, image in enumerate(metric.data[-cols:]):
-            ax = self.figure.add_subplot(gs[0, i])
-            ax.axis('off')
-            ax.set_title(metric.formatted_steps[-cols:][i])
-            ax.imshow(norm(image))
-
-    def draw_hist(self, metric, title=""):
-        """Draw a series of histograms of the selected keys over different
-        training steps.
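-
-        Example (illustrative):
-            with c:
-                c.draw_hist(h["conv1_weight"])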
-        """
-        # TODO: assert isinstance(list(values.values())[0], np.ndarray)
-
-        rows = 1
-        cols = 1
-        limit = 10  # max steps to show
-
-        # We need a 3D projection Subplot, so ignore the one provided to
-        # us and create a new one.
-        ax = self.figure.add_subplot(self.gs, projection="3d")
-        ax.view_init(30, -80)
-
-        # Compute histograms
-        verts = []
-        area_colors = []
-        edge_colors = []
-        for i, s in enumerate(metric.steps[-limit:]):
-            hist, edges = np.histogram(metric.data[-i-1:])
-            # X is bin centers
-            x = np.diff(edges)/2 + edges[:-1]
-            # Y is hist values
-            y = hist
-            x = np.concatenate([x[0:1], x, x[-1:]])
-            y = np.concatenate([[0], y, [0]])
-
-            # Ranges
-            if i == 0:
-                x_min = x.min()
-                x_max = x.max()
-                y_min = y.min()
-                y_max = y.max()
-            x_min = np.minimum(x_min, x.min())
-            x_max = np.maximum(x_max, x.max())
-            y_min = np.minimum(y_min, y.min())
-            y_max = np.maximum(y_max, y.max())
-
-            alpha = 0.8 * (i+1) / min(limit, len(metric.steps))
-            verts.append(list(zip(x, y)))
-            area_colors.append(np.array(self.theme["hist_color"] + [alpha]))
-            edge_colors.append(
-                np.array(self.theme["hist_outline_color"] + [alpha]))
-
-        poly = PolyCollection(
-            verts, facecolors=area_colors, edgecolors=edge_colors)
-        ax.add_collection3d(poly, zs=list(
-            range(min(limit, len(metric.steps)))), zdir='y')
-
-        ax.set_xlim(x_min, x_max)
-        ax.set_ylim(0, limit)
-        ax.set_yticklabels(metric.formatted_steps[-limit:])
-        ax.set_zlim(y_min, y_max)
-        ax.set_title(metric.name)
-"""
-HiddenLayer
-
-Implementation of graph expressions to find nodes in a graph based on a pattern.
-
-Written by Waleed Abdulla
-Licensed under the MIT License
-"""
-
-import re
-
-
-class GEParser():
-    def __init__(self, text):
-        self.index = 0
-        self.text = text
-
-    def parse(self):
-        return self.serial() or self.parallel() or self.expression()
-
-    def parallel(self):
-        index = self.index
-        expressions = []
-        while len(expressions) == 0 or self.token("|"):
-            e = self.expression()
-            if not e:
-                break
-            expressions.append(e)
-        if len(expressions) >= 2:
-            return ParallelPattern(expressions)
-        # No match. Reset index
-        self.index = index
-
-    def serial(self):
-        index = self.index
-        expressions = []
-        while len(expressions) == 0 or self.token(">"):
-            e = self.expression()
-            if not e:
-                break
-            expressions.append(e)
-
-        if len(expressions) >= 2:
-            return SerialPattern(expressions)
-        self.index = index
-
-    def expression(self):
-        index = self.index
-
-        if self.token("("):
-            e = self.serial() or self.parallel() or self.op()
-            if e and self.token(")"):
-                return e
-        self.index = index
-        e = self.op()
-        return e
-
-    def op(self):
-        t = self.re(r"\w+")
-        if t:
-            c = self.condition()
-            return NodePattern(t, c)
-
-    def condition(self):
-        # TODO: not implemented yet. 
This function is a placeholder - index = self.index - if self.token("["): - c = self.token("1x1") or self.token("3x3") - if c: - if self.token("]"): - return c - self.index = index - - def token(self, s): - return self.re(r"\s*(" + re.escape(s) + r")\s*", 1) - - def string(self, s): - if s == self.text[self.index:self.index+len(s)]: - self.index += len(s) - return s - - def re(self, regex, group=0): - m = re.match(regex, self.text[self.index:]) - if m: - self.index += len(m.group(0)) - return m.group(group) - - -class NodePattern(): - def __init__(self, op, condition=None): - self.op = op - self.condition = condition # TODO: not implemented yet - - def match(self, graph, node): - if isinstance(node, list): - return [], None - if self.op == node.op: - following = graph.outgoing(node) - if len(following) == 1: - following = following[0] - return [node], following - else: - return [], None - - -class SerialPattern(): - def __init__(self, patterns): - self.patterns = patterns - - def match(self, graph, node): - all_matches = [] - for i, p in enumerate(self.patterns): - matches, following = p.match(graph, node) - if not matches: - return [], None - all_matches.extend(matches) - if i < len(self.patterns) - 1: - node = following # Might be more than one node - return all_matches, following - - -class ParallelPattern(): - def __init__(self, patterns): - self.patterns = patterns - - def match(self, graph, nodes): - if not nodes: - return [], None - nodes = nodes if isinstance(nodes, list) else [nodes] - # If a single node, assume we need to match with its siblings - if len(nodes) == 1: - nodes = graph.siblings(nodes[0]) - else: - # Verify all nodes have the same parent or all have no parent - parents = [graph.incoming(n) for n in nodes] - matches = [set(p) == set(parents[0]) for p in parents[1:]] - if not all(matches): - return [], None - - # TODO: If more nodes than patterns, we should consider - # all permutations of the nodes - if len(self.patterns) != len(nodes): - return [], None - - patterns = self.patterns.copy() - nodes = nodes.copy() - all_matches = [] - end_node = None - for p in patterns: - found = False - for n in nodes: - matches, following = p.match(graph, n) - if matches: - found = True - nodes.remove(n) - all_matches.extend(matches) - # Verify all branches end in the same node - if end_node: - if end_node != following: - return [], None - else: - end_node = following - break - if not found: - return [], None - return all_matches, end_node -""" -HiddenLayer - -Implementation of the Graph class. A framework independent directed graph to -represent a neural network. - -Written by Waleed Abdulla. Additions by Phil Ferriere. 
-Licensed under the MIT License
-"""
-from __future__ import absolute_import, division, print_function
-import os
-import re
-from random import getrandbits
-import inspect
-import numpy as np
-
-
-THEMES = {
-    "basic": {
-        "background_color": "#FFFFFF",
-        "fill_color": "#E8E8E8",
-        "outline_color": "#000000",
-        "font_color": "#000000",
-        "font_name": "Times",
-        "font_size": "10",
-        "margin": "0,0",
-        "padding": "1.0,0.5",
-    },
-    "blue": {
-        "background_color": "#FFFFFF",
-        "fill_color": "#BCD6FC",
-        "outline_color": "#7C96BC",
-        "font_color": "#202020",
-        "font_name": "Verdana",
-        "font_size": "10",
-        "margin": "0,0",
-        "padding": "1.0,0.5",
-    },
-}
-
-
-###########################################################################
-# Utility Functions
-###########################################################################
-
-def detect_framework(value):
-    # Get all base classes
-    classes = inspect.getmro(value.__class__)
-    for c in classes:
-        if c.__module__.startswith("torch"):
-            return "torch"
-        elif c.__module__.startswith("tensorflow"):
-            return "tensorflow"
-
-
-###########################################################################
-# Node
-###########################################################################
-
-class Node():
-    """Represents a framework-agnostic neural network layer in a directed graph."""
-
-    def __init__(self, uid, name, op, output_shape=None, params=None):
-        """
-        uid: unique ID for the layer that doesn't repeat in the computation graph.
-        name: Name to display
-        op: Framework-agnostic operation name.
-        """
-        self.id = uid
-        self.name = name  # TODO: clarify the use of op vs name vs title
-        self.op = op
-        self.repeat = 1
-        if output_shape:
-            assert isinstance(output_shape, (tuple, list)),\
-                "output_shape must be a tuple or list but received {}".format(
-                    type(output_shape))
-        self.output_shape = output_shape
-        self.params = params if params else {}
-        self._caption = ""
-
-    @property
-    def title(self):
-        # Default
-        title = self.name or self.op
-
-        if "kernel_shape" in self.params:
-            # Kernel
-            kernel = self.params["kernel_shape"]
-            title += "x".join(map(str, kernel))
-        if "stride" in self.params:
-            stride = self.params["stride"]
-            if np.unique(stride).size == 1:
-                stride = stride[0]
-            if stride != 1:
-                title += "/s{}".format(str(stride))
-        # # Transposed
-        # if node.transposed:
-        #     name = "Transposed" + name
-        return title
-
-    @property
-    def caption(self):
-        if self._caption:
-            return self._caption
-
-        caption = ""
-
-        # Stride
-        # if "stride" in self.params:
-        #     stride = self.params["stride"]
-        #     if np.unique(stride).size == 1:
-        #         stride = stride[0]
-        #     if stride != 1:
-        #         caption += "/{}".format(str(stride))
-        return caption
-
-    def __repr__(self):
-        args = (self.op, self.name, self.id, self.title, self.repeat)
-        f = "<Node: op: {}, name: {}, id: {}, title: {}, repeat: {}>"
-        return f.format(*args)
-
-    def sequence_id(self, sequence):
-        """Make up an ID for a sequence (list) of nodes."""
-        if sequence:
-            return ">".join([node.id for node in sequence])
-        else:
-            return getrandbits(64)
-
-    def build_dot(self):
-        """Generate a GraphViz Dot graph.
-
-        Returns a GraphViz Digraph object.
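-
-        Example (illustrative, mirroring save() below):
-            dot = graph.build_dot()
-            dot.format = "pdf"
-            dot.render("network", cleanup=True)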
-        """
-        from graphviz import Digraph
-
-        # Build GraphViz Digraph
-        dot = Digraph()
-        dot.attr("graph",
-                 bgcolor=self.theme["background_color"],
-                 color=self.theme["outline_color"],
-                 fontsize=self.theme["font_size"],
-                 fontcolor=self.theme["font_color"],
-                 fontname=self.theme["font_name"],
-                 margin=self.theme["margin"],
-                 rankdir="LR",
-                 pad=self.theme["padding"])
-        dot.attr("node", shape="box",
-                 style="filled", margin="0,0",
-                 fillcolor=self.theme["fill_color"],
-                 color=self.theme["outline_color"],
-                 fontsize=self.theme["font_size"],
-                 fontcolor=self.theme["font_color"],
-                 fontname=self.theme["font_name"])
-        dot.attr("edge", style="solid",
-                 color=self.theme["outline_color"],
-                 fontsize=self.theme["font_size"],
-                 fontcolor=self.theme["font_color"],
-                 fontname=self.theme["font_name"])
-
-        # Build each node's label as an HTML-like table
-        for k, n in self.nodes.items():
-            label = "<tr><td>{}</td></tr>".format(n.title)
-            if n.caption:
-                label += "<tr><td>{}</td></tr>".format(n.caption)
-            if n.repeat > 1:
-                label += "<tr><td align='right'>x{}</td></tr>".format(n.repeat)
-            label = "<<table border='0' cellborder='0' cellpadding='0'>" + label + "</table>>"
-            dot.node(str(k), label)
-        for a, b, label in self.edges:
-            if isinstance(label, (list, tuple)):
-                label = "x".join([str(l or "?") for l in label])
-
-            dot.edge(str(a), str(b), label)
-        return dot
-
-    def _repr_svg_(self):
-        """Allows Jupyter notebook to render the graph automatically."""
-        return self.build_dot()._repr_svg_()
-
-    def save(self, path, format="pdf"):
-        # TODO: assert on acceptable format values
-        dot = self.build_dot()
-        dot.format = format
-        directory, file_name = os.path.split(path)
-        # Remove extension from file name. dot.render() adds it.
-        file_name = file_name.replace("." + format, "")
-        dot.render(file_name, directory=directory, cleanup=True)
-"""
-HiddenLayer
-
-Implementation of the History class to track training metrics.
-
-Written by Waleed Abdulla
-Licensed under the MIT License
-"""
-
-import math
-import random
-import io
-import itertools
-import time
-import datetime
-import pickle
-import numpy as np
-import matplotlib.pyplot as plt
-import matplotlib
-from . import utils
-
-
-###############################################################################
-# Helper Functions
-###############################################################################
-
-def format_step(step, zero_prefix=False):
-    """Return the step value in format suitable for display."""
-    if isinstance(step, int):
-        return "{:06}".format(step) if zero_prefix else "{}".format(step)
-    elif isinstance(step, tuple):
-        return "{:04}:{:06}".format(*step) if zero_prefix else "{}:{}".format(*step)
-
-
-###############################################################################
-# Metric Class
-###############################################################################
-
-class Metric():
-    """Represents the history of a single metric."""
-
-    def __init__(self, history, name):
-        self.name = name
-        self.steps = history.steps
-        self.data = np.array([history.history[s].get(name)
-                              for s in self.steps])
-
-    @property
-    def formatted_steps(self):
-        return [format_step(s) for s in self.steps]
-
-
-###############################################################################
-# History Class
-###############################################################################
-
-class History():
-    """Tracks training progress and visualizes it.
-    For example, use it to track the training and validation loss and accuracy
-    and plot them.
-    """
-
-    def __init__(self):
-        self.step = None  # Last reported step
-        self.metrics = set()  # Names of all metrics reported so far
-        self.history = {}  # Dict of steps and metrics {step: [metrics...]}
-
-    def log(self, step, **kwargs):
-        """Record metrics at a specific step. E.g.
-
-        my_history.log(34, loss=2.3, accuracy=0.2)
-
-        Okay to call multiple times for the same step. New values overwrite
-        older ones if they have the same metric name.
-
-        step: An integer or tuple of integers. If a tuple, then the first
-            value is considered to be the epoch and the second is the step
-            within the epoch.
-        """
-        assert isinstance(step, (int, tuple)), \
-            "Step must be an int or a tuple of two ints"
-        self.step = step
-        # Any new metrics we haven't seen before?
-        self.metrics |= set(kwargs.keys())
-        # Insert (or update) record of the step
-        if step not in self.history:
-            self.history[step] = {}
-        self.history[step].update({k: utils.to_data(v)
-                                   for k, v in kwargs.items()})
-        # Update step timestamp
-        self.history[step]["__timestamp__"] = time.time()
-
-    @property
-    def steps(self):
-        """Returns a list of all steps logged so far, guaranteed to be
-        sorted correctly.
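-
-        Example (illustrative):
-            h.log(5, loss=0.9)
-            h.log(1, loss=1.2)
-            h.steps  # -> [1, 5]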
-        """
-        if not self.history:
-            return []
-        # TODO: Consider caching the sorted steps for performance
-        return sorted(self.history.keys())
-
-    @property
-    def formatted_steps(self):
-        return [format_step(s) for s in self.steps]
-
-    def __getitem__(self, metric):
-        return Metric(self, metric)
-
-    def progress(self):
-        # TODO: Erase the previous progress text to update in place
-        text = "Step {}: ".format(self.step)
-        metrics = self.history[self.step]
-        for k, v in metrics.items():
-            # Skip timestamp
-            if k == "__timestamp__":
-                continue
-            # Exclude lists, dicts, and arrays
-            # TODO: ideally, include the skipped types with a compact representation
-            if not isinstance(v, (list, dict, np.ndarray)):
-                text += "{}: {} ".format(k, v)
-        print(text)
-
-    def summary(self):
-        # TODO: Include more details in the summary
-        print("Last Step: {}".format(self.step))
-        print("Training Time: {}".format(self.get_total_time()))
-
-    def get_total_time(self):
-        """Returns the total period between when the first and last steps
-        were logged. This usually corresponds to the total training time
-        if there were no gaps in the training.
-        """
-        first_step = self.steps[0]
-        last_step = self.steps[-1]
-        seconds = self.history[last_step]["__timestamp__"] \
-            - self.history[first_step]["__timestamp__"]
-        return datetime.timedelta(seconds=seconds)
-
-    def save(self, file_name):
-        with open(file_name, "wb") as f:
-            pickle.dump(self.history, f)
-
-    def load(self, file_name):
-        with open(file_name, "rb") as f:
-            self.history = pickle.load(f)
-        # Set last step and metrics
-        self.step = self.steps[-1]
-        unique_metrics = set(itertools.chain(
-            *[m.keys() for m in self.history.values()]))
-        self.metrics = unique_metrics - {"__timestamp__", }
-"""
-HiddenLayer
-
-PyTorch graph importer.
-
-Written by Waleed Abdulla
-Licensed under the MIT License
-"""
-
-from __future__ import absolute_import, division, print_function
-import re
-from .graph import Graph, Node
-from . import transforms as ht
-import torch
-
-# PyTorch Graph Transforms
-FRAMEWORK_TRANSFORMS = [
-    # Hide onnx: prefix
-    ht.Rename(op=r"onnx::(.*)", to=r"\1"),
-    # ONNX uses Gemm for linear layers (stands for General Matrix Multiplication).
-    # It's an odd name that no one recognizes. Rename it.
-    ht.Rename(op=r"Gemm", to=r"Linear"),
-    # PyTorch layers that don't have an ONNX counterpart
-    ht.Rename(op=r"aten::max\_pool2d\_with\_indices", to="MaxPool"),
-    # Shorten op name
-    ht.Rename(op=r"BatchNormalization", to="BatchNorm"),
-]
-
-
-def dump_pytorch_graph(graph):
-    """List all the nodes in a PyTorch graph."""
-    f = "{:25} {:40} {} -> {}"
-    print(f.format("kind", "scopeName", "inputs", "outputs"))
-    for node in graph.nodes():
-        print(f.format(node.kind(), node.scopeName(),
-                       [i.unique() for i in node.inputs()],
-                       [i.unique() for i in node.outputs()]
-                       ))
-
-
-def pytorch_id(node):
-    """Returns a unique ID for a node."""
-    # After ONNX simplification, the scopeName is not unique anymore
-    # so append node outputs to guarantee uniqueness
-    return node.scopeName() + "/outputs/" + "/".join([o.uniqueName() for o in node.outputs()])
-
-
-def get_shape(torch_node):
-    """Return the output shape of the given Pytorch node."""
-    # Extract node output shape from the node string representation
-    # This is a hack because there doesn't seem to be an official way to do it.
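-    # Example (illustrative): the string form of a traced node output looks
-    # roughly like "%1 : Float(1, 64, 112, 112) = onnx::Conv(...)", from
-    # which the regex below recovers the shape (1, 64, 112, 112).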
-    # See my question in the PyTorch forum:
-    # https://discuss.pytorch.org/t/node-output-shape-from-trace-graph/24351/2
-    # TODO: find a better way to extract output shape
-    # TODO: Assuming the node has one output. Update if we encounter a multi-output node.
-    m = re.match(r".*Float\(([\d\s\,]+)\).*", str(next(torch_node.outputs())))
-    if m:
-        shape = m.group(1)
-        shape = shape.split(",")
-        shape = tuple(map(int, shape))
-    else:
-        shape = None
-    return shape
-
-
-def import_graph(hl_graph, model, args, input_names=None, verbose=False):
-    # TODO: add input names to graph
-
-    # Run the Pytorch graph to get a trace and generate a graph from it
-    trace, out = torch.jit.get_trace_graph(model, args)
-    torch.onnx._optimize_trace(trace, torch.onnx.OperatorExportTypes.ONNX)
-    torch_graph = trace.graph()
-
-    # Dump list of nodes (DEBUG only)
-    if verbose:
-        dump_pytorch_graph(torch_graph)
-
-    # Loop through nodes and build HL graph
-    for torch_node in torch_graph.nodes():
-        # Op
-        op = torch_node.kind()
-        # Parameters
-        params = {k: torch_node[k] for k in torch_node.attributeNames()}
-        # Inputs/outputs
-        # TODO: inputs = [i.unique() for i in node.inputs()]
-        outputs = [o.unique() for o in torch_node.outputs()]
-        # Get output shape
-        shape = get_shape(torch_node)
-        # Add HL node
-        hl_node = Node(uid=pytorch_id(torch_node), name=None, op=op,
-                       output_shape=shape, params=params)
-        hl_graph.add_node(hl_node)
-        # Add edges
-        for target_torch_node in torch_graph.nodes():
-            target_inputs = [i.unique() for i in target_torch_node.inputs()]
-            if set(outputs) & set(target_inputs):
-                hl_graph.add_edge_by_id(pytorch_id(
-                    torch_node), pytorch_id(target_torch_node), shape)
-    return hl_graph
-"""
-HiddenLayer
-
-TensorFlow graph importer.
-
-Written by Phil Ferriere. Edits by Waleed Abdulla.
-Licensed under the MIT License
-"""
-
-from __future__ import absolute_import, division, print_function, unicode_literals
-import logging
-import tensorflow as tf
-from .graph import Graph, Node
-from . import transforms as ht
-
-
-FRAMEWORK_TRANSFORMS = [
-    # Rename VariableV2 op to Variable. Same for anything V2, V3, ...etc.
-    ht.Rename(op=r"(\w+)V\d", to=r"\1"),
-    ht.Prune("Const"),
-    ht.Prune("PlaceholderWithDefault"),
-    ht.Prune("Variable"),
-    ht.Prune("VarIsInitializedOp"),
-    ht.Prune("VarHandleOp"),
-    ht.Prune("ReadVariableOp"),
-    ht.PruneBranch("Assign"),
-    ht.PruneBranch("AssignSub"),
-    ht.PruneBranch("AssignAdd"),
-    ht.PruneBranch("AssignVariableOp"),
-    ht.Prune("ApplyMomentum"),
-    ht.Prune("ApplyAdam"),
-    # Fold to NoOp then delete in the next step
-    ht.FoldId(r"^(gradients)/.*", "NoOp"),
-    ht.Prune("NoOp"),
-    ht.Rename(op=r"DepthwiseConv2dNative", to="SeparableConv"),
-    ht.Rename(op=r"Conv2D", to="Conv"),
-    ht.Rename(op=r"FusedBatchNorm", to="BatchNorm"),
-    ht.Rename(op=r"MatMul", to="Linear"),
-    ht.Fold("Conv > BiasAdd", "__first__"),
-    ht.Fold("Linear > BiasAdd", "__first__"),
-    ht.Fold("Shape > StridedSlice > Pack > Reshape", "__last__"),
-    ht.FoldId(r"(.+)/dropout/.*", "Dropout"),
-    ht.FoldId(r"(softmax_cross\_entropy)\_with\_logits.*",
-              "SoftmaxCrossEntropy"),
-]
-
-
-def dump_tf_graph(tfgraph, tfgraphdef):
-    """List all the nodes in a TF graph.
-    tfgraph: A TF Graph object.
-    tfgraphdef: A TF GraphDef object.
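-
-    Example (illustrative, mirroring import_graph below):
-        graph_def = tfgraph.as_graph_def(add_shapes=True)
-        dump_tf_graph(tfgraph, graph_def)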
-    """
-    print("Nodes ({})".format(len(tfgraphdef.node)))
-    f = "{:15} {:59} {:20} {}"
-    print(f.format("kind", "scopeName", "shape", "inputs"))
-    for node in tfgraphdef.node:
-        scopename = node.name
-        kind = node.op
-        inputs = node.input
-        shape = tf.graph_util.tensor_shape_from_node_def_name(
-            tfgraph, scopename)
-        print(f.format(kind, scopename, str(shape), inputs))
-
-
-def import_graph(hl_graph, tf_graph, output=None, verbose=False):
-    """Convert TF graph to directed graph
-    tfgraph: A TF Graph object.
-    output: Name of the output node (string).
-    verbose: Set to True for debug print output
-    """
-    # Get clean(er) list of nodes
-    graph_def = tf_graph.as_graph_def(add_shapes=True)
-    graph_def = tf.graph_util.remove_training_nodes(graph_def)
-
-    # Dump list of TF nodes (DEBUG only)
-    if verbose:
-        dump_tf_graph(tf_graph, graph_def)
-
-    # Loop through nodes and build the matching directed graph
-    for tf_node in graph_def.node:
-        # Read node details
-        try:
-            op, uid, name, shape, params = import_node(
-                tf_node, tf_graph, verbose)
-        except Exception:
-            if verbose:
-                logging.exception("Failed to read node {}".format(tf_node))
-            continue
-
-        # Add node
-        hl_node = Node(uid=uid, name=name, op=op,
-                       output_shape=shape, params=params)
-        hl_graph.add_node(hl_node)
-
-        # Add edges
-        for target_node in graph_def.node:
-            target_inputs = target_node.input
-            if uid in target_node.input:
-                hl_graph.add_edge_by_id(uid, target_node.name, shape)
-    return hl_graph
-
-
-def import_node(tf_node, tf_graph, verbose=False):
-    # Operation type and name
-    op = tf_node.op
-    uid = tf_node.name
-    name = None
-
-    # Shape
-    shape = None
-    if tf_node.op != "NoOp":
-        try:
-            shape = tf.graph_util.tensor_shape_from_node_def_name(
-                tf_graph, tf_node.name)
-            # If the shape is known, convert it to a list
-            if shape.ndims is not None:
-                shape = shape.as_list()
-        except Exception:
-            if verbose:
-                logging.exception(
-                    "Error reading shape of {}".format(tf_node.name))
-
-    # Parameters
-    # At this stage, we really only care about two parameters:
-    # 1/ the kernel size used by convolution layers
-    # 2/ the stride used by convolutional and pooling layers (TODO: not fully working yet)
-
-    # 1/ The kernel size is actually not stored in the convolution tensor but in its weight input.
-    # The weights input has the shape [shape=[kernel, kernel, in_channels, filters]]
-    # So we must fish for it
-    params = {}
-    if op == "Conv2D" or op == "DepthwiseConv2dNative":
-        kernel_shape = tf.graph_util.tensor_shape_from_node_def_name(
-            tf_graph, tf_node.input[1])
-        kernel_shape = [int(a) for a in kernel_shape]
-        params["kernel_shape"] = kernel_shape[0:2]
-        if 'strides' in tf_node.attr.keys():
-            strides = [int(a) for a in tf_node.attr['strides'].list.i]
-            params["stride"] = strides[1:3]
-    elif op == "MaxPool" or op == "AvgPool":
-        # 2/ the stride used by pooling layers
-        # See https://stackoverflow.com/questions/44124942/how-to-access-values-in-protos-in-tensorflow
-        if 'ksize' in tf_node.attr.keys():
-            kernel_shape = [int(a) for a in tf_node.attr['ksize'].list.i]
-            params["kernel_shape"] = kernel_shape[1:3]
-        if 'strides' in tf_node.attr.keys():
-            strides = [int(a) for a in tf_node.attr['strides'].list.i]
-            params["stride"] = strides[1:3]
-
-    return op, uid, name, shape, params
-"""
-HiddenLayer
-
-Transforms that apply to and modify graph nodes.
-
-Written by Waleed Abdulla
-Licensed under the MIT License
-"""
-
-import re
-import copy
-from .graph import Node
-from . 
import ge
-
-
-###########################################################################
-# Transforms
-###########################################################################
-
-class Fold():
-    def __init__(self, pattern, op, name=None):
-        # TODO: validate that op and name are valid
-        self.pattern = ge.GEParser(pattern).parse()
-        self.op = op
-        self.name = name
-
-    def apply(self, graph):
-        # Copy the graph. Don't change the original.
-        graph = copy.deepcopy(graph)
-
-        while True:
-            matches, _ = graph.search(self.pattern)
-            if not matches:
-                break
-
-            # Replace pattern with new node
-            if self.op == "__first__":
-                combo = matches[0]
-            elif self.op == "__last__":
-                combo = matches[-1]
-            else:
-                combo = Node(uid=graph.sequence_id(matches),
-                             name=self.name or " > ".join(
-                                 [l.title for l in matches]),
-                             op=self.op or self.pattern,
-                             output_shape=matches[-1].output_shape)
-                combo._caption = "/".join(filter(None,
-                                                 [l.caption for l in matches]))
-            graph.replace(matches, combo)
-        return graph
-
-
-class FoldId():
-    def __init__(self, id_regex, op, name=None):
-        # TODO: validate op and name are valid
-        self.id_regex = re.compile(id_regex)
-        self.op = op
-        self.name = name
-
-    def apply(self, graph):
-        # Copy the graph. Don't change the original.
-        graph = copy.deepcopy(graph)
-
-        # Group nodes by the first matching group of the regex
-        groups = {}
-        for node in graph.nodes.values():
-            m = self.id_regex.match(node.id)
-            if not m:
-                continue
-
-            assert m.groups(), "Regular expression must have a matching group to avoid folding unrelated nodes."
-            key = m.group(1)
-            if key not in groups:
-                groups[key] = []
-            groups[key].append(node)
-
-        # Fold each group of nodes together
-        for key, nodes in groups.items():
-            # Replace with a new node
-            # TODO: Find last node in the sub-graph and get the output shape from it
-            combo = Node(uid=key,
-                         name=self.name,
-                         op=self.op)
-            graph.replace(nodes, combo)
-        return graph
-
-
-class Prune():
-    def __init__(self, pattern):
-        self.pattern = ge.GEParser(pattern).parse()
-
-    def apply(self, graph):
-        # Copy the graph. Don't change the original.
-        graph = copy.deepcopy(graph)
-
-        while True:
-            matches, _ = graph.search(self.pattern)
-            if not matches:
-                break
-            # Remove found nodes
-            graph.remove(matches)
-        return graph
-
-
-class PruneBranch():
-    def __init__(self, pattern):
-        self.pattern = ge.GEParser(pattern).parse()
-
-    def tag(self, node, tag, graph, conditional=False):
-        # Return if the node is already tagged
-        if hasattr(node, "__tag__") and node.__tag__ == tag:
-            return
-        # If conditional, then tag the node if and only if all its
-        # outgoing nodes already have the same tag.
-        if conditional:
-            # Are all outgoing nodes already tagged?
-            outgoing = graph.outgoing(node)
-            tagged = filter(lambda n: hasattr(n, "__tag__") and n.__tag__ == tag,
-                            outgoing)
-            if len(list(tagged)) != len(outgoing):
-                # Not all outgoing are tagged
-                return
-        # Tag the node
-        node.__tag__ = tag
-        # Tag incoming nodes
-        for n in graph.incoming(node):
-            self.tag(n, tag, graph, conditional=True)
-
-    def apply(self, graph):
-        # Copy the graph. Don't change the original.
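-        # Example (illustrative): PruneBranch("Assign") deletes every Assign
-        # node plus, recursively, any upstream nodes whose outputs feed only
-        # into the pruned branch.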
-        graph = copy.deepcopy(graph)
-
-        while True:
-            matches, _ = graph.search(self.pattern)
-            if not matches:
-                break
-            # Tag found nodes and their incoming branches
-            for n in matches:
-                self.tag(n, "delete", graph)
-            # Find all tagged nodes and delete them
-            tagged = [n for n in graph.nodes.values()
-                      if hasattr(n, "__tag__") and n.__tag__ == "delete"]
-            graph.remove(tagged)
-        return graph
-
-
-class FoldDuplicates():
-    def apply(self, graph):
-        # Copy the graph. Don't change the original.
-        graph = copy.deepcopy(graph)
-
-        matches = True
-        while matches:
-            for node in graph.nodes.values():
-                pattern = ge.SerialPattern(
-                    [ge.NodePattern(node.op), ge.NodePattern(node.op)])
-                matches, _ = pattern.match(graph, node)
-                if matches:
-                    # Use op and name from the first node, and output_shape from the last
-                    combo = Node(uid=graph.sequence_id(matches),
-                                 name=node.name,
-                                 op=node.op,
-                                 output_shape=matches[-1].output_shape)
-                    combo._caption = node.caption
-                    combo.repeat = sum([n.repeat for n in matches])
-                    graph.replace(matches, combo)
-                    break
-        return graph
-
-
-class Rename():
-    def __init__(self, op=None, name=None, to=None):
-        assert op or name, "Either op or name must be provided"
-        assert not (op and name), \
-            "Either op or name should be provided, but not both"
-        assert bool(to), "The to parameter is required"
-        self.to = to
-        self.op = re.compile(op) if op else None
-        self.name = re.compile(name) if name else None
-
-    def apply(self, graph):
-        # Copy the graph. Don't change the original.
-        graph = copy.deepcopy(graph)
-
-        for node in graph.nodes.values():
-            if self.op:
-                node.op = self.op.sub(self.to, node.op)
-            # TODO: name is not tested yet
-            if self.name:
-                node.name = self.name.sub(self.to, node.name)
-        return graph
-
-
-# Transforms to simplify graphs by folding layers that tend to be
-# used together often, such as Conv/BN/Relu.
-# These transforms are used AFTER the framework specific transforms
-# that map TF and PyTorch graphs to a common representation.
-SIMPLICITY_TRANSFORMS = [
-    Fold("Conv > Conv > BatchNorm > Relu", "ConvConvBnRelu"),
-    Fold("Conv > BatchNorm > Relu", "ConvBnRelu"),
-    Fold("Conv > BatchNorm", "ConvBn"),
-    Fold("Conv > Relu", "ConvRelu"),
-    Fold("Linear > Relu", "LinearRelu"),
-    # Fold("ConvBnRelu > MaxPool", "ConvBnReluMaxpool"),
-    # Fold("ConvRelu > MaxPool", "ConvReluMaxpool"),
-    FoldDuplicates(),
-]
-"""
-HiddenLayer
-
-Utility functions.
-
-Written by Waleed Abdulla
-Licensed under the MIT License
-"""
-
-import numpy as np
-
-
-###############################################################################
-# Misc functions
-###############################################################################
-
-def to_data(value):
-    """Standardize data types. Converts PyTorch tensors to Numpy arrays,
-    and Numpy scalars to Python scalars."""
-    # TODO: Use get_framework() for better detection.
-    if value.__class__.__module__.startswith("torch"):
-        import torch
-        if isinstance(value, torch.nn.parameter.Parameter):
-            value = value.data
-        if isinstance(value, torch.Tensor):
-            if value.requires_grad:
-                value = value.detach()
-            value = value.cpu().numpy().copy()
-            # If 0-dim array, convert to scalar
-            if not value.shape:
-                value = value.item()
-    # Convert Numpy scalar types to Python types
-    if value.__class__.__module__ == "numpy" and value.__class__.__name__ != "ndarray":
-        value = value.item()
-    return value
-
-
-def write(*args):
-    """Like print(), but recognizes tensors and arrays and shows
-    more details about them.
- - Example: - hl.write("My Tensor", my_tensor) - - Prints: - My Tensor float32 (10, 3, 224, 224) min: 0.0 max: 1.0 - """ - s = "" - for a in args: - # Convert tensors to Numpy arrays - a = to_data(a) - - if isinstance(a, np.ndarray): - # Numpy Array - s += ("\t" if s else "") + "Tensor {} {} min: {:.3f} max: {:.3f}".format( - a.dtype, a.shape, a.min(), a.max()) - print(s) - s = "" - elif isinstance(a, list): - s += ("\t" if s else "") + \ - "list len: {} {}".format(len(a), a[:10]) - else: - s += (" " if s else "") + str(a) - if s: - print(s) -import unittest -import hiddenlayer as hl -import hiddenlayer.ge as ge -import hiddenlayer.transforms as ht - - -class TestGEParser(unittest.TestCase): - - def test_basics(self): - p = ge.GEParser(" (hello )") - self.assertTrue(p.token("(") and p.re(r"\w+") and p.token(")")) - - p = ge.GEParser("[1x1]") - self.assertTrue(p.condition() == "1x1" and p.index == 5) - - p = ge.GEParser(" [ 1x1 ] ") - self.assertTrue(p.condition() == "1x1" and p.index == 9) - - p = ge.GEParser("[1x1") - self.assertTrue(not p.condition() and p.index == 0) - - p = ge.GEParser("Conv[1x1]") - self.assertTrue(isinstance(p.op(), ge.NodePattern)) - - p = ge.GEParser("Conv[1x1]") - self.assertTrue(isinstance(p.expression(), ge.NodePattern)) - - p = ge.GEParser("(Conv[1x1])") - self.assertTrue(isinstance(p.expression(), ge.NodePattern)) - - def test_serial(self): - p = ge.GEParser("Conv>Conv") - self.assertTrue(isinstance(p.serial(), ge.SerialPattern)) - - p = ge.GEParser("Conv > Conv[1x1]") - self.assertTrue(isinstance(p.serial(), ge.SerialPattern)) - - p = ge.GEParser("Conv > (Conv[1x1] > Conv)") - self.assertTrue(isinstance(p.serial(), ge.SerialPattern)) - - p = ge.GEParser("Conv > Conv[1x1] > Conv") - self.assertTrue(isinstance(p.serial(), ge.SerialPattern)) - self.assertEqual(p.index, 23) - - p = ge.GEParser("(Conv > Conv[1x1])") - self.assertTrue(isinstance(p.expression(), ge.SerialPattern)) - - def test_parallel(self): - p = ge.GEParser("Conv|Conv[1x1]") - self.assertTrue(isinstance(p.parallel(), ge.ParallelPattern)) - - p = ge.GEParser("Conv | Conv[1x1]") - self.assertTrue(isinstance(p.parallel(), ge.ParallelPattern)) - - p = ge.GEParser("Conv | (Conv[1x1] | Conv)") - self.assertTrue(isinstance(p.parallel(), ge.ParallelPattern)) - - p = ge.GEParser("Conv | Conv[1x1] | Conv") - self.assertTrue(isinstance(p.parallel(), ge.ParallelPattern)) - self.assertEqual(p.index, 23) - - p = ge.GEParser("(Conv | Conv[1x1])") - self.assertTrue(isinstance(p.expression(), ge.ParallelPattern)) - - def test_combinations(self): - p = ge.GEParser("Conv | (Conv[1x1] > Conv)") - self.assertTrue(isinstance(p.parallel(), ge.ParallelPattern)) - - p = ge.GEParser("Conv > (Conv [1x1] | Conv)") - self.assertTrue(isinstance(p.serial(), ge.SerialPattern)) - - def test_parsing(self): - p = ge.GEParser("Conv") - self.assertTrue(isinstance(p.parse(), ge.NodePattern)) - - p = ge.GEParser("Conv | Conv[1x1] ") - self.assertTrue(isinstance(p.parse(), ge.ParallelPattern)) - - p = ge.GEParser("Conv | (Conv[1x1] > Conv)") - self.assertTrue(isinstance(p.parse(), ge.ParallelPattern)) - - p = ge.GEParser("(Conv | (Conv[1x1] > Conv))") - self.assertTrue(isinstance(p.parse(), ge.ParallelPattern)) - - -class TestGraph(unittest.TestCase): - def test_directed_graph(self): - g = hl.Graph() - g.add_node("a") - g.add_node("b") - g.add_node("c") - g.add_edge("a", "b") - g.add_edge("b", "c") - - self.assertEqual(g.incoming("b")[0], "a") - self.assertEqual(g.outgoing("b")[0], "c") - g.replace(["b"], "x") - 
self.assertEqual(sorted(list(g.nodes.values())), - sorted(["a", "c", "x"])) - self.assertEqual(g.incoming("x")[0], "a") - self.assertEqual(g.outgoing("x")[0], "c") - - -class TestPatterns(unittest.TestCase): - def test_basics(self): - g = hl.Graph() - a = hl.Node(uid="a", name="a", op="a") - b = hl.Node(uid="b", name="b", op="b") - c = hl.Node(uid="c", name="c", op="c") - d = hl.Node(uid="d", name="d", op="d") - e = hl.Node(uid="e", name="e", op="e") - g.add_node(a) - g.add_node(b) - g.add_node(c) - g.add_node(d) - g.add_node(e) - g.add_edge(a, b) - g.add_edge(b, c) - g.add_edge(b, d) - g.add_edge(c, e) - g.add_edge(d, e) - - rule = ge.GEParser("a > b").parse() - self.assertIsInstance(rule, ge.SerialPattern) - match, following = rule.match(g, a) - self.assertTrue(match) - self.assertCountEqual(following, [c, d]) - match, following = rule.match(g, b) - self.assertFalse(match) - - rule = ge.GEParser("b > c").parse() - self.assertIsInstance(rule, ge.SerialPattern) - match, following = rule.match(g, b) - self.assertFalse(match) - - rule = ge.GEParser("c | d").parse() - self.assertIsInstance(rule, ge.ParallelPattern) - match, following = rule.match(g, [c, d]) - self.assertTrue(match) - self.assertEqual(following, e) - match, following = rule.match(g, [c]) - self.assertTrue(match) - self.assertEqual(following, e) - match, following = rule.match(g, d) - self.assertTrue(match) - self.assertEqual(following, e) - match, following = rule.match(g, b) - self.assertFalse(match) - - rule = ge.GEParser("a > b > (c | d)").parse() - self.assertIsInstance(rule, ge.SerialPattern) - match, following = rule.match(g, a) - self.assertTrue(match, following) - - rule = ge.GEParser("(a > b) > (c | d)").parse() - self.assertIsInstance(rule, ge.SerialPattern) - match, following = rule.match(g, a) - self.assertTrue(match) - - rule = ge.GEParser("a > b > (c | d) > e").parse() - self.assertIsInstance(rule, ge.SerialPattern) - match, following = rule.match(g, a) - self.assertTrue(match) - - rule = ge.GEParser("(c | d) > e").parse() - self.assertIsInstance(rule, ge.SerialPattern) - match, following = rule.match(g, [c, d]) - self.assertTrue(match) - - def test_search(self): - g = hl.Graph() - a = hl.Node(uid="a", name="a", op="a") - b = hl.Node(uid="b", name="b", op="b") - c = hl.Node(uid="c", name="c", op="c") - d = hl.Node(uid="d", name="d", op="d") - g.add_node(a) - g.add_node(b) - g.add_node(c) - g.add_node(d) - g.add_edge(a, b) - g.add_edge(b, c) - g.add_edge(b, d) - - pattern = ge.GEParser("a > b").parse() - match, following = g.search(pattern) - self.assertCountEqual(match, [a, b]) - self.assertCountEqual(following, [c, d]) - - pattern = ge.GEParser("b > (c | d)").parse() - match, following = g.search(pattern) - self.assertCountEqual(match, [b, c, d]) - self.assertEqual(following, []) - - pattern = ge.GEParser("c|d").parse() - match, following = g.search(pattern) - self.assertCountEqual(match, [c, d]) - self.assertEqual(following, []) - - -class TestTransforms(unittest.TestCase): - def test_regex(self): - g = hl.Graph() - a = hl.Node(uid="a", name="a", op="a") - b = hl.Node(uid="b", name="b", op="b") - c = hl.Node(uid="c", name="c", op="c") - d = hl.Node(uid="d", name="d", op="d") - g.add_node(a) - g.add_node(b) - g.add_node(c) - g.add_node(d) - g.add_edge(a, b) - g.add_edge(b, c) - g.add_edge(b, d) - - t = ht.Rename(op=r"a", to="bbb") - g = t.apply(g) - self.assertEqual(g["a"].op, "bbb") - - t = ht.Rename(op=r"b(.*)", to=r"x\1") - g = t.apply(g) - self.assertEqual(g["a"].op, "xbb") - self.assertEqual(g["b"].op, "x") 
- - def test_fold(self): - g = hl.Graph() - a = hl.Node(uid="a", name="a", op="a") - b = hl.Node(uid="b", name="b", op="b") - c = hl.Node(uid="c", name="c", op="c") - d = hl.Node(uid="d", name="d", op="d") - g.add_node(a) - g.add_node(b) - g.add_node(c) - g.add_node(d) - g.add_edge(a, b) - g.add_edge(b, c) - g.add_edge(b, d) - - t = ht.Fold("a > b", "ab") - g = t.apply(g) - self.assertEqual(g.incoming(g["c"])[0].op, "ab") - - def test_fold_duplicates(self): - g = hl.Graph() - a = hl.Node(uid="a", name="a", op="a") - b1 = hl.Node(uid="b1", name="b1", op="b", output_shape=(3, 3)) - b2 = hl.Node(uid="b2", name="b2", op="b", output_shape=(4, 4)) - c = hl.Node(uid="c", name="c", op="c") - d = hl.Node(uid="d", name="d", op="d") - g.add_node(a) - g.add_node(b1) - g.add_node(b2) - g.add_node(c) - g.add_node(d) - g.add_edge(a, b1) - g.add_edge(b1, b2) - g.add_edge(b2, c) - g.add_edge(b2, d) - - t = ht.FoldDuplicates() - g = t.apply(g) - self.assertEqual(g.incoming(g["c"])[0].op, "b") - self.assertEqual(g.incoming(g["c"])[0].name, "b1") - self.assertEqual(g.incoming(g["c"])[0].output_shape, (4, 4)) - - def test_parallel_fold(self): - g = hl.Graph() - a = hl.Node(uid="a", name="a", op="a") - b = hl.Node(uid="b", name="b", op="b") - c = hl.Node(uid="c", name="c", op="c") - d = hl.Node(uid="d", name="d", op="d") - e = hl.Node(uid="e", name="e", op="e") - g.add_node(a) - g.add_node(b) - g.add_node(c) - g.add_node(d) - g.add_node(e) - g.add_edge(a, b) - g.add_edge(b, c) - g.add_edge(a, d) - g.add_edge(c, e) - g.add_edge(d, e) - - t = ht.Fold("((b > c) | d) > e", "bcde") - g = t.apply(g) - self.assertEqual(g.outgoing(g["a"])[0].op, "bcde") - - def test_prune(self): - g = hl.Graph() - a = hl.Node(uid="a", name="a", op="a") - b = hl.Node(uid="b", name="b", op="b") - c = hl.Node(uid="c", name="c", op="c") - d = hl.Node(uid="d", name="d", op="d") - e = hl.Node(uid="e", name="e", op="e") - g.add_node(a) - g.add_node(b) - g.add_node(c) - g.add_node(d) - g.add_node(e) - g.add_edge(a, b) - g.add_edge(b, c) - g.add_edge(a, d) - g.add_edge(c, e) - g.add_edge(d, e) - - t = ht.Prune("e") - g = t.apply(g) - self.assertFalse(g.outgoing(d)) - - def test_prune_branch(self): - g = hl.Graph() - a = hl.Node(uid="a", name="a", op="a") - b = hl.Node(uid="b", name="b", op="b") - c = hl.Node(uid="c", name="c", op="c") - d = hl.Node(uid="d", name="d", op="d") - e = hl.Node(uid="e", name="e", op="e") - g.add_node(a) - g.add_node(b) - g.add_node(c) - g.add_node(d) - g.add_node(e) - g.add_edge(a, b) - g.add_edge(b, c) - g.add_edge(a, d) - g.add_edge(c, e) - g.add_edge(d, e) - - t = ht.PruneBranch("c") - g = t.apply(g) - self.assertFalse(g["b"]) - self.assertFalse(g["c"]) - self.assertTrue(g["a"]) - - -if __name__ == "__main__": - unittest.main() -import os -import sys -import shutil -import unittest -import hiddenlayer as hl - -# Create output directory in project root -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -OUTPUT_DIR = os.path.join(ROOT_DIR, "test_output") -if not os.path.exists(OUTPUT_DIR): - os.makedirs(OUTPUT_DIR) - - -class TestHistory(unittest.TestCase): - def test_steps(self): - # Create History object - h = hl.History() - - for s in range(100): - loss = (100-s)/100 - accuracy = s / 100 - h.log(s, loss=loss) - h.log(s, accuracy=accuracy) - - self.assertEqual(h["loss"].data[0], 1) - self.assertEqual(h["accuracy"].data[0], 0) - self.assertEqual(h.metrics, {"loss", "accuracy"}) - - # Save and load - if not os.path.exists(OUTPUT_DIR): - os.makedirs(OUTPUT_DIR) - h.save(os.path.join(OUTPUT_DIR, 
"history.pkl")) - - # Load it - h2 = hl.History() - h2.load(os.path.join(OUTPUT_DIR, "history.pkl")) - self.assertEqual(h["loss"].data[0], h2["loss"].data[0]) - self.assertEqual(h["accuracy"].data[0], h2["accuracy"].data[0]) - self.assertEqual(h2.step, 99) - self.assertEqual(h2.metrics, {"loss", "accuracy"}) - self.assertEqual(hl.history.format_step(h2.step), "99") - self.assertEqual(hl.history.format_step( - h2.step, zero_prefix=True), "000099") - - # Clean up - shutil.rmtree(OUTPUT_DIR) - - def test_epochs(self): - # Create History object - h = hl.History() - - for e in range(10): - for s in range(100): - loss = (100-s)/100 - accuracy = s / 100 - h.log((e, s), loss=loss) - h.log((e, s), accuracy=accuracy) - - self.assertEqual(h["loss"].data[0], 1) - self.assertEqual(h["accuracy"].data[0], 0) - - # Save and load - if not os.path.exists(OUTPUT_DIR): - os.makedirs(OUTPUT_DIR) - h.save(os.path.join(OUTPUT_DIR, "history_epoch.pkl")) - - # Load it - h2 = hl.History() - h2.load(os.path.join(OUTPUT_DIR, "history_epoch.pkl")) - self.assertEqual(h["loss"].data[0], h2["loss"].data[0]) - self.assertEqual(h["accuracy"].data[0], h2["accuracy"].data[0]) - self.assertEqual(h2.step, (9, 99)) - self.assertEqual(h2.metrics, {"loss", "accuracy"}) - self.assertEqual(hl.history.format_step(h2.step), "9:99") - self.assertEqual(hl.history.format_step( - h2.step, zero_prefix=True), "0009:000099") - - # Clean up - shutil.rmtree(OUTPUT_DIR) - - -if __name__ == "__main__": - unittest.main() -import os -import sys -import shutil -import unittest -import torch -import torchvision.models -import hiddenlayer as hl -from hiddenlayer import transforms as ht - -# Create output directory in project root -ROOT_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -OUTPUT_DIR = os.path.join(ROOT_DIR, "test_output") -if not os.path.exists(OUTPUT_DIR): - os.makedirs(OUTPUT_DIR) - - -class TestPytorchGraph(unittest.TestCase): - def test_graph(self): - model = torchvision.models.vgg16() - g = hl.build_graph(model, torch.zeros([1, 3, 224, 224])) - g.save(os.path.join(OUTPUT_DIR, "pytorch_vgg16.pdf")) - - model = torchvision.models.resnet50() - g = hl.build_graph(model, torch.zeros([1, 3, 224, 224])) - g.save(os.path.join(OUTPUT_DIR, "pytorch_resnet50.pdf")) - - # Clean up - shutil.rmtree(OUTPUT_DIR) - - def test_resnet_blocks(self): - # Resnet101 - model = torchvision.models.resnet101() - - transforms = [ - # Fold Conv, BN, RELU layers into one - ht.Fold("Conv > BatchNormalization > Relu", "ConvBnRelu"), - # Fold Conv, BN layers together - ht.Fold("Conv > BatchNormalization", "ConvBn"), - # Fold bottleneck blocks - ht.Fold(""" - ((ConvBnRelu > ConvBnRelu > ConvBn) | ConvBn) > Add > Relu - """, "BottleneckBlock", "Bottleneck Block"), - # Fold residual blocks - ht.Fold("""ConvBnRelu > ConvBnRelu > ConvBn > Add > Relu""", - "ResBlock", "Residual Block"), - # Fold repeated blocks - ht.FoldDuplicates(), - ] - - # Display graph using the transforms above - g = hl.build_graph(model, torch.zeros( - [1, 3, 224, 224]), transforms=transforms) - g.save(os.path.join(OUTPUT_DIR, "pytorch_resnet_bloks.pdf")) - - # Clean up - shutil.rmtree(OUTPUT_DIR) - - -if __name__ == "__main__": - unittest.main() -import hiddenlayer as hl -import os -import sys -import shutil -import unittest -import torch -from torch import nn -import torch.nn.functional as F -import torchvision.models -from torchvision import datasets, transforms - -import matplotlib -matplotlib.use("Agg") - - -# Create output and data directories in project root -ROOT_DIR = 
os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -OUTPUT_DIR = os.path.join(ROOT_DIR, "test_output") -if not os.path.exists(OUTPUT_DIR): - os.makedirs(OUTPUT_DIR) -DATA_DIR = os.path.join(ROOT_DIR, "test_data") - - -class Net(nn.Module): - def __init__(self): - super(Net, self).__init__() - self.conv1 = nn.Conv2d(1, 10, kernel_size=5) - self.conv2 = nn.Conv2d(10, 20, kernel_size=5) - self.conv2_drop = nn.Dropout2d() - self.fc1 = nn.Linear(320, 50) - self.fc2 = nn.Linear(50, 10) - - def forward(self, x): - x = F.relu(F.max_pool2d(self.conv1(x), 2)) - x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2)) - x = x.view(-1, 320) - x = F.relu(self.fc1(x)) - x = F.dropout(x, training=self.training) - x = self.fc2(x) - return F.log_softmax(x, dim=1) - - -def train(model, device, train_loader, optimizer, epoch): - model.train() - - for batch_idx, (data, target) in enumerate(train_loader): - data, target = data.to(device), target.to(device) - optimizer.zero_grad() - output = model(data) - loss = F.nll_loss(output, target) - loss.backward() - optimizer.step() - if batch_idx % 10 == 0: - model.history.log((epoch, batch_idx), - loss=loss, - conv1_weight=model.conv1.weight) - - # At the end of each batch - with model.canvas: - model.canvas.draw_plot(model.history["loss"]) - model.canvas.draw_hist(model.history["conv1_weight"]) - # TODO: c.draw_image(model.history["conv1_weight"]) - - if batch_idx % 100 == 0: - model.canvas.save(os.path.join( - OUTPUT_DIR, "pytorch_train_{}.png").format(epoch)) - model.history.progress() - - -def test(model, device, test_loader): - model.eval() - test_loss = 0 - correct = 0 - with torch.no_grad(): - for data, target in test_loader: - data, target = data.to(device), target.to(device) - output = model(data) - # sum up batch loss - test_loss += F.nll_loss(output, target, reduction='sum').item() - # get the index of the max log-probability - pred = output.max(1, keepdim=True)[1] - correct += pred.eq(target.view_as(pred)).sum().item() - - test_loss /= len(test_loader.dataset) - print('\nTest set: Average loss: {:.4f}, Accuracy: {}/{} ({:.0f}%)\n'.format( - test_loss, correct, len(test_loader.dataset), - 100. 
* correct / len(test_loader.dataset))) - - -class TestPytorchWatcher(unittest.TestCase): - def test_train(self): - device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu") - - train_loader = torch.utils.data.DataLoader( - datasets.MNIST(DATA_DIR, train=True, download=True, - transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])), - batch_size=64, shuffle=True) - test_loader = torch.utils.data.DataLoader( - datasets.MNIST(DATA_DIR, train=False, transform=transforms.Compose([ - transforms.ToTensor(), - transforms.Normalize((0.1307,), (0.3081,)) - ])), - batch_size=1000, shuffle=True) - - model = Net().to(device) - optimizer = torch.optim.SGD(model.parameters(), lr=0.01, momentum=0.5) - - # Create History object - model.history = hl.History() - model.canvas = hl.Canvas() - - for epoch in range(1, 3): - train(model, device, train_loader, optimizer, epoch) - test(model, device, test_loader) - - # Clean up - shutil.rmtree(OUTPUT_DIR) - - -if __name__ == "__main__": - unittest.main() -#!/usr/bin/env python -from skimage.transform import resize -from skimage.color import rgb2gray -import threading -import tensorflow as tf -import sys -import random -import numpy as np -import time -import gym -from keras import backend as K -from keras.layers import Convolution2D, Flatten, Dense -from collections import deque -from a3c_model import build_policy_and_value_networks -from keras import backend as K -from atari_environment import AtariEnvironment - -# Path params -EXPERIMENT_NAME = "breakout_a3c" -SUMMARY_SAVE_PATH = "/Users/coreylynch/dev/async-rl/summaries/"+EXPERIMENT_NAME -CHECKPOINT_SAVE_PATH = "/tmp/"+EXPERIMENT_NAME+".ckpt" -CHECKPOINT_NAME = "/tmp/breakout_a3c.ckpt-5" -CHECKPOINT_INTERVAL = 5000 -SUMMARY_INTERVAL = 5 -# TRAINING = False -TRAINING = True - -SHOW_TRAINING = True -# SHOW_TRAINING = False - -# Experiment params -GAME = "Breakout-v0" -ACTIONS = 3 -NUM_CONCURRENT = 8 -NUM_EPISODES = 20000 - -AGENT_HISTORY_LENGTH = 4 -RESIZED_WIDTH = 84 -RESIZED_HEIGHT = 84 - -# DQN Params -GAMMA = 0.99 - -# Optimization Params -LEARNING_RATE = 0.00001 - -# Shared global parameters -T = 0 -TMAX = 80000000 -t_max = 32 - - -def sample_policy_action(num_actions, probs): - """ - Sample an action from an action probability distribution output by - the policy network. 
- """ - # Subtract a tiny value from probabilities in order to avoid - # "ValueError: sum(pvals[:-1]) > 1.0" in numpy.multinomial - probs = probs - np.finfo(np.float32).epsneg - - histogram = np.random.multinomial(1, probs) - action_index = int(np.nonzero(histogram)[0]) - return action_index - - -def actor_learner_thread(num, env, session, graph_ops, summary_ops, saver): - # We use global shared counter T, and TMAX constant - global TMAX, T - - # Unpack graph ops - s, a, R, minimize, p_network, v_network = graph_ops - - # Unpack tensorboard summary stuff - r_summary_placeholder, update_ep_reward, val_summary_placeholder, update_ep_val, summary_op = summary_ops - - # Wrap env with AtariEnvironment helper class - env = AtariEnvironment(gym_env=env, resized_width=RESIZED_WIDTH, - resized_height=RESIZED_HEIGHT, agent_history_length=AGENT_HISTORY_LENGTH) - - time.sleep(5*num) - - # Set up per-episode counters - ep_reward = 0 - ep_avg_v = 0 - v_steps = 0 - ep_t = 0 - - probs_summary_t = 0 - - s_t = env.get_initial_state() - terminal = False - - while T < TMAX: - s_batch = [] - past_rewards = [] - a_batch = [] - - t = 0 - t_start = t - - while not (terminal or ((t - t_start) == t_max)): - # Perform action a_t according to policy pi(a_t | s_t) - probs = session.run(p_network, feed_dict={s: [s_t]})[0] - action_index = sample_policy_action(ACTIONS, probs) - a_t = np.zeros([ACTIONS]) - a_t[action_index] = 1 - - if probs_summary_t % 100 == 0: - print "P, ", np.max(probs), "V ", session.run(v_network, feed_dict={s: [s_t]})[0][0] - - s_batch.append(s_t) - a_batch.append(a_t) - - s_t1, r_t, terminal, info = env.step(action_index) - ep_reward += r_t - - r_t = np.clip(r_t, -1, 1) - past_rewards.append(r_t) - - t += 1 - T += 1 - ep_t += 1 - probs_summary_t += 1 - - s_t = s_t1 - - if terminal: - R_t = 0 - else: - R_t = session.run(v_network, feed_dict={s: [s_t]})[ - 0][0] # Bootstrap from last state - - R_batch = np.zeros(t) - for i in reversed(range(t_start, t)): - R_t = past_rewards[i] + GAMMA * R_t - R_batch[i] = R_t - - session.run(minimize, feed_dict={R: R_batch, - a: a_batch, - s: s_batch}) - - # Save progress every 5000 iterations - if T % CHECKPOINT_INTERVAL == 0: - saver.save(session, CHECKPOINT_SAVE_PATH, global_step=T) - - if terminal: - # Episode ended, collect stats and reset game - session.run(update_ep_reward, feed_dict={ - r_summary_placeholder: ep_reward}) - print "THREAD:", num, "/ TIME", T, "/ REWARD", ep_reward - s_t = env.get_initial_state() - terminal = False - # Reset per-episode counters - ep_reward = 0 - ep_t = 0 - - -def build_graph(): - # Create shared global policy and value networks - s, p_network, v_network, p_params, v_params = build_policy_and_value_networks( - num_actions=ACTIONS, agent_history_length=AGENT_HISTORY_LENGTH, resized_width=RESIZED_WIDTH, resized_height=RESIZED_HEIGHT) - - # Shared global optimizer - optimizer = tf.train.AdamOptimizer(LEARNING_RATE) - - # Op for applying remote gradients - R_t = tf.placeholder("float", [None]) - a_t = tf.placeholder("float", [None, ACTIONS]) - log_prob = tf.log(tf.reduce_sum(p_network * a_t, reduction_indices=1)) - p_loss = -log_prob * (R_t - v_network) - v_loss = tf.reduce_mean(tf.square(R_t - v_network)) - - total_loss = p_loss + (0.5 * v_loss) - - minimize = optimizer.minimize(total_loss) - return s, a_t, R_t, minimize, p_network, v_network - -# Set up some episode summary ops to visualize on tensorboard. - - -def setup_summaries(): - episode_reward = tf.Variable(0.) 
- tf.summary.scalar("Episode Reward", episode_reward) - r_summary_placeholder = tf.placeholder("float") - update_ep_reward = episode_reward.assign(r_summary_placeholder) - ep_avg_v = tf.Variable(0.) - tf.summary.scalar("Episode Value", ep_avg_v) - val_summary_placeholder = tf.placeholder("float") - update_ep_val = ep_avg_v.assign(val_summary_placeholder) - summary_op = tf.summary.merge_all() - return r_summary_placeholder, update_ep_reward, val_summary_placeholder, update_ep_val, summary_op - - -def train(session, graph_ops, saver): - # Set up game environments (one per thread) - envs = [gym.make(GAME) for i in range(NUM_CONCURRENT)] - - summary_ops = setup_summaries() - summary_op = summary_ops[-1] - - # Initialize variables - session.run(tf.global_variables_initializer()) - writer = tf.summary.FileWriter(SUMMARY_SAVE_PATH, session.graph) - - # Start NUM_CONCURRENT training threads - actor_learner_threads = [threading.Thread(target=actor_learner_thread, args=( - thread_id, envs[thread_id], session, graph_ops, summary_ops, saver)) for thread_id in range(NUM_CONCURRENT)] - for t in actor_learner_threads: - t.start() - - # Show the agents training and write summary statistics - last_summary_time = 0 - while True: - if SHOW_TRAINING: - for env in envs: - env.render() - now = time.time() - if now - last_summary_time > SUMMARY_INTERVAL: - summary_str = session.run(summary_op) - writer.add_summary(summary_str, float(T)) - last_summary_time = now - for t in actor_learner_threads: - t.join() - - -def evaluation(session, graph_ops, saver): - saver.restore(session, CHECKPOINT_NAME) - print "Restored model weights from ", CHECKPOINT_NAME - monitor_env = gym.make(GAME) - monitor_env.monitor.start('/tmp/'+EXPERIMENT_NAME+"/eval") - - # Unpack graph ops - s, a_t, R_t, minimize, p_network, v_network = graph_ops - - # Wrap env with AtariEnvironment helper class - env = AtariEnvironment(gym_env=monitor_env, resized_width=RESIZED_WIDTH, - resized_height=RESIZED_HEIGHT, agent_history_length=AGENT_HISTORY_LENGTH) - - for i_episode in xrange(100): - s_t = env.get_initial_state() - ep_reward = 0 - terminal = False - while not terminal: - monitor_env.render() - # Forward the deep q network, get Q(s,a) values - probs = p_network.eval(session=session, feed_dict={s: [s_t]})[0] - action_index = sample_policy_action(ACTIONS, probs) - s_t1, r_t, terminal, info = env.step(action_index) - s_t = s_t1 - ep_reward += r_t - print ep_reward - monitor_env.monitor.close() - - -def main(_): - g = tf.Graph() - with g.as_default(), tf.Session() as session: - K.set_session(session) - graph_ops = build_graph() - saver = tf.train.Saver() - - if TRAINING: - train(session, graph_ops, saver) - else: - evaluation(session, graph_ops, saver) - - -if __name__ == "__main__": - tf.app.run() -import tensorflow as tf -from keras import backend as K -from keras.layers import Convolution2D, Flatten, Dense, Input -from keras.models import Model - - -def build_policy_and_value_networks(num_actions, agent_history_length, resized_width, resized_height): - with tf.device("/cpu:0"): - state = tf.placeholder( - "float", [None, agent_history_length, resized_width, resized_height]) - - inputs = Input(shape=(agent_history_length, - resized_width, resized_height,)) - shared = Convolution2D(name="conv1", nb_filter=16, nb_row=8, nb_col=8, subsample=( - 4, 4), activation='relu', border_mode='same')(inputs) - shared = Convolution2D(name="conv2", nb_filter=32, nb_row=4, nb_col=4, subsample=( - 2, 2), activation='relu', border_mode='same')(shared) - shared = 
Flatten()(shared) - shared = Dense(name="h1", output_dim=256, activation='relu')(shared) - - action_probs = Dense(name="p", output_dim=num_actions, - activation='softmax')(shared) - - state_value = Dense(name="v", output_dim=1, - activation='linear')(shared) - - policy_network = Model(input=inputs, output=action_probs) - value_network = Model(input=inputs, output=state_value) - - p_params = policy_network.trainable_weights - v_params = value_network.trainable_weights - - p_out = policy_network(state) - v_out = value_network(state) - - return state, p_out, v_out, p_params, v_params -#!/usr/bin/env python -from model import build_network -from keras import backend as K -import gym -import time -import numpy as np -import random -import sys -import tensorflow as tf -import threading -from atari_environment import AtariEnvironment -from skimage.color import rgb2gray -from skimage.transform import resize -import os -os.environ["KERAS_BACKEND"] = "tensorflow" - - -flags = tf.app.flags - -flags.DEFINE_string('experiment', 'dqn_breakout', - 'Name of the current experiment') -flags.DEFINE_string('game', 'Breakout-v0', - 'Name of the atari game to play. Full list here: https://gym.openai.com/envs#atari') -flags.DEFINE_integer('num_concurrent', 8, - 'Number of concurrent actor-learner threads to use during training.') -flags.DEFINE_integer('tmax', 80000000, 'Number of training timesteps.') -flags.DEFINE_integer('resized_width', 84, 'Scale screen to this width.') -flags.DEFINE_integer('resized_height', 84, 'Scale screen to this height.') -flags.DEFINE_integer('agent_history_length', 4, - 'Use this number of recent screens as the environment state.') -flags.DEFINE_integer('network_update_frequency', 32, - 'Frequency with which each actor learner thread does an async gradient update') -flags.DEFINE_integer('target_network_update_frequency', - 10000, 'Reset the target network every n timesteps') -flags.DEFINE_float('learning_rate', 0.0001, 'Initial learning rate.') -flags.DEFINE_float('gamma', 0.99, 'Reward discount rate.') -flags.DEFINE_integer('anneal_epsilon_timesteps', 1000000, - 'Number of timesteps to anneal epsilon.') -flags.DEFINE_string('summary_dir', '/tmp/summaries', - 'Directory for storing tensorboard summaries') -flags.DEFINE_string('checkpoint_dir', '/tmp/checkpoints', - 'Directory for storing model checkpoints') -flags.DEFINE_integer('summary_interval', 5, - 'Save training summary to file every n seconds (rounded ' - 'up to statistics interval.') -flags.DEFINE_integer('checkpoint_interval', 600, - 'Checkpoint the model (i.e. save the parameters) every n ' - 'seconds (rounded up to statistics interval.') -flags.DEFINE_boolean('show_training', True, - 'If true, have gym render evironments during training') -flags.DEFINE_boolean('testing', False, 'If true, run gym evaluation') -flags.DEFINE_string('checkpoint_path', 'path/to/recent.ckpt', - 'Path to recent checkpoint to use for evaluation') -flags.DEFINE_string('eval_dir', '/tmp/', 'Directory to store gym evaluation') -flags.DEFINE_integer('num_eval_episodes', 100, - 'Number of episodes to run gym evaluation.') -FLAGS = flags.FLAGS -T = 0 -TMAX = FLAGS.tmax - - -def sample_final_epsilon(): - """ - Sample a final epsilon value to anneal towards from a distribution. 
- These values are specified in section 5.1 of http://arxiv.org/pdf/1602.01783v1.pdf - """ - final_epsilons = np.array([.1, .01, .5]) - probabilities = np.array([0.4, 0.3, 0.3]) - return np.random.choice(final_epsilons, 1, p=list(probabilities))[0] - - -def actor_learner_thread(thread_id, env, session, graph_ops, num_actions, summary_ops, saver): - """ - Actor-learner thread implementing asynchronous one-step Q-learning, as specified - in algorithm 1 here: http://arxiv.org/pdf/1602.01783v1.pdf. - """ - global TMAX, T - - # Unpack graph ops - s = graph_ops["s"] - q_values = graph_ops["q_values"] - st = graph_ops["st"] - target_q_values = graph_ops["target_q_values"] - reset_target_network_params = graph_ops["reset_target_network_params"] - a = graph_ops["a"] - y = graph_ops["y"] - grad_update = graph_ops["grad_update"] - - summary_placeholders, update_ops, summary_op = summary_ops - - # Wrap env with AtariEnvironment helper class - env = AtariEnvironment(gym_env=env, resized_width=FLAGS.resized_width, - resized_height=FLAGS.resized_height, agent_history_length=FLAGS.agent_history_length) - - # Initialize network gradients - s_batch = [] - a_batch = [] - y_batch = [] - - final_epsilon = sample_final_epsilon() - initial_epsilon = 1.0 - epsilon = 1.0 - - print "Starting thread ", thread_id, "with final epsilon ", final_epsilon - - time.sleep(3*thread_id) - t = 0 - while T < TMAX: - # Get initial game observation - s_t = env.get_initial_state() - terminal = False - - # Set up per-episode counters - ep_reward = 0 - episode_ave_max_q = 0 - ep_t = 0 - - while True: - # Forward the deep q network, get Q(s,a) values - readout_t = q_values.eval(session=session, feed_dict={s: [s_t]}) - - # Choose next action based on e-greedy policy - a_t = np.zeros([num_actions]) - action_index = 0 - if random.random() <= epsilon: - action_index = random.randrange(num_actions) - else: - action_index = np.argmax(readout_t) - a_t[action_index] = 1 - - # Scale down epsilon - if epsilon > final_epsilon: - epsilon -= (initial_epsilon - final_epsilon) / \ - FLAGS.anneal_epsilon_timesteps - - # Gym excecutes action in game environment on behalf of actor-learner - s_t1, r_t, terminal, info = env.step(action_index) - - # Accumulate gradients - readout_j1 = target_q_values.eval( - session=session, feed_dict={st: [s_t1]}) - clipped_r_t = np.clip(r_t, -1, 1) - if terminal: - y_batch.append(clipped_r_t) - else: - y_batch.append(clipped_r_t + FLAGS.gamma * np.max(readout_j1)) - - a_batch.append(a_t) - s_batch.append(s_t) - - # Update the state and counters - s_t = s_t1 - T += 1 - t += 1 - - ep_t += 1 - ep_reward += r_t - episode_ave_max_q += np.max(readout_t) - - # Optionally update target network - if T % FLAGS.target_network_update_frequency == 0: - session.run(reset_target_network_params) - - # Optionally update online network - if t % FLAGS.network_update_frequency == 0 or terminal: - if s_batch: - session.run(grad_update, feed_dict={y: y_batch, - a: a_batch, - s: s_batch}) - # Clear gradients - s_batch = [] - a_batch = [] - y_batch = [] - - # Save model progress - if t % FLAGS.checkpoint_interval == 0: - saver.save(session, FLAGS.checkpoint_dir+"/" + - FLAGS.experiment+".ckpt", global_step=t) - - # Print end of episode stats - if terminal: - stats = [ep_reward, episode_ave_max_q/float(ep_t), epsilon] - for i in range(len(stats)): - session.run(update_ops[i], feed_dict={ - summary_placeholders[i]: float(stats[i])}) - print "THREAD:", thread_id, "/ TIME", T, "/ TIMESTEP", t, "/ EPSILON", epsilon, "/ REWARD", ep_reward, "/ Q_MAX 
%.4f" % (episode_ave_max_q/float(ep_t)), "/ EPSILON PROGRESS", t/float(FLAGS.anneal_epsilon_timesteps) - break - - -def build_graph(num_actions): - # Create shared deep q network - s, q_network = build_network(num_actions=num_actions, agent_history_length=FLAGS.agent_history_length, - resized_width=FLAGS.resized_width, resized_height=FLAGS.resized_height, name_scope="q-network") - network_params = q_network.trainable_weights - q_values = q_network(s) - - # Create shared target network - st, target_q_network = build_network(num_actions=num_actions, agent_history_length=FLAGS.agent_history_length, - resized_width=FLAGS.resized_width, resized_height=FLAGS.resized_height, name_scope="target-network") - target_network_params = target_q_network.trainable_weights - target_q_values = target_q_network(st) - - # Op for periodically updating target network with online network weights - reset_target_network_params = [target_network_params[i].assign( - network_params[i]) for i in range(len(target_network_params))] - - # Define cost and gradient update op - a = tf.placeholder("float", [None, num_actions]) - y = tf.placeholder("float", [None]) - action_q_values = tf.reduce_sum( - tf.multiply(q_values, a), reduction_indices=1) - cost = tf.reduce_mean(tf.square(y - action_q_values)) - optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate) - grad_update = optimizer.minimize(cost, var_list=network_params) - - graph_ops = {"s": s, - "q_values": q_values, - "st": st, - "target_q_values": target_q_values, - "reset_target_network_params": reset_target_network_params, - "a": a, - "y": y, - "grad_update": grad_update} - - return graph_ops - -# Set up some episode summary ops to visualize on tensorboard. - - -def setup_summaries(): - episode_reward = tf.Variable(0.) - tf.summary.scalar("Episode_Reward", episode_reward) - episode_ave_max_q = tf.Variable(0.) - tf.summary.scalar("Max_Q_Value", episode_ave_max_q) - logged_epsilon = tf.Variable(0.) - tf.summary.scalar("Epsilon", logged_epsilon) - logged_T = tf.Variable(0.) - summary_vars = [episode_reward, episode_ave_max_q, logged_epsilon] - summary_placeholders = [tf.placeholder( - "float") for i in range(len(summary_vars))] - update_ops = [summary_vars[i].assign( - summary_placeholders[i]) for i in range(len(summary_vars))] - summary_op = tf.summary.merge_all() - return summary_placeholders, update_ops, summary_op - - -def get_num_actions(): - """ - Returns the number of possible actions for the given atari game - """ - # Figure out number of actions from gym env - env = gym.make(FLAGS.game) - num_actions = env.action_space.n - if (FLAGS.game == "Pong-v0" or FLAGS.game == "Breakout-v0"): - # Gym currently specifies 6 actions for pong - # and breakout when only 3 are needed. This - # is a lame workaround. 
- num_actions = 3 - return num_actions - - -def train(session, graph_ops, num_actions, saver): - # Set up game environments (one per thread) - envs = [gym.make(FLAGS.game) for i in range(FLAGS.num_concurrent)] - - summary_ops = setup_summaries() - summary_op = summary_ops[-1] - - # Initialize variables - session.run(tf.global_variables_initializer()) - # Initialize target network weights - session.run(graph_ops["reset_target_network_params"]) - summary_save_path = FLAGS.summary_dir + "/" + FLAGS.experiment - writer = tf.summary.FileWriter(summary_save_path, session.graph) - if not os.path.exists(FLAGS.checkpoint_dir): - os.makedirs(FLAGS.checkpoint_dir) - - # Start num_concurrent actor-learner training threads - - if(FLAGS.num_concurrent == 1): # for debug - actor_learner_thread( - 0, envs[0], session, graph_ops, num_actions, summary_ops, saver) - else: - actor_learner_threads = [threading.Thread(target=actor_learner_thread, args=( - thread_id, envs[thread_id], session, graph_ops, num_actions, summary_ops, saver)) for thread_id in range(FLAGS.num_concurrent)] - for t in actor_learner_threads: - t.start() - - # Show the agents training and write summary statistics - last_summary_time = 0 - while True: - if FLAGS.show_training: - for env in envs: - env.render() - now = time.time() - if now - last_summary_time > FLAGS.summary_interval: - summary_str = session.run(summary_op) - writer.add_summary(summary_str, float(T)) - last_summary_time = now - for t in actor_learner_threads: - t.join() - - -def evaluation(session, graph_ops, saver): - saver.restore(session, FLAGS.checkpoint_path) - print "Restored model weights from ", FLAGS.checkpoint_path - monitor_env = gym.make(FLAGS.game) - gym.wrappers.Monitor(monitor_env, FLAGS.eval_dir + - "/"+FLAGS.experiment+"/eval") - - # Unpack graph ops - s = graph_ops["s"] - q_values = graph_ops["q_values"] - - # Wrap env with AtariEnvironment helper class - env = AtariEnvironment(gym_env=monitor_env, resized_width=FLAGS.resized_width, - resized_height=FLAGS.resized_height, agent_history_length=FLAGS.agent_history_length) - - for i_episode in xrange(FLAGS.num_eval_episodes): - s_t = env.get_initial_state() - ep_reward = 0 - terminal = False - while not terminal: - monitor_env.render() - readout_t = q_values.eval(session=session, feed_dict={s: [s_t]}) - action_index = np.argmax(readout_t) - print "action", action_index - s_t1, r_t, terminal, info = env.step(action_index) - s_t = s_t1 - ep_reward += r_t - print ep_reward - monitor_env.monitor.close() - - -def main(_): - g = tf.Graph() - session = tf.Session(graph=g) - with g.as_default(), session.as_default(): - K.set_session(session) - num_actions = get_num_actions() - graph_ops = build_graph(num_actions) - saver = tf.train.Saver() - - if FLAGS.testing: - evaluation(session, graph_ops, saver) - else: - train(session, graph_ops, num_actions, saver) - - -if __name__ == "__main__": - tf.app.run() -import tensorflow as tf -from skimage.transform import resize -from skimage.color import rgb2gray -import numpy as np -from collections import deque - - -class AtariEnvironment(object): - """ - Small wrapper for gym atari environments. - Responsible for preprocessing screens and holding on to a screen buffer - of size agent_history_length from which environment state - is constructed. 
- """ - - def __init__(self, gym_env, resized_width, resized_height, agent_history_length): - self.env = gym_env - self.resized_width = resized_width - self.resized_height = resized_height - self.agent_history_length = agent_history_length - - self.gym_actions = range(gym_env.action_space.n) - if (gym_env.spec.id == "Pong-v0" or gym_env.spec.id == "Breakout-v0"): - print "Doing workaround for pong or breakout" - # Gym returns 6 possible actions for breakout and pong. - # Only three are used, the rest are no-ops. This just lets us - # pick from a simplified "LEFT", "RIGHT", "NOOP" action space. - self.gym_actions = [1, 2, 3] - - # Screen buffer of size AGENT_HISTORY_LENGTH to be able - # to build state arrays of size [1, AGENT_HISTORY_LENGTH, width, height] - self.state_buffer = deque() - - def get_initial_state(self): - """ - Resets the atari game, clears the state buffer - """ - # Clear the state buffer - self.state_buffer = deque() - - x_t = self.env.reset() - x_t = self.get_preprocessed_frame(x_t) - s_t = np.stack((x_t, x_t, x_t, x_t), axis=0) - - for i in range(self.agent_history_length-1): - self.state_buffer.append(x_t) - return s_t - - def get_preprocessed_frame(self, observation): - """ - See Methods->Preprocessing in Mnih et al. - 1) Get image grayscale - 2) Rescale image - """ - return resize(rgb2gray(observation), (self.resized_width, self.resized_height)) - - def step(self, action_index): - """ - Excecutes an action in the gym environment. - Builds current state (concatenation of agent_history_length-1 previous frames and current one). - Pops oldest frame, adds current frame to the state buffer. - Returns current state. - """ - - x_t1, r_t, terminal, info = self.env.step( - self.gym_actions[action_index]) - x_t1 = self.get_preprocessed_frame(x_t1) - - previous_frames = np.array(self.state_buffer) - s_t1 = np.empty((self.agent_history_length, - self.resized_height, self.resized_width)) - s_t1[:self.agent_history_length-1, ...] = previous_frames - s_t1[self.agent_history_length-1] = x_t1 - - # Pop the oldest frame, add the current frame to the queue - self.state_buffer.popleft() - self.state_buffer.append(x_t1) - - return s_t1, r_t, terminal, info -import tensorflow as tf -from keras import backend as K -from keras.layers import Conv2D, Flatten, Dense, Input -from keras.models import Model - - -def build_network(num_actions, agent_history_length, resized_width, resized_height, name_scope): - with tf.device("/cpu:0"): - with tf.name_scope(name_scope): - state = tf.placeholder(tf.float32, [ - None, agent_history_length, resized_width, resized_height], name="state") - inputs = Input(shape=(agent_history_length, - resized_width, resized_height,)) - model = Conv2D(filters=16, kernel_size=(8, 8), strides=( - 4, 4), activation='relu', padding='same', data_format='channels_first')(inputs) - model = Conv2D(filters=32, kernel_size=(4, 4), strides=( - 2, 2), activation='relu', padding='same', data_format='channels_first')(model) - #model = Conv2D(filter=64, kernel_size=(3,3), strides=(1,1), activation='relu', padding='same')(model) - model = Flatten()(model) - model = Dense(256, activation='relu')(model) - print model - q_values = Dense(num_actions)(model) - - # UserWarning: Update your `Model` call to the Keras 2 API: - # `Model(outputs=Tensor("de..., inputs=Tensor("in.. 
- m = Model(inputs=inputs, outputs=q_values) - - return state, m -from io import open -from setuptools import setup, find_packages -from os import path -import re - - -def readme(): - with open('README.md', encoding='utf-8') as f: - return f.read() - - -def version(): - this_directory = path.abspath(path.dirname(__file__)) - with open(path.join(this_directory, 'livelossplot/version.py')) as f: - version_file = f.read() - version_match = re.search( - r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M) - version = version_match.group(1) - - return version - - -setup(name='livelossplot', - version=version(), - install_requires=['matplotlib', 'notebook'], - description='Live training loss plot in Jupyter Notebook for Keras, PyTorch and others.', - long_description=readme(), - long_description_content_type='text/markdown', - url='https://github.com/stared/livelossplot', - author='Piotr Migdał', - author_email='pmigdal@gmail.com', - keywords=['keras', 'pytorch', 'plot', 'chart'], - license='MIT', - classifiers=[ - 'Development Status :: 3 - Alpha', - 'Framework :: Jupyter', - 'Intended Audience :: Developers', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'Topic :: Scientific/Engineering', - 'Topic :: Scientific/Engineering :: Artificial Intelligence', - 'Topic :: Scientific/Engineering :: Visualization', - 'License :: OSI Approved :: MIT License', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 3' - ], - packages=find_packages(), - zip_safe=False) -# TO START: -# pip install livelossplot -# pip install neptune-cli -# neptune account login -# neptune run minimal-neptune.py -# enjoy results - -from time import sleep -import numpy as np - -from livelossplot import PlotLosses - -liveplot = PlotLosses(target='neptune') -for i in range(20): - liveplot.update({ - 'accuracy': 1 - np.random.rand() / (i + 2.), - 'val_accuracy': 1 - np.random.rand() / (i + 0.5), - 'mse': 1. / (i + 2.), - 'val_mse': 1. / (i + 0.5) - }) - liveplot.draw() - sleep(.5) -import warnings -from .core import draw_plot -from .generic_plot import PlotLosses -from .version import __version__ - -# keras.PlotLossesCallback and pytoune.PlotLossesCallback -# NOT loaded, as they depend on other libraries - -# open question: keep it as deprecated, -# or as an alternative (but legit) interface? 
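The deprecation shims defined below forward to per-framework modules. For reference, the v0.3-style import plugs into a Keras `fit()` call like any other callback; a minimal sketch (the toy model and data are placeholders, not part of the package):

```python
from livelossplot.keras import PlotLossesCallback  # preferred from v0.3 onwards

import numpy as np
from keras.models import Sequential
from keras.layers import Dense

# Toy data and model, just to show where the callback plugs in.
x = np.random.rand(256, 8)
y = (x.sum(axis=1) > 4).astype(int)

model = Sequential([Dense(16, activation="relu", input_shape=(8,)),
                    Dense(1, activation="sigmoid")])
model.compile(optimizer="adam", loss="binary_crossentropy",
              metrics=["accuracy"])

# The callback redraws the loss/metric plots after every epoch.
model.fit(x, y, epochs=5, validation_split=0.25,
          callbacks=[PlotLossesCallback()])
```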
- - -def PlotLossesKeras(*args, **kwargs): - warnings.warn( - "From v0.3 onwards, use:\nfrom livelossplot.keras import PlotLossesCallback", DeprecationWarning) - from .keras import PlotLossesCallback - return PlotLossesCallback(*args, **kwargs) - - -def PlotLossesTensorFlowKeras(*args, **kwargs): - warnings.warn("New and deprecated at the same time!\nFrom v0.3 onwards, use:\nfrom livelossplot.tf_keras import PlotLossesCallback", DeprecationWarning) - from .tf_keras import PlotLossesCallback - return PlotLossesCallback(*args, **kwargs) - - -def PlotLossesPytoune(*args, **kwargs): - warnings.warn( - "From v0.3 onwards, use:\nfrom livelossplot.pytoune import PlotLossesCallback", DeprecationWarning) - from .pytoune import PlotLossesCallback - return PlotLossesCallback(*args, **kwargs) -from __future__ import division -import warnings - -import matplotlib -import matplotlib.pyplot as plt -from IPython.display import clear_output - -MATPLOTLIB_TARGET = 'matplotlib' -NEPTUNE_TARGET = 'neptune' - - -def not_inline_warning(): - backend = matplotlib.get_backend() - if "backend_inline" not in backend: - warnings.warn( - "livelossplot requires inline plots.\nYour current backend is: {}\nRun in a Jupyter environment and execute '%matplotlib inline'.".format(backend)) - -# TODO -# * object-oriented API -# * only integer ticks - - -def draw_plot(logs, metrics, figsize=None, max_epoch=None, - max_cols=2, - series_fmt={'training': '{}', 'validation': 'val_{}'}, - metric2title={}, - skip_first=2, - extra_plots=[], - fig_path=None): - clear_output(wait=True) - plt.figure(figsize=figsize) - - max_rows = (len(metrics) + len(extra_plots) + 1) // max_cols + 1 - - if len(logs) < skip_first: - skip = 0 - elif len(logs) < 2 * skip_first: - skip = len(logs) - skip_first - else: - skip = skip_first - - for metric_id, metric in enumerate(metrics): - plt.subplot(max_rows, max_cols, metric_id + 1) - - if max_epoch is not None: - plt.xlim(1 + skip, max_epoch) - - # y_limit_values = [] - - for serie_label, serie_fmt in series_fmt.items(): - - serie_metric_name = serie_fmt.format(metric) - serie_metric_logs = [(log.get('_i', i + 1), log[serie_metric_name]) - for i, log in enumerate(logs[skip:]) - if serie_metric_name in log] - - if len(serie_metric_logs) > 0: - xs, ys = zip(*serie_metric_logs) - plt.plot(xs, ys, label=serie_label) - - plt.title(metric2title.get(metric, metric)) - plt.xlabel('epoch') - plt.legend(loc='center right') - - for i, extra_plot in enumerate(extra_plots): - plt.subplot(max_rows, max_cols, i + len(metrics) + 1) - extra_plot(logs) - - plt.tight_layout() - if fig_path is not None: - plt.savefig(fig_path) - plt.show() - - -def print_extrema(logs, - metrics, - extrema, - series_fmt={'training': '{}', 'validation': 'val_{}'}, - metric2title={}): - - extrema_logs = [] - for metric in metrics: - - values_fmt = ' (min: {min:8.3f}, max: {max:8.3f}, cur: {cur:8.3f})' - - serie_name_max_length = max([len(key) for key in series_fmt.keys()]) - - # generic for any serie - for i, (serie_label, serie_fmt) in enumerate(series_fmt.items()): - serie_log_fmt = '\n{message: <{fill}}'.format( - message=serie_label, fill=serie_name_max_length) + values_fmt - - serie_metric_name = serie_fmt.format(metric) - serie_metric_logs = [log[serie_metric_name] - for log in logs if serie_metric_name in log] - - log = serie_log_fmt.format( - min=extrema[serie_metric_name].get('min'), - max=extrema[serie_metric_name].get('max'), - cur=serie_metric_logs[-1]) - - if i == 0: - extrema_logs.append(metric2title.get( - metric, metric) + ':' 
+ log) - else: - extrema_logs[-1] += log - - print('\n\n'.join(extrema_logs)) -from __future__ import division -from .generic_plot import PlotLosses - -metric2printable = { - "acc": "Accuracy", - "mean_squared_error": "Mean squared error", - "mean_absolute_error": "Mean absolute error", - "mean_absolute_percentage_error": "Mean absolute percentage error", - # etc - "categorical_crossentropy": "Log-loss", - "sparse_categorical_crossentropy": "Log-loss", - "binary_crossentropy": "Log-loss", - "kullback_leibler_divergence": "Log-loss" -} - - -def loss2name(loss): - if hasattr(loss, '__call__'): - # if passed as a function - return loss.__name__ - else: - # if passed as a string - return loss - - -class _PlotLossesCallback(): - def __init__(self, **kwargs): - self.liveplot = PlotLosses(**kwargs) - - def on_train_begin(self, logs={}): - self.liveplot.set_metrics([ - metric for metric in self.params['metrics'] - if not metric.startswith('val_') - ]) - - # slightly convolved due to model.complie(loss=...) stuff - # vide https://github.com/keras-team/keras/blob/master/keras/engine/training.py - if isinstance(self.model.loss, list): - losses = self.model.loss - elif isinstance(self.model.loss, dict): - losses = list(self.model.loss.values()) - else: - # by far the most common scenario - losses = [self.model.loss] - - metric2printable_updated = metric2printable.copy() - loss_name = loss2name(losses[0]) - metric2printable_updated['loss'] =\ - "{} (cost function)".format( - metric2printable_updated.get(loss_name, loss_name)) - - if len(losses) > 1: - for output_name, loss in zip(self.model.output_names, losses): - loss_name = loss2name(loss) - metric2printable_updated['{}_loss'.format(output_name)] =\ - "{} ({})".format(metric2printable_updated.get( - loss_name, loss_name), output_name) - else: - for output_name in self.model.output_names: - metric2printable_updated['{}_loss'.format(output_name)] =\ - "{} ({})".format(metric2printable_updated.get( - loss_name, loss_name), output_name) - - self.liveplot.metric2title = metric2printable_updated - self.liveplot.set_max_epoch(self.params['epochs']) - - def on_epoch_end(self, epoch, logs={}): - self.liveplot.update(logs.copy()) - self.liveplot.draw() -from __future__ import division -import math - -from .core import draw_plot, print_extrema, not_inline_warning, MATPLOTLIB_TARGET, NEPTUNE_TARGET -from collections import OrderedDict - - -def _is_unset(metric): - return metric is None or math.isnan(metric) or math.isinf(metric) - - -class PlotLosses(): - def __init__(self, - figsize=None, - cell_size=(6, 4), - dynamic_x_axis=False, - max_cols=2, - max_epoch=None, - metric2title={}, - series_fmt={'training': '{}', 'validation': 'val_{}'}, - validation_fmt="val_{}", - plot_extrema=True, - skip_first=2, - extra_plots=[], - fig_path=None, - tensorboard_dir=None, - target=MATPLOTLIB_TARGET): - self.figsize = figsize - self.cell_size = cell_size - self.dynamic_x_axis = dynamic_x_axis - self.max_cols = max_cols - self.max_epoch = max_epoch - self.metric2title = metric2title - self.series_fmt = series_fmt - if validation_fmt is not None: - # backward compatibility - self.series_fmt['validation'] = validation_fmt - self.logs = None - self.base_metrics = None - self.metrics_extrema = None - self.plot_extrema = plot_extrema - self.skip_first = skip_first - self.target = target - self._validate_target() - if target == MATPLOTLIB_TARGET: - not_inline_warning() - self.fig_path = fig_path - - if tensorboard_dir: - from .tensorboard import TensorboardLogger - 
self.tensorboard_logger = TensorboardLogger(tensorboard_dir) - else: - self.tensorboard_logger = None - - self.set_max_epoch(max_epoch) - self.extra_plots = extra_plots - self.global_step = 0 - - def set_max_epoch(self, max_epoch): - self.max_epoch = max_epoch if not self.dynamic_x_axis else None - - def set_metrics(self, metrics): - self.base_metrics = metrics - if self.plot_extrema: - self.metrics_extrema = { - ftm.format(metric): { - 'min': float('inf'), - 'max': -float('inf'), - } - for metric in metrics - for ftm in list(self.series_fmt.values()) - } - if self.figsize is None: - self.figsize = ( - self.max_cols * self.cell_size[0], - ((len(self.base_metrics) + 1) // - self.max_cols + 1) * self.cell_size[1] - ) - - self.logs = [] - - def _update_extrema(self, log): - for metric, value in log.items(): - if metric != "_i": - extrema = self.metrics_extrema[metric] - if _is_unset(extrema['min']) or value < extrema['min']: - extrema['min'] = float(value) - if _is_unset(extrema['max']) or value > extrema['max']: - extrema['max'] = float(value) - - def update(self, log, step=1): - self.global_step += step - if self.logs is None: - self.set_metrics(list(OrderedDict.fromkeys( - [metric.split('_')[-1] for metric in log.keys()]))) - - log["_i"] = self.global_step - self.logs.append(log) - if self.tensorboard_logger: - self.tensorboard_logger.log_logs(log, self.global_step) - if self.plot_extrema: - self._update_extrema(log) - - def draw(self): - if self.target == MATPLOTLIB_TARGET: - draw_plot(self.logs, self.base_metrics, - figsize=self.figsize, - max_epoch=self.max_epoch, - max_cols=self.max_cols, - series_fmt=self.series_fmt, - metric2title=self.metric2title, - skip_first=self.skip_first, - extra_plots=self.extra_plots, - fig_path=self.fig_path) - if self.metrics_extrema: - print_extrema(self.logs, - self.base_metrics, - self.metrics_extrema, - series_fmt=self.series_fmt, - metric2title=self.metric2title) - if self.target == NEPTUNE_TARGET: - from .neptune_integration import neptune_send_plot - neptune_send_plot(self.logs) - - def close(self): - self.tensorboard_logger.close() - - def _validate_target(self): - assert isinstance(self.target, str),\ - 'target must be str, got "{}" instead.'.format(type(self.target)) - if self.target != MATPLOTLIB_TARGET and self.target != NEPTUNE_TARGET: - raise ValueError('Target must be "{}" or "{}", got "{}" instead.'.format( - MATPLOTLIB_TARGET, NEPTUNE_TARGET, self.target)) -from __future__ import absolute_import - -import keras -from .generic_keras import _PlotLossesCallback - - -class PlotLossesCallback(_PlotLossesCallback, keras.callbacks.Callback): - def __init__(self, **kwargs): - keras.callbacks.Callback.__init__(self) - _PlotLossesCallback.__init__(self, **kwargs) -import neptune - -ctx = neptune.Context() - - -def neptune_send_plot(logs): - epoch_data = logs[-1] - for metrics, value in epoch_data.items(): - ctx.channel_send(name=metrics, y=value) -from __future__ import absolute_import - -from pytoune.framework import Callback -from .generic_plot import PlotLosses - - -class PlotLossesCallback(Callback): - def __init__(self, **kwargs): - super(PlotLossesCallback, self).__init__() - self.liveplot = PlotLosses(**kwargs) - self.metrics = None - - def on_train_begin(self, logs): - metrics = ['loss'] + self.model.metrics_names - self.metrics = list(metrics) - self.metrics += ['val_' + metric for metric in metrics] - - def on_epoch_end(self, epoch, logs): - metric_logs = { - metric: logs[metric] for metric in self.metrics - if metric in logs - } - 
self.liveplot.update(metric_logs)
-        self.liveplot.draw()
-import tensorflow as tf
-from datetime import datetime
-from os import path
-
-
-class TensorboardLogger:
-    def __init__(self, logdir="./tensorboard_logs/"):
-        time_str = datetime.now().isoformat()[:-7].replace("T", " ")
-        self._path = path.join(logdir, time_str)
-        self.writer = tf.summary.FileWriter(self._path)
-
-    def close(self):
-        self.writer.close()
-
-    def log_scalar(self, tag, value, global_step):
-        summary = tf.Summary()
-        summary.value.add(tag=tag, simple_value=value)
-        self.writer.add_summary(summary, global_step=global_step)
-        self.writer.flush()
-
-    def log_logs(self, logs, global_step):
-        for k, v in logs.items():
-            self.log_scalar(k, v, global_step)
-from tensorflow import keras
-from .generic_keras import _PlotLossesCallback
-
-
-class PlotLossesCallback(_PlotLossesCallback, keras.callbacks.Callback):
-    def __init__(self, **kwargs):
-        keras.callbacks.Callback.__init__(self)
-        _PlotLossesCallback.__init__(self, **kwargs)
-# coding=utf-8
-# Draw Chinese text on OpenCV images (cv2.putText cannot render CJK glyphs)
-
-import cv2
-import numpy
-from PIL import Image, ImageDraw, ImageFont
-
-# img = cv2.imread("img/xingye-1.png")
-
-
-def cv2ImgAddText(img, text, left, top, textColor=(0, 255, 0), textSize=20):
-    if (isinstance(img, numpy.ndarray)):  # check whether this is an OpenCV image (numpy array)
-        img = Image.fromarray(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
-    draw = ImageDraw.Draw(img)
-    fontText = ImageFont.truetype(
-        "font/simsun.ttc", textSize, encoding="utf-8")
-    draw.text((left, top), text, textColor, font=fontText)
-    return cv2.cvtColor(numpy.asarray(img), cv2.COLOR_RGB2BGR)
-
-
-# img = cv2ImgAddText(img, "大家好,我是星爷", 140, 60, (255, 255, 0), 20)
-
-# cv2.imshow("Image", img)
-# cv2.waitKey(0)
-# cv2.destroyAllWindows()
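The helper above exists because `cv2.putText` cannot render CJK text; the commented-out lines show its intended use. Spelled out as a standalone sketch (the image path and SimSun font path are the script's own placeholders):

```python
import cv2
import chineseText

img = cv2.imread("img/xingye-1.png")
# Draw a yellow Chinese caption at (140, 60) in 20 px SimSun via PIL.
img = chineseText.cv2ImgAddText(img, "大家好,我是星爷", 140, 60, (255, 255, 0), 20)

cv2.imshow("Image", img)
cv2.waitKey(0)
cv2.destroyAllWindows()
```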
-# coding=utf-8
-# Image colorization
-import keras
-# import tensorflow as tf
-from skimage.io import imread, imsave
-from skimage.color import rgb2gray, gray2rgb, rgb2lab, lab2rgb
-from keras.models import Sequential
-from keras.layers import Conv2D, UpSampling2D, InputLayer, Conv2DTranspose
-from keras.preprocessing.image import img_to_array, load_img
-import numpy as np
-from keras.preprocessing.image import ImageDataGenerator
-import os
-import cv2
-
-
-def get_train_data(img_file):
-    image = img_to_array(load_img(img_file))
-    image_shape = image.shape
-    image = np.array(image, dtype=float)
-    x = rgb2lab(1.0 / 255 * image)[:, :, 0]
-    y = rgb2lab(1.0 / 255 * image)[:, :, 1:]
-    y /= 128
-    x = x.reshape(1, image_shape[0], image_shape[1], 1)
-    y = y.reshape(1, image_shape[0], image_shape[1], 2)
-    return x, y, image_shape
-
-
-def build_model():
-    model = Sequential()
-    model.add(InputLayer(input_shape=(None, None, 1)))
-    model.add(Conv2D(8, (3, 3), activation='relu', padding='same', strides=2))
-    model.add(Conv2D(8, (3, 3), activation='relu', padding='same'))
-    model.add(Conv2D(16, (3, 3), activation='relu', padding='same'))
-    model.add(Conv2D(16, (3, 3), activation='relu', padding='same', strides=2))
-    model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
-    model.add(Conv2D(32, (3, 3), activation='relu', padding='same', strides=2))
-    model.add(UpSampling2D((2, 2)))
-    model.add(Conv2D(32, (3, 3), activation='relu', padding='same'))
-    model.add(UpSampling2D((2, 2)))
-    model.add(Conv2D(16, (3, 3), activation='relu', padding='same'))
-    model.add(UpSampling2D((2, 2)))
-    model.add(Conv2D(2, (3, 3), activation='tanh', padding='same'))
-    # model.compile(optimizer='rmsprop', loss='mse')
-    model.compile(optimizer='adam', loss='mse')
-    return model
-
-
-# Train on the data
-def train():
-    x, y, img_shape = get_train_data('./img/colorize/colorize-original.png')
-
-    # x2, y2, img_shape2 = get_train_data(
-    #     './img/colorize/colorize2-original.png')
-
-    model = build_model()
-    num_epochs = 1000  # number of training epochs
-    batch_size = 1
-
-    model.fit(x, y, batch_size=batch_size, epochs=num_epochs)
-    # model.fit(x2, y2, batch_size=batch_size, epochs=num_epochs)
-    model.save('./data/simple_colorize.h5')
-
-
-# Colorize
-def colorize():
-    path = './img/colorize/colorize2.png'
-    # cv2.imwrite('./img/colorize3.png', cv2.imread(path, 0))
-    x, y, image_shape = get_train_data(path)
-    model = build_model()
-    model.load_weights('./data/simple_colorize.h5')
-    output = model.predict(x)
-    output *= 128
-    tmp = np.zeros((200, 200, 3))
-    tmp[:, :, 0] = x[0][:, :, 0]
-    tmp[:, :, 1:] = output[0]
-    colorizePath = path.replace(".png", "-res.png")
-    imsave(colorizePath, lab2rgb(tmp))
-    cv2.imshow("I", cv2.imread(path))
-    cv2.imshow("II", cv2.imread(colorizePath))
-    cv2.waitKey(0)
-    cv2.destroyAllWindows()
-
-    # imsave("test_image_gray.png", rgb2gray(lab2rgb(tmp)))
-
-
-if __name__ == '__main__':
-    # train()
-    colorize()
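The `/= 128` and `*= 128` factors above are the Lab-space normalization: the a/b channels roughly span [-128, 127] while the final `tanh` layer outputs [-1, 1], so the targets are scaled down for training and the predictions scaled back up before `lab2rgb`. A round-trip sketch of that convention (random pixels stand in for a real image):

```python
import numpy as np
from skimage.color import rgb2lab, lab2rgb

rgb = np.random.rand(200, 200, 3)   # stand-in for a loaded image in [0, 1]
lab = rgb2lab(rgb)

L = lab[:, :, 0]                    # lightness, ~[0, 100] -> network input
ab = lab[:, :, 1:] / 128.0          # a/b scaled to ~[-1, 1] -> tanh targets

# Reassemble after prediction: undo the scaling, then convert back to RGB.
recon = np.zeros((200, 200, 3))
recon[:, :, 0] = L
recon[:, :, 1:] = ab * 128.0
rgb_again = lab2rgb(recon)
```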
-# coding=utf-8
-# Composite a hat effect onto a detected face
-import cv2
-
-# OpenCV face detection cascade
-classifier = cv2.CascadeClassifier(
-    "C:\Python36\Lib\site-packages\opencv-master\data\haarcascades\haarcascade_frontalface_default.xml"
-)
-
-img = cv2.imread("img/ag-3.png")  # read the image
-imgCompose = cv2.imread("img/compose/maozi-1.png")
-
-gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grayscale
-color = (0, 255, 0)  # drawing color
-# run face detection
-faceRects = classifier.detectMultiScale(
-    gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
-if len(faceRects):  # non-empty means at least one face was detected
-    for faceRect in faceRects:
-        x, y, w, h = faceRect
-        sp = imgCompose.shape
-        imgComposeSizeH = int(sp[0]/sp[1]*w)
-        if imgComposeSizeH > (y-20):
-            imgComposeSizeH = (y-20)
-        imgComposeSize = cv2.resize(
-            imgCompose, (w, imgComposeSizeH), interpolation=cv2.INTER_NEAREST)
-        top = (y-imgComposeSizeH-20)
-        if top <= 0:
-            top = 0
-        rows, cols, channels = imgComposeSize.shape
-        roi = img[top:top+rows, x:x+cols]
-
-        # Now create a mask of logo and create its inverse mask also
-        img2gray = cv2.cvtColor(imgComposeSize, cv2.COLOR_RGB2GRAY)
-        ret, mask = cv2.threshold(img2gray, 10, 255, cv2.THRESH_BINARY)
-        mask_inv = cv2.bitwise_not(mask)
-
-        # Now black-out the area of logo in ROI
-        img1_bg = cv2.bitwise_and(roi, roi, mask=mask_inv)
-
-        # Take only region of logo from logo image.
-        img2_fg = cv2.bitwise_and(imgComposeSize, imgComposeSize, mask=mask)
-
-        # Put logo in ROI and modify the main image
-        dst = cv2.add(img1_bg, img2_fg)
-        img[top:top+rows, x:x+cols] = dst
-
-cv2.imshow("image", img)
-cv2.waitKey(0)
-cv2.destroyAllWindows()
-# coding=utf-8
-# Face detection in an image - Dlib version
-import cv2
-import dlib
-
-path = "img/ag.png"
-img = cv2.imread(path)
-gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-
-# Face detector
-detector = dlib.get_frontal_face_detector()
-# Facial landmark predictor
-predictor = dlib.shape_predictor(
-    "C:\\Python36\\Lib\\site-packages\\dlib-data\\shape_predictor_68_face_landmarks.dat"
-)
-
-dets = detector(gray, 1)
-for face in dets:
-    # Draw the face bounding box on the image and display it
-    # left = face.left()
-    # top = face.top()
-    # right = face.right()
-    # bottom = face.bottom()
-    # cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2)
-    # cv2.imshow("image", img)
-
-    shape = predictor(img, face)  # find the 68 facial landmarks
-    # Iterate over all landmarks and mark each one with a small circle
-    for pt in shape.parts():
-        pt_pos = (pt.x, pt.y)
-        cv2.circle(img, pt_pos, 1, (0, 255, 0), 2)
-    cv2.imshow("image", img)
-
-cv2.waitKey(0)
-cv2.destroyAllWindows()
-# coding=utf-8
-# Face detection in an image - OpenCV version
-import cv2
-import datetime
-import time
-
-filepath = "img/xingye-1.png"
-# OpenCV face detection cascade
-classifier = cv2.CascadeClassifier(
-    "C:\Python36\Lib\site-packages\opencv-master\data\haarcascades\haarcascade_frontalface_default.xml"
-)
-
-# Program start time
-startTime = datetime.datetime.now()
-
-img = cv2.imread(filepath)  # read the image
-gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)  # convert to grayscale
-color = (0, 255, 0)  # drawing color
-# run face detection
-faceRects = classifier.detectMultiScale(
-    gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
-if len(faceRects):  # non-empty means at least one face was detected
-    for faceRect in faceRects:  # box each face individually
-        x, y, w, h = faceRect
-        # face bounding box
-        cv2.rectangle(img, (x, y), (x + h, y + w), color, 2)
-        # left eye
-        cv2.circle(img, (x + w // 4, y + h // 4 + 30), min(w // 8, h // 8),
-                   color)
-        # right eye
-        cv2.circle(img, (x + 3 * w // 4, y + h // 4 + 30), min(w // 8, h // 8),
-                   color)
-        # mouth
-        cv2.rectangle(img, (x + 3 * w // 8, y + 3 * h // 4),
-                      (x + 5 * w // 8, y + 7 * h // 8), color)
-
-# Program end time
-endTime = datetime.datetime.now()
-print((endTime - startTime))
-cv2.imshow("image", img)  # show the image
-cv2.waitKey(0)
-cv2.destroyAllWindows()
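These scripts hardcode the cascade XML under a Windows site-packages path. If the opencv-python wheel is installed, the bundled cascades can be resolved portably via `cv2.data` (available in recent opencv-python releases; a sketch, not how the original scripts do it):

```python
import cv2

# opencv-python ships the Haar cascade XMLs; cv2.data.haarcascades points
# at their install directory, so no hardcoded Windows path is needed.
cascade_path = cv2.data.haarcascades + "haarcascade_frontalface_default.xml"
classifier = cv2.CascadeClassifier(cascade_path)
assert not classifier.empty(), "cascade failed to load"
```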
-cv2.imshow("Image", img) -cv2.waitKey(0) -cv2.destroyAllWindows() -# coding=utf-8 -# 38x-37x 44x-43x -# 40x-39x 46x-45x - -import cv2 -import dlib -import numpy as np -import time - -# img = cv2.imread(path) - -# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - -# detector = dlib.get_frontal_face_detector() -# predictor = dlib.shape_predictor( -# "C:\\Python36\\Lib\\site-packages\\dlib-data\\shape_predictor_68_face_landmarks.dat" -# ) - -# dets = detector(gray, 1) -# for face in dets: -# shape = predictor(img, face) - -# leftDiffer1 = shape.parts()[37].x - shape.parts()[36].x -# leftDiffer2 = shape.parts()[39].x - shape.parts()[38].x - -# print("leftDiffer1:{} leftDiffer2:{} ".format(leftDiffer1, leftDiffer2)) - -counter = 1 - - -# 获取眼球中心 -def houghCircles(path, counter): - img = cv2.imread(path, 0) - # img = cv2.medianBlur(img, 5) - - x = cv2.Sobel(img, -1, 1, 0, ksize=3) - y = cv2.Sobel(img, -1, 0, 1, ksize=3) - absx = cv2.convertScaleAbs(x) - absy = cv2.convertScaleAbs(y) - img = cv2.addWeighted(absx, 0.5, absy, 0.5, 0) - - # ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCR_CB) - # channels = cv2.split(ycrcb) - # cv2.equalizeHist(channels[0], channels[0]) #输入通道、输出通道矩阵 - # cv2.merge(channels, ycrcb) #合并结果通道 - # cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2BGR, img) - - # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - - cimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR) - - # cv2.imshow("img2", img) - # cv2.imshow("grayimg", grayimg) - - circles = cv2.HoughCircles( - img, - cv2.HOUGH_GRADIENT, - 1, - 50, - param1=50, - param2=10, - minRadius=2, - maxRadius=0) - - circles = np.uint16(np.around(circles)) - for i in circles[0, :]: - # draw the outer circle - # cv2.circle(cimg, (i[0], i[1]), i[2], (0, 255, 0), 1) - # draw the center of the circle - cv2.circle(cimg, (i[0], i[1]), 2, (0, 0, 255), 2) - # cv2.imshow("img" + str(counter), cimg) - return (i[0] + 3, i[1] + 3) - - -# 彩色直方图均衡化 -def hist(img): - ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCR_CB) - channels = cv2.split(ycrcb) - cv2.equalizeHist(channels[0], channels[0]) # 输入通道、输出通道矩阵 - cv2.merge(channels, ycrcb) # 合并结果通道 - cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2BGR, img) - return img - - -classifier = cv2.CascadeClassifier( - # haarcascade_eye_tree_eyeglasses - "C:\Python36\Lib\site-packages\opencv-master\data\haarcascades\haarcascade_eye.xml" -) - - -def discern(img, counter): - grayImg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - color = (0, 255, 0) - faceRects = classifier.detectMultiScale( - grayImg, scaleFactor=1.2, minNeighbors=3, minSize=(58, 58)) - if len(faceRects): - for faceRect in faceRects: - x, y, w, h = faceRect - rightEyeImg = img[(y):(y + h), (x):(x + w)] - # cv2.rectangle(img, (x, y), (x + h, y + w), color, 2) - rightEyeImg = cv2.GaussianBlur(rightEyeImg, (5, 5), 1) - # rightEyeImg = hist(rightEyeImg) - cv2.imwrite("img/temp.png", rightEyeImg) - # cv2.imwrite("img/temp.png", rightEyeImg) - circleCenter = houghCircles("img/temp.png", counter) # (x,y) - cv2.circle(img, (x + circleCenter[0], y + circleCenter[1]), 2, - (128, 0, 0), 2) - counter += 1 - cv2.imshow("image", img) - - -# path = "img/ag-3.png" -# img = cv2.imread(path) -# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) -# faceRects = classifier.detectMultiScale( -# gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32)) -# if len(faceRects): -# for faceRect in faceRects: -# x, y, w, h = faceRect -# # cv2.rectangle(img, (x, y), (x + h, y + w), (255, 0, 0), 2) -# rightEyeImg = img[(y):(y + h), (x):(x + w)] -# cv2.imwrite("img/temp.png", rightEyeImg) -# houghCircles("img/temp.png", counter) -# 
-# coding=utf-8
-# 38x-37x 44x-43x
-# 40x-39x 46x-45x
-
-import cv2
-import dlib
-import numpy as np
-import time
-
-# img = cv2.imread(path)
-
-# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-
-# detector = dlib.get_frontal_face_detector()
-# predictor = dlib.shape_predictor(
-#     "C:\\Python36\\Lib\\site-packages\\dlib-data\\shape_predictor_68_face_landmarks.dat"
-# )
-
-# dets = detector(gray, 1)
-# for face in dets:
-#     shape = predictor(img, face)
-
-#     leftDiffer1 = shape.parts()[37].x - shape.parts()[36].x
-#     leftDiffer2 = shape.parts()[39].x - shape.parts()[38].x
-
-#     print("leftDiffer1:{} leftDiffer2:{} ".format(leftDiffer1, leftDiffer2))
-
-counter = 1
-
-
-# Find the center of the eyeball
-def houghCircles(path, counter):
-    img = cv2.imread(path, 0)
-    # img = cv2.medianBlur(img, 5)
-
-    x = cv2.Sobel(img, -1, 1, 0, ksize=3)
-    y = cv2.Sobel(img, -1, 0, 1, ksize=3)
-    absx = cv2.convertScaleAbs(x)
-    absy = cv2.convertScaleAbs(y)
-    img = cv2.addWeighted(absx, 0.5, absy, 0.5, 0)
-
-    # ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCR_CB)
-    # channels = cv2.split(ycrcb)
-    # cv2.equalizeHist(channels[0], channels[0])  # input channel, output channel matrices
-    # cv2.merge(channels, ycrcb)  # merge the result channels back
-    # cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2BGR, img)
-
-    # img = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-
-    cimg = cv2.cvtColor(img, cv2.COLOR_GRAY2BGR)
-
-    # cv2.imshow("img2", img)
-    # cv2.imshow("grayimg", grayimg)
-
-    circles = cv2.HoughCircles(
-        img,
-        cv2.HOUGH_GRADIENT,
-        1,
-        50,
-        param1=50,
-        param2=10,
-        minRadius=2,
-        maxRadius=0)
-
-    circles = np.uint16(np.around(circles))
-    for i in circles[0, :]:
-        # draw the outer circle
-        # cv2.circle(cimg, (i[0], i[1]), i[2], (0, 255, 0), 1)
-        # draw the center of the circle
-        cv2.circle(cimg, (i[0], i[1]), 2, (0, 0, 255), 2)
-    # cv2.imshow("img" + str(counter), cimg)
-    return (i[0] + 3, i[1] + 3)
-
-
-# Color histogram equalization
-def hist(img):
-    ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCR_CB)
-    channels = cv2.split(ycrcb)
-    cv2.equalizeHist(channels[0], channels[0])  # input channel, output channel matrices
-    cv2.merge(channels, ycrcb)  # merge the result channels back
-    cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2BGR, img)
-    return img
-
-
-classifier = cv2.CascadeClassifier(
-    # haarcascade_eye_tree_eyeglasses
-    "C:\Python36\Lib\site-packages\opencv-master\data\haarcascades\haarcascade_eye.xml"
-)
-
-
-def discern(img, counter):
-    grayImg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-    color = (0, 255, 0)
-    faceRects = classifier.detectMultiScale(
-        grayImg, scaleFactor=1.2, minNeighbors=3, minSize=(58, 58))
-    if len(faceRects):
-        for faceRect in faceRects:
-            x, y, w, h = faceRect
-            rightEyeImg = img[(y):(y + h), (x):(x + w)]
-            # cv2.rectangle(img, (x, y), (x + h, y + w), color, 2)
-            rightEyeImg = cv2.GaussianBlur(rightEyeImg, (5, 5), 1)
-            # rightEyeImg = hist(rightEyeImg)
-            cv2.imwrite("img/temp.png", rightEyeImg)
-            # cv2.imwrite("img/temp.png", rightEyeImg)
-            circleCenter = houghCircles("img/temp.png", counter)  # (x,y)
-            cv2.circle(img, (x + circleCenter[0], y + circleCenter[1]), 2,
-                       (128, 0, 0), 2)
-            counter += 1
-    cv2.imshow("image", img)
-
-
-# path = "img/ag-3.png"
-# img = cv2.imread(path)
-# gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
-# faceRects = classifier.detectMultiScale(
-#     gray, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32))
-# if len(faceRects):
-#     for faceRect in faceRects:
-#         x, y, w, h = faceRect
-#         # cv2.rectangle(img, (x, y), (x + h, y + w), (255, 0, 0), 2)
-#         rightEyeImg = img[(y):(y + h), (x):(x + w)]
-#         cv2.imwrite("img/temp.png", rightEyeImg)
-#         houghCircles("img/temp.png", counter)
-#         counter += 1
-# # cv2.imshow("img", houghCircles("img/temp.png"))
-
-path = "img/ag.png"
-img = cv2.imread(path)
-discern(img, counter)
-
-# cap = cv2.VideoCapture(0)
-# while (1):
-#     ret, frame = cap.read()
-
-#     # cv2.imshow('frame', gray)
-#     discern(frame, counter)
-#     if cv2.waitKey(1) & 0xFF == ord('q'):
-#         break
-
-#
-
-# cv2.imwrite('img/eye-2.png', rightEyeImg)
-
-# eyeImg = img[(y):(y + h), (x):(x + w)]
-# eyeImg = cv2.medianBlur(eyeImg, 5)
-# cimg = cv2.cvtColor(eyeImg, cv2.COLOR_GRAY2BGR)
-
-# circles = cv2.HoughCircles(
-#     eyeImg,
-#     cv2.HOUGH_GRADIENT,
-#     1,
-#     20,
-#     param1=50,
-#     param2=30,
-#     minRadius=0,
-#     maxRadius=0)
-
-# circles = np.uint16(np.around(circles))
-# for i in circles[0, :]:
-#     # draw the outer circle
-#     cv2.circle(cimg, (i[0], i[1]), i[2], (0, 255, 0), 2)
-#     # draw the center of the circle
-#     cv2.circle(cimg, (i[0], i[1]), 2, (0, 0, 255), 3)
-
-# cv2.imshow('detected circles', cimg)
-
-# cv2.imshow("image", img)  # show the image
-
-# cv2.waitKey(0)
-# cv2.destroyAllWindows()
-
-# time.sleep(1)
-
-# img = cv2.imread("img/eye-2.png")
-# ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCR_CB)
-# channels = cv2.split(ycrcb)
-# cv2.equalizeHist(channels[0], channels[0])  # input channel, output channel matrices
-# cv2.merge(channels, ycrcb)  # merge the result channels back
-# cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2BGR, img)
-# # cv2.imshow("old image", cv2.imread("img/hist.png"))
-# # cv2.imshow("image", img)
-
-# cv2.imwrite("img/eye-3.png", img)
-
-# time.sleep(1)
-
-# cv2.imshow('detected circles', cimg)
-
-cv2.waitKey(0)
-cv2.destroyAllWindows()
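Both eye scripts round-trip the eye crop through `img/temp.png` before running the Hough transform. A purely in-memory variant of the same Sobel-then-HoughCircles pipeline (a sketch under the same parameters; `eye_center` is not from the original code) avoids the disk write and also guards against `HoughCircles` returning `None`:

```python
import cv2

def eye_center(eye_bgr):
    """In-memory variant of houghCircles(): no img/temp.png round-trip."""
    gray = cv2.cvtColor(eye_bgr, cv2.COLOR_BGR2GRAY)
    gx = cv2.Sobel(gray, -1, 1, 0, ksize=3)   # horizontal gradient
    gy = cv2.Sobel(gray, -1, 0, 1, ksize=3)   # vertical gradient
    edges = cv2.addWeighted(cv2.convertScaleAbs(gx), 0.5,
                            cv2.convertScaleAbs(gy), 0.5, 0)
    circles = cv2.HoughCircles(
        edges, cv2.HOUGH_GRADIENT,
        1,             # dp: accumulator resolution (1 = same as input)
        50,            # minDist between detected centers
        param1=50,     # upper Canny threshold used internally
        param2=10,     # accumulator threshold; lower = more (false) circles
        minRadius=2,
        maxRadius=0)   # 0 = no upper bound on radius
    if circles is None:
        return None
    x, y, _ = circles[0, 0]
    return int(x) + 3, int(y) + 3  # same +3 offset as the original helper
```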
-for i in circles[0, :]: - # draw the outer circle - # cv2.circle(cimg, (i[0], i[1]), i[2], (0, 255, 0), 1) - # draw the center of the circle - cv2.circle(cimg, (i[0], i[1]), 2, (0, 0, 255), 2) -cv2.imshow("img", cimg) - -cv2.waitKey(0) -cv2.destroyAllWindows() -# coding=utf-8 -#人脸识别类 - 使用face_recognition模块 -import cv2 -import face_recognition -import os - -path = "img/face_recognition" # 模型数据图片目录 -cap = cv2.VideoCapture(0) -total_image_name = [] -total_face_encoding = [] -for fn in os.listdir(path): # fn 表示的是文件名q - print(path + "/" + fn) - total_face_encoding.append( - face_recognition.face_encodings( - face_recognition.load_image_file(path + "/" + fn))[0]) - fn = fn[:(len(fn) - 4)] # 截取图片名(这里应该把images文件中的图片名命名为为人物名) - total_image_name.append(fn) # 图片名字列表 -while (1): - ret, frame = cap.read() - # 发现在视频帧所有的脸和face_enqcodings - face_locations = face_recognition.face_locations(frame) - face_encodings = face_recognition.face_encodings(frame, face_locations) - # 在这个视频帧中循环遍历每个人脸 - for (top, right, bottom, left), face_encoding in zip( - face_locations, face_encodings): - # 看看面部是否与已知人脸相匹配。 - for i, v in enumerate(total_face_encoding): - match = face_recognition.compare_faces( - [v], face_encoding, tolerance=0.5) - name = "Unknown" - if match[0]: - name = total_image_name[i] - break - # 画出一个框,框住脸 - cv2.rectangle(frame, (left, top), (right, bottom), (0, 0, 255), 2) - # 画出一个带名字的标签,放在框下 - cv2.rectangle(frame, (left, bottom - 35), (right, bottom), (0, 0, 255), - cv2.FILLED) - font = cv2.FONT_HERSHEY_DUPLEX - cv2.putText(frame, name, (left + 6, bottom - 6), font, 1.0, - (255, 255, 255), 1) - # 显示结果图像 - cv2.imshow('Video', frame) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - -cap.release() -cv2.destroyAllWindows() -# coding=utf-8 -# 数字化妆类 -import face_recognition -from PIL import Image, ImageDraw - -# 加载图片到numpy array -image = face_recognition.load_image_file("img/ag.png") - -# 标识脸部特征 -face_landmarks_list = face_recognition.face_landmarks(image) - -for face_landmarks in face_landmarks_list: - pil_image = Image.fromarray(image) - d = ImageDraw.Draw(pil_image, 'RGBA') - - # 绘制眉毛 - d.polygon(face_landmarks['left_eyebrow'], fill=(68, 54, 39, 128)) - d.polygon(face_landmarks['right_eyebrow'], fill=(68, 54, 39, 128)) - d.line(face_landmarks['left_eyebrow'], fill=(68, 54, 39, 150), width=5) - d.line(face_landmarks['right_eyebrow'], fill=(68, 54, 39, 150), width=5) - - # 绘制嘴唇 - d.polygon(face_landmarks['top_lip'], fill=(150, 0, 0, 128)) - d.polygon(face_landmarks['bottom_lip'], fill=(150, 0, 0, 128)) - d.line(face_landmarks['top_lip'], fill=(150, 0, 0, 64), width=8) - d.line(face_landmarks['bottom_lip'], fill=(150, 0, 0, 64), width=8) - - # 绘制眼睛 - d.polygon(face_landmarks['left_eye'], fill=(255, 255, 255, 30)) - d.polygon(face_landmarks['right_eye'], fill=(255, 255, 255, 30)) - - # 绘制眼线 - d.line( - face_landmarks['left_eye'] + [face_landmarks['left_eye'][0]], - fill=(0, 0, 0, 110), - width=6) - d.line( - face_landmarks['right_eye'] + [face_landmarks['right_eye'][0]], - fill=(0, 0, 0, 110), - width=6) - - pil_image.show() -# coding=utf-8 -# 绘制面部轮廓 -import face_recognition -from PIL import Image, ImageDraw - -# 将图片文件加载到numpy 数组中 -image = face_recognition.load_image_file("img/ag.png") - -# 查找图像中所有面部的所有面部特征 -face_landmarks_list = face_recognition.face_landmarks(image) - -for face_landmarks in face_landmarks_list: - facial_features = [ - 'chin', # 下巴 - 'left_eyebrow', # 左眉毛 - 'right_eyebrow', # 右眉毛 - 'nose_bridge', # 鼻樑 - 'nose_tip', # 鼻尖 - 'left_eye', # 左眼 - 'right_eye', # 右眼 - 'top_lip', # 上嘴唇 - 'bottom_lip' # 
下嘴唇 - ] - pil_image = Image.fromarray(image) - d = ImageDraw.Draw(pil_image) - for facial_feature in facial_features: - d.line(face_landmarks[facial_feature], fill=(255, 255, 255), width=2) - pil_image.show() -# coding=utf-8 -import cv2 -import numpy -import dlib - -modelPath = "C:\Python36\Lib\site-packages\dlib-data\shape_predictor_68_face_landmarks.dat" -SCALE_FACTOR = 1 -FEATHER_AMOUNT = 11 - -FACE_POINTS = list(range(17, 68)) -MOUTH_POINTS = list(range(48, 61)) -RIGHT_BROW_POINTS = list(range(17, 22)) -LEFT_BROW_POINTS = list(range(22, 27)) -RIGHT_EYE_POINTS = list(range(36, 42)) -LEFT_EYE_POINTS = list(range(42, 48)) -NOSE_POINTS = list(range(27, 35)) -JAW_POINTS = list(range(0, 17)) - -ALIGN_POINTS = (LEFT_BROW_POINTS + RIGHT_EYE_POINTS + LEFT_EYE_POINTS + - RIGHT_BROW_POINTS + NOSE_POINTS + MOUTH_POINTS) - -OVERLAY_POINTS = [ - LEFT_EYE_POINTS + RIGHT_EYE_POINTS + LEFT_BROW_POINTS + RIGHT_BROW_POINTS, - NOSE_POINTS + MOUTH_POINTS, -] - -COLOUR_CORRECT_BLUR_FRAC = 0.6 - -detector = dlib.get_frontal_face_detector() -predictor = dlib.shape_predictor(modelPath) - - -class TooManyFaces(Exception): - pass - - -class NoFaces(Exception): - pass - - -def get_landmarks(im): - rects = detector(im, 1) - - if len(rects) > 1: - raise TooManyFaces - if len(rects) == 0: - raise NoFaces - - return numpy.matrix([[p.x, p.y] for p in predictor(im, rects[0]).parts()]) - - -def annotate_landmarks(im, landmarks): - im = im.copy() - for idx, point in enumerate(landmarks): - pos = (point[0, 0], point[0, 1]) - cv2.putText( - im, - str(idx), - pos, - fontFace=cv2.FONT_HERSHEY_SCRIPT_SIMPLEX, - fontScale=0.4, - color=(0, 0, 255)) - cv2.circle(im, pos, 3, color=(0, 255, 255)) - return im - - -def draw_convex_hull(im, points, color): - points = cv2.convexHull(points) - cv2.fillConvexPoly(im, points, color=color) - - -def get_face_mask(im, landmarks): - im = numpy.zeros(im.shape[:2], dtype=numpy.float64) - - for group in OVERLAY_POINTS: - draw_convex_hull(im, landmarks[group], color=1) - - im = numpy.array([im, im, im]).transpose((1, 2, 0)) - - im = (cv2.GaussianBlur(im, (FEATHER_AMOUNT, FEATHER_AMOUNT), 0) > 0) * 1.0 - im = cv2.GaussianBlur(im, (FEATHER_AMOUNT, FEATHER_AMOUNT), 0) - - return im - - -def transformation_from_points(points1, points2): - points1 = points1.astype(numpy.float64) - points2 = points2.astype(numpy.float64) - c1 = numpy.mean(points1, axis=0) - c2 = numpy.mean(points2, axis=0) - points1 -= c1 - points2 -= c2 - s1 = numpy.std(points1) - s2 = numpy.std(points2) - points1 /= s1 - points2 /= s2 - U, S, Vt = numpy.linalg.svd(points1.T * points2) - R = (U * Vt).T - return numpy.vstack([ - numpy.hstack(((s2 / s1) * R, c2.T - (s2 / s1) * R * c1.T)), - numpy.matrix([0., 0., 1.]) - ]) - - -def read_im_and_landmarks(fname): - im = cv2.imread(fname, cv2.IMREAD_COLOR) - im = cv2.resize(im, - (im.shape[1] * SCALE_FACTOR, im.shape[0] * SCALE_FACTOR)) - s = get_landmarks(im) - - return im, s - - -def warp_im(im, M, dshape): - output_im = numpy.zeros(dshape, dtype=im.dtype) - cv2.warpAffine( - im, - M[:2], (dshape[1], dshape[0]), - dst=output_im, - borderMode=cv2.BORDER_TRANSPARENT, - flags=cv2.WARP_INVERSE_MAP) - return output_im - - -def correct_colours(im1, im2, landmarks1): - blur_amount = COLOUR_CORRECT_BLUR_FRAC * numpy.linalg.norm( - numpy.mean(landmarks1[LEFT_EYE_POINTS], axis=0) - - numpy.mean(landmarks1[RIGHT_EYE_POINTS], axis=0)) - blur_amount = int(blur_amount) - if blur_amount % 2 == 0: - blur_amount += 1 - im1_blur = cv2.GaussianBlur(im1, (blur_amount, blur_amount), 0) - im2_blur = 
cv2.GaussianBlur(im2, (blur_amount, blur_amount), 0) - - im2_blur += (128 * (im2_blur <= 1.0)).astype(im2_blur.dtype) - - return (im2.astype(numpy.float64) * im1_blur.astype(numpy.float64) / - im2_blur.astype(numpy.float64)) - - -im1, landmarks1 = read_im_and_landmarks("img/ag-2.png") -im2, landmarks2 = read_im_and_landmarks("img/ag.png") - -M = transformation_from_points(landmarks1[ALIGN_POINTS], - landmarks2[ALIGN_POINTS]) - -mask = get_face_mask(im2, landmarks2) -warped_mask = warp_im(mask, M, im1.shape) -combined_mask = numpy.max( - [get_face_mask(im1, landmarks1), warped_mask], axis=0) - -warped_im2 = warp_im(im2, M, im1.shape) -warped_corrected_im2 = correct_colours(im1, warped_im2, landmarks1) - -output_im = im1 * (1.0 - combined_mask) + warped_corrected_im2 * combined_mask - -cv2.imwrite("img/faceswap.png", output_im) - -# cv2.waitKey(0) -# cv2.destroyAllWindows() -# coding=utf-8 -# 性别识别 - -import cv2 -from keras.models import load_model -import numpy as np -import chineseText - -img = cv2.imread("img/gather.png") -face_classifier = cv2.CascadeClassifier( - "C:\Python36\Lib\site-packages\opencv-master\data\haarcascades\haarcascade_frontalface_default.xml" -) -gray = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) -faces = face_classifier.detectMultiScale( - gray, scaleFactor=1.2, minNeighbors=3, minSize=(140, 140)) - -gender_classifier = load_model( - "classifier/gender_models/simple_CNN.81-0.96.hdf5") -gender_labels = {0: '女', 1: '男'} -color = (255, 255, 255) - -for (x, y, w, h) in faces: - face = img[(y - 60):(y + h + 60), (x - 30):(x + w + 30)] - face = cv2.resize(face, (48, 48)) - face = np.expand_dims(face, 0) - face = face / 255.0 - gender_label_arg = np.argmax(gender_classifier.predict(face)) - gender = gender_labels[gender_label_arg] - cv2.rectangle(img, (x, y), (x + h, y + w), color, 2) - img = chineseText.cv2ImgAddText(img, gender, x + h, y, color, 30) - -cv2.imshow("Image", img) -cv2.waitKey(0) -cv2.destroyAllWindows() -# coding=utf-8 -# 抠图 - -import numpy as np -import cv2 -from matplotlib import pyplot as plt - -img = cv2.imread('img/face_recognition/Gates.png') -mask = np.zeros(img.shape[:2], np.uint8) -bgdModel = np.zeros((1, 65), np.float64) -fgdModel = np.zeros((1, 65), np.float64) -rect = (0, 0, 505, 448) # 划定区域 -cv2.grabCut(img, mask, rect, bgdModel, fgdModel, 5, - cv2.GC_INIT_WITH_RECT) # 函数返回值为mask,bgdModel,fgdModel -mask2 = np.where((mask == 2) | (mask == 0), 0, 1).astype('uint8') # 0和2做背景 - -img = img * mask2[:, :, np.newaxis] # 使用蒙板来获取前景区域 - -cv2.imshow('p', img) - -cv2.waitKey(0) -cv2.destroyAllWindows() -# coding=utf-8 -# 文字识别类 -from PIL import Image -import pytesseract -import cv2 - -path = "img\\text-img.png" - -text = pytesseract.image_to_string(Image.open(path), lang='chi_sim') -print(text) - -img = cv2.imread(path) -cv2.imshow("Image", img) - -cv2.waitKey(0) -cv2.destroyAllWindows() -# coding=utf-8 -# 练习类 -import datetime -import time - -# 开始计时 -startTime = datetime.datetime.now() - -time.sleep(1) - -# 结束计时 -endTime = datetime.datetime.now() -print(endTime - startTime) -# 输出:0:00:01.000791 -# coding=utf-8 -# 版本号输出类 -import cv2 -import dlib -import face_recognition -import keras -import tensorflow - -print(cv2.__version__) # 输出:3.4.1 -print(dlib.__version__) # 输出:19.8.1 -print(face_recognition.__version__) # 输出:1.2.2 - -print(keras.__version__) # 输出:2.1.6 -print(tensorflow.VERSION) # 输出:1.8.0 -# coding=utf-8 -#视频人脸检测类 - Dlib版本 -import cv2 -import dlib - -detector = dlib.get_frontal_face_detector() # 使用默认的人类识别器模型 - - -def discern(img): - gray = cv2.cvtColor(img, 
cv2.COLOR_BGR2GRAY) - dets = detector(gray, 1) - for face in dets: - left = face.left() - top = face.top() - right = face.right() - bottom = face.bottom() - cv2.rectangle(img, (left, top), (right, bottom), (0, 255, 0), 2) - cv2.imshow("image", img) - - -cap = cv2.VideoCapture(0) -while (1): - ret, img = cap.read() - discern(img) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - -cap.release() -cv2.destroyAllWindows() -# coding=utf-8 -#视频人脸检测类 - OpenCV版本 -import cv2 - - -# 图片识别方法 -def discern(img): - grayImg = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY) - # OpenCV人脸识别分类器 - classifier = cv2.CascadeClassifier( - "C:\Python36\Lib\site-packages\opencv-master\data\haarcascades\haarcascade_frontalface_default.xml" - ) - color = (0, 255, 0) # 定义绘制颜色 - # 调用识别人脸 - faceRects = classifier.detectMultiScale( - grayImg, scaleFactor=1.2, minNeighbors=3, minSize=(32, 32)) - if len(faceRects): # 大于0则检测到人脸 - for faceRect in faceRects: # 单独框出每一张人脸 - x, y, w, h = faceRect - # 框出人脸 - cv2.rectangle(img, (x, y), (x + h, y + w), color, 2) - - cv2.imshow("image", img) # 显示图像 - - -cap = cv2.VideoCapture(0) -while (1): - ret, frame = cap.read() - - # cv2.imshow('frame', gray) - discern(frame) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - -cap.release() -cv2.destroyAllWindows() -# coding=utf-8 -# 直方图 - -import cv2 -import numpy as np -from matplotlib import pyplot as plt - -plt.rcParams['font.sans-serif'] = ['SimHei'] # 用来正常显示中文标签 - -# ut = np.zeros(256, dtype=img.dtype) #创建空的查找表 -# hist = cv2.calcHist( -# [img], #计算图像的直方图 -# [0], #使用的通道 -# None, #没有使用mask -# [256], #it is a 1D histogram -# [0.0, 255.0]) - -# def calcAndDrawHist(image, color): -# hist = cv2.calcHist([image], [0], None, [256], [0.0, 255.0]) -# minVal, maxVal, minLoc, maxLoc = cv2.minMaxLoc(hist) -# histImg = np.zeros([256, 256, 3], np.uint8) -# hpt = int(0.9 * 256) - -# for h in range(256): -# intensity = int(hist[h] * hpt / maxVal) -# cv2.line(histImg, (h, 256), (h, 256 - intensity), color) - -# return histImg - -# img = cv2.imread("img/hist.png") -# b, g, r = cv2.split(img) - -# print(b) -# print(g) -# print(r) - -# histImgB = calcAndDrawHist(b, [255, 0, 0]) -# histImgG = calcAndDrawHist(g, [0, 255, 0]) -# histImgR = calcAndDrawHist(r, [0, 0, 255]) - -# cv2.imshow("histImgB", histImgB) -# cv2.imshow("histImgG", histImgG) -# cv2.imshow("histImgR", histImgR) - -# #灰色直方图均衡化 -# img = cv2.imread("img/hist.png", 0) -# equ = cv2.equalizeHist(img) -# cv2.imshow("old image", img) - -# cv2.imshow("image", equ) - -# 彩色直方图均衡化 -# img = cv2.imread("img/hist.png") -# ycrcb = cv2.cvtColor(img, cv2.COLOR_BGR2YCR_CB) -# channels = cv2.split(ycrcb) -# cv2.equalizeHist(channels[0], channels[0]) #输入通道、输出通道矩阵 -# cv2.merge(channels, ycrcb) #合并结果通道 -# cv2.cvtColor(ycrcb, cv2.COLOR_YCR_CB2BGR, img) -# cv2.imshow("old image", cv2.imread("img/hist.png")) -# cv2.imshow("image", img) - -# 绘制直方图 -# img = cv2.imread("img/hist.png") -# chans = cv2.split(img) -# colors = ("b", "g", "r") -# plt.figure() -# plt.title("直方图分布") -# plt.xlabel("颜色值") -# plt.ylabel("像素点") -# for (chan, color) in zip(chans, colors): -# hist = cv2.calcHist([chan], [0], None, [256], [0, 256]) -# plt.plot(hist, color=color) -# plt.xlim([0, 256]) -# plt.show() - -# #添加噪声 -# img = cv2.imread("img/black.png") - -# for k in range(0, 1000): -# xi = int(np.random.uniform(0, img.shape[1])) -# xj = int(np.random.uniform(0, img.shape[0])) -# if img.ndim == 2: -# img[xj, xi] = 255 -# elif img.ndim == 3: -# img[xj, xi, 0] = 255 -# img[xj, xi, 1] = 255 -# img[xj, xi, 2] = 255 -# cv2.imwrite("img/black-noise.png", img) -# 
cv2.imshow("image", img) - -# #滤波器 -img = cv2.imread("img/black-noise.png") - -dst = cv2.blur(img, (5, 5)) # 均值滤波 -gaussian = cv2.GaussianBlur(img, (5, 5), 1) # 高斯滤波 -median = cv2.medianBlur(img, 5) # 中值滤波 -cv2.imshow("image", gaussian) - -# #Sobel算子 —— 是一种带有方向性的滤波器 -# img = cv2.imread('img/ag.png', cv2.IMREAD_COLOR) -# x = cv2.Sobel( -# img, cv2.CV_16S, 1, 0 -# ) #cv2.CV_16S -- Sobel 函数求完导数后会有负值和大于255的值,而原图像是uint8(8位无符号数据),所以在建立图像时长度不够,会被截断,所以使用16位有符号数据 -# y = cv2.Sobel(img, cv2.CV_16S, 0, 1) -# absx = cv2.convertScaleAbs( -# x) #convertScaleAbs() -- 转回uint8形式,否则将无法显示图像,而只是一副灰色图像 -# absy = cv2.convertScaleAbs(y) -# dist = cv2.addWeighted(absx, 0.5, absy, 0.5, 0) #参数2:第1张图的权重;参数4:第2张图的权重 -# # cv2.imshow('y', absy) -# # cv2.imshow('x', absx) -# cv2.imshow('dsit', dist) -# cv2.imshow('img', img) - -cv2.waitKey(0) -cv2.destroyAllWindows() -# coding=utf-8 -# HSV转换(颜色提取) - -import cv2 -import numpy as np - -cap = cv2.VideoCapture(0) - -while (1): - _, frame = cap.read() - hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV) - - # 在PS里用取色器的HSV - psHSV = [112, 89, 52] - diff = 40 # 上下浮动值 - # 因为PS的HSV(HSB)取值是:0~360、0~1、0~1,而OpenCV的HSV是:0~180、0~255、0~255,所以要对ps的hsv进行处理,H/2、SV*255 - lowerHSV = [(psHSV[0] - diff) / 2, (psHSV[1] - diff) * 255 / 100, - (psHSV[2] - diff) * 255 / 100] - upperHSV = [(psHSV[0] + diff) / 2, (psHSV[1] + diff) * 255 / 100, - (psHSV[2] + diff) * 255 / 100] - - mask = cv2.inRange(hsv, np.array(lowerHSV), np.array(upperHSV)) - - # 使用位“与运算”提取颜色部分 - res = cv2.bitwise_and(frame, frame, mask=mask) - # 使用高斯模式优化图片 - res = cv2.GaussianBlur(res, (5, 5), 1) - - cv2.imshow('frame', frame) - # cv2.imshow('mask', mask) - cv2.imshow('res', res) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - -cv2.destroyAllWindows() -# coding=utf-8 -# 图片基础 -import cv2 -import numpy as np - -img = cv2.imread("img/ag.png") - -# shape = img.shape # 形状 (高,宽,3通道[彩色图]) -# size = img.size # 像素总数 -# dtype = img.dtype # uint8 图片类型 - -# roi = img[200:350, 300:330] # [y轴选取区域,x轴选取区域] -# img[0:150, 100:130] = roi -# cv2.imshow("image", img) - -b, g, r = cv2.split(img) # 分割通道 -img = cv2.merge((b, g, r)) # 合并通道 - -img = img[:, :, 0] - -cv2.imshow("image", img) - -cv2.waitKey(0) -cv2.destroyAllWindows() -# coding=utf-8 -# 图片修复 - -import cv2 -import numpy as np - -path = "img/inpaint.png" - -img = cv2.imread(path) -hight, width, depth = img.shape[0:3] - -# 图片二值化处理,把[240, 240, 240]~[255, 255, 255]以外的颜色变成0 -thresh = cv2.inRange(img, np.array([240, 240, 240]), np.array([255, 255, 255])) - -# 创建形状和尺寸的结构元素 -kernel = np.ones((3, 3), np.uint8) - -# 扩张待修复区域 -hi_mask = cv2.dilate(thresh, kernel, iterations=1) -specular = cv2.inpaint(img, hi_mask, 5, flags=cv2.INPAINT_TELEA) - -cv2.namedWindow("Image", 0) -cv2.resizeWindow("Image", int(width / 2), int(hight / 2)) -cv2.imshow("Image", img) - -cv2.namedWindow("newImage", 0) -cv2.resizeWindow("newImage", int(width / 2), int(hight / 2)) -cv2.imshow("newImage", specular) -cv2.waitKey(0) -cv2.destroyAllWindows() -# coding=utf-8 -# 鼠标绘图 - -import cv2 -import numpy as np - -# **************** 1 **************** -# for i in dir(cv2): -# if 'EVENT' in i: -# print(i) -''' -EVENT_FLAG_ALTKEY #按住alt键 -EVENT_FLAG_CTRLKEY #按住ctrl键 -EVENT_FLAG_LBUTTON #按住鼠标左键 -EVENT_FLAG_MBUTTON #按住右键点击左键 -EVENT_FLAG_RBUTTON #按住鼠标右键 -EVENT_FLAG_SHIFTKEY #按住shift键 -EVENT_LBUTTONDBLCLK #左键双击 -EVENT_LBUTTONDOWN #左键按下 -EVENT_LBUTTONUP #左键抬起 -EVENT_MBUTTONDBLCLK #滚轮双击 -EVENT_MBUTTONDOWN #滚轮按下 -EVENT_MBUTTONUP #滚轮抬起 -EVENT_MOUSEMOVE #鼠标移动 -EVENT_MOUSEWHEEL #鼠标滚轮滚动 -EVENT_RBUTTONDBLCLK #右键双击 -EVENT_RBUTTONDOWN #右键按下 
-EVENT_RBUTTONUP #右键抬起 -''' - -# # **************** 2 **************** -# def draw_circle(event, x, y, flags, param): -# if event == cv2.EVENT_MBUTTONDOWN: -# cv2.circle(img, (x, y), 20, (255, 0, 0), -1) - -# img = np.zeros((512, 512, 3), np.uint8) -# cv2.namedWindow('image') -# cv2.setMouseCallback('image', draw_circle) - -# while (1): -# cv2.imshow('image', img) -# if cv2.waitKey(1) & 0xFF == ord('q'): -# break - -# **************** 3 happy的自由绘图 **************** -drawing = False - - -def drawDef(event, x, y, flags, param): - global drawing - - if event == cv2.EVENT_LBUTTONDOWN: - drawing = True - if event == cv2.EVENT_LBUTTONUP: - drawing = False - - if event == cv2.EVENT_MOUSEMOVE and drawing == True: - cv2.circle(img, (x, y), 10, (255, 0, 0), -1) - - -img = np.zeros((512, 512, 3), np.uint8) -cv2.namedWindow('image') -cv2.setMouseCallback('image', drawDef) - -while (1): - cv2.imshow('image', img) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - -cv2.destroyAllWindows() -# coding=utf-8 -# 调色板 -import cv2 -import numpy as np - -img = np.zeros((300, 512, 3), np.uint8) -cv2.namedWindow('image') - - -def callback(x): - pass - - -# 参数1:名称;参数2:作用窗口,参数3、4:最小值和最大值;参数5:值更改回调方法 -cv2.createTrackbar('R', 'image', 0, 255, callback) -cv2.createTrackbar('G', 'image', 0, 255, callback) -cv2.createTrackbar('B', 'image', 0, 255, callback) - -while (1): - cv2.imshow('image', img) - if cv2.waitKey(1) & 0xFF == ord('q'): - break - - r = cv2.getTrackbarPos('R', 'image') - g = cv2.getTrackbarPos('G', 'image') - b = cv2.getTrackbarPos('B', 'image') - - img[:] = [b, g, r] - -cv2.destroyAllWindows() -from __future__ import absolute_import -from setuptools import setup, find_packages -from io import open - -# Get the long description from the README file -with open('README.md', encoding='utf-8') as f: - long_description = f.read() - -setup( - name='mmdnn', - - # Versions should comply with PEP440. For a discussion on single-sourcing - # the version across setup.py and the project code, see - # https://packaging.python.org/en/latest/single_source_version.html - version='0.2.4', - - description='Deep learning model converter, visualization and editor.', - long_description=long_description, - long_description_content_type='text/markdown', - - # The project's main homepage. - url='https://github.com/Microsoft/MMdnn', - - # Author details - author='System Research Group, Microsoft Research Asia', - author_email='mmdnn_feedback@microsoft.com', - - # Choose your license - license='MIT', - - # See https://pypi.python.org/pypi?%3Aaction=list_classifiers - classifiers=[ - # How mature is this project? Common values are - # 3 - Alpha - # 4 - Beta - # 5 - Production/Stable - 'Development Status :: 3 - Alpha', - - # Indicate who your project is intended for - 'Intended Audience :: Developers', - 'Intended Audience :: Education', - 'Intended Audience :: Science/Research', - 'Topic :: Scientific/Engineering :: Mathematics', - 'Topic :: Software Development :: Libraries :: Python Modules', - 'Topic :: Software Development :: Libraries', - - # Pick your license as you wish (should match "license" above) - 'License :: OSI Approved :: MIT License', - - # Specify the Python versions you support here. In particular, ensure - # that you indicate whether you support Python 2, Python 3 or both. - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 3' - ], - - # What does your project relate to? 
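-    # Note: setuptools accepts "keywords" either as a single
-    # whitespace-separated string (the form used below) or as a list of
-    # strings; both forms are commonly used.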
- keywords='deep learning model converter visualization', - - # You can just specify the packages manually here if your project is - # simple. Or you can use find_packages(). - packages=find_packages(), - - package_data={ - 'mmdnn': ['visualization/public/*', - 'visualization/*.json', - 'visualization/*.js', - 'visualization/*.html', - 'visualization/*.css'] - }, - - # Alternatively, if you want to distribute just a my_module.py, uncomment - # this: - # py_modules=["my_module"], - - # List run-time dependencies here. These will be installed by pip when - # your project is installed. For an analysis of "install_requires" vs pip's - # requirements files see: - # https://packaging.python.org/en/latest/requirements.html - install_requires=[ - 'numpy >= 1.15.0', - 'protobuf >= 3.6.0', - 'six >= 1.10.0', - 'pillow >= 3.1.0', - ], - - # To provide executable scripts, use entry points in preference to the - # "scripts" keyword. Entry points provide cross-platform support and allow - # pip to create the appropriate form of executable for the target platform. - entry_points={ - 'console_scripts': [ - 'mmconvert = mmdnn.conversion._script.convert:_main', - 'mmdownload = mmdnn.conversion._script.extractModel:_main', - 'mmvismeta = mmdnn.conversion.examples.tensorflow.vis_meta:_main', - 'mmtoir = mmdnn.conversion._script.convertToIR:_main', - 'mmtocode = mmdnn.conversion._script.IRToCode:_main', - 'mmtomodel = mmdnn.conversion._script.dump_code:_main', - ], - }, -) -''' -Send JPEG image to tensorflow_model_server loaded with GAN model. - -Hint: the code has been compiled together with TensorFlow serving -and not locally. The client is called in the TensorFlow Docker container -''' - -from __future__ import print_function - -# Communication to TensorFlow server via gRPC -from grpc.beta import implementations -import tensorflow as tf - -# TensorFlow serving stuff to send messages -from tensorflow_serving.apis import predict_pb2 -from tensorflow_serving.apis import prediction_service_pb2 - - -# Command line arguments -tf.app.flags.DEFINE_string('server', 'localhost:9000', - 'PredictionService host:port') -tf.app.flags.DEFINE_string('image', '', 'path to image in JPEG format') -FLAGS = tf.app.flags.FLAGS - - -def main(_): - host, port = FLAGS.server.split(':') - channel = implementations.insecure_channel(host, int(port)) - stub = prediction_service_pb2.beta_create_PredictionService_stub(channel) - # Send request - image = tf.gfile.FastGFile(FLAGS.image, 'rb').read() - request = predict_pb2.PredictRequest() - request.model_spec.name = 'tensorflow-serving' - request.model_spec.signature_name = tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY - request.inputs['image'].CopyFrom(tf.contrib.util.make_tensor_proto(image)) - # request.inputs['input'].CopyFrom() - - result = stub.Predict(request, 10.0) # 10 secs timeout - print(result) - - -if __name__ == '__main__': - tf.app.run() -#!/usr/bin/python -""" -To have a single pip command that uses the specific requirements file use this -in a shell script for posix OS:: - - pip install -r $(select_requirements.py) - -On windows, create a bat of cmd file that loads the windows-specific -requirements directly:: - - for /f %%i in ('python select_requirements.py') do (set req_file="%%i") - pip install -r %req_file% -""" - -from __future__ import print_function - -import os -import platform -import struct -import sys - -# major python major_python_versions as python2 and python3 -major_python_versions = tuple(map(str, platform.python_version_tuple())) 
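-# platform.python_version_tuple() already returns a tuple of strings, e.g.
-# ('3', '6', '8') on CPython 3.6.8, so the map(str, ...) above is purely
-# defensive; the python2/python3 flags below therefore compare the major
-# version as a string, not an int.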
-python2 = major_python_versions[0] == '2' -python3 = major_python_versions[0] == '3' - - -# operating system -sys_platform = str(sys.platform).lower() -linux = 'linux' in sys_platform -windows = 'win32' in sys_platform -cygwin = 'cygwin' in sys_platform -solaris = 'sunos' in sys_platform -macosx = 'darwin' in sys_platform -posix = 'posix' in os.name.lower() - - -def select_requirements_file(): - """ - Print the path to a requirements file based on some os/arch condition. - """ - if windows: - print('requirements/win.txt') - elif macosx: - print('requirements/mac.txt') - elif linux: - if python2: - print('requirements/linux-py2.txt') - elif python3: - print('requirements/linux-py3.txt') - elif cygwin: - print('requirements/cygwin.txt') - else: - raise Exception('Unsupported OS/platform') - - -if __name__ == '__main__': - select_requirements_file() -from __future__ import absolute_import -from __future__ import print_function - -from utils import * -import utils -from mmdnn.conversion.examples.imagenet_test import TestKit -import numpy as np -import imp -import sys -import os -TEST_ONNX = os.environ.get('TEST_ONNX') - - -def is_paddle_supported(): - if (sys.version_info > (2, 7)): - print('PaddlePaddle does not support Python {0}'.format( - sys.version), file=sys.stderr) - return False - - return True - - -def is_coreml_supported(): - import sys - if sys.platform == 'darwin': - import platform - ver_str = platform.mac_ver()[0] - if (tuple([int(v) for v in ver_str.split('.')]) >= (10, 13)): - return True - - print('CoreML is not supported on your platform.', file=sys.stderr) - return False - - -def check_env(source_framework, target_framework, model_name): - if ((source_framework == 'paddle') or (target_framework == 'paddle')): - if not is_paddle_supported(): - return False - - if ((source_framework == 'coreml') or (target_framework == 'coreml')): - if not is_coreml_supported(): - return False - - return True - - -class TestModels(CorrectnessTest): - - image_path = "mmdnn/conversion/examples/data/seagull.jpg" - cachedir = "tests/cache/" - tmpdir = "tests/tmp/" - sentence_path = "mmdnn/conversion/examples/data/one_imdb.npy" - vocab_size = 30000 - - def __init__(self, test_table=None, methodName='test_nothing'): - super(TestModels, self).__init__(methodName) - if test_table: - print("Reset the test_table!", file=sys.stderr) - self.test_table = test_table - - @staticmethod - def tensorflow_parse(architecture_name, test_input_path): - from mmdnn.conversion.examples.tensorflow.extractor import tensorflow_extractor - from mmdnn.conversion.tensorflow.tensorflow_parser import TensorflowParser - - # get original model prediction result - original_predict = tensorflow_extractor.inference( - architecture_name, None, TestModels.cachedir, test_input_path) - del tensorflow_extractor - - # original to IR - IR_file = TestModels.tmpdir + 'tensorflow_' + architecture_name + "_converted" - parser = TensorflowParser( - TestModels.cachedir + "imagenet_" + architecture_name + ".ckpt.meta", - TestModels.cachedir + "imagenet_" + architecture_name + ".ckpt", - ["MMdnn_Output"]) - parser.run(IR_file) - del parser - del TensorflowParser - - return original_predict - - @staticmethod - def tensorflow_frozen_parse(architecture_name, test_input_path): - from mmdnn.conversion.examples.tensorflow.extractor import tensorflow_extractor - from mmdnn.conversion.tensorflow.tensorflow_frozenparser import TensorflowParser2 - - # get original model prediction result - original_predict = tensorflow_extractor.inference( - 
architecture_name, None, TestModels.cachedir, test_input_path, is_frozen=True) - para = tensorflow_extractor.get_frozen_para(architecture_name) - del tensorflow_extractor - - # original to IR - IR_file = TestModels.tmpdir + 'tensorflow_frozen_' + \ - architecture_name + "_converted" - parser = TensorflowParser2( - TestModels.cachedir + para[0], para[1], para[2], para[3]) - parser.run(IR_file) - del parser - del TensorflowParser2 - - return original_predict - - @staticmethod - def keras_parse(architecture_name, test_input_path): - from mmdnn.conversion.examples.keras.extractor import keras_extractor - from mmdnn.conversion.keras.keras2_parser import Keras2Parser - - # download model - model_filename = keras_extractor.download( - architecture_name, TestModels.cachedir) - - # get original model prediction result - original_predict = keras_extractor.inference( - architecture_name, model_filename, TestModels.cachedir, test_input_path) - # print(original_predict) - del keras_extractor - - # original to IR - IR_file = TestModels.tmpdir + 'keras_' + architecture_name + "_converted" - parser = Keras2Parser(model_filename) - parser.run(IR_file) - del parser - del Keras2Parser - return original_predict - - @staticmethod - def mxnet_parse(architecture_name, test_input_path): - from mmdnn.conversion.examples.mxnet.extractor import mxnet_extractor - from mmdnn.conversion.mxnet.mxnet_parser import MXNetParser - - # download model - architecture_file, weight_file = mxnet_extractor.download( - architecture_name, TestModels.cachedir) - - # get original model prediction result - original_predict = mxnet_extractor.inference( - architecture_name, None, TestModels.cachedir, test_input_path) - del mxnet_extractor - - # original to IR - import re - if re.search('.', weight_file): - weight_file = weight_file[:-7] - prefix, epoch = weight_file.rsplit('-', 1) - model = (architecture_file, prefix, epoch, [3, 224, 224]) - - IR_file = TestModels.tmpdir + 'mxnet_' + architecture_name + "_converted" - parser = MXNetParser(model) - parser.run(IR_file) - del parser - del MXNetParser - - return original_predict - - @staticmethod - def caffe_parse(architecture_name, test_input_path): - from mmdnn.conversion.examples.caffe.extractor import caffe_extractor - - # download model - architecture_file, weight_file = caffe_extractor.download( - architecture_name, TestModels.cachedir) - - # get original model prediction result - original_predict = caffe_extractor.inference( - architecture_name, (architecture_file, weight_file), TestModels.cachedir, test_input_path) - del caffe_extractor - - # original to IR - from mmdnn.conversion.caffe.transformer import CaffeTransformer - transformer = CaffeTransformer( - architecture_file, weight_file, "tensorflow", None, phase='TEST') - graph = transformer.transform_graph() - data = transformer.transform_data() - del CaffeTransformer - - from mmdnn.conversion.caffe.writer import ModelSaver, PyWriter - - prototxt = graph.as_graph_def().SerializeToString() - IR_file = TestModels.tmpdir + 'caffe_' + architecture_name + "_converted" - pb_path = IR_file + '.pb' - with open(pb_path, 'wb') as of: - of.write(prototxt) - print("IR network structure is saved as [{}].".format(pb_path)) - - import numpy as np - npy_path = IR_file + '.npy' - with open(npy_path, 'wb') as of: - np.save(of, data) - print("IR weights are saved as [{}].".format(npy_path)) - - if original_predict.ndim == 3: - original_predict = np.transpose(original_predict, (1, 2, 0)) - - return original_predict - - @staticmethod - def 
cntk_parse(architecture_name, test_input_path): - from mmdnn.conversion.examples.cntk.extractor import cntk_extractor - from mmdnn.conversion.cntk.cntk_parser import CntkParser - # download model - architecture_file = cntk_extractor.download( - architecture_name, TestModels.cachedir) - - # get original model prediction result - original_predict = cntk_extractor.inference( - architecture_name, architecture_file, test_input_path) - del cntk_extractor - - # original to IR - IR_file = TestModels.tmpdir + 'cntk_' + architecture_name + "_converted" - parser = CntkParser(architecture_file) - parser.run(IR_file) - del parser - del CntkParser - return original_predict - - @staticmethod - def coreml_parse(architecture_name, test_input_path): - from mmdnn.conversion.examples.coreml.extractor import coreml_extractor - from mmdnn.conversion.coreml.coreml_parser import CoremlParser - - # download model - architecture_file = coreml_extractor.download( - architecture_name, TestModels.cachedir) - - # get original model prediction result - original_predict = coreml_extractor.inference( - architecture_name, architecture_file, test_input_path) - del coreml_extractor - - # original to IR - IR_file = TestModels.tmpdir + 'coreml_' + architecture_name + "_converted" - parser = CoremlParser(architecture_file) - parser.run(IR_file) - del parser - del CoremlParser - return original_predict - - @staticmethod - def paddle_parse(architecture_name, test_input_path): - from mmdnn.conversion.examples.paddle.extractor import paddle_extractor - from mmdnn.conversion.paddle.paddle_parser import PaddleParser - - # download model - model_filename = paddle_extractor.download( - architecture_name, TestModels.cachedir) - - # get original model prediction result - original_predict = paddle_extractor.inference( - architecture_name, model_filename, TestModels.cachedir, test_input_path) - del paddle_extractor - - # original to IR - IR_file = TestModels.tmpdir + 'paddle_' + architecture_name + "_converted" - - parser = PaddleParser(model_filename) - parser.run(IR_file) - del parser - del PaddleParser - return original_predict - - @staticmethod - def pytorch_parse(architecture_name, test_input_path): - from mmdnn.conversion.examples.pytorch.extractor import pytorch_extractor - from mmdnn.conversion.pytorch.pytorch_parser import PytorchParser - - # download model - architecture_file = pytorch_extractor.download( - architecture_name, TestModels.cachedir) - - # get original model prediction result - original_predict = pytorch_extractor.inference( - architecture_name, architecture_file, test_input_path) - del pytorch_extractor - - # get shape - func = TestKit.preprocess_func['pytorch'][architecture_name] - - import inspect - funcstr = inspect.getsource(func) - - pytorch_pre = funcstr.split('(')[0].split('.')[-1] - - if len(funcstr.split(',')) == 3: - size = int(funcstr.split('path,')[1].split(')')[0]) - - elif len(funcstr.split(',')) == 4: - size = int(funcstr.split('path,')[1].split(',')[0]) - - elif len(funcstr.split(',')) == 11: - size = int(funcstr.split('path,')[1].split(',')[0]) - - # original to IR - IR_file = TestModels.tmpdir + 'pytorch_' + architecture_name + "_converted" - parser = PytorchParser(architecture_file, [3, size, size]) - parser.run(IR_file) - del parser - del PytorchParser - return original_predict - - @staticmethod - def darknet_parse(architecture_name, test_input_path): - ensure_dir("./data/") - from mmdnn.conversion.examples.darknet.extractor import darknet_extractor - from mmdnn.conversion.darknet.darknet_parser 
import DarknetParser - # download model - architecture_file = darknet_extractor.download( - architecture_name, TestModels.cachedir) - - # get original model prediction result - original_predict = darknet_extractor.inference( - architecture_name, architecture_file, TestModels.cachedir, test_input_path) - del darknet_extractor - - # original to IR - IR_file = TestModels.tmpdir + 'darknet_' + architecture_name + "_converted" - - if architecture_name == "yolov3": - start = "1" - else: - start = "0" - - parser = DarknetParser( - architecture_file[0], architecture_file[1], start) - parser.run(IR_file) - del parser - del DarknetParser - return original_predict - - @staticmethod - def cntk_emit(original_framework, architecture_name, architecture_path, weight_path, test_input_path): - from mmdnn.conversion.cntk.cntk_emitter import CntkEmitter - - # IR to code - converted_file = TestModels.tmpdir + original_framework + \ - '_cntk_' + architecture_name + "_converted" - converted_file = converted_file.replace('.', '_') - emitter = CntkEmitter((architecture_path, weight_path)) - emitter.run(converted_file + '.py', None, 'test') - del emitter - del CntkEmitter - - model_converted = imp.load_source( - 'CntkModel', converted_file + '.py').KitModel(weight_path) - - if 'rnn' not in architecture_name: - func = TestKit.preprocess_func[original_framework][architecture_name] - img = func(test_input_path) - input_data = img - else: - sentence = np.load(test_input_path) - from keras.utils import to_categorical - input_data = to_categorical(sentence, 30000)[0] - - predict = model_converted.eval( - {model_converted.arguments[0]: [input_data]}) - converted_predict = np.squeeze(predict) - del model_converted - del sys.modules['CntkModel'] - os.remove(converted_file + '.py') - - return converted_predict - - @staticmethod - def tensorflow_emit(original_framework, architecture_name, architecture_path, weight_path, test_input_path): - import tensorflow as tf - from mmdnn.conversion.tensorflow.tensorflow_emitter import TensorflowEmitter - # IR to code - converted_file = TestModels.tmpdir + original_framework + \ - '_tensorflow_' + architecture_name + "_converted" - converted_file = converted_file.replace('.', '_') - - emitter = TensorflowEmitter((architecture_path, weight_path)) - emitter.run(converted_file + '.py', None, 'test') - del emitter - del TensorflowEmitter - - # import converted model - model_converted = imp.load_source( - 'TFModel', converted_file + '.py').KitModel(weight_path) - - input_tf, model_tf = model_converted - - original_framework = checkfrozen(original_framework) - - if 'rnn' not in architecture_name: - func = TestKit.preprocess_func[original_framework][architecture_name] - img = func(test_input_path) - input_data = np.expand_dims(img, 0) - else: - input_data = np.load(test_input_path) - - with tf.Session() as sess: - init = tf.global_variables_initializer() - sess.run(init) - predict = sess.run(model_tf, feed_dict={input_tf: input_data}) - del model_converted - del sys.modules['TFModel'] - os.remove(converted_file + '.py') - converted_predict = np.squeeze(predict) - - del tf - - return converted_predict - - @staticmethod - def pytorch_emit(original_framework, architecture_name, architecture_path, weight_path, test_input_path): - from mmdnn.conversion.pytorch.pytorch_emitter import PytorchEmitter - - # IR to code - converted_file = TestModels.tmpdir + original_framework + \ - '_pytorch_' + architecture_name + "_converted" - converted_file = converted_file.replace('.', '_') - emitter = 
PytorchEmitter((architecture_path, weight_path)) - emitter.run(converted_file + '.py', converted_file + '.npy', 'test') - del emitter - del PytorchEmitter - - # import converted model - import torch - model_converted = imp.load_source( - 'PytorchModel', converted_file + '.py').KitModel(converted_file + '.npy') - - model_converted.eval() - - original_framework = checkfrozen(original_framework) - if 'rnn' not in architecture_name: - func = TestKit.preprocess_func[original_framework][architecture_name] - img = func(test_input_path) - img = np.transpose(img, (2, 0, 1)) - img = np.expand_dims(img, 0).copy() - input_data = torch.from_numpy(img) - input_data = torch.autograd.Variable( - input_data, requires_grad=False) - else: - sentence = np.load(test_input_path) - input_data = torch.from_numpy(sentence) - input_data = torch.autograd.Variable( - input_data, requires_grad=False) - - predict = model_converted(input_data) - predict = predict.data.numpy() - converted_predict = np.squeeze(predict) - - del model_converted - del sys.modules['PytorchModel'] - del torch - os.remove(converted_file + '.py') - os.remove(converted_file + '.npy') - - return converted_predict - - @staticmethod - def keras_emit(original_framework, architecture_name, architecture_path, weight_path, test_input_path): - from mmdnn.conversion.keras.keras2_emitter import Keras2Emitter - - # IR to code - converted_file = TestModels.tmpdir + original_framework + \ - '_keras_' + architecture_name + "_converted" - converted_file = converted_file.replace('.', '_') - emitter = Keras2Emitter((architecture_path, weight_path)) - emitter.run(converted_file + '.py', None, 'test') - del emitter - del Keras2Emitter - - # import converted model - model_converted = imp.load_source( - 'KerasModel', converted_file + '.py').KitModel(weight_path) - - original_framework = checkfrozen(original_framework) - if 'rnn' not in architecture_name: - func = TestKit.preprocess_func[original_framework][architecture_name] - img = func(test_input_path) - input_data = np.expand_dims(img, 0) - else: - input_data = np.load(test_input_path) - - predict = model_converted.predict(input_data) - - if original_framework == "darknet": - converted_predict = None - else: - converted_predict = np.squeeze(predict) - - del model_converted - del sys.modules['KerasModel'] - - import keras.backend as K - K.clear_session() - - os.remove(converted_file + '.py') - - return converted_predict - - @staticmethod - def mxnet_emit(original_framework, architecture_name, architecture_path, weight_path, test_input_path): - from mmdnn.conversion.mxnet.mxnet_emitter import MXNetEmitter - from collections import namedtuple - Batch = namedtuple('Batch', ['data']) - - import mxnet - - # IR to code - converted_file = TestModels.tmpdir + original_framework + \ - '_mxnet_' + architecture_name + "_converted" - converted_file = converted_file.replace('.', '_') - output_weights_file = converted_file + "-0000.params" - emitter = MXNetEmitter( - (architecture_path, weight_path, output_weights_file)) - emitter.run(converted_file + '.py', None, 'test') - del emitter - del MXNetEmitter - - # import converted model - imported = imp.load_source('MXNetModel', converted_file + '.py') - - model_converted = imported.RefactorModel() - model_converted = imported.deploy_weight( - model_converted, output_weights_file) - - original_framework = checkfrozen(original_framework) - if 'rnn' not in architecture_name: - func = TestKit.preprocess_func[original_framework][architecture_name] - img = func(test_input_path) - img = 
np.transpose(img, (2, 0, 1)) - input_data = np.expand_dims(img, 0) - else: - input_data = np.load(test_input_path) - - model_converted.forward(Batch([mxnet.nd.array(input_data)])) - predict = model_converted.get_outputs()[0].asnumpy() - converted_predict = np.squeeze(predict) - - del model_converted - del sys.modules['MXNetModel'] - del mxnet - - os.remove(converted_file + '.py') - os.remove(output_weights_file) - - return converted_predict - - @staticmethod - def caffe_emit(original_framework, architecture_name, architecture_path, weight_path, test_input_path): - try: - import caffe - from mmdnn.conversion.caffe.caffe_emitter import CaffeEmitter - - # IR to code - converted_file = TestModels.tmpdir + original_framework + \ - '_caffe_' + architecture_name + "_converted" - converted_file = converted_file.replace('.', '_') - emitter = CaffeEmitter((architecture_path, weight_path)) - emitter.run(converted_file + '.py', - converted_file + '.npy', 'test') - del emitter - del CaffeEmitter - - # import converted model - imported = imp.load_source('CaffeModel', converted_file + '.py') - - imported.make_net(converted_file + '.prototxt') - imported.gen_weight(converted_file + '.npy', converted_file + - '.caffemodel', converted_file + '.prototxt') - model_converted = caffe.Net( - converted_file + '.prototxt', converted_file + '.caffemodel', caffe.TEST) - - original_framework = checkfrozen(original_framework) - func = TestKit.preprocess_func[original_framework][architecture_name] - img = func(test_input_path) - img = np.transpose(img, [2, 0, 1]) - input_data = np.expand_dims(img, 0) - - model_converted.blobs[model_converted.inputs[0] - ].data[...] = input_data - predict = model_converted.forward()[model_converted.outputs[-1]] - converted_predict = np.squeeze(predict) - - del model_converted - del sys.modules['CaffeModel'] - del caffe - os.remove(converted_file + '.py') - os.remove(converted_file + '.npy') - os.remove(converted_file + '.prototxt') - os.remove(converted_file + '.caffemodel') - - return converted_predict - - except ImportError: - print("Cannot import Caffe. 
Caffe Emit is not tested.") - return None - - @staticmethod - def coreml_emit(original_framework, architecture_name, architecture_path, weight_path, test_input_path): - from mmdnn.conversion.coreml.coreml_emitter import CoreMLEmitter - from coremltools.models import MLModel - import coremltools - from PIL import Image - - def prep_for_coreml(prename, BGRTranspose): - # The list is in RGB oder - if prename == 'Standard': - return 0.00784313725490196, -1, -1, -1 - elif prename == 'ZeroCenter': - return 1, -123.68, -116.779, -103.939 - elif prename == 'Identity': - return 1, 1, 1, 1 - else: - raise ValueError() - - # IR to Model - # converted_file = original_framework + '_coreml_' + architecture_name + "_converted" - # converted_file = converted_file.replace('.', '_') - - original_framework = checkfrozen(original_framework) - func = TestKit.preprocess_func[original_framework][architecture_name] - - import inspect - funcstr = inspect.getsource(func) - - coreml_pre = funcstr.split('(')[0].split('.')[-1] - - if len(funcstr.split(',')) == 3: - BGRTranspose = bool(0) - size = int(funcstr.split('path,')[1].split(')')[0]) - prep_list = prep_for_coreml(coreml_pre, BGRTranspose) - elif len(funcstr.split(',')) == 4: - BGRTranspose = funcstr.split( - ',')[-2].split(')')[0].strip() == str(True) - size = int(funcstr.split('path,')[1].split(',')[0]) - prep_list = prep_for_coreml(coreml_pre, BGRTranspose) - - elif len(funcstr.split(',')) == 11: - BGRTranspose = funcstr.split( - ',')[-2].split(')')[0].strip() == str(True) - - size = int(funcstr.split('path,')[1].split(',')[0]) - prep_list = (float(funcstr.split(',')[2]), - float(funcstr.split(',')[3].split('[')[-1]), - float(funcstr.split(',')[4]), - float(funcstr.split(',')[5].split(']')[0]) - ) - - emitter = CoreMLEmitter(architecture_path, weight_path) - - model, input_name, output_name = emitter.gen_model( - input_names=None, - output_names=None, - image_input_names=test_input_path, - is_bgr=BGRTranspose, - red_bias=prep_list[1], - green_bias=prep_list[2], - blue_bias=prep_list[3], - gray_bias=0.0, - image_scale=prep_list[0], - class_labels=None, - predicted_feature_name=None, - predicted_probabilities_output='' - ) - - input_name = str(input_name[0][0]) - output_name = str(output_name[0][0]) - - # load model - model = MLModel(model) - - # save model - # coremltools.utils.save_spec(model.get_spec(), converted_file) - - if not is_coreml_supported(): - return None - else: - - from PIL import Image as pil_image - img = pil_image.open(test_input_path) - img = img.resize((size, size)) - - # inference - - coreml_input = {input_name: img} - coreml_output = model.predict(coreml_input) - prob = coreml_output[output_name] - prob = np.array(prob).squeeze() - - return prob - - @staticmethod - def onnx_emit(original_framework, architecture_name, architecture_path, weight_path, test_input_path): - try: - from mmdnn.conversion.onnx.onnx_emitter import OnnxEmitter - - # IR to code - converted_file = TestModels.tmpdir + original_framework + \ - '_onnx_' + architecture_name + "_converted" - converted_file = converted_file.replace('.', '_') - emitter = OnnxEmitter(architecture_path, weight_path) - emitter.run(converted_file + '.py', - converted_file + '.npy', 'test') - del emitter - del OnnxEmitter - - # import converted model - from onnx_tf.backend import prepare - model_converted = imp.load_source( - 'OnnxModel', converted_file + '.py').KitModel(converted_file + '.npy') - - tf_rep = prepare(model_converted) - - original_framework = checkfrozen(original_framework) - func 
= TestKit.preprocess_func[original_framework][architecture_name] - img = func(test_input_path) - input_data = np.expand_dims(img, 0) - - predict = tf_rep.run(input_data)[0] - - del prepare - del model_converted - del tf_rep - del sys.modules['OnnxModel'] - - os.remove(converted_file + '.py') - os.remove(converted_file + '.npy') - - return predict - - except ImportError: - print( - 'Please install Onnx! Or Onnx is not supported in your platform.', file=sys.stderr) - - # In case of odd number add the extra padding at the end for SAME_UPPER(eg. pads:[0, 2, 2, 0, 0, 3, 3, 0]) and at the beginning for SAME_LOWER(eg. pads:[0, 3, 3, 0, 0, 2, 2, 0]) - - exception_tabel = { - # Cntk Padding is SAME_LOWER, but Keras Padding is SAME_UPPER, in first convolution layer. - 'cntk_keras_resnet18', - # Cntk Padding is SAME_LOWER, but Keras Padding is SAME_UPPER, in first convolution layer. - 'cntk_keras_resnet152', - # Cntk Padding is SAME_LOWER, but Keras Padding is SAME_UPPER, in first convolution layer. - 'cntk_tensorflow_resnet18', - # Cntk Padding is SAME_LOWER, but Keras Padding is SAME_UPPER, in first convolution layer. - 'cntk_tensorflow_resnet152', - # Cntk Padding is SAME_LOWER, but Tensorflow Padding is SAME_UPPER, in first convolution layer. - 'tensorflow_cntk_inception_v1', - # Cntk Padding is SAME_LOWER, but Tensorflow Padding is SAME_UPPER, in first convolution layer. - 'tensorflow_cntk_resnet_v1_50', - # Cntk Padding is SAME_LOWER, but Tensorflow Padding is SAME_UPPER, in first convolution layer. - 'tensorflow_cntk_resnet_v2_50', - # Cntk Padding is SAME_LOWER, but Tensorflow Padding is SAME_UPPER, in first convolution layer. - 'tensorflow_cntk_resnet_v1_152', - # Cntk Padding is SAME_LOWER, but Tensorflow Padding is SAME_UPPER, in first convolution layer. - 'tensorflow_cntk_resnet_v2_152', - # Cntk Padding is SAME_LOWER, but Tensorflow Padding is SAME_UPPER, in first convolution layer. - 'tensorflow_cntk_mobilenet_v1_1.0', - # Cntk Padding is SAME_LOWER, but Tensorflow Padding is SAME_UPPER, in first convolution layer. - 'tensorflow_cntk_mobilenet_v2_1.0_224', - 'tensorflow_caffe_mobilenet_v1_1.0', # Caffe No Relu6 - 'tensorflow_caffe_mobilenet_v2_1.0_224', # Caffe No Relu6 - # different after AvgPool. AVG POOL padding difference between these two framework. MXNet AVGPooling Padding is SAME_LOWER, Tensorflow AVGPooling Padding is SAME_UPPER - 'tensorflow_frozen_mxnet_inception_v1', - # different after "InceptionV3/InceptionV3/Mixed_5b/Branch_3/AvgPool_0a_3x3/AvgPool". AVG POOL padding difference between these two framework. 
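-        # Each entry is a '<source>_<target>_<model>' test id; _need_assert()
-        # further down checks membership in this set and skips the strict
-        # numeric comparison for listed cases, since the mismatches come from
-        # framework padding semantics or accumulated floating-point error
-        # rather than from conversion bugs.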
- 'tensorflow_mxnet_inception_v3', - 'darknet_keras_yolov2', # accumulation of small difference - 'darknet_keras_yolov3', # accumulation of small difference - } - - if TEST_ONNX and TEST_ONNX.lower() == 'true': - test_table = { - 'cntk': { - 'inception_v3': [onnx_emit], - 'resnet18': [onnx_emit], - 'resnet152': [onnx_emit], - }, - - 'keras': { - 'vgg16': [onnx_emit], - 'vgg19': [onnx_emit], - 'inception_v3': [onnx_emit], - 'resnet50': [onnx_emit], - 'densenet': [onnx_emit], - # 'xception' : [onnx_emit], - 'mobilenet': [onnx_emit], - # 'nasnet' : [onnx_emit], - 'yolo2': [onnx_emit], - }, - - 'mxnet': { - 'vgg19': [onnx_emit], - 'imagenet1k-inception-bn': [onnx_emit], - 'imagenet1k-resnet-18': [onnx_emit], - 'imagenet1k-resnet-152': [onnx_emit], - 'squeezenet_v1.1': [onnx_emit], - 'imagenet1k-resnext-101-64x4d': [onnx_emit], - 'imagenet1k-resnext-50': [onnx_emit], - }, - - 'caffe': { - 'alexnet': [onnx_emit], - 'inception_v1': [onnx_emit], - 'inception_v4': [onnx_emit], - 'resnet152': [onnx_emit], - 'squeezenet': [onnx_emit], - 'vgg19': [onnx_emit], - # 'voc-fcn8s' : [onnx_emit], # TODO: ConvTranspose, Crop - # 'voc-fcn16s' : [onnx_emit], # TODO: ConvTranspose, Crop - # 'voc-fcn32s' : [onnx_emit], # TODO: ConvTranspose, Crop - 'xception': [onnx_emit], - }, - - 'tensorflow': { - 'facenet': [onnx_emit], - 'vgg19': [onnx_emit], - 'inception_v1': [onnx_emit], - 'inception_v3': [onnx_emit], - # 'resnet_v1_50' : [onnx_emit], # POOL: strides > window_shape not supported due to inconsistency between CPU and GPU implementations - # 'resnet_v1_152' : [onnx_emit], # POOL: strides > window_shape not supported due to inconsistency between CPU and GPU implementations - # 'resnet_v2_50' : [onnx_emit], # POOL: strides > window_shape not supported due to inconsistency between CPU and GPU implementations - # 'resnet_v2_152' : [onnx_emit], # POOL: strides > window_shape not supported due to inconsistency between CPU and GPU implementations - 'mobilenet_v1_1.0': [onnx_emit], - 'mobilenet_v2_1.0_224': [onnx_emit], - # 'nasnet-a_large' : [onnx_emit], # POOL: strides > window_shape not supported due to inconsistency between CPU and GPU implementations - 'inception_resnet_v2': [onnx_emit], - }, - - 'tensorflow_frozen': { - 'inception_v1': [onnx_emit], - 'inception_v3': [onnx_emit], - 'mobilenet_v1_1.0': [onnx_emit], - 'facenet': [onnx_emit], - }, - - 'coreml': { - 'inception_v3': [onnx_emit], - 'mobilenet': [onnx_emit], - 'resnet50': [onnx_emit], - 'tinyyolo': [onnx_emit], - 'vgg16': [onnx_emit], - }, - - 'darknet': { - }, - - 'paddle': { - 'resnet50': [onnx_emit], - # First 1000 exactly the same, the last one is different - 'vgg16': [onnx_emit], - }, - - 'pytorch': { - # TODO: coredump - }, - - - } - - else: - test_table = { - 'cntk': { - # 'alexnet' : [cntk_emit, keras_emit, tensorflow_emit], - # TODO: Caffe, Keras, and MXNet no constant layer - 'inception_v3': [cntk_emit, pytorch_emit, tensorflow_emit], - 'resnet18': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'resnet152': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - }, - - 'keras': { - 'vgg19': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'inception_v3': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'resnet50': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'densenet': [caffe_emit, cntk_emit, coreml_emit, keras_emit, 
mxnet_emit, pytorch_emit, tensorflow_emit], - 'xception': [tensorflow_emit, keras_emit, coreml_emit], - # TODO: mxnet_emit - 'mobilenet': [coreml_emit, keras_emit, tensorflow_emit], - # 'nasnet' : [tensorflow_emit, keras_emit, coreml_emit], - 'yolo2': [keras_emit], - # 'facenet' : [tensorflow_emit, coreml_emit,mxnet_emit,keras_emit] # TODO - }, - - 'mxnet': { - 'vgg19': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'imagenet1k-inception-bn': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'imagenet1k-resnet-18': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'imagenet1k-resnet-152': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'squeezenet_v1.1': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - # Keras is ok but too slow - 'imagenet1k-resnext-101-64x4d': [caffe_emit, cntk_emit, coreml_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'imagenet1k-resnext-50': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - }, - - 'caffe': { - # TODO: keras_emit('Tensor' object has no attribute '_keras_history') - 'alexnet': [caffe_emit, cntk_emit, coreml_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'inception_v1': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - # TODO mxnet_emit(Small error), caffe_emit(Crash for shape) - 'inception_v4': [cntk_emit, coreml_emit, keras_emit, pytorch_emit, tensorflow_emit], - 'resnet152': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'squeezenet': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'vgg19': [caffe_emit, cntk_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'voc-fcn8s': [cntk_emit, coreml_emit, tensorflow_emit], - 'voc-fcn16s': [cntk_emit, coreml_emit, tensorflow_emit], - 'voc-fcn32s': [cntk_emit, coreml_emit, tensorflow_emit], - # TODO: Caffe(Crash) keras_emit(too slow) - 'xception': [coreml_emit, cntk_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - }, - - 'tensorflow': { - 'vgg19': [caffe_emit, coreml_emit, cntk_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - # TODO: cntk_emit - 'inception_v1': [caffe_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'inception_v3': [caffe_emit, coreml_emit, cntk_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - # TODO: cntk_emit - 'resnet_v1_152': [caffe_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'resnet_v2_152': [caffe_emit, coreml_emit, cntk_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'mobilenet_v1_1.0': [caffe_emit, coreml_emit, cntk_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'mobilenet_v2_1.0_224': [caffe_emit, coreml_emit, cntk_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - # TODO: keras_emit(Slice Layer: https://blog.csdn.net/lujiandong1/article/details/54936185) - 'nasnet-a_large': [mxnet_emit, pytorch_emit, tensorflow_emit], - # CoremlEmit worked once, then always - 'inception_resnet_v2': [caffe_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - # TODO: coreml_emit - 'facenet': [mxnet_emit, tensorflow_emit, keras_emit, pytorch_emit, caffe_emit], - # TODO cntk_emit - 'rnn_lstm_gru_stacked': 
[tensorflow_emit, keras_emit, pytorch_emit, mxnet_emit] - }, - - 'tensorflow_frozen': { - # TODO: cntk_emit - 'inception_v1': [tensorflow_emit, keras_emit, mxnet_emit, coreml_emit], - # TODO: cntk_emit - 'inception_v3': [tensorflow_emit, keras_emit, mxnet_emit, coreml_emit], - 'mobilenet_v1_1.0': [tensorflow_emit, keras_emit, mxnet_emit, coreml_emit], - # TODO: coreml_emit - 'facenet': [mxnet_emit, tensorflow_emit, keras_emit, caffe_emit] - }, - - 'coreml': { - 'inception_v3': [caffe_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'mobilenet': [caffe_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'resnet50': [caffe_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - # 'tinyyolo' : [coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'vgg16': [caffe_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - }, - - 'darknet': { - 'yolov2': [keras_emit], - 'yolov3': [keras_emit], - }, - - 'paddle': { - # caffe_emit crash, due to gflags_reporting.cc - 'resnet50': [coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - # caffe_emit crash - 'resnet101': [coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - # 'vgg16': [tensorflow_emit], - # 'alexnet': [tensorflow_emit] - }, - - 'pytorch': { - 'alexnet': [caffe_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'densenet201': [caffe_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - # Mxnet broken https://github.com/apache/incubator-mxnet/issues/10194 - 'inception_v3': [caffe_emit, coreml_emit, keras_emit, pytorch_emit, tensorflow_emit], - 'vgg19': [caffe_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'vgg19_bn': [caffe_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - 'resnet152': [caffe_emit, coreml_emit, keras_emit, mxnet_emit, pytorch_emit, tensorflow_emit], - } - } - - def _get_test_input(self, architecture_name): - if 'rnn' in architecture_name: - return self.sentence_path - else: - return self.image_path - - @classmethod - def _need_assert(cls, original_framework, target_framework, network_name, original_prediction, converted_prediction): - test_name = original_framework + '_' + target_framework + '_' + network_name - if test_name in cls.exception_tabel: - return False - - if target_framework == 'coreml': - if not is_coreml_supported(): - return False - - if target_framework == 'onnx' or target_framework == 'caffe': - if converted_prediction is None: - return False - - return True - - def _test_function(self, original_framework, parser): - ensure_dir(self.cachedir) - ensure_dir(self.tmpdir) - - for network_name in self.test_table[original_framework].keys(): - print("Test {} from {} start.".format( - network_name, original_framework), file=sys.stderr) - - # get test input path - test_input = self._get_test_input(network_name) - - # get original model prediction result - original_predict = parser(network_name, test_input) - - IR_file = TestModels.tmpdir + original_framework + \ - '_' + network_name + "_converted" - for emit in self.test_table[original_framework][network_name]: - if isinstance(emit, staticmethod): - emit = emit.__func__ - target_framework = emit.__name__[:-5] - - if (target_framework == 'coreml'): - if not is_coreml_supported(): - continue - - print('Testing {} from {} to {}.'.format(network_name, - original_framework, target_framework), file=sys.stderr) - 
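# each emit callback rebuilds the network from the IR .pb/.npy files in the target framework and returns its prediction on the same test input - 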
converted_predict = emit( - original_framework, - network_name, - IR_file + ".pb", - IR_file + ".npy", - test_input) - - self._compare_outputs( - original_framework, - target_framework, - network_name, - original_predict, - converted_predict, - self._need_assert(original_framework, target_framework, - network_name, original_predict, converted_predict) - ) - print('Conversion {} from {} to {} passed.'.format( - network_name, original_framework, target_framework), file=sys.stderr) - - try: - os.remove(IR_file + ".json") - except OSError: - pass - - os.remove(IR_file + ".pb") - os.remove(IR_file + ".npy") - print("Testing {} model {} passed.".format( - original_framework, network_name), file=sys.stderr) - - print("Testing {} model all passed.".format( - original_framework), file=sys.stderr) - - def test_nothing(self): - pass - - # def test_caffe(self): - # try: - # import caffe - # self._test_function('caffe', self.caffe_parse) - # except ImportError: - # print('Please install caffe! Or caffe is not supported in your platform.', file=sys.stderr) - - # def test_cntk(self): - # try: - # import cntk - # self._test_function('cntk', self.cntk_parse) - # except ImportError: - # print('Please install cntk! Or cntk is not supported in your platform.', file=sys.stderr) - - # def test_coreml(self): - # from coremltools.models.utils import macos_version - # if macos_version() < (10, 13): - # print('Coreml is not supported in your platform.', file=sys.stderr) - # else: - # self._test_function('coreml', self.coreml_parse) - - # def test_keras(self): - # self._test_function('keras', self.keras_parse) - - # def test_mxnet(self): - # self._test_function('mxnet', self.mxnet_parse) - - # def test_darknet(self): - # self._test_function('darknet', self.darknet_parse) - - # def test_paddle(self): - # # omit tensorflow lead to crash - # import tensorflow as tf - # try: - # import paddle.v2 as paddle - # self._test_function('paddle', self.paddle_parse) - # except ImportError: - # print('Please install Paddlepaddle! 
Or Paddlepaddle is not supported in your platform.', file=sys.stderr) - - # def test_pytorch(self): - # self._test_function('pytorch', self.pytorch_parse) - - # def test_tensorflow(self): - # self._test_function('tensorflow', self.tensorflow_parse) - - # def test_tensorflow_frozen(self): - # self._test_function('tensorflow_frozen', self.tensorflow_frozen_parse) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import sys -import io -import os -import argparse -import yaml - -model_template_str = ''' -models: - - model: - name: 'vgg19' - source: 'tensorflow' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'inception_v1' - source: 'tensorflow' - targets: ['onnx', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'inception_v3' - source: 'tensorflow' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'resnet_v1_152' - source: 'tensorflow' - targets: ['tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'resnet_v2_152' - source: 'tensorflow' - targets: ['cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'mobilenet_v1_1.0' - source: 'tensorflow' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'mobilenet_v2_1.0_224' - source: 'tensorflow' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'nasnet-a_large' - source: 'tensorflow' - targets: ['tensorflow', 'mxnet', 'pytorch'] - - model: - name: 'inception_resnet_v2' - source: 'tensorflow' - targets: ['onnx', 'tensorflow', 'mxnet', 'pytorch', 'keras', 'caffe'] - - model: - name: 'facenet' - source: 'tensorflow' - targets: ['onnx', 'tensorflow', 'mxnet', 'pytorch', 'keras', 'caffe'] - - model: - name: 'rnn_embedding' - source: 'tensorflow' - targets: ['cntk', 'tensorflow', 'mxnet', 'pytorch', 'keras'] - - - model: - name: 'inception_v1' - source: 'tensorflow_frozen' - targets: ['onnx', 'tensorflow', 'mxnet', 'coreml', 'keras'] - - model: - name: 'inception_v3' - source: 'tensorflow_frozen' - targets: ['onnx', 'tensorflow', 'mxnet', 'coreml', 'keras'] - - model: - name: 'mobilenet_v1_1.0' - source: 'tensorflow_frozen' - targets: ['onnx', 'tensorflow', 'mxnet', 'coreml', 'keras'] - - model: - name: 'facenet' - source: 'tensorflow_frozen' - targets: ['onnx', 'tensorflow', 'mxnet', 'keras'] - - - model: - name: 'inception_v3' - source: 'cntk' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet'] - - model: - name: 'resnet18' - source: 'cntk' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'resnet152' - source: 'cntk' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - - model: - name: 'vgg19' - source: 'mxnet' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'imagenet1k-inception-bn' - source: 'mxnet' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'imagenet1k-resnet-18' - source: 'mxnet' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'imagenet1k-resnet-152' - source: 'mxnet' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 
'squeezenet_v1.1' - source: 'mxnet' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'imagenet1k-resnext-101-64x4d' - source: 'mxnet' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'imagenet1k-resnext-50' - source: 'mxnet' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - - model: - name: 'alexnet' - source: 'pytorch' - targets: ['tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'densenet201' - source: 'pytorch' - targets: ['tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'inception_v3' - source: 'pytorch' - targets: ['tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'vgg19' - source: 'pytorch' - targets: ['tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'vgg19_bn' - source: 'pytorch' - targets: ['tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'resnet152' - source: 'pytorch' - targets: ['tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - - model: - name: 'inception_v3' - source: 'coreml' - targets: ['onnx', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'mobilenet' - source: 'coreml' - targets: ['onnx', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'resnet50' - source: 'coreml' - targets: ['onnx', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'tinyyolo' - source: 'coreml' - targets: ['onnx', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras'] - - model: - name: 'vgg16' - source: 'coreml' - targets: ['onnx', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - - model: - name: 'vgg19' - source: 'keras' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'inception_v3' - source: 'keras' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'resnet50' - source: 'keras' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'densenet' - source: 'keras' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'xception' - source: 'keras' - targets: ['tensorflow', 'coreml', 'keras'] - - model: - name: 'mobilenet' - source: 'keras' - targets: ['onnx', 'tensorflow', 'coreml', 'keras'] - - model: - name: 'yolo2' - source: 'keras' - targets: ['onnx', 'keras'] - - - model: - name: 'alexnet' - source: 'caffe' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'caffe'] - - model: - name: 'inception_v1' - source: 'caffe' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'inception_v4' - source: 'caffe' - targets: ['onnx', 'cntk', 'tensorflow', 'pytorch', 'coreml', 'keras'] - - model: - name: 'resnet152' - source: 'caffe' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'squeezenet' - source: 'caffe' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'vgg19' - source: 'caffe' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml', 'keras', 'caffe'] - - model: - name: 'voc-fcn8s' - source: 'caffe' - targets: ['cntk', 'tensorflow', 
'coreml'] - - model: - name: 'voc-fcn16s' - source: 'caffe' - targets: ['cntk', 'tensorflow', 'coreml'] - - model: - name: 'voc-fcn32s' - source: 'caffe' - targets: ['cntk', 'tensorflow', 'coreml'] - - model: - name: 'xception' - source: 'caffe' - targets: ['onnx', 'cntk', 'tensorflow', 'mxnet', 'pytorch', 'coreml'] - - - model: - name: 'resnet50' - source: 'paddle' - targets: ['onnx'] - - model: - name: 'vgg16' - source: 'paddle' - targets: ['onnx'] - -''' - -code_template_str = ''' -from __future__ import absolute_import -from __future__ import print_function - -import os -from conversion_imagenet import TestModels -from conversion_imagenet import check_env - -def get_test_table(): - return {{ '{1}' : - {{ - '{0}' : [TestModels.{2}_emit] - }}}} - - - -def test_{1}_{2}_{3}(): - if not check_env('{1}', '{2}', '{0}'): - return - - test_table = get_test_table() - tester = TestModels(test_table) - tester._test_function('{1}', tester.{1}_parse) - - -if __name__ == '__main__': - test_{1}_{2}_{3}() - -''' - -travis_template_str = ''' -sudo: required -dist: xenial - -os: - - linux - -language: python -python: - - "2.7" - - "3.5" - -env: -{0} - -cache: - directories: - - $HOME/.cache/pip - -addons: - apt: - update: true - -before_install: - - sudo apt-get install -y openmpi-bin - - sudo apt-get install -y libprotobuf-dev libsnappy-dev libhdf5-serial-dev protobuf-compiler - - sudo apt-get install -y libatlas-base-dev - - sudo apt-get install -y libgflags-dev libgoogle-glog-dev - - if [ "$TEST_SOURCE_FRAMEWORK" = "caffe" ] || [ "$TEST_TARGET_FRAMEWORK" = "caffe" ]; then sudo apt-get install -y --no-install-recommends libboost-all-dev; fi - -install: - - pip install -q -r $(python requirements/select_requirements.py) - - pip install wget - -before_script: - - export LD_LIBRARY_PATH=$(python -c "import os; print(os.path.dirname(os.__file__) + '/site-packages/caffe/libs')"):${{LD_LIBRARY_PATH}} - -after_failure: true - -after_success: true - -after_script: true - -script: bash test.sh $TEST_SOURCE_FRAMEWORK $TEST_TARGET_FRAMEWORK $TEST_MODEL - -matrix: - fast_finish: true - - allow_failures: - - env: TEST_SOURCE_FRAMEWORK=paddle TEST_MODEL=resnet50 - - env: TEST_SOURCE_FRAMEWORK=paddle TEST_MODEL=vgg16 - -notifications: - email: - on_success: never - on_failure: never - -''' - - -def gen_test(output_dir, model): - model_name = model['name'] - normalized_model_name = model_name.replace('.', '_') - normalized_model_name2 = normalized_model_name.replace('-', '_') - length = len(model['targets']) - for i in range(length): - test_file = os.path.join(output_dir, 'test_{0}_{1}_{2}.py' - .format(model['source'], model['targets'][i], normalized_model_name)) - with open(test_file, "w+") as f: - code = code_template_str.format( - model_name, model['source'], model['targets'][i], normalized_model_name2) - f.write(code) - - -def gen_tests(output_dir): - y = yaml.load(model_template_str) - length = len(y['models']) - for i in range(length): - gen_test(output_dir, y['models'][i]['model']) - - -def gen_travis(output_dir): - y = yaml.load(model_template_str) - travis_file = os.path.join(output_dir, 'travis.yml') - - env_str = '' - length = len(y['models']) - for i in range(length): - model = y['models'][i]['model'] - model_name = model['name'] - normalized_model_name = model_name.replace('.', '_') - source_framework = model['source'] - if False: - env_str += ' - TEST_SOURCE_FRAMEWORK={0} TEST_MODEL={1}\n'.format( - source_framework, normalized_model_name) - else: - length2 = len(model['targets']) - for j in 
range(length2): - target_framework = model['targets'][j] - env_str += ' - TEST_SOURCE_FRAMEWORK={0} TEST_TARGET_FRAMEWORK={1} TEST_MODEL={2}\n'.format( - source_framework, target_framework, normalized_model_name) - - with open(travis_file, "w+") as f: - code = travis_template_str.format(env_str) - f.write(code) - - return - - -def prepare_env(FLAGS): - output_dir = FLAGS.output_dir - if (not os.path.exists(output_dir)): - os.mkdir(output_dir) - if ((not os.path.isdir(output_dir)) or (not os.path.exists(output_dir))): - print( - 'Cannot create target output directory: "{0}"'.format(output_dir)) - return False - return True - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('-o', '--output_dir', - help='The output directory.', required=True) - FLAGS, unparsed = parser.parse_known_args() - if (not prepare_env(FLAGS)): - return - - output_dir = FLAGS.output_dir - gen_travis(output_dir) - gen_tests(output_dir) - - -if __name__ == '__main__': - main() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -import six -from conversion_imagenet import TestModels - - -def get_test_table(): - TRAVIS_CI = os.environ.get('TRAVIS') - if not TRAVIS_CI or TRAVIS_CI.lower() != 'true': - return None - - ONNX = os.environ.get('TEST_ONNX') - if ONNX and ONNX.lower() == 'true': - return {'caffe': - { - 'alexnet': [TestModels.onnx_emit], - 'inception_v1': [TestModels.onnx_emit], - 'inception_v4': [TestModels.onnx_emit], - 'resnet152': [TestModels.onnx_emit], - 'squeezenet': [TestModels.onnx_emit], - # 'vgg19' : [TestModels.onnx_emit], - 'xception': [TestModels.onnx_emit], - } - } - elif six.PY2: - return {'caffe': - { - 'inception_v1': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'resnet152': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'xception': [TestModels.coreml_emit, TestModels.cntk_emit, TestModels.tensorflow_emit], - - } - } - else: - return {'caffe': - { - # 'alexnet' : [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'inception_v1': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'resnet152': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'xception': [TestModels.coreml_emit, TestModels.cntk_emit, TestModels.tensorflow_emit], - } - } - - -def test_caffe(): - test_table = get_test_table() - tester = TestModels(test_table) - tester._test_function('caffe', tester.caffe_parse) - - -if __name__ == '__main__': - test_caffe() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -import six -from conversion_imagenet import TestModels - - -def get_test_table(): - TRAVIS_CI = os.environ.get('TRAVIS') - if not TRAVIS_CI or TRAVIS_CI.lower() != 'true': - return None - - ONNX = os.environ.get('TEST_ONNX') - if ONNX and ONNX.lower() == 'true': - return {'caffe': - { - 'alexnet': [TestModels.onnx_emit], - 'inception_v1': [TestModels.onnx_emit], - 'inception_v4': [TestModels.onnx_emit], - 'resnet152': 
[TestModels.onnx_emit], - 'squeezenet': [TestModels.onnx_emit], - # 'vgg19' : [TestModels.onnx_emit], - 'xception': [TestModels.onnx_emit], - } - } - elif six.PY2: - return {'caffe': - { - 'alexnet': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'inception_v4': [TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'squeezenet': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - # 'voc-fcn8s' : [TestModels.cntk_emit, TestModels.coreml_emit, TestModels.tensorflow_emit], - 'xception': [TestModels.mxnet_emit, TestModels.pytorch_emit], - } - } - else: - return {'caffe': - { - 'squeezenet': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'inception_v4': [TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - # 'vgg19' : [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - # 'voc-fcn8s' : [TestModels.cntk_emit, TestModels.coreml_emit, TestModels.tensorflow_emit], - # 'voc-fcn16s' : [TestModels.cntk_emit, TestModels.coreml_emit, TestModels.tensorflow_emit], - # 'voc-fcn32s' : [TestModels.cntk_emit, TestModels.coreml_emit, TestModels.tensorflow_emit], - 'xception': [TestModels.mxnet_emit, TestModels.pytorch_emit], - } - } - - -def test_caffe(): - test_table = get_test_table() - tester = TestModels(test_table) - tester._test_function('caffe', tester.caffe_parse) - - -if __name__ == '__main__': - test_caffe() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -from conversion_imagenet import TestModels - - -def test_cntk(): - tester = TestModels() - tester._test_function('cntk', tester.cntk_parse) - - -if __name__ == '__main__': - test_cntk() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -from conversion_imagenet import TestModels -from conversion_imagenet import is_coreml_supported - - -def test_coreml(): - if is_coreml_supported(): - tester = TestModels() - tester._test_function('coreml', tester.coreml_parse) - - -if __name__ == '__main__': - test_coreml() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -from conversion_imagenet import TestModels - - -def test_darknet(): - tester = TestModels() - tester._test_function('darknet', tester.darknet_parse) - - -if __name__ == '__main__': - test_darknet() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -import six -from conversion_imagenet import TestModels - - -def get_test_table(): - if six.PY3: - return None - - ONNX = os.environ.get('TEST_ONNX') - if ONNX and ONNX.lower() == 'true': - return { - 'keras': { - 'vgg16': [TestModels.onnx_emit], - 'vgg19': [TestModels.onnx_emit], - # 'nasnet' : [TestModels.onnx_emit], - }, - } - - else: - return { - 'keras': { - 'vgg19': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 
'inception_v3': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - }} - - -def test_keras(): - test_table = get_test_table() - tester = TestModels(test_table) - tester._test_function('keras', tester.keras_parse) - - -if __name__ == '__main__': - test_keras() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -import six -from conversion_imagenet import TestModels - - -def get_test_table(): - if six.PY3: - return None - - ONNX = os.environ.get('TEST_ONNX') - if ONNX and ONNX.lower() == 'true': - return { - 'keras': { - 'inception_v3': [TestModels.onnx_emit], - 'resnet50': [TestModels.onnx_emit], - # 'xception' : [TestModels.onnx_emit], - # 'nasnet' : [TestModels.onnx_emit], - }, - } - - else: - return { - 'keras': { - 'resnet50': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'xception': [TestModels.tensorflow_emit, TestModels.keras_emit, TestModels.coreml_emit], - }} - - -def test_keras(): - test_table = get_test_table() - tester = TestModels(test_table) - tester._test_function('keras', tester.keras_parse) - - -if __name__ == '__main__': - test_keras() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -import six -from conversion_imagenet import TestModels - - -def get_test_table(): - if six.PY3: - return None - - ONNX = os.environ.get('TEST_ONNX') - if ONNX and ONNX.lower() == 'true': - return { - 'keras': { - 'densenet': [TestModels.onnx_emit], - 'mobilenet': [TestModels.onnx_emit], - # 'xception' : [TestModels.onnx_emit], - # 'nasnet' : [TestModels.onnx_emit], - }, - } - - else: - return { - 'keras': { - 'densenet': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'mobilenet': [TestModels.coreml_emit, TestModels.keras_emit, TestModels.tensorflow_emit], - }} - - -def test_keras(): - test_table = get_test_table() - tester = TestModels(test_table) - tester._test_function('keras', tester.keras_parse) - - -if __name__ == '__main__': - test_keras() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -import six -from conversion_imagenet import TestModels - - -def get_test_table(): - TRAVIS_CI = os.environ.get('TRAVIS') - if not TRAVIS_CI or TRAVIS_CI.lower() != 'true': - return None - - if six.PY2: - return None - - ONNX = os.environ.get('TEST_ONNX') - if ONNX and ONNX.lower() == 'true': - return {'mxnet': { - 'imagenet1k-inception-bn': [TestModels.onnx_emit], - 'squeezenet_v1.1': [TestModels.onnx_emit], - 'imagenet1k-resnext-50': [TestModels.onnx_emit], - }} - else: - return {'mxnet': { - 'imagenet1k-inception-bn': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'squeezenet_v1.1': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'imagenet1k-resnext-50': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - }} - - 
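
Every one of these per-framework test modules repeats the same gating: run only on Travis, and let TEST_ONNX switch between the ONNX-only subset and the full emitter matrix. A minimal sketch of a shared helper that would fold that duplication away (the name `ci_test_table` is illustrative, not an existing MMdnn function):

```python
import os

def ci_test_table(onnx_table, full_table):
    """Pick the test table for the current CI run, or None to skip.

    Hypothetical helper mirroring the gating duplicated across the
    per-framework test modules above; not part of MMdnn itself.
    """
    if os.environ.get('TRAVIS', '').lower() != 'true':
        return None                    # not on CI: skip the suite
    if os.environ.get('TEST_ONNX', '').lower() == 'true':
        return onnx_table              # ONNX-only pass
    return full_table                  # full emitter matrix
```
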
-def test_mxnet(): - test_table = get_test_table() - tester = TestModels(test_table) - tester._test_function('mxnet', tester.mxnet_parse) - - -if __name__ == '__main__': - test_mxnet() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -import six -from conversion_imagenet import TestModels - - -def get_test_table(): - TRAVIS_CI = os.environ.get('TRAVIS') - if not TRAVIS_CI or TRAVIS_CI.lower() != 'true': - return None - - if six.PY2: - return None - - ONNX = os.environ.get('TEST_ONNX') - if ONNX and ONNX.lower() == 'true': - return {'mxnet': { - 'imagenet1k-resnet-18': [TestModels.onnx_emit], - 'imagenet1k-resnet-152': [TestModels.onnx_emit], - }} - else: - return {'mxnet': { - 'imagenet1k-resnet-18': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'imagenet1k-resnet-152': [TestModels.caffe_emit, TestModels.cntk_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - }} - - -def test_mxnet(): - test_table = get_test_table() - tester = TestModels(test_table) - tester._test_function('mxnet', tester.mxnet_parse) - - -if __name__ == '__main__': - test_mxnet() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -from conversion_imagenet import TestModels -from conversion_imagenet import is_paddle_supported - - -def test_paddle(): - if not is_paddle_supported(): - return - # omit tensorflow lead to crash - import tensorflow as tf - tester = TestModels() - tester._test_function('paddle', tester.paddle_parse) - - -if __name__ == '__main__': - test_paddle() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -from conversion_imagenet import TestModels - - -def get_test_table(): - TRAVIS_CI = os.environ.get('TRAVIS') - if not TRAVIS_CI or TRAVIS_CI.lower() != 'true': - return None - - ONNX = os.environ.get('TEST_ONNX') - if ONNX and ONNX.lower() == 'true': - return None - - return {'pytorch': { - 'alexnet': [TestModels.caffe_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'densenet201': [TestModels.caffe_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - }} - - -def test_pytorch(): - test_table = get_test_table() - tester = TestModels(test_table) - tester._test_function('pytorch', tester.pytorch_parse) - - -if __name__ == '__main__': - test_pytorch() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -from conversion_imagenet import TestModels - - -def get_test_table(): - TRAVIS_CI = os.environ.get('TRAVIS') - if not TRAVIS_CI or TRAVIS_CI.lower() != 'true': - return None - - ONNX = os.environ.get('TEST_ONNX') - if ONNX and ONNX.lower() == 'true': - return None - - return {'pytorch': { - 'inception_v3': [TestModels.caffe_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'resnet152': [TestModels.caffe_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - }} - - -def test_pytorch(): - test_table = get_test_table() - tester = TestModels(test_table) - tester._test_function('pytorch', 
tester.pytorch_parse) - - -if __name__ == '__main__': - test_pytorch() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -from conversion_imagenet import TestModels - - -def get_test_table(): - TRAVIS_CI = os.environ.get('TRAVIS') - if not TRAVIS_CI or TRAVIS_CI.lower() != 'true': - return None - - ONNX = os.environ.get('TEST_ONNX') - if ONNX and ONNX.lower() == 'true': - return None - - return {'tensorflow': - { - 'vgg19': [TestModels.caffe_emit, TestModels.coreml_emit, TestModels.cntk_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit], - 'inception_v1': [TestModels.caffe_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'inception_v3': [TestModels.caffe_emit, TestModels.coreml_emit, TestModels.cntk_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'resnet_v1_152': [TestModels.caffe_emit, TestModels.coreml_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - # 'nasnet-a_large' : [TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - # 'inception_resnet_v2' : [TestModels.caffe_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - }} - - -def test_tensorflow(): - test_table = get_test_table() - tester = TestModels(test_table) - tester._test_function('tensorflow', tester.tensorflow_parse) - - -if __name__ == "__main__": - test_tensorflow() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -from conversion_imagenet import TestModels - - -def get_test_table(): - TRAVIS_CI = os.environ.get('TRAVIS') - if not TRAVIS_CI or TRAVIS_CI.lower() != 'true': - return None - - ONNX = os.environ.get('TEST_ONNX') - if ONNX and ONNX.lower() == 'true': - return None - - return {'tensorflow': - { - 'resnet_v2_152': [TestModels.caffe_emit, TestModels.coreml_emit, TestModels.cntk_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'mobilenet_v1_1.0': [TestModels.coreml_emit, TestModels.cntk_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - 'mobilenet_v2_1.0_224': [TestModels.coreml_emit, TestModels.cntk_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - # 'nasnet-a_large' : [TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - # 'inception_resnet_v2' : [TestModels.caffe_emit, TestModels.keras_emit, TestModels.mxnet_emit, TestModels.pytorch_emit, TestModels.tensorflow_emit], - }} - - -def test_tensorflow(): - test_table = get_test_table() - tester = TestModels(test_table) - tester._test_function('tensorflow', tester.tensorflow_parse) - - -if __name__ == "__main__": - test_tensorflow() -from __future__ import absolute_import -from __future__ import print_function - -import os -import sys -from conversion_imagenet import TestModels - - -def test_tensorflow_frozen(): - tester = TestModels() - tester._test_function('tensorflow_frozen', tester.tensorflow_frozen_parse) - - -if __name__ == '__main__': - test_tensorflow_frozen() -from __future__ import absolute_import -from __future__ import print_function - -__all__ = ['ensure_dir', 'checkfrozen', 'CorrectnessTest'] - -import os -import unittest -import numpy as np - - -def 
_compute_SNR(x, y): - noise = x - y - noise_var = np.sum(noise ** 2) / len(noise) + 1e-7 - signal_energy = np.sum(y ** 2) / len(y) - max_signal_energy = np.amax(y ** 2) - SNR = 10 * np.log10(signal_energy / noise_var) - PSNR = 10 * np.log10(max_signal_energy / noise_var) - return SNR, PSNR - - -def _compute_max_relative_error(x, y): - from six.moves import xrange - rerror = 0 - index = 0 - for i in xrange(len(x)): - den = max(1.0, np.abs(x[i]), np.abs(y[i])) - if np.abs(x[i]/den - y[i] / den) > rerror: - rerror = np.abs(x[i] / den - y[i] / den) - index = i - return rerror, index - - -def _compute_L1_error(x, y): - return np.linalg.norm(x - y, ord=1) - - -def ensure_dir(f): - d = os.path.dirname(f) - if not os.path.exists(d): - os.makedirs(d) - - -def checkfrozen(f): - if f == 'tensorflow_frozen': - return 'tensorflow' - else: - return f - - -class CorrectnessTest(unittest.TestCase): - - err_thresh = 0.15 - snr_thresh = 12 - psnr_thresh = 30 - - @classmethod - def setUpClass(cls): - """ Set up the unit test by loading common utilities. - """ - pass - - def _compare_outputs(self, original_framework, target_framework, network_name, original_predict, converted_predict, need_assert=True): - # Function self.assertEquals has deprecated, change to assertEqual - if (converted_predict is None or original_predict is None) and not need_assert: - return - - # self.assertEqual(original_predict.shape, converted_predict.shape) - original_predict = original_predict.flatten() - converted_predict = converted_predict.flatten() - len1 = original_predict.shape[0] - len2 = converted_predict.shape[0] - length = min(len1, len2) - original_predict = np.sort(original_predict)[::-1] - converted_predict = np.sort(converted_predict)[::-1] - original_predict = original_predict[0:length] - converted_predict = converted_predict[0:length] - error, ind = _compute_max_relative_error( - converted_predict, original_predict) - L1_error = _compute_L1_error(converted_predict, original_predict) - SNR, PSNR = _compute_SNR(converted_predict, original_predict) - print("error:", error) - print("L1 error:", L1_error) - print("SNR:", SNR) - print("PSNR:", PSNR) - - if need_assert: - self.assertGreater(SNR, self.snr_thresh, "Error in converting {} from {} to {}".format( - network_name, original_framework, target_framework)) - self.assertGreater(PSNR, self.psnr_thresh, "Error in converting {} from {} to {}".format( - network_name, original_framework, target_framework)) - self.assertLess(error, self.err_thresh, "Error in converting {} from {} to {}".format( - network_name, original_framework, target_framework)) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import json -import os -import argparse - -markdown_code = str() - -framework_list = ['caffe', 'cntk', 'coreml', 'darknet', - 'mxnet', 'pytorch', 'tensorflow'] # Haven't added 'keras' yet -frame_model_map = { - 'caffe': {'architecture': 'prototxt', 'weights': 'caffemodel'}, - 'cntk': {'architecture': 'model'}, - 'coreml': {'architecture': 'mlmodel'}, - 'darknet': {'architecture': 'cfg', 'weights': 'weights'}, - 'mxnet': {'architecture': 'json', 'weights': 'params'}, - 'pytorch': {'architecture': 'pth'}, - 'tensorflow': {'architecture': 'tgz'} -} # Haven't add 'keras' yet -dataset_list = ['imagenet', 'imagenet11k', 'Pascal VOC', 'grocery100'] - - -def add_code(code): - global markdown_code - markdown_code += code - - -def add_header(level, code): - add_code("#" * level + " " + code + '\n\n') - - -def draw_line(num): - 
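# writes the empty markdown header row ("| | ... |") plus the "|-|-...|" separator row a markdown table requires - 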
add_code("| " * num + "|\n") - add_code(("|-" * num + "|\n")) - - -def save_code(filepath): - with open(filepath, 'w') as f: - f.write(markdown_code) - print("Markdown generate succeeded!") - - -def LoadJson(json_path): - with open(json_path, encoding='utf-8') as f: - data = json.load(f) - return data - - -def RegenerateJsonByDataset(data): - new_data = {} - new_data['dataset'] = {} - for i in range(len(dataset_list)): - new_data['dataset'][dataset_list[i]] = [] - for mo in data['models']: - ds = mo['dataset'] - item = {} - item['name'] = mo['name'] - item['framework'] = mo['framework'] - item['source'] = mo['source'] - item['link'] = mo['link'] - item['version'] = "" - new_data['dataset'][ds].append(item) - - # with open('modelmapbydataset.json', 'w') as outfile: - # json.dump(new_data, outfile) - return new_data - - -def GenerateModelBlock_v2(model): - link = model['link'] - framework = model['framework'] - - # generate makedown script - add_code('''|{}
<br/>Framework: {}<br/>Download: '''.format( - model['name'], - model['framework'] - )) - for k in link.keys(): - if link[k]: - add_code("[{}]({}) ".format( - frame_model_map[framework][k], - link[k] - )) - add_code("<br/>Source: ") - if (model['source'] != ""): - add_code("[Link]({})".format(model['source'])) - add_code("<br/>
") - - -def DrawTableBlock(data, dataset_name): - colnum = 3 - add_header(3, dataset_name) - draw_line(colnum) - models = data['dataset'][dataset_name] - num = 0 - for i in range(len(models)): - if ((models[i]['framework'] != 'keras') and (models[i]['link']['architecture'] != "")): - GenerateModelBlock_v2(models[i]) - num += 1 - if num % colnum == 0: - add_code("\n") - add_code("\n") - - -def GenerateModelsList_v2(data): - - add_header(1, "Model Collection") - - # add Image Classification - add_header(2, "Image Classification") - for ds_name in ['imagenet', 'imagenet11k']: - DrawTableBlock(data, ds_name) - - # add Object Detection - add_header(2, "Object Detection") - for ds_name in ['Pascal VOC', 'grocery100']: - DrawTableBlock(data, ds_name) - - add_code("\n") - - -def GenerateIntroductionAndTutorial(): - # MMdnn introduction - add_header(1, "Introduction") - text_intro = '''This is a collection of pre-trained models in different deep learning frameworks.\n -You can download the model you want by simply click the download link.\n -With the download model, you can convert them to different frameworks.\n -Next session show an example to show you how to convert pre-trained model between frameworks.\n\n''' - add_code(text_intro) - - # steps for model conversion - add_header(2, "Steps to Convert Model") - text_example = '''**Example: Convert vgg19 model from Tensorflow to CNTK**\n -1. Install the stable version of MMdnn - ```bash - pip install mmdnn - ``` -2. Download Tensorflow pre-trained model - - [x] **Method 1:** Directly download from below model collection - - [x] **Method 2:** Use command line - ```bash - $ mmdownload -f tensorflow -n vgg19 - - Downloading file [./vgg_19_2016_08_28.tar.gz] from [http://download.tensorflow.org/models/vgg_19_2016_08_28.tar.gz] - progress: 520592.0 KB downloaded, 100% - Model saved in file: ./imagenet_vgg19.ckpt - ``` - **NOTICE:** _the model name after the **'-n'** argument must be the models appearence in the below model collection._ - -3. Convert model architecture(*.ckpt.meta) and weights(.ckpt) from Tensorflow to IR - ```bash - $ mmtoir -f tensorflow -d vgg19 -n imagenet_vgg19.ckpt.meta -w imagenet_vgg19.ckpt --dstNodeName MMdnn_Output - - Parse file [imagenet_vgg19.ckpt.meta] with binary format successfully. - Tensorflow model file [imagenet_vgg19.ckpt.meta] loaded successfully. - Tensorflow checkpoint file [imagenet_vgg19.ckpt] loaded successfully. [38] variables loaded. - IR network structure is saved as [vgg19.json]. - IR network structure is saved as [vgg19.pb]. - IR weights are saved as [vgg19.npy]. - ``` -4. Convert models from IR to PyTorch code snippet and weights - ```bash - $ mmtocode -f pytorch -n vgg19.pb --IRWeightPath vgg19.npy --dstModelPath pytorch_vgg19.py -dw pytorch_vgg19.npy - - Parse file [vgg19.pb] with binary format successfully. - Target network code snippet is saved as [pytorch_vgg19.py]. - Target weights are saved as [pytorch_vgg19.npy]. - ``` -5. Generate PyTorch model from code snippet file and weight file - ```bash - $ mmtomodel -f pytorch -in pytorch_vgg19.py -iw pytorch_vgg19.npy --o pytorch_vgg19.pth - - PyTorch model file is saved as [pytorch_vgg19.pth], generated by [pytorch_vgg19.py] and [pytorch_vgg19.npy]. - Notice that you may need [pytorch_vgg19.py] to load the model back. 
- ``` -''' - add_code(text_example) - add_code("\n\n") - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('-f', '--file', type=str, - default="modelmap2.json", help="the path of json file") - parser.add_argument('-d', '--distFile', type=str, - default="Collection_v2.md", help="the path of the readme file") - args = parser.parse_args() - - # Generate model converter description - GenerateIntroductionAndTutorial() - - # Generate models list - data = LoadJson(args.file) - new_data = RegenerateJsonByDataset(data) - GenerateModelsList_v2(new_data) - save_code(args.distFile) - - -if __name__ == "__main__": - main() -import json -import os -import argparse - -markdown_code = str() - -framework_list = ['caffe', 'cntk', 'coreml', 'darknet', - 'mxnet', 'pytorch', 'tensorflow'] # Haven't add 'keras' yet -frame_model_map = { - 'caffe': {'architecture': 'prototxt', 'weights': 'caffemodel'}, - 'cntk': {'architecture': 'model'}, - 'coreml': {'architecture': 'mlmodel'}, - 'darknet': {'architecture': 'cfg', 'weights': 'weights'}, - 'mxnet': {'architecture': 'json', 'weights': 'params'}, - 'pytorch': {'architecture': 'pth'}, - 'tensorflow': {'architecture': 'tgz'} -} # Haven't add 'keras' yet -dataset_list = ['imagenet', 'imagenet11k', 'Pascal VOC', 'grocery100'] - - -def add_code(code): - global markdown_code - markdown_code += code - - -def add_header(level, code): - add_code("#" * level + " " + code + '\n\n') - - -def draw_line(num): - add_code("| " * num + "|\n") - add_code(("|-" * num + "|\n")) - - -def save_code(filepath): - with open(filepath, 'w') as f: - f.write(markdown_code) - print("Markdown generate succeeded!") - - -def LoadJson(json_path): - with open(json_path, encoding='utf-8') as f: - data = json.load(f) - return data - - -def GenerateModelBlock(model): - link = model["link"] - framework = model["framework"] - - # generate makedown script - add_code('''|{}
<br/>Framework: {}<br/>Dataset: _{}_<br/>Download: '''.format( - model["name"], - model["framework"], - model["dataset"], - )) - for k in link.keys(): - if link[k]: - add_code("[{}]({}) ".format( - frame_model_map[framework][k], link[k])) - add_code("<br/>Source: ") - if (model["source"] != ""): - add_code("[Link]({})".format(model["source"])) - add_code("<br/>
") - - -def GenerateModelsList(data): - colnum = 3 - add_header(1, "Model Collection") - draw_line(colnum) - models = data["models"] - num = 0 - for i in range(len(data["models"])): - if ((models[i]["framework"] != "keras") and (models[i]["link"]["architecture"] != "")): - GenerateModelBlock(models[i]) - num += 1 - if num % colnum == 0: - add_code("\n") - add_code("\n") - - -def GenerateIntroductionAndTutorial(): - # MMdnn introduction - add_header(1, "Introduction") - text_intro = '''This is a collection of pre-trained models in different deep learning frameworks.\n -You can download the model you want by simply click the download link.\n -With the download model, you can convert them to different frameworks.\n -Next session show an example to show you how to convert pre-trained model between frameworks.\n\n''' - add_code(text_intro) - - # steps for model conversion - add_header(2, "Steps to Convert Model") - text_example = '''**Example: Convert vgg19 model from Tensorflow to CNTK**\n -1. Install the stable version of MMdnn - ```bash - pip install mmdnn - ``` -2. Download Tensorflow pre-trained model - - [x] **Method 1:** Directly download from below model collection - - [x] **Method 2:** Use command line - ```bash - $ mmdownload -f tensorflow -n vgg19 - - Downloading file [./vgg_19_2016_08_28.tar.gz] from [http://download.tensorflow.org/models/vgg_19_2016_08_28.tar.gz] - progress: 520592.0 KB downloaded, 100% - Model saved in file: ./imagenet_vgg19.ckpt - ``` - **NOTICE:** _the model name after the **'-n'** argument must be the models appearence in the below model collection._ - -3. Convert model architecture(*.ckpt.meta) and weights(.ckpt) from Tensorflow to IR - ```bash - $ mmtoir -f tensorflow -d vgg19 -n imagenet_vgg19.ckpt.meta -w imagenet_vgg19.ckpt --dstNodeName MMdnn_Output - - Parse file [imagenet_vgg19.ckpt.meta] with binary format successfully. - Tensorflow model file [imagenet_vgg19.ckpt.meta] loaded successfully. - Tensorflow checkpoint file [imagenet_vgg19.ckpt] loaded successfully. [38] variables loaded. - IR network structure is saved as [vgg19.json]. - IR network structure is saved as [vgg19.pb]. - IR weights are saved as [vgg19.npy]. - ``` -4. Convert models from IR to PyTorch code snippet and weights - ```bash - $ mmtocode -f pytorch -n vgg19.pb --IRWeightPath vgg19.npy --dstModelPath pytorch_vgg19.py -dw pytorch_vgg19.npy - - Parse file [vgg19.pb] with binary format successfully. - Target network code snippet is saved as [pytorch_vgg19.py]. - Target weights are saved as [pytorch_vgg19.npy]. - ``` -5. Generate PyTorch model from code snippet file and weight file - ```bash - $ mmtomodel -f pytorch -in pytorch_vgg19.py -iw pytorch_vgg19.npy --o pytorch_vgg19.pth - - PyTorch model file is saved as [pytorch_vgg19.pth], generated by [pytorch_vgg19.py] and [pytorch_vgg19.npy]. - Notice that you may need [pytorch_vgg19.py] to load the model back. 
- ``` -''' - add_code(text_example) - add_code("\n\n") - - -def main(): - parser = argparse.ArgumentParser() - parser.add_argument('-f', '--file', type=str, - default="modelmap2.json", help="the path of json file") - parser.add_argument('-d', '--distFile', type=str, - default="README.md", help="the path of the readme file") - args = parser.parse_args() - - # Generate model converter description - GenerateIntroductionAndTutorial() - - # Generate models list - data = LoadJson(args.file) - GenerateModelsList(data) - save_code(args.distFile) - - -if __name__ == "__main__": - main() -import sys as _sys -import google.protobuf.text_format as text_format -from six import text_type as _text_type - - -def _convert(args): - if args.dstFramework == 'caffe': - from mmdnn.conversion.caffe.caffe_emitter import CaffeEmitter - if args.IRWeightPath is None: - emitter = CaffeEmitter(args.IRModelPath) - else: - assert args.dstWeightPath - emitter = CaffeEmitter((args.IRModelPath, args.IRWeightPath)) - - elif args.dstFramework == 'keras': - from mmdnn.conversion.keras.keras2_emitter import Keras2Emitter - emitter = Keras2Emitter((args.IRModelPath, args.IRWeightPath)) - - elif args.dstFramework == 'tensorflow': - from mmdnn.conversion.tensorflow.tensorflow_emitter import TensorflowEmitter - if args.IRWeightPath is None: - # Convert network architecture only - emitter = TensorflowEmitter(args.IRModelPath) - else: - emitter = TensorflowEmitter((args.IRModelPath, args.IRWeightPath)) - - elif args.dstFramework == 'cntk': - from mmdnn.conversion.cntk.cntk_emitter import CntkEmitter - if args.IRWeightPath is None: - emitter = CntkEmitter(args.IRModelPath) - else: - emitter = CntkEmitter((args.IRModelPath, args.IRWeightPath)) - - elif args.dstFramework == 'coreml': - raise NotImplementedError("CoreML emitter is not finished yet.") - - elif args.dstFramework == 'pytorch': - if not args.dstWeightPath or not args.IRWeightPath: - raise ValueError("Need to set a target weight filename.") - from mmdnn.conversion.pytorch.pytorch_emitter import PytorchEmitter - emitter = PytorchEmitter((args.IRModelPath, args.IRWeightPath)) - - elif args.dstFramework == 'mxnet': - from mmdnn.conversion.mxnet.mxnet_emitter import MXNetEmitter - if args.IRWeightPath is None: - emitter = MXNetEmitter(args.IRModelPath) - else: - if args.dstWeightPath is None: - raise ValueError( - "MXNet emitter needs argument [dstWeightPath(dw)], like -dw mxnet_converted-0000.param") - emitter = MXNetEmitter( - (args.IRModelPath, args.IRWeightPath, args.dstWeightPath)) - elif args.dstFramework == 'onnx': - from mmdnn.conversion.onnx.onnx_emitter import OnnxEmitter - if args.IRWeightPath is None: - raise NotImplementedError("ONNX emitter needs IR weight file") - else: - emitter = OnnxEmitter(args.IRModelPath, args.IRWeightPath) - else: - assert False - - emitter.run(args.dstModelPath, args.dstWeightPath, args.phase) - - return 0 - - -def _get_parser(): - import argparse - - parser = argparse.ArgumentParser( - description='Convert IR model file formats to other format.') - - parser.add_argument( - '--phase', - type=_text_type, - choices=['train', 'test'], - default='test', - help='Convert phase (train/test) for destination toolkits.' 
- ) - - parser.add_argument( - '--dstFramework', '-f', - type=_text_type, - choices=['caffe', 'caffe2', 'cntk', 'mxnet', 'keras', - 'tensorflow', 'coreml', 'pytorch', 'onnx'], - required=True, - help='Format of model at srcModelPath (default is to auto-detect).') - - parser.add_argument( - '--IRModelPath', '-n', '-in', - type=_text_type, - required=True, - help='Path to the IR network structure file.') - - parser.add_argument( - '--IRWeightPath', '-w', '-iw', - type=_text_type, - required=False, - default=None, - help='Path to the IR network structure file.') - - parser.add_argument( - '--dstModelPath', '-d', '-o', - type=_text_type, - required=True, - help='Path to save the destination model') - - # MXNet - parser.add_argument( - '--dstWeightPath', '-dw', '-ow', - type=_text_type, - default=None, - help='[MXNet] Path to save the destination weight.') - return parser - - -def _main(): - parser = _get_parser() - args = parser.parse_args() - ret = _convert(args) - _sys.exit(int(ret)) # cast to int or else the exit code is always 1 - - -if __name__ == '__main__': - _main() -import sys as _sys -import google.protobuf.text_format as text_format -from six import text_type as _text_type - - -def _convert(args): - if args.framework == 'caffe': - raise NotImplementedError( - "Destination [Caffe] is not implemented yet.") - - elif args.framework == 'keras': - raise NotImplementedError( - "Destination [Keras] is not implemented yet.") - - elif args.framework == 'tensorflow': - raise NotImplementedError( - "Destination [Tensorflow] is not implemented yet.") - - elif args.framework == 'cntk': - raise NotImplementedError("Destination [CNTK] is not implemented yet.") - - elif args.framework == 'coreml': - from mmdnn.conversion.coreml.coreml_emitter import CoreMLEmitter - assert args.inputNetwork is not None - assert args.inputWeight is not None - emitter = CoreMLEmitter(args.inputNetwork, args.inputWeight) - model, in_, out_ = emitter.gen_model( - args.inputNames, - args.outputNames, - image_input_names=set( - args.imageInputNames) if args.imageInputNames else None, - is_bgr=args.isBGR, - red_bias=args.redBias, - blue_bias=args.blueBias, - green_bias=args.greenBias, - gray_bias=args.grayBias, - image_scale=args.scale, - class_labels=args.classInputPath if args.classInputPath else None, - predicted_feature_name=args.predictedFeatureName) - - """ - from google.protobuf import text_format - with open(args.output+'.txt', 'w') as f: - f.write(text_format.MessageToString(model)) - """ - - with open(args.output, 'wb') as f: - model = model.SerializeToString() - f.write(model) - - return 0 - - elif args.framework == 'pytorch': - if not args.dstWeightPath or not args.IRWeightPath: - raise ValueError("Need to set a target weight filename.") - from mmdnn.conversion.pytorch.pytorch_emitter import PytorchEmitter - emitter = PytorchEmitter((args.IRModelPath, args.IRWeightPath)) - - elif args.framework == 'mxnet': - from mmdnn.conversion.mxnet.mxnet_emitter import MXNetEmitter - if args.IRWeightPath == None: - emitter = MXNetEmitter(args.IRModelPath) - else: - emitter = MXNetEmitter( - (args.IRModelPath, args.IRWeightPath, args.inputShape, args.dstWeightPath)) - - else: - assert False - - emitter.run(args.output) - - return 0 - - -def _get_parser(): - import argparse - - parser = argparse.ArgumentParser( - description='Convert IR model file formats to other format.') - - parser.add_argument( - '-f', '--framework', type=_text_type, choices=['coreml'], required=True, - help='Format of model at srcModelPath (default is to 
auto-detect).'
-    )
-
-    parser.add_argument(
-        '-in', '--inputNetwork',
-        type=_text_type,
-        required=True,
-        help='Path of the IR network architecture file.')
-
-    parser.add_argument(
-        '-iw', '--inputWeight',
-        type=_text_type,
-        required=True,
-        help='Path to the IR network weight file.')
-
-    parser.add_argument(
-        '-o', '--output',
-        type=_text_type,
-        required=True,
-        help='Path to save the destination model')
-
-    # Caffe
-    parser.add_argument(
-        '--phase', type=_text_type, choices=['train', 'test'], default='test',
-        help='[Caffe] Convert phase (train/test) for destination toolkits.'
-    )
-
-    # For CoreML
-    parser.add_argument('--inputNames', type=_text_type, nargs='*',
-                        help='Names of the feature (input) columns, in order (required for keras models).')
-    parser.add_argument('--outputNames', type=_text_type, nargs='*',
-                        help='Names of the target (output) columns, in order (required for keras models).')
-    parser.add_argument('--imageInputNames', type=_text_type, default=[], action='append',
-                        help='Label the named input as an image. Can be specified more than once for multiple image inputs.')
-    parser.add_argument('--isBGR', action='store_true', default=False,
-                        help='True if the image data is in BGR order (RGB by default)')
-    parser.add_argument('--redBias', type=float, default=0.0,
-                        help='Bias value to be added to the red channel (optional, default 0.0)')
-    parser.add_argument('--blueBias', type=float, default=0.0,
-                        help='Bias value to be added to the blue channel (optional, default 0.0)')
-    parser.add_argument('--greenBias', type=float, default=0.0,
-                        help='Bias value to be added to the green channel (optional, default 0.0)')
-    parser.add_argument('--grayBias', type=float, default=0.0,
-                        help='Bias value to be added to the gray channel for grayscale images (optional, default 0.0)')
-    parser.add_argument('--scale', type=float, default=1.0,
-                        help='Value by which the image data must be scaled (optional, default 1.0)')
-    parser.add_argument('--classInputPath', type=_text_type, default='',
-                        help='Path to class labels (ordered, newline-separated) for treating the neural network as a classifier')
-    parser.add_argument('--predictedFeatureName', type=_text_type, default='class_output',
-                        help='Name of the output feature that captures the class name (for classifier models).')
-    return parser
-
-
-def _main():
-    parser = _get_parser()
-    args = parser.parse_args()
-    ret = _convert(args)
-    _sys.exit(int(ret))  # cast to int or else the exit code is always 1
-
-
-if __name__ == '__main__':
-    _main()
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import sys as _sys
-import argparse
-import mmdnn.conversion._script.convertToIR as convertToIR
-import mmdnn.conversion._script.IRToCode as IRToCode
-import mmdnn.conversion._script.IRToModel as IRToModel
-from six import text_type as _text_type
-import uuid
-import os
-
-
-def _get_parser():
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument(
-        '--srcFramework', '-sf',
-        type=_text_type,
-        choices=["caffe", "caffe2", "cntk", "mxnet",
-                 "keras", "tensorflow", 'tf', 'pytorch'],
-        help="Source toolkit name of the model to be converted.")
-    parser.add_argument(
-        '--inputWeight', '-iw',
-        type=_text_type,
-        default=None,
-        help='Path to the model weights file of the external tool (e.g. caffe weights proto binary, keras h5 binary).')
-    parser.add_argument(
-        '--inputNetwork', '-in',
-        type=_text_type,
-        default=None,
-        help='Path to the model network file of the external tool (e.g. caffe prototxt, keras json).')
-    parser.add_argument(
-        '--dstFramework', '-df',
-        type=_text_type,
-        choices=['caffe', 'caffe2', 'cntk', 'mxnet', 'keras',
-                 'tensorflow', 'coreml', 'pytorch', 'onnx'],
-        required=True,
-        help='Destination toolkit name of the converted model.')
-    parser.add_argument(
-        '--outputModel', '-om',
-        type=_text_type,
-        required=True,
-        help='Path to save the destination model')
-    parser.add_argument(
-        '--dump_tag',
-        type=_text_type,
-        default=None,
-        help='Tensorflow model dump type',
-        choices=['SERVING', 'TRAINING'])
-
-    return parser
-
-
-def _extract_ir_args(args, unknown_args, temp_filename):
-    unknown_args.extend(['--srcFramework', args.srcFramework])
-    if args.inputWeight is not None:
-        unknown_args.extend(['--weights', args.inputWeight])
-    if args.inputNetwork is not None:
-        unknown_args.extend(['--network', args.inputNetwork])
-    unknown_args.extend(['--dstPath', temp_filename])
-
-    ir_parser = convertToIR._get_parser()
-    return ir_parser.parse_known_args(unknown_args)
-
-
-def _extract_code_args(args, unknown_args, temp_filename, network_filename):
-    unknown_args.extend(['--dstFramework', args.dstFramework])
-    unknown_args.extend(['--IRModelPath', temp_filename + '.pb'])
-    unknown_args.extend(['--IRWeightPath', temp_filename + '.npy'])
-    unknown_args.extend(['--dstModelPath', network_filename + '.py'])
-    unknown_args.extend(['--dstWeightPath', temp_filename + '.npy'])
-    code_parser = IRToCode._get_parser()
-    return code_parser.parse_known_args(unknown_args)
-
-
-def _extract_model_args(args, unknown_args, temp_filename):
-    unknown_args.extend(['--framework', args.dstFramework])
-    unknown_args.extend(['--inputNetwork', temp_filename + '.pb'])
-    unknown_args.extend(['--inputWeight', temp_filename + '.npy'])
-    unknown_args.extend(['--output', args.outputModel])
-    model_parser = IRToModel._get_parser()
-    return model_parser.parse_known_args(unknown_args)
-
-
-def remove_temp_files(temp_filename, verbose=False):
-    exts = ['.json', '.pb', '.npy', '.py']
-    for ext in exts:
-        temp_file = temp_filename + ext
-        if os.path.isfile(temp_file):
-            os.remove(temp_file)
-            if verbose:
-                print('temporary file [{}] has been removed.'.format(temp_file))
-
-
-def get_network_filename(framework, temp_filename, output_model_filename):
-    if framework in ['pytorch']:
-        return os.path.join(os.path.dirname(output_model_filename),
-                            os.path.basename(output_model_filename).split('.')[0])
-    return temp_filename
-
-
-def _main():
-    parser = _get_parser()
-    args, unknown_args = parser.parse_known_args()
-    temp_filename = uuid.uuid4().hex
-    ir_args, unknown_args = _extract_ir_args(args, unknown_args, temp_filename)
-    ret = convertToIR._convert(ir_args)
-    if int(ret) != 0:
-        _sys.exit(int(ret))
-    if args.dstFramework != 'coreml':
-        network_filename = get_network_filename(
-            args.dstFramework, temp_filename, args.outputModel)
-        code_args, unknown_args = _extract_code_args(
-            args, unknown_args, temp_filename, network_filename)
-        ret = IRToCode._convert(code_args)
-        if int(ret) != 0:
-            _sys.exit(int(ret))
-        from mmdnn.conversion._script.dump_code import dump_code
-        dump_code(args.dstFramework, network_filename + '.py',
-                  temp_filename + '.npy', args.outputModel, args.dump_tag)
-        remove_temp_files(temp_filename)
-    else:
-        model_args, unknown_args = _extract_model_args(
-            args, unknown_args, temp_filename)
-        ret = IRToModel._convert(model_args)
-        remove_temp_files(temp_filename)
-        _sys.exit(int(ret))
-
-
-if __name__ == '__main__':
-    _main()
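# The scraped script above is a one-step converter: it chains convertToIR,
# IRToCode and dump_code (or IRToModel for CoreML) through a throwaway
# uuid-prefixed temp file. A minimal sketch of driving it programmatically;
# the Keras weights path and output name are hypothetical placeholders.
import sys

sys.argv = [
    'mmconvert',
    '-sf', 'keras',                   # source framework
    '-iw', 'inception_v3.h5',         # hypothetical Keras weights file
    '-df', 'tensorflow',              # destination framework
    '-om', 'converted_inception_v3',  # where the converted model is written
]
_main()  # convertToIR -> IRToCode -> dump_code, then temp files are removed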
-import sys as _sys
-import google.protobuf.text_format as text_format
-from six import text_type as _text_type
-
-
-def _convert(args):
-    if args.inputShape is not None:
-        inputshape = []
-        for x in args.inputShape:
-            shape = x.split(',')
-            inputshape.append([int(x) for x in shape])
-    else:
-        inputshape = [None]
-    if args.srcFramework == 'caffe':
-        from mmdnn.conversion.caffe.transformer import CaffeTransformer
-        transformer = CaffeTransformer(
-            args.network, args.weights, "tensorflow", inputshape[0], phase=args.caffePhase)
-        graph = transformer.transform_graph()
-        data = transformer.transform_data()
-
-        from mmdnn.conversion.caffe.writer import JsonFormatter, ModelSaver, PyWriter
-        JsonFormatter(graph).dump(args.dstPath + ".json")
-        print("IR network structure is saved as [{}.json].".format(args.dstPath))
-
-        prototxt = graph.as_graph_def().SerializeToString()
-        with open(args.dstPath + ".pb", 'wb') as of:
-            of.write(prototxt)
-        print("IR network structure is saved as [{}.pb].".format(args.dstPath))
-
-        import numpy as np
-        with open(args.dstPath + ".npy", 'wb') as of:
-            np.save(of, data)
-        print("IR weights are saved as [{}.npy].".format(args.dstPath))
-
-        return 0
-
-    elif args.srcFramework == 'caffe2':
-        raise NotImplementedError("Caffe2 is not supported yet.")
-
-    elif args.srcFramework == 'keras':
-        if args.network is not None:
-            model = (args.network, args.weights)
-        else:
-            model = args.weights
-
-        from mmdnn.conversion.keras.keras2_parser import Keras2Parser
-        parser = Keras2Parser(model)
-
-    elif args.srcFramework == 'tensorflow' or args.srcFramework == 'tf':
-        assert args.network or args.weights
-        if not args.network:
-            if args.dstNodeName is None:
-                raise ValueError("Need to provide the output node of the Tensorflow model.")
-            if args.inNodeName is None:
-                raise ValueError("Need to provide the input node of the Tensorflow model.")
-            if inputshape is None:
-                raise ValueError("Need to provide the input node shape of the Tensorflow model.")
-            assert len(args.inNodeName) == len(inputshape)
-            from mmdnn.conversion.tensorflow.tensorflow_frozenparser import TensorflowParser2
-            parser = TensorflowParser2(
-                args.weights, inputshape, args.inNodeName, args.dstNodeName)
-
-        else:
-            from mmdnn.conversion.tensorflow.tensorflow_parser import TensorflowParser
-            if args.inNodeName and inputshape[0]:
-                parser = TensorflowParser(
-                    args.network, args.weights, args.dstNodeName, inputshape[0], args.inNodeName)
-            else:
-                parser = TensorflowParser(
-                    args.network, args.weights, args.dstNodeName)
-
-    elif args.srcFramework == 'mxnet':
-        assert inputshape is not None
-        if args.weights is None:
-            model = (args.network, inputshape[0])
-        else:
-            # strip the trailing '.params' (7 characters) so the
-            # '<prefix>-<epoch>' checkpoint name can be split below
-            if args.weights.endswith('.params'):
-                args.weights = args.weights[:-7]
-            prefix, epoch = args.weights.rsplit('-', 1)
-            model = (args.network, prefix, epoch, inputshape[0])
-
-        from mmdnn.conversion.mxnet.mxnet_parser import MXNetParser
-        parser = MXNetParser(model)
-
-    elif args.srcFramework == 'cntk':
-        from mmdnn.conversion.cntk.cntk_parser import CntkParser
-        model = args.network or args.weights
-        parser = CntkParser(model)
-
-    elif args.srcFramework == 'pytorch':
-        assert inputshape is not None
-        from mmdnn.conversion.pytorch.pytorch_parser import PytorchParser
-        model = args.network or args.weights
-        assert model is not None
-        parser = PytorchParser(model, inputshape[0])
-
-    elif args.srcFramework == 'torch' or args.srcFramework == 'torch7':
-        from mmdnn.conversion.torch.torch_parser import TorchParser
-        model = args.network or args.weights
-        assert model is not None
-        parser = TorchParser(model, inputshape[0])
-
-    elif args.srcFramework == 'onnx':
-        from mmdnn.conversion.onnx.onnx_parser import ONNXParser
-        parser = ONNXParser(args.network)
-
-    elif args.srcFramework == 'darknet':
-        from mmdnn.conversion.darknet.darknet_parser import DarknetParser
-        parser = DarknetParser(args.network, args.weights, args.darknetStart)
-
-    elif args.srcFramework == 'coreml':
-        from mmdnn.conversion.coreml.coreml_parser import CoremlParser
-        parser = CoremlParser(args.network)
-
-    else:
-        raise ValueError("Unknown framework [{}].".format(args.srcFramework))
-
-    parser.run(args.dstPath)
-
-    return 0
-
-
-def _get_parser():
-    import argparse
-
-    parser = argparse.ArgumentParser(
-        description='Convert other model file formats to IR format.')
-
-    parser.add_argument(
-        '--srcFramework', '-f',
-        type=_text_type,
-        choices=["caffe", "caffe2", "cntk", "mxnet", "keras", "tensorflow",
-                 'tf', 'torch', 'torch7', 'onnx', 'darknet', 'coreml', 'pytorch'],
-        help="Source toolkit name of the model to be converted.")
-
-    parser.add_argument(
-        '--weights', '-w', '-iw',
-        type=_text_type,
-        default=None,
-        help='Path to the model weights file of the external tool (e.g. caffe weights proto binary, keras h5 binary).')
-
-    parser.add_argument(
-        '--network', '-n', '-in',
-        type=_text_type,
-        default=None,
-        help='Path to the model network file of the external tool (e.g. caffe prototxt, keras json).')
-
-    parser.add_argument(
-        '--dstPath', '-d', '-o',
-        type=_text_type,
-        required=True,
-        help='Path to save the IR model.')
-
-    parser.add_argument(
-        '--inNodeName', '-inode',
-        nargs='+',
-        type=_text_type,
-        default=None,
-        help="[Tensorflow] Input nodes' name of the graph.")
-
-    parser.add_argument(
-        '--dstNodeName', '-node',
-        nargs='+',
-        type=_text_type,
-        default=None,
-        help="[Tensorflow] Output nodes' name of the graph.")
-
-    parser.add_argument(
-        '--inputShape',
-        nargs='+',
-        type=_text_type,
-        default=None,
-        help='[Tensorflow/MXNet/Caffe2/Torch7] Input shape of the model (channel, height, width)')
-
-    # Caffe
-    parser.add_argument(
-        '--caffePhase',
-        type=_text_type,
-        default='TRAIN',
-        help='[Caffe] Convert the specific phase of the caffe model.')
-
-    # Darknet
-    parser.add_argument(
-        '--darknetStart',
-        type=_text_type,
-        choices=["0", "1"],
-        help='[Darknet] Parse the darknet model weight file from the start.')
-
-    return parser
-
-
-def _main():
-    parser = _get_parser()
-    args = parser.parse_args()
-    ret = _convert(args)
-    _sys.exit(int(ret))  # cast to int or else the exit code is always 1
-
-
-if __name__ == '__main__':
-    _main()
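# _convert above turns each comma-separated --inputShape argument into a list
# of ints before handing it to the framework parsers. The same parsing in
# isolation, as a standalone sketch (not part of the scraped file):
def parse_input_shapes(raw_shapes):
    # ['3,224,224'] -> [[3, 224, 224]]; None -> [None], matching _convert above
    if raw_shapes is None:
        return [None]
    return [[int(dim) for dim in shape.split(',')] for shape in raw_shapes]

assert parse_input_shapes(['3,224,224']) == [[3, 224, 224]]
assert parse_input_shapes(None) == [None]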
-import sys as _sys
-from six import text_type as _text_type
-import sys
-import imp
-import os.path
-
-
-def dump_code(framework, network_filepath, weight_filepath, dump_filepath, dump_tag):
-    if network_filepath.endswith('.py'):
-        network_filepath = network_filepath[:-3]
-    sys.path.insert(0, os.path.dirname(os.path.abspath(network_filepath)))
-    MainModel = imp.load_source('MainModel', network_filepath + '.py')
-    if framework == 'caffe':
-        from mmdnn.conversion.caffe.saver import save_model
-    elif framework == 'cntk':
-        from mmdnn.conversion.cntk.saver import save_model
-    elif framework == 'keras':
-        from mmdnn.conversion.keras.saver import save_model
-    elif framework == 'mxnet':
-        from mmdnn.conversion.mxnet.saver import save_model
-    elif framework == 'pytorch':
-        from mmdnn.conversion.pytorch.saver import save_model
-    elif framework == 'tensorflow':
-        from mmdnn.conversion.tensorflow.saver import save_model
-        save_model(MainModel, network_filepath,
-                   weight_filepath, dump_filepath, dump_tag)
-        return 0
-
-    elif framework == 'onnx':
-        from mmdnn.conversion.onnx.saver import save_model
-    else:
-        raise NotImplementedError(
-            "{} saver is not finished yet.".format(framework))
-    save_model(MainModel, network_filepath, weight_filepath, dump_filepath)
-
-    return 0
-
-
-def _get_parser():
-    import argparse
-
-    parser = argparse.ArgumentParser(
-        description='Dump the model code into target model.')
-
-    parser.add_argument(
-        '-f', '--framework', type=_text_type,
-        choices=["caffe", "cntk", "mxnet", "keras", "tensorflow", 'pytorch', 'onnx'],
-        required=True,
-        help='Framework of the converted model to be dumped.'
-    )
-
-    parser.add_argument(
-        '-in', '--inputNetwork',
-        type=_text_type,
-        required=True,
-        help='Path to the model network architecture file.')
-
-    parser.add_argument(
-        '-iw', '--inputWeight',
-        type=_text_type,
-        required=True,
-        help='Path to the model network weight file.')
-
-    parser.add_argument(
-        '-o', '-om', '--outputModel',
-        type=_text_type,
-        required=True,
-        help='Path to save the target model')
-
-    parser.add_argument(
-        '--dump_tag',
-        type=_text_type,
-        default=None,
-        help='Tensorflow model dump type',
-        choices=['SERVING', 'TRAINING'])
-
-    return parser
-
-
-def _main():
-    parser = _get_parser()
-    args = parser.parse_args()
-    ret = dump_code(args.framework, args.inputNetwork,
-                    args.inputWeight, args.outputModel, args.dump_tag)
-    _sys.exit(int(ret))
-
-
-if __name__ == '__main__':
-    _main()
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ----------------------------------------------------------------------------------------------
-
-from six import text_type as _text_type
-
-
-def generate_label(predict, label_file, offset):
-    import os
-
-    if not os.path.exists(label_file):
-        return predict
-
-    with open(label_file, 'r') as f:
-        labels = [l.rstrip() for l in f]
-
-    ret = []
-    for i, j in predict:
-        ret.append((labels[i - offset], i, j))
-
-    return ret
-
-
-def extract_model(args):
-    if args.framework == 'caffe':
-        from mmdnn.conversion.examples.caffe.extractor import caffe_extractor
-        extractor = caffe_extractor()
-
-    elif args.framework == 'keras':
-        from mmdnn.conversion.examples.keras.extractor import keras_extractor
-        extractor = keras_extractor()
-
-    elif args.framework == 'tensorflow' or args.framework == 'tf':
-        from mmdnn.conversion.examples.tensorflow.extractor import tensorflow_extractor
-        extractor = tensorflow_extractor()
-
-    elif args.framework == 'mxnet':
-        from mmdnn.conversion.examples.mxnet.extractor import mxnet_extractor
-        extractor = mxnet_extractor()
-
-    elif args.framework == 'cntk':
-        from mmdnn.conversion.examples.cntk.extractor import cntk_extractor
-        extractor = cntk_extractor()
-
-    elif args.framework == 'pytorch':
-        from mmdnn.conversion.examples.pytorch.extractor import pytorch_extractor
-        extractor = pytorch_extractor()
-
-    elif args.framework == 'darknet':
-        from mmdnn.conversion.examples.darknet.extractor import darknet_extractor
-        extractor = darknet_extractor()
-
-    elif args.framework == 'coreml':
-        from mmdnn.conversion.examples.coreml.extractor import coreml_extractor
-        extractor = coreml_extractor()
-
-    else:
-        raise ValueError("Unknown framework [{}].".format(args.framework))
-
-    files = extractor.download(args.network, args.path)
-
-    if files and args.image:
-        predict = extractor.inference(args.network, files, args.path, args.image)
-        if isinstance(predict, list):
-            print(predict)
-
-        else:
-            if predict.ndim == 1:
-                if predict.shape[0] == 1001:
-                    offset = 1
-                else:
-                    offset = 0
-                top_indices = predict.argsort()[-5:][::-1]
-                predict = [(i, predict[i]) for i in top_indices]
-                predict = generate_label(predict, args.label, offset)
-
-                for line in predict:
-                    print(line)
-
-            else:
-                print(predict.shape)
-                print(predict)
-
-
-def _main():
-    import argparse
-
-    parser = argparse.ArgumentParser(
-        description='Extract pre-trained models for frameworks.')
-
-    parser.add_argument(
-        '--framework', '-f',
-        type=_text_type,
-        required=True,
-        choices=["caffe", "cntk", "mxnet", "keras",
-                 "tensorflow", 'tf', 'pytorch', 'darknet', 'coreml'],
-        help="Framework name")
-
-    parser.add_argument(
-        '--network', '-n',
-        type=_text_type,
-        default=None,
-        help='Path to the model network file of the external tool (e.g. caffe prototxt, keras json).')
-
-    parser.add_argument(
-        '-i', '--image',
-        type=_text_type, help='Test Image Path')
-
-    parser.add_argument(
-        '--path', '-p', '-o',
-        type=_text_type,
-        default='./',
-        help='Path to save the pre-trained model files (e.g. keras h5)')
-
-    parser.add_argument(
-        '-l', '--label',
-        type=_text_type,
-        default='mmdnn/conversion/examples/data/imagenet_1000.txt',
-        help='Path of label.')
-
-    args = parser.parse_args()
-    extract_model(args)
-
-
-if __name__ == '__main__':
-    _main()
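# The inference branch of extract_model above ranks class scores with argsort
# and keeps the five best (1001-way outputs get a one-label offset for the
# background class). The ranking step in isolation, with made-up scores:
import numpy as np

scores = np.array([0.01, 0.60, 0.05, 0.30, 0.04])  # made-up class scores
top_indices = scores.argsort()[-5:][::-1]          # ascending sort, last 5, reversed
top5 = [(int(i), float(scores[i])) for i in top_indices]
# top5 == [(1, 0.6), (3, 0.3), (2, 0.05), (4, 0.04), (0, 0.01)]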
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ----------------------------------------------------------------------------------------------
-
-from __future__ import division
-
-import os
-import sys
-import math
-import numpy as np
-
-import caffe
-from caffe import layers as L
-from caffe import params as P
-from mmdnn.conversion.common.IR.IR_graph import IRGraph, IRGraphNode
-import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2
-from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType
-from mmdnn.conversion.common.DataStructure.emitter import Emitter
-from mmdnn.conversion.common.utils import *
-
-
-class CaffeEmitter(Emitter):
-
-    def __init__(self, model):
-        from six import string_types as _string_types
-        super(CaffeEmitter, self).__init__()
-        if isinstance(model, _string_types):
-            network_path = model
-        else:
-            network_path = model[0]
-            self._load_weights(model[1])
-
-        self.IR_graph = IRGraph(network_path)
-        super(CaffeEmitter, self)._build()
-
-    @property
-    def header_code(self):
-        return """from __future__ import print_function
-import numpy as np
-import sys, argparse
-import caffe
-from caffe import layers as L
-from caffe import params as P
-from caffe import to_proto
-from six import text_type as _text_type
-
-
-__weights_dict = dict()
-
-def load_weights(weight_file):
-    if weight_file == None:
-        return
-
-    try:
-        weights_dict = np.load(weight_file).item()
-    except:
-        weights_dict = np.load(weight_file, encoding='bytes').item()
-
-    return weights_dict
-
-
-def KitModel(weight_file = None):
-    n = caffe.NetSpec()
-"""
-
-    @property
-    def end_code(self):
-        return """    return n
-
-def make_net(prototxt):
-    n = KitModel()
-    with open(prototxt, 'w') as fpb:
-        print(n.to_proto(), file=fpb)
-
-def gen_weight(weight_file, model, prototxt):
-    global __weights_dict
-    __weights_dict = load_weights(weight_file)
-
-    net = caffe.Net(prototxt, caffe.TRAIN)
-
-    for key in __weights_dict:
-        if 'weights' in __weights_dict[key]:
-            net.params[key][0].data.flat = __weights_dict[key]['weights']
-        elif 'mean' in __weights_dict[key]:
-            net.params[key][0].data.flat = __weights_dict[key]['mean']
-            net.params[key][1].data.flat = __weights_dict[key]['var']
-            if 'scale' in __weights_dict[key]:
-                net.params[key][2].data.flat = __weights_dict[key]['scale']
-        elif 'scale' in __weights_dict[key]:
-            net.params[key][0].data.flat = __weights_dict[key]['scale']
-        if 'bias' in __weights_dict[key]:
-            net.params[key][1].data.flat = __weights_dict[key]['bias']
-        if 'gamma' in __weights_dict[key]:  # used for prelu, not sure if other layers use this too
-            net.params[key][0].data.flat = __weights_dict[key]['gamma']
-    net.save(model)
-    return net
-
-
-
-if __name__ == '__main__':
-    parser = argparse.ArgumentParser(description='Generate caffe model and prototxt')
-    parser.add_argument('--weight_file', '-w', type=_text_type, default=None, help='IR weight file')
-    parser.add_argument('--prototxt', '-p', type=_text_type, default='caffe_converted.prototxt')
-    parser.add_argument('--model', '-m', type=_text_type, default='caffe_converted.caffemodel')
-    args = parser.parse_args()
-    # argparse gives us unicode, so we need to convert to str first
-    make_net(str(args.prototxt))
-    gen_weight(str(args.weight_file), str(args.model), str(args.prototxt))
-
-"""
-
-    def gen_code(self, phase='test'):
-        self.phase = phase
-        self.add_body(0, self.header_code)
-
-        # for test
-        # with open("graph.txt", 'w') as f:
-        #     for layer in self.IR_graph.topological_sort:
-        #         current_node = self.IR_graph.get_node(layer)
-        #         print("========current_node=========\n{}".format(current_node.layer), file=f)
-        # test end
-
-        for layer in self.IR_graph.topological_sort:
-            current_node = self.IR_graph.get_node(layer)
-            node_type = current_node.type
-            # print("========current_node={}".format(current_node.layer))
-
-            if hasattr(self, "emit_" + node_type):
-                func = getattr(self, "emit_" + node_type)
-                func(current_node)
-            else:
-                print("CaffeEmitter does not support operator [%s]." % (node_type))
-                self.emit_UNKNOWN(current_node)
-
-        self.add_body(0, "")
-        self.add_body(0, self.end_code)
-
-        return self.body_code
-
-    def run(self, dstNetworkPath, dstWeightPath=None, phase='test'):
-        super(CaffeEmitter, self).run(dstNetworkPath, dstWeightPath, phase)
-        if self.weight_loaded:
-            self.save_weights(self.weights_dict, dstWeightPath)
-
-    @staticmethod
-    def _shapeToStr(shapes):
-        return [dim.size if dim.size > 0 else 1 for dim in shapes.dim]
-
-    def _get_symmetric_padding(self, IR_node):
-        stride_h = IR_node.get_attr('strides')[1]
-        stride_w = IR_node.get_attr('strides')[2]
-
-        # check whether a preceding Pad layer supplies the padding
-        IR_parent_node = self.IR_graph.get_parent(IR_node.name, [0])
-        if IR_parent_node.type == 'Pad':
-            pads = IR_parent_node.get_attr('pads')
-        else:
-            pads = IR_node.get_attr('pads')
-
-        # Pad_h < kernel_h (vgg19 caffe2caffe)
-        if IR_node.type == "Pool":
-            if pads[1]:
-                pad_h = pads[1] + (0 if pads[1] == pads[5] else stride_h)
-            else:
-                pad_h = 0
-            if pads[2]:
-                pad_w = pads[2] + (0 if pads[2] == pads[6] else stride_w)
-            else:
-                pad_w = 0
-        else:
-            pad_h = pads[1] + (0 if pads[1] == pads[5] else stride_h)
-            pad_w = pads[2] + (0 if pads[2] == pads[6] else stride_w)
-
-        return pad_h, pad_w
-    def check_if_need_transpose(self, IR_node):
-        parent = self.IR_graph.get_parent(IR_node.name, [0])
-        while parent.type == 'Flatten' or parent.type == 'Dropout' or parent.type == 'Reshape':
-            parent = self.IR_graph.get_parent(parent.name, [0])
-        dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim)
-        if dim > 2:
-            original_dims = self.weights_dict[IR_node.name]['weights'].shape
-            dims = [i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1]
-            self.weights_dict[IR_node.name]['weights'] = np.reshape(
-                self.weights_dict[IR_node.name]['weights'], dims)
-            self.weights_dict[IR_node.name]['weights'] = np.transpose(
-                self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1])
-            self.weights_dict[IR_node.name]['weights'] = np.reshape(
-                self.weights_dict[IR_node.name]['weights'], original_dims)
-
-    def emit_Conv(self, IR_node):
-        # implement asymmetric paddings by applying symmetric padding then cropping
-        pad_h, pad_w = self._get_symmetric_padding(IR_node)
-
-        num_output = IR_node.get_attr('kernel_shape')[-1]
-        if IR_node.type == "DepthwiseConv":
-            num_group = IR_node.get_attr("kernel_shape")[-2]
-            num_output = IR_node.get_attr('kernel_shape')[-2]
-        else:
-            num_group = IR_node.get_attr("group", 1)
-
-        self.add_body(1, "n.{:<15} = L.Convolution(n.{}, kernel_h={}, kernel_w={}, stride={}, num_output={}, pad_h={}, pad_w={}, group={}, bias_term={}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.get_attr('kernel_shape')[0],
-            IR_node.get_attr('kernel_shape')[1],
-            IR_node.get_attr('strides')[1],
-            num_output,
-            pad_h,
-            pad_w,
-            num_group,
-            IR_node.get_attr('use_bias', False)))
-
-        dim = len(IR_node.get_attr('strides')) - 2
-        if self.weight_loaded:
-            if IR_node.type == "DepthwiseConv":
-                self.weights_dict[IR_node.name]['weights'] = np.swapaxes(
-                    self.weights_dict[IR_node.name]['weights'], -1, -2)
-            self.weights_dict[IR_node.name]['weights'] = np.transpose(
-                self.weights_dict[IR_node.name]['weights'], [dim + 1, dim] + list(range(0, dim)))
-            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(
-                IR_node.name)
-
-        self.check_if_need_crop(IR_node)
-        # keys = []
-        # for key in self.weights_dict[IR_node.name].keys():
-        #     keys.append(key)
-        # print("=======Layer: {}, keys: {}".format(IR_node.name, keys))
-
-    def compute_output_shape(self, IR_node, kernel_h, kernel_w):
-        parent_node = self.IR_graph.get_parent(IR_node.name, [0])
-
-        if parent_node.get_attr('_output_shapes'):
-            shape = parent_node.get_attr('_output_shapes')[0]
-            shape = shape_to_list(shape)
-            h_i = shape[1]
-            w_i = shape[2]
-            pad_h, pad_w = self._get_symmetric_padding(IR_node)
-            stride_h = IR_node.get_attr('strides')[1]
-            stride_w = IR_node.get_attr('strides')[2]
-
-            if IR_node.type == 'Pool':
-                h_o = (h_i + 2 * pad_h - kernel_h + stride_h - 1) // stride_h + 1
-                w_o = (w_i + 2 * pad_w - kernel_w + stride_w - 1) // stride_w + 1
-            else:
-                h_o = (h_i + 2 * pad_h - kernel_h) // stride_h + 1
-                w_o = (w_i + 2 * pad_w - kernel_w) // stride_w + 1
-            return h_o, w_o
-        else:
-            assert False
-
-    def check_if_need_crop(self, IR_node):
-        shape = IR_node.get_attr('_output_shapes')[0]
-        shape = shape_to_list(shape)
-        ir_ho = shape[1]
-        ir_wo = shape[2]
-        if ir_ho < 0 or ir_wo < 0:
-            return
-        if IR_node.type == 'Pool':
-            k_h = IR_node.get_attr('kernel_shape')[1]
-            k_w = IR_node.get_attr('kernel_shape')[2]
-        else:
-            k_h = IR_node.get_attr('kernel_shape')[0]
-            k_w = IR_node.get_attr('kernel_shape')[1]
-
-        caffe_ho, caffe_wo = self.compute_output_shape(IR_node, k_h, k_w)
-
-        # if asymmetric padding, set offset to 1
-        pads = IR_node.get_attr('pads')
-        offset = [0 if pads[1] == pads[5] else 1,
-                  0 if pads[2] == pads[6] else 1]
-        if caffe_ho > ir_ho or caffe_wo > ir_wo:
-            crop_layer_variable_name = IR_node.variable_name + "_crop"
-            self.add_body(1, "n.{:<15} = L.Crop(n.{}, L.DummyData(shape=[dict(dim=[1, {}, {}, {}])], ntop=1), ntop=1, offset={})".format(
-                crop_layer_variable_name,
-                IR_node.variable_name,
-                shape[3],
-                ir_ho,
-                ir_wo,
-                offset))
-            # Change the layer name
-            IR_node.real_name = IR_node.real_name + "_crop"
-
-    def emit_Pool(self, IR_node):
-        pooling_type = IR_node.get_attr('pooling_type')
-        if pooling_type == 'MAX':
-            pooling_type = P.Pooling.MAX
-        elif pooling_type == 'AVG':
-            pooling_type = P.Pooling.AVE
-        elif pooling_type == 'STOCHASTIC':
-            pooling_type = P.Pooling.STOCHASTIC
-        else:
-            raise ValueError()
-
-        if IR_node.layer.attr['global_pooling'].b:
-            self.add_body(1, "n.{:<15} = L.Pooling(n.{}, pool={}, stride={}, global_pooling=True, ntop=1)".format(
-                IR_node.variable_name,
-                self.parent_variable_name(IR_node),
-                pooling_type,
-                IR_node.get_attr('strides')[1]))
-        else:
-            pad_h, pad_w = self._get_symmetric_padding(IR_node)
-            pool_size = IR_node.get_attr('kernel_shape')[1:3]
-            if pool_size[0] != pool_size[1]:
-                self.add_body(1, "n.{:<15} = L.Pooling(n.{}, pool={}, kernel_h={}, kernel_w={}, pad_h={}, pad_w={}, stride={}, ntop=1)".format(
-                    IR_node.variable_name,
-                    self.parent_variable_name(IR_node),
-                    pooling_type,
-                    pool_size[0],
-                    pool_size[1],
-                    pad_h,
-                    pad_w,
-                    IR_node.get_attr('strides')[1]))
-            else:
-                self.add_body(1, "n.{:<15} = L.Pooling(n.{}, pool={}, kernel_size={}, pad_h={}, pad_w={}, stride={}, ntop=1)".format(
-                    IR_node.variable_name,
-                    self.parent_variable_name(IR_node),
-                    pooling_type,
-                    pool_size[0],
-                    pad_h,
-                    pad_w,
-                    IR_node.get_attr('strides')[1]))
-
-        # check if the output shape needs cropping
-        self.check_if_need_crop(IR_node)
-
-    def emit_ResizeBilinear(self, IR_node):
-        shape = IR_node.get_attr("_output_shapes")[0]
-        shape = shape_to_list(shape)
-        self.add_body(1, "n.{:<15} = L.ResizeBilinear(n.{}, height={}, width={}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            shape[1],
-            shape[2]))
-
-    def emit_UNKNOWN(self, IR_node):
-        print(IR_node.IR_layer.name)
-
-    def emit_DataInput(self, IR_node):
-        shape = self._shapeToStr(IR_node.get_attr('shape'))
-        shape = [shape[0], shape[-1]] + shape[1:-1]
-        self.add_body(1, "n.{:<15} = L.Input(shape=[dict(dim={})], ntop=1)".format(
-            IR_node.variable_name,
-            shape))
-
-    def emit_Dropout(self, IR_node):
-        in_place = True
-        self.add_body(1, "n.{:<15} = L.Dropout(n.{}, dropout_ratio={}, in_place={}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            1 - IR_node.get_attr('keep_prob'),
-            in_place))
-
-    def emit_FullyConnected(self, IR_node):
-        self.add_body(1, "n.{:<15} = L.InnerProduct(n.{}, num_output={}, bias_term={}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.layer.attr["units"].i,
-            IR_node.get_attr('use_bias', False)))
-        if self.weight_loaded:
-            self.check_if_need_transpose(IR_node)
-            self.weights_dict[IR_node.name]['weights'] = np.transpose(
-                self.weights_dict[IR_node.name]['weights'], (1, 0))
-            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(
-                IR_node.name)
-
-    def emit_BatchNorm(self, IR_node):
-        self.add_body(1, "n.{:<15} = L.BatchNorm(n.{}, eps={}, use_global_stats={}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.get_attr('epsilon'),
-            self.phase == 'test'))
-
-        scale_layer_var_name = IR_node.variable_name + "_scale"
-        self.add_body(1, "n.{:<15} = L.Scale(n.{}, bias_term={}, in_place=True, ntop=1)".format(
-            scale_layer_var_name,
-            IR_node.variable_name,
-            IR_node.get_attr('bias', False)))
-
-        if self.weight_loaded:
-            self.weights_dict[scale_layer_var_name] = dict()
-            if 'scale' in self.weights_dict[IR_node.name]:
-                self.weights_dict[scale_layer_var_name]['scale'] = self.weights_dict[IR_node.name]['scale']
-            else:
-                self.weights_dict[scale_layer_var_name]['scale'] = 1
-
-            self.weights_dict[IR_node.name]['scale'] = 1
-
-            if 'bias' in self.weights_dict[IR_node.name]:
-                self.weights_dict[scale_layer_var_name]['bias'] = self.weights_dict[IR_node.name]['bias']
-                self.weights_dict[IR_node.name].pop('bias', None)
-            # change the key "name" to "variable_name", in case the layer name has invalid characters
-            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(
-                IR_node.name)
-
-        IR_node.real_name = IR_node.name + "_scale"
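    # Caffe splits the IR BatchNorm into a BatchNorm layer (mean/var only)
    # followed by a Scale layer (gamma/beta), so emit_BatchNorm above registers
    # a second '<name>_scale' weight entry, forces the BatchNorm scale factor
    # to 1, moves the bias over, and points real_name at the Scale layer so
    # downstream nodes connect to the normalized-and-scaled output.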
-    def emit_Scale(self, IR_node):
-        self.add_body(1, "n.{:<15} = L.Scale(n.{}, bias_term={}, in_place=True, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.get_attr('use_bias', False)))
-        if self.weight_loaded:
-            self.weights_dict[IR_node.variable_name] = self.weights_dict.pop(
-                IR_node.name)
-
-    def emit_Constant(self, IR_node):
-        if IR_node.get_attr('value'):
-            value = IR_node.get_attr('value')
-        else:
-            value = self.weights_dict[IR_node.name]['value'][0]
-        IR_node_after = self.IR_graph.get_son(IR_node.name, [0])
-        shape = IR_node_after.get_attr("_output_shapes")[0]
-        shape = shape_to_list(shape)
-        if len(shape) == 4:
-            shape[1], shape[3] = shape[3], shape[1]
-        shape[0] = 1
-        shape = list(map(lambda x: str(x), shape))
-
-        self.add_body(1, "n.{:<15} = L.DummyData(shape=[dict(dim=[{}])], data_filler=dict(type='constant', value={}), ntop=1)".format(
-            IR_node.variable_name,
-            ', '.join(shape),
-            value))
-
-    def emit_LRN(self, IR_node):
-        self.add_body(1, "n.{:<15} = L.LRN(n.{}, local_size={}, alpha={}, beta={}, k={})".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.get_attr('size') * 2 - 1,
-            IR_node.get_attr('alpha'),
-            IR_node.get_attr('beta'),
-            IR_node.get_attr('k')))
-
-    def emit_Add(self, IR_node):
-        input_layers = ', '.join(('n.' + self.IR_graph.get_parent(IR_node.name, [
-            num]).real_variable_name) for num in range(0, len(IR_node.in_edges)))
-        self.add_body(1, "n.{:<15} = L.Eltwise({}, operation=1, ntop=1)".format(
-            IR_node.variable_name,
-            input_layers))
-
-    def emit_Flatten(self, IR_node):
-        self.add_body(1, "n.{:<15} = L.Flatten(n.{})".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node)))
-
-    def emit_Squeeze(self, IR_node):
-        shape = IR_node.get_attr("_output_shapes")[0]
-        shape = shape_to_list(shape)
-        if shape:
-            dim_str = "'dim': {}".format(shape)
-            dim_str = " reshape_param={'shape': { " + dim_str + '} }'
-            self.add_body(1, "n.{:<15} = L.Reshape(n.{}, {})".format(
-                IR_node.variable_name,
-                self.parent_variable_name(IR_node),
-                dim_str))
-        else:
-            IR_node.real_name = self.IR_graph.get_parent(
-                IR_node.name, [0]).real_name
-
-    def emit_Concat(self, IR_node):
-        axis_array = (2, 3, 1, 0)
-        axis = axis_array.index(IR_node.get_attr('axis'))
-        input_layers = ', '.join(
-            ('n.' + self.IR_graph.get_node(edge).real_variable_name) for edge in IR_node.in_edges)
-        self.add_body(1, "n.{:<15} = L.Concat({}, axis={})".format(
-            IR_node.variable_name,
-            input_layers,
-            axis))
-
-    def emit_Sigmoid(self, IR_node):
-        self.add_body(1, "n.{:<15} = L.Sigmoid(n.{}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node)))
-
-    def emit_Relu(self, IR_node):
-        in_place = True
-        self.add_body(1, "n.{:<15} = L.ReLU(n.{}, in_place={}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            in_place))
-
-    def emit_LeakyRelu(self, IR_node):
-        in_place = True
-        self.add_body(1, "n.{:<15} = L.ReLU(n.{}, in_place={}, negative_slope={}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            in_place,
-            IR_node.IR_layer.attr['alpha'].f))
-
-    def emit_PRelu(self, IR_node):
-        in_place = True
-        self.add_body(1, "n.{:<15} = L.PReLU(n.{}, in_place={}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            in_place))
-
-    def emit_Tanh(self, IR_node):
-        self.add_body(1, "n.{:<15} = L.TanH(n.{}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node)))
-
-    def emit_Softmax(self, IR_node):
-        self.add_body(1, "n.{:<15} = L.Softmax(n.{}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node)))
-
-    def emit_Pad(self, IR_node):
-        IR_node.real_name = self.IR_graph.get_parent(
-            IR_node.name, [0]).real_name
-
-    def reduction(self, IR_node, op, axes):
-        # Convert NHWC (IR) to NCHW (Caffe): [0, 1, 2, 3] -> [0, 3, 1, 2]
-        if len(axes) == 1:
-            assert (axes[0] == 2)
-        elif len(axes) == 2:
-            assert ((axes[0] == 1) and (axes[1] == 2))
-
-        self.add_body(1, "n.{:<15} = L.Reduction(n.{}, operation={}, axis={}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            op,
-            len(axes)))
-
-        if IR_node.get_attr('keepdims'):
-            shape = IR_node.get_attr("_output_shapes")[0]
-            shape = shape_to_list(shape)
-            shape = [1] + [shape[-1]] + shape[1:-1]
-            dim_str = "'dim': {}".format(shape)
-            dim_str = "{'shape': { " + dim_str + '} }'
-            self.add_body(1, "n.{:<15} = L.Reshape(n.{}, reshape_param={})".format(
-                IR_node.variable_name + "_reshape",
-                IR_node.real_variable_name,
-                dim_str))
-            IR_node.real_name = IR_node.real_name + '_reshape'
-
-    def emit_ReduceMean(self, IR_node):
-        self.reduction(IR_node, 4, IR_node.get_attr('axes'))
-
-    def emit_ReduceSum(self, IR_node):
-        self.reduction(IR_node, 1, IR_node.get_attr('axes'))
-
-    def emit_Relu6(self, IR_node):
-        self.emit_Relu(IR_node)
-
-    def emit_DepthwiseConv(self, IR_node):
-        self.emit_Conv(IR_node)
-
-    def emit_Const(self, IR_node):
-        pass
-
-    def emit_Shape(self, IR_node):
-        pass
-
-    def emit_Reshape(self, IR_node):
-        shape = IR_node.get_attr("_output_shapes")[0]
-        shape = shape_to_list(shape)
-        if shape:
-            dim_str = "'dim': {}".format(shape)
-            dim_str = " reshape_param={'shape': { " + dim_str + '} }'
-            self.add_body(1, "n.{:<15} = L.Reshape(n.{}, {})".format(
-                IR_node.variable_name,
-                self.parent_variable_name(IR_node),
-                dim_str))
-        else:
-            IR_node.real_name = self.IR_graph.get_parent(
-                IR_node.name, [0]).real_name
-
-    def emit_Slice(self, IR_node):
-        pass
-
-    def emit_Pack(self, IR_node):
-        pass
-
-    def emit_Abs(self, IR_node):
-        self.add_body(1, "n.{:<15} = L.AbsVal(n.{}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node)))
-
-    def emit_Sub(self, IR_node):
-        input_layers = ', '.join(
-            ('n.' + self.IR_graph.get_node(edge).real_variable_name) for edge in IR_node.in_edges)
-        self.add_body(1, "n.{:<15} = L.Eltwise({}, coeff=[1, -1], ntop=1)".format(
-            IR_node.variable_name,
-            input_layers))
-
-    def emit_Mul(self, IR_node):
-        if len(IR_node.in_edges) == 2:
-            input_layers = ', '.join(
-                ('n.' + self.IR_graph.get_node(edge).real_variable_name) for edge in IR_node.in_edges)
-            self.add_body(1, "n.{:<15} = L.Eltwise({}, operation=0, ntop=1)".format(
-                IR_node.variable_name,
-                input_layers))
-        elif len(IR_node.in_edges) == 1:
-            self.emit_Scale(IR_node)
-        else:
-            assert False
-
-    def emit_UpSampling2D(self, IR_node):
-        scales = IR_node.get_attr('scales')
-        scale = tuple(scales)[0]
-
-        shape = IR_node.get_attr('_output_shapes')[0]
-        shape = shape_to_list(shape)
-
-        self.add_body(1, "n.{:<15} = L.Deconvolution(n.{}, convolution_param=dict(kernel_size={}, stride={}, pad={}, num_output={}, group={}, bias_term={}), param=[dict(lr_mult=0)], ntop=1)".format(
-            IR_node.variable_name,
-            IR_node.in_edges[0],
-            2 * scale - scale % 2,
-            scale,
-            int(math.ceil((scale - 1) / 2)),
-            shape[-1],
-            shape[-1],
-            False))
-
-    # def emit_Square(self, IR_node):
-    #     input_layers = ', '.join(('n.' + self.IR_graph.get_node(edge).real_variable_name) for edge in IR_node.in_edges)
-    #     self.add_body(1, "n.{:<15} = L.Square({}, ntop=1)".format(
-    #         IR_node.variable_name,
-    #         input_layers))
-
-    def emit_Elu(self, IR_node):
-        in_place = True
-        self.add_body(1, "n.{:<15} = L.ELU(n.{}, in_place={}, ntop=1)".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            in_place))
-# Generated by the protocol buffer compiler. DO NOT EDIT!
-# source: caffe.proto - -from google.protobuf import descriptor_pb2 -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import reflection as _reflection -from google.protobuf import message as _message -from google.protobuf import descriptor as _descriptor -from google.protobuf.internal import enum_type_wrapper -import sys -_b = sys.version_info[0] < 3 and ( - lambda x: x) or (lambda x: x.encode('latin1')) -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='caffe.proto', - package='caffe', - syntax='proto2', - serialized_pb=_b( - '\n\x0b\x63\x61\x66\x66\x65.proto\x12\x05\x63\x61\x66\x66\x65\"\x1c\n\tBlobShape\x12\x0f\n\x03\x64im\x18\x01 \x03(\x03\x42\x02\x10\x01\"\xcc\x01\n\tBlobProto\x12\x1f\n\x05shape\x18\x07 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x10\n\x04\x64\x61ta\x18\x05 \x03(\x02\x42\x02\x10\x01\x12\x10\n\x04\x64iff\x18\x06 \x03(\x02\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_data\x18\x08 \x03(\x01\x42\x02\x10\x01\x12\x17\n\x0b\x64ouble_diff\x18\t \x03(\x01\x42\x02\x10\x01\x12\x0e\n\x03num\x18\x01 \x01(\x05:\x01\x30\x12\x13\n\x08\x63hannels\x18\x02 \x01(\x05:\x01\x30\x12\x11\n\x06height\x18\x03 \x01(\x05:\x01\x30\x12\x10\n\x05width\x18\x04 \x01(\x05:\x01\x30\"2\n\x0f\x42lobProtoVector\x12\x1f\n\x05\x62lobs\x18\x01 \x03(\x0b\x32\x10.caffe.BlobProto\"\x81\x01\n\x05\x44\x61tum\x12\x10\n\x08\x63hannels\x18\x01 \x01(\x05\x12\x0e\n\x06height\x18\x02 \x01(\x05\x12\r\n\x05width\x18\x03 \x01(\x05\x12\x0c\n\x04\x64\x61ta\x18\x04 \x01(\x0c\x12\r\n\x05label\x18\x05 \x01(\x05\x12\x12\n\nfloat_data\x18\x06 \x03(\x02\x12\x16\n\x07\x65ncoded\x18\x07 \x01(\x08:\x05\x66\x61lse\"\x8a\x02\n\x0f\x46illerParameter\x12\x16\n\x04type\x18\x01 \x01(\t:\x08\x63onstant\x12\x10\n\x05value\x18\x02 \x01(\x02:\x01\x30\x12\x0e\n\x03min\x18\x03 \x01(\x02:\x01\x30\x12\x0e\n\x03max\x18\x04 \x01(\x02:\x01\x31\x12\x0f\n\x04mean\x18\x05 \x01(\x02:\x01\x30\x12\x0e\n\x03std\x18\x06 \x01(\x02:\x01\x31\x12\x12\n\x06sparse\x18\x07 \x01(\x05:\x02-1\x12\x42\n\rvariance_norm\x18\x08 \x01(\x0e\x32#.caffe.FillerParameter.VarianceNorm:\x06\x46\x41N_IN\"4\n\x0cVarianceNorm\x12\n\n\x06\x46\x41N_IN\x10\x00\x12\x0b\n\x07\x46\x41N_OUT\x10\x01\x12\x0b\n\x07\x41VERAGE\x10\x02\"\x8e\x02\n\x0cNetParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05input\x18\x03 \x03(\t\x12%\n\x0binput_shape\x18\x08 \x03(\x0b\x32\x10.caffe.BlobShape\x12\x11\n\tinput_dim\x18\x04 \x03(\x05\x12\x1d\n\x0e\x66orce_backward\x18\x05 \x01(\x08:\x05\x66\x61lse\x12\x1e\n\x05state\x18\x06 \x01(\x0b\x32\x0f.caffe.NetState\x12\x19\n\ndebug_info\x18\x07 \x01(\x08:\x05\x66\x61lse\x12$\n\x05layer\x18\x64 \x03(\x0b\x32\x15.caffe.LayerParameter\x12\'\n\x06layers\x18\x02 \x03(\x0b\x32\x17.caffe.V1LayerParameter\"\xc3\n\n\x0fSolverParameter\x12\x0b\n\x03net\x18\x18 \x01(\t\x12&\n\tnet_param\x18\x19 \x01(\x0b\x32\x13.caffe.NetParameter\x12\x11\n\ttrain_net\x18\x01 \x01(\t\x12\x10\n\x08test_net\x18\x02 \x03(\t\x12,\n\x0ftrain_net_param\x18\x15 \x01(\x0b\x32\x13.caffe.NetParameter\x12+\n\x0etest_net_param\x18\x16 \x03(\x0b\x32\x13.caffe.NetParameter\x12$\n\x0btrain_state\x18\x1a \x01(\x0b\x32\x0f.caffe.NetState\x12#\n\ntest_state\x18\x1b \x03(\x0b\x32\x0f.caffe.NetState\x12\x11\n\ttest_iter\x18\x03 \x03(\x05\x12\x18\n\rtest_interval\x18\x04 \x01(\x05:\x01\x30\x12 \n\x11test_compute_loss\x18\x13 \x01(\x08:\x05\x66\x61lse\x12!\n\x13test_initialization\x18 \x01(\x08:\x04true\x12\x0f\n\x07\x62\x61se_lr\x18\x05 \x01(\x02\x12\x0f\n\x07\x64isplay\x18\x06 
\x01(\x05\x12\x17\n\x0c\x61verage_loss\x18! \x01(\x05:\x01\x31\x12\x10\n\x08max_iter\x18\x07 \x01(\x05\x12\x14\n\titer_size\x18$ \x01(\x05:\x01\x31\x12\x11\n\tlr_policy\x18\x08 \x01(\t\x12\r\n\x05gamma\x18\t \x01(\x02\x12\r\n\x05power\x18\n \x01(\x02\x12\x10\n\x08momentum\x18\x0b \x01(\x02\x12\x14\n\x0cweight_decay\x18\x0c \x01(\x02\x12\x1f\n\x13regularization_type\x18\x1d \x01(\t:\x02L2\x12\x10\n\x08stepsize\x18\r \x01(\x05\x12\x11\n\tstepvalue\x18\" \x03(\x05\x12\x1a\n\x0e\x63lip_gradients\x18# \x01(\x02:\x02-1\x12\x13\n\x08snapshot\x18\x0e \x01(\x05:\x01\x30\x12\x17\n\x0fsnapshot_prefix\x18\x0f \x01(\t\x12\x1c\n\rsnapshot_diff\x18\x10 \x01(\x08:\x05\x66\x61lse\x12K\n\x0fsnapshot_format\x18% \x01(\x0e\x32%.caffe.SolverParameter.SnapshotFormat:\x0b\x42INARYPROTO\x12;\n\x0bsolver_mode\x18\x11 \x01(\x0e\x32!.caffe.SolverParameter.SolverMode:\x03GPU\x12\x14\n\tdevice_id\x18\x12 \x01(\x05:\x01\x30\x12\x17\n\x0brandom_seed\x18\x14 \x01(\x03:\x02-1\x12\x11\n\x04type\x18( \x01(\t:\x03SGD\x12\x14\n\x05\x64\x65lta\x18\x1f \x01(\x02:\x05\x31\x65-08\x12\x18\n\tmomentum2\x18\' \x01(\x02:\x05\x30.999\x12\x17\n\trms_decay\x18& \x01(\x02:\x04\x30.99\x12\x19\n\ndebug_info\x18\x17 \x01(\x08:\x05\x66\x61lse\x12\"\n\x14snapshot_after_train\x18\x1c \x01(\x08:\x04true\x12;\n\x0bsolver_type\x18\x1e \x01(\x0e\x32!.caffe.SolverParameter.SolverType:\x03SGD\x12\x1f\n\x11layer_wise_reduce\x18) \x01(\x08:\x04true\"+\n\x0eSnapshotFormat\x12\x08\n\x04HDF5\x10\x00\x12\x0f\n\x0b\x42INARYPROTO\x10\x01\"\x1e\n\nSolverMode\x12\x07\n\x03\x43PU\x10\x00\x12\x07\n\x03GPU\x10\x01\"U\n\nSolverType\x12\x07\n\x03SGD\x10\x00\x12\x0c\n\x08NESTEROV\x10\x01\x12\x0b\n\x07\x41\x44\x41GRAD\x10\x02\x12\x0b\n\x07RMSPROP\x10\x03\x12\x0c\n\x08\x41\x44\x41\x44\x45LTA\x10\x04\x12\x08\n\x04\x41\x44\x41M\x10\x05\"l\n\x0bSolverState\x12\x0c\n\x04iter\x18\x01 \x01(\x05\x12\x13\n\x0blearned_net\x18\x02 \x01(\t\x12!\n\x07history\x18\x03 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x17\n\x0c\x63urrent_step\x18\x04 \x01(\x05:\x01\x30\"N\n\x08NetState\x12!\n\x05phase\x18\x01 \x01(\x0e\x32\x0c.caffe.Phase:\x04TEST\x12\x10\n\x05level\x18\x02 \x01(\x05:\x01\x30\x12\r\n\x05stage\x18\x03 \x03(\t\"s\n\x0cNetStateRule\x12\x1b\n\x05phase\x18\x01 \x01(\x0e\x32\x0c.caffe.Phase\x12\x11\n\tmin_level\x18\x02 \x01(\x05\x12\x11\n\tmax_level\x18\x03 \x01(\x05\x12\r\n\x05stage\x18\x04 \x03(\t\x12\x11\n\tnot_stage\x18\x05 \x03(\t\"\xa3\x01\n\tParamSpec\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x31\n\nshare_mode\x18\x02 \x01(\x0e\x32\x1d.caffe.ParamSpec.DimCheckMode\x12\x12\n\x07lr_mult\x18\x03 \x01(\x02:\x01\x31\x12\x15\n\ndecay_mult\x18\x04 \x01(\x02:\x01\x31\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\x82\x14\n\x0eLayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x0e\n\x06\x62ottom\x18\x03 \x03(\t\x12\x0b\n\x03top\x18\x04 \x03(\t\x12\x1b\n\x05phase\x18\n \x01(\x0e\x32\x0c.caffe.Phase\x12\x13\n\x0bloss_weight\x18\x05 \x03(\x02\x12\x1f\n\x05param\x18\x06 \x03(\x0b\x32\x10.caffe.ParamSpec\x12\x1f\n\x05\x62lobs\x18\x07 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x16\n\x0epropagate_down\x18\x0b \x03(\x08\x12$\n\x07include\x18\x08 \x03(\x0b\x32\x13.caffe.NetStateRule\x12$\n\x07\x65xclude\x18\t \x03(\x0b\x32\x13.caffe.NetStateRule\x12\x37\n\x0ftransform_param\x18\x64 \x01(\x0b\x32\x1e.caffe.TransformationParameter\x12(\n\nloss_param\x18\x65 \x01(\x0b\x32\x14.caffe.LossParameter\x12\x30\n\x0e\x61\x63\x63uracy_param\x18\x66 \x01(\x0b\x32\x18.caffe.AccuracyParameter\x12,\n\x0c\x61rgmax_param\x18g 
\x01(\x0b\x32\x16.caffe.ArgMaxParameter\x12\x34\n\x10\x62\x61tch_norm_param\x18\x8b\x01 \x01(\x0b\x32\x19.caffe.BatchNormParameter\x12)\n\nbias_param\x18\x8d\x01 \x01(\x0b\x32\x14.caffe.BiasParameter\x12,\n\x0c\x63oncat_param\x18h \x01(\x0b\x32\x16.caffe.ConcatParameter\x12?\n\x16\x63ontrastive_loss_param\x18i \x01(\x0b\x32\x1f.caffe.ContrastiveLossParameter\x12\x36\n\x11\x63onvolution_param\x18j \x01(\x0b\x32\x1b.caffe.ConvolutionParameter\x12)\n\ncrop_param\x18\x90\x01 \x01(\x0b\x32\x14.caffe.CropParameter\x12(\n\ndata_param\x18k \x01(\x0b\x32\x14.caffe.DataParameter\x12.\n\rdropout_param\x18l \x01(\x0b\x32\x17.caffe.DropoutParameter\x12\x33\n\x10\x64ummy_data_param\x18m \x01(\x0b\x32\x19.caffe.DummyDataParameter\x12.\n\reltwise_param\x18n \x01(\x0b\x32\x17.caffe.EltwiseParameter\x12\'\n\telu_param\x18\x8c\x01 \x01(\x0b\x32\x13.caffe.ELUParameter\x12+\n\x0b\x65mbed_param\x18\x89\x01 \x01(\x0b\x32\x15.caffe.EmbedParameter\x12&\n\texp_param\x18o \x01(\x0b\x32\x13.caffe.ExpParameter\x12/\n\rflatten_param\x18\x87\x01 \x01(\x0b\x32\x17.caffe.FlattenParameter\x12\x31\n\x0fhdf5_data_param\x18p \x01(\x0b\x32\x18.caffe.HDF5DataParameter\x12\x35\n\x11hdf5_output_param\x18q \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\x12\x33\n\x10hinge_loss_param\x18r \x01(\x0b\x32\x19.caffe.HingeLossParameter\x12\x33\n\x10image_data_param\x18s \x01(\x0b\x32\x19.caffe.ImageDataParameter\x12\x39\n\x13infogain_loss_param\x18t \x01(\x0b\x32\x1c.caffe.InfogainLossParameter\x12\x39\n\x13inner_product_param\x18u \x01(\x0b\x32\x1c.caffe.InnerProductParameter\x12+\n\x0binput_param\x18\x8f\x01 \x01(\x0b\x32\x15.caffe.InputParameter\x12\'\n\tlog_param\x18\x86\x01 \x01(\x0b\x32\x13.caffe.LogParameter\x12&\n\tlrn_param\x18v \x01(\x0b\x32\x13.caffe.LRNParameter\x12\x35\n\x11memory_data_param\x18w \x01(\x0b\x32\x1a.caffe.MemoryDataParameter\x12&\n\tmvn_param\x18x \x01(\x0b\x32\x13.caffe.MVNParameter\x12\x33\n\x0fparameter_param\x18\x91\x01 \x01(\x0b\x32\x19.caffe.ParameterParameter\x12.\n\rpooling_param\x18y \x01(\x0b\x32\x17.caffe.PoolingParameter\x12*\n\x0bpower_param\x18z \x01(\x0b\x32\x15.caffe.PowerParameter\x12+\n\x0bprelu_param\x18\x83\x01 \x01(\x0b\x32\x15.caffe.PReLUParameter\x12-\n\x0cpython_param\x18\x82\x01 \x01(\x0b\x32\x16.caffe.PythonParameter\x12\x33\n\x0frecurrent_param\x18\x92\x01 \x01(\x0b\x32\x19.caffe.RecurrentParameter\x12\x33\n\x0freduction_param\x18\x88\x01 \x01(\x0b\x32\x19.caffe.ReductionParameter\x12(\n\nrelu_param\x18{ \x01(\x0b\x32\x14.caffe.ReLUParameter\x12/\n\rreshape_param\x18\x85\x01 \x01(\x0b\x32\x17.caffe.ReshapeParameter\x12+\n\x0bscale_param\x18\x8e\x01 \x01(\x0b\x32\x15.caffe.ScaleParameter\x12.\n\rsigmoid_param\x18| \x01(\x0b\x32\x17.caffe.SigmoidParameter\x12.\n\rsoftmax_param\x18} \x01(\x0b\x32\x17.caffe.SoftmaxParameter\x12\'\n\tspp_param\x18\x84\x01 \x01(\x0b\x32\x13.caffe.SPPParameter\x12*\n\x0bslice_param\x18~ \x01(\x0b\x32\x15.caffe.SliceParameter\x12(\n\ntanh_param\x18\x7f \x01(\x0b\x32\x14.caffe.TanHParameter\x12\x33\n\x0fthreshold_param\x18\x80\x01 \x01(\x0b\x32\x19.caffe.ThresholdParameter\x12)\n\ntile_param\x18\x8a\x01 \x01(\x0b\x32\x14.caffe.TileParameter\x12\x36\n\x11window_data_param\x18\x81\x01 \x01(\x0b\x32\x1a.caffe.WindowDataParameter\"\xb6\x01\n\x17TransformationParameter\x12\x10\n\x05scale\x18\x01 \x01(\x02:\x01\x31\x12\x15\n\x06mirror\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x14\n\tcrop_size\x18\x03 \x01(\r:\x01\x30\x12\x11\n\tmean_file\x18\x04 \x01(\t\x12\x12\n\nmean_value\x18\x05 \x03(\x02\x12\x1a\n\x0b\x66orce_color\x18\x06 
\x01(\x08:\x05\x66\x61lse\x12\x19\n\nforce_gray\x18\x07 \x01(\x08:\x05\x66\x61lse\"\xc2\x01\n\rLossParameter\x12\x14\n\x0cignore_label\x18\x01 \x01(\x05\x12\x44\n\rnormalization\x18\x03 \x01(\x0e\x32&.caffe.LossParameter.NormalizationMode:\x05VALID\x12\x11\n\tnormalize\x18\x02 \x01(\x08\"B\n\x11NormalizationMode\x12\x08\n\x04\x46ULL\x10\x00\x12\t\n\x05VALID\x10\x01\x12\x0e\n\nBATCH_SIZE\x10\x02\x12\x08\n\x04NONE\x10\x03\"L\n\x11\x41\x63\x63uracyParameter\x12\x10\n\x05top_k\x18\x01 \x01(\r:\x01\x31\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x14\n\x0cignore_label\x18\x03 \x01(\x05\"M\n\x0f\x41rgMaxParameter\x12\x1a\n\x0bout_max_val\x18\x01 \x01(\x08:\x05\x66\x61lse\x12\x10\n\x05top_k\x18\x02 \x01(\r:\x01\x31\x12\x0c\n\x04\x61xis\x18\x03 \x01(\x05\"9\n\x0f\x43oncatParameter\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\x12\x15\n\nconcat_dim\x18\x01 \x01(\r:\x01\x31\"j\n\x12\x42\x61tchNormParameter\x12\x18\n\x10use_global_stats\x18\x01 \x01(\x08\x12&\n\x17moving_average_fraction\x18\x02 \x01(\x02:\x05\x30.999\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x31\x65-05\"]\n\rBiasParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12&\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\"L\n\x18\x43ontrastiveLossParameter\x12\x11\n\x06margin\x18\x01 \x01(\x02:\x01\x31\x12\x1d\n\x0elegacy_version\x18\x02 \x01(\x08:\x05\x66\x61lse\"\xfc\x03\n\x14\x43onvolutionParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12\x0b\n\x03pad\x18\x03 \x03(\r\x12\x13\n\x0bkernel_size\x18\x04 \x03(\r\x12\x0e\n\x06stride\x18\x06 \x03(\r\x12\x10\n\x08\x64ilation\x18\x12 \x03(\r\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x10\n\x08kernel_h\x18\x0b \x01(\r\x12\x10\n\x08kernel_w\x18\x0c \x01(\r\x12\x10\n\x08stride_h\x18\r \x01(\r\x12\x10\n\x08stride_w\x18\x0e \x01(\r\x12\x10\n\x05group\x18\x05 \x01(\r:\x01\x31\x12-\n\rweight_filler\x18\x07 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x08 \x01(\x0b\x32\x16.caffe.FillerParameter\x12;\n\x06\x65ngine\x18\x0f \x01(\x0e\x32\".caffe.ConvolutionParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x10 \x01(\x05:\x01\x31\x12\x1e\n\x0f\x66orce_nd_im2col\x18\x11 \x01(\x08:\x05\x66\x61lse\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"0\n\rCropParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x32\x12\x0e\n\x06offset\x18\x02 \x03(\r\"\xa4\x02\n\rDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x31\n\x07\x62\x61\x63kend\x18\x08 \x01(\x0e\x32\x17.caffe.DataParameter.DB:\x07LEVELDB\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\"\n\x13\x66orce_encoded_color\x18\t \x01(\x08:\x05\x66\x61lse\x12\x13\n\x08prefetch\x18\n \x01(\r:\x01\x34\"\x1b\n\x02\x44\x42\x12\x0b\n\x07LEVELDB\x10\x00\x12\x08\n\x04LMDB\x10\x01\".\n\x10\x44ropoutParameter\x12\x1a\n\rdropout_ratio\x18\x01 \x01(\x02:\x03\x30.5\"\xa0\x01\n\x12\x44ummyDataParameter\x12+\n\x0b\x64\x61ta_filler\x18\x01 \x03(\x0b\x32\x16.caffe.FillerParameter\x12\x1f\n\x05shape\x18\x06 \x03(\x0b\x32\x10.caffe.BlobShape\x12\x0b\n\x03num\x18\x02 \x03(\r\x12\x10\n\x08\x63hannels\x18\x03 \x03(\r\x12\x0e\n\x06height\x18\x04 
\x03(\r\x12\r\n\x05width\x18\x05 \x03(\r\"\xa5\x01\n\x10\x45ltwiseParameter\x12\x39\n\toperation\x18\x01 \x01(\x0e\x32!.caffe.EltwiseParameter.EltwiseOp:\x03SUM\x12\r\n\x05\x63oeff\x18\x02 \x03(\x02\x12\x1e\n\x10stable_prod_grad\x18\x03 \x01(\x08:\x04true\"\'\n\tEltwiseOp\x12\x08\n\x04PROD\x10\x00\x12\x07\n\x03SUM\x10\x01\x12\x07\n\x03MAX\x10\x02\" \n\x0c\x45LUParameter\x12\x10\n\x05\x61lpha\x18\x01 \x01(\x02:\x01\x31\"\xac\x01\n\x0e\x45mbedParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x11\n\tinput_dim\x18\x02 \x01(\r\x12\x17\n\tbias_term\x18\x03 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x04 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\"D\n\x0c\x45xpParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"9\n\x10\x46lattenParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x14\n\x08\x65nd_axis\x18\x02 \x01(\x05:\x02-1\"O\n\x11HDF5DataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x12\n\nbatch_size\x18\x02 \x01(\r\x12\x16\n\x07shuffle\x18\x03 \x01(\x08:\x05\x66\x61lse\"(\n\x13HDF5OutputParameter\x12\x11\n\tfile_name\x18\x01 \x01(\t\"^\n\x12HingeLossParameter\x12\x30\n\x04norm\x18\x01 \x01(\x0e\x32\x1e.caffe.HingeLossParameter.Norm:\x02L1\"\x16\n\x04Norm\x12\x06\n\x02L1\x10\x01\x12\x06\n\x02L2\x10\x02\"\x97\x02\n\x12ImageDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x15\n\nbatch_size\x18\x04 \x01(\r:\x01\x31\x12\x14\n\trand_skip\x18\x07 \x01(\r:\x01\x30\x12\x16\n\x07shuffle\x18\x08 \x01(\x08:\x05\x66\x61lse\x12\x15\n\nnew_height\x18\t \x01(\r:\x01\x30\x12\x14\n\tnew_width\x18\n \x01(\r:\x01\x30\x12\x16\n\x08is_color\x18\x0b \x01(\x08:\x04true\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\x0c \x01(\t:\x00\"8\n\x15InfogainLossParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\"\xcb\x01\n\x15InnerProductParameter\x12\x12\n\nnum_output\x18\x01 \x01(\r\x12\x17\n\tbias_term\x18\x02 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x04 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x0f\n\x04\x61xis\x18\x05 \x01(\x05:\x01\x31\x12\x18\n\ttranspose\x18\x06 \x01(\x08:\x05\x66\x61lse\"1\n\x0eInputParameter\x12\x1f\n\x05shape\x18\x01 \x03(\x0b\x32\x10.caffe.BlobShape\"D\n\x0cLogParameter\x12\x10\n\x04\x62\x61se\x18\x01 \x01(\x02:\x02-1\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"\xb8\x02\n\x0cLRNParameter\x12\x15\n\nlocal_size\x18\x01 \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x02 \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x03 \x01(\x02:\x04\x30.75\x12\x44\n\x0bnorm_region\x18\x04 \x01(\x0e\x32\x1e.caffe.LRNParameter.NormRegion:\x0f\x41\x43ROSS_CHANNELS\x12\x0c\n\x01k\x18\x05 \x01(\x02:\x01\x31\x12\x33\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1a.caffe.LRNParameter.Engine:\x07\x44\x45\x46\x41ULT\"5\n\nNormRegion\x12\x13\n\x0f\x41\x43ROSS_CHANNELS\x10\x00\x12\x12\n\x0eWITHIN_CHANNEL\x10\x01\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"Z\n\x13MemoryDataParameter\x12\x12\n\nbatch_size\x18\x01 \x01(\r\x12\x10\n\x08\x63hannels\x18\x02 \x01(\r\x12\x0e\n\x06height\x18\x03 \x01(\r\x12\r\n\x05width\x18\x04 
\x01(\r\"d\n\x0cMVNParameter\x12 \n\x12normalize_variance\x18\x01 \x01(\x08:\x04true\x12\x1e\n\x0f\x61\x63ross_channels\x18\x02 \x01(\x08:\x05\x66\x61lse\x12\x12\n\x03\x65ps\x18\x03 \x01(\x02:\x05\x31\x65-09\"5\n\x12ParameterParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\"\xa2\x03\n\x10PoolingParameter\x12\x35\n\x04pool\x18\x01 \x01(\x0e\x32\".caffe.PoolingParameter.PoolMethod:\x03MAX\x12\x0e\n\x03pad\x18\x04 \x01(\r:\x01\x30\x12\x10\n\x05pad_h\x18\t \x01(\r:\x01\x30\x12\x10\n\x05pad_w\x18\n \x01(\r:\x01\x30\x12\x13\n\x0bkernel_size\x18\x02 \x01(\r\x12\x10\n\x08kernel_h\x18\x05 \x01(\r\x12\x10\n\x08kernel_w\x18\x06 \x01(\r\x12\x11\n\x06stride\x18\x03 \x01(\r:\x01\x31\x12\x10\n\x08stride_h\x18\x07 \x01(\r\x12\x10\n\x08stride_w\x18\x08 \x01(\r\x12\x37\n\x06\x65ngine\x18\x0b \x01(\x0e\x32\x1e.caffe.PoolingParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x1d\n\x0eglobal_pooling\x18\x0c \x01(\x08:\x05\x66\x61lse\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"F\n\x0ePowerParameter\x12\x10\n\x05power\x18\x01 \x01(\x02:\x01\x31\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x10\n\x05shift\x18\x03 \x01(\x02:\x01\x30\"g\n\x0fPythonParameter\x12\x0e\n\x06module\x18\x01 \x01(\t\x12\r\n\x05layer\x18\x02 \x01(\t\x12\x13\n\tparam_str\x18\x03 \x01(\t:\x00\x12 \n\x11share_in_parallel\x18\x04 \x01(\x08:\x05\x66\x61lse\"\xc0\x01\n\x12RecurrentParameter\x12\x15\n\nnum_output\x18\x01 \x01(\r:\x01\x30\x12-\n\rweight_filler\x18\x02 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x19\n\ndebug_info\x18\x04 \x01(\x08:\x05\x66\x61lse\x12\x1c\n\rexpose_hidden\x18\x05 \x01(\x08:\x05\x66\x61lse\"\xad\x01\n\x12ReductionParameter\x12=\n\toperation\x18\x01 \x01(\x0e\x32%.caffe.ReductionParameter.ReductionOp:\x03SUM\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x10\n\x05\x63oeff\x18\x03 \x01(\x02:\x01\x31\"5\n\x0bReductionOp\x12\x07\n\x03SUM\x10\x01\x12\x08\n\x04\x41SUM\x10\x02\x12\t\n\x05SUMSQ\x10\x03\x12\x08\n\x04MEAN\x10\x04\"\x8d\x01\n\rReLUParameter\x12\x19\n\x0enegative_slope\x18\x01 \x01(\x02:\x01\x30\x12\x34\n\x06\x65ngine\x18\x02 \x01(\x0e\x32\x1b.caffe.ReLUParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"Z\n\x10ReshapeParameter\x12\x1f\n\x05shape\x18\x01 \x01(\x0b\x32\x10.caffe.BlobShape\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x30\x12\x14\n\x08num_axes\x18\x03 \x01(\x05:\x02-1\"\xa5\x01\n\x0eScaleParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\x13\n\x08num_axes\x18\x02 \x01(\x05:\x01\x31\x12&\n\x06\x66iller\x18\x03 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x18\n\tbias_term\x18\x04 \x01(\x08:\x05\x66\x61lse\x12+\n\x0b\x62ias_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\"x\n\x10SigmoidParameter\x12\x37\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1e.caffe.SigmoidParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"L\n\x0eSliceParameter\x12\x0f\n\x04\x61xis\x18\x03 \x01(\x05:\x01\x31\x12\x13\n\x0bslice_point\x18\x02 \x03(\r\x12\x14\n\tslice_dim\x18\x01 \x01(\r:\x01\x31\"\x89\x01\n\x10SoftmaxParameter\x12\x37\n\x06\x65ngine\x18\x01 
\x01(\x0e\x32\x1e.caffe.SoftmaxParameter.Engine:\x07\x44\x45\x46\x41ULT\x12\x0f\n\x04\x61xis\x18\x02 \x01(\x05:\x01\x31\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"r\n\rTanHParameter\x12\x34\n\x06\x65ngine\x18\x01 \x01(\x0e\x32\x1b.caffe.TanHParameter.Engine:\x07\x44\x45\x46\x41ULT\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"/\n\rTileParameter\x12\x0f\n\x04\x61xis\x18\x01 \x01(\x05:\x01\x31\x12\r\n\x05tiles\x18\x02 \x01(\x05\"*\n\x12ThresholdParameter\x12\x14\n\tthreshold\x18\x01 \x01(\x02:\x01\x30\"\xc1\x02\n\x13WindowDataParameter\x12\x0e\n\x06source\x18\x01 \x01(\t\x12\x10\n\x05scale\x18\x02 \x01(\x02:\x01\x31\x12\x11\n\tmean_file\x18\x03 \x01(\t\x12\x12\n\nbatch_size\x18\x04 \x01(\r\x12\x14\n\tcrop_size\x18\x05 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x06 \x01(\x08:\x05\x66\x61lse\x12\x19\n\x0c\x66g_threshold\x18\x07 \x01(\x02:\x03\x30.5\x12\x19\n\x0c\x62g_threshold\x18\x08 \x01(\x02:\x03\x30.5\x12\x19\n\x0b\x66g_fraction\x18\t \x01(\x02:\x04\x30.25\x12\x16\n\x0b\x63ontext_pad\x18\n \x01(\r:\x01\x30\x12\x17\n\tcrop_mode\x18\x0b \x01(\t:\x04warp\x12\x1b\n\x0c\x63\x61\x63he_images\x18\x0c \x01(\x08:\x05\x66\x61lse\x12\x15\n\x0broot_folder\x18\r \x01(\t:\x00\"\xeb\x01\n\x0cSPPParameter\x12\x16\n\x0epyramid_height\x18\x01 \x01(\r\x12\x31\n\x04pool\x18\x02 \x01(\x0e\x32\x1e.caffe.SPPParameter.PoolMethod:\x03MAX\x12\x33\n\x06\x65ngine\x18\x06 \x01(\x0e\x32\x1a.caffe.SPPParameter.Engine:\x07\x44\x45\x46\x41ULT\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"+\n\x06\x45ngine\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\t\n\x05\x43\x41\x46\x46\x45\x10\x01\x12\t\n\x05\x43UDNN\x10\x02\"\xe0\x13\n\x10V1LayerParameter\x12\x0e\n\x06\x62ottom\x18\x02 \x03(\t\x12\x0b\n\x03top\x18\x03 \x03(\t\x12\x0c\n\x04name\x18\x04 \x01(\t\x12$\n\x07include\x18 \x03(\x0b\x32\x13.caffe.NetStateRule\x12$\n\x07\x65xclude\x18! 
\x03(\x0b\x32\x13.caffe.NetStateRule\x12/\n\x04type\x18\x05 \x01(\x0e\x32!.caffe.V1LayerParameter.LayerType\x12\x1f\n\x05\x62lobs\x18\x06 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x0e\n\x05param\x18\xe9\x07 \x03(\t\x12>\n\x0f\x62lob_share_mode\x18\xea\x07 \x03(\x0e\x32$.caffe.V1LayerParameter.DimCheckMode\x12\x10\n\x08\x62lobs_lr\x18\x07 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x08 \x03(\x02\x12\x13\n\x0bloss_weight\x18# \x03(\x02\x12\x30\n\x0e\x61\x63\x63uracy_param\x18\x1b \x01(\x0b\x32\x18.caffe.AccuracyParameter\x12,\n\x0c\x61rgmax_param\x18\x17 \x01(\x0b\x32\x16.caffe.ArgMaxParameter\x12,\n\x0c\x63oncat_param\x18\t \x01(\x0b\x32\x16.caffe.ConcatParameter\x12?\n\x16\x63ontrastive_loss_param\x18( \x01(\x0b\x32\x1f.caffe.ContrastiveLossParameter\x12\x36\n\x11\x63onvolution_param\x18\n \x01(\x0b\x32\x1b.caffe.ConvolutionParameter\x12(\n\ndata_param\x18\x0b \x01(\x0b\x32\x14.caffe.DataParameter\x12.\n\rdropout_param\x18\x0c \x01(\x0b\x32\x17.caffe.DropoutParameter\x12\x33\n\x10\x64ummy_data_param\x18\x1a \x01(\x0b\x32\x19.caffe.DummyDataParameter\x12.\n\reltwise_param\x18\x18 \x01(\x0b\x32\x17.caffe.EltwiseParameter\x12&\n\texp_param\x18) \x01(\x0b\x32\x13.caffe.ExpParameter\x12\x31\n\x0fhdf5_data_param\x18\r \x01(\x0b\x32\x18.caffe.HDF5DataParameter\x12\x35\n\x11hdf5_output_param\x18\x0e \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\x12\x33\n\x10hinge_loss_param\x18\x1d \x01(\x0b\x32\x19.caffe.HingeLossParameter\x12\x33\n\x10image_data_param\x18\x0f \x01(\x0b\x32\x19.caffe.ImageDataParameter\x12\x39\n\x13infogain_loss_param\x18\x10 \x01(\x0b\x32\x1c.caffe.InfogainLossParameter\x12\x39\n\x13inner_product_param\x18\x11 \x01(\x0b\x32\x1c.caffe.InnerProductParameter\x12&\n\tlrn_param\x18\x12 \x01(\x0b\x32\x13.caffe.LRNParameter\x12\x35\n\x11memory_data_param\x18\x16 \x01(\x0b\x32\x1a.caffe.MemoryDataParameter\x12&\n\tmvn_param\x18\" \x01(\x0b\x32\x13.caffe.MVNParameter\x12.\n\rpooling_param\x18\x13 \x01(\x0b\x32\x17.caffe.PoolingParameter\x12*\n\x0bpower_param\x18\x15 \x01(\x0b\x32\x15.caffe.PowerParameter\x12(\n\nrelu_param\x18\x1e \x01(\x0b\x32\x14.caffe.ReLUParameter\x12.\n\rsigmoid_param\x18& \x01(\x0b\x32\x17.caffe.SigmoidParameter\x12.\n\rsoftmax_param\x18\' \x01(\x0b\x32\x17.caffe.SoftmaxParameter\x12*\n\x0bslice_param\x18\x1f \x01(\x0b\x32\x15.caffe.SliceParameter\x12(\n\ntanh_param\x18% \x01(\x0b\x32\x14.caffe.TanHParameter\x12\x32\n\x0fthreshold_param\x18\x19 \x01(\x0b\x32\x19.caffe.ThresholdParameter\x12\x35\n\x11window_data_param\x18\x14 \x01(\x0b\x32\x1a.caffe.WindowDataParameter\x12\x37\n\x0ftransform_param\x18$ \x01(\x0b\x32\x1e.caffe.TransformationParameter\x12(\n\nloss_param\x18* \x01(\x0b\x32\x14.caffe.LossParameter\x12&\n\x05layer\x18\x01 \x01(\x0b\x32\x17.caffe.V0LayerParameter\"\xd8\x04\n\tLayerType\x12\x08\n\x04NONE\x10\x00\x12\n\n\x06\x41\x42SVAL\x10#\x12\x0c\n\x08\x41\x43\x43URACY\x10\x01\x12\n\n\x06\x41RGMAX\x10\x1e\x12\x08\n\x04\x42NLL\x10\x02\x12\n\n\x06\x43ONCAT\x10\x03\x12\x14\n\x10\x43ONTRASTIVE_LOSS\x10%\x12\x0f\n\x0b\x43ONVOLUTION\x10\x04\x12\x08\n\x04\x44\x41TA\x10\x05\x12\x11\n\rDECONVOLUTION\x10\'\x12\x0b\n\x07\x44ROPOUT\x10\x06\x12\x0e\n\nDUMMY_DATA\x10 
\x12\x12\n\x0e\x45UCLIDEAN_LOSS\x10\x07\x12\x0b\n\x07\x45LTWISE\x10\x19\x12\x07\n\x03\x45XP\x10&\x12\x0b\n\x07\x46LATTEN\x10\x08\x12\r\n\tHDF5_DATA\x10\t\x12\x0f\n\x0bHDF5_OUTPUT\x10\n\x12\x0e\n\nHINGE_LOSS\x10\x1c\x12\n\n\x06IM2COL\x10\x0b\x12\x0e\n\nIMAGE_DATA\x10\x0c\x12\x11\n\rINFOGAIN_LOSS\x10\r\x12\x11\n\rINNER_PRODUCT\x10\x0e\x12\x07\n\x03LRN\x10\x0f\x12\x0f\n\x0bMEMORY_DATA\x10\x1d\x12\x1d\n\x19MULTINOMIAL_LOGISTIC_LOSS\x10\x10\x12\x07\n\x03MVN\x10\"\x12\x0b\n\x07POOLING\x10\x11\x12\t\n\x05POWER\x10\x1a\x12\x08\n\x04RELU\x10\x12\x12\x0b\n\x07SIGMOID\x10\x13\x12\x1e\n\x1aSIGMOID_CROSS_ENTROPY_LOSS\x10\x1b\x12\x0b\n\x07SILENCE\x10$\x12\x0b\n\x07SOFTMAX\x10\x14\x12\x10\n\x0cSOFTMAX_LOSS\x10\x15\x12\t\n\x05SPLIT\x10\x16\x12\t\n\x05SLICE\x10!\x12\x08\n\x04TANH\x10\x17\x12\x0f\n\x0bWINDOW_DATA\x10\x18\x12\r\n\tTHRESHOLD\x10\x1f\"*\n\x0c\x44imCheckMode\x12\n\n\x06STRICT\x10\x00\x12\x0e\n\nPERMISSIVE\x10\x01\"\xfd\x07\n\x10V0LayerParameter\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x12\n\nnum_output\x18\x03 \x01(\r\x12\x16\n\x08\x62iasterm\x18\x04 \x01(\x08:\x04true\x12-\n\rweight_filler\x18\x05 \x01(\x0b\x32\x16.caffe.FillerParameter\x12+\n\x0b\x62ias_filler\x18\x06 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x0e\n\x03pad\x18\x07 \x01(\r:\x01\x30\x12\x12\n\nkernelsize\x18\x08 \x01(\r\x12\x10\n\x05group\x18\t \x01(\r:\x01\x31\x12\x11\n\x06stride\x18\n \x01(\r:\x01\x31\x12\x35\n\x04pool\x18\x0b \x01(\x0e\x32\".caffe.V0LayerParameter.PoolMethod:\x03MAX\x12\x1a\n\rdropout_ratio\x18\x0c \x01(\x02:\x03\x30.5\x12\x15\n\nlocal_size\x18\r \x01(\r:\x01\x35\x12\x10\n\x05\x61lpha\x18\x0e \x01(\x02:\x01\x31\x12\x12\n\x04\x62\x65ta\x18\x0f \x01(\x02:\x04\x30.75\x12\x0c\n\x01k\x18\x16 \x01(\x02:\x01\x31\x12\x0e\n\x06source\x18\x10 \x01(\t\x12\x10\n\x05scale\x18\x11 \x01(\x02:\x01\x31\x12\x10\n\x08meanfile\x18\x12 \x01(\t\x12\x11\n\tbatchsize\x18\x13 \x01(\r\x12\x13\n\x08\x63ropsize\x18\x14 \x01(\r:\x01\x30\x12\x15\n\x06mirror\x18\x15 \x01(\x08:\x05\x66\x61lse\x12\x1f\n\x05\x62lobs\x18\x32 \x03(\x0b\x32\x10.caffe.BlobProto\x12\x10\n\x08\x62lobs_lr\x18\x33 \x03(\x02\x12\x14\n\x0cweight_decay\x18\x34 \x03(\x02\x12\x14\n\trand_skip\x18\x35 \x01(\r:\x01\x30\x12\x1d\n\x10\x64\x65t_fg_threshold\x18\x36 \x01(\x02:\x03\x30.5\x12\x1d\n\x10\x64\x65t_bg_threshold\x18\x37 \x01(\x02:\x03\x30.5\x12\x1d\n\x0f\x64\x65t_fg_fraction\x18\x38 \x01(\x02:\x04\x30.25\x12\x1a\n\x0f\x64\x65t_context_pad\x18: \x01(\r:\x01\x30\x12\x1b\n\rdet_crop_mode\x18; \x01(\t:\x04warp\x12\x12\n\x07new_num\x18< \x01(\x05:\x01\x30\x12\x17\n\x0cnew_channels\x18= \x01(\x05:\x01\x30\x12\x15\n\nnew_height\x18> \x01(\x05:\x01\x30\x12\x14\n\tnew_width\x18? 
\x01(\x05:\x01\x30\x12\x1d\n\x0eshuffle_images\x18@ \x01(\x08:\x05\x66\x61lse\x12\x15\n\nconcat_dim\x18\x41 \x01(\r:\x01\x31\x12\x36\n\x11hdf5_output_param\x18\xe9\x07 \x01(\x0b\x32\x1a.caffe.HDF5OutputParameter\".\n\nPoolMethod\x12\x07\n\x03MAX\x10\x00\x12\x07\n\x03\x41VE\x10\x01\x12\x0e\n\nSTOCHASTIC\x10\x02\"W\n\x0ePReLUParameter\x12&\n\x06\x66iller\x18\x01 \x01(\x0b\x32\x16.caffe.FillerParameter\x12\x1d\n\x0e\x63hannel_shared\x18\x02 \x01(\x08:\x05\x66\x61lse*\x1c\n\x05Phase\x12\t\n\x05TRAIN\x10\x00\x12\x08\n\x04TEST\x10\x01') -) -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -_PHASE = _descriptor.EnumDescriptor( - name='Phase', - full_name='caffe.Phase', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='TRAIN', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='TEST', index=1, number=1, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=15403, - serialized_end=15431, -) -_sym_db.RegisterEnumDescriptor(_PHASE) - -Phase = enum_type_wrapper.EnumTypeWrapper(_PHASE) -TRAIN = 0 -TEST = 1 - - -_FILLERPARAMETER_VARIANCENORM = _descriptor.EnumDescriptor( - name='VarianceNorm', - full_name='caffe.FillerParameter.VarianceNorm', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='FAN_IN', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='FAN_OUT', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='AVERAGE', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=658, - serialized_end=710, -) -_sym_db.RegisterEnumDescriptor(_FILLERPARAMETER_VARIANCENORM) - -_SOLVERPARAMETER_SNAPSHOTFORMAT = _descriptor.EnumDescriptor( - name='SnapshotFormat', - full_name='caffe.SolverParameter.SnapshotFormat', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='HDF5', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='BINARYPROTO', index=1, number=1, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=2171, - serialized_end=2214, -) -_sym_db.RegisterEnumDescriptor(_SOLVERPARAMETER_SNAPSHOTFORMAT) - -_SOLVERPARAMETER_SOLVERMODE = _descriptor.EnumDescriptor( - name='SolverMode', - full_name='caffe.SolverParameter.SolverMode', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='CPU', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='GPU', index=1, number=1, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=2216, - serialized_end=2246, -) -_sym_db.RegisterEnumDescriptor(_SOLVERPARAMETER_SOLVERMODE) - -_SOLVERPARAMETER_SOLVERTYPE = _descriptor.EnumDescriptor( - name='SolverType', - full_name='caffe.SolverParameter.SolverType', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='SGD', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='NESTEROV', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='ADAGRAD', index=2, number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='RMSPROP', index=3, number=3, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='ADADELTA', index=4, number=4, - options=None, - type=None), - 
_descriptor.EnumValueDescriptor( - name='ADAM', index=5, number=5, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=2248, - serialized_end=2333, -) -_sym_db.RegisterEnumDescriptor(_SOLVERPARAMETER_SOLVERTYPE) - -_PARAMSPEC_DIMCHECKMODE = _descriptor.EnumDescriptor( - name='DimCheckMode', - full_name='caffe.ParamSpec.DimCheckMode', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='STRICT', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PERMISSIVE', index=1, number=1, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=2764, - serialized_end=2806, -) -_sym_db.RegisterEnumDescriptor(_PARAMSPEC_DIMCHECKMODE) - -_LOSSPARAMETER_NORMALIZATIONMODE = _descriptor.EnumDescriptor( - name='NormalizationMode', - full_name='caffe.LossParameter.NormalizationMode', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='FULL', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='VALID', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='BATCH_SIZE', index=2, number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='NONE', index=3, number=3, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=5687, - serialized_end=5753, -) -_sym_db.RegisterEnumDescriptor(_LOSSPARAMETER_NORMALIZATIONMODE) - -_CONVOLUTIONPARAMETER_ENGINE = _descriptor.EnumDescriptor( - name='Engine', - full_name='caffe.ConvolutionParameter.Engine', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='DEFAULT', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CAFFE', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CUDNN', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=6718, - serialized_end=6761, -) -_sym_db.RegisterEnumDescriptor(_CONVOLUTIONPARAMETER_ENGINE) - -_DATAPARAMETER_DB = _descriptor.EnumDescriptor( - name='DB', - full_name='caffe.DataParameter.DB', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='LEVELDB', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='LMDB', index=1, number=1, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=7079, - serialized_end=7106, -) -_sym_db.RegisterEnumDescriptor(_DATAPARAMETER_DB) - -_ELTWISEPARAMETER_ELTWISEOP = _descriptor.EnumDescriptor( - name='EltwiseOp', - full_name='caffe.EltwiseParameter.EltwiseOp', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='PROD', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='SUM', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='MAX', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=7446, - serialized_end=7485, -) -_sym_db.RegisterEnumDescriptor(_ELTWISEPARAMETER_ELTWISEOP) - -_HINGELOSSPARAMETER_NORM = _descriptor.EnumDescriptor( - name='Norm', - full_name='caffe.HingeLossParameter.Norm', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='L1', index=0, number=1, - options=None, - 
type=None), - _descriptor.EnumValueDescriptor( - name='L2', index=1, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=8020, - serialized_end=8042, -) -_sym_db.RegisterEnumDescriptor(_HINGELOSSPARAMETER_NORM) - -_LRNPARAMETER_NORMREGION = _descriptor.EnumDescriptor( - name='NormRegion', - full_name='caffe.LRNParameter.NormRegion', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='ACROSS_CHANNELS', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='WITHIN_CHANNEL', index=1, number=1, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=8926, - serialized_end=8979, -) -_sym_db.RegisterEnumDescriptor(_LRNPARAMETER_NORMREGION) - -_LRNPARAMETER_ENGINE = _descriptor.EnumDescriptor( - name='Engine', - full_name='caffe.LRNParameter.Engine', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='DEFAULT', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CAFFE', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CUDNN', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=6718, - serialized_end=6761, -) -_sym_db.RegisterEnumDescriptor(_LRNPARAMETER_ENGINE) - -_POOLINGPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor( - name='PoolMethod', - full_name='caffe.PoolingParameter.PoolMethod', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='MAX', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='AVE', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='STOCHASTIC', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=9603, - serialized_end=9649, -) -_sym_db.RegisterEnumDescriptor(_POOLINGPARAMETER_POOLMETHOD) - -_POOLINGPARAMETER_ENGINE = _descriptor.EnumDescriptor( - name='Engine', - full_name='caffe.PoolingParameter.Engine', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='DEFAULT', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CAFFE', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CUDNN', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=6718, - serialized_end=6761, -) -_sym_db.RegisterEnumDescriptor(_POOLINGPARAMETER_ENGINE) - -_REDUCTIONPARAMETER_REDUCTIONOP = _descriptor.EnumDescriptor( - name='ReductionOp', - full_name='caffe.ReductionParameter.ReductionOp', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='SUM', index=0, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='ASUM', index=1, number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='SUMSQ', index=2, number=3, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='MEAN', index=3, number=4, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=10189, - serialized_end=10242, -) -_sym_db.RegisterEnumDescriptor(_REDUCTIONPARAMETER_REDUCTIONOP) - -_RELUPARAMETER_ENGINE = _descriptor.EnumDescriptor( - name='Engine', - full_name='caffe.ReLUParameter.Engine', - filename=None, 
- file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='DEFAULT', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CAFFE', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CUDNN', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=6718, - serialized_end=6761, -) -_sym_db.RegisterEnumDescriptor(_RELUPARAMETER_ENGINE) - -_SIGMOIDPARAMETER_ENGINE = _descriptor.EnumDescriptor( - name='Engine', - full_name='caffe.SigmoidParameter.Engine', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='DEFAULT', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CAFFE', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CUDNN', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=6718, - serialized_end=6761, -) -_sym_db.RegisterEnumDescriptor(_SIGMOIDPARAMETER_ENGINE) - -_SOFTMAXPARAMETER_ENGINE = _descriptor.EnumDescriptor( - name='Engine', - full_name='caffe.SoftmaxParameter.Engine', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='DEFAULT', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CAFFE', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CUDNN', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=6718, - serialized_end=6761, -) -_sym_db.RegisterEnumDescriptor(_SOFTMAXPARAMETER_ENGINE) - -_TANHPARAMETER_ENGINE = _descriptor.EnumDescriptor( - name='Engine', - full_name='caffe.TanHParameter.Engine', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='DEFAULT', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CAFFE', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CUDNN', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=6718, - serialized_end=6761, -) -_sym_db.RegisterEnumDescriptor(_TANHPARAMETER_ENGINE) - -_SPPPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor( - name='PoolMethod', - full_name='caffe.SPPParameter.PoolMethod', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='MAX', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='AVE', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='STOCHASTIC', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=9603, - serialized_end=9649, -) -_sym_db.RegisterEnumDescriptor(_SPPPARAMETER_POOLMETHOD) - -_SPPPARAMETER_ENGINE = _descriptor.EnumDescriptor( - name='Engine', - full_name='caffe.SPPParameter.Engine', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='DEFAULT', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CAFFE', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CUDNN', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=6718, - serialized_end=6761, -) 
-_sym_db.RegisterEnumDescriptor(_SPPPARAMETER_ENGINE) - -_V1LAYERPARAMETER_LAYERTYPE = _descriptor.EnumDescriptor( - name='LayerType', - full_name='caffe.V1LayerParameter.LayerType', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='NONE', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='ABSVAL', index=1, number=35, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='ACCURACY', index=2, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='ARGMAX', index=3, number=30, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='BNLL', index=4, number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CONCAT', index=5, number=3, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CONTRASTIVE_LOSS', index=6, number=37, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='CONVOLUTION', index=7, number=4, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DATA', index=8, number=5, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DECONVOLUTION', index=9, number=39, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DROPOUT', index=10, number=6, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DUMMY_DATA', index=11, number=32, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='EUCLIDEAN_LOSS', index=12, number=7, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='ELTWISE', index=13, number=25, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='EXP', index=14, number=38, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='FLATTEN', index=15, number=8, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='HDF5_DATA', index=16, number=9, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='HDF5_OUTPUT', index=17, number=10, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='HINGE_LOSS', index=18, number=28, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='IM2COL', index=19, number=11, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='IMAGE_DATA', index=20, number=12, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='INFOGAIN_LOSS', index=21, number=13, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='INNER_PRODUCT', index=22, number=14, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='LRN', index=23, number=15, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='MEMORY_DATA', index=24, number=29, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='MULTINOMIAL_LOGISTIC_LOSS', index=25, number=16, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='MVN', index=26, number=34, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='POOLING', index=27, number=17, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='POWER', index=28, number=26, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='RELU', index=29, number=18, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='SIGMOID', index=30, number=19, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='SIGMOID_CROSS_ENTROPY_LOSS', 
index=31, number=27, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='SILENCE', index=32, number=36, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='SOFTMAX', index=33, number=20, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='SOFTMAX_LOSS', index=34, number=21, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='SPLIT', index=35, number=22, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='SLICE', index=36, number=33, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='TANH', index=37, number=23, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='WINDOW_DATA', index=38, number=24, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='THRESHOLD', index=39, number=31, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=13644, - serialized_end=14244, -) -_sym_db.RegisterEnumDescriptor(_V1LAYERPARAMETER_LAYERTYPE) - -_V1LAYERPARAMETER_DIMCHECKMODE = _descriptor.EnumDescriptor( - name='DimCheckMode', - full_name='caffe.V1LayerParameter.DimCheckMode', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='STRICT', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='PERMISSIVE', index=1, number=1, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=2764, - serialized_end=2806, -) -_sym_db.RegisterEnumDescriptor(_V1LAYERPARAMETER_DIMCHECKMODE) - -_V0LAYERPARAMETER_POOLMETHOD = _descriptor.EnumDescriptor( - name='PoolMethod', - full_name='caffe.V0LayerParameter.PoolMethod', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='MAX', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='AVE', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='STOCHASTIC', index=2, number=2, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=9603, - serialized_end=9649, -) -_sym_db.RegisterEnumDescriptor(_V0LAYERPARAMETER_POOLMETHOD) - - -_BLOBSHAPE = _descriptor.Descriptor( - name='BlobShape', - full_name='caffe.BlobShape', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='dim', full_name='caffe.BlobShape.dim', index=0, - number=1, type=3, cpp_type=2, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=22, - serialized_end=50, -) - - -_BLOBPROTO = _descriptor.Descriptor( - name='BlobProto', - full_name='caffe.BlobProto', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='shape', full_name='caffe.BlobProto.shape', index=0, - number=7, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='data', full_name='caffe.BlobProto.data', index=1, - number=5, type=2, cpp_type=6, 
label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='diff', full_name='caffe.BlobProto.diff', index=2, - number=6, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='double_data', full_name='caffe.BlobProto.double_data', index=3, - number=8, type=1, cpp_type=5, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='double_diff', full_name='caffe.BlobProto.double_diff', index=4, - number=9, type=1, cpp_type=5, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='num', full_name='caffe.BlobProto.num', index=5, - number=1, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='channels', full_name='caffe.BlobProto.channels', index=6, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='height', full_name='caffe.BlobProto.height', index=7, - number=3, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='width', full_name='caffe.BlobProto.width', index=8, - number=4, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=53, - serialized_end=257, -) - - -_BLOBPROTOVECTOR = _descriptor.Descriptor( - name='BlobProtoVector', - full_name='caffe.BlobProtoVector', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='blobs', full_name='caffe.BlobProtoVector.blobs', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=259, - serialized_end=309, -) - - -_DATUM = _descriptor.Descriptor( - name='Datum', - full_name='caffe.Datum', - filename=None, - 
file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='channels', full_name='caffe.Datum.channels', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='height', full_name='caffe.Datum.height', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='width', full_name='caffe.Datum.width', index=2, - number=3, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='data', full_name='caffe.Datum.data', index=3, - number=4, type=12, cpp_type=9, label=1, - has_default_value=False, default_value=_b(""), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='label', full_name='caffe.Datum.label', index=4, - number=5, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='float_data', full_name='caffe.Datum.float_data', index=5, - number=6, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='encoded', full_name='caffe.Datum.encoded', index=6, - number=7, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=312, - serialized_end=441, -) - - -_FILLERPARAMETER = _descriptor.Descriptor( - name='FillerParameter', - full_name='caffe.FillerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='type', full_name='caffe.FillerParameter.type', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("constant").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='value', full_name='caffe.FillerParameter.value', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='min', full_name='caffe.FillerParameter.min', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='max', full_name='caffe.FillerParameter.max', index=3, - 
number=4, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='mean', full_name='caffe.FillerParameter.mean', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='std', full_name='caffe.FillerParameter.std', index=5, - number=6, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='sparse', full_name='caffe.FillerParameter.sparse', index=6, - number=7, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=-1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='variance_norm', full_name='caffe.FillerParameter.variance_norm', index=7, - number=8, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _FILLERPARAMETER_VARIANCENORM, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=444, - serialized_end=710, -) - - -_NETPARAMETER = _descriptor.Descriptor( - name='NetParameter', - full_name='caffe.NetParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', full_name='caffe.NetParameter.name', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='input', full_name='caffe.NetParameter.input', index=1, - number=3, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='input_shape', full_name='caffe.NetParameter.input_shape', index=2, - number=8, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='input_dim', full_name='caffe.NetParameter.input_dim', index=3, - number=4, type=5, cpp_type=1, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='force_backward', full_name='caffe.NetParameter.force_backward', index=4, - number=5, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='state', full_name='caffe.NetParameter.state', index=5, - number=6, type=11, cpp_type=10, label=1, 
- has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='debug_info', full_name='caffe.NetParameter.debug_info', index=6, - number=7, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='layer', full_name='caffe.NetParameter.layer', index=7, - number=100, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='layers', full_name='caffe.NetParameter.layers', index=8, - number=2, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=713, - serialized_end=983, -) - - -_SOLVERPARAMETER = _descriptor.Descriptor( - name='SolverParameter', - full_name='caffe.SolverParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='net', full_name='caffe.SolverParameter.net', index=0, - number=24, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='net_param', full_name='caffe.SolverParameter.net_param', index=1, - number=25, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='train_net', full_name='caffe.SolverParameter.train_net', index=2, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='test_net', full_name='caffe.SolverParameter.test_net', index=3, - number=2, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='train_net_param', full_name='caffe.SolverParameter.train_net_param', index=4, - number=21, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='test_net_param', full_name='caffe.SolverParameter.test_net_param', index=5, - number=22, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='train_state', full_name='caffe.SolverParameter.train_state', index=6, - number=26, type=11, cpp_type=10, 
label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='test_state', full_name='caffe.SolverParameter.test_state', index=7, - number=27, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='test_iter', full_name='caffe.SolverParameter.test_iter', index=8, - number=3, type=5, cpp_type=1, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='test_interval', full_name='caffe.SolverParameter.test_interval', index=9, - number=4, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='test_compute_loss', full_name='caffe.SolverParameter.test_compute_loss', index=10, - number=19, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='test_initialization', full_name='caffe.SolverParameter.test_initialization', index=11, - number=32, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='base_lr', full_name='caffe.SolverParameter.base_lr', index=12, - number=5, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='display', full_name='caffe.SolverParameter.display', index=13, - number=6, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='average_loss', full_name='caffe.SolverParameter.average_loss', index=14, - number=33, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='max_iter', full_name='caffe.SolverParameter.max_iter', index=15, - number=7, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='iter_size', full_name='caffe.SolverParameter.iter_size', index=16, - number=36, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='lr_policy', full_name='caffe.SolverParameter.lr_policy', index=17, - number=8, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), 
- message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='gamma', full_name='caffe.SolverParameter.gamma', index=18, - number=9, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='power', full_name='caffe.SolverParameter.power', index=19, - number=10, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='momentum', full_name='caffe.SolverParameter.momentum', index=20, - number=11, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='weight_decay', full_name='caffe.SolverParameter.weight_decay', index=21, - number=12, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='regularization_type', full_name='caffe.SolverParameter.regularization_type', index=22, - number=29, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("L2").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='stepsize', full_name='caffe.SolverParameter.stepsize', index=23, - number=13, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='stepvalue', full_name='caffe.SolverParameter.stepvalue', index=24, - number=34, type=5, cpp_type=1, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='clip_gradients', full_name='caffe.SolverParameter.clip_gradients', index=25, - number=35, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(-1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='snapshot', full_name='caffe.SolverParameter.snapshot', index=26, - number=14, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='snapshot_prefix', full_name='caffe.SolverParameter.snapshot_prefix', index=27, - number=15, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='snapshot_diff', full_name='caffe.SolverParameter.snapshot_diff', index=28, - number=16, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - 
message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='snapshot_format', full_name='caffe.SolverParameter.snapshot_format', index=29, - number=37, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='solver_mode', full_name='caffe.SolverParameter.solver_mode', index=30, - number=17, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='device_id', full_name='caffe.SolverParameter.device_id', index=31, - number=18, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='random_seed', full_name='caffe.SolverParameter.random_seed', index=32, - number=20, type=3, cpp_type=2, label=1, - has_default_value=True, default_value=-1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='type', full_name='caffe.SolverParameter.type', index=33, - number=40, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("SGD").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='delta', full_name='caffe.SolverParameter.delta', index=34, - number=31, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1e-08), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='momentum2', full_name='caffe.SolverParameter.momentum2', index=35, - number=39, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.999), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='rms_decay', full_name='caffe.SolverParameter.rms_decay', index=36, - number=38, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.99), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='debug_info', full_name='caffe.SolverParameter.debug_info', index=37, - number=23, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='snapshot_after_train', full_name='caffe.SolverParameter.snapshot_after_train', index=38, - number=28, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='solver_type', full_name='caffe.SolverParameter.solver_type', index=39, - number=30, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='layer_wise_reduce', full_name='caffe.SolverParameter.layer_wise_reduce', index=40, - number=41, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _SOLVERPARAMETER_SNAPSHOTFORMAT, - _SOLVERPARAMETER_SOLVERMODE, - _SOLVERPARAMETER_SOLVERTYPE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=986, - serialized_end=2333, -) - - -_SOLVERSTATE = _descriptor.Descriptor( - name='SolverState', - full_name='caffe.SolverState', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='iter', full_name='caffe.SolverState.iter', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='learned_net', full_name='caffe.SolverState.learned_net', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='history', full_name='caffe.SolverState.history', index=2, - number=3, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='current_step', full_name='caffe.SolverState.current_step', index=3, - number=4, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=2335, - serialized_end=2443, -) - - -_NETSTATE = _descriptor.Descriptor( - name='NetState', - full_name='caffe.NetState', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='phase', full_name='caffe.NetState.phase', index=0, - number=1, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='level', full_name='caffe.NetState.level', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='stage', full_name='caffe.NetState.stage', index=2, - number=3, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - 
-  serialized_start=2445,
-  serialized_end=2523,
-)
-
-
-_NETSTATERULE = _descriptor.Descriptor(
-  name='NetStateRule',
-  full_name='caffe.NetStateRule',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='phase', full_name='caffe.NetStateRule.phase', index=0,
-      number=1, type=14, cpp_type=8, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='min_level', full_name='caffe.NetStateRule.min_level', index=1,
-      number=2, type=5, cpp_type=1, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='max_level', full_name='caffe.NetStateRule.max_level', index=2,
-      number=3, type=5, cpp_type=1, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='stage', full_name='caffe.NetStateRule.stage', index=3,
-      number=4, type=9, cpp_type=9, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='not_stage', full_name='caffe.NetStateRule.not_stage', index=4,
-      number=5, type=9, cpp_type=9, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=2525,
-  serialized_end=2640,
-)
-
-
-_PARAMSPEC = _descriptor.Descriptor(
-  name='ParamSpec',
-  full_name='caffe.ParamSpec',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='name', full_name='caffe.ParamSpec.name', index=0,
-      number=1, type=9, cpp_type=9, label=1,
-      has_default_value=False, default_value=_b("").decode('utf-8'),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='share_mode', full_name='caffe.ParamSpec.share_mode', index=1,
-      number=2, type=14, cpp_type=8, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='lr_mult', full_name='caffe.ParamSpec.lr_mult', index=2,
-      number=3, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(1),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='decay_mult', full_name='caffe.ParamSpec.decay_mult', index=3,
-      number=4, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(1),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-    _PARAMSPEC_DIMCHECKMODE,
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=2643,
-  serialized_end=2806,
-)
-
-
-_LAYERPARAMETER = _descriptor.Descriptor(
-  name='LayerParameter',
-  full_name='caffe.LayerParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='name', full_name='caffe.LayerParameter.name', index=0,
-      number=1, type=9, cpp_type=9, label=1,
-      has_default_value=False, default_value=_b("").decode('utf-8'),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='type', full_name='caffe.LayerParameter.type', index=1,
-      number=2, type=9, cpp_type=9, label=1,
-      has_default_value=False, default_value=_b("").decode('utf-8'),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='bottom', full_name='caffe.LayerParameter.bottom', index=2,
-      number=3, type=9, cpp_type=9, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='top', full_name='caffe.LayerParameter.top', index=3,
-      number=4, type=9, cpp_type=9, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='phase', full_name='caffe.LayerParameter.phase', index=4,
-      number=10, type=14, cpp_type=8, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='loss_weight', full_name='caffe.LayerParameter.loss_weight', index=5,
-      number=5, type=2, cpp_type=6, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='param', full_name='caffe.LayerParameter.param', index=6,
-      number=6, type=11, cpp_type=10, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='blobs', full_name='caffe.LayerParameter.blobs', index=7,
-      number=7, type=11, cpp_type=10, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='propagate_down', full_name='caffe.LayerParameter.propagate_down', index=8,
-      number=11, type=8, cpp_type=7, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='include', full_name='caffe.LayerParameter.include', index=9,
-      number=8, type=11, cpp_type=10, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='exclude', full_name='caffe.LayerParameter.exclude', index=10,
-      number=9, type=11, cpp_type=10, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='transform_param', full_name='caffe.LayerParameter.transform_param', index=11,
-      number=100, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='loss_param', full_name='caffe.LayerParameter.loss_param', index=12,
-      number=101, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='accuracy_param', full_name='caffe.LayerParameter.accuracy_param', index=13,
-      number=102, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='argmax_param', full_name='caffe.LayerParameter.argmax_param', index=14,
-      number=103, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='batch_norm_param', full_name='caffe.LayerParameter.batch_norm_param', index=15,
-      number=139, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='bias_param', full_name='caffe.LayerParameter.bias_param', index=16,
-      number=141, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='concat_param', full_name='caffe.LayerParameter.concat_param', index=17,
-      number=104, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='contrastive_loss_param', full_name='caffe.LayerParameter.contrastive_loss_param', index=18,
-      number=105, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='convolution_param', full_name='caffe.LayerParameter.convolution_param', index=19,
-      number=106, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='crop_param', full_name='caffe.LayerParameter.crop_param', index=20,
-      number=144, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='data_param', full_name='caffe.LayerParameter.data_param', index=21,
-      number=107, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='dropout_param', full_name='caffe.LayerParameter.dropout_param', index=22,
-      number=108, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='dummy_data_param', full_name='caffe.LayerParameter.dummy_data_param', index=23,
-      number=109, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='eltwise_param', full_name='caffe.LayerParameter.eltwise_param', index=24,
-      number=110, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='elu_param', full_name='caffe.LayerParameter.elu_param', index=25,
-      number=140, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='embed_param', full_name='caffe.LayerParameter.embed_param', index=26,
-      number=137, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='exp_param', full_name='caffe.LayerParameter.exp_param', index=27,
-      number=111, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='flatten_param', full_name='caffe.LayerParameter.flatten_param', index=28,
-      number=135, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='hdf5_data_param', full_name='caffe.LayerParameter.hdf5_data_param', index=29,
-      number=112, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='hdf5_output_param', full_name='caffe.LayerParameter.hdf5_output_param', index=30,
-      number=113, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='hinge_loss_param', full_name='caffe.LayerParameter.hinge_loss_param', index=31,
-      number=114, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='image_data_param', full_name='caffe.LayerParameter.image_data_param', index=32,
-      number=115, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='infogain_loss_param', full_name='caffe.LayerParameter.infogain_loss_param', index=33,
-      number=116, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='inner_product_param', full_name='caffe.LayerParameter.inner_product_param', index=34,
-      number=117, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='input_param', full_name='caffe.LayerParameter.input_param', index=35,
-      number=143, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='log_param', full_name='caffe.LayerParameter.log_param', index=36,
-      number=134, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='lrn_param', full_name='caffe.LayerParameter.lrn_param', index=37,
-      number=118, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='memory_data_param', full_name='caffe.LayerParameter.memory_data_param', index=38,
-      number=119, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='mvn_param', full_name='caffe.LayerParameter.mvn_param', index=39,
-      number=120, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='parameter_param', full_name='caffe.LayerParameter.parameter_param', index=40,
-      number=145, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='pooling_param', full_name='caffe.LayerParameter.pooling_param', index=41,
-      number=121, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='power_param', full_name='caffe.LayerParameter.power_param', index=42,
-      number=122, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='prelu_param', full_name='caffe.LayerParameter.prelu_param', index=43,
-      number=131, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='python_param', full_name='caffe.LayerParameter.python_param', index=44,
-      number=130, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='recurrent_param', full_name='caffe.LayerParameter.recurrent_param', index=45,
-      number=146, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='reduction_param', full_name='caffe.LayerParameter.reduction_param', index=46,
-      number=136, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='relu_param', full_name='caffe.LayerParameter.relu_param', index=47,
-      number=123, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='reshape_param', full_name='caffe.LayerParameter.reshape_param', index=48,
-      number=133, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='scale_param', full_name='caffe.LayerParameter.scale_param', index=49,
-      number=142, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='sigmoid_param', full_name='caffe.LayerParameter.sigmoid_param', index=50,
-      number=124, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='softmax_param', full_name='caffe.LayerParameter.softmax_param', index=51,
-      number=125, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='spp_param', full_name='caffe.LayerParameter.spp_param', index=52,
-      number=132, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='slice_param', full_name='caffe.LayerParameter.slice_param', index=53,
-      number=126, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='tanh_param', full_name='caffe.LayerParameter.tanh_param', index=54,
-      number=127, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='threshold_param', full_name='caffe.LayerParameter.threshold_param', index=55,
-      number=128, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='tile_param', full_name='caffe.LayerParameter.tile_param', index=56,
-      number=138, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='window_data_param', full_name='caffe.LayerParameter.window_data_param', index=57,
-      number=129, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=2809,
-  serialized_end=5371,
-)
-
-
-_TRANSFORMATIONPARAMETER = _descriptor.Descriptor(
-  name='TransformationParameter',
-  full_name='caffe.TransformationParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='scale', full_name='caffe.TransformationParameter.scale', index=0,
-      number=1, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(1),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='mirror', full_name='caffe.TransformationParameter.mirror', index=1,
-      number=2, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='crop_size', full_name='caffe.TransformationParameter.crop_size', index=2,
-      number=3, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='mean_file', full_name='caffe.TransformationParameter.mean_file', index=3,
-      number=4, type=9, cpp_type=9, label=1,
-      has_default_value=False, default_value=_b("").decode('utf-8'),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='mean_value', full_name='caffe.TransformationParameter.mean_value', index=4,
-      number=5, type=2, cpp_type=6, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='force_color', full_name='caffe.TransformationParameter.force_color', index=5,
-      number=6, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='force_gray', full_name='caffe.TransformationParameter.force_gray', index=6,
-      number=7, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=5374,
-  serialized_end=5556,
-)
-
-
-_LOSSPARAMETER = _descriptor.Descriptor(
-  name='LossParameter',
-  full_name='caffe.LossParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='ignore_label', full_name='caffe.LossParameter.ignore_label', index=0,
-      number=1, type=5, cpp_type=1, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='normalization', full_name='caffe.LossParameter.normalization', index=1,
-      number=3, type=14, cpp_type=8, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='normalize', full_name='caffe.LossParameter.normalize', index=2,
-      number=2, type=8, cpp_type=7, label=1,
-      has_default_value=False, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-    _LOSSPARAMETER_NORMALIZATIONMODE,
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=5559,
-  serialized_end=5753,
-)
-
-
-_ACCURACYPARAMETER = _descriptor.Descriptor(
-  name='AccuracyParameter',
-  full_name='caffe.AccuracyParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='top_k', full_name='caffe.AccuracyParameter.top_k', index=0,
-      number=1, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='axis', full_name='caffe.AccuracyParameter.axis', index=1,
-      number=2, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='ignore_label', full_name='caffe.AccuracyParameter.ignore_label', index=2,
-      number=3, type=5, cpp_type=1, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=5755,
-  serialized_end=5831,
-)
-
-
-_ARGMAXPARAMETER = _descriptor.Descriptor(
-  name='ArgMaxParameter',
-  full_name='caffe.ArgMaxParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='out_max_val', full_name='caffe.ArgMaxParameter.out_max_val', index=0,
-      number=1, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='top_k', full_name='caffe.ArgMaxParameter.top_k', index=1,
-      number=2, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='axis', full_name='caffe.ArgMaxParameter.axis', index=2,
-      number=3, type=5, cpp_type=1, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=5833,
-  serialized_end=5910,
-)
-
-
-_CONCATPARAMETER = _descriptor.Descriptor(
-  name='ConcatParameter',
-  full_name='caffe.ConcatParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='axis', full_name='caffe.ConcatParameter.axis', index=0,
-      number=2, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='concat_dim', full_name='caffe.ConcatParameter.concat_dim', index=1,
-      number=1, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=5912,
-  serialized_end=5969,
-)
-
-
-_BATCHNORMPARAMETER = _descriptor.Descriptor(
-  name='BatchNormParameter',
-  full_name='caffe.BatchNormParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='use_global_stats', full_name='caffe.BatchNormParameter.use_global_stats', index=0,
-      number=1, type=8, cpp_type=7, label=1,
-      has_default_value=False, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='moving_average_fraction', full_name='caffe.BatchNormParameter.moving_average_fraction', index=1,
-      number=2, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(0.999),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='eps', full_name='caffe.BatchNormParameter.eps', index=2,
-      number=3, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(1e-05),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=5971,
-  serialized_end=6077,
-)
-
-
-_BIASPARAMETER = _descriptor.Descriptor(
-  name='BiasParameter',
-  full_name='caffe.BiasParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='axis', full_name='caffe.BiasParameter.axis', index=0,
-      number=1, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='num_axes', full_name='caffe.BiasParameter.num_axes', index=1,
-      number=2, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='filler', full_name='caffe.BiasParameter.filler', index=2,
-      number=3, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=6079,
-  serialized_end=6172,
-)
-
-
-_CONTRASTIVELOSSPARAMETER = _descriptor.Descriptor(
-  name='ContrastiveLossParameter',
-  full_name='caffe.ContrastiveLossParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='margin', full_name='caffe.ContrastiveLossParameter.margin', index=0,
-      number=1, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(1),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='legacy_version', full_name='caffe.ContrastiveLossParameter.legacy_version', index=1,
-      number=2, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=6174,
-  serialized_end=6250,
-)
-
-
-_CONVOLUTIONPARAMETER = _descriptor.Descriptor(
-  name='ConvolutionParameter',
-  full_name='caffe.ConvolutionParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='num_output', full_name='caffe.ConvolutionParameter.num_output', index=0,
-      number=1, type=13, cpp_type=3, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='bias_term', full_name='caffe.ConvolutionParameter.bias_term', index=1,
-      number=2, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=True,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='pad', full_name='caffe.ConvolutionParameter.pad', index=2,
-      number=3, type=13, cpp_type=3, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='kernel_size', full_name='caffe.ConvolutionParameter.kernel_size', index=3,
-      number=4, type=13, cpp_type=3, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='stride', full_name='caffe.ConvolutionParameter.stride', index=4,
-      number=6, type=13, cpp_type=3, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='dilation', full_name='caffe.ConvolutionParameter.dilation', index=5,
-      number=18, type=13, cpp_type=3, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='pad_h', full_name='caffe.ConvolutionParameter.pad_h', index=6,
-      number=9, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='pad_w', full_name='caffe.ConvolutionParameter.pad_w', index=7,
-      number=10, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='kernel_h', full_name='caffe.ConvolutionParameter.kernel_h', index=8,
-      number=11, type=13, cpp_type=3, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='kernel_w', full_name='caffe.ConvolutionParameter.kernel_w', index=9,
-      number=12, type=13, cpp_type=3, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='stride_h', full_name='caffe.ConvolutionParameter.stride_h', index=10,
-      number=13, type=13, cpp_type=3, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='stride_w', full_name='caffe.ConvolutionParameter.stride_w', index=11,
-      number=14, type=13, cpp_type=3, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='group', full_name='caffe.ConvolutionParameter.group', index=12,
-      number=5, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='weight_filler', full_name='caffe.ConvolutionParameter.weight_filler', index=13,
-      number=7, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='bias_filler', full_name='caffe.ConvolutionParameter.bias_filler', index=14,
-      number=8, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='engine', full_name='caffe.ConvolutionParameter.engine', index=15,
-      number=15, type=14, cpp_type=8, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='axis', full_name='caffe.ConvolutionParameter.axis', index=16,
-      number=16, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='force_nd_im2col', full_name='caffe.ConvolutionParameter.force_nd_im2col', index=17,
-      number=17, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-    _CONVOLUTIONPARAMETER_ENGINE,
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=6253,
-  serialized_end=6761,
-)
-
-
-_CROPPARAMETER = _descriptor.Descriptor(
-  name='CropParameter',
-  full_name='caffe.CropParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='axis', full_name='caffe.CropParameter.axis', index=0,
-      number=1, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=2,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='offset', full_name='caffe.CropParameter.offset', index=1,
-      number=2, type=13, cpp_type=3, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=6763,
-  serialized_end=6811,
-)
-
-
-_DATAPARAMETER = _descriptor.Descriptor(
-  name='DataParameter',
-  full_name='caffe.DataParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='source', full_name='caffe.DataParameter.source', index=0,
-      number=1, type=9, cpp_type=9, label=1,
-      has_default_value=False, default_value=_b("").decode('utf-8'),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='batch_size', full_name='caffe.DataParameter.batch_size', index=1,
-      number=4, type=13, cpp_type=3, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='rand_skip', full_name='caffe.DataParameter.rand_skip', index=2,
-      number=7, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='backend', full_name='caffe.DataParameter.backend', index=3,
-      number=8, type=14, cpp_type=8, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='scale', full_name='caffe.DataParameter.scale', index=4,
-      number=2, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(1),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='mean_file', full_name='caffe.DataParameter.mean_file', index=5,
-      number=3, type=9, cpp_type=9, label=1,
-      has_default_value=False, default_value=_b("").decode('utf-8'),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='crop_size', full_name='caffe.DataParameter.crop_size', index=6,
-      number=5, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='mirror', full_name='caffe.DataParameter.mirror', index=7,
-      number=6, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='force_encoded_color', full_name='caffe.DataParameter.force_encoded_color', index=8,
-      number=9, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='prefetch', full_name='caffe.DataParameter.prefetch', index=9,
-      number=10, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=4,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-    _DATAPARAMETER_DB,
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=6814,
-  serialized_end=7106,
-)
-
-
-_DROPOUTPARAMETER = _descriptor.Descriptor(
-  name='DropoutParameter',
-  full_name='caffe.DropoutParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='dropout_ratio', full_name='caffe.DropoutParameter.dropout_ratio', index=0,
-      number=1, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(0.5),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=7108,
-  serialized_end=7154,
-)
-
-
-_DUMMYDATAPARAMETER = _descriptor.Descriptor(
-  name='DummyDataParameter',
-  full_name='caffe.DummyDataParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='data_filler', full_name='caffe.DummyDataParameter.data_filler', index=0,
-      number=1, type=11, cpp_type=10, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='shape', full_name='caffe.DummyDataParameter.shape', index=1,
-      number=6, type=11, cpp_type=10, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='num', full_name='caffe.DummyDataParameter.num', index=2,
-      number=2, type=13, cpp_type=3, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='channels', full_name='caffe.DummyDataParameter.channels', index=3,
-      number=3, type=13, cpp_type=3, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='height', full_name='caffe.DummyDataParameter.height', index=4,
-      number=4, type=13, cpp_type=3, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='width', full_name='caffe.DummyDataParameter.width', index=5,
-      number=5, type=13, cpp_type=3, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=7157,
-  serialized_end=7317,
-)
-
-
-_ELTWISEPARAMETER = _descriptor.Descriptor(
-  name='EltwiseParameter',
-  full_name='caffe.EltwiseParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='operation', full_name='caffe.EltwiseParameter.operation', index=0,
-      number=1, type=14, cpp_type=8, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='coeff', full_name='caffe.EltwiseParameter.coeff', index=1,
-      number=2, type=2, cpp_type=6, label=3,
-      has_default_value=False, default_value=[],
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='stable_prod_grad', full_name='caffe.EltwiseParameter.stable_prod_grad', index=2,
-      number=3, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=True,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-    _ELTWISEPARAMETER_ELTWISEOP,
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=7320,
-  serialized_end=7485,
-)
-
-
-_ELUPARAMETER = _descriptor.Descriptor(
-  name='ELUParameter',
-  full_name='caffe.ELUParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='alpha', full_name='caffe.ELUParameter.alpha', index=0,
-      number=1, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(1),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=7487,
-  serialized_end=7519,
-)
-
-
-_EMBEDPARAMETER = _descriptor.Descriptor(
-  name='EmbedParameter',
-  full_name='caffe.EmbedParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='num_output', full_name='caffe.EmbedParameter.num_output', index=0,
-      number=1, type=13, cpp_type=3, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='input_dim', full_name='caffe.EmbedParameter.input_dim', index=1,
-      number=2, type=13, cpp_type=3, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='bias_term', full_name='caffe.EmbedParameter.bias_term', index=2,
-      number=3, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=True,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='weight_filler', full_name='caffe.EmbedParameter.weight_filler', index=3,
-      number=4, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='bias_filler', full_name='caffe.EmbedParameter.bias_filler', index=4,
-      number=5, type=11, cpp_type=10, label=1,
-      has_default_value=False, default_value=None,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=7522,
-  serialized_end=7694,
-)
-
-
-_EXPPARAMETER = _descriptor.Descriptor(
-  name='ExpParameter',
-  full_name='caffe.ExpParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='base', full_name='caffe.ExpParameter.base', index=0,
-      number=1, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(-1),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='scale', full_name='caffe.ExpParameter.scale', index=1,
-      number=2, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(1),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='shift', full_name='caffe.ExpParameter.shift', index=2,
-      number=3, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(0),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=7696,
-  serialized_end=7764,
-)
-
-
-_FLATTENPARAMETER = _descriptor.Descriptor(
-  name='FlattenParameter',
-  full_name='caffe.FlattenParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='axis', full_name='caffe.FlattenParameter.axis', index=0,
-      number=1, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='end_axis', full_name='caffe.FlattenParameter.end_axis', index=1,
-      number=2, type=5, cpp_type=1, label=1,
-      has_default_value=True, default_value=-1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=7766,
-  serialized_end=7823,
-)
-
-
-_HDF5DATAPARAMETER = _descriptor.Descriptor(
-  name='HDF5DataParameter',
-  full_name='caffe.HDF5DataParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='source', full_name='caffe.HDF5DataParameter.source', index=0,
-      number=1, type=9, cpp_type=9, label=1,
-      has_default_value=False, default_value=_b("").decode('utf-8'),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='batch_size', full_name='caffe.HDF5DataParameter.batch_size', index=1,
-      number=2, type=13, cpp_type=3, label=1,
-      has_default_value=False, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='shuffle', full_name='caffe.HDF5DataParameter.shuffle', index=2,
-      number=3, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=7825,
-  serialized_end=7904,
-)
-
-
-_HDF5OUTPUTPARAMETER = _descriptor.Descriptor(
-  name='HDF5OutputParameter',
-  full_name='caffe.HDF5OutputParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='file_name', full_name='caffe.HDF5OutputParameter.file_name', index=0,
-      number=1, type=9, cpp_type=9, label=1,
-      has_default_value=False, default_value=_b("").decode('utf-8'),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=7906,
-  serialized_end=7946,
-)
-
-
-_HINGELOSSPARAMETER = _descriptor.Descriptor(
-  name='HingeLossParameter',
-  full_name='caffe.HingeLossParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='norm', full_name='caffe.HingeLossParameter.norm', index=0,
-      number=1, type=14, cpp_type=8, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-  ],
-  extensions=[
-  ],
-  nested_types=[],
-  enum_types=[
-    _HINGELOSSPARAMETER_NORM,
-  ],
-  options=None,
-  is_extendable=False,
-  syntax='proto2',
-  extension_ranges=[],
-  oneofs=[
-  ],
-  serialized_start=7948,
-  serialized_end=8042,
-)
-
-
-_IMAGEDATAPARAMETER = _descriptor.Descriptor(
-  name='ImageDataParameter',
-  full_name='caffe.ImageDataParameter',
-  filename=None,
-  file=DESCRIPTOR,
-  containing_type=None,
-  fields=[
-    _descriptor.FieldDescriptor(
-      name='source', full_name='caffe.ImageDataParameter.source', index=0,
-      number=1, type=9, cpp_type=9, label=1,
-      has_default_value=False, default_value=_b("").decode('utf-8'),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='batch_size', full_name='caffe.ImageDataParameter.batch_size', index=1,
-      number=4, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=1,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='rand_skip', full_name='caffe.ImageDataParameter.rand_skip', index=2,
-      number=7, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='shuffle', full_name='caffe.ImageDataParameter.shuffle', index=3,
-      number=8, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=False,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='new_height', full_name='caffe.ImageDataParameter.new_height', index=4,
-      number=9, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='new_width', full_name='caffe.ImageDataParameter.new_width', index=5,
-      number=10, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='is_color', full_name='caffe.ImageDataParameter.is_color', index=6,
-      number=11, type=8, cpp_type=7, label=1,
-      has_default_value=True, default_value=True,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='scale', full_name='caffe.ImageDataParameter.scale', index=7,
-      number=2, type=2, cpp_type=6, label=1,
-      has_default_value=True, default_value=float(1),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='mean_file', full_name='caffe.ImageDataParameter.mean_file', index=8,
-      number=3, type=9, cpp_type=9, label=1,
-      has_default_value=False, default_value=_b("").decode('utf-8'),
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='crop_size', full_name='caffe.ImageDataParameter.crop_size', index=9,
-      number=5, type=13, cpp_type=3, label=1,
-      has_default_value=True, default_value=0,
-      message_type=None, enum_type=None, containing_type=None,
-      is_extension=False, extension_scope=None,
-      options=None),
-    _descriptor.FieldDescriptor(
-      name='mirror', full_name='caffe.ImageDataParameter.mirror', index=10,
full_name='caffe.ImageDataParameter.mirror', index=10, - number=6, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='root_folder', full_name='caffe.ImageDataParameter.root_folder', index=11, - number=12, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=8045, - serialized_end=8324, -) - - -_INFOGAINLOSSPARAMETER = _descriptor.Descriptor( - name='InfogainLossParameter', - full_name='caffe.InfogainLossParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='source', full_name='caffe.InfogainLossParameter.source', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='axis', full_name='caffe.InfogainLossParameter.axis', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=8326, - serialized_end=8382, -) - - -_INNERPRODUCTPARAMETER = _descriptor.Descriptor( - name='InnerProductParameter', - full_name='caffe.InnerProductParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='num_output', full_name='caffe.InnerProductParameter.num_output', index=0, - number=1, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='bias_term', full_name='caffe.InnerProductParameter.bias_term', index=1, - number=2, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='weight_filler', full_name='caffe.InnerProductParameter.weight_filler', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='bias_filler', full_name='caffe.InnerProductParameter.bias_filler', index=3, - number=4, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='axis', full_name='caffe.InnerProductParameter.axis', index=4, - number=5, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - 
message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='transpose', full_name='caffe.InnerProductParameter.transpose', index=5, - number=6, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=8385, - serialized_end=8588, -) - - -_INPUTPARAMETER = _descriptor.Descriptor( - name='InputParameter', - full_name='caffe.InputParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='shape', full_name='caffe.InputParameter.shape', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=8590, - serialized_end=8639, -) - - -_LOGPARAMETER = _descriptor.Descriptor( - name='LogParameter', - full_name='caffe.LogParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='base', full_name='caffe.LogParameter.base', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(-1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='scale', full_name='caffe.LogParameter.scale', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='shift', full_name='caffe.LogParameter.shift', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=8641, - serialized_end=8709, -) - - -_LRNPARAMETER = _descriptor.Descriptor( - name='LRNParameter', - full_name='caffe.LRNParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='local_size', full_name='caffe.LRNParameter.local_size', index=0, - number=1, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=5, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='alpha', full_name='caffe.LRNParameter.alpha', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='beta', full_name='caffe.LRNParameter.beta', index=2, - number=3, type=2, cpp_type=6, 
label=1, - has_default_value=True, default_value=float(0.75), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='norm_region', full_name='caffe.LRNParameter.norm_region', index=3, - number=4, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='k', full_name='caffe.LRNParameter.k', index=4, - number=5, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='engine', full_name='caffe.LRNParameter.engine', index=5, - number=6, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _LRNPARAMETER_NORMREGION, - _LRNPARAMETER_ENGINE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=8712, - serialized_end=9024, -) - - -_MEMORYDATAPARAMETER = _descriptor.Descriptor( - name='MemoryDataParameter', - full_name='caffe.MemoryDataParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='batch_size', full_name='caffe.MemoryDataParameter.batch_size', index=0, - number=1, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='channels', full_name='caffe.MemoryDataParameter.channels', index=1, - number=2, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='height', full_name='caffe.MemoryDataParameter.height', index=2, - number=3, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='width', full_name='caffe.MemoryDataParameter.width', index=3, - number=4, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=9026, - serialized_end=9116, -) - - -_MVNPARAMETER = _descriptor.Descriptor( - name='MVNParameter', - full_name='caffe.MVNParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='normalize_variance', full_name='caffe.MVNParameter.normalize_variance', index=0, - number=1, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - 
name='across_channels', full_name='caffe.MVNParameter.across_channels', index=1, - number=2, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='eps', full_name='caffe.MVNParameter.eps', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1e-09), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=9118, - serialized_end=9218, -) - - -_PARAMETERPARAMETER = _descriptor.Descriptor( - name='ParameterParameter', - full_name='caffe.ParameterParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='shape', full_name='caffe.ParameterParameter.shape', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=9220, - serialized_end=9273, -) - - -_POOLINGPARAMETER = _descriptor.Descriptor( - name='PoolingParameter', - full_name='caffe.PoolingParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='pool', full_name='caffe.PoolingParameter.pool', index=0, - number=1, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='pad', full_name='caffe.PoolingParameter.pad', index=1, - number=4, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='pad_h', full_name='caffe.PoolingParameter.pad_h', index=2, - number=9, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='pad_w', full_name='caffe.PoolingParameter.pad_w', index=3, - number=10, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='kernel_size', full_name='caffe.PoolingParameter.kernel_size', index=4, - number=2, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='kernel_h', full_name='caffe.PoolingParameter.kernel_h', index=5, - number=5, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - 
_descriptor.FieldDescriptor( - name='kernel_w', full_name='caffe.PoolingParameter.kernel_w', index=6, - number=6, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='stride', full_name='caffe.PoolingParameter.stride', index=7, - number=3, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='stride_h', full_name='caffe.PoolingParameter.stride_h', index=8, - number=7, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='stride_w', full_name='caffe.PoolingParameter.stride_w', index=9, - number=8, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='engine', full_name='caffe.PoolingParameter.engine', index=10, - number=11, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='global_pooling', full_name='caffe.PoolingParameter.global_pooling', index=11, - number=12, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _POOLINGPARAMETER_POOLMETHOD, - _POOLINGPARAMETER_ENGINE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=9276, - serialized_end=9694, -) - - -_POWERPARAMETER = _descriptor.Descriptor( - name='PowerParameter', - full_name='caffe.PowerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='power', full_name='caffe.PowerParameter.power', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='scale', full_name='caffe.PowerParameter.scale', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='shift', full_name='caffe.PowerParameter.shift', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=9696, - serialized_end=9766, -) - - -_PYTHONPARAMETER = _descriptor.Descriptor( - name='PythonParameter', - 
full_name='caffe.PythonParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='module', full_name='caffe.PythonParameter.module', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='layer', full_name='caffe.PythonParameter.layer', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='param_str', full_name='caffe.PythonParameter.param_str', index=2, - number=3, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='share_in_parallel', full_name='caffe.PythonParameter.share_in_parallel', index=3, - number=4, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=9768, - serialized_end=9871, -) - - -_RECURRENTPARAMETER = _descriptor.Descriptor( - name='RecurrentParameter', - full_name='caffe.RecurrentParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='num_output', full_name='caffe.RecurrentParameter.num_output', index=0, - number=1, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='weight_filler', full_name='caffe.RecurrentParameter.weight_filler', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='bias_filler', full_name='caffe.RecurrentParameter.bias_filler', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='debug_info', full_name='caffe.RecurrentParameter.debug_info', index=3, - number=4, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='expose_hidden', full_name='caffe.RecurrentParameter.expose_hidden', index=4, - number=5, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - 
extension_ranges=[], - oneofs=[ - ], - serialized_start=9874, - serialized_end=10066, -) - - -_REDUCTIONPARAMETER = _descriptor.Descriptor( - name='ReductionParameter', - full_name='caffe.ReductionParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='operation', full_name='caffe.ReductionParameter.operation', index=0, - number=1, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='axis', full_name='caffe.ReductionParameter.axis', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='coeff', full_name='caffe.ReductionParameter.coeff', index=2, - number=3, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _REDUCTIONPARAMETER_REDUCTIONOP, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=10069, - serialized_end=10242, -) - - -_RELUPARAMETER = _descriptor.Descriptor( - name='ReLUParameter', - full_name='caffe.ReLUParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='negative_slope', full_name='caffe.ReLUParameter.negative_slope', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='engine', full_name='caffe.ReLUParameter.engine', index=1, - number=2, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _RELUPARAMETER_ENGINE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=10245, - serialized_end=10386, -) - - -_RESHAPEPARAMETER = _descriptor.Descriptor( - name='ReshapeParameter', - full_name='caffe.ReshapeParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='shape', full_name='caffe.ReshapeParameter.shape', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='axis', full_name='caffe.ReshapeParameter.axis', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='num_axes', full_name='caffe.ReshapeParameter.num_axes', index=2, - number=3, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=-1, - message_type=None, enum_type=None, containing_type=None, - 
is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=10388, - serialized_end=10478, -) - - -_SCALEPARAMETER = _descriptor.Descriptor( - name='ScaleParameter', - full_name='caffe.ScaleParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='axis', full_name='caffe.ScaleParameter.axis', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='num_axes', full_name='caffe.ScaleParameter.num_axes', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='filler', full_name='caffe.ScaleParameter.filler', index=2, - number=3, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='bias_term', full_name='caffe.ScaleParameter.bias_term', index=3, - number=4, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='bias_filler', full_name='caffe.ScaleParameter.bias_filler', index=4, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=10481, - serialized_end=10646, -) - - -_SIGMOIDPARAMETER = _descriptor.Descriptor( - name='SigmoidParameter', - full_name='caffe.SigmoidParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='engine', full_name='caffe.SigmoidParameter.engine', index=0, - number=1, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _SIGMOIDPARAMETER_ENGINE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=10648, - serialized_end=10768, -) - - -_SLICEPARAMETER = _descriptor.Descriptor( - name='SliceParameter', - full_name='caffe.SliceParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='axis', full_name='caffe.SliceParameter.axis', index=0, - number=3, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='slice_point', full_name='caffe.SliceParameter.slice_point', index=1, - number=2, type=13, 
cpp_type=3, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='slice_dim', full_name='caffe.SliceParameter.slice_dim', index=2, - number=1, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=10770, - serialized_end=10846, -) - - -_SOFTMAXPARAMETER = _descriptor.Descriptor( - name='SoftmaxParameter', - full_name='caffe.SoftmaxParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='engine', full_name='caffe.SoftmaxParameter.engine', index=0, - number=1, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='axis', full_name='caffe.SoftmaxParameter.axis', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _SOFTMAXPARAMETER_ENGINE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=10849, - serialized_end=10986, -) - - -_TANHPARAMETER = _descriptor.Descriptor( - name='TanHParameter', - full_name='caffe.TanHParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='engine', full_name='caffe.TanHParameter.engine', index=0, - number=1, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _TANHPARAMETER_ENGINE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=10988, - serialized_end=11102, -) - - -_TILEPARAMETER = _descriptor.Descriptor( - name='TileParameter', - full_name='caffe.TileParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='axis', full_name='caffe.TileParameter.axis', index=0, - number=1, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='tiles', full_name='caffe.TileParameter.tiles', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=11104, - serialized_end=11151, -) - - -_THRESHOLDPARAMETER = _descriptor.Descriptor( - name='ThresholdParameter', - 
full_name='caffe.ThresholdParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='threshold', full_name='caffe.ThresholdParameter.threshold', index=0, - number=1, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=11153, - serialized_end=11195, -) - - -_WINDOWDATAPARAMETER = _descriptor.Descriptor( - name='WindowDataParameter', - full_name='caffe.WindowDataParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='source', full_name='caffe.WindowDataParameter.source', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='scale', full_name='caffe.WindowDataParameter.scale', index=1, - number=2, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='mean_file', full_name='caffe.WindowDataParameter.mean_file', index=2, - number=3, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='batch_size', full_name='caffe.WindowDataParameter.batch_size', index=3, - number=4, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='crop_size', full_name='caffe.WindowDataParameter.crop_size', index=4, - number=5, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='mirror', full_name='caffe.WindowDataParameter.mirror', index=5, - number=6, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='fg_threshold', full_name='caffe.WindowDataParameter.fg_threshold', index=6, - number=7, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='bg_threshold', full_name='caffe.WindowDataParameter.bg_threshold', index=7, - number=8, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='fg_fraction', full_name='caffe.WindowDataParameter.fg_fraction', index=8, - number=9, type=2, cpp_type=6, 
label=1, - has_default_value=True, default_value=float(0.25), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='context_pad', full_name='caffe.WindowDataParameter.context_pad', index=9, - number=10, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='crop_mode', full_name='caffe.WindowDataParameter.crop_mode', index=10, - number=11, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("warp").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='cache_images', full_name='caffe.WindowDataParameter.cache_images', index=11, - number=12, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='root_folder', full_name='caffe.WindowDataParameter.root_folder', index=12, - number=13, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=11198, - serialized_end=11519, -) - - -_SPPPARAMETER = _descriptor.Descriptor( - name='SPPParameter', - full_name='caffe.SPPParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='pyramid_height', full_name='caffe.SPPParameter.pyramid_height', index=0, - number=1, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='pool', full_name='caffe.SPPParameter.pool', index=1, - number=2, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='engine', full_name='caffe.SPPParameter.engine', index=2, - number=6, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _SPPPARAMETER_POOLMETHOD, - _SPPPARAMETER_ENGINE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=11522, - serialized_end=11757, -) - - -_V1LAYERPARAMETER = _descriptor.Descriptor( - name='V1LayerParameter', - full_name='caffe.V1LayerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='bottom', full_name='caffe.V1LayerParameter.bottom', index=0, - number=2, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - 
options=None), - _descriptor.FieldDescriptor( - name='top', full_name='caffe.V1LayerParameter.top', index=1, - number=3, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='name', full_name='caffe.V1LayerParameter.name', index=2, - number=4, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='include', full_name='caffe.V1LayerParameter.include', index=3, - number=32, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='exclude', full_name='caffe.V1LayerParameter.exclude', index=4, - number=33, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='type', full_name='caffe.V1LayerParameter.type', index=5, - number=5, type=14, cpp_type=8, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='blobs', full_name='caffe.V1LayerParameter.blobs', index=6, - number=6, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='param', full_name='caffe.V1LayerParameter.param', index=7, - number=1001, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='blob_share_mode', full_name='caffe.V1LayerParameter.blob_share_mode', index=8, - number=1002, type=14, cpp_type=8, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='blobs_lr', full_name='caffe.V1LayerParameter.blobs_lr', index=9, - number=7, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='weight_decay', full_name='caffe.V1LayerParameter.weight_decay', index=10, - number=8, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='loss_weight', full_name='caffe.V1LayerParameter.loss_weight', index=11, - number=35, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='accuracy_param', 
full_name='caffe.V1LayerParameter.accuracy_param', index=12, - number=27, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='argmax_param', full_name='caffe.V1LayerParameter.argmax_param', index=13, - number=23, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='concat_param', full_name='caffe.V1LayerParameter.concat_param', index=14, - number=9, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='contrastive_loss_param', full_name='caffe.V1LayerParameter.contrastive_loss_param', index=15, - number=40, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='convolution_param', full_name='caffe.V1LayerParameter.convolution_param', index=16, - number=10, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='data_param', full_name='caffe.V1LayerParameter.data_param', index=17, - number=11, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='dropout_param', full_name='caffe.V1LayerParameter.dropout_param', index=18, - number=12, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='dummy_data_param', full_name='caffe.V1LayerParameter.dummy_data_param', index=19, - number=26, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='eltwise_param', full_name='caffe.V1LayerParameter.eltwise_param', index=20, - number=24, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='exp_param', full_name='caffe.V1LayerParameter.exp_param', index=21, - number=41, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='hdf5_data_param', full_name='caffe.V1LayerParameter.hdf5_data_param', index=22, - number=13, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - 
_descriptor.FieldDescriptor( - name='hdf5_output_param', full_name='caffe.V1LayerParameter.hdf5_output_param', index=23, - number=14, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='hinge_loss_param', full_name='caffe.V1LayerParameter.hinge_loss_param', index=24, - number=29, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='image_data_param', full_name='caffe.V1LayerParameter.image_data_param', index=25, - number=15, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='infogain_loss_param', full_name='caffe.V1LayerParameter.infogain_loss_param', index=26, - number=16, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='inner_product_param', full_name='caffe.V1LayerParameter.inner_product_param', index=27, - number=17, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='lrn_param', full_name='caffe.V1LayerParameter.lrn_param', index=28, - number=18, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='memory_data_param', full_name='caffe.V1LayerParameter.memory_data_param', index=29, - number=22, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='mvn_param', full_name='caffe.V1LayerParameter.mvn_param', index=30, - number=34, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='pooling_param', full_name='caffe.V1LayerParameter.pooling_param', index=31, - number=19, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='power_param', full_name='caffe.V1LayerParameter.power_param', index=32, - number=21, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='relu_param', full_name='caffe.V1LayerParameter.relu_param', index=33, - number=30, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - 
is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='sigmoid_param', full_name='caffe.V1LayerParameter.sigmoid_param', index=34, - number=38, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='softmax_param', full_name='caffe.V1LayerParameter.softmax_param', index=35, - number=39, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='slice_param', full_name='caffe.V1LayerParameter.slice_param', index=36, - number=31, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='tanh_param', full_name='caffe.V1LayerParameter.tanh_param', index=37, - number=37, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='threshold_param', full_name='caffe.V1LayerParameter.threshold_param', index=38, - number=25, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='window_data_param', full_name='caffe.V1LayerParameter.window_data_param', index=39, - number=20, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='transform_param', full_name='caffe.V1LayerParameter.transform_param', index=40, - number=36, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='loss_param', full_name='caffe.V1LayerParameter.loss_param', index=41, - number=42, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='layer', full_name='caffe.V1LayerParameter.layer', index=42, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _V1LAYERPARAMETER_LAYERTYPE, - _V1LAYERPARAMETER_DIMCHECKMODE, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=11760, - serialized_end=14288, -) - - -_V0LAYERPARAMETER = _descriptor.Descriptor( - name='V0LayerParameter', - full_name='caffe.V0LayerParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', full_name='caffe.V0LayerParameter.name', index=0, - number=1, 
type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='type', full_name='caffe.V0LayerParameter.type', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='num_output', full_name='caffe.V0LayerParameter.num_output', index=2, - number=3, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='biasterm', full_name='caffe.V0LayerParameter.biasterm', index=3, - number=4, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=True, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='weight_filler', full_name='caffe.V0LayerParameter.weight_filler', index=4, - number=5, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='bias_filler', full_name='caffe.V0LayerParameter.bias_filler', index=5, - number=6, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='pad', full_name='caffe.V0LayerParameter.pad', index=6, - number=7, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='kernelsize', full_name='caffe.V0LayerParameter.kernelsize', index=7, - number=8, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='group', full_name='caffe.V0LayerParameter.group', index=8, - number=9, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='stride', full_name='caffe.V0LayerParameter.stride', index=9, - number=10, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='pool', full_name='caffe.V0LayerParameter.pool', index=10, - number=11, type=14, cpp_type=8, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='dropout_ratio', full_name='caffe.V0LayerParameter.dropout_ratio', index=11, - number=12, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.5), - 
message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='local_size', full_name='caffe.V0LayerParameter.local_size', index=12, - number=13, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=5, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='alpha', full_name='caffe.V0LayerParameter.alpha', index=13, - number=14, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='beta', full_name='caffe.V0LayerParameter.beta', index=14, - number=15, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.75), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='k', full_name='caffe.V0LayerParameter.k', index=15, - number=22, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='source', full_name='caffe.V0LayerParameter.source', index=16, - number=16, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='scale', full_name='caffe.V0LayerParameter.scale', index=17, - number=17, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(1), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='meanfile', full_name='caffe.V0LayerParameter.meanfile', index=18, - number=18, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='batchsize', full_name='caffe.V0LayerParameter.batchsize', index=19, - number=19, type=13, cpp_type=3, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='cropsize', full_name='caffe.V0LayerParameter.cropsize', index=20, - number=20, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='mirror', full_name='caffe.V0LayerParameter.mirror', index=21, - number=21, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='blobs', full_name='caffe.V0LayerParameter.blobs', index=22, - number=50, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, 
extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='blobs_lr', full_name='caffe.V0LayerParameter.blobs_lr', index=23, - number=51, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='weight_decay', full_name='caffe.V0LayerParameter.weight_decay', index=24, - number=52, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='rand_skip', full_name='caffe.V0LayerParameter.rand_skip', index=25, - number=53, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='det_fg_threshold', full_name='caffe.V0LayerParameter.det_fg_threshold', index=26, - number=54, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='det_bg_threshold', full_name='caffe.V0LayerParameter.det_bg_threshold', index=27, - number=55, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.5), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='det_fg_fraction', full_name='caffe.V0LayerParameter.det_fg_fraction', index=28, - number=56, type=2, cpp_type=6, label=1, - has_default_value=True, default_value=float(0.25), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='det_context_pad', full_name='caffe.V0LayerParameter.det_context_pad', index=29, - number=58, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='det_crop_mode', full_name='caffe.V0LayerParameter.det_crop_mode', index=30, - number=59, type=9, cpp_type=9, label=1, - has_default_value=True, default_value=_b("warp").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='new_num', full_name='caffe.V0LayerParameter.new_num', index=31, - number=60, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='new_channels', full_name='caffe.V0LayerParameter.new_channels', index=32, - number=61, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='new_height', full_name='caffe.V0LayerParameter.new_height', index=33, - number=62, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - 
is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='new_width', full_name='caffe.V0LayerParameter.new_width', index=34, - number=63, type=5, cpp_type=1, label=1, - has_default_value=True, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='shuffle_images', full_name='caffe.V0LayerParameter.shuffle_images', index=35, - number=64, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='concat_dim', full_name='caffe.V0LayerParameter.concat_dim', index=36, - number=65, type=13, cpp_type=3, label=1, - has_default_value=True, default_value=1, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='hdf5_output_param', full_name='caffe.V0LayerParameter.hdf5_output_param', index=37, - number=1001, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - _V0LAYERPARAMETER_POOLMETHOD, - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=14291, - serialized_end=15312, -) - - -_PRELUPARAMETER = _descriptor.Descriptor( - name='PReLUParameter', - full_name='caffe.PReLUParameter', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='filler', full_name='caffe.PReLUParameter.filler', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='channel_shared', full_name='caffe.PReLUParameter.channel_shared', index=1, - number=2, type=8, cpp_type=7, label=1, - has_default_value=True, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto2', - extension_ranges=[], - oneofs=[ - ], - serialized_start=15314, - serialized_end=15401, -) - -_BLOBPROTO.fields_by_name['shape'].message_type = _BLOBSHAPE -_BLOBPROTOVECTOR.fields_by_name['blobs'].message_type = _BLOBPROTO -_FILLERPARAMETER.fields_by_name['variance_norm'].enum_type = _FILLERPARAMETER_VARIANCENORM -_FILLERPARAMETER_VARIANCENORM.containing_type = _FILLERPARAMETER -_NETPARAMETER.fields_by_name['input_shape'].message_type = _BLOBSHAPE -_NETPARAMETER.fields_by_name['state'].message_type = _NETSTATE -_NETPARAMETER.fields_by_name['layer'].message_type = _LAYERPARAMETER -_NETPARAMETER.fields_by_name['layers'].message_type = _V1LAYERPARAMETER -_SOLVERPARAMETER.fields_by_name['net_param'].message_type = _NETPARAMETER -_SOLVERPARAMETER.fields_by_name['train_net_param'].message_type = _NETPARAMETER -_SOLVERPARAMETER.fields_by_name['test_net_param'].message_type = _NETPARAMETER -_SOLVERPARAMETER.fields_by_name['train_state'].message_type = _NETSTATE -_SOLVERPARAMETER.fields_by_name['test_state'].message_type 
= _NETSTATE -_SOLVERPARAMETER.fields_by_name['snapshot_format'].enum_type = _SOLVERPARAMETER_SNAPSHOTFORMAT -_SOLVERPARAMETER.fields_by_name['solver_mode'].enum_type = _SOLVERPARAMETER_SOLVERMODE -_SOLVERPARAMETER.fields_by_name['solver_type'].enum_type = _SOLVERPARAMETER_SOLVERTYPE -_SOLVERPARAMETER_SNAPSHOTFORMAT.containing_type = _SOLVERPARAMETER -_SOLVERPARAMETER_SOLVERMODE.containing_type = _SOLVERPARAMETER -_SOLVERPARAMETER_SOLVERTYPE.containing_type = _SOLVERPARAMETER -_SOLVERSTATE.fields_by_name['history'].message_type = _BLOBPROTO -_NETSTATE.fields_by_name['phase'].enum_type = _PHASE -_NETSTATERULE.fields_by_name['phase'].enum_type = _PHASE -_PARAMSPEC.fields_by_name['share_mode'].enum_type = _PARAMSPEC_DIMCHECKMODE -_PARAMSPEC_DIMCHECKMODE.containing_type = _PARAMSPEC -_LAYERPARAMETER.fields_by_name['phase'].enum_type = _PHASE -_LAYERPARAMETER.fields_by_name['param'].message_type = _PARAMSPEC -_LAYERPARAMETER.fields_by_name['blobs'].message_type = _BLOBPROTO -_LAYERPARAMETER.fields_by_name['include'].message_type = _NETSTATERULE -_LAYERPARAMETER.fields_by_name['exclude'].message_type = _NETSTATERULE -_LAYERPARAMETER.fields_by_name['transform_param'].message_type = _TRANSFORMATIONPARAMETER -_LAYERPARAMETER.fields_by_name['loss_param'].message_type = _LOSSPARAMETER -_LAYERPARAMETER.fields_by_name['accuracy_param'].message_type = _ACCURACYPARAMETER -_LAYERPARAMETER.fields_by_name['argmax_param'].message_type = _ARGMAXPARAMETER -_LAYERPARAMETER.fields_by_name['batch_norm_param'].message_type = _BATCHNORMPARAMETER -_LAYERPARAMETER.fields_by_name['bias_param'].message_type = _BIASPARAMETER -_LAYERPARAMETER.fields_by_name['concat_param'].message_type = _CONCATPARAMETER -_LAYERPARAMETER.fields_by_name['contrastive_loss_param'].message_type = _CONTRASTIVELOSSPARAMETER -_LAYERPARAMETER.fields_by_name['convolution_param'].message_type = _CONVOLUTIONPARAMETER -_LAYERPARAMETER.fields_by_name['crop_param'].message_type = _CROPPARAMETER -_LAYERPARAMETER.fields_by_name['data_param'].message_type = _DATAPARAMETER -_LAYERPARAMETER.fields_by_name['dropout_param'].message_type = _DROPOUTPARAMETER -_LAYERPARAMETER.fields_by_name['dummy_data_param'].message_type = _DUMMYDATAPARAMETER -_LAYERPARAMETER.fields_by_name['eltwise_param'].message_type = _ELTWISEPARAMETER -_LAYERPARAMETER.fields_by_name['elu_param'].message_type = _ELUPARAMETER -_LAYERPARAMETER.fields_by_name['embed_param'].message_type = _EMBEDPARAMETER -_LAYERPARAMETER.fields_by_name['exp_param'].message_type = _EXPPARAMETER -_LAYERPARAMETER.fields_by_name['flatten_param'].message_type = _FLATTENPARAMETER -_LAYERPARAMETER.fields_by_name['hdf5_data_param'].message_type = _HDF5DATAPARAMETER -_LAYERPARAMETER.fields_by_name['hdf5_output_param'].message_type = _HDF5OUTPUTPARAMETER -_LAYERPARAMETER.fields_by_name['hinge_loss_param'].message_type = _HINGELOSSPARAMETER -_LAYERPARAMETER.fields_by_name['image_data_param'].message_type = _IMAGEDATAPARAMETER -_LAYERPARAMETER.fields_by_name['infogain_loss_param'].message_type = _INFOGAINLOSSPARAMETER -_LAYERPARAMETER.fields_by_name['inner_product_param'].message_type = _INNERPRODUCTPARAMETER -_LAYERPARAMETER.fields_by_name['input_param'].message_type = _INPUTPARAMETER -_LAYERPARAMETER.fields_by_name['log_param'].message_type = _LOGPARAMETER -_LAYERPARAMETER.fields_by_name['lrn_param'].message_type = _LRNPARAMETER -_LAYERPARAMETER.fields_by_name['memory_data_param'].message_type = _MEMORYDATAPARAMETER -_LAYERPARAMETER.fields_by_name['mvn_param'].message_type = _MVNPARAMETER 
-_LAYERPARAMETER.fields_by_name['parameter_param'].message_type = _PARAMETERPARAMETER -_LAYERPARAMETER.fields_by_name['pooling_param'].message_type = _POOLINGPARAMETER -_LAYERPARAMETER.fields_by_name['power_param'].message_type = _POWERPARAMETER -_LAYERPARAMETER.fields_by_name['prelu_param'].message_type = _PRELUPARAMETER -_LAYERPARAMETER.fields_by_name['python_param'].message_type = _PYTHONPARAMETER -_LAYERPARAMETER.fields_by_name['recurrent_param'].message_type = _RECURRENTPARAMETER -_LAYERPARAMETER.fields_by_name['reduction_param'].message_type = _REDUCTIONPARAMETER -_LAYERPARAMETER.fields_by_name['relu_param'].message_type = _RELUPARAMETER -_LAYERPARAMETER.fields_by_name['reshape_param'].message_type = _RESHAPEPARAMETER -_LAYERPARAMETER.fields_by_name['scale_param'].message_type = _SCALEPARAMETER -_LAYERPARAMETER.fields_by_name['sigmoid_param'].message_type = _SIGMOIDPARAMETER -_LAYERPARAMETER.fields_by_name['softmax_param'].message_type = _SOFTMAXPARAMETER -_LAYERPARAMETER.fields_by_name['spp_param'].message_type = _SPPPARAMETER -_LAYERPARAMETER.fields_by_name['slice_param'].message_type = _SLICEPARAMETER -_LAYERPARAMETER.fields_by_name['tanh_param'].message_type = _TANHPARAMETER -_LAYERPARAMETER.fields_by_name['threshold_param'].message_type = _THRESHOLDPARAMETER -_LAYERPARAMETER.fields_by_name['tile_param'].message_type = _TILEPARAMETER -_LAYERPARAMETER.fields_by_name['window_data_param'].message_type = _WINDOWDATAPARAMETER -_LOSSPARAMETER.fields_by_name['normalization'].enum_type = _LOSSPARAMETER_NORMALIZATIONMODE -_LOSSPARAMETER_NORMALIZATIONMODE.containing_type = _LOSSPARAMETER -_BIASPARAMETER.fields_by_name['filler'].message_type = _FILLERPARAMETER -_CONVOLUTIONPARAMETER.fields_by_name['weight_filler'].message_type = _FILLERPARAMETER -_CONVOLUTIONPARAMETER.fields_by_name['bias_filler'].message_type = _FILLERPARAMETER -_CONVOLUTIONPARAMETER.fields_by_name['engine'].enum_type = _CONVOLUTIONPARAMETER_ENGINE -_CONVOLUTIONPARAMETER_ENGINE.containing_type = _CONVOLUTIONPARAMETER -_DATAPARAMETER.fields_by_name['backend'].enum_type = _DATAPARAMETER_DB -_DATAPARAMETER_DB.containing_type = _DATAPARAMETER -_DUMMYDATAPARAMETER.fields_by_name['data_filler'].message_type = _FILLERPARAMETER -_DUMMYDATAPARAMETER.fields_by_name['shape'].message_type = _BLOBSHAPE -_ELTWISEPARAMETER.fields_by_name['operation'].enum_type = _ELTWISEPARAMETER_ELTWISEOP -_ELTWISEPARAMETER_ELTWISEOP.containing_type = _ELTWISEPARAMETER -_EMBEDPARAMETER.fields_by_name['weight_filler'].message_type = _FILLERPARAMETER -_EMBEDPARAMETER.fields_by_name['bias_filler'].message_type = _FILLERPARAMETER -_HINGELOSSPARAMETER.fields_by_name['norm'].enum_type = _HINGELOSSPARAMETER_NORM -_HINGELOSSPARAMETER_NORM.containing_type = _HINGELOSSPARAMETER -_INNERPRODUCTPARAMETER.fields_by_name['weight_filler'].message_type = _FILLERPARAMETER -_INNERPRODUCTPARAMETER.fields_by_name['bias_filler'].message_type = _FILLERPARAMETER -_INPUTPARAMETER.fields_by_name['shape'].message_type = _BLOBSHAPE -_LRNPARAMETER.fields_by_name['norm_region'].enum_type = _LRNPARAMETER_NORMREGION -_LRNPARAMETER.fields_by_name['engine'].enum_type = _LRNPARAMETER_ENGINE -_LRNPARAMETER_NORMREGION.containing_type = _LRNPARAMETER -_LRNPARAMETER_ENGINE.containing_type = _LRNPARAMETER -_PARAMETERPARAMETER.fields_by_name['shape'].message_type = _BLOBSHAPE -_POOLINGPARAMETER.fields_by_name['pool'].enum_type = _POOLINGPARAMETER_POOLMETHOD -_POOLINGPARAMETER.fields_by_name['engine'].enum_type = _POOLINGPARAMETER_ENGINE -_POOLINGPARAMETER_POOLMETHOD.containing_type = 
_POOLINGPARAMETER -_POOLINGPARAMETER_ENGINE.containing_type = _POOLINGPARAMETER -_RECURRENTPARAMETER.fields_by_name['weight_filler'].message_type = _FILLERPARAMETER -_RECURRENTPARAMETER.fields_by_name['bias_filler'].message_type = _FILLERPARAMETER -_REDUCTIONPARAMETER.fields_by_name['operation'].enum_type = _REDUCTIONPARAMETER_REDUCTIONOP -_REDUCTIONPARAMETER_REDUCTIONOP.containing_type = _REDUCTIONPARAMETER -_RELUPARAMETER.fields_by_name['engine'].enum_type = _RELUPARAMETER_ENGINE -_RELUPARAMETER_ENGINE.containing_type = _RELUPARAMETER -_RESHAPEPARAMETER.fields_by_name['shape'].message_type = _BLOBSHAPE -_SCALEPARAMETER.fields_by_name['filler'].message_type = _FILLERPARAMETER -_SCALEPARAMETER.fields_by_name['bias_filler'].message_type = _FILLERPARAMETER -_SIGMOIDPARAMETER.fields_by_name['engine'].enum_type = _SIGMOIDPARAMETER_ENGINE -_SIGMOIDPARAMETER_ENGINE.containing_type = _SIGMOIDPARAMETER -_SOFTMAXPARAMETER.fields_by_name['engine'].enum_type = _SOFTMAXPARAMETER_ENGINE -_SOFTMAXPARAMETER_ENGINE.containing_type = _SOFTMAXPARAMETER -_TANHPARAMETER.fields_by_name['engine'].enum_type = _TANHPARAMETER_ENGINE -_TANHPARAMETER_ENGINE.containing_type = _TANHPARAMETER -_SPPPARAMETER.fields_by_name['pool'].enum_type = _SPPPARAMETER_POOLMETHOD -_SPPPARAMETER.fields_by_name['engine'].enum_type = _SPPPARAMETER_ENGINE -_SPPPARAMETER_POOLMETHOD.containing_type = _SPPPARAMETER -_SPPPARAMETER_ENGINE.containing_type = _SPPPARAMETER -_V1LAYERPARAMETER.fields_by_name['include'].message_type = _NETSTATERULE -_V1LAYERPARAMETER.fields_by_name['exclude'].message_type = _NETSTATERULE -_V1LAYERPARAMETER.fields_by_name['type'].enum_type = _V1LAYERPARAMETER_LAYERTYPE -_V1LAYERPARAMETER.fields_by_name['blobs'].message_type = _BLOBPROTO -_V1LAYERPARAMETER.fields_by_name['blob_share_mode'].enum_type = _V1LAYERPARAMETER_DIMCHECKMODE -_V1LAYERPARAMETER.fields_by_name['accuracy_param'].message_type = _ACCURACYPARAMETER -_V1LAYERPARAMETER.fields_by_name['argmax_param'].message_type = _ARGMAXPARAMETER -_V1LAYERPARAMETER.fields_by_name['concat_param'].message_type = _CONCATPARAMETER -_V1LAYERPARAMETER.fields_by_name['contrastive_loss_param'].message_type = _CONTRASTIVELOSSPARAMETER -_V1LAYERPARAMETER.fields_by_name['convolution_param'].message_type = _CONVOLUTIONPARAMETER -_V1LAYERPARAMETER.fields_by_name['data_param'].message_type = _DATAPARAMETER -_V1LAYERPARAMETER.fields_by_name['dropout_param'].message_type = _DROPOUTPARAMETER -_V1LAYERPARAMETER.fields_by_name['dummy_data_param'].message_type = _DUMMYDATAPARAMETER -_V1LAYERPARAMETER.fields_by_name['eltwise_param'].message_type = _ELTWISEPARAMETER -_V1LAYERPARAMETER.fields_by_name['exp_param'].message_type = _EXPPARAMETER -_V1LAYERPARAMETER.fields_by_name['hdf5_data_param'].message_type = _HDF5DATAPARAMETER -_V1LAYERPARAMETER.fields_by_name['hdf5_output_param'].message_type = _HDF5OUTPUTPARAMETER -_V1LAYERPARAMETER.fields_by_name['hinge_loss_param'].message_type = _HINGELOSSPARAMETER -_V1LAYERPARAMETER.fields_by_name['image_data_param'].message_type = _IMAGEDATAPARAMETER -_V1LAYERPARAMETER.fields_by_name['infogain_loss_param'].message_type = _INFOGAINLOSSPARAMETER -_V1LAYERPARAMETER.fields_by_name['inner_product_param'].message_type = _INNERPRODUCTPARAMETER -_V1LAYERPARAMETER.fields_by_name['lrn_param'].message_type = _LRNPARAMETER -_V1LAYERPARAMETER.fields_by_name['memory_data_param'].message_type = _MEMORYDATAPARAMETER -_V1LAYERPARAMETER.fields_by_name['mvn_param'].message_type = _MVNPARAMETER -_V1LAYERPARAMETER.fields_by_name['pooling_param'].message_type = 
_POOLINGPARAMETER -_V1LAYERPARAMETER.fields_by_name['power_param'].message_type = _POWERPARAMETER -_V1LAYERPARAMETER.fields_by_name['relu_param'].message_type = _RELUPARAMETER -_V1LAYERPARAMETER.fields_by_name['sigmoid_param'].message_type = _SIGMOIDPARAMETER -_V1LAYERPARAMETER.fields_by_name['softmax_param'].message_type = _SOFTMAXPARAMETER -_V1LAYERPARAMETER.fields_by_name['slice_param'].message_type = _SLICEPARAMETER -_V1LAYERPARAMETER.fields_by_name['tanh_param'].message_type = _TANHPARAMETER -_V1LAYERPARAMETER.fields_by_name['threshold_param'].message_type = _THRESHOLDPARAMETER -_V1LAYERPARAMETER.fields_by_name['window_data_param'].message_type = _WINDOWDATAPARAMETER -_V1LAYERPARAMETER.fields_by_name['transform_param'].message_type = _TRANSFORMATIONPARAMETER -_V1LAYERPARAMETER.fields_by_name['loss_param'].message_type = _LOSSPARAMETER -_V1LAYERPARAMETER.fields_by_name['layer'].message_type = _V0LAYERPARAMETER -_V1LAYERPARAMETER_LAYERTYPE.containing_type = _V1LAYERPARAMETER -_V1LAYERPARAMETER_DIMCHECKMODE.containing_type = _V1LAYERPARAMETER -_V0LAYERPARAMETER.fields_by_name['weight_filler'].message_type = _FILLERPARAMETER -_V0LAYERPARAMETER.fields_by_name['bias_filler'].message_type = _FILLERPARAMETER -_V0LAYERPARAMETER.fields_by_name['pool'].enum_type = _V0LAYERPARAMETER_POOLMETHOD -_V0LAYERPARAMETER.fields_by_name['blobs'].message_type = _BLOBPROTO -_V0LAYERPARAMETER.fields_by_name['hdf5_output_param'].message_type = _HDF5OUTPUTPARAMETER -_V0LAYERPARAMETER_POOLMETHOD.containing_type = _V0LAYERPARAMETER -_PRELUPARAMETER.fields_by_name['filler'].message_type = _FILLERPARAMETER -DESCRIPTOR.message_types_by_name['BlobShape'] = _BLOBSHAPE -DESCRIPTOR.message_types_by_name['BlobProto'] = _BLOBPROTO -DESCRIPTOR.message_types_by_name['BlobProtoVector'] = _BLOBPROTOVECTOR -DESCRIPTOR.message_types_by_name['Datum'] = _DATUM -DESCRIPTOR.message_types_by_name['FillerParameter'] = _FILLERPARAMETER -DESCRIPTOR.message_types_by_name['NetParameter'] = _NETPARAMETER -DESCRIPTOR.message_types_by_name['SolverParameter'] = _SOLVERPARAMETER -DESCRIPTOR.message_types_by_name['SolverState'] = _SOLVERSTATE -DESCRIPTOR.message_types_by_name['NetState'] = _NETSTATE -DESCRIPTOR.message_types_by_name['NetStateRule'] = _NETSTATERULE -DESCRIPTOR.message_types_by_name['ParamSpec'] = _PARAMSPEC -DESCRIPTOR.message_types_by_name['LayerParameter'] = _LAYERPARAMETER -DESCRIPTOR.message_types_by_name['TransformationParameter'] = _TRANSFORMATIONPARAMETER -DESCRIPTOR.message_types_by_name['LossParameter'] = _LOSSPARAMETER -DESCRIPTOR.message_types_by_name['AccuracyParameter'] = _ACCURACYPARAMETER -DESCRIPTOR.message_types_by_name['ArgMaxParameter'] = _ARGMAXPARAMETER -DESCRIPTOR.message_types_by_name['ConcatParameter'] = _CONCATPARAMETER -DESCRIPTOR.message_types_by_name['BatchNormParameter'] = _BATCHNORMPARAMETER -DESCRIPTOR.message_types_by_name['BiasParameter'] = _BIASPARAMETER -DESCRIPTOR.message_types_by_name['ContrastiveLossParameter'] = _CONTRASTIVELOSSPARAMETER -DESCRIPTOR.message_types_by_name['ConvolutionParameter'] = _CONVOLUTIONPARAMETER -DESCRIPTOR.message_types_by_name['CropParameter'] = _CROPPARAMETER -DESCRIPTOR.message_types_by_name['DataParameter'] = _DATAPARAMETER -DESCRIPTOR.message_types_by_name['DropoutParameter'] = _DROPOUTPARAMETER -DESCRIPTOR.message_types_by_name['DummyDataParameter'] = _DUMMYDATAPARAMETER -DESCRIPTOR.message_types_by_name['EltwiseParameter'] = _ELTWISEPARAMETER -DESCRIPTOR.message_types_by_name['ELUParameter'] = _ELUPARAMETER -DESCRIPTOR.message_types_by_name['EmbedParameter'] 
= _EMBEDPARAMETER -DESCRIPTOR.message_types_by_name['ExpParameter'] = _EXPPARAMETER -DESCRIPTOR.message_types_by_name['FlattenParameter'] = _FLATTENPARAMETER -DESCRIPTOR.message_types_by_name['HDF5DataParameter'] = _HDF5DATAPARAMETER -DESCRIPTOR.message_types_by_name['HDF5OutputParameter'] = _HDF5OUTPUTPARAMETER -DESCRIPTOR.message_types_by_name['HingeLossParameter'] = _HINGELOSSPARAMETER -DESCRIPTOR.message_types_by_name['ImageDataParameter'] = _IMAGEDATAPARAMETER -DESCRIPTOR.message_types_by_name['InfogainLossParameter'] = _INFOGAINLOSSPARAMETER -DESCRIPTOR.message_types_by_name['InnerProductParameter'] = _INNERPRODUCTPARAMETER -DESCRIPTOR.message_types_by_name['InputParameter'] = _INPUTPARAMETER -DESCRIPTOR.message_types_by_name['LogParameter'] = _LOGPARAMETER -DESCRIPTOR.message_types_by_name['LRNParameter'] = _LRNPARAMETER -DESCRIPTOR.message_types_by_name['MemoryDataParameter'] = _MEMORYDATAPARAMETER -DESCRIPTOR.message_types_by_name['MVNParameter'] = _MVNPARAMETER -DESCRIPTOR.message_types_by_name['ParameterParameter'] = _PARAMETERPARAMETER -DESCRIPTOR.message_types_by_name['PoolingParameter'] = _POOLINGPARAMETER -DESCRIPTOR.message_types_by_name['PowerParameter'] = _POWERPARAMETER -DESCRIPTOR.message_types_by_name['PythonParameter'] = _PYTHONPARAMETER -DESCRIPTOR.message_types_by_name['RecurrentParameter'] = _RECURRENTPARAMETER -DESCRIPTOR.message_types_by_name['ReductionParameter'] = _REDUCTIONPARAMETER -DESCRIPTOR.message_types_by_name['ReLUParameter'] = _RELUPARAMETER -DESCRIPTOR.message_types_by_name['ReshapeParameter'] = _RESHAPEPARAMETER -DESCRIPTOR.message_types_by_name['ScaleParameter'] = _SCALEPARAMETER -DESCRIPTOR.message_types_by_name['SigmoidParameter'] = _SIGMOIDPARAMETER -DESCRIPTOR.message_types_by_name['SliceParameter'] = _SLICEPARAMETER -DESCRIPTOR.message_types_by_name['SoftmaxParameter'] = _SOFTMAXPARAMETER -DESCRIPTOR.message_types_by_name['TanHParameter'] = _TANHPARAMETER -DESCRIPTOR.message_types_by_name['TileParameter'] = _TILEPARAMETER -DESCRIPTOR.message_types_by_name['ThresholdParameter'] = _THRESHOLDPARAMETER -DESCRIPTOR.message_types_by_name['WindowDataParameter'] = _WINDOWDATAPARAMETER -DESCRIPTOR.message_types_by_name['SPPParameter'] = _SPPPARAMETER -DESCRIPTOR.message_types_by_name['V1LayerParameter'] = _V1LAYERPARAMETER -DESCRIPTOR.message_types_by_name['V0LayerParameter'] = _V0LAYERPARAMETER -DESCRIPTOR.message_types_by_name['PReLUParameter'] = _PRELUPARAMETER -DESCRIPTOR.enum_types_by_name['Phase'] = _PHASE - -BlobShape = _reflection.GeneratedProtocolMessageType('BlobShape', (_message.Message,), dict( - DESCRIPTOR=_BLOBSHAPE, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.BlobShape) -)) -_sym_db.RegisterMessage(BlobShape) - -BlobProto = _reflection.GeneratedProtocolMessageType('BlobProto', (_message.Message,), dict( - DESCRIPTOR=_BLOBPROTO, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.BlobProto) -)) -_sym_db.RegisterMessage(BlobProto) - -BlobProtoVector = _reflection.GeneratedProtocolMessageType('BlobProtoVector', (_message.Message,), dict( - DESCRIPTOR=_BLOBPROTOVECTOR, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.BlobProtoVector) -)) -_sym_db.RegisterMessage(BlobProtoVector) - -Datum = _reflection.GeneratedProtocolMessageType('Datum', (_message.Message,), dict( - DESCRIPTOR=_DATUM, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.Datum) -)) -_sym_db.RegisterMessage(Datum) - -FillerParameter = 
_reflection.GeneratedProtocolMessageType('FillerParameter', (_message.Message,), dict( - DESCRIPTOR=_FILLERPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.FillerParameter) -)) -_sym_db.RegisterMessage(FillerParameter) - -NetParameter = _reflection.GeneratedProtocolMessageType('NetParameter', (_message.Message,), dict( - DESCRIPTOR=_NETPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.NetParameter) -)) -_sym_db.RegisterMessage(NetParameter) - -SolverParameter = _reflection.GeneratedProtocolMessageType('SolverParameter', (_message.Message,), dict( - DESCRIPTOR=_SOLVERPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.SolverParameter) -)) -_sym_db.RegisterMessage(SolverParameter) - -SolverState = _reflection.GeneratedProtocolMessageType('SolverState', (_message.Message,), dict( - DESCRIPTOR=_SOLVERSTATE, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.SolverState) -)) -_sym_db.RegisterMessage(SolverState) - -NetState = _reflection.GeneratedProtocolMessageType('NetState', (_message.Message,), dict( - DESCRIPTOR=_NETSTATE, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.NetState) -)) -_sym_db.RegisterMessage(NetState) - -NetStateRule = _reflection.GeneratedProtocolMessageType('NetStateRule', (_message.Message,), dict( - DESCRIPTOR=_NETSTATERULE, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.NetStateRule) -)) -_sym_db.RegisterMessage(NetStateRule) - -ParamSpec = _reflection.GeneratedProtocolMessageType('ParamSpec', (_message.Message,), dict( - DESCRIPTOR=_PARAMSPEC, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.ParamSpec) -)) -_sym_db.RegisterMessage(ParamSpec) - -LayerParameter = _reflection.GeneratedProtocolMessageType('LayerParameter', (_message.Message,), dict( - DESCRIPTOR=_LAYERPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.LayerParameter) -)) -_sym_db.RegisterMessage(LayerParameter) - -TransformationParameter = _reflection.GeneratedProtocolMessageType('TransformationParameter', (_message.Message,), dict( - DESCRIPTOR=_TRANSFORMATIONPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.TransformationParameter) -)) -_sym_db.RegisterMessage(TransformationParameter) - -LossParameter = _reflection.GeneratedProtocolMessageType('LossParameter', (_message.Message,), dict( - DESCRIPTOR=_LOSSPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.LossParameter) -)) -_sym_db.RegisterMessage(LossParameter) - -AccuracyParameter = _reflection.GeneratedProtocolMessageType('AccuracyParameter', (_message.Message,), dict( - DESCRIPTOR=_ACCURACYPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.AccuracyParameter) -)) -_sym_db.RegisterMessage(AccuracyParameter) - -ArgMaxParameter = _reflection.GeneratedProtocolMessageType('ArgMaxParameter', (_message.Message,), dict( - DESCRIPTOR=_ARGMAXPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.ArgMaxParameter) -)) -_sym_db.RegisterMessage(ArgMaxParameter) - -ConcatParameter = _reflection.GeneratedProtocolMessageType('ConcatParameter', (_message.Message,), dict( - DESCRIPTOR=_CONCATPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.ConcatParameter) -)) -_sym_db.RegisterMessage(ConcatParameter) - -BatchNormParameter = _reflection.GeneratedProtocolMessageType('BatchNormParameter', 
(_message.Message,), dict( - DESCRIPTOR=_BATCHNORMPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.BatchNormParameter) -)) -_sym_db.RegisterMessage(BatchNormParameter) - -BiasParameter = _reflection.GeneratedProtocolMessageType('BiasParameter', (_message.Message,), dict( - DESCRIPTOR=_BIASPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.BiasParameter) -)) -_sym_db.RegisterMessage(BiasParameter) - -ContrastiveLossParameter = _reflection.GeneratedProtocolMessageType('ContrastiveLossParameter', (_message.Message,), dict( - DESCRIPTOR=_CONTRASTIVELOSSPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.ContrastiveLossParameter) -)) -_sym_db.RegisterMessage(ContrastiveLossParameter) - -ConvolutionParameter = _reflection.GeneratedProtocolMessageType('ConvolutionParameter', (_message.Message,), dict( - DESCRIPTOR=_CONVOLUTIONPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.ConvolutionParameter) -)) -_sym_db.RegisterMessage(ConvolutionParameter) - -CropParameter = _reflection.GeneratedProtocolMessageType('CropParameter', (_message.Message,), dict( - DESCRIPTOR=_CROPPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.CropParameter) -)) -_sym_db.RegisterMessage(CropParameter) - -DataParameter = _reflection.GeneratedProtocolMessageType('DataParameter', (_message.Message,), dict( - DESCRIPTOR=_DATAPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.DataParameter) -)) -_sym_db.RegisterMessage(DataParameter) - -DropoutParameter = _reflection.GeneratedProtocolMessageType('DropoutParameter', (_message.Message,), dict( - DESCRIPTOR=_DROPOUTPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.DropoutParameter) -)) -_sym_db.RegisterMessage(DropoutParameter) - -DummyDataParameter = _reflection.GeneratedProtocolMessageType('DummyDataParameter', (_message.Message,), dict( - DESCRIPTOR=_DUMMYDATAPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.DummyDataParameter) -)) -_sym_db.RegisterMessage(DummyDataParameter) - -EltwiseParameter = _reflection.GeneratedProtocolMessageType('EltwiseParameter', (_message.Message,), dict( - DESCRIPTOR=_ELTWISEPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.EltwiseParameter) -)) -_sym_db.RegisterMessage(EltwiseParameter) - -ELUParameter = _reflection.GeneratedProtocolMessageType('ELUParameter', (_message.Message,), dict( - DESCRIPTOR=_ELUPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.ELUParameter) -)) -_sym_db.RegisterMessage(ELUParameter) - -EmbedParameter = _reflection.GeneratedProtocolMessageType('EmbedParameter', (_message.Message,), dict( - DESCRIPTOR=_EMBEDPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.EmbedParameter) -)) -_sym_db.RegisterMessage(EmbedParameter) - -ExpParameter = _reflection.GeneratedProtocolMessageType('ExpParameter', (_message.Message,), dict( - DESCRIPTOR=_EXPPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.ExpParameter) -)) -_sym_db.RegisterMessage(ExpParameter) - -FlattenParameter = _reflection.GeneratedProtocolMessageType('FlattenParameter', (_message.Message,), dict( - DESCRIPTOR=_FLATTENPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.FlattenParameter) -)) -_sym_db.RegisterMessage(FlattenParameter) - -HDF5DataParameter = 
_reflection.GeneratedProtocolMessageType('HDF5DataParameter', (_message.Message,), dict( - DESCRIPTOR=_HDF5DATAPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.HDF5DataParameter) -)) -_sym_db.RegisterMessage(HDF5DataParameter) - -HDF5OutputParameter = _reflection.GeneratedProtocolMessageType('HDF5OutputParameter', (_message.Message,), dict( - DESCRIPTOR=_HDF5OUTPUTPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.HDF5OutputParameter) -)) -_sym_db.RegisterMessage(HDF5OutputParameter) - -HingeLossParameter = _reflection.GeneratedProtocolMessageType('HingeLossParameter', (_message.Message,), dict( - DESCRIPTOR=_HINGELOSSPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.HingeLossParameter) -)) -_sym_db.RegisterMessage(HingeLossParameter) - -ImageDataParameter = _reflection.GeneratedProtocolMessageType('ImageDataParameter', (_message.Message,), dict( - DESCRIPTOR=_IMAGEDATAPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.ImageDataParameter) -)) -_sym_db.RegisterMessage(ImageDataParameter) - -InfogainLossParameter = _reflection.GeneratedProtocolMessageType('InfogainLossParameter', (_message.Message,), dict( - DESCRIPTOR=_INFOGAINLOSSPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.InfogainLossParameter) -)) -_sym_db.RegisterMessage(InfogainLossParameter) - -InnerProductParameter = _reflection.GeneratedProtocolMessageType('InnerProductParameter', (_message.Message,), dict( - DESCRIPTOR=_INNERPRODUCTPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.InnerProductParameter) -)) -_sym_db.RegisterMessage(InnerProductParameter) - -InputParameter = _reflection.GeneratedProtocolMessageType('InputParameter', (_message.Message,), dict( - DESCRIPTOR=_INPUTPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.InputParameter) -)) -_sym_db.RegisterMessage(InputParameter) - -LogParameter = _reflection.GeneratedProtocolMessageType('LogParameter', (_message.Message,), dict( - DESCRIPTOR=_LOGPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.LogParameter) -)) -_sym_db.RegisterMessage(LogParameter) - -LRNParameter = _reflection.GeneratedProtocolMessageType('LRNParameter', (_message.Message,), dict( - DESCRIPTOR=_LRNPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.LRNParameter) -)) -_sym_db.RegisterMessage(LRNParameter) - -MemoryDataParameter = _reflection.GeneratedProtocolMessageType('MemoryDataParameter', (_message.Message,), dict( - DESCRIPTOR=_MEMORYDATAPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.MemoryDataParameter) -)) -_sym_db.RegisterMessage(MemoryDataParameter) - -MVNParameter = _reflection.GeneratedProtocolMessageType('MVNParameter', (_message.Message,), dict( - DESCRIPTOR=_MVNPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.MVNParameter) -)) -_sym_db.RegisterMessage(MVNParameter) - -ParameterParameter = _reflection.GeneratedProtocolMessageType('ParameterParameter', (_message.Message,), dict( - DESCRIPTOR=_PARAMETERPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.ParameterParameter) -)) -_sym_db.RegisterMessage(ParameterParameter) - -PoolingParameter = _reflection.GeneratedProtocolMessageType('PoolingParameter', (_message.Message,), dict( - DESCRIPTOR=_POOLINGPARAMETER, - __module__='caffe_pb2' - # 
@@protoc_insertion_point(class_scope:caffe.PoolingParameter) -)) -_sym_db.RegisterMessage(PoolingParameter) - -PowerParameter = _reflection.GeneratedProtocolMessageType('PowerParameter', (_message.Message,), dict( - DESCRIPTOR=_POWERPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.PowerParameter) -)) -_sym_db.RegisterMessage(PowerParameter) - -PythonParameter = _reflection.GeneratedProtocolMessageType('PythonParameter', (_message.Message,), dict( - DESCRIPTOR=_PYTHONPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.PythonParameter) -)) -_sym_db.RegisterMessage(PythonParameter) - -RecurrentParameter = _reflection.GeneratedProtocolMessageType('RecurrentParameter', (_message.Message,), dict( - DESCRIPTOR=_RECURRENTPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.RecurrentParameter) -)) -_sym_db.RegisterMessage(RecurrentParameter) - -ReductionParameter = _reflection.GeneratedProtocolMessageType('ReductionParameter', (_message.Message,), dict( - DESCRIPTOR=_REDUCTIONPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.ReductionParameter) -)) -_sym_db.RegisterMessage(ReductionParameter) - -ReLUParameter = _reflection.GeneratedProtocolMessageType('ReLUParameter', (_message.Message,), dict( - DESCRIPTOR=_RELUPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.ReLUParameter) -)) -_sym_db.RegisterMessage(ReLUParameter) - -ReshapeParameter = _reflection.GeneratedProtocolMessageType('ReshapeParameter', (_message.Message,), dict( - DESCRIPTOR=_RESHAPEPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.ReshapeParameter) -)) -_sym_db.RegisterMessage(ReshapeParameter) - -ScaleParameter = _reflection.GeneratedProtocolMessageType('ScaleParameter', (_message.Message,), dict( - DESCRIPTOR=_SCALEPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.ScaleParameter) -)) -_sym_db.RegisterMessage(ScaleParameter) - -SigmoidParameter = _reflection.GeneratedProtocolMessageType('SigmoidParameter', (_message.Message,), dict( - DESCRIPTOR=_SIGMOIDPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.SigmoidParameter) -)) -_sym_db.RegisterMessage(SigmoidParameter) - -SliceParameter = _reflection.GeneratedProtocolMessageType('SliceParameter', (_message.Message,), dict( - DESCRIPTOR=_SLICEPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.SliceParameter) -)) -_sym_db.RegisterMessage(SliceParameter) - -SoftmaxParameter = _reflection.GeneratedProtocolMessageType('SoftmaxParameter', (_message.Message,), dict( - DESCRIPTOR=_SOFTMAXPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.SoftmaxParameter) -)) -_sym_db.RegisterMessage(SoftmaxParameter) - -TanHParameter = _reflection.GeneratedProtocolMessageType('TanHParameter', (_message.Message,), dict( - DESCRIPTOR=_TANHPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.TanHParameter) -)) -_sym_db.RegisterMessage(TanHParameter) - -TileParameter = _reflection.GeneratedProtocolMessageType('TileParameter', (_message.Message,), dict( - DESCRIPTOR=_TILEPARAMETER, - __module__='caffe_pb2' - # @@protoc_insertion_point(class_scope:caffe.TileParameter) -)) -_sym_db.RegisterMessage(TileParameter) - -ThresholdParameter = _reflection.GeneratedProtocolMessageType('ThresholdParameter', (_message.Message,), dict( - DESCRIPTOR=_THRESHOLDPARAMETER, - 
__module__='caffe_pb2'
-    # @@protoc_insertion_point(class_scope:caffe.ThresholdParameter)
-))
-_sym_db.RegisterMessage(ThresholdParameter)
-
-WindowDataParameter = _reflection.GeneratedProtocolMessageType('WindowDataParameter', (_message.Message,), dict(
-    DESCRIPTOR=_WINDOWDATAPARAMETER,
-    __module__='caffe_pb2'
-    # @@protoc_insertion_point(class_scope:caffe.WindowDataParameter)
-))
-_sym_db.RegisterMessage(WindowDataParameter)
-
-SPPParameter = _reflection.GeneratedProtocolMessageType('SPPParameter', (_message.Message,), dict(
-    DESCRIPTOR=_SPPPARAMETER,
-    __module__='caffe_pb2'
-    # @@protoc_insertion_point(class_scope:caffe.SPPParameter)
-))
-_sym_db.RegisterMessage(SPPParameter)
-
-V1LayerParameter = _reflection.GeneratedProtocolMessageType('V1LayerParameter', (_message.Message,), dict(
-    DESCRIPTOR=_V1LAYERPARAMETER,
-    __module__='caffe_pb2'
-    # @@protoc_insertion_point(class_scope:caffe.V1LayerParameter)
-))
-_sym_db.RegisterMessage(V1LayerParameter)
-
-V0LayerParameter = _reflection.GeneratedProtocolMessageType('V0LayerParameter', (_message.Message,), dict(
-    DESCRIPTOR=_V0LAYERPARAMETER,
-    __module__='caffe_pb2'
-    # @@protoc_insertion_point(class_scope:caffe.V0LayerParameter)
-))
-_sym_db.RegisterMessage(V0LayerParameter)
-
-PReLUParameter = _reflection.GeneratedProtocolMessageType('PReLUParameter', (_message.Message,), dict(
-    DESCRIPTOR=_PRELUPARAMETER,
-    __module__='caffe_pb2'
-    # @@protoc_insertion_point(class_scope:caffe.PReLUParameter)
-))
-_sym_db.RegisterMessage(PReLUParameter)
-
-
-_BLOBSHAPE.fields_by_name['dim'].has_options = True
-_BLOBSHAPE.fields_by_name['dim']._options = _descriptor._ParseOptions(
-    descriptor_pb2.FieldOptions(), _b('\020\001'))
-_BLOBPROTO.fields_by_name['data'].has_options = True
-_BLOBPROTO.fields_by_name['data']._options = _descriptor._ParseOptions(
-    descriptor_pb2.FieldOptions(), _b('\020\001'))
-_BLOBPROTO.fields_by_name['diff'].has_options = True
-_BLOBPROTO.fields_by_name['diff']._options = _descriptor._ParseOptions(
-    descriptor_pb2.FieldOptions(), _b('\020\001'))
-_BLOBPROTO.fields_by_name['double_data'].has_options = True
-_BLOBPROTO.fields_by_name['double_data']._options = _descriptor._ParseOptions(
-    descriptor_pb2.FieldOptions(), _b('\020\001'))
-_BLOBPROTO.fields_by_name['double_diff'].has_options = True
-_BLOBPROTO.fields_by_name['double_diff']._options = _descriptor._ParseOptions(
-    descriptor_pb2.FieldOptions(), _b('\020\001'))
-# @@protoc_insertion_point(module_scope)
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ----------------------------------------------------------------------------------------------
-
-import six
-from six import string_types as _string_types
-from mmdnn.conversion.caffe.errors import ConversionError
-from mmdnn.conversion.common.IR.graph_pb2 import GraphDef, NodeDef, TensorShape
-from mmdnn.conversion.caffe.utils import get_real_name
-
-
-def assign_attr_value(attr, val):
-    '''Assign value to AttrValue proto according to data type.'''
-    if isinstance(val, bool):
-        attr.b = val
-    elif isinstance(val, six.integer_types):
-        attr.i = val
-    elif isinstance(val, float):
-        attr.f = val
-    elif isinstance(val, str):
-        attr.s = val.encode('utf-8')
-    elif isinstance(val, TensorShape):
-        attr.shape.MergeFromString(val.SerializeToString())
-    elif isinstance(val, list):
-        if len(val) == 0:
-            return
-
-        if isinstance(val[0], six.integer_types):
-            attr.list.i.extend(val)
-        elif isinstance(val[0], TensorShape):
-            attr.list.shape.extend(val)
-        else:
-            raise NotImplementedError(
-                'AttrValue cannot be of %s %s' % (type(val), type(val[0])))
-    else:
-        raise NotImplementedError('AttrValue cannot be of %s' % type(val))
-
-
-def fetch_attr_value(attr):
-    '''Fetch valid value from AttrValue proto.'''
-    field = attr.WhichOneof('value')
-    val = getattr(attr, field) if field else None
-    return val.decode('utf-8') if isinstance(val, bytes) else val
-
-
-class Node(object):
-    '''An intermediate representation for DL operations.'''
-
-    def __init__(self, node_pb2):
-        assert isinstance(node_pb2, NodeDef)
-        self.node_pb2 = node_pb2
-        self.output = []
-
-    @staticmethod
-    def create(op, **kwargs):
-        node_pb2 = NodeDef()
-        node_pb2.op = op
-        for k, v in kwargs.items():
-            assign_attr_value(node_pb2.attr[k], v)
-        return Node(node_pb2)
-
-    @property
-    def op(self):
-        return self.node_pb2.op
-
-    @property
-    def name(self):
-        return self.node_pb2.name
-
-    @name.setter
-    def name(self, value):
-        assert isinstance(value, _string_types)
-        self.node_pb2.name = value
-
-    @property
-    def input(self):
-        return self.node_pb2.input
-
-    @property
-    def attr(self):
-        return self.node_pb2.attr.items()
-
-
-class Graph(object):
-    '''An intermediate representation for DL graph.'''
-
-    def __init__(self, name, node_list, version=0):
-        if node_list and len(node_list):
-            assert isinstance(node_list[0], Node)
-            self.node_dict = {node.name: node for node in node_list}
-        else:
-            self.node_dict = {}
-        self.name = name
-        self.version = version
-
-    def topologically_sorted(self):
-        visited = set()
-        sorted_nodes = []
-
-        def topo_sort_dfs(node, visited, sorted_nodes):
-            if node in visited:
-                return
-            visited.add(node)
-            for n in self.get_input(node):
-                topo_sort_dfs(n, visited, sorted_nodes)
-            sorted_nodes.append(node)
-        for node in self.node_dict.values():
-            topo_sort_dfs(node, visited, sorted_nodes)
-        return sorted_nodes
-
-    def get_node(self, name):
-        return self.node_dict[name]
-
-    def add_node(self, node):
-        assert node.name not in self.node_dict
-        self.node_dict[node.name] = node
-
-    def remove_node(self, name):
-        return self.node_dict.pop(name)
-
-    def get_input(self, node):
-        input_nodes = []
-        for name in node.input:
-            name = get_real_name(name)
-            if name in self.node_dict:
-                input_nodes.append(self.get_node(name))
-        return input_nodes
-
-    def as_graph_def(self):
-        graph_pb2 = GraphDef()
-        graph_pb2.version = self.version
-        graph_pb2.node.extend(
-            [node.node_pb2 for node in self.node_dict.values()])
-        return graph_pb2
-import sys
-
-
-class ConversionError(Exception):
-    '''
-    an abstract class
-    '''
-    pass
-
-
-def print_stderr(msg):
-    '''
-    a function to print information to stderr
-    '''
-    sys.stderr.write('%s\n' % msg)
-from collections import namedtuple
-from functools import reduce
-from google.protobuf import text_format
-from copy import deepcopy
-import numbers
-import os
-import tempfile
-
-from mmdnn.conversion.caffe.mapper import get_handler_name
-from mmdnn.conversion.caffe.resolver import get_caffe_resolver, has_pycaffe
-from mmdnn.conversion.caffe.shape import *
-from mmdnn.conversion.caffe.errors import print_stderr, ConversionError
-
-
-layer_num_to_name = {
-    0: 'None',
-    1: 'Accuracy',
-    2: 'BNLL',
-    3: 'Concat',
-    4: 'Convolution',
-    5: 'Data',
-    6: 'Dropout',
-    7: 'EuclideanLoss',
-    8: 'Flatten',
-    9: 'HDF5Data',
-    10: 'HDF5Output',
-    11: 'Im2col',
-    12: 'ImageData',
-    13: 'InfogainLoss',
-    14: 'InnerProduct',
-    15: 'LRN',
-    16: 'MultinomialLogisticLoss',
-    17: 'Pooling',
-    18: 'ReLU',
-    19: 'Sigmoid',
-    20: 'Softmax',
-    21: 'SoftmaxWithLoss',
-    22: 'Split',
-    23: 'TanH',
-    24: 'WindowData',
-    25: 'Eltwise',
-    26: 'Power',
-    27: 'SigmoidCrossEntropyLoss',
-    28: 'HingeLoss',
-    29: 'MemoryData',
-    30: 'ArgMax',
-    31: 'Threshold',
-    32: 'DummyData',
-    33: 'Slice',
-    34: 'MVN',
-    35: 'AbsVal',
-    36: 'Silence',
-    37: 'ContrastiveLoss',
-    38: 'Exp',
-    39: 'Deconvolution',
-    40: 'PReLU',
-    41: 'ELU',
-}
-
-LAYER_DESCRIPTORS = {
-    # Caffe Types
-    'AbsVal': shape_identity,
-    'Accuracy': shape_scalar,
-    'ArgMax': shape_not_implemented,
-    'BatchNorm': shape_identity,
-    'BNLL': shape_not_implemented,
-    'Concat': shape_concat,
-    'ContrastiveLoss': shape_scalar,
-    'Convolution': shape_convolution,
-    'Crop': shape_not_implemented,
-    'Deconvolution': shape_deconvolution,
-    'Data': shape_data,
-    'Dropout': shape_identity,
-    'DummyData': shape_data,
-    'EuclideanLoss': shape_scalar,
-    'Eltwise': shape_identity,
-    'Exp': shape_identity,
-    'Flatten': shape_flatten,
-    'HDF5Data': shape_data,
-    'HDF5Output': shape_identity,
-    'HingeLoss': shape_scalar,
-    'Im2col': shape_not_implemented,
-    'ImageData': shape_data,
-    'InfogainLoss': shape_scalar,
-    'InnerProduct': shape_inner_product,
-    'Input': shape_data,
-    'LRN': shape_identity,
-    'MemoryData': shape_mem_data,
-    'MultinomialLogisticLoss': shape_scalar,
-    'MVN': shape_not_implemented,
-    'Pooling': shape_pool,
-    'Power': shape_identity,
-    'ReLU': shape_identity,
-    'Scale': shape_identity,
-    'Sigmoid': shape_identity,
-    'SigmoidCrossEntropyLoss': shape_scalar,
-    'Silence': shape_identity,
-    'Softmax': shape_identity,
-    'SoftmaxWithLoss': shape_scalar,
-    'Split': shape_split,
-    'Slice': shape_not_implemented,
-    'TanH': shape_identity,
-    'WindowData': shape_not_implemented,
-    'Threshold': shape_identity,
-    'Reshape': shape_reshape,
-    'ResizeBilinear': shape_reshape,
-    'PReLU': shape_identity,
-    'ELU': shape_identity,
-}
-
-LAYER_TYPES = LAYER_DESCRIPTORS.keys()
-
-LayerType = type('LayerType', (), {t: t for t in LAYER_TYPES})
-
-KernelParameters = namedtuple('KernelParameters', [
-    'global_pooling', 'k_h', 'k_w', 's_h', 's_w', 'p_h', 'p_w'])
-
-
-class NodeKind(LayerType):
-
-    @staticmethod
-    def map_raw_kind(node_kind):
-        if isinstance(node_kind, int):
-            node_kind = layer_num_to_name[node_kind]
-        else:
-            node_kind = str(node_kind)
-        if node_kind in LAYER_TYPES:
-            return node_kind
-        return None
-
-    @staticmethod
-    def compute_output_shape(node):
-        try:
-            return LAYER_DESCRIPTORS[node.kind](node)
-        except NotImplementedError:
-            raise ConversionError(
-                'Output shape computation not implemented for type: %s' % node.kind)
-
-
-LAYER_IN_TRAIN_PROTO = [NodeKind.ImageData, NodeKind.Data, NodeKind.HDF5Data,
-                        NodeKind.HDF5Output, NodeKind.WindowData, NodeKind.DummyData, NodeKind.MemoryData]
-
-
-class CaffeNode(object):
-    def __init__(self, name, kind, layer=None):
-        self.name = name
-        self.kind = kind
-        self.layer = layer
-        self.parents = []
-        self.children = []
-        self.data = None
-        self.output = []
-        self.output_shape = None
-        self.metadata = {}
-
-    def add_parent(self, parent_node, from_output, index=None):
-        assert parent_node not in self.parents
-        index = len(self.parents) if index is None else index
-        self.parents.insert(index, (parent_node, from_output))
-        if self not in parent_node.children:
-            parent_node.children.append(self)
-
-    def get_only_parent(self):
-        if len(self.parents) != 1:
-            raise ConversionError(
-                'Node (%s) expected to have 1 parent. Found %s.' % (self, len(self.parents)))
-        return self.parents[0]
-
-    @property
-    def parameters(self):
-        if self.layer is not None:
-            params = get_handler_name(self.kind)
-            if params == 'deconvolution':
-                params = 'convolution'
-            params = '_'.join((params, 'param'))
-            try:
-                return getattr(self.layer, params)
-            except AttributeError:
-                raise ConversionError(
-                    'Caffe parameters not found for layer kind: %s' % (self.kind))
-        return None
-
-    @staticmethod
-    def get_kernel_value(scalar, repeated, idx, default=None):
-        if scalar:
-            return scalar
-        if repeated:
-            if isinstance(repeated, numbers.Number):
-                return repeated
-            if len(repeated) == 1:
-                # Same value applies to all spatial dimensions
-                return int(repeated[0])
-            assert idx < len(repeated)
-            # Extract the value for the given spatial dimension
-            return repeated[idx]
-        if default is None:
-            raise ValueError('Unable to determine kernel parameter!')
-        return default
-
-    @property
-    def kernel_parameters(self):
-        assert self.kind in (NodeKind.Convolution,
-                             NodeKind.Pooling, NodeKind.Deconvolution)
-        params = self.parameters
-        global_pooling = hasattr(
-            params, 'global_pooling') and params.global_pooling
-        if not global_pooling:
-            k_h = self.get_kernel_value(params.kernel_h, params.kernel_size, 0)
-            k_w = self.get_kernel_value(params.kernel_w, params.kernel_size, 1)
-            s_h = self.get_kernel_value(
-                params.stride_h, params.stride, 0, default=1)
-            s_w = self.get_kernel_value(
-                params.stride_w, params.stride, 1, default=1)
-        else:
-            k_h = k_w = 0
-            s_h = s_w = 1
-        p_h = self.get_kernel_value(params.pad_h, params.pad, 0, default=0)
-        p_w = self.get_kernel_value(params.pad_w, params.pad, 1, default=0)
-        return KernelParameters(global_pooling, k_h, k_w, s_h, s_w, p_h, p_w)
-
-    def __str__(self):
-        return '[%s] %s' % (self.kind, self.name)
-
-    def __repr__(self):
-        return '%s (0x%x)' % (self.name, id(self))
-
-
-class CaffeGraph(object):
-
-    def __init__(self, nodes=None, name=None):
-        self.nodes = nodes or []
-        self.node_lut = {node.name: node for node in self.nodes}
-        self.name = name
-        self.prototxt = None
-
-    def add_node(self, node):
-        self.nodes.append(node)
-        self.node_lut[node.name] = node
-
-    def get_node(self, name):
-        try:
-            return self.node_lut[name]
-        except KeyError:
-            raise ConversionError('Layer not found: %s' % name)
-
-    def get_input_nodes(self):
-        return [node for node in self.nodes if len(node.parents) == 0]
-
-    def get_output_nodes(self):
-        return [node for node in self.nodes if len(node.children) == 0]
-
-    def topologically_sorted(self):
-        visited = set()
-        sorted_nodes = []
-
-        def topo_sort_dfs(node, visited, sorted_nodes):
-            if node in visited:
-                return
-            visited.add(node)
-            for n, idx in node.parents:
-                topo_sort_dfs(n, visited, sorted_nodes)
-            sorted_nodes.append(node)
-        for node in self.nodes:
-            topo_sort_dfs(node, visited, sorted_nodes)
-        return sorted_nodes
-
-    def compute_output_shapes(self, model):
-        sorted_nodes = self.topologically_sorted()
-        (tmp_handle, tmp_prototxt) = tempfile.mkstemp(suffix=".prototxt")
-        with open(tmp_prototxt, 'w') as f:
-            f.write(text_format.MessageToString(model))
-        self.prototxt = tmp_prototxt
-        if has_pycaffe():
-            caffe = get_caffe_resolver().caffe
-            net = caffe.Net(tmp_prototxt, caffe.TEST)
-            for key, value in net.blobs.items():
-                try:
-                    node = self.get_node(key)
-                    dims = list(value.shape)
-                    dims = dims + [1] * (4 - len(dims))
-                    node.output_shape = TensorShape(*dims)
-                except:
-                    continue
-            for node in sorted_nodes:
-                if node.output_shape is None:
-                    node.output_shape = TensorShape(
-                        *NodeKind.compute_output_shape(node))
-            os.close(tmp_handle)
-        else:
-            for node in sorted_nodes:
-                node.output_shape = TensorShape(
-                    *NodeKind.compute_output_shape(node))
-
-    # consider rewriting this function in Network.py
-    def replaced(self, new_nodes):
-        return CaffeGraph(nodes=new_nodes, name=self.name)
-
-    def transformed(self, transformers):
-        graph = self
-        for transformer in transformers:
-            graph = transformer(graph)
-            if graph is None:
-                raise ConversionError(
-                    'Transformer failed: {}'.format(transformer))
-            assert isinstance(graph, CaffeGraph)
-        return graph
-
-    def __contains__(self, key):
-        return key in self.node_lut
-
-    def __str__(self):
-        def get_max_shape(data):
-            if isinstance(data, dict):
-                max = 0
-                val = None
-                for k, v in data.items():
-                    tmp = reduce(lambda x, y: x*y, v.shape)
-                    if tmp > max:
-                        val = v.shape
-                        max = tmp
-                return val
-            else:
-                return data[0].shape
-        hdr = '{:<20} {:<30} {:>20} {:>20}'.format(
-            'Type', 'Name', 'Param', 'Output')
-        s = [hdr, '-' * 94]
-        for node in self.topologically_sorted():
-            data_shape = get_max_shape(node.data) if node.data else '--'
-            out_shape = node.output_shape or '--'
-            s.append('{:<20} {:<30} {!s:>20} {!s:>20}'.format(
-                node.kind, node.name, data_shape, tuple(out_shape)))
-        return '\n'.join(s)
-
-
-class GraphBuilder(object):
-    def __init__(self, model_path, input_shape=None, is_train_proto=False, phase='test'):
-        self.model_path = model_path
-        self.phase = phase
-        self.is_train_proto = is_train_proto
-        self.input_shape = input_shape
-        self.load()
-
-    def load(self):
-        self.model = get_caffe_resolver().NetParameter()
-        with open(self.model_path, 'r') as f:
-            text_format.Merge(f.read(), self.model)
-        if self.is_train_proto:
-            self.process_train_proto()
-
-    def process_train_proto(self):
-        layers = self.model.layer or self.model.layers
-        delete_layer = set()
-        split_op_map = dict()
-        loss_layers = [layer for layer in layers if NodeKind.map_raw_kind(
-            layer.type) in (NodeKind.SoftmaxWithLoss, NodeKind.SigmoidCrossEntropyLoss)]
-        a = [layers.remove(layer) for layer in layers[:] if layer in loss_layers[:-1]
-             or NodeKind.map_raw_kind(layer.type) in LAYER_IN_TRAIN_PROTO]
-        for layer in layers[:]:
-            if 'label' in layer.bottom:
-                if NodeKind.map_raw_kind(layer.type) in (NodeKind.SoftmaxWithLoss, NodeKind.SigmoidCrossEntropyLoss):
-                    continue
-                elif NodeKind.map_raw_kind(layer.type) == NodeKind.Split:
-                    for item in layer.top:
-                        delete_layer.add(item)
-                    layers.remove(layer)
-            elif NodeKind.map_raw_kind(layer.type) == NodeKind.Split:
-                for item in layer.top:
-                    split_op_map[item] = layer.bottom[0]
-                layers.remove(layer)
-
-        for layer in layers[:]:
-            for item in delete_layer:
-                if item in layer.bottom:
-                    layers.remove(layer)
-                    break
-            for key, value in split_op_map.items():
-                if key in layer.bottom:
-                    layer.bottom.remove(key)
-                    layer.bottom.append(value)
-        self.model.input.append('data')
-        self.model.input_dim.extend(self.input_shape)
-        last_layer = layers[-1]
-        kind = NodeKind.map_raw_kind(last_layer.type)
-        if kind in (NodeKind.SoftmaxWithLoss, NodeKind.SigmoidCrossEntropyLoss):
-            pred = layers.add()
-            pred.name = 'prob'
-            pred.top.append('prob')
-            pred.bottom.append(last_layer.bottom[0])
-            if kind == NodeKind.SoftmaxWithLoss:
-                # compatible with old version caffe proto
-                pred.type = NodeKind.Softmax if self.model.layer else 20
-            elif kind == NodeKind.SigmoidCrossEntropyLoss:
-                pred.type = NodeKind.Sigmoid if self.model.layer else 19
-            layers.remove(last_layer)
-
-    def filter_layers(self, layers):
-        phase_map = {0: 'train', 1: 'test'}
-        filtered_layer_names = set()
-        filtered_layers = []
-        for layer in layers:
-            phase = self.phase
-            if len(layer.include):
-                phase = phase_map[layer.include[0].phase]
-            if len(layer.exclude):
-                phase = phase_map[1 - layer.include[0].phase]
-            exclude = (phase != self.phase)
-            # Dropout layers appear in a fair number of Caffe
-            # test-time networks. These are just ignored. We'll
-            # filter them out here.
-            if (not exclude) and (phase == 'test'):
-                exclude = (layer.type == LayerType.Dropout)
-            if (not exclude):
-                exclude = (layer.type == LayerType.Silence)
-            if not exclude:
-                if layer.name in filtered_layer_names:
-                    for i in range(1, len(filtered_layer_names)):
-                        new_name = layer.name + '_%s' % i
-                        if new_name not in filtered_layer_names:
-                            layer.name = new_name
-                            break
-                filtered_layer_names.add(layer.name)
-                filtered_layers.append(layer)
-        return filtered_layers
-
-    def make_node(self, layer):
-        kind = NodeKind.map_raw_kind(layer.type)
-        if kind is None:
-            # TODO: raise error
-            pass
-        node = CaffeNode(layer.name, kind, layer=layer)
-        node.output.append(layer.name.replace('/', '_'))
-        node.output.extend(layer.top[1:])
-        return node
-
-    def make_input_node(self):
-        nodes = [CaffeNode(name, NodeKind.Data) for name in self.model.input]
-        if len(nodes):
-            input_dim = list(map(int, self.model.input_dim))
-            if not input_dim:
-                if len(self.model.input_shape) > 0:
-                    input_dim = list(map(int, self.model.input_shape[0].dim))
-                else:
-                    # TODO: raise error
-                    pass
-            for node in nodes:
-                node.output_shape = tuple(input_dim)
-                node.output.append('data')
-        return nodes
-
-    def build(self):
-        layers = self.model.layers or self.model.layer
-        layers = self.filter_layers(layers)
-        nodes = self.make_input_node()
-        nodes += [self.make_node(layer) for layer in layers]
-        graph = CaffeGraph(nodes=nodes, name=self.model.name)
-        node_outputs = {}
-        for idx, layer in enumerate(layers):
-            node = graph.get_node(layer.name)
-            for input_name in layer.bottom:
-                assert input_name != layer.name
-                parent_node = node_outputs.get(input_name)
-                if (parent_node is None) or (parent_node == node):
-                    parent_node = graph.get_node(input_name)
-                if parent_node.layer:
-                    for i, output in enumerate(parent_node.layer.top):
-                        if input_name == output:
-                            node.add_parent(parent_node, i)
-                else:
-                    node.add_parent(parent_node, 0)
-            for output_name in layer.top:
-                if output_name == layer.name:
-                    continue
-                node_outputs[output_name] = node
-        graph.compute_output_shapes(self.model)
-        return graph
-from __future__ import absolute_import
-from __future__ import division
-import numpy as np
-
-from mmdnn.conversion.caffe.errors import ConversionError
-from mmdnn.conversion.caffe.common_graph import Node
mmdnn.conversion.caffe.network import DEFAULT_PADDING -from mmdnn.conversion.caffe.utils import get_lower_case -from mmdnn.conversion.common.IR.graph_pb2 import TensorShape - - -def get_handler_name(node_kind): - if node_kind is None: - return node_kind - else: - if len(node_kind) <= 4: - return node_kind.lower() - else: - return get_lower_case(node_kind) - - -class NodeMapper(object): - - @classmethod - def _convert_output_shape(cls, kwargs, node): - shape = TensorShape() - dim = shape.dim.add() - dim.size = -1 - - if len(node.output_shape) > 2: - for i in node.output_shape[2:]: - dim = shape.dim.add() - dim.size = i - dim = shape.dim.add() - dim.size = node.output_shape.channels - else: - dim = shape.dim.add() - dim.size = node.output_shape[1] - kwargs['_output_shapes'] = [shape] - - @classmethod - def get_kernel_params(cls, node, input_shape): - kwargs = {} - - if node.kernel_parameters.global_pooling: - kwargs['kernel_shape'] = [ - 1, input_shape.height, input_shape.width, 1] - kwargs['pads'] = [0] * 8 - - else: - from mmdnn.conversion.caffe.graph import NodeKind - if node.kind == NodeKind.Pooling: - kwargs['kernel_shape'] = [ - 1, node.kernel_parameters.k_h, node.kernel_parameters.k_w, 1] - elif node.kind in [NodeKind.Convolution, NodeKind.Deconvolution]: - pass - else: - raise ValueError - - dilation = node.parameters.dilation[0] if hasattr( - node.parameters, 'dilation') and node.parameters.dilation else 1 - o_h_caffe = node.output_shape.height - o_w_caffe = node.output_shape.width - ko_h = dilation * (int(node.kernel_parameters.k_h) - 1) + 1 - ko_w = dilation * (int(node.kernel_parameters.k_w) - 1) + 1 - - if node.kind == NodeKind.Deconvolution: - o_h_tf = int(node.kernel_parameters.s_h) * (input_shape.height - - 1) + ko_h - 2 * int(node.kernel_parameters.p_h) - o_w_tf = int(node.kernel_parameters.s_w) * (input_shape.width - - 1) + ko_w - 2 * int(node.kernel_parameters.p_w) - else: - o_h_tf = (input_shape.height + node.kernel_parameters.p_h * - 2 - ko_h + 1) // node.kernel_parameters.s_h - o_w_tf = (input_shape.width + node.kernel_parameters.p_w * - 2 - ko_w + 1) // node.kernel_parameters.s_w - - kwargs['pads'] = [0, node.kernel_parameters.p_h, node.kernel_parameters.p_w, 0] + \ - [0, node.kernel_parameters.p_h + o_h_caffe - o_h_tf, - node.kernel_parameters.p_w + o_w_caffe - o_w_tf, 0] - - kwargs['strides'] = [1, node.kernel_parameters.s_h, - node.kernel_parameters.s_w, 1] - cls._convert_output_shape(kwargs, node) - - return kwargs - - @classmethod - def map_data(cls, node): - # TODO: We need to identify whether this is 4D image data, otherwise we shouldn't change the dimension order - shape = TensorShape() - dim = shape.dim.add() - dim.size = -1 - for i in node.output_shape[2:]: - dim = shape.dim.add() - dim.size = i - dim = shape.dim.add() - dim.size = node.output_shape.channels - - kwargs = {'shape': shape} # Ignore the dimension of batch size - cls._convert_output_shape(kwargs, node) - return Node.create('DataInput', **kwargs) - - @classmethod - def map_input(cls, node): - return cls.map_data(node) - - @classmethod - def map_convolution(cls, node): - parent, _ = node.get_only_parent() - kwargs = cls.get_kernel_params(node, parent.output_shape) - kwargs['kernel_shape'] = [node.kernel_parameters.k_h, node.kernel_parameters.k_w, - parent.output_shape.channels, node.parameters.num_output] - kwargs['use_bias'] = node.parameters.bias_term - if node.parameters.dilation: - dilation = node.parameters.dilation[0] - if dilation != 1: - kwargs['dilations'] = [1, dilation, dilation, 1] - 
kwargs['group'] = node.parameters.group - return Node.create('Conv', **kwargs) - - @classmethod - def map_deconvolution(cls, node): - parent, _ = node.get_only_parent() - kwargs = cls.get_kernel_params(node, parent.output_shape) - - kwargs['kernel_shape'] = [node.kernel_parameters.k_h, node.kernel_parameters.k_w, - node.parameters.num_output, parent.output_shape.channels] - kwargs['use_bias'] = node.parameters.bias_term - if node.parameters.dilation: - dilation = node.parameters.dilation[0] - if dilation != 1: - kwargs['dilations'] = [1, dilation, dilation, 1] - kwargs['group'] = node.parameters.group - return Node.create('ConvTranspose', **kwargs) - - @classmethod - def map_crop(cls, node): - kwargs = {} - cls._convert_output_shape(kwargs, node) - offset = node.parameters.offset - if offset: - if len(offset) == 1: - kwargs['border'] = [offset[0], offset[0], 0, 0] - else: - kwargs['border'] = [offset[0], offset[1], 0, 0] - - return Node.create('Crop', **kwargs) - - @classmethod - def map_relu(cls, node): - kwargs = {} - cls._convert_output_shape(kwargs, node) - return Node.create('Relu', **kwargs) - - @classmethod - def map_p_re_lu(cls, node): - # print(node.parameters) - # assert False - try: - scale_value = float(node.parameters.filler.value) - kwargs = {'gamma': scale_value} - except ConversionError: - kwargs = {'gamma': 0.25} - cls._convert_output_shape(kwargs, node) - return Node.create('PRelu', **kwargs) - - @classmethod - def map_pooling(cls, node): - parent, _ = node.get_only_parent() - kwargs = cls.get_kernel_params(node, parent.output_shape) - if node.parameters.pool == 0: - kwargs['pooling_type'] = 'MAX' - elif node.parameters.pool == 1: - kwargs['pooling_type'] = 'AVG' - else: - # Stochastic pooling, for instance. - raise ConversionError('Unsupported pooling type.') - cls._convert_output_shape(kwargs, node) - return Node.create('Pool', **kwargs) - - @classmethod - def _add_flatten_layer(cls, node): - shape = TensorShape() - dim = shape.dim.add() - dim.size = -1 - - dim = shape.dim.add() - dim.size = 1 - for i in node.output_shape[1:]: - dim.size *= i - kwargs = {'_output_shapes': [shape]} - return Node.create('Flatten', **kwargs) - - @classmethod - def map_inner_product(cls, node): - #TODO: Axis - assert node.parameters.axis == 1 - #TODO: Unbiased - shape = TensorShape() - dim = shape.dim.add() - dim.size = -1 - dim = shape.dim.add() - dim.size = 1 - for i in node.output_shape[1:]: - dim.size *= i - kwargs = {'use_bias': node.parameters.bias_term, 'units': node.parameters.num_output, - '_output_shapes': [shape]} - - # check if need the Flatten layer - parent, _ = node.get_only_parent() - ret = [] - - # if parent.output_shape.height > 1 or parent.output_shape.width > 1: - ret.append(cls._add_flatten_layer(parent)) - ret.append(Node.create('FullyConnected', **kwargs)) - return ret - - @classmethod - def map_softmax(cls, node): - kwargs = {} - cls._convert_output_shape(kwargs, node) - return Node.create('Softmax', **kwargs) - - @classmethod - def map_lrn(cls, node): - params = node.parameters - assert params.local_size % 2 == 1 - kwargs = {'size': int((params.local_size + 1) / 2), - 'alpha': params.alpha, 'beta': params.beta, 'k': params.k} - cls._convert_output_shape(kwargs, node) - return Node.create('LRN', **kwargs) - - @classmethod - def map_concat(cls, node): - kwargs = {'axis': (2, 3, 1, 0)[node.parameters.axis]} - cls._convert_output_shape(kwargs, node) - return Node.create('Concat', **kwargs) - - @classmethod - def map_dropout(cls, node): - kwargs = {'keep_prob': 
node.parameters.dropout_ratio} - cls._convert_output_shape(kwargs, node) - return Node.create('Dropout', **kwargs) - - @classmethod - def map_batch_norm(cls, node): - kwargs = {'scale': len(node.data) >= 3, 'bias': len(node.data) == 4} - epsilon = node.parameters.eps - kwargs['epsilon'] = epsilon - cls._convert_output_shape(kwargs, node) - return Node.create('BatchNorm', **kwargs) - - @classmethod - def map_scale(cls, node): - raise NotImplementedError - # TODO: The gamma parameter has to be set (in node.data?) and this should work. - # Also, mean should be set to 0, and var to 1, just to be safe. - scale_value = float(node.parameters.filler.value) - kwargs = {'scale': True, 'bias': False, - 'gamma': scale_value, 'epsilon': 0} - return Node.create('BatchNorm', **kwargs) - - @classmethod - def map_eltwise(cls, node): - operations = {0: 'Mul', 1: 'Add', 2: 'Max'} - op_code = node.parameters.operation - try: - kwargs = {} - cls._convert_output_shape(kwargs, node) - return Node.create(operations[op_code], **kwargs) - except KeyError: - raise ConversionError( - 'Unknown elementwise operation: {}'.format(op_code)) - - @classmethod - def map_abs_val(cls, node): - return Node.create('Abs') - - @classmethod - def map_tanh(cls, node): - return Node.create('Tanh') - - @classmethod - def map_sigmoid(cls, node): - return Node.create('Sigmoid') - - @classmethod - def map_reshape(cls, node): - kwargs = {'shape': [dim for dim in node.output_shape]} - cls._convert_output_shape(kwargs, node) - return Node.create('Reshape', **kwargs) - - @classmethod - def map_flatten(cls, node): - return cls._add_flatten_layer(node) - - @classmethod - def map_split(cls, node): - # skip the split node - return - - @classmethod - def map_elu(cls, node): - kwargs = {} - cls._convert_output_shape(kwargs, node) - return Node.create('ELU', **kwargs) -import numpy as np - -DEFAULT_PADDING = 'SAME' - - -def layer(op): - '''Decorator for composable network layers.''' - - def layer_decorated(self, *args, **kwargs): - # Automatically set a name if not provided. - name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) - # Figure out the layer inputs. - assert len(args) >= 1 - if len(args) == 1: - layer_inputs = args[0] - else: - layer_inputs = list(args) - layer_output = op(self, layer_inputs, **kwargs) - # print('op: %s shape: %s' % (op, layer_output._keras_shape)) - # print('op: %s shape: %s' % (op, layer_output.get_shape().as_list())) - # Add to layer LUT. 
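-        # Editor's hedged sketch (not part of the scraped file): a
-        # hypothetical subclass instance would invoke decorated ops as
-        #     out = net.conv(prev_out, 3, 3, 64, 1, 1, 0, 0, name='conv1')
-        # after which net.layers['conv1'] holds out and net.output tracks
-        # the most recent result.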
- self.layers[name] = layer_output - self.output = layer_output - return layer_output - - return layer_decorated - - -class Network(object): - - def __init__(self, trainable=False): - self.output = None - self.layers = {} - self.trainable = trainable - self.setup() - - def setup(self): - raise NotImplementedError('Must be implemented by the subclass') - - def load(self, data_path, session, ignore_missing=False): - raise NotImplementedError('Must be implemented by the subclass') - - def input(self, shape, name): - raise NotImplementedError('Must be implemented by the subclass') - - def get_output(self): - raise NotImplementedError('Must be implemented by the subclass') - - def get_unique_name(self, prefix): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def conv(self, input, k_h, k_w, c_o, s_h, s_w, p_h, p_w, name, group=1, biased=True): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def deconv(self, input, c_o, k_h, k_w, s_h, s_w, p_h, p_w, name): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def relu(self, input, name): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def sigmoid(self, input, name): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def max_pool(self, input, k_h, k_w, s_h, s_w, p_h, p_w, name): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def avg_pool(self, input, k_h, k_w, s_h, s_w, p_h, p_w, name): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def lrn(self, input, local_size, alpha, beta, name, bias=1): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def concat(self, inputs, axis, name): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def add(self, inputs, name): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def fc(self, input, num_out, name): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def softmax(self, input, name): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def batch_normalization(self, input, name, epsilon=0.00001, scale_offset=True): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def dropout(self, input, keep_prob, name): - raise NotImplementedError('Must be implemented by the subclass') - - @layer - def crop(self, inputs, offset, name): - raise NotImplementedError('Must be implemented by the subclass') -import sys - -SHARED_CAFFE_RESOLVER = None - - -class CaffeResolver(object): - def __init__(self): - self.import_caffe() - - def import_caffe(self): - self.caffe = None - - # try: - import caffe - self.caffe = caffe - # except ImportError: - # # # Fall back to the protobuf implementation - # # from mmdnn.conversion.caffe import caffe_pb2 - # # self.caffepb = caffe_pb2 - # # show_fallback_warning() - if self.caffe: - self.caffepb = self.caffe.proto.caffe_pb2 - self.NetParameter = self.caffepb.NetParameter - - def has_pycaffe(self): - return self.caffe is not None - - -def get_caffe_resolver(): - global SHARED_CAFFE_RESOLVER - if SHARED_CAFFE_RESOLVER is None: - SHARED_CAFFE_RESOLVER = CaffeResolver() - return SHARED_CAFFE_RESOLVER - - -def has_pycaffe(): - return get_caffe_resolver().has_pycaffe() - - -def show_fallback_warning(): - msg = ''' ------------------------------------------------------------- - WARNING: PyCaffe not found! 
- Falling back to a pure protocol buffer implementation. - * Conversions will be drastically slower. - * This backend is UNTESTED! ------------------------------------------------------------- - -''' - sys.stderr.write(msg) -import caffe - - -def save_model(MainModel, network_filepath, weight_filepath, dump_filepath): - dump_net = dump_filepath + '.prototxt' - dump_weight = dump_filepath + '.caffemodel' - dump_net = str(dump_net) - dump_weight = str(dump_weight) - MainModel.make_net(dump_net) - MainModel.gen_weight(weight_filepath, dump_weight, dump_net) - print('Caffe model files are saved as [{}] and [{}], generated by [{}.py] and [{}].'.format( - dump_net, dump_weight, network_filepath, weight_filepath)) -from collections import namedtuple -import math - -TensorShape = namedtuple( - 'TensorShape', ['batch_size', 'channels', 'height', 'width']) - - -def get_kernel_extents(params, dilation): - ko_h = dilation * (int(params.k_h) - 1) + 1 - ko_w = dilation * (int(params.k_w) - 1) + 1 - return ko_h, ko_w - - -def get_filter_output_shape(i_h, i_w, dilation, params, round_func): - ko_h, ko_w = get_kernel_extents(params, dilation) - - o_h = (i_h + 2 * params.p_h - ko_h) / float(params.s_h) + 1 - o_w = (i_w + 2 * params.p_w - ko_w) / float(params.s_w) + 1 - return (int(round_func(o_h)), int(round_func(o_w))) - - -def get_strided_kernel_output_shape(node, round_func): - assert node.layer is not None - input_shape = node.get_only_parent()[0].output_shape - params = node.kernel_parameters - dilation = node.parameters.dilation[0] if hasattr( - node.parameters, 'dilation') and node.parameters.dilation else 1 - - o_h, o_w = get_filter_output_shape(input_shape.height, input_shape.width, - dilation, params, round_func) - params = node.parameters - has_c_o = hasattr(params, 'num_output') - c = params.num_output if has_c_o else input_shape.channels - return TensorShape(input_shape.batch_size, c, o_h, o_w) - - -def shape_not_implemented(node): - raise NotImplementedError - - -def shape_deconvolution(node): - input_shape = node.get_only_parent()[0].output_shape - params = node.kernel_parameters - dilation = 1 if len( - node.parameters.dilation) == 0 else node.parameters.dilation[0] - - ko_h, ko_w = get_kernel_extents(params, dilation) - o_h = int(params.s_h) * (input_shape.height - 1) + \ - ko_h - 2 * int(params.p_h) - o_w = int(params.s_w) * (input_shape.width - 1) + \ - ko_w - 2 * int(params.p_w) - - has_c_o = hasattr(node.parameters, 'num_output') - c = node.parameters.num_output if has_c_o else input_shape.channels - return TensorShape(input_shape.batch_size, c, o_h, o_w) - - -def shape_identity(node): - assert len(node.parents) > 0 - return node.parents[0][0].output_shape - - -def shape_scalar(node): - return TensorShape(1, 1, 1, 1) - - -def shape_reshape(node): - last_shape = node.get_only_parent()[0].output_shape - shapes = [] - for idx, shape in enumerate(node.layer.reshape_param.shape.dim): - shapes.append(shape if shape != 0 else last_shape[idx]) - return TensorShape(shapes[0], shapes[1], shapes[2], shapes[3]) - - -def shape_data(node): - if node.output_shape: - # Old-style input specification - return node.output_shape - try: - # New-style input specification - return tuple(map(int, node.parameters.shape[0].dim)) - except: - # We most likely have a data layer on our hands. The problem is, - # Caffe infers the dimensions of the data from the source (eg: LMDB). - # We want to avoid reading datasets here. Fail for now. 
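-        # (Editor's illustrative sketch of the workaround described below;
-        # the dimensions are hypothetical:
-        #     input: "data"
-        #     input_dim: 1
-        #     input_dim: 3
-        #     input_dim: 224
-        #     input_dim: 224
-        # replaces the LMDB-backed data layer in a deploy-style prototxt.)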
- # This can be temporarily fixed by transforming the data layer to - # Caffe's "input" layer (as is usually used in the "deploy" version). - # TODO: Find a better solution for this. - pass - - -def shape_mem_data(node): - params = node.parameters - return TensorShape(params.batch_size, params.channels, params.height, params.width) - - -def shape_concat(node): - axis = node.parameters.axis - output_shape = None - for parent, idx in node.parents: - if output_shape is None: - output_shape = list(parent.output_shape) - else: - output_shape[axis] += parent.output_shape[axis] - return tuple(output_shape) - - -def shape_convolution(node): - return get_strided_kernel_output_shape(node, math.floor) - - -def shape_pool(node): - if node.parameters.global_pooling: - return shape_global_pooling(node) - return get_strided_kernel_output_shape(node, math.ceil) - - -def shape_inner_product(node): - input_shape = node.get_only_parent()[0].output_shape - return TensorShape(input_shape.batch_size, node.parameters.num_output, 1, 1) - - -def shape_global_pooling(node): - input_shape = node.get_only_parent()[0].output_shape - params = node.kernel_parameters - has_c_o = hasattr(params, 'num_output') - c = params.num_output if has_c_o else input_shape.channels - # Output height and width is 1 when global_pooling - return TensorShape(input_shape.batch_size, c, 1, 1) - - -def shape_split(node): - input_shape = node.get_only_parent()[0].output_shape - return TensorShape(input_shape.batch_size, input_shape.channels, input_shape.height, input_shape.width) - - -def shape_flatten(node): - input_shape = node.get_only_parent()[0].output_shape - return TensorShape(input_shape.batch_size, input_shape.channels * input_shape.height * input_shape.width, 1, 1) -from __future__ import unicode_literals -from google.protobuf import text_format -import numpy as np -from mmdnn.conversion.caffe.graph import GraphBuilder, NodeKind, LAYER_IN_TRAIN_PROTO -from mmdnn.conversion.caffe.mapper import NodeMapper, get_handler_name -from mmdnn.conversion.caffe.resolver import get_caffe_resolver, has_pycaffe -from mmdnn.conversion.caffe.errors import print_stderr, ConversionError -from mmdnn.conversion.caffe.common_graph import Graph -from mmdnn.conversion.caffe.utils import get_lower_case, get_upper_case - - -class DataInjector(object): - ''' - Associates parameters loaded from a .caffemodel file with their corresponding nodes. 
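-
-    Illustrative usage (editor's sketch; the file names are hypothetical):
-
-        injector = DataInjector('deploy.prototxt', 'weights.caffemodel')
-        graph = injector(graph)  # attaches each layer's blobs as node.data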
- ''' - - def __init__(self, def_path, data_path): - # The .prototxt file defining the graph - self.def_path = def_path - # The .caffemodel file containing the learned parameters - self.data_path = data_path - # Set to true if the fallback protocol-buffer based backend was used - self.did_use_pb = False - # A list containing (layer name, parameters) tuples - self.params = None - # Load the parameters - self.caffemodel = None - if has_pycaffe() and self.def_path: - self.load_using_caffe() - else: - self.load_using_pb() - - def load_using_caffe(self): - caffe = get_caffe_resolver().caffe - net = caffe.Net(str(self.def_path), str(self.data_path), caffe.TEST) - def data(blob): return blob.data - self.params = [(k, list(map(data, v))) for k, v in net.params.items()] - - def load_using_pb(self): - self.caffemodel = get_caffe_resolver().NetParameter() - self.caffemodel.MergeFromString(open(self.data_path, 'rb').read()) - def pair(layer): return (layer.name, self.normalize_pb_data(layer)) - layers = self.caffemodel.layers or self.caffemodel.layer - self.params = [pair(layer) for layer in layers if layer.blobs] - self.did_use_pb = True - - def normalize_pb_data(self, layer): - transformed = [] - for blob in layer.blobs: - if len(blob.shape.dim): - dims = blob.shape.dim - c_o, c_i, h, w = map(int, [1] * (4 - len(dims)) + list(dims)) - else: - c_o = blob.num - c_i = blob.channels - h = blob.height - w = blob.width - data = np.array(blob.data, dtype=np.float32).reshape( - c_o, c_i, h, w) - transformed.append(data) - return transformed - - def adjust_parameters(self, node, data): - if not self.did_use_pb: - return data - # When using the protobuf-backend, each parameter initially has four dimensions. - # In certain cases (like FC layers), we want to eliminate the singleton dimensions. - # This implementation takes care of the common cases. However, it does leave the - # potential for future issues. - # The Caffe-backend does not suffer from this problem. - data = list(data) - squeeze_indices = [1] # Squeeze biases. - if node.kind == NodeKind.InnerProduct: - squeeze_indices.append(0) # Squeeze FC. - for idx in squeeze_indices: - data[idx] = np.squeeze(data[idx]) - return data - - def __call__(self, graph): - for layer_name, data in self.params: - if layer_name in graph: - node = graph.get_node(layer_name) - node.data = self.adjust_parameters(node, data) - else: - print_stderr( - 'Ignoring parameters for non-existent layer: %s' % layer_name) - return graph - - -class NodeRenamer(object): - - def __call__(self, graph): - for node in graph.nodes: - node.name = node.name.replace('/', '_') - return graph - - -class DataReshaper(object): - - def __init__(self, mapping, replace=True): - # A dictionary mapping NodeKind to the transposed order. - self.mapping = mapping - # The node kinds eligible for reshaping - self.reshaped_node_types = self.mapping.keys() - # If true, the reshaped data will replace the old one. - # Otherwise, it's set to the reshaped_data attribute. 
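-        # Example (taken from the mapping CaffeTransformer passes in further
-        # below):
-        #     DataReshaper({NodeKind.Convolution: (2, 3, 1, 0),
-        #                   NodeKind.InnerProduct: (1, 0)})
-        # i.e. (c_o, c_i, h, w) weights are transposed to (h, w, c_i, c_o).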
- self.replace = replace - - def has_spatial_parent(self, node): - try: - parent = node.get_only_parent()[0] - s = parent.output_shape - return s.height > 1 or s.width > 1 - except ConversionError: - return False - - def map(self, node_kind): - try: - return self.mapping[node_kind] - except KeyError: - raise ConversionError( - 'Ordering not found for node kind: {}'.format(node_kind)) - - def _is_image_data(self, node): - return len([child for child in node.children if child.kind in (NodeKind.Convolution, NodeKind.Pooling)]) - - def __call__(self, graph): - for node in graph.nodes: - if node.data is None: - continue - if node.kind not in self.reshaped_node_types: - # Check for 2+ dimensional data - if any(len(tensor.shape) > 1 for tensor in node.data): - print_stderr( - 'Warning: parameters not reshaped for node: {}'.format(node)) - continue - transpose_order = self.map(node.kind) - weights = node.data[0] - if (node.kind == NodeKind.InnerProduct) and self.has_spatial_parent(node): - # The FC layer connected to the spatial layer needs to be - # re-wired to match the new spatial ordering. - in_shape = node.get_only_parent()[0].output_shape - fc_shape = weights.shape - output_channels = fc_shape[0] - weights = weights.reshape((output_channels, in_shape.channels, in_shape.height, - in_shape.width)) - weights = weights.transpose(self.map(NodeKind.Convolution)) - node.reshaped_data = weights.reshape(fc_shape[transpose_order[0]], - fc_shape[transpose_order[1]]) - else: - node.reshaped_data = weights.transpose(transpose_order) - # node.reshaped_data = weights.transpose(transpose_order) - if self.replace: - for node in graph.nodes: - if hasattr(node, 'reshaped_data'): - # Set the weights - node.data[0] = node.reshaped_data - del node.reshaped_data - return graph - - -class SubNodeFuser(object): - ''' - An abstract helper for merging a single-child with its single-parent. - ''' - - def __call__(self, graph): - nodes = graph.nodes - fused_nodes = [] - for node in nodes: - if len(node.parents) != 1: - # We're only fusing nodes with single parents - continue - parent, from_output = node.get_only_parent() - if len(parent.children) != 1: - # We can only fuse a node if its parent's - # value isn't used by any other node. - continue - if not self.is_eligible_pair(parent, node): - continue - # Rewrite the fused node's children to its parent. - for child in node.children: - index = [n for n, (input, idx) in enumerate( - child.parents) if input == node][0] - child.parents.pop(index) - child.add_parent(parent, from_output, index) - # Disconnect the fused node from the graph. - parent.children.remove(node) - fused_nodes.append(node) - # Let the sub-class merge the fused node in any arbitrary way. - self.merge(parent, node) - transformed_nodes = [node for node in nodes if node not in fused_nodes] - return graph.replaced(transformed_nodes) - - def is_eligible_pair(self, parent, child): - '''Returns true if this parent/child pair is eligible for fusion.''' - raise NotImplementedError('Must be implemented by subclass.') - - def merge(self, parent, child): - '''Merge the child node into the parent.''' - raise NotImplementedError('Must be implemented by subclass') - - -class ReLUFuser(SubNodeFuser): - ''' - Fuses rectified linear units with their parent nodes. - ''' - - def __init__(self, allowed_parent_types=None): - # Fuse ReLUs when the parent node is one of the given types. - # If None, all node types are eligible. 
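-        # Editor's hypothetical example:
-        #     ReLUFuser(allowed_parent_types=[NodeKind.Convolution,
-        #                                     NodeKind.InnerProduct])
-        # fuses only ReLUs that directly follow those layer kinds.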
-        self.allowed_parent_types = allowed_parent_types
-
-    def is_eligible_pair(self, parent, child):
-        return ((self.allowed_parent_types is None or parent.kind in self.allowed_parent_types) and
-                child.kind == NodeKind.ReLU)
-
-    def merge(self, parent, _):
-        parent.metadata['relu'] = True
-
-
-class BatchNormScaleBiasFuser(SubNodeFuser):
-    '''
-    The original batch normalization paper includes two learned
-    parameters: a scaling factor (gamma) and a bias (beta).
-    Caffe's implementation does not include these two. However, it is commonly
-    replicated by adding a scaling+bias layer immediately after the batch norm.
-
-    This fuser merges the scaling+bias layer with the batch norm.
-    '''
-
-    def is_eligible_pair(self, parent, child):
-        return (parent.kind == NodeKind.BatchNorm and child.kind == NodeKind.Scale and
-                child.parameters.axis == 1 and child.parameters.bias_term)
-
-    def merge(self, parent, child):
-        parent.scale_bias_node = child
-
-
-class BatchNormPreprocessor(object):
-    '''
-    Prescale batch normalization parameters.
-    Concatenate gamma (scale) and beta (bias) terms if set.
-    '''
-
-    def __call__(self, graph):
-        for node in graph.nodes:
-            if node.kind != NodeKind.BatchNorm:
-                continue
-            assert node.data is not None
-            assert len(node.data) == 3
-            mean, variance, scale = node.data
-
-            # Prescale the stats
-            scaling_factor = 1.0 / scale if scale != 0 else 0
-
-            # Squeeze (1, C, 1, 1)-shaped blobs down to vectors before scaling.
-            if len(np.squeeze(mean).shape) == 1:
-                mean = np.squeeze(mean)
-                variance = np.squeeze(variance)
-                scaling_factor = np.squeeze(scaling_factor)
-
-            mean *= scaling_factor
-            variance *= scaling_factor
-
-            # Replace with the updated values
-            node.data = [mean, variance]
-            if hasattr(node, 'scale_bias_node'):
-                # Include the scale and bias terms
-                gamma, beta = node.scale_bias_node.data
-                node.data += [gamma, beta]
-        return graph
-
-
-class ParameterNamer(object):
-    '''
-    Convert layer data arrays to a dictionary mapping parameter names to their values.
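-
-    For example (editor's note, derived from the mapping below): a biased
-    Convolution node whose data list is [w, b] becomes
-    {'weights': w, 'bias': b}, and a four-blob BatchNorm becomes
-    {'mean': m, 'var': v, 'scale': s, 'bias': b}.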
- ''' - - def __call__(self, graph): - for node in graph.nodes: - if node.data is None: - continue - if node.kind in (NodeKind.Convolution, NodeKind.Deconvolution, NodeKind.InnerProduct): - names = ('weights',) - if node.parameters.bias_term: - names += ('bias',) - elif node.kind == NodeKind.BatchNorm: - names = ('mean', 'var') - if len(node.data) == 4: - names += ('scale', 'bias') - elif node.kind == NodeKind.PReLU: - names = ('gamma',) - elif node.kind == NodeKind.ELU: - names = ('alpha',) - else: - print_stderr( - 'WARNING: Unhandled parameters: {}'.format(node.kind)) - continue - assert len(names) == len(node.data) - node.data = dict(zip(names, node.data)) - return graph - - -class CaffeTransformer(object): - - def __init__(self, def_path, data_path, target_toolkit, input_shape=None, phase='test'): - self.layer_name_map = {} - self.data_injector = None - self.is_train_proto = False - self.input_shape = input_shape - if def_path is None: - if self.input_shape is None: - raise ConversionError( - 'if the graph prototxt is not provided, the input shape should be provided') - self.input_shape = [1] + self.input_shape - def_path, self.data_injector = self.gen_prototxt_from_caffemodel( - data_path, self.input_shape) - self.is_train_proto = True - else: - model = get_caffe_resolver().NetParameter() - with open(def_path, 'r') as f: - text_format.Merge(f.read(), model) - layers = model.layers or model.layer - if len([layer for layer in layers if NodeKind.map_raw_kind(layer.type) in LAYER_IN_TRAIN_PROTO]) > 0: - if self.input_shape is None: - raise ConversionError( - 'the train_val.prototxt should be provided with the input shape') - self.input_shape = [1] + self.input_shape - self.is_train_proto = True - graph = GraphBuilder(def_path, self.input_shape, - self.is_train_proto, phase).build() - if self.is_train_proto: - def_path = graph.prototxt - if data_path is not None: - graph = graph.transformed([ - self.data_injector if self.data_injector else DataInjector( - def_path, data_path), # Load and associate learned parameters - BatchNormScaleBiasFuser(), - BatchNormPreprocessor() # Pre-process batch normalization data - ]) - target_toolkit = target_toolkit.lower() - if target_toolkit not in ('caffe', 'caffe2'): - graph = graph.transformed([DataReshaper({ # Reshape the parameters to TensorFlow's ordering - # (c_o, c_i, h, w) -> (h, w, c_i, c_o) - NodeKind.Convolution: (2, 3, 1, 0), - # (c_o, c_i, h, w) -> (h, w, c_i, c_o) - NodeKind.Deconvolution: (2, 3, 1, 0), - NodeKind.InnerProduct: (1, 0) # (c_o, c_i) -> (c_i, c_o) - }), - ParameterNamer() # Convert parameters to dictionaries - ]) - self.graph = graph - # self.graph = NodeRenamer()(graph) - print(self.graph) - - def gen_prototxt_from_caffemodel(self, data_path, input_shape): - prototxt = 'deploy.prototxt' - data_injector = DataInjector(None, data_path) - caffemodel = data_injector.caffemodel - layers = caffemodel.layers or caffemodel.layer - for item in layers: - item.ClearField('blobs') - with open(prototxt, 'w') as f: - f.write(str(caffemodel)) - return prototxt, data_injector - - def transform_data(self): - return {self.layer_name_map[node.name]: node.data for node in self.graph.nodes if node.data} - - def transform_graph(self): - for node in self.graph.nodes: - self.layer_name_map[node.name] = node.name - - ret = [] - for node in self.graph.nodes: - mapped_node = self.map_node(node) - if isinstance(mapped_node, list): - ret.extend([n for n in mapped_node]) - elif mapped_node: - ret.append(mapped_node) - - name = 
get_upper_case(get_lower_case(self.graph.name)) - return Graph(name, ret) - # return Graph(name, [self.map_node(node) for node in self.graph.nodes]) - - def get_handler(self, node_kind, prefix): - name = get_handler_name(node_kind) - name = '_'.join((prefix, name)) - try: - return getattr(NodeMapper, name) - except AttributeError: - raise ConversionError( - 'No handler found for node kind: %s (expected: %s)' % (node_kind, name)) - - def map_node(self, node): - map_func = self.get_handler(node.kind, 'map') - - mapped_node = map_func(node) - # assert mapped_node is not None - - if isinstance(mapped_node, list): - ret = [] - for idx, cur_node in enumerate(mapped_node): - cur_node.name = node.name + '_' + str(idx) - if idx == 0: - cur_node.input.extend( - [self.layer_name_map[input.name] for input, idx in node.parents]) - else: - cur_node.input.extend([node.name + '_' + str(idx - 1)]) - - if idx == len(mapped_node) - 1: - cur_node.output.extend(node.output) - else: - cur_node.output.extend([node.name + '_' + str(idx + 1)]) - - self.layer_name_map[node.name] = node.name + \ - '_' + str(len(mapped_node) - 1) - ret.append(cur_node) - return ret - - # skip when mapped_node is None - elif not mapped_node: - input_of_next = node.get_only_parent()[0] - next_node = node.children - for next in next_node: - next.parents[0] = tuple([input_of_next, next.parents[0][1]]) - - else: - mapped_node.name = node.name - mapped_node.input.extend( - ['%s' % (self.layer_name_map[input.name]) for input, idx in node.parents]) - mapped_node.output.extend(node.output) - return mapped_node -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -import re - - -def get_lower_case(text): - ''' - Convert PascalCase name to words concatenated by '_'. 
- 'PascalCase' -> 'pascal_case'
-    '''
-    name = re.sub('(.)([A-Z][a-z]+)', r'\1_\2', text)
-    return re.sub('([a-z0-9])([A-Z])', r'\1_\2', name).lower()
-
-
-def get_upper_case(text):
-    '''
-    'pascal_case' -> 'PascalCase'
-    '''
-    return ''.join([item.title() for item in text.split('_')])
-
-
-def get_real_name(text):
-    text = text.strip().split(':')
-    return ''.join(text[:-1])
-import base64
-from google.protobuf import json_format
-from importlib import import_module
-import json
-import numpy as np
-import os
-import sys
-
-from mmdnn.conversion.caffe.errors import ConversionError
-from mmdnn.conversion.caffe.common_graph import fetch_attr_value
-from mmdnn.conversion.caffe.utils import get_lower_case, get_upper_case, get_real_name
-
-
-class JsonFormatter(object):
-    '''Dump a DL graph into a JSON file.'''
-
-    def __init__(self, graph):
-        self.graph_def = graph.as_graph_def()
-
-    def dump(self, json_path):
-        json_txt = json_format.MessageToJson(self.graph_def)
-        parsed = json.loads(json_txt)
-        formatted = json.dumps(parsed, indent=4, sort_keys=True)
-        with open(json_path, 'w') as f:
-            f.write(formatted)
-
-
-class PyWriter(object):
-    '''Dump a DL graph into a Python script.'''
-
-    def __init__(self, graph, data, target):
-        self.graph = graph
-        self.data = data
-        self.tab = ' ' * 4
-        self.prefix = ''
-        target = target.lower()
-        if target == 'tensorflow':
-            self.target = target
-            self.net = 'TensorFlowNetwork'
-        elif target == 'keras':
-            self.target = target
-            self.net = 'KerasNetwork'
-        elif target == 'caffe':
-            self.target = target
-            self.net = 'CaffeNetwork'
-        else:
-            raise ConversionError('Target %s is not supported yet.' % target)
-
-    def indent(self):
-        self.prefix += self.tab
-
-    def outdent(self):
-        self.prefix = self.prefix[:-len(self.tab)]
-
-    def statement(self, s):
-        return self.prefix + s + '\n'
-
-    def emit_imports(self):
-        return self.statement('from dlconv.%s import %s\n' % (self.target, self.net))
-
-    def emit_class_def(self, name):
-        return self.statement('class %s(%s):' % (name, self.net))
-
-    def emit_setup_def(self):
-        return self.statement('def setup(self):')
-
-    def emit_node(self, node):
-        '''Emits the Python source for this node.'''
-
-        def pair(key, value):
-            return '%s=%s' % (key, value)
-        args = []
-        for input in node.input:
-            input = input.strip().split(':')
-            name = ''.join(input[:-1])
-            idx = int(input[-1])
-            assert name in self.graph.node_dict
-            parent = self.graph.get_node(name)
-            args.append(parent.output[idx])
-        # FIXME:
-        output = [node.output[0]]
-        # output = node.output
-        for k, v in node.attr:
-            if k == 'cell_type':
-                args.append(pair(k, "'" + fetch_attr_value(v) + "'"))
-            else:
-                args.append(pair(k, fetch_attr_value(v)))
-        args.append(pair('name', "'" + node.name + "'"))  # Set the node name
-        args = ', '.join(args)
-        return self.statement('%s = self.%s(%s)' % (', '.join(output), node.op, args))
-
-    def dump(self, code_output_dir):
-        if not os.path.exists(code_output_dir):
-            os.makedirs(code_output_dir)
-        file_name = get_lower_case(self.graph.name)
-        code_output_path = os.path.join(code_output_dir, file_name + '.py')
-        data_output_path = os.path.join(code_output_dir, file_name + '.npy')
-        with open(code_output_path, 'w') as f:
-            f.write(self.emit())
-        with open(data_output_path, 'wb') as f:
-            np.save(f, self.data)
-        return code_output_path, data_output_path
-
-    def emit(self):
-        # Decompose DAG into chains
-        chains = []
-        for node in self.graph.topologically_sorted():
-            attach_to_chain = None
-            if len(node.input) == 1:
-                parent = get_real_name(node.input[0])
-                for chain in chains:
-                    # Node is part of an existing chain.
-                    if chain[-1].name == parent:
-                        attach_to_chain = chain
-                        break
-            if attach_to_chain is None:  # Start a new chain for this node.
-                attach_to_chain = []
-                chains.append(attach_to_chain)
-            attach_to_chain.append(node)
-
-        # Generate Python code line by line
-        source = self.emit_imports()
-        source += self.emit_class_def(self.graph.name)
-        self.indent()
-        source += self.emit_setup_def()
-        self.indent()
-        blocks = []
-        for chain in chains:
-            b = ''
-            for node in chain:
-                b += self.emit_node(node)
-            blocks.append(b[:-1])
-        source += '\n\n'.join(blocks)
-        return source
-
-
-class ModelSaver(object):
-
-    def __init__(self, code_output_path, data_output_path):
-        self.code_output_path = code_output_path
-        self.data_output_path = data_output_path
-
-    def dump(self, model_output_dir):
-        '''Return the path of the generated model file containing the graph.'''
-        if not os.path.exists(model_output_dir):
-            os.makedirs(model_output_dir)
-        sys.path.append(os.path.dirname(self.code_output_path))
-        file_name = os.path.splitext(
-            os.path.basename(self.code_output_path))[0]
-        module = import_module(file_name)
-        class_name = get_upper_case(file_name)
-        net = getattr(module, class_name)
-        return net.dump(self.data_output_path, model_output_dir)
-
-
-class GraphDrawer(object):
-
-    def __init__(self, toolkit, meta_path):
-        self.toolkit = toolkit.lower()
-        self.meta_path = meta_path
-
-    def dump(self, graph_path):
-        if self.toolkit == 'tensorflow':
-            from dlconv.tensorflow.visualizer import TensorFlowVisualizer
-            if self._is_web_page(graph_path):
-                TensorFlowVisualizer(self.meta_path).dump_html(graph_path)
-            else:
-                raise NotImplementedError(
-                    'Image format of %s is unsupported!' % graph_path)
-        elif self.toolkit == 'keras':
-            from dlconv.keras.visualizer import KerasVisualizer
-            png_path, html_path = (None, None)
-            if graph_path.endswith('.png'):
-                png_path = graph_path
-            elif self._is_web_page(graph_path):
-                png_path = graph_path + ".png"
-                html_path = graph_path
-            else:
-                raise NotImplementedError(
-                    'Image format of %s is unsupported!' % graph_path)
-            KerasVisualizer(self.meta_path).dump_png(png_path)
-            if html_path:
-                self._png_to_html(png_path, html_path)
-                os.remove(png_path)
-        else:
-            raise NotImplementedError(
-                'Visualization of %s is unsupported!' % self.toolkit)
-
-    def _is_web_page(self, path):
-        return path.split('.')[-1] in ('html', 'htm')
-
-    def _png_to_html(self, png_path, html_path):
-        with open(png_path, "rb") as f:
-            encoded = base64.b64encode(f.read()).decode('utf-8')
-        # Minimal HTML shell that embeds the PNG inline as base64.
-        source = """<!DOCTYPE html>
-<html>
-<head>
-    <meta charset="utf-8">
-    <title>Keras</title>
-</head>
-<body>
-    <h1>Model Graph</h1>
-    <img alt="Model Graph" src="data:image/png;base64,{base64_str}" />
-</body>
-</html>""".format(base64_str=encoded)
-        with open(html_path, 'w', encoding='utf-8') as f:
-            f.write(source)
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ---------------------------------------------------------------------------------------------- - -from __future__ import absolute_import -from __future__ import print_function -from __future__ import division - -import os -from six.moves import xrange - -import cntk -from mmdnn.conversion.common.IR.IR_graph import IRGraph, IRGraphNode -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType -from mmdnn.conversion.common.DataStructure.emitter import Emitter -from mmdnn.conversion.common.utils import * -from mmdnn.conversion.rewriter.folder import * - - -class CntkEmitter(Emitter): - - dtype_map = { - graph_pb2.DT_FLOAT16: "np.float16", - graph_pb2.DT_FLOAT32: "np.float32", - graph_pb2.DT_FLOAT64: "np.float64", - graph_pb2.DT_INT16: "np.float16", # Cntk does not support Int. - graph_pb2.DT_INT32: "np.float32", # Cntk does not support Int. - graph_pb2.DT_INT64: "np.float64", # Cntk does not support Int. - graph_pb2.DT_UINT8: "np.uint8", - graph_pb2.DT_UINT16: "np.uint16" - } - - naive_scope_pattern = ['gru_cell', 'lstm_cell'] - - def __init__(self, model): - from six import string_types as _string_types - super(CntkEmitter, self).__init__() - if isinstance(model, _string_types): - network_path = model - else: - network_path = model[0] - self._load_weights(model[1]) - - self.IR_graph = IRGraph(network_path) - super(CntkEmitter, self)._build() - self.yolo_parameter = [] - folder = Folder(self.IR_graph, self.weights_dict) - folder.fold() - - @property - def header_code(self): - return """import numpy as np -import cntk -from cntk import ops, layers -from cntk.contrib.crosstalkcaffe.unimodel.cntkinstance import BlockApiSetup - -__weights_dict = dict() - -def load_weights(weight_file): - if weight_file == None: - return - - try: - weights_dict = np.load(weight_file).item() - except: - weights_dict = np.load(weight_file, encoding='bytes').item() - - return weights_dict - - -def KitModel(weight_file = None): - global __weights_dict - __weights_dict = load_weights(weight_file) - -""" - - def gen_code(self, phase='test'): - self.phase = phase - self.add_body(0, self.header_code) - - for layer in self.IR_graph.topological_sort: - current_node = self.IR_graph.get_node(layer) - node_type = current_node.type - - if hasattr(self, "emit_" + node_type): - func = getattr(self, "emit_" + node_type) - line = func(current_node) - if line: - self.add_body(1, line) - else: - print( - "CntkEmitter has not supported operator [%s]." 
% (node_type)) - self.emit_UNKNOWN(current_node) - - self.add_body(1, "return {}".format( - ','.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers]))) - - self.add_body(0, "") - for i in self.used_layers: - func = getattr(self, "_layer_" + i) - func() - - self.add_body(0, "") - for code in self.layers_codes.values(): - self.add_body(0, code) - - return self.body_code - - @staticmethod - def _shapeToStr(shapes): - new_shape = filter(lambda x: x > - 1, [dim.size for dim in shapes.dim]) - return ', '.join('%s' % i for i in new_shape) - - @staticmethod - def is_valid_padding(auto_pad, pads): - """ - different from utils.is_valid_padding - """ - if auto_pad: - if auto_pad == 'VALID': - return True - elif auto_pad.startswith('SAME'): - return False - else: - raise ValueError("Unknown padding type{}.".format(auto_pad)) - - else: - lens = len(pads) - assert lens % 2 == 0 - for i in range(0, lens // 2): - if pads[i] != 0: - return False - return True - - @staticmethod - def is_ceil_mode(pads): - lens = len(pads) - for i in range(lens // 2 + 1, lens - 1): - if pads[i] == pads[i - lens // 2]: - return False - else: - return True - - def _defuse_padding(self, IR_node): - auto_pad = IR_node.get_attr('auto_pad') - if auto_pad: - input_node = self.parent_variable_name(IR_node) - if auto_pad == 'VALID': - padding = False - elif auto_pad.startswith("SAME"): - padding = True - else: - raise ValueError("Unknown padding type [{}].".format(auto_pad)) - - return input_node, padding - - else: - padding = IR_node.get_attr('pads') - if not is_valid_padding(padding): - dim = len(padding) // 2 - padding_str = list() - for i in xrange(1, dim): - padding_str.append((padding[i], padding[i + dim])) - input_node = IR_node.variable_name + '_pad' - self.add_body(1, "{:<15} = cntk.pad({}, pattern={})".format( - input_node, - self.parent_variable_name(IR_node), - padding_str)) - - else: - input_node = self.parent_variable_name(IR_node) - - return input_node, False - - def emit_Conv(self, IR_node): - codes = list() - if self.weight_loaded: - self.used_layers.add('Conv') - input_node, padding = self._defuse_padding(IR_node) - - dim = len(IR_node.get_attr('strides')) - 2 - padding = [False] + [padding] * dim - - if IR_node.type == 'DepthwiseConv': - groups = IR_node.get_attr('kernel_shape')[-2] - codes.append("__weights_dict['{}']['weights'] = np.swapaxes(__weights_dict['{}']['weights'], -1, -2)".format( - IR_node.real_name, IR_node.real_name)) - else: - groups = IR_node.get_attr('group', 1) - - codes.append("{:<15} = convolution({}, is_transpose={}, strides={}, auto_padding={}, dilation={}, groups={}, name='{}')".format( - IR_node.variable_name, - input_node, - IR_node.type == 'ConvTranspose', - tuple(IR_node.get_attr('strides')[1:-1]), - padding, - tuple(IR_node.get_attr('dilations', [1])), - groups, - IR_node.name)) - - else: - codes.append("{:<15} = Convolution(name = '{}', num_filters = {}, filter_shape = ({}), strides = ({},), pad = {}, bias = {})({})\n".format( - IR_node.variable_name, - IR_node.name, - IR_node.get_attr('kernel_shape')[-1], - ', '.join( - '%s' % i for i in IR_node.layer.attr["kernel_shape"].list.i[:-2]), - ', '.join( - '%s' % i for i in IR_node.layer.attr['strides'].list.i[1:-1]), - IR_node.get_attr('auto_pad') != 'VALID', - IR_node.get_attr('use_bias'), - self.parent_variable_name(IR_node))) - return codes - - def emit_Pool(self, IR_node): - input_node = self.IR_graph.get_node( - IR_node.in_edges[0]).real_variable_name - if 
IR_node.layer.attr['global_pooling'].b: - self.used_layers.add('GlobalPooling') - code = "{:<15} = global_pooling({}, '{}', name = '{}')".format( - IR_node.variable_name, - input_node, - IR_node.get_attr('pooling_type'), - IR_node.name) - else: - for e in IR_node.get_attr('dilations', []): - assert e == 1 - - dim = len(IR_node.get_attr('kernel_shape')) - 2 - padding = not self.is_valid_padding( - IR_node.get_attr('auto_pad'), IR_node.get_attr('pads')) - padding = [False] + [padding] * dim - ceil_out_dim = self.is_ceil_mode(IR_node.get_attr('pads')) - - pooling_type = IR_node.get_attr('pooling_type') - if pooling_type == 'MAX': - pooling_type = cntk.MAX_POOLING - elif pooling_type == 'AVG': - pooling_type = cntk.AVG_POOLING - else: - raise ValueError - - if self.weight_loaded: - self.used_layers.add(IR_node.type) - code = "{:<15} = pooling({}, pooling_type={}, pooling_window_shape={}, strides={}, auto_padding={}, ceil_out_dim={})".format( - IR_node.variable_name, - input_node, - pooling_type, - tuple(IR_node.get_attr('kernel_shape')[1:-1]), - tuple(IR_node.get_attr('strides')[1:-1]), - padding, - ceil_out_dim - ) - else: - raise NotImplementedError - return code - - def emit_UNKNOWN(self, IR_node): - print(IR_node.IR_layer.name) - - def emit_DataInput(self, IR_node): - - shape_str = self._shapeToStr(IR_node.IR_layer.attr["shape"].shape) - - dtype_str = ", dtype = {}".format( - self.dtype_map[IR_node.layer.attr['dtype'].type]) if 'dtype' in IR_node.layer.attr else "" - code = "{:<15} = cntk.sequence.input_variable(({},) {}, name='{}')".format( - IR_node.variable_name, - shape_str, - dtype_str, - IR_node.name) - return code - - def emit_Dropout(self, IR_node): - parent = self.IR_graph.get_parent(IR_node.name, [0]) - if self.phase == 'train': - code = "{:<15} = Dropout({}, name = '{}')({})".format( - IR_node.variable_name, - 1 - IR_node.get_attr('keep_prob'), - IR_node.name, - parent.real_variable_name) - return code - else: - IR_node.real_name = parent.real_name - - def emit_FullyConnected(self, IR_node): - input_node = self.parent_variable_name(IR_node) - if self.weight_loaded: - self.used_layers.add(IR_node.type) - code = "{:<15} = dense({}, name = '{}')".format( - IR_node.variable_name, - input_node, - IR_node.name) - - else: - code = "{:<15} = Dense({}, bias = {}, name = '{}')({})".format( - IR_node.variable_name, - IR_node.layer.attr["units"].i, - IR_node.layer.attr['use_bias'].b, - IR_node.name, - input_node) - return code - - def emit_Flatten(self, IR_node): - code = "{:<15} = ops.reshape({}, (-1,), name = '{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.name) - return code - - def emit_Reshape(self, IR_node): - code = "{:<15} = cntk.reshape({}, shape={}, name='{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - tuple(IR_node.get_attr('shape')), - IR_node.name) - return code - - def _emit_activation(self, IR_node, op_name): - code = "{:<15} = layers.Activation(activation = {}, name = '{}')({})".format( - IR_node.variable_name, - op_name, - IR_node.name, - self.parent_variable_name(IR_node)) - return code - - def emit_Tanh(self, IR_node): - return self._emit_activation(IR_node, 'ops.tanh') - - def emit_Relu(self, IR_node): - return self._emit_activation(IR_node, 'ops.relu') - - def emit_Softmax(self, IR_node): - return self._emit_activation(IR_node, 'ops.softmax') - - def emit_Sigmoid(self, IR_node): - return self._emit_activation(IR_node, 'ops.sigmoid') - - def emit_RNNs(self, IR_node, func): - assert False - - def 
emit_LSTM(self, IR_node): - return self.emit_RNNs(IR_node, "LSTM") - - def emit_GRU(self, IR_node): - return self.emit_RNNs(IR_node, "GRU") - - def emit_Add(self, IR_node): - if len(IR_node.in_edges) > 1: - inputs = ' + '.join(self.parent_variable_name(IR_node, i) - for i in IR_node.in_edges) - code = "{:<15} = {}".format( - IR_node.variable_name, - inputs) - return code - - def emit_Sub(self, IR_node): - if len(IR_node.in_edges) > 1: - inputs = ' - '.join(self.parent_variable_name(IR_node, i) - for i in IR_node.in_edges) - code = "{:<15} = {}".format( - IR_node.variable_name, - inputs) - return code - - def emit_Mul(self, IR_node): - if len(IR_node.in_edges) > 1: - inputs = ' * '.join(self.parent_variable_name(IR_node, i) - for i in IR_node.in_edges) - code = "{:<15} = {}".format( - IR_node.variable_name, - inputs) - return code - - def emit_Constant(self, IR_node): - if IR_node.get_attr('value'): - code = "{:<15} = cntk.Constant(value={})".format( - IR_node.variable_name, IR_node.get_attr('value')) - else: - code = "{:<15} = cntk.Constant(value=__weights_dict['{}']['value'])".format( - IR_node.variable_name, IR_node.name) - return code - - def emit_Concat(self, IR_node): - inputs = ', '.join(self.parent_variable_name(IR_node, i) - for i in IR_node.in_edges) - for s in IR_node.in_edges: - node = self.IR_graph.get_node(s) - - code = "{:<15} = cntk.splice({}, axis={}, name='{}')".format( - IR_node.variable_name, - inputs, - IR_node.get_attr('axis') - 1, # why -1 ? - IR_node.name) - return code - - def emit_BatchNorm(self, IR_node): - self.used_layers.add(IR_node.type) - code = "{:<15} = batch_normalization({}, epsilon={}, name='{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.get_attr('epsilon'), - IR_node.name) - return code - - def emit_Pad(self, IR_node): - if IR_node.get_attr('mode') == 'constant': - mode = 'mode = ops.CONSTANT_PAD, constant_value = {}'.format( - IR_node.get_attr('constant_values', 0.0)) - elif IR_node.get_attr('mode') == 'reflect': - mode = 'mode = ops.REFLECT_PAD' - elif IR_node.get_attr('mode') == 'SYMMETRIC': - mode = 'mode = ops.SYMMETRIC_PAD' - else: - assert False - - padding = IR_node.get_attr('pads') - padding = convert_onnx_pad_to_tf(padding)[1:] - - code = "{:<15} = ops.pad({}, pattern={}, {})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - padding, - mode) - return code - - def emit_Squeeze(self, IR_node): - IR_node.real_name = self.IR_graph.get_node( - IR_node.in_edges[0]).real_name - - def emit_Log(self, IR_node): - code = "{:<15} = _cntk.log({}, name='{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.name) - return code - - def emit_Exp(self, IR_node): - code = "{:<15} = _cntk.exp({}, name='{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.name) - return code - - def emit_Embedding(self, IR_node): - - codes = list() - codes.append("{}_P = cntk.one_hot({}, __weights_dict['{}']['weights'].shape[0])".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.name)) - - codes.append("{:<15} = layers.Embedding(weights=__weights_dict['{}']['weights'])({}_P)".format( - IR_node.variable_name, - # IR_node.get_attr('output_dim'), - IR_node.name, - IR_node.variable_name)) - - return codes - - def emit_Reciprocal(self, IR_node): - code = "{:<15} = _cntk.reciprocal({}, name='{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.name) - return code - - def emit_ReduceMean(self, 
IR_node): - code = "{:<15} = ops.reduce_mean({}, axis = ({}), name = '{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - ', '.join('%s' % (i - 1) for i in IR_node.get_attr('axes')), - IR_node.name) - return code - - def emit_LRN(self, IR_node): - self.used_layers.add(IR_node.type) - code = "{:<15} = lrn({}, k=1, n={}, alpha={}, beta={}, name='{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.layer.attr['size'].i, - IR_node.layer.attr['alpha'].f, - IR_node.layer.attr['beta'].f, - IR_node.name) - return code - - # ?? - def emit_LeakRelu(self, IR_node): - code = "{:<15} = _cntk.relu({}) - {} * _cntk.relu(-{})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.get_attr('alpha'), - self.parent_variable_name(IR_node)) - return code - - def emit_LeakyRelu(self, IR_node): - self.used_layers.add(IR_node.type) - code = "{:<15} = _leaky_relu({}, {}, name='{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.get_attr('alpha'), - IR_node.name) - return code - - def emit_UpSampling2D(self, IR_node): - self.used_layers.add(IR_node.type) - code = "{:<15} = Upsampling2D({}, stride = {}, name = '{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.get_attr('scales')[0], - IR_node.name) - return code - - def emit_ConvTranspose(self, IR_node): - return self.emit_Conv(IR_node) - - def emit_yolo(self, IR_node): - self.used_layers.add(IR_node.type) - code = "{:<15} = {}".format( - IR_node.variable_name, - self.parent_variable_name(IR_node) - ) - # print(IR_node.layer) - self.yolo_parameter = [IR_node.get_attr('anchors'), - IR_node.get_attr('classes'), - IR_node.get_attr("ignore_thresh"), - IR_node.get_attr("jitter")] - # assert False - return code - - def emit_Crop(self, IR_node): - self.used_layers.add(IR_node.type) - output_shape = IR_node.get_attr('_output_shapes')[0] - output_shape = shape_to_list(output_shape)[1:] - code = "{:<15} = _crop({}, {}, {}, name='{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.get_attr('border')[:2], - output_shape, - IR_node.real_name) - return code - - def emit_Relu6(self, IR_node): - codes = list() - codes.append(self.emit_Relu(IR_node)) - codes.append("{:<15} = cntk.clip({}, 0, 6, name='{}_clip')".format( - IR_node.variable_name + "_clip", - IR_node.variable_name, - IR_node.name - )) - IR_node.real_name = IR_node.name + '_clip' - return codes - - def emit_DepthwiseConv(self, IR_node): - return self.emit_Conv(IR_node) - - # def emit_Unstack(self, IR_node): - # num_str = "{}.shape[{}]".format(self.parent_variable_name(IR_node), IR_node.get_attr('axis')) - # axis = IR_node.get_attr('axis') - # parent_variable_shape = "list({}.shape)".format(self.parent_variable_name(IR_node) - # if self.IR_graph.get_parent(IR_node.name, [0]).type != 'Embedding' - # else self.parent_variable_name(IR_node)+'.E') - # if axis==1: - # shape_str = "tuple([{}[0]*{}[{}], 1].extend({}[{}+1:]))".format( - # parent_variable_shape, - # parent_variable_shape, - # str(axis), - # parent_variable_shape, - # str(axis)) - # else: - # shape_str = "tuple([{}[0]*{}[{}]].extend({}[1:{}]).append(1).extend({}[{}+1:]))".format( - # parent_variable_shape, - # parent_variable_shape, - # str(axis), - # parent_variable_shape, - # str(axis), - # parent_variable_shape, - # str(axis)) - # code = "{:<15} = cntk.reshape({}, {}, name='{}')".format( - # IR_node.variable_name, - # self.parent_variable_name(IR_node), - # shape_str, - 
# IR_node.variable_name) - # code = "{: <15} = cntk.reshape({}, {}.shape, name='{}')".format( - # IR_node.variable_name, - # self.parent_variable_name(IR_node), - # self.parent_variable_name(IR_node), - # IR_node.name - # ) - # return code - - def emit_Shape(self, IR_node): - parent_node = self.IR_graph.get_parent(IR_node.name, [0]) - code = "{:<15} = {}.shape".format( - IR_node.variable_name, - self.parent_variable_name(IR_node) if parent_node.type != 'Embedding' else self.parent_variable_name(IR_node)+".E") - return code - - def emit_Slice(self, IR_node): - starts = IR_node.get_attr('starts') - if len(starts) > 1: - starts = [starts[0], starts[-1]] + starts[1:-1] - ends = IR_node.get_attr('ends') - if len(ends) > 1: - ends = [ends[0], ends[-1]] + ends[1:-1] - extra_str = "" - for idx, _ in enumerate(starts): - if idx: - extra_str += ", " - extra_str += "{}:".format(starts[idx]) - if ends[idx]: - extra_str += "{}".format(ends[idx]) - code = "{:<15} = {}[{}]".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - extra_str) - return code - - def emit_Split(self, IR_node): - self.used_layers.add(IR_node.type) - axis = IR_node.get_attr('axis') - split_num = IR_node.get_attr('split') - code = "{:<15} = split(input={}, axis={}, split_num={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - str(axis), - str(split_num)) - - return code - - # def emit_Fill(self, IR_node): - # code = "{:<15} = cntk.Constant({}, {}, name='{}')".format( - # IR_node.variable_name, - # IR_node.get_attr('value'), - # self.parent_variable_name(IR_node), - # IR_node.name) - # return code - - def emit_Unsqueeze(self, IR_node): - code = "{:<15} = cntk.expand_dims({}, axis={}, name='{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.get_attr('axes')[0], - IR_node.name) - return code - - def emit_Scope(self, IR_node): - pattern = IR_node.pattern - if pattern not in self.naive_scope_pattern and re.sub(r'(_\d+)*$', '', IR_node.pattern) not in self.naive_scope_pattern: - func = getattr(self, "_emit_" + pattern) - code = func(IR_node) - else: - code = "{:<15} = __{}({})".format( - IR_node.real_variable_name, - IR_node.pattern, - ', '.join(self.parent_variable_name(IR_node, s) for s in IR_node.in_edges)) - self._gen_scope_code(IR_node) - return code - - def _gen_scope_code(self, scope_node): - - def _scope_func(scope_name, params, code, return_var): - code = """ -def __{}({}): -{} - return {} - """.format(scope_name, params, code, ', '.join(return_var)) - return code - - if not self.layers_codes.get(scope_node.pattern, None): - body_code = str() - for node_name in scope_node.topology_list: - node = self.IR_graph.get_node(node_name) - node_type = node.type - - if hasattr(self, "emit_" + node_type): - func = getattr(self, "emit_" + node_type) - line = func(node) - if line != None: - body_code += " " + line + '\n' - else: - print( - "CntkEmitter has not supported operator [%s]." % (node_type)) - self.emit_UNKNOWN(node) - - # param_code does not need parameter slice. 
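-            # Editor's sketch of the wrapper this generates (pattern and
-            # variable names are hypothetical):
-            #     def __lstm_cell(x, h, c):
-            #         ...one emitted line per node in topology_list...
-            #         return h_out, c_out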
- input_params = scope_node.input_params - param_code = ', '.join(input_params) - function_code = _scope_func( - scope_node.pattern, param_code, body_code, scope_node.return_variables) - - self.layers_codes[scope_node.pattern] = function_code - - def _emit_h_zero(self, IR_node): - code = "{:<15} = cntk.Constant({}, (1, {}))".format( - IR_node.variable_name, - IR_node.get_attr('fill_value'), - IR_node.get_attr('fill_size')) - return code - - def _layer_Crop(self): - self.add_body(0, ''' -def _crop(input, border, output_shape, **kwargs): - dim = len(output_shape) - output_shape = [output_shape[-1]] + output_shape[:-1] - ref_tensor = np.zeros(shape=output_shape, dtype=np.float32) - - input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) - layer = cntk.crop_manual(node_input=input, node_referent=ref_tensor, offset_x=border[0], offset_y=border[1]) - layer = cntk.transpose(layer, list(range(1, dim)) + [0]) - return layer -''') - - def _layer_LeakyRelu(self): - self.add_body(0, ''' -def _leaky_relu(x, leak, name): - return cntk.param_relu(cntk.constant((np.ones(x.shape)*leak).astype(np.float32)), x, name = name) -''') - - def _layer_yolo(self): - self.add_body(0, ''' -def yolo_parameter(): - return {} -'''.format(self.yolo_parameter)) - - def _layer_upsample(self): - self.add_body(0, ''' -def Upsampling2D(x, stride, name): - assert stride == 2 - xr = cntk.reshape(x, (x.shape[0], 1, x.shape[1], 1, x.shape[2])) - xx = cntk.splice(xr, xr, axis = -2) - xy = cntk.splice(xx, xx, axis = -4) - r = cntk.reshape(xy, (x.shape[0] * 2, x.shape[1] * 2, x.shape[2]), name = name) - return r -''') - - def _layer_LRN(self): - self.add_body(0, """ -def lrn(input, **kwargs): - dim = len(input.output.shape) - input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) - layer = BlockApiSetup.lrn(**kwargs)(input) - layer = cntk.transpose(layer, list(range(1, dim)) + [0]) - return layer -""") - - def _layer_FullyConnected(self): - self.add_body(0, """ -def dense(input, name, **kwargs): - w = __weights_dict[name]['weights'] - b = __weights_dict[name]['bias'] if 'bias' in __weights_dict[name] else None - return BlockApiSetup.linear(output_shape=w.shape[1], input_shape=w.shape[0], scale_init=w, bias_init=b, name=name, **kwargs)(input) -""") - - def _layer_Conv(self): - self.add_body(0, """ -def convolution(input, is_transpose, name, **kwargs): - dim = __weights_dict[name]['weights'].ndim - - if is_transpose: - weight = np.transpose(__weights_dict[name]['weights'], [dim - 2, dim - 1] + list(range(0, dim - 2))) - kwargs.pop('groups', None) - else: - weight = np.transpose(__weights_dict[name]['weights'], [dim - 1, dim - 2] + list(range(0, dim - 2))) - w = cntk.Parameter(init=weight, name=name + '_weight') - - input = cntk.transpose(input, [dim - 2] + list(range(0, dim - 2))) - - if is_transpose: - layer = ops.convolution_transpose(w, input, **kwargs) - else: - layer = ops.convolution(w, input, **kwargs) - if 'bias' in __weights_dict[name]: - bias = np.reshape(__weights_dict[name]['bias'], [-1] + [1] * (dim - 2)) - b = cntk.Parameter(init=bias, name=name + '_bias') - layer = layer + b - layer = cntk.transpose(layer, list(range(1, dim - 1)) + [0]) - return layer -""") - - def _layer_Pool(self): - self.add_body(0, """ -def pooling(input, **kwargs): - dim = len(input.output.shape) - input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) - layer = ops.pooling(input, **kwargs) - layer = cntk.transpose(layer, list(range(1, dim)) + [0]) - return layer -""") - - def _layer_GlobalPooling(self): - 
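- # Helper transposes to channels-first, applies the CNTK global pooling layer, then transposes back to channels-last.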
self.add_body(0, """ -def global_pooling(input, type, **kwargs): - dim = len(input.output.shape) - input = cntk.transpose(input, [dim - 1] + list(range(0, dim - 1))) - layer = layers.GlobalMaxPooling(**kwargs)(input) if type == 'MAX' else layers.GlobalAveragePooling(**kwargs)(input) - layer = cntk.transpose(layer, list(range(1, dim)) + [0]) - return layer -""") - - def _layer_BatchNorm(self): - self.add_body(0, """ -def batch_normalization(input, name, epsilon, **kwargs): - mean = cntk.Parameter(init = __weights_dict[name]['mean'], - name = name + "_mean") - var = cntk.Parameter(init = __weights_dict[name]['var'], - name = name + "_var") - - layer = (input - mean) / cntk.sqrt(var + epsilon) - if 'scale' in __weights_dict[name]: - scale = cntk.Parameter(init = __weights_dict[name]['scale'], - name = name + "_scale") - layer = scale * layer - - if 'bias' in __weights_dict[name]: - bias = cntk.Parameter(init = __weights_dict[name]['bias'], - name = name + "_bias") - layer = layer + bias - - return layer -""") - - def _layer_Split(self): - self.add_body(0, """ -def split(input, axis, split_num): - split_len = input.shape[axis] - res = [] - st = 0 - for i in range(split_num): - ed = st + split_len//split_num - res.append(cntk.slice(input, axis, st, ed)) - st += split_len//split_num - - return res - """) -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -import cntk as _cntk -from mmdnn.conversion.common.DataStructure.graph import GraphNode, Graph - - -class CntkGraphNode(GraphNode): - - def __init__(self, layer): - super(CntkGraphNode, self).__init__(layer) - - @property - def name(self): - return self.layer.uid - - @property - def type(self): - if hasattr(self.layer, 'op_name'): - return self.layer.op_name - elif self.layer.is_input: - return "DataInput" - else: - raise NotImplementedError() - - @property - def cntk_layer(self): - return self.layer - - def get_attr(self, name, default_value=None): - if self.layer.is_block: - return self.layer.block_root.attributes[name] - else: - return self.layer.attributes[name] - - -class CntkGraph(Graph): - - def __init__(self, model): - # sanity check. 
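- # No model-type validation is actually performed here; the check is left as a bare pass.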
- pass - - self.weights = dict() - self.visited = set() - super(CntkGraph, self).__init__(model) - - def _traverse_graph(self, son_node): - if not son_node.uid in self.visited: - self.visited.add(son_node.uid) - - if son_node.is_block: - inputs = [input for _, - input in son_node.block_arguments_mapping] - - else: - inputs = son_node.inputs - - for input_node in inputs: - if input_node.is_output: - input_node = input_node.owner - if not input_node.uid in self.layer_map: - self.layer_map[input_node.uid] = CntkGraphNode( - input_node) - self._make_connection(input_node.uid, son_node.uid) - self._traverse_graph(input_node) - - elif input_node.is_input: - if not input_node.uid in self.layer_map: - self.layer_map[input_node.uid] = CntkGraphNode( - input_node) - self._make_connection(input_node.uid, son_node.uid) - - elif input_node.is_placeholder: - raise NotImplementedError( - "PlaceHolder of placeholder is not supported.") - - def build(self): - if len(self.model.outputs) > 1: - for idx, output in enumerate(self.model.outputs): - if len(output.shape) > 0: - eval_node = idx - break - - output = self.model[eval_node].owner - else: - output = self.model.outputs[0].owner - - self.layer_map[output.uid] = CntkGraphNode(output) - self._traverse_graph(output) - - super(CntkGraph, self).build() - - -""" - def __traverse_graph(self, node): - if node.uid in self.visited: - return - - self.visited.add(node.uid) - - if isinstance(node, _cntk.Function) and node.is_block: - composite = node.block_root - - # BlockFunction node - mapping = node.block_arguments_mapping - - # redirect the composite's inputs to the true inputs - stack.extend([(actual_input, depth-1) for _, actual_input in mapping]) # traverse into actual composite inputs - visited |= {comp_input.uid for comp_input, _ in mapping} # don't traverse into the mapped-away inputs - stack.append((composite, depth-1)) - # BlockFunctions are short-circuited, and not added to accum[] - try: - # Function node - stack = list((i, depth) for i in node.root_function.inputs) + stack - except AttributeError: - # OutputVariable node - try: - if node.is_output: - stack.insert(0, (node.owner, depth)) - visited.add(node.uid) - continue - except AttributeError: - pass - - if visitor(node): - if isinstance(node, Variable): - if node.is_parameter: - node = node.as_parameter() - elif node.is_constant: - node = node.as_constant() - - accum.append(node) - - visited.add(node.uid) - - - # def build(self): - # _traverse_graph(self, self.model.root_function) - # super(CntkGraph, self).build() -""" -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ---------------------------------------------------------------------------------------------- - -import os -import numpy as np -from six.moves import xrange -import cntk as _cntk -from mmdnn.conversion.cntk.cntk_graph import CntkGraph -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType -from mmdnn.conversion.common.utils import * -from mmdnn.conversion.common.DataStructure.parser import Parser - - -class CntkParser(Parser): - - dtype_map = { - 0: graph_pb2.DT_UNDEFINED, - np.float32: graph_pb2.DT_FLOAT32, - np.float64: graph_pb2.DT_FLOAT64, - 3: graph_pb2.DT_INT32, - 4: graph_pb2.DT_UINT8, - 5: graph_pb2.DT_INT16, - 6: graph_pb2.DT_INT8, - 7: graph_pb2.DT_STRING, - 9: graph_pb2.DT_INT64 - } - - @property - def src_graph(self): - return self.cntk_graph - - def __init__(self, model, dest_nodes=None): - super(CntkParser, self).__init__() - - if not os.path.exists(model): - raise ValueError('Cntk model [{}] can not be found!'.format(model)) - model = _cntk.Function.load(model) - self.weight_loaded = True - - # Build network graph - self.cntk_graph = CntkGraph(model) - self.cntk_graph.build() - - @staticmethod - def _convert_padding_to_IR(kernel_shape, auto_pad): - lower = [] - upper = [] - for idx in range(0, len(kernel_shape)): - if auto_pad[idx] == False: - lower += [0] - upper += [0] - else: - q = kernel_shape[idx] // 2 - lower += [q] if kernel_shape[idx] % 2 else [q - 1] - upper += [q] - - return [0] + lower + [0, 0] + upper + [0] - - def _convert_identity_operation(self, source_node, start_edge=0, end_edge=None, new_op=None, shape_transpose=True): - IR_node = self.IR_graph.node.add() - CntkParser._copy_and_reop( - source_node, IR_node, new_op, shape_transpose) - self.convert_inedge(source_node, IR_node, start_edge, end_edge) - return IR_node - - def gen_IR(self): - for layer in self.src_graph.topological_sort: - current_node = self.src_graph.get_node(layer) - node_type = current_node.type - if hasattr(self, "rename_" + node_type): - func = getattr(self, "rename_" + node_type) - func(current_node) - else: - self.rename_UNKNOWN(current_node) - - @staticmethod - def _copy_and_reop(source_node, IR_node, new_op=None, shape_transpose=False): - if new_op == None: - new_op = source_node.type - IR_node.name = source_node.real_name - IR_node.op = new_op - - kwargs = {} - - if hasattr(source_node.layer, 'dtype'): - assert source_node.layer.dtype in CntkParser.dtype_map, 'type [{}] is unknown.'.format( - source_node.layer.dtype) - IR_node.attr["dtype"].type = CntkParser.dtype_map[source_node.layer.dtype] - - if hasattr(source_node.layer, 'shape'): - shape = (-1,) + source_node.layer.shape - if shape_transpose: - shape = CntkParser.channel_first_shape_to_IR(shape) - shape = list_to_shape(shape) - kwargs['_output_shapes'] = [shape] - - assign_IRnode_values(IR_node, kwargs) - - def _fuse_bias_node(self, source_node): - next_node = self.src_graph.get_son(source_node.name, [0]) - if next_node is None or next_node.type != 'Plus' or not next_node.layer.parameters: - return False - - next_node.covered = True - next_node.real_name = source_node.real_name - B = next_node.layer.parameters[0].asarray() - self.set_weight(source_node.name, 'bias', B) - - return True - - @staticmethod - def _print_layer(source_node): - print("Layer: ", source_node.layer) - print("Parameters: ", source_node.layer.parameters) - print("Attributes: ", source_node.layer.attributes) - for in_node in source_node.layer.inputs: - print(in_node) - - def 
rename_UNKNOWN(self, source_node): - print("Cntk Parser has not supported operator [%s] with name [%s]." - % (source_node.type, source_node.name)) - - self._print_layer(source_node) - assert False - - @staticmethod - def get_ndarray(variable): - if variable.is_parameter: - return variable.as_parameter().asarray() - - elif variable.is_constant: - return variable.as_constant().asarray() - - else: - raise ValueError("Unknown variable [{}].".format(variable)) - - @staticmethod - def _get_attribute(source_node, attribute_name): - if attribute_name in source_node.attributes: - return source_node.attributes - - node = source_node.block_root - while not attribute_name in node.attributes: - node = node.inputs[0].owner - - return node.attributes - - def rename_Convolution(self, source_node): - IR_node = self._convert_identity_operation(source_node, new_op="Conv") - - for input in source_node.layer.inputs: - if input.name.endswith("W"): - W = self.get_ndarray(input) - break - - W = self.channel_first_conv_kernel_to_IR(W) - self.set_weight(source_node.name, 'weights', W) - - attributes = CntkParser._get_attribute(source_node.layer, 'strides') - - kwargs = dict() - kwargs['strides'] = [1] + list(attributes['strides'])[1:] + [1] - kwargs['dilations'] = [1] + list(attributes['dilation'])[1:] + [1] - kwargs['kernel_shape'] = list(W.shape) - padding = attributes['autoPadding'][1:] - - for pad in padding: - assert pad == padding[0] - - kwargs['auto_pad'] = 'SAME_LOWER' if padding[0] else 'VALID' - kwargs['pads'] = self._convert_padding_to_IR( - kwargs['kernel_shape'][:-2], padding) - - kwargs['use_bias'] = self._fuse_bias_node(source_node) - - assign_IRnode_values(IR_node, kwargs) - - def rename_ReLU(self, source_node): - self._convert_identity_operation(source_node, new_op='Relu') - - def rename_Relu6(self, source_node): - self._convert_identity_operation(source_node) - - def rename_Plus(self, source_node): - if not source_node.covered: - assert not source_node.layer.parameters - IR_node = self._convert_identity_operation( - source_node, new_op='Add') - - def rename_Minus(self, source_node): - if not source_node.covered: - assert not source_node.layer.parameters - self._convert_binary_operator(source_node, new_op='Sub') - - def rename_Sub(self, source_node): - self._convert_identity_operation(source_node) - - def rename_Reshape(self, source_node): - IR_node = self._convert_identity_operation(source_node) - new_shape = source_node.get_attr('newShape') - kwargs = {'shape': self.channel_first_shape_to_IR(new_shape)} - assign_IRnode_values(IR_node, kwargs) - - def rename_Times(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op='FullyConnected') - - W = source_node.layer.parameters[0].asarray().squeeze() - self.set_weight(source_node.name, 'weights', W) - - kwargs = dict() - kwargs['units'] = W.shape[-1] - kwargs['use_bias'] = self._fuse_bias_node(source_node) - assign_IRnode_values(IR_node, kwargs) - - def rename_MaxPooling(self, source_node): - if source_node.layer.is_block: - source_node.layer = source_node.layer.block_root.owner - - self.rename_Pooling(source_node) - - def rename_AveragePooling(self, source_node): - self.rename_Pooling(source_node) - - def rename_Slice(self, source_node): - IR_node = self._convert_identity_operation(source_node, new_op='Slice') - kwargs = dict() - kwargs['axis'] = source_node.get_attr('axis')[-1] + 1 - kwargs['ends'] = source_node.get_attr('endIndex') - kwargs['starts'] = source_node.get_attr('beginIndex') - kwargs['strides'] = 
source_node.get_attr('sliceStrides') - assign_IRnode_values(IR_node, kwargs) - - def rename_Splice(self, source_node): - if len(source_node.in_edges) == 1: - source_node.in_edges.append(source_node.in_edges[0]) - IR_node = self._convert_identity_operation( - source_node, new_op='Concat') - assign_IRnode_values( - IR_node, {'axis': source_node.get_attr('axis')[-1] + 1}) - - def rename_StableSigmoid(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op='Sigmoid') - - def rename_BinaryCrossEntropy(self, source_node): - pass - - def rename_Pooling(self, source_node): - IR_node = self._convert_identity_operation(source_node, new_op='Pool') - dim = len(IR_node.attr['_output_shapes'].list.shape[0].dim) - kwargs = {} - - # strides - kwargs['strides'] = list(source_node.get_attr('strides')) + [1] - if len(kwargs['strides']) < dim: - kwargs['strides'] = [1] + kwargs['strides'] - - # window_shape - kwargs['kernel_shape'] = list( - source_node.get_attr('poolingWindowShape')) + [1] - if len(kwargs['kernel_shape']) < dim: - kwargs['kernel_shape'] = [1] + kwargs['kernel_shape'] - - # pool type - pool_type = source_node.get_attr('poolingType') - if pool_type == _cntk.MAX_POOLING: - kwargs['pooling_type'] = 'MAX' - elif pool_type == _cntk.AVG_POOLING: - kwargs['pooling_type'] = 'AVG' - else: - raise ValueError("Unknown pooling type [{}].".format(pool_type)) - - # padding - padding = source_node.get_attr('autoPadding') - if len(padding) >= dim - 1: - padding = padding[1:] - elif len(padding) < dim - 2: - padding.extend([padding[-1]] * (dim - len(padding) - 2)) - for pad in padding: - assert pad == padding[-1] - kwargs['auto_pad'] = 'SAME_LOWER' if padding[0] else 'VALID' - kwargs['pads'] = self._convert_padding_to_IR( - kwargs['kernel_shape'][1:-1], padding) - - assign_IRnode_values(IR_node, kwargs) - - def rename_DataInput(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op='DataInput') - shape = [-1] + list(source_node.layer.shape) - assign_IRnode_values(IR_node, {'shape': list_to_shape( - self.channel_first_shape_to_IR(shape))}) - - def rename_BatchNormalization(self, source_node): - kwargs = dict() - kwargs['scale'] = False - kwargs['bias'] = False - for param in source_node.layer.inputs: - if param.name.endswith('scale'): - self.set_weight(source_node.name, 'scale', - self.get_ndarray(param).flatten()) - kwargs['scale'] = True - - elif param.name.endswith('bias'): - self.set_weight(source_node.name, 'bias', - self.get_ndarray(param).flatten()) - kwargs['bias'] = True - - elif param.name.lower().endswith('mean'): - self.set_weight(source_node.name, 'mean', - self.get_ndarray(param).flatten()) - - elif param.name.lower().endswith('variance'): - self.set_weight(source_node.name, 'var', - self.get_ndarray(param).flatten()) - - IR_node = self._convert_identity_operation( - source_node, end_edge=1, new_op='BatchNorm') - kwargs['epsilon'] = source_node.get_attr('epsilon') - kwargs['axis'] = -1 - assign_IRnode_values(IR_node, kwargs) - - def _add_constant_node(self, constant_node, IR_node): - new_node = self.IR_graph.node.add() - new_node.name = constant_node.uid - new_node.op = 'Constant' - value = np.atleast_1d(self.get_ndarray(constant_node)) - self.set_weight(new_node.name, 'value', value) - IR_node.input.append(new_node.name) - - def _convert_binary_operator(self, source_node, new_op): - IR_node = self._convert_identity_operation(source_node, new_op=new_op) - for in_node in source_node.layer.inputs: - if in_node.is_constant: - 
self._add_constant_node(in_node, IR_node) - - def rename_ElementTimes(self, source_node): - if source_node.layer.inputs[0] == source_node.layer.inputs[1]: - # TODO: Handle square - pass - - self._convert_binary_operator(source_node, 'Mul') - - def rename_Log(self, source_node): - self._convert_identity_operation(source_node) - - def rename_Exp(self, source_node): - self._convert_identity_operation(source_node) - - def rename_Reciprocal(self, source_node): - self._convert_identity_operation(source_node) - - def rename_Dropout(self, source_node): - # self._print_layer(source_node) - # print (source_node.name) - # print (self.src_graph.get_parent(source_node.name, [0]).real_name) - # assert False - source_node.real_name = self.src_graph.get_parent( - source_node.name, [0]).real_name - - def rename_Dense(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op='FullyConnected', shape_transpose=False) - for param in source_node.layer.inputs: - if param.name.endswith('W'): - w = np.squeeze(self.get_ndarray(param)) - if w.ndim > 2: - w = np.transpose(w, list(range(1, w.ndim - 1)) + [0, -1]) - w = np.reshape(w, [-1, w.shape[-1]]) - self.set_weight(source_node.name, 'weights', w) - assign_IRnode_values(IR_node, {'units': w.shape[-1]}) - - elif param.name.endswith('b'): - self.set_weight(source_node.name, 'bias', - self.get_ndarray(param)) - assign_IRnode_values(IR_node, {'use_bias': True}) - - def rename_Convolution2D(self, source_node): - assert source_node.layer.is_block - - # Convolution - kwargs = dict() - conv_IR_node = self.IR_graph.node.add() - conv_node = source_node.layer.block_root.inputs[0].owner.inputs[0].owner - - conv_IR_node.name = conv_node.uid - conv_IR_node.op = 'Conv' - conv_IR_node.input.append(self.get_parent( - source_node.name, [0]).real_name) - - # Kernel - conv_weight = source_node.layer.block_root.inputs[0].owner.inputs[0].owner.inputs[0] - conv_weight = self.get_ndarray(conv_weight) - W = self.channel_first_conv_kernel_to_IR(conv_weight) - self.set_weight(conv_IR_node.name, 'weights', W) - - # Attributes - conv_attr = source_node.layer.block_root.inputs[0].owner.inputs[0].owner.attributes - - kwargs['strides'] = [1] + list(conv_attr['strides'])[1:] + [1] - kwargs['dilations'] = [1] + list(conv_attr['dilation'])[1:] + [1] - kwargs['kernel_shape'] = list(W.shape) - padding = conv_attr['autoPadding'][1:] - - for pad in padding: - assert pad == padding[0] - - kwargs['auto_pad'] = 'SAME_LOWER' if padding[0] else 'VALID' - kwargs['pads'] = self._convert_padding_to_IR( - kwargs['kernel_shape'][:-2], padding) - - kwargs['use_bias'] = True - - assign_IRnode_values(conv_IR_node, kwargs) - - # Bias - plus = source_node.layer.block_root.inputs[0].owner.inputs[1] - plus = np.squeeze(self.get_ndarray(plus)) - self.set_weight(conv_IR_node.name, 'bias', plus) - - # Activation - activation = source_node.layer.block_root.owner.op_name - - activation_IR = self.IR_graph.node.add() - activation_IR.name = source_node.name - activation_IR.input.append(conv_IR_node.name) - if (activation == 'ReLU'): - activation_IR.op = 'Relu' - else: - raise ValueError() - - def rename_Activation(self, source_node): - assert source_node.layer.is_block - - op = source_node.layer.root_function.owner.name - - if op.startswith('relu'): - new_op = 'Relu' - else: - raise ValueError() - - self._convert_identity_operation(source_node, new_op=new_op) -def save_model(MainModel, network_filepath, weight_filepath, dump_filepath): - model = MainModel.KitModel(weight_filepath) - 
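- # KitModel rebuilds the converted network from the weight file; save() below serializes it as a native CNTK model.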
model.save(dump_filepath) - print('CNTK model file is saved as [{}], generated by [{}.py] and [{}].'.format( - dump_filepath, network_filepath, weight_filepath)) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -from __future__ import division -import os -import sys -import numpy as np -from six import text_type, binary_type, integer_types -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 - - -__all__ = ["assign_IRnode_values", "convert_onnx_pad_to_tf", 'convert_tf_pad_to_onnx', - 'compute_tf_same_padding', 'is_valid_padding', 'download_file', - 'shape_to_list', 'list_to_shape'] - - -def assign_attr_value(attr, val): - from mmdnn.conversion.common.IR.graph_pb2 import TensorShape - '''Assign value to AttrValue proto according to data type.''' - if isinstance(val, bool): - attr.b = val - elif isinstance(val, integer_types): - attr.i = val - elif isinstance(val, float): - attr.f = val - elif isinstance(val, binary_type) or isinstance(val, text_type): - if hasattr(val, 'encode'): - val = val.encode() - attr.s = val - elif isinstance(val, TensorShape): - attr.shape.MergeFromString(val.SerializeToString()) - elif isinstance(val, list): - if not val: - return - if isinstance(val[0], integer_types): - attr.list.i.extend(val) - elif isinstance(val[0], TensorShape): - attr.list.shape.extend(val) - elif isinstance(val[0], float): - attr.list.f.extend(val) - else: - raise NotImplementedError( - 'AttrValue cannot be of list[{}].'.format(val[0])) - elif isinstance(val, np.ndarray): - assign_attr_value(attr, val.tolist()) - else: - raise NotImplementedError('AttrValue cannot be of %s' % type(val)) - - -def assign_IRnode_values(IR_node, val_dict): - for name, val in val_dict.items(): - assign_attr_value(IR_node.attr[name], val) - - -# For padding -def convert_tf_pad_to_onnx(pads): - pads = np.reshape(pads, -1).tolist() - dims = len(pads) - assert dims % 2 == 0 - ret = [] - for idx in range(0, dims, 2): - ret.append(pads[idx]) - for idx in range(1, dims, 2): - ret.append(pads[idx]) - return ret - - -def convert_onnx_pad_to_tf(pads): - return np.transpose(np.array(pads).reshape([2, -1])).reshape(-1, 2).tolist() - - -def is_valid_padding(pads): - return sum(np.reshape(pads, -1)) == 0 - - -def shape_to_list(shape): - return [dim.size for dim in shape.dim] - - -def list_to_shape(shape): - ret = graph_pb2.TensorShape() - for dim in shape: - new_dim = ret.dim.add() - new_dim.size = dim - return ret - - -def compute_tf_same_padding(input_shape, kernel_shape, strides, data_format='NHWC'): - """ Convert [SAME] padding in tensorflow, keras to onnx pads, - i.e. [x1_begin, x2_begin...x1_end, x2_end,...] 
""" - # print (input_shape) - # print (kernel_shape) - # print (strides) - if data_format.startswith('NC'): - # Not tested - input_shape = input_shape[2:] - remove_dim = len(strides) - len(input_shape) - if remove_dim > 0: - strides = strides[remove_dim::] - - else: - input_shape = input_shape[1:-1] - remove_dim = len(input_shape) - len(strides) + 1 - if remove_dim < 0: - strides = strides[1:remove_dim] - - # print (input_shape) - # print (kernel_shape) - # print (strides) - - up_list = [0] - down_list = [0] - - for idx in range(0, len(input_shape)): - # kernel_shape[idx] = (kernel_shape[idx] - 1) * dilation_rate + 1 - output_shape = (input_shape[idx] + strides[idx] - 1) // strides[idx] - this_padding = (output_shape - 1) * \ - strides[idx] + kernel_shape[idx] - input_shape[idx] - this_padding = max(0, this_padding) - up_list.append(this_padding // 2) - down_list.append(this_padding - this_padding // 2) - - # print ([0] + up_list + [0] + down_list if data_format.startswith('NC') else up_list + [0] + down_list + [0]) - # print ('-----------------------------------------------------') - return [0] + up_list + [0] + down_list if data_format.startswith('NC') else up_list + [0] + down_list + [0] - - -# network library -def sizeof_fmt(num, suffix='B'): - for unit in ['', 'Ki', 'Mi', 'Gi', 'Ti', 'Pi', 'Ei', 'Zi']: - if abs(num) < 1024.0: - return "%3.1f %s%s" % (num, unit, suffix) - num /= 1024.0 - return "%.1f %s%s" % (num, 'Yi', suffix) - - -def _progress_check(count, block_size, total_size): - read_size = count * block_size - read_size_str = sizeof_fmt(read_size) - if total_size > 0: - percent = int(count * block_size * 100 / total_size) - percent = min(percent, 100) - sys.stdout.write("\rprogress: {} downloaded, {}%.".format( - read_size_str, percent)) - if read_size >= total_size: - sys.stdout.write("\n") - else: - sys.stdout.write("\rprogress: {} downloaded.".format(read_size_str)) - sys.stdout.flush() - - -def _single_thread_download(url, file_name): - from six.moves import urllib - result, _ = urllib.request.urlretrieve(url, file_name, _progress_check) - return result - - -def _downloader(start, end, url, filename): - import requests - headers = {'Range': 'bytes=%d-%d' % (start, end)} - r = requests.get(url, headers=headers, stream=True) - with open(filename, "r+b") as fp: - fp.seek(start) - var = fp.tell() - fp.write(r.content) - - -def _multi_thread_download(url, file_name, file_size, thread_count): - import threading - fp = open(file_name, "wb") - fp.truncate(file_size) - fp.close() - - part = file_size // thread_count - for i in range(thread_count): - start = part * i - if i == thread_count - 1: - end = file_size - else: - end = start + part - - t = threading.Thread(target=_downloader, kwargs={ - 'start': start, 'end': end, 'url': url, 'filename': file_name}) - t.setDaemon(True) - t.start() - - main_thread = threading.current_thread() - for t in threading.enumerate(): - if t is main_thread: - continue - t.join() - - return file_name - - -def download_file(url, directory='./', local_fname=None, force_write=False, auto_unzip=False, compre_type=''): - """Download the data from source url, unless it's already here. - - Args: - filename: string, name of the file in the directory. - work_directory: string, path to working directory. - source_url: url to download from if file doesn't exist. - - Returns: - Path to resulting file. 
- """ - - if not os.path.isdir(directory): - os.mkdir(directory) - - if not local_fname: - k = url.rfind('/') - local_fname = url[k + 1:] - - local_fname = os.path.join(directory, local_fname) - - if os.path.exists(local_fname) and not force_write: - print("File [{}] existed!".format(local_fname)) - return local_fname - - else: - print("Downloading file [{}] from [{}]".format(local_fname, url)) - try: - import wget - ret = wget.download(url, local_fname) - print("") - except: - ret = _single_thread_download(url, local_fname) - - if auto_unzip: - if ret.endswith(".tar.gz") or ret.endswith(".tgz"): - try: - import tarfile - tar = tarfile.open(ret) - tar.extractall(directory) - tar.close() - except: - print("Unzip file [{}] failed.".format(ret)) - - elif ret.endswith('.zip'): - try: - import zipfile - zip_ref = zipfile.ZipFile(ret, 'r') - zip_ref.extractall(directory) - zip_ref.close() - except: - print("Unzip file [{}] failed.".format(ret)) - return ret - - -""" - r = requests.head(url) - try: - file_size = int(r.headers['content-length']) - return _multi_thread_download(url, local_fname, file_size, 5) - - except: - # not support multi-threads download - return _single_thread_download(url, local_fname) - - return result -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -from __future__ import division - -import os -import numpy as np -from six import string_types as _string_types -from mmdnn.conversion.common.IR.IR_graph import IRGraph, IRGraphNode -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType -from mmdnn.conversion.common.DataStructure.emitter import Emitter -from mmdnn.conversion.common.utils import * -from mmdnn.conversion.coreml.coreml_utils import _infer_coreml_input_shape - -from coremltools.models.neural_network import NeuralNetworkBuilder as _NeuralNetworkBuilder -from coremltools.models import datatypes -from coremltools.models import MLModel as _MLModel -from coremltools.models.utils import save_spec as _save_spec - - -class CoreMLEmitter(Emitter): - - def __init__(self, architecture, weight): - super(CoreMLEmitter, self).__init__() - if os.path.exists(architecture) == False: - raise ValueError( - "IR architecture file [{}] is not found.".format(architecture)) - else: - self.IR_graph = IRGraph(architecture) - self.IR_graph.build() - - if os.path.exists(weight) == False: - raise ValueError( - "IR weight file [{}] is not found.".format(weight)) - else: - self._load_weights(weight) - - def _get_inout(self): - input_features = [] - output_features = [] - for input_node in self.IR_graph.input_layers: - if self.IR_graph.get_node(input_node).type == 'Const': - continue - shape = shape_to_list(self.IR_graph.get_node( - input_node).get_attr('shape')) - shape = _infer_coreml_input_shape(shape) - input_features.append((str(input_node), shape)) - print("CoreML Model Input Layer: [{}] {}".format( - input_node, shape)) - - for output_node in self.IR_graph.output_layers: - - node = self.IR_graph.get_node(output_node) - - if node.type == 'Pack': - continue - - node.out_edges.append(node.name) - 
shape = node.get_attr('_output_shapes') - if shape: - shape = shape_to_list(shape[0]) - else: - shape = [1] - - shape = _infer_coreml_input_shape(shape) - - output_features.append((str(output_node), shape)) - - print("CoreML Model Output Layer: [{}] {}".format( - output_node, shape)) - - return list(input_features), list(output_features) - - def _connect_coreml_layers(self): - for layer in self.builder.nn_spec.layers: - for i, out_node in enumerate(layer.output): - layer.output[i] = self.IR_graph.get_node(out_node).real_name - - def gen_model(self, - input_names=None, - output_names=None, - image_input_names=None, - is_bgr=False, - red_bias=0.0, - green_bias=0.0, - blue_bias=0.0, - gray_bias=0.0, - image_scale=1.0, - class_labels=None, - predicted_feature_name=None, - predicted_probabilities_output=''): - - input_features, output_features = self._get_inout() - is_classifier = class_labels is not None - mode = 'classifier' if is_classifier else None - self.builder = _NeuralNetworkBuilder( - input_features, output_features, mode=mode) - - for layer in self.IR_graph.topological_sort: - current_node = self.IR_graph.get_node(layer) - print("Converting layer {}({})".format( - current_node.name, current_node.type)) - node_type = current_node.type - if hasattr(self, "emit_" + node_type): - func = getattr(self, "emit_" + node_type) - func(current_node) - else: - print( - "CoreMLEmitter has not supported operator [%s]." % (node_type)) - self.emit_UNKNOWN(current_node) - assert False - - # Add classifier classes (if applicable) - if is_classifier: - classes_in = class_labels - if isinstance(classes_in, _string_types): - if not os.path.isfile(classes_in): - raise ValueError( - "Path to class labels [{}] does not exist.".format(classes_in)) - with open(classes_in, 'r') as f: - classes = f.read() - classes = classes.splitlines() - elif type(classes_in) is list: # list[int or str] - classes = classes_in - else: - raise ValueError( - 'Class labels must be a list of integers / strings, or a file path') - - if predicted_feature_name is not None: - self.builder.set_class_labels(classes, predicted_feature_name=predicted_feature_name, - prediction_blob=predicted_probabilities_output) - else: - self.builder.set_class_labels(classes) - - # Set pre-processing paramsters - self.builder.set_pre_processing_parameters( - image_input_names=[input_features[0][0]], - # image_input_names, - is_bgr=is_bgr, - red_bias=red_bias, - green_bias=green_bias, - blue_bias=blue_bias, - gray_bias=gray_bias, - image_scale=image_scale) - - # Return the protobuf spec - # model = _MLModel(self.builder.spec) - - print(self.builder.spec.description) - - return self.builder.spec, input_features, output_features - - @staticmethod - def _get_padding(IR_node): - - auto_pad = IR_node.get_attr('auto_pad') - if auto_pad is not None: - if auto_pad == 'VALID': - pass - else: - return 'SAME' - - pads = IR_node.get_attr('pads', [0, 0, 0, 0, 0, 0, 0, 0]) - - return pads - - def emit_Mul(self, IR_node): - """ - Not implement yet - """ - pass - # if IR_node.name in self.weights_dict and 'weights' in self.weights_dict[IR_node.name]: - # pass - - # self._emit_merge(IR_node,'DOT') - - def _emit_merge(self, IR_node, func): - """ - Convert concat layer to coreml. 
- """ - # Get input and output names - input_names = [self.IR_graph.get_node( - inp).real_name for inp in IR_node.in_edges] - - self.builder.add_elementwise(name=IR_node.name, input_names=input_names, - output_name=IR_node.name, mode=func) - - def emit_Conv(self, IR_node): - """ - Convert convolution layer to coreml. - """ - has_bias = IR_node.get_attr('use_bias', False) - is_deconv = False - - # Dimensions and weights - kernel_shape = IR_node.get_attr('kernel_shape') - - if len(kernel_shape) == 4: - height, width, input_channels, output_channels = kernel_shape - elif len(kernel_shape) == 5: - depth, height, width, input_channels, output_channels = kernel_shape - else: - raise NotImplementedError() - - output_shape = None - - # W should have shape (height, width, kernel_channels, output_channels), where kernel_channel = input_channels / groups - W = self.weights_dict[IR_node.name]['weights'] - b = self.weights_dict[IR_node.name]['bias'] if has_bias else None - - stride_height, stride_width = IR_node.get_attr( - 'strides')[1], IR_node.get_attr('strides')[2] - - # Dilations - dilations = IR_node.get_attr('dilations', [1, 1]) - if is_deconv and not dilations == [1, 1]: - raise ValueError( - "Unsupported non-unity dilation for Deconvolution layer") - - groups = IR_node.get_attr('group', 1) - - kernel_channels = input_channels // groups - padding = self._get_padding(IR_node) - - if isinstance(padding, list): - border_mode = "valid" - # see protobuf - padding_top, padding_left, padding_bottom, padding_right = padding[ - 1], padding[2], padding[5], padding[6] - else: - border_mode = "same" - padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0 - - input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name - - self.builder.add_convolution(name=IR_node.real_name, - kernel_channels=kernel_channels, - output_channels=output_channels, - height=height, - width=width, - stride_height=stride_height, - stride_width=stride_width, - border_mode=border_mode, - groups=groups, - W=W, - b=b, - has_bias=has_bias, - is_deconv=is_deconv, - output_shape=output_shape, - input_name=input_name, - padding_top=padding_top, - padding_left=padding_left, - padding_bottom=padding_bottom, - padding_right=padding_right, - output_name=IR_node.real_name, - dilation_factors=dilations) - - def emit_ConvTranspose(self, IR_node): - """ - Convert convolution layer to coreml. - """ - - # assert False - has_bias = IR_node.get_attr('use_bias', False) - is_deconv = True - - # Get the weights. 
- - kernel_shape = IR_node.get_attr('kernel_shape') - - if len(kernel_shape) == 4: - height, width, output_channels, kernel_channels = kernel_shape - W = self.weights_dict[IR_node.name]['weights'] - W = W.reshape(kernel_shape) - W = W.transpose((0, 1, 3, 2)) - elif len(kernel_shape) == 5: - depth, height, width, output_channels, kernel_channels = kernel_shape - W = self.weights_dict[IR_node.name]['weights'] - W = W.reshape(kernel_shape) - W = W.transpose((0, 1, 2, 4, 3)) - else: - raise NotImplementedError() - - output_shape = None - b = self.weights_dict[IR_node.name]['bias'] if has_bias else None - - stride_height, stride_width = IR_node.get_attr( - 'strides')[1], IR_node.get_attr('strides')[2] - - # Dilations - dilations = IR_node.get_attr('dilations', [1, 1]) - if is_deconv and not dilations == [1, 1]: - raise ValueError( - "Unsupported non-unity dilation for Deconvolution layer") - - groups = IR_node.get_attr('group', 1) - - padding = self._get_padding(IR_node) - - if isinstance(padding, list): - border_mode = "valid" - # see protobuf - padding_top, padding_left, padding_bottom, padding_right = padding[ - 1], padding[2], padding[5], padding[6] - else: - border_mode = "same" - padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0 - - input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name - - self.builder.add_convolution(name=IR_node.real_name, - kernel_channels=kernel_channels, - output_channels=output_channels, - height=height, - width=width, - stride_height=stride_height, - stride_width=stride_width, - border_mode=border_mode, - groups=groups, - W=W, - b=b, - has_bias=has_bias, - is_deconv=is_deconv, - output_shape=output_shape, - input_name=input_name, - padding_top=padding_top, - padding_left=padding_left, - padding_bottom=padding_bottom, - padding_right=padding_right, - output_name=IR_node.real_name, - dilation_factors=dilations) - - def emit_DepthwiseConv(self, IR_node): - # depth-wise convolution - - input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name - kernel_channels = 1 - is_deconv = False - has_bias = IR_node.get_attr('use_bias', False) - - depth_multiplier = IR_node.get_attr('kernel_shape')[-1] - - W = self.weights_dict[IR_node.name]['weights'] - height, width, channels, n_filters = W.shape - output_shape = None - W = np.reshape(W, (height, width, 1, channels * depth_multiplier)) - b = self.weights_dict[IR_node.name]['bias'] if has_bias else None - - # Dilations - dilations = IR_node.get_attr('dilations', [1, 1]) - - padding = self._get_padding(IR_node) - - if isinstance(padding, list): - border_mode = "valid" - # see protobuf - padding_top, padding_left, padding_bottom, padding_right = padding[ - 1], padding[2], padding[5], padding[6] - else: - border_mode = "same" - padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0 - - output_channels = W.shape[-1] - groups = W.shape[-1] - stride_height, stride_width = IR_node.get_attr( - 'strides')[1], IR_node.get_attr('strides')[2] - - self.builder.add_convolution(name=IR_node.real_name, - kernel_channels=kernel_channels, - output_channels=output_channels, - height=height, - width=width, - stride_height=stride_height, - stride_width=stride_width, - border_mode=border_mode, - groups=groups, - W=W, - b=b, - has_bias=has_bias, - is_deconv=is_deconv, - output_shape=output_shape, - padding_top=padding_top, - padding_left=padding_left, - padding_bottom=padding_bottom, - padding_right=padding_right, - input_name=input_name, - output_name=IR_node.real_name, - 
dilation_factors=dilations) - - def emit_Pool(self, IR_node): - """ - Convert pooling layer to coreml. - """ - # Get input and output names - input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name - - # Pooling layer type - pooling_type = IR_node.get_attr('pooling_type') - if pooling_type == 'MAX': - layer_type_str = 'MAX' - elif pooling_type == 'AVG': - layer_type_str = 'AVERAGE' - else: - raise TypeError("Pooling type %s not supported" % pooling_type) - - # if it's global, set the global flag - global_pooling = IR_node.get_attr('global_pooling', False) - dim = len(IR_node.get_attr('strides')) - 2 - - if global_pooling: - if dim == 2: - - stride_height, stride_width = tuple( - IR_node.get_attr('strides')[1:-1]) - height, width = 1, 1 - - # TODO global pooling modification - - # Padding - padding = self._get_padding(IR_node) - - if isinstance(padding, list): - padding_type = "VALID" - # see protobuf - padding_top, padding_left, padding_bottom, padding_right = padding[ - 1], padding[2], padding[5], padding[6] - else: - padding_type = "SAME" - padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0 - - elif dim == 1: - raise NotImplementedError() - global_pooling = False - _, width, channels = keras_layer.input_shape - height = 1 - stride_height, stride_width = height, width - padding_type = 'VALID' - else: - raise NotImplementedError() - - else: - - height, width = tuple(IR_node.get_attr('kernel_shape')[1:-1]) - stride_height, stride_width = tuple( - IR_node.get_attr('strides')[1:-1]) - - # Padding - padding = self._get_padding(IR_node) - if isinstance(padding, list): - - padding_type = "VALID" - # see protobuf - padding_top, padding_left, padding_bottom, padding_right = padding[ - 1], padding[2], padding[5], padding[6] - else: - padding_type = "SAME" - padding_top, padding_left, padding_bottom, padding_right = 0, 0, 0, 0 - - self.builder.add_pooling(name=IR_node.name, - height=height, - width=width, - stride_height=stride_height, - stride_width=stride_width, - layer_type=layer_type_str, - padding_type=padding_type, - padding_top=padding_top, - padding_left=padding_left, - padding_bottom=padding_bottom, - padding_right=padding_right, - input_name=input_name, - output_name=IR_node.name, - exclude_pad_area=True, - is_global=global_pooling) - - def emit_Scale(self, IR_node): - # Get input and output names - input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name - - weights = IR_node.get_attr('scale', False) - weights = self.weights_dict[IR_node.name]['scale'] - has_bias = IR_node.get_attr('use_bias', False) - if has_bias: - bias = self.weights_dict[IR_node.name]['bias'] - - shape_scale = self.weights_dict[IR_node.name]['shapeScale'] - if has_bias: - shape_bias = self.weights_dict[IR_node.name]['shapeBias'] - - self.builder.add_scale(name=IR_node.real_name, - W=weights, - b=bias, - has_bias=has_bias, - input_name=input_name, - output_name=IR_node.name, - shape_scale=[shape_scale], - shape_bias=[shape_bias]) - - def emit_UNKNOWN(self, IR_node): - print(IR_node.name) - - def emit_Crop(self, IR_node): - input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name - output_name = IR_node.real_name - - is_1d = False - border = IR_node.get_attr('border') - - if is_1d: - raise ValueError("Unrecognized padding option: %s" % (str(border))) - else: - if type(border) is int: - top = left = bottom = right = border - elif type(border) is list: - # type: "list(int). A 1-D values of (leftBorder, topBorder, rightBorder, bottomBorder)." 
- # This is central crop - top, left = border[1], border[0] - bottom, right = border[1], border[0] - else: - raise ValueError( - "Unrecognized padding option: %s" % (str(border))) - - # Now add the layer - self.builder.add_crop(name=IR_node.name, - left=left, right=right, top=top, bottom=bottom, offset=[ - 0, 0], - input_names=[input_name], output_name=output_name - ) - - def emit_ReduceMean(self, IR_node): - """ - Convert ReduceMean layer to coreml. - """ - - axis = IR_node.get_attr('axes', [1, 2]) - -# Allowed values: 'CHW', 'HW', 'C', 'H', 'W' - if len(axis) == 1: - if axis[0] == 0: - axis_str = 'C' - elif axis[0] == 1: - axis_str = 'H' - elif axis[0] == 2: - axis_str = 'W' - elif len(axis) == 2: - axis_str = 'HW' - elif len(axis) == 3: - axis_str = 'CHW' - - # Get input and output names - input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name - - self.builder.add_reduce(IR_node.name, - input_name=input_name, - output_name=IR_node.name, - axis=axis_str, - mode='avg', - epsilon=1e-6) - - def emit_DataInput(self, IR_node): - """ Layers that can be skipped. """ - return - - def emit_Dropout(self, IR_node): - """ Layers that can be skipped (because they are train time only. """ - IR_node.real_name = self.IR_graph.get_parent( - IR_node.name, [0]).real_name - - def emit_FullyConnected(self, IR_node): - """ - Convert a dense layer to coreml. - """ - # Get input and output names - input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name - output_name = IR_node.out_edges[0] - - has_bias = IR_node.get_attr('use_bias') - - # Get the weights from keras - W = self.weights_dict[IR_node.name]['weights'].T - Wb = self.weights_dict[IR_node.name]['bias'].T if has_bias else None - output_channels, input_channels = W.shape - - self.builder.add_inner_product(name=IR_node.name, - W=W, - b=Wb, - input_channels=input_channels, - output_channels=output_channels, - has_bias=has_bias, - input_name=input_name, - output_name=IR_node.name) - - def emit_Flatten(self, IR_node): - """ - Convert a flatten layer from keras to coreml. - """ - # Get input and output names - input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name - output_name = IR_node.out_edges[0] - - """ - # blob_order == 0 if the input blob needs not be rearranged - # blob_order == 1 if the input blob needs to be rearranged - blob_order = 0 - - # using keras_layer.input.shape have a "?" 
(Dimension[None] at the front), - # making a 3D tensor with unknown batch size 4D - if len(keras_layer.input.shape) == 4: - blob_order = 1 - """ - - self.builder.add_flatten(name=IR_node.name, mode=1, - input_name=input_name, output_name=IR_node.name) - - def emit_Reshape(self, IR_node): - def ShapetrToTuple(string, batch_none=False): - if batch_none == True: - ls = [int(item) for item in string.split(', ')] - ls.insert(0, None) - return tuple(ls) - else: - ls = [int(item) for item in string.split(', ')] - return tuple(ls) - - last_node = self.IR_graph.get_node(IR_node.in_edges[0]).layer - input_shape_dims = last_node.attr["_output_shapes"].list.shape - target_shape_dims = IR_node.IR_layer.attr["_output_shapes"].list.shape - - input_shape = ShapetrToTuple( - IRGraph.shapeToStr(input_shape_dims[0]), True) - target_shape = ShapetrToTuple(IRGraph.shapeToStr(target_shape_dims[0])) - - def get_coreml_target_shape(target_shape): - if len(target_shape) == 1: # (D,) - coreml_shape = (1, target_shape[0], 1, 1) - elif len(target_shape) == 2: # (S,D) - coreml_shape = target_shape + (1, 1) - elif len(target_shape) == 3: # (H,W,C) - coreml_shape = ( - 1, target_shape[2], target_shape[0], target_shape[1]) - else: - coreml_shape = None - return coreml_shape - - def get_mode(input_shape, target_shape): - in_shape = input_shape[1:] - if len(in_shape) == 3 or len(target_shape) == 3: - return 1 - else: - return 0 - input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name - new_shape = get_coreml_target_shape(target_shape) - mode = get_mode(input_shape, target_shape) - - self.builder.add_reshape( - name=IR_node.real_name, - input_name=input_name, - output_name=IR_node.real_name, - target_shape=new_shape, - mode=mode) - - def _emit_activation(self, IR_node, act, params=None): - # Get input and output names - input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name - output_name = IR_node.real_name - if not isinstance(params, list): - params = [params] - - self.builder.add_activation(name=IR_node.real_name, - non_linearity=act, - input_name=input_name, - output_name=output_name, - params=params) - - # activation emit - - def emit_Relu(self, IR_node): - self._emit_activation(IR_node, 'RELU') - - def emit_Tanh(self, IR_node): - self._emit_activation(IR_node, 'TANH') - - def emit_PRelu(self, IR_node): - self._emit_activation(IR_node, 'PRELU', IR_node.get_attr('gamma', 0)) - - def emit_LeakyRelu(self, IR_node): - self._emit_activation(IR_node, 'LEAKYRELU', - IR_node.get_attr('alpha', 0)) - - def emit_Elu(self, IR_node): - self._emit_activation(IR_node, 'ELU', IR_node.get_attr('alpha', 0)) - - def emit_ThresholdedRelu(self, IR_node): - self._emit_activation(IR_node, 'THRESHOLDEDRELU', - IR_node.get_attr('alpha', 0)) - - def emit_ScaledTanh(self, IR_node): - self._emit_activation(IR_node, 'SCALED_TANH', [ - IR_node.get_attr('alpha', 0), IR_node.get_attr('beta', 0)]) - - def emit_linear(self, IR_node): - self._emit_activation(IR_node, 'LINEAR', [IR_node.get_attr( - 'alpha', 0), IR_node.get_attr('beta', 0)]) - - def emit_SigmoidHard(self, IR_node): - self._emit_activation(IR_node, 'SIGMOID_HARD', [ - IR_node.get_attr('alpha', 0), IR_node.get_attr('beta', 0)]) - - def emit_ParametricSoftplus(self, IR_node): - self._emit_activation(IR_node, 'PARAMETRICSOFTPLUS', [ - IR_node.get_attr('alpha', 0), IR_node.get_attr('beta', 0)]) - - def emit_Softmax(self, IR_node): - # Get input and output names - input_name = self.IR_graph.get_node(IR_node.in_edges[0]).real_name - output_name = IR_node.out_edges[0] - 
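- # CoreML's softmax always normalizes along the channel axis, so add_softmax takes no axis argument.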
self.builder.add_softmax(name=IR_node.name, input_name=input_name, - output_name=IR_node.name) - - def emit_Sigmoid(self, IR_node): - assert False - code = "{:<15} = Activation(name = '{}', activation = 'sigmoid')({})".format( - IR_node.replace_scope(IR_node.name), - IR_node.name, - IR_node.replace_scope(IR_node.in_edges[0])) - return code - - def emit_Relu6(self, IR_node): - - layer = IR_node.real_name - input_name, output_name = ( - IR_node.IR_layer.input[0], IR_node.IR_layer.name) - - relu_output_name = output_name + '_relu' - self.builder.add_activation( - layer, 'RELU', input_name, relu_output_name) - # negate it - neg_output_name = relu_output_name + '_neg' - self.builder.add_activation(layer+'__neg__', 'LINEAR', relu_output_name, - neg_output_name, [-1.0, 0]) - # apply threshold - clip_output_name = relu_output_name + '_clip' - self.builder.add_unary(layer+'__clip__', neg_output_name, clip_output_name, - 'threshold', alpha=-6.0) - # negate it back - self.builder.add_activation( - layer + '_neg2', - 'LINEAR', - clip_output_name, - output_name, - [-1.0, 0]) - - def emit_Gather(self, IR_node): - raise NotImplementedError() - W = self.weights_dict[IR_node.name]['weights'] - if W.ndim == 2: - vocab_size = W.shape[0] - output_channels = W.shape[1] - builder.add_embedding( - name=IR_node.real_name, - W=W, - b=None, - input_dim=vocab_size, - output_channels=output_channels, - has_bias=False, - input_name=input_name, - output_name=IR_node.real_name) - else: - raise NotImplementedError() - - def emit_RNNs(self, IR_node, func): - assert False - # for Keras - if "dropout" in IR_node.IR_layer.attr: - dropout_str = ",dropout = {}, recurrent_dropout = {}".format( - IR_node.IR_layer.attr['dropout'].f, - IR_node.IR_layer.attr['recurrent_dropout'].f) - else: - dropout_str = "" - - code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format( - IR_node.name, - func, - IR_node.IR_layer.attr['units'].i, - IR_node.IR_layer.attr['use_bias'].b, - dropout_str, - IR_node.in_edges[0]) - - return code - - def emit_LSTM(self, IR_node): - return self.emit_RNNs(IR_node, "LSTM") - - def emit_GRU(self, IR_node): - return self.emit_RNNs(IR_node, "GRU") - - def emit_Add(self, IR_node): - self._emit_merge(IR_node, 'ADD') - - def emit_Concat(self, IR_node): - self._emit_merge(IR_node, "CONCAT") - - def emit_BatchNorm(self, IR_node): - """ - Convert a Batch Normalization layer. 
- """ - # Get input and output names - input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name - - axis = IR_node.get_attr('axis', -1) - nb_channels = IR_node.get_attr('_output_shapes')[0].dim[axis].size - - # Set parameters - # Parameter arrangement in Keras: gamma, beta, mean, variance - weights = self.weights_dict[IR_node.name] - mean = weights['mean'] - std = weights['var'] - gamma = weights.get('scale', np.ones(mean.shape)) - beta = weights.get('bias', np.zeros(mean.shape)) - - # compute adjusted parameters - # Reference: parameter transformation https://github.com/apple/coremltools/issues/153 - variance = std * std - f = 1.0 / np.sqrt(std + IR_node.get_attr('epsilon')) - gamma1 = gamma*f - beta1 = beta - gamma*mean*f - mean[:] = 0.0 # mean - variance[:] = 1.0 - .00001 # stddev - - self.builder.add_batchnorm( - name=IR_node.real_name, - channels=nb_channels, - gamma=gamma1, - beta=beta1, - mean=mean, - variance=variance, - input_name=input_name, - output_name=IR_node.real_name) - - def emit_Pad(self, IR_node): - input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name - output_name = IR_node.real_name - is_1d = False - padding = IR_node.get_attr('pads') - if is_1d: - raise ValueError("Unrecognized padding option: %s" % - (str(padding))) - else: - if type(padding) is int: - top = left = bottom = right = padding - elif type(padding) is list: - top, left = padding[1], padding[2] - bottom, right = padding[5], padding[6] - else: - raise ValueError( - "Unrecognized padding option: %s" % (str(padding))) - - # padding type TODO - # Type of the padding. Can be one of 'constant', 'reflection' or 'replication - padding_type = IR_node.get_attr('mode', 'CONSTANT') - if padding_type == 'CONSTANT': - padding_type = 'constant' - elif padding_type == 'REFLECT': - padding_type = 'reflection' - elif padding_type == 'SYMMETRIC': - padding_type = 'replication' - - # Now add the layer - self.builder.add_padding(name=IR_node.name, - left=left, right=right, top=top, bottom=bottom, value=0, - input_name=input_name, output_name=output_name, padding_type=padding_type - ) - - def emit_Squeeze(self, IR_node): - input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name - output_name = IR_node.real_name - - self.builder.add_bias(name=IR_node.name, - b=0, - input_name=input_name, - output_name=output_name, - shape_bias=[1]) - # self.emit_Flatten(IR_node) - - def emit_LRN(self, IR_node): - input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name - output_name = IR_node.real_name - alpha = IR_node.get_attr('alpha') - beta = IR_node.get_attr('beta') - k = IR_node.get_attr('k') - depth_radius = int(IR_node.get_attr('size')) - # depth_radius: Half-width of the 1-D normalization window." 
- self.builder.add_lrn(output_name, input_name, output_name, - alpha=alpha, - beta=beta, - local_size=2*depth_radius-1, - k=k) - - def emit_SeparableConv(self, IR_node): - - input_name = self.IR_graph.get_parent(IR_node.name, [0]).real_name - output_name = IR_node.real_name - - strides = IR_node.get_attr('strides') - stride_height, stride_width = (strides[1], strides[2]) - - # Get the weights - W0 = self.weights_dict[IR_node.name]['depthwise_filter'] - W1 = self.weights_dict[IR_node.name]['pointwise_filter'] - - padding = IR_node.get_attr('auto_pad').split('_')[0].lower() - has_bias = IR_node.get_attr('use_bias') - b = self.weights_dict[IR_node.name]['bias'] if has_bias else None - - output_blob_shape = IR_node.get_attr('_output_shapes') - shape = shape_to_list(output_blob_shape[0]) - output_channels = shape[-1] - - height, width, input_channels, depth_mult = W0.shape - - W0 = np.reshape(W0, (height, width, 1, input_channels * depth_mult)) - - intermediate_name = input_name + '_intermin_' - - self.builder.add_convolution(name=IR_node.name + '_step_1', - kernel_channels=1, - output_channels=input_channels * depth_mult, - height=height, - width=width, - stride_height=stride_height, - stride_width=stride_width, - border_mode=padding, - groups=input_channels, - W=W0, - b=None, - has_bias=False, - is_deconv=False, - output_shape=None, - input_name=input_name, - output_name=intermediate_name, - dilation_factors=[1, 1]) - - self.builder.add_convolution(name=IR_node.name + '_step_2', - kernel_channels=input_channels * depth_mult, - output_channels=output_channels, - height=1, - width=1, - stride_height=1, - stride_width=1, - border_mode=padding, - groups=1, - W=W1, - b=b, - has_bias=has_bias, - is_deconv=False, - output_shape=None, - input_name=intermediate_name, - output_name=output_name, - dilation_factors=[1, 1]) - - def emit_Slice(self, IR_node): - pass - - def emit_Const(self, IR_node): - pass - - def emit_Shape(self, IR_node): - pass - - def emit_Pack(self, IR_node): - pass -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- -import os - -import coremltools -from mmdnn.conversion.common.DataStructure.graph import GraphNode, Graph - - -class CoremlGraphNode(GraphNode): - - def __init__(self, layer): - super(CoremlGraphNode, self).__init__(layer) - - @property - def name(self): - return self.layer.name - - @property - def type(self): - return self.layer.__class__.__name__ - - @property - def coreml_layer(self): - return self.layer - - -class CoremlGraph(Graph): - - def __init__(self, model): - from coremltools.proto import Model_pb2 - - # sanity check. - if not isinstance(model, Model_pb2.Model): - raise TypeError( - "Coreml layer of type %s is not supported." 
% type(model)) - super(CoremlGraph, self).__init__(model) - self.model = model - - def build(self): - self.input_layers = list() - - # input layer - - for layer in self.model.description.input: - self.layer_map[layer.name] = CoremlGraphNode(layer) - self.layer_name_map[layer.name] = layer.name - - model_type = self.model.WhichOneof('Type') - if model_type == 'neuralNetworkClassifier': - # build each layer - for layer in self.model.neuralNetworkClassifier.layers: - self.layer_map[layer.name] = CoremlGraphNode(layer) - self.layer_name_map[layer.name] = layer.name - - # if A.output == B.input, then make the connection: A -> B - for layerA in self.model.neuralNetworkClassifier.layers: - for layerB in self.model.neuralNetworkClassifier.layers: - for A in layerA.output: - for B in layerB.input: - if A == B: - # print('{0:20}-> {1:20}'.format(layerA.name, layerB.name)) - self._make_connection(layerA.name, layerB.name) - - # if A.name == B.input, then make the connection: A -> B, here A is the input - for layerA in self.model.description.input: - for layerB in self.model.neuralNetworkClassifier.layers: - for B in layerB.input: - if layerA.name == B: - self._make_connection(layerA.name, layerB.name) - elif model_type == 'neuralNetwork': - # build each layer - for layer in self.model.neuralNetwork.layers: - self.layer_map[layer.name] = CoremlGraphNode(layer) - self.layer_name_map[layer.name] = layer.name - - # if A.output == B.input, then make the connection: A -> B - for layerA in self.model.neuralNetwork.layers: - for layerB in self.model.neuralNetwork.layers: - for A in layerA.output: - for B in layerB.input: - if A == B: - # print('{0:20}-> {1:20}'.format(layerA.name, layerB.name)) - self._make_connection(layerA.name, layerB.name) - # if A.name == B.input, then make the connection: A -> B, here A is the input - for layerA in self.model.description.input: - for layerB in self.model.neuralNetwork.layers: - for B in layerB.input: - if layerA.name == B: - self._make_connection(layerA.name, layerB.name) - elif model_type == 'neuralNetworkRegressor': - # build each layer - for layer in self.model.neuralNetworkRegressor.layers: - self.layer_map[layer.name] = CoremlGraphNode(layer) - self.layer_name_map[layer.name] = layer.name - - # if A.output == B.input, then make the connection: A -> B - for layerA in self.model.neuralNetworkRegressor.layers: - for layerB in self.model.neuralNetworkRegressor.layers: - for A in layerA.output: - for B in layerB.input: - if A == B: - # print('{0:20}-> {1:20}'.format(layerA.name, layerB.name)) - self._make_connection(layerA.name, layerB.name) - # if A.name == B.input, then make the connection: A -> B, here A is the input - for layerA in self.model.description.input: - for layerB in self.model.neuralNetworkRegressor.layers: - for B in layerB.input: - if layerA.name == B: - self._make_connection(layerA.name, layerB.name) - else: - assert False - - # The information of the layer - super(CoremlGraph, self).build() -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
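# CoremlGraph.build (above) links layer A to layer B whenever one of A's
# output blob names matches one of B's input blob names. The quadruple loop
# is O(n^2) in layers; indexing producers by blob name yields the same edges
# in one pass. A sketch with hypothetical (name, inputs, outputs) triples:
layers = [('conv1', ['data'], ['conv1_out']),
          ('relu1', ['conv1_out'], ['relu1_out'])]

producers = {}
for name, _, outputs in layers:
    for blob in outputs:
        producers.setdefault(blob, []).append(name)

edges = [(src, name)
         for name, inputs, _ in layers
         for blob in inputs
         for src in producers.get(blob, [])]
assert edges == [('conv1', 'relu1')]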
-# ---------------------------------------------------------------------------------------------- - - -import os -from six import string_types as _string_types -import numpy as np -import math - -from coremltools.models.neural_network import NeuralNetworkBuilder as _NeuralNetworkBuilder -from coremltools.models import datatypes -from coremltools.models import MLModel as _MLModel -from coremltools.models.utils import save_spec as _save_spec -from coremltools.models._infer_shapes_nn_mlmodel import infer_shapes -from coremltools.proto import Model_pb2, NeuralNetwork_pb2 - - -from mmdnn.conversion.coreml.coreml_graph import CoremlGraph -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType -from mmdnn.conversion.common.DataStructure.parser import Parser -from mmdnn.conversion.common.utils import * - - -class CoremlParser(Parser): - - activation_map = { - "ReLU": "Relu", - "leakyReLU": "LeakyRelu", - "linear": "linear", - "thresholdedReLU": "ThresholdedRelu", - "PReLU": "PRelu", - "tanh": "Tanh", - "scaledTanh": "ScaledTanh", - 'sigmoid': "Sigmoid", - "sigmoidHard": "SigmoidHard", - "ELU": "Elu", - 'softplus': 'Softplus', - 'softsign': 'Softsign', - 'parametricSoftplus': "ParametricSoftplus" - } - - def __init__(self, model): - super(CoremlParser, self).__init__() - - # load model file into Coreml Graph - if isinstance(model, _string_types): - # model.encode() convert to str --- python2 may crash due to type 'unicode' - model = _MLModel(model) - model = model.get_spec() - self.weight_loaded = True - else: - assert False - - # Build Network Graph - - model_type = model.WhichOneof('Type') - if model_type == 'neuralNetworkClassifier': - CoremlParser.shape_dict = infer_shapes( - model.neuralNetworkClassifier, model.description.input) - elif model_type == 'neuralNetwork': - CoremlParser.shape_dict = infer_shapes( - model.neuralNetwork, model.description.input) - elif model_type == 'neuralNetworkRegressor': - CoremlParser.shape_dict = infer_shapes( - model.neuralNetworkRegressor, model.description.input) - else: - assert False - - # self.data_format ? 
TODO - self.data_format = 'channels_first' - self.coreml_graph = CoremlGraph(model) - self.coreml_graph.build() - self.lambda_layer_count = 0 - - def _load_model(self, model_network_path): - """Load a Coreml model from disk - - Parameters - ---------- - - model_network_path: str - Path where the model network path is (mlmodel file) - - Returns - ------- - model: A coreml model - """ - - from coremltools.models import MLModel - - if os.path.isfile(model_network_path): - # load the model network - loaded_model_ml = MLModel(model_network_path) - # convert to Model_pb2.Model - loaded_model_pb = loaded_model_ml.get_spec() - self.weight_loaded = True - print("Network file [{}] is loaded successfully.".format( - model_network_path)) - else: - print("Warning: Weights File [{}] is not found.".format( - model_network_path)) - - return loaded_model_pb - - @property - def src_graph(self): - return self.coreml_graph - - def gen_IR(self): - for i, layer in enumerate(self.coreml_graph.topological_sort): - - current_node = self.coreml_graph.get_node(layer) - current_node_layer = current_node.layer - - # determine the type of the current_node - node_type = current_node_layer.name - - if isinstance(current_node_layer, Model_pb2.FeatureDescription): - self.rename_InputLayer(current_node) - elif isinstance(current_node_layer, NeuralNetwork_pb2.NeuralNetworkLayer): - if current_node_layer.HasField("convolution"): - self.rename_CONV2D(current_node) - elif current_node_layer.HasField('batchnorm'): - self.rename_BatchNormalization(current_node) - elif current_node_layer.HasField("scale"): - self.rename_scale(current_node) - elif current_node_layer.HasField("pooling"): - self.rename_Pooling(current_node) - elif current_node_layer.HasField("activation"): - self.rename_Activation(current_node) - elif current_node_layer.HasField("softmax"): - self.rename_Softmax(current_node) - elif current_node_layer.HasField("padding"): - self.rename_Padding(current_node) - elif current_node_layer.HasField("add"): - self.rename_Add(current_node) - elif current_node_layer.HasField("flatten"): - self.rename_Flatten(current_node) - elif current_node_layer.HasField("innerProduct"): - self.rename_innerProduct(current_node) - elif current_node_layer.HasField("concat"): - self.rename_Concatenate(current_node) - else: - print( - "CoremlParser has not supported operator [{}]".format(node_type)) - self.rename_UNKNOWN(current_node) - else: - assert False - - # staticmethods - @staticmethod - def _set_output_shape(source_node, IR_node): - - shape = graph_pb2.TensorShape() - source_node_layer = source_node.layer - - layer_name = source_node_layer.output[0] - - shape_coreml = CoremlParser.shape_dict[layer_name] - # (seq, batch, C, H, W) & NHWC - - new_dim = shape.dim.add() - if shape_coreml[1] == 1: - new_dim.size = -1 - else: - new_dim.size = shape_coreml[1] - for index in [3, 4, 2]: - new_dim = shape.dim.add() - dim = shape_coreml[index] - new_dim.size = dim if dim else -1 - - IR_node.attr["_output_shapes"].list.shape.extend([shape]) - - @staticmethod - def _copy_and_reop(source_node, IR_node, new_op=None): - source_node_layer = source_node.layer - IR_node.name = source_node_layer.name - - if new_op: - IR_node.op = new_op - elif source_node_layer.HasField("convolution"): - IR_node.op = "convolution" - elif source_node_layer.HasField('batchnorm'): - IR_node.op = "batchnorm" - elif source_node_layer.HasField("scale"): - IR_node.op = "scale" - elif source_node_layer.HasField("pooling"): - IR_node.op = "pooling" - elif 
source_node_layer.HasField("activation"):
-            IR_node.op = "activation"
-        elif source_node_layer.HasField("softmax"):
-            IR_node.op = "softmax"
-        elif source_node_layer.HasField("padding"):
-            IR_node.op = "padding"
-        elif source_node_layer.HasField("add"):
-            IR_node.op = "add"
-        elif source_node_layer.HasField("flatten"):
-            IR_node.op = "flatten"
-        elif source_node_layer.HasField("innerProduct"):
-            IR_node.op = "innerProduct"
-        elif source_node_layer.HasField("concat"):
-            IR_node.op = "concatenate"
-        else:
-            assert False
-
-        # TODO dtype_map
-        if hasattr(source_node.layer, "dtype"):
-            IR_node.attr["dtype"].type = CoremlParser.dtype_map[source_node.layer.dtype]
-
-        CoremlParser._set_output_shape(source_node, IR_node)
-
-    @staticmethod
-    def _copy_shape(source_node, target_node):
-        if hasattr(source_node, "output_shape"):
-            for dim in source_node.output_shape:
-                new_dim = target_node.attr['shape'].shape.dim.add()
-                new_dim.size = -1 if dim is None else dim
-        else:
-            target_node.attr['shape'].shape.unknown_rank = True
-
-    @staticmethod
-    def _convert_dataformat(source_node, target_node):
-        if source_node.coreml_layer.data_format == "channels_last":
-            target_node.attr['data_format'].s = "NHWC"
-        elif source_node.coreml_layer.data_format == 'channels_first':
-            target_node.attr['data_format'].s = "NCHW"
-        else:
-            print("Warning: [%s] doesn't have data format info" %
-                  (source_node.coreml_layer.name))
-
-
-# convert methods
-
-    # convolution
-    def __convert_convolution(self, source_node, dim):
-
-        IR_node = self.IR_graph.node.add()
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-        source_node_layer = source_node.layer
-        source_node_conv = source_node_layer.convolution
-        layer_name = source_node_layer.name.split('/')[-1]
-
-        # important!
-        if source_node_conv.HasField('weights'):
-            # reshape the weight!
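# CoreML flattens convolution weights in (outputChannels, kernelChannels,
# kernelHeight, kernelWidth) order, while the IR expects (H, W, C_in, C_out);
# hence the reshape + transpose([2, 3, 1, 0]) below. A shape-only sketch with
# toy dimensions:
import numpy as np

o, k, h, w = 8, 3, 5, 5
flat = np.arange(o * k * h * w, dtype=np.float32)
ir_weights = flat.reshape([o, k, h, w]).transpose([2, 3, 1, 0])
assert ir_weights.shape == (h, w, k, o)  # (H, W, C_in, C_out)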
- [h, w, k, o] = list(source_node_conv.kernelSize) + \ - [source_node_conv.kernelChannels, source_node_conv.outputChannels] - # [2, 3, 0, 1] - weights = np.array(source_node_conv.weights.floatValue, dtype=np.float32).reshape( - [o, k, h, w]).transpose([2, 3, 1, 0]) - - kwargs = dict() - kwargs['kernel_shape'] = list(source_node_conv.kernelSize) + [ - source_node_conv.kernelChannels, source_node_conv.outputChannels] - - # pads - CoremlParser._convert_padding(source_node, IR_node) - # use_bias - kwargs['use_bias'] = source_node_conv.hasBias - # isDeconvolution - kwargs['isDeconvolution'] = source_node_conv.isDeconvolution - # name, op - if layer_name == 'sep': - CoremlParser._copy_and_reop(source_node, IR_node, "Conv") - elif layer_name == 'dw': - CoremlParser._copy_and_reop(source_node, IR_node, "DepthwiseConv") - weights = weights.transpose((0, 1, 3, 2)) - kwargs['kernel_shape'] = list(source_node_conv.kernelSize) + [ - source_node_conv.outputChannels, source_node_conv.kernelChannels] - - else: - if kwargs['isDeconvolution']: - CoremlParser._copy_and_reop( - source_node, IR_node, "ConvTranspose") - else: - CoremlParser._copy_and_reop(source_node, IR_node, "Conv") - - self.set_weight(source_node.name, 'weights', weights) - if source_node_layer.convolution.HasField('bias'): - self.set_weight(source_node.name, 'bias', np.array( - source_node_conv.bias.floatValue, dtype=np.float32)) - - # kwargs['kernel_shape'] = weights.shape - - kwargs['group'] = source_node_conv.nGroups - - # strides - # [1, sd, sh, sw, 1] - kwargs['strides'] = [1] + list(source_node_conv.stride) + [1] - - dilation = list(source_node_conv.dilationFactor) - if dilation == []: - dilation = [1, 1] - kwargs['dilations'] = [1] + dilation + [1] - - assign_IRnode_values(IR_node, kwargs) - - # activation - # TODO - self._defuse_activation(source_node) - - @staticmethod - def _convert_padding(source_node, IR_node): - source_node_layer = source_node.layer - - if source_node_layer.HasField('convolution'): - # padding in conv - - source_node_conv = source_node_layer.convolution - - if source_node_conv.HasField('valid'): - # pad in IR is [x1_b, x2_b, ..., x1_e, x2_e, ...] 
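# The IR stores pads in ONNX order, all begin-pads then all end-pads:
# [x1_b, x2_b, ..., x1_e, x2_e, ...]. The lists built below interleave
# (begin, end) per axis, TF-style, and convert_tf_pad_to_onnx (from
# mmdnn.conversion.common.utils) reorders them; an assumed equivalent:
def tf_pad_to_onnx(pads):
    return list(pads[0::2]) + list(pads[1::2])

# NHWC (begin, end) pairs: batch, height, width, channel
assert tf_pad_to_onnx([0, 0, 1, 1, 2, 2, 0, 0]) == [0, 1, 2, 0, 0, 1, 2, 0]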
- - dim = [] - for i in source_node_conv.valid.paddingAmounts.borderAmounts: - dim.extend([i.startEdgeSize, i.endEdgeSize]) - - if dim == []: - assign_IRnode_values(IR_node, {'auto_pad': 'VALID'}) - pad_dim = [0] * 8 - pad_dim = convert_tf_pad_to_onnx(pad_dim) - - assign_IRnode_values(IR_node, {'pads': pad_dim}) - else: - - # padding - pad_dim = [0, 0] - - pad_dim.extend(dim) - - pad_dim += [0, 0] - - pad_dim = convert_tf_pad_to_onnx(pad_dim) - - assign_IRnode_values(IR_node, {'pads': pad_dim}) - - elif source_node_conv.HasField('same'): - - # compute padding for 'same' - assign_IRnode_values(IR_node, {'auto_pad': "SAME"}) - - kernel = list(source_node_conv.kernelSize) - dilation = list(source_node_conv.dilationFactor) - if dilation == []: - dilation = [1, 1] - stride = list(source_node_conv.stride) - if stride == []: - stride = [1, 1] - - kernel[0] = dilation[0] * (kernel[0] - 1) + 1 - kernel[1] = dilation[1] * (kernel[1] - 1) + 1 - - if stride == [1, 1]: - - # https://discuss.mxnet.io/t/pooling-and-convolution-with-same-mode/528/3 - - p0 = (kernel[0] - 1) // 2 - p1 = (kernel[1] - 1) // 2 - - if kernel[0] % 2 == 0: - p00 = p0 - p01 = p0 + 1 - else: - p00 = p0 - p01 = p0 - - if kernel[1] % 2 == 0: - p10 = p1 - p11 = p1 + 1 - else: - p10 = p1 - p11 = p1 - - pad_dim = [0, 0, p00, p01, p10, p11, 0, 0] - - pad_dim = convert_tf_pad_to_onnx(pad_dim) - - assign_IRnode_values(IR_node, {'pads': pad_dim}) - else: - # https://www.jianshu.com/p/05c4f1621c7e - pad_dim = [0, 0, 0, 0, 0, 0, 0, 0] - - pad_dim = convert_tf_pad_to_onnx(pad_dim) - - assign_IRnode_values(IR_node, {'pads': pad_dim}) - - else: - assert False - - elif source_node_layer.HasField('pooling'): - # padding in pooling - source_node_pool = source_node_layer.pooling - if source_node_pool.HasField('valid'): - - dim = [] - for i in source_node_pool.valid.paddingAmounts.borderAmounts: - dim.extend([i.startEdgeSize, i.endEdgeSize]) - - if dim == []: - assign_IRnode_values(IR_node, {'auto_pad': 'VALID'}) - pad_dim = [0] * 8 - pad_dim = convert_tf_pad_to_onnx(pad_dim) - - assign_IRnode_values(IR_node, {'pads': pad_dim}) - else: - # padding - pad_dim = [0, 0] - - pad_dim.extend(dim) - - pad_dim += [0, 0] - pad_dim = convert_tf_pad_to_onnx(pad_dim) - assign_IRnode_values(IR_node, {'pads': pad_dim}) - - elif source_node_pool.HasField('same'): - - assign_IRnode_values(IR_node, {'auto_pad': 'SAME'}) - - kernel = list(source_node_pool.kernelSize) - stride = list(source_node_pool.stride) - if stride == []: - stride = [1, 1] - - if stride == [1, 1]: - # https://discuss.mxnet.io/t/pooling-and-convolution-with-same-mode/528/3 - p0 = (kernel[0] - 1) // 2 - p1 = (kernel[1] - 1) // 2 - - if kernel[0] % 2 == 0: - p00 = p0 - p01 = p0 + 1 - else: - p00 = p0 - p01 = p0 - - if kernel[1] % 2 == 0: - p10 = p1 - p11 = p1 + 1 - else: - p10 = p1 - p11 = p1 - - pad_dim = [0, 0, p00, p01, p10, p11, 0, 0] - - pad_dim = convert_tf_pad_to_onnx(pad_dim) - - assign_IRnode_values(IR_node, {'pads': pad_dim}) - else: - # TODO - pad_dim = [0, 0, 0, 0, 0, 0, 0, 0] - - pad_dim = convert_tf_pad_to_onnx(pad_dim) - - assign_IRnode_values(IR_node, {'pads': pad_dim}) - - elif source_node_pool.HasField('includeLastPixel'): - - # symmetric padding - h, w = source_node_pool.includeLastPixel.paddingAmounts - assign_IRnode_values( - IR_node, {'pads': [0, h, h, 0, 0, w, w, 0]}) - else: - assert False - - else: - assert False - - def _convert_merge(self, source_node, new_name=None): - - IR_node = self.IR_graph.node.add() - - # name, op - CoremlParser._copy_and_reop(source_node, IR_node, 
new_name)
-
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-
-        # For concat axis
-        # CoreML concat has no axis attribute, so use the last axis
-        IR_node.attr['axis'].i = len(
-            CoremlParser.shape_dict[source_node.layer.output[0]]) - 1 - 1
-        # the first -1 drops CoreML's extra sequence axis,
-        # the second -1 picks the last remaining axis
-
-        return IR_node
-
-    def _convert_padding_api(self, source_node, IR_node):
-        # name, op
-        CoremlParser._copy_and_reop(source_node, IR_node, "Pad")
-
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-
-        kwargs = dict()
-
-        source_node_layer = source_node.layer
-        source_node_pad = source_node_layer.padding
-
-        if source_node_pad.HasField('constant'):
-            kwargs['mode'] = 'CONSTANT'
-        elif source_node_pad.HasField('reflection'):
-            kwargs['mode'] = 'REFLECT'
-        elif source_node_pad.HasField('replication'):
-            kwargs['mode'] = 'SYMMETRIC'
-        else:
-            assert False
-
-        dim = []
-        for i in source_node_pad.paddingAmounts.borderAmounts:
-            dim.extend([i.startEdgeSize, i.endEdgeSize])
-
-        if dim == []:
-            dim = [0, 0, 0, 0]
-
-        # padding
-        kwargs['pads'] = [0, 0]
-        kwargs['pads'].extend(dim)
-        kwargs['pads'] += [0, 0]
-        kwargs['pads'] = convert_tf_pad_to_onnx(kwargs['pads'])
-
-        assign_IRnode_values(IR_node, kwargs)
-
-    def _defuse_activation(self, source_node):
-        # Future Module TODO
-        pass
-
-
-# rename methods
-
-    def rename_UNKNOWN(self, source_node):
-        print(source_node.layer)
-        IR_node = self.IR_graph.node.add()
-        CoremlParser._copy_and_reop(source_node, IR_node)
-        self.convert_inedge(source_node, IR_node)
-
-    def rename_Activation(self, coreml_node):
-        IR_node = self.IR_graph.node.add()
-
-        coreml_node_layer = coreml_node.layer
-        coreml_node_activation = coreml_node_layer.activation
-
-        # name, op
-        for activation_name in self.activation_map.keys():
-            if coreml_node_activation.HasField(activation_name):
-                CoremlParser._copy_and_reop(
-                    coreml_node, IR_node, self.activation_map[activation_name])
-
-        # activation type
-        activation_type = coreml_node_activation.WhichOneof("NonlinearityType")
-
-        if activation_type == 'leakyReLU':
-            assign_IRnode_values(
-                IR_node, {'alpha': coreml_node_activation.leakyReLU.alpha})
-        elif activation_type == 'PReLU':
-            assign_IRnode_values(
-                IR_node, {'gamma': coreml_node_activation.PReLU.alpha})
-        elif activation_type == 'ELU':
-            assign_IRnode_values(
-                IR_node, {'alpha': coreml_node_activation.ELU.alpha})
-        elif activation_type == 'thresholdedReLU':
-            assign_IRnode_values(
-                IR_node, {'alpha': coreml_node_activation.thresholdedReLU.alpha})
-        elif activation_type == 'scaledTanh':
-            assign_IRnode_values(
-                IR_node, {'alpha': coreml_node_activation.scaledTanh.alpha,
-                          'beta': coreml_node_activation.scaledTanh.beta})
-        elif activation_type == 'linear':
-            assign_IRnode_values(
-                IR_node, {'alpha': coreml_node_activation.linear.alpha,
-                          'beta': coreml_node_activation.linear.beta})
-        elif activation_type == 'sigmoidHard':
-            assign_IRnode_values(
-                IR_node, {'alpha': coreml_node_activation.sigmoidHard.alpha,
-                          'beta': coreml_node_activation.sigmoidHard.beta})
-        elif activation_type == 'parametricSoftplus':
-            assign_IRnode_values(
-                IR_node, {'alpha': coreml_node_activation.parametricSoftplus.alpha,
-                          'beta': coreml_node_activation.parametricSoftplus.beta})
-        # else:
-        #     assert False
-
-        # input edge
-        self.convert_inedge(coreml_node, IR_node)
-
-    # Merge layers
-    def rename_Add(self,
source_node): - self._convert_merge(source_node, 'Add') - - def rename_CONV2D(self, source_node): - self.__convert_convolution(source_node, 2) - - def rename_InputLayer(self, source_node): - # only for training - IR_node = self.IR_graph.node.add() - - # name, op - IR_node.name = source_node.name - IR_node.op = "DataInput" - graph_shape = graph_pb2.TensorShape() - coreml_node_layer = source_node.layer - - new_dim = graph_shape.dim.add() - new_dim.size = -1 - new_dim = graph_shape.dim.add() - new_dim.size = coreml_node_layer.type.imageType.width - new_dim = graph_shape.dim.add() - new_dim.size = coreml_node_layer.type.imageType.height - new_dim = graph_shape.dim.add() - - if coreml_node_layer.type.imageType.colorSpace == 10: - new_dim.size = 2 - elif coreml_node_layer.type.imageType.colorSpace == 20: - new_dim.size = 3 - elif coreml_node_layer.type.imageType.colorSpace == 30: - new_dim.size = 3 - else: - assert False - IR_node.attr["_output_shapes"].list.shape.extend([graph_shape]) - - # input edge - self.convert_inedge(source_node, IR_node) - - # shape - # NHWC channel last - # in fact, here is NWHC - new_dim = IR_node.attr['shape'].shape.dim.add() - new_dim.size = -1 - new_dim = IR_node.attr['shape'].shape.dim.add() - new_dim.size = coreml_node_layer.type.imageType.width - new_dim = IR_node.attr['shape'].shape.dim.add() - new_dim.size = coreml_node_layer.type.imageType.height - new_dim = IR_node.attr['shape'].shape.dim.add() - - if coreml_node_layer.type.imageType.colorSpace == 10: - new_dim.size = 2 - elif coreml_node_layer.type.imageType.colorSpace == 20: - new_dim.size = 3 - elif coreml_node_layer.type.imageType.colorSpace == 30: - new_dim.size = 3 - else: - assert False - - def rename_BatchNormalization(self, coreml_node): - - IR_node = self.IR_graph.node.add() - - coreml_node_layer = coreml_node.layer - coreml_node_bn = coreml_node_layer.batchnorm - - # name, op - CoremlParser._copy_and_reop(coreml_node, IR_node, "BatchNorm") - - # input edge - self.convert_inedge(coreml_node, IR_node) - - # axis TODO - # channels_first, then axis = 1 - IR_node.attr['axis'].i = -1 - - # scale - IR_node.attr['scale'].b = coreml_node_bn.HasField("gamma") - - # bias - IR_node.attr['bias'].b = coreml_node_bn.HasField("beta") - - # epsilon - IR_node.attr['epsilon'].f = coreml_node_bn.epsilon - - if IR_node.attr['scale'].b: - self.set_weight(coreml_node_layer.name, "scale", np.array( - coreml_node_bn.gamma.floatValue, dtype=np.float32)) - - if IR_node.attr['bias'].b: - self.set_weight(coreml_node_layer.name, "bias", np.array( - coreml_node_bn.beta.floatValue, dtype=np.float32)) - - gamma, beta = None, None - if IR_node.attr['scale'].b: - gamma = np.array(coreml_node_bn.gamma.floatValue, dtype=np.float32) - if IR_node.attr['bias'].b: - beta = np.array(coreml_node_bn.beta.floatValue, dtype=np.float32) - - mean = np.array(coreml_node_bn.mean.floatValue) - variance = np.array(coreml_node_bn.variance.floatValue) - - gamma = np.ones(mean.shape) if gamma is None else gamma - beta = np.zeros(mean.shape) if beta is None else beta - - # compute adjusted parameters - # Reference: parameter transformation https://github.com/apple/coremltools/issues/153 - f = 1.0 / np.sqrt(variance + coreml_node_bn.epsilon) - gamma1 = gamma*f - beta1 = beta - gamma*mean*f - mean[:] = 0.0 # mean - variance[:] = 1.0 - .00001 # stddev - - # convert type because of tensorflow - gamma1 = gamma1.astype(np.float32) - beta1 = beta1.astype(np.float32) - mean = mean.astype(np.float32) - variance = variance.astype(np.float32) - - if 
IR_node.attr['scale'].b: - self.set_weight(coreml_node_layer.name, "scale", gamma1) - - if IR_node.attr['bias'].b: - self.set_weight(coreml_node_layer.name, "bias", beta1) - - # mean - self.set_weight(coreml_node_layer.name, "mean", mean) - - # var - self.set_weight(coreml_node_layer.name, "var", variance) - - def rename_scale(self, coreml_node): - - IR_node = self.IR_graph.node.add() - - coreml_node_layer = coreml_node.layer - coreml_node_scale = coreml_node_layer.scale - - # name, op - CoremlParser._copy_and_reop(coreml_node, IR_node, "Scale") - - # input edge - self.convert_inedge(coreml_node, IR_node) - - # bias - IR_node.attr['use_bias'].b = coreml_node_scale.hasBias - - IR_node.attr['scale'].b = True - - self.set_weight(coreml_node_layer.name, "scale", np.array( - coreml_node_scale.scale.floatValue).astype(np.float32)) - self.set_weight(coreml_node_layer.name, "scale_mean", np.zeros_like( - coreml_node_scale.scale.floatValue).astype(np.float32)) - self.set_weight(coreml_node_layer.name, "scale_var", np.ones_like( - coreml_node_scale.scale.floatValue).astype(np.float32)) - - self.set_weight(coreml_node_layer.name, "shapeScale", - coreml_node_scale.shapeScale[0]) - - if IR_node.attr['use_bias'].b: - self.set_weight(coreml_node_layer.name, "bias", np.array( - coreml_node_scale.bias.floatValue).astype(np.float32)) - self.set_weight(coreml_node_layer.name, "shapeBias", - coreml_node_scale.shapeBias[0]) - - def rename_Pooling(self, coreml_node): - - IR_node = self.IR_graph.node.add() - - coreml_node_layer = coreml_node.layer - coreml_node_pool = coreml_node_layer.pooling - - # name, op - CoremlParser._copy_and_reop(coreml_node, IR_node, "Pool") - - # input edge - self.convert_inedge(coreml_node, IR_node) - - kwargs = {} - - # MAX = 0, AVERAGE = 1, L2 = 2 - if coreml_node_pool.type == 0: - kwargs['pooling_type'] = 'MAX' - elif coreml_node_pool.type == 1: - kwargs['pooling_type'] = 'AVG' - elif coreml_node_pool.type == 2: - kwargs['pooling_type'] = 'L2' - - is_global = coreml_node_pool.globalPooling - - if is_global: - kwargs['global_pooling'] = True - kwargs['global_pooling_coreml'] = True - kwargs['shape_coreml'] = [self.shape_dict[coreml_node_layer.name][3], - self.shape_dict[coreml_node_layer.name][4], self.shape_dict[coreml_node_layer.name][2]] - - # padding - self._convert_padding(coreml_node, IR_node) - - # strides - # [1, sd, sh, sw, 1] - kwargs['strides'] = [1] + list(coreml_node_pool.stride) + [1] - - # window_shape - # [1, pd, ph, pw, 1] - kwargs['kernel_shape'] = [1] + list(coreml_node_pool.kernelSize) + [1] - - assign_IRnode_values(IR_node, kwargs) - - def rename_Softmax(self, coreml_node): - IR_node = self.IR_graph.node.add() - - # name, op - CoremlParser._copy_and_reop(coreml_node, IR_node, 'Softmax') - - # input edge - self.convert_inedge(coreml_node, IR_node) - - def rename_Concatenate(self, source_node): - IR_node = self._convert_merge(source_node, 'Concat') - - def rename_Flatten(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - CoremlParser._copy_and_reop(source_node, IR_node, 'Flatten') - - # input edge - self.convert_inedge(source_node, IR_node) - - def rename_innerProduct(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - CoremlParser._copy_and_reop(source_node, IR_node, "FullyConnected") - - # input edge - self.convert_inedge(source_node, IR_node) - - source_node_layer = source_node.layer - source_node_inner = source_node_layer.innerProduct - - # units - IR_node.attr['units'].i = source_node_inner.outputChannels - - # 
use_bias - IR_node.attr['use_bias'].b = source_node_inner.hasBias - - # weights - self.set_weight(source_node_layer.name, 'weights', np.array(source_node_inner.weights.floatValue).astype( - np.float32).reshape(source_node_inner.outputChannels, source_node_inner.inputChannels).transpose()) - if IR_node.attr['use_bias'].b: - self.set_weight(source_node_layer.name, 'bias', np.array( - source_node_inner.bias.floatValue).astype(np.float32)) - # change to single because of the tf matmul - - # in features - IR_node.attr['in_features'].i = source_node_inner.inputChannels - - def rename_Padding(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - self._convert_padding_api(source_node, IR_node) -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -from coremltools.models import datatypes - - -def _infer_coreml_input_shape(IR_shape, if_convert=True): - """Infer CoreML input shape from IR shape. - """ - if len(IR_shape) == 0: - # the end of the tensorflow_resnet_v2_50's squeeze shape is [unknown_rank: true] with len 0 - # 1001 means the 1001 classes for tensorflow_resnet_v2_50 - # !Alert! TODO - # Future implement can be changed to the last two layer - shape = [1001, 1, 1] - elif len(IR_shape) == 1: - # TODO - remove style transfer 1D hack - # Input is 1D but it goes to the width dimension: (1,1,W) - shape = [1, 1, IR_shape[0]] # (C,H,W) - elif len(IR_shape) == 2: - # assume (Batch, Channels) - Batch dimension should be dropped - shape = [IR_shape[1]] - elif len(IR_shape) == 3: - # assume (Batch, Sequence-Length, channels) - shape = [IR_shape[2], 1, IR_shape[1]] - elif len(IR_shape) == 4: # (B,H,W,C) --> (C,H,W) - shape = [IR_shape[3], IR_shape[1], IR_shape[2]] # (C,H,W) - else: - raise ValueError('Unrecognized IR input shape {}'.format(shape)) - if if_convert: - shape = datatypes.Array(*shape) - return shape -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import torch -from collections import OrderedDict - - -def parse_cfg(cfgfile): - def erase_comment(line): - line = line.split('#')[0] - return line - blocks = [] - fp = open(cfgfile, 'r') - block = None - line = fp.readline() - while line != '': - line = line.rstrip() - if line == '' or line[0] == '#': - line = fp.readline() - continue - elif line[0] == '[': - if block: - blocks.append(block) - block = OrderedDict() - block['type'] = line.lstrip('[').rstrip(']') - # set default value - if block['type'] == 'convolutional': - block['batch_normalize'] = 0 - else: - line = erase_comment(line) - key, value = line.split('=') - key = key.strip() - if key == 'type': - key = '_type' - value = value.strip() - block[key] = value - line = fp.readline() - - if block: - blocks.append(block) - fp.close() - return blocks - - -def print_cfg(blocks): - for block in blocks: - print('[%s]' % (block['type'])) - for key, value in block.items(): - if key != 'type': - print('%s=%s' % (key, value)) - print('') - - -def save_cfg(blocks, cfgfile): - with open(cfgfile, 'w') as fp: - for block in blocks: - fp.write('[%s]\n' % (block['type'])) - for key, value in block.items(): - if key != 'type': - fp.write('%s=%s\n' % (key, value)) - fp.write('\n') - - -def print_cfg_nicely(blocks): - 
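# Usage sketch for the cfg helpers above: parse a Darknet cfg into ordered
# blocks, inspect the [net] header, and write the blocks back out.
# 'yolov3.cfg' is a placeholder path.
blocks = parse_cfg('yolov3.cfg')
net = blocks[0]
print(net['type'], net.get('height'), net.get('width'))
save_cfg(blocks, 'yolov3-roundtrip.cfg')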
print('layer filters size input output') - prev_width = 416 - prev_height = 416 - prev_filters = 3 - out_filters = [] - out_widths = [] - out_heights = [] - ind = -2 - for block in blocks: - ind = ind + 1 - if block['type'] == 'net': - prev_width = int(block['width']) - prev_height = int(block['height']) - continue - elif block['type'] == 'convolutional': - filters = int(block['filters']) - kernel_size = int(block['size']) - stride = int(block['stride']) - is_pad = int(block['pad']) - pad = (kernel_size-1)/2 if is_pad else 0 - width = (prev_width + 2*pad - kernel_size)/stride + 1 - height = (prev_height + 2*pad - kernel_size)/stride + 1 - print('%5d %-6s %4d %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % (ind, 'conv', filters, - kernel_size, kernel_size, stride, prev_width, prev_height, prev_filters, width, height, filters)) - prev_width = width - prev_height = height - prev_filters = filters - out_widths.append(prev_width) - out_heights.append(prev_height) - out_filters.append(prev_filters) - elif block['type'] == 'maxpool': - pool_size = int(block['size']) - stride = int(block['stride']) - width = prev_width/stride - height = prev_height/stride - print('%5d %-6s %d x %d / %d %3d x %3d x%4d -> %3d x %3d x%4d' % (ind, 'max', - pool_size, pool_size, stride, prev_width, prev_height, prev_filters, width, height, filters)) - prev_width = width - prev_height = height - prev_filters = filters - out_widths.append(prev_width) - out_heights.append(prev_height) - out_filters.append(prev_filters) - elif block['type'] == 'avgpool': - width = 1 - height = 1 - print('%5d %-6s %3d x %3d x%4d -> %3d' % - (ind, 'avg', prev_width, prev_height, prev_filters, prev_filters)) - prev_width = 1 - prev_height = 1 - out_widths.append(prev_width) - out_heights.append(prev_height) - out_filters.append(prev_filters) - elif block['type'] == 'softmax': - print('%5d %-6s -> %3d' % - (ind, 'softmax', prev_filters)) - out_widths.append(prev_width) - out_heights.append(prev_height) - out_filters.append(prev_filters) - elif block['type'] == 'cost': - print('%5d %-6s -> %3d' % - (ind, 'cost', prev_filters)) - out_widths.append(prev_width) - out_heights.append(prev_height) - out_filters.append(prev_filters) - elif block['type'] == 'reorg': - stride = int(block['stride']) - filters = stride * stride * prev_filters - width = prev_width/stride - height = prev_height/stride - print('%5d %-6s / %d %3d x %3d x%4d -> %3d x %3d x%4d' % - (ind, 'reorg', stride, prev_width, prev_height, prev_filters, width, height, filters)) - prev_width = width - prev_height = height - prev_filters = filters - out_widths.append(prev_width) - out_heights.append(prev_height) - out_filters.append(prev_filters) - elif block['type'] == 'route': - layers = block['layers'].split(',') - layers = [int(i) if int(i) > 0 else int(i)+ind for i in layers] - if len(layers) == 1: - print('%5d %-6s %d' % (ind, 'route', layers[0])) - prev_width = out_widths[layers[0]] - prev_height = out_heights[layers[0]] - prev_filters = out_filters[layers[0]] - elif len(layers) == 2: - print('%5d %-6s %d %d' % (ind, 'route', layers[0], layers[1])) - prev_width = out_widths[layers[0]] - prev_height = out_heights[layers[0]] - assert(prev_width == out_widths[layers[1]]) - assert(prev_height == out_heights[layers[1]]) - prev_filters = out_filters[layers[0]] + out_filters[layers[1]] - out_widths.append(prev_width) - out_heights.append(prev_height) - out_filters.append(prev_filters) - elif block['type'] == 'region': - print('%5d %-6s' % (ind, 'detection')) - out_widths.append(prev_width) 
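# The spatial arithmetic above is the usual formula
# out = (in + 2*pad - kernel) / stride + 1; under Python 3 the plain '/'
# yields floats, so '//' is almost certainly what is intended. (Note the
# maxpool branch also prints a stale 'filters' left over from the previous
# convolutional block; pooling keeps the channel count, i.e. prev_filters.)
def conv_out(size, pad, kernel, stride):
    return (size + 2 * pad - kernel) // stride + 1

assert conv_out(416, 1, 3, 1) == 416  # 3x3 'same'-style conv
assert conv_out(416, 0, 2, 2) == 208  # 2x2/2 maxpool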
- out_heights.append(prev_height) - out_filters.append(prev_filters) - elif block['type'] == 'shortcut': - from_id = int(block['from']) - from_id = from_id if from_id > 0 else from_id+ind - print('%5d %-6s %d' % (ind, 'shortcut', from_id)) - prev_width = out_widths[from_id] - prev_height = out_heights[from_id] - prev_filters = out_filters[from_id] - out_widths.append(prev_width) - out_heights.append(prev_height) - out_filters.append(prev_filters) - elif block['type'] == 'softmax': - print('%5d %-6s' % (ind, 'softmax')) - out_widths.append(prev_width) - out_heights.append(prev_height) - out_filters.append(prev_filters) - elif block['type'] == 'connected': - filters = int(block['output']) - print('%5d %-6s %d -> %3d' % - (ind, 'connected', prev_filters, filters)) - prev_filters = filters - out_widths.append(1) - out_heights.append(1) - out_filters.append(prev_filters) - else: - print('unknown type %s' % (block['type'])) - - -def load_conv(buf, start, conv_model): - num_w = conv_model.weight.numel() - num_b = conv_model.bias.numel() - conv_model.bias.data.copy_(torch.from_numpy(buf[start:start+num_b])) - start = start + num_b - conv_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w])) - start = start + num_w - return start - - -def save_conv(fp, conv_model): - if conv_model.bias.is_cuda: - convert2cpu(conv_model.bias.data).numpy().tofile(fp) - convert2cpu(conv_model.weight.data).numpy().tofile(fp) - else: - conv_model.bias.data.numpy().tofile(fp) - conv_model.weight.data.numpy().tofile(fp) - - -def load_conv_bn(buf, start, conv_model, bn_model): - num_w = conv_model.weight.numel() - num_b = bn_model.bias.numel() - bn_model.bias.data.copy_(torch.from_numpy(buf[start:start+num_b])) - start = start + num_b - bn_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_b])) - start = start + num_b - bn_model.running_mean.copy_(torch.from_numpy(buf[start:start+num_b])) - start = start + num_b - bn_model.running_var.copy_(torch.from_numpy(buf[start:start+num_b])) - start = start + num_b - conv_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w])) - start = start + num_w - return start - - -def save_conv_bn(fp, conv_model, bn_model): - if bn_model.bias.is_cuda: - convert2cpu(bn_model.bias.data).numpy().tofile(fp) - convert2cpu(bn_model.weight.data).numpy().tofile(fp) - convert2cpu(bn_model.running_mean).numpy().tofile(fp) - convert2cpu(bn_model.running_var).numpy().tofile(fp) - convert2cpu(conv_model.weight.data).numpy().tofile(fp) - else: - bn_model.bias.data.numpy().tofile(fp) - bn_model.weight.data.numpy().tofile(fp) - bn_model.running_mean.numpy().tofile(fp) - bn_model.running_var.numpy().tofile(fp) - conv_model.weight.data.numpy().tofile(fp) - - -def save_conv_shrink_bn(fp, conv_model, bn_model, eps=1e-5): - if bn_model.bias.is_cuda: - bias = bn_model.bias.data - bn_model.running_mean * \ - bn_model.weight.data / torch.sqrt(bn_model.running_var + eps) - convert2cpu(bias).numpy().tofile(fp) - s = conv_model.weight.data.size() - weight = conv_model.weight.data * (bn_model.weight.data / torch.sqrt( - bn_model.running_var + eps)).view(-1, 1, 1, 1).repeat(1, s[1], s[2], s[3]) - convert2cpu(weight).numpy().tofile(fp) - else: - bias = bn_model.bias.data - bn_model.running_mean * \ - bn_model.weight.data / torch.sqrt(bn_model.running_var + eps) - bias.numpy().tofile(fp) - s = conv_model.weight.data.size() - weight = conv_model.weight.data * (bn_model.weight.data / torch.sqrt( - bn_model.running_var + eps)).view(-1, 1, 1, 1).repeat(1, s[1], s[2], s[3]) - 
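# save_conv_shrink_bn folds the batch norm into the convolution:
# w' = w * gamma / sqrt(var + eps), b' = beta - mean * gamma / sqrt(var + eps),
# so conv(x, w') + b' == bn(conv(x, w)) in eval mode. A small torch check
# with toy shapes:
import torch

torch.manual_seed(0)
x = torch.randn(1, 3, 8, 8)
conv = torch.nn.Conv2d(3, 4, 3, bias=False)
bn = torch.nn.BatchNorm2d(4).eval()
bn.running_mean.uniform_(-1, 1)
bn.running_var.uniform_(0.5, 1.5)

scale = bn.weight / torch.sqrt(bn.running_var + bn.eps)
w_folded = conv.weight * scale.view(-1, 1, 1, 1)
b_folded = bn.bias - bn.running_mean * scale

fused = torch.nn.functional.conv2d(x, w_folded, b_folded)
assert torch.allclose(fused, bn(conv(x)), atol=1e-5)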
weight.numpy().tofile(fp) - - -def load_fc(buf, start, fc_model): - num_w = fc_model.weight.numel() - num_b = fc_model.bias.numel() - fc_model.bias.data.copy_(torch.from_numpy(buf[start:start+num_b])) - start = start + num_b - fc_model.weight.data.copy_(torch.from_numpy(buf[start:start+num_w])) - start = start + num_w - return start - - -def save_fc(fp, fc_model): - fc_model.bias.data.numpy().tofile(fp) - fc_model.weight.data.numpy().tofile(fp) - - -if __name__ == '__main__': - import sys - if len(sys.argv) != 2: - print('Usage: python cfg.py model.cfg') - exit() - - blocks = parse_cfg(sys.argv[1]) - print_cfg_nicely(blocks) -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import numpy as np -from collections import OrderedDict -from mmdnn.conversion.common.DataStructure.graph import GraphNode, Graph -# from tensorflow.core.framework.node_def_pb2 import NodeDef -# from tensorflow.core.framework import attr_value_pb2 - - -class DarknetGraphNode(GraphNode): - - def __init__(self, layer): - - super(DarknetGraphNode, self).__init__(layer) - - @property - def name(self): - return self.layer['name'] - - @property - def type(self): - return self.layer['type'] - - @property - def dk_layer(self): - return self.layer - - def get_attr(self, name, default_value=None): - if name in self.layer['attr'].keys(): - return self.layer['attr'][name] - else: - return default_value - - -class DarknetGraph(Graph): - - def __init__(self, model): - # pass - - super(DarknetGraph, self).__init__(model) - self.layer_num_map = {} - self.model = model - self.weights = {} - self.original_list = OrderedDict() - - @staticmethod - def dim_str_to_int(input_dim): - if type(input_dim) == list: - return [int(i) for i in input_dim] - - @staticmethod - def conv_output_width(width, padding, kernel_size, stride): - return (width + 2*padding - kernel_size)/stride + 1 - - @staticmethod - def conv_output_height(height, padding, kernel_size, stride): - return (height + 2*padding - kernel_size)/stride + 1 - - def build(self): - - for i, block in enumerate(self.model): - # print("\n") - # print(i) - # print(block) - - # continue - node = OrderedDict() - if block['type'] == 'net': - node['name'] = 'dk_Input' - node['input'] = ['data'] - node['type'] = 'DataInput' - node['input_dim'] = ['-1'] - # NHWC - node['input_dim'].append(block['height']) - node['input_dim'].append(block['width']) - node['input_dim'].append(block['channels']) - input_param = OrderedDict() - input_param['shape'] = self.dim_str_to_int(node['input_dim']) - input_param['_output_shape'] = self.dim_str_to_int( - node['input_dim']) - node['attr'] = input_param - self.layer_map[node['name']] = DarknetGraphNode(node) - self.original_list[node['name']] = DarknetGraphNode(node) - self.layer_num_map[i] = node['name'] - pre_node_name = node['name'] - - elif block['type'] == 'convolutional': - conv_layer = OrderedDict() - conv_layer['input'] = [pre_node_name] - - input_shape = self.layer_map[pre_node_name].get_attr( - '_output_shape') - w = input_shape[1] - h = input_shape[2] - channels = input_shape[3] - # assert False - - if 'name' in block.keys(): - 
conv_layer['name'] = block['name'] - else: - conv_layer['name'] = 'layer%d-conv' % i - conv_layer['type'] = 'Conv' - - convolution_param = OrderedDict() - convolution_param['num_output'] = int(block['filters']) - convolution_param['kernel_size'] = int(block['size']) - convolution_param['kernel'] = [int(block['size']), int( - block['size']), channels, int(block['filters'])] - convolution_param['pad'] = int(block['pad']) - - if block['pad'] == '1': - convolution_param['padding'] = int( - convolution_param['kernel_size'])//2 - convolution_param['stride'] = int(block['stride']) - if block['batch_normalize'] == '1': - convolution_param['bias_term'] = 'false' - else: - convolution_param['bias_term'] = 'true' - output_w = self.conv_output_width( - w, convolution_param['padding'], convolution_param['kernel_size'], convolution_param['stride']) - output_h = self.conv_output_height( - h, convolution_param['padding'], convolution_param['kernel_size'], convolution_param['stride']) - convolution_param['_output_shape'] = [-1, output_w, - output_h, convolution_param['num_output']] - conv_layer['attr'] = convolution_param - self.layer_map[conv_layer['name'] - ] = DarknetGraphNode(conv_layer) - self.original_list[conv_layer['name'] - ] = DarknetGraphNode(conv_layer) - pre_node_name = conv_layer['name'] - - if block['batch_normalize'] == '1': - bn_layer = OrderedDict() - bn_layer['input'] = [pre_node_name] - - input_shape = self.layer_map[pre_node_name].get_attr( - '_output_shape') - if 'name' in block.keys(): - bn_layer['name'] = '%s-bn' % block['name'] - else: - bn_layer['name'] = 'layer%d-bn' % i - bn_layer['type'] = 'BatchNorm' - batch_norm_param = OrderedDict() - batch_norm_param['use_global_stats'] = True - batch_norm_param['_output_shape'] = convolution_param['_output_shape'] - batch_norm_param['bias_term'] = True - batch_norm_param['scale'] = True - bn_layer['attr'] = batch_norm_param - - self.layer_map[bn_layer['name'] - ] = DarknetGraphNode(bn_layer) - self.original_list[bn_layer['name'] - ] = DarknetGraphNode(bn_layer) - - pre_node_name = bn_layer['name'] - - if block['activation'] != 'linear': - relu_layer = OrderedDict() - relu_layer['input'] = [pre_node_name] - if 'name' in block.keys(): - relu_layer['name'] = '%s-act' % block['name'] - else: - relu_layer['name'] = 'layer%d-act' % i - relu_layer['type'] = 'ReLU' - relu_param = OrderedDict() - if block['activation'] == 'leaky': - relu_layer['type'] = 'leakyReLU' - relu_param['negative_slope'] = '0.1' - relu_param['_output_shape'] = input_shape - relu_layer['attr'] = relu_param - self.layer_map[relu_layer['name'] - ] = DarknetGraphNode(relu_layer) - self.layer_num_map[i] = relu_layer['name'] - self.original_list[relu_layer['name'] - ] = DarknetGraphNode(relu_layer) - pre_node_name = relu_layer['name'] - - else: - self.layer_num_map[i] = bn_layer['name'] - - elif block['type'] == 'maxpool': - max_layer = OrderedDict() - max_layer['input'] = [pre_node_name] - if 'name' in block.keys(): - max_layer['name'] = block['name'] - else: - max_layer['name'] = 'layer%d-maxpool' % i - max_layer['type'] = 'Pooling' - pooling_param = OrderedDict() - pooling_param['kernel_size'] = int(block['size']) - pooling_param['stride'] = int(block['stride']) - pooling_param['pool'] = 'MAX' - pooling_param['padding'] = 0 - if 'pad' in block.keys() and int(block['pad']) == 1: - pooling_param['padding'] = (int(block['size'])-1)/2 - - input_shape = self.layer_map[pre_node_name].get_attr( - '_output_shape') - w = input_shape[1] - h = input_shape[2] - output_w = ( - w + 
2*pooling_param['padding'])/pooling_param['stride'] - output_h = ( - h + 2*pooling_param['padding'])/pooling_param['stride'] - - pooling_param['_output_shape'] = [-1, - output_w, output_h, input_shape[-1]] - max_layer['attr'] = pooling_param - self.layer_map[max_layer['name']] = DarknetGraphNode(max_layer) - self.original_list[max_layer['name'] - ] = DarknetGraphNode(max_layer) - self.layer_num_map[i] = max_layer['name'] - pre_node_name = max_layer['name'] - - elif block['type'] == 'avgpool': - avg_layer = OrderedDict() - - avg_layer['input'] = [pre_node_name] - if 'name' in block.keys(): - avg_layer['name'] = block['name'] - else: - avg_layer['name'] = 'layer%d-avgpool' % i - avg_layer['type'] = 'Pooling' - pooling_param = OrderedDict() - input_shape = self.layer_map[pre_node_name].get_attr( - '_output_shape') - pooling_param['_output_shape'] = [-1, 1, 1, input_shape[-1]] - pooling_param['pool'] = 'AVG' - avg_layer['attr'] = pooling_param - self.layer_map[avg_layer['name']] = DarknetGraphNode(avg_layer) - self.original_list[avg_layer['name'] - ] = DarknetGraphNode(avg_layer) - self.layer_num_map[i] = avg_layer['name'] - pre_node_name = avg_layer['name'] - - elif block['type'] == 'route': - prev = block['layers'].split(',') # [-1,61] - if len(prev) == 1: - prev_layer_id = i + int(prev[0]) - self.layer_num_map[i] = self.layer_num_map[prev_layer_id] - pre_node_name = self.layer_num_map[i] - elif len(prev) == 2: - input_list = [] - input_shape = [] - route_layer = OrderedDict() - for p in prev: - if int(p) > 0: - - input_name = self.layer_num_map[int(p)+1] - input_list.append(input_name) - input_shape.append( - self.layer_map[input_name].get_attr('_output_shape')) - - else: - prev_layer_id = i + int(p) - input_name = self.layer_num_map[prev_layer_id] - input_shape.append( - self.layer_map[input_name].get_attr('_output_shape')) - input_list.append(input_name) - route_param = OrderedDict() - - shape_ = 0 - for shape in input_shape: - shape_ += shape[-1] - route_param['axis'] = 3 - route_param['_output_shape'] = input_shape[0][:-1] + [shape_] - route_layer['input'] = input_list - - if 'name' in block.keys(): - route_layer['name'] = block['name'] - else: - route_layer['name'] = 'layer%d-concat' % i - - route_layer['type'] = 'Concat' - route_layer['attr'] = route_param - - self.layer_map[route_layer['name'] - ] = DarknetGraphNode(route_layer) - self.original_list[route_layer['name'] - ] = DarknetGraphNode(route_layer) - self.layer_num_map[i] = route_layer['name'] - pre_node_name = route_layer['name'] - - elif block['type'] == 'shortcut': - prev_layer_id1 = i + int(block['from']) - prev_layer_id2 = i - 1 - bottom1 = self.layer_num_map[prev_layer_id1] - bottom2 = self.layer_num_map[prev_layer_id2] - input_shape = self.layer_map[bottom2].get_attr('_output_shape') - shortcut_layer = OrderedDict() - shortcut_layer['input'] = [bottom1, bottom2] - # print(shortcut_layer['input'] ) - if 'name' in block.keys(): - shortcut_layer['name'] = block['name'] - else: - shortcut_layer['name'] = 'layer%d-shortcut' % i - shortcut_layer['type'] = 'Add' - eltwise_param = OrderedDict() - eltwise_param['operation'] = 'SUM' - eltwise_param['_output_shape'] = input_shape - shortcut_layer['attr'] = eltwise_param - - self.layer_map[shortcut_layer['name'] - ] = DarknetGraphNode(shortcut_layer) - self.original_list[shortcut_layer['name'] - ] = DarknetGraphNode(shortcut_layer) - self.layer_num_map[i] = shortcut_layer['name'] - pre_node_name = shortcut_layer['name'] - - if block['activation'] != 'linear': - relu_layer = 
OrderedDict() - relu_layer['input'] = [pre_node_name] - if 'name' in block.keys(): - relu_layer['name'] = '%s-act' % block['name'] - else: - relu_layer['name'] = 'layer%d-act' % i - relu_layer['type'] = 'ReLU' - relu_param = OrderedDict() - relu_param['_output_shape'] = input_shape - if block['activation'] == 'leaky': - - relu_param['negative_slope'] = '0.1' - - relu_layer['attr'] = relu_param - self.layer_map[relu_layer['name'] - ] = DarknetGraphNode(relu_layer) - self.original_list[relu_layer['name'] - ] = DarknetGraphNode(relu_layer) - pre_node_name = relu_layer['name'] - - elif block['type'] == 'connected': - fc_layer = OrderedDict() - fc_layer['input'] = [pre_node_name] - if 'name' in block.keys(): - fc_layer['name'] = block['name'] - else: - fc_layer['name'] = 'layer%d-fc' % i - fc_layer['type'] = 'InnerProduct' - fc_param = OrderedDict() - fc_param['num_output'] = int(block['output']) - input_shape = self.layer_map[pre_node_name].get_attr( - '_output_shape') - fc_param['_output_shape'] = input_shape[:-1] + \ - [fc_param['num_output']] - fc_layer['attr'] = fc_param - self.layer_map[fc_layer['name']] = DarknetGraphNode(fc_layer) - self.original_list[fc_layer['name'] - ] = DarknetGraphNode(fc_layer) - self.layer_num_map[i] = fc_layer['name'] - pre_node_name = fc_layer['name'] - - if block['activation'] != 'linear': - relu_layer = OrderedDict() - relu_layer['input'] = [pre_node_name] - if 'name' in block.keys(): - relu_layer['name'] = '%s-act' % block['name'] - else: - relu_layer['name'] = 'layer%d-act' % i - relu_layer['type'] = 'ReLU' - relu_param = OrderedDict() - if block['activation'] == 'leaky': - - relu_param['negative_slope'] = '0.1' - relu_param['_output_shape'] = fc_param['_output_shape'] - relu_layer['attr'] = relu_param - self.layer_map[relu_layer['name'] - ] = DarknetGraphNode(relu_layer) - self.original_list[relu_layer['name'] - ] = DarknetGraphNode(relu_layer) - pre_node_name = relu_layer['name'] - - elif block['type'] == 'softmax': - sm_layer = OrderedDict() - - sm_layer['input'] = [pre_node_name] - if 'name' in block.keys(): - sm_layer['name'] = block['name'] - else: - sm_layer['name'] = 'layer%d-softmax' % i - sm_layer['type'] = 'Softmax' - softmax_param = OrderedDict() - input_shape = self.layer_map[pre_node_name].get_attr( - '_output_shape') - softmax_param['_output_shape'] = input_shape - sm_layer['attr'] = softmax_param - self.layer_map[sm_layer['name']] = DarknetGraphNode(sm_layer) - self.original_list[sm_layer['name'] - ] = DarknetGraphNode(sm_layer) - self.layer_num_map[i] = sm_layer['name'] - pre_node_name = sm_layer['name'] - - elif block['type'] == 'yolo': - - yolo_layer = OrderedDict() - yolo_layer['input'] = [pre_node_name] - if 'name' in block.keys(): - yolo_layer['name'] = block['name'] - else: - yolo_layer['name'] = 'layer%d-yolo' % i - yolo_layer['type'] = 'yolo' - yolo_param = OrderedDict() - yolo_param['truth_thresh'] = float(block['truth_thresh']) - yolo_param['random'] = float(block['random']) - yolo_param['ignore_thresh'] = float(block['ignore_thresh']) - yolo_param['jitter'] = float(block['jitter']) - yolo_param['num'] = int(block['num']) - yolo_param['classes'] = int(block['classes']) - anchors = [int(t) for t in block['anchors'].split(',')] - yolo_param['anchors'] = anchors - mask = [int(t) for t in block['mask'].split(',')] - yolo_param['mask'] = mask - - yolo_layer['attr'] = yolo_param - self.layer_map[yolo_layer['name'] - ] = DarknetGraphNode(yolo_layer) - self.original_list[yolo_layer['name'] - ] = DarknetGraphNode(yolo_layer) - 
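# route/shortcut blocks reference other layers either by absolute index or
# by a negative offset relative to the current block; the builder above
# resolves both via int(p) + i for negative values. An equivalent sketch:
def resolve_layer_refs(layers_field, current_index):
    refs = [int(s) for s in layers_field.split(',')]
    return [r if r > 0 else r + current_index for r in refs]

assert resolve_layer_refs('-1,61', 86) == [85, 61]  # a typical yolov3 route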
self.layer_num_map[i] = yolo_layer['name'] - - elif block['type'] == 'upsample': - - input_shape = self.layer_map[pre_node_name].get_attr( - '_output_shape') - upsample_layer = OrderedDict() - upsample_layer['input'] = [pre_node_name] - if 'name' in block.keys(): - upsample_layer['name'] = block['name'] - else: - upsample_layer['name'] = 'layer%d-upsample' % i - upsample_layer['type'] = 'upsample' - upsample_param = OrderedDict() - stride = block['stride'] - upsample_param['scales'] = [int(stride), int(stride)] - upsample_param['_output_shape'] = [ - input_shape[0]] + [q*int(stride) for q in input_shape[1:3]] + [input_shape[-1]] - upsample_layer['attr'] = upsample_param - self.layer_map[upsample_layer['name'] - ] = DarknetGraphNode(upsample_layer) - self.original_list[upsample_layer['name'] - ] = DarknetGraphNode(upsample_layer) - self.layer_num_map[i] = upsample_layer['name'] - pre_node_name = upsample_layer['name'] - - elif block['type'] == 'cost': - continue - - # spacetodepth - elif block['type'] == 'reorg': - input_shape = self.layer_map[pre_node_name].get_attr( - '_output_shape') - reorg_layer = OrderedDict() - reorg_layer['input'] = [pre_node_name] - if 'name' in block.keys(): - reorg_layer['name'] = block['name'] - else: - reorg_layer['name'] = 'layer%d-reorg' % i - - reorg_layer['type'] = 'SpaceToDepth' - reorg_param = OrderedDict() - stride = int(block['stride']) - reorg_param['strides'] = stride - reorg_param['_output_shape'] = [-1, input_shape[1]/stride, - input_shape[2]/stride, input_shape[3]*stride*stride] - reorg_layer['attr'] = reorg_param - - self.layer_map[reorg_layer['name'] - ] = DarknetGraphNode(reorg_layer) - self.original_list[reorg_layer['name'] - ] = DarknetGraphNode(reorg_layer) - self.layer_num_map[i] = reorg_layer['name'] - pre_node_name = reorg_layer['name'] - - elif block['type'] == 'region': - # print(block) - region_layer = OrderedDict() - region_layer['input'] = [pre_node_name] - if 'name' in block.keys(): - region_layer['name'] = block['name'] - else: - region_layer['name'] = 'layer%d-region' % i - region_layer['type'] = 'region' - region_param = OrderedDict() - region_param['softmax'] = int(block['softmax']) - region_param['thresh'] = float(block['thresh']) - region_param['random'] = float(block['random']) - region_param['jitter'] = float(block['jitter']) - region_param['num'] = int(block['num']) - region_param['classes'] = int(block['classes']) - region_param['coords'] = int(block['coords']) - region_param['rescore'] = int(block['rescore']) - region_param['object_scale'] = int(block['object_scale']) - - region_param['noobject_scale'] = int(block['noobject_scale']) - region_param['class_scale'] = int(block['class_scale']) - region_param['coord_scale'] = int(block['coord_scale']) - - region_param['bias_match'] = int(block['bias_match']) - region_param['absolute'] = int(block['absolute']) - - anchors = [float(t) for t in block['anchors'].split(',')] - region_param['anchors'] = anchors - - region_layer['attr'] = region_param - # print(region_layer) - self.layer_map[region_layer['name'] - ] = DarknetGraphNode(region_layer) - self.original_list[region_layer['name'] - ] = DarknetGraphNode(region_layer) - self.layer_num_map[i] = region_layer['name'] - # assert False - - else: - print('unknown layer type %s ' % block['type']) - print(block, "\n") - assert False - - for layer in self.layer_map: - for pred in self.layer_map[layer].layer['input']: - if pred not in self.layer_map.keys() and pred != 'data': - print(pred) - print("::::::::::::: unknown input 
:::::::::::::") - assert False - - self._make_connection(pred, layer) - - super(DarknetGraph, self).build() - -import os -import numpy as np - -from mmdnn.conversion.common.utils import * -from mmdnn.conversion.darknet.prototxt import * -from mmdnn.conversion.darknet.darknet_utils import * - -from mmdnn.conversion.darknet.darknet_graph import DarknetGraph -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -from mmdnn.conversion.common.DataStructure.parser import Parser -from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType - - -class DarknetParser(Parser): - - dtype_map = { - 0: graph_pb2.DT_UNDEFINED, - np.float32: graph_pb2.DT_FLOAT32, - np.float64: graph_pb2.DT_FLOAT64, - 3: graph_pb2.DT_INT32, - 4: graph_pb2.DT_UINT8, - 5: graph_pb2.DT_INT16, - 6: graph_pb2.DT_INT8, - 7: graph_pb2.DT_STRING, - 9: graph_pb2.DT_INT64 - } - - @property - def src_graph(self): - return self.dk_graph - - def __init__(self, model_config, weightfile, yolo): - super(DarknetParser, self).__init__() - - if not os.path.exists(model_config): - raise ValueError( - 'Darknet model config [{}] can not be found!'.format(model_config)) - - if weightfile: - self.weight_loaded = True - - fp = open(weightfile, 'rb') - header = np.fromfile(fp, count=4, dtype=np.int32) - self.buf = np.fromfile(fp, dtype=np.float32) - print("weights buf size: {}".format(self.buf.size)) - - fp.close() - - # yolo3(608) start at 1, yolo2(608) start at 0. yolo2(416) start at 1, yolo3(416) start at 0 - if yolo == "1": - self.start = 1 # yolov3 - else: - self.start = 0 # yolov2 - - model = parse_cfg(model_config) - self.dk_graph = DarknetGraph(model) - self.dk_graph.build() - - def gen_IR(self): - - # load weight by original order - for layer in self.dk_graph.original_list: - - current_node = self.dk_graph.get_node(layer) - node_type = current_node.type - # print(node_type) - if hasattr(self, "rename_" + node_type): - func = getattr(self, "rename_" + node_type) - func(current_node) - else: - self.rename_UNKNOWN(current_node) - - print("loaded weights buf size: {}".format(self.start)) - - @staticmethod - def _copy_and_reop(source_node, IR_node, new_op=None): - if new_op == None: - new_op = source_node.type - IR_node.name = source_node.name - IR_node.op = new_op - - if '_output_shape' in source_node.layer['attr'].keys(): - output_list = source_node.layer['attr']['_output_shape'] - shape = graph_pb2.TensorShape() - for dim in output_list: - new_dim = shape.dim.add() - if dim == None: - new_dim.size = -1 - else: - new_dim.size = int(dim) - - IR_node.attr["_output_shape"].list.shape.extend([shape]) - - if 'shape' in source_node.layer['attr'].keys(): - shape_list = source_node.layer['attr']['shape'] - if not output_list == None: - for dim in shape_list: - new_dim = IR_node.attr["shape"].shape.dim.add() - if dim == None: - new_dim.size = -1 - else: - new_dim.size = int(dim) - else: - IR_node.attr["shape"].shape.unknown_rank = True - - def _convert_inedge(self, source_node, IR_node, start_idx=0, end_idx=None): - if end_idx == None: - end_idx = len(source_node.in_edges) - for idx in range(start_idx, end_idx): - IR_node.input.append(self.src_graph.get_node( - source_node.in_edges[idx]).real_name) - - def _convert_identity_operation(self, source_node, start_idx=0, end_idx=None, new_op=None): - IR_node = self.IR_graph.node.add() - DarknetParser._copy_and_reop(source_node, IR_node, new_op) - self._convert_inedge(source_node, IR_node, start_idx, end_idx) - return IR_node - - def rename_UNKNOWN(self, source_node): - 
print(source_node.layer) - print("Darknet has not supported operator [%s] with name [%s]." - % (source_node.type, source_node.name)) - assert False - - def rename_DataInput(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op='DataInput') - # print(IR_node) - # assert False - - def rename_Conv(self, source_node): - """ - weights: name_weights, name_bias - """ - IR_node = self._convert_identity_operation(source_node, new_op='Conv') - kwargs = {} - - # strides - stride = source_node.get_attr('stride') - kwargs['strides'] = [1, stride, stride, 1] - - innode = self.dk_graph.get_node(source_node.in_edges[0]) - input_shape = innode.get_attr('_output_shape') - - # assert False - kwargs['kernel_shape'] = source_node.get_attr('kernel') - - # padding - if source_node.get_attr('pad'): - kwargs['auto_pad'] = "SAME" - padding = source_node.get_attr('padding') - kwargs['pads'] = [0, padding, padding, 0, 0, padding, padding, 0] - else: - kwargs['auto_pad'] = "VALID" - - # only load weight conv - - if source_node.get_attr('bias_term') == 'true': - kwargs['use_bias'] = True - - kernel = kwargs['kernel_shape'] - kernel = np.zeros([kernel[-1], kernel[-2], kernel[0], kernel[1]]) - k_bias = np.zeros(kwargs['kernel_shape'][-1]) - - conv_name = source_node.name - - # print("----------------",self.start) - # print(kernel.shape) - # print(k_bias.shape) - - b = np.reshape( - self.buf[self.start:self.start+k_bias.size], k_bias.shape) - self.start = self.start + k_bias.size - self.set_weight(conv_name, 'bias', b) - - W = np.reshape( - self.buf[self.start:self.start+kernel.size], kernel.shape) - self.start = self.start + kernel.size - W = np.transpose(W, (2, 3, 1, 0)) - self.set_weight(conv_name, 'weights', W) - else: - kwargs['use_bias'] = False - - assign_IRnode_values(IR_node, kwargs) - - def rename_BatchNorm(self, source_node): - - IR_node = self._convert_identity_operation( - source_node, new_op='BatchNorm') - kwargs = {} - IR_node.attr['use_global_stats'].b = source_node.get_attr( - 'use_global_stats') - IR_node.attr['bias'].b = source_node.get_attr('use_global_stats') - IR_node.attr['scale'].b = source_node.get_attr('use_global_stats') - IR_node.attr['epsilon'].f = 1e-5 - - assign_IRnode_values(IR_node, kwargs) - - innode = self.dk_graph.get_node(source_node.in_edges[0]) - input_shape = innode.get_attr('_output_shape') - kernel = innode.get_attr('kernel') - kernel = np.zeros([kernel[-1], kernel[-2], kernel[0], kernel[1]]) - - # buf, start, scale_layer['name'], bn_layer['name'], conv_layer['name'] - # print("==============",self.start) - bias = np.zeros(input_shape[-1]) - scale = np.zeros(input_shape[-1]) - mean = np.zeros(input_shape[-1]) - var = np.zeros(input_shape[-1]) - # print(bias.shape) - # print(scale.shape) - # print(mean.shape) - # print(var.shape) - # print(kernel.shape) - - bias_content = np.reshape( - self.buf[self.start:self.start+bias.size], bias.shape) - self.start = self.start + bias.size - self.set_weight(source_node.name, 'bias', bias_content) - - scale_content = np.reshape( - self.buf[self.start:self.start+scale.size], scale.shape) - self.start = self.start + scale.size - self.set_weight(source_node.name, 'scale', scale_content) - - mean_content = np.reshape( - self.buf[self.start:self.start+mean.size], mean.shape) - self.start = self.start + mean.size - self.set_weight(source_node.name, 'mean', mean_content) - - var_content = np.reshape( - self.buf[self.start:self.start+var.size], var.shape) - self.start = self.start + var.size - 
self.set_weight(source_node.name, 'var', var_content) - - W = np.reshape( - self.buf[self.start:self.start+kernel.size], kernel.shape) - self.start = self.start + kernel.size - W = np.transpose(W, (2, 3, 1, 0)) - # print(W) - # assert False - self.set_weight(innode.name, 'weights', W) - - # no use - - def rename_ReLU(self, source_node): - IR_node = self._convert_identity_operation(source_node, new_op='Relu') - - def rename_leakyReLU(self, source_node): - # print(source_node.layer) - kwargs = {} - kwargs['alpha'] = float(source_node.get_attr('negative_slope')) - IR_node = self._convert_identity_operation( - source_node, new_op='LeakyRelu') - assign_IRnode_values(IR_node, kwargs) - - def rename_Pooling(self, source_node): - IR_node = self._convert_identity_operation(source_node, new_op='Pool') - kwargs = {} - if source_node.get_attr('pool') == 'MAX': - kernel = source_node.get_attr('kernel_size') - kwargs['kernel_shape'] = [1, kernel, kernel, 1] - stride = source_node.get_attr('stride') - kwargs['strides'] = [1, stride, stride, 1] - kwargs['pooling_type'] = source_node.get_attr('pool') - pad = source_node.get_attr('padding') - IR_node.attr["pads"].list.i.extend(([0]+[pad, pad]+[0])*2) - - # for image classification(resnet) AVG pooling - else: - print(source_node.layer) - innode = self.dk_graph.get_node(source_node.in_edges[0]) - input_shape = innode.get_attr('_output_shape') - kwargs['kernel_shape'] = [1] + input_shape[1:2] + [1] - kwargs['strides'] = [1, 1, 1, 1] - - kwargs['pooling_type'] = source_node.get_attr('pool') - IR_node.attr["pads"].list.i.extend(([0, 0, 0, 0])*2) - - assign_IRnode_values(IR_node, kwargs) - - def rename_yolo(self, source_node): - # print(source_node.layer) - IR_node = self._convert_identity_operation(source_node, new_op='yolo') - kwargs = {} - kwargs['truth_thresh'] = source_node.get_attr('truth_thresh') - kwargs['random'] = source_node.get_attr('random') - kwargs['ignore_thresh'] = source_node.get_attr('ignore_thresh') - kwargs['jitter'] = source_node.get_attr('jitter') - kwargs['num'] = source_node.get_attr('num') - kwargs['classes'] = source_node.get_attr('classes') - kwargs['anchors'] = source_node.get_attr('anchors') - kwargs['mask'] = source_node.get_attr('mask') - assign_IRnode_values(IR_node, kwargs) - - def rename_Concat(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op='Concat') - IR_node.attr["axis"].i = int(source_node.get_attr("axis", "1")) - - def rename_upsample(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op='UpSampling2D') - scales = source_node.get_attr('scales') - kwargs = {} - kwargs['scales'] = scales - - assign_IRnode_values(IR_node, kwargs) - - def rename_Add(self, source_node): - IR_node = self._convert_identity_operation(source_node, new_op='Add') - - def rename_SpaceToDepth(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op='SpaceToDepth') - stride = source_node.get_attr('strides') - kwargs = {} - kwargs['blocksize'] = stride - - assign_IRnode_values(IR_node, kwargs) - - def rename_InnerProduct(self, source_node): - print(source_node.layer) - assert False - - def rename_region(self, source_node): - # print(source_node.layer) - IR_node = self._convert_identity_operation( - source_node, new_op='region') - kwargs = {} - kwargs['thresh'] = source_node.get_attr('thresh') - kwargs['random'] = source_node.get_attr('random') - # kwargs['ignore_thresh'] = source_node.get_attr('ignore_thresh') - kwargs['jitter'] = 
source_node.get_attr('jitter')
-        kwargs['num'] = source_node.get_attr('num')
-        kwargs['classes'] = source_node.get_attr('classes')
-
-        kwargs['softmax'] = source_node.get_attr('softmax')
-        kwargs['coords'] = source_node.get_attr('coords')
-        kwargs['rescore'] = source_node.get_attr('rescore')
-        kwargs['anchors'] = source_node.get_attr('anchors')
-        # kwargs['mask'] = source_node.get_attr('mask')
-        kwargs['object_scale'] = source_node.get_attr('object_scale')
-        kwargs['noobject_scale'] = source_node.get_attr('noobject_scale')
-        kwargs['class_scale'] = source_node.get_attr('class_scale')
-        kwargs['coord_scale'] = source_node.get_attr('coord_scale')
-
-        kwargs['bias_match'] = source_node.get_attr('bias_match')
-        kwargs['absolute'] = source_node.get_attr('absolute')
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_Softmax(self, source_node):
-        IR_node = self._convert_identity_operation(source_node)
-
-import numpy as np
-from collections import OrderedDict
-from mmdnn.conversion.darknet.cfg import *
-
-
-def parse_prototxt(protofile):
-    # Parses a caffe-style prototxt into nested OrderedDicts; returns
-    # {'props': ..., 'layers': [...]} when layer blocks are present,
-    # otherwise just the top-level property dict.
-    def line_type(line):
-        if line.find(':') >= 0:
-            return 0
-        elif line.find('{') >= 0:
-            return 1
-        return -1
-
-    def parse_block(fp):
-        block = OrderedDict()
-        line = fp.readline().strip()
-        while line != '}':
-            ltype = line_type(line)
-            if ltype == 0:  # key: value
-                line = line.split('#')[0]
-                key, value = line.split(':')
-                key = key.strip()
-                value = value.strip().strip('"')
-                if key in block.keys():
-                    # repeated keys are collected into a list
-                    if type(block[key]) == list:
-                        block[key].append(value)
-                    else:
-                        block[key] = [block[key], value]
-                else:
-                    block[key] = value
-            elif ltype == 1:  # blockname {
-                key = line.split('{')[0].strip()
-                sub_block = parse_block(fp)
-                block[key] = sub_block
-            line = fp.readline().strip()
-            line = line.split('#')[0]
-        return block
-
-    fp = open(protofile, 'r')
-    props = OrderedDict()
-    layers = []
-    line = fp.readline()
-    while line != '':
-        line = line.strip().split('#')[0]
-        if line == '':
-            line = fp.readline()
-            continue
-        ltype = line_type(line)
-        if ltype == 0:  # key: value
-            key, value = line.split(':')
-            key = key.strip()
-            value = value.strip().strip('"')
-            if key in props.keys():
-                if type(props[key]) == list:
-                    props[key].append(value)
-                else:
-                    props[key] = [props[key], value]
-            else:
-                props[key] = value
-        elif ltype == 1:  # blockname {
-            key = line.split('{')[0].strip()
-            if key == 'layer':
-                layer = parse_block(fp)
-                layers.append(layer)
-            else:
-                props[key] = parse_block(fp)
-        line = fp.readline()
-    fp.close()
-
-    if len(layers) > 0:
-        net_info = OrderedDict()
-        net_info['props'] = props
-        net_info['layers'] = layers
-        return net_info
-    else:
-        return props
-
-
-def is_number(s):
-    try:
-        float(s)
-        return True
-    except ValueError:
-        return False
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ---------------------------------------------------------------------------------------------- - -from __future__ import absolute_import - - -class base_extractor(object): - - def __init__(self): - pass - - @classmethod - def help(cls): - print('Support frameworks: {}'.format( - list(cls.architecture_map.keys()))) - - @classmethod - def sanity_check(cls, architecture): - if architecture is None: - cls.help() - return False - - elif not architecture in cls.architecture_map: - cls.help() - raise ValueError( - "Unknown pretrained model name [{}].".format(architecture)) - - else: - return True - - @classmethod - def download(cls, architecture): - raise NotImplementedError() - - @classmethod - def inference(cls, image_path): - raise NotImplementedError() -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -from __future__ import absolute_import -import argparse -import numpy as np -from six import text_type as _text_type -from PIL import Image - - -class TestKit(object): - - truth = { - 'caffe': { - 'alexnet': [(821, 0.25088307), (657, 0.20857951), (744, 0.096812263), (595, 0.066312768), (847, 0.053720973)], - 'vgg19': [(21, 0.37522122), (144, 0.28500062), (23, 0.099720284), (134, 0.036305398), (22, 0.033559237)], - 'inception_v1': [(21, 0.93591732), (23, 0.037170019), (22, 0.014315935), (128, 0.005050648), (749, 0.001965977)], - 'resnet152': [(144, 0.93159181), (23, 0.033074539), (21, 0.028599562), (99, 0.001878676), (146, 0.001557963)], - 'squeezenet': [(21, 0.5285601), (128, 0.071685813), (144, 0.064104252), (416, 0.050044473), (22, 0.049522042)] - }, - 'tensorflow': { - 'vgg19': [(21, 11.285443), (144, 10.240093), (23, 9.1792336), (22, 8.1113129), (128, 8.1065922)], - 'resnet': [(22, 11.756789), (147, 8.5718527), (24, 6.1751032), (88, 4.3121386), (141, 4.1778097)], - 'resnet_v1_101': [(21, 14.384739), (23, 14.262486), (144, 14.068737), (94, 12.17205), (134, 12.064575)], - 'resnet_v2_50': [(22, 12.202208), (145, 7.9816318), (24, 7.6646066), (147, 7.008089), (88, 5.180175)], - 'resnet_v2_152': [(22, 13.370557), (147, 8.807369), (24, 5.702235), (90, 5.6126657), (95, 4.8026266)], - 'inception_v1': [(22, 9.2353525), (95, 6.9062357), (24, 6.693231), (750, 6.494348), (145, 6.054538)], - 'inception_v3': [(22, 9.4921198), (24, 4.0932288), (25, 3.700398), (23, 3.3715961), (147, 3.3620636)], - 'mobilenet_v1_1.0': [(22, 16.223597), (24, 14.54775), (147, 13.173758), (145, 11.36431), (728, 11.083847)], - 'mobilenet_v2_1.0_224': [(22, 9.384777), (147, 5.865254), (23, 5.5761757), (750, 5.0572333), (132, 4.865659)] - - }, - 'keras': { - 'vgg16': [(21, 0.81199354), (562, 0.019326132), (23, 0.018279659), (144, 0.012460723), (22, 0.012429929)], - 'vgg19': [(21, 0.37522098), (144, 0.28500044), (23, 0.099720411), (134, 0.036305476), (22, 0.033559218)], - 'inception_v3': [(21, 0.91967654), (23, 0.0029040477), (24, 0.0020232804), (146, 0.0019062747), (22, 0.0017500133)], - 'xception': [(21, 0.67462814), (23, 0.063138723), (87, 0.028424012), (89, 0.02484037), (88, 0.0062591862)], - 'mobilenet': [(21, 0.7869994), (23, 0.14728773), (146, 0.037277445), (144, 0.0061039869), (727, 0.0046111974)], - 'resnet': [(144, 0.80301273), (23, 0.067478567), (21, 0.046560187), (562, 0.037413299), (146, 0.015967956)], - 
'inception_resnet_v2': [(21, 0.93837249), (87, 0.0021177295), (146, 0.0019775454), (23, 0.00072135136), (24, 0.00056668324)], - 'densenet': [(21, 0.86279225), (146, 0.051543437), (23, 0.030489875), (144, 0.028583106), (141, 0.003564599)], - 'nasnet': [(21, 0.8541155), (22, 0.0030572189), (146, 0.0026522065), (23, 0.0020259875), (88, 0.0020091296)] - }, - 'mxnet': { - 'vgg19': [(21, 0.54552644), (144, 0.19179004), (23, 0.066389613), (22, 0.022819581), (128, 0.02271222)], - 'resnet': [(21, 0.84012794), (144, 0.097428247), (23, 0.039757393), (146, 0.010432643), (99, 0.0023797606)], - 'squeezenet': [(21, 0.36026478), (128, 0.084114805), (835, 0.07940048), (144, 0.057378717), (749, 0.053491514)], - 'inception_bn': [(21, 0.84332663), (144, 0.041747514), (677, 0.021810319), (973, 0.02054958), (115, 0.008529461)], - 'resnet152-11k': [(1278, 0.49073416), (1277, 0.21393695), (282, 0.12980066), (1282, 0.0663582), (1224, 0.022041745)], - 'imagenet1k-resnext-101-64x4d': [(21, 0.587986), (23, 0.29983738), (862, 0.044453762), (596, 0.00983246), (80, 0.00465048)], - 'imagenet1k-resnext-50': [(396, 0.7104751), (398, 0.122665755), (438, 0.06391319), (440, 0.029796895), (417, 0.019492012)], - 'resnext': [(21, 0.58798772), (23, 0.29983655), (862, 0.044453178), (596, 0.0098323636), (80, 0.0046504852)] - }, - 'pytorch': { - 'resnet18': [(394, 10.310125), (395, 9.2285385), (21, 8.9611788), (144, 8.3729601), (749, 7.9692998)], - 'resnet152': [(21, 13.080057), (141, 12.32998), (94, 9.8761454), (146, 9.3761511), (143, 8.9194641)], - 'vgg19': [(821, 8.4734678), (562, 8.3472366), (835, 8.2712851), (749, 7.792901), (807, 6.6604013)], - }, - - 'cntk': { - 'alexnet': [(836, 7.5413785), (837, 7.076382), (84, 6.9632936), (148, 6.90293), (416, 6.571906)], - 'resnet18': [(21, 8.2490816), (22, 7.7600741), (23, 7.4341722), (148, 7.1398726), (144, 6.9187264)], - 'resnet152': [(21, 12.461424), (99, 12.38283), (144, 11.1572275), (94, 10.569823), (146, 10.096423)], - 'inception_v3': [(21, 15.558625), (22, 9.7712708), (23, 9.6847782), (146, 9.188818), (144, 8.0436306)] - }, - 'coreml': { - 'mobilenet': [], - }, - - 'darknet': { - 'yolov3': [], - }, - - } - - preprocess_func = { - 'caffe': { - 'alexnet': lambda path: TestKit.ZeroCenter(path, 227, True), - 'vgg19': lambda path: TestKit.ZeroCenter(path, 224, True), - 'inception_v1': lambda path: TestKit.ZeroCenter(path, 224, True), - 'resnet152': lambda path: TestKit.ZeroCenter(path, 224, True), - 'squeezenet': lambda path: TestKit.ZeroCenter(path, 227), - 'inception_v4': lambda path: TestKit.Standard(path, 299, True), - 'xception': lambda path: TestKit.Standard(path, 299, True), - 'voc-fcn8s': lambda path: TestKit.ZeroCenter(path, 500, True), - 'voc-fcn16s': lambda path: TestKit.ZeroCenter(path, 500, True), - 'voc-fcn32s': lambda path: TestKit.ZeroCenter(path, 500, True), - }, - - 'tensorflow': { - 'vgg16': lambda path: TestKit.ZeroCenter(path, 224), - 'vgg19': lambda path: TestKit.ZeroCenter(path, 224), - 'inception_v1': lambda path: TestKit.Standard(path, 224), - 'inception_v3': lambda path: TestKit.Standard(path, 299), - 'resnet': lambda path: TestKit.Standard(path, 299), - 'resnet_v1_50': lambda path: TestKit.ZeroCenter(path, 224), - 'resnet_v1_101': lambda path: TestKit.ZeroCenter(path, 224), - 'resnet_v1_152': lambda path: TestKit.ZeroCenter(path, 224), - 'resnet_v2_50': lambda path: TestKit.Standard(path, 299), - 'resnet_v2_101': lambda path: TestKit.Standard(path, 299), - 'resnet_v2_152': lambda path: TestKit.Standard(path, 299), - 'resnet_v2_200': lambda path: 
TestKit.Standard(path, 299), - 'resnet152': lambda path: TestKit.Standard(path, 299), - 'mobilenet_v1_1.0': lambda path: TestKit.Standard(path, 224), - 'mobilenet_v1_0.50': lambda path: TestKit.Standard(path, 224), - 'mobilenet_v1_0.25': lambda path: TestKit.Standard(path, 224), - 'mobilenet': lambda path: TestKit.Standard(path, 224), - 'mobilenet_v2_1.0_224': lambda path: TestKit.Standard(path, 224), - 'nasnet-a_large': lambda path: TestKit.Standard(path, 331), - 'inception_resnet_v2': lambda path: TestKit.Standard(path, 299), - 'facenet': lambda path: TestKit.Standard(path, 160), - 'rnn': lambda path: TestKit.RNN(path), - }, - - 'keras': { - 'vgg16': lambda path: TestKit.ZeroCenter(path, 224, True), - 'vgg19': lambda path: TestKit.ZeroCenter(path, 224, True), - 'inception_v3': lambda path: TestKit.Standard(path, 299), - 'resnet50': lambda path: TestKit.ZeroCenter(path, 224, True), - 'xception': lambda path: TestKit.Standard(path, 299), - 'mobilenet': lambda path: TestKit.Standard(path, 224), - 'inception_resnet_v2': lambda path: TestKit.Standard(path, 299), - 'densenet': lambda path: TestKit.Standard(path, 224), - 'nasnet': lambda path: TestKit.Standard(path, 331), - 'yolo2-tiny': lambda path: TestKit.Identity(path, 416), - 'yolo2': lambda path: TestKit.Identity(path, 416), - }, - - 'mxnet': { - 'vgg16': lambda path: TestKit.ZeroCenter(path, 224, False), - 'vgg19': lambda path: TestKit.ZeroCenter(path, 224, False), - 'resnet': lambda path: TestKit.Identity(path, 224, True), - 'squeezenet_v1.0': lambda path: TestKit.ZeroCenter(path, 224, False), - 'squeezenet_v1.1': lambda path: TestKit.ZeroCenter(path, 224, False), - 'imagenet1k-inception-bn': lambda path: TestKit.Identity(path, 224, False), - 'imagenet1k-resnet-18': lambda path: TestKit.Identity(path, 224, True), - 'imagenet1k-resnet-152': lambda path: TestKit.Identity(path, 224, True), - 'resnext': lambda path: TestKit.Identity(path, 224, False), - 'imagenet1k-resnext-50': lambda path: TestKit.Identity(path, 224, False), - 'imagenet1k-resnext-101-64x4d': lambda path: TestKit.Identity(path, 224, False), - }, - - 'pytorch': { - 'alexnet': lambda path: TestKit.Standard(path, 227), - 'densenet121': lambda path: TestKit.Standard(path, 224), - 'densenet169': lambda path: TestKit.Standard(path, 224), - 'densenet161': lambda path: TestKit.Standard(path, 224), - 'densenet201': lambda path: TestKit.Standard(path, 224), - 'vgg11': lambda path: TestKit.Standard(path, 224), - 'vgg13': lambda path: TestKit.Standard(path, 224), - 'vgg16': lambda path: TestKit.Standard(path, 224), - 'vgg19': lambda path: TestKit.Standard(path, 224), - 'vgg11_bn': lambda path: TestKit.Standard(path, 224), - 'vgg13_bn': lambda path: TestKit.Standard(path, 224), - 'vgg16_bn': lambda path: TestKit.Standard(path, 224), - 'vgg19_bn': lambda path: TestKit.Standard(path, 224), - 'resnet18': lambda path: TestKit.Standard(path, 224), - 'resnet34': lambda path: TestKit.Standard(path, 224), - 'resnet50': lambda path: TestKit.Standard(path, 224), - 'resnet101': lambda path: TestKit.Standard(path, 224), - 'resnet152': lambda path: TestKit.Standard(path, 224), - 'squeezenet1_0': lambda path: TestKit.Standard(path, 224), - 'inception_v3': lambda path: TestKit.Standard(path, 299), - }, - - 'cntk': { - 'alexnet': lambda path: TestKit.Identity(path, 227), - 'resnet18': lambda path: TestKit.Identity(path, 224), - 'resnet152': lambda path: TestKit.Identity(path, 224), - 'inception_v3': lambda path: TestKit.Identity(path, 299), - }, - - - 'darknet': { - 'yolov3': lambda path: 
TestKit.Identity(path, 608), - 'yolov2': lambda path: TestKit.Identity(path, 608), - }, - - - 'coreml': { - 'mobilenet': lambda path: TestKit.Normalize(path, 224, 0.0170000009239, [-2.10256004333, -1.98526000977, -1.76698005199], [1.0, 1.0, 1.0], True), - 'inception_v3': lambda path: TestKit.Standard(path, 299), - 'vgg16': lambda path: TestKit.ZeroCenter(path, 224, True), - 'resnet50': lambda path: TestKit.ZeroCenter(path, 224, True), - 'tinyyolo': lambda path: TestKit.Normalize(path, 416, 0.00392156863, [0, 0, 0], [1.0, 1.0, 1.0], False), - }, - - 'paddle': { - 'resnet50': lambda path: TestKit.Standard(path, 224), - 'resnet101': lambda path: TestKit.Standard(path, 224), - 'vgg16': lambda path: TestKit.Standard(path, 224), - } - - } - - def __init__(self): - parser = argparse.ArgumentParser() - - parser.add_argument('-p', '--preprocess', - type=_text_type, help='Model Preprocess Type') - - parser.add_argument('-n', type=_text_type, default='kit_imagenet', - help='Network structure file name.') - - parser.add_argument('-s', type=_text_type, help='Source Framework Type', - choices=self.truth.keys()) - - parser.add_argument('-w', type=_text_type, required=True, - help='Network weights file name') - - parser.add_argument('--image', '-i', - type=_text_type, help='Test image path.', - default="mmdnn/conversion/examples/data/seagull.jpg" - ) - - parser.add_argument('-l', '--label', - type=_text_type, - default='mmdnn/conversion/examples/data/imagenet_1000.txt', - help='Path of label.') - - parser.add_argument('--dump', - type=_text_type, - default=None, - help='Target model path.') - - parser.add_argument('--detect', - type=_text_type, - default=None, - help='Model detection result path.') - - # tensorflow dump tag - parser.add_argument('--dump_tag', - type=_text_type, - default=None, - help='Tensorflow model dump type', - choices=['SERVING', 'TRAINING']) - - self.args = parser.parse_args() - import imp - self.MainModel = imp.load_source('MainModel', self.args.n) - - @staticmethod - def ZeroCenter(path, size, BGRTranspose=False): - img = Image.open(path) - img = img.resize((size, size)) - x = np.array(img, dtype=np.float32) - - # Reference: 1) Keras image preprocess: https://github.com/keras-team/keras/blob/master/keras/applications/imagenet_utils.py - # 2) tensorflow github issue: https://github.com/tensorflow/models/issues/517 - # R-G-B for Imagenet === [123.68, 116.78, 103.94] - - x[..., 0] -= 123.68 - x[..., 1] -= 116.779 - x[..., 2] -= 103.939 - - if BGRTranspose == True: - x = x[..., ::-1] - - return x - - @staticmethod - def Normalize(path, size=224, scale=0.0392156863, mean=[-0.485, -0.456, -0.406], std=[0.229, 0.224, 0.225], BGRTranspose=False): - img = Image.open(path) - img = img.resize((size, size)) - x = np.array(img, dtype=np.float32) - x *= scale - for i in range(0, 3): - x[..., i] += mean[i] - x[..., i] /= std[i] - if BGRTranspose == True: - x = x[..., ::-1] - return x - - @staticmethod - def Standard(path, size, BGRTranspose=False): - img = Image.open(path) - img = img.resize((size, size)) - x = np.array(img, dtype=np.float32) - x /= 255.0 - x -= 0.5 - x *= 2.0 - if BGRTranspose == True: - x = x[..., ::-1] - return x - - @staticmethod - def Identity(path, size, BGRTranspose=False): - img = Image.open(path) - img = img.resize((size, size)) - x = np.array(img, dtype=np.float32) - if BGRTranspose == True: - x = x[..., ::-1] - return x - - @staticmethod - def RNN(path): - x = np.load(path) - return x - - def preprocess(self, image_path): - func = 
self.preprocess_func[self.args.s][self.args.preprocess] - return func(image_path) - - def print_result(self, predict): - predict = np.squeeze(predict) - if predict.ndim == 1: - top_indices = predict.argsort()[-5:][::-1] - if predict.shape[0] == 1001 or predict.shape[0] == 1000: - if predict.shape[0] == 1000: - offset = 0 - else: - offset = 1 - - import os - if os.path.exists(self.args.label): - with open(self.args.label, 'r') as f: - labels = [l.rstrip() for l in f] - - for i in top_indices: - print(labels[i - offset], i, predict[i]) - - else: - for i in top_indices: - print(i, predict[i]) - - self.result = [(i, predict[i]) for i in top_indices] - - else: - self.result = predict - print(self.result) - - @staticmethod - def print_intermediate_result(intermediate_output, if_transpose=False): - intermediate_output = np.squeeze(intermediate_output) - - if if_transpose == True: - intermediate_output = np.transpose(intermediate_output, [2, 0, 1]) - - print(intermediate_output) - print(intermediate_output.shape) - print("Sum = %.30f" % np.sum(intermediate_output)) - print("Std = %.30f" % np.std(intermediate_output)) - - def test_truth(self): - this_truth = self.truth[self.args.s][self.args.preprocess] - for index, i in enumerate(self.result): - assert this_truth[index][0] == i[0] - assert np.isclose(this_truth[index][1], i[1], atol=1e-6) - - print("Test model [{}] from [{}] passed.".format( - self.args.preprocess, - self.args.s - )) - - def inference(self, image_path): - self.preprocess(image_path) - self.print_result() - - def dump(self, path=None): - raise NotImplementedError() - - -''' -if __name__=='__main__': - tester = TestKit() - tester.inference('examples/data/seagull.jpg') -''' -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# -*- coding: utf-8 -*- -"""Normalization layers. -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from keras.engine import Layer, InputSpec -from keras import initializers -# from keras.layers.core import Layer -from keras import backend as K - - -class Scale(Layer): - """Scale layer (2018). - - Scale the activations of the previous layer at each batch. - - # Arguments - axis: Integer, the axis that should be normalized - (typically the features axis). - For instance, after a `Conv2D` layer with - `data_format="channels_first"`, - set `axis=1` in `BatchNormalization`. - center: If True, add offset of `beta` to normalized tensor. - If False, `beta` is ignored. - scale: If True, multiply by `gamma`. - If False, `gamma` is not used. - When the next layer is linear (also e.g. `nn.relu`), - this can be disabled since the scaling - will be done by the next layer. - beta_initializer: Initializer for the beta weight. - gamma_initializer: Initializer for the gamma weight. - - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - - # Output shape - Same shape as input. 
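-
-    # Example
-
-    ```python
-        # a minimal sketch: Scale applies the learned per-channel affine
-        # transform y = gamma * x + beta along `axis` (here, channels-last)
-        y = Scale(axis=-1)(x)
-    ```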
- - """ - - def __init__(self, - axis=-1, - center=True, - scale=True, - beta_initializer='zeros', - gamma_initializer='ones', - **kwargs): - super(Scale, self).__init__(**kwargs) - self.supports_masking = True - self.axis = axis - self.center = center - self.scale = scale - self.beta_initializer = initializers.get(beta_initializer) - self.gamma_initializer = initializers.get(gamma_initializer) - - def build(self, input_shape): - dim = input_shape[self.axis] - if dim is None: - raise ValueError('Axis ' + str(self.axis) + ' of ' - 'input tensor should have a defined dimension ' - 'but the layer received an input with shape ' + - str(input_shape) + '.') - self.input_spec = InputSpec(ndim=len(input_shape), - axes={self.axis: dim}) - shape = (dim,) - - if self.scale: - self.gamma = self.add_weight(shape=shape, - name='gamma', - initializer=self.gamma_initializer) - else: - self.gamma = None - if self.center: - self.beta = self.add_weight(shape=shape, - name='beta', - initializer=self.beta_initializer) - else: - self.beta = None - - self.built = True - - def call(self, inputs, training=None): - input_shape = K.int_shape(inputs) - # Prepare broadcasting shape. - ndim = len(input_shape) - reduction_axes = list(range(len(input_shape))) - del reduction_axes[self.axis] - broadcast_shape = [1] * len(input_shape) - broadcast_shape[self.axis] = input_shape[self.axis] - return K.reshape(self.gamma, broadcast_shape) * inputs + K.reshape(self.beta, broadcast_shape) - - def get_config(self): - config = { - 'axis': self.axis, - 'center': self.center, - 'scale': self.scale, - } - base_config = super(Scale, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - def compute_output_shape(self, input_shape): - return input_shape -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ---------------------------------------------------------------------------------------------- - -import os -from six.moves import xrange - -from mmdnn.conversion.common.IR.IR_graph import IRGraph, IRGraphNode -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType -from mmdnn.conversion.common.DataStructure.emitter import Emitter -from mmdnn.conversion.common.utils import * -from mmdnn.conversion.keras.extra_layers import Scale -from mmdnn.conversion.rewriter.folder import Folder - - -class Keras2Emitter(Emitter): - - dtype_map = { - graph_pb2.DT_FLOAT16: "float16", - graph_pb2.DT_FLOAT32: "float32", - graph_pb2.DT_FLOAT64: "float64", - graph_pb2.DT_INT16: "int16", - graph_pb2.DT_INT32: "int32", - graph_pb2.DT_INT64: "int64", - graph_pb2.DT_UINT8: "uint8", - graph_pb2.DT_UINT16: "uint16" - } - - def __init__(self, model): - super(Keras2Emitter, self).__init__() - from six import string_types as _string_types - if isinstance(model, _string_types): - network_path = model - else: - network_path = model[0] - weight_path = model[1] - self._load_weights(weight_path) - - self.IR_graph = IRGraph(network_path) - self.IR_graph.build() - self.yolo_parameter = [] - self.region_parameter = [] - self.layers_codes_count = dict() - - folder = Folder(self.IR_graph, self.weights_dict) - folder.fold() - - @property - def header_code(self): - return """import keras -from keras.models import Model -from keras import layers -import keras.backend as K -import numpy as np -from keras.layers.core import Lambda -import tensorflow as tf - - -weights_dict = dict() -def load_weights_from_file(weight_file): - try: - weights_dict = np.load(weight_file).item() - except: - weights_dict = np.load(weight_file, encoding='bytes').item() - - return weights_dict - - -def set_layer_weights(model, weights_dict): - for layer in model.layers: - if layer.name in weights_dict: - cur_dict = weights_dict[layer.name] - current_layer_parameters = list() - if layer.__class__.__name__ == "BatchNormalization": - if 'scale' in cur_dict: - current_layer_parameters.append(cur_dict['scale']) - if 'bias' in cur_dict: - current_layer_parameters.append(cur_dict['bias']) - current_layer_parameters.extend([cur_dict['mean'], cur_dict['var']]) - elif layer.__class__.__name__ == "Scale": - if 'scale' in cur_dict: - current_layer_parameters.append(cur_dict['scale']) - if 'bias' in cur_dict: - current_layer_parameters.append(cur_dict['bias']) - elif layer.__class__.__name__ == "SeparableConv2D": - current_layer_parameters = [cur_dict['depthwise_filter'], cur_dict['pointwise_filter']] - if 'bias' in cur_dict: - current_layer_parameters.append(cur_dict['bias']) - elif layer.__class__.__name__ == "Embedding": - current_layer_parameters.append(cur_dict['weights']) - else: - # rot weights - current_layer_parameters = [cur_dict['weights']] - if 'bias' in cur_dict: - current_layer_parameters.append(cur_dict['bias']) - model.get_layer(layer.name).set_weights(current_layer_parameters) - - return model - - -def KitModel(weight_file = None): - global weights_dict - weights_dict = load_weights_from_file(weight_file) if not weight_file == None else None - """ - - def gen_code(self, phase): - self.add_body(0, self.header_code) - for layer in self.IR_graph.topological_sort: - current_node = self.IR_graph.get_node(layer) - node_type = current_node.type - - if hasattr(self, "emit_" + node_type): - # print("Converting layer {}({})".format(current_node.name, node_type)) - func = 
getattr(self, "emit_" + node_type) - line = func(current_node) - if line: - self.add_body(1, line) - else: - print( - "KerasEmitter has not supported operator [%s]." % (node_type)) - self.emit_UNKNOWN(current_node) - - self.add_body(1, "{:<15} = Model(inputs = [{}], outputs = [{}])".format( - "model", - ', '.join([self.IR_graph.get_node( - name).real_variable_name for name in self.IR_graph.input_layers if self.IR_graph.get_node(name).type != 'Const']), - ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers if self.IR_graph.get_node(name).type != 'Pack']))) - self.add_body( - 1, ["set_layer_weights(model, weights_dict)", "return model"]) - - for i in self.used_layers: - func = getattr(self, "_layer_" + i) - func() - - self.add_body(0, "") - for code in self.layers_codes.values(): - self.add_body(0, code) - - return self.body_code - - @staticmethod - def shapeToStr(shapes): - return ', '.join('%s' % i for i in filter(lambda x: x > 0, shapes)) - - def _emit_activation(self, IR_node, op, in_scope=False): - if in_scope: - code = "{:<15} = keras.activations.get('{}')({})".format( - IR_node.variable_name, - op, - self.parent_variable_name(IR_node)) - else: - code = "{:<15} = layers.Activation(name='{}', activation='{}')({})".format( - IR_node.variable_name, - IR_node.name, - op, - self.parent_variable_name(IR_node)) - - return code - - def _emit_merge(self, IR_node, func): - if len(IR_node.in_edges) == 1: - IR_node.in_edges.append(IR_node.in_edges[0]) - inputs = ', '.join('%s' % self.parent_variable_name( - IR_node, i) for i in IR_node.in_edges) - axis = ' axis = {},'.format(IR_node.get_attr( - 'axis')) if 'axis' in IR_node.layer.attr else "" - code = "{:<15} = layers.{}(name = '{}', inputs = [{}])".format( - IR_node.variable_name, - func, - IR_node.name, - inputs) - return code - - @staticmethod - def _convert_padding(padding): - padding = convert_onnx_pad_to_tf(padding)[1:-1] - - for idx, pad in enumerate(padding): - padding[idx] = tuple(pad) - padding = tuple(padding) - return padding - - def _defuse_padding(self, IR_node, in_scope=False): - auto_pad = IR_node.get_attr('auto_pad') - - if auto_pad != None and auto_pad.startswith("SAME"): - input_node = self.parent_variable_name(IR_node) - padding = 'same' - return input_node, padding - else: - - padding = IR_node.get_attr("pads") - - if padding != None: - padding = self._convert_padding(padding) - - if is_valid_padding(padding) == False: - input_node = IR_node.variable_name + '_input' - self.add_body(1, "{:<15} = layers.ZeroPadding{}D(padding = {})({})".format( - input_node, - len(padding), - padding, - self.parent_variable_name(IR_node))) - else: - input_node = self.parent_variable_name(IR_node) - else: - input_node = self.parent_variable_name(IR_node) - - # TODO - return input_node, 'valid' - # return input_node, 'same' - - def _emit_convolution(self, IR_node, conv_type): - self.used_layers.add('Conv') - # assert IR_node.get_attr('group', 1) == 1 - group = IR_node.get_attr("group", 1) - - if conv_type.endswith('Transpose'): - filters = IR_node.get_attr('kernel_shape')[-2] - else: - filters = IR_node.get_attr('kernel_shape')[-1] - - filters_str = 'filters={}'.format(filters) if not conv_type.endswith( - 'DepthwiseConv2D') else 'depth_multiplier={}'.format(filters) - # change dw from filters to 1 - - input_node, padding = self._defuse_padding(IR_node) - - dilations = IR_node.get_attr('dilations') - - if not dilations or len(dilations) == 2: - # reset the default dilation - dilations = [1] * 
len(IR_node.get_attr('kernel_shape')) - - code = "{:<15} = convolution(weights_dict, name='{}', input={}, group={}, conv_type='{}', {}, kernel_size={}, strides={}, dilation_rate={}, padding='{}', use_bias={})".format( - IR_node.variable_name, - IR_node.name, - input_node, - group, - conv_type, - filters_str, - tuple(IR_node.get_attr('kernel_shape')[:-2]), - tuple(IR_node.get_attr('strides')[1:-1]), - tuple(dilations[1:-1]), - padding, - IR_node.get_attr('use_bias')) - - return code - - def emit_ConvTranspose(self, IR_node, in_scope=False): - dim = len(IR_node.get_attr('kernel_shape')) - 2 - return self._emit_convolution(IR_node, 'layers.Conv{}DTranspose'.format(dim)) - - def emit_Conv(self, IR_node, in_scope=False): - dim = len(IR_node.get_attr('kernel_shape')) - 2 - return self._emit_convolution(IR_node, 'layers.Conv{}D'.format(dim)) - - ############# - # Operators # - ############# - - def emit_UNKNOWN(self, IR_node, in_scope=False): - print(IR_node.name) - - def emit_Mul(self, IR_node, in_scope=False): - - if in_scope: - code = "{:<15} = {} * {}".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - self.parent_variable_name(IR_node, [1])) - return code - - node_1 = self.IR_graph.get_node(IR_node.in_edges[0]) - node_2 = self.IR_graph.get_node(IR_node.in_edges[1]) - - if node_1.type == 'Constant' or node_2.type == 'Constant': - self.used_layers.add('Mul_Constant') - if node_1.type == 'Constant': - weight_factor = node_1.get_attr('value') - code = "{:<15} = mul_constant(weight_factor={}, layer_name= {})".format( - IR_node.variable_name, - weight_factor, - self.parent_variable_name(IR_node, [1])) - else: - weight_factor = node_2.get_attr('value') - code = "{:<15} = mul_constant(weight_factor={}, layer_name= {})".format( - IR_node.variable_name, - weight_factor, - self.parent_variable_name(IR_node)) - else: - self.used_layers.add('Mul') - code = "{:<15} = my_mul(name='{}')([{}, {}])".format( - IR_node.variable_name, - IR_node.name, - self.parent_variable_name(IR_node), - self.parent_variable_name(IR_node, [1])) - return code - - def emit_Sub(self, IR_node, in_scope=False): - if in_scope: - code = "{:<15} = {} - {}".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - self.parent_variable_name(IR_node, [1])) - return code - - self.used_layers.add('Sub') - code = "{:<15} = my_sub()({}, {})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - self.parent_variable_name(IR_node, [1])) - - # code = self._emit_merge(IR_node, "subtract") - return code - - def emit_Add(self, IR_node, in_scope=False): - if in_scope: - code = "{:<15} = {} + {}".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - self.parent_variable_name(IR_node, [1])) - return code - - self.used_layers.add('Add') - code = "{:<15} = my_add()([{}, {}])".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - self.parent_variable_name(IR_node, [1])) - return code - - def emit_DataInput(self, IR_node, in_scope=False): - shape_str = IRGraph.shapeToStr(IR_node.IR_layer.attr["shape"].shape) - dtype_str = ", dtype = '{}'".format( - self.dtype_map[IR_node.layer.attr['dtype'].type]) if 'dtype' in IR_node.layer.attr else "" - - code = "{:<15} = layers.Input(name = '{}', shape = ({},) {})".format( - IR_node.variable_name, - IR_node.name, - shape_str, - dtype_str) - return code - - def emit_Dropout(self, IR_node, in_scope=False): - seed = 'None' - if 'seed' in IR_node.IR_layer.attr: - seed = IR_node.IR_layer.attr['seed'].i - - code = "{:<15} = 
layers.Dropout(name = '{}', rate = {}, seed = {})({})".format( - IR_node.variable_name, - IR_node.name, - IR_node.IR_layer.attr["keep_prob"].f, - seed, - self.parent_variable_name(IR_node)) - return code - - def emit_FullyConnected(self, IR_node, in_scope=False): - if in_scope: - code = "{:<15} = K.bias_add(K.dot({}, K.variable(weights_dict['{}']['weights'])), K.variable(weights_dict['{}']['bias']))".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.name, - IR_node.name) - else: - code = "{:<15} = layers.Dense(name = '{}', units = {}, use_bias = {})({})".format( - IR_node.variable_name, - IR_node.name, - IR_node.get_attr('units'), - IR_node.get_attr('use_bias'), - self.parent_variable_name(IR_node)) - return code - - def emit_Flatten(self, IR_node, in_scope=False): - self.used_layers.add('Flatten') - code = "{:<15} = __flatten(name = '{}', input = {})".format( - IR_node.variable_name, - IR_node.name, - self.parent_variable_name(IR_node)) - return code - - def emit_Pool(self, IR_node, in_scope=False): - codes = list() - dim = len(IR_node.get_attr("strides")) - 2 - - pooling_type = IR_node.get_attr('pooling_type') - if pooling_type == "MAX": - pool_name = "MaxPooling{}D".format(dim) - elif pooling_type == "AVG": - pool_name = "AveragePooling{}D".format(dim) - else: - print(pooling_type) - assert False - - # TODO - if IR_node.layer.attr['global_pooling'].b: - - shape_str = IR_node.get_attr("shape_coreml") - if shape_str: - shape_str = ','.join([str(i) for i in shape_str]) - - codes.append("{:<15} = layers.Global{}(name = '{}')({})".format( - IR_node.variable_name+'before', - pool_name, - IR_node.name, - self.parent_variable_name(IR_node))) - - # when converting from coreml model, reshape is needed after the global pooling - codes.append("{:<15} = layers.Reshape(name = '{}', target_shape = ({},))({})".format( - IR_node.variable_name, - IR_node.name + 'reshape', - shape_str, - IR_node.variable_name+'before')) - else: - codes.append("{:<15} = layers.Global{}(name = '{}')({})".format( - IR_node.variable_name, - pool_name, - IR_node.name, - self.parent_variable_name(IR_node))) - - else: - dilations = IR_node.get_attr('dilations') - if dilations: - for e in IR_node.get_attr('dilations'): - assert e == 1 - - pool_size = IR_node.get_attr('kernel_shape')[1:-1] - - strides = IR_node.get_attr('strides')[1:-1] - padding = IR_node.get_attr('pads')[1:dim] - - if pooling_type == "AVG" and pool_size.count(pool_size[0]) == len(pool_size) and strides[0] == 1 and strides.count(strides[0]) == len(strides) and padding.count(padding[0]) == len(padding) and pool_size[0] == padding[0]*2 + 1: - pool_size = ', '.join('%s' % i for i in pool_size) - strides = ', '.join('%s' % i for i in strides) - codes.append("{:<15} = layers.{}(name = '{}', pool_size = ({}), strides = ({}), padding = '{}')({})".format( - IR_node.variable_name, - pool_name, - IR_node.name, - pool_size, - strides, - 'same', - self.parent_variable_name(IR_node) - )) - - else: - - pool_size = ', '.join('%s' % i for i in pool_size) - strides = ', '.join('%s' % i for i in strides) - input_node, padding = self._defuse_padding(IR_node) - - codes.append("{:<15} = layers.{}(name = '{}', pool_size = ({}), strides = ({}), padding = '{}')({})".format( - IR_node.variable_name, - pool_name, - IR_node.name, - pool_size, - strides, - padding, - input_node)) - return codes - - def emit_Reshape(self, IR_node, in_scope=False): - shape_str = self.shapeToStr(IR_node.IR_layer.attr["shape"].list.i) - code = "{:<15} = layers.Reshape(name = '{}', 
target_shape = ({},))({})".format( - IR_node.variable_name, - IR_node.name, - shape_str, - self.parent_variable_name(IR_node)) - return code - - def emit_Tanh(self, IR_node, in_scope=False): - code = self._emit_activation(IR_node, 'tanh', in_scope) - return code - - def emit_Relu(self, IR_node, in_scope=False): - code = self._emit_activation(IR_node, 'relu', in_scope) - return code - - def emit_Softmax(self, IR_node, in_scope=False): - code = self._emit_activation(IR_node, 'softmax', in_scope) - return code - - def emit_Sigmoid(self, IR_node, in_scope=False): - code = self._emit_activation(IR_node, 'sigmoid', in_scope) - return code - - def emit_Embedding(self, IR_node, in_scope=False): - - code = "{:<15} = layers.Embedding(name = '{}', input_dim = {}, output_dim = {}, mask_zero = {})({})".format( - IR_node.variable_name, - IR_node.name, - IR_node.get_attr('input_dim'), - IR_node.get_attr('output_dim'), - IR_node.get_attr('mask_zero'), - IR_node.in_edges[0]) - return code - - def emit_RNNs(self, IR_node, func): - # for Keras - if "dropout" in IR_node.IR_layer.attr: - dropout_str = ",dropout = {}, recurrent_dropout = {}".format( - IR_node.IR_layer.attr['dropout'].f, - IR_node.IR_layer.attr['recurrent_dropout'].f) - else: - dropout_str = "" - - code = "{:<15} = layers.{}(units = {}, use_bias = {} {})({})".format( - IR_node.name, - func, - IR_node.IR_layer.attr['units'].i, - IR_node.IR_layer.attr['use_bias'].b, - dropout_str, - IR_node.in_edges[0]) - - return code - - def emit_LSTM(self, IR_node, in_scope=False): - return self.emit_RNNs(IR_node, "LSTM") - - def emit_GRU(self, IR_node, in_scope=False): - return self.emit_RNNs(IR_node, "GRU") - - def emit_Concat(self, IR_node, in_scope=False): - inputs = ', '.join('%s' % self.parent_variable_name( - IR_node, s) for s in IR_node.in_edges) - if in_scope: - code = "{:<15} = K.concatenate([{}])".format( - IR_node.variable_name, - inputs) - else: - code = self._emit_merge(IR_node, "concatenate") - return code - - def emit_BatchNorm(self, IR_node, in_scope=False): - axis = IR_node.layer.attr['axis'].i if 'axis' in IR_node.layer.attr else -1 - - code = "{:<15} = layers.BatchNormalization(name = '{}', axis = {}, epsilon = {}, center = {}, scale = {})({})".format( - IR_node.variable_name, - IR_node.name, - axis, - IR_node.layer.attr['epsilon'].f, - IR_node.layer.attr['bias'].b, - IR_node.layer.attr['scale'].b, - self.parent_variable_name(IR_node)) - return code - - def emit_Scale(self, IR_node, in_scope=False): - self.used_layers.add('Scale') - axis = IR_node.layer.attr['axis'].i if 'axis' in IR_node.layer.attr else -1 - - code = "{:<15} = Scale(name = '{}', axis = {}, center = {}, scale = {})({})".format( - IR_node.variable_name, - IR_node.name, - axis, - IR_node.layer.attr['use_bias'].b, - True, - self.parent_variable_name(IR_node)) - return code - - def emit_Pad(self, IR_node, in_scope=False): - mode = IR_node.get_attr('mode', 'constant') - mode = mode.lower() - if mode == "constant": - func = "ZeroPadding" - else: - raise NotImplementedError() - - dim = len(IR_node.get_attr('pads')) // 2 - 2 - - padding = self._convert_padding(IR_node.get_attr('pads')) - code = "{:<15} = layers.{}{}D(name='{}', padding={})({})".format( - IR_node.variable_name, - func, - dim, - IR_node.name, - padding, - self.parent_variable_name(IR_node)) - return code - - def emit_Squeeze(self, IR_node, in_scope=False): - return self.emit_Flatten(IR_node) - - def emit_ReduceMean(self, IR_node, in_scope=False): - axes = ', '.join('%s' % i for i in IR_node.get_attr('axes')) - - code 
= "{:<15} = layers.Lambda(lambda x: K.mean(x, axis=[{}], keepdims={}))({})".format( - IR_node.variable_name, - axes, - IR_node.get_attr('keepdims'), - self.parent_variable_name(IR_node)) - return code - - def emit_LRN(self, IR_node, in_scope=False): - self.used_layers.add(IR_node.type) - code = "{:<15} = LRN(size = {}, alpha = {}, beta = {}, k = {}, name = '{}')({})".format( - IR_node.variable_name, - IR_node.get_attr('size'), - IR_node.get_attr('alpha'), - IR_node.get_attr('beta'), - IR_node.get_attr('k'), - IR_node.name, - self.parent_variable_name(IR_node)) - return code - - def emit_Split(self, IR_node, in_scope=False): - if in_scope: - axis = IR_node.get_attr('axis') - split_num = IR_node.get_attr('split') - segment_len = "K.int_shape({})[{}]//{}".format( - self.parent_variable_name(IR_node), axis, split_num) - split_str = '[' + \ - ','.join(':' for i in range(axis)) + ',{}:{},...]' - split_strs = [] - - for i in range(split_num-1): - split_strs.append(self.parent_variable_name( - IR_node)+split_str.format(str(i)+'*' + segment_len, str(i+1)+'*'+segment_len)) - - split_strs.append(self.parent_variable_name( - IR_node)+split_str.format(str(split_num-1)+'*'+segment_len, '')) - - code = "{:<15} = {}".format( - IR_node.variable_name, ', '.join(split_strs)) - - else: - self.used_layers.add(IR_node.type) - code = "{:<15} = __split(input={}, split_num={}, axis={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.get_attr('split'), - IR_node.get_attr('axis')) - - return code - - def emit_Unsqueeze(self, IR_node, in_scope=False): - self.used_layers.add(IR_node.type) - - code = "{:<15} = __unsqueeze(input={}, axis={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.get_attr('axes')[0]) - return code - - def emit_Constant(self, IR_node, in_scope=False): - - if in_scope: - if IR_node.get_attr('value'): - code = "{:<15} = K.constant({})".format( - IR_node.variable_name, IR_node.get_attr('value')) - else: - code = "{:<15} = K.constant(weights_dict['{}']['value'])".format( - IR_node.variable_name, IR_node.name) - return code - else: - pass - - def emit_Shape(self, IR_node, in_scope=False): - self.used_layers.add(IR_node.type) - - code = "{:<15} = __shape(input={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node)) - return code - - def emit_Fill(self, IR_node, in_scope=False): - self.used_layers.add(IR_node.type) - code = "{:<15} = __fill(input={}, value={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.get_attr('value')) - - return code - - def emit_Slice(self, IR_node, in_scope=False): - # It arouses some problems: - # it can be implemented by Lambda Layer - # https://github.com/keras-team/keras/issues/890 - - self.used_layers.add(IR_node.type) - - extra_str = "" - if IR_node.get_attr('strides'): - extra_str += "strides={}".format(IR_node.get_attr('strides')) - if IR_node.get_attr('begin_mask'): - extra_str += ", begin_mask={}".format( - IR_node.get_attr('begin_mask')) - if IR_node.get_attr('end_mask'): - extra_str += ", end_mask={}".format(IR_node.get_attr('end_mask')) - if IR_node.get_attr('shrink_axis_mask'): - extra_str += ", shrink_axis_mask={}".format( - IR_node.get_attr('shrink_axis_mask')) - - code = "{:<15} = __slice({}, {}, {}, {})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.get_attr('starts'), - IR_node.get_attr('ends'), - extra_str) - return code - - def emit_Unstack(self, IR_node, in_scope=False): - 
self.used_layers.add(IR_node.type)
-
-        code = "{:<15} = __unstack(input={}, num={}, axis={})".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.get_attr('num'),
-            IR_node.get_attr('axis'))
-        return code
-
-    def emit_Pack(self, IR_node, in_scope=False):
-        pass
-
-    def emit_SeparableConv(self, IR_node, in_scope=False):
-        assert len(IR_node.get_attr("strides")) == 4
-        return self._emit_convolution(IR_node, "layers.SeparableConv2D")
-
-    def emit_Relu6(self, IR_node, in_scope=False):
-        try:
-            # Keras == 2.1.6 exposes relu6 under keras.applications.mobilenet
-            from keras.applications.mobilenet import relu6
-            str_relu6 = 'keras.applications.mobilenet.relu6'
-            code = "{:<15} = layers.Activation({}, name = '{}')({})".format(
-                IR_node.variable_name,
-                str_relu6,
-                IR_node.name,
-                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)
-            return code
-
-        except ImportError:
-            # Keras == 2.2.2 provides a dedicated ReLU layer instead
-            from keras.layers import ReLU
-            code = "{:<15} = layers.ReLU(6, name = '{}')({})".format(
-                IR_node.variable_name,
-                IR_node.name,
-                self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name)
-            return code
-
-    def emit_DepthwiseConv(self, IR_node, in_scope=False):
-        try:
-            from keras.applications.mobilenet import DepthwiseConv2D
-            return self._emit_convolution(IR_node, 'keras.applications.mobilenet.DepthwiseConv2D')
-        except ImportError:
-            return self._emit_convolution(IR_node, 'layers.DepthwiseConv2D')
-
-    def emit_Crop(self, IR_node, in_scope=False):
-        border = IR_node.get_attr('border')
-        rank = len(border) // 2
-        cropping = []
-        for idx in xrange(rank):
-            cropping.append(tuple([border[idx * 2], border[idx * 2 + 1]]))
-
-        code = "{:<15} = layers.Cropping{}D(cropping={}, name='{}')({})".format(
-            IR_node.variable_name,
-            rank,
-            tuple(cropping),
-            IR_node.name,
-            self.parent_variable_name(IR_node))
-        return code
-
-    def emit_LeakyRelu(self, IR_node, in_scope=False):
-        code = "{:<15} = layers.LeakyReLU(name='{}', alpha = {})({})".format(
-            IR_node.variable_name,
-            IR_node.name,
-            IR_node.get_attr('alpha'),
-            self.parent_variable_name(IR_node))
-        return code
-
-    def emit_UpSampling2D(self, IR_node, in_scope=False):
-        code = "{:<15} = layers.UpSampling2D(name='{}', size= ({}), data_format = 'channels_last')({})".format(
-            IR_node.variable_name,
-            IR_node.name,
-            IR_node.get_attr('scales'),
-            self.parent_variable_name(IR_node))
-        return code
-
-    def emit_SpaceToDepth(self, IR_node, in_scope=False):
-        self.used_layers.add(IR_node.type)
-        assert IR_node.get_attr('blocksize') == 2
-        # TODO: arguments won't be saved in keras export model
-
-        blocksize = "arguments={'blocksize': %d}" % 2
-        code = "{:<15} = layers.Lambda(space_to_depth, {}, name='{}')({})".format(
-            IR_node.variable_name,
-            blocksize,
-            IR_node.name,
-            self.parent_variable_name(IR_node))
-        return code
-
-    # NOTE: 'Maxmum' (sic) is the IR op name the parsers emit, so the method
-    # name keeps that spelling for gen_code's "emit_" + node_type dispatch.
-    def emit_Maxmum(self, IR_node, in_scope=False):
-        if in_scope:
-            code = "{:<15} = K.maximum({}, {})".format(
-                IR_node.variable_name,
-                self.parent_variable_name(IR_node),
-                self.parent_variable_name(IR_node, [1])
-            )
-            return code
-        else:
-            # layers.maximum is the functional merge helper that accepts an
-            # inputs= list, matching the 'concatenate' used by emit_Concat
-            return self._emit_merge(IR_node, 'maximum')
-
-    def emit_Minimum(self, IR_node, in_scope=False):
-        if in_scope:
-            code = "{:<15} = K.minimum({}, {})".format(
-                IR_node.variable_name,
-                self.parent_variable_name(IR_node),
-                self.parent_variable_name(IR_node, [1])
-            )
-            return code
-        else:
-            return self._emit_merge(IR_node, 'minimum')
-
-    def emit_yolo(self, IR_node, in_scope=False):
-        self.used_layers.add('Yolo')
-        self.yolo_parameter = [IR_node.get_attr('anchors'),
-                               IR_node.get_attr('classes'),
-                               IR_node.get_attr("ignore_thresh"),
-
IR_node.get_attr("jitter")] - code = "{:<15} = {}".format( - IR_node.variable_name, - self.parent_variable_name(IR_node)) - return code - - def emit_region(self, IR_node, in_scope=False): - self.used_layers.add('Region') - code = "{:<15} = {}".format( - IR_node.variable_name, - self.parent_variable_name(IR_node)) - self.region_parameter = [IR_node.get_attr('anchors'), - IR_node.get_attr('classes'), - IR_node.get_attr("thresh"), - IR_node.get_attr("softmax"), - IR_node.get_attr("bias_match"), - IR_node.get_attr("jitter"), - IR_node.get_attr("num"), - IR_node.get_attr("random"), - IR_node.get_attr("coords"), - IR_node.get_attr("absolute"), - IR_node.get_attr("rescore"), - IR_node.get_attr("class_scale"), - IR_node.get_attr("object_scale"), - IR_node.get_attr("noobject_scale"), - IR_node.get_attr("coord_scale"), - ] - return code - - def emit_Scope(self, IR_node, in_scope=False): - if hasattr(self, '_emit_' + IR_node.pattern): - func = getattr(self, '_emit_' + IR_node.pattern) - line = func(IR_node) - return line - - input_vars = list() - for idx, in_edge in enumerate(IR_node.in_edges): - in_node = self.IR_graph.get_node(in_edge) - # the input is a list - if in_node.type == 'Scope' and len(in_node.return_variables) > 1 and ':' not in in_edge: - var_name = ', '.join( - [(in_node.variable_name + "[%s]") % s for s in range(len(in_node.return_variables))]) - input_vars.append(var_name) - else: - input_vars.append(self.parent_variable_name(IR_node, [idx])) - - code = "{:<15} = my_{}()([{}])".format( - IR_node.real_variable_name, - IR_node.pattern, - ', '.join(input_vars)) - self._gen_scope_code(IR_node) - - return code - - def _gen_scope_code(self, scope_node): - - def _scope_func(scope_name, params, code, return_var): - if len(return_var) > 1: - return_var_code = '[{}]'.format(', '.join(return_var)) - output_shape_code = ' self.output_shapes = [{}]\n'.format( - ', '.join(['K.int_shape(%s)' % s for s in return_var])) - else: - return_var_code = ', '.join(return_var) - output_shape_code = ' self.output_shapes = K.int_shape({})\n'.format( - return_var[0]) - - code = """ -class my_{}(keras.layers.Layer): - def __init__(self, **kwargs): - super(my_{}, self).__init__(**kwargs) - def call(self, inputs): - -{} -{} -{} - return {} - - def compute_output_shape(self, input_shape): - return self.output_shapes - - """.format(scope_name, scope_name, params, code, output_shape_code, return_var_code) - return code - - if not self.layers_codes.get(scope_node.pattern, None): - body_code = str() - for node_name in scope_node.topology_list: - node = self.IR_graph.get_node(node_name) - node_type = node.type - - if hasattr(self, "emit_" + node_type): - func = getattr(self, "emit_" + node_type) - line = func(node, True) - if line != None: - body_code += " " + line + '\n' - else: - print( - "KerasEmitter has not supported operator [%s]." % (node_type)) - self.emit_UNKNOWN(node) - - # param_code does not need parameter slice. - input_params = scope_node.input_params - - param_code = str() - import re - for i, p in enumerate(scope_node.in_edges): - p_node = self.IR_graph.get_node(p) - # input is a list. 
- if p_node.type == 'Scope' and len(p_node.return_variables) > 1 and ':' not in p: - param_code += " {} = [{}]\n".format(p_node.variable_name, ', '.join( - 'inputs[%s]' % s for s in range(i, i + len(p_node.return_variables)))) - else: - param_code += " {} = inputs[{}]\n".format( - p_node.variable_name, i) - - function_code = _scope_func( - scope_node.pattern, param_code, body_code, scope_node.return_variables) - self.layers_codes[scope_node.pattern] = function_code - return body_code - - def _emit_h_zero(self, IR_node): - if not self.layers_codes.get(IR_node.pattern, None): - class_code = ''' -class my_h_zero(keras.layers.Layer): - def __init__(self, **kwargs): - super(my_h_zero, self).__init__(**kwargs) - - def call(self, dummy): - {:<15} = K.constant(np.full((1, {}), {})) - - return {} - '''.format(IR_node.variable_name, - IR_node.get_attr('fill_size'), - IR_node.get_attr('fill_value'), - IR_node.variable_name) - self.layers_codes[IR_node.pattern] = class_code - - code = "{:<15} = my_h_zero()({})".format( - IR_node.variable_name, self.parent_variable_name(IR_node)) - - return code - - def _layer_Yolo(self): - self.add_body(0, ''' -def yolo_parameter(): - return {} -'''.format(self.yolo_parameter)) - - def _layer_Region(self): - self.add_body(0, ''' -def region_parameter(): - return {} -'''.format(self.region_parameter)) - - def _layer_SpaceToDepth(self): - self.add_body(0, ''' -def space_to_depth(input, blocksize): - import tensorflow as tf - return tf.space_to_depth(input, block_size=blocksize) -''') - - def _layer_Flatten(self): - self.add_body(0, ''' -def __flatten(name, input): - if input.shape.ndims > 2: return layers.Flatten(name = name)(input) - else: return input -''') - - def _layer_LRN(self): - self.add_body(0, ''' -from keras.layers.core import Layer -class LRN(Layer): - - def __init__(self, size=5, alpha=0.0005, beta=0.75, k=2, **kwargs): - self.n = size - self.alpha = alpha - self.beta = beta - self.k = k - super(LRN, self).__init__(**kwargs) - - def build(self, input_shape): - self.shape = input_shape - super(LRN, self).build(input_shape) - - def call(self, x, mask=None): - half_n = self.n - 1 - squared = K.square(x) - scale = self.k - norm_alpha = self.alpha / (2 * half_n + 1) - if K.image_dim_ordering() == "th": - b, f, r, c = self.shape - squared = K.expand_dims(squared, 0) - squared = K.spatial_3d_padding(squared, padding=((half_n, half_n), (0, 0), (0,0))) - squared = K.squeeze(squared, 0) - for i in range(half_n*2+1): - scale += norm_alpha * squared[:, i:i+f, :, :] - else: - b, r, c, f = self.shape - squared = K.expand_dims(squared, -1) - squared = K.spatial_3d_padding(squared, padding=((0, 0), (0,0), (half_n, half_n))) - squared = K.squeeze(squared, -1) - for i in range(half_n*2+1): - scale += norm_alpha * squared[:, :, :, i:i+f] - - scale = K.pow(scale, self.beta) - return x / scale - - def compute_output_shape(self, input_shape): - return input_shape''') - - def _layer_Conv(self): - self.add_body(0, """ -def convolution(weights_dict, name, input, group, conv_type, filters=None, **kwargs): - if not conv_type.startswith('layer'): - layer = keras.applications.mobilenet.DepthwiseConv2D(name=name, **kwargs)(input) - return layer - elif conv_type == 'layers.DepthwiseConv2D': - layer = layers.DepthwiseConv2D(name=name, **kwargs)(input) - return layer - - inp_filters = K.int_shape(input)[-1] - inp_grouped_channels = int(inp_filters / group) - out_grouped_channels = int(filters / group) - group_list = [] - if group == 1: - func = getattr(layers, conv_type.split('.')[-1]) - 
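-        # Ordinary (ungrouped) convolution: resolve the layer class named by
-        # conv_type (e.g. 'layers.Conv2D') and apply it directly; the grouped
-        # slicing/concat path below is only needed when group > 1.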
layer = func(name = name, filters = filters, **kwargs)(input) - return layer - weight_groups = list() - if not weights_dict == None: - w = np.array(weights_dict[name]['weights']) - weight_groups = np.split(w, indices_or_sections=group, axis=-1) - for c in range(group): - x = layers.Lambda(lambda z: z[..., c * inp_grouped_channels:(c + 1) * inp_grouped_channels])(input) - x = layers.Conv2D(name=name + "_" + str(c), filters=out_grouped_channels, **kwargs)(x) - weights_dict[name + "_" + str(c)] = dict() - weights_dict[name + "_" + str(c)]['weights'] = weight_groups[c] - group_list.append(x) - layer = layers.concatenate(group_list, axis = -1) - if 'bias' in weights_dict[name]: - b = K.variable(weights_dict[name]['bias'], name = name + "_bias") - layer = layer + b - return layer""") - - def _layer_Scale(self): - self.add_body(0, """ -from keras.engine import Layer, InputSpec -from keras import initializers -from keras import backend as K - - -class Scale(Layer): - - def __init__(self, - axis=-1, - center=True, - scale=True, - beta_initializer='zeros', - gamma_initializer='ones', - **kwargs): - super(Scale, self).__init__(**kwargs) - self.supports_masking = True - self.axis = axis - self.center = center - self.scale = scale - self.beta_initializer = initializers.get(beta_initializer) - self.gamma_initializer = initializers.get(gamma_initializer) - - - def build(self, input_shape): - dim = input_shape[self.axis] - if dim is None: - raise ValueError('Axis ' + str(self.axis) + ' of ' - 'input tensor should have a defined dimension ' - 'but the layer received an input with shape ' + - str(input_shape) + '.') - self.input_spec = InputSpec(ndim=len(input_shape), - axes={self.axis: dim}) - shape = (dim,) - - if self.scale: - self.gamma = self.add_weight(shape=shape, - name='gamma', - initializer=self.gamma_initializer) - else: - self.gamma = None - if self.center: - self.beta = self.add_weight(shape=shape, - name='beta', - initializer=self.beta_initializer) - else: - self.beta = None - - - self.built = True - - def call(self, inputs, training=None): - input_shape = K.int_shape(inputs) - # Prepare broadcasting shape. 
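-        # gamma/beta are 1-D vectors along self.axis; reshaping them to the
-        # broadcast shape lets them broadcast across the full input tensor.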
-        ndim = len(input_shape)
-        reduction_axes = list(range(len(input_shape)))
-        del reduction_axes[self.axis]
-        broadcast_shape = [1] * len(input_shape)
-        broadcast_shape[self.axis] = input_shape[self.axis]
-        return K.reshape(self.gamma, broadcast_shape) * inputs + K.reshape(self.beta, broadcast_shape)
-
-    def get_config(self):
-        config = {
-            'axis': self.axis,
-            'center': self.center,
-            'scale': self.scale,
-        }
-        base_config = super(Scale, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-    def compute_output_shape(self, input_shape):
-        return input_shape""")
-
-    def _layer_Split(self):
-        self.add_body(0, '''
-def __split(input, split_num, axis):
-    return Lambda(lambda x: tf.split(x, split_num, axis))(input)
-    ''')
-
-    def _layer_Unsqueeze(self):
-        self.add_body(0, '''
-def __unsqueeze(input, axis):
-    return Lambda(lambda x: tf.expand_dims(x, axis))(input)
-    ''')
-
-    def _layer_Fill(self):
-        self.add_body(0, '''
-def __fill(input, value):
-    class Fill(keras.layers.Layer):
-        def call(self, input):
-            if keras.backend.backend() == 'tensorflow':
-                output = tf.fill(input, value)
-            else:
-                raise NotImplementedError
-            self.output_dim = [dim.value for dim in output.shape]
-            return output
-
-        def compute_output_shape(self, input_shape):
-            return tuple(self.output_dim)
-    # output = Lambda(lambda x: tf.fill(x, value))(input)
-    output = Fill()(input)
-    return output
-    ''')
-
-    def _layer_Slice(self):
-        self.add_body(0, '''
-def __slice(input, start, end, **kargs):
-    return Lambda(lambda x: tf.strided_slice(x, start, end, **kargs))(input)
-    ''')
-
-    def _layer_Unstack(self):
-        self.add_body(0, '''
-def __unstack(input, num, axis):
-    return Lambda(lambda x: tf.unstack(x, num, axis))(input)
-    ''')
-
-    def _layer_Mul(self):
-        self.add_body(0, '''
-class my_mul(keras.layers.Layer):
-    def __init__(self, **kwargs):
-        super(my_mul, self).__init__(**kwargs)
-    def call(self, inputs):
-        res = inputs[0] * inputs[1]
-        self.output_shapes = K.int_shape(res)
-        return res
-
-    def compute_output_shape(self, input_shape):
-        return self.output_shapes
-''')
-
-    def _layer_Add(self):
-        self.add_body(0, '''
-class my_add(keras.layers.Layer):
-    def __init__(self, **kwargs):
-        super(my_add, self).__init__(**kwargs)
-    def call(self, inputs):
-        res = inputs[0] + inputs[1]
-        self.output_shapes = K.int_shape(res)
-        return res
-
-    def compute_output_shape(self, input_shape):
-        return self.output_shapes
-''')
-
-    def _layer_Sub(self):
-        self.add_body(0, '''
-class my_sub(keras.layers.Layer):
-    def __init__(self, **kwargs):
-        super(my_sub, self).__init__(**kwargs)
-    def call(self, inputs):
-        res = inputs[0] - inputs[1]
-        self.output_shapes = K.int_shape(res)
-        return res
-
-    def compute_output_shape(self, input_shape):
-        return self.output_shapes
-''')
-
-    def _layer_Shape(self):
-        self.add_body(0, '''
-def __shape(input):
-    return Lambda(lambda x: tf.shape(x))(input)
-    ''')
-
-# def _layer_Constant(self):
-#     self.add_body(0, '''
-# class my_constant(keras.layers.Layer):
-#     def __init__(self, value, **kwargs):
-#         super(my_constant, self).__init__(**kwargs)
-#         self._value = value
-#     # the input is dummy, just for creating keras graph.
-# def call(self, dummy): -# res = K.constant(self._value) -# self.output_shapes = K.int_shape(res) -# return res - -# def compute_output_shape(self, input_shape): -# return self.output_shapes -# ''') - - def _layer_Mul_Constant(self): - self.add_body(0, ''' -def mul_constant(weight_factor, layer_name): - weight = Lambda(lambda x: x*weight_factor) - weight(layer_name) - return weight.output -''') -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- -import os -import keras as _keras -from mmdnn.conversion.common.DataStructure.graph import GraphNode, Graph - - -class Keras2GraphNode(GraphNode): - - def __init__(self, layer): - super(Keras2GraphNode, self).__init__(layer) - - @property - def name(self): - return self.layer.name - - @property - def type(self): - return self.layer.__class__.__name__ - - @property - def keras_layer(self): - return self.layer - - -class Keras2Graph(Graph): - - def __init__(self, model): - # sanity check. - if not (type(model) == _keras.models.Sequential or type(model) == _keras.models.Model): - raise TypeError( - "Keras layer of type %s is not supported." % type(model)) - super(Keras2Graph, self).__init__(model) - self.model = model - - def build(self): - self.input_layers = list() - for i, layer in enumerate(self.model.layers): - self.layer_map[layer.name] = Keras2GraphNode(layer) - self.layer_name_map[layer.name] = layer.name - for node in layer._inbound_nodes: - for pred in node.inbound_layers: - if pred.name not in self.layer_map: - self.layer_map[pred.name] = Keras2GraphNode(pred) - self.layer_name_map[pred.name] = pred.name - self._make_connection(pred.name, layer.name) - - # Kit: TODO - # Duplicate models for weight sharing - # Expand the sub-models - super(Keras2Graph, self).build() -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ---------------------------------------------------------------------------------------------- - -import os -from six import string_types as _string_types -import keras as _keras -from keras import backend as _K - -from mmdnn.conversion.keras.keras2_graph import Keras2Graph -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType -from mmdnn.conversion.common.DataStructure.parser import Parser -from mmdnn.conversion.common.utils import * - - -class Keras2Parser(Parser): - - dtype_map = { - "float16": graph_pb2.DT_FLOAT16, - "float32": graph_pb2.DT_FLOAT32, - "float64": graph_pb2.DT_FLOAT64, - "int16": graph_pb2.DT_INT16, - "int32": graph_pb2.DT_INT32, - "int64": graph_pb2.DT_INT64, - "uint8": graph_pb2.DT_UINT8, - "uint16": graph_pb2.DT_UINT16 - } - - activation_map = { - "relu": "Relu", - 'softmax': "Softmax", - 'sigmoid': "Sigmoid", - "tanh": "Tanh", - "elu": "Elu", - "relu6": "Relu6", - 'softplus': 'Softplus', - 'softsign': 'Softsign', - 'hard_sigmoid': 'HardSigmoid' - } - - def _load_model(self, model_network_path, model_weight_path): - """Load a keras model from disk - - Parameters - ---------- - model_network_path: str - Path where the model network path is (json file) - - model_weight_path: str - Path where the model network weights are (hd5 file) - - Returns - ------- - model: A keras model - """ - from keras.models import model_from_json - - # Load the model network - json_file = open(model_network_path, 'r') - loaded_model_json = json_file.read() - json_file.close() - - # Load the model weights - - try: - from keras.applications.mobilenet import relu6 - from keras.applications.mobilenet import DepthwiseConv2D - loaded_model = model_from_json(loaded_model_json, custom_objects={ - 'relu6': _keras.applications.mobilenet.relu6, - 'DepthwiseConv2D': _keras.applications.mobilenet.DepthwiseConv2D}) - except: - import keras.layers as layers - loaded_model = model_from_json(loaded_model_json, custom_objects={ - 'relu6': layers.ReLU(6, name='relu6'), - 'DepthwiseConv2D': layers.DepthwiseConv2D}) - - if model_weight_path: - if os.path.isfile(model_weight_path): - loaded_model.load_weights(model_weight_path) - self.weight_loaded = True - print("Network file [{}] and [{}] is loaded successfully.".format( - model_network_path, model_weight_path)) - - else: - print("Warning: Weights File [%s] is not found." % ( - model_weight_path)) - - return loaded_model - - @property - def src_graph(self): - return self.keras_graph - - def __init__(self, model): - super(Keras2Parser, self).__init__() - - # load model files into Keras graph - if isinstance(model, _string_types): - try: - # Keras 2.1.6 - from keras.applications.mobilenet import relu6 - from keras.applications.mobilenet import DepthwiseConv2D - model = _keras.models.load_model( - model, - custom_objects={ - 'relu6': _keras.applications.mobilenet.relu6, - 'DepthwiseConv2D': _keras.applications.mobilenet.DepthwiseConv2D - } - ) - except: - # Keras. 
2.2.2 - import keras.layers as layers - model = _keras.models.load_model( - model, - custom_objects={ - 'relu6': layers.ReLU(6, name='relu6'), - 'DepthwiseConv2D': layers.DepthwiseConv2D - } - ) - self.weight_loaded = True - - elif isinstance(model, tuple): - model = self._load_model(model[0], model[1]) - - else: - assert False - - # _keras.utils.plot_model(model, "model.png", show_shapes = True) - - # Build network graph - self.data_format = _keras.backend.image_data_format() - self.keras_graph = Keras2Graph(model) - self.keras_graph.build() - self.lambda_layer_count = 0 - - def gen_IR(self): - for layer in self.keras_graph.topological_sort: - current_node = self.keras_graph.get_node(layer) - node_type = current_node.type - - if hasattr(self, "rename_" + node_type): - func = getattr(self, "rename_" + node_type) - func(current_node) - else: - print( - "KerasParser has not supported operator [%s]." % (node_type)) - self.rename_UNKNOWN(current_node) - - _K.clear_session() - - @staticmethod - def _set_output_shape(source_node, IR_node): - shape = graph_pb2.TensorShape() - for dim in source_node.layer.output_shape: - new_dim = shape.dim.add() - new_dim.size = dim if dim else -1 - - IR_node.attr["_output_shapes"].list.shape.extend([shape]) - - @staticmethod - def _copy_and_reop(source_node, IR_node, new_op=None): - IR_node.name = source_node.name - IR_node.op = source_node.type if new_op == None else new_op - - if hasattr(source_node.layer, "dtype"): - IR_node.attr["dtype"].type = Keras2Parser.dtype_map[source_node.layer.dtype] - - Keras2Parser._set_output_shape(source_node, IR_node) - - @staticmethod - def _copy_shape(source_node, target_node): - if hasattr(source_node, "output_shape"): - for dim in source_node.output_shape: - new_dim = target_node.attr["shape"].shape.dim.add() - new_dim.size = -1 if dim == None else dim - - else: - target_node.attr["shape"].shape.unknown_rank = True - - @staticmethod - def _convert_dataformat(source_node, target_node): - if source_node.keras_layer.data_format == 'channels_last': - target_node.attr["data_format"].s = "NHWC" - elif source_node.keras_layer.data_format == 'channels_first': - target_node.attr["data_format"].s = "NCHW" - else: - print("Warning: [%s] don't have data format info." 
% (source_node.keras_layer.name))
-
-    @staticmethod
-    def _convert_padding(source_node, IR_node):
-        # TODO: Fused conv and pool with padding is different from defused operators
-        dims = len(source_node.layer.input_shape)
-        if source_node.layer.padding == 'valid':
-            assign_IRnode_values(
-                IR_node, {'auto_pad': "VALID", 'pads': [0, 0] * dims})
-
-        elif source_node.layer.padding == 'same':
-            kernel_shape = source_node.layer.kernel_size if hasattr(
-                source_node.layer, 'kernel_size') else source_node.layer.pool_size
-            padding = compute_tf_same_padding(
-                source_node.layer.input_shape,
-                kernel_shape,
-                list(source_node.layer.strides))
-            assign_IRnode_values(
-                IR_node, {'auto_pad': "SAME_LOWER", 'pads': padding})
-
-        else:
-            assert False
-
-    def _defuse_activation(self, source_node):
-        if source_node.layer.activation is None or source_node.layer.activation.__name__ == "linear":
-            return
-
-        IR_node = self.IR_graph.node.add()
-        IR_node.name = source_node.real_name + "_activation"
-        IR_node.op = Keras2Parser.activation_map[source_node.layer.activation.__name__]
-        IR_node.input.append(source_node.real_name)
-        Keras2Parser._set_output_shape(source_node, IR_node)
-
-        # TODO: More activation functions
-        # for ELU
-        if hasattr(source_node.layer, 'alpha'):
-            assign_attr_value(IR_node.attr['alpha'], source_node.layer.alpha)
-
-        source_node.real_name = IR_node.name
-
-    def _convert_convolution(self, source_node, dim):
-        IR_node = self.IR_graph.node.add()
-
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-
-        # name, op
-        if source_node.type.startswith('Separable'):
-            Keras2Parser._copy_and_reop(source_node, IR_node, "SeparableConv")
-            if self.weight_loaded:
-                self.set_weight(source_node.name, 'depthwise_filter',
-                                source_node.layer.get_weights()[0])
-                self.set_weight(source_node.name, 'pointwise_filter',
-                                source_node.layer.get_weights()[1])
-
-        else:
-            if source_node.type.startswith('Conv'):
-                if source_node.type.endswith('Transpose'):
-                    Keras2Parser._copy_and_reop(
-                        source_node, IR_node, "ConvTranspose")
-                else:
-                    Keras2Parser._copy_and_reop(source_node, IR_node, "Conv")
-
-            elif source_node.type.startswith('Deconv'):
-                Keras2Parser._copy_and_reop(
-                    source_node, IR_node, "ConvTranspose")
-
-            elif source_node.type.startswith('Depthwise'):
-                Keras2Parser._copy_and_reop(
-                    source_node, IR_node, "DepthwiseConv")
-
-            else:
-                raise NotImplementedError(
-                    "Convolution layer [{}] is not supported.".format(source_node.type))
-
-        # weights
-        if self.weight_loaded:
-            self.set_weight(source_node.name, "weights",
-                            source_node.layer.get_weights()[0])
-            if source_node.layer.use_bias:
-                self.set_weight(source_node.name, "bias",
-                                source_node.layer.get_weights()[1])
-
-        if isinstance(source_node.layer.kernel_size, int):
-            source_node.layer.kernel_size = (
-                source_node.layer.kernel_size,) * dim
-
-        if isinstance(source_node.layer.strides, int):
-            source_node.layer.strides = (source_node.layer.strides,) * dim
-
-        if isinstance(source_node.layer.dilation_rate, int):
-            source_node.layer.dilation_rate = (
-                source_node.layer.dilation_rate,) * dim
-
-        kwargs = dict()
-
-        # pads
-        Keras2Parser._convert_padding(source_node, IR_node)
-
-        # filter
-        # [kd, kh, kw, channel_size, filter number]
-        in_channel = source_node.layer.input_shape[-1] if self.data_format == "channels_last" else source_node.layer.input_shape[1]
-        out_channel = source_node.layer.filters or source_node.layer.depth_multiplier
-
-        if source_node.type.startswith("Deconv"):
-            kwargs['kernel_shape'] = list(
-                source_node.layer.kernel_size) + [out_channel, in_channel]
-        else:
-            kwargs['kernel_shape'] = list(
-                source_node.layer.kernel_size) + [in_channel, out_channel]
-
-        # use_bias
-        kwargs['use_bias'] = source_node.keras_layer.use_bias
-
-        # strides
-        # [1, sd, sh, sw, 1]
-        kwargs['strides'] = [1] + list(source_node.layer.strides) + [1]
-
-        # dilations
-        # [1, dd, dh, dw, 1]
-        kwargs['dilations'] = [1] + list(source_node.layer.dilation_rate) + [1]
-
-        assign_IRnode_values(IR_node, kwargs)
-
-        # activation
-        self._defuse_activation(source_node)
-
-    def _convert_pooling(self, source_node, dim, pooling_type, is_global):
-        IR_node = self.IR_graph.node.add()
-
-        # name, op
-        Keras2Parser._copy_and_reop(source_node, IR_node, "Pool")
-
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-
-        kwargs = {}
-
-        kwargs['pooling_type'] = pooling_type
-
-        if is_global:
-            kwargs['global_pooling'] = True
-            kwargs['strides'] = [1] * (dim + 2)
-        else:
-            if isinstance(source_node.layer.pool_size, int):
-                source_node.layer.pool_size = (
-                    source_node.layer.pool_size,) * dim
-
-            if isinstance(source_node.layer.strides, int):
-                source_node.layer.strides = (source_node.layer.strides,) * dim
-
-            # padding
-            self._convert_padding(source_node, IR_node)
-
-            # strides
-            # [1, sd, sh, sw, 1]
-            kwargs['strides'] = [1] + list(source_node.layer.strides) + [1]
-
-            # window_shape
-            # [1, pd, ph, pw, 1]
-            kwargs['kernel_shape'] = [1] + \
-                list(source_node.layer.pool_size) + [1]
-
-        assign_IRnode_values(IR_node, kwargs)
-
-        if is_global:
-            flatten_node = self.IR_graph.node.add()
-            flatten_node.name = source_node.name + '_flatten'
-            flatten_node.op = 'Flatten'
-            flatten_node.input.append(source_node.name)
-            Keras2Parser._set_output_shape(source_node, flatten_node)
-            source_node.real_name = flatten_node.name
-
-    def _convert_merge(self, source_node, new_name=None):
-        IR_node = self.IR_graph.node.add()
-
-        # name, op
-        Keras2Parser._copy_and_reop(source_node, IR_node, new_name)
-
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-
-        # For concat axis
-        if hasattr(source_node.layer, 'axis'):
-            axis = source_node.layer.axis
-            if int(axis) == -1:
-                axis = 3 if self.data_format == "channels_last" else 2
-            IR_node.attr['axis'].i = axis
-
-        return IR_node
-
-    def _convert_padding_api(self, source_node, IR_node, mode):
-        # name, op
-        Keras2Parser._copy_and_reop(source_node, IR_node, "Pad")
-
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-
-        kwargs = dict()
-        kwargs['mode'] = mode
-
-        # padding
-        kwargs['pads'] = [0, 0]
-        for padding_pair in source_node.layer.padding:
-            kwargs['pads'].extend(padding_pair)
-        kwargs['pads'] += [0, 0]
-        kwargs['pads'] = convert_tf_pad_to_onnx(kwargs['pads'])
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_UNKNOWN(self, source_node):
-        print(source_node.layer.get_config())
-
-        # only for training
-        IR_node = self.IR_graph.node.add()
-
-        # name, op
-        Keras2Parser._copy_and_reop(source_node, IR_node)
-
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-
-    def rename_Activation(self, keras_node):
-        IR_node = self.IR_graph.node.add()
-
-        # name, op
-        try:
-            Keras2Parser._copy_and_reop(
-                keras_node, IR_node, self.activation_map[keras_node.keras_layer.activation.__name__])
-        except AttributeError:
-            Keras2Parser._copy_and_reop(
-                keras_node, IR_node, self.activation_map[keras_node.keras_layer.activation.name])
-
-        # input edge
-        self.convert_inedge(keras_node, IR_node)
-
-    # Merge Layers
-
-    def rename_Add(self, source_node):
-        self._convert_merge(source_node)
-
-    def rename_Conv1D(self, source_node):
self._convert_convolution(source_node, 1) - - def rename_Conv1DTranspose(self, source_node): - self._convert_convolution(source_node, 1) - - def rename_Conv2D(self, source_node): - self._convert_convolution(source_node, 2) - - def rename_Conv2DTranspose(self, source_node): - self._convert_convolution(source_node, 2) - - def rename_Conv3D(self, source_node): - self._convert_convolution(source_node, 3) - - def rename_Conv3DTranspose(self, source_node): - self._convert_convolution(source_node, 3) - - def rename_InputLayer(self, source_node): - # only for training - IR_node = self.IR_graph.node.add() - - # name, op - Keras2Parser._copy_and_reop(source_node, IR_node, "DataInput") - - # input edge - self.convert_inedge(source_node, IR_node) - - # shape - Keras2Parser._copy_shape(source_node.keras_layer, IR_node) - - def rename_GlobalMaxPooling1D(self, source_node): - self._convert_pooling(source_node, 1, "MAX", True) - - def rename_GlobalMaxPooling2D(self, source_node): - self._convert_pooling(source_node, 2, "MAX", True) - - def rename_GlobalMaxPooling3D(self, source_node): - self._convert_pooling(source_node, 3, "MAX", True) - - def rename_GlobalAveragePooling1D(self, source_node): - self._convert_pooling(source_node, 1, "AVG", True) - - def rename_GlobalAveragePooling2D(self, source_node): - self._convert_pooling(source_node, 2, "AVG", True) - - def rename_GlobalAveragePooling3D(self, source_node): - self._convert_pooling(source_node, 3, "AVG", True) - - def rename_MaxPooling1D(self, source_node): - self._convert_pooling(source_node, 1, "MAX", False) - - def rename_MaxPooling2D(self, source_node): - self._convert_pooling(source_node, 2, "MAX", False) - - def rename_MaxPooling3D(self, source_node): - self._convert_pooling(source_node, 3, "MAX", False) - - def rename_AveragePooling1D(self, source_node): - self._convert_pooling(source_node, 1, "AVG", False) - - def rename_AveragePooling2D(self, source_node): - self._convert_pooling(source_node, 2, "AVG", False) - - def rename_AveragePooling3D(self, source_node): - self._convert_pooling(source_node, 3, "AVG", False) - - def rename_Dropout(self, source_node): - # only for training - IR_node = self.IR_graph.node.add() - - # name, op - Keras2Parser._copy_and_reop(source_node, IR_node) - - # input edge - self.convert_inedge(source_node, IR_node) - - IR_node.attr["keep_prob"].f = source_node.keras_layer.rate - if source_node.keras_layer.seed != None: - IR_node.attr["seed"].i = source_node.keras_layer.seed - - # Core Layers - - def rename_Dense(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - Keras2Parser._copy_and_reop(source_node, IR_node, "FullyConnected") - - # input edge - self.convert_inedge(source_node, IR_node) - - # units - IR_node.attr["units"].i = source_node.keras_layer.units - - # use_bias - IR_node.attr["use_bias"].b = source_node.keras_layer.use_bias - - # weights - if self.weight_loaded == True: - self.set_weight(source_node.name, 'weights', - source_node.layer.get_weights()[0]) - if IR_node.attr["use_bias"].b == True: - self.set_weight(source_node.name, 'bias', - source_node.layer.get_weights()[1]) - - # activation - self._defuse_activation(source_node) - - def rename_Flatten(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - Keras2Parser._copy_and_reop(source_node, IR_node) - - # input edge - self.convert_inedge(source_node, IR_node) - - def rename_UpSampling2D(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - Keras2Parser._copy_and_reop(source_node, IR_node) - - 
# input edge - self.convert_inedge(source_node, IR_node) - - # size - IR_node.attr["scales"].list.i.extend(source_node.keras_layer.size) - - def rename_Embedding(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - Keras2Parser._copy_and_reop(source_node, IR_node) - - # input edge - self.convert_inedge(source_node, IR_node) - - # input_dim - IR_node.attr["input_dim"].i = source_node.keras_layer.input_dim - - # output_dim - IR_node.attr["output_dim"].i = source_node.keras_layer.output_dim - - # mask_zero - IR_node.attr["mask_zero"].b = source_node.keras_layer.mask_zero - - # weights - if self.weight_loaded: - self.set_weight(source_node.name, 'embedding_weights', - source_node.layer.get_weights()[0]) - - def rename_LSTM(self, keras_node): - IR_node = self.IR_graph.node.add() - - # name, op - Keras2Parser._copy_and_reop(keras_node, IR_node) - - # input edge - self.convert_inedge(keras_node, IR_node) - - # units - IR_node.attr["units"].i = keras_node.keras_layer.units - - # use_bias - IR_node.attr["use_bias"].b = keras_node.keras_layer.use_bias - - # for Keras, drop_out and recurrent_dropout - IR_node.attr["dropout"].f = keras_node.keras_layer.dropout - IR_node.attr["recurrent_dropout"].f = keras_node.keras_layer.recurrent_dropout - - # activation - self._defuse_activation(keras_node) - - def rename_GRU(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - Keras2Parser._copy_and_reop(source_node, IR_node) - - # input edge - self.convert_inedge(source_node, IR_node) - - # units - IR_node.attr["units"].i = source_node.keras_layer.units - - # activation - self._defuse_activation(source_node) - - # weights - if self.weight_loaded: - self.set_weight(source_node.name, 'gru_weights', - source_node.layer.get_weights()[0]) - self.set_weight(source_node.name, 'gru_recurrent_weights', - source_node.layer.get_weights()[1]) - if source_node.layer.use_bias: - self.set_weight(source_node.name, "gru_bias", - source_node.layer.get_weights()[2]) - - def rename_Multiply(self, source_node): - self._convert_merge(source_node, 'Mul') - - def rename_Average(self, source_node): - # Kit TODO : need to search the tf - self._convert_merge(source_node, 'Avg') - - def rename_Maximum(self, source_node): - self._convert_merge(source_node) - - def rename_Concatenate(self, source_node): - IR_node = self._convert_merge(source_node, 'Concat') - - def rename_Reshape(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - Keras2Parser._copy_and_reop(source_node, IR_node, 'Reshape') - - # input edge - self.convert_inedge(source_node, IR_node) - - # for target shape - IR_node.attr["shape"].list.i.append(-1) - IR_node.attr["shape"].list.i.extend(source_node.layer.target_shape) - - def rename_Lambda(self, source_node): - node_type = source_node.layer.name - if hasattr(self, "rename_" + node_type): - print("Try to convert Lambda function [{}]".format( - source_node.layer.name)) - func = getattr(self, "rename_" + node_type) - func(source_node) - else: - raise NotImplementedError( - "Lambda layer [{}] in keras is not supported yet.".format(node_type)) - - def rename_BatchNormalization(self, keras_node): - IR_node = self.IR_graph.node.add() - - # name, op - Keras2Parser._copy_and_reop(keras_node, IR_node, 'BatchNorm') - - # input edge - self.convert_inedge(keras_node, IR_node) - - # axis - IR_node.attr['axis'].i = keras_node.keras_layer.axis - - IR_node.attr['scale'].b = keras_node.keras_layer.scale - - IR_node.attr['bias'].b = keras_node.keras_layer.center - - 
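-        # The scale/center flags mirror Keras' BatchNormalization arguments and
-        # decide which learned parameters (gamma, beta) appear in get_weights().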
IR_node.attr['epsilon'].f = keras_node.layer.epsilon
-
-        if self.weight_loaded:
-            # Parameter arrangement in Keras: gamma, beta, mean, variance
-            idx = 0
-
-            # scale
-            if IR_node.attr['scale'].b:
-                self.set_weight(keras_node.name, "scale",
-                                keras_node.layer.get_weights()[idx])
-                idx += 1
-
-            # beta
-            if IR_node.attr['bias'].b:
-                self.set_weight(keras_node.name, "bias",
-                                keras_node.layer.get_weights()[idx])
-                idx += 1
-
-            # mean
-            self.set_weight(keras_node.name, "mean",
-                            keras_node.layer.get_weights()[idx])
-
-            # var
-            self.set_weight(keras_node.name, "var",
-                            keras_node.layer.get_weights()[idx + 1])
-
-    def rename_ZeroPadding2D(self, keras_node):
-        IR_node = self.IR_graph.node.add()
-        self._convert_padding_api(keras_node, IR_node, "constant")
-
-    def rename_SeparableConv2D(self, source_node):
-        self._convert_convolution(source_node, 2)
-
-    def rename_DepthwiseConv2D(self, source_node):
-        self._convert_convolution(source_node, 2)
-
-    @staticmethod
-    def custom_relu6(x):
-        return _keras.activations.relu(x, max_value=6)
-
-    def _convert_crop(self, source_node):
-        IR_node = self.IR_graph.node.add()
-
-        Keras2Parser._copy_and_reop(source_node, IR_node, "Crop")
-
-        self.convert_inedge(source_node, IR_node)
-
-        border = []
-        for i in source_node.layer.cropping:
-            for j in i:
-                border.append(j)
-
-        assign_IRnode_values(IR_node, {'border': border})
-
-    def rename_Cropping1D(self, source_node):
-        self._convert_crop(source_node)
-
-    def rename_Cropping2D(self, source_node):
-        self._convert_crop(source_node)
-
-    def rename_Cropping3D(self, source_node):
-        self._convert_crop(source_node)
-
-    def rename_LeakyReLU(self, source_node):
-        IR_node = self.IR_graph.node.add()
-        Keras2Parser._copy_and_reop(source_node, IR_node, 'LeakyRelu')
-        self.convert_inedge(source_node, IR_node)
-        assign_IRnode_values(
-            IR_node, {'alpha': source_node.layer.alpha.tolist()})
-
-    def rename_ReLU(self, source_node):
-        IR_node = self.IR_graph.node.add()
-        max_value = source_node.layer.max_value
-        if max_value == 6.0:
-            Keras2Parser._copy_and_reop(source_node, IR_node, 'Relu6')
-        else:
-            Keras2Parser._copy_and_reop(source_node, IR_node, 'Relu')
-
-        assign_IRnode_values(IR_node, {'max_value': max_value})
-        self.convert_inedge(source_node, IR_node)
-
-    def rename_space_to_depth_x2(self, source_node):
-        IR_node = self.IR_graph.node.add()
-
-        # name, op
-        Keras2Parser._copy_and_reop(source_node, IR_node, 'SpaceToDepth')
-        IR_node.name = "Lambda_{}".format(self.lambda_layer_count)
-
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-
-        # for target shape
-        IR_node.attr["blocksize"].i = 2
-        self.lambda_layer_count = self.lambda_layer_count + 1
-        source_node.real_name = IR_node.name
-
-
-def save_model(MainModel, network_filepath, weight_filepath, dump_filepath):
-    model = MainModel.KitModel(weight_filepath)
-    model.save(dump_filepath)
-    print('Keras model file is saved as [{}], generated by [{}.py] and [{}].'.format(
-        dump_filepath, network_filepath, weight_filepath))
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ---------------------------------------------------------------------------------------------- - -import os - -import math -import mxnet as mx -import numpy as np -from mmdnn.conversion.common.IR.IR_graph import IRGraph, IRGraphNode -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType -from mmdnn.conversion.common.DataStructure.emitter import Emitter -from mmdnn.conversion.common.utils import * -from mmdnn.conversion.rewriter.folder import Folder - - -class MXNetEmitter(Emitter): - - dtype_map = { - graph_pb2.DT_FLOAT16: "float16", - graph_pb2.DT_FLOAT32: "float32", - graph_pb2.DT_FLOAT64: "float64", - graph_pb2.DT_INT32: "int32", - graph_pb2.DT_UINT8: "uint8" - } - - activation_map = { - "relu": "Relu", - "sigmoid": "Sigmoid", - "tanh": "Tanh", - "elu": "Elu" - } - - transpose_map = { - 1: 2, - 2: 3, - -1: 1 - } - - naive_scope_pattern = [] - - channels_last = ['NDHWC', 'NHWC'] - - def __init__(self, model): - super(MXNetEmitter, self).__init__() - from six import string_types as _string_types - - if isinstance(model, _string_types): - network_path = model - self.weight_loaded = False - elif len(model) == 3: - network_path = model[0] - weight_path = model[1] - self.output_weights_file = model[2] - self.weights = np.load(weight_path).item() - self.weight_loaded = True - self.output_weights = dict() - else: - raise ValueError( - "the # of input arguments [{}] is not supported" % len(model)) - - self.IR_graph = IRGraph(network_path) - self.IR_graph.build() - - folder = Folder(self.IR_graph, self.weights) - folder.fold() - - @property - def header_code(self): - return """import mxnet as mx -import numpy as np -import math - -# mxnet-cpu only support channel first, default convert the model and weight as channel first - -def RefactorModel(): -""" - - def gen_code(self, phase): - self.IR_layer_map = dict() - self.add_body(0, self.header_code) - for layer in self.IR_graph.topological_sort: - self.IR_layer_map[layer] = self.IR_graph.get_node(layer) - - shape = dict() - for layer in self.IR_graph.topological_sort: - current_node = self.IR_graph.get_node(layer) - node_type = current_node.type - - if len(current_node.in_edges) == 0: - current_node.in_edges.append('data') - - if node_type.lower() in MXNetEmitter.activation_map: - func = getattr(self, "emit_Activation") - line = func( - current_node, MXNetEmitter.activation_map[node_type.lower()].lower()) - self.add_body(1, line) - - elif hasattr(self, "emit_" + node_type): - func = getattr(self, "emit_" + node_type) - line = func(current_node) - if line != None: - self.add_body(1, line) - else: - print( - "MXNet Emitter has not supported operator [%s]." 
% (node_type)) - self.emit_UNKNOWN(current_node) - - if node_type == "DataInput": - cur_shape = list() - first = True - for dim in current_node.IR_layer.attr["shape"].shape.dim: - if dim.size == -1 and first: - cur_shape.append(1) - print("Detect input layer [{}] using infer batch size, set it as default value [1]".format( - current_node.name)) - else: - if dim.size == -1: - print("Warning: user should change input size manually") - cur_shape.append(dim.size) - first = False - - cur_shape.insert(1, cur_shape.pop()) - shape[current_node.name] = ', '.join( - '%s' % i for i in cur_shape) - self.input_name_shape = {current_node.name: tuple(cur_shape)} - - if self.weight_loaded: - fullpath = os.path.abspath(self.output_weights_file) - dirname = os.path.dirname(fullpath) - if not os.path.exists(dirname): - os.makedirs(dirname) - with open(self.output_weights_file, 'wb') as outfile: - np.save(outfile, self.output_weights) - - comment = "\n # if a GPU is available, change mx.cpu() to mx.gpu()" - # We use the real_name for specifying the input layer in data_names - # since MXNet API wants the actual name of the layer. On the other - # hand, the module API wants the last symbol in the symbol chain, so - # for the output node we need to use the actual python variable name - # of the last layer (real_variable_name). - last_line = "{:<15} = mx.mod.Module(symbol = {}, context = mx.cpu(), data_names = ['{}'])".format( - "model", - ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers if self.IR_graph.get_node( - name).type != 'Pack' and self.IR_graph.get_node(name).type != 'Shape']), - ', '.join([self.IR_graph.get_node(name).real_name for name in self.IR_graph.input_layers if self.IR_graph.get_node(name).type != 'Const'])) - - self.add_body(1, comment) - self.add_body(1, last_line) - self.add_body(1, "return model") - - self.add_body(0, "") - for code in self.layers_codes.values(): - self.add_body(0, code) - - weight_code = "" - if not self.weight_loaded: - weight_code += "# emitter does not detect any import weights, you may generate weights file manually\n" - - weight_code += self.gen_weight_code(shape, phase) - - main_code = "if __name__ == '__main__':\n model = RefactorModel()\n" - if self.weight_loaded: - main_code += " # remember to adjust params path\n model = deploy_weight(model, '{}')\n".format( - self.output_weights_file) - - if phase == 'train': - train_code = """def train(model): - import logging - logging.getLogger().setLevel(logging.DEBUG) - model.fit(train_iter, # train data - eval_data = val_iter, # validation data - optimizer = 'sgd', # Defaults to 'sgd' - optimizer_params = {'learning_rate':0.01}, # use fixed learning rate - eval_metric = 'acc', # report accuracy during training, other possible predefined metrics are: 'ce', 'f1', 'mae', 'mse', 'rmse', 'top_k_accuracy' - batch_end_callback = mx.callback.Speedometer(batch_size, 100), # output progress for each 100 data batches - num_epoch = 10) # train for at most 10 dataset passes\n\n -""" - code = self.body_code + weight_code + train_code + main_code - else: - test_code = """from collections import namedtuple -Batch = namedtuple('Batch', ['data']) - - -def get_image(url, show=False): - import cv2 - # download and show the image - fname = mx.test_utils.download(url) - img = cv2.cvtColor(cv2.imread(fname), cv2.COLOR_BGR2RGB) - if img is None: - return None - if show: - import matplotlib.pyplot as plt - plt.imshow(img) - plt.axis('off') - # convert into format (batch, RGB, width, height) - img = 
cv2.resize(img, (224, 224)) - img = np.swapaxes(img, 0, 2) - img = np.swapaxes(img, 1, 2) - img = img[np.newaxis, :] - return img - - -def predict(model, labels, url): - # to show the image, change the argument show into True - img = get_image(url, show = False) - # compute the predict probabilities - model.forward(Batch([mx.nd.array(img)])) - prob = model.get_outputs()[0].asnumpy() - # print the top-5 - prob = np.squeeze(prob) - a = np.argsort(prob)[::-1] - for i in a[0:5]: - print('prbability = %f, class = %s' %(prob[i], labels[i]))\n\n -""" - - main_code += """ - # # call function predict - # with open('synset.txt', 'r') as f: - # labels = [l.rstrip() for l in f] - # predict(model, labels, 'http://writm.com/wp-content/uploads/2016/08/Cat-hd-wallpapers.jpg') -""" - - code = self.body_code + weight_code + test_code + main_code - - return code - - def gen_weight_code(self, shape, phase): - str = "def deploy_weight(model, weight_file):\n" - str += """ - if weight_file == None: - return - - try: - weights_dict = np.load(weight_file).item() - except: - weights_dict = np.load(weight_file, encoding='bytes').item() - - arg_params = dict() - aux_params = dict() - for weight_name, weight_data in weights_dict.items(): - weight_name = str(weight_name) - if "moving" in weight_name: - aux_params[weight_name] = mx.nd.array(weight_data) - else: - arg_params[weight_name] = mx.nd.array(weight_data) - -""" - if phase == 'train': - str += " model.bind(for_training = True, data_shapes = [" - else: - str += " model.bind(for_training = False, data_shapes = [" - first = True - for k, v in shape.items(): - if not first: - str += ", " - str += "('" + k + "', " + "(" + v + "))" - first = False - str += "])\n" - str += " model.set_params(arg_params = arg_params, aux_params = aux_params, allow_missing = True, allow_extra=True)\n\n return model\n\n\n" - return str - - @staticmethod - def calculate_same_pad(data_shape, kernel, stride): - if (data_shape % stride == 0): - pad = max(kernel - stride, 0) - else: - pad = max(kernel - (data_shape % stride), 0) - if pad % 2 == 0: - return False, pad - else: - return True, pad - - @staticmethod - def transfer_pad(pad_list): - defuse_pad = False - pad = list() - - assert len(pad_list) % 2 == 0 - mid = int(len(pad_list)/2) - pad_first = pad_list[1:mid-1] - pad_second = pad_list[mid+1:-1] - - for i in range(0, mid-2): - if not pad_first[i] == pad_second[i]: - defuse_pad = True - - if defuse_pad: - pad.extend([0] * 4) - for i in range(0, mid-2): - pad.extend([pad_first[i], pad_second[i]]) - else: - pad = pad_first - - return defuse_pad, pad - - @staticmethod - def transpose(data, dim): - if dim == 1: - data = data.transpose((2, 1, 0)) - elif dim == 2: - data = data.transpose((3, 2, 0, 1)) - elif dim == 3: - data = data.transpose((4, 3, 0, 1, 2)) - else: - raise ValueError("The weight of dim {} cannot transpose" % dim) - - return data - - def set_pad(self, IR_node, code, pad, _max_pool): - if _max_pool: - constant_value = "float('-inf')" - else: - constant_value = "0.0" - - code = "{:<15} = mx.sym.pad(data = {}, mode = 'constant', pad_width={}, constant_value = {}, name = '{}')".format( - IR_node.variable_name + "_pad", - self.parent_variable_name(IR_node), - tuple(pad), - constant_value, - IR_node.name + "_pad") - - for e in IR_node.in_edges: - e = e.split(':')[0] - if e == 'data': - continue - self.IR_layer_map[e].out_edges = [x if not self.IR_layer_map[x.split( - ':')[0]].name == IR_node.variable_name else IR_node.variable_name + "_pad" for x in self.IR_layer_map[e].out_edges] - 
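-        # Repoint every consumer of the original symbol at the new "_pad" symbol
-        # so the emitted padding actually feeds the downstream layers.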
- return code - - def emit_UNKNOWN(self, IR_node): - print(IR_node.name) - - def emit_FullyConnected(self, IR_node): - if self.weight_loaded: - weight_dict = self.weights[IR_node.name] - parent = self.IR_graph.get_parent(IR_node.name, [0]) - while parent.type == "Flatten" or parent.type == 'Dropout': - parent = self.IR_graph.get_parent(parent.name, [0]) - dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim) - if dim > 2: - original_dims = weight_dict['weights'].shape - dims = [ - i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1] - weight_dict['weights'] = np.reshape( - weight_dict['weights'], dims) - weight_dict['weights'] = np.transpose( - weight_dict['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1]) - weight_dict['weights'] = np.reshape( - weight_dict['weights'], original_dims) - self.output_weights[IR_node.name + - "_weight"] = weight_dict['weights'].transpose((1, 0)) - - num_hidden = IR_node.IR_layer.attr["units"].i - no_bias = not IR_node.IR_layer.attr["use_bias"].b - if not no_bias and self.weight_loaded: - self.output_weights[IR_node.name + "_bias"] = weight_dict['bias'] - - code = "{:<15} = mx.sym.FullyConnected(data = {}, num_hidden = {}, no_bias = {}, name = '{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - num_hidden, - no_bias, - IR_node.name) - - return code - - def _emit_convolution(self, IR_node, pattern): - if self.weight_loaded: - weight_dict = self.weights[IR_node.name] - weights = weight_dict['weights'] - - dim = len(IR_node.IR_layer.attr["kernel_shape"].list.i) - 2 - - kernel = list() - for idx in range(0, dim): - kernel.append(IR_node.IR_layer.attr["kernel_shape"].list.i[idx]) - - stride = list() - for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]: - stride.append(e) - - dilate = list() - for e in IR_node.IR_layer.attr["dilations"].list.i[1:-1]: - dilate.append(e) - if dilate == []: - dilate = [1, 1] - dilate = ', '.join('%s' % i for i in dilate) - - defuse_pad = False - pad = list() - if "pads" in IR_node.IR_layer.attr: - output_shape = list() - for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim: - output_shape.append(e.size) - - # print("Warning: MXNet Convolution Layer pad does not match IR Convolution Layer pad") - defuse_pad, pad = MXNetEmitter.transfer_pad( - IR_node.IR_layer.attr["pads"].list.i) - - num_filter = 0 - if pattern == "Deconvolution": - num_filter = IR_node.IR_layer.attr["kernel_shape"].list.i[-2] - else: - num_filter = IR_node.IR_layer.attr["kernel_shape"].list.i[-1] - - use_bias = IR_node.get_attr('use_bias', False) - if use_bias and self.weight_loaded: - self.output_weights[IR_node.name + "_bias"] = weight_dict['bias'] - - if pattern == "DepthwiseConv": - num_group = IR_node.IR_layer.attr["kernel_shape"].list.i[-2] - num_filter = num_filter * num_group - pattern = "Convolution" - if self.weight_loaded: - weights = np.swapaxes(weights, -1, -2) - - else: - num_group = IR_node.get_attr('group', 1) - - # layout = IR_node.IR_layer.attr["data_format"].s - if dim == 1: - layout = 'NCW' - elif dim == 2: - layout = 'NCHW' - elif dim == 3: - layout = 'NCDHW' - - if self.weight_loaded: - # if layout not in MXNetEmitter.channels_last: - weights = MXNetEmitter.transpose(weights, dim) - self.output_weights[IR_node.name + "_weight"] = weights - - code = "" - if not defuse_pad: - code += "{:<15} = mx.sym.{}(data={}, kernel={}, stride={}, dilate = ({}), pad={}, num_filter = {}, num_group = {}, no_bias = {}, layout = '{}', name = '{}')".format( - 
IR_node.variable_name, - pattern, - self.parent_variable_name(IR_node), - tuple(kernel), - tuple(stride), - dilate, - tuple(pad), - num_filter, - num_group, - not use_bias, - layout, - IR_node.name) - else: - code += self.set_pad(IR_node, code, pad, False) - code += "\n {:<15} = mx.sym.{}(data={}, kernel={}, stride={}, dilate = ({}), num_filter = {}, num_group = {}, no_bias = {}, layout = '{}', name = '{}')".format( - IR_node.variable_name, - pattern, - IR_node.variable_name + "_pad", - tuple(kernel), - tuple(stride), - dilate, - num_filter, - num_group, - not use_bias, - layout, - IR_node.name) - - return code - - def emit_Conv(self, IR_node): - return self._emit_convolution(IR_node, "Convolution") - - def emit_DepthwiseConv(self, IR_node): - return self._emit_convolution(IR_node, "DepthwiseConv") - - def emit_ConvTranspose(self, IR_node): - return self._emit_convolution(IR_node, "Deconvolution") - - def emit_DataInput(self, IR_node): - shape = list() - shape.extend(IR_node.IR_layer.attr["shape"].list.i) - - code = "{:<15} = mx.sym.var('{}')".format( - IR_node.variable_name, IR_node.name) - return code - - # Add LeakyReLU Elu(slope not support) - def emit_Activation(self, IR_node, act_type): - - act_type = act_type - func_name = "" - - if act_type == "elu": - func_name = "LeakyReLU" - else: - func_name = "Activation" - - code = "{:<15} = mx.sym.{}(data = {}, act_type = '{}', name = '{}')".format( - IR_node.variable_name, - func_name, - self.parent_variable_name(IR_node), - act_type, - IR_node.name) - - return code - - def emit_BatchNorm(self, IR_node): - IR_node_after = self.IR_graph.get_son(IR_node.name, [0]) - if IR_node_after.type == 'Scale': - if self.weight_loaded: - weight_dict = self.weights[IR_node.name] - weight_dict_scale = self.weights[IR_node_after.name] - - # axis = IR_node.IR_layer.attr["axis"].i - axis = 1 - eps = IR_node.IR_layer.attr["epsilon"].f - momentum = IR_node.IR_layer.attr["momentum"].f - - fix_gamma = not IR_node.IR_layer.attr["scale"].b - - if self.weight_loaded: - if not fix_gamma: - # self.output_weights[IR_node.name + "_gamma"] = np.multiply(weight_dict['scale'], weight_dict_scale['scale']) - # self.output_weights[IR_node.name + "_beta"] = np.multiply(weight_dict['bias'], weight_dict_scale['scale']) + weight_dict_scale['bias'] - self.output_weights[IR_node.name + - "_gamma"] = weight_dict['scale'] - self.output_weights[IR_node.name + - "_beta"] = weight_dict['bias'] - - # not supported yet - use_global_stats = "False" - if self.weight_loaded: - self.output_weights[IR_node.name + - "_moving_var"] = weight_dict['var'] - self.output_weights[IR_node.name + - "_moving_mean"] = weight_dict['mean'] - - code = "{:<15} = mx.sym.BatchNorm(data = {}, axis = {}, eps = {}, momentum = {}, fix_gamma = {}, use_global_stats = {}, name = '{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - axis, - eps, - momentum, - fix_gamma, - use_global_stats, - IR_node.name) - - return code - - else: - if self.weight_loaded: - weight_dict = self.weights[IR_node.name] - - # axis = IR_node.IR_layer.attr["axis"].i - axis = 1 - eps = IR_node.IR_layer.attr["epsilon"].f - momentum = IR_node.IR_layer.attr["momentum"].f - - fix_gamma = not IR_node.IR_layer.attr["scale"].b - - if self.weight_loaded: - if not fix_gamma: - self.output_weights[IR_node.name + - "_gamma"] = weight_dict['scale'] - self.output_weights[IR_node.name + - "_beta"] = weight_dict['bias'] - - # not supported yet - use_global_stats = "False" - if self.weight_loaded: - self.output_weights[IR_node.name + 
- "_moving_var"] = weight_dict['var'] - self.output_weights[IR_node.name + - "_moving_mean"] = weight_dict['mean'] - - code = "{:<15} = mx.sym.BatchNorm(data = {}, axis = {}, eps = {}, momentum = {}, fix_gamma = {}, use_global_stats = {}, name = '{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - axis, - eps, - momentum, - fix_gamma, - use_global_stats, - IR_node.name) - - return code - - def emit_Scale(self, IR_node): - if self.weight_loaded: - weight_dict = self.weights[IR_node.name] - - # axis = IR_node.IR_layer.attr["axis"].i - axis = 1 - eps = 0.0 - momentum = 0.0 - - fix_gamma = not IR_node.IR_layer.attr["scale"].b - - if self.weight_loaded: - if not fix_gamma: - self.output_weights[IR_node.name + - "_gamma"] = weight_dict['scale'] - self.output_weights[IR_node.name + "_beta"] = weight_dict['bias'] - - # not supported yet - use_global_stats = "False" - if self.weight_loaded: - self.output_weights[IR_node.name + - "_moving_var"] = weight_dict['scale_var'] - self.output_weights[IR_node.name + - "_moving_mean"] = weight_dict['scale_mean'] - - code = "{:<15} = mx.sym.BatchNorm(data = {}, axis = {}, eps = {}, momentum = {}, fix_gamma = {}, use_global_stats = {}, name = '{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - axis, - eps, - momentum, - fix_gamma, - use_global_stats, - IR_node.name) - - return code - - def emit_Pool(self, IR_node): - - global_pool = IR_node.IR_layer.attr["global_pooling"].b - - kernel = list() - if global_pool: - kernel = [1] * (len(IR_node.IR_layer.attr["strides"].list.i) - 2) - else: - for e in IR_node.IR_layer.attr["kernel_shape"].list.i[1:-1]: - kernel.append(e) - - pool_type = IR_node.get_attr('pooling_type').lower() - - stride = list() - for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]: - stride.append(e) - - defuse_pad = False - pad = list() - if "pads" in IR_node.IR_layer.attr: - output_shape = list() - for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim: - output_shape.append(e.size) - - # print("Warning: MXNet Pooling Layer pad does not match IR Pooling Layer pad") - defuse_pad, pad = MXNetEmitter.transfer_pad( - IR_node.IR_layer.attr["pads"].list.i) - code = "" - if not defuse_pad: - code += "{:<15} = mx.sym.Pooling(data = {}, global_pool = {}, kernel={}, pool_type = '{}', stride={}, pad={}, name = '{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - global_pool, - tuple(kernel), - pool_type, - tuple(stride), - tuple(pad), - IR_node.name) - else: - code += self.set_pad(IR_node, code, pad, pool_type == "max") - code += "\n {:<15} = mx.sym.Pooling(data = {}, global_pool = {}, kernel={}, pool_type = '{}', stride={}, name = '{}')".format( - IR_node.variable_name, - IR_node.variable_name + "_pad", - global_pool, - tuple(kernel), - pool_type, - tuple(stride), - IR_node.name) - - return code - - def emit_SoftmaxOutput(self, IR_node): - - code = "{:<15} = mx.sym.SoftmaxOutput(data = {}, name = 'softmax')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node) - ) - - return code - - def emit_Softmax(self, IR_node): - - code = "" - - if len(IR_node.out_edges) == 0: - code = "{:<15} = mx.sym.SoftmaxOutput(data = {}, name = 'softmax')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node)) - else: - axis = IR_node.IR_layer.attr["dim"].i - code = "{:<15} = mx.sym.softmax(data = {}, axis = {}, name = '{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - axis, - IR_node.name) - - return code - - def 
emit_Squeeze(self, IR_node): - return self.emit_Flatten(IR_node) - - # def emit_ConvTranspose(self, IR_node): - # if self.weight_loaded: - # weight_dict = self.weights[IR_node.name] - # weights = weight_dict['weights'] - - # dim = len(IR_node.IR_layer.attr["kernel_shape"].list.i) - 2 - - # kernel = list() - # for idx in range(0, dim): - # kernel.append(IR_node.IR_layer.attr["kernel_shape"].list.i[idx]) - - # stride = list() - # for e in IR_node.IR_layer.attr["strides"].list.i[1:-1]: - # stride.append(e) - - # dilate = list() - # for e in IR_node.IR_layer.attr["dilations"].list.i[1:-1]: - # dilate.append(e) - # dilate = ', '.join('%s' % i for i in dilate) - - # defuse_pad = False - # pad = list() - # if "pads" in IR_node.IR_layer.attr: - # output_shape = list() - # for e in IR_node.IR_layer.attr["_output_shapes"].list.shape[0].dim: - # output_shape.append(e.size) - - # # print("Warning: MXNet Deconvolution Layer pad does not match IR Deconvolution Layer pad") - # defuse_pad, pad = MXNetEmitter.transfer_pad(IR_node.IR_layer.attr["pads"].list.i) - # pad = ', '.join('%s' % i for i in pad) - - # kernel = ', '.join('%s' % i for i in kernel) - # stride = ', '.join('%s' % i for i in stride) - - # num_filter = IR_node.IR_layer.attr["kernel_shape"].list.i[-2] - # no_bias = not IR_node.IR_layer.attr["use_bias"].b - # if not no_bias and self.weight_loaded: - # self.output_weights[IR_node.replace_scope(IR_node.name) + "_bias"] = weight_dict['bias'] - - # # layout = IR_node.IR_layer.attr["data_format"].s - # if dim == 1: - # layout = 'NCW' - # elif dim == 2: - # layout = 'NCHW' - # elif dim == 3: - # layout = 'NCDHW' - - # if self.weight_loaded: - # # if layout not in MXNetEmitter.channels_last: - # weights = MXNetEmitter.transpose(weights, dim) - # self.output_weights[IR_node.replace_scope(IR_node.name) + "_weight"] = weights - - # code = "" - # if not defuse_pad: - # code = "{:<15} = mx.sym.Deconvolution(data = {}, kernel = ({}), stride = ({}), dilate = ({}), pad = ({}), num_filter = {}, no_bias = {}, layout = '{}', name = '{}')".format( - # IR_node.replace_scope(IR_node.name), - # IR_node.replace_scope(IR_node.in_edges[0]), - # kernel, - # stride, - # dilate, - # pad, - # num_filter, - # no_bias, - # layout, - # IR_node.replace_scope(IR_node.name)) - # else: - # code = self.set_pad(IR_node, code, pad) - # code += "\n {:<15} = mx.sym.Deconvolution(data = {}, kernel = ({}), stride = ({}), dilate = ({}), num_filter = {}, no_bias = {}, layout = '{}', name = '{}')".format( - # IR_node.replace_scope(IR_node.name), IR_node.replace_scope(IR_node.name) + "_pad", kernel, stride, dilate, num_filter, no_bias, layout, IR_node.replace_scope(IR_node.name)) - - # return code - - def emit_Embedding(self, IR_node): - - input_dim = IR_node.IR_layer.attr["input_dim"].i - output_dim = IR_node.IR_layer.attr["output_dim"].i - dtype = MXNetEmitter.dtype_map.get( - IR_node.layer.attr["dtype"].type, "float32") - - weight_dict = self.weights[IR_node.name] - - if self.weight_loaded: - self.output_weights[IR_node.name + - "_weight"] = weight_dict['weights'] - - code = "{:<15} = mx.sym.Embedding(data = {}, input_dim = {}, output_dim = {}, dtype = '{}', name = '{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - input_dim, - output_dim, - dtype, - IR_node.name) - - return code - - def emit_LeakyRelu(self, IR_node): - alpha = IR_node.IR_layer.attr['alpha'].f - code = "{:<15} = mx.sym.LeakyReLU(data = {}, slope = {}, name = '{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - 
alpha,
-            IR_node.name
-        )
-        return code
-
-    def emit_PRelu(self, IR_node):
-        slope = IR_node.get_attr('gamma')
-        code = "{:<15} = mx.sym.LeakyReLU(data = {}, slope = {}, act_type = '{}', name = '{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            slope,
-            'prelu',
-            IR_node.name
-        )
-        return code
-
-    def emit_Elu(self, IR_node):
-        alpha = IR_node.IR_layer.attr['alpha'].f
-        # act_type must be quoted in the generated line; without the quotes
-        # the emitted code would reference an undefined name `elu`.
-        code = "{:<15} = mx.sym.LeakyReLU(data = {}, slope = {}, act_type = '{}', name = '{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            alpha,
-            'elu',
-            IR_node.name
-        )
-        return code
-
-    def emit_Dropout(self, IR_node):
-        p = IR_node.IR_layer.attr["keep_prob"].f
-        mode = IR_node.IR_layer.attr["mode"].s.lower().decode(
-        ) if 'mode' in IR_node.layer.attr else 'training'
-        code = "{:<15} = mx.sym.Dropout(data = {}, p = {}, mode = '{}', name = '{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            p,
-            mode,
-            IR_node.name)
-
-        return code
-
-    # reverse is not supported yet
-
-    def emit_Reshape(self, IR_node):
-        shape = list()
-        for e in IR_node.IR_layer.attr["shape"].list.i:
-            shape.append(e)
-        shape = ', '.join('%s' % i for i in shape)
-        reverse = False
-
-        code = "{:<15} = mx.sym.reshape(data = {}, shape = ({}), reverse = {}, name = '{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            shape,
-            reverse,
-            IR_node.name)
-
-        return code
-
-    def emit_Flatten(self, IR_node):
-        # code = "{:<15} = mx.sym.transpose(data = {}, axes = (0, 2, 3, 1))\n".format("trans", self.parent_variable_name(IR_node))
-        code = "{:<15} = mx.sym.flatten(data = {}, name = '{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.name)
-
-        return code
-
-    @staticmethod
-    def _convert_axis(IR_node, axis):
-        ndim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim)
-        if axis == 0:
-            return 0
-        elif axis == ndim - 1:
-            return 1
-        else:
-            return axis + 1
-
-    def emit_Concat(self, IR_node):
-        dim = MXNetEmitter._convert_axis(
-            IR_node, IR_node.IR_layer.attr["axis"].i)
-        code = "{:<15} = mx.sym.concat({}, dim = {}, name = '{}')".format(
-            IR_node.variable_name,
-            ', '.join(self.parent_variable_name(
-                IR_node, [idx]) for idx in range(len(IR_node.in_edges))),
-            dim,
-            IR_node.name)
-
-        return code
-
-    def emit_Cast(self, IR_node):
-        dtype = IR_node.IR_layer.attr["dtype"].type
-        code = "{:<15} = mx.sym.cast(data = {}, dtype = {}, name = '{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            dtype,
-            IR_node.name)
-
-        return code
-
-    def emit_Expand_dims(self, IR_node):
-        axis = IR_node.IR_layer.attr["axis"].i
-        code = "{:<15} = mx.sym.expand_dims(data = {}, axis = {}, name = '{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            axis,
-            IR_node.name)
-
-        return code
-
-    def emit_Pad(self, IR_node):
-        mode = IR_node.IR_layer.attr["mode"].s.lower().decode()
-        pad_width = list()
-        pad_width.extend([0]*4)
-        padding = convert_onnx_pad_to_tf(IR_node.get_attr("pads"))[1:-1]
-        for padding_pair in padding:
-            pad_width.extend(padding_pair)
-
-        pad_width = ', '.join('%s' % i for i in pad_width)
-
-        code = "{:<15} = mx.sym.pad(data = {}, mode = '{}', pad_width = ({}), name = '{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            mode,
-            pad_width,
-            IR_node.name)
-
-        return code
-
-    def emit_Add(self, IR_node):
-        code = "{:<15} = mx.sym.broadcast_add({}, {})".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-
self.parent_variable_name(IR_node, [1])) - - return code - - def emit_Mul(self, IR_node): - - code = "{:<15} = mx.sym.broadcast_mul({}, {})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - self.parent_variable_name(IR_node, [1])) - - return code - - def emit_ReduceMean(self, IR_node): - axes = IR_node.layer.attr['axes'].list.i[:] - axes = ','.join('%s' % MXNetEmitter.transpose_map[i] for i in axes) - - code = "{:<15} = mx.sym.mean(data = {}, axis = ({}), keepdims = {})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - axes, - IR_node.layer.attr['keepdims'].b) - - return code - - def emit_LRN(self, IR_node): - code = "{:<15} = mx.sym.LRN(data = {}, alpha = {}, beta = {}, knorm = {}, nsize = {}, name = '{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.layer.attr['alpha'].f, - IR_node.layer.attr['beta'].f, - IR_node.layer.attr['k'].f, - IR_node.layer.attr['size'].i * 2 - 1, - IR_node.name) - - return code - - def emit_Constant(self, IR_node): - # save the constant into weight dict - if IR_node.get_attr('value'): - value = IR_node.get_attr('value') - else: - value = self.weights[IR_node.name]['value'] - - if not isinstance(value, list): - # mxnet's bug, it does not surpport scalar weight. - self.output_weights[IR_node.name + '_weight'] = [value] - code = "{:<15} = mx.sym.var(name = '{}', shape=(1,))".format( - IR_node.variable_name, IR_node.name+'_weight') - else: - shape = np.array(value).shape - self.output_weights[IR_node.name + '_weight'] = value - - code = "{:<15} = mx.sym.var(name = '{}', shape={})".format( - IR_node.variable_name, IR_node.name+'_weight', shape) - - return code - - def emit_Sub(self, IR_node): - code = "{:<15} = mx.sym.broadcast_sub({}, {})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - self.parent_variable_name(IR_node, [1])) - - return code - - def emit_Relu6(self, IR_node): - codes = list() - codes.append(self.emit_Activation(IR_node, 'relu')) - old_name = IR_node.variable_name - IR_node.real_name = IR_node.real_name + "_clip" - codes.append("{:<15} = mx.sym.clip({}, a_min=0, a_max=6, name='{}')".format( - IR_node.real_variable_name, - old_name, - IR_node.real_name)) - - return codes - - def emit_Slice(self, IR_node): - - starts = IR_node.get_attr('starts') - starts = [starts[0], starts[-1]] + starts[1:-1] - ends = IR_node.get_attr('ends') - ends = [ends[0], ends[-1]] + ends[1:-1] - ends = [i if i else None for i in ends] - strides = IR_node.get_attr('strides') - if strides: - strides = [strides[0], strides[-1]] + strides[1:-1] - - code = "{:<15} = mx.sym.slice({}, begin={}, end={}, step={}, name='{}')".format( - IR_node.real_variable_name, - self.parent_variable_name(IR_node), - starts, - ends, - strides, - IR_node.name - ) - return code - - def emit_Const(self, IR_node): - pass - - def emit_Shape(self, IR_node): - code = "{:<15} = mx.sym.var(init = mx.init.Constant({}.infer_shape({}={})[1][0]), name='{}')".format( - IR_node.real_variable_name, - self.parent_variable_name(IR_node), - list(self.input_name_shape.keys())[0], - list(self.input_name_shape.values())[0], - IR_node.name - ) - return code - - def emit_Pack(self, IR_node): - pass - - def emit_Unsqueeze(self, IR_node): - axis = IR_node.get_attr('axes')[0] - code = "{:<15} = mx.sym.expand_dims(data = {}, axis = {}, name = '{}')".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - axis, - IR_node.name) - - return code - - def emit_Unstack(self, IR_node): - squeeze_axis = axis = 
IR_node.get_attr('axis')
-        num = IR_node.get_attr('num')
-        if num is None:
-            args_str = ""
-            for input_name in self.IR_graph.input_layers:
-                if self.IR_graph.get_node(input_name).type != 'Const':
-                    args_str += '{}={}, '.format(self.IR_graph.get_node(
-                        input_name).real_variable_name, self.data_input_shape[input_name])
-
-            args_str = args_str[:-2]
-            num_outputs = "{}.infer_shape({})[1][0][{}]".format(
-                IR_node.variable_name,
-                args_str,
-                axis
-            )
-        else:
-            num_outputs = num
-
-        code = "{:<15} = mx.sym.split({}, num_outputs={}, axis={}, squeeze_axis={})".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            num_outputs,
-            axis,
-            squeeze_axis
-        )
-        return code
-
-    def emit_Fill(self, IR_node):
-        value = IR_node.get_attr('value')
-        code = "{:<15} = mx.sym.full({}, {})".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            value
-        )
-        return code
-
-    def emit_Split(self, IR_node):
-        axis = IR_node.get_attr('axis')
-        num_outputs = IR_node.get_attr('split')
-
-        if isinstance(num_outputs, list):
-            raise NotImplementedError()
-        code = "{:<15} = mx.sym.split({}, num_outputs={}, axis={})".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            num_outputs,
-            axis)
-
-        return code
-
-    def emit_Sigmoid(self, IR_node):
-        code = "{:<15} = mx.sym.sigmoid(data={}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.name
-        )
-        return code
-
-    def emit_Tanh(self, IR_node):
-        code = "{:<15} = mx.sym.tanh(data={}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.name
-        )
-        return code
-
-    # The method name keeps the IR op spelling "Maxmum"; the emitted symbol
-    # must be mx.sym.maximum, since mx.sym has no "maxmum" operator.
-    def emit_Maxmum(self, IR_node):
-        code = "{:<15} = mx.sym.maximum({}, {}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            self.parent_variable_name(IR_node, [1]),
-            IR_node.name
-        )
-        return code
-
-    def emit_Minimum(self, IR_node):
-        code = "{:<15} = mx.sym.minimum({}, {}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            self.parent_variable_name(IR_node, [1]),
-            IR_node.name
-        )
-        return code
-
-    def emit_Scope(self, IR_node):
-        import re
-        pattern = IR_node.pattern
-
-        if pattern not in self.naive_scope_pattern and re.sub(r'(_\d+)*$', '', IR_node.pattern) not in self.naive_scope_pattern:
-            origi_pattern = re.sub(r'(_\d+)*$', '', IR_node.pattern)
-            func = getattr(self, "_emit_" + origi_pattern)
-            code = func(IR_node)
-        else:
-            code = "{:<15} = __{}({})".format(
-                IR_node.real_variable_name,
-                IR_node.pattern,
-                ', '.join(self.parent_variable_name(IR_node, s) for s in IR_node.in_edges))
-            self._gen_scope_code(IR_node)
-        return code
-
-    def _gen_scope_code(self, scope_node):
-
-        def _get_weight_related_op_name(node):
-            weight_related_ops = ['Constant', 'Conv',
-                                  'FullyConnected', 'BatchNorm']
-            op_type = node.type
-            if op_type in weight_related_ops:
-                return op_type, node.name
-
-        def _scope_func(params, code, return_var):
-            code = """
-    def __call__(self, {}):
-{}
-        return {}
-    """.format(params, code, ', '.join(return_var))
-            return code
-
-        class_inits = dict()
-
-        body_code = str()
-        for node_name in scope_node.topology_list:
-            node = self.IR_graph.get_node(node_name)
-            node_type = node.type
-
-            if hasattr(self, "emit_" + node_type):
-                func = getattr(self, "emit_" + node_type)
-                line = func(node)
-                if line is not None:
-                    body_code += "        " + line + '\n'
-                inits = _get_weight_related_op_name(node)
-                if inits:
-                    if class_inits.get(inits[0], None):
-                        class_inits[inits[0]].append(inits[1])
-                    else:
-                        class_inits[inits[0]]
= list([inits[1]]) - else: - print( - "MXNetEmitter has not supported operator [%s]." % (node_type)) - self.emit_UNKNOWN(node) - - # param_code does not need parameter slice. - param_code = ', '.join('%s' % self.IR_graph.get_node( - s).real_variable_name for s in scope_node.in_edges) - function_code = _scope_func( - param_code, body_code, scope_node.return_variables) - - return class_inits, function_code - - def _emit_gru_cell(self, IR_node): - if not self.layers_codes.get(IR_node.pattern, None): - class_inits, func_code = self._gen_scope_code(IR_node) - variables, variable_codes, init_code, func_code = self.process_inits_func_code( - class_inits, func_code) - - states = [self.IR_graph.get_node( - s).real_variable_name for s in IR_node.in_edges] - states.pop(0) - states_code = ', '.join(states) - - class_code = ''' -class _{}(mx.rnn.BaseRNNCell): - def __init__(self, {}): - -{} - -{} - - '''.format(IR_node.pattern, - ', '.join(variables), - init_code, - func_code) - self.layers_codes[IR_node.pattern] = class_code - - if not hasattr(self, 'pattern_variables'): - self.pattern_variables = {IR_node.pattern: variables} - else: - self.pattern_variables[IR_node.pattern] = variables - - code = variable_codes - code.append("{:<15} = _{}({})({})".format( - IR_node.real_variable_name, - IR_node.pattern, - ', '.join(variables), - ', '.join(self.parent_variable_name(IR_node, s) for s in IR_node.in_edges))) - else: - code = "{:<15} = _{}({})({})".format( - IR_node.real_variable_name, - IR_node.pattern, - ', '.join(self.pattern_variables[IR_node.pattern]), - ', '.join(self.parent_variable_name(IR_node, s) for s in IR_node.in_edges)) - - return code - - def _emit_h_zero(self, IR_node): - code = "{:<15} = mx.sym.full((1, {}), {})".format( - IR_node.variable_name, - IR_node.get_attr('fill_size'), - IR_node.get_attr('fill_value') - ) - return code - - def _emit_lstm_cell(self, IR_node): - - if not self.layers_codes.get(IR_node.pattern, None): - class_inits, func_code = self._gen_scope_code(IR_node) - variables, variable_codes, init_code, func_code = self.process_inits_func_code( - class_inits, func_code) - - states = [self.IR_graph.get_node( - s).real_variable_name for s in IR_node.in_edges] - states.pop(0) - states_code = ', '.join(states) - - class_code = ''' -class _{}(mx.rnn.BaseRNNCell): - def __init__(self, {}): - -{} - -{} - - '''.format(IR_node.pattern, - ', '.join(variables), - init_code, - func_code) - self.layers_codes[IR_node.pattern] = class_code - - if not hasattr(self, 'pattern_variables'): - self.pattern_variables = {IR_node.pattern: variables} - else: - self.pattern_variables[IR_node.pattern] = variables - - code = variable_codes - code.append("{:<15} = _{}({})({})".format( - IR_node.real_variable_name, - IR_node.pattern, - ', '.join(variables), - ', '.join(self.parent_variable_name(IR_node, s) for s in IR_node.in_edges))) - else: - code = "{:<15} = _{}({})({})".format( - IR_node.real_variable_name, - IR_node.pattern, - ', '.join(self.pattern_variables[IR_node.pattern]), - ', '.join(self.parent_variable_name(IR_node, s) for s in IR_node.in_edges)) - - return code - - def process_inits_func_code(self, class_inits, func_code): - init_code = str() - variables = list() - variable_codes = list() - for k, v in class_inits.items(): - if k == 'FullyConnected': - for i, name in enumerate(class_inits[k]): - variable_name = self.IR_graph.get_node(name).variable_name - variables.append("W_" + variable_name) - variable_codes.append( - "W_{:<15} = mx.sym.var(name='{}_weight')".format(variable_name, name)) 
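-                    # Editorial note: the symbolic variable's name must match
-                    # the key saved in the params file ("<layer>_weight");
-                    # that is how the generated RNN cell rebinds the converted
-                    # FullyConnected weights at load time.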
- init_code += " self.W_{} = W_{}\n".format( - variable_name, variable_name) - - if self.weight_loaded and self.weights[name].get('bias', None).any() != None: - variable_codes.append( - "B_{:<15} = mx.sym.var(name='{}_bias')".format(variable_name, name)) - variables.append("B_" + variable_name) - init_code += " self.B_{} = B_{}\n".format( - variable_name, variable_name) - func_code = func_code.replace("name = '{}'".format( - name), "name = '{}', weight = self.W_{}, bias = self.B_{}".format(name, variable_name, variable_name)) - else: - func_code = func_code.replace("name = '{}'".format( - name), "name = '{}', weight = self.W_{}".format(name, variable_name)) - elif k == 'Constant': - for name in class_inits[k]: - variable_name = self.IR_graph.get_node( - name.replace('_weight', '')).variable_name - variables.append(variable_name) - constant_line = self.emit_Constant( - self.IR_graph.get_node(name.replace('_weight', ''))) - variable_codes.append("{:<15} = {}".format( - variable_name, '='.join(constant_line.split('=')[1:]))) - init_code += " self.{} = {}\n".format( - variable_name, variable_name) - func_code = func_code.replace(constant_line, constant_line.split('=')[ - 0] + ' = self.'+constant_line.split('=')[0]) - else: - raise NotImplementedError - - return variables, variable_codes, init_code, func_code -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -import os -import mxnet as mx -from mmdnn.conversion.common.DataStructure.graph import GraphNode, Graph - - -class MXNetGraphNode(GraphNode): - - def __init__(self, layer): - super(MXNetGraphNode, self).__init__(layer) - - if "attr" in layer: - self.attr = layer["attr"] - elif "param" in layer: - self.attr = layer["param"] - elif "attrs" in layer: - self.attr = layer["attrs"] - else: - self.attr = None - - @property - def name(self): - return self.layer["name"] - - @property - def type(self): - return self.layer["op"] - - @property - def mx_layer(self): - return self.layer - - def get_attr(self, name, default_value=None): - if self.attr: - if name in self.attr.keys(): - return self.attr.get(name) - else: - return default_value - else: - return default_value - - -class MXNetGraph(Graph): - - def __init__(self, model): - # sanity check non-sense always input module.Module - # if not (type(model) == mx.module.Module - # or type(model) == mx.module.SequentialModule - # or type(model) == mx.model) - # raise TypeError("MXNet layer of type %s is not supported." 
% type(model)) - - super(MXNetGraph, self).__init__(model) - - def build(self, json_data): - - self.input_layers = list() - input_dict = dict() # dict{layer_num, layer_name} - layer_num = -1 - - import re - - for layer in json_data: - - layer_num += 1 - # if layer["op"] == "null": - # continue - - if re.search("_(weight|bias|var|mean|gamma|beta|label)", layer["name"]) and layer["op"] == "null": - continue - - input_dict.update({layer_num: layer["name"]}) - self.layer_map[layer["name"]] = MXNetGraphNode(layer) - self.layer_name_map[layer["name"]] = layer["name"] - for input_layer in layer["inputs"]: - assert isinstance(input_layer, list) - if input_layer[0] in input_dict: - pred = input_dict.get(input_layer[0]) - - if pred not in self.layer_map: - new_node = dict( - {'op': 'NoOp', 'name': pred, 'inputs': list()}) - self.layer_map[pred] = MXNetGraphNode(new_node) - self.layer_name_map[pred] = pred - - self._make_connection(pred, layer["name"]) - - super(MXNetGraph, self).build() - - # raise NotImplementedError("Cannot support multi-input") -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -import os -import sys -import math -import mxnet as mx -import numpy as np -from mmdnn.conversion.mxnet.mxnet_graph import MXNetGraph -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType -from mmdnn.conversion.common.DataStructure.parser import Parser -from mmdnn.conversion.common.utils import * - - -class MXNetParser(Parser): - - dtype_map = { - "int8": graph_pb2.DT_INT8, - "int16": graph_pb2.DT_INT16, - "int32": graph_pb2.DT_INT32, - "int64": graph_pb2.DT_INT64, - "uint8": graph_pb2.DT_UINT8, - "uint16": graph_pb2.DT_UINT16, - "uint32": graph_pb2.DT_UINT32, - "uint64": graph_pb2.DT_UINT64, - "float16": graph_pb2.DT_FLOAT16, - "float32": graph_pb2.DT_FLOAT32, - "float64": graph_pb2.DT_FLOAT64 - } - - activation_map = { - "relu": "Relu", - "sigmoid": "Sigmoid", - "tanh": "Tanh", - # Not support yet - # "softrelu" : "SoftReLU" - } - - channels_last = ['NDHWC', 'NHWC', 'NWC'] - channels_first = ['NCDHW', 'NCHW', 'NCW'] - - @property - def src_graph(self): - return self.mxnet_graph - - @staticmethod - def str2bool(v): - return v.lower() in ("1", "true") - - @staticmethod - def str2intList(v): - v = v.replace("(", "") - v = v.replace(")", "") - if v == "": - return list() - else: - return [int(s) for s in v.split(',')] - - @staticmethod - def transpose(data, dim): - if dim == 1: - data = data.transpose((2, 1, 0)) - elif dim == 2: - data = data.transpose((2, 3, 1, 0)) - elif dim == 3: - data = data.transpose((2, 3, 4, 1, 0)) - else: - print("Warning: The weight of dim {0} cannot transpose" % dim) - - return data - - @staticmethod - def _convert_axis(IR_node, axis): - ndim = len(IR_node.attr['_output_shapes'].list.shape[0].dim) - if axis == 0: - return 0 - elif axis == 1: - return ndim - 1 - else: - return axis - 1 - - def trace_shape(self, source_node, IR_node): - input_node = self.IR_layer_map[IR_node.input[0]] - while len(input_node.attr['_output_shapes'].list.shape[0].dim) <= 2: - IR_node = input_node - input_node = self.IR_layer_map[IR_node.input[0]] - - input_shape = list() - for e in 
input_node.attr["_output_shapes"].list.shape[0].dim: - input_shape.append(e.size) - C = input_shape.pop() - ret = [C] + input_shape[1:] - return ret - - def check_pad_mode(self, source_node, IR_node): - kernel = MXNetParser.str2intList(source_node.get_attr("kernel")) - dim = len(kernel) - - pad = source_node.get_attr("pad", "()") - if pad == "()": - pad = list([0] * dim) - else: - pad = MXNetParser.str2intList(pad) - - stride = source_node.get_attr("stride") - if stride == None: - stride = list([1] * dim) - else: - stride = MXNetParser.str2intList(stride) - - dilate = source_node.get_attr("dilate") - if dilate == None: - dilate = list([1] * dim) - else: - dilate = MXNetParser.str2intList(dilate) - - input_shape = list() - if len(source_node.in_edges) == 0 or IR_node.input[0] not in self.IR_layer_map: - input_shape = self.data_shape - else: - for e in self.IR_layer_map[IR_node.input[0]].attr["_output_shapes"].list.shape[0].dim: - input_shape.append(e.size) - - valid_flag = True - same_flag = True - - for i in range(dim): - if not pad[i] == 0: - valid_flag = False - output_shape = int(math.floor(float( - input_shape[i] + 2 * pad[i] - dilate[i] * (kernel[i] - 1) - 1) / float(stride[i])) + 1) - same_pad_shape = int( - math.ceil(float(input_shape[i]) / float(stride[i]))) - if not output_shape == same_pad_shape: - same_flag = False - - if valid_flag: - return "VALID" - elif same_flag: - return "SAME" - else: - return "None" - - @staticmethod - def _load_model(weights, epoch): - """Load a mxnet model from disk - - Parameters - ---------- - model_path: str - Path where the model network/params path is (json/params file) - - prefix: str - prefix for json file, e.g. prefix-symbol.json - - epoch: int - save epoch number - - Returns - ------- - model: A mxnet model - params: A pair of dictionaries each mapping parameter names to NDArray values - """ - - # Load the model network and weights - sym, arg_params, aux_params = mx.model.load_checkpoint( - weights, int(epoch)) - - # digraph = mx.viz.plot_network(sym, save_format='jpg') # For debugging - # digraph.render() - - model = mx.mod.Module(symbol=sym) - arg_params.update(aux_params) - return model, arg_params - - ''' - MXNet new api does not support load data without data_shapes - ''' - # model.bind(data_shapes = data_shapes) - # model.init_params() - - # mod.load(model_path, epoch_num) - # return mod.get_params() - - @staticmethod - def _load_json_file(model_path): - """Load a mxnet network json file - - Parameters - ---------- - model_path: str - Path where the model network/params path is (json/params file) - - (Deleted) - prefix: str - prefix for json file, e.g. 
prefix-symbol.json - - Returns - ------- - data["nodes"]: all the layer information(including weights, bias) with format - data["nodes"][layer_num][params = {"name", "op", "attr", "inputs"}] - - """ - import json - - # load the model network - with open(model_path, 'r') as data_file: - data = json.load(data_file) - - # adjust the data format - assert isinstance(data["nodes"], list) - return data["nodes"] - - def __init__(self, input_arg): - - super(MXNetParser, self).__init__() - - json_data = list() - self.data_shape = tuple() - # load model files into MXNet graph - # data_shape arguments added to calculate infer_shape(required) - # if isinstance(input_arg, basestring): - if len(input_arg) == 2: - with open(input_arg[0], 'r') as input_json: - json_string = input_json.read() - symbol = mx.sym.load_json(json_string) - self.model = mx.mod.Module(symbol=symbol) - json_data = MXNetParser._load_json_file(input_arg[0]) - self.data_shape = tuple([1] + list(map(int, input_arg[1]))) - - elif len(input_arg) == 4: - self.model, self.weight_data = MXNetParser._load_model( - input_arg[1], input_arg[2]) - json_data = MXNetParser._load_json_file(input_arg[0]) - self.weight_loaded = True - assert isinstance(input_arg[3], list) - self.data_shape = tuple([1] + list(map(int, input_arg[3]))) - - else: - raise ValueError( - "the # of input arguments [{}] is not supported" % len(input_arg)) - - # Build network graph - self.data_format = 'None' - self.mxnet_graph = MXNetGraph(self.model) - self.mxnet_graph.build(json_data) - - def gen_IR(self): - self.IR_layer_map = dict() - for layer in self.mxnet_graph.topological_sort: - current_node = self.mxnet_graph.get_node(layer) - node_type = current_node.type - - if hasattr(self, "rename_" + node_type): - func = getattr(self, "rename_" + node_type) - func(current_node) - - else: - self.rename_UNKNOWN(current_node) - - def _copy_and_reop(self, source_node, IR_node, new_op=None): - new_op = source_node.type if new_op == None else new_op - if source_node.name.startswith('_'): - source_node.real_name = source_node.name[1:] - IR_node.name = source_node.real_name - IR_node.op = new_op - self.IR_layer_map[IR_node.name] = IR_node - - def set_output_shape(self, source_node, IR_node): - sym_group = self.model.symbol.get_internals() - for sym in sym_group: - if source_node.name == sym.name: - arg_shape, output_shape, aux_shape = sym.infer_shape( - data=self.data_shape) - for idx in range(len(output_shape)): - output_list = list(output_shape[idx]) - - # transpose to channel last - if not self.data_format in MXNetParser.channels_last: - channel = output_list.pop(1) - output_list.append(channel) - - if IR_node.op == "DataInput": - MXNetParser._copy_shape( - IR_node, [-1] + output_list[1:]) - - shape = graph_pb2.TensorShape() - for dim in output_list: - new_dim = shape.dim.add() - if dim == None: - new_dim.size = -1 - else: - new_dim.size = dim - - IR_node.attr["_output_shapes"].list.shape.extend([shape]) - break - - def _convert_identity_operation(self, source_node, new_op=None): - IR_node = self.IR_graph.node.add() - - # name, op - self._copy_and_reop(source_node, IR_node, new_op) - - # input edge - self.convert_inedge(source_node, IR_node) - - # output shape - self.set_output_shape(source_node, IR_node) - - return IR_node - - def _defuse_padding(self, source_node): - IR_node = self.IR_graph.node.add() - IR_node.name = source_node.name + "_pad" - IR_node.op = "Pad" - # input edge - self.convert_inedge(source_node, IR_node) - - self.IR_layer_map[IR_node.name] = IR_node - - # attr - 
assign_IRnode_values(IR_node, {'mode': 'CONSTANT'}) - # print("Warning: MXNet symbol pad does not support channel last") - - pad = MXNetParser.str2intList(source_node.get_attr("pad")) - args['pads'] = [0, 0] - for e in pad: - args['pads'].extend([e, e]) - args['pads'] += [0, 0] - args['pads'] = convert_tf_pad_to_onnx(args['pads']) - IR_node.set_attrs(args) - - # IR_node.attr["pads"].list.i.extend([0, 0]) - # for e in pad: - # IR_node.attr["pads"].list.i.extend([e, e]) - # IR_node.attr["pads"].list.i.extend([0, 0]) - - IR_node.attr["constant_values"].f = 0. - - @staticmethod - def _copy_shape(IR_node, output_list): - if not output_list == None: - for dim in output_list: - new_dim = IR_node.attr["shape"].shape.dim.add() - if dim == None: - new_dim.size = -1 - else: - new_dim.size = dim - else: - IR_node.attr["shape"].shape.unknown_rank = True - - def rename_UNKNOWN(self, source_node): - print("Warning: MXNet Parser has not supported operator %s with name %s." - % (source_node.type, source_node.name)) - if source_node.type == "null" and source_node.name != 'label': - print( - "Warning: convert the null operator with name [%s] into input layer." % source_node.name) - IR_node = self.IR_graph.node.add() - - # name, op - self._copy_and_reop(source_node, IR_node, "DataInput") - - # input edge - self.convert_inedge(source_node, IR_node) - - self.set_output_shape(source_node, IR_node) - - else: - raise NotImplementedError() - - """ - Here start with Neural Network Symbol - """ - - def rename_Pad(self, source_node): - IR_node = self._convert_identity_operation(source_node) - kwargs = dict() - pad = MXNetParser.str2intList(source_node.get_attr("pad_width")) - pad += [pad.pop(2), pad.pop(3)] - kwargs['pads'] = pad - kwargs['pads'] = convert_tf_pad_to_onnx(kwargs['pads']) - kwargs['mode'] = 'CONSTANT' - assign_IRnode_values(IR_node, kwargs) - IR_node.attr["constant_values"].f = 0. 
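-    # Illustrative note (editorial, not from the scraped file): for a
-    # symmetric MXNet pad of (1, 2) on H/W, the loops above build the
-    # TF-style interleaved list [0, 0, 1, 1, 2, 2, 0, 0]; as its name
-    # suggests, convert_tf_pad_to_onnx then regroups this into ONNX
-    # begin/end order, presumably [0, 1, 2, 0, 0, 1, 2, 0].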
- - def rename_FullyConnected(self, source_node): - IR_node = self._convert_identity_operation(source_node) - - # units - IR_node.attr["units"].i = int(source_node.get_attr("num_hidden")) - - # use bias (no_bias default = False) - IR_node.attr["use_bias"].b = not MXNetParser.str2bool( - source_node.get_attr("no_bias", "False")) - - # weights - if self.weight_loaded: - if self.data_format == 'NM': - self.set_weight(source_node.name, "weights", self.weight_data.get( - source_node.name + "_weight").asnumpy().transpose((1, 0))) - else: - weight = self.weight_data.get( - source_node.name + "_weight").asnumpy().transpose((1, 0)) - original_shape = weight.shape - - channel_first_list = self.trace_shape(source_node, IR_node) - dim = len(channel_first_list) + 1 - weight = weight.reshape( - channel_first_list + [original_shape[1]]) - assert dim > 2 - weight = weight.transpose(list(range(1, dim-1)) + [0, dim-1]) - weight = weight.reshape(original_shape) - self.set_weight(source_node.name, "weights", weight) - - if IR_node.attr["use_bias"].b: - self.set_weight(source_node.name, "bias", self.weight_data.get( - source_node.name + "_bias").asnumpy()) - - if not self.data_format == 'NM': - # print("Warning: Layer [{}] has changed model data format from [{}] to [NM]".format(source_node.name, self.data_format)) - self.data_format = 'NM' - - def rename_Convolution(self, source_node): - IR_node = self.IR_graph.node.add() - - # input edge - self.convert_inedge(source_node, IR_node) - - # output shape - self.set_output_shape(source_node, IR_node) - - dim = 0 - layout = 'None' - - # kernel_shape - kernel = MXNetParser.str2intList(source_node.get_attr("kernel")) - dim = len(kernel) - IR_node.attr["kernel_shape"].list.i.extend(kernel) - - layout = source_node.get_attr("layout") - if layout == None or layout == 'None': - if dim == 1: - layout = "NCW" - elif dim == 2: - layout = "NCHW" - elif dim == 3: - layout = "NCDHW" - - if not self.data_format == layout: - # print("Warning: Layer [{}] has changed model data format from [{}] to [{}]".format(source_node.name, self.data_format, layout)) - self.data_format = layout - - # groups - group = int(source_node.get_attr("num_group", "1")) - IR_node.attr["group"].i = group - in_channel = self.IR_layer_map[IR_node.input[0] - ].attr["_output_shapes"].list.shape[0].dim[-1].size - - if group == in_channel: - self._copy_and_reop(source_node, IR_node, "DepthwiseConv") - else: - self._copy_and_reop(source_node, IR_node, "Conv") - # in_channel = in_channel // group - - out_channel = int(source_node.get_attr("num_filter")) - - IR_node.attr["kernel_shape"].list.i.extend([in_channel, out_channel]) - - # use_bias (no_bias default = False) - IR_node.attr["use_bias"].b = not MXNetParser.str2bool( - source_node.get_attr("no_bias", "False")) - - # strides - strides = source_node.get_attr("stride") - IR_node.attr["strides"].list.i.append(1) - if not strides == None: - IR_node.attr["strides"].list.i.extend( - MXNetParser.str2intList(strides)) - else: - IR_node.attr["strides"].list.i.extend([1] * dim) - IR_node.attr["strides"].list.i.append(1) - - # dilations - dilate = source_node.get_attr("dilate") - IR_node.attr["dilations"].list.i.append(1) - if not dilate == None: - IR_node.attr["dilations"].list.i.extend( - MXNetParser.str2intList(dilate)) - else: - IR_node.attr["dilations"].list.i.extend([1] * dim) - IR_node.attr["dilations"].list.i.append(1) - - # data_format - assign_IRnode_values(IR_node, {'data_format': layout}) - - # padding - if "pad" in source_node.attr: - pad = 
MXNetParser.str2intList(source_node.get_attr("pad")) - IR_node.attr["pads"].list.i.extend(([0]+pad+[0])*2) - else: - IR_node.attr["pads"].list.i.extend([0, 0] * (dim + 2)) - - # weights - if self.weight_loaded: - weight = self.weight_data.get( - source_node.name + "_weight").asnumpy() - if not layout in MXNetParser.channels_last: - weight = MXNetParser.transpose(weight, dim) - if IR_node.op == "DepthwiseConv": - weight = weight.transpose(0, 1, 3, 2) - self.set_weight(source_node.name, "weights", weight) - - if IR_node.attr["use_bias"].b: - self.set_weight(source_node.name, "bias", self.weight_data.get( - source_node.name + "_bias").asnumpy()) - - def rename_Activation(self, source_node): - self._convert_identity_operation( - source_node, new_op=MXNetParser.activation_map[source_node.get_attr("act_type")]) - - def rename_BatchNorm(self, source_node): - IR_node = self._convert_identity_operation(source_node) - - # axis - if self.data_format in MXNetParser.channels_first or self.data_format == 'None': - IR_node.attr["axis"].i = MXNetParser._convert_axis( - IR_node, int(source_node.get_attr("axis", "1"))) - else: - IR_node.attr["axis"].i = int(source_node.get_attr("axis", "1")) - - # scale - IR_node.attr["scale"].b = not MXNetParser.str2bool( - source_node.get_attr("fix_gamma", "True")) - IR_node.attr["bias"].b = True - # epsilon - IR_node.attr["epsilon"].f = float(source_node.get_attr("eps", "0.001")) - - # momentum - IR_node.attr["momentum"].f = float( - source_node.get_attr("momentum", "0.9")) - - # weights - if self.weight_loaded: - # gamma - if IR_node.attr["scale"].b: - self.set_weight(source_node.name, "scale", self.weight_data.get( - source_node.name + "_gamma").asnumpy()) - - # beta - if IR_node.attr["bias"].b: - self.set_weight(source_node.name, "bias", self.weight_data.get( - source_node.name + "_beta").asnumpy()) - - # mean - self.set_weight(source_node.name, "mean", self.weight_data.get( - source_node.name + "_moving_mean").asnumpy()) - - # var - self.set_weight(source_node.name, "var", self.weight_data.get( - source_node.name + "_moving_var").asnumpy()) - - def rename_Pooling(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - self._copy_and_reop(source_node, IR_node, "Pool") - - # input edge - self.convert_inedge(source_node, IR_node) - - # pooling type (sum not allowed yet) - pool_type = source_node.get_attr("pool_type") - if pool_type == "sum": - print("Warning: sum pooling is not supported yet.") - elif pool_type == "max": - assign_IRnode_values(IR_node, {'pooling_type': 'MAX'}) - elif pool_type == "avg": - assign_IRnode_values(IR_node, {'pooling_type': 'AVG'}) - else: - raise ValueError("Error pool_type {}.".format(pool_type)) - - kernel_shape = MXNetParser.str2intList(source_node.get_attr("kernel")) - - if MXNetParser.str2bool(source_node.get_attr("global_pool", "False")): - - IR_node.attr['global_pooling'].b = True - IR_node.attr["kernel_shape"].list.i[:] = [ - 1] * (len(kernel_shape) + 2) - IR_node.attr["strides"].list.i[:] = [1] * (len(kernel_shape) + 2) - else: - IR_node.attr['global_pooling'].b = False - - # strides - strides = source_node.get_attr("stride") - IR_node.attr["strides"].list.i.append(1) - if not strides == None: - IR_node.attr["strides"].list.i.extend( - MXNetParser.str2intList(strides)) - IR_node.attr["strides"].list.i.append(1) - - # kernel_shape - IR_node.attr["kernel_shape"].list.i.append(1) - IR_node.attr["kernel_shape"].list.i.extend(kernel_shape) - IR_node.attr["kernel_shape"].list.i.append(1) - - # padding - if "pad" in 
source_node.attr: - pad = MXNetParser.str2intList(source_node.get_attr("pad")) - IR_node.attr["pads"].list.i.extend(([0]+pad+[0])*2) - else: - IR_node.attr["pads"].list.i.extend(([0])*8) - - # output shape - self.set_output_shape(source_node, IR_node) - - def rename_SoftmaxOutput(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - self._copy_and_reop(source_node, IR_node, "Softmax") - - # input edge - self.convert_inedge(source_node, IR_node) - - if "attr" in source_node.layer or "param" in source_node.layer: - print("Warning: SoftmaxOutput attrs are not supported in IR.") - - # output shape - self.set_output_shape(source_node, IR_node) - - def rename_softmax(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op='Softmax') - - # dim - if self.data_format in MXNetParser.channels_first or self.data_format == 'None': - IR_node.attr["dim"].i = MXNetParser._convert_axis( - IR_node, int(source_node.get_attr("axis", "-1"))) - else: - IR_node.attr["dim"].i = int(source_node.get_attr("axis", "-1")) - - # def rename_log_softmax(self, source_node): - # raise NotImplementedError("not support yet") - - # def rename_Correlation(self, source_node): - # raise NotImplementedError("not support yet") - - def rename_Deconvolution(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - self._copy_and_reop(source_node, IR_node, "ConvTranspose") - - # input edge - self.convert_inedge(source_node, IR_node) - - dim = 0 - layout = 'None' - - # padding - if "pad" in source_node.attr: - pad = MXNetParser.str2intList(source_node.get_attr("pad")) - IR_node.attr["pads"].list.i.extend(([0]+pad+[0])*2) - else: - IR_node.attr["pads"].list.i.extend([0, 0] * (dim + 2)) - - # output shape - self.set_output_shape(source_node, IR_node) - - # kernel_shape - kernel = MXNetParser.str2intList(source_node.get_attr("kernel")) - dim = len(kernel) - IR_node.attr["kernel_shape"].list.i.extend(kernel) - - layout = source_node.get_attr("layout") - if layout == None or layout == 'None': - if dim == 1: - layout = "NCW" - elif dim == 2: - layout = "NCHW" - elif dim == 3: - layout = "NCDHW" - - if not self.data_format == layout: - # print("Warning: Layer [{}] has changed model data format from [{}] to [{}]".format(source_node.name, self.data_format, layout)) - self.data_format = layout - - in_channel = self.IR_layer_map[IR_node.input[0] - ].attr["_output_shapes"].list.shape[0].dim[-1].size - - out_channel = int(source_node.get_attr("num_filter")) - - IR_node.attr["kernel_shape"].list.i.extend([out_channel, in_channel]) - - # use_bias (no_bias default = False) - IR_node.attr["use_bias"].b = not MXNetParser.str2bool( - source_node.get_attr("no_bias", "False")) - - # strides - strides = source_node.get_attr("strides") - IR_node.attr["strides"].list.i.append(1) - if not strides == None: - IR_node.attr["strides"].list.i.extend( - MXNetParser.str2intList(strides)) - else: - IR_node.attr["strides"].list.i.extend([1] * dim) - IR_node.attr["strides"].list.i.append(1) - - # dilations - dilate = source_node.get_attr("dilate") - IR_node.attr["dilations"].list.i.append(1) - if not dilate == None: - IR_node.attr["dilations"].list.i.extend( - MXNetParser.str2intList(dilate)) - else: - IR_node.attr["dilations"].list.i.extend([1] * dim) - IR_node.attr["dilations"].list.i.append(1) - - # data_format - IR_node.attr["data_format"].s = layout - - # groups - IR_node.attr["group"].i = int(source_node.get_attr("num_group", "1")) - - # weights - if self.weight_loaded: - weight = 
self.weight_data.get(
-                source_node.name + "_weight").asnumpy()
-            if not layout in MXNetParser.channels_last:
-                weight = MXNetParser.transpose(weight, dim)
-            self.set_weight(source_node.name, "weights", weight)
-
-            if IR_node.attr["use_bias"].b:
-                self.set_weight(source_node.name, "bias", self.weight_data.get(
-                    source_node.name + "_bias").asnumpy())
-
-    # def rename_RNN(self, source_node):
-    #     raise NotImplementedError("RNN not support yet")
-
-    def rename_Embedding(self, source_node):
-        IR_node = self.IR_graph.node.add()
-
-        # name, op
-        self._copy_and_reop(source_node, IR_node)
-
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-
-        # input_dim
-        IR_node.attr["input_dim"].i = int(source_node.get_attr("input_dim"))
-
-        # output_dim
-        IR_node.attr["output_dim"].i = int(source_node.get_attr("output_dim"))
-
-        # dtype
-        IR_node.attr["dtype"].type = MXNetParser.dtype_map[source_node.get_attr(
-            "dtype", "float32")]
-
-        # output shape
-        self.set_output_shape(source_node, IR_node)
-
-    # IR only supports elu and prelu from {'elu', 'leaky', 'prelu', 'rrelu'}
-
-    def rename_LeakyReLU(self, source_node):
-        act_type = source_node.get_attr('act_type', None)
-        if act_type:
-            if not act_type == "elu" and not act_type == "prelu":
-                print("Warning: Activation Type %s is not supported yet." % act_type)
-                # return
-
-        IR_node = self.IR_graph.node.add()
-
-        # name, op
-        if act_type == 'prelu':
-            self._copy_and_reop(source_node, IR_node, "PRelu")
-
-            # gamma
-            self.set_weight(source_node.name, "gamma", self.weight_data.get(
-                source_node.name + "_gamma").asnumpy())
-
-        else:  # All other cases set to 'Elu'
-            self._copy_and_reop(source_node, IR_node, "Elu")
-
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-
-        # alpha [exp(x) - alpha], but mxnet attr slope [slope*(exp(x) - 1)] when x < 0
-        if "slope" in source_node.attr:
-            raise ValueError("Attribute Slope is not supported in IR format")
-            # IR_node.attr["alpha"].f = float()
-
-        # output shape
-        self.set_output_shape(source_node, IR_node)
-
-        # raise NotImplementedError("slope cannot convert to alpha")
-
-    # def rename_InstanceNorm(self, source_node):
-    #     raise NotImplementedError
-
-    # def rename_L2Normalization(self, source_node):
-    #     raise NotImplementedError
-
-    def rename_LRN(self, source_node):
-        IR_node = self._convert_identity_operation(source_node)
-
-        # alpha
-        IR_node.attr["alpha"].f = float(
-            source_node.get_attr("alpha", "0.0001"))
-        # beta
-        IR_node.attr["beta"].f = float(source_node.get_attr("beta", "0.75"))
-        # knorm
-        IR_node.attr["k"].f = float(source_node.get_attr("knorm", "2"))
-        # nsize: get_attr is a method, and the IR `size` field is an int;
-        # the original `float(source_node.get_attr["nsize"])` would raise.
-        IR_node.attr["size"].i = int(source_node.get_attr("nsize"))
-
-    def rename_ROIPooling(self, source_node):
-        raise NotImplementedError()
-
-    def rename_Dropout(self, source_node):
-        IR_node = self._convert_identity_operation(source_node)
-
-        # keep_prob
-        IR_node.attr["keep_prob"].f = float(source_node.get_attr("p", "0.5"))
-
-        # mode
-        assign_IRnode_values(IR_node, {'mode': 'training'})
-
-    """
-    Here start the Symbol manipulation routines
-    """
-
-    # reverse is not supported yet
-    def rename_reshape(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, new_op='Reshape')
-
-        # The old target_shape API is not supported yet
-        shape = source_node.get_attr("shape")
-        if not shape == None:
-            shape_list = MXNetParser.str2intList(shape)
-            for param in shape_list:
-                if param <= 0 and not param == -1:
-                    raise ValueError(
-                        "special value %d for Reshape is not pre-defined in IR."
% param) - IR_node.attr["shape"].list.i.extend(shape_list) - - # output shape - self.set_output_shape(source_node, IR_node) - - # raise NotImplementedError("adjust output shape") - - def rename_Flatten(self, source_node): - self._convert_identity_operation(source_node, new_op='Flatten') - - def rename_Concat(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op='Concat') - - # dim - if self.data_format in MXNetParser.channels_first or self.data_format == 'None': - IR_node.attr["axis"].i = MXNetParser._convert_axis( - IR_node, int(source_node.get_attr("dim", "1"))) - else: - IR_node.attr["axis"].i = int(source_node.get_attr("dim", "1")) - - def rename_cast(self, source_node): - IR_node = self._convert_identity_operation(source_node, new_op='Cast') - - # dtype - IR_node.attr["dtype"].type = MXNetParser.dtype_map[source_node.get_attr( - "dtype")] - - # output shape - self.set_output_shape(source_node, IR_node) - - def rename_expand_dims(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - self._copy_and_reop(source_node, IR_node) - - # input edge - self.convert_inedge(source_node, IR_node) - - # output shape - self.set_output_shape(source_node, IR_node) - - # axis - if self.data_format in MXNetParser.channels_first or self.data_format == 'None': - IR_node.attr["axis"].i = MXNetParser._convert_axis( - IR_node, int(source_node.get_attr("axis"))) - else: - IR_node.attr["axis"].i = int(source_node.get_attr("axis")) - - def rename_elemwise_add(self, source_node): - self._convert_identity_operation(source_node, new_op='Add') - - def rename__Plus(self, source_node): - self._convert_identity_operation(source_node, new_op='Add') - - def rename_broadcast_add(self, source_node): - self._convert_identity_operation(source_node, new_op='Add') - - def rename_broadcast_mul(self, source_node): - self._convert_identity_operation(source_node, new_op='Mul') - - def rename__mul(self, source_node): - self._convert_identity_operation(source_node, new_op='Mul') - - def rename__copy(self, source_node): - self._convert_identity_operation(source_node) - # raise NotImplementedError("No matching IR api") - - def _convert_scalar_operator(self, source_node, new_op): - value = source_node.get_attr('scalar') - value_node = self.IR_graph.node.add() - value_node.name = source_node.real_name + "_second" - # left strip the "_" at the beginning of the name - # Issue #85, #135 - value_node.name = value_node.name.lstrip('_') - value_node.op = 'Constant' - self.set_weight(value_node.name, 'value', - np.array([value], np.float32)) - - IR_node = self._convert_identity_operation(source_node, new_op) - IR_node.input.append(value_node.name) - return IR_node - - def rename__mul_scalar(self, source_node): - self._convert_scalar_operator(source_node, 'Mul') - - def rename__minus_scalar(self, source_node): - self._convert_scalar_operator(source_node, 'Sub') - - def rename__copy(self, source_node): - source_node.real_name = self.get_parent( - source_node.name, [0]).real_name - - def rename_BlockGrad(self, source_node): - return -def save_model(MainModel, network_filepath, weight_filepath, dump_filepath): - model = MainModel.RefactorModel() - model = MainModel.deploy_weight(model, weight_filepath) - model.save_checkpoint(dump_filepath, 0) - print('MXNet checkpoint file is saved with prefix [{}] and iteration 0, generated by [{}.py] and [{}].'.format( - dump_filepath, network_filepath, weight_filepath)) -from __future__ import absolute_import -from __future__ import division -from 
__future__ import print_function -from mmdnn.conversion.common.DataStructure.emitter import Emitter -from mmdnn.conversion.common.IR.IR_graph import IRGraph -import os.path -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -import numpy as np -import sys - - -class OnnxEmitter(Emitter): - dtype_map = { - graph_pb2.DT_FLOAT32: "TensorProto.FLOAT" - } - - transpose_map = { - 1: 2, - 2: 3, - -1: 1 - } - - def __init__(self, architecture, weight): - super(OnnxEmitter, self).__init__() - if os.path.exists(architecture) == False: - raise ValueError( - "IR architecture file [{}] is not found.".format(architecture)) - else: - self.IR_graph = IRGraph(architecture) - self.IR_graph.build() - - if os.path.exists(weight) == False: - raise ValueError( - "IR weight file [{}] is not found.".format(weight)) - else: - self._load_weights(weight) - - @property - def header_code(self): - return """import numpy as np -from onnx import helper, TensorProto -import onnx - -__weights_dict = dict() - -def load_weights(weight_file): - if weight_file == None: - return - - try: - weights_dict = np.load(weight_file).item() - except: - weights_dict = np.load(weight_file, encoding='bytes').item() - - return weights_dict - - -def KitModel(weight_file = None): - global __weights_dict - __weights_dict = load_weights(weight_file) - -""" - - def gen_code(self, phase): - self.phase = phase - self.add_body(0, self.header_code) - - self.inputs = [] - self.outputs = [] - self.nodes = [] - self.initializer = [] - - for layer in self.IR_graph.topological_sort: - current_node = self.IR_graph.get_node(layer) - node_type = current_node.type - - if hasattr(self, "emit_" + node_type): - func = getattr(self, "emit_" + node_type) - func(current_node) - else: - print( - "OnnxEmitter has not supported operator [%s]." 
% (node_type)) - self.emit_UNKNOWN(current_node) - - self._process_output_layers() - - self.add_body(1, "graph = helper.make_graph([{}], 'mmdnn', [{}], [{}], [{}])".format(', '.join(self.nodes), - ', '.join( - self.inputs), - ', '.join( - self.outputs), - ', '.join( - self.initializer)) - ) - self.add_body( - 1, "return helper.make_model(graph, opset_imports=[helper.make_opsetid('', 6)])") - return self.body_code - - def run(self, dstNetworkPath, dstWeightPath=None, phase='test'): - super(OnnxEmitter, self).run(dstNetworkPath, dstWeightPath, phase) - self.save_weights(self.weights_dict, dstWeightPath) - - def check_if_need_transpose(self, IR_node): - parent = self.IR_graph.get_parent(IR_node.name, [0]) - while parent.type == 'Flatten' or parent.type == 'Dropout': - parent = self.IR_graph.get_parent(parent.name, [0]) - dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim) - if dim > 2: - original_dims = self.weights_dict[IR_node.name]['weights'].shape - dims = [ - i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1] - self.weights_dict[IR_node.name]['weights'] = self.weights_dict[IR_node.name]['weights'] - self.weights_dict[IR_node.name]['weights'] = np.reshape( - self.weights_dict[IR_node.name]['weights'], dims) - self.weights_dict[IR_node.name]['weights'] = np.transpose( - self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1]) - self.weights_dict[IR_node.name]['weights'] = np.reshape( - self.weights_dict[IR_node.name]['weights'], original_dims) - - def _process_output_layers(self): - for name in self.IR_graph.output_layers: - IR_node = self.IR_graph.get_node( - self.IR_graph.get_node(name).real_name) - # omit node of some type - if IR_node.type == 'Shape' or IR_node.type == 'Pack': - continue - shape_str = IRGraph.shapeToStr( - IR_node.layer.attr["_output_shapes"].list.shape[0]) - if IR_node.layer.attr['dtype'].type == graph_pb2.DT_UNDEFINED: - IR_node.layer.attr['dtype'].type = graph_pb2.DT_FLOAT32 - dtype_str = self.dtype_map[IR_node.layer.attr['dtype'].type] - self.add_body(1, "{:<15} = helper.make_tensor_value_info('{}', {}, ({},))".format( - IR_node.variable_name + '_out', - IR_node.variable_name, - dtype_str, - shape_str)) - self.outputs.append(IR_node.variable_name + '_out') - - def emit_DataInput(self, IR_node): - shape = [dim.size if dim.size != - - 1 else 1 for dim in IR_node.IR_layer.attr["shape"].shape.dim] - shape_str = ', '.join('%s' % i for i in shape) - if IR_node.layer.attr['dtype'].type == graph_pb2.DT_UNDEFINED: - IR_node.layer.attr['dtype'].type = graph_pb2.DT_FLOAT32 - dtype_str = self.dtype_map[IR_node.layer.attr['dtype'].type] - self.add_body(1, "{:<15} = helper.make_tensor_value_info('{}', {}, ({},))".format( - IR_node.variable_name + '_orig', - IR_node.variable_name + '_orig', - dtype_str, - shape_str)) - self.add_body(1, "{:15} = helper.make_node('Transpose', inputs=['{}'], outputs=['{}'], perm=[0, 3, 1, 2])".format( - IR_node.variable_name, - IR_node.variable_name + '_orig', - IR_node.variable_name)) - self.inputs.append(IR_node.variable_name + '_orig') - self.nodes.append(IR_node.variable_name) - - def emit_Conv(self, IR_node): - kernel_shape = list(IR_node.get_attr('kernel_shape'))[:-2] - dilations = list(IR_node.get_attr( - 'dilations', [1] * (len(kernel_shape) + 2)))[1:-1] - group = IR_node.get_attr('group', 1) - if IR_node.type == 'DepthwiseConv': - group = IR_node.IR_layer.attr["kernel_shape"].list.i[-2] - self.weights_dict[IR_node.name]['weights'] = np.swapaxes( - 
self.weights_dict[IR_node.name]['weights'], -1, -2) - pads = IR_node.get_attr('pads') - pad_length = len(pads) - pads = pads[1:pad_length // 2 - 1] + \ - pads[pad_length // 2 + 1:pad_length - 1] - strides = list(IR_node.get_attr('strides'))[1:-1] - use_bias = IR_node.get_attr('use_bias') - self.add_body(1, "{:15} = __weights_dict['{}']['weights']".format( - IR_node.variable_name + '_weight_array', - IR_node.name)) - self.add_body(1, "{} = {}.transpose([3,2,0,1])".format( - IR_node.variable_name + '_weight_array', - IR_node.variable_name + '_weight_array')) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))".format( - IR_node.variable_name + '_weight', - IR_node.variable_name + '_weight', - IR_node.variable_name + '_weight_array', - IR_node.variable_name + '_weight_array', - IR_node.variable_name + '_weight_array')) - if use_bias: - self.add_body(1, "{:15} = __weights_dict['{}']['bias']".format( - IR_node.variable_name + '_bias_array', - IR_node.name)) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))".format( - IR_node.variable_name + '_bias', - IR_node.variable_name + '_bias', - IR_node.variable_name + '_bias_array', - IR_node.variable_name + '_bias_array', - IR_node.variable_name + '_bias_array')) - self.add_body(1, "{:15} = helper.make_node('Conv', inputs=['{}', '{}', '{}'],outputs=['{}'], dilations={}, group={}, kernel_shape={}, pads={}, strides={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name + '_weight', - IR_node.variable_name + '_bias', - IR_node.variable_name, - dilations, - group, - kernel_shape, - pads, - strides)) - self.nodes.append(IR_node.variable_name + '_bias') - else: - self.add_body(1, "{:15} = helper.make_node('Conv', inputs=['{}', '{}'],outputs=['{}'], dilations={}, group={}, kernel_shape={}, pads={}, strides={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name + '_weight', - IR_node.variable_name, - dilations, - group, - kernel_shape, - pads, - strides)) - self.nodes.append(IR_node.variable_name + '_weight') - self.nodes.append(IR_node.variable_name) - - def emit_BatchNorm(self, IR_node): - epsilon = IR_node.get_attr('epsilon') - if IR_node.get_attr('scale'): - self.add_body(1, "{:15} = __weights_dict['{}']['scale']".format( - IR_node.variable_name + '_scale_array', - IR_node.name)) - else: - self.add_body(1, "{:15} = np.ndarray(__weights_dict['{}']['bias'].shape, dtype=__weights_dict['{}']['bias'].dtype)".format( - IR_node.variable_name + '_scale_array', - IR_node.name, - IR_node.name)) - self.add_body(1, "{:15}.fill(1)".format( - IR_node.variable_name + '_scale_array')) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))".format( - IR_node.variable_name + '_scale', - IR_node.variable_name + '_scale', - IR_node.variable_name + '_scale_array', - IR_node.variable_name + '_scale_array', - IR_node.variable_name + '_scale_array')) - self.add_body(1, "{:15} = __weights_dict['{}']['bias']".format( - IR_node.variable_name + '_bias_array', - IR_node.name)) - 
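-        # Editorial note: each BatchNormalization input (scale, bias, mean,
-        # var) follows the same two-step pattern: read the numpy array out of
-        # __weights_dict, then wrap it in an ONNX 'Constant' node so the op
-        # can consume it by name.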
self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))".format( - IR_node.variable_name + '_bias', - IR_node.variable_name + '_bias', - IR_node.variable_name + '_bias_array', - IR_node.variable_name + '_bias_array', - IR_node.variable_name + '_bias_array')) - self.add_body(1, "{:15} = __weights_dict['{}']['mean']".format( - IR_node.variable_name + '_mean_array', - IR_node.name)) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))".format( - IR_node.variable_name + '_mean', - IR_node.variable_name + '_mean', - IR_node.variable_name + '_mean_array', - IR_node.variable_name + '_mean_array', - IR_node.variable_name + '_mean_array')) - self.add_body(1, "{:15} = __weights_dict['{}']['var']".format( - IR_node.variable_name + '_var_array', - IR_node.name)) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))".format( - IR_node.variable_name + '_var', - IR_node.variable_name + '_var', - IR_node.variable_name + '_var_array', - IR_node.variable_name + '_var_array', - IR_node.variable_name + '_var_array')) - self.add_body(1, "{:15} = helper.make_node('BatchNormalization', inputs=['{}', '{}', '{}', '{}', '{}'],outputs=['{}'], epsilon={}, is_test={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name + '_scale', - IR_node.variable_name + '_bias', - IR_node.variable_name + '_mean', - IR_node.variable_name + '_var', - IR_node.variable_name, - epsilon, - 0 if self.phase == 'train' else 1)) - self.nodes.append(IR_node.variable_name + '_scale') - self.nodes.append(IR_node.variable_name + '_bias') - self.nodes.append(IR_node.variable_name + '_mean') - self.nodes.append(IR_node.variable_name + '_var') - self.nodes.append(IR_node.variable_name) - - def emit_Scale(self, IR_node): - dims = [ - i.size for i in IR_node.layer.attr['_output_shapes'].list.shape[0].dim[1:]] - units = dims[-1] - epsilon = 1e-5 - if IR_node.get_attr('scale'): - self.add_body(1, "{:15} = __weights_dict['{}']['scale']".format( - IR_node.variable_name + '_scale_array', - IR_node.name)) - else: - self.add_body(1, "{:15} = np.ndarray(__weights_dict['{}']['bias'].shape, dtype=__weights_dict['{}']['bias'].dtype)".format( - IR_node.variable_name + '_scale_array', - IR_node.name, - IR_node.name)) - self.add_body(1, "{:15}.fill(1)".format( - IR_node.variable_name + '_scale_array')) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))".format( - IR_node.variable_name + '_scale', - IR_node.variable_name + '_scale', - IR_node.variable_name + '_scale_array', - IR_node.variable_name + '_scale_array', - IR_node.variable_name + '_scale_array')) - self.add_body(1, "{:15} = __weights_dict['{}']['bias']".format( - IR_node.variable_name + '_bias_array', - IR_node.name)) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))".format( - 
IR_node.variable_name + '_bias', - IR_node.variable_name + '_bias', - IR_node.variable_name + '_bias_array', - IR_node.variable_name + '_bias_array', - IR_node.variable_name + '_bias_array')) - self.add_body(1, "{:15} = np.zeros({}, dtype=np.float32)".format( - IR_node.variable_name + '_mean_array', - units)) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))".format( - IR_node.variable_name + '_mean', - IR_node.variable_name + '_mean', - IR_node.variable_name + '_mean_array', - IR_node.variable_name + '_mean_array', - IR_node.variable_name + '_mean_array')) - self.add_body(1, "{:15} = np.ones({}, dtype=np.float32)".format( - IR_node.variable_name + '_var_array', - units)) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))".format( - IR_node.variable_name + '_var', - IR_node.variable_name + '_var', - IR_node.variable_name + '_var_array', - IR_node.variable_name + '_var_array', - IR_node.variable_name + '_var_array')) - self.add_body(1, "{:15} = helper.make_node('BatchNormalization', inputs=['{}', '{}', '{}', '{}', '{}'],outputs=['{}'], epsilon={}, is_test={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name + '_scale', - IR_node.variable_name + '_bias', - IR_node.variable_name + '_mean', - IR_node.variable_name + '_var', - IR_node.variable_name, - epsilon, - 0 if self.phase == 'train' else 1)) - self.nodes.append(IR_node.variable_name + '_scale') - self.nodes.append(IR_node.variable_name + '_bias') - self.nodes.append(IR_node.variable_name + '_mean') - self.nodes.append(IR_node.variable_name + '_var') - self.nodes.append(IR_node.variable_name) - - def emit_Relu(self, IR_node): - self.add_body(1, "{:15} = helper.make_node('Relu', inputs=['{}'], outputs=['{}'])".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name)) - self.nodes.append(IR_node.variable_name) - - def emit_Add(self, IR_node): - input_layers = ', '.join( - ("'" + self.IR_graph.get_parent(IR_node.name, [num]).real_variable_name) + "'" for num in - range(0, len(IR_node.in_edges))) - self.add_body(1, "{:15} = helper.make_node('Add', inputs=[{}], outputs=['{}'])".format( - IR_node.variable_name, - input_layers, - IR_node.variable_name)) - self.nodes.append(IR_node.variable_name) - - def emit_Pool(self, IR_node): - pooling_type = IR_node.get_attr('pooling_type') - if IR_node.layer.attr['global_pooling'].b: - if pooling_type == 'AVG': - self.add_body(1, "{:15} = helper.make_node('GlobalAveragePool', inputs=['{}'], outputs=['{}'])".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name)) - self.nodes.append(IR_node.variable_name) - else: - print("OnnxEmitter has not supported Global Pool type [%s]." 
% ( - pooling_type)) - self.emit_UNKNOWN(IR_node) - else: - if pooling_type in ['AVG', 'MAX']: - if pooling_type == 'AVG': - op_name = 'AveragePool' - elif pooling_type == 'MAX': - op_name = 'MaxPool' - kernel_shape = list(IR_node.get_attr('kernel_shape')[1:-1]) - pads = IR_node.get_attr('pads') - pad_length = len(pads) - pads = pads[1:pad_length // 2 - 1] + \ - pads[pad_length // 2 + 1:pad_length - 1] - strides = list(IR_node.get_attr('strides')[1:-1]) - self.add_body(1, "{:15} = helper.make_node('{}', inputs=['{}'],outputs=['{}'], kernel_shape={}, pads={}, strides={})".format( - IR_node.variable_name, - op_name, - self.parent_variable_name(IR_node), - IR_node.variable_name, - kernel_shape, - pads, - strides)) - self.nodes.append(IR_node.variable_name) - else: - print("OnnxEmitter has not supported Pool type [%s]." % ( - pooling_type)) - self.emit_UNKNOWN(IR_node) - - def emit_FullyConnected(self, IR_node): - self.check_if_need_transpose(IR_node) - use_bias = IR_node.get_attr('use_bias', True) - units = IR_node.get_attr('units') - - self.add_body(1, "{:15} = __weights_dict['{}']['weights']".format( - IR_node.variable_name + '_weight_array', - IR_node.name)) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))".format( - IR_node.variable_name + '_weight', - IR_node.variable_name + '_weight', - IR_node.variable_name + '_weight_array', - IR_node.variable_name + '_weight_array', - IR_node.variable_name + '_weight_array')) - if use_bias: - self.add_body(1, "{:15} = __weights_dict['{}']['bias']".format( - IR_node.variable_name + '_bias_array', - IR_node.name)) - else: - self.add_body(1, "{:15} = np.zeros({})".format( - IR_node.variable_name + '_bias_array', - units)) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))".format( - IR_node.variable_name + '_bias', - IR_node.variable_name + '_bias', - IR_node.variable_name + '_bias_array', - IR_node.variable_name + '_bias_array', - IR_node.variable_name + '_bias_array')) - self.add_body(1, "{:15} = helper.make_node('Gemm', inputs=['{}', '{}', '{}'],outputs=['{}'])".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name + '_weight', - IR_node.variable_name + '_bias', - IR_node.variable_name)) - self.nodes.append(IR_node.variable_name + '_weight') - self.nodes.append(IR_node.variable_name + '_bias') - self.nodes.append(IR_node.variable_name) - - def emit_Pad(self, IR_node): - mode = IR_node.layer.attr['mode'].s.decode() - pads = IR_node.get_attr('pads') - pad_length = len(pads) - pads = [0, 0] + pads[1:pad_length // 2 - 1] + \ - [0, 0] + pads[pad_length // 2 + 1:pad_length - 1] - self.add_body(1, "{:15} = helper.make_node('Pad', inputs=['{}'], outputs=['{}'], mode='{}', pads={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name, - mode, - pads)) - self.nodes.append(IR_node.variable_name) - - def emit_Concat(self, IR_node): - axis = IR_node.get_attr('axis') - 2 - inputs = ', '.join( - "'" + self.IR_graph.get_node(i).real_variable_name + "'" for i in IR_node.in_edges) - self.add_body(1, "{:15} = helper.make_node('Concat', inputs=[{}], outputs=['{}'], axis={})".format( - IR_node.variable_name, - inputs, - 
IR_node.variable_name, - axis)) - self.nodes.append(IR_node.variable_name) - - def emit_Flatten(self, IR_node): - self.add_body(1, "{:15} = helper.make_node('Flatten', inputs=['{}'], outputs=['{}'])".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name)) - self.nodes.append(IR_node.variable_name) - - def emit_Softmax(self, IR_node): - self.add_body(1, "{:15} = helper.make_node('Softmax', inputs=['{}'], outputs=['{}'])".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name)) - self.nodes.append(IR_node.variable_name) - - def emit_Constant(self, IR_node): - if IR_node.get_attr('value'): - value = 'np.array({}, dtype=np.float32)'.format( - IR_node.get_attr('value')) - self.add_body(1, "{:15} = {}".format( - IR_node.variable_name + '_value_array', - value)) - else: - self.add_body(1, "{:15} = __weights_dict['{}']['value']".format( - IR_node.variable_name + '_value_array', - IR_node.name)) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}.flatten().astype(float)))".format( - IR_node.variable_name, - IR_node.variable_name, - IR_node.variable_name + '_value_array', - IR_node.variable_name + '_value_array', - IR_node.variable_name + '_value_array')) - self.nodes.append(IR_node.variable_name) - - def emit_Sub(self, IR_node): - inputs = ', '.join( - "'" + self.IR_graph.get_node(i).real_variable_name + "'" for i in IR_node.in_edges) - self.add_body(1, "{:15} = helper.make_node('Sub', inputs=[{}], outputs=['{}'], broadcast=1)".format( - IR_node.variable_name, - inputs, - IR_node.variable_name)) - self.nodes.append(IR_node.variable_name) - - def emit_Mul(self, IR_node): - inputs = ', '.join( - "'" + self.IR_graph.get_node(i).real_variable_name + "'" for i in IR_node.in_edges) - - if IR_node.name in self.weights_dict and 'weights' in self.weights_dict[IR_node.name]: - self.add_body(1, "{:15} = np.array([__weights_dict['{}']['weights']])".format( - IR_node.variable_name+'_weight_array', - IR_node.name - )) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))".format( - IR_node.variable_name + '_weight', - IR_node.variable_name + '_weight', - IR_node.variable_name + '_weight_array', - IR_node.variable_name + '_weight_array', - IR_node.variable_name + '_weight_array')) - inputs += ', '+''.join("'"+IR_node.variable_name + "_weight'") - self.nodes.append(IR_node.variable_name+'_weight') - - self.add_body(1, "{:15} = helper.make_node('Mul', inputs=[{}], outputs=['{}'], broadcast=1)".format( - IR_node.variable_name, - inputs, - IR_node.variable_name)) - self.nodes.append(IR_node.variable_name) - - def emit_Dropout(self, IR_node): - self.add_body(1, "{:15} = helper.make_node('Dropout', inputs=['{}'], outputs=['{}'], is_test={}, ratio={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name, - 0 if self.phase == 'train' else 1, - 1 - IR_node.get_attr('keep_prob'))) - self.nodes.append(IR_node.variable_name) - - def emit_Squeeze(self, IR_node): - IR_node.real_name = self.IR_graph.get_node( - IR_node.in_edges[0]).real_name - - def emit_ReduceMean(self, IR_node): - axes = IR_node.layer.attr['axes'].list.i[:] - axes = ','.join('%s' % OnnxEmitter.transpose_map[i] for i in axes) - 
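# transpose_map (defined earlier in this emitter) re-indexes IR axes from the
# NHWC layout into ONNX's NCHW layout (presumably height 1 -> 2, width 2 -> 3,
# channels -1 -> 1), so a reduction over NHWC axes [1, 2] becomes a
# ReduceMean over NCHW axes [2, 3].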
self.add_body(1, "{:15} = helper.make_node('ReduceMean', inputs=['{}'], outputs=['{}'], axes=[{}], keepdims={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name, - axes, - 1 if IR_node.layer.attr['keepdims'].b else 0)) - self.nodes.append(IR_node.variable_name) - - def emit_Reshape(self, IR_node): - shape = [item if item != - - 1 else 1 for item in IR_node.get_attr('shape')] - if len(shape) == 4: - shape = [shape[i] for i in [0, 3, 1, 2]] - shape_str = ', '.join('%s' % i for i in shape) - self.add_body(1, "{:15} = np.array([{}], dtype=np.int64)".format( - IR_node.variable_name + '_shape_array', - shape_str - )) - self.add_body(1, "{:15} = helper.make_node('Constant', inputs=[], outputs=['{}'], value=helper.make_tensor(name='const_tensor', data_type=onnx.mapping.NP_TYPE_TO_TENSOR_TYPE[{}.dtype], dims={}.shape, vals={}))".format( - IR_node.variable_name + '_shape', - IR_node.variable_name + '_shape', - IR_node.variable_name + '_shape_array', - IR_node.variable_name + '_shape_array', - IR_node.variable_name + '_shape_array')) - self.add_body(1, "{:15} = helper.make_node('Reshape', inputs=['{}', '{}'], outputs=['{}'])".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name + '_shape', - IR_node.variable_name)) - self.nodes.append(IR_node.variable_name + '_shape') - self.nodes.append(IR_node.variable_name) - - def emit_LRN(self, IR_node): - alpha = IR_node.get_attr('alpha') - beta = IR_node.get_attr('beta') - bias = IR_node.get_attr('bias', 1.0) - size = IR_node.get_attr('size') * 2 - 1 - self.add_body(1, "{:15} = helper.make_node('LRN', inputs=['{}'], outputs=['{}'], alpha={}, beta={}, bias={}, size={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name, - alpha, - beta, - bias, - size)) - self.nodes.append(IR_node.variable_name) - - def emit_Relu6(self, IR_node): - self.add_body(1, "{:15} = helper.make_node('Clip', inputs=['{}'], outputs=['{}'], min=0.0, max=6.0)".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name)) - self.nodes.append(IR_node.variable_name) - - def emit_DepthwiseConv(self, IR_node): - self.emit_Conv(IR_node) - - def emit_Slice(self, IR_node): - if self.IR_graph.get_parent(IR_node.name, [0]).type == 'Shape': - pass - else: - starts = IR_node.get_attr('starts') - starts = [starts[0], starts[-1]] + starts[1:-1] - ends = IR_node.get_attr('ends') - ends = [ends[0], ends[-1]] + ends[1:-1] - ends = [i if i != 0 else sys.maxsize for i in ends] - self.add_body(1, "{:15} = helper.make_node('Slice', inputs=['{}'], outputs=['{}'], starts={}, ends={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name, - starts, - ends)) - self.nodes.append(IR_node.variable_name) - - def emit_LeakyRelu(self, IR_node): - alpha = IR_node.get_attr('alpha') - self.add_body(1, "{:15} = helper.make_node('LeakyRelu', inputs=['{}'], outputs=['{}'], alpha={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name, - alpha)) - self.nodes.append(IR_node.variable_name) - - def emit_PRelu(self, IR_node): - slope = IR_node.get_attr('gamma') - self.add_body(1, "{:15} = helper.make_node('PRelu', inputs=['{}'], outputs=['{}'], slope={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name, - slope)) - self.nodes.append(IR_node.variable_name) - - def emit_SpaceToDepth(self, IR_node): - blocksize = 
IR_node.get_attr('blocksize') - self.add_body(1, "{:15} = helper.make_node('SpaceToDepth', inputs=['{}'], outputs=['{}'], blocksize={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.variable_name, - blocksize)) - self.nodes.append(IR_node.variable_name) - - def emit_UNKNOWN(self, IR_node): - print(IR_node.IR_layer.name) -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -from mmdnn.conversion.common.DataStructure.graph import GraphNode, Graph -from onnx import onnx_pb2 - - -class ONNXGraphNode(GraphNode): - def __init__(self, layer): - super(ONNXGraphNode, self).__init__(layer) - self.weights = list() - self.inputs = list() - self.outputs = list() - - @property - def name(self): - return self.layer.name - - @property - def type(self): - return self.layer.op_type - - @property - def onnx_layer(self): - return self.layer - - -# node -# input -# edge(node a <-> node b) -# - -class ONNXGraph(Graph): - @staticmethod - def _generate_name(layer): - return "" - - def __init__(self, model): - super(ONNXGraph, self).__init__(model) - self._graph = model.graph - # key is edge name, value is src/dst node name - self._edge_src = dict() - self._edge_dst = dict() - # key is initializer name, value is TensorProto - self._weights = dict() - self._inputs = dict() - self._outputs = dict() - - def build(self): - for w in self._graph.initializer: - self._weights[w.name] = w - for s in self._graph.input: - self._inputs[s.name] = s - for s in self._graph.output: - self._outputs[s.name] = s - - for i, layer in enumerate(self._graph.node): - if not layer.name: - layer.name = '{0}_{1}'.format(layer.op_type, i) - name = layer.name - # print(name) - # print(layer.op_type) - node = ONNXGraphNode(layer) - self.layer_map[name] = node - self.layer_name_map[name] = name - for n in layer.input: - if n in self._weights: - # n is input data - node.weights.append(n) - if n in self._inputs: - node.inputs.append(n) - else: - # n is input edge - self._edge_dst[n] = name - if n in self._edge_src: - self._make_connection(self._edge_src[n], name) - for n in layer.output: - if n in self._outputs: - node.outputs.append(n) - else: - self._edge_src[n] = name - if n in self._edge_dst: - self._make_connection(name, self._edge_dst[n]) - - super(ONNXGraph, self).build() -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ---------------------------------------------------------------------------------------------- - -from mmdnn.conversion.common.DataStructure.parser import Parser -from mmdnn.conversion.onnx.onnx_graph import ONNXGraph - - -class ONNXParser(Parser): - skip_type = set() - - @property - def src_graph(self): - return self.onnx_graph - - @staticmethod - def _load_model(model_file): - """Load a ONNX model file from disk - - Parameters - ---------- - model_file: str - Path where the model file path is (protobuf file) - - Returns - ------- - model: A ONNX protobuf model - """ - from onnx import onnx_pb2 - from mmdnn.conversion.common.IR.IR_graph import load_protobuf_from_file - - model = onnx_pb2.ModelProto() - load_protobuf_from_file(model, model_file) - - print("ONNX model file [%s] loaded successfully." % model_file) - return model - - def __init__(self, model_file): - super(ONNXParser, self).__init__() - - model = ONNXParser._load_model(model_file) - self.onnx_graph = ONNXGraph(model) - self.onnx_graph.build() - self.weight_loaded = True - - def rename_UNKNOWN(self, source_node): - if source_node.type in self.skip_type: - return - print("ONNX has not supported operator [%s] with name [%s]." - % (source_node.type, source_node.name)) - return - - def gen_IR(self): - # if node len(in_edges), generate additional DataInput node - - # print - for layer in self.src_graph.topological_sort: - current_node = self.src_graph.get_node(layer) - node_type = current_node.type - if hasattr(self, "rename_" + node_type): - func = getattr(self, "rename_" + node_type) - func(current_node) - else: - self.rename_UNKNOWN(current_node) -import onnx - - -def save_model(MainModel, network_filepath, weight_filepath, dump_filepath): - model = MainModel.KitModel(weight_filepath) - onnx.save(model, dump_filepath) - print('ONNX model file is saved as [{}], generated by [{}.py] and [{}].'.format( - dump_filepath, network_filepath, weight_filepath)) - -def Add(shapeA, shapeB, axis=None, broadcast=None): - # do not deal - return shapeA - - -def AveragePool(shape, auto_pad=None, kernelShape=None, pads=None, strides=None): - # I don't want to deal with auto_pad - - if kernelShape is None: - kernelShape = [2 for _ in range(2)] - - dim = len(kernelShape) - - if pads is None: - pads = [1 for _ in range(dim * 2)] - if strides is None: - strides = [1 for _ in range(dim)] - - retShape = shape[:-dim] - dimIdx = 0 - for dimSize in shape[-dim:]: - padUpper = pads[dimIdx * 2] - padLower = pads[dimIdx * 2 + 1] - stride = strides[dimIdx] - kernelDimSize = kernelShape[dimIdx] - retShape.append((dimSize + padUpper + padLower - - kernelDimSize) // stride + 1) - dimIdx = dimIdx + 1 - - return retShape - - -def BatchNormalization(shape, scale=None, B=None, mean=None, var=None): - return shape - - -def Concat(shapeList, axis): - newDimSize = sum([x[axis] for x in shapeList]) - newShape = shapeList[0] - newShape[axis] = newDimSize - return newShape - - -def Conv(shapeX, shapeW, auto_pad=None, dilations=None, group=None, kernel_shape=None, pads=None, strides=None): - # Don't support auto_pad current! - # 2018-02-28 - # if group is None: - # group = 1 - # group is not support yet too. 
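# Worked example of the arithmetic used just below (numbers invented):
#   effective kernel = (kernel_shape - 1) // 2 * dilation * 2 + 1
#       e.g. kernel 3, dilation 2  ->  (3 - 1) // 2 * 2 * 2 + 1 = 5
#   output = (input + pad_upper + pad_lower - effective_kernel) // stride + 1
#       e.g. input 28, pads 2 + 2, stride 1  ->  (28 + 4 - 5) // 1 + 1 = 28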
-    kernelDim = len(shapeX) - 2
-    if kernel_shape is None:
-        # [[1 for _ in range(kernelDimSize)] for _ in range(kernelDimSize)]
-        kernel_shape = shapeW[2:]
-    if pads is None:
-        pads = [0 for _ in range(kernelDim * 2)]
-    if strides is None:
-        strides = [1 for _ in range(kernelDim)]
-    if dilations is None:
-        dilations = [1 for _ in range(kernelDim)]
-
-    retShape = [shapeX[0], shapeW[0]]
-    dimIdx = 0
-    for dimSize in shapeX[2:]:
-        padUpper = pads[dimIdx * 2]
-        padLower = pads[dimIdx * 2 + 1]
-        stride = strides[dimIdx]
-        dilation = dilations[dimIdx]
-        kernelDimSize = (kernel_shape[dimIdx] - 1) // 2 * dilation * 2 + 1
-        retShape.append((dimSize + padUpper + padLower - kernelDimSize) // stride + 1)
-        dimIdx = dimIdx + 1
-    return retShape
-
-
-def GlobalAveragePool(shapeX):
-    return shapeX[:2] + [1, 1]
-
-
-def MaxPool(shape, auto_pad=None, kernelShape=None, pads=None, strides=None):
-    return AveragePool(shape, auto_pad, kernelShape, pads, strides)
-
-
-def Mul(shapeX, shapeW, axis=None, broadcast=None):
-    return shapeX
-
-
-def Relu(shape):
-    return shape
-
-
-def FC(shapeX, shapeW, shapeB=None, axis=None, axis_w=None):
-    if axis is None:
-        axis = 1
-    if axis_w is None:
-        axis_w = 1
-    return [shapeX[0], shapeW[1]]
-
-
-def Flatten(shapeT, axis=None):
-    if axis is None:
-        axis = 1
-
-    firstDim = 1
-    secondDim = 1
-    for i in range(len(shapeT)):
-        if i < axis:
-            firstDim *= shapeT[i]
-        else:
-            secondDim *= shapeT[i]
-
-    if (axis > 0):
-        return [firstDim, secondDim]
-    else:
-        return [secondDim]
-
-
-inference_shape = {
-    'Add': Add,
-    'AveragePool': AveragePool,
-    'BatchNormalization': BatchNormalization,
-    'Concat': Concat,
-    'Conv': Conv,
-    'GlobalAveragePool': GlobalAveragePool,
-    'MaxPool': MaxPool,
-    'Mul': Mul,
-    'Relu': Relu,
-    'FC': FC,
-    'Flatten': Flatten
-}
-
-
-def testByLeNet(image_shape):
-    print('\nLeNet output shape test:')
-    print('input_image_shape is : ', image_shape)
-    convLay1 = [5, 5]
-    WLay1 = [6, -1, 5, 5]
-    outputLay1 = inference_shape['Conv'](image_shape, WLay1, kernel_shape=convLay1)
-    print('1st Lay output shape is : ', outputLay1)
-
-    poolLay2 = [2, 2]
-    stridesLay2 = [2, 2]
-    outputLay2 = inference_shape['AveragePool'](outputLay1, strides=stridesLay2)
-    print('2nd Lay output shape is : ', outputLay2)
-
-    convLay3 = [5, 5]
-    WLay3 = [16, -1, 5, 5]
-    outputLay3 = inference_shape['Conv'](outputLay2, WLay3, kernel_shape=convLay3)
-    print('3rd Lay output shape is : ', outputLay3)
-
-    poolLay4 = [2, 2]
-    stridesLay4 = [2, 2]
-    outputLay4 = inference_shape['AveragePool'](outputLay3, strides=stridesLay4)
-    print('4th Lay output shape is : ', outputLay4)
-
-    convLay5 = [5, 5]
-    WLay5 = [120, -1, 5, 5]
-    outputLay5 = inference_shape['Conv'](outputLay4, WLay5)
-    print('5th Lay output shape is : ', outputLay5)
-
-    outputLay5Flatten = inference_shape['Flatten'](outputLay5)
-    WLay6 = [-1, 84]
-    outputLay6 = inference_shape['FC'](outputLay5Flatten, WLay6)
-    print('6th Lay output shape is : ', outputLay6)
-
-    WLay7 = [-1, 10]
-    outputLay7 = inference_shape['FC'](outputLay6, WLay7)
-    print('7th Lay output shape is : ', outputLay7)
-    return outputLay7
-
-
-if __name__ == '__main__':
-
-    shape = [1, 9, 9, 9]
-
-    print('input shape is : ', shape)
-    print('output shape is : ', AveragePool(shape, pads=[1, 1, 1, 1], strides=[2, 2]))
-    print(inference_shape['AveragePool'](shape, pads=[1, 1, 1, 1], kernelShape=[2, 2], strides=[2, 2]))
-
-    print('input shape is : ',
shape) - print('output shape is : ', AveragePool(shape, pads=[ - 0, 0, 0, 0], kernelShape=[3, 3], strides=[3, 3])) - - shape = [3, 9, 9] - print('input shape is : ', shape) - print('output shape is : ', AveragePool(shape)) - - x = [1, 1, 5, 5] - W = [1, 1, 3, 3] - print('input shapeX is :', x, 'input shapeW is :', W) - print('output shape is :', Conv(x, W), "without pads") - - W = [2, 1, 3, 3] - print('input shapeX is :', x, 'input shapeW is :', W) - print('output shape is :', Conv( - x, W, pads=[1, 1, 1, 1]), 'pads is [1, 1, 1, 1]') - - shape1 = [1, 1, 3, 3] - shape2 = [1, 3, 3, 3] - shape3 = [1, 5, 3, 3] - print('output shape is :', Concat([shape1, shape2, shape3], 1)) - - shape = [5, 5, 5, 5] - print("output shape is :", GlobalAveragePool(shape)) - - print('LeNet-5 output shape is : ', - testByLeNet(image_shape=[-1, 1, 32, 32])) - print('LeNet-5 output shape is : ', - testByLeNet(image_shape=[5, 1, 32, 32])) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- -import os -import paddle.v2 as paddle -import paddle.trainer_config_helpers.layers as layers -from mmdnn.conversion.common.DataStructure.graph import GraphNode, Graph - - -class PaddleGraphNode(GraphNode): - - def __init__(self, layer): - super(PaddleGraphNode, self).__init__(layer) - - @property - def name(self): - return self.layer.name - - @property - def type(self): - return self.layer.type - - @property - def paddle_layer(self): - return self.layer - - -class PaddleGraph(Graph): - - def __init__(self, model): - from paddle.proto import ModelConfig_pb2 - # sanity check. - if not isinstance(model, ModelConfig_pb2.ModelConfig): - raise TypeError( - "PaddlePaddle layer of type %s is not supported." % type(model)) - super(PaddleGraph, self).__init__(model) - self.model = model - - def build(self): - self.input_layers = list() - for layer in self.model.layers: - self.layer_map[layer.name] = PaddleGraphNode(layer) - self.layer_name_map[layer.name] = layer.name - - for input_layer in layer.inputs: - self._make_connection(input_layer.input_layer_name, layer.name) - - super(PaddleGraph, self).build() -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ---------------------------------------------------------------------------------------------- - -from __future__ import absolute_import - -import os -import gzip -from six import string_types as _string_types -import paddle.v2 as paddle -import paddle.trainer_config_helpers.layers as layers -import numpy as np -from mmdnn.conversion.paddle.paddle_graph import PaddleGraph -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType -from mmdnn.conversion.common.DataStructure.parser import Parser -from mmdnn.conversion.common.utils import * - - -class PaddleParser(Parser): - - dtype_map = { - "float16": graph_pb2.DT_FLOAT16, - "float32": graph_pb2.DT_FLOAT32, - "float64": graph_pb2.DT_FLOAT64, - "int16": graph_pb2.DT_INT16, - "int32": graph_pb2.DT_INT32, - "int64": graph_pb2.DT_INT64, - "uint8": graph_pb2.DT_UINT8, - "uint16": graph_pb2.DT_UINT16 - } - - activation_map = { - "relu": "Relu", - 'softmax': "Softmax", - 'sigmoid': "Sigmoid", - "tanh": "Tanh", - "elu": "Elu", - "relu6": "Relu6", - 'softplus': 'Softplus', - 'softsign': 'Softsign', - 'hard_sigmoid': 'HardSigmoid' - } - - layer_map = { - "data": "InputLayer", - "exconv": "Conv", - "addto": "Add", - "batch_norm": "BatchNormalization", - "pool": "Pooling", - "fc": "Dense", - "norm": "LRN", - - - } - - def _load_model(self, model_network_path, model_weight_path): - """Load a paddle model from disk - - Parameters - ---------- - model_network_path: str - Path where the model network path is (json file) - - model_weight_path: str - Path where the model network weights are (hd5 file) - - Returns - ------- - model: A paddle model - """ - from paddle.proto import ModelConfig_pb2 - from mmdnn.conversion.common.IR.IR_graph import load_protobuf_from_file - - loaded_model = ModelConfig_pb2.ModelConfig() - load_protobuf_from_file(loaded_model, model_network_path) - - if model_weight_path: - if os.path.isfile(model_weight_path): - parameters = paddle.parameters.Parameters.from_tar( - gzip.open(model_weight_path, 'r')) - self.weight_loaded = True - print("Network file [{}] and [{}] is loaded successfully.".format( - model_network_path, model_weight_path)) - - else: - print("Warning: Weights File [%s] is not found." % ( - model_weight_path)) - - return loaded_model, parameters - - @property - def src_graph(self): - return self.paddle_graph - - def __init__(self, model): - super(PaddleParser, self).__init__() - - if isinstance(model, tuple): - model_network_path, model_weight_path = model - - # Build network graph - model, parameters = self._load_model( - model_network_path, model_weight_path) - self.paddle_graph = PaddleGraph(model) - self.paddle_graph.build() - self.parameters = parameters - self.shape_dict = dict() - - def gen_IR(self): - - for layer in self.paddle_graph.topological_sort: - current_node = self.paddle_graph.get_node(layer) - node_type = PaddleParser.layer_map[current_node.type] - if hasattr(self, "rename_" + node_type): - func = getattr(self, "rename_" + node_type) - func(current_node) - else: - print( - "PaddleParser has not supported operator [%s]." 
                      % (node_type))
-                self.rename_UNKNOWN(current_node)
-
-    @staticmethod
-    def _set_output_shape(source_node, IR_node, output_shapes):
-        shape = graph_pb2.TensorShape()
-        for output_shape in output_shapes:
-            new_dim = shape.dim.add()
-            new_dim.size = output_shape
-        IR_node.attr["_output_shapes"].list.shape.extend([shape])
-
-    @staticmethod
-    def _copy_and_reop(source_node, IR_node, new_op=None):
-        IR_node.name = source_node.name.lstrip('_')
-        IR_node.op = source_node.type if new_op is None else new_op
-
-        if hasattr(source_node.layer, "dtype"):
-            IR_node.attr["dtype"].type = PaddleParser.dtype_map[source_node.layer.dtype]
-
-        # PaddleParser._set_output_shape(source_node, IR_node)
-
-    @staticmethod
-    def _copy_shape(source_node, target_node, output_shapes):
-        for dim in output_shapes:
-            new_dim = target_node.attr["shape"].shape.dim.add()
-            new_dim.size = dim
-
-    @staticmethod
-    def _convert_dataformat(source_node, target_node):
-        if source_node.keras_layer.data_format == 'channels_last':
-            target_node.attr["data_format"].s = "NHWC"
-        elif source_node.keras_layer.data_format == 'channels_first':
-            target_node.attr["data_format"].s = "NCHW"
-        else:
-            print("Warning: [%s] doesn't have data format info." %
-                  (source_node.keras_layer.name))
-
-    def _defuse_activation(self, source_node):
-        src_spec = source_node.layer
-
-        IR_node = self.IR_graph.node.add()
-        IR_node.name = source_node.real_name.lstrip('_') + "_activation"
-        IR_node.op = PaddleParser.activation_map[src_spec.active_type.encode()]
-        IR_node.input.append(source_node.real_name.lstrip('_'))
-
-        source_node.real_name = IR_node.name
-        return IR_node
-
-    def _convert_merge(self, source_node, new_name=None):
-        IR_node = self.IR_graph.node.add()
-
-        # name, op
-        PaddleParser._copy_and_reop(source_node, IR_node, new_name)
-
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-
-        # For concat axis
-        if hasattr(source_node.layer, 'axis'):
-            IR_node.attr['axis'].i = -1
-        return IR_node
-
-    def rename_UNKNOWN(self, source_node):
-        print(source_node.layer.get_config())
-
-        # only for training
-        IR_node = self.IR_graph.node.add()
-
-        # name, op
-        PaddleParser._copy_and_reop(source_node, IR_node)
-
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-
-    def rename_Conv(self, source_node):
-        IR_node = self.IR_graph.node.add()
-
-        # input edge
-        self.convert_inedge(source_node, IR_node)
-
-        # layer and spec
-        conv_spec = source_node.layer
-
-        spec = conv_spec.inputs[0].conv_conf
-
-        # width <=> x or height <=> y
-        width = spec.filter_size
-        height = spec.filter_size_y if spec.HasField('filter_size_y') else spec.filter_size
-        inputchannel = spec.channels
-        outputchannel = conv_spec.num_filters
-        stride_x = spec.stride
-        stride_y = spec.stride_y if spec.HasField('stride_y') else stride_x
-        padding_x = spec.padding
-        padding_y = spec.padding_y if spec.HasField('padding_y') else padding_x
-        dilation_x = spec.dilation
-        dilation_y = spec.dilation_y if spec.HasField('dilation_y') else dilation_x
-        output_x = spec.output_x
-        output_y = spec.output_y if spec.HasField('output_y') else output_x
-        input_x = spec.img_size
-        input_y = spec.img_size_y if spec.HasField('img_size_y') else input_x
-
-        # output shape
-        output_shapes = [-1, output_y, output_x, outputchannel]
-        self.shape_dict[source_node.name] = output_shapes
-        PaddleParser._set_output_shape(source_node, IR_node, output_shapes)
-
-        kwargs = dict()
-
-        if conv_spec.type in ('exconv', 'cudnn_conv'):
-            # name, op
-            PaddleParser._copy_and_reop(source_node, IR_node, "Conv")
-        else:
-
kwargs['isDeconvolution'] = True - PaddleParser._copy_and_reop(source_node, IR_node, "ConvTranspose") - - w_name = conv_spec.inputs[0].input_parameter_name - w = self.parameters.get(w_name) - - self.set_weight(IR_node.name, 'weights', w.reshape( - [outputchannel, inputchannel, height, width]).transpose([2, 3, 1, 0])) - - # it should be in the shape of height x width x inputchannel x outputchannel - - # use_bias: TODO - kwargs['use_bias'] = False - if conv_spec.HasField('bias_parameter_name'): - bias_name = conv_spec.bias_parameter_name - bias = self.parameters.get(bias_name).squeeze() - self.set_weight(IR_node.name, "bias", bias) - kwargs['use_bias'] = True - - kwargs['kernel_shape'] = [height, width, inputchannel, outputchannel] - - # pad_dim - pad_dim = [0, 0, padding_x, padding_y, padding_x, padding_y, 0, 0] - - # fail report because of auto_pad - # if dilation_x == 1 and dilation_y == 1: - # if output_x * stride_x == input_x and output_y * stride_y == input_y: - # auto_pad = "SAME" - # kwargs['auto_pad'] = auto_pad - # elif output_x * stride_x == input_x - width + 1 and output_y * stride_y == input_y - height + 1: - # auto_pad = "VALID" - # kwargs['auto_pad'] = auto_pad - - if input_x == output_x and input_y == output_y: - auto_pad = "SAME" - else: - auto_pad = "SAME" - - pad_dim = convert_tf_pad_to_onnx(pad_dim) - kwargs['pads'] = pad_dim - - kwargs['group'] = spec.groups - - kwargs['dilation'] = [1, dilation_x, dilation_y, 1] - - kwargs['strides'] = [1, stride_x, stride_y, 1] - - assign_IRnode_values(IR_node, kwargs) - - # defuse the activation layer - - if conv_spec.HasField('active_type') and conv_spec.active_type != '': - IR_node_act = self._defuse_activation(source_node) - PaddleParser._set_output_shape( - source_node, IR_node_act, output_shapes) - - def rename_BatchNormalization(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - PaddleParser._copy_and_reop(source_node, IR_node, "BatchNorm") - - # input edge - self.convert_inedge(source_node, IR_node) - - # layer and spec - bn_spec = source_node.layer - - # output shape - if bn_spec.inputs[0].HasField("image_conf"): - img_conf = bn_spec.inputs[0].image_conf - output_x = img_conf.img_size - output_y = img_conf.img_size_y if img_conf.HasField( - 'img_size_y') else output_x - outputchannel = img_conf.channels - - output_shapes = [-1, output_y, output_x, outputchannel] - self.shape_dict[source_node.name] = output_shapes - PaddleParser._set_output_shape(source_node, IR_node, output_shapes) - - IR_node.attr['scale'].b = True - IR_node.attr['bias'].b = bn_spec.HasField('bias_parameter_name') - - w_name = bn_spec.inputs[0].input_parameter_name - mean_name = bn_spec.inputs[1].input_parameter_name - var_name = bn_spec.inputs[2].input_parameter_name - bias_name = bn_spec.bias_parameter_name - - gamma = self.parameters.get(w_name) - mean = self.parameters.get(mean_name) - variance = self.parameters.get(var_name) - beta = self.parameters.get(bias_name) - - # channels_first, then axis = 1 - IR_node.attr['axis'].i = -1 - - # epsilon - IR_node.attr['epsilon'].f = bn_spec.epsilon - - # compute adjusted parameters - # Reference: parameter transformation https://github.com/apple/coremltools/issues/153 - f = 1.0 / np.sqrt(variance + bn_spec.epsilon) - gamma1 = gamma*f - beta1 = beta - gamma*mean*f - mean[:] = 0.0 # mean - variance[:] = 1.0 - .00001 # stddev - - # convert type because of tensorflow - gamma1 = gamma1.astype(np.float32) - beta1 = beta1.astype(np.float32) - mean = mean.astype(np.float32) - variance = 
variance.astype(np.float32) - - # flatten - gamma1 = gamma1.flatten() - beta1 = beta1.flatten() - mean = mean.flatten() - variance = variance.flatten() - - if IR_node.attr['scale'].b: - self.set_weight(IR_node.name, "scale", gamma1) - - if IR_node.attr['bias'].b: - self.set_weight(IR_node.name, "bias", beta1) - - # mean - self.set_weight(IR_node.name, "mean", mean) - - # var - self.set_weight(IR_node.name, "var", variance) - - # defuse the activation layer - - if bn_spec.HasField('active_type') and bn_spec.active_type != '': - IR_node_act = self._defuse_activation(source_node) - if bn_spec.inputs[0].HasField("image_conf"): - PaddleParser._set_output_shape( - source_node, IR_node_act, output_shapes) - - def rename_Pooling(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - PaddleParser._copy_and_reop(source_node, IR_node, "Pool") - - # input edge - self.convert_inedge(source_node, IR_node) - - # layer and spec - pool_spec = source_node.layer - spec = pool_spec.inputs[0].pool_conf - - # assert False - kwargs = dict() - - if spec.pool_type == 'max-projection': - kwargs['pooling_type'] = 'MAX' - elif spec.pool_type == 'avg-projection': - kwargs['pooling_type'] = 'AVG' - else: - kwargs['pooling_type'] = 'MAX' - - width = spec.size_x - height = spec.size_y if spec.HasField('size_y') else width - channel = spec.channels - stride_x = spec.stride - stride_y = spec.stride_y if spec.HasField('stride_y') else stride_x - padding_x = spec.padding - padding_y = spec.padding_y if spec.HasField('padding_y') else padding_x - output_x = spec.output_x - output_y = spec.output_y if spec.HasField('output_y') else output_x - input_x = spec.img_size - input_y = spec.img_size_y if spec.HasField('img_size_y') else input_x - - # output shape - output_shapes = [-1, output_y, output_x, channel] - self.shape_dict[source_node.name] = output_shapes - PaddleParser._set_output_shape(source_node, IR_node, output_shapes) - - kwargs['global_pooling'] = False - - kwargs['strides'] = [1, stride_x, stride_y, 1] - kwargs['kernel_shape'] = [1, width, height, 1] - - # pad_dim - pad_dim = [0, 0, padding_x, padding_y, padding_x, padding_y, 0, 0] - - # padding mode - # If padding == "SAME": output_spatial_shape[i] = ceil(input_spatial_shape[i] / strides[i]) - # If padding == "VALID": output_spatial_shape[i] = ceil((input_spatial_shape[i] - (spatial_filter_shape[i]-1) * dilation_rate[i]) / strides[i]). 
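# A quick numeric check of the two rules quoted above (values invented,
# dilation taken as 1):
#   SAME : out = ceil(in / stride)              e.g. ceil(224 / 2)       = 112
#   VALID: out = ceil((in - (k - 1)) / stride)  e.g. ceil((224 - 2) / 2) = 111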
- - if output_x * stride_x == input_x and output_y * stride_y == input_y: - auto_pad = "SAME" - kwargs['auto_pad'] = auto_pad - elif output_x * stride_x == input_x - width + 1 and output_y * stride_y == input_y - height + 1: - auto_pad = "VALID" - kwargs['auto_pad'] = auto_pad - - pad_dim = convert_tf_pad_to_onnx(pad_dim) - kwargs['pads'] = pad_dim - - assign_IRnode_values(IR_node, kwargs) - - if pool_spec.HasField('active_type') and pool_spec.active_type != '': - IR_node_act = self._defuse_activation(source_node) - PaddleParser._set_output_shape( - source_node, IR_node_act, output_shapes) - - def rename_Dense(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - PaddleParser._copy_and_reop(source_node, IR_node, "FullyConnected") - - # input edge - self.convert_inedge(source_node, IR_node) - - # layer and spec - fc_spec = source_node.layer - - # units - IR_node.attr['units'].i = fc_spec.size - - # output shape - output_shapes = [-1, fc_spec.size] - self.shape_dict[source_node.name] = output_shapes - PaddleParser._set_output_shape(source_node, IR_node, output_shapes) - - # use_bias - IR_node.attr['use_bias'].b = fc_spec.HasField('bias_parameter_name') - - w_name = fc_spec.inputs[0].input_parameter_name - bias_name = fc_spec.bias_parameter_name - - w = self.parameters.get(w_name) - - bias = self.parameters.get(bias_name).flatten() - - # Kit weight tranpose - # weight: N x M -> C x H x W x M -> H x W x C x M -> N x M - if self.weight_loaded: - parent = self.src_graph.get_parent(source_node.name, [0]) - if len(self.shape_dict[parent.name]) == 4: - # - original_shape = w.shape - channel_first_list = self.shape_dict[parent.name][1:] - dim = len(channel_first_list) + 1 - weight = w.reshape(channel_first_list + [original_shape[1]]) - assert dim > 2 - weight = weight.transpose(list(range(1, dim-1)) + [0, dim-1]) - w = weight.reshape(original_shape) - if fc_spec.HasField('drop_rate'): - w = w * fc_spec.drop_rate - if IR_node.attr['use_bias'].b: - bias = bias * fc_spec.drop_rate - - # weights - self.set_weight(IR_node.name, 'weights', w) - if IR_node.attr['use_bias'].b: - self.set_weight(IR_node.name, 'bias', bias) - - if fc_spec.HasField('active_type') and fc_spec.active_type != '': - IR_node_act = self._defuse_activation(source_node) - PaddleParser._set_output_shape( - source_node, IR_node_act, output_shapes) - - def rename_Add(self, source_node): - add_spec = source_node.layer - self._convert_merge(source_node, 'Add') - if add_spec.HasField('active_type') and add_spec.active_type != '': - self._defuse_activation(source_node) - - def rename_InputLayer(self, source_node): - # need the shape TODO - - # only for training - IR_node = self.IR_graph.node.add() - - # name, op - PaddleParser._copy_and_reop(source_node, IR_node, "DataInput") - - # input edge - self.convert_inedge(source_node, IR_node) - - output_shapes = [-1, 224, 224, 3] - # shape - PaddleParser._copy_shape(source_node.layer, IR_node, output_shapes) - - def rename_LRN(self, source_node): - IR_node = self.IR_graph.node.add() - - # name, op - PaddleParser._copy_and_reop(source_node, IR_node, "LRN") - - # input edge - self.convert_inedge(source_node, IR_node) - - # layer and spec - lrn_spec = source_node.layer - spec = lrn_spec.inputs[0].norm_conf - channels = spec.channels - size = spec.size - alpha = spec.scale - beta = spec.pow - img_size_x = spec.img_size - img_size_y = spec.img_size_y if spec.HasField( - 'img_size_y') else img_size_x - output_x = spec.output_x - output_y = spec.output_y if spec.HasField('output_y') 
else output_x - - # output shape - output_shapes = [-1, output_y, output_x, channels] - self.shape_dict[source_node.name] = output_shapes - PaddleParser._set_output_shape(source_node, IR_node, output_shapes) - - # alpha - IR_node.attr["alpha"].f = alpha * size - # beta - IR_node.attr["beta"].f = beta - # nsize - IR_node.attr["size"].i = int((size+1)/2) - - if lrn_spec.HasField('active_type') and lrn_spec.active_type != '': - IR_node_act = self._defuse_activation(source_node) - PaddleParser._set_output_shape( - source_node, IR_node_act, output_shapes) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -import os -import numpy as np -from six import string_types as _string_types -from mmdnn.conversion.common.IR.IR_graph import IRGraph, IRGraphNode -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType -from mmdnn.conversion.common.DataStructure.emitter import Emitter -from mmdnn.conversion.common.utils import * -from mmdnn.conversion.rewriter.folder import Folder - - -class PytorchEmitter(Emitter): - - dtype_map = { - graph_pb2.DT_FLOAT16: "torch.float16", - graph_pb2.DT_FLOAT32: "torch.float32", - graph_pb2.DT_FLOAT64: "torch.float64", - graph_pb2.DT_INT16: "torch.int16", - graph_pb2.DT_INT32: "torch.int32", - graph_pb2.DT_INT64: "torch.int64", - graph_pb2.DT_UINT8: "torch.uint8", - graph_pb2.DT_UINT16: "torch.uint16" - } - - # Base Functions - def __init__(self, model): - super(PytorchEmitter, self).__init__() - if isinstance(model, _string_types): - network_path = model - else: - network_path = model[0] - weight_path = model[1] - - self.init_code = str() - self.IR_graph = IRGraph(network_path) - self.IR_graph.build() - self._load_weights(weight_path) - - folder = Folder(self.IR_graph, self.weights_dict) - folder.fold() - - def run(self, dstNetworkPath, dstWeightPath=None, phase='test'): - super(PytorchEmitter, self).run(dstNetworkPath, dstWeightPath, phase) - if self.weight_loaded: - self.save_weights(self.weights_dict, dstWeightPath) - - def add_init(self, indent, codes): - if isinstance(codes, _string_types): - codes = [codes] - for code in codes: - self.init_code += (" " * indent) + code + '\n' - - def parent_variable_name(self, IR_node, path=[0], weight_type='weights'): - if not IR_node.in_edges and IR_node.name in self.weights_dict.keys(): - self.weights_dict[IR_node.name][weight_type] = self.weights_dict[IR_node.name][weight_type] - return "torch.from_numpy(__weights_dict['{}']['{}'])".format(IR_node.name, weight_type) - - return super(PytorchEmitter, self).parent_variable_name(IR_node, path) - - @property - def header_code(self): - return """import numpy as np -import torch -import torch.nn as nn -import torch.nn.functional as F -import math - -__weights_dict = dict() - -def load_weights(weight_file): - if weight_file == None: - return - - try: - weights_dict = np.load(weight_file).item() - except: - weights_dict = np.load(weight_file, encoding='bytes').item() - - return weights_dict - -class KitModel(nn.Module): -""" - - def gen_code(self, phase): - self.add_init(1, """ - def __init__(self, 
weight_file): - super(KitModel, self).__init__() - global __weights_dict - __weights_dict = load_weights(weight_file) -""") - - self.add_body(1, "def forward(self, x):") - - for layer in self.IR_graph.topological_sort: - current_node = self.IR_graph.get_node(layer) - node_type = current_node.type - - if hasattr(self, "emit_" + node_type): - func = getattr(self, "emit_" + node_type) - line = func(current_node) - if line: - self.add_body(2, line) - - else: - print( - "Pytorch Emitter has not supported operator [%s]." % (node_type)) - self.emit_UNKNOWN(current_node) - - self.add_body(2, "return {}".format( - ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers if self.IR_graph.get_node(name).type != 'Pack']))) - - self.add_body(0, "") - for i in self.used_layers: - func = getattr(self, "_layer_" + i) - func() - - self.add_body(0, "") - for code in self.layers_codes.values(): - self.add_body(0, code) - - return self.header_code + '\n' + self.init_code + '\n' + self.body_code - - def _defuse_padding(self, IR_node, extra_str=""): - input_node = self.parent_variable_name(IR_node) - if IR_node.get_attr('auto_pad') == 'VALID': - return input_node - - if is_valid_padding(IR_node.get_attr("pads")) == True: - return input_node - - padding = self._convert_padding(IR_node) - input_node = IR_node.variable_name + '_pad' - self.add_body(2, "{:<15} = F.pad({}, {}{})".format( - input_node, - self.parent_variable_name(IR_node), - padding, - extra_str - )) - - return input_node - - def emit_Conv(self, IR_node): - self.used_layers.add('Conv') - - dim = len(IR_node.get_attr('strides')) - 2 - - in_channels = IR_node.get_attr('kernel_shape')[-2] - filter = IR_node.get_attr('kernel_shape')[-1] - kernel = IR_node.get_attr('kernel_shape')[:-2] - strides = IR_node.get_attr('strides')[1:-1] - - if IR_node.type == 'DepthwiseConv': - group = in_channels - filter *= group - - else: - group = IR_node.get_attr('group', 1) - - self.add_init(2, "self.{} = self.__conv({}, name='{}', in_channels={}, out_channels={}, kernel_size={}, stride={}, groups={}, bias={})".format( - IR_node.variable_name, - dim, - IR_node.name, - in_channels, - filter, - tuple(kernel), - tuple(strides), - # padding, - group, - IR_node.get_attr('use_bias'))) - - input_node = self._defuse_padding(IR_node) - - code = "{:<15} = self.{}({})".format( - IR_node.variable_name, - IR_node.variable_name, - input_node) - - if self.weight_loaded: - if IR_node.type == 'DepthwiseConv': - self.weights_dict[IR_node.name]['weights'] = np.swapaxes( - self.weights_dict[IR_node.name]['weights'], -1, -2) - self.weights_dict[IR_node.name]['weights'] = np.transpose( - self.weights_dict[IR_node.name]['weights'], [dim + 1, dim] + list(range(0, dim))) - - return code - - @staticmethod - def is_ceil_mode(pads): - lens = len(pads) - for i in range(lens // 2 + 1, lens - 1): - if pads[i] == pads[i - lens // 2]: - return False - else: - return True - - def emit_Pool(self, IR_node): - dim = len(IR_node.get_attr('strides')) - 2 - - if IR_node.get_attr('pooling_type') == "MAX": - pool_name = "max_pool{}d".format(dim) - # exstr = ", value=float('-Inf')" - elif IR_node.get_attr('pooling_type') == "AVG": - pool_name = "avg_pool{}d".format(dim) - # exstr = "" - else: - raise ValueError() - - if IR_node.layer.attr['global_pooling'].b: - code = "{:<15} = F.{}(input = {}, kernel_size = {}.size()[2:])".format( - IR_node.variable_name, - pool_name, - self.parent_variable_name(IR_node), - self.parent_variable_name(IR_node) - ) - return code - - else: - 
if IR_node.get_attr('pooling_type') == "MAX": - # Change to padding defuse - input_node = self._defuse_padding( - IR_node, ", value=float('-inf')") - for e in IR_node.get_attr('dilations', []): - assert e == 1 - - pool_size = IR_node.get_attr('kernel_shape')[1:-1] - strides = IR_node.get_attr('strides')[1:-1] - - code = "{:<15} = F.{}({}, kernel_size={}, stride={}, padding={}, ceil_mode={})".format( - IR_node.variable_name, - pool_name, - input_node, - tuple(pool_size), - tuple(strides), - 0, - False - ) - return code - - elif IR_node.get_attr('pooling_type') == "AVG": - - for e in IR_node.get_attr('dilations', []): - assert e == 1 - - pool_size = IR_node.get_attr('kernel_shape')[1:-1] - strides = IR_node.get_attr('strides')[1:-1] - - padding = IR_node.get_attr('pads')[1:dim] - ceil_mode = self.is_ceil_mode(IR_node.get_attr('pads')) - - # input_node = self._defuse_padding(IR_node, exstr) - code = "{:<15} = F.{}({}, kernel_size={}, stride={}, padding={}, ceil_mode={}, count_include_pad=False)".format( - IR_node.variable_name, - pool_name, - self.parent_variable_name(IR_node), - tuple(pool_size), - tuple(strides), - tuple(padding), - ceil_mode - ) - return code - else: - raise ValueError() - - def emit_UNKNOWN(self, IR_node): - print(IR_node.name) - - def emit_DataInput(self, IR_node): - # Ignore it in Pytorch - IR_node.real_name = 'x' - - def emit_Dropout(self, IR_node): - code = "{:<15} = F.dropout(input = {}, p = {}, training = self.training, inplace = True)".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - IR_node.layer.attr["keep_prob"].f) - return code - - def check_if_need_transpose(self, IR_node): - parent = self.IR_graph.get_parent(IR_node.name, [0]) - while parent.type == 'Flatten' or parent.type == 'Dropout': - parent = self.IR_graph.get_parent(parent.name, [0]) - dim = len(parent.layer.attr['_output_shapes'].list.shape[0].dim) - if dim > 2: - original_dims = self.weights_dict[IR_node.name]['weights'].shape - dims = [ - i.size for i in parent.layer.attr['_output_shapes'].list.shape[0].dim[1:]] + [-1] - self.weights_dict[IR_node.name]['weights'] = np.reshape( - self.weights_dict[IR_node.name]['weights'], dims) - self.weights_dict[IR_node.name]['weights'] = np.transpose( - self.weights_dict[IR_node.name]['weights'], [dim - 2] + list(range(0, dim - 2)) + [dim - 1]) - self.weights_dict[IR_node.name]['weights'] = np.reshape( - self.weights_dict[IR_node.name]['weights'], original_dims) - - def emit_FullyConnected(self, IR_node): - self.used_layers.add(IR_node.type) - in_features = 1 - for i in self.IR_graph.get_parent(IR_node.name, [0]).layer.attr['_output_shapes'].list.shape[0].dim[1:]: - in_features *= i.size - - if IR_node.get_attr('in_features') != None: - in_features = IR_node.get_attr('in_features') - - self.add_init(2, "self.{} = self.__dense(name = '{}', in_features = {}, out_features = {}, bias = {})".format( - IR_node.variable_name, - IR_node.name, - in_features, - IR_node.layer.attr["units"].i, - IR_node.IR_layer.attr["use_bias"].b)) - - input_node = self.parent_variable_name(IR_node) - if len(self.IR_graph.get_parent(IR_node.name, [0]).get_attr('_output_shapes')[0].dim) > 2: - input_node = "{}.view({}.size(0), -1)".format(input_node, - input_node) - - code = "{:<15} = self.{}({})".format( - IR_node.variable_name, - IR_node.variable_name, - input_node) - - if self.weight_loaded: - self.check_if_need_transpose(IR_node) - self.weights_dict[IR_node.name]['weights'] = np.transpose( - self.weights_dict[IR_node.name]['weights'], (1, 0)) - - return code - - 
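# check_if_need_transpose above reorders a fully connected layer's weights
# whenever the layer is fed by a flattened 4-D channels-last tensor; a minimal
# numpy sketch of that reordering, with invented shapes:
import numpy as np

h, w, c, out_features = 2, 2, 3, 5  # invented example dimensions
weights = np.arange(h * w * c * out_features,
                    dtype=np.float32).reshape(h * w * c, out_features)
reordered = (weights.reshape(h, w, c, out_features)  # undo channels-last flatten
             .transpose(2, 0, 1, 3)                  # move channels to the front
             .reshape(h * w * c, out_features))      # flatten back, NCHW order
assert reordered.shape == weights.shape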
def emit_Flatten(self, IR_node): - parent = self.IR_graph.get_parent(IR_node.name, [0]).real_variable_name - code = "{:<15} = {}.view({}.size(0), -1)".format( - IR_node.variable_name, - parent, - parent) - return code - - def emit_Reshape(self, IR_node): - shape_list = IR_node.get_attr('shape') - shape_str = ','.join([str(int(i)) for i in shape_list]) - code = "{:<15} = torch.reshape(input = {}, shape = ({}))".format( - IR_node.variable_name, - self.IR_graph.get_node(IR_node.in_edges[0]).real_variable_name, - shape_str) - return code - - def emit_Tanh(self, IR_node): - code = "{:<15} = F.tanh({})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node, [0])) - return code - - def emit_Relu(self, IR_node): - code = "{:<15} = F.relu({})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node, [0])) - return code - - def emit_LeakyRelu(self, IR_node): - code = "{:<15} = F.leaky_relu({}, negative_slope={})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node, [0]), - IR_node.get_attr('alpha')) - return code - - def emit_Relu6(self, IR_node): - code = "{:<15} = F.relu6({})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node, [0])) - return code - - def emit_Softmax(self, IR_node): - code = "{:<15} = F.softmax({})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node, [0])) - return code - - def emit_Sigmoid(self, IR_node): - code = "{:<15} = F.sigmoid({})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node) - ) - return code - - def emit_Embedding(self, IR_node): - self.used_layers.add("Embedding") - self.add_init(2, "self.{} = self.__embedding('{}', num_embeddings={}, embedding_dim={})".format( - IR_node.variable_name, - IR_node.name, - IR_node.get_attr('input_dim'), # 2-D - IR_node.get_attr('output_dim') - )) - - code = "{:<15} = self.{}({})".format( - IR_node.variable_name, - IR_node.variable_name, - "torch.LongTensor(np.array({}))".format( - self.parent_variable_name(IR_node)) - ) - return code - - def emit_RNNs(self, IR_node, func): - raise NotImplementedError() - # for Keras - if "dropout" in IR_node.IR_layer.attr: - dropout_str = ",dropout = {}, recurrent_dropout = {}".format( - IR_node.IR_layer.attr['dropout'].f, - IR_node.IR_layer.attr['recurrent_dropout'].f) - else: - dropout_str = "" - - code = "{:<15} = {}(units = {}, use_bias = {} {})({})".format( - IR_node.name, - func, - IR_node.IR_layer.attr['units'].i, - IR_node.IR_layer.attr['use_bias'].b, - dropout_str, - IR_node.in_edges[0]) - - return code - - def emit_LSTM(self, IR_node): - return self.emit_RNNs(IR_node, "LSTM") - - def emit_GRU(self, IR_node): - return self.emit_RNNs(IR_node, "GRU") - - def emit_Add(self, IR_node): - code = "{:<15} = {} + {}".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - self.parent_variable_name(IR_node, [1])) - return code - - def emit_Sub(self, IR_node): - code = "{:<15} = {}".format( - IR_node.variable_name, - ' - '.join(self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges)))) - return code - - def emit_Mul(self, IR_node): - code = "{:<15} = {}".format( - IR_node.variable_name, - ' * '.join(self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges)))) - return code - - def emit_MatMul(self, IR_node): - code = "{:<15} = torch.matmul({})".format( - IR_node.variable_name, - ' , '.join('%s' % self.IR_graph.get_node(s).real_variable_name for s in IR_node.in_edges)) - return code - - def emit_Constant(self, IR_node): - if 
IR_node.get_attr('value'): - value = IR_node.get_attr('value') - if not isinstance(value, list): - value = [value] - code = "self.{:<15} = torch.autograd.Variable(torch.Tensor({}), requires_grad=False)".format( - IR_node.variable_name, - value) - else: - code = "self.{:<15} = torch.autograd.Variable(torch.from_numpy(__weights_dict['{}']['value']), requires_grad=False)".format( - IR_node.variable_name, - IR_node.name) - - # self.add_init(2, "self.{:<15} = torch.from_numpy(__weights_dict['{}']['value'])".format( - # IR_node.variable_name, - # IR_node.name)) - IR_node.real_name = "self." + IR_node.variable_name - return code - - def _convert_axis(self, IR_node, axis): - ndim = len(self.IR_graph.get_parent( - IR_node.name, [0]).get_attr('_output_shapes')[0].dim) - if axis == 0: - return 0 - elif axis == ndim - 1: - return 1 - else: - return axis + 1 - - def emit_Concat(self, IR_node): - axis = self._convert_axis(IR_node, IR_node.get_attr('axis')) - code = "{:<15} = torch.cat(({}), {})".format( - IR_node.variable_name, - ', '.join(self.parent_variable_name( - IR_node, [idx]) for idx in range(len(IR_node.in_edges))), - axis, - ) - return code - - def emit_BatchNorm(self, IR_node): - self.used_layers.add(IR_node.type) - dim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim) - 2 - - output_shape = IR_node.layer.attr['_output_shapes'].list.shape[0] - if IR_node.get_attr('data_format', "NHWC") == "NCHW": - num_features = output_shape.dim[1].size - else: - num_features = output_shape.dim[-1].size - - self.add_init(2, "self.{} = self.__batch_normalization({}, '{}', num_features={}, eps={}, momentum={})".format( - IR_node.variable_name, - dim, - IR_node.name, - num_features, - IR_node.layer.attr['epsilon'].f, - IR_node.layer.attr['momentum'].f, - )) - - code = "{:<15} = self.{}({})".format( - IR_node.variable_name, - IR_node.variable_name, - self.parent_variable_name(IR_node) - ) - return code - - def emit_Scale(self, IR_node): - self.used_layers.add(IR_node.type) - dim = len(IR_node.layer.attr['_output_shapes'].list.shape[0].dim) - 2 - - self.add_init(2, "self.{} = self.__scale({}, '{}', num_features={})".format( - IR_node.variable_name, - dim, - IR_node.name, - IR_node.layer.attr['_output_shapes'].list.shape[0].dim[-1].size - )) - - code = "{:<15} = self.{}({})".format( - IR_node.variable_name, - IR_node.variable_name, - self.parent_variable_name(IR_node) - ) - return code - - def emit_Squeeze(self, IR_node): - code = "{:<15} = torch.squeeze({})".format( - IR_node.variable_name, self.parent_variable_name(IR_node) - ) - return code - - @staticmethod - def _convert_padding(IR_node): - padding = IR_node.get_attr('pads') - padding = convert_onnx_pad_to_tf(padding)[1:-1] - new_padding = [] - for pad in padding: - new_padding.insert(0, pad) - return tuple(np.array(new_padding).reshape(-1).tolist()) - - def emit_Pad(self, IR_node): - if IR_node.get_attr('mode').lower() == 'constant': - mode = "mode = 'constant', value = {}".format(0) - elif IR_node.get_attr('mode').lower() == 'reflect': - mode = "mode = 'reflect'" - elif IR_node.get_attr('mode').upper() == 'SYMMETRIC': - mode = "mode = 'replicate'" - else: - assert False - - padding = self._convert_padding(IR_node) - code = "{:<15} = F.pad({}, {}, {})".format( - IR_node.variable_name, - self.parent_variable_name(IR_node), - padding, - mode) - return code - - def emit_ReduceMean(self, IR_node): - axes = [self._convert_axis(IR_node, x) - for x in IR_node.get_attr('axes')] - input_node = self.parent_variable_name(IR_node) - codes = [] - for axis in 
        for axis in sorted(axes, reverse=True):
            code = "{:<15} = torch.mean({}, {}, {})".format(
                IR_node.variable_name,
                input_node,
                axis,
                IR_node.get_attr("keepdims")
            )
            codes.append(code)
            input_node = IR_node.variable_name
        return codes

    def emit_LRN(self, IR_node):
        code = "{:<15} = F.local_response_norm({}, size={}, alpha={}, beta={}, k={})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('size') * 2 - 1,
            IR_node.get_attr('alpha'),
            IR_node.get_attr('beta'),
            IR_node.get_attr('k', 1)
        )
        return code

    def emit_DepthwiseConv(self, IR_node):
        return self.emit_Conv(IR_node)

    def emit_Const(self, IR_node):
        if 'dtype' in IR_node.layer.attr:
            dtype_str = "dtype={}".format(
                self.dtype_map[IR_node.layer.attr['dtype'].type])
            if 'int' in dtype_str:
                code = "{:<15} = torch.tensor({}, {})".format(
                    IR_node.variable_name,
                    IR_node.layer.attr['value'].i,
                    dtype_str)
            else:
                code = "{:<15} = torch.tensor({}, {})".format(
                    IR_node.variable_name,
                    IR_node.layer.attr['value'].f,
                    dtype_str)

        else:
            dtype_str = "dtype=torch.float32"
            code = "{:<15} = torch.tensor({}, {})".format(
                IR_node.variable_name,
                IR_node.layer.attr['value'].f,
                dtype_str)
        return code

    def emit_Shape(self, IR_node):
        code = "{:<15} = torch.Tensor(list({}.size()))".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node)
        )
        return code

    def emit_Pack(self, IR_node):
        code = "{:<15} = {}".format(
            IR_node.variable_name,
            '[' + ','.join('%s' % self.IR_graph.get_node(
                s).real_variable_name for s in IR_node.in_edges) + ']',
        )
        return code

    def emit_Slice(self, IR_node):
        starts = IR_node.get_attr('starts')
        if len(starts) > 1:
            starts = [starts[0], starts[-1]] + starts[1:-1]
        ends = IR_node.get_attr('ends')
        if len(ends) > 1:
            ends = [ends[0], ends[-1]] + ends[1:-1]
        extra_str = ""
        for idx, _ in enumerate(starts):
            if idx:
                extra_str += ", "
            extra_str += "{}:".format(starts[idx])
            if ends[idx]:
                extra_str += "{}".format(ends[idx])

        shrink_mask = IR_node.get_attr('shrink_axis_mask')

        if shrink_mask:
            mask = [int(s) for s in bin(shrink_mask)[2:][::-1]]
            shrink_str = '[' + ','.join(':' if bit == 0 else '0' for bit in mask) + ']'
        else:
            shrink_str = ''
        code = "{:<15} = {}[{}]{}".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            extra_str,
            shrink_str
        )
        return code

    def emit_Split(self, IR_node):

        if isinstance(IR_node.get_attr('split'), list):
            split_str = IR_node.get_attr('split')
        else:
            num_split = IR_node.get_attr('split')
            split_str = "math.ceil({}.shape[{}]/{})".format(
                self.parent_variable_name(IR_node),
                IR_node.get_attr('axis'),
                num_split)
        code = "{:<15} = torch.split({}, {}, dim={})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            split_str,
            IR_node.get_attr('axis'),
        )
        return code

    def emit_Unstack(self, IR_node):
        code = "{:<15} = torch.unbind({}, dim={})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('axis')
        )
        return code

    def emit_Fill(self, IR_node):
        code = "{:<15} = torch.full({}.int().numpy().tolist(), {})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('value')
        )
        return code

    def emit_Gather(self, IR_node):
        pass

    def emit_Unsqueeze(self, IR_node):
        code = "{:<15} = {}.unsqueeze({})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            IR_node.get_attr('axes')[0]
        )
        return code

    def emit_Transpose(self, IR_node):
        code = "{:<15} = {}.permute({})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))
        return code

    def emit_Minimum(self, IR_node):
        code = "{:<15} = torch.min({}, {})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))
        return code

    def emit_Maxmum(self, IR_node):
        # NB: the method name keeps the IR op type's spelling ('Maxmum'),
        # since operators are dispatched dynamically via getattr("emit_" + node_type).
        code = "{:<15} = torch.max({}, {})".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node),
            self.parent_variable_name(IR_node, [1]))
        return code

    def emit_Square(self, IR_node):
        code = "{:<15} = {}.pow(2)".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node))
        return code

    def emit_PRelu(self, IR_node):
        code = "{:<15} = F.prelu({}, torch.from_numpy(__weights_dict['{}']['weights']))".format(
            IR_node.variable_name,
            self.parent_variable_name(IR_node, [0]),
            IR_node.name)

        if self.weight_loaded:
            self.weights_dict[IR_node.name]['weights'] = self.weights_dict[IR_node.name]['gamma']

        return code

    def emit_Cast(self, IR_node):
        dstType = IR_node.get_attr('dstType')

        if dstType == 'float':
            dst = 'torch.FloatTensor'
        elif dstType == 'double':
            dst = 'torch.DoubleTensor'
        elif dstType == 'int':
            dst = 'torch.IntTensor'
        else:
            # fail loudly on an unsupported dtype rather than referencing an
            # unbound 'dst' below.
            assert False

        code = "{:<15} = {}.type({})".format(
            IR_node.real_variable_name,
            self.parent_variable_name(IR_node),
            dst)

        return code

    def emit_Scope(self, IR_node):
        input_vars = [self.parent_variable_name(
            IR_node, [idx]) for idx in range(len(IR_node.in_edges))]
        code = "{:<15} = self.__{}({})".format(
            IR_node.real_variable_name,
            IR_node.pattern,
            ', '.join(input_vars))
        self._gen_scope_code(IR_node)
        return code

    def _gen_scope_code(self, scope_node):

        def _scope_func(scope_name, params, code, return_var):
            code = """
    def __{}({}):
{}
        return {}
    """.format(scope_name, params, code, ', '.join(return_var))
            return code

        if not self.layers_codes.get(scope_node.pattern, None):
            body_code = str()
            for node_name in scope_node.topology_list:
                node = self.IR_graph.get_node(node_name)
                node_type = node.type

                if hasattr(self, "emit_" + node_type):
                    func = getattr(self, "emit_" + node_type)
                    line = func(node)
                    if line != None:
                        body_code += "        " + line + '\n'
                else:
                    print(
                        "PytorchEmitter has not supported operator [%s]." % (node_type))
                    self.emit_UNKNOWN(node)

            # param_code does not need parameter slice.
            input_params = scope_node.input_params
            input_params.insert(0, "self")
            param_code = ', '.join(input_params)
            function_code = _scope_func(
                scope_node.pattern, param_code, body_code, scope_node.return_variables)

            self.layers_codes[scope_node.pattern] = function_code

    def _layer_Embedding(self):
        self.add_body(0, """
    @staticmethod
    def __embedding(name, **kwargs):
        layer = nn.Embedding(**kwargs)  # shape
        layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights']))
        return layer
    """)

    def _layer_Conv(self):
        self.add_body(0, """
    @staticmethod
    def __conv(dim, name, **kwargs):
        if dim == 1: layer = nn.Conv1d(**kwargs)
        elif dim == 2: layer = nn.Conv2d(**kwargs)
        elif dim == 3: layer = nn.Conv3d(**kwargs)
        else: raise NotImplementedError()

        layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights']))
        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        return layer""")

    def _layer_FullyConnected(self):
        self.add_body(0, """
    @staticmethod
    def __dense(name, **kwargs):
        layer = nn.Linear(**kwargs)
        layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['weights']))
        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        return layer""")

    def _layer_BatchNorm(self):
        self.add_body(0, """
    @staticmethod
    def __batch_normalization(dim, name, **kwargs):
        if dim == 0 or dim == 1: layer = nn.BatchNorm1d(**kwargs)
        elif dim == 2: layer = nn.BatchNorm2d(**kwargs)
        elif dim == 3: layer = nn.BatchNorm3d(**kwargs)
        else: raise NotImplementedError()

        if 'scale' in __weights_dict[name]:
            layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['scale']))
        else:
            layer.weight.data.fill_(1)

        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        else:
            layer.bias.data.fill_(0)

        layer.state_dict()['running_mean'].copy_(torch.from_numpy(__weights_dict[name]['mean']))
        layer.state_dict()['running_var'].copy_(torch.from_numpy(__weights_dict[name]['var']))
        return layer""")

    def _layer_Scale(self):
        self.add_body(0, """
    # from torch.nn.parameter import Parameter

    class _Scale(nn.Module):

        def __init__(self, num_features, affine=True):
            super(KitModel._Scale, self).__init__()
            self.num_features = num_features
            self.affine = affine

            self.running_mean = torch.zeros(num_features)
            self.running_var = torch.ones(num_features)
            self.training = False
            self.eps = 1e-5
            if self.affine:
                self.weight = nn.Parameter(torch.Tensor(num_features))
                self.bias = nn.Parameter(torch.Tensor(num_features))
            else:
                self.register_parameter('weight', None)
                self.register_parameter('bias', None)
            self.reset_parameters()

        def reset_parameters(self):
            if self.affine:
                self.weight.data.uniform_()
                self.bias.data.zero_()

        def _check_input_dim(self, input):
            raise NotImplementedError

        def forward(self, input):
            self._check_input_dim(input)

            return F.batch_norm(
                input, self.running_mean, self.running_var, self.weight, self.bias,
                self.training,
                0, self.eps)

    class Scale1d(_Scale):

        def _check_input_dim(self, input):
            if input.dim() != 2 and input.dim() != 3:
                raise ValueError('expected 2D or 3D input (got {}D input)'
                                 .format(input.dim()))

    class Scale2d(_Scale):

        def _check_input_dim(self, input):
            if input.dim() != 4:
                raise ValueError('expected 4D input (got {}D input)'
                                 .format(input.dim()))

    class Scale3d(_Scale):

        def _check_input_dim(self, input):
            if input.dim() != 5:
                raise ValueError('expected 5D input (got {}D input)'
                                 .format(input.dim()))

    @staticmethod
    def __scale(dim, name, **kwargs):
        if dim == 1: layer = KitModel.Scale1d(**kwargs)
        elif dim == 2: layer = KitModel.Scale2d(**kwargs)
        elif dim == 3: layer = KitModel.Scale3d(**kwargs)
        else: raise NotImplementedError()

        if 'scale' in __weights_dict[name]:
            layer.state_dict()['weight'].copy_(torch.from_numpy(__weights_dict[name]['scale']))
        else:
            layer.weight.data.fill_(1)

        if 'bias' in __weights_dict[name]:
            layer.state_dict()['bias'].copy_(torch.from_numpy(__weights_dict[name]['bias']))
        else:
            layer.bias.data.fill_(0)

        return layer""")
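
# A minimal, self-contained sketch of the kind of forward-pass lines the
# emitter above generates; the tensor names and shapes are illustrative, not
# taken from a real converted model.

import torch
import torch.nn.functional as F

x = torch.randn(1, 3, 8, 8)   # stands in for one parent layer's output
y = torch.randn(1, 5, 8, 8)   # stands in for another parent layer's output

# emit_Concat produces a line of this shape (axis already mapped to NCHW):
concat_1 = torch.cat((x, y), 1)

# emit_Pad maps the IR 'SYMMETRIC' mode onto F.pad's 'replicate' mode:
pad_1 = F.pad(concat_1, (1, 1, 1, 1), mode='replicate')

# emit_ReduceMean emits one torch.mean per axis, highest axis first, feeding
# each intermediate result back in under the same variable name:
mean_1 = torch.mean(pad_1, 3, False)
mean_1 = torch.mean(mean_1, 2, False)
print(concat_1.shape, pad_1.shape, mean_1.shape)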

# ----------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# ----------------------------------------------------------------------------------------------

from mmdnn.conversion.common.DataStructure.graph import GraphNode, Graph
import torch
import torch.jit
import torch.autograd
import torch.serialization
import contextlib
from torch.jit import _unique_state_dict


class PytorchGraphNode(GraphNode):

    def __init__(self, layer):
        self._name = layer.scopeName()
        self._kind = layer.kind()
        import re
        node_id = re.search(r"[\d]+", layer.__str__())
        self.id = node_id.group(0)

        super(PytorchGraphNode, self).__init__(layer)
        self.attrs = {k: layer[k] for k in layer.attributeNames()}

        self.weights_name = '.'.join(
            re.findall(r'\[([\w\d.]+)\]', self._name)
        )

    @property
    def name(self):
        name = self._name + self.id
        # Scopes created in a nested scope may have initial characters
        # that are illegal as the initial character of an op name
        # (viz. '-', '\', '/', and '_').
        name = name.replace('-', 'n').replace('\\', 'n').replace(
            '/', 'n').replace('_', 'n').replace('[', 'n').replace(']', 'n')
        return name

    @property
    def type(self):
        return self._kind

    @property
    def pytorch_layer(self):
        return self.layer


class PytorchGraph(Graph):

    def __init__(self, model):
        # sanity check.
        super(PytorchGraph, self).__init__(model)
        self.model = model
        self.state_dict = _unique_state_dict(self.model)
        self.shape_dict = dict()

    @staticmethod
    def _optimize_graph(graph, aten, export_raw_ir=False):
        # run dce first to eliminate dead parts of the graph that might have been
        # left behind by things like symbolic_override

        torch._C._jit_pass_dce(graph)
        torch._C._jit_pass_lint(graph)

        torch._C._jit_pass_peephole(graph)
        torch._C._jit_pass_lint(graph)
        if not export_raw_ir:
            graph = torch._C._jit_pass_onnx(graph, aten)
            torch._C._jit_pass_lint(graph)
            torch._C._jit_pass_onnx_peephole(graph)
            torch._C._jit_pass_lint(graph)
        torch._C._jit_pass_dce(graph)
        torch._C._jit_pass_lint(graph)
        graph = torch._C._jit_pass_canonicalize(graph)
        torch._C._jit_pass_lint(graph)
        return graph

    @staticmethod
    def get_node_id(node):
        import re
        node_id = re.search(r"[\d]+", node.__str__())
        return node_id.group(0)

    @contextlib.contextmanager
    def set_training(self, model, mode):
        r"""
        A context manager to temporarily set the training mode of 'model'
        to 'mode', resetting it when we exit the with-block. A no-op if
        mode is None.
        """
        if mode is None:
            yield
            return
        old_mode = model.training
        if old_mode != mode:
            model.train(mode)
        try:
            yield
        finally:
            if old_mode != mode:
                model.train(old_mode)

    def build(self, shape):
        """
        build graph for pytorch 0.4.0
        """

        import re
        # construct graph
        dummy_input = torch.autograd.Variable(
            torch.randn(shape), requires_grad=False)

        with self.set_training(self.model, False):
            trace, output = torch.jit.get_trace_graph(
                self.model, (dummy_input, ))

        trace.set_graph(PytorchGraph._optimize_graph(trace.graph(), False))
        # nodes
        nodes = list(trace.graph().nodes())

        # input layer
        # TODO

        # build each layer
        for node in nodes:

            node_id = PytorchGraph.get_node_id(node)
            node_scope = node.scopeName()
            node_name = node_scope + node_id
            node_name = node_name.replace('-', 'n').replace('\\', 'n').replace(
                '/', 'n').replace('_', 'n').replace('[', 'n').replace(']', 'n')
            output_shape_str = re.findall(r'[^()!]+', node.__str__())[1]
            output_shape = [int(x.replace('!', ''))
                            for x in output_shape_str.split(',')]

            self.shape_dict[node_name] = output_shape
            self.layer_map[node_name] = PytorchGraphNode(node)
            self.layer_name_map[node_name] = node_name

            # input
            for node_input in list(node.inputs()):

                if PytorchGraph.get_node_id(node_input.node()) and node_input.node().scopeName():
                    node_input_name = node_input.node().scopeName(
                    ) + PytorchGraph.get_node_id(node_input.node())
                    node_input_name = node_input_name.replace('-', 'n').replace('\\', 'n').replace(
                        '/', 'n').replace('_', 'n').replace('[', 'n').replace(']', 'n')
                    self._make_connection(node_input_name, node_name)
                    # print(node_input_name, '->', node_name)

        super(PytorchGraph, self).build()
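
# A minimal sketch of driving PytorchGraph directly. It assumes PyTorch 0.4.x
# (torch.jit.get_trace_graph, used by build() above, was removed in later
# releases) and that MMdnn is importable; the model choice is illustrative.

import torchvision

from mmdnn.conversion.pytorch.pytorch_graph import PytorchGraph

model = torchvision.models.resnet18(pretrained=False)
graph = PytorchGraph(model)
graph.build((1, 3, 224, 224))  # NCHW shape of the dummy input used for tracing

for name in list(graph.layer_map)[:5]:
    print(name, graph.shape_dict[name])  # sanitized node name -> output shape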

# ----------------------------------------------------------------------------------------------
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License. See License.txt in the project root for license information.
# ----------------------------------------------------------------------------------------------

import os
import numpy as np
import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2
from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType
from mmdnn.conversion.common.utils import *
from mmdnn.conversion.common.DataStructure.parser import Parser
from mmdnn.conversion.pytorch.pytorch_graph import PytorchGraph
import torch
import torchvision


class PytorchParser(Parser):

    layer_map = {
        'onnx::Conv': 'Conv',
        'onnx::Flatten': 'Flatten',
        'onnx::Gemm': 'FullyConnected',
        'onnx::MaxPool': 'Maxpool',
        'onnx::AveragePool': 'Avgpool',
        'onnx::Dropout': 'Dropout',
        'onnx::BatchNormalization': 'BatchNormalization',
        'onnx::Add': 'Add',
        'onnx::Concat': 'Concat',
        'onnx::Relu': 'Relu',
        'onnx::Tanh': 'Tanh',
        'onnx::Sigmoid': 'Sigmoid',
        'onnx::Mul': 'Mul'

        # TODO
        # 'max_pool2d': convert_maxpool,
        # 'onnx::Mul': convert_elementwise_mul,
        # 'onnx::Sub': convert_elementwise_sub,
        # 'onnx::ConvTranspose': convert_convtranspose,
        # 'onnx::LeakyRelu': convert_lrelu,
        # 'onnx::Sigmoid': convert_sigmoid,
        # 'onnx::Softmax': convert_softmax,
        # 'onnx::Selu': convert_selu,
        # 'onnx::Transpose': convert_transpose,
        # 'onnx::Reshape': convert_reshape,
        # 'onnx::MatMul': convert_matmul,
        # 'onnx::Gather': convert_gather,
        # 'onnx::ReduceSum': convert_reduce_sum,
        # 'onnx::Constant': convert_constant,
        # 'onnx::Upsample': convert_upsample,
        # 'onnx::Pad': convert_padding,
    }

    ############
    # property #
    ############

    @property
    def src_graph(self):
        return self.pytorch_graph

    ####################
    # Public Functions #
    ####################

    def __init__(self, model_file_name, input_shape):
        super(PytorchParser, self).__init__()
        if not os.path.exists(model_file_name):
            print("Pytorch model file [{}] is not found.".format(
                model_file_name))
            assert False
        # test

        # cpu: https://github.com/pytorch/pytorch/issues/5286
        try:
            model = torch.load(model_file_name)
        except Exception:
            model = torch.load(model_file_name, map_location='cpu')

        self.weight_loaded = True

        # Build network graph
        self.pytorch_graph = PytorchGraph(model)
        self.input_shape = tuple([1] + input_shape)
        self.pytorch_graph.build(self.input_shape)
        self.state_dict = self.pytorch_graph.state_dict
        self.shape_dict = self.pytorch_graph.shape_dict

    def gen_IR(self):

        for layer in self.src_graph.topological_sort:
            current_node = self.src_graph.get_node(layer)
            onnx_node_type = current_node.type
            node_type = PytorchParser.layer_map[onnx_node_type]

            if hasattr(self, "rename_" + node_type):
                func = getattr(self, "rename_" + node_type)
                func(current_node)

            else:
                self.rename_UNKNOWN(current_node)

        self.gen_Input()

    def _set_output_shape(self, source_node, IR_node):

        shape = graph_pb2.TensorShape()

        layer_name = source_node.name

        shape_pytorch = self.shape_dict[layer_name]

        new_dim = shape.dim.add()

        # (batch, C, H, W) & NHWC
        if len(shape_pytorch) == 4:

            if shape_pytorch[0] == 1:
                new_dim.size = -1
            else:
                new_dim.size = shape_pytorch[0]
            for index in [2, 3, 1]:
                new_dim = shape.dim.add()
                dim = shape_pytorch[index]
                new_dim.size = dim if dim else -1
        elif len(shape_pytorch) == 2:
            if shape_pytorch[0] == 1:
                new_dim.size = -1
            else:
                new_dim.size = shape_pytorch[0]
            for _ in range(2):
                new_dim = shape.dim.add()
                new_dim.size = 1
            new_dim = shape.dim.add()
            dim = shape_pytorch[1]
            new_dim.size = dim if dim else -1

        IR_node.attr["_output_shapes"].list.shape.extend([shape])

    ##########
    # Layers #
    ##########
    def rename_UNKNOWN(self, source_node):
        print("PyTorch parser has not supported operator [%s] with name [%s]."
              % (source_node.type, source_node.name))
        print(source_node.layer)
        print(source_node.layer.data.size())
        assert False

    def gen_Input(self):
        IR_node = self.IR_graph.node.add()
        IR_node.name = 'input'
        IR_node.op = "DataInput"

        for node in self.IR_graph.node:
            if node.name in self.src_graph.input_layers:
                node.input.append('input')

        assert len(self.input_shape) == 4
        new_dim = IR_node.attr["shape"].shape.dim.add()
        if self.input_shape[0] == 1:
            new_dim.size = -1
        else:
            new_dim.size = self.input_shape[0]
        for index in [2, 3, 1]:
            new_dim = IR_node.attr["shape"].shape.dim.add()
            new_dim.size = self.input_shape[index]

        shape = graph_pb2.TensorShape()
        new_dim = shape.dim.add()
        shape_pytorch = self.input_shape

        if len(shape_pytorch) == 4:

            if shape_pytorch[0] == 1:
                new_dim.size = -1
            else:
                new_dim.size = shape_pytorch[0]
            for index in [2, 3, 1]:
                new_dim = shape.dim.add()
                dim = shape_pytorch[index]
                new_dim.size = dim if dim else -1
        elif len(shape_pytorch) == 2:
            if shape_pytorch[0] == 1:
                new_dim.size = -1
            else:
                new_dim.size = shape_pytorch[0]
            for _ in range(2):
                new_dim = shape.dim.add()
                new_dim.size = 1
            new_dim = shape.dim.add()
            dim = shape_pytorch[1]
            new_dim.size = dim if dim else -1

        IR_node.attr["_output_shapes"].list.shape.extend([shape])

    def rename_Conv(self, source_node):

        attr = source_node.attrs
        kwargs = dict()

        # dilation
        if 'dilations' in attr:
            kwargs['dilations'] = [1] + attr['dilations'] + [1]
        else:
            kwargs['dilations'] = [1] + [1, 1] + [1]

        if len(attr['pads']) == 4:
            kwargs['pads'] = [0] + attr['pads'][0:2] + \
                [0, 0] + attr['pads'][2:] + [0]
        elif len(attr['pads']) == 2:
            kwargs['pads'] = ([0] + attr['pads'][0:2] + [0]) * 2

        if 'strides' not in attr:
            kwargs['strides'] = [1] + [1, 1] + [1]
        else:
            kwargs['strides'] = [1] + attr['strides'] + [1]

        kwargs['group'] = attr['group']

        bias_name = '{0}.bias'.format(source_node.weights_name)
        weights_name = '{0}.weight'.format(source_node.weights_name)

        weight = self.state_dict[weights_name]

        weight = weight.numpy()
        dim = weight.ndim - 2

        IR_node = self._convert_identity_operation(source_node, new_op="Conv")
        weight = np.transpose(weight, list(range(2, dim + 2)) + [1, 0])

        self.set_weight(source_node.name, 'weights', weight)
        kwargs['kernel_shape'] = list(weight.shape)

        # handle bias
        if bias_name in self.state_dict:
            bias = self.state_dict[bias_name].numpy()
            self.set_weight(source_node.name, 'bias', bias)
            kwargs['use_bias'] = True
        else:
            kwargs['use_bias'] = False

        assign_IRnode_values(IR_node, kwargs)

    def rename_BatchNormalization(self, source_node):
        # TODO
        # output_shape

        IR_node = self._convert_identity_operation(
            source_node, new_op="BatchNorm")

        attr = source_node.attrs
        # epsilon
        IR_node.attr['epsilon'].f = attr['epsilon']

        bias_name = '{0}.bias'.format(source_node.weights_name)
        weights_name = '{0}.weight'.format(source_node.weights_name)
        mean_name = '{0}.running_mean'.format(source_node.weights_name)
        var_name = '{0}.running_var'.format(source_node.weights_name)

        if bias_name in self.state_dict:
            beta = self.state_dict[bias_name].numpy()
            IR_node.attr['bias'].b = True
        else:
            IR_node.attr['bias'].b = False

        if weights_name in self.state_dict:
            gamma = self.state_dict[weights_name].numpy()
            IR_node.attr['scale'].b = True
        else:
            IR_node.attr['scale'].b = False

        mean = self.state_dict[mean_name].numpy()
        variance = self.state_dict[var_name].numpy()

        if IR_node.attr['scale'].b:
            self.set_weight(source_node.name, "scale", gamma)

        if IR_node.attr['bias'].b:
            self.set_weight(source_node.name, "bias", beta)

        # mean
        self.set_weight(source_node.name, "mean", mean)

        # var
        self.set_weight(source_node.name, "var", variance)

    def rename_Relu(self, source_node):
        IR_node = self._convert_identity_operation(source_node, new_op="Relu")

    def rename_Tanh(self, source_node):
        IR_node = self._convert_identity_operation(source_node, new_op="Tanh")

    def rename_Sigmoid(self, source_node):
        IR_node = self._convert_identity_operation(
            source_node, new_op="Sigmoid")

    def rename_Mul(self, source_node):
        IR_node = self._convert_identity_operation(source_node, new_op="Mul")

    def rename_Maxpool(self, source_node):
        attr = source_node.attrs
        kwargs = dict()
        kwargs['strides'] = [1] + attr['strides'] + [1]
        if 'dilations' not in attr:
            kwargs['dilations'] = [1] + [1, 1] + [1]
        else:
            kwargs['dilations'] = [1] + attr['dilations'] + [1]
        kwargs['pads'] = [0] + attr['pads'][0:2] + \
            [0, 0] + attr['pads'][2:] + [0]
        kwargs['kernel_shape'] = [1] + attr['kernel_shape'] + [1]
        IR_node = self._convert_identity_operation(source_node, new_op="Pool")

        kwargs['pooling_type'] = 'MAX'

        assign_IRnode_values(IR_node, kwargs)

    def rename_Avgpool(self, source_node):
        attr = source_node.attrs
        kwargs = dict()
        kwargs['strides'] = [1] + attr['strides'] + [1]
        if 'dilations' not in attr:
            kwargs['dilations'] = [1] + [1, 1] + [1]
        else:
            kwargs['dilations'] = [1] + attr['dilations'] + [1]
        kwargs['pads'] = [0] + attr['pads'][0:2] + \
            [0, 0] + attr['pads'][2:] + [0]
        kwargs['kernel_shape'] = [1] + attr['kernel_shape'] + [1]
        IR_node = self._convert_identity_operation(source_node, new_op="Pool")

        kwargs['pooling_type'] = 'AVG'

        assign_IRnode_values(IR_node, kwargs)

    def rename_Flatten(self, source_node):
        IR_node = self._convert_identity_operation(
            source_node, new_op="Flatten")

    def rename_FullyConnected(self, source_node):
        IR_node = self._convert_identity_operation(
            source_node, new_op="FullyConnected")

        bias_name = '{0}.bias'.format(source_node.weights_name)
        weights_name = '{0}.weight'.format(source_node.weights_name)

        W = self.state_dict[weights_name].numpy().transpose()
        input_channels, output_channels = W.shape

        # Kit weight transpose
        # weight: N x M -> C x H x W x M -> H x W x C x M -> N x M
        if self.weight_loaded:
            parent = self.src_graph.get_parent(source_node.name, [0])
            while parent.type == 'onnx::Flatten' or parent.type == 'onnx::Dropout':
                parent = self.src_graph.get_parent(parent.name, [0])
            if len(self.shape_dict[parent.name]) == 4:
                original_shape = W.shape
                channel_first_list = self.shape_dict[parent.name][1:]
                dim = len(channel_first_list) + 1
                weight = W.reshape(channel_first_list + [original_shape[1]])
                assert dim > 2
                weight = weight.transpose(list(range(1, dim - 1)) + [0, dim - 1])
                W = weight.reshape(original_shape)

        # weights
        self.set_weight(source_node.name, 'weights', W)

        # use_bias
        if bias_name in self.state_dict:
            IR_node.attr['use_bias'].b = True
            bias = self.state_dict[bias_name].numpy()
            self.set_weight(source_node.name, 'bias', bias)
        else:
            IR_node.attr['use_bias'].b = False
        # units
        IR_node.attr['units'].i = output_channels

    def rename_Dropout(self, source_node):
        IR_node = self._convert_identity_operation(
            source_node, new_op='Dropout')
        IR_node.attr['keep_prob'].f = source_node.attrs['ratio']

    def rename_Concat(self, source_node):
        IR_node = self._convert_identity_operation(
            source_node, new_op='Concat')

        if source_node.attrs['axis'] == 1:
            IR_node.attr['axis'].i = len(self.shape_dict[source_node.name]) - 1
        else:
            IR_node.attr['axis'].i = source_node.attrs['axis']

    def rename_Add(self, source_node):
        IR_node = self._convert_identity_operation(source_node, new_op='Add')

    def rename_MaxPool2d(self, source_node):
        self._convert_pooling(source_node)

    def rename_View(self, source_node):
        IR_node = self._convert_identity_operation(
            source_node, new_op='Reshape')
        assign_IRnode_values(IR_node, {'shape': list(
            source_node.get_attr('new_sizes'))[1:]})

    def rename_Addmm(self, source_node):
        IR_node = self._convert_identity_operation(
            source_node, new_op='FullyConnected')
        kwargs = dict()

        # handle weight
        weight = source_node.get_attr('next_functions')[
            2][0].next_functions[0][0].variable.data.numpy()
        weight = np.transpose(weight)
        kwargs['units'] = weight.shape[1]
        self.set_weight(source_node.name, 'weights', weight)

        # handle bias
        if source_node.get_attr('next_functions')[0][0]:
            bias = source_node.get_attr('next_functions')[
                0][0].variable.data.numpy()
            kwargs['use_bias'] = True
            self.set_weight(source_node.name, 'bias', bias)

        assign_IRnode_values(IR_node, kwargs)

        print(IR_node)

    ####################
    # Helper Functions #
    ####################

    @staticmethod
    def _copy_and_reop(source_node, IR_node, new_op=None):
        if new_op == None:
            new_op = source_node.type
        IR_node.name = source_node.name
        IR_node.op = new_op

    def _convert_identity_operation(self, source_node, in_edge_count=None, new_op=None):
        IR_node = self.IR_graph.node.add()
        PytorchParser._copy_and_reop(source_node, IR_node, new_op)
        self.convert_inedge(source_node, IR_node, 0, in_edge_count)
        self._set_output_shape(source_node, IR_node)
        return IR_node

    def _convert_pooling(self, source_node):
        kwargs = dict()
        kwargs['strides'] = [1] + list(source_node.get_attr('stride')) + [1]
        kwargs['dilations'] = [1] + \
            list(source_node.get_attr('dilation')) + [1]
        kwargs['pads'] = (
            [0] + list(source_node.get_attr('padding')) + [0]) * 2
        kwargs['kernel_shape'] = [1] + \
            list(source_node.get_attr('kernel_size')) + [1]
        IR_node = self._convert_identity_operation(source_node, new_op="Pool")

        if source_node.name.startswith('Max'):
            kwargs['pooling_type'] = 'MAX'
        elif source_node.name.startswith('Avg'):
            kwargs['pooling_type'] = 'AVG'
        else:
            raise ValueError('Unknown pooling type')

        assign_IRnode_values(IR_node, kwargs)
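
# A minimal sketch of using the parser. It assumes a checkpoint produced with
# torch.save(model, 'resnet18.pth') on the whole module (the constructor above
# calls torch.load on the file) and the run(dest_path) entry point of the
# common Parser base class; 'resnet18.pth' and 'resnet18_ir' are placeholders.

from mmdnn.conversion.pytorch.pytorch_parser import PytorchParser

parser = PytorchParser('resnet18.pth', [3, 224, 224])  # shape without batch dim
parser.run('resnet18_ir')  # emits the IR protobuf plus the .npy weight dict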

import torch


def save_model(MainModel, network_filepath, weight_filepath, dump_filepath):
    model = MainModel.KitModel(weight_filepath)
    model.eval()
    torch.save(model, dump_filepath)
    print('PyTorch model file is saved as [{}], generated by [{}.py] and [{}]. Notice that you may need [{}.py] to load the model back.'.format(
        dump_filepath, network_filepath, weight_filepath, network_filepath))


import torchfile
import numpy as np

model = torchfile.load('kit.model')

weights = dict()

params = ['weight', 'bias', 'running_mean', 'running_var']

recursive = ['conv_nets']


def save_weight(name, node, level):
    weights[name] = dict()
    current_layer = weights[name]
    for p in params:
        if hasattr(node, p):
            func = getattr(node, p)
            arr = np.array(func)
            if arr.ndim >= 1:
                current_layer[p] = arr
                print(" " * level + "{}.{} shape {} {}".format(
                    name, p, current_layer[p].shape, current_layer[p].dtype))

    for p in recursive:
        if hasattr(node, p):
            func = getattr(node, p)
            if func != None:
                for idx, subnode in enumerate(func):
                    new_name = name + ":{}:{}".format(p, idx)
                    save_weight(new_name, subnode, level + 1)


for idx, data in enumerate(model.modules):
    if data != None:
        print("Find layer #{} : {}".format(idx, data._typename))
        if hasattr(data, 'search_flag'):
            print("    name = {}".format(data.search_flag))
        if data.modules != None:
            print("    submodule = {}#".format(len(data.modules)))
            for idx_j, sub in enumerate(data.modules):
                print("        layer [{}]".format(sub._typename))
                name = data.search_flag + ":" + str(idx_j)
                save_weight(name, sub, 2)
        print("\n")
    else:
        pass
        # print(dir(data))

    print("\n")

with open("stylebank.npy", 'wb') as of:
    np.save(of, weights)

print("-------------------------------------------------")

load_weight = np.load('stylebank.npy').item()
for i in load_weight:
    # print(i)
    for j in load_weight[i]:
        pass
        # print("    {} with shape {}".format(j, load_weight[i][j].shape))
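
# A small sketch of the nested dict layout the script above writes with
# np.save, and the .item() trick it uses to get the dict back (the layer name
# is made up). On NumPy >= 1.16.3 the load additionally needs allow_pickle=True.

import numpy as np

demo = {'bank:0': {'weight': np.ones((3, 3)), 'bias': np.zeros(3)}}
np.save('demo_weights.npy', demo)            # pickles the dict into a 0-d object array

loaded = np.load('demo_weights.npy').item()  # .item() unwraps that 0-d array
for layer, params in loaded.items():
    for pname, arr in params.items():
        print(layer, pname, arr.shape)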

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from mmdnn.conversion.common.IR.IR_graph import *
import sys
import re
import numpy as np
import collections


class Folder(object):
    """A folder that folds a graph's nodes sharing the same scope into one node."""

    def __init__(self, graph, weights_dict, init_level=0, fold_level_num=0, scope_names=None):
        """
        Initializes a Folder.

        Args:
            graph: the graph to be folded.
            init_level: the start scope level to be folded.
            fold_level_num: the number of levels below init_level to be folded. For example,
                given three nodes whose scopes are A/B/X, A/B/Y and A/C/Z, with init_level=0
                and fold_level_num=1 the fold result is A/B and A/C.
        """
        self._graph = graph
        self._weights_dict = weights_dict
        self._init_level = init_level
        self._fold_level_num = fold_level_num
        self._scope_names = scope_names

    """fold the graph by compressing the nodes which have the same scope into one scope node."""

    def fold(self):
        self.scope_level_name_map = self._get_scope_level_name_dict()  # level: scope_name set

        if self._scope_names:
            scope_names = self._scope_names
        else:
            if not self.scope_level_name_map:
                return
            scope_names = self.scope_level_name_map[0]

        for scope_name in scope_names:
            level = self._init_level
            sub_fold_level = self._fold_level_num
            while sub_fold_level >= 0:
                self._fold(self._graph.topological_sort,
                           scope_name, level, level + sub_fold_level)
                sub_fold_level -= 1

        # check whether scope nodes with the same pattern have the same inputs,
        # outputs and weights. For those that don't, rename their scope names.
        self.check_scope()
        self.check_weights()

        # clear out-of-scope nodes, typically input constant nodes.
        self._graph.clear_out_scope_node()

    '''
    fold the given node list by squashing the nodes whose scope is the given scope into one scope node.
    Args:
        scope_list: scope node topology list
        scope_name: the scope name to be folded
        level: the scope's level
        sub_level: the scope's sub-scope level to be folded.
    '''

    def _fold(self, scope_list, scope_name, level, sub_level):
        top_node = None
        # get sub_scopes
        if not self.scope_level_name_map.get(sub_level, 0):
            raise ValueError("The fold level exceeds the maximum scope level.")
        sub_scopes = self.scope_level_name_map[sub_level]

        for sub_scope in sub_scopes:
            sub_scope_node_name_list = self._get_scope_name_dict(
                self._graph.topological_sort, top_level=level,
                top_scope=scope_name, sub_level=sub_level, sub_scope=sub_scope)
            for scope_list in sub_scope_node_name_list.values():
                top_node = self._graph.get_node(scope_list[-1])
                self._create_scope_node(
                    sub_scope, scope_list, top_node)

    '''get scope_level_name_dict. {level: scope name list (without suffix number)}'''

    def _get_scope_level_name_dict(self):
        scope_level_name = collections.OrderedDict()
        for node in self._graph.get_nodes():
            if not node.get_attr('scope'):
                continue
            for i, scope_name in enumerate(node.get_attr('scope').split('/')):
                # drop the suffix number
                import re
                if re.search(r'\d', scope_name.split('_')[-1]):
                    scope_name = '_'.join(scope_name.split('_')[:-1])
                if scope_name == 'top':
                    continue
                if scope_level_name.get(i, None):
                    if scope_name not in scope_level_name[i]:
                        scope_level_name[i].append(scope_name)
                else:
                    # use a list to keep the order.
                    scope_level_name[i] = list([scope_name])

        return scope_level_name

    '''
    get the dict from the required node_list filtered by the appointed scope_name and level

    Args:
        node_list: current self.topology_sort
        scope_name_dict: scope_no: node_name, a dict like {scope_name_no: a set of scope node names}
    '''

    def _get_scope_name_dict(self, node_list, top_level=0, top_scope=None, sub_level=2, sub_scope=None):
        scope_node_names = collections.OrderedDict()

        def _insert_scope_node_names_dict(scope_no, node_name):
            if scope_node_names.get(scope_no, None):
                scope_node_names[scope_no].append(node_name)
            else:
                scope_node_names[scope_no] = list([node_name])

        def _get_scope_name_dict_by_cond(cond_top, cond_sub):
            for node_name in node_list:
                node = self._graph.get_node(node_name)
                if not node.get_attr('scope'):
                    continue
                node_scope = node.get_attr('scope')
                if cond_top(top_scope, node_scope) and cond_sub(sub_scope, node_scope):
                    if 'True' in cond_top.__name__ and 'True' not in cond_sub.__name__:
                        scope_no = node_scope.split('/')[sub_level]
                    elif 'True' not in cond_top.__name__ and 'True' in cond_sub.__name__:
                        scope_no = node_scope.split('/')[top_level]
                    else:  # both not equal True
                        scope_no = node_scope.split(
                            '/')[top_level] + '_' + node_scope.split('/')[sub_level]
                    _insert_scope_node_names_dict(scope_no, node.name)

        def cond_x_in_y(x, y): return x in y

        def cond_True(x, y): return True

        # Obtain nodes whose scope name at top_level is top_scope and at sub_level is sub_scope
        if top_scope and sub_scope:
            _get_scope_name_dict_by_cond(cond_x_in_y, cond_x_in_y)
        # Obtain nodes whose scope name at sub_level is sub_scope
        elif not top_scope and sub_scope:
            _get_scope_name_dict_by_cond(cond_True, cond_x_in_y)
        # Obtain nodes whose scope name at top_level is top_scope
        elif top_scope and not sub_scope:
            _get_scope_name_dict_by_cond(cond_x_in_y, cond_True)
        # Obtain all nodes grouped by sub_scope at sub_level
        elif top_scope is None and sub_scope is None:
            top_scopes = self.scope_level_name_map[top_level]
            for top_scope in top_scopes:  # this top_scope will replace the input top_scope
                _get_scope_name_dict_by_cond(cond_x_in_y, cond_True)

        return scope_node_names

    '''get the node names' topology sort of scope nodes'''

    def _get_scope_nodes_topology_list(self, scope_node_name_set):

        temp_dict = {}
        for index, name in enumerate(scope_node_name_set):
            # cover the node
            self._graph.get_node(name).covered = True
            # store idx, node into a dict and sort it later to keep its topology sort.
            index = self._graph.topological_sort.index(name)
            temp_dict[name] = index

        temp_dict = sorted(
            temp_dict.items(), key=lambda item: item[1])

        return [x[0] for x in temp_dict]

    '''rebuild the connections of the edges around this scope node.'''

    def _rebuild_scope_edges_and_get_ret_vars(self, scope_node):

        def _get_index(node, name):
            for idx, in_edge in enumerate(node.in_edges):
                if in_edge.split(':')[0] == name:
                    return idx

        return_nodes = list()
        return_variable_names = list()

        for n_name in scope_node.topology_list:
            n = self._graph.get_node(n_name)
            for in_edge in n.in_edges:

                if not in_edge.split(':')[0] in scope_node.topology_list:
                    if not in_edge in scope_node.in_edges:
                        scope_node.in_edges.append(in_edge)

                    # in_node's out edges: replace n_name with the scope node's name.
                    in_node = self._graph.get_node(in_edge)
                    if n_name in in_node.out_edges:
                        idx = in_node.out_edges.index(n_name)
                        in_node.out_edges.remove(n_name)
                        if scope_node.name not in in_node.out_edges:
                            in_node.out_edges.insert(idx, scope_node.name)

            for out_edge in n.out_edges:

                if not out_edge in scope_node.topology_list:
                    out_node = self._graph.get_node(out_edge)
                    parent_node_variable_name = self._graph.get_parent_variable_name(out_edge.split(
                        ':')[0], [_get_index(self._graph.get_node(out_edge), n_name)])

                    if parent_node_variable_name not in return_variable_names:
                        return_nodes.append(self._graph.get_node(n_name))
                        return_variable_names.append(parent_node_variable_name)
                    scope_node.out_edges.append(out_edge)

        # no out nodes means the last node in scope nodes should be returned
        if not return_nodes:
            return_nodes.append(self._graph.get_node(
                scope_node.topology_list[-1]))
            return_variable_names.append(self._graph.get_node(
                scope_node.topology_list[-1]).real_variable_name)

        ret_idx = 0
        for ret_node, ret_variable_name in zip(return_nodes, return_variable_names):

            subscript = '' if len(ret_variable_name.split(
                '[')) == 1 else ':' + ret_variable_name.split('[')[1].split(']')[0]

            for out_name in ret_node.out_edges:
                if not out_name in scope_node.topology_list:
                    out_node = self._graph.get_node(out_name)

                    ret_name = ret_node.name + subscript
                    if ret_name in out_node.in_edges:
                        insert_pos = out_node.in_edges.index(ret_name)
                        insert_name = scope_node.name + \
                            ':{}'.format(str(ret_idx)) if len(
                                return_variable_names) > 1 else scope_node.name
                        out_node.in_edges.remove(ret_name)
                        out_node.in_edges.insert(insert_pos, insert_name)

                        # if out_node is a scope node, replace the node in the
                        # scope node's inner topology list.
                        if out_node.type == 'Scope':
                            for n in out_node.topology_list:
                                n = self._graph.get_node(n)
                                if ret_name in n.in_edges:
                                    idx = n.in_edges.index(ret_name)
                                    n.in_edges.remove(ret_name)
                                    n.in_edges.insert(idx, insert_name)
            ret_idx += 1

        return return_variable_names

    ''' if the input params include a tensor of a multi-output type (e.g. unstack), then we need
    to check whether this scope function body uses only one of the outputs or several of them. If it
    is the former, feed the selected one (e.g. unstack[0]), otherwise feed all of them. '''

    def _check_and_get_input_params(self, scope_node):

        def wipe_in_edge_idx(in_name, node):
            for idx, in_edge in enumerate(node.in_edges):
                if in_name in in_edge:
                    node.in_edges[idx] = in_edge.split(':')[0]
            node.in_edges = sorted(set(node.in_edges), key=node.in_edges.index)

        input_params = list()
        in_name_dict = collections.OrderedDict()
        for in_name in scope_node.in_edges:

            if self._graph.get_node(in_name).variable_name not in input_params:
                input_params.append(self._graph.get_node(
                    in_name).variable_name)
            if ':' not in in_name:
                continue

            if in_name_dict.get(in_name.split(':')[0], None):
                in_name_dict[in_name.split(':')[0]].add(
                    in_name.split(':')[1])
            else:
                in_name_dict[in_name.split(':')[0]] = set(
                    [in_name.split(':')[1]])

        for in_name, subscript_set in in_name_dict.items():
            # the input parameter should be sliced when calling the function.
            if len(subscript_set) == 1:

                # modify the in_edges of the scope's inner nodes: drop the ':idx'.
                for n in scope_node.topology_list:
                    n = self._graph.get_node(n)
                    wipe_in_edge_idx(in_name, n)
            else:  # >= 2
                wipe_in_edge_idx(in_name, scope_node)

        return input_params

    '''
    create a scope node.

    Args:
        scope_name: the name of this scope, will be assigned to the scope pattern.
        scope_node_names: node names involved in this scope.
        top_node: the top node among these scope nodes.
    '''

    def _create_scope_node(self, scope_name, scope_node_names, top_node):
        # 1. initialize scope node
        scope_node = self._initialize_scope_node(top_node)

        # 2. get scope nodes' topology list.
        scope_node.topology_list = self._get_scope_nodes_topology_list(
            scope_node_names)
        scope_node.pattern = scope_name

        # 3. rebuild the edge connections after folding these scope nodes into one node
        #    and get this scope node's return variables.
        scope_node.return_variables = self._rebuild_scope_edges_and_get_ret_vars(
            scope_node)

        # 4. rebuild graph.
        self._graph.layer_map[scope_node.name] = scope_node
        self._graph.layer_name_map[scope_node.name] = scope_node.name
        self._graph.rebuild()

    '''initialize a scope node by copying source_node's attrs.'''

    def _initialize_scope_node(self, source_node):
        scope_node = self._graph.model.node.add()
        scope_node.name = source_node.name + '_scope'
        scope_node.op = 'Scope'
        scope_node = IRGraphNode(scope_node)

        kwargs = {}
        kwargs['scope'] = source_node.get_attr('scope')

        if 'data_format' in source_node.layer.attr:
            kwargs['data_format'] = source_node.get_attr('data_format')

        if '_output_shapes' in source_node.layer.attr:
            scope_node.layer.attr["_output_shapes"].MergeFromString(
                source_node.layer.attr['_output_shapes'].SerializeToString())
        if 'value' in source_node.layer.attr:
            kwargs['value'] = source_node.get_attr('value')
        # RNN-related attrs.
        if 'input_size' in source_node.layer.attr:
            kwargs['input_size'] = source_node.get_attr('input_size')
        if 'num_units' in source_node.layer.attr:
            kwargs['num_units'] = source_node.get_attr('num_units')
        if 'fill_size' in source_node.layer.attr:
            kwargs['fill_size'] = source_node.get_attr('fill_size')
        if 'fill_value' in source_node.layer.attr:
            kwargs['fill_value'] = source_node.get_attr('fill_value')

        assign_IRnode_values(scope_node.layer, kwargs)
        return scope_node

    '''
    check whether scope nodes with the same pattern have the same inputs and outputs.
    For those that do not, rename their pattern by adding a serial-number suffix.
    '''

    def check_scope(self):
        name_no_dict = collections.OrderedDict()
        name_inp_out_dict = collections.OrderedDict()

        for name, ir_node in self._graph.layer_map.items():
            if ir_node.type == 'Scope':
                # get input params
                ir_node.input_params = self._check_and_get_input_params(
                    ir_node)
                origi_pattern = re.sub(r'(_\d+)*$', '', ir_node.pattern)
                if name_inp_out_dict.get(origi_pattern, None):
                    inps_and_outs = name_inp_out_dict[origi_pattern]
                    exist_Equal = False
                    for inp_out in inps_and_outs:
                        if len(ir_node.input_params) == len(inp_out[0]) and len(ir_node.return_variables) == len(inp_out[1]):
                            exist_Equal = True
                            if inp_out[2]:
                                ir_node.pattern = ir_node.pattern + \
                                    '_' + str(inp_out[2])

                    if not exist_Equal:
                        name_inp_out_dict[origi_pattern].append(
                            [ir_node.input_params, ir_node.return_variables, name_no_dict.get(origi_pattern, 1)])
                        ir_node.pattern = ir_node.pattern + '_' + \
                            str(name_no_dict.get(origi_pattern, 1))
                        name_no_dict[origi_pattern] = name_no_dict.get(
                            origi_pattern, 1) + 1

                else:
                    name_inp_out_dict[origi_pattern] = [
                        [ir_node.input_params, ir_node.return_variables, 0]]
                    name_no_dict[ir_node.pattern] = name_no_dict.get(
                        origi_pattern, 0) + 1

    '''
    check whether scope nodes with the same pattern have the same weights.
    For those that do not, rename their pattern by adding a serial-number suffix.
    '''

    def check_weights(self):
        weight_related_ops = ['FullyConnected']
        pattern_weight_op = collections.OrderedDict()
        name_no_dict = collections.OrderedDict()
        pattern_weights = collections.OrderedDict()

        for ir_node_name in self._graph.topological_sort:
            ir_node = self._graph.get_node(ir_node_name)
            if ir_node.type == 'Scope':
                for inner_name in ir_node.topology_list:
                    if self._graph.get_node(inner_name).type in weight_related_ops:
                        if pattern_weight_op.get(ir_node.pattern, None):
                            if self._weights_dict[inner_name]['weights'].any() and pattern_weights.get(ir_node.pattern, None):
                                inner_weights = self._weights_dict[inner_name]['weights']
                                isExist = False
                                for idx, it in enumerate(pattern_weights[ir_node.pattern]):
                                    if np.array_equal(inner_weights, it['weights']):
                                        ir_node.pattern = ir_node.pattern + \
                                            '_' + str(idx)
                                        isExist = True
                                        break
                                if isExist:
                                    continue
                                pattern_weight_op[ir_node.pattern].add(inner_name)
                                pattern_weights[ir_node.pattern].append(
                                    self._weights_dict[inner_name])
                                ir_node.pattern = ir_node.pattern + '_' + \
                                    str(len(pattern_weights[ir_node.pattern]) - 1)

                        else:
                            pattern_weight_op[ir_node.pattern] = set(
                                [inner_name])
                            if self._weights_dict.get(inner_name, None):
                                pattern_weights[ir_node.pattern] = [
                                    self._weights_dict[inner_name]]
                            ir_node.pattern = ir_node.pattern + '_' + \
                                str(name_no_dict.get(ir_node.pattern, 0))
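
# A toy, pure-Python illustration of the grouping rule described in Folder's
# docstring (no IR graph involved): with init_level=0 and fold_level_num=1,
# the scopes A/B/X, A/B/Y and A/C/Z collapse into the prefixes A/B and A/C.

scopes = ['A/B/X', 'A/B/Y', 'A/C/Z']
init_level, fold_level_num = 0, 1

folded = {}
for scope in scopes:
    prefix = '/'.join(scope.split('/')[init_level:init_level + fold_level_num + 1])
    folded.setdefault(prefix, []).append(scope)

print(folded)  # {'A/B': ['A/B/X', 'A/B/Y'], 'A/C': ['A/C/Z']}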

# Copyright 2017 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Utilities that match patterns in a Graph."""

from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import abc
import itertools

from mmdnn.conversion.common.DataStructure.graph import Graph


class Pattern(object):
    """The parent class of all patterns (e.g. OpTypePattern and OneofPattern)."""

    @abc.abstractmethod
    def match(self, op, tensor):
        """Returns the result of matching op/tensor against this pattern."""
        raise NotImplementedError('Method "match" not implemented.')


class OpTypePattern(Pattern):
    """A tree pattern that matches TF expressions with certain op types."""

    def __init__(self, op_type, name=None, inputs=None, ordered_inputs=True):
        """Initializes an OpTypePattern.

        Args:
            op_type: string that specifies the allowed types of the root. It can be
                (1) an op type, e.g. 'Conv2D',
                (2) '*', i.e. wildcard, or
                (3) multiple op types separated by '|', e.g., 'Relu|Relu6'.
                We could use regex strings, which might be worthwhile when we have many
                similar TF op types.
            name: Optional string. The name of the pattern that can be looked up in
                MatchResult.
            inputs: Optional list of `Pattern`s or strings that specify the
                patterns for the inputs of a matching op. If None, this pattern accepts
                any inputs of a matching op.
            ordered_inputs: Defaults to True. If False, will match any op that
                matches a permutation of the inputs.

        Raises:
            ValueError: if too many inputs are provided when ordered_inputs is False.
        """
        self._op_type = op_type
        self._name = name
        if inputs is None:
            inputs = []
        if len(inputs) > 8:
            raise ValueError(
                'Only <= 8 inputs are allowed when ordered_inputs is False.')
        self._inputs = [
            input_pattern
            if isinstance(input_pattern, Pattern) else OpTypePattern(input_pattern)
            for input_pattern in inputs
        ]
        self._ordered_inputs = ordered_inputs

    @property
    def name(self):
        return self._name

    @property
    def scope_ids(self):
        return self._scope_ids

    @property
    def inputs(self):
        return self._inputs

    @property
    def type(self):
        return self._op_type

    def set_op_scope(self, op, scope):
        op.scope = scope

    def match(self, op):
        if self._op_type != '*':
            if op.type not in self._op_type.split('|'):
                return None

        match_result = MatchResult()
        match_result.add(self, op)

        if not self._inputs:
            # If pattern.inputs is empty, skips the rest and accepts all the inputs.
            return match_result

        if len(op.in_edges) != len(self._inputs):
            return None

        input_patterns_list = [self._inputs]
        # If order doesn't matter for the inputs, then make sure we match at least
        # one permutation of the inputs.
        if not self._ordered_inputs:
            input_patterns_list = list(itertools.permutations(self._inputs))

        for input_patterns in input_patterns_list:
            match_failed = False

            for input_op, input_pattern in zip(op.in_nodes, input_patterns):
                input_match_result = input_pattern.match(input_op)
                if input_match_result is None:
                    match_failed = True
                    break
                match_result.merge_from(input_match_result)
            if not match_failed:
                return match_result
        return None


class OneofPattern(Pattern):
    """Matches one of the given sub-patterns."""

    def __init__(self, sub_patterns):
        self._sub_patterns = sub_patterns

    def match(self, op):
        for sub_pattern in self._sub_patterns:
            match_result = sub_pattern.match(op)
            if match_result is not None:
                return match_result
        return None


class MatchResult(object):
    r"""Encapsulates the result of a match done by GraphMatcher.

    MatchResult contains a map from Pattern to the matching op and tensor.
    When the matching op has multiple output tensors, the matching tensor is the
    output tensor used by the matching op of the parent pattern. E.g., when we
    match graph

        -         +
       / \y0   y1/ \
      x    split    z
             |
             y (nodes are ops; edges are going up)

    against add_pattern defined as

        y1_pattern = OpTypePattern('*')
        z_pattern = OpTypePattern('*')
        add_pattern = OpTypePattern('+', inputs=[y1_pattern, z_pattern])

    the matching op of `y1_pattern` is `split`, and the matching tensor of
    `y1_pattern` is `y1` not `y0`.
    """

    def __init__(self):
        self._pattern_to_op = {}
        self._name_to_pattern = {}

    def add(self, pattern, op):
        self._pattern_to_op[pattern] = op
        if pattern.name is not None:
            if pattern.name in self._name_to_pattern:
                raise ValueError(
                    'Name %s is already bound to another pattern' % pattern.name)
            self._name_to_pattern[pattern.name] = pattern

    def _to_pattern(self, pattern_or_name):
        if isinstance(pattern_or_name, Pattern):
            return pattern_or_name

        if isinstance(pattern_or_name, str):
            if pattern_or_name not in self._name_to_pattern:
                return None
            return self._name_to_pattern[pattern_or_name]

        raise ValueError('pattern_or_name has type %s. Expect Pattern or str.' %
                         type(pattern_or_name))

    def _get_op_tensor(self, pattern_or_name):
        pattern = self._to_pattern(pattern_or_name)
        if pattern is None:
            return None

        if pattern not in self._pattern_to_op:
            return None

        return self._pattern_to_op[pattern]

    def get_op(self, pattern_or_name):
        op_tensor = self._get_op_tensor(pattern_or_name)
        return op_tensor if op_tensor else None

    # def get_tensor(self, pattern_or_name):
    #     op_tensor = self._get_op_tensor(pattern_or_name)
    #     return op_tensor[1] if op_tensor else None

    def merge_from(self, other_match_result):
        # pylint: disable=protected-access
        self._pattern_to_op.update(other_match_result._pattern_to_op)
        self._name_to_pattern.update(other_match_result._name_to_pattern)
        # pylint: enable=protected-access


class GraphMatcher(object):
    """Checks if a particular subgraph matches a given pattern."""

    def __init__(self, pattern):
        """Initializes a GraphMatcher.

        Args:
            pattern: The `Pattern` against which `GraphMatcher` matches
                subgraphs.
        """
        self._pattern = pattern

    def _match_pattern(self, pattern, op):
        """Returns whether a TF expression rooted at `op` matches `pattern`.

        If there is a match, adds to `self._match_result` the matching op and tensor
        with key `pattern`.

        Args:
            pattern: A `Pattern`.
            op: A `tf.Operation` to match against the pattern.

        Returns:
            True if a TF expression rooted at `op` matches `pattern`.
        """
        match_result = pattern.match(op)
        if match_result is None:
            return False
        self._match_result.merge_from(match_result)
        return True

    def match_op(self, op):
        """Matches `op` against `self._pattern`.

        Args:
            op: `tf.Operation` to match against the pattern.

        Returns:
            Returns a `MatchResult` if `op` matches the pattern; otherwise, returns
            None.
        """
        self._match_result = MatchResult()
        if not self._match_pattern(self._pattern, op):
            return None
        return self._match_result

    def match_ops(self, ops):
        """Matches each operation in `ops` against `self._pattern`.

        Args:
            ops: collection of `tf.Operation` to match against the pattern.

        Yields:
            `MatchResult` for each `tf.Operation` that matches the pattern.
        """
        for op in ops:
            match_result = self.match_op(op)
            if match_result:
                yield match_result

    def match_graph(self, graph):
        """Matches each operation in `graph` against `self._pattern`.

        Args:
            graph: `tf.Graph` containing operations to match.

        Yields:
            `MatchResult` for each `tf.Operation` in `graph` that matches the pattern.
        """
        # Python 3.3.2+ implements `yield from`, but for now:
        for match_result in self.match_ops(graph.get_nodes()):
            yield match_result
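
# A minimal, self-contained sketch of the matcher API above, run against stub
# ops instead of real graph nodes; StubOp only mimics the attributes the
# matcher touches (type, in_nodes, in_edges), and the import path assumes the
# module location used elsewhere in this codebase.

from mmdnn.conversion.rewriter.graph_matcher import GraphMatcher, OpTypePattern


class StubOp(object):
    def __init__(self, op_type, in_nodes=None):
        self.type = op_type
        self.in_nodes = in_nodes or []
        self.in_edges = self.in_nodes  # match() only takes len() of in_edges


add = StubOp('Add', [StubOp('Const'), StubOp('Identity')])

pattern = OpTypePattern('Add', name='root', inputs=['Const', 'Identity'])
matcher = GraphMatcher(pattern)
result = matcher.match_op(add)
print(result.get_op('root') is add)  # True; a non-matching op yields None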

from __future__ import division
from __future__ import print_function

import numpy as np
import tensorflow
from tensorflow.python.framework import tensor_util
from tensorflow.core.framework import attr_value_pb2

import sys

from mmdnn.conversion.tensorflow.tensorflow_graph import *
import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2
from mmdnn.conversion.rewriter.graph_matcher import *

from mmdnn.conversion.common.DataStructure import *
from tensorflow.core.framework.node_def_pb2 import NodeDef
from mmdnn.conversion.rewriter.rnn_utils import *


class UnitRewriterBase(object):

    def __init__(self, graph, weights_dict):
        self._graph = graph
        self._weights_dict = weights_dict

    def _rewrite_graph_by_pattern(self, pattern_name, graph_type):
        pattern = rnn_patterns[graph_type][pattern_name]
        matcher = GraphMatcher(pattern)
        match_results = list(matcher.match_ops(self._graph.get_nodes()))
        scope_names_dict = dict()  # name: No.

        for i in range(len(match_results)):
            result = match_results[i]
            top_pattern_name = pattern_name + '_' + str(i)

            top_pattern = result._name_to_pattern[pattern_name]
            self.create_scope(result, top_pattern, scope_names_dict)

            top_op = result._pattern_to_op[top_pattern]
            top_op.scope = top_op.scope + '/top'

            # self.store_const_to_top(result)
            # self.set_top_node_prop(result, pattern_name)
            self.process_match_result(result, pattern_name)

    def rewrite_graph(self, pattern_names, graph_type):
        from six import string_types as _string_types
        if isinstance(pattern_names, _string_types):
            pattern_names = [pattern_names]
        elif not isinstance(pattern_names, list):
            raise ValueError
        for pattern_name in pattern_names:
            self._rewrite_graph_by_pattern(pattern_name, graph_type)

    def run(self, pattern_names, graph_type):
        self.rewrite_graph(pattern_names, graph_type)

    def store_const_to_top(self, match_result):
        top_node = list(match_result._pattern_to_op.values())[0]
        kwargs = dict()
        for pattern, op in match_result._pattern_to_op.items():
            if pattern.name and pattern.type == 'Const':
                if tensor_util.MakeNdarray(op.get_attr('value')).shape == (1, ):
                    kwargs[pattern.name] = np.asscalar(
                        tensor_util.MakeNdarray(op.get_attr('value')))
                else:
                    kwargs[pattern.name] = np.squeeze(
                        tensor_util.MakeNdarray(op.get_attr('value')))
        if hasattr(top_node, 'kwargs'):
            top_node.kwargs.update(kwargs)
        else:
            top_node.kwargs = kwargs

    def create_scope(self, result, pattern, scope_names_dict, parent_scope_name=''):
        op = result._pattern_to_op[pattern]

        if pattern.name:
            # Do not include input op.
            if 'input' in pattern.name.split('/')[-1]:
                return
            else:
                no = scope_names_dict.get(pattern.name, 0)
                scope_names_dict[pattern.name] = no + 1
                if parent_scope_name:
                    current_scope_name = '/'.join(
                        [parent_scope_name, pattern.name]) + '_' + str(no)
                else:
                    current_scope_name = pattern.name + '_' + str(no)
        else:
            current_scope_name = parent_scope_name
        op.scope = current_scope_name
        for sub_pattern in pattern.inputs:
            self.create_scope(result, sub_pattern,
                              scope_names_dict, current_scope_name)

    def set_top_node_prop(self, match_result):
        raise NotImplementedError

    def process_match_result(self, match_result, pattern_name):
        raise NotImplementedError
-static_rnn_batch_size_pattern = OpTypePattern('ExpandDims', name='static_rnn_batch_size', inputs=[
-    OpTypePattern('StridedSlice', inputs=[
-        OpTypePattern('Shape', inputs=[
-            OpTypePattern('*', name='input')
-        ]),
-        OpTypePattern('Const'),
-        OpTypePattern('Const'),
-        OpTypePattern('Const')
-    ]),
-    OpTypePattern('Const')
-])
-
-'''rnn h zero pattern in tensorflow.'''
-static_rnn_h_zero_pattern = OpTypePattern('Fill', name='h_zero', inputs=[
-    OpTypePattern('ConcatV2|Concat', inputs=[
-        OpTypePattern('*', name='input'),
-        OpTypePattern('Const', name='fill_size'),
-        OpTypePattern('Const')
-    ]),
-    OpTypePattern('Const', name='fill_value')
-])
-
-'''split pattern in gru cell in tensorflow'''
-gru_xc_pattern = OpTypePattern('Split', name='xc', inputs=[
-    OpTypePattern("Const"),  # axis for split
-    OpTypePattern("Sigmoid", inputs=[
-        OpTypePattern("BiasAdd", name="bias_add", inputs=[
-            OpTypePattern("MatMul", inputs=[
-                OpTypePattern("ConcatV2|Concat", name="xh"),
-                OpTypePattern("Identity", name='cell_kernel')
-            ]),
-            OpTypePattern("Identity", name='cell_bias')
-        ])]),
-])
-
-'''split pattern in lstm cell in tensorflow'''
-lstm_xc_pattern = OpTypePattern('Split', inputs=[
-    OpTypePattern("Const"),  # axis for split
-    OpTypePattern("BiasAdd", name="bias_add", inputs=[
-        OpTypePattern("MatMul", inputs=[
-            OpTypePattern("ConcatV2|Concat", name="xh"),
-            OpTypePattern("*", name="cell_kernel"),
-        ]),
-        OpTypePattern("*", name="cell_bias"),
-    ]),
-])
-
-'''gru cell pattern in tensorflow'''
-grucell_pattern = \
-    OpTypePattern('Add', name='gru_cell', inputs=[
-        OpTypePattern('Mul', inputs=[
-            gru_xc_pattern,
-            OpTypePattern('*', name='input')
-        ]),
-        OpTypePattern('Mul', inputs=[
-            OpTypePattern('Sub', inputs=[
-                OpTypePattern('Const'),
-                gru_xc_pattern
-            ]),
-            OpTypePattern('Tanh', inputs=[
-                OpTypePattern('BiasAdd', inputs=[
-                    OpTypePattern('MatMul', name='FullyConnect', inputs=[
-                        OpTypePattern('Concat|ConcatV2', inputs=[
-                            OpTypePattern('*', name='input'),
-                            OpTypePattern('Mul', inputs=[
-                                gru_xc_pattern,
-                                OpTypePattern('*', name='input')
-                            ]),
-                            OpTypePattern('Const'),
-                        ]),
-
-                        OpTypePattern('Identity', name='candidate_kernel')
-                    ]),
-                    OpTypePattern('Identity', name='candidate_bias')
-                ])
-            ])
-        ])
-    ])
-
-
-'''lstm cell pattern in tensorflow'''
-lstmcell_pattern = \
-    OpTypePattern('Mul', name='lstm_cell', inputs=[
-        OpTypePattern("Sigmoid", name="ot", inputs=[lstm_xc_pattern]),
-        OpTypePattern('Tanh', inputs=[
-            OpTypePattern("Add", name="ct", inputs=[
-                OpTypePattern("Mul", inputs=[
-                    OpTypePattern("Sigmoid", name="ft", inputs=[
-                        OpTypePattern("Add", inputs=[
-                            lstm_xc_pattern,
-                            OpTypePattern("*", name="ft_bias"),
-                        ]),
-                    ]),
-                    OpTypePattern("*", name='input'),
-                ]),
-                OpTypePattern("Mul", inputs=[
-                    OpTypePattern("Sigmoid", name="it",
-                                  inputs=[lstm_xc_pattern]),
-                    OpTypePattern("Tanh", name="gt", inputs=[lstm_xc_pattern]),
-                ]),
-            ]),
-        ]),
-    ])
-
-
-rnn_patterns = {
-    'tensorflow': {
-        'gru_cell': grucell_pattern,
-        'lstm_cell': lstmcell_pattern,
-        'h_zero': static_rnn_h_zero_pattern,
-        'static_rnn_batch_size': static_rnn_batch_size_pattern
-    }
-    # TODO: pytorch, mxnet, keras, cntk
-}
-from mmdnn.conversion.rewriter.rewriter import UnitRewriterBase
-from mmdnn.conversion.tensorflow.rewriter.gru_rewriter import GRURewriter
-from mmdnn.conversion.tensorflow.rewriter.lstm_rewriter import LSTMRewriter
-
-
-def process_graph(graph, weights):
-    rewriter_list = [GRURewriter, LSTMRewriter]
-
-    for rewriter in rewriter_list:
-        rewriter(graph, weights).run()
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import tensorflow as tf
-
-
-def save_model(MainModel, network_filepath, weight_filepath, dump_filepath, dump_tag='SERVING'):
-    if dump_tag == 'SERVING':
-        tag_list = [tf.saved_model.tag_constants.SERVING]
-    else:
-        tag_list = [tf.saved_model.tag_constants.TRAINING]
-    input, model = MainModel.KitModel(weight_filepath)
-    with tf.Session() as sess:
-        sess.run(tf.global_variables_initializer())
-
-        builder = tf.saved_model.builder.SavedModelBuilder(dump_filepath)
-
-        tensor_info_input = tf.saved_model.utils.build_tensor_info(input)
-        tensor_info_output = tf.saved_model.utils.build_tensor_info(model)
-
-        prediction_signature = (
-            tf.saved_model.signature_def_utils.build_signature_def(
-                inputs={'input': tensor_info_input},
-                outputs={'output': tensor_info_output},
-                method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
-            )
-        )
-
-        builder.add_meta_graph_and_variables(
-            sess,
-            tag_list,
-            signature_def_map={
-                tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: prediction_signature
-            }
-        )
-
-        save_path = builder.save()
-
-    print('Tensorflow file is saved as [{}], generated by [{}.py] and [{}].'.format(
-        save_path, network_filepath, weight_filepath))
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ----------------------------------------------------------------------------------------------
-
-import os
-
-from mmdnn.conversion.common.IR.IR_graph import IRGraph, IRGraphNode
-import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2
-from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType
-from mmdnn.conversion.common.DataStructure.emitter import Emitter
-from mmdnn.conversion.common.utils import *
-from mmdnn.conversion.rewriter.folder import Folder
-
-
-class TensorflowEmitter(Emitter):
-
-    dtype_map = {
-        graph_pb2.DT_FLOAT16: "tf.float16",
-        graph_pb2.DT_FLOAT32: "tf.float32",
-        graph_pb2.DT_FLOAT64: "tf.float64",
-        graph_pb2.DT_INT16: "tf.int16",
-        graph_pb2.DT_INT32: "tf.int32",
-        graph_pb2.DT_INT64: "tf.int64",
-        graph_pb2.DT_UINT8: "tf.uint8",
-        graph_pb2.DT_UINT16: "tf.uint16"
-    }
-
-    @property
-    def header_code(self):
-        return """import tensorflow as tf
-
-__weights_dict = dict()
-
-is_train = {}
-
-def load_weights(weight_file):
-    import numpy as np
-
-    if weight_file == None:
-        return
-
-    try:
-        weights_dict = np.load(weight_file).item()
-    except:
-        weights_dict = np.load(weight_file, encoding='bytes').item()
-
-    return weights_dict
-
-
-def KitModel(weight_file = None):
-    global __weights_dict
-    __weights_dict = load_weights(weight_file)
-""".format(self.trainable)
-
-    def __init__(self, model):
-        super(TensorflowEmitter, self).__init__()
-
-        from six import string_types as _string_types
-        if isinstance(model, _string_types):
-            network_path = model
-        else:
-            network_path = model[0]
-            self._load_weights(model[1])
-
-        self.IR_graph = IRGraph(network_path)
-        super(TensorflowEmitter, self)._build()
-
-        folder = Folder(self.IR_graph, self.weights_dict)
-        folder.fold()
-
-    def gen_code(self, phase):
-        self.trainable = (phase == 'train')
-        self.add_body(0, self.header_code)
-
-        for layer in self.IR_graph.topological_sort:
-            current_node = self.IR_graph.get_node(layer)
-            node_type = current_node.type
-
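-            # Dispatch by reflection: an IR node of type 'Conv' is handled by
-            # emit_Conv, 'Pool' by emit_Pool, and so on; types without a
-            # matching emit_* method fall through to emit_UNKNOWN below.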
-            if hasattr(self, "emit_" + node_type):
-                func = getattr(self, "emit_" + node_type)
-                line = func(current_node)
-                if line is not None:
-                    self.add_body(1, line)
-            else:
-                print(
-                    "TensorflowEmitter does not support operator [%s]." % (node_type))
-                self.emit_UNKNOWN(current_node)
-
-        self.add_body(1, "return {}, {}".format(
-            ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.input_layers if self.IR_graph.get_node(
-                name).type != 'Const' and not self.IR_graph.get_node(name).get_attr('feed_weights')]),
-            ', '.join([self.IR_graph.get_node(name).real_variable_name for name in self.IR_graph.output_layers if self.IR_graph.get_node(name).type != 'Pack' and self.IR_graph.get_node(name).type != 'Shape'])))
-
-        self.add_body(0, "")
-        for i in self.used_layers:
-            func = getattr(self, "_layer_" + i)
-            func()
-
-        self.add_body(0, "")
-        for code in self.layers_codes.values():
-            self.add_body(0, code)
-
-        return self.body_code
-
-    def parent_variable_name(self, IR_node, path=[0]):
-        if not IR_node.in_edges and IR_node.name in self.weights_dict.keys():
-            return "tf.constant(__weights_dict['{}']['weights'], name='{}')".format(
-                IR_node.name,
-                IR_node.name)
-        return super(TensorflowEmitter, self).parent_variable_name(IR_node, path)
-
-    @staticmethod
-    def _shapeToStr(shapes):
-        ret = [dim.size if dim.size != -1 else 'None' for dim in shapes.dim]
-        return ', '.join('%s' % i for i in ret)
-
-    def emit_Conv(self, IR_node):
-        self.used_layers.add(IR_node.type)
-        strides_str = ', '.join(
-            '%s' % i for i in IR_node.get_attr('strides')[1:-1])
-        input_node, padding = self._defuse_padding(IR_node)
-        data_format = IR_node.get_attr('data_format')
-        code = "{:<15} = convolution({}, group={}, strides=[{}], padding='{}', name='{}')".format(
-            IR_node.variable_name,
-            input_node,
-            IR_node.get_attr('group', 1),
-            strides_str,
-            padding,
-            IR_node.name)
-        return code
-
-    def _defuse_padding(self, IR_node, extra_str=""):
-        auto_pad = IR_node.get_attr('auto_pad')
-        if auto_pad:
-            input_node = self.parent_variable_name(IR_node)
-            if auto_pad == 'VALID':
-                padding = 'VALID'
-            elif auto_pad.startswith("SAME"):
-                padding = 'SAME'
-            else:
-                raise ValueError("Unknown padding type [{}].".format(auto_pad))
-
-            return input_node, padding
-
-        else:
-            padding = IR_node.get_attr("pads")
-            padding = convert_onnx_pad_to_tf(padding)
-            if not is_valid_padding(padding):
-                input_node = IR_node.variable_name + '_pad'
-                self.add_body(1, "{:<15} = tf.pad({}, paddings = {}{})".format(
-                    input_node,
-                    self.parent_variable_name(IR_node),
-                    padding,
-                    extra_str
-                ))
-            else:
-                input_node = self.parent_variable_name(IR_node)
-
-            return input_node, 'VALID'
-
-    def emit_Constant(self, IR_node):
-        if 'dtype' in IR_node.layer.attr:
-            dtype_str = "{}".format(
-                self.dtype_map[IR_node.layer.attr['dtype'].type])
-        else:
-            dtype_str = "tf.float32"
-        code = "{:<15} = tf.constant({}, dtype={}, name='{}')".format(
-            IR_node.variable_name,
-            "__weights_dict['{}']['value']".format(IR_node.name) if IR_node.get_attr(
-                'value') is None else IR_node.get_attr('value'),
-            dtype_str,
-            IR_node.name)
-
-        return code
-
-    def emit_Pool(self, IR_node):
-        pooling_type = IR_node.get_attr('pooling_type')
-        if pooling_type == 'MAX':
-            op = 'max_pool'
-            padding_const = ", constant_values=float('-Inf')"
-        elif pooling_type == 'AVG':
-            op = 'avg_pool'
-            padding_const = ""
-        else:
-            raise ValueError("unknown pooling type [{}].".format(pooling_type))
-
-        arrlen = len(IR_node.get_attr('strides'))
-        dim_str = '3d' if arrlen == 5 else ""
-
-        if IR_node.layer.attr['global_pooling'].b:
-            code = "{:<15} = tf.nn.{}{}({}, [1] + {}.get_shape().as_list()[1:-1] + [1], strides = [1] * {}, padding = 'VALID', name = '{}')".format(
-                IR_node.variable_name,
-                op,
-                dim_str,
-                self.parent_variable_name(IR_node),
-                self.parent_variable_name(IR_node),
-                arrlen,
-                IR_node.name)
-        else:
-            dim = len(IR_node.get_attr("strides")) - 2
-            dilations = IR_node.get_attr('dilations')
-            if dilations:
-                for e in IR_node.get_attr('dilations'):
-                    assert e == 1
-
-            pool_size = IR_node.get_attr('kernel_shape')[1:-1]
-            strides = IR_node.get_attr('strides')[1:-1]
-            padding = IR_node.get_attr('pads')[1:dim]
-
-            if pooling_type == "AVG" and pool_size.count(pool_size[0]) == len(pool_size) and strides[0] == 1 and strides.count(strides[0]) == len(strides) and padding.count(padding[0]) == len(padding) and pool_size[0] == padding[0]*2 + 1:
-                kernel_shape_str = ', '.join(
-                    '%s' % i for i in IR_node.get_attr('kernel_shape'))
-                strides_str = ', '.join(
-                    '%s' % i for i in IR_node.get_attr('strides'))
-
-                code = "{:<15} = tf.nn.{}{}({}, [{}], [{}], padding='{}', name='{}')".format(
-                    IR_node.variable_name,
-                    op,
-                    dim_str,
-                    self.parent_variable_name(IR_node),
-                    kernel_shape_str,
-                    strides_str,
-                    'SAME',
-                    IR_node.name)
-            else:
-                kernel_shape_str = ', '.join(
-                    '%s' % i for i in IR_node.get_attr('kernel_shape'))
-                strides_str = ', '.join(
-                    '%s' % i for i in IR_node.get_attr('strides'))
-                input_node, padding = self._defuse_padding(
-                    IR_node, padding_const)
-                code = "{:<15} = tf.nn.{}{}({}, [{}], [{}], padding='{}', name='{}')".format(
-                    IR_node.variable_name,
-                    op,
-                    dim_str,
-                    input_node,
-                    kernel_shape_str,
-                    strides_str,
-                    padding,
-                    IR_node.name)
-
-        return code
-
-    def emit_UNKNOWN(self, IR_node):
-        print(IR_node.name)
-
-    def emit_Add(self, IR_node):
-        code = "{:<15} = {}".format(
-            IR_node.variable_name,
-            ' + '.join('%s' % self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges))))
-
-        return code
-
-    def emit_DataInput(self, IR_node):
-        assert not IR_node.in_edges
-        shape_str = self._shapeToStr(IR_node.layer.attr["shape"].shape)
-
-        if 'dtype' in IR_node.layer.attr:
-            dtype_str = "{}, ".format(
-                self.dtype_map[IR_node.layer.attr['dtype'].type])
-        else:
-            dtype_str = "tf.float32,"
-
-        code = "{:<15} = tf.placeholder({} shape = ({}), name = '{}')".format(
-            IR_node.variable_name, dtype_str, shape_str, IR_node.name
-        )
-        return code
-
-    def emit_Dropout(self, IR_node):
-        parent = self.IR_graph.get_parent(IR_node.name, [0])
-        if self.trainable:
-            self.add_body(1, "{:<15} = Dropout(name = '{}', dropout_rate = {})({})".format(
-                IR_node.variable_name,
-                IR_node.name,
-                1 - IR_node.IR_layer.attr["keep_prob"].f,
-                parent.real_variable_name))
-        else:
-            IR_node.real_name = parent.real_name
-
-    def emit_FullyConnected(self, IR_node):
-        if IR_node.name in self.weights_dict and 'weights' in self.weights_dict[IR_node.name]:
-            kernel_str = "kernel_initializer = tf.constant_initializer(__weights_dict['{}']['weights']), ".format(
-                IR_node.name)
-        else:
-            kernel_str = ""
-
-        if IR_node.name in self.weights_dict and 'bias' in self.weights_dict[IR_node.name]:
-            bias_str = "bias_initializer = tf.constant_initializer(__weights_dict['{}']['bias']), ".format(
-                IR_node.name)
-        else:
-            bias_str = ""
-
-        # check whether flatten operator should be added
-        parent = self.IR_graph.get_parent(IR_node.name, [0])
-        parent_shape = shape_to_list(parent.get_attr('_output_shapes')[0])
-        if len(parent_shape) > 2:
-            # flatten is needed
-            self.add_body(1, "{:<15} = tf.contrib.layers.flatten({})".format(
-                IR_node.variable_name + '_flatten',
-                self.parent_variable_name(IR_node)))
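-            # Rank > 2 inputs (e.g. NHWC conv activations) are flattened
-            # first so the generated tf.layers.dense call sees a 2-D
-            # (batch, features) tensor, matching the IR FullyConnected
-            # semantics.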
-            code = "{:<15} = tf.layers.dense({}, {}, {}{}use_bias = {})".format(
-                IR_node.variable_name,
-                IR_node.variable_name + '_flatten',
-                IR_node.layer.attr['units'].i,
-                kernel_str,
-                bias_str,
-                IR_node.layer.attr['use_bias'].b)
-            return code
-
-        else:
-            code = "{:<15} = tf.layers.dense({}, {}, {}{}use_bias = {})".format(
-                IR_node.variable_name,
-                self.parent_variable_name(IR_node),
-                IR_node.layer.attr['units'].i,
-                kernel_str,
-                bias_str,
-                IR_node.layer.attr['use_bias'].b)
-            return code
-
-    def emit_UpSampling2D(self, IR_node):
-        scales = IR_node.get_attr('scales')
-        scales = tuple(scales)
-
-        code = "{:<15} = tf.keras.layers.UpSampling2D(size={})({})".format(
-            IR_node.variable_name,
-            scales,
-            self.parent_variable_name(IR_node))
-        return code
-
-    def emit_Flatten(self, IR_node):
-        # self._emit_unary_operation(IR_node, "contrib.layers.flatten")
-        code = "{:<15} = tf.contrib.layers.flatten({})".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node))
-        return code
-
-    def emit_Mul(self, IR_node):
-        code = "{:<15} = {}".format(
-            IR_node.variable_name,
-            ' * '.join('%s' % self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges))))
-        return code
-
-    def emit_Const(self, IR_node):
-        if 'dtype' in IR_node.layer.attr:
-            dtype_str = "dtype={}".format(
-                self.dtype_map[IR_node.layer.attr['dtype'].type])
-            if 'int' in dtype_str:
-                code = "{:<15} = tf.constant({}, {}, shape=(1,))".format(
-                    IR_node.variable_name,
-                    IR_node.layer.attr['value'].i,
-                    dtype_str)
-            else:
-                code = "{:<15} = tf.constant({}, {}, shape=(1,))".format(
-                    IR_node.variable_name,
-                    IR_node.layer.attr['value'].f,
-                    dtype_str)
-        else:
-            dtype_str = "dtype=tf.float32"
-            code = "{:<15} = tf.constant({}, {}, shape=(1,))".format(
-                IR_node.variable_name,
-                IR_node.layer.attr['value'].f,
-                dtype_str)
-
-        return code
-
-    def emit_Transpose(self, IR_node):
-        code = "{:<15} = tf.transpose(a = {}, perm = {})".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node, [0]),
-            self.parent_variable_name(IR_node, [1]))
-
-        return code
-
-    def emit_Gather(self, IR_node):
-        variable_str = "tf.convert_to_tensor(__weights_dict['{}']['weights'])".format(
-            IR_node.name)
-
-        code = "{:<15} = tf.gather(params = {}, indices = {}, axis = {})".format(
-            IR_node.variable_name,
-            variable_str,
-            self.parent_variable_name(IR_node),
-            IR_node.get_attr('axis')
-        )
-
-        return code
-
-    def emit_Unstack(self, IR_node):
-        code = "{:<15} = tf.unstack(value={}, num={}, axis={})".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.get_attr('num'),
-            IR_node.get_attr('axis')
-        )
-        return code
-
-    def emit_Reshape(self, IR_node):
-        code = "{:<15} = tf.reshape({}, [{}], '{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            ', '.join('%s' % i for i in IR_node.get_attr('shape')),
-            IR_node.name)
-
-        return code
-
-    def emit_Sub(self, IR_node):
-        code = "{:<15} = {}".format(
-            IR_node.variable_name,
-            ' - '.join('%s' % self.parent_variable_name(IR_node, [idx]) for idx in range(len(IR_node.in_edges))))
-
-        return code
-
-    def emit_Div(self, IR_node):
-        code = "{:<15} = tf.div({}, {}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            self.parent_variable_name(IR_node, [1]),
-            IR_node.name
-        )
-        return code
-
-    def _emit_unary_operation(self, IR_node, op_name):
-        code = "{:<15} = tf.{}({}, name = '{}')".format(
-            IR_node.variable_name,
-            op_name,
-            self.parent_variable_name(IR_node),
-            IR_node.name)
-        return code
-
-    def emit_Tanh(self, IR_node):
-        code = self._emit_unary_operation(IR_node, 'tanh')
-        return code
-
-    def emit_Elu(self, IR_node):
-        return self._emit_unary_operation(IR_node, 'nn.elu')
-
-    def emit_Relu(self, IR_node):
-        return self._emit_unary_operation(IR_node, 'nn.relu')
-
-    def emit_Relu6(self, IR_node):
-        return self._emit_unary_operation(IR_node, 'nn.relu6')
-
-    def emit_CRelu(self, IR_node):
-        return self._emit_unary_operation(IR_node, 'nn.crelu')
-
-    def emit_PRelu(self, IR_node):
-        self.used_layers.add(IR_node.type)
-        code = "{:<15} = prelu({}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.name)
-        return code
-
-    def emit_LeakyRelu(self, IR_node):
-        self.add_body(1, "{:<15} = tf.nn.leaky_relu({}, alpha={}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.get_attr('alpha'),
-            IR_node.name
-        ))
-
-    def emit_Softmax(self, IR_node):
-        return self._emit_unary_operation(IR_node, 'nn.softmax')
-
-    def emit_Sigmoid(self, IR_node):
-        code = self._emit_unary_operation(IR_node, 'sigmoid')
-        return code
-
-    def emit_Embedding(self, IR_node):
-        variable_str = "tf.convert_to_tensor(__weights_dict['{}']['weights'])".format(
-            IR_node.name)
-        code = "{:<15} = tf.nn.embedding_lookup(params = {}, ids = {})".format(
-            IR_node.variable_name,
-            variable_str,
-            self.parent_variable_name(IR_node))
-        return code
-
-    def emit_LSTM(self, IR_node):
-        return self.emit_RNNs(IR_node, "LSTM")
-
-    def emit_GRU(self, IR_node):
-        return self.emit_RNNs(IR_node, "GRU")
-
-    def emit_Concat(self, IR_node):
-        code = "{:<15} = tf.concat([{}], {}, name = '{}')".format(
-            IR_node.variable_name,
-            ', '.join(self.parent_variable_name(
-                IR_node, [idx]) for idx in range(len(IR_node.in_edges))),
-            IR_node.layer.attr['axis'].i,
-            IR_node.name)
-
-        return code
-
-    def emit_BatchNorm(self, IR_node):
-        self.used_layers.add(IR_node.type)
-        code = "{:<15} = batch_normalization({}, variance_epsilon={}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.get_attr('epsilon'),
-            IR_node.name)
-        return code
-
-    def emit_Scale(self, IR_node):
-        self.used_layers.add(IR_node.type)
-        code = "{:<15} = scale({}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.name)
-        return code
-
-    def emit_Pad(self, IR_node):
-        padding = IR_node.get_attr('pads')
-        padding = convert_onnx_pad_to_tf(padding)
-
-        mode = IR_node.get_attr('mode', 'constant')
-        mode = mode.lower()
-        if mode == 'constant' or mode == 'reflect':
-            mode = mode.upper()
-        elif mode == 'edge':
-            mode = 'SYMMETRIC'
-        else:
-            raise NotImplementedError(
-                "Unsupported padding mode {}.".format(mode))
-        code = "{:<15} = tf.pad({}, {}, '{}', name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            padding,
-            mode,
-            IR_node.variable_name)
-        return code
-
-    def emit_Squeeze(self, IR_node):
-        code = "{:<15} = tf.squeeze({}, [{}], name = '{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            ', '.join('%s' % axis for axis in IR_node.layer.attr['axes'].list.i),
-            IR_node.name)
-        return code
-
-    def emit_ReduceMean(self, IR_node):
-        code = "{:<15} = tf.reduce_mean({}, [{}], {}, name = '{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            ','.join('%s' % i for i in IR_node.get_attr('axes')),
-            IR_node.get_attr('keepdims'),
-            IR_node.name)
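-        # For axes=[1, 2] and keepdims=True this emits a line such as:
-        #   node_var        = tf.reduce_mean(input_var, [1,2], True, name = 'node')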
-        return code
-
-    def emit_LRN(self, IR_node):
-        code = "{:<15} = tf.nn.lrn({}, depth_radius={}, bias={}, alpha={}, beta={}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.get_attr('size') - 1,
-            IR_node.get_attr('bias', 1),
-            IR_node.get_attr('alpha') / (IR_node.get_attr('size') * 2 - 1),
-            IR_node.get_attr('beta'),
-            IR_node.name)
-        return code
-
-    def emit_SeparableConv(self, IR_node):
-        self.used_layers.add(IR_node.type)
-        strides_str = ', '.join('%s' % i for i in IR_node.get_attr('strides'))
-        input_node, padding = self._defuse_padding(IR_node)
-        code = "{:<15} = separable_convolution({}, strides = [{}], padding = '{}', name = '{}')".format(
-            IR_node.variable_name,
-            input_node,
-            strides_str,
-            padding,
-            IR_node.name)
-        return code
-
-    def emit_DepthwiseConv(self, IR_node):
-        self.used_layers.add(IR_node.type)
-        strides_str = ', '.join(
-            '%s' % i for i in IR_node.layer.attr['strides'].list.i)
-        input_node, padding = self._defuse_padding(IR_node)
-        code = "{:<15} = depthwise_convolution({}, strides = [{}], padding = '{}', name = '{}')".format(
-            IR_node.variable_name,
-            input_node,
-            strides_str,
-            padding,
-            IR_node.name)
-        return code
-
-    def emit_Crop(self, IR_node):
-        border = IR_node.get_attr('border')
-        assert len(border) == 4
-
-        output_shape = IR_node.get_attr('_output_shapes')[0]
-        output_shape = shape_to_list(output_shape)
-
-        code = "{:<15} = tf.image.crop_to_bounding_box({}, offset_height={}, offset_width={}, target_height={}, target_width={})".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            border[0],
-            border[1],
-            output_shape[1],
-            output_shape[2])
-
-        return code
-
-    def emit_ConvTranspose(self, IR_node):
-        self.used_layers.add(IR_node.type)
-        output_shape = [1] + \
-            shape_to_list(IR_node.get_attr('_output_shapes')[0])[1:]
-        input_node, padding = self._defuse_padding(IR_node)
-        code = "{:<15} = convolution_transpose({}, output_shape={}, strides={}, padding='{}', name='{}')".format(
-            IR_node.variable_name,
-            input_node,
-            output_shape,
-            IR_node.get_attr('strides'),
-            padding,
-            IR_node.name)
-        return code
-
-    def emit_Slice(self, IR_node):
-        extra_str = ""
-        if IR_node.get_attr('begin_mask'):
-            extra_str += ", begin_mask={}".format(
-                IR_node.get_attr('begin_mask'))
-        if IR_node.get_attr('end_mask') is not None:
-            extra_str += ", end_mask={}".format(IR_node.get_attr('end_mask'))
-        if IR_node.get_attr('shrink_axis_mask') is not None:
-            extra_str += ", shrink_axis_mask={}".format(
-                IR_node.get_attr('shrink_axis_mask'))
-        if IR_node.get_attr('new_axis_mask') is not None:
-            extra_str += ", new_axis_mask={}".format(
-                IR_node.get_attr('new_axis_mask'))
-
-        if IR_node.get_attr('starts') is not None:
-            starts = IR_node.get_attr('starts')
-        else:
-            starts = self.parent_variable_name(IR_node, [1])
-
-        if IR_node.get_attr('ends') is not None:
-            ends = IR_node.get_attr('ends')
-        else:
-            ends = self.parent_variable_name(IR_node, [2])
-
-        if IR_node.get_attr('strides') is not None:
-            strides = IR_node.get_attr('strides')
-        else:
-            strides = self.parent_variable_name(IR_node, [3])
-
-        code = "{:<15} = tf.strided_slice({}, {}, {}, {}{}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            starts,
-            ends,
-            strides,
-            extra_str,
-            IR_node.name)
-
-        return code
-
-    def emit_Shape(self, IR_node):
-        code = "{:<15} = tf.shape({}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.name)
-        return code
-
-    def emit_Pack(self, IR_node):
-        code = "{:<15} = tf.stack({}, axis={}, name='{}')".format(
-            IR_node.variable_name,
-            '[' + ','.join('%s' % self.parent_variable_name(IR_node, [idx])
-                           for idx in range(len(IR_node.in_edges))) + ']',
-            IR_node.get_attr('axis'),
-            IR_node.name)
-        return code
-
-    def emit_Split(self, IR_node):
-        code = "{:<15} = tf.split({}, {}, {}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.get_attr('split'),
-            IR_node.get_attr('axis'),
-            IR_node.name)
-        return code
-
-    def emit_Unsqueeze(self, IR_node):
-        code = "{:<15} = tf.expand_dims({}, axis={}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.get_attr('axes')[0],
-            IR_node.name)
-        return code
-
-    def emit_Fill(self, IR_node):
-        code = "{:<15} = tf.fill({}, {}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            IR_node.get_attr('value'),
-            IR_node.name)
-        return code
-
-    def emit_Maximum(self, IR_node):
-        code = "{:<15} = tf.maximum({}, {}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            self.parent_variable_name(IR_node, [1]),
-            IR_node.name
-        )
-        return code
-
-    def emit_Minimum(self, IR_node):
-        code = "{:<15} = tf.minimum({}, {}, name='{}')".format(
-            IR_node.variable_name,
-            self.parent_variable_name(IR_node),
-            self.parent_variable_name(IR_node, [1]),
-            IR_node.name
-        )
-        return code
-
-    def emit_Scope(self, IR_node):
-        input_vars = [self.parent_variable_name(
-            IR_node, [idx]) for idx in range(len(IR_node.in_edges))]
-        input_vars.append('__weights_dict')
-        code = "{:<15} = _{}({})".format(
-            IR_node.real_variable_name,
-            IR_node.pattern,
-            ', '.join(input_vars))
-        self._gen_scope_code(IR_node)
-        return code
-
-    def _gen_scope_code(self, scope_node):
-
-        def _scope_func(scope_name, params, code, return_var):
-            code = """
-def _{}({}):
-{}
-    return {}
-    """.format(scope_name, params, code, ', '.join(return_var))
-            return code
-
-        if not self.layers_codes.get(scope_node.pattern, None):
-            body_code = str()
-            for node_name in scope_node.topology_list:
-                node = self.IR_graph.get_node(node_name)
-                node_type = node.type
-
-                if hasattr(self, "emit_" + node_type):
-                    func = getattr(self, "emit_" + node_type)
-                    line = func(node)
-                    if line is not None:
-                        body_code += "    " + line + '\n'
-                else:
-                    print(
-                        "TensorflowEmitter does not support operator [%s]." % (node_type))
-                    self.emit_UNKNOWN(node)
-
-            # param_code does not need parameter slice.
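-            # Each distinct pattern is materialised once as a standalone
-            # helper `_<pattern>(inputs..., __weights_dict)` that returns the
-            # scope's output variables; emit_Scope() only emits a call to it,
-            # so repeated cells (e.g. unrolled RNN steps) share one body.
-            # The generated code is cached in self.layers_codes below.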
-            input_params = scope_node.input_params
-            input_params.append("__weights_dict")
-            param_code = ', '.join(input_params)
-            function_code = _scope_func(
-                scope_node.pattern, param_code, body_code, scope_node.return_variables)
-
-            self.layers_codes[scope_node.pattern] = function_code
-
-    def _layer_Conv(self):
-        self.add_body(0, """
-def convolution(input, name, group, **kwargs):
-    w = tf.Variable(__weights_dict[name]['weights'], trainable=is_train, name=name + "_weight")
-    if group == 1:
-        layer = tf.nn.convolution(input, w, name=name, **kwargs)
-    else:
-        weight_groups = tf.split(w, num_or_size_splits=group, axis=-1)
-        xs = tf.split(input, num_or_size_splits=group, axis=-1)
-        convolved = [tf.nn.convolution(x, weight, name=name, **kwargs) for
-                     (x, weight) in zip(xs, weight_groups)]
-        layer = tf.concat(convolved, axis=-1)
-
-    if 'bias' in __weights_dict[name]:
-        b = tf.Variable(__weights_dict[name]['bias'], trainable=is_train, name=name + "_bias")
-        layer = layer + b
-    return layer""")
-
-    def _layer_PRelu(self):
-        self.add_body(0, """
-def prelu(input, name):
-    gamma = tf.Variable(__weights_dict[name]['gamma'], name=name + "_gamma", trainable=is_train)
-    return tf.maximum(0.0, input) + gamma * tf.minimum(0.0, input)
-    """)
-
-    def _layer_BatchNorm(self):
-        self.add_body(0, """
-def batch_normalization(input, name, **kwargs):
-    mean = tf.Variable(__weights_dict[name]['mean'], name = name + "_mean", trainable = is_train)
-    variance = tf.Variable(__weights_dict[name]['var'], name = name + "_var", trainable = is_train)
-    offset = tf.Variable(__weights_dict[name]['bias'], name = name + "_bias", trainable = is_train) if 'bias' in __weights_dict[name] else None
-    scale = tf.Variable(__weights_dict[name]['scale'], name = name + "_scale", trainable = is_train) if 'scale' in __weights_dict[name] else None
-    return tf.nn.batch_normalization(input, mean, variance, offset, scale, name = name, **kwargs)
-""")
-
-    def _layer_Scale(self):
-        self.add_body(0, """
-def scale(input, name, **kwargs):
-    mean = tf.Variable(__weights_dict[name]['scale_mean'], name = name + "_mean", trainable = is_train)
-    variance = tf.Variable(__weights_dict[name]['scale_var'], name = name + "_var", trainable = is_train)
-    offset = tf.Variable(__weights_dict[name]['bias'], name = name + "_bias", trainable = is_train) if 'bias' in __weights_dict[name] else None
-    scale = tf.Variable(__weights_dict[name]['scale'], name = name + "_scale", trainable = is_train) if 'scale' in __weights_dict[name] else None
-    return tf.nn.batch_normalization(input, mean, variance, offset, scale, variance_epsilon = 0, name = name)
-""")
-
-    def _layer_SeparableConv(self):
-        self.add_body(0, """
-def separable_convolution(input, name, **kwargs):
-    depthwise = tf.Variable(__weights_dict[name]['depthwise_filter'], trainable = is_train, name = name + "_df")
-    pointwise = tf.Variable(__weights_dict[name]['pointwise_filter'], trainable = is_train, name = name + "_pf")
-    layer = tf.nn.separable_conv2d(input, depthwise, pointwise, **kwargs)
-    if 'bias' in __weights_dict[name]:
-        b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias")
-        layer = layer + b
-    return layer""")
-
-    def _layer_DepthwiseConv(self):
-        self.add_body(0, """
-def depthwise_convolution(input, name, **kwargs):
-    depthwise = tf.Variable(__weights_dict[name]['weights'], trainable = is_train, name = name + "_df")
-    layer = tf.nn.depthwise_conv2d(input, depthwise, **kwargs)
-    if 'bias' in __weights_dict[name]:
-        b = tf.Variable(__weights_dict[name]['bias'], trainable = is_train, name = name + "_bias")
-        layer = layer + b
-    return layer""")
-
-    def _layer_ConvTranspose(self):
-        self.add_body(0, """
-def convolution_transpose(input, name, **kwargs):
-    w = tf.Variable(__weights_dict[name]['weights'], trainable=is_train, name=name + "_weight")
-    dim = __weights_dict[name]['weights'].ndim - 2
-    if dim == 2:
-        layer = tf.nn.conv2d_transpose(input, w, **kwargs)
-    elif dim == 3:
-        layer = tf.nn.conv3d_transpose(input, w, **kwargs)
-    else:
-        raise ValueError("Error dim number {} in ConvTranspose".format(dim))
-
-    if 'bias' in __weights_dict[name]:
-        b = tf.Variable(__weights_dict[name]['bias'], trainable=is_train, name=name + "_bias")
-        layer = layer + b
-    return layer""")
-import numpy as np
-import tensorflow
-from tensorflow.python.framework import tensor_util
-from tensorflow.core.framework import attr_value_pb2
-from mmdnn.conversion.tensorflow.tensorflow_graph import TensorflowGraph
-import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2
-from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType
-from mmdnn.conversion.common.utils import *
-from mmdnn.conversion.common.DataStructure.parser import Parser
-from distutils.version import LooseVersion
-import tempfile
-import os
-import shutil
-
-
-class TensorflowParser2(Parser):
-
-    # skip_prefix = [
-    #     "^",
-    #     "train_op",
-    #     "save",
-    #     "gradients",
-    #     "init",
-    #     "global_step",
-    #     "distort_image",
-    #     "Adagrad",
-    # ]
-    skip_prefix = [
-    ]
-
-    skip_scope = [
-        "random_uniform",
-        "Initializer",
-        "optimizer",
-        "weight_loss",
-        "parallel_read",
-        "case"
-    ]
-
-    skip_type = set([
-        "L2Loss",
-        "VariableV2",
-        "Const",
-        "Assign",
-        "RandomUniform",
-        "FIFOQueueV2",
-        "Assert",
-        "Unpack",
-        "NextIteration",
-        "TensorArrayV3",
-        "Range",
-        "TensorArrayScatterV3",
-        "TensorArrayReadV3",
-        "TensorArrayWriteV3",
-        # "Switch"
-        "Dequantize",
-        # "RequantizationRange",
-        # "Requantize",
-        "ExpandDims",
-        # "Identity",
-        # "Mean",
-        # "Cast"
-        "Pack",
-        "CheckNumerics",
-        "Where"
-    ])
-
-    q_type = set([
-        "Dequantize",
-        "QuantizeV2",
-        "QuantizedConv2D",
-        "QuantizedReshape",
-        "RequantizationRange"
-    ])
-
-    dtype_map = {
-        0: graph_pb2.DT_UNDEFINED,
-        1: graph_pb2.DT_FLOAT32,
-        2: graph_pb2.DT_FLOAT64,
-        3: graph_pb2.DT_INT32,
-        4: graph_pb2.DT_UINT8,
-        5: graph_pb2.DT_INT16,
-        6: graph_pb2.DT_INT8,
-        7: graph_pb2.DT_STRING,
-        9: graph_pb2.DT_INT64,
-        10: graph_pb2.DT_BOOL
-    }
-
-    @property
-    def src_graph(self):
-        return self.tf_graph
-
-    def __init__(self, frozen_file, inputshape, in_nodes, dest_nodes):
-        if LooseVersion(tensorflow.__version__) < LooseVersion('1.8.0'):
-            raise ImportError(
-                'Your TensorFlow version %s is outdated. '
-                'MMdnn requires tensorflow>=1.8.0' % tensorflow.__version__)
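-        # Loading strategy: read the frozen GraphDef, strip everything not on
-        # a path from in_nodes to dest_nodes, round-trip the stripped graph
-        # through a temp file, then re-import it with fresh placeholders so
-        # concrete input shapes are available when the meta graph is exported
-        # and the internal TensorflowGraph is built.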
-
-        super(TensorflowParser2, self).__init__()
-
-        self.weight_loaded = True
-        # load model files into TensorFlow graph
-        with open(frozen_file, 'rb') as f:
-            serialized = f.read()
-        tensorflow.reset_default_graph()
-        original_gdef = tensorflow.GraphDef()
-
-        original_gdef.ParseFromString(serialized)
-
-        in_type_list = {}
-        for n in original_gdef.node:
-            if n.name in in_nodes:
-                in_type_list[n.name] = n.attr['dtype'].type
-
-        from tensorflow.python.tools import strip_unused_lib
-        from tensorflow.python.framework import dtypes
-        from tensorflow.python.platform import gfile
-        original_gdef = strip_unused_lib.strip_unused(
-            input_graph_def=original_gdef,
-            input_node_names=in_nodes,
-            output_node_names=dest_nodes,
-            placeholder_type_enum=dtypes.float32.as_datatype_enum)
-        # Save it to an output file
-        tempdir = tempfile.mkdtemp()
-        frozen_model_file = os.path.join(tempdir, 'frozen.pb')
-        with gfile.GFile(frozen_model_file, "wb") as f:
-            f.write(original_gdef.SerializeToString())
-        with open(frozen_model_file, 'rb') as f:
-            serialized = f.read()
-        shutil.rmtree(tempdir)
-
-        tensorflow.reset_default_graph()
-        model = tensorflow.GraphDef()
-        model.ParseFromString(serialized)
-
-        output_shape_map = dict()
-        input_shape_map = dict()
-        dtype = tensorflow.float32
-
-        with tensorflow.Graph().as_default() as g:
-            input_map = {}
-            for i in range(len(inputshape)):
-                if in_type_list[in_nodes[i]] == 1 or in_type_list[in_nodes[i]] == 0:
-                    dtype = tensorflow.float32
-                    x = tensorflow.placeholder(
-                        dtype, shape=[None] + inputshape[i])
-
-                elif in_type_list[in_nodes[i]] == 3:
-                    dtype = tensorflow.int32
-                    x = tensorflow.placeholder(dtype, shape=inputshape[i])
-
-                elif in_type_list[in_nodes[i]] == 10:
-                    dtype = tensorflow.bool
-                    x = tensorflow.placeholder(dtype)
-
-                input_map[in_nodes[i] + ':0'] = x
-
-            tensorflow.import_graph_def(model, name='', input_map=input_map)
-
-            with tensorflow.Session(graph=g) as sess:
-                tempdir = tempfile.mkdtemp()
-                meta_graph_def = tensorflow.train.export_meta_graph(
-                    filename=os.path.join(tempdir, 'my-model.meta'))
-                model = meta_graph_def.graph_def
-                shutil.rmtree(tempdir)
-
-        self.tf_graph = TensorflowGraph(model)
-        self.tf_graph.build()
-
-    @staticmethod
-    def _get_scopes(layer_name):
-        return layer_name.split('/')
-
-    def check_const(self, node):
-        while node:
-            if node.type == "Const":
-                return node
-            elif node.type == "NoOp":
-                return None
-            else:
-                node = self.get_parent(node.name, [0])
-
-    def _convert_reduction_operators(self, source_node, new_op=None):
-        IR_node = self._convert_identity_operation(source_node, 1, new_op)
-
-        # keep dims
-        IR_node.attr['keepdims'].b = source_node.layer.attr['keep_dims'].b
-
-        # axes
-        axes = self.get_parent(
-            source_node.name, [1]).layer.attr['value'].tensor
-        axes = tensor_util.MakeNdarray(axes)
-        IR_node.attr['axes'].list.i.extend(axes)
-
-    def _convert_layers_batchnorm(self, source_node):
-        IR_node = self.IR_graph.node.add()
-        TensorflowParser2._copy_and_reop(source_node, IR_node, 'BatchNorm')
-
-        is_transformed = False
-        test = self.get_parent(source_node.name, [0])
-
-        if test.type == 'Mul':
-            is_transformed = True
-
-        # ssd model is transformed
-        if is_transformed:
-            # Ax - (Au - b)
-
-            # A
-            input_mul_A = self.get_parent(source_node.name, [0, 1])
-            tensor_content = input_mul_A.get_attr('value')
-            A_content = tensor_util.MakeNdarray(tensor_content)
-            self.set_weight(source_node.name, 'A', A_content)
-
-            # b
-            input_sub = self.get_parent(source_node.name, [1])
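-            # Per the Ax - (Au - b) comment above, the folded graph only
-            # exposes the combined scale A and combined offset term; the
-            # original mean/var/gamma/beta appear to be baked into them, so A
-            # and b are the only weights left to capture here.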
-            tensor_content = input_sub.get_attr('value')
-            sub_content = tensor_util.MakeNdarray(tensor_content)
-            # print(sub_content)
-            self.set_weight(source_node.name, 'b', sub_content)
-
-            input_node = self.get_parent(source_node.name, [0])
-            IR_node.input.append(input_node.real_name)
-            IR_node.attr["_output_shapes"].list.shape.pop()
-            IR_node.attr["_output_shapes"].MergeFromString(
-                input_node.layer.attr['_output_shapes'].SerializeToString())
-
-        else:
-            # epsilon
-            epsilon = self.get_parent(source_node.name, [1])
-            IR_node.attr['epsilon'].f = epsilon.layer.attr['value'].tensor.float_val[0]
-
-            # moving variance (var) /read
-            moving_variance = self.get_parent(source_node.name, [0])
-
-            if moving_variance.type == 'Identity':
-                moving_variance_read = self.src_graph.get_parent(
-                    moving_variance.name, [0])
-                tensor_content = moving_variance_read.get_attr('value')
-                moving_variance_content = tensor_util.MakeNdarray(
-                    tensor_content)
-                self.set_weight(source_node.name, 'var',
-                                moving_variance_content)
-
-            else:
-                print(moving_variance.layer)
-                assert False
-
-            # gamma (scale)
-            Rsqrt = self.get_son(source_node.name, [0], True)
-            # print(Rsqrt.out_edges)
-
-            if len(Rsqrt.out_edges) == 2:
-                IR_node.attr['scale'].b = False
-                output_node = self.get_son(Rsqrt.name, [0, 0], True)
-                if output_node.type == 'Sub':
-                    output_node = self.get_son(Rsqrt.name, [1, 0], True)
-                    Mul = self.get_son(Rsqrt.name, [0], True)
-                else:
-                    Mul = self.get_son(Rsqrt.name, [1], True)
-            else:
-                IR_node.attr['scale'].b = True
-                son = self.get_son(Rsqrt.name, [0, 0], True)
-                gamma_from = self.get_parent(son.name, [1, 1], True)
-                gamma = self.check_const(gamma_from)
-                gamma_tensor = gamma.get_attr('value')
-                scale = tensor_util.MakeNdarray(gamma_tensor)
-                self.set_weight(source_node.name, 'scale', scale)
-                output_node = self.get_son(
-                    source_node.name, [0, 0, 0, 0], True)
-                if output_node.type == 'Sub':
-                    output_node = self.get_son(
-                        source_node.name, [0, 0, 0, 0, 0], True)
-                    Mul = self.get_son(Rsqrt.name, [0, 0], True)
-                else:
-                    Mul = self.get_son(Rsqrt.name, [0, 1], True)
-
-            # beta (bias)
-            beta = self.get_parent(
-                output_node.name, [1, 0, 0], True).get_attr('value')
-            bias = tensor_util.MakeNdarray(beta)
-            IR_node.attr['bias'].b = True
-            self.set_weight(source_node.name, 'bias', bias)
-
-            # moving mean (mean)
-            moving_mean = self.get_parent(Mul.name, [0, 0]).get_attr('value')
-            mean = tensor_util.MakeNdarray(moving_mean)
-            self.set_weight(source_node.name, 'mean', mean)
-
-            # input node
-            assert output_node.type == 'Add'
-            input_node = self.get_parent(output_node.name, [0, 0])
-            IR_node.input.append(input_node.real_name)
-            IR_node.attr["_output_shapes"].list.shape.pop()
-            IR_node.attr["_output_shapes"].MergeFromString(
-                input_node.layer.attr['_output_shapes'].SerializeToString())
-            output_node.real_name = source_node.name
-
-    def _convert_layers_instancenorm(self, source_node):
-        IR_node = self.IR_graph.node.add()
-        TensorflowParser2._copy_and_reop(source_node, IR_node, 'InstanceNorm')
-
-        # epsilon
-        epsilon = self.get_parent(source_node.name, [1])
-        epsilon_value = epsilon.get_attr('value').float_val[0]
-        IR_node.attr['epsilon'].f = epsilon_value
-
-        # beta
-        output_node = self.get_son(source_node.name, [0, 0, 0, 0], True)
-        beta = self.get_parent(output_node.name, [1, 0, 0, 0, 0, 1], True)
-        beta_tensor = beta.get_attr('value')
-        beta = tensor_util.MakeNdarray(beta_tensor)
-        self.set_weight(source_node.name, 'bias', beta)
-
-        # gamma (scale)
-        IR_node.attr['scale'].b = True
-        son = self.get_son(source_node.name, [0, 0, 0], True)
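-        # The gamma constant sits several hops away from the matched node, so
-        # fixed parent-index paths are walked to reach it; these paths are
-        # tied to the specific sub-graph layout of the InstanceNorm scope.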
-        gamma = self.get_parent(son.name, [1, 1, 0, 0, 0, 1], True)
-        gamma_tensor = gamma.get_attr('value')
-        scale = tensor_util.MakeNdarray(gamma_tensor)
-        self.set_weight(source_node.name, 'scale', scale)
-        # output_node = self.get_son(source_node.name, [0, 0, 0, 0], True)
-
-        assert output_node.type == 'Add'
-        input_node = self.get_parent(output_node.name, [0, 0])
-        IR_node.input.append(input_node.real_name)
-
-        output_node.real_name = source_node.name
-
-    @classmethod
-    def _skip_node(cls, source_node):
-        if source_node.covered:
-            return True
-
-        for prefix in cls.skip_prefix:
-            if source_node.name.startswith(prefix):
-                return True
-
-        scopes = TensorflowParser2._get_scopes(source_node.name)
-
-        for s in scopes:
-            if s in cls.skip_scope:
-                return True
-
-        return False
-
-    def _add_constant_node(self, source_node):
-        parent_ids = range(len(source_node.in_edges))
-        for idx in parent_ids:
-            s = source_node.in_edges[idx]
-            parent_node = self.tf_graph.get_node(s)
-            if parent_node.type == 'Const':
-                self._rename_Const(parent_node)
-
-    def _rename_Const(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, end_idx=0, new_op='Constant')  # Constant
-        value = source_node.get_attr('value')
-        if value.float_val:
-            value = value.float_val[0]
-        elif value.int_val:
-            value = value.int_val[0]
-        else:
-            value = tensor_util.MakeNdarray(value).tolist()
-        kwargs = {'value': value}
-        assign_IRnode_values(IR_node, kwargs)
-
-    def gen_IR(self):
-        for layer in self.src_graph.topological_sort:
-            current_node = self.src_graph.get_node(layer)
-
-            if self._skip_node(current_node):
-                continue
-
-            node_type = current_node.type
-
-            if hasattr(self, "rename_" + node_type):
-                func = getattr(self, "rename_" + node_type)
-                func(current_node)
-            else:
-                self.rename_UNKNOWN(current_node)
-
-    @staticmethod
-    def tensor_shape_to_list(shapes):
-        if isinstance(shapes, attr_value_pb2.AttrValue):
-            return [dim.size for dim in shapes.shape.dim]
-
-        else:
-            ret = []
-            for shape in shapes:
-                this_one = [dim.size for dim in shape.dim]
-                ret.append(this_one)
-            return ret
-
-    @staticmethod
-    def _copy_and_reop(source_node, IR_node, new_op=None):
-        if new_op is None:
-            new_op = source_node.type
-        IR_node.name = source_node.name
-        IR_node.op = new_op
-
-        kwargs = {}
-        if 'data_format' in source_node.layer.attr:
-            kwargs['data_format'] = source_node.get_attr('data_format')
-
-        if 'T' in source_node.layer.attr:
-            if source_node.type not in TensorflowParser2.q_type:
-                if source_node.type == 'Enter':
-                    IR_node.attr["dtype"].type = TensorflowParser2.dtype_map[6]
-                else:
-                    assert source_node.layer.attr['T'].type in TensorflowParser2.dtype_map, 'type [{}] is unknown.'.format(
-                        source_node.layer.attr['T'].type)
-                    IR_node.attr["dtype"].type = TensorflowParser2.dtype_map[source_node.layer.attr['T'].type]
-            else:
-                # Quantized model type
-                IR_node.attr["dtype"].type = TensorflowParser2.dtype_map[6]
-
-        if '_output_shapes' in source_node.layer.attr:
-            IR_node.attr["_output_shapes"].MergeFromString(
-                source_node.layer.attr['_output_shapes'].SerializeToString())
-
-        if 'paddings' in source_node.layer.attr:
-            IR_node.attr["paddings"].MergeFromString(
-                source_node.layer.attr['paddings'].SerializeToString())
-
-        assign_IRnode_values(IR_node, kwargs)
-
-    def _convert_inedge(self, source_node, IR_node, start_idx=0, end_idx=None):
-        if end_idx is None:
-            end_idx = len(source_node.in_edges)
-        for idx in range(start_idx, end_idx):
-            IR_node.input.append(self.src_graph.get_node(
-                source_node.in_edges[idx]).real_name)
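-        # in_edges hold source-graph node names; resolving through real_name
-        # makes the IR edges point at surviving IR nodes even when
-        # intermediates (Identity, Switch, Merge) were aliased away.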
-
-    @staticmethod
-    def _copy_shape(source_node, IR_node):
-        assert 'shape' in source_node.layer.attr
-        if source_node.layer.attr['shape'].list.shape:
-            IR_node.attr['shape'].shape.MergeFromString(
-                source_node.layer.attr['shape'].list.shape[0].SerializeToString())
-        else:
-            IR_node.attr['shape'].shape.MergeFromString(
-                source_node.layer.attr['shape'].shape.SerializeToString())
-
-    def rename_UNKNOWN(self, source_node):
-        if source_node.type in self.skip_type:
-            return
-        print("Unsupported TensorFlow operator [%s] with name [%s]."
-              % (source_node.type, source_node.name))
-        assert False
-
-    def rename_NoOp(self, source_node):
-        return
-
-    def _convert_padding(self, source_node, IR_node, kernel_size):
-        # TODO: Fused conv and pool with padding is different from defused operators
-        input_node = self.get_parent(source_node.name, [0])
-        input_shape = self.tensor_shape_to_list(
-            input_node.get_attr('_output_shapes'))[0]
-
-        if source_node.get_attr('padding') == 'VALID':
-            dims = len(input_shape)
-            assign_IRnode_values(
-                IR_node, {'auto_pad': "VALID", 'pads': [0, 0] * dims})
-
-        elif source_node.get_attr('padding') == 'SAME':
-            padding = compute_tf_same_padding(
-                input_shape,
-                kernel_size,
-                source_node.get_attr('strides'))
-            assign_IRnode_values(
-                IR_node, {'auto_pad': "SAME_LOWER", 'pads': padding})
-
-        else:
-            assert False
-
-    def _get_bias(self, source_node, IR_node):
-        if not source_node.out_edges:
-            return
-
-        add_node = self.tf_graph.get_node(source_node.out_edges[0])
-        if add_node.type != "Add" and add_node.type != "BiasAdd":
-            return
-
-        variable = self.check_const(self.tf_graph.get_node(
-            add_node.in_edges[1]))  # add_bias node
-        if variable.type != 'Const':
-            return
-
-        bias_value = variable.get_attr('value')
-        bias = tensor_util.MakeNdarray(bias_value)
-
-        # assert variable.get_attr('_output_shapes')[0].dim[0].size == IR_node.attr['kernel_shape'].list.i[-1]
-
-        add_node.real_name = IR_node.name
-        add_node.covered = True
-        IR_node.attr['use_bias'].b = True
-        current_layer = self.weights[source_node.name]
-        current_layer['bias'] = bias
-
-    def _convert_pooling(self, source_node, pool_type):
-        IR_node = self._convert_identity_operation(source_node, new_op='Pool')
-        kwargs = {}
-
-        # strides
-        kwargs['strides'] = source_node.get_attr('strides')
-
-        # window_shape
-        kwargs['kernel_shape'] = source_node.get_attr('ksize')
-
-        # pool type
-        kwargs['pooling_type'] = pool_type
-
-        # padding
-        self._convert_padding(source_node, IR_node,
-                              kwargs['kernel_shape'][1:-1])
-
-        assign_IRnode_values(IR_node, kwargs)
-
-    def _convert_identity_operation(self, source_node, start_idx=0, end_idx=None, new_op=None):
-        IR_node = self.IR_graph.node.add()
-        TensorflowParser2._copy_and_reop(source_node, IR_node, new_op)
-        self._convert_inedge(source_node, IR_node, start_idx, end_idx)
-        return IR_node
-
-    def rename_Relu6(self, source_node):
-        self._convert_identity_operation(source_node, new_op='Relu6')
-
-    def rename_Merge(self, source_node):
-        # In facenet and other networks built with slim.batch_norm there are
-        # two BN branches (train, test); skip the Switch and Merge nodes.
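-        # Aliasing real_name to the first input bypasses the Merge node, so
-        # downstream edges resolve straight to the surviving branch.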
-        source_node.real_name = self.src_graph.get_node(
-            source_node.in_edges[0]).real_name
-
-    def rename_DepthwiseConv2dNative(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, end_idx=1, new_op='DepthwiseConv')
-        kwargs = {}
-        kwargs['strides'] = source_node.get_attr('strides')
-        input_node = self.src_graph.get_parent(source_node.name, [1])
-        kwargs['kernel_shape'] = self.tensor_shape_to_list(
-            input_node.get_attr('_output_shapes'))[0]
-
-        self._convert_padding(source_node, IR_node,
-                              kwargs['kernel_shape'][:-2])
-
-        weight_node = self.src_graph.get_parent(source_node.name, [1])
-        weight = self.check_const(weight_node).get_attr('value')
-        weight_content = tensor_util.MakeNdarray(weight)
-        self.set_weight(source_node.name, 'weights', weight_content)
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_BatchNormWithGlobalNormalization(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, start_idx=0, end_idx=1, new_op='BatchNorm')
-        # epsilon
-        IR_node.attr['epsilon'].f = source_node.get_attr('variance_epsilon')
-
-        # moving variance (var) /read
-        moving_variance = self.get_parent(source_node.name, [2])
-        tensor_variance = moving_variance.get_attr('value')
-        moving_variance_content = tensor_util.MakeNdarray(tensor_variance)
-        self.set_weight(source_node.name, 'var', moving_variance_content)
-
-        # gamma (scale)
-        gamma = self.get_parent(source_node.name, [4])
-        gamma_value = gamma.get_attr('value')
-        gamma = tensor_util.MakeNdarray(gamma_value)
-        self.set_weight(source_node.name, 'scale', gamma)
-        IR_node.attr['scale'].b = True
-
-        # beta (bias)
-        beta = self.get_parent(source_node.name, [3])
-        beta_value = beta.get_attr('value')
-        beta = tensor_util.MakeNdarray(beta_value)
-        self.set_weight(source_node.name, 'bias', beta)
-        IR_node.attr['use_bias'].b = True
-
-        # moving mean (mean)
-        mean = self.get_parent(source_node.name, [1])
-        mean_value = mean.get_attr('value')
-        mean = tensor_util.MakeNdarray(mean_value)
-        self.set_weight(source_node.name, 'mean', mean)
-
-    def rename_Placeholder(self, source_node):
-        if source_node.layer.attr["shape"].shape.unknown_rank:
-            return
-        IR_node = self._convert_identity_operation(
-            source_node, new_op='DataInput')
-        TensorflowParser2._copy_shape(source_node, IR_node)
-        IR_node.attr['shape'].shape.dim[0].size = -1
-        IR_node.attr['_output_shapes'].list.shape[0].dim[0].size = -1
-
-    def rename_Mean(self, source_node):
-        # ReduceMean
-        IR_node = self._convert_identity_operation(
-            source_node, start_idx=0, end_idx=1, new_op='ReduceMean')
-        # keep dims
-        IR_node.attr['keepdims'].b = source_node.layer.attr['keep_dims'].b
-
-        # axes
-        axes = self.get_parent(
-            source_node.name, [1]).layer.attr['value'].tensor
-        axes = tensor_util.MakeNdarray(axes)
-        IR_node.attr['axes'].list.i.extend(axes)
-
-    def rename_Reshape(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, end_idx=1)
-        kwargs = {'shape': self.tensor_shape_to_list(
-            source_node.get_attr('_output_shapes'))[0]}
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_MirrorPad(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, new_op='MirrorPad')
-        input_node = self.src_graph.get_parent(source_node.name, [1])
-
-        tensor_content = tensor_util.MakeNdarray(
-            input_node.get_attr('value')).reshape(-1)
-        kwargs = {}
-        kwargs['mode'] = source_node.get_attr('mode')
-        kwargs['pads'] = tensor_content.tolist()
-
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_Min(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, start_idx=0, end_idx=1, new_op='Min')
-        kwargs = {}
-        input_node = self.src_graph.get_parent(source_node.name, [0])
-        kwargs['shape_0'] = self.tensor_shape_to_list(
-            input_node.get_attr('_output_shapes'))[0]
-
-        input_node = self.src_graph.get_parent(source_node.name, [1])
-        kwargs['shape_1'] = self.tensor_shape_to_list(
-            input_node.get_attr('_output_shapes'))[0]
-        assign_IRnode_values(IR_node, kwargs)
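-    # Min/Max record both operand shapes (shape_0/shape_1) on the IR node
-    # because only the first input is kept as an IR edge; presumably this
-    # lets emitters rebuild the op with the dropped operand's shape.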
-    def rename_Max(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, start_idx=0, end_idx=1, new_op='Max')
-        kwargs = {}
-        input_node = self.src_graph.get_parent(source_node.name, [0])
-        kwargs['shape_0'] = self.tensor_shape_to_list(
-            input_node.get_attr('_output_shapes'))[0]
-
-        input_node = self.src_graph.get_parent(source_node.name, [1])
-        kwargs['shape_1'] = self.tensor_shape_to_list(
-            input_node.get_attr('_output_shapes'))[0]
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_Mul(self, source_node):
-        scopes = self._get_scopes(source_node.name)
-
-        if len(scopes) >= 2:
-            if scopes[-2] == "batchnorm" or scopes[-2].startswith("Assign"):
-                return
-        self._add_constant_node(source_node)
-        self._convert_identity_operation(source_node)
-
-    def rename_Add(self, source_node):
-        scopes = self._get_scopes(source_node.name)
-        if len(scopes) > 2:
-            if scopes[-2] == 'batchnorm':
-                if scopes[-3] == 'BatchNorm' or scopes[-3] == 'batch_normalization':
-                    self._convert_layers_batchnorm(source_node)
-                elif scopes[-3] == 'InstanceNorm':
-                    self._convert_layers_instancenorm(source_node)
-            else:
-                IR_node = self._convert_identity_operation(
-                    source_node, new_op="Add")
-        else:
-            IR_node = self._convert_identity_operation(
-                source_node, new_op="Add")
-
-    def rename_Fill(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, new_op="Fill")
-
-    def rename_Sub(self, source_node):
-        scopes = self._get_scopes(source_node.name)
-        if len(scopes) > 2:
-            if scopes[-2].startswith('Assign') or scopes[-1].startswith('Assign'):
-                return
-        IR_node = self._convert_identity_operation(
-            source_node, end_idx=2, new_op="Sub")
-
-    def rename_Sum(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, start_idx=0, end_idx=1, new_op='Sum')
-        input_node = self.src_graph.get_parent(source_node.name, [0])
-        kwargs = {}
-        kwargs['cal_shape'] = self.tensor_shape_to_list(
-            input_node.get_attr('_output_shapes'))[0]
-
-        input_node_indices = self.src_graph.get_parent(source_node.name, [1])
-        indice_value = input_node_indices.get_attr('value')
-        if indice_value.tensor_content:
-            shapes = tensor_util.MakeNdarray(indice_value)
-            c = shapes.tolist()
-            kwargs['sum_indices'] = c
-        else:
-            kwargs['sum_indices'] = input_node_indices.get_attr(
-                'value').int_val[0]
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_Rsqrt(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, new_op="Rsqrt")
-
-        kwargs = {}
-        input_node = self.src_graph.get_parent(source_node.name, [0])
-        kwargs['shape'] = self.tensor_shape_to_list(
-            input_node.get_attr('_output_shapes'))[0]
-
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_Square(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, new_op='Square')
-        input_node = self.src_graph.get_parent(source_node.name, [0])
-        kwargs = {}
-        kwargs['shape'] = self.tensor_shape_to_list(
-            input_node.get_attr('_output_shapes'))[0]
-
-        assign_IRnode_values(IR_node, kwargs)
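-    # Mul/Add/Sub consult the name scopes first: nodes living inside a
-    # batchnorm or Assign scope are either folded by the _convert_layers_*
-    # helpers above or skipped outright instead of becoming IR nodes.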
-    def rename_Sigmoid(self, source_node):
-        IR_node = self._convert_identity_operation(source_node)
-
-        kwargs = {}
-        input_node = self.src_graph.get_parent(source_node.name, [0])
-        kwargs['shape'] = self.tensor_shape_to_list(
-            input_node.get_attr('_output_shapes'))[0]
-
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_Reciprocal(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, start_idx=0, end_idx=1, new_op='Reciprocal')
-
-    def rename_Minimum(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, new_op='Minimum')
-
-    def rename_Maximum(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, new_op='Maximum')
-
-    def rename_RealDiv(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, new_op='RealDiv')
-
-    def rename_Enter(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, new_op='Enter')
-
-    def rename_Switch(self, source_node):
-        # Skip the node, same as Merge.
-        source_node.real_name = self.src_graph.get_node(
-            source_node.in_edges[0]).real_name
-
-    def rename_Identity(self, source_node):
-        source_node.real_name = self.src_graph.get_node(
-            source_node.in_edges[0]).real_name
-
-    def rename_Exp(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, new_op='Exp')
-
-    def rename_ResizeBilinear(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, end_idx=1, new_op='ResizeBilinear')
-
-    def rename_Cast(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, new_op='Cast')
-        input_node = self.src_graph.get_parent(source_node.name, [0])
-        kwargs = {}
-        kwargs['shape'] = self.tensor_shape_to_list(
-            input_node.get_attr('_output_shapes'))[0]
-
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_Prod(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, new_op='Prod')
-
-        input_node = self.src_graph.get_parent(source_node.name, [0])
-        kwargs = {}
-        kwargs['shape'] = self.tensor_shape_to_list(
-            input_node.get_attr('_output_shapes'))[0]
-
-        input_node_const = self.src_graph.get_parent(source_node.name, [1])
-        kwargs['const'] = input_node_const.get_attr('value').int_val[0]
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_Shape(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, new_op='Shape')
-        input_node = self.src_graph.get_parent(source_node.name, [0])
-        kwargs = {}
-        kwargs['shape'] = self.tensor_shape_to_list(
-            input_node.get_attr('_output_shapes'))[0]
-
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_Squeeze(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, new_op='Squeeze')
-
-    def rename_Gather(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, new_op='Embedding')
-
-        W = self.src_graph.get_parent(source_node.name, [0])
-        W = self.src_graph.get_parent(W.name, [0])
-
-        self.set_weight(source_node.name, "weights", self.ckpt_data[W.name])
-
-        kwargs = {
-            'input_dim': self.ckpt_data[W.name].shape[0],
-            'output_dim': self.ckpt_data[W.name].shape[1],
-            'mask_zero': False
-        }
-        kwargs['axis'] = 0  # add default
-        assign_IRnode_values(IR_node, kwargs)
-
-        return IR_node
-
-    def rename_GatherV2(self, source_node):
-        IR_node = self.rename_Gather(source_node)
-
-        kwargs = {}
-        kwargs['axis'] = source_node.layer.attr['axis'].i
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_StridedSlice(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, end_idx=1, new_op='Slice')
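-        # begin_mask/end_mask are copied verbatim; starts, ends and strides
-        # are read from the Const inputs (inputs 1-3) so the IR Slice node is
-        # self-contained even after those Consts are skipped.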
kwargs = { - 'begin_mask': source_node.get_attr('begin_mask'), - 'end_mask': source_node.get_attr('end_mask'), - } - - starts = self.get_parent( - source_node.name, [1]).layer.attr['value'].tensor - starts = tensor_util.MakeNdarray(starts).tolist() - kwargs['starts'] = starts - - ends = self.get_parent( - source_node.name, [2]).layer.attr['value'].tensor - ends = tensor_util.MakeNdarray(ends).tolist() - kwargs['ends'] = ends - - if self.get_parent(source_node.name, [3]) != None: - strides = self.get_parent( - source_node.name, [3]).layer.attr['value'].tensor - strides = tensor_util.MakeNdarray(strides).tolist() - kwargs['strides'] = strides - - assign_IRnode_values(IR_node, kwargs) - - def rename_ResizeNearestNeighbor(self, source_node): - IR_node = self._convert_identity_operation(source_node) - kwargs = {} - input_node = self.src_graph.get_parent(source_node.name, [0]) - kwargs['shape'] = self.tensor_shape_to_list( - input_node.get_attr('_output_shapes'))[0] - - input_node_size = self.src_graph.get_parent(source_node.name, [1]) - kwargs['size'] = self.tensor_shape_to_list( - input_node_size.get_attr('_output_shapes'))[0] - - assign_IRnode_values(IR_node, kwargs) - - def rename_Conv2D(self, source_node): - IR_node = self._convert_identity_operation( - source_node, end_idx=1, new_op='Conv') - kwargs = {} - kwargs['strides'] = source_node.get_attr('strides') - kwargs['padding'] = source_node.get_attr('padding') - - input_node = self.src_graph.get_parent(source_node.name, [0]) - kwargs['shape'] = self.tensor_shape_to_list( - input_node.get_attr('_output_shapes'))[0] - - # weights - input_node_weight = self.src_graph.get_parent(source_node.name, [1]) - tensor_content = self.check_const(input_node_weight).get_attr('value') - W = tensor_util.MakeNdarray(tensor_content) - - kwargs['kernel_shape'] = self.tensor_shape_to_list( - input_node_weight.get_attr('_output_shapes'))[0] - - self.set_weight(source_node.name, 'weights', W) - - self._convert_padding(source_node, IR_node, - kwargs['kernel_shape'][:-2]) - - assign_IRnode_values(IR_node, kwargs) - self._get_bias(source_node, IR_node) - - def rename_Relu(self, source_node): - IR_node = self._convert_identity_operation(source_node) - kwargs = {'shape': self.tensor_shape_to_list( - source_node.get_attr('_output_shapes'))[0]} - assign_IRnode_values(IR_node, kwargs) - - def rename_MaxPool(self, source_node): - self._convert_pooling(source_node, b'MAX') - - def rename_AvgPool(self, source_node): - self._convert_pooling(source_node, b'AVG') - - def rename_LRN(self, source_node): - IR_node = self._convert_identity_operation(source_node) - - # alpha - IR_node.attr["alpha"].f = float( - source_node.get_attr("alpha", "0.0001")) - # beta - IR_node.attr["beta"].f = float(source_node.get_attr("beta", "0.75")) - IR_node.attr["size"].i = source_node.get_attr("depth_radius") - IR_node.attr["bias"].f = float(source_node.get_attr("bias")) - - def rename_Concat(self, source_node): - n = len(source_node.in_edges) - IR_node = self._convert_identity_operation( - source_node, start_idx=1, end_idx=n, new_op='Concat') - axis = self.tf_graph.get_parent(source_node.name, [0]) - IR_node.attr["axis"].i = axis.get_attr('value').int_val[0] - - def rename_ConcatV2(self, source_node): - n = len(source_node.in_edges) - IR_node = self._convert_identity_operation( - source_node, start_idx=0, end_idx=n-1, new_op='Concat') - axis = self.tf_graph.get_parent(source_node.name, [n-1]) - IR_node.attr["axis"].i = axis.get_attr('value').int_val[0] - - def rename_MatMul(self, source_node): - 
IR_node = self._convert_identity_operation(source_node, end_idx=1) - input_weight_node = self.src_graph.get_parent(source_node.name, [1]) - weightnode = self.check_const(input_weight_node) - weight_value = weightnode.get_attr('value') - - weight = tensor_util.MakeNdarray(weight_value) - self.set_weight(source_node.name, 'weights', weight) - - units = source_node.layer.attr['_output_shapes'].list.shape[-1].dim[-1].size - IR_node.attr['units'].i = units - - if source_node.out_edges and self.tf_graph.get_node(source_node.out_edges[0]).type == 'BiasAdd': - add_node = self.tf_graph.get_node(source_node.out_edges[0]) - add_node.covered = True - add_node.real_name = source_node.real_name - - TensorflowParser2._copy_and_reop( - source_node, IR_node, 'FullyConnected') - variable = self.tf_graph.get_node( - add_node.in_edges[1]) # add_bias node - biasnode = self.check_const(variable) - bias_value = biasnode.get_attr('value') - bias = tensor_util.MakeNdarray(bias_value) - self.set_weight(source_node.name, 'bias', bias) - IR_node.attr['use_bias'].b = True - - def rename_Softmax(self, source_node): - IR_node = self._convert_identity_operation(source_node) - kwargs = {'shape': self.tensor_shape_to_list( - source_node.get_attr('_output_shapes'))[0]} - IR_node.attr["dim"].i = 1 - assign_IRnode_values(IR_node, kwargs) - - def rename_BiasAdd(self, source_node): - # Skip BiasAdd - source_node.real_name = self.src_graph.get_node( - source_node.in_edges[0]).real_name - - def rename_QuantizeV2(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op='QuantizeV2') - TensorflowParser2._copy_shape(source_node, IR_node) - - def rename_QuantizedRelu(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op="QuantizedRelu") - kwargs = {'shape': self.tensor_shape_to_list( - source_node.get_attr('_output_shapes'))[0]} - assign_IRnode_values(IR_node, kwargs) - - def rename_QuantizedReshape(self, source_node): - IR_node = self._convert_identity_operation(source_node, end_idx=1) - kwargs = {'shape': self.tensor_shape_to_list( - source_node.get_attr('_output_shapes'))[0]} - assign_IRnode_values(IR_node, kwargs) - - def rename_QuantizedConv2D(self, source_node): - IR_node = self._convert_identity_operation(source_node, new_op='QConv') - kwargs = {} - kwargs['strides'] = source_node.get_attr('strides') - kwargs['padding'] = source_node.get_attr('padding') - - # weights - input_node = self.src_graph.get_parent(source_node.name, [1]) - tensor_content = input_node.get_attr('value') - W = tensor_util.MakeNdarray(tensor_content) - W = W.astype(np.uint8) - - kwargs['kernel_shape'] = self.tensor_shape_to_list( - input_node.get_attr('_output_shapes'))[0] - - input_node_minw = self.src_graph.get_parent(source_node.name, [4]) - min_W = input_node_minw.get_attr('value').float_val[0] - input_node_maxw = self.src_graph.get_parent(source_node.name, [5]) - max_W = input_node_maxw.get_attr('value').float_val[0] - - if source_node.get_attr('Tfilter') == tensorflow.quint8: - W = ((max_W - min_W)/255.0) * W + min_W - else: - assert False, ('Only uint8 weights handled currently by the converter') - - self.set_weight(source_node.name, 'kernel_weights', W) - assign_IRnode_values(IR_node, kwargs) - - def rename_Requantize(self, source_node): - input_node = self.get_parent(source_node.name, [0]) - son_node = self.get_son(source_node.name, [0]) - - son_node.real_name = source_node.name - - def rename_RequantizationRange(self, source_node): - IR_node = self._convert_identity_operation( - 
            source_node, new_op='RequantizationRange')
-        TensorflowParser2._copy_shape(source_node, IR_node)
-
-    def rename_ZerosLike(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, new_op='ZerosLike')
-
-    def rename_Rank(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, new_op='Rank')
-
-    def rename_Transpose(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, end_idx=1, new_op='Transpose')
-
-        input_node_perm = self.get_parent(source_node.name, [1])
-        # input_node_perm = self.check_const(self.get_parent(source_node.name, [1], True))
-        tensor_content = input_node_perm.get_attr('value')
-        perm = tensor_util.MakeNdarray(tensor_content).tolist()
-        assign_IRnode_values(IR_node, {'perm': perm})
-
-    def rename_GreaterEqual(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, end_idx=1, new_op='GreaterEqual')
-
-    def rename_Greater(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, end_idx=1, new_op='Greater')
-
-    def rename_Equal(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, new_op='Equal')
-
-    def rename_All(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, new_op='All')
-
-    def rename_LogicalAnd(self, source_node):
-        # LogicalAnd is lowered to an element-wise Mul in the IR
-        IR_node = self._convert_identity_operation(source_node, new_op='Mul')
-
-    def rename_Pad(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, end_idx=1, new_op='Pad')
-        kwargs = {}
-        kwargs['mode'] = 'constant'
-
-        # paddings
-        padding = self.get_parent(source_node.name, [1]).layer.attr['value'].tensor
-        shapes = tensor_util.MakeNdarray(padding)
-        kwargs['pads'] = convert_tf_pad_to_onnx(shapes)
-
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_FusedBatchNorm(self, source_node):
-        scalenode = self.check_const(
-            self.get_parent(source_node.name, [1], True))
-        # an edge name containing ':' refers to another op's output tensor
-        # (e.g. 'bn/read:1'), so there is no Const scale to extract here
-        if ':' in source_node.in_edges[1]:
- scalenode = None - - if scalenode: - scale_value = scalenode.get_attr('value') - IR_node = self._convert_identity_operation( - source_node, end_idx=1, new_op='BatchNorm') - # for attr.shape >= 2 - for i in range(len(IR_node.attr["_output_shapes"].list.shape)-1): - IR_node.attr["_output_shapes"].list.shape.pop() - - else: - # For models built by slim.batch_norm, remove duplicate BN (eg.facenet) - return - - scale = tensor_util.MakeNdarray(scale_value) - self.set_weight(source_node.name, 'scale', scale) - IR_node.attr['scale'].b = True - - IR_node.attr['epsilon'].f = source_node.get_attr('epsilon', 0) - biasnode = self.check_const( - self.get_parent(source_node.name, [2], True)) - if biasnode: - bias_value = biasnode.get_attr('value') - else: - innode = self.get_parent(source_node.name, [2], True) - name = innode.name.split(':')[0] - bias_value = self.check_const( - self.src_graph.layer_map[name]).get_attr('value') - bias = tensor_util.MakeNdarray(bias_value) - self.set_weight(source_node.name, 'bias', bias) - IR_node.attr['bias'].b = True - - meannode = self.check_const( - self.get_parent(source_node.name, [3], True)) - mean_value = meannode.get_attr('value') - mean = tensor_util.MakeNdarray(mean_value) - self.set_weight(source_node.name, 'mean', mean) - - variancenode = self.check_const( - self.get_parent(source_node.name, [4], True)) - variance_value = variancenode.get_attr('value') - variance = tensor_util.MakeNdarray(variance_value) - self.set_weight(source_node.name, 'var', variance) - - def rename_SpaceToBatchND(self, source_node): - IR_node = self._convert_identity_operation( - source_node, end_idx=1, new_op='SpaceToBatchND') - - def rename_BatchToSpaceND(self, source_node): - IR_node = self._convert_identity_operation( - source_node, end_idx=1, new_op='BatchToSpaceND') - - def rename_ArgMax(self, source_node): - IR_node = self._convert_identity_operation( - source_node, end_idx=1, new_op='ArgMax') - - def rename_Slice(self, source_node): - IR_node = self._convert_identity_operation(source_node, new_op='Slice') - - def rename_Split(self, source_node): - if source_node.get_attr('num_split') == 1: - source_node.real_name = self.get_parent( - source_node.name, [1]).real_name - - else: - IR_node = self._convert_identity_operation( - source_node, start_idx=1, new_op='Split') - kwargs = { - 'axis': self.get_parent(source_node.name, [0]).layer.attr['value'].tensor.int_val[0], - 'split': source_node.get_attr('num_split') - } - assign_IRnode_values(IR_node, kwargs) - - def rename_Tile(self, source_node): - IR_node = self._convert_identity_operation(source_node, new_op='Tile') - - def rename_Sqrt(self, source_node): - IR_node = self._convert_identity_operation(source_node, new_op='Sqrt') - - def rename_Tanh(self, source_node): - IR_node = self._convert_identity_operation(source_node) - - kwargs = {} - input_node = self.src_graph.get_parent(source_node.name, [0]) - kwargs['shape'] = self.tensor_shape_to_list( - input_node.get_attr('_output_shapes'))[0] - - assign_IRnode_values(IR_node, kwargs) - - def rename_Log(self, source_node): - IR_node = self._convert_identity_operation(source_node, new_op='Log') -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ---------------------------------------------------------------------------------------------- - -from mmdnn.conversion.common.DataStructure.graph import GraphNode, Graph -from tensorflow.core.framework.node_def_pb2 import NodeDef -from tensorflow.core.framework import attr_value_pb2 - - -class TensorflowGraphNode(GraphNode): - - def __init__(self, layer): - super(TensorflowGraphNode, self).__init__(layer) - self.in_nodes = list() - self.out_nodes = list() - self._scope = str() - - @property - def scope(self): - return self._scope - - @scope.setter - def scope(self, scope): - self._scope = scope - - @property - def name(self): - return self.layer.name - - @property - def type(self): - return self.layer.op - - @property - def tf_layer(self): - return self.layer - - def get_attr(self, name, default_value=None): - if name in self.layer.attr: - attr = self.layer.attr[name] - field = attr.WhichOneof('value') - val = getattr(attr, field) if field else default_value - if isinstance(val, attr_value_pb2.AttrValue.ListValue): - return list(val.ListFields()[0][1]) - else: - return val.decode('utf-8') if isinstance(val, bytes) else val - else: - return default_value - - -class TensorflowGraph(Graph): - - multi_tensor_type = [ - "Slice", - "Split", - "Unpack" - ] - - def __init__(self, model): - # sanity check. - pass - - super(TensorflowGraph, self).__init__(model) - self.model = model - - def build(self): - for i, layer in enumerate(self.model.node): - self.layer_map[layer.name] = TensorflowGraphNode(layer) - self.layer_name_map[layer.name] = layer.name - for pred in layer.input: - if pred not in self.layer_map: - if not pred.split(':')[0] in self.layer_map: # test - new_node = NodeDef() - new_node.name = pred - new_node.op = "NoOp" - self.layer_map[pred] = TensorflowGraphNode(new_node) - self.layer_name_map[pred] = pred - - self.tf_make_connection(pred, layer.name) - - super(TensorflowGraph, self).build() - - def tf_make_connection(self, src, dst): - - if ':' not in src and self.get_node(src).type in self.multi_tensor_type: - src += ':0' - - self._make_connection(src, dst) - src_node = self.get_node(src.split(':')[0]) - dst_node = self.get_node(dst.split(':')[0]) - - if not src_node in self.layer_map[dst.split(':')[0]].in_nodes: - self.layer_map[dst.split(':')[0]].in_nodes.append(src_node) - if not dst_node in self.layer_map[src.split(':')[0]].out_nodes: - self.layer_map[src.split(':')[0]].out_nodes.append(dst_node) -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ---------------------------------------------------------------------------------------------- - -import numpy as np -import tensorflow -from tensorflow.python.framework import tensor_util -from tensorflow.core.framework import attr_value_pb2 -from mmdnn.conversion.tensorflow.tensorflow_graph import TensorflowGraph -import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2 -from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType -from mmdnn.conversion.common.utils import * -from mmdnn.conversion.common.DataStructure.parser import Parser -from tensorflow.tools.graph_transforms import TransformGraph -from mmdnn.conversion.rewriter.utils import * -import tempfile -import os -import shutil - - -class TensorflowParser(Parser): - - skip_prefix = [ - "^", - "train_op", - "save", - "gradients", - "global_step", - "distort_image", - "Adagrad", - ] - - skip_scope = [ - "random_uniform", - "Initializer", - "optimizer", - "weight_loss", - "parallel_read", - "case" - ] - - skip_type = set([ - "L2Loss", - "VariableV2", - "Const", - "Assign", - "RandomUniform", - "FIFOQueueV2" - ]) - - dtype_map = { - 0: graph_pb2.DT_UNDEFINED, - 1: graph_pb2.DT_FLOAT32, - 2: graph_pb2.DT_FLOAT64, - 3: graph_pb2.DT_INT32, - 4: graph_pb2.DT_UINT8, - 5: graph_pb2.DT_INT16, - 6: graph_pb2.DT_INT8, - 7: graph_pb2.DT_STRING, - 9: graph_pb2.DT_INT64, - 10: graph_pb2.DT_BOOL, - 19: graph_pb2.DT_FLOAT16 - } - - @property - def src_graph(self): - return self.tf_graph - - @staticmethod - def _shapeToStr(shapes): - return [dim.size if dim.size > 0 else 1 for dim in shapes.dim] - - @staticmethod - def _load_meta(model_network_path): - """Load a tensorflow meta file from disk - - Parameters - ---------- - model_network_path: str - Path where the model network path is (protobuf meta file) - - Returns - ------- - model: A tensorflow protobuf file - """ - from tensorflow.core.protobuf import meta_graph_pb2 - from mmdnn.conversion.common.IR.IR_graph import load_protobuf_from_file - - meta_graph = meta_graph_pb2.MetaGraphDef() - load_protobuf_from_file(meta_graph, model_network_path) - graph = meta_graph.graph_def - - print( - "Tensorflow model file [%s] loaded successfully." % model_network_path) - return graph - - @staticmethod - def _load_weights(model_weight_path): - """Load a tensorflow checkpoint file from disk - - Parameters - ---------- - model_weight_path: str - Path where the weight path is (checkpoint file) - - Returns - ------- - model: tensor name --> ndarry - """ - reader = tensorflow.train.NewCheckpointReader(model_weight_path) - var_to_shape_map = reader.get_variable_to_shape_map() - data = dict() - for name in var_to_shape_map: - tensor = reader.get_tensor(name) - data[name] = tensor - - print("Tensorflow checkpoint file [%s] loaded successfully. [%d] variables loaded." 
% ( - model_weight_path, len(data))) - return data - - @staticmethod - def _get_scopes(layer_name): - return layer_name.split('/') - - def check_const(self, node): - while node: - if node.type == "Const": - return node - elif node.type == "NoOp": - return None - else: - node = self.get_parent(node.name, [0]) - - def _add_constant_node(self, source_node): - parent_ids = range(len(source_node.in_edges)) - for idx in parent_ids: - parent_node = self.tf_graph.get_node(source_node.in_edges[idx]) - if parent_node.type == 'Const': - self._rename_Const(parent_node) - - def _rename_Const(self, source_node): - IR_node = self._convert_identity_operation( - source_node, in_edge_count=0, new_op='Constant') # Constant - value = source_node.get_attr('value') - if value.float_val: - shape = tuple(self.tensor_shape_to_list(value.tensor_shape)) - value = np.full(shape, value.float_val[0]) - elif value.int_val: - shape = tuple(self.tensor_shape_to_list(value.tensor_shape)) - value = np.full(shape, value.int_val[0]) - else: - value = np.array(tensor_util.MakeNdarray(value).tolist()) - - if value.ndim > 1: - self.set_weight(source_node.name, 'value', value) - else: - kwargs = {'value': value} - assign_IRnode_values(IR_node, kwargs) - - def _convert_reduction_operators(self, source_node, new_op=None): - IR_node = self._convert_identity_operation(source_node, 0, 1, new_op) - - # keep dims - IR_node.attr['keepdims'].b = source_node.layer.attr['keep_dims'].b - - # axes - axes = self.get_parent( - source_node.name, [1]).layer.attr['value'].tensor - axes = tensor_util.MakeNdarray(axes) - IR_node.attr['axes'].list.i.extend(axes) - - def _convert_layers_batchnorm(self, source_node): - # name, op - IR_node = self.IR_graph.node.add() - TensorflowParser._copy_and_reop(source_node, IR_node, 'BatchNorm') - - # epsilon - epsilon = self.get_parent(source_node.name, [1]) - IR_node.attr['epsilon'].f = epsilon.layer.attr['value'].tensor.float_val[0] - - # moving variance (var) - moving_variance = self.get_parent(source_node.name, [0, 0]) - # print(moving_variance.name) - if self.weight_loaded and moving_variance.name in self.ckpt_data.keys(): - self.set_weight(source_node.name, 'var', - self.ckpt_data[moving_variance.name]) - - # gamma (scale) - gamma = self.get_son(source_node.name, [0, 0], True) - gamma = self.get_parent(gamma.name, [1, 0], True) - if gamma is None or not gamma.type.startswith('Variable'): - IR_node.attr['scale'].b = False - output_node = self.get_son(source_node.name, [0, 0, 0], True) - else: - IR_node.attr['scale'].b = True - if self.weight_loaded: - self.set_weight(source_node.name, 'scale', - self.ckpt_data[gamma.name]) - output_node = self.get_son(source_node.name, [0, 0, 0, 0], True) - if output_node.type == 'Sub': - output_node = self.get_son( - source_node.name, [0, 0, 1, 0], True) - - # mean - mean = self.get_parent(output_node.name, [1, 1, 0, 0], True) - if self.weight_loaded and mean.name in self.ckpt_data.keys(): - self.set_weight(source_node.name, 'mean', - self.ckpt_data[mean.name]) - - # bias - bias = self.get_parent(output_node.name, [1, 0, 0], True) - if bias is None or not bias.type.startswith('Variable'): - IR_node.attr['bias'].b = False - else: - IR_node.attr['bias'].b = True - if self.weight_loaded: - self.set_weight(source_node.name, 'bias', - self.ckpt_data[bias.name]) - - # input node - assert output_node.type == 'Add' - input_node = self.get_parent(output_node.name, [0, 0]) - IR_node.input.append(input_node.real_name) - - # output node - output_node.real_name = source_node.name - - 
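# A minimal standalone sketch of the checkpoint-reading pattern that _load_weights
# above relies on, assuming TensorFlow 1.x (the API family this parser targets);
# the checkpoint path below is illustrative, not from the repository.
import tensorflow as tf

def load_ckpt_weights(ckpt_path):
    """Return a {variable_name: ndarray} dict from a TF1 checkpoint."""
    reader = tf.train.NewCheckpointReader(ckpt_path)  # tf.compat.v1.train in TF 2.x
    return {name: reader.get_tensor(name)
            for name in reader.get_variable_to_shape_map()}

# weights = load_ckpt_weights('./model.ckpt')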
def __init__(self, meta_file, checkpoint_file, dest_nodes, inputShape=None, in_nodes=None): - super(TensorflowParser, self).__init__() - - # load model files into TensorFlow graph - if meta_file: - model = TensorflowParser._load_meta(meta_file) - - if checkpoint_file: - self.ckpt_data = TensorflowParser._load_weights(checkpoint_file) - self.weight_loaded = True - - # extract subgraph using in_nodes and dest_nodes - if in_nodes != None and inputShape != None: - from tensorflow.python.tools import strip_unused_lib - from tensorflow.python.framework import dtypes - from tensorflow.python.platform import gfile - model = strip_unused_lib.strip_unused( - input_graph_def=model, - input_node_names=in_nodes, - output_node_names=dest_nodes, - placeholder_type_enum=dtypes.float32.as_datatype_enum) - - input_list = [None] - for i in range(len(inputShape)): - input_list.append(tensorflow.Dimension(inputShape[i])) - tensor_input = tensorflow.TensorShape(input_list) - # Build network graph - self.tf_graph = TensorflowGraph(model) - for node in self.tf_graph.model.node: - if node.name in in_nodes: - node.attr['shape'].shape.CopyFrom(tensor_input.as_proto()) - # unknown_rank pop - node.attr['_output_shapes'].list.shape.pop() - node.attr['_output_shapes'].list.shape.extend( - [tensor_input.as_proto()]) - - # extract subgraph using dest_nodes - elif dest_nodes != None: - from tensorflow.python.framework.graph_util import extract_sub_graph - model = extract_sub_graph(model, dest_nodes) - self.tf_graph = TensorflowGraph(model) - - else: - self.tf_graph = TensorflowGraph(model) - - # Graph Transform - transforms = ["fold_constants(ignore_errors=true)"] - - # Get input node name - if not in_nodes: - in_nodes = [] - for node in model.node: - if node.op == 'Placeholder': - in_nodes.append(node.name) - - transformed_graph_def = TransformGraph(model, in_nodes, - dest_nodes, transforms) - in_type_list = {} - in_shape_list = {} - - for n in transformed_graph_def.node: - if n.name in in_nodes: - in_type_list[n.name] = n.attr['dtype'].type - in_node_shape = n.attr['shape'].shape - in_node_shape_str = self._shapeToStr(in_node_shape) - in_shape_list[n.name] = in_node_shape_str - - dtype = tensorflow.float32 - with tensorflow.Graph().as_default() as g: - input_map = {} - for in_node in in_nodes: - if in_type_list[in_node] == 1 or in_type_list[in_node] == 0: - dtype = tensorflow.float32 - - elif in_type_list[in_node] == 3: - dtype = tensorflow.int32 - - elif in_type_list[in_node] == 10: - dtype = tensorflow.bool - - x = tensorflow.placeholder(dtype, shape=in_shape_list[in_node]) - input_map[in_node] = x - - tensorflow.import_graph_def( - transformed_graph_def, name='', input_map=input_map) - - with tensorflow.Session(graph=g) as sess: - tempdir = tempfile.mkdtemp() - meta_graph_def = tensorflow.train.export_meta_graph( - filename=os.path.join(tempdir, 'my-model.meta')) - model = meta_graph_def.graph_def - shutil.rmtree(tempdir) - - self.tf_graph = TensorflowGraph(model) - self.tf_graph.build() - - process_graph(self.tf_graph, self.ckpt_data) - - @classmethod - def _skip_node(cls, source_node): - if source_node.covered: - return True - - for prefix in cls.skip_prefix: - if source_node.name.startswith(prefix): - return True - - scopes = TensorflowParser._get_scopes(source_node.name) - - for s in scopes: - if s in cls.skip_scope: - return True - - return False - - @staticmethod - def tensor_shape_to_list(shapes): - if isinstance(shapes, attr_value_pb2.AttrValue): - return [dim.size for dim in shapes.shape.dim] - elif 
isinstance(shapes, attr_value_pb2.tensorflow_dot_core_dot_framework_dot_tensor__shape__pb2.TensorShapeProto): - return [dim.size for dim in shapes.dim] - else: - ret = [] - for shape in shapes: - this_one = [dim.size for dim in shape.dim] - ret.append(this_one) - return ret - - ''' - check current source_node wether has input weights. If it has, set the weights into weight dict and remove the input edge. - return edges' index which do not include edge connecting weights - ''' - - def _check_weights(self, source_node, start_edge_id=0, in_edge_count=None): - if in_edge_count == None: - in_edge_count = len(source_node.in_edges) - start_edge_id - valid_pre_ids = [] - - for pre_idx in range(start_edge_id, start_edge_id + in_edge_count): - pre_node = self.get_parent(source_node.name, [pre_idx]) - if pre_node.type == 'Identity' and pre_node.name.split('/')[-1] == 'read': - weight_node = self.get_parent(pre_node.name, [0]) - assert 'Variable' in weight_node.type - self.set_weight(source_node.name, 'weights', - self.ckpt_data[weight_node.name]) - source_node.feed_weights = True - else: - valid_pre_ids.append(pre_idx) - - return valid_pre_ids - - def _convert_padding(self, source_node, IR_node, kernel_size): - # TODO: Fused conv and pool with padding is different from defused operators - input_node = self.get_parent(source_node.name, [0]) - input_shape = self.tensor_shape_to_list( - input_node.get_attr('_output_shapes'))[0] - - if source_node.get_attr('padding') == 'VALID': - dims = len(input_shape) - assign_IRnode_values( - IR_node, {'auto_pad': "VALID", 'pads': [0, 0] * dims}) - - elif source_node.get_attr('padding') == 'SAME': - padding = compute_tf_same_padding( - input_shape, - kernel_size, - source_node.get_attr('strides')) - assign_IRnode_values( - IR_node, {'auto_pad': "SAME_UPPER", 'pads': padding}) - - else: - assert False - - def _convert_pooling(self, source_node, pool_type): - IR_node = self._convert_identity_operation(source_node, new_op='Pool') - kwargs = {} - - # strides - kwargs['strides'] = source_node.get_attr('strides') - - # window_shape - kwargs['kernel_shape'] = source_node.get_attr('ksize') - - # pool type - kwargs['pooling_type'] = pool_type - - # padding - self._convert_padding(source_node, IR_node, - kwargs['kernel_shape'][1:-1]) - - assign_IRnode_values(IR_node, kwargs) - - def gen_IR(self): - for layer in self.src_graph.topological_sort: - current_node = self.src_graph.get_node(layer) - - if self._skip_node(current_node): - continue - - node_type = current_node.type - - if hasattr(self, "rename_" + node_type): - func = getattr(self, "rename_" + node_type) - func(current_node) - else: - self.rename_UNKNOWN(current_node) - - @staticmethod - def _copy_and_reop(source_node, IR_node, new_op=None): - if new_op == None: - new_op = source_node.type - IR_node.name = source_node.name - IR_node.op = new_op - - kwargs = {} - if 'data_format' in source_node.layer.attr: - kwargs["data_format"] = source_node.get_attr('data_format') - - if 'dtype' in source_node.layer.attr: - assert source_node.layer.attr['dtype'].type in TensorflowParser.dtype_map, 'type [{}] is unknown.'.format( - source_node.layer.attr['dtype'].type) - IR_node.attr["dtype"].type = TensorflowParser.dtype_map[source_node.layer.attr['dtype'].type] - - if '_output_shapes' in source_node.layer.attr: - IR_node.attr["_output_shapes"].MergeFromString( - source_node.layer.attr['_output_shapes'].SerializeToString()) - - if hasattr(source_node, 'feed_weights'): - kwargs["feed_weights"] = True - - if hasattr(source_node, 
'kwargs'): - kwargs.update(source_node.kwargs) - - kwargs['scope'] = source_node.scope - - assign_IRnode_values(IR_node, kwargs) - - def _convert_inedge(self, source_node, IR_node, start_idx=0, end_idx=None, in_ids=None): - if end_idx == None: - end_idx = len(source_node.in_edges) - start_idx - if not in_ids: - in_ids = range(start_idx, end_idx + start_idx) - - for idx in in_ids: - if ':' in source_node.in_edges[idx]: - input_tensor = self.src_graph.get_node( - source_node.in_edges[idx]).real_name + ':' + source_node.in_edges[idx].split(':')[1] - else: - input_tensor = self.src_graph.get_node( - source_node.in_edges[idx]).real_name - - IR_node.input.append(input_tensor) - - def _get_bias(self, source_node, IR_node): - if not source_node.out_edges: - return - - add_node = self.tf_graph.get_node(source_node.out_edges[0]) - if add_node.type != "Add" and add_node.type != "BiasAdd": - return - - variable = self.tf_graph.get_node(add_node.in_edges[1]) - if variable.type != "Identity": - return - variable = self.tf_graph.get_node(variable.in_edges[0]) - - assert variable.layer.attr['shape'].shape.dim[0].size == IR_node.attr['kernel_shape'].list.i[-1] - - if self.weight_loaded: - assert variable.name in self.ckpt_data - current_layer = self.weights[source_node.name] - current_layer['bias'] = self.ckpt_data[variable.name] - - add_node.real_name = IR_node.name - add_node.covered = True - IR_node.attr['use_bias'].b = True - - @staticmethod - def _copy_shape(source_node, IR_node): - assert 'shape' in source_node.layer.attr - if source_node.layer.attr['shape'].list.shape: - IR_node.attr['shape'].shape.MergeFromString( - source_node.layer.attr['shape'].list.shape[0].SerializeToString()) - else: - IR_node.attr['shape'].shape.MergeFromString( - source_node.layer.attr['shape'].shape.SerializeToString()) - - def rename_UNKNOWN(self, source_node): - if source_node.type in self.skip_type: - return - print("TensorflowEmitter has not supported operator [%s] with name [%s]." 
- % (source_node.type, source_node.name)) - return - - def rename_Placeholder(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op='DataInput') - # shape - TensorflowParser._copy_shape(source_node, IR_node) - if len(IR_node.attr['shape'].shape.dim) > 0 and len(IR_node.attr['_output_shapes'].list.shape) > 0 and len(IR_node.attr['_output_shapes'].list.shape[0].dim) > 0: - IR_node.attr['shape'].shape.dim[0].size = -1 - IR_node.attr['_output_shapes'].list.shape[0].dim[0].size = -1 - - def rename_Conv2D(self, source_node): - """ - weights: name_weights, name_bias - """ - IR_node = self._convert_identity_operation(source_node, 0, 1, 'Conv') - - kwargs = {} - - # strides - kwargs['strides'] = source_node.get_attr('strides') - - # input[1] : W - # filter - W = self.tf_graph.get_node(source_node.layer.input[1]) - if W.type == 'Const': - kwargs['kernel_shape'] = tensor_shape = self.tensor_shape_to_list( - W.layer.attr['value'].tensor.tensor_shape) - else: - W = self.tf_graph.get_node(W.layer.input[0]).layer - kwargs['kernel_shape'] = self.tensor_shape_to_list(W.attr['shape']) - - # padding - self._convert_padding(source_node, IR_node, - kwargs['kernel_shape'][:-2]) - - if self.weight_loaded: - self.set_weight(source_node.name, 'weights', - self.ckpt_data[W.name]) - - assign_IRnode_values(IR_node, kwargs) - # output[0] : B - self._get_bias(source_node, IR_node) - - def _convert_identity_operation(self, source_node, start_edge_id=0, in_edge_count=None, new_op=None): - IR_node = self.IR_graph.node.add() - in_ids = self._check_weights(source_node, start_edge_id, in_edge_count) - TensorflowParser._copy_and_reop(source_node, IR_node, new_op) - self._convert_inedge(source_node, IR_node, - start_edge_id, in_edge_count, in_ids) - return IR_node - - def rename_Relu(self, source_node): - self._convert_identity_operation(source_node) - - def rename_Softmax(self, source_node): - self._convert_identity_operation(source_node) - - def rename_Relu6(self, source_node): - self._convert_identity_operation(source_node) - - def rename_Add(self, source_node): - if not source_node.covered: - scopes = self._get_scopes(source_node.name) - if len(scopes) < 3: - self._convert_identity_operation(source_node) - - elif scopes[-2] == 'dropout': - # converted [dropout] - pass - - elif scopes[-2] == 'batchnorm': - # convert [tf.contrib.layers.batch_norm] - self._convert_layers_batchnorm(source_node) - - else: - # normal Add - self._add_constant_node(source_node) - self._convert_identity_operation(source_node) - - def rename_Sub(self, source_node): - self._add_constant_node(source_node) - self._convert_identity_operation(source_node) - - def rename_Reshape(self, source_node): - IR_node = self._convert_identity_operation( - source_node, in_edge_count=1) - kwargs = {'shape': self.tensor_shape_to_list( - source_node.get_attr('_output_shapes'))[0]} - assign_IRnode_values(IR_node, kwargs) - - def rename_Abs(self, source_node): - IR_node = self._convert_identity_operation( - source_node, in_edge_count=1, new_op='Abs') - - def rename_Square(self, source_node): - IR_node = self._convert_identity_operation( - source_node, in_edge_count=1, new_op='Square') - - def rename_MatMul(self, source_node): - - W = self.tf_graph.get_node(self.tf_graph.get_node( - source_node.in_edges[1]).in_edges[0]) - - if 'Variable' in W.type: - - """ - weights: name_weights, name_bias - """ - IR_node = self._convert_identity_operation( - source_node, in_edge_count=1) - - # units - units = 
source_node.layer.attr['_output_shapes'].list.shape[-1].dim[-1].size - IR_node.attr['units'].i = units - - # Weights - W = self.tf_graph.get_node(self.tf_graph.get_node( - source_node.in_edges[1]).in_edges[0]) - if self.weight_loaded: - self.set_weight(source_node.name, 'weights', - self.ckpt_data[W.name]) - - if source_node.out_edges and (self.tf_graph.get_node(source_node.out_edges[0]).type == 'Add' or self.tf_graph.get_node(source_node.out_edges[0]).type == 'BiasAdd'): - add_node = self.tf_graph.get_node(source_node.out_edges[0]) - add_node.covered = True - add_node.real_name = source_node.real_name - # FullyConnected Layer - # name, op - TensorflowParser._copy_and_reop( - source_node, IR_node, 'FullyConnected') - - # get Bias - B = self.tf_graph.get_node(self.tf_graph.get_node( - source_node.out_edges[0]).in_edges[1]).in_edges[0] - if self.weight_loaded: - self.set_weight(source_node.name, 'bias', - self.ckpt_data[B]) - IR_node.attr['use_bias'].b = True - - else: - # Matmul Layer - TensorflowParser._copy_and_reop( - source_node, IR_node, 'FullyConnected') - assign_IRnode_values(IR_node, {'use_bias': False}) - else: - self._convert_identity_operation(source_node, new_op='MatMul') - - def rename_RealDiv(self, source_node): - scopes = self._get_scopes(source_node.name) - - # Deal Dropout - if len(scopes) > 1 and scopes[-2][:7] == 'dropout': - IR_node = self._convert_identity_operation( - source_node, in_edge_count=1, new_op='Dropout') - - # keep prob - if 'value' in self.tf_graph.get_node(source_node.layer.input[1]).layer.attr: - IR_node.attr['keep_prob'].f = self.tf_graph.get_node( - source_node.layer.input[1]).layer.attr['value'].tensor.float_val[0] - else: - IR_node.attr['keep_prob'].f = 1.0 - - # Remove nodes - # Mul - mul_node = self.tf_graph.get_node(source_node.out_edges[0]) - assert mul_node.type == "Mul" - mul_node.covered = True - mul_node.real_name = source_node.name - - # Floor - floor_node = self.tf_graph.get_node(mul_node.in_edges[1]) - assert floor_node.type == "Floor" - floor_node.covered = True - - else: - # print (source_node) - # print (source_node.layer) - # assert False - self._convert_identity_operation(source_node, new_op='Div') - - def rename_Floor(self, source_node): - scopes = self._get_scopes(source_node.name) - assert scopes[-2] == 'dropout' - - def rename_MaxPool(self, source_node): - self._convert_pooling(source_node, b'MAX') - - def rename_AvgPool(self, source_node): - self._convert_pooling(source_node, b'AVG') - - def rename_Identity(self, source_node): - source_node.real_name = self.src_graph.get_node( - source_node.in_edges[0]).real_name - - def rename_Squeeze(self, source_node): - IR_node = self._convert_identity_operation(source_node) - IR_node.attr['axes'].MergeFromString( - source_node.layer.attr['squeeze_dims'].SerializeToString()) - - def rename_QueueDequeueUpToV2(self, source_node): - IR_node = self._convert_identity_operation( - source_node, in_edge_count=0, new_op='DataInput') - IR_node.attr['shape'].shape.MergeFromString( - source_node.layer.attr['_output_shapes'].list.shape[0].SerializeToString()) - IR_node.attr['shape'].shape.dim[0].size = -1 - IR_node.attr['dtype'].type = self.dtype_map[source_node.layer.attr['component_types'].list.type[0]] - - def rename_QueueDequeueManyV2(self, source_node): - IR_node = self._convert_identity_operation( - source_node, in_edge_count=0, new_op='DataInput') - IR_node.attr['shape'].shape.MergeFromString( - source_node.layer.attr['_output_shapes'].list.shape[0].SerializeToString()) - 
IR_node.attr['shape'].shape.dim[0].size = -1 - IR_node.attr['dtype'].type = self.dtype_map[source_node.layer.attr['component_types'].list.type[0]] - - # def rename_RandomShuffleQueueV2(self, source_node): - # # print(source_node.layer) - # IR_node = self._convert_identity_operation(source_node, in_edge_count = 0, new_op = 'DataInput') - # # IR_node.attr['shape'].shape.MergeFromString(source_node.layer.attr['_output_shapes'].list.shape[0].SerializeToString()) - # # IR_node.attr['shape'].shape.dim[0].size = -1 - # IR_node.attr['dtype'].type = self.dtype_map[source_node.layer.attr['component_types'].list.type[0]] - - def rename_Pad(self, source_node): - IR_node = self._convert_identity_operation( - source_node, in_edge_count=1, new_op='Pad') - kwargs = {} - kwargs['mode'] = 'constant' - kwargs['constant_values'] = 0.0 - - # paddings - padding = self.get_parent( - source_node.name, [1]).layer.attr['value'].tensor - shapes = tensor_util.MakeNdarray(padding) - kwargs['pads'] = convert_tf_pad_to_onnx(shapes) - - assign_IRnode_values(IR_node, kwargs) - - def rename_Mean(self, source_node): - self._convert_reduction_operators(source_node, new_op='ReduceMean') - - def rename_ConcatV2(self, source_node): - n = len(source_node.in_edges) - 1 - self._add_constant_node(source_node) - IR_node = self._convert_identity_operation( - source_node, in_edge_count=n, new_op='Concat') - axis = self.tf_graph.get_parent(source_node.name, [n]) - IR_node.attr['axis'].i = axis.layer.attr['value'].tensor.int_val[0] - - def rename_DepthwiseConv2dNative(self, source_node): - IR_node = self._convert_identity_operation( - source_node, in_edge_count=1, new_op='DepthwiseConv') - kwargs = {} - kwargs['strides'] = source_node.get_attr('strides') - - input_node = self.src_graph.get_parent(source_node.name, [1]) - kwargs['kernel_shape'] = self.tensor_shape_to_list( - input_node.get_attr('_output_shapes'))[0] - - self._convert_padding(source_node, IR_node, - kwargs['kernel_shape'][:-2]) - - if self.weight_loaded: - weight = self.src_graph.get_parent(source_node.name, [1, 0]) - self.set_weight(source_node.name, 'weights', - self.ckpt_data[weight.name]) - - assign_IRnode_values(IR_node, kwargs) - - def rename_FusedBatchNorm(self, source_node): - IR_node = self._convert_identity_operation( - source_node, in_edge_count=1, new_op='BatchNorm') - IR_node.attr['epsilon'].f = source_node.get_attr('epsilon', 0) - - # gamma (scale) - scale = self.get_parent(source_node.name, [1], True) - - if scale.type == 'Const': - value = scale.get_attr('value') - shape = value.tensor_shape - assert len(shape.dim) == 1 - shape = shape.dim[0].size - - assert len(value.float_val) == 1 - value = value.float_val[0] - - if np.isclose(value, 1.0): - IR_node.attr['scale'].b = False - else: - IR_node.attr['scale'].b = True - if self.weight_loaded: - self.set_weight(source_node.name, 'scale', - np.array([value] * shape)) - - else: - scale = self.get_parent(scale.name, [0], True) - if self.weight_loaded: - self.set_weight(source_node.name, 'scale', - self.ckpt_data[scale.name]) - IR_node.attr['scale'].b = True - - # bias - bias = self.get_parent(source_node.name, [2, 0], True) - IR_node.attr['bias'].b = True - - # Mean - mean = self.get_parent(source_node.name, [3, 0], True) - - # Var - var = self.get_parent(source_node.name, [4, 0], True) - - if self.weight_loaded: - self.set_weight(source_node.name, 'bias', - self.ckpt_data[bias.name]) - self.set_weight(source_node.name, 'mean', - self.ckpt_data[mean.name]) - self.set_weight(source_node.name, 'var', 
self.ckpt_data[var.name]) - - def rename_Shape(self, source_node): - IR_node = self._convert_identity_operation( - source_node, in_edge_count=1, new_op='Shape') - - def rename_Pack(self, source_node): - N = len(source_node.layer.input) - for i in range(N): - this_node = self.get_parent(source_node.name, [i]) - if this_node.type == 'Const': - - IR_node = self.IR_graph.node.add() - TensorflowParser._copy_and_reop(this_node, IR_node, 'Constant') - kwargs = { - 'value': this_node.layer.attr['value'].tensor.int_val[0], - } - assign_IRnode_values(IR_node, kwargs) - - IR_node = self._convert_identity_operation(source_node, new_op='Pack') - kwargs = { - 'axis': source_node.layer.attr['axis'].i, - 'N': source_node.layer.attr['N'].i - } - assign_IRnode_values(IR_node, kwargs) - - def rename_Gather(self, source_node): - - W = self.src_graph.get_parent(source_node.name, [0]) - W = self.src_graph.get_parent(W.name, [0]) - - if 'Variable' in W.type: - IR_node = self._convert_identity_operation( - source_node, new_op='Embedding') - - self.set_weight(source_node.name, "weights", - self.ckpt_data[W.name]) - - kwargs = { - 'input_dim': self.ckpt_data[W.name].shape[0], - 'output_dim': self.ckpt_data[W.name].shape[1], - 'mask_zero': False - } - kwargs['axis'] = 0 # add default - assign_IRnode_values(IR_node, kwargs) - else: - IR_node = self._convert_identity_operation( - source_node, new_op='Gather') - - return IR_node - - def rename_GatherV2(self, source_node): - - IR_node = self.rename_Gather(source_node) - - kwargs = {} - kwargs['axis'] = source_node.layer.attr['axis'].i - assign_IRnode_values(IR_node, kwargs) - - def rename_Transpose(self, source_node): - IR_node = self._convert_identity_operation(source_node) - - def rename_Sigmoid(self, source_node): - self._convert_identity_operation(source_node) - - def rename_Mul(self, source_node): - scale1 = self.get_parent(source_node.name, [1], True) - scale2 = self.get_parent(source_node.name, [0], True) - - if scale1.type == 'Const' or scale2.type == 'Const': - self._add_constant_node(source_node) - self._convert_identity_operation(source_node) - - elif scale2.type == 'Identity': - scale2 = self.get_parent(scale2.name, [0], True) - assert scale2.type == "VariableV2" - self.set_weight(source_node.name, 'alpha', - self.ckpt_data[scale2.name]) - self._convert_identity_operation(source_node) - - else: - self._convert_identity_operation(source_node) - - ''' - tf.unpack has been deprecated with replaced tf.unstack - ''' - - def rename_Unpack(self, source_node): - IR_node = self._convert_identity_operation( - source_node, new_op='Unstack') - kwargs = { - 'axis': source_node.get_attr('axis'), - 'num': source_node.get_attr('num') - } - assign_IRnode_values(IR_node, kwargs) - - def rename_Split(self, source_node): - if source_node.get_attr('num_split') == 1: - for n in source_node.out_nodes: - for idx, e in enumerate(n.in_edges): - if source_node.name in e: - n.in_edges[idx] = e.split(':')[0] - - source_node.real_name = self.get_parent( - source_node.name, [1]).real_name - - else: - IR_node = self._convert_identity_operation(source_node, 1, 1) - kwargs = { - 'axis': self.get_parent(source_node.name, [0]).layer.attr['value'].tensor.int_val[0], - 'split': source_node.get_attr('num_split') - } - assign_IRnode_values(IR_node, kwargs) - - def rename_StridedSlice(self, source_node): - # TODO: Current it is only for slice - - if self.get_parent(source_node.name, [1]).type != 'Const': - self._add_constant_node(source_node) - IR_node = self._convert_identity_operation( - 
source_node, new_op='Slice') - return - - IR_node = self._convert_identity_operation( - source_node, in_edge_count=1, new_op='Slice') - kwargs = { - 'begin_mask': source_node.get_attr('begin_mask'), - 'end_mask': source_node.get_attr('end_mask'), - 'shrink_axis_mask': source_node.get_attr('shrink_axis_mask'), - 'new_axis_mask': source_node.get_attr('new_axis_mask') - } - - starts = self.get_parent( - source_node.name, [1]).layer.attr['value'].tensor - starts = tensor_util.MakeNdarray(starts).tolist() - kwargs['starts'] = starts - - ends = self.get_parent( - source_node.name, [2]).layer.attr['value'].tensor - ends = tensor_util.MakeNdarray(ends).tolist() - kwargs['ends'] = ends - - if self.get_parent(source_node.name, [3]) != None: - strides = self.get_parent( - source_node.name, [3]).layer.attr['value'].tensor - strides = tensor_util.MakeNdarray(strides).tolist() - kwargs['strides'] = strides - - assign_IRnode_values(IR_node, kwargs) - - def rename_Slice(self, source_node): - input_node_begin = self.get_parent(source_node.name, [1]) - input_node_size = self.get_parent(source_node.name, [2]) - - shape = self.get_parent( - source_node.name, [0]).layer.attr['value'].tensor - shape = tensor_util.MakeNdarray(shape).tolist() - - begin = input_node_begin.get_attr("axis") - - IR_node = self._convert_identity_operation( - source_node, in_edge_count=2, new_op='Slice') - - # TODO: only for 1D - end = int( - input_node_size.layer.attr['value'].tensor.int_val[0]) + begin - kwargs = { - 'begin_mask': begin, - 'end_mask': end - } - - assign_IRnode_values(IR_node, kwargs) - - def rename_LRN(self, source_node): - IR_node = self._convert_identity_operation(source_node) - kwargs = { - "alpha": source_node.get_attr('alpha') * (source_node.get_attr('depth_radius') * 2 + 1), - "beta": source_node.get_attr('beta'), - "bias": source_node.get_attr('bias'), - 'size': source_node.get_attr('depth_radius') + 1 - } - assign_IRnode_values(IR_node, kwargs) - - def rename_Tanh(self, source_node): - self._convert_identity_operation(source_node) - - def rename_ExpandDims(self, source_node): - IR_node = self._convert_identity_operation( - source_node, 0, 1, new_op='Unsqueeze') - - ax_node = self.get_parent(source_node.name, [1]) - kwargs = { - 'axes': [ax_node.layer.attr['value'].tensor.int_val[0]] - } - assign_IRnode_values(IR_node, kwargs) - - def rename_Fill(self, source_node): - IR_node = self._convert_identity_operation( - source_node, 0, 1, new_op='Fill') - - value_node = self.get_parent(source_node.name, [1]) - if value_node.layer.attr['value'].tensor.float_val: - IR_node.attr['value'].f = value_node.layer.attr['value'].tensor.float_val[0] - elif value_node.layer.attr['value'].tensor.int_val: - IR_node.attr['value'].i = value_node.layer.attr['value'].tensor.int_val[0] - else: - raise NotImplementedError() - - def rename_Conv2DBackpropInput(self, source_node): - """ - weights: name_weights, name_bias - """ - IR_node = self._convert_identity_operation( - source_node, new_op='ConvTranspose') - - kwargs = {} - - # strides - kwargs['strides'] = source_node.get_attr('strides') - - # input[1] : W - # filter - W = self.tf_graph.get_node(source_node.layer.input[1]) - W = self.tf_graph.get_node(W.layer.input[0]).layer - kwargs['kernel_shape'] = self.tensor_shape_to_list(W.attr['shape']) - - # padding - self._convert_padding(source_node, IR_node, - kwargs['kernel_shape'][:-2]) - - if self.weight_loaded: - self.set_weight(source_node.name, 'weights', - self.ckpt_data[W.name]) - - assign_IRnode_values(IR_node, kwargs) - # 
output[0] : B
-        self._get_bias(source_node, IR_node)
-
-    def rename_Minimum(self, source_node):
-        self._add_constant_node(source_node)
-        self._convert_identity_operation(source_node)
-
-    def rename_Maximum(self, source_node):
-        self._add_constant_node(source_node)
-        self._convert_identity_operation(source_node)
-
-    def rename_Cast(self, source_node):
-        IR_node = self._convert_identity_operation(source_node)
-        dst = source_node.get_attr('DstT')
-        if dst == 1:
-            dst = 'float'
-        elif dst == 3:
-            dst = 'int'
-        else:
-            raise NotImplementedError
-
-        kwargs = {'dstType': dst}
-        assign_IRnode_values(IR_node, kwargs)
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ----------------------------------------------------------------------------------------------
-
-from mmdnn.conversion.common.DataStructure.graph import GraphNode, Graph
-import torch
-
-
-class TorchGraphNode(GraphNode):
-
-    def __init__(self, layer, id):
-        self._type = layer.__class__.__name__.replace('Backward', '')
-        self._name = "{}_{}".format(self._type, id)
-        super(TorchGraphNode, self).__init__(layer)
-
-    @property
-    def name(self):
-        return self._name
-
-    @property
-    def type(self):
-        return self._type
-
-    @property
-    def torch_layer(self):
-        return self.layer
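# A self-contained sketch of the autograd traversal that the commented-out build()
# below performs: start from an output tensor's grad_fn and walk next_functions
# breadth-first. It assumes only the public grad_fn / next_functions attributes
# (current PyTorch adds a version suffix such as 'AddmmBackward0' to the names).
import torch

def walk_grad_fn(output_tensor):
    seen, queue = set(), [output_tensor.grad_fn]
    while queue:
        fn = queue.pop(0)
        if fn is None or fn in seen:
            continue
        seen.add(fn)
        print(type(fn).__name__.replace('Backward', ''))  # op type, e.g. 'Addmm0'
        queue.extend(parent for parent, _ in getattr(fn, 'next_functions', ()))

# walk_grad_fn(torch.nn.Linear(3, 2)(torch.randn(1, 3, requires_grad=True)))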
-class TorchGraph(Graph):
-
-    def __init__(self, model):
-        super(TorchGraph, self).__init__(model)
-        self.model = model
-
-    def build(self, shape):
-        output_shapes = self._infer_torch_output_shapes(
-            self.model,
-            shape
-        )
-
-        # """
-        # build graph for pytorch 0.2.0
-        # """
-        # dummy_input = torch.autograd.Variable(torch.randn(shape))
-        # output_node = self.model(dummy_input)

-        # search_queue = [output_node.grad_fn]
-        # tmp_node = TorchGraphNode(output_node.grad_fn, 0)
-        # self.layer_map[tmp_node.name] = tmp_node
-        # visited = {output_node.grad_fn : self.layer_map[tmp_node.name]}

-        # idx = 0
-        # node_count = 1
-        # while (idx < len(search_queue)):
-        #     current_node = search_queue[idx]
-        #     current_type = visited[current_node].type
-        #     if hasattr(current_node, 'next_functions'):
-        #         for parent, _ in current_node.next_functions:
-        #             parent_type = parent.__class__.__name__.replace('Backward', '')
-        #             if parent_type != 'AccumulateGrad' and \
-        #                     (parent_type != 'Transpose' or current_type != 'Addmm'):
-        #                 if not parent in visited:
-        #                     tmp_node = TorchGraphNode(parent, node_count)
-        #                     self.layer_map[tmp_node.name] = tmp_node
-        #                     node_count += 1
-        #                     visited[parent] = tmp_node
-        #                     search_queue.append(parent)
-        #                 self._make_connection(visited[parent].name, visited[current_node].name)
-        #     idx += 1
-
-        super(TorchGraph, self).build()
-
-    @staticmethod
-    def _infer_torch_output_shapes(torch_model, input_shapes):
-        """
-        Forward torch model to infer output shape
-        """
-        return TorchGraph._forward_torch_random_input(
-            torch_model,
-            input_shapes,
-            is_batch=False)
-
-        # try:
-        #     return TorchGraph._forward_torch_random_input(
-        #         torch_model,
-        #         input_shapes,
-        #         is_batch=False
-        #     )
-        # except:
-        #     # try batch mode
-        #     return TorchGraph._forward_torch_random_input(
-        #         torch_model,
-        #         input_shapes,
-        #         is_batch=True
-        #     )
-
-    @staticmethod
-    def _forward_torch_random_input(torch_model, input_shapes, is_batch=False):
-        input_tensors = []
-        for shape in input_shapes:
-            if is_batch:
-                tensor = torch.rand(1, *shape).float()
-            else:
-                tensor = torch.randn(shape)
-                # tensor = torch.rand(*shape).float()
-            input_tensors.append(tensor)
-
-        if len(input_tensors) == 1:
-            result = torch_model.forward(input_tensors[0])
-        else:
-            result = torch_model.forward(input_tensors)
-
-        if isinstance(result, list):
-            # multi output
-            output_shapes = []
-            for tensor in result:
-                shape = tensor.numpy().shape
-                if is_batch:
-                    shape = shape[1:]
-                output_shapes.append(shape)
-            return output_shapes
-        else:
-            # single output
-            output_shape = result.numpy().shape
-            if is_batch:
-                return [output_shape[1:]]
-            else:
-                return [output_shape]
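# _forward_torch_random_input above infers output shapes by pushing a random
# tensor through the model. The same idea in isolation (a sketch, not the
# parser's API; the model and shape below are examples):
import torch

def infer_output_shapes(model, input_shape):
    with torch.no_grad():
        out = model(torch.randn(*input_shape))
    outs = out if isinstance(out, (list, tuple)) else [out]
    return [tuple(t.shape) for t in outs]

# infer_output_shapes(torch.nn.Conv2d(3, 8, 3), (1, 3, 32, 32))  -> [(1, 8, 30, 30)]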
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ----------------------------------------------------------------------------------------------
-
-import os
-import numpy as np
-from torch.utils.serialization import load_lua
-import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2
-from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType
-from mmdnn.conversion.common.utils import *
-from mmdnn.conversion.common.DataStructure.parser import Parser
-from mmdnn.conversion.torch.torch_graph import TorchGraph
-
-
-class TorchParser(Parser):
-
-    ############
-    # property #
-    ############
-
-    @property
-    def src_graph(self):
-        return self.torch_graph
-
-    ####################
-    # Public Functions #
-    ####################
-
-    def __init__(self, model_file_name, input_shape):
-        super(TorchParser, self).__init__()
-        if not os.path.exists(model_file_name):
-            raise ValueError(
-                "Torch7 model file [{}] is not found.".format(model_file_name))
-        model = load_lua(model_file_name)
-        if type(model).__name__ == 'hashable_uniq_dict':
-            model = model.model
-        model.evaluate()
-        self.weight_loaded = True
-        # keep the batched input shape around; rename_NoneType reads it back
-        self.input_shape = [1] + list(map(int, input_shape))
-
-        # Build network graph
-        self.torch_graph = TorchGraph(model)
-        self.torch_graph.build([self.input_shape])
-
-    def gen_IR(self):
-        for layer in self.src_graph.topological_sort:
-            current_node = self.src_graph.get_node(layer)
-            node_type = current_node.type
-
-            if hasattr(self, "rename_" + node_type):
-                func = getattr(self, "rename_" + node_type)
-                func(current_node)
-
-            else:
-                self.rename_UNKNOWN(current_node)
-
-    ##########
-    # Layers #
-    ##########
-    def rename_UNKNOWN(self, source_node):
-        print("PyTorch parser has not supported operator [%s] with name [%s]."
-              % (source_node.type, source_node.name))
-        assert False
-
-    def rename_NoneType(self, source_node):
-        assert source_node.name in self.src_graph.input_layers
-        IR_node = self._convert_identity_operation(
-            source_node, new_op="DataInput")
-        for dim in self.input_shape:
-            new_dim = IR_node.attr["shape"].shape.dim.add()
-            if dim is None:
-                new_dim.size = -1
-            else:
-                new_dim.size = dim
-
-    def rename_ConvNd(self, source_node):
-        kwargs = dict()
-        kwargs['dilations'] = [1] + list(source_node.get_attr('dilation')) + [1]
-        kwargs['pads'] = ([0] + list(source_node.get_attr('padding')) + [0]) * 2
-        kwargs['strides'] = [1] + list(source_node.get_attr('stride')) + [1]
-        kwargs['group'] = source_node.get_attr('groups')
-
-        # handle weight: torch stores (out_ch, in_ch, k...); the IR wants (k..., in_ch, out_ch)
-        weight = source_node.get_attr('next_functions')[1][0].variable.data.numpy()
-        dim = weight.ndim - 2
-
-        if source_node.get_attr('transposed'):
-            IR_node = self._convert_identity_operation(
-                source_node, new_op="ConvTranspose")
-            weight = np.transpose(weight, list(range(2, dim + 2)) + [0, 1])
-        else:
-            IR_node = self._convert_identity_operation(
-                source_node, new_op="Conv")
-            weight = np.transpose(weight, list(range(2, dim + 2)) + [1, 0])
-
-        self.set_weight(source_node.name, 'weights', weight)
-        kwargs['kernel_shape'] = list(weight.shape)
-
-        # handle bias
-        if source_node.get_attr('next_functions')[2][0]:
-            bias = source_node.get_attr('next_functions')[2][0].variable.data.numpy()
-            self.set_weight(source_node.name, 'bias', bias)
-            kwargs['use_bias'] = True
-        else:
-            kwargs['use_bias'] = False
-
-        assign_IRnode_values(IR_node, kwargs)
-
-    def rename_Threshold(self, source_node):
-        IR_node = self._convert_identity_operation(source_node, new_op='Relu')
-
-    def rename_MaxPool2d(self, source_node):
-        self._convert_pooling(source_node)
-
-    def rename_View(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, new_op='Reshape')
-        assign_IRnode_values(IR_node, {'shape': list(
-            source_node.get_attr('new_sizes'))[1:]})
-
-    def rename_Addmm(self, source_node):
-        IR_node = self._convert_identity_operation(
-            source_node, new_op='FullyConnected')
-        kwargs = dict()
-
-        # handle weight
-        weight = source_node.get_attr('next_functions')[
-            2][0].next_functions[0][0].variable.data.numpy()
-        weight = np.transpose(weight)
-        kwargs['units'] = weight.shape[1]
-        self.set_weight(source_node.name, 'weights', weight)
-
-        # handle bias
-        if source_node.get_attr('next_functions')[0][0]:
-            bias = source_node.get_attr('next_functions')[0][0].variable.data.numpy()
-            kwargs['use_bias'] = True
-            self.set_weight(source_node.name, 'bias', bias)
-
-        assign_IRnode_values(IR_node, kwargs)
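# rename_ConvNd above reorders PyTorch's (out_ch, in_ch, kH, kW) kernel into the
# channels-last (kH, kW, in_ch, out_ch) layout the IR expects. A quick numpy
# check of that axis permutation (sizes are arbitrary examples):
import numpy as np

w = np.zeros((8, 3, 5, 5))                 # out_ch, in_ch, kH, kW
dim = w.ndim - 2                           # number of spatial dims (2 here)
ir_w = np.transpose(w, list(range(2, dim + 2)) + [1, 0])
assert ir_w.shape == (5, 5, 3, 8)          # kH, kW, in_ch, out_ch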
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ----------------------------------------------------------------------------------------------
-from six import string_types as _string_types
-
-import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2
-from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType
-
-
-class Emitter(object):
-
-    def __init__(self):
-        self.body_code = str()
-        self.weights_dict = dict()
-        self.used_layers = set()
-        self.weight_loaded = False
-        self.layers_codes = dict()
-
-    def run(self, dstNetworkPath, dstWeightPath=None, phase='test'):
-        self.save_code(dstNetworkPath, phase)
-
-    # share functions
-
-    def add_body(self, indent, codes):
-        if isinstance(codes, _string_types):
-            codes = [codes]
-        for code in codes:
-            self.body_code += ("    " * indent) + code + '\n'
-
-    def _load_weights(self, file_name=None):
-        import numpy as np
-        self.weight_loaded = True
-        try:
-            self.weights_dict = np.load(file_name).item()
-        except Exception:
-            self.weights_dict = np.load(file_name, encoding='bytes').item()
-
-    def parent_variable_name(self, IR_node, path_or_name=[0]):
-        if isinstance(path_or_name, _string_types):
-            path = [IR_node.in_edges.index(path_or_name)]
-        elif isinstance(path_or_name, list):
-            path = path_or_name
-        else:
-            raise ValueError
-        return self.IR_graph.get_parent_variable_name(IR_node.name, path)
-
-    def _build(self):
-        self.IR_graph.build()
-
-    def gen_code(self, phase):
-        raise NotImplementedError("do not use base emitter class.")
-
-    def save_code(self, filepath, phase):
-        code = self.gen_code(phase)
-        with open(filepath, 'w') as fout:
-            fout.write(code)
-        print("Target network code snippet is saved as [{}].".format(filepath))
-
-    @staticmethod
-    def save_weights(weights, filename):
-        import numpy as np
-        with open(filename, 'wb') as of:
-            np.save(of, weights)
-        print("Target weights are saved as [{}].".format(filename))
-
-    @staticmethod
-    def _image_in_transpose_str(dim):
-        dims = [dim]
-        dims.extend(range(dim))
-        return ','.join('%s' % id for id in dims)
-
-    @staticmethod
-    def _image_out_transpose_str(dim):
-        dims = list(range(1, dim + 1))
-        dims.append(0)
-        return ','.join('%s' % id for id in dims)
-
-    @staticmethod
-    def _conv_kernel_transpose_str(dim):
-        dims = [dim + 1, dim]
-        dims.extend(range(dim))
-        return ','.join('%s' % id for id in dims)
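The emitter builds the target source file as one big string, one indented line at a time via `add_body`. A minimal sketch of that accumulation pattern; `KitModel` and `load_weights` are placeholder names, not real MMdnn output:

```python
# Sketch of the accumulation pattern Emitter.add_body implements;
# the emitted lines below are placeholders for generated code.
body_code = []

def add_body(indent, codes):
    if isinstance(codes, str):
        codes = [codes]
    for code in codes:
        body_code.append("    " * indent + code)

add_body(0, "def KitModel(weight_file):")
add_body(1, ["weights = load_weights(weight_file)",
             "return weights"])
print("\n".join(body_code))
```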
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import collections
-
-
-class GraphNode(object):
-
-    def __init__(self, layer):
-        self.in_edges = list()
-        self.out_edges = list()
-        self.layer = layer
-        self.covered = False
-        self.real_name = self.name
-
-    @property
-    def name(self):
-        raise NotImplementedError
-
-    @property
-    def variable_name(self):
-        return self.real_name.replace('/', '_').replace('-', '_').replace('[', '_').replace(']', '_')
-
-    @property
-    def real_variable_name(self):
-        return self.real_name.replace('/', '_').replace('-', '_').replace('[', '_').replace(']', '_')
-
-
-class Graph(object):
-
-    def __init__(self, model):
-        # key: layer_name    value: keras layer
-        self.layer_map = collections.OrderedDict()
-        self.input_layers = list()
-        self.output_layers = list()
-        self.layer_name_map = collections.OrderedDict()
-        self.topological_sort = list()
-        self.model = model
-
-    def build(self):
-        self._make_input_layers()
-        self._make_output_layers()
-        self._get_topological_sort()
-
-    def rebuild(self):
-        self._make_input_layers(True)
-        self._make_output_layers()
-        self._get_topological_sort()
-
-    def _make_input_layers(self, rebuild=False):
-        for name, layer in self.layer_map.items():
-            layer.left_in_edges = len(layer.in_edges)
-            if len(layer.in_edges) == 0:
-                if rebuild:
-                    if not layer.get_attr('scope'):
-                        self.input_layers.append(name)
-                else:
-                    self.input_layers.append(name)
-
-    def _make_output_layers(self):
-        for name, layer in self.layer_map.items():
-            if len(layer.out_edges) == 0:
-                self.output_layers.append(name)
-
-    '''get node by its name or tensor name'''
-
-    def get_node(self, name):
-        if name.split(':')[0] not in self.layer_map:
-            raise IOError("Graph doesn't have node [%s]." % name.split(':')[0])
-        else:
-            return self.layer_map[name.split(':')[0]]
-
-    def get_nodes(self):
-        return self.layer_map.values()
-
-    def get_son(self, name, path, set_flag=False):
-        if name is None:
-            return None
-        current_node = self.get_node(name)
-        for idx in path:
-            if len(current_node.out_edges) <= idx:
-                return None
-            son_name = current_node.out_edges[idx].split(':')[0]
-            current_node = self.get_node(son_name)
-            if set_flag:
-                current_node.covered = True
-        return current_node
-
-    def get_parent(self, name, path, set_flag=False):
-        if name is None:
-            return None
-        current_node = self.get_node(name)
-        for idx in path:
-            if len(current_node.in_edges) <= idx:
-                return None
-            parent_name = current_node.in_edges[idx].split(':')[0]
-            current_node = self.get_node(parent_name)
-            if set_flag:
-                current_node.covered = True
-        return current_node
-
-    def get_real_parent_name(self, name, path, set_flag=False):
-        if name is None:
-            return None
-        current_node = self.get_node(name)
-        for idx in path:
-            if len(current_node.in_edges) <= idx:
-                return None
-            parent_name = current_node.in_edges[idx].split(':')[0]
-            current_node = self.get_node(parent_name)
-            if set_flag:
-                current_node.covered = True
-        return self.layer_name_map[current_node.name]
-
-    def get_parent_variable_name(self, name, path, set_flag=False):
-        if name is None:
-            return None
-        current_node = self.get_node(name)
-        for idx in path:
-            if len(current_node.in_edges) <= idx:
-                return None
-            parent_name = current_node.in_edges[idx].split(':')[0]
-            current_subscriptor = '' if len(current_node.in_edges[idx].split(
-                ':')) == 1 else '[{}]'.format(current_node.in_edges[idx].split(':')[1])
-            current_node = self.get_node(parent_name)
-            if set_flag:
-                current_node.covered = True
-
-        return current_node.real_variable_name + current_subscriptor
-
-    # private functions
-
-    def _get_topological_sort(self):
-        self.topological_sort = self.input_layers[:]
-        idx = 0
-        while idx < len(self.topological_sort):
-            current_node = self.get_node(self.topological_sort[idx])
-            for next_node in current_node.out_edges:
-                next_node_info = self.get_node(next_node)
-                # one node may connect another node by more than one edge.
-                next_node_info.left_in_edges -= self._check_left_in_edges_num(
-                    current_node.name, next_node_info)
-                if next_node_info.left_in_edges == 0:
-                    self.topological_sort.append(next_node)
-            idx += 1
-
-    def _make_connection(self, src, dst):
-        if (src == dst) or (src not in self.layer_map) or (dst not in self.layer_map):
-            if src.split(':')[0] not in self.layer_map:
-                print(
-                    "Warning: Graph Construct a self-loop node {}. Ignored.".format(src))
-            return
-
-        if dst not in self.layer_map[src.split(':')[0]].out_edges:
-            self.layer_map[src.split(':')[0]].out_edges.append(dst)
-        if src not in self.layer_map[dst.split(':')[0]].in_edges:
-            self.layer_map[dst.split(':')[0]].in_edges.append(src)
-
-    def _check_left_in_edges_num(self, in_node_name, node):
-        count = 0
-        for in_edge in node.in_edges:
-            if in_node_name == in_edge.split(':')[0]:
-                count += 1
-        return count
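`_get_topological_sort` is Kahn's algorithm in disguise: seed the order with the zero in-degree (input) layers, then release each downstream node once all of its in-edges have been consumed. The same scheme on a toy adjacency list, invented for illustration:

```python
# Kahn's algorithm, mirroring what _get_topological_sort does with
# left_in_edges; the graph here is a made-up toy example.
from collections import defaultdict

edges = {'data': ['conv'], 'conv': ['relu'], 'relu': ['fc'], 'fc': []}
in_degree = defaultdict(int)
for node, outs in edges.items():
    in_degree[node] += 0          # make sure every node has an entry
    for out in outs:
        in_degree[out] += 1

order = [n for n, d in in_degree.items() if d == 0]
idx = 0
while idx < len(order):
    for out in edges[order[idx]]:
        in_degree[out] -= 1
        if in_degree[out] == 0:
            order.append(out)
    idx += 1
print(order)  # ['data', 'conv', 'relu', 'fc']
```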
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ----------------------------------------------------------------------------------------------
-
-import numpy as np
-import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2
-from mmdnn.conversion.common.IR.graph_pb2 import NodeDef, GraphDef, DataType
-
-
-class Parser(object):
-
-    def __init__(self):
-        self.IR_graph = GraphDef()
-        self.weight_loaded = False
-
-        # name --> (weight_name --> ndarray)
-        self.weights = dict()
-
-    def run(self, dest_path):
-        self.gen_IR()
-        self.save_to_json(dest_path + ".json")
-        self.save_to_proto(dest_path + ".pb")
-        self.save_weights(dest_path + ".npy")
-
-    @property
-    def src_graph(self):
-        raise NotImplementedError
-
-    def get_son(self, name, path, set_flag=False):
-        return self.src_graph.get_son(name, path, set_flag)
-
-    def get_parent(self, name, path, set_flag=False):
-        return self.src_graph.get_parent(name, path, set_flag)
-
-    def set_weight(self, layer_name, weight_name, data):
-        if layer_name not in self.weights:
-            self.weights[layer_name] = dict()
-        layer = self.weights[layer_name]
-        layer[weight_name] = data
-
-    def save_to_json(self, filename):
-        import google.protobuf.json_format as json_format
-        json_str = json_format.MessageToJson(
-            self.IR_graph, preserving_proto_field_name=True)
-
-        with open(filename, "w") as of:
-            of.write(json_str)
-
-        print("IR network structure is saved as [{}].".format(filename))
-
-        return json_str
-
-    def save_to_proto(self, filename):
-        proto_str = self.IR_graph.SerializeToString()
-        with open(filename, 'wb') as of:
-            of.write(proto_str)
-
-        print("IR network structure is saved as [{}].".format(filename))
-
-        return proto_str
-
-    def save_weights(self, filename):
-        if self.weight_loaded:
-            with open(filename, 'wb') as of:
-                np.save(of, self.weights)
-            print("IR weights are saved as [{}].".format(filename))
-
-        else:
-            print("Warning: weights are not loaded.")
-
-    def convert_inedge(self, source_node, IR_node, start_idx=0, end_idx=None):
-        if end_idx is None:
-            end_idx = len(source_node.in_edges)
-        for idx in range(start_idx, end_idx):
-            IR_node.input.append(self.src_graph.get_node(
-                source_node.in_edges[idx]).real_name.lstrip('_'))
-
-    @staticmethod
-    def channel_first_conv_kernel_to_IR(tensor):
-        dim = tensor.ndim
-        tensor = np.transpose(tensor, list(range(2, dim)) + [1, 0])
-        return tensor
-
-    @staticmethod
-    def channel_first_shape_to_IR(shape):
-        return [shape[0]] + list(shape[2:]) + [shape[1]]
-
-    @staticmethod
-    def channel_first_axis_to_IR(index):
-        if index == 0:
-            return 0
-        elif index == 1:
-            return -1
-        else:
-            return index - 1
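The channel-first helpers at the end of `Parser` encode the NCHW-to-NHWC convention used throughout the IR. A couple of throwaway values show exactly what they compute:

```python
# What Parser's channel-first helpers compute, on throwaway values.
shape_nchw = (1, 3, 224, 224)
# channel_first_shape_to_IR: NCHW -> NHWC
print([shape_nchw[0]] + list(shape_nchw[2:]) + [shape_nchw[1]])  # [1, 224, 224, 3]

# channel_first_axis_to_IR: batch stays 0, the channel axis 1 maps
# to -1 (last), every spatial axis shifts down by one.
for axis in range(4):
    print(axis, '->', 0 if axis == 0 else (-1 if axis == 1 else axis - 1))
```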
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ----------------------------------------------------------------------------------------------
-
-import mmdnn.conversion.common.IR.graph_pb2 as graph_pb2
-from mmdnn.conversion.common.utils import *
-from mmdnn.conversion.common.IR.graph_pb2 import TensorShape, AttrValue
-from mmdnn.conversion.common.DataStructure.graph import Graph, GraphNode
-
-
-def load_protobuf_from_file(container, filename):
-    with open(filename, 'rb') as fin:
-        file_content = fin.read()
-
-    # First try to read it as a binary file.
-    try:
-        container.ParseFromString(file_content)
-        print("Parse file [%s] with binary format successfully." % (filename))
-        return container
-
-    except Exception as e:  # pylint: disable=broad-except
-        print("Info: Trying to parse file [%s] with binary format but failed with error [%s]." % (
-            filename, str(e)))
-
-    # Next try to read it as a text file.
-    try:
-        from google.protobuf import text_format
-        text_format.Parse(file_content.decode('UTF-8'),
-                          container, allow_unknown_extension=True)
-        print("Parse file [%s] with text format successfully." % (filename))
-    except text_format.ParseError as e:
-        raise IOError("Cannot parse file %s: %s." % (filename, str(e)))
-
-    return container
-
-
-class IRGraphNode(GraphNode):
-
-    @staticmethod
-    def replace_scope(name):
-        return name.replace('/', '_')
-
-    @property
-    def IR_layer(self):
-        return self.layer
-
-    @property
-    def name(self):
-        return self.layer.name
-
-    @property
-    def type(self):
-        return self.layer.op
-
-    def set_attrs(self, attrs):
-        assign_IRnode_values(self.layer, attrs)
-
-    def get_attr(self, name, default_value=None):
-        if name in self.layer.attr:
-            attr = self.layer.attr[name]
-            field = attr.WhichOneof('value')
-            val = getattr(attr, field) if field else default_value
-            if not val:
-                return val
-            if isinstance(val, AttrValue.ListValue):
-                if val.ListFields():
-                    return list(val.ListFields()[0][1])
-                else:
-                    return val.ListFields()
-            else:
-                return val.decode('utf-8') if isinstance(val, bytes) else val
-        else:
-            return default_value
-
-
-class IRGraph(Graph):
-
-    @staticmethod
-    def shapeToStr(tensor_shape, keep_minus_one=False):
-        ret = ""
-        first = True
-        for e in tensor_shape.dim:
-            if e.size != -1 or keep_minus_one:
-                if not first:
-                    ret += ", "
-                ret += str(e.size)
-                first = False
-        return ret
-
-    def __init__(self, filename):
-        model = graph_pb2.GraphDef()
-        load_protobuf_from_file(model, filename)
-        super(IRGraph, self).__init__(model)
-
-    def filter_node(self):
-        self.layer_map = dict(filter(
-            lambda layer: layer[1].in_edges or layer[1].out_edges, self.layer_map.items()))
-
-    def build(self):
-        for layer in self.model.node:
-            self.layer_map[layer.name] = IRGraphNode(layer)
-            self.layer_name_map[layer.name] = layer.name
-
-        for layer in self.model.node:
-            for pred in layer.input:
-                self._make_connection(pred, layer.name)
-
-        self.filter_node()
-        super(IRGraph, self).build()
-        self.input_layers = list(
-            filter(lambda x: self.layer_map[x].type != 'Constant', self.input_layers))
-
-    def rebuild(self):
self.input_layers.clear() - self.output_layers.clear() - self.topological_sort.clear() - self.filter_node() - super(IRGraph, self).build() - self.input_layers = list( - filter(lambda x: self.layer_map[x].type != 'Constant', self.input_layers)) - - def clear_out_scope_node(self): - - def _clear_list_out_scope(list_): - for idx in range(len(list_) - 1, -1, -1): - node = self.get_node(list_[idx]) - if node.type != 'Scope' and node.get_attr('scope'): - del list_[idx] - - _clear_list_out_scope(self.input_layers) - _clear_list_out_scope(self.topological_sort) - _clear_list_out_scope(self.output_layers) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# Generated by the protocol buffer compiler. DO NOT EDIT! -# source: graph.proto - -from google.protobuf import descriptor_pb2 -from google.protobuf import symbol_database as _symbol_database -from google.protobuf import reflection as _reflection -from google.protobuf import message as _message -from google.protobuf import descriptor as _descriptor -from google.protobuf.internal import enum_type_wrapper -import sys -_b = sys.version_info[0] < 3 and ( - lambda x: x) or (lambda x: x.encode('latin1')) -# @@protoc_insertion_point(imports) - -_sym_db = _symbol_database.Default() - - -DESCRIPTOR = _descriptor.FileDescriptor( - name='graph.proto', - package='', - syntax='proto3', - serialized_pb=_b('\n\x0bgraph.proto\"3\n\x08GraphDef\x12\x16\n\x04node\x18\x01 \x03(\x0b\x32\x08.NodeDef\x12\x0f\n\x07version\x18\x02 \x01(\x05\"\x8d\x01\n\x07NodeDef\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\n\n\x02op\x18\x02 \x01(\t\x12\r\n\x05input\x18\x03 \x03(\t\x12 \n\x04\x61ttr\x18\x04 \x03(\x0b\x32\x12.NodeDef.AttrEntry\x1a\x37\n\tAttrEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\x19\n\x05value\x18\x02 \x01(\x0b\x32\n.AttrValue:\x02\x38\x01\"\xea\x02\n\tAttrValue\x12$\n\x04list\x18\x01 \x01(\x0b\x32\x14.AttrValue.ListValueH\x00\x12\x0b\n\x01s\x18\x02 \x01(\x0cH\x00\x12\x0b\n\x01i\x18\x03 \x01(\x03H\x00\x12\x0b\n\x01\x66\x18\x04 \x01(\x02H\x00\x12\x0b\n\x01\x62\x18\x05 \x01(\x08H\x00\x12\x19\n\x04type\x18\x06 \x01(\x0e\x32\t.DataTypeH\x00\x12\x1d\n\x05shape\x18\x07 \x01(\x0b\x32\x0c.TensorShapeH\x00\x12 \n\x06tensor\x18\x08 \x01(\x0b\x32\x0e.LiteralTensorH\x00\x1a\x9d\x01\n\tListValue\x12\t\n\x01s\x18\x02 \x03(\x0c\x12\r\n\x01i\x18\x03 \x03(\x03\x42\x02\x10\x01\x12\r\n\x01\x66\x18\x04 \x03(\x02\x42\x02\x10\x01\x12\r\n\x01\x62\x18\x05 \x03(\x08\x42\x02\x10\x01\x12\x1b\n\x04type\x18\x06 \x03(\x0e\x32\t.DataTypeB\x02\x10\x01\x12\x1b\n\x05shape\x18\x07 \x03(\x0b\x32\x0c.TensorShape\x12\x1e\n\x06tensor\x18\x08 \x03(\x0b\x32\x0e.LiteralTensorB\x07\n\x05value\"e\n\x0bTensorShape\x12\x1d\n\x03\x64im\x18\x02 \x03(\x0b\x32\x10.TensorShape.Dim\x12\x14\n\x0cunknown_rank\x18\x03 \x01(\x08\x1a!\n\x03\x44im\x12\x0c\n\x04size\x18\x01 \x01(\x03\x12\x0c\n\x04name\x18\x02 \x01(\t\"\xb0\x02\n\rLiteralTensor\x12\x18\n\x05\x64type\x18\x01 \x01(\x0e\x32\t.DataType\x12\"\n\x0ctensor_shape\x18\x02 \x01(\x0b\x32\x0c.TensorShape\x12\x16\n\x0eversion_number\x18\x03 \x01(\x05\x12\x16\n\x0etensor_content\x18\x04 \x01(\x0c\x12\x13\n\x07int_val\x18\x05 \x03(\x05\x42\x02\x10\x01\x12\x14\n\x08uint_val\x18\x06 \x03(\x05\x42\x02\x10\x01\x12\x15\n\tint64_val\x18\x07 \x03(\x03\x42\x02\x10\x01\x12\x16\n\nuint64_val\x18\x08 \x03(\x03\x42\x02\x10\x01\x12\x15\n\tfloat_val\x18\t \x03(\x02\x42\x02\x10\x01\x12\x16\n\ndouble_val\x18\n \x03(\x01\x42\x02\x10\x01\x12\x14\n\x08\x62ool_val\x18\x0b \x03(\x08\x42\x02\x10\x01\x12\x12\n\nstring_val\x18\x0c 
\x03(\x0c*\xff\x01\n\x08\x44\x61taType\x12\x10\n\x0c\x44T_UNDEFINED\x10\x00\x12\x0b\n\x07\x44T_INT8\x10\x01\x12\x0c\n\x08\x44T_INT16\x10\x02\x12\x0c\n\x08\x44T_INT32\x10\x03\x12\x0c\n\x08\x44T_INT64\x10\x04\x12\x0c\n\x08\x44T_UINT8\x10\x05\x12\r\n\tDT_UINT16\x10\x06\x12\r\n\tDT_UINT32\x10\x07\x12\r\n\tDT_UINT64\x10\x08\x12\x0e\n\nDT_FLOAT16\x10\t\x12\x0e\n\nDT_FLOAT32\x10\n\x12\x0e\n\nDT_FLOAT64\x10\x0b\x12\x10\n\x0c\x44T_COMPLEX64\x10\x0c\x12\x11\n\rDT_COMPLEX128\x10\r\x12\x0b\n\x07\x44T_BOOL\x10\x0e\x12\r\n\tDT_STRING\x10\x0f\x62\x06proto3') -) - -_DATATYPE = _descriptor.EnumDescriptor( - name='DataType', - full_name='DataType', - filename=None, - file=DESCRIPTOR, - values=[ - _descriptor.EnumValueDescriptor( - name='DT_UNDEFINED', index=0, number=0, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_INT8', index=1, number=1, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_INT16', index=2, number=2, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_INT32', index=3, number=3, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_INT64', index=4, number=4, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_UINT8', index=5, number=5, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_UINT16', index=6, number=6, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_UINT32', index=7, number=7, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_UINT64', index=8, number=8, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_FLOAT16', index=9, number=9, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_FLOAT32', index=10, number=10, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_FLOAT64', index=11, number=11, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_COMPLEX64', index=12, number=12, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_COMPLEX128', index=13, number=13, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_BOOL', index=14, number=14, - options=None, - type=None), - _descriptor.EnumValueDescriptor( - name='DT_STRING', index=15, number=15, - options=None, - type=None), - ], - containing_type=None, - options=None, - serialized_start=988, - serialized_end=1243, -) -_sym_db.RegisterEnumDescriptor(_DATATYPE) - -DataType = enum_type_wrapper.EnumTypeWrapper(_DATATYPE) -DT_UNDEFINED = 0 -DT_INT8 = 1 -DT_INT16 = 2 -DT_INT32 = 3 -DT_INT64 = 4 -DT_UINT8 = 5 -DT_UINT16 = 6 -DT_UINT32 = 7 -DT_UINT64 = 8 -DT_FLOAT16 = 9 -DT_FLOAT32 = 10 -DT_FLOAT64 = 11 -DT_COMPLEX64 = 12 -DT_COMPLEX128 = 13 -DT_BOOL = 14 -DT_STRING = 15 - - -_GRAPHDEF = _descriptor.Descriptor( - name='GraphDef', - full_name='GraphDef', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='node', full_name='GraphDef.node', index=0, - number=1, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='version', full_name='GraphDef.version', index=1, - number=2, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - 
], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=15, - serialized_end=66, -) - - -_NODEDEF_ATTRENTRY = _descriptor.Descriptor( - name='AttrEntry', - full_name='NodeDef.AttrEntry', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='key', full_name='NodeDef.AttrEntry.key', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='value', full_name='NodeDef.AttrEntry.value', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=_descriptor._ParseOptions( - descriptor_pb2.MessageOptions(), _b('8\001')), - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=155, - serialized_end=210, -) - -_NODEDEF = _descriptor.Descriptor( - name='NodeDef', - full_name='NodeDef', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='name', full_name='NodeDef.name', index=0, - number=1, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='op', full_name='NodeDef.op', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='input', full_name='NodeDef.input', index=2, - number=3, type=9, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='attr', full_name='NodeDef.attr', index=3, - number=4, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[_NODEDEF_ATTRENTRY, ], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=69, - serialized_end=210, -) - - -_ATTRVALUE_LISTVALUE = _descriptor.Descriptor( - name='ListValue', - full_name='AttrValue.ListValue', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='s', full_name='AttrValue.ListValue.s', index=0, - number=2, type=12, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='i', full_name='AttrValue.ListValue.i', index=1, - number=3, type=3, cpp_type=2, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - 
is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='f', full_name='AttrValue.ListValue.f', index=2, - number=4, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='b', full_name='AttrValue.ListValue.b', index=3, - number=5, type=8, cpp_type=7, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='type', full_name='AttrValue.ListValue.type', index=4, - number=6, type=14, cpp_type=8, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='shape', full_name='AttrValue.ListValue.shape', index=5, - number=7, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='tensor', full_name='AttrValue.ListValue.tensor', index=6, - number=8, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=409, - serialized_end=566, -) - -_ATTRVALUE = _descriptor.Descriptor( - name='AttrValue', - full_name='AttrValue', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='list', full_name='AttrValue.list', index=0, - number=1, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='s', full_name='AttrValue.s', index=1, - number=2, type=12, cpp_type=9, label=1, - has_default_value=False, default_value=_b(""), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='i', full_name='AttrValue.i', index=2, - number=3, type=3, cpp_type=2, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='f', full_name='AttrValue.f', index=3, - number=4, type=2, cpp_type=6, label=1, - has_default_value=False, default_value=float(0), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='b', full_name='AttrValue.b', index=4, - number=5, type=8, cpp_type=7, label=1, - has_default_value=False, default_value=False, - message_type=None, enum_type=None, 
containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='type', full_name='AttrValue.type', index=5, - number=6, type=14, cpp_type=8, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='shape', full_name='AttrValue.shape', index=6, - number=7, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='tensor', full_name='AttrValue.tensor', index=7, - number=8, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[_ATTRVALUE_LISTVALUE, ], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - _descriptor.OneofDescriptor( - name='value', full_name='AttrValue.value', - index=0, containing_type=None, fields=[]), - ], - serialized_start=213, - serialized_end=575, -) - - -_TENSORSHAPE_DIM = _descriptor.Descriptor( - name='Dim', - full_name='TensorShape.Dim', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='size', full_name='TensorShape.Dim.size', index=0, - number=1, type=3, cpp_type=2, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='name', full_name='TensorShape.Dim.name', index=1, - number=2, type=9, cpp_type=9, label=1, - has_default_value=False, default_value=_b("").decode('utf-8'), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=645, - serialized_end=678, -) - -_TENSORSHAPE = _descriptor.Descriptor( - name='TensorShape', - full_name='TensorShape', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='dim', full_name='TensorShape.dim', index=0, - number=2, type=11, cpp_type=10, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='unknown_rank', full_name='TensorShape.unknown_rank', index=1, - number=3, type=8, cpp_type=7, label=1, - has_default_value=False, default_value=False, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[_TENSORSHAPE_DIM, ], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=577, - serialized_end=678, -) - - -_LITERALTENSOR = _descriptor.Descriptor( - name='LiteralTensor', - full_name='LiteralTensor', - filename=None, - file=DESCRIPTOR, - containing_type=None, - fields=[ - _descriptor.FieldDescriptor( - name='dtype', full_name='LiteralTensor.dtype', index=0, - 
number=1, type=14, cpp_type=8, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='tensor_shape', full_name='LiteralTensor.tensor_shape', index=1, - number=2, type=11, cpp_type=10, label=1, - has_default_value=False, default_value=None, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='version_number', full_name='LiteralTensor.version_number', index=2, - number=3, type=5, cpp_type=1, label=1, - has_default_value=False, default_value=0, - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='tensor_content', full_name='LiteralTensor.tensor_content', index=3, - number=4, type=12, cpp_type=9, label=1, - has_default_value=False, default_value=_b(""), - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - _descriptor.FieldDescriptor( - name='int_val', full_name='LiteralTensor.int_val', index=4, - number=5, type=5, cpp_type=1, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='uint_val', full_name='LiteralTensor.uint_val', index=5, - number=6, type=5, cpp_type=1, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='int64_val', full_name='LiteralTensor.int64_val', index=6, - number=7, type=3, cpp_type=2, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='uint64_val', full_name='LiteralTensor.uint64_val', index=7, - number=8, type=3, cpp_type=2, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='float_val', full_name='LiteralTensor.float_val', index=8, - number=9, type=2, cpp_type=6, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='double_val', full_name='LiteralTensor.double_val', index=9, - number=10, type=1, cpp_type=5, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='bool_val', full_name='LiteralTensor.bool_val', index=10, - number=11, type=8, cpp_type=7, label=3, - has_default_value=False, default_value=[], 
- message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=_descriptor._ParseOptions(descriptor_pb2.FieldOptions(), _b('\020\001'))), - _descriptor.FieldDescriptor( - name='string_val', full_name='LiteralTensor.string_val', index=11, - number=12, type=12, cpp_type=9, label=3, - has_default_value=False, default_value=[], - message_type=None, enum_type=None, containing_type=None, - is_extension=False, extension_scope=None, - options=None), - ], - extensions=[ - ], - nested_types=[], - enum_types=[ - ], - options=None, - is_extendable=False, - syntax='proto3', - extension_ranges=[], - oneofs=[ - ], - serialized_start=681, - serialized_end=985, -) - -_GRAPHDEF.fields_by_name['node'].message_type = _NODEDEF -_NODEDEF_ATTRENTRY.fields_by_name['value'].message_type = _ATTRVALUE -_NODEDEF_ATTRENTRY.containing_type = _NODEDEF -_NODEDEF.fields_by_name['attr'].message_type = _NODEDEF_ATTRENTRY -_ATTRVALUE_LISTVALUE.fields_by_name['type'].enum_type = _DATATYPE -_ATTRVALUE_LISTVALUE.fields_by_name['shape'].message_type = _TENSORSHAPE -_ATTRVALUE_LISTVALUE.fields_by_name['tensor'].message_type = _LITERALTENSOR -_ATTRVALUE_LISTVALUE.containing_type = _ATTRVALUE -_ATTRVALUE.fields_by_name['list'].message_type = _ATTRVALUE_LISTVALUE -_ATTRVALUE.fields_by_name['type'].enum_type = _DATATYPE -_ATTRVALUE.fields_by_name['shape'].message_type = _TENSORSHAPE -_ATTRVALUE.fields_by_name['tensor'].message_type = _LITERALTENSOR -_ATTRVALUE.oneofs_by_name['value'].fields.append( - _ATTRVALUE.fields_by_name['list']) -_ATTRVALUE.fields_by_name['list'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] -_ATTRVALUE.oneofs_by_name['value'].fields.append( - _ATTRVALUE.fields_by_name['s']) -_ATTRVALUE.fields_by_name['s'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] -_ATTRVALUE.oneofs_by_name['value'].fields.append( - _ATTRVALUE.fields_by_name['i']) -_ATTRVALUE.fields_by_name['i'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] -_ATTRVALUE.oneofs_by_name['value'].fields.append( - _ATTRVALUE.fields_by_name['f']) -_ATTRVALUE.fields_by_name['f'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] -_ATTRVALUE.oneofs_by_name['value'].fields.append( - _ATTRVALUE.fields_by_name['b']) -_ATTRVALUE.fields_by_name['b'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] -_ATTRVALUE.oneofs_by_name['value'].fields.append( - _ATTRVALUE.fields_by_name['type']) -_ATTRVALUE.fields_by_name['type'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] -_ATTRVALUE.oneofs_by_name['value'].fields.append( - _ATTRVALUE.fields_by_name['shape']) -_ATTRVALUE.fields_by_name['shape'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] -_ATTRVALUE.oneofs_by_name['value'].fields.append( - _ATTRVALUE.fields_by_name['tensor']) -_ATTRVALUE.fields_by_name['tensor'].containing_oneof = _ATTRVALUE.oneofs_by_name['value'] -_TENSORSHAPE_DIM.containing_type = _TENSORSHAPE -_TENSORSHAPE.fields_by_name['dim'].message_type = _TENSORSHAPE_DIM -_LITERALTENSOR.fields_by_name['dtype'].enum_type = _DATATYPE -_LITERALTENSOR.fields_by_name['tensor_shape'].message_type = _TENSORSHAPE -DESCRIPTOR.message_types_by_name['GraphDef'] = _GRAPHDEF -DESCRIPTOR.message_types_by_name['NodeDef'] = _NODEDEF -DESCRIPTOR.message_types_by_name['AttrValue'] = _ATTRVALUE -DESCRIPTOR.message_types_by_name['TensorShape'] = _TENSORSHAPE -DESCRIPTOR.message_types_by_name['LiteralTensor'] = _LITERALTENSOR -DESCRIPTOR.enum_types_by_name['DataType'] = _DATATYPE -_sym_db.RegisterFileDescriptor(DESCRIPTOR) - -GraphDef = 
_reflection.GeneratedProtocolMessageType('GraphDef', (_message.Message,), dict( - DESCRIPTOR=_GRAPHDEF, - __module__='graph_pb2' - # @@protoc_insertion_point(class_scope:GraphDef) -)) -_sym_db.RegisterMessage(GraphDef) - -NodeDef = _reflection.GeneratedProtocolMessageType('NodeDef', (_message.Message,), dict( - - AttrEntry=_reflection.GeneratedProtocolMessageType('AttrEntry', (_message.Message,), dict( - DESCRIPTOR=_NODEDEF_ATTRENTRY, - __module__='graph_pb2' - # @@protoc_insertion_point(class_scope:NodeDef.AttrEntry) - )), - DESCRIPTOR=_NODEDEF, - __module__='graph_pb2' - # @@protoc_insertion_point(class_scope:NodeDef) -)) -_sym_db.RegisterMessage(NodeDef) -_sym_db.RegisterMessage(NodeDef.AttrEntry) - -AttrValue = _reflection.GeneratedProtocolMessageType('AttrValue', (_message.Message,), dict( - - ListValue=_reflection.GeneratedProtocolMessageType('ListValue', (_message.Message,), dict( - DESCRIPTOR=_ATTRVALUE_LISTVALUE, - __module__='graph_pb2' - # @@protoc_insertion_point(class_scope:AttrValue.ListValue) - )), - DESCRIPTOR=_ATTRVALUE, - __module__='graph_pb2' - # @@protoc_insertion_point(class_scope:AttrValue) -)) -_sym_db.RegisterMessage(AttrValue) -_sym_db.RegisterMessage(AttrValue.ListValue) - -TensorShape = _reflection.GeneratedProtocolMessageType('TensorShape', (_message.Message,), dict( - - Dim=_reflection.GeneratedProtocolMessageType('Dim', (_message.Message,), dict( - DESCRIPTOR=_TENSORSHAPE_DIM, - __module__='graph_pb2' - # @@protoc_insertion_point(class_scope:TensorShape.Dim) - )), - DESCRIPTOR=_TENSORSHAPE, - __module__='graph_pb2' - # @@protoc_insertion_point(class_scope:TensorShape) -)) -_sym_db.RegisterMessage(TensorShape) -_sym_db.RegisterMessage(TensorShape.Dim) - -LiteralTensor = _reflection.GeneratedProtocolMessageType('LiteralTensor', (_message.Message,), dict( - DESCRIPTOR=_LITERALTENSOR, - __module__='graph_pb2' - # @@protoc_insertion_point(class_scope:LiteralTensor) -)) -_sym_db.RegisterMessage(LiteralTensor) - - -_NODEDEF_ATTRENTRY.has_options = True -_NODEDEF_ATTRENTRY._options = _descriptor._ParseOptions( - descriptor_pb2.MessageOptions(), _b('8\001')) -_ATTRVALUE_LISTVALUE.fields_by_name['i'].has_options = True -_ATTRVALUE_LISTVALUE.fields_by_name['i']._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b('\020\001')) -_ATTRVALUE_LISTVALUE.fields_by_name['f'].has_options = True -_ATTRVALUE_LISTVALUE.fields_by_name['f']._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b('\020\001')) -_ATTRVALUE_LISTVALUE.fields_by_name['b'].has_options = True -_ATTRVALUE_LISTVALUE.fields_by_name['b']._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b('\020\001')) -_ATTRVALUE_LISTVALUE.fields_by_name['type'].has_options = True -_ATTRVALUE_LISTVALUE.fields_by_name['type']._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b('\020\001')) -_LITERALTENSOR.fields_by_name['int_val'].has_options = True -_LITERALTENSOR.fields_by_name['int_val']._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b('\020\001')) -_LITERALTENSOR.fields_by_name['uint_val'].has_options = True -_LITERALTENSOR.fields_by_name['uint_val']._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b('\020\001')) -_LITERALTENSOR.fields_by_name['int64_val'].has_options = True -_LITERALTENSOR.fields_by_name['int64_val']._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b('\020\001')) -_LITERALTENSOR.fields_by_name['uint64_val'].has_options = True 
-_LITERALTENSOR.fields_by_name['uint64_val']._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b('\020\001')) -_LITERALTENSOR.fields_by_name['float_val'].has_options = True -_LITERALTENSOR.fields_by_name['float_val']._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b('\020\001')) -_LITERALTENSOR.fields_by_name['double_val'].has_options = True -_LITERALTENSOR.fields_by_name['double_val']._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b('\020\001')) -_LITERALTENSOR.fields_by_name['bool_val'].has_options = True -_LITERALTENSOR.fields_by_name['bool_val']._options = _descriptor._ParseOptions( - descriptor_pb2.FieldOptions(), _b('\020\001')) -# @@protoc_insertion_point(module_scope) -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -import argparse -import os -from six import text_type as _text_type -from mmdnn.conversion.common.utils import download_file - -BASE_MODEL_URL = 'http://data.mxnet.io/models/imagenet/test/caffe/' -# pylint: disable=line-too-long -DEFAULT_MODEL_INFO = { - 'alexnet': {'prototxt': 'https://raw.githubusercontent.com/BVLC/caffe/master/models/bvlc_alexnet/deploy.prototxt', - 'caffemodel': 'http://dl.caffe.berkeleyvision.org/bvlc_alexnet.caffemodel'}, - 'inception_v1': {'prototxt': 'https://raw.githubusercontent.com/BVLC/caffe/master/models/bvlc_googlenet/deploy.prototxt', - 'caffemodel': 'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel'}, - 'vgg16': {'prototxt': 'https://gist.githubusercontent.com/ksimonyan/211839e770f7b538e2d8/raw/c3ba00e272d9f48594acef1f67e5fd12aff7a806/VGG_ILSVRC_16_layers_deploy.prototxt', - 'caffemodel': 'http://data.mxnet.io/models/imagenet/test/caffe/VGG_ILSVRC_16_layers.caffemodel'}, - 'vgg19': {'prototxt': 'https://gist.githubusercontent.com/ksimonyan/3785162f95cd2d5fee77/raw/bb2b4fe0a9bb0669211cf3d0bc949dfdda173e9e/VGG_ILSVRC_19_layers_deploy.prototxt', - 'caffemodel': 'http://data.mxnet.io/models/imagenet/test/caffe/VGG_ILSVRC_19_layers.caffemodel'}, - 'resnet50': {'prototxt': BASE_MODEL_URL + 'ResNet-50-deploy.prototxt', - 'caffemodel': BASE_MODEL_URL + 'ResNet-50-model.caffemodel'}, - 'resnet101': {'prototxt': BASE_MODEL_URL + 'ResNet-101-deploy.prototxt', - 'caffemodel': BASE_MODEL_URL + 'ResNet-101-model.caffemodel'}, - 'resnet152': {'prototxt': BASE_MODEL_URL + 'ResNet-152-deploy.prototxt', - 'caffemodel': BASE_MODEL_URL + 'ResNet-152-model.caffemodel'}, - 'squeezenet': {'prototxt': "https://raw.githubusercontent.com/DeepScale/SqueezeNet/master/SqueezeNet_v1.1/deploy.prototxt", - 'caffemodel': "https://github.com/DeepScale/SqueezeNet/raw/master/SqueezeNet_v1.1/squeezenet_v1.1.caffemodel"} -} -# pylint: enable=line-too-long - - -def _main(): - parser = argparse.ArgumentParser() - - parser.add_argument('-n', '--network', type=_text_type, help='Model Type', required=True, - choices=DEFAULT_MODEL_INFO.keys()) - - parser.add_argument('-i', '--image', default=None, - type=_text_type, help='Test Image Path') - - parser.add_argument('-o', '--output_dir', default='./', - type=_text_type, help='Caffe Checkpoint file name') - - args = parser.parse_args() - - arch_fn = download_file( - DEFAULT_MODEL_INFO[args.network]['prototxt'], directory=args.output_dir) - if not 
arch_fn: - return -1 - - weight_fn = download_file( - DEFAULT_MODEL_INFO[args.network]['caffemodel'], directory=args.output_dir) - if not weight_fn: - return -1 - - print("Model {} saved.".format(args.network)) - - if args.image: - import caffe - import numpy as np - from mmdnn.conversion.examples.imagenet_test import TestKit - - net = caffe.Net(arch_fn.encode("utf-8"), - weight_fn.encode("utf-8"), caffe.TEST) - # net = caffe.Net(arch_fn, weight_fn, caffe.TEST) - func = TestKit.preprocess_func['caffe'][args.network] - img = func(args.image) - img = np.transpose(img, (2, 0, 1)) - img = np.expand_dims(img, 0) - net.blobs['data'].data[...] = img - predict = np.squeeze(net.forward()['prob'][0]) - predict = np.squeeze(predict) - top_indices = predict.argsort()[-5:][::-1] - result = [(i, predict[i]) for i in top_indices] - print(result) - print(np.sum(result)) - - return 0 - - -if __name__ == '__main__': - _main() -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -from __future__ import absolute_import -from mmdnn.conversion.examples.imagenet_test import TestKit -from mmdnn.conversion.examples.extractor import base_extractor -from mmdnn.conversion.common.utils import download_file - - -class caffe_extractor(base_extractor): - - BASE_MODEL_URL = 'http://data.mxnet.io/models/imagenet/test/caffe/' - MMDNN_BASE_URL = 'http://mmdnn.eastasia.cloudapp.azure.com:89/models/' - - architecture_map = { - # Image Classification - 'alexnet': {'prototxt': 'https://raw.githubusercontent.com/BVLC/caffe/master/models/bvlc_alexnet/deploy.prototxt', - 'caffemodel': 'http://dl.caffe.berkeleyvision.org/bvlc_alexnet.caffemodel'}, - 'inception_v1': {'prototxt': 'https://raw.githubusercontent.com/BVLC/caffe/master/models/bvlc_googlenet/deploy.prototxt', - 'caffemodel': 'http://dl.caffe.berkeleyvision.org/bvlc_googlenet.caffemodel'}, - 'vgg16': {'prototxt': 'https://gist.githubusercontent.com/ksimonyan/211839e770f7b538e2d8/raw/c3ba00e272d9f48594acef1f67e5fd12aff7a806/VGG_ILSVRC_16_layers_deploy.prototxt', - 'caffemodel': 'http://data.mxnet.io/models/imagenet/test/caffe/VGG_ILSVRC_16_layers.caffemodel'}, - 'vgg19': {'prototxt': 'https://gist.githubusercontent.com/ksimonyan/3785162f95cd2d5fee77/raw/bb2b4fe0a9bb0669211cf3d0bc949dfdda173e9e/VGG_ILSVRC_19_layers_deploy.prototxt', - 'caffemodel': 'http://data.mxnet.io/models/imagenet/test/caffe/VGG_ILSVRC_19_layers.caffemodel'}, - 'resnet50': {'prototxt': BASE_MODEL_URL + 'ResNet-50-deploy.prototxt', - 'caffemodel': BASE_MODEL_URL + 'ResNet-50-model.caffemodel'}, - 'resnet101': {'prototxt': BASE_MODEL_URL + 'ResNet-101-deploy.prototxt', - 'caffemodel': BASE_MODEL_URL + 'ResNet-101-model.caffemodel'}, - 'resnet152': {'prototxt': BASE_MODEL_URL + 'ResNet-152-deploy.prototxt', - 'caffemodel': BASE_MODEL_URL + 'ResNet-152-model.caffemodel'}, - 'squeezenet': {'prototxt': "https://raw.githubusercontent.com/DeepScale/SqueezeNet/master/SqueezeNet_v1.1/deploy.prototxt", - 'caffemodel': "https://github.com/DeepScale/SqueezeNet/raw/master/SqueezeNet_v1.1/squeezenet_v1.1.caffemodel"}, - 'xception': {'prototxt': MMDNN_BASE_URL + "caffe/xception_deploy.prototxt", - 'caffemodel': MMDNN_BASE_URL + "caffe/xception.caffemodel"}, - 'inception_v4': {'prototxt': MMDNN_BASE_URL + 
'caffe/inception-v4_deploy.prototxt', - 'caffemodel': MMDNN_BASE_URL + 'caffe/inception-v4.caffemodel'}, - # Semantic Segmentation - 'voc-fcn8s': {'prototxt': 'https://raw.githubusercontent.com/shelhamer/fcn.berkeleyvision.org/master/voc-fcn8s/deploy.prototxt', - 'caffemodel': 'http://dl.caffe.berkeleyvision.org/fcn8s-heavy-pascal.caffemodel'}, - 'voc-fcn16s': {'prototxt': MMDNN_BASE_URL + "caffe/voc-fcn16s_deploy.prototxt", - 'caffemodel': 'http://dl.caffe.berkeleyvision.org/fcn16s-heavy-pascal.caffemodel'}, - 'voc-fcn32s': {'prototxt': MMDNN_BASE_URL + "caffe/voc-fcn32s_deploy.prototxt", - 'caffemodel': 'http://dl.caffe.berkeleyvision.org/fcn32s-heavy-pascal.caffemodel'}, - } - - @classmethod - def download(cls, architecture, path="./"): - if cls.sanity_check(architecture): - prototxt_name = architecture + "-deploy.prototxt" - architecture_file = download_file( - cls.architecture_map[architecture]['prototxt'], directory=path, local_fname=prototxt_name) - if not architecture_file: - return None - - weight_name = architecture + ".caffemodel" - weight_file = download_file( - cls.architecture_map[architecture]['caffemodel'], directory=path, local_fname=weight_name) - if not weight_file: - return None - - print("Caffe Model {} saved as [{}] and [{}].".format( - architecture, architecture_file, weight_file)) - return (architecture_file, weight_file) - - else: - return None - - @classmethod - def inference(cls, architecture_name, architecture, path, image_path): - if cls.sanity_check(architecture_name): - import caffe - import numpy as np - net = caffe.Net(architecture[0], architecture[1], caffe.TEST) - func = TestKit.preprocess_func['caffe'][architecture_name] - img = func(image_path) - img = np.transpose(img, (2, 0, 1)) - img = np.expand_dims(img, 0) - net.blobs['data'].data[...] = img - predict = np.squeeze(net.forward()[net._layer_names[-1]][0]) - predict = np.squeeze(predict) - return predict - - else: - return None -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -import argparse -import numpy as np -import sys -import os -from six import text_type as _text_type -from mmdnn.conversion.examples.imagenet_test import TestKit -import caffe - - -class TestCaffe(TestKit): - - def __init__(self): - super(TestCaffe, self).__init__() - - self.truth['caffe']['alexnet'] = [(657, 0.41121054), (744, 0.20789708), ( - 847, 0.086725503), (821, 0.05908291), (595, 0.058017164)] - - if self.args.dump: - self.dump_net = self.args.dump + '.prototxt' - self.dump_weight = self.args.dump + '.caffemodel' - else: - self.dump_net = 'tmp.prototxt' - self.dump_weight = 'tmp.caffemodel' - - self.MainModel.make_net(self.dump_net) - self.MainModel.gen_weight(self.args.w, self.dump_weight, self.dump_net) - self.model = caffe.Net(self.dump_net, self.dump_weight, caffe.TEST) - - def preprocess(self, image_path): - x = super(TestCaffe, self).preprocess(image_path) - # caffe uses NCHW - x = np.transpose(x, [2, 0, 1]) - self.data = np.expand_dims(x, 0) - - def print_result(self): - self.model.blobs['input'].data[...] 
= self.data - predict = self.model.forward()[self.model._layer_names[-1]][0] - super(TestCaffe, self).print_result(predict) - - def print_intermediate_result(self, layer_name, if_transpose=False): - intermediate_output = self.model.blobs[layer_name].data[0] - super(TestCaffe, self).print_intermediate_result( - intermediate_output, if_transpose) - - def inference(self, image_path): - self.preprocess(image_path) - - self.print_result() - - # self.print_intermediate_result('pooling0', False) - - self.test_truth() - - # delete tmp model files - if os.path.isfile(self.dump_net): - os.remove(self.dump_net) - if os.path.isfile(self.dump_weight): - os.remove(self.dump_weight) - - def dump(self): - print('Caffe model files are saved as [{}] and [{}], generated by [{}.py] and [{}].'.format( - self.dump_net, self.dump_weight, self.args.n, self.args.w)) - - -if __name__ == '__main__': - tester = TestCaffe() - if tester.args.dump: - tester.dump() - else: - tester.inference(tester.args.image) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -import argparse -import os -from six import text_type as _text_type -import cntk as C -from mmdnn.conversion.common.utils import download_file - -BASE_MODEL_URL = 'https://www.cntk.ai/Models/CNTK_Pretrained/' -# pylint: disable=line-too-long -MODEL_URL = { - 'alexnet': BASE_MODEL_URL + 'AlexNet_ImageNet_CNTK.model', - 'inception_v3': BASE_MODEL_URL + 'InceptionV3_ImageNet_CNTK.model', - 'resnet18': BASE_MODEL_URL + 'ResNet18_ImageNet_CNTK.model', - 'resnet50': BASE_MODEL_URL + 'ResNet50_ImageNet_CNTK.model', - 'resnet101': BASE_MODEL_URL + 'ResNet101_ImageNet_CNTK.model', - 'resnet152': BASE_MODEL_URL + 'ResNet152_ImageNet_CNTK.model', - 'Fast-RCNN_grocery100': 'https://www.cntk.ai/Models/FRCN_Grocery/Fast-RCNN_grocery100.model', - 'Fast-RCNN_Pascal': 'https://www.cntk.ai/Models/FRCN_Pascal/Fast-RCNN.model' -} -# pylint: enable=line-too-long - - -def _main(): - parser = argparse.ArgumentParser() - - parser.add_argument('-n', '--network', type=_text_type, help='Model Type', required=True, - choices=MODEL_URL.keys()) - - parser.add_argument('-i', '--image', default=None, - type=_text_type, help='Test Image Path') - - parser.add_argument('-o', '--output_dir', default='./', - type=_text_type, help='CNTK Checkpoint file name') - - args = parser.parse_args() - - fn = download_file(MODEL_URL[args.network], directory=args.output_dir) - if not fn: - return -1 - - model = C.Function.load(fn) - - if len(model.outputs) > 1: - for idx, output in enumerate(model.outputs): - if len(output.shape) > 0: - eval_node = idx - break - - model = C.as_composite(model[eval_node].owner) - model.save(fn) - - print("Model {} is saved as {}.".format(args.network, fn)) - - if args.image: - import numpy as np - from mmdnn.conversion.examples.imagenet_test import TestKit - func = TestKit.preprocess_func['cntk'][args.network] - img = func(args.image) - img = np.transpose(img, (2, 0, 1)) - predict = model.eval({model.arguments[0]: [img]}) - predict = np.squeeze(predict) - top_indices = predict.argsort()[-5:][::-1] - result = [(i, predict[i]) for i in top_indices] - print(result) - 
print(np.sum(result)) - - return 0 - - -if __name__ == '__main__': - _main() -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -from __future__ import absolute_import -import cntk as C -from mmdnn.conversion.examples.imagenet_test import TestKit -from mmdnn.conversion.examples.extractor import base_extractor -from mmdnn.conversion.common.utils import download_file - - -class cntk_extractor(base_extractor): - - BASE_MODEL_URL = 'https://www.cntk.ai/Models/CNTK_Pretrained/' - - architecture_map = { - 'alexnet': BASE_MODEL_URL + 'AlexNet_ImageNet_CNTK.model', - 'inception_v3': BASE_MODEL_URL + 'InceptionV3_ImageNet_CNTK.model', - 'resnet18': BASE_MODEL_URL + 'ResNet18_ImageNet_CNTK.model', - 'resnet50': BASE_MODEL_URL + 'ResNet50_ImageNet_CNTK.model', - 'resnet101': BASE_MODEL_URL + 'ResNet101_ImageNet_CNTK.model', - 'resnet152': BASE_MODEL_URL + 'ResNet152_ImageNet_CNTK.model', - 'Fast-RCNN_grocery100': 'https://www.cntk.ai/Models/FRCN_Grocery/Fast-RCNN_grocery100.model', - 'Fast-RCNN_Pascal': 'https://www.cntk.ai/Models/FRCN_Pascal/Fast-RCNN.model' - } - - @classmethod - def download(cls, architecture, path="./"): - if cls.sanity_check(architecture): - architecture_file = download_file( - cls.architecture_map[architecture], directory=path) - model = C.Function.load(architecture_file) - if len(model.outputs) > 1: - for idx, output in enumerate(model.outputs): - if len(output.shape) > 0: - eval_node = idx - break - - model = C.as_composite(model[eval_node].owner) - model.save(architecture_file) - print("Cntk Model {} saved as [{}].".format( - architecture, architecture_file)) - return architecture_file - - else: - return None - - @classmethod - def inference(cls, architecture_name, architecture_path, image_path): - if cls.sanity_check(architecture_name): - import numpy as np - func = TestKit.preprocess_func['cntk'][architecture_name] - img = func(image_path) - img = np.transpose(img, (2, 0, 1)) - model = C.Function.load(architecture_path) - predict = model.eval({model.arguments[0]: [img]}) - predict = np.squeeze(predict) - - top_indices = predict.argsort()[-5:][::-1] - return predict - - else: - return None -# Copyright (c) Microsoft. All rights reserved. - -# Licensed under the MIT license. See LICENSE.md file in the project root -# for full license information. 
-# ============================================================================== - -import argparse -import numpy as np -import sys -import os -import cntk as C -from mmdnn.conversion.examples.imagenet_test import TestKit - - -class TestCNTK(TestKit): - - def __init__(self): - super(TestCNTK, self).__init__() - - self.truth['mxnet']['inception_bn'] = [ - (21, 0.84820729), (144, 0.06263639), (677, 0.015408826), (973, 0.014532777), (562, 0.0053690737)] - - self.truth['keras']['resnet'] = [(144, 0.77398175), (23, 0.10650793), ( - 21, 0.081077583), (146, 0.0092755388), (562, 0.0089645367)] - self.truth['tensorflow']['resnet'] = [ - (22, 13.370872), (147, 8.8040094), (24, 5.6983061), (90, 5.6143088), (95, 4.8060427)] - - self.model = self.MainModel.KitModel(self.args.w) - # self.model, self.testop = self.MainModel.KitModel(self.args.w) - - def preprocess(self, image_path): - self.data = super(TestCNTK, self).preprocess(image_path) - - def print_result(self): - predict = self.model.eval({self.model.arguments[0]: [self.data]}) - super(TestCNTK, self).print_result(predict) - - def print_intermediate_result(self, layer_name, if_transpose=False): - test_arr = self.testop.eval({self.testop.arguments[0]: [self.data]}) - super(TestCNTK, self).print_intermediate_result(test_arr, if_transpose) - - def inference(self, image_path): - self.preprocess(image_path) - - # self.print_intermediate_result(None, False) - - self.print_result() - - self.test_truth() - - def dump(self, path=None): - if path is None: - path = self.args.dump - self.model.save(path) - print('CNTK model file is saved as [{}], generated by [{}.py] and [{}].'.format( - path, self.args.n, self.args.w)) - - def detect(self, image_path, path=None): - self.preprocess(image_path) - print("Found {} outputs".format(len(self.model))) - for output in self.model: - predict = output.eval({output.arguments[0]: [self.data/255.]}) - predict.dump("finalconv_{}.npy".format(str(predict.shape[1]))) - print('The output of CNTK model file is saved as [finalconv_{}.npy].'.format( - str(predict.shape[1]))) - - print('generated by [{}.py], [{}] and [{}].'.format( - self.args.n, self.args.w, image_path)) - - -if __name__ == '__main__': - tester = TestCNTK() - if tester.args.dump: - tester.dump() - elif tester.args.detect: - tester.detect(tester.args.image, tester.args.detect) - else: - tester.inference(tester.args.image) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -from __future__ import absolute_import -import os -import coremltools -from coremltools.models import MLModel -from mmdnn.conversion.examples.imagenet_test import TestKit -from mmdnn.conversion.examples.extractor import base_extractor -from mmdnn.conversion.common.utils import download_file - - -class coreml_extractor(base_extractor): - - _base_model_url = "https://docs-assets.developer.apple.com/coreml/models/" - - # from collections import namedtuple - # Batch = namedtuple('Batch', ['data']) - - # TODO - # Apple has published some of their own models. They can be downloaded from https://developer.apple.com/machine-learning/. 
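The truth tables above store expected top-5 (class id, score) pairs per source framework; a sketch of the kind of comparison test_truth presumably performs (the tolerance here is an assumption, not taken from the source):

import numpy as np

def close_to_truth(predicted_top5, expected_top5, atol=1e-2):
    # Same class ids in the same order, scores within tolerance.
    ids_ok = [p[0] for p in predicted_top5] == [e[0] for e in expected_top5]
    scores_ok = np.allclose([p[1] for p in predicted_top5],
                            [e[1] for e in expected_top5], atol=atol)
    return ids_ok and scores_ok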
- # Those published models are: SqueezeNet, Places205-GoogLeNet, ResNet50, Inception v3, VGG16 - architecture_map = { - 'inception_v3': "https://docs-assets.developer.apple.com/coreml/models/Inceptionv3.mlmodel", - 'vgg16': "https://docs-assets.developer.apple.com/coreml/models/VGG16.mlmodel", - 'vgg19': None, - 'resnet50': "https://docs-assets.developer.apple.com/coreml/models/Resnet50.mlmodel", # resnet50 - 'mobilenet': "https://docs-assets.developer.apple.com/coreml/models/MobileNet.mlmodel", - 'xception': None, - 'inception_resnet': None, - 'densenet': None, - 'nasnet': None, - 'tinyyolo': "https://s3-us-west-2.amazonaws.com/coreml-models/TinyYOLO.mlmodel" - - } - - @classmethod - def download(cls, architecture, path='./'): - if cls.sanity_check(architecture): - architecture_file = download_file( - cls.architecture_map[architecture], directory=path) - if not architecture_file: - return None - - print('Coreml model {} is saved in [{}]'.format( - architecture, path)) - return architecture_file - else: - return None - - @classmethod - def inference(cls, architecture, model_path, image_path): - # TODO - from PIL import Image - import numpy as np - from coremltools.models._infer_shapes_nn_mlmodel import infer_shapes - if cls.sanity_check(architecture): - func = TestKit.preprocess_func['coreml'][architecture] - - import inspect - funcstr = inspect.getsource(func) - - if len(funcstr.split(',')) == 3: - size = int(funcstr.split('path,')[1].split(')')[0]) - else: - size = int(funcstr.split('path,')[1].split(',')[0]) - - img = Image.open(image_path) - img = img.resize((size, size)) - - # load model - model = MLModel(model_path) - spec = model.get_spec() - - # TODO: Multiple inputs - input_name = spec.description.input[0].name - - # TODO: Multiple outputs - output_name = spec.description.output[0].name - - # inference - input_data = img - coreml_input = {input_name: img} - coreml_output = model.predict(coreml_input) - - prob = coreml_output[output_name] - if isinstance(prob, dict): - prob = list(coreml_output[output_name].values()) - prob = np.array(prob).squeeze() - - return prob - - else: - return None -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
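A hypothetical round trip with the extractor above (the import path and local paths are assumptions for illustration):

from mmdnn.conversion.examples.coreml.extractor import coreml_extractor

model_file = coreml_extractor.download('mobilenet', path='./models/')
if model_file:
    probs = coreml_extractor.inference(
        'mobilenet', model_file,
        'mmdnn/conversion/examples/data/seagull.jpg')
    print(probs.argmax())  # predicted ImageNet class id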
-# ---------------------------------------------------------------------------------------------- - -import argparse -import numpy as np -import sys -import os -from mmdnn.conversion.examples.imagenet_test import TestKit -import coremltools - - -class TestCoreML(TestKit): - - def __init__(self): - from six import text_type as _text_type - parser = argparse.ArgumentParser() - - parser.add_argument('-p', '--preprocess', - type=_text_type, help='Model Preprocess Type') - - parser.add_argument('--model', '-n', '-w', type=_text_type, - required=True, help='CoreML Model path.') - - parser.add_argument('-s', type=_text_type, help='Source Framework Type', - choices=self.truth.keys()) - - parser.add_argument('--image', '-i', - type=_text_type, help='Test image path.', - default="mmdnn/conversion/examples/data/seagull.jpg") - - parser.add_argument('-input', type=_text_type, - required=True, help='CoreML Input Node') - - parser.add_argument('-output', type=_text_type, - required=True, help='CoreML Output Node') - - parser.add_argument('-size', type=int, - default=224, help='CoreML Input Image Size') - - self.args = parser.parse_args() - - print("Loading model [{}].".format(self.args.model)) - - self.model = coremltools.models.MLModel(self.args.model.encode()) - - print("Model loading success.") - - def preprocess(self, image_path): - from PIL import Image as pil_image - img = pil_image.open(image_path) - img = img.resize((self.args.size, self.args.size)) - self.data = img - - def print_result(self): - coreml_inputs = {self.args.input: self.data} - self.coreml_output = self.model.predict( - coreml_inputs, useCPUOnly=False) - predict = self.coreml_output[self.args.output] - super(TestCoreML, self).print_result(predict) - - def print_intermediate_result(self, layer_name, if_transpose=False): - super(TestCoreML, self).print_intermediate_result( - self.coreml_output[layer_name], if_transpose) - - def inference(self, image_path): - self.preprocess(image_path) - - self.print_result() - - # self.print_intermediate_result('conv1_7x7_s2_1', True) - - # self.test_truth() - - -if __name__ == '__main__': - tester = TestCoreML() - tester.inference(tester.args.image) -import unittest -import urllib -import os -import tarfile -import zipfile -import numpy as np -import PIL.Image -import tensorflow as tf -from tensorflow.core.framework import graph_pb2 -import tfcoreml as tf_converter - -TMP_MODEL_DIR = '/Users/kit/tmp/tfcoreml' -TEST_IMAGE = '/Users/kit/github/MMdnn/mmdnn/conversion/examples/data/seagull.jpg' - - -def _download_file(url): - """Download the file. - url - The URL address of the frozen file - fname - Filename of the frozen TF graph in the url. 
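Note that _download_file above calls the Python 2 urllib.urlretrieve; under Python 3 the same call lives in urllib.request. A version-agnostic sketch of just that line:

try:
    from urllib.request import urlretrieve  # Python 3
except ImportError:
    from urllib import urlretrieve          # Python 2

urlretrieve(url, fpath)  # url and fpath as computed in _download_file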
- """ - dir_path = TMP_MODEL_DIR - if not os.path.exists(dir_path): - os.makedirs(dir_path) - - k = url.rfind('/') - fname = url[k+1:] - fpath = os.path.join(dir_path, fname) - - ftype = None - if url.endswith(".tar.gz") or url.endswith(".tgz"): - ftype = 'tgz' - elif url.endswith('.zip'): - ftype = 'zip' - - if not os.path.exists(fpath): - urllib.urlretrieve(url, fpath) - if ftype == 'tgz': - tar = tarfile.open(fpath) - tar.extractall(dir_path) - tar.close() - elif ftype == 'zip': - zip_ref = zipfile.ZipFile(fpath, 'r') - zip_ref.extractall(dir_path) - zip_ref.close() - - -def _compute_max_relative_error(x, y): - rerror = 0 - index = 0 - for i in range(len(x)): - den = max(1.0, np.abs(x[i]), np.abs(y[i])) - if np.abs(x[i]/den - y[i]/den) > rerror: - rerror = np.abs(x[i]/den - y[i]/den) - index = i - return rerror, index - - -def _compute_SNR(x, y): - noise = x - y - noise_var = np.sum(noise ** 2)/len(noise) + 1e-7 - signal_energy = np.sum(y ** 2)/len(y) - max_signal_energy = np.amax(y ** 2) - SNR = 10 * np.log10(signal_energy/noise_var) - PSNR = 10 * np.log10(max_signal_energy/noise_var) - return SNR, PSNR - - -def _load_image(path, resize_to=None): - img = PIL.Image.open(path) - if resize_to is not None: - img = img.resize(resize_to, PIL.Image.ANTIALIAS) - img_np = np.array(img).astype(np.float32) - return img_np, img - - -def _generate_data(input_shape, mode='random', - scale=2.0/255, bias=-1, - img_size=256): - """ - Generate some random data according to a shape. - """ - if input_shape is None or len(input_shape) == 0: - return 0.5 - if mode == 'zeros': - X = np.zeros(input_shape) - elif mode == 'ones': - X = np.ones(input_shape) - elif mode == 'linear': - X = np.array(range(np.product(input_shape))).reshape(input_shape)*1.0 - elif mode == 'random': - X = np.random.rand(*input_shape) - elif mode == 'random_zero_mean': - X = np.random.rand(*input_shape)-0.5 - elif mode == 'image': - # Load a real image and do default tf imageNet preprocessing - img_np, _ = _load_image(TEST_IMAGE, resize_to=(img_size, img_size)) - img_tf = np.expand_dims(img_np, axis=0) - X = img_tf * scale + bias - elif mode == 'onehot_0': - X = np.zeros(input_shape) - X[0] = 1 - return X - - -def _tf_transpose(x, is_sequence=False): - if not hasattr(x, "shape"): - return x - if len(x.shape) == 4: - # [Batch, Height, Width, Channels] --> [Batch, Channels, Height, Width] - x = np.transpose(x, [0, 3, 1, 2]) - return np.expand_dims(x, axis=0) - elif len(x.shape) == 3: - # We only deal with non-recurrent networks for now - # (H,W,C) --> (C,H,W) - return np.transpose(x, [2, 0, 1]) - elif len(x.shape) == 2: - if is_sequence: # (N,S) --> (S,N,1,) - return x.reshape(x.shape[::-1] + (1,)) - else: # (N,C) --> (N,C,1,1) - return x.reshape((1, ) + x.shape) # Dense - elif len(x.shape) == 1: - if is_sequence: # (S) --> (S,N,1,1,1) - return x.reshape((x.shape[0], 1, 1, 1, 1)) - else: - return x - else: - return x - - -class CorrectnessTest(unittest.TestCase): - - @classmethod - def setUpClass(self): - """ Set up the unit test by loading common utilities. 
- """ - self.err_thresh = 0.15 - self.snr_thresh = 12 - self.psnr_thresh = 30 - self.red_bias = -1 - self.blue_bias = -1 - self.green_bias = -1 - self.image_scale = 2.0/255 - - def _compare_tf_coreml_outputs(self, tf_out, coreml_out): - self.assertEquals(len(tf_out), len(coreml_out)) - error, ind = _compute_max_relative_error(coreml_out, tf_out) - SNR, PSNR = _compute_SNR(coreml_out, tf_out) - self.assertGreater(SNR, self.snr_thresh) - self.assertGreater(PSNR, self.psnr_thresh) - self.assertLess(error, self.err_thresh) - - def _test_tf_model(self, tf_model_path, coreml_model, input_tensors, - output_tensor_names, data_modes='random', delta=1e-2, - use_cpu_only=False, scale=2.0/255, bias=-1, - img_size=None, sequence_inputs=None): - """ Common entry to testing routine (Tensors in, tensors out). - tf_model_path - frozen TF model path - coreml_model - MLModel object - input_tensors - list of (name,shape) for each input (placeholder) - output_tensor_names - output_tensor_names, a list of strings - sequence_inputs - dict of input names that are sequences for CoreML input - """ - # Load TensorFlow model - tf.reset_default_graph() - graph_def = graph_pb2.GraphDef() - with open(tf_model_path, "rb") as f: - graph_def.ParseFromString(f.read()) - g = tf.import_graph_def(graph_def) - - if type(data_modes) is str: - data_modes = [data_modes] * len(input_tensors) - - with tf.Session(graph=g) as sess: - # Prepare inputs - feed_dict = {} - for idx, in_tensor in enumerate(input_tensors): - ts_name, ts_shape = in_tensor - ts_name = 'import/' + ts_name - feed_dict[ts_name] = _generate_data(ts_shape, - mode=data_modes[idx], - scale=scale, bias=bias, - img_size=img_size) - # Run TF session - out_tf_names = [] - for out_name in output_tensor_names: - out_tf_names.append('import/' + out_name) - result = sess.run(out_tf_names, feed_dict=feed_dict) - - # Evaluate coreml model - coreml_inputs = {} - for idx, in_tensor in enumerate(input_tensors): - in_tensor_name, in_shape = in_tensor - coreml_in_name = in_tensor_name.replace( - ':', '__').replace('/', '__') - if in_tensor_name in sequence_inputs: - coreml_inputs[coreml_in_name] = _tf_transpose( - feed_dict['import/'+in_tensor_name], is_sequence=True).copy() - else: - coreml_inputs[coreml_in_name] = _tf_transpose( - feed_dict['import/'+in_tensor_name]).copy() - - coreml_output = coreml_model.predict( - coreml_inputs, useCPUOnly=use_cpu_only) - - for idx, out_name in enumerate(output_tensor_names): - out_tensor_name = out_name.replace(':', '__').replace('/', '__') - tp = _tf_transpose(result[idx]).flatten() - cp = coreml_output[out_tensor_name].flatten() - error, index = _compute_max_relative_error(tp, cp) - snr, psnr = _compute_SNR(tp, cp) - self._compare_tf_coreml_outputs(tp, cp) - - def _test_coreml_model_image_input(self, tf_model_path, coreml_model, - input_tensor_name, output_tensor_name, img_size, useCPUOnly=False): - """Test single image input conversions. 
- tf_model_path - the TF model - coreml_model - converted CoreML model - input_tensor_name - the input image tensor name - output_tensor_name - the output tensor name - img_size - size of the image - """ - - img_np, img = _load_image(TEST_IMAGE, resize_to=(img_size, img_size)) - img_tf = np.expand_dims(img_np, axis=0) - img_tf[:, :, :, 0] = self.image_scale * \ - img_tf[:, :, :, 0] + self.red_bias - img_tf[:, :, :, 1] = self.image_scale * \ - img_tf[:, :, :, 1] + self.green_bias - img_tf[:, :, :, 2] = self.image_scale * \ - img_tf[:, :, :, 2] + self.blue_bias - - # evaluate the TF model - tf.reset_default_graph() - graph_def = graph_pb2.GraphDef() - with open(tf_model_path, "rb") as f: - graph_def.ParseFromString(f.read()) - g = tf.import_graph_def(graph_def) - with tf.Session(graph=g) as sess: - image_input_tensor = sess.graph.get_tensor_by_name( - 'import/' + input_tensor_name) - output = sess.graph.get_tensor_by_name( - 'import/' + output_tensor_name) - tf_out = sess.run(output, feed_dict={image_input_tensor: img_tf}) - if len(tf_out.shape) == 4: - tf_out = np.transpose(tf_out, (0, 3, 1, 2)) - tf_out_flatten = tf_out.flatten() - - # evaluate CoreML - coreml_input_name = input_tensor_name.replace( - ':', '__').replace('/', '__') - coreml_output_name = output_tensor_name.replace( - ':', '__').replace('/', '__') - coreml_input = {coreml_input_name: img} - - # Test the default CoreML evaluation - coreml_out = coreml_model.predict(coreml_input, useCPUOnly=useCPUOnly)[ - coreml_output_name] - coreml_out_flatten = coreml_out.flatten() - self._compare_tf_coreml_outputs(tf_out_flatten, coreml_out_flatten) - - -class TestModels(CorrectnessTest): - - def test_inception_v3_slim(self): - # Download model - url = 'https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz' - tf_model_dir = _download_file(url=url) - tf_model_path = os.path.join( - TMP_MODEL_DIR, 'inception_v3_2016_08_28_frozen.pb') - - # Convert to coreml - mlmodel_path = os.path.join( - TMP_MODEL_DIR, 'inception_v3_2016_08_28.mlmodel') - mlmodel = tf_converter.convert( - tf_model_path=tf_model_path, - mlmodel_path=mlmodel_path, - output_feature_names=['InceptionV3/Predictions/Softmax:0'], - input_name_shape_dict={'input:0': [1, 299, 299, 3]}, - image_input_names=['input:0'], - red_bias=-1, - green_bias=-1, - blue_bias=-1, - image_scale=2.0/255.0) - - # Test predictions on an image - self._test_coreml_model_image_input( - tf_model_path=tf_model_path, - coreml_model=mlmodel, - input_tensor_name='input:0', - output_tensor_name='InceptionV3/Predictions/Softmax:0', - img_size=299) - - def test_googlenet_v1_nonslim(self): - # Download model - url = 'https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip' - tf_model_dir = _download_file(url=url) - tf_model_path = os.path.join( - TMP_MODEL_DIR, 'tensorflow_inception_graph.pb') - - # Convert to coreml - mlmodel_path = os.path.join( - TMP_MODEL_DIR, 'googlenet_v1_nonslim.mlmodel') - mlmodel = tf_converter.convert( - tf_model_path=tf_model_path, - mlmodel_path=mlmodel_path, - output_feature_names=['softmax2:0'], - input_name_shape_dict={'input:0': [1, 224, 224, 3]}, - image_input_names=['input:0'], - red_bias=-1, - green_bias=-1, - blue_bias=-1, - image_scale=2.0/255.0) - - # Test predictions on an image - self._test_coreml_model_image_input( - tf_model_path=tf_model_path, - coreml_model=mlmodel, - input_tensor_name='input:0', - output_tensor_name='softmax2:0', - img_size=224) - - def test_googlenet_resnet_v2(self): - url = 
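With image_scale = 2/255 and biases of -1 (the values wired into every conversion below), 8-bit pixel values land exactly in [-1, 1]; a two-line check:

scale, bias = 2.0 / 255, -1
print(0 * scale + bias, 255 * scale + bias)  # -1.0 1.0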
'https://storage.googleapis.com/download.tensorflow.org/models/inception_resnet_v2_2016_08_30_frozen.pb.tar.gz' - tf_model_dir = _download_file(url=url) - tf_model_path = os.path.join( - TMP_MODEL_DIR, 'inception_resnet_v2_2016_08_30_frozen.pb') - - mlmodel_path = os.path.join( - TMP_MODEL_DIR, 'inception_resnet_v2_2016_08_30_frozen.mlmodel') - mlmodel = tf_converter.convert( - tf_model_path=tf_model_path, - mlmodel_path=mlmodel_path, - output_feature_names=['InceptionResnetV2/Logits/Predictions:0'], - input_name_shape_dict={'input:0': [1, 299, 299, 3]}, - image_input_names=['input:0'], - red_bias=-1, - green_bias=-1, - blue_bias=-1, - image_scale=2.0/255.0) - - # Test predictions on an image - self._test_coreml_model_image_input( - tf_model_path=tf_model_path, - coreml_model=mlmodel, - input_tensor_name='input:0', - output_tensor_name='InceptionResnetV2/Logits/Predictions:0', - img_size=299) - - def test_googlenet_v1_slim(self): - url = 'https://storage.googleapis.com/download.tensorflow.org/models/inception_v1_2016_08_28_frozen.pb.tar.gz' - tf_model_dir = _download_file(url=url) - tf_model_path = os.path.join( - TMP_MODEL_DIR, 'inception_v1_2016_08_28_frozen.pb') - - mlmodel_path = os.path.join( - TMP_MODEL_DIR, 'inception_v1_2016_08_28_frozen.mlmodel') - mlmodel = tf_converter.convert( - tf_model_path=tf_model_path, - mlmodel_path=mlmodel_path, - output_feature_names=['InceptionV1/Logits/Predictions/Softmax:0'], - input_name_shape_dict={'input:0': [1, 244, 224, 3]}, - image_input_names=['input:0'], - red_bias=-1, - green_bias=-1, - blue_bias=-1, - image_scale=2.0/255.0) - - # Test predictions on an image - self._test_coreml_model_image_input( - tf_model_path=tf_model_path, - coreml_model=mlmodel, - input_tensor_name='input:0', - output_tensor_name='InceptionV1/Logits/Predictions/Softmax:0', - img_size=224) - - def test_googlenet_v2_slim(self): - url = 'https://storage.googleapis.com/download.tensorflow.org/models/inception_v2_2016_08_28_frozen.pb.tar.gz' - tf_model_dir = _download_file(url=url) - tf_model_path = os.path.join( - TMP_MODEL_DIR, 'inception_v2_2016_08_28_frozen.pb') - - mlmodel_path = os.path.join( - TMP_MODEL_DIR, 'inception_v2_2016_08_28_frozen.mlmodel') - mlmodel = tf_converter.convert( - tf_model_path=tf_model_path, - mlmodel_path=mlmodel_path, - output_feature_names=['InceptionV2/Predictions/Softmax:0'], - input_name_shape_dict={'input:0': [1, 244, 224, 3]}, - image_input_names=['input:0'], - red_bias=-1, - green_bias=-1, - blue_bias=-1, - image_scale=2.0/255.0) - - # Test predictions on an image - self._test_coreml_model_image_input( - tf_model_path=tf_model_path, - coreml_model=mlmodel, - input_tensor_name='input:0', - output_tensor_name='InceptionV2/Predictions/Softmax:0', - img_size=224) - - def test_googlenet_v4_slim(self): - url = 'https://storage.googleapis.com/download.tensorflow.org/models/inception_v4_2016_09_09_frozen.pb.tar.gz' - tf_model_dir = _download_file(url=url) - tf_model_path = os.path.join( - TMP_MODEL_DIR, 'inception_v4_2016_09_09_frozen.pb') - - mlmodel_path = os.path.join( - TMP_MODEL_DIR, 'inception_v4_2016_09_09_frozen.mlmodel') - mlmodel = tf_converter.convert( - tf_model_path=tf_model_path, - mlmodel_path=mlmodel_path, - output_feature_names=['InceptionV4/Logits/Predictions:0'], - input_name_shape_dict={'input:0': [1, 299, 299, 3]}, - image_input_names=['input:0'], - red_bias=-1, - green_bias=-1, - blue_bias=-1, - image_scale=2.0/255.0) - - # Test predictions on an image - self._test_coreml_model_image_input( - tf_model_path=tf_model_path, - 
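One thing to watch in the two slim GoogLeNet tests above: the shape dict passes [1, 244, 224, 3] even though both models and the image test use 224x224 inputs, so the 244 looks like a transposed-digit typo for 224. The presumably intended entry:

input_name_shape_dict={'input:0': [1, 224, 224, 3]}  # NHWC, 224x224 RGB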
coreml_model=mlmodel, - input_tensor_name='input:0', - output_tensor_name='InceptionV4/Logits/Predictions:0', - img_size=299) - - def test_mobilenet_v1_100_224(self): - url = 'https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz' - tf_model_dir = _download_file(url=url) - tf_model_path = os.path.join( - TMP_MODEL_DIR, 'mobilenet_v1_1.0_224/frozen_graph.pb') - - mlmodel_path = os.path.join( - TMP_MODEL_DIR, 'mobilenet_v1_1.0_224.mlmodel') - mlmodel = tf_converter.convert( - tf_model_path=tf_model_path, - mlmodel_path=mlmodel_path, - output_feature_names=['MobilenetV1/Predictions/Softmax:0'], - input_name_shape_dict={'input:0': [1, 224, 224, 3]}, - image_input_names=['input:0'], - red_bias=-1, - green_bias=-1, - blue_bias=-1, - image_scale=2.0/255.0) - - # Test predictions on an image - self._test_coreml_model_image_input( - tf_model_path=tf_model_path, - coreml_model=mlmodel, - input_tensor_name='input:0', - output_tensor_name='MobilenetV1/Predictions/Softmax:0', - img_size=224) - - def test_mobilenet_v2_100_224(self): - url = 'https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz' - tf_model_dir = _download_file(url=url) - tf_model_path = os.path.join( - TMP_MODEL_DIR, 'mobilenet_v1_1.0_224/frozen_graph.pb') - - mlmodel_path = os.path.join( - TMP_MODEL_DIR, 'mobilenet_v1_1.0_224.mlmodel') - mlmodel = tf_converter.convert( - tf_model_path=tf_model_path, - mlmodel_path=mlmodel_path, - output_feature_names=['MobilenetV1/Predictions/Softmax:0'], - input_name_shape_dict={'input:0': [1, 224, 224, 3]}, - image_input_names=['input:0'], - red_bias=-1, - green_bias=-1, - blue_bias=-1, - image_scale=2.0/255.0) - - # Test predictions on an image - self._test_coreml_model_image_input( - tf_model_path=tf_model_path, - coreml_model=mlmodel, - input_tensor_name='input:0', - output_tensor_name='MobilenetV1/Predictions/Softmax:0', - img_size=224) - - def test_mobilenet_v1_75_192(self): - url = 'https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.75_192_frozen.tgz' - tf_model_dir = _download_file(url=url) - tf_model_path = os.path.join( - TMP_MODEL_DIR, 'mobilenet_v1_0.75_192/frozen_graph.pb') - - mlmodel_path = os.path.join( - TMP_MODEL_DIR, 'mobilenet_v1_0.75_192.mlmodel') - mlmodel = tf_converter.convert( - tf_model_path=tf_model_path, - mlmodel_path=mlmodel_path, - output_feature_names=['MobilenetV1/Predictions/Softmax:0'], - input_name_shape_dict={'input:0': [1, 192, 192, 3]}, - image_input_names=['input:0'], - red_bias=-1, - green_bias=-1, - blue_bias=-1, - image_scale=2.0/255.0) - - # Test predictions on an image - self._test_coreml_model_image_input( - tf_model_path=tf_model_path, - coreml_model=mlmodel, - input_tensor_name='input:0', - output_tensor_name='MobilenetV1/Predictions/Softmax:0', - img_size=192) - - def test_mobilenet_v1_50_160(self): - url = 'https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_0.50_160_frozen.tgz' - tf_model_dir = _download_file(url=url) - tf_model_path = os.path.join( - TMP_MODEL_DIR, 'mobilenet_v1_0.50_160/frozen_graph.pb') - - mlmodel_path = os.path.join( - TMP_MODEL_DIR, 'mobilenet_v1_0.50_160.mlmodel') - mlmodel = tf_converter.convert( - tf_model_path=tf_model_path, - mlmodel_path=mlmodel_path, - output_feature_names=['MobilenetV1/Predictions/Softmax:0'], - input_name_shape_dict={'input:0': [1, 160, 160, 3]}, - image_input_names=['input:0'], - red_bias=-1, - green_bias=-1, - blue_bias=-1, - image_scale=2.0/255.0) - - # Test predictions on 
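Note that test_mobilenet_v2_100_224 above downloads the same v1 archive as the test before it, so it exercises the v1 graph under a v2 name. The MobileNet tests all encode the depth multiplier and input size in the archive name; a sketch of that convention:

name = 'mobilenet_v1_0.75_192_frozen.tgz'
parts = name.replace('_frozen.tgz', '').split('_')
multiplier, img_size = float(parts[2]), int(parts[3])
print(multiplier, img_size)  # 0.75 192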
an image - self._test_coreml_model_image_input( - tf_model_path=tf_model_path, - coreml_model=mlmodel, - input_tensor_name='input:0', - output_tensor_name='MobilenetV1/Predictions/Softmax:0', - img_size=160) - - # @unittest.skip("Failing GPU backend: related to https://github.com/tf-coreml/tf-coreml/issues/26") - def test_style_transfer(self): - url = 'https://storage.googleapis.com/download.tensorflow.org/models/stylize_v1.zip' - tf_model_dir = _download_file(url=url) - tf_model_path = os.path.join(TMP_MODEL_DIR, 'stylize_quantized.pb') - mlmodel_path = os.path.join(TMP_MODEL_DIR, 'stylize_quantized.mlmodel') - # ? style transfer image size and style number? - mlmodel = tf_converter.convert( - tf_model_path=tf_model_path, - mlmodel_path=mlmodel_path, - output_feature_names=['Squeeze:0'], - input_name_shape_dict={'input:0': [1, 256, 256, 3], 'style_num:0': [26]}) - - # Test predictions on an image - input_tensors = [('input:0', [1, 256, 256, 3]), - ('style_num:0', [26])] - - self.err_thresh = 0.5 - self._test_tf_model( - tf_model_path=tf_model_path, - coreml_model=mlmodel, - input_tensors=input_tensors, - output_tensor_names=['Squeeze:0'], - data_modes=['image', 'onehot_0'], - delta=1e-2, - use_cpu_only=True, - scale=1, - bias=0, - img_size=256, - sequence_inputs={'style_num:0'}) - - -def _test_coreml_model_image_input(tf_model_path, coreml_model, - input_tensor_name, output_tensor_name, img_size, useCPUOnly=False): - """Test single image input conversions. - tf_model_path - the TF model - coreml_model - converted CoreML model - input_tensor_name - the input image tensor name - output_tensor_name - the output tensor name - img_size - size of the image - """ - - img_np, img = _load_image(TEST_IMAGE, resize_to=(img_size, img_size)) - img_tf = np.expand_dims(img_np, axis=0) - img_tf[:, :, :, 0] = 2.0/255 * img_tf[:, :, :, 0] - 1 - img_tf[:, :, :, 1] = 2.0/255 * img_tf[:, :, :, 1] - 1 - img_tf[:, :, :, 2] = 2.0/255 * img_tf[:, :, :, 2] - 1 - - # evaluate the TF model - tf.reset_default_graph() - graph_def = graph_pb2.GraphDef() - with open(tf_model_path, "rb") as f: - graph_def.ParseFromString(f.read()) - g = tf.import_graph_def(graph_def) - with tf.Session(graph=g) as sess: - image_input_tensor = sess.graph.get_tensor_by_name( - 'import/' + input_tensor_name) - output = sess.graph.get_tensor_by_name('import/' + output_tensor_name) - tf_out = sess.run(output, feed_dict={image_input_tensor: img_tf}) - if len(tf_out.shape) == 4: - tf_out = np.transpose(tf_out, (0, 3, 1, 2)) - tf_out_flatten = tf_out.flatten() - - # evaluate CoreML - coreml_input_name = input_tensor_name.replace(':', '__').replace('/', '__') - coreml_output_name = output_tensor_name.replace( - ':', '__').replace('/', '__') - coreml_input = {coreml_input_name: img} - - # Test the default CoreML evaluation - coreml_out = coreml_model.predict(coreml_input, useCPUOnly=useCPUOnly)[ - coreml_output_name] - coreml_out_flatten = coreml_out.flatten() - print(coreml_out_flatten) - # compare_tf_coreml_outputs(tf_out_flatten, coreml_out_flatten) - - -if __name__ == '__main__': - # #Download model - # url = 'https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz' - # tf_model_dir = _download_file(url = url) - # tf_model_path = os.path.join(TMP_MODEL_DIR, 'inception_v3_2016_08_28_frozen.pb') - - # #Convert to coreml - # mlmodel_path = os.path.join(TMP_MODEL_DIR, 'inception_v3_2016_08_28.mlmodel') - # mlmodel = tf_converter.convert( - # tf_model_path = tf_model_path, - # mlmodel_path = 
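The style-transfer test feeds a 26-way 'style_num' vector, and the 'onehot_0' data mode above selects style 0; a standalone sketch of building such a selector:

import numpy as np

def style_selector(style_index, num_styles=26):
    x = np.zeros(num_styles, dtype=np.float32)
    x[style_index] = 1.0
    return x

print(style_selector(0)[:5])  # [1. 0. 0. 0. 0.]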
mlmodel_path, - # output_feature_names = ['InceptionV3/Predictions/Softmax:0'], - # input_name_shape_dict = {'input:0':[1,299,299,3]}, - # image_input_names = ['input:0'], - # red_bias = -1, - # green_bias = -1, - # blue_bias = -1, - # image_scale = 2.0/255.0) - - # #Test predictions on an image - # _test_coreml_model_image_input( - # tf_model_path = tf_model_path, - # coreml_model = mlmodel, - # input_tensor_name = 'input:0', - # output_tensor_name = 'InceptionV3/Predictions/Softmax:0', - # img_size = 299) - - # Download model - url = 'https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip' - tf_model_dir = _download_file(url=url) - tf_model_path = os.path.join( - TMP_MODEL_DIR, 'tensorflow_inception_graph.pb') - - # Convert to coreml - mlmodel_path = os.path.join(TMP_MODEL_DIR, 'googlenet_v1_nonslim.mlmodel') - mlmodel = tf_converter.convert( - tf_model_path=tf_model_path, - mlmodel_path=mlmodel_path, - output_feature_names=['softmax2:0'], - input_name_shape_dict={'input:0': [1, 224, 224, 3]}, - image_input_names=['input:0'], - red_bias=-1, - green_bias=-1, - blue_bias=-1, - image_scale=2.0/255.0) - - # Test predictions on an image - _test_coreml_model_image_input( - tf_model_path=tf_model_path, - coreml_model=mlmodel, - input_tensor_name='input:0', - output_tensor_name='softmax2:0', - img_size=224) - - print("convert ok!") -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from ctypes import * -import math -import random - - -def sample(probs): - s = sum(probs) - probs = [a/s for a in probs] - r = random.uniform(0, 1) - for i in range(len(probs)): - r = r - probs[i] - if r <= 0: - return i - return len(probs)-1 - - -def c_array(ctype, values): - arr = (ctype*len(values))() - arr[:] = values - return arr - - -class BOX(Structure): - _fields_ = [("x", c_float), - ("y", c_float), - ("w", c_float), - ("h", c_float)] - - -class DETECTION(Structure): - _fields_ = [("bbox", BOX), - ("classes", c_int), - ("prob", POINTER(c_float)), - ("mask", POINTER(c_float)), - ("objectness", c_float), - ("sort_class", c_int)] - - -class IMAGE(Structure): - _fields_ = [("w", c_int), - ("h", c_int), - ("c", c_int), - ("data", POINTER(c_float))] - - -class METADATA(Structure): - _fields_ = [("classes", c_int), - ("names", POINTER(c_char_p))] - - -lib = CDLL("./mmdnn/conversion/examples/darknet/libdarknet.so", RTLD_GLOBAL) -lib.network_width.argtypes = [c_void_p] -lib.network_width.restype = c_int -lib.network_height.argtypes = [c_void_p] -lib.network_height.restype = c_int - -predict = lib.network_predict -predict.argtypes = [c_void_p, POINTER(c_float)] -predict.restype = POINTER(c_float) - -set_gpu = lib.cuda_set_device -set_gpu.argtypes = [c_int] - -make_image = lib.make_image -make_image.argtypes = [c_int, c_int, c_int] -make_image.restype = IMAGE - -get_network_boxes = lib.get_network_boxes -get_network_boxes.argtypes = [c_void_p, c_int, c_int, - c_float, c_float, POINTER(c_int), c_int, POINTER(c_int)] -get_network_boxes.restype = POINTER(DETECTION) - -make_network_boxes = lib.make_network_boxes -make_network_boxes.argtypes = [c_void_p] -make_network_boxes.restype = POINTER(DETECTION) - -free_detections = lib.free_detections -free_detections.argtypes = [POINTER(DETECTION), c_int] - -free_ptrs = lib.free_ptrs -free_ptrs.argtypes = [POINTER(c_void_p), c_int] - -network_predict = lib.network_predict -network_predict.argtypes = [c_void_p, POINTER(c_float)] - -reset_rnn = lib.reset_rnn -reset_rnn.argtypes = [c_void_p] - 
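The sample helper above draws an index with probability proportional to its weight; a quick standalone illustration:

import random
from collections import Counter

def sample(probs):
    s = sum(probs)
    probs = [a / s for a in probs]
    r = random.uniform(0, 1)
    for i in range(len(probs)):
        r -= probs[i]
        if r <= 0:
            return i
    return len(probs) - 1

counts = Counter(sample([1, 3]) for _ in range(10000))
print(counts)  # roughly 2500 zeros and 7500 ones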
-load_net = lib.load_network -load_net.argtypes = [c_char_p, c_char_p, c_int] -load_net.restype = c_void_p - -do_nms_obj = lib.do_nms_obj -do_nms_obj.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] - -do_nms_sort = lib.do_nms_sort -do_nms_sort.argtypes = [POINTER(DETECTION), c_int, c_int, c_float] - -free_image = lib.free_image -free_image.argtypes = [IMAGE] - -letterbox_image = lib.letterbox_image -letterbox_image.argtypes = [IMAGE, c_int, c_int] -letterbox_image.restype = IMAGE - -load_meta = lib.get_metadata -lib.get_metadata.argtypes = [c_char_p] -lib.get_metadata.restype = METADATA - -load_image = lib.load_image_color -load_image.argtypes = [c_char_p, c_int, c_int] -load_image.restype = IMAGE - -rgbgr_image = lib.rgbgr_image -rgbgr_image.argtypes = [IMAGE] - -predict_image = lib.network_predict_image -predict_image.argtypes = [c_void_p, IMAGE] -predict_image.restype = POINTER(c_float) - - -def classify(net, meta, im): - out = predict_image(net, im) - res = [] - r = [] - for i in range(meta.classes): - # print(i) - r.append(out[i]) - res.append((meta.names[i], out[i])) - res = sorted(res, key=lambda x: -x[1]) - return res, r - - -def detect(net, meta, image, thresh=.5, hier_thresh=.5, nms=.45): - im = load_image(image, 0, 0) - num = c_int(0) - pnum = pointer(num) - predict_image(net, im) - dets = get_network_boxes(net, im.w, im.h, thresh, - hier_thresh, None, 0, pnum) - num = pnum[0] - if (nms): - do_nms_obj(dets, num, meta.classes, nms) - - res = [] - for j in range(num): - for i in range(meta.classes): - if dets[j].prob[i] > 0: - b = dets[j].bbox - res.append( - (meta.names[i], dets[j].prob[i], (b.x, b.y, b.w, b.h))) - res = sorted(res, key=lambda x: -x[1]) - free_image(im) - free_detections(dets, num) - return res -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
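A minimal usage sketch of the ctypes bindings above (the cfg, weights, metadata and image paths are assumptions; note the byte strings, as in the extractor that follows):

net = load_net(b'cfg/yolov3.cfg', b'yolov3.weights', 0)
meta = load_meta(b'cfg/coco.data')
for name, prob, (x, y, w, h) in detect(net, meta, b'data/dog.jpg'):
    print(name, round(prob, 3), (x, y, w, h))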
-# ---------------------------------------------------------------------------------------------- - -from __future__ import absolute_import -from __future__ import print_function -import os -from mmdnn.conversion.examples.darknet import darknet as cdarknet -from mmdnn.conversion.examples.imagenet_test import TestKit -from mmdnn.conversion.examples.extractor import base_extractor -from mmdnn.conversion.common.utils import download_file - - -class darknet_extractor(base_extractor): - - _base_model_url = "https://raw.githubusercontent.com/pjreddie/darknet/master/" - - architecture_map = { - 'yolov3': { - 'config': _base_model_url + "cfg/yolov3.cfg", - 'weights': "https://pjreddie.com/media/files/yolov3.weights" - }, - - 'yolov2': { - 'config': _base_model_url + "cfg/yolov2.cfg", - 'weights': "https://pjreddie.com/media/files/yolov2.weights" - } - - } - - @classmethod - def download(cls, architecture, path='./'): - - if cls.sanity_check(architecture): - cfg_name = architecture + ".cfg" - architecture_file = download_file( - cls.architecture_map[architecture]['config'], directory=path, local_fname=cfg_name) - if not architecture_file: - return None - - weight_name = architecture + ".weights" - weight_file = download_file( - cls.architecture_map[architecture]['weights'], directory=path, local_fname=weight_name) - if not weight_file: - return None - - print("Darknet Model {} saved as [{}] and [{}].".format( - architecture, architecture_file, weight_file)) - return (architecture_file, weight_file) - - else: - return None - - @classmethod - def inference(cls, architecture, files, model_path, image_path): - import numpy as np - - if cls.sanity_check(architecture): - download_file(cls._base_model_url + - "cfg/coco.data", directory='./') - download_file(cls._base_model_url + - "data/coco.names", directory='./data/') - - print(files) - net = cdarknet.load_net(files[0].encode(), files[1].encode(), 0) - meta = cdarknet.load_meta("coco.data".encode()) - - r = cdarknet.detect(net, meta, image_path.encode()) - # print(r) - return r - - else: - return None - - -# d = darknet_extractor() -# model_filename = d.download('yolov3') -# print(model_filename) - -# image_path = "./mmdnn/conversion/examples/data/dog.jpg" -# model_path = "./" -# d = darknet_extractor() -# result = d.inference('yolov3', model_filename, model_path, image_path = image_path) -# print(result) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ---------------------------------------------------------------------------------------------- - -import argparse -from six import text_type as _text_type -import keras -from mmdnn.conversion.examples.imagenet_test import TestKit - -networks_map = { - 'inception_v3': lambda: keras.applications.inception_v3.InceptionV3(input_shape=(299, 299, 3)), - 'vgg16': lambda: keras.applications.vgg16.VGG16(), - 'vgg19': lambda: keras.applications.vgg19.VGG19(), - 'resnet': lambda: keras.applications.resnet50.ResNet50(), - 'mobilenet': lambda: keras.applications.mobilenet.MobileNet(), - 'xception': lambda: keras.applications.xception.Xception(input_shape=(299, 299, 3)), - 'inception_resnet': lambda: keras.applications.inception_resnet_v2.InceptionResNetV2() -} - -image_size = { - 'inception_v3': 299, - 'vgg16': 224, - 'vgg19': 224, - 'resnet': 224, - 'mobilenet': 224, - 'xception': 299, - 'inception_resnet': 299 -} - - -def _main(): - parser = argparse.ArgumentParser() - - parser.add_argument('-n', '--network', - type=_text_type, help='Model Type', required=True, - choices=networks_map.keys()) - - parser.add_argument('-i', '--image', - type=_text_type, help='Test Image Path') - - args = parser.parse_args() - - model = networks_map.get(args.network) - if model is None: - raise NotImplementedError( - "Unknown keras application [{}]".format(args.network)) - - model = model() - # save network structure as JSON - json_string = model.to_json() - with open("imagenet_{}.json".format(args.network), "w") as of: - of.write(json_string) - - print("Network structure is saved as [imagenet_{}.json].".format( - args.network)) - - model.save_weights('imagenet_{}.h5'.format(args.network)) - - print("Network weights are saved as [imagenet_{}.h5].".format( - args.network)) - - if args.image: - import numpy as np - func = TestKit.preprocess_func['keras'][args.network] - img = func(args.image) - img = np.expand_dims(img, axis=0) - predict = model.predict(img) - predict = np.squeeze(predict) - top_indices = predict.argsort()[-5:][::-1] - result = [(i, predict[i]) for i in top_indices] - print(result) - - # layer_name = 'block2_pool' - # intermediate_layer_model = keras.Model(inputs=model.input, - # outputs=model.get_layer(layer_name).output) - # intermediate_output = intermediate_layer_model.predict(img) - # print (intermediate_output) - # print (intermediate_output.shape) - # print ("%.30f" % np.sum(intermediate_output)) - - -if __name__ == '__main__': - _main() -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
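The script above saves the structure and weights separately; restoring the model later is the mirror image (standard Keras API, using the same file-naming scheme with 'resnet' as an example network):

import keras

with open('imagenet_resnet.json') as f:   # structure saved by the script
    model = keras.models.model_from_json(f.read())
model.load_weights('imagenet_resnet.h5')  # weights saved by the script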
-# ---------------------------------------------------------------------------------------------- - -from __future__ import absolute_import -import os -import keras -from keras import backend as K -from mmdnn.conversion.examples.imagenet_test import TestKit -from mmdnn.conversion.examples.extractor import base_extractor -from mmdnn.conversion.common.utils import download_file - - -class keras_extractor(base_extractor): - - MMDNN_BASE_URL = 'http://mmdnn.eastasia.cloudapp.azure.com:89/models/' - - architecture_map = { - 'inception_v3': lambda: keras.applications.inception_v3.InceptionV3(input_shape=(299, 299, 3)), - 'vgg16': lambda: keras.applications.vgg16.VGG16(), - 'vgg19': lambda: keras.applications.vgg19.VGG19(), - 'resnet50': lambda: keras.applications.resnet50.ResNet50(), - 'mobilenet': lambda: keras.applications.mobilenet.MobileNet(), - 'xception': lambda: keras.applications.xception.Xception(input_shape=(299, 299, 3)), - 'inception_resnet_v2': lambda: keras.applications.inception_resnet_v2.InceptionResNetV2(input_shape=(299, 299, 3)), - 'densenet': lambda: keras.applications.densenet.DenseNet201(), - 'nasnet': lambda: keras.applications.nasnet.NASNetLarge() - } - - thirdparty_map = { - 'yolo2': MMDNN_BASE_URL + 'keras/yolo2.h5', - } - - image_size = { - 'inception_v3': 299, - 'vgg16': 224, - 'vgg19': 224, - 'resnet': 224, - 'mobilenet': 224, - 'xception': 299, - 'inception_resnet': 299, - 'densenet': 224, - 'nasnet': 331, - } - - @classmethod - def help(cls): - print('Support frameworks: {}'.format(set().union( - cls.architecture_map.keys(), cls.thirdparty_map.keys()))) - - @classmethod - def download(cls, architecture, path="./"): - if architecture in cls.thirdparty_map: - weight_file = download_file( - cls.thirdparty_map[architecture], directory=path) - return weight_file - - elif cls.sanity_check(architecture): - output_filename = path + 'imagenet_{}.h5'.format(architecture) - if os.path.exists(output_filename) == False: - model = cls.architecture_map[architecture]() - model.save(output_filename) - print("Keras model {} is saved in [{}]".format( - architecture, output_filename)) - K.clear_session() - del model - return output_filename - - else: - print("File [{}] existed, skip download.".format( - output_filename)) - return output_filename - - else: - return None - - @classmethod - def inference(cls, architecture, files, path, image_path): - if architecture in cls.thirdparty_map: - model = keras.models.load_model(files) - - elif cls.sanity_check(architecture): - model = cls.architecture_map[architecture]() - - else: - model = None - - if model: - import numpy as np - func = TestKit.preprocess_func['keras'][architecture] - img = func(image_path) - img = np.expand_dims(img, axis=0) - predict = model.predict(img) - predict = np.squeeze(predict) - K.clear_session() - del model - return predict - - else: - return None -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
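A hypothetical round trip with the Keras extractor above (the import path is an assumption for illustration):

from mmdnn.conversion.examples.keras.extractor import keras_extractor

weight_file = keras_extractor.download('mobilenet', path='./')
probs = keras_extractor.inference(
    'mobilenet', weight_file, './',
    'mmdnn/conversion/examples/data/seagull.jpg')
print(probs.argsort()[-5:][::-1])  # top-5 class ids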
-# ---------------------------------------------------------------------------------------------- - -import argparse -import numpy as np -import sys -import os -from mmdnn.conversion.examples.imagenet_test import TestKit - -import colorsys -from keras import backend as K -from PIL import Image, ImageFont, ImageDraw -from mmdnn.conversion.examples.keras.utils import yolo_eval - - -class TestKeras(TestKit): - - def __init__(self): - - # self.anchors = np.array([[10,13], [16,30],[33,23],[30,61],[62,45], [59,119],[116,90],[156,198],[373,326]]) - self.class_names = ['person', 'bicycle', 'car', 'motorbike', 'aeroplane', 'bus', 'train', 'truck', 'boat', - 'traffic light', 'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow', - 'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee', 'skis', 'snowboard', - 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup', - 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple', 'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', - 'cake', 'chair', 'sofa', 'pottedplant', 'bed', 'diningtable', 'toilet', 'tvmonitor', 'laptop', 'mouse', 'remote', 'keyboard', - 'cell phone', 'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear', 'hair drier', 'toothbrush'] - super(TestKeras, self).__init__() - self.model = self.MainModel.KitModel(self.args.w) - - def preprocess(self, image_path): - x = super(TestKeras, self).preprocess(image_path) - self.data = np.expand_dims(x, 0) - - def print_result(self): - predict = self.model.predict(self.data) - super(TestKeras, self).print_result(predict) - - def generate(self): - self.input_image_shape = K.placeholder(shape=(2, )) - output = self.model.output - output.sort(key=lambda x: int(x.shape[1])) - # print(output) - - boxes, scores, classes = yolo_eval(output, self.anchors, - len(self.class_names), self.input_image_shape, - score_threshold=self.score_threshold, iou_threshold=self.iou_threshold) - return boxes, scores, classes - - def yolo_result(self, path): - image = Image.fromarray(np.uint8(np.squeeze(self.data))) - - self.sess = K.get_session() - self.boxes, self.scores, self.classes = self.generate() - out_boxes, out_scores, out_classes = self.sess.run( - [self.boxes, self.scores, self.classes], - feed_dict={ - self.model.input: self.data/255., - self.input_image_shape: [608, 608], - K.learning_phase(): 0 - }) - # print(out_boxes, out_scores, out_classes) - print('Found {} boxes for {}'.format(len(out_boxes), 'img')) - - thickness = (image.size[0] + image.size[1]) // 300 - - for i, c in reversed(list(enumerate(out_classes))): - predicted_class = self.class_names[c] - box = out_boxes[i] - score = out_scores[i] - - label = '{} {:.2f}'.format(predicted_class, score) - draw = ImageDraw.Draw(image) - label_size = draw.textsize(label) - - top, left, bottom, right = box - top = max(0, np.floor(top + 0.5).astype('int32')) - left = max(0, np.floor(left + 0.5).astype('int32')) - bottom = min(image.size[1], np.floor(bottom + 0.5).astype('int32')) - right = min(image.size[0], np.floor(right + 0.5).astype('int32')) - print(label, (left, top), (right, bottom)) - - if top - label_size[1] >= 0: - text_origin = np.array([left, top - label_size[1]]) - else: - text_origin = np.array([left, top + 1]) - - # get random colors - self.colors = [] - C = list(np.random.random_integers( - 255, 
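Note that np.random.random_integers (used for the box colors here) has long been deprecated; should the drawing code need updating, the modern equivalent is a sketch like:

import numpy as np

# One random RGB tuple per class; randint's upper bound is exclusive.
colors = [tuple(int(v) for v in rgb)
          for rgb in np.random.randint(0, 256, size=(80, 3))]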
size=(len(self.class_names), 3))) - for i in C: - self.colors.append(tuple(i)) - - # My kingdom for a good redistributable image drawing library. - for i in range(thickness): - draw.rectangle( - [left + i, top + i, right - i, bottom - i], - outline=self.colors[c]) - draw.rectangle( - [tuple(text_origin), tuple(text_origin + label_size)], - fill=self.colors[c]) - draw.text(tuple(text_origin), label, fill=(0, 0, 0)) - del draw - image.save("{}.jpg".format(path), "JPEG") - - def print_intermediate_result(self, layer_name, if_transpose=False): - from keras.models import Model - intermediate_layer_model = Model(inputs=self.model.input, - outputs=self.model.get_layer(layer_name).output) - intermediate_output = intermediate_layer_model.predict(self.data) - super(TestKeras, self).print_intermediate_result( - intermediate_output, if_transpose) - - def inference(self, image_path): - self.preprocess(image_path) - - print(self.data.shape) - # self.print_intermediate_result('conv1_7x7_s2_1', True) - - self.print_result() - - self.test_truth() - - def dump(self, path=None): - if path is None: - path = self.args.dump - - self.model.save(path) - print('Keras model file is saved as [{}], generated by [{}.py] and [{}].'.format( - path, self.args.n, self.args.w)) - - def detect(self, image_path, path=None): - self.yolo_parameter = self.MainModel.yolo_parameter() - # yolov3 80 classes - assert self.yolo_parameter[1] == 80 - self.anchors = [] - for i in range(len(self.yolo_parameter[0])): - if i % 2: - tmp = [self.yolo_parameter[0][i-1], self.yolo_parameter[0][i]] - self.anchors.append(tmp) - self.anchors = np.array(self.anchors) - self.score_threshold = self.yolo_parameter[2] - self.iou_threshold = self.yolo_parameter[3] - - self.preprocess(image_path) - - self.yolo_result(path) - - print('Keras yolo model result file is saved as [{}.jpg], generated by [{}.py] and [{}].'.format( - path, self.args.n, self.args.w)) - - -if __name__ == '__main__': - tester = TestKeras() - if tester.args.dump: - tester.dump() - elif tester.args.detect: - tester.detect(tester.args.image, tester.args.detect) - else: - tester.inference(tester.args.image) -import tensorflow as tf -from keras import backend as K -from PIL import Image - - -def letterbox_image(image, size): - '''resize image with unchanged aspect ratio using padding''' - image_w, image_h = image.size - w, h = size - new_w = int(image_w * min(w/image_w, h/image_h)) - new_h = int(image_h * min(w/image_w, h/image_h)) - resized_image = image.resize((new_w, new_h), Image.BICUBIC) - - boxed_image = Image.new('RGB', size, (128, 128, 128)) - boxed_image.paste(resized_image, ((w-new_w)//2, (h-new_h)//2)) - return boxed_image - - -def yolo_head(feats, anchors, num_classes, input_shape): - """Convert final layer features to bounding box parameters.""" - num_anchors = len(anchors) - # Reshape to batch, height, width, num_anchors, box_params. 
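The anchor-pairing loop in detect above walks a flat list two values at a time; an equivalent and arguably clearer NumPy form, shown only as an illustration:

import numpy as np

flat = [10, 13, 16, 30, 33, 23]           # w,h pairs flattened, as in a cfg
anchors = np.array(flat).reshape(-1, 2)   # [[10 13] [16 30] [33 23]]
print(anchors)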
- anchors_tensor = K.reshape(K.constant(anchors), [1, 1, 1, num_anchors, 2]) - - conv_dims = K.shape(feats)[1:3] - conv_height_index = K.arange(0, stop=conv_dims[1]) - conv_width_index = K.arange(0, stop=conv_dims[0]) - conv_height_index = K.tile(conv_height_index, [conv_dims[0]]) - - conv_width_index = K.tile( - K.expand_dims(conv_width_index, 0), [conv_dims[1], 1]) - conv_width_index = K.flatten(K.transpose(conv_width_index)) - conv_index = K.transpose(K.stack([conv_height_index, conv_width_index])) - conv_index = K.reshape(conv_index, [1, conv_dims[0], conv_dims[1], 1, 2]) - conv_index = K.cast(conv_index, K.dtype(feats)) - - feats = K.reshape( - feats, [-1, conv_dims[0], conv_dims[1], num_anchors, num_classes + 5]) - conv_dims = K.cast(conv_dims[::-1], K.dtype(feats)) - - box_xy = K.sigmoid(feats[..., :2]) - box_wh = K.exp(feats[..., 2:4]) - box_confidence = K.sigmoid(feats[..., 4:5]) - box_class_probs = K.sigmoid(feats[..., 5:]) - - # Adjust preditions to each spatial grid point and anchor size. - # Note: YOLO iterates over height index before width index. - # TODO: It works with +1, don't know why. - box_xy = (box_xy + conv_index + 1) / conv_dims - box_wh = box_wh * anchors_tensor / \ - K.cast(input_shape[::-1], K.dtype(box_wh)) - - return box_xy, box_wh, box_confidence, box_class_probs - - -def yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape): - '''Get corrected boxes''' - box_yx = box_xy[..., ::-1] - box_hw = box_wh[..., ::-1] - input_shape = K.cast(input_shape, K.dtype(box_yx)) - image_shape = K.cast(image_shape, K.dtype(box_yx)) - new_shape = K.round(image_shape * K.min(input_shape/image_shape)) - offset = (input_shape-new_shape)/2./input_shape - scale = input_shape/new_shape - box_yx = (box_yx - offset) * scale - box_hw *= scale - - box_mins = box_yx - (box_hw / 2.) - box_maxes = box_yx + (box_hw / 2.) - boxes = K.concatenate([ - box_mins[..., 0:1], # y_min - box_mins[..., 1:2], # x_min - box_maxes[..., 0:1], # y_max - box_maxes[..., 1:2] # x_max - ]) - - # Scale boxes back to original image shape. - boxes *= K.concatenate([image_shape, image_shape]) - return boxes - - -def yolo_boxes_and_scores(feats, anchors, num_classes, input_shape, image_shape): - '''Process Conv layer output''' - # print("feats,anchors, num_classes, input_shape", feats, anchors, num_classes, input_shape) - box_xy, box_wh, box_confidence, box_class_probs = yolo_head(feats, - anchors, num_classes, input_shape) - # print(box_xy, box_wh, box_confidence, box_class_probs) - boxes = yolo_correct_boxes(box_xy, box_wh, input_shape, image_shape) - boxes = K.reshape(boxes, [-1, 4]) - box_scores = box_confidence * box_class_probs - box_scores = K.reshape(box_scores, [-1, num_classes]) - return boxes, box_scores - - -def yolo_eval(yolo_outputs, - anchors, - num_classes, - image_shape, - max_boxes=20, - score_threshold=.6, - iou_threshold=.5): - """Evaluate YOLO model on given input and return filtered boxes.""" - # yolo_outputs order 13,26,52 - - input_shape = K.shape(yolo_outputs[0])[1:3] * 32 - - for i in range(0, 3): - _boxes, _box_scores = yolo_boxes_and_scores(yolo_outputs[i], - anchors[6-3*i:9-3*i], num_classes, input_shape, image_shape) - if i == 0: - boxes, box_scores = _boxes, _box_scores - else: - boxes = K.concatenate([boxes, _boxes], axis=0) - box_scores = K.concatenate([box_scores, _box_scores], axis=0) - - mask = box_scores >= score_threshold - max_boxes_tensor = K.constant(max_boxes, dtype='int32') - for i in range(num_classes): - # TODO: use keras backend instead of tf. 
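Setting aside the '+1' TODO in the code, the decode in yolo_head above follows the standard YOLOv3 parameterization; in scalar form (cx, cy are the grid-cell offsets, pw, ph the anchor dimensions):

import math

def decode_box(tx, ty, tw, th, cx, cy, pw, ph, grid_w, grid_h, in_w, in_h):
    sigmoid = lambda v: 1.0 / (1.0 + math.exp(-v))
    bx = (sigmoid(tx) + cx) / grid_w  # center x as a fraction of image width
    by = (sigmoid(ty) + cy) / grid_h  # center y as a fraction of image height
    bw = pw * math.exp(tw) / in_w     # width as a fraction of image width
    bh = ph * math.exp(th) / in_h     # height as a fraction of image height
    return bx, by, bw, bh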
- class_boxes = tf.boolean_mask(boxes, mask[:, i]) - class_box_scores = tf.boolean_mask(box_scores[:, i], mask[:, i]) - nms_index = tf.image.non_max_suppression( - class_boxes, class_box_scores, max_boxes_tensor, iou_threshold=iou_threshold) - class_boxes = K.gather(class_boxes, nms_index) - class_box_scores = K.gather(class_box_scores, nms_index) - classes = K.ones_like(class_box_scores, 'int32') * i - if i == 0: - boxes_, scores_, classes_ = class_boxes, class_box_scores, classes - else: - boxes_ = K.concatenate([boxes_, class_boxes], axis=0) - scores_ = K.concatenate([scores_, class_box_scores], axis=0) - classes_ = K.concatenate([classes_, classes], axis=0) - return boxes_, scores_, classes_ -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -import argparse -from six import text_type as _text_type -import mxnet as mx -from mmdnn.conversion.examples.imagenet_test import TestKit -from mmdnn.conversion.common.utils import download_file -from collections import namedtuple -Batch = namedtuple('Batch', ['data']) - -network_name_key = ['resnet', 'vgg19', 'squeezenet', 'inception-bn', 'resnext'] - -_base_model_url = 'http://data.mxnet.io/models/' -_default_model_info = { - 'imagenet1k-inception-bn': {'symbol': _base_model_url+'imagenet/inception-bn/Inception-BN-symbol.json', - 'params': _base_model_url+'imagenet/inception-bn/Inception-BN-0126.params', - 'image_size': 224}, - 'imagenet1k-resnet-18': {'symbol': _base_model_url+'imagenet/resnet/18-layers/resnet-18-symbol.json', - 'params': _base_model_url+'imagenet/resnet/18-layers/resnet-18-0000.params', - 'image_size': 224}, - 'imagenet1k-resnet-34': {'symbol': _base_model_url+'imagenet/resnet/34-layers/resnet-34-symbol.json', - 'params': _base_model_url+'imagenet/resnet/34-layers/resnet-34-0000.params', - 'image_size': 224}, - 'imagenet1k-resnet-50': {'symbol': _base_model_url+'imagenet/resnet/50-layers/resnet-50-symbol.json', - 'params': _base_model_url+'imagenet/resnet/50-layers/resnet-50-0000.params', - 'image_size': 224}, - 'imagenet1k-resnet-101': {'symbol': _base_model_url+'imagenet/resnet/101-layers/resnet-101-symbol.json', - 'params': _base_model_url+'imagenet/resnet/101-layers/resnet-101-0000.params', - 'image_size': 224}, - 'imagenet1k-resnet-152': {'symbol': _base_model_url+'imagenet/resnet/152-layers/resnet-152-symbol.json', - 'params': _base_model_url+'imagenet/resnet/152-layers/resnet-152-0000.params', - 'image_size': 224}, - 'imagenet1k-resnext-50': {'symbol': _base_model_url+'imagenet/resnext/50-layers/resnext-50-symbol.json', - 'params': _base_model_url+'imagenet/resnext/50-layers/resnext-50-0000.params', - 'image_size': 224}, - 'imagenet1k-resnext-101': {'symbol': _base_model_url+'imagenet/resnext/101-layers/resnext-101-symbol.json', - 'params': _base_model_url+'imagenet/resnext/101-layers/resnext-101-0000.params', - 'image_size': 224}, - 'imagenet1k-resnext-101-64x4d': {'symbol': _base_model_url+'imagenet/resnext/101-layers/resnext-101-64x4d-symbol.json', - 'params': _base_model_url+'imagenet/resnext/101-layers/resnext-101-64x4d-0000.params', - 'image_size': 224}, - 'imagenet11k-resnet-152': {'symbol': _base_model_url+'imagenet-11k/resnet-152/resnet-152-symbol.json', - 'params': 
_base_model_url+'imagenet-11k/resnet-152/resnet-152-0000.params', - 'image_size': 224}, - 'imagenet11k-place365ch-resnet-152': {'symbol': _base_model_url+'imagenet-11k-place365-ch/resnet-152-symbol.json', - 'params': _base_model_url+'imagenet-11k-place365-ch/resnet-152-0000.params', - 'image_size': 224}, - 'imagenet11k-place365ch-resnet-50': {'symbol': _base_model_url+'imagenet-11k-place365-ch/resnet-50-symbol.json', - 'params': _base_model_url+'imagenet-11k-place365-ch/resnet-50-0000.params', - 'image_size': 224}, - 'vgg19': {'symbol': _base_model_url+'imagenet/vgg/vgg19-symbol.json', - 'params': _base_model_url+'imagenet/vgg/vgg19-0000.params', - 'image_size': 224}, - 'vgg16': {'symbol': _base_model_url+'imagenet/vgg/vgg16-symbol.json', - 'params': _base_model_url+'imagenet/vgg/vgg16-0000.params', - 'image_size': 224}, - 'squeezenet_v1.0': {'symbol': _base_model_url+'imagenet/squeezenet/squeezenet_v1.0-symbol.json', - 'params': _base_model_url+'imagenet/squeezenet/squeezenet_v1.0-0000.params', - 'image_size': 224}, - 'squeezenet_v1.1': {'symbol': _base_model_url+'imagenet/squeezenet/squeezenet_v1.1-symbol.json', - 'params': _base_model_url+'imagenet/squeezenet/squeezenet_v1.1-0000.params', - 'image_size': 224} -} - - -def _search_preprocess_key(original_network_name): - import re - for key in network_name_key: - if re.search(key, original_network_name): - return key - raise ValueError( - 'preprocess module cannot support [{}]'.format(original_network_name)) - - -def _main(): - parser = argparse.ArgumentParser() - - parser.add_argument('-n', '--network', type=_text_type, help='Model Type', required=True, - choices=_default_model_info.keys()) - - parser.add_argument('-i', '--image', default=None, - type=_text_type, help='Test Image Path') - - parser.add_argument('-o', '--output_dir', default='./', - type=_text_type, help='Tensorflow Checkpoint file name') - - args = parser.parse_args() - - if not download_file(_default_model_info[args.network]['symbol'], directory=args.output_dir): - return -1 - - if not download_file(_default_model_info[args.network]['params'], directory=args.output_dir): - return -1 - - print("Model {} saved.".format(args.network)) - - file_name = _default_model_info[args.network]['params'].split('/')[-1] - prefix, epoch_num = file_name[:-7].rsplit('-', 1) - - sym, arg_params, aux_params = mx.model.load_checkpoint( - args.output_dir + prefix, int(epoch_num)) - model = mx.mod.Module(symbol=sym) - model.bind(for_training=False, - data_shapes=[('data', (1, 3, _default_model_info[args.network]['image_size'], - _default_model_info[args.network]['image_size']))]) - model.set_params(arg_params, aux_params, - allow_missing=True, allow_extra=True) - - if args.image: - import numpy as np - - # need to be updated - network = _search_preprocess_key(args.network) - - func = TestKit.preprocess_func['mxnet'][network] - img = func(args.image) - img = np.swapaxes(img, 0, 2) - img = np.swapaxes(img, 1, 2) - img = np.expand_dims(img, axis=0) - - model.forward(Batch([mx.nd.array(img)])) - predict = model.get_outputs()[0].asnumpy() - predict = np.squeeze(predict) - top_indices = predict.argsort()[-5:][::-1] - result = [(i, predict[i]) for i in top_indices] - print(result) - - return 0 - - -if __name__ == '__main__': - _main() -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
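Two idioms in _main above are worth unpacking: the params filename is split into checkpoint prefix and epoch with file_name[:-7].rsplit('-', 1), and the HWC image is converted to CHW with two swapaxes calls. Both sketched standalone:

import numpy as np

# '.params' is 7 characters, so [:-7] strips the extension.
file_name = 'resnet-152-0000.params'
prefix, epoch_num = file_name[:-7].rsplit('-', 1)
print(prefix, int(epoch_num))  # resnet-152 0

# HWC -> CHW: the two swapaxes calls equal a single transpose.
x = np.zeros((224, 224, 3))
a = np.swapaxes(np.swapaxes(x, 0, 2), 1, 2)
b = np.transpose(x, (2, 0, 1))
print(a.shape == b.shape == (3, 224, 224))  # True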
-# ---------------------------------------------------------------------------------------------- - -from __future__ import absolute_import -from mmdnn.conversion.examples.imagenet_test import TestKit -from mmdnn.conversion.examples.extractor import base_extractor -from mmdnn.conversion.common.utils import download_file - - -class mxnet_extractor(base_extractor): - - _base_model_url = 'http://data.mxnet.io/models/' - - _image_size = 224 - - from collections import namedtuple - Batch = namedtuple('Batch', ['data']) - - architecture_map = { - 'imagenet1k-inception-bn': {'symbol': _base_model_url+'imagenet/inception-bn/Inception-BN-symbol.json', - 'params': _base_model_url+'imagenet/inception-bn/Inception-BN-0126.params'}, - 'imagenet1k-resnet-18': {'symbol': _base_model_url+'imagenet/resnet/18-layers/resnet-18-symbol.json', - 'params': _base_model_url+'imagenet/resnet/18-layers/resnet-18-0000.params'}, - 'imagenet1k-resnet-34': {'symbol': _base_model_url+'imagenet/resnet/34-layers/resnet-34-symbol.json', - 'params': _base_model_url+'imagenet/resnet/34-layers/resnet-34-0000.params'}, - 'imagenet1k-resnet-50': {'symbol': _base_model_url+'imagenet/resnet/50-layers/resnet-50-symbol.json', - 'params': _base_model_url+'imagenet/resnet/50-layers/resnet-50-0000.params'}, - 'imagenet1k-resnet-101': {'symbol': _base_model_url+'imagenet/resnet/101-layers/resnet-101-symbol.json', - 'params': _base_model_url+'imagenet/resnet/101-layers/resnet-101-0000.params'}, - 'imagenet1k-resnet-152': {'symbol': _base_model_url+'imagenet/resnet/152-layers/resnet-152-symbol.json', - 'params': _base_model_url+'imagenet/resnet/152-layers/resnet-152-0000.params'}, - 'imagenet1k-resnext-50': {'symbol': _base_model_url+'imagenet/resnext/50-layers/resnext-50-symbol.json', - 'params': _base_model_url+'imagenet/resnext/50-layers/resnext-50-0000.params'}, - 'imagenet1k-resnext-101': {'symbol': _base_model_url+'imagenet/resnext/101-layers/resnext-101-symbol.json', - 'params': _base_model_url+'imagenet/resnext/101-layers/resnext-101-0000.params'}, - 'imagenet1k-resnext-101-64x4d': {'symbol': _base_model_url+'imagenet/resnext/101-layers/resnext-101-64x4d-symbol.json', - 'params': _base_model_url+'imagenet/resnext/101-layers/resnext-101-64x4d-0000.params'}, - 'imagenet11k-resnet-152': {'symbol': _base_model_url+'imagenet-11k/resnet-152/resnet-152-symbol.json', - 'params': _base_model_url+'imagenet-11k/resnet-152/resnet-152-0000.params'}, - 'imagenet11k-place365ch-resnet-152': {'symbol': _base_model_url+'imagenet-11k-place365-ch/resnet-152-symbol.json', - 'params': _base_model_url+'imagenet-11k-place365-ch/resnet-152-0000.params'}, - 'imagenet11k-place365ch-resnet-50': {'symbol': _base_model_url+'imagenet-11k-place365-ch/resnet-50-symbol.json', - 'params': _base_model_url+'imagenet-11k-place365-ch/resnet-50-0000.params'}, - 'vgg19': {'symbol': _base_model_url+'imagenet/vgg/vgg19-symbol.json', - 'params': _base_model_url+'imagenet/vgg/vgg19-0000.params'}, - 'vgg16': {'symbol': _base_model_url+'imagenet/vgg/vgg16-symbol.json', - 'params': _base_model_url+'imagenet/vgg/vgg16-0000.params'}, - 'squeezenet_v1.0': {'symbol': _base_model_url+'imagenet/squeezenet/squeezenet_v1.0-symbol.json', - 'params': _base_model_url+'imagenet/squeezenet/squeezenet_v1.0-0000.params'}, - 'squeezenet_v1.1': {'symbol': _base_model_url+'imagenet/squeezenet/squeezenet_v1.1-symbol.json', - 'params': _base_model_url+'imagenet/squeezenet/squeezenet_v1.1-0000.params'} - } - - @classmethod - def download(cls, architecture, path="./"): - if 
cls.sanity_check(architecture): - architecture_file = download_file( - cls.architecture_map[architecture]['symbol'], directory=path) - if not architecture_file: - return None - - weight_file = download_file( - cls.architecture_map[architecture]['params'], directory=path) - if not weight_file: - return None - - print("MXNet Model {} saved as [{}] and [{}].".format( - architecture, architecture_file, weight_file)) - return (architecture_file, weight_file) - - else: - return None - - @classmethod - def inference(cls, architecture, files, path, image_path): - import mxnet as mx - import numpy as np - if cls.sanity_check(architecture): - file_name = cls.architecture_map[architecture]['params'].split( - '/')[-1] - prefix, epoch_num = file_name[:-7].rsplit('-', 1) - - sym, arg_params, aux_params = mx.model.load_checkpoint( - path + prefix, int(epoch_num)) - model = mx.mod.Module(symbol=sym) - model.bind(for_training=False, - data_shapes=[('data', (1, 3, cls._image_size, cls._image_size))]) - model.set_params(arg_params, aux_params, - allow_missing=True, allow_extra=True) - - func = TestKit.preprocess_func['mxnet'][architecture] - img = func(image_path) - img = np.transpose(img, [2, 0, 1]) - img = np.expand_dims(img, axis=0) - - model.forward(cls.Batch([mx.nd.array(img)])) - predict = model.get_outputs()[0].asnumpy() - predict = np.squeeze(predict) - - del model - return predict - - else: - return None -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -from collections import namedtuple -import numpy as np -from mmdnn.conversion.examples.imagenet_test import TestKit -import mxnet as mx - -Batch = namedtuple('Batch', ['data']) - - -class TestMXNet(TestKit): - - def __init__(self): - super(TestMXNet, self).__init__() - - self.truth['tensorflow']['inception_v3'] = [ - (22, 9.6691055), (24, 4.3524752), (25, 3.5957956), (132, 3.5657482), (23, 3.3462858)] - self.truth['keras']['inception_v3'] = [(21, 0.93430501), (23, 0.0028834261), ( - 131, 0.0014781745), (24, 0.0014518937), (22, 0.0014435325)] - - self.model = self.MainModel.RefactorModel() - self.model = self.MainModel.deploy_weight(self.model, self.args.w) - - def preprocess(self, image_path): - self.data = super(TestMXNet, self).preprocess(image_path) - self.data = np.swapaxes(self.data, 0, 2) - self.data = np.swapaxes(self.data, 1, 2) - self.data = np.expand_dims(self.data, 0) - - def print_result(self): - self.model.forward(Batch([mx.nd.array(self.data)])) - prob = self.model.get_outputs()[0].asnumpy() - super(TestMXNet, self).print_result(prob) - - def inference(self, image_path): - self.preprocess(image_path) - - # self.print_intermediate_result('pooling0', False) - - self.print_result() - - self.test_truth() - - def print_intermediate_result(self, layer_name, if_transpose=False): - internals = self.model.symbol.get_internals() - intermediate_output = internals[layer_name + "_output"] - test_model = mx.mod.Module( - symbol=intermediate_output, context=mx.cpu(), data_names=['data']) - if self.args.preprocess == 'vgg19' or self.args.preprocess == 'inception_v1': - test_model.bind(for_training=False, data_shapes=[ - ('data', (1, 3, 224, 224))]) - elif 'resnet' in self.args.preprocess or self.args.preprocess == 'inception_v3': - 
test_model.bind(for_training=False, data_shapes=[ - ('data', (1, 3, 299, 299))]) - else: - assert False - - arg_params, aux_params = self.model.get_params() - - test_model.set_params( - arg_params=arg_params, aux_params=aux_params, allow_missing=True, allow_extra=True) - test_model.forward(Batch([mx.nd.array(self.data)])) - intermediate_output = test_model.get_outputs()[0].asnumpy() - - super(TestMXNet, self).print_intermediate_result( - intermediate_output, if_transpose) - - def dump(self, path=None): - if path is None: - path = self.args.dump - self.model.save_checkpoint(path, 0) - print('MXNet checkpoint file is saved with prefix [{}] and iteration 0, generated by [{}.py] and [{}].'.format( - path, self.args.n, self.args.w)) - - -if __name__ == '__main__': - tester = TestMXNet() - if tester.args.dump: - tester.dump() - else: - tester.inference(tester.args.image) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -import argparse -import numpy as np -import sys -import os -import tensorflow as tf -from onnx_tf.backend import prepare -from mmdnn.conversion.examples.imagenet_test import TestKit - - -class TestONNX(TestKit): - - def __init__(self): - super(TestONNX, self).__init__() - self.model = prepare(self.MainModel.KitModel(self.args.w)) - # self.input, self.model, self.testop = self.MainModel.KitModel(self.args.w) - - def preprocess(self, image_path): - x = super(TestONNX, self).preprocess(image_path) - self.data = np.expand_dims(x, 0) - - def print_result(self): - predict = self.model.run(self.data)[0] - super(TestONNX, self).print_result(predict) - - def print_intermediate_result(self, layer_name, if_transpose=False): - # testop = tf.get_default_graph().get_operation_by_name(layer_name) - testop = self.testop - with tf.Session() as sess: - init = tf.global_variables_initializer() - sess.run(init) - intermediate_output = sess.run( - testop, feed_dict={self.input: self.data}) - - super(TestONNX, self).print_intermediate_result( - intermediate_output, if_transpose) - - def inference(self, image_path): - self.preprocess(image_path) - - # self.print_intermediate_result('conv1_7x7_s2_1', True) - - self.print_result() - - self.test_truth() - - -if __name__ == '__main__': - tester = TestONNX() - if tester.args.dump: - tester.dump() - else: - tester.inference(tester.args.image) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ----------------------------------------------------------------------------------------------
-
-import argparse
-import os
-from six import text_type as _text_type
-from mmdnn.conversion.common.utils import download_file
-import paddle.v2 as paddle
-import gzip
-from paddle.trainer_config_helpers.config_parser_utils import \
-    reset_parser
-
-BASE_MODEL_URL = 'http://cloud.dlnel.org/filepub/?uuid='
-# pylint: disable=line-too-long
-MODEL_URL = {
-    'resnet50': BASE_MODEL_URL + 'f63f237a-698e-4a22-9782-baf5bb183019',
-    'resnet101': BASE_MODEL_URL + '3d5fb996-83d0-4745-8adc-13ee960fc55c',
-    'vgg16': BASE_MODEL_URL + 'aa0e397e-474a-4cc1-bd8f-65a214039c2e',
-}
-# pylint: enable=line-too-long
-IMG_SIZE = 224
-CLASS_DIMS = {
-    'resnet50': 1000,
-    'resnet101': 1000,
-    'vgg16': 1001,  # works at 1001, but fails at 1000
-    'alexnet': 1001,
-}
-
-
-def dump_v2_config(topology, save_path, binary=False):
-    import collections
-
-    from paddle.trainer_config_helpers.layers import LayerOutput
-    from paddle.v2.layer import parse_network
-    from paddle.proto import TrainerConfig_pb2
-    """ Dump the network topology to a specified file.
-    This function is only used to dump a network defined with the PaddlePaddle
-    V2 API.
-    :param topology: The output layers in the entire network.
-    :type topology: LayerOutput|List|Tuple
-    :param save_path: The path to save the dumped network topology.
-    :type save_path: str
-    :param binary: Whether to dump the serialized network topology. The default
-        value is false.
-    :type binary: bool.
-    """
-
-    if isinstance(topology, LayerOutput):
-        topology = [topology]
-    elif isinstance(topology, collections.Sequence):
-        for out_layer in topology:
-            assert isinstance(out_layer, LayerOutput), (
-                "The type of each element in the parameter topology "
-                "should be LayerOutput.")
-    else:
-        raise RuntimeError("Error input type for parameter topology.")
-
-    model_str = parse_network(topology)
-    with open(save_path, "w") as fout:
-        if binary:
-            fout.write(model_str.SerializeToString())
-        else:
-            fout.write(str(model_str))
-
-
-def _main():
-    parser = argparse.ArgumentParser()
-
-    parser.add_argument('-n', '--network', type=_text_type, help='Model Type', required=True,
-                        choices=MODEL_URL.keys())
-
-    parser.add_argument('-i', '--image', default=None,
-                        type=_text_type, help='Test Image Path')
-
-    parser.add_argument('-o', '--output_dir', default='./',
-                        type=_text_type, help='Output directory for the downloaded parameters')
-
-    args = parser.parse_args()
-    architecture = args.network  # the rest of the script refers to the chosen network as `architecture`
-
-    fn = download_file(
-        MODEL_URL[args.network], local_fname=architecture + '.tar.gz', directory=args.output_dir)
-    if not fn:
-        return -1
-
-    # Use 3 * 331 * 331 or 3 * 299 * 299 for Inception-ResNet-v2.
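-    # The dense vector below is the whole image flattened: with IMG_SIZE = 224
-    # this is 3 * 224 * 224 = 150528 values, since the PaddlePaddle V2 data
-    # layer consumes a flat CHW vector rather than a 4-D image tensor.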
-    DATA_DIM = 3 * IMG_SIZE * IMG_SIZE
-    CLASS_DIM = CLASS_DIMS[args.network]
-
-    # refer to https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/tests/test_rnn_layer.py#L35
-    reset_parser()
-
-    # refer to https://github.com/PaddlePaddle/Paddle/issues/7403
-    paddle.init(use_gpu=False, trainer_count=1)
-
-    image = paddle.layer.data(
-        name="image", type=paddle.data_type.dense_vector(DATA_DIM))
-    if 'resnet' in architecture:
-        from mmdnn.conversion.examples.paddle.models import resnet
-        depth = int(architecture.strip('resnet'))
-        out = resnet.resnet_imagenet(image, class_dim=CLASS_DIM, depth=depth)
-    elif architecture == 'vgg16':
-        from mmdnn.conversion.examples.paddle.models import vgg
-        out = vgg.vgg16(image, class_dim=CLASS_DIM)
-    else:
-        print("No support for {} yet.".format(architecture))
-        return None
-
-    dump_v2_config(out, args.output_dir + architecture + '.bin')
-
-    print("Model {} is saved as {} and {}.".format(
-        args.network, args.output_dir + architecture + '.bin', fn))
-
-    if args.image:
-
-        import numpy as np
-        from mmdnn.conversion.examples.imagenet_test import TestKit
-        func = TestKit.preprocess_func['paddle'][args.network]
-        img = func(args.image)
-        img = np.transpose(img, (2, 0, 1))
-        test_data = [(img.flatten(),)]
-
-        with gzip.open(fn, 'r') as f:  # fn is the parameter tarball downloaded above
-            parameters = paddle.parameters.Parameters.from_tar(f)
-
-        predict = paddle.infer(
-            output_layer=out, parameters=parameters, input=test_data)
-        predict = np.squeeze(predict)
-        top_indices = predict.argsort()[-5:][::-1]
-        result = [(i, predict[i]) for i in top_indices]
-        print(result)
-        print(np.sum(predict))
-
-    return 0
-
-
-if __name__ == '__main__':
-    _main()
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ----------------------------------------------------------------------------------------------
-
-from __future__ import absolute_import
-from mmdnn.conversion.examples.imagenet_test import TestKit
-from mmdnn.conversion.examples.extractor import base_extractor
-from mmdnn.conversion.common.utils import download_file
-import paddle.v2 as paddle
-import gzip
-from paddle.trainer_config_helpers.config_parser_utils import \
-    reset_parser
-
-
-class paddle_extractor(base_extractor):
-
-    _base_model_url = 'http://cloud.dlnel.org/filepub/?uuid='
-
-    _image_size = 224
-
-    architecture_map = {
-        'resnet50': {'params': _base_model_url + 'f63f237a-698e-4a22-9782-baf5bb183019', },
-        'resnet101': {'params': _base_model_url + '3d5fb996-83d0-4745-8adc-13ee960fc55c', },
-        'vgg16': {'params': _base_model_url + 'aa0e397e-474a-4cc1-bd8f-65a214039c2e', },
-
-    }
-
-    class_dim_map = {
-        'resnet50': 1000,
-        'resnet101': 1000,
-        'vgg16': 1001,  # works at 1001, but fails at 1000
-        'alexnet': 1001,
-    }
-
-    @classmethod
-    def dump_v2_config(cls, topology, save_path, binary=False):
-        import collections
-
-        from paddle.trainer_config_helpers.layers import LayerOutput
-        from paddle.v2.layer import parse_network
-        from paddle.proto import TrainerConfig_pb2
-        """ Dump the network topology to a specified file.
-        This function is only used to dump a network defined with the PaddlePaddle
-        V2 API.
-        :param topology: The output layers in the entire network.
-        :type topology: LayerOutput|List|Tuple
-        :param save_path: The path to save the dumped network topology.
-        :type save_path: str
-        :param binary: Whether to dump the serialized network topology. The default
-            value is false.
-        :type binary: bool.
-        """
-
-        if isinstance(topology, LayerOutput):
-            topology = [topology]
-        elif isinstance(topology, collections.Sequence):
-            for out_layer in topology:
-                assert isinstance(out_layer, LayerOutput), (
-                    "The type of each element in the parameter topology "
-                    "should be LayerOutput.")
-        else:
-            raise RuntimeError("Error input type for parameter topology.")
-
-        model_str = parse_network(topology)
-        with open(save_path, "w") as fout:
-            if binary:
-                fout.write(model_str.SerializeToString())
-            else:
-                fout.write(str(model_str))
-
-    @classmethod
-    def download(cls, architecture, path="./"):
-        if cls.sanity_check(architecture):
-            reset_parser()
-
-            # Use 3 * 331 * 331 or 3 * 299 * 299 for Inception-ResNet-v2.
-            DATA_DIM = 3 * paddle_extractor._image_size * paddle_extractor._image_size
-            CLASS_DIM = paddle_extractor.class_dim_map[architecture]
-
-            image = paddle.layer.data(
-                name="image", type=paddle.data_type.dense_vector(DATA_DIM))
-            if 'resnet' in architecture:
-                from mmdnn.conversion.examples.paddle.models import resnet
-                depth = int(architecture.strip('resnet'))
-                out = resnet.resnet_imagenet(
-                    image, class_dim=CLASS_DIM, depth=depth)
-            elif architecture == 'vgg16':
-                from mmdnn.conversion.examples.paddle.models import vgg
-                out = vgg.vgg16(image, class_dim=CLASS_DIM)
-            else:
-                print("No support for {} yet.".format(architecture))
-                return None
-            architecture_file = path + architecture + '.bin'
-            paddle_extractor.dump_v2_config(out, architecture_file, True)
-
-            weight_file = download_file(
-                cls.architecture_map[architecture]['params'], directory=path, local_fname=architecture + '.tar.gz')
-            if not weight_file:
-                return None
-
-            print("Paddle Model {} saved as [{}] and [{}].".format(
-                architecture, architecture_file, weight_file))
-            return (architecture_file, weight_file)
-
-        else:
-            return None
-
-    @classmethod
-    def inference(cls, architecture, files, path, image_path):
-
-        import numpy as np
-        if cls.sanity_check(architecture):
-            # refer to https://github.com/PaddlePaddle/Paddle/blob/develop/python/paddle/v2/tests/test_rnn_layer.py#L35
-            reset_parser()
-
-            # refer to https://github.com/PaddlePaddle/Paddle/issues/7403
-            paddle.init(use_gpu=False, trainer_count=1)
-
-            # Use 3 * 331 * 331 or 3 * 299 * 299 for Inception-ResNet-v2.
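-            # The preprocessed image must match the dense_vector input declared
-            # below, which is why the inference code transposes it to CHW and
-            # flattens it to DATA_DIM values before calling paddle.infer.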
-            DATA_DIM = 3 * paddle_extractor._image_size * paddle_extractor._image_size
-            CLASS_DIM = paddle_extractor.class_dim_map[architecture]
-            image = paddle.layer.data(
-                name="image", type=paddle.data_type.dense_vector(DATA_DIM))
-
-            if 'resnet' in architecture:
-                from mmdnn.conversion.examples.paddle.models import resnet
-                depth = int(architecture.strip('resnet'))
-                out = resnet.resnet_imagenet(
-                    image, class_dim=CLASS_DIM, depth=depth)
-            elif architecture == 'vgg16':
-                from mmdnn.conversion.examples.paddle.models import vgg
-                out = vgg.vgg16(image, class_dim=CLASS_DIM)
-            else:
-                print("No support for {} yet.".format(architecture))
-                return None
-
-            _, parameters_file = files
-
-            with gzip.open(parameters_file, 'r') as f:
-                parameters = paddle.parameters.Parameters.from_tar(f)
-
-            func = TestKit.preprocess_func['paddle'][architecture]
-            img = func(image_path)
-            img = np.transpose(img, [2, 0, 1])
-            test_data = [(img.flatten(),)]
-
-            predict = paddle.infer(
-                output_layer=out, parameters=parameters, input=test_data)
-            predict = np.squeeze(predict)
-
-            return predict
-
-        else:
-            return None
-# ----------------------------------------------------------------------------------------------
-# Copyright (c) Microsoft Corporation. All rights reserved.
-# Licensed under the MIT License. See License.txt in the project root for license information.
-# ----------------------------------------------------------------------------------------------
-
-import argparse
-import numpy as np
-import sys
-import os
-from mmdnn.conversion.examples.imagenet_test import TestKit
-import paddle.v2 as paddle
-import gzip
-from paddle.trainer_config_helpers.config_parser_utils import \
-    reset_parser
-
-
-class TestPaddle(TestKit):
-
-    def __init__(self):
-        from six import text_type as _text_type
-        parser = argparse.ArgumentParser()
-
-        parser.add_argument('-p', '--preprocess',
-                            type=_text_type, help='Model Preprocess Type')
-
-        parser.add_argument('--model', '-n', '-w', type=_text_type,
-                            required=True, help='Paddle Model path.')
-
-        parser.add_argument('-s', type=_text_type, help='Source Framework Type',
-                            choices=self.truth.keys())
-
-        parser.add_argument('--image', '-i',
-                            type=_text_type, help='Test image path.',
-                            default="mmdnn/conversion/examples/data/seagull.jpg")
-
-        parser.add_argument('-input', type=_text_type,
-                            required=True, help='Paddle Input Node')
-
-        parser.add_argument('-output', type=_text_type,
-                            required=True, help='Paddle Output Node')
-
-        parser.add_argument('-size', type=int,
-                            default=224, help='Paddle Input Image Size')
-
-        self.args = parser.parse_args()
-
-        print("Loading model [{}].".format(self.args.model))
-
-        # import self.model
-        # self.model
-
-        # TODO: figure out why the model cannot be loaded back from `***.bin`
-
-        print("Model loaded successfully.")
-
-    def preprocess(self, image_path):
-        from PIL import Image as pil_image
-        img = pil_image.open(image_path)
-        img = img.resize((self.args.size, self.args.size))
-        self.data = img
-
-    def print_result(self):
-        reset_parser()
-        img = np.transpose(self.data, (2, 0, 1))
-        test_data = [(img.flatten(),)]
-
-        parameters_file = self.args.model  # argparse stores '--model'/'-n'/'-w' under `model`
-        with gzip.open(parameters_file, 'r') as f:
-            parameters = paddle.parameters.Parameters.from_tar(f)
-
-        predict = paddle.infer(output_layer=self.model,
-                               parameters=parameters, input=test_data)
-        predict = np.squeeze(predict)
-
-        super(TestPaddle, self).print_result(predict)
-
-    def print_intermediate_result(self, layer_name, if_transpose=False):
-        super(TestPaddle, self).print_intermediate_result(
-            self.model.name, if_transpose)
-
- 
def inference(self, image_path): - self.preprocess(image_path) - self.print_result() - - -if __name__ == '__main__': - tester = TestPaddle() - tester.inference(tester.args.image) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -import argparse -import os -from six import text_type as _text_type -from mmdnn.conversion.examples.imagenet_test import TestKit -import torch -import torchvision.models as models - - -NETWORKS_MAP = { - 'inception_v3': lambda: models.inception_v3(pretrained=True), - 'vgg16': lambda: models.vgg16(pretrained=True), - 'vgg19': lambda: models.vgg19(pretrained=True), - 'resnet152': lambda: models.resnet152(pretrained=True), - 'densenet': lambda: models.densenet201(pretrained=True), - 'squeezenet': lambda: models.squeezenet1_1(pretrained=True) -} - - -def _main(): - parser = argparse.ArgumentParser() - - parser.add_argument('-n', '--network', - type=_text_type, help='Model Type', required=True, - choices=NETWORKS_MAP.keys()) - - parser.add_argument('-i', '--image', type=_text_type, - help='Test Image Path') - - args = parser.parse_args() - - file_name = "imagenet_{}.pth".format(args.network) - if not os.path.exists(file_name): - model = NETWORKS_MAP.get(args.network) - model = model() - torch.save(model, file_name) - print("PyTorch pretrained model is saved as [{}].".format(file_name)) - else: - print("File [{}] existed!".format(file_name)) - model = torch.load(file_name) - - if args.image: - import numpy as np - func = TestKit.preprocess_func['pytorch'][args.network] - img = func(args.image) - img = np.transpose(img, (2, 0, 1)) - img = np.expand_dims(img, 0).copy() - data = torch.from_numpy(img) - data = torch.autograd.Variable(data, requires_grad=False) - - model.eval() - predict = model(data).data.numpy() - predict = np.squeeze(predict) - top_indices = predict.argsort()[-5:][::-1] - result = [(i, predict[i]) for i in top_indices] - print(result) - - # layer_name = 'block2_pool' - # intermediate_layer_model = keras.Model(inputs=model.input, - # outputs=model.get_layer(layer_name).output) - # intermediate_output = intermediate_layer_model.predict(img) - # print (intermediate_output) - # print (intermediate_output.shape) - # print ("%.30f" % np.sum(intermediate_output)) - - -if __name__ == '__main__': - _main() -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ---------------------------------------------------------------------------------------------- - -from __future__ import absolute_import - -import os -from mmdnn.conversion.examples.imagenet_test import TestKit -from mmdnn.conversion.examples.extractor import base_extractor -from mmdnn.conversion.common.utils import download_file -import torch -import torchvision.models as models - - -class pytorch_extractor(base_extractor): - - architecture_map = sorted(name for name in models.__dict__ - if name.islower() and not name.startswith("__") - and callable(models.__dict__[name])) - - @classmethod - def help(cls): - print('Support frameworks: {}'.format(cls.architecture_map)) - - @classmethod - def download(cls, architecture, path="./"): - if cls.sanity_check(architecture): - architecture_file = path + "imagenet_{}.pth".format(architecture) - if not os.path.exists(architecture_file): - kwargs = {} - if architecture == 'inception_v3': - kwargs['transform_input'] = False - model = models.__dict__[architecture]( - pretrained=True, **kwargs) - torch.save(model, architecture_file) - print("PyTorch pretrained model is saved as [{}].".format( - architecture_file)) - else: - print("File [{}] existed!".format(architecture_file)) - - return architecture_file - - else: - return None - - @classmethod - def inference(cls, architecture, path, image_path): - model = torch.load(path) - - model.eval() - - import numpy as np - func = TestKit.preprocess_func['pytorch'][architecture] - img = func(image_path) - img = np.transpose(img, (2, 0, 1)) - - img = np.expand_dims(img, 0).copy() - - data = torch.from_numpy(img) - data = torch.autograd.Variable(data, requires_grad=False) - - predict = model(data).data.numpy() - predict = np.squeeze(predict) - - return predict -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ---------------------------------------------------------------------------------------------- - -import argparse -import numpy as np -import sys -import os -from mmdnn.conversion.examples.imagenet_test import TestKit -import torch - - -class TestTorch(TestKit): - - def __init__(self): - super(TestTorch, self).__init__() - - self.truth['tensorflow']['inception_v3'] = [ - (22, 9.6691055), (24, 4.3524747), (25, 3.5957973), (132, 3.5657473), (23, 3.346283)] - self.truth['keras']['inception_v3'] = [ - (21, 0.93430489), (23, 0.002883445), (131, 0.0014781791), (24, 0.0014518998), (22, 0.0014435351)] - - self.model = self.MainModel.KitModel(self.args.w) - self.model.eval() - - def preprocess(self, image_path): - x = super(TestTorch, self).preprocess(image_path) - x = np.transpose(x, (2, 0, 1)) - x = np.expand_dims(x, 0).copy() - self.data = torch.from_numpy(x) - self.data = torch.autograd.Variable(self.data, requires_grad=False) - - def print_result(self): - predict = self.model(self.data) - predict = predict.data.numpy() - super(TestTorch, self).print_result(predict) - - def print_intermediate_result(self, layer_name, if_transpose=False): - intermediate_output = self.model.test.data.numpy() - super(TestTorch, self).print_intermediate_result( - intermediate_output, if_transpose) - - def inference(self, image_path): - self.preprocess(image_path) - - self.print_result() - - # self.print_intermediate_result(None, False) - - self.test_truth() - - def dump(self, path=None): - if path is None: - path = self.args.dump - torch.save(self.model, path) - print('PyTorch model file is saved as [{}], generated by [{}.py] and [{}].'.format( - path, self.args.n, self.args.w)) - - -if __name__ == '__main__': - tester = TestTorch() - if tester.args.dump: - tester.dump() - else: - tester.inference(tester.args.image) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. 
-# ---------------------------------------------------------------------------------------------- - -import argparse -from six import text_type as _text_type -import tensorflow as tf -from tensorflow.contrib.slim.python.slim.nets import vgg -from tensorflow.contrib.slim.python.slim.nets import inception -from tensorflow.contrib.slim.python.slim.nets import resnet_v1 -from tensorflow.contrib.slim.python.slim.nets import resnet_v2 -from mmdnn.conversion.examples.imagenet_test import TestKit - -slim = tf.contrib.slim - -input_layer_map = { - 'vgg16': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 224, 224, 3]), - 'vgg19': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 224, 224, 3]), - 'inception_v1': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 224, 224, 3]), - 'inception_v2': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 299, 299, 3]), - 'inception_v3': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 299, 299, 3]), - 'resnet50': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 299, 299, 3]), - 'resnet_v1_101': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 224, 224, 3]), - 'resnet101': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 299, 299, 3]), - 'resnet152': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 299, 299, 3]), - 'resnet200': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 299, 299, 3]), -} - -arg_scopes_map = { - 'vgg16': vgg.vgg_arg_scope, - 'vgg19': vgg.vgg_arg_scope, - 'inception_v1': inception.inception_v3_arg_scope, - 'inception_v2': inception.inception_v3_arg_scope, - 'inception_v3': inception.inception_v3_arg_scope, - 'resnet50': resnet_v2.resnet_arg_scope, - 'resnet_v1_101': resnet_v2.resnet_arg_scope, - 'resnet101': resnet_v2.resnet_arg_scope, - 'resnet152': resnet_v2.resnet_arg_scope, - 'resnet200': resnet_v2.resnet_arg_scope, - # 'mobilenet_v1': mobilenet_v1.mobilenet_v1_arg_scope, -} - -networks_map = { - 'vgg16': lambda: vgg.vgg_16, - 'vgg19': lambda: vgg.vgg_19, - 'inception_v1': lambda: inception.inception_v1, - 'inception_v2': lambda: inception.inception_v2, - 'inception_v3': lambda: inception.inception_v3, - 'resnet_v1_101': lambda: resnet_v1.resnet_v1_101, - 'resnet50': lambda: resnet_v2.resnet_v2_50, - 'resnet101': lambda: resnet_v2.resnet_v2_101, - 'resnet152': lambda: resnet_v2.resnet_v2_152, - 'resnet200': lambda: resnet_v2.resnet_v2_200, - # 'mobilenet_v1' : mobilenet_v1.mobilenet_v1, -} - - -def _main(): - parser = argparse.ArgumentParser() - - parser.add_argument('-n', '--network', type=_text_type, help='Model Type', required=True, - choices=input_layer_map.keys()) - - parser.add_argument('-i', '--image', - type=_text_type, help='Test Image Path') - - parser.add_argument('-ckpt', '--checkpoint', - type=_text_type, help='Tensorflow Checkpoint file name', required=True) - - args = parser.parse_args() - - num_classes = 1000 if args.network in ( - 'vgg16', 'vgg19', 'resnet_v1_101') else 1001 - - with slim.arg_scope(arg_scopes_map[args.network]()): - data_input = input_layer_map[args.network]() - logits, endpoints = networks_map[args.network]()( - data_input, num_classes=num_classes, is_training=False) - labels = tf.squeeze(logits) - - init = tf.global_variables_initializer() - - with tf.Session() as sess: - writer = tf.summary.FileWriter('./graphs', sess.graph) - writer.close() - sess.run(init) - saver = tf.train.Saver() - saver.restore(sess, 
args.checkpoint) - save_path = saver.save(sess, "./imagenet_{}.ckpt".format(args.network)) - print("Model saved in file: %s" % save_path) - - if args.image: - import numpy as np - func = TestKit.preprocess_func['tensorflow'][args.network] - img = func(args.image) - img = np.expand_dims(img, axis=0) - predict = sess.run(logits, feed_dict={data_input: img}) - predict = np.squeeze(predict) - top_indices = predict.argsort()[-5:][::-1] - result = [(i, predict[i]) for i in top_indices] - print(result) - - -if __name__ == '__main__': - _main() -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -from __future__ import absolute_import - -from mmdnn.conversion.common.utils import download_file -from mmdnn.conversion.examples.extractor import base_extractor -from mmdnn.conversion.examples.imagenet_test import TestKit -import os -import tensorflow as tf - -from tensorflow.contrib.slim.nets import vgg -from tensorflow.contrib.slim.nets import inception -from tensorflow.contrib.slim.nets import resnet_v1 -from tensorflow.contrib.slim.nets import resnet_v2 -from mmdnn.conversion.examples.tensorflow.models import inception_resnet_v2 -from mmdnn.conversion.examples.tensorflow.models import mobilenet_v1 -from mmdnn.conversion.examples.tensorflow.models import nasnet -from mmdnn.conversion.examples.tensorflow.models.mobilenet import mobilenet_v2 -from mmdnn.conversion.examples.tensorflow.models import inception_resnet_v1 -from mmdnn.conversion.examples.tensorflow.models import test_rnn -slim = tf.contrib.slim - - -class tensorflow_extractor(base_extractor): - - architecture_map = { - 'vgg16': { - 'url': 'http://download.tensorflow.org/models/vgg_16_2016_08_28.tar.gz', - 'filename': 'vgg_16.ckpt', - 'builder': lambda: vgg.vgg_16, - 'arg_scope': vgg.vgg_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 224, 224, 3]), - 'num_classes': 1000, - }, - 'vgg19': { - 'url': 'http://download.tensorflow.org/models/vgg_19_2016_08_28.tar.gz', - 'filename': 'vgg_19.ckpt', - 'builder': lambda: vgg.vgg_19, - 'arg_scope': vgg.vgg_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 224, 224, 3]), - 'num_classes': 1000, - }, - 'inception_v1': { - 'url': 'http://download.tensorflow.org/models/inception_v1_2016_08_28.tar.gz', - 'filename': 'inception_v1.ckpt', - 'builder': lambda: inception.inception_v1, - 'arg_scope': inception.inception_v3_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 224, 224, 3]), - 'num_classes': 1001, - }, - 'inception_v1_frozen': { - 'url': 'https://storage.googleapis.com/download.tensorflow.org/models/inception_v1_2016_08_28_frozen.pb.tar.gz', - 'filename': 'inception_v1_2016_08_28_frozen.pb', - 'tensor_out': ['InceptionV1/Logits/Predictions/Reshape_1:0'], - 'tensor_in': ['input:0'], - # input_shape of the elem in tensor_in - 'input_shape': [[224, 224, 3]], - 'feed_dict': lambda img: {'input:0': img}, - 'num_classes': 1001, - }, - 'inception_v3': { - 'url': 'http://download.tensorflow.org/models/inception_v3_2016_08_28.tar.gz', - 'filename': 'inception_v3.ckpt', - 'builder': lambda: inception.inception_v3, - 'arg_scope': inception.inception_v3_arg_scope, - 'input': lambda: 
tf.placeholder(name='input', dtype=tf.float32, shape=[None, 299, 299, 3]), - 'num_classes': 1001, - }, - 'inception_v3_frozen': { - 'url': 'https://storage.googleapis.com/download.tensorflow.org/models/inception_v3_2016_08_28_frozen.pb.tar.gz', - 'filename': 'inception_v3_2016_08_28_frozen.pb', - 'tensor_out': ['InceptionV3/Predictions/Softmax:0'], - 'tensor_in': ['input:0'], - # input_shape of the elem in tensor_in - 'input_shape': [[299, 299, 3]], - 'feed_dict': lambda img: {'input:0': img}, - 'num_classes': 1001, - }, - 'resnet_v1_50': { - 'url': 'http://download.tensorflow.org/models/resnet_v1_50_2016_08_28.tar.gz', - 'filename': 'resnet_v1_50.ckpt', - 'builder': lambda: resnet_v1.resnet_v1_50, - 'arg_scope': resnet_v2.resnet_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 224, 224, 3]), - 'num_classes': 1000, - }, - 'resnet_v1_152': { - 'url': 'http://download.tensorflow.org/models/resnet_v1_152_2016_08_28.tar.gz', - 'filename': 'resnet_v1_152.ckpt', - 'builder': lambda: resnet_v1.resnet_v1_152, - 'arg_scope': resnet_v2.resnet_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 224, 224, 3]), - 'num_classes': 1000, - }, - 'resnet_v2_50': { - 'url': 'http://download.tensorflow.org/models/resnet_v2_50_2017_04_14.tar.gz', - 'filename': 'resnet_v2_50.ckpt', - 'builder': lambda: resnet_v2.resnet_v2_50, - 'arg_scope': resnet_v2.resnet_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 299, 299, 3]), - 'num_classes': 1001, - }, - 'resnet_v2_101': { - 'url': 'http://download.tensorflow.org/models/resnet_v2_101_2017_04_14.tar.gz', - 'filename': 'resnet_v2_101.ckpt', - 'builder': lambda: resnet_v2.resnet_v2_101, - 'arg_scope': resnet_v2.resnet_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 299, 299, 3]), - 'num_classes': 1001, - }, - 'resnet_v2_152': { - 'url': 'http://download.tensorflow.org/models/resnet_v2_152_2017_04_14.tar.gz', - 'filename': 'resnet_v2_152.ckpt', - 'builder': lambda: resnet_v2.resnet_v2_152, - 'arg_scope': resnet_v2.resnet_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 299, 299, 3]), - 'num_classes': 1001, - }, - 'resnet_v2_200': { - 'url': 'http://download.tensorflow.org/models/resnet_v2_200_2017_04_14.tar.gz', - 'filename': 'resnet_v2_200.ckpt', - 'builder': lambda: resnet_v2.resnet_v2_200, - 'arg_scope': resnet_v2.resnet_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 299, 299, 3]), - 'num_classes': 1001, - }, - 'mobilenet_v1_1.0': { - 'url': 'http://download.tensorflow.org/models/mobilenet_v1_1.0_224_2017_06_14.tar.gz', - 'filename': 'mobilenet_v1_1.0_224.ckpt', - 'builder': lambda: mobilenet_v1.mobilenet_v1, - 'arg_scope': mobilenet_v1.mobilenet_v1_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 224, 224, 3]), - 'num_classes': 1001, - }, - 'mobilenet_v1_1.0_frozen': { - 'url': 'https://storage.googleapis.com/download.tensorflow.org/models/mobilenet_v1_1.0_224_frozen.tgz', - 'filename': 'mobilenet_v1_1.0_224/frozen_graph.pb', - 'tensor_out': ['MobilenetV1/Predictions/Softmax:0'], - 'tensor_in': ['input:0'], - # input_shape of the elem in tensor_in - 'input_shape': [[224, 224, 3]], - 'feed_dict': lambda img: {'input:0': img}, - 'num_classes': 1001, - }, - 'mobilenet_v2_1.0_224': { - 'url': 'https://storage.googleapis.com/mobilenet_v2/checkpoints/mobilenet_v2_1.0_224.tgz', - 'filename': 
'mobilenet_v2_1.0_224.ckpt', - 'builder': lambda: mobilenet_v2.mobilenet, - 'arg_scope': mobilenet_v2.training_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 224, 224, 3]), - 'num_classes': 1001, - }, - 'inception_resnet_v2': { - 'url': 'http://download.tensorflow.org/models/inception_resnet_v2_2016_08_30.tar.gz', - 'filename': 'inception_resnet_v2_2016_08_30.ckpt', - 'builder': lambda: inception_resnet_v2.inception_resnet_v2, - 'arg_scope': inception_resnet_v2.inception_resnet_v2_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 299, 299, 3]), - 'num_classes': 1001, - }, - 'nasnet-a_large': { - 'url': 'https://storage.googleapis.com/download.tensorflow.org/models/nasnet-a_large_04_10_2017.tar.gz', - 'filename': 'model.ckpt', - 'builder': lambda: nasnet.build_nasnet_large, - 'arg_scope': nasnet.nasnet_large_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 331, 331, 3]), - 'num_classes': 1001, - }, - 'facenet': { - 'url': 'http://mmdnn.eastasia.cloudapp.azure.com:89/models/tensorflow/facenet/20180408-102900.zip', - 'filename': '20180408-102900/model-20180408-102900.ckpt-90', - 'builder': lambda: inception_resnet_v1.inception_resnet_v1, - 'arg_scope': inception_resnet_v1.inception_resnet_v1_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.float32, shape=[None, 160, 160, 3]), - 'feed_dict': lambda img: {'input:0': img, 'phase_train:0': False}, - 'num_classes': 0, - }, - 'facenet_frozen': { - 'url': 'http://mmdnn.eastasia.cloudapp.azure.com:89/models/tensorflow/facenet/20180408-102900.zip', - 'filename': '20180408-102900/20180408-102900.pb', - 'tensor_out': ['InceptionResnetV1/Logits/AvgPool_1a_8x8/AvgPool:0'], - 'tensor_in': ['input:0', 'phase_train:0'], - # input_shape of the elem in tensor_in - 'input_shape': [[160, 160, 3], 1], - 'feed_dict': lambda img: {'input:0': img, 'phase_train:0': False}, - 'num_classes': 0, - }, - 'rnn_lstm_gru_stacked': { - # Note this is just a model used for test, not a standard rnn model. 
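-            # Like the entries above it exposes 'builder'/'arg_scope', so its
-            # checkpoint goes through the same handle_checkpoint path; the input
-            # is a batch of int32 token ids of length 150, and num_classes is 0
-            # because there is no ImageNet classification head here.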
- 'url': 'http://mmdnn.eastasia.cloudapp.azure.com:89/models/tensorflow/tf_rnn/tf_rnn.zip', - 'filename': 'tf_rnn/tf_lstm_gru_stacked.ckpt', - 'builder': lambda: test_rnn.create_symbol, - 'arg_scope': test_rnn.dummy_arg_scope, - 'input': lambda: tf.placeholder(name='input', dtype=tf.int32, shape=[None, 150]), - 'feed_dict': lambda x: {'input:0': x}, - 'num_classes': 0 - } - } - - @classmethod - def handle_checkpoint(cls, architecture, path): - with slim.arg_scope(cls.architecture_map[architecture]['arg_scope']()): - data_input = cls.architecture_map[architecture]['input']() - logits, endpoints = cls.architecture_map[architecture]['builder']()( - data_input, - num_classes=cls.architecture_map[architecture]['num_classes'], - is_training=False) - - if logits.op.type == 'Squeeze': - labels = tf.identity(logits, name='MMdnn_Output') - else: - labels = tf.squeeze(logits, name='MMdnn_Output') - - init = tf.global_variables_initializer() - with tf.Session() as sess: - sess.run(init) - saver = tf.train.Saver() - saver.restore( - sess, path + cls.architecture_map[architecture]['filename']) - save_path = saver.save( - sess, path + "imagenet_{}.ckpt".format(architecture)) - print("Model saved in file: %s" % save_path) - - import tensorflow.contrib.keras as keras - keras.backend.clear_session() - - @classmethod - def handle_frozen_graph(cls, architecture, path): - return - # raise NotImplementedError() - - @classmethod - def get_frozen_para(cls, architecture): - frozenname = architecture + '_frozen' - tensor_in = list(map(lambda x: x.split( - ':')[0], cls.architecture_map[frozenname]['tensor_in'])) - tensor_out = list(map(lambda x: x.split( - ':')[0], cls.architecture_map[frozenname]['tensor_out'])) - return cls.architecture_map[frozenname]['filename'], cls.architecture_map[frozenname]['input_shape'], tensor_in, tensor_out - - @classmethod - def download(cls, architecture, path="./"): - if cls.sanity_check(architecture): - architecture_file = download_file( - cls.architecture_map[architecture]['url'], directory=path, auto_unzip=True) - if not architecture_file: - return None - - tf.reset_default_graph() - - if 'ckpt' in cls.architecture_map[architecture]['filename']: - cls.handle_checkpoint(architecture, path) - - elif cls.architecture_map[architecture]['filename'].endswith('pb'): - cls.handle_frozen_graph(architecture, path) - - else: - raise ValueError("Unknown file name [{}].".format( - cls.architecture_map[architecture]['filename'])) - - return architecture_file - - else: - return None - - @classmethod - def inference(cls, architecture, files, path, test_input_path, is_frozen=False): - if is_frozen: - architecture_ = architecture + "_frozen" - else: - architecture_ = architecture - - if cls.download(architecture_, path): - import numpy as np - if 'rnn' not in architecture_: - func = TestKit.preprocess_func['tensorflow'][architecture] - img = func(test_input_path) - img = np.expand_dims(img, axis=0) - input_data = img - else: - input_data = np.load(test_input_path) - - if is_frozen: - tf_model_path = cls.architecture_map[architecture_]['filename'] - with open(path + tf_model_path, 'rb') as f: - serialized = f.read() - tf.reset_default_graph() - original_gdef = tf.GraphDef() - original_gdef.ParseFromString(serialized) - tf_output_name = cls.architecture_map[architecture_]['tensor_out'] - tf_input_name = cls.architecture_map[architecture_]['tensor_in'] - feed_dict = cls.architecture_map[architecture_]['feed_dict'] - - with tf.Graph().as_default() as g: - tf.import_graph_def(original_gdef, name='') - 
with tf.Session(graph=g) as sess: - # temporarily think the num of out nodes is one - tf_out = sess.run( - tf_output_name[0], feed_dict=feed_dict(input_data)) - predict = np.squeeze(tf_out) - return predict - - else: - with slim.arg_scope(cls.architecture_map[architecture]['arg_scope']()): - data_input = cls.architecture_map[architecture]['input']() - logits, endpoints = cls.architecture_map[architecture]['builder']()( - data_input, - num_classes=cls.architecture_map[architecture]['num_classes'], - is_training=False) - labels = tf.squeeze(logits) - - init = tf.global_variables_initializer() - with tf.Session() as sess: - sess.run(init) - saver = tf.train.Saver() - saver.restore( - sess, path + cls.architecture_map[architecture]['filename']) - predict = sess.run(logits, feed_dict={ - data_input: input_data}) - - import tensorflow.contrib.keras as keras - keras.backend.clear_session() - - predict = np.squeeze(predict) - return predict - - else: - return None -# ---------------------------------------------------------------------------------------------- -# Copyright (c) Microsoft Corporation. All rights reserved. -# Licensed under the MIT License. See License.txt in the project root for license information. -# ---------------------------------------------------------------------------------------------- - -import argparse -import numpy as np -import sys -import os -import tensorflow as tf -from mmdnn.conversion.examples.imagenet_test import TestKit - - -class TestTF(TestKit): - - def __init__(self): - super(TestTF, self).__init__() - - self.truth['mxnet']['resnet152-11k'] = [(1278, 0.49070787), (1277, 0.21392652), - (282, 0.12979421), (1282, 0.066355646), (1224, 0.022040566)] - - self.input, self.model = self.MainModel.KitModel(self.args.w) - # self.input, self.model, self.testop = self.MainModel.KitModel(self.args.w) - - def preprocess(self, image_path): - x = super(TestTF, self).preprocess(image_path) - self.data = np.expand_dims(x, 0) - - def print_result(self): - with tf.Session() as sess: - init = tf.global_variables_initializer() - sess.run(init) - predict = sess.run(self.model, feed_dict={self.input: self.data}) - - super(TestTF, self).print_result(predict) - - def print_intermediate_result(self, layer_name, if_transpose=False): - # testop = tf.get_default_graph().get_operation_by_name(layer_name) - testop = self.testop - with tf.Session() as sess: - init = tf.global_variables_initializer() - sess.run(init) - intermediate_output = sess.run( - testop, feed_dict={self.input: self.data}) - - super(TestTF, self).print_intermediate_result( - intermediate_output, if_transpose) - - def inference(self, image_path): - self.preprocess(image_path) - - # self.print_intermediate_result('conv1_7x7_s2_1', True) - - self.print_result() - - self.test_truth() - - def dump(self, path=None): - dump_tag = self.args.dump_tag - if dump_tag == 'SERVING': - tag_list = [tf.saved_model.tag_constants.SERVING] - else: - tag_list = [tf.saved_model.tag_constants.TRAINING] - - if path is None: - path = self.args.dump - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - - builder = tf.saved_model.builder.SavedModelBuilder(path) - - tensor_info_input = tf.saved_model.utils.build_tensor_info( - self.input) - tensor_info_output = tf.saved_model.utils.build_tensor_info( - self.model) - - prediction_signature = ( - tf.saved_model.signature_def_utils.build_signature_def( - inputs={'input': tensor_info_input}, - outputs={'output': tensor_info_output}, - 
method_name=tf.saved_model.signature_constants.PREDICT_METHOD_NAME
-                )
-            )
-
-            builder.add_meta_graph_and_variables(
-                sess,
-                tag_list,
-                signature_def_map={
-                    tf.saved_model.signature_constants.DEFAULT_SERVING_SIGNATURE_DEF_KEY: prediction_signature
-                }
-            )
-
-            save_path = builder.save()
-
-        print('Tensorflow file is saved as [{}], generated by [{}.py] and [{}].'.format(
-            save_path, self.args.n, self.args.w))
-
-
-if __name__ == '__main__':
-    tester = TestTF()
-    if tester.args.dump:
-        if tester.args.dump_tag:
-            tester.dump()
-        else:
-            raise ValueError(
-                "Need to provide the dump tag (SERVING or TRAINING) for the TensorFlow model.")
-    else:
-        tester.inference(tester.args.image)
-import tensorflow as tf
-from tensorflow.python.platform import gfile
-import os
-import shutil
-import sys
-
-
-def visualize(model_filename, log_dir):
-    with tf.Session() as sess:
-        tf.train.import_meta_graph(model_filename)
-        train_writer = tf.summary.FileWriter(log_dir)
-        train_writer.add_graph(sess.graph)
-        train_writer.close()
-
-
-def _main():
-    """
-    Visualize a TF graph from a checkpoint meta file using tensorboard.
-
-    Arguments
-    ----------
-    - path to the checkpoint meta file (.ckpt.meta)
-    - path to a log directory for writing graph summary for visualization
-
-    Usage
-    ----------
-    python vis_meta.py model.ckpt.meta /tmp/pb
-
-
-    To kill a previous tensorboard process, use the following commands in the terminal
-    ps aux | grep tensorboard
-    kill PID
-    """
-
-    if len(sys.argv) != 3:
-        raise ValueError(
-            "Usage: python vis_meta.py /path/to/model.meta /path/to/log/directory")
-    # load file
-    visualize(sys.argv[1], sys.argv[2])
-    os.system("tensorboard --logdir=" + sys.argv[2])
-
-
-if __name__ == "__main__":
-    _main()
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-from mmdnn.conversion.rewriter.rewriter import UnitRewriterBase
-import numpy as np
-import re
-
-
-class GRURewriter(UnitRewriterBase):
-
-    def __init__(self, graph, weights_dict):
-        return super(GRURewriter, self).__init__(graph, weights_dict)
-
-    def process_gru_cell(self, match_result):
-        if 'gru_cell' not in match_result._pattern_to_op.keys():
-            return
-        kwargs = dict()
-        top_node = match_result._pattern_to_op[match_result._name_to_pattern['gru_cell']]
-
-        w_e = match_result.get_op("cell_kernel")
-        w = self._weights_dict[w_e.name.replace('/read', '')]
-
-        num_units = w.shape[1]//2
-        input_size = w.shape[0] - num_units
-
-        kwargs['num_units'] = num_units
-        kwargs['input_size'] = input_size
-
-        if hasattr(top_node, 'kwargs'):
-            top_node.kwargs.update(kwargs)
-        else:
-            top_node.kwargs = kwargs
-
-    def process_rnn_h_zero(self, match_result):
-        if 'h_zero' not in match_result._name_to_pattern.keys():
-            return
-        kwargs = dict()
-        top_node = match_result._pattern_to_op[match_result._name_to_pattern['h_zero']]
-
-        fill_size = match_result.get_op('fill_size')
-        fill_value = match_result.get_op('fill_value')
-
-        kwargs['fill_size'] = fill_size.get_attr('value').int_val[0]
-        kwargs['fill_value'] = fill_value.get_attr('value').float_val[0]
-
-        if hasattr(top_node, 'kwargs'):
-            top_node.kwargs.update(kwargs)
-        else:
-            top_node.kwargs = kwargs
-
-    def process_match_result(self, match_result, pattern_name):
-        if pattern_name == 'gru_cell':
-            self.process_gru_cell(match_result)
-        elif pattern_name == 'h_zero':
-            if self.check_match_scope(match_result, 'GRUCellZeroState'):
-                self.process_rnn_h_zero(match_result)
-
-    '''For short patterns, check the enclosing scope to avoid matching ops that belong to other patterns.'''
-
-    def check_match_scope(self, 
match_result, scope_name):
-        ops = match_result._pattern_to_op.values()
-
-        for op in ops:
-            op_name_splits = op.name.split('/')
-            if len(op_name_splits) < 2:
-                return False
-            if re.sub(r'(_\d+)*$', '', op_name_splits[-2]) != scope_name:
-                if len(op_name_splits) > 2:
-                    if re.sub(r'(_\d+)*$', '', op_name_splits[-3]) != scope_name:
-                        return False
-                else:
-                    return False
-        return True
-
-    def run(self):
-        return super(GRURewriter, self).run(['gru_cell', 'h_zero'], 'tensorflow')
-from mmdnn.conversion.rewriter.rewriter import UnitRewriterBase
-import numpy as np
-import re
-
-
-class LSTMRewriter(UnitRewriterBase):
-
-    def __init__(self, graph, weights_dict):
-        return super(LSTMRewriter, self).__init__(graph, weights_dict)
-
-    def process_lstm_cell(self, match_result):
-        if 'lstm_cell' not in match_result._pattern_to_op.keys():
-            return
-        kwargs = dict()
-
-        top_node = match_result._pattern_to_op[match_result._name_to_pattern['lstm_cell']]
-
-        w_e = match_result.get_op("cell_kernel")
-        w = self._weights_dict[w_e.name.replace('/read', '')]
-
-        num_units = w.shape[1]//4
-        [wx, wh] = np.split(w, [-1 * num_units])
-        input_size = wx.shape[0]
-
-        kwargs['num_units'] = num_units
-        kwargs['input_size'] = input_size
-
-        if hasattr(top_node, 'kwargs'):
-            top_node.kwargs.update(kwargs)
-        else:
-            top_node.kwargs = kwargs
-
-    def process_rnn_h_zero(self, match_result):
-        if 'h_zero' not in match_result._name_to_pattern.keys():
-            return
-        kwargs = dict()
-        top_node = match_result._pattern_to_op[match_result._name_to_pattern['h_zero']]
-
-        fill_size = match_result.get_op('fill_size')
-        fill_value = match_result.get_op('fill_value')
-
-        kwargs['fill_size'] = fill_size.get_attr('value').int_val[0]
-        kwargs['fill_value'] = fill_value.get_attr('value').float_val[0]
-
-        if hasattr(top_node, 'kwargs'):
-            top_node.kwargs.update(kwargs)
-        else:
-            top_node.kwargs = kwargs
-
-    def process_match_result(self, match_result, pattern_name):
-        if pattern_name == 'lstm_cell':
-            self.process_lstm_cell(match_result)
-        elif pattern_name == 'h_zero':
-            if self.check_match_scope(match_result, 'LSTMCellZeroState'):
-                self.process_rnn_h_zero(match_result)
-
-    '''For short patterns, check the enclosing scope to avoid matching ops that belong to other patterns.'''
-
-    def check_match_scope(self, match_result, scope_name):
-        ops = match_result._pattern_to_op.values()
-
-        for op in ops:
-            op_name_splits = op.name.split('/')
-            if len(op_name_splits) < 2:
-                return False
-            if re.sub(r'(_\d+)*$', '', op_name_splits[-2]) != scope_name:
-                if len(op_name_splits) > 2:
-                    if re.sub(r'(_\d+)*$', '', op_name_splits[-3]) != scope_name:
-                        return False
-                else:
-                    return False
-        return True
-
-    def run(self):
-        return super(LSTMRewriter, self).run(['lstm_cell', 'h_zero'], 'tensorflow')
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
-import paddle.v2 as paddle
-
-__all__ = ['alexnet']
-
-
-def alexnet(input, class_dim):
-    conv1 = paddle.layer.img_conv(
-        input=input,
-        filter_size=11,
-        num_channels=3,
-        num_filters=96,
-        stride=4,
-        padding=1)
-    cmrnorm1 = paddle.layer.img_cmrnorm(
-        input=conv1, size=5, scale=0.0001, power=0.75)
-    pool1 = paddle.layer.img_pool(input=cmrnorm1, pool_size=3, stride=2)
-
-    conv2 = paddle.layer.img_conv(
-        input=pool1,
-        filter_size=5,
-        num_filters=256,
-        stride=1,
-        padding=2,
-        groups=1)
-    cmrnorm2 = paddle.layer.img_cmrnorm(
-        input=conv2, size=5, scale=0.0001, power=0.75)
-    pool2 = paddle.layer.img_pool(input=cmrnorm2, pool_size=3, stride=2)
-
-    pool3 = 
paddle.networks.img_conv_group(
-        input=pool2,
-        pool_size=3,
-        pool_stride=2,
-        conv_num_filter=[384, 384, 256],
-        conv_filter_size=3,
-        pool_type=paddle.pooling.Max())
-
-    fc1 = paddle.layer.fc(input=pool3,
-                          size=4096,
-                          act=paddle.activation.Relu(),
-                          layer_attr=paddle.attr.Extra(drop_rate=0.5))
-    fc2 = paddle.layer.fc(input=fc1,
-                          size=4096,
-                          act=paddle.activation.Relu(),
-                          layer_attr=paddle.attr.Extra(drop_rate=0.5))
-
-    out = paddle.layer.fc(input=fc2,
-                          size=class_dim,
-                          act=paddle.activation.Softmax())
-    return out
-import paddle.v2 as paddle
-
-__all__ = ['resnet_imagenet', 'resnet_cifar10']
-
-
-def conv_bn_layer(input,
-                  ch_out,
-                  filter_size,
-                  stride,
-                  padding,
-                  active_type=paddle.activation.Relu(),
-                  ch_in=None):
-    tmp = paddle.layer.img_conv(
-        input=input,
-        filter_size=filter_size,
-        num_channels=ch_in,
-        num_filters=ch_out,
-        stride=stride,
-        padding=padding,
-        act=paddle.activation.Linear(),
-        bias_attr=False)
-    return paddle.layer.batch_norm(input=tmp, act=active_type)
-
-
-def shortcut(input, ch_out, stride):
-    if input.num_filters != ch_out:
-        return conv_bn_layer(input, ch_out, 1, stride, 0,
-                             paddle.activation.Linear())
-    else:
-        return input
-
-
-def basicblock(input, ch_out, stride):
-    short = shortcut(input, ch_out, stride)
-    conv1 = conv_bn_layer(input, ch_out, 3, stride, 1)
-    conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1, paddle.activation.Linear())
-    return paddle.layer.addto(
-        input=[short, conv2], act=paddle.activation.Relu())
-
-
-def bottleneck(input, ch_out, stride):
-    short = shortcut(input, ch_out * 4, stride)
-    conv1 = conv_bn_layer(input, ch_out, 1, stride, 0)
-    conv2 = conv_bn_layer(conv1, ch_out, 3, 1, 1)
-    conv3 = conv_bn_layer(conv2, ch_out * 4, 1, 1, 0,
-                          paddle.activation.Linear())
-    return paddle.layer.addto(
-        input=[short, conv3], act=paddle.activation.Relu())
-
-
-def layer_warp(block_func, input, ch_out, count, stride):
-    conv = block_func(input, ch_out, stride)
-    for i in range(1, count):
-        conv = block_func(conv, ch_out, 1)
-    return conv
-
-
-def resnet_imagenet(input, class_dim, depth=50):
-    cfg = {
-        18: ([2, 2, 2, 2], basicblock),  # two basic blocks per stage, the standard ResNet-18 layout
-        34: ([3, 4, 6, 3], basicblock),
-        50: ([3, 4, 6, 3], bottleneck),
-        101: ([3, 4, 23, 3], bottleneck),
-        152: ([3, 8, 36, 3], bottleneck)
-    }
-    stages, block_func = cfg[depth]
-    conv1 = conv_bn_layer(
-        input, ch_in=3, ch_out=64, filter_size=7, stride=2, padding=3)
-    pool1 = paddle.layer.img_pool(input=conv1, pool_size=3, stride=2)
-    res1 = layer_warp(block_func, pool1, 64, stages[0], 1)
-    res2 = layer_warp(block_func, res1, 128, stages[1], 2)
-    res3 = layer_warp(block_func, res2, 256, stages[2], 2)
-    res4 = layer_warp(block_func, res3, 512, stages[3], 2)
-    pool2 = paddle.layer.img_pool(
-        input=res4, pool_size=7, stride=1, pool_type=paddle.pooling.Avg())
-    out = paddle.layer.fc(input=pool2,
-                          size=class_dim,
-                          act=paddle.activation.Softmax())
-    return out
-
-
-def resnet_cifar10(input, class_dim, depth=32):
-    # depth should be one of 20, 32, 44, 56, 110, 1202
-    assert (depth - 2) % 6 == 0
-    n = (depth - 2) // 6  # integer division, so layer_warp gets an int block count
-    nStages = {16, 64, 128}
-    conv1 = conv_bn_layer(
-        input, ch_in=3, ch_out=16, filter_size=3, stride=1, padding=1)
-    res1 = layer_warp(basicblock, conv1, 16, n, 1)
-    res2 = layer_warp(basicblock, res1, 32, n, 2)
-    res3 = layer_warp(basicblock, res2, 64, n, 2)
-    pool = paddle.layer.img_pool(
-        input=res3, pool_size=8, stride=1, pool_type=paddle.pooling.Avg())
-    out = paddle.layer.fc(input=pool,
-                          size=class_dim,
-                          act=paddle.activation.Softmax())
-    return out
-import paddle.v2 as paddle
-
-__all__ 
= ['vgg13', 'vgg16', 'vgg19'] - - -def vgg(input, nums, class_dim): - def conv_block(input, num_filter, groups, num_channels=None): - return paddle.networks.img_conv_group( - input=input, - num_channels=num_channels, - pool_size=2, - pool_stride=2, - conv_num_filter=[num_filter] * groups, - conv_filter_size=3, - conv_act=paddle.activation.Relu(), - pool_type=paddle.pooling.Max()) - - assert len(nums) == 5 - # the channel of input feature is 3 - conv1 = conv_block(input, 64, nums[0], 3) - conv2 = conv_block(conv1, 128, nums[1]) - conv3 = conv_block(conv2, 256, nums[2]) - conv4 = conv_block(conv3, 512, nums[3]) - conv5 = conv_block(conv4, 512, nums[4]) - - fc_dim = 4096 - fc1 = paddle.layer.fc(input=conv5, - size=fc_dim, - act=paddle.activation.Relu(), - layer_attr=paddle.attr.Extra(drop_rate=0.5)) - fc2 = paddle.layer.fc(input=fc1, - size=fc_dim, - act=paddle.activation.Relu(), - layer_attr=paddle.attr.Extra(drop_rate=0.5)) - out = paddle.layer.fc(input=fc2, - size=class_dim, - act=paddle.activation.Softmax()) - return out - - -def vgg13(input, class_dim): - nums = [2, 2, 2, 2, 2] - return vgg(input, nums, class_dim) - - -def vgg16(input, class_dim): - nums = [2, 2, 3, 3, 3] - return vgg(input, nums, class_dim) - - -def vgg19(input, class_dim): - nums = [2, 2, 4, 4, 4] - return vgg(input, nums, class_dim) -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -"""Contains the definition of the Inception Resnet V1 architecture. -As described in http://arxiv.org/abs/1602.07261. 
- Inception-v4, Inception-ResNet and the Impact of Residual Connections - on Learning - Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf -import tensorflow.contrib.slim as slim - -# Inception-Resnet-A - - -def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 35x35 resnet block.""" - with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d( - tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d( - tower_conv2_0, 32, 3, scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d( - tower_conv2_1, 32, 3, scope='Conv2d_0c_3x3') - mixed = tf.concat([tower_conv, tower_conv1_1, tower_conv2_2], 3) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - net += scale * up - if activation_fn: - net = activation_fn(net) - return net - -# Inception-Resnet-B - - -def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 17x17 resnet block.""" - with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 128, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 128, [1, 7], - scope='Conv2d_0b_1x7') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 128, [7, 1], - scope='Conv2d_0c_7x1') - mixed = tf.concat([tower_conv, tower_conv1_2], 3) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - net += scale * up - if activation_fn: - net = activation_fn(net) - return net - - -# Inception-Resnet-C -def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 8x8 resnet block.""" - with tf.variable_scope(scope, 'Block8', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 192, [1, 3], - scope='Conv2d_0b_1x3') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [3, 1], - scope='Conv2d_0c_3x1') - mixed = tf.concat([tower_conv, tower_conv1_2], 3) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - net += scale * up - if activation_fn: - net = activation_fn(net) - return net - - -def reduction_a(net, k, l, m, n): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, n, 3, stride=2, padding='VALID', - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, k, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, l, 3, - scope='Conv2d_0b_3x3') - tower_conv1_2 = slim.conv2d(tower_conv1_1, m, 3, - stride=2, padding='VALID', - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', - 
scope='MaxPool_1a_3x3') - net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3) - return net - - -def reduction_b(net): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - tower_conv1 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1, 256, 3, stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - tower_conv2 = slim.conv2d(net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2, 256, 3, - scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d(tower_conv2_1, 256, 3, stride=2, - padding='VALID', scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_3'): - tower_pool = slim.max_pool2d(net, 3, stride=2, padding='VALID', - scope='MaxPool_1a_3x3') - net = tf.concat([tower_conv_1, tower_conv1_1, - tower_conv2_2, tower_pool], 3) - return net - - -def inception_resnet_v1_arg_scope(weight_decay=0.0, - activation_fn=tf.nn.relu): - batch_norm_params = { - # Decay for the moving averages. - 'decay': 0.995, - # epsilon to prevent 0s in variance. - 'epsilon': 0.001, - # force in-place updates of mean and variance estimates - 'updates_collections': None, - # Moving averages ends up in the trainable variables collection - 'variables_collections': [tf.GraphKeys.TRAINABLE_VARIABLES], - } - with slim.arg_scope([slim.conv2d, slim.fully_connected], - weights_initializer=slim.initializers.xavier_initializer(), - weights_regularizer=slim.l2_regularizer(weight_decay), - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_params) as sc: - return sc - - -def inception_resnet_v1(inputs, num_classes, is_training=True, - dropout_keep_prob=0.8, - bottleneck_layer_size=512, - reuse=None, - scope='InceptionResnetV1'): - """Creates the Inception Resnet V1 model. - Args: - inputs: a 4-D tensor of size [batch_size, height, width, 3]. - num_classes: number of predicted classes. - is_training: whether is training or not. - dropout_keep_prob: float, the fraction to keep before final layer. - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - Returns: - logits: the logits outputs of the model. - end_points: the set of end_points from the inception model. 
- """ - end_points = {} - - with tf.variable_scope(scope, 'InceptionResnetV1', [inputs], reuse=reuse): - with slim.arg_scope([slim.batch_norm, slim.dropout], - is_training=is_training): - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='SAME'): - - # 149 x 149 x 32 - net = slim.conv2d(inputs, 32, 3, stride=2, padding='VALID', - scope='Conv2d_1a_3x3') - end_points['Conv2d_1a_3x3'] = net - # 147 x 147 x 32 - net = slim.conv2d(net, 32, 3, padding='VALID', - scope='Conv2d_2a_3x3') - end_points['Conv2d_2a_3x3'] = net - # 147 x 147 x 64 - net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') - end_points['Conv2d_2b_3x3'] = net - # 73 x 73 x 64 - net = slim.max_pool2d(net, 3, stride=2, padding='VALID', - scope='MaxPool_3a_3x3') - end_points['MaxPool_3a_3x3'] = net - # 73 x 73 x 80 - net = slim.conv2d(net, 80, 1, padding='VALID', - scope='Conv2d_3b_1x1') - end_points['Conv2d_3b_1x1'] = net - # 71 x 71 x 192 - net = slim.conv2d(net, 192, 3, padding='VALID', - scope='Conv2d_4a_3x3') - end_points['Conv2d_4a_3x3'] = net - # 35 x 35 x 256 - net = slim.conv2d(net, 256, 3, stride=2, padding='VALID', - scope='Conv2d_4b_3x3') - end_points['Conv2d_4b_3x3'] = net - - # 5 x Inception-resnet-A - net = slim.repeat(net, 5, block35, scale=0.17) - end_points['Mixed_5a'] = net - - # Reduction-A - with tf.variable_scope('Mixed_6a'): - net = reduction_a(net, 192, 192, 256, 384) - end_points['Mixed_6a'] = net - - # 10 x Inception-Resnet-B - net = slim.repeat(net, 10, block17, scale=0.10) - end_points['Mixed_6b'] = net - - # Reduction-B - with tf.variable_scope('Mixed_7a'): - net = reduction_b(net) - end_points['Mixed_7a'] = net - - # 5 x Inception-Resnet-C - net = slim.repeat(net, 5, block8, scale=0.20) - end_points['Mixed_8a'] = net - - net = block8(net, activation_fn=None) - end_points['Mixed_8b'] = net - - with tf.variable_scope('Logits'): - end_points['PrePool'] = net - #pylint: disable=no-member - net = slim.avg_pool2d(net, net.get_shape()[1:3], padding='VALID', - scope='AvgPool_1a_8x8') - net = slim.flatten(net) - - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='Dropout') - - end_points['PreLogitsFlatten'] = net - - # net = slim.fully_connected(net, bottleneck_layer_size, activation_fn=None, - # scope='Bottleneck', reuse=False) - - return net, end_points -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains the definition of the Inception Resnet V2 architecture. - -As described in http://arxiv.org/abs/1602.07261. 
- - Inception-v4, Inception-ResNet and the Impact of Residual Connections - on Learning - Christian Szegedy, Sergey Ioffe, Vincent Vanhoucke, Alex Alemi -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - - -import tensorflow as tf - -slim = tf.contrib.slim - - -def block35(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 35x35 resnet block.""" - with tf.variable_scope(scope, 'Block35', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 32, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d( - tower_conv1_0, 32, 3, scope='Conv2d_0b_3x3') - with tf.variable_scope('Branch_2'): - tower_conv2_0 = slim.conv2d(net, 32, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d( - tower_conv2_0, 48, 3, scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d( - tower_conv2_1, 64, 3, scope='Conv2d_0c_3x3') - mixed = tf.concat( - axis=3, values=[tower_conv, tower_conv1_1, tower_conv2_2]) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - scaled_up = up * scale - if activation_fn == tf.nn.relu6: - # Use clip_by_value to simulate bandpass activation. - scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0) - - net += scaled_up - if activation_fn: - net = activation_fn(net) - return net - - -def block17(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 17x17 resnet block.""" - with tf.variable_scope(scope, 'Block17', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 128, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 160, [1, 7], - scope='Conv2d_0b_1x7') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 192, [7, 1], - scope='Conv2d_0c_7x1') - mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - - scaled_up = up * scale - if activation_fn == tf.nn.relu6: - # Use clip_by_value to simulate bandpass activation. - scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0) - - net += scaled_up - if activation_fn: - net = activation_fn(net) - return net - - -def block8(net, scale=1.0, activation_fn=tf.nn.relu, scope=None, reuse=None): - """Builds the 8x8 resnet block.""" - with tf.variable_scope(scope, 'Block8', [net], reuse=reuse): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 192, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d(net, 192, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 224, [1, 3], - scope='Conv2d_0b_1x3') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 256, [3, 1], - scope='Conv2d_0c_3x1') - mixed = tf.concat(axis=3, values=[tower_conv, tower_conv1_2]) - up = slim.conv2d(mixed, net.get_shape()[3], 1, normalizer_fn=None, - activation_fn=None, scope='Conv2d_1x1') - - scaled_up = up * scale - if activation_fn == tf.nn.relu6: - # Use clip_by_value to simulate bandpass activation. 
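# ---- editor's note ----------------------------------------------------------
# Every Inception-ResNet block in this file ends the same way: project the
# concatenated branches back to the input depth with a 1x1 conv, multiply by
# a small `scale`, add the result onto the input, then optionally apply the
# activation. As a sketch (illustrative pseudocode, not part of the scraped
# file; `conv_1x1` stands in for the Conv2d_1x1 projection):
#
#   up = conv_1x1(mixed)              # back to net's channel count
#   up = scale * up                   # e.g. 0.17, 0.10, 0.20 per block type
#   if activation_fn == tf.nn.relu6:  # keep the residual inside [-6, 6]
#       up = tf.clip_by_value(up, -6.0, 6.0)
#   net = activation_fn(net + up)
# ---- end note ----------------------------------------------------------------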
- scaled_up = tf.clip_by_value(scaled_up, -6.0, 6.0) - - net += scaled_up - if activation_fn: - net = activation_fn(net) - return net - - -def inception_resnet_v2_base(inputs, - final_endpoint='Conv2d_7b_1x1', - output_stride=16, - align_feature_maps=False, - scope=None, - activation_fn=tf.nn.relu): - """Inception model from http://arxiv.org/abs/1602.07261. - - Constructs an Inception Resnet v2 network from inputs to the given final - endpoint. This method can construct the network up to the final inception - block Conv2d_7b_1x1. - - Args: - inputs: a tensor of size [batch_size, height, width, channels]. - final_endpoint: specifies the endpoint to construct the network up to. It - can be one of ['Conv2d_1a_3x3', 'Conv2d_2a_3x3', 'Conv2d_2b_3x3', - 'MaxPool_3a_3x3', 'Conv2d_3b_1x1', 'Conv2d_4a_3x3', 'MaxPool_5a_3x3', - 'Mixed_5b', 'Mixed_6a', 'PreAuxLogits', 'Mixed_7a', 'Conv2d_7b_1x1'] - output_stride: A scalar that specifies the requested ratio of input to - output spatial resolution. Only supports 8 and 16. - align_feature_maps: When true, changes all the VALID paddings in the network - to SAME padding so that the feature maps are aligned. - scope: Optional variable_scope. - activation_fn: Activation function for block scopes. - - Returns: - tensor_out: output tensor corresponding to the final_endpoint. - end_points: a set of activations for external use, for example summaries or - losses. - - Raises: - ValueError: if final_endpoint is not set to one of the predefined values, - or if the output_stride is not 8 or 16, or if the output_stride is 8 and - we request an end point after 'PreAuxLogits'. - """ - if output_stride != 8 and output_stride != 16: - raise ValueError('output_stride must be 8 or 16.') - - padding = 'SAME' if align_feature_maps else 'VALID' - - end_points = {} - - def add_and_check_final(name, net): - end_points[name] = net - return name == final_endpoint - - with tf.variable_scope(scope, 'InceptionResnetV2', [inputs]): - with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], - stride=1, padding='SAME'): - # 149 x 149 x 32 - net = slim.conv2d(inputs, 32, 3, stride=2, padding=padding, - scope='Conv2d_1a_3x3') - if add_and_check_final('Conv2d_1a_3x3', net): - return net, end_points - - # 147 x 147 x 32 - net = slim.conv2d(net, 32, 3, padding=padding, - scope='Conv2d_2a_3x3') - if add_and_check_final('Conv2d_2a_3x3', net): - return net, end_points - # 147 x 147 x 64 - net = slim.conv2d(net, 64, 3, scope='Conv2d_2b_3x3') - if add_and_check_final('Conv2d_2b_3x3', net): - return net, end_points - # 73 x 73 x 64 - net = slim.max_pool2d(net, 3, stride=2, padding=padding, - scope='MaxPool_3a_3x3') - if add_and_check_final('MaxPool_3a_3x3', net): - return net, end_points - # 73 x 73 x 80 - net = slim.conv2d(net, 80, 1, padding=padding, - scope='Conv2d_3b_1x1') - if add_and_check_final('Conv2d_3b_1x1', net): - return net, end_points - # 71 x 71 x 192 - net = slim.conv2d(net, 192, 3, padding=padding, - scope='Conv2d_4a_3x3') - if add_and_check_final('Conv2d_4a_3x3', net): - return net, end_points - # 35 x 35 x 192 - net = slim.max_pool2d(net, 3, stride=2, padding=padding, - scope='MaxPool_5a_3x3') - if add_and_check_final('MaxPool_5a_3x3', net): - return net, end_points - - # 35 x 35 x 320 - with tf.variable_scope('Mixed_5b'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 96, 1, scope='Conv2d_1x1') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d( - net, 48, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = 
slim.conv2d(tower_conv1_0, 64, 5, - scope='Conv2d_0b_5x5') - with tf.variable_scope('Branch_2'): - tower_conv2_0 = slim.conv2d( - net, 64, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2_0, 96, 3, - scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d(tower_conv2_1, 96, 3, - scope='Conv2d_0c_3x3') - with tf.variable_scope('Branch_3'): - tower_pool = slim.avg_pool2d(net, 3, stride=1, padding='SAME', - scope='AvgPool_0a_3x3') - tower_pool_1 = slim.conv2d(tower_pool, 64, 1, - scope='Conv2d_0b_1x1') - net = tf.concat( - [tower_conv, tower_conv1_1, tower_conv2_2, tower_pool_1], 3) - - if add_and_check_final('Mixed_5b', net): - return net, end_points - # TODO(alemi): Register intermediate endpoints - net = slim.repeat(net, 10, block35, scale=0.17, - activation_fn=activation_fn) - - # 17 x 17 x 1088 if output_stride == 8, - # 33 x 33 x 1088 if output_stride == 16 - use_atrous = output_stride == 8 - - with tf.variable_scope('Mixed_6a'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d(net, 384, 3, stride=1 if use_atrous else 2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - tower_conv1_0 = slim.conv2d( - net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1_0, 256, 3, - scope='Conv2d_0b_3x3') - tower_conv1_2 = slim.conv2d(tower_conv1_1, 384, 3, - stride=1 if use_atrous else 2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - tower_pool = slim.max_pool2d(net, 3, stride=1 if use_atrous else 2, - padding=padding, - scope='MaxPool_1a_3x3') - net = tf.concat([tower_conv, tower_conv1_2, tower_pool], 3) - - if add_and_check_final('Mixed_6a', net): - return net, end_points - - # TODO(alemi): register intermediate endpoints - with slim.arg_scope([slim.conv2d], rate=2 if use_atrous else 1): - net = slim.repeat(net, 20, block17, scale=0.10, - activation_fn=activation_fn) - if add_and_check_final('PreAuxLogits', net): - return net, end_points - - if output_stride == 8: - # TODO(gpapan): Properly support output_stride for the rest of the net. 
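# ---- editor's note ----------------------------------------------------------
# As the check below enforces, output_stride=8 is usable only up to the
# 'PreAuxLogits' endpoint, e.g. (an illustrative call, not from this file):
#   net, end_points = inception_resnet_v2_base(
#       images, final_endpoint='PreAuxLogits', output_stride=8)
# ---- end note ----------------------------------------------------------------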
- raise ValueError('output_stride==8 is only supported up to the ' - 'PreAuxlogits end_point for now.') - - # 8 x 8 x 2080 - with tf.variable_scope('Mixed_7a'): - with tf.variable_scope('Branch_0'): - tower_conv = slim.conv2d( - net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv_1 = slim.conv2d(tower_conv, 384, 3, stride=2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_1'): - tower_conv1 = slim.conv2d( - net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv1_1 = slim.conv2d(tower_conv1, 288, 3, stride=2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_2'): - tower_conv2 = slim.conv2d( - net, 256, 1, scope='Conv2d_0a_1x1') - tower_conv2_1 = slim.conv2d(tower_conv2, 288, 3, - scope='Conv2d_0b_3x3') - tower_conv2_2 = slim.conv2d(tower_conv2_1, 320, 3, stride=2, - padding=padding, - scope='Conv2d_1a_3x3') - with tf.variable_scope('Branch_3'): - tower_pool = slim.max_pool2d(net, 3, stride=2, - padding=padding, - scope='MaxPool_1a_3x3') - net = tf.concat( - [tower_conv_1, tower_conv1_1, tower_conv2_2, tower_pool], 3) - - if add_and_check_final('Mixed_7a', net): - return net, end_points - - # TODO(alemi): register intermediate endpoints - net = slim.repeat(net, 9, block8, scale=0.20, - activation_fn=activation_fn) - net = block8(net, activation_fn=None) - - # 8 x 8 x 1536 - net = slim.conv2d(net, 1536, 1, scope='Conv2d_7b_1x1') - if add_and_check_final('Conv2d_7b_1x1', net): - return net, end_points - - raise ValueError('final_endpoint (%s) not recognized', final_endpoint) - - -def inception_resnet_v2(inputs, num_classes=1001, is_training=True, - dropout_keep_prob=0.8, - reuse=None, - scope='InceptionResnetV2', - create_aux_logits=True, - activation_fn=tf.nn.relu): - """Creates the Inception Resnet V2 model. - - Args: - inputs: a 4-D tensor of size [batch_size, height, width, 3]. - Dimension batch_size may be undefined. If create_aux_logits is false, - also height and width may be undefined. - num_classes: number of predicted classes. If 0 or None, the logits layer - is omitted and the input features to the logits layer (before dropout) - are returned instead. - is_training: whether is training or not. - dropout_keep_prob: float, the fraction to keep before final layer. - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - create_aux_logits: Whether to include the auxilliary logits. - activation_fn: Activation function for conv2d. - - Returns: - net: the output of the logits layer (if num_classes is a non-zero integer), - or the non-dropped-out input to the logits layer (if num_classes is 0 or - None). - end_points: the set of end_points from the inception model. 
- """ - end_points = {} - - with tf.variable_scope(scope, 'InceptionResnetV2', [inputs], - reuse=reuse) as scope: - with slim.arg_scope([slim.batch_norm, slim.dropout], - is_training=is_training): - - net, end_points = inception_resnet_v2_base(inputs, scope=scope, - activation_fn=activation_fn) - - if create_aux_logits and num_classes: - with tf.variable_scope('AuxLogits'): - aux = end_points['PreAuxLogits'] - aux = slim.avg_pool2d(aux, 5, stride=3, padding='VALID', - scope='Conv2d_1a_3x3') - aux = slim.conv2d(aux, 128, 1, scope='Conv2d_1b_1x1') - aux = slim.conv2d(aux, 768, aux.get_shape()[1:3], - padding='VALID', scope='Conv2d_2a_5x5') - aux = slim.flatten(aux) - aux = slim.fully_connected(aux, num_classes, activation_fn=None, - scope='Logits') - end_points['AuxLogits'] = aux - - with tf.variable_scope('Logits'): - # TODO(sguada,arnoegw): Consider adding a parameter global_pool which - # can be set to False to disable pooling here (as in resnet_*()). - kernel_size = net.get_shape()[1:3] - if kernel_size.is_fully_defined(): - net = slim.avg_pool2d(net, kernel_size, padding='VALID', - scope='AvgPool_1a_8x8') - else: - net = tf.reduce_mean( - net, [1, 2], keep_dims=True, name='global_pool') - end_points['global_pool'] = net - if not num_classes: - return net, end_points - net = slim.flatten(net) - net = slim.dropout(net, dropout_keep_prob, is_training=is_training, - scope='Dropout') - end_points['PreLogitsFlatten'] = net - logits = slim.fully_connected(net, num_classes, activation_fn=None, - scope='Logits') - end_points['Logits'] = logits - end_points['Predictions'] = tf.nn.softmax( - logits, name='Predictions') - - return logits, end_points - - -inception_resnet_v2.default_image_size = 299 - - -def inception_resnet_v2_arg_scope(weight_decay=0.00004, - batch_norm_decay=0.9997, - batch_norm_epsilon=0.001, - activation_fn=tf.nn.relu): - """Returns the scope with the default parameters for inception_resnet_v2. - - Args: - weight_decay: the weight decay for weights variables. - batch_norm_decay: decay for the moving average of batch_norm momentums. - batch_norm_epsilon: small float added to variance to avoid dividing by zero. - activation_fn: Activation function for conv2d. - - Returns: - a arg_scope with the parameters needed for inception_resnet_v2. - """ - # Set weight_decay for weights in conv2d and fully_connected layers. - with slim.arg_scope([slim.conv2d, slim.fully_connected], - weights_regularizer=slim.l2_regularizer(weight_decay), - biases_regularizer=slim.l2_regularizer(weight_decay)): - - batch_norm_params = { - 'decay': batch_norm_decay, - 'epsilon': batch_norm_epsilon, - 'fused': None, # Use fused batch norm if possible. - } - # Set activation_fn and parameters for batch_norm. - with slim.arg_scope([slim.conv2d], activation_fn=activation_fn, - normalizer_fn=slim.batch_norm, - normalizer_params=batch_norm_params) as scope: - return scope -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# ============================================================================= -"""MobileNet v1. - -MobileNet is a general architecture and can be used for multiple use cases. -Depending on the use case, it can use different input layer size and different -head (for example: embeddings, localization and classification). - -As described in https://arxiv.org/abs/1704.04861. - - MobileNets: Efficient Convolutional Neural Networks for - Mobile Vision Applications - Andrew G. Howard, Menglong Zhu, Bo Chen, Dmitry Kalenichenko, Weijun Wang, - Tobias Weyand, Marco Andreetto, Hartwig Adam - -100% Mobilenet V1 (base) with input size 224x224: - -See mobilenet_v1() - -Layer params macs --------------------------------------------------------------------------------- -MobilenetV1/Conv2d_0/Conv2D: 864 10,838,016 -MobilenetV1/Conv2d_1_depthwise/depthwise: 288 3,612,672 -MobilenetV1/Conv2d_1_pointwise/Conv2D: 2,048 25,690,112 -MobilenetV1/Conv2d_2_depthwise/depthwise: 576 1,806,336 -MobilenetV1/Conv2d_2_pointwise/Conv2D: 8,192 25,690,112 -MobilenetV1/Conv2d_3_depthwise/depthwise: 1,152 3,612,672 -MobilenetV1/Conv2d_3_pointwise/Conv2D: 16,384 51,380,224 -MobilenetV1/Conv2d_4_depthwise/depthwise: 1,152 903,168 -MobilenetV1/Conv2d_4_pointwise/Conv2D: 32,768 25,690,112 -MobilenetV1/Conv2d_5_depthwise/depthwise: 2,304 1,806,336 -MobilenetV1/Conv2d_5_pointwise/Conv2D: 65,536 51,380,224 -MobilenetV1/Conv2d_6_depthwise/depthwise: 2,304 451,584 -MobilenetV1/Conv2d_6_pointwise/Conv2D: 131,072 25,690,112 -MobilenetV1/Conv2d_7_depthwise/depthwise: 4,608 903,168 -MobilenetV1/Conv2d_7_pointwise/Conv2D: 262,144 51,380,224 -MobilenetV1/Conv2d_8_depthwise/depthwise: 4,608 903,168 -MobilenetV1/Conv2d_8_pointwise/Conv2D: 262,144 51,380,224 -MobilenetV1/Conv2d_9_depthwise/depthwise: 4,608 903,168 -MobilenetV1/Conv2d_9_pointwise/Conv2D: 262,144 51,380,224 -MobilenetV1/Conv2d_10_depthwise/depthwise: 4,608 903,168 -MobilenetV1/Conv2d_10_pointwise/Conv2D: 262,144 51,380,224 -MobilenetV1/Conv2d_11_depthwise/depthwise: 4,608 903,168 -MobilenetV1/Conv2d_11_pointwise/Conv2D: 262,144 51,380,224 -MobilenetV1/Conv2d_12_depthwise/depthwise: 4,608 225,792 -MobilenetV1/Conv2d_12_pointwise/Conv2D: 524,288 25,690,112 -MobilenetV1/Conv2d_13_depthwise/depthwise: 9,216 451,584 -MobilenetV1/Conv2d_13_pointwise/Conv2D: 1,048,576 51,380,224 --------------------------------------------------------------------------------- -Total: 3,185,088 567,716,352 - - -75% Mobilenet V1 (base) with input size 128x128: - -See mobilenet_v1_075() - -Layer params macs --------------------------------------------------------------------------------- -MobilenetV1/Conv2d_0/Conv2D: 648 2,654,208 -MobilenetV1/Conv2d_1_depthwise/depthwise: 216 884,736 -MobilenetV1/Conv2d_1_pointwise/Conv2D: 1,152 4,718,592 -MobilenetV1/Conv2d_2_depthwise/depthwise: 432 442,368 -MobilenetV1/Conv2d_2_pointwise/Conv2D: 4,608 4,718,592 -MobilenetV1/Conv2d_3_depthwise/depthwise: 864 884,736 -MobilenetV1/Conv2d_3_pointwise/Conv2D: 9,216 9,437,184 -MobilenetV1/Conv2d_4_depthwise/depthwise: 864 221,184 -MobilenetV1/Conv2d_4_pointwise/Conv2D: 18,432 4,718,592 -MobilenetV1/Conv2d_5_depthwise/depthwise: 1,728 442,368 -MobilenetV1/Conv2d_5_pointwise/Conv2D: 36,864 9,437,184 -MobilenetV1/Conv2d_6_depthwise/depthwise: 1,728 110,592 -MobilenetV1/Conv2d_6_pointwise/Conv2D: 73,728 4,718,592 -MobilenetV1/Conv2d_7_depthwise/depthwise: 3,456 221,184 -MobilenetV1/Conv2d_7_pointwise/Conv2D: 147,456 9,437,184 -MobilenetV1/Conv2d_8_depthwise/depthwise: 3,456 221,184 -MobilenetV1/Conv2d_8_pointwise/Conv2D: 
147,456 9,437,184 -MobilenetV1/Conv2d_9_depthwise/depthwise: 3,456 221,184 -MobilenetV1/Conv2d_9_pointwise/Conv2D: 147,456 9,437,184 -MobilenetV1/Conv2d_10_depthwise/depthwise: 3,456 221,184 -MobilenetV1/Conv2d_10_pointwise/Conv2D: 147,456 9,437,184 -MobilenetV1/Conv2d_11_depthwise/depthwise: 3,456 221,184 -MobilenetV1/Conv2d_11_pointwise/Conv2D: 147,456 9,437,184 -MobilenetV1/Conv2d_12_depthwise/depthwise: 3,456 55,296 -MobilenetV1/Conv2d_12_pointwise/Conv2D: 294,912 4,718,592 -MobilenetV1/Conv2d_13_depthwise/depthwise: 6,912 110,592 -MobilenetV1/Conv2d_13_pointwise/Conv2D: 589,824 9,437,184 --------------------------------------------------------------------------------- -Total: 1,800,144 106,002,432 - -""" - -# Tensorflow mandates these. -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -from collections import namedtuple -import functools - -import tensorflow as tf - -slim = tf.contrib.slim - -# Conv and DepthSepConv namedtuple define layers of the MobileNet architecture -# Conv defines 3x3 convolution layers -# DepthSepConv defines 3x3 depthwise convolution followed by 1x1 convolution. -# stride is the stride of the convolution -# depth is the number of channels or filters in a layer -Conv = namedtuple('Conv', ['kernel', 'stride', 'depth']) -DepthSepConv = namedtuple('DepthSepConv', ['kernel', 'stride', 'depth']) - -# _CONV_DEFS specifies the MobileNet body -_CONV_DEFS = [ - Conv(kernel=[3, 3], stride=2, depth=32), - DepthSepConv(kernel=[3, 3], stride=1, depth=64), - DepthSepConv(kernel=[3, 3], stride=2, depth=128), - DepthSepConv(kernel=[3, 3], stride=1, depth=128), - DepthSepConv(kernel=[3, 3], stride=2, depth=256), - DepthSepConv(kernel=[3, 3], stride=1, depth=256), - DepthSepConv(kernel=[3, 3], stride=2, depth=512), - DepthSepConv(kernel=[3, 3], stride=1, depth=512), - DepthSepConv(kernel=[3, 3], stride=1, depth=512), - DepthSepConv(kernel=[3, 3], stride=1, depth=512), - DepthSepConv(kernel=[3, 3], stride=1, depth=512), - DepthSepConv(kernel=[3, 3], stride=1, depth=512), - DepthSepConv(kernel=[3, 3], stride=2, depth=1024), - DepthSepConv(kernel=[3, 3], stride=1, depth=1024) -] - - -def mobilenet_v1_base(inputs, - final_endpoint='Conv2d_13_pointwise', - min_depth=8, - depth_multiplier=1.0, - conv_defs=None, - output_stride=None, - scope=None): - """Mobilenet v1. - - Constructs a Mobilenet v1 network from inputs to the given final endpoint. - - Args: - inputs: a tensor of shape [batch_size, height, width, channels]. - final_endpoint: specifies the endpoint to construct the network up to. It - can be one of ['Conv2d_0', 'Conv2d_1_pointwise', 'Conv2d_2_pointwise', - 'Conv2d_3_pointwise', 'Conv2d_4_pointwise', 'Conv2d_5'_pointwise, - 'Conv2d_6_pointwise', 'Conv2d_7_pointwise', 'Conv2d_8_pointwise', - 'Conv2d_9_pointwise', 'Conv2d_10_pointwise', 'Conv2d_11_pointwise', - 'Conv2d_12_pointwise', 'Conv2d_13_pointwise']. - min_depth: Minimum depth value (number of channels) for all convolution ops. - Enforced when depth_multiplier < 1, and not an active constraint when - depth_multiplier >= 1. - depth_multiplier: Float multiplier for the depth (number of channels) - for all convolution ops. The value must be greater than zero. Typical - usage will be to set this value in (0, 1) to reduce the number of - parameters or computation cost of the model. - conv_defs: A list of ConvDef namedtuples specifying the net architecture. - output_stride: An integer that specifies the requested ratio of input to - output spatial resolution. 
If not None, then we invoke atrous convolution - if necessary to prevent the network from reducing the spatial resolution - of the activation maps. Allowed values are 8 (accurate fully convolutional - mode), 16 (fast fully convolutional mode), 32 (classification mode). - scope: Optional variable_scope. - - Returns: - tensor_out: output tensor corresponding to the final_endpoint. - end_points: a set of activations for external use, for example summaries or - losses. - - Raises: - ValueError: if final_endpoint is not set to one of the predefined values, - or depth_multiplier <= 0, or the target output_stride is not - allowed. - """ - def depth(d): return max(int(d * depth_multiplier), min_depth) - end_points = {} - - # Used to find thinned depths for each layer. - if depth_multiplier <= 0: - raise ValueError('depth_multiplier is not greater than zero.') - - if conv_defs is None: - conv_defs = _CONV_DEFS - - if output_stride is not None and output_stride not in [8, 16, 32]: - raise ValueError('Only allowed output_stride values are 8, 16, 32.') - - with tf.variable_scope(scope, 'MobilenetV1', [inputs]): - with slim.arg_scope([slim.conv2d, slim.separable_conv2d], padding='SAME'): - # The current_stride variable keeps track of the output stride of the - # activations, i.e., the running product of convolution strides up to the - # current network layer. This allows us to invoke atrous convolution - # whenever applying the next convolution would result in the activations - # having output stride larger than the target output_stride. - current_stride = 1 - - # The atrous convolution rate parameter. - rate = 1 - - net = inputs - for i, conv_def in enumerate(conv_defs): - end_point_base = 'Conv2d_%d' % i - - if output_stride is not None and current_stride == output_stride: - # If we have reached the target output_stride, then we need to employ - # atrous convolution with stride=1 and multiply the atrous rate by the - # current unit's stride for use in subsequent layers. 
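# ---- editor's note ----------------------------------------------------------
# The stride/rate bookkeeping described in the comment above, pulled out as a
# standalone sketch (`atrous_schedule` is an illustrative name, not from this
# file):
#
#   def atrous_schedule(layer_strides, output_stride):
#       current_stride, rate, plan = 1, 1, []
#       for s in layer_strides:
#           if output_stride and current_stride == output_stride:
#               plan.append((1, rate))  # convolve at stride 1, dilated
#               rate *= s               # fold the skipped stride into rate
#           else:
#               plan.append((s, 1))
#               current_stride *= s
#       return plan
#
# For MobileNet's strides [2,1,2,1,2,1,2,1,1,1,1,1,2,1] with output_stride=8,
# the remaining stride-2 layers run at stride 1 and the dilation rate doubles
# after each of them (reaching 4 by the final layers), so the feature map
# stays at 1/8 of the input resolution.
# ---- end note ----------------------------------------------------------------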
- layer_stride = 1 - layer_rate = rate - rate *= conv_def.stride - else: - layer_stride = conv_def.stride - layer_rate = 1 - current_stride *= conv_def.stride - - if isinstance(conv_def, Conv): - end_point = end_point_base - net = slim.conv2d(net, depth(conv_def.depth), conv_def.kernel, - stride=conv_def.stride, - normalizer_fn=slim.batch_norm, - scope=end_point) - end_points[end_point] = net - if end_point == final_endpoint: - return net, end_points - - elif isinstance(conv_def, DepthSepConv): - end_point = end_point_base + '_depthwise' - - # By passing filters=None - # separable_conv2d produces only a depthwise convolution layer - net = slim.separable_conv2d(net, None, conv_def.kernel, - depth_multiplier=1, - stride=layer_stride, - rate=layer_rate, - normalizer_fn=slim.batch_norm, - scope=end_point) - - end_points[end_point] = net - if end_point == final_endpoint: - return net, end_points - - end_point = end_point_base + '_pointwise' - - net = slim.conv2d(net, depth(conv_def.depth), [1, 1], - stride=1, - normalizer_fn=slim.batch_norm, - scope=end_point) - - end_points[end_point] = net - if end_point == final_endpoint: - return net, end_points - else: - raise ValueError('Unknown convolution type %s for layer %d' - % (conv_def.ltype, i)) - raise ValueError('Unknown final endpoint %s' % final_endpoint) - - -def mobilenet_v1(inputs, - num_classes=1000, - dropout_keep_prob=0.999, - is_training=True, - min_depth=8, - depth_multiplier=1.0, - conv_defs=None, - prediction_fn=tf.contrib.layers.softmax, - spatial_squeeze=True, - reuse=None, - scope='MobilenetV1', - global_pool=False): - """Mobilenet v1 model for classification. - - Args: - inputs: a tensor of shape [batch_size, height, width, channels]. - num_classes: number of predicted classes. If 0 or None, the logits layer - is omitted and the input features to the logits layer (before dropout) - are returned instead. - dropout_keep_prob: the percentage of activation values that are retained. - is_training: whether is training or not. - min_depth: Minimum depth value (number of channels) for all convolution ops. - Enforced when depth_multiplier < 1, and not an active constraint when - depth_multiplier >= 1. - depth_multiplier: Float multiplier for the depth (number of channels) - for all convolution ops. The value must be greater than zero. Typical - usage will be to set this value in (0, 1) to reduce the number of - parameters or computation cost of the model. - conv_defs: A list of ConvDef namedtuples specifying the net architecture. - prediction_fn: a function to get predictions out of logits. - spatial_squeeze: if True, logits is of shape is [B, C], if false logits is - of shape [B, 1, 1, C], where B is batch_size and C is number of classes. - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - global_pool: Optional boolean flag to control the avgpooling before the - logits layer. If false or unset, pooling is done with a fixed window - that reduces default-sized inputs to 1x1, while larger inputs lead to - larger outputs. If true, any input size is pooled down to 1x1. - - Returns: - net: a 2D Tensor with the logits (pre-softmax activations) if num_classes - is a non-zero integer, or the non-dropped-out input to the logits layer - if num_classes is 0 or None. - end_points: a dictionary from components of the network to the corresponding - activation. - - Raises: - ValueError: Input rank is invalid. 
- """ - input_shape = inputs.get_shape().as_list() - if len(input_shape) != 4: - raise ValueError('Invalid input tensor rank, expected 4, was: %d' % - len(input_shape)) - - with tf.variable_scope(scope, 'MobilenetV1', [inputs], reuse=reuse) as scope: - with slim.arg_scope([slim.batch_norm, slim.dropout], - is_training=is_training): - net, end_points = mobilenet_v1_base(inputs, scope=scope, - min_depth=min_depth, - depth_multiplier=depth_multiplier, - conv_defs=conv_defs) - with tf.variable_scope('Logits'): - if global_pool: - # Global average pooling. - net = tf.reduce_mean( - net, [1, 2], keep_dims=True, name='global_pool') - end_points['global_pool'] = net - else: - # Pooling with a fixed kernel size. - kernel_size = _reduced_kernel_size_for_small_input(net, [ - 7, 7]) - net = slim.avg_pool2d(net, kernel_size, padding='VALID', - scope='AvgPool_1a') - end_points['AvgPool_1a'] = net - if not num_classes: - return net, end_points - # 1 x 1 x 1024 - net = slim.dropout( - net, keep_prob=dropout_keep_prob, scope='Dropout_1b') - logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, - normalizer_fn=None, scope='Conv2d_1c_1x1') - if spatial_squeeze: - logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze') - end_points['Logits'] = logits - if prediction_fn: - end_points['Predictions'] = prediction_fn( - logits, scope='Predictions') - return logits, end_points - - -mobilenet_v1.default_image_size = 224 - - -def wrapped_partial(func, *args, **kwargs): - partial_func = functools.partial(func, *args, **kwargs) - functools.update_wrapper(partial_func, func) - return partial_func - - -mobilenet_v1_075 = wrapped_partial(mobilenet_v1, depth_multiplier=0.75) -mobilenet_v1_050 = wrapped_partial(mobilenet_v1, depth_multiplier=0.50) -mobilenet_v1_025 = wrapped_partial(mobilenet_v1, depth_multiplier=0.25) - - -def _reduced_kernel_size_for_small_input(input_tensor, kernel_size): - """Define kernel size which is automatically reduced for small input. - - If the shape of the input images is unknown at graph construction time this - function assumes that the input images are large enough. - - Args: - input_tensor: input tensor of size [batch_size, height, width, channels]. - kernel_size: desired kernel size of length 2: [kernel_height, kernel_width] - - Returns: - a tensor with the kernel size. - """ - shape = input_tensor.get_shape().as_list() - if shape[1] is None or shape[2] is None: - kernel_size_out = kernel_size - else: - kernel_size_out = [min(shape[1], kernel_size[0]), - min(shape[2], kernel_size[1])] - return kernel_size_out - - -def mobilenet_v1_arg_scope(is_training=True, - weight_decay=0.00004, - stddev=0.09, - regularize_depthwise=False): - """Defines the default MobilenetV1 arg scope. - - Args: - is_training: Whether or not we're training the model. - weight_decay: The weight decay to use for regularizing the model. - stddev: The standard deviation of the trunctated normal weight initializer. - regularize_depthwise: Whether or not apply regularization on depthwise. - - Returns: - An `arg_scope` to use for the mobilenet v1 model. - """ - batch_norm_params = { - 'is_training': is_training, - 'center': True, - 'scale': True, - 'decay': 0.9997, - 'epsilon': 0.001, - } - - # Set weight_decay for weights in Conv and DepthSepConv layers. 
- weights_init = tf.truncated_normal_initializer(stddev=stddev) - regularizer = tf.contrib.layers.l2_regularizer(weight_decay) - if regularize_depthwise: - depthwise_regularizer = regularizer - else: - depthwise_regularizer = None - with slim.arg_scope([slim.conv2d, slim.separable_conv2d], - weights_initializer=weights_init, - activation_fn=tf.nn.relu6, normalizer_fn=slim.batch_norm): - with slim.arg_scope([slim.batch_norm], **batch_norm_params): - with slim.arg_scope([slim.conv2d], weights_regularizer=regularizer): - with slim.arg_scope([slim.separable_conv2d], - weights_regularizer=depthwise_regularizer) as sc: - return sc -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Contains the definition for the NASNet classification networks. - -Paper: https://arxiv.org/abs/1707.07012 -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - -from . import nasnet_utils - -arg_scope = tf.contrib.framework.arg_scope -slim = tf.contrib.slim - - -# Notes for training NASNet Cifar Model -# ------------------------------------- -# batch_size: 32 -# learning rate: 0.025 -# cosine (single period) learning rate decay -# auxiliary head loss weighting: 0.4 -# clip global norm of all gradients by 5 -def _cifar_config(is_training=True): - drop_path_keep_prob = 1.0 if not is_training else 0.6 - return tf.contrib.training.HParams( - stem_multiplier=3.0, - drop_path_keep_prob=drop_path_keep_prob, - num_cells=18, - use_aux_head=1, - num_conv_filters=32, - dense_dropout_keep_prob=1.0, - filter_scaling_rate=2.0, - num_reduction_layers=2, - data_format='NHWC', - skip_reduction_layer_input=0, - # 600 epochs with a batch size of 32 - # This is used for the drop path probabilities since it needs to increase - # the drop out probability over the course of training. 
- total_training_steps=937500, - ) - - -# Notes for training large NASNet model on ImageNet -# ------------------------------------- -# batch size (per replica): 16 -# learning rate: 0.015 * 100 -# learning rate decay factor: 0.97 -# num epochs per decay: 2.4 -# sync sgd with 100 replicas -# auxiliary head loss weighting: 0.4 -# label smoothing: 0.1 -# clip global norm of all gradients by 10 -def _large_imagenet_config(is_training=True): - drop_path_keep_prob = 1.0 if not is_training else 0.7 - return tf.contrib.training.HParams( - stem_multiplier=3.0, - dense_dropout_keep_prob=0.5, - num_cells=18, - filter_scaling_rate=2.0, - num_conv_filters=168, - drop_path_keep_prob=drop_path_keep_prob, - use_aux_head=1, - num_reduction_layers=2, - data_format='NHWC', - skip_reduction_layer_input=1, - total_training_steps=250000, - ) - - -# Notes for training the mobile NASNet ImageNet model -# ------------------------------------- -# batch size (per replica): 32 -# learning rate: 0.04 * 50 -# learning rate scaling factor: 0.97 -# num epochs per decay: 2.4 -# sync sgd with 50 replicas -# auxiliary head weighting: 0.4 -# label smoothing: 0.1 -# clip global norm of all gradients by 10 -def _mobile_imagenet_config(): - return tf.contrib.training.HParams( - stem_multiplier=1.0, - dense_dropout_keep_prob=0.5, - num_cells=12, - filter_scaling_rate=2.0, - drop_path_keep_prob=1.0, - num_conv_filters=44, - use_aux_head=1, - num_reduction_layers=2, - data_format='NHWC', - skip_reduction_layer_input=0, - total_training_steps=250000, - ) - - -def nasnet_cifar_arg_scope(weight_decay=5e-4, - batch_norm_decay=0.9, - batch_norm_epsilon=1e-5): - """Defines the default arg scope for the NASNet-A Cifar model. - - Args: - weight_decay: The weight decay to use for regularizing the model. - batch_norm_decay: Decay for batch norm moving average. - batch_norm_epsilon: Small float added to variance to avoid dividing by zero - in batch norm. - - Returns: - An `arg_scope` to use for the NASNet Cifar Model. - """ - batch_norm_params = { - # Decay for the moving averages. - 'decay': batch_norm_decay, - # epsilon to prevent 0s in variance. - 'epsilon': batch_norm_epsilon, - 'scale': True, - 'fused': True, - } - weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay) - weights_initializer = tf.contrib.layers.variance_scaling_initializer( - mode='FAN_OUT') - with arg_scope([slim.fully_connected, slim.conv2d, slim.separable_conv2d], - weights_regularizer=weights_regularizer, - weights_initializer=weights_initializer): - with arg_scope([slim.fully_connected], - activation_fn=None, scope='FC'): - with arg_scope([slim.conv2d, slim.separable_conv2d], - activation_fn=None, biases_initializer=None): - with arg_scope([slim.batch_norm], **batch_norm_params) as sc: - return sc - - -def nasnet_mobile_arg_scope(weight_decay=4e-5, - batch_norm_decay=0.9997, - batch_norm_epsilon=1e-3): - """Defines the default arg scope for the NASNet-A Mobile ImageNet model. - - Args: - weight_decay: The weight decay to use for regularizing the model. - batch_norm_decay: Decay for batch norm moving average. - batch_norm_epsilon: Small float added to variance to avoid dividing by zero - in batch norm. - - Returns: - An `arg_scope` to use for the NASNet Mobile Model. - """ - batch_norm_params = { - # Decay for the moving averages. - 'decay': batch_norm_decay, - # epsilon to prevent 0s in variance. 
- 'epsilon': batch_norm_epsilon, - 'scale': True, - 'fused': True, - } - weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay) - weights_initializer = tf.contrib.layers.variance_scaling_initializer( - mode='FAN_OUT') - with arg_scope([slim.fully_connected, slim.conv2d, slim.separable_conv2d], - weights_regularizer=weights_regularizer, - weights_initializer=weights_initializer): - with arg_scope([slim.fully_connected], - activation_fn=None, scope='FC'): - with arg_scope([slim.conv2d, slim.separable_conv2d], - activation_fn=None, biases_initializer=None): - with arg_scope([slim.batch_norm], **batch_norm_params) as sc: - return sc - - -def nasnet_large_arg_scope(weight_decay=5e-5, - batch_norm_decay=0.9997, - batch_norm_epsilon=1e-3): - """Defines the default arg scope for the NASNet-A Large ImageNet model. - - Args: - weight_decay: The weight decay to use for regularizing the model. - batch_norm_decay: Decay for batch norm moving average. - batch_norm_epsilon: Small float added to variance to avoid dividing by zero - in batch norm. - - Returns: - An `arg_scope` to use for the NASNet Large Model. - """ - batch_norm_params = { - # Decay for the moving averages. - 'decay': batch_norm_decay, - # epsilon to prevent 0s in variance. - 'epsilon': batch_norm_epsilon, - 'scale': True, - 'fused': True, - } - weights_regularizer = tf.contrib.layers.l2_regularizer(weight_decay) - weights_initializer = tf.contrib.layers.variance_scaling_initializer( - mode='FAN_OUT') - with arg_scope([slim.fully_connected, slim.conv2d, slim.separable_conv2d], - weights_regularizer=weights_regularizer, - weights_initializer=weights_initializer): - with arg_scope([slim.fully_connected], - activation_fn=None, scope='FC'): - with arg_scope([slim.conv2d, slim.separable_conv2d], - activation_fn=None, biases_initializer=None): - with arg_scope([slim.batch_norm], **batch_norm_params) as sc: - return sc - - -def _build_aux_head(net, end_points, num_classes, hparams, scope): - """Auxiliary head used for all models across all datasets.""" - with tf.variable_scope(scope): - aux_logits = tf.identity(net) - with tf.variable_scope('aux_logits'): - aux_logits = slim.avg_pool2d( - aux_logits, [5, 5], stride=3, padding='VALID') - aux_logits = slim.conv2d(aux_logits, 128, [1, 1], scope='proj') - aux_logits = slim.batch_norm(aux_logits, scope='aux_bn0') - aux_logits = tf.nn.relu(aux_logits) - # Shape of feature map before the final layer. 
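# ---- editor's note ----------------------------------------------------------
# The slicing below reads the spatial extent out of either layout:
# NHWC shapes are [batch, height, width, channels], so shape[1:3] == (H, W);
# NCHW shapes are [batch, channels, height, width], so shape[2:4] == (H, W).
# ---- end note ----------------------------------------------------------------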
- shape = aux_logits.shape - if hparams.data_format == 'NHWC': - shape = shape[1:3] - else: - shape = shape[2:4] - aux_logits = slim.conv2d(aux_logits, 768, shape, padding='VALID') - aux_logits = slim.batch_norm(aux_logits, scope='aux_bn1') - aux_logits = tf.nn.relu(aux_logits) - aux_logits = tf.contrib.layers.flatten(aux_logits) - aux_logits = slim.fully_connected(aux_logits, num_classes) - end_points['AuxLogits'] = aux_logits - - -def _imagenet_stem(inputs, hparams, stem_cell): - """Stem used for models trained on ImageNet.""" - num_stem_cells = 2 - - # 149 x 149 x 32 - num_stem_filters = int(32 * hparams.stem_multiplier) - net = slim.conv2d( - inputs, num_stem_filters, [3, 3], stride=2, scope='conv0', - padding='VALID') - net = slim.batch_norm(net, scope='conv0_bn') - - # Run the reduction cells - cell_outputs = [None, net] - filter_scaling = 1.0 / (hparams.filter_scaling_rate**num_stem_cells) - for cell_num in range(num_stem_cells): - net = stem_cell( - net, - scope='cell_stem_{}'.format(cell_num), - filter_scaling=filter_scaling, - stride=2, - prev_layer=cell_outputs[-2], - cell_num=cell_num) - cell_outputs.append(net) - filter_scaling *= hparams.filter_scaling_rate - return net, cell_outputs - - -def _cifar_stem(inputs, hparams): - """Stem used for models trained on Cifar.""" - num_stem_filters = int(hparams.num_conv_filters * hparams.stem_multiplier) - net = slim.conv2d( - inputs, - num_stem_filters, - 3, - scope='l1_stem_3x3') - net = slim.batch_norm(net, scope='l1_stem_bn') - return net, [None, net] - - -def build_nasnet_cifar( - images, num_classes, is_training=True): - """Build NASNet model for the Cifar Dataset.""" - hparams = _cifar_config(is_training=is_training) - - if tf.test.is_gpu_available() and hparams.data_format == 'NHWC': - tf.logging.info('A GPU is available on the machine, consider using NCHW ' - 'data format for increased speed on GPU.') - - if hparams.data_format == 'NCHW': - images = tf.transpose(images, [0, 3, 1, 2]) - - # Calculate the total number of cells in the network - # Add 2 for the reduction cells - total_num_cells = hparams.num_cells + 2 - - normal_cell = nasnet_utils.NasNetANormalCell( - hparams.num_conv_filters, hparams.drop_path_keep_prob, - total_num_cells, hparams.total_training_steps) - reduction_cell = nasnet_utils.NasNetAReductionCell( - hparams.num_conv_filters, hparams.drop_path_keep_prob, - total_num_cells, hparams.total_training_steps) - with arg_scope([slim.dropout, nasnet_utils.drop_path, slim.batch_norm], - is_training=is_training): - with arg_scope([slim.avg_pool2d, - slim.max_pool2d, - slim.conv2d, - slim.batch_norm, - slim.separable_conv2d, - nasnet_utils.factorized_reduction, - nasnet_utils.global_avg_pool, - nasnet_utils.get_channel_index, - nasnet_utils.get_channel_dim], - data_format=hparams.data_format): - return _build_nasnet_base(images, - normal_cell=normal_cell, - reduction_cell=reduction_cell, - num_classes=num_classes, - hparams=hparams, - is_training=is_training, - stem_type='cifar') - - -build_nasnet_cifar.default_image_size = 32 - - -def build_nasnet_mobile(images, num_classes, - is_training=True, - final_endpoint=None): - """Build NASNet Mobile model for the ImageNet Dataset.""" - hparams = _mobile_imagenet_config() - - if tf.test.is_gpu_available() and hparams.data_format == 'NHWC': - tf.logging.info('A GPU is available on the machine, consider using NCHW ' - 'data format for increased speed on GPU.') - - if hparams.data_format == 'NCHW': - images = tf.transpose(images, [0, 3, 1, 2]) - - # Calculate the total number 
of cells in the network - # Add 2 for the reduction cells - total_num_cells = hparams.num_cells + 2 - # If ImageNet, then add an additional two for the stem cells - total_num_cells += 2 - - normal_cell = nasnet_utils.NasNetANormalCell( - hparams.num_conv_filters, hparams.drop_path_keep_prob, - total_num_cells, hparams.total_training_steps) - reduction_cell = nasnet_utils.NasNetAReductionCell( - hparams.num_conv_filters, hparams.drop_path_keep_prob, - total_num_cells, hparams.total_training_steps) - with arg_scope([slim.dropout, nasnet_utils.drop_path, slim.batch_norm], - is_training=is_training): - with arg_scope([slim.avg_pool2d, - slim.max_pool2d, - slim.conv2d, - slim.batch_norm, - slim.separable_conv2d, - nasnet_utils.factorized_reduction, - nasnet_utils.global_avg_pool, - nasnet_utils.get_channel_index, - nasnet_utils.get_channel_dim], - data_format=hparams.data_format): - return _build_nasnet_base(images, - normal_cell=normal_cell, - reduction_cell=reduction_cell, - num_classes=num_classes, - hparams=hparams, - is_training=is_training, - stem_type='imagenet', - final_endpoint=final_endpoint) - - -build_nasnet_mobile.default_image_size = 224 - - -def build_nasnet_large(images, num_classes, - is_training=True, - final_endpoint=None): - """Build NASNet Large model for the ImageNet Dataset.""" - hparams = _large_imagenet_config(is_training=is_training) - - if tf.test.is_gpu_available() and hparams.data_format == 'NHWC': - tf.logging.info('A GPU is available on the machine, consider using NCHW ' - 'data format for increased speed on GPU.') - - if hparams.data_format == 'NCHW': - images = tf.transpose(images, [0, 3, 1, 2]) - - # Calculate the total number of cells in the network - # Add 2 for the reduction cells - total_num_cells = hparams.num_cells + 2 - # If ImageNet, then add an additional two for the stem cells - total_num_cells += 2 - - normal_cell = nasnet_utils.NasNetANormalCell( - hparams.num_conv_filters, hparams.drop_path_keep_prob, - total_num_cells, hparams.total_training_steps) - reduction_cell = nasnet_utils.NasNetAReductionCell( - hparams.num_conv_filters, hparams.drop_path_keep_prob, - total_num_cells, hparams.total_training_steps) - with arg_scope([slim.dropout, nasnet_utils.drop_path, slim.batch_norm], - is_training=is_training): - with arg_scope([slim.avg_pool2d, - slim.max_pool2d, - slim.conv2d, - slim.batch_norm, - slim.separable_conv2d, - nasnet_utils.factorized_reduction, - nasnet_utils.global_avg_pool, - nasnet_utils.get_channel_index, - nasnet_utils.get_channel_dim], - data_format=hparams.data_format): - return _build_nasnet_base(images, - normal_cell=normal_cell, - reduction_cell=reduction_cell, - num_classes=num_classes, - hparams=hparams, - is_training=is_training, - stem_type='imagenet', - final_endpoint=final_endpoint) - - -build_nasnet_large.default_image_size = 331 - - -def _build_nasnet_base(images, - normal_cell, - reduction_cell, - num_classes, - hparams, - is_training, - stem_type, - final_endpoint=None): - """Constructs a NASNet image model.""" - - end_points = {} - - def add_and_check_endpoint(endpoint_name, net): - end_points[endpoint_name] = net - return final_endpoint and (endpoint_name == final_endpoint) - - # Find where to place the reduction cells or stride normal cells - reduction_indices = nasnet_utils.calc_reduction_layers( - hparams.num_cells, hparams.num_reduction_layers) - stem_cell = reduction_cell - - if stem_type == 'imagenet': - def stem(): return _imagenet_stem(images, hparams, stem_cell) - elif stem_type == 'cifar': - def stem(): 
return _cifar_stem(images, hparams) - else: - raise ValueError('Unknown stem_type: ', stem_type) - net, cell_outputs = stem() - if add_and_check_endpoint('Stem', net): - return net, end_points - - # Setup for building in the auxiliary head. - aux_head_cell_idxes = [] - if len(reduction_indices) >= 2: - aux_head_cell_idxes.append(reduction_indices[1] - 1) - - # Run the cells - filter_scaling = 1.0 - # true_cell_num accounts for the stem cells - true_cell_num = 2 if stem_type == 'imagenet' else 0 - for cell_num in range(hparams.num_cells): - stride = 1 - if hparams.skip_reduction_layer_input: - prev_layer = cell_outputs[-2] - if cell_num in reduction_indices: - filter_scaling *= hparams.filter_scaling_rate - net = reduction_cell( - net, - scope='reduction_cell_{}'.format( - reduction_indices.index(cell_num)), - filter_scaling=filter_scaling, - stride=2, - prev_layer=cell_outputs[-2], - cell_num=true_cell_num) - if add_and_check_endpoint( - 'Reduction_Cell_{}'.format(reduction_indices.index(cell_num)), net): - return net, end_points - true_cell_num += 1 - cell_outputs.append(net) - if not hparams.skip_reduction_layer_input: - prev_layer = cell_outputs[-2] - net = normal_cell( - net, - scope='cell_{}'.format(cell_num), - filter_scaling=filter_scaling, - stride=stride, - prev_layer=prev_layer, - cell_num=true_cell_num) - - if add_and_check_endpoint('Cell_{}'.format(cell_num), net): - return net, end_points - true_cell_num += 1 - if (hparams.use_aux_head and cell_num in aux_head_cell_idxes and - num_classes and is_training): - aux_net = tf.nn.relu(net) - _build_aux_head(aux_net, end_points, num_classes, hparams, - scope='aux_{}'.format(cell_num)) - cell_outputs.append(net) - - # Final softmax layer - with tf.variable_scope('final_layer'): - net = tf.nn.relu(net) - net = nasnet_utils.global_avg_pool(net) - if add_and_check_endpoint('global_pool', net) or num_classes is None: - return net, end_points - net = slim.dropout( - net, hparams.dense_dropout_keep_prob, scope='dropout') - logits = slim.fully_connected(net, num_classes) - - if add_and_check_endpoint('Logits', logits): - return net, end_points - - predictions = tf.nn.softmax(logits, name='predictions') - if add_and_check_endpoint('Predictions', predictions): - return net, end_points - return logits, end_points -# Copyright 2017 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""A custom module for some common operations used by NASNet. 
- -Functions exposed in this file: -- calc_reduction_layers -- get_channel_index -- get_channel_dim -- global_avg_pool -- factorized_reduction -- drop_path - -Classes exposed in this file: -- NasNetABaseCell -- NasNetANormalCell -- NasNetAReductionCell -""" -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import tensorflow as tf - - -arg_scope = tf.contrib.framework.arg_scope -slim = tf.contrib.slim - -DATA_FORMAT_NCHW = 'NCHW' -DATA_FORMAT_NHWC = 'NHWC' -INVALID = 'null' - - -def calc_reduction_layers(num_cells, num_reduction_layers): - """Figure out what layers should have reductions.""" - reduction_layers = [] - for pool_num in range(1, num_reduction_layers + 1): - layer_num = (float(pool_num) / (num_reduction_layers + 1)) * num_cells - layer_num = int(layer_num) - reduction_layers.append(layer_num) - return reduction_layers - - -@tf.contrib.framework.add_arg_scope -def get_channel_index(data_format=INVALID): - assert data_format != INVALID - axis = 3 if data_format == 'NHWC' else 1 - return axis - - -@tf.contrib.framework.add_arg_scope -def get_channel_dim(shape, data_format=INVALID): - assert data_format != INVALID - assert len(shape) == 4 - if data_format == 'NHWC': - return int(shape[3]) - elif data_format == 'NCHW': - return int(shape[1]) - else: - raise ValueError('Not a valid data_format', data_format) - - -@tf.contrib.framework.add_arg_scope -def global_avg_pool(x, data_format=INVALID): - """Average pool away the height and width spatial dimensions of x.""" - assert data_format != INVALID - assert data_format in ['NHWC', 'NCHW'] - assert x.shape.ndims == 4 - if data_format == 'NHWC': - return tf.reduce_mean(x, [1, 2]) - else: - return tf.reduce_mean(x, [2, 3]) - - -@tf.contrib.framework.add_arg_scope -def factorized_reduction(net, output_filters, stride, data_format=INVALID): - """Reduces the shape of net without information loss due to striding.""" - assert output_filters % 2 == 0, ( - 'Need even number of filters when using this factorized reduction.') - assert data_format != INVALID - if stride == 1: - net = slim.conv2d(net, output_filters, 1, scope='path_conv') - net = slim.batch_norm(net, scope='path_bn') - return net - if data_format == 'NHWC': - stride_spec = [1, stride, stride, 1] - else: - stride_spec = [1, 1, stride, stride] - - # Skip path 1 - path1 = tf.nn.avg_pool( - net, [1, 1, 1, 1], stride_spec, 'VALID', data_format=data_format) - path1 = slim.conv2d(path1, int(output_filters / 2), 1, scope='path1_conv') - - # Skip path 2 - # First pad with 0's on the right and bottom, then shift the filter to - # include those 0's that were added. 
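# Illustrative sketch (standalone; assumes numpy, not part of the file above):
# the pad-and-shift below places path 2's stride-2 samples on the grid
# complementary to path 1's, so between the two paths no spatial position
# is skipped.
import numpy as np
x = np.arange(16).reshape(4, 4)                         # toy 4x4 feature map
path1 = x[::2, ::2]                                     # samples rows/cols 0, 2
path2 = np.pad(x, ((0, 1), (0, 1)))[1:, 1:][::2, ::2]   # samples rows/cols 1, 3
# path1 -> [[0, 2], [8, 10]];  path2 -> [[5, 7], [13, 15]]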
- if data_format == 'NHWC': - pad_arr = [[0, 0], [0, 1], [0, 1], [0, 0]] - path2 = tf.pad(net, pad_arr)[:, 1:, 1:, :] - concat_axis = 3 - else: - pad_arr = [[0, 0], [0, 0], [0, 1], [0, 1]] - path2 = tf.pad(net, pad_arr)[:, :, 1:, 1:] - concat_axis = 1 - - path2 = tf.nn.avg_pool( - path2, [1, 1, 1, 1], stride_spec, 'VALID', data_format=data_format) - path2 = slim.conv2d(path2, int(output_filters / 2), 1, scope='path2_conv') - - # Concat and apply BN - final_path = tf.concat(values=[path1, path2], axis=concat_axis) - final_path = slim.batch_norm(final_path, scope='final_path_bn') - return final_path - - -@tf.contrib.framework.add_arg_scope -def drop_path(net, keep_prob, is_training=True): - """Drops out a whole example hiddenstate with the specified probability.""" - if is_training: - batch_size = tf.shape(net)[0] - noise_shape = [batch_size, 1, 1, 1] - random_tensor = keep_prob - random_tensor += tf.random_uniform(noise_shape, dtype=tf.float32) - binary_tensor = tf.floor(random_tensor) - net = tf.div(net, keep_prob) * binary_tensor - return net - - -def _operation_to_filter_shape(operation): - splitted_operation = operation.split('x') - filter_shape = int(splitted_operation[0][-1]) - assert filter_shape == int( - splitted_operation[1][0]), 'Rectangular filters not supported.' - return filter_shape - - -def _operation_to_num_layers(operation): - splitted_operation = operation.split('_') - if 'x' in splitted_operation[-1]: - return 1 - return int(splitted_operation[-1]) - - -def _operation_to_info(operation): - """Takes in operation name and returns meta information. - - An example would be 'separable_3x3_4' -> (3, 4). - - Args: - operation: String that corresponds to convolution operation. - - Returns: - Tuple of (filter shape, num layers). - """ - num_layers = _operation_to_num_layers(operation) - filter_shape = _operation_to_filter_shape(operation) - return num_layers, filter_shape - - -def _stacked_separable_conv(net, stride, operation, filter_size): - """Takes in an operations and parses it to the correct sep operation.""" - num_layers, kernel_size = _operation_to_info(operation) - for layer_num in range(num_layers - 1): - net = tf.nn.relu(net) - net = slim.separable_conv2d( - net, - filter_size, - kernel_size, - depth_multiplier=1, - scope='separable_{0}x{0}_{1}'.format(kernel_size, layer_num + 1), - stride=stride) - net = slim.batch_norm( - net, scope='bn_sep_{0}x{0}_{1}'.format(kernel_size, layer_num + 1)) - stride = 1 - net = tf.nn.relu(net) - net = slim.separable_conv2d( - net, - filter_size, - kernel_size, - depth_multiplier=1, - scope='separable_{0}x{0}_{1}'.format(kernel_size, num_layers), - stride=stride) - net = slim.batch_norm( - net, scope='bn_sep_{0}x{0}_{1}'.format(kernel_size, num_layers)) - return net - - -def _operation_to_pooling_type(operation): - """Takes in the operation string and returns the pooling type.""" - splitted_operation = operation.split('_') - return splitted_operation[0] - - -def _operation_to_pooling_shape(operation): - """Takes in the operation string and returns the pooling kernel shape.""" - splitted_operation = operation.split('_') - shape = splitted_operation[-1] - assert 'x' in shape - filter_height, filter_width = shape.split('x') - assert filter_height == filter_width - return int(filter_height) - - -def _operation_to_pooling_info(operation): - """Parses the pooling operation string to return its type and shape.""" - pooling_type = _operation_to_pooling_type(operation) - pooling_shape = _operation_to_pooling_shape(operation) - return 
pooling_type, pooling_shape - - -def _pooling(net, stride, operation): - """Parses operation and performs the correct pooling operation on net.""" - padding = 'SAME' - pooling_type, pooling_shape = _operation_to_pooling_info(operation) - if pooling_type == 'avg': - net = slim.avg_pool2d(net, pooling_shape, - stride=stride, padding=padding) - elif pooling_type == 'max': - net = slim.max_pool2d(net, pooling_shape, - stride=stride, padding=padding) - else: - raise NotImplementedError('Unimplemented pooling type: ', pooling_type) - return net - - -class NasNetABaseCell(object): - """NASNet Cell class that is used as a 'layer' in image architectures. - - Args: - num_conv_filters: The number of filters for each convolution operation. - operations: List of operations that are performed in the NASNet Cell in - order. - used_hiddenstates: Binary array that signals if the hiddenstate was used - within the cell. This is used to determine what outputs of the cell - should be concatenated together. - hiddenstate_indices: Determines what hiddenstates should be combined - together with the specified operations to create the NASNet cell. - """ - - def __init__(self, num_conv_filters, operations, used_hiddenstates, - hiddenstate_indices, drop_path_keep_prob, total_num_cells, - total_training_steps): - self._num_conv_filters = num_conv_filters - self._operations = operations - self._used_hiddenstates = used_hiddenstates - self._hiddenstate_indices = hiddenstate_indices - self._drop_path_keep_prob = drop_path_keep_prob - self._total_num_cells = total_num_cells - self._total_training_steps = total_training_steps - - def _reduce_prev_layer(self, prev_layer, curr_layer): - """Matches dimension of prev_layer to the curr_layer.""" - # Set the prev layer to the current layer if it is None - if prev_layer is None: - return curr_layer - curr_num_filters = self._filter_size - prev_num_filters = get_channel_dim(prev_layer.shape) - curr_filter_shape = int(curr_layer.shape[2]) - prev_filter_shape = int(prev_layer.shape[2]) - if curr_filter_shape != prev_filter_shape: - prev_layer = tf.nn.relu(prev_layer) - prev_layer = factorized_reduction( - prev_layer, curr_num_filters, stride=2) - elif curr_num_filters != prev_num_filters: - prev_layer = tf.nn.relu(prev_layer) - prev_layer = slim.conv2d( - prev_layer, curr_num_filters, 1, scope='prev_1x1') - prev_layer = slim.batch_norm(prev_layer, scope='prev_bn') - return prev_layer - - def _cell_base(self, net, prev_layer): - """Runs the beginning of the conv cell before the predicted ops are run.""" - num_filters = self._filter_size - - # Check to be sure prev layer stuff is set up correctly - prev_layer = self._reduce_prev_layer(prev_layer, net) - - net = tf.nn.relu(net) - net = slim.conv2d(net, num_filters, 1, scope='1x1') - net = slim.batch_norm(net, scope='beginning_bn') - split_axis = get_channel_index() - net = tf.split( - axis=split_axis, num_or_size_splits=1, value=net) - for split in net: - assert int(split.shape[split_axis]) == int(self._num_conv_filters * - self._filter_scaling) - net.append(prev_layer) - return net - - def __call__(self, net, scope=None, filter_scaling=1, stride=1, - prev_layer=None, cell_num=-1): - """Runs the conv cell.""" - self._cell_num = cell_num - self._filter_scaling = filter_scaling - self._filter_size = int(self._num_conv_filters * filter_scaling) - - i = 0 - with tf.variable_scope(scope): - net = self._cell_base(net, prev_layer) - for iteration in range(5): - with tf.variable_scope('comb_iter_{}'.format(iteration)): - left_hiddenstate_idx, 
right_hiddenstate_idx = ( - self._hiddenstate_indices[i], - self._hiddenstate_indices[i + 1]) - original_input_left = left_hiddenstate_idx < 2 - original_input_right = right_hiddenstate_idx < 2 - h1 = net[left_hiddenstate_idx] - h2 = net[right_hiddenstate_idx] - - operation_left = self._operations[i] - operation_right = self._operations[i+1] - i += 2 - # Apply conv operations - with tf.variable_scope('left'): - h1 = self._apply_conv_operation(h1, operation_left, - stride, original_input_left) - with tf.variable_scope('right'): - h2 = self._apply_conv_operation(h2, operation_right, - stride, original_input_right) - - # Combine hidden states using 'add'. - with tf.variable_scope('combine'): - h = h1 + h2 - - # Add hiddenstate to the list of hiddenstates we can choose from - net.append(h) - - with tf.variable_scope('cell_output'): - net = self._combine_unused_states(net) - - return net - - def _apply_conv_operation(self, net, operation, - stride, is_from_original_input): - """Applies the predicted conv operation to net.""" - # Dont stride if this is not one of the original hiddenstates - if stride > 1 and not is_from_original_input: - stride = 1 - input_filters = get_channel_dim(net.shape) - filter_size = self._filter_size - if 'separable' in operation: - net = _stacked_separable_conv(net, stride, operation, filter_size) - elif operation in ['none']: - # Check if a stride is needed, then use a strided 1x1 here - if stride > 1 or (input_filters != filter_size): - net = tf.nn.relu(net) - net = slim.conv2d(net, filter_size, 1, - stride=stride, scope='1x1') - net = slim.batch_norm(net, scope='bn_1') - elif 'pool' in operation: - net = _pooling(net, stride, operation) - if input_filters != filter_size: - net = slim.conv2d(net, filter_size, 1, stride=1, scope='1x1') - net = slim.batch_norm(net, scope='bn_1') - else: - raise ValueError('Unimplemented operation', operation) - - if operation != 'none': - net = self._apply_drop_path(net) - return net - - def _combine_unused_states(self, net): - """Concatenate the unused hidden states of the cell.""" - used_hiddenstates = self._used_hiddenstates - - final_height = int(net[-1].shape[2]) - final_num_filters = get_channel_dim(net[-1].shape) - assert len(used_hiddenstates) == len(net) - for idx, used_h in enumerate(used_hiddenstates): - curr_height = int(net[idx].shape[2]) - curr_num_filters = get_channel_dim(net[idx].shape) - - # Determine if a reduction should be applied to make the number of - # filters match. 
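# The chained booleans that follow collapse into a single predicate; a
# sketch of the same decision (function name hypothetical, not from the
# file above):
def needs_reduction(used, curr_height, curr_filters,
                    final_height, final_filters):
    # Reduce an unused hidden state whose spatial size or channel count
    # differs from the cell's final output.
    return (not used) and (curr_height != final_height or
                           curr_filters != final_filters)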
- should_reduce = final_num_filters != curr_num_filters - should_reduce = (final_height != curr_height) or should_reduce - should_reduce = should_reduce and not used_h - if should_reduce: - stride = 2 if final_height != curr_height else 1 - with tf.variable_scope('reduction_{}'.format(idx)): - net[idx] = factorized_reduction( - net[idx], final_num_filters, stride) - - states_to_combine = ( - [h for h, is_used in zip(net, used_hiddenstates) if not is_used]) - - # Return the concat of all the states - concat_axis = get_channel_index() - net = tf.concat(values=states_to_combine, axis=concat_axis) - return net - - def _apply_drop_path(self, net): - """Apply drop_path regularization to net.""" - drop_path_keep_prob = self._drop_path_keep_prob - if drop_path_keep_prob < 1.0: - # Scale keep prob by layer number - assert self._cell_num != -1 - # The added 2 is for the reduction cells - num_cells = self._total_num_cells - layer_ratio = (self._cell_num + 1)/float(num_cells) - with tf.device('/cpu:0'): - tf.summary.scalar('layer_ratio', layer_ratio) - drop_path_keep_prob = 1 - layer_ratio * (1 - drop_path_keep_prob) - # Decrease the keep probability over time - current_step = tf.cast(tf.train.get_or_create_global_step(), - tf.float32) - drop_path_burn_in_steps = self._total_training_steps - current_ratio = ( - current_step / drop_path_burn_in_steps) - current_ratio = tf.minimum(1.0, current_ratio) - with tf.device('/cpu:0'): - tf.summary.scalar('current_ratio', current_ratio) - drop_path_keep_prob = ( - 1 - current_ratio * (1 - drop_path_keep_prob)) - with tf.device('/cpu:0'): - tf.summary.scalar('drop_path_keep_prob', drop_path_keep_prob) - net = drop_path(net, drop_path_keep_prob) - return net - - -class NasNetANormalCell(NasNetABaseCell): - """NASNetA Normal Cell.""" - - def __init__(self, num_conv_filters, drop_path_keep_prob, total_num_cells, - total_training_steps): - operations = ['separable_5x5_2', - 'separable_3x3_2', - 'separable_5x5_2', - 'separable_3x3_2', - 'avg_pool_3x3', - 'none', - 'avg_pool_3x3', - 'avg_pool_3x3', - 'separable_3x3_2', - 'none'] - used_hiddenstates = [1, 0, 0, 0, 0, 0, 0] - hiddenstate_indices = [0, 1, 1, 1, 0, 1, 1, 1, 0, 0] - super(NasNetANormalCell, self).__init__(num_conv_filters, operations, - used_hiddenstates, - hiddenstate_indices, - drop_path_keep_prob, - total_num_cells, - total_training_steps) - - -class NasNetAReductionCell(NasNetABaseCell): - """NASNetA Reduction Cell.""" - - def __init__(self, num_conv_filters, drop_path_keep_prob, total_num_cells, - total_training_steps): - operations = ['separable_5x5_2', - 'separable_7x7_2', - 'max_pool_3x3', - 'separable_7x7_2', - 'avg_pool_3x3', - 'separable_5x5_2', - 'none', - 'avg_pool_3x3', - 'separable_3x3_2', - 'max_pool_3x3'] - used_hiddenstates = [1, 1, 1, 0, 0, 0, 0] - hiddenstate_indices = [0, 1, 0, 1, 0, 1, 3, 2, 2, 0] - super(NasNetAReductionCell, self).__init__(num_conv_filters, operations, - used_hiddenstates, - hiddenstate_indices, - drop_path_keep_prob, - total_num_cells, - total_training_steps) -import numpy as np -import os -import sys -import tensorflow as tf -import tensorflow.contrib.slim as slim - - -def create_symbol(X, num_classes=0, is_training=False, CUDNN=False, - maxf=30000, edim=125, nhid=100, batchs=64): - word_vectors = tf.contrib.layers.embed_sequence( - X, vocab_size=maxf, embed_dim=edim) - - word_list = tf.unstack(word_vectors, axis=1) - - if not CUDNN: - cell1 = tf.contrib.rnn.LSTMCell(nhid) - cell2 = tf.contrib.rnn.GRUCell(nhid) - stacked_cell = tf.nn.rnn_cell.MultiRNNCell([cell1, 
cell2]) - outputs, states = tf.nn.static_rnn( - stacked_cell, word_list, dtype=tf.float32) - logits = tf.layers.dense( - outputs[-1], 2, activation=None, name='output') - else: - # Using cuDNN since vanilla RNN - from tensorflow.contrib.cudnn_rnn.python.ops import cudnn_rnn_ops - cudnn_cell = cudnn_rnn_ops.CudnnGRU(num_layers=1, - num_units=nhid, - input_size=edim, - input_mode='linear_input') - params_size_t = cudnn_cell.params_size() - params = tf.Variable(tf.random_uniform( - [params_size_t], -0.1, 0.1), validate_shape=False) - input_h = tf.Variable(tf.zeros([1, batchs, nhid])) - outputs, states = cudnn_cell(input_data=word_list, - input_h=input_h, - params=params) - logits = tf.layers.dense( - outputs[-1], 2, activation=None, name='output') - - return logits, logits - - -def dummy_arg_scope(): - with slim.arg_scope([]) as sc: - return sc -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Convolution blocks for mobilenet.""" -import contextlib -import functools - -import tensorflow as tf - -slim = tf.contrib.slim - - -def _fixed_padding(inputs, kernel_size, rate=1): - """Pads the input along the spatial dimensions independently of input size. - - Pads the input such that if it was used in a convolution with 'VALID' padding, - the output would have the same dimensions as if the unpadded input was used - in a convolution with 'SAME' padding. - - Args: - inputs: A tensor of size [batch, height_in, width_in, channels]. - kernel_size: The kernel to be used in the conv2d or max_pool2d operation. - rate: An integer, rate for atrous convolution. - - Returns: - output: A tensor of size [batch, height_out, width_out, channels] with the - input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). - """ - kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1), - kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)] - pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1] - pad_beg = [pad_total[0] // 2, pad_total[1] // 2] - pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]] - padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]], - [pad_beg[1], pad_end[1]], [0, 0]]) - return padded_inputs - - -def _make_divisible(v, divisor, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -def _split_divisible(num, num_ways, divisible_by=8): - """Evenly splits num, num_ways so each piece is a multiple of divisible_by.""" - assert num % divisible_by == 0 - assert num / num_ways >= divisible_by - # Note: want to round down, we adjust each split to match the total. 
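# Worked check of the loop below (illustrative values only): with
# divisible_by=8,
#   _split_divisible(48, 3)  ->  [16, 16, 16]
#   _split_divisible(56, 3)  ->  [24, 16, 16]
# every piece is a multiple of 8, the pieces sum back to num, and the
# earliest pieces absorb any remainder.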
- base = num // num_ways // divisible_by * divisible_by - result = [] - accumulated = 0 - for i in range(num_ways): - r = base - while accumulated + r < num * (i + 1) / num_ways: - r += divisible_by - result.append(r) - accumulated += r - assert accumulated == num - return result - - -@contextlib.contextmanager -def _v1_compatible_scope_naming(scope): - if scope is None:  # Create uniquified separable blocks. - with tf.variable_scope(None, default_name='separable') as s, \ - tf.name_scope(s.original_name_scope): - yield '' - else: - # We use scope_depthwise, scope_pointwise for compatibility with V1 ckpts, - # which provide numbered scopes. - scope += '_' - yield scope - - -@slim.add_arg_scope -def split_separable_conv2d(input_tensor, - num_outputs, - scope=None, - normalizer_fn=None, - stride=1, - rate=1, - endpoints=None, - use_explicit_padding=False): - """Separable mobilenet V1 style convolution. - - Depthwise convolution, with default non-linearity, - followed by a 1x1 pointwise convolution. This is similar to - slim.separable_conv2d, but differs in that it applies batch - normalization and non-linearity to the depthwise stage. This matches - the basic building block of the MobileNet paper - (https://arxiv.org/abs/1704.04861) - - Args: - input_tensor: input - num_outputs: number of outputs - scope: optional name of the scope. Note if provided it will use - scope_depthwise for depthwise, and scope_pointwise for pointwise. - normalizer_fn: which normalizer function to use for depthwise/pointwise - stride: stride - rate: output rate (also known as dilation rate) - endpoints: optional, if provided, will export additional tensors to it. - use_explicit_padding: Use 'VALID' padding for convolutions, but prepad - inputs so that the output dimensions are the same as if 'SAME' padding - were used. - - Returns: - output tensor - """ - - with _v1_compatible_scope_naming(scope) as scope: - dw_scope = scope + 'depthwise' - endpoints = endpoints if endpoints is not None else {} - kernel_size = [3, 3] - padding = 'SAME' - if use_explicit_padding: - padding = 'VALID' - input_tensor = _fixed_padding(input_tensor, kernel_size, rate) - net = slim.separable_conv2d( - input_tensor, - None, - kernel_size, - depth_multiplier=1, - stride=stride, - rate=rate, - normalizer_fn=normalizer_fn, - padding=padding, - scope=dw_scope) - - endpoints[dw_scope] = net - - pw_scope = scope + 'pointwise' - net = slim.conv2d( - net, - num_outputs, [1, 1], - stride=1, - normalizer_fn=normalizer_fn, - scope=pw_scope) - endpoints[pw_scope] = net - return net - - -def expand_input_by_factor(n, divisible_by=8): - return lambda num_inputs, **_: _make_divisible(num_inputs * n, divisible_by) - - -@slim.add_arg_scope -def expanded_conv(input_tensor, - num_outputs, - expansion_size=expand_input_by_factor(6), - stride=1, - rate=1, - kernel_size=(3, 3), - residual=True, - normalizer_fn=None, - split_projection=1, - split_expansion=1, - expansion_transform=None, - depthwise_location='expansion', - depthwise_channel_multiplier=1, - endpoints=None, - use_explicit_padding=False, - padding='SAME', - scope=None): - """Depthwise Convolution Block with expansion. - - Builds a composite convolution that has the following structure - expansion (1x1) -> depthwise (kernel_size) -> projection (1x1) - - Args: - input_tensor: input - num_outputs: number of outputs in the final layer. - expansion_size: the size of expansion, could be a constant or a callable. - If the latter, it will be provided 'num_inputs' as an input. 
For forward - compatibility it should accept arbitrary keyword arguments. - Default will expand the input by factor of 6. - stride: depthwise stride - rate: depthwise rate - kernel_size: depthwise kernel - residual: whether to include residual connection between input - and output. - normalizer_fn: batchnorm or otherwise - split_projection: how many ways to split projection operator - (that is conv expansion->bottleneck) - split_expansion: how many ways to split expansion op - (that is conv bottleneck->expansion) ops will keep depth divisible - by this value. - expansion_transform: Optional function that takes expansion - as a single input and returns output. - depthwise_location: where to put depthwise covnvolutions supported - values None, 'input', 'output', 'expansion' - depthwise_channel_multiplier: depthwise channel multiplier: - each input will replicated (with different filters) - that many times. So if input had c channels, - output will have c x depthwise_channel_multpilier. - endpoints: An optional dictionary into which intermediate endpoints are - placed. The keys "expansion_output", "depthwise_output", - "projection_output" and "expansion_transform" are always populated, even - if the corresponding functions are not invoked. - use_explicit_padding: Use 'VALID' padding for convolutions, but prepad - inputs so that the output dimensions are the same as if 'SAME' padding - were used. - padding: Padding type to use if `use_explicit_padding` is not set. - scope: optional scope. - - Returns: - Tensor of depth num_outputs - - Raises: - TypeError: on inval - """ - with tf.variable_scope(scope, default_name='expanded_conv') as s, \ - tf.name_scope(s.original_name_scope): - prev_depth = input_tensor.get_shape().as_list()[3] - if depthwise_location not in [None, 'input', 'output', 'expansion']: - raise TypeError('%r is unknown value for depthwise_location' % - depthwise_location) - if use_explicit_padding: - if padding != 'SAME': - raise TypeError('`use_explicit_padding` should only be used with ' - '"SAME" padding.') - padding = 'VALID' - depthwise_func = functools.partial( - slim.separable_conv2d, - num_outputs=None, - kernel_size=kernel_size, - depth_multiplier=depthwise_channel_multiplier, - stride=stride, - rate=rate, - normalizer_fn=normalizer_fn, - padding=padding, - scope='depthwise') - # b1 -> b2 * r -> b2 - # i -> (o * r) (bottleneck) -> o - input_tensor = tf.identity(input_tensor, 'input') - net = input_tensor - - if depthwise_location == 'input': - if use_explicit_padding: - net = _fixed_padding(net, kernel_size, rate) - net = depthwise_func(net, activation_fn=None) - - if callable(expansion_size): - inner_size = expansion_size(num_inputs=prev_depth) - else: - inner_size = expansion_size - - if inner_size > net.shape[3]: - net = split_conv( - net, - inner_size, - num_ways=split_expansion, - scope='expand', - stride=1, - normalizer_fn=normalizer_fn) - net = tf.identity(net, 'expansion_output') - if endpoints is not None: - endpoints['expansion_output'] = net - - if depthwise_location == 'expansion': - if use_explicit_padding: - net = _fixed_padding(net, kernel_size, rate) - net = depthwise_func(net) - - net = tf.identity(net, name='depthwise_output') - if endpoints is not None: - endpoints['depthwise_output'] = net - if expansion_transform: - net = expansion_transform( - expansion_tensor=net, input_tensor=input_tensor) - # Note in contrast with expansion, we always have - # projection to produce the desired output size. 
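# Shape flow through this block for one illustrative setting (prev_depth=24,
# the default expand_input_by_factor(6), num_outputs=32 are example values):
#   expansion : 24 -> _make_divisible(24 * 6, 8) = 144   (1x1 conv)
#   depthwise : 144 -> 144                               (3x3, one filter per channel)
#   projection: 144 -> 32                                (1x1 conv, linear; below)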
- net = split_conv( - net, - num_outputs, - num_ways=split_projection, - stride=1, - scope='project', - normalizer_fn=normalizer_fn, - activation_fn=tf.identity) - if endpoints is not None: - endpoints['projection_output'] = net - if depthwise_location == 'output': - if use_explicit_padding: - net = _fixed_padding(net, kernel_size, rate) - net = depthwise_func(net, activation_fn=None) - - if callable(residual): # custom residual - net = residual(input_tensor=input_tensor, output_tensor=net) - elif (residual and - # stride check enforces that we don't add residuals when spatial - # dimensions are None - stride == 1 and - # Depth matches - net.get_shape().as_list()[3] == - input_tensor.get_shape().as_list()[3]): - net += input_tensor - return tf.identity(net, name='output') - - -def split_conv(input_tensor, - num_outputs, - num_ways, - scope, - divisible_by=8, - **kwargs): - """Creates a split convolution. - - Split convolution splits the input and output into - 'num_blocks' blocks of approximately the same size each, - and only connects $i$-th input to $i$ output. - - Args: - input_tensor: input tensor - num_outputs: number of output filters - num_ways: num blocks to split by. - scope: scope for all the operators. - divisible_by: make sure that every part is divisiable by this. - **kwargs: will be passed directly into conv2d operator - Returns: - tensor - """ - b = input_tensor.get_shape().as_list()[3] - - if num_ways == 1 or min(b // num_ways, - num_outputs // num_ways) < divisible_by: - # Don't do any splitting if we end up with less than 8 filters - # on either side. - return slim.conv2d(input_tensor, num_outputs, [1, 1], scope=scope, **kwargs) - - outs = [] - input_splits = _split_divisible(b, num_ways, divisible_by=divisible_by) - output_splits = _split_divisible( - num_outputs, num_ways, divisible_by=divisible_by) - inputs = tf.split(input_tensor, input_splits, - axis=3, name='split_' + scope) - base = scope - for i, (input_tensor, out_size) in enumerate(zip(inputs, output_splits)): - scope = base + '_part_%d' % (i,) - n = slim.conv2d(input_tensor, out_size, [1, 1], scope=scope, **kwargs) - n = tf.identity(n, scope + '_output') - outs.append(n) - return tf.concat(outs, 3, name=scope + '_concat') -# Copyright 2018 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -"""Mobilenet Base Class.""" - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import collections -import contextlib -import copy -import os - -import tensorflow as tf - - -slim = tf.contrib.slim - - -@slim.add_arg_scope -def apply_activation(x, name=None, activation_fn=None): - return activation_fn(x, name=name) if activation_fn else x - - -def _fixed_padding(inputs, kernel_size, rate=1): - """Pads the input along the spatial dimensions independently of input size. 
- - Pads the input such that if it was used in a convolution with 'VALID' padding, - the output would have the same dimensions as if the unpadded input was used - in a convolution with 'SAME' padding. - - Args: - inputs: A tensor of size [batch, height_in, width_in, channels]. - kernel_size: The kernel to be used in the conv2d or max_pool2d operation. - rate: An integer, rate for atrous convolution. - - Returns: - output: A tensor of size [batch, height_out, width_out, channels] with the - input, either intact (if kernel_size == 1) or padded (if kernel_size > 1). - """ - kernel_size_effective = [kernel_size[0] + (kernel_size[0] - 1) * (rate - 1), - kernel_size[0] + (kernel_size[0] - 1) * (rate - 1)] - pad_total = [kernel_size_effective[0] - 1, kernel_size_effective[1] - 1] - pad_beg = [pad_total[0] // 2, pad_total[1] // 2] - pad_end = [pad_total[0] - pad_beg[0], pad_total[1] - pad_beg[1]] - padded_inputs = tf.pad(inputs, [[0, 0], [pad_beg[0], pad_end[0]], - [pad_beg[1], pad_end[1]], [0, 0]]) - return padded_inputs - - -def _make_divisible(v, divisor, min_value=None): - if min_value is None: - min_value = divisor - new_v = max(min_value, int(v + divisor / 2) // divisor * divisor) - # Make sure that round down does not go down by more than 10%. - if new_v < 0.9 * v: - new_v += divisor - return new_v - - -@contextlib.contextmanager -def _set_arg_scope_defaults(defaults): - """Sets arg scope defaults for all items present in defaults. - - Args: - defaults: dictionary/list of pairs, containing a mapping from - function to a dictionary of default args. - - Yields: - context manager where all defaults are set. - """ - if hasattr(defaults, 'items'): - items = list(defaults.items()) - else: - items = defaults - if not items: - yield - else: - func, default_arg = items[0] - with slim.arg_scope(func, **default_arg): - with _set_arg_scope_defaults(items[1:]): - yield - - -@slim.add_arg_scope -def depth_multiplier(output_params, - multiplier, - divisible_by=8, - min_depth=8, - **unused_kwargs): - if 'num_outputs' not in output_params: - return - d = output_params['num_outputs'] - output_params['num_outputs'] = _make_divisible(d * multiplier, divisible_by, - min_depth) - - -_Op = collections.namedtuple('Op', ['op', 'params', 'multiplier_func']) - - -def op(opfunc, **params): - multiplier = params.pop('multiplier_transorm', depth_multiplier) - return _Op(opfunc, params=params, multiplier_func=multiplier) - - -class NoOpScope(object): - """No-op context manager.""" - - def __enter__(self): - return None - - def __exit__(self, exc_type, exc_value, traceback): - return False - - -def safe_arg_scope(funcs, **kwargs): - """Returns `slim.arg_scope` with all None arguments removed. - - Arguments: - funcs: Functions to pass to `arg_scope`. - **kwargs: Arguments to pass to `arg_scope`. - - Returns: - arg_scope or No-op context manager. - - Note: can be useful if None value should be interpreted as "do not overwrite - this parameter value". - """ - filtered_args = {name: value for name, value in kwargs.items() - if value is not None} - if filtered_args: - return slim.arg_scope(funcs, **filtered_args) - else: - return NoOpScope() - - -@slim.add_arg_scope -def mobilenet_base( # pylint: disable=invalid-name - inputs, - conv_defs, - multiplier=1.0, - final_endpoint=None, - output_stride=None, - use_explicit_padding=False, - scope=None, - is_training=False): - """Mobilenet base network. - - Constructs a network from inputs to the given final endpoint. By default - the network is constructed in inference mode. 
To create network - in training mode use: - - with slim.arg_scope(mobilenet.training_scope()): - logits, endpoints = mobilenet_base(...) - - Args: - inputs: a tensor of shape [batch_size, height, width, channels]. - conv_defs: A list of op(...) layers specifying the net architecture. - multiplier: Float multiplier for the depth (number of channels) - for all convolution ops. The value must be greater than zero. Typical - usage will be to set this value in (0, 1) to reduce the number of - parameters or computation cost of the model. - final_endpoint: The name of last layer, for early termination for - for V1-based networks: last layer is "layer_14", for V2: "layer_20" - output_stride: An integer that specifies the requested ratio of input to - output spatial resolution. If not None, then we invoke atrous convolution - if necessary to prevent the network from reducing the spatial resolution - of the activation maps. Allowed values are 1 or any even number, excluding - zero. Typical values are 8 (accurate fully convolutional mode), 16 - (fast fully convolutional mode), and 32 (classification mode). - - NOTE- output_stride relies on all consequent operators to support dilated - operators via "rate" parameter. This might require wrapping non-conv - operators to operate properly. - - use_explicit_padding: Use 'VALID' padding for convolutions, but prepad - inputs so that the output dimensions are the same as if 'SAME' padding - were used. - scope: optional variable scope. - is_training: How to setup batch_norm and other ops. Note: most of the time - this does not need be set directly. Use mobilenet.training_scope() to set - up training instead. This parameter is here for backward compatibility - only. It is safe to set it to the value matching - training_scope(is_training=...). It is also safe to explicitly set - it to False, even if there is outer training_scope set to to training. - (The network will be built in inference mode). If this is set to None, - no arg_scope is added for slim.batch_norm's is_training parameter. - - Returns: - tensor_out: output tensor. - end_points: a set of activations for external use, for example summaries or - losses. - - Raises: - ValueError: depth_multiplier <= 0, or the target output_stride is not - allowed. - """ - if multiplier <= 0: - raise ValueError('multiplier is not greater than zero.') - - # Set conv defs defaults and overrides. - conv_defs_defaults = conv_defs.get('defaults', {}) - conv_defs_overrides = conv_defs.get('overrides', {}) - if use_explicit_padding: - conv_defs_overrides = copy.deepcopy(conv_defs_overrides) - conv_defs_overrides[ - (slim.conv2d, slim.separable_conv2d)] = {'padding': 'VALID'} - - if output_stride is not None: - if output_stride == 0 or (output_stride > 1 and output_stride % 2): - raise ValueError( - 'Output stride must be None, 1 or a multiple of 2.') - - # a) Set the tensorflow scope - # b) set padding to default: note we might consider removing this - # since it is also set by mobilenet_scope - # c) set all defaults - # d) set all extra overrides. - with _scope_all(scope, default_scope='Mobilenet'), \ - safe_arg_scope([slim.batch_norm], is_training=is_training), \ - _set_arg_scope_defaults(conv_defs_defaults), \ - _set_arg_scope_defaults(conv_defs_overrides): - # The current_stride variable keeps track of the output stride of the - # activations, i.e., the running product of convolution strides up to the - # current network layer. 
This allows us to invoke atrous convolution - # whenever applying the next convolution would result in the activations - # having output stride larger than the target output_stride. - current_stride = 1 - - # The atrous convolution rate parameter. - rate = 1 - - net = inputs - # Insert default parameters before the base scope which includes - # any custom overrides set in mobilenet. - end_points = {} - scopes = {} - for i, opdef in enumerate(conv_defs['spec']): - params = dict(opdef.params) - opdef.multiplier_func(params, multiplier) - stride = params.get('stride', 1) - if output_stride is not None and current_stride == output_stride: - # If we have reached the target output_stride, then we need to employ - # atrous convolution with stride=1 and multiply the atrous rate by the - # current unit's stride for use in subsequent layers. - layer_stride = 1 - layer_rate = rate - rate *= stride - else: - layer_stride = stride - layer_rate = 1 - current_stride *= stride - # Update params. - params['stride'] = layer_stride - # Only insert rate to params if rate > 1. - if layer_rate > 1: - params['rate'] = layer_rate - # Set padding - if use_explicit_padding: - if 'kernel_size' in params: - net = _fixed_padding( - net, params['kernel_size'], layer_rate) - else: - params['use_explicit_padding'] = True - - end_point = 'layer_%d' % (i + 1) - try: - net = opdef.op(net, **params) - except Exception: - print('Failed to create op %i: %r params: %r' % - (i, opdef, params)) - raise - end_points[end_point] = net - scope = os.path.dirname(net.name) - scopes[scope] = end_point - if final_endpoint is not None and end_point == final_endpoint: - break - - # Add all tensors that end with 'output' to - # endpoints - for t in net.graph.get_operations(): - scope = os.path.dirname(t.name) - bn = os.path.basename(t.name) - if scope in scopes and t.name.endswith('output'): - end_points[scopes[scope] + '/' + bn] = t.outputs[0] - return net, end_points - - -@contextlib.contextmanager -def _scope_all(scope, default_scope=None): - with tf.variable_scope(scope, default_name=default_scope) as s,\ - tf.name_scope(s.original_name_scope): - yield s - - -@slim.add_arg_scope -def mobilenet(inputs, - num_classes=1001, - prediction_fn=slim.softmax, - reuse=None, - scope='Mobilenet', - base_only=False, - **mobilenet_args): - """Mobilenet model for classification, supports both V1 and V2. - - Note: default mode is inference, use mobilenet.training_scope to create - training network. - - - Args: - inputs: a tensor of shape [batch_size, height, width, channels]. - num_classes: number of predicted classes. If 0 or None, the logits layer - is omitted and the input features to the logits layer (before dropout) - are returned instead. - prediction_fn: a function to get predictions out of logits - (default softmax). - reuse: whether or not the network and its variables should be reused. To be - able to reuse 'scope' must be given. - scope: Optional variable_scope. - base_only: if True will only create the base of the network (no pooling - and no logits). - **mobilenet_args: passed to mobilenet_base verbatim. - - conv_defs: list of conv defs - - multiplier: Float multiplier for the depth (number of channels) - for all convolution ops. The value must be greater than zero. Typical - usage will be to set this value in (0, 1) to reduce the number of - parameters or computation cost of the model. - - output_stride: will ensure that the last layer has at most total stride. - If the architecture calls for more stride than that provided - (e.g. 
output_stride=16, but the architecture has 5 stride=2 operators), - it will replace output_stride with fractional convolutions using Atrous - Convolutions. - - Returns: - logits: the pre-softmax activations, a tensor of size - [batch_size, num_classes] - end_points: a dictionary from components of the network to the corresponding - activation tensor. - - Raises: - ValueError: Input rank is invalid. - """ - is_training = mobilenet_args.get('is_training', False) - input_shape = inputs.get_shape().as_list() - if len(input_shape) != 4: - raise ValueError('Expected rank 4 input, was: %d' % len(input_shape)) - - with tf.variable_scope(scope, 'Mobilenet', reuse=reuse) as scope: - inputs = tf.identity(inputs, 'input') - net, end_points = mobilenet_base(inputs, scope=scope, **mobilenet_args) - if base_only: - return net, end_points - - net = tf.identity(net, name='embedding') - - with tf.variable_scope('Logits'): - net = global_pool(net) - end_points['global_pool'] = net - if not num_classes: - return net, end_points - net = slim.dropout(net, scope='Dropout', is_training=is_training) - # 1 x 1 x num_classes - # Note: legacy scope name. - logits = slim.conv2d( - net, - num_classes, [1, 1], - activation_fn=None, - normalizer_fn=None, - biases_initializer=tf.zeros_initializer(), - scope='Conv2d_1c_1x1') - - logits = tf.squeeze(logits, [1, 2]) - - logits = tf.identity(logits, name='output') - end_points['Logits'] = logits - if prediction_fn: - end_points['Predictions'] = prediction_fn(logits, 'Predictions') - return logits, end_points - - -def global_pool(input_tensor, pool_op=tf.nn.avg_pool): - """Applies avg pool to produce 1x1 output. - - NOTE: This function is funcitonally equivalenet to reduce_mean, but it has - baked in average pool which has better support across hardware. - - Args: - input_tensor: input tensor - pool_op: pooling op (avg pool is default) - Returns: - a tensor batch_size x 1 x 1 x depth. - """ - shape = input_tensor.get_shape().as_list() - if shape[1] is None or shape[2] is None: - kernel_size = tf.convert_to_tensor( - [1, tf.shape(input_tensor)[1], - tf.shape(input_tensor)[2], 1]) - else: - kernel_size = [1, shape[1], shape[2], 1] - output = pool_op( - input_tensor, ksize=kernel_size, strides=[1, 1, 1, 1], padding='VALID') - # Recover output shape, for unknown shape. - output.set_shape([None, 1, 1, None]) - return output - - -def training_scope(is_training=True, - weight_decay=0.00004, - stddev=0.09, - dropout_keep_prob=0.8, - bn_decay=0.997): - """Defines Mobilenet training scope. - - Usage: - with tf.contrib.slim.arg_scope(mobilenet.training_scope()): - logits, endpoints = mobilenet_v2.mobilenet(input_tensor) - - # the network created will be trainble with dropout/batch norm - # initialized appropriately. - Args: - is_training: if set to False this will ensure that all customizations are - set to non-training mode. This might be helpful for code that is reused - across both training/evaluation, but most of the time training_scope with - value False is not needed. If this is set to None, the parameters is not - added to the batch_norm arg_scope. - - weight_decay: The weight decay to use for regularizing the model. - stddev: Standard deviation for initialization, if negative uses xavier. - dropout_keep_prob: dropout keep probability (not set if equals to None). - bn_decay: decay for the batch norm moving averages (not set if equals to - None). - - Returns: - An argument scope to use via arg_scope. 
- """ - # Note: do not introduce parameters that would change the inference - # model here (for example whether to use bias), modify conv_def instead. - batch_norm_params = { - 'decay': bn_decay, - 'is_training': is_training - } - if stddev < 0: - weight_intitializer = slim.initializers.xavier_initializer() - else: - weight_intitializer = tf.truncated_normal_initializer(stddev=stddev) - - # Set weight_decay for weights in Conv and FC layers. - with slim.arg_scope( - [slim.conv2d, slim.fully_connected, slim.separable_conv2d], - weights_initializer=weight_intitializer, - normalizer_fn=slim.batch_norm), \ - slim.arg_scope([mobilenet_base, mobilenet], is_training=is_training),\ - safe_arg_scope([slim.batch_norm], **batch_norm_params), \ - safe_arg_scope([slim.dropout], is_training=is_training, - keep_prob=dropout_keep_prob), \ - slim.arg_scope([slim.conv2d], - weights_regularizer=slim.l2_regularizer(weight_decay)), \ - slim.arg_scope([slim.separable_conv2d], weights_regularizer=None) as s: - return s -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -r"""Make a request JSON from local images to send to CloudML serving API. - -Example usage: - $ python images_to_json.py -o request.json img1.jpg img2.jpg - $ gcloud beta ml predict \ - --model=MY_MODEL_NAME \ - --instances=request.json > response.yaml -""" -from __future__ import print_function - -import argparse -import base64 -from cStringIO import StringIO -import json -import sys - -from PIL import Image - -desired_width = 299 -desired_height = 299 - - -def parse_args(): - """Handle the command line arguments. - - Returns: - Output of argparse.ArgumentParser.parse_args. - """ - - parser = argparse.ArgumentParser() - parser.add_argument('-o', '--output', default='request.json', - help='Output file to write encoded images to.') - parser.add_argument('-r', '--resize', dest='resize', action='store_true', - help='Will resize images locally first. 
Not needed, but' ' will reduce network traffic.') - parser.add_argument('inputs', nargs='+', type=argparse.FileType('r'), - help='A list of .jpg or .jpeg files to serialize into a ' - 'request json') - - args = parser.parse_args() - - def check(filename): return filename.lower().endswith(('jpeg', 'jpg')) - if not all(check(input_file.name) for input_file in args.inputs): - sys.stderr.write('All inputs must be .jpeg or .jpg') - sys.exit(1) - - return args - - -def make_request_json(input_images, output_json, do_resize): - """Produces a JSON request suitable to send to CloudML Prediction API. - - Args: - input_images: List of file handles corresponding to images to be encoded. - output_json: File handle of the output json where request will be written. - do_resize: Boolean specifying if script should resize images. - """ - - with open(output_json, 'w') as ff: - for image_handle in input_images: - # Uses argparse to check permissions, but ignores the pre-opened file handle. - image = Image.open(image_handle.name) - image_handle.close() - resized_handle = StringIO() - is_too_big = ((image.size[0] * image.size[1]) > - (desired_width * desired_height)) - if do_resize and is_too_big: - image = image.resize((299, 299), Image.BILINEAR) - - image.save(resized_handle, format='JPEG') - encoded_contents = base64.b64encode(resized_handle.getvalue()) - - # key can be any UTF-8 string, since it goes in an HTTP request. - row = json.dumps({'key': image_handle.name, - 'image_bytes': {'b64': encoded_contents}}) - - ff.write(row) - ff.write('\n') - - print('Wrote {} images to {}'.format(len(input_images), output_json)) - - -def main(): - args = parse_args() - make_request_json(args.inputs, args.output, args.resize) - - -if __name__ == '__main__': - main() -#!/usr/bin/env python -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Flowers Sample Cloud Runner.""" - -from __future__ import print_function -import argparse -import base64 -import datetime -import errno -import io -import json -import multiprocessing -import os -import subprocess -import time -import uuid -import apache_beam as beam -from PIL import Image -from tensorflow.python.lib.io import file_io -from tensorflow.python.framework import errors - -import trainer.preprocess as preprocess_lib - -# Model variables -MODEL_NAME = 'flowers' -TRAINER_NAME = 'trainer-0.1.tar.gz' -METADATA_FILE_NAME = 'metadata.json' -EXPORT_SUBDIRECTORY = 'model' -CONFIG_FILE_NAME = 'config.yaml' -MODULE_NAME = 'trainer.task' -SAMPLE_IMAGE = \ - 'gs://cloud-ml-data/img/flower_photos/tulips/4520577328_a94c11e806_n.jpg' - -# Number of seconds to wait before sending the next online prediction after -# an online prediction fails due to model deployment not being complete. -PREDICTION_WAIT_TIME = 30 - - -def process_args(): - """Define arguments and assign default values to the ones that are not set. - - Returns: - args: The parsed namespace with defaults assigned to the flags. 
- """ - - parser = argparse.ArgumentParser( - description='Runs Flowers Sample E2E pipeline.') - parser.add_argument( - '--project', - default=None, - help='The project to which the job will be submitted.') - parser.add_argument( - '--cloud', - action='store_true', - help='Run preprocessing on the cloud.') - parser.add_argument( - '--train_input_path', - default=None, - help='Input specified as uri to CSV file for the train set') - parser.add_argument( - '--eval_input_path', - default=None, - help='Input specified as uri to CSV file for the eval set.') - parser.add_argument( - '--eval_set_size', - default=50, - help='The size of the eval dataset.') - parser.add_argument( - '--input_dict', - default=None, - help='Input dictionary. Specified as text file uri. ' - 'Each line of the file stores one label.') - parser.add_argument( - '--deploy_model_name', - default='flowerse2e', - help=('If --cloud is used, the model is deployed with this ' - 'name. The default is flowerse2e.')) - parser.add_argument( - '--dataflow_sdk_path', - default=None, - help=('Path to Dataflow SDK location. If None, Pip will ' - 'be used to download the latest published version')) - parser.add_argument( - '--max_deploy_wait_time', - default=600, - help=('Maximum number of seconds to wait after a model is deployed.')) - parser.add_argument( - '--deploy_model_version', - default='v' + uuid.uuid4().hex[:4], - help=('If --cloud is used, the model is deployed with this ' - 'version. The default is four random characters.')) - parser.add_argument( - '--preprocessed_train_set', - default=None, - help=('If specified, preprocessing steps will be skipped.' - 'The provided preprocessed dataset wil be used in this case.' - 'If specified, preprocessed_eval_set must also be provided.')) - parser.add_argument( - '--preprocessed_eval_set', - default=None, - help=('If specified, preprocessing steps will be skipped.' - 'The provided preprocessed dataset wil be used in this case.' - 'If specified, preprocessed_train_set must also be provided.')) - parser.add_argument( - '--pretrained_model_path', - default=None, - help=('If specified, preprocessing and training steps ares skipped.' - 'The pretrained model will be deployed in this case.')) - parser.add_argument( - '--sample_image_uri', - default=SAMPLE_IMAGE, - help=('URI for a single Jpeg image to be used for online prediction.')) - parser.add_argument( - '--gcs_bucket', - default=None, - help=('Google Cloud Storage bucket to be used for uploading intermediate ' - 'data')), - parser.add_argument( - '--output_dir', - default=None, - help=('Google Cloud Storage or Local directory in which ' - 'to place outputs.')) - parser.add_argument( - '--runtime_version', - default=os.getenv('CLOUDSDK_ML_DEFAULT_RUNTIME_VERSION', '1.0'), - help=('Tensorflow version for model training and prediction.')) - - args, _ = parser.parse_known_args() - - if args.cloud and not args.project: - args.project = get_cloud_project() - - return args - - -class FlowersE2E(object): - """The end-2-end pipeline for Flowers Sample.""" - - def __init__(self, args=None): - if not args: - self.args = process_args() - else: - self.args = args - - def preprocess(self): - """Runs the pre-processing pipeline. - - It tiggers two Dataflow pipelines in parallel for train and eval. - Returns: - train_output_prefix: Path prefix for the preprocessed train dataset. - eval_output_prefix: Path prefix for the preprocessed eval dataset. 
- """ - - train_dataset_name = 'train' - eval_dataset_name = 'eval' - - # Prepare the environment to run the Dataflow pipeline for preprocessing. - if self.args.dataflow_sdk_path: - dataflow_sdk = self.args.dataflow_sdk_path - if dataflow_sdk.startswith('gs://'): - subprocess.check_call( - ['gsutil', 'cp', self.args.dataflow_sdk_path, '.']) - dataflow_sdk = self.args.dataflow_sdk_path.split('/')[-1] - else: - dataflow_sdk = None - - subprocess.check_call( - ['python', 'setup.py', 'sdist', '--format=gztar']) - - trainer_uri = os.path.join(self.args.output_dir, TRAINER_NAME) - subprocess.check_call( - ['gsutil', '-q', 'cp', os.path.join('dist', TRAINER_NAME), trainer_uri]) - - thread_pool = multiprocessing.pool.ThreadPool(2) - - train_output_prefix = os.path.join(self.args.output_dir, 'preprocessed', - train_dataset_name) - eval_output_prefix = os.path.join(self.args.output_dir, 'preprocessed', - eval_dataset_name) - - train_args = (train_dataset_name, self.args.train_input_path, - train_output_prefix, dataflow_sdk, trainer_uri) - eval_args = (eval_dataset_name, self.args.eval_input_path, - eval_output_prefix, dataflow_sdk, trainer_uri) - - # make a pool to run two pipelines in parallel. - pipeline_pool = [thread_pool.apply_async(self.run_pipeline, train_args), - thread_pool.apply_async(self.run_pipeline, eval_args)] - _ = [res.get() for res in pipeline_pool] - return train_output_prefix, eval_output_prefix - - def run_pipeline(self, dataset_name, input_csv, output_prefix, - dataflow_sdk_location, trainer_uri): - """Runs a Dataflow pipeline to preprocess the given dataset. - - Args: - dataset_name: The name of the dataset ('eval' or 'train'). - input_csv: Path to the input CSV file which contains an image-URI with - its labels in each line. - output_prefix: Output prefix to write results to. - dataflow_sdk_location: path to Dataflow SDK package. - trainer_uri: Path to the Flower's trainer package. - """ - job_name = ('cloud-ml-sample-flowers-' + - datetime.datetime.now().strftime('%Y%m%d%H%M%S') + - '-' + dataset_name) - - options = { - 'staging_location': - os.path.join(self.args.output_dir, 'tmp', - dataset_name, 'staging'), - 'temp_location': - os.path.join(self.args.output_dir, 'tmp', dataset_name), - 'project': - self.args.project, - 'job_name': job_name, - 'extra_packages': [trainer_uri], - 'save_main_session': - True, - } - if dataflow_sdk_location: - options['sdk_location'] = dataflow_sdk_location - - pipeline_name = 'DataflowRunner' if self.args.cloud else 'DirectRunner' - - opts = beam.pipeline.PipelineOptions(flags=[], **options) - args = argparse.Namespace(**vars(self.args)) - vars(args)['input_path'] = input_csv - vars(args)['input_dict'] = self.args.input_dict - vars(args)['output_path'] = output_prefix - # execute the pipeline - with beam.Pipeline(pipeline_name, options=opts) as pipeline: - preprocess_lib.configure_pipeline(pipeline, args) - - def train(self, train_file_path, eval_file_path): - """Train a model using the eval and train datasets. - - Args: - train_file_path: Path to the train dataset. - eval_file_path: Path to the eval dataset. 
- """ - trainer_args = [ - '--output_path', self.args.output_dir, - '--eval_data_paths', eval_file_path, - '--eval_set_size', str(self.args.eval_set_size), - '--train_data_paths', train_file_path - ] - - if self.args.cloud: - job_name = 'flowers_model' + datetime.datetime.now().strftime( - '_%y%m%d_%H%M%S') - command = [ - 'gcloud', 'ml-engine', 'jobs', 'submit', 'training', job_name, - '--stream-logs', - '--module-name', MODULE_NAME, - '--staging-bucket', self.args.gcs_bucket, - '--region', 'us-central1', - '--project', self.args.project, - '--package-path', 'trainer', - '--runtime-version', self.args.runtime_version, - '--' - ] + trainer_args - else: - command = [ - 'gcloud', 'ml-engine', 'local', 'train', - '--module-name', MODULE_NAME, - '--package-path', 'trainer', - '--', - ] + trainer_args - subprocess.check_call(command) - - def deploy_model(self, model_path): - """Deploys the trained model. - - Args: - model_path: Path to the trained model. - """ - - create_model_cmd = [ - 'gcloud', 'ml-engine', 'models', 'create', self.args.deploy_model_name, - '--regions', 'us-central1', - '--project', self.args.project, - ] - - print(create_model_cmd) - subprocess.check_call(create_model_cmd) - - submit = [ - 'gcloud', 'ml-engine', 'versions', 'create', - self.args.deploy_model_version, - '--model', self.args.deploy_model_name, - '--origin', model_path, - '--project', self.args.project, - '--runtime-version', self.args.runtime_version, - ] - if not model_path.startswith('gs://'): - submit.extend(['--staging-bucket', self.args.gcs_bucket]) - print(submit) - subprocess.check_call(submit) - - self.adaptive_wait() - - print('Deployed %s version: %s' % (self.args.deploy_model_name, - self.args.deploy_model_version)) - - def adaptive_wait(self): - """Waits for a model to be fully deployed. - - It keeps sending online prediction requests until a prediction is - successful or maximum wait time is reached. It sleeps between requests. - """ - start_time = datetime.datetime.utcnow() - elapsed_time = 0 - while elapsed_time < self.args.max_deploy_wait_time: - try: - self.predict(self.args.sample_image_uri) - return - except Exception as e: - time.sleep(PREDICTION_WAIT_TIME) - elapsed_time = (datetime.datetime.utcnow() - - start_time).total_seconds() - continue - - def predict(self, image_uri): - """Sends a predict request for the deployed model for the given image. - - Args: - image_uri: The input image URI. - """ - output_json = 'request.json' - self.make_request_json(image_uri, output_json) - cmd = [ - 'gcloud', 'ml-engine', 'predict', - '--model', self.args.deploy_model_name, - '--version', self.args.deploy_model_version, - '--json-instances', 'request.json', - '--project', self.args.project - ] - subprocess.check_call(cmd) - - def make_request_json(self, uri, output_json): - """Produces a JSON request suitable to send to CloudML Prediction API. - - Args: - uri: The input image URI. - output_json: File handle of the output json where request will be written. 
- """ - def _open_file_read_binary(uri): - try: - return file_io.FileIO(uri, mode='rb') - except errors.InvalidArgumentError: - return file_io.FileIO(uri, mode='r') - - with open(output_json, 'w') as outf: - with _open_file_read_binary(uri) as f: - image_bytes = f.read() - image = Image.open(io.BytesIO(image_bytes)).convert('RGB') - image = image.resize((299, 299), Image.BILINEAR) - resized_image = io.BytesIO() - image.save(resized_image, format='JPEG') - encoded_image = base64.b64encode(resized_image.getvalue()) - row = json.dumps( - {'key': uri, 'image_bytes': {'b64': encoded_image}}) - outf.write(row) - outf.write('\n') - - def run(self): - """Runs the pipeline.""" - model_path = self.args.pretrained_model_path - if not model_path: - train_prefix, eval_prefix = (self.args.preprocessed_train_set, - self.args.preprocessed_eval_set) - - if not train_prefix or not eval_prefix: - train_prefix, eval_prefix = self.preprocess() - self.train(train_prefix + '*', eval_prefix + '*') - model_path = os.path.join( - self.args.output_dir, EXPORT_SUBDIRECTORY) - self.deploy_model(model_path) - - -def get_cloud_project(): - cmd = [ - 'gcloud', '-q', 'config', 'list', 'project', - '--format=value(core.project)' - ] - with open(os.devnull, 'w') as dev_null: - try: - res = subprocess.check_output(cmd, stderr=dev_null).strip() - if not res: - raise Exception('--cloud specified but no Google Cloud Platform ' - 'project found.\n' - 'Please specify your project name with the --project ' - 'flag or set a default project: ' - 'gcloud config set project YOUR_PROJECT_NAME') - return res - except OSError as e: - if e.errno == errno.ENOENT: - raise Exception('gcloud is not installed. The Google Cloud SDK is ' - 'necessary to communicate with the Cloud ML service. ' - 'Please install and set up gcloud.') - raise - - -def main(): - pipeline = FlowersE2E() - pipeline.run() - - -if __name__ == '__main__': - main() -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from setuptools import find_packages -from setuptools import setup - -REQUIRED_PACKAGES = [ - 'tensorflow==1.0.1', -] - -setup( - name='trainer', - version='0.1', - install_requires=REQUIRED_PACKAGES, - packages=find_packages(), - include_package_data=True, - requires=[] -) -# Copyright 2018 Google Inc. All Rights Reserved. Licensed under the Apache -# License, Version 2.0 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -# This tool downloads SDF files from an FTP source. 
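-#
-# For orientation, the accepted --data-sources URIs look like the following.
-# These are illustrative placeholders, not values used by the sample; when no
-# credentials are given, anonymous/guest login is assumed:
-#
-#     ftp://hostname/path/to/sdf/files
-#     ftp://username:password@hostname/path/to/sdf/files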
-
-import StringIO
-import argparse
-import ftplib
-import multiprocessing as mp
-import os
-import re
-import signal
-import tempfile
-import tensorflow as tf
-import zlib
-
-
-# Regular expressions to parse an FTP URI.
-_USER_RE = r'''(?P<user>[^:@]+|'[^']+'|"[^"]+")'''
-_PASSWORD_RE = r'''(?P<password>[^@]+|'[^']+'|"[^"]+")'''
-_CREDS_RE = r'{}(?::{})?'.format(_USER_RE, _PASSWORD_RE)
-FTP_RE = re.compile(r'^ftp://(?:{}@)?(?P<abs_path>.*)$'.format(_CREDS_RE))
-
-# Good for debugging.
-FORCE_DISABLE_MULTIPROCESSING = False
-
-
-def _function_wrapper(args_tuple):
-    """Function wrapper to call from multiprocessing."""
-    function, args = args_tuple
-    return function(*args)
-
-
-def parallel_map(function, iterable):
-    """Calls a function for every element in an iterable using multiple cores."""
-    if FORCE_DISABLE_MULTIPROCESSING:
-        return [function(*args) for args in iterable]
-
-    original_sigint_handler = signal.signal(signal.SIGINT, signal.SIG_IGN)
-    num_threads = mp.cpu_count() * 2
-    pool = mp.Pool(processes=num_threads)
-    signal.signal(signal.SIGINT, original_sigint_handler)
-
-    p = pool.map_async(_function_wrapper, ((function, args)
-                                           for args in iterable))
-    try:
-        results = p.get(0xFFFFFFFF)
-    except KeyboardInterrupt:
-        pool.terminate()
-        raise
-    pool.close()
-    return results
-
-
-def extract_data_file(ftp_file, data_dir):
-    """Function to extract a single PubChem data file."""
-    user = ftp_file['user']
-    password = ftp_file['password']
-    server = ftp_file['server']
-    path = ftp_file['path']
-    basename = os.path.basename(path)
-    sdf_file = os.path.join(data_dir, os.path.splitext(basename)[0])
-
-    if not tf.gfile.Exists(sdf_file):
-        # The `ftp` object cannot be pickled for multithreading, so we open a
-        # new connection here
-        memfile = StringIO.StringIO()
-        ftp = ftplib.FTP(server, user, password)
-        ftp.retrbinary('RETR ' + path, memfile.write)
-        ftp.quit()
-
-        memfile.seek(0)
-        with tf.gfile.Open(sdf_file, 'w') as f:
-            gzip_wbits_format = zlib.MAX_WBITS | 16
-            contents = zlib.decompress(memfile.getvalue(), gzip_wbits_format)
-            f.write(contents)
-        print('Extracted {}'.format(sdf_file))
-
-    else:
-        print('Found {}'.format(sdf_file))
-
-
-def run(data_sources, filter_regex, max_data_files, data_dir):
-    """Extracts the specified number of data files in parallel."""
-    if not tf.gfile.Exists(data_dir):
-        tf.gfile.MakeDirs(data_dir)
-
-    # Get available data files
-    filter_re = re.compile(filter_regex)
-    ftp_files = []
-    for source in data_sources:
-        m = FTP_RE.search(source)
-        if not m:
-            raise ValueError('malformed FTP URI')
-        user = m.group('user') or 'anonymous'
-        password = m.group('password') or 'guest'
-        server, path_dir = m.group('abs_path').split('/', 1)
-        uri_prefix = 'ftp://{}:{}@{}/'.format(user, password, server)
-
-        ftp = ftplib.FTP(server, user, password)
-        ftp_files += [{
-            'user': user,
-            'password': password,
-            'server': server,
-            'path': path,
-        } for path in ftp.nlst(path_dir)
-            if filter_re.search(uri_prefix + path)]
-        ftp.quit()
-
-    # Extract data files in parallel
-    if not max_data_files:
-        max_data_files = len(ftp_files)
-    assert max_data_files >= 1
-    print('Found {} files, using {}'.format(len(ftp_files), max_data_files))
-    ftp_files = ftp_files[:max_data_files]
-    print('Extracting data files...')
-    parallel_map(
-        extract_data_file, ((ftp_file, data_dir) for ftp_file in ftp_files))
-
-
-if __name__ == '__main__':
-    """Main function"""
-    parser = argparse.ArgumentParser(
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-
-    parser.add_argument(
-        '--work-dir',
-        type=str,
-
default=os.path.join( - tempfile.gettempdir(), 'cloudml-samples', 'molecules'), - help='Directory for staging and working files. ' - 'This can be a Google Cloud Storage path.') - - parser.add_argument( - '--data-sources', - type=str, - nargs='+', - default=['ftp://anonymous:guest@ftp.ncbi.nlm.nih.gov/' - 'pubchem/Compound_3D/01_conf_per_cmpd/SDF'], - help='Data source location where SDF file(s) are stored. ' - 'Paths can be local, ftp://, or gcs://. ' - 'Examples: ' - 'ftp://hostname/path ' - 'ftp://username:password@hostname/path') - - parser.add_argument( - '--filter-regex', - type=str, - default=r'\.sdf', - help='Regular expression to filter which files to use. ' - 'The regular expression will be searched on the full absolute path. ' - 'Every match will be kept.') - - parser.add_argument( - '--max-data-files', - type=int, - required=True, - help='Maximum number of data files for every file pattern expansion. ' - 'Set to -1 to use all files.') - - args = parser.parse_args() - - max_data_files = args.max_data_files - if args.max_data_files == -1: - max_data_files = None - - data_dir = os.path.join(args.work_dir, 'data') - run(args.data_sources, args.filter_regex, max_data_files, data_dir) -#!/usr/bin/env python -# -# Copyright 2018 Google Inc. All Rights Reserved. Licensed under the Apache -# License, Version 2.0 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -# This tool does either batch or streaming predictions on a trained model. - -from __future__ import print_function - -import argparse -import json -import os -import sys -import tempfile - -import pubchem - -import apache_beam as beam -import tensorflow as tf - -from apache_beam.options.pipeline_options import GoogleCloudOptions -from apache_beam.options.pipeline_options import PipelineOptions -from apache_beam.options.pipeline_options import SetupOptions -from apache_beam.options.pipeline_options import StandardOptions -from tensorflow.python.framework import ops -from tensorflow.python.saved_model import loader - - -class Predict(beam.DoFn): - def __init__(self, - model_dir, - id_key, - meta_tag='serve', - meta_signature='predict', - meta_predictions='predictions'): - super(Predict, self).__init__() - self.model_dir = model_dir - self.id_key = id_key - self.meta_tag = meta_tag - self.meta_signature = meta_signature - self.meta_predictions = meta_predictions - self.session = None - self.graph = None - self.feed_tensors = None - self.fetch_tensors = None - - def process(self, inputs): - # Create a session for every worker only once. The session is not - # pickleable, so it can't be created at the DoFn constructor. 
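-        # (Beam pickles the DoFn when it ships it to workers, and a tf.Session
-        # wraps live C++ runtime state that cannot survive that round trip.
-        # The same lazy-initialization pattern, reduced to a sketch --
-        # `load_model` is a stand-in name, not part of this sample:
-        #
-        #     def process(self, elem):
-        #         if self.session is None:
-        #             self.session = load_model()  # once per worker
-        #         ...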
- if not self.session: - self.graph = ops.Graph() - with self.graph.as_default(): - self.session = tf.Session() - metagraph_def = loader.load( - self.session, {self.meta_tag}, self.model_dir) - signature_def = metagraph_def.signature_def[self.meta_signature] - - # inputs - self.feed_tensors = { - k: self.graph.get_tensor_by_name(v.name) - for k, v in signature_def.inputs.items() - } - - # outputs/predictions - self.fetch_tensors = { - k: self.graph.get_tensor_by_name(v.name) - for k, v in signature_def.outputs.items() - } - - # Create a feed_dict for a single element. - feed_dict = { - tensor: [inputs[key]] - for key, tensor in self.feed_tensors.items() - if key in inputs - } - results = self.session.run(self.fetch_tensors, feed_dict) - - yield { - 'id': inputs[self.id_key], - 'predictions': results[self.meta_predictions][0].tolist() - } - - -# [START dataflow_molecules_run_definition] -def run(model_dir, feature_extraction, sink, beam_options=None): - with beam.Pipeline(options=beam_options) as p: - _ = (p - | 'Feature extraction' >> feature_extraction - | 'Predict' >> beam.ParDo(Predict(model_dir, 'ID')) - | 'Format as JSON' >> beam.Map(json.dumps) - | 'Write predictions' >> sink) -# [END dataflow_molecules_run_definition] - - -if __name__ == '__main__': - """Main function""" - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument( - '--work-dir', - type=str, - default=os.path.join( - tempfile.gettempdir(), 'cloudml-samples', 'molecules'), - help='Directory for temporary files and preprocessed datasets to. ' - 'This can be a Google Cloud Storage path.') - - parser.add_argument( - '--model-dir', - type=str, - required=True, - help='Path to the exported TensorFlow model. ' - 'This can be a Google Cloud Storage path.') - - verbs = parser.add_subparsers(dest='verb') - batch_verb = verbs.add_parser('batch', help='Batch prediction') - batch_verb.add_argument( - '--inputs-dir', - type=str, - required=True, - help='Input directory where SDF data files are read from. ' - 'This can be a Google Cloud Storage path.') - batch_verb.add_argument( - '--outputs-dir', - type=str, - required=True, - help='Directory to store prediction results. 
' - 'This can be a Google Cloud Storage path.') - - stream_verb = verbs.add_parser('stream', help='Streaming prediction') - stream_verb.add_argument( - '--inputs-topic', - type=str, - default='molecules-inputs', - help='PubSub topic to subscribe for molecules.') - - stream_verb.add_argument( - '--outputs-topic', - type=str, - default='molecules-predictions', - help='PubSub topic to publish predictions.') - - args, pipeline_args = parser.parse_known_args() - - beam_options = PipelineOptions(pipeline_args) - beam_options.view_as(SetupOptions).save_main_session = True - - project = beam_options.view_as(GoogleCloudOptions).project - - # [START dataflow_molecules_batch_or_stream] - if args.verb == 'batch': - data_files_pattern = os.path.join(args.inputs_dir, '*.sdf') - results_prefix = os.path.join(args.outputs_dir, 'part') - source = beam.io.Read(pubchem.ParseSDF(data_files_pattern)) - sink = beam.io.WriteToText(results_prefix) - - elif args.verb == 'stream': - if not project: - parser.print_usage() - print('error: argument --project is required for streaming') - sys.exit(1) - - beam_options.view_as(StandardOptions).streaming = True - source = beam.io.ReadFromPubSub(topic='projects/{}/topics/{}'.format( - project, args.inputs_topic)) - sink = beam.io.WriteStringsToPubSub(topic='projects/{}/topics/{}'.format( - project, args.outputs_topic)) - # [END dataflow_molecules_batch_or_stream] - - else: - parser.print_usage() - sys.exit(1) - - # [START dataflow_molecules_call_run] - run( - args.model_dir, - pubchem.SimpleFeatureExtraction(source), - sink, - beam_options) - # [END dataflow_molecules_call_run] -#!/usr/bin/env python -# -# Copyright 2018 Google Inc. All Rights Reserved. Licensed under the Apache -# License, Version 2.0 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -# This tool preprocesses and extracts features from SDF files. 
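-#
-# At a high level, the run() function below chains four stages: extract
-# features from the parsed molecules, validate every element against the
-# input schema, scale the features with tf.Transform, and split the result
-# into train and eval TFRecord datasets.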
- -import argparse -import dill as pickle -import os -import random -import tempfile - -import pubchem - -import apache_beam as beam -import tensorflow as tf -import tensorflow_transform.beam.impl as beam_impl - -from apache_beam.io import tfrecordio -from apache_beam.options.pipeline_options import PipelineOptions -from apache_beam.options.pipeline_options import SetupOptions -from tensorflow_transform.beam.tft_beam_io import transform_fn_io -from tensorflow_transform.coders import example_proto_coder -from tensorflow_transform.tf_metadata import dataset_metadata -from tensorflow_transform.tf_metadata import dataset_schema - - -class PreprocessData(object): - def __init__( - self, - input_feature_spec, - labels, - train_files_pattern, - eval_files_pattern): - - self.labels = labels - self.input_feature_spec = input_feature_spec - self.train_files_pattern = train_files_pattern - self.eval_files_pattern = eval_files_pattern - - -def dump(obj, filename): - """ Wrapper to dump an object to a file.""" - with tf.gfile.Open(filename, 'wb') as f: - pickle.dump(obj, f, protocol=pickle.HIGHEST_PROTOCOL) - - -def load(filename): - """ Wrapper to load an object from a file.""" - with tf.gfile.Open(filename, 'rb') as f: - return pickle.load(f) - - -class ValidateInputData(beam.DoFn): - """This DoFn validates that every element matches the metadata given.""" - - def __init__(self, input_metadata): - super(ValidateInputData, self).__init__() - self.schema_keys = set(input_metadata.schema.column_schemas.keys()) - - def process(self, elem): - if not isinstance(elem, dict): - raise ValueError( - 'Element must be a dict(str, value). ' - 'Given: {} {}'.format(elem, type(elem))) - elem_keys = set(elem.keys()) - if not self.schema_keys.issubset(elem_keys): - raise ValueError( - "Element keys are missing from schema keys. " - 'Given: {}; Schema: {}'.format( - list(elem_keys), list(self.schema_keys))) - yield elem - - -def run( - input_schema, - labels, - feature_extraction, - feature_scaling=None, - eval_percent=20.0, - beam_options=None, - work_dir=None): - """Runs the whole preprocessing step. - - This runs the feature extraction PTransform, validates that the data conforms - to the schema provided, normalizes the features, and splits the dataset into - a training and evaluation dataset. - """ - - # Populate optional arguments - if not feature_scaling: - def feature_scaling(inputs): return inputs - - # Type checking - if not isinstance(labels, list): - raise ValueError( - '`labels` must be list(str). ' - 'Given: {} {}'.format(labels, type(labels))) - - if not isinstance(feature_extraction, beam.PTransform): - raise ValueError( - '`feature_extraction` must be {}. ' - 'Given: {} {}'.format(beam.PTransform, - feature_extraction, type(feature_extraction))) - - if not callable(feature_scaling): - raise ValueError( - '`feature_scaling` must be callable. ' - 'Given: {} {}'.format(feature_scaling, - type(feature_scaling))) - - if beam_options and not isinstance(beam_options, PipelineOptions): - raise ValueError( - '`beam_options` must be {}. 
'
-            'Given: {} {}'.format(PipelineOptions,
-                                  beam_options, type(beam_options)))
-
-    if not work_dir:
-        work_dir = tempfile.mkdtemp(prefix='tensorflow-preprocessing')
-
-    tft_temp_dir = os.path.join(work_dir, 'tft-temp')
-    train_dataset_dir = os.path.join(work_dir, 'train-dataset')
-    eval_dataset_dir = os.path.join(work_dir, 'eval-dataset')
-
-    transform_fn_dir = os.path.join(work_dir, transform_fn_io.TRANSFORM_FN_DIR)
-    if tf.gfile.Exists(transform_fn_dir):
-        tf.gfile.DeleteRecursively(transform_fn_dir)
-
-    input_metadata = dataset_metadata.DatasetMetadata(
-        dataset_schema.Schema(input_schema))
-
-    # [START dataflow_molecules_create_pipeline]
-    # Build and run a Beam Pipeline
-    with beam.Pipeline(options=beam_options) as p, \
-            beam_impl.Context(temp_dir=tft_temp_dir):
-        # [END dataflow_molecules_create_pipeline]
-
-        # [START dataflow_molecules_feature_extraction]
-        # Transform and validate the input data matches the input schema
-        dataset = (
-            p
-            | 'Feature extraction' >> feature_extraction
-            # [END dataflow_molecules_feature_extraction]
-            # [START dataflow_molecules_validate_inputs]
-            | 'Validate inputs' >> beam.ParDo(ValidateInputData(input_metadata)))
-        # [END dataflow_molecules_validate_inputs]
-
-        # [START dataflow_molecules_analyze_and_transform_dataset]
-        # Apply the tf.Transform preprocessing_fn
-        dataset_and_metadata, transform_fn = (
-            (dataset, input_metadata)
-            | 'Feature scaling' >> beam_impl.AnalyzeAndTransformDataset(
-                feature_scaling))
-        dataset, metadata = dataset_and_metadata
-        # [END dataflow_molecules_analyze_and_transform_dataset]
-
-        # [START dataflow_molecules_split_to_train_and_eval_datasets]
-        # Split the dataset into a training set and an evaluation set
-        assert 0 < eval_percent < 100, 'eval_percent must be in the range (0-100)'
-        train_dataset, eval_dataset = (
-            dataset
-            | 'Split dataset' >> beam.Partition(
-                lambda elem, _: int(random.uniform(0, 100) < eval_percent), 2))
-        # [END dataflow_molecules_split_to_train_and_eval_datasets]
-
-        # [START dataflow_molecules_write_tfrecords]
-        # Write the datasets as TFRecords
-        coder = example_proto_coder.ExampleProtoCoder(metadata.schema)
-
-        train_dataset_prefix = os.path.join(train_dataset_dir, 'part')
-        _ = (
-            train_dataset
-            | 'Write train dataset' >> tfrecordio.WriteToTFRecord(
-                train_dataset_prefix, coder))
-
-        eval_dataset_prefix = os.path.join(eval_dataset_dir, 'part')
-        _ = (
-            eval_dataset
-            | 'Write eval dataset' >> tfrecordio.WriteToTFRecord(
-                eval_dataset_prefix, coder))
-
-        # Write the transform_fn
-        _ = (
-            transform_fn
-            | 'Write transformFn' >> transform_fn_io.WriteTransformFn(work_dir))
-        # [END dataflow_molecules_write_tfrecords]
-
-    return PreprocessData(
-        input_metadata.schema.as_feature_spec(),
-        labels,
-        train_dataset_prefix + '*',
-        eval_dataset_prefix + '*')
-
-
-if __name__ == '__main__':
-    """Main function"""
-    parser = argparse.ArgumentParser(
-        formatter_class=argparse.ArgumentDefaultsHelpFormatter)
-
-    parser.add_argument(
-        '--work-dir',
-        type=str,
-        default=os.path.join(
-            tempfile.gettempdir(), 'cloudml-samples', 'molecules'),
-        help='Directory for staging and working files. 
' - 'This can be a Google Cloud Storage path.') - - args, pipeline_args = parser.parse_known_args() - - beam_options = PipelineOptions(pipeline_args) - beam_options.view_as(SetupOptions).save_main_session = True - - data_files_pattern = os.path.join(args.work_dir, 'data', '*.sdf') - preprocess_data = run( - pubchem.INPUT_SCHEMA, - pubchem.LABELS, - # [START dataflow_molecules_feature_extraction_transform] - pubchem.SimpleFeatureExtraction( - beam.io.Read(pubchem.ParseSDF(data_files_pattern))), - # [END dataflow_molecules_feature_extraction_transform] - feature_scaling=pubchem.normalize_inputs, - beam_options=beam_options, - work_dir=args.work_dir) - - dump(preprocess_data, os.path.join(args.work_dir, 'PreprocessData')) -#!/usr/bin/env python -# -# Copyright 2018 Google Inc. All Rights Reserved. Licensed under the Apache -# License, Version 2.0 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -# This is a sample publisher for the streaming predictions service. - -import argparse -import os -import sys - -import pubchem - -import apache_beam as beam - -from apache_beam.options.pipeline_options import GoogleCloudOptions -from apache_beam.options.pipeline_options import PipelineOptions -from apache_beam.options.pipeline_options import SetupOptions -from apache_beam.options.pipeline_options import StandardOptions - - -if __name__ == '__main__': - """Main function""" - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument( - '--topic', - type=str, - default='molecules-inputs', - help='PubSub topic to publish molecules.') - - parser.add_argument( - '--inputs-dir', - type=str, - required=True, - help='Input directory where SDF data files are read from. ' - 'This can be a Google Cloud Storage path.') - - args, pipeline_args = parser.parse_known_args() - - beam_options = PipelineOptions(pipeline_args) - beam_options.view_as(SetupOptions).save_main_session = True - beam_options.view_as(StandardOptions).streaming = True - - project = beam_options.view_as(GoogleCloudOptions).project - if not project: - parser.print_usage() - print('error: argument --project is required') - sys.exit(1) - - data_files_pattern = os.path.join(args.inputs_dir, '*.sdf') - topic_path = 'projects/{}/topics/{}'.format(project, args.topic) - with beam.Pipeline(options=beam_options) as p: - _ = (p - | 'Read SDF files' >> beam.io.Read(pubchem.ParseSDF(data_files_pattern)) - | 'Publish molecules' >> beam.io.WriteStringsToPubSub(topic=topic_path)) -# Copyright 2018 Google Inc. All Rights Reserved. Licensed under the Apache -# License, Version 2.0 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. 
- -import setuptools - -# NOTE: Any additional file besides the `main.py` file has to be in a module -# (inside a directory) so it can be packaged and staged correctly for -# cloud runs. - -REQUIRED_PACKAGES = [ - 'apache-beam[gcp]==2.5', - 'tensorflow-transform==0.8', - 'tensorflow==1.8', -] - -setuptools.setup( - name='molecules', - version='0.0.1', - install_requires=REQUIRED_PACKAGES, - packages=setuptools.find_packages(), - include_package_data=True, - description='Cloud ML molecules sample with preprocessing', -) -#!/usr/bin/env python -# -# Copyright 2018 Google Inc. All Rights Reserved. Licensed under the Apache -# License, Version 2.0 (the "License"); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http://www.apache.org/licenses/LICENSE-2.0 - -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, WITHOUT -# WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the -# License for the specific language governing permissions and limitations under -# the License. - -# This is a sample subscriber for the streaming predictions service. - -import argparse -import json -import logging -import sys - -import apache_beam as beam - -from apache_beam.options.pipeline_options import GoogleCloudOptions -from apache_beam.options.pipeline_options import PipelineOptions -from apache_beam.options.pipeline_options import SetupOptions -from apache_beam.options.pipeline_options import StandardOptions - - -if __name__ == '__main__': - """Main function""" - parser = argparse.ArgumentParser( - formatter_class=argparse.ArgumentDefaultsHelpFormatter) - - parser.add_argument( - '--topic', - type=str, - default='molecules-predictions', - help='PubSub topic to subscribe for predictions.') - - args, pipeline_args = parser.parse_known_args() - - beam_options = PipelineOptions(pipeline_args) - beam_options.view_as(SetupOptions).save_main_session = True - beam_options.view_as(StandardOptions).streaming = True - - project = beam_options.view_as(GoogleCloudOptions).project - if not project: - parser.print_usage() - print('error: argument --project is required') - sys.exit(1) - - # We'll just log the results - logging.basicConfig(level=logging.INFO) - topic_path = 'projects/{}/topics/{}'.format(project, args.topic) - with beam.Pipeline(options=beam_options) as p: - _ = (p - | 'Read predictions' >> beam.io.ReadFromPubSub(topic=topic_path) - | 'Log' >> beam.Map(logging.info)) -# This file is for training on AI Platform with scikit-learn. - - -# [START setup] -import datetime -import os -import subprocess -import sys -import pandas as pd -from sklearn import svm -from sklearn.externals import joblib - -# Fill in your Cloud Storage bucket name -BUCKET_NAME = '' -# [END setup] - - -# [START download-data] -iris_data_filename = 'iris_data.csv' -iris_target_filename = 'iris_target.csv' -data_dir = 'gs://cloud-samples-data/ml-engine/iris' - -# gsutil outputs everything to stderr so we need to divert it to stdout. 
-subprocess.check_call(['gsutil', 'cp', os.path.join(data_dir, - iris_data_filename), - iris_data_filename], stderr=sys.stdout) -subprocess.check_call(['gsutil', 'cp', os.path.join(data_dir, - iris_target_filename), - iris_target_filename], stderr=sys.stdout) -# [END download-data] - - -# [START load-into-pandas] -# Load data into pandas, then use `.values` to get NumPy arrays -iris_data = pd.read_csv(iris_data_filename).values -iris_target = pd.read_csv(iris_target_filename).values - -# Convert one-column 2D array into 1D array for use with scikit-learn -iris_target = iris_target.reshape((iris_target.size,)) -# [END load-into-pandas] - - -# [START train-and-save-model] -# Train the model -classifier = svm.SVC(gamma='auto', verbose=True) -classifier.fit(iris_data, iris_target) - -# Export the classifier to a file -model_filename = 'model.joblib' -joblib.dump(classifier, model_filename) -# [END train-and-save-model] - - -# [START upload-model] -# Upload the saved model file to Cloud Storage -gcs_model_path = os.path.join('gs://', BUCKET_NAME, - datetime.datetime.now().strftime('iris_%Y%m%d_%H%M%S'), model_filename) -subprocess.check_call(['gsutil', 'cp', model_filename, gcs_model_path], - stderr=sys.stdout) -# [END upload-model] -#!/usr/bin/env python - -# Copyright 2016 Google Inc. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -"""Checks that the development environment is configured for Cloud ML.""" - -from __future__ import print_function -import google.cloud.ml -import tensorflow as tf -import pkg_resources -import re -import subprocess -import sys - -MIN_CLOUD_ML_SDK_VERSION = '0.1.8a0' -MIN_CLOUD_SDK_VERSION = '136.0.0' -MIN_TENSORFLOW_VERSION = '0.11.0rc0' - - -def get_version_from_pip(package_name): - """Returns the version of an installed pip package.""" - try: - package_info = subprocess.check_output(['pip', 'show', package_name]) - except subprocess.CalledProcessError: - print('ERROR: Package %s has not been installed with pip.' % package_name, - file=sys.stderr) - exit(1) - for line in package_info.split('\n'): - m = re.match(r'Version: (.+)', line) - if m: - return m.group(1) - print('ERROR: Unable to parse "pip show" output: %s' % package_info, - file=sys.stderr) - exit(1) - - -def get_cloud_sdk_version(): - """Returns the version of the Cloud SDK that is installed.""" - gcloud_info = subprocess.check_output(['gcloud', 'version']) - for line in gcloud_info.split('\n'): - m = re.match(r'Google Cloud SDK (.+)', line) - if m: - return m.group(1) - print('ERROR: Unable to parse "gcloud version" output: %s' % gcloud_info, - file=sys.stderr) - exit(1) - - -def check_version_is_supported(name, version, min_version, help=''): - """Checks whether a particular version of a package is new enough.""" - if (pkg_resources.parse_version(version) < - pkg_resources.parse_version(min_version)): - # Version is too old. 
- print('ERROR: Unsupported %s version: %s (minimum %s).%s' % - (name, version, min_version, (' %s' % help) if help else ''), - file=sys.stderr) - exit(1) - - -# Check that TensorFlow is installed. -check_version_is_supported( - 'TensorFlow', tf.__version__, MIN_TENSORFLOW_VERSION) - -# Check that the Cloud ML SDK is installed. -check_version_is_supported( - 'Cloud ML SDK', get_version_from_pip('cloudml'), MIN_CLOUD_ML_SDK_VERSION) - -# Check that the Cloud SDK is installed, initialized, and logged in. -check_version_is_supported( - 'Cloud SDK', get_cloud_sdk_version(), MIN_CLOUD_SDK_VERSION, - help='To update the Cloud SDK, run "gcloud components update".') -project_id = subprocess.check_output( - ['gcloud', 'config', 'list', 'project', - '--format', 'value(core.project)']).rstrip() -auth_token = subprocess.check_output( - ['gcloud', 'auth', 'print-access-token']).rstrip() - -# Check that the Cloud ML API is enabled. -models = subprocess.check_output([ - 'curl', '-s', '-S', '-X', 'GET', '-H', 'Content-Type: application/json', - '-H', 'Authorization: Bearer %s' % auth_token, - 'https://ml.googleapis.com/v1beta1/projects/%s/models' % project_id]) -if '"error"' in models: - print('ERROR: Unable to list Cloud ML models: %s' % - models, file=sys.stderr) - exit(1) - -# Everything completed successfully. -print('Success! Your environment is configured correctly.') -from setuptools import setup, find_packages - -long_description = ''' -Easily train your own text-generating neural network of -any size and complexity on any text dataset with a few lines -of code, or quickly train on a text using a pretrained model. - -- A modern neural network architecture which utilizes new techniques as -attention-weighting and skip-embedding to accelerate training -and improve model quality. -- Able to train on and generate text at either the -character-level or word-level. -- Able to configure RNN size, the number of RNN layers, -and whether to use bidirectional RNNs. -- Able to train on any generic input text file, including large files. -- Able to train models on a GPU and then use them with a CPU. -- Able to utilize a powerful CuDNN implementation of RNNs -when trained on the GPU, which massively speeds up training time as -opposed to normal LSTM implementations. -- Able to train the model using contextual labels, -allowing it to learn faster and produce better results in some cases. -- Able to generate text interactively for customized stories. -''' - - -setup( - name='textgenrnn', - packages=['textgenrnn'], # this must be the same as the name above - version='1.5.0', - description='Easily train your own text-generating neural network ' \ - 'of any size and complexity', - long_description=long_description, - long_description_content_type='text/markdown', - author='Max Woolf', - author_email='max@minimaxir.com', - url='https://github.com/minimaxir/textgenrnn', - keywords=['deep learning', 'tensorflow', 'keras', 'text generation'], - classifiers=[], - license='MIT', - python_requires='>=3', - include_package_data=True, - install_requires=['keras>=2.1.5', 'h5py', 'scikit-learn', 'tqdm'] -) -from keras.engine import InputSpec, Layer -from keras import backend as K -from keras import initializers - - -class AttentionWeightedAverage(Layer): - """ - Computes a weighted average of the different channels across timesteps. - Uses 1 parameter pr. channel to compute the attention value for - a single timestep. 
- """ - - def __init__(self, return_attention=False, **kwargs): - self.init = initializers.get('uniform') - self.supports_masking = True - self.return_attention = return_attention - super(AttentionWeightedAverage, self).__init__(** kwargs) - - def build(self, input_shape): - self.input_spec = [InputSpec(ndim=3)] - assert len(input_shape) == 3 - - self.W = self.add_weight(shape=(input_shape[2], 1), - name='{}_W'.format(self.name), - initializer=self.init) - self.trainable_weights = [self.W] - super(AttentionWeightedAverage, self).build(input_shape) - - def call(self, x, mask=None): - # computes a probability distribution over the timesteps - # uses 'max trick' for numerical stability - # reshape is done to avoid issue with Tensorflow - # and 1-dimensional weights - logits = K.dot(x, self.W) - x_shape = K.shape(x) - logits = K.reshape(logits, (x_shape[0], x_shape[1])) - ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True)) - - # masked timesteps have zero weight - if mask is not None: - mask = K.cast(mask, K.floatx()) - ai = ai * mask - att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon()) - weighted_input = x * K.expand_dims(att_weights) - result = K.sum(weighted_input, axis=1) - if self.return_attention: - return [result, att_weights] - return result - - def get_output_shape_for(self, input_shape): - return self.compute_output_shape(input_shape) - - def compute_output_shape(self, input_shape): - output_len = input_shape[2] - if self.return_attention: - return [(input_shape[0], output_len), (input_shape[0], - input_shape[1])] - return (input_shape[0], output_len) - - def compute_mask(self, input, input_mask=None): - if isinstance(input_mask, list): - return [None] * len(input_mask) - else: - return None -from .textgenrnn import textgenrnn -from keras.optimizers import RMSprop -from keras.layers import Input, Embedding, Dense, LSTM, Bidirectional -from keras.layers import concatenate, Reshape, SpatialDropout1D -from keras.models import Model -from keras import backend as K -from .AttentionWeightedAverage import AttentionWeightedAverage - - -def textgenrnn_model(num_classes, cfg, context_size=None, - weights_path=None, - dropout=0.0, - optimizer=RMSprop(lr=4e-3, rho=0.99)): - ''' - Builds the model architecture for textgenrnn and - loads the specified weights for the model. 
-    '''
-
-    input = Input(shape=(cfg['max_length'],), name='input')
-    embedded = Embedding(num_classes, cfg['dim_embeddings'],
-                         input_length=cfg['max_length'],
-                         name='embedding')(input)
-
-    if dropout > 0.0:
-        embedded = SpatialDropout1D(dropout, name='dropout')(embedded)
-
-    rnn_layer_list = []
-    for i in range(cfg['rnn_layers']):
-        prev_layer = embedded if i == 0 else rnn_layer_list[-1]
-        rnn_layer_list.append(new_rnn(cfg, i+1)(prev_layer))
-
-    seq_concat = concatenate([embedded] + rnn_layer_list, name='rnn_concat')
-    attention = AttentionWeightedAverage(name='attention')(seq_concat)
-    output = Dense(num_classes, name='output', activation='softmax')(attention)
-
-    if context_size is None:
-        model = Model(inputs=[input], outputs=[output])
-        if weights_path is not None:
-            model.load_weights(weights_path, by_name=True)
-        model.compile(loss='categorical_crossentropy', optimizer=optimizer)
-
-    else:
-        context_input = Input(
-            shape=(context_size,), name='context_input')
-        context_reshape = Reshape((context_size,),
-                                  name='context_reshape')(context_input)
-        merged = concatenate([attention, context_reshape], name='concat')
-        main_output = Dense(num_classes, name='context_output',
-                            activation='softmax')(merged)
-
-        model = Model(inputs=[input, context_input],
-                      outputs=[main_output, output])
-        if weights_path is not None:
-            model.load_weights(weights_path, by_name=True)
-        model.compile(loss='categorical_crossentropy', optimizer=optimizer,
-                      loss_weights=[0.8, 0.2])
-
-    return model
-
-
-'''
-Create a new LSTM layer per parameters. Unfortunately,
-each combination of parameters must be hardcoded.
-
-The normal LSTMs use sigmoid recurrent activations
-for parity with CuDNNLSTM:
-https://github.com/keras-team/keras/issues/8860
-'''
-
-
-def new_rnn(cfg, layer_num):
-    use_cudnnlstm = K.backend() == 'tensorflow' and len(
-        K.tensorflow_backend._get_available_gpus()) > 0
-    if use_cudnnlstm:
-        from keras.layers import CuDNNLSTM
-        if cfg['rnn_bidirectional']:
-            return Bidirectional(CuDNNLSTM(cfg['rnn_size'],
-                                           return_sequences=True),
-                                 name='rnn_{}'.format(layer_num))
-
-        return CuDNNLSTM(cfg['rnn_size'],
-                         return_sequences=True,
-                         name='rnn_{}'.format(layer_num))
-    else:
-        if cfg['rnn_bidirectional']:
-            return Bidirectional(LSTM(cfg['rnn_size'],
-                                      return_sequences=True,
-                                      recurrent_activation='sigmoid'),
-                                 name='rnn_{}'.format(layer_num))
-
-        return LSTM(cfg['rnn_size'],
-                    return_sequences=True,
-                    recurrent_activation='sigmoid',
-                    name='rnn_{}'.format(layer_num))
-from keras.callbacks import LearningRateScheduler, Callback
-from keras.models import Model, load_model
-from keras.preprocessing import sequence
-from keras.preprocessing.text import Tokenizer, text_to_word_sequence
-from keras.utils import Sequence
-from keras import backend as K
-from .utils import textgenrnn_encode_cat
-import numpy as np
-
-
-def generate_sequences_from_texts(texts, indices_list,
-                                  textgenrnn, context_labels,
-                                  batch_size=128):
-    is_words = textgenrnn.config['word_level']
-    is_single = textgenrnn.config['single_text']
-    max_length = textgenrnn.config['max_length']
-    meta_token = textgenrnn.META_TOKEN
-
-    if is_words:
-        new_tokenizer = Tokenizer(filters='', char_level=True)
-        new_tokenizer.word_index = textgenrnn.vocab
-    else:
-        new_tokenizer = textgenrnn.tokenizer
-
-    while True:
-        np.random.shuffle(indices_list)
-
-        X_batch = []
-        Y_batch = []
-        context_batch = []
-        count_batch = 0
-
-        for row in range(indices_list.shape[0]):
-            text_index = indices_list[row, 0]
-            end_index = indices_list[row, 1]
-
-            text = 
texts[text_index]
-
-            if not is_single:
-                text = [meta_token] + list(text) + [meta_token]
-
-            if end_index > max_length:
-                x = text[end_index - max_length: end_index + 1]
-            else:
-                x = text[0: end_index + 1]
-            y = text[end_index + 1]
-
-            if y in textgenrnn.vocab:
-                x = process_sequence([x], textgenrnn, new_tokenizer)
-                y = textgenrnn_encode_cat([y], textgenrnn.vocab)
-
-                X_batch.append(x)
-                Y_batch.append(y)
-
-                if context_labels is not None:
-                    context_batch.append(context_labels[text_index])
-
-                count_batch += 1
-
-                if count_batch % batch_size == 0:
-                    X_batch = np.squeeze(np.array(X_batch))
-                    Y_batch = np.squeeze(np.array(Y_batch))
-                    context_batch = np.squeeze(np.array(context_batch))
-
-                    # print(X_batch.shape)
-
-                    if context_labels is not None:
-                        yield ([X_batch, context_batch], [Y_batch, Y_batch])
-                    else:
-                        yield (X_batch, Y_batch)
-                    X_batch = []
-                    Y_batch = []
-                    context_batch = []
-                    count_batch = 0
-
-
-def process_sequence(X, textgenrnn, new_tokenizer):
-    X = new_tokenizer.texts_to_sequences(X)
-    X = sequence.pad_sequences(
-        X, maxlen=textgenrnn.config['max_length'])
-
-    return X
-from keras.callbacks import LearningRateScheduler, Callback
-from keras.models import Model, load_model
-from keras.preprocessing import sequence
-from keras.preprocessing.text import Tokenizer, text_to_word_sequence
-from keras.utils import multi_gpu_model
-from keras.optimizers import RMSprop
-from keras import backend as K
-from sklearn.preprocessing import LabelBinarizer
-from sklearn.decomposition import PCA
-from sklearn.manifold import TSNE
-from sklearn.metrics.pairwise import cosine_similarity
-import numpy as np
-import json
-import h5py
-from pkg_resources import resource_filename
-from .model import textgenrnn_model
-from .model_training import *
-from .utils import *
-import csv
-import re
-
-
-class textgenrnn:
-    META_TOKEN = '<s>'
-    config = {
-        'rnn_layers': 2,
-        'rnn_size': 128,
-        'rnn_bidirectional': False,
-        'max_length': 40,
-        'max_words': 10000,
-        'dim_embeddings': 100,
-        'word_level': False,
-        'single_text': False
-    }
-    default_config = config.copy()
-
-    def __init__(self, weights_path=None,
-                 vocab_path=None,
-                 config_path=None,
-                 name="textgenrnn"):
-
-        if weights_path is None:
-            weights_path = resource_filename(__name__,
-                                             'textgenrnn_weights.hdf5')
-
-        if vocab_path is None:
-            vocab_path = resource_filename(__name__,
-                                           'textgenrnn_vocab.json')
-
-        if config_path is not None:
-            with open(config_path, 'r',
-                      encoding='utf8', errors='ignore') as json_file:
-                self.config = json.load(json_file)
-
-        self.config.update({'name': name})
-        self.default_config.update({'name': name})
-
-        with open(vocab_path, 'r',
-                  encoding='utf8', errors='ignore') as json_file:
-            self.vocab = json.load(json_file)
-
-        self.tokenizer = Tokenizer(filters='', lower=False, char_level=True)
-        self.tokenizer.word_index = self.vocab
-        self.num_classes = len(self.vocab) + 1
-        self.model = textgenrnn_model(self.num_classes,
-                                      cfg=self.config,
-                                      weights_path=weights_path)
-        self.indices_char = dict((self.vocab[c], c) for c in self.vocab)
-
-    def generate(self, n=1, return_as_list=False, prefix=None,
-                 temperature=[1.0, 0.5, 0.2, 0.2],
-                 max_gen_length=300, interactive=False,
-                 top_n=3, progress=True):
-        gen_texts = []
-        iterable = trange(n) if progress and n > 1 else range(n)
-        for _ in iterable:
-            gen_text, _ = textgenrnn_generate(self.model,
-                                              self.vocab,
-                                              self.indices_char,
-                                              temperature,
-                                              self.config['max_length'],
-                                              self.META_TOKEN,
-                                              self.config['word_level'],
-                                              self.config.get(
-                                                  'single_text', False),
-
max_gen_length,
-                                              interactive,
-                                              top_n,
-                                              prefix)
-            if not return_as_list:
-                print("{}\n".format(gen_text))
-            gen_texts.append(gen_text)
-        if return_as_list:
-            return gen_texts
-
-    def generate_samples(self, n=3, temperatures=[0.2, 0.5, 1.0], **kwargs):
-        for temperature in temperatures:
-            print('#'*20 + '\nTemperature: {}\n'.format(temperature) +
-                  '#'*20)
-            self.generate(n, temperature=temperature, progress=False, **kwargs)
-
-    def train_on_texts(self, texts, context_labels=None,
-                       batch_size=128,
-                       num_epochs=50,
-                       verbose=1,
-                       new_model=False,
-                       gen_epochs=1,
-                       train_size=1.0,
-                       max_gen_length=300,
-                       validation=True,
-                       dropout=0.0,
-                       via_new_model=False,
-                       save_epochs=0,
-                       multi_gpu=False,
-                       **kwargs):
-
-        if new_model and not via_new_model:
-            self.train_new_model(texts,
-                                 context_labels=context_labels,
-                                 num_epochs=num_epochs,
-                                 gen_epochs=gen_epochs,
-                                 train_size=train_size,
-                                 batch_size=batch_size,
-                                 dropout=dropout,
-                                 validation=validation,
-                                 save_epochs=save_epochs,
-                                 multi_gpu=multi_gpu,
-                                 **kwargs)
-            return
-
-        if context_labels:
-            context_labels = LabelBinarizer().fit_transform(context_labels)
-
-        if 'prop_keep' in kwargs:
-            train_size = kwargs['prop_keep']
-
-        if self.config['word_level']:
-            texts = [text_to_word_sequence(text, filters='') for text in texts]
-
-        # calculate all combinations of text indices + token indices
-        indices_list = [np.meshgrid(np.array(i), np.arange(
-            len(text) + 1)) for i, text in enumerate(texts)]
-        indices_list = np.block(indices_list)
-
-        # If a single text, there will be 2 extra indices, so remove them
-        # Also remove first sequences which use padding
-        if self.config['single_text']:
-            indices_list = indices_list[self.config['max_length']:-2, :]
-
-        indices_mask = np.random.rand(indices_list.shape[0]) < train_size
-
-        if multi_gpu:
-            num_gpus = len(K.tensorflow_backend._get_available_gpus())
-            batch_size = batch_size * num_gpus
-
-        gen_val = None
-        val_steps = None
-        if train_size < 1.0 and validation:
-            indices_list_val = indices_list[~indices_mask, :]
-            gen_val = generate_sequences_from_texts(
-                texts, indices_list_val, self, context_labels, batch_size)
-            val_steps = max(
-                int(np.floor(indices_list_val.shape[0] / batch_size)), 1)
-
-        indices_list = indices_list[indices_mask, :]
-
-        num_tokens = indices_list.shape[0]
-        assert num_tokens >= batch_size, "Fewer tokens than batch_size."
-
-        level = 'word' if self.config['word_level'] else 'character'
-        print("Training on {:,} {} sequences.".format(num_tokens, level))
-
-        steps_per_epoch = max(int(np.floor(num_tokens / batch_size)), 1)
-
-        gen = generate_sequences_from_texts(
-            texts, indices_list, self, context_labels, batch_size)
-
-        base_lr = 4e-3
-
-        # scheduler function must be defined inline.
-        def lr_linear_decay(epoch):
-            return (base_lr * (1 - (epoch / num_epochs)))
-
-        if context_labels is not None:
-            if new_model:
-                weights_path = None
-            else:
-                weights_path = "{}_weights.hdf5".format(self.config['name'])
-                self.save(weights_path)
-
-            self.model = textgenrnn_model(self.num_classes,
-                                          dropout=dropout,
-                                          cfg=self.config,
-                                          context_size=context_labels.shape[1],
-                                          weights_path=weights_path)
-
-        model_t = self.model
-
-        if multi_gpu:
-            # Do not locate model/merge on CPU since sample sizes are small.
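-            # (multi_gpu_model replicates the model onto each GPU and splits
-            # every batch across the replicas; cpu_merge=False keeps the
-            # weight merge on a GPU instead of the host, which avoids device
-            # round trips for the small batches used here.)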
- parallel_model = multi_gpu_model(self.model, - gpus=num_gpus, - cpu_merge=False) - parallel_model.compile(loss='categorical_crossentropy', - optimizer=RMSprop(lr=4e-3, rho=0.99)) - - model_t = parallel_model - print("Training on {} GPUs.".format(num_gpus)) - - model_t.fit_generator(gen, steps_per_epoch=steps_per_epoch, - epochs=num_epochs, - callbacks=[ - LearningRateScheduler( - lr_linear_decay), - generate_after_epoch( - self, gen_epochs, - max_gen_length), - save_model_weights( - self, num_epochs, - save_epochs)], - verbose=verbose, - max_queue_size=10, - validation_data=gen_val, - validation_steps=val_steps - ) - - # Keep the text-only version of the model if using context labels - if context_labels is not None: - self.model = Model(inputs=self.model.input[0], - outputs=self.model.output[1]) - - def train_new_model(self, texts, context_labels=None, num_epochs=50, - gen_epochs=1, batch_size=128, dropout=0.0, - train_size=1.0, - validation=True, save_epochs=0, - multi_gpu=False, **kwargs): - self.config = self.default_config.copy() - self.config.update(**kwargs) - - print("Training new model w/ {}-layer, {}-cell {}LSTMs".format( - self.config['rnn_layers'], self.config['rnn_size'], - 'Bidirectional ' if self.config['rnn_bidirectional'] else '' - )) - - # If training word level, must add spaces around each punctuation. - # https://stackoverflow.com/a/3645946/9314418 - - if self.config['word_level']: - punct = '!"#$%&()*+,-./:;<=>?@[\]^_`{|}~\\n\\t\'‘’“”’–—' - for i in range(len(texts)): - texts[i] = re.sub('([{}])'.format(punct), r' \1 ', texts[i]) - texts[i] = re.sub(' {2,}', ' ', texts[i]) - - # Create text vocabulary for new texts - # if word-level, lowercase; if char-level, uppercase - self.tokenizer = Tokenizer(filters='', - lower=self.config['word_level'], - char_level=(not self.config['word_level'])) - self.tokenizer.fit_on_texts(texts) - - # Limit vocab to max_words - max_words = self.config['max_words'] - self.tokenizer.word_index = {k: v for ( - k, v) in self.tokenizer.word_index.items() if v <= max_words} - - if not self.config.get('single_text', False): - self.tokenizer.word_index[self.META_TOKEN] = len( - self.tokenizer.word_index) + 1 - self.vocab = self.tokenizer.word_index - self.num_classes = len(self.vocab) + 1 - self.indices_char = dict((self.vocab[c], c) for c in self.vocab) - - # Create a new, blank model w/ given params - self.model = textgenrnn_model(self.num_classes, - dropout=dropout, - cfg=self.config) - - # Save the files needed to recreate the model - with open('{}_vocab.json'.format(self.config['name']), - 'w', encoding='utf8') as outfile: - json.dump(self.tokenizer.word_index, outfile, ensure_ascii=False) - - with open('{}_config.json'.format(self.config['name']), - 'w', encoding='utf8') as outfile: - json.dump(self.config, outfile, ensure_ascii=False) - - self.train_on_texts(texts, new_model=True, - via_new_model=True, - context_labels=context_labels, - num_epochs=num_epochs, - gen_epochs=gen_epochs, - train_size=train_size, - batch_size=batch_size, - dropout=dropout, - validation=validation, - save_epochs=save_epochs, - multi_gpu=multi_gpu, - **kwargs) - - def save(self, weights_path="textgenrnn_weights_saved.hdf5"): - self.model.save_weights(weights_path) - - def load(self, weights_path): - self.model = textgenrnn_model(self.num_classes, - cfg=self.config, - weights_path=weights_path) - - def reset(self): - self.config = self.default_config.copy() - self.__init__(name=self.config['name']) - - def train_from_file(self, file_path, header=True, delim="\n", 
-                        new_model=False,
-                        context=None,
-                        is_csv=False, **kwargs):
-
-        context_labels = None
-        if context:
-            texts, context_labels = textgenrnn_texts_from_file_context(
-                file_path)
-        else:
-            texts = textgenrnn_texts_from_file(file_path, header,
-                                               delim, is_csv)
-
-        print("{:,} texts collected.".format(len(texts)))
-        if new_model:
-            self.train_new_model(
-                texts, context_labels=context_labels, **kwargs)
-        else:
-            self.train_on_texts(texts, context_labels=context_labels, **kwargs)
-
-    def train_from_largetext_file(self, file_path, new_model=True, **kwargs):
-        with open(file_path, 'r', encoding='utf8', errors='ignore') as f:
-            texts = [f.read()]
-
-        if new_model:
-            self.train_new_model(
-                texts, single_text=True, **kwargs)
-        else:
-            self.train_on_texts(texts, single_text=True, **kwargs)
-
-    def generate_to_file(self, destination_path, **kwargs):
-        texts = self.generate(return_as_list=True, **kwargs)
-        with open(destination_path, 'w') as f:
-            for text in texts:
-                f.write("{}\n".format(text))
-
-    def encode_text_vectors(self, texts, pca_dims=50, tsne_dims=None,
-                            tsne_seed=None, return_pca=False,
-                            return_tsne=False):
-
-        # if a single text, force it into a list:
-        if isinstance(texts, str):
-            texts = [texts]
-
-        vector_output = Model(inputs=self.model.input,
-                              outputs=self.model.get_layer('attention').output)
-        encoded_vectors = []
-        maxlen = self.config['max_length']
-        for text in texts:
-            if self.config['word_level']:
-                text = text_to_word_sequence(text, filters='')
-            text_aug = [self.META_TOKEN] + list(text[0:maxlen])
-            encoded_text = textgenrnn_encode_sequence(text_aug, self.vocab,
-                                                      maxlen)
-            encoded_vector = vector_output.predict(encoded_text)
-            encoded_vectors.append(encoded_vector)
-
-        encoded_vectors = np.squeeze(np.array(encoded_vectors), axis=1)
-        if pca_dims is not None:
-            assert len(texts) > 1, "Must use more than 1 text for PCA"
-            pca = PCA(pca_dims)
-            encoded_vectors = pca.fit_transform(encoded_vectors)
-
-        if tsne_dims is not None:
-            tsne = TSNE(tsne_dims, random_state=tsne_seed)
-            encoded_vectors = tsne.fit_transform(encoded_vectors)
-
-        return_objects = encoded_vectors
-        if return_pca or return_tsne:
-            return_objects = [return_objects]
-        if return_pca:
-            return_objects.append(pca)
-        if return_tsne:
-            return_objects.append(tsne)
-
-        return return_objects
-
-    def similarity(self, text, texts, use_pca=True):
-        text_encoded = self.encode_text_vectors(text, pca_dims=None)
-        if use_pca:
-            texts_encoded, pca = self.encode_text_vectors(texts,
-                                                          return_pca=True)
-            text_encoded = pca.transform(text_encoded)
-        else:
-            texts_encoded = self.encode_text_vectors(texts, pca_dims=None)
-
-        cos_similarity = cosine_similarity(text_encoded, texts_encoded)[0]
-        text_sim_pairs = list(zip(texts, cos_similarity))
-        text_sim_pairs = sorted(text_sim_pairs, key=lambda x: -x[1])
-        return text_sim_pairs
-from keras.callbacks import LearningRateScheduler, Callback
-from keras.models import Model, load_model
-from keras.preprocessing import sequence
-from keras.preprocessing.text import Tokenizer, text_to_word_sequence
-from keras import backend as K
-from sklearn.preprocessing import LabelBinarizer
-from random import shuffle
-from tqdm import trange
-import numpy as np
-import json
-import h5py
-import csv
-import re
-
-
-def textgenrnn_sample(preds, temperature, interactive=False, top_n=3):
-    '''
-    Samples predicted probabilities of the next character to allow
-    for the network to show "creativity."
- ''' - - preds = np.asarray(preds).astype('float64') - - if temperature is None or temperature == 0.0: - return np.argmax(preds) - - preds = np.log(preds + K.epsilon()) / temperature - exp_preds = np.exp(preds) - preds = exp_preds / np.sum(exp_preds) - probas = np.random.multinomial(1, preds, 1) - - if not interactive: - index = np.argmax(probas) - - # prevent function from being able to choose 0 (placeholder) - # choose 2nd best index from preds - if index == 0: - index = np.argsort(preds)[-2] - else: - # return list of top N chars/words - # descending order, based on probability - index = (-preds).argsort()[:top_n] - - return index - - -def textgenrnn_generate(model, vocab, - indices_char, temperature=0.5, - maxlen=40, meta_token='', - word_level=False, - single_text=False, - max_gen_length=300, - interactive=False, - top_n=3, - prefix=None, - synthesize=False, - stop_tokens=[' ', '\n']): - ''' - Generates and returns a single text. - ''' - - collapse_char = ' ' if word_level else '' - end = False - - # If generating word level, must add spaces around each punctuation. - # https://stackoverflow.com/a/3645946/9314418 - if word_level and prefix: - punct = '!"#$%&()*+,-./:;<=>?@[\]^_`{|}~\\n\\t\'‘’“”’–—' - prefix = re.sub('([{}])'.format(punct), r' \1 ', prefix) - prefix_t = [x.lower() for x in prefix.split()] - - if not word_level and prefix: - prefix_t = list(prefix) - - if single_text: - text = prefix_t if prefix else [''] - max_gen_length += maxlen - else: - text = [meta_token] + prefix_t if prefix else [meta_token] - - next_char = '' - - if not isinstance(temperature, list): - temperature = [temperature] - - if len(model.inputs) > 1: - model = Model(inputs=model.inputs[0], outputs=model.outputs[1]) - - while not end and len(text) < max_gen_length: - encoded_text = textgenrnn_encode_sequence(text[-maxlen:], - vocab, maxlen) - next_temperature = temperature[(len(text) - 1) % len(temperature)] - - if not interactive: - # auto-generate text without user intervention - next_index = textgenrnn_sample( - model.predict(encoded_text, batch_size=1)[0], - next_temperature) - next_char = indices_char[next_index] - text += [next_char] - if next_char == meta_token or len(text) >= max_gen_length: - end = True - gen_break = (next_char in stop_tokens or word_level or - len(stop_tokens) == 0) - if synthesize and gen_break: - break - else: - # ask user what the next char/word should be - options_index = textgenrnn_sample( - model.predict(encoded_text, batch_size=1)[0], - next_temperature, - interactive=interactive, - top_n=top_n - ) - options = [indices_char[idx] for idx in options_index] - print('Controls:\n\ts: stop.\tx: backspace.\to: write your own.') - print('\nOptions:') - - for i, option in enumerate(options, 1): - print('\t{}: {}'.format(i, option)) - - print('\nProgress: {}'.format(collapse_char.join(text)[3:])) - print('\nYour choice?') - user_input = input('> ') - - try: - user_input = int(user_input) - next_char = options[user_input-1] - text += [next_char] - except ValueError: - if user_input == 's': - next_char = '' - text += [next_char] - elif user_input == 'o': - other = input('> ') - text += [other] - elif user_input == 'x': - try: - del text[-1] - except IndexError: - pass - else: - print('That\'s not an option!') - - # if single text, ignore sequences generated w/ padding - # if not single text, remove the meta_tokens - if single_text: - text = text[maxlen:] - else: - text = text[1:] - if meta_token in text: - text.remove(meta_token) - - text_joined = collapse_char.join(text) - - # If 
word level, remove spaces around punctuation for cleanliness. - if word_level: - # left_punct = "!%),.:;?@]_}\\n\\t'" - # right_punct = "$([_\\n\\t'" - punct = '\\n\\t' - text_joined = re.sub(" ([{}]) ".format(punct), r'\1', text_joined) - # text_joined = re.sub(" ([{}])".format( - # left_punct), r'\1', text_joined) - # text_joined = re.sub("([{}]) ".format( - # right_punct), r'\1', text_joined) - - return text_joined, end - - -def textgenrnn_encode_sequence(text, vocab, maxlen): - ''' - Encodes a text into the corresponding encoding for prediction with - the model. - ''' - - encoded = np.array([vocab.get(x, 0) for x in text]) - return sequence.pad_sequences([encoded], maxlen=maxlen) - - -def textgenrnn_texts_from_file(file_path, header=True, - delim='\n', is_csv=False): - ''' - Retrieves texts from a newline-delimited file and returns as a list. - ''' - - with open(file_path, 'r', encoding='utf8', errors='ignore') as f: - if header: - f.readline() - if is_csv: - texts = [] - reader = csv.reader(f) - for row in reader: - texts.append(row[0]) - else: - texts = [line.rstrip(delim) for line in f] - - return texts - - -def textgenrnn_texts_from_file_context(file_path, header=True): - ''' - Retrieves texts+context from a two-column CSV. - ''' - - with open(file_path, 'r', encoding='utf8', errors='ignore') as f: - if header: - f.readline() - texts = [] - context_labels = [] - reader = csv.reader(f) - for row in reader: - texts.append(row[0]) - context_labels.append(row[1]) - - return (texts, context_labels) - - -def textgenrnn_encode_cat(chars, vocab): - ''' - One-hot encodes values at given chars efficiently by preallocating - a zeros matrix. - ''' - - a = np.float32(np.zeros((len(chars), len(vocab) + 1))) - rows, cols = zip(*[(i, vocab.get(char, 0)) - for i, char in enumerate(chars)]) - a[rows, cols] = 1 - return a - - -def synthesize(textgens, n=1, return_as_list=False, prefix='', - temperature=[0.5, 0.2, 0.2], max_gen_length=300, - progress=True, stop_tokens=[' ', '\n']): - """Synthesizes texts using an ensemble of input models. 
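- The models take turns (in shuffled order) extending the same text,
- handing off at stop tokens (or after every token, at word level), so the
- result blends the style of every model in the ensemble.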
- """ - - gen_texts = [] - iterable = trange(n) if progress and n > 1 else range(n) - for _ in iterable: - shuffle(textgens) - gen_text = prefix - end = False - textgen_i = 0 - while not end: - textgen = textgens[textgen_i % len(textgens)] - gen_text, end = textgenrnn_generate(textgen.model, - textgen.vocab, - textgen.indices_char, - temperature, - textgen.config['max_length'], - textgen.META_TOKEN, - textgen.config['word_level'], - textgen.config.get( - 'single_text', False), - max_gen_length, - prefix=gen_text, - synthesize=True, - stop_tokens=stop_tokens) - textgen_i += 1 - if not return_as_list: - print("{}\n".format(gen_text)) - gen_texts.append(gen_text) - if return_as_list: - return gen_texts - - -def synthesize_to_file(textgens, destination_path, **kwargs): - texts = synthesize(textgens, return_as_list=True, **kwargs) - with open(destination_path, 'w') as f: - for text in texts: - f.write("{}\n".format(text)) - - -class generate_after_epoch(Callback): - def __init__(self, textgenrnn, gen_epochs, max_gen_length): - self.textgenrnn = textgenrnn - self.gen_epochs = gen_epochs - self.max_gen_length = max_gen_length - - def on_epoch_end(self, epoch, logs={}): - if self.gen_epochs > 0 and (epoch+1) % self.gen_epochs == 0: - self.textgenrnn.generate_samples( - max_gen_length=self.max_gen_length) - - -class save_model_weights(Callback): - def __init__(self, textgenrnn, num_epochs, save_epochs): - self.textgenrnn = textgenrnn - self.weights_name = textgenrnn.config['name'] - self.num_epochs = num_epochs - self.save_epochs = save_epochs - - def on_epoch_end(self, epoch, logs={}): - if len(self.textgenrnn.model.inputs) > 1: - self.textgenrnn.model = Model(inputs=self.model.input[0], - outputs=self.model.output[1]) - if self.save_epochs > 0 and (epoch+1) % self.save_epochs == 0 and self.num_epochs != (epoch+1): - print("Saving Model Weights — Epoch #{}".format(epoch+1)) - self.textgenrnn.model.save_weights( - "{}_weights_epoch_{}.hdf5".format(self.weights_name, epoch+1)) - else: - self.textgenrnn.model.save_weights( - "{}_weights.hdf5".format(self.weights_name)) -#!/usr/bin/env python3 -""" -fg - -CLI for training and interfacing with face generating models. - -""" - -import argparse -import sys -import types - - -# ---- Available commands - -def train(): - """ - Command to train a model. - """ - - parser = argparse.ArgumentParser( - description="Trains a model using the Radboud Face Database", - usage="faces []", - ) - parser.add_argument( - 'data', type=str, help="Directory where RaFD data lives.") - parser.add_argument('-o', '--output', type=str, - default='output', help="Directory to output results to.") - - parser.add_argument('-m', '--model', type=str, default='', - help="The model to load. 
If none specified, a new model will be made instead.") - parser.add_argument('-b', '--batch-size', type=int, - default=16, help="Batch size to use while training.") - parser.add_argument('-e', '--num-epochs', type=int, - default=100, help="The number of epochs to train.") - parser.add_argument('-opt', '--optimizer', type=str, default='adam', - help="Optimizer to use, must be a valid optimizer included in Keras.") - parser.add_argument('-d', '--deconv-layers', type=int, default=5, - help="The number of deconvolution layers to include in the model.") - parser.add_argument('-k', '--kernels-per-layer', type=int, nargs='+', - help="The number of kernels to include in each layer.") - - parser.add_argument('-v', '--visualize', action='store_true', - help="Output intermediate results after each epoch.") - parser.add_argument('--use-yalefaces', action='store_true', - help="Use YaleFaces data instead of RaFD") - parser.add_argument('--use-jaffe', action='store_true', - help="Use JAFFE data instead of RaFD") - - args = parser.parse_args(sys.argv[2:]) - - import faces.train - - if args.deconv_layers > 6: - print("Warning: Having more than 6 deconv layers will create images " - "larger than the original data! (and may not fit in memory)") - - faces.train.train_model(args.data, args.output, args.model, - batch_size=args.batch_size, - num_epochs=args.num_epochs, - optimizer=args.optimizer, - deconv_layers=args.deconv_layers, - kernels_per_layer=args.kernels_per_layer, - generate_intermediate=args.visualize, - use_yale=args.use_yalefaces, - use_jaffe=args.use_jaffe, - verbose=True, - ) - - -def generate(): - """ - Command to generate faces with a trained model. - """ - - parser = argparse.ArgumentParser( - description="Generate faces using a trained model.", - usage="faces []", - ) - parser.add_argument('-m', '--model', type=str, - required=True, help="Model definition file to use.") - parser.add_argument('-o', '--output', type=str, - required=True, help="Directory to output results to.") - parser.add_argument('-f', '--gen-file', type=str, required=True, - help="YAML file that specifies the parameters to generate.") - parser.add_argument('-b', '--batch_size', type=int, default=64, - help="Batch size to use while generating images.") - parser.add_argument('-ext', '--extension', type=str, default='jpg', - help="Image file extension to use when saving images.") - - args = parser.parse_args(sys.argv[2:]) - - import faces.generate - - faces.generate.generate_from_yaml(args.gen_file, args.model, args.output, - batch_size=args.batch_size, extension=args.extension) - - -# ---- Command-line invocation - -if __name__ == '__main__': - - # Use all functions defined in this file as possible commands to run - cmd_fns = [x for x in locals().values( - ) if isinstance(x, types.FunctionType)] - cmd_names = sorted([fn.__name__ for fn in cmd_fns]) - cmd_dict = {fn.__name__: fn for fn in cmd_fns} - - parser = argparse.ArgumentParser( - description="Generate faces using a deconvolution network.", - usage="faces []" - ) - parser.add_argument( - 'command', type=str, help="Command to run. Available commands: {}.".format(cmd_names)) - - args = parser.parse_args([sys.argv[1]]) - - cmd = None - try: - cmd = cmd_dict[args.command] - except KeyError: - sys.stderr.write('\033[91m') - sys.stderr.write("\nInvalid command {}!\n\n".format(args.command)) - sys.stderr.write('\033[0m') - sys.stderr.flush() - - parser.print_help() - - if cmd is not None: - cmd() -""" -faces/__init__.py - -Initialize the facegen package. 
- -""" -""" -faces/generate.py - -Methods for generating faces. - -""" - -import os -import yaml - -import numpy as np -from scipy import interpolate -import scipy.misc -from tqdm import tqdm - -from .instance import Emotion, NUM_YALE_POSES - - -class GenParser: - """ - Class to parse and create inputs based on the parameters in a yaml file. - """ - - # Default parameters to use - DefaultParams = { - 'mode': 'single', - 'constrained': True, - 'id': None, - 'em': None, - 'or': None, - 'ps': None, - 'lt': None, - 'id_scale': 1.0, - 'id_step': 0.1, - 'id_min': None, - 'id_max': None, - 'em_scale': 1.0, - 'em_step': 0.1, - 'em_min': None, - 'em_max': None, - 'or_scale': 1.0, - 'or_step': 0.1, - 'or_min': None, - 'or_max': None, - 'ps_scale': 1.0, - 'ps_step': 0.1, - 'ps_min': None, - 'ps_max': None, - 'lt_scale': 1.0, - 'lt_step': 0.1, - 'lt_min': None, - 'lt_max': None, - 'num_images': '1s', - 'fps': 30, - 'keyframes': None, - } - - def __init__(self, yaml_path): - self.yaml_file = open(yaml_path, 'r') - - self.modes = { - 'single': self.mode_single, - 'random': self.mode_random, - 'drunk': self.mode_drunk, - 'interpolate': self.mode_interpolate, - } - - def __del__(self): - self.yaml_file.close() - - # Methods for generating inputs by mode - - def mode_single(self, params): - """ - Generate network inputs for a single image. - """ - - if params['id'] is None: - params['id'] = 0 - if params['em'] is None: - params['em'] = 'neutral' - if params['or'] is None: - params['or'] = 0 - if params['ps'] is None: - params['ps'] = 0 - if params['lt'] is None: - params['lt'] = 0 - - if params['dataset'] == 'YALE': - inputs = { - 'identity': np.empty((1, params['num_ids'])), - 'pose': np.empty((1, NUM_YALE_POSES)), - 'lighting': np.empty((1, 4)), - } - inputs['identity'][0, :] = self.identity_vector( - params['id'], params) - inputs['pose'][0, :] = self.pose_vector(params['ps'], params) - inputs['lighting'][0, :] = self.lighting_vector( - params['lt'], params) - else: - inputs = { - 'identity': np.empty((1, params['num_ids'])), - 'emotion': np.empty((1, Emotion.length())), - 'orientation': np.empty((1, 2)), - } - - inputs['identity'][0, :] = self.identity_vector( - params['id'], params) - inputs['emotion'][0, :] = self.emotion_vector(params['em'], params) - inputs['orientation'][0, :] = self.orientation_vector( - params['or'], params) - - return inputs - - def mode_random(self, params): - """ - Generate random network inputs. 
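- Every parameter that is fixed in the yaml file is held constant;
- parameters left unset are redrawn at random for each frame.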
- """ - - num_images = self.num_frames(params['num_images'], params) - - if params['dataset'] == 'YALE': - inputs = { - 'identity': np.empty((num_images, params['num_ids'])), - 'pose': np.empty((num_images, NUM_YALE_POSES)), - 'lighting': np.empty((num_images, 4)), - } - else: - inputs = { - 'identity': np.empty((num_images, params['num_ids'])), - 'emotion': np.empty((num_images, Emotion.length())), - 'orientation': np.empty((num_images, 2)), - } - - for i in range(0, num_images): - if params['id'] is None: - inputs['identity'][i, :] = self.random_identity(params) - else: - inputs['identity'][i, :] = self.identity_vector( - params['id'], params) - - if params['dataset'] == "YALE": - if params['ps'] is None: - inputs['pose'][i, :] = self.random_pose(params) - else: - inputs['pose'][i, :] = self.pose_vector( - params['ps'], params) - - if params['lt'] is None: - inputs['lighting'][i, :], _ = self.random_lighting(params) - else: - inputs['lighting'][i, :] = self.lighting_vector( - params['lt'], params) - else: - if params['em'] is None: - inputs['emotion'][i, :] = self.random_emotion(params) - else: - inputs['emotion'][i, :] = self.emotion_vector( - params['em'], params) - - if params['or'] is None: - inputs['orientation'][i, - :], _ = self.random_orientation(params) - else: - inputs['orientation'][i, :] = self.orientation_vector( - params['or'], params) - - return inputs - - def mode_drunk(self, params): - """ - Generate "drunk" network inputs, random vectors created by randomly - shifting the last vector. - """ - - num_images = self.num_frames(params['num_images'], params) - - if params['dataset'] == "YALE": - inputs = { - 'identity': np.empty((num_images, params['num_ids'])), - 'pose': np.empty((num_images, NUM_YALE_POSES)), - 'lighting': np.empty((num_images, 4)), - } - else: - inputs = { - 'identity': np.empty((num_images, params['num_ids'])), - 'emotion': np.empty((num_images, Emotion.length())), - 'orientation': np.empty((num_images, 2)), - } - - last_id, last_em, last_or, last_ps, last_lt = None, None, None, None, None - - for i in range(0, num_images): - if params['id'] is None: - inputs['identity'][i, :] = self.random_identity( - params, last_id) - last_id = inputs['identity'][i, :] - else: - inputs['identity'][i, :] = self.identity_vector( - params['id'], params) - - if params['dataset'] == "YALE": - if params['ps'] is None: - inputs['pose'][i, :] = self.random_pose(params, last_ps) - last_ps = inputs['pose'][i, :] - else: - inputs['pose'][i, :] = self.pose_vector( - params['ps'], params) - - if params['lt'] is None: - inputs['lighting'][i, :], last_lt = self.random_lighting( - params, last_lt) - else: - inputs['lighting'][i, :] = self.lighting_vector( - params['lt'], params) - else: - if params['em'] is None: - inputs['emotion'][i, :] = self.random_emotion( - params, last_em) - last_em = inputs['emotion'][i, :] - else: - inputs['emotion'][i, :] = self.emotion_vector( - params['em'], params) - - if params['or'] is None: - inputs['orientation'][i, :], last_or = self.random_orientation( - params, last_or) - else: - inputs['orientation'][i, :] = self.orientation_vector( - params['or'], params) - - return inputs - - def mode_interpolate(self, params): - """ - Generate network inputs that interpolate between keyframes. 
- """ - - use_yale = params['dataset'] == "YALE" - - # Set starting/default values - id_val = params['id'] if params['id'] is not None else 0 - if use_yale: - ps_val = params['ps'] if params['ps'] is not None else 0 - lt_val = params['lt'] if params['lt'] is not None else 0 - else: - em_val = params['em'] if params['em'] is not None else 0 - or_val = params['or'] if params['or'] is not None else 0 - - # List of all id/em/or vectors for each keyframe - id_keyframes = list() - if use_yale: - ps_keyframes = list() - lt_keyframes = list() - else: - em_keyframes = list() - or_keyframes = list() - - keyframe_indicies = list() - - frame_index = None - - for keyframe_params in params['keyframes']: - - # Get new parameters, otherwise use values from the last keyframe - if 'id' in keyframe_params: - id_val = keyframe_params['id'] - if use_yale: - if 'ps' in keyframe_params: - ps_val = keyframe_params['ps'] - if 'lt' in keyframe_params: - lt_val = keyframe_params['lt'] - else: - if 'em' in keyframe_params: - em_val = keyframe_params['em'] - if 'or' in keyframe_params: - or_val = keyframe_params['or'] - - # Determine which frame index this is in the animation - if frame_index is None: - frame_index = 0 - else: - if 'length' not in keyframe_params: - raise RuntimeError("A length must be specified for every " - "keyframe except the first") - frame_index += self.num_frames( - keyframe_params['length'], params) - - # Create input vectors for this keyframe - id_keyframes.append(self.identity_vector(id_val, params)) - if use_yale: - ps_keyframes.append(self.pose_vector(ps_val, params)) - lt_keyframes.append(self.lighting_vector(lt_val, params)) - else: - em_keyframes.append(self.emotion_vector(em_val, params)) - or_keyframes.append(self.orientation_vector(or_val, params)) - - keyframe_indicies.append(frame_index) - - # Convert python lists to numpy arrays - id_keyframes = np.vstack(id_keyframes) - if use_yale: - ps_keyframes = np.vstack(ps_keyframes) - lt_keyframes = np.vstack(lt_keyframes) - else: - em_keyframes = np.vstack(em_keyframes) - or_keyframes = np.vstack(or_keyframes) - - keyframe_indicies = np.array(keyframe_indicies) - - num_frames = keyframe_indicies[-1]+1 - - # Interpolate - if use_yale: - id_idx = np.arange(0, params['num_ids']) - ps_idx = np.arange(0, NUM_YALE_POSES) - lt_idx = np.arange(0, 4) - else: - id_idx = np.arange(0, params['num_ids']) - em_idx = np.arange(0, Emotion.length()) - or_idx = np.arange(0, 2) - - f_id = interpolate.interp2d(id_idx, keyframe_indicies, id_keyframes) - if use_yale: - f_ps = interpolate.interp2d( - ps_idx, keyframe_indicies, ps_keyframes) - f_lt = interpolate.interp2d( - lt_idx, keyframe_indicies, lt_keyframes) - else: - f_em = interpolate.interp2d( - em_idx, keyframe_indicies, em_keyframes) - f_or = interpolate.interp2d( - or_idx, keyframe_indicies, or_keyframes) - - if use_yale: - return { - 'identity': f_id(id_idx, np.arange(0, num_frames)), - 'pose': f_ps(ps_idx, np.arange(0, num_frames)), - 'lighting': f_lt(lt_idx, np.arange(0, num_frames)), - } - else: - return { - 'identity': f_id(id_idx, np.arange(0, num_frames)), - 'emotion': f_em(em_idx, np.arange(0, num_frames)), - 'orientation': f_or(or_idx, np.arange(0, num_frames)), - } - - # Helper methods - - def num_frames(self, val, params): - """ Gets the number of frames for a value. 
""" - - if isinstance(val, int): - return val - elif isinstance(val, str): - if val.endswith('s'): - return int(float(val[:-1]) * params['fps']) - else: - raise RuntimeError("Length '{}' not understood".format(val)) - else: - raise RuntimeError("Length '{}' not understood".format(val)) - - def identity_vector(self, value, params): - """ Create an identity vector for a provided value. """ - - if isinstance(value, str): - if '+' not in value: - raise RuntimeError( - "Identity '{}' not understood".format(value)) - - try: - values = [int(x) for x in value.split('+')] - except: - raise RuntimeError( - "Identity '{}' not understood".format(value)) - elif isinstance(value, int): - values = [value] - else: - raise RuntimeError("Identity '{}' not understood".format(value)) - - vec = np.zeros((params['num_ids'],)) - for val in values: - if val < 0 or params['num_ids'] <= val: - raise RuntimeError("Identity '{}' invalid".format(val)) - vec[val] += 1.0 - - return self.constrain(vec, params['constrained'], params['id_scale'], - params['id_min'], params['id_max']) - - def emotion_vector(self, value, params): - """ Create an emotion vector for a provided value. """ - - if not isinstance(value, str): - raise RuntimeError("Emotion '{}' not understood".format(value)) - - if '+' in value: - values = value.split('+') - else: - values = [value] - - vec = np.zeros((Emotion.length(),)) - for emotion in values: - try: - vec += getattr(Emotion, emotion) - except AttributeError: - raise RuntimeError("Emotion '{}' is invalid".format(emotion)) - - return self.constrain(vec, params['constrained'], params['em_scale'], - params['em_min'], params['em_max']) - - def orientation_vector(self, value, params): - """ Create an orientation vector for a provided value. """ - - if isinstance(value, int) or isinstance(value, float): - value = np.deg2rad(value) - return np.array([np.sin(value), np.cos(value)]) - - elif isinstance(value, str): - if params['constrained']: - raise RuntimeError("Cannot manually set orientation vector " - "values when constrained is set to True") - - values = value.split() - if len(values) != 2: - raise RuntimeError( - "Orientation '{}' not understood".format(value)) - - vec = np.empty((2,)) - try: - vec[0] = float(values[0]) - vec[1] = float(values[1]) - except ValueError: - raise RuntimeError( - "Orientation '{}' not understood".format(value)) - - return vec - else: - raise RuntimeError("Orientation '{}' not understood".format(value)) - - def pose_vector(self, value, params): - """ Create an pose vector for a provided value. """ - - if isinstance(value, str): - if '+' not in value: - raise RuntimeError("Pose '{}' not understood".format(value)) - - try: - values = [int(x) for x in value.split('+')] - except: - raise RuntimeError("Pose '{}' not understood".format(value)) - elif isinstance(value, int): - values = [value] - else: - raise RuntimeError("Pose '{}' not understood".format(value)) - - vec = np.zeros((NUM_YALE_POSES,)) - for val in values: - if val < 0 or NUM_YALE_POSES <= val: - raise RuntimeError("Pose '{}' invalid".format(val)) - vec[val] += 1.0 - - return self.constrain(vec, params['constrained'], params['ps_scale'], - params['ps_min'], params['ps_max']) - - def lighting_vector(self, value, params): - """ Create a lighting vector for a provided value. 
""" - - if isinstance(value, int) or isinstance(value, float): - value = np.deg2rad(value) - return np.array([np.sin(value), np.cos(value), np.sin(value), np.cos(value)]) - - elif isinstance(value, str): - - values = value.split() - if len(values) != 2: - raise RuntimeError( - "Lighting '{}' not understood".format(value)) - - vec = np.empty((4,)) - try: - # First element is azimuth - vec[0] = np.sin(float(values[0])) - vec[1] = np.cos(float(values[0])) - # Second element is elevation - vec[2] = np.sin(float(values[1])) - vec[3] = np.cos(float(values[1])) - except ValueError: - raise RuntimeError( - "Lighting '{}' not understood".format(value)) - - return vec - else: - raise RuntimeError("Lighting '{}' not understood".format(value)) - - def random_identity(self, params, start=None): - """ Create a random identity vector. """ - - step = params['id_step'] - - if start is None: - vec = 2*(np.random.rand(params['num_ids'])-0.5) - else: - vec = start + (2*step*np.random.rand(params['num_ids'])-step) - - return self.constrain(vec, params['constrained'], params['id_scale'], - params['id_min'], params['id_max']) - - def random_emotion(self, params, start=None): - """ Create a random emotion vector. """ - - step = params['em_step'] - - if start is None: - vec = 2*(np.random.rand(Emotion.length())-0.5) - else: - vec = start + (2*step*np.random.rand(Emotion.length())-step) - - return self.constrain(vec, params['constrained'], params['em_scale'], - params['em_min'], params['em_max']) - - def random_orientation(self, params, start=None): - """ Create a random orientation vector. """ - - step = params['or_step'] - - if params['constrained']: - if start is None: - angle = 180*np.random.rand() - 90 - else: - angle = start + step * (180*np.random.rand()-90) - rad = np.deg2rad(angle) - - # Return the angle as a second argument so the caller can grab it - # in case it's in the drunk mode - return np.array([np.sin(rad), np.cos(rad)]), angle - else: - if start is None: - vec = 2*np.random.rand(2) - 1 - else: - vec = start + (2*step*np.random.rand(2)-step) - - vec = self.constrain(vec, params['constrained'], params['or_scale'], - params['or_min'], params['or_max']) - - # Return the vector twice so it behaves the same as constrained - return vec, vec - - def random_pose(self, params, start=None): - """ Create a random pose vector. """ - - step = params['ps_step'] - - if start is None: - vec = 2*(np.random.rand(NUM_YALE_POSES)-0.5) - else: - vec = start + (2*step*np.random.rand(NUM_YALE_POSES)-step) - - return self.constrain(vec, params['constrained'], params['ps_scale'], - params['ps_min'], params['ps_max']) - - def random_lighting(self, params, start=None): - """ Create a random lighting vector. 
""" - - step = params['lt_step'] - - if params['constrained']: - if start is None: - azimuth = 180*np.random.rand() - 90 - elevation = 180*np.random.rand() - 90 - else: - azimuth = start[0] + step * (180*np.random.rand()-90) - elevation = start[1] + step * (180*np.random.rand()-90) - azrad = np.deg2rad(azimuth) - elrad = np.deg2rad(elevation) - - # Return the angle as a second argument so the caller can grab it - # in case it's in the drunk mode - return np.array([np.sin(azrad), np.cos(azrad), np.sin(elrad), - np.cos(elrad)]), (azimuth, elevation) - else: - if start is None: - vec = 2*np.random.rand(4) - 1 - else: - vec = start + (2*step*np.random.rand(4)-step) - - vec = self.constrain(vec, params['constrained'], params['lt_scale'], - params['lt_min'], params['lt_max']) - - # Return the vector twice so it behaves the same as constrained - return vec, vec - - def constrain(self, vec, constrained, scale, vec_min, vec_max): - """ Constrains the emotion vector based on params. """ - - if constrained: - vec = vec / np.linalg.norm(vec) - - if scale is not None: - vec = vec * scale - - if vec_min is not None and vec_max is not None: - vec = np.clip(vec, vec_min, vec_max) - - return vec - - # Main parsing method - - def parse_params(self): - """ - Parses the yaml file and creates input vectors to use with the model. - """ - - self.yaml_file.seek(0) - - yaml_params = yaml.load(self.yaml_file) - - params = GenParser.DefaultParams - - for field in params.keys(): - if field in yaml_params: - params[field] = yaml_params[field] - - return params - - def gen_inputs(self, params): - """ - creates input vectors to use with the model. - """ - - fn = None - try: - fn = self.modes[params['mode']] - except KeyError: - raise RuntimeError("Mode '{}' is invalid".format(params['mode'])) - - return fn(params) - - -def generate_from_yaml(yaml_path, model_path, output_dir, batch_size=32, - extension='jpg'): - """ - Generate images based on parameters specified in a yaml file. - """ - - from keras import backend as K - from keras.models import load_model - - print("Loading model...") - - model = load_model(model_path) - num_ids = model.input_shape[0][1] # XXX is there a nicer way to get this? - dataset = os.path.basename(model_path).split('.')[1] - - parser = GenParser(yaml_path) - - try: - params = parser.parse_params() - except RuntimeError as e: - print("Error: Unable to parse '{}'. Encountered exception:".format(yaml_path)) - print(e) - return - params['dataset'] = dataset - params['num_ids'] = num_ids - inputs = parser.gen_inputs(params) - - if not os.path.exists(output_dir): - os.makedirs(output_dir) - else: - raise RuntimeError( - "Directory '{}' exists. Cowardly refusing to continue." 
- .format(output_dir))
-
- print("Generating images...")
-
- num_images = inputs['identity'].shape[0]
- count = 0
-
- for idx in tqdm(range(0, num_images, batch_size)):
-
- if dataset == "YALE":
- batch = {
- 'identity': inputs['identity'][idx:idx+batch_size, :],
- 'pose': inputs['pose'][idx:idx+batch_size, :],
- 'lighting': inputs['lighting'][idx:idx+batch_size, :],
- }
- else:
- batch = {
- 'identity': inputs['identity'][idx:idx+batch_size, :],
- 'emotion': inputs['emotion'][idx:idx+batch_size, :],
- 'orientation': inputs['orientation'][idx:idx+batch_size, :],
- }
-
- gen = model.predict_on_batch(batch)
-
- for i in range(0, gen.shape[0]):
- if K.image_dim_ordering() == 'th':
- if dataset == "YALE":
- image = np.empty(gen.shape[2:])
- image[:, :] = gen[i, 0, :, :]
- else:
- image = np.empty(gen.shape[2:]+(3,))
- for x in range(0, 3):
- image[:, :, x] = gen[i, x, :, :]
- else:
- if dataset == "YALE" or dataset == "JAFFE":
- image = gen[i, :, :, 0]
- else:
- image = gen[i, :, :, :]
- image = np.array(255*np.clip(image, 0, 1), dtype=np.uint8)
- file_path = os.path.join(
- output_dir, '{:05}.{}'.format(count, extension))
- scipy.misc.imsave(file_path, image)
- count += 1
- """
- faces/instance.py
-
- Instance class to hold data for each example.
-
- """
-
- import os
- import csv
-
- from keras import backend as K
-
- import numpy as np
- import scipy.misc as misc
- from tqdm import tqdm
-
-
- NUM_YALE_POSES = 10
-
-
- # ---- Enum classes for vector descriptions
-
- class Emotion:
- angry = [1., 0., 0., 0., 0., 0., 0., 0.]
- contemptuous = [0., 1., 0., 0., 0., 0., 0., 0.]
- disgusted = [0., 0., 1., 0., 0., 0., 0., 0.]
- fearful = [0., 0., 0., 1., 0., 0., 0., 0.]
- happy = [0., 0., 0., 0., 1., 0., 0., 0.]
- neutral = [0., 0., 0., 0., 0., 1., 0., 0.]
- sad = [0., 0., 0., 0., 0., 0., 1., 0.]
- surprised = [0., 0., 0., 0., 0., 0., 0., 1.]
-
- @classmethod
- def length(cls):
- return len(Emotion.neutral)
-
-
- # ---- Loading functions
-
- class RaFDInstances:
-
- def __init__(self, directory):
- """
- Constructor for a RaFDInstances object.
-
- Args:
- directory (str): Directory where the data lives.
- """
-
- self.directory = directory
-
- # A list of all files in the current directory (no kids, only frontal gaze)
- self.filenames = [x for x in os.listdir(directory)
- if 'Kid' not in x and 'frontal' in x]
-
- # The number of times the directory has been read over
- self.num_iterations = 0
-
- # Count identities and map each identity present to a contiguous value
- identities = list()
- for filename in self.filenames:
- # Identities are 1-indexed
- identity = int(filename.split('_')[1])-1
- if identity not in identities:
- identities.append(identity)
- self.identity_map = dict()
- for idx, identity in enumerate(identities):
- self.identity_map[identity] = idx
-
- self.num_identities = len(self.identity_map)
- self.num_instances = len(self.filenames)
-
- def load_data(self, image_size, verbose=False):
- """
- Loads RaFD data for training.
-
- Args:
- image_size (tuple): Size images should be resized to.
- Returns:
- numpy.ndarray, training data (face parameters).
- numpy.ndarray, output data (the actual images to generate).
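- The keys of the input dict ('emotion', 'identity', 'orientation')
- match the names of the model's input layers.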
- """ - - inputs = { - 'emotion': np.empty((self.num_instances, len(Emotion.neutral))), - 'identity': np.empty((self.num_instances, self.num_identities)), - 'orientation': np.empty((self.num_instances, 2)), - } - - if K.image_dim_ordering() == 'th': - outputs = np.empty((self.num_instances, 3)+image_size) - else: - outputs = np.empty((self.num_instances,)+image_size+(3,)) - - all_instances = range(0, len(self.filenames)) - if verbose: - all_instances = tqdm(all_instances) - - for i in all_instances: - instance = RaFDInstance( - self.directory, self.filenames[i], image_size) - - inputs['emotion'][i, :] = instance.emotion - inputs['identity'][i, :] = instance.identity_vector( - self.identity_map) - inputs['orientation'][i, :] = instance.orientation - - if K.image_dim_ordering() == 'th': - outputs[i, :, :, :] = instance.th_image() - else: - outputs[i, :, :, :] = instance.tf_image() - - return inputs, outputs - - -class YaleInstances: - - def __init__(self, directory): - """ - Constructor for a YaleInstances object. - - Args: - directory (str): Directory where the data lives. - """ - - self.directory = directory - - subdirs = [x for x in os.listdir(directory) if 'yaleB' in x] - - self.num_identities = len(subdirs) - self.identity_map = dict() - for idx, subdir in enumerate(sorted(subdirs)): - identity = int(subdir[5:7]) - self.identity_map[identity] = idx - - self.filenames = list() - - for subdir in subdirs: - path = os.path.join(directory, subdir) - self.filenames.extend( - [os.path.join(subdir, x) for x in os.listdir(path) - if 'pgm' in x - and 'Ambient' not in x] - ) - - self.num_instances = len(self.filenames) - - def load_data(self, image_size, verbose=False): - """ - Loads YaleFaces data for training. - - Args: - image_size (tuple): Size images should be resized to. - Returns: - numpy.ndarray, training data (face parameters). - numpy.ndarray, output data (the actual images to generate). - """ - - inputs = { - 'identity': np.empty((self.num_instances, self.num_identities)), - 'pose': np.empty((self.num_instances, NUM_YALE_POSES)), - 'lighting': np.empty((self.num_instances, 4)), - } - - if K.image_dim_ordering() == 'th': - outputs = np.empty((self.num_instances, 1)+image_size) - else: - outputs = np.empty((self.num_instances,)+image_size+(1,)) - - all_instances = range(0, len(self.filenames)) - if verbose: - all_instances = tqdm(all_instances) - - for i in all_instances: - instance = YaleInstance( - self.directory, self.filenames[i], image_size) - - inputs['identity'][i, :] = instance.identity_vector( - self.identity_map) - inputs['pose'][i, :] = instance.pose - inputs['lighting'][i, :] = instance.lighting - - if K.image_dim_ordering() == 'th': - outputs[i, :, :, :] = instance.th_image() - else: - outputs[i, :, :, :] = instance.tf_image() - - return inputs, outputs - - -class JAFFEInstances: - """ - This is a reader for the JAFFE dataset of Japanese female faces - acting out varying expressions, scored by a panel of FACS evaluators. - Image download link at http://www.kasrl.org/jaffe_info.html - The unpacked directory structure is flat, with filenames like KA.AN1.39.tiff - You should add to this directory a CSV version of the semantic ratings - table appearing on the download page, as semantic-ratings.csv - You'll have to make this yourself. First two lines will look like: - N,HAP,SAD,SUR,ANG,DIS,FEA,PIC - 1,2.87,2.52,2.10,1.97,1.97,2.06,KM-NE1 - """ - - def __init__(self, directory): - """ - Constructor for a JAFFEInstances object. 
-
- Args:
- directory (str): Directory where the data lives
- """
-
- self.directory = directory
- self.filenames = [x for x in os.listdir(
- directory) if x.endswith('tiff')]
- self.num_instances = len(self.filenames)
- identity_map = {}
- for fname in self.filenames:
- ident, emotion = fname.split('.')[:2]
- # assign identity strings contiguous indices
- identity_map[ident] = identity_map.get(ident, len(identity_map))
- self.identity_map = identity_map
- self.num_identities = len(identity_map)
-
- def load_semantic_ratings(self):
- """
- Loads semantic ratings for each instance. These assign
- human-evaluated levels for each emotion in a given face
- (a face will generally have nonzero score on multiple emotions).
-
- Returns:
- dict, ratings (vectors of emotion scores keyed by inst#)
- """
-
- # map JAFFE emotion labels to local Emotion indices
- # note that there is no explicit JAFFE neutral, it is implied when
- # no specific emotion dominates.
- emotions = ('ANG', '_', 'DIS', 'FEA', 'HAP', 'NEU', 'SAD', 'SUR')
- emotion_map = {emotion: idx for idx, emotion in enumerate(emotions)}
- ratings = {}
- with open(os.path.join(self.directory, 'semantic-ratings.csv')) as rows:
- reader = csv.DictReader(rows)
- for row in reader:
- rates = np.array([float(row.get(emotion, 1))
- for emotion in emotions])
- # emotions are scored 1..5, make them 0-1
- rates = (rates - 1.0) / 4.0
- # synthesize 'neutral' score as the complement of the strongest
- # emotion present.
- rates[emotion_map['NEU']] = 1.0 - np.max(rates)
- # input convention is for the emotion vector to have unit norm.
- rates = rates / np.linalg.norm(rates)
- N = int(row['N']) - 1
- ratings[N] = rates
- return ratings
-
- def load_data(self, image_size, verbose=False):
- """
- Loads JAFFE data for training.
-
- Args:
- image_size (tuple): Size images should be resized to.
- Returns:
- numpy.ndarray, training data (face parameters).
- numpy.ndarray, output data (the actual images to generate).
- """
-
- instances = [JAFFEInstance(self.directory, fname, image_size)
- for fname in self.filenames]
- inst_idents = np.zeros((self.num_instances, self.num_identities))
- for idx, inst in enumerate(instances):
- # each row in inst_idents is a one-hot encoding of identity idx
- inst_idents[idx, self.identity_map[inst.identity]] = 1
-
- inst_orient = np.tile((0, 1), self.num_instances).reshape(-1, 2)
- ratings = self.load_semantic_ratings()
- # Note: there are some scored instance N's with no instance file!
- inst_emotion = np.array([ratings[inst.N] for inst in instances])
-
- inputs = {
- 'identity': inst_idents, # 1-hot
- 'orientation': inst_orient,
- 'emotion': inst_emotion,
- }
- print("JAFFE: found %d identities, %d instances" % (
- self.num_identities, self.num_instances))
- if K.image_dim_ordering() == 'th':
- inst_image = [inst.th_image() for inst in instances]
- outputs = np.empty((self.num_instances, 1)+image_size)
- else:
- inst_image = [inst.tf_image() for inst in instances]
- outputs = np.empty((self.num_instances,)+image_size+(1,))
- outputs[np.arange(self.num_instances)] = inst_image
-
- return inputs, outputs
-
-
- # ---- Instance class definition
-
- class RaFDInstance:
- """
- Holds information about each RaFD example.
- """
-
- def __init__(self, directory, filename, image_size, trim=24, top=24):
- """
- Constructor for an RaFDInstance object.
-
- Args:
- directory (str): Base directory where the example lives.
- filename (str): The name of the file of the example.
- image_size (tuple): Size to resize the image to.
- Args (optional): - trim (int): How many pixels from the edge to trim off the top and sides. - top (int): How much extra to trim off the top. - """ - - self.image = misc.imread(os.path.join(directory, filename)) - - # Trim the image to get more of the face - - height, width, d = self.image.shape - - width = int(width-2*trim) - height = int(width*image_size[0]/image_size[1]) - - self.image = self.image[trim+top:trim+height, trim:trim+width, :] - - # Resize and fit between 0-1 - self.image = misc.imresize(self.image, image_size) - self.image = self.image / 255.0 - - #self.mask = misc.imread( os.path.join(directory, 'mask', filename) ) - #self.mask = misc.imresize( self.mask, image_size ) - #self.mask = self.mask / 255.0 - - # Parse filename to get parameters - - items = filename.split('_') - - # Represent orientation as sin/cos vector - angle = np.deg2rad(float(items[0][-3:])-90) - self.orientation = np.array([np.sin(angle), np.cos(angle)]) - - self.identity_index = int(items[1])-1 # Identities are 1-indexed - - self.emotion = np.array(getattr(Emotion, items[4])) - - def identity_vector(self, identity_map): - """ - Creates a one-in-k encoding of the instance's identity. - - Args: - identity_map (dict): Mapping from identity to a unique index. - Returns: - numpy.ndarray, the identity vector. - """ - - identity_vec = np.zeros(len(identity_map), dtype=np.float32) - identity_vec[identity_map[self.identity_index]] = 1. - - return identity_vec - - def th_image(self): - """ - Returns a Theano-ordered representation of the image. - """ - - image = np.empty((3,)+self.image.shape[0:2]) - for i in range(0, 3): - image[i, :, :] = self.image[:, :, i] - return image - - def tf_image(self): - """ - Returns a TensorFlow-ordered representation of the image. - """ - - # As-is - return self.image - - -class YaleInstance: - """ - Holds information about each YaleFaces example. - """ - - def __init__(self, directory, filepath, image_size): - """ - Constructor for an YaleInstance object. - - Args: - directory (str): Base directory where the example lives. - filename (str): The name of the file of the example. - image_size (tuple): Size to resize the image to. - """ - - filename = filepath.split('/')[-1] - - self.image = misc.imread(os.path.join(directory, filepath)) - - # Resize and scale values to [0 1] - self.image = misc.imresize(self.image, image_size) - self.image = self.image / 255.0 - - self.identity_index = int(filename[5:7]) - - pose_idx = int(filename[9:11]) - self.pose = np.zeros(NUM_YALE_POSES, dtype=np.float32) - self.pose[pose_idx] = 1 - - # Light azimuth and elevation - az = np.deg2rad(float(filename[12:16])) - el = np.deg2rad(float(filename[17:20])) - - self.lighting = np.array( - [np.sin(az), np.cos(az), np.sin(el), np.cos(el)]) - - def identity_vector(self, identity_map): - """ - Creates a one-in-k encoding of the instance's identity. - - Args: - identity_map (dict): Mapping from identity to a unique index. - Returns: - numpy.ndarray, the identity vector. - """ - - identity_vec = np.zeros(len(identity_map), dtype=np.float32) - identity_vec[identity_map[self.identity_index]] = 1. - - return identity_vec - - def th_image(self): - """ - Returns a Theano-ordered representation of the image. - """ - - return np.expand_dims(self.image, 0) - - def tf_image(self): - """ - Returns a TensorFlow-ordered representation of the image. - """ - - return np.expand_dims(self.image, 2) - - -class JAFFEInstance: - """ - Holds information about each JAFFE example. 
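- The identity and instance number are parsed from the filename; the
- emotion scores come from the semantic ratings table instead.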
- """ - - def __init__(self, directory, filepath, image_size): - """ - Constructor for an JAFFEInstance object. - - Args: - directory (str): Base directory where the example lives. - filename (str): The name of the file of the example. - image_size (tuple): Size to resize the image to. - """ - - filename = filepath.split('/')[-1] - - self.image = misc.imread(os.path.join(directory, filepath)) - # some of the jaffe images are 3-channel greyscale, some are 1-channel! - # make image 2d for sure - self.image = np.atleast_3d(self.image)[..., 0] - # Resize and scale values to [0 1] - self.image = misc.imresize(self.image, image_size) - self.image = self.image / 255.0 - ident, _, N, _ = filename.split('.') - # Note: the emotion encoded in the filename is the dominant - # scoring emotion, but we ignore this and use precise emotion scores - # from the semantic ratings table - self.identity, self.N = ident, int(N) - 1 # 0-based instance numbering - - def th_image(self): - """ - Returns a Theano-ordered representation of the image. - """ - - return np.expand_dims(self.image, 0) - - def tf_image(self): - """ - Returns a TensorFlow-ordered representation of the image. - """ - - return np.expand_dims(self.image, 2) -""" -faces/model.py - -Methods to build FaceGen models. - -""" - -from keras import backend as K -from keras.layers import BatchNormalization, Convolution2D, Dense, LeakyReLU, \ - Input, MaxPooling2D, merge, Reshape, UpSampling2D -from keras.models import Model - -from .instance import Emotion, NUM_YALE_POSES - - -def build_model(identity_len=57, orientation_len=2, lighting_len=4, - emotion_len=Emotion.length(), pose_len=NUM_YALE_POSES, - initial_shape=(5, 4), deconv_layers=5, num_kernels=None, - optimizer='adam', use_yale=False, use_jaffe=False): - """ - Builds a deconvolution FaceGen model. - - Args (optional): - identity_len (int): Length of the identity input vector. - orientation_len (int): Length of the orientation input vector. - emotion_len (int): Length of the emotion input vector. - initial_shape (tuple): The starting shape of the deconv. network. - deconv_layers (int): How many deconv. layers to use. More layers - gives better resolution, although requires more GPU memory. - num_kernels (list): Number of convolution kernels for each layer. - optimizer (str): The optimizer to use. Will only use default values. - Returns: - keras.Model, the constructed model. 
- """ - - print(initial_shape) - - if num_kernels is None: - num_kernels = [128, 128, 96, 96, 32, 32, 16] - - # TODO: Parameter validation - - identity_input = Input(shape=(identity_len,), name='identity') - - if use_yale: - lighting_input = Input(shape=(lighting_len,), name='lighting') - pose_input = Input(shape=(pose_len,), name='pose') - else: - orientation_input = Input(shape=(orientation_len,), name='orientation') - emotion_input = Input(shape=(emotion_len,), name='emotion') - - # Hidden representation for input parameters - - fc1 = LeakyReLU()(Dense(512)(identity_input)) - fc2 = LeakyReLU()(Dense(512)(lighting_input if use_yale else orientation_input)) - fc3 = LeakyReLU()(Dense(512)(pose_input if use_yale else emotion_input)) - - params = merge([fc1, fc2, fc3], mode='concat') - params = LeakyReLU()(Dense(1024)(params)) - - # Apply deconvolution layers - - height, width = initial_shape - - print('height:', height, 'width:', width) - - x = LeakyReLU()(Dense(height*width*num_kernels[0])(params)) - if K.image_dim_ordering() == 'th': - x = Reshape((num_kernels[0], height, width))(x) - else: - x = Reshape((height, width, num_kernels[0]))(x) - - for i in range(0, deconv_layers): - # Upsample input - x = UpSampling2D((2, 2))(x) - - # Apply 5x5 and 3x3 convolutions - - # If we didn't specify the number of kernels to use for this many - # layers, just repeat the last one in the list. - idx = i if i < len(num_kernels) else -1 - x = LeakyReLU()(Convolution2D( - num_kernels[idx], 5, 5, border_mode='same')(x)) - x = LeakyReLU()(Convolution2D( - num_kernels[idx], 3, 3, border_mode='same')(x)) - x = BatchNormalization()(x) - - # Last deconvolution layer: Create 3-channel image. - x = MaxPooling2D((1, 1))(x) - x = UpSampling2D((2, 2))(x) - x = LeakyReLU()(Convolution2D(8, 5, 5, border_mode='same')(x)) - x = LeakyReLU()(Convolution2D(8, 3, 3, border_mode='same')(x)) - x = Convolution2D(1 if use_yale or use_jaffe else 3, 3, 3, - border_mode='same', activation='sigmoid')(x) - - # Compile the model - - if use_yale: - model = Model(input=[identity_input, pose_input, - lighting_input], output=x) - else: - model = Model( - input=[identity_input, orientation_input, emotion_input], output=x) - - # TODO: Optimizer options - model.compile(optimizer=optimizer, loss='msle') - - return model -""" -faces/train.py -""" - -import os - -from keras import backend as K -from keras.callbacks import Callback, EarlyStopping, ModelCheckpoint -from keras.models import load_model - -import numpy as np -import scipy.misc - -from .instance import ( - Emotion, RaFDInstances, YaleInstances, JAFFEInstances, NUM_YALE_POSES) -from .model import build_model - - -class GenerateIntermediate(Callback): - """ Callback to generate intermediate images after each epoch. """ - - def __init__(self, output_dir, num_identities, batch_size=32, use_yale=False, - use_jaffe=False): - """ - Constructor for a GenerateIntermediate object. - - Args: - output_dir (str): Directory to save intermediate results in. - num_identities (int): Number of identities in the training set. - Args: (optional) - batch_size (int): Batch size to use when generating images. 
- """ - super(Callback, self).__init__() - - self.output_dir = output_dir - self.num_identities = num_identities - self.batch_size = batch_size - self.use_yale = use_yale - self.use_jaffe = use_jaffe - - self.parameters = dict() - - # Sweep through identities - self.parameters['identity'] = np.eye(num_identities) - - if use_yale: - # Use pose 0, lighting at 0deg azimuth and elevation - self.parameters['pose'] = np.zeros( - (num_identities, NUM_YALE_POSES)) - self.parameters['lighting'] = np.zeros((num_identities, 4)) - for i in range(0, num_identities): - self.parameters['pose'][i, 0] = 0 - self.parameters['lighting'][i, 1] = 1 - self.parameters['lighting'][i, 3] = 1 - else: - # Make all have neutral expressions, front-facing - self.parameters['emotion'] = np.empty( - (num_identities, Emotion.length())) - self.parameters['orientation'] = np.zeros((num_identities, 2)) - for i in range(0, num_identities): - self.parameters['emotion'][i, :] = Emotion.neutral - self.parameters['orientation'][i, 1] = 1 - - def on_train_begin(self, logs={}): - """ Create directories. """ - - if not os.path.exists(self.output_dir): - os.makedirs(self.output_dir) - - def on_epoch_end(self, epoch, logs={}): - """ Generate and save results to the output directory. """ - - dest_dir = os.path.join(self.output_dir, 'e{:04}'.format(epoch)) - if not os.path.exists(dest_dir): - os.makedirs(dest_dir) - - gen = self.model.predict(self.parameters, batch_size=self.batch_size) - - for i in range(0, gen.shape[0]): - if K.image_dim_ordering() == 'th': - if self.use_yale or self.use_jaffe: - image = np.empty(gen.shape[2:]) - image[:, :] = gen[i, 0, :, :] - else: - image = np.empty(gen.shape[2:]+(3,)) - for x in range(0, 3): - image[:, :, x] = gen[i, x, :, :] - else: - if self.use_yale or self.use_jaffe: - image = gen[i, :, :, 0] - else: - image = gen[i, :, :, :] - image = np.array(255*np.clip(image, 0, 1), dtype=np.uint8) - file_path = os.path.join(dest_dir, '{:02}.png'.format(i)) - scipy.misc.imsave(file_path, image) - - -def train_model(data_dir, output_dir, model_file='', batch_size=32, - num_epochs=100, optimizer='adam', deconv_layers=5, - use_yale=False, use_jaffe=False, - kernels_per_layer=None, generate_intermediate=False, - verbose=False): - """ - Trains the model on the data, generating intermediate results every epoch. - - Args: - data_dir (str): Directory where the data lives. - output_dir (str): Directory where outputs should be saved. - model_file (str): Model file to load. If none specified, a new model - will be created. - Args (optional): - batch_size (int): Size of the batch to use. - num_epochs (int): Number of epochs to train for. - optimizer (str): Keras optimizer to use. - deconv_layers (int): The number of deconvolution layers to use. - generate_intermediate (bool): Whether or not to generate intermediate results. 
- """ - - data_dir = os.path.expanduser(data_dir) - - if not os.path.exists(output_dir): - os.makedirs(output_dir) - - instances = (YaleInstances(data_dir) if use_yale - else JAFFEInstances(data_dir) if use_jaffe - else RaFDInstances(data_dir)) - - if verbose: - print("Found {} instances with {} identities".format( - instances.num_instances, instances.num_identities)) - - # Create FaceGen model to use - - if model_file: - model = load_model(model_file) - if verbose: - print("Loaded model %d identities from {}".format(model.model_file)) - else: - # TODO: Refactor this to a more elegant way to determine params by dataset - initial_shape = (5, 4) - if use_yale: - initial_shape = (6, 8) - if use_jaffe: - initial_shape = (4, 4) - - model = build_model( - identity_len=instances.num_identities, - deconv_layers=deconv_layers, - num_kernels=kernels_per_layer, - optimizer=optimizer, - initial_shape=initial_shape, - use_yale=use_yale, - use_jaffe=use_jaffe, - ) - if verbose: - print("Built model with:") - print("\tDeconv layers: {}".format(deconv_layers)) - print("\tOutput shape: {}".format(model.output_shape[1:])) - - # Create training callbacks - - callbacks = list() - - if generate_intermediate: - intermediate_dir = os.path.join( - output_dir, 'intermediate.d{}.{}'.format(deconv_layers, optimizer)) - callbacks.append(GenerateIntermediate(intermediate_dir, instances.num_identities, - use_yale=use_yale, use_jaffe=use_jaffe)) - - model_path = os.path.join(output_dir, 'FaceGen.{}.model.d{}.{}.h5' - .format('YaleFaces' if use_yale else 'JAFFE' if use_jaffe else 'RaFD', deconv_layers, optimizer)) - - callbacks.append( - ModelCheckpoint( - model_path, - monitor='loss', verbose=0, save_best_only=True, - ) - ) - callbacks.append( - EarlyStopping(monitor='loss', patience=8) - ) - - # Load data and begin training - - if verbose: - print("Loading data...") - - if K.image_dim_ordering() == 'th': - image_size = model.output_shape[2:4] - else: - image_size = model.output_shape[1:3] - - inputs, outputs = instances.load_data(image_size, verbose=verbose) - - if verbose: - print("Training...") - - model.fit(inputs, outputs, batch_size=batch_size, nb_epoch=num_epochs, - callbacks=callbacks, shuffle=True, verbose=1) - - if verbose: - print("Done!") -#!/usr/bin/env python -# -*- coding: utf-8 -*- -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. 
This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -############################################################################### - -from setuptools import setup, find_packages - -with open('README.rst') as readme_file: - readme = readme_file.read() - -with open('HISTORY.rst') as history_file: - history = history_file.read() - -requirements = [ - 'click>=6.7', - 'cycler>=0.10.0', - 'Flask>=0.12', - 'h5py>=2.6.0', - 'itsdangerous>=0.24', - 'Jinja2>=2.9.5', - 'Keras>=1.2.2', - 'MarkupSafe>=0.23', - 'matplotlib>=2.0.0', - 'numpy>=1.12.0', - 'olefile>=0.44', - 'packaging>=16.8', - 'Pillow>=4.0.0', - 'protobuf>=3.2.0', - 'pyparsing>=2.1.10', - 'python-dateutil>=2.6.0', - 'pytz>=2016.10', - 'PyYAML>=3.12', - 'requests>=2.13.0', - 'scipy>=0.18.1', - 'six>=1.10.0', - 'Werkzeug>=0.11.15', -] - -# only add tensorflow as a requirement if it is not already provided. -# E.g. tensorflow-gpu -try: - import tensorflow -except ImportError: - requirements.append('tensorflow>=1.0.0') - -test_requirements = [ - 'pytest', - 'pytest-flask', - 'selenium==3.6.0', -] - -docs_require = [ - 'Sphinx', - 'sphinxcontrib-napoleon', - 'sphinx-rtd-theme' -] - -setup( - name='picasso_viz', - version='v0.2.0', - description="A CNN model visualizer", - long_description=readme + '\n\n' + history, - author="Ryan Henderson", - author_email='ryan@merantix.com', - url='https://github.com/merantix/picasso', - packages=find_packages(), - entry_points={ - 'console_scripts': [ - 'picasso=picasso.commands:main' - ], - }, - include_package_data=True, - package_data={'picasso': ['examples/keras/*', - 'examples/tensorflow/*', - 'examples/keras-vgg16/*', - 'examples/keras/data-volume/*', - 'examples/tensorflow/data-volume/*', - 'examples/keras-vgg16/data-volume/*', - 'templates/*', - 'static/*']}, - install_requires=requirements, - license="Eclipse Public License 1.0 (EPL-1.0)", - zip_safe=False, - keywords='picasso', - classifiers=[ - 'Development Status :: 2 - Pre-Alpha', - 'Intended Audience :: Developers', - 'License :: OSI Approved :: Eclipse Public License 1.0 (EPL-1.0)', - 'Natural Language :: English', - 'Programming Language :: Python :: 3.5', - ], - test_suite='tests', - tests_require=test_requirements, - extras_require={ - 'test': test_requirements, - 'docs': docs_require - }, - setup_requires=['pytest_runner'] -) -#!/usr/bin/env python -# -*- coding: utf-8 -*- -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. 
This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -############################################################################### -"""Update encrypted deploy password in Travis config file -""" - - -from __future__ import print_function -import base64 -import json -import os -from getpass import getpass -import yaml -from cryptography.hazmat.primitives.serialization import load_pem_public_key -from cryptography.hazmat.backends import default_backend -from cryptography.hazmat.primitives.asymmetric.padding import PKCS1v15 - - -try: - from urllib import urlopen -except: - from urllib.request import urlopen - - -GITHUB_REPO = 'merantix/picasso' -TRAVIS_CONFIG_FILE = os.path.join( - os.path.dirname(os.path.abspath(__file__)), '.travis.yml') - - -def load_key(pubkey): - """Load public RSA key, with work-around for keys using - incorrect header/footer format. - - Read more about RSA encryption with cryptography: - https://cryptography.io/latest/hazmat/primitives/asymmetric/rsa/ - """ - try: - return load_pem_public_key(pubkey.encode(), default_backend()) - except ValueError: - # workaround for https://github.com/travis-ci/travis-api/issues/196 - pubkey = pubkey.replace('BEGIN RSA', 'BEGIN').replace('END RSA', 'END') - return load_pem_public_key(pubkey.encode(), default_backend()) - - -def encrypt(pubkey, password): - """Encrypt password using given RSA public key and encode it with base64. - - The encrypted password can only be decrypted by someone with the - private key (in this case, only Travis). - """ - key = load_key(pubkey) - encrypted_password = key.encrypt(password, PKCS1v15()) - return base64.b64encode(encrypted_password) - - -def fetch_public_key(repo): - """Download RSA public key Travis will use for this repo. - - Travis API docs: http://docs.travis-ci.com/api/#repository-keys - """ - keyurl = 'https://api.travis-ci.org/repos/{0}/key'.format(repo) - data = json.loads(urlopen(keyurl).read().decode()) - if 'key' not in data: - errmsg = "Could not find public key for repo: {}.\n".format(repo) - errmsg += "Have you already added your GitHub repo to Travis?" - raise ValueError(errmsg) - return data['key'] - - -def prepend_line(filepath, line): - """Rewrite a file adding a line to its beginning. - """ - with open(filepath) as f: - lines = f.readlines() - - lines.insert(0, line) - - with open(filepath, 'w') as f: - f.writelines(lines) - - -def load_yaml_config(filepath): - with open(filepath) as f: - return yaml.load(f) - - -def save_yaml_config(filepath, config): - with open(filepath, 'w') as f: - yaml.dump(config, f, default_flow_style=False) - - -def update_travis_deploy_password(encrypted_password): - """Update the deploy section of the .travis.yml file - to use the given encrypted password. 
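-
-    Example (the encrypted value is a placeholder, not a real key):
-
-        update_travis_deploy_password('c2VjcmV0X3Rva2Vu...')
-
-    after which the deploy section of .travis.yml contains roughly:
-
-        deploy:
-          password:
-            secure: c2VjcmV0X3Rva2Vu...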
- """ - config = load_yaml_config(TRAVIS_CONFIG_FILE) - - config['deploy']['password'] = dict(secure=encrypted_password) - - save_yaml_config(TRAVIS_CONFIG_FILE, config) - - line = ('# This file was autogenerated and will overwrite' - ' each time you run travis_pypi_setup.py\n') - prepend_line(TRAVIS_CONFIG_FILE, line) - - -def main(args): - public_key = fetch_public_key(args.repo) - password = args.password or getpass('PyPI password: ') - update_travis_deploy_password(encrypt(public_key, password.encode())) - print("Wrote encrypted password to .travis.yml -- you're ready to deploy") - - -if '__main__' == __name__: - import argparse - parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument('--repo', default=GITHUB_REPO, - help='GitHub repo (default: %s)' % GITHUB_REPO) - parser.add_argument('--password', - help='PyPI password (will prompt if not provided)') - - args = parser.parse_args() - main(args) -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# -# picasso documentation build configuration file, created by -# sphinx-quickstart on Tue Jul 9 22:26:36 2013. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -import sys -import os -import sphinx_rtd_theme - -# If extensions (or modules to document with autodoc) are in another -# directory, add these directories to sys.path here. If the directory is -# relative to the documentation root, use os.path.abspath to make it -# absolute, like shown here. -#sys.path.insert(0, os.path.abspath('.')) - -# Get the project root dir, which is the parent dir of this -cwd = os.getcwd() -project_root = os.path.dirname(cwd) - -# Insert the project root dir as the first element in the PYTHONPATH. -# This lets us ensure that the source package is imported, and that its -# version is used. -sys.path.insert(0, project_root) - -# -- General configuration --------------------------------------------- - -# If your documentation needs a minimal Sphinx version, state it here. -#needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom ones. -extensions = ['sphinx.ext.autodoc', - 'sphinxcontrib.napoleon', 'sphinx.ext.todo'] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix of source filenames. -source_suffix = '.rst' - -# The encoding of source files. -#source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'picasso' -copyright = u"2017, Ryan Henderson" - -# The version info for the project you're documenting, acts as replacement -# for |version| and |release|, also used in various other places throughout -# the built documents. -# -# The short X.Y version. -version = 'v0.2.0' -# The full version, including alpha/beta/rc tags. -release = 'v0.2.0' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -#language = None - -# There are two options for replacing |today|: either, you set today to -# some non-false value, then it is used: -#today = '' -# Else, today_fmt is used as the format for a strftime call. 
-#today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -exclude_patterns = ['_build'] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -#default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -#add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -#add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -#show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -#modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built -# documents. -#keep_warnings = False - - -# -- Options for HTML output ------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -html_theme = 'sphinx_rtd_theme' -html_theme_path = [sphinx_rtd_theme.get_html_theme_path()] - -# Theme options are theme-specific and customize the look and feel of a -# theme further. For a list of options available for each theme, see the -# documentation. -#html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -#html_theme_path = [] - -# The name for this set of Sphinx documents. If None, it defaults to -# " v documentation". -#html_title = None - -# A shorter title for the navigation bar. Default is the same as -# html_title. -#html_short_title = None - -# The name of an image file (relative to this directory) to place at the -# top of the sidebar. -#html_logo = None - -# The name of an image file (within the static path) to use as favicon -# of the docs. This file should be a Windows icon file (.ico) being -# 16x16 or 32x32 pixels large. -#html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) -# here, relative to this directory. They are copied after the builtin -# static files, so a file named "default.css" will overwrite the builtin -# "default.css". -html_static_path = ['_static'] - -# If not '', a 'Last updated on:' timestamp is inserted at every page -# bottom, using the given strftime format. -#html_last_updated_fmt = '%b %d, %Y' - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -#html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -#html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names -# to template names. -#html_additional_pages = {} - -# If false, no module index is generated. -#html_domain_indices = True - -# If false, no index is generated. -#html_use_index = True - -# If true, the index is split into individual pages for each letter. -#html_split_index = False - -# If true, links to the reST sources are added to the pages. -#html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. -# Default is True. -#html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. -# Default is True. -#html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages -# will contain a tag referring to it. 
The value of this option -# must be the base URL from which the finished HTML is served. -#html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -#html_file_suffix = None - -# Output file base name for HTML help builder. -htmlhelp_basename = 'picassodoc' - - -# -- Options for LaTeX output ------------------------------------------ - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # 'preamble': '', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, author, documentclass -# [howto/manual]). -latex_documents = [ - ('index', 'picasso.tex', - u'picasso Documentation', - u'Ryan Henderson', 'manual'), -] - -# The name of an image file (relative to this directory) to place at -# the top of the title page. -#latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings -# are parts, not chapters. -#latex_use_parts = False - -# If true, show page references after internal links. -#latex_show_pagerefs = False - -# If true, show URL addresses after external links. -#latex_show_urls = False - -# Documents to append as an appendix to all manuals. -#latex_appendices = [] - -# If false, no module index is generated. -#latex_domain_indices = True - - -# -- Options for manual page output ------------------------------------ - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - ('index', 'picasso', - u'picasso Documentation', - [u'Ryan Henderson'], 1) -] - -# If true, show URL addresses after external links. -#man_show_urls = False - - -# -- Options for Texinfo output ---------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - ('index', 'picasso', - u'picasso Documentation', - u'Ryan Henderson', - 'picasso', - 'One line description of project.', - 'Miscellaneous'), -] - -# Documents to append as an appendix to all manuals. -#texinfo_appendices = [] - -# If false, no module index is generated. -#texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -#texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu. -#texinfo_no_detailmenu = False - - -def skip(app, what, name, obj, skip, options): - if name == "__init__": - return False - return skip - - -# See https://github.com/rtfd/readthedocs.org/issues/1139 -def run_apidoc(_): - import subprocess - module = '../picasso' - cur_dir = os.path.abspath(os.path.dirname(__file__)) - output_path = os.path.join(cur_dir, 'source') - cmd_path = 'sphinx-apidoc' - if hasattr(sys, 'real_prefix'): # Check to see if we are in a virtualenv - # If we are, assemble the path manually - cmd_path = os.path.abspath(os.path.join(sys.prefix, - 'bin', 'sphinx-apidoc')) - subprocess.check_call([cmd_path, '-f', '-o', - output_path, module, '--force']) - - -def setup(app): - app.connect('builder-inited', run_apidoc) - app.connect("autodoc-skip-member", skip) -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. 
This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -############################################################################### -from PIL import Image -import numpy as np -import pytest -from selenium.webdriver import Firefox -from selenium.webdriver.firefox.options import Options - -from picasso import create_app - - -@pytest.fixture -def app(): - _app = create_app() - return _app - - -@pytest.fixture(scope='module') -def webdriver(): - options = Options() - options.add_argument('-headless') - driver = Firefox(firefox_options=options) - yield driver - driver.quit() -#!/usr/bin/env python -# -*- coding: utf-8 -*- -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Jan Steinke - Selenium integration tests -############################################################################### - -""" -test_selenium ----------------------------------- - -Integration tests for `picasso` module. -""" - -import pytest -from flask import url_for - - -@pytest.mark.usefixtures('live_server') -class TestIntegration: - - def test_page_load(self, webdriver): - url = url_for('frontend.index', _external=True) - webdriver.get(url) - webdriver.find_element_by_id('appstate_checkpoint') -# -*- coding: utf-8 -*- - -__author__ = """Ryan Henderson""" -__email__ = 'ryan@merantix.com' -__version__ = 'v0.2.0' - -from flask import Flask -import os -import sys -from picasso.interfaces.rest import API -from picasso.interfaces.web import frontend - -if sys.version_info.major < 3 or (sys.version_info.major == 3 and - sys.version_info.minor < 5): - raise SystemError('Python 3.5+ required, found {}'.format(sys.version)) - - -def create_app(debug=False): - _app = Flask(__name__) - _app.debug = debug - _app.config.from_object('picasso.config.Default') - _app.register_blueprint(API, url_prefix='/api') - _app.register_blueprint(frontend, url_prefix='/') - - # Use a bogus secret key for debugging ease. No client information is stored; - # the secret key is only necessary for generating the session cookie. - if _app.debug: - _app.secret_key = '...' - else: - _app.secret_key = os.urandom(24) - - return _app - - -app = create_app() - -if os.getenv('PICASSO_SETTINGS'): - app.config.from_envvar('PICASSO_SETTINGS') - -deprecated_settings = ['BACKEND_PREPROCESSOR_NAME', - 'BACKEND_PREPROCESSOR_PATH', - 'BACKEND_POSTPROCESSOR_NAME', - 'BACKEND_POSTPROCESSOR_PATH', - 'BACKEND_PROB_DECODER_NAME', - 'BACKEND_PROB_DECODER_PATH', - 'DATA_DIR'] - -if any([x in app.config.keys() for x in deprecated_settings]): - raise ValueError('It looks like you\'re using a deprecated' - ' setting. The settings and utility functions' - ' have been changed as of version v0.2.0 (and ' - 'you\'re using {}). 
Changing to the updated'
-                     ' settings is trivial: see '
-                     'https://picasso.readthedocs.io/en/latest/models.html'
-                     ' and '
-                     'https://picasso.readthedocs.io/en/latest/settings.html'
-                     .format(__version__))
-###############################################################################
-# Copyright (c) 2017 Merantix GmbH
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the Eclipse Public License v1.0
-# which accompanies this distribution, and is available at
-# http://www.eclipse.org/legal/epl-v10.html
-#
-# Contributors:
-#    Ryan Henderson - initial API and implementation and/or initial
-#    documentation
-#    Josh Chen - refactor and class config
-###############################################################################
-import os
-
-base_dir = os.path.dirname(__file__)  # only for default config
-
-
-class Default:
-    """Default settings for the Flask app.
-
-    The Flask app uses these settings if no custom settings are defined. You
-    can define custom settings by creating a Python module, defining global
-    variables in that module, and setting the environment variable
-    `PICASSO_SETTINGS` to the path to that module.
-
-    If `PICASSO_SETTINGS` is not set, or if any particular setting is not
-    defined in the indicated module, then the Flask app uses these default
-    settings.
-
-    """
-    # :obj:`str`: filepath of the module containing the model to run
-    MODEL_CLS_PATH = os.path.join(
-        base_dir, 'examples', 'keras', 'model.py')
-
-    # :obj:`str`: name of model class
-    MODEL_CLS_NAME = 'KerasMNISTModel'
-
-    # :obj:`dict`: dictionary of args to pass to the `load` method of the
-    # model instance.
-    MODEL_LOAD_ARGS = {
-        'data_dir': os.path.join(base_dir, 'examples', 'keras', 'data-volume'),
-    }
-# -*- coding: utf-8 -*-
-###############################################################################
-# Copyright (c) 2017 Merantix GmbH
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the Eclipse Public License v1.0
-# which accompanies this distribution, and is available at
-# http://www.eclipse.org/legal/epl-v10.html
-#
-# Contributors:
-#    Ryan Henderson - initial API and implementation and/or initial
-#    documentation
-#    Jan Steinke - Restful API
-###############################################################################
-"""utility code to provide the Flask server with information
-
-This code only provides utility functions to access the backend.
-"""
-from types import ModuleType
-from importlib import import_module
-import inspect
-from flask import (
-    g,
-    current_app
-)
-from picasso.visualizations import *
-from picasso.visualizations.base import BaseVisualization
-from picasso.models.base import load_model
-
-APP_TITLE = 'Picasso Visualizer'
-
-
-def _get_visualization_classes():
-    """Import visualization classes dynamically
-    """
-    visualization_attr = vars(import_module('picasso.visualizations'))
-    visualization_submodules = [
-        visualization_attr[x]
-        for x in visualization_attr
-        if isinstance(visualization_attr[x], ModuleType)]
-    visualization_classes = []
-    for submodule in visualization_submodules:
-        attrs = vars(submodule)
-        for attr_name in attrs:
-            attr = attrs[attr_name]
-            if (inspect.isclass(attr)
-                    and issubclass(attr, BaseVisualization)
-                    and attr is not BaseVisualization):
-                visualization_classes.append(attr)
-    return visualization_classes
-
-
-def get_model():
-    """Get the NN model that's being analyzed from the request context. 
Put - the model in the request context if it is not yet there. - - Returns: - instance of :class:`.models.model.Model` or derived - class - """ - if not hasattr(g, 'model'): - g.model = load_model(current_app.config['MODEL_CLS_PATH'], - current_app.config['MODEL_CLS_NAME'], - current_app.config['MODEL_LOAD_ARGS']) - return g.model - - -def get_visualizations(): - """Get the available visualizations from the request context. Put the - visualizations in the request context if they are not yet there. - - Returns: - :obj:`list` of instances of :class:`.BaseVisualization` or - derived class - - """ - if not hasattr(g, 'visualizations'): - g.visualizations = {} - for VisClass in _get_visualization_classes(): - vis = VisClass(get_model()) - g.visualizations[vis.__class__.__name__] = vis - return g.visualizations - - -def get_app_state(): - """Get current status of application in context - - Returns: - :obj:`dict` of application status - - """ - if not hasattr(g, 'app_state'): - model = get_model() - g.app_state = { - 'app_title': APP_TITLE, - 'model_name': type(model).__name__, - 'latest_ckpt_name': model.latest_ckpt_name, - 'latest_ckpt_time': model.latest_ckpt_time - } - return g.app_state -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -############################################################################### -from PIL import Image -import numpy as np -import pytest -from selenium.webdriver import Firefox -from selenium.webdriver.firefox.options import Options - -from picasso import create_app - - -@pytest.fixture -def app(): - _app = create_app() - return _app - - -@pytest.fixture(scope='session') -def random_image_files(tmpdir_factory): - fn = tmpdir_factory.mktemp('images') - for i in range(4): - imarray = np.random.rand(10**i, 10**i, 3) * 255 - img = Image.fromarray(imarray.astype('uint8')).convert('RGBA') - img.save(str(fn.join('{}.png'.format(i))), 'PNG') - return fn - - -@pytest.fixture -def test_image(): - return './tests/resources/input/9.png' - - -@pytest.fixture -def example_prob_array(): - return np.random.random((3, 10)) - - -@pytest.fixture -def base_model(): - from picasso.models.base import BaseModel - - class BaseModelForTest(BaseModel): - def load(self, data_dir): - pass - return BaseModelForTest() - - -@pytest.fixture -def tensorflow_model(): - from picasso.models.tensorflow import TFModel - return TFModel() -#!/usr/bin/env python -# -*- coding: utf-8 -*- -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. 
This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -# Josh Chen - refactor and class config -# Jan Steinke - Restful API -# XdpAreKid - Support Keras >= 2 -############################################################################### - -""" -test_picasso ----------------------------------- - -Tests for `picasso` module. -""" -import os - - -class TestBaseModel: - - def test_decode_prob(self, base_model, example_prob_array): - results = base_model.decode_prob(example_prob_array) - for i, result in enumerate(results): - max_val = max(example_prob_array[i]) - assert result[0]['prob'] == '{:.3f}'.format(max_val) - assert result[0]['index'] == example_prob_array[i].argmax() - assert result[0]['name'] == str(result[0]['index']) - - -class TestKerasModel: - - def test_saved_model(self): - # tests that KerasModel can load from a saved model - import tempfile - from picasso.models.keras import KerasModel - - data_path = os.path.join('picasso', 'examples', - 'keras', 'data-volume') - - km = KerasModel() - km.load(data_path) - - temp = tempfile.mkdtemp() - km._model.save(os.path.join(temp, 'temp.h5')) - - km = KerasModel() - km.load(temp) - - assert km.tf_predict_var is not None - - -class TestTensorflowBackend: - - def test_tensorflow_backend(self, tensorflow_model): - """Only tests tensorflow backend loads without error - - """ - tensorflow_model.load( - data_dir=os.path.join('picasso', 'examples', 'tensorflow', - 'data-volume'), - tf_predict_var='Softmax:0', - tf_input_var='convolution2d_input_1:0') - assert tensorflow_model.tf_predict_var is not None - assert tensorflow_model.tf_input_var is not None -import io -import json - -import pytest -from flask import url_for -from PIL import Image, ImageChops - - -def verify_data(client, data, vis, prefix=''): - res_path = './tests/resources/' - assert data['input_file_name'] - assert data['predict_probs'] - if data['has_output']: - assert data['output_file_names'] - i = 1 - for filename in data['output_file_names']: - actual_image = client.get( - url_for('api.download_outputs', filename=filename)).data - actual_processed_input = Image.open(io.BytesIO(actual_image)) - expected_processed_input = Image.open( - res_path + vis.__name__ + '/' + prefix + 'output/' + str(i) + '.png') - assert ImageChops.difference( - actual_processed_input, expected_processed_input).getbbox() is None - i += 1 - if data['has_processed_input']: - assert data['processed_input_file_name'] - filename = data['processed_input_file_name'] - actual_image = client.get( - url_for('api.download_outputs', filename=filename)).data - actual_processed_input = Image.open(io.BytesIO(actual_image)) - expected_processed_input = Image.open( - res_path + vis.__name__ + '/' + prefix + 'pre/default.png') - assert ImageChops.difference( - actual_processed_input, expected_processed_input).getbbox() is None - - -class TestRestAPI: - from picasso.utils import _get_visualization_classes - - def test_api_root_get(self, client): - assert client.get(url_for('api.root')).status_code == 200 - - def test_api_get_app_state(self, client): - response = client.get(url_for('api.app_state')) - data = json.loads(response.get_data(as_text=True)) - assert data['app_title'] - assert data['latest_ckpt_name'] - assert data['latest_ckpt_time'] - assert 
data['model_name']
-
-    def test_api_uploading_file(self, client, random_image_files):
-        upload_file = str(random_image_files.listdir()[0])
-        with open(upload_file, "rb") as imageFile:
-            f = imageFile.read()
-            b = bytearray(f)
-        data = dict()
-        data['file'] = (io.BytesIO(b), 'test.png')
-        response = client.post(url_for('api.images'), data=data)
-        data = json.loads(response.get_data(as_text=True))
-        assert data['ok'] == 'true'
-        assert type(data['file']) is str
-        assert type(data['uid']) is int
-
-    @pytest.mark.parametrize("vis", _get_visualization_classes())
-    def test_api_visualizing_input(self, client, test_image, vis):
-        upload_file = test_image
-        with open(upload_file, "rb") as imageFile:
-            f = imageFile.read()
-            b = bytearray(f)
-        upload_data = dict()
-        upload_data['file'] = (io.BytesIO(b), 'test.png')
-        upload_response = client.post(url_for('api.images'), data=upload_data)
-        upload_response_data = json.loads(
-            upload_response.get_data(as_text=True))
-        base_url = '{base}?visualizer={visualizer}&image={image}'.format(
-            base=url_for('api.visualize'),
-            visualizer=vis.__name__,
-            image=str(upload_response_data['uid'])
-        )
-        settings_string = ''
-        for setting in vis.ALLOWED_SETTINGS:
-            settings_string += "&{0}={1}".format(setting,
-                                                 vis.ALLOWED_SETTINGS[setting][-1])
-
-        default_response = client.get(base_url)
-        assert default_response.status_code == 200
-        raw_data_from_default_response = default_response.get_data(
-            as_text=True)
-        default_data = json.loads(raw_data_from_default_response)
-        verify_data(client, default_data, vis)
-
-        settings_response = client.get(base_url + settings_string)
-        assert settings_response.status_code == 200
-        raw_data_from_settings_response = settings_response.get_data(
-            as_text=True)
-        settings_data = json.loads(raw_data_from_settings_response)
-        verify_data(client, settings_data, vis, prefix='settings_')
-
-    def test_listing_images(self, client):
-        response = client.get(url_for('api.images'))
-        assert response.status_code == 200
-
-    def test_end_session(self, client):
-        response = client.get(url_for('api.reset'))
-        assert response.status_code == 200
-
-    def test_visualizers(self, client):
-        response = client.get(url_for('api.visualizers'))
-        assert response.status_code == 200
-
-    @pytest.mark.parametrize("vis", _get_visualization_classes())
-    def test_visualizers_information(self, client, vis):
-        response = client.get(
-            url_for('api.visualizers_information', vis_name=vis.__name__))
-        assert response.status_code == 200
-# -*- coding: utf-8 -*-
-###############################################################################
-# Copyright (c) 2017 Merantix GmbH
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the Eclipse Public License v1.0
-# which accompanies this distribution, and is available at
-# http://www.eclipse.org/legal/epl-v10.html
-#
-# Contributors:
-#    Jan Steinke - Restful API
-###############################################################################
-"""Flask blueprint for accessing and manipulating image resources
-
-This is used by the main flask application to provide a REST API. 
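-
-A minimal round trip against a running server might look like this (host and
-port are assumptions for a default Flask run; the endpoints are the routes
-defined below):
-
-    import requests
-
-    s = requests.Session()  # uploads are tracked per session cookie
-    uid = s.post('http://localhost:5000/api/images',
-                 files={'file': open('9.png', 'rb')}).json()['uid']
-    result = s.get('http://localhost:5000/api/visualize',
-                   params={'visualizer': 'ClassProbabilities',
-                           'image': uid}).json()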
- """
-
-import os
-import shutil
-import logging
-from tempfile import mkdtemp
-
-from PIL import Image
-from werkzeug.utils import secure_filename
-from flask import (
-    Blueprint,
-    current_app,
-    jsonify,
-    session,
-    request,
-    send_from_directory)
-from picasso import __version__
-from picasso.utils import (
-    get_app_state,
-    get_visualizations
-)
-
-API = Blueprint('api', __name__)
-logger = logging.getLogger(__name__)
-
-
-@API.before_request
-def initialize_new_session():
-    """Check session and initialize if necessary
-
-    Before every request, check the user session. If no session exists, add
-    one and provide temporary locations for images.
-
-    """
-    if 'image_uid_counter' in session and 'image_list' in session:
-        logger.debug('images are already being tracked')
-    else:
-        # reset image list counter for the session
-        session['image_uid_counter'] = 0
-        session['image_list'] = []
-    if 'img_input_dir' in session and 'img_output_dir' in session:
-        logger.debug('temporary image directories already exist')
-    else:
-        # make image upload directory
-        session['img_input_dir'] = mkdtemp()
-        session['img_output_dir'] = mkdtemp()
-
-
-@API.route('/', methods=['GET'])
-def root():
-    """The root of the REST API
-
-    Displays a hello world message.
-
-    """
-    return jsonify(message='Picasso {version}. '
-                           'See API documentation at: '
-                           'https://picasso.readthedocs.io/en/latest/api.html'
-                   .format(version=__version__),
-                   version=__version__)
-
-
-@API.route('/app_state', methods=['GET'])
-def app_state():
-    state = get_app_state()
-    return jsonify(state)
-
-
-@API.route('/images', methods=['POST', 'GET'])
-def images():
-    """Upload images via REST interface
-
-    Check if file upload was successful and sanitize user input.
-
-    TODO: return file URL instead of filename
-
-    """
-    if request.method == 'POST':
-        file_upload = request.files['file']
-        if file_upload:
-            image = dict()
-            image['filename'] = secure_filename(file_upload.filename)
-            full_path = os.path.join(session['img_input_dir'],
-                                     image['filename'])
-            file_upload.save(full_path)
-            image['uid'] = session['image_uid_counter']
-            session['image_uid_counter'] += 1
-            current_app.logger.debug('File %d is saved as %s',
-                                     image['uid'],
-                                     image['filename'])
-            session['image_list'].append(image)
-            return jsonify(ok="true", file=image['filename'], uid=image['uid'])
-        return jsonify(ok="false")
-    if request.method == 'GET':
-        return jsonify(images=session['image_list'])
-
-
-@API.route('/visualizers', methods=['GET'])
-def visualizers():
-    """Get a list of available visualizers
-
-    Responds with a JSON list of available visualizers
-
-    """
-    list_of_visualizers = []
-    for visualizer in get_visualizations():
-        list_of_visualizers.append({'name': visualizer})
-    return jsonify(visualizers=list_of_visualizers)
-
-
-@API.route('/visualizers/<vis_name>', methods=['GET'])
-def visualizers_information(vis_name):
-    vis = get_visualizations()[vis_name]
-
-    return jsonify(settings=vis.ALLOWED_SETTINGS)
-
-
-@API.route('/visualize', methods=['GET'])
-def visualize():
-    """Trigger a visualization via the REST API
-
-    Takes a single image and generates the visualization data, returning the
-    output exactly as given by the target visualization. 
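-
-    Example request (query arguments as parsed below; the `Occlusion`
-    setting is specific to the PartialOcclusion visualizer):
-
-        GET /api/visualize?visualizer=PartialOcclusion&image=0&Occlusion=black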
-
-    """
-
-    session['settings'] = {}
-    image_uid = request.args.get('image')
-    vis_name = request.args.get('visualizer')
-    vis = get_visualizations()[vis_name]
-    if vis.ALLOWED_SETTINGS:
-        for key in vis.ALLOWED_SETTINGS.keys():
-            if request.args.get(key) is not None:
-                session['settings'][key] = request.args.get(key)
-            else:
-                session['settings'][key] = vis.ALLOWED_SETTINGS[key][0]
-    else:
-        logger.debug(
-            'Selected Visualizer {0} has no settings.'.format(vis_name))
-    inputs = []
-    for image in session['image_list']:
-        if image['uid'] == int(image_uid):
-            full_path = os.path.join(session['img_input_dir'],
-                                     image['filename'])
-            entry = dict()
-            entry['filename'] = image['filename']
-            entry['data'] = Image.open(full_path)
-            inputs.append(entry)
-
-    vis.update_settings(session['settings'])
-    output = vis.make_visualization(
-        inputs, output_dir=session['img_output_dir'])
-    return jsonify(output[0])
-
-
-@API.route('/reset', methods=['GET'])
-def reset():
-    """Delete the session and clear temporary directories
-
-    """
-    shutil.rmtree(session['img_input_dir'])
-    shutil.rmtree(session['img_output_dir'])
-    session.clear()
-    return jsonify(ok='true')
-
-
-@API.route('/inputs/<filename>')
-def download_inputs(filename):
-    """For serving input images"""
-    return send_from_directory(session['img_input_dir'],
-                               filename)
-
-
-@API.route('/outputs/<filename>')
-def download_outputs(filename):
-    """For serving output images"""
-    return send_from_directory(session['img_output_dir'],
-                               filename)
-
-
-@API.errorhandler(500)
-def internal_server_error(e):
-    return jsonify(ok=False, error=str(e), code=500), 500
-
-
-@API.errorhandler(404)
-def not_found_error(e):
-    return jsonify(ok=False, error=str(e), code=404), 404
-# -*- coding: utf-8 -*-
-###############################################################################
-# Copyright (c) 2017 Merantix GmbH
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the Eclipse Public License v1.0
-# which accompanies this distribution, and is available at
-# http://www.eclipse.org/legal/epl-v10.html
-#
-# Contributors:
-#    Jan Steinke - Restful API
-###############################################################################
-"""Flask blueprint for interfacing with picasso via web.
-
-This is used by the main flask application to provide a web front-end based on the REST API.
-"""
-
-from flask import (
-    render_template,
-    Blueprint
-)
-
-frontend = Blueprint('frontend', __name__)
-
-
-@frontend.route('/')
-def index():
-    return render_template('index.html')
-###############################################################################
-# Copyright (c) 2017 Merantix GmbH
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the Eclipse Public License v1.0
-# which accompanies this distribution, and is available at
-# http://www.eclipse.org/legal/epl-v10.html
-#
-# Contributors:
-#    Ryan Henderson - initial API and implementation and/or initial
-#    documentation
-#    Josh Chen - refactor and class config
-###############################################################################
-import importlib
-from operator import itemgetter
-import warnings
-
-
-def load_model(model_cls_path, model_cls_name, model_load_args):
-    """Get an instance of the described model.
-
-    Args:
-        model_cls_path: Path to the module in which the model class
-            is defined.
-        model_cls_name: Name of the model class.
-        model_load_args: Dictionary of args to pass to the `load` method
-            of the model instance. 
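-
-    Example (paths mirror the defaults in picasso.config.Default,
-    written here relative to the repository root):
-
-        model = load_model(
-            'picasso/examples/keras/model.py',
-            'KerasMNISTModel',
-            {'data_dir': 'picasso/examples/keras/data-volume'})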
- - Returns: - An instance of :class:`.models.model.BaseModel` or subclass - - """ - spec = importlib.util.spec_from_file_location('active_model', - model_cls_path) - model_module = importlib.util.module_from_spec(spec) - spec.loader.exec_module(model_module) - model_cls = getattr(model_module, model_cls_name) - model = model_cls() - if not isinstance(model, BaseModel): - warnings.warn("Loaded model '%s' at '%s' is not an instance of %r" - % (model_cls_name, model_cls_path, BaseModel)) - model.load(**model_load_args) - return model - - -class BaseModel: - """Interface encapsulating a trained NN model usable for prediction. - - This interface defines: - - - How to load the model's topology and parameters from disk - - How to preprocess a batch of examples for the model - - How to perform prediction using the model - - Etc - - """ - - def __init__(self, - top_probs=5): - """Create a new instance of this model. - - `BaseModel` is an interface and should only be instantiated via a - subclass. - - Args: - top_probs (int): Number of classes to display per result. For - instance, VGG16 has 1000 classes, we don't want to display a - visualization for every single possibility. Defaults to 5. - - """ - self.top_probs = top_probs - - self._sess = None - self._tf_input_var = None - self._tf_predict_var = None - self._model_name = None - self._latest_ckpt_name = None - self._latest_ckpt_time = None - - def load(self, *args, **kwargs): - """Load the model's graph and parameters from disk, restoring the model - into `self._sess` so that it can be run for inference. - - Subclasses should set the instance variables [self._sess, - self._tf_input_var, self._tf_predict_var, self._description] in their - implementation. - - """ - raise NotImplementedError - - @property - def sess(self): - """Tensorflow session that can be used to evaluate tensors in the - model. - - :type: :obj:`tf.Session` - - """ - return self._sess - - @property - def tf_input_var(self): - """Tensorflow tensor that represents the model's inputs. - - :type: :obj:`tf.Tensor` - - """ - return self._tf_input_var - - @property - def tf_predict_var(self): - """Tensorflow tensor that represents the model's predicted class - probabilities. - - :type: :obj:`tf.Tensor` - - """ - return self._tf_predict_var - - @property - def latest_ckpt_time(self): - """Timestamp of the latest checkpoint - - :type: str - - """ - return self._latest_ckpt_time - - @property - def latest_ckpt_name(self): - """Filename of the checkpoint - - :type: str - - """ - return self._latest_ckpt_name - - def preprocess(self, raw_inputs): - """Preprocess raw inputs into the format required by the model. - - E.g, the raw image may need to converted to a numpy array of the - appropriate dimension. - - By default, we perform no preprocessing. - - Args: - raw_inputs (:obj:`list` of :obj:`PIL.Image`): List of raw - input images of any mode and shape. - - Returns: - array (float32): Images ready to be fed into the model. - - """ - return raw_inputs - - def predict(self, inputs): - """Given preprocessed inputs, generate class probabilities by using the - model to perform inference. - - Given an iterable of examples or numpy array where the first - dimension is the number of example, return a n_examples x - n_classes array of class predictions - - Args: - inputs: Iterable of examples (e.g., a numpy array whose first - dimension is the batch size). - - Returns: - Class probabilities for each input example, as a numpy array of - shape (num_examples, num_classes). 
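-
-        Example (shapes are illustrative, for a 10-class MNIST model
-        and a batch of 32 examples):
-
-            probs = model.predict(batch)  # batch.shape == (32, 28, 28, 1)
-            probs.shape                   # -> (32, 10)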
- - """ - raise NotImplementedError - - def decode_prob(self, class_probabilities): - """Given predicted class probabilites for a set of examples, annotate - each logit with a class name. - - By default, we name each class using its index in the logits array. - - Args: - class_probabilities (array): Class probabilities as output by - `self.predict`, i.e., a numpy array of shape (num_examples, - num_classes). - - Returns: - Annotated class probabilities for each input example, as a list of - dicts where each dict is formatted as: - { - 'index': class_index, - 'name': class_name, - 'prob': class_probability - } - - """ - results = [] - for row in class_probabilities: - entries = [] - for i, prob in enumerate(row): - entries.append({'index': i, - 'name': str(i), - 'prob': prob}) - - entries = sorted(entries, - key=itemgetter('prob'), - reverse=True)[:self.top_probs] - - for entry in entries: - entry['prob'] = '{:.3f}'.format(entry['prob']) - results.append(entries) - return results -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -# Josh Chen - refactor and class config -############################################################################### -from datetime import datetime -import glob -import json -import os - -import keras.backend as K -from keras.models import model_from_json, load_model - -from picasso.models.base import BaseModel - - -class KerasModel(BaseModel): - """Implements model loading functions for Keras. - - Using this Keras module will require the h5py library, which is not - included with Keras. - - """ - - def load(self, data_dir): - """Load graph and weight data. - - Args: - data_dir (:obj:`str`): location of Keras checkpoint (`.hdf5`) files - and model (in `.json`) structure. The default behavior - is to take the latest of each, by OS timestamp. - """ - # for tensorflow compatibility - K.set_learning_phase(0) - - # find newest ckpt and graph files - try: - latest_ckpt = max(glob.iglob( - os.path.join(data_dir, '*.h*5')), key=os.path.getctime) - latest_ckpt_name = os.path.basename(latest_ckpt) - latest_ckpt_time = str( - datetime.fromtimestamp(os.path.getmtime(latest_ckpt))) - except ValueError: - raise FileNotFoundError('No checkpoint (.hdf5 or .h5) files ' - 'available at {}'.format(data_dir)) - try: - latest_json = max(glob.iglob(os.path.join(data_dir, '*.json')), - key=os.path.getctime) - with open(latest_json, 'r') as f: - model_json = json.loads(f.read()) - self._model = model_from_json(model_json) - - self._model.load_weights(latest_ckpt) - except ValueError: - try: - self._model = load_model(latest_ckpt) - except ValueError: - raise FileNotFoundError('The (.hdf5 or .h5) files available at' - '{} don\'t have the model' - ' architecture.' 
- .format(latest_ckpt)) - - self._sess = K.get_session() - self._tf_predict_var = self._model.outputs[0] - self._tf_input_var = self._model.inputs[0] - self._model_name = type(self).__name__ - self._latest_ckpt_name = latest_ckpt_name - self._latest_ckpt_time = latest_ckpt_time - - def predict(self, input_array): - return self._model.predict(input_array) -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -# Josh Chen - refactor and class config -############################################################################### -from datetime import datetime -import glob -import os - -import tensorflow as tf - -from picasso.models.base import BaseModel - - -class TFModel(BaseModel): - """Implements model loading functions for Tensorflow. - - """ - - def load(self, data_dir, tf_input_var=None, tf_predict_var=None): - """Load graph and weight data. - - Args: - data_dir (:obj:`str`): location of tensorflow checkpoint data. - We'll need the .meta file to reconstruct the graph and the data - (checkpoint) files to fill in the weights of the model. The - default behavior is take the latest files, by OS timestamp. - tf_input_var (:obj:`str`): Name of the tensor corresponding to the - model's inputs. You must define this if you are loading the - model from a checkpoint. - tf_predict_var (:obj:`str`): Name of the tensor corresponding to - the model's predictions. You must define this if you are - loading the model from a checkpoint. 
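-
-        Example (tensor names taken from the bundled Tensorflow example
-        checkpoint used in the tests):
-
-            model = TFModel()
-            model.load(
-                data_dir='picasso/examples/tensorflow/data-volume',
-                tf_input_var='convolution2d_input_1:0',
-                tf_predict_var='Softmax:0')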
- - """ - # find newest ckpt and meta files - try: - latest_ckpt_fn = max( - filter( - # exclude index and meta files which may have earlier - # timestamps - lambda x: os.path.splitext(x)[-1].startswith('.meta') or - os.path.splitext(x)[-1].startswith('.index'), - glob.glob(os.path.join(data_dir, '*.ckpt*'))), - key=os.path.getctime) - latest_ckpt_time = str( - datetime.fromtimestamp(os.path.getmtime(latest_ckpt_fn))) - # remove any step info that's been appended to the extension - fileext_div = latest_ckpt_fn.rfind('.ckpt') - additional_ext = latest_ckpt_fn.rfind('.', fileext_div + 1) - if additional_ext < 0: - latest_ckpt = latest_ckpt_fn - else: - latest_ckpt = latest_ckpt_fn[:additional_ext] - except ValueError: - raise FileNotFoundError('No checkpoint (.ckpt) files ' - 'available at {}'.format(data_dir)) - - try: - latest_meta = max(glob.iglob(os.path.join(data_dir, '*.meta')), - key=os.path.getctime) - except ValueError: - raise FileNotFoundError('No graph (.meta) files ' - 'available at {}'.format(data_dir)) - - self._sess = tf.Session() - self._sess.as_default() - - self._saver = tf.train.import_meta_graph(latest_meta) - self._saver.restore(self._sess, latest_ckpt) - - self._tf_input_var = self._sess.graph.get_tensor_by_name(tf_input_var) - self._tf_predict_var = self._sess.graph.get_tensor_by_name( - tf_predict_var) - self._model_name = type(self).__name__ - self._latest_ckpt_name = latest_ckpt_fn - self._latest_ckpt_time = latest_ckpt_time - - def predict(self, input_array): - return self.sess.run(self.tf_predict_var, - {self.tf_input_var: input_array}) -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -# Josh Chen - refactor and class config -############################################################################### -"""Visualizations live here - -All default and user-defined visualizations are submodules of this -module. All classes defined in this directory (except BaseVisualization) -will be imported. - -""" -import os - -__all__ = [x.rpartition('.')[0] for x in os.listdir(__path__[0]) - if not x.startswith('__') and x.endswith('py')] -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -# Josh Chen - refactor and class config -############################################################################### -import re - - -class BaseVisualization: - """Interface encapsulating a NN visualization. - - This interface defines how a visualization is computed for a given NN - model. - - """ - # (:obj:`str`): Short description of the visualization. - DESCRIPTION = None - - # (:obj:`str`): Optional link to the paper specifying the visualization. 
- REFERENCE_LINK = None - - # (:obj:`dict`): Optional visualization settings that the user can select, - # as a dict mapping setting names to lists of their allowed values. - ALLOWED_SETTINGS = None - - def __init__(self, model): - """Create a new instance of this visualization. - - `BaseVisualization` is an interface and should only be instantiated via - a subclass. - - Args: - model (:obj:`.models.model.BaseModel`): NN model to be - visualized. - - """ - self._model = model - - # give default settings - if self.ALLOWED_SETTINGS: - self.update_settings({setting: self.ALLOWED_SETTINGS[setting][0] - for setting in self.ALLOWED_SETTINGS}) - - @property - def model(self): - """NN model to be visualized. - - (:obj:`.models.model.BaseModel`) - - """ - return self._model - - def update_settings(self, settings): - """Update the settings - - If a derived class has an ALLOWED_SETTINGS dict, we check here that - incoming settings from the web app are allowed, and set the child - properties as appropriate. - - """ - - def error_string(setting, setting_val): - return ('{val} is not an acceptable value for ' - 'parameter {param} for visualization' - '{vis}.').format(val=setting_val, - param=setting, - vis=self.__class__.__name__) - - for setting in settings: - if settings[setting] in self.ALLOWED_SETTINGS[setting]: - # if the setting is allowed, set the attribute but remove - # invalid variable characters - # - # see: - # - # https://stackoverflow.com/questions/3303312/how-do-i-convert-a-string-to-a-valid-variable-name-in-python - setattr(self, '_' + re.sub('\W|^(?=\d)', '_', setting).lower(), - settings[setting]) - else: - raise ValueError(error_string(settings[setting], setting)) - - def make_visualization(self, inputs, output_dir, settings=None): - """Generate the visualization. - - All visualizations must implement this method. - - Args: - inputs (iterable of :class:`PIL.Image`): Batch of input images to - make visualizations for, as PIL :obj:`Image` objects. - output_dir (:obj:`str`): A directory to write outputs (e.g., - plots) to. - settings (:obj:`str`): Dictionary of settings that the user - selected, as a dict mapping setting names to values. This - should only be provided if this class's `ALLOWED_SETTINGS` - attribute is non-null. - - Returns: - Object used to render the visualization, passed directly to the - visualization class's associated HTML template. Since this HTML - template is custom for each visualization class, the return type - is arbitrary. - - """ - raise NotImplementedError -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -# Josh Chen - refactor and class config -############################################################################### -from picasso.visualizations.base import BaseVisualization - - -class ClassProbabilities(BaseVisualization): - """Display top class probabilities for a given image - - This is the simplest kind of visualization -- it merely displays the top - class probabilities of the input image. 
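-
-    Each entry returned by `make_visualization` is shaped like this
-    (values are illustrative):
-
-        {'input_file_name': '9.png',
-         'has_output': False,
-         'has_processed_input': False,
-         'predict_probs': [{'index': 7, 'name': '7', 'prob': '0.991'}, ...]}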
-
-    """
-
-    DESCRIPTION = 'Predict class probabilities from new examples'
-
-    ALLOWED_SETTINGS = dict()
-
-    def make_visualization(self, inputs, output_dir):
-        pre_processed_arrays = self.model.preprocess([example['data']
-                                                      for example in inputs])
-        predictions = self.model.sess.run(self.model.tf_predict_var,
-                                          feed_dict={self.model.tf_input_var:
-                                                     pre_processed_arrays})
-        filtered_predictions = self.model.decode_prob(predictions)
-        results = []
-        for i, inp in enumerate(inputs):
-            results.append({'input_file_name': inp['filename'],
-                            'has_output': False,
-                            'has_processed_input': False,
-                            'predict_probs': filtered_predictions[i]})
-        return results
-###############################################################################
-# Copyright (c) 2017 Merantix GmbH
-# All rights reserved. This program and the accompanying materials
-# are made available under the terms of the Eclipse Public License v1.0
-# which accompanies this distribution, and is available at
-# http://www.eclipse.org/legal/epl-v10.html
-#
-# Contributors:
-#    Ryan Henderson - initial API and implementation and/or initial
-#    documentation
-#    Josh Chen - refactor and class config
-###############################################################################
-import os
-import time
-
-import matplotlib
-# select the non-interactive backend before pyplot is imported,
-# otherwise the call has no effect
-matplotlib.use('Agg')
-from matplotlib import pyplot
-
-import numpy as np
-from PIL import Image
-
-from picasso.visualizations.base import BaseVisualization
-
-
-class PartialOcclusion(BaseVisualization):
-    """Partial occlusion visualization
-
-    The partial occlusion class blocks out part of the image and checks
-    the classification. Regions where classification probability drops
-    significantly are likely very important to classification.
-
-    The visualization can therefore be used to check if the model is
-    classifying on the image feature we expect. 
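-
-    A sketch of the occlusion step itself, using the static helper defined
-    below (array and values are illustrative):
-
-        import numpy as np
-        arr = np.full((100, 100, 3), 255, dtype='uint8')  # white image
-        PartialOcclusion.add_occlusion_to_arr(arr, (10, 20), 50, 40,
-                                              occ_val=128)
-        # rows 10:50 and columns 20:70 are now grey (128)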
- - """ - DESCRIPTION = ('Partially occlude image to determine regions ' - 'important to classification') - - REFERENCE_LINK = 'https://arxiv.org/abs/1311.2901' - - ALLOWED_SETTINGS = { - 'Window': ['0.50', '0.40', '0.30', '0.20', '0.10', '0.05'], - 'Strides': ['2', '5', '10', '20', '30'], - 'Occlusion': ['grey', 'black', 'white'] - } - - @property - def window(self): - return float(self._window) - - @property - def num_windows(self): - return int(self._strides) - - @property - def occlusion_method(self): - return self._occlusion - - def __init__(self, model): - super().__init__(model) - self.predict_tensor = self.get_predict_tensor() - - self.grid_percent = 0.01 - self.occlusion_value = 255 - self.initial_resize = (244, 244) - - def make_visualization(self, inputs, output_dir): - if self.occlusion_method == 'black': - self.occlusion_value = 0 - elif self.occlusion_method == 'grey': - self.occlusion_value = 128 - - # get class predictions as in ClassProbabilities - pre_processed_arrays = self.model.preprocess([example['data'] - for example in inputs]) - class_predictions = self.model.sess.run( - self.model.tf_predict_var, - feed_dict={self.model.tf_input_var: pre_processed_arrays}) - decoded_predictions = self.model.decode_prob(class_predictions) - - results = [] - for i, example in enumerate(inputs): - im = example['data'] - im_format = im.format - if self.initial_resize: - im = im.resize(self.initial_resize, Image.ANTIALIAS) - - occ_im = self.occluded_images(im) - predictions = self.model.sess.run( - self.predict_tensor, - feed_dict={self.model.tf_input_var: - self.model.preprocess(occ_im['occluded_images'])}) - - example_im = self.make_example_image(im, - occ_im['centers_horizontal'], - occ_im['centers_vertical'], - occ_im['win_width'], - occ_im['win_length'], - occ_im['pad_vertical'], - occ_im['pad_horizontal']) - example_filename = '{ts}{fn}'.format(ts=str(time.time()), - fn=example['filename']) - example_im.save( - os.path.join(output_dir, example_filename), - format=im_format) - - filenames = self.make_heatmaps( - predictions, output_dir, example['filename'], - decoded_predictions=decoded_predictions[i]) - results.append({'input_file_name': example['filename'], - 'has_output': True, - 'output_file_names': filenames, - 'predict_probs': decoded_predictions[i], - 'has_processed_input': True, - 'processed_input_file_name': example_filename}) - return results - - def get_predict_tensor(self): - # Assume that predict is the softmax - # tensor in the computation graph - return self.model.sess.graph.get_tensor_by_name( - self.model.tf_predict_var.name) - - def make_heatmaps(self, predictions, - output_dir, filename, - decoded_predictions=None): - if decoded_predictions: - relevant_class_indices = [pred['index'] - for pred in decoded_predictions] - predictions = predictions[:, relevant_class_indices] - stacked_heatmaps = predictions.reshape(self.num_windows, - self.num_windows, - predictions.shape[-1]) - filenames = [] - for i in range(predictions.shape[-1]): - grid = stacked_heatmaps[:, :, i] - pyplot.axis('off') - if i == 0: - im = pyplot.imshow(grid, vmin=0, vmax=1) - pyplot.axis('off') - im.axes.get_xaxis().set_visible(False) - im.axes.get_yaxis().set_visible(False) - else: - im.set_data(grid) - hm_filename = '{ts}{label}_{fn}'.format(ts=str(time.time()), - label=str(i), - fn=filename) - pyplot.savefig(os.path.join(output_dir, hm_filename), - format='PNG', bbox_inches='tight', pad_inches=0) - filenames.append(hm_filename) - return filenames - - def occluded_images(self, im): - width = 
im.size[0] - length = im.size[1] - win_width = round(self.window * width) - win_length = round(self.window * length) - pad_horizontal = win_width // 2 - pad_vertical = win_length // 2 - centers_horizontal, centers_vertical = self.get_centers( - width, length, win_width, win_length, pad_horizontal, pad_vertical, - self.num_windows) - upper_left_corners = np.array( - [(w - pad_vertical, v - pad_horizontal) - for w in centers_vertical - for v in centers_horizontal] - ) - - images = [] - for corner in upper_left_corners: - arr = np.array(im) - self.add_occlusion_to_arr(arr, corner, - win_width, win_length, - occ_val=self.occlusion_value) - images.append( - Image.fromarray(arr) - ) - - return {'occluded_images': images, - 'centers_horizontal': centers_horizontal, - 'centers_vertical': centers_vertical, - 'win_width': win_width, - 'win_length': win_length, - 'pad_horizontal': pad_horizontal, - 'pad_vertical': pad_vertical} - - def make_example_image(self, im, - centers_horizontal, centers_vertical, - win_width, win_length, pad_vertical, - pad_horizontal, output_size=(244, 244)): - arr = np.array(im) - # add an example occlusion - self.add_occlusion_to_arr(arr, - (centers_vertical[1] - pad_vertical, - centers_horizontal[1] - pad_horizontal), - win_width, win_length, occ_val=100) - # add grid - g_pad_vertical = round(self.grid_percent * im.size[1]) or 1 - g_pad_horizontal = round(self.grid_percent * im.size[0]) or 1 - w_grid = 2 * g_pad_horizontal - l_grid = 2 * g_pad_vertical - upper_left_corners = np.array( - [(w - g_pad_vertical, v - g_pad_horizontal) - for w in centers_vertical - for v in centers_horizontal] - ) - for corner in upper_left_corners: - self.add_occlusion_to_arr(arr, corner, - w_grid, l_grid) - return Image.fromarray(arr) - - @staticmethod - def get_centers(width, length, - win_width, win_length, - pad_horizontal, pad_vertical, - num_windows): - centers_horizontal = np.linspace(pad_horizontal, - width - pad_horizontal, - num_windows).astype('int') - centers_vertical = np.linspace(pad_vertical, - length - pad_vertical, - num_windows).astype('int') - return centers_horizontal, centers_vertical - - @staticmethod - def add_occlusion_to_arr(arr, upper_left_corner, - width_horizontal, - width_vertical, - occ_val=0): - arr[upper_left_corner[0]: - upper_left_corner[0] + width_vertical, - upper_left_corner[1]: - upper_left_corner[1] + width_horizontal] = occ_val -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -# Josh Chen - refactor and class config -############################################################################### -from picasso.visualizations.base import BaseVisualization -from matplotlib import pyplot -import os -import time - -import numpy as np -import tensorflow as tf - -import matplotlib -matplotlib.use('Agg') - - -class SaliencyMaps(BaseVisualization): - """Derivative of classification with respect to input pixels - - Saliency maps are a way of showing which inputs matter most to - classification. The derivative of a class probability with - respect to each input pixel are found with backpropagation. 
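-    Concretely, the gradient of the class logit with respect to the
-    input image is taken, and its absolute value (maximised over colour
-    channels for RGB inputs) is rendered as a heatmap.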
- High values for the derivative indicate pixels important to - classification (as changing them would change the classification). - - """ - DESCRIPTION = ('See maximal derivates against class with respect ' - 'to input') - - REFERENCE_LINK = 'https://arxiv.org/pdf/1312.6034' - - ALLOWED_SETTINGS = {'Transparency': ['0.0', '0.25', '0.5', '0.75']} - - @property - def transparency(self): - return float(self._transparency) - - def __init__(self, model, logit_tensor_name=None): - super().__init__(model) - if logit_tensor_name: - self.logit_tensor = self.model.sess.graph.get_tensor_by_name( - logit_tensor_name) - else: - self.logit_tensor = self.get_logit_tensor() - - self.input_shape = self.model.tf_input_var.get_shape()[1:].as_list() - - def get_gradient_wrt_class(self, class_index): - gradient_name = 'bv_{class_index}_gradient'.format( - class_index=class_index) - try: - return self.model.sess.graph.get_tensor_by_name( - '{}:0'.format(gradient_name)) - except KeyError: - class_logit = tf.slice(self.logit_tensor, - [0, class_index], - [1, 1]) - return tf.gradients(class_logit, - self.model.tf_input_var, - name=gradient_name)[0] - - def make_visualization(self, inputs, output_dir): - - pre_processed_arrays = self.model.preprocess([example['data'] - for example in inputs]) - - # get predictions - predictions = self.model.sess.run(self.model.tf_predict_var, - feed_dict={self.model.tf_input_var: - pre_processed_arrays}) - decoded_predictions = self.model.decode_prob(predictions) - - results = [] - for i, inp in enumerate(inputs): - class_gradients = [] - relevant_class_indices = [pred['index'] - for pred in decoded_predictions[i]] - gradients_wrt_class = [self.get_gradient_wrt_class(index) - for index in relevant_class_indices] - for gradient_wrt_class in gradients_wrt_class: - class_gradients.append([self.model.sess.run( - gradient_wrt_class, - feed_dict={self.model.tf_input_var: [arr]}) - for arr in pre_processed_arrays]) - - output_arrays = np.array([gradient[i] - for gradient in class_gradients]) - # if images are color, take the maximum channel - if output_arrays.shape[-1] == 3: - output_arrays = output_arrays.max(-1) - # we care about the size of the derivative, not the sign - output_arrays = np.abs(output_arrays) - - # We want each array to be represented as a 1-channel image of - # the same size as the model's input image. - output_images = output_arrays.reshape([-1] + self.input_shape[0:2]) - - output_fns = [] - pyplot.clf() - for j, output_image in enumerate(output_images): - output_fn = '{fn}-{j}-{ts}.png'.format(ts=str(time.time()), - j=j, - fn=inp['filename']) - - if j == 0: - pyplot.imshow(inputs[i]['data'] - .resize(output_image.shape) - .convert('RGB'), - alpha=self.transparency) - - im = pyplot.imshow(output_image, - alpha=1. 
- self.transparency, - cmap='inferno') - pyplot.axis('off') - im.axes.get_xaxis().set_visible(False) - im.axes.get_yaxis().set_visible(False) - else: - im.set_data(output_image) - - pyplot.savefig(os.path.join(output_dir, output_fn), - bbox_inches='tight', pad_inches=0) - output_fns.append(output_fn) - - results.append({'input_file_name': inp['filename'], - 'has_output': True, - 'predict_probs': decoded_predictions[i], - 'has_processed_input': False, - 'output_file_names': output_fns}) - return results - - def get_logit_tensor(self): - # Assume that the logits are the tensor input to the last softmax - # operation in the computation graph - sm = [node - for node in self.model.sess.graph_def.node - if node.name == self.model.tf_predict_var.name.split(':')[0]][-1] - logit_op_name = sm.input[0] - return self.model.sess.graph.get_tensor_by_name( - '{}:0'.format(logit_op_name)) -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -# Josh Chen - refactor and class config -############################################################################### - -# Note: By default, Flask doesn't know that this file exists. If you want -# Flask to load the settings you specify here, you must set the environment -# variable `PICASSO_SETTINGS` to point to this file. E.g.: -# -# export PICASSO_SETTINGS=/path/to/examples/keras-vgg16/config.py -# -import os - -base_dir = os.path.dirname(os.path.abspath(__file__)) - -MODEL_CLS_PATH = os.path.join(base_dir, 'model.py') -MODEL_CLS_NAME = 'KerasVGG16Model' -MODEL_LOAD_ARGS = { - 'data_dir': os.path.join(base_dir, 'data-volume'), -} -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. 
This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -# Josh Chen - refactor and class config -############################################################################### -from keras.applications import imagenet_utils -import numpy as np -from PIL import Image - -from picasso.models.keras import KerasModel - -VGG16_DIM = (224, 224, 3) - - -class KerasVGG16Model(KerasModel): - - def preprocess(self, raw_inputs): - """ - Args: - raw_inputs (list of Images): a list of PIL Image objects - Returns: - array (float32): num images * height * width * num channels - """ - image_arrays = [] - for raw_im in raw_inputs: - im = raw_im.resize(VGG16_DIM[:2], Image.ANTIALIAS) - im = im.convert('RGB') - arr = np.array(im).astype('float32') - image_arrays.append(arr) - - all_raw_inputs = np.array(image_arrays) - return imagenet_utils.preprocess_input(all_raw_inputs) - - def decode_prob(self, class_probabilities): - r = imagenet_utils.decode_predictions(class_probabilities, - top=self.top_probs) - results = [ - [{'code': entry[0], - 'name': entry[1], - 'prob': '{:.3f}'.format(entry[2])} - for entry in row] - for row in r - ] - classes = imagenet_utils.CLASS_INDEX - class_keys = list(classes.keys()) - class_values = list(classes.values()) - - for result in results: - for entry in result: - entry['index'] = int( - class_keys[class_values.index([entry['code'], - entry['name']])]) - return results -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -############################################################################### -import os -import json -from keras.applications.vgg16 import VGG16 - -path = 'data-volume' -try: - os.mkdir(path) -except FileExistsError: - pass - -print('Downloading and setting up VGG16...') - -vgg16 = VGG16() - -print('Saving...') - -if not os.path.exists(os.path.join(os.path.dirname(__file__), path)): - os.makedirs(os.path.join(os.path.dirname(__file__), path)) - -with open(os.path.join(os.path.dirname(__file__), - path, - 'vgg16.json'), 'w') as json_file: - json.dump(vgg16.to_json(), json_file) - -vgg16.save_weights(os.path.join(os.path.dirname(__file__), - path, - 'vgg16.hdf5')) - -print('Done.') -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -# Josh Chen - refactor and class config -############################################################################### - -# Note: By default, Flask doesn't know that this file exists. 
If you want -# Flask to load the settings you specify here, you must set the environment -# variable `PICASSO_SETTINGS` to point to this file. E.g.: -# -# export PICASSO_SETTINGS=/path/to/examples/keras/config.py -# -import os - -base_dir = os.path.dirname(os.path.abspath(__file__)) - -MODEL_CLS_PATH = os.path.join(base_dir, 'model.py') -MODEL_CLS_NAME = 'KerasMNISTModel' -MODEL_LOAD_ARGS = { - 'data_dir': os.path.join(base_dir, 'data-volume'), -} -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -# Josh Chen - refactor and class config -############################################################################### -import numpy as np -from PIL import Image - -from picasso.models.keras import KerasModel - -MNIST_DIM = (28, 28) - - -class KerasMNISTModel(KerasModel): - - def preprocess(self, raw_inputs): - """Convert images into the format required by our model. - - Our model requires that inputs be grayscale (mode 'L'), be resized to - `MNIST_DIM`, and be represented as float32 numpy arrays in range - [0, 1]. - - Args: - raw_inputs (list of Images): a list of PIL Image objects - - Returns: - array (float32): num images * height * width * num channels - - """ - image_arrays = [] - for raw_im in raw_inputs: - im = raw_im.convert('L') - im = im.resize(MNIST_DIM, Image.ANTIALIAS) - arr = np.array(im) - image_arrays.append(arr) - - inputs = np.array(image_arrays) - return inputs.reshape(len(inputs), - MNIST_DIM[0], - MNIST_DIM[1], 1).astype('float32') / 255 -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -# Josh Chen - refactor and class config -############################################################################### - -# Note: By default, Flask doesn't know that this file exists. If you want -# Flask to load the settings you specify here, you must set the environment -# variable `PICASSO_SETTINGS` to point to this file. E.g.: -# -# export PICASSO_SETTINGS=/path/to/examples/tensorflow/config.py -# -import os - -base_dir = os.path.dirname(os.path.abspath(__file__)) - -MODEL_CLS_PATH = os.path.join(base_dir, 'model.py') -MODEL_CLS_NAME = 'TensorflowMNISTModel' -MODEL_LOAD_ARGS = { - 'data_dir': os.path.join(base_dir, 'data-volume'), - 'tf_input_var': 'convolution2d_input_1:0', - 'tf_predict_var': 'Softmax:0', -} -############################################################################### -# Copyright (c) 2017 Merantix GmbH -# All rights reserved. 
This program and the accompanying materials -# are made available under the terms of the Eclipse Public License v1.0 -# which accompanies this distribution, and is available at -# http://www.eclipse.org/legal/epl-v10.html -# -# Contributors: -# Ryan Henderson - initial API and implementation and/or initial -# documentation -# Josh Chen - refactor and class config -############################################################################### -import numpy as np -from PIL import Image - -from picasso.models.tensorflow import TFModel - -MNIST_DIM = (28, 28) - - -class TensorflowMNISTModel(TFModel): - - def preprocess(self, raw_inputs): - """Convert images into the format required by our model. - - Our model requires that inputs be grayscale (mode 'L'), be resized to - `MNIST_DIM`, and be represented as float32 numpy arrays in range - [0, 1]. - - Args: - raw_inputs (list of Images): a list of PIL Image objects - - Returns: - array (float32): num images * height * width * num channels - - """ - image_arrays = [] - for raw_im in raw_inputs: - im = raw_im.convert('L') - im = im.resize(MNIST_DIM, Image.ANTIALIAS) - arr = np.array(im) - image_arrays.append(arr) - - inputs = np.array(image_arrays) - return inputs.reshape(len(inputs), - MNIST_DIM[0], - MNIST_DIM[1], 1).astype('float32') / 255 -""" tool functions """ - -import time - - -def gen_time_str(): - """ tool function to generate time str like 20180927_205959 """ - return time.strftime("%Y%m%d_%H%M%S", time.gmtime()) -""" -script to crop celebA dataset and save into new folder -""" - -import os -import glob -import numpy as np -import pandas as pd -import PIL -import PIL.Image -import h5py - -path_celeba_img = './data/raw/celebA' -path_celeba_att = './data/raw/celebA_annotation/list_attr_celeba.txt' -path_celeba_crop = './data/processed/celebA_crop' -path_celeba_crop_h5 = './data/processed/celebA_crop_h5' -filename_h5 = 'celebA_crop.h5' - -if not os.path.exists(path_celeba_crop): - os.mkdir(path_celeba_crop) - -## -""" image crop """ - - -def img_crop(img, cx=89, cy=121, w=128, h=128): - """ - crop images based on center and width, height - - :param img: image data, numpy array, shape = [height, width, RGB] - :param cx: center pixel, x - :param cy: center pixel, y - :param w: width, even number - :param h: height, even number - :return: img_crop - """ - img_cropped = img[cy-h//2: cy+h//2, cx-w//2: cx+w//2] - return img_cropped - - -## -""" get image and attribute data """ - -# loading attributes -df_attr = pd.read_csv(path_celeba_att, sep='\s+', header=1, index_col=0) - -img_names = os.listdir(path_celeba_img) -img_names = [img_name for img_name in img_names if img_name[-4:] == '.jpg'] -img_names.sort() - -assert df_attr.shape[0] == len( - img_names), 'images number does not match attribute table' -num_img = df_attr.shape[0] - -## -""" save cropped image to harddisk """ - -for i_img in range(num_img): - if i_img % 100 == 0: - print('{}/{}'.format(i_img, num_img)) - img_name = img_names[i_img] - img = np.asarray(PIL.Image.open(os.path.join(path_celeba_img, img_name))) - img = img_crop(img) - PIL.Image.fromarray(img).save(os.path.join(path_celeba_crop, img_name)) -print('finished {} images, saved in {}'.format(num_img, path_celeba_crop)) - -## -""" test cropping and whether image and label matches """ -yn_interactive_test = False -yn_time_img_loading = False - -if yn_interactive_test: - import matplotlib.pyplot as plt - - i = np.random.randint(num_img) - print(df_attr.ix[i]) - print("image file name: {}".format(img_names[i])) - - img = 
np.asarray(PIL.Image.open( - os.path.join(path_celeba_img, img_names[i]))) - - plt.imshow(img_crop(img)) - plt.show() - - -## -if yn_time_img_loading: - import time - num_times = 1000 - tic = time.time() - for i_time in range(num_times): - i = np.random.randint(num_img) - np.asarray(PIL.Image.open(os.path.join(path_celeba_img, img_names[i]))) - toc = time.time() - print((toc-tic)/num_times) - -## -yn_use_h5 = False - - -def fun_get_img(file_img): - img = np.asarray(PIL.Image.open(os.path.join(path_celeba_img, file_img))) - return img_crop(img) - - -if yn_use_h5: - - filepath_h5 = os.path.join(path_celeba_crop_h5, filename_h5) - if not os.path.exists(path_celeba_crop_h5): - os.mkdir(path_celeba_crop_h5) - - """ crop data and save to h5 """ - def save_to_h5_img(filepath_h5=filepath_h5, list_img_file=tuple(), fun_get_img=fun_get_img, dataset_name='img'): - """ - save the images as hdf5 format - """ - if os.path.exists(filepath_h5): - print('h5 file exists, please delete it or give another name. if you want to overwrite, type "overwrite"') - key_in = input() - if key_in == 'overwrite': - print('overwrite file {}'.format(filepath_h5)) - else: - raise Exception() - if len(list_img_file) == 0: - raise Exception('no input data') - - img = fun_get_img(list_img_file[0]) - print(img.shape) - - plt.imshow(img) - plt.show() - - with h5py.File(filepath_h5, 'a') as hf: - hf.create_dataset(dataset_name, data=img) - - save_to_h5_img(list_img_file=[img_names[0]]) - - ## - """ test_read_h5 """ - tic = time.time() - for i in range(1000): - with h5py.File(filepath_h5, 'r') as hf: - img = hf['img'][:] - toc = time.time() - print((toc-tic)/1000) - - plt.imshow(img) - plt.show() -""" -tool functions from Nvidia, with modification only in file path, -from: https://github.com/tkarras/progressive_growing_of_gans/blob/master/dataset_tool.py -""" - -# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -# -# This work is licensed under the Creative Commons Attribution-NonCommercial -# 4.0 International License. To view a copy of this license, visit -# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to -# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. 
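-
-# This tool packs image datasets (MNIST, CIFAR-10/100, SVHN, LSUN, CelebA,
-# CelebA-HQ, plain image folders, or a legacy HDF5 archive) into the
-# multi-resolution .tfrecords shards consumed by the progressive-growing
-# GAN trainer, one file per level of detail down to 4x4 (see
-# TFRecordExporter below). It is driven from the command line, e.g.:
-#
-#   python ./src/ingestion/dataset_tool_modify.py create_celeba \
-#       ./data/processed/celeba ./data/raw/celebA
-#
-# See execute_cmdline at the bottom for the full command list.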
- -import dataset -import tfutil -import os -import sys -import glob -import argparse -import threading -import six.moves.queue as Queue -import traceback -import numpy as np -import tensorflow as tf -import PIL.Image - -path_pg_gan_code = './src/model/pggan' -sys.path.append(path_pg_gan_code) - -# ---------------------------------------------------------------------------- - - -def error(msg): - print('Error: ' + msg) - exit(1) - -# ---------------------------------------------------------------------------- - - -class TFRecordExporter: - def __init__(self, tfrecord_dir, expected_images, print_progress=True, progress_interval=10): - self.tfrecord_dir = tfrecord_dir - self.tfr_prefix = os.path.join( - self.tfrecord_dir, os.path.basename(self.tfrecord_dir)) - self.expected_images = expected_images - self.cur_images = 0 - self.shape = None - self.resolution_log2 = None - self.tfr_writers = [] - self.print_progress = print_progress - self.progress_interval = progress_interval - if self.print_progress: - print('Creating dataset "%s"' % tfrecord_dir) - if not os.path.isdir(self.tfrecord_dir): - os.makedirs(self.tfrecord_dir) - assert(os.path.isdir(self.tfrecord_dir)) - - def close(self): - if self.print_progress: - print('%-40s\r' % 'Flushing data...', end='', flush=True) - for tfr_writer in self.tfr_writers: - tfr_writer.close() - self.tfr_writers = [] - if self.print_progress: - print('%-40s\r' % '', end='', flush=True) - print('Added %d images.' % self.cur_images) - - # Note: Images and labels must be added in shuffled order. - def choose_shuffled_order(self): - order = np.arange(self.expected_images) - np.random.RandomState(123).shuffle(order) - return order - - def add_image(self, img): - if self.print_progress and self.cur_images % self.progress_interval == 0: - print('%d / %d\r' % - (self.cur_images, self.expected_images), end='', flush=True) - if self.shape is None: - self.shape = img.shape - self.resolution_log2 = int(np.log2(self.shape[1])) - assert self.shape[0] in [1, 3] - assert self.shape[1] == self.shape[2] - assert self.shape[1] == 2**self.resolution_log2 - tfr_opt = tf.python_io.TFRecordOptions( - tf.python_io.TFRecordCompressionType.NONE) - for lod in range(self.resolution_log2 - 1): - tfr_file = self.tfr_prefix + \ - '-r%02d.tfrecords' % (self.resolution_log2 - lod) - self.tfr_writers.append( - tf.python_io.TFRecordWriter(tfr_file, tfr_opt)) - assert img.shape == self.shape - for lod, tfr_writer in enumerate(self.tfr_writers): - if lod: - img = img.astype(np.float32) - img = (img[:, 0::2, 0::2] + img[:, 0::2, 1::2] + - img[:, 1::2, 0::2] + img[:, 1::2, 1::2]) * 0.25 - quant = np.rint(img).clip(0, 255).astype(np.uint8) - ex = tf.train.Example(features=tf.train.Features(feature={ - 'shape': tf.train.Feature(int64_list=tf.train.Int64List(value=quant.shape)), - 'data': tf.train.Feature(bytes_list=tf.train.BytesList(value=[quant.tostring()]))})) - tfr_writer.write(ex.SerializeToString()) - self.cur_images += 1 - - def add_labels(self, labels): - if self.print_progress: - print('%-40s\r' % 'Saving labels...', end='', flush=True) - assert labels.shape[0] == self.cur_images - with open(self.tfr_prefix + '-rxx.labels', 'wb') as f: - np.save(f, labels.astype(np.float32)) - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - -# ---------------------------------------------------------------------------- - - -class ExceptionInfo(object): - def __init__(self): - self.value = sys.exc_info()[1] - self.traceback = traceback.format_exc() - -# 
---------------------------------------------------------------------------- - - -class WorkerThread(threading.Thread): - def __init__(self, task_queue): - threading.Thread.__init__(self) - self.task_queue = task_queue - - def run(self): - while True: - func, args, result_queue = self.task_queue.get() - if func is None: - break - try: - result = func(*args) - except: - result = ExceptionInfo() - result_queue.put((result, args)) - -# ---------------------------------------------------------------------------- - - -class ThreadPool(object): - def __init__(self, num_threads): - assert num_threads >= 1 - self.task_queue = Queue.Queue() - self.result_queues = dict() - self.num_threads = num_threads - for idx in range(self.num_threads): - thread = WorkerThread(self.task_queue) - thread.daemon = True - thread.start() - - def add_task(self, func, args=()): - assert hasattr(func, '__call__') # must be a function - if func not in self.result_queues: - self.result_queues[func] = Queue.Queue() - self.task_queue.put((func, args, self.result_queues[func])) - - def get_result(self, func): # returns (result, args) - result, args = self.result_queues[func].get() - if isinstance(result, ExceptionInfo): - print('\n\nWorker thread caught an exception:\n' + result.traceback) - raise result.value - return result, args - - def finish(self): - for idx in range(self.num_threads): - self.task_queue.put((None, (), None)) - - def __enter__(self): # for 'with' statement - return self - - def __exit__(self, *excinfo): - self.finish() - - def process_items_concurrently(self, item_iterator, process_func=lambda x: x, pre_func=lambda x: x, post_func=lambda x: x, max_items_in_flight=None): - if max_items_in_flight is None: - max_items_in_flight = self.num_threads * 4 - assert max_items_in_flight >= 1 - results = [] - retire_idx = [0] - - def task_func(prepared, idx): - return process_func(prepared) - - def retire_result(): - processed, (prepared, idx) = self.get_result(task_func) - results[idx] = processed - while retire_idx[0] < len(results) and results[retire_idx[0]] is not None: - yield post_func(results[retire_idx[0]]) - results[retire_idx[0]] = None - retire_idx[0] += 1 - - for idx, item in enumerate(item_iterator): - prepared = pre_func(item) - results.append(None) - self.add_task(func=task_func, args=(prepared, idx)) - while retire_idx[0] < idx - max_items_in_flight + 2: - for res in retire_result(): - yield res - while retire_idx[0] < len(results): - for res in retire_result(): - yield res - -# ---------------------------------------------------------------------------- - - -def display(tfrecord_dir): - print('Loading dataset "%s"' % tfrecord_dir) - tfutil.init_tf({'gpu_options.allow_growth': True}) - dset = dataset.TFRecordDataset( - tfrecord_dir, max_label_size='full', repeat=False, shuffle_mb=0) - tfutil.init_uninited_vars() - - idx = 0 - while True: - try: - images, labels = dset.get_minibatch_np(1) - except tf.errors.OutOfRangeError: - break - if idx == 0: - print('Displaying images') - import cv2 # pip install opencv-python - cv2.namedWindow('dataset_tool') - print('Press SPACE or ENTER to advance, ESC to exit') - print('\nidx = %-8d\nlabel = %s' % (idx, labels[0].tolist())) - cv2.imshow('dataset_tool', images[0].transpose( - 1, 2, 0)[:, :, ::-1]) # CHW => HWC, RGB => BGR - idx += 1 - if cv2.waitKey() == 27: - break - print('\nDisplayed %d images.' 
% idx) - -# ---------------------------------------------------------------------------- - - -def extract(tfrecord_dir, output_dir): - print('Loading dataset "%s"' % tfrecord_dir) - tfutil.init_tf({'gpu_options.allow_growth': True}) - dset = dataset.TFRecordDataset( - tfrecord_dir, max_label_size=0, repeat=False, shuffle_mb=0) - tfutil.init_uninited_vars() - - print('Extracting images to "%s"' % output_dir) - if not os.path.isdir(output_dir): - os.makedirs(output_dir) - idx = 0 - while True: - if idx % 10 == 0: - print('%d\r' % idx, end='', flush=True) - try: - images, labels = dset.get_minibatch_np(1) - except tf.errors.OutOfRangeError: - break - if images.shape[1] == 1: - img = PIL.Image.fromarray(images[0][0], 'L') - else: - img = PIL.Image.fromarray(images[0].transpose(1, 2, 0), 'RGB') - img.save(os.path.join(output_dir, 'img%08d.png' % idx)) - idx += 1 - print('Extracted %d images.' % idx) - -# ---------------------------------------------------------------------------- - - -def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels): - max_label_size = 0 if ignore_labels else 'full' - print('Loading dataset "%s"' % tfrecord_dir_a) - tfutil.init_tf({'gpu_options.allow_growth': True}) - dset_a = dataset.TFRecordDataset( - tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle_mb=0) - print('Loading dataset "%s"' % tfrecord_dir_b) - dset_b = dataset.TFRecordDataset( - tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle_mb=0) - tfutil.init_uninited_vars() - - print('Comparing datasets') - idx = 0 - identical_images = 0 - identical_labels = 0 - while True: - if idx % 100 == 0: - print('%d\r' % idx, end='', flush=True) - try: - images_a, labels_a = dset_a.get_minibatch_np(1) - except tf.errors.OutOfRangeError: - images_a, labels_a = None, None - try: - images_b, labels_b = dset_b.get_minibatch_np(1) - except tf.errors.OutOfRangeError: - images_b, labels_b = None, None - if images_a is None or images_b is None: - if images_a is not None or images_b is not None: - print('Datasets contain different number of images') - break - if images_a.shape == images_b.shape and np.all(images_a == images_b): - identical_images += 1 - else: - print('Image %d is different' % idx) - if labels_a.shape == labels_b.shape and np.all(labels_a == labels_b): - identical_labels += 1 - else: - print('Label %d is different' % idx) - idx += 1 - print('Identical images: %d / %d' % (identical_images, idx)) - if not ignore_labels: - print('Identical labels: %d / %d' % (identical_labels, idx)) - -# ---------------------------------------------------------------------------- - - -def create_mnist(tfrecord_dir, mnist_dir): - print('Loading MNIST from "%s"' % mnist_dir) - import gzip - with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: - images = np.frombuffer(file.read(), np.uint8, offset=16) - with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file: - labels = np.frombuffer(file.read(), np.uint8, offset=8) - images = images.reshape(-1, 1, 28, 28) - images = np.pad(images, [(0, 0), (0, 0), (2, 2), - (2, 2)], 'constant', constant_values=0) - assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8 - assert labels.shape == (60000,) and labels.dtype == np.uint8 - assert np.min(images) == 0 and np.max(images) == 255 - assert np.min(labels) == 0 and np.max(labels) == 9 - onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) - onehot[np.arange(labels.size), labels] = 1.0 - - with TFRecordExporter(tfrecord_dir, 
images.shape[0]) as tfr: - order = tfr.choose_shuffled_order() - for idx in range(order.size): - tfr.add_image(images[order[idx]]) - tfr.add_labels(onehot[order]) - -# ---------------------------------------------------------------------------- - - -def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123): - print('Loading MNIST from "%s"' % mnist_dir) - import gzip - with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: - images = np.frombuffer(file.read(), np.uint8, offset=16) - images = images.reshape(-1, 28, 28) - images = np.pad(images, [(0, 0), (2, 2), (2, 2)], - 'constant', constant_values=0) - assert images.shape == (60000, 32, 32) and images.dtype == np.uint8 - assert np.min(images) == 0 and np.max(images) == 255 - - with TFRecordExporter(tfrecord_dir, num_images) as tfr: - rnd = np.random.RandomState(random_seed) - for idx in range(num_images): - tfr.add_image(images[rnd.randint(images.shape[0], size=3)]) - -# ---------------------------------------------------------------------------- - - -def create_cifar10(tfrecord_dir, cifar10_dir): - print('Loading CIFAR-10 from "%s"' % cifar10_dir) - import pickle - images = [] - labels = [] - for batch in range(1, 6): - with open(os.path.join(cifar10_dir, 'data_batch_%d' % batch), 'rb') as file: - data = pickle.load(file, encoding='latin1') - images.append(data['data'].reshape(-1, 3, 32, 32)) - labels.append(data['labels']) - images = np.concatenate(images) - labels = np.concatenate(labels) - assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 - assert labels.shape == (50000,) and labels.dtype == np.int32 - assert np.min(images) == 0 and np.max(images) == 255 - assert np.min(labels) == 0 and np.max(labels) == 9 - onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) - onehot[np.arange(labels.size), labels] = 1.0 - - with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: - order = tfr.choose_shuffled_order() - for idx in range(order.size): - tfr.add_image(images[order[idx]]) - tfr.add_labels(onehot[order]) - -# ---------------------------------------------------------------------------- - - -def create_cifar100(tfrecord_dir, cifar100_dir): - print('Loading CIFAR-100 from "%s"' % cifar100_dir) - import pickle - with open(os.path.join(cifar100_dir, 'train'), 'rb') as file: - data = pickle.load(file, encoding='latin1') - images = data['data'].reshape(-1, 3, 32, 32) - labels = np.array(data['fine_labels']) - assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 - assert labels.shape == (50000,) and labels.dtype == np.int32 - assert np.min(images) == 0 and np.max(images) == 255 - assert np.min(labels) == 0 and np.max(labels) == 99 - onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) - onehot[np.arange(labels.size), labels] = 1.0 - - with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: - order = tfr.choose_shuffled_order() - for idx in range(order.size): - tfr.add_image(images[order[idx]]) - tfr.add_labels(onehot[order]) - -# ---------------------------------------------------------------------------- - - -def create_svhn(tfrecord_dir, svhn_dir): - print('Loading SVHN from "%s"' % svhn_dir) - import pickle - images = [] - labels = [] - for batch in range(1, 4): - with open(os.path.join(svhn_dir, 'train_%d.pkl' % batch), 'rb') as file: - data = pickle.load(file, encoding='latin1') - images.append(data[0]) - labels.append(data[1]) - images = np.concatenate(images) - labels = np.concatenate(labels) - assert 
images.shape == (73257, 3, 32, 32) and images.dtype == np.uint8 - assert labels.shape == (73257,) and labels.dtype == np.uint8 - assert np.min(images) == 0 and np.max(images) == 255 - assert np.min(labels) == 0 and np.max(labels) == 9 - onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) - onehot[np.arange(labels.size), labels] = 1.0 - - with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: - order = tfr.choose_shuffled_order() - for idx in range(order.size): - tfr.add_image(images[order[idx]]) - tfr.add_labels(onehot[order]) - -# ---------------------------------------------------------------------------- - - -def create_lsun(tfrecord_dir, lmdb_dir, resolution=256, max_images=None): - print('Loading LSUN dataset from "%s"' % lmdb_dir) - import lmdb # pip install lmdb - import cv2 # pip install opencv-python - import io - with lmdb.open(lmdb_dir, readonly=True).begin(write=False) as txn: - total_images = txn.stat()['entries'] - if max_images is None: - max_images = total_images - with TFRecordExporter(tfrecord_dir, max_images) as tfr: - for idx, (key, value) in enumerate(txn.cursor()): - try: - try: - img = cv2.imdecode(np.fromstring( - value, dtype=np.uint8), 1) - if img is None: - raise IOError('cv2.imdecode failed') - img = img[:, :, ::-1] # BGR => RGB - except IOError: - img = np.asarray(PIL.Image.open(io.BytesIO(value))) - crop = np.min(img.shape[:2]) - img = img[(img.shape[0] - crop) // 2: (img.shape[0] + crop) // - 2, (img.shape[1] - crop) // 2: (img.shape[1] + crop) // 2] - img = PIL.Image.fromarray(img, 'RGB') - img = img.resize((resolution, resolution), - PIL.Image.ANTIALIAS) - img = np.asarray(img) - img = img.transpose(2, 0, 1) # HWC => CHW - tfr.add_image(img) - except: - print(sys.exc_info()[1]) - if tfr.cur_images == max_images: - break - -# ---------------------------------------------------------------------------- - - -def create_celeba(tfrecord_dir, celeba_dir, cx=89, cy=121): - print('Loading CelebA from "%s"' % celeba_dir) - glob_pattern = os.path.join(celeba_dir, '*.jpg') - image_filenames = sorted(glob.glob(glob_pattern)) - expected_images = 202599 - print(len(image_filenames)) - if len(image_filenames) != expected_images: - error('Expected to find %d images' % expected_images) - - with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: - order = tfr.choose_shuffled_order() - for idx in range(order.size): - img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) - assert img.shape == (218, 178, 3) - img = img[cy - 64: cy + 64, cx - 64: cx + 64] - img = img.transpose(2, 0, 1) # HWC => CHW - tfr.add_image(img) - -# ---------------------------------------------------------------------------- - - -def create_celebahq(tfrecord_dir, celeba_dir, delta_dir, num_threads=4, num_tasks=100): - print('Loading CelebA from "%s"' % celeba_dir) - expected_images = 202599 - if len(glob.glob(os.path.join(celeba_dir, 'img_celeba', '*.jpg'))) != expected_images: - error('Expected to find %d images' % expected_images) - with open(os.path.join(celeba_dir, 'Anno', 'list_landmarks_celeba.txt'), 'rt') as file: - landmarks = [[float(value) for value in line.split()[1:]] - for line in file.readlines()[2:]] - landmarks = np.float32(landmarks).reshape(-1, 5, 2) - - print('Loading CelebA-HQ deltas from "%s"' % delta_dir) - import scipy.ndimage - import hashlib - import bz2 - import zipfile - import base64 - import cryptography.hazmat.primitives.hashes - import cryptography.hazmat.backends - import cryptography.hazmat.primitives.kdf.pbkdf2 - import 
cryptography.fernet - expected_zips = 30 - if len(glob.glob(os.path.join(delta_dir, 'delta*.zip'))) != expected_zips: - error('Expected to find %d zips' % expected_zips) - with open(os.path.join(delta_dir, 'image_list.txt'), 'rt') as file: - lines = [line.split() for line in file] - fields = dict() - for idx, field in enumerate(lines[0]): - type = int if field.endswith('idx') else str - fields[field] = [type(line[idx]) for line in lines[1:]] - indices = np.array(fields['idx']) - - # Must use pillow version 3.1.1 for everything to work correctly. - if getattr(PIL, 'PILLOW_VERSION', '') != '3.1.1': - # conda install pillow=3.1.1 - error('create_celebahq requires pillow version 3.1.1') - - # Must use libjpeg version 8d for everything to work correctly. - img = np.array(PIL.Image.open(os.path.join( - celeba_dir, 'img_celeba', '000001.jpg'))) - md5 = hashlib.md5() - md5.update(img.tobytes()) - if md5.hexdigest() != '9cad8178d6cb0196b36f7b34bc5eb6d3': - # conda install jpeg=8d - error('create_celebahq requires libjpeg version 8d') - - def rot90(v): - return np.array([-v[1], v[0]]) - - def process_func(idx): - # Load original image. - orig_idx = fields['orig_idx'][idx] - orig_file = fields['orig_file'][idx] - orig_path = os.path.join(celeba_dir, 'img_celeba', orig_file) - img = PIL.Image.open(orig_path) - - # Choose oriented crop rectangle. - lm = landmarks[orig_idx] - eye_avg = (lm[0] + lm[1]) * 0.5 + 0.5 - mouth_avg = (lm[3] + lm[4]) * 0.5 + 0.5 - eye_to_eye = lm[1] - lm[0] - eye_to_mouth = mouth_avg - eye_avg - x = eye_to_eye - rot90(eye_to_mouth) - x /= np.hypot(*x) - x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8) - y = rot90(x) - c = eye_avg + eye_to_mouth * 0.1 - quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y]) - zoom = 1024 / (np.hypot(*x) * 2) - - # Shrink. - shrink = int(np.floor(0.5 / zoom)) - if shrink > 1: - size = (int(np.round( - float(img.size[0]) / shrink)), int(np.round(float(img.size[1]) / shrink))) - img = img.resize(size, PIL.Image.ANTIALIAS) - quad /= shrink - zoom *= shrink - - # Crop. - border = max(int(np.round(1024 * 0.1 / zoom)), 3) - crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int( - np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1])))) - crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), - min(crop[2] + border, img.size[0]), min(crop[3] + border, img.size[1])) - if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]: - img = img.crop(crop) - quad -= crop[0:2] - - # Simulate super-resolution. - superres = int(np.exp2(np.ceil(np.log2(zoom)))) - if superres > 1: - img = img.resize( - (img.size[0] * superres, img.size[1] * superres), PIL.Image.ANTIALIAS) - quad *= superres - zoom /= superres - - # Pad. 
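-        # The oriented crop rectangle can extend past the image borders.
-        # Reflect-pad the missing area, then blur the padded region and
-        # blend it towards the median colour, so the synthesised border
-        # fades out smoothly instead of adding sharp artefacts.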
- pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int( - np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1])))) - pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - - img.size[0] + border, 0), max(pad[3] - img.size[1] + border, 0)) - if max(pad) > border - 4: - pad = np.maximum(pad, int(np.round(1024 * 0.3 / zoom))) - img = np.pad(np.float32( - img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect') - h, w, _ = img.shape - y, x, _ = np.mgrid[:h, :w, :1] - mask = 1.0 - np.minimum(np.minimum(np.float32(x) / pad[0], np.float32( - y) / pad[1]), np.minimum(np.float32(w-1-x) / pad[2], np.float32(h-1-y) / pad[3])) - blur = 1024 * 0.02 / zoom - img += (scipy.ndimage.gaussian_filter(img, - [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0) - img += (np.median(img, axis=(0, 1)) - img) * \ - np.clip(mask, 0.0, 1.0) - img = PIL.Image.fromarray( - np.uint8(np.clip(np.round(img), 0, 255)), 'RGB') - quad += pad[0:2] - - # Transform. - img = img.transform((4096, 4096), PIL.Image.QUAD, - (quad + 0.5).flatten(), PIL.Image.BILINEAR) - img = img.resize((1024, 1024), PIL.Image.ANTIALIAS) - img = np.asarray(img).transpose(2, 0, 1) - - # Verify MD5. - md5 = hashlib.md5() - md5.update(img.tobytes()) - assert md5.hexdigest() == fields['proc_md5'][idx] - - # Load delta image and original JPG. - with zipfile.ZipFile(os.path.join(delta_dir, 'deltas%05d.zip' % (idx - idx % 1000)), 'r') as zip: - delta_bytes = zip.read('delta%05d.dat' % idx) - with open(orig_path, 'rb') as file: - orig_bytes = file.read() - - # Decrypt delta image, using original JPG data as decryption key. - algorithm = cryptography.hazmat.primitives.hashes.SHA256() - backend = cryptography.hazmat.backends.default_backend() - salt = bytes(orig_file, 'ascii') - kdf = cryptography.hazmat.primitives.kdf.pbkdf2.PBKDF2HMAC( - algorithm=algorithm, length=32, salt=salt, iterations=100000, backend=backend) - key = base64.urlsafe_b64encode(kdf.derive(orig_bytes)) - delta = np.frombuffer(bz2.decompress(cryptography.fernet.Fernet( - key).decrypt(delta_bytes)), dtype=np.uint8).reshape(3, 1024, 1024) - - # Apply delta image. - img = img + delta - - # Verify MD5. 
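-        # The delta-reconstructed 1024x1024 image must match the md5
-        # recorded in image_list.txt bit-for-bit; a mismatch typically
-        # means the pillow/libjpeg version checks above were not
-        # satisfied or the deltas were decrypted incorrectly.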
- md5 = hashlib.md5() - md5.update(img.tobytes()) - assert md5.hexdigest() == fields['final_md5'][idx] - return img - - with TFRecordExporter(tfrecord_dir, indices.size) as tfr: - order = tfr.choose_shuffled_order() - with ThreadPool(num_threads) as pool: - for img in pool.process_items_concurrently(indices[order].tolist(), process_func=process_func, max_items_in_flight=num_tasks): - tfr.add_image(img) - -# ---------------------------------------------------------------------------- - - -def create_from_images(tfrecord_dir, image_dir, shuffle): - print('Loading images from "%s"' % image_dir) - image_filenames = sorted(glob.glob(os.path.join(image_dir, '*'))) - if len(image_filenames) == 0: - error('No input images found') - - img = np.asarray(PIL.Image.open(image_filenames[0])) - resolution = img.shape[0] - channels = img.shape[2] if img.ndim == 3 else 1 - if img.shape[1] != resolution: - error('Input images must have the same width and height') - if resolution != 2 ** int(np.floor(np.log2(resolution))): - error('Input image resolution must be a power-of-two') - if channels not in [1, 3]: - error('Input images must be stored as RGB or grayscale') - - with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: - order = tfr.choose_shuffled_order() if shuffle else np.arange(len(image_filenames)) - for idx in range(order.size): - img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) - if channels == 1: - img = img[np.newaxis, :, :] # HW => CHW - else: - img = img.transpose(2, 0, 1) # HWC => CHW - tfr.add_image(img) - -# ---------------------------------------------------------------------------- - - -def create_from_hdf5(tfrecord_dir, hdf5_filename, shuffle): - print('Loading HDF5 archive from "%s"' % hdf5_filename) - import h5py # conda install h5py - with h5py.File(hdf5_filename, 'r') as hdf5_file: - hdf5_data = max([value for key, value in hdf5_file.items( - ) if key.startswith('data')], key=lambda lod: lod.shape[3]) - with TFRecordExporter(tfrecord_dir, hdf5_data.shape[0]) as tfr: - order = tfr.choose_shuffled_order( - ) if shuffle else np.arange(hdf5_data.shape[0]) - for idx in range(order.size): - tfr.add_image(hdf5_data[order[idx]]) - npy_filename = os.path.splitext(hdf5_filename)[0] + '-labels.npy' - if os.path.isfile(npy_filename): - tfr.add_labels(np.load(npy_filename)[order]) - -# ---------------------------------------------------------------------------- - - -def execute_cmdline(argv): - prog = argv[0] - parser = argparse.ArgumentParser( - prog=prog, - description='Tool for creating, extracting, and visualizing Progressive GAN datasets.', - epilog='Type "%s -h" for more information.' 
% prog) - - subparsers = parser.add_subparsers(dest='command') - subparsers.required = True - - def add_command(cmd, desc, example=None): - epilog = 'Example: %s %s' % ( - prog, example) if example is not None else None - return subparsers.add_parser(cmd, description=desc, help=desc, epilog=epilog) - - p = add_command('display', 'Display images in dataset.', - 'display datasets/mnist') - p.add_argument('tfrecord_dir', help='Directory containing dataset') - - p = add_command('extract', 'Extract images from dataset.', - 'extract datasets/mnist mnist-images') - p.add_argument('tfrecord_dir', help='Directory containing dataset') - p.add_argument('output_dir', - help='Directory to extract the images into') - - p = add_command('compare', 'Compare two datasets.', - 'compare datasets/mydataset datasets/mnist') - p.add_argument('tfrecord_dir_a', - help='Directory containing first dataset') - p.add_argument('tfrecord_dir_b', - help='Directory containing second dataset') - p.add_argument('--ignore_labels', - help='Ignore labels (default: 0)', type=int, default=0) - - p = add_command('create_mnist', 'Create dataset for MNIST.', - 'create_mnist datasets/mnist ~/downloads/mnist') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('mnist_dir', help='Directory containing MNIST') - - p = add_command('create_mnistrgb', 'Create dataset for MNIST-RGB.', - 'create_mnistrgb datasets/mnistrgb ~/downloads/mnist') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('mnist_dir', help='Directory containing MNIST') - p.add_argument('--num_images', - help='Number of composite images to create (default: 1000000)', type=int, default=1000000) - p.add_argument('--random_seed', - help='Random seed (default: 123)', type=int, default=123) - - p = add_command('create_cifar10', 'Create dataset for CIFAR-10.', - 'create_cifar10 datasets/cifar10 ~/downloads/cifar10') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('cifar10_dir', help='Directory containing CIFAR-10') - - p = add_command('create_cifar100', 'Create dataset for CIFAR-100.', - 'create_cifar100 datasets/cifar100 ~/downloads/cifar100') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('cifar100_dir', help='Directory containing CIFAR-100') - - p = add_command('create_svhn', 'Create dataset for SVHN.', - 'create_svhn datasets/svhn ~/downloads/svhn') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('svhn_dir', help='Directory containing SVHN') - - p = add_command('create_lsun', 'Create dataset for single LSUN category.', - 'create_lsun datasets/lsun-car-100k ~/downloads/lsun/car_lmdb --resolution 256 --max_images 100000') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument( - 'lmdb_dir', help='Directory containing LMDB database') - p.add_argument('--resolution', - help='Output resolution (default: 256)', type=int, default=256) - p.add_argument('--max_images', - help='Maximum number of images (default: none)', type=int, default=None) - - p = add_command('create_celeba', 'Create dataset for CelebA.', - 'create_celeba datasets/celeba ~/downloads/celeba') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('celeba_dir', help='Directory containing CelebA') - p.add_argument( - '--cx', help='Center X coordinate (default: 89)', type=int, default=89) - p.add_argument( 
- '--cy', help='Center Y coordinate (default: 121)', type=int, default=121) - - p = add_command('create_celebahq', 'Create dataset for CelebA-HQ.', - 'create_celebahq datasets/celebahq ~/downloads/celeba ~/downloads/celeba-hq-deltas') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('celeba_dir', help='Directory containing CelebA') - p.add_argument('delta_dir', - help='Directory containing CelebA-HQ deltas') - p.add_argument('--num_threads', - help='Number of concurrent threads (default: 4)', type=int, default=4) - p.add_argument('--num_tasks', - help='Number of concurrent processing tasks (default: 100)', type=int, default=100) - - p = add_command('create_from_images', 'Create dataset from a directory full of images.', - 'create_from_images datasets/mydataset myimagedir') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('image_dir', help='Directory containing the images') - p.add_argument('--shuffle', - help='Randomize image order (default: 1)', type=int, default=1) - - p = add_command('create_from_hdf5', 'Create dataset from legacy HDF5 archive.', - 'create_from_hdf5 datasets/celebahq ~/downloads/celeba-hq-1024x1024.h5') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('hdf5_filename', - help='HDF5 archive containing the images') - p.add_argument('--shuffle', - help='Randomize image order (default: 1)', type=int, default=1) - - args = parser.parse_args(argv[1:] if len(argv) > 1 else ['-h']) - func = globals()[args.command] - del args.command - func(**vars(args)) - -# ---------------------------------------------------------------------------- - - -if __name__ == "__main__": - execute_cmdline(sys.argv) - -# ---------------------------------------------------------------------------- -""" -Download and extract celebA dataset - -Modification of https://github.com/stanfordnlp/treelstm/blob/master/scripts/download.py -Downloads the following: -- Celeb-A dataset -- LSUN dataset -- MNIST dataset -""" - -import os -import sys -import gzip -import json -import shutil -import zipfile -import tarfile -import argparse -import subprocess -from six.moves import urllib - -path_data_raw = './data/raw' - -parser = argparse.ArgumentParser(description='Download dataset.') -parser.add_argument('datasets', metavar='N', type=str, nargs='+', choices=['celebA', 'cifar', 'mnist'], - help='name of dataset to download [celebA, cifar, mnist]') - - -def download(url, dirpath): - filename = url.split('/')[-1] - filepath = os.path.join(dirpath, filename) - u = urllib.request.urlopen(url) - f = open(filepath, 'wb') - filesize = int(u.headers["Content-Length"]) - print("Downloading: %s Bytes: %s" % (filename, filesize)) - - downloaded = 0 - block_sz = 8192 - status_width = 70 - while True: - buf = u.read(block_sz) - if not buf: - print('') - break - else: - print('', end='\r') - downloaded += len(buf) - f.write(buf) - status = (("[%-" + str(status_width + 1) + "s] %3.2f%%") % - ('=' * int(float(downloaded) / filesize * status_width) + '>', downloaded * 100. 
/ filesize)) - print(status, end='') - sys.stdout.flush() - f.close() - return filepath - - -def unzip(filepath): - print("Extracting: " + filepath) - dirpath = os.path.dirname(filepath) - with zipfile.ZipFile(filepath) as zf: - zf.extractall(dirpath) - os.remove(filepath) - - -def reshape_celebA(path_to_data): - from scipy import misc - import numpy as np - from PIL import Image - files_read = [] - for root, subFolders, files in os.walk(path_to_data): - print(root) - print(subFolders) - print(len(files)) - for f in files: - if f.endswith('.jpg') or f.endswith('.png') or f.endswith('.jpeg'): - files_read.append(os.path.join(root, f)) - # print(files_read[-1]) - print('one subdir done') - # files = [f for f in os.listdir(path_to_data) if f.endswith('.jpg') or f.endswith('.png') or f.endswith('.jpeg')] - print('Done listing files') - images = [] - for f in files_read: - try: - # im = misc.imread(f) - im = Image.open(f) - im = np.array(im) - # print(im) - except IOError: - print('Could not read: %s' % f) - if len(im.shape) == 2: - im = np.expand_dims(im, -1) - images.append(im) - print('Done reading files') - num_c = images[0].shape[-1] - for i in range(len(images)): - images[i] = misc.imresize(images[i], (64, 64, num_c)) - # if len(images[i].shape) == 3: - # images[i] = np.expand_dims(images[i], 0) - data = np.stack(images, axis=0).astype(np.float32) - np.save(os.path.join(path_to_data, 'celeb_64.npy'), data) - - -def download_celeb_a(dirpath): - data_dir = 'celebA' - if os.path.exists(os.path.join(dirpath, data_dir)): - print('Found Celeb-A - skip') - return - url = 'https://www.dropbox.com/sh/8oqt9vytwxb3s4r/AADIKlz8PR9zr6Y20qbkunrba/Img/img_align_celeba.zip?dl=1&pv=1' - filepath = download(url, dirpath) - zip_dir = '' - with zipfile.ZipFile(filepath) as zf: - zip_dir = zf.namelist()[0] - zf.extractall(dirpath) - os.remove(filepath) - os.rename(os.path.join(dirpath, zip_dir), os.path.join(dirpath, data_dir)) - reshape_celebA(os.path.join(dirpath, data_dir)) - - -def _list_categories(tag): - url = 'http://lsun.cs.princeton.edu/htbin/list.cgi?tag=' + tag - f = urllib.request.urlopen(url) - return json.loads(f.read()) - - -def _download_lsun(out_dir, category, set_name, tag): - url = 'http://lsun.cs.princeton.edu/htbin/download.cgi?tag={tag}' \ - '&category={category}&set={set_name}'.format(**locals()) - print(url) - if set_name == 'test': - out_name = 'test_lmdb.zip' - else: - out_name = '{category}_{set_name}_lmdb.zip'.format(**locals()) - out_path = os.path.join(out_dir, out_name) - cmd = ['curl', url, '-o', out_path] - print('Downloading', category, set_name, 'set') - subprocess.call(cmd) - - -def download_lsun(dirpath): - data_dir = os.path.join(dirpath, 'lsun') - if os.path.exists(data_dir): - print('Found LSUN - skip') - return - else: - os.mkdir(data_dir) - - tag = 'latest' - # categories = _list_categories(tag) - categories = ['bedroom'] - - for category in categories: - _download_lsun(data_dir, category, 'train', tag) - _download_lsun(data_dir, category, 'val', tag) - _download_lsun(data_dir, '', 'test', tag) - - -def _download_cifar(out_dir): - url = 'https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz' - print(url) - # if set_name == 'test': - # out_name = 'test_lmdb.zip' - # else: - # out_name = '{category}_{set_name}_lmdb.zip'.format(**locals()) - - file_path = os.path.join(out_dir, 'cifar-10-python.tar.gz') - if not os.path.exists(file_path): - cmd = ['wget', url, '-P', out_dir] - print('Downloading CIFAR') - subprocess.call(cmd) - # tfile = tarfile.TarFile(file_path) - 
with tarfile.open(name=file_path, mode='r:gz') as tfile: - tfile.extractall(path=out_dir) - - -def download_cifar(dirpath): - data_dir = os.path.join(dirpath, 'cifar-10-batches-py') - if os.path.exists(data_dir): - print('Found CIFAR - skip') - return - _download_cifar(dirpath) - - -def download_mnist(dirpath): - data_dir = os.path.join(dirpath, 'mnist') - if os.path.exists(data_dir): - print('Found MNIST - skip') - return - else: - os.mkdir(data_dir) - url_base = 'http://yann.lecun.com/exdb/mnist/' - file_names = ['train-images-idx3-ubyte.gz', 'train-labels-idx1-ubyte.gz', - 't10k-images-idx3-ubyte.gz', 't10k-labels-idx1-ubyte.gz'] - for file_name in file_names: - url = (url_base+file_name).format(**locals()) - print(url) - out_path = os.path.join(data_dir, file_name) - cmd = ['curl', url, '-o', out_path] - print('Downloading ', file_name) - subprocess.call(cmd) - cmd = ['gzip', '-d', out_path] - print('Decompressing ', file_name) - subprocess.call(cmd) - - -def prepare_data_dir(path=path_data_raw): - if not os.path.exists(path): - os.mkdir(path) - - -if __name__ == '__main__': - args = parser.parse_args() - prepare_data_dir() - - if 'celebA' in args.datasets: - download_celeb_a(path_data_raw) - if 'cifar' in args.datasets: - download_cifar(path_data_raw) - # if 'lsun' in args.datasets: - # download_lsun('./data') - if 'mnist' in args.datasets: - download_mnist(path_data_raw) -""" -Download and extract celebA dataset (original version, un-aligned) - -Note: to run this script, first make sure the datafile is manually downloaded and stored at './data/raw/celebA_wild_7z' -celebA, orignial (non-aligned) version of data can be downloaded from: -https://drive.google.com/drive/folders/0B7EVK8r0v71pTUZsaXdaSnZBZzg -in the local hard disk, it should be -./data/raw/celebA_wild_7z/img_celeba.7z.001 - img_celeba.7z.002 - ... - img_celeba.7z.014 - -celebA annotations should be manually downloaded and stored at './data/raw/celebA_annotation' -celabA annotations can be downloaded at: -https://drive.google.com/drive/folders/0B4qLcYyJmiz0TXY1NG02bzZVRGs?usp=drive_open -in the local hard disk, it should be -./data/raw/celebA_annotation/identity_CelebA.txt - list_attr_celeba.txt - list_bbox_celeba.txt - list_landmarks_align_celeba.txt - list_landmarks_celba.txt - - -celebA HQ delta should be manually downloaded and stored at './data/raw/celebA_hq_delta' -it can be downloaded at: -https://drive.google.com/drive/folders/0B4qLcYyJmiz0TXY1NG02bzZVRGs -in the local hard disk, it should be -./data/raw/celebA_hq_deltas/deltas00000.zip - deltas01000.zip - ... 
- deltas29000.zip
-
-"""
-
-import os
-import sys
-import gzip
-import json
-import shutil
-import zipfile
-import tarfile
-import argparse
-import subprocess
-from six.moves import urllib
-
-
-""" process celebA in the wild """
-path_celebA_aligned = './data/raw/celebA'
-path_celebA_wild_7z = './data/raw/celebA_wild_7z'
-name_file_first = 'img_celeba.7z.001'
-name_file_combined = 'img_celeba.7z'
-path_celebA_wild_extracted = './data/raw/celebA_wild'
-
-path_celebA_wild_7z_file_to_extract = os.path.join(
-    path_celebA_wild_7z, name_file_first)
-
-if not os.path.exists(path_celebA_wild_extracted):
-    os.mkdir(path_celebA_wild_extracted)
-
-if os.path.exists(path_celebA_wild_7z_file_to_extract):
-    os.system('7z x {} -tiso.split -o{}'.format(path_celebA_wild_7z_file_to_extract,
-                                                path_celebA_wild_extracted))
-    os.system('7z x {} -o{}'.format(os.path.join(path_celebA_wild_extracted, name_file_combined),
-                                    path_celebA_wild_extracted))
-else:
-    raise Exception(
-        'data file does not exist for extraction: ./data/raw/celebA_wild_7z/img_celeba.7z.001')
-
-
-""" process celebA HQ delta """
-path_celebA_hq = './data/raw/celebA_hq_deltas_zip'
-
-
-""" generate celeb HQ data """
-# not used here
-
-# run the following in terminal to generate tf_record version of data
-# ~/ana*/envs/ten*p36/bin/python ./src/ingestion/dataset_tool_modify.py create_celeba ./data/processed/celeba ./data/raw/celebA
-""" process the transient attribute scene dataset """
-
-import os
-import warnings
-import urllib.request
-import tarfile
-
-url_dataset = "http://transattr.cs.brown.edu/files/aligned_images.tar"
-url_datalabel = "http://transattr.cs.brown.edu/files/annotations.tar"
-
-path_data = './data/raw/transient_attribute_scenes'
-if not os.path.exists(path_data):
-    os.mkdir(path_data)
-
-path_file_dataset = './data/raw/transient_attribute_scenes/aligned_images.tar'
-path_file_datalabel = './data/raw/transient_attribute_scenes/annotations.tar'
-
-
-##
-# assume you are running from the project base
-
-if not os.path.exists(path_file_dataset):
-    urllib.request.urlretrieve(url_dataset, path_file_dataset)
-
-if not os.path.exists(path_file_datalabel):
-    urllib.request.urlretrieve(url_datalabel, path_file_datalabel)
-
-##
-# untar datafile
-with tarfile.open(path_file_dataset) as f:
-    f.extractall(path_data)
-
-with tarfile.open(path_file_datalabel) as f:
-    f.extractall(path_data)
-""" train and test a convolutional neural network for predicting face attributes on celebA """
-
-import os
-import time
-import glob
-import numpy as np
-import pandas as pd
-import PIL
-import keras
-import keras.applications
-import keras.layers as layers
-from keras.applications.mobilenet import preprocess_input
-
-path_celeba_img = './data/processed/celebA_crop'
-path_celeba_att = './data/raw/celebA_annotation/list_attr_celeba.txt'
-path_model_save = './asset_model/cnn_face_attr_celeba'
-
-""" create directories if they do not exist (path_celeba_att is a file, so it is not created here) """
-for path_used in [path_celeba_img, path_model_save]:
-    if not os.path.exists(path_used):
-        os.makedirs(path_used)
-
-
-def create_cnn_model(size_output=None, tf_print=False):
-    """
-    create a keras model from the convolution layers of MobileNet with fully connected layers added on top
-    :param size_output: number of nodes in the output layer
-    :param tf_print: True/False to print a model summary
-    :return: keras model object
-    """
-
-    if size_output is None:
-        # get the number of attributes, needed to define the final layer size of the network
-        df_attr = pd.read_csv(path_celeba_att, sep='\s+',
-                              header=1,
index_col=0) - size_output = df_attr.shape[1] - - # Load the convolutional layers of pretrained model: mobilenet - base_model = keras.applications.mobilenet.MobileNet(include_top=False, input_shape=(128, 128, 3), - alpha=1, depth_multiplier=1, - dropout=0.001, weights="imagenet", - input_tensor=None, pooling=None) - - # add fully connected layers - fc0 = base_model.output - fc0_pool = layers.GlobalAveragePooling2D( - data_format='channels_last', name='fc0_pool')(fc0) - fc1 = layers.Dense(256, activation='relu', name='fc1_dense')(fc0_pool) - fc2 = layers.Dense(size_output, activation='tanh', name='fc2_dense')(fc1) - - model = keras.models.Model(inputs=base_model.input, outputs=fc2) - - # freeze the early layers - for layer in base_model.layers: - layer.trainable = False - - model.compile(optimizer='sgd', loss='mean_squared_error') - - if tf_print: - print('use convolution layers of MobileNet, add fully connected layers') - print(model.summary()) - - return model - - -def get_data_info(path_celeba_img=path_celeba_img, path_celeba_att=path_celeba_att, yn_print_head_tail=False): - """ - function to get names of images files and and pandas data-frame containing face attributes - - :param path_celeba_img: path to image files directory (cropped to 128*128) - :param path_celeba_att: path to face attribute file (the original txt) - :param yn_print_head_tail: true/false to print head and tail of data - :return: img_names(list of file names of images), df_attr (pandas dataframe of face attributes) - """ - df_attr = pd.read_csv(path_celeba_att, sep='\s+', header=1, index_col=0) - - img_names = os.listdir(path_celeba_img) - img_names = [img_name for img_name in img_names if img_name[-4:] == '.jpg'] - img_names.sort() - - assert df_attr.shape[0] == len( - img_names), 'images number does not match attribute table' - - if yn_print_head_tail: - print(df_attr.head(3)) - print(df_attr.tail(3)) - print(img_names[:3]) - print(img_names[-3:]) - - assert df_attr.shape[0] == len(img_names), \ - 'images number does not match attribute table' - assert set(img_names) == set(df_attr.index.tolist()), \ - 'image names are not consistent between image files and attribute table ' - - return img_names, df_attr - - -try: - img_names, df_attr = get_data_info() - num_image, num_attr = df_attr.shape -except: - raise Exception( - 'can not reach data needed for training, here we can only do test') - - -def get_data_sample(img_idx=None, img_name=None, yn_interactive_plot=False): - """ - function to load one image and the corresponding attributes, either using idx_img or img_name - - :param img_idx: index of image - :param img_name: name of image, will overwrite img_idx if given - :param yn_interactive_plot: True/False to print the sample - :return: image (3d array, H*W*RGB), attributes (1d array) - """ - - if img_name is None: # if not given, use img_idx to find the name - if img_idx is None: # if not given, randomly select one - img_idx = np.random.randint(num_image) - img_name = img_names[img_idx] - - img = np.asarray(PIL.Image.open(os.path.join( - path_celeba_img, img_name))) # load image - # get labels - labels = df_attr.loc[img_name] - - if yn_interactive_plot: # if show things interactively for verification - import matplotlib.pyplot as plt - print(labels) - print("image file name: {}".format(img_name)) - plt.imshow(img) - plt.show() - - x = img - y = np.array(labels) - return x, y - - -def load_data_batch(num_images_total=None): - """ - load data and preprocess before feeding it to Keras model - :param num_images_total: - 
:return: - """ - - list_x, list_y = [], [] - - if num_images_total is None: - image_names_select = img_names - else: - image_names_select = np.random.choice( - img_names, num_images_total, replace=False) - - for img_name in image_names_select: - x, y = get_data_sample(img_name=img_name, yn_interactive_plot=False) - list_x.append(x) - list_y.append(y) - - x_batch = np.stack(list_x, axis=0) - y_batch = np.stack(list_y, axis=0) - - x_batch_ready = preprocess_input(x_batch.copy()) - y_batch_ready = np.array(y_batch, dtype='float32') - - return x_batch_ready, y_batch_ready - -## - - -def train_protocol(): - """ train the model with model.fit() """ - - model = create_cnn_model(tf_print=True) - - model.compile(optimizer='adam', loss='mean_squared_error') - - x_all, y_all = load_data_batch(num_images_total=2**16) - - model.fit(x=x_all, y=y_all, batch_size=128, epochs=50, verbose=1, - validation_split=0.125, shuffle=True) - - name_model_save = os.path.join( - path_model_save, 'model_{}.h5'.format(gen_time_str())) - model.save(filepath=name_model_save) - - return model - - -## -def gen_time_str(): - return time.strftime("%Y%m%d_%H%M%S", time.gmtime()) - - -def get_list_model_save(path_model_save=path_model_save): - return glob.glob(os.path.join(path_model_save, 'model*.h5')) -""" script to test Bokeh server """ - - -from random import random - -import bokeh -from bokeh.layouts import column -from bokeh.models import Button -from bokeh.palettes import RdYlBu3 -from bokeh.plotting import figure, curdoc - -# create a plot and style its properties -p = figure(x_range=(0, 100), y_range=(0, 100), toolbar_location=None) -p.border_fill_color = 'black' -p.background_fill_color = 'black' -p.outline_line_color = None -p.grid.grid_line_color = None - -# add a text renderer to our plot (no data yet) -r = p.text(x=[], y=[], text=[], text_color=[], text_font_size="20pt", - text_baseline="middle", text_align="center") - -i = 0 - -ds = r.data_source - -# create a callback that will add a number in a random location - - -def callback(): - global i - - # BEST PRACTICE --- update .data in one step with a new dict - new_data = dict() - new_data['x'] = ds.data['x'] + [random()*70 + 15] - new_data['y'] = ds.data['y'] + [random()*70 + 15] - new_data['text_color'] = ds.data['text_color'] + [RdYlBu3[i % 3]] - new_data['text'] = ds.data['text'] + [str(i)] - ds.data = new_data - - i = i + 1 - - -# add a button widget and configure with the call back -button = Button(label="Press Me") -button.on_click(callback) - -# put the button and plot in a layout and add to the document -curdoc().add_root(column(button, p)) - -# -# bokeh.plotting.show(p) -import importlib -import src.model.cnn_face_attr_celeba as cnn_face -importlib.reload(cnn_face) - -img_name, df_attr = cnn_face.get_data_info() - -model = cnn_face.create_cnn_model() - -x, y = cnn_face.get_data_sample(yn_interactive_plot=True) - -x_all, y_all = cnn_face.load_data_batch(num_images_total=2**16) - -cnn_face.train_protocol() - -model = cnn_face.create_cnn_model() -model.load_weights(cnn_face.get_list_model_save()[-1]) -""" test script """ - -import importlib -import numpy as np -import src.tl_gan.feature_axis as feature_axis - -importlib.reload(feature_axis) - -vectors = np.random.rand(10, 4) - -print(np.sum(vectors**2, axis=0)) - -vectors_normalized = feature_axis.normalize_feature_axis(vectors) - -print(np.sum(vectors_normalized**2, axis=0)) - -print(feature_axis.orthogonalize_one_vector( - np.array([1, 0, 0]), np.array([1, 1, 1]))) - -print(vectors_normalized) - 
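# sanity check (added sketch): normalize_feature_axis scales each column to
# unit L2 norm, so the column-wise sums of squares should all be ~1
assert np.allclose(np.sum(vectors_normalized**2, axis=0), 1.0), \
    'normalized feature axes should have unit length'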
-vectors_orthogonal = feature_axis.orthogonalize_vectors(vectors_normalized) -vectors_disentangled = feature_axis.disentangle_feature_axis_by_idx( - vectors, idx_base=[0], idx_target=[2, 3]) - -print(np.dot(vectors_normalized[:, -2], vectors_normalized[:, -1])) -print(np.dot(vectors_orthogonal[:, -2], vectors_orthogonal[:, -1])) - -feature_axis.plot_feature_cos_sim(vectors) -feature_axis.plot_feature_cos_sim(vectors_orthogonal) -feature_axis.plot_feature_cos_sim(vectors_disentangled) -""" script to test environment """ - -import tensorflow as tf -mnist = tf.keras.datasets.mnist - -(x_train, y_train), (x_test, y_test) = mnist.load_data() -x_train, x_test = x_train / 255.0, x_test / 255.0 - -model = tf.keras.models.Sequential([ - tf.keras.layers.Flatten(), - tf.keras.layers.Dense(512, activation=tf.nn.relu), - tf.keras.layers.Dropout(0.2), - tf.keras.layers.Dense(10, activation=tf.nn.softmax) -]) -model.compile(optimizer='adam', - loss='sparse_categorical_crossentropy', - metrics=['accuracy']) - -model.fit(x_train, y_train, epochs=5) -model.evaluate(x_test, y_test) -""" web gui using bokeh, not fulling functional yet """ - - -import src.tl_gan.feature_axis as feature_axis -import os -import glob -import sys -import numpy as np -import time -import pickle -import tensorflow as tf -import random - -import bokeh -from bokeh.layouts import column -from bokeh.models import Button -from bokeh.palettes import RdYlBu3 -from bokeh.plotting import figure, curdoc - -sys.path.append('.') - -""" load feature directions """ -path_feature_direction = './asset_results/pg_gan_celeba_feature_direction_40' - -pathfile_feature_direction = glob.glob(os.path.join( - path_feature_direction, 'feature_direction_*.pkl'))[-1] - -with open(pathfile_feature_direction, 'rb') as f: - feature_direction_name = pickle.load(f) - -feature_direction = feature_direction_name['direction'] -feature_name = feature_direction_name['name'] -num_feature = feature_direction.shape[1] - -""" load gan model """ - -# path to model code and weight -path_pg_gan_code = './src/model/pggan' -path_model = './asset_model/karras2018iclr-celebahq-1024x1024.pkl' -sys.path.append(path_pg_gan_code) - - -""" create tf session """ -yn_CPU_only = False - -if yn_CPU_only: - config = tf.ConfigProto(device_count={'GPU': 0}, allow_soft_placement=True) -else: - config = tf.ConfigProto(allow_soft_placement=True) - config.gpu_options.allow_growth = True - -sess = tf.InteractiveSession(config=config) - -try: - with open(path_model, 'rb') as file: - G, D, Gs = pickle.load(file) -except FileNotFoundError: - print('before running the code, download pre-trained model to project_root/asset_model/') - raise - -num_latent = Gs.input_shapes[0][1] - -latents = np.random.randn(1, *Gs.input_shapes[0][1:]) -# Generate dummy labels -dummies = np.zeros([latents.shape[0]] + Gs.input_shapes[1][1:]) - - -def gen_image(latents): - """ - tool funciton to generate image from latent variables - :param latents: latent variables - :return: - """ - images = Gs.run(latents, dummies) - images = np.clip(np.rint((images + 1.0) / 2.0 * 255.0), 0.0, - 255.0).astype(np.uint8) # [-1,1] => [0,255] - images = images.transpose(0, 2, 3, 1) # NCHW => NHWC - return images[0] - - -img_cur = gen_image(latents) - - -## -# create a plot and style its properties -def get_img_for_bokeh(img): - H, W, _ = img.shape - img_bokeh = np.empty([H, W], dtype=np.uint32) - img_bokeh_view = img_bokeh.view(dtype=np.uint8).reshape((H, W, 4)) - img_bokeh_view[:, :, :3] = np.flipud(img) - img_bokeh_view[:, :, 3] = 255 - 
return img_bokeh
-
-
-p = figure(x_range=(0, 128), y_range=(0, 128), toolbar_location=None)
-
-p.image_rgba(image=[get_img_for_bokeh(img_cur)], x=0, y=0, dw=128, dh=128)
-
-curdoc().add_root(p)
-
-# bokeh.plotting.show(p)
-""" module of functions related to discovering feature axis """
-
-import time
-import numpy as np
-import sklearn.linear_model as linear_model
-
-
-def find_feature_axis(z, y, method='linear', **kwargs_model):
-    """
-    function to find axes in the latent space that are predictive of feature vectors
-
-    :param z: vectors in the latent space, shape=(num_samples, num_latent_vector_dimension)
-    :param y: feature vectors, shape=(num_samples, num_features)
-    :param method: one of ['linear', 'tanh']; 'tanh' fits a linear regression on arctanh-transformed targets
-    :param kwargs_model: parameters passed to sklearn.linear_model.LinearRegression, (e.g., fit_intercept=False)
-    :return: feature axes, shape = (num_latent_vector_dimension, num_features)
-    """
-
-    if method == 'linear':
-        model = linear_model.LinearRegression(**kwargs_model)
-        model.fit(z, y)
-    elif method == 'tanh':
-        def arctanh_clip(y):
-            return np.arctanh(np.clip(y, np.tanh(-3), np.tanh(3)))
-
-        model = linear_model.LinearRegression(**kwargs_model)
-
-        model.fit(z, arctanh_clip(y))
-    else:
-        raise Exception('method has to be one of ["linear", "tanh"]')
-
-    return model.coef_.transpose()
-
-
-def normalize_feature_axis(feature_slope):
-    """
-    function to normalize the slopes of feature axes so that they all have unit length
-
-    :param feature_slope: array of feature axes, shape = (num_latent_vector_dimension, num_features)
-    :return: same shape as input
-    """
-
-    feature_direction = feature_slope / \
-        np.linalg.norm(feature_slope, ord=2, axis=0, keepdims=True)
-    return feature_direction
-
-
-def disentangle_feature_axis(feature_axis_target, feature_axis_base, yn_base_orthogonalized=False):
-    """
-    make feature_axis_target orthogonal to feature_axis_base
-
-    :param feature_axis_target: feature axes to decorrelate, shape = (num_dim, num_feature_0)
-    :param feature_axis_base: feature axes to decorrelate against, shape = (num_dim, num_feature_1)
-    :param yn_base_orthogonalized: True/False whether feature_axis_base is already orthogonalized
-    :return: feature_axis_decorrelated, shape = (num_dim, num_feature_0)
-    """
-
-    # make sure this function also works for a single 1D vector
-    if len(feature_axis_target.shape) == 1:
-        yn_single_vector_in = True
-        feature_axis_target = feature_axis_target[:, None]
-    else:
-        yn_single_vector_in = False
-
-    # orthogonalize the base axes unless the caller says they already are
-    if yn_base_orthogonalized:
-        feature_axis_base_orthogonal = feature_axis_base
-    else:
-        feature_axis_base_orthogonal = orthogonalize_vectors(feature_axis_base)
-
-    # orthogonalize every target vector against every base vector
-    feature_axis_decorrelated = feature_axis_target + 0  # +0 forces a copy
-    num_dim, num_feature_0 = feature_axis_target.shape
-    num_dim, num_feature_1 = feature_axis_base_orthogonal.shape
-    for i in range(num_feature_0):
-        for j in range(num_feature_1):
-            feature_axis_decorrelated[:, i] = orthogonalize_one_vector(feature_axis_decorrelated[:, i],
-                                                                       feature_axis_base_orthogonal[:, j])
-
-    # undo the reshape applied to a single 1D vector
-    if yn_single_vector_in:
-        result = feature_axis_decorrelated[:, 0]
-    else:
-        result = feature_axis_decorrelated
-
-    return result
-
-
-def disentangle_feature_axis_by_idx(feature_axis, idx_base=None, idx_target=None, yn_normalize=True):
-    """
-    disentangle correlated feature axes, make the features with index idx_target orthogonal to
-    those with index idx_base, wrapper of function disentangle_feature_axis()
-
-    :param feature_axis: all feature axes, shape = (num_dim, num_feature)
-    :param idx_base: index of base features (1D numpy array), to which the other features will be made orthogonal
-    :param idx_target: index of features to disentangle (1D numpy array), which will be disentangled from
-        the base features, defaults to all remaining features
-    :param yn_normalize: True/False to normalize the results
-    :return: disentangled feature axes, same shape as feature_axis
-    """
-
-    (num_dim, num_feature) = feature_axis.shape
-
-    # process default input
-    if idx_base is None or len(idx_base) == 0:  # if None or empty, do nothing
-        feature_axis_disentangled = feature_axis
-    else:  # otherwise, disentangle features
-        if idx_target is None:  # if None, use all remaining features
-            idx_target = np.setdiff1d(np.arange(num_feature), idx_base)
-
-        feature_axis_target = feature_axis[:, idx_target] + 0
-        feature_axis_base = feature_axis[:, idx_base] + 0
-        feature_axis_base_orthogonalized = orthogonalize_vectors(
-            feature_axis_base)
-        feature_axis_target_orthogonalized = disentangle_feature_axis(
-            feature_axis_target, feature_axis_base_orthogonalized, yn_base_orthogonalized=True)
-
-        feature_axis_disentangled = feature_axis + 0  # holder of results
-        feature_axis_disentangled[:,
-                                  idx_target] = feature_axis_target_orthogonalized
-        feature_axis_disentangled[:,
-                                  idx_base] = feature_axis_base_orthogonalized
-
-    # normalize output
-    if yn_normalize:
-        feature_axis_out = normalize_feature_axis(feature_axis_disentangled)
-    else:
-        feature_axis_out = feature_axis_disentangled
-    return feature_axis_out
-
-
-def orthogonalize_one_vector(vector, vector_base):
-    """
-    tool function, adjust vector so that it is orthogonal to vector_base
-    (i.e., vector minus its projection on vector_base)
-
-    :param vector: 1D array
-    :param vector_base: 1D array
-    :return: adjusted vector, orthogonal to vector_base
-    """
-    return vector - np.dot(vector, vector_base) / np.dot(vector_base, vector_base) * vector_base
-
-
-def orthogonalize_vectors(vectors):
-    """
-    tool function, adjust vectors so that they are orthogonal to each other (Gram-Schmidt),
-    takes O(num_vector^2) time
-
-    :param vectors: vectors, shape = (num_dimension, num_vector)
-    :return: orthogonal vectors, shape = (num_dimension, num_vector)
-    """
-    vectors_orthogonal = vectors + 0
-    num_dimension, num_vector = vectors.shape
-    for i in range(num_vector):
-        for j in range(i):
-            vectors_orthogonal[:, i] = orthogonalize_one_vector(
-                vectors_orthogonal[:, i], vectors_orthogonal[:, j])
-    return vectors_orthogonal
-
-
-def plot_feature_correlation(feature_direction, feature_name=None):
-    import matplotlib.pyplot as plt
-
-    len_z, len_y = feature_direction.shape
-    if feature_name is None:
-        feature_name = range(len_y)
-
-    feature_correlation = np.corrcoef(feature_direction.transpose())
-
-    c_lim_abs = np.max(np.abs(feature_correlation))
-
-    plt.pcolormesh(np.arange(len_y+1), np.arange(len_y+1), feature_correlation,
-                   cmap='coolwarm', vmin=-c_lim_abs, vmax=+c_lim_abs)
-    plt.gca().invert_yaxis()
-    plt.colorbar()
-    # plt.axis('square')
-    plt.xticks(np.arange(len_y) + 0.5, feature_name,
-               fontsize='x-small', rotation='vertical')
-    plt.yticks(np.arange(len_y) + 0.5, feature_name, fontsize='x-small')
-    plt.show()
-
-
-def plot_feature_cos_sim(feature_direction, feature_name=None):
-    """
-    plot cosine similarity measure of vectors
-
-    :param feature_direction: vectors, shape = (num_dimension, num_vector)
-    :param feature_name: list of names of features
-    :return: cosine similarity
matrix, shape = (num_vector, num_vector) - """ - import matplotlib.pyplot as plt - from sklearn.metrics.pairwise import cosine_similarity - - len_z, len_y = feature_direction.shape - if feature_name is None: - feature_name = range(len_y) - - feature_cos_sim = cosine_similarity(feature_direction.transpose()) - - c_lim_abs = np.max(np.abs(feature_cos_sim)) - - plt.pcolormesh(np.arange(len_y+1), np.arange(len_y+1), feature_cos_sim, - vmin=-c_lim_abs, vmax=+c_lim_abs, cmap='coolwarm') - plt.gca().invert_yaxis() - plt.colorbar() - # plt.axis('square') - plt.xticks(np.arange(len_y) + 0.5, feature_name, - fontsize='x-small', rotation='vertical') - plt.yticks(np.arange(len_y) + 0.5, feature_name, fontsize='x-small') - plt.show() - return feature_cos_sim -""" module to get the desierble order of features """ - -import numpy as np - -feature_name_celeba_org = [ - '5_o_Clock_Shadow', 'Arched_Eyebrows', 'Attractive', - 'Bags_Under_Eyes', 'Bald', 'Bangs', 'Big_Lips', 'Big_Nose', - 'Black_Hair', 'Blond_Hair', 'Blurry', 'Brown_Hair', - 'Bushy_Eyebrows', 'Chubby', 'Double_Chin', 'Eyeglasses', 'Goatee', - 'Gray_Hair', 'Heavy_Makeup', 'High_Cheekbones', 'Male', - 'Mouth_Slightly_Open', 'Mustache', 'Narrow_Eyes', 'No_Beard', - 'Oval_Face', 'Pale_Skin', 'Pointy_Nose', 'Receding_Hairline', - 'Rosy_Cheeks', 'Sideburns', 'Smiling', 'Straight_Hair', - 'Wavy_Hair', 'Wearing_Earrings', 'Wearing_Hat', 'Wearing_Lipstick', - 'Wearing_Necklace', 'Wearing_Necktie', 'Young' -] - -feature_name_celeba_rename = [ - 'Shadow', 'Arched_Eyebrows', 'Attractive', 'Eye_bags', 'Bald', - 'Bangs', 'Big_Lips', 'Big_Nose', 'Black_Hair', 'Blond_Hair', - 'Blurry', 'Brown_Hair', 'Bushy_Eyebrows', 'Chubby', 'Double_Chin', - 'Eyeglasses', 'Goatee', 'Gray_Hair', 'Makeup', 'High_Cheekbones', - 'Male', 'Mouth_Open', 'Mustache', 'Narrow_Eyes', 'Beard', - 'Oval_Face', 'Skin_Tone', 'Pointy_Nose', 'Hairline', 'Rosy_Cheeks', - 'Sideburns', 'Smiling', 'Straight_Hair', 'Wavy_Hair', 'Earrings', - 'Hat', 'Lipstick', 'Necklace', 'Necktie', 'Age' -] - -feature_reverse = np.array([ - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, -1, - 1, -1, 1, 1, 1, - 1, 1, 1, 1, 1, - 1, 1, 1, 1, -1 -]) - - -feature_celeba_layout = [ - [20, 39, 26, ], - [5, 28, 4, ], - [7, 27, 18], - [31, 21, 33], - [24, 16, 30], - [9, 8, 17], - [15, 34, 38], -] -""" module to generate iamges from pg-gan """ - -import os -import sys -import numpy as np -import tensorflow as tf -import PIL - -path_pg_gan_code = './src/model/pggan' -path_model = './asset_model/karras2018iclr-celebahq-1024x1024.pkl' -sys.path.append(path_pg_gan_code) - -len_z = 512 -len_dummy = 0 - - -def gen_single_img(z=None, Gs=None): - """ - function to generate image from noise - :param z: 1D array, latent vector for generating images - :param Gs: generator network of GAN - :return: one rgb image, H*W*3 - """ - - if z is None: # if input not given - z = np.random.randn(len_z) - if len(z.shape) == 1: - z = z[None, :] - dummy = np.zeros([z.shape[0], len_dummy]) - images = Gs.run(z, dummy) - images = np.clip(np.rint((images + 1.0) / 2.0 * 255.0), 0.0, - 255.0).astype(np.uint8) # [-1,1] => [0,255] - images = images.transpose(0, 2, 3, 1) # NCHW => NHWC - return images[0] - - -def save_img(img, pathfile): - PIL.Image.fromarray(img, 'RGB').save(pathfile) -""" -try face tl_gan using pg-gan model, modified from -https://drive.google.com/drive/folders/1A79qKDTFp6pExe4gTSgBsEOkxwa2oes_ -""" - -""" -prerequsit: before running the code, download pre-trained model to project_root/asset_model/ 
-pretrained model download url: https://drive.google.com/drive/folders/15hvzxt_XxuokSmj0uO4xxMTMWVc0cIMU -model name: karras2018iclr-celebahq-1024x1024.pkl -""" - - -# path to model code and weight -import os -import sys -import time -import pickle -import numpy as np -import tensorflow as tf -import PIL.Image -import datetime -path_pg_gan_code = './src/model/pggan' -path_model = './asset_model/karras2018iclr-celebahq-1024x1024.pkl' -sys.path.append(path_pg_gan_code) - -# path to model generated results -path_gen_sample = './asset_results/pggan_celeba_sample_pkl/' -if not os.path.exists(path_gen_sample): - os.mkdir(path_gen_sample) -path_gan_explore = './asset_results/pggan_celeba_explore/' -if not os.path.exists(path_gan_explore): - os.mkdir(path_gan_explore) - - -""" gen samples and save as pickle """ - -n_batch = 8000 -batch_size = 32 - -with tf.Session() as sess: - - # Import official CelebA-HQ networks. - try: - with open(path_model, 'rb') as file: - G, D, Gs = pickle.load(file) - except FileNotFoundError: - print('before running the code, download pre-trained model to project_root/asset_model/') - raise - - # Generate latent vectors. - # latents = np.random.RandomState(1000).randn(1000, *Gs.input_shapes[0][1:]) # 1000 random latents - # latents = latents[[477, 56, 83, 887, 583, 391, 86, 340, 341, 415]] # hand-picked top-10 - - for i_batch in range(n_batch): - try: - i_sample = i_batch * batch_size - - tic = time.time() - - latents = np.random.randn(batch_size, *Gs.input_shapes[0][1:]) - - # Generate dummy labels (not used by the official networks). - labels = np.zeros([latents.shape[0]] + Gs.input_shapes[1][1:]) - - # Run the generator to produce a set of images. - images = Gs.run(latents, labels) - - images = np.clip(np.rint((images + 1.0) / 2.0 * 255.0), - 0.0, 255.0).astype(np.uint8) # [-1,1] => [0,255] - images = images.transpose(0, 2, 3, 1) # NCHW => NHWC - - images = images[:, 4::8, 4::8, :] - - with open(os.path.join(path_gen_sample, 'pggan_celeba_{:0>6d}.pkl'.format(i_sample)), 'wb') as f: - pickle.dump({'z': latents, 'x': images}, f) - - toc = time.time() - print(i_sample, toc-tic) - - except: - print('error in {}'.format(i_sample)) - - -""" view generated samples """ -yn_view_sample = False -if yn_view_sample: - with open(os.path.join(path_gen_sample, 'pggan_celeba_{:0>6d}.pkl'.format(0)), 'rb') as f: - temp = pickle.load(f) - - import matplotlib.pyplot as plt - plt.imshow(temp['x'][0]) - plt.show() -""" generation of faces from one center image, and move along every feature axis """ - -import os -import glob -import sys -import numpy as np -import time -import pickle -import datetime -import tensorflow as tf -import PIL - - -# load feature directions -path_feature_direction = './asset_results/pg_gan_celeba_feature_direction_40' - -pathfile_feature_direction = glob.glob(os.path.join( - path_feature_direction, 'feature_direction_*.pkl'))[-1] - -with open(pathfile_feature_direction, 'rb') as f: - feature_direction_name = pickle.load(f) - -feature_direction = feature_direction_name['direction'] -feature_name = feature_direction_name['name'] - -## -""" test_discovered features """ - -# path to model code and weight -path_pg_gan_code = './src/model/pggan' -path_model = './asset_model/karras2018iclr-celebahq-1024x1024.pkl' -sys.path.append(path_pg_gan_code) - -path_gan_explore = './asset_results/pggan_celeba_feature_axis_explore/' -if not os.path.exists(path_gan_explore): - os.mkdir(path_gan_explore) - -""" play with the latent space """ -sess = tf.InteractiveSession() - -try: - 
with open(path_model, 'rb') as file: - G, D, Gs = pickle.load(file) -except FileNotFoundError: - print('before running the code, download pre-trained model to project_root/asset_model/') - raise - -batch_size = 7 - -## -latents_c = np.random.randn(1, *Gs.input_shapes[0][1:]) - -for i_feature in range(feature_direction.shape[1]): - latents_0 = latents_c - feature_direction[:, i_feature][None, :]*0.07 - latents_1 = latents_c + feature_direction[:, i_feature][None, :]*0.07 - - print(np.mean(latents_0-latents_1)**2) - - latents = np.random.randn(batch_size, *Gs.input_shapes[0][1:]) - for i_alpha, alpha in enumerate(np.linspace(0, 1, batch_size)): - latents[i_alpha, :] = latents_0[0]*(1-alpha) + latents_1[0]*alpha - - # Generate dummy labels (not used by the official networks). - labels = np.zeros([latents.shape[0]] + Gs.input_shapes[1][1:]) - - # Run the generator to produce a set of images. - images = Gs.run(latents, labels) - - images = np.clip(np.rint((images + 1.0) / 2.0 * 255.0), 0.0, - 255.0).astype(np.uint8) # [-1,1] => [0,255] - images = images.transpose(0, 2, 3, 1) # NCHW => NHWC - - # downsize images - # images = images[:, 2::4, 2::4] - - time_str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") - - # Save images as PNG. - for idx in range(images.shape[0]): - PIL.Image.fromarray(images[idx], 'RGB')\ - .save(os.path.join(path_gan_explore, - 'img_{}_{}_{}_{}.png'.format(time_str, i_feature, feature_name[i_feature], idx))) - np.save(os.path.join(path_gan_explore, - 'img_{}_{}.pkl'.format(time_str, i_feature)), labels) -""" generation of images interactively with ui control """ - -import src.tl_gan.feature_axis as feature_axis -import matplotlib.widgets as widgets -import matplotlib.pyplot as plt -import os -import glob -import sys -import numpy as np -import time -import pickle -import tensorflow as tf -import PIL -import matplotlib -matplotlib.use('TkAgg') -plt.ion() - - -def gen_time_str(): - """ tool function """ - return time.strftime("%Y%m%d_%H%M%S", time.gmtime()) - - -""" location to save images """ -path_gan_explore_interactive = './asset_results/pggan_celeba_feature_axis_explore_interactive/' -if not os.path.exists(path_gan_explore_interactive): - os.mkdir(path_gan_explore_interactive) - -## -""" load feature directions """ -path_feature_direction = './asset_results/pg_gan_celeba_feature_direction_40' - -pathfile_feature_direction = glob.glob(os.path.join( - path_feature_direction, 'feature_direction_*.pkl'))[-1] - -with open(pathfile_feature_direction, 'rb') as f: - feature_direction_name = pickle.load(f) - -feature_direction = feature_direction_name['direction'] -feature_name = feature_direction_name['name'] -num_feature = feature_direction.shape[1] - -## -""" load gan model """ - -# path to model code and weight -path_pg_gan_code = './src/model/pggan' -path_model = './asset_model/karras2018iclr-celebahq-1024x1024.pkl' -sys.path.append(path_pg_gan_code) - - -""" create tf session """ -yn_CPU_only = False - -if yn_CPU_only: - config = tf.ConfigProto(device_count={'GPU': 0}, allow_soft_placement=True) -else: - config = tf.ConfigProto(allow_soft_placement=True) - -sess = tf.InteractiveSession(config=config) - -try: - with open(path_model, 'rb') as file: - G, D, Gs = pickle.load(file) -except FileNotFoundError: - print('before running the code, download pre-trained model to project_root/asset_model/') - raise - -num_latent = Gs.input_shapes[0][1] - -## - -# Generate random latent variables -latents = np.random.randn(1, *Gs.input_shapes[0][1:]) -# Generate dummy labels -dummies 
= np.zeros([latents.shape[0]] + Gs.input_shapes[1][1:]) - - -def gen_image(latents): - """ - tool funciton to generate image from latent variables - :param latents: latent variables - :return: - """ - images = Gs.run(latents, dummies) - images = np.clip(np.rint((images + 1.0) / 2.0 * 255.0), 0.0, - 255.0).astype(np.uint8) # [-1,1] => [0,255] - images = images.transpose(0, 2, 3, 1) # NCHW => NHWC - return images[0] - - -img_cur = gen_image(latents) - - -## -""" plot figure with GUI """ -h_fig = plt.figure(figsize=[12, 6]) -h_ax = plt.axes([0.0, 0.0, 0.5, 1.0]) -h_ax.axis('off') -h_img = plt.imshow(img_cur) - -yn_save_fig = True - - -class GuiCallback(object): - counter = 0 - latents = latents - - def __init__(self): - self.latents = np.random.randn(1, *Gs.input_shapes[0][1:]) - self.feature_direction = feature_direction - self.feature_lock_status = np.zeros(num_feature).astype('bool') - self.feature_directoion_disentangled = feature_axis.disentangle_feature_axis_by_idx( - self.feature_direction, idx_base=np.flatnonzero(self.feature_lock_status)) - img_cur = gen_image(self.latents) - h_img.set_data(img_cur) - plt.draw() - - def random_gen(self, event): - self.latents = np.random.randn(1, *Gs.input_shapes[0][1:]) - img_cur = gen_image(self.latents) - h_img.set_data(img_cur) - plt.draw() - - def modify_along_feature(self, event, idx_feature, step_size=0.05): - self.latents += self.feature_directoion_disentangled[:, - idx_feature] * step_size - img_cur = gen_image(self.latents) - h_img.set_data(img_cur) - plt.draw() - plt.savefig(os.path.join(path_gan_explore_interactive, - '{}_{}_{}.png'.format(gen_time_str(), feature_name[idx_feature], ('pos' if step_size > 0 else 'neg')))) - - def set_feature_lock(self, event, idx_feature): - self.feature_lock_status[idx_feature] = np.logical_not( - self.feature_lock_status[idx_feature]) - self.feature_directoion_disentangled = feature_axis.disentangle_feature_axis_by_idx( - self.feature_direction, idx_base=np.flatnonzero(self.feature_lock_status)) - - -callback = GuiCallback() - -ax_randgen = plt.axes([0.55, 0.90, 0.15, 0.05]) -b_randgen = widgets.Button(ax_randgen, 'Random Generate') -b_randgen.on_clicked(callback.random_gen) - - -def get_loc_control(idx_feature, nrows=8, ncols=5, - xywh_range=(0.51, 0.05, 0.48, 0.8)): - r = idx_feature // ncols - c = idx_feature % ncols - x, y, w, h = xywh_range - xywh = x+c*w/ncols, y+(nrows-r-1)*h/nrows, w/ncols, h/nrows - return xywh - - -step_size = 0.4 - - -def create_button(idx_feature): - """ function to built button groups for one feature """ - x, y, w, h = get_loc_control(idx_feature) - - plt.text(x+w/2, y+h/2+0.01, feature_name[idx_feature], horizontalalignment='center', - transform=plt.gcf().transFigure) - - ax_neg = plt.axes((x + w / 8, y, w / 4, h / 2)) - b_neg = widgets.Button(ax_neg, '-', hovercolor='0.1') - b_neg.on_clicked(lambda event: - callback.modify_along_feature(event, idx_feature, step_size=-1 * step_size)) - - ax_pos = plt.axes((x + w * 5/8, y, w / 4, h / 2)) - b_pos = widgets.Button(ax_pos, '+', hovercolor='0.1') - b_pos.on_clicked(lambda event: - callback.modify_along_feature(event, idx_feature, step_size=+1 * step_size)) - - ax_lock = plt.axes((x + w * 3/8, y, w / 4, h / 2)) - b_lock = widgets.CheckButtons(ax_lock, ['L'], [False]) - b_lock.on_clicked(lambda event: - callback.set_feature_lock(event, idx_feature)) - return b_neg, b_pos, b_lock - - -list_buttons = [] -for idx_feature in range(num_feature): - list_buttons.append(create_button(idx_feature)) - -plt.show() - - -## -# sess.close() -""" 
-try face tl_gan using pg-gan model, modified from -https://drive.google.com/drive/folders/1A79qKDTFp6pExe4gTSgBsEOkxwa2oes_ -""" - -""" -prerequsit: before running the code, download pre-trained model to project_root/asset_model/ -pretrained model download url: https://drive.google.com/drive/folders/15hvzxt_XxuokSmj0uO4xxMTMWVc0cIMU -model name: karras2018iclr-celebahq-1024x1024.pkl -""" - - -# path to model code and weight -import os -import sys -import time -import pickle -import numpy as np -import tensorflow as tf -import PIL.Image -import datetime -path_pg_gan_code = './src/model/pggan' -path_model = './asset_model/karras2018iclr-celebahq-1024x1024.pkl' -sys.path.append(path_pg_gan_code) - -# path to model generated results -path_gen_sample = './asset_results/pggan_celeba_sample_pkl/' -if not os.path.exists(path_gen_sample): - os.mkdir(path_gen_sample) -path_gan_explore = './asset_results/pggan_celeba_explore/' -if not os.path.exists(path_gan_explore): - os.mkdir(path_gan_explore) - - -""" play with the latent space """ -sess = tf.InteractiveSession() - -try: - with open(path_model, 'rb') as file: - G, D, Gs = pickle.load(file) -except FileNotFoundError: - print('before running the code, download pre-trained model to project_root/asset_model/') - raise - -batch_size = 8 - -latent_mode = 'random' -if latent_mode == 'random': - latents_0 = np.random.randn(1, *Gs.input_shapes[0][1:]) - latents_1 = np.random.randn(1, *Gs.input_shapes[0][1:]) -elif latent_mode == 'scale': - latents_0 = np.random.randn(1, *Gs.input_shapes[0][1:]) * 3 - latents_1 = latents_0*(-1) -else: - raise Exception('latent mode not accepted') - -print(np.sum(latents_0-latents_1)**2) - -latents = np.random.randn(batch_size, *Gs.input_shapes[0][1:]) -for i_alpha, alpha in enumerate(np.linspace(0, 1, batch_size)): - latents[i_alpha, :] = latents_0[0]*alpha + latents_1[0]*(1-alpha) - -# Generate dummy labels (not used by the official networks). -labels = np.zeros([latents.shape[0]] + Gs.input_shapes[1][1:]) - -# Run the generator to produce a set of images. -images = Gs.run(latents, labels) - -images = np.clip(np.rint((images + 1.0) / 2.0 * 255.0), 0.0, - 255.0).astype(np.uint8) # [-1,1] => [0,255] -images = images.transpose(0, 2, 3, 1) # NCHW => NHWC - -time_str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") - -# Save images as PNG. 
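# The interpolation above is linear, which shrinks the latent norm mid-path for
# high-dimensional Gaussian latents; spherical interpolation is a common
# alternative. A sketch (the name `slerp` is introduced here, not used above):
def slerp(z0, z1, alpha):
    """spherical interpolation between two 1D latent vectors"""
    z0_unit = z0 / np.linalg.norm(z0)
    z1_unit = z1 / np.linalg.norm(z1)
    omega = np.arccos(np.clip(np.dot(z0_unit, z1_unit), -1.0, 1.0))
    if np.isclose(omega, 0.0):  # nearly parallel: fall back to linear
        return (1.0 - alpha) * z0 + alpha * z1
    return (np.sin((1.0 - alpha) * omega) * z0
            + np.sin(alpha * omega) * z1) / np.sin(omega)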
-for idx in range(images.shape[0]): - PIL.Image.fromarray(images[idx], 'RGB').save(os.path.join(path_gan_explore, - 'img_{}_{}_{}.png'.format(latent_mode, time_str, idx))) -np.save(os.path.join(path_gan_explore, - 'img_{}_{}.pkl'.format(latent_mode, time_str)), labels) - - -sess.close() -""" functions to regress y (labels) based on z (latent space) """ - -import os -import glob -import numpy as np -import pickle -import h5py -import pandas as pd - - -import src.misc as misc -import src.tl_gan.feature_axis as feature_axis - -## -""" get y and z from pre-generated files """ -path_gan_sample_img = './asset_results/pggan_celeba_sample_jpg/' -path_celeba_att = './data/raw/celebA_annotation/list_attr_celeba.txt' -path_feature_direction = './asset_results/pg_gan_celeba_feature_direction_40' - -filename_sample_y = 'sample_y.h5' -filename_sample_z = 'sample_z.h5' - -pathfile_y = os.path.join(path_gan_sample_img, filename_sample_y) -pathfile_z = os.path.join(path_gan_sample_img, filename_sample_z) - -with h5py.File(pathfile_y, 'r') as f: - y = f['y'][:] -with h5py.File(pathfile_z, 'r') as f: - z = f['z'][:] - -# read feature name -df_attr = pd.read_csv(path_celeba_att, sep='\s+', header=1, index_col=0) -y_name = df_attr.columns.values.tolist() - -## -""" regression: use latent space z to predict features y """ -feature_slope = feature_axis.find_feature_axis(z, y, method='tanh') - -## -""" normalize the feature vectors """ -yn_normalize_feature_direction = True -if yn_normalize_feature_direction: - feature_direction = feature_axis.normalize_feature_axis(feature_slope) -else: - feature_direction = feature_slope - -""" save_regression result to hard disk """ -if not os.path.exists(path_feature_direction): - os.mkdir(path_feature_direction) - -pathfile_feature_direction = os.path.join( - path_feature_direction, 'feature_direction_{}.pkl'.format(misc.gen_time_str())) -dict_to_save = {'direction': feature_direction, 'name': y_name} -with open(pathfile_feature_direction, 'wb') as f: - pickle.dump(dict_to_save, f) - - -## -""" disentangle correlated feature axis """ -pathfile_feature_direction = glob.glob(os.path.join( - path_feature_direction, 'feature_direction_*.pkl'))[-1] - -with open(pathfile_feature_direction, 'rb') as f: - feature_direction_name = pickle.load(f) - -feature_direction = feature_direction_name['direction'] -feature_name = np.array(feature_direction_name['name']) - -len_z, len_y = feature_direction.shape - - -feature_direction_disentangled = feature_axis.disentangle_feature_axis_by_idx( - feature_direction, idx_base=range(len_y//4), idx_target=None) - -feature_axis.plot_feature_cos_sim( - feature_direction_disentangled, feature_name=feature_name) - -## -""" script to discover feature axis in the latent space """ - -""" -pre-requisite: this code needs pre-generated feature-image pairs, stored as pickle files located at: -project_root/asset_results/pggan_celeba_sample_pkl -""" - - -# path to model generated results -import matplotlib.pyplot as plt -import os -import sys -import time -import pickle -import numpy as np -import tensorflow as tf -import PIL.Image -import datetime -import glob -path_gan_sample = './asset_results/pggan_celeba_sample_pkl/' -if not os.path.exists(path_gan_sample): - os.mkdir(path_gan_sample) - -## -""" function to get features """ - - -def get_feature(x): - """ - get a list of features from images - - :param x: generated images, of shape [num_images, height, width, rgb] - :return: feature table, of shape [num_images, num_features] - """ - - n, h, w, _ = x.shape - 
fg_lum = np.mean(x[:, h//4:h//4*3, w//4:w//4*3], axis=(1, 2, 3)) - bg_lum = np.mean(x[:, :h//4, :w//4], axis=(1, 2, 3)) - return np.hstack((fg_lum[:, None], bg_lum[:, None])) - - -## -""" get the simplest feature: dark-bright skin color """ -list_pkl = sorted(glob.glob(path_gan_sample+'*.pkl')) - -list_z = [] -list_y = [] - -for file_pkl in list_pkl[:8000]: - with open(file_pkl, 'rb') as f: - dict_zx = pickle.load(f) - z = dict_zx['z'] - x = dict_zx['x'] - y = get_feature(x) - - list_z.append(z) - list_y.append(y) - -z_all = np.concatenate(list_z, axis=0) -y_all_raw = np.concatenate(list_y, axis=0) -y_all = (y_all_raw - np.mean(y_all_raw, axis=0, keepdims=True)) / \ - np.std(y_all_raw, axis=0, keepdims=True) - -## -"""discover feature axis""" -reg_res = np.linalg.lstsq(z_all, y_all) -feature_directon = reg_res[0] - - -## -""" visualize stored samples """ -plt.imshow(x[5]) -plt.show() - -## -""" test_discovered features """ - -# path to model code and weight -path_pg_gan_code = './src/model/pggan' -path_model = './asset_model/karras2018iclr-celebahq-1024x1024.pkl' -sys.path.append(path_pg_gan_code) - -path_gan_explore = './asset_results/pggan_celeba_feature_axis_explore/' -if not os.path.exists(path_gan_explore): - os.mkdir(path_gan_explore) - -""" play with the latent space """ -sess = tf.InteractiveSession() - -try: - with open(path_model, 'rb') as file: - G, D, Gs = pickle.load(file) -except FileNotFoundError: - print('before running the code, download pre-trained model to project_root/asset_model/') - raise - -batch_size = 7 - -## -latents_c = np.random.randn(1, *Gs.input_shapes[0][1:]) - -for i_feature in range(feature_directon.shape[1]): - latents_0 = latents_c - feature_directon[:, i_feature][None, :]*2 - latents_1 = latents_c + feature_directon[:, i_feature][None, :]*2 - - print(np.sum(latents_0-latents_1)**2) - - latents = np.random.randn(batch_size, *Gs.input_shapes[0][1:]) - for i_alpha, alpha in enumerate(np.linspace(0, 1, batch_size)): - latents[i_alpha, :] = latents_0[0]*(1-alpha) + latents_1[0]*alpha - - # Generate dummy labels (not used by the official networks). - labels = np.zeros([latents.shape[0]] + Gs.input_shapes[1][1:]) - - # Run the generator to produce a set of images. - images = Gs.run(latents, labels) - - images = np.clip(np.rint((images + 1.0) / 2.0 * 255.0), 0.0, - 255.0).astype(np.uint8) # [-1,1] => [0,255] - images = images.transpose(0, 2, 3, 1) # NCHW => NHWC - - images = images[:, 2::4, 2::4] - - time_str = datetime.datetime.now().strftime("%Y%m%d_%H%M%S") - - # Save images as PNG. 
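# Note on the regression earlier in this script: np.linalg.lstsq(z_all, y_all)
# fits the axes without an intercept term, which is reasonable here because
# y_all was standardized and z is drawn from N(0, I). On NumPy >= 1.14 the call
# emits a FutureWarning unless rcond is passed explicitly, e.g.:
#     reg_res = np.linalg.lstsq(z_all, y_all, rcond=None)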
- for idx in range(images.shape[0]): - PIL.Image.fromarray(images[idx], 'RGB').save(os.path.join(path_gan_explore, - 'img_{}_{}_{}.png'.format(time_str, i_feature, idx))) - np.save(os.path.join(path_gan_explore, - 'img_{}_{}.pkl'.format(time_str, i_feature)), labels) - -## -sess.close() -""" predict_feature labels of synthetic_images """ - -import os -import glob -import numpy as np -import PIL.Image -import h5py -import src.model.cnn_face_attr_celeba as cnn_face - -# path to model generated results -path_gan_sample_img = './asset_results/pggan_celeba_sample_jpg/' -file_pattern_x = 'sample_*.jpg' -file_pattern_z = 'sample_*_z.npy' -filename_sample_y = 'sample_y.h5' - -# get the list of image_names -list_pathfile_x = glob.glob(os.path.join(path_gan_sample_img, file_pattern_x)) -list_pathfile_z = glob.glob(os.path.join(path_gan_sample_img, file_pattern_z)) -list_pathfile_x.sort() -list_pathfile_z.sort() - -assert len(list_pathfile_x) == len( - list_pathfile_z), 'num_image does not match num_z' - -## -""" load model for prediction """ -model = cnn_face.create_cnn_model() -model.load_weights(cnn_face.get_list_model_save()[-1]) - -list_y = [] -batch_size = 64 -list_img_batch = [] -list_pathfile_x_use = list_pathfile_x -num_use = len(list_pathfile_x_use) -save_every = 2048 - -for i, pathfile_x in enumerate(list_pathfile_x_use): - img = np.asarray(PIL.Image.open(pathfile_x)) - list_img_batch.append(img) - - if i % batch_size == batch_size-1 or i == num_use-1: - print('{}/{}'.format(i+1, num_use)) - img_batch = np.stack(list_img_batch, axis=0) - x = cnn_face.preprocess_input(img_batch) - y = model.predict(x, batch_size=batch_size) - list_y.append(y) - - list_img_batch = [] - - if i % save_every == 0: - y_concat = np.concatenate(list_y, axis=0) - pathfile_sample_y = os.path.join( - path_gan_sample_img, filename_sample_y) - with h5py.File(pathfile_sample_y, 'w') as f: - f.create_dataset('y', data=y_concat) - - -y_concat = np.concatenate(list_y, axis=0) -pathfile_sample_y = os.path.join(path_gan_sample_img, filename_sample_y) -with h5py.File(pathfile_sample_y, 'w') as f: - f.create_dataset('y', data=y_concat) -""" temporary script to transform samples from pkl to images """ - -import os -import glob -import pickle -import numpy as np -import PIL.Image -import h5py - - -# path to model generated results -path_gan_sample_pkl = './asset_results/pggan_celeba_sample_pkl/' -path_gan_sample_img = './asset_results/pggan_celeba_sample_jpg/' - -if not os.path.exists(path_gan_sample_pkl): - os.mkdir(path_gan_sample_pkl) - -if not os.path.exists(path_gan_sample_img): - os.mkdir(path_gan_sample_img) - -# name of new data files - - -def get_filename_from_idx(idx): - return 'sample_{:0>6}'.format(idx) - - -filename_sample_z = 'sample_z.h5' - -# get the pkl file list -list_pathfile_pkl = glob.glob(os.path.join(path_gan_sample_pkl, '*.pkl')) -list_pathfile_pkl.sort() - -# loop to transform data and save image -list_z = [] -i_counter = 0 -for pathfile_pkl in list_pathfile_pkl: - print(pathfile_pkl) - with open(pathfile_pkl, 'rb') as f: - pkl_content = pickle.load(f) - x = pkl_content['x'] - z = pkl_content['z'] - num_cur = x.shape[0] - for i in range(num_cur): - pathfile_cur = os.path.join( - path_gan_sample_img, get_filename_from_idx(i_counter)) - PIL.Image.fromarray(x[i]).save(pathfile_cur + '.jpg') - np.save(pathfile_cur+'_z.npy', z[i]) - i_counter += 1 - list_z.append(z) - -# save z (latent variables) -z_concat = np.concatenate(list_z, axis=0) -pathfile_sample_z = os.path.join(path_gan_sample_img, 
filename_sample_z) -with h5py.File(pathfile_sample_z, 'w') as f: - f.create_dataset('z', data=z_concat) -# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -# -# This work is licensed under the Creative Commons Attribution-NonCommercial -# 4.0 International License. To view a copy of this license, visit -# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to -# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. - -# ---------------------------------------------------------------------------- -# Convenience class that behaves exactly like dict(), but allows accessing -# the keys and values using the attribute syntax, i.e., "mydict.key = value". - - -class EasyDict(dict): - def __init__(self, *args, **kwargs): super().__init__(*args, **kwargs) - def __getattr__(self, name): return self[name] - def __setattr__(self, name, value): self[name] = value - def __delattr__(self, name): del self[name] - -# ---------------------------------------------------------------------------- -# Paths. - - -data_dir = 'datasets' -result_dir = 'results' - -# ---------------------------------------------------------------------------- -# TensorFlow options. - -tf_config = EasyDict() # TensorFlow session config, set by tfutil.init_tf(). -# Environment variables, set by the main program in train.py. -env = EasyDict() - -# False (default) = Check that all ops are available on the designated device. True = Skip the check for ops that are not used. -tf_config['graph_options.place_pruned_graph'] = True -# tf_config['gpu_options.allow_growth'] = False # False (default) = Allocate all GPU memory at the beginning. True = Allocate only as much GPU memory as needed. -# env.CUDA_VISIBLE_DEVICES = '0' # Unspecified (default) = Use all available GPUs. List of ints = CUDA device numbers to use. -# 0 (default) = Print all available debug info from TensorFlow. 1 = Print warnings and errors, but disable debug info. -env.TF_CPP_MIN_LOG_LEVEL = '1' - -# ---------------------------------------------------------------------------- -# Official training configs, targeted mainly for CelebA-HQ. -# To run, comment/uncomment the lines as appropriate and launch train.py. - -# Description string included in result subdir name. -desc = 'pgan' -# Global random seed. -random_seed = 1000 -# Options for dataset.load_dataset(). -dataset = EasyDict() -# Options for main training func. -train = EasyDict(func='train.train_progressive_gan') -# Options for generator network. -G = EasyDict(func='networks.G_paper') -# Options for discriminator network. -D = EasyDict(func='networks.D_paper') -# Options for generator optimizer. -G_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) -# Options for discriminator optimizer. -D_opt = EasyDict(beta1=0.0, beta2=0.99, epsilon=1e-8) -# Options for generator loss. -G_loss = EasyDict(func='loss.G_wgan_acgan') -# Options for discriminator loss. -D_loss = EasyDict(func='loss.D_wgangp_acgan') -# Options for train.TrainingSchedule. -sched = EasyDict() -# Options for train.setup_snapshot_image_grid(). -grid = EasyDict(size='1080p', layout='random') - -# Dataset (choose one). 
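# Illustration of the EasyDict class defined at the top of this file (kept as
# comments so the config itself is unchanged): attribute access and key access
# are interchangeable.
#     d = EasyDict(beta1=0.0, beta2=0.99)
#     assert d.beta1 == d['beta1'] and d['beta2'] == d.beta2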
-desc += '-celebahq' -dataset = EasyDict(tfrecord_dir='celebahq') -train.mirror_augment = True -#desc += '-celeba'; dataset = EasyDict(tfrecord_dir='celeba'); train.mirror_augment = True -#desc += '-cifar10'; dataset = EasyDict(tfrecord_dir='cifar10') -#desc += '-cifar100'; dataset = EasyDict(tfrecord_dir='cifar100') -#desc += '-svhn'; dataset = EasyDict(tfrecord_dir='svhn') -#desc += '-mnist'; dataset = EasyDict(tfrecord_dir='mnist') -#desc += '-mnistrgb'; dataset = EasyDict(tfrecord_dir='mnistrgb') -#desc += '-syn1024rgb'; dataset = EasyDict(class_name='dataset.SyntheticDataset', resolution=1024, num_channels=3) -#desc += '-lsun-airplane'; dataset = EasyDict(tfrecord_dir='lsun-airplane-100k'); train.mirror_augment = True -#desc += '-lsun-bedroom'; dataset = EasyDict(tfrecord_dir='lsun-bedroom-100k'); train.mirror_augment = True -#desc += '-lsun-bicycle'; dataset = EasyDict(tfrecord_dir='lsun-bicycle-100k'); train.mirror_augment = True -#desc += '-lsun-bird'; dataset = EasyDict(tfrecord_dir='lsun-bird-100k'); train.mirror_augment = True -#desc += '-lsun-boat'; dataset = EasyDict(tfrecord_dir='lsun-boat-100k'); train.mirror_augment = True -#desc += '-lsun-bottle'; dataset = EasyDict(tfrecord_dir='lsun-bottle-100k'); train.mirror_augment = True -#desc += '-lsun-bridge'; dataset = EasyDict(tfrecord_dir='lsun-bridge-100k'); train.mirror_augment = True -#desc += '-lsun-bus'; dataset = EasyDict(tfrecord_dir='lsun-bus-100k'); train.mirror_augment = True -#desc += '-lsun-car'; dataset = EasyDict(tfrecord_dir='lsun-car-100k'); train.mirror_augment = True -#desc += '-lsun-cat'; dataset = EasyDict(tfrecord_dir='lsun-cat-100k'); train.mirror_augment = True -#desc += '-lsun-chair'; dataset = EasyDict(tfrecord_dir='lsun-chair-100k'); train.mirror_augment = True -#desc += '-lsun-churchoutdoor'; dataset = EasyDict(tfrecord_dir='lsun-churchoutdoor-100k'); train.mirror_augment = True -#desc += '-lsun-classroom'; dataset = EasyDict(tfrecord_dir='lsun-classroom-100k'); train.mirror_augment = True -#desc += '-lsun-conferenceroom'; dataset = EasyDict(tfrecord_dir='lsun-conferenceroom-100k'); train.mirror_augment = True -#desc += '-lsun-cow'; dataset = EasyDict(tfrecord_dir='lsun-cow-100k'); train.mirror_augment = True -#desc += '-lsun-diningroom'; dataset = EasyDict(tfrecord_dir='lsun-diningroom-100k'); train.mirror_augment = True -#desc += '-lsun-diningtable'; dataset = EasyDict(tfrecord_dir='lsun-diningtable-100k'); train.mirror_augment = True -#desc += '-lsun-dog'; dataset = EasyDict(tfrecord_dir='lsun-dog-100k'); train.mirror_augment = True -#desc += '-lsun-horse'; dataset = EasyDict(tfrecord_dir='lsun-horse-100k'); train.mirror_augment = True -#desc += '-lsun-kitchen'; dataset = EasyDict(tfrecord_dir='lsun-kitchen-100k'); train.mirror_augment = True -#desc += '-lsun-livingroom'; dataset = EasyDict(tfrecord_dir='lsun-livingroom-100k'); train.mirror_augment = True -#desc += '-lsun-motorbike'; dataset = EasyDict(tfrecord_dir='lsun-motorbike-100k'); train.mirror_augment = True -#desc += '-lsun-person'; dataset = EasyDict(tfrecord_dir='lsun-person-100k'); train.mirror_augment = True -#desc += '-lsun-pottedplant'; dataset = EasyDict(tfrecord_dir='lsun-pottedplant-100k'); train.mirror_augment = True -#desc += '-lsun-restaurant'; dataset = EasyDict(tfrecord_dir='lsun-restaurant-100k'); train.mirror_augment = True -#desc += '-lsun-sheep'; dataset = EasyDict(tfrecord_dir='lsun-sheep-100k'); train.mirror_augment = True -#desc += '-lsun-sofa'; dataset = EasyDict(tfrecord_dir='lsun-sofa-100k'); train.mirror_augment = 
True -#desc += '-lsun-tower'; dataset = EasyDict(tfrecord_dir='lsun-tower-100k'); train.mirror_augment = True -#desc += '-lsun-train'; dataset = EasyDict(tfrecord_dir='lsun-train-100k'); train.mirror_augment = True -#desc += '-lsun-tvmonitor'; dataset = EasyDict(tfrecord_dir='lsun-tvmonitor-100k'); train.mirror_augment = True - -# Conditioning & snapshot options. -# desc += '-cond'; dataset.max_label_size = 'full' # conditioned on full label -# desc += '-cond1'; dataset.max_label_size = 1 # conditioned on first component of the label -#desc += '-g4k'; grid.size = '4k' -#desc += '-grpc'; grid.layout = 'row_per_class' - -# Config presets (choose one). -#desc += '-preset-v1-1gpu'; num_gpus = 1; D.mbstd_group_size = 16; sched.minibatch_base = 16; sched.minibatch_dict = {256: 14, 512: 6, 1024: 3}; sched.lod_training_kimg = 800; sched.lod_transition_kimg = 800; train.total_kimg = 19000 -desc += '-preset-v2-1gpu' -num_gpus = 1 -sched.minibatch_base = 4 -sched.minibatch_dict = {4: 128, 8: 128, 16: 128, - 32: 64, 64: 32, 128: 16, 256: 8, 512: 4} -sched.G_lrate_dict = {1024: 0.0015} -sched.D_lrate_dict = EasyDict(sched.G_lrate_dict) -train.total_kimg = 12000 -#desc += '-preset-v2-2gpus'; num_gpus = 2; sched.minibatch_base = 8; sched.minibatch_dict = {4: 256, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16, 256: 8}; sched.G_lrate_dict = {512: 0.0015, 1024: 0.002}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000 -#desc += '-preset-v2-4gpus'; num_gpus = 4; sched.minibatch_base = 16; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32, 128: 16}; sched.G_lrate_dict = {256: 0.0015, 512: 0.002, 1024: 0.003}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000 -#desc += '-preset-v2-8gpus'; num_gpus = 8; sched.minibatch_base = 32; sched.minibatch_dict = {4: 512, 8: 256, 16: 128, 32: 64, 64: 32}; sched.G_lrate_dict = {128: 0.0015, 256: 0.002, 512: 0.003, 1024: 0.003}; sched.D_lrate_dict = EasyDict(sched.G_lrate_dict); train.total_kimg = 12000 - -# Numerical precision (choose one). -desc += '-fp32' -sched.max_minibatch_per_gpu = {256: 16, 512: 8, 1024: 4} -#desc += '-fp16'; G.dtype = 'float16'; D.dtype = 'float16'; G.pixelnorm_epsilon=1e-4; G_opt.use_loss_scaling = True; D_opt.use_loss_scaling = True; sched.max_minibatch_per_gpu = {512: 16, 1024: 8} - -# Disable individual features. -#desc += '-nogrowing'; sched.lod_initial_resolution = 1024; sched.lod_training_kimg = 0; sched.lod_transition_kimg = 0; train.total_kimg = 10000 -#desc += '-nopixelnorm'; G.use_pixelnorm = False -#desc += '-nowscale'; G.use_wscale = False; D.use_wscale = False -#desc += '-noleakyrelu'; G.use_leakyrelu = False -#desc += '-nosmoothing'; train.G_smoothing = 0.0 -#desc += '-norepeat'; train.minibatch_repeats = 1 -#desc += '-noreset'; train.reset_opt_for_new_lod = False - -# Special modes. 
-
-# Special modes.
-#desc += '-BENCHMARK'; sched.lod_initial_resolution = 4; sched.lod_training_kimg = 3; sched.lod_transition_kimg = 3; train.total_kimg = (8*2+1)*3; sched.tick_kimg_base = 1; sched.tick_kimg_dict = {}; train.image_snapshot_ticks = 1000; train.network_snapshot_ticks = 1000
-#desc += '-BENCHMARK0'; sched.lod_initial_resolution = 1024; train.total_kimg = 10; sched.tick_kimg_base = 1; sched.tick_kimg_dict = {}; train.image_snapshot_ticks = 1000; train.network_snapshot_ticks = 1000
-#desc += '-VERBOSE'; sched.tick_kimg_base = 1; sched.tick_kimg_dict = {}; train.image_snapshot_ticks = 1; train.network_snapshot_ticks = 100
-#desc += '-GRAPH'; train.save_tf_graph = True
-#desc += '-HIST'; train.save_weight_histograms = True
-
-# ----------------------------------------------------------------------------
-# Utility scripts.
-# To run, uncomment the appropriate line and launch train.py.
-
-#train = EasyDict(func='util_scripts.generate_fake_images', run_id=23, num_pngs=1000); num_gpus = 1; desc = 'fake-images-' + str(train.run_id)
-#train = EasyDict(func='util_scripts.generate_fake_images', run_id=23, grid_size=[15,8], num_pngs=10, image_shrink=4); num_gpus = 1; desc = 'fake-grids-' + str(train.run_id)
-#train = EasyDict(func='util_scripts.generate_interpolation_video', run_id=23, grid_size=[1,1], duration_sec=60.0, smoothing_sec=1.0); num_gpus = 1; desc = 'interpolation-video-' + str(train.run_id)
-#train = EasyDict(func='util_scripts.generate_training_video', run_id=23, duration_sec=20.0); num_gpus = 1; desc = 'training-video-' + str(train.run_id)
-
-#train = EasyDict(func='util_scripts.evaluate_metrics', run_id=23, log='metric-swd-16k.txt', metrics=['swd'], num_images=16384, real_passes=2); num_gpus = 1; desc = train.log.split('.')[0] + '-' + str(train.run_id)
-#train = EasyDict(func='util_scripts.evaluate_metrics', run_id=23, log='metric-fid-10k.txt', metrics=['fid'], num_images=10000, real_passes=1); num_gpus = 1; desc = train.log.split('.')[0] + '-' + str(train.run_id)
-#train = EasyDict(func='util_scripts.evaluate_metrics', run_id=23, log='metric-fid-50k.txt', metrics=['fid'], num_images=50000, real_passes=1); num_gpus = 1; desc = train.log.split('.')[0] + '-' + str(train.run_id)
-#train = EasyDict(func='util_scripts.evaluate_metrics', run_id=23, log='metric-is-50k.txt', metrics=['is'], num_images=50000, real_passes=1); num_gpus = 1; desc = train.log.split('.')[0] + '-' + str(train.run_id)
-#train = EasyDict(func='util_scripts.evaluate_metrics', run_id=23, log='metric-msssim-20k.txt', metrics=['msssim'], num_images=20000, real_passes=1); num_gpus = 1; desc = train.log.split('.')[0] + '-' + str(train.run_id)
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
-#
-# This work is licensed under the Creative Commons Attribution-NonCommercial
-# 4.0 International License. To view a copy of this license, visit
-# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to
-# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
-
-import os
-import glob
-import numpy as np
-import tensorflow as tf
-import tfutil
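The two record parsers just below read `tf.train.Example` protos whose features are a 3-vector `'shape'` feature and raw `'data'` bytes. A self-contained round-trip sketch of that layout (the toy image is made up; the proto API is the TF1-era one used throughout this file):

```python
import numpy as np
import tensorflow as tf

# Write one record in the layout the parsers below expect:
# 'shape' = [channels, height, width], 'data' = raw uint8 bytes.
img = np.zeros([3, 4, 4], dtype=np.uint8)
ex = tf.train.Example(features=tf.train.Features(feature={
    'shape': tf.train.Feature(int64_list=tf.train.Int64List(value=list(img.shape))),
    'data': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img.tobytes()]))}))
record = ex.SerializeToString()

# Read it back without a TF graph, mirroring parse_tfrecord_np.
ex2 = tf.train.Example()
ex2.ParseFromString(record)
shape = ex2.features.feature['shape'].int64_list.value
data = ex2.features.feature['data'].bytes_list.value[0]
assert np.frombuffer(data, np.uint8).reshape(shape).shape == (3, 4, 4)
```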
-
-# ----------------------------------------------------------------------------
-# Parse individual image from a tfrecords file.
-
-
-def parse_tfrecord_tf(record):
-    features = tf.parse_single_example(record, features={
-        'shape': tf.FixedLenFeature([3], tf.int64),
-        'data': tf.FixedLenFeature([], tf.string)})
-    data = tf.decode_raw(features['data'], tf.uint8)
-    return tf.reshape(data, features['shape'])
-
-
-def parse_tfrecord_np(record):
-    ex = tf.train.Example()
-    ex.ParseFromString(record)
-    shape = ex.features.feature['shape'].int64_list.value
-    data = ex.features.feature['data'].bytes_list.value[0]
-    return np.frombuffer(data, np.uint8).reshape(shape)  # frombuffer: fromstring is deprecated
-
-# ----------------------------------------------------------------------------
-# Dataset class that loads data from tfrecords files.
-
-
-class TFRecordDataset:
-    def __init__(self,
-                 tfrecord_dir,       # Directory containing a collection of tfrecords files.
-                 resolution=None,    # Dataset resolution, None = autodetect.
-                 label_file=None,    # Relative path of the labels file, None = autodetect.
-                 max_label_size=0,   # 0 = no labels, 'full' = full labels, <int> = N first label components.
-                 repeat=True,        # Repeat dataset indefinitely.
-                 shuffle_mb=4096,    # Shuffle data within specified window (megabytes), 0 = disable shuffling.
-                 prefetch_mb=2048,   # Amount of data to prefetch (megabytes), 0 = disable prefetching.
-                 buffer_mb=256,      # Read buffer size (megabytes).
-                 num_threads=2):     # Number of concurrent threads.
-
-        self.tfrecord_dir = tfrecord_dir
-        self.resolution = None
-        self.resolution_log2 = None
-        self.shape = []              # [channel, height, width]
-        self.dtype = 'uint8'
-        self.dynamic_range = [0, 255]
-        self.label_file = label_file
-        self.label_size = None       # [component]
-        self.label_dtype = None
-        self._np_labels = None
-        self._tf_minibatch_in = None
-        self._tf_labels_var = None
-        self._tf_labels_dataset = None
-        self._tf_datasets = dict()
-        self._tf_iterator = None
-        self._tf_init_ops = dict()
-        self._tf_minibatch_np = None
-        self._cur_minibatch = -1
-        self._cur_lod = -1
-
-        # List tfrecords files and inspect their shapes.
-        assert os.path.isdir(self.tfrecord_dir)
-        tfr_files = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.tfrecords')))
-        assert len(tfr_files) >= 1
-        tfr_shapes = []
-        for tfr_file in tfr_files:
-            tfr_opt = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.NONE)
-            for record in tf.python_io.tf_record_iterator(tfr_file, tfr_opt):
-                tfr_shapes.append(parse_tfrecord_np(record).shape)
-                break
-
-        # Autodetect label filename.
-        if self.label_file is None:
-            guess = sorted(glob.glob(os.path.join(self.tfrecord_dir, '*.labels')))
-            if len(guess):
-                self.label_file = guess[0]
-        elif not os.path.isfile(self.label_file):
-            guess = os.path.join(self.tfrecord_dir, self.label_file)
-            if os.path.isfile(guess):
-                self.label_file = guess
-
-        # Determine shape and resolution.
-        max_shape = max(tfr_shapes, key=lambda shape: np.prod(shape))
-        self.resolution = resolution if resolution is not None else max_shape[1]
-        self.resolution_log2 = int(np.log2(self.resolution))
-        self.shape = [max_shape[0], self.resolution, self.resolution]
-        tfr_lods = [self.resolution_log2 - int(np.log2(shape[1])) for shape in tfr_shapes]
-        assert all(shape[0] == max_shape[0] for shape in tfr_shapes)
-        assert all(shape[1] == shape[2] for shape in tfr_shapes)
-        assert all(shape[1] == self.resolution // (2**lod) for shape, lod in zip(tfr_shapes, tfr_lods))
-        assert all(lod in tfr_lods for lod in range(self.resolution_log2 - 1))
-        # Load labels.
-        assert max_label_size == 'full' or max_label_size >= 0
-        self._np_labels = np.zeros([1 << 20, 0], dtype=np.float32)
-        if self.label_file is not None and max_label_size != 0:
-            self._np_labels = np.load(self.label_file)
-            assert self._np_labels.ndim == 2
-        if max_label_size != 'full' and self._np_labels.shape[1] > max_label_size:
-            self._np_labels = self._np_labels[:, :max_label_size]
-        self.label_size = self._np_labels.shape[1]
-        self.label_dtype = self._np_labels.dtype.name
-
-        # Build TF expressions.
-        with tf.name_scope('Dataset'), tf.device('/cpu:0'):
-            self._tf_minibatch_in = tf.placeholder(tf.int64, name='minibatch_in', shape=[])
-            tf_labels_init = tf.zeros(self._np_labels.shape, self._np_labels.dtype)
-            self._tf_labels_var = tf.Variable(tf_labels_init, name='labels_var')
-            tfutil.set_vars({self._tf_labels_var: self._np_labels})
-            self._tf_labels_dataset = tf.data.Dataset.from_tensor_slices(self._tf_labels_var)
-            for tfr_file, tfr_shape, tfr_lod in zip(tfr_files, tfr_shapes, tfr_lods):
-                if tfr_lod < 0:
-                    continue
-                dset = tf.data.TFRecordDataset(tfr_file, compression_type='', buffer_size=buffer_mb << 20)
-                dset = dset.map(parse_tfrecord_tf, num_parallel_calls=num_threads)
-                dset = tf.data.Dataset.zip((dset, self._tf_labels_dataset))
-                bytes_per_item = np.prod(tfr_shape) * np.dtype(self.dtype).itemsize
-                if shuffle_mb > 0:
-                    dset = dset.shuffle(((shuffle_mb << 20) - 1) // bytes_per_item + 1)
-                if repeat:
-                    dset = dset.repeat()
-                if prefetch_mb > 0:
-                    dset = dset.prefetch(((prefetch_mb << 20) - 1) // bytes_per_item + 1)
-                dset = dset.batch(self._tf_minibatch_in)
-                self._tf_datasets[tfr_lod] = dset
-            self._tf_iterator = tf.data.Iterator.from_structure(self._tf_datasets[0].output_types, self._tf_datasets[0].output_shapes)
-            self._tf_init_ops = {lod: self._tf_iterator.make_initializer(dset) for lod, dset in self._tf_datasets.items()}
-
-    # Use the given minibatch size and level-of-detail for the data returned by get_minibatch_tf().
-    def configure(self, minibatch_size, lod=0):
-        lod = int(np.floor(lod))
-        assert minibatch_size >= 1 and lod in self._tf_datasets
-        if self._cur_minibatch != minibatch_size or self._cur_lod != lod:
-            self._tf_init_ops[lod].run({self._tf_minibatch_in: minibatch_size})
-            self._cur_minibatch = minibatch_size
-            self._cur_lod = lod
-
-    # Get next minibatch as TensorFlow expressions.
-    def get_minibatch_tf(self):  # => images, labels
-        return self._tf_iterator.get_next()
-
-    # Get next minibatch as NumPy arrays.
-    def get_minibatch_np(self, minibatch_size, lod=0):  # => images, labels
-        self.configure(minibatch_size, lod)
-        if self._tf_minibatch_np is None:
-            self._tf_minibatch_np = self.get_minibatch_tf()
-        return tfutil.run(self._tf_minibatch_np)
-
-    # Get random labels as TensorFlow expression.
-    def get_random_labels_tf(self, minibatch_size):  # => labels
-        if self.label_size > 0:
-            return tf.gather(self._tf_labels_var, tf.random_uniform([minibatch_size], 0, self._np_labels.shape[0], dtype=tf.int32))
-        else:
-            return tf.zeros([minibatch_size, 0], self.label_dtype)
-
-    # Get random labels as NumPy array.
-    def get_random_labels_np(self, minibatch_size):  # => labels
-        if self.label_size > 0:
-            return self._np_labels[np.random.randint(self._np_labels.shape[0], size=[minibatch_size])]
-        else:
-            return np.zeros([minibatch_size, 0], self.label_dtype)
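A hedged usage sketch for the class above. It assumes `tfutil.init_tf()` installs a default TF session (the tool code further down calls it the same way) and that `datasets/celebahq` was produced by `dataset_tool.py`, i.e. one `.tfrecords` file per resolution; the printed shape is what a 1024px dataset would yield:

```python
import tfutil

tfutil.init_tf({'gpu_options.allow_growth': True})
dset = TFRecordDataset('datasets/celebahq', repeat=False, shuffle_mb=0)
dset.configure(minibatch_size=4, lod=2)      # lod=2 streams records at resolution / 4
images, labels = dset.get_minibatch_np(4, lod=2)
print(images.shape, images.dtype)            # (4, 3, 256, 256) uint8 for a 1024px dataset
```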
-
-# ----------------------------------------------------------------------------
-# Base class for datasets that are generated on the fly.
-
-
-class SyntheticDataset:
-    def __init__(self, resolution=1024, num_channels=3, dtype='uint8', dynamic_range=[0, 255], label_size=0, label_dtype='float32'):
-        self.resolution = resolution
-        self.resolution_log2 = int(np.log2(resolution))
-        self.shape = [num_channels, resolution, resolution]
-        self.dtype = dtype
-        self.dynamic_range = dynamic_range
-        self.label_size = label_size
-        self.label_dtype = label_dtype
-        self._tf_minibatch_var = None
-        self._tf_lod_var = None
-        self._tf_minibatch_np = None
-        self._tf_labels_np = None
-
-        assert self.resolution == 2 ** self.resolution_log2
-        with tf.name_scope('Dataset'):
-            self._tf_minibatch_var = tf.Variable(np.int32(0), name='minibatch_var')
-            self._tf_lod_var = tf.Variable(np.int32(0), name='lod_var')
-
-    def configure(self, minibatch_size, lod=0):
-        lod = int(np.floor(lod))
-        assert minibatch_size >= 1 and lod >= 0 and lod <= self.resolution_log2
-        tfutil.set_vars({self._tf_minibatch_var: minibatch_size, self._tf_lod_var: lod})
-
-    def get_minibatch_tf(self):  # => images, labels
-        with tf.name_scope('SyntheticDataset'):
-            shrink = tf.cast(2.0 ** tf.cast(self._tf_lod_var, tf.float32), tf.int32)
-            shape = [self.shape[0], self.shape[1] // shrink, self.shape[2] // shrink]
-            images = self._generate_images(self._tf_minibatch_var, self._tf_lod_var, shape)
-            labels = self._generate_labels(self._tf_minibatch_var)
-            return images, labels
-
-    def get_minibatch_np(self, minibatch_size, lod=0):  # => images, labels
-        self.configure(minibatch_size, lod)
-        if self._tf_minibatch_np is None:
-            self._tf_minibatch_np = self.get_minibatch_tf()
-        return tfutil.run(self._tf_minibatch_np)
-
-    def get_random_labels_tf(self, minibatch_size):  # => labels
-        with tf.name_scope('SyntheticDataset'):
-            return self._generate_labels(minibatch_size)
-
-    def get_random_labels_np(self, minibatch_size):  # => labels
-        self.configure(minibatch_size)
-        if self._tf_labels_np is None:
-            self._tf_labels_np = self.get_random_labels_tf(minibatch_size)  # arg was missing; the call raised TypeError
-        return tfutil.run(self._tf_labels_np)
-
-    def _generate_images(self, minibatch, lod, shape):  # to be overridden by subclasses
-        return tf.zeros([minibatch] + shape, self.dtype)
-
-    def _generate_labels(self, minibatch):  # to be overridden by subclasses
-        return tf.zeros([minibatch, self.label_size], self.label_dtype)
-
-# ----------------------------------------------------------------------------
-# Helper func for constructing a dataset object using the given options.
-
-
-def load_dataset(class_name='dataset.TFRecordDataset', data_dir=None, verbose=False, **kwargs):
-    adjusted_kwargs = dict(kwargs)
-    if 'tfrecord_dir' in adjusted_kwargs and data_dir is not None:
-        adjusted_kwargs['tfrecord_dir'] = os.path.join(data_dir, adjusted_kwargs['tfrecord_dir'])
-    if verbose:
-        print('Streaming data using %s...' % class_name)
-    dataset = tfutil.import_obj(class_name)(**adjusted_kwargs)
-    if verbose:
-        print('Dataset shape =', np.int32(dataset.shape).tolist())
-        print('Dynamic range =', dataset.dynamic_range)
-        print('Label size =', dataset.label_size)
-    return dataset
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
-#
-# This work is licensed under the Creative Commons Attribution-NonCommercial
-# 4.0 International License. To view a copy of this license, visit
-# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to
-# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
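The dataset tool below exports each image at every level of detail by repeatedly averaging non-overlapping 2x2 pixel blocks (see `TFRecordExporter.add_image`). A minimal NumPy sketch of one downsampling step, mirroring that logic:

```python
import numpy as np

def downsample_2x(img):
    """One progressive-LOD step: 2x2 box filter on a CHW uint8 image."""
    img = img.astype(np.float32)
    img = (img[:, 0::2, 0::2] + img[:, 0::2, 1::2] +
           img[:, 1::2, 0::2] + img[:, 1::2, 1::2]) * 0.25
    return np.rint(img).clip(0, 255).astype(np.uint8)

img = np.random.randint(0, 256, (3, 1024, 1024), dtype=np.uint8)  # toy input
print(downsample_2x(img).shape)  # (3, 512, 512)
```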
- -import os -import sys -import glob -import argparse -import threading -import six.moves.queue as Queue -import traceback -import numpy as np -import tensorflow as tf -import PIL.Image - -import tfutil -import dataset - -# ---------------------------------------------------------------------------- - - -def error(msg): - print('Error: ' + msg) - exit(1) - -# ---------------------------------------------------------------------------- - - -class TFRecordExporter: - def __init__(self, tfrecord_dir, expected_images, print_progress=True, progress_interval=10): - self.tfrecord_dir = tfrecord_dir - self.tfr_prefix = os.path.join( - self.tfrecord_dir, os.path.basename(self.tfrecord_dir)) - self.expected_images = expected_images - self.cur_images = 0 - self.shape = None - self.resolution_log2 = None - self.tfr_writers = [] - self.print_progress = print_progress - self.progress_interval = progress_interval - if self.print_progress: - print('Creating dataset "%s"' % tfrecord_dir) - if not os.path.isdir(self.tfrecord_dir): - os.makedirs(self.tfrecord_dir) - assert(os.path.isdir(self.tfrecord_dir)) - - def close(self): - if self.print_progress: - print('%-40s\r' % 'Flushing data...', end='', flush=True) - for tfr_writer in self.tfr_writers: - tfr_writer.close() - self.tfr_writers = [] - if self.print_progress: - print('%-40s\r' % '', end='', flush=True) - print('Added %d images.' % self.cur_images) - - # Note: Images and labels must be added in shuffled order. - def choose_shuffled_order(self): - order = np.arange(self.expected_images) - np.random.RandomState(123).shuffle(order) - return order - - def add_image(self, img): - if self.print_progress and self.cur_images % self.progress_interval == 0: - print('%d / %d\r' % - (self.cur_images, self.expected_images), end='', flush=True) - if self.shape is None: - self.shape = img.shape - self.resolution_log2 = int(np.log2(self.shape[1])) - assert self.shape[0] in [1, 3] - assert self.shape[1] == self.shape[2] - assert self.shape[1] == 2**self.resolution_log2 - tfr_opt = tf.python_io.TFRecordOptions( - tf.python_io.TFRecordCompressionType.NONE) - for lod in range(self.resolution_log2 - 1): - tfr_file = self.tfr_prefix + \ - '-r%02d.tfrecords' % (self.resolution_log2 - lod) - self.tfr_writers.append( - tf.python_io.TFRecordWriter(tfr_file, tfr_opt)) - assert img.shape == self.shape - for lod, tfr_writer in enumerate(self.tfr_writers): - if lod: - img = img.astype(np.float32) - img = (img[:, 0::2, 0::2] + img[:, 0::2, 1::2] + - img[:, 1::2, 0::2] + img[:, 1::2, 1::2]) * 0.25 - quant = np.rint(img).clip(0, 255).astype(np.uint8) - ex = tf.train.Example(features=tf.train.Features(feature={ - 'shape': tf.train.Feature(int64_list=tf.train.Int64List(value=quant.shape)), - 'data': tf.train.Feature(bytes_list=tf.train.BytesList(value=[quant.tostring()]))})) - tfr_writer.write(ex.SerializeToString()) - self.cur_images += 1 - - def add_labels(self, labels): - if self.print_progress: - print('%-40s\r' % 'Saving labels...', end='', flush=True) - assert labels.shape[0] == self.cur_images - with open(self.tfr_prefix + '-rxx.labels', 'wb') as f: - np.save(f, labels.astype(np.float32)) - - def __enter__(self): - return self - - def __exit__(self, *args): - self.close() - -# ---------------------------------------------------------------------------- - - -class ExceptionInfo(object): - def __init__(self): - self.value = sys.exc_info()[1] - self.traceback = traceback.format_exc() - -# ---------------------------------------------------------------------------- - - -class 
WorkerThread(threading.Thread): - def __init__(self, task_queue): - threading.Thread.__init__(self) - self.task_queue = task_queue - - def run(self): - while True: - func, args, result_queue = self.task_queue.get() - if func is None: - break - try: - result = func(*args) - except: - result = ExceptionInfo() - result_queue.put((result, args)) - -# ---------------------------------------------------------------------------- - - -class ThreadPool(object): - def __init__(self, num_threads): - assert num_threads >= 1 - self.task_queue = Queue.Queue() - self.result_queues = dict() - self.num_threads = num_threads - for idx in range(self.num_threads): - thread = WorkerThread(self.task_queue) - thread.daemon = True - thread.start() - - def add_task(self, func, args=()): - assert hasattr(func, '__call__') # must be a function - if func not in self.result_queues: - self.result_queues[func] = Queue.Queue() - self.task_queue.put((func, args, self.result_queues[func])) - - def get_result(self, func): # returns (result, args) - result, args = self.result_queues[func].get() - if isinstance(result, ExceptionInfo): - print('\n\nWorker thread caught an exception:\n' + result.traceback) - raise result.value - return result, args - - def finish(self): - for idx in range(self.num_threads): - self.task_queue.put((None, (), None)) - - def __enter__(self): # for 'with' statement - return self - - def __exit__(self, *excinfo): - self.finish() - - def process_items_concurrently(self, item_iterator, process_func=lambda x: x, pre_func=lambda x: x, post_func=lambda x: x, max_items_in_flight=None): - if max_items_in_flight is None: - max_items_in_flight = self.num_threads * 4 - assert max_items_in_flight >= 1 - results = [] - retire_idx = [0] - - def task_func(prepared, idx): - return process_func(prepared) - - def retire_result(): - processed, (prepared, idx) = self.get_result(task_func) - results[idx] = processed - while retire_idx[0] < len(results) and results[retire_idx[0]] is not None: - yield post_func(results[retire_idx[0]]) - results[retire_idx[0]] = None - retire_idx[0] += 1 - - for idx, item in enumerate(item_iterator): - prepared = pre_func(item) - results.append(None) - self.add_task(func=task_func, args=(prepared, idx)) - while retire_idx[0] < idx - max_items_in_flight + 2: - for res in retire_result(): - yield res - while retire_idx[0] < len(results): - for res in retire_result(): - yield res - -# ---------------------------------------------------------------------------- - - -def display(tfrecord_dir): - print('Loading dataset "%s"' % tfrecord_dir) - tfutil.init_tf({'gpu_options.allow_growth': True}) - dset = dataset.TFRecordDataset( - tfrecord_dir, max_label_size='full', repeat=False, shuffle_mb=0) - tfutil.init_uninited_vars() - - idx = 0 - while True: - try: - images, labels = dset.get_minibatch_np(1) - except tf.errors.OutOfRangeError: - break - if idx == 0: - print('Displaying images') - import cv2 # pip install opencv-python - cv2.namedWindow('dataset_tool') - print('Press SPACE or ENTER to advance, ESC to exit') - print('\nidx = %-8d\nlabel = %s' % (idx, labels[0].tolist())) - cv2.imshow('dataset_tool', images[0].transpose( - 1, 2, 0)[:, :, ::-1]) # CHW => HWC, RGB => BGR - idx += 1 - if cv2.waitKey() == 27: - break - print('\nDisplayed %d images.' 
% idx) - -# ---------------------------------------------------------------------------- - - -def extract(tfrecord_dir, output_dir): - print('Loading dataset "%s"' % tfrecord_dir) - tfutil.init_tf({'gpu_options.allow_growth': True}) - dset = dataset.TFRecordDataset( - tfrecord_dir, max_label_size=0, repeat=False, shuffle_mb=0) - tfutil.init_uninited_vars() - - print('Extracting images to "%s"' % output_dir) - if not os.path.isdir(output_dir): - os.makedirs(output_dir) - idx = 0 - while True: - if idx % 10 == 0: - print('%d\r' % idx, end='', flush=True) - try: - images, labels = dset.get_minibatch_np(1) - except tf.errors.OutOfRangeError: - break - if images.shape[1] == 1: - img = PIL.Image.fromarray(images[0][0], 'L') - else: - img = PIL.Image.fromarray(images[0].transpose(1, 2, 0), 'RGB') - img.save(os.path.join(output_dir, 'img%08d.png' % idx)) - idx += 1 - print('Extracted %d images.' % idx) - -# ---------------------------------------------------------------------------- - - -def compare(tfrecord_dir_a, tfrecord_dir_b, ignore_labels): - max_label_size = 0 if ignore_labels else 'full' - print('Loading dataset "%s"' % tfrecord_dir_a) - tfutil.init_tf({'gpu_options.allow_growth': True}) - dset_a = dataset.TFRecordDataset( - tfrecord_dir_a, max_label_size=max_label_size, repeat=False, shuffle_mb=0) - print('Loading dataset "%s"' % tfrecord_dir_b) - dset_b = dataset.TFRecordDataset( - tfrecord_dir_b, max_label_size=max_label_size, repeat=False, shuffle_mb=0) - tfutil.init_uninited_vars() - - print('Comparing datasets') - idx = 0 - identical_images = 0 - identical_labels = 0 - while True: - if idx % 100 == 0: - print('%d\r' % idx, end='', flush=True) - try: - images_a, labels_a = dset_a.get_minibatch_np(1) - except tf.errors.OutOfRangeError: - images_a, labels_a = None, None - try: - images_b, labels_b = dset_b.get_minibatch_np(1) - except tf.errors.OutOfRangeError: - images_b, labels_b = None, None - if images_a is None or images_b is None: - if images_a is not None or images_b is not None: - print('Datasets contain different number of images') - break - if images_a.shape == images_b.shape and np.all(images_a == images_b): - identical_images += 1 - else: - print('Image %d is different' % idx) - if labels_a.shape == labels_b.shape and np.all(labels_a == labels_b): - identical_labels += 1 - else: - print('Label %d is different' % idx) - idx += 1 - print('Identical images: %d / %d' % (identical_images, idx)) - if not ignore_labels: - print('Identical labels: %d / %d' % (identical_labels, idx)) - -# ---------------------------------------------------------------------------- - - -def create_mnist(tfrecord_dir, mnist_dir): - print('Loading MNIST from "%s"' % mnist_dir) - import gzip - with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: - images = np.frombuffer(file.read(), np.uint8, offset=16) - with gzip.open(os.path.join(mnist_dir, 'train-labels-idx1-ubyte.gz'), 'rb') as file: - labels = np.frombuffer(file.read(), np.uint8, offset=8) - images = images.reshape(-1, 1, 28, 28) - images = np.pad(images, [(0, 0), (0, 0), (2, 2), - (2, 2)], 'constant', constant_values=0) - assert images.shape == (60000, 1, 32, 32) and images.dtype == np.uint8 - assert labels.shape == (60000,) and labels.dtype == np.uint8 - assert np.min(images) == 0 and np.max(images) == 255 - assert np.min(labels) == 0 and np.max(labels) == 9 - onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) - onehot[np.arange(labels.size), labels] = 1.0 - - with TFRecordExporter(tfrecord_dir, 
images.shape[0]) as tfr: - order = tfr.choose_shuffled_order() - for idx in range(order.size): - tfr.add_image(images[order[idx]]) - tfr.add_labels(onehot[order]) - -# ---------------------------------------------------------------------------- - - -def create_mnistrgb(tfrecord_dir, mnist_dir, num_images=1000000, random_seed=123): - print('Loading MNIST from "%s"' % mnist_dir) - import gzip - with gzip.open(os.path.join(mnist_dir, 'train-images-idx3-ubyte.gz'), 'rb') as file: - images = np.frombuffer(file.read(), np.uint8, offset=16) - images = images.reshape(-1, 28, 28) - images = np.pad(images, [(0, 0), (2, 2), (2, 2)], - 'constant', constant_values=0) - assert images.shape == (60000, 32, 32) and images.dtype == np.uint8 - assert np.min(images) == 0 and np.max(images) == 255 - - with TFRecordExporter(tfrecord_dir, num_images) as tfr: - rnd = np.random.RandomState(random_seed) - for idx in range(num_images): - tfr.add_image(images[rnd.randint(images.shape[0], size=3)]) - -# ---------------------------------------------------------------------------- - - -def create_cifar10(tfrecord_dir, cifar10_dir): - print('Loading CIFAR-10 from "%s"' % cifar10_dir) - import pickle - images = [] - labels = [] - for batch in range(1, 6): - with open(os.path.join(cifar10_dir, 'data_batch_%d' % batch), 'rb') as file: - data = pickle.load(file, encoding='latin1') - images.append(data['data'].reshape(-1, 3, 32, 32)) - labels.append(data['labels']) - images = np.concatenate(images) - labels = np.concatenate(labels) - assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 - assert labels.shape == (50000,) and labels.dtype == np.int32 - assert np.min(images) == 0 and np.max(images) == 255 - assert np.min(labels) == 0 and np.max(labels) == 9 - onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) - onehot[np.arange(labels.size), labels] = 1.0 - - with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: - order = tfr.choose_shuffled_order() - for idx in range(order.size): - tfr.add_image(images[order[idx]]) - tfr.add_labels(onehot[order]) - -# ---------------------------------------------------------------------------- - - -def create_cifar100(tfrecord_dir, cifar100_dir): - print('Loading CIFAR-100 from "%s"' % cifar100_dir) - import pickle - with open(os.path.join(cifar100_dir, 'train'), 'rb') as file: - data = pickle.load(file, encoding='latin1') - images = data['data'].reshape(-1, 3, 32, 32) - labels = np.array(data['fine_labels']) - assert images.shape == (50000, 3, 32, 32) and images.dtype == np.uint8 - assert labels.shape == (50000,) and labels.dtype == np.int32 - assert np.min(images) == 0 and np.max(images) == 255 - assert np.min(labels) == 0 and np.max(labels) == 99 - onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) - onehot[np.arange(labels.size), labels] = 1.0 - - with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: - order = tfr.choose_shuffled_order() - for idx in range(order.size): - tfr.add_image(images[order[idx]]) - tfr.add_labels(onehot[order]) - -# ---------------------------------------------------------------------------- - - -def create_svhn(tfrecord_dir, svhn_dir): - print('Loading SVHN from "%s"' % svhn_dir) - import pickle - images = [] - labels = [] - for batch in range(1, 4): - with open(os.path.join(svhn_dir, 'train_%d.pkl' % batch), 'rb') as file: - data = pickle.load(file, encoding='latin1') - images.append(data[0]) - labels.append(data[1]) - images = np.concatenate(images) - labels = np.concatenate(labels) - assert 
images.shape == (73257, 3, 32, 32) and images.dtype == np.uint8 - assert labels.shape == (73257,) and labels.dtype == np.uint8 - assert np.min(images) == 0 and np.max(images) == 255 - assert np.min(labels) == 0 and np.max(labels) == 9 - onehot = np.zeros((labels.size, np.max(labels) + 1), dtype=np.float32) - onehot[np.arange(labels.size), labels] = 1.0 - - with TFRecordExporter(tfrecord_dir, images.shape[0]) as tfr: - order = tfr.choose_shuffled_order() - for idx in range(order.size): - tfr.add_image(images[order[idx]]) - tfr.add_labels(onehot[order]) - -# ---------------------------------------------------------------------------- - - -def create_lsun(tfrecord_dir, lmdb_dir, resolution=256, max_images=None): - print('Loading LSUN dataset from "%s"' % lmdb_dir) - import lmdb # pip install lmdb - import cv2 # pip install opencv-python - import io - with lmdb.open(lmdb_dir, readonly=True).begin(write=False) as txn: - total_images = txn.stat()['entries'] - if max_images is None: - max_images = total_images - with TFRecordExporter(tfrecord_dir, max_images) as tfr: - for idx, (key, value) in enumerate(txn.cursor()): - try: - try: - img = cv2.imdecode(np.fromstring( - value, dtype=np.uint8), 1) - if img is None: - raise IOError('cv2.imdecode failed') - img = img[:, :, ::-1] # BGR => RGB - except IOError: - img = np.asarray(PIL.Image.open(io.BytesIO(value))) - crop = np.min(img.shape[:2]) - img = img[(img.shape[0] - crop) // 2: (img.shape[0] + crop) // - 2, (img.shape[1] - crop) // 2: (img.shape[1] + crop) // 2] - img = PIL.Image.fromarray(img, 'RGB') - img = img.resize((resolution, resolution), - PIL.Image.ANTIALIAS) - img = np.asarray(img) - img = img.transpose(2, 0, 1) # HWC => CHW - tfr.add_image(img) - except: - print(sys.exc_info()[1]) - if tfr.cur_images == max_images: - break - -# ---------------------------------------------------------------------------- - - -def create_celeba(tfrecord_dir, celeba_dir, cx=89, cy=121): - print('Loading CelebA from "%s"' % celeba_dir) - glob_pattern = os.path.join(celeba_dir, 'img_align_celeba_png', '*.png') - image_filenames = sorted(glob.glob(glob_pattern)) - expected_images = 202599 - if len(image_filenames) != expected_images: - error('Expected to find %d images' % expected_images) - - with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: - order = tfr.choose_shuffled_order() - for idx in range(order.size): - img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) - assert img.shape == (218, 178, 3) - img = img[cy - 64: cy + 64, cx - 64: cx + 64] - img = img.transpose(2, 0, 1) # HWC => CHW - tfr.add_image(img) - -# ---------------------------------------------------------------------------- - - -def create_celebahq(tfrecord_dir, celeba_dir, delta_dir, num_threads=4, num_tasks=100): - print('Loading CelebA from "%s"' % celeba_dir) - expected_images = 202599 - if len(glob.glob(os.path.join(celeba_dir, 'img_celeba', '*.jpg'))) != expected_images: - error('Expected to find %d images' % expected_images) - with open(os.path.join(celeba_dir, 'Anno', 'list_landmarks_celeba.txt'), 'rt') as file: - landmarks = [[float(value) for value in line.split()[1:]] - for line in file.readlines()[2:]] - landmarks = np.float32(landmarks).reshape(-1, 5, 2) - - print('Loading CelebA-HQ deltas from "%s"' % delta_dir) - import scipy.ndimage - import hashlib - import bz2 - import zipfile - import base64 - import cryptography.hazmat.primitives.hashes - import cryptography.hazmat.backends - import cryptography.hazmat.primitives.kdf.pbkdf2 - import 
cryptography.fernet - expected_zips = 30 - if len(glob.glob(os.path.join(delta_dir, 'delta*.zip'))) != expected_zips: - error('Expected to find %d zips' % expected_zips) - with open(os.path.join(delta_dir, 'image_list.txt'), 'rt') as file: - lines = [line.split() for line in file] - fields = dict() - for idx, field in enumerate(lines[0]): - type = int if field.endswith('idx') else str - fields[field] = [type(line[idx]) for line in lines[1:]] - indices = np.array(fields['idx']) - - # Must use pillow version 3.1.1 for everything to work correctly. - if getattr(PIL, 'PILLOW_VERSION', '') != '3.1.1': - # conda install pillow=3.1.1 - error('create_celebahq requires pillow version 3.1.1') - - # Must use libjpeg version 8d for everything to work correctly. - img = np.array(PIL.Image.open(os.path.join( - celeba_dir, 'img_celeba', '000001.jpg'))) - md5 = hashlib.md5() - md5.update(img.tobytes()) - if md5.hexdigest() != '9cad8178d6cb0196b36f7b34bc5eb6d3': - # conda install jpeg=8d - error('create_celebahq requires libjpeg version 8d') - - def rot90(v): - return np.array([-v[1], v[0]]) - - def process_func(idx): - # Load original image. - orig_idx = fields['orig_idx'][idx] - orig_file = fields['orig_file'][idx] - orig_path = os.path.join(celeba_dir, 'img_celeba', orig_file) - img = PIL.Image.open(orig_path) - - # Choose oriented crop rectangle. - lm = landmarks[orig_idx] - eye_avg = (lm[0] + lm[1]) * 0.5 + 0.5 - mouth_avg = (lm[3] + lm[4]) * 0.5 + 0.5 - eye_to_eye = lm[1] - lm[0] - eye_to_mouth = mouth_avg - eye_avg - x = eye_to_eye - rot90(eye_to_mouth) - x /= np.hypot(*x) - x *= max(np.hypot(*eye_to_eye) * 2.0, np.hypot(*eye_to_mouth) * 1.8) - y = rot90(x) - c = eye_avg + eye_to_mouth * 0.1 - quad = np.stack([c - x - y, c - x + y, c + x + y, c + x - y]) - zoom = 1024 / (np.hypot(*x) * 2) - - # Shrink. - shrink = int(np.floor(0.5 / zoom)) - if shrink > 1: - size = (int(np.round( - float(img.size[0]) / shrink)), int(np.round(float(img.size[1]) / shrink))) - img = img.resize(size, PIL.Image.ANTIALIAS) - quad /= shrink - zoom *= shrink - - # Crop. - border = max(int(np.round(1024 * 0.1 / zoom)), 3) - crop = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int( - np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1])))) - crop = (max(crop[0] - border, 0), max(crop[1] - border, 0), - min(crop[2] + border, img.size[0]), min(crop[3] + border, img.size[1])) - if crop[2] - crop[0] < img.size[0] or crop[3] - crop[1] < img.size[1]: - img = img.crop(crop) - quad -= crop[0:2] - - # Simulate super-resolution. - superres = int(np.exp2(np.ceil(np.log2(zoom)))) - if superres > 1: - img = img.resize( - (img.size[0] * superres, img.size[1] * superres), PIL.Image.ANTIALIAS) - quad *= superres - zoom /= superres - - # Pad. 
- pad = (int(np.floor(min(quad[:, 0]))), int(np.floor(min(quad[:, 1]))), int( - np.ceil(max(quad[:, 0]))), int(np.ceil(max(quad[:, 1])))) - pad = (max(-pad[0] + border, 0), max(-pad[1] + border, 0), max(pad[2] - - img.size[0] + border, 0), max(pad[3] - img.size[1] + border, 0)) - if max(pad) > border - 4: - pad = np.maximum(pad, int(np.round(1024 * 0.3 / zoom))) - img = np.pad(np.float32( - img), ((pad[1], pad[3]), (pad[0], pad[2]), (0, 0)), 'reflect') - h, w, _ = img.shape - y, x, _ = np.mgrid[:h, :w, :1] - mask = 1.0 - np.minimum(np.minimum(np.float32(x) / pad[0], np.float32( - y) / pad[1]), np.minimum(np.float32(w-1-x) / pad[2], np.float32(h-1-y) / pad[3])) - blur = 1024 * 0.02 / zoom - img += (scipy.ndimage.gaussian_filter(img, - [blur, blur, 0]) - img) * np.clip(mask * 3.0 + 1.0, 0.0, 1.0) - img += (np.median(img, axis=(0, 1)) - img) * \ - np.clip(mask, 0.0, 1.0) - img = PIL.Image.fromarray( - np.uint8(np.clip(np.round(img), 0, 255)), 'RGB') - quad += pad[0:2] - - # Transform. - img = img.transform((4096, 4096), PIL.Image.QUAD, - (quad + 0.5).flatten(), PIL.Image.BILINEAR) - img = img.resize((1024, 1024), PIL.Image.ANTIALIAS) - img = np.asarray(img).transpose(2, 0, 1) - - # Verify MD5. - md5 = hashlib.md5() - md5.update(img.tobytes()) - assert md5.hexdigest() == fields['proc_md5'][idx] - - # Load delta image and original JPG. - with zipfile.ZipFile(os.path.join(delta_dir, 'deltas%05d.zip' % (idx - idx % 1000)), 'r') as zip: - delta_bytes = zip.read('delta%05d.dat' % idx) - with open(orig_path, 'rb') as file: - orig_bytes = file.read() - - # Decrypt delta image, using original JPG data as decryption key. - algorithm = cryptography.hazmat.primitives.hashes.SHA256() - backend = cryptography.hazmat.backends.default_backend() - salt = bytes(orig_file, 'ascii') - kdf = cryptography.hazmat.primitives.kdf.pbkdf2.PBKDF2HMAC( - algorithm=algorithm, length=32, salt=salt, iterations=100000, backend=backend) - key = base64.urlsafe_b64encode(kdf.derive(orig_bytes)) - delta = np.frombuffer(bz2.decompress(cryptography.fernet.Fernet( - key).decrypt(delta_bytes)), dtype=np.uint8).reshape(3, 1024, 1024) - - # Apply delta image. - img = img + delta - - # Verify MD5. 
- md5 = hashlib.md5() - md5.update(img.tobytes()) - assert md5.hexdigest() == fields['final_md5'][idx] - return img - - with TFRecordExporter(tfrecord_dir, indices.size) as tfr: - order = tfr.choose_shuffled_order() - with ThreadPool(num_threads) as pool: - for img in pool.process_items_concurrently(indices[order].tolist(), process_func=process_func, max_items_in_flight=num_tasks): - tfr.add_image(img) - -# ---------------------------------------------------------------------------- - - -def create_from_images(tfrecord_dir, image_dir, shuffle): - print('Loading images from "%s"' % image_dir) - image_filenames = sorted(glob.glob(os.path.join(image_dir, '*'))) - if len(image_filenames) == 0: - error('No input images found') - - img = np.asarray(PIL.Image.open(image_filenames[0])) - resolution = img.shape[0] - channels = img.shape[2] if img.ndim == 3 else 1 - if img.shape[1] != resolution: - error('Input images must have the same width and height') - if resolution != 2 ** int(np.floor(np.log2(resolution))): - error('Input image resolution must be a power-of-two') - if channels not in [1, 3]: - error('Input images must be stored as RGB or grayscale') - - with TFRecordExporter(tfrecord_dir, len(image_filenames)) as tfr: - order = tfr.choose_shuffled_order() if shuffle else np.arange(len(image_filenames)) - for idx in range(order.size): - img = np.asarray(PIL.Image.open(image_filenames[order[idx]])) - if channels == 1: - img = img[np.newaxis, :, :] # HW => CHW - else: - img = img.transpose(2, 0, 1) # HWC => CHW - tfr.add_image(img) - -# ---------------------------------------------------------------------------- - - -def create_from_hdf5(tfrecord_dir, hdf5_filename, shuffle): - print('Loading HDF5 archive from "%s"' % hdf5_filename) - import h5py # conda install h5py - with h5py.File(hdf5_filename, 'r') as hdf5_file: - hdf5_data = max([value for key, value in hdf5_file.items( - ) if key.startswith('data')], key=lambda lod: lod.shape[3]) - with TFRecordExporter(tfrecord_dir, hdf5_data.shape[0]) as tfr: - order = tfr.choose_shuffled_order( - ) if shuffle else np.arange(hdf5_data.shape[0]) - for idx in range(order.size): - tfr.add_image(hdf5_data[order[idx]]) - npy_filename = os.path.splitext(hdf5_filename)[0] + '-labels.npy' - if os.path.isfile(npy_filename): - tfr.add_labels(np.load(npy_filename)[order]) - -# ---------------------------------------------------------------------------- - - -def execute_cmdline(argv): - prog = argv[0] - parser = argparse.ArgumentParser( - prog=prog, - description='Tool for creating, extracting, and visualizing Progressive GAN datasets.', - epilog='Type "%s -h" for more information.' 
% prog) - - subparsers = parser.add_subparsers(dest='command') - subparsers.required = True - - def add_command(cmd, desc, example=None): - epilog = 'Example: %s %s' % ( - prog, example) if example is not None else None - return subparsers.add_parser(cmd, description=desc, help=desc, epilog=epilog) - - p = add_command('display', 'Display images in dataset.', - 'display datasets/mnist') - p.add_argument('tfrecord_dir', help='Directory containing dataset') - - p = add_command('extract', 'Extract images from dataset.', - 'extract datasets/mnist mnist-images') - p.add_argument('tfrecord_dir', help='Directory containing dataset') - p.add_argument('output_dir', - help='Directory to extract the images into') - - p = add_command('compare', 'Compare two datasets.', - 'compare datasets/mydataset datasets/mnist') - p.add_argument('tfrecord_dir_a', - help='Directory containing first dataset') - p.add_argument('tfrecord_dir_b', - help='Directory containing second dataset') - p.add_argument('--ignore_labels', - help='Ignore labels (default: 0)', type=int, default=0) - - p = add_command('create_mnist', 'Create dataset for MNIST.', - 'create_mnist datasets/mnist ~/downloads/mnist') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('mnist_dir', help='Directory containing MNIST') - - p = add_command('create_mnistrgb', 'Create dataset for MNIST-RGB.', - 'create_mnistrgb datasets/mnistrgb ~/downloads/mnist') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('mnist_dir', help='Directory containing MNIST') - p.add_argument('--num_images', - help='Number of composite images to create (default: 1000000)', type=int, default=1000000) - p.add_argument('--random_seed', - help='Random seed (default: 123)', type=int, default=123) - - p = add_command('create_cifar10', 'Create dataset for CIFAR-10.', - 'create_cifar10 datasets/cifar10 ~/downloads/cifar10') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('cifar10_dir', help='Directory containing CIFAR-10') - - p = add_command('create_cifar100', 'Create dataset for CIFAR-100.', - 'create_cifar100 datasets/cifar100 ~/downloads/cifar100') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('cifar100_dir', help='Directory containing CIFAR-100') - - p = add_command('create_svhn', 'Create dataset for SVHN.', - 'create_svhn datasets/svhn ~/downloads/svhn') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('svhn_dir', help='Directory containing SVHN') - - p = add_command('create_lsun', 'Create dataset for single LSUN category.', - 'create_lsun datasets/lsun-car-100k ~/downloads/lsun/car_lmdb --resolution 256 --max_images 100000') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument( - 'lmdb_dir', help='Directory containing LMDB database') - p.add_argument('--resolution', - help='Output resolution (default: 256)', type=int, default=256) - p.add_argument('--max_images', - help='Maximum number of images (default: none)', type=int, default=None) - - p = add_command('create_celeba', 'Create dataset for CelebA.', - 'create_celeba datasets/celeba ~/downloads/celeba') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('celeba_dir', help='Directory containing CelebA') - p.add_argument( - '--cx', help='Center X coordinate (default: 89)', type=int, default=89) - p.add_argument( 
- '--cy', help='Center Y coordinate (default: 121)', type=int, default=121) - - p = add_command('create_celebahq', 'Create dataset for CelebA-HQ.', - 'create_celebahq datasets/celebahq ~/downloads/celeba ~/downloads/celeba-hq-deltas') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('celeba_dir', help='Directory containing CelebA') - p.add_argument('delta_dir', - help='Directory containing CelebA-HQ deltas') - p.add_argument('--num_threads', - help='Number of concurrent threads (default: 4)', type=int, default=4) - p.add_argument('--num_tasks', - help='Number of concurrent processing tasks (default: 100)', type=int, default=100) - - p = add_command('create_from_images', 'Create dataset from a directory full of images.', - 'create_from_images datasets/mydataset myimagedir') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('image_dir', help='Directory containing the images') - p.add_argument('--shuffle', - help='Randomize image order (default: 1)', type=int, default=1) - - p = add_command('create_from_hdf5', 'Create dataset from legacy HDF5 archive.', - 'create_from_hdf5 datasets/celebahq ~/downloads/celeba-hq-1024x1024.h5') - p.add_argument('tfrecord_dir', - help='New dataset directory to be created') - p.add_argument('hdf5_filename', - help='HDF5 archive containing the images') - p.add_argument('--shuffle', - help='Randomize image order (default: 1)', type=int, default=1) - - args = parser.parse_args(argv[1:] if len(argv) > 1 else ['-h']) - func = globals()[args.command] - del args.command - func(**vars(args)) - -# ---------------------------------------------------------------------------- - - -if __name__ == "__main__": - execute_cmdline(sys.argv) - -# ---------------------------------------------------------------------------- -# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -# -# This work is licensed under the Creative Commons Attribution-NonCommercial -# 4.0 International License. To view a copy of this license, visit -# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to -# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. - -import pickle -import inspect -import numpy as np - -import tfutil -import networks - -# ---------------------------------------------------------------------------- -# Custom unpickler that is able to load network pickles produced by -# the old Theano implementation. - - -class LegacyUnpickler(pickle.Unpickler): - def __init__(self, *args, **kwargs): - super().__init__(*args, **kwargs) - - def find_class(self, module, name): - if module == 'network' and name == 'Network': - return tfutil.Network - return super().find_class(module, name) - -# ---------------------------------------------------------------------------- -# Import handler for tfutil.Network that silently converts networks produced -# by the old Theano implementation to a suitable format. 
- - -theano_gan_remap = { - 'G_paper': 'G_paper', - 'G_progressive_8': 'G_paper', - 'D_paper': 'D_paper', - 'D_progressive_8': 'D_paper'} - - -def patch_theano_gan(state): - if 'version' in state or state['build_func_spec']['func'] not in theano_gan_remap: - return state - - spec = dict(state['build_func_spec']) - func = spec.pop('func') - resolution = spec.get('resolution', 32) - resolution_log2 = int(np.log2(resolution)) - use_wscale = spec.get('use_wscale', True) - - assert spec.pop('label_size', 0) == 0 - assert spec.pop('use_batchnorm', False) == False - assert spec.pop('tanh_at_end', None) is None - assert spec.pop('mbstat_func', 'Tstdeps') == 'Tstdeps' - assert spec.pop('mbstat_avg', 'all') == 'all' - assert spec.pop('mbdisc_kernels', None) is None - spec.pop('use_gdrop', True) # doesn't make a difference - assert spec.pop('use_layernorm', False) == False - spec['fused_scale'] = False - spec['mbstd_group_size'] = 16 - - vars = [] - param_iter = iter(state['param_values']) - relu = np.sqrt(2) - linear = 1.0 - def flatten2(w): return w.reshape(w.shape[0], -1) - def he_std(gain, w): return gain / np.sqrt(np.prod(w.shape[:-1])) - def wscale(gain, w): return w * next(param_iter) / \ - he_std(gain, w) if use_wscale else w - def layer(name, gain, w): return [ - (name + '/weight', wscale(gain, w)), (name + '/bias', next(param_iter))] - - if func.startswith('G'): - vars += layer('4x4/Dense', relu/4, - flatten2(next(param_iter).transpose(1, 0, 2, 3))) - vars += layer('4x4/Conv', relu, - next(param_iter).transpose(2, 3, 1, 0)[::-1, ::-1]) - for res in range(3, resolution_log2 + 1): - vars += layer('%dx%d/Conv0' % (2**res, 2**res), relu, - next(param_iter).transpose(2, 3, 1, 0)[::-1, ::-1]) - vars += layer('%dx%d/Conv1' % (2**res, 2**res), relu, - next(param_iter).transpose(2, 3, 1, 0)[::-1, ::-1]) - for lod in range(0, resolution_log2 - 1): - vars += layer('ToRGB_lod%d' % lod, linear, - next(param_iter)[np.newaxis, np.newaxis]) - - if func.startswith('D'): - vars += layer('FromRGB_lod0', relu, next(param_iter) - [np.newaxis, np.newaxis]) - for res in range(resolution_log2, 2, -1): - vars += layer('%dx%d/Conv0' % (2**res, 2**res), relu, - next(param_iter).transpose(2, 3, 1, 0)[::-1, ::-1]) - vars += layer('%dx%d/Conv1' % (2**res, 2**res), relu, - next(param_iter).transpose(2, 3, 1, 0)[::-1, ::-1]) - vars += layer('FromRGB_lod%d' % (resolution_log2 - (res - 1)), - relu, next(param_iter)[np.newaxis, np.newaxis]) - vars += layer('4x4/Conv', relu, - next(param_iter).transpose(2, 3, 1, 0)[::-1, ::-1]) - vars += layer('4x4/Dense0', relu, - flatten2(next(param_iter)[:, :, ::-1, ::-1]).transpose()) - vars += layer('4x4/Dense1', linear, next(param_iter)) - - vars += [('lod', state['toplevel_params']['cur_lod'])] - - return { - 'version': 2, - 'name': func, - 'build_module_src': inspect.getsource(networks), - 'build_func_name': theano_gan_remap[func], - 'static_kwargs': spec, - 'variables': vars} - - -tfutil.network_import_handlers.append(patch_theano_gan) - -# ---------------------------------------------------------------------------- -# Import handler for tfutil.Network that ignores unsupported/deprecated -# networks produced by older versions of the code. 
-
-
-def ignore_unknown_theano_network(state):
-    if 'version' in state:
-        return state
-
-    print('Ignoring unknown Theano network:', state['build_func_spec']['func'])
-    return {
-        'version': 2,
-        'name': 'Dummy',
-        'build_module_src': 'def dummy(input, **kwargs): input.set_shape([None, 1]); return input',
-        'build_func_name': 'dummy',
-        'static_kwargs': {},
-        'variables': []}
-
-
-tfutil.network_import_handlers.append(ignore_unknown_theano_network)
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
-#
-# This work is licensed under the Creative Commons Attribution-NonCommercial
-# 4.0 International License. To view a copy of this license, visit
-# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to
-# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
-
-import numpy as np
-import tensorflow as tf
-
-import tfutil
-
-# ----------------------------------------------------------------------------
-# Convenience func that casts all of its arguments to tf.float32.
-
-
-def fp32(*values):
-    if len(values) == 1 and isinstance(values[0], tuple):
-        values = values[0]
-    values = tuple(tf.cast(v, tf.float32) for v in values)
-    return values if len(values) >= 2 else values[0]
-
-# ----------------------------------------------------------------------------
-# Generator loss function used in the paper (WGAN + AC-GAN).
-
-
-def G_wgan_acgan(G, D, opt, training_set, minibatch_size,
-                 cond_weight=1.0):  # Weight of the conditioning term.
-
-    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
-    labels = training_set.get_random_labels_tf(minibatch_size)
-    fake_images_out = G.get_output_for(latents, labels, is_training=True)
-    fake_scores_out, fake_labels_out = fp32(D.get_output_for(fake_images_out, is_training=True))
-    loss = -fake_scores_out
-
-    if D.output_shapes[1][1] > 0:
-        with tf.name_scope('LabelPenalty'):
-            label_penalty_fakes = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=fake_labels_out)
-        loss += label_penalty_fakes * cond_weight
-    return loss
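Before the discriminator loss that follows: its gradient penalty samples random interpolates between real and fake images and pushes the critic's gradient norm at those points toward `wgan_target`. A toy NumPy version with an analytic gradient, illustrative only (the quadratic "critic" and all the data here are made up):

```python
import numpy as np

rng = np.random.RandomState(0)
reals = rng.randn(4, 8)                 # stand-in real batch
fakes = rng.randn(4, 8)                 # stand-in generator output
w = rng.randn(8)                        # toy critic D(x) = (w . x)**2

t = rng.uniform(size=(4, 1))            # per-sample mixing factors
mixed = reals + (fakes - reals) * t     # lerp(reals, fakes, t), as in the TF code
grads = 2.0 * (mixed @ w)[:, None] * w  # analytic dD/dx at the interpolates
grad_norms = np.linalg.norm(grads, axis=1)

wgan_lambda, wgan_target = 10.0, 1.0
penalty = (grad_norms - wgan_target) ** 2 * (wgan_lambda / wgan_target ** 2)
print(penalty.mean())                   # this term is added to the critic loss
```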
-
-# ----------------------------------------------------------------------------
-# Discriminator loss function used in the paper (WGAN-GP + AC-GAN).
-
-
-def D_wgangp_acgan(G, D, opt, training_set, minibatch_size, reals, labels,
-                   wgan_lambda=10.0,    # Weight for the gradient penalty term.
-                   wgan_epsilon=0.001,  # Weight for the epsilon term, \epsilon_{drift}.
-                   wgan_target=1.0,     # Target value for gradient magnitudes.
-                   cond_weight=1.0):    # Weight of the conditioning terms.
-
-    latents = tf.random_normal([minibatch_size] + G.input_shapes[0][1:])
-    fake_images_out = G.get_output_for(latents, labels, is_training=True)
-    real_scores_out, real_labels_out = fp32(D.get_output_for(reals, is_training=True))
-    fake_scores_out, fake_labels_out = fp32(D.get_output_for(fake_images_out, is_training=True))
-    real_scores_out = tfutil.autosummary('Loss/real_scores', real_scores_out)
-    fake_scores_out = tfutil.autosummary('Loss/fake_scores', fake_scores_out)
-    loss = fake_scores_out - real_scores_out
-
-    with tf.name_scope('GradientPenalty'):
-        mixing_factors = tf.random_uniform([minibatch_size, 1, 1, 1], 0.0, 1.0, dtype=fake_images_out.dtype)
-        mixed_images_out = tfutil.lerp(tf.cast(reals, fake_images_out.dtype), fake_images_out, mixing_factors)
-        mixed_scores_out, mixed_labels_out = fp32(D.get_output_for(mixed_images_out, is_training=True))
-        mixed_scores_out = tfutil.autosummary('Loss/mixed_scores', mixed_scores_out)
-        mixed_loss = opt.apply_loss_scaling(tf.reduce_sum(mixed_scores_out))
-        mixed_grads = opt.undo_loss_scaling(fp32(tf.gradients(mixed_loss, [mixed_images_out])[0]))
-        mixed_norms = tf.sqrt(tf.reduce_sum(tf.square(mixed_grads), axis=[1, 2, 3]))
-        mixed_norms = tfutil.autosummary('Loss/mixed_norms', mixed_norms)
-        gradient_penalty = tf.square(mixed_norms - wgan_target)
-    loss += gradient_penalty * (wgan_lambda / (wgan_target**2))
-
-    with tf.name_scope('EpsilonPenalty'):
-        epsilon_penalty = tfutil.autosummary('Loss/epsilon_penalty', tf.square(real_scores_out))
-    loss += epsilon_penalty * wgan_epsilon
-
-    if D.output_shapes[1][1] > 0:
-        with tf.name_scope('LabelPenalty'):
-            label_penalty_reals = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=real_labels_out)
-            label_penalty_fakes = tf.nn.softmax_cross_entropy_with_logits_v2(labels=labels, logits=fake_labels_out)
-            label_penalty_reals = tfutil.autosummary('Loss/label_penalty_reals', label_penalty_reals)
-            label_penalty_fakes = tfutil.autosummary('Loss/label_penalty_fakes', label_penalty_fakes)
-        loss += (label_penalty_reals + label_penalty_fakes) * cond_weight
-    return loss
-
-# ----------------------------------------------------------------------------
-# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved.
-#
-# This work is licensed under the Creative Commons Attribution-NonCommercial
-# 4.0 International License. To view a copy of this license, visit
-# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to
-# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA.
-
-import os
-import sys
-import glob
-import datetime
-import pickle
-import re
-import numpy as np
-from collections import OrderedDict
-import scipy.ndimage
-import PIL.Image
-
-import config
-import dataset
-import legacy
-
-# ----------------------------------------------------------------------------
-# Convenience wrappers for pickle that are able to load data produced by
-# older versions of the code.
-
-
-def load_pkl(filename):
-    with open(filename, 'rb') as file:
-        return legacy.LegacyUnpickler(file, encoding='latin1').load()
-
-
-def save_pkl(obj, filename):
-    with open(filename, 'wb') as file:
-        pickle.dump(obj, file, protocol=pickle.HIGHEST_PROTOCOL)
-
-# ----------------------------------------------------------------------------
-# Image utils.
- - -def adjust_dynamic_range(data, drange_in, drange_out): - if drange_in != drange_out: - scale = (np.float32(drange_out[1]) - np.float32(drange_out[0])) / ( - np.float32(drange_in[1]) - np.float32(drange_in[0])) - bias = (np.float32(drange_out[0]) - np.float32(drange_in[0]) * scale) - data = data * scale + bias - return data - - -def create_image_grid(images, grid_size=None): - assert images.ndim == 3 or images.ndim == 4 - num, img_w, img_h = images.shape[0], images.shape[-1], images.shape[-2] - - if grid_size is not None: - grid_w, grid_h = tuple(grid_size) - else: - grid_w = max(int(np.ceil(np.sqrt(num))), 1) - grid_h = max((num - 1) // grid_w + 1, 1) - - grid = np.zeros( - list(images.shape[1:-2]) + [grid_h * img_h, grid_w * img_w], dtype=images.dtype) - for idx in range(num): - x = (idx % grid_w) * img_w - y = (idx // grid_w) * img_h - grid[..., y: y + img_h, x: x + img_w] = images[idx] - return grid - - -def convert_to_pil_image(image, drange=[0, 1]): - assert image.ndim == 2 or image.ndim == 3 - if image.ndim == 3: - if image.shape[0] == 1: - image = image[0] # grayscale CHW => HW - else: - image = image.transpose(1, 2, 0) # CHW -> HWC - - image = adjust_dynamic_range(image, drange, [0, 255]) - image = np.rint(image).clip(0, 255).astype(np.uint8) - format = 'RGB' if image.ndim == 3 else 'L' - return PIL.Image.fromarray(image, format) - - -def save_image(image, filename, drange=[0, 1], quality=95): - img = convert_to_pil_image(image, drange) - if '.jpg' in filename: - img.save(filename, "JPEG", quality=quality, optimize=True) - else: - img.save(filename) - - -def save_image_grid(images, filename, drange=[0, 1], grid_size=None): - convert_to_pil_image(create_image_grid( - images, grid_size), drange).save(filename) - -# ---------------------------------------------------------------------------- -# Logging of stdout and stderr to a file. - - -class OutputLogger(object): - def __init__(self): - self.file = None - self.buffer = '' - - def set_log_file(self, filename, mode='wt'): - assert self.file is None - self.file = open(filename, mode) - if self.buffer is not None: - self.file.write(self.buffer) - self.buffer = None - - def write(self, data): - if self.file is not None: - self.file.write(data) - if self.buffer is not None: - self.buffer += data - - def flush(self): - if self.file is not None: - self.file.flush() - - -class TeeOutputStream(object): - def __init__(self, child_streams, autoflush=False): - self.child_streams = child_streams - self.autoflush = autoflush - - def write(self, data): - for stream in self.child_streams: - stream.write(data) - if self.autoflush: - self.flush() - - def flush(self): - for stream in self.child_streams: - stream.flush() - - -output_logger = None - - -def init_output_logging(): - global output_logger - if output_logger is None: - output_logger = OutputLogger() - sys.stdout = TeeOutputStream( - [sys.stdout, output_logger], autoflush=True) - sys.stderr = TeeOutputStream( - [sys.stderr, output_logger], autoflush=True) - - -def set_output_log_file(filename, mode='wt'): - if output_logger is not None: - output_logger.set_log_file(filename, mode) - -# ---------------------------------------------------------------------------- -# Reporting results. - - -def create_result_subdir(result_dir, desc): - - # Select run ID and create subdir. 
- while True: - run_id = 0 - for fname in glob.glob(os.path.join(result_dir, '*')): - try: - fbase = os.path.basename(fname) - ford = int(fbase[:fbase.find('-')]) - run_id = max(run_id, ford + 1) - except ValueError: - pass - - result_subdir = os.path.join(result_dir, '%03d-%s' % (run_id, desc)) - try: - os.makedirs(result_subdir) - break - except OSError: - if os.path.isdir(result_subdir): - continue - raise - - print("Saving results to", result_subdir) - set_output_log_file(os.path.join(result_subdir, 'log.txt')) - - # Export config. - try: - with open(os.path.join(result_subdir, 'config.txt'), 'wt') as fout: - for k, v in sorted(config.__dict__.items()): - if not k.startswith('_'): - fout.write("%s = %s\n" % (k, str(v))) - except: - pass - - return result_subdir - - -def format_time(seconds): - s = int(np.rint(seconds)) - if s < 60: - return '%ds' % (s) - elif s < 60*60: - return '%dm %02ds' % (s // 60, s % 60) - elif s < 24*60*60: - return '%dh %02dm %02ds' % (s // (60*60), (s // 60) % 60, s % 60) - else: - return '%dd %02dh %02dm' % (s // (24*60*60), (s // (60*60)) % 24, (s // 60) % 60) - -# ---------------------------------------------------------------------------- -# Locating results. - - -def locate_result_subdir(run_id_or_result_subdir): - if isinstance(run_id_or_result_subdir, str) and os.path.isdir(run_id_or_result_subdir): - return run_id_or_result_subdir - - searchdirs = [] - searchdirs += [''] - searchdirs += ['results'] - searchdirs += ['networks'] - - for searchdir in searchdirs: - dir = config.result_dir if searchdir == '' else os.path.join( - config.result_dir, searchdir) - dir = os.path.join(dir, str(run_id_or_result_subdir)) - if os.path.isdir(dir): - return dir - prefix = '%03d' % run_id_or_result_subdir if isinstance( - run_id_or_result_subdir, int) else str(run_id_or_result_subdir) - dirs = sorted(glob.glob(os.path.join( - config.result_dir, searchdir, prefix + '-*'))) - dirs = [dir for dir in dirs if os.path.isdir(dir)] - if len(dirs) == 1: - return dirs[0] - raise IOError('Cannot locate result subdir for run', - run_id_or_result_subdir) - - -def list_network_pkls(run_id_or_result_subdir, include_final=True): - result_subdir = locate_result_subdir(run_id_or_result_subdir) - pkls = sorted(glob.glob(os.path.join(result_subdir, 'network-*.pkl'))) - if len(pkls) >= 1 and os.path.basename(pkls[0]) == 'network-final.pkl': - if include_final: - pkls.append(pkls[0]) - del pkls[0] - return pkls - - -def locate_network_pkl(run_id_or_result_subdir_or_network_pkl, snapshot=None): - if isinstance(run_id_or_result_subdir_or_network_pkl, str) and os.path.isfile(run_id_or_result_subdir_or_network_pkl): - return run_id_or_result_subdir_or_network_pkl - - pkls = list_network_pkls(run_id_or_result_subdir_or_network_pkl) - if len(pkls) >= 1 and snapshot is None: - return pkls[-1] - for pkl in pkls: - try: - name = os.path.splitext(os.path.basename(pkl))[0] - number = int(name.split('-')[-1]) - if number == snapshot: - return pkl - except ValueError: - pass - except IndexError: - pass - raise IOError('Cannot locate network pkl for snapshot', snapshot) - - -def get_id_string_for_network_pkl(network_pkl): - p = network_pkl.replace('.pkl', '').replace('\\', '/').split('/') - return '-'.join(p[max(len(p) - 2, 0):]) - -# ---------------------------------------------------------------------------- -# Loading and using trained networks. 
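For concreteness, a few hand-worked evaluations of format_time as defined above (derived from its arithmetic, not captured program output):

# format_time(42)    -> '42s'
# format_time(125)   -> '2m 05s'
# format_time(3725)  -> '1h 02m 05s'
# format_time(90061) -> '1d 01h 01m'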
- - -def load_network_pkl(run_id_or_result_subdir_or_network_pkl, snapshot=None): - return load_pkl(locate_network_pkl(run_id_or_result_subdir_or_network_pkl, snapshot)) - - -def random_latents(num_latents, G, random_state=None): - if random_state is not None: - return random_state.randn(num_latents, *G.input_shape[1:]).astype(np.float32) - else: - return np.random.randn(num_latents, *G.input_shape[1:]).astype(np.float32) - - -# => dataset_obj, mirror_augment -def load_dataset_for_previous_run(run_id, **kwargs): - result_subdir = locate_result_subdir(run_id) - - # Parse config.txt. - parsed_cfg = dict() - with open(os.path.join(result_subdir, 'config.txt'), 'rt') as f: - for line in f: - if line.startswith('dataset =') or line.startswith('train ='): - exec(line, parsed_cfg, parsed_cfg) - dataset_cfg = parsed_cfg.get('dataset', dict()) - train_cfg = parsed_cfg.get('train', dict()) - mirror_augment = train_cfg.get('mirror_augment', False) - - # Handle legacy options. - if 'h5_path' in dataset_cfg: - dataset_cfg['tfrecord_dir'] = dataset_cfg.pop( - 'h5_path').replace('.h5', '') - if 'mirror_augment' in dataset_cfg: - mirror_augment = dataset_cfg.pop('mirror_augment') - if 'max_labels' in dataset_cfg: - v = dataset_cfg.pop('max_labels') - if v is None: - v = 0 - if v == 'all': - v = 'full' - dataset_cfg['max_label_size'] = v - if 'max_images' in dataset_cfg: - dataset_cfg.pop('max_images') - - # Handle legacy dataset names. - v = dataset_cfg['tfrecord_dir'] - v = v.replace('-32x32', '').replace('-32', '') - v = v.replace('-128x128', '').replace('-128', '') - v = v.replace('-256x256', '').replace('-256', '') - v = v.replace('-1024x1024', '').replace('-1024', '') - v = v.replace('celeba-hq', 'celebahq') - v = v.replace('cifar-10', 'cifar10') - v = v.replace('cifar-100', 'cifar100') - v = v.replace('mnist-rgb', 'mnistrgb') - v = re.sub('lsun-100k-([^-]*)', 'lsun-\\1-100k', v) - v = re.sub('lsun-full-([^-]*)', 'lsun-\\1-full', v) - dataset_cfg['tfrecord_dir'] = v - - # Load dataset. - dataset_cfg.update(kwargs) - dataset_obj = dataset.load_dataset(data_dir=config.data_dir, **dataset_cfg) - return dataset_obj, mirror_augment - - -def apply_mirror_augment(minibatch): - mask = np.random.rand(minibatch.shape[0]) < 0.5 - minibatch = np.array(minibatch) - minibatch[mask] = minibatch[mask, :, :, ::-1] - return minibatch - -# ---------------------------------------------------------------------------- -# Text labels. - - -_text_label_cache = OrderedDict() - - -def draw_text_label(img, text, x, y, alignx=0.5, aligny=0.5, color=255, opacity=1.0, glow_opacity=1.0, **kwargs): - color = np.array(color).flatten().astype(np.float32) - assert img.ndim == 3 and img.shape[2] == color.size or color.size == 1 - alpha, glow = setup_text_label(text, **kwargs) - xx, yy = int(np.rint(x - alpha.shape[1] * alignx) - ), int(np.rint(y - alpha.shape[0] * aligny)) - xb, yb = max(-xx, 0), max(-yy, 0) - xe, ye = min(alpha.shape[1], img.shape[1] - - xx), min(alpha.shape[0], img.shape[0] - yy) - img = np.array(img) - slice = img[yy+yb: yy+ye, xx+xb: xx+xe, :] - slice[:] = slice * (1.0 - (1.0 - (1.0 - alpha[yb:ye, xb:xe]) * - (1.0 - glow[yb:ye, xb:xe] * glow_opacity)) * opacity)[:, :, np.newaxis] - slice[:] = slice + alpha[yb:ye, xb:xe, np.newaxis] * \ - (color * opacity)[np.newaxis, np.newaxis, :] - return img - - -def setup_text_label(text, font='Calibri', fontsize=32, padding=6, glow_size=2.0, glow_coef=3.0, glow_exp=2.0, cache_size=100): # => (alpha, glow) - # Lookup from cache. 
- key = (text, font, fontsize, padding, glow_size, glow_coef, glow_exp) - if key in _text_label_cache: - value = _text_label_cache[key] - del _text_label_cache[key] # LRU policy - _text_label_cache[key] = value - return value - - # Limit cache size. - while len(_text_label_cache) >= cache_size: - _text_label_cache.popitem(last=False) - - # Render text. - import moviepy.editor # pip install moviepy - alpha = moviepy.editor.TextClip( - text, font=font, fontsize=fontsize).mask.make_frame(0) - alpha = np.pad(alpha, padding, mode='constant', constant_values=0.0) - glow = scipy.ndimage.gaussian_filter(alpha, glow_size) - glow = 1.0 - np.maximum(1.0 - glow * glow_coef, 0.0) ** glow_exp - - # Add to cache. - value = (alpha, glow) - _text_label_cache[key] = value - return value - -# ---------------------------------------------------------------------------- -# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -# -# This work is licensed under the Creative Commons Attribution-NonCommercial -# 4.0 International License. To view a copy of this license, visit -# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to -# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. - -import numpy as np -import tensorflow as tf - -# NOTE: Do not import any application-specific modules here! - -# ---------------------------------------------------------------------------- - - -def lerp(a, b, t): return a + (b - a) * t - - -def lerp_clip(a, b, t): return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0) - - -def cset(cur_lambda, new_cond, new_lambda): return lambda: tf.cond( - new_cond, new_lambda, cur_lambda) - -# ---------------------------------------------------------------------------- -# Get/create weight tensor for a convolutional or fully-connected layer. - - -def get_weight(shape, gain=np.sqrt(2), use_wscale=False, fan_in=None): - if fan_in is None: - fan_in = np.prod(shape[:-1]) - std = gain / np.sqrt(fan_in) # He init - if use_wscale: - wscale = tf.constant(np.float32(std), name='wscale') - return tf.get_variable('weight', shape=shape, initializer=tf.initializers.random_normal()) * wscale - else: - return tf.get_variable('weight', shape=shape, initializer=tf.initializers.random_normal(0, std)) - -# ---------------------------------------------------------------------------- -# Fully-connected layer. - - -def dense(x, fmaps, gain=np.sqrt(2), use_wscale=False): - if len(x.shape) > 2: - x = tf.reshape(x, [-1, np.prod([d.value for d in x.shape[1:]])]) - w = get_weight([x.shape[1].value, fmaps], gain=gain, use_wscale=use_wscale) - w = tf.cast(w, x.dtype) - return tf.matmul(x, w) - -# ---------------------------------------------------------------------------- -# Convolutional layer. - - -def conv2d(x, fmaps, kernel, gain=np.sqrt(2), use_wscale=False): - assert kernel >= 1 and kernel % 2 == 1 - w = get_weight([kernel, kernel, x.shape[1].value, fmaps], - gain=gain, use_wscale=use_wscale) - w = tf.cast(w, x.dtype) - return tf.nn.conv2d(x, w, strides=[1, 1, 1, 1], padding='SAME', data_format='NCHW') - -# ---------------------------------------------------------------------------- -# Apply bias to the given activation tensor. - - -def apply_bias(x): - b = tf.get_variable( - 'bias', shape=[x.shape[1]], initializer=tf.initializers.zeros()) - b = tf.cast(b, x.dtype) - if len(x.shape) == 2: - return x + b - else: - return x + tf.reshape(b, [1, -1, 1, 1]) - -# ---------------------------------------------------------------------------- -# Leaky ReLU activation. 
-# Same as tf.nn.leaky_relu, but supports FP16.
-
-
-def leaky_relu(x, alpha=0.2):
-    with tf.name_scope('LeakyRelu'):
-        alpha = tf.constant(alpha, dtype=x.dtype, name='alpha')
-        return tf.maximum(x * alpha, x)
-
-# ----------------------------------------------------------------------------
-# Nearest-neighbor upscaling layer.
-
-
-def upscale2d(x, factor=2):
-    assert isinstance(factor, int) and factor >= 1
-    if factor == 1:
-        return x
-    with tf.variable_scope('Upscale2D'):
-        s = x.shape
-        x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1])
-        x = tf.tile(x, [1, 1, 1, factor, 1, factor])
-        x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor])
-        return x
-
-# ----------------------------------------------------------------------------
-# Fused upscale2d + conv2d.
-# Faster and uses less memory than performing the operations separately.
-
-
-def upscale2d_conv2d(x, fmaps, kernel, gain=np.sqrt(2), use_wscale=False):
-    assert kernel >= 1 and kernel % 2 == 1
-    w = get_weight([kernel, kernel, fmaps, x.shape[1].value], gain=gain,
-                   use_wscale=use_wscale, fan_in=(kernel**2)*x.shape[1].value)
-    w = tf.pad(w, [[1, 1], [1, 1], [0, 0], [0, 0]], mode='CONSTANT')
-    w = tf.add_n([w[1:, 1:], w[:-1, 1:], w[1:, :-1], w[:-1, :-1]])
-    w = tf.cast(w, x.dtype)
-    os = [tf.shape(x)[0], fmaps, x.shape[2] * 2, x.shape[3] * 2]
-    return tf.nn.conv2d_transpose(x, w, os, strides=[1, 1, 2, 2], padding='SAME', data_format='NCHW')
-
-# ----------------------------------------------------------------------------
-# Box filter downscaling layer.
-
-
-def downscale2d(x, factor=2):
-    assert isinstance(factor, int) and factor >= 1
-    if factor == 1:
-        return x
-    with tf.variable_scope('Downscale2D'):
-        ksize = [1, 1, factor, factor]
-        # NOTE: requires tf_config['graph_options.place_pruned_graph'] = True
-        return tf.nn.avg_pool(x, ksize=ksize, strides=ksize, padding='VALID', data_format='NCHW')
-
-# ----------------------------------------------------------------------------
-# Fused conv2d + downscale2d.
-# Faster and uses less memory than performing the operations separately.
-
-
-def conv2d_downscale2d(x, fmaps, kernel, gain=np.sqrt(2), use_wscale=False):
-    assert kernel >= 1 and kernel % 2 == 1
-    w = get_weight([kernel, kernel, x.shape[1].value, fmaps],
-                   gain=gain, use_wscale=use_wscale)
-    w = tf.pad(w, [[1, 1], [1, 1], [0, 0], [0, 0]], mode='CONSTANT')
-    w = tf.add_n([w[1:, 1:], w[:-1, 1:], w[1:, :-1], w[:-1, :-1]]) * 0.25
-    w = tf.cast(w, x.dtype)
-    return tf.nn.conv2d(x, w, strides=[1, 1, 2, 2], padding='SAME', data_format='NCHW')
-
-# ----------------------------------------------------------------------------
-# Pixelwise feature vector normalization.
-
-
-def pixel_norm(x, epsilon=1e-8):
-    with tf.variable_scope('PixelNorm'):
-        return x * tf.rsqrt(tf.reduce_mean(tf.square(x), axis=1, keepdims=True) + epsilon)
-
-# ----------------------------------------------------------------------------
-# Minibatch standard deviation.
-
-
-def minibatch_stddev_layer(x, group_size=4):
-    with tf.variable_scope('MinibatchStddev'):
-        # Minibatch must be divisible by (or smaller than) group_size.
-        group_size = tf.minimum(group_size, tf.shape(x)[0])
-        # [NCHW] Input shape.
-        s = x.shape
-        # [GMCHW] Split minibatch into M groups of size G.
-        y = tf.reshape(x, [group_size, -1, s[1], s[2], s[3]])
-        # [GMCHW] Cast to FP32.
-        y = tf.cast(y, tf.float32)
-        # [GMCHW] Subtract mean over group.
-        y -= tf.reduce_mean(y, axis=0, keepdims=True)
-        # [MCHW] Calc variance over group.
- y = tf.reduce_mean(tf.square(y), axis=0) - # [MCHW] Calc stddev over group. - y = tf.sqrt(y + 1e-8) - # [M111] Take average over fmaps and pixels. - y = tf.reduce_mean(y, axis=[1, 2, 3], keepdims=True) - # [M111] Cast back to original data type. - y = tf.cast(y, x.dtype) - # [N1HW] Replicate over group and pixels. - y = tf.tile(y, [group_size, 1, s[2], s[3]]) - # [NCHW] Append as new fmap. - return tf.concat([x, y], axis=1) - -# ---------------------------------------------------------------------------- -# Generator network used in the paper. - - -def G_paper( - # First input: Latent vectors [minibatch, latent_size]. - latents_in, - # Second input: Labels [minibatch, label_size]. - labels_in, - # Number of output color channels. Overridden based on dataset. - num_channels=1, - # Output resolution. Overridden based on dataset. - resolution=32, - # Dimensionality of the labels, 0 if no labels. Overridden based on dataset. - label_size=0, - # Overall multiplier for the number of feature maps. - fmap_base=8192, - # log2 feature map reduction when doubling the resolution. - fmap_decay=1.0, - fmap_max=512, # Maximum number of feature maps in any layer. - # Dimensionality of the latent vectors. None = min(fmap_base, fmap_max). - latent_size=None, - # Normalize latent vectors before feeding them to the network? - normalize_latents=True, - use_wscale=True, # Enable equalized learning rate? - use_pixelnorm=True, # Enable pixelwise feature vector normalization? - # Constant epsilon for pixelwise feature vector normalization. - pixelnorm_epsilon=1e-8, - use_leakyrelu=True, # True = leaky ReLU, False = ReLU. - dtype='float32', # Data type to use for activations and outputs. - # True = use fused upscale2d + conv2d, False = separate upscale2d layers. - fused_scale=True, - # 'linear' = human-readable, 'recursive' = efficient, None = select automatically. - structure=None, - # True = template graph constructed by the Network class, False = actual evaluation. - is_template_graph=False, - **kwargs): # Ignore unrecognized keyword args. - - resolution_log2 = int(np.log2(resolution)) - assert resolution == 2**resolution_log2 and resolution >= 4 - def nf(stage): return min( - int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max) - def PN(x): return pixel_norm( - x, epsilon=pixelnorm_epsilon) if use_pixelnorm else x - if latent_size is None: - latent_size = nf(0) - if structure is None: - structure = 'linear' if is_template_graph else 'recursive' - act = leaky_relu if use_leakyrelu else tf.nn.relu - - latents_in.set_shape([None, latent_size]) - labels_in.set_shape([None, label_size]) - combo_in = tf.cast(tf.concat([latents_in, labels_in], axis=1), dtype) - lod_in = tf.cast(tf.get_variable( - 'lod', initializer=np.float32(0.0), trainable=False), dtype) - - # Building blocks. 
- def block(x, res): # res = 2..resolution_log2 - with tf.variable_scope('%dx%d' % (2**res, 2**res)): - if res == 2: # 4x4 - if normalize_latents: - x = pixel_norm(x, epsilon=pixelnorm_epsilon) - with tf.variable_scope('Dense'): - # override gain to match the original Theano implementation - x = dense(x, fmaps=nf(res-1)*16, - gain=np.sqrt(2)/4, use_wscale=use_wscale) - x = tf.reshape(x, [-1, nf(res-1), 4, 4]) - x = PN(act(apply_bias(x))) - with tf.variable_scope('Conv'): - x = PN( - act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale)))) - else: # 8x8 and up - if fused_scale: - with tf.variable_scope('Conv0_up'): - x = PN(act(apply_bias(upscale2d_conv2d( - x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale)))) - else: - x = upscale2d(x) - with tf.variable_scope('Conv0'): - x = PN( - act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale)))) - with tf.variable_scope('Conv1'): - x = PN( - act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=3, use_wscale=use_wscale)))) - return x - - def torgb(x, res): # res = 2..resolution_log2 - lod = resolution_log2 - res - with tf.variable_scope('ToRGB_lod%d' % lod): - return apply_bias(conv2d(x, fmaps=num_channels, kernel=1, gain=1, use_wscale=use_wscale)) - - # Linear structure: simple but inefficient. - if structure == 'linear': - x = block(combo_in, 2) - images_out = torgb(x, 2) - for res in range(3, resolution_log2 + 1): - lod = resolution_log2 - res - x = block(x, res) - img = torgb(x, res) - images_out = upscale2d(images_out) - with tf.variable_scope('Grow_lod%d' % lod): - images_out = lerp_clip(img, images_out, lod_in - lod) - - # Recursive structure: complex but efficient. - if structure == 'recursive': - def grow(x, res, lod): - y = block(x, res) - def img(): return upscale2d(torgb(y, res), 2**lod) - if res > 2: - img = cset(img, (lod_in > lod), lambda: upscale2d( - lerp(torgb(y, res), upscale2d(torgb(x, res - 1)), lod_in - lod), 2**lod)) - if lod > 0: - img = cset(img, (lod_in < lod), - lambda: grow(y, res + 1, lod - 1)) - return img() - images_out = grow(combo_in, 2, resolution_log2 - 2) - - assert images_out.dtype == tf.as_dtype(dtype) - images_out = tf.identity(images_out, name='images_out') - return images_out - -# ---------------------------------------------------------------------------- -# Discriminator network used in the paper. - - -def D_paper( - # Input: Images [minibatch, channel, height, width]. - images_in, - # Number of input color channels. Overridden based on dataset. - num_channels=1, - # Input resolution. Overridden based on dataset. - resolution=32, - # Dimensionality of the labels, 0 if no labels. Overridden based on dataset. - label_size=0, - # Overall multiplier for the number of feature maps. - fmap_base=8192, - # log2 feature map reduction when doubling the resolution. - fmap_decay=1.0, - fmap_max=512, # Maximum number of feature maps in any layer. - use_wscale=True, # Enable equalized learning rate? - # Group size for the minibatch standard deviation layer, 0 = disable. - mbstd_group_size=4, - dtype='float32', # Data type to use for activations and outputs. - # True = use fused conv2d + downscale2d, False = separate downscale2d layers. - fused_scale=True, - # 'linear' = human-readable, 'recursive' = efficient, None = select automatically - structure=None, - # True = template graph constructed by the Network class, False = actual evaluation. - is_template_graph=False, - **kwargs): # Ignore unrecognized keyword args. 
- - resolution_log2 = int(np.log2(resolution)) - assert resolution == 2**resolution_log2 and resolution >= 4 - def nf(stage): return min( - int(fmap_base / (2.0 ** (stage * fmap_decay))), fmap_max) - if structure is None: - structure = 'linear' if is_template_graph else 'recursive' - act = leaky_relu - - images_in.set_shape([None, num_channels, resolution, resolution]) - images_in = tf.cast(images_in, dtype) - lod_in = tf.cast(tf.get_variable( - 'lod', initializer=np.float32(0.0), trainable=False), dtype) - - # Building blocks. - def fromrgb(x, res): # res = 2..resolution_log2 - with tf.variable_scope('FromRGB_lod%d' % (resolution_log2 - res)): - return act(apply_bias(conv2d(x, fmaps=nf(res-1), kernel=1, use_wscale=use_wscale))) - - def block(x, res): # res = 2..resolution_log2 - with tf.variable_scope('%dx%d' % (2**res, 2**res)): - if res >= 3: # 8x8 and up - with tf.variable_scope('Conv0'): - x = act(apply_bias(conv2d(x, fmaps=nf(res-1), - kernel=3, use_wscale=use_wscale))) - if fused_scale: - with tf.variable_scope('Conv1_down'): - x = act(apply_bias(conv2d_downscale2d( - x, fmaps=nf(res-2), kernel=3, use_wscale=use_wscale))) - else: - with tf.variable_scope('Conv1'): - x = act(apply_bias(conv2d(x, fmaps=nf(res-2), - kernel=3, use_wscale=use_wscale))) - x = downscale2d(x) - else: # 4x4 - if mbstd_group_size > 1: - x = minibatch_stddev_layer(x, mbstd_group_size) - with tf.variable_scope('Conv'): - x = act(apply_bias(conv2d(x, fmaps=nf(res-1), - kernel=3, use_wscale=use_wscale))) - with tf.variable_scope('Dense0'): - x = act(apply_bias( - dense(x, fmaps=nf(res-2), use_wscale=use_wscale))) - with tf.variable_scope('Dense1'): - x = apply_bias(dense(x, fmaps=1+label_size, - gain=1, use_wscale=use_wscale)) - return x - - # Linear structure: simple but inefficient. - if structure == 'linear': - img = images_in - x = fromrgb(img, resolution_log2) - for res in range(resolution_log2, 2, -1): - lod = resolution_log2 - res - x = block(x, res) - img = downscale2d(img) - y = fromrgb(img, res - 1) - with tf.variable_scope('Grow_lod%d' % lod): - x = lerp_clip(x, y, lod_in - lod) - combo_out = block(x, 2) - - # Recursive structure: complex but efficient. - if structure == 'recursive': - def grow(res, lod): - def x(): return fromrgb(downscale2d(images_in, 2**lod), res) - if lod > 0: - x = cset(x, (lod_in < lod), lambda: grow(res + 1, lod - 1)) - x = block(x(), res) - def y(): return x - if res > 2: - y = cset(y, (lod_in > lod), lambda: lerp(x, fromrgb( - downscale2d(images_in, 2**(lod+1)), res - 1), lod_in - lod)) - return y() - combo_out = grow(2, resolution_log2 - 2) - - assert combo_out.dtype == tf.as_dtype(dtype) - scores_out = tf.identity(combo_out[:, :1], name='scores_out') - labels_out = tf.identity(combo_out[:, 1:], name='labels_out') - return scores_out, labels_out - -# ---------------------------------------------------------------------------- -# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -# -# This work is licensed under the Creative Commons Attribution-NonCommercial -# 4.0 International License. To view a copy of this license, visit -# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to -# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. - -import os -import sys -import inspect -import importlib -import imp -import numpy as np -from collections import OrderedDict -import tensorflow as tf - -# ---------------------------------------------------------------------------- -# Convenience. 
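Before the helpers themselves, a tiny NumPy stand-in (illustrative, not from the removed file) for what the lerp and lerp_clip convenience functions defined below compute:

import numpy as np

def lerp_np(a, b, t):
    return a + (b - a) * t  # mirrors tfutil.lerp

assert lerp_np(0.0, 10.0, 0.25) == 2.5
assert lerp_np(0.0, 10.0, np.clip(1.7, 0.0, 1.0)) == 10.0  # lerp_clip clamps t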
-
-
-def run(*args, **kwargs):  # Run the specified ops in the default session.
-    return tf.get_default_session().run(*args, **kwargs)
-
-
-def is_tf_expression(x):
-    return isinstance(x, tf.Tensor) or isinstance(x, tf.Variable) or isinstance(x, tf.Operation)
-
-
-def shape_to_list(shape):
-    return [dim.value for dim in shape]
-
-
-def flatten(x):
-    with tf.name_scope('Flatten'):
-        return tf.reshape(x, [-1])
-
-
-def log2(x):
-    with tf.name_scope('Log2'):
-        return tf.log(x) * np.float32(1.0 / np.log(2.0))
-
-
-def exp2(x):
-    with tf.name_scope('Exp2'):
-        return tf.exp(x * np.float32(np.log(2.0)))
-
-
-def lerp(a, b, t):
-    with tf.name_scope('Lerp'):
-        return a + (b - a) * t
-
-
-def lerp_clip(a, b, t):
-    with tf.name_scope('LerpClip'):
-        return a + (b - a) * tf.clip_by_value(t, 0.0, 1.0)
-
-
-# Forcefully enter the specified name scope, ignoring any surrounding scopes.
-def absolute_name_scope(scope):
-    return tf.name_scope(scope + '/')
-
-# ----------------------------------------------------------------------------
-# Initialize TensorFlow graph and session using good default settings.
-
-
-def init_tf(config_dict=dict()):
-    if tf.get_default_session() is None:
-        tf.set_random_seed(np.random.randint(1 << 31))
-        create_session(config_dict, force_as_default=True)
-
-# ----------------------------------------------------------------------------
-# Create tf.Session based on config dict of the form
-# {'gpu_options.allow_growth': True}
-
-
-def create_session(config_dict=dict(), force_as_default=False):
-    config = tf.ConfigProto()
-    for key, value in config_dict.items():
-        fields = key.split('.')
-        obj = config
-        for field in fields[:-1]:
-            obj = getattr(obj, field)
-        setattr(obj, fields[-1], value)
-    session = tf.Session(config=config)
-    if force_as_default:
-        session._default_session = session.as_default()
-        session._default_session.enforce_nesting = False
-        session._default_session.__enter__()
-    return session
-
-# ----------------------------------------------------------------------------
-# Initialize all tf.Variables that have not already been initialized.
-# Equivalent to the following, but more efficient and does not bloat the tf graph:
-# tf.variables_initializer(tf.report_uninitialized_variables()).run()
-
-
-def init_uninited_vars(vars=None):
-    if vars is None:
-        vars = tf.global_variables()
-    test_vars = []
-    test_ops = []
-    # ignore surrounding control_dependencies
-    with tf.control_dependencies(None):
-        for var in vars:
-            assert is_tf_expression(var)
-            try:
-                tf.get_default_graph().get_tensor_by_name(
-                    var.name.replace(':0', '/IsVariableInitialized:0'))
-            except KeyError:
-                # Op does not exist => variable may be uninitialized.
-                test_vars.append(var)
-                with absolute_name_scope(var.name.split(':')[0]):
-                    test_ops.append(tf.is_variable_initialized(var))
-    init_vars = [var for var, inited in zip(
-        test_vars, run(test_ops)) if not inited]
-    run([var.initializer for var in init_vars])
-
-# ----------------------------------------------------------------------------
-# Set the values of given tf.Variables.
-# Equivalent to the following, but more efficient and does not bloat the tf graph: -# tfutil.run([tf.assign(var, value) for var, value in var_to_value_dict.items()] - - -def set_vars(var_to_value_dict): - ops = [] - feed_dict = {} - for var, value in var_to_value_dict.items(): - assert is_tf_expression(var) - try: - setter = tf.get_default_graph().get_tensor_by_name( - var.name.replace(':0', '/setter:0')) # look for existing op - except KeyError: - with absolute_name_scope(var.name.split(':')[0]): - # ignore surrounding control_dependencies - with tf.control_dependencies(None): - setter = tf.assign(var, tf.placeholder( - var.dtype, var.shape, 'new_value'), name='setter') # create new setter - ops.append(setter) - feed_dict[setter.op.inputs[1]] = value - run(ops, feed_dict) - -# ---------------------------------------------------------------------------- -# Autosummary creates an identity op that internally keeps track of the input -# values and automatically shows up in TensorBoard. The reported value -# represents an average over input components. The average is accumulated -# constantly over time and flushed when save_summaries() is called. -# -# Notes: -# - The output tensor must be used as an input for something else in the -# graph. Otherwise, the autosummary op will not get executed, and the average -# value will not get accumulated. -# - It is perfectly fine to include autosummaries with the same name in -# several places throughout the graph, even if they are executed concurrently. -# - It is ok to also pass in a python scalar or numpy array. In this case, it -# is added to the average immediately. - - -_autosummary_vars = OrderedDict() # name => [var, ...] -_autosummary_immediate = OrderedDict() # name => update_op, update_value -_autosummary_finalized = False - - -def autosummary(name, value): - id = name.replace('/', '_') - if is_tf_expression(value): - with tf.name_scope('summary_' + id), tf.device(value.device): - update_op = _create_autosummary_var(name, value) - with tf.control_dependencies([update_op]): - return tf.identity(value) - else: # python scalar or numpy array - if name not in _autosummary_immediate: - with absolute_name_scope('Autosummary/' + id), tf.device(None), tf.control_dependencies(None): - update_value = tf.placeholder(tf.float32) - update_op = _create_autosummary_var(name, update_value) - _autosummary_immediate[name] = update_op, update_value - update_op, update_value = _autosummary_immediate[name] - run(update_op, {update_value: np.float32(value)}) - return value - -# Create the necessary ops to include autosummaries in TensorBoard report. -# Note: This should be done only once per graph. - - -def finalize_autosummaries(): - global _autosummary_finalized - if _autosummary_finalized: - return - _autosummary_finalized = True - init_uninited_vars([var for vars in _autosummary_vars.values() - for var in vars]) - with tf.device(None), tf.control_dependencies(None): - for name, vars in _autosummary_vars.items(): - id = name.replace('/', '_') - with absolute_name_scope('Autosummary/' + id): - sum = tf.add_n(vars) - avg = sum[0] / sum[1] - with tf.control_dependencies([avg]): # read before resetting - reset_ops = [tf.assign(var, tf.zeros(2)) for var in vars] - # reset before reporting - with tf.name_scope(None), tf.control_dependencies(reset_ops): - tf.summary.scalar(name, avg) - -# Internal helper for creating autosummary accumulators. 
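In plain Python, the per-summary bookkeeping that the helper below implements amounts to this (illustrative sketch only, not part of the removed file):

acc = [0.0, 0.0]  # [numerator, denominator], mirrors the tf.zeros(2) variable
for value in (1.0, 2.0, 6.0):
    acc[0] += value  # running sum
    acc[1] += 1.0    # running count
mean = acc[0] / acc[1]  # 3.0, what finalize_autosummaries() reports
acc = [0.0, 0.0]        # reset after each flush, as in the reset_ops above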
-
-
-def _create_autosummary_var(name, value_expr):
-    assert not _autosummary_finalized
-    v = tf.cast(value_expr, tf.float32)
-    if v.shape.ndims == 0:
-        v = [v, np.float32(1.0)]
-    elif v.shape.ndims == 1:
-        v = [tf.reduce_sum(v), tf.cast(tf.shape(v)[0], tf.float32)]
-    else:
-        v = [tf.reduce_sum(v), tf.reduce_prod(
-            tf.cast(tf.shape(v), tf.float32))]
-    v = tf.cond(tf.is_finite(v[0]), lambda: tf.stack(v), lambda: tf.zeros(2))
-    with tf.control_dependencies(None):
-        var = tf.Variable(tf.zeros(2))  # [numerator, denominator]
-        update_op = tf.cond(tf.is_variable_initialized(
-            var), lambda: tf.assign_add(var, v), lambda: tf.assign(var, v))
-    if name in _autosummary_vars:
-        _autosummary_vars[name].append(var)
-    else:
-        _autosummary_vars[name] = [var]
-    return update_op
-
-# ----------------------------------------------------------------------------
-# Call filewriter.add_summary() with all summaries in the default graph,
-# automatically finalizing and merging them on the first call.
-
-
-_summary_merge_op = None
-
-
-def save_summaries(filewriter, global_step=None):
-    global _summary_merge_op
-    if _summary_merge_op is None:
-        finalize_autosummaries()
-        with tf.device(None), tf.control_dependencies(None):
-            _summary_merge_op = tf.summary.merge_all()
-    filewriter.add_summary(_summary_merge_op.eval(), global_step)
-
-# ----------------------------------------------------------------------------
-# Utilities for importing modules and objects by name.
-
-
-def import_module(module_or_obj_name):
-    parts = module_or_obj_name.split('.')
-    parts[0] = {'np': 'numpy', 'tf': 'tensorflow'}.get(parts[0], parts[0])
-    for i in range(len(parts), 0, -1):
-        try:
-            module = importlib.import_module('.'.join(parts[:i]))
-            relative_obj_name = '.'.join(parts[i:])
-            return module, relative_obj_name
-        except ImportError:
-            pass
-    raise ImportError(module_or_obj_name)
-
-
-def find_obj_in_module(module, relative_obj_name):
-    obj = module
-    for part in relative_obj_name.split('.'):
-        obj = getattr(obj, part)
-    return obj
-
-
-def import_obj(obj_name):
-    module, relative_obj_name = import_module(obj_name)
-    return find_obj_in_module(module, relative_obj_name)
-
-
-def call_func_by_name(*args, func=None, **kwargs):
-    assert func is not None
-    return import_obj(func)(*args, **kwargs)
-
-# ----------------------------------------------------------------------------
-# Wrapper for tf.train.Optimizer that automatically takes care of:
-# - Gradient averaging for multi-GPU training.
-# - Dynamic loss scaling and typecasts for FP16 training.
-# - Ignoring corrupted gradients that contain NaNs/Infs.
-# - Reporting statistics.
-# - Well-chosen default settings.
-
-
-class Optimizer:
-    def __init__(
-            self,
-            name='Train',
-            tf_optimizer='tf.train.AdamOptimizer',
-            learning_rate=0.001,
-            use_loss_scaling=False,
-            loss_scaling_init=64.0,
-            loss_scaling_inc=0.0005,
-            loss_scaling_dec=1.0,
-            **kwargs):
-
-        # Init fields.
-        self.name = name
-        self.learning_rate = tf.convert_to_tensor(learning_rate)
-        self.id = self.name.replace('/', '.')
-        self.scope = tf.get_default_graph().unique_name(self.id)
-        self.optimizer_class = import_obj(tf_optimizer)
-        self.optimizer_kwargs = dict(kwargs)
-        self.use_loss_scaling = use_loss_scaling
-        self.loss_scaling_init = loss_scaling_init
-        self.loss_scaling_inc = loss_scaling_inc
-        self.loss_scaling_dec = loss_scaling_dec
-        self._grad_shapes = None  # [shape, ...]
-        self._dev_opt = OrderedDict()  # device => optimizer
-        self._dev_grads = OrderedDict()  # device => [[(grad, var), ...], ...]
- # device => variable (log2 of loss scaling factor) - self._dev_ls_var = OrderedDict() - self._updates_applied = False - - # Register the gradients of the given loss function with respect to the given variables. - # Intended to be called once per GPU. - def register_gradients(self, loss, vars): - assert not self._updates_applied - - # Validate arguments. - if isinstance(vars, dict): - # allow passing in Network.trainables as vars - vars = list(vars.values()) - assert isinstance(vars, list) and len(vars) >= 1 - assert all(is_tf_expression(expr) for expr in vars + [loss]) - if self._grad_shapes is None: - self._grad_shapes = [shape_to_list(var.shape) for var in vars] - assert len(vars) == len(self._grad_shapes) - assert all(shape_to_list(var.shape) == var_shape for var, - var_shape in zip(vars, self._grad_shapes)) - dev = loss.device - assert all(var.device == dev for var in vars) - - # Register device and compute gradients. - with tf.name_scope(self.id + '_grad'), tf.device(dev): - if dev not in self._dev_opt: - opt_name = self.scope.replace( - '/', '_') + '_opt%d' % len(self._dev_opt) - self._dev_opt[dev] = self.optimizer_class( - name=opt_name, learning_rate=self.learning_rate, **self.optimizer_kwargs) - self._dev_grads[dev] = [] - loss = self.apply_loss_scaling(tf.cast(loss, tf.float32)) - grads = self._dev_opt[dev].compute_gradients( - loss, vars, gate_gradients=tf.train.Optimizer.GATE_NONE) # disable gating to reduce memory usage - grads = [(g, v) if g is not None else (tf.zeros_like(v), v) - for g, v in grads] # replace disconnected gradients with zeros - self._dev_grads[dev].append(grads) - - # Construct training op to update the registered variables based on their gradients. - def apply_updates(self): - assert not self._updates_applied - self._updates_applied = True - devices = list(self._dev_grads.keys()) - total_grads = sum(len(grads) for grads in self._dev_grads.values()) - assert len(devices) >= 1 and total_grads >= 1 - ops = [] - with absolute_name_scope(self.scope): - - # Cast gradients to FP32 and calculate partial sum within each device. - dev_grads = OrderedDict() # device => [(grad, var), ...] - for dev_idx, dev in enumerate(devices): - with tf.name_scope('ProcessGrads%d' % dev_idx), tf.device(dev): - sums = [] - for gv in zip(*self._dev_grads[dev]): - assert all(v is gv[0][1] for g, v in gv) - g = [tf.cast(g, tf.float32) for g, v in gv] - g = g[0] if len(g) == 1 else tf.add_n(g) - sums.append((g, gv[0][1])) - dev_grads[dev] = sums - - # Sum gradients across devices. - if len(devices) > 1: - with tf.name_scope('SumAcrossGPUs'), tf.device(None): - for var_idx, grad_shape in enumerate(self._grad_shapes): - g = [dev_grads[dev][var_idx][0] for dev in devices] - if np.prod(grad_shape): # nccl does not support zero-sized tensors - g = tf.contrib.nccl.all_sum(g) - for dev, gg in zip(devices, g): - dev_grads[dev][var_idx] = ( - gg, dev_grads[dev][var_idx][1]) - - # Apply updates separately on each device. - for dev_idx, (dev, grads) in enumerate(dev_grads.items()): - with tf.name_scope('ApplyGrads%d' % dev_idx), tf.device(dev): - - # Scale gradients as needed. - if self.use_loss_scaling or total_grads > 1: - with tf.name_scope('Scale'): - coef = tf.constant(np.float32( - 1.0 / total_grads), name='coef') - coef = self.undo_loss_scaling(coef) - grads = [(g * coef, v) for g, v in grads] - - # Check for overflows. 
-                    with tf.name_scope('CheckOverflow'):
-                        grad_ok = tf.reduce_all(
-                            tf.stack([tf.reduce_all(tf.is_finite(g)) for g, v in grads]))
-
-                    # Update weights and adjust loss scaling.
-                    with tf.name_scope('UpdateWeights'):
-                        opt = self._dev_opt[dev]
-                        ls_var = self.get_loss_scaling_var(dev)
-                        if not self.use_loss_scaling:
-                            ops.append(
-                                tf.cond(grad_ok, lambda: opt.apply_gradients(grads), tf.no_op))
-                        else:
-                            ops.append(tf.cond(grad_ok,
-                                               lambda: tf.group(tf.assign_add(
-                                                   ls_var, self.loss_scaling_inc), opt.apply_gradients(grads)),
-                                               lambda: tf.group(tf.assign_sub(ls_var, self.loss_scaling_dec))))
-
-                    # Report statistics on the last device.
-                    if dev == devices[-1]:
-                        with tf.name_scope('Statistics'):
-                            ops.append(autosummary(
-                                self.id + '/learning_rate', self.learning_rate))
-                            ops.append(autosummary(
-                                self.id + '/overflow_frequency', tf.where(grad_ok, 0, 1)))
-                            if self.use_loss_scaling:
-                                ops.append(autosummary(
-                                    self.id + '/loss_scaling_log2', ls_var))
-
-            # Initialize variables and group everything into a single op.
-            self.reset_optimizer_state()
-            init_uninited_vars(list(self._dev_ls_var.values()))
-            return tf.group(*ops, name='TrainingOp')
-
-    # Reset internal state of the underlying optimizer.
-    def reset_optimizer_state(self):
-        run([var.initializer for opt in self._dev_opt.values()
-             for var in opt.variables()])
-
-    # Get or create variable representing log2 of the current dynamic loss scaling factor.
-    def get_loss_scaling_var(self, device):
-        if not self.use_loss_scaling:
-            return None
-        if device not in self._dev_ls_var:
-            with absolute_name_scope(self.scope + '/LossScalingVars'), tf.control_dependencies(None):
-                self._dev_ls_var[device] = tf.Variable(np.float32(
-                    self.loss_scaling_init), name='loss_scaling_var')
-        return self._dev_ls_var[device]
-
-    # Apply dynamic loss scaling for the given expression.
-    def apply_loss_scaling(self, value):
-        assert is_tf_expression(value)
-        if not self.use_loss_scaling:
-            return value
-        return value * exp2(self.get_loss_scaling_var(value.device))
-
-    # Undo the effect of dynamic loss scaling for the given expression.
-    def undo_loss_scaling(self, value):
-        assert is_tf_expression(value)
-        if not self.use_loss_scaling:
-            return value
-        return value * exp2(-self.get_loss_scaling_var(value.device))
-
-# ----------------------------------------------------------------------------
-# Generic network abstraction.
-#
-# Acts as a convenience wrapper for a parameterized network construction
-# function, providing several utility methods and convenient access to
-# the inputs/outputs/weights.
-#
-# Network objects can be safely pickled and unpickled for long-term
-# archival purposes. The pickling works reliably as long as the underlying
-# network construction function is defined in a standalone Python module
-# that has no side effects or application-specific imports.
-
-
-# Custom import handlers for dealing with legacy data in pickle import.
-network_import_handlers = []
-# Temporary modules created during pickle import.
-_network_import_modules = []
-
-
-class Network:
-    def __init__(self,
-                 # Network name. Used to select TensorFlow name and variable scopes.
-                 name=None,
-                 # Fully qualified name of the underlying network construction function.
-                 func=None,
-                 **static_kwargs):  # Keyword arguments to be passed in to the network construction function.
-
-        self._init_fields()
-        self.name = name
-        self.static_kwargs = dict(static_kwargs)
-
-        # Init build func.
- module, self._build_func_name = import_module(func) - self._build_module_src = inspect.getsource(module) - self._build_func = find_obj_in_module(module, self._build_func_name) - - # Init graph. - self._init_graph() - self.reset_vars() - - def _init_fields(self): - # User-specified name, defaults to build func name if None. - self.name = None - # Unique TF graph scope, derived from the user-specified name. - self.scope = None - # Arguments passed to the user-supplied build func. - self.static_kwargs = dict() - self.num_inputs = 0 # Number of input tensors. - self.num_outputs = 0 # Number of output tensors. - # Input tensor shapes (NC or NCHW), including minibatch dimension. - self.input_shapes = [[]] - # Output tensor shapes (NC or NCHW), including minibatch dimension. - self.output_shapes = [[]] - self.input_shape = [] # Short-hand for input_shapes[0]. - self.output_shape = [] # Short-hand for output_shapes[0]. - # Input placeholders in the template graph. - self.input_templates = [] - # Output tensors in the template graph. - self.output_templates = [] - self.input_names = [] # Name string for each input. - self.output_names = [] # Name string for each output. - self.vars = OrderedDict() # All variables (localname => var). - # Trainable variables (localname => var). - self.trainables = OrderedDict() - # User-supplied build function that constructs the network. - self._build_func = None - self._build_func_name = None # Name of the build function. - # Full source code of the module containing the build function. - self._build_module_src = None - self._run_cache = dict() # Cached graph data for Network.run(). - - def _init_graph(self): - # Collect inputs. - self.input_names = [] - for param in inspect.signature(self._build_func).parameters.values(): - if param.kind == param.POSITIONAL_OR_KEYWORD and param.default is param.empty: - self.input_names.append(param.name) - self.num_inputs = len(self.input_names) - assert self.num_inputs >= 1 - - # Choose name and scope. - if self.name is None: - self.name = self._build_func_name - self.scope = tf.get_default_graph().unique_name( - self.name.replace('/', '_'), mark_as_used=False) - - # Build template graph. - with tf.variable_scope(self.scope, reuse=tf.AUTO_REUSE): - assert tf.get_variable_scope().name == self.scope - # ignore surrounding name_scope - with absolute_name_scope(self.scope): - # ignore surrounding control_dependencies - with tf.control_dependencies(None): - self.input_templates = [tf.placeholder( - tf.float32, name=name) for name in self.input_names] - out_expr = self._build_func( - *self.input_templates, is_template_graph=True, **self.static_kwargs) - - # Collect outputs. - assert is_tf_expression(out_expr) or isinstance(out_expr, tuple) - self.output_templates = [out_expr] if is_tf_expression( - out_expr) else list(out_expr) - self.output_names = [t.name.split( - '/')[-1].split(':')[0] for t in self.output_templates] - self.num_outputs = len(self.output_templates) - assert self.num_outputs >= 1 - - # Populate remaining fields. 
- self.input_shapes = [shape_to_list(t.shape) - for t in self.input_templates] - self.output_shapes = [shape_to_list( - t.shape) for t in self.output_templates] - self.input_shape = self.input_shapes[0] - self.output_shape = self.output_shapes[0] - self.vars = OrderedDict([(self.get_var_localname(var), var) - for var in tf.global_variables(self.scope + '/')]) - self.trainables = OrderedDict([(self.get_var_localname( - var), var) for var in tf.trainable_variables(self.scope + '/')]) - - # Run initializers for all variables defined by this network. - def reset_vars(self): - run([var.initializer for var in self.vars.values()]) - - # Run initializers for all trainable variables defined by this network. - def reset_trainables(self): - run([var.initializer for var in self.trainables.values()]) - - # Get TensorFlow expression(s) for the output(s) of this network, given the inputs. - def get_output_for(self, *in_expr, return_as_list=False, **dynamic_kwargs): - assert len(in_expr) == self.num_inputs - all_kwargs = dict(self.static_kwargs) - all_kwargs.update(dynamic_kwargs) - with tf.variable_scope(self.scope, reuse=True): - assert tf.get_variable_scope().name == self.scope - named_inputs = [tf.identity(expr, name=name) - for expr, name in zip(in_expr, self.input_names)] - out_expr = self._build_func(*named_inputs, **all_kwargs) - assert is_tf_expression(out_expr) or isinstance(out_expr, tuple) - if return_as_list: - out_expr = [out_expr] if is_tf_expression( - out_expr) else list(out_expr) - return out_expr - - # Get the local name of a given variable, excluding any surrounding name scopes. - def get_var_localname(self, var_or_globalname): - assert is_tf_expression(var_or_globalname) or isinstance( - var_or_globalname, str) - globalname = var_or_globalname if isinstance( - var_or_globalname, str) else var_or_globalname.name - assert globalname.startswith(self.scope + '/') - localname = globalname[len(self.scope) + 1:] - localname = localname.split(':')[0] - return localname - - # Find variable by local or global name. - def find_var(self, var_or_localname): - assert is_tf_expression(var_or_localname) or isinstance( - var_or_localname, str) - return self.vars[var_or_localname] if isinstance(var_or_localname, str) else var_or_localname - - # Get the value of a given variable as NumPy array. - # Note: This method is very inefficient -- prefer to use tfutil.run(list_of_vars) whenever possible. - def get_var(self, var_or_localname): - return self.find_var(var_or_localname).eval() - - # Set the value of a given variable based on the given NumPy array. - # Note: This method is very inefficient -- prefer to use tfutil.set_vars() whenever possible. - def set_var(self, var_or_localname, new_value): - return set_vars({self.find_var(var_or_localname): new_value}) - - # Pickle export. - def __getstate__(self): - return { - 'version': 2, - 'name': self.name, - 'static_kwargs': self.static_kwargs, - 'build_module_src': self._build_module_src, - 'build_func_name': self._build_func_name, - 'variables': list(zip(self.vars.keys(), run(list(self.vars.values()))))} - - # Pickle import. - def __setstate__(self, state): - self._init_fields() - - # Execute custom import handlers. - for handler in network_import_handlers: - state = handler(state) - - # Set basic fields. - assert state['version'] == 2 - self.name = state['name'] - self.static_kwargs = state['static_kwargs'] - self._build_module_src = state['build_module_src'] - self._build_func_name = state['build_func_name'] - - # Parse imported module. 
- module = imp.new_module( - '_tfutil_network_import_module_%d' % len(_network_import_modules)) - exec(self._build_module_src, module.__dict__) - self._build_func = find_obj_in_module(module, self._build_func_name) - _network_import_modules.append(module) # avoid gc - - # Init graph. - self._init_graph() - self.reset_vars() - set_vars({self.find_var(name): value for name, - value in state['variables']}) - - # Create a clone of this network with its own copy of the variables. - def clone(self, name=None): - net = object.__new__(Network) - net._init_fields() - net.name = name if name is not None else self.name - net.static_kwargs = dict(self.static_kwargs) - net._build_module_src = self._build_module_src - net._build_func_name = self._build_func_name - net._build_func = self._build_func - net._init_graph() - net.copy_vars_from(self) - return net - - # Copy the values of all variables from the given network. - def copy_vars_from(self, src_net): - assert isinstance(src_net, Network) - name_to_value = run({name: src_net.find_var(name) - for name in self.vars.keys()}) - set_vars({self.find_var(name): value for name, - value in name_to_value.items()}) - - # Copy the values of all trainable variables from the given network. - def copy_trainables_from(self, src_net): - assert isinstance(src_net, Network) - name_to_value = run({name: src_net.find_var(name) - for name in self.trainables.keys()}) - set_vars({self.find_var(name): value for name, - value in name_to_value.items()}) - - # Create new network with the given parameters, and copy all variables from this network. - def convert(self, name=None, func=None, **static_kwargs): - net = Network(name, func, **static_kwargs) - net.copy_vars_from(self) - return net - - # Construct a TensorFlow op that updates the variables of this network - # to be slightly closer to those of the given network. - def setup_as_moving_average_of(self, src_net, beta=0.99, beta_nontrainable=0.0): - assert isinstance(src_net, Network) - with absolute_name_scope(self.scope): - with tf.name_scope('MovingAvg'): - ops = [] - for name, var in self.vars.items(): - if name in src_net.vars: - cur_beta = beta if name in self.trainables else beta_nontrainable - new_value = lerp(src_net.vars[name], var, cur_beta) - ops.append(var.assign(new_value)) - return tf.group(*ops) - - # Run this network for the given NumPy array(s), and return the output(s) as NumPy array(s). - def run(self, *in_arrays, - # True = return a list of NumPy arrays, False = return a single NumPy array, or a tuple if there are multiple outputs. - return_as_list=False, - # Print progress to the console? Useful for very large input arrays. - print_progress=False, - # Maximum minibatch size to use, None = disable batching. - minibatch_size=None, - num_gpus=1, # Number of GPUs to use. - # Multiplicative constant to apply to the output(s). - out_mul=1.0, - out_add=0.0, # Additive constant to apply to the output(s). - # Shrink the spatial dimensions of the output(s) by the given factor. - out_shrink=1, - # Convert the output to the specified data type. - out_dtype=None, - **dynamic_kwargs): # Additional keyword arguments to pass into the network construction function. - - assert len(in_arrays) == self.num_inputs - num_items = in_arrays[0].shape[0] - if minibatch_size is None: - minibatch_size = num_items - key = str([list(sorted(dynamic_kwargs.items())), num_gpus, - out_mul, out_add, out_shrink, out_dtype]) - - # Build graph. 
- if key not in self._run_cache: - with absolute_name_scope(self.scope + '/Run'), tf.control_dependencies(None): - in_split = list(zip(*[tf.split(x, num_gpus) - for x in self.input_templates])) - out_split = [] - for gpu in range(num_gpus): - with tf.device('/gpu:%d' % gpu): - out_expr = self.get_output_for( - *in_split[gpu], return_as_list=True, **dynamic_kwargs) - if out_mul != 1.0: - out_expr = [x * out_mul for x in out_expr] - if out_add != 0.0: - out_expr = [x + out_add for x in out_expr] - if out_shrink > 1: - ksize = [1, 1, out_shrink, out_shrink] - out_expr = [tf.nn.avg_pool( - x, ksize=ksize, strides=ksize, padding='VALID', data_format='NCHW') for x in out_expr] - if out_dtype is not None: - if tf.as_dtype(out_dtype).is_integer: - out_expr = [tf.round(x) for x in out_expr] - out_expr = [tf.saturate_cast( - x, out_dtype) for x in out_expr] - out_split.append(out_expr) - self._run_cache[key] = [ - tf.concat(outputs, axis=0) for outputs in zip(*out_split)] - - # Run minibatches. - out_expr = self._run_cache[key] - out_arrays = [np.empty( - [num_items] + shape_to_list(expr.shape)[1:], expr.dtype.name) for expr in out_expr] - for mb_begin in range(0, num_items, minibatch_size): - if print_progress: - print('\r%d / %d' % (mb_begin, num_items), end='') - mb_end = min(mb_begin + minibatch_size, num_items) - mb_in = [src[mb_begin: mb_end] for src in in_arrays] - mb_out = tf.get_default_session().run( - out_expr, dict(zip(self.input_templates, mb_in))) - for dst, src in zip(out_arrays, mb_out): - dst[mb_begin: mb_end] = src - - # Done. - if print_progress: - print('\r%d / %d' % (num_items, num_items)) - if not return_as_list: - out_arrays = out_arrays[0] if len( - out_arrays) == 1 else tuple(out_arrays) - return out_arrays - - # Returns a list of (name, output_expr, trainable_vars) tuples corresponding to - # individual layers of the network. Mainly intended to be used for reporting. - def list_layers(self): - patterns_to_ignore = ['/Setter', '/new_value', - '/Shape', '/strided_slice', '/Cast', '/concat'] - all_ops = tf.get_default_graph().get_operations() - all_ops = [op for op in all_ops if not any( - p in op.name for p in patterns_to_ignore)] - layers = [] - - def recurse(scope, parent_ops, level): - prefix = scope + '/' - ops = [op for op in parent_ops if op.name == - scope or op.name.startswith(prefix)] - - # Does not contain leaf nodes => expand immediate children. - if level == 0 or all('/' in op.name[len(prefix):] for op in ops): - visited = set() - for op in ops: - suffix = op.name[len(prefix):] - if '/' in suffix: - suffix = suffix[:suffix.index('/')] - if suffix not in visited: - recurse(prefix + suffix, ops, level + 1) - visited.add(suffix) - - # Otherwise => interpret as a layer. - else: - layer_name = scope[len(self.scope)+1:] - layer_output = ops[-1].outputs[0] - layer_trainables = [op.outputs[0] for op in ops if op.type.startswith( - 'Variable') and self.get_var_localname(op.name) in self.trainables] - layers.append((layer_name, layer_output, layer_trainables)) - - recurse(self.scope, all_ops, 0) - return layers - - # Print a summary table of the network structure. 
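print_layers, defined below, emits four left-aligned columns via the pattern '%-28s%-12s%-24s%-24s'. Schematically, its output looks like this (layer names and counts are placeholders, not captured output):

# G                           Params      OutputShape             WeightShape
# ---                         ---         ---                     ---
# 4x4/Dense                   <num>       (?, 512, 4, 4)          (512, 8192)
# 4x4/Conv                    <num>       (?, 512, 4, 4)          (3, 3, 512, 512)
# ...
# Total                       <total>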
- def print_layers(self, title=None, hide_layers_with_no_params=False): - if title is None: - title = self.name - print() - print('%-28s%-12s%-24s%-24s' % - (title, 'Params', 'OutputShape', 'WeightShape')) - print('%-28s%-12s%-24s%-24s' % (('---',) * 4)) - - total_params = 0 - for layer_name, layer_output, layer_trainables in self.list_layers(): - weights = [ - var for var in layer_trainables if var.name.endswith('/weight:0')] - num_params = sum(np.prod(shape_to_list(var.shape)) - for var in layer_trainables) - total_params += num_params - if hide_layers_with_no_params and num_params == 0: - continue - - print('%-28s%-12s%-24s%-24s' % ( - layer_name, - num_params if num_params else '-', - layer_output.shape, - weights[0].shape if len(weights) == 1 else '-')) - - print('%-28s%-12s%-24s%-24s' % (('---',) * 4)) - print('%-28s%-12s%-24s%-24s' % ('Total', total_params, '', '')) - print() - - # Construct summary ops to include histograms of all trainable parameters in TensorBoard. - def setup_weight_histograms(self, title=None): - if title is None: - title = self.name - with tf.name_scope(None), tf.device(None), tf.control_dependencies(None): - for localname, var in self.trainables.items(): - if '/' in localname: - p = localname.split('/') - name = title + '_' + p[-1] + '/' + '_'.join(p[:-1]) - else: - name = title + '_toplevel/' + localname - tf.summary.histogram(name, var) - -# ---------------------------------------------------------------------------- -# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -# -# This work is licensed under the Creative Commons Attribution-NonCommercial -# 4.0 International License. To view a copy of this license, visit -# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to -# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. - -import os -import time -import numpy as np -import tensorflow as tf - -import config -import tfutil -import dataset -import misc - -# ---------------------------------------------------------------------------- -# Choose the size and contents of the image snapshot grids that are exported -# periodically during training. - - -def setup_snapshot_image_grid(G, training_set, - # '1080p' = to be viewed on 1080p display, '4k' = to be viewed on 4k display. - size='1080p', - layout='random'): # 'random' = grid contents are selected randomly, 'row_per_class' = each row corresponds to one class label. - - # Select size. - gw = 1 - gh = 1 - if size == '1080p': - gw = np.clip(1920 // G.output_shape[3], 3, 32) - gh = np.clip(1080 // G.output_shape[2], 2, 32) - if size == '4k': - gw = np.clip(3840 // G.output_shape[3], 7, 32) - gh = np.clip(2160 // G.output_shape[2], 4, 32) - - # Fill in reals and labels. - reals = np.zeros([gw * gh] + training_set.shape, dtype=training_set.dtype) - labels = np.zeros([gw * gh, training_set.label_size], - dtype=training_set.label_dtype) - for idx in range(gw * gh): - x = idx % gw - y = idx // gw - while True: - real, label = training_set.get_minibatch_np(1) - if layout == 'row_per_class' and training_set.label_size > 0: - if label[0, y % training_set.label_size] == 0.0: - continue - reals[idx] = real[0] - labels[idx] = label[0] - break - - # Generate latents. - latents = misc.random_latents(gw * gh, G) - return (gw, gh), reals, labels, latents - -# ---------------------------------------------------------------------------- -# Just-in-time processing of training images before feeding them to the networks. 
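One step worth previewing is the FadeLOD crossfade inside process_reals below: each minibatch is blended with a 2x2 box-filtered copy of itself by the fractional part of lod. A NumPy rendering of that idea (illustrative only, assuming even H and W; not part of the removed file):

import numpy as np

def fade_lod_np(x, lod):
    # x: [N, C, H, W] image batch; lod: float level-of-detail.
    n, c, h, w = x.shape
    y = x.reshape(n, c, h // 2, 2, w // 2, 2).mean(axis=(3, 5), keepdims=True)
    y = np.tile(y, (1, 1, 1, 2, 1, 2)).reshape(n, c, h, w)
    return x + (y - x) * (lod - np.floor(lod))  # lerp by frac(lod)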
- - -def process_reals(x, lod, mirror_augment, drange_data, drange_net): - with tf.name_scope('ProcessReals'): - with tf.name_scope('DynamicRange'): - x = tf.cast(x, tf.float32) - x = misc.adjust_dynamic_range(x, drange_data, drange_net) - if mirror_augment: - with tf.name_scope('MirrorAugment'): - s = tf.shape(x) - mask = tf.random_uniform([s[0], 1, 1, 1], 0.0, 1.0) - mask = tf.tile(mask, [1, s[1], s[2], s[3]]) - x = tf.where(mask < 0.5, x, tf.reverse(x, axis=[3])) - # Smooth crossfade between consecutive levels-of-detail. - with tf.name_scope('FadeLOD'): - s = tf.shape(x) - y = tf.reshape(x, [-1, s[1], s[2]//2, 2, s[3]//2, 2]) - y = tf.reduce_mean(y, axis=[3, 5], keepdims=True) - y = tf.tile(y, [1, 1, 1, 2, 1, 2]) - y = tf.reshape(y, [-1, s[1], s[2], s[3]]) - x = tfutil.lerp(x, y, lod - tf.floor(lod)) - # Upscale to match the expected input/output size of the networks. - with tf.name_scope('UpscaleLOD'): - s = tf.shape(x) - factor = tf.cast(2 ** tf.floor(lod), tf.int32) - x = tf.reshape(x, [-1, s[1], s[2], 1, s[3], 1]) - x = tf.tile(x, [1, 1, 1, factor, 1, factor]) - x = tf.reshape(x, [-1, s[1], s[2] * factor, s[3] * factor]) - return x - -# ---------------------------------------------------------------------------- -# Class for evaluating and storing the values of time-varying training parameters. - - -class TrainingSchedule: - def __init__( - self, - cur_nimg, - training_set, - # Image resolution used at the beginning. - lod_initial_resolution=4, - # Thousands of real images to show before doubling the resolution. - lod_training_kimg=600, - # Thousands of real images to show when fading in new layers. - lod_transition_kimg=600, - # Maximum minibatch size, divided evenly among GPUs. - minibatch_base=16, - minibatch_dict={}, # Resolution-specific overrides. - # Resolution-specific maximum minibatch size per GPU. - max_minibatch_per_gpu={}, - G_lrate_base=0.001, # Learning rate for the generator. - G_lrate_dict={}, # Resolution-specific overrides. - D_lrate_base=0.001, # Learning rate for the discriminator. - D_lrate_dict={}, # Resolution-specific overrides. - tick_kimg_base=160, # Default interval of progress snapshots. - tick_kimg_dict={4: 160, 8: 140, 16: 120, 32: 100, 64: 80, 128: 60, 256: 40, 512: 20, 1024: 10}): # Resolution-specific overrides. - - # Training phase. - self.kimg = cur_nimg / 1000.0 - phase_dur = lod_training_kimg + lod_transition_kimg - phase_idx = int(np.floor(self.kimg / phase_dur) - ) if phase_dur > 0 else 0 - phase_kimg = self.kimg - phase_idx * phase_dur - - # Level-of-detail and resolution. - self.lod = training_set.resolution_log2 - self.lod -= np.floor(np.log2(lod_initial_resolution)) - self.lod -= phase_idx - if lod_transition_kimg > 0: - self.lod -= max(phase_kimg - lod_training_kimg, - 0.0) / lod_transition_kimg - self.lod = max(self.lod, 0.0) - self.resolution = 2 ** (training_set.resolution_log2 - - int(np.floor(self.lod))) - - # Minibatch size. - self.minibatch = minibatch_dict.get(self.resolution, minibatch_base) - self.minibatch -= self.minibatch % config.num_gpus - if self.resolution in max_minibatch_per_gpu: - self.minibatch = min( - self.minibatch, max_minibatch_per_gpu[self.resolution] * config.num_gpus) - - # Other parameters. - self.G_lrate = G_lrate_dict.get(self.resolution, G_lrate_base) - self.D_lrate = D_lrate_dict.get(self.resolution, D_lrate_base) - self.tick_kimg = tick_kimg_dict.get(self.resolution, tick_kimg_base) - -# ---------------------------------------------------------------------------- -# Main training script. 
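The level-of-detail arithmetic in `TrainingSchedule` is easiest to follow on a worked example. A small sketch, assuming a 1024x1024 training set (`resolution_log2 = 10`) and the default 600 kimg training / 600 kimg transition phases; after 1.5M real images the schedule sits midway through the stable-training half of the 8x8 phase:

```python
import numpy as np

resolution_log2 = 10                           # assumed 1024x1024 dataset
lod_training_kimg = lod_transition_kimg = 600  # defaults from above
kimg = 1500.0                                  # cur_nimg / 1000.0

phase_dur = lod_training_kimg + lod_transition_kimg   # 1200
phase_idx = int(np.floor(kimg / phase_dur))           # 1
phase_kimg = kimg - phase_idx * phase_dur             # 300.0

lod = resolution_log2 - np.floor(np.log2(4))          # initial res 4 -> lod 8
lod -= phase_idx                                      # 7.0
lod -= max(phase_kimg - lod_training_kimg, 0.0) / lod_transition_kimg
lod = max(lod, 0.0)                                   # still 7.0 (mid-training)
resolution = 2 ** (resolution_log2 - int(np.floor(lod)))
print(lod, resolution)  # 7.0 8
```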
-# To run, comment/uncomment appropriate lines in config.py and launch train.py. - - -def train_progressive_gan( - # Exponential running average of generator weights. - G_smoothing=0.999, - # How many times the discriminator is trained per G iteration. - D_repeats=1, - # Number of minibatches to run before adjusting training parameters. - minibatch_repeats=4, - # Reset optimizer internal state (e.g. Adam moments) when new layers are introduced? - reset_opt_for_new_lod=True, - # Total length of the training, measured in thousands of real images. - total_kimg=15000, - mirror_augment=False, # Enable mirror augment? - # Dynamic range used when feeding image data to the networks. - drange_net=[-1, 1], - image_snapshot_ticks=1, # How often to export image snapshots? - network_snapshot_ticks=10, # How often to export network snapshots? - # Include full TensorFlow computation graph in the tfevents file? - save_tf_graph=False, - # Include weight histograms in the tfevents file? - save_weight_histograms=False, - # Run ID or network pkl to resume training from, None = start from scratch. - resume_run_id=None, - # Snapshot index to resume training from, None = autodetect. - resume_snapshot=None, - # Assumed training progress at the beginning. Affects reporting and training schedule. - resume_kimg=0.0, - resume_time=0.0): # Assumed wallclock time at the beginning. Affects reporting. - - maintenance_start_time = time.time() - training_set = dataset.load_dataset( - data_dir=config.data_dir, verbose=True, **config.dataset) - - # Construct networks. - with tf.device('/gpu:0'): - if resume_run_id is not None: - network_pkl = misc.locate_network_pkl( - resume_run_id, resume_snapshot) - print('Loading networks from "%s"...' % network_pkl) - G, D, Gs = misc.load_pkl(network_pkl) - else: - print('Constructing networks...') - G = tfutil.Network( - 'G', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **config.G) - D = tfutil.Network( - 'D', num_channels=training_set.shape[0], resolution=training_set.shape[1], label_size=training_set.label_size, **config.D) - Gs = G.clone('Gs') - Gs_update_op = Gs.setup_as_moving_average_of(G, beta=G_smoothing) - G.print_layers() - D.print_layers() - - print('Building TensorFlow graph...') - with tf.name_scope('Inputs'): - lod_in = tf.placeholder(tf.float32, name='lod_in', shape=[]) - lrate_in = tf.placeholder(tf.float32, name='lrate_in', shape=[]) - minibatch_in = tf.placeholder(tf.int32, name='minibatch_in', shape=[]) - minibatch_split = minibatch_in // config.num_gpus - reals, labels = training_set.get_minibatch_tf() - reals_split = tf.split(reals, config.num_gpus) - labels_split = tf.split(labels, config.num_gpus) - G_opt = tfutil.Optimizer( - name='TrainG', learning_rate=lrate_in, **config.G_opt) - D_opt = tfutil.Optimizer( - name='TrainD', learning_rate=lrate_in, **config.D_opt) - for gpu in range(config.num_gpus): - with tf.name_scope('GPU%d' % gpu), tf.device('/gpu:%d' % gpu): - G_gpu = G if gpu == 0 else G.clone(G.name + '_shadow') - D_gpu = D if gpu == 0 else D.clone(D.name + '_shadow') - lod_assign_ops = [tf.assign(G_gpu.find_var('lod'), lod_in), tf.assign( - D_gpu.find_var('lod'), lod_in)] - reals_gpu = process_reals( - reals_split[gpu], lod_in, mirror_augment, training_set.dynamic_range, drange_net) - labels_gpu = labels_split[gpu] - with tf.name_scope('G_loss'), tf.control_dependencies(lod_assign_ops): - G_loss = tfutil.call_func_by_name( - G=G_gpu, D=D_gpu, opt=G_opt, training_set=training_set, 
minibatch_size=minibatch_split, **config.G_loss) - with tf.name_scope('D_loss'), tf.control_dependencies(lod_assign_ops): - D_loss = tfutil.call_func_by_name(G=G_gpu, D=D_gpu, opt=D_opt, training_set=training_set, - minibatch_size=minibatch_split, reals=reals_gpu, labels=labels_gpu, **config.D_loss) - G_opt.register_gradients(tf.reduce_mean(G_loss), G_gpu.trainables) - D_opt.register_gradients(tf.reduce_mean(D_loss), D_gpu.trainables) - G_train_op = G_opt.apply_updates() - D_train_op = D_opt.apply_updates() - - print('Setting up snapshot image grid...') - grid_size, grid_reals, grid_labels, grid_latents = setup_snapshot_image_grid( - G, training_set, **config.grid) - sched = TrainingSchedule(total_kimg * 1000, training_set, **config.sched) - grid_fakes = Gs.run(grid_latents, grid_labels, - minibatch_size=sched.minibatch//config.num_gpus) - - print('Setting up result dir...') - result_subdir = misc.create_result_subdir(config.result_dir, config.desc) - misc.save_image_grid(grid_reals, os.path.join( - result_subdir, 'reals.png'), drange=training_set.dynamic_range, grid_size=grid_size) - misc.save_image_grid(grid_fakes, os.path.join( - result_subdir, 'fakes%06d.png' % 0), drange=drange_net, grid_size=grid_size) - summary_log = tf.summary.FileWriter(result_subdir) - if save_tf_graph: - summary_log.add_graph(tf.get_default_graph()) - if save_weight_histograms: - G.setup_weight_histograms() - D.setup_weight_histograms() - - print('Training...') - cur_nimg = int(resume_kimg * 1000) - cur_tick = 0 - tick_start_nimg = cur_nimg - tick_start_time = time.time() - train_start_time = tick_start_time - resume_time - prev_lod = -1.0 - while cur_nimg < total_kimg * 1000: - - # Choose training parameters and configure training ops. - sched = TrainingSchedule(cur_nimg, training_set, **config.sched) - training_set.configure(sched.minibatch, sched.lod) - if reset_opt_for_new_lod: - if np.floor(sched.lod) != np.floor(prev_lod) or np.ceil(sched.lod) != np.ceil(prev_lod): - G_opt.reset_optimizer_state() - D_opt.reset_optimizer_state() - prev_lod = sched.lod - - # Run training ops. - for repeat in range(minibatch_repeats): - for _ in range(D_repeats): - tfutil.run([D_train_op, Gs_update_op], { - lod_in: sched.lod, lrate_in: sched.D_lrate, minibatch_in: sched.minibatch}) - cur_nimg += sched.minibatch - tfutil.run([G_train_op], { - lod_in: sched.lod, lrate_in: sched.G_lrate, minibatch_in: sched.minibatch}) - - # Perform maintenance tasks once per tick. - done = (cur_nimg >= total_kimg * 1000) - if cur_nimg >= tick_start_nimg + sched.tick_kimg * 1000 or done: - cur_tick += 1 - cur_time = time.time() - tick_kimg = (cur_nimg - tick_start_nimg) / 1000.0 - tick_start_nimg = cur_nimg - tick_time = cur_time - tick_start_time - total_time = cur_time - train_start_time - maintenance_time = tick_start_time - maintenance_start_time - maintenance_start_time = cur_time - - # Report progress. 
- print('tick %-5d kimg %-8.1f lod %-5.2f minibatch %-4d time %-12s sec/tick %-7.1f sec/kimg %-7.2f maintenance %.1f' % ( - tfutil.autosummary('Progress/tick', cur_tick), - tfutil.autosummary('Progress/kimg', cur_nimg / 1000.0), - tfutil.autosummary('Progress/lod', sched.lod), - tfutil.autosummary('Progress/minibatch', sched.minibatch), - misc.format_time(tfutil.autosummary( - 'Timing/total_sec', total_time)), - tfutil.autosummary('Timing/sec_per_tick', tick_time), - tfutil.autosummary('Timing/sec_per_kimg', - tick_time / tick_kimg), - tfutil.autosummary('Timing/maintenance_sec', maintenance_time))) - tfutil.autosummary('Timing/total_hours', - total_time / (60.0 * 60.0)) - tfutil.autosummary('Timing/total_days', - total_time / (24.0 * 60.0 * 60.0)) - tfutil.save_summaries(summary_log, cur_nimg) - - # Save snapshots. - if cur_tick % image_snapshot_ticks == 0 or done: - grid_fakes = Gs.run( - grid_latents, grid_labels, minibatch_size=sched.minibatch//config.num_gpus) - misc.save_image_grid(grid_fakes, os.path.join(result_subdir, 'fakes%06d.png' % ( - cur_nimg // 1000)), drange=drange_net, grid_size=grid_size) - if cur_tick % network_snapshot_ticks == 0 or done: - misc.save_pkl((G, D, Gs), os.path.join( - result_subdir, 'network-snapshot-%06d.pkl' % (cur_nimg // 1000))) - - # Record start time of the next tick. - tick_start_time = time.time() - - # Write final results. - misc.save_pkl((G, D, Gs), os.path.join(result_subdir, 'network-final.pkl')) - summary_log.close() - open(os.path.join(result_subdir, '_training-done.txt'), 'wt').close() - -# ---------------------------------------------------------------------------- -# Main entry point. -# Calls the function indicated in config.py. - - -if __name__ == "__main__": - misc.init_output_logging() - np.random.seed(config.random_seed) - print('Initializing TensorFlow...') - os.environ.update(config.env) - tfutil.init_tf(config.tf_config) - print('Running %s()...' % config.train['func']) - tfutil.call_func_by_name(**config.train) - print('Exiting...') - -# ---------------------------------------------------------------------------- -# Copyright (c) 2018, NVIDIA CORPORATION. All rights reserved. -# -# This work is licensed under the Creative Commons Attribution-NonCommercial -# 4.0 International License. To view a copy of this license, visit -# http://creativecommons.org/licenses/by-nc/4.0/ or send a letter to -# Creative Commons, PO Box 1866, Mountain View, CA 94042, USA. - -import os -import time -import re -import bisect -from collections import OrderedDict -import numpy as np -import tensorflow as tf -import scipy.ndimage -import scipy.misc - -import config -import misc -import tfutil -import train -import dataset - -# ---------------------------------------------------------------------------- -# Generate random images or image grids using a previously trained network. -# To run, uncomment the appropriate line in config.py and launch train.py. - - -def generate_fake_images(run_id, snapshot=None, grid_size=[1, 1], num_pngs=1, image_shrink=1, png_prefix=None, random_seed=1000, minibatch_size=8): - network_pkl = misc.locate_network_pkl(run_id, snapshot) - if png_prefix is None: - png_prefix = misc.get_id_string_for_network_pkl(network_pkl) + '-' - random_state = np.random.RandomState(random_seed) - - print('Loading network from "%s"...' 
% network_pkl) - G, D, Gs = misc.load_network_pkl(run_id, snapshot) - - result_subdir = misc.create_result_subdir(config.result_dir, config.desc) - for png_idx in range(num_pngs): - print('Generating png %d / %d...' % (png_idx, num_pngs)) - latents = misc.random_latents( - np.prod(grid_size), Gs, random_state=random_state) - labels = np.zeros([latents.shape[0], 0], np.float32) - images = Gs.run(latents, labels, minibatch_size=minibatch_size, num_gpus=config.num_gpus, - out_mul=127.5, out_add=127.5, out_shrink=image_shrink, out_dtype=np.uint8) - misc.save_image_grid(images, os.path.join( - result_subdir, '%s%06d.png' % (png_prefix, png_idx)), [0, 255], grid_size) - open(os.path.join(result_subdir, '_done.txt'), 'wt').close() - -# ---------------------------------------------------------------------------- -# Generate MP4 video of random interpolations using a previously trained network. -# To run, uncomment the appropriate line in config.py and launch train.py. - - -def generate_interpolation_video(run_id, snapshot=None, grid_size=[1, 1], image_shrink=1, image_zoom=1, duration_sec=60.0, smoothing_sec=1.0, mp4=None, mp4_fps=30, mp4_codec='libx265', mp4_bitrate='16M', random_seed=1000, minibatch_size=8): - network_pkl = misc.locate_network_pkl(run_id, snapshot) - if mp4 is None: - mp4 = misc.get_id_string_for_network_pkl(network_pkl) + '-lerp.mp4' - num_frames = int(np.rint(duration_sec * mp4_fps)) - random_state = np.random.RandomState(random_seed) - - print('Loading network from "%s"...' % network_pkl) - G, D, Gs = misc.load_network_pkl(run_id, snapshot) - - print('Generating latent vectors...') - # [frame, image, channel, component] - shape = [num_frames, np.prod(grid_size)] + Gs.input_shape[1:] - all_latents = random_state.randn(*shape).astype(np.float32) - all_latents = scipy.ndimage.gaussian_filter( - all_latents, [smoothing_sec * mp4_fps] + [0] * len(Gs.input_shape), mode='wrap') - all_latents /= np.sqrt(np.mean(np.square(all_latents))) - - # Frame generation func for moviepy. - def make_frame(t): - frame_idx = int(np.clip(np.round(t * mp4_fps), 0, num_frames - 1)) - latents = all_latents[frame_idx] - labels = np.zeros([latents.shape[0], 0], np.float32) - images = Gs.run(latents, labels, minibatch_size=minibatch_size, num_gpus=config.num_gpus, - out_mul=127.5, out_add=127.5, out_shrink=image_shrink, out_dtype=np.uint8) - grid = misc.create_image_grid( - images, grid_size).transpose(1, 2, 0) # HWC - if image_zoom > 1: - grid = scipy.ndimage.zoom( - grid, [image_zoom, image_zoom, 1], order=0) - if grid.shape[2] == 1: - grid = grid.repeat(3, 2) # grayscale => RGB - return grid - - # Generate video. - import moviepy.editor # pip install moviepy - result_subdir = misc.create_result_subdir(config.result_dir, config.desc) - moviepy.editor.VideoClip(make_frame, duration=duration_sec).write_videofile( - os.path.join(result_subdir, mp4), fps=mp4_fps, codec='libx264', bitrate=mp4_bitrate) - open(os.path.join(result_subdir, '_done.txt'), 'wt').close() - -# ---------------------------------------------------------------------------- -# Generate MP4 video of training progress for a previous training run. -# To run, uncomment the appropriate line in config.py and launch train.py. - - -def generate_training_video(run_id, duration_sec=20.0, time_warp=1.5, mp4=None, mp4_fps=30, mp4_codec='libx265', mp4_bitrate='16M'): - src_result_subdir = misc.locate_result_subdir(run_id) - if mp4 is None: - mp4 = os.path.basename(src_result_subdir) + '-train.mp4' - - # Parse log. 
- times = [] - snaps = [] # [(png, kimg, lod), ...] - with open(os.path.join(src_result_subdir, 'log.txt'), 'rt') as log: - for line in log: - k = re.search(r'kimg ([\d\.]+) ', line) - l = re.search(r'lod ([\d\.]+) ', line) - t = re.search(r'time (\d+d)? *(\d+h)? *(\d+m)? *(\d+s)? ', line) - if k and l and t: - k = float(k.group(1)) - l = float(l.group(1)) - t = [int(t.group(i)[:-1]) if t.group(i) - else 0 for i in range(1, 5)] - t = t[0] * 24*60*60 + t[1] * 60*60 + t[2] * 60 + t[3] - png = os.path.join(src_result_subdir, - 'fakes%06d.png' % int(np.floor(k))) - if os.path.isfile(png): - times.append(t) - snaps.append((png, k, l)) - assert len(times) - - # Frame generation func for moviepy. - png_cache = [None, None] # [png, img] - - def make_frame(t): - wallclock = ((t / duration_sec) ** time_warp) * times[-1] - png, kimg, lod = snaps[max(bisect.bisect(times, wallclock) - 1, 0)] - if png_cache[0] == png: - img = png_cache[1] - else: - img = scipy.misc.imread(png) - while img.shape[1] > 1920 or img.shape[0] > 1080: - img = img.astype(np.float32).reshape( - img.shape[0]//2, 2, img.shape[1]//2, 2, -1).mean(axis=(1, 3)) - png_cache[:] = [png, img] - img = misc.draw_text_label( - img, 'lod %.2f' % lod, 16, img.shape[0]-4, alignx=0.0, aligny=1.0) - img = misc.draw_text_label(img, misc.format_time( - int(np.rint(wallclock))), img.shape[1]//2, img.shape[0]-4, alignx=0.5, aligny=1.0) - img = misc.draw_text_label( - img, '%.0f kimg' % kimg, img.shape[1]-16, img.shape[0]-4, alignx=1.0, aligny=1.0) - return img - - # Generate video. - import moviepy.editor # pip install moviepy - result_subdir = misc.create_result_subdir(config.result_dir, config.desc) - moviepy.editor.VideoClip(make_frame, duration=duration_sec).write_videofile( - os.path.join(result_subdir, mp4), fps=mp4_fps, codec='libx264', bitrate=mp4_bitrate) - open(os.path.join(result_subdir, '_done.txt'), 'wt').close() - -# ---------------------------------------------------------------------------- -# Evaluate one or more metrics for a previous training run. -# To run, uncomment one of the appropriate lines in config.py and launch train.py. - - -def evaluate_metrics(run_id, log, metrics, num_images, real_passes, minibatch_size=None): - metric_class_names = { - 'swd': 'metrics.sliced_wasserstein.API', - 'fid': 'metrics.frechet_inception_distance.API', - 'is': 'metrics.inception_score.API', - 'msssim': 'metrics.ms_ssim.API', - } - - # Locate training run and initialize logging. - result_subdir = misc.locate_result_subdir(run_id) - snapshot_pkls = misc.list_network_pkls(result_subdir, include_final=False) - assert len(snapshot_pkls) >= 1 - log_file = os.path.join(result_subdir, log) - print('Logging output to', log_file) - misc.set_output_log_file(log_file) - - # Initialize dataset and select minibatch size. - dataset_obj, mirror_augment = misc.load_dataset_for_previous_run( - result_subdir, verbose=True, shuffle_mb=0) - if minibatch_size is None: - minibatch_size = np.clip(8192 // dataset_obj.shape[1], 4, 256) - - # Initialize metrics. - metric_objs = [] - for name in metrics: - class_name = metric_class_names.get(name, name) - print('Initializing %s...' 
% class_name) - class_def = tfutil.import_obj(class_name) - image_shape = [3] + dataset_obj.shape[1:] - obj = class_def(num_images=num_images, image_shape=image_shape, - image_dtype=np.uint8, minibatch_size=minibatch_size) - tfutil.init_uninited_vars() - mode = 'warmup' - obj.begin(mode) - for idx in range(10): - obj.feed(mode, np.random.randint(0, 256, size=[ - minibatch_size]+image_shape, dtype=np.uint8)) - obj.end(mode) - metric_objs.append(obj) - - # Print table header. - print() - print('%-10s%-12s' % ('Snapshot', 'Time_eval'), end='') - for obj in metric_objs: - for name, fmt in zip(obj.get_metric_names(), obj.get_metric_formatting()): - print('%-*s' % (len(fmt % 0), name), end='') - print() - print('%-10s%-12s' % ('---', '---'), end='') - for obj in metric_objs: - for fmt in obj.get_metric_formatting(): - print('%-*s' % (len(fmt % 0), '---'), end='') - print() - - # Feed in reals. - for title, mode in [('Reals', 'reals'), ('Reals2', 'fakes')][:real_passes]: - print('%-10s' % title, end='') - time_begin = time.time() - labels = np.zeros( - [num_images, dataset_obj.label_size], dtype=np.float32) - [obj.begin(mode) for obj in metric_objs] - for begin in range(0, num_images, minibatch_size): - end = min(begin + minibatch_size, num_images) - images, labels[begin:end] = dataset_obj.get_minibatch_np( - end - begin) - if mirror_augment: - images = misc.apply_mirror_augment(images) - if images.shape[1] == 1: - images = np.tile(images, [1, 3, 1, 1]) # grayscale => RGB - [obj.feed(mode, images) for obj in metric_objs] - results = [obj.end(mode) for obj in metric_objs] - print('%-12s' % misc.format_time(time.time() - time_begin), end='') - for obj, vals in zip(metric_objs, results): - for val, fmt in zip(vals, obj.get_metric_formatting()): - print(fmt % val, end='') - print() - - # Evaluate each network snapshot. - for snapshot_idx, snapshot_pkl in enumerate(reversed(snapshot_pkls)): - prefix = 'network-snapshot-' - postfix = '.pkl' - snapshot_name = os.path.basename(snapshot_pkl) - assert snapshot_name.startswith( - prefix) and snapshot_name.endswith(postfix) - snapshot_kimg = int(snapshot_name[len(prefix): -len(postfix)]) - - print('%-10d' % snapshot_kimg, end='') - mode = 'fakes' - [obj.begin(mode) for obj in metric_objs] - time_begin = time.time() - with tf.Graph().as_default(), tfutil.create_session(config.tf_config).as_default(): - G, D, Gs = misc.load_pkl(snapshot_pkl) - for begin in range(0, num_images, minibatch_size): - end = min(begin + minibatch_size, num_images) - latents = misc.random_latents(end - begin, Gs) - images = Gs.run(latents, labels[begin:end], num_gpus=config.num_gpus, - out_mul=127.5, out_add=127.5, out_dtype=np.uint8) - if images.shape[1] == 1: - images = np.tile(images, [1, 3, 1, 1]) # grayscale => RGB - [obj.feed(mode, images) for obj in metric_objs] - results = [obj.end(mode) for obj in metric_objs] - print('%-12s' % misc.format_time(time.time() - time_begin), end='') - for obj, vals in zip(metric_objs, results): - for val, fmt in zip(vals, obj.get_metric_formatting()): - print(fmt % val, end='') - print() - print() - -# ---------------------------------------------------------------------------- -# empty -#!/usr/bin/env python3 -# -# Copyright 2017 Martin Heusel -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# Adapted from the original implementation by Martin Heusel.
-# Source: https://github.com/bioinf-jku/TTUR/blob/master/fid.py
-
-''' Calculates the Frechet Inception Distance (FID) to evaluate GANs.
-
-The FID metric calculates the distance between two distributions of images.
-Typically, we have summary statistics (mean & covariance matrix) of one
-of these distributions, while the 2nd distribution is given by a GAN.
-
-When run as a stand-alone program, it compares the distribution of
-images that are stored as PNG/JPEG at a specified location with a
-distribution given by summary statistics (in pickle format).
-
-The FID is calculated by assuming that X_1 and X_2 are the activations of
-the pool_3 layer of the inception net for generated samples and real world
-samples respectively.
-
-See --help to see further details.
-'''
-
-from __future__ import absolute_import, division, print_function
-import numpy as np
-import scipy as sp
-import os
-import gzip
-import pickle
-import tensorflow as tf
-from scipy.misc import imread
-import pathlib
-import urllib
-
-
-class InvalidFIDException(Exception):
-    pass
-
-
-def create_inception_graph(pth):
-    """Creates a graph from saved GraphDef file."""
-    # Creates graph from saved graph_def.pb.
-    with tf.gfile.FastGFile(pth, 'rb') as f:
-        graph_def = tf.GraphDef()
-        graph_def.ParseFromString(f.read())
-        _ = tf.import_graph_def(graph_def, name='FID_Inception_Net')
-# -------------------------------------------------------------------------------
-
-
-# code for handling inception net derived from
-# https://github.com/openai/improved-gan/blob/master/inception_score/model.py
-def _get_inception_layer(sess):
-    """Prepares inception net for batched usage and returns pool_3 layer. """
-    layername = 'FID_Inception_Net/pool_3:0'
-    pool3 = sess.graph.get_tensor_by_name(layername)
-    ops = pool3.graph.get_operations()
-    for op_idx, op in enumerate(ops):
-        for o in op.outputs:
-            shape = o.get_shape()
-            if shape._dims is not None:
-                shape = [s.value for s in shape]
-                new_shape = []
-                for j, s in enumerate(shape):
-                    if s == 1 and j == 0:
-                        new_shape.append(None)
-                    else:
-                        new_shape.append(s)
-                try:
-                    o._shape = tf.TensorShape(new_shape)
-                except ValueError:
-                    # EDIT: added for compatibility with tensorflow 1.6.0
-                    o._shape_val = tf.TensorShape(new_shape)
-    return pool3
-# -------------------------------------------------------------------------------
-
-
-def get_activations(images, sess, batch_size=50, verbose=False):
-    """Calculates the activations of the pool_3 layer for all images.
-
-    Params:
-    -- images     : Numpy array of dimension (n_images, hi, wi, 3). The values
-                    must lie between 0 and 256.
-    -- sess       : current session
-    -- batch_size : the images numpy array is split into batches with batch size
-                    batch_size. A reasonable batch size depends on the available hardware.
-    -- verbose    : If set to True and parameter out_step is given, the number of calculated
-                    batches is reported.
-    Returns:
-    -- A numpy array of dimension (num images, 2048) that contains the
-       activations of the given tensor when feeding inception with the query tensor.
- """ - inception_layer = _get_inception_layer(sess) - d0 = images.shape[0] - if batch_size > d0: - print("warning: batch size is bigger than the data size. setting batch size to data size") - batch_size = d0 - n_batches = d0//batch_size - n_used_imgs = n_batches*batch_size - pred_arr = np.empty((n_used_imgs, 2048)) - for i in range(n_batches): - if verbose: - print("\rPropagating batch %d/%d" % - (i+1, n_batches), end="", flush=True) - start = i*batch_size - end = start + batch_size - batch = images[start:end] - pred = sess.run(inception_layer, { - 'FID_Inception_Net/ExpandDims:0': batch}) - pred_arr[start:end] = pred.reshape(batch_size, -1) - if verbose: - print(" done") - return pred_arr -# ------------------------------------------------------------------------------- - - -def calculate_frechet_distance(mu1, sigma1, mu2, sigma2): - """Numpy implementation of the Frechet Distance. - The Frechet distance between two multivariate Gaussians X_1 ~ N(mu_1, C_1) - and X_2 ~ N(mu_2, C_2) is - d^2 = ||mu_1 - mu_2||^2 + Tr(C_1 + C_2 - 2*sqrt(C_1*C_2)). - - Params: - -- mu1 : Numpy array containing the activations of the pool_3 layer of the - inception net ( like returned by the function 'get_predictions') - -- mu2 : The sample mean over activations of the pool_3 layer, precalcualted - on an representive data set. - -- sigma2: The covariance matrix over activations of the pool_3 layer, - precalcualted on an representive data set. - - Returns: - -- dist : The Frechet Distance. - - Raises: - -- InvalidFIDException if nan occures. - """ - m = np.square(mu1 - mu2).sum() - # s = sp.linalg.sqrtm(np.dot(sigma1, sigma2)) # EDIT: commented out - s, _ = sp.linalg.sqrtm(np.dot(sigma1, sigma2), disp=False) # EDIT: added - dist = m + np.trace(sigma1+sigma2 - 2*s) - # if np.isnan(dist): # EDIT: commented out - # raise InvalidFIDException("nan occured in distance calculation.") # EDIT: commented out - # return dist # EDIT: commented out - return np.real(dist) # EDIT: added -# ------------------------------------------------------------------------------- - - -def calculate_activation_statistics(images, sess, batch_size=50, verbose=False): - """Calculation of the statistics used by the FID. - Params: - -- images : Numpy array of dimension (n_images, hi, wi, 3). The values - must lie between 0 and 255. - -- sess : current session - -- batch_size : the images numpy array is split into batches with batch size - batch_size. A reasonable batch size depends on the available hardware. - -- verbose : If set to True and parameter out_step is given, the number of calculated - batches is reported. - Returns: - -- mu : The mean over samples of the activations of the pool_3 layer of - the incption model. - -- sigma : The covariance matrix of the activations of the pool_3 layer of - the incption model. - """ - act = get_activations(images, sess, batch_size, verbose) - mu = np.mean(act, axis=0) - sigma = np.cov(act, rowvar=False) - return mu, sigma -# ------------------------------------------------------------------------------- - - -# ------------------------------------------------------------------------------- -# The following functions aren't needed for calculating the FID -# they're just here to make this module work as a stand-alone script -# for calculating FID scores -# ------------------------------------------------------------------------------- -def check_or_download_inception(inception_path): - ''' Checks if the path to the inception file is valid, or downloads - the file if it is not present. 
''' - INCEPTION_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' - if inception_path is None: - inception_path = '/tmp' - inception_path = pathlib.Path(inception_path) - model_file = inception_path / 'classify_image_graph_def.pb' - if not model_file.exists(): - print("Downloading Inception model") - from urllib import request - import tarfile - fn, _ = request.urlretrieve(INCEPTION_URL) - with tarfile.open(fn, mode='r') as f: - f.extract('classify_image_graph_def.pb', str(model_file.parent)) - return str(model_file) - - -def _handle_path(path, sess): - if path.endswith('.npz'): - f = np.load(path) - m, s = f['mu'][:], f['sigma'][:] - f.close() - else: - path = pathlib.Path(path) - files = list(path.glob('*.jpg')) + list(path.glob('*.png')) - x = np.array([imread(str(fn)).astype(np.float32) for fn in files]) - m, s = calculate_activation_statistics(x, sess) - return m, s - - -def calculate_fid_given_paths(paths, inception_path): - ''' Calculates the FID of two paths. ''' - inception_path = check_or_download_inception(inception_path) - - for p in paths: - if not os.path.exists(p): - raise RuntimeError("Invalid path: %s" % p) - - create_inception_graph(str(inception_path)) - with tf.Session() as sess: - sess.run(tf.global_variables_initializer()) - m1, s1 = _handle_path(paths[0], sess) - m2, s2 = _handle_path(paths[1], sess) - fid_value = calculate_frechet_distance(m1, s1, m2, s2) - return fid_value - - -if __name__ == "__main__": - from argparse import ArgumentParser, ArgumentDefaultsHelpFormatter - parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter) - parser.add_argument("path", type=str, nargs=2, - help='Path to the generated images or to .npz statistic files') - parser.add_argument("-i", "--inception", type=str, default=None, - help='Path to Inception model (will be downloaded if not provided)') - parser.add_argument("--gpu", default="", type=str, - help='GPU to use (leave blank for CPU only)') - args = parser.parse_args() - os.environ['CUDA_VISIBLE_DEVICES'] = args.gpu - fid_value = calculate_fid_given_paths(args.path, args.inception) - print("FID: ", fid_value) - -# ---------------------------------------------------------------------------- -# EDIT: added - - -class API: - def __init__(self, num_images, image_shape, image_dtype, minibatch_size): - import config - self.network_dir = os.path.join(config.result_dir, '_inception_fid') - self.network_file = check_or_download_inception(self.network_dir) - self.sess = tf.get_default_session() - create_inception_graph(self.network_file) - - def get_metric_names(self): - return ['FID'] - - def get_metric_formatting(self): - return ['%-10.4f'] - - def begin(self, mode): - assert mode in ['warmup', 'reals', 'fakes'] - self.activations = [] - - def feed(self, mode, minibatch): - act = get_activations(minibatch.transpose( - 0, 2, 3, 1), self.sess, batch_size=minibatch.shape[0]) - self.activations.append(act) - - def end(self, mode): - act = np.concatenate(self.activations) - mu = np.mean(act, axis=0) - sigma = np.cov(act, rowvar=False) - if mode in ['warmup', 'reals']: - self.mu_real = mu - self.sigma_real = sigma - fid = calculate_frechet_distance( - mu, sigma, self.mu_real, self.sigma_real) - return [fid] - -# ---------------------------------------------------------------------------- -# Copyright 2016 Wojciech Zaremba -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -# Adapted from the original implementation by Wojciech Zaremba. -# Source: https://github.com/openai/improved-gan/blob/master/inception_score/model.py - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import os.path -import sys -import tarfile - -import numpy as np -from six.moves import urllib -import tensorflow as tf -import glob -import scipy.misc -import math -import sys - -MODEL_DIR = '/tmp/imagenet' - -DATA_URL = 'http://download.tensorflow.org/models/image/imagenet/inception-2015-12-05.tgz' -softmax = None - -# Call this function with list of images. Each of elements should be a -# numpy array with values ranging from 0 to 255. - - -def get_inception_score(images, splits=10): - assert(type(images) == list) - assert(type(images[0]) == np.ndarray) - assert(len(images[0].shape) == 3) - # assert(np.max(images[0]) > 10) # EDIT: commented out - #assert(np.min(images[0]) >= 0.0) - inps = [] - for img in images: - img = img.astype(np.float32) - inps.append(np.expand_dims(img, 0)) - bs = 100 - with tf.Session() as sess: - preds = [] - n_batches = int(math.ceil(float(len(inps)) / float(bs))) - for i in range(n_batches): - # sys.stdout.write(".") # EDIT: commented out - # sys.stdout.flush() - inp = inps[(i * bs):min((i + 1) * bs, len(inps))] - inp = np.concatenate(inp, 0) - pred = sess.run(softmax, {'ExpandDims:0': inp}) - preds.append(pred) - preds = np.concatenate(preds, 0) - scores = [] - for i in range(splits): - part = preds[(i * preds.shape[0] // splits) :((i + 1) * preds.shape[0] // splits), :] - kl = part * (np.log(part) - - np.log(np.expand_dims(np.mean(part, 0), 0))) - kl = np.mean(np.sum(kl, 1)) - scores.append(np.exp(kl)) - return np.mean(scores), np.std(scores) - -# This function is called automatically. - - -def _init_inception(): - global softmax - if not os.path.exists(MODEL_DIR): - os.makedirs(MODEL_DIR) - filename = DATA_URL.split('/')[-1] - filepath = os.path.join(MODEL_DIR, filename) - if not os.path.exists(filepath): - def _progress(count, block_size, total_size): - sys.stdout.write('\r>> Downloading %s %.1f%%' % ( - filename, float(count * block_size) / float(total_size) * 100.0)) - sys.stdout.flush() - filepath, _ = urllib.request.urlretrieve(DATA_URL, filepath, _progress) - print() - statinfo = os.stat(filepath) - print('Succesfully downloaded', filename, statinfo.st_size, 'bytes.') - tarfile.open(filepath, 'r:gz').extractall( - MODEL_DIR) # EDIT: increased indent - with tf.gfile.FastGFile(os.path.join( - MODEL_DIR, 'classify_image_graph_def.pb'), 'rb') as f: - graph_def = tf.GraphDef() - graph_def.ParseFromString(f.read()) - _ = tf.import_graph_def(graph_def, name='') - # Works with an arbitrary minibatch size. 
- with tf.Session() as sess: - pool3 = sess.graph.get_tensor_by_name('pool_3:0') - ops = pool3.graph.get_operations() - for op_idx, op in enumerate(ops): - for o in op.outputs: - shape = o.get_shape() - shape = [s.value for s in shape] - new_shape = [] - for j, s in enumerate(shape): - if s == 1 and j == 0: - new_shape.append(None) - else: - new_shape.append(s) - try: - o._shape = tf.TensorShape(new_shape) - except ValueError: - # EDIT: added for compatibility with tensorflow 1.6.0 - o._shape_val = tf.TensorShape(new_shape) - w = sess.graph.get_operation_by_name("softmax/logits/MatMul").inputs[1] - logits = tf.matmul(tf.squeeze(pool3), w) - softmax = tf.nn.softmax(logits) - -# if softmax is None: # EDIT: commented out -# _init_inception() # EDIT: commented out - -# ---------------------------------------------------------------------------- -# EDIT: added - - -class API: - def __init__(self, num_images, image_shape, image_dtype, minibatch_size): - import config - globals()['MODEL_DIR'] = os.path.join(config.result_dir, '_inception') - self.sess = tf.get_default_session() - _init_inception() - - def get_metric_names(self): - return ['IS_mean', 'IS_std'] - - def get_metric_formatting(self): - return ['%-10.4f', '%-10.4f'] - - def begin(self, mode): - assert mode in ['warmup', 'reals', 'fakes'] - self.images = [] - - def feed(self, mode, minibatch): - self.images.append(minibatch.transpose(0, 2, 3, 1)) - - def end(self, mode): - images = list(np.concatenate(self.images)) - with self.sess.as_default(): - mean, std = get_inception_score(images) - return [mean, std] - -# ---------------------------------------------------------------------------- -#!/usr/bin/python -# -# Copyright 2016 The TensorFlow Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# ============================================================================== - -# Adapted from the original implementation by The TensorFlow Authors. -# Source: https://github.com/tensorflow/models/blob/master/research/compression/image_encoder/msssim.py - -import numpy as np -from scipy import signal -from scipy.ndimage.filters import convolve - - -def _FSpecialGauss(size, sigma): - """Function to mimic the 'fspecial' gaussian MATLAB function.""" - radius = size // 2 - offset = 0.0 - start, stop = -radius, radius + 1 - if size % 2 == 0: - offset = 0.5 - stop -= 1 - x, y = np.mgrid[offset + start:stop, offset + start:stop] - assert len(x) == size - g = np.exp(-((x**2 + y**2)/(2.0 * sigma**2))) - return g / g.sum() - - -def _SSIMForMultiScale(img1, img2, max_val=255, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03): - """Return the Structural Similarity Map between `img1` and `img2`. - - This function attempts to match the functionality of ssim_index_new.m by - Zhou Wang: http://www.cns.nyu.edu/~lcv/ssim/msssim.zip - - Arguments: - img1: Numpy array holding the first RGB image batch. - img2: Numpy array holding the second RGB image batch. 
- max_val: the dynamic range of the images (i.e., the difference between the - maximum the and minimum allowed values). - filter_size: Size of blur kernel to use (will be reduced for small images). - filter_sigma: Standard deviation for Gaussian blur kernel (will be reduced - for small images). - k1: Constant used to maintain stability in the SSIM calculation (0.01 in - the original paper). - k2: Constant used to maintain stability in the SSIM calculation (0.03 in - the original paper). - - Returns: - Pair containing the mean SSIM and contrast sensitivity between `img1` and - `img2`. - - Raises: - RuntimeError: If input images don't have the same shape or don't have four - dimensions: [batch_size, height, width, depth]. - """ - if img1.shape != img2.shape: - raise RuntimeError('Input images must have the same shape (%s vs. %s).' % ( - img1.shape, img2.shape)) - if img1.ndim != 4: - raise RuntimeError( - 'Input images must have four dimensions, not %d' % img1.ndim) - - img1 = img1.astype(np.float32) - img2 = img2.astype(np.float32) - _, height, width, _ = img1.shape - - # Filter size can't be larger than height or width of images. - size = min(filter_size, height, width) - - # Scale down sigma if a smaller filter size is used. - sigma = size * filter_sigma / filter_size if filter_size else 0 - - if filter_size: - window = np.reshape(_FSpecialGauss(size, sigma), (1, size, size, 1)) - mu1 = signal.fftconvolve(img1, window, mode='valid') - mu2 = signal.fftconvolve(img2, window, mode='valid') - sigma11 = signal.fftconvolve(img1 * img1, window, mode='valid') - sigma22 = signal.fftconvolve(img2 * img2, window, mode='valid') - sigma12 = signal.fftconvolve(img1 * img2, window, mode='valid') - else: - # Empty blur kernel so no need to convolve. - mu1, mu2 = img1, img2 - sigma11 = img1 * img1 - sigma22 = img2 * img2 - sigma12 = img1 * img2 - - mu11 = mu1 * mu1 - mu22 = mu2 * mu2 - mu12 = mu1 * mu2 - sigma11 -= mu11 - sigma22 -= mu22 - sigma12 -= mu12 - - # Calculate intermediate values used by both ssim and cs_map. - c1 = (k1 * max_val) ** 2 - c2 = (k2 * max_val) ** 2 - v1 = 2.0 * sigma12 + c2 - v2 = sigma11 + sigma22 + c2 - # Return for each image individually. - ssim = np.mean((((2.0 * mu12 + c1) * v1) / - ((mu11 + mu22 + c1) * v2)), axis=(1, 2, 3)) - cs = np.mean(v1 / v2, axis=(1, 2, 3)) - return ssim, cs - - -def _HoxDownsample(img): - return (img[:, 0::2, 0::2, :] + img[:, 1::2, 0::2, :] + img[:, 0::2, 1::2, :] + img[:, 1::2, 1::2, :]) * 0.25 - - -def msssim(img1, img2, max_val=255, filter_size=11, filter_sigma=1.5, k1=0.01, k2=0.03, weights=None): - """Return the MS-SSIM score between `img1` and `img2`. - - This function implements Multi-Scale Structural Similarity (MS-SSIM) Image - Quality Assessment according to Zhou Wang's paper, "Multi-scale structural - similarity for image quality assessment" (2003). - Link: https://ece.uwaterloo.ca/~z70wang/publications/msssim.pdf - - Author's MATLAB implementation: - http://www.cns.nyu.edu/~lcv/ssim/msssim.zip - - Arguments: - img1: Numpy array holding the first RGB image batch. - img2: Numpy array holding the second RGB image batch. - max_val: the dynamic range of the images (i.e., the difference between the - maximum the and minimum allowed values). - filter_size: Size of blur kernel to use (will be reduced for small images). - filter_sigma: Standard deviation for Gaussian blur kernel (will be reduced - for small images). - k1: Constant used to maintain stability in the SSIM calculation (0.01 in - the original paper). 
- k2: Constant used to maintain stability in the SSIM calculation (0.03 in - the original paper). - weights: List of weights for each level; if none, use five levels and the - weights from the original paper. - - Returns: - MS-SSIM score between `img1` and `img2`. - - Raises: - RuntimeError: If input images don't have the same shape or don't have four - dimensions: [batch_size, height, width, depth]. - """ - if img1.shape != img2.shape: - raise RuntimeError('Input images must have the same shape (%s vs. %s).' % ( - img1.shape, img2.shape)) - if img1.ndim != 4: - raise RuntimeError( - 'Input images must have four dimensions, not %d' % img1.ndim) - - # Note: default weights don't sum to 1.0 but do match the paper / matlab code. - weights = np.array(weights if weights else [ - 0.0448, 0.2856, 0.3001, 0.2363, 0.1333]) - levels = weights.size - downsample_filter = np.ones((1, 2, 2, 1)) / 4.0 - im1, im2 = [x.astype(np.float32) for x in [img1, img2]] - mssim = [] - mcs = [] - for _ in range(levels): - ssim, cs = _SSIMForMultiScale( - im1, im2, max_val=max_val, filter_size=filter_size, - filter_sigma=filter_sigma, k1=k1, k2=k2) - mssim.append(ssim) - mcs.append(cs) - im1, im2 = [_HoxDownsample(x) for x in [im1, im2]] - - # Clip to zero. Otherwise we get NaNs. - mssim = np.clip(np.asarray(mssim), 0.0, np.inf) - mcs = np.clip(np.asarray(mcs), 0.0, np.inf) - - # Average over images only at the end. - return np.mean(np.prod(mcs[:-1, :] ** weights[:-1, np.newaxis], axis=0) * (mssim[-1, :] ** weights[-1])) - -# ---------------------------------------------------------------------------- -# EDIT: added - - -class API: - def __init__(self, num_images, image_shape, image_dtype, minibatch_size): - assert num_images % 2 == 0 and minibatch_size % 2 == 0 - self.num_pairs = num_images // 2 - - def get_metric_names(self): - return ['MS-SSIM'] - - def get_metric_formatting(self): - return ['%-10.4f'] - - def begin(self, mode): - assert mode in ['warmup', 'reals', 'fakes'] - self.sum = 0.0 - - def feed(self, mode, minibatch): - images = minibatch.transpose(0, 2, 3, 1) - score = msssim(images[0::2], images[1::2]) - self.sum += score * (images.shape[0] // 2) - - def end(self, mode): - avg = self.sum / self.num_pairs - return [avg] - -# ---------------------------------------------------------------------------- -# coding:utf-8 -import time -from glob import glob - -import numpy as np -from PIL import Image - -import model -# ces - -paths = glob('./test/*.*') - -if __name__ == '__main__': - im = Image.open("./test/3.png") - img = np.array(im.convert('RGB')) - t = time.time() - ''' - result,img,angel分别对应-识别结果,图像的数组,文字旋转角度 - ''' - result, img, angle = model.model( - img, model='keras', adjust=True, detectAngle=True) - print("It takes time:{}s".format(time.time() - t)) - print("---------------------------------------") - for key in result: - print(result[key][1]) -# coding:utf-8 -# 添加文本方向 检测模型,自动检测文字方向,0、90、180、270 -# keras版本的OCR识别 - -from math import * - -import cv2 -import numpy as np -from PIL import Image - -from angle.predict import predict as angle_detect # 文字方向检测 -from ctpn.text_detect import text_detect -from ocr.model import predict as ocr - - -def crnnRec(im, text_recs, adjust=False): - """ - crnn模型,ocr识别 - @@model, - @@converter, - @@im:Array - @@text_recs:text box - - """ - index = 0 - results = {} - xDim, yDim = im.shape[1], im.shape[0] - - for index, rec in enumerate(text_recs): - results[index] = [ - rec, - ] - xlength = int((rec[6] - rec[0]) * 0.1) - ylength = int((rec[7] - rec[1]) * 0.2) - if adjust: - 
pt1 = (max(1, rec[0] - xlength), max(1, rec[1] - ylength)) - pt2 = (rec[2], rec[3]) - pt3 = (min(rec[6] + xlength, xDim - 2), - min(yDim - 2, rec[7] + ylength)) - pt4 = (rec[4], rec[5]) - else: - pt1 = (max(1, rec[0]), max(1, rec[1])) - pt2 = (rec[2], rec[3]) - pt3 = (min(rec[6], xDim - 2), min(yDim - 2, rec[7])) - pt4 = (rec[4], rec[5]) - - degree = degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])) # 图像倾斜角度 - - partImg = dumpRotateImage(im, degree, pt1, pt2, pt3, pt4) - - image = Image.fromarray(partImg).convert('L') - sim_pred = ocr(image) - - results[index].append(sim_pred) # 识别文字 - - return results - - -def dumpRotateImage(img, degree, pt1, pt2, pt3, pt4): - height, width = img.shape[:2] - heightNew = int(width * fabs(sin(radians(degree))) + - height * fabs(cos(radians(degree)))) - widthNew = int(height * fabs(sin(radians(degree))) + - width * fabs(cos(radians(degree)))) - matRotation = cv2.getRotationMatrix2D((width / 2, height / 2), degree, 1) - matRotation[0, 2] += (widthNew - width) / 2 - matRotation[1, 2] += (heightNew - height) / 2 - imgRotation = cv2.warpAffine( - img, matRotation, (widthNew, heightNew), borderValue=(255, 255, 255)) - pt1 = list(pt1) - pt3 = list(pt3) - - [[pt1[0]], [pt1[1]]] = np.dot(matRotation, - np.array([[pt1[0]], [pt1[1]], [1]])) - [[pt3[0]], [pt3[1]]] = np.dot(matRotation, - np.array([[pt3[0]], [pt3[1]], [1]])) - ydim, xdim = imgRotation.shape[:2] - imgOut = imgRotation[max(1, int(pt1[1])):min(ydim - 1, int(pt3[1])), - max(1, int(pt1[0])):min(xdim - 1, int(pt3[0]))] - # height,width=imgOut.shape[:2] - return imgOut - - -def model(img, adjust=False, detectAngle=False): - """ - @@param:img, - @@param:model,选择的ocr模型,支持keras\pytorch版本 - @@param:adjust 调整文字识别结果 - @@param:detectAngle,是否检测文字朝向 - - """ - angle = 0 - if detectAngle: - - angle = angle_detect(img=np.copy(img)) # 文字朝向检测 - im = Image.fromarray(img) - if angle == 90: - im = im.transpose(Image.ROTATE_90) - elif angle == 180: - im = im.transpose(Image.ROTATE_180) - elif angle == 270: - im = im.transpose(Image.ROTATE_270) - img = np.array(im) - - text_recs, tmp, img = text_detect(img) - text_recs = sort_box(text_recs) - result = crnnRec(img, text_recs, model, adjust=adjust) - return result, tmp, angle - - -def sort_box(box): - """ - 对box排序,及页面进行排版 - text_recs[index, 0] = x1 - text_recs[index, 1] = y1 - text_recs[index, 2] = x2 - text_recs[index, 3] = y2 - text_recs[index, 4] = x3 - text_recs[index, 5] = y3 - text_recs[index, 6] = x4 - text_recs[index, 7] = y4 - """ - - box = sorted(box, key=lambda x: sum([x[1], x[3], x[5], x[7]])) - return box -# coding:utf-8 -# 添加文本方向 检测模型,自动检测文字方向,0、90、180、270 -from angle.predict import predict as angle_detect # 文字方向检测 -from ocr.model import predict as ocr -from ctpn.text_detect import text_detect -from crnn.crnn import crnnOcr -from math import * - -import cv2 -import numpy as np -from PIL import Image -import sys - -sys.path.append("ocr") - - -def crnnRec(im, text_recs, ocrMode='keras', adjust=False): - """ - crnn模型,ocr识别 - @@model, - @@converter, - @@im:Array - @@text_recs:text box - - """ - index = 0 - results = {} - xDim, yDim = im.shape[1], im.shape[0] - - for index, rec in enumerate(text_recs): - results[index] = [ - rec, - ] - xlength = int((rec[6] - rec[0]) * 0.1) - ylength = int((rec[7] - rec[1]) * 0.2) - if adjust: - pt1 = (max(1, rec[0] - xlength), max(1, rec[1] - ylength)) - pt2 = (rec[2], rec[3]) - pt3 = (min(rec[6] + xlength, xDim - 2), - min(yDim - 2, rec[7] + ylength)) - pt4 = (rec[4], rec[5]) - else: - pt1 = (max(1, rec[0]), max(1, rec[1])) - pt2 = 
(rec[2], rec[3]) - pt3 = (min(rec[6], xDim - 2), min(yDim - 2, rec[7])) - pt4 = (rec[4], rec[5]) - - degree = degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])) # 图像倾斜角度 - - partImg = dumpRotateImage(im, degree, pt1, pt2, pt3, pt4) - # 根据ctpn进行识别出的文字区域,进行不同文字区域的crnn识别 - image = Image.fromarray(partImg).convert('L') - # 进行识别出的文字识别 - if ocrMode == 'keras': - sim_pred = ocr(image) - else: - sim_pred = crnnOcr(image) - - results[index].append(sim_pred) # 识别文字 - - return results - - -def dumpRotateImage(img, degree, pt1, pt2, pt3, pt4): - height, width = img.shape[:2] - heightNew = int(width * fabs(sin(radians(degree))) + - height * fabs(cos(radians(degree)))) - widthNew = int(height * fabs(sin(radians(degree))) + - width * fabs(cos(radians(degree)))) - matRotation = cv2.getRotationMatrix2D((width / 2, height / 2), degree, 1) - matRotation[0, 2] += (widthNew - width) / 2 - matRotation[1, 2] += (heightNew - height) / 2 - imgRotation = cv2.warpAffine( - img, matRotation, (widthNew, heightNew), borderValue=(255, 255, 255)) - pt1 = list(pt1) - pt3 = list(pt3) - - [[pt1[0]], [pt1[1]]] = np.dot(matRotation, - np.array([[pt1[0]], [pt1[1]], [1]])) - [[pt3[0]], [pt3[1]]] = np.dot(matRotation, - np.array([[pt3[0]], [pt3[1]], [1]])) - ydim, xdim = imgRotation.shape[:2] - imgOut = imgRotation[max(1, int(pt1[1])):min(ydim - 1, int(pt3[1])), - max(1, int(pt1[0])):min(xdim - 1, int(pt3[0]))] - # height,width=imgOut.shape[:2] - return imgOut - - -def model(img, model='keras', adjust=False, detectAngle=False): - """ - @@param:img, - @@param:model,选择的ocr模型,支持keras\pytorch版本 - @@param:adjust 调整文字识别结果 - @@param:detectAngle,是否检测文字朝向 - - """ - angle = 0 - if detectAngle: - # 进行文字旋转方向检测,分为[0, 90, 180, 270]四种情况 - angle = angle_detect(img=np.copy(img)) # 文字朝向检测 - print('The angel of this character is:', angle) - im = Image.fromarray(img) - print('Rotate the array of this img!') - if angle == 90: - im = im.transpose(Image.ROTATE_90) - elif angle == 180: - im = im.transpose(Image.ROTATE_180) - elif angle == 270: - im = im.transpose(Image.ROTATE_270) - img = np.array(im) - # 进行图像中的文字区域的识别 - text_recs, tmp, img = text_detect(img) - # 识别区域排列 - text_recs = sort_box(text_recs) - # - result = crnnRec(img, text_recs, model, adjust=adjust) - return result, tmp, angle - - -def sort_box(box): - """ - 对box排序,及页面进行排版 - text_recs[index, 0] = x1 - text_recs[index, 1] = y1 - text_recs[index, 2] = x2 - text_recs[index, 3] = y2 - text_recs[index, 4] = x3 - text_recs[index, 5] = y3 - text_recs[index, 6] = x4 - text_recs[index, 7] = y4 - """ - - box = sorted(box, key=lambda x: sum([x[1], x[3], x[5], x[7]])) - return box -# coding:utf-8 -# 添加文本方向 检测模型,自动检测文字方向,0、90、180、270 -# pytorch版本的OCR识别 -from math import * - -import cv2 -import numpy as np -from PIL import Image - -from angle.predict import predict as angle_detect # 文字方向检测 -from crnn.crnn import crnnOcr -from ctpn.text_detect import text_detect - - -def crnnRec(im, text_recs, adjust=False): - """ - crnn模型,ocr识别 - @@model, - @@converter, - @@im:Array - @@text_recs:text box - - """ - index = 0 - results = {} - xDim, yDim = im.shape[1], im.shape[0] - - for index, rec in enumerate(text_recs): - results[index] = [rec, ] - xlength = int((rec[6] - rec[0]) * 0.1) - ylength = int((rec[7] - rec[1]) * 0.2) - if adjust: - pt1 = (max(1, rec[0] - xlength), max(1, rec[1] - ylength)) - pt2 = (rec[2], rec[3]) - pt3 = (min(rec[6] + xlength, xDim - 2), - min(yDim - 2, rec[7] + ylength)) - pt4 = (rec[4], rec[5]) - else: - pt1 = (max(1, rec[0]), max(1, rec[1])) - pt2 = (rec[2], rec[3]) - pt3 = (min(rec[6], 
xDim - 2), min(yDim - 2, rec[7])) - pt4 = (rec[4], rec[5]) - - degree = degrees(atan2(pt2[1] - pt1[1], pt2[0] - pt1[0])) # 图像倾斜角度 - - partImg = dumpRotateImage(im, degree, pt1, pt2, pt3, pt4) - - image = Image.fromarray(partImg).convert('L') - sim_pred = crnnOcr(image) - results[index].append(sim_pred) # 识别文字 - - return results - - -def dumpRotateImage(img, degree, pt1, pt2, pt3, pt4): - height, width = img.shape[:2] - heightNew = int(width * fabs(sin(radians(degree))) + - height * fabs(cos(radians(degree)))) - widthNew = int(height * fabs(sin(radians(degree))) + - width * fabs(cos(radians(degree)))) - matRotation = cv2.getRotationMatrix2D((width / 2, height / 2), degree, 1) - matRotation[0, 2] += (widthNew - width) / 2 - matRotation[1, 2] += (heightNew - height) / 2 - imgRotation = cv2.warpAffine( - img, matRotation, (widthNew, heightNew), borderValue=(255, 255, 255)) - pt1 = list(pt1) - pt3 = list(pt3) - - [[pt1[0]], [pt1[1]]] = np.dot( - matRotation, np.array([[pt1[0]], [pt1[1]], [1]])) - [[pt3[0]], [pt3[1]]] = np.dot( - matRotation, np.array([[pt3[0]], [pt3[1]], [1]])) - ydim, xdim = imgRotation.shape[:2] - imgOut = imgRotation[max(1, int(pt1[1])):min( - ydim - 1, int(pt3[1])), max(1, int(pt1[0])):min(xdim - 1, int(pt3[0]))] - # height,width=imgOut.shape[:2] - return imgOut - - -def model(img, adjust=False, detectAngle=False): - """ - @@param:img, - @@param:model,选择的ocr模型,支持keras\pytorch版本 - @@param:adjust 调整文字识别结果 - @@param:detectAngle,是否检测文字朝向 - - """ - angle = 0 - if detectAngle: - angle = angle_detect(img=np.copy(img)) # 文字朝向检测 - im = Image.fromarray(img) - if angle == 90: - im = im.transpose(Image.ROTATE_90) - elif angle == 180: - im = im.transpose(Image.ROTATE_180) - elif angle == 270: - im = im.transpose(Image.ROTATE_270) - img = np.array(im) - - text_recs, tmp, img = text_detect(img) - text_recs = sort_box(text_recs) - result = crnnRec(img, text_recs, model, adjust=adjust) - return result, tmp, angle - - -def sort_box(box): - """ - 对box排序,及页面进行排版 - text_recs[index, 0] = x1 - text_recs[index, 1] = y1 - text_recs[index, 2] = x2 - text_recs[index, 3] = y2 - text_recs[index, 4] = x3 - text_recs[index, 5] = y3 - text_recs[index, 6] = x4 - text_recs[index, 7] = y4 - """ - - box = sorted(box, key=lambda x: sum([x[1], x[3], x[5], x[7]])) - return box -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -图像文字方向检测 -@author: xiaofeng -""" -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# _Author_: xiaofeng -# Date: 2018-04-22 18:13:46 -# Last Modified by: xiaofeng -# Last Modified time: 2018-04-22 18:13:46 -''' -根据给定的图形,分析文字的朝向 -''' -# from keras.models import load_model -import numpy as np -from PIL import Image -from keras.applications.vgg16 import preprocess_input, VGG16 -from keras.layers import Dense -from keras.models import Model -# 编译模型,以较小的学习参数进行训练 -from keras.optimizers import SGD - - -def load(): - vgg = VGG16(weights=None, input_shape=(224, 224, 3)) - # 修改输出层 3个输出 - x = vgg.layers[-2].output - predictions_class = Dense( - 4, activation='softmax', name='predictions_class')(x) - prediction = [predictions_class] - model = Model(inputs=vgg.input, outputs=prediction) - sgd = SGD(lr=0.00001, momentum=0.9) - model.compile( - optimizer=sgd, loss='categorical_crossentropy', metrics=['accuracy']) - model.load_weights( - '/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/angle/modelAngle.h5') - return model - - -# 加载模型 -model = None - - -def predict(path=None, img=None): - global model - if model is None: - model = load() - """ - 图片文字方向预测 - """ - ROTATE = [0, 90, 180, 270] - if path is 
not None: - im = Image.open(path).convert('RGB') - elif img is not None: - im = Image.fromarray(img).convert('RGB') - w, h = im.size - # 对图像进行剪裁 - # 左上角(int(0.1 * w), int(0.1 * h)) - # 右下角(w - int(0.1 * w), h - int(0.1 * h)) - xmin, ymin, xmax, ymax = int(0.1 * w), int( - 0.1 * h), w - int(0.1 * w), h - int(0.1 * h) - im = im.crop((xmin, ymin, xmax, ymax)) # 剪切图片边缘,清除边缘噪声 - # 对图片进行剪裁之后进行resize成(224,224) - im = im.resize((224, 224)) - # 将图像转化成数组形式 - img = np.array(im) - img = preprocess_input(img.astype(np.float32)) - pred = model.predict(np.array([img])) - index = np.argmax(pred, axis=1)[0] - return ROTATE[index] -# coding:utf-8 -import cv2 -from math import * -import keys_crnn -import models.crnn as crnn -import dataset -import util -import numpy as np -from torch.autograd import Variable -import torch.utils.data -import torch -import sys - -sys.path.insert(1, "./crnn") - -GPU = False - - -def dumpRotateImage_(img, degree, pt1, pt2, pt3, pt4): - height, width = img.shape[:2] - heightNew = int(width * fabs(sin(radians(degree))) + - height * fabs(cos(radians(degree)))) - widthNew = int(height * fabs(sin(radians(degree))) + - width * fabs(cos(radians(degree)))) - matRotation = cv2.getRotationMatrix2D((width / 2, height / 2), degree, 1) - matRotation[0, 2] += (widthNew - width) / 2 - matRotation[1, 2] += (heightNew - height) / 2 - imgRotation = cv2.warpAffine( - img, matRotation, (widthNew, heightNew), borderValue=(255, 255, 255)) - pt1 = list(pt1) - pt3 = list(pt3) - - [[pt1[0]], [pt1[1]]] = np.dot( - matRotation, np.array([[pt1[0]], [pt1[1]], [1]])) - [[pt3[0]], [pt3[1]]] = np.dot( - matRotation, np.array([[pt3[0]], [pt3[1]], [1]])) - imgOut = imgRotation[int(pt1[1]):int(pt3[1]), int(pt1[0]):int(pt3[0])] - height, width = imgOut.shape[:2] - return imgOut - - -def crnnSource(): - alphabet = keys_crnn.alphabet - converter = util.strLabelConverter(alphabet) - if torch.cuda.is_available() and GPU: - model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1).cuda() - else: - model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1).cpu() - path = './crnn/samples/model_acc97.pth' - model.eval() - model.load_state_dict(torch.load(path)) - return model, converter - - -# 加载模型 -model, converter = crnnSource() - - -def crnnOcr(image): - """ - crnn模型,ocr识别 - @@model, - @@converter, - @@im - @@text_recs:text box - - """ - scale = image.size[1] * 1.0 / 32 - w = image.size[0] / scale - w = int(w) - # print "im size:{},{}".format(image.size,w) - transformer = dataset.resizeNormalize((w, 32)) - if torch.cuda.is_available() and GPU: - image = transformer(image).cuda() - else: - image = transformer(image).cpu() - - image = image.view(1, *image.size()) - image = Variable(image) - model.eval() - preds = model(image) - _, preds = preds.max(2) - preds = preds.transpose(1, 0).contiguous().view(-1) - preds_size = Variable(torch.IntTensor([preds.size(0)])) - sim_pred = converter.decode(preds.data, preds_size.data, raw=False) - if len(sim_pred) > 0: - if sim_pred[0] == u'-': - sim_pred = sim_pred[1:] - - return sim_pred -#!/usr/bin/python -# encoding: utf-8 - -import random -import sys - -import lmdb -import numpy as np -import six -import torch -import torchvision.transforms as transforms -from PIL import Image -from torch.utils.data import Dataset -from torch.utils.data import sampler - - -class lmdbDataset(Dataset): - def __init__(self, root=None, transform=None, target_transform=None): - self.env = lmdb.open( - root, - max_readers=1, - readonly=True, - lock=False, - readahead=False, - meminit=False) - - if not self.env: - 
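# A compact sketch of the orientation pipeline that the angle/predict.py code
# above implements: trim 10% margins, resize to 224x224, VGG16 body with a
# 4-way softmax head, argmax -> one of [0, 90, 180, 270]. `weights_path` is a
# hypothetical argument; the scraped code hard-codes its own .h5 path.
import numpy as np
from PIL import Image
from keras.applications.vgg16 import VGG16, preprocess_input
from keras.layers import Dense
from keras.models import Model

def build_angle_model(weights_path=None):
    vgg = VGG16(weights=None, input_shape=(224, 224, 3))
    x = vgg.layers[-2].output                 # drop the 1000-way ImageNet head
    out = Dense(4, activation='softmax')(x)   # one class per rotation
    model = Model(inputs=vgg.input, outputs=out)
    if weights_path:
        model.load_weights(weights_path)
    return model

def detect_angle(model, im):
    w, h = im.size
    im = im.crop((int(0.1 * w), int(0.1 * h), w - int(0.1 * w), h - int(0.1 * h)))
    x = preprocess_input(np.array(im.resize((224, 224))).astype(np.float32))
    return [0, 90, 180, 270][int(np.argmax(model.predict(x[None])[0]))]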
print('cannot creat lmdb from %s' % (root)) - sys.exit(0) - - with self.env.begin(write=False) as txn: - nSamples = int(txn.get('num-samples')) - self.nSamples = nSamples - - self.transform = transform - self.target_transform = target_transform - - def __len__(self): - return self.nSamples - - def __getitem__(self, index): - assert index <= len(self), 'index range error' - index += 1 - with self.env.begin(write=False) as txn: - img_key = 'image-%09d' % index - imgbuf = txn.get(img_key) - - buf = six.BytesIO() - buf.write(imgbuf) - buf.seek(0) - try: - img = Image.open(buf).convert('L') - except IOError: - print('Corrupted image for %d' % index) - return self[index + 1] - - if self.transform is not None: - img = self.transform(img) - - label_key = 'label-%09d' % index - label = str(txn.get(label_key)) - if self.target_transform is not None: - label = self.target_transform(label) - - return (img, label) - - -class resizeNormalize(object): - def __init__(self, size, interpolation=Image.BILINEAR): - self.size = size - self.interpolation = interpolation - self.toTensor = transforms.ToTensor() - - def __call__(self, img): - img = img.resize(self.size, self.interpolation) - img = self.toTensor(img) - img.sub_(0.5).div_(0.5) - return img - - -class randomSequentialSampler(sampler.Sampler): - def __init__(self, data_source, batch_size): - self.num_samples = len(data_source) - self.batch_size = batch_size - - def __iter__(self): - n_batch = len(self) // self.batch_size - tail = len(self) % self.batch_size - index = torch.LongTensor(len(self)).fill_(0) - for i in range(n_batch): - random_start = random.randint(0, len(self) - self.batch_size) - batch_index = random_start + torch.range(0, self.batch_size - 1) - index[i * self.batch_size:(i + 1) * self.batch_size] = batch_index - # deal with tail - if tail: - random_start = random.randint(0, len(self) - self.batch_size) - tail_index = random_start + torch.range(0, tail - 1) - index[(i + 1) * self.batch_size:] = tail_index - - return iter(index) - - def __len__(self): - return self.num_samples - - -class alignCollate(object): - def __init__(self, imgH=32, imgW=128, keep_ratio=False, min_ratio=1): - self.imgH = imgH - self.imgW = imgW - self.keep_ratio = keep_ratio - self.min_ratio = min_ratio - - def __call__(self, batch): - images, labels = zip(*batch) - - imgH = self.imgH - imgW = self.imgW - if self.keep_ratio: - ratios = [] - for image in images: - w, h = image.size - ratios.append(w / float(h)) - ratios.sort() - max_ratio = ratios[-1] - imgW = int(np.floor(max_ratio * imgH)) - imgW = max(imgH * self.min_ratio, imgW) # assure imgH >= imgW - - transform = resizeNormalize((imgW, imgH)) - images = [transform(image) for image in images] - images = torch.cat([t.unsqueeze(0) for t in images], 0) - - return images, labels -# coding:UTF-8 -alphabet = 
u'\'疗绚诚娇溜题贿者廖更纳加奉公一就汴计与路房原妇208-7其>:],,骑刈全消昏傈安久钟嗅不影处驽蜿资关椤地瘸专问忖票嫉炎韵要月田节陂鄙捌备拳伺眼网盎大傍心东愉汇蹿科每业里航晏字平录先13彤鲶产稍督腴有象岳注绍在泺文定核名水过理让偷率等这发”为含肥酉相鄱七编猥锛日镀蒂掰倒辆栾栗综涩州雌滑馀了机块司宰甙兴矽抚保用沧秩如收息滥页疑埠!!姥异橹钇向下跄的椴沫国绥獠报开民蜇何分凇长讥藏掏施羽中讲派嘟人提浼间世而古多倪唇饯控庚首赛蜓味断制觉技替艰溢潮夕钺外摘枋动双单啮户枇确锦曜杜或能效霜盒然侗电晁放步鹃新杖蜂吒濂瞬评总隍对独合也是府青天诲墙组滴级邀帘示已时骸仄泅和遨店雇疫持巍踮境只亨目鉴崤闲体泄杂作般轰化解迂诿蛭璀腾告版服省师小规程线海办引二桧牌砺洄裴修图痫胡许犊事郛基柴呼食研奶律蛋因葆察戏褒戒再李骁工貂油鹅章啄休场给睡纷豆器捎说敏学会浒设诊格廓查来霓室溆¢诡寥焕舜柒狐回戟砾厄实翩尿五入径惭喹股宇篝|;美期云九祺扮靠锝槌系企酰阊暂蚕忻豁本羹执条钦H獒限进季楦于芘玖铋茯未答粘括样精欠矢甥帷嵩扣令仔风皈行支部蓉刮站蜡救钊汗松嫌成可.鹤院从交政怕活调球局验髌第韫谗串到圆年米/*友忿检区看自敢刃个兹弄流留同没齿星聆轼湖什三建蛔儿椋汕震颧鲤跟力情璺铨陪务指族训滦鄣濮扒商箱十召慷辗所莞管护臭横硒嗓接侦六露党馋驾剖高侬妪幂猗绺骐央酐孝筝课徇缰门男西项句谙瞒秃篇教碲罚声呐景前富嘴鳌稀免朋啬睐去赈鱼住肩愕速旁波厅健茼厥鲟谅投攸炔数方击呋谈绩别愫僚躬鹧胪炳招喇膨泵蹦毛结54谱识陕粽婚拟构且搜任潘比郢妨醪陀桔碘扎选哈骷楷亿明缆脯监睫逻婵共赴淝凡惦及达揖谩澹减焰蛹番祁柏员禄怡峤龙白叽生闯起细装谕竟聚钙上导渊按艾辘挡耒盹饪臀记邮蕙受各医搂普滇朗茸带翻酚(光堤墟蔷万幻〓瑙辈昧盏亘蛀吉铰请子假闻税井诩哨嫂好面琐校馊鬣缂营访炖占农缀否经钚棵趟张亟吏茶谨捻论迸堂玉信吧瞠乡姬寺咬溏苄皿意赉宝尔钰艺特唳踉都荣倚登荐丧奇涵批炭近符傩感道着菊虹仲众懈濯颞眺南释北缝标既茗整撼迤贲挎耱拒某妍卫哇英矶藩治他元领膜遮穗蛾飞荒棺劫么市火温拈棚洼转果奕卸迪伸泳斗邡侄涨屯萋胭氡崮枞惧冒彩斜手豚随旭淑妞形菌吲沱争驯歹挟兆柱传至包内响临红功弩衡寂禁老棍耆渍织害氵渑布载靥嗬虽苹咨娄库雉榜帜嘲套瑚亲簸欧边6腿旮抛吹瞳得镓梗厨继漾愣憨士策窑抑躯襟脏参贸言干绸鳄穷藜音折详)举悍甸癌黎谴死罩迁寒驷袖媒蒋掘模纠恣观祖蛆碍位稿主澧跌筏京锏帝贴证糠才黄鲸略炯饱四出园犀牧容汉杆浈汰瑷造虫瘩怪驴济应花沣谔夙旅价矿以考su呦晒巡茅准肟瓴詹仟褂译桌混宁怦郑抿些余鄂饴攒珑群阖岔琨藓预环洮岌宀杲瀵最常囡周踊女鼓袭喉简范薯遐疏粱黜禧法箔斤遥汝奥直贞撑置绱集她馅逗钧橱魉[恙躁唤9旺膘待脾惫购吗依盲度瘿蠖俾之镗拇鲵厝簧续款展啃表剔品钻腭损清锶统涌寸滨贪链吠冈伎迥咏吁览防迅失汾阔逵绀蔑列川凭努熨揪利俱绉抢鸨我即责膦易毓鹊刹玷岿空嘞绊排术估锷违们苟铜播肘件烫审鲂广像铌惰铟巳胍鲍康憧色恢想拷尤疳知SYFDA峄裕帮握搔氐氘难墒沮雨叁缥悴藐湫娟苑稠颛簇后阕闭蕤缚怎佞码嘤蔡痊舱螯帕赫昵升烬岫、疵蜻髁蕨隶烛械丑盂梁强鲛由拘揉劭龟撤钩呕孛费妻漂求阑崖秤甘通深补赃坎床啪承吼量暇钼烨阂擎脱逮称P神属矗华届狍葑汹育患窒蛰佼静槎运鳗庆逝曼疱克代官此麸耧蚌晟例础榛副测唰缢迹灬霁身岁赭扛又菡乜雾板读陷徉贯郁虑变钓菜圾现琢式乐维渔浜左吾脑钡警T啵拴偌漱湿硕止骼魄积燥联踢玛|则窿见振畿送班钽您赵刨印讨踝籍谡舌崧汽蔽沪酥绒怖财帖肱私莎勋羔霸励哼帐将帅渠纪婴娩岭厘滕吻伤坝冠戊隆瘁介涧物黍并姗奢蹑掣垸锴命箍捉病辖琰眭迩艘绌繁寅若毋思诉类诈燮轲酮狂重反职筱县委磕绣奖晋濉志徽肠呈獐坻口片碰几村柿劳料获亩惕晕厌号罢池正鏖煨家棕复尝懋蜥锅岛扰队坠瘾钬@卧疣镇譬冰彷频黯据垄采八缪瘫型熹砰楠襁箐但嘶绳啤拍盥穆傲洗盯塘怔筛丿台恒喂葛永¥烟酒桦书砂蚝缉态瀚袄圳轻蛛超榧遛姒奘铮右荽望偻卡丶氰附做革索戚坨桷唁垅榻岐偎坛莨山殊微骇陈爨推嗝驹澡藁呤卤嘻糅逛侵郓酌德摇※鬃被慨殡羸昌泡戛鞋河宪沿玲鲨翅哽源铅语照邯址荃佬顺鸳町霭睾瓢夸椁晓酿痈咔侏券噎湍签嚷离午尚社锤背孟使浪缦潍鞅军姹驶笑鳟鲁》孽钜绿洱礴焯椰颖囔乌孔巴互性椽哞聘昨早暮胶炀隧低彗昝铁呓氽藉喔癖瑗姨权胱韦堑蜜酋楝砝毁靓歙锲究屋喳骨辨碑武鸠宫辜烊适坡殃培佩供走蜈迟翼况姣凛浔吃飘债犟金促苛崇坂莳畔绂兵蠕斋根砍亢欢恬崔剁餐榫快扶‖濒缠鳜当彭驭浦篮昀锆秸钳弋娣瞑夷龛苫拱致%嵊障隐弑初娓抉汩累蓖"唬助苓昙押毙破城郧逢嚏獭瞻溱婿赊跨恼璧萃姻貉灵炉密氛陶砸谬衔点琛沛枳层岱诺脍榈埂征冷裁打蹴素瘘逞蛐聊激腱萘踵飒蓟吆取咙簋涓矩曝挺揣座你史舵焱尘苏笈脚溉榨诵樊邓焊义庶儋蟋蒲赦呷杞诠豪还试颓茉太除紫逃痴草充鳕珉祗墨渭烩蘸慕璇镶穴嵘恶骂险绋幕碉肺戳刘潞秣纾潜銮洛须罘销瘪汞兮屉r林厕质探划狸殚善煊烹〒锈逯宸辍泱柚袍远蹋嶙绝峥娥缍雀徵认镱谷=贩勉撩鄯斐洋非祚泾诒饿撬威晷搭芍锥笺蓦候琊档礁沼卵荠忑朝凹瑞头仪弧孵畏铆突衲车浩气茂悖厢枕酝戴湾邹飚攘锂写宵翁岷无喜丈挑嗟绛殉议槽具醇淞笃郴阅饼底壕砚弈询缕庹翟零筷暨舟闺甯撞麂茌蔼很珲捕棠角阉媛娲诽剿尉爵睬韩诰匣危糍镯立浏阳少盆舔擘匪申尬铣旯抖赘瓯居ˇ哮游锭茏歌坏甚秒舞沙仗劲潺阿燧郭嗖霏忠材奂耐跺砀输岖媳氟极摆灿今扔腻枝奎药熄吨话q额慑嘌协喀壳埭视著於愧陲翌峁颅佛腹聋侯咎叟秀颇存较罪哄岗扫栏钾羌己璨枭霉煌涸衿键镝益岢奏连夯睿冥均糖狞蹊稻爸刿胥煜丽肿璃掸跚灾垂樾濑乎莲窄犹撮战馄软络显鸢胸宾妲恕埔蝌份遇巧瞟粒恰剥桡博讯凯堇阶滤卖斌骚彬兑磺樱舷两娱福仃差找桁÷净把阴污戬雷碓蕲楚罡焖抽妫咒仑闱尽邑菁爱贷沥鞑牡嗉崴骤塌嗦订拮滓捡锻次坪杩臃箬融珂鹗宗枚降鸬妯阄堰盐毅必杨崃俺甬状莘货耸菱腼铸唏痤孚澳懒溅翘疙杷淼缙骰喊悉砻坷艇赁界谤纣宴晃茹归饭梢铡街抄肼鬟苯颂撷戈炒咆茭瘙负仰客琉铢封卑珥椿镧窨鬲寿御袤铃萎砖餮脒裳肪孕嫣馗嵇恳氯江石褶冢祸阻狈羞银靳透咳叼敷芷啥它瓤兰痘懊逑肌往捺坊甩呻〃沦忘膻祟菅剧崆智坯臧霍墅攻眯倘拢骠铐庭岙瓠′缺泥迢捶??郏喙掷沌纯秘种听绘固螨团香盗妒埚蓝拖旱荞铀血遏汲辰叩拽幅硬惶桀漠措泼唑齐肾念酱虚屁耶旗砦闵婉馆拭绅韧忏窝醋葺顾辞倜堆辋逆玟贱疾董惘倌锕淘嘀莽俭笏绑鲷杈择蟀粥嗯驰逾案谪褓胫哩昕颚鲢绠躺鹄崂儒俨丝尕泌啊萸彰幺吟骄苣弦脊瑰〈诛镁析闪剪侧哟框螃守嬗燕狭铈缮概迳痧鲲俯售笼痣扉挖满咋援邱扇歪便玑绦峡蛇叨〖泽胃斓喋怂坟猪该蚬炕弥赞棣晔娠挲狡创疖铕镭稷挫弭啾翔粉履苘哦楼秕铂土锣瘟挣栉习享桢袅磨桂谦延坚蔚噗署谟猬钎恐嬉雒倦衅亏璩睹刻殿王算雕麻丘柯骆丸塍谚添鲈垓桎蚯芥予飕镦谌窗醚菀亮搪莺蒿羁足J真轶悬衷靛翊掩哒炅掐冼妮l谐稚荆擒犯陵虏浓崽刍陌傻孜千靖演矜钕煽杰酗渗伞栋俗泫戍罕沾疽灏煦芬磴叱阱榉湃蜀叉醒彪租郡篷屎良垢隗弱陨峪砷掴颁胎雯绵贬沐撵隘篙暖曹陡栓填臼彦瓶琪潼哪鸡摩啦俟锋域耻蔫疯纹撇毒绶痛酯忍爪赳歆嘹辕烈册朴钱吮毯癜娃谀邵厮炽璞邃丐追词瓒忆轧芫谯喷弟半冕裙掖墉绮寝苔势顷褥切衮君佳嫒蚩霞佚洙逊镖暹唛&殒顶碗獗轭铺蛊废恹汨崩珍那杵曲纺夏薰傀闳淬姘舀拧卷楂恍讪厩寮篪赓乘灭盅鞣沟慎挂饺鼾杳树缨丛絮娌臻嗳篡侩述衰矛圈蚜匕筹匿濞晨叶骋郝挚蚴滞增侍描瓣吖嫦蟒匾圣赌毡癞恺百曳需篓肮庖帏卿驿遗蹬鬓骡歉芎胳屐禽烦晌寄媾狄翡苒船廉终痞殇々畦饶改拆悻萄£瓿乃訾桅匮溧拥纱铍骗蕃龋缬父佐疚栎醍掳蓄x惆颜鲆榆〔猎敌暴谥鲫贾罗玻缄扦芪癣落徒臾恿猩托邴肄牵春陛耀刊拓蓓邳堕寇枉淌啡湄兽酷萼碚濠萤夹旬戮梭琥椭昔勺蜊绐晚孺僵宣摄冽旨萌忙蚤眉噼蟑付契瓜悼颡壁曾窕颢澎仿俑浑嵌浣乍碌褪乱蔟隙玩剐葫箫纲围伐决伙漩瑟刑肓镳缓蹭氨皓典畲坍铑檐塑洞倬储胴淳戾吐灼惺妙毕珐缈虱盖羰鸿磅谓髅娴苴唷蚣霹抨贤唠犬誓逍庠逼麓籼釉呜碧秧氩摔霄穸纨辟妈映完牛缴嗷炊恩荔茆掉紊慌莓羟阙萁磐另蕹辱鳐湮吡吩唐睦垠舒圜冗瞿溺芾囱匠僳汐菩饬漓黑霰浸濡窥毂蒡兢驻鹉芮诙迫雳厂忐臆猴鸣蚪栈箕羡渐莆捍眈哓趴蹼埕嚣骛宏淄斑噜严瑛垃椎诱压庾绞焘廿抡迄棘夫纬锹眨瞌侠脐竞瀑孳骧遁姜颦荪滚萦伪逸粳爬锁矣役趣洒颔诏逐奸甭惠攀蹄泛尼拼阮鹰亚颈惑勒〉际肛爷刚钨丰养冶鲽辉蔻画覆皴妊麦返醉皂擀〗酶凑粹悟诀硖港卜z杀涕±舍铠抵弛段敝镐奠拂轴跛袱et沉菇俎薪峦秭蟹历盟菠寡液肢喻染裱悱抱氙赤捅猛跑氮谣仁尺辊窍烙衍架擦倏璐瑁币楞胖夔趸邛惴饕虔蝎§哉贝宽辫炮扩饲籽魏菟锰伍猝末琳哚蛎邂呀姿鄞却歧仙恸椐森牒寤袒婆虢雅钉朵贼欲苞寰故龚坭嘘咫礼硷兀睢汶’铲烧绕诃浃钿哺柜讼颊璁腔洽咐脲簌筠镣玮鞠谁兼姆挥梯蝴谘漕刷躏宦弼b垌劈麟莉揭笙渎仕嗤仓配怏抬错泯镊孰猿邪仍秋鼬壹歇吵炼<尧射柬廷胧霾凳隋肚浮梦祥株堵退L鹫跎凶毽荟炫栩玳甜沂鹿顽伯爹赔蛴徐匡欣狰缸雹蟆疤默沤啜痂衣禅wih辽葳黝钗停沽棒馨颌肉吴硫
悯劾娈马啧吊悌镑峭帆瀣涉咸疸滋泣翦拙癸钥蜒+尾庄凝泉婢渴谊乞陆锉糊鸦淮IBN晦弗乔庥葡尻席橡傣渣拿惩麋斛缃矮蛏岘鸽姐膏催奔镒喱蠡摧钯胤柠拐璋鸥卢荡倾^_珀逄萧塾掇贮笆聂圃冲嵬M滔笕值炙偶蜱搐梆汪蔬腑鸯蹇敞绯仨祯谆梧糗鑫啸豺囹猾巢柄瀛筑踌沭暗苁鱿蹉脂蘖牢热木吸溃宠序泞偿拜檩厚朐毗螳吞媚朽担蝗橘畴祈糟盱隼郜惜珠裨铵焙琚唯咚噪骊丫滢勤棉呸咣淀隔蕾窈饨挨煅短匙粕镜赣撕墩酬馁豌颐抗酣氓佑搁哭递耷涡桃贻碣截瘦昭镌蔓氚甲猕蕴蓬散拾纛狼猷铎埋旖矾讳囊糜迈粟蚂紧鲳瘢栽稼羊锄斟睁桥瓮蹙祉醺鼻昱剃跳篱跷蒜翎宅晖嗑壑峻癫屏狠陋袜途憎祀莹滟佶溥臣约盛峰磁慵婪拦莅朕鹦粲裤哎疡嫖琵窟堪谛嘉儡鳝斩郾驸酊妄胜贺徙傅噌钢栅庇恋匝巯邈尸锚粗佟蛟薹纵蚊郅绢锐苗俞篆淆膀鲜煎诶秽寻涮刺怀噶巨褰魅灶灌桉藕谜舸薄搀恽借牯痉渥愿亓耘杠柩锔蚶钣珈喘蹒幽赐稗晤莱泔扯肯菪裆腩豉疆骜腐倭珏唔粮亡润慰伽橄玄誉醐胆龊粼塬陇彼削嗣绾芽妗垭瘴爽薏寨龈泠弹赢漪猫嘧涂恤圭茧烽屑痕巾赖荸凰腮畈亵蹲偃苇澜艮换骺烘苕梓颉肇哗悄氤涠葬屠鹭植竺佯诣鲇瘀鲅邦移滁冯耕癔戌茬沁巩悠湘洪痹锟循谋腕鳃钠捞焉迎碱伫急榷奈邝卯辄皲卟醛畹忧稳雄昼缩阈睑扌耗曦涅捏瞧邕淖漉铝耦禹湛喽莼琅诸苎纂硅始嗨傥燃臂赅嘈呆贵屹壮肋亍蚀卅豹腆邬迭浊}童螂捐圩勐触寞汊壤荫膺渌芳懿遴螈泰蓼蛤茜舅枫朔膝眙避梅判鹜璜牍缅垫藻黔侥惚懂踩腰腈札丞唾慈顿摹荻琬~斧沈滂胁胀幄莜Z匀鄄掌绰茎焚赋萱谑汁铒瞎夺蜗野娆冀弯篁懵灞隽芡脘俐辩芯掺喏膈蝈觐悚踹蔗熠鼠呵抓橼峨畜缔禾崭弃熊摒凸拗穹蒙抒祛劝闫扳阵醌踪喵侣搬仅荧赎蝾琦买婧瞄寓皎冻赝箩莫瞰郊笫姝筒枪遣煸袋舆痱涛母〇启践耙绲盘遂昊搞槿诬纰泓惨檬亻越Co憩熵祷钒暧塔阗胰咄娶魔琶钞邻扬杉殴咽弓〆髻】吭揽霆拄殖脆彻岩芝勃辣剌钝嘎甄佘皖伦授徕憔挪皇庞稔芜踏溴兖卒擢饥鳞煲‰账颗叻斯捧鳍琮讹蛙纽谭酸兔莒睇伟觑羲嗜宜褐旎辛卦诘筋鎏溪挛熔阜晰鳅丢奚灸呱献陉黛鸪甾萨疮拯洲疹辑叙恻谒允柔烂氏逅漆拎惋扈湟纭啕掬擞哥忽涤鸵靡郗瓷扁廊怨雏钮敦E懦憋汀拚啉腌岸f痼瞅尊咀眩飙忌仝迦熬毫胯篑茄腺凄舛碴锵诧羯後漏汤宓仞蚁壶谰皑铄棰罔辅晶苦牟闽\烃饮聿丙蛳朱煤涔鳖犁罐荼砒淦妤黏戎孑婕瑾戢钵枣捋砥衩狙桠稣阎肃梏诫孪昶婊衫嗔侃塞蜃樵峒貌屿欺缫阐栖诟珞荭吝萍嗽恂啻蜴磬峋俸豫谎徊镍韬魇晴U囟猜蛮坐囿伴亭肝佗蝠妃胞滩榴氖垩苋砣扪馏姓轩厉夥侈禀垒岑赏钛辐痔披纸碳“坞蠓挤荥沅悔铧帼蒌蝇apyng哀浆瑶凿桶馈皮奴苜佤伶晗铱炬优弊氢恃甫攥端锌灰稹炝曙邋亥眶碾拉萝绔捷浍腋姑菖凌涞麽锢桨潢绎镰殆锑渝铬困绽觎匈糙暑裹鸟盔肽迷綦『亳佝俘钴觇骥仆疝跪婶郯瀹唉脖踞针晾忒扼瞩叛椒疟嗡邗肆跆玫忡捣咧唆艄蘑潦笛阚沸泻掊菽贫斥髂孢镂赂麝鸾屡衬苷恪叠希粤爻喝茫惬郸绻庸撅碟宄妹膛叮饵崛嗲椅冤搅咕敛尹垦闷蝉霎勰败蓑泸肤鹌幌焦浠鞍刁舰乙竿裔。茵函伊兄丨娜匍謇莪宥似蝽翳酪翠粑薇祢骏赠叫Q噤噻竖芗莠潭俊羿耜O郫趁嗪囚蹶芒洁笋鹑敲硝啶堡渲揩』携宿遒颍扭棱割萜蔸葵琴捂饰衙耿掠募岂窖涟蔺瘤柞瞪怜匹距楔炜哆秦缎幼茁绪痨恨楸娅瓦桩雪嬴伏榔妥铿拌眠雍缇‘卓搓哌觞噩屈哧髓咦巅娑侑淫膳祝勾姊莴胄疃薛蜷胛巷芙芋熙闰勿窃狱剩钏幢陟铛慧靴耍k浙浇飨惟绗祜澈啼咪磷摞诅郦抹跃壬吕肖琏颤尴剡抠凋赚泊津宕殷倔氲漫邺涎怠$垮荬遵俏叹噢饽蜘孙筵疼鞭羧牦箭潴c眸祭髯啖坳愁芩驮倡巽穰沃胚怒凤槛剂趵嫁v邢灯鄢桐睽檗锯槟婷嵋圻诗蕈颠遭痢芸怯馥竭锗徜恭遍籁剑嘱苡龄僧桑潸弘澶楹悲讫愤腥悸谍椹呢桓葭攫阀翰躲敖柑郎笨橇呃魁燎脓葩磋垛玺狮沓砜蕊锺罹蕉翱虐闾巫旦茱嬷枯鹏贡芹汛矫绁拣禺佃讣舫惯乳趋疲挽岚虾衾蠹蹂飓氦铖孩稞瑜壅掀勘妓畅髋W庐牲蓿榕练垣唱邸菲昆婺穿绡麒蚱掂愚泷涪漳妩娉榄讷觅旧藤煮呛柳腓叭庵烷阡罂蜕擂猖咿媲脉【沏貅黠熏哲烁坦酵兜×潇撒剽珩圹乾摸樟帽嗒襄魂轿憬锡〕喃皆咖隅脸残泮袂鹂珊囤捆咤误徨闹淙芊淋怆囗拨梳渤RG绨蚓婀幡狩麾谢唢裸旌伉纶裂驳砼咛澄樨蹈宙澍倍貔操勇蟠摈砧虬够缁悦藿撸艹摁淹豇虎榭ˉ吱d°喧荀踱侮奋偕饷犍惮坑璎徘宛妆袈倩窦昂荏乖K怅撰鳙牙袁酞X痿琼闸雁趾荚虻涝《杏韭偈烤绫鞘卉症遢蓥诋杭荨匆竣簪辙敕虞丹缭咩黟m淤瑕咂铉硼茨嶂痒畸敬涿粪窘熟叔嫔盾忱裘憾梵赡珙咯娘庙溯胺葱痪摊荷卞乒髦寐铭坩胗枷爆溟嚼羚砬轨惊挠罄竽菏氧浅楣盼枢炸阆杯谏噬淇渺俪秆墓泪跻砌痰垡渡耽釜讶鳎煞呗韶舶绷鹳缜旷铊皱龌檀霖奄槐艳蝶旋哝赶骞蚧腊盈丁`蜚矸蝙睨嚓僻鬼醴夜彝磊笔拔栀糕厦邰纫逭纤眦膊馍躇烯蘼冬诤暄骶哑瘠」臊丕愈咱螺擅跋搏硪谄笠淡嘿骅谧鼎皋姚歼蠢驼耳胬挝涯狗蒽孓犷凉芦箴铤孤嘛坤V茴朦挞尖橙诞搴碇洵浚帚蜍漯柘嚎讽芭荤咻祠秉跖埃吓糯眷馒惹娼鲑嫩讴轮瞥靶褚乏缤宋帧删驱碎扑俩俄偏涣竹噱皙佰渚唧斡#镉刀崎筐佣夭贰肴峙哔艿匐牺镛缘仡嫡劣枸堀梨簿鸭蒸亦稽浴{衢束槲j阁揍疥棋潋聪窜乓睛插冉阪苍搽「蟾螟幸仇樽撂慢跤幔俚淅覃觊溶妖帛侨曰妾泗' -# coding:utf-8 - -import dataset -import keys -import models.crnn as crnn -import torch.utils.data -import util -from PIL import Image -from torch.autograd import Variable - -alphabet = keys.alphabet -print(len(alphabet)) -raw_input('\ninput:') -converter = util.strLabelConverter(alphabet) -model = crnn.CRNN(32, 1, len(alphabet) + 1, 256, 1).cuda() -path = './samples/netCRNN63.pth' -model.load_state_dict(torch.load(path)) -print(model) - -while 1: - im_name = raw_input("\nplease input file name:") - im_path = "./img/" + im_name - image = Image.open(im_path).convert('L') - scale = image.size[1] * 1.0 / 32 - w = image.size[0] / scale - w = int(w) - print(w) - - transformer = dataset.resizeNormalize((w, 32)) - image = transformer(image).cuda() - image = image.view(1, *image.size()) - image = Variable(image) - model.eval() - preds = model(image) - _, preds = preds.max(2) - preds = preds.squeeze(2) - preds = preds.transpose(1, 0).contiguous().view(-1) - preds_size = Variable(torch.IntTensor([preds.size(0)])) - raw_pred = converter.decode(preds.data, preds_size.data, raw=True) - sim_pred = converter.decode(preds.data, preds_size.data, raw=False) - print('%-20s => %-20s' % (raw_pred, sim_pred)) -#!/usr/bin/python -# encoding: utf-8 - -import torch -import torch.nn as nn - - -class strLabelConverter(object): - def __init__(self, alphabet): - self.alphabet = alphabet + u'-' # for `-1` index - self.dict = {} - for i, char in enumerate(alphabet): - # NOTE: 0 is reserved for 'blank' required by wrap_ctc - self.dict[char] = i + 1 - - def encode(self, text, 
depth=0):
-        """Support batch or single str."""
-        length = []
-        result = []
-        for item in text:
-            # decode bytes for Python 3 compatibility (was `unicode(str, "utf8")`)
-            if isinstance(item, bytes):
-                item = item.decode('utf-8')
-            length.append(len(item))
-            for char in item:
-                # print(char)
-                index = self.dict[char]
-                result.append(index)
-        text = result
-        return (torch.IntTensor(text), torch.IntTensor(length))
-
-    def decode(self, t, length, raw=False):
-        if length.numel() == 1:
-            length = length[0]
-            t = t[:length]
-            if raw:
-                return ''.join([self.alphabet[i - 1] for i in t])
-            else:
-                char_list = []
-                for i in range(length):
-                    if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])):
-                        char_list.append(self.alphabet[t[i] - 1])
-                return ''.join(char_list)
-        else:
-            texts = []
-            index = 0
-            for i in range(length.numel()):
-                l = length[i]
-                texts.append(self.decode(
-                    t[index:index + l], torch.IntTensor([l]), raw=raw))
-                index += l
-            return texts
-
-
-class averager(object):
-    def __init__(self):
-        self.reset()
-
-    def add(self, v):
-        self.n_count += v.data.numel()
-        # NOTE: not `+= v.sum()`, which would add a node to the compute graph
-        # and leak memory
-        self.sum += v.data.sum()
-
-    def reset(self):
-        self.n_count = 0
-        self.sum = 0
-
-    def val(self):
-        res = 0
-        if self.n_count != 0:
-            res = self.sum / float(self.n_count)
-        return res
-
-
-def oneHot(v, v_length, nc):
-    batchSize = v_length.size(0)
-    maxLength = v_length.max()
-    v_onehot = torch.FloatTensor(batchSize, maxLength, nc).fill_(0)
-    acc = 0
-    for i in range(batchSize):
-        length = v_length[i]
-        label = v[acc:acc + length].view(-1, 1).long()
-        v_onehot[i, :length].scatter_(1, label, 1.0)
-        acc += length
-    return v_onehot
-
-
-def loadData(v, data):
-    v.data.resize_(data.size()).copy_(data)
-
-
-def prettyPrint(v):
-    print('Size {0}, Type: {1}'.format(str(v.size()), v.data.type()))
-    print('| Max: %f | Min: %f | Mean: %f' %
-          (v.max().data[0], v.min().data[0], v.mean().data[0]))
-
-
-def assureRatio(img):
-    """Ensure imgH <= imgW."""
-    b, c, h, w = img.size()
-    if h > w:
-        main = nn.UpsamplingBilinear2d(size=(h, h), scale_factor=None)
-        img = main(img)
-    return img
-import numpy as np
-# import tensorflow as tf
-from .ctpn.detectors import TextDetector
-from .ctpn.model import ctpn
-from .ctpn.other import draw_boxes
-'''
-Text-region detection; the network is CNN + RNN.
-'''
-
-
-def text_detect(img):
-    # detect with the CTPN network
-    scores, boxes, img = ctpn(img)
-    textdetector = TextDetector()
-    boxes = textdetector.detect(boxes, scores[:, np.newaxis], img.shape[:2])
-    # text_recs, tmp = draw_boxes(img, boxes, caption='im_name', wait=True, is_display=False)
-    text_recs, tmp = draw_boxes(
-        img, boxes, caption='im_name', wait=True, is_display=True)
-    return text_recs, tmp, img
-# coding:UTF-8
-alphabet = 
u"""'疗绚诚娇溜题贿者廖更纳加奉公一就汴计与路房原妇208-7其>:],,骑刈全消昏傈安久钟嗅不影处驽蜿资关椤地瘸专问忖票嫉炎韵要月田节陂鄙捌备拳伺眼网盎大傍心东愉汇蹿科每业里航晏字平录先13彤鲶产稍督腴有象岳注绍在泺文定核名水过理让偷率等这发”为含肥酉相鄱七编猥锛日镀蒂掰倒辆栾栗综涩州雌滑馀了机块司宰甙兴矽抚保用沧秩如收息滥页疑埠!!姥异橹钇向下跄的椴沫国绥獠报开民蜇何分凇长讥藏掏施羽中讲派嘟人提浼间世而古多倪唇饯控庚首赛蜓味断制觉技替艰溢潮夕钺外摘枋动双单啮户枇确锦曜杜或能效霜盒然侗电晁放步鹃新杖蜂吒濂瞬评总隍对独合也是府青天诲墙组滴级邀帘示已时骸仄泅和遨店雇疫持巍踮境只亨目鉴崤闲体泄杂作般轰化解迂诿蛭璀腾告版服省师小规程线海办引二桧牌砺洄裴修图痫胡许犊事郛基柴呼食研奶律蛋因葆察戏褒戒再李骁工貂油鹅章啄休场给睡纷豆器捎说敏学会浒设诊格廓查来霓室溆¢诡寥焕舜柒狐回戟砾厄实翩尿五入径惭喹股宇篝|;美期云九祺扮靠锝槌系企酰阊暂蚕忻豁本羹执条钦H獒限进季楦于芘玖铋茯未答粘括样精欠矢甥帷嵩扣令仔风皈行支部蓉刮站蜡救钊汗松嫌成可.鹤院从交政怕活调球局验髌第韫谗串到圆年米/*友忿检区看自敢刃个兹弄流留同没齿星聆轼湖什三建蛔儿椋汕震颧鲤跟力情璺铨陪务指族训滦鄣濮扒商箱十召慷辗所莞管护臭横硒嗓接侦六露党馋驾剖高侬妪幂猗绺骐央酐孝筝课徇缰门男西项句谙瞒秃篇教碲罚声呐景前富嘴鳌稀免朋啬睐去赈鱼住肩愕速旁波厅健茼厥鲟谅投攸炔数方击呋谈绩别愫僚躬鹧胪炳招喇膨泵蹦毛结54谱识陕粽婚拟构且搜任潘比郢妨醪陀桔碘扎选哈骷楷亿明缆脯监睫逻婵共赴淝凡惦及达揖谩澹减焰蛹番祁柏员禄怡峤龙白叽生闯起细装谕竟聚钙上导渊按艾辘挡耒盹饪臀记邮蕙受各医搂普滇朗茸带翻酚(光堤墟蔷万幻〓瑙辈昧盏亘蛀吉铰请子假闻税井诩哨嫂好面琐校馊鬣缂营访炖占农缀否经钚棵趟张亟吏茶谨捻论迸堂玉信吧瞠乡姬寺咬溏苄皿意赉宝尔钰艺特唳踉都荣倚登荐丧奇涵批炭近符傩感道着菊虹仲众懈濯颞眺南释北缝标既茗整撼迤贲挎耱拒某妍卫哇英矶藩治他元领膜遮穗蛾飞荒棺劫么市火温拈棚洼转果奕卸迪伸泳斗邡侄涨屯萋胭氡崮枞惧冒彩斜手豚随旭淑妞形菌吲沱争驯歹挟兆柱传至包内响临红功弩衡寂禁老棍耆渍织害氵渑布载靥嗬虽苹咨娄库雉榜帜嘲套瑚亲簸欧边6腿旮抛吹瞳得镓梗厨继漾愣憨士策窑抑躯襟脏参贸言干绸鳄穷藜音折详)举悍甸癌黎谴死罩迁寒驷袖媒蒋掘模纠恣观祖蛆碍位稿主澧跌筏京锏帝贴证糠才黄鲸略炯饱四出园犀牧容汉杆浈汰瑷造虫瘩怪驴济应花沣谔夙旅价矿以考su呦晒巡茅准肟瓴詹仟褂译桌混宁怦郑抿些余鄂饴攒珑群阖岔琨藓预环洮岌宀杲瀵最常囡周踊女鼓袭喉简范薯遐疏粱黜禧法箔斤遥汝奥直贞撑置绱集她馅逗钧橱魉[恙躁唤9旺膘待脾惫购吗依盲度瘿蠖俾之镗拇鲵厝簧续款展啃表剔品钻腭损清锶统涌寸滨贪链吠冈伎迥咏吁览防迅失汾阔逵绀蔑列川凭努熨揪利俱绉抢鸨我即责膦易毓鹊刹玷岿空嘞绊排术估锷违们苟铜播肘件烫审鲂广像铌惰铟巳胍鲍康憧色恢想拷尤疳知SYFDA峄裕帮握搔氐氘难墒沮雨叁缥悴藐湫娟苑稠颛簇后阕闭蕤缚怎佞码嘤蔡痊舱螯帕赫昵升烬岫、疵蜻髁蕨隶烛械丑盂梁强鲛由拘揉劭龟撤钩呕孛费妻漂求阑崖秤甘通深补赃坎床啪承吼量暇钼烨阂擎脱逮称P神属矗华届狍葑汹育患窒蛰佼静槎运鳗庆逝曼疱克代官此麸耧蚌晟例础榛副测唰缢迹灬霁身岁赭扛又菡乜雾板读陷徉贯郁虑变钓菜圾现琢式乐维渔浜左吾脑钡警T啵拴偌漱湿硕止骼魄积燥联踢玛|则窿见振畿送班钽您赵刨印讨踝籍谡舌崧汽蔽沪酥绒怖财帖肱私莎勋羔霸励哼帐将帅渠纪婴娩岭厘滕吻伤坝冠戊隆瘁介涧物黍并姗奢蹑掣垸锴命箍捉病辖琰眭迩艘绌繁寅若毋思诉类诈燮轲酮狂重反职筱县委磕绣奖晋濉志徽肠呈獐坻口片碰几村柿劳料获亩惕晕厌号罢池正鏖煨家棕复尝懋蜥锅岛扰队坠瘾钬@卧疣镇譬冰彷频黯据垄采八缪瘫型熹砰楠襁箐但嘶绳啤拍盥穆傲洗盯塘怔筛丿台恒喂葛永¥烟酒桦书砂蚝缉态瀚袄圳轻蛛超榧遛姒奘铮右荽望偻卡丶氰附做革索戚坨桷唁垅榻岐偎坛莨山殊微骇陈爨推嗝驹澡藁呤卤嘻糅逛侵郓酌德摇※鬃被慨殡羸昌泡戛鞋河宪沿玲鲨翅哽源铅语照邯址荃佬顺鸳町霭睾瓢夸椁晓酿痈咔侏券噎湍签嚷离午尚社锤背孟使浪缦潍鞅军姹驶笑鳟鲁》孽钜绿洱礴焯椰颖囔乌孔巴互性椽哞聘昨早暮胶炀隧低彗昝铁呓氽藉喔癖瑗姨权胱韦堑蜜酋楝砝毁靓歙锲究屋喳骨辨碑武鸠宫辜烊适坡殃培佩供走蜈迟翼况姣凛浔吃飘债犟金促苛崇坂莳畔绂兵蠕斋根砍亢欢恬崔剁餐榫快扶‖濒缠鳜当彭驭浦篮昀锆秸钳弋娣瞑夷龛苫拱致%嵊障隐弑初娓抉汩累蓖"唬助苓昙押毙破城郧逢嚏獭瞻溱婿赊跨恼璧萃姻貉灵炉密氛陶砸谬衔点琛沛枳层岱诺脍榈埂征冷裁打蹴素瘘逞蛐聊激腱萘踵飒蓟吆取咙簋涓矩曝挺揣座你史舵焱尘苏笈脚溉榨诵樊邓焊义庶儋蟋蒲赦呷杞诠豪还试颓茉太除紫逃痴草充鳕珉祗墨渭烩蘸慕璇镶穴嵘恶骂险绋幕碉肺戳刘潞秣纾潜銮洛须罘销瘪汞兮屉r林厕质探划狸殚善煊烹〒锈逯宸辍泱柚袍远蹋嶙绝峥娥缍雀徵认镱谷=贩勉撩鄯斐洋非祚泾诒饿撬威晷搭芍锥笺蓦候琊档礁沼卵荠忑朝凹瑞头仪弧孵畏铆突衲车浩气茂悖厢枕酝戴湾邹飚攘锂写宵翁岷无喜丈挑嗟绛殉议槽具醇淞笃郴阅饼底壕砚弈询缕庹翟零筷暨舟闺甯撞麂茌蔼很珲捕棠角阉媛娲诽剿尉爵睬韩诰匣危糍镯立浏阳少盆舔擘匪申尬铣旯抖赘瓯居ˇ哮游锭茏歌坏甚秒舞沙仗劲潺阿燧郭嗖霏忠材奂耐跺砀输岖媳氟极摆灿今扔腻枝奎药熄吨话q额慑嘌协喀壳埭视著於愧陲翌峁颅佛腹聋侯咎叟秀颇存较罪哄岗扫栏钾羌己璨枭霉煌涸衿键镝益岢奏连夯睿冥均糖狞蹊稻爸刿胥煜丽肿璃掸跚灾垂樾濑乎莲窄犹撮战馄软络显鸢胸宾妲恕埔蝌份遇巧瞟粒恰剥桡博讯凯堇阶滤卖斌骚彬兑磺樱舷两娱福仃差找桁÷净把阴污戬雷碓蕲楚罡焖抽妫咒仑闱尽邑菁爱贷沥鞑牡嗉崴骤塌嗦订拮滓捡锻次坪杩臃箬融珂鹗宗枚降鸬妯阄堰盐毅必杨崃俺甬状莘货耸菱腼铸唏痤孚澳懒溅翘疙杷淼缙骰喊悉砻坷艇赁界谤纣宴晃茹归饭梢铡街抄肼鬟苯颂撷戈炒咆茭瘙负仰客琉铢封卑珥椿镧窨鬲寿御袤铃萎砖餮脒裳肪孕嫣馗嵇恳氯江石褶冢祸阻狈羞银靳透咳叼敷芷啥它瓤兰痘懊逑肌往捺坊甩呻〃沦忘膻祟菅剧崆智坯臧霍墅攻眯倘拢骠铐庭岙瓠′缺泥迢捶??郏喙掷沌纯秘种听绘固螨团香盗妒埚蓝拖旱荞铀血遏汲辰叩拽幅硬惶桀漠措泼唑齐肾念酱虚屁耶旗砦闵婉馆拭绅韧忏窝醋葺顾辞倜堆辋逆玟贱疾董惘倌锕淘嘀莽俭笏绑鲷杈择蟀粥嗯驰逾案谪褓胫哩昕颚鲢绠躺鹄崂儒俨丝尕泌啊萸彰幺吟骄苣弦脊瑰〈诛镁析闪剪侧哟框螃守嬗燕狭铈缮概迳痧鲲俯售笼痣扉挖满咋援邱扇歪便玑绦峡蛇叨〖泽胃斓喋怂坟猪该蚬炕弥赞棣晔娠挲狡创疖铕镭稷挫弭啾翔粉履苘哦楼秕铂土锣瘟挣栉习享桢袅磨桂谦延坚蔚噗署谟猬钎恐嬉雒倦衅亏璩睹刻殿王算雕麻丘柯骆丸塍谚添鲈垓桎蚯芥予飕镦谌窗醚菀亮搪莺蒿羁足J真轶悬衷靛翊掩哒炅掐冼妮l谐稚荆擒犯陵虏浓崽刍陌傻孜千靖演矜钕煽杰酗渗伞栋俗泫戍罕沾疽灏煦芬磴叱阱榉湃蜀叉醒彪租郡篷屎良垢隗弱陨峪砷掴颁胎雯绵贬沐撵隘篙暖曹陡栓填臼彦瓶琪潼哪鸡摩啦俟锋域耻蔫疯纹撇毒绶痛酯忍爪赳歆嘹辕烈册朴钱吮毯癜娃谀邵厮炽璞邃丐追词瓒忆轧芫谯喷弟半冕裙掖墉绮寝苔势顷褥切衮君佳嫒蚩霞佚洙逊镖暹唛&殒顶碗獗轭铺蛊废恹汨崩珍那杵曲纺夏薰傀闳淬姘舀拧卷楂恍讪厩寮篪赓乘灭盅鞣沟慎挂饺鼾杳树缨丛絮娌臻嗳篡侩述衰矛圈蚜匕筹匿濞晨叶骋郝挚蚴滞增侍描瓣吖嫦蟒匾圣赌毡癞恺百曳需篓肮庖帏卿驿遗蹬鬓骡歉芎胳屐禽烦晌寄媾狄翡苒船廉终痞殇々畦饶改拆悻萄£瓿乃訾桅匮溧拥纱铍骗蕃龋缬父佐疚栎醍掳蓄x惆颜鲆榆〔猎敌暴谥鲫贾罗玻缄扦芪癣落徒臾恿猩托邴肄牵春陛耀刊拓蓓邳堕寇枉淌啡湄兽酷萼碚濠萤夹旬戮梭琥椭昔勺蜊绐晚孺僵宣摄冽旨萌忙蚤眉噼蟑付契瓜悼颡壁曾窕颢澎仿俑浑嵌浣乍碌褪乱蔟隙玩剐葫箫纲围伐决伙漩瑟刑肓镳缓蹭氨皓典畲坍铑檐塑洞倬储胴淳戾吐灼惺妙毕珐缈虱盖羰鸿磅谓髅娴苴唷蚣霹抨贤唠犬誓逍庠逼麓籼釉呜碧秧氩摔霄穸纨辟妈映完牛缴嗷炊恩荔茆掉紊慌莓羟阙萁磐另蕹辱鳐湮吡吩唐睦垠舒圜冗瞿溺芾囱匠僳汐菩饬漓黑霰浸濡窥毂蒡兢驻鹉芮诙迫雳厂忐臆猴鸣蚪栈箕羡渐莆捍眈哓趴蹼埕嚣骛宏淄斑噜严瑛垃椎诱压庾绞焘廿抡迄棘夫纬锹眨瞌侠脐竞瀑孳骧遁姜颦荪滚萦伪逸粳爬锁矣役趣洒颔诏逐奸甭惠攀蹄泛尼拼阮鹰亚颈惑勒〉际肛爷刚钨丰养冶鲽辉蔻画覆皴妊麦返醉皂擀〗酶凑粹悟诀硖港卜z杀涕±舍铠抵弛段敝镐奠拂轴跛袱et沉菇俎薪峦秭蟹历盟菠寡液肢喻染裱悱抱氙赤捅猛跑氮谣仁尺辊窍烙衍架擦倏璐瑁币楞胖夔趸邛惴饕虔蝎§哉贝宽辫炮扩饲籽魏菟锰伍猝末琳哚蛎邂呀姿鄞却歧仙恸椐森牒寤袒婆虢雅钉朵贼欲苞寰故龚坭嘘咫礼硷兀睢汶’铲烧绕诃浃钿哺柜讼颊璁腔洽咐脲簌筠镣玮鞠谁兼姆挥梯蝴谘漕刷躏宦弼b垌劈麟莉揭笙渎仕嗤仓配怏抬错泯镊孰猿邪仍秋鼬壹歇吵炼<尧射柬廷胧霾凳隋肚浮梦祥株堵退L鹫跎凶毽荟炫栩玳甜沂鹿顽伯爹赔蛴徐匡欣狰缸雹蟆疤默沤啜痂衣禅wih辽葳黝钗停沽棒馨颌肉吴
硫悯劾娈马啧吊悌镑峭帆瀣涉咸疸滋泣翦拙癸钥蜒+尾庄凝泉婢渴谊乞陆锉糊鸦淮IBN晦弗乔庥葡尻席橡傣渣拿惩麋斛缃矮蛏岘鸽姐膏催奔镒喱蠡摧钯胤柠拐璋鸥卢荡倾^_珀逄萧塾掇贮笆聂圃冲嵬M滔笕值炙偶蜱搐梆汪蔬腑鸯蹇敞绯仨祯谆梧糗鑫啸豺囹猾巢柄瀛筑踌沭暗苁鱿蹉脂蘖牢热木吸溃宠序泞偿拜檩厚朐毗螳吞媚朽担蝗橘畴祈糟盱隼郜惜珠裨铵焙琚唯咚噪骊丫滢勤棉呸咣淀隔蕾窈饨挨煅短匙粕镜赣撕墩酬馁豌颐抗酣氓佑搁哭递耷涡桃贻碣截瘦昭镌蔓氚甲猕蕴蓬散拾纛狼猷铎埋旖矾讳囊糜迈粟蚂紧鲳瘢栽稼羊锄斟睁桥瓮蹙祉醺鼻昱剃跳篱跷蒜翎宅晖嗑壑峻癫屏狠陋袜途憎祀莹滟佶溥臣约盛峰磁慵婪拦莅朕鹦粲裤哎疡嫖琵窟堪谛嘉儡鳝斩郾驸酊妄胜贺徙傅噌钢栅庇恋匝巯邈尸锚粗佟蛟薹纵蚊郅绢锐苗俞篆淆膀鲜煎诶秽寻涮刺怀噶巨褰魅灶灌桉藕谜舸薄搀恽借牯痉渥愿亓耘杠柩锔蚶钣珈喘蹒幽赐稗晤莱泔扯肯菪裆腩豉疆骜腐倭珏唔粮亡润慰伽橄玄誉醐胆龊粼塬陇彼削嗣绾芽妗垭瘴爽薏寨龈泠弹赢漪猫嘧涂恤圭茧烽屑痕巾赖荸凰腮畈亵蹲偃苇澜艮换骺烘苕梓颉肇哗悄氤涠葬屠鹭植竺佯诣鲇瘀鲅邦移滁冯耕癔戌茬沁巩悠湘洪痹锟循谋腕鳃钠捞焉迎碱伫急榷奈邝卯辄皲卟醛畹忧稳雄昼缩阈睑扌耗曦涅捏瞧邕淖漉铝耦禹湛喽莼琅诸苎纂硅始嗨傥燃臂赅嘈呆贵屹壮肋亍蚀卅豹腆邬迭浊}童螂捐圩勐触寞汊壤荫膺渌芳懿遴螈泰蓼蛤茜舅枫朔膝眙避梅判鹜璜牍缅垫藻黔侥惚懂踩腰腈札丞唾慈顿摹荻琬~斧沈滂胁胀幄莜Z匀鄄掌绰茎焚赋萱谑汁铒瞎夺蜗野娆冀弯篁懵灞隽芡脘俐辩芯掺喏膈蝈觐悚踹蔗熠鼠呵抓橼峨畜缔禾崭弃熊摒凸拗穹蒙抒祛劝闫扳阵醌踪喵侣搬仅荧赎蝾琦买婧瞄寓皎冻赝箩莫瞰郊笫姝筒枪遣煸袋舆痱涛母〇启践耙绲盘遂昊搞槿诬纰泓惨檬亻越Co憩熵祷钒暧塔阗胰咄娶魔琶钞邻扬杉殴咽弓〆髻】吭揽霆拄殖脆彻岩芝勃辣剌钝嘎甄佘皖伦授徕憔挪皇庞稔芜踏溴兖卒擢饥鳞煲‰账颗叻斯捧鳍琮讹蛙纽谭酸兔莒睇伟觑羲嗜宜褐旎辛卦诘筋鎏溪挛熔阜晰鳅丢奚灸呱献陉黛鸪甾萨疮拯洲疹辑叙恻谒允柔烂氏逅漆拎惋扈湟纭啕掬擞哥忽涤鸵靡郗瓷扁廊怨雏钮敦E懦憋汀拚啉腌岸f痼瞅尊咀眩飙忌仝迦熬毫胯篑茄腺凄舛碴锵诧羯後漏汤宓仞蚁壶谰皑铄棰罔辅晶苦牟闽\烃饮聿丙蛳朱煤涔鳖犁罐荼砒淦妤黏戎孑婕瑾戢钵枣捋砥衩狙桠稣阎肃梏诫孪昶婊衫嗔侃塞蜃樵峒貌屿欺缫阐栖诟珞荭吝萍嗽恂啻蜴磬峋俸豫谎徊镍韬魇晴U囟猜蛮坐囿伴亭肝佗蝠妃胞滩榴氖垩苋砣扪馏姓轩厉夥侈禀垒岑赏钛辐痔披纸碳“坞蠓挤荥沅悔铧帼蒌蝇apyng哀浆瑶凿桶馈皮奴苜佤伶晗铱炬优弊氢恃甫攥端锌灰稹炝曙邋亥眶碾拉萝绔捷浍腋姑菖凌涞麽锢桨潢绎镰殆锑渝铬困绽觎匈糙暑裹鸟盔肽迷綦『亳佝俘钴觇骥仆疝跪婶郯瀹唉脖踞针晾忒扼瞩叛椒疟嗡邗肆跆玫忡捣咧唆艄蘑潦笛阚沸泻掊菽贫斥髂孢镂赂麝鸾屡衬苷恪叠希粤爻喝茫惬郸绻庸撅碟宄妹膛叮饵崛嗲椅冤搅咕敛尹垦闷蝉霎勰败蓑泸肤鹌幌焦浠鞍刁舰乙竿裔。茵函伊兄丨娜匍謇莪宥似蝽翳酪翠粑薇祢骏赠叫Q噤噻竖芗莠潭俊羿耜O郫趁嗪囚蹶芒洁笋鹑敲硝啶堡渲揩』携宿遒颍扭棱割萜蔸葵琴捂饰衙耿掠募岂窖涟蔺瘤柞瞪怜匹距楔炜哆秦缎幼茁绪痨恨楸娅瓦桩雪嬴伏榔妥铿拌眠雍缇‘卓搓哌觞噩屈哧髓咦巅娑侑淫膳祝勾姊莴胄疃薛蜷胛巷芙芋熙闰勿窃狱剩钏幢陟铛慧靴耍k浙浇飨惟绗祜澈啼咪磷摞诅郦抹跃壬吕肖琏颤尴剡抠凋赚泊津宕殷倔氲漫邺涎怠$垮荬遵俏叹噢饽蜘孙筵疼鞭羧牦箭潴c眸祭髯啖坳愁芩驮倡巽穰沃胚怒凤槛剂趵嫁v邢灯鄢桐睽檗锯槟婷嵋圻诗蕈颠遭痢芸怯馥竭锗徜恭遍籁剑嘱苡龄僧桑潸弘澶楹悲讫愤腥悸谍椹呢桓葭攫阀翰躲敖柑郎笨橇呃魁燎脓葩磋垛玺狮沓砜蕊锺罹蕉翱虐闾巫旦茱嬷枯鹏贡芹汛矫绁拣禺佃讣舫惯乳趋疲挽岚虾衾蠹蹂飓氦铖孩稞瑜壅掀勘妓畅髋W庐牲蓿榕练垣唱邸菲昆婺穿绡麒蚱掂愚泷涪漳妩娉榄讷觅旧藤煮呛柳腓叭庵烷阡罂蜕擂猖咿媲脉【沏貅黠熏哲烁坦酵兜×潇撒剽珩圹乾摸樟帽嗒襄魂轿憬锡〕喃皆咖隅脸残泮袂鹂珊囤捆咤误徨闹淙芊淋怆囗拨梳渤RG绨蚓婀幡狩麾谢唢裸旌伉纶裂驳砼咛澄樨蹈宙澍倍貔操勇蟠摈砧虬够缁悦藿撸艹摁淹豇虎榭ˉ吱d°喧荀踱侮奋偕饷犍惮坑璎徘宛妆袈倩窦昂荏乖K怅撰鳙牙袁酞X痿琼闸雁趾荚虻涝《杏韭偈烤绫鞘卉症遢蓥诋杭荨匆竣簪辙敕虞丹缭咩黟m淤瑕咂铉硼茨嶂痒畸敬涿粪窘熟叔嫔盾忱裘憾梵赡珙咯娘庙溯胺葱痪摊荷卞乒髦寐铭坩胗枷爆溟嚼羚砬轨惊挠罄竽菏氧浅楣盼枢炸阆杯谏噬淇渺俪秆墓泪跻砌痰垡渡耽釜讶鳎煞呗韶舶绷鹳缜旷铊皱龌檀霖奄槐艳蝶旋哝赶骞蚧腊盈丁`蜚矸蝙睨嚓僻鬼醴夜彝磊笔拔栀糕厦邰纫逭纤眦膊馍躇烯蘼冬诤暄骶哑瘠」臊丕愈咱螺擅跋搏硪谄笠淡嘿骅谧鼎皋姚歼蠢驼耳胬挝涯狗蒽孓犷凉芦箴铤孤嘛坤V茴朦挞尖橙诞搴碇洵浚帚蜍漯柘嚎讽芭荤咻祠秉跖埃吓糯眷馒惹娼鲑嫩讴轮瞥靶褚乏缤宋帧删驱碎扑俩俄偏涣竹噱皙佰渚唧斡#镉刀崎筐佣夭贰肴峙哔艿匐牺镛缘仡嫡劣枸堀梨簿鸭蒸亦稽浴{衢束槲j阁揍疥棋潋聪窜乓睛插冉阪苍搽「蟾螟幸仇樽撂慢跤幔俚淅覃觊溶妖帛侨曰妾泗 """ -# alphabet = u"""'疗绚诚娇溜题贿者廖更纳加奉公""" -# -*- coding: utf-8 -*- -# 修复K.ctc_decode bug 当大量测试时将GPU显存消耗完,导致错误,用decode 替代 -### -from keras.optimizers import SGD -from keras.models import Model -from keras.layers import Lambda -from keras.layers import Input, Conv2D, MaxPooling2D, ZeroPadding2D -from keras.layers import Flatten, BatchNormalization, Permute, TimeDistributed, Dense, Bidirectional, GRU -import numpy as np -import keys_ocr -import keras.backend as K -import os -import sys -parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.append(parentdir) -# from PIL import Image - - -# from keras.models import load_model - - -def ctc_lambda_func(args): - y_pred, labels, input_length, label_length = args - y_pred = y_pred[:, 2:, :] - return K.ctc_batch_cost(labels, y_pred, input_length, label_length) - - -def get_model(height, nclass): - rnnunit = 256 - input = Input(shape=(height, None, 1), name='the_input') - m = Conv2D(64, kernel_size=(3, 3), activation='relu', - padding='same', name='conv1')(input) - m = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(m) - m = Conv2D(128, kernel_size=(3, 3), activation='relu', - padding='same', name='conv2')(m) - m = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool2')(m) - m = Conv2D(256, kernel_size=(3, 3), activation='relu', - padding='same', name='conv3')(m) - m = Conv2D(256, kernel_size=(3, 3), activation='relu', - padding='same', name='conv4')(m) - - m = ZeroPadding2D(padding=(0, 1))(m) - m = MaxPooling2D(pool_size=(2, 2), strides=( - 2, 1), padding='valid', 
name='pool3')(m) - - m = Conv2D(512, kernel_size=(3, 3), activation='relu', - padding='same', name='conv5')(m) - m = BatchNormalization(axis=1)(m) - m = Conv2D(512, kernel_size=(3, 3), activation='relu', - padding='same', name='conv6')(m) - m = BatchNormalization(axis=1)(m) - m = ZeroPadding2D(padding=(0, 1))(m) - m = MaxPooling2D(pool_size=(2, 2), strides=( - 2, 1), padding='valid', name='pool4')(m) - m = Conv2D(512, kernel_size=(2, 2), activation='relu', - padding='valid', name='conv7')(m) - # m的输出维度为HWC? - # 将输入的维度按照给定模式进行重排,例如,当需要将RNN和CNN网络连接时,可能会用到该层 - # 将维度转成WHC - m = Permute((2, 1, 3), name='permute')(m) - m = TimeDistributed(Flatten(), name='timedistrib')(m) - - m = Bidirectional(GRU(rnnunit, return_sequences=True), name='blstm1')(m) - m = Dense(rnnunit, name='blstm1_out', activation='linear')(m) - m = Bidirectional(GRU(rnnunit, return_sequences=True), name='blstm2')(m) - y_pred = Dense(nclass, name='blstm2_out', activation='softmax')(m) - - basemodel = Model(inputs=input, outputs=y_pred) - - labels = Input(name='the_labels', shape=[None, ], dtype='float32') - input_length = Input(name='input_length', shape=[1], dtype='int64') - label_length = Input(name='label_length', shape=[1], dtype='int64') - loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')( - [y_pred, labels, input_length, label_length]) - model = Model(inputs=[input, labels, input_length, - label_length], outputs=[loss_out]) - sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) - # model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adadelta') - model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) - # model.summary() - return model, basemodel - - -characters = keys_ocr.alphabet[:] -modelPath = os.path.join(os.getcwd(), "ocr/ocr0.2.h5") -# modelPath = '/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/save_model/my_model_keras.h5' -height = 32 -nclass = len(characters)+1 -if os.path.exists(modelPath): - model, basemodel = get_model(height, nclass) - basemodel.load_weights(modelPath) - # model.load_weights(modelPath) - - -def predict(im): - """ - 输入图片,输出keras模型的识别结果 - """ - im = im.convert('L') - scale = im.size[1] * 1.0 / 32 - w = im.size[0] / scale - w = int(w) - im = im.resize((w, 32)) - img = np.array(im).astype(np.float32) / 255.0 - X = img.reshape((32, w, 1)) - X = np.array([X]) - y_pred = basemodel.predict(X) - y_pred = y_pred[:, 2:, :] - out = decode(y_pred) - # out = K.get_value(K.ctc_decode(y_pred, input_length=np.ones(y_pred.shape[0])*y_pred.shape[1], )[0][0])[:, :] - - # out = u''.join([characters[x] for x in out[0]]) - - if len(out) > 0: - while out[0] == u'。': - if len(out) > 1: - out = out[1:] - else: - break - - return out - - -def decode(pred): - charactersS = characters + u' ' - t = pred.argmax(axis=2)[0] - length = len(t) - char_list = [] - n = len(characters) - for i in range(length): - if t[i] != n and (not (i > 0 and t[i - 1] == t[i])): - char_list.append(charactersS[t[i]]) - return u''.join(char_list) -import utils -import torch.nn as nn -import sys -sys.path.insert(1, "./crnn") - - -class BidirectionalLSTM(nn.Module): - def __init__(self, nIn, nHidden, nOut, ngpu): - super(BidirectionalLSTM, self).__init__() - self.ngpu = ngpu - - self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True) - self.embedding = nn.Linear(nHidden * 2, nOut) - - def forward(self, input): - recurrent, _ = utils.data_parallel(self.rnn, input, - self.ngpu) # [T, b, h * 2] - - T, b, h = recurrent.size() - t_rec = recurrent.view(T * b, h) - output = 
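# Both decoders in the scraped code (util.strLabelConverter.decode for the
# pytorch CRNN, and the keras decode() above) apply the same greedy CTC rule:
# drop blanks and collapse consecutive repeats. A standalone sketch -- note
# the pytorch converter reserves index 0 for the blank, while the keras head
# puts the blank at the last index.
def ctc_greedy_decode(indices, alphabet, blank=0):
    out, prev = [], None
    for i in indices:
        if i != blank and i != prev:
            out.append(alphabet[i - 1])  # labels are 1-based when blank == 0
        prev = i
    return ''.join(out)

# e.g. ctc_greedy_decode([1, 1, 0, 1, 2], u'ab') == u'aab'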
utils.data_parallel(self.embedding, t_rec, - self.ngpu) # [T * b, nOut] - output = output.view(T, b, -1) - - return output - - -class CRNN(nn.Module): - def __init__(self, imgH, nc, nclass, nh, ngpu, n_rnn=2, leakyRelu=False): - super(CRNN, self).__init__() - self.ngpu = ngpu - assert imgH % 16 == 0, 'imgH has to be a multiple of 16' - - ks = [3, 3, 3, 3, 3, 3, 2] - ps = [1, 1, 1, 1, 1, 1, 0] - ss = [1, 1, 1, 1, 1, 1, 1] - nm = [64, 128, 256, 256, 512, 512, 512] - - cnn = nn.Sequential() - - def convRelu(i, batchNormalization=False): - nIn = nc if i == 0 else nm[i - 1] - nOut = nm[i] - cnn.add_module('conv{0}'.format(i), - nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i])) - if batchNormalization: - cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut)) - if leakyRelu: - cnn.add_module('relu{0}'.format(i), - nn.LeakyReLU(0.2, inplace=True)) - else: - cnn.add_module('relu{0}'.format(i), nn.ReLU(True)) - - convRelu(0) - cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2)) # 64x16x64 - convRelu(1) - cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2)) # 128x8x32 - convRelu(2, True) - convRelu(3) - cnn.add_module('pooling{0}'.format(2), - nn.MaxPool2d((2, 2), (2, 1), (0, 1))) # 256x4x16 - convRelu(4, True) - convRelu(5) - cnn.add_module('pooling{0}'.format(3), - nn.MaxPool2d((2, 2), (2, 1), (0, 1))) # 512x2x16 - convRelu(6, True) # 512x1x16 - - self.cnn = cnn - self.rnn = nn.Sequential( - BidirectionalLSTM(512, nh, nh, ngpu), - BidirectionalLSTM(nh, nh, nclass, ngpu)) - - def forward(self, input): - # conv features - conv = utils.data_parallel(self.cnn, input, self.ngpu) - b, c, h, w = conv.size() - assert h == 1, "the height of conv must be 1" - conv = conv.squeeze(2) - conv = conv.permute(2, 0, 1) # [w, b, c] - - # rnn features - output = utils.data_parallel(self.rnn, conv, self.ngpu) - - return output -#!/usr/bin/python -# encoding: utf-8 - -import torch.nn as nn -import torch.nn.parallel - - -def data_parallel(model, input, ngpu): - if isinstance(input.data, torch.cuda.FloatTensor) and ngpu > 1: - output = nn.parallel.data_parallel(model, input, range(ngpu)) - else: - output = model(input) - return output -import numpy as np - - -class Config: - MEAN = np.float32([102.9801, 115.9465, 122.7717]) - # MEAN=np.float32([100.0, 100.0, 100.0]) - TEST_GPU_ID = 0 - SCALE = 900 - MAX_SCALE = 1500 - TEXT_PROPOSALS_WIDTH = 0 - MIN_RATIO = 0.01 - LINE_MIN_SCORE = 0.6 - TEXT_LINE_NMS_THRESH = 0.3 - MAX_HORIZONTAL_GAP = 30 - TEXT_PROPOSALS_MIN_SCORE = 0.7 - TEXT_PROPOSALS_NMS_THRESH = 0.3 - MIN_NUM_PROPOSALS = 0 - MIN_V_OVERLAPS = 0.6 - MIN_SIZE_SIM = 0.6 -from text_proposal_connector import TextProposalConnector -from lib.utils.timer import Timer -from lib.fast_rcnn.nms_wrapper import nms -from lib.fast_rcnn.test import test_ctpn -from lib.fast_rcnn.config import cfg -from lib.networks.factory import get_network -import glob -import os -import shutil -import sys - -import cv2 -import numpy as np -import tensorflow as tf - -parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.append(parentdir) - - -CLASSES = ('__background__', 'text') - - -def connect_proposal(text_proposals, scores, im_size): - cp = TextProposalConnector() - line = cp.get_text_lines(text_proposals, scores, im_size) - return line - - -def save_results(image_name, im, line, thresh): - inds = np.where(line[:, -1] >= thresh)[0] - if len(inds) == 0: - return - - for i in inds: - bbox = line[i, :4] - score = line[i, -1] - cv2.rectangle( - im, (bbox[0], bbox[1]), (bbox[2], bbox[3]), - color=(0, 0, 255), 
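# Why CRNN.forward above asserts h == 1: after the conv stack the feature map
# is (batch, channels, 1, width), and each width step becomes one timestep for
# the stacked BidirectionalLSTMs. A toy check of that reshape:
import torch

b, c, h, w = 2, 512, 1, 16
conv = torch.zeros(b, c, h, w)
seq = conv.squeeze(2).permute(2, 0, 1)   # -> [w, b, c], one step per column
assert seq.shape == (w, b, c)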
- thickness=1) - image_name = image_name.split('/')[-1] - cv2.imwrite(os.path.join("../data/results", image_name), im) - - -def check_img(im): - im_size = im.shape - if max(im_size[0:2]) < 600: - img = np.zeros((600, 600, 3), dtype=np.uint8) - start_row = int((600 - im_size[0]) / 2) - start_col = int((600 - im_size[1]) / 2) - end_row = start_row + im_size[0] - end_col = start_col + im_size[1] - img[start_row:end_row, start_col:end_col, :] = im - return img - else: - return im - - -def ctpn(sess, net, image_name): - img = cv2.imread(image_name) - im = check_img(img) - timer = Timer() - timer.tic() - scores, boxes = test_ctpn(sess, net, im) - timer.toc() - # print('Detection took {:.3f}s for ' - # '{:d} object proposals').format(timer.total_time, boxes.shape[0]) - - # Visualize detections for each class - CONF_THRESH = 0.9 - NMS_THRESH = 0.3 - dets = np.hstack((boxes, scores[:, np.newaxis])).astype(np.float32) - keep = nms(dets, NMS_THRESH) - dets = dets[keep, :] - - keep = np.where(dets[:, 4] >= 0.7)[0] - dets = dets[keep, :] - line = connect_proposal(dets[:, 0:4], dets[:, 4], im.shape) - save_results(image_name, im, line, thresh=0.9) - - -if __name__ == '__main__': - if os.path.exists("../data/results/"): - shutil.rmtree("../data/results/") - os.makedirs("../data/results/") - - cfg.TEST.HAS_RPN = True # Use RPN for proposals - # init session - config = tf.ConfigProto(allow_soft_placement=True) - sess = tf.Session(config=config) - # load network - net = get_network("VGGnet_test") - # load model - print('Loading network {:s}... '.format("VGGnet_test")), - saver = tf.train.Saver() - # saver.restore(sess, - # os.path.join(os.getcwd(), "checkpoints/model_final.ckpt")) - saver.restore(sess, - os.path.join(os.getcwd(), - "/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/ctpn/checkpoints/VGGnet_fast_rcnn_iter_50000.ckpt")) - print(' done.') - - # Warmup on a dummy image - im = 128 * np.ones((300, 300, 3), dtype=np.uint8) - for i in range(2): - _, _ = test_ctpn(sess, net, im) - - im_names = glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.png')) + \ - glob.glob(os.path.join(cfg.DATA_DIR, 'demo', '*.jpg')) - - for im_name in im_names: - print('~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~') - print('Demo for {:s}'.format(im_name)) - ctpn(sess, net, im_name) -# coding:utf-8 -from .text_proposal_connector import TextProposalConnector -from ..lib.fast_rcnn.nms_wrapper import nms -import sys - -import numpy as np - -from .cfg import Config as cfg -from .other import normalize - -sys.path.append('..') -# from lib.fast_rcnn.test import test_ctpn - - -class TextDetector: - """ - Detect text from an image - """ - - def __init__(self): - """ - pass - """ - self.text_proposal_connector = TextProposalConnector() - - def detect(self, text_proposals, scores, size): - """ - Detecting texts from an image - :return: the bounding boxes of the detected texts - """ - # text_proposals, scores=self.text_proposal_detector.detect(im, cfg.MEAN) - keep_inds = np.where(scores > cfg.TEXT_PROPOSALS_MIN_SCORE)[0] - text_proposals, scores = text_proposals[keep_inds], scores[keep_inds] - - sorted_indices = np.argsort(scores.ravel())[::-1] - text_proposals, scores = text_proposals[sorted_indices], scores[sorted_indices] - - # nms for text proposals - keep_inds = nms(np.hstack((text_proposals, scores)), - cfg.TEXT_PROPOSALS_NMS_THRESH) - text_proposals, scores = text_proposals[keep_inds], scores[keep_inds] - - scores = normalize(scores) - - text_lines = self.text_proposal_connector.get_text_lines( - text_proposals, scores, size) - - keep_inds 
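# For reference, a plain-NumPy stand-in with the same interface as the
# lib.fast_rcnn.nms_wrapper.nms call used above: dets is (N, 5) rows of
# [x1, y1, x2, y2, score]; returns indices of the kept boxes. This is the
# textbook greedy algorithm, not the repo's CUDA/Cython version.
import numpy as np

def nms(dets, thresh):
    x1, y1, x2, y2, scores = dets.T
    areas = (x2 - x1 + 1) * (y2 - y1 + 1)
    order = scores.argsort()[::-1]
    keep = []
    while order.size > 0:
        i = order[0]
        keep.append(i)
        xx1 = np.maximum(x1[i], x1[order[1:]])
        yy1 = np.maximum(y1[i], y1[order[1:]])
        xx2 = np.minimum(x2[i], x2[order[1:]])
        yy2 = np.minimum(y2[i], y2[order[1:]])
        inter = np.maximum(0.0, xx2 - xx1 + 1) * np.maximum(0.0, yy2 - yy1 + 1)
        iou = inter / (areas[i] + areas[order[1:]] - inter)
        order = order[np.where(iou <= thresh)[0] + 1]
    return keep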
= self.filter_boxes(text_lines) - text_lines = text_lines[keep_inds] - - if text_lines.shape[0] != 0: - keep_inds = nms(text_lines, cfg.TEXT_LINE_NMS_THRESH) - text_lines = text_lines[keep_inds] - - return text_lines - - def filter_boxes(self, boxes): - heights = boxes[:, 3] - boxes[:, 1] + 1 - widths = boxes[:, 2] - boxes[:, 0] + 1 - scores = boxes[:, -1] - return np.where((widths / heights > cfg.MIN_RATIO) & (scores > cfg.LINE_MIN_SCORE) & - (widths > (cfg.TEXT_PROPOSALS_WIDTH * cfg.MIN_NUM_PROPOSALS)))[0] -from lib.fast_rcnn.test import test_ctpn -from lib.networks.factory import get_network -from lib.fast_rcnn.config import cfg -import sys -import os - -import tensorflow as tf - -from .cfg import Config -from .other import resize_im -base_path = os.path.abspath(os.path.join( - os.path.dirname(__file__), '..', '..')) - - -sys.path.append(os.getcwd()) - -# from ..lib.networks.factory import get_network -# from ..lib.fast_rcnn.config import cfg -# from..lib.fast_rcnn.test import test_ctpn -''' -load network -输入的名称为'Net_model' -'VGGnet_test'--test -'VGGnet_train'-train -''' - - -def load_tf_model(): - cfg.TEST.HAS_RPN = True # Use RPN for proposals - # init session - config = tf.ConfigProto(allow_soft_placement=True) - net = get_network("VGGnet_test") - # load model - saver = tf.train.Saver() - # sess = tf.Session(config=config) - sess = tf.Session() - ckpt_path = '/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/ctpn/ctpn/retrain/ckpt' - ckpt = tf.train.get_checkpoint_state(ckpt_path) - reader = tf.train.NewCheckpointReader(ckpt.model_checkpoint_path) - var_to_shape_map = reader.get_variable_to_shape_map() - for key in var_to_shape_map: - print("Tensor_name is : ", key) - # print(reader.get_tensor(key)) - saver.restore(sess, ckpt.model_checkpoint_path) - print("load vggnet done") - return sess, saver, net - - -# init model -sess, saver, net = load_tf_model() - - -# 进行文本识别 -def ctpn(img): - """ - text box detect - """ - scale, max_scale = Config.SCALE, Config.MAX_SCALE - # 对图像进行resize,输出的图像长宽 - img, f = resize_im(img, scale=scale, max_scale=max_scale) - scores, boxes = test_ctpn(sess, net, img) - return scores, boxes, img -import cv2 -import numpy as np -from matplotlib import cm - - -def prepare_img(im, mean): - """ - transform img into caffe's input img. 
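# load_tf_model above follows the standard TF-1.x restore pattern. Stripped to
# its core (and assuming the VGGnet graph has already been constructed, since
# tf.train.Saver() needs variables to exist):
import tensorflow as tf

def restore_session(ckpt_dir):
    sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True))
    saver = tf.train.Saver()
    ckpt = tf.train.get_checkpoint_state(ckpt_dir)  # parses ckpt_dir/checkpoint
    saver.restore(sess, ckpt.model_checkpoint_path)
    return sess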
- """ - im_data = np.transpose(im - mean, (2, 0, 1)) - return im_data - - -def draw_boxes(im, - bboxes, - is_display=True, - color=None, - caption="Image", - wait=True): - """ - boxes: bounding boxes - """ - text_recs = np.zeros((len(bboxes), 8), np.int) - - im = im.copy() - index = 0 - for box in bboxes: - if color == None: - if len(box) == 8 or len(box) == 9: - c = tuple(cm.jet([box[-1]])[0, 2::-1] * 255) - else: - c = tuple(np.random.randint(0, 256, 3)) - else: - c = color - - b1 = box[6] - box[7] / 2 - b2 = box[6] + box[7] / 2 - x1 = box[0] - y1 = box[5] * box[0] + b1 - x2 = box[2] - y2 = box[5] * box[2] + b1 - x3 = box[0] - y3 = box[5] * box[0] + b2 - x4 = box[2] - y4 = box[5] * box[2] + b2 - - disX = x2 - x1 - disY = y2 - y1 - width = np.sqrt(disX * disX + disY * disY) - fTmp0 = y3 - y1 - fTmp1 = fTmp0 * disY / width - x = np.fabs(fTmp1 * disX / width) - y = np.fabs(fTmp1 * disY / width) - if box[5] < 0: - x1 -= x - y1 += y - x4 += x - y4 -= y - else: - x2 += x - y2 += y - x3 -= x - y3 -= y - cv2.line(im, (int(x1), int(y1)), (int(x2), int(y2)), c, 2) - cv2.line(im, (int(x1), int(y1)), (int(x3), int(y3)), c, 2) - cv2.line(im, (int(x4), int(y4)), (int(x2), int(y2)), c, 2) - cv2.line(im, (int(x3), int(y3)), (int(x4), int(y4)), c, 2) - text_recs[index, 0] = x1 - text_recs[index, 1] = y1 - text_recs[index, 2] = x2 - text_recs[index, 3] = y2 - text_recs[index, 4] = x3 - text_recs[index, 5] = y3 - text_recs[index, 6] = x4 - text_recs[index, 7] = y4 - index = index + 1 - # cv2.rectangle(im, tuple(box[:2]), tuple(box[2:4]), c,2) - # cv2.waitKey(0) - # cv2.imshow('kk', im) - cv2.imwrite( - '/Users/xiaofeng/Code/Github/Chinese-OCR/test/test_result.png', im) - - return text_recs, im - - -def threshold(coords, min_, max_): - return np.maximum(np.minimum(coords, max_), min_) - - -def clip_boxes(boxes, im_shape): - """ - Clip boxes to image boundaries. 
- """ - boxes[:, 0::2] = threshold(boxes[:, 0::2], 0, im_shape[1] - 1) - boxes[:, 1::2] = threshold(boxes[:, 1::2], 0, im_shape[0] - 1) - return boxes - - -def normalize(data): - if data.shape[0] == 0: - return data - max_ = data.max() - min_ = data.min() - return (data - min_) / (max_ - min_) if max_ - min_ != 0 else data - min_ - - -def resize_im(im, scale, max_scale=None): - # 按照scale和图片的长宽的最小值的比值作为输入模型的图片的尺寸 - f = float(scale) / min(im.shape[0], im.shape[1]) - if max_scale != None and f * max(im.shape[0], im.shape[1]) > max_scale: - f = float(max_scale) / max(im.shape[0], im.shape[1]) - return cv2.resize(im, (0, 0), fx=f, fy=f), f - # return cv2.resize(im, (0, 0), fx=1.2, fy=1.2), f - - -class Graph: - def __init__(self, graph): - self.graph = graph - - def sub_graphs_connected(self): - sub_graphs = [] - for index in range(self.graph.shape[0]): - if not self.graph[:, index].any() and self.graph[index, :].any(): - v = index - sub_graphs.append([v]) - while self.graph[v, :].any(): - v = np.where(self.graph[v, :])[0][0] - sub_graphs[-1].append(v) - return sub_graphs -from .text_proposal_graph_builder import TextProposalGraphBuilder -import numpy as np -import os -import sys -parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.append(parentdir) - - -class TextProposalConnector: - """ - Connect text proposals into text lines - """ - - def __init__(self): - self.graph_builder = TextProposalGraphBuilder() - - def group_text_proposals(self, text_proposals, scores, im_size): - graph = self.graph_builder.build_graph(text_proposals, scores, im_size) - return graph.sub_graphs_connected() - - def fit_y(self, X, Y, x1, x2): - len(X) != 0 - # if X only include one point, the function will get line y=Y[0] - if np.sum(X == X[0]) == len(X): - return Y[0], Y[0] - p = np.poly1d(np.polyfit(X, Y, 1)) - return p(x1), p(x2) - - def get_text_lines(self, text_proposals, scores, im_size): - """ - text_proposals:boxes - - """ - # tp=text proposal - tp_groups = self.group_text_proposals(text_proposals, scores, - im_size) # find the text line - - text_lines = np.zeros((len(tp_groups), 8), np.float32) - - for index, tp_indices in enumerate(tp_groups): - text_line_boxes = text_proposals[list(tp_indices)] - num = np.size(text_line_boxes) # find - X = (text_line_boxes[:, 0] + text_line_boxes[:, 2]) / 2 - Y = (text_line_boxes[:, 1] + text_line_boxes[:, 3]) / 2 - - z1 = np.polyfit(X, Y, 1) - p1 = np.poly1d(z1) - - x0 = np.min(text_line_boxes[:, 0]) - x1 = np.max(text_line_boxes[:, 2]) - - offset = (text_line_boxes[0, 2] - text_line_boxes[0, 0]) * 0.5 - - lt_y, rt_y = self.fit_y(text_line_boxes[:, 0], - text_line_boxes[:, 1], x0 + offset, - x1 - offset) - lb_y, rb_y = self.fit_y(text_line_boxes[:, 0], - text_line_boxes[:, 3], x0 + offset, - x1 - offset) - - # the score of a text line is the average score of the scores - # of all text proposals contained in the text line - score = scores[list(tp_indices)].sum() / float(len(tp_indices)) - - text_lines[index, 0] = x0 - text_lines[index, 1] = min(lt_y, rt_y) - text_lines[index, 2] = x1 - text_lines[index, 3] = max(lb_y, rb_y) - text_lines[index, 4] = score - text_lines[index, 5] = z1[0] - text_lines[index, 6] = z1[1] - height = np.mean((text_line_boxes[:, 3] - text_line_boxes[:, 1])) - text_lines[index, 7] = height + 2.5 - # text_lines=clip_boxes(text_lines, im_size) - - return text_lines -from .other import Graph -from .cfg import Config as cfg -import numpy as np - -import os -import sys -parentdir = 
os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.append(parentdir) - - -class TextProposalGraphBuilder: - """ - Build Text proposals into a graph. - """ - - def get_successions(self, index): - box = self.text_proposals[index] - results = [] - for left in range(int(box[0]) + 1, min(int(box[0]) + cfg.MAX_HORIZONTAL_GAP + 1, self.im_size[1])): - adj_box_indices = self.boxes_table[left] - for adj_box_index in adj_box_indices: - if self.meet_v_iou(adj_box_index, index): - results.append(adj_box_index) - if len(results) != 0: - return results - return results - - def get_precursors(self, index): - box = self.text_proposals[index] - results = [] - for left in range(int(box[0]) - 1, max(int(box[0] - cfg.MAX_HORIZONTAL_GAP), 0) - 1, -1): - adj_box_indices = self.boxes_table[left] - for adj_box_index in adj_box_indices: - if self.meet_v_iou(adj_box_index, index): - results.append(adj_box_index) - if len(results) != 0: - return results - return results - - def is_succession_node(self, index, succession_index): - precursors = self.get_precursors(succession_index) - if self.scores[index] >= np.max(self.scores[precursors]): - return True - return False - - def meet_v_iou(self, index1, index2): - def overlaps_v(index1, index2): - h1 = self.heights[index1] - h2 = self.heights[index2] - y0 = max(self.text_proposals[index2][1], - self.text_proposals[index1][1]) - y1 = min(self.text_proposals[index2][3], - self.text_proposals[index1][3]) - return max(0, y1 - y0 + 1) / min(h1, h2) - - def size_similarity(index1, index2): - h1 = self.heights[index1] - h2 = self.heights[index2] - return min(h1, h2) / max(h1, h2) - - return overlaps_v(index1, index2) >= cfg.MIN_V_OVERLAPS and \ - size_similarity(index1, index2) >= cfg.MIN_SIZE_SIM - - def build_graph(self, text_proposals, scores, im_size): - self.text_proposals = text_proposals - self.scores = scores - self.im_size = im_size - self.heights = text_proposals[:, 3] - text_proposals[:, 1] + 1 - - boxes_table = [[] for _ in range(self.im_size[1])] - for index, box in enumerate(text_proposals): - boxes_table[int(box[0])].append(index) - self.boxes_table = boxes_table - - graph = np.zeros( - (text_proposals.shape[0], text_proposals.shape[0]), np.bool) - - for index, box in enumerate(text_proposals): - successions = self.get_successions(index) - if len(successions) == 0: - continue - succession_index = successions[np.argmax(scores[successions])] - if self.is_succession_node(index, succession_index): - # NOTE: a box can have multiple successions(precursors) if multiple successions(precursors) - # have equal scores. 
- graph[index, succession_index] = True - return Graph(graph) -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# _Author_: xiaofeng -# Date: 2018-04-16 10:55:15 -# Last Modified by: xiaofeng -# Last Modified time: 2018-04-16 10:55:15 -''' -使用keras进行网络训练,速度相对pytorch比较慢 -''' -from lib.fast_rcnn.config import cfg -from lib.networks.factory import get_network -from lib.datasets.factory import get_imdb -from lib.fast_rcnn.config import cfg_from_file, get_output_dir, get_log_dir -from lib.fast_rcnn.train import get_training_roidb, train_net -import os.path as osp -import pprint -import sys -import os - -# sys.path.append(os.getcwd()) -# this_dir = os.path.dirname(__file__) -parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.append(parentdir) - - -if __name__ == '__main__': - # 将text.yml的配置与默认config中的默认配置进行合并 - cfg_from_file('text.yml') - print('Using config:~~~~~~~~~~~~~~~~') - # 根据给定的名字,得到要加载的数据集 - imdb = get_imdb('voc_2007_trainval') - print('Loaded dataset `{:s}` for training'.format(imdb.name)) - # 准备训练数据 - roidb = get_training_roidb(imdb) - # 模型输出的路径 - output_dir = get_output_dir(imdb, None) - # summary的输出路径 - log_dir = get_log_dir(imdb) - print('Output will be saved to `{:s}`'.format(output_dir)) - print('Logs will be saved to `{:s}`'.format(log_dir)) - - device_name = '/gpu:0' - print(device_name) - - network = get_network('VGGnet_train') - - train_net( - network, - imdb, - roidb, - output_dir=output_dir, - log_dir=log_dir, - # pretrained_model= - # '/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/ctpn/pretrain/VGG_imagenet.npy', - # pretrained_model='/home/xiaofeng/data/ctpn/pretrainde_vgg', - pretrained_model=None, - max_iters=180000, - restore=bool(int(1))) -from . import fast_rcnn -from xml.dom.minidom import Document -import cv2 -import os -import glob -import shutil -import numpy as np - - -def generate_xml(name, lines, img_size, class_sets, doncateothers=True): - doc = Document() - - def append_xml_node_attr(child, parent=None, text=None): - ele = doc.createElement(child) - if not text is None: - text_node = doc.createTextNode(text) - ele.appendChild(text_node) - parent = doc if parent is None else parent - parent.appendChild(ele) - return ele - - img_name = name + '.jpg' - # create header - annotation = append_xml_node_attr('annotation') - append_xml_node_attr('folder', parent=annotation, text='text') - append_xml_node_attr('filename', parent=annotation, text=img_name) - source = append_xml_node_attr('source', parent=annotation) - append_xml_node_attr('database', parent=source, text='coco_text_database') - append_xml_node_attr('annotation', parent=source, text='text') - append_xml_node_attr('image', parent=source, text='text') - append_xml_node_attr('flickrid', parent=source, text='000000') - owner = append_xml_node_attr('owner', parent=annotation) - append_xml_node_attr('name', parent=owner, text='ms') - size = append_xml_node_attr('size', annotation) - append_xml_node_attr('width', size, str(img_size[1])) - append_xml_node_attr('height', size, str(img_size[0])) - append_xml_node_attr('depth', size, str(img_size[2])) - append_xml_node_attr('segmented', parent=annotation, text='0') - - # create objects - objs = [] - for line in lines: - splitted_line = line.strip().lower().split() - cls = splitted_line[0].lower() - if not doncateothers and cls not in class_sets: - continue - cls = 'dontcare' if cls not in class_sets else cls - if cls == 'dontcare': - continue - obj = append_xml_node_attr('object', parent=annotation) - occlusion = int(0) - x1, y1, x2, 
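# generate_xml in this ToVoc converter builds Pascal-VOC annotations with
# xml.dom.minidom via the append_xml_node_attr helper. The underlying pattern,
# reduced to a few lines (the filename is a made-up example):
from xml.dom.minidom import Document

doc = Document()
annotation = doc.createElement('annotation')
doc.appendChild(annotation)
filename = doc.createElement('filename')
filename.appendChild(doc.createTextNode('img_001.jpg'))
annotation.appendChild(filename)
print(doc.toprettyxml(indent='    '))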
y2 = int(float(splitted_line[1]) + 1), int(float(splitted_line[2]) + 1), \ - int(float(splitted_line[3]) + 1), int(float(splitted_line[4]) + 1) - truncation = float(0) - difficult = 1 if _is_hard( - cls, truncation, occlusion, x1, y1, x2, y2) else 0 - truncted = 0 if truncation < 0.5 else 1 - - append_xml_node_attr('name', parent=obj, text=cls) - append_xml_node_attr('pose', parent=obj, text='none') - append_xml_node_attr('truncated', parent=obj, text=str(truncted)) - append_xml_node_attr('difficult', parent=obj, text=str(int(difficult))) - bb = append_xml_node_attr('bndbox', parent=obj) - append_xml_node_attr('xmin', parent=bb, text=str(x1)) - append_xml_node_attr('ymin', parent=bb, text=str(y1)) - append_xml_node_attr('xmax', parent=bb, text=str(x2)) - append_xml_node_attr('ymax', parent=bb, text=str(y2)) - - o = {'class': cls, 'box': np.asarray([x1, y1, x2, y2], dtype=float), - 'truncation': truncation, 'difficult': difficult, 'occlusion': occlusion} - objs.append(o) - - return doc, objs - - -def _is_hard(cls, truncation, occlusion, x1, y1, x2, y2): - hard = False - if y2 - y1 < 25 and occlusion >= 2: - hard = True - return hard - if occlusion >= 3: - hard = True - return hard - if truncation > 0.8: - hard = True - return hard - return hard - - -def build_voc_dirs(outdir): - def mkdir(dir): return os.makedirs( - dir) if not os.path.exists(dir) else None - mkdir(outdir) - mkdir(os.path.join(outdir, 'Annotations')) - mkdir(os.path.join(outdir, 'ImageSets')) - mkdir(os.path.join(outdir, 'ImageSets', 'Layout')) - mkdir(os.path.join(outdir, 'ImageSets', 'Main')) - mkdir(os.path.join(outdir, 'ImageSets', 'Segmentation')) - mkdir(os.path.join(outdir, 'JPEGImages')) - mkdir(os.path.join(outdir, 'SegmentationClass')) - mkdir(os.path.join(outdir, 'SegmentationObject')) - return os.path.join(outdir, 'Annotations'), os.path.join(outdir, 'JPEGImages'), os.path.join(outdir, 'ImageSets', - 'Main') - - -if __name__ == '__main__': - _outdir = 'TEXTVOC/VOC2007' - _draw = bool(0) - _dest_label_dir, _dest_img_dir, _dest_set_dir = build_voc_dirs(_outdir) - _doncateothers = bool(1) - for dset in ['train']: - _labeldir = 'label_tmp' - _imagedir = 're_image' - class_sets = ('text', 'dontcare') - class_sets_dict = dict((k, i) for i, k in enumerate(class_sets)) - allclasses = {} - fs = [open(os.path.join(_dest_set_dir, cls + '_' + dset + '.txt'), 'w') - for cls in class_sets] - ftrain = open(os.path.join(_dest_set_dir, dset + '.txt'), 'w') - - files = glob.glob(os.path.join(_labeldir, '*.txt')) - files.sort() - for file in files: - path, basename = os.path.split(file) - stem, ext = os.path.splitext(basename) - with open(file, 'r') as f: - lines = f.readlines() - img_file = os.path.join(_imagedir, stem + '.jpg') - - print(img_file) - img = cv2.imread(img_file) - img_size = img.shape - - doc, objs = generate_xml( - stem, lines, img_size, class_sets=class_sets, doncateothers=_doncateothers) - - cv2.imwrite(os.path.join(_dest_img_dir, stem + '.jpg'), img) - xmlfile = os.path.join(_dest_label_dir, stem + '.xml') - with open(xmlfile, 'w') as f: - f.write(doc.toprettyxml(indent=' ')) - - ftrain.writelines(stem + '\n') - - cls_in_image = set([o['class'] for o in objs]) - - for obj in objs: - cls = obj['class'] - allclasses[cls] = 0 \ - if not cls in list(allclasses.keys()) else allclasses[cls] + 1 - - for cls in cls_in_image: - if cls in class_sets: - fs[class_sets_dict[cls]].writelines(stem + ' 1\n') - for cls in class_sets: - if cls not in cls_in_image: - fs[class_sets_dict[cls]].writelines(stem + ' -1\n') - - 
(f.close() for f in fs) - ftrain.close() - - print('~~~~~~~~~~~~~~~~~~~') - print(allclasses) - print('~~~~~~~~~~~~~~~~~~~') - shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'), - os.path.join(_dest_set_dir, 'val.txt')) - shutil.copyfile(os.path.join(_dest_set_dir, 'train.txt'), - os.path.join(_dest_set_dir, 'trainval.txt')) - for cls in class_sets: - shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'), - os.path.join(_dest_set_dir, cls + '_trainval.txt')) - shutil.copyfile(os.path.join(_dest_set_dir, cls + '_train.txt'), - os.path.join(_dest_set_dir, cls + '_val.txt')) -import os -import numpy as np -import math -import cv2 as cv - -path = '/media/D/code/OCR/text-detection-ctpn/data/mlt_english+chinese/image' -gt_path = '/media/D/code/OCR/text-detection-ctpn/data/mlt_english+chinese/label' -out_path = 're_image' -if not os.path.exists(out_path): - os.makedirs(out_path) -files = os.listdir(path) -files.sort() -# files=files[:100] -for file in files: - _, basename = os.path.split(file) - if basename.lower().split('.')[-1] not in ['jpg', 'png']: - continue - stem, ext = os.path.splitext(basename) - gt_file = os.path.join(gt_path, 'gt_' + stem + '.txt') - img_path = os.path.join(path, file) - print(img_path) - img = cv.imread(img_path) - img_size = img.shape - im_size_min = np.min(img_size[0:2]) - im_size_max = np.max(img_size[0:2]) - - im_scale = float(600) / float(im_size_min) - if np.round(im_scale * im_size_max) > 1200: - im_scale = float(1200) / float(im_size_max) - re_im = cv.resize(img, None, None, fx=im_scale, - fy=im_scale, interpolation=cv.INTER_LINEAR) - re_size = re_im.shape - cv.imwrite(os.path.join(out_path, stem) + '.jpg', re_im) - - with open(gt_file, 'r') as f: - lines = f.readlines() - for line in lines: - splitted_line = line.strip().lower().split(',') - pt_x = np.zeros((4, 1)) - pt_y = np.zeros((4, 1)) - pt_x[0, 0] = int(float(splitted_line[0]) / img_size[1] * re_size[1]) - pt_y[0, 0] = int(float(splitted_line[1]) / img_size[0] * re_size[0]) - pt_x[1, 0] = int(float(splitted_line[2]) / img_size[1] * re_size[1]) - pt_y[1, 0] = int(float(splitted_line[3]) / img_size[0] * re_size[0]) - pt_x[2, 0] = int(float(splitted_line[4]) / img_size[1] * re_size[1]) - pt_y[2, 0] = int(float(splitted_line[5]) / img_size[0] * re_size[0]) - pt_x[3, 0] = int(float(splitted_line[6]) / img_size[1] * re_size[1]) - pt_y[3, 0] = int(float(splitted_line[7]) / img_size[0] * re_size[0]) - - ind_x = np.argsort(pt_x, axis=0) - pt_x = pt_x[ind_x] - pt_y = pt_y[ind_x] - - if pt_y[0] < pt_y[1]: - pt1 = (pt_x[0], pt_y[0]) - pt3 = (pt_x[1], pt_y[1]) - else: - pt1 = (pt_x[1], pt_y[1]) - pt3 = (pt_x[0], pt_y[0]) - - if pt_y[2] < pt_y[3]: - pt2 = (pt_x[2], pt_y[2]) - pt4 = (pt_x[3], pt_y[3]) - else: - pt2 = (pt_x[3], pt_y[3]) - pt4 = (pt_x[2], pt_y[2]) - - xmin = int(min(pt1[0], pt2[0])) - ymin = int(min(pt1[1], pt2[1])) - xmax = int(max(pt2[0], pt4[0])) - ymax = int(max(pt3[1], pt4[1])) - - if xmin < 0: - xmin = 0 - if xmax > re_size[1] - 1: - xmax = re_size[1] - 1 - if ymin < 0: - ymin = 0 - if ymax > re_size[0] - 1: - ymax = re_size[0] - 1 - - width = xmax - xmin - height = ymax - ymin - - # reimplement - step = 16.0 - x_left = [] - x_right = [] - x_left.append(xmin) - x_left_start = int(math.ceil(xmin / 16.0) * 16.0) - if x_left_start == xmin: - x_left_start = xmin + 16 - for i in np.arange(x_left_start, xmax, 16): - x_left.append(i) - x_left = np.array(x_left) - - x_right.append(x_left_start - 1) - for i in range(1, len(x_left) - 1): - x_right.append(x_left[i] + 15) - 
x_right.append(xmax) - x_right = np.array(x_right) - - idx = np.where(x_left == x_right) - x_left = np.delete(x_left, idx, axis=0) - x_right = np.delete(x_right, idx, axis=0) - - if not os.path.exists('label_tmp'): - os.makedirs('label_tmp') - with open(os.path.join('label_tmp', stem) + '.txt', 'a') as f: - for i in range(len(x_left)): - f.writelines("text\t") - f.writelines(str(int(x_left[i]))) - f.writelines("\t") - f.writelines(str(int(ymin))) - f.writelines("\t") - f.writelines(str(int(x_right[i]))) - f.writelines("\t") - f.writelines(str(int(ymax))) - f.writelines("\n") -import glob -import os - -import cv2 -import lmdb # install lmdb by "pip install lmdb" -import numpy as np - - -# from genLineText import GenTextImage - -def checkImageIsValid(imageBin): - if imageBin is None: - return False - imageBuf = np.fromstring(imageBin, dtype=np.uint8) - img = cv2.imdecode(imageBuf, cv2.IMREAD_GRAYSCALE) - if img is None: - return False - imgH, imgW = img.shape[0], img.shape[1] - if imgH * imgW == 0: - return False - return True - - -def writeCache(env, cache): - with env.begin(write=True) as txn: - for k, v in cache.items(): - txn.put(k.encode(), v) - - -def createDataset(outputPath, imagePathList, labelList, lexiconList=None, checkValid=True): - """ - Create LMDB dataset for CRNN training. - - ARGS: - outputPath : LMDB output path - imagePathList : list of image path - labelList : list of corresponding groundtruth texts - lexiconList : (optional) list of lexicon lists - checkValid : if true, check the validity of every image - """ - # print (len(imagePathList) , len(labelList)) - assert (len(imagePathList) == len(labelList)) - nSamples = len(imagePathList) - print('...................') - env = lmdb.open(outputPath, map_size=1099511627776) - - cache = {} - cnt = 1 - for i in range(nSamples): - imagePath = imagePathList[i] - label = labelList[i] - if not os.path.exists(imagePath): - print('%s does not exist' % imagePath) - continue - with open(imagePath, 'rb') as f: - imageBin = f.read() - if checkValid: - if not checkImageIsValid(imageBin): - print('%s is not a valid image' % imagePath) - continue - - imageKey = 'image-%09d' % cnt - labelKey = 'label-%09d' % cnt - cache[imageKey] = imageBin - cache[labelKey] = label.encode() - if lexiconList: - lexiconKey = 'lexicon-%09d' % cnt - cache[lexiconKey] = ' '.join(lexiconList[i]).encode() - if cnt % 1000 == 0: - writeCache(env, cache) - cache = {} - print('Written %d / %d' % (cnt, nSamples)) - cnt += 1 - nSamples = cnt - 1 - cache['num-samples'] = str(nSamples).encode() - writeCache(env, cache) - print('Created dataset with %d samples' % nSamples) - - -def read_text(path): - with open(path) as f: - text = f.read() - text = text.strip() - - return text - - -if __name__ == '__main__': - - # lmdb 输出目录 - outputPath = '../data/lmdb/train' - - path = '../data/dataline/*.jpg' - imagePathList = glob.glob(path) - print('------------', len(imagePathList), '------------') - imgLabelLists = [] - for p in imagePathList: - try: - imgLabelLists.append((p, read_text(p.replace('.jpg', '.txt')))) - except: - continue - - # imgLabelList = [ (p,read_text(p.replace('.jpg','.txt'))) for p in imagePathList] - # sort by lebelList - imgLabelList = sorted(imgLabelLists, key=lambda x: len(x[1])) - imgPaths = [p[0] for p in imgLabelList] - txtLists = [p[1] for p in imgLabelList] - - createDataset(outputPath, imgPaths, txtLists, - lexiconList=None, checkValid=True) -# -*- coding: utf-8 -*- -from PIL import Image, ImageDraw, ImageFont -import random -import os - - -def 
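# The resize/split script above carves every ground-truth box into the 16-px
# strips CTPN trains on. A compact variant of that slicing (same alignment
# rule, written as one helper; spans may differ slightly at the right edge):
from math import ceil

def split_box(xmin, xmax, stride=16):
    edges = list(range(int(ceil((xmin + 1) / stride)) * stride, xmax + 1, stride))
    lefts = [xmin] + edges
    rights = [e - 1 for e in edges] + [xmax]
    return [(l, r) for l, r in zip(lefts, rights) if l != r]

# split_box(5, 50) -> [(5, 15), (16, 31), (32, 47), (48, 50)]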
genImage(label, fontsize, color=(0, 0, 0), fontName="华文细黑.ttf"): - img = Image.new("RGB", ((int)(fontsize * 1.2 * len(label)), - (int)(fontsize * 2)), (255, 255, 255)) - font = ImageFont.truetype(fontName, fontsize) - draw = ImageDraw.Draw(img) - draw.text((0, 0), label, fill=color, font=font) - with open("../data/dataline/" + label + "-" + str(fontsize) + ".txt", "w", encoding='utf-8') as f: - f.write(label) - img.save("../data/dataline/" + label + "-" + str(fontsize) + ".jpg") - - -if __name__ == '__main__': - alphabet = """'疗绚诚娇溜题贿者廖更纳加奉公一就汴计与路房原妇208-7其>:],,骑刈全消昏傈安久钟嗅不影处驽蜿资关椤地瘸专问忖票嫉炎韵要月田节陂鄙捌备拳伺眼网盎大傍心东愉汇蹿科每业里航晏字平录先13彤鲶产稍督腴有象岳注绍在泺文定核名水过理让偷率等这发”为含肥酉相鄱七编猥锛日镀蒂掰倒辆栾栗综涩州雌滑馀了机块司宰甙兴矽抚保用沧秩如收息滥页疑埠!!姥异橹钇向下跄的椴沫国绥獠报开民蜇何分凇长讥藏掏施羽中讲派嘟人提浼间世而古多倪唇饯控庚首赛蜓味断制觉技替艰溢潮夕钺外摘枋动双单啮户枇确锦曜杜或能效霜盒然侗电晁放步鹃新杖蜂吒濂瞬评总隍对独合也是府青天诲墙组滴级邀帘示已时骸仄泅和遨店雇疫持巍踮境只亨目鉴崤闲体泄杂作般轰化解迂诿蛭璀腾告版服省师小规程线海办引二桧牌砺洄裴修图痫胡许犊事郛基柴呼食研奶律蛋因葆察戏褒戒再李骁工貂油鹅章啄休场给睡纷豆器捎说敏学会浒设诊格廓查来霓室溆¢诡寥焕舜柒狐回戟砾厄实翩尿五入径惭喹股宇篝|;美期云九祺扮靠锝槌系企酰阊暂蚕忻豁本羹执条钦H獒限进季楦于芘玖铋茯未答粘括样精欠矢甥帷嵩扣令仔风皈行支部蓉刮站蜡救钊汗松嫌成可.鹤院从交政怕活调球局验髌第韫谗串到圆年米/*友忿检区看自敢刃个兹弄流留同没齿星聆轼湖什三建蛔儿椋汕震颧鲤跟力情璺铨陪务指族训滦鄣濮扒商箱十召慷辗所莞管护臭横硒嗓接侦六露党馋驾剖高侬妪幂猗绺骐央酐孝筝课徇缰门男西项句谙瞒秃篇教碲罚声呐景前富嘴鳌稀免朋啬睐去赈鱼住肩愕速旁波厅健茼厥鲟谅投攸炔数方击呋谈绩别愫僚躬鹧胪炳招喇膨泵蹦毛结54谱识陕粽婚拟构且搜任潘比郢妨醪陀桔碘扎选哈骷楷亿明缆脯监睫逻婵共赴淝凡惦及达揖谩澹减焰蛹番祁柏员禄怡峤龙白叽生闯起细装谕竟聚钙上导渊按艾辘挡耒盹饪臀记邮蕙受各医搂普滇朗茸带翻酚(光堤墟蔷万幻〓瑙辈昧盏亘蛀吉铰请子假闻税井诩哨嫂好面琐校馊鬣缂营访炖占农缀否经钚棵趟张亟吏茶谨捻论迸堂玉信吧瞠乡姬寺咬溏苄皿意赉宝尔钰艺特唳踉都荣倚登荐丧奇涵批炭近符傩感道着菊虹仲众懈濯颞眺南释北缝标既茗整撼迤贲挎耱拒某妍卫哇英矶藩治他元领膜遮穗蛾飞荒棺劫么市火温拈棚洼转果奕卸迪伸泳斗邡侄涨屯萋胭氡崮枞惧冒彩斜手豚随旭淑妞形菌吲沱争驯歹挟兆柱传至包内响临红功弩衡寂禁老棍耆渍织害氵渑布载靥嗬虽苹咨娄库雉榜帜嘲套瑚亲簸欧边6腿旮抛吹瞳得镓梗厨继漾愣憨士策窑抑躯襟脏参贸言干绸鳄穷藜音折详)举悍甸癌黎谴死罩迁寒驷袖媒蒋掘模纠恣观祖蛆碍位稿主澧跌筏京锏帝贴证糠才黄鲸略炯饱四出园犀牧容汉杆浈汰瑷造虫瘩怪驴济应花沣谔夙旅价矿以考su呦晒巡茅准肟瓴詹仟褂译桌混宁怦郑抿些余鄂饴攒珑群阖岔琨藓预环洮岌宀杲瀵最常囡周踊女鼓袭喉简范薯遐疏粱黜禧法箔斤遥汝奥直贞撑置绱集她馅逗钧橱魉[恙躁唤9旺膘待脾惫购吗依盲度瘿蠖俾之镗拇鲵厝簧续款展啃表剔品钻腭损清锶统涌寸滨贪链吠冈伎迥咏吁览防迅失汾阔逵绀蔑列川凭努熨揪利俱绉抢鸨我即责膦易毓鹊刹玷岿空嘞绊排术估锷违们苟铜播肘件烫审鲂广像铌惰铟巳胍鲍康憧色恢想拷尤疳知SYFDA峄裕帮握搔氐氘难墒沮雨叁缥悴藐湫娟苑稠颛簇后阕闭蕤缚怎佞码嘤蔡痊舱螯帕赫昵升烬岫、疵蜻髁蕨隶烛械丑盂梁强鲛由拘揉劭龟撤钩呕孛费妻漂求阑崖秤甘通深补赃坎床啪承吼量暇钼烨阂擎脱逮称P神属矗华届狍葑汹育患窒蛰佼静槎运鳗庆逝曼疱克代官此麸耧蚌晟例础榛副测唰缢迹灬霁身岁赭扛又菡乜雾板读陷徉贯郁虑变钓菜圾现琢式乐维渔浜左吾脑钡警T啵拴偌漱湿硕止骼魄积燥联踢玛|则窿见振畿送班钽您赵刨印讨踝籍谡舌崧汽蔽沪酥绒怖财帖肱私莎勋羔霸励哼帐将帅渠纪婴娩岭厘滕吻伤坝冠戊隆瘁介涧物黍并姗奢蹑掣垸锴命箍捉病辖琰眭迩艘绌繁寅若毋思诉类诈燮轲酮狂重反职筱县委磕绣奖晋濉志徽肠呈獐坻口片碰几村柿劳料获亩惕晕厌号罢池正鏖煨家棕复尝懋蜥锅岛扰队坠瘾钬@卧疣镇譬冰彷频黯据垄采八缪瘫型熹砰楠襁箐但嘶绳啤拍盥穆傲洗盯塘怔筛丿台恒喂葛永¥烟酒桦书砂蚝缉态瀚袄圳轻蛛超榧遛姒奘铮右荽望偻卡丶氰附做革索戚坨桷唁垅榻岐偎坛莨山殊微骇陈爨推嗝驹澡藁呤卤嘻糅逛侵郓酌德摇※鬃被慨殡羸昌泡戛鞋河宪沿玲鲨翅哽源铅语照邯址荃佬顺鸳町霭睾瓢夸椁晓酿痈咔侏券噎湍签嚷离午尚社锤背孟使浪缦潍鞅军姹驶笑鳟鲁》孽钜绿洱礴焯椰颖囔乌孔巴互性椽哞聘昨早暮胶炀隧低彗昝铁呓氽藉喔癖瑗姨权胱韦堑蜜酋楝砝毁靓歙锲究屋喳骨辨碑武鸠宫辜烊适坡殃培佩供走蜈迟翼况姣凛浔吃飘债犟金促苛崇坂莳畔绂兵蠕斋根砍亢欢恬崔剁餐榫快扶‖濒缠鳜当彭驭浦篮昀锆秸钳弋娣瞑夷龛苫拱致%嵊障隐弑初娓抉汩累蓖"唬助苓昙押毙破城郧逢嚏獭瞻溱婿赊跨恼璧萃姻貉灵炉密氛陶砸谬衔点琛沛枳层岱诺脍榈埂征冷裁打蹴素瘘逞蛐聊激腱萘踵飒蓟吆取咙簋涓矩曝挺揣座你史舵焱尘苏笈脚溉榨诵樊邓焊义庶儋蟋蒲赦呷杞诠豪还试颓茉太除紫逃痴草充鳕珉祗墨渭烩蘸慕璇镶穴嵘恶骂险绋幕碉肺戳刘潞秣纾潜銮洛须罘销瘪汞兮屉r林厕质探划狸殚善煊烹〒锈逯宸辍泱柚袍远蹋嶙绝峥娥缍雀徵认镱谷=贩勉撩鄯斐洋非祚泾诒饿撬威晷搭芍锥笺蓦候琊档礁沼卵荠忑朝凹瑞头仪弧孵畏铆突衲车浩气茂悖厢枕酝戴湾邹飚攘锂写宵翁岷无喜丈挑嗟绛殉议槽具醇淞笃郴阅饼底壕砚弈询缕庹翟零筷暨舟闺甯撞麂茌蔼很珲捕棠角阉媛娲诽剿尉爵睬韩诰匣危糍镯立浏阳少盆舔擘匪申尬铣旯抖赘瓯居ˇ哮游锭茏歌坏甚秒舞沙仗劲潺阿燧郭嗖霏忠材奂耐跺砀输岖媳氟极摆灿今扔腻枝奎药熄吨话q额慑嘌协喀壳埭视著於愧陲翌峁颅佛腹聋侯咎叟秀颇存较罪哄岗扫栏钾羌己璨枭霉煌涸衿键镝益岢奏连夯睿冥均糖狞蹊稻爸刿胥煜丽肿璃掸跚灾垂樾濑乎莲窄犹撮战馄软络显鸢胸宾妲恕埔蝌份遇巧瞟粒恰剥桡博讯凯堇阶滤卖斌骚彬兑磺樱舷两娱福仃差找桁÷净把阴污戬雷碓蕲楚罡焖抽妫咒仑闱尽邑菁爱贷沥鞑牡嗉崴骤塌嗦订拮滓捡锻次坪杩臃箬融珂鹗宗枚降鸬妯阄堰盐毅必杨崃俺甬状莘货耸菱腼铸唏痤孚澳懒溅翘疙杷淼缙骰喊悉砻坷艇赁界谤纣宴晃茹归饭梢铡街抄肼鬟苯颂撷戈炒咆茭瘙负仰客琉铢封卑珥椿镧窨鬲寿御袤铃萎砖餮脒裳肪孕嫣馗嵇恳氯江石褶冢祸阻狈羞银靳透咳叼敷芷啥它瓤兰痘懊逑肌往捺坊甩呻〃沦忘膻祟菅剧崆智坯臧霍墅攻眯倘拢骠铐庭岙瓠′缺泥迢捶??郏喙掷沌纯秘种听绘固螨团香盗妒埚蓝拖旱荞铀血遏汲辰叩拽幅硬惶桀漠措泼唑齐肾念酱虚屁耶旗砦闵婉馆拭绅韧忏窝醋葺顾辞倜堆辋逆玟贱疾董惘倌锕淘嘀莽俭笏绑鲷杈择蟀粥嗯驰逾案谪褓胫哩昕颚鲢绠躺鹄崂儒俨丝尕泌啊萸彰幺吟骄苣弦脊瑰〈诛镁析闪剪侧哟框螃守嬗燕狭铈缮概迳痧鲲俯售笼痣扉挖满咋援邱扇歪便玑绦峡蛇叨〖泽胃斓喋怂坟猪该蚬炕弥赞棣晔娠挲狡创疖铕镭稷挫弭啾翔粉履苘哦楼秕铂土锣瘟挣栉习享桢袅磨桂谦延坚蔚噗署谟猬钎恐嬉雒倦衅亏璩睹刻殿王算雕麻丘柯骆丸塍谚添鲈垓桎蚯芥予飕镦谌窗醚菀亮搪莺蒿羁足J真轶悬衷靛翊掩哒炅掐冼妮l谐稚荆擒犯陵虏浓崽刍陌傻孜千靖演矜钕煽杰酗渗伞栋俗泫戍罕沾疽灏煦芬磴叱阱榉湃蜀叉醒彪租郡篷屎良垢隗弱陨峪砷掴颁胎雯绵贬沐撵隘篙暖曹陡栓填臼彦瓶琪潼哪鸡摩啦俟锋域耻蔫疯纹撇毒绶痛酯忍爪赳歆嘹辕烈册朴钱吮毯癜娃谀邵厮炽璞邃丐追词瓒忆轧芫谯喷弟半冕裙掖墉绮寝苔势顷褥切衮君佳嫒蚩霞佚洙逊镖暹唛&殒顶碗獗轭铺蛊废恹汨崩珍那杵曲纺夏薰傀闳淬姘舀拧卷楂恍讪厩寮篪赓乘灭盅鞣沟慎挂饺鼾杳树缨丛絮娌臻嗳篡侩述衰矛圈蚜匕筹匿濞晨叶骋郝挚蚴滞增侍描瓣吖嫦蟒匾圣赌毡癞恺百曳需篓肮庖帏卿驿遗蹬鬓骡歉芎胳屐禽烦晌寄媾狄翡苒船廉终痞殇々畦饶改拆悻萄£瓿乃訾桅匮溧拥纱铍骗蕃龋缬父佐疚栎醍掳蓄x惆颜鲆榆〔猎敌暴谥鲫贾罗玻缄扦芪癣落徒臾恿猩托邴肄牵春陛耀
刊拓蓓邳堕寇枉淌啡湄兽酷萼碚濠萤夹旬戮梭琥椭昔勺蜊绐晚孺僵宣摄冽旨萌忙蚤眉噼蟑付契瓜悼颡壁曾窕颢澎仿俑浑嵌浣乍碌褪乱蔟隙玩剐葫箫纲围伐决伙漩瑟刑肓镳缓蹭氨皓典畲坍铑檐塑洞倬储胴淳戾吐灼惺妙毕珐缈虱盖羰鸿磅谓髅娴苴唷蚣霹抨贤唠犬誓逍庠逼麓籼釉呜碧秧氩摔霄穸纨辟妈映完牛缴嗷炊恩荔茆掉紊慌莓羟阙萁磐另蕹辱鳐湮吡吩唐睦垠舒圜冗瞿溺芾囱匠僳汐菩饬漓黑霰浸濡窥毂蒡兢驻鹉芮诙迫雳厂忐臆猴鸣蚪栈箕羡渐莆捍眈哓趴蹼埕嚣骛宏淄斑噜严瑛垃椎诱压庾绞焘廿抡迄棘夫纬锹眨瞌侠脐竞瀑孳骧遁姜颦荪滚萦伪逸粳爬锁矣役趣洒颔诏逐奸甭惠攀蹄泛尼拼阮鹰亚颈惑勒〉际肛爷刚钨丰养冶鲽辉蔻画覆皴妊麦返醉皂擀〗酶凑粹悟诀硖港卜z杀涕±舍铠抵弛段敝镐奠拂轴跛袱et沉菇俎薪峦秭蟹历盟菠寡液肢喻染裱悱抱氙赤捅猛跑氮谣仁尺辊窍烙衍架擦倏璐瑁币楞胖夔趸邛惴饕虔蝎§哉贝宽辫炮扩饲籽魏菟锰伍猝末琳哚蛎邂呀姿鄞却歧仙恸椐森牒寤袒婆虢雅钉朵贼欲苞寰故龚坭嘘咫礼硷兀睢汶’铲烧绕诃浃钿哺柜讼颊璁腔洽咐脲簌筠镣玮鞠谁兼姆挥梯蝴谘漕刷躏宦弼b垌劈麟莉揭笙渎仕嗤仓配怏抬错泯镊孰猿邪仍秋鼬壹歇吵炼<尧射柬廷胧霾凳隋肚浮梦祥株堵退L鹫跎凶毽荟炫栩玳甜沂鹿顽伯爹赔蛴徐匡欣狰缸雹蟆疤默沤啜痂衣禅wih辽葳黝钗停沽棒馨颌肉吴硫悯劾娈马啧吊悌镑峭帆瀣涉咸疸滋泣翦拙癸钥蜒+尾庄凝泉婢渴谊乞陆锉糊鸦淮IBN晦弗乔庥葡尻席橡傣渣拿惩麋斛缃矮蛏岘鸽姐膏催奔镒喱蠡摧钯胤柠拐璋鸥卢荡倾^_珀逄萧塾掇贮笆聂圃冲嵬M滔笕值炙偶蜱搐梆汪蔬腑鸯蹇敞绯仨祯谆梧糗鑫啸豺囹猾巢柄瀛筑踌沭暗苁鱿蹉脂蘖牢热木吸溃宠序泞偿拜檩厚朐毗螳吞媚朽担蝗橘畴祈糟盱隼郜惜珠裨铵焙琚唯咚噪骊丫滢勤棉呸咣淀隔蕾窈饨挨煅短匙粕镜赣撕墩酬馁豌颐抗酣氓佑搁哭递耷涡桃贻碣截瘦昭镌蔓氚甲猕蕴蓬散拾纛狼猷铎埋旖矾讳囊糜迈粟蚂紧鲳瘢栽稼羊锄斟睁桥瓮蹙祉醺鼻昱剃跳篱跷蒜翎宅晖嗑壑峻癫屏狠陋袜途憎祀莹滟佶溥臣约盛峰磁慵婪拦莅朕鹦粲裤哎疡嫖琵窟堪谛嘉儡鳝斩郾驸酊妄胜贺徙傅噌钢栅庇恋匝巯邈尸锚粗佟蛟薹纵蚊郅绢锐苗俞篆淆膀鲜煎诶秽寻涮刺怀噶巨褰魅灶灌桉藕谜舸薄搀恽借牯痉渥愿亓耘杠柩锔蚶钣珈喘蹒幽赐稗晤莱泔扯肯菪裆腩豉疆骜腐倭珏唔粮亡润慰伽橄玄誉醐胆龊粼塬陇彼削嗣绾芽妗垭瘴爽薏寨龈泠弹赢漪猫嘧涂恤圭茧烽屑痕巾赖荸凰腮畈亵蹲偃苇澜艮换骺烘苕梓颉肇哗悄氤涠葬屠鹭植竺佯诣鲇瘀鲅邦移滁冯耕癔戌茬沁巩悠湘洪痹锟循谋腕鳃钠捞焉迎碱伫急榷奈邝卯辄皲卟醛畹忧稳雄昼缩阈睑扌耗曦涅捏瞧邕淖漉铝耦禹湛喽莼琅诸苎纂硅始嗨傥燃臂赅嘈呆贵屹壮肋亍蚀卅豹腆邬迭浊}童螂捐圩勐触寞汊壤荫膺渌芳懿遴螈泰蓼蛤茜舅枫朔膝眙避梅判鹜璜牍缅垫藻黔侥惚懂踩腰腈札丞唾慈顿摹荻琬~斧沈滂胁胀幄莜Z匀鄄掌绰茎焚赋萱谑汁铒瞎夺蜗野娆冀弯篁懵灞隽芡脘俐辩芯掺喏膈蝈觐悚踹蔗熠鼠呵抓橼峨畜缔禾崭弃熊摒凸拗穹蒙抒祛劝闫扳阵醌踪喵侣搬仅荧赎蝾琦买婧瞄寓皎冻赝箩莫瞰郊笫姝筒枪遣煸袋舆痱涛母〇启践耙绲盘遂昊搞槿诬纰泓惨檬亻越Co憩熵祷钒暧塔阗胰咄娶魔琶钞邻扬杉殴咽弓〆髻】吭揽霆拄殖脆彻岩芝勃辣剌钝嘎甄佘皖伦授徕憔挪皇庞稔芜踏溴兖卒擢饥鳞煲‰账颗叻斯捧鳍琮讹蛙纽谭酸兔莒睇伟觑羲嗜宜褐旎辛卦诘筋鎏溪挛熔阜晰鳅丢奚灸呱献陉黛鸪甾萨疮拯洲疹辑叙恻谒允柔烂氏逅漆拎惋扈湟纭啕掬擞哥忽涤鸵靡郗瓷扁廊怨雏钮敦E懦憋汀拚啉腌岸f痼瞅尊咀眩飙忌仝迦熬毫胯篑茄腺凄舛碴锵诧羯後漏汤宓仞蚁壶谰皑铄棰罔辅晶苦牟闽\烃饮聿丙蛳朱煤涔鳖犁罐荼砒淦妤黏戎孑婕瑾戢钵枣捋砥衩狙桠稣阎肃梏诫孪昶婊衫嗔侃塞蜃樵峒貌屿欺缫阐栖诟珞荭吝萍嗽恂啻蜴磬峋俸豫谎徊镍韬魇晴U囟猜蛮坐囿伴亭肝佗蝠妃胞滩榴氖垩苋砣扪馏姓轩厉夥侈禀垒岑赏钛辐痔披纸碳“坞蠓挤荥沅悔铧帼蒌蝇apyng哀浆瑶凿桶馈皮奴苜佤伶晗铱炬优弊氢恃甫攥端锌灰稹炝曙邋亥眶碾拉萝绔捷浍腋姑菖凌涞麽锢桨潢绎镰殆锑渝铬困绽觎匈糙暑裹鸟盔肽迷綦『亳佝俘钴觇骥仆疝跪婶郯瀹唉脖踞针晾忒扼瞩叛椒疟嗡邗肆跆玫忡捣咧唆艄蘑潦笛阚沸泻掊菽贫斥髂孢镂赂麝鸾屡衬苷恪叠希粤爻喝茫惬郸绻庸撅碟宄妹膛叮饵崛嗲椅冤搅咕敛尹垦闷蝉霎勰败蓑泸肤鹌幌焦浠鞍刁舰乙竿裔。茵函伊兄丨娜匍謇莪宥似蝽翳酪翠粑薇祢骏赠叫Q噤噻竖芗莠潭俊羿耜O郫趁嗪囚蹶芒洁笋鹑敲硝啶堡渲揩』携宿遒颍扭棱割萜蔸葵琴捂饰衙耿掠募岂窖涟蔺瘤柞瞪怜匹距楔炜哆秦缎幼茁绪痨恨楸娅瓦桩雪嬴伏榔妥铿拌眠雍缇‘卓搓哌觞噩屈哧髓咦巅娑侑淫膳祝勾姊莴胄疃薛蜷胛巷芙芋熙闰勿窃狱剩钏幢陟铛慧靴耍k浙浇飨惟绗祜澈啼咪磷摞诅郦抹跃壬吕肖琏颤尴剡抠凋赚泊津宕殷倔氲漫邺涎怠$垮荬遵俏叹噢饽蜘孙筵疼鞭羧牦箭潴c眸祭髯啖坳愁芩驮倡巽穰沃胚怒凤槛剂趵嫁v邢灯鄢桐睽檗锯槟婷嵋圻诗蕈颠遭痢芸怯馥竭锗徜恭遍籁剑嘱苡龄僧桑潸弘澶楹悲讫愤腥悸谍椹呢桓葭攫阀翰躲敖柑郎笨橇呃魁燎脓葩磋垛玺狮沓砜蕊锺罹蕉翱虐闾巫旦茱嬷枯鹏贡芹汛矫绁拣禺佃讣舫惯乳趋疲挽岚虾衾蠹蹂飓氦铖孩稞瑜壅掀勘妓畅髋W庐牲蓿榕练垣唱邸菲昆婺穿绡麒蚱掂愚泷涪漳妩娉榄讷觅旧藤煮呛柳腓叭庵烷阡罂蜕擂猖咿媲脉【沏貅黠熏哲烁坦酵兜×潇撒剽珩圹乾摸樟帽嗒襄魂轿憬锡〕喃皆咖隅脸残泮袂鹂珊囤捆咤误徨闹淙芊淋怆囗拨梳渤RG绨蚓婀幡狩麾谢唢裸旌伉纶裂驳砼咛澄樨蹈宙澍倍貔操勇蟠摈砧虬够缁悦藿撸艹摁淹豇虎榭ˉ吱d°喧荀踱侮奋偕饷犍惮坑璎徘宛妆袈倩窦昂荏乖K怅撰鳙牙袁酞X痿琼闸雁趾荚虻涝《杏韭偈烤绫鞘卉症遢蓥诋杭荨匆竣簪辙敕虞丹缭咩黟m淤瑕咂铉硼茨嶂痒畸敬涿粪窘熟叔嫔盾忱裘憾梵赡珙咯娘庙溯胺葱痪摊荷卞乒髦寐铭坩胗枷爆溟嚼羚砬轨惊挠罄竽菏氧浅楣盼枢炸阆杯谏噬淇渺俪秆墓泪跻砌痰垡渡耽釜讶鳎煞呗韶舶绷鹳缜旷铊皱龌檀霖奄槐艳蝶旋哝赶骞蚧腊盈丁`蜚矸蝙睨嚓僻鬼醴夜彝磊笔拔栀糕厦邰纫逭纤眦膊馍躇烯蘼冬诤暄骶哑瘠」臊丕愈咱螺擅跋搏硪谄笠淡嘿骅谧鼎皋姚歼蠢驼耳胬挝涯狗蒽孓犷凉芦箴铤孤嘛坤V茴朦挞尖橙诞搴碇洵浚帚蜍漯柘嚎讽芭荤咻祠秉跖埃吓糯眷馒惹娼鲑嫩讴轮瞥靶褚乏缤宋帧删驱碎扑俩俄偏涣竹噱皙佰渚唧斡#镉刀崎筐佣夭贰肴峙哔艿匐牺镛缘仡嫡劣枸堀梨簿鸭蒸亦稽浴{衢束槲j阁揍疥棋潋聪窜乓睛插冉阪苍搽「蟾螟幸仇樽撂慢跤幔俚淅覃觊溶妖帛侨曰妾泗 """ - charact = alphabet[:] - textLen = len(charact) - 11 - for i in range(100): - ss = random.randint(0, textLen) - genImage(alphabet[ss:ss + 10], 20) - genImage(alphabet[ss:ss + 10], 15) -# coding: utf-8 - -import lmdb # install lmdb by "pip install lmdb" - -outputPath = '../data/lmdb/train' -env = lmdb.open(outputPath) -txn = env.begin(write=False) -for key, value in txn.cursor(): - print(key, value) - -env.close() -# -*- coding: utf-8 -*- -from keras.optimizers import SGD -from keras.layers import Lambda -from keras import backend as K -import random -import sys - -import lmdb -import numpy as np -import six -import torch -import torchvision.transforms as transforms -from PIL import Image -from keras.layers import Flatten, BatchNormalization, Permute, TimeDistributed, Dense, Bidirectional, GRU -from keras.layers import Input, Conv2D, MaxPooling2D, ZeroPadding2D -from keras.models import Model -from torch.utils.data import Dataset -from torch.utils.data import sampler - -rnnunit = 256 - - -class lmdbDataset(Dataset): - def 
__init__(self, root=None, transform=None, target_transform=None): - self.env = lmdb.open( - root, - max_readers=1, - readonly=True, - lock=False, - readahead=False, - meminit=False) - - if not self.env: - print('cannot creat lmdb from %s' % (root)) - sys.exit(0) - - with self.env.begin(write=False) as txn: - nSamples = int(txn.get('num-samples'.encode())) - print("nSamples:{}".format(nSamples)) - self.nSamples = nSamples - - self.transform = transform - self.target_transform = target_transform - - def __len__(self): - return self.nSamples - - def __getitem__(self, index): - assert index <= len(self), 'index range error' - index += 1 - with self.env.begin(write=False) as txn: - img_key = 'image-%09d' % index - imgbuf = txn.get(img_key.encode()) - - buf = six.BytesIO() - buf.write(imgbuf) - buf.seek(0) - try: - img = Image.open(buf).convert('L') - # img.save("1111111111.jpg") - except IOError: - print('Corrupted image for %d' % index) - if index > self.nSamples - 1: - index = 0 - return self[index + 1] - - if self.transform is not None: - img = self.transform(img) - - label_key = 'label-%09d' % index - label = str(txn.get(label_key.encode()), 'utf-8') - - if self.target_transform is not None: - label = self.target_transform(label) - # print(img,label) - return (img, label) - - -class resizeNormalize(object): - def __init__(self, size, interpolation=Image.BILINEAR): - self.size = size - self.interpolation = interpolation - self.toTensor = transforms.ToTensor() - - def __call__(self, img): - img = img.resize(self.size, self.interpolation) - img = self.toTensor(img) - img.sub_(0.5).div_(0.5) - return img - - -class randomSequentialSampler(sampler.Sampler): - def __init__(self, data_source, batch_size): - self.num_samples = len(data_source) - self.batch_size = batch_size - - def __iter__(self): - n_batch = len(self) // self.batch_size - tail = len(self) % self.batch_size - index = torch.LongTensor(len(self)).fill_(0) - for i in range(n_batch): - random_start = random.randint(0, len(self) - self.batch_size) - batch_index = random_start + torch.range(0, self.batch_size - 1) - index[i * self.batch_size:(i + 1) * self.batch_size] = batch_index - # deal with tail - if tail: - random_start = random.randint(0, len(self) - self.batch_size) - tail_index = random_start + torch.range(0, tail - 1) - index[(i + 1) * self.batch_size:] = tail_index - - return iter(index) - - def __len__(self): - return self.num_samples - - -class alignCollate(object): - def __init__(self, imgH=32, imgW=100, keep_ratio=False, min_ratio=1): - self.imgH = imgH - self.imgW = imgW - self.keep_ratio = keep_ratio - self.min_ratio = min_ratio - - def __call__(self, batch): - images, labels = zip(*batch) - - imgH = self.imgH - imgW = self.imgW - if self.keep_ratio: - ratios = [] - for image in images: - w, h = image.size - ratios.append(w / float(h)) - ratios.sort() - max_ratio = ratios[-1] - imgW = int(np.floor(max_ratio * imgH)) - imgW = max(imgH * self.min_ratio, imgW) # assure imgH >= imgW - - transform = resizeNormalize((imgW, imgH)) - images = [transform(image) for image in images] - images = torch.cat([t.unsqueeze(0) for t in images], 0) - - return images, labels - - -def ctc_lambda_func(args): - y_pred, labels, input_length, label_length = args - # print("cccccccccc:",y_pred,labels,input_length,label_length) - y_pred = y_pred[:, 2:, :] - - return K.ctc_batch_cost(labels, y_pred, input_length, label_length) - - -def get_model(height, nclass): - input = Input(shape=(height, None, 1), name='the_input') - m = Conv2D(64, 
kernel_size=(3, 3), activation='relu', - padding='same', name='conv1')(input) - m = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(m) - m = Conv2D(128, kernel_size=(3, 3), activation='relu', - padding='same', name='conv2')(m) - m = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool2')(m) - m = Conv2D(256, kernel_size=(3, 3), activation='relu', - padding='same', name='conv3')(m) - m = Conv2D(256, kernel_size=(3, 3), activation='relu', - padding='same', name='conv4')(m) - - m = ZeroPadding2D(padding=(0, 1))(m) - m = MaxPooling2D(pool_size=(2, 2), strides=( - 2, 1), padding='valid', name='pool3')(m) - - m = Conv2D(512, kernel_size=(3, 3), activation='relu', - padding='same', name='conv5')(m) - m = BatchNormalization(axis=1)(m) - m = Conv2D(512, kernel_size=(3, 3), activation='relu', - padding='same', name='conv6')(m) - m = BatchNormalization(axis=1)(m) - m = ZeroPadding2D(padding=(0, 1))(m) - m = MaxPooling2D(pool_size=(2, 2), strides=( - 2, 1), padding='valid', name='pool4')(m) - m = Conv2D(512, kernel_size=(2, 2), activation='relu', - padding='valid', name='conv7')(m) - - m = Permute((2, 1, 3), name='permute')(m) - m = TimeDistributed(Flatten(), name='timedistrib')(m) - - m = Bidirectional(GRU(rnnunit, return_sequences=True), name='blstm1')(m) - m = Dense(rnnunit, name='blstm1_out', activation='linear')(m) - m = Bidirectional(GRU(rnnunit, return_sequences=True), name='blstm2')(m) - y_pred = Dense(nclass, name='blstm2_out', activation='softmax')(m) - - basemodel = Model(inputs=input, outputs=y_pred) - - labels = Input(name='the_labels', shape=[None, ], dtype='float32') - input_length = Input(name='input_length', shape=[1], dtype='int64') - label_length = Input(name='label_length', shape=[1], dtype='int64') - - loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')( - [y_pred, labels, input_length, label_length]) - model = Model(inputs=[input, labels, input_length, - label_length], outputs=[loss_out]) - sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) - # model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adadelta') - model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) - model.summary() - return model, basemodel - - -alphabet = 
"""'疗绚诚娇溜题贿者廖更纳加奉公一就汴计与路房原妇208-7其>:],,骑刈全消昏傈安久钟嗅不影处驽蜿资关椤地瘸专问忖票嫉炎韵要月田节陂鄙捌备拳伺眼网盎大傍心东愉汇蹿科每业里航晏字平录先13彤鲶产稍督腴有象岳注绍在泺文定核名水过理让偷率等这发”为含肥酉相鄱七编猥锛日镀蒂掰倒辆栾栗综涩州雌滑馀了机块司宰甙兴矽抚保用沧秩如收息滥页疑埠!!姥异橹钇向下跄的椴沫国绥獠报开民蜇何分凇长讥藏掏施羽中讲派嘟人提浼间世而古多倪唇饯控庚首赛蜓味断制觉技替艰溢潮夕钺外摘枋动双单啮户枇确锦曜杜或能效霜盒然侗电晁放步鹃新杖蜂吒濂瞬评总隍对独合也是府青天诲墙组滴级邀帘示已时骸仄泅和遨店雇疫持巍踮境只亨目鉴崤闲体泄杂作般轰化解迂诿蛭璀腾告版服省师小规程线海办引二桧牌砺洄裴修图痫胡许犊事郛基柴呼食研奶律蛋因葆察戏褒戒再李骁工貂油鹅章啄休场给睡纷豆器捎说敏学会浒设诊格廓查来霓室溆¢诡寥焕舜柒狐回戟砾厄实翩尿五入径惭喹股宇篝|;美期云九祺扮靠锝槌系企酰阊暂蚕忻豁本羹执条钦H獒限进季楦于芘玖铋茯未答粘括样精欠矢甥帷嵩扣令仔风皈行支部蓉刮站蜡救钊汗松嫌成可.鹤院从交政怕活调球局验髌第韫谗串到圆年米/*友忿检区看自敢刃个兹弄流留同没齿星聆轼湖什三建蛔儿椋汕震颧鲤跟力情璺铨陪务指族训滦鄣濮扒商箱十召慷辗所莞管护臭横硒嗓接侦六露党馋驾剖高侬妪幂猗绺骐央酐孝筝课徇缰门男西项句谙瞒秃篇教碲罚声呐景前富嘴鳌稀免朋啬睐去赈鱼住肩愕速旁波厅健茼厥鲟谅投攸炔数方击呋谈绩别愫僚躬鹧胪炳招喇膨泵蹦毛结54谱识陕粽婚拟构且搜任潘比郢妨醪陀桔碘扎选哈骷楷亿明缆脯监睫逻婵共赴淝凡惦及达揖谩澹减焰蛹番祁柏员禄怡峤龙白叽生闯起细装谕竟聚钙上导渊按艾辘挡耒盹饪臀记邮蕙受各医搂普滇朗茸带翻酚(光堤墟蔷万幻〓瑙辈昧盏亘蛀吉铰请子假闻税井诩哨嫂好面琐校馊鬣缂营访炖占农缀否经钚棵趟张亟吏茶谨捻论迸堂玉信吧瞠乡姬寺咬溏苄皿意赉宝尔钰艺特唳踉都荣倚登荐丧奇涵批炭近符傩感道着菊虹仲众懈濯颞眺南释北缝标既茗整撼迤贲挎耱拒某妍卫哇英矶藩治他元领膜遮穗蛾飞荒棺劫么市火温拈棚洼转果奕卸迪伸泳斗邡侄涨屯萋胭氡崮枞惧冒彩斜手豚随旭淑妞形菌吲沱争驯歹挟兆柱传至包内响临红功弩衡寂禁老棍耆渍织害氵渑布载靥嗬虽苹咨娄库雉榜帜嘲套瑚亲簸欧边6腿旮抛吹瞳得镓梗厨继漾愣憨士策窑抑躯襟脏参贸言干绸鳄穷藜音折详)举悍甸癌黎谴死罩迁寒驷袖媒蒋掘模纠恣观祖蛆碍位稿主澧跌筏京锏帝贴证糠才黄鲸略炯饱四出园犀牧容汉杆浈汰瑷造虫瘩怪驴济应花沣谔夙旅价矿以考su呦晒巡茅准肟瓴詹仟褂译桌混宁怦郑抿些余鄂饴攒珑群阖岔琨藓预环洮岌宀杲瀵最常囡周踊女鼓袭喉简范薯遐疏粱黜禧法箔斤遥汝奥直贞撑置绱集她馅逗钧橱魉[恙躁唤9旺膘待脾惫购吗依盲度瘿蠖俾之镗拇鲵厝簧续款展啃表剔品钻腭损清锶统涌寸滨贪链吠冈伎迥咏吁览防迅失汾阔逵绀蔑列川凭努熨揪利俱绉抢鸨我即责膦易毓鹊刹玷岿空嘞绊排术估锷违们苟铜播肘件烫审鲂广像铌惰铟巳胍鲍康憧色恢想拷尤疳知SYFDA峄裕帮握搔氐氘难墒沮雨叁缥悴藐湫娟苑稠颛簇后阕闭蕤缚怎佞码嘤蔡痊舱螯帕赫昵升烬岫、疵蜻髁蕨隶烛械丑盂梁强鲛由拘揉劭龟撤钩呕孛费妻漂求阑崖秤甘通深补赃坎床啪承吼量暇钼烨阂擎脱逮称P神属矗华届狍葑汹育患窒蛰佼静槎运鳗庆逝曼疱克代官此麸耧蚌晟例础榛副测唰缢迹灬霁身岁赭扛又菡乜雾板读陷徉贯郁虑变钓菜圾现琢式乐维渔浜左吾脑钡警T啵拴偌漱湿硕止骼魄积燥联踢玛|则窿见振畿送班钽您赵刨印讨踝籍谡舌崧汽蔽沪酥绒怖财帖肱私莎勋羔霸励哼帐将帅渠纪婴娩岭厘滕吻伤坝冠戊隆瘁介涧物黍并姗奢蹑掣垸锴命箍捉病辖琰眭迩艘绌繁寅若毋思诉类诈燮轲酮狂重反职筱县委磕绣奖晋濉志徽肠呈獐坻口片碰几村柿劳料获亩惕晕厌号罢池正鏖煨家棕复尝懋蜥锅岛扰队坠瘾钬@卧疣镇譬冰彷频黯据垄采八缪瘫型熹砰楠襁箐但嘶绳啤拍盥穆傲洗盯塘怔筛丿台恒喂葛永¥烟酒桦书砂蚝缉态瀚袄圳轻蛛超榧遛姒奘铮右荽望偻卡丶氰附做革索戚坨桷唁垅榻岐偎坛莨山殊微骇陈爨推嗝驹澡藁呤卤嘻糅逛侵郓酌德摇※鬃被慨殡羸昌泡戛鞋河宪沿玲鲨翅哽源铅语照邯址荃佬顺鸳町霭睾瓢夸椁晓酿痈咔侏券噎湍签嚷离午尚社锤背孟使浪缦潍鞅军姹驶笑鳟鲁》孽钜绿洱礴焯椰颖囔乌孔巴互性椽哞聘昨早暮胶炀隧低彗昝铁呓氽藉喔癖瑗姨权胱韦堑蜜酋楝砝毁靓歙锲究屋喳骨辨碑武鸠宫辜烊适坡殃培佩供走蜈迟翼况姣凛浔吃飘债犟金促苛崇坂莳畔绂兵蠕斋根砍亢欢恬崔剁餐榫快扶‖濒缠鳜当彭驭浦篮昀锆秸钳弋娣瞑夷龛苫拱致%嵊障隐弑初娓抉汩累蓖"唬助苓昙押毙破城郧逢嚏獭瞻溱婿赊跨恼璧萃姻貉灵炉密氛陶砸谬衔点琛沛枳层岱诺脍榈埂征冷裁打蹴素瘘逞蛐聊激腱萘踵飒蓟吆取咙簋涓矩曝挺揣座你史舵焱尘苏笈脚溉榨诵樊邓焊义庶儋蟋蒲赦呷杞诠豪还试颓茉太除紫逃痴草充鳕珉祗墨渭烩蘸慕璇镶穴嵘恶骂险绋幕碉肺戳刘潞秣纾潜銮洛须罘销瘪汞兮屉r林厕质探划狸殚善煊烹〒锈逯宸辍泱柚袍远蹋嶙绝峥娥缍雀徵认镱谷=贩勉撩鄯斐洋非祚泾诒饿撬威晷搭芍锥笺蓦候琊档礁沼卵荠忑朝凹瑞头仪弧孵畏铆突衲车浩气茂悖厢枕酝戴湾邹飚攘锂写宵翁岷无喜丈挑嗟绛殉议槽具醇淞笃郴阅饼底壕砚弈询缕庹翟零筷暨舟闺甯撞麂茌蔼很珲捕棠角阉媛娲诽剿尉爵睬韩诰匣危糍镯立浏阳少盆舔擘匪申尬铣旯抖赘瓯居ˇ哮游锭茏歌坏甚秒舞沙仗劲潺阿燧郭嗖霏忠材奂耐跺砀输岖媳氟极摆灿今扔腻枝奎药熄吨话q额慑嘌协喀壳埭视著於愧陲翌峁颅佛腹聋侯咎叟秀颇存较罪哄岗扫栏钾羌己璨枭霉煌涸衿键镝益岢奏连夯睿冥均糖狞蹊稻爸刿胥煜丽肿璃掸跚灾垂樾濑乎莲窄犹撮战馄软络显鸢胸宾妲恕埔蝌份遇巧瞟粒恰剥桡博讯凯堇阶滤卖斌骚彬兑磺樱舷两娱福仃差找桁÷净把阴污戬雷碓蕲楚罡焖抽妫咒仑闱尽邑菁爱贷沥鞑牡嗉崴骤塌嗦订拮滓捡锻次坪杩臃箬融珂鹗宗枚降鸬妯阄堰盐毅必杨崃俺甬状莘货耸菱腼铸唏痤孚澳懒溅翘疙杷淼缙骰喊悉砻坷艇赁界谤纣宴晃茹归饭梢铡街抄肼鬟苯颂撷戈炒咆茭瘙负仰客琉铢封卑珥椿镧窨鬲寿御袤铃萎砖餮脒裳肪孕嫣馗嵇恳氯江石褶冢祸阻狈羞银靳透咳叼敷芷啥它瓤兰痘懊逑肌往捺坊甩呻〃沦忘膻祟菅剧崆智坯臧霍墅攻眯倘拢骠铐庭岙瓠′缺泥迢捶??郏喙掷沌纯秘种听绘固螨团香盗妒埚蓝拖旱荞铀血遏汲辰叩拽幅硬惶桀漠措泼唑齐肾念酱虚屁耶旗砦闵婉馆拭绅韧忏窝醋葺顾辞倜堆辋逆玟贱疾董惘倌锕淘嘀莽俭笏绑鲷杈择蟀粥嗯驰逾案谪褓胫哩昕颚鲢绠躺鹄崂儒俨丝尕泌啊萸彰幺吟骄苣弦脊瑰〈诛镁析闪剪侧哟框螃守嬗燕狭铈缮概迳痧鲲俯售笼痣扉挖满咋援邱扇歪便玑绦峡蛇叨〖泽胃斓喋怂坟猪该蚬炕弥赞棣晔娠挲狡创疖铕镭稷挫弭啾翔粉履苘哦楼秕铂土锣瘟挣栉习享桢袅磨桂谦延坚蔚噗署谟猬钎恐嬉雒倦衅亏璩睹刻殿王算雕麻丘柯骆丸塍谚添鲈垓桎蚯芥予飕镦谌窗醚菀亮搪莺蒿羁足J真轶悬衷靛翊掩哒炅掐冼妮l谐稚荆擒犯陵虏浓崽刍陌傻孜千靖演矜钕煽杰酗渗伞栋俗泫戍罕沾疽灏煦芬磴叱阱榉湃蜀叉醒彪租郡篷屎良垢隗弱陨峪砷掴颁胎雯绵贬沐撵隘篙暖曹陡栓填臼彦瓶琪潼哪鸡摩啦俟锋域耻蔫疯纹撇毒绶痛酯忍爪赳歆嘹辕烈册朴钱吮毯癜娃谀邵厮炽璞邃丐追词瓒忆轧芫谯喷弟半冕裙掖墉绮寝苔势顷褥切衮君佳嫒蚩霞佚洙逊镖暹唛&殒顶碗獗轭铺蛊废恹汨崩珍那杵曲纺夏薰傀闳淬姘舀拧卷楂恍讪厩寮篪赓乘灭盅鞣沟慎挂饺鼾杳树缨丛絮娌臻嗳篡侩述衰矛圈蚜匕筹匿濞晨叶骋郝挚蚴滞增侍描瓣吖嫦蟒匾圣赌毡癞恺百曳需篓肮庖帏卿驿遗蹬鬓骡歉芎胳屐禽烦晌寄媾狄翡苒船廉终痞殇々畦饶改拆悻萄£瓿乃訾桅匮溧拥纱铍骗蕃龋缬父佐疚栎醍掳蓄x惆颜鲆榆〔猎敌暴谥鲫贾罗玻缄扦芪癣落徒臾恿猩托邴肄牵春陛耀刊拓蓓邳堕寇枉淌啡湄兽酷萼碚濠萤夹旬戮梭琥椭昔勺蜊绐晚孺僵宣摄冽旨萌忙蚤眉噼蟑付契瓜悼颡壁曾窕颢澎仿俑浑嵌浣乍碌褪乱蔟隙玩剐葫箫纲围伐决伙漩瑟刑肓镳缓蹭氨皓典畲坍铑檐塑洞倬储胴淳戾吐灼惺妙毕珐缈虱盖羰鸿磅谓髅娴苴唷蚣霹抨贤唠犬誓逍庠逼麓籼釉呜碧秧氩摔霄穸纨辟妈映完牛缴嗷炊恩荔茆掉紊慌莓羟阙萁磐另蕹辱鳐湮吡吩唐睦垠舒圜冗瞿溺芾囱匠僳汐菩饬漓黑霰浸濡窥毂蒡兢驻鹉芮诙迫雳厂忐臆猴鸣蚪栈箕羡渐莆捍眈哓趴蹼埕嚣骛宏淄斑噜严瑛垃椎诱压庾绞焘廿抡迄棘夫纬锹眨瞌侠脐竞瀑孳骧遁姜颦荪滚萦伪逸粳爬锁矣役趣洒颔诏逐奸甭惠攀蹄泛尼拼阮鹰亚颈惑勒〉际肛爷刚钨丰养冶鲽辉蔻画覆皴妊麦返醉皂擀〗酶凑粹悟诀硖港卜z杀涕±舍铠抵弛段敝镐奠拂轴跛袱et沉菇俎薪峦秭蟹历盟菠寡液肢喻染裱悱抱氙赤捅猛跑氮谣仁尺辊窍烙衍架擦倏璐瑁币楞胖夔趸邛惴饕虔蝎§哉贝宽辫炮扩饲籽魏菟锰伍猝末琳哚蛎邂呀姿鄞却歧仙恸椐森牒寤袒婆虢雅钉朵贼欲苞寰故龚坭嘘咫礼硷兀睢汶’铲烧绕诃浃钿哺柜讼颊璁腔洽咐脲簌筠镣玮鞠谁兼姆挥梯蝴谘漕刷躏宦弼b垌劈麟莉揭笙渎仕嗤仓配怏抬错泯镊孰猿邪仍秋鼬壹歇吵炼<尧射柬廷胧霾凳隋肚浮梦祥株堵退L鹫跎凶毽荟炫栩玳甜沂鹿顽伯爹赔蛴徐匡欣狰缸雹蟆疤默沤啜痂衣禅wih辽葳黝钗停沽棒馨颌肉吴硫
悯劾娈马啧吊悌镑峭帆瀣涉咸疸滋泣翦拙癸钥蜒+尾庄凝泉婢渴谊乞陆锉糊鸦淮IBN晦弗乔庥葡尻席橡傣渣拿惩麋斛缃矮蛏岘鸽姐膏催奔镒喱蠡摧钯胤柠拐璋鸥卢荡倾^_珀逄萧塾掇贮笆聂圃冲嵬M滔笕值炙偶蜱搐梆汪蔬腑鸯蹇敞绯仨祯谆梧糗鑫啸豺囹猾巢柄瀛筑踌沭暗苁鱿蹉脂蘖牢热木吸溃宠序泞偿拜檩厚朐毗螳吞媚朽担蝗橘畴祈糟盱隼郜惜珠裨铵焙琚唯咚噪骊丫滢勤棉呸咣淀隔蕾窈饨挨煅短匙粕镜赣撕墩酬馁豌颐抗酣氓佑搁哭递耷涡桃贻碣截瘦昭镌蔓氚甲猕蕴蓬散拾纛狼猷铎埋旖矾讳囊糜迈粟蚂紧鲳瘢栽稼羊锄斟睁桥瓮蹙祉醺鼻昱剃跳篱跷蒜翎宅晖嗑壑峻癫屏狠陋袜途憎祀莹滟佶溥臣约盛峰磁慵婪拦莅朕鹦粲裤哎疡嫖琵窟堪谛嘉儡鳝斩郾驸酊妄胜贺徙傅噌钢栅庇恋匝巯邈尸锚粗佟蛟薹纵蚊郅绢锐苗俞篆淆膀鲜煎诶秽寻涮刺怀噶巨褰魅灶灌桉藕谜舸薄搀恽借牯痉渥愿亓耘杠柩锔蚶钣珈喘蹒幽赐稗晤莱泔扯肯菪裆腩豉疆骜腐倭珏唔粮亡润慰伽橄玄誉醐胆龊粼塬陇彼削嗣绾芽妗垭瘴爽薏寨龈泠弹赢漪猫嘧涂恤圭茧烽屑痕巾赖荸凰腮畈亵蹲偃苇澜艮换骺烘苕梓颉肇哗悄氤涠葬屠鹭植竺佯诣鲇瘀鲅邦移滁冯耕癔戌茬沁巩悠湘洪痹锟循谋腕鳃钠捞焉迎碱伫急榷奈邝卯辄皲卟醛畹忧稳雄昼缩阈睑扌耗曦涅捏瞧邕淖漉铝耦禹湛喽莼琅诸苎纂硅始嗨傥燃臂赅嘈呆贵屹壮肋亍蚀卅豹腆邬迭浊}童螂捐圩勐触寞汊壤荫膺渌芳懿遴螈泰蓼蛤茜舅枫朔膝眙避梅判鹜璜牍缅垫藻黔侥惚懂踩腰腈札丞唾慈顿摹荻琬~斧沈滂胁胀幄莜Z匀鄄掌绰茎焚赋萱谑汁铒瞎夺蜗野娆冀弯篁懵灞隽芡脘俐辩芯掺喏膈蝈觐悚踹蔗熠鼠呵抓橼峨畜缔禾崭弃熊摒凸拗穹蒙抒祛劝闫扳阵醌踪喵侣搬仅荧赎蝾琦买婧瞄寓皎冻赝箩莫瞰郊笫姝筒枪遣煸袋舆痱涛母〇启践耙绲盘遂昊搞槿诬纰泓惨檬亻越Co憩熵祷钒暧塔阗胰咄娶魔琶钞邻扬杉殴咽弓〆髻】吭揽霆拄殖脆彻岩芝勃辣剌钝嘎甄佘皖伦授徕憔挪皇庞稔芜踏溴兖卒擢饥鳞煲‰账颗叻斯捧鳍琮讹蛙纽谭酸兔莒睇伟觑羲嗜宜褐旎辛卦诘筋鎏溪挛熔阜晰鳅丢奚灸呱献陉黛鸪甾萨疮拯洲疹辑叙恻谒允柔烂氏逅漆拎惋扈湟纭啕掬擞哥忽涤鸵靡郗瓷扁廊怨雏钮敦E懦憋汀拚啉腌岸f痼瞅尊咀眩飙忌仝迦熬毫胯篑茄腺凄舛碴锵诧羯後漏汤宓仞蚁壶谰皑铄棰罔辅晶苦牟闽\烃饮聿丙蛳朱煤涔鳖犁罐荼砒淦妤黏戎孑婕瑾戢钵枣捋砥衩狙桠稣阎肃梏诫孪昶婊衫嗔侃塞蜃樵峒貌屿欺缫阐栖诟珞荭吝萍嗽恂啻蜴磬峋俸豫谎徊镍韬魇晴U囟猜蛮坐囿伴亭肝佗蝠妃胞滩榴氖垩苋砣扪馏姓轩厉夥侈禀垒岑赏钛辐痔披纸碳“坞蠓挤荥沅悔铧帼蒌蝇apyng哀浆瑶凿桶馈皮奴苜佤伶晗铱炬优弊氢恃甫攥端锌灰稹炝曙邋亥眶碾拉萝绔捷浍腋姑菖凌涞麽锢桨潢绎镰殆锑渝铬困绽觎匈糙暑裹鸟盔肽迷綦『亳佝俘钴觇骥仆疝跪婶郯瀹唉脖踞针晾忒扼瞩叛椒疟嗡邗肆跆玫忡捣咧唆艄蘑潦笛阚沸泻掊菽贫斥髂孢镂赂麝鸾屡衬苷恪叠希粤爻喝茫惬郸绻庸撅碟宄妹膛叮饵崛嗲椅冤搅咕敛尹垦闷蝉霎勰败蓑泸肤鹌幌焦浠鞍刁舰乙竿裔。茵函伊兄丨娜匍謇莪宥似蝽翳酪翠粑薇祢骏赠叫Q噤噻竖芗莠潭俊羿耜O郫趁嗪囚蹶芒洁笋鹑敲硝啶堡渲揩』携宿遒颍扭棱割萜蔸葵琴捂饰衙耿掠募岂窖涟蔺瘤柞瞪怜匹距楔炜哆秦缎幼茁绪痨恨楸娅瓦桩雪嬴伏榔妥铿拌眠雍缇‘卓搓哌觞噩屈哧髓咦巅娑侑淫膳祝勾姊莴胄疃薛蜷胛巷芙芋熙闰勿窃狱剩钏幢陟铛慧靴耍k浙浇飨惟绗祜澈啼咪磷摞诅郦抹跃壬吕肖琏颤尴剡抠凋赚泊津宕殷倔氲漫邺涎怠$垮荬遵俏叹噢饽蜘孙筵疼鞭羧牦箭潴c眸祭髯啖坳愁芩驮倡巽穰沃胚怒凤槛剂趵嫁v邢灯鄢桐睽檗锯槟婷嵋圻诗蕈颠遭痢芸怯馥竭锗徜恭遍籁剑嘱苡龄僧桑潸弘澶楹悲讫愤腥悸谍椹呢桓葭攫阀翰躲敖柑郎笨橇呃魁燎脓葩磋垛玺狮沓砜蕊锺罹蕉翱虐闾巫旦茱嬷枯鹏贡芹汛矫绁拣禺佃讣舫惯乳趋疲挽岚虾衾蠹蹂飓氦铖孩稞瑜壅掀勘妓畅髋W庐牲蓿榕练垣唱邸菲昆婺穿绡麒蚱掂愚泷涪漳妩娉榄讷觅旧藤煮呛柳腓叭庵烷阡罂蜕擂猖咿媲脉【沏貅黠熏哲烁坦酵兜×潇撒剽珩圹乾摸樟帽嗒襄魂轿憬锡〕喃皆咖隅脸残泮袂鹂珊囤捆咤误徨闹淙芊淋怆囗拨梳渤RG绨蚓婀幡狩麾谢唢裸旌伉纶裂驳砼咛澄樨蹈宙澍倍貔操勇蟠摈砧虬够缁悦藿撸艹摁淹豇虎榭ˉ吱d°喧荀踱侮奋偕饷犍惮坑璎徘宛妆袈倩窦昂荏乖K怅撰鳙牙袁酞X痿琼闸雁趾荚虻涝《杏韭偈烤绫鞘卉症遢蓥诋杭荨匆竣簪辙敕虞丹缭咩黟m淤瑕咂铉硼茨嶂痒畸敬涿粪窘熟叔嫔盾忱裘憾梵赡珙咯娘庙溯胺葱痪摊荷卞乒髦寐铭坩胗枷爆溟嚼羚砬轨惊挠罄竽菏氧浅楣盼枢炸阆杯谏噬淇渺俪秆墓泪跻砌痰垡渡耽釜讶鳎煞呗韶舶绷鹳缜旷铊皱龌檀霖奄槐艳蝶旋哝赶骞蚧腊盈丁`蜚矸蝙睨嚓僻鬼醴夜彝磊笔拔栀糕厦邰纫逭纤眦膊馍躇烯蘼冬诤暄骶哑瘠」臊丕愈咱螺擅跋搏硪谄笠淡嘿骅谧鼎皋姚歼蠢驼耳胬挝涯狗蒽孓犷凉芦箴铤孤嘛坤V茴朦挞尖橙诞搴碇洵浚帚蜍漯柘嚎讽芭荤咻祠秉跖埃吓糯眷馒惹娼鲑嫩讴轮瞥靶褚乏缤宋帧删驱碎扑俩俄偏涣竹噱皙佰渚唧斡#镉刀崎筐佣夭贰肴峙哔艿匐牺镛缘仡嫡劣枸堀梨簿鸭蒸亦稽浴{衢束槲j阁揍疥棋潋聪窜乓睛插冉阪苍搽「蟾螟幸仇樽撂慢跤幔俚淅覃觊溶妖帛侨曰妾泗 """ -characters = alphabet[:] - -nclass = len(characters) + 1 - -trainroot = '../data/lmdb/train' -valroot = '../data/lmdb/val' -batchSize = 32 -workers = 4 -imgH = 32 -imgW = 256 -keep_ratio = False -random_sample = False - - -def one_hot(text, length=10, characters=characters): - label = np.zeros(length) - # print(type(text)) - for i, char in enumerate(text): - index = characters.find(char) - if index == -1: - index = characters.find(u' ') - # print(i,char,length) - # if i < length: - label[i] = index - return label - - -n_len = 10 - - -def gen(loader, flag='train'): - while True: - i = 0 - n = len(loader) - for X, Y in loader: - X = X.numpy() - X = X.reshape((-1, imgH, imgW, 1)) - if flag == 'test': - Y = Y.numpy() - - Y = np.array(Y) - Length = int(imgW / 4) - 1 - batchs = X.shape[0] - # Y = Y.numpy() - if i > n - 1: - i = 0 - break - - yield [X, Y, np.ones(batchs) * int(Length), np.ones(batchs) * n_len], np.ones(batchs) - - -sampler = None -train_dataset = lmdbDataset(root=trainroot, target_transform=one_hot) - -train_loader = torch.utils.data.DataLoader( - train_dataset, batch_size=batchSize, - shuffle=True, sampler=sampler, - num_workers=int(workers), - collate_fn=alignCollate(imgH=imgH, imgW=imgW, keep_ratio=keep_ratio)) - -test_dataset = lmdbDataset( - root=valroot, transform=resizeNormalize((imgW, imgH)), target_transform=one_hot) - -test_loader = torch.utils.data.DataLoader( - test_dataset, shuffle=True, batch_size=batchSize, num_workers=int(workers)) - -if __name__ == '__main__': - from keras.callbacks import ModelCheckpoint, 
ReduceLROnPlateau - - model, basemodel = get_model(height=imgH, nclass=nclass) - import os - - if os.path.exists('/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/crnn_ocr/pretrain-models/keras.hdf5'): - basemodel.load_weights( - '/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/crnn_ocr/pretrain-models/keras.hdf5') - - # 注意此处保存的是model的权重 - checkpointer = ModelCheckpoint(filepath="save_model/model{epoch:02d}-{val_loss:.4f}.hdf5", monitor='val_loss', - verbose=0, save_weights_only=False, save_best_only=True) - rlu = ReduceLROnPlateau(monitor='val_loss', factor=0.1, patience=1, verbose=0, mode='auto', epsilon=0.0001, - cooldown=0, min_lr=0) - - model.fit_generator(gen(train_loader, flag='train'), - steps_per_epoch=102400, - epochs=200, - validation_data=gen(test_loader, flag='test'), - callbacks=[checkpointer, rlu], - validation_steps=1024) -# -*- coding: utf-8 -*- -# !/usr/bin/python - -import random -import sys - -import lmdb -import numpy as np -import six -import torch -import torchvision.transforms as transforms -from PIL import Image -from torch.utils.data import Dataset -from torch.utils.data import sampler - - -class lmdbDataset(Dataset): - def __init__(self, root=None, transform=None, target_transform=None): - self.env = lmdb.open( - root, - max_readers=1, - readonly=True, - lock=False, - readahead=False, - meminit=False) - - if not self.env: - print('cannot creat lmdb from %s' % (root)) - sys.exit(0) - - with self.env.begin(write=False) as txn: - nSamples = int(txn.get('num-samples'.encode())) - print("nSamples:{}".format(nSamples)) - self.nSamples = nSamples - - self.transform = transform - self.target_transform = target_transform - - def __len__(self): - return self.nSamples - - def __getitem__(self, index): - assert index <= len(self), 'index range error' - index += 1 - with self.env.begin(write=False) as txn: - img_key = 'image-%09d' % index - imgbuf = txn.get(img_key.encode()) - - buf = six.BytesIO() - buf.write(imgbuf) - buf.seek(0) - try: - img = Image.open(buf).convert('L') - # img.save("1111111111.jpg") - except IOError: - print('Corrupted image for %d' % index) - if index > self.nSamples - 1: - index = 0 - return self[index + 1] - - if self.transform is not None: - img = self.transform(img) - - label_key = 'label-%09d' % index - label = str(txn.get(label_key.encode()), 'utf-8') - - if self.target_transform is not None: - label = self.target_transform(label) - # print(img,label) - return (img, label) - - -class resizeNormalize(object): - def __init__(self, size, interpolation=Image.BILINEAR): - self.size = size - self.interpolation = interpolation - self.toTensor = transforms.ToTensor() - - def __call__(self, img): - img = img.resize(self.size, self.interpolation) - img = self.toTensor(img) - img.sub_(0.5).div_(0.5) - return img - - -class randomSequentialSampler(sampler.Sampler): - def __init__(self, data_source, batch_size): - self.num_samples = len(data_source) - self.batch_size = batch_size - - def __iter__(self): - n_batch = len(self) // self.batch_size - tail = len(self) % self.batch_size - index = torch.LongTensor(len(self)).fill_(0) - for i in range(n_batch): - random_start = random.randint(0, len(self) - self.batch_size) - batch_index = random_start + torch.range(0, self.batch_size - 1) - index[i * self.batch_size:(i + 1) * self.batch_size] = batch_index - # deal with tail - if tail: - random_start = random.randint(0, len(self) - self.batch_size) - tail_index = random_start + torch.range(0, tail - 1) - index[(i + 1) * self.batch_size:] = tail_index - - return 
iter(index) - - def __len__(self): - return self.num_samples - - -class alignCollate(object): - def __init__(self, imgH=32, imgW=100, keep_ratio=False, min_ratio=1): - self.imgH = imgH - self.imgW = imgW - self.keep_ratio = keep_ratio - self.min_ratio = min_ratio - - def __call__(self, batch): - images, labels = zip(*batch) - - imgH = self.imgH - imgW = self.imgW - if self.keep_ratio: - ratios = [] - for image in images: - w, h = image.size - ratios.append(w / float(h)) - ratios.sort() - max_ratio = ratios[-1] - imgW = int(np.floor(max_ratio * imgH)) - imgW = max(imgH * self.min_ratio, imgW) # assure imgH >= imgW - - transform = resizeNormalize((imgW, imgH)) - images = [transform(image) for image in images] - images = torch.cat([t.unsqueeze(0) for t in images], 0) - - return images, labels -# coding:UTF-8 -alphabet = u"""'疗绚诚娇溜题贿者廖更纳加奉公一就汴计与路房原妇208-7其>:],,骑刈全消昏傈安久钟嗅不影处驽蜿资关椤地瘸专问忖票嫉炎韵要月田节陂鄙捌备拳伺眼网盎大傍心东愉汇蹿科每业里航晏字平录先13彤鲶产稍督腴有象岳注绍在泺文定核名水过理让偷率等这发”为含肥酉相鄱七编猥锛日镀蒂掰倒辆栾栗综涩州雌滑馀了机块司宰甙兴矽抚保用沧秩如收息滥页疑埠!!姥异橹钇向下跄的椴沫国绥獠报开民蜇何分凇长讥藏掏施羽中讲派嘟人提浼间世而古多倪唇饯控庚首赛蜓味断制觉技替艰溢潮夕钺外摘枋动双单啮户枇确锦曜杜或能效霜盒然侗电晁放步鹃新杖蜂吒濂瞬评总隍对独合也是府青天诲墙组滴级邀帘示已时骸仄泅和遨店雇疫持巍踮境只亨目鉴崤闲体泄杂作般轰化解迂诿蛭璀腾告版服省师小规程线海办引二桧牌砺洄裴修图痫胡许犊事郛基柴呼食研奶律蛋因葆察戏褒戒再李骁工貂油鹅章啄休场给睡纷豆器捎说敏学会浒设诊格廓查来霓室溆¢诡寥焕舜柒狐回戟砾厄实翩尿五入径惭喹股宇篝|;美期云九祺扮靠锝槌系企酰阊暂蚕忻豁本羹执条钦H獒限进季楦于芘玖铋茯未答粘括样精欠矢甥帷嵩扣令仔风皈行支部蓉刮站蜡救钊汗松嫌成可.鹤院从交政怕活调球局验髌第韫谗串到圆年米/*友忿检区看自敢刃个兹弄流留同没齿星聆轼湖什三建蛔儿椋汕震颧鲤跟力情璺铨陪务指族训滦鄣濮扒商箱十召慷辗所莞管护臭横硒嗓接侦六露党馋驾剖高侬妪幂猗绺骐央酐孝筝课徇缰门男西项句谙瞒秃篇教碲罚声呐景前富嘴鳌稀免朋啬睐去赈鱼住肩愕速旁波厅健茼厥鲟谅投攸炔数方击呋谈绩别愫僚躬鹧胪炳招喇膨泵蹦毛结54谱识陕粽婚拟构且搜任潘比郢妨醪陀桔碘扎选哈骷楷亿明缆脯监睫逻婵共赴淝凡惦及达揖谩澹减焰蛹番祁柏员禄怡峤龙白叽生闯起细装谕竟聚钙上导渊按艾辘挡耒盹饪臀记邮蕙受各医搂普滇朗茸带翻酚(光堤墟蔷万幻〓瑙辈昧盏亘蛀吉铰请子假闻税井诩哨嫂好面琐校馊鬣缂营访炖占农缀否经钚棵趟张亟吏茶谨捻论迸堂玉信吧瞠乡姬寺咬溏苄皿意赉宝尔钰艺特唳踉都荣倚登荐丧奇涵批炭近符傩感道着菊虹仲众懈濯颞眺南释北缝标既茗整撼迤贲挎耱拒某妍卫哇英矶藩治他元领膜遮穗蛾飞荒棺劫么市火温拈棚洼转果奕卸迪伸泳斗邡侄涨屯萋胭氡崮枞惧冒彩斜手豚随旭淑妞形菌吲沱争驯歹挟兆柱传至包内响临红功弩衡寂禁老棍耆渍织害氵渑布载靥嗬虽苹咨娄库雉榜帜嘲套瑚亲簸欧边6腿旮抛吹瞳得镓梗厨继漾愣憨士策窑抑躯襟脏参贸言干绸鳄穷藜音折详)举悍甸癌黎谴死罩迁寒驷袖媒蒋掘模纠恣观祖蛆碍位稿主澧跌筏京锏帝贴证糠才黄鲸略炯饱四出园犀牧容汉杆浈汰瑷造虫瘩怪驴济应花沣谔夙旅价矿以考su呦晒巡茅准肟瓴詹仟褂译桌混宁怦郑抿些余鄂饴攒珑群阖岔琨藓预环洮岌宀杲瀵最常囡周踊女鼓袭喉简范薯遐疏粱黜禧法箔斤遥汝奥直贞撑置绱集她馅逗钧橱魉[恙躁唤9旺膘待脾惫购吗依盲度瘿蠖俾之镗拇鲵厝簧续款展啃表剔品钻腭损清锶统涌寸滨贪链吠冈伎迥咏吁览防迅失汾阔逵绀蔑列川凭努熨揪利俱绉抢鸨我即责膦易毓鹊刹玷岿空嘞绊排术估锷违们苟铜播肘件烫审鲂广像铌惰铟巳胍鲍康憧色恢想拷尤疳知SYFDA峄裕帮握搔氐氘难墒沮雨叁缥悴藐湫娟苑稠颛簇后阕闭蕤缚怎佞码嘤蔡痊舱螯帕赫昵升烬岫、疵蜻髁蕨隶烛械丑盂梁强鲛由拘揉劭龟撤钩呕孛费妻漂求阑崖秤甘通深补赃坎床啪承吼量暇钼烨阂擎脱逮称P神属矗华届狍葑汹育患窒蛰佼静槎运鳗庆逝曼疱克代官此麸耧蚌晟例础榛副测唰缢迹灬霁身岁赭扛又菡乜雾板读陷徉贯郁虑变钓菜圾现琢式乐维渔浜左吾脑钡警T啵拴偌漱湿硕止骼魄积燥联踢玛|则窿见振畿送班钽您赵刨印讨踝籍谡舌崧汽蔽沪酥绒怖财帖肱私莎勋羔霸励哼帐将帅渠纪婴娩岭厘滕吻伤坝冠戊隆瘁介涧物黍并姗奢蹑掣垸锴命箍捉病辖琰眭迩艘绌繁寅若毋思诉类诈燮轲酮狂重反职筱县委磕绣奖晋濉志徽肠呈獐坻口片碰几村柿劳料获亩惕晕厌号罢池正鏖煨家棕复尝懋蜥锅岛扰队坠瘾钬@卧疣镇譬冰彷频黯据垄采八缪瘫型熹砰楠襁箐但嘶绳啤拍盥穆傲洗盯塘怔筛丿台恒喂葛永¥烟酒桦书砂蚝缉态瀚袄圳轻蛛超榧遛姒奘铮右荽望偻卡丶氰附做革索戚坨桷唁垅榻岐偎坛莨山殊微骇陈爨推嗝驹澡藁呤卤嘻糅逛侵郓酌德摇※鬃被慨殡羸昌泡戛鞋河宪沿玲鲨翅哽源铅语照邯址荃佬顺鸳町霭睾瓢夸椁晓酿痈咔侏券噎湍签嚷离午尚社锤背孟使浪缦潍鞅军姹驶笑鳟鲁》孽钜绿洱礴焯椰颖囔乌孔巴互性椽哞聘昨早暮胶炀隧低彗昝铁呓氽藉喔癖瑗姨权胱韦堑蜜酋楝砝毁靓歙锲究屋喳骨辨碑武鸠宫辜烊适坡殃培佩供走蜈迟翼况姣凛浔吃飘债犟金促苛崇坂莳畔绂兵蠕斋根砍亢欢恬崔剁餐榫快扶‖濒缠鳜当彭驭浦篮昀锆秸钳弋娣瞑夷龛苫拱致%嵊障隐弑初娓抉汩累蓖"唬助苓昙押毙破城郧逢嚏獭瞻溱婿赊跨恼璧萃姻貉灵炉密氛陶砸谬衔点琛沛枳层岱诺脍榈埂征冷裁打蹴素瘘逞蛐聊激腱萘踵飒蓟吆取咙簋涓矩曝挺揣座你史舵焱尘苏笈脚溉榨诵樊邓焊义庶儋蟋蒲赦呷杞诠豪还试颓茉太除紫逃痴草充鳕珉祗墨渭烩蘸慕璇镶穴嵘恶骂险绋幕碉肺戳刘潞秣纾潜銮洛须罘销瘪汞兮屉r林厕质探划狸殚善煊烹〒锈逯宸辍泱柚袍远蹋嶙绝峥娥缍雀徵认镱谷=贩勉撩鄯斐洋非祚泾诒饿撬威晷搭芍锥笺蓦候琊档礁沼卵荠忑朝凹瑞头仪弧孵畏铆突衲车浩气茂悖厢枕酝戴湾邹飚攘锂写宵翁岷无喜丈挑嗟绛殉议槽具醇淞笃郴阅饼底壕砚弈询缕庹翟零筷暨舟闺甯撞麂茌蔼很珲捕棠角阉媛娲诽剿尉爵睬韩诰匣危糍镯立浏阳少盆舔擘匪申尬铣旯抖赘瓯居ˇ哮游锭茏歌坏甚秒舞沙仗劲潺阿燧郭嗖霏忠材奂耐跺砀输岖媳氟极摆灿今扔腻枝奎药熄吨话q额慑嘌协喀壳埭视著於愧陲翌峁颅佛腹聋侯咎叟秀颇存较罪哄岗扫栏钾羌己璨枭霉煌涸衿键镝益岢奏连夯睿冥均糖狞蹊稻爸刿胥煜丽肿璃掸跚灾垂樾濑乎莲窄犹撮战馄软络显鸢胸宾妲恕埔蝌份遇巧瞟粒恰剥桡博讯凯堇阶滤卖斌骚彬兑磺樱舷两娱福仃差找桁÷净把阴污戬雷碓蕲楚罡焖抽妫咒仑闱尽邑菁爱贷沥鞑牡嗉崴骤塌嗦订拮滓捡锻次坪杩臃箬融珂鹗宗枚降鸬妯阄堰盐毅必杨崃俺甬状莘货耸菱腼铸唏痤孚澳懒溅翘疙杷淼缙骰喊悉砻坷艇赁界谤纣宴晃茹归饭梢铡街抄肼鬟苯颂撷戈炒咆茭瘙负仰客琉铢封卑珥椿镧窨鬲寿御袤铃萎砖餮脒裳肪孕嫣馗嵇恳氯江石褶冢祸阻狈羞银靳透咳叼敷芷啥它瓤兰痘懊逑肌往捺坊甩呻〃沦忘膻祟菅剧崆智坯臧霍墅攻眯倘拢骠铐庭岙瓠′缺泥迢捶??郏喙掷沌纯秘种听绘固螨团香盗妒埚蓝拖旱荞铀血遏汲辰叩拽幅硬惶桀漠措泼唑齐肾念酱虚屁耶旗砦闵婉馆拭绅韧忏窝醋葺顾辞倜堆辋逆玟贱疾董惘倌锕淘嘀莽俭笏绑鲷杈择蟀粥嗯驰逾案谪褓胫哩昕颚鲢绠躺鹄崂儒俨丝尕泌啊萸彰幺吟骄苣弦脊瑰〈诛镁析闪剪侧哟框螃守嬗燕狭铈缮概迳痧鲲俯售笼痣扉挖满咋援邱扇歪便玑绦峡蛇叨〖泽胃斓喋怂坟猪该蚬炕弥赞棣晔娠挲狡创疖铕镭稷挫弭啾翔粉履苘哦楼秕铂土锣瘟挣栉习享桢袅磨桂谦延坚蔚噗署谟猬钎恐嬉雒倦衅亏璩睹刻殿王算雕麻丘柯骆丸塍谚添鲈垓桎蚯芥予飕镦谌窗醚菀亮搪莺蒿羁足J真轶悬衷靛翊掩哒炅掐冼妮l谐稚荆擒犯陵虏浓崽刍陌
傻孜千靖演矜钕煽杰酗渗伞栋俗泫戍罕沾疽灏煦芬磴叱阱榉湃蜀叉醒彪租郡篷屎良垢隗弱陨峪砷掴颁胎雯绵贬沐撵隘篙暖曹陡栓填臼彦瓶琪潼哪鸡摩啦俟锋域耻蔫疯纹撇毒绶痛酯忍爪赳歆嘹辕烈册朴钱吮毯癜娃谀邵厮炽璞邃丐追词瓒忆轧芫谯喷弟半冕裙掖墉绮寝苔势顷褥切衮君佳嫒蚩霞佚洙逊镖暹唛&殒顶碗獗轭铺蛊废恹汨崩珍那杵曲纺夏薰傀闳淬姘舀拧卷楂恍讪厩寮篪赓乘灭盅鞣沟慎挂饺鼾杳树缨丛絮娌臻嗳篡侩述衰矛圈蚜匕筹匿濞晨叶骋郝挚蚴滞增侍描瓣吖嫦蟒匾圣赌毡癞恺百曳需篓肮庖帏卿驿遗蹬鬓骡歉芎胳屐禽烦晌寄媾狄翡苒船廉终痞殇々畦饶改拆悻萄£瓿乃訾桅匮溧拥纱铍骗蕃龋缬父佐疚栎醍掳蓄x惆颜鲆榆〔猎敌暴谥鲫贾罗玻缄扦芪癣落徒臾恿猩托邴肄牵春陛耀刊拓蓓邳堕寇枉淌啡湄兽酷萼碚濠萤夹旬戮梭琥椭昔勺蜊绐晚孺僵宣摄冽旨萌忙蚤眉噼蟑付契瓜悼颡壁曾窕颢澎仿俑浑嵌浣乍碌褪乱蔟隙玩剐葫箫纲围伐决伙漩瑟刑肓镳缓蹭氨皓典畲坍铑檐塑洞倬储胴淳戾吐灼惺妙毕珐缈虱盖羰鸿磅谓髅娴苴唷蚣霹抨贤唠犬誓逍庠逼麓籼釉呜碧秧氩摔霄穸纨辟妈映完牛缴嗷炊恩荔茆掉紊慌莓羟阙萁磐另蕹辱鳐湮吡吩唐睦垠舒圜冗瞿溺芾囱匠僳汐菩饬漓黑霰浸濡窥毂蒡兢驻鹉芮诙迫雳厂忐臆猴鸣蚪栈箕羡渐莆捍眈哓趴蹼埕嚣骛宏淄斑噜严瑛垃椎诱压庾绞焘廿抡迄棘夫纬锹眨瞌侠脐竞瀑孳骧遁姜颦荪滚萦伪逸粳爬锁矣役趣洒颔诏逐奸甭惠攀蹄泛尼拼阮鹰亚颈惑勒〉际肛爷刚钨丰养冶鲽辉蔻画覆皴妊麦返醉皂擀〗酶凑粹悟诀硖港卜z杀涕±舍铠抵弛段敝镐奠拂轴跛袱et沉菇俎薪峦秭蟹历盟菠寡液肢喻染裱悱抱氙赤捅猛跑氮谣仁尺辊窍烙衍架擦倏璐瑁币楞胖夔趸邛惴饕虔蝎§哉贝宽辫炮扩饲籽魏菟锰伍猝末琳哚蛎邂呀姿鄞却歧仙恸椐森牒寤袒婆虢雅钉朵贼欲苞寰故龚坭嘘咫礼硷兀睢汶’铲烧绕诃浃钿哺柜讼颊璁腔洽咐脲簌筠镣玮鞠谁兼姆挥梯蝴谘漕刷躏宦弼b垌劈麟莉揭笙渎仕嗤仓配怏抬错泯镊孰猿邪仍秋鼬壹歇吵炼<尧射柬廷胧霾凳隋肚浮梦祥株堵退L鹫跎凶毽荟炫栩玳甜沂鹿顽伯爹赔蛴徐匡欣狰缸雹蟆疤默沤啜痂衣禅wih辽葳黝钗停沽棒馨颌肉吴硫悯劾娈马啧吊悌镑峭帆瀣涉咸疸滋泣翦拙癸钥蜒+尾庄凝泉婢渴谊乞陆锉糊鸦淮IBN晦弗乔庥葡尻席橡傣渣拿惩麋斛缃矮蛏岘鸽姐膏催奔镒喱蠡摧钯胤柠拐璋鸥卢荡倾^_珀逄萧塾掇贮笆聂圃冲嵬M滔笕值炙偶蜱搐梆汪蔬腑鸯蹇敞绯仨祯谆梧糗鑫啸豺囹猾巢柄瀛筑踌沭暗苁鱿蹉脂蘖牢热木吸溃宠序泞偿拜檩厚朐毗螳吞媚朽担蝗橘畴祈糟盱隼郜惜珠裨铵焙琚唯咚噪骊丫滢勤棉呸咣淀隔蕾窈饨挨煅短匙粕镜赣撕墩酬馁豌颐抗酣氓佑搁哭递耷涡桃贻碣截瘦昭镌蔓氚甲猕蕴蓬散拾纛狼猷铎埋旖矾讳囊糜迈粟蚂紧鲳瘢栽稼羊锄斟睁桥瓮蹙祉醺鼻昱剃跳篱跷蒜翎宅晖嗑壑峻癫屏狠陋袜途憎祀莹滟佶溥臣约盛峰磁慵婪拦莅朕鹦粲裤哎疡嫖琵窟堪谛嘉儡鳝斩郾驸酊妄胜贺徙傅噌钢栅庇恋匝巯邈尸锚粗佟蛟薹纵蚊郅绢锐苗俞篆淆膀鲜煎诶秽寻涮刺怀噶巨褰魅灶灌桉藕谜舸薄搀恽借牯痉渥愿亓耘杠柩锔蚶钣珈喘蹒幽赐稗晤莱泔扯肯菪裆腩豉疆骜腐倭珏唔粮亡润慰伽橄玄誉醐胆龊粼塬陇彼削嗣绾芽妗垭瘴爽薏寨龈泠弹赢漪猫嘧涂恤圭茧烽屑痕巾赖荸凰腮畈亵蹲偃苇澜艮换骺烘苕梓颉肇哗悄氤涠葬屠鹭植竺佯诣鲇瘀鲅邦移滁冯耕癔戌茬沁巩悠湘洪痹锟循谋腕鳃钠捞焉迎碱伫急榷奈邝卯辄皲卟醛畹忧稳雄昼缩阈睑扌耗曦涅捏瞧邕淖漉铝耦禹湛喽莼琅诸苎纂硅始嗨傥燃臂赅嘈呆贵屹壮肋亍蚀卅豹腆邬迭浊}童螂捐圩勐触寞汊壤荫膺渌芳懿遴螈泰蓼蛤茜舅枫朔膝眙避梅判鹜璜牍缅垫藻黔侥惚懂踩腰腈札丞唾慈顿摹荻琬~斧沈滂胁胀幄莜Z匀鄄掌绰茎焚赋萱谑汁铒瞎夺蜗野娆冀弯篁懵灞隽芡脘俐辩芯掺喏膈蝈觐悚踹蔗熠鼠呵抓橼峨畜缔禾崭弃熊摒凸拗穹蒙抒祛劝闫扳阵醌踪喵侣搬仅荧赎蝾琦买婧瞄寓皎冻赝箩莫瞰郊笫姝筒枪遣煸袋舆痱涛母〇启践耙绲盘遂昊搞槿诬纰泓惨檬亻越Co憩熵祷钒暧塔阗胰咄娶魔琶钞邻扬杉殴咽弓〆髻】吭揽霆拄殖脆彻岩芝勃辣剌钝嘎甄佘皖伦授徕憔挪皇庞稔芜踏溴兖卒擢饥鳞煲‰账颗叻斯捧鳍琮讹蛙纽谭酸兔莒睇伟觑羲嗜宜褐旎辛卦诘筋鎏溪挛熔阜晰鳅丢奚灸呱献陉黛鸪甾萨疮拯洲疹辑叙恻谒允柔烂氏逅漆拎惋扈湟纭啕掬擞哥忽涤鸵靡郗瓷扁廊怨雏钮敦E懦憋汀拚啉腌岸f痼瞅尊咀眩飙忌仝迦熬毫胯篑茄腺凄舛碴锵诧羯後漏汤宓仞蚁壶谰皑铄棰罔辅晶苦牟闽\烃饮聿丙蛳朱煤涔鳖犁罐荼砒淦妤黏戎孑婕瑾戢钵枣捋砥衩狙桠稣阎肃梏诫孪昶婊衫嗔侃塞蜃樵峒貌屿欺缫阐栖诟珞荭吝萍嗽恂啻蜴磬峋俸豫谎徊镍韬魇晴U囟猜蛮坐囿伴亭肝佗蝠妃胞滩榴氖垩苋砣扪馏姓轩厉夥侈禀垒岑赏钛辐痔披纸碳“坞蠓挤荥沅悔铧帼蒌蝇apyng哀浆瑶凿桶馈皮奴苜佤伶晗铱炬优弊氢恃甫攥端锌灰稹炝曙邋亥眶碾拉萝绔捷浍腋姑菖凌涞麽锢桨潢绎镰殆锑渝铬困绽觎匈糙暑裹鸟盔肽迷綦『亳佝俘钴觇骥仆疝跪婶郯瀹唉脖踞针晾忒扼瞩叛椒疟嗡邗肆跆玫忡捣咧唆艄蘑潦笛阚沸泻掊菽贫斥髂孢镂赂麝鸾屡衬苷恪叠希粤爻喝茫惬郸绻庸撅碟宄妹膛叮饵崛嗲椅冤搅咕敛尹垦闷蝉霎勰败蓑泸肤鹌幌焦浠鞍刁舰乙竿裔。茵函伊兄丨娜匍謇莪宥似蝽翳酪翠粑薇祢骏赠叫Q噤噻竖芗莠潭俊羿耜O郫趁嗪囚蹶芒洁笋鹑敲硝啶堡渲揩』携宿遒颍扭棱割萜蔸葵琴捂饰衙耿掠募岂窖涟蔺瘤柞瞪怜匹距楔炜哆秦缎幼茁绪痨恨楸娅瓦桩雪嬴伏榔妥铿拌眠雍缇‘卓搓哌觞噩屈哧髓咦巅娑侑淫膳祝勾姊莴胄疃薛蜷胛巷芙芋熙闰勿窃狱剩钏幢陟铛慧靴耍k浙浇飨惟绗祜澈啼咪磷摞诅郦抹跃壬吕肖琏颤尴剡抠凋赚泊津宕殷倔氲漫邺涎怠$垮荬遵俏叹噢饽蜘孙筵疼鞭羧牦箭潴c眸祭髯啖坳愁芩驮倡巽穰沃胚怒凤槛剂趵嫁v邢灯鄢桐睽檗锯槟婷嵋圻诗蕈颠遭痢芸怯馥竭锗徜恭遍籁剑嘱苡龄僧桑潸弘澶楹悲讫愤腥悸谍椹呢桓葭攫阀翰躲敖柑郎笨橇呃魁燎脓葩磋垛玺狮沓砜蕊锺罹蕉翱虐闾巫旦茱嬷枯鹏贡芹汛矫绁拣禺佃讣舫惯乳趋疲挽岚虾衾蠹蹂飓氦铖孩稞瑜壅掀勘妓畅髋W庐牲蓿榕练垣唱邸菲昆婺穿绡麒蚱掂愚泷涪漳妩娉榄讷觅旧藤煮呛柳腓叭庵烷阡罂蜕擂猖咿媲脉【沏貅黠熏哲烁坦酵兜×潇撒剽珩圹乾摸樟帽嗒襄魂轿憬锡〕喃皆咖隅脸残泮袂鹂珊囤捆咤误徨闹淙芊淋怆囗拨梳渤RG绨蚓婀幡狩麾谢唢裸旌伉纶裂驳砼咛澄樨蹈宙澍倍貔操勇蟠摈砧虬够缁悦藿撸艹摁淹豇虎榭ˉ吱d°喧荀踱侮奋偕饷犍惮坑璎徘宛妆袈倩窦昂荏乖K怅撰鳙牙袁酞X痿琼闸雁趾荚虻涝《杏韭偈烤绫鞘卉症遢蓥诋杭荨匆竣簪辙敕虞丹缭咩黟m淤瑕咂铉硼茨嶂痒畸敬涿粪窘熟叔嫔盾忱裘憾梵赡珙咯娘庙溯胺葱痪摊荷卞乒髦寐铭坩胗枷爆溟嚼羚砬轨惊挠罄竽菏氧浅楣盼枢炸阆杯谏噬淇渺俪秆墓泪跻砌痰垡渡耽釜讶鳎煞呗韶舶绷鹳缜旷铊皱龌檀霖奄槐艳蝶旋哝赶骞蚧腊盈丁`蜚矸蝙睨嚓僻鬼醴夜彝磊笔拔栀糕厦邰纫逭纤眦膊馍躇烯蘼冬诤暄骶哑瘠」臊丕愈咱螺擅跋搏硪谄笠淡嘿骅谧鼎皋姚歼蠢驼耳胬挝涯狗蒽孓犷凉芦箴铤孤嘛坤V茴朦挞尖橙诞搴碇洵浚帚蜍漯柘嚎讽芭荤咻祠秉跖埃吓糯眷馒惹娼鲑嫩讴轮瞥靶褚乏缤宋帧删驱碎扑俩俄偏涣竹噱皙佰渚唧斡#镉刀崎筐佣夭贰肴峙哔艿匐牺镛缘仡嫡劣枸堀梨簿鸭蒸亦稽浴{衢束槲j阁揍疥棋潋聪窜乓睛插冉阪苍搽「蟾螟幸仇樽撂慢跤幔俚淅覃觊溶妖帛侨曰妾泗 """ -# -*- coding: utf-8 -*- -from keras.optimizers import SGD -from keras.layers import Lambda -from keras import backend as K -from keras.layers import Flatten, BatchNormalization, Permute, TimeDistributed, Dense, Bidirectional, GRU -from keras.layers import Input, Conv2D, MaxPooling2D, ZeroPadding2D -from keras.models import Model -rnnunit = 256 - - -def ctc_lambda_func(args): - y_pred, labels, input_length, label_length = args - # print("cccccccccc:",y_pred,labels,input_length,label_length) - y_pred = y_pred[:, 2:, :] - - return K.ctc_batch_cost(labels, y_pred, input_length, label_length) - - -def get_model(height, nclass, learning_rate): - input = Input(shape=(height, None, 1), name='the_input') - m = Conv2D(64, kernel_size=(3, 3), 
               activation='relu',
-               padding='same', name='conv1')(input)
-    m = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool1')(m)
-    m = Conv2D(128, kernel_size=(3, 3), activation='relu',
-               padding='same', name='conv2')(m)
-    m = MaxPooling2D(pool_size=(2, 2), strides=(2, 2), name='pool2')(m)
-    m = Conv2D(256, kernel_size=(3, 3), activation='relu',
-               padding='same', name='conv3')(m)
-    m = Conv2D(256, kernel_size=(3, 3), activation='relu',
-               padding='same', name='conv4')(m)
-
-    m = ZeroPadding2D(padding=(0, 1))(m)
-    m = MaxPooling2D(pool_size=(2, 2), strides=(2, 1),
-                     padding='valid', name='pool3')(m)
-
-    m = Conv2D(512, kernel_size=(3, 3), activation='relu',
-               padding='same', name='conv5')(m)
-    m = BatchNormalization(axis=1)(m)
-    m = Conv2D(512, kernel_size=(3, 3), activation='relu',
-               padding='same', name='conv6')(m)
-    m = BatchNormalization(axis=1)(m)
-    m = ZeroPadding2D(padding=(0, 1))(m)
-    m = MaxPooling2D(pool_size=(2, 2), strides=(2, 1),
-                     padding='valid', name='pool4')(m)
-    m = Conv2D(512, kernel_size=(2, 2), activation='relu',
-               padding='valid', name='conv7')(m)
-
-    # Permute reorders the input dimensions to a given pattern, e.g. when
-    # feeding CNN feature maps into an RNN.
-    m = Permute((2, 1, 3), name='permute')(m)
-    m = TimeDistributed(Flatten(), name='timedistrib')(m)
-    # After the CNN comes a bidirectional GRU that emits a fixed-length
-    # sequence (the encoding step); a second bidirectional GRU then decodes
-    # that sequence. The decoded sequence has width rnnunit (256).
-    # bidirectional GRU after the CNN
-    m = Bidirectional(GRU(rnnunit, return_sequences=True), name='blstm1')(m)
-    # fully connected layer; rnnunit is its output dimension
-    m = Dense(rnnunit, name='blstm1_out', activation='linear')(m)
-    # second bidirectional GRU
-    m = Bidirectional(GRU(rnnunit, return_sequences=True), name='blstm2')(m)
-    # fully connected output layer
-    y_pred = Dense(nclass, name='blstm2_out', activation='softmax')(m)
-    # assemble the model
-    basemodel = Model(inputs=input, outputs=y_pred)
-
-    labels = Input(name='the_labels', shape=[None, ], dtype='float32')
-    input_length = Input(name='input_length', shape=[1], dtype='int64')
-    label_length = Input(name='label_length', shape=[1], dtype='int64')
-
-    loss_out = Lambda(ctc_lambda_func, output_shape=(1,), name='ctc')(
-        [y_pred, labels, input_length, label_length])
-    model = Model(inputs=[input, labels, input_length,
-                          label_length], outputs=[loss_out])
-    sgd = SGD(lr=learning_rate, decay=1e-6,
-              momentum=0.9, nesterov=True, clipnorm=5)
-    # sgd = SGD(lr=0.001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5)
-    # model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adadelta')
-    model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd)
-    model.summary()
-    return model, basemodel
-# -*- coding: utf-8 -*-
-from model import get_model
-import dataset
-import keys
-import numpy as np
-import torch
-
-characters = keys.alphabet[:]
-
-nclass = len(characters) + 1
-
-trainroot = '../data/lmdb/train'
-valroot = '../data/lmdb/val'
-batchSize = 32
-workers = 4
-imgH = 32
-imgW = 256
-keep_ratio = False
-random_sample = False
-
-
-def one_hot(text, length=10, characters=characters):
-    label = np.zeros(length)
-    # print(type(text))
-    for i, char in enumerate(text):
-        index = characters.find(char)
-        if index == -1:
-            index = characters.find(u' ')
-        # print(i,char,length)
-        # if i < length:
-        label[i] = index
-    return label
-
-
-n_len = 10
-
-
-def gen(loader, flag='train'):
-    while True:
-        i = 0
-        n = len(loader)
-        for X, Y in loader:
-            X = X.numpy()
-            X = X.reshape((-1, imgH, imgW, 1))
-            if flag == 'test':
-                Y = Y.numpy()
-
-            Y = np.array(Y)
-            Length = int(imgW / 4) - 1
-            batchs = X.shape[0]
-            # Y = Y.numpy()
-            if i > n - 1:
-                i = 0
-                break
-
-            yield [
-                X, Y,
-                np.ones(batchs) * int(Length),
-                np.ones(batchs) * n_len
-            ], np.ones(batchs)
-
-
-# the dataset must exist before the sampler can wrap it
-train_dataset = dataset.lmdbDataset(root=trainroot, target_transform=one_hot)
-if random_sample:
-    sampler = dataset.randomSequentialSampler(train_dataset, batchSize)
-else:
-    sampler = None
-
-train_loader = torch.utils.data.DataLoader(
-    train_dataset,
-    batch_size=batchSize,
-    shuffle=True,
-    sampler=sampler,
-    num_workers=int(workers),
-    collate_fn=dataset.alignCollate(
-        imgH=imgH, imgW=imgW, keep_ratio=keep_ratio))
-
-test_dataset = dataset.lmdbDataset(
-    root=valroot,
-    transform=dataset.resizeNormalize((imgW, imgH)),
-    target_transform=one_hot)
-
-test_loader = torch.utils.data.DataLoader(
-    test_dataset, shuffle=True, batch_size=batchSize, num_workers=int(workers))
-
-if __name__ == '__main__':
-    from keras.callbacks import ModelCheckpoint, ReduceLROnPlateau
-
-    model, basemodel = get_model(height=imgH, nclass=nclass)
-    import os
-
-    if os.path.exists(
-            '/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/save_model/my_model_keras.h5'
-    ):
-        basemodel.load_weights(
-            '/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/save_model/my_model_keras.h5'
-        )
-
-    # NOTE: this checkpoint saves the model weights
-    checkpointer = ModelCheckpoint(
-        filepath="/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/save_model/my_model_keras.h5",
-        monitor='val_loss',
-        verbose=0,
-        save_weights_only=False,
-        save_best_only=True)
-    rlu = ReduceLROnPlateau(
-        monitor='val_loss',
-        factor=0.1,
-        patience=1,
-        verbose=0,
-        mode='auto',
-        epsilon=0.0001,
-        cooldown=0,
-        min_lr=0)
-
-    model.fit_generator(
-        gen(train_loader, flag='train'),
-        steps_per_epoch=102400,
-        epochs=200,
-        validation_data=gen(test_loader, flag='test'),
-        callbacks=[checkpointer, rlu],
-        validation_steps=1024)
-# -*- coding: utf-8 -*-
-from model import get_model
-from keras.utils import plot_model
-from keras.callbacks import TensorBoard
-import keras.backend.tensorflow_backend as KTF
-import graphviz
-import pydot
-import tensorflow as tf
-import dataset
-import keys_keras
-import numpy as np
-import torch
-import time
-import os
-import sys
-sys.path.insert(0, os.getcwd())
-
-characters = keys_keras.alphabet[:]
-nclass = len(characters) + 1
-trainroot = '../data/lmdb/train'
-valroot = '../data/lmdb/val'
-# modelPath = '../pretrain-models/keras.hdf5'
-modelPath = '/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/save_model/my_model_keras.h5'
-workers = 4
-imgH = 32
-imgW = 256
-keep_ratio = False
-random_sample = False
-batchSize = 32
-testSize = 16
-n_len = 50
-loss = 1000
-interval = 50
-LEARNING_RATE = 0.01
-Learning_decay_step = 20000
-PERCEPTION = 0.3
-EPOCH_NUMS = 1000000
-MODEL_PATH = '/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/save_model'
-LOG_FILE = 'log.txt'
-SUMMARY_PATH = './log/'
-if not os.path.exists(MODEL_PATH):
-    print('Creating save model path!!')
-    os.makedirs(MODEL_PATH)
-if not os.path.exists(SUMMARY_PATH):
-    os.makedirs(SUMMARY_PATH)
-
-model, basemodel = get_model(
-    height=imgH, nclass=nclass, learning_rate=LEARNING_RATE)
-
-config = tf.ConfigProto(intra_op_parallelism_threads=2)
-config.gpu_options.per_process_gpu_memory_fraction = PERCEPTION
-KTF.set_session(tf.Session(config=config))
-
-# load pretrained parameters
-if os.path.exists(modelPath):
-    # basemodel.load_weights(modelPath)
-    model.load_weights(modelPath)
-
-plot_model(basemodel, to_file='basemodel.png')
-plot_model(model, to_file='model.png')
-
-
-def one_hot(text, length=10, characters=characters):
-    label = np.zeros(length)
-    for i, char in enumerate(text):
-        index = characters.find(char)
-        if index == -1:
-            index = characters.find(u' ')
-        if i < length:
-            label[i] = index
-    return label
-
-
-# load the data (the dataset must exist before the sampler can wrap it)
-train_dataset = dataset.lmdbDataset(root=trainroot, target_transform=one_hot)
-# print(len(train_dataset))
-if random_sample:
-    sampler = dataset.randomSequentialSampler(train_dataset, batchSize)
-else:
-    sampler = None
-
-test_dataset = dataset.lmdbDataset(
-    root=valroot,
-    transform=dataset.resizeNormalize((imgW, imgH)),
-    target_transform=one_hot)
-
-# build the training data loader
-train_loader = torch.utils.data.DataLoader(
-    train_dataset,
-    batch_size=batchSize,
-    shuffle=True,
-    sampler=sampler,
-    num_workers=int(workers),
-    collate_fn=dataset.alignCollate(
-        imgH=imgH, imgW=imgW, keep_ratio=keep_ratio))
-
-test_loader = torch.utils.data.DataLoader(
-    test_dataset, batch_size=testSize, shuffle=True, num_workers=int(workers))
-
-j = 0
-print('Start training!!')
-for i in range(EPOCH_NUMS):
-    for X, Y in train_loader:
-        start = time.time()
-        X = X.numpy()
-        X = X.reshape((-1, imgH, imgW, 1))
-        Y = np.array(Y)
-        Length = int(imgW / 4) - 2
-        batch = X.shape[0]
-        X_train, Y_train = [X, Y,
-                            np.ones(batch) * Length,
-                            np.ones(batch) * n_len], np.ones(batch)
-        print('IMG_SHAPE:', np.shape(X))
-        print('LABEL_SHAPE:', np.shape(Y))
-        # print(np.shape(X_train))
-        model.train_on_batch(X_train, Y_train)
-        if j % interval == 0:
-            times = time.time() - start
-            currentLoss_train = model.evaluate(X_train, Y_train)
-            X, Y = next(iter(test_loader))
-            X = X.numpy()
-            X = X.reshape((-1, imgH, imgW, 1))
-            Y = Y.numpy()
-            Y = np.array(Y)
-            batch = X.shape[0]
-            X_val, Y_val = [
-                X, Y, np.ones(batch) * Length,
-                np.ones(batch) * n_len], np.ones(batch)
-            currentLoss = model.evaluate(X_val, Y_val)
-            print('Learning rate is: ', LEARNING_RATE)
-            now_time = time.strftime('%Y/%m/%d-%H:%M:%S',
-                                     time.localtime(time.time()))
-            print('Time: [%s]--Step/Epoch/Total: [%d/%d/%d]' % (now_time, j, i,
-                                                                EPOCH_NUMS))
-            print('\tTraining Loss is: [{}]'.format(currentLoss_train))
-            print('\tVal Loss is: [{}]'.format(currentLoss))
-            print('\tSpeed is: [{}] Samples/Secs'.format(interval / times))
-            path = MODEL_PATH + '/my_model_keras.h5'
-            with open(LOG_FILE, mode='a') as log_file:
-                log_str = now_time + '----global_step:' + str(
-                    j) + '----loss:' + str(currentLoss) + '\n'
-                log_file.writelines(log_str)
-            print('\tWriting to the file: log.txt')
-            print("\tSave model to disk: {}".format(path))
-            model.save(path)
-            if currentLoss < loss:
-                loss = currentLoss
-        if j > 0 and j % Learning_decay_step == 0:
-            LEARNING_RATE_ori = LEARNING_RATE
-            LEARNING_RATE = 0.5 * LEARNING_RATE
-            print('\tUpdating Learning rate from {} to {}'.format(
-                LEARNING_RATE_ori, LEARNING_RATE))
-        j += 1
-# -*- coding: utf-8 -*-
-from __future__ import print_function
-
-import argparse
-import os
-import random
-# from manager_torch import GPUManager
-import dataset
-# Alphabet = [e.encode('utf-8') for e in alphabet]
-import models.crnn as crnn
-import numpy as np
-import torch
-import torch.backends.cudnn as cudnn
-import torch.optim as optim
-import torch.utils.data
-import utils
-from keys import alphabet
-from torch.autograd import Variable
-from warpctc_pytorch import CTCLoss
-
-# with open('../run/char.txt') as f:
-#     newChars = f.read().strip().decode('utf-8')
-# alphabet += u''.join(list(set(newChars) - set(alphabet)))
-
-parser = argparse.ArgumentParser()
-parser.add_argument(
-    '--trainroot', help='path to dataset', default='../data/lmdb/train')
-parser.add_argument(
-    '--valroot', help='path to dataset', default='../data/lmdb/val')
-parser.add_argument(
-    '--workers', type=int, help='number of data loading workers', default=4)
-parser.add_argument(
-    '--batchSize', type=int, default=128, help='input batch size')
-parser.add_argument(
-    '--imgH',
-    type=int,
-    default=32,
-    help='the height of the input image to network')
-parser.add_argument(
-    '--imgW',
-    type=int,
-    default=256,
-    help='the width of the input image to network')
-parser.add_argument(
-    '--nh', type=int, default=256, help='size of the lstm hidden state')
-parser.add_argument(
-    '--niter', type=int, default=1000000, help='number of epochs to train for')
-parser.add_argument(
-    '--lr',
-    type=float,
-    default=0.005,
-    help='learning rate, default=0.005')
-parser.add_argument(
-    '--beta1', type=float, default=0.5, help='beta1 for adam. default=0.5')
-parser.add_argument('--cuda', action='store_true', help='enables cuda')
-parser.add_argument(
-    '--ngpu', type=int, default=1, help='number of GPUs to use')
-parser.add_argument(
-    '--crnn',
-    help="path to crnn (to continue training)",
-    default='/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/save_model/netCRNN.pth'
-)
-# parser.add_argument('--crnn', help="path to crnn (to continue training)",default='')
-parser.add_argument('--alphabet', default=alphabet)
-parser.add_argument(
-    '--experiment',
-    help='Where to store samples and models',
-    default='./save_model')
-parser.add_argument(
-    '--displayInterval', type=int, default=50, help='display interval')
-parser.add_argument(
-    '--n_test_disp',
-    type=int,
-    default=1000,
-    help='Number of samples to display when test')
-parser.add_argument(
-    '--valInterval', type=int, default=100, help='validation interval')
-parser.add_argument(
-    '--saveInterval', type=int, default=1000, help='checkpoint save interval')
-parser.add_argument(
-    '--adam',
-    action='store_true',
-    help='Whether to use adam (default is rmsprop)')
-parser.add_argument(
-    '--adadelta',
-    action='store_true',
-    help='Whether to use adadelta (default is rmsprop)')
-parser.add_argument(
-    '--keep_ratio',
-    action='store_true',
-    help='whether to keep ratio for image resize')
-parser.add_argument(
-    '--random_sample',
-    action='store_true',
-    help='whether to sample the dataset with random sampler')
-opt = parser.parse_args()
-print(opt)
-ifUnicode = True
-if opt.experiment is None:
-    opt.experiment = 'expr'
-os.system('mkdir {0}'.format(opt.experiment))
-
-opt.manualSeed = random.randint(1, 10000)  # fix seed
-print("Random Seed: ", opt.manualSeed)
-random.seed(opt.manualSeed)
-np.random.seed(opt.manualSeed)
-torch.manual_seed(opt.manualSeed)
-
-cudnn.benchmark = True
-
-if torch.cuda.is_available() and not opt.cuda:
-    print(
-        "WARNING: You have a CUDA device, so you should probably run with --cuda"
-    )
-
-train_dataset = dataset.lmdbDataset(root=opt.trainroot)
-assert train_dataset
-if not opt.random_sample:
-    sampler = dataset.randomSequentialSampler(train_dataset, opt.batchSize)
-else:
-    sampler = None
-train_loader = torch.utils.data.DataLoader(
-    train_dataset,
-    batch_size=opt.batchSize,
-    shuffle=False,
-    sampler=sampler,
-    num_workers=int(opt.workers),
-    collate_fn=dataset.alignCollate(
-        imgH=opt.imgH, imgW=opt.imgW, keep_ratio=opt.keep_ratio))
-test_dataset = dataset.lmdbDataset(
-    root=opt.valroot, transform=dataset.resizeNormalize((256, 32)))
-
-ngpu = int(opt.ngpu)
-nh = int(opt.nh)
-alphabet = opt.alphabet
-nclass = len(alphabet) + 1
-nc = 1
-
-converter = utils.strLabelConverter(alphabet)
-criterion = CTCLoss()
-
-
-# custom weights initialization called on crnn
-def weights_init(m):
-    classname = m.__class__.__name__
-    if classname.find('Conv') != -1:
-        m.weight.data.normal_(0.0, 0.02)
-    elif classname.find('BatchNorm') != -1:
-        m.weight.data.normal_(1.0, 0.02)
-        m.bias.data.fill_(0)
-
-
-# build the network
-crnn = crnn.CRNN(opt.imgH, nc, nclass, nh, ngpu)
-crnn.apply(weights_init)
-if opt.crnn != '':
-    print('loading pretrained model from %s' % opt.crnn)
-    crnn.load_state_dict(torch.load(opt.crnn))
-print(crnn)
-
-image = torch.FloatTensor(opt.batchSize, 3, opt.imgH, opt.imgH)
-text = torch.IntTensor(opt.batchSize * 5)
-length = torch.IntTensor(opt.batchSize)
-
-if opt.cuda:
-    crnn.cuda()
-    image = image.cuda()
-    criterion = criterion.cuda()
-
-image = Variable(image)
-text = Variable(text)
-length = Variable(length)
-
-# loss averager
-loss_avg = utils.averager()
-
-# setup optimizer
-if opt.adam:
-    optimizer = optim.Adam(
-        crnn.parameters(), lr=opt.lr, betas=(opt.beta1, 0.999))
-elif opt.adadelta:
-    optimizer = optim.Adadelta(crnn.parameters(), lr=opt.lr)
-else:
-    optimizer = optim.RMSprop(crnn.parameters(), lr=opt.lr)
-
-
-def val(net, dataset, criterion, max_iter=2):
-    print('Start val')
-
-    for p in crnn.parameters():
-        p.requires_grad = False
-
-    net.eval()
-    data_loader = torch.utils.data.DataLoader(
-        dataset,
-        shuffle=True,
-        batch_size=opt.batchSize,
-        num_workers=int(opt.workers))
-    val_iter = iter(data_loader)
-
-    i = 0
-    n_correct = 0
-    loss_avg = utils.averager()
-
-    max_iter = min(max_iter, len(data_loader))
-    for i in range(max_iter):
-        data = next(val_iter)
-        cpu_images, cpu_texts = data
-        batch_size = cpu_images.size(0)
-        utils.loadData(image, cpu_images)
-        if ifUnicode:
-            cpu_texts = [clean_txt(tx.decode('utf-8')) for tx in cpu_texts]
-        t, l = converter.encode(cpu_texts)
-        utils.loadData(text, t)
-        utils.loadData(length, l)
-
-        preds = crnn(image)
-        preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
-        cost = criterion(preds, text, preds_size, length) / batch_size
-        loss_avg.add(cost)
-
-        _, preds = preds.max(2)
-        preds = preds.squeeze(2)
-        preds = preds.transpose(1, 0).contiguous().view(-1)
-        sim_preds = converter.decode(preds.data, preds_size.data, raw=False)
-        for pred, target in zip(sim_preds, cpu_texts):
-            if pred.strip() == target.strip():
-                n_correct += 1
-
-    raw_preds = converter.decode(
-        preds.data, preds_size.data, raw=True)[:opt.n_test_disp]
-    # for raw_pred, pred, gt in zip(raw_preds, sim_preds, cpu_texts):
-    #     print((pred, gt))
-    # print
-    accuracy = n_correct / float(max_iter * opt.batchSize)
-    testLoss = loss_avg.val()
-    # print('Test loss: %f, accuracy: %f' % (testLoss, accuracy))
-    return testLoss, accuracy
-
-
-def clean_txt(txt):
-    """
-    Replace characters that are not in the alphabet with ' '.
-    """
-    newTxt = u''
-    for t in txt:
-        if t in alphabet:
-            newTxt += t
-        else:
-            newTxt += u' '
-    return newTxt
-
-
-def trainBatch(net, criterion, optimizer, flag=False):
-    data = next(train_iter)
-    cpu_images, cpu_texts = data  # decode utf-8 to unicode
-    if ifUnicode:
-        cpu_texts = [clean_txt(tx) for tx in cpu_texts]
-
-    batch_size = cpu_images.size(0)
-    utils.loadData(image, cpu_images)
-    t, l = converter.encode(cpu_texts)
-    utils.loadData(text, t)
-    utils.loadData(length, l)
-
-    preds = crnn(image)
-    preds_size = Variable(torch.IntTensor([preds.size(0)] * batch_size))
-    cost = criterion(preds, text, preds_size, length) / batch_size
-    crnn.zero_grad()
-    cost.backward()
-    if flag:
-        lr = 0.0001
-        optimizer = optim.Adadelta(crnn.parameters(), lr=lr)
-    optimizer.step()
-    return cost
-
-
-num = 0
-lasttestLoss = 10000
-testLoss = 10000
-
-
-def delete(path):
-    """
-    Delete all saved .pth checkpoints under path.
-    """
-    import os
-    import glob
-    paths = glob.glob(path + '/*.pth')
-    for p in paths:
-        os.remove(p)
-
-
-numLoss = 0  # counts validation rounds since the loss last improved
-
-for epoch in range(opt.niter):
-    train_iter = iter(train_loader)
-    i = 0
-    while i < len(train_loader):
-        # print('The step{} ........\n'.format(i))
-        for p in crnn.parameters():
-            p.requires_grad = True
-        crnn.train()
-        # if numLoss > 50:
-        #     cost = trainBatch(crnn, criterion, optimizer, True)
-        #     numLoss = 0
-        # else:
-        cost = trainBatch(crnn, criterion, optimizer)
-        loss_avg.add(cost)
-        i += 1
-
-        # if i % opt.displayInterval == 0:
-        #     print('[%d/%d][%d/%d] Loss: %f' %
-        #           (epoch, opt.niter, i, len(train_loader), loss_avg.val()))
-        #     loss_avg.reset()
-
-        if i % opt.valInterval == 0:
-            testLoss, accuracy = val(crnn, test_dataset, criterion)
-            # print('Test loss: %f, accuracy: %f' % (testLoss, accuracy))
-            print("epoch:{},step:{},Test loss:{},accuracy:{},train loss:{}".
-                  format(epoch, num, testLoss, accuracy, loss_avg.val()))
-            loss_avg.reset()
-            print('Save model to:', opt.experiment)
-            torch.save(crnn.state_dict(), '{}/netCRNN.pth'.format(
-                opt.experiment))
-            # do checkpointing
-            num += 1
-            # lasttestLoss = min(lasttestLoss, testLoss)
-
-            if lasttestLoss > testLoss:
-                print("The step {},last loss:{}, current: {},save model!".format(
-                    num, lasttestLoss, testLoss))
-                lasttestLoss = testLoss
-                # delete(opt.experiment)  # delete old checkpoints
-                torch.save(crnn.state_dict(), '{}/netCRNN.pth'.format(
-                    opt.experiment))
-                numLoss = 0
-            else:
-                numLoss += 1
-#!/usr/bin/python
-# encoding: utf-8
-
-import random
-import sys
-
-import lmdb
-import numpy as np
-import six
-import torch
-import torchvision.transforms as transforms
-from PIL import Image
-from torch.utils.data import Dataset
-from torch.utils.data import sampler
-
-
-class lmdbDataset(Dataset):
-    def __init__(self, root=None, transform=None, target_transform=None):
-        self.env = lmdb.open(
-            root,
-            max_readers=1,
-            readonly=True,
-            lock=False,
-            readahead=False,
-            meminit=False)
-
-        if not self.env:
-            print('cannot create lmdb from %s' % (root))
-            sys.exit(0)
-
-        with self.env.begin(write=False) as txn:
-            nSamples = int(txn.get('num-samples'.encode()))
-            self.nSamples = nSamples
-
-        self.transform = transform
-        self.target_transform = target_transform
-
-    def __len__(self):
-        return self.nSamples
-
-    def __getitem__(self, index):
-        assert index <= len(self), 'index range error'
-        index += 1
-        with self.env.begin(write=False) as txn:
-            img_key = 'image-%09d' % index
-            imgbuf = txn.get(img_key.encode())
-
-            buf = six.BytesIO()
-            buf.write(imgbuf)
-            buf.seek(0)
-            try:
-                img = Image.open(buf).convert('L')
-            except IOError:
-                print('Corrupted image for %d' % index)
-                return self[index + 1]
-
-            if self.transform is not None:
-                img = self.transform(img)
-
-            label_key = 'label-%09d' % index
-            label = str(txn.get(label_key.encode()), 'utf-8')
-
-            if self.target_transform is not None:
-                label = self.target_transform(label)
-
-            return (img, label)
-
-
-class resizeNormalize(object):
-    def __init__(self, size, interpolation=Image.BILINEAR):
-        self.size = size
-        self.interpolation = interpolation
-        self.toTensor = transforms.ToTensor()
-
-    def __call__(self, img):
-        img = img.resize(self.size, self.interpolation)
-        img = self.toTensor(img)
-        img.sub_(0.5).div_(0.5)
-        return img
-
-
-class randomSequentialSampler(sampler.Sampler):
-    def __init__(self, data_source, batch_size):
-        self.num_samples = len(data_source)
-        self.batch_size = batch_size
-
-    def __iter__(self):
-        n_batch = len(self) // self.batch_size
-        tail = len(self)
% self.batch_size - index = torch.LongTensor(len(self)).fill_(0) - for i in range(n_batch): - random_start = random.randint(0, len(self) - self.batch_size) - batch_index = random_start + torch.range(0, self.batch_size - 1) - index[i * self.batch_size:(i + 1) * self.batch_size] = batch_index - # deal with tail - if tail: - random_start = random.randint(0, len(self) - self.batch_size) - tail_index = random_start + torch.range(0, tail - 1) - index[(i + 1) * self.batch_size:] = tail_index - - return iter(index) - - def __len__(self): - return self.num_samples - - -class alignCollate(object): - def __init__(self, imgH=32, imgW=100, keep_ratio=False, min_ratio=1): - self.imgH = imgH - self.imgW = imgW - self.keep_ratio = keep_ratio - self.min_ratio = min_ratio - - def __call__(self, batch): - images, labels = zip(*batch) - - imgH = self.imgH - imgW = self.imgW - if self.keep_ratio: - ratios = [] - for image in images: - w, h = image.size - ratios.append(w / float(h)) - ratios.sort() - max_ratio = ratios[-1] - imgW = int(np.floor(max_ratio * imgH)) - imgW = max(imgH * self.min_ratio, imgW) # assure imgH >= imgW - - transform = resizeNormalize((imgW, imgH)) - images = [transform(image) for image in images] - images = torch.cat([t.unsqueeze(0) for t in images], 0) - - return images, labels -# coding:UTF-8 -alphabet = u'\'疗绚诚娇溜题贿者廖更纳加奉公一就汴计与路房原妇208-7其>:],,骑刈全消昏傈安久钟嗅不影处驽蜿资关椤地瘸专问忖票嫉炎韵要月田节陂鄙捌备拳伺眼网盎大傍心东愉汇蹿科每业里航晏字平录先13彤鲶产稍督腴有象岳注绍在泺文定核名水过理让偷率等这发”为含肥酉相鄱七编猥锛日镀蒂掰倒辆栾栗综涩州雌滑馀了机块司宰甙兴矽抚保用沧秩如收息滥页疑埠!!姥异橹钇向下跄的椴沫国绥獠报开民蜇何分凇长讥藏掏施羽中讲派嘟人提浼间世而古多倪唇饯控庚首赛蜓味断制觉技替艰溢潮夕钺外摘枋动双单啮户枇确锦曜杜或能效霜盒然侗电晁放步鹃新杖蜂吒濂瞬评总隍对独合也是府青天诲墙组滴级邀帘示已时骸仄泅和遨店雇疫持巍踮境只亨目鉴崤闲体泄杂作般轰化解迂诿蛭璀腾告版服省师小规程线海办引二桧牌砺洄裴修图痫胡许犊事郛基柴呼食研奶律蛋因葆察戏褒戒再李骁工貂油鹅章啄休场给睡纷豆器捎说敏学会浒设诊格廓查来霓室溆¢诡寥焕舜柒狐回戟砾厄实翩尿五入径惭喹股宇篝|;美期云九祺扮靠锝槌系企酰阊暂蚕忻豁本羹执条钦H獒限进季楦于芘玖铋茯未答粘括样精欠矢甥帷嵩扣令仔风皈行支部蓉刮站蜡救钊汗松嫌成可.鹤院从交政怕活调球局验髌第韫谗串到圆年米/*友忿检区看自敢刃个兹弄流留同没齿星聆轼湖什三建蛔儿椋汕震颧鲤跟力情璺铨陪务指族训滦鄣濮扒商箱十召慷辗所莞管护臭横硒嗓接侦六露党馋驾剖高侬妪幂猗绺骐央酐孝筝课徇缰门男西项句谙瞒秃篇教碲罚声呐景前富嘴鳌稀免朋啬睐去赈鱼住肩愕速旁波厅健茼厥鲟谅投攸炔数方击呋谈绩别愫僚躬鹧胪炳招喇膨泵蹦毛结54谱识陕粽婚拟构且搜任潘比郢妨醪陀桔碘扎选哈骷楷亿明缆脯监睫逻婵共赴淝凡惦及达揖谩澹减焰蛹番祁柏员禄怡峤龙白叽生闯起细装谕竟聚钙上导渊按艾辘挡耒盹饪臀记邮蕙受各医搂普滇朗茸带翻酚(光堤墟蔷万幻〓瑙辈昧盏亘蛀吉铰请子假闻税井诩哨嫂好面琐校馊鬣缂营访炖占农缀否经钚棵趟张亟吏茶谨捻论迸堂玉信吧瞠乡姬寺咬溏苄皿意赉宝尔钰艺特唳踉都荣倚登荐丧奇涵批炭近符傩感道着菊虹仲众懈濯颞眺南释北缝标既茗整撼迤贲挎耱拒某妍卫哇英矶藩治他元领膜遮穗蛾飞荒棺劫么市火温拈棚洼转果奕卸迪伸泳斗邡侄涨屯萋胭氡崮枞惧冒彩斜手豚随旭淑妞形菌吲沱争驯歹挟兆柱传至包内响临红功弩衡寂禁老棍耆渍织害氵渑布载靥嗬虽苹咨娄库雉榜帜嘲套瑚亲簸欧边6腿旮抛吹瞳得镓梗厨继漾愣憨士策窑抑躯襟脏参贸言干绸鳄穷藜音折详)举悍甸癌黎谴死罩迁寒驷袖媒蒋掘模纠恣观祖蛆碍位稿主澧跌筏京锏帝贴证糠才黄鲸略炯饱四出园犀牧容汉杆浈汰瑷造虫瘩怪驴济应花沣谔夙旅价矿以考su呦晒巡茅准肟瓴詹仟褂译桌混宁怦郑抿些余鄂饴攒珑群阖岔琨藓预环洮岌宀杲瀵最常囡周踊女鼓袭喉简范薯遐疏粱黜禧法箔斤遥汝奥直贞撑置绱集她馅逗钧橱魉[恙躁唤9旺膘待脾惫购吗依盲度瘿蠖俾之镗拇鲵厝簧续款展啃表剔品钻腭损清锶统涌寸滨贪链吠冈伎迥咏吁览防迅失汾阔逵绀蔑列川凭努熨揪利俱绉抢鸨我即责膦易毓鹊刹玷岿空嘞绊排术估锷违们苟铜播肘件烫审鲂广像铌惰铟巳胍鲍康憧色恢想拷尤疳知SYFDA峄裕帮握搔氐氘难墒沮雨叁缥悴藐湫娟苑稠颛簇后阕闭蕤缚怎佞码嘤蔡痊舱螯帕赫昵升烬岫、疵蜻髁蕨隶烛械丑盂梁强鲛由拘揉劭龟撤钩呕孛费妻漂求阑崖秤甘通深补赃坎床啪承吼量暇钼烨阂擎脱逮称P神属矗华届狍葑汹育患窒蛰佼静槎运鳗庆逝曼疱克代官此麸耧蚌晟例础榛副测唰缢迹灬霁身岁赭扛又菡乜雾板读陷徉贯郁虑变钓菜圾现琢式乐维渔浜左吾脑钡警T啵拴偌漱湿硕止骼魄积燥联踢玛|则窿见振畿送班钽您赵刨印讨踝籍谡舌崧汽蔽沪酥绒怖财帖肱私莎勋羔霸励哼帐将帅渠纪婴娩岭厘滕吻伤坝冠戊隆瘁介涧物黍并姗奢蹑掣垸锴命箍捉病辖琰眭迩艘绌繁寅若毋思诉类诈燮轲酮狂重反职筱县委磕绣奖晋濉志徽肠呈獐坻口片碰几村柿劳料获亩惕晕厌号罢池正鏖煨家棕复尝懋蜥锅岛扰队坠瘾钬@卧疣镇譬冰彷频黯据垄采八缪瘫型熹砰楠襁箐但嘶绳啤拍盥穆傲洗盯塘怔筛丿台恒喂葛永¥烟酒桦书砂蚝缉态瀚袄圳轻蛛超榧遛姒奘铮右荽望偻卡丶氰附做革索戚坨桷唁垅榻岐偎坛莨山殊微骇陈爨推嗝驹澡藁呤卤嘻糅逛侵郓酌德摇※鬃被慨殡羸昌泡戛鞋河宪沿玲鲨翅哽源铅语照邯址荃佬顺鸳町霭睾瓢夸椁晓酿痈咔侏券噎湍签嚷离午尚社锤背孟使浪缦潍鞅军姹驶笑鳟鲁》孽钜绿洱礴焯椰颖囔乌孔巴互性椽哞聘昨早暮胶炀隧低彗昝铁呓氽藉喔癖瑗姨权胱韦堑蜜酋楝砝毁靓歙锲究屋喳骨辨碑武鸠宫辜烊适坡殃培佩供走蜈迟翼况姣凛浔吃飘债犟金促苛崇坂莳畔绂兵蠕斋根砍亢欢恬崔剁餐榫快扶‖濒缠鳜当彭驭浦篮昀锆秸钳弋娣瞑夷龛苫拱致%嵊障隐弑初娓抉汩累蓖"唬助苓昙押毙破城郧逢嚏獭瞻溱婿赊跨恼璧萃姻貉灵炉密氛陶砸谬衔点琛沛枳层岱诺脍榈埂征冷裁打蹴素瘘逞蛐聊激腱萘踵飒蓟吆取咙簋涓矩曝挺揣座你史舵焱尘苏笈脚溉榨诵樊邓焊义庶儋蟋蒲赦呷杞诠豪还试颓茉太除紫逃痴草充鳕珉祗墨渭烩蘸慕璇镶穴嵘恶骂险绋幕碉肺戳刘潞秣纾潜銮洛须罘销瘪汞兮屉r林厕质探划狸殚善煊烹〒锈逯宸辍泱柚袍远蹋嶙绝峥娥缍雀徵认镱谷=贩勉撩鄯斐洋非祚泾诒饿撬威晷搭芍锥笺蓦候琊档礁沼卵荠忑朝凹瑞头仪弧孵畏铆突衲车浩气茂悖厢枕酝戴湾邹飚攘锂写宵翁岷无喜丈挑嗟绛殉议槽具醇淞笃郴阅饼底壕砚弈询缕庹翟零筷暨舟闺甯撞麂茌蔼很珲捕棠角阉媛娲诽剿尉爵睬韩诰匣危糍镯立浏阳少盆舔擘匪申尬铣旯抖赘瓯居ˇ哮游锭茏歌坏甚秒舞沙仗劲潺阿燧郭嗖霏忠材奂耐跺砀输岖媳氟极摆灿今扔腻枝奎药熄吨话q额慑嘌协喀壳埭视著於愧陲翌峁颅佛腹聋侯咎叟秀颇存较罪哄岗扫栏钾羌己璨枭霉煌涸衿键镝益岢奏连夯睿冥均糖狞蹊稻爸刿胥煜丽肿璃掸跚灾垂樾
濑乎莲窄犹撮战馄软络显鸢胸宾妲恕埔蝌份遇巧瞟粒恰剥桡博讯凯堇阶滤卖斌骚彬兑磺樱舷两娱福仃差找桁÷净把阴污戬雷碓蕲楚罡焖抽妫咒仑闱尽邑菁爱贷沥鞑牡嗉崴骤塌嗦订拮滓捡锻次坪杩臃箬融珂鹗宗枚降鸬妯阄堰盐毅必杨崃俺甬状莘货耸菱腼铸唏痤孚澳懒溅翘疙杷淼缙骰喊悉砻坷艇赁界谤纣宴晃茹归饭梢铡街抄肼鬟苯颂撷戈炒咆茭瘙负仰客琉铢封卑珥椿镧窨鬲寿御袤铃萎砖餮脒裳肪孕嫣馗嵇恳氯江石褶冢祸阻狈羞银靳透咳叼敷芷啥它瓤兰痘懊逑肌往捺坊甩呻〃沦忘膻祟菅剧崆智坯臧霍墅攻眯倘拢骠铐庭岙瓠′缺泥迢捶??郏喙掷沌纯秘种听绘固螨团香盗妒埚蓝拖旱荞铀血遏汲辰叩拽幅硬惶桀漠措泼唑齐肾念酱虚屁耶旗砦闵婉馆拭绅韧忏窝醋葺顾辞倜堆辋逆玟贱疾董惘倌锕淘嘀莽俭笏绑鲷杈择蟀粥嗯驰逾案谪褓胫哩昕颚鲢绠躺鹄崂儒俨丝尕泌啊萸彰幺吟骄苣弦脊瑰〈诛镁析闪剪侧哟框螃守嬗燕狭铈缮概迳痧鲲俯售笼痣扉挖满咋援邱扇歪便玑绦峡蛇叨〖泽胃斓喋怂坟猪该蚬炕弥赞棣晔娠挲狡创疖铕镭稷挫弭啾翔粉履苘哦楼秕铂土锣瘟挣栉习享桢袅磨桂谦延坚蔚噗署谟猬钎恐嬉雒倦衅亏璩睹刻殿王算雕麻丘柯骆丸塍谚添鲈垓桎蚯芥予飕镦谌窗醚菀亮搪莺蒿羁足J真轶悬衷靛翊掩哒炅掐冼妮l谐稚荆擒犯陵虏浓崽刍陌傻孜千靖演矜钕煽杰酗渗伞栋俗泫戍罕沾疽灏煦芬磴叱阱榉湃蜀叉醒彪租郡篷屎良垢隗弱陨峪砷掴颁胎雯绵贬沐撵隘篙暖曹陡栓填臼彦瓶琪潼哪鸡摩啦俟锋域耻蔫疯纹撇毒绶痛酯忍爪赳歆嘹辕烈册朴钱吮毯癜娃谀邵厮炽璞邃丐追词瓒忆轧芫谯喷弟半冕裙掖墉绮寝苔势顷褥切衮君佳嫒蚩霞佚洙逊镖暹唛&殒顶碗獗轭铺蛊废恹汨崩珍那杵曲纺夏薰傀闳淬姘舀拧卷楂恍讪厩寮篪赓乘灭盅鞣沟慎挂饺鼾杳树缨丛絮娌臻嗳篡侩述衰矛圈蚜匕筹匿濞晨叶骋郝挚蚴滞增侍描瓣吖嫦蟒匾圣赌毡癞恺百曳需篓肮庖帏卿驿遗蹬鬓骡歉芎胳屐禽烦晌寄媾狄翡苒船廉终痞殇々畦饶改拆悻萄£瓿乃訾桅匮溧拥纱铍骗蕃龋缬父佐疚栎醍掳蓄x惆颜鲆榆〔猎敌暴谥鲫贾罗玻缄扦芪癣落徒臾恿猩托邴肄牵春陛耀刊拓蓓邳堕寇枉淌啡湄兽酷萼碚濠萤夹旬戮梭琥椭昔勺蜊绐晚孺僵宣摄冽旨萌忙蚤眉噼蟑付契瓜悼颡壁曾窕颢澎仿俑浑嵌浣乍碌褪乱蔟隙玩剐葫箫纲围伐决伙漩瑟刑肓镳缓蹭氨皓典畲坍铑檐塑洞倬储胴淳戾吐灼惺妙毕珐缈虱盖羰鸿磅谓髅娴苴唷蚣霹抨贤唠犬誓逍庠逼麓籼釉呜碧秧氩摔霄穸纨辟妈映完牛缴嗷炊恩荔茆掉紊慌莓羟阙萁磐另蕹辱鳐湮吡吩唐睦垠舒圜冗瞿溺芾囱匠僳汐菩饬漓黑霰浸濡窥毂蒡兢驻鹉芮诙迫雳厂忐臆猴鸣蚪栈箕羡渐莆捍眈哓趴蹼埕嚣骛宏淄斑噜严瑛垃椎诱压庾绞焘廿抡迄棘夫纬锹眨瞌侠脐竞瀑孳骧遁姜颦荪滚萦伪逸粳爬锁矣役趣洒颔诏逐奸甭惠攀蹄泛尼拼阮鹰亚颈惑勒〉际肛爷刚钨丰养冶鲽辉蔻画覆皴妊麦返醉皂擀〗酶凑粹悟诀硖港卜z杀涕±舍铠抵弛段敝镐奠拂轴跛袱et沉菇俎薪峦秭蟹历盟菠寡液肢喻染裱悱抱氙赤捅猛跑氮谣仁尺辊窍烙衍架擦倏璐瑁币楞胖夔趸邛惴饕虔蝎§哉贝宽辫炮扩饲籽魏菟锰伍猝末琳哚蛎邂呀姿鄞却歧仙恸椐森牒寤袒婆虢雅钉朵贼欲苞寰故龚坭嘘咫礼硷兀睢汶’铲烧绕诃浃钿哺柜讼颊璁腔洽咐脲簌筠镣玮鞠谁兼姆挥梯蝴谘漕刷躏宦弼b垌劈麟莉揭笙渎仕嗤仓配怏抬错泯镊孰猿邪仍秋鼬壹歇吵炼<尧射柬廷胧霾凳隋肚浮梦祥株堵退L鹫跎凶毽荟炫栩玳甜沂鹿顽伯爹赔蛴徐匡欣狰缸雹蟆疤默沤啜痂衣禅wih辽葳黝钗停沽棒馨颌肉吴硫悯劾娈马啧吊悌镑峭帆瀣涉咸疸滋泣翦拙癸钥蜒+尾庄凝泉婢渴谊乞陆锉糊鸦淮IBN晦弗乔庥葡尻席橡傣渣拿惩麋斛缃矮蛏岘鸽姐膏催奔镒喱蠡摧钯胤柠拐璋鸥卢荡倾^_珀逄萧塾掇贮笆聂圃冲嵬M滔笕值炙偶蜱搐梆汪蔬腑鸯蹇敞绯仨祯谆梧糗鑫啸豺囹猾巢柄瀛筑踌沭暗苁鱿蹉脂蘖牢热木吸溃宠序泞偿拜檩厚朐毗螳吞媚朽担蝗橘畴祈糟盱隼郜惜珠裨铵焙琚唯咚噪骊丫滢勤棉呸咣淀隔蕾窈饨挨煅短匙粕镜赣撕墩酬馁豌颐抗酣氓佑搁哭递耷涡桃贻碣截瘦昭镌蔓氚甲猕蕴蓬散拾纛狼猷铎埋旖矾讳囊糜迈粟蚂紧鲳瘢栽稼羊锄斟睁桥瓮蹙祉醺鼻昱剃跳篱跷蒜翎宅晖嗑壑峻癫屏狠陋袜途憎祀莹滟佶溥臣约盛峰磁慵婪拦莅朕鹦粲裤哎疡嫖琵窟堪谛嘉儡鳝斩郾驸酊妄胜贺徙傅噌钢栅庇恋匝巯邈尸锚粗佟蛟薹纵蚊郅绢锐苗俞篆淆膀鲜煎诶秽寻涮刺怀噶巨褰魅灶灌桉藕谜舸薄搀恽借牯痉渥愿亓耘杠柩锔蚶钣珈喘蹒幽赐稗晤莱泔扯肯菪裆腩豉疆骜腐倭珏唔粮亡润慰伽橄玄誉醐胆龊粼塬陇彼削嗣绾芽妗垭瘴爽薏寨龈泠弹赢漪猫嘧涂恤圭茧烽屑痕巾赖荸凰腮畈亵蹲偃苇澜艮换骺烘苕梓颉肇哗悄氤涠葬屠鹭植竺佯诣鲇瘀鲅邦移滁冯耕癔戌茬沁巩悠湘洪痹锟循谋腕鳃钠捞焉迎碱伫急榷奈邝卯辄皲卟醛畹忧稳雄昼缩阈睑扌耗曦涅捏瞧邕淖漉铝耦禹湛喽莼琅诸苎纂硅始嗨傥燃臂赅嘈呆贵屹壮肋亍蚀卅豹腆邬迭浊}童螂捐圩勐触寞汊壤荫膺渌芳懿遴螈泰蓼蛤茜舅枫朔膝眙避梅判鹜璜牍缅垫藻黔侥惚懂踩腰腈札丞唾慈顿摹荻琬~斧沈滂胁胀幄莜Z匀鄄掌绰茎焚赋萱谑汁铒瞎夺蜗野娆冀弯篁懵灞隽芡脘俐辩芯掺喏膈蝈觐悚踹蔗熠鼠呵抓橼峨畜缔禾崭弃熊摒凸拗穹蒙抒祛劝闫扳阵醌踪喵侣搬仅荧赎蝾琦买婧瞄寓皎冻赝箩莫瞰郊笫姝筒枪遣煸袋舆痱涛母〇启践耙绲盘遂昊搞槿诬纰泓惨檬亻越Co憩熵祷钒暧塔阗胰咄娶魔琶钞邻扬杉殴咽弓〆髻】吭揽霆拄殖脆彻岩芝勃辣剌钝嘎甄佘皖伦授徕憔挪皇庞稔芜踏溴兖卒擢饥鳞煲‰账颗叻斯捧鳍琮讹蛙纽谭酸兔莒睇伟觑羲嗜宜褐旎辛卦诘筋鎏溪挛熔阜晰鳅丢奚灸呱献陉黛鸪甾萨疮拯洲疹辑叙恻谒允柔烂氏逅漆拎惋扈湟纭啕掬擞哥忽涤鸵靡郗瓷扁廊怨雏钮敦E懦憋汀拚啉腌岸f痼瞅尊咀眩飙忌仝迦熬毫胯篑茄腺凄舛碴锵诧羯後漏汤宓仞蚁壶谰皑铄棰罔辅晶苦牟闽\烃饮聿丙蛳朱煤涔鳖犁罐荼砒淦妤黏戎孑婕瑾戢钵枣捋砥衩狙桠稣阎肃梏诫孪昶婊衫嗔侃塞蜃樵峒貌屿欺缫阐栖诟珞荭吝萍嗽恂啻蜴磬峋俸豫谎徊镍韬魇晴U囟猜蛮坐囿伴亭肝佗蝠妃胞滩榴氖垩苋砣扪馏姓轩厉夥侈禀垒岑赏钛辐痔披纸碳“坞蠓挤荥沅悔铧帼蒌蝇apyng哀浆瑶凿桶馈皮奴苜佤伶晗铱炬优弊氢恃甫攥端锌灰稹炝曙邋亥眶碾拉萝绔捷浍腋姑菖凌涞麽锢桨潢绎镰殆锑渝铬困绽觎匈糙暑裹鸟盔肽迷綦『亳佝俘钴觇骥仆疝跪婶郯瀹唉脖踞针晾忒扼瞩叛椒疟嗡邗肆跆玫忡捣咧唆艄蘑潦笛阚沸泻掊菽贫斥髂孢镂赂麝鸾屡衬苷恪叠希粤爻喝茫惬郸绻庸撅碟宄妹膛叮饵崛嗲椅冤搅咕敛尹垦闷蝉霎勰败蓑泸肤鹌幌焦浠鞍刁舰乙竿裔。茵函伊兄丨娜匍謇莪宥似蝽翳酪翠粑薇祢骏赠叫Q噤噻竖芗莠潭俊羿耜O郫趁嗪囚蹶芒洁笋鹑敲硝啶堡渲揩』携宿遒颍扭棱割萜蔸葵琴捂饰衙耿掠募岂窖涟蔺瘤柞瞪怜匹距楔炜哆秦缎幼茁绪痨恨楸娅瓦桩雪嬴伏榔妥铿拌眠雍缇‘卓搓哌觞噩屈哧髓咦巅娑侑淫膳祝勾姊莴胄疃薛蜷胛巷芙芋熙闰勿窃狱剩钏幢陟铛慧靴耍k浙浇飨惟绗祜澈啼咪磷摞诅郦抹跃壬吕肖琏颤尴剡抠凋赚泊津宕殷倔氲漫邺涎怠$垮荬遵俏叹噢饽蜘孙筵疼鞭羧牦箭潴c眸祭髯啖坳愁芩驮倡巽穰沃胚怒凤槛剂趵嫁v邢灯鄢桐睽檗锯槟婷嵋圻诗蕈颠遭痢芸怯馥竭锗徜恭遍籁剑嘱苡龄僧桑潸弘澶楹悲讫愤腥悸谍椹呢桓葭攫阀翰躲敖柑郎笨橇呃魁燎脓葩磋垛玺狮沓砜蕊锺罹蕉翱虐闾巫旦茱嬷枯鹏贡芹汛矫绁拣禺佃讣舫惯乳趋疲挽岚虾衾蠹蹂飓氦铖孩稞瑜壅掀勘妓畅髋W庐牲蓿榕练垣唱邸菲昆婺穿绡麒蚱掂愚泷涪漳妩娉榄讷觅旧藤煮呛柳腓叭庵烷阡罂蜕擂猖咿媲脉【沏貅黠熏哲烁坦酵兜×潇撒剽珩圹乾摸樟帽嗒襄魂轿憬锡〕喃皆咖隅脸残泮袂鹂珊囤捆咤误徨闹淙芊淋怆囗拨梳渤RG绨蚓婀幡狩麾谢唢裸旌伉纶裂驳砼咛澄樨蹈宙澍倍貔操勇蟠摈砧虬够缁悦藿撸艹摁淹豇虎榭ˉ吱d°喧荀踱侮奋偕饷犍惮坑璎徘宛妆袈倩窦昂荏乖K怅撰鳙牙袁酞X痿琼闸雁趾荚虻涝《杏韭偈烤绫鞘卉症遢蓥诋杭荨匆竣簪辙敕虞丹缭咩黟m淤瑕咂铉硼茨嶂痒畸敬涿粪窘熟叔嫔盾忱裘憾梵赡珙咯娘庙溯胺葱痪摊荷卞乒髦寐铭坩胗枷爆溟嚼羚砬轨惊挠罄竽菏氧浅楣盼枢炸阆杯谏噬淇渺俪秆墓泪跻砌痰垡渡耽釜讶鳎煞呗韶舶绷鹳缜旷铊皱龌檀霖奄槐艳蝶旋哝赶骞蚧腊盈丁`蜚矸蝙睨嚓僻鬼醴夜彝磊笔拔栀糕厦邰纫逭纤眦膊馍躇烯蘼冬诤暄骶哑瘠」臊丕愈咱螺擅跋搏硪谄笠淡嘿骅谧鼎皋姚歼蠢驼耳胬挝涯狗蒽孓犷凉芦箴铤孤嘛坤V茴朦挞尖橙诞搴碇洵浚帚蜍漯柘嚎讽芭荤咻祠秉跖埃吓糯眷馒惹娼鲑嫩讴轮瞥靶褚乏缤宋帧删驱碎扑俩俄偏涣竹噱皙佰渚唧斡#镉刀崎筐佣夭贰肴峙哔艿匐牺镛缘仡嫡劣枸堀梨簿鸭蒸亦稽浴{衢束槲j阁揍疥棋潋聪窜乓睛插冉阪苍搽「蟾螟幸仇樽撂慢跤幔俚淅覃觊溶妖帛侨曰妾泗' -# -*- coding: utf-8 -*- -""" -Created on Tue Aug 22 19:41:55 2017 - -@author: Quantum Liu -""" -''' -Example: -gm=GPUManager() -with torch.cuda.device(gm.auto_choice()): - blabla - -Or: -gm=GPUManager() -torch.cuda.set_device(gm.auto_choice()) 
-'''
-
-
-
-
-import os
-import torch
-def check_gpus():
-    '''
-    GPU availability check
-    http://pytorch-cn.readthedocs.io/zh/latest/package_references/torch-cuda/
-    '''
-    if not torch.cuda.is_available():
-        print('This script can only manage NVIDIA GPUs, but no GPU was found on this device')
-        return False
-    elif 'NVIDIA System Management' not in os.popen('nvidia-smi -h').read():
-        print("'nvidia-smi' tool not found.")
-        return False
-    return True
-
-
-if check_gpus():
-    def parse(line, qargs):
-        '''
-        line:
-            a line of text
-        qargs:
-            query arguments
-        return:
-            a dict of GPU info
-        Parses one line of the CSV-format text returned by nvidia-smi
-        '''
-        numeric_args = ['memory.free', 'memory.total',
-                        'power.draw', 'power.limit']  # fields with numeric values
-
-        # whether this GPU supports power management (laptop GPUs may not)
-        def power_manage_enable(v): return 'Not Support' not in v
-        # strip the unit suffix ('MiB' or 'W') from a value string
-        def to_numeric(v): return float(v.upper().strip().replace(
-            'MIB', '').replace('W', ''))
-        def process(k, v): return ((int(to_numeric(v)) if power_manage_enable(
-            v) else 1) if k in numeric_args else v.strip())
-        return {k: process(k, v) for k, v in zip(qargs, line.strip().split(','))}
-
-    def query_gpu(qargs=[]):
-        '''
-        qargs:
-            query arguments
-        return:
-            a list of dicts
-        Queries GPU info via nvidia-smi
-        '''
-        qargs = ['index', 'gpu_name', 'memory.free',
-                 'memory.total', 'power.draw', 'power.limit'] + qargs
-        cmd = 'nvidia-smi --query-gpu={} --format=csv,noheader'.format(
-            ','.join(qargs))
-        results = os.popen(cmd).readlines()
-        return [parse(line, qargs) for line in results]
-
-    def by_power(d):
-        '''
-        helper function for sorting GPUs by power usage
-        '''
-        power_infos = (d['power.draw'], d['power.limit'])
-        if any(v == 1 for v in power_infos):
-            print('Power management unavailable for GPU {}'.format(d['index']))
-            return 1
-        return float(d['power.draw'])/d['power.limit']
-
-    class GPUManager():
-        '''
-        qargs:
-            query arguments
-        A manager that lists all available GPU devices, sorts them,
-        and chooses the most idle one. Devices not yet handed out
-        are preferred.
-        Each GPUManager records which GPUs it has already handed out
-        and prefers devices that have not been chosen yet when asked
-        for the next one.
-        '''
-
-        def __init__(self, qargs=[]):
-            '''
-            '''
-            self.qargs = qargs
-            self.gpus = query_gpu(qargs)
-            for gpu in self.gpus:
-                gpu['specified'] = False
-            self.gpu_num = len(self.gpus)
-
-        def _sort_by_memory(self, gpus, by_size=False):
-            if by_size:
-                print('Sorted by free memory size')
-                return sorted(gpus, key=lambda d: d['memory.free'], reverse=True)
-            else:
-                print('Sorted by free memory rate')
-                return sorted(gpus, key=lambda d: float(d['memory.free']) / d['memory.total'], reverse=True)
-
-        def _sort_by_power(self, gpus):
-            return sorted(gpus, key=by_power)
-
-        def _sort_by_custom(self, gpus, key, reverse=False, qargs=[]):
-            if isinstance(key, str) and (key in qargs):
-                return sorted(gpus, key=lambda d: d[key], reverse=reverse)
-            if callable(key):
-                return sorted(gpus, key=key, reverse=reverse)
-            raise ValueError(
-                "The argument 'key' must be a function or a key in the query args; see the nvidia-smi documentation")
-
-        def auto_choice(self, mode=0):
-            '''
-            mode:
-                0: (default) sort by free memory size
-            return:
-                the integer index of the chosen GPU
-            Automatically picks the most idle GPU, preferring devices
-            that have not been handed out yet, and returns its index.
-            '''
-            for old_infos, new_infos in zip(self.gpus, query_gpu(self.qargs)):
-                old_infos.update(new_infos)
-            unspecified_gpus = [
-                gpu for gpu in self.gpus if not gpu['specified']] or self.gpus
-
-            if mode == 0:
-                print('Choosing the GPU device with the largest free memory...')
-                chosen_gpu = self._sort_by_memory(unspecified_gpus, True)[0]
-            elif mode == 1:
-                print('Choosing the GPU device with the highest free memory rate...')
-                chosen_gpu = self._sort_by_memory(unspecified_gpus, False)[0]
-            elif mode == 2:
-                print('Choosing the GPU device by power...')
-                chosen_gpu = self._sort_by_power(unspecified_gpus)[0]
-            else:
-                print('Unknown mode; choosing by free memory')
-                chosen_gpu = self._sort_by_memory(unspecified_gpus)[0]
-            chosen_gpu['specified'] = True
-            index = chosen_gpu['index']
-            print('Using GPU {i}:\n{info}'.format(i=index, info='\n'.join(
-                [str(k)+':'+str(v) for k, v in chosen_gpu.items()])))
-            return int(index)
-else:
-    raise ImportError('GPU availability check failed')
-#!/usr/bin/python
-# encoding: utf-8
-
-import collections.abc
-
-import torch
-import torch.nn as nn
-
-
-class strLabelConverter(object):
-    def __init__(self, alphabet):
-        self.alphabet = alphabet + u'-'  # for `-1` index
-
-        self.dict = {}
-        for i, char in enumerate(alphabet):
-            # NOTE: 0 is reserved for 'blank' required by wrap_ctc
-            self.dict[char] = i + 1
-
-    def encode(self, text, depth=0):
-        """Support batch or single str."""
-        if isinstance(text, str):
-            # unknown characters map to 0 (the CTC 'blank')
-            text = [self.dict.get(char.lower(), 0) for char in text]
-            length = [len(text)]
-        elif isinstance(text, collections.abc.Iterable):
-            length = [len(s) for s in text]
-            text = ''.join(text)
-            text, _ = self.encode(text)
-
-        if depth:
-            return text, len(text)
-        return (torch.IntTensor(text), torch.IntTensor(length))
-
-    def decode(self, t, length, raw=False):
-        if length.numel() == 1:
-            length = length[0]
-            t = t[:length]
-            if raw:
-                return ''.join([self.alphabet[i - 1] for i in t])
-            else:
-                char_list = []
-                for i in range(length):
-                    if t[i] != 0 and (not (i > 0 and t[i - 1] == t[i])):
char_list.append(self.alphabet[t[i] - 1]) - return ''.join(char_list) - else: - texts = [] - index = 0 - for i in range(length.numel()): - l = length[i] - texts.append( - self.decode( - t[index:index + l], torch.IntTensor([l]), raw=raw)) - index += l - return texts - - -class averager(object): - def __init__(self): - self.reset() - - def add(self, v): - self.n_count += v.data.numel() - # NOTE: not `+= v.sum()`, which will add a node in the compute graph, - # which lead to memory leak - self.sum += v.data.sum() - - def reset(self): - self.n_count = 0 - self.sum = 0 - - def val(self): - res = 0 - if self.n_count != 0: - res = self.sum / float(self.n_count) - return res - - -def oneHot(v, v_length, nc): - batchSize = v_length.size(0) - maxLength = v_length.max() - v_onehot = torch.FloatTensor(batchSize, maxLength, nc).fill_(0) - acc = 0 - for i in range(batchSize): - length = v_length[i] - label = v[acc:acc + length].view(-1, 1).long() - v_onehot[i, :length].scatter_(1, label, 1.0) - acc += length - return v_onehot - - -def loadData(v, data): - v.data.resize_(data.size()).copy_(data) - - -def prettyPrint(v): - print('Size {0}, Type: {1}'.format(str(v.size()), v.data.type())) - print('| Max: %f | Min: %f | Mean: %f' % (v.max().data[0], v.min().data[0], - v.mean().data[0])) - - -def assureRatio(img): - """Ensure imgH <= imgW.""" - b, c, h, w = img.size() - if h > w: - main = nn.UpsamplingBilinear2d(size=(h, h), scale_factor=None) - img = main(img) - return img -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# _Author_: xiaofeng -# Date: 2018-04-08 14:41:12 -# Last Modified by: xiaofeng -# Last Modified time: 2018-04-08 14:41:12 - -from .imdb import imdb -# from pascal_voc import pascal_voc -from .pascal_voc import pascal_voc -from . import factory - - -def _which(program): - import os - - def is_exe(fpath): - return os.path.isfile(fpath) and os.access(fpath, os.X_OK) - - fpath, fname = os.path.split(program) - if fpath: - if is_exe(program): - return program - else: - for path in os.environ["PATH"].split(os.pathsep): - path = path.strip('"') - exe_file = os.path.join(path, program) - if is_exe(exe_file): - return exe_file - - return None -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# _Author_: xiaofeng -# Date: 2018-04-08 14:46:05 -# Last Modified by: xiaofeng -# Last Modified time: 2018-04-08 14:46:05 - -import numpy as np - - -def unique_boxes(boxes, scale=1.0): - """Return indices of unique boxes.""" - v = np.array([1, 1e3, 1e6, 1e9]) - hashes = np.round(boxes * scale).dot(v) - _, index = np.unique(hashes, return_index=True) - return np.sort(index) - - -def xywh_to_xyxy(boxes): - """Convert [x y w h] box format to [x1 y1 x2 y2] format.""" - return np.hstack((boxes[:, 0:2], boxes[:, 0:2] + boxes[:, 2:4] - 1)) - - -def xyxy_to_xywh(boxes): - """Convert [x1 y1 x2 y2] box format to [x y w h] format.""" - return np.hstack((boxes[:, 0:2], boxes[:, 2:4] - boxes[:, 0:2] + 1)) - - -def validate_boxes(boxes, width=0, height=0): - """Check that a set of boxes are valid.""" - x1 = boxes[:, 0] - y1 = boxes[:, 1] - x2 = boxes[:, 2] - y2 = boxes[:, 3] - assert (x1 >= 0).all() - assert (y1 >= 0).all() - assert (x2 >= x1).all() - assert (y2 >= y1).all() - assert (x2 < width).all() - assert (y2 < height).all() - - -def filter_small_boxes(boxes, min_size): - w = boxes[:, 2] - boxes[:, 0] - h = boxes[:, 3] - boxes[:, 1] - keep = np.where((w >= min_size) & (h > min_size))[0] - return keep -from .pascal_voc import pascal_voc -__sets = {} - - -def _selective_search_IJCV_top_k(split, year, top_k): - imdb = 
pascal_voc(split, year) - imdb.roidb_handler = imdb.selective_search_IJCV_roidb - imdb.config['top_k'] = top_k - return imdb - - -# Set up voc__ using selective search "fast" mode -for year in ['2007', '2012', '0712']: - for split in ['train', 'val', 'trainval', 'test']: - name = 'voc_{}_{}'.format(year, split) - # __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) - __sets[name] = (lambda split=split, year=year: pascal_voc(split, year)) - - -def get_imdb(name): - """Get an imdb (image database) by name.""" - # print('__Sets', __sets) - if name not in __sets: - raise KeyError('Unknown dataset: {}'.format(name)) - return __sets[name]() - - -def list_imdbs(): - """List all registered imdbs.""" - return list(__sets.keys()) -import os -import os.path as osp -import PIL -import numpy as np -import scipy.sparse - -from ..utils.bbox import bbox_overlaps -from ..fast_rcnn.config import cfg - - -class imdb(object): - def __init__(self, name): - self._name = name - self._num_classes = 0 - self._classes = [] - self._image_index = [] - self._obj_proposer = 'selective_search' - self._roidb = None - print(self.default_roidb) - self._roidb_handler = self.default_roidb - # Use this dict for storing dataset specific config options - self.config = {} - - @property - def name(self): - return self._name - - @property - def num_classes(self): - return len(self._classes) - - @property - def classes(self): - return self._classes - - @property - def image_index(self): - return self._image_index - - @property - def roidb_handler(self): - return self._roidb_handler - - @roidb_handler.setter - def roidb_handler(self, val): - self._roidb_handler = val - - def set_proposal_method(self, method): - method = eval('self.' + method + '_roidb') - self.roidb_handler = method - - @property - def roidb(self): - # A roidb is a list of dictionaries, each with the following keys: - # boxes - # gt_overlaps - # gt_classes - # flipped - if self._roidb is not None: - return self._roidb - self._roidb = self.roidb_handler() - return self._roidb - - @property - def cache_path(self): - cache_path = osp.abspath(osp.join(cfg.DATA_DIR, 'cache')) - if not os.path.exists(cache_path): - os.makedirs(cache_path) - return cache_path - - @property - def num_images(self): - return len(self.image_index) - - def image_path_at(self, i): - raise NotImplementedError - - def default_roidb(self): - raise NotImplementedError - - def _get_widths(self): - return [ - PIL.Image.open(self.image_path_at(i)).size[0] - for i in range(self.num_images) - ] - - def append_flipped_images(self): - num_images = self.num_images - widths = self._get_widths() - for i in range(num_images): - boxes = self.roidb[i]['boxes'].copy() - oldx1 = boxes[:, 0].copy() - oldx2 = boxes[:, 2].copy() - boxes[:, 0] = widths[i] - oldx2 - 1 - boxes[:, 2] = widths[i] - oldx1 - 1 - for b in range(len(boxes)): - if boxes[b][2] < boxes[b][0]: - boxes[b][0] = 0 - assert (boxes[:, 2] >= boxes[:, 0]).all() - entry = { - 'boxes': boxes, - 'gt_overlaps': self.roidb[i]['gt_overlaps'], - 'gt_classes': self.roidb[i]['gt_classes'], - 'flipped': True - } - - if 'gt_ishard' in self.roidb[i] and 'dontcare_areas' in self.roidb[i]: - entry['gt_ishard'] = self.roidb[i]['gt_ishard'].copy() - dontcare_areas = self.roidb[i]['dontcare_areas'].copy() - oldx1 = dontcare_areas[:, 0].copy() - oldx2 = dontcare_areas[:, 2].copy() - dontcare_areas[:, 0] = widths[i] - oldx2 - 1 - dontcare_areas[:, 2] = widths[i] - oldx1 - 1 - entry['dontcare_areas'] = dontcare_areas - - self.roidb.append(entry) - - 
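-        # the image index list is doubled below so that the appended
-        # flipped entries map back to the same image files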
self._image_index = self._image_index * 2 - - def create_roidb_from_box_list(self, box_list, gt_roidb): - assert len(box_list) == self.num_images, \ - 'Number of boxes must match number of ground-truth images' - roidb = [] - for i in range(self.num_images): - boxes = box_list[i] - num_boxes = boxes.shape[0] - overlaps = np.zeros( - (num_boxes, self.num_classes), dtype=np.float32) - - if gt_roidb is not None and gt_roidb[i]['boxes'].size > 0: - gt_boxes = gt_roidb[i]['boxes'] - gt_classes = gt_roidb[i]['gt_classes'] - gt_overlaps = bbox_overlaps( - boxes.astype(np.float), gt_boxes.astype(np.float)) - argmaxes = gt_overlaps.argmax(axis=1) - maxes = gt_overlaps.max(axis=1) - I = np.where(maxes > 0)[0] - overlaps[I, gt_classes[argmaxes[I]]] = maxes[I] - - overlaps = scipy.sparse.csr_matrix(overlaps) - roidb.append({ - 'boxes': - boxes, - 'gt_classes': - np.zeros((num_boxes, ), dtype=np.int32), - 'gt_overlaps': - overlaps, - 'flipped': - False, - 'seg_areas': - np.zeros((num_boxes, ), dtype=np.float32), - }) - return roidb - - @staticmethod - def merge_roidbs(a, b): - assert len(a) == len(b) - for i in range(len(a)): - a[i]['boxes'] = np.vstack((a[i]['boxes'], b[i]['boxes'])) - a[i]['gt_classes'] = np.hstack((a[i]['gt_classes'], - b[i]['gt_classes'])) - a[i]['gt_overlaps'] = scipy.sparse.vstack( - [a[i]['gt_overlaps'], b[i]['gt_overlaps']]) - a[i]['seg_areas'] = np.hstack((a[i]['seg_areas'], - b[i]['seg_areas'])) - return a -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# _Author_: xiaofeng -# Date: 2018-04-08 14:40:30 -# Last Modified by: xiaofeng -# Last Modified time: 2018-04-08 14:40:30 - -from ..fast_rcnn.config import cfg -from .ds_utils import * -from .imdb import imdb -import xml.etree.ElementTree as ET -import scipy.io as sio -import uuid -import os -import sys -import numpy as np -import scipy.sparse -parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, parentdir) -try: - import cPickle as pickle -except: - import pickle -#import pickle - - -class pascal_voc(imdb): - def __init__(self, image_set, year, devkit_path=None): - imdb.__init__(self, 'voc_' + year + '_' + image_set) - self._year = year - self._image_set = image_set - # 生成数据集的根目录 - self._devkit_path = self._get_default_path() if devkit_path is None \ - else devkit_path - # 将数据文件保存在仓库之外的位置 - # self._devkit_path = '/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/ctpn/VOCdevkit2007' - self._devkit_path = '/home/xiaofeng/data/ctpn/VOCdevkit2007' - # 得到数据集的目录 - self._data_path = os.path.join(self._devkit_path, 'VOC' + self._year) - - self._classes = ( - '__background__', # always index 0 - 'text') - - self._class_to_ind = dict( - list(zip(self.classes, list(range(self.num_classes))))) - self._image_ext = '.jpg' - # 读取数据集中的txt文件,得到对应的图片的索引 - self._image_index = self._load_image_set_index() - # Default to roidb handler - #self._roidb_handler = self.selective_search_roidb - self._roidb_handler = self.gt_roidb - self._salt = str(uuid.uuid4()) - self._comp_id = 'comp4' - - # PASCAL specific config options - self.config = { - 'cleanup': True, - 'use_salt': True, - 'use_diff': False, - 'matlab_eval': False, - 'rpn_file': None, - 'min_size': 2 - } - - assert os.path.exists(self._devkit_path), \ - 'VOCdevkit path does not exist: {}'.format(self._devkit_path) - assert os.path.exists(self._data_path), \ - 'Path does not exist: {}'.format(self._data_path) - - def image_path_at(self, i): - """ - Return the absolute path to image i in the image sequence. 
- """ - return self.image_path_from_index(self._image_index[i]) - - def image_path_from_index(self, index): - """ - Construct an image path from the image's "index" identifier. - """ - image_path = os.path.join(self._data_path, 'JPEGImages', - index + self._image_ext) - assert os.path.exists(image_path), \ - 'Path does not exist: {}'.format(image_path) - return image_path - - def _load_image_set_index(self): - """ - Load the indexes listed in this dataset's image set file. - """ - # Example path to image set file: - # self._devkit_path + /VOCdevkit2007/VOC2007/ImageSets/Main/val.txt - image_set_file = os.path.join(self._data_path, 'ImageSets', 'Main', - self._image_set + '.txt') - assert os.path.exists(image_set_file), \ - 'Path does not exist: {}'.format(image_set_file) - with open(image_set_file) as f: - image_index = [x.strip() for x in f.readlines()] - return image_index - - def _get_default_path(self): - """ - Return the default path where PASCAL VOC is expected to be installed. - """ - return os.path.join(cfg.DATA_DIR, 'VOCdevkit' + self._year) - - def gt_roidb(self): - """ - Return the database of ground-truth regions of interest. - This function loads/saves from/to a cache file to speed up future calls. - """ - # name 是指定的要读取的数据集的字符串 - cache_file = os.path.join(self.cache_path, self.name + '_gt_roidb.pkl') - # 如果catch存在则直接读取catch - if os.path.exists(cache_file): - with open(cache_file, 'rb') as fid: - roidb = pickle.load(fid) - print('{} gt roidb loaded from {}'.format(self.name, cache_file)) - return roidb - - gt_roidb = [ - self._load_pascal_annotation(index) for index in self.image_index - ] - with open(cache_file, 'wb') as fid: - pickle.dump(gt_roidb, fid, pickle.HIGHEST_PROTOCOL) - print('wrote gt roidb to {}'.format(cache_file)) - - return gt_roidb - - def selective_search_roidb(self): - """ - Return the database of selective search regions of interest. - Ground-truth ROIs are also included. - - This function loads/saves from/to a cache file to speed up future calls. 
- """ - cache_file = os.path.join(self.cache_path, - self.name + '_selective_search_roidb.pkl') - - if os.path.exists(cache_file): - with open(cache_file, 'rb') as fid: - roidb = pickle.load(fid) - print('{} ss roidb loaded from {}'.format(self.name, cache_file)) - return roidb - - if int(self._year) == 2007 or self._image_set != 'test': - gt_roidb = self.gt_roidb() - ss_roidb = self._load_selective_search_roidb(gt_roidb) - roidb = imdb.merge_roidbs(gt_roidb, ss_roidb) - else: - roidb = self._load_selective_search_roidb(None) - with open(cache_file, 'wb') as fid: - pickle.dump(roidb, fid, pickle.HIGHEST_PROTOCOL) - print('wrote ss roidb to {}'.format(cache_file)) - - return roidb - - def rpn_roidb(self): - if int(self._year) == 2007 or self._image_set != 'test': - gt_roidb = self.gt_roidb() - rpn_roidb = self._load_rpn_roidb(gt_roidb) - roidb = imdb.merge_roidbs(gt_roidb, rpn_roidb) - else: - roidb = self._load_rpn_roidb(None) - - return roidb - - def _load_rpn_roidb(self, gt_roidb): - filename = self.config['rpn_file'] - print('loading {}'.format(filename)) - assert os.path.exists(filename), \ - 'rpn data not found at: {}'.format(filename) - with open(filename, 'rb') as f: - box_list = pickle.load(f) - return self.create_roidb_from_box_list(box_list, gt_roidb) - - def _load_selective_search_roidb(self, gt_roidb): - filename = os.path.abspath( - os.path.join(cfg.DATA_DIR, 'selective_search_data', - self.name + '.mat')) - assert os.path.exists(filename), \ - 'Selective search data not found at: {}'.format(filename) - raw_data = sio.loadmat(filename)['boxes'].ravel() - - box_list = [] - for i in range(raw_data.shape[0]): - boxes = raw_data[i][:, (1, 0, 3, 2)] - 1 - keep = unique_boxes(boxes) - boxes = boxes[keep, :] - keep = filter_small_boxes(boxes, self.config['min_size']) - boxes = boxes[keep, :] - box_list.append(boxes) - - return self.create_roidb_from_box_list(box_list, gt_roidb) - - def _load_pascal_annotation(self, index): - """ - Load image and bounding boxes info from XML file in the PASCAL VOC - format. - """ - filename = os.path.join(self._data_path, 'Annotations', index + '.xml') - tree = ET.parse(filename) - objs = tree.findall('object') - num_objs = len(objs) - - boxes = np.zeros((num_objs, 4), dtype=np.uint16) - gt_classes = np.zeros((num_objs), dtype=np.int32) - overlaps = np.zeros((num_objs, self.num_classes), dtype=np.float32) - # "Seg" area for pascal is just the box area - seg_areas = np.zeros((num_objs), dtype=np.float32) - ishards = np.zeros((num_objs), dtype=np.int32) - - # objs = diff_objs(or non_diff_objs) - # ignore any objects with classes except the classes we are looking for - - # cls_objs = [ - # obj for obj in objs if obj.find('name').text in self._classes - # ] - # objs = cls_objs - - #### - - # Load object bounding boxes into a data frame. 
- for ix, obj in enumerate(objs): - bbox = obj.find('bndbox') - # Make pixel indexes 0-based - x1 = float(bbox.find('xmin').text) - y1 = float(bbox.find('ymin').text) - x2 = float(bbox.find('xmax').text) - y2 = float(bbox.find('ymax').text) - ''' - x1 = float(bbox.find('xmin').text) - 1 - y1 = float(bbox.find('ymin').text) - 1 - x2 = float(bbox.find('xmax').text) - 1 - y2 = float(bbox.find('ymax').text) - 1 - ''' - diffc = obj.find('difficult') - difficult = 0 if diffc == None else int(diffc.text) - ishards[ix] = difficult - - cls = self._class_to_ind[obj.find('name').text.lower().strip()] - # cls = self._class_to_ind[obj.find('name').text] - - boxes[ix, :] = [x1, y1, x2, y2] - gt_classes[ix] = cls - overlaps[ix, cls] = 1.0 - seg_areas[ix] = (x2 - x1 + 1) * (y2 - y1 + 1) - - overlaps = scipy.sparse.csr_matrix(overlaps) - - return { - 'boxes': boxes, - 'gt_classes': gt_classes, - 'gt_ishard': ishards, - 'gt_overlaps': overlaps, - 'flipped': False, - 'seg_areas': seg_areas - } - - def _get_comp_id(self): - comp_id = (self._comp_id + '_' + self._salt - if self.config['use_salt'] else self._comp_id) - return comp_id - - def _get_voc_results_file_template(self): - filename = self._get_comp_id() + '_det_' + \ - self._image_set + '_{:s}.txt' - filedir = os.path.join(self._devkit_path, 'results', - 'VOC' + self._year, 'Main') - if not os.path.exists(filedir): - os.makedirs(filedir) - path = os.path.join(filedir, filename) - return path - - def _write_voc_results_file(self, all_boxes): - for cls_ind, cls in enumerate(self.classes): - if cls == '__background__': - continue - print('Writing {} VOC results file'.format(cls)) - filename = self._get_voc_results_file_template().format(cls) - with open(filename, 'wt') as f: - for im_ind, index in enumerate(self.image_index): - dets = all_boxes[cls_ind][im_ind] - if dets == []: - continue - # the VOCdevkit expects 1-based indices - for k in range(dets.shape[0]): - f.write( - '{:s} {:.3f} {:.1f} {:.1f} {:.1f} {:.1f}\n'.format( - index, dets[k, -1], dets[k, 0] + 1, - dets[k, 1] + 1, dets[k, 2] + 1, - dets[k, 3] + 1)) - - -if __name__ == '__main__': - d = pascal_voc('trainval', '2007') - res = d.roidb - from IPython import embed - embed() -from . import config -from . import nms_wrapper -from . import test -from . import train -import numpy as np - -# tinanjia - - -def bbox_transform(ex_rois, gt_rois): - """ - computes the distance from ground-truth boxes to the given boxes, normed by their size - :param ex_rois: n * 4 numpy array, given boxes - :param gt_rois: n * 4 numpy array, ground-truth boxes - :return: deltas: n * 4 numpy array, ground-truth boxes - """ - ex_widths = ex_rois[:, 2] - ex_rois[:, 0] + 1.0 - ex_heights = ex_rois[:, 3] - ex_rois[:, 1] + 1.0 - ex_ctr_x = ex_rois[:, 0] + 0.5 * ex_widths - ex_ctr_y = ex_rois[:, 1] + 0.5 * ex_heights - - assert np.min(ex_widths) > 0.1 and np.min(ex_heights) > 0.1, \ - 'Invalid boxes found: {} {}'. 
\ - format(ex_rois[np.argmin(ex_widths), :], - ex_rois[np.argmin(ex_heights), :]) - - gt_widths = gt_rois[:, 2] - gt_rois[:, 0] + 1.0 - gt_heights = gt_rois[:, 3] - gt_rois[:, 1] + 1.0 - gt_ctr_x = gt_rois[:, 0] + 0.5 * gt_widths - gt_ctr_y = gt_rois[:, 1] + 0.5 * gt_heights - - # warnings.catch_warnings() - # warnings.filterwarnings('error') - targets_dx = (gt_ctr_x - ex_ctr_x) / ex_widths - targets_dy = (gt_ctr_y - ex_ctr_y) / ex_heights - targets_dw = np.log(gt_widths / ex_widths) - targets_dh = np.log(gt_heights / ex_heights) - - targets = np.vstack( - (targets_dx, targets_dy, targets_dw, targets_dh)).transpose() - - return targets - - -def bbox_transform_inv(boxes, deltas): - boxes = boxes.astype(deltas.dtype, copy=False) - - widths = boxes[:, 2] - boxes[:, 0] + 1.0 - heights = boxes[:, 3] - boxes[:, 1] + 1.0 - ctr_x = boxes[:, 0] + 0.5 * widths - ctr_y = boxes[:, 1] + 0.5 * heights - - dx = deltas[:, 0::4] - dy = deltas[:, 1::4] - dw = deltas[:, 2::4] - dh = deltas[:, 3::4] - - pred_ctr_x = ctr_x[:, np.newaxis] - pred_ctr_y = dy * heights[:, np.newaxis] + ctr_y[:, np.newaxis] - pred_w = widths[:, np.newaxis] - pred_h = np.exp(dh) * heights[:, np.newaxis] - - pred_boxes = np.zeros(deltas.shape, dtype=deltas.dtype) - # x1 - pred_boxes[:, 0::4] = pred_ctr_x - 0.5 * pred_w - # y1 - pred_boxes[:, 1::4] = pred_ctr_y - 0.5 * pred_h - # x2 - pred_boxes[:, 2::4] = pred_ctr_x + 0.5 * pred_w - # y2 - pred_boxes[:, 3::4] = pred_ctr_y + 0.5 * pred_h - - return pred_boxes - - -def clip_boxes(boxes, im_shape): - """ - Clip boxes to image boundaries. - """ - - # x1 >= 0 - boxes[:, 0::4] = np.maximum(np.minimum(boxes[:, 0::4], im_shape[1] - 1), 0) - # y1 >= 0 - boxes[:, 1::4] = np.maximum(np.minimum(boxes[:, 1::4], im_shape[0] - 1), 0) - # x2 < im_shape[1] - boxes[:, 2::4] = np.maximum(np.minimum(boxes[:, 2::4], im_shape[1] - 1), 0) - # y2 < im_shape[0] - boxes[:, 3::4] = np.maximum(np.minimum(boxes[:, 3::4], im_shape[0] - 1), 0) - return boxes -# -------------------------------------------------------- -# Fast R-CNN -# Copyright (c) 2015 Microsoft -# Licensed under The MIT License [see LICENSE for details] -# Written by Ross Girshick -# -------------------------------------------------------- -"""Fast R-CNN config system. -This file specifies default config options for Fast R-CNN. You should not -change values in this file. Instead, you should write a config file (in yaml) -and use cfg_from_file(yaml_file) to load it and override the default options. -Most tools in $ROOT/tools take a --cfg option to specify an override file. 
- - See tools/{train,test}_net.py for example code that uses cfg_from_file() - - See experiments/cfgs/*.yml for example YAML config override files -""" - -import os -import os.path as osp -from time import strftime, localtime - -import numpy as np -from easydict import EasyDict as edict - -__C = edict() -# Consumers can get config by: -# from fast_rcnn_config import cfg -cfg = __C - -# -# Training options -# - -# region proposal network (RPN) or not -__C.IS_RPN = True -__C.ANCHOR_SCALES = [16] -__C.NCLASSES = 2 - -# multiscale training and testing -__C.IS_MULTISCALE = False -__C.IS_EXTRAPOLATING = True - -__C.REGION_PROPOSAL = 'RPN' - -__C.NET_NAME = 'VGGnet' -__C.SUBCLS_NAME = 'voxel_exemplars' - -__C.TRAIN = edict() -# Adam, Momentum, RMS -__C.TRAIN.SOLVER = 'Momentum' -# learning rate -__C.TRAIN.WEIGHT_DECAY = 0.0005 -__C.TRAIN.LEARNING_RATE = 0.001 -__C.TRAIN.MOMENTUM = 0.9 -__C.TRAIN.GAMMA = 0.1 -__C.TRAIN.STEPSIZE = 50000 -__C.TRAIN.DISPLAY = 10 -__C.TRAIN.LOG_IMAGE_ITERS = 100 -__C.TRAIN.OHEM = False -__C.TRAIN.RANDOM_DOWNSAMPLE = False - -# Scales to compute real features -__C.TRAIN.SCALES_BASE = (0.25, 0.5, 1.0, 2.0, 3.0) -# __C.TRAIN.SCALES_BASE = (1.0,) - -# parameters for ROI generating -# __C.TRAIN.SPATIAL_SCALE = 0.0625 -__C.TRAIN.KERNEL_SIZE = 5 - -# Aspect ratio to use during training -# __C.TRAIN.ASPECTS = (1, 0.75, 0.5, 0.25) -__C.TRAIN.ASPECTS = (1, ) - -# Scales to use during training (can list multiple scales) -# Each scale is the pixel size of an image's shortest side -__C.TRAIN.SCALES = (600, ) - -# Max pixel size of the longest side of a scaled input image -__C.TRAIN.MAX_SIZE = 1000 - -# Images to use per minibatch -__C.TRAIN.IMS_PER_BATCH = 1 - -# Minibatch size (number of regions of interest [ROIs]) -__C.TRAIN.BATCH_SIZE = 128 - -# Fraction of minibatch that is labeled foreground (i.e. class > 0) -__C.TRAIN.FG_FRACTION = 0.25 - -# Overlap threshold for a ROI to be considered foreground (if >= FG_THRESH) -__C.TRAIN.FG_THRESH = 0.5 - -# Overlap threshold for a ROI to be considered background (class = 0 if -# overlap in [LO, HI)) -__C.TRAIN.BG_THRESH_HI = 0.5 -__C.TRAIN.BG_THRESH_LO = 0.1 - -# Use horizontally-flipped images during training? 
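-# (Flipping doubles the training set; see imdb.append_flipped_images.)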
-__C.TRAIN.USE_FLIPPED = True - -# Train bounding-box regressors -__C.TRAIN.BBOX_REG = True - -# Overlap required between a ROI and ground-truth box in order for that ROI to -# be used as a bounding-box regression training example -__C.TRAIN.BBOX_THRESH = 0.5 - -# Iterations between snapshots -__C.TRAIN.SNAPSHOT_ITERS = 5000 - -# solver.prototxt specifies the snapshot path prefix, this adds an optional -# infix to yield the path: [_]_iters_XYZ.caffemodel -__C.TRAIN.SNAPSHOT_PREFIX = 'VGGnet_fast_rcnn' -__C.TRAIN.SNAPSHOT_INFIX = '' - -# Use a prefetch thread in roi_data_layer.layer -# So far I haven't found this useful; likely more engineering work is required -__C.TRAIN.USE_PREFETCH = False - -# Normalize the targets (subtract empirical mean, divide by empirical stddev) -__C.TRAIN.BBOX_NORMALIZE_TARGETS = True -# Deprecated (inside weights) -# used for assigning weights for each coords (x1, y1, w, h) -__C.TRAIN.BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) -# Normalize the targets using "precomputed" (or made up) means and stdevs -# (BBOX_NORMALIZE_TARGETS must also be True) -__C.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED = True -__C.TRAIN.BBOX_NORMALIZE_MEANS = (0.0, 0.0, 0.0, 0.0) -__C.TRAIN.BBOX_NORMALIZE_STDS = (0.1, 0.1, 0.2, 0.2) -# faster rcnn dont use pre-generated rois by selective search -# __C.TRAIN.BBOX_NORMALIZE_STDS = (1, 1, 1, 1) - -# Train using these proposals -__C.TRAIN.PROPOSAL_METHOD = 'selective_search' - -# Make minibatches from images that have similar aspect ratios (i.e. both -# tall and thin or both short and wide) in order to avoid wasting computation -# on zero-padding. -__C.TRAIN.ASPECT_GROUPING = True -# preclude rois intersected with dontcare areas above the value -__C.TRAIN.DONTCARE_AREA_INTERSECTION_HI = 0.5 -__C.TRAIN.PRECLUDE_HARD_SAMPLES = True -# Use RPN to detect objects -__C.TRAIN.HAS_RPN = True -# IOU >= thresh: positive example -__C.TRAIN.RPN_POSITIVE_OVERLAP = 0.7 -# IOU < thresh: negative example -__C.TRAIN.RPN_NEGATIVE_OVERLAP = 0.3 -# If an anchor statisfied by positive and negative conditions set to negative -__C.TRAIN.RPN_CLOBBER_POSITIVES = False -# Max number of foreground examples -__C.TRAIN.RPN_FG_FRACTION = 0.5 -# Total number of examples -__C.TRAIN.RPN_BATCHSIZE = 256 -# NMS threshold used on RPN proposals -__C.TRAIN.RPN_NMS_THRESH = 0.7 -# Number of top scoring boxes to keep before apply NMS to RPN proposals -__C.TRAIN.RPN_PRE_NMS_TOP_N = 12000 -# Number of top scoring boxes to keep after applying NMS to RPN proposals -__C.TRAIN.RPN_POST_NMS_TOP_N = 2000 -# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) -__C.TRAIN.RPN_MIN_SIZE = 8 -# Deprecated (outside weights) -__C.TRAIN.RPN_BBOX_INSIDE_WEIGHTS = (1.0, 1.0, 1.0, 1.0) -# Give the positive RPN examples weight of p * 1 / {num positives} -# and give negatives a weight of (1 - p) -# Set to -1.0 to use uniform example weighting -__C.TRAIN.RPN_POSITIVE_WEIGHT = -1.0 -# __C.TRAIN.RPN_POSITIVE_WEIGHT = 0.5 - -# -# Testing options -# - -__C.TEST = edict() - -# Scales to use during testing (can list multiple scales) -# Each scale is the pixel size of an image's shortest side -__C.TEST.SCALES = (900, ) - -# Max pixel size of the longest side of a scaled input image -__C.TEST.MAX_SIZE = 1500 - -# Overlap threshold used for non-maximum suppression (suppress boxes with -# IoU >= this threshold) -__C.TEST.NMS = 0.3 - -# Experimental: treat the (K+1) units in the cls_score layer as linear -# predictors (trained, eg, with one-vs-rest SVMs). 
-__C.TEST.SVM = False - -# Test using bounding-box regressors -__C.TEST.BBOX_REG = True - -# Propose boxes -__C.TEST.HAS_RPN = True - -# Test using these proposals -__C.TEST.PROPOSAL_METHOD = 'selective_search' - -# NMS threshold used on RPN proposals -__C.TEST.RPN_NMS_THRESH = 0.7 -# Number of top scoring boxes to keep before apply NMS to RPN proposals -# __C.TEST.RPN_PRE_NMS_TOP_N = 6000 -__C.TEST.RPN_PRE_NMS_TOP_N = 12000 -# Number of top scoring boxes to keep after applying NMS to RPN proposals -__C.TEST.RPN_POST_NMS_TOP_N = 1000 -# __C.TEST.RPN_POST_NMS_TOP_N = 2000 -# Proposal height and width both need to be greater than RPN_MIN_SIZE (at orig image scale) -__C.TEST.RPN_MIN_SIZE = 8 - -# -# MISC -# - -# The mapping from image coordinates to feature map coordinates might cause -# some boxes that are distinct in image space to become identical in feature -# coordinates. If DEDUP_BOXES > 0, then DEDUP_BOXES is used as the scale factor -# for identifying duplicate boxes. -# 1/16 is correct for {Alex,Caffe}Net, VGG_CNN_M_1024, and VGG16 -__C.DEDUP_BOXES = 1. / 16. - -# Pixel mean values (BGR order) as a (1, 1, 3) array -# We use the same pixel mean for all networks even though it's not exactly what -# they were trained with -__C.PIXEL_MEANS = np.array([[[102.9801, 115.9465, 122.7717]]]) - -# For reproducibility -# __C.RNG_SEED = 3 -__C.RNG_SEED = 3 - -# A small number that's used many times -__C.EPS = 1e-14 - -# Root directory of project -__C.ROOT_DIR = osp.abspath(osp.join(osp.dirname(__file__), '..', '..')) - -# Data directory -__C.DATA_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'data')) -# 将voc的文件位置移到仓库外部 -# __C.DATA_DIR = '/Users/xiaofeng/Code/Github/dataset/CHINESE_OCR/ctpn/' - -# Model directory -__C.MODELS_DIR = osp.abspath(osp.join(__C.ROOT_DIR, 'models', 'pascal_voc')) - -# Name (or path to) the matlab executable -__C.MATLAB = 'matlab' - -# Place outputs under an experiments directory -__C.EXP_DIR = 'default' -__C.LOG_DIR = 'default' - -# Use GPU implementation of non-maximum suppression -__C.USE_GPU_NMS = True - -# Default GPU device id -__C.GPU_ID = 0 - - -def get_output_dir(imdb, weights_filename): - """Return the directory where experimental artifacts are placed. - If the directory does not exist, it is created. - A canonical path is built using the name from an imdb and a network - (if not None). - """ - outdir = osp.abspath( - osp.join(__C.ROOT_DIR, 'output', __C.EXP_DIR, imdb.name)) - if weights_filename is not None: - outdir = osp.join(outdir, weights_filename) - if not os.path.exists(outdir): - os.makedirs(outdir) - return outdir - - -def get_log_dir(imdb): - """Return the directory where experimental artifacts are placed. - If the directory does not exist, it is created. - A canonical path is built using the name from an imdb and a network - (if not None). - """ - log_dir = osp.abspath( - osp.join(__C.ROOT_DIR, 'logs', __C.LOG_DIR, imdb.name, strftime("%Y-%m-%d-%H-%M-%S", localtime()))) - if not os.path.exists(log_dir): - os.makedirs(log_dir) - return log_dir - - -def _merge_a_into_b(a, b): - """Merge config dictionary a into config dictionary b, clobbering the - options in b whenever they are also specified in a. 
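-    Used by cfg_from_file to overlay a YAML override file onto the
-    defaults defined above.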
- """ - if type(a) is not edict: - return - - for k, v in a.items(): - # a must specify keys that are in b - # if not b.has_key(k): #--python2 - if k not in b: # python3 - raise KeyError('{} is not a valid config key'.format(k)) - - # the types must match, too - old_type = type(b[k]) - if old_type is not type(v): - if isinstance(b[k], np.ndarray): - v = np.array(v, dtype=b[k].dtype) - else: - raise ValueError(('Type mismatch ({} vs. {}) ' - 'for config key: {}').format( - type(b[k]), type(v), k)) - - # recursively merge dicts - if type(v) is edict: - try: - _merge_a_into_b(a[k], b[k]) - except: - print('Error under config key: {}'.format(k)) - raise - else: - b[k] = v - - -def cfg_from_file(filename): - """Load a config file and merge it into the default options.""" - import yaml - with open(filename, 'r') as f: - yaml_cfg = edict(yaml.load(f)) - - _merge_a_into_b(yaml_cfg, __C) - - -def cfg_from_list(cfg_list): - """Set config keys via list (e.g., from command line).""" - from ast import literal_eval - assert len(cfg_list) % 2 == 0 - for k, v in zip(cfg_list[0::2], cfg_list[1::2]): - key_list = k.split('.') - d = __C - for subkey in key_list[:-1]: - # assert d.has_key(subkey) - assert subkey in d - d = d[subkey] - subkey = key_list[-1] - assert d.has_key(subkey) - try: - value = literal_eval(v) - except: - # handle the case when v is a string literal - value = v - assert type(value) == type(d[subkey]), \ - 'type {} does not match original type {}'.format( - type(value), type(d[subkey])) - d[subkey] = value -from .config import cfg -from ..utils.cython_nms import nms as cython_nms - -try: - from lib.utils.gpu_nms import gpu_nms -except: - gpu_nms = cython_nms -pass - - -def nms(dets, thresh): - if dets.shape[0] == 0: - return [] - if cfg.USE_GPU_NMS: - try: - return gpu_nms(dets, thresh, device_id=cfg.GPU_ID) - except: - return cython_nms(dets, thresh) - else: - return cython_nms(dets, thresh) -#!/usr/bin/env python -# -*- coding: utf-8 -*- -# _Author_: xiaofeng -# Date: 2018-04-08 14:31:45 -# Last Modified by: xiaofeng -# Last Modified time: 2018-04-08 14:31:45 - -from ..utils.blob import im_list_to_blob -from .config import cfg -import cv2 -import os -import sys -import numpy as np - -# sys.path.append(os.getcwd()) -parentdir = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) -sys.path.insert(0, parentdir) - -# from ..utils.blob import im_list_to_blob - - -def _get_image_blob(im): - im_orig = im.astype(np.float32, copy=True) - im_orig -= cfg.PIXEL_MEANS - - im_shape = im_orig.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - - processed_ims = [] - im_scale_factors = [] - - for target_size in cfg.TEST.SCALES: - im_scale = float(target_size) / float(im_size_min) - # Prevent the biggest axis from being more than MAX_SIZE - if np.round(im_scale * im_size_max) > cfg.TEST.MAX_SIZE: - im_scale = float(cfg.TEST.MAX_SIZE) / float(im_size_max) - im = cv2.resize( - im_orig, - None, - None, - fx=im_scale, - fy=im_scale, - interpolation=cv2.INTER_LINEAR) - im_scale_factors.append(im_scale) - processed_ims.append(im) - - # Create a blob to hold the input images - blob = im_list_to_blob(processed_ims) - - return blob, np.array(im_scale_factors) - - -def _get_blobs(im, rois): - blobs = {'data': None, 'rois': None} - blobs['data'], im_scale_factors = _get_image_blob(im) - return blobs, im_scale_factors - - -def test_ctpn(sess, net, im, boxes=None): - blobs, im_scales = _get_blobs(im, boxes) - if cfg.TEST.HAS_RPN: - im_blob = blobs['data'] - blobs['im_info'] = 
np.array( - [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], - dtype=np.float32) - - # forward pass - if cfg.TEST.HAS_RPN: - feed_dict = { - net.data: blobs['data'], - net.im_info: blobs['im_info'], - net.keep_prob: 1.0 - } - - rois = sess.run([net.get_output('rois')[0]], feed_dict=feed_dict) - rois = rois[0] - - scores = rois[:, 0] - if cfg.TEST.HAS_RPN: - assert len(im_scales) == 1, "Only single-image batch implemented" - boxes = rois[:, 1:5] / im_scales[0] - return scores, boxes -from __future__ import print_function - -import os - -import numpy as np -import tensorflow as tf - -from ..fast_rcnn.config import cfg -from ..roi_data_layer import roidb as rdl_roidb -from ..roi_data_layer.layer import RoIDataLayer -from..utils.timer import Timer -# from lib.datasets import imdb as imdb - -_DEBUG = False - - -class SolverWrapper(object): - def __init__(self, - sess, - network, - imdb, - roidb, - output_dir, - logdir, - pretrained_model=None): - """Initialize the SolverWrapper.""" - self.net = network - self.imdb = imdb - self.roidb = roidb - self.output_dir = output_dir - self.pretrained_model = pretrained_model - - print('Computing bounding-box regression targets...') - if cfg.TRAIN.BBOX_REG: - self.bbox_means, self.bbox_stds = rdl_roidb.add_bbox_regression_targets( - roidb) - print('done') - - # For checkpoint - self.saver = tf.train.Saver( - max_to_keep=1, write_version=tf.train.SaverDef.V2) - self.writer = tf.summary.FileWriter( - logdir=logdir, graph=tf.get_default_graph(), flush_secs=5) - - def snapshot(self, sess, iter): - net = self.net - if cfg.TRAIN.BBOX_REG and 'bbox_pred' in net.layers and cfg.TRAIN.BBOX_NORMALIZE_TARGETS: - # save original values - with tf.variable_scope('bbox_pred', reuse=True): - weights = tf.get_variable("weights") - biases = tf.get_variable("biases") - - orig_0 = weights.eval() - orig_1 = biases.eval() - - # scale and shift with bbox reg unnormalization; then save snapshot - weights_shape = weights.get_shape().as_list() - sess.run( - weights.assign(orig_0 * np.tile(self.bbox_stds, - (weights_shape[0], 1)))) - sess.run(biases.assign(orig_1 * self.bbox_stds + self.bbox_means)) - - if not os.path.exists(self.output_dir): - os.makedirs(self.output_dir) - - infix = ('_' + cfg.TRAIN.SNAPSHOT_INFIX - if cfg.TRAIN.SNAPSHOT_INFIX != '' else '') - filename = (cfg.TRAIN.SNAPSHOT_PREFIX + infix + - '_iter_{:d}'.format(iter + 1) + '.ckpt') - filename = os.path.join(self.output_dir, filename) - # save - self.saver.save(sess, filename) - print('Wrote snapshot to: {:s}'.format(filename)) - - if cfg.TRAIN.BBOX_REG and 'bbox_pred' in net.layers: - # restore net to original state - sess.run(weights.assign(orig_0)) - sess.run(biases.assign(orig_1)) - - def build_image_summary(self): - # A simple graph for write image summary - - log_image_data = tf.placeholder(tf.uint8, [None, None, 3]) - log_image_name = tf.placeholder(tf.string) - # import tensorflow.python.ops.gen_logging_ops as logging_ops - from tensorflow.python.ops import gen_logging_ops - from tensorflow.python.framework import ops as _ops - log_image = gen_logging_ops.image_summary( - log_image_name, tf.expand_dims(log_image_data, 0), max_images=1) - _ops.add_to_collection(_ops.GraphKeys.SUMMARIES, log_image) - # log_image = tf.summary.image(log_image_name, tf.expand_dims(log_image_data, 0), max_outputs=1) - return log_image, log_image_data, log_image_name - - def train_model(self, sess, max_iters, restore=False): - """Network training loop.""" - data_layer = get_data_layer(self.roidb, self.imdb.num_classes) - 
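-        # build_loss returns the four loss scalars logged below; the ohem
-        # flag enables online hard example mining when computing them.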
-        total_loss, model_loss, rpn_cross_entropy, rpn_loss_box = self.net.build_loss(
-            ohem=cfg.TRAIN.OHEM)
-        # scalar summaries
-        tf.summary.scalar('rpn_reg_loss', rpn_loss_box)
-        tf.summary.scalar('rpn_cls_loss', rpn_cross_entropy)
-        tf.summary.scalar('model_loss', model_loss)
-        tf.summary.scalar('total_loss', total_loss)
-        summary_op = tf.summary.merge_all()
-
-        log_image, log_image_data, log_image_name = \
-            self.build_image_summary()
-
-        # optimizer
-        lr = tf.Variable(cfg.TRAIN.LEARNING_RATE, trainable=False)
-        if cfg.TRAIN.SOLVER == 'Adam':
-            opt = tf.train.AdamOptimizer(cfg.TRAIN.LEARNING_RATE)
-        elif cfg.TRAIN.SOLVER == 'RMS':
-            opt = tf.train.RMSPropOptimizer(cfg.TRAIN.LEARNING_RATE)
-        else:
-            # lr = tf.Variable(0.0, trainable=False)
-            momentum = cfg.TRAIN.MOMENTUM
-            opt = tf.train.MomentumOptimizer(lr, momentum)
-
-        global_step = tf.Variable(0, trainable=False)
-        with_clip = True
-        if with_clip:
-            tvars = tf.trainable_variables()
-            grads, norm = tf.clip_by_global_norm(
-                tf.gradients(total_loss, tvars), 10.0)
-            train_op = opt.apply_gradients(
-                list(zip(grads, tvars)), global_step=global_step)
-        else:
-            train_op = opt.minimize(total_loss, global_step=global_step)
-
-        # initialize variables
-        sess.run(tf.global_variables_initializer())
-        restore_iter = 0
-
-        # load vgg16
-        if self.pretrained_model is not None and not restore:
-            try:
-                print(('Loading pretrained model '
-                       'weights from {:s}').format(self.pretrained_model))
-                self.net.load(self.pretrained_model, sess, True)
-            except Exception:
-                raise RuntimeError('Check your pretrained model {:s}'.format(
-                    self.pretrained_model))
-
-        # resuming a trainer
-        if restore:
-            print('output_dir:', self.output_dir)
-            # load the checkpoint state from the output directory rather
-            # than pointing at the 'checkpoint' file itself
-            ckpt = tf.train.get_checkpoint_state(
-                self.output_dir + '/')
-            print(
-                'Restoring from {}...'.format(ckpt.model_checkpoint_path),
-                end=' ')
-            self.saver.restore(sess, ckpt.model_checkpoint_path)
-            stem = os.path.splitext(
-                os.path.basename(ckpt.model_checkpoint_path))[0]
-            restore_iter = int(stem.split('_')[-1])
-            sess.run(global_step.assign(restore_iter))
-            print('done')
-
-        last_snapshot_iter = -1
-        timer = Timer()
-        print(restore_iter, max_iters)
-        for iter in range(restore_iter, max_iters):
-            timer.tic()
-            # learning rate decay
-            print(iter)
-            if iter != 0 and iter % cfg.TRAIN.STEPSIZE == 0:
-                sess.run(tf.assign(lr, lr.eval() * cfg.TRAIN.GAMMA))
-                print(lr)
-
-            # get one batch
-            blobs = data_layer.forward()
-
-            feed_dict = {
-                self.net.data: blobs['data'],
-                self.net.im_info: blobs['im_info'],
-                self.net.keep_prob: 0.5,
-                self.net.gt_boxes: blobs['gt_boxes'],
-                self.net.gt_ishard: blobs['gt_ishard'],
-                self.net.dontcare_areas: blobs['dontcare_areas']
-            }
-            res_fetches = []
-            fetch_list = [
-                total_loss, model_loss, rpn_cross_entropy, rpn_loss_box,
-                summary_op, train_op
-            ] + res_fetches
-
-            total_loss_val, model_loss_val, rpn_loss_cls_val, rpn_loss_box_val, \
-                summary_str, _ = sess.run(
-                    fetches=fetch_list, feed_dict=feed_dict)
-
-            self.writer.add_summary(
-                summary=summary_str, global_step=global_step.eval())
-
-            _diff_time = timer.toc(average=False)
-
-            if (iter) % (cfg.TRAIN.DISPLAY) == 0:
-                print(
-                    'iter: %d / %d, total loss: %.4f, model loss: %.4f, rpn_loss_cls: %.4f, rpn_loss_box: %.4f, lr: %f' %
-                    (iter, max_iters, total_loss_val, model_loss_val, rpn_loss_cls_val, rpn_loss_box_val, lr.eval()))
-                print('speed: {:.3f}s / iter'.format(_diff_time))
-
-            if (iter + 1) % cfg.TRAIN.SNAPSHOT_ITERS == 0:
-                last_snapshot_iter = iter
-                self.snapshot(sess, iter)
-
-        if last_snapshot_iter != iter:
-            self.snapshot(sess, iter)
-
-
-def get_training_roidb(imdb):
-    """Returns a roidb (Region of Interest database) for use in training."""
-    if cfg.TRAIN.USE_FLIPPED:
-        print('Appending horizontally-flipped training examples...')
-        imdb.append_flipped_images()
-        print('done')
-
-    print('Preparing training data...')
-    if cfg.TRAIN.HAS_RPN:
-        rdl_roidb.prepare_roidb(imdb)
-    else:
-        rdl_roidb.prepare_roidb(imdb)
-    print('done')
-
-    return imdb.roidb
-
-
-def get_data_layer(roidb, num_classes):
-    """Return a data layer."""
-    if cfg.TRAIN.HAS_RPN:
-        if cfg.IS_MULTISCALE:
-            # obsolete
-            # layer = GtDataLayer(roidb)
-            raise NotImplementedError('Calling caffe modules...')
-        else:
-            layer = RoIDataLayer(roidb, num_classes)
-    else:
-        layer = RoIDataLayer(roidb, num_classes)
-
-    return layer
-
-
-def train_net(network,
-              imdb,
-              roidb,
-              output_dir,
-              log_dir,
-              pretrained_model=None,
-              max_iters=40000,
-              restore=False):
-    """Train a Fast R-CNN network."""
-
-    config = tf.ConfigProto(allow_soft_placement=True)
-    config.gpu_options.allocator_type = 'BFC'
-    config.gpu_options.per_process_gpu_memory_fraction = 0.75
-    with tf.Session(config=config) as sess:
-        sw = SolverWrapper(
-            sess,
-            network,
-            imdb,
-            roidb,
-            output_dir,
-            logdir=log_dir,
-            pretrained_model=pretrained_model)
-        print('Solving...')
-        sw.train_model(sess, max_iters, restore=restore)
-        print('done solving')
-#!/usr/bin/env python
-# -*- coding: utf-8 -*-
-# _Author_: xiaofeng
-# Date: 2018-04-22 21:45:13
-# Last Modified by: xiaofeng
-# Last Modified time: 2018-04-22 21:45:13
-
-import tensorflow as tf
-import numpy as np
-from .network import Network
-from ..fast_rcnn.config import cfg
-
-
-class VGGnet_test(Network):
-    def __init__(self, trainable=True):
-        self.inputs = []
-        self.data = tf.placeholder(tf.float32, shape=[None, None, None, 3])
-        # a list of [image_height, image_width, scale_ratios]
-        self.im_info = tf.placeholder(tf.float32, shape=[None, 3])
-        self.keep_prob = tf.placeholder(tf.float32)
-        self.layers = dict({'data': self.data, 'im_info': self.im_info})
-        self.trainable = trainable
-        self.setup()
-
-    def setup(self):
-        anchor_scales = cfg.ANCHOR_SCALES
-        _feat_stride = [16, ]
-
-        (self.feed('data').conv(3, 3, 64, 1, 1, name='conv1_1')
-         .conv(3, 3, 64, 1, 1, name='conv1_2')
-         .max_pool(2, 2, 2, 2, padding='VALID', name='pool1')
-         .conv(3, 3, 128, 1, 1, name='conv2_1')
-         .conv(3, 3, 128, 1, 1, name='conv2_2')
-         .max_pool(2, 2, 2, 2, padding='VALID', name='pool2')
-         .conv(3, 3, 256, 1, 1, name='conv3_1')
-         .conv(3, 3, 256, 1, 1, name='conv3_2')
-         .conv(3, 3, 256, 1, 1, name='conv3_3')
-         .max_pool(2, 2, 2, 2, padding='VALID', name='pool3')
-         .conv(3, 3, 512, 1, 1, name='conv4_1')
-         .conv(3, 3, 512, 1, 1, name='conv4_2')
-         .conv(3, 3, 512, 1, 1, name='conv4_3')
-         .max_pool(2, 2, 2, 2, padding='VALID', name='pool4')
-         .conv(3, 3, 512, 1, 1, name='conv5_1')
-         .conv(3, 3, 512, 1, 1, name='conv5_2')
-         .conv(3, 3, 512, 1, 1, name='conv5_3'))
-
-        # 3x3x512 convolution with stride 1x1: the RPN proposes regions
-        # from the last VGG feature map
-        (self.feed('conv5_3').conv(3, 3, 512, 1, 1, name='rpn_conv/3x3'))
-        # the RPN output has 512 channels; bidirectional LSTM with
-        # 128 hidden units per direction
-        (self.feed('rpn_conv/3x3').Bilstm(512, 128, 512, name='lstm_o'))
-
-        # fully-connected layers on top of the LSTM output
-        (self.feed('lstm_o').lstm_fc(
-            512, len(anchor_scales) * 10 * 4, name='rpn_bbox_pred'))
-        (self.feed('lstm_o').lstm_fc(
-            512, len(anchor_scales) * 10 * 2, name='rpn_cls_score'))
-
-        # shape is (1, H, W, Ax2) -> (1, H, WxA, 2)
-        (self.feed('rpn_cls_score').spatial_reshape_layer(
-            2,
name='rpn_cls_score_reshape') - .spatial_softmax(name='rpn_cls_prob')) - - # shape is (1, H, WxA, 2) -> (1, H, W, Ax2) - (self.feed('rpn_cls_prob').spatial_reshape_layer( - len(anchor_scales) * 10 * 2, name='rpn_cls_prob_reshape')) - - (self.feed('rpn_cls_prob_reshape', 'rpn_bbox_pred', 'im_info') - .proposal_layer(_feat_stride, anchor_scales, 'TEST', name='rois')) -# -*- coding:utf-8 -*- -import tensorflow as tf - -from .network import Network -from ..fast_rcnn.config import cfg - - -class VGGnet_train(Network): - def __init__(self, trainable=True): - self.inputs = [] - self.data = tf.placeholder( - tf.float32, shape=[None, None, None, 3], name='data') - self.im_info = tf.placeholder( - tf.float32, shape=[None, 3], name='im_info') - self.gt_boxes = tf.placeholder( - tf.float32, shape=[None, 5], name='gt_boxes') - self.gt_ishard = tf.placeholder( - tf.int32, shape=[None], name='gt_ishard') - self.dontcare_areas = tf.placeholder( - tf.float32, shape=[None, 4], name='dontcare_areas') - self.keep_prob = tf.placeholder(tf.float32) - self.layers = dict({'data': self.data, 'im_info': self.im_info, 'gt_boxes': self.gt_boxes, - 'gt_ishard': self.gt_ishard, 'dontcare_areas': self.dontcare_areas}) - self.trainable = trainable - self.setup() - - def setup(self): - n_classes = cfg.NCLASSES - anchor_scales = cfg.ANCHOR_SCALES - _feat_stride = [16, ] - # net frame - (self.feed('data') - .conv(3, 3, 64, 1, 1, name='conv1_1') - .conv(3, 3, 64, 1, 1, name='conv1_2') - .max_pool(2, 2, 2, 2, padding='VALID', name='pool1') - .conv(3, 3, 128, 1, 1, name='conv2_1') - .conv(3, 3, 128, 1, 1, name='conv2_2') - .max_pool(2, 2, 2, 2, padding='VALID', name='pool2') - .conv(3, 3, 256, 1, 1, name='conv3_1') - .conv(3, 3, 256, 1, 1, name='conv3_2') - .conv(3, 3, 256, 1, 1, name='conv3_3') - .max_pool(2, 2, 2, 2, padding='VALID', name='pool3') - .conv(3, 3, 512, 1, 1, name='conv4_1') - .conv(3, 3, 512, 1, 1, name='conv4_2') - .conv(3, 3, 512, 1, 1, name='conv4_3') - .max_pool(2, 2, 2, 2, padding='VALID', name='pool4') - .conv(3, 3, 512, 1, 1, name='conv5_1') - .conv(3, 3, 512, 1, 1, name='conv5_2') - .conv(3, 3, 512, 1, 1, name='conv5_3')) - # ========= RPN ============ - (self.feed('conv5_3') - .conv(3, 3, 512, 1, 1, name='rpn_conv/3x3')) - - (self.feed('rpn_conv/3x3').Bilstm(512, 128, 512, name='lstm_o')) - (self.feed('lstm_o').lstm_fc( - 512, len(anchor_scales) * 10 * 4, name='rpn_bbox_pred')) - (self.feed('lstm_o').lstm_fc( - 512, len(anchor_scales) * 10 * 2, name='rpn_cls_score')) - - # generating training labels on the fly - # output: rpn_labels(HxWxA, 2) rpn_bbox_targets(HxWxA, 4) rpn_bbox_inside_weights rpn_bbox_outside_weights - # 给每个anchor上标签,并计算真值(也是delta的形式),以及内部权重和外部权重 - (self.feed('rpn_cls_score', 'gt_boxes', 'gt_ishard', 'dontcare_areas', 'im_info') - .anchor_target_layer(_feat_stride, anchor_scales, name='rpn-data')) - - # shape is (1, H, W, Ax2) -> (1, H, WxA, 2) - # 给之前得到的score进行softmax,得到0-1之间的得分 - (self.feed('rpn_cls_score') - .spatial_reshape_layer(2, name='rpn_cls_score_reshape') - .spatial_softmax(name='rpn_cls_prob')) -from . 
import factory -from .VGGnet_test import VGGnet_test -from .VGGnet_train import VGGnet_train -from .VGGnet_test import VGGnet_test -from .VGGnet_train import VGGnet_train - - -def get_network(name): - """Get a network by name.""" - if name.split('_')[0] == 'VGGnet': - if name.split('_')[1] == 'test': - return VGGnet_test() - elif name.split('_')[1] == 'train': - return VGGnet_train() - else: - raise KeyError('Unknown dataset: {}'.format(name)) - else: - raise KeyError('Unknown dataset: {}'.format(name)) -# -*- coding:utf-8 -*- -import numpy as np -import tensorflow as tf - -from ..fast_rcnn.config import cfg -from ..rpn_msr.anchor_target_layer_tf import anchor_target_layer as anchor_target_layer_py -from ..rpn_msr.proposal_layer_tf import proposal_layer as proposal_layer_py - -DEFAULT_PADDING = 'SAME' - - -def layer(op): - def layer_decorated(self, *args, **kwargs): - # Automatically set a name if not provided. - name = kwargs.setdefault('name', self.get_unique_name(op.__name__)) - # Figure out the layer inputs. - if len(self.inputs) == 0: - raise RuntimeError('No input variables found for layer %s.' % name) - elif len(self.inputs) == 1: - layer_input = self.inputs[0] - else: - layer_input = list(self.inputs) - # Perform the operation and get the output. - layer_output = op(self, layer_input, *args, **kwargs) - # Add to layer LUT. - self.layers[name] = layer_output - # This output is now the input for the next layer. - self.feed(layer_output) - # Return self for chained calls. - return self - - return layer_decorated - - -class Network(object): - def __init__(self, inputs, trainable=True): - self.inputs = [] - self.layers = dict(inputs) - self.trainable = trainable - self.setup() - - def setup(self): - raise NotImplementedError('Must be subclassed.') - - def load(self, data_path, session, ignore_missing=False): - data_dict = np.load(data_path, encoding='latin1').item() - for key in data_dict: - with tf.variable_scope(key, reuse=True): - for subkey in data_dict[key]: - try: - var = tf.get_variable(subkey) - session.run(var.assign(data_dict[key][subkey])) - print("assign pretrain model " + subkey + " to " + key) - except ValueError: - print("ignore " + key) - if not ignore_missing: - raise - - def feed(self, *args): - assert len(args) != 0 - self.inputs = [] - for layer in args: - if isinstance(layer, str): - try: - layer = self.layers[layer] - print(layer) - except KeyError: - print(list(self.layers.keys())) - raise KeyError('Unknown layer name fed: %s' % layer) - self.inputs.append(layer) - return self - - def get_output(self, layer): - try: - layer = self.layers[layer] - except KeyError: - print(list(self.layers.keys())) - raise KeyError('Unknown layer name fed: %s' % layer) - return layer - - def get_unique_name(self, prefix): - id = sum(t.startswith(prefix) - for t, _ in list(self.layers.items())) + 1 - return '%s_%d' % (prefix, id) - - def make_var(self, name, shape, initializer=None, trainable=True, regularizer=None): - return tf.get_variable(name, shape, initializer=initializer, trainable=trainable, regularizer=regularizer) - - def validate_padding(self, padding): - assert padding in ('SAME', 'VALID') - - @layer - def Bilstm(self, input, d_i, d_h, d_o, name, trainable=True): - img = input - with tf.variable_scope(name) as scope: - shape = tf.shape(img) - N, H, W, C = shape[0], shape[1], shape[2], shape[3] - img = tf.reshape(img, [N * H, W, C]) - img.set_shape([None, None, d_i]) - # 单层双向动态RNN - lstm_fw_cell = tf.contrib.rnn.LSTMCell(d_h, state_is_tuple=True) - lstm_bw_cell = 
tf.contrib.rnn.LSTMCell(d_h, state_is_tuple=True) - - lstm_out, last_state = tf.nn.bidirectional_dynamic_rnn( - lstm_fw_cell, lstm_bw_cell, img, dtype=tf.float32) - lstm_out = tf.concat(lstm_out, axis=-1) - - lstm_out = tf.reshape(lstm_out, [N * H * W, 2 * d_h]) - - init_weights = tf.truncated_normal_initializer(stddev=0.1) - init_biases = tf.constant_initializer(0.0) - weights = self.make_var('weights', [2 * d_h, d_o], init_weights, trainable, - regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)) - biases = self.make_var('biases', [d_o], init_biases, trainable) - outputs = tf.matmul(lstm_out, weights) + biases - - outputs = tf.reshape(outputs, [N, H, W, d_o]) - return outputs - - @layer - def lstm(self, input, d_i, d_h, d_o, name, trainable=True): - img = input - with tf.variable_scope(name) as scope: - shape = tf.shape(img) - N, H, W, C = shape[0], shape[1], shape[2], shape[3] - img = tf.reshape(img, [N * H, W, C]) - img.set_shape([None, None, d_i]) - - lstm_cell = tf.contrib.rnn.LSTMCell(d_h, state_is_tuple=True) - initial_state = lstm_cell.zero_state(N * H, dtype=tf.float32) - - lstm_out, last_state = tf.nn.dynamic_rnn(lstm_cell, img, - initial_state=initial_state, dtype=tf.float32) - - lstm_out = tf.reshape(lstm_out, [N * H * W, d_h]) - - init_weights = tf.truncated_normal_initializer(stddev=0.1) - init_biases = tf.constant_initializer(0.0) - weights = self.make_var('weights', [d_h, d_o], init_weights, trainable, - regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)) - biases = self.make_var('biases', [d_o], init_biases, trainable) - outputs = tf.matmul(lstm_out, weights) + biases - - outputs = tf.reshape(outputs, [N, H, W, d_o]) - return outputs - - @layer - def lstm_fc(self, input, d_i, d_o, name, trainable=True): - with tf.variable_scope(name) as scope: - shape = tf.shape(input) - N, H, W, C = shape[0], shape[1], shape[2], shape[3] - input = tf.reshape(input, [N * H * W, C]) - - init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01) - init_biases = tf.constant_initializer(0.0) - kernel = self.make_var('weights', [d_i, d_o], init_weights, trainable, - regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)) - biases = self.make_var('biases', [d_o], init_biases, trainable) - - _O = tf.matmul(input, kernel) + biases - return tf.reshape(_O, [N, H, W, int(d_o)]) - - @layer - def conv(self, input, k_h, k_w, c_o, s_h, s_w, name, biased=True, relu=True, padding=DEFAULT_PADDING, - trainable=True): - """ contribution by miraclebiu, and biased option""" - self.validate_padding(padding) - c_i = input.get_shape()[-1] - - def convolve(i, k): return tf.nn.conv2d( - i, k, [1, s_h, s_w, 1], padding=padding) - with tf.variable_scope(name) as scope: - - init_weights = tf.truncated_normal_initializer(0.0, stddev=0.01) - init_biases = tf.constant_initializer(0.0) - kernel = self.make_var('weights', [k_h, k_w, c_i, c_o], init_weights, trainable, - regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)) - if biased: - biases = self.make_var('biases', [c_o], init_biases, trainable) - conv = convolve(input, kernel) - if relu: - bias = tf.nn.bias_add(conv, biases) - return tf.nn.relu(bias, name=scope.name) - return tf.nn.bias_add(conv, biases, name=scope.name) - else: - conv = convolve(input, kernel) - if relu: - return tf.nn.relu(conv, name=scope.name) - return conv - - @layer - def relu(self, input, name): - return tf.nn.relu(input, name=name) - - @layer - def max_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): - self.validate_padding(padding) - return 
tf.nn.max_pool(input, - ksize=[1, k_h, k_w, 1], - strides=[1, s_h, s_w, 1], - padding=padding, - name=name) - - @layer - def avg_pool(self, input, k_h, k_w, s_h, s_w, name, padding=DEFAULT_PADDING): - self.validate_padding(padding) - return tf.nn.avg_pool(input, - ksize=[1, k_h, k_w, 1], - strides=[1, s_h, s_w, 1], - padding=padding, - name=name) - - @layer - def proposal_layer(self, input, _feat_stride, anchor_scales, cfg_key, name): - if isinstance(input[0], tuple): - input[0] = input[0][0] - # input[0] shape is (1, H, W, Ax2) - # rpn_rois <- (1 x H x W x A, 5) [0, x1, y1, x2, y2] - with tf.variable_scope(name) as scope: - blob, bbox_delta = tf.py_func(proposal_layer_py, - [input[0], input[1], input[2], cfg_key, - _feat_stride, anchor_scales], - [tf.float32, tf.float32]) - - rpn_rois = tf.convert_to_tensor(tf.reshape( - blob, [-1, 5]), name='rpn_rois') # shape is (1 x H x W x A, 2) - rpn_targets = tf.convert_to_tensor( - bbox_delta, name='rpn_targets') # shape is (1 x H x W x A, 4) - self.layers['rpn_rois'] = rpn_rois - self.layers['rpn_targets'] = rpn_targets - - return rpn_rois, rpn_targets - - @layer - def anchor_target_layer(self, input, _feat_stride, anchor_scales, name): - if isinstance(input[0], tuple): - input[0] = input[0][0] - - with tf.variable_scope(name) as scope: - # 'rpn_cls_score', 'gt_boxes', 'gt_ishard', 'dontcare_areas', 'im_info' - rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights = \ - tf.py_func(anchor_target_layer_py, - [input[0], input[1], input[2], input[3], - input[4], _feat_stride, anchor_scales], - [tf.float32, tf.float32, tf.float32, tf.float32]) - - rpn_labels = tf.convert_to_tensor(tf.cast(rpn_labels, tf.int32), - name='rpn_labels') # shape is (1 x H x W x A, 2) - rpn_bbox_targets = tf.convert_to_tensor(rpn_bbox_targets, - name='rpn_bbox_targets') # shape is (1 x H x W x A, 4) - rpn_bbox_inside_weights = tf.convert_to_tensor(rpn_bbox_inside_weights, - name='rpn_bbox_inside_weights') # shape is (1 x H x W x A, 4) - rpn_bbox_outside_weights = tf.convert_to_tensor(rpn_bbox_outside_weights, - name='rpn_bbox_outside_weights') # shape is (1 x H x W x A, 4) - - return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights - - @layer - def reshape_layer(self, input, d, name): - input_shape = tf.shape(input) - if name == 'rpn_cls_prob_reshape': - # - # transpose: (1, AxH, W, 2) -> (1, 2, AxH, W) - # reshape: (1, 2xA, H, W) - # transpose: -> (1, H, W, 2xA) - return tf.transpose(tf.reshape(tf.transpose(input, [0, 3, 1, 2]), - [input_shape[0], - int(d), - tf.cast( - tf.cast(input_shape[1], tf.float32) / tf.cast(d, tf.float32) * tf.cast( - input_shape[3], tf.float32), tf.int32), - input_shape[2] - ]), - [0, 2, 3, 1], name=name) - else: - return tf.transpose(tf.reshape(tf.transpose(input, [0, 3, 1, 2]), - [input_shape[0], - int(d), - tf.cast(tf.cast(input_shape[1], tf.float32) * ( - tf.cast(input_shape[3], tf.float32) / tf.cast(d, tf.float32)), - tf.int32), - input_shape[2] - ]), - [0, 2, 3, 1], name=name) - - @layer - def spatial_reshape_layer(self, input, d, name): - input_shape = tf.shape(input) - # transpose: (1, H, W, A x d) -> (1, H, WxA, d) - return tf.reshape(input, - [input_shape[0], - input_shape[1], - -1, - int(d)]) - - @layer - def lrn(self, input, radius, alpha, beta, name, bias=1.0): - return tf.nn.local_response_normalization(input, - depth_radius=radius, - alpha=alpha, - beta=beta, - bias=bias, - name=name) - - @layer - def concat(self, inputs, axis, name): - return tf.concat(concat_dim=axis, 
values=inputs, name=name) - - @layer - def fc(self, input, num_out, name, relu=True, trainable=True): - with tf.variable_scope(name) as scope: - # only use the first input - if isinstance(input, tuple): - input = input[0] - - input_shape = input.get_shape() - if input_shape.ndims == 4: - dim = 1 - for d in input_shape[1:].as_list(): - dim *= d - feed_in = tf.reshape(tf.transpose( - input, [0, 3, 1, 2]), [-1, dim]) - else: - feed_in, dim = (input, int(input_shape[-1])) - - if name == 'bbox_pred': - init_weights = tf.truncated_normal_initializer( - 0.0, stddev=0.001) - init_biases = tf.constant_initializer(0.0) - else: - init_weights = tf.truncated_normal_initializer( - 0.0, stddev=0.01) - init_biases = tf.constant_initializer(0.0) - - weights = self.make_var('weights', [dim, num_out], init_weights, trainable, - regularizer=self.l2_regularizer(cfg.TRAIN.WEIGHT_DECAY)) - biases = self.make_var('biases', [num_out], init_biases, trainable) - - op = tf.nn.relu_layer if relu else tf.nn.xw_plus_b - fc = op(feed_in, weights, biases, name=scope.name) - return fc - - @layer - def softmax(self, input, name): - input_shape = tf.shape(input) - if name == 'rpn_cls_prob': - return tf.reshape(tf.nn.softmax(tf.reshape(input, [-1, input_shape[3]])), - [-1, input_shape[1], input_shape[2], input_shape[3]], name=name) - else: - return tf.nn.softmax(input, name=name) - - @layer - def spatial_softmax(self, input, name): - input_shape = tf.shape(input) - # d = input.get_shape()[-1] - return tf.reshape(tf.nn.softmax(tf.reshape(input, [-1, input_shape[3]])), - [-1, input_shape[1], input_shape[2], input_shape[3]], name=name) - - @layer - def add(self, input, name): - """contribution by miraclebiu""" - return tf.add(input[0], input[1]) - - @layer - def batch_normalization(self, input, name, relu=True, is_training=False): - """contribution by miraclebiu""" - if relu: - temp_layer = tf.contrib.layers.batch_norm(input, scale=True, center=True, is_training=is_training, - scope=name) - return tf.nn.relu(temp_layer) - else: - return tf.contrib.layers.batch_norm(input, scale=True, center=True, is_training=is_training, scope=name) - - @layer - def dropout(self, input, keep_prob, name): - return tf.nn.dropout(input, keep_prob, name=name) - - def l2_regularizer(self, weight_decay=0.0005, scope=None): - def regularizer(tensor): - with tf.name_scope(scope, default_name='l2_regularizer', values=[tensor]): - l2_weight = tf.convert_to_tensor(weight_decay, - dtype=tensor.dtype.base_dtype, - name='weight_decay') - # return tf.mul(l2_weight, tf.nn.l2_loss(tensor), name='value') - return tf.multiply(l2_weight, tf.nn.l2_loss(tensor), name='value') - - return regularizer - - def smooth_l1_dist(self, deltas, sigma2=9.0, name='smooth_l1_dist'): - with tf.name_scope(name=name) as scope: - deltas_abs = tf.abs(deltas) - smoothL1_sign = tf.cast( - tf.less(deltas_abs, 1.0 / sigma2), tf.float32) - return tf.square(deltas) * 0.5 * sigma2 * smoothL1_sign + \ - (deltas_abs - 0.5 / sigma2) * tf.abs(smoothL1_sign - 1) - - def build_loss(self, ohem=False): - # classification loss - rpn_cls_score = tf.reshape(self.get_output( - 'rpn_cls_score_reshape'), [-1, 2]) # shape (HxWxA, 2) - rpn_label = tf.reshape(self.get_output( - 'rpn-data')[0], [-1]) # shape (HxWxA) - # ignore_label(-1) - fg_keep = tf.equal(rpn_label, 1) - rpn_keep = tf.where(tf.not_equal(rpn_label, -1)) - rpn_cls_score = tf.gather(rpn_cls_score, rpn_keep) # shape (N, 2) - rpn_label = tf.gather(rpn_label, rpn_keep) - rpn_cross_entropy_n = tf.nn.sparse_softmax_cross_entropy_with_logits( - 
labels=rpn_label, logits=rpn_cls_score) - - # box loss - rpn_bbox_pred = self.get_output( - 'rpn_bbox_pred') # shape (1, H, W, Ax4) - rpn_bbox_targets = self.get_output('rpn-data')[1] - rpn_bbox_inside_weights = self.get_output('rpn-data')[2] - rpn_bbox_outside_weights = self.get_output('rpn-data')[3] - rpn_bbox_pred = tf.gather(tf.reshape( - rpn_bbox_pred, [-1, 4]), rpn_keep) # shape (N, 4) - rpn_bbox_targets = tf.gather(tf.reshape( - rpn_bbox_targets, [-1, 4]), rpn_keep) - rpn_bbox_inside_weights = tf.gather(tf.reshape( - rpn_bbox_inside_weights, [-1, 4]), rpn_keep) - rpn_bbox_outside_weights = tf.gather(tf.reshape( - rpn_bbox_outside_weights, [-1, 4]), rpn_keep) - - rpn_loss_box_n = tf.reduce_sum(rpn_bbox_outside_weights * self.smooth_l1_dist( - rpn_bbox_inside_weights * (rpn_bbox_pred - rpn_bbox_targets)), reduction_indices=[1]) - - rpn_loss_box = tf.reduce_sum( - rpn_loss_box_n) / (tf.reduce_sum(tf.cast(fg_keep, tf.float32)) + 1) - rpn_cross_entropy = tf.reduce_mean(rpn_cross_entropy_n) - - model_loss = rpn_cross_entropy + rpn_loss_box - - regularization_losses = tf.get_collection( - tf.GraphKeys.REGULARIZATION_LOSSES) - total_loss = tf.add_n(regularization_losses) + model_loss - - return total_loss, model_loss, rpn_cross_entropy, rpn_loss_box -from . import roidb -# -------------------------------------------------------- -# Fast R-CNN -# Copyright (c) 2015 Microsoft -# Licensed under The MIT License [see LICENSE for details] -# Written by Ross Girshick -# -------------------------------------------------------- - -"""The data layer used during training to train a Fast R-CNN network. - -RoIDataLayer implements a Caffe Python layer. -""" - -import numpy as np - -# TODO: make fast_rcnn irrelevant -# >>>> obsolete, because it depends on sth outside of this project -from ..fast_rcnn.config import cfg -# <<<< obsolete -from ..roi_data_layer.minibatch import get_minibatch - - -class RoIDataLayer(object): - """Fast R-CNN data layer used for training.""" - - def __init__(self, roidb, num_classes): - """Set the roidb to be used by this layer during training.""" - self._roidb = roidb - self._num_classes = num_classes - self._shuffle_roidb_inds() - - def _shuffle_roidb_inds(self): - """Randomly permute the training roidb.""" - self._perm = np.random.permutation(np.arange(len(self._roidb))) - self._cur = 0 - - def _get_next_minibatch_inds(self): - """Return the roidb indices for the next minibatch.""" - - if cfg.TRAIN.HAS_RPN: - if self._cur + cfg.TRAIN.IMS_PER_BATCH >= len(self._roidb): - self._shuffle_roidb_inds() - - db_inds = self._perm[self._cur:self._cur + cfg.TRAIN.IMS_PER_BATCH] - self._cur += cfg.TRAIN.IMS_PER_BATCH - else: - # sample images - db_inds = np.zeros((cfg.TRAIN.IMS_PER_BATCH), dtype=np.int32) - i = 0 - while (i < cfg.TRAIN.IMS_PER_BATCH): - ind = self._perm[self._cur] - num_objs = self._roidb[ind]['boxes'].shape[0] - if num_objs != 0: - db_inds[i] = ind - i += 1 - - self._cur += 1 - if self._cur >= len(self._roidb): - self._shuffle_roidb_inds() - - return db_inds - - def _get_next_minibatch(self): - """Return the blobs to be used for the next minibatch. - - If cfg.TRAIN.USE_PREFETCH is True, then blobs will be computed in a - separate process and made available through self._blob_queue. 
- """ - db_inds = self._get_next_minibatch_inds() - minibatch_db = [self._roidb[i] for i in db_inds] - return get_minibatch(minibatch_db, self._num_classes) - - def forward(self): - """Get blobs and copy them into this layer's top blob vector.""" - blobs = self._get_next_minibatch() - return blobs -import os - -import cv2 -import numpy as np -import numpy.random as npr - -from ..fast_rcnn.config import cfg -from ..utils.blob import prep_im_for_blob, im_list_to_blob - - -def get_minibatch(roidb, num_classes): - """Given a roidb, construct a minibatch sampled from it.""" - num_images = len(roidb) - # Sample random scales to use for each image in this batch - random_scale_inds = npr.randint( - 0, high=len(cfg.TRAIN.SCALES), size=num_images) - assert (cfg.TRAIN.BATCH_SIZE % num_images == 0), \ - 'num_images ({}) must divide BATCH_SIZE ({})'. \ - format(num_images, cfg.TRAIN.BATCH_SIZE) - rois_per_image = cfg.TRAIN.BATCH_SIZE / num_images - fg_rois_per_image = np.round(cfg.TRAIN.FG_FRACTION * rois_per_image) - - # Get the input image blob, formatted for caffe - im_blob, im_scales = _get_image_blob(roidb, random_scale_inds) - - blobs = {'data': im_blob} - - if cfg.TRAIN.HAS_RPN: - assert len(im_scales) == 1, "Single batch only" - assert len(roidb) == 1, "Single batch only" - # gt boxes: (x1, y1, x2, y2, cls) - gt_inds = np.where(roidb[0]['gt_classes'] != 0)[0] - gt_boxes = np.empty((len(gt_inds), 5), dtype=np.float32) - gt_boxes[:, 0:4] = roidb[0]['boxes'][gt_inds, :] * im_scales[0] - gt_boxes[:, 4] = roidb[0]['gt_classes'][gt_inds] - blobs['gt_boxes'] = gt_boxes - blobs['gt_ishard'] = roidb[0]['gt_ishard'][gt_inds] \ - if 'gt_ishard' in roidb[0] else np.zeros(gt_inds.size, dtype=int) - # blobs['gt_ishard'] = roidb[0]['gt_ishard'][gt_inds] - blobs['dontcare_areas'] = roidb[0]['dontcare_areas'] * im_scales[0] \ - if 'dontcare_areas' in roidb[0] else np.zeros([0, 4], dtype=float) - blobs['im_info'] = np.array( - [[im_blob.shape[1], im_blob.shape[2], im_scales[0]]], - dtype=np.float32) - blobs['im_name'] = os.path.basename(roidb[0]['image']) - - else: # not using RPN - # Now, build the region of interest and label blobs - rois_blob = np.zeros((0, 5), dtype=np.float32) - labels_blob = np.zeros((0), dtype=np.float32) - bbox_targets_blob = np.zeros((0, 4 * num_classes), dtype=np.float32) - bbox_inside_blob = np.zeros(bbox_targets_blob.shape, dtype=np.float32) - # all_overlaps = [] - for im_i in range(num_images): - labels, overlaps, im_rois, bbox_targets, bbox_inside_weights \ - = _sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, - num_classes) - - # Add to RoIs blob - rois = _project_im_rois(im_rois, im_scales[im_i]) - batch_ind = im_i * np.ones((rois.shape[0], 1)) - rois_blob_this_image = np.hstack((batch_ind, rois)) - rois_blob = np.vstack((rois_blob, rois_blob_this_image)) - - # Add to labels, bbox targets, and bbox loss blobs - labels_blob = np.hstack((labels_blob, labels)) - bbox_targets_blob = np.vstack((bbox_targets_blob, bbox_targets)) - bbox_inside_blob = np.vstack((bbox_inside_blob, - bbox_inside_weights)) - # all_overlaps = np.hstack((all_overlaps, overlaps)) - - # For debug visualizations - # _vis_minibatch(im_blob, rois_blob, labels_blob, all_overlaps) - - blobs['rois'] = rois_blob - blobs['labels'] = labels_blob - - if cfg.TRAIN.BBOX_REG: - blobs['bbox_targets'] = bbox_targets_blob - blobs['bbox_inside_weights'] = bbox_inside_blob - blobs['bbox_outside_weights'] = \ - np.array(bbox_inside_blob > 0).astype(np.float32) - - return blobs - - -def _sample_rois(roidb, 
fg_rois_per_image, rois_per_image, num_classes): - """Generate a random sample of RoIs comprising foreground and background - examples. - """ - # label = class RoI has max overlap with - labels = roidb['max_classes'] - overlaps = roidb['max_overlaps'] - rois = roidb['boxes'] - - # Select foreground RoIs as those with >= FG_THRESH overlap - fg_inds = np.where(overlaps >= cfg.TRAIN.FG_THRESH)[0] - # Guard against the case when an image has fewer than fg_rois_per_image - # foreground RoIs - fg_rois_per_this_image = np.minimum(fg_rois_per_image, fg_inds.size) - # Sample foreground regions without replacement - if fg_inds.size > 0: - fg_inds = npr.choice( - fg_inds, size=fg_rois_per_this_image, replace=False) - - # Select background RoIs as those within [BG_THRESH_LO, BG_THRESH_HI) - bg_inds = np.where((overlaps < cfg.TRAIN.BG_THRESH_HI) & - (overlaps >= cfg.TRAIN.BG_THRESH_LO))[0] - # Compute number of background RoIs to take from this image (guarding - # against there being fewer than desired) - bg_rois_per_this_image = rois_per_image - fg_rois_per_this_image - bg_rois_per_this_image = np.minimum(bg_rois_per_this_image, bg_inds.size) - # Sample foreground regions without replacement - if bg_inds.size > 0: - bg_inds = npr.choice( - bg_inds, size=bg_rois_per_this_image, replace=False) - - # The indices that we're selecting (both fg and bg) - keep_inds = np.append(fg_inds, bg_inds) - # Select sampled values from various arrays: - labels = labels[keep_inds] - # Clamp labels for the background RoIs to 0 - labels[fg_rois_per_this_image:] = 0 - overlaps = overlaps[keep_inds] - rois = rois[keep_inds] - - bbox_targets, bbox_inside_weights = _get_bbox_regression_labels( - roidb['bbox_targets'][keep_inds, :], num_classes) - - return labels, overlaps, rois, bbox_targets, bbox_inside_weights - - -def _get_image_blob(roidb, scale_inds): - """Builds an input blob from the images in the roidb at the specified - scales. - """ - num_images = len(roidb) - processed_ims = [] - im_scales = [] - for i in range(num_images): - im = cv2.imread(roidb[i]['image']) - if roidb[i]['flipped']: - im = im[:, ::-1, :] - target_size = cfg.TRAIN.SCALES[scale_inds[i]] - im, im_scale = prep_im_for_blob(im, cfg.PIXEL_MEANS, target_size, - cfg.TRAIN.MAX_SIZE) - im_scales.append(im_scale) - processed_ims.append(im) - - # Create a blob to hold the input images - blob = im_list_to_blob(processed_ims) - - return blob, im_scales - - -def _project_im_rois(im_rois, im_scale_factor): - """Project image RoIs into the rescaled training image.""" - rois = im_rois * im_scale_factor - return rois - - -def _get_bbox_regression_labels(bbox_target_data, num_classes): - """Bounding-box regression targets are stored in a compact form in the - roidb. - - This function expands those targets into the 4-of-4*K representation used - by the network (i.e. only one class has non-zero targets). The loss weights - are similarly expanded. 
- - Returns: - bbox_target_data (ndarray): N x 4K blob of regression targets - bbox_inside_weights (ndarray): N x 4K blob of loss weights - """ - clss = bbox_target_data[:, 0] - bbox_targets = np.zeros((clss.size, 4 * num_classes), dtype=np.float32) - bbox_inside_weights = np.zeros(bbox_targets.shape, dtype=np.float32) - inds = np.where(clss > 0)[0] - for ind in inds: - cls = clss[ind] - start = 4 * cls - end = start + 4 - bbox_targets[ind, start:end] = bbox_target_data[ind, 1:] - bbox_inside_weights[ind, start:end] = cfg.TRAIN.BBOX_INSIDE_WEIGHTS - return bbox_targets, bbox_inside_weights - - -def _vis_minibatch(im_blob, rois_blob, labels_blob, overlaps): - """Visualize a mini-batch for debugging.""" - import matplotlib.pyplot as plt - for i in range(rois_blob.shape[0]): - rois = rois_blob[i, :] - im_ind = rois[0] - roi = rois[1:] - im = im_blob[im_ind, :, :, :].transpose((1, 2, 0)).copy() - im += cfg.PIXEL_MEANS - im = im[:, :, (2, 1, 0)] - im = im.astype(np.uint8) - cls = labels_blob[i] - plt.imshow(im) - print('class: ', cls, ' overlap: ', overlaps[i]) - plt.gca().add_patch( - plt.Rectangle( - (roi[0], roi[1]), - roi[2] - roi[0], - roi[3] - roi[1], - fill=False, - edgecolor='r', - linewidth=3)) - plt.show() -import PIL -import numpy as np - -from ..fast_rcnn.bbox_transform import bbox_transform -from ..fast_rcnn.config import cfg -from ..utils.bbox import bbox_overlaps - - -def prepare_roidb(imdb): - """Enrich the imdb's roidb by adding some derived quantities that - are useful for training. This function precomputes the maximum - overlap, taken over ground-truth boxes, between each ROI and - each ground-truth box. The class with maximum overlap is also - recorded. - """ - sizes = [ - PIL.Image.open(imdb.image_path_at(i)).size - for i in range(imdb.num_images) - ] - roidb = imdb.roidb - for i in range(len(imdb.image_index)): - roidb[i]['image'] = imdb.image_path_at(i) - roidb[i]['width'] = sizes[i][0] - roidb[i]['height'] = sizes[i][1] - # need gt_overlaps as a dense array for argmax - gt_overlaps = roidb[i]['gt_overlaps'].toarray() - # max overlap with gt over classes (columns) - max_overlaps = gt_overlaps.max(axis=1) - # gt class that had the max overlap - max_classes = gt_overlaps.argmax(axis=1) - roidb[i]['max_classes'] = max_classes - roidb[i]['max_overlaps'] = max_overlaps - # sanity checks - # max overlap of 0 => class should be zero (background) - zero_inds = np.where(max_overlaps == 0)[0] - assert all(max_classes[zero_inds] == 0) - # max overlap > 0 => class should not be zero (must be a fg class) - nonzero_inds = np.where(max_overlaps > 0)[0] - assert all(max_classes[nonzero_inds] != 0) - - -def add_bbox_regression_targets(roidb): - """ - Add information needed to train bounding-box regressors. - For each roi find the corresponding gt box, and compute the distance. - then normalize the distance into Gaussian by minus mean and divided by std - """ - assert len(roidb) > 0 - assert 'max_classes' in roidb[0], 'Did you call prepare_roidb first?' 
-
-    num_images = len(roidb)
-    # Infer number of classes from the number of columns in gt_overlaps
-    num_classes = roidb[0]['gt_overlaps'].shape[1]
-    for im_i in range(num_images):
-        rois = roidb[im_i]['boxes']
-        max_overlaps = roidb[im_i]['max_overlaps']
-        max_classes = roidb[im_i]['max_classes']
-        roidb[im_i]['bbox_targets'] = \
-            _compute_targets(rois, max_overlaps, max_classes)
-
-    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS_PRECOMPUTED:
-        # Use fixed / precomputed "means" and "stds" instead of empirical values
-        means = np.tile(
-            np.array(cfg.TRAIN.BBOX_NORMALIZE_MEANS), (num_classes, 1))
-        stds = np.tile(
-            np.array(cfg.TRAIN.BBOX_NORMALIZE_STDS), (num_classes, 1))
-    else:
-        # Compute values needed for means and stds
-        # var(x) = E(x^2) - E(x)^2
-        class_counts = np.zeros((num_classes, 1)) + cfg.EPS
-        sums = np.zeros((num_classes, 4))
-        squared_sums = np.zeros((num_classes, 4))
-        for im_i in range(num_images):
-            targets = roidb[im_i]['bbox_targets']
-            for cls in range(1, num_classes):
-                cls_inds = np.where(targets[:, 0] == cls)[0]
-                if cls_inds.size > 0:
-                    class_counts[cls] += cls_inds.size
-                    sums[cls, :] += targets[cls_inds, 1:].sum(axis=0)
-                    squared_sums[cls, :] += \
-                        (targets[cls_inds, 1:] ** 2).sum(axis=0)
-
-        means = sums / class_counts
-        stds = np.sqrt(squared_sums / class_counts - means**2)
-        # a std that is too small will cause nan errors downstream
-        assert np.min(stds) >= 0.01, \
-            'Boxes std is too small, std:{}'.format(stds)
-
-    print('bbox target means:')
-    print(means)
-    print(means[1:, :].mean(axis=0))  # ignore bg class
-    print('bbox target stdevs:')
-    print(stds)
-    print(stds[1:, :].mean(axis=0))  # ignore bg class
-
-    # Normalize targets
-    if cfg.TRAIN.BBOX_NORMALIZE_TARGETS:
-        print("Normalizing targets")
-        for im_i in range(num_images):
-            targets = roidb[im_i]['bbox_targets']
-            for cls in range(1, num_classes):
-                cls_inds = np.where(targets[:, 0] == cls)[0]
-                roidb[im_i]['bbox_targets'][cls_inds, 1:] -= means[cls, :]
-                roidb[im_i]['bbox_targets'][cls_inds, 1:] /= stds[cls, :]
-    else:
-        print("NOT normalizing targets")
-
-    # These values will be needed for making predictions
-    # (the predictions will need to be unnormalized and uncentered)
-    return means.ravel(), stds.ravel()
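For intuition, the normalization above shifts each class's targets by the class mean and scales by the class std, and predictions must later be undone with the same statistics. A minimal numpy sketch with hypothetical data (not part of the scraped repo):

import numpy as np

# hypothetical regression targets for one class: rows of (dx, dy, dw, dh)
targets = np.array([[0.1, 0.2, -0.3, 0.0],
                    [0.3, 0.0, -0.1, 0.2]])
means = targets.mean(axis=0)
stds = targets.std(axis=0) + 1e-8  # guard against tiny stds, as the assert above warns

normalized = (targets - means) / stds
restored = normalized * stds + means  # what prediction-time unnormalization does
assert np.allclose(restored, targets)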
- """ - # Indices of ground-truth ROIs - gt_inds = np.where(overlaps == 1)[0] - if len(gt_inds) == 0: - # Bail if the image has no ground-truth ROIs - return np.zeros((rois.shape[0], 5), dtype=np.float32) - # Indices of examples for which we try to make predictions - ex_inds = np.where(overlaps >= cfg.TRAIN.BBOX_THRESH)[0] - - # Get IoU overlap between each ex ROI and gt ROI - ex_gt_overlaps = bbox_overlaps( - np.ascontiguousarray(rois[ex_inds, :], dtype=np.float), - np.ascontiguousarray(rois[gt_inds, :], dtype=np.float)) - - # Find which gt ROI each ex ROI has max overlap with: - # this will be the ex ROI's gt target - gt_assignment = ex_gt_overlaps.argmax(axis=1) - gt_rois = rois[gt_inds[gt_assignment], :] - ex_rois = rois[ex_inds, :] - - targets = np.zeros((rois.shape[0], 5), dtype=np.float32) - targets[ex_inds, 0] = labels[ex_inds] - targets[ex_inds, 1:] = bbox_transform(ex_rois, gt_rois) - return targets -# -*- coding:utf-8 -*- -import numpy as np -import numpy.random as npr - -from .generate_anchors import generate_anchors -from ..fast_rcnn.bbox_transform import bbox_transform -from ..fast_rcnn.config import cfg -from ..utils.bbox import bbox_overlaps, bbox_intersections - -DEBUG = False - - -def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride=[16, ], - anchor_scales=[16, ]): - """ - Assign anchors to ground-truth targets. Produces anchor classification - labels and bounding-box regression targets. - Parameters - ---------- - rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer - gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class] - gt_ishard: (G, 1), 1 or 0 indicates difficult or not - dontcare_areas: (D, 4), some areas may contains small objs but no labelling. D may be 0 - im_info: a list of [image_height, image_width, scale_ratios] - _feat_stride: the downsampling ratio of feature map to the original input image - anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) - ---------- - Returns - ---------- - rpn_labels : (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare - rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes(may contains some transform) - that are the regression objectives - rpn_bbox_inside_weights: (HxWxA, 4) weights of each boxes, mainly accepts hyper param in cfg - rpn_bbox_outside_weights: (HxWxA, 4) used to balance the fg/bg, - beacuse the numbers of bgs and fgs mays significiantly different - """ - _anchors = generate_anchors(scales=np.array( - anchor_scales)) # 生成基本的anchor,一共9个 - _num_anchors = _anchors.shape[0] # 9个anchor - - if DEBUG: - print('anchors:') - print(_anchors) - print('anchor shapes:') - print(np.hstack(( - _anchors[:, 2::4] - _anchors[:, 0::4], - _anchors[:, 3::4] - _anchors[:, 1::4], - ))) - _counts = cfg.EPS - _sums = np.zeros((1, 4)) - _squared_sums = np.zeros((1, 4)) - _fg_sum = 0 - _bg_sum = 0 - _count = 0 - - # allow boxes to sit over the edge by a small amount - _allowed_border = 0 - # map of shape (..., H, W) - # height, width = rpn_cls_score.shape[1:3] - - im_info = im_info[0] # 图像的高宽及通道数 - - # 在feature-map上定位anchor,并加上delta,得到在实际图像中anchor的真实坐标 - # Algorithm: - # for each (H, W) location i - # generate 9 anchor boxes centered on cell i - # apply predicted bbox deltas at cell i to each of the 9 anchors - # filter out-of-image anchors - # measure GT overlap - - assert rpn_cls_score.shape[0] == 1, \ - 'Only single item batches are supported' - - # map of shape (..., H, W) - height, width = rpn_cls_score.shape[1:3] # feature-map的高宽 - - if 
-# -*- coding:utf-8 -*-
-import numpy as np
-import numpy.random as npr
-
-from .generate_anchors import generate_anchors
-from ..fast_rcnn.bbox_transform import bbox_transform
-from ..fast_rcnn.config import cfg
-from ..utils.bbox import bbox_overlaps, bbox_intersections
-
-DEBUG = False
-
-
-def anchor_target_layer(rpn_cls_score, gt_boxes, gt_ishard, dontcare_areas, im_info, _feat_stride=[16, ],
-                        anchor_scales=[16, ]):
-    """
-    Assign anchors to ground-truth targets. Produces anchor classification
-    labels and bounding-box regression targets.
-    Parameters
-    ----------
-    rpn_cls_score: (1, H, W, Ax2) bg/fg scores of previous conv layer
-    gt_boxes: (G, 5) vstack of [x1, y1, x2, y2, class]
-    gt_ishard: (G, 1), 1 or 0 indicates difficult or not
-    dontcare_areas: (D, 4), some areas may contain small objs but no labelling. D may be 0
-    im_info: a list of [image_height, image_width, scale_ratios]
-    _feat_stride: the downsampling ratio of feature map to the original input image
-    anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16])
-    ----------
-    Returns
-    ----------
-    rpn_labels: (HxWxA, 1), for each anchor, 0 denotes bg, 1 fg, -1 dontcare
-    rpn_bbox_targets: (HxWxA, 4), distances of the anchors to the gt_boxes
-        (possibly transformed) that are the regression objectives
-    rpn_bbox_inside_weights: (HxWxA, 4) weights of each box, mainly accepts hyper params in cfg
-    rpn_bbox_outside_weights: (HxWxA, 4) used to balance the fg/bg,
-        because the numbers of bgs and fgs may differ significantly
-    """
-    _anchors = generate_anchors(scales=np.array(
-        anchor_scales))  # generate the base anchors
-    _num_anchors = _anchors.shape[0]  # number of base anchors
-
-    if DEBUG:
-        print('anchors:')
-        print(_anchors)
-        print('anchor shapes:')
-        print(np.hstack((
-            _anchors[:, 2::4] - _anchors[:, 0::4],
-            _anchors[:, 3::4] - _anchors[:, 1::4],
-        )))
-        _counts = cfg.EPS
-        _sums = np.zeros((1, 4))
-        _squared_sums = np.zeros((1, 4))
-        _fg_sum = 0
-        _bg_sum = 0
-        _count = 0
-
-    # allow boxes to sit over the edge by a small amount
-    _allowed_border = 0
-    # map of shape (..., H, W)
-    # height, width = rpn_cls_score.shape[1:3]
-
-    im_info = im_info[0]  # image height, width and scale
-
-    # Locate the anchors on the feature map and add the deltas to get the
-    # anchors' real coordinates in the input image.
-    # Algorithm:
-    # for each (H, W) location i
-    #     generate 9 anchor boxes centered on cell i
-    #     apply predicted bbox deltas at cell i to each of the 9 anchors
-    # filter out-of-image anchors
-    # measure GT overlap
-
-    assert rpn_cls_score.shape[0] == 1, \
-        'Only single item batches are supported'
-
-    # map of shape (..., H, W)
-    height, width = rpn_cls_score.shape[1:3]  # height/width of the feature map
-
-    if DEBUG:
-        print('AnchorTargetLayer: height', height, 'width', width)
-        print('')
-        print('im_size: ({}, {})'.format(im_info[0], im_info[1]))
-        print('scale: {}'.format(im_info[2]))
-        print('height, width: ({}, {})'.format(height, width))
-        print('rpn: gt_boxes.shape', gt_boxes.shape)
-        print('rpn: gt_boxes', gt_boxes)
-
-    # 1. Generate proposals from bbox deltas and shifted anchors
-    shift_x = np.arange(0, width) * _feat_stride
-    shift_y = np.arange(0, height) * _feat_stride
-    shift_x, shift_y = np.meshgrid(shift_x, shift_y)  # in W H order
-    # K is H x W
-    shifts = np.vstack((shift_x.ravel(), shift_y.ravel(),
-                        shift_x.ravel(), shift_y.ravel())).transpose()  # offsets between anchors on the feature map and on the real image
-    # add A anchors (1, A, 4) to
-    # cell K shifts (K, 1, 4) to get
-    # shift anchors (K, A, 4)
-    # reshape to (K*A, 4) shifted anchors
-    A = _num_anchors  # the A base anchors
-    K = shifts.shape[0]  # K = width * height of the feature map (e.g. 50 * 37)
-    all_anchors = (_anchors.reshape((1, A, 4)) +
-                   shifts.reshape((1, K, 4)).transpose((1, 0, 2)))  # broadcast over the K shifts and add
-    all_anchors = all_anchors.reshape((K * A, 4))
-    total_anchors = int(K * A)
-
-    # only keep anchors inside the image;
-    # anchors crossing the image boundary are dropped
-    inds_inside = np.where(
-        (all_anchors[:, 0] >= -_allowed_border) &
-        (all_anchors[:, 1] >= -_allowed_border) &
-        (all_anchors[:, 2] < im_info[1] + _allowed_border) &  # width
-        (all_anchors[:, 3] < im_info[0] + _allowed_border)  # height
-    )[0]
-
-    if DEBUG:
-        print('total_anchors', total_anchors)
-        print('inds_inside', len(inds_inside))
-
-    # keep only inside anchors
-    anchors = all_anchors[inds_inside, :]
-    if DEBUG:
-        print('anchors.shape', anchors.shape)
-
-    # at this point the anchors are ready
-    # --------------------------------------------------------------
-    # label: 1 is positive, 0 is negative, -1 is dontcare
-    # (A)
-    labels = np.empty((len(inds_inside),), dtype=np.float32)
-    labels.fill(-1)  # initialize all labels to -1
-
-    # overlaps between the anchors and the gt boxes
-    # overlaps (ex, gt), shape is A x G
-    # compute anchor/gt-box overlaps, used to assign labels to the anchors
-    overlaps = bbox_overlaps(
-        np.ascontiguousarray(anchors, dtype=np.float),
-        np.ascontiguousarray(gt_boxes, dtype=np.float))  # with x anchors and y gt_boxes, this returns an (x, y) array
-    # holding the overlap between every anchor and every gt box
-    # (A) for each anchor, the index of the gt box with the largest overlap
-    argmax_overlaps = overlaps.argmax(axis=1)
-    max_overlaps = overlaps[np.arange(len(inds_inside)), argmax_overlaps]
-    # (G) for each gt box, the index of the anchor with the largest overlap
-    gt_argmax_overlaps = overlaps.argmax(axis=0)
-    gt_max_overlaps = overlaps[gt_argmax_overlaps,
-                               np.arange(overlaps.shape[1])]
-    gt_argmax_overlaps = np.where(overlaps == gt_max_overlaps)[0]
-
-    if not cfg.TRAIN.RPN_CLOBBER_POSITIVES:
-        # assign bg labels first so that positive labels can clobber them
-        # (best overlap below RPN_NEGATIVE_OVERLAP, e.g. 0.3)
-        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
-
-    # fg label: for each gt, the anchor with the highest overlap is foreground
-    labels[gt_argmax_overlaps] = 1
-    # fg label: above threshold IOU
-    # (best overlap above RPN_POSITIVE_OVERLAP, e.g. 0.7)
-    labels[max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = 1
-
-    if cfg.TRAIN.RPN_CLOBBER_POSITIVES:
-        # assign bg labels last so that negative labels can clobber positives
-        labels[max_overlaps < cfg.TRAIN.RPN_NEGATIVE_OVERLAP] = 0
-
-    # preclude dontcare areas
-    # (dontcare areas are not considered for now)
-    if dontcare_areas is not None and dontcare_areas.shape[0] > 0:
-        # intersec shape is D x A
-        intersecs = bbox_intersections(
-            np.ascontiguousarray(dontcare_areas, dtype=np.float),  # D x 4
-            np.ascontiguousarray(anchors, dtype=np.float)  # A x 4
-        )
-        intersecs_ = intersecs.sum(axis=0)  # A x 1
-        labels[intersecs_ > cfg.TRAIN.DONTCARE_AREA_INTERSECTION_HI] = -1
-
-    # preclude hard samples that are highly occluded, truncated or difficult to see
-    # (hard samples are not considered for now)
-    if cfg.TRAIN.PRECLUDE_HARD_SAMPLES and gt_ishard is not None and gt_ishard.shape[0] > 0:
-        assert gt_ishard.shape[0] == gt_boxes.shape[0]
-        gt_ishard = gt_ishard.astype(int)
-        gt_hardboxes = gt_boxes[gt_ishard == 1, :]
-        if gt_hardboxes.shape[0] > 0:
-            # H x A
-            hard_overlaps = bbox_overlaps(
-                np.ascontiguousarray(gt_hardboxes, dtype=np.float),  # H x 4
-                np.ascontiguousarray(anchors, dtype=np.float))  # A x 4
-            hard_max_overlaps = hard_overlaps.max(axis=0)  # (A)
-            labels[hard_max_overlaps >= cfg.TRAIN.RPN_POSITIVE_OVERLAP] = -1
-            max_intersec_label_inds = hard_overlaps.argmax(axis=1)  # H x 1
-            labels[max_intersec_label_inds] = -1
-
-    # subsample positive labels if we have too many;
-    # cap the number of positives at num_fg (128 by default)
-    # TODO: may need revisiting later; with character-level proposals the
-    # number of positives can be large
-    num_fg = int(cfg.TRAIN.RPN_FG_FRACTION * cfg.TRAIN.RPN_BATCHSIZE)
-    fg_inds = np.where(labels == 1)[0]
-    if len(fg_inds) > num_fg:
-        disable_inds = npr.choice(
-            fg_inds, size=(len(fg_inds) - num_fg), replace=False)  # randomly disable some positives
-        labels[disable_inds] = -1  # set them to dontcare
-
-    # subsample negative labels if we have too many;
-    # the batch holds 256 samples in total with at most 128 positives,
-    # and negatives fill whatever the positives leave open
-    num_bg = cfg.TRAIN.RPN_BATCHSIZE - np.sum(labels == 1)
-    bg_inds = np.where(labels == 0)[0]
-    if len(bg_inds) > num_bg:
-        disable_inds = npr.choice(
-            bg_inds, size=(len(bg_inds) - num_bg), replace=False)
-        labels[disable_inds] = -1
-        # print "was %s inds, disabling %s, now %s inds" % (
-        # len(bg_inds), len(disable_inds), np.sum(labels == 0))
-
-    # labels are done; now compute the rpn box regression targets
-    # --------------------------------------------------------------
-    bbox_targets = np.zeros((len(inds_inside), 4), dtype=np.float32)
-    # the targets are the offsets between each anchor and its matched gt box
-    bbox_targets = _compute_targets(anchors, gt_boxes[argmax_overlaps, :])
-
-    bbox_inside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
-    bbox_inside_weights[labels == 1, :] = np.array(
-        cfg.TRAIN.RPN_BBOX_INSIDE_WEIGHTS)  # inside weights: 1 for foreground, 0 otherwise
-
-    bbox_outside_weights = np.zeros((len(inds_inside), 4), dtype=np.float32)
-    if cfg.TRAIN.RPN_POSITIVE_WEIGHT < 0:  # uniform weights for now: positives 1, negatives 0
-        # uniform weighting of examples (given non-uniform sampling)
-        num_examples = np.sum(labels >= 0) + 1
-        # positive_weights = np.ones((1, 4)) * 1.0 / num_examples
-        # negative_weights = np.ones((1, 4)) * 1.0 / num_examples
-        positive_weights = np.ones((1, 4))
-        negative_weights = np.zeros((1, 4))
-    else:
-        assert ((cfg.TRAIN.RPN_POSITIVE_WEIGHT > 0) &
-                (cfg.TRAIN.RPN_POSITIVE_WEIGHT < 1))
-        positive_weights = (cfg.TRAIN.RPN_POSITIVE_WEIGHT /
-                            (np.sum(labels == 1)) + 1)
-        negative_weights = ((1.0 - cfg.TRAIN.RPN_POSITIVE_WEIGHT) /
-                            (np.sum(labels == 0)) + 1)
-    bbox_outside_weights[labels == 1, :] = positive_weights  # outside weights: foreground 1, background 0
-    bbox_outside_weights[labels == 0, :] = negative_weights
-
-    if DEBUG:
-        _sums += bbox_targets[labels == 1, :].sum(axis=0)
-        _squared_sums += (bbox_targets[labels == 1, :] ** 2).sum(axis=0)
-        _counts += np.sum(labels == 1)
-        means = _sums / _counts
-        stds = np.sqrt(_squared_sums / _counts - means ** 2)
-        print('means:')
-        print(means)
-        print('stdevs:')
-        print(stds)
-
-    # map up to the original set of anchors;
-    # anchors outside the image were dropped at the start, so add them back
-    labels = _unmap(labels, total_anchors, inds_inside,
-                    fill=-1)  # those anchors get label -1, i.e. dontcare
-    bbox_targets = _unmap(bbox_targets, total_anchors,
-                          inds_inside, fill=0)  # their targets are 0, i.e. no value
-    bbox_inside_weights = _unmap(
-        bbox_inside_weights, total_anchors, inds_inside, fill=0)  # inside weights filled with 0
-    bbox_outside_weights = _unmap(
-        bbox_outside_weights, total_anchors, inds_inside, fill=0)  # outside weights filled with 0
-
-    if DEBUG:
-        print('rpn: max max_overlap', np.max(max_overlaps))
-        print('rpn: num_positive', np.sum(labels == 1))
-        print('rpn: num_negative', np.sum(labels == 0))
-        _fg_sum += np.sum(labels == 1)
-        _bg_sum += np.sum(labels == 0)
-        _count += 1
-        print('rpn: num_positive avg', _fg_sum / _count)
-        print('rpn: num_negative avg', _bg_sum / _count)
-
-    # labels
-    labels = labels.reshape((1, height, width, A))  # reshape the labels
-    rpn_labels = labels
-
-    # bbox_targets
-    bbox_targets = bbox_targets \
-        .reshape((1, height, width, A * 4))
-
-    rpn_bbox_targets = bbox_targets
-    # bbox_inside_weights
-    bbox_inside_weights = bbox_inside_weights \
-        .reshape((1, height, width, A * 4))
-
-    rpn_bbox_inside_weights = bbox_inside_weights
-
-    # bbox_outside_weights
-    bbox_outside_weights = bbox_outside_weights \
-        .reshape((1, height, width, A * 4))
-    rpn_bbox_outside_weights = bbox_outside_weights
-
-    return rpn_labels, rpn_bbox_targets, rpn_bbox_inside_weights, rpn_bbox_outside_weights
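For intuition, the core labeling rule above reduces to thresholding each anchor's best IoU (the extra per-gt argmax rule is omitted here). A toy numpy sketch, assuming the usual 0.3/0.7 defaults from cfg:

import numpy as np

max_overlaps = np.array([0.05, 0.2, 0.45, 0.71, 0.9])  # per-anchor best IoU (made up)
labels = np.full(max_overlaps.shape, -1.0)             # start as dontcare
labels[max_overlaps < 0.3] = 0                         # background
labels[max_overlaps >= 0.7] = 1                        # foreground
print(labels)  # [ 0.  0. -1.  1.  1.]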
-
-
-def _unmap(data, count, inds, fill=0):
-    """Unmap a subset of items (data) back to the original set of items (of
-    size count)."""
-    if len(data.shape) == 1:
-        ret = np.empty((count,), dtype=np.float32)
-        ret.fill(fill)
-        ret[inds] = data
-    else:
-        ret = np.empty((count,) + data.shape[1:], dtype=np.float32)
-        ret.fill(fill)
-        ret[inds, :] = data
-    return ret
-
-
-def _compute_targets(ex_rois, gt_rois):
-    """Compute bounding-box regression targets for an image."""
-
-    assert ex_rois.shape[0] == gt_rois.shape[0]
-    assert ex_rois.shape[1] == 4
-    assert gt_rois.shape[1] == 5
-
-    return bbox_transform(ex_rois, gt_rois[:, :4]).astype(np.float32, copy=False)
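A quick check of what _unmap does: values computed for the inside-image subset are scattered back to the full anchor set, with the fill value everywhere else. A standalone sketch of the same logic:

import numpy as np

def unmap_demo(data, count, inds, fill=0):
    ret = np.full((count,), fill, dtype=np.float32)
    ret[inds] = data
    return ret

labels_inside = np.array([1.0, 0.0, 1.0])
print(unmap_demo(labels_inside, count=6, inds=np.array([1, 3, 4]), fill=-1))
# [-1.  1. -1.  0.  1. -1.]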
-import numpy as np
-
-
-def generate_basic_anchors(sizes, base_size=16):
-    base_anchor = np.array([0, 0, base_size - 1, base_size - 1], np.int32)
-    anchors = np.zeros((len(sizes), 4), np.int32)
-    index = 0
-    for h, w in sizes:
-        anchors[index] = scale_anchor(base_anchor, h, w)
-        index += 1
-    return anchors
-
-
-def scale_anchor(anchor, h, w):
-    x_ctr = (anchor[0] + anchor[2]) * 0.5
-    y_ctr = (anchor[1] + anchor[3]) * 0.5
-    scaled_anchor = anchor.copy()
-    scaled_anchor[0] = x_ctr - w / 2  # xmin
-    scaled_anchor[2] = x_ctr + w / 2  # xmax
-    scaled_anchor[1] = y_ctr - h / 2  # ymin
-    scaled_anchor[3] = y_ctr + h / 2  # ymax
-    return scaled_anchor
-
-
-def generate_anchors(base_size=16, ratios=[0.5, 1, 2],
-                     scales=2 ** np.arange(3, 6)):
-    heights = [11, 16, 23, 33, 48, 68, 97, 139, 198, 283]
-    widths = [16]
-    sizes = []
-    for h in heights:
-        for w in widths:
-            sizes.append((h, w))
-    return generate_basic_anchors(sizes)
-
-
-if __name__ == '__main__':
-    import time
-
-    t = time.time()
-    a = generate_anchors()
-    print(time.time() - t)
-    print(a)
-    from IPython import embed
-
-    embed()
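Note that this generate_anchors ignores its ratios/scales arguments entirely and returns the fixed CTPN anchor set: ten heights from 11 to 283 px, all 16 px wide. A quick sanity check, assuming the module above is importable as generate_anchors:

from generate_anchors import generate_anchors  # hypothetical import path

anchors = generate_anchors()
print(anchors.shape)  # (10, 4): one anchor per height in [11, ..., 283]
print(anchors[0])     # [ 0  2 15 13]: a 16x11 box centered on the 16x16 base cell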
-""" - - -def proposal_layer(rpn_cls_prob_reshape, rpn_bbox_pred, im_info, cfg_key, _feat_stride=[16, ], anchor_scales=[16, ]): - """ - Parameters - ---------- - rpn_cls_prob_reshape: (1 , H , W , Ax2) outputs of RPN, prob of bg or fg - NOTICE: the old version is ordered by (1, H, W, 2, A) !!!! - rpn_bbox_pred: (1 , H , W , Ax4), rgs boxes output of RPN - im_info: a list of [image_height, image_width, scale_ratios] - cfg_key: 'TRAIN' or 'TEST' - _feat_stride: the downsampling ratio of feature map to the original input image - anchor_scales: the scales to the basic_anchor (basic anchor is [16, 16]) - ---------- - Returns - ---------- - rpn_rois : (1 x H x W x A, 5) e.g. [0, x1, y1, x2, y2] - - # Algorithm: - # - # for each (H, W) location i - # generate A anchor boxes centered on cell i - # apply predicted bbox deltas at cell i to each of the A anchors - # clip predicted boxes to image - # remove predicted boxes with either height or width < threshold - # sort all (proposal, score) pairs by score from highest to lowest - # take top pre_nms_topN proposals before NMS - # apply NMS with threshold 0.7 to remaining proposals - # take after_nms_topN proposals after NMS - # return the top proposals (-> RoIs top, scores top) - #layer_params = yaml.load(self.param_str_) - - """ - cfg_key = cfg_key.decode('ascii') - _anchors = generate_anchors( - scales=np.array(anchor_scales)) # 生成基本的9个anchor - # print('anchors', _anchors) - _num_anchors = _anchors.shape[0] # 9个anchor - - im_info = im_info[0] # 原始图像的高宽、缩放尺度 - - assert rpn_cls_prob_reshape.shape[0] == 1, \ - 'Only single item batches are supported' - - pre_nms_topN = cfg[cfg_key].RPN_PRE_NMS_TOP_N # 12000,在做nms之前,最多保留的候选box数目 - post_nms_topN = cfg[cfg_key].RPN_POST_NMS_TOP_N # 2000,做完nms之后,最多保留的box的数目 - nms_thresh = cfg[cfg_key].RPN_NMS_THRESH # nms用参数,阈值是0.7 - min_size = cfg[cfg_key].RPN_MIN_SIZE # 候选box的最小尺寸,目前是16,高宽均要大于16 - # TODO 后期需要修改这个最小尺寸,改为8? - - height, width = rpn_cls_prob_reshape.shape[1:3] # feature-map的高宽 - - # the first set of _num_anchors channels are bg probs - # the second set are the fg probs, which we want - # (1, H, W, A) - scores = np.reshape(np.reshape(rpn_cls_prob_reshape, [1, height, width, _num_anchors, 2])[:, :, :, :, 1], - [1, height, width, _num_anchors]) - # 提取到object的分数,non-object的我们不关心 - # 并reshape到1*H*W*9 - - bbox_deltas = rpn_bbox_pred # 模型输出的pred是相对值,需要进一步处理成真实图像中的坐标 - # im_info = bottom[2].data[0, :] - - if DEBUG: - print('im_size: ({}, {})'.format(im_info[0], im_info[1])) - print('scale: {}'.format(im_info[2])) - - # 1. 
-from .detectors import TextDetector
-from .text_connect_cfg import Config
-# coding:utf-8
-import numpy as np
-from lib.fast_rcnn.nms_wrapper import nms
-from lib.fast_rcnn.config import cfg
-from .text_proposal_connector import TextProposalConnector
-from .text_proposal_connector_oriented import TextProposalConnector as TextProposalConnectorOriented
-from .text_connect_cfg import Config as TextLineCfg
-
-
-class TextDetector:
-    def __init__(self):
-        self.mode = cfg.TEST.DETECT_MODE
-        if self.mode == "H":
-            self.text_proposal_connector = TextProposalConnector()
-        elif self.mode == "O":
-            self.text_proposal_connector = TextProposalConnectorOriented()
-
-    def detect(self, text_proposals, scores, size):
-        # drop low-scoring proposals
-        keep_inds = np.where(scores > TextLineCfg.TEXT_PROPOSALS_MIN_SCORE)[0]
-        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]
-
-        # sort by score
-        sorted_indices = np.argsort(scores.ravel())[::-1]
-        text_proposals, scores = text_proposals[sorted_indices], scores[sorted_indices]
-
-        # NMS over the proposals
-        keep_inds = nms(np.hstack((text_proposals, scores)),
-                        TextLineCfg.TEXT_PROPOSALS_NMS_THRESH)
-        text_proposals, scores = text_proposals[keep_inds], scores[keep_inds]
-
-        # get the detection results
-        text_recs = self.text_proposal_connector.get_text_lines(
-            text_proposals, scores, size)
-        keep_inds = self.filter_boxes(text_recs)
-        return text_recs[keep_inds]
-
-    def filter_boxes(self, boxes):
-        heights = np.zeros((len(boxes), 1), np.float)
-        widths = np.zeros((len(boxes), 1), np.float)
-        scores = np.zeros((len(boxes), 1), np.float)
-        index = 0
-        for box in boxes:
-            heights[index] = (abs(box[5] - box[1]) + abs(box[7] - box[3])) / 2.0 + 1
-            widths[index] = (abs(box[2] - box[0]) + abs(box[6] - box[4])) / 2.0 + 1
-            scores[index] = box[8]
-            index += 1
-
-        return np.where((widths / heights > TextLineCfg.MIN_RATIO) & (scores > TextLineCfg.LINE_MIN_SCORE) &
-                        (widths > (TextLineCfg.TEXT_PROPOSALS_WIDTH * TextLineCfg.MIN_NUM_PROPOSALS)))[0]
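End to end, TextDetector is meant to be fed the proposals and scores the RPN stage produces. A rough, hypothetical driver (assumes the classes above are importable, the compiled nms extension is built, and cfg is loaded; the box values are made up):

import numpy as np

detector = TextDetector()  # mode from cfg.TEST.DETECT_MODE: "H"orizontal or "O"riented

# hypothetical chain of 16px-wide proposals along one text line
text_proposals = np.array([[x, 100., x + 15., 130.] for x in range(0, 160, 16)],
                          dtype=np.float32)
scores = np.full((len(text_proposals), 1), 0.95, dtype=np.float32)

boxes = detector.detect(text_proposals, scores, size=(300, 300))
# each row of boxes: 4 corner points (x1, y1, ..., x4, y4) followed by the line score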
-import numpy as np
-
-
-def threshold(coords, min_, max_):
-    return np.maximum(np.minimum(coords, max_), min_)
-
-
-def clip_boxes(boxes, im_shape):
-    """
-    Clip boxes to image boundaries.
-    """
-    boxes[:, 0::2] = threshold(boxes[:, 0::2], 0, im_shape[1] - 1)
-    boxes[:, 1::2] = threshold(boxes[:, 1::2], 0, im_shape[0] - 1)
-    return boxes
-
-
-class Graph:
-    def __init__(self, graph):
-        self.graph = graph
-
-    def sub_graphs_connected(self):
-        sub_graphs = []
-        for index in range(self.graph.shape[0]):
-            if not self.graph[:, index].any() and self.graph[index, :].any():
-                v = index
-                sub_graphs.append([v])
-                while self.graph[v, :].any():
-                    v = np.where(self.graph[v, :])[0][0]
-                    sub_graphs[-1].append(v)
-        return sub_graphs
-
-
-class Config:
-    SCALE = 600
-    MAX_SCALE = 1200
-    TEXT_PROPOSALS_WIDTH = 16
-    MIN_NUM_PROPOSALS = 2
-    MIN_RATIO = 0.5
-    LINE_MIN_SCORE = 0.9
-    MAX_HORIZONTAL_GAP = 50
-    TEXT_PROPOSALS_MIN_SCORE = 0.7
-    TEXT_PROPOSALS_NMS_THRESH = 0.2
-    MIN_V_OVERLAPS = 0.7
-    MIN_SIZE_SIM = 0.7
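clip_boxes simply clamps x coordinates to [0, W-1] and y coordinates to [0, H-1]; a quick standalone example of the same clamping:

import numpy as np

boxes = np.array([[-5.0, 10.0, 650.0, 500.0]])  # x1, y1, x2, y2
im_shape = (480, 640)                           # (H, W)
boxes[:, 0::2] = np.clip(boxes[:, 0::2], 0, im_shape[1] - 1)  # clamp x1, x2
boxes[:, 1::2] = np.clip(boxes[:, 1::2], 0, im_shape[0] - 1)  # clamp y1, y2
print(boxes)  # [[  0.  10. 639. 479.]]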
- """ - boxes[:, 0::2] = threshold(boxes[:, 0::2], 0, im_shape[1]-1) - boxes[:, 1::2] = threshold(boxes[:, 1::2], 0, im_shape[0]-1) - return boxes - - -class Graph: - def __init__(self, graph): - self.graph = graph - - def sub_graphs_connected(self): - sub_graphs = [] - for index in range(self.graph.shape[0]): - if not self.graph[:, index].any() and self.graph[index, :].any(): - v = index - sub_graphs.append([v]) - while self.graph[v, :].any(): - v = np.where(self.graph[v, :])[0][0] - sub_graphs[-1].append(v) - return sub_graphs -class Config: - SCALE = 600 - MAX_SCALE = 1200 - TEXT_PROPOSALS_WIDTH = 16 - MIN_NUM_PROPOSALS = 2 - MIN_RATIO = 0.5 - LINE_MIN_SCORE = 0.9 - MAX_HORIZONTAL_GAP = 50 - TEXT_PROPOSALS_MIN_SCORE = 0.7 - TEXT_PROPOSALS_NMS_THRESH = 0.2 - MIN_V_OVERLAPS = 0.7 - MIN_SIZE_SIM = 0.7 -import numpy as np -from .other import clip_boxes -from .text_proposal_graph_builder import TextProposalGraphBuilder - - -class TextProposalConnector: - def __init__(self): - self.graph_builder = TextProposalGraphBuilder() - - def group_text_proposals(self, text_proposals, scores, im_size): - graph = self.graph_builder.build_graph(text_proposals, scores, im_size) - return graph.sub_graphs_connected() - - def fit_y(self, X, Y, x1, x2): - len(X) != 0 - # if X only include one point, the function will get line y=Y[0] - if np.sum(X == X[0]) == len(X): - return Y[0], Y[0] - p = np.poly1d(np.polyfit(X, Y, 1)) - return p(x1), p(x2) - - def get_text_lines(self, text_proposals, scores, im_size): - # tp=text proposal - tp_groups = self.group_text_proposals(text_proposals, scores, im_size) - text_lines = np.zeros((len(tp_groups), 5), np.float32) - - for index, tp_indices in enumerate(tp_groups): - text_line_boxes = text_proposals[list(tp_indices)] - - x0 = np.min(text_line_boxes[:, 0]) - x1 = np.max(text_line_boxes[:, 2]) - - offset = (text_line_boxes[0, 2]-text_line_boxes[0, 0])*0.5 - - lt_y, rt_y = self.fit_y( - text_line_boxes[:, 0], text_line_boxes[:, 1], x0+offset, x1-offset) - lb_y, rb_y = self.fit_y( - text_line_boxes[:, 0], text_line_boxes[:, 3], x0+offset, x1-offset) - - # the score of a text line is the average score of the scores - # of all text proposals contained in the text line - score = scores[list(tp_indices)].sum()/float(len(tp_indices)) - - text_lines[index, 0] = x0 - text_lines[index, 1] = min(lt_y, rt_y) - text_lines[index, 2] = x1 - text_lines[index, 3] = max(lb_y, rb_y) - text_lines[index, 4] = score - - text_lines = clip_boxes(text_lines, im_size) - - text_recs = np.zeros((len(text_lines), 9), np.float) - index = 0 - for line in text_lines: - xmin, ymin, xmax, ymax = line[0], line[1], line[2], line[3] - text_recs[index, 0] = xmin - text_recs[index, 1] = ymin - text_recs[index, 2] = xmax - text_recs[index, 3] = ymin - text_recs[index, 4] = xmin - text_recs[index, 5] = ymax - text_recs[index, 6] = xmax - text_recs[index, 7] = ymax - text_recs[index, 8] = line[4] - index = index + 1 - - return text_recs -# coding:utf-8 -import numpy as np -from .text_proposal_graph_builder import TextProposalGraphBuilder - - -class TextProposalConnector: - """ - Connect text proposals into text lines - """ - - def __init__(self): - self.graph_builder = TextProposalGraphBuilder() - - def group_text_proposals(self, text_proposals, scores, im_size): - graph = self.graph_builder.build_graph(text_proposals, scores, im_size) - return graph.sub_graphs_connected() - - def fit_y(self, X, Y, x1, x2): - len(X) != 0 - # if X only include one point, the function will get line y=Y[0] - if np.sum(X == X[0]) == 
-# coding:utf-8
-import numpy as np
-from .text_proposal_graph_builder import TextProposalGraphBuilder
-
-
-class TextProposalConnector:
-    """
-    Connect text proposals into text lines
-    """
-
-    def __init__(self):
-        self.graph_builder = TextProposalGraphBuilder()
-
-    def group_text_proposals(self, text_proposals, scores, im_size):
-        graph = self.graph_builder.build_graph(text_proposals, scores, im_size)
-        return graph.sub_graphs_connected()
-
-    def fit_y(self, X, Y, x1, x2):
-        assert len(X) != 0
-        # if X only includes one point, the function will return the line y=Y[0]
-        if np.sum(X == X[0]) == len(X):
-            return Y[0], Y[0]
-        p = np.poly1d(np.polyfit(X, Y, 1))
-        return p(x1), p(x2)
-
-    def get_text_lines(self, text_proposals, scores, im_size):
-        """
-        text_proposals: boxes
-        """
-        # tp=text proposal
-        tp_groups = self.group_text_proposals(
-            text_proposals, scores, im_size)  # first build the graph to find which proposals form each text line
-
-        text_lines = np.zeros((len(tp_groups), 8), np.float32)
-
-        for index, tp_indices in enumerate(tp_groups):
-            text_line_boxes = text_proposals[list(tp_indices)]  # all the small boxes of this text line
-            # center (x, y) coordinates of each small box
-            X = (text_line_boxes[:, 0] + text_line_boxes[:, 2]) / 2
-            Y = (text_line_boxes[:, 1] + text_line_boxes[:, 3]) / 2
-
-            z1 = np.polyfit(X, Y, 1)  # fit a straight line through the centers (least squares)
-
-            x0 = np.min(text_line_boxes[:, 0])  # smallest x coordinate of the text line
-            x1 = np.max(text_line_boxes[:, 2])  # largest x coordinate of the text line
-
-            offset = (text_line_boxes[0, 2] -
-                      text_line_boxes[0, 0]) * 0.5  # half the width of a small box
-
-            # fit a line through the top-left corners of all small boxes, then
-            # compute the y values at the leftmost/rightmost x of the text line
-            lt_y, rt_y = self.fit_y(
-                text_line_boxes[:, 0], text_line_boxes[:, 1], x0 + offset, x1 - offset)
-            # same with the bottom-left corners of all small boxes
-            lb_y, rb_y = self.fit_y(
-                text_line_boxes[:, 0], text_line_boxes[:, 3], x0 + offset, x1 - offset)
-
-            score = scores[list(tp_indices)].sum() / \
-                float(len(tp_indices))  # the text line score is the mean score of its proposals
-
-            text_lines[index, 0] = x0
-            text_lines[index, 1] = min(lt_y, rt_y)  # smaller y of the line's top segment
-            text_lines[index, 2] = x1
-            text_lines[index, 3] = max(lb_y, rb_y)  # larger y of the line's bottom segment
-            text_lines[index, 4] = score  # text line score
-            text_lines[index, 5] = z1[0]  # slope k and intercept b of the fitted center line
-            text_lines[index, 6] = z1[1]
-            height = np.mean(
-                (text_line_boxes[:, 3] - text_line_boxes[:, 1]))  # mean height of the small boxes
-            text_lines[index, 7] = height + 2.5
-
-        text_recs = np.zeros((len(text_lines), 9), np.float)
-        index = 0
-        for line in text_lines:
-            b1 = line[6] - line[7] / 2  # from the height and the center line, get the b values of the top and bottom edges
-            b2 = line[6] + line[7] / 2
-            x1 = line[0]
-            y1 = line[5] * line[0] + b1  # top-left
-            x2 = line[2]
-            y2 = line[5] * line[2] + b1  # top-right
-            x3 = line[0]
-            y3 = line[5] * line[0] + b2  # bottom-left
-            x4 = line[2]
-            y4 = line[5] * line[2] + b2  # bottom-right
-            disX = x2 - x1
-            disY = y2 - y1
-            width = np.sqrt(disX * disX + disY * disY)  # text line width
-
-            fTmp0 = y3 - y1  # text line height
-            fTmp1 = fTmp0 * disY / width
-            x = np.fabs(fTmp1 * disX / width)  # compensation
-            y = np.fabs(fTmp1 * disY / width)
-            if line[5] < 0:
-                x1 -= x
-                y1 += y
-                x4 += x
-                y4 -= y
-            else:
-                x2 += x
-                y2 += y
-                x3 -= x
-                y3 -= y
-            text_recs[index, 0] = x1
-            text_recs[index, 1] = y1
-            text_recs[index, 2] = x2
-            text_recs[index, 3] = y2
-            text_recs[index, 4] = x3
-            text_recs[index, 5] = y3
-            text_recs[index, 6] = x4
-            text_recs[index, 7] = y4
-            text_recs[index, 8] = line[4]
-            index = index + 1
-
-        return text_recs
-from .text_connect_cfg import Config as TextLineCfg
-from .other import Graph
-import numpy as np
-
-
-class TextProposalGraphBuilder:
-    """
-    Build Text proposals into a graph. 
- """ - - def get_successions(self, index): - box = self.text_proposals[index] - results = [] - for left in range(int(box[0])+1, min(int(box[0])+TextLineCfg.MAX_HORIZONTAL_GAP+1, self.im_size[1])): - adj_box_indices = self.boxes_table[left] - for adj_box_index in adj_box_indices: - if self.meet_v_iou(adj_box_index, index): - results.append(adj_box_index) - if len(results) != 0: - return results - return results - - def get_precursors(self, index): - box = self.text_proposals[index] - results = [] - for left in range(int(box[0])-1, max(int(box[0]-TextLineCfg.MAX_HORIZONTAL_GAP), 0)-1, -1): - adj_box_indices = self.boxes_table[left] - for adj_box_index in adj_box_indices: - if self.meet_v_iou(adj_box_index, index): - results.append(adj_box_index) - if len(results) != 0: - return results - return results - - def is_succession_node(self, index, succession_index): - precursors = self.get_precursors(succession_index) - if self.scores[index] >= np.max(self.scores[precursors]): - return True - return False - - def meet_v_iou(self, index1, index2): - def overlaps_v(index1, index2): - h1 = self.heights[index1] - h2 = self.heights[index2] - y0 = max(self.text_proposals[index2][1], - self.text_proposals[index1][1]) - y1 = min(self.text_proposals[index2][3], - self.text_proposals[index1][3]) - return max(0, y1-y0+1)/min(h1, h2) - - def size_similarity(index1, index2): - h1 = self.heights[index1] - h2 = self.heights[index2] - return min(h1, h2)/max(h1, h2) - - return overlaps_v(index1, index2) >= TextLineCfg.MIN_V_OVERLAPS and \ - size_similarity(index1, index2) >= TextLineCfg.MIN_SIZE_SIM - - def build_graph(self, text_proposals, scores, im_size): - self.text_proposals = text_proposals - self.scores = scores - self.im_size = im_size - self.heights = text_proposals[:, 3]-text_proposals[:, 1]+1 - - boxes_table = [[] for _ in range(self.im_size[1])] - for index, box in enumerate(text_proposals): - boxes_table[int(box[0])].append(index) - self.boxes_table = boxes_table - - graph = np.zeros( - (text_proposals.shape[0], text_proposals.shape[0]), np.bool) - - for index, box in enumerate(text_proposals): - successions = self.get_successions(index) - if len(successions) == 0: - continue - succession_index = successions[np.argmax(scores[successions])] - if self.is_succession_node(index, succession_index): - # NOTE: a box can have multiple successions(precursors) if multiple successions(precursors) - # have equal scores. - graph[index, succession_index] = True - return Graph(graph) -from . import bbox -from . import blob -from . import boxes_grid -from . import cython_nms -from . import timer - -try: - from . 
import gpu_nms -except: - gpu_nms = cython_nms -# -------------------------------------------------------- -# Fast R-CNN -# Copyright (c) 2015 Microsoft -# Licensed under The MIT License [see LICENSE for details] -# Written by Sergey Karayev -# -------------------------------------------------------- - -import numpy as np -cimport numpy as np - - -DTYPE = np.float -ctypedef np.float_t DTYPE_t - - -def bbox_overlaps( - np.ndarray[DTYPE_t, ndim=2] boxes, - np.ndarray[DTYPE_t, ndim=2] query_boxes): - """ - Parameters - ---------- - boxes: (N, 4) ndarray of float - query_boxes: (K, 4) ndarray of float - Returns - ------- - overlaps: (N, K) ndarray of overlap between boxes and query_boxes - """ - cdef unsigned int N = boxes.shape[0] - cdef unsigned int K = query_boxes.shape[0] - cdef np.ndarray[DTYPE_t, ndim = 2] overlaps = np.zeros((N, K), dtype=DTYPE) - cdef DTYPE_t iw, ih, box_area - cdef DTYPE_t ua - cdef unsigned int k, n - for k in range(K): - box_area = ( - (query_boxes[k, 2] - query_boxes[k, 0] + 1) * - (query_boxes[k, 3] - query_boxes[k, 1] + 1) - ) - for n in range(N): - iw = ( - min(boxes[n, 2], query_boxes[k, 2]) - - max(boxes[n, 0], query_boxes[k, 0]) + 1 - ) - if iw > 0: - ih = ( - min(boxes[n, 3], query_boxes[k, 3]) - - max(boxes[n, 1], query_boxes[k, 1]) + 1 - ) - if ih > 0: - ua = float( - (boxes[n, 2] - boxes[n, 0] + 1) * - (boxes[n, 3] - boxes[n, 1] + 1) + - box_area - iw * ih - ) - overlaps[n, k] = iw * ih / ua - return overlaps - - -def bbox_intersections( - np.ndarray[DTYPE_t, ndim=2] boxes, - np.ndarray[DTYPE_t, ndim=2] query_boxes): - """ - For each query box compute the intersection ratio covered by boxes - ---------- - Parameters - ---------- - boxes: (N, 4) ndarray of float - query_boxes: (K, 4) ndarray of float - Returns - ------- - overlaps: (N, K) ndarray of intersec between boxes and query_boxes - """ - cdef unsigned int N = boxes.shape[0] - cdef unsigned int K = query_boxes.shape[0] - cdef np.ndarray[DTYPE_t, ndim = 2] intersec = np.zeros((N, K), dtype=DTYPE) - cdef DTYPE_t iw, ih, box_area - cdef DTYPE_t ua - cdef unsigned int k, n - for k in range(K): - box_area = ( - (query_boxes[k, 2] - query_boxes[k, 0] + 1) * - (query_boxes[k, 3] - query_boxes[k, 1] + 1) - ) - for n in range(N): - iw = ( - min(boxes[n, 2], query_boxes[k, 2]) - - max(boxes[n, 0], query_boxes[k, 0]) + 1 - ) - if iw > 0: - ih = ( - min(boxes[n, 3], query_boxes[k, 3]) - - max(boxes[n, 1], query_boxes[k, 1]) + 1 - ) - if ih > 0: - intersec[n, k] = iw * ih / box_area - return intersec -"""Blob helper functions.""" -import cv2 -import numpy as np - -from ..fast_rcnn.config import cfg - - -def im_list_to_blob(ims): - """Convert a list of images into a network input. - - Assumes images are already prepared (means subtracted, BGR order, ...). 
- """ - max_shape = np.array([im.shape for im in ims]).max(axis=0) - num_images = len(ims) - blob = np.zeros((num_images, max_shape[0], max_shape[1], 3), - dtype=np.float32) - for i in range(num_images): - im = ims[i] - blob[i, 0:im.shape[0], 0:im.shape[1], :] = im - - return blob - - -def prep_im_for_blob(im, pixel_means, target_size, max_size): - """Mean subtract and scale an image for use in a blob.""" - im = im.astype(np.float32, copy=False) - im -= pixel_means - im_shape = im.shape - im_size_min = np.min(im_shape[0:2]) - im_size_max = np.max(im_shape[0:2]) - im_scale = float(target_size) / float(im_size_min) - # Prevent the biggest axis from being more than MAX_SIZE - if np.round(im_scale * im_size_max) > max_size: - im_scale = float(max_size) / float(im_size_max) - if cfg.TRAIN.RANDOM_DOWNSAMPLE: - r = 0.6 + np.random.rand() * 0.4 - im_scale *= r - im = cv2.resize(im, None, None, fx=im_scale, fy=im_scale, - interpolation=cv2.INTER_LINEAR) - - return im, im_scale -# -------------------------------------------------------- -# Subcategory CNN -# Copyright (c) 2015 CVGL Stanford -# Licensed under The MIT License [see LICENSE for details] -# Written by Yu Xiang -# -------------------------------------------------------- - -import math - -import numpy as np - -# TODO: make fast_rcnn irrelevant -# >>>> obsolete, because it depends on sth outside of this project -from ..fast_rcnn.config import cfg - - -# <<<< obsolete - -def get_boxes_grid(image_height, image_width): - """ - Return the boxes on image grid. - calling this function when cfg.IS_MULTISCALE is True, otherwise, calling rdl_roidb.prepare_roidb(imdb) instead. - """ - - # fixed a bug, change cfg.TRAIN.SCALES to cfg.TRAIN.SCALES_BASE - # coz, here needs a ratio around 1.0, not the accutual size. - # height and width of the feature map - if cfg.NET_NAME == 'CaffeNet': - height = np.floor( - (image_height * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1) - height = np.floor((height - 1) / 2.0 + 1 + 0.5) - height = np.floor((height - 1) / 2.0 + 1 + 0.5) - - width = np.floor( - (image_width * max(cfg.TRAIN.SCALES_BASE) - 1) / 4.0 + 1) - width = np.floor((width - 1) / 2.0 + 1 + 0.5) - width = np.floor((width - 1) / 2.0 + 1 + 0.5) - elif cfg.NET_NAME == 'VGGnet': - height = np.floor( - image_height * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5) - height = np.floor(height / 2.0 + 0.5) - height = np.floor(height / 2.0 + 0.5) - height = np.floor(height / 2.0 + 0.5) - - width = np.floor(image_width * max(cfg.TRAIN.SCALES_BASE) / 2.0 + 0.5) - width = np.floor(width / 2.0 + 0.5) - width = np.floor(width / 2.0 + 0.5) - width = np.floor(width / 2.0 + 0.5) - else: - assert (1), 'The network architecture is not supported in utils.get_boxes_grid!' 
-
-    # compute the grid box centers
-    h = np.arange(height)
-    w = np.arange(width)
-    y, x = np.meshgrid(h, w, indexing='ij')
-    centers = np.dstack((x, y))
-    centers = np.reshape(centers, (-1, 2))
-    num = centers.shape[0]
-
-    # compute width and height of grid box
-    area = cfg.TRAIN.KERNEL_SIZE * cfg.TRAIN.KERNEL_SIZE
-    aspect = cfg.TRAIN.ASPECTS  # height / width
-    num_aspect = len(aspect)
-    widths = np.zeros((1, num_aspect), dtype=np.float32)
-    heights = np.zeros((1, num_aspect), dtype=np.float32)
-    for i in range(num_aspect):
-        widths[0, i] = math.sqrt(area / aspect[i])
-        heights[0, i] = widths[0, i] * aspect[i]
-
-    # construct grid boxes
-    centers = np.repeat(centers, num_aspect, axis=0)
-    widths = np.tile(widths, num).transpose()
-    heights = np.tile(heights, num).transpose()
-
-    x1 = np.reshape(centers[:, 0], (-1, 1)) - widths * 0.5
-    x2 = np.reshape(centers[:, 0], (-1, 1)) + widths * 0.5
-    y1 = np.reshape(centers[:, 1], (-1, 1)) - heights * 0.5
-    y2 = np.reshape(centers[:, 1], (-1, 1)) + heights * 0.5
-
-    boxes_grid = np.hstack((x1, y1, x2, y2)) / cfg.TRAIN.SPATIAL_SCALE
-
-    return boxes_grid, centers[:, 0], centers[:, 1]
-import os
-from distutils.core import setup
-from distutils.extension import Extension
-from os.path import join as pjoin
-
-import numpy as np
-from Cython.Distutils import build_ext
-
-
-def find_in_path(name, path):
-    for dir in path.split(os.pathsep):
-        binpath = pjoin(dir, name)
-        if os.path.exists(binpath):
-            return os.path.abspath(binpath)
-    return None
-
-
-def locate_cuda():
-    # first check if the CUDAHOME env variable is in use
-    if 'CUDAHOME' in os.environ:
-        home = os.environ['CUDAHOME']
-        nvcc = pjoin(home, 'bin', 'nvcc')
-    else:
-        # otherwise, search the PATH for NVCC
-        default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
-        nvcc = find_in_path(
-            'nvcc', os.environ['PATH'] + os.pathsep + default_path)
-        if nvcc is None:
-            raise EnvironmentError('The nvcc binary could not be '
-                                   'located in your $PATH. Either add it to '
-                                   'your path, or set $CUDAHOME')
-        home = os.path.dirname(os.path.dirname(nvcc))
-
-    cudaconfig = {'home': home, 'nvcc': nvcc,
-                  'include': pjoin(home, 'include'),
-                  'lib64': pjoin(home, 'lib64')}
-    for k, v in cudaconfig.items():
-        if not os.path.exists(v):
-            raise EnvironmentError(
-                'The CUDA %s path could not be located in %s' % (k, v))
-    return cudaconfig
-
-
-CUDA = locate_cuda()
-
-try:
-    numpy_include = np.get_include()
-except AttributeError:
-    numpy_include = np.get_numpy_include()
-
-
-def customize_compiler_for_nvcc(self):
-    self.src_extensions.append('.cu')
-    default_compiler_so = self.compiler_so
-    super = self._compile
-
-    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
-        print(extra_postargs)
-        if os.path.splitext(src)[1] == '.cu':
-            # use cuda (nvcc) for .cu files
-            self.set_executable('compiler_so', CUDA['nvcc'])
-            # use only a subset of the extra_postargs, which are 1-1 translated
-            # from the extra_compile_args in the Extension class
-            postargs = extra_postargs['nvcc']
-        else:
-            postargs = extra_postargs['gcc']
-
-        super(obj, src, ext, cc_args, postargs, pp_opts)
-        # reset the default compiler_so, which we might have changed for cuda
-        self.compiler_so = default_compiler_so
-
-    # inject our redefined _compile method into the class
-    self._compile = _compile
-
-
-# run the customize_compiler
-class custom_build_ext(build_ext):
-    def build_extensions(self):
-        customize_compiler_for_nvcc(self.compiler)
-        build_ext.build_extensions(self)
-
-
-ext_modules = [
-    Extension(
-        "utils.bbox",
-        ["bbox.pyx"],
-        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
-        include_dirs=[numpy_include]
-    ),
-    Extension(
-        "utils.cython_nms",
-        ["cython_nms.pyx"],
-        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
-        include_dirs=[numpy_include]
-    ),
-    Extension('utils.gpu_nms',
-              ['nms_kernel.cu', 'gpu_nms.pyx'],
-              library_dirs=[CUDA['lib64']],
-              libraries=['cudart'],
-              language='c++',
-              runtime_library_dirs=[CUDA['lib64']],
-              extra_compile_args={'gcc': ["-Wno-unused-function"],
-                                  'nvcc': ['-arch=sm_35',
-                                           '--ptxas-options=-v',
-                                           '-c',
-                                           '--compiler-options',
-                                           "'-fPIC'"]},
-              include_dirs=[numpy_include, CUDA['include']]
-              ),
-]
-
-setup(
-    ext_modules=ext_modules,
-    cmdclass={'build_ext': custom_build_ext},
-)
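# Editor's sketch: a minimal CPU-only setup.py in the same spirit as the two
# build scripts above (file names and module paths are illustrative). Build
# in place with: python setup.py build_ext --inplace
import numpy as np
from distutils.core import setup
from distutils.extension import Extension
from Cython.Distutils import build_ext

setup(
    ext_modules=[
        Extension("utils.cython_nms", ["cython_nms.pyx"],
                  include_dirs=[np.get_include()]),
    ],
    cmdclass={'build_ext': build_ext},
)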
-import os
-from distutils.core import setup
-from distutils.extension import Extension
-from os.path import join as pjoin
-
-import numpy as np
-from Cython.Distutils import build_ext
-
-
-def find_in_path(name, path):
-    for dir in path.split(os.pathsep):
-        binpath = pjoin(dir, name)
-        if os.path.exists(binpath):
-            return os.path.abspath(binpath)
-    return None
-
-
-def locate_cuda():
-    # first check if the CUDAHOME env variable is in use
-    if 'CUDAHOME' in os.environ:
-        home = os.environ['CUDAHOME']
-        nvcc = pjoin(home, 'bin', 'nvcc')
-    else:
-        # otherwise, search the PATH for NVCC
-        default_path = pjoin(os.sep, 'usr', 'local', 'cuda', 'bin')
-        nvcc = find_in_path('nvcc',
-                            os.environ['PATH'] + os.pathsep + default_path)
-        if nvcc is None:
-            raise EnvironmentError(
-                'The nvcc binary could not be '
-                'located in your $PATH. Either add it to your path, '
-                'or set $CUDAHOME'
-            )
-    home = os.path.dirname(os.path.dirname(nvcc))
-
-    cudaconfig = {
-        'home': home,
-        'nvcc': nvcc,
-        'include': pjoin(home, 'include'),
-        'lib64': pjoin(home, 'lib64')
-    }
-    for k, v in cudaconfig.items():
-        if not os.path.exists(v):
-            raise EnvironmentError(
-                'The CUDA %s path could not be located in %s' % (k, v))
-    return cudaconfig
-
-
-# CUDA = locate_cuda()
-
-try:
-    numpy_include = np.get_include()
-except AttributeError:
-    numpy_include = np.get_numpy_include()
-
-
-def customize_compiler_for_nvcc(self):
-    self.src_extensions.append('.cu')
-    default_compiler_so = self.compiler_so
-    super = self._compile
-
-    def _compile(obj, src, ext, cc_args, extra_postargs, pp_opts):
-        print(extra_postargs)
-        """
-        if os.path.splitext(src)[1] == '.cu':
-            # use cuda (nvcc) for .cu files
-            self.set_executable('compiler_so', CUDA['nvcc'])
-            # use only a subset of the extra_postargs, which are 1-1 translated
-            # from the extra_compile_args in the Extension class
-            postargs = extra_postargs['nvcc']
-        else:
-            postargs = extra_postargs['gcc']
-        """
-        postargs = extra_postargs['gcc']
-        super(obj, src, ext, cc_args, postargs, pp_opts)
-        # reset the default compiler_so, which we might have changed for cuda
-        self.compiler_so = default_compiler_so
-
-    # inject our redefined _compile method into the class
-    self._compile = _compile
-
-
-# run the customize_compiler
-class custom_build_ext(build_ext):
-    def build_extensions(self):
-        customize_compiler_for_nvcc(self.compiler)
-        build_ext.build_extensions(self)
-
-
-"""
-ext_modules = [
-    Extension(
-        "utils.bbox",
-        ["bbox.pyx"],
-        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
-        include_dirs=[numpy_include]
-    ),
-    Extension(
-        "utils.cython_nms",
-        ["cython_nms.pyx"],
-        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
-        include_dirs=[numpy_include]
-    ),
-
-    Extension('utils.gpu_nms',
-              ['nms_kernel.cu', 'gpu_nms.pyx'],
-              library_dirs=[CUDA['lib64']],
-              libraries=['cudart'],
-              language='c++',
-              runtime_library_dirs=[CUDA['lib64']],
-              extra_compile_args={'gcc': ["-Wno-unused-function"],
-                                  'nvcc': ['-arch=sm_35',
-                                           '--ptxas-options=-v',
-                                           '-c',
-                                           '--compiler-options',
-                                           "'-fPIC'"]},
-              include_dirs=[numpy_include, CUDA['include']]
-              ),
-]
-"""
-ext_modules = [
-    Extension(
-        "utils.bbox", ["bbox.pyx"],
-        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
-        include_dirs=[numpy_include]),
-    Extension(
-        "utils.cython_nms", ["cython_nms.pyx"],
-        extra_compile_args={'gcc': ["-Wno-cpp", "-Wno-unused-function"]},
-        include_dirs=[numpy_include]),
-]
-
-setup(
-    ext_modules=ext_modules,
-    cmdclass={'build_ext': custom_build_ext},
-)
-import time
-
-
-class Timer(object):
-    def __init__(self):
-        self.total_time = 0.
-        self.calls = 0
-        self.start_time = 0.
-        self.diff = 0.
-        self.average_time = 0.
-
-    def tic(self):
-        self.start_time = time.time()
-
-    def toc(self, average=True):
-        self.diff = time.time() - self.start_time
-        self.total_time += self.diff
-        self.calls += 1
-        self.average_time = self.total_time / self.calls
-        if average:
-            return self.average_time
-        else:
-            return self.diff
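# Editor's sketch: typical tic()/toc() use of the Timer above, assuming the
# class is in scope; toc() returns the running average by default.
import time

t = Timer()
for _ in range(3):
    t.tic()
    time.sleep(0.01)   # stand-in for real work
    avg = t.toc()      # average seconds per call so far
print('average time: {:.4f}s'.format(avg))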
-import torch.nn as nn
-import utils
-
-
-class BidirectionalLSTM(nn.Module):
-    def __init__(self, nIn, nHidden, nOut, ngpu):
-        super(BidirectionalLSTM, self).__init__()
-        self.ngpu = ngpu
-
-        self.rnn = nn.LSTM(nIn, nHidden, bidirectional=True)
-        self.embedding = nn.Linear(nHidden * 2, nOut)
-
-    def forward(self, input):
-        recurrent, _ = utils.data_parallel(self.rnn, input,
-                                           self.ngpu)  # [T, b, h * 2]
-
-        T, b, h = recurrent.size()
-        t_rec = recurrent.view(T * b, h)
-        output = utils.data_parallel(self.embedding, t_rec,
-                                     self.ngpu)  # [T * b, nOut]
-        output = output.view(T, b, -1)
-
-        return output
-
-
-class CRNN(nn.Module):
-    def __init__(self, imgH, nc, nclass, nh, ngpu, n_rnn=2, leakyRelu=False):
-        super(CRNN, self).__init__()
-        self.ngpu = ngpu
-        assert imgH % 16 == 0, 'imgH has to be a multiple of 16'
-
-        ks = [3, 3, 3, 3, 3, 3, 2]
-        ps = [1, 1, 1, 1, 1, 1, 0]
-        ss = [1, 1, 1, 1, 1, 1, 1]
-        nm = [64, 128, 256, 256, 512, 512, 512]
-
-        cnn = nn.Sequential()
-
-        def convRelu(i, batchNormalization=False):
-            nIn = nc if i == 0 else nm[i - 1]
-            nOut = nm[i]
-            cnn.add_module('conv{0}'.format(i),
-                           nn.Conv2d(nIn, nOut, ks[i], ss[i], ps[i]))
-            if batchNormalization:
-                cnn.add_module('batchnorm{0}'.format(i), nn.BatchNorm2d(nOut))
-            if leakyRelu:
-                cnn.add_module('relu{0}'.format(i),
-                               nn.LeakyReLU(0.2, inplace=True))
-            else:
-                cnn.add_module('relu{0}'.format(i), nn.ReLU(True))
-
-        convRelu(0)
-        cnn.add_module('pooling{0}'.format(0), nn.MaxPool2d(2, 2))  # 64x16x64
-        convRelu(1)
-        cnn.add_module('pooling{0}'.format(1), nn.MaxPool2d(2, 2))  # 128x8x32
-        convRelu(2, True)
-        convRelu(3)
-        cnn.add_module('pooling{0}'.format(2),
-                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 256x4x16
-        convRelu(4, True)
-        convRelu(5)
-        cnn.add_module('pooling{0}'.format(3),
-                       nn.MaxPool2d((2, 2), (2, 1), (0, 1)))  # 512x2x16
-        convRelu(6, True)  # 512x1x16
-
-        self.cnn = cnn
-        self.rnn = nn.Sequential(
-            BidirectionalLSTM(512, nh, nh, ngpu),
-            BidirectionalLSTM(nh, nh, nclass, ngpu))
-
-    def forward(self, input):
-        # conv features
-        conv = utils.data_parallel(self.cnn, input, self.ngpu)
-        b, c, h, w = conv.size()
-        assert h == 1, "the height of conv must be 1"
-        conv = conv.squeeze(2)
-        conv = conv.permute(2, 0, 1)  # [w, b, c]
-
-        # rnn features
-        output = utils.data_parallel(self.rnn, conv, self.ngpu)
-
-        return output
-#!/usr/bin/python
-# encoding: utf-8
-
-import torch
-import torch.nn as nn
-import torch.nn.parallel
-
-
-def data_parallel(model, input, ngpu):
-    if isinstance(input.data, torch.cuda.FloatTensor) and ngpu > 1:
-        output = nn.parallel.data_parallel(model, input, range(ngpu))
-    else:
-        output = model(input)
-    return output
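# Editor's sketch: with a CPU tensor or ngpu == 1, data_parallel above reduces
# to a plain forward pass; names below are illustrative and assume the helper
# is in scope.
import torch
import torch.nn as nn

model = nn.Linear(8, 2)
x = torch.randn(4, 8)
out = data_parallel(model, x, ngpu=1)   # equivalent to model(x) here
print(out.shape)                        # torch.Size([4, 2])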
-from setuptools import setup, find_packages
-
-import os
-
-with open("README.md", "r") as fh:
-    long_description = fh.read()
-
-
-setup(name='donkeycar',
-      version='2.5.7',
-      description='Self driving library for python.',
-      long_description=long_description,
-      long_description_content_type="text/markdown",
-      url='https://github.com/autorope/donkeycar',
-      download_url='https://github.com/autorope/donkeycar/archive/2.1.5.tar.gz',
-      author='Will Roscoe',
-      author_email='wroscoe@gmail.com',
-      license='MIT',
-      entry_points={
-          'console_scripts': [
-              'donkey=donkeycar.management.base:execute_from_command_line',
-          ],
-      },
-      install_requires=['numpy',
-                        'pillow',
-                        'docopt',
-                        'tornado==4.5.3',
-                        'requests',
-                        'h5py',
-                        'python-socketio',
-                        'flask',
-                        'eventlet',
-                        'moviepy',
-                        'pandas',
-                        ],
-
-      extras_require={
-          'tf': ['tensorflow>=1.9.0'],
-          'tf_gpu': ['tensorflow-gpu>=1.9.0'],
-          'pi': [
-              'picamera',
-              'Adafruit_PCA9685',
-          ],
-          'dev': [
-              'pytest',
-              'pytest-cov',
-              'responses'
-          ],
-          'ci': ['codecov']
-      },
-
-      include_package_data=True,
-
-      classifiers=[
-          # How mature is this project? Common values are
-          #   3 - Alpha
-          #   4 - Beta
-          #   5 - Production/Stable
-          'Development Status :: 3 - Alpha',
-
-          # Indicate who your project is intended for
-          'Intended Audience :: Developers',
-          'Topic :: Scientific/Engineering :: Artificial Intelligence',
-
-          # Pick your license as you wish (should match "license" above)
-          'License :: OSI Approved :: MIT License',
-
-          # Specify the Python versions you support here. In particular, ensure
-          # that you indicate whether you support Python 2, Python 3 or both.
-          'Programming Language :: Python :: 3.5',
-          'Programming Language :: Python :: 3.6',
-      ],
-      keywords='selfdriving cars donkeycar diyrobocars',
-
-      packages=find_packages(exclude=(['tests', 'docs', 'site', 'env'])),
-      )
-from .config import load_config
-from . import config
-from . import util
-from .memory import Memory
-from .vehicle import Vehicle
-from . import parts
-import os
-
-import pkg_resources  # part of setuptools
-import sys
-
-__version__ = pkg_resources.require("donkeycar")[0].version
-print('using donkey version: {} ...'.format(__version__))
-
-
-current_module = sys.modules[__name__]
-
-
-if sys.version_info.major < 3:
-    msg = 'Donkey Requires Python 3.4 or greater. You are using {}'.format(
-        sys.version)
-    raise ValueError(msg)
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-Created on Wed Sep 13 21:27:44 2017
-
-@author: wroscoe
-"""
-import os
-import types
-
-
-class Config:
-
-    def __init__(self):
-        pass
-
-    def from_pyfile(self, filename, silent=False):
-        """
-        Read config class from a file.
-        """
-        d = types.ModuleType('config')
-        d.__file__ = filename
-        try:
-            with open(filename, mode='rb') as config_file:
-                exec(compile(config_file.read(), filename, 'exec'), d.__dict__)
-        except IOError as e:
-            e.strerror = 'Unable to load configuration file (%s)' % e.strerror
-            raise
-        self.from_object(d)
-        return True
-
-    def from_object(self, obj):
-        """
-        Read config class from another object.
-        """
-        for key in dir(obj):
-            if key.isupper():
-                setattr(self, key, getattr(obj, key))
-
-    def __str__(self):
-        """
-        Get a string representation of the config class.
-        """
-        result = []
-        for key in dir(self):
-            if key.isupper():
-                result.append((key, getattr(self, key)))
-        return str(result)
- """ - if config_path is None: - import __main__ as main - main_path = os.path.dirname(os.path.realpath(main.__file__)) - config_path = os.path.join(main_path, 'config.py') - - print('loading config file: {}'.format(config_path)) - cfg = Config() - cfg.from_pyfile(config_path) - print('config loaded') - return cfg -import os -import logging.config - - -def setup(log_file_path=None): - - if log_file_path is None: - log_file_path = os.path.expanduser('~/donkey.log') - - config_default = { - "version": 1, - "disable_existing_loggers": False, - "formatters": { - "simple": { - "format": "%(asctime)s - %(name)s - %(levelname)s - %(message)s" - } - }, - "handlers": { - "console": { - "class": "logging.StreamHandler", - "level": "INFO", - "formatter": "simple", - "stream": "ext://sys.stdout" - }, - "error_file_handler": { - "class": "logging.handlers.RotatingFileHandler", - "level": "INFO", - "formatter": "simple", - "filename": log_file_path, - "maxBytes": 10485760, - "backupCount": 20, - "encoding": "utf8" - }, - }, - "root": { - "level": "DEBUG", - "handlers": ["console", "error_file_handler"] - } - } - - logging.config.dictConfig(config_default) - - -def get_logger(name): - """ - Return a logger that will contextualize the logs with the name. - """ - logger = logging.getLogger(name) - return logger - - -# get a logger specific to this file -logger = get_logger(__name__) -logger.info('Logging configured and loaded.') - - -if __name__ == '__main__': - print('run') - logger.error('test') -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Sun Jun 25 11:07:48 2017 - -@author: wroscoe -""" - - -class Memory: - """ - A convenience class to save key/value pairs. - """ - - def __init__(self, *args, **kw): - self.d = {} - - def __setitem__(self, key, value): - if type(key) is not tuple: - key = (key,) - value = (value,) - - for i, k in enumerate(key): - self.d[k] = value[i] - - def __getitem__(self, key): - if type(key) is tuple: - return [self.d[k] for k in key] - else: - return self.d[key] - - def update(self, new_d): - self.d.update(new_d) - - def put(self, keys, inputs): - if len(keys) > 1: - for i, key in enumerate(keys): - try: - self.d[key] = inputs[i] - except IndexError as e: - error = str(e) + ' issue with keys: ' + str(key) - raise IndexError(error) - else: - self.d[keys[0]] = inputs - - def get(self, keys): - result = [self.d.get(k) for k in keys] - return result - - def keys(self): - return self.d.keys() - - def values(self): - return self.d.values() - - def items(self): - return self.d.items() -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Sun Jun 25 10:44:24 2017 - -@author: wroscoe -""" - -from builtins import bool -from threading import Thread -import time - -from .log import get_logger -from .memory import Memory - - -logger = get_logger(__name__) - - -class Vehicle: - def __init__(self, mem=None): - if not mem: - mem = Memory() - self.mem = mem - self.parts = [] - self.on = True - self.threads = [] - - def add(self, part, inputs=[], outputs=[], - threaded=False, run_condition=None): - """ - Method to add a part to the vehicle drive loop. - - Parameters - ---------- - inputs : list - Channel names to get from memory. - outputs : list - Channel names to save to memory. - threaded : boolean - If a part should be run in a separate thread. - run_condition: boolean - If a part should be run at all. 
- """ - assert type(inputs) is list, "inputs is not a list: %r" % inputs - assert type(outputs) is list, "outputs is not a list: %r" % outputs - assert type( - threaded) is bool, "threaded is not a boolean: %r" % threaded - - p = part - logger.info('Adding part {}.'.format(p.__class__.__name__)) - entry = dict() - entry['part'] = p - entry['inputs'] = inputs - entry['outputs'] = outputs - entry['run_condition'] = run_condition - - if threaded: - t = Thread(target=part.update, args=()) - t.daemon = True - entry['thread'] = t - self.parts.append(entry) - - def start(self, rate_hz=10, max_loop_count=None): - """ - Start vehicle's main drive loop. - - This is the main thread of the vehicle. It starts all the new - threads for the threaded parts then starts an infinit loop - that runs each part and updates the memory. - - Parameters - ---------- - - rate_hz : int - The max frequency that the drive loop should run. The actual - frequency may be less than this if there are many blocking parts. - max_loop_count : int - Maxiumum number of loops the drive loop should execute. This is - used for testing the all the parts of the vehicle work. - """ - - try: - self.on = True - - for entry in self.parts: - if entry.get('thread'): - # start the update thread - entry.get('thread').start() - - # wait until the parts warm up. - logger.info('Starting vehicle...') - time.sleep(1) - - loop_count = 0 - while self.on: - start_time = time.time() - loop_count += 1 - - self.update_parts() - - # stop drive loop if loop_count exceeds max_loopcount - if max_loop_count and loop_count > max_loop_count: - self.on = False - - sleep_time = 1.0 / rate_hz - (time.time() - start_time) - if sleep_time > 0.0: - time.sleep(sleep_time) - - except KeyboardInterrupt: - pass - finally: - self.stop() - - def update_parts(self): - """ - loop over all parts - """ - for entry in self.parts: - # don't run if there is a run condition that is False - run = True - if entry.get('run_condition'): - run_condition = entry.get('run_condition') - run = self.mem.get([run_condition])[0] - # print('run_condition', entry['part'], entry.get('run_condition'), run) - - if run: - p = entry['part'] - # get inputs from memory - inputs = self.mem.get(entry['inputs']) - - # run the part - if entry.get('thread'): - outputs = p.run_threaded(*inputs) - else: - outputs = p.run(*inputs) - - # save the output to memory - if outputs is not None: - self.mem.put(entry['outputs'], outputs) - - def stop(self): - logger.info('Shutting down vehicle and its parts...') - for entry in self.parts: - try: - entry['part'].shutdown() - except Exception as e: - logger.debug(e) -# -*- coding: utf-8 -*- - -import sys -import os -import socket -import shutil -import argparse - -import donkeycar as dk -from donkeycar.parts.datastore import Tub -from .tub import TubManager - - -PACKAGE_PATH = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) -TEMPLATES_PATH = os.path.join(PACKAGE_PATH, 'templates') - - -def make_dir(path): - real_path = os.path.expanduser(path) - print('making dir ', real_path) - if not os.path.exists(real_path): - os.makedirs(real_path) - return real_path - - -def load_config(config_path): - """ - load a config from the given path - """ - conf = os.path.expanduser(config_path) - - if not os.path.exists(conf): - print("No config file at location: %s. Add --config to specify\ - location or run from dir containing config.py." 
-# -*- coding: utf-8 -*-
-
-import sys
-import os
-import socket
-import shutil
-import argparse
-
-import donkeycar as dk
-from donkeycar.parts.datastore import Tub
-from .tub import TubManager
-
-
-PACKAGE_PATH = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
-TEMPLATES_PATH = os.path.join(PACKAGE_PATH, 'templates')
-
-
-def make_dir(path):
-    real_path = os.path.expanduser(path)
-    print('making dir ', real_path)
-    if not os.path.exists(real_path):
-        os.makedirs(real_path)
-    return real_path
-
-
-def load_config(config_path):
-    """
-    Load a config from the given path.
-    """
-    conf = os.path.expanduser(config_path)
-
-    if not os.path.exists(conf):
-        print("No config file at location: %s. Add --config to specify "
-              "location or run from dir containing config.py." % conf)
-        return None
-
-    try:
-        cfg = dk.load_config(conf)
-    except:
-        print("Exception while loading config from", conf)
-        return None
-
-    return cfg
-
-
-class BaseCommand():
-    pass
-
-
-class CreateCar(BaseCommand):
-
-    def parse_args(self, args):
-        parser = argparse.ArgumentParser(
-            prog='createcar', usage='%(prog)s [options]')
-        parser.add_argument('path')
-        # parser.add_argument('--path', default=None, help='path where to create car folder')
-        parser.add_argument('--template', default=None,
-                            help='name of car template to use')
-        parser.add_argument('--overwrite', action='store_true',
-                            help='should replace existing files')
-
-        parsed_args = parser.parse_args(args)
-        return parsed_args
-
-    def run(self, args):
-        args = self.parse_args(args)
-        self.create_car(path=args.path, template=args.template,
-                        overwrite=args.overwrite)
-
-    def create_car(self, path, template='donkey2', overwrite=False):
-        """
-        This script sets up the folder structure for donkey to work.
-        It must run without donkey installed so that people installing with
-        docker can build the folder structure for docker to mount to.
-        """
-
-        # these are needed in case None is passed as path
-        path = path or '~/mycar'
-        template = template or 'donkey2'
-
-        print("Creating car folder: {}".format(path))
-        path = make_dir(path)
-
-        print("Creating data & model folders.")
-        folders = ['models', 'data', 'logs']
-        folder_paths = [os.path.join(path, f) for f in folders]
-        for fp in folder_paths:
-            make_dir(fp)
-
-        # add car application and config files if they don't exist
-        app_template_path = os.path.join(TEMPLATES_PATH, template + '.py')
-        config_template_path = os.path.join(
-            TEMPLATES_PATH, 'config_defaults.py')
-        car_app_path = os.path.join(path, 'manage.py')
-        car_config_path = os.path.join(path, 'config.py')
-
-        if os.path.exists(car_app_path) and not overwrite:
-            print('Car app already exists. Delete it and rerun createcar to replace.')
-        else:
-            print("Copying car application template: {}".format(template))
-            shutil.copyfile(app_template_path, car_app_path)
-
-        if os.path.exists(car_config_path) and not overwrite:
-            print('Car config already exists. Delete it and rerun createcar to replace.')
-        else:
-            print("Copying car config defaults. Adjust these before starting your car.")
-            shutil.copyfile(config_template_path, car_config_path)
-
-        print("Donkey setup complete.")
-
-
-class UploadData(BaseCommand):
-
-    def parse_args(self, args):
-        parser = argparse.ArgumentParser(
-            prog='uploaddata', usage='%(prog)s [options]')
-        parser.add_argument('--url', help='path where to create car folder')
-        parser.add_argument('--template', help='name of car template to use')
-
-        parsed_args = parser.parse_args(args)
-        return parsed_args
-
-
-class FindCar(BaseCommand):
-    def parse_args(self, args):
-        pass
-
-    def run(self, args):
-        print('Looking up your computer IP address...')
-        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
-        s.connect(("8.8.8.8", 80))
-        ip = s.getsockname()[0]
-        print('Your IP address: %s ' % s.getsockname()[0])
-        s.close()
-
-        print("Finding your car's IP address...")
-        cmd = "sudo nmap -sP " + ip + \
-              "/24 | awk '/^Nmap/{ip=$NF}/B8:27:EB/{print ip}'"
-        print("Your car's ip address is:")
-        os.system(cmd)
-
-
-class CalibrateCar(BaseCommand):
-    def __init__(self):
-        self.pwm_min = 0
-        self.pwm_max = 1500
-
-    def parse_args(self, args):
-        parser = argparse.ArgumentParser(
-            prog='calibrate', usage='%(prog)s [options]')
-        parser.add_argument(
-            '--channel', help="The channel you'd like to calibrate [0-15]")
-        parsed_args = parser.parse_args(args)
-        return parsed_args
-
-    def run(self, args):
-        from donkeycar.parts.actuator import PCA9685
-
-        args = self.parse_args(args)
-        channel = int(args.channel)
-        c = PCA9685(channel)
-
-        while True:
-            try:
-                val = input(
-                    """Enter a PWM setting to test ('q' for quit) (0-1500): """)
-                if val == 'q' or val == 'Q':
-                    break
-                pwm = int(val)
-                c.run(pwm)
-            except KeyboardInterrupt:
-                print("\nKeyboardInterrupt received, exit.")
-                break
-            except Exception as ex:
-                print("Oops, {}".format(ex))
-
-
-class MakeMovie(BaseCommand):
-
-    def parse_args(self, args):
-        parser = argparse.ArgumentParser(prog='makemovie')
-        parser.add_argument('--tub', help='The tub to make movie from')
-        parser.add_argument('--out', default='tub_movie.mp4',
-                            help='The movie filename to create. default: tub_movie.mp4')
-        parser.add_argument('--config', default='./config.py',
-                            help='location of config file to use. default: ./config.py')
-        parsed_args = parser.parse_args(args)
-        return parsed_args, parser
-
-    def run(self, args):
-        """
-        Load the images from a tub and create a movie from them.
-        """
-        import moviepy.editor as mpy
-
-        args, parser = self.parse_args(args)
-
-        if args.tub is None:
-            parser.print_help()
-            return
-
-        conf = os.path.expanduser(args.config)
-
-        if not os.path.exists(conf):
-            print("No config file at location: %s. Add --config to specify "
-                  "location or run from dir containing config.py." % conf)
-            return
-
-        try:
-            cfg = dk.load_config(conf)
-        except:
-            print("Exception while loading config from", conf)
-            return
-
-        self.tub = Tub(args.tub)
-        self.num_rec = self.tub.get_num_records()
-        self.iRec = 0
-
-        print('making movie', args.out, 'from', self.num_rec, 'images')
-        clip = mpy.VideoClip(self.make_frame, duration=(
-            self.num_rec // cfg.DRIVE_LOOP_HZ) - 1)
-        clip.write_videofile(args.out, fps=cfg.DRIVE_LOOP_HZ)
-
-        print('done')
-
-    def make_frame(self, t):
-        """
-        Callback to return an image from our tub records.
-        This is called from the VideoClip as it references a time.
-        We don't use t to reference the frame, but instead increment
-        a frame counter. This assumes sequential access.
- """ - self.iRec = self.iRec + 1 - - if self.iRec >= self.num_rec - 1: - return None - - rec = self.tub.get_record(self.iRec) - image = rec['cam/image_array'] - - return image # returns a 8-bit RGB array - - -class Sim(BaseCommand): - """ - Start a websocket SocketIO server to talk to a donkey simulator - """ - - def parse_args(self, args): - parser = argparse.ArgumentParser(prog='sim') - parser.add_argument('--model', help='the model to use for predictions') - parser.add_argument('--config', default='./config.py', - help='location of config file to use. default: ./config.py') - parser.add_argument('--type', default='categorical', - help='model type to use when loading. categorical|linear') - parser.add_argument('--top_speed', default='3', - help='what is top speed to drive') - parsed_args = parser.parse_args(args) - return parsed_args, parser - - def run(self, args): - """ - Start a websocket SocketIO server to talk to a donkey simulator - """ - import socketio - from donkeycar.parts.simulation import SteeringServer - from donkeycar.parts.keras import KerasCategorical, KerasLinear - - args, parser = self.parse_args(args) - - cfg = load_config(args.config) - - if cfg is None: - return - - # TODO: this logic should be in a pilot or modle handler part. - if args.type == "categorical": - kl = KerasCategorical() - elif args.type == "linear": - kl = KerasLinear(num_outputs=2) - else: - print("didn't recognice type:", args.type) - return - - # can provide an optional image filter part - img_stack = None - - # load keras model - kl.load(args.model) - - # start socket server framework - sio = socketio.Server() - - top_speed = float(args.top_speed) - - # start sim server handler - ss = SteeringServer( - sio, kpart=kl, top_speed=top_speed, image_part=img_stack) - - # register events and pass to server handlers - - @sio.on('telemetry') - def telemetry(sid, data): - ss.telemetry(sid, data) - - @sio.on('connect') - def connect(sid, environ): - ss.connect(sid, environ) - - ss.go(('0.0.0.0', 9090)) - - -class TubCheck(BaseCommand): - def parse_args(self, args): - parser = argparse.ArgumentParser( - prog='tubcheck', usage='%(prog)s [options]') - parser.add_argument('tubs', nargs='+', help='paths to tubs') - parser.add_argument('--fix', action='store_true', - default=False, help='paths to tubs') - parsed_args = parser.parse_args(args) - return parsed_args - - def check(self, tub_paths, fix=False): - """ - Check for any problems. Looks at tubs and find problems in any records or images that won't open. - If fix is True, then delete images and records that cause problems. 
- """ - tubs = [Tub(path) for path in tub_paths] - - for tub in tubs: - tub.check(fix=fix) - - def run(self, args): - args = self.parse_args(args) - self.check(args.tubs, args.fix) - - -class ShowHistogram(BaseCommand): - - def parse_args(self, args): - parser = argparse.ArgumentParser( - prog='tubhist', usage='%(prog)s [options]') - parser.add_argument('tubs', nargs='+', help='paths to tubs') - parser.add_argument('--record', default=None, - help='name of record to create histogram') - parsed_args = parser.parse_args(args) - return parsed_args - - def show_histogram(self, tub_paths, record_name): - """ - Produce a histogram of record type frequency in the given tub - """ - from matplotlib import pyplot as plt - from donkeycar.parts.datastore import TubGroup - - tg = TubGroup(tub_paths) - if record_name is not None: - tg.df[record_name].hist(bins=50) - else: - tg.df.hist(bins=50) - plt.show() - - def run(self, args): - args = self.parse_args(args) - args.tubs = ','.join(args.tubs) - self.show_histogram(args.tubs, args.record) - - -class ShowPredictionPlots(BaseCommand): - - def parse_args(self, args): - """ - Parse tubplot arguments - """ - parser = argparse.ArgumentParser( - prog='tubplot', usage='%(prog)s [options]') - parser.add_argument('tubs', nargs='+', help='paths to tubs') - parser.add_argument('--model', help='the model to use for predictions') - parser.add_argument('--config', default='./config.py', - help='location of config file to use. default: ./config.py') - parsed_args = parser.parse_args(args) - return parsed_args - - def run(self, args): - """ - executes the plotting function - """ - args = self.parse_args(args) - args.tubs = ','.join(args.tubs) - self.plot_predictions(args.config, args.tubs, args.model) - - def plot_predictions(self, cfg, tub_paths, model_path): - """ - Plot model predictions for angle and throttle against data from tubs. - - """ - from donkeycar.parts.datastore import TubGroup - from donkeycar.parts.keras import KerasCategorical - - tg = TubGroup(tub_paths) - - model_path = os.path.expanduser(model_path) - model = KerasCategorical() - model.load(model_path) - - gen = tg.get_batch_gen(None, None, batch_size=len( - tg.df), shuffle=False, df=tg.df) - arr = next(gen) - - user_angles = [] - user_throttles = [] - pilot_angles = [] - pilot_throttles = [] - - for tub in tg.tubs: - num_records = tub.get_num_records() - for iRec in tub.get_index(shuffled=False): - record = tub.get_record(iRec) - - img = record["cam/image_array"] - user_angle = float(record["user/angle"]) - user_throttle = float(record["user/throttle"]) - pilot_angle, pilot_throttle = model.run(img) - - user_angles.append(user_angle) - user_throttles.append(user_throttle) - pilot_angles.append(pilot_angle) - pilot_throttles.append(pilot_throttle) - - angles_df = pd.DataFrame( - {'user_angle': user_angles, 'pilot_angle': pilot_angles}) - throttles_df = pd.DataFrame( - {'user_throttle': user_throttles, 'pilot_throttle': pilot_throttles}) - - fig = plt.figure() - - title = "Model Predictions\nTubs: {}\nModel: {}".format( - tub_paths, model_path) - fig.suptitle(title) - - ax1 = fig.add_subplot(211) - ax2 = fig.add_subplot(212) - - angles_df.plot(ax=ax1) - throttles_df.plot(ax=ax2) - - ax1.legend(loc=4) - ax2.legend(loc=4) - - plt.show() - - -def execute_from_command_line(): - """ - This is the fuction linked to the "donkey" terminal command. 
- """ - commands = { - 'createcar': CreateCar, - 'findcar': FindCar, - 'calibrate': CalibrateCar, - 'tubclean': TubManager, - 'tubhist': ShowHistogram, - 'tubplot': ShowPredictionPlots, - 'tubcheck': TubCheck, - 'makemovie': MakeMovie, - 'sim': Sim, - } - - args = sys.argv[:] - command_text = args[1] - - if command_text in commands.keys(): - command = commands[command_text] - c = command() - c.run(args[2:]) - else: - dk.util.proc.eprint('Usage: The availible commands are:') - dk.util.proc.eprint(list(commands.keys())) -""" -tub.py - -Manage tubs -""" - -import os -import sys -import time -import json -import tornado.web -from stat import S_ISREG, ST_MTIME, ST_MODE, ST_CTIME, ST_ATIME - - -class TubManager: - - def run(self, args): - WebServer(args[0]).start() - - -class WebServer(tornado.web.Application): - - def __init__(self, data_path): - if not os.path.exists(data_path): - raise ValueError('The path {} does not exist.'.format(data_path)) - - this_dir = os.path.dirname(os.path.realpath(__file__)) - static_file_path = os.path.join(this_dir, 'tub_web', 'static') - - handlers = [ - (r"/", tornado.web.RedirectHandler, dict(url="/tubs")), - (r"/tubs", TubsView, dict(data_path=data_path)), - (r"/tubs/?(?P[^/]+)?", TubView), - (r"/api/tubs/?(?P[^/]+)?", - TubApi, dict(data_path=data_path)), - (r"/static/(.*)", tornado.web.StaticFileHandler, - {"path": static_file_path}), - (r"/tub_data/(.*)", tornado.web.StaticFileHandler, - {"path": data_path}), - ] - - settings = {'debug': True} - - super().__init__(handlers, **settings) - - def start(self, port=8886): - self.port = int(port) - self.listen(self.port) - print('Listening on {}...'.format(port)) - tornado.ioloop.IOLoop.instance().start() - - -class TubsView(tornado.web.RequestHandler): - - def initialize(self, data_path): - self.data_path = data_path - - def get(self): - import fnmatch - dir_list = fnmatch.filter(os.listdir(self.data_path), '*') - dir_list.sort() - data = {"tubs": dir_list} - self.render("tub_web/tubs.html", **data) - - -class TubView(tornado.web.RequestHandler): - - def get(self, tub_id): - data = {} - self.render("tub_web/tub.html", **data) - - -class TubApi(tornado.web.RequestHandler): - - def initialize(self, data_path): - self.data_path = data_path - - def image_path(self, tub_path, frame_id): - return os.path.join(tub_path, str(frame_id) + "_cam-image_array_.jpg") - - def record_path(self, tub_path, frame_id): - return os.path.join(tub_path, "record_" + frame_id + ".json") - - def clips_of_tub(self, tub_path): - seqs = [int(f.split("_")[0]) - for f in os.listdir(tub_path) if f.endswith('.jpg')] - seqs.sort() - - entries = ((os.stat(self.image_path(tub_path, seq)) - [ST_ATIME], seq) for seq in seqs) - - (last_ts, seq) = next(entries) - clips = [[seq]] - for next_ts, next_seq in entries: - if next_ts - last_ts > 100: # greater than 1s apart - clips.append([next_seq]) - else: - clips[-1].append(next_seq) - last_ts = next_ts - - return clips - - def get(self, tub_id): - clips = self.clips_of_tub(os.path.join(self.data_path, tub_id)) - - self.set_header("Content-Type", "application/json; charset=UTF-8") - self.write(json.dumps({'clips': clips})) - - def post(self, tub_id): - tub_path = os.path.join(self.data_path, tub_id) - old_clips = self.clips_of_tub(tub_path) - new_clips = tornado.escape.json_decode(self.request.body) - - import itertools - old_frames = list(itertools.chain(*old_clips)) - new_frames = list(itertools.chain(*new_clips['clips'])) - frames_to_delete = [str(item) - for item in old_frames if item not in 
-        for frm in frames_to_delete:
-            os.remove(self.record_path(tub_path, frm))
-            os.remove(self.image_path(tub_path, frm))
-"""
-actuators.py
-Classes to control the motors and servos. These classes
-are wrapped in a mixer class before being used in the drive loop.
-"""
-
-import time
-import donkeycar as dk
-
-
-class PCA9685:
-    """
-    PWM motor controller using PCA9685 boards.
-    This is used for most RC Cars.
-    """
-
-    def __init__(self, channel, frequency=60):
-        import Adafruit_PCA9685
-        # Initialise the PCA9685 using the default address (0x40).
-        self.pwm = Adafruit_PCA9685.PCA9685()
-        self.pwm.set_pwm_freq(frequency)
-        self.channel = channel
-
-    def set_pulse(self, pulse):
-        try:
-            self.pwm.set_pwm(self.channel, 0, pulse)
-        except OSError as err:
-            print(
-                "Unexpected issue setting PWM (check wires to motor board): {0}".format(err))
-
-    def run(self, pulse):
-        self.set_pulse(pulse)
-
-
-class PWMSteering:
-    """
-    Wrapper over a PWM motor controller to convert angles to PWM pulses.
-    """
-    LEFT_ANGLE = -1
-    RIGHT_ANGLE = 1
-
-    def __init__(self, controller=None,
-                 left_pulse=290, right_pulse=490):
-
-        self.controller = controller
-        self.left_pulse = left_pulse
-        self.right_pulse = right_pulse
-
-    def run(self, angle):
-        # map absolute angle to angle that vehicle can implement.
-        pulse = dk.util.data.map_range(
-            angle,
-            self.LEFT_ANGLE, self.RIGHT_ANGLE,
-            self.left_pulse, self.right_pulse
-        )
-
-        self.controller.set_pulse(pulse)
-
-    def shutdown(self):
-        self.run(0)  # set steering straight
-
-
-class PWMThrottle:
-    """
-    Wrapper over a PWM motor controller to convert -1 to 1 throttle
-    values to PWM pulses.
-    """
-    MIN_THROTTLE = -1
-    MAX_THROTTLE = 1
-
-    def __init__(self,
-                 controller=None,
-                 max_pulse=300,
-                 min_pulse=490,
-                 zero_pulse=350):
-
-        self.controller = controller
-        self.max_pulse = max_pulse
-        self.min_pulse = min_pulse
-        self.zero_pulse = zero_pulse
-
-        # send zero pulse to calibrate ESC
-        self.controller.set_pulse(self.zero_pulse)
-        time.sleep(1)
-
-    def run(self, throttle):
-        if throttle > 0:
-            pulse = dk.util.data.map_range(throttle,
-                                           0, self.MAX_THROTTLE,
-                                           self.zero_pulse, self.max_pulse)
-        else:
-            pulse = dk.util.data.map_range(throttle,
-                                           self.MIN_THROTTLE, 0,
-                                           self.min_pulse, self.zero_pulse)
-
-        self.controller.set_pulse(pulse)
-
-    def shutdown(self):
-        self.run(0)  # stop vehicle
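# Editor's note: dk.util.data.map_range is called above but not shown in this
# excerpt. A plausible linear-interpolation sketch (an assumption, not the
# library's actual code):
def map_range(x, x_min, x_max, y_min, y_max):
    """Linearly map x from [x_min, x_max] onto [y_min, y_max]."""
    x_range = x_max - x_min
    y_range = y_max - y_min
    return int((x - x_min) / x_range * y_range + y_min)

# e.g. a centered steering angle maps to the midpoint pulse:
print(map_range(0, -1, 1, 290, 490))   # 390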
- """ - if speed > 1 or speed < -1: - raise ValueError( - "Speed must be between 1(forward) and -1(reverse)") - - self.speed = speed - self.throttle = int(dk.util.data.map_range( - abs(speed), -1, 1, -255, 255)) - - if speed > 0: - self.motor.run(self.FORWARD) - else: - self.motor.run(self.BACKWARD) - - self.motor.setSpeed(self.throttle) - - def shutdown(self): - self.mh.getMotor(self.motor_num).run(Adafruit_MotorHAT.RELEASE) -import os -import time -import numpy as np -from PIL import Image -import glob - - -class BaseCamera: - - def run_threaded(self): - return self.frame - - -class PiCamera(BaseCamera): - def __init__(self, resolution=(120, 160), framerate=20): - from picamera.array import PiRGBArray - from picamera import PiCamera - resolution = (resolution[1], resolution[0]) - # initialize the camera and stream - self.camera = PiCamera() # PiCamera gets resolution (height, width) - self.camera.resolution = resolution - self.camera.framerate = framerate - self.rawCapture = PiRGBArray(self.camera, size=resolution) - self.stream = self.camera.capture_continuous(self.rawCapture, - format="rgb", - use_video_port=True) - - # initialize the frame and the variable used to indicate - # if the thread should be stopped - self.frame = None - self.on = True - - print('PiCamera loaded.. .warming camera') - time.sleep(2) - - def run(self): - f = next(self.stream) - frame = f.array - self.rawCapture.truncate(0) - return frame - - def update(self): - # keep looping infinitely until the thread is stopped - for f in self.stream: - # grab the frame from the stream and clear the stream in - # preparation for the next frame - self.frame = f.array - self.rawCapture.truncate(0) - - # if the thread indicator variable is set, stop the thread - if not self.on: - break - - def shutdown(self): - # indicate that the thread should be stopped - self.on = False - print('stoping PiCamera') - time.sleep(.5) - self.stream.close() - self.rawCapture.close() - self.camera.close() -import datetime - - -class Timestamp(): - - def run(self,): - return str(datetime.datetime.utcnow()) -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Tue Jul 4 12:32:53 2017 - -@author: wroscoe -""" -import os -import sys -import time -import json -import datetime -import random -import tarfile - -import numpy as np -import pandas as pd -from PIL import Image - -from donkeycar import util -from ..log import get_logger - -logger = get_logger(__name__) - - -class Tub(object): - """ - A datastore to store sensor data in a key, value format. - - Accepts str, int, float, image_array, image, and array data types. - - For example: - - #Create a tub to store speed values. - >>> path = '~/mydonkey/test_tub' - >>> inputs = ['user/speed', 'cam/image'] - >>> types = ['float', 'image'] - >>> t=Tub(path=path, inputs=inputs, types=types) - - """ - - def __init__(self, path, inputs=None, types=None): - - self.path = os.path.expanduser(path) - logger.info('path_in_tub: {}'.format(self.path)) - self.meta_path = os.path.join(self.path, 'meta.json') - self.df = None - - exists = os.path.exists(self.path) - - if exists: - # load log and meta - logger.info('Tub exists: {}'.format(self.path)) - with open(self.meta_path, 'r') as f: - self.meta = json.load(f) - self.current_ix = self.get_last_ix() + 1 - - elif not exists and inputs: - logger.info('Tub does NOT exist. 
-            # create log and save meta
-            os.makedirs(self.path)
-            self.meta = {'inputs': inputs, 'types': types}
-            with open(self.meta_path, 'w') as f:
-                json.dump(self.meta, f)
-            self.current_ix = 0
-            logger.info('New tub created at: {}'.format(self.path))
-        else:
-            msg = "The tub path you provided doesn't exist and you didn't " + \
-                  "pass any meta info (inputs & types) to create a new tub. " + \
-                  "Please check your tub path or provide meta info to create a new tub."
-
-            raise AttributeError(msg)
-
-        self.start_time = time.time()
-
-    def get_last_ix(self):
-        index = self.get_index()
-        if len(index) >= 1:
-            return max(index)
-        return -1
-
-    def update_df(self):
-        df = pd.DataFrame([self.get_json_record(i)
-                           for i in self.get_index(shuffled=False)])
-        self.df = df
-
-    def get_df(self):
-        if self.df is None:
-            self.update_df()
-        return self.df
-
-    def get_index(self, shuffled=True):
-        files = next(os.walk(self.path))[2]
-        record_files = [f for f in files if f[:6] == 'record']
-
-        def get_file_ix(file_name):
-            try:
-                name = file_name.split('.')[0]
-                num = int(name.split('_')[1])
-            except:
-                num = 0
-            return num
-
-        nums = [get_file_ix(f) for f in record_files]
-
-        if shuffled:
-            random.shuffle(nums)
-        else:
-            nums = sorted(nums)
-
-        return nums
-
-    @property
-    def inputs(self):
-        return list(self.meta['inputs'])
-
-    @property
-    def types(self):
-        return list(self.meta['types'])
-
-    def get_input_type(self, key):
-        input_types = dict(zip(self.inputs, self.types))
-        return input_types.get(key)
-
-    def write_json_record(self, json_data):
-        path = self.get_json_record_path(self.current_ix)
-        try:
-            with open(path, 'w') as fp:
-                json.dump(json_data, fp)
-        except TypeError:
-            logger.warn('troubles with record: {}'.format(json_data))
-        except FileNotFoundError:
-            raise
-        except:
-            logger.error('Unexpected error: {}'.format(sys.exc_info()[0]))
-            raise
-
-    def get_num_records(self):
-        import glob
-        files = glob.glob(os.path.join(self.path, 'record_*.json'))
-        return len(files)
-
-    def make_record_paths_absolute(self, record_dict):
-        d = {}
-        for k, v in record_dict.items():
-            if type(v) == str:  # filename
-                if '.' in v:
-                    v = os.path.join(self.path, v)
-            d[k] = v
-
-        return d
-
-    def check(self, fix=False):
-        """
-        Iterate over all records and make sure we can load them.
-        Optionally remove records that cause a problem.
-        """
-        logger.info('Checking tub: {}'.format(self.path))
-        logger.info('Found: {} records'.format(self.get_num_records()))
-        problems = False
-        for ix in self.get_index(shuffled=False):
-            try:
-                self.get_record(ix)
-            except:
-                problems = True
-                if fix is False:
-                    logger.warning(
-                        'problems with record {} : {}'.format(ix, self.path))
-                else:
-                    logger.warning(
-                        'problems with record {}, removing: {}'.format(ix, self.path))
-                    self.remove_record(ix)
-        if not problems:
-            logger.info('No problems found.')
-
-    def remove_record(self, ix):
-        """
-        Remove data associated with a record.
-        """
-        record = self.get_json_record_path(ix)
-        os.unlink(record)
- """ - json_data = {} - - for key, val in data.items(): - typ = self.get_input_type(key) - - if typ in ['str', 'float', 'int', 'boolean']: - json_data[key] = val - - elif typ is 'image': - name = self.make_file_name(key, ext='.jpg') - val.save(os.path.join(self.path, name)) - json_data[key] = name - - elif typ == 'image_array': - img = Image.fromarray(np.uint8(val)) - name = self.make_file_name(key, ext='.jpg') - img.save(os.path.join(self.path, name)) - json_data[key] = name - - else: - msg = 'Tub does not know what to do with this type {}'.format( - typ) - raise TypeError(msg) - - self.write_json_record(json_data) - self.current_ix += 1 - return self.current_ix - - def get_json_record_path(self, ix): - # fill zeros - # return os.path.join(self.path, 'record_'+str(ix).zfill(6)+'.json') - # don't fill zeros - return os.path.join(self.path, 'record_' + str(ix) + '.json') - - def get_json_record(self, ix): - path = self.get_json_record_path(ix) - try: - with open(path, 'r') as fp: - json_data = json.load(fp) - except UnicodeDecodeError: - raise Exception( - 'bad record: %d. You may want to run `python manage.py check --fix`' % ix) - except FileNotFoundError: - raise - except: - logger.error('Unexpected error: {}'.format(sys.exc_info()[0])) - raise - - record_dict = self.make_record_paths_absolute(json_data) - return record_dict - - def get_record(self, ix): - json_data = self.get_json_record(ix) - data = self.read_record(json_data) - return data - - def read_record(self, record_dict): - data = {} - for key, val in record_dict.items(): - typ = self.get_input_type(key) - - # load objects that were saved as separate files - if typ == 'image_array': - img = Image.open((val)) - val = np.array(img) - - data[key] = val - return data - - def make_file_name(self, key, ext='.png'): - # name = '_'.join([str(self.current_ix).zfill(6), key, ext]) - name = '_'.join([str(self.current_ix), key, ext]) # don't fill zeros - name = name = name.replace('/', '-') - return name - - def delete(self): - """ Delete the folder and files for this tub. """ - import shutil - shutil.rmtree(self.path) - - def shutdown(self): - """ Required by the Part interface """ - pass - - def get_record_gen(self, record_transform=None, shuffle=True, df=None): - """ - Returns records. - - Parameters - ---------- - record_transform : function - The mapping function should handle records in dict format - shuffle : bool - Shuffle records - df : numpy Dataframe - If df is specified, the generator will use the records specified in that DataFrame. If None, - the internal DataFrame will be used by calling get_df() - - Returns - ------- - A dict with keys mapping to the specified keys, and values lists of size batch_size. - - See Also - -------- - get_df - """ - if df is None: - df = self.get_df() - - while True: - for _ in self.df.iterrows(): - if shuffle: - record_dict = df.sample(n=1).to_dict(orient='record')[0] - - record_dict = self.read_record(record_dict) - - if record_transform: - record_dict = record_transform(record_dict) - - yield record_dict - - def get_batch_gen(self, keys=None, batch_size=128, record_transform=None, shuffle=True, df=None): - """ - Returns batches of records. - - Additionally, each record in a batch is split up into a dict with inputs:list of values. By specifying keys as a subset of the inputs, you can filter out unnecessary data. - - Parameters - ---------- - keys : list of strings - List of keys to filter out. If None, all inputs are included. - batch_size : int - The number of records in one batch. 
-
-        Returns
-        -------
-        A dict with keys mapping to the specified keys, and values lists of
-        size batch_size.
-
-        See Also
-        --------
-        get_record_gen
-        """
-        record_gen = self.get_record_gen(
-            record_transform=record_transform, shuffle=shuffle, df=df)
-
-        if df is None:
-            df = self.get_df()
-
-        if keys is None:
-            keys = list(df.columns)
-
-        while True:
-            record_list = [next(record_gen) for _ in range(batch_size)]
-
-            batch_arrays = {}
-            for i, k in enumerate(keys):
-                arr = np.array([r[k] for r in record_list])
-                batch_arrays[k] = arr
-            yield batch_arrays
-
-    def get_train_gen(self, X_keys, Y_keys,
-                      batch_size=128,
-                      record_transform=None,
-                      df=None):
-        """
-        Returns a training/validation set.
-
-        The records are always shuffled.
-
-        Parameters
-        ----------
-        X_keys : list of strings
-            List of the feature(s) to use. Must be included in Tub.inputs.
-        Y_keys : list of strings
-            List of the label(s) to use. Must be included in Tub.inputs.
-
-        Returns
-        -------
-        A tuple (X, Y), where X is a two dimensional array
-        ( len(X_keys) x batch_size ) and Y is a two dimensional array
-        ( len(Y_keys) x batch_size ).
-
-        See Also
-        --------
-        get_batch_gen
-        """
-        batch_gen = self.get_batch_gen(X_keys + Y_keys,
-                                       batch_size=batch_size,
-                                       record_transform=record_transform,
-                                       df=df)
-
-        while True:
-            batch = next(batch_gen)
-            X = [batch[k] for k in X_keys]
-            Y = [batch[k] for k in Y_keys]
-            yield X, Y
-
-    def get_train_val_gen(self, X_keys, Y_keys, batch_size=128, train_frac=.8,
-                          train_record_transform=None,
-                          val_record_transform=None):
-        """
-        Create generators for the training and validation sets.
-
-        Parameters
-        ----------
-        train_frac : float
-            Training/validation set split.
-        train_record_transform : function
-            Transform function for the training set. Used internally by
-            Tub.get_record_gen().
-        val_record_transform : function
-            Transform function for the validation set. Used internally by
-            Tub.get_record_gen().
-
-        Returns
-        -------
-        A tuple (train_gen, val_gen), where train_gen is the training set
-        generator, and val_gen the validation set generator.
-
-        See Also
-        --------
-        get_train_gen
-        get_record_gen
-        """
-        if self.df is None:
-            self.update_df()
-
-        train_df = self.df.sample(frac=train_frac, random_state=200)
-        val_df = self.df.drop(train_df.index)
-
-        train_gen = self.get_train_gen(X_keys=X_keys, Y_keys=Y_keys,
-                                       batch_size=batch_size,
-                                       record_transform=train_record_transform,
-                                       df=train_df)
-
-        val_gen = self.get_train_gen(X_keys=X_keys, Y_keys=Y_keys,
-                                     batch_size=batch_size,
-                                     record_transform=val_record_transform,
-                                     df=val_df)
-
-        return train_gen, val_gen
-
-    def tar_records(self, file_path, start_ix=None, end_ix=None):
-        """
-        Create a tarfile of the records and metadata from a tub.
-
-        Compress using gzip.
-
-        Parameters
-        ----------
-        file_path : string
-            The destination path of the created tar archive.
-        start_ix : int
-            Start index. Defaults to 0.
-        end_ix : int
-            End index. Defaults to last index.
-
-        Returns
-        -------
-        Path to the tar archive.
-        """
-        if not start_ix:
-            start_ix = 0
-
-        if not end_ix:
-            end_ix = self.get_last_ix() + 1
-
-        with tarfile.open(name=file_path, mode='w:gz') as f:
-            for ix in range(start_ix, end_ix):
-                record_path = self.get_json_record_path(ix)
-                f.add(record_path)
-            f.add(self.meta_path)
-
-        return file_path
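# Editor's sketch: writing and reading one record with the Tub above (path
# and keys are illustrative; assumes the class is importable).
import numpy as np

tub = Tub(path='~/mycar/test_tub',
          inputs=['user/angle', 'cam/image_array'],
          types=['float', 'image_array'])
ix = tub.put_record({'user/angle': 0.1,
                     'cam/image_array': np.zeros((120, 160, 3))})
record = tub.get_record(ix - 1)   # put_record returns the next free index
print(record['user/angle'])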
- """ - assert len(self.inputs) == len(args) - record = dict(zip(self.inputs, args)) - self.put_record(record) - - -class TubReader(Tub): - def __init__(self, *args, **kwargs): - super(TubReader, self).__init__(*args, **kwargs) - self.read_ix = 0 - - def run(self, *args): - """ - Accepts keys to read from the tub and retrieves them sequentially. - """ - if self.read_ix >= self.current_ix: - return None - - record_dict = self.get_record(self.read_ix) - self.read_ix += 1 - record = [record_dict[key] for key in args] - return record - - -class TubHandler(): - def __init__(self, path): - self.path = os.path.expanduser(path) - - def get_tub_list(self): - folders = next(os.walk(self.path))[1] - return folders - - def next_tub_number(self): - def get_tub_num(tub_name): - try: - num = int(tub_name.split('_')[1]) - except: - num = 0 - return num - - folders = self.get_tub_list() - numbers = [get_tub_num(x) for x in folders] - next_number = max(numbers+[0]) + 1 - return next_number - - def create_tub_path(self): - tub_num = self.next_tub_number() - date = datetime.datetime.now().strftime('%y-%m-%d') - name = '_'.join(['tub', str(tub_num).zfill(2), date]) - tub_path = os.path.join(self.path, name) - return tub_path - - def new_tub_writer(self, inputs, types): - tub_path = self.create_tub_path() - tw = TubWriter(path=tub_path, inputs=inputs, types=types) - return tw - - -class TubImageStacker(Tub): - """ - A Tub for training a NN with images that are the last three records stacked - togther as 3 channels of a single image. The idea is to give a simple feedforward - NN some chance of building a model based on motion. - If you drive with the ImageFIFO part, then you don't need this. - Just make sure your inference pass uses the ImageFIFO that the NN will now expect. - """ - - def rgb2gray(self, rgb): - """ - take a numpy rgb image return a new single channel image converted to greyscale - """ - return np.dot(rgb[..., :3], [0.299, 0.587, 0.114]) - - def stack3Images(self, img_a, img_b, img_c): - """ - convert 3 rgb images into grayscale and put them into the 3 channels of - a single output image - """ - width, height, _ = img_a.shape - - gray_a = self.rgb2gray(img_a) - gray_b = self.rgb2gray(img_b) - gray_c = self.rgb2gray(img_c) - - img_arr = np.zeros([width, height, 3], dtype=np.dtype('B')) - - img_arr[..., 0] = np.reshape(gray_a, (width, height)) - img_arr[..., 1] = np.reshape(gray_b, (width, height)) - img_arr[..., 2] = np.reshape(gray_c, (width, height)) - - return img_arr - - def get_record(self, ix): - """ - get the current record and two previous. - stack the 3 images into a single image. - """ - data = super(TubImageStacker, self).get_record(ix) - - if ix > 1: - data_ch1 = super(TubImageStacker, self).get_record(ix - 1) - data_ch0 = super(TubImageStacker, self).get_record(ix - 2) - - json_data = self.get_json_record(ix) - for key, val in json_data.items(): - typ = self.get_input_type(key) - - # load objects that were saved as separate files - if typ == 'image': - val = self.stack3Images( - data_ch0[key], data_ch1[key], data[key]) - data[key] = val - elif typ == 'image_array': - img = self.stack3Images( - data_ch0[key], data_ch1[key], data[key]) - val = np.array(img) - - return data - - -class TubTimeStacker(TubImageStacker): - """ - A Tub for training N with records stacked through time. - The idea here is to force the network to learn to look ahead in time. - Init with an array of time offsets from the current time. 
- """ - - def __init__(self, frame_list, *args, **kwargs): - """ - frame_list of [0, 10] would stack the current and 10 frames from now records togther in a single record - with just the current image returned. - [5, 90, 200] would return 3 frames of records, ofset 5, 90, and 200 frames in the future. - - """ - super(TubTimeStacker, self).__init__(*args, **kwargs) - self.frame_list = frame_list - - def get_record(self, ix): - """ - stack the N records into a single record. - Each key value has the record index with a suffix of _N where N is - the frame offset into the data. - """ - data = {} - for i, iOffset in enumerate(self.frame_list): - iRec = ix + iOffset - - try: - json_data = self.get_json_record(iRec) - except FileNotFoundError: - pass - except: - pass - - for key, val in json_data.items(): - typ = self.get_input_type(key) - - # load only the first image saved as separate files - if typ == 'image' and i == 0: - val = Image.open(os.path.join(self.path, val)) - data[key] = val - elif typ == 'image_array' and i == 0: - d = super(TubTimeStacker, self).get_record(ix) - data[key] = d[key] - else: - """ - we append a _offset to the key - so user/angle out now be user/angle_0 - """ - new_key = key + "_" + str(iOffset) - data[new_key] = val - return data - - -class TubGroup(Tub): - def __init__(self, tub_paths_arg): - tub_paths = util.files.expand_path_arg(tub_paths_arg) - logger.info('TubGroup:tubpaths: {}'.format(tub_paths)) - self.tubs = [Tub(path) for path in tub_paths] - self.input_types = {} - - record_count = 0 - for t in self.tubs: - t.update_df() - record_count += len(t.df) - self.input_types.update(dict(zip(t.inputs, t.types))) - - logger.info('joining the tubs {} records together. This could take {} minutes.'.format(record_count, - int(record_count / 300000))) - - self.meta = {'inputs': list(self.input_types.keys()), - 'types': list(self.input_types.values())} - - self.df = pd.concat([t.df for t in self.tubs], axis=0, join='inner') - - @property - def inputs(self): - return list(self.meta['inputs']) - - @property - def types(self): - return list(self.meta['types']) - - def get_num_tubs(self): - return len(self.tubs) - - def get_num_records(self): - return len(self.df) -"""" - -keras.py - -functions to run and train autopilots using keras - -""" - -from tensorflow.python.keras.layers import Input -from tensorflow.python.keras.models import Model, load_model -from tensorflow.python.keras.layers import Convolution2D -from tensorflow.python.keras.layers import Dropout, Flatten, Dense -from tensorflow.python.keras.callbacks import ModelCheckpoint, EarlyStopping - - -class KerasPilot: - - def load(self, model_path): - self.model = load_model(model_path) - - def shutdown(self): - pass - - def train(self, train_gen, val_gen, - saved_model_path, epochs=100, steps=100, train_split=0.8, - verbose=1, min_delta=.0005, patience=5, use_early_stop=True): - """ - train_gen: generator that yields an array of images an array of - - """ - - # checkpoint to save model after each epoch - save_best = ModelCheckpoint(saved_model_path, - monitor='val_loss', - verbose=verbose, - save_best_only=True, - mode='min') - - # stop training if the validation error stops improving. 
- early_stop = EarlyStopping(monitor='val_loss', - min_delta=min_delta, - patience=patience, - verbose=verbose, - mode='auto') - - callbacks_list = [save_best] - - if use_early_stop: - callbacks_list.append(early_stop) - - hist = self.model.fit_generator( - train_gen, - steps_per_epoch=steps, - epochs=epochs, - verbose=1, - validation_data=val_gen, - callbacks=callbacks_list, - validation_steps=steps * (1.0 - train_split) / train_split) - return hist - - -class KerasLinear(KerasPilot): - def __init__(self, model=None, num_outputs=None, *args, **kwargs): - super(KerasLinear, self).__init__(*args, **kwargs) - if model: - self.model = model - elif num_outputs is not None: - self.model = default_linear() - else: - self.model = default_linear() - - def run(self, img_arr): - img_arr = img_arr.reshape((1,) + img_arr.shape) - outputs = self.model.predict(img_arr) - # print(len(outputs), outputs) - steering = outputs[0] - throttle = outputs[1] - return steering[0][0], throttle[0][0] - - -def default_linear(): - img_in = Input(shape=(120, 160, 3), name='img_in') - x = img_in - - # Convolution2D class name is an alias for Conv2D - x = Convolution2D(filters=24, kernel_size=( - 5, 5), strides=(2, 2), activation='relu')(x) - x = Convolution2D(filters=32, kernel_size=( - 5, 5), strides=(2, 2), activation='relu')(x) - x = Convolution2D(filters=64, kernel_size=( - 5, 5), strides=(2, 2), activation='relu')(x) - x = Convolution2D(filters=64, kernel_size=( - 3, 3), strides=(2, 2), activation='relu')(x) - x = Convolution2D(filters=64, kernel_size=( - 3, 3), strides=(1, 1), activation='relu')(x) - - x = Flatten(name='flattened')(x) - x = Dense(units=100, activation='linear')(x) - x = Dropout(rate=.1)(x) - x = Dense(units=50, activation='linear')(x) - x = Dropout(rate=.1)(x) - # categorical output of the angle - angle_out = Dense(units=1, activation='linear', name='angle_out')(x) - - # continous output of throttle - throttle_out = Dense(units=1, activation='linear', name='throttle_out')(x) - - model = Model(inputs=[img_in], outputs=[angle_out, throttle_out]) - - model.compile(optimizer='adam', - loss={'angle_out': 'mean_squared_error', - 'throttle_out': 'mean_squared_error'}, - loss_weights={'angle_out': 0.5, 'throttle_out': .5}) - - return model -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Parts to try donkeycar without a physical car. -""" - -import random -import numpy as np - - -class MovingSquareTelemetry: - """ - Generator of cordinates of a bouncing moving square for simulations. - """ - - def __init__(self, max_velocity=29, - x_min=10, x_max=150, - y_min=10, y_max=110): - - self.velocity = random.random() * max_velocity - - self.x_min, self.x_max = x_min, x_max - self.y_min, self.y_max = y_min, y_max - - self.x_direction = random.random() * 2 - 1 - self.y_direction = random.random() * 2 - 1 - - self.x = random.random() * x_max - self.y = random.random() * y_max - - self.tel = self.x, self.y - - def run(self): - # move - self.x += self.x_direction * self.velocity - self.y += self.y_direction * self.velocity - - # make square bounce off walls - if self.y < self.y_min or self.y > self.y_max: - self.y_direction *= -1 - if self.x < self.x_min or self.x > self.x_max: - self.x_direction *= -1 - - return int(self.x), int(self.y) - - def update(self): - self.tel = self.run() - - def run_threaded(self): - return self.tel - - -class SquareBoxCamera: - """ - Fake camera that returns an image with a square box. - - This can be used to test if a learning algorithm can learn. 
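- For example, SquareBoxCamera().run(50, 50) returns a (120, 160, 3)
- frame with a red box centered at x=50, y=50 (coordinates illustrative).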
- """ - - def __init__(self, resolution=(120, 160), box_size=4, color=(255, 0, 0)): - self.resolution = resolution - self.box_size = box_size - self.color = color - - def run(self, x, y, box_size=None, color=None): - """ - Create an image of a square box at a given coordinates. - """ - radius = int((box_size or self.box_size)/2) - color = color or self.color - frame = np.zeros(shape=self.resolution + (3,)) - frame[y - radius: y + radius, - x - radius: x + radius, :] = color - return frame -# -*- coding: utf-8 -*- - -import time - - -class Lambda: - """ - Wraps a function into a donkey part. - """ - - def __init__(self, f): - """ - Accepts the function to use. - """ - self.f = f - - def run(self, *args, **kwargs): - return self.f(*args, **kwargs) - - def shutdown(self): - return - - -class PIDController: - """ Performs a PID computation and returns a control value. - This is based on the elapsed time (dt) and the current value - of the process variable - (i.e. the thing we're measuring and trying to change). - https://github.com/chrisspen/pid_controller/blob/master/pid_controller/pid.py - """ - - def __init__(self, p=0, i=0, d=0, debug=False): - - # initialize gains - self.Kp = p - self.Ki = i - self.Kd = d - - # The value the controller is trying to get the system to achieve. - self.target = 0 - - # initialize delta t variables - self.prev_tm = time.time() - self.prev_feedback = 0 - self.error = None - - # initialize the output - self.alpha = 0 - - # debug flag (set to True for console output) - self.debug = debug - - def run(self, target_value, feedback): - curr_tm = time.time() - - self.target = target_value - error = self.error = self.target - feedback - - # Calculate time differential. - dt = curr_tm - self.prev_tm - - # Initialize output variable. - curr_alpha = 0 - - # Add proportional component. - curr_alpha += self.Kp * error - - # Add integral component. - curr_alpha += self.Ki * (error * dt) - - # Add differential component (avoiding divide-by-zero). - if dt > 0: - curr_alpha += self.Kd * \ - ((feedback - self.prev_feedback) / float(dt)) - - # Maintain memory for next loop. - self.prev_tm = curr_tm - self.prev_feedback = feedback - - # Update the output - self.alpha = curr_alpha - - if (self.debug): - print('PID target value:', round(target_value, 4)) - print('PID feedback value:', round(feedback, 4)) - print('PID output:', round(curr_alpha, 4)) - - return curr_alpha -""" -CAR CONFIG - -This file is read by your car application's manage.py script to change the car -performance. - -EXMAPLE ------------ -import dk -cfg = dk.load_config(config_path='~/mycar/config.py') -print(cfg.CAMERA_RESOLUTION) - -""" - - -import os - -# PATHS -CAR_PATH = PACKAGE_PATH = os.path.dirname(os.path.realpath(__file__)) -DATA_PATH = os.path.join(CAR_PATH, 'data') -MODELS_PATH = os.path.join(CAR_PATH, 'models') - -# VEHICLE -DRIVE_LOOP_HZ = 20 -MAX_LOOPS = 100000 - -# CAMERA -CAMERA_RESOLUTION = (120, 160) # (height, width) -CAMERA_FRAMERATE = DRIVE_LOOP_HZ - -# STEERING -STEERING_CHANNEL = 1 -STEERING_LEFT_PWM = 420 -STEERING_RIGHT_PWM = 360 - -# THROTTLE -THROTTLE_CHANNEL = 0 -THROTTLE_FORWARD_PWM = 400 -THROTTLE_STOPPED_PWM = 360 -THROTTLE_REVERSE_PWM = 310 - -# TRAINING -BATCH_SIZE = 128 -TRAIN_TEST_SPLIT = 0.8 - - -TUB_PATH = os.path.join(CAR_PATH, 'tub') # if using a single tub - -# ROPE.DONKEYCAR.COM -ROPE_TOKEN = "GET A TOKEN AT ROPE.DONKEYCAR.COM" -#!/usr/bin/env python3 -""" -Scripts to drive a donkey 2 car and train a model for it. 
- 
-Usage: 
- manage.py (drive) [--model=] [--js] [--chaos] 
- manage.py (train) [--tub=] (--model=) [--base_model=] [--no_cache] 
- 
-Options: 
- -h --help Show this screen. 
- --tub TUBPATHS List of paths to tubs. Comma separated. Use quotes to use wildcards. ie "~/tubs/*" 
- --chaos Add periodic random steering when manually driving 
-""" 
-import os 
- 
-from docopt import docopt 
-import donkeycar as dk 
- 
-from donkeycar.parts.camera import PiCamera 
-from donkeycar.parts.transform import Lambda 
-from donkeycar.parts.keras import KerasLinear 
-from donkeycar.parts.actuator import PCA9685, PWMSteering, PWMThrottle 
-from donkeycar.parts.datastore import TubGroup, TubWriter 
-from donkeycar.parts.web_controller import LocalWebController 
-from donkeycar.parts.clock import Timestamp 
- 
- 
-def drive(cfg, model_path=None, use_chaos=False): 
- """ 
- Construct a working robotic vehicle from many parts. 
- Each part runs as a job in the Vehicle loop, calling either 
- its run or run_threaded method depending on the constructor flag `threaded`. 
- All parts are updated one after another at the framerate given in 
- cfg.DRIVE_LOOP_HZ assuming each part finishes processing in a timely manner. 
- Parts may have named outputs and inputs. The framework handles passing named outputs 
- to parts requesting the same named input. 
- """ 
- 
- V = dk.vehicle.Vehicle() 
- 
- clock = Timestamp() 
- V.add(clock, outputs=['timestamp']) 
- 
- cam = PiCamera(resolution=cfg.CAMERA_RESOLUTION) 
- V.add(cam, outputs=['cam/image_array'], threaded=True) 
- 
- ctr = LocalWebController(use_chaos=use_chaos) 
- V.add(ctr, 
- inputs=['cam/image_array'], 
- outputs=['user/angle', 'user/throttle', 'user/mode', 'recording'], 
- threaded=True) 
- 
- # See if we should even run the pilot module. 
- # This is only needed because the part run_condition only accepts boolean 
- def pilot_condition(mode): 
- if mode == 'user': 
- return False 
- else: 
- return True 
- 
- pilot_condition_part = Lambda(pilot_condition) 
- V.add(pilot_condition_part, 
- inputs=['user/mode'], 
- outputs=['run_pilot']) 
- 
- # Run the pilot if the mode is not user. 
- kl = KerasLinear() 
- if model_path: 
- kl.load(model_path) 
- 
- V.add(kl, 
- inputs=['cam/image_array'], 
- outputs=['pilot/angle', 'pilot/throttle'], 
- run_condition='run_pilot') 
- 
- # Choose what inputs should change the car. 
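- # 'user' passes the manual inputs straight through, 'local_angle' lets
- # the pilot steer while the user controls throttle, and any other mode
- # hands both angle and throttle to the pilot.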
- def drive_mode(mode, - user_angle, user_throttle, - pilot_angle, pilot_throttle): - if mode == 'user': - return user_angle, user_throttle - - elif mode == 'local_angle': - return pilot_angle, user_throttle - - else: - return pilot_angle, pilot_throttle - - drive_mode_part = Lambda(drive_mode) - V.add(drive_mode_part, - inputs=['user/mode', 'user/angle', 'user/throttle', - 'pilot/angle', 'pilot/throttle'], - outputs=['angle', 'throttle']) - - steering_controller = PCA9685(cfg.STEERING_CHANNEL) - steering = PWMSteering(controller=steering_controller, - left_pulse=cfg.STEERING_LEFT_PWM, - right_pulse=cfg.STEERING_RIGHT_PWM) - - throttle_controller = PCA9685(cfg.THROTTLE_CHANNEL) - throttle = PWMThrottle(controller=throttle_controller, - max_pulse=cfg.THROTTLE_FORWARD_PWM, - zero_pulse=cfg.THROTTLE_STOPPED_PWM, - min_pulse=cfg.THROTTLE_REVERSE_PWM) - - V.add(steering, inputs=['angle']) - V.add(throttle, inputs=['throttle']) - - # add tub to save data - inputs = ['cam/image_array', 'user/angle', - 'user/throttle', 'user/mode', 'timestamp'] - types = ['image_array', 'float', 'float', 'str', 'str'] - - # multiple tubs - # th = TubHandler(path=cfg.DATA_PATH) - # tub = th.new_tub_writer(inputs=inputs, types=types) - - # single tub - tub = TubWriter(path=cfg.TUB_PATH, inputs=inputs, types=types) - V.add(tub, inputs=inputs, run_condition='recording') - - # run the vehicle - V.start(rate_hz=cfg.DRIVE_LOOP_HZ, - max_loop_count=cfg.MAX_LOOPS) - - -def train(cfg, tub_names, new_model_path, base_model_path=None): - """ - use the specified data in tub_names to train an artifical neural network - saves the output trained model as model_name - """ - X_keys = ['cam/image_array'] - y_keys = ['user/angle', 'user/throttle'] - - new_model_path = os.path.expanduser(new_model_path) - - kl = KerasLinear() - if base_model_path is not None: - base_model_path = os.path.expanduser(base_model_path) - kl.load(base_model_path) - - print('tub_names', tub_names) - if not tub_names: - tub_names = os.path.join(cfg.DATA_PATH, '*') - tubgroup = TubGroup(tub_names) - train_gen, val_gen = tubgroup.get_train_val_gen(X_keys, y_keys, - batch_size=cfg.BATCH_SIZE, - train_frac=cfg.TRAIN_TEST_SPLIT) - - total_records = len(tubgroup.df) - total_train = int(total_records * cfg.TRAIN_TEST_SPLIT) - total_val = total_records - total_train - print('train: %d, validation: %d' % (total_train, total_val)) - steps_per_epoch = total_train // cfg.BATCH_SIZE - print('steps_per_epoch', steps_per_epoch) - - kl.train(train_gen, - val_gen, - saved_model_path=new_model_path, - steps=steps_per_epoch, - train_split=cfg.TRAIN_TEST_SPLIT) - - -if __name__ == '__main__': - args = docopt(__doc__) - cfg = dk.load_config() - - if args['drive']: - drive(cfg, model_path=args['--model'], use_chaos=args['--chaos']) - - elif args['train']: - tub = args['--tub'] - new_model_path = args['--model'] - base_model_path = args['--base_model'] - cache = not args['--no_cache'] - train(cfg, tub, new_model_path, base_model_path) -# -*- coding: utf-8 -*- - -""" -Web controller. - -This example shows how a user use a web controller to controll -a square that move around the image frame. 
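-The square's position is derived from the angle and throttle values
-received from the browser, so no physical car or camera is needed.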
- - -Usage: - manage.py (drive) [--model=] - manage.py (train) [--tub=] (--model=) - -""" - - -import os -from docopt import docopt -import donkeycar as dk - -from donkeycar.parts.datastore import TubGroup, TubWriter -from donkeycar.parts.transform import Lambda -from donkeycar.parts.simulation import SquareBoxCamera -from donkeycar.parts.web_controller import LocalWebController -from donkeycar.parts.keras import KerasLinear -from donkeycar.parts.clock import Timestamp - -log_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'sq.log') -dk.log.setup(log_path) -logger = dk.log.get_logger(__name__) -logger.info('Loading manage.py') - - -def drive(cfg, model_path=None): - - V = dk.vehicle.Vehicle() - V.mem.put(['square/angle', 'square/throttle'], (100, 100)) - - # display square box given by cooridantes. - cam = SquareBoxCamera(resolution=cfg.CAMERA_RESOLUTION) - V.add(cam, - inputs=['square/angle', 'square/throttle'], - outputs=['cam/image_array']) - - # display the image and read user values from a local web controller - ctr = LocalWebController() - V.add(ctr, - inputs=['cam/image_array'], - outputs=['user/angle', 'user/throttle', - 'user/mode', 'recording'], - threaded=True) - - # See if we should even run the pilot module. - # This is only needed because the part run_contion only accepts boolean - def pilot_condition(mode): - if mode == 'user': - return False - else: - return True - - pilot_condition_part = Lambda(pilot_condition) - V.add(pilot_condition_part, inputs=['user/mode'], outputs=['run_pilot']) - - # Run the pilot if the mode is not user. - kl = KerasLinear() - if model_path: - kl.load(model_path) - - V.add(kl, inputs=['cam/image_array'], - outputs=['pilot/angle', 'pilot/throttle'], - run_condition='run_pilot') - - # See if we should even run the pilot module. 
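- # Choose whether the user's or the pilot's angle/throttle pair
- # actually drives the square, based on user/mode.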
- def drive_mode(mode, - user_angle, user_throttle, - pilot_angle, pilot_throttle): - if mode == 'user': - return user_angle, user_throttle - - elif mode == 'pilot_angle': - return pilot_angle, user_throttle - - else: - return pilot_angle, pilot_throttle - - drive_mode_part = Lambda(drive_mode) - V.add(drive_mode_part, - inputs=['user/mode', 'user/angle', 'user/throttle', - 'pilot/angle', 'pilot/throttle'], - outputs=['angle', 'throttle']) - - clock = Timestamp() - V.add(clock, outputs=['timestamp']) - - # transform angle and throttle values to coordinate values - def f(x): - return int(x * 100 + 100) - l = Lambda(f) - V.add(l, inputs=['user/angle'], outputs=['square/angle']) - V.add(l, inputs=['user/throttle'], outputs=['square/throttle']) - - # add tub to save data - inputs = ['cam/image_array', - 'user/angle', 'user/throttle', - 'pilot/angle', 'pilot/throttle', - 'square/angle', 'square/throttle', - 'user/mode', - 'timestamp'] - types = ['image_array', - 'float', 'float', - 'float', 'float', - 'float', 'float', - 'str', - 'str'] - - # multiple tubs - #th = TubHandler(path=cfg.DATA_PATH) - #tub = th.new_tub_writer(inputs=inputs, types=types) - - # single tub - tub = TubWriter(path=cfg.TUB_PATH, inputs=inputs, types=types) - V.add(tub, inputs=inputs, run_condition='recording') - - # run the vehicle for 20 seconds - V.start(rate_hz=50, max_loop_count=10000) - - -def train(cfg, tub_names, model_name): - - X_keys = ['cam/image_array'] - y_keys = ['user/angle', 'user/throttle'] - - def rt(record): - record['user/angle'] = donkeycar.utils.utils.linear_bin( - record['user/angle']) - return record - - def combined_gen(gens): - import itertools - combined_gen = itertools.chain() - for gen in gens: - combined_gen = itertools.chain(combined_gen, gen) - return combined_gen - - kl = KerasCategorical() - logger.info('tub_names', tub_names) - if not tub_names: - tub_names = os.path.join(cfg.DATA_PATH, '*') - tubgroup = TubGroup(tub_names) - train_gen, val_gen = tubgroup.get_train_val_gen(X_keys, y_keys, record_transform=rt, - batch_size=cfg.BATCH_SIZE, - train_frac=cfg.TRAIN_TEST_SPLIT) - - model_path = os.path.expanduser(model_name) - - total_records = len(tubgroup.df) - total_train = int(total_records * cfg.TRAIN_TEST_SPLIT) - total_val = total_records - total_train - logger.info('train: %d, validation: %d' % (total_train, total_val)) - steps_per_epoch = total_train // cfg.BATCH_SIZE - logger.ino('steps_per_epoch', steps_per_epoch) - - kl.train(train_gen, - val_gen, - saved_model_path=model_path, - steps=steps_per_epoch, - train_split=cfg.TRAIN_TEST_SPLIT) - - -if __name__ == '__main__': - args = docopt(__doc__) - cfg = dk.load_config() - - if args['drive']: - drive(cfg, args['--model']) - - elif args['train']: - tub = args['--tub'] - model = args['--model'] - train(cfg, tub, model) -# -*- coding: utf-8 -*- -import platform -import pytest -from donkeycar.parts.datastore import Tub -from donkeycar.parts.simulation import SquareBoxCamera, MovingSquareTelemetry - - -def on_pi(): - if 'arm' in platform.machine(): - return True - return False - - -@pytest.fixture -def tub_path(tmpdir): - tub_path = tmpdir.mkdir('tubs').join('tub') - return str(tub_path) - - -@pytest.fixture -def tub(tub_path): - t = create_sample_tub(tub_path, records=10) - return t - - -@pytest.fixture -def tubs(tmpdir, tubs=5): - tubs_dir = tmpdir.mkdir('tubs') - tub_paths = [str(tubs_dir.join('tub_{}'.format(i))) for i in range(tubs)] - tubs = [create_sample_tub(tub_path, records=5) for tub_path in tub_paths] - return (str(tubs_dir), 
tub_paths, tubs) - - -def create_sample_tub(path, records=10): - inputs = ['cam/image_array', 'angle', 'throttle'] - types = ['image_array', 'float', 'float'] - t = Tub(path, inputs=inputs, types=types) - for _ in range(records): - record = create_sample_record() - t.put_record(record) - return t - - -def create_sample_record(): - cam = SquareBoxCamera() - tel = MovingSquareTelemetry() - x, y = tel.run() - img_arr = cam.run(x, y) - return {'cam/image_array': img_arr, 'angle': x, 'throttle': y} -from .setup import on_pi - -from donkeycar.parts.actuator import PCA9685, PWMSteering, PWMThrottle -import pytest - - -@pytest.mark.skipif(on_pi() == False, reason='Not on RPi') -def test_PCA9685(): - c = PCA9685(0) - - -@pytest.mark.skipif(on_pi() == False, reason='Not on RPi') -def test_PWMSteering(): - c = PCA9685(0) - s = PWMSteering(c) -# -*- coding: utf-8 -*- -import pytest -from donkeycar.parts.keras import KerasPilot, KerasLinear -from donkeycar.parts.keras import default_linear - - -def test_linear(): - kl = KerasLinear() - assert kl.model is not None - - -def test_linear_with_model(): - kc = KerasLinear(default_linear()) - assert kc.model is not None - -from donkeycar.management import base -from tempfile import tempdir - - -def get_test_tub_path(): - tempdir() - - -def test_tubcheck(): - tc = base.TubCheck() -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -import unittest -import pytest -from donkeycar.memory import Memory - - -class TestMemory(unittest.TestCase): - - def test_setitem_single_item(self): - mem = Memory() - mem['myitem'] = 999 - assert mem['myitem'] == 999 - - def test_setitem_multi_items(self): - mem = Memory() - mem[('myitem1', 'myitem2')] = [888, '999'] - assert mem[('myitem1', 'myitem2')] == [888, '999'] - - def test_put_single_item(self): - mem = Memory() - mem.put(['myitem'], 999) - assert mem['myitem'] == 999 - - def test_put_single_item_as_tuple(self): - mem = Memory() - mem.put(('myitem',), 999) - assert mem['myitem'] == 999 - - def test_put_multi_item(self): - mem = Memory() - mem.put(['my1stitem', 'my2nditem'], [777, '999']) - assert mem['my1stitem'] == 777 - assert mem['my2nditem'] == '999' - - def test_put_multi_item_as_tuple(self): - mem = Memory() - mem.put(('my1stitem', 'my2nditem'), (777, '999')) - assert mem['my1stitem'] == 777 - assert mem['my2nditem'] == '999' - - def test_get_multi_item(self): - mem = Memory() - mem.put(['my1stitem', 'my2nditem'], [777, '999']) - assert mem.get(['my1stitem', 'my2nditem']) == [777, '999'] - - def test_update_item(self): - mem = Memory() - mem.put(['myitem'], 888) - assert mem['myitem'] == 888 - - mem.update({'myitem': '111'}) - assert mem['myitem'] == '111' - - def test_get_keys(self): - mem = Memory() - mem.put(['myitem'], 888) - assert list(mem.keys()) == ['myitem'] - - def test_get_values(self): - mem = Memory() - mem.put(['myitem'], 888) - assert list(mem.values()) == [888] - - def test_get_iter(self): - mem = Memory() - mem.put(['myitem'], 888) - - assert dict(mem.items()) == {'myitem': 888} -from donkeycar import util -import pytest - - -def is_error(err): - for e in err: - # Catch error if 'Error' is in the stderr output. - if 'Error' in e.decode(): - return True - # Catch error when the wrong command is used. 
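- # (docopt exits with its Usage: text when the arguments don't match.)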
- if 'Usage:' in e.decode(): - return True - return False - - -@pytest.fixture -def cardir(tmpdir): - path = str(tmpdir.mkdir("mycar")) - return path - - -def test_createcar(cardir): - cmd = ['donkey', 'createcar', '--path', cardir] - out, err, proc_id = util.proc.run_shell_command(cmd) - assert is_error(err) is False - - -def test_drivesim(cardir): - cmd = ['donkey', 'createcar', '--path', cardir, '--template', 'square'] - out, err, proc_id = util.proc.run_shell_command(cmd, timeout=10) - cmd = ['python', 'manage.py', 'drive'] - out, err, proc_id = util.proc.run_shell_command(cmd, cwd=cardir) - print(err) - - if is_error(err) is True: - print('out', out) - print('error: ', err) - raise ValueError(err) - - -def test_bad_command_fails(): - cmd = ['donkey', 'not a comand'] - out, err, proc_id = util.proc.run_shell_command(cmd) - print(err) - print(out) - assert is_error(err) is True -import unittest - -import pytest -from .setup import on_pi -from donkeycar.parts.camera import BaseCamera - - -def test_base_camera(): - cam = BaseCamera() - - -@pytest.mark.skipif(on_pi() == False, reason='only works on RPi') -def test_picamera(): - from donkeycar.parts.camera import PiCamera - resolution = (120, 160) - cam = PiCamera(resolution=resolution) - frame = cam.run() - # assert shape is as expected. img_array shape shows (width, height, channels) - assert frame.shape[:2] == resolution[:] -# -*- coding: utf-8 -*- -import unittest -import numpy as np -from donkeycar.parts.simulation import MovingSquareTelemetry, SquareBoxCamera - - -class TestMovingSquareTelemetry(unittest.TestCase): - def setUp(self): - self.tel = MovingSquareTelemetry() - - def test_run_types(self): - x, y = self.tel.run() - assert type(x) == int - assert type(y) == int - - -class TestSquareBoxCamera(unittest.TestCase): - def setUp(self): - self.cam = SquareBoxCamera() - - def test_run_types(self): - arr = self.cam.run(50, 50) - assert type(arr) == np.ndarray -from donkeycar.parts.transform import Lambda - - -def f(a): - return a + 1 - - -def f2(a, b): - return a + b + 1 - - -def test_lambda_one_arg(): - l = Lambda(f) - b = l.run(1) - assert b == 2 - - -def test_lambda_two_args(): - l = Lambda(f2) - b = l.run(1, 1) - assert b == 3 -# -*- coding: utf-8 -*- -import os -import pytest -import tempfile -import tarfile -from PIL import Image -from donkeycar.parts.datastore import Tub -from .setup import tub, tub_path, create_sample_record - - -def test_tub_load(tub, tub_path): - """Tub loads from existing tub path.""" - t = Tub(tub_path) - assert t is not None - - -def test_get_last_ix(tub): - assert tub.get_last_ix() == 9 - - -def test_get_last_ix_after_adding_new_record(tub): - record = create_sample_record() - tub.put_record(record) - assert tub.get_last_ix() == 10 - - -def test_get_last_ix_for_empty_tub(tub_path): - inputs = ['cam/image_array', 'angle', 'throttle'] - types = ['image_array', 'float', 'float'] - t = Tub(tub_path, inputs=inputs, types=types) - assert t.get_last_ix() == -1 - - -def test_get_last_ix_for_one_record(tub_path): - inputs = ['cam/image_array', 'angle', 'throttle'] - types = ['image_array', 'float', 'float'] - t = Tub(tub_path, inputs=inputs, types=types) - record = create_sample_record() - t.put_record(record) - assert t.get_last_ix() == 0 - - -def test_tub_update_df(tub): - """ Tub updats its dataframe """ - tub.update_df() - assert len(tub.df) == 10 - - -def test_tub_get_df(tub): - """ Get Tub dataframe """ - df = tub.get_df() - assert len(df) == 10 - - -def test_tub_add_record(tub): - """Tub can save a record 
and then retrieve it.""" - rec_in = create_sample_record() - rec_index = tub.put_record(rec_in) - rec_out = tub.get_record(rec_index-1) - assert rec_in.keys() == rec_out.keys() - - -def test_tub_get_num_records(tub): - """ Get nbr of records in Tub """ - cnt = tub.get_num_records() - assert cnt == 10 - - -def test_tub_check_removes_illegal_records(tub): - """ Get Tub dataframe """ - record = tub.get_json_record_path(tub.get_last_ix()) - with open(record, 'w') as f: - f.write('illegal json data') - assert tub.get_num_records() == 10 - - tub.check(fix=True) - assert tub.get_num_records() == 9 - - -def test_tub_remove_record(tub): - """ Remove record from tub """ - assert tub.get_num_records() == 10 - tub.remove_record(0) - assert tub.get_num_records() == 9 - - -def test_tub_put_image(tub_path): - """ Add an encoded image to the tub """ - inputs = ['user/speed', 'cam/image'] - types = ['float', 'image'] - img = Image.new('RGB', (120, 160)) - t = Tub(path=tub_path, inputs=inputs, types=types) - t.put_record({'cam/image': img, 'user/speed': 0.2, }) - assert t.get_record(t.get_last_ix())['user/speed'] == 0.2 - - -def test_tub_put_unknown_type(tub_path): - """ Creating a record with unknown type should fail """ - inputs = ['user/speed'] - types = ['bob'] - t = Tub(path=tub_path, inputs=inputs, types=types) - with pytest.raises(TypeError): - t.put_record({'user/speed': 0.2, }) - - -def test_delete_tub(tub): - """ Delete the tub content """ - assert tub.get_num_records() == 10 - tub.delete() - assert tub.get_num_records() == 0 - - -def test_get_record_gen(tub): - """ Create a records generator and pull 20 records from it """ - records = tub.get_record_gen() - assert len([next(records) for x in range(20)]) == 20 - - -def test_get_batch_gen(tub): - """ Create a batch generator and pull 1 batch (128) records from it """ - batches = tub.get_batch_gen() - batch = next(batches) - - assert len(batch.keys()) == 3 - assert len(list(batch.values())[0]) == 128 - - -def test_get_train_val_gen(tub): - """ Create training and validation generators. 
""" - x = ['angle', 'throttle'] - y = ['cam/image_array'] - train_gen, val_gen = tub.get_train_val_gen(x, y) - - train_batch = next(train_gen) - assert len(train_batch) - - # X is a list of all requested features (angle & throttle) - X = train_batch[0] - assert len(X) == 2 - assert len(X[0]) == 128 - assert len(X[1]) == 128 - - # Y is a list of all requested labels (image_array) - Y = train_batch[1] - assert len(Y) == 1 - assert len(Y[0]) == 128 - - val_batch = next(val_gen) - # X is a list of all requested features (angle & throttle) - X = val_batch[0] - assert len(X) == 2 - assert len(X[0]) == 128 - assert len(X[1]) == 128 - - # Y is a list of all requested labels (image_array) - Y = train_batch[1] - assert len(Y) == 1 - assert len(Y[0]) == 128 - - -def test_tar_records(tub): - """ Tar all records in the tub """ - with tempfile.TemporaryDirectory() as tmpdirname: - tar_path = os.path.join(tmpdirname, 'tub.tar.gz') - tub.tar_records(tar_path) - - with tarfile.open(name=tar_path, mode='r') as t: - assert len(t.getnames()) == 11 - - -def test_recreating_tub(tub): - """ Recreating a Tub should restore it to working state """ - assert tub.get_num_records() == 10 - assert tub.current_ix == 10 - assert tub.get_last_ix() == 9 - path = tub.path - tub = None - - inputs = ['cam/image_array', 'angle', 'throttle'] - types = ['image_array', 'float', 'float'] - t = Tub(path, inputs=inputs, types=types) - assert t.get_num_records() == 10 - assert t.current_ix == 10 - assert t.get_last_ix() == 9 -# -*- coding: utf-8 -*- -import os -from donkeycar.parts.datastore import TubHandler -from .setup import tubs - - -def test_create_tub_handler(tubs): - root_dir = tubs[0] - th = TubHandler(root_dir) - assert th is not None - - -def test_get_tub_list(tubs): - root_dir = tubs[0] - th = TubHandler(root_dir) - assert len(th.get_tub_list()) == 5 - - -def test_next_tub_number(tubs): - root_dir = tubs[0] - th = TubHandler(root_dir) - assert th.next_tub_number() == 5 - - -def test_new_tub_writer(tubs): - root_dir = tubs[0] - th = TubHandler(root_dir) - inputs = ['cam/image_array', 'angle', 'throttle'] - types = ['image_array', 'float', 'float'] - tw = th.new_tub_writer(inputs, types) - assert len(th.get_tub_list()) == 6 - print(tw.path) - assert int(tw.path.split('_')[-2]) == 5 -# -*- coding: utf-8 -*- -import unittest -import tempfile -import os - -from donkeycar.parts.datastore import Tub, TubReader, TubWriter - - -def test_tubreader(): - with tempfile.TemporaryDirectory() as tempfolder: - path = os.path.join(tempfolder, 'new') - inputs = ['name', 'age', 'pic'] - types = ['str', 'float', 'str'] - writer = TubWriter(path, inputs=inputs, types=types) - writer.run('will', 323, 'asdfasdf') - assert writer.get_num_records() == 1 - - reader = TubReader(path) - assert reader.get_num_records() == 1 - - record = reader.run('name', 'age', 'pic') - assert set(record) == set(['will', 323, 'asdfasdf']) -# -*- coding: utf-8 -*- -import unittest -import tempfile -import os - -from donkeycar.parts.datastore import Tub, TubWriter - - -class TestTubWriter(unittest.TestCase): - - def setUp(self): - tempfolder = tempfile.TemporaryDirectory() - self.path = os.path.join(tempfolder.name, 'new') - self.inputs = ['name', 'age', 'pic'] - self.types = ['str', 'float', 'str'] - - def test_tub_create(self): - tub = TubWriter(self.path, inputs=self.inputs, types=self.types) - - def test_tub_path(self): - tub = TubWriter(self.path, inputs=self.inputs, types=self.types) - print(tub.types, tub.inputs) - tub.run('will', 323, 'asdfasdf') - - def 
test_make_paths_absolute(self): - tub = Tub(self.path, inputs=['file_path'], types=['image']) - rel_file_name = 'test.jpg' - record_dict = {'file_path': rel_file_name} - abs_record_dict = tub.make_record_paths_absolute(record_dict) - - assert abs_record_dict['file_path'] == os.path.join( - self.path, rel_file_name) -# -*- coding: utf-8 -*- -from donkeycar.parts.datastore import TubGroup -from .setup import tubs - - -def test_tubgroup_load(tubs): - """ Load TubGroup from existing tubs dir """ - list_of_tubs = tubs[1] - str_of_tubs = ','.join(list_of_tubs) - t = TubGroup(str_of_tubs) - assert t is not None - - -def test_tubgroup_inputs(tubs): - """ Get TubGroup inputs """ - list_of_tubs = tubs[1] - str_of_tubs = ','.join(list_of_tubs) - t = TubGroup(str_of_tubs) - assert sorted(t.inputs) == sorted(['cam/image_array', 'angle', 'throttle']) - - -def test_tubgroup_types(tubs): - """ Get TubGroup types """ - list_of_tubs = tubs[1] - str_of_tubs = ','.join(list_of_tubs) - t = TubGroup(str_of_tubs) - assert sorted(t.types) == sorted(['image_array', 'float', 'float']) - - -def test_tubgroup_get_num_tubs(tubs): - """ Get number of tubs in TubGroup """ - list_of_tubs = tubs[1] - str_of_tubs = ','.join(list_of_tubs) - t = TubGroup(str_of_tubs) - assert t.get_num_tubs() == 5 - - -def test_tubgroup_get_num_records(tubs): - """ Get number of records in TubGroup """ - list_of_tubs = tubs[1] - str_of_tubs = ','.join(list_of_tubs) - t = TubGroup(str_of_tubs) - assert t.get_num_records() == 25 -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Sun Jun 25 14:17:59 2017 - -@author: wroscoe -""" -import unittest -import pytest - - -from donkeycar.util.data import linear_bin -from donkeycar.util.data import linear_unbin -from donkeycar.util.data import bin_Y -from donkeycar.util.data import unbin_Y -from donkeycar.util.data import map_range -from donkeycar.util.data import merge_two_dicts -from donkeycar.util.data import param_gen - - -def create_lbin(marker_index): - """ Create a linear binary array with value set """ - l = [0] * 15 - l[marker_index] = 1 - return l - - -class TestLinearBin(unittest.TestCase): - - def test_zero(self): - res = linear_bin(0) - assert res[7] == 1 - assert sum(res[:7]) == 0 - assert sum(res[8:]) == 0 - - def test_positive(self): - res = linear_bin(1) - assert res[14] == 1 - assert sum(res[:14]) == 0 - - def test_negative(self): - res = linear_bin(-1) - assert res[0] == 1 - assert sum(res[1:]) == 0 - - def test_illegal_pos(self): - with pytest.raises(IndexError): - linear_bin(2) - - def test_illegal_type(self): - with pytest.raises(TypeError): - linear_bin('0') - - -class TestLinearUnbin(unittest.TestCase): - - def test_zero(self): - l = create_lbin(7) - res = linear_unbin(l) - assert res == 0.0 - - def test_positive(self): - l = create_lbin(14) - res = linear_unbin(l) - assert res == 1.0 - - def test_negative(self): - l = create_lbin(0) - res = linear_unbin(l) - assert res == -1.0 - - def test_empty_list(self): - res = linear_unbin([0] * 15) - assert res == -1.0 - - def test_illegal_list(self): - with pytest.raises(ValueError): - linear_unbin([0] * 10) - - -class TestBinY(unittest.TestCase): - - def test_normal_list(self): - l = [-1, 0, 1] - res = bin_Y(l) - - # negative - assert res[0][0] == 1 - assert sum(res[0][1:]) == 0 - - # zero - assert res[1][7] == 1 - assert sum(res[1][:7]) == 0 - assert sum(res[1][8:]) == 0 - - # positive - assert res[2][14] == 1 - assert sum(res[2][:14]) == 0 - - -class TestUnbinY(unittest.TestCase): - - def test_normal_list(self): - l = 
[create_lbin(0), create_lbin(7), create_lbin(14)] - res = unbin_Y(l) - - # negative - assert res[0] == -1.0 - - # zero - assert res[1] == 0.0 - - # positive - assert res[2] == 1.0 - - -class TestMapping(unittest.TestCase): - - def test_positive(self): - min = map_range(-100, -100, 100, 0, 1000) - half = map_range(0, -100, 100, 0, 1000) - max = map_range(100, -100, 100, 0, 1000) - assert min == 0 - assert half == 500 - assert max == 1000 - - def test_negative(self): - ranges = (0, 100, 0, 1000) - min = map_range(0, *ranges) - half = map_range(50, *ranges) - max = map_range(100, *ranges) - assert min == 0 - assert half == 500 - assert max == 1000 - - def test_reverse(self): - ranges = (100, 0, 0, 1000) - min = map_range(0, *ranges) - half = map_range(50, *ranges) - max = map_range(100, *ranges) - assert min == 1000 - assert half == 500 - assert max == 0 - - -class TestMergeDicts(unittest.TestCase): - - def test_merge_two_dicts(self): - d1 = {'a': 1, 'b': 2, 'c': 3} - d2 = {10: 'hi', 'bob': 20} - res = merge_two_dicts(d1, d2) - - assert res == {'a': 1, 'b': 2, 'c': 3, 10: 'hi', 'bob': 20} - - -class TestParamGen(unittest.TestCase): - - def test_param_gen(self): - g = param_gen({'a': ['opt1', 'opt2'], 'b': ['opt3', 'opt4']}) - l = [x for x in g] - expected = [ - {'a': 'opt1', 'b': 'opt3'}, - {'a': 'opt1', 'b': 'opt4'}, - {'a': 'opt2', 'b': 'opt3'}, - {'a': 'opt2', 'b': 'opt4'} - ] - self.assertCountEqual(expected, l) -import pytest -import donkeycar as dk -from donkeycar.parts.transform import Lambda - - -def _get_sample_lambda(): - def f(): - return 1 - return Lambda(f) - - -@pytest.fixture() -def vehicle(): - v = dk.Vehicle() - v.add(_get_sample_lambda(), outputs=['test_out']) - return v - - -def test_create_vehicle(): - v = dk.Vehicle() - assert v.parts == [] - - -def test_add_part(): - v = dk.Vehicle() - v.add(_get_sample_lambda(), outputs=['test_out']) - assert len(v.parts) == 1 - - -def test_vehicle_run(vehicle): - vehicle.start(rate_hz=20, max_loop_count=2) - assert vehicle is not None - - -def test_should_raise_assertion_on_non_list_inputs_for_add_part(): - vehicle = dk.Vehicle() - inputs = 'any' - with pytest.raises(AssertionError, message="inputs is not a list: %r" % inputs): - vehicle.add(_get_sample_lambda(), inputs=inputs) - - -def test_should_raise_assertion_on_non_list_outputs_for_add_part(): - vehicle = dk.Vehicle() - outputs = 'any' - with pytest.raises(AssertionError, message="outputs is not a list: %r" % outputs): - vehicle.add(_get_sample_lambda(), outputs=outputs) - - -def test_should_raise_assertion_on_non_boolean_threaded_for_add_part(): - vehicle = dk.Vehicle() - threaded = 'non_boolean' - with pytest.raises(AssertionError, message="threaded is not a boolean: %r" % threaded): - vehicle.add(_get_sample_lambda(), threaded=threaded) -# -*- coding: utf-8 -*- -import pytest -import json -from donkeycar.parts.web_controller.web import LocalWebController - - -@pytest.fixture -def server(): - server = LocalWebController() - return server - - -def test_json_output(server): - result = server.run() - json_result = json.dumps(result) - d = json.loads(json_result) - assert d is not None - assert int(d[0]) == 0 -from . import (proc, - data, - files, - img, - times, - web) -""" -Assorted functions for manipulating data. -""" -import numpy as np -import itertools - - -def linear_bin(a): - """ - Convert a value to a categorical array. 
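-
- The range [-1, 1] is quantized into 15 bins of width 2/14, so
- linear_bin(0) sets index 7 and linear_bin(1) sets index 14.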
- - Parameters - ---------- - a : int or float - A value between -1 and 1 - - Returns - ------- - list of int - A list of length 15 with one item set to 1, which represents the linear value, and all other items set to 0. - """ - a = a + 1 - b = round(a / (2 / 14)) - arr = np.zeros(15) - arr[int(b)] = 1 - return arr - - -def linear_unbin(arr): - """ - Convert a categorical array to value. - - See Also - -------- - linear_bin - """ - if not len(arr) == 15: - raise ValueError('Illegal array length, must be 15') - b = np.argmax(arr) - a = b * (2 / 14) - 1 - return a - - -def bin_Y(Y): - """ - Convert a list of values to a list of categorical arrays. - - Parameters - ---------- - Y : iterable of int - Iterable with values between -1 and 1 - - Returns - ------- - A two dimensional array of int - - See Also - -------- - linear_bin - """ - d = [linear_bin(y) for y in Y] - return np.array(d) - - -def unbin_Y(Y): - """ - Convert a list of categorical arrays to a list of values. - - See Also - -------- - linear_bin - """ - d = [linear_unbin(y) for y in Y] - return np.array(d) - - -def map_range(x, X_min, X_max, Y_min, Y_max): - """ - Linear mapping between two ranges of values - """ - X_range = X_max - X_min - Y_range = Y_max - Y_min - XY_ratio = X_range / Y_range - - y = ((x - X_min) / XY_ratio + Y_min) // 1 - - return int(y) - - -def merge_two_dicts(x, y): - """ - Given two dicts, merge them into a new dict as a shallow copy - """ - z = x.copy() - z.update(y) - return z - - -def param_gen(params): - """ - Accepts a dictionary of parameter options and returns - a list of dictionary with the permutations of the parameters. - """ - for p in itertools.product(*params.values()): - yield dict(zip(params.keys(), p)) -""" -Utilities to manipulate files and directories. -""" - -import glob -import zipfile -import os - - -def most_recent_file(dir_path, ext=''): - """ - return the most recent file given a directory path and extension - """ - query = dir_path + '/*' + ext - newest = min(glob.iglob(query), key=os.path.getctime) - return newest - - -def make_dir(path): - real_path = os.path.expanduser(path) - if not os.path.exists(real_path): - os.makedirs(real_path) - return real_path - - -def zip_dir(dir_path, zip_path): - """ - Create and save a zipfile of a one level directory - """ - file_paths = glob.glob(dir_path + "/*") # create path to search for files. - - zf = zipfile.ZipFile(zip_path, 'w') - dir_name = os.path.basename(dir_path) - for p in file_paths: - file_name = os.path.basename(p) - zf.write(p, arcname=os.path.join(dir_name, file_name)) - zf.close() - return zip_path - - -def time_since_last_file_edited(path): - """return seconds since last file was updated""" - list_of_files = glob.glob(os.path.join(path, '*')) - if len(list_of_files) > 0: - latest_file = max(list_of_files, key=os.path.getctime) - return int(time.time() - os.path.getctime(latest_file)) - return 0 - - -def expand_path_mask(path): - matches = [] - path = os.path.expanduser(path) - for file in glob.glob(path): - if os.path.isdir(file): - matches.append(os.path.join(os.path.abspath(file))) - return matches - - -def expand_path_arg(path_str): - path_list = path_str.split(",") - expanded_paths = [] - for path in path_list: - paths = expand_path_mask(path) - expanded_paths += paths - return expanded_paths - -import random -import io -import os - -from PIL import Image -import numpy as np -# TODO: put this in its own image_utils file. 
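-# A round-trip sketch using the helpers below (the array is illustrative):
-# jpg = arr_to_binary(np.zeros((120, 160, 3))) encodes a frame as JPEG
-# bytes, and binary_to_img(jpg) decodes it back into a PIL image.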
- - -""" -IMAGES -""" - - -def scale(im, size=128): - """ - accepts: PIL image, size of square sides - returns: PIL image scaled so sides length = size - """ - size = (size, size) - im.thumbnail(size, Image.ANTIALIAS) - return im - - -def img_to_binary(img): - """ - accepts: PIL image - returns: binary stream (used to save to database) - """ - f = io.BytesIO() - img.save(f, format='jpeg') - return f.getvalue() - - -def arr_to_binary(arr): - """ - accepts: numpy array with shape (Hight, Width, Channels) - returns: binary stream (used to save to database) - """ - img = arr_to_img(arr) - return img_to_binary(img) - - -def arr_to_img(arr): - """ - accepts: numpy array with shape (Hight, Width, Channels) - returns: binary stream (used to save to database) - """ - arr = np.uint8(arr) - img = Image.fromarray(arr) - return img - - -def img_to_arr(img): - """ - accepts: numpy array with shape (Hight, Width, Channels) - returns: binary stream (used to save to database) - """ - return np.array(img) - - -def binary_to_img(binary): - """ - accepts: binary file object from BytesIO - returns: PIL image - """ - img = io.BytesIO(binary) - return Image.open(img) - - -def norm_img(img): - return (img - img.mean() / np.std(img))/255.0 - - -def create_video(img_dir_path, output_video_path): - import envoy - # Setup path to the images with telemetry. - full_path = os.path.join(img_dir_path, 'frame_*.png') - - # Run ffmpeg. - command = ("""ffmpeg - -framerate 30/1 - -pattern_type glob -i '%s' - -c:v libx264 - -r 15 - -pix_fmt yuv420p - -y - %s""" % (full_path, output_video_path)) - response = envoy.run(command) -""" -Functions to simplify working with processes. -""" -import signal -import subprocess -import os -import sys - - -def run_shell_command(cmd, cwd=None, timeout=15): - proc = subprocess.Popen(cmd, stdout=subprocess.PIPE, - stderr=subprocess.PIPE, cwd=cwd) - out = [] - err = [] - - try: - proc.wait(timeout=timeout) - except subprocess.TimeoutExpired: - kill(proc.pid) - - for line in proc.stdout.readlines(): - out.append(line.decode()) - - for line in proc.stderr.readlines(): - err.append(line) - return out, err, proc.pid - - -""" -def kill(proc_pid): - process = psutil.Process(proc_pid) - for proc in process.children(recursive=True): - proc.kill() - process.kill() -""" - - -def kill(proc_id): - os.kill(proc_id, signal.SIGINT) - - -def eprint(*args, **kwargs): - print(*args, file=sys.stderr, **kwargs) -import socket - - -def get_ip_address(): - try: - ip = ([l for l in ([ip for ip in socket.gethostbyname_ex(socket.gethostname())[2] if not ip.startswith("127.")][:1], - [[(s.connect(('8.8.8.8', 53)), s.getsockname()[0], s.close()) for s in - [socket.socket(socket.AF_INET, socket.SOCK_DGRAM)]][0][1]]) if l][0][0]) - return ip - except OSError: # occurs when cannot connect to '8.8.8.8' - return "127.0.0.1" # loopback -from .web import LocalWebController -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -Created on Sat Jun 24 20:10:44 2017 - -@author: wroscoe - -remotes.py - -The client and web server needed to control a car remotely. -""" - -import random - - -import os -import time - -import tornado -import tornado.ioloop -import tornado.web -import tornado.gen - -from donkeycar import util - - -class LocalWebController(tornado.web.Application): - port = 8887 - - def __init__(self, use_chaos=False): - """ - Create and publish variables needed on many of - the web handlers. 
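-
- If use_chaos is set, run_threaded is swapped for run_chaos, which is
- intended to periodically inject random steering.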
- """ - print('Starting Donkey Server...') - - this_dir = os.path.dirname(os.path.realpath(__file__)) - self.static_file_path = os.path.join(this_dir, 'templates', 'static') - - self.angle = 0.0 - self.throttle = 0.0 - self.mode = 'user' - self.recording = False - self.ip_address = util.web.get_ip_address() - self.access_url = 'http://{}:{}'.format(self.ip_address, self.port) - - self.chaos_on = False - self.chaos_counter = 0 - self.chaos_frequency = 1000 # frames - self.chaos_duration = 10 - - if use_chaos: - self.run_threaded = self.run_chaos - else: - self.run_threaded = self._run_threaded - - handlers = [ - (r"/", tornado.web.RedirectHandler, dict(url="/drive")), - (r"/drive", DriveAPI), - (r"/video", VideoAPI), - (r"/static/(.*)", tornado.web.StaticFileHandler, - {"path": self.static_file_path}), - ] - - settings = {'debug': True} - super().__init__(handlers, **settings) - - def run_chaos(self, img_arr=None): - """ - Run function where steering is made random to add corrective - """ - self.img_arr = img_arr - if self.chaos_counter == self.chaos_frequency: - self.chaos_on = True - random_steering = random.random() - elif self.chaos_counter == self.chaos_duration: - self.chaos_on = False - - if self.chaos_on: - return random_steering, self.throttle, self.mode, False - else: - return self.angle, self.throttle, self.mode, self.recording - - def say_hello(self): - """ - Print friendly message to user - """ - print("You can now go to {} to drive your car.".format(self.access_url)) - - def update(self): - """ Start the tornado web server. """ - self.port = int(self.port) - self.listen(self.port) - instance = tornado.ioloop.IOLoop.instance() - instance.add_callback(self.say_hello) - instance.start() - - def _run_threaded(self, img_arr=None): - self.img_arr = img_arr - return self.angle, self.throttle, self.mode, self.recording - - def run(self, img_arr=None): - return self.run_threaded(img_arr) - - -class DriveAPI(tornado.web.RequestHandler): - def get(self): - data = {} - self.render("templates/vehicle.html", **data) - - def post(self): - """ - Receive post requests as user changes the angle - and throttle of the vehicle on a the index webpage - """ - data = tornado.escape.json_decode(self.request.body) - self.application.angle = data['angle'] - self.application.throttle = data['throttle'] - self.application.mode = data['drive_mode'] - self.application.recording = data['recording'] - - -class VideoAPI(tornado.web.RequestHandler): - """ - Serves a MJPEG of the images posted from the vehicle. 
- """ - - @tornado.web.asynchronous - @tornado.gen.coroutine - def get(self): - - ioloop = tornado.ioloop.IOLoop.current() - self.set_header( - "Content-type", "multipart/x-mixed-replace;boundary=--boundarydonotcross") - - self.served_image_timestamp = time.time() - my_boundary = "--boundarydonotcross" - while True: - - interval = .1 - if self.served_image_timestamp + interval < time.time(): - - img = util.img.arr_to_binary(self.application.img_arr) - - self.write(my_boundary) - self.write("Content-type: image/jpeg\r\n") - self.write("Content-length: %s\r\n\r\n" % len(img)) - self.write(img) - self.served_image_timestamp = time.time() - yield tornado.gen.Task(self.flush) - else: - yield tornado.gen.Task(ioloop.add_timeout, ioloop.time() + interval) -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -@author: nl8590687 -语音识别的语言模型 - -基于马尔可夫模型的语言模型 - -""" -import platform as plat - - -class ModelLanguage(): # 语音模型类 - def __init__(self, modelpath): - self.modelpath = modelpath - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - - self.slash = '' - if(system_type == 'Windows'): - self.slash = '\\' - elif(system_type == 'Linux'): - self.slash = '/' - else: - print('*[Message] Unknown System\n') - self.slash = '/' - - if(self.slash != self.modelpath[-1]): # 在目录路径末尾增加斜杠 - self.modelpath = self.modelpath + self.slash - - pass - - def LoadModel(self): - self.dict_pinyin = self.GetSymbolDict('dict.txt') - self.model1 = self.GetLanguageModel( - self.modelpath + 'language_model1.txt') - self.model2 = self.GetLanguageModel( - self.modelpath + 'language_model2.txt') - self.pinyin = self.GetPinyin(self.modelpath + 'dic_pinyin.txt') - model = (self.dict_pinyin, self.model1, self.model2) - return model - pass - - def SpeechToText(self, list_syllable): - ''' - 为语音识别专用的处理函数 - 实现从语音拼音符号到最终文本的转换 - ''' - r = '' - length = len(list_syllable) - if(length == 0): # 传入的参数没有包含任何拼音时 - return '' - - # 先取出一个字,即拼音列表中第一个字 - str_tmp = [list_syllable[0]] - - for i in range(0, length - 1): - # 依次从第一个字开始每次连续取两个字拼音 - str_split = list_syllable[i] + ' ' + list_syllable[i+1] - # print(str_split,str_tmp,r) - # 如果这个拼音在汉语拼音状态转移字典里的话 - if(str_split in self.pinyin): - # 将第二个字的拼音加入 - str_tmp.append(list_syllable[i+1]) - else: - # 否则不加入,然后直接将现有的拼音序列进行解码 - str_decode = self.decode(str_tmp, 0.0000) - #print('decode ',str_tmp,str_decode) - if(str_decode != []): - r += str_decode[0][0] - # 再重新从i+1开始作为第一个拼音 - str_tmp = [list_syllable[i+1]] - - #print('最后:', str_tmp) - str_decode = self.decode(str_tmp, 0.0000) - - # print('剩余解码:',str_decode) - - if(str_decode != []): - r += str_decode[0][0] - - return r - - def decode(self, list_syllable, yuzhi=0.0001): - ''' - 实现拼音向文本的转换 - 基于马尔可夫链 - ''' - #assert self.dic_pinyin == null or self.model1 == null or self.model2 == null - list_words = [] - - num_pinyin = len(list_syllable) - # print('======') - #print('decode function: list_syllable\n',list_syllable) - # print(num_pinyin) - # 开始语音解码 - for i in range(num_pinyin): - # print(i) - ls = '' - if(list_syllable[i] in self.dict_pinyin): # 如果这个拼音在汉语拼音字典里的话 - # 获取拼音下属的字的列表,ls包含了该拼音对应的所有的字 - ls = self.dict_pinyin[list_syllable[i]] - else: - break - - if(i == 0): - # 第一个字做初始处理 - num_ls = len(ls) - for j in range(num_ls): - tuple_word = ['', 0.0] - # 设置马尔科夫模型初始状态值 - # 设置初始概率,置为1.0 - tuple_word = [ls[j], 1.0] - # print(tuple_word) - # 添加到可能的句子列表 - list_words.append(tuple_word) - - # print(list_words) - continue - else: - # 开始处理紧跟在第一个字后面的字 - list_words_2 = [] - num_ls_word = len(list_words) - #print('ls_wd: ',list_words) - for j in range(0, 
num_ls_word): - - num_ls = len(ls) - for k in range(0, num_ls): - tuple_word = ['', 0.0] - tuple_word = list(list_words[j]) # 把现有的每一条短语取出来 - #print('tw1: ',tuple_word) - tuple_word[0] = tuple_word[0] + \ - ls[k] # 尝试按照下一个音可能对应的全部的字进行组合 - #print('ls[k] ',ls[k]) - - tmp_words = tuple_word[0][-2:] # 取出用于计算的最后两个字 - #print('tmp_words: ',tmp_words,tmp_words in self.model2) - if(tmp_words in self.model2): # 判断它们是不是再状态转移表里 - #print(tmp_words,tmp_words in self.model2) - tuple_word[1] = tuple_word[1] * float( - self.model2[tmp_words]) / float(self.model1[tmp_words[-2]]) - # 核心!在当前概率上乘转移概率,公式化简后为第n-1和n个字出现的次数除以第n-1个字出现的次数 - # print(self.model2[tmp_words],self.model1[tmp_words[-2]]) - else: - tuple_word[1] = 0.0 - continue - #print('tw2: ',tuple_word) - #print(tuple_word[1] >= pow(yuzhi, i)) - if(tuple_word[1] >= pow(yuzhi, i)): - # 大于阈值之后保留,否则丢弃 - list_words_2.append(tuple_word) - - list_words = list_words_2 - # print(list_words,'\n') - # print(list_words) - for i in range(0, len(list_words)): - for j in range(i + 1, len(list_words)): - if(list_words[i][1] < list_words[j][1]): - tmp = list_words[i] - list_words[i] = list_words[j] - list_words[j] = tmp - - return list_words - pass - - def GetSymbolDict(self, dictfilename): - ''' - 读取拼音汉字的字典文件 - 返回读取后的字典 - ''' - txt_obj = open(dictfilename, 'r', encoding='UTF-8') # 打开文件并读入 - txt_text = txt_obj.read() - txt_obj.close() - txt_lines = txt_text.split('\n') # 文本分割 - - dic_symbol = {} # 初始化符号字典 - for i in txt_lines: - list_symbol = [] # 初始化符号列表 - if(i != ''): - txt_l = i.split('\t') - pinyin = txt_l[0] - for word in txt_l[1]: - list_symbol.append(word) - dic_symbol[pinyin] = list_symbol - - return dic_symbol - - def GetLanguageModel(self, modelLanFilename): - ''' - 读取语言模型的文件 - 返回读取后的模型 - ''' - txt_obj = open(modelLanFilename, 'r', encoding='UTF-8') # 打开文件并读入 - txt_text = txt_obj.read() - txt_obj.close() - txt_lines = txt_text.split('\n') # 文本分割 - - dic_model = {} # 初始化符号字典 - for i in txt_lines: - if(i != ''): - txt_l = i.split('\t') - if(len(txt_l) == 1): - continue - # print(txt_l) - dic_model[txt_l[0]] = txt_l[1] - - return dic_model - - def GetPinyin(self, filename): - file_obj = open(filename, 'r', encoding='UTF-8') - txt_all = file_obj.read() - file_obj.close() - - txt_lines = txt_all.split('\n') - dic = {} - - for line in txt_lines: - if(line == ''): - continue - pinyin_split = line.split('\t') - - list_pinyin = pinyin_split[0] - - if(list_pinyin not in dic and int(pinyin_split[1]) > 1): - dic[list_pinyin] = pinyin_split[1] - return dic - - -if(__name__ == '__main__'): - - ml = ModelLanguage('model_language') - ml.LoadModel() - - #str_pinyin = ['zhe4','zhen1','shi4','ji2', 'hao3','de5'] - #str_pinyin = ['jin1', 'tian1', 'shi4', 'xing1', 'qi1', 'san1'] - #str_pinyin = ['ni3', 'hao3','a1'] - #str_pinyin = ['wo3','dui4','shi4','mei2','cuo4','ni3','hao3'] - #str_pinyin = ['wo3','dui4','shi4','tian1','mei2','na5','li3','hai4'] - #str_pinyin = ['ba3','zhe4','xie1','zuo4','wan2','wo3','jiu4','qu4','shui4','jiao4'] - #str_pinyin = ['wo3','qu4','a4','mei2','shi4','er2','la1'] - #str_pinyin = ['wo3', 'men5', 'qun2', 'li3', 'xiong1', 'di4', 'jian4', 'mei4', 'dou1', 'zai4', 'shuo1'] - #str_pinyin = ['su1', 'an1', 'ni3', 'sui4', 'li4', 'yun4', 'sui2', 'cong2', 'jiao4', 'ming2', 'tao2', 'qi3', 'yu2', 'peng2', 'ya4', 'yang4', 'chao1', 'dao3', 'jiang1', 'li3', 'yuan2', 'kang1', 'zhua1', 'zou3'] - #str_pinyin = ['da4', 'jia1', 'hao3'] - str_pinyin = ['kao3', 'yan2', 'yan1', 'yu3', 'ci2', 'hui4'] - #r = ml.decode(str_pinyin) - r = ml.SpeechToText(str_pinyin) - 
print('语音转文字结果:\n', r)  # i.e. "speech-to-text result"
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: nl8590687
-"""
-import platform as plat
-import os
-import time
-
-from general_function.file_wav import *
-from general_function.file_dict import *
-from general_function.gen_func import *
-
-# LSTM_CNN
-import keras as kr
-import numpy as np
-import random
-
-from keras.models import Sequential, Model
-# , Flatten,LSTM,Convolution1D,MaxPooling1D,Merge
-from keras.layers import Dense, Dropout, Input, Reshape
-from keras.layers import Conv1D, LSTM, MaxPooling1D, Lambda, TimeDistributed, Activation, Conv2D, MaxPooling2D  # , Merge,Conv1D
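# [Editor's note] Shape bookkeeping for the CreateModel() that follows (a
# sketch, not project code): three MaxPooling2D stages with pool_size=2 shrink
# the 1600x200x1 spectrogram input by 8x on both axes, so with 128 channels the
# feature map reaches the Reshape((200, 3200)) layer as 200 x 25 x 128.
audio_length, feature_length, channels = 1600, 200, 128
for _ in range(3):          # three pool_size=2 pooling layers
    audio_length //= 2
    feature_length //= 2
assert (audio_length, feature_length * channels) == (200, 3200)
# The same factor of 8 is why Predict() passes data_input.shape[0] // 8 as the
# number of CTC time steps.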
-from keras import backend as K -from keras.optimizers import SGD, Adadelta - -from readdata24 import DataSpeech - - -class ModelSpeech(): # 语音模型类 - def __init__(self, datapath): - ''' - 初始化 - 默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块 - ''' - MS_OUTPUT_SIZE = 1424 - self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 - # self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch - self.label_max_string_length = 64 - self.AUDIO_LENGTH = 1600 - self.AUDIO_FEATURE_LENGTH = 200 - self._model, self.base_model = self.CreateModel() - - self.datapath = datapath - self.slash = '' - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - self.slash = '\\' # 反斜杠 - elif(system_type == 'Linux'): - self.slash = '/' # 正斜杠 - else: - print('*[Message] Unknown System\n') - self.slash = '/' # 正斜杠 - if(self.slash != self.datapath[-1]): # 在目录路径末尾增加斜杠 - self.datapath = self.datapath + self.slash - - def CreateModel(self): - ''' - 定义CNN/LSTM/CTC模型,使用函数式模型 - 输入层:200维的特征值序列,一条语音数据的最大长度设为1600(大约16s) - 隐藏层:卷积池化层,卷积核大小为3x3,池化窗口大小为2 - 隐藏层:全连接层 - 输出层:全连接层,神经元数量为self.MS_OUTPUT_SIZE,使用softmax作为激活函数, - CTC层:使用CTC的loss作为损失函数,实现连接性时序多输出 - - ''' - # 每一帧使用13维mfcc特征及其13维一阶差分和13维二阶差分表示,最大信号序列长度为1500 - input_data = Input(name='the_input', shape=( - self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)) - - layer_h1 = Conv2D(32, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(input_data) # 卷积层 - layer_h1 = Dropout(0.1)(layer_h1) - layer_h2 = Conv2D(32, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h1) # 卷积层 - layer_h3 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h2) # 池化层 - # layer_h3 = Dropout(0.2)(layer_h2) # 随机中断部分神经网络连接,防止过拟合 - layer_h3 = Dropout(0.2)(layer_h3) - layer_h4 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h3) # 卷积层 - layer_h4 = Dropout(0.2)(layer_h4) - layer_h5 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h4) # 卷积层 - layer_h6 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h5) # 池化层 - - layer_h6 = Dropout(0.3)(layer_h6) - layer_h7 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h6) # 卷积层 - layer_h7 = Dropout(0.3)(layer_h7) - layer_h8 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h7) # 卷积层 - layer_h9 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h8) # 池化层 - #test=Model(inputs = input_data, outputs = layer_h6) - # test.summary() - - layer_h10 = Reshape((200, 3200))(layer_h9) # Reshape层 - # layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM层 - # layer_h6 = Dropout(0.2)(layer_h5) # 随机中断部分神经网络连接,防止过拟合 - layer_h10 = Dropout(0.4)(layer_h10) - layer_h11 = Dense(128, activation="relu", use_bias=True, - kernel_initializer='he_normal')(layer_h10) # 全连接层 - layer_h11 = Dropout(0.4)(layer_h11) - layer_h12 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, - kernel_initializer='he_normal')(layer_h11) # 全连接层 - - y_pred = Activation('softmax', name='Activation0')(layer_h12) - model_data = Model(inputs=input_data, outputs=y_pred) - # model_data.summary() - - labels = Input(name='the_labels', shape=[ - self.label_max_string_length], dtype='float32') - input_length = Input(name='input_length', shape=[1], dtype='int64') - label_length = Input(name='label_length', shape=[1], 
dtype='int64') - # Keras doesn't currently support loss funcs with extra parameters - # so CTC loss is implemented in a lambda layer - - # layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC - loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')( - [y_pred, labels, input_length, label_length]) - - model = Model(inputs=[input_data, labels, - input_length, label_length], outputs=loss_out) - - # model.summary() - - # clipnorm seems to speeds up convergence - #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) - ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06) - - #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) - model.compile(loss={'ctc': lambda y_true, - y_pred: y_pred}, optimizer=ada_d) - - # captures output of softmax so we can decode the output during visualization - test_func = K.function([input_data], [y_pred]) - - print('[*提示] 创建模型成功,模型编译成功') - return model, model_data - - def ctc_lambda_func(self, args): - y_pred, labels, input_length, label_length = args - - y_pred = y_pred[:, :, :] - #y_pred = y_pred[:, 2:, :] - return K.ctc_batch_cost(labels, y_pred, input_length, label_length) - - def TrainModel(self, datapath, epoch=2, save_step=1000, batch_size=32, filename='model_speech/speech_model24'): - ''' - 训练模型 - 参数: - datapath: 数据保存的路径 - epoch: 迭代轮数 - save_step: 每多少步保存一次模型 - filename: 默认保存文件名,不含文件后缀名 - ''' - data = DataSpeech(datapath, 'train') - - num_data = data.GetDataNum() # 获取数据的数量 - - yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH) - - for epoch in range(epoch): # 迭代轮数 - print('[running] train epoch %d .' % epoch) - n_step = 0 # 迭代数据数 - while True: - try: - print('[message] epoch %d . Have train datas %d+' % - (epoch, n_step*save_step)) - # data_genetator是一个生成器函数 - - #self._model.fit_generator(yielddatas, save_step, nb_worker=2) - self._model.fit_generator(yielddatas, save_step) - n_step += 1 - except StopIteration: - print('[error] generator error. 
please check data format.') - break - - self.SaveModel(comment='_e_'+str(epoch) + - '_step_'+str(n_step * save_step)) - self.TestModel( - self.datapath, str_dataset='train', data_count=4) - self.TestModel(self.datapath, str_dataset='dev', data_count=4) - - def LoadModel(self, filename='model_speech/speech_model24.model'): - ''' - 加载模型参数 - ''' - self._model.load_weights(filename) - self.base_model.load_weights(filename + '.base') - - def SaveModel(self, filename='model_speech/speech_model24', comment=''): - ''' - 保存模型参数 - ''' - self._model.save_weights(filename+comment+'.model') - self.base_model.save_weights(filename + comment + '.model.base') - f = open('step24.txt', 'w') - f.write(filename+comment) - f.close() - - def TestModel(self, datapath='', str_dataset='dev', data_count=32, out_report=False, show_ratio=True): - ''' - 测试检验模型效果 - ''' - data = DataSpeech(self.datapath, str_dataset) - # data.LoadDataList(str_dataset) - num_data = data.GetDataNum() # 获取数据的数量 - if(data_count <= 0 or data_count > num_data): # 当data_count为小于等于0或者大于测试数据量的值时,则使用全部数据来测试 - data_count = num_data - - try: - ran_num = random.randint(0, num_data - 1) # 获取一个随机数 - - words_num = 0 - word_error_num = 0 - - nowtime = time.strftime( - '%Y%m%d_%H%M%S', time.localtime(time.time())) - if(out_report == True): - txt_obj = open('Test_Report_' + str_dataset + '_' + - nowtime + '.txt', 'w', encoding='UTF-8') # 打开文件并读入 - - txt = '' - for i in range(data_count): - data_input, data_labels = data.GetData( - (ran_num + i) % num_data) # 从随机数开始连续向后取一定数量数据 - - # 数据格式出错处理 开始 - # 当输入的wav文件长度过长时自动跳过该文件,转而使用下一个wav文件来运行 - num_bias = 0 - while(data_input.shape[0] > self.AUDIO_LENGTH): - print('*[Error]', 'wave data lenghth of num', (ran_num + i) % num_data, - 'is too long.', '\n A Exception raise when test Speech Model.') - num_bias += 1 - data_input, data_labels = data.GetData( - (ran_num + i + num_bias) % num_data) # 从随机数开始连续向后取一定数量数据 - # 数据格式出错处理 结束 - - pre = self.Predict(data_input, data_input.shape[0] // 8) - - words_n = data_labels.shape[0] # 获取每个句子的字数 - words_num += words_n # 把句子的总字数加上 - edit_distance = GetEditDistance(data_labels, pre) # 获取编辑距离 - if(edit_distance <= words_n): # 当编辑距离小于等于句子字数时 - word_error_num += edit_distance # 使用编辑距离作为错误字数 - else: # 否则肯定是增加了一堆乱七八糟的奇奇怪怪的字 - word_error_num += words_n # 就直接加句子本来的总字数就好了 - - if(i % 10 == 0 and show_ratio == True): - print('测试进度:', i, '/', data_count) - - txt = '' - if(out_report == True): - txt += str(i) + '\n' - txt += 'True:\t' + str(data_labels) + '\n' - txt += 'Pred:\t' + str(pre) + '\n' - txt += '\n' - txt_obj.write(txt) - - print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:', - word_error_num / words_num * 100, '%') - if(out_report == True): - txt = '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + \ - str(word_error_num / words_num * 100) + ' %' - txt_obj.write(txt) - txt_obj.close() - - except StopIteration: - print('[Error] Model Test Error. 
please check data format.') - - def Predict(self, data_input, input_len): - ''' - 预测结果 - 返回语音识别后的拼音符号列表 - ''' - - batch_size = 1 - in_len = np.zeros((batch_size), dtype=np.int32) - - in_len[0] = input_len - - x_in = np.zeros( - (batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=np.float) - - for i in range(batch_size): - x_in[i, 0:len(data_input)] = data_input - - base_pred = self.base_model.predict(x=x_in) - - #print('base_pred:\n', base_pred) - - #y_p = base_pred - # for j in range(200): - # mean = np.sum(y_p[0][j]) / y_p[0][j].shape[0] - # print('max y_p:',np.max(y_p[0][j]),'min y_p:',np.min(y_p[0][j]),'mean y_p:',mean,'mid y_p:',y_p[0][j][100]) - # print('argmin:',np.argmin(y_p[0][j]),'argmax:',np.argmax(y_p[0][j])) - # count=0 - # for i in range(y_p[0][j].shape[0]): - # if(y_p[0][j][i] < mean): - # count += 1 - # print('count:',count) - - base_pred = base_pred[:, :, :] - #base_pred =base_pred[:, 2:, :] - - r = K.ctc_decode(base_pred, in_len, greedy=True, - beam_width=100, top_paths=1) - - #print('r', r) - - r1 = K.get_value(r[0][0]) - #print('r1', r1) - - #r2 = K.get_value(r[1]) - # print(r2) - - r1 = r1[0] - - return r1 - pass - - def RecognizeSpeech(self, wavsignal, fs): - ''' - 最终做语音识别用的函数,识别一个wav序列的语音 - 不过这里现在还有bug - ''' - - #data = self.data - #data = DataSpeech('E:\\语音数据集') - # data.LoadDataList('dev') - # 获取输入特征 - #data_input = GetMfccFeature(wavsignal, fs) - # t0=time.time() - data_input = GetFrequencyFeature2(wavsignal, fs) - # t1=time.time() - #print('time cost:',t1-t0) - - input_length = len(data_input) - input_length = input_length // 8 - - data_input = np.array(data_input, dtype=np.float) - # print(data_input,data_input.shape) - data_input = data_input.reshape( - data_input.shape[0], data_input.shape[1], 1) - # t2=time.time() - r1 = self.Predict(data_input, input_length) - # t3=time.time() - #print('time cost:',t3-t2) - list_symbol_dic = GetSymbolList(self.datapath) # 获取拼音列表 - - r_str = [] - for i in r1: - r_str.append(list_symbol_dic[i]) - - return r_str - pass - - def RecognizeSpeech_FromFile(self, filename): - ''' - 最终做语音识别用的函数,识别指定文件名的语音 - ''' - - wavsignal, fs = read_wav_data(filename) - - r = self.RecognizeSpeech(wavsignal, fs) - - return r - - pass - - @property - def model(self): - ''' - 返回keras model - ''' - return self._model - - -if(__name__ == '__main__'): - - import tensorflow as tf - from keras.backend.tensorflow_backend import set_session - os.environ["CUDA_VISIBLE_DEVICES"] = "0" - # 进行配置,使用70%的GPU - config = tf.ConfigProto() - config.gpu_options.per_process_gpu_memory_fraction = 0.93 - # config.gpu_options.allow_growth=True #不全部占满显存, 按需分配 - set_session(tf.Session(config=config)) - - datapath = '' - modelpath = 'model_speech' - - if(not os.path.exists(modelpath)): # 判断保存模型的目录是否存在 - os.makedirs(modelpath) # 如果不存在,就新建一个,避免之后保存模型的时候炸掉 - - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - datapath = 'D:\\语音数据集' - modelpath = modelpath + '\\' - elif(system_type == 'Linux'): - datapath = 'dataset' - modelpath = modelpath + '/' - else: - print('*[Message] Unknown System\n') - datapath = 'dataset' - modelpath = modelpath + '/' - - ms = ModelSpeech(datapath) - - #ms.LoadModel(modelpath + 'm24/speech_model24_e_0_step_411000.model') - ms.TrainModel(datapath, epoch=50, batch_size=16, save_step=500) - #ms.TestModel(datapath, str_dataset='test', data_count = 128, out_report = True) - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav') - #r = 
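# [Editor's note] What the K.ctc_decode(..., greedy=True) call in Predict()
# reduces to, as a plain-Python sketch: take the argmax class per frame,
# collapse consecutive repeats, then drop the blank class. Keras places the
# CTC blank at the last index, i.e. 1423 for the 1424-way softmax above
# ("1423 pinyin + 1 blank"). Illustrative only.
def greedy_ctc_collapse(frame_argmax, blank=1423):
    out, prev = [], None
    for idx in frame_argmax:
        if idx != prev and idx != blank:
            out.append(idx)
        prev = idx
    return out

assert greedy_ctc_collapse([5, 5, 1423, 5, 7, 7]) == [5, 5, 7]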
ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav') - #print('*[提示] 语音识别结果:\n',r) -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -@author: nl8590687 -""" -import platform as plat -import os -import time - -from general_function.file_wav import * -from general_function.file_dict import * -from general_function.gen_func import * - -# LSTM_CNN -import keras as kr -import numpy as np -import random - -from keras.models import Sequential, Model -# , Flatten,LSTM,Convolution1D,MaxPooling1D,Merge -from keras.layers import Dense, Dropout, Input, Reshape -from keras.layers import Conv1D, LSTM, MaxPooling1D, Lambda, TimeDistributed, Activation, Conv2D, MaxPooling2D # , Merge,Conv1D -from keras import backend as K -from keras.optimizers import SGD, Adadelta - -from readdata24 import DataSpeech - - -class ModelSpeech(): # 语音模型类 - def __init__(self, datapath): - ''' - 初始化 - 默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块 - ''' - MS_OUTPUT_SIZE = 1424 - self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 - # self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch - self.label_max_string_length = 64 - self.AUDIO_LENGTH = 1600 - self.AUDIO_FEATURE_LENGTH = 200 - self._model, self.base_model = self.CreateModel() - - self.datapath = datapath - self.slash = '' - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - self.slash = '\\' # 反斜杠 - elif(system_type == 'Linux'): - self.slash = '/' # 正斜杠 - else: - print('*[Message] Unknown System\n') - self.slash = '/' # 正斜杠 - if(self.slash != self.datapath[-1]): # 在目录路径末尾增加斜杠 - self.datapath = self.datapath + self.slash - - def CreateModel(self): - ''' - 定义CNN/LSTM/CTC模型,使用函数式模型 - 输入层:200维的特征值序列,一条语音数据的最大长度设为1600(大约16s) - 隐藏层:3*3卷积层 - 隐藏层:池化层,池化窗口大小为2 - 隐藏层:Dropout层,需要断开的神经元的比例为0.2,防止过拟合 - 隐藏层:全连接层 - 目标输出层:全连接层,神经元数量为self.MS_OUTPUT_SIZE,使用softmax作为激活函数 - 输出层:自定义层,即CTC层,使用CTC的loss作为损失函数,实现连接性时序多输出 - - ''' - # 每一帧使用13维mfcc特征及其13维一阶差分和13维二阶差分表示,最大信号序列长度为1500 - input_data = Input(name='the_input', shape=( - self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)) - - layer_h1 = Conv2D(32, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(input_data) # 卷积层 - layer_h1 = Dropout(0.1)(layer_h1) - layer_h2 = Conv2D(32, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h1) # 卷积层 - layer_h3 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h2) # 池化层 - # layer_h3 = Dropout(0.2)(layer_h2) # 随机中断部分神经网络连接,防止过拟合 - layer_h3 = Dropout(0.1)(layer_h3) - layer_h4 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h3) # 卷积层 - layer_h4 = Dropout(0.2)(layer_h4) - layer_h5 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h4) # 卷积层 - layer_h6 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h5) # 池化层 - - layer_h6 = Dropout(0.2)(layer_h6) - layer_h7 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h6) # 卷积层 - layer_h7 = Dropout(0.3)(layer_h7) - layer_h8 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h7) # 卷积层 - layer_h9 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h8) # 池化层 - - layer_h9 = 
Dropout(0.3)(layer_h9) - layer_h10 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h9) # 卷积层 - layer_h10 = Dropout(0.4)(layer_h10) - layer_h11 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h10) # 卷积层 - layer_h12 = MaxPooling2D( - pool_size=1, strides=None, padding="valid")(layer_h11) # 池化层 - - #test=Model(inputs = input_data, outputs = layer_h12) - # test.summary() - - layer_h10 = Reshape((200, 3200))(layer_h12) # Reshape层 - # layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM层 - # layer_h6 = Dropout(0.2)(layer_h5) # 随机中断部分神经网络连接,防止过拟合 - layer_h10 = Dropout(0.4)(layer_h10) - layer_h11 = Dense(128, activation="relu", use_bias=True, - kernel_initializer='he_normal')(layer_h10) # 全连接层 - layer_h11 = Dropout(0.5)(layer_h11) - layer_h12 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, - kernel_initializer='he_normal')(layer_h11) # 全连接层 - - y_pred = Activation('softmax', name='Activation0')(layer_h12) - model_data = Model(inputs=input_data, outputs=y_pred) - # model_data.summary() - - labels = Input(name='the_labels', shape=[ - self.label_max_string_length], dtype='float32') - input_length = Input(name='input_length', shape=[1], dtype='int64') - label_length = Input(name='label_length', shape=[1], dtype='int64') - # Keras doesn't currently support loss funcs with extra parameters - # so CTC loss is implemented in a lambda layer - - # layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC - loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')( - [y_pred, labels, input_length, label_length]) - - model = Model(inputs=[input_data, labels, - input_length, label_length], outputs=loss_out) - - # model.summary() - - # clipnorm seems to speeds up convergence - #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) - ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06) - - #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) - model.compile(loss={'ctc': lambda y_true, - y_pred: y_pred}, optimizer=ada_d) - - # captures output of softmax so we can decode the output during visualization - test_func = K.function([input_data], [y_pred]) - - print('[*提示] 创建模型成功,模型编译成功') - return model, model_data - - def ctc_lambda_func(self, args): - y_pred, labels, input_length, label_length = args - - y_pred = y_pred[:, :, :] - #y_pred = y_pred[:, 2:, :] - return K.ctc_batch_cost(labels, y_pred, input_length, label_length) - - def TrainModel(self, datapath, epoch=2, save_step=1000, batch_size=32, filename='model_speech/speech_model25'): - ''' - 训练模型 - 参数: - datapath: 数据保存的路径 - epoch: 迭代轮数 - save_step: 每多少步保存一次模型 - filename: 默认保存文件名,不含文件后缀名 - ''' - data = DataSpeech(datapath, 'train') - - num_data = data.GetDataNum() # 获取数据的数量 - - yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH) - - for epoch in range(epoch): # 迭代轮数 - print('[running] train epoch %d .' % epoch) - n_step = 0 # 迭代数据数 - while True: - try: - print('[message] epoch %d . Have train datas %d+' % - (epoch, n_step*save_step)) - # data_genetator是一个生成器函数 - - #self._model.fit_generator(yielddatas, save_step, nb_worker=2) - self._model.fit_generator(yielddatas, save_step) - n_step += 1 - except StopIteration: - print('[error] generator error. 
please check data format.') - break - - self.SaveModel(comment='_e_'+str(epoch) + - '_step_'+str(n_step * save_step)) - self.TestModel( - self.datapath, str_dataset='train', data_count=4) - self.TestModel(self.datapath, str_dataset='dev', data_count=4) - - def LoadModel(self, filename='model_speech/speech_model25.model'): - ''' - 加载模型参数 - ''' - self._model.load_weights(filename) - self.base_model.load_weights(filename + '.base') - - def SaveModel(self, filename='model_speech/speech_model25', comment=''): - ''' - 保存模型参数 - ''' - self._model.save_weights(filename+comment+'.model') - self.base_model.save_weights(filename + comment + '.model.base') - f = open('step25.txt', 'w') - f.write(filename+comment) - f.close() - - def TestModel(self, datapath='', str_dataset='dev', data_count=32, out_report=False, show_ratio=True): - ''' - 测试检验模型效果 - ''' - data = DataSpeech(self.datapath, str_dataset) - # data.LoadDataList(str_dataset) - num_data = data.GetDataNum() # 获取数据的数量 - if(data_count <= 0 or data_count > num_data): # 当data_count为小于等于0或者大于测试数据量的值时,则使用全部数据来测试 - data_count = num_data - - try: - ran_num = random.randint(0, num_data - 1) # 获取一个随机数 - - words_num = 0 - word_error_num = 0 - - nowtime = time.strftime( - '%Y%m%d_%H%M%S', time.localtime(time.time())) - if(out_report == True): - txt_obj = open('Test_Report_' + str_dataset + '_' + - nowtime + '.txt', 'w', encoding='UTF-8') # 打开文件并读入 - - txt = '' - for i in range(data_count): - data_input, data_labels = data.GetData( - (ran_num + i) % num_data) # 从随机数开始连续向后取一定数量数据 - - # 数据格式出错处理 开始 - # 当输入的wav文件长度过长时自动跳过该文件,转而使用下一个wav文件来运行 - num_bias = 0 - while(data_input.shape[0] > self.AUDIO_LENGTH): - print('*[Error]', 'wave data lenghth of num', (ran_num + i) % num_data, - 'is too long.', '\n A Exception raise when test Speech Model.') - num_bias += 1 - data_input, data_labels = data.GetData( - (ran_num + i + num_bias) % num_data) # 从随机数开始连续向后取一定数量数据 - # 数据格式出错处理 结束 - - pre = self.Predict(data_input, data_input.shape[0] // 8) - - words_n = data_labels.shape[0] # 获取每个句子的字数 - words_num += words_n # 把句子的总字数加上 - edit_distance = GetEditDistance(data_labels, pre) # 获取编辑距离 - if(edit_distance <= words_n): # 当编辑距离小于等于句子字数时 - word_error_num += edit_distance # 使用编辑距离作为错误字数 - else: # 否则肯定是增加了一堆乱七八糟的奇奇怪怪的字 - word_error_num += words_n # 就直接加句子本来的总字数就好了 - - if(i % 10 == 0 and show_ratio == True): - print('测试进度:', i, '/', data_count) - - txt = '' - if(out_report == True): - txt += str(i) + '\n' - txt += 'True:\t' + str(data_labels) + '\n' - txt += 'Pred:\t' + str(pre) + '\n' - txt += '\n' - txt_obj.write(txt) - - print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:', - word_error_num / words_num * 100, '%') - if(out_report == True): - txt = '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + \ - str(word_error_num / words_num * 100) + ' %' - txt_obj.write(txt) - txt_obj.close() - - except StopIteration: - print('[Error] Model Test Error. 
please check data format.') - - def Predict(self, data_input, input_len): - ''' - 预测结果 - 返回语音识别后的拼音符号列表 - ''' - - batch_size = 1 - in_len = np.zeros((batch_size), dtype=np.int32) - - in_len[0] = input_len - - x_in = np.zeros( - (batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=np.float) - - for i in range(batch_size): - x_in[i, 0:len(data_input)] = data_input - - base_pred = self.base_model.predict(x=x_in) - - #print('base_pred:\n', base_pred) - - #y_p = base_pred - # for j in range(200): - # mean = np.sum(y_p[0][j]) / y_p[0][j].shape[0] - # print('max y_p:',np.max(y_p[0][j]),'min y_p:',np.min(y_p[0][j]),'mean y_p:',mean,'mid y_p:',y_p[0][j][100]) - # print('argmin:',np.argmin(y_p[0][j]),'argmax:',np.argmax(y_p[0][j])) - # count=0 - # for i in range(y_p[0][j].shape[0]): - # if(y_p[0][j][i] < mean): - # count += 1 - # print('count:',count) - - base_pred = base_pred[:, :, :] - #base_pred =base_pred[:, 2:, :] - - r = K.ctc_decode(base_pred, in_len, greedy=True, - beam_width=100, top_paths=1) - - #print('r', r) - - r1 = K.get_value(r[0][0]) - #print('r1', r1) - - #r2 = K.get_value(r[1]) - # print(r2) - - r1 = r1[0] - - return r1 - pass - - def RecognizeSpeech(self, wavsignal, fs): - ''' - 最终做语音识别用的函数,识别一个wav序列的语音 - 不过这里现在还有bug - ''' - - #data = self.data - #data = DataSpeech('E:\\语音数据集') - # data.LoadDataList('dev') - # 获取输入特征 - #data_input = GetMfccFeature(wavsignal, fs) - # t0=time.time() - data_input = GetFrequencyFeature3(wavsignal, fs) - # t1=time.time() - #print('time cost:',t1-t0) - - input_length = len(data_input) - input_length = input_length // 8 - - data_input = np.array(data_input, dtype=np.float) - # print(data_input,data_input.shape) - data_input = data_input.reshape( - data_input.shape[0], data_input.shape[1], 1) - # t2=time.time() - r1 = self.Predict(data_input, input_length) - # t3=time.time() - #print('time cost:',t3-t2) - list_symbol_dic = GetSymbolList(self.datapath) # 获取拼音列表 - - r_str = [] - for i in r1: - r_str.append(list_symbol_dic[i]) - - return r_str - pass - - def RecognizeSpeech_FromFile(self, filename): - ''' - 最终做语音识别用的函数,识别指定文件名的语音 - ''' - - wavsignal, fs = read_wav_data(filename) - - r = self.RecognizeSpeech(wavsignal, fs) - - return r - - pass - - @property - def model(self): - ''' - 返回keras model - ''' - return self._model - - -if(__name__ == '__main__'): - - import tensorflow as tf - from keras.backend.tensorflow_backend import set_session - os.environ["CUDA_VISIBLE_DEVICES"] = "0" - # 进行配置,使用70%的GPU - config = tf.ConfigProto() - config.gpu_options.per_process_gpu_memory_fraction = 0.93 - # config.gpu_options.allow_growth=True #不全部占满显存, 按需分配 - set_session(tf.Session(config=config)) - - datapath = '' - modelpath = 'model_speech' - - if(not os.path.exists(modelpath)): # 判断保存模型的目录是否存在 - os.makedirs(modelpath) # 如果不存在,就新建一个,避免之后保存模型的时候炸掉 - - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - datapath = 'E:\\语音数据集' - modelpath = modelpath + '\\' - elif(system_type == 'Linux'): - datapath = 'dataset' - modelpath = modelpath + '/' - else: - print('*[Message] Unknown System\n') - datapath = 'dataset' - modelpath = modelpath + '/' - - ms = ModelSpeech(datapath) - - #ms.LoadModel(modelpath + 'm25/speech_model25_e_0_step_545500.model') - ms.TrainModel(datapath, epoch=50, batch_size=16, save_step=500) - #ms.TestModel(datapath, str_dataset='test', data_count = 128, out_report = True) - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav') - #r = 
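# [Editor's note] TestModel() above reports word_error_num / words_num * 100,
# an edit-distance-based error rate clamped so that one sentence can contribute
# at most its own length. GetEditDistance comes from general_function.gen_func
# (not part of this excerpt); a standard Levenshtein distance such as this
# sketch is the usual implementation.
def levenshtein(a, b):
    prev = list(range(len(b) + 1))
    for i, ca in enumerate(a, 1):
        cur = [i]
        for j, cb in enumerate(b, 1):
            cur.append(min(prev[j] + 1,                  # deletion
                           cur[j - 1] + 1,               # insertion
                           prev[j - 1] + (ca != cb)))    # substitution
        prev = cur
    return prev[-1]

assert levenshtein('kao3 yan2', 'kao3 yan1') == 1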
ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav') - #print('*[提示] 语音识别结果:\n',r) -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -@author: nl8590687 -""" -import platform as plat -import os -import time - -from general_function.file_wav import * -from general_function.file_dict import * -from general_function.gen_func import * - -# LSTM_CNN -import keras as kr -import numpy as np -import random - -from keras.models import Sequential, Model -from keras.layers import Dense, Dropout, Input, Reshape, BatchNormalization # , Flatten -from keras.layers import Lambda, TimeDistributed, Activation, Conv2D, MaxPooling2D # , Merge -from keras import backend as K -from keras.optimizers import SGD, Adadelta, Adam - -from readdata24 import DataSpeech - -abspath = '' -ModelName = '251' -#NUM_GPU = 2 - - -class ModelSpeech(): # 语音模型类 - def __init__(self, datapath): - ''' - 初始化 - 默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块 - ''' - MS_OUTPUT_SIZE = 1424 - self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 - # self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch - self.label_max_string_length = 64 - self.AUDIO_LENGTH = 1600 - self.AUDIO_FEATURE_LENGTH = 200 - self._model, self.base_model = self.CreateModel() - - self.datapath = datapath - self.slash = '' - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - self.slash = '\\' # 反斜杠 - elif(system_type == 'Linux'): - self.slash = '/' # 正斜杠 - else: - print('*[Message] Unknown System\n') - self.slash = '/' # 正斜杠 - if(self.slash != self.datapath[-1]): # 在目录路径末尾增加斜杠 - self.datapath = self.datapath + self.slash - - def CreateModel(self): - ''' - 定义CNN/LSTM/CTC模型,使用函数式模型 - 输入层:200维的特征值序列,一条语音数据的最大长度设为1600(大约16s) - 隐藏层:卷积池化层,卷积核大小为3x3,池化窗口大小为2 - 隐藏层:全连接层 - 输出层:全连接层,神经元数量为self.MS_OUTPUT_SIZE,使用softmax作为激活函数, - CTC层:使用CTC的loss作为损失函数,实现连接性时序多输出 - - ''' - - input_data = Input(name='the_input', shape=( - self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)) - - layer_h1 = Conv2D(32, (3, 3), use_bias=False, activation='relu', - padding='same', kernel_initializer='he_normal')(input_data) # 卷积层 - layer_h1 = Dropout(0.05)(layer_h1) - layer_h2 = Conv2D(32, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h1) # 卷积层 - layer_h3 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h2) # 池化层 - # layer_h3 = Dropout(0.2)(layer_h2) # 随机中断部分神经网络连接,防止过拟合 - layer_h3 = Dropout(0.05)(layer_h3) - layer_h4 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h3) # 卷积层 - layer_h4 = Dropout(0.1)(layer_h4) - layer_h5 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h4) # 卷积层 - layer_h6 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h5) # 池化层 - - layer_h6 = Dropout(0.1)(layer_h6) - layer_h7 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h6) # 卷积层 - layer_h7 = Dropout(0.15)(layer_h7) - layer_h8 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h7) # 卷积层 - layer_h9 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h8) # 池化层 - - layer_h9 = Dropout(0.15)(layer_h9) - layer_h10 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - 
padding='same', kernel_initializer='he_normal')(layer_h9) # 卷积层 - layer_h10 = Dropout(0.2)(layer_h10) - layer_h11 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h10) # 卷积层 - layer_h12 = MaxPooling2D( - pool_size=1, strides=None, padding="valid")(layer_h11) # 池化层 - - layer_h12 = Dropout(0.2)(layer_h12) - layer_h13 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h12) # 卷积层 - layer_h13 = Dropout(0.2)(layer_h13) - layer_h14 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h13) # 卷积层 - layer_h15 = MaxPooling2D( - pool_size=1, strides=None, padding="valid")(layer_h14) # 池化层 - - #test=Model(inputs = input_data, outputs = layer_h12) - # test.summary() - - layer_h16 = Reshape((200, 3200))(layer_h15) # Reshape层 - # layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM层 - # layer_h6 = Dropout(0.2)(layer_h5) # 随机中断部分神经网络连接,防止过拟合 - layer_h16 = Dropout(0.3)(layer_h16) - layer_h17 = Dense(128, activation="relu", use_bias=True, - kernel_initializer='he_normal')(layer_h16) # 全连接层 - layer_h17 = Dropout(0.3)(layer_h17) - layer_h18 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, - kernel_initializer='he_normal')(layer_h17) # 全连接层 - - y_pred = Activation('softmax', name='Activation0')(layer_h18) - model_data = Model(inputs=input_data, outputs=y_pred) - # model_data.summary() - - labels = Input(name='the_labels', shape=[ - self.label_max_string_length], dtype='float32') - input_length = Input(name='input_length', shape=[1], dtype='int64') - label_length = Input(name='label_length', shape=[1], dtype='int64') - # Keras doesn't currently support loss funcs with extra parameters - # so CTC loss is implemented in a lambda layer - - # layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC - loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')( - [y_pred, labels, input_length, label_length]) - - model = Model(inputs=[input_data, labels, - input_length, label_length], outputs=loss_out) - - model.summary() - - # clipnorm seems to speeds up convergence - #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) - #opt = Adadelta(lr = 0.01, rho = 0.95, epsilon = 1e-06) - opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, - decay=0.0, epsilon=10e-8) - #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) - model.compile(loss={'ctc': lambda y_true, - y_pred: y_pred}, optimizer=opt) - - # captures output of softmax so we can decode the output during visualization - test_func = K.function([input_data], [y_pred]) - - #print('[*提示] 创建模型成功,模型编译成功') - print('[*Info] Create Model Successful, Compiles Model Successful. 
') - return model, model_data - - def ctc_lambda_func(self, args): - y_pred, labels, input_length, label_length = args - - y_pred = y_pred[:, :, :] - #y_pred = y_pred[:, 2:, :] - return K.ctc_batch_cost(labels, y_pred, input_length, label_length) - - def TrainModel(self, datapath, epoch=2, save_step=1000, batch_size=32, filename=abspath + 'model_speech/m' + ModelName + '/speech_model'+ModelName): - ''' - 训练模型 - 参数: - datapath: 数据保存的路径 - epoch: 迭代轮数 - save_step: 每多少步保存一次模型 - filename: 默认保存文件名,不含文件后缀名 - ''' - data = DataSpeech(datapath, 'train') - - num_data = data.GetDataNum() # 获取数据的数量 - - yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH) - - for epoch in range(epoch): # 迭代轮数 - print('[running] train epoch %d .' % epoch) - n_step = 0 # 迭代数据数 - while True: - try: - print('[message] epoch %d . Have train datas %d+' % - (epoch, n_step*save_step)) - # data_genetator是一个生成器函数 - - #self._model.fit_generator(yielddatas, save_step, nb_worker=2) - self._model.fit_generator(yielddatas, save_step) - n_step += 1 - except StopIteration: - print('[error] generator error. please check data format.') - break - - self.SaveModel(comment='_e_'+str(epoch) + - '_step_'+str(n_step * save_step)) - self.TestModel( - self.datapath, str_dataset='train', data_count=4) - self.TestModel(self.datapath, str_dataset='dev', data_count=4) - - def LoadModel(self, filename=abspath + 'model_speech/m'+ModelName+'/speech_model'+ModelName+'.model'): - ''' - 加载模型参数 - ''' - self._model.load_weights(filename) - self.base_model.load_weights(filename + '.base') - - def SaveModel(self, filename=abspath + 'model_speech/m'+ModelName+'/speech_model'+ModelName, comment=''): - ''' - 保存模型参数 - ''' - self._model.save_weights(filename + comment + '.model') - self.base_model.save_weights(filename + comment + '.model.base') - # 需要安装 hdf5 模块 - self._model.save(filename + comment + '.h5') - self.base_model.save(filename + comment + '.base.h5') - f = open('step'+ModelName+'.txt', 'w') - f.write(filename+comment) - f.close() - - def TestModel(self, datapath='', str_dataset='dev', data_count=32, out_report=False, show_ratio=True, io_step_print=10, io_step_file=10): - ''' - 测试检验模型效果 - - io_step_print - 为了减少测试时标准输出的io开销,可以通过调整这个参数来实现 - - io_step_file - 为了减少测试时文件读写的io开销,可以通过调整这个参数来实现 - - ''' - data = DataSpeech(self.datapath, str_dataset) - # data.LoadDataList(str_dataset) - num_data = data.GetDataNum() # 获取数据的数量 - if(data_count <= 0 or data_count > num_data): # 当data_count为小于等于0或者大于测试数据量的值时,则使用全部数据来测试 - data_count = num_data - - try: - ran_num = random.randint(0, num_data - 1) # 获取一个随机数 - - words_num = 0 - word_error_num = 0 - - nowtime = time.strftime( - '%Y%m%d_%H%M%S', time.localtime(time.time())) - if(out_report == True): - txt_obj = open('Test_Report_' + str_dataset + '_' + - nowtime + '.txt', 'w', encoding='UTF-8') # 打开文件并读入 - - txt = '测试报告\n模型编号 ' + ModelName + '\n\n' - for i in range(data_count): - data_input, data_labels = data.GetData( - (ran_num + i) % num_data) # 从随机数开始连续向后取一定数量数据 - - # 数据格式出错处理 开始 - # 当输入的wav文件长度过长时自动跳过该文件,转而使用下一个wav文件来运行 - num_bias = 0 - while(data_input.shape[0] > self.AUDIO_LENGTH): - print('*[Error]', 'wave data lenghth of num', (ran_num + i) % num_data, - 'is too long.', '\n A Exception raise when test Speech Model.') - num_bias += 1 - data_input, data_labels = data.GetData( - (ran_num + i + num_bias) % num_data) # 从随机数开始连续向后取一定数量数据 - # 数据格式出错处理 结束 - - pre = self.Predict(data_input, data_input.shape[0] // 8) - - words_n = data_labels.shape[0] # 获取每个句子的字数 - words_num += words_n # 把句子的总字数加上 - edit_distance 
= GetEditDistance(data_labels, pre) # 获取编辑距离 - if(edit_distance <= words_n): # 当编辑距离小于等于句子字数时 - word_error_num += edit_distance # 使用编辑距离作为错误字数 - else: # 否则肯定是增加了一堆乱七八糟的奇奇怪怪的字 - word_error_num += words_n # 就直接加句子本来的总字数就好了 - - if((i % io_step_print == 0 or i == data_count - 1) and show_ratio == True): - # print('测试进度:',i,'/',data_count) - print('Test Count: ', i, '/', data_count) - - if(out_report == True): - if(i % io_step_file == 0 or i == data_count - 1): - txt_obj.write(txt) - txt = '' - - txt += str(i) + '\n' - txt += 'True:\t' + str(data_labels) + '\n' - txt += 'Pred:\t' + str(pre) + '\n' - txt += '\n' - - #print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:', word_error_num / words_num * 100, '%') - print('*[Test Result] Speech Recognition ' + str_dataset + - ' set word error ratio: ', word_error_num / words_num * 100, '%') - if(out_report == True): - txt += '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + \ - str(word_error_num / words_num * 100) + ' %' - txt_obj.write(txt) - txt = '' - txt_obj.close() - - except StopIteration: - print('[Error] Model Test Error. please check data format.') - - def Predict(self, data_input, input_len): - ''' - 预测结果 - 返回语音识别后的拼音符号列表 - ''' - - batch_size = 1 - in_len = np.zeros((batch_size), dtype=np.int32) - - in_len[0] = input_len - - x_in = np.zeros( - (batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=np.float) - - for i in range(batch_size): - x_in[i, 0:len(data_input)] = data_input - - base_pred = self.base_model.predict(x=x_in) - - #print('base_pred:\n', base_pred) - - #y_p = base_pred - # for j in range(200): - # mean = np.sum(y_p[0][j]) / y_p[0][j].shape[0] - # print('max y_p:',np.max(y_p[0][j]),'min y_p:',np.min(y_p[0][j]),'mean y_p:',mean,'mid y_p:',y_p[0][j][100]) - # print('argmin:',np.argmin(y_p[0][j]),'argmax:',np.argmax(y_p[0][j])) - # count=0 - # for i in range(y_p[0][j].shape[0]): - # if(y_p[0][j][i] < mean): - # count += 1 - # print('count:',count) - - base_pred = base_pred[:, :, :] - #base_pred =base_pred[:, 2:, :] - - r = K.ctc_decode(base_pred, in_len, greedy=True, - beam_width=100, top_paths=1) - - #print('r', r) - - r1 = K.get_value(r[0][0]) - #print('r1', r1) - - #r2 = K.get_value(r[1]) - # print(r2) - - r1 = r1[0] - - return r1 - pass - - def RecognizeSpeech(self, wavsignal, fs): - ''' - 最终做语音识别用的函数,识别一个wav序列的语音 - 不过这里现在还有bug - ''' - - #data = self.data - #data = DataSpeech('E:\\语音数据集') - # data.LoadDataList('dev') - # 获取输入特征 - #data_input = GetMfccFeature(wavsignal, fs) - # t0=time.time() - data_input = GetFrequencyFeature3(wavsignal, fs) - # t1=time.time() - #print('time cost:',t1-t0) - - input_length = len(data_input) - input_length = input_length // 8 - - data_input = np.array(data_input, dtype=np.float) - # print(data_input,data_input.shape) - data_input = data_input.reshape( - data_input.shape[0], data_input.shape[1], 1) - # t2=time.time() - r1 = self.Predict(data_input, input_length) - # t3=time.time() - #print('time cost:',t3-t2) - list_symbol_dic = GetSymbolList(self.datapath) # 获取拼音列表 - - r_str = [] - for i in r1: - r_str.append(list_symbol_dic[i]) - - return r_str - pass - - def RecognizeSpeech_FromFile(self, filename): - ''' - 最终做语音识别用的函数,识别指定文件名的语音 - ''' - - wavsignal, fs = read_wav_data(filename) - - r = self.RecognizeSpeech(wavsignal, fs) - - return r - - pass - - @property - def model(self): - ''' - 返回keras model - ''' - return self._model - - -if(__name__ == '__main__'): - - #import tensorflow as tf - #from keras.backend.tensorflow_backend import set_session - #os.environ["CUDA_VISIBLE_DEVICES"] = "0" - # 
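# [Editor's note] The commented-out block around here configures the
# TensorFlow 1.x session that Keras uses; the Chinese comment reads "don't
# occupy all of the GPU memory, allocate on demand". A sketch of the two usual
# options (pick one):
#
#   import tensorflow as tf
#   from keras.backend.tensorflow_backend import set_session
#   config = tf.ConfigProto()
#   config.gpu_options.per_process_gpu_memory_fraction = 0.95  # hard cap at 95%
#   # config.gpu_options.allow_growth = True                   # or: grow on demand
#   set_session(tf.Session(config=config))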
进行配置,使用95%的GPU - #config = tf.ConfigProto() - #config.gpu_options.per_process_gpu_memory_fraction = 0.95 - # config.gpu_options.allow_growth=True #不全部占满显存, 按需分配 - # set_session(tf.Session(config=config)) - - datapath = abspath + '' - modelpath = abspath + 'model_speech' - - if(not os.path.exists(modelpath)): # 判断保存模型的目录是否存在 - os.makedirs(modelpath) # 如果不存在,就新建一个,避免之后保存模型的时候炸掉 - - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - datapath = 'E:\\语音数据集' - modelpath = modelpath + '\\' - elif(system_type == 'Linux'): - datapath = abspath + 'dataset' - modelpath = modelpath + '/' - else: - print('*[Message] Unknown System\n') - datapath = 'dataset' - modelpath = modelpath + '/' - - ms = ModelSpeech(datapath) - - #ms.LoadModel(modelpath + 'm251/speech_model251_e_0_step_100000.model') - ms.TrainModel(datapath, epoch=50, batch_size=16, save_step=500) - - # t1=time.time() - #ms.TestModel(datapath, str_dataset='train', data_count = 128, out_report = True) - #ms.TestModel(datapath, str_dataset='dev', data_count = 128, out_report = True) - #ms.TestModel(datapath, str_dataset='test', data_count = 128, out_report = True) - # t2=time.time() - #print('Test Model Time Cost:',t2-t1,'s') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav') - #print('*[提示] 语音识别结果:\n',r) -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -@author: nl8590687 -""" -import platform as plat -import os -import time - -from general_function.file_wav import * -from general_function.file_dict import * -from general_function.gen_func import * -from general_function.muti_gpu import * - -import keras as kr -import numpy as np -import random - -from keras.models import Sequential, Model -from keras.layers import Dense, Dropout, Input, Reshape, BatchNormalization # , Flatten -from keras.layers import Lambda, TimeDistributed, Activation, Conv2D, MaxPooling2D # , Merge -from keras import backend as K -from keras.optimizers import SGD, Adadelta, Adam - -from readdata24 import DataSpeech - -abspath = '' -ModelName = '251' -NUM_GPU = 2 - - -class ModelSpeech(): # 语音模型类 - def __init__(self, datapath): - ''' - 初始化 - 默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块 - ''' - MS_OUTPUT_SIZE = 1424 - self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 - # self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch - self.label_max_string_length = 64 - self.AUDIO_LENGTH = 1600 - self.AUDIO_FEATURE_LENGTH = 200 - self._model, self.base_model = self.CreateModel() - - self.datapath = datapath - self.slash = '' - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - self.slash = '\\' # 反斜杠 - elif(system_type == 'Linux'): - self.slash = '/' # 正斜杠 - else: - print('*[Message] Unknown System\n') - self.slash = '/' # 正斜杠 - if(self.slash != self.datapath[-1]): # 在目录路径末尾增加斜杠 - self.datapath = self.datapath + self.slash - - def CreateModel(self): - ''' - 定义CNN/LSTM/CTC模型,使用函数式模型 - 输入层:200维的特征值序列,一条语音数据的最大长度设为1600(大约16s) - 隐藏层:卷积池化层,卷积核大小为3x3,池化窗口大小为2 - 隐藏层:全连接层 - 输出层:全连接层,神经元数量为self.MS_OUTPUT_SIZE,使用softmax作为激活函数, - CTC层:使用CTC的loss作为损失函数,实现连接性时序多输出 - - ''' - - input_data = Input(name='the_input', shape=( - self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)) - - layer_h1 = Conv2D(32, (3, 3), use_bias=False, activation='relu', - 
padding='same', kernel_initializer='he_normal')(input_data) # 卷积层 - layer_h1 = Dropout(0.05)(layer_h1) - layer_h2 = Conv2D(32, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h1) # 卷积层 - layer_h3 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h2) # 池化层 - # layer_h3 = Dropout(0.2)(layer_h2) # 随机中断部分神经网络连接,防止过拟合 - layer_h3 = Dropout(0.05)(layer_h3) - layer_h4 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h3) # 卷积层 - layer_h4 = Dropout(0.1)(layer_h4) - layer_h5 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h4) # 卷积层 - layer_h6 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h5) # 池化层 - - layer_h6 = Dropout(0.1)(layer_h6) - layer_h7 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h6) # 卷积层 - layer_h7 = Dropout(0.15)(layer_h7) - layer_h8 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h7) # 卷积层 - layer_h9 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h8) # 池化层 - - layer_h9 = Dropout(0.15)(layer_h9) - layer_h10 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h9) # 卷积层 - layer_h10 = Dropout(0.2)(layer_h10) - layer_h11 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h10) # 卷积层 - layer_h12 = MaxPooling2D( - pool_size=1, strides=None, padding="valid")(layer_h11) # 池化层 - - layer_h12 = Dropout(0.2)(layer_h12) - layer_h13 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h12) # 卷积层 - layer_h13 = Dropout(0.3)(layer_h13) - layer_h14 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h13) # 卷积层 - layer_h15 = MaxPooling2D( - pool_size=1, strides=None, padding="valid")(layer_h14) # 池化层 - - #test=Model(inputs = input_data, outputs = layer_h12) - # test.summary() - - layer_h16 = Reshape((200, 3200))(layer_h15) # Reshape层 - # layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM层 - # layer_h6 = Dropout(0.2)(layer_h5) # 随机中断部分神经网络连接,防止过拟合 - layer_h16 = Dropout(0.3)(layer_h16) - layer_h17 = Dense(128, activation="relu", use_bias=True, - kernel_initializer='he_normal')(layer_h16) # 全连接层 - layer_h17 = Dropout(0.3)(layer_h17) - layer_h18 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, - kernel_initializer='he_normal')(layer_h17) # 全连接层 - - y_pred = Activation('softmax', name='Activation0')(layer_h18) - model_data = Model(inputs=input_data, outputs=y_pred) - # model_data.summary() - - labels = Input(name='the_labels', shape=[ - self.label_max_string_length], dtype='float32') - input_length = Input(name='input_length', shape=[1], dtype='int64') - label_length = Input(name='label_length', shape=[1], dtype='int64') - # Keras doesn't currently support loss funcs with extra parameters - # so CTC loss is implemented in a lambda layer - - # layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC - loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')( - [y_pred, labels, input_length, label_length]) - - model = Model(inputs=[input_data, labels, - input_length, 
label_length], outputs=loss_out) - - model.summary() - - # clipnorm seems to speeds up convergence - #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) - #ada_d = Adadelta(lr = 0.01, rho = 0.95, epsilon = 1e-06) - opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, - decay=0.0, epsilon=10e-8) - #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) - - model.build((self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)) - model = ParallelModel(model, NUM_GPU) - - model.compile(loss={'ctc': lambda y_true, - y_pred: y_pred}, optimizer=opt) - - # captures output of softmax so we can decode the output during visualization - test_func = K.function([input_data], [y_pred]) - - #print('[*提示] 创建模型成功,模型编译成功') - print('[*Info] Create Model Successful, Compiles Model Successful. ') - return model, model_data - - def ctc_lambda_func(self, args): - y_pred, labels, input_length, label_length = args - - y_pred = y_pred[:, :, :] - #y_pred = y_pred[:, 2:, :] - return K.ctc_batch_cost(labels, y_pred, input_length, label_length) - - def TrainModel(self, datapath, epoch=2, save_step=1000, batch_size=32, filename=abspath + 'model_speech/m' + ModelName + '/speech_model'+ModelName): - ''' - 训练模型 - 参数: - datapath: 数据保存的路径 - epoch: 迭代轮数 - save_step: 每多少步保存一次模型 - filename: 默认保存文件名,不含文件后缀名 - ''' - data = DataSpeech(datapath, 'train') - - num_data = data.GetDataNum() # 获取数据的数量 - - yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH) - - for epoch in range(epoch): # 迭代轮数 - print('[running] train epoch %d .' % epoch) - n_step = 0 # 迭代数据数 - while True: - try: - print('[message] epoch %d . Have train datas %d+' % - (epoch, n_step*save_step)) - # data_genetator是一个生成器函数 - - #self._model.fit_generator(yielddatas, save_step, nb_worker=2) - self._model.fit_generator(yielddatas, save_step) - n_step += 1 - except StopIteration: - print('[error] generator error. 
please check data format.') - break - - self.SaveModel(comment='_e_'+str(epoch) + - '_step_'+str(n_step * save_step)) - self.TestModel( - self.datapath, str_dataset='train', data_count=4) - self.TestModel(self.datapath, str_dataset='dev', data_count=4) - - def LoadModel(self, filename=abspath + 'model_speech/m'+ModelName+'/speech_model'+ModelName+'.model'): - ''' - 加载模型参数 - ''' - self._model.load_weights(filename) - self.base_model.load_weights(filename + '.base') - - def SaveModel(self, filename=abspath + 'model_speech/m'+ModelName+'/speech_model'+ModelName, comment=''): - ''' - 保存模型参数 - ''' - self._model.save_weights(filename+comment+'.model') - self.base_model.save_weights(filename + comment + '.model.base') - f = open('step'+ModelName+'.txt', 'w') - f.write(filename+comment) - f.close() - - def TestModel(self, datapath='', str_dataset='dev', data_count=32, out_report=False, show_ratio=True): - ''' - 测试检验模型效果 - ''' - data = DataSpeech(self.datapath, str_dataset) - # data.LoadDataList(str_dataset) - num_data = data.GetDataNum() # 获取数据的数量 - if(data_count <= 0 or data_count > num_data): # 当data_count为小于等于0或者大于测试数据量的值时,则使用全部数据来测试 - data_count = num_data - - try: - ran_num = random.randint(0, num_data - 1) # 获取一个随机数 - - words_num = 0 - word_error_num = 0 - - nowtime = time.strftime( - '%Y%m%d_%H%M%S', time.localtime(time.time())) - if(out_report == True): - txt_obj = open('Test_Report_' + str_dataset + '_' + - nowtime + '.txt', 'w', encoding='UTF-8') # 打开文件并读入 - - txt = '' - for i in range(data_count): - data_input, data_labels = data.GetData( - (ran_num + i) % num_data) # 从随机数开始连续向后取一定数量数据 - - # 数据格式出错处理 开始 - # 当输入的wav文件长度过长时自动跳过该文件,转而使用下一个wav文件来运行 - num_bias = 0 - while(data_input.shape[0] > self.AUDIO_LENGTH): - print('*[Error]', 'wave data lenghth of num', (ran_num + i) % num_data, - 'is too long.', '\n A Exception raise when test Speech Model.') - num_bias += 1 - data_input, data_labels = data.GetData( - (ran_num + i + num_bias) % num_data) # 从随机数开始连续向后取一定数量数据 - # 数据格式出错处理 结束 - - pre = self.Predict(data_input, data_input.shape[0] // 8) - - words_n = data_labels.shape[0] # 获取每个句子的字数 - words_num += words_n # 把句子的总字数加上 - edit_distance = GetEditDistance(data_labels, pre) # 获取编辑距离 - if(edit_distance <= words_n): # 当编辑距离小于等于句子字数时 - word_error_num += edit_distance # 使用编辑距离作为错误字数 - else: # 否则肯定是增加了一堆乱七八糟的奇奇怪怪的字 - word_error_num += words_n # 就直接加句子本来的总字数就好了 - - if(i % 10 == 0 and show_ratio == True): - print('Test Count: ', i, '/', data_count) - - txt = '' - if(out_report == True): - txt += str(i) + '\n' - txt += 'True:\t' + str(data_labels) + '\n' - txt += 'Pred:\t' + str(pre) + '\n' - txt += '\n' - txt_obj.write(txt) - - #print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:', word_error_num / words_num * 100, '%') - print('*[Test Result] Speech Recognition ' + str_dataset + - ' set word error ratio: ', word_error_num / words_num * 100, '%') - if(out_report == True): - txt = '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + \ - str(word_error_num / words_num * 100) + ' %' - txt_obj.write(txt) - txt_obj.close() - - except StopIteration: - print('[Error] Model Test Error. 
please check data format.') - - def Predict(self, data_input, input_len): - ''' - 预测结果 - 返回语音识别后的拼音符号列表 - ''' - - batch_size = 1 - in_len = np.zeros((batch_size), dtype=np.int32) - - in_len[0] = input_len - - x_in = np.zeros( - (batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=np.float) - - for i in range(batch_size): - x_in[i, 0:len(data_input)] = data_input - - base_pred = self.base_model.predict(x=x_in) - - #print('base_pred:\n', base_pred) - - #y_p = base_pred - # for j in range(200): - # mean = np.sum(y_p[0][j]) / y_p[0][j].shape[0] - # print('max y_p:',np.max(y_p[0][j]),'min y_p:',np.min(y_p[0][j]),'mean y_p:',mean,'mid y_p:',y_p[0][j][100]) - # print('argmin:',np.argmin(y_p[0][j]),'argmax:',np.argmax(y_p[0][j])) - # count=0 - # for i in range(y_p[0][j].shape[0]): - # if(y_p[0][j][i] < mean): - # count += 1 - # print('count:',count) - - base_pred = base_pred[:, :, :] - #base_pred =base_pred[:, 2:, :] - - r = K.ctc_decode(base_pred, in_len, greedy=True, - beam_width=100, top_paths=1) - - #print('r', r) - - r1 = K.get_value(r[0][0]) - #print('r1', r1) - - #r2 = K.get_value(r[1]) - # print(r2) - - r1 = r1[0] - - return r1 - pass - - def RecognizeSpeech(self, wavsignal, fs): - ''' - 最终做语音识别用的函数,识别一个wav序列的语音 - 不过这里现在还有bug - ''' - - #data = self.data - #data = DataSpeech('E:\\语音数据集') - # data.LoadDataList('dev') - # 获取输入特征 - #data_input = GetMfccFeature(wavsignal, fs) - # t0=time.time() - data_input = GetFrequencyFeature3(wavsignal, fs) - # t1=time.time() - #print('time cost:',t1-t0) - - input_length = len(data_input) - input_length = input_length // 8 - - data_input = np.array(data_input, dtype=np.float) - # print(data_input,data_input.shape) - data_input = data_input.reshape( - data_input.shape[0], data_input.shape[1], 1) - # t2=time.time() - r1 = self.Predict(data_input, input_length) - # t3=time.time() - #print('time cost:',t3-t2) - list_symbol_dic = GetSymbolList(self.datapath) # 获取拼音列表 - - r_str = [] - for i in r1: - r_str.append(list_symbol_dic[i]) - - return r_str - pass - - def RecognizeSpeech_FromFile(self, filename): - ''' - 最终做语音识别用的函数,识别指定文件名的语音 - ''' - - wavsignal, fs = read_wav_data(filename) - - r = self.RecognizeSpeech(wavsignal, fs) - - return r - - pass - - @property - def model(self): - ''' - 返回keras model - ''' - return self._model - - -if(__name__ == '__main__'): - - #import tensorflow as tf - #from keras.backend.tensorflow_backend import set_session - #os.environ["CUDA_VISIBLE_DEVICES"] = "1" - # 进行配置,使用70%的GPU - #config = tf.ConfigProto() - #config.gpu_options.per_process_gpu_memory_fraction = 0.95 - # config.gpu_options.allow_growth=True #不全部占满显存, 按需分配 - # set_session(tf.Session(config=config)) - - datapath = abspath + '' - modelpath = abspath + 'model_speech' - - if(not os.path.exists(modelpath)): # 判断保存模型的目录是否存在 - os.makedirs(modelpath) # 如果不存在,就新建一个,避免之后保存模型的时候炸掉 - - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - datapath = 'E:\\语音数据集' - modelpath = modelpath + '\\' - elif(system_type == 'Linux'): - datapath = abspath + 'dataset' - modelpath = modelpath + '/' - else: - print('*[Message] Unknown System\n') - datapath = 'dataset' - modelpath = modelpath + '/' - - ms = ModelSpeech(datapath) - - #ms.LoadModel(modelpath + 'm251/speech_model251_e_0_step_98000.model') - ms.TrainModel(datapath, epoch=50, batch_size=16, save_step=500) - #ms.TestModel(datapath, str_dataset='test', data_count = 128, out_report = True) - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav') - #r = 
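# [Editor's note] This variant wraps the compiled model for NUM_GPU-way data
# parallelism via ParallelModel from general_function.muti_gpu, which is not
# part of this excerpt. The stock Keras 2.x equivalent is
# keras.utils.multi_gpu_model, as in this hypothetical sketch (it needs two or
# more visible GPUs at run time):
#
#   from keras.utils import multi_gpu_model
#   parallel = multi_gpu_model(model, gpus=NUM_GPU)  # replicate on NUM_GPU devices
#   parallel.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=opt)
#
# Each input batch is split evenly across the GPUs and the sub-batch outputs
# are concatenated back on the CPU.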
ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav') - #print('*[提示] 语音识别结果:\n',r) -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -@author: nl8590687 -""" -import platform as plat -import os -import time - -from general_function.file_wav import * -from general_function.file_dict import * -from general_function.gen_func import * - -# LSTM_CNN -import keras as kr -import numpy as np -import random - -from keras.models import Sequential, Model -# , Flatten,LSTM,Convolution1D,MaxPooling1D,Merge -from keras.layers import Dense, Dropout, Input, Reshape -from keras.layers import Conv1D, LSTM, MaxPooling1D, Lambda, TimeDistributed, Activation, Conv2D, MaxPooling2D # , Merge,Conv1D -from keras import backend as K -from keras.optimizers import SGD, Adadelta - -from readdata24 import DataSpeech - -abspath = '' -ModelName = '252' - - -class ModelSpeech(): # 语音模型类 - def __init__(self, datapath): - ''' - 初始化 - 默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块 - ''' - MS_OUTPUT_SIZE = 1422 - self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 - # self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch - self.label_max_string_length = 64 - self.AUDIO_LENGTH = 1600 - self.AUDIO_FEATURE_LENGTH = 200 - self._model, self.base_model = self.CreateModel() - - self.datapath = datapath - self.slash = '' - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - self.slash = '\\' # 反斜杠 - elif(system_type == 'Linux'): - self.slash = '/' # 正斜杠 - else: - print('*[Message] Unknown System\n') - self.slash = '/' # 正斜杠 - if(self.slash != self.datapath[-1]): # 在目录路径末尾增加斜杠 - self.datapath = self.datapath + self.slash - - def CreateModel(self): - ''' - 定义CNN/LSTM/CTC模型,使用函数式模型 - 输入层:39维的特征值序列,一条语音数据的最大长度设为1500(大约15s) - 隐藏层一:1024个神经元的卷积层 - 隐藏层二:池化层,池化窗口大小为2 - 隐藏层三:Dropout层,需要断开的神经元的比例为0.2,防止过拟合 - 隐藏层四:循环层、LSTM层 - 隐藏层五:Dropout层,需要断开的神经元的比例为0.2,防止过拟合 - 隐藏层六:全连接层,神经元数量为self.MS_OUTPUT_SIZE,使用softmax作为激活函数, - 输出层:自定义层,即CTC层,使用CTC的loss作为损失函数,实现连接性时序多输出 - - ''' - # 每一帧使用13维mfcc特征及其13维一阶差分和13维二阶差分表示,最大信号序列长度为1500 - input_data = Input(name='the_input', shape=( - self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)) - - layer_h1 = Conv2D(32, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(input_data) # 卷积层 - layer_h1 = Dropout(0.1)(layer_h1) - layer_h2 = Conv2D(32, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h1) # 卷积层 - layer_h3 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h2) # 池化层 - # layer_h3 = Dropout(0.2)(layer_h2) # 随机中断部分神经网络连接,防止过拟合 - layer_h3 = Dropout(0.1)(layer_h3) - layer_h4 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h3) # 卷积层 - layer_h4 = Dropout(0.2)(layer_h4) - layer_h5 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h4) # 卷积层 - layer_h6 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h5) # 池化层 - - layer_h6 = Dropout(0.2)(layer_h6) - layer_h7 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h6) # 卷积层 - layer_h7 = Dropout(0.3)(layer_h7) - layer_h8 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h7) # 卷积层 - layer_h9 = 
MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h8) # 池化层 - - layer_h9 = Dropout(0.3)(layer_h9) - layer_h10 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h9) # 卷积层 - layer_h10 = Dropout(0.4)(layer_h10) - layer_h11 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h10) # 卷积层 - layer_h12 = MaxPooling2D( - pool_size=1, strides=None, padding="valid")(layer_h11) # 池化层 - - layer_h12 = Dropout(0.4)(layer_h12) - layer_h13 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h12) # 卷积层 - layer_h13 = Dropout(0.5)(layer_h13) - layer_h14 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h13) # 卷积层 - layer_h15 = MaxPooling2D( - pool_size=1, strides=None, padding="valid")(layer_h14) # 池化层 - - #test=Model(inputs = input_data, outputs = layer_h12) - # test.summary() - - layer_h16 = Reshape((200, 3200))(layer_h15) # Reshape层 - # layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM层 - # layer_h6 = Dropout(0.2)(layer_h5) # 随机中断部分神经网络连接,防止过拟合 - layer_h16 = Dropout(0.5)(layer_h16) - layer_h17 = Dense(128, activation="relu", use_bias=True, - kernel_initializer='he_normal')(layer_h16) # 全连接层 - - layer_h17 = Dropout(0.5)(layer_h17) - layer_h18 = Dense(128, activation="relu", use_bias=True, - kernel_initializer='he_normal')(layer_h17) # 全连接层 - layer_h18 = Dropout(0.5)(layer_h18) - layer_h19 = Dense(128, activation="relu", use_bias=True, - kernel_initializer='he_normal')(layer_h18) # 全连接层 - - layer_h19 = Dropout(0.5)(layer_h19) - layer_h20 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, - kernel_initializer='he_normal')(layer_h19) # 全连接层 - - y_pred = Activation('softmax', name='Activation0')(layer_h20) - model_data = Model(inputs=input_data, outputs=y_pred) - # model_data.summary() - - labels = Input(name='the_labels', shape=[ - self.label_max_string_length], dtype='float32') - input_length = Input(name='input_length', shape=[1], dtype='int64') - label_length = Input(name='label_length', shape=[1], dtype='int64') - # Keras doesn't currently support loss funcs with extra parameters - # so CTC loss is implemented in a lambda layer - - # layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC - loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')( - [y_pred, labels, input_length, label_length]) - - model = Model(inputs=[input_data, labels, - input_length, label_length], outputs=loss_out) - - model.summary() - - # clipnorm seems to speeds up convergence - #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) - ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06) - - #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) - model.compile(loss={'ctc': lambda y_true, - y_pred: y_pred}, optimizer=ada_d) - - # captures output of softmax so we can decode the output during visualization - test_func = K.function([input_data], [y_pred]) - - print('[*提示] 创建模型成功,模型编译成功') - #print('[*Info] Create Model Successful, Compiles Model Successful. 
') - return model, model_data - - def ctc_lambda_func(self, args): - y_pred, labels, input_length, label_length = args - - y_pred = y_pred[:, :, :] - #y_pred = y_pred[:, 2:, :] - return K.ctc_batch_cost(labels, y_pred, input_length, label_length) - - def TrainModel(self, datapath, epoch=2, save_step=1000, batch_size=32, filename=abspath + 'model_speech/m'+ModelName+'/speech_model'+ModelName): - ''' - 训练模型 - 参数: - datapath: 数据保存的路径 - epoch: 迭代轮数 - save_step: 每多少步保存一次模型 - filename: 默认保存文件名,不含文件后缀名 - ''' - data = DataSpeech(datapath, 'train') - - num_data = data.GetDataNum() # 获取数据的数量 - - yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH) - - for epoch in range(epoch): # 迭代轮数 - print('[running] train epoch %d .' % epoch) - n_step = 0 # 迭代数据数 - while True: - try: - print('[message] epoch %d . Have train datas %d+' % - (epoch, n_step*save_step)) - # data_genetator是一个生成器函数 - - #self._model.fit_generator(yielddatas, save_step, nb_worker=2) - self._model.fit_generator(yielddatas, save_step) - n_step += 1 - except StopIteration: - print('[error] generator error. please check data format.') - break - - self.SaveModel(comment='_e_'+str(epoch) + - '_step_'+str(n_step * save_step)) - self.TestModel( - self.datapath, str_dataset='train', data_count=4) - self.TestModel(self.datapath, str_dataset='dev', data_count=4) - - def LoadModel(self, filename=abspath + 'model_speech/m'+ModelName+'/speech_model'+ModelName+'.model'): - ''' - 加载模型参数 - ''' - self._model.load_weights(filename) - self.base_model.load_weights(filename + '.base') - - def SaveModel(self, filename=abspath + 'model_speech/m'+ModelName+'/speech_model'+ModelName, comment=''): - ''' - 保存模型参数 - ''' - self._model.save_weights(filename+comment+'.model') - self.base_model.save_weights(filename + comment + '.model.base') - f = open('step'+ModelName+'.txt', 'w') - f.write(filename+comment) - f.close() - - def TestModel(self, datapath='', str_dataset='dev', data_count=32, out_report=False, show_ratio=True): - ''' - 测试检验模型效果 - ''' - data = DataSpeech(self.datapath, str_dataset) - # data.LoadDataList(str_dataset) - num_data = data.GetDataNum() # 获取数据的数量 - if(data_count <= 0 or data_count > num_data): # 当data_count为小于等于0或者大于测试数据量的值时,则使用全部数据来测试 - data_count = num_data - - try: - ran_num = random.randint(0, num_data - 1) # 获取一个随机数 - - words_num = 0 - word_error_num = 0 - - nowtime = time.strftime( - '%Y%m%d_%H%M%S', time.localtime(time.time())) - if(out_report == True): - txt_obj = open('Test_Report_' + str_dataset + '_' + - nowtime + '.txt', 'w', encoding='UTF-8') # 打开文件并读入 - - txt = '' - for i in range(data_count): - data_input, data_labels = data.GetData( - (ran_num + i) % num_data) # 从随机数开始连续向后取一定数量数据 - - # 数据格式出错处理 开始 - # 当输入的wav文件长度过长时自动跳过该文件,转而使用下一个wav文件来运行 - num_bias = 0 - while(data_input.shape[0] > self.AUDIO_LENGTH): - print('*[Error]', 'wave data lenghth of num', (ran_num + i) % num_data, - 'is too long.', '\n A Exception raise when test Speech Model.') - num_bias += 1 - data_input, data_labels = data.GetData( - (ran_num + i + num_bias) % num_data) # 从随机数开始连续向后取一定数量数据 - # 数据格式出错处理 结束 - - pre = self.Predict(data_input, data_input.shape[0] // 8) - - words_n = data_labels.shape[0] # 获取每个句子的字数 - words_num += words_n # 把句子的总字数加上 - edit_distance = GetEditDistance(data_labels, pre) # 获取编辑距离 - if(edit_distance <= words_n): # 当编辑距离小于等于句子字数时 - word_error_num += edit_distance # 使用编辑距离作为错误字数 - else: # 否则肯定是增加了一堆乱七八糟的奇奇怪怪的字 - word_error_num += words_n # 就直接加句子本来的总字数就好了 - - if(i % 10 == 0 and show_ratio == True): - print('测试进度:', i, '/', 
data_count) - - txt = '' - if(out_report == True): - txt += str(i) + '\n' - txt += 'True:\t' + str(data_labels) + '\n' - txt += 'Pred:\t' + str(pre) + '\n' - txt += '\n' - txt_obj.write(txt) - - print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:', - word_error_num / words_num * 100, '%') - #print('*[Test Result] Speech Recognition ' + str_dataset + ' set word error ratio: ', word_error_num / words_num * 100, '%') - if(out_report == True): - txt = '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + \ - str(word_error_num / words_num * 100) + ' %' - txt_obj.write(txt) - txt_obj.close() - - except StopIteration: - print('[Error] Model Test Error. please check data format.') - - def Predict(self, data_input, input_len): - ''' - 预测结果 - 返回语音识别后的拼音符号列表 - ''' - - batch_size = 1 - in_len = np.zeros((batch_size), dtype=np.int32) - - in_len[0] = input_len - - x_in = np.zeros( - (batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=np.float) - - for i in range(batch_size): - x_in[i, 0:len(data_input)] = data_input - - base_pred = self.base_model.predict(x=x_in) - - #print('base_pred:\n', base_pred) - - #y_p = base_pred - # for j in range(200): - # mean = np.sum(y_p[0][j]) / y_p[0][j].shape[0] - # print('max y_p:',np.max(y_p[0][j]),'min y_p:',np.min(y_p[0][j]),'mean y_p:',mean,'mid y_p:',y_p[0][j][100]) - # print('argmin:',np.argmin(y_p[0][j]),'argmax:',np.argmax(y_p[0][j])) - # count=0 - # for i in range(y_p[0][j].shape[0]): - # if(y_p[0][j][i] < mean): - # count += 1 - # print('count:',count) - - base_pred = base_pred[:, :, :] - #base_pred =base_pred[:, 2:, :] - - r = K.ctc_decode(base_pred, in_len, greedy=True, - beam_width=100, top_paths=1) - - #print('r', r) - - r1 = K.get_value(r[0][0]) - #print('r1', r1) - - #r2 = K.get_value(r[1]) - # print(r2) - - r1 = r1[0] - - return r1 - pass - - def RecognizeSpeech(self, wavsignal, fs): - ''' - 最终做语音识别用的函数,识别一个wav序列的语音 - 不过这里现在还有bug - ''' - - #data = self.data - #data = DataSpeech('E:\\语音数据集') - # data.LoadDataList('dev') - # 获取输入特征 - #data_input = GetMfccFeature(wavsignal, fs) - # t0=time.time() - data_input = GetFrequencyFeature3(wavsignal, fs) - # t1=time.time() - #print('time cost:',t1-t0) - - input_length = len(data_input) - input_length = input_length // 8 - - data_input = np.array(data_input, dtype=np.float) - # print(data_input,data_input.shape) - data_input = data_input.reshape( - data_input.shape[0], data_input.shape[1], 1) - # t2=time.time() - r1 = self.Predict(data_input, input_length) - # t3=time.time() - #print('time cost:',t3-t2) - list_symbol_dic = GetSymbolList(self.datapath) # 获取拼音列表 - - r_str = [] - for i in r1: - r_str.append(list_symbol_dic[i]) - - return r_str - pass - - def RecognizeSpeech_FromFile(self, filename): - ''' - 最终做语音识别用的函数,识别指定文件名的语音 - ''' - - wavsignal, fs = read_wav_data(filename) - - r = self.RecognizeSpeech(wavsignal, fs) - - return r - - pass - - @property - def model(self): - ''' - 返回keras model - ''' - return self._model - - -if(__name__ == '__main__'): - - import tensorflow as tf - from keras.backend.tensorflow_backend import set_session - os.environ["CUDA_VISIBLE_DEVICES"] = "1" - # 进行配置,使用95%的GPU - config = tf.ConfigProto() - config.gpu_options.per_process_gpu_memory_fraction = 0.95 - # config.gpu_options.allow_growth=True #不全部占满显存, 按需分配 - set_session(tf.Session(config=config)) - - datapath = abspath + '' - modelpath = abspath + 'model_speech' - - if(not os.path.exists(modelpath)): # 判断保存模型的目录是否存在 - os.makedirs(modelpath) # 如果不存在,就新建一个,避免之后保存模型的时候炸掉 - - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - 
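
Every model variant in this scraped ASRT code routes its CTC loss through a `Lambda` layer, because Keras loss functions only receive `(y_true, y_pred)` while CTC also needs the two length tensors. Here is a minimal, self-contained sketch of just that wiring, assuming the Keras 2.2.x backend API pinned in the Pipfile; the toy shapes and layer sizes are illustrative, not the repo's:

```python
from keras import backend as K
from keras.models import Model
from keras.layers import Input, Dense, Lambda, Activation

TIME_STEPS, FEATURES, N_CLASSES, MAX_LABEL_LEN = 50, 20, 10, 8

def ctc_lambda(args):
    y_pred, labels, input_length, label_length = args
    return K.ctc_batch_cost(labels, y_pred, input_length, label_length)

acoustic_in = Input(name='the_input', shape=(TIME_STEPS, FEATURES))
y_pred = Activation('softmax')(Dense(N_CLASSES)(acoustic_in))

labels = Input(name='the_labels', shape=[MAX_LABEL_LEN], dtype='float32')
input_length = Input(name='input_length', shape=[1], dtype='int64')
label_length = Input(name='label_length', shape=[1], dtype='int64')

# The CTC loss is computed inside the graph and returned as the model
# "output"; compile() then just passes that value straight through.
loss_out = Lambda(ctc_lambda, output_shape=(1,), name='ctc')(
    [y_pred, labels, input_length, label_length])
model = Model(inputs=[acoustic_in, labels, input_length, label_length],
              outputs=loss_out)
model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer='adam')
```

This is why training feeds dummy `y_true` values: the dict loss keyed on the `ctc` output simply returns `y_pred`, which is already the per-sample CTC loss.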
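
The `TrainModel` methods all follow the same checkpoint-every-`save_step` pattern: `fit_generator(yielddatas, save_step)` consumes `save_step` batches per call, then the weights are saved. Note that as written the inner `while True` only exits via `StopIteration`, which an endlessly looping generator never raises. A runnable toy sketch of the pattern (stub model and generator are assumptions, with a step cap in place of the unreachable break):

```python
import numpy as np
from keras.models import Sequential
from keras.layers import Dense

def toy_generator(batch_size=4):
    while True:  # Keras generators are expected to loop forever
        yield np.random.rand(batch_size, 10), np.random.rand(batch_size, 1)

model = Sequential([Dense(1, input_shape=(10,))])
model.compile(loss='mse', optimizer='adam')

save_step, epochs = 5, 2
for epoch in range(epochs):
    n_step = 0
    while n_step < 3:  # capped here; the scraped loop has no working exit
        # train exactly save_step batches, then fall through to checkpointing
        model.fit_generator(toy_generator(), save_step)
        n_step += 1
    model.save_weights('toy_model_e_%d_step_%d.model' % (epoch, n_step * save_step))
```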
if(system_type == 'Windows'): - datapath = 'E:\\语音数据集' - modelpath = modelpath + '\\' - elif(system_type == 'Linux'): - datapath = abspath + 'dataset' - modelpath = modelpath + '/' - else: - print('*[Message] Unknown System\n') - datapath = 'dataset' - modelpath = modelpath + '/' - - ms = ModelSpeech(datapath) - - #ms.LoadModel(modelpath + 'm252\\speech_model252_e_0_step_115500.model') - ms.TrainModel(datapath, epoch=50, batch_size=4, save_step=500) - #ms.TestModel(datapath, str_dataset='dev', data_count = 128, out_report = True) - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav') - #print('*[提示] 语音识别结果:\n',r) -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -@author: nl8590687 -""" -import platform as plat -import os -import time - -from general_function.file_wav import * -from general_function.file_dict import * -from general_function.gen_func import * - -# LSTM_CNN -import keras as kr -import numpy as np -import random - -from keras.models import Sequential, Model -# , Flatten,LSTM,Convolution1D,MaxPooling1D,Merge -from keras.layers import Dense, Dropout, Input, Reshape, GRU -from keras.layers import Conv1D, LSTM, MaxPooling1D, Lambda, TimeDistributed, Activation, Conv2D, MaxPooling2D # , Merge,Conv1D -from keras.layers.merge import add, concatenate -from keras import backend as K -from keras.optimizers import SGD, Adadelta - -from readdata24 import DataSpeech - - -class ModelSpeech(): # 语音模型类 - def __init__(self, datapath): - ''' - 初始化 - 默认输出的拼音的表示大小是1424,即1423个拼音+1个空白块 - ''' - MS_OUTPUT_SIZE = 1424 - self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 - # self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch - self.label_max_string_length = 64 - self.AUDIO_LENGTH = 1600 - self.AUDIO_FEATURE_LENGTH = 200 - self._model, self.base_model = self.CreateModel() - - self.datapath = datapath - self.slash = '' - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - self.slash = '\\' # 反斜杠 - elif(system_type == 'Linux'): - self.slash = '/' # 正斜杠 - else: - print('*[Message] Unknown System\n') - self.slash = '/' # 正斜杠 - if(self.slash != self.datapath[-1]): # 在目录路径末尾增加斜杠 - self.datapath = self.datapath + self.slash - - def CreateModel(self): - ''' - 定义CNN/LSTM/CTC模型,使用函数式模型 - 输入层:200维的特征值序列,一条语音数据的最大长度设为1600(大约16s) - 隐藏层一:3*3卷积层 - 隐藏层二:池化层,池化窗口大小为2 - 隐藏层三:Dropout层,需要断开的神经元的比例为0.2,防止过拟合 - 隐藏层四:循环层、LSTM/GRU层 - 隐藏层五:Dropout层,需要断开的神经元的比例为0.2,防止过拟合 - 隐藏层六:全连接层,神经元数量为self.MS_OUTPUT_SIZE,使用softmax作为激活函数, - 输出层:自定义层,即CTC层,使用CTC的loss作为损失函数,实现连接性时序多输出 - - ''' - # 每一帧使用13维mfcc特征及其13维一阶差分和13维二阶差分表示,最大信号序列长度为1500 - input_data = Input(name='the_input', shape=( - self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)) - - layer_h1 = Conv2D(32, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(input_data) # 卷积层 - layer_h1 = Dropout(0.1)(layer_h1) - layer_h2 = Conv2D(32, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h1) # 卷积层 - layer_h3 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h2) # 池化层 - # layer_h3 = Dropout(0.2)(layer_h2) # 随机中断部分神经网络连接,防止过拟合 - layer_h3 = Dropout(0.2)(layer_h3) - layer_h4 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', 
kernel_initializer='he_normal')(layer_h3) # 卷积层 - layer_h4 = Dropout(0.2)(layer_h4) - layer_h5 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h4) # 卷积层 - layer_h6 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h5) # 池化层 - - layer_h6 = Dropout(0.3)(layer_h6) - layer_h7 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h6) # 卷积层 - layer_h7 = Dropout(0.3)(layer_h7) - layer_h8 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h7) # 卷积层 - layer_h9 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h8) # 池化层 - - layer_h9 = Dropout(0.3)(layer_h9) - layer_h10 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h9) # 卷积层 - layer_h10 = Dropout(0.4)(layer_h10) - layer_h11 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h10) # 卷积层 - layer_h12 = MaxPooling2D( - pool_size=1, strides=None, padding="valid")(layer_h11) # 池化层 - - #test=Model(inputs = input_data, outputs = layer_h6) - # test.summary() - - layer_h13 = Reshape((200, 3200))(layer_h12) # Reshape层 - - layer_h13 = Dropout(0.4)(layer_h13) - layer_h14 = Dense(128, activation="relu", use_bias=True, - kernel_initializer='he_normal')(layer_h13) # 全连接层 - layer_h14 = Dropout(0.4)(layer_h14) - inner = layer_h14 - # layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM层 - - rnn_size = 128 - gru_1 = GRU(rnn_size, return_sequences=True, - kernel_initializer='he_normal', name='gru1')(inner) - gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, - kernel_initializer='he_normal', name='gru1_b')(inner) - gru1_merged = add([gru_1, gru_1b]) - gru_2 = GRU(rnn_size, return_sequences=True, - kernel_initializer='he_normal', name='gru2')(gru1_merged) - gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, - kernel_initializer='he_normal', name='gru2_b')(gru1_merged) - - gru2 = concatenate([gru_2, gru_2b]) - #layer_h12 = GRU(128,activation='tanh', recurrent_activation='hard_sigmoid', use_bias=True, kernel_initializer='he_normal', recurrent_initializer='orthogonal', bias_initializer='zeros', return_sequences=True)(layer_h11) - - layer_h15 = Dropout(0.4)(gru2) - layer_h16 = Dense(128, activation="relu", use_bias=True, - kernel_initializer='he_normal')(layer_h15) # 全连接层 - - layer_h16 = Dropout(0.5)(layer_h16) # 随机中断部分神经网络连接,防止过拟合 - layer_h17 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, - kernel_initializer='he_normal')(layer_h16) # 全连接层 - - y_pred = Activation('softmax', name='Activation0')(layer_h17) - model_data = Model(inputs=input_data, outputs=y_pred) - # model_data.summary() - - labels = Input(name='the_labels', shape=[ - self.label_max_string_length], dtype='float32') - input_length = Input(name='input_length', shape=[1], dtype='int64') - label_length = Input(name='label_length', shape=[1], dtype='int64') - # Keras doesn't currently support loss funcs with extra parameters - # so CTC loss is implemented in a lambda layer - - # layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC - loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')( - [y_pred, labels, input_length, label_length]) - - model = Model(inputs=[input_data, labels, - input_length, 
label_length], outputs=loss_out) - - model.summary() - - # clipnorm seems to speeds up convergence - #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) - ada_d = Adadelta(lr=0.01, rho=0.95, epsilon=1e-06) - - #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) - model.compile(loss={'ctc': lambda y_true, - y_pred: y_pred}, optimizer=ada_d) - - # captures output of softmax so we can decode the output during visualization - test_func = K.function([input_data], [y_pred]) - - print('[*提示] 创建模型成功,模型编译成功') - return model, model_data - - def ctc_lambda_func(self, args): - y_pred, labels, input_length, label_length = args - - y_pred = y_pred[:, :, :] - #y_pred = y_pred[:, 2:, :] - return K.ctc_batch_cost(labels, y_pred, input_length, label_length) - - def TrainModel(self, datapath, epoch=2, save_step=1000, batch_size=32, filename='model_speech/m26/speech_model26'): - ''' - 训练模型 - 参数: - datapath: 数据保存的路径 - epoch: 迭代轮数 - save_step: 每多少步保存一次模型 - filename: 默认保存文件名,不含文件后缀名 - ''' - data = DataSpeech(datapath, 'train') - - num_data = data.GetDataNum() # 获取数据的数量 - - yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH) - - for epoch in range(epoch): # 迭代轮数 - print('[running] train epoch %d .' % epoch) - n_step = 0 # 迭代数据数 - while True: - try: - print('[message] epoch %d . Have train datas %d+' % - (epoch, n_step*save_step)) - # data_genetator是一个生成器函数 - - #self._model.fit_generator(yielddatas, save_step, nb_worker=2) - self._model.fit_generator(yielddatas, save_step) - n_step += 1 - except StopIteration: - print('[error] generator error. please check data format.') - break - - self.SaveModel(comment='_e_'+str(epoch) + - '_step_'+str(n_step * save_step)) - self.TestModel( - self.datapath, str_dataset='train', data_count=4) - self.TestModel(self.datapath, str_dataset='dev', data_count=4) - - def LoadModel(self, filename='model_speech/m26/speech_model26.model'): - ''' - 加载模型参数 - ''' - self._model.load_weights(filename) - self.base_model.load_weights(filename + '.base') - - def SaveModel(self, filename='model_speech/m26/speech_model26', comment=''): - ''' - 保存模型参数 - ''' - self._model.save_weights(filename+comment+'.model') - self.base_model.save_weights(filename + comment + '.model.base') - f = open('step26.txt', 'w') - f.write(filename+comment) - f.close() - - def TestModel(self, datapath='', str_dataset='dev', data_count=32, out_report=False, show_ratio=True): - ''' - 测试检验模型效果 - ''' - data = DataSpeech(self.datapath, str_dataset) - # data.LoadDataList(str_dataset) - num_data = data.GetDataNum() # 获取数据的数量 - if(data_count <= 0 or data_count > num_data): # 当data_count为小于等于0或者大于测试数据量的值时,则使用全部数据来测试 - data_count = num_data - - try: - ran_num = random.randint(0, num_data - 1) # 获取一个随机数 - - words_num = 0 - word_error_num = 0 - - nowtime = time.strftime( - '%Y%m%d_%H%M%S', time.localtime(time.time())) - if(out_report == True): - txt_obj = open('Test_Report_' + str_dataset + '_' + - nowtime + '.txt', 'w', encoding='UTF-8') # 打开文件并读入 - - txt = '' - for i in range(data_count): - data_input, data_labels = data.GetData( - (ran_num + i) % num_data) # 从随机数开始连续向后取一定数量数据 - - # 数据格式出错处理 开始 - # 当输入的wav文件长度过长时自动跳过该文件,转而使用下一个wav文件来运行 - num_bias = 0 - while(data_input.shape[0] > self.AUDIO_LENGTH): - print('*[Error]', 'wave data lenghth of num', (ran_num + i) % num_data, - 'is too long.', '\n A Exception raise when test Speech Model.') - num_bias += 1 - data_input, data_labels = data.GetData( - (ran_num + i + num_bias) % num_data) # 从随机数开始连续向后取一定数量数据 - # 数据格式出错处理 结束 - - pre = 
self.Predict(data_input, data_input.shape[0] // 8) - - words_n = data_labels.shape[0] # 获取每个句子的字数 - words_num += words_n # 把句子的总字数加上 - edit_distance = GetEditDistance(data_labels, pre) # 获取编辑距离 - if(edit_distance <= words_n): # 当编辑距离小于等于句子字数时 - word_error_num += edit_distance # 使用编辑距离作为错误字数 - else: # 否则肯定是增加了一堆乱七八糟的奇奇怪怪的字 - word_error_num += words_n # 就直接加句子本来的总字数就好了 - - if(i % 10 == 0 and show_ratio == True): - print('测试进度:', i, '/', data_count) - - txt = '' - if(out_report == True): - txt += str(i) + '\n' - txt += 'True:\t' + str(data_labels) + '\n' - txt += 'Pred:\t' + str(pre) + '\n' - txt += '\n' - txt_obj.write(txt) - - print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:', - word_error_num / words_num * 100, '%') - if(out_report == True): - txt = '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + \ - str(word_error_num / words_num * 100) + ' %' - txt_obj.write(txt) - txt_obj.close() - - except StopIteration: - print('[Error] Model Test Error. please check data format.') - - def Predict(self, data_input, input_len): - ''' - 预测结果 - 返回语音识别后的拼音符号列表 - ''' - - batch_size = 1 - in_len = np.zeros((batch_size), dtype=np.int32) - - in_len[0] = input_len - - x_in = np.zeros( - (batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=np.float) - - for i in range(batch_size): - x_in[i, 0:len(data_input)] = data_input - - base_pred = self.base_model.predict(x=x_in) - - #print('base_pred:\n', base_pred) - - #y_p = base_pred - # for j in range(200): - # mean = np.sum(y_p[0][j]) / y_p[0][j].shape[0] - # print('max y_p:',np.max(y_p[0][j]),'min y_p:',np.min(y_p[0][j]),'mean y_p:',mean,'mid y_p:',y_p[0][j][100]) - # print('argmin:',np.argmin(y_p[0][j]),'argmax:',np.argmax(y_p[0][j])) - # count=0 - # for i in range(y_p[0][j].shape[0]): - # if(y_p[0][j][i] < mean): - # count += 1 - # print('count:',count) - - base_pred = base_pred[:, :, :] - #base_pred =base_pred[:, 2:, :] - - r = K.ctc_decode(base_pred, in_len, greedy=True, - beam_width=100, top_paths=1) - - #print('r', r) - - r1 = K.get_value(r[0][0]) - #print('r1', r1) - - #r2 = K.get_value(r[1]) - # print(r2) - - r1 = r1[0] - - return r1 - pass - - def RecognizeSpeech(self, wavsignal, fs): - ''' - 最终做语音识别用的函数,识别一个wav序列的语音 - 不过这里现在还有bug - ''' - - #data = self.data - #data = DataSpeech('E:\\语音数据集') - # data.LoadDataList('dev') - # 获取输入特征 - #data_input = GetMfccFeature(wavsignal, fs) - # t0=time.time() - data_input = GetFrequencyFeature2(wavsignal, fs) - # t1=time.time() - #print('time cost:',t1-t0) - - input_length = len(data_input) - input_length = input_length // 8 - - data_input = np.array(data_input, dtype=np.float) - # print(data_input,data_input.shape) - data_input = data_input.reshape( - data_input.shape[0], data_input.shape[1], 1) - # t2=time.time() - r1 = self.Predict(data_input, input_length) - # t3=time.time() - #print('time cost:',t3-t2) - list_symbol_dic = GetSymbolList(self.datapath) # 获取拼音列表 - - r_str = [] - for i in r1: - r_str.append(list_symbol_dic[i]) - - return r_str - pass - - def RecognizeSpeech_FromFile(self, filename): - ''' - 最终做语音识别用的函数,识别指定文件名的语音 - ''' - - wavsignal, fs = read_wav_data(filename) - - r = self.RecognizeSpeech(wavsignal, fs) - - return r - - pass - - @property - def model(self): - ''' - 返回keras model - ''' - return self._model - - -if(__name__ == '__main__'): - - import tensorflow as tf - from keras.backend.tensorflow_backend import set_session - os.environ["CUDA_VISIBLE_DEVICES"] = "0" - # 进行配置,使用70%的GPU - config = tf.ConfigProto() - config.gpu_options.per_process_gpu_memory_fraction = 0.93 - # 
config.gpu_options.allow_growth=True #不全部占满显存, 按需分配 - set_session(tf.Session(config=config)) - - datapath = '' - modelpath = 'model_speech' - - if(not os.path.exists(modelpath)): # 判断保存模型的目录是否存在 - os.makedirs(modelpath) # 如果不存在,就新建一个,避免之后保存模型的时候炸掉 - - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - datapath = 'E:\\语音数据集' - modelpath = modelpath + '\\' - elif(system_type == 'Linux'): - datapath = 'dataset' - modelpath = modelpath + '/' - else: - print('*[Message] Unknown System\n') - datapath = 'dataset' - modelpath = modelpath + '/' - - ms = ModelSpeech(datapath) - - #ms.LoadModel(modelpath + 'm26/speech_model26_e_0_step_397000.model') - ms.TrainModel(datapath, epoch=50, batch_size=16, save_step=500) - #ms.TestModel(datapath, str_dataset='test', data_count = 128, out_report = True) - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav') - #print('*[提示] 语音识别结果:\n',r) -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -@author: nl8590687 -""" -import platform as plat -import os -import time - -from general_function.file_wav import * -from general_function.file_dict import * -from general_function.gen_func import * - -# LSTM_CNN -import keras as kr -import numpy as np -import random - -from keras.models import Sequential, Model -from keras.layers import Dense, Dropout, Input, Reshape, BatchNormalization # , Flatten -from keras.layers import Lambda, TimeDistributed, Activation, Conv2D, MaxPooling2D, GRU # , Merge -from keras.layers.merge import add, concatenate -from keras import backend as K -from keras.optimizers import SGD, Adadelta, Adam - -from readdata24 import DataSpeech - -abspath = '' -ModelName = '261' -#NUM_GPU = 2 - - -class ModelSpeech(): # 语音模型类 - def __init__(self, datapath): - ''' - 初始化 - 默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块 - ''' - MS_OUTPUT_SIZE = 1422 - self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 - # self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch - self.label_max_string_length = 64 - self.AUDIO_LENGTH = 1600 - self.AUDIO_FEATURE_LENGTH = 200 - self._model, self.base_model = self.CreateModel() - - self.datapath = datapath - self.slash = '' - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - self.slash = '\\' # 反斜杠 - elif(system_type == 'Linux'): - self.slash = '/' # 正斜杠 - else: - print('*[Message] Unknown System\n') - self.slash = '/' # 正斜杠 - if(self.slash != self.datapath[-1]): # 在目录路径末尾增加斜杠 - self.datapath = self.datapath + self.slash - - def CreateModel(self): - ''' - 定义CNN/LSTM/CTC模型,使用函数式模型 - 输入层:200维的特征值序列,一条语音数据的最大长度设为1600(大约16s) - 隐藏层:卷积池化层,卷积核大小为3x3,池化窗口大小为2 - 隐藏层:全连接层 - 输出层:全连接层,神经元数量为self.MS_OUTPUT_SIZE,使用softmax作为激活函数, - CTC层:使用CTC的loss作为损失函数,实现连接性时序多输出 - - ''' - - input_data = Input(name='the_input', shape=( - self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)) - - layer_h = Conv2D(32, (3, 3), use_bias=False, activation='relu', - padding='same', kernel_initializer='he_normal')(input_data) # 卷积层 - #layer_h = Dropout(0.05)(layer_h) - layer_h = Conv2D(32, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h) # 卷积层 - layer_h = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h) # 池化层 - - # layer_h = Dropout(0.05)(layer_h) # 
随机中断部分神经网络连接,防止过拟合 - layer_h = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h) # 卷积层 - #layer_h = Dropout(0.1)(layer_h) - layer_h = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h) # 卷积层 - layer_h = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h) # 池化层 - - #layer_h = Dropout(0.1)(layer_h) - layer_h = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h) # 卷积层 - #layer_h = Dropout(0.15)(layer_h) - layer_h = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h) # 卷积层 - layer_h = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h) # 池化层 - - #layer_h = Dropout(0.15)(layer_h) - layer_h = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h) # 卷积层 - #layer_h = Dropout(0.2)(layer_h) - layer_h = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h) # 卷积层 - layer_h = MaxPooling2D(pool_size=1, strides=None, - padding="valid")(layer_h) # 池化层 - - #layer_h = Dropout(0.2)(layer_h) - # layer_h = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # 卷积层 - #layer_h = Dropout(0.2)(layer_h) - # layer_h = Conv2D(128, (3,3), use_bias=True, activation='relu', padding='same', kernel_initializer='he_normal')(layer_h) # 卷积层 - # layer_h = MaxPooling2D(pool_size=1, strides=None, padding="valid")(layer_h) # 池化层 - - #test=Model(inputs = input_data, outputs = layer_h) - # test.summary() - - layer_h = Reshape((200, 3200))(layer_h) # Reshape层 - - # layer_h16 = Dropout(0.3)(layer_h16) # 随机中断部分神经网络连接,防止过拟合 - layer_h = Dense(128, activation="relu", use_bias=True, - kernel_initializer='he_normal')(layer_h) # 全连接层 - - inner = layer_h - # layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM层 - - rnn_size = 128 - gru_1 = GRU(rnn_size, return_sequences=True, - kernel_initializer='he_normal', name='gru1')(inner) - gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, - kernel_initializer='he_normal', name='gru1_b')(inner) - gru1_merged = add([gru_1, gru_1b]) - gru_2 = GRU(rnn_size, return_sequences=True, - kernel_initializer='he_normal', name='gru2')(gru1_merged) - gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, - kernel_initializer='he_normal', name='gru2_b')(gru1_merged) - - gru2 = concatenate([gru_2, gru_2b]) - - layer_h = gru2 - #layer_h20 = Dropout(0.4)(gru2) - layer_h = Dense(128, activation="relu", use_bias=True, - kernel_initializer='he_normal')(layer_h) # 全连接层 - - #layer_h17 = Dropout(0.3)(layer_h17) - layer_h = Dense(self.MS_OUTPUT_SIZE, use_bias=True, - kernel_initializer='he_normal')(layer_h) # 全连接层 - - y_pred = Activation('softmax', name='Activation0')(layer_h) - model_data = Model(inputs=input_data, outputs=y_pred) - # model_data.summary() - - labels = Input(name='the_labels', shape=[ - self.label_max_string_length], dtype='float32') - input_length = Input(name='input_length', shape=[1], dtype='int64') - label_length = Input(name='label_length', shape=[1], dtype='int64') - # Keras doesn't currently support loss funcs with extra parameters - # so CTC loss is implemented in a lambda layer - - # layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, 
input_length, label_length])#(layer_h6) # CTC - loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')( - [y_pred, labels, input_length, label_length]) - - model = Model(inputs=[input_data, labels, - input_length, label_length], outputs=loss_out) - - model.summary() - - # clipnorm seems to speeds up convergence - #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) - #opt = Adadelta(lr = 0.01, rho = 0.95, epsilon = 1e-06) - opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, - decay=0.0, epsilon=10e-8) - #model.compile(loss={'ctc': lambda y_true, y_pred: y_pred}, optimizer=sgd) - model.compile(loss={'ctc': lambda y_true, - y_pred: y_pred}, optimizer=opt) - - # captures output of softmax so we can decode the output during visualization - test_func = K.function([input_data], [y_pred]) - - #print('[*提示] 创建模型成功,模型编译成功') - print('[*Info] Create Model Successful, Compiles Model Successful. ') - return model, model_data - - def ctc_lambda_func(self, args): - y_pred, labels, input_length, label_length = args - - y_pred = y_pred[:, :, :] - #y_pred = y_pred[:, 2:, :] - return K.ctc_batch_cost(labels, y_pred, input_length, label_length) - - def TrainModel(self, datapath, epoch=2, save_step=1000, batch_size=32, filename=abspath + 'model_speech/m' + ModelName + '/speech_model'+ModelName): - ''' - 训练模型 - 参数: - datapath: 数据保存的路径 - epoch: 迭代轮数 - save_step: 每多少步保存一次模型 - filename: 默认保存文件名,不含文件后缀名 - ''' - data = DataSpeech(datapath, 'train') - - num_data = data.GetDataNum() # 获取数据的数量 - - yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH) - - for epoch in range(epoch): # 迭代轮数 - print('[running] train epoch %d .' % epoch) - n_step = 0 # 迭代数据数 - while True: - try: - print('[message] epoch %d . Have train datas %d+' % - (epoch, n_step*save_step)) - # data_genetator是一个生成器函数 - - #self._model.fit_generator(yielddatas, save_step, nb_worker=2) - self._model.fit_generator(yielddatas, save_step) - n_step += 1 - except StopIteration: - print('[error] generator error. 
please check data format.') - break - - self.SaveModel(comment='_e_'+str(epoch) + - '_step_'+str(n_step * save_step)) - self.TestModel( - self.datapath, str_dataset='train', data_count=4) - self.TestModel(self.datapath, str_dataset='dev', data_count=4) - - def LoadModel(self, filename=abspath + 'model_speech/m'+ModelName+'/speech_model'+ModelName+'.model'): - ''' - 加载模型参数 - ''' - self._model.load_weights(filename) - self.base_model.load_weights(filename + '.base') - - def SaveModel(self, filename=abspath + 'model_speech/m'+ModelName+'/speech_model'+ModelName, comment=''): - ''' - 保存模型参数 - ''' - self._model.save_weights(filename+comment+'.model') - self.base_model.save_weights(filename + comment + '.model.base') - f = open('step'+ModelName+'.txt', 'w') - f.write(filename+comment) - f.close() - - def TestModel(self, datapath='', str_dataset='dev', data_count=32, out_report=False, show_ratio=True, io_step_print=10, io_step_file=10): - ''' - 测试检验模型效果 - - io_step_print - 为了减少测试时标准输出的io开销,可以通过调整这个参数来实现 - - io_step_file - 为了减少测试时文件读写的io开销,可以通过调整这个参数来实现 - - ''' - data = DataSpeech(self.datapath, str_dataset) - # data.LoadDataList(str_dataset) - num_data = data.GetDataNum() # 获取数据的数量 - if(data_count <= 0 or data_count > num_data): # 当data_count为小于等于0或者大于测试数据量的值时,则使用全部数据来测试 - data_count = num_data - - try: - ran_num = random.randint(0, num_data - 1) # 获取一个随机数 - - words_num = 0 - word_error_num = 0 - - nowtime = time.strftime( - '%Y%m%d_%H%M%S', time.localtime(time.time())) - if(out_report == True): - txt_obj = open('Test_Report_' + str_dataset + '_' + - nowtime + '.txt', 'w', encoding='UTF-8') # 打开文件并读入 - - txt = '测试报告\n模型编号 ' + ModelName + '\n\n' - for i in range(data_count): - data_input, data_labels = data.GetData( - (ran_num + i) % num_data) # 从随机数开始连续向后取一定数量数据 - - # 数据格式出错处理 开始 - # 当输入的wav文件长度过长时自动跳过该文件,转而使用下一个wav文件来运行 - num_bias = 0 - while(data_input.shape[0] > self.AUDIO_LENGTH): - print('*[Error]', 'wave data lenghth of num', (ran_num + i) % num_data, - 'is too long.', '\n A Exception raise when test Speech Model.') - num_bias += 1 - data_input, data_labels = data.GetData( - (ran_num + i + num_bias) % num_data) # 从随机数开始连续向后取一定数量数据 - # 数据格式出错处理 结束 - - pre = self.Predict(data_input, data_input.shape[0] // 8) - - words_n = data_labels.shape[0] # 获取每个句子的字数 - words_num += words_n # 把句子的总字数加上 - edit_distance = GetEditDistance(data_labels, pre) # 获取编辑距离 - if(edit_distance <= words_n): # 当编辑距离小于等于句子字数时 - word_error_num += edit_distance # 使用编辑距离作为错误字数 - else: # 否则肯定是增加了一堆乱七八糟的奇奇怪怪的字 - word_error_num += words_n # 就直接加句子本来的总字数就好了 - - if((i % io_step_print == 0 or i == data_count - 1) and show_ratio == True): - # print('测试进度:',i,'/',data_count) - print('Test Count: ', i, '/', data_count) - - if(out_report == True): - if(i % io_step_file == 0 or i == data_count - 1): - txt_obj.write(txt) - txt = '' - - txt += str(i) + '\n' - txt += 'True:\t' + str(data_labels) + '\n' - txt += 'Pred:\t' + str(pre) + '\n' - txt += '\n' - - #print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:', word_error_num / words_num * 100, '%') - print('*[Test Result] Speech Recognition ' + str_dataset + - ' set word error ratio: ', word_error_num / words_num * 100, '%') - if(out_report == True): - txt += '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + \ - str(word_error_num / words_num * 100) + ' %' - txt_obj.write(txt) - txt = '' - txt_obj.close() - - except StopIteration: - print('[Error] Model Test Error. 
please check data format.') - - def Predict(self, data_input, input_len): - ''' - 预测结果 - 返回语音识别后的拼音符号列表 - ''' - - batch_size = 1 - in_len = np.zeros((batch_size), dtype=np.int32) - - in_len[0] = input_len - - x_in = np.zeros( - (batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=np.float) - - for i in range(batch_size): - x_in[i, 0:len(data_input)] = data_input - - base_pred = self.base_model.predict(x=x_in) - - #print('base_pred:\n', base_pred) - - #y_p = base_pred - # for j in range(200): - # mean = np.sum(y_p[0][j]) / y_p[0][j].shape[0] - # print('max y_p:',np.max(y_p[0][j]),'min y_p:',np.min(y_p[0][j]),'mean y_p:',mean,'mid y_p:',y_p[0][j][100]) - # print('argmin:',np.argmin(y_p[0][j]),'argmax:',np.argmax(y_p[0][j])) - # count=0 - # for i in range(y_p[0][j].shape[0]): - # if(y_p[0][j][i] < mean): - # count += 1 - # print('count:',count) - - base_pred = base_pred[:, :, :] - #base_pred =base_pred[:, 2:, :] - - r = K.ctc_decode(base_pred, in_len, greedy=True, - beam_width=100, top_paths=1) - - #print('r', r) - - r1 = K.get_value(r[0][0]) - #print('r1', r1) - - #r2 = K.get_value(r[1]) - # print(r2) - - r1 = r1[0] - - return r1 - pass - - def RecognizeSpeech(self, wavsignal, fs): - ''' - 最终做语音识别用的函数,识别一个wav序列的语音 - 不过这里现在还有bug - ''' - - #data = self.data - #data = DataSpeech('E:\\语音数据集') - # data.LoadDataList('dev') - # 获取输入特征 - #data_input = GetMfccFeature(wavsignal, fs) - # t0=time.time() - data_input = GetFrequencyFeature3(wavsignal, fs) - # t1=time.time() - #print('time cost:',t1-t0) - - input_length = len(data_input) - input_length = input_length // 8 - - data_input = np.array(data_input, dtype=np.float) - # print(data_input,data_input.shape) - data_input = data_input.reshape( - data_input.shape[0], data_input.shape[1], 1) - # t2=time.time() - r1 = self.Predict(data_input, input_length) - # t3=time.time() - #print('time cost:',t3-t2) - list_symbol_dic = GetSymbolList(self.datapath) # 获取拼音列表 - - r_str = [] - for i in r1: - r_str.append(list_symbol_dic[i]) - - return r_str - pass - - def RecognizeSpeech_FromFile(self, filename): - ''' - 最终做语音识别用的函数,识别指定文件名的语音 - ''' - - wavsignal, fs = read_wav_data(filename) - - r = self.RecognizeSpeech(wavsignal, fs) - - return r - - pass - - @property - def model(self): - ''' - 返回keras model - ''' - return self._model - - -if(__name__ == '__main__'): - - #import tensorflow as tf - #from keras.backend.tensorflow_backend import set_session - #os.environ["CUDA_VISIBLE_DEVICES"] = "1" - # 进行配置,使用70%的GPU - #config = tf.ConfigProto() - #config.gpu_options.per_process_gpu_memory_fraction = 0.95 - # config.gpu_options.allow_growth=True #不全部占满显存, 按需分配 - # set_session(tf.Session(config=config)) - - datapath = abspath + '' - modelpath = abspath + 'model_speech' - - if(not os.path.exists(modelpath)): # 判断保存模型的目录是否存在 - os.makedirs(modelpath) # 如果不存在,就新建一个,避免之后保存模型的时候炸掉 - - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - datapath = 'E:\\语音数据集' - modelpath = modelpath + '\\' - elif(system_type == 'Linux'): - datapath = abspath + 'dataset' - modelpath = modelpath + '/' - else: - print('*[Message] Unknown System\n') - datapath = 'dataset' - modelpath = modelpath + '/' - - ms = ModelSpeech(datapath) - - #ms.LoadModel(modelpath + 'm261/speech_model261_e_0_step_100000.model') - #ms.TrainModel(datapath, epoch = 50, batch_size = 16, save_step = 500) - - # t1=time.time() - #ms.TestModel(datapath, str_dataset='train', data_count = 128, out_report = True) - #ms.TestModel(datapath, str_dataset='dev', data_count = 128, out_report = 
True) - #ms.TestModel(datapath, str_dataset='test', data_count = 128, out_report = True) - # t2=time.time() - #print('Test Model Time Cost:',t2-t1,'s') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav') - #print('*[提示] 语音识别结果:\n',r) -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -@author: nl8590687 -""" -import platform as plat -import os -import time - -from general_function.file_wav import * -from general_function.file_dict import * -from general_function.gen_func import * -from general_function.muti_gpu import * - -import keras as kr -import numpy as np -import random - -from keras.models import Sequential, Model -from keras.layers import Dense, Dropout, Input, Reshape, BatchNormalization # , Flatten -from keras.layers import Lambda, TimeDistributed, Activation, Conv2D, MaxPooling2D, GRU # , Merge -from keras.layers.merge import add, concatenate -from keras import backend as K -from keras.optimizers import SGD, Adadelta, Adam - -from readdata24 import DataSpeech - -abspath = '' -ModelName = '261' -NUM_GPU = 2 - - -class ModelSpeech(): # 语音模型类 - def __init__(self, datapath): - ''' - 初始化 - 默认输出的拼音的表示大小是1422,即1421个拼音+1个空白块 - ''' - MS_OUTPUT_SIZE = 1422 - self.MS_OUTPUT_SIZE = MS_OUTPUT_SIZE # 神经网络最终输出的每一个字符向量维度的大小 - # self.BATCH_SIZE = BATCH_SIZE # 一次训练的batch - self.label_max_string_length = 64 - self.AUDIO_LENGTH = 1600 - self.AUDIO_FEATURE_LENGTH = 200 - self._model, self.base_model = self.CreateModel() - - self.datapath = datapath - self.slash = '' - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - self.slash = '\\' # 反斜杠 - elif(system_type == 'Linux'): - self.slash = '/' # 正斜杠 - else: - print('*[Message] Unknown System\n') - self.slash = '/' # 正斜杠 - if(self.slash != self.datapath[-1]): # 在目录路径末尾增加斜杠 - self.datapath = self.datapath + self.slash - - def CreateModel(self): - ''' - 定义CNN/LSTM/CTC模型,使用函数式模型 - 输入层:200维的特征值序列,一条语音数据的最大长度设为1600(大约16s) - 隐藏层:卷积池化层,卷积核大小为3x3,池化窗口大小为2 - 隐藏层:全连接层 - 输出层:全连接层,神经元数量为self.MS_OUTPUT_SIZE,使用softmax作为激活函数, - CTC层:使用CTC的loss作为损失函数,实现连接性时序多输出 - - ''' - - input_data = Input(name='the_input', shape=( - self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)) - - layer_h1 = Conv2D(32, (3, 3), use_bias=False, activation='relu', - padding='same', kernel_initializer='he_normal')(input_data) # 卷积层 - #layer_h1 = Dropout(0.05)(layer_h1) - layer_h2 = Conv2D(32, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h1) # 卷积层 - layer_h3 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h2) # 池化层 - - # layer_h3 = Dropout(0.05)(layer_h3) # 随机中断部分神经网络连接,防止过拟合 - layer_h4 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h3) # 卷积层 - #layer_h4 = Dropout(0.1)(layer_h4) - layer_h5 = Conv2D(64, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h4) # 卷积层 - layer_h6 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h5) # 池化层 - - #layer_h6 = Dropout(0.1)(layer_h6) - layer_h7 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h6) # 卷积层 - #layer_h7 = Dropout(0.15)(layer_h7) - layer_h8 = Conv2D(128, (3, 3), 
use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h7) # 卷积层 - layer_h9 = MaxPooling2D(pool_size=2, strides=None, - padding="valid")(layer_h8) # 池化层 - - #layer_h9 = Dropout(0.15)(layer_h9) - layer_h10 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h9) # 卷积层 - #layer_h10 = Dropout(0.2)(layer_h10) - layer_h11 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h10) # 卷积层 - layer_h12 = MaxPooling2D( - pool_size=1, strides=None, padding="valid")(layer_h11) # 池化层 - - #layer_h12 = Dropout(0.2)(layer_h12) - layer_h13 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h12) # 卷积层 - #layer_h13 = Dropout(0.3)(layer_h13) - layer_h14 = Conv2D(128, (3, 3), use_bias=True, activation='relu', - padding='same', kernel_initializer='he_normal')(layer_h13) # 卷积层 - layer_h15 = MaxPooling2D( - pool_size=1, strides=None, padding="valid")(layer_h14) # 池化层 - - #test=Model(inputs = input_data, outputs = layer_h12) - # test.summary() - - layer_h16 = Reshape((200, 3200))(layer_h15) # Reshape层 - - # layer_h16 = Dropout(0.3)(layer_h16) # 随机中断部分神经网络连接,防止过拟合 - layer_h17 = Dense(128, activation="relu", use_bias=True, - kernel_initializer='he_normal')(layer_h16) # 全连接层 - - inner = layer_h17 - # layer_h5 = LSTM(256, activation='relu', use_bias=True, return_sequences=True)(layer_h4) # LSTM层 - - rnn_size = 128 - gru_1 = GRU(rnn_size, return_sequences=True, - kernel_initializer='he_normal', name='gru1')(inner) - gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True, - kernel_initializer='he_normal', name='gru1_b')(inner) - gru1_merged = add([gru_1, gru_1b]) - gru_2 = GRU(rnn_size, return_sequences=True, - kernel_initializer='he_normal', name='gru2')(gru1_merged) - gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True, - kernel_initializer='he_normal', name='gru2_b')(gru1_merged) - - gru2 = concatenate([gru_2, gru_2b]) - - layer_h20 = gru2 - #layer_h20 = Dropout(0.4)(gru2) - layer_h21 = Dense(128, activation="relu", use_bias=True, - kernel_initializer='he_normal')(layer_h20) # 全连接层 - - #layer_h17 = Dropout(0.3)(layer_h17) - layer_h22 = Dense(self.MS_OUTPUT_SIZE, use_bias=True, - kernel_initializer='he_normal')(layer_h21) # 全连接层 - - y_pred = Activation('softmax', name='Activation0')(layer_h22) - model_data = Model(inputs=input_data, outputs=y_pred) - # model_data.summary() - - labels = Input(name='the_labels', shape=[ - self.label_max_string_length], dtype='float32') - input_length = Input(name='input_length', shape=[1], dtype='int64') - label_length = Input(name='label_length', shape=[1], dtype='int64') - # Keras doesn't currently support loss funcs with extra parameters - # so CTC loss is implemented in a lambda layer - - # layer_out = Lambda(ctc_lambda_func,output_shape=(self.MS_OUTPUT_SIZE, ), name='ctc')([y_pred, labels, input_length, label_length])#(layer_h6) # CTC - loss_out = Lambda(self.ctc_lambda_func, output_shape=(1,), name='ctc')( - [y_pred, labels, input_length, label_length]) - - model = Model(inputs=[input_data, labels, - input_length, label_length], outputs=loss_out) - - model.summary() - - # clipnorm seems to speeds up convergence - #sgd = SGD(lr=0.0001, decay=1e-6, momentum=0.9, nesterov=True, clipnorm=5) - #ada_d = Adadelta(lr = 0.01, rho = 0.95, epsilon = 1e-06) - opt = Adam(lr=0.001, beta_1=0.9, beta_2=0.999, - decay=0.0, epsilon=10e-8) - #model.compile(loss={'ctc': 
lambda y_true, y_pred: y_pred}, optimizer=sgd) - - model.build((self.AUDIO_LENGTH, self.AUDIO_FEATURE_LENGTH, 1)) - model = ParallelModel(model, NUM_GPU) - - model.compile(loss={'ctc': lambda y_true, - y_pred: y_pred}, optimizer=opt) - - # captures output of softmax so we can decode the output during visualization - test_func = K.function([input_data], [y_pred]) - - #print('[*提示] 创建模型成功,模型编译成功') - print('[*Info] Create Model Successful, Compiles Model Successful. ') - return model, model_data - - def ctc_lambda_func(self, args): - y_pred, labels, input_length, label_length = args - - y_pred = y_pred[:, :, :] - #y_pred = y_pred[:, 2:, :] - return K.ctc_batch_cost(labels, y_pred, input_length, label_length) - - def TrainModel(self, datapath, epoch=2, save_step=1000, batch_size=32, filename=abspath + 'model_speech/m' + ModelName + '/speech_model'+ModelName): - ''' - 训练模型 - 参数: - datapath: 数据保存的路径 - epoch: 迭代轮数 - save_step: 每多少步保存一次模型 - filename: 默认保存文件名,不含文件后缀名 - ''' - data = DataSpeech(datapath, 'train') - - num_data = data.GetDataNum() # 获取数据的数量 - - yielddatas = data.data_genetator(batch_size, self.AUDIO_LENGTH) - - for epoch in range(epoch): # 迭代轮数 - print('[running] train epoch %d .' % epoch) - n_step = 0 # 迭代数据数 - while True: - try: - print('[message] epoch %d . Have train datas %d+' % - (epoch, n_step*save_step)) - # data_genetator是一个生成器函数 - - #self._model.fit_generator(yielddatas, save_step, nb_worker=2) - self._model.fit_generator(yielddatas, save_step) - n_step += 1 - except StopIteration: - print('[error] generator error. please check data format.') - break - - self.SaveModel(comment='_e_'+str(epoch) + - '_step_'+str(n_step * save_step)) - self.TestModel( - self.datapath, str_dataset='train', data_count=4) - self.TestModel(self.datapath, str_dataset='dev', data_count=4) - - def LoadModel(self, filename=abspath + 'model_speech/m'+ModelName+'/speech_model'+ModelName+'.model'): - ''' - 加载模型参数 - ''' - self._model.load_weights(filename) - self.base_model.load_weights(filename + '.base') - - def SaveModel(self, filename=abspath + 'model_speech/m'+ModelName+'/speech_model'+ModelName, comment=''): - ''' - 保存模型参数 - ''' - self._model.save_weights(filename+comment+'.model') - self.base_model.save_weights(filename + comment + '.model.base') - f = open('step'+ModelName+'.txt', 'w') - f.write(filename+comment) - f.close() - - def TestModel(self, datapath='', str_dataset='dev', data_count=32, out_report=False, show_ratio=True): - ''' - 测试检验模型效果 - ''' - data = DataSpeech(self.datapath, str_dataset) - # data.LoadDataList(str_dataset) - num_data = data.GetDataNum() # 获取数据的数量 - if(data_count <= 0 or data_count > num_data): # 当data_count为小于等于0或者大于测试数据量的值时,则使用全部数据来测试 - data_count = num_data - - try: - ran_num = random.randint(0, num_data - 1) # 获取一个随机数 - - words_num = 0 - word_error_num = 0 - - nowtime = time.strftime( - '%Y%m%d_%H%M%S', time.localtime(time.time())) - if(out_report == True): - txt_obj = open('Test_Report_' + str_dataset + '_' + - nowtime + '.txt', 'w', encoding='UTF-8') # 打开文件并读入 - - txt = '' - for i in range(data_count): - data_input, data_labels = data.GetData( - (ran_num + i) % num_data) # 从随机数开始连续向后取一定数量数据 - - # 数据格式出错处理 开始 - # 当输入的wav文件长度过长时自动跳过该文件,转而使用下一个wav文件来运行 - num_bias = 0 - while(data_input.shape[0] > self.AUDIO_LENGTH): - print('*[Error]', 'wave data lenghth of num', (ran_num + i) % num_data, - 'is too long.', '\n A Exception raise when test Speech Model.') - num_bias += 1 - data_input, data_labels = data.GetData( - (ran_num + i + num_bias) % num_data) # 从随机数开始连续向后取一定数量数据 - 
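
The 252 and 261 variants replace part of the CNN head with two bidirectional GRU layers built by hand from `add` and `concatenate`. A minimal sketch of just that block, assuming Keras 2.2.x; the `(200, 128)` input matches what the Reshape-plus-Dense above produces, the rest is toy. Unlike `keras.layers.Bidirectional`, this hand-rolled version never re-reverses the `go_backwards` outputs before merging, which is a quirk carried over from the source:

```python
from keras.models import Model
from keras.layers import Input, GRU
from keras.layers.merge import add, concatenate

rnn_size = 128
seq_in = Input(shape=(200, 128))  # 200 time steps of 128 features

gru_1 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal',
            name='gru1')(seq_in)
gru_1b = GRU(rnn_size, return_sequences=True, go_backwards=True,
             kernel_initializer='he_normal', name='gru1_b')(seq_in)
gru1_merged = add([gru_1, gru_1b])        # elementwise sum keeps width at 128

gru_2 = GRU(rnn_size, return_sequences=True, kernel_initializer='he_normal',
            name='gru2')(gru1_merged)
gru_2b = GRU(rnn_size, return_sequences=True, go_backwards=True,
             kernel_initializer='he_normal', name='gru2_b')(gru1_merged)
bi_gru = concatenate([gru_2, gru_2b])     # concatenation doubles width to 256

Model(inputs=seq_in, outputs=bi_gru).summary()
```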
# 数据格式出错处理 结束 - - pre = self.Predict(data_input, data_input.shape[0] // 8) - - words_n = data_labels.shape[0] # 获取每个句子的字数 - words_num += words_n # 把句子的总字数加上 - edit_distance = GetEditDistance(data_labels, pre) # 获取编辑距离 - if(edit_distance <= words_n): # 当编辑距离小于等于句子字数时 - word_error_num += edit_distance # 使用编辑距离作为错误字数 - else: # 否则肯定是增加了一堆乱七八糟的奇奇怪怪的字 - word_error_num += words_n # 就直接加句子本来的总字数就好了 - - if(i % 10 == 0 and show_ratio == True): - print('Test Count: ', i, '/', data_count) - - txt = '' - if(out_report == True): - txt += str(i) + '\n' - txt += 'True:\t' + str(data_labels) + '\n' - txt += 'Pred:\t' + str(pre) + '\n' - txt += '\n' - txt_obj.write(txt) - - #print('*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率:', word_error_num / words_num * 100, '%') - print('*[Test Result] Speech Recognition ' + str_dataset + - ' set word error ratio: ', word_error_num / words_num * 100, '%') - if(out_report == True): - txt = '*[测试结果] 语音识别 ' + str_dataset + ' 集语音单字错误率: ' + \ - str(word_error_num / words_num * 100) + ' %' - txt_obj.write(txt) - txt_obj.close() - - except StopIteration: - print('[Error] Model Test Error. please check data format.') - - def Predict(self, data_input, input_len): - ''' - 预测结果 - 返回语音识别后的拼音符号列表 - ''' - - batch_size = 1 - in_len = np.zeros((batch_size), dtype=np.int32) - - in_len[0] = input_len - - x_in = np.zeros( - (batch_size, 1600, self.AUDIO_FEATURE_LENGTH, 1), dtype=np.float) - - for i in range(batch_size): - x_in[i, 0:len(data_input)] = data_input - - base_pred = self.base_model.predict(x=x_in) - - #print('base_pred:\n', base_pred) - - #y_p = base_pred - # for j in range(200): - # mean = np.sum(y_p[0][j]) / y_p[0][j].shape[0] - # print('max y_p:',np.max(y_p[0][j]),'min y_p:',np.min(y_p[0][j]),'mean y_p:',mean,'mid y_p:',y_p[0][j][100]) - # print('argmin:',np.argmin(y_p[0][j]),'argmax:',np.argmax(y_p[0][j])) - # count=0 - # for i in range(y_p[0][j].shape[0]): - # if(y_p[0][j][i] < mean): - # count += 1 - # print('count:',count) - - base_pred = base_pred[:, :, :] - #base_pred =base_pred[:, 2:, :] - - r = K.ctc_decode(base_pred, in_len, greedy=True, - beam_width=100, top_paths=1) - - #print('r', r) - - r1 = K.get_value(r[0][0]) - #print('r1', r1) - - #r2 = K.get_value(r[1]) - # print(r2) - - r1 = r1[0] - - return r1 - pass - - def RecognizeSpeech(self, wavsignal, fs): - ''' - 最终做语音识别用的函数,识别一个wav序列的语音 - 不过这里现在还有bug - ''' - - #data = self.data - #data = DataSpeech('E:\\语音数据集') - # data.LoadDataList('dev') - # 获取输入特征 - #data_input = GetMfccFeature(wavsignal, fs) - # t0=time.time() - data_input = GetFrequencyFeature3(wavsignal, fs) - # t1=time.time() - #print('time cost:',t1-t0) - - input_length = len(data_input) - input_length = input_length // 8 - - data_input = np.array(data_input, dtype=np.float) - # print(data_input,data_input.shape) - data_input = data_input.reshape( - data_input.shape[0], data_input.shape[1], 1) - # t2=time.time() - r1 = self.Predict(data_input, input_length) - # t3=time.time() - #print('time cost:',t3-t2) - list_symbol_dic = GetSymbolList(self.datapath) # 获取拼音列表 - - r_str = [] - for i in r1: - r_str.append(list_symbol_dic[i]) - - return r_str - pass - - def RecognizeSpeech_FromFile(self, filename): - ''' - 最终做语音识别用的函数,识别指定文件名的语音 - ''' - - wavsignal, fs = read_wav_data(filename) - - r = self.RecognizeSpeech(wavsignal, fs) - - return r - - pass - - @property - def model(self): - ''' - 返回keras model - ''' - return self._model - - -if(__name__ == '__main__'): - - #import tensorflow as tf - #from keras.backend.tensorflow_backend import set_session - 
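
`TestModel` scores the recognizer with an edit distance clamped to the label length. The repo's `GetEditDistance` lives in `general_function.gen_func`, which is not part of this dump; the standalone Levenshtein version below is an assumption, but it matches how the function is used:

```python
def edit_distance(ref, hyp):
    # classic dynamic-programming Levenshtein distance
    dp = [[i + j if i * j == 0 else 0 for j in range(len(hyp) + 1)]
          for i in range(len(ref) + 1)]
    for i in range(1, len(ref) + 1):
        for j in range(1, len(hyp) + 1):
            cost = 0 if ref[i - 1] == hyp[j - 1] else 1
            dp[i][j] = min(dp[i - 1][j] + 1,          # deletion
                           dp[i][j - 1] + 1,          # insertion
                           dp[i - 1][j - 1] + cost)   # substitution
    return dp[-1][-1]

words_num = word_error_num = 0
pairs = [(['ni3', 'hao3', 'ma5'], ['ni3', 'hao4', 'ma5'])]  # (label, prediction)
for ref, hyp in pairs:
    d = edit_distance(ref, hyp)
    # clamp at the label length so a garbage-heavy prediction
    # cannot push the error rate past 100%
    word_error_num += d if d <= len(ref) else len(ref)
    words_num += len(ref)
print('word error ratio:', word_error_num / words_num * 100, '%')
```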
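
`Predict` then hands the base model's per-frame softmax to `K.ctc_decode`. The `input_length // 8` mirrors the three stride-2 max-poolings in the CNN stack, which shrink 1600 input frames to 200 output frames, so the decoder must be told how many of those frames are valid. A toy sketch of just the decode step, with random probabilities standing in for real network output:

```python
import numpy as np
from keras import backend as K

n_frames, n_classes = 200, 1422
# each frame gets a random distribution over the 1422 pinyin classes
softmax_out = np.random.dirichlet(np.ones(n_classes), size=(1, n_frames))
in_len = np.array([1600 // 8], dtype=np.int32)  # valid frames after pooling

decoded, log_prob = K.ctc_decode(K.constant(softmax_out), in_len,
                                 greedy=True, beam_width=100, top_paths=1)
pinyin_ids = K.get_value(decoded[0])[0]  # blank- and duplicate-collapsed ids
print(pinyin_ids)
```

As in the source, `greedy=True` makes the `beam_width` argument irrelevant; the call is kept identical to the scraped one.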
#os.environ["CUDA_VISIBLE_DEVICES"] = "1" - # 进行配置,使用70%的GPU - #config = tf.ConfigProto() - #config.gpu_options.per_process_gpu_memory_fraction = 0.95 - # config.gpu_options.allow_growth=True #不全部占满显存, 按需分配 - # set_session(tf.Session(config=config)) - - datapath = abspath + '' - modelpath = abspath + 'model_speech' - - if(not os.path.exists(modelpath)): # 判断保存模型的目录是否存在 - os.makedirs(modelpath) # 如果不存在,就新建一个,避免之后保存模型的时候炸掉 - - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if(system_type == 'Windows'): - datapath = 'E:\\语音数据集' - modelpath = modelpath + '\\' - elif(system_type == 'Linux'): - datapath = abspath + 'dataset' - modelpath = modelpath + '/' - else: - print('*[Message] Unknown System\n') - datapath = 'dataset' - modelpath = modelpath + '/' - - ms = ModelSpeech(datapath) - - #ms.LoadModel(modelpath + 'm261/speech_model261_e_0_step_98000.model') - ms.TrainModel(datapath, epoch=50, batch_size=16, save_step=500) - #ms.TestModel(datapath, str_dataset='test', data_count = 128, out_report = True) - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV') - #r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav') - #print('*[提示] 语音识别结果:\n',r) -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -@author: nl8590687 -语音识别API的HTTP服务器程序 - -""" -import socket -import http.server -import urllib -import keras -from SpeechModel251 import ModelSpeech -from LanguageModel import ModelLanguage - -datapath = './' -modelpath = 'model_speech/' -ms = ModelSpeech(datapath) -ms.LoadModel(modelpath + 'm251/speech_model251_e_0_step_12000.model') - -ml = ModelLanguage('model_language') -ml.LoadModel() - - -class TestHTTPHandle(http.server.BaseHTTPRequestHandler): - def setup(self): - self.request.settimeout(10) - http.server.BaseHTTPRequestHandler.setup(self) - - def _set_response(self): - self.send_response(200) - self.send_header('Content-type', 'text/html') - self.end_headers() - - def do_GET(self): - - buf = 'ASRT_SpeechRecognition API' - self.protocal_version = 'HTTP/1.1' - - self._set_response() - - buf = bytes(buf, encoding="utf-8") - self.wfile.write(buf) - - def do_POST(self): - ''' - 处理通过POST方式传递过来并接收的语音数据 - 通过语音模型和语言模型计算得到语音识别结果并返回 - ''' - path = self.path - print(path) - # 获取post提交的数据 - datas = self.rfile.read(int(self.headers['content-length'])) - #datas = urllib.unquote(datas).decode("utf-8", 'ignore') - datas = datas.decode('utf-8') - datas_split = datas.split('&') - token = '' - fs = 0 - wavs = [] - # type = 'wavfilebytes' # wavfilebytes or python-list - - for line in datas_split: - [key, value] = line.split('=') - if('wavs' == key and '' != value): - wavs.append(int(value)) - elif('fs' == key): - fs = int(value) - elif('token' == key): - token = value - # elif('type' == key): - # type = value - else: - print(key, value) - - if(token != 'qwertasd'): - buf = '403' - print(buf) - buf = bytes(buf, encoding="utf-8") - self.wfile.write(buf) - return - - # if('python-list' == type): - if(len(wavs) > 0): - r = self.recognize([wavs], fs) - else: - r = '' - # else: - # r = self.recognize_from_file('') - - if(token == 'qwertasd'): - #buf = '成功\n'+'wavs:\n'+str(wavs)+'\nfs:\n'+str(fs) - buf = r - else: - buf = '403' - - # print(datas) - - self._set_response() - - #buf = ' \n \n\nPost page\n \nPost Data:%s
Path:%s\n \n'%(datas,self.path) - print(buf) - buf = bytes(buf, encoding="utf-8") - self.wfile.write(buf) - - def recognize(self, wavs, fs): - r = '' - try: - r_speech = ms.RecognizeSpeech(wavs, fs) - print(r_speech) - str_pinyin = r_speech - r = ml.SpeechToText(str_pinyin) - except: - r = '' - print('[*Message] Server raise a bug. ') - return r - pass - - def recognize_from_file(self, filename): - pass - - -class HTTPServerV6(http.server.HTTPServer): - address_family = socket.AF_INET6 - - -def start_server(ip, port): - - if(':' in ip): - http_server = HTTPServerV6((ip, port), TestHTTPHandle) - else: - http_server = http.server.HTTPServer((ip, int(port)), TestHTTPHandle) - - print('服务器已开启') - - try: - http_server.serve_forever() # 设置一直监听并接收请求 - except KeyboardInterrupt: - pass - http_server.server_close() - print('HTTP server closed') - - -if __name__ == '__main__': - start_server('', 20000) # For IPv4 Network Only - # start_server('::', 20000) # For IPv6 Network -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import platform as plat -import os - -import numpy as np -from general_function.file_wav import * -from general_function.file_dict import * - -import random -#import scipy.io.wavfile as wav -from scipy.fftpack import fft - - -class DataSpeech(): - - def __init__(self, path, type, LoadToMem=False, MemWavCount=10000): - ''' - 初始化 - 参数: - path:数据存放位置根目录 - ''' - - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - - self.datapath = path # 数据存放位置根目录 - self.type = type # 数据类型,分为三种:训练集(train)、验证集(dev)、测试集(test) - - self.slash = '' - if(system_type == 'Windows'): - self.slash = '\\' # 反斜杠 - elif(system_type == 'Linux'): - self.slash = '/' # 正斜杠 - else: - print('*[Message] Unknown System\n') - self.slash = '/' # 正斜杠 - - if(self.slash != self.datapath[-1]): # 在目录路径末尾增加斜杠 - self.datapath = self.datapath + self.slash - - self.dic_wavlist_thchs30 = {} - self.dic_symbollist_thchs30 = {} - self.dic_wavlist_stcmds = {} - self.dic_symbollist_stcmds = {} - - self.SymbolNum = 0 # 记录拼音符号数量 - self.list_symbol = self.GetSymbolList() # 全部汉语拼音符号列表 - self.list_wavnum = [] # wav文件标记列表 - self.list_symbolnum = [] # symbol标记列表 - - self.DataNum = 0 # 记录数据量 - self.LoadDataList() - - self.wavs_data = [] - self.LoadToMem = LoadToMem - self.MemWavCount = MemWavCount - pass - - def LoadDataList(self): - ''' - 加载用于计算的数据列表 - 参数: - type:选取的数据集类型 - train 训练集 - dev 开发集 - test 测试集 - ''' - # 设定选取哪一项作为要使用的数据集 - if(self.type == 'train'): - filename_wavlist_thchs30 = 'thchs30' + self.slash + 'train.wav.lst' - filename_wavlist_stcmds = 'st-cmds' + self.slash + 'train.wav.txt' - filename_symbollist_thchs30 = 'thchs30' + self.slash + 'train.syllable.txt' - filename_symbollist_stcmds = 'st-cmds' + self.slash + 'train.syllable.txt' - elif(self.type == 'dev'): - filename_wavlist_thchs30 = 'thchs30' + self.slash + 'cv.wav.lst' - filename_wavlist_stcmds = 'st-cmds' + self.slash + 'dev.wav.txt' - filename_symbollist_thchs30 = 'thchs30' + self.slash + 'cv.syllable.txt' - filename_symbollist_stcmds = 'st-cmds' + self.slash + 'dev.syllable.txt' - elif(self.type == 'test'): - filename_wavlist_thchs30 = 'thchs30' + self.slash + 'test.wav.lst' - filename_wavlist_stcmds = 'st-cmds' + self.slash + 'test.wav.txt' - filename_symbollist_thchs30 = 'thchs30' + self.slash + 'test.syllable.txt' - filename_symbollist_stcmds = 'st-cmds' + self.slash + 'test.syllable.txt' - else: - filename_wavlist = '' # 默认留空 - filename_symbollist = '' - # 读取数据列表,wav文件列表和其对应的符号列表 - self.dic_wavlist_thchs30, self.list_wavnum_thchs30 = get_wav_list( - 
self.datapath + filename_wavlist_thchs30) - self.dic_wavlist_stcmds, self.list_wavnum_stcmds = get_wav_list( - self.datapath + filename_wavlist_stcmds) - - self.dic_symbollist_thchs30, self.list_symbolnum_thchs30 = get_wav_symbol( - self.datapath + filename_symbollist_thchs30) - self.dic_symbollist_stcmds, self.list_symbolnum_stcmds = get_wav_symbol( - self.datapath + filename_symbollist_stcmds) - self.DataNum = self.GetDataNum() - - def GetDataNum(self): - ''' - 获取数据的数量 - 当wav数量和symbol数量一致的时候返回正确的值,否则返回-1,代表出错。 - ''' - num_wavlist_thchs30 = len(self.dic_wavlist_thchs30) - num_symbollist_thchs30 = len(self.dic_symbollist_thchs30) - num_wavlist_stcmds = len(self.dic_wavlist_stcmds) - num_symbollist_stcmds = len(self.dic_symbollist_stcmds) - if(num_wavlist_thchs30 == num_symbollist_thchs30 and num_wavlist_stcmds == num_symbollist_stcmds): - DataNum = num_wavlist_thchs30 + num_wavlist_stcmds - else: - DataNum = -1 - - return DataNum - - def GetData(self, n_start, n_amount=1): - ''' - 读取数据,返回神经网络输入值和输出值矩阵(可直接用于神经网络训练的那种) - 参数: - n_start:从编号为n_start数据开始选取数据 - n_amount:选取的数据数量,默认为1,即一次一个wav文件 - 返回: - 三个包含wav特征矩阵的神经网络输入值,和一个标定的类别矩阵神经网络输出值 - ''' - bili = 2 - if(self.type == 'train'): - bili = 11 - - # 读取一个文件 - if(n_start % bili == 0): - filename = self.dic_wavlist_thchs30[self.list_wavnum_thchs30[n_start // bili]] - list_symbol = self.dic_symbollist_thchs30[self.list_symbolnum_thchs30[n_start // bili]] - else: - n = n_start // bili * (bili - 1) - yushu = n_start % bili - length = len(self.list_wavnum_stcmds) - filename = self.dic_wavlist_stcmds[self.list_wavnum_stcmds[( - n + yushu - 1) % length]] - list_symbol = self.dic_symbollist_stcmds[self.list_symbolnum_stcmds[( - n + yushu - 1) % length]] - - if('Windows' == plat.system()): - # windows系统下需要执行这一行,对文件路径做特别处理 - filename = filename.replace('/', '\\') - - wavsignal, fs = read_wav_data(self.datapath + filename) - - # 获取输出特征 - - feat_out = [] - # print("数据编号",n_start,filename) - for i in list_symbol: - if('' != i): - n = self.SymbolToNum(i) - # v=self.NumToVector(n) - # feat_out.append(v) - feat_out.append(n) - # print('feat_out:',feat_out) - - # 获取输入特征 - data_input = GetFrequencyFeature3(wavsignal, fs) - #data_input = np.array(data_input) - data_input = data_input.reshape( - data_input.shape[0], data_input.shape[1], 1) - # arr_zero = np.zeros((1, 39), dtype=np.int16) #一个全是0的行向量 - - # while(len(data_input)<1600): #长度不够时补全到1600 - # data_input = np.row_stack((data_input,arr_zero)) - - #data_input = data_input.T - data_label = np.array(feat_out) - return data_input, data_label - - def data_genetator(self, batch_size=32, audio_length=1600): - ''' - 数据生成器函数,用于Keras的generator_fit训练 - batch_size: 一次产生的数据量 - 需要再修改。。。 - ''' - - #labels = [] - # for i in range(0,batch_size): - # #input_length.append([1500]) - # labels.append([0.0]) - - #labels = np.array(labels, dtype = np.float) - labels = np.zeros((batch_size, 1), dtype=np.float) - # print(input_length,len(input_length)) - - while True: - X = np.zeros((batch_size, audio_length, 200, 1), dtype=np.float) - #y = np.zeros((batch_size, 64, self.SymbolNum), dtype=np.int16) - y = np.zeros((batch_size, 64), dtype=np.int16) - - #generator = ImageCaptcha(width=width, height=height) - input_length = [] - label_length = [] - - for i in range(batch_size): - ran_num = random.randint(0, self.DataNum - 1) # 获取一个随机数 - data_input, data_labels = self.GetData(ran_num) # 通过随机数取一个数据 - # data_input, data_labels = self.GetData((ran_num + i) % self.DataNum) # 从随机数开始连续向后取一定数量数据 - - input_length.append( - data_input.shape[0] // 8 + 
data_input.shape[0] % 8) - #print(data_input, data_labels) - # print('data_input长度:',len(data_input)) - - X[i, 0:len(data_input)] = data_input - # print('data_labels长度:',len(data_labels)) - # print(data_labels) - y[i, 0:len(data_labels)] = data_labels - # print(i,y[i].shape) - #y[i] = y[i].T - # print(i,y[i].shape) - label_length.append([len(data_labels)]) - - label_length = np.matrix(label_length) - input_length = np.array([input_length]).T - #input_length = np.array(input_length) - # print('input_length:\n',input_length) - #X=X.reshape(batch_size, audio_length, 200, 1) - # print(X) - yield [X, y, input_length, label_length], labels - pass - - def GetSymbolList(self): - ''' - 加载拼音符号列表,用于标记符号 - 返回一个列表list类型变量 - ''' - txt_obj = open('dict.txt', 'r', encoding='UTF-8') # 打开文件并读入 - txt_text = txt_obj.read() - txt_lines = txt_text.split('\n') # 文本分割 - list_symbol = [] # 初始化符号列表 - for i in txt_lines: - if(i != ''): - txt_l = i.split('\t') - list_symbol.append(txt_l[0]) - txt_obj.close() - list_symbol.append('_') - self.SymbolNum = len(list_symbol) - return list_symbol - - def GetSymbolNum(self): - ''' - 获取拼音符号数量 - ''' - return len(self.list_symbol) - - def SymbolToNum(self, symbol): - ''' - 符号转为数字 - ''' - if(symbol != ''): - return self.list_symbol.index(symbol) - return self.SymbolNum - - def NumToVector(self, num): - ''' - 数字转为对应的向量 - ''' - v_tmp = [] - for i in range(0, len(self.list_symbol)): - if(i == num): - v_tmp.append(1) - else: - v_tmp.append(0) - v = np.array(v_tmp) - return v - - -if(__name__ == '__main__'): - # path='E:\\语音数据集' - # l=DataSpeech(path) - # l.LoadDataList('train') - # print(l.GetDataNum()) - # print(l.GetData(0)) - # aa=l.data_genetator() - # for i in aa: - # a,b=i - # print(a,b) - pass -import wave -from pyaudio import PyAudio, paInt16 - -framerate = 16000 -NUM_SAMPLES = 2000 -channels = 1 -sampwidth = 2 -TIME = 10 - - -def save_wave_file(filename, data): - '''save the date to the wavfile''' - wf = wave.open(filename, 'wb') - wf.setnchannels(channels) - wf.setsampwidth(sampwidth) - wf.setframerate(framerate) - wf.writeframes(b"".join(data)) - wf.close() - - -def my_record(): - pa = PyAudio() - stream = pa.open(format=paInt16, channels=1, - rate=framerate, input=True, - frames_per_buffer=NUM_SAMPLES) - my_buf = [] - count = 0 - while count < TIME*8: # 控制录音时间 - string_audio_data = stream.read(NUM_SAMPLES) - my_buf.append(string_audio_data) - count += 1 - print('.') - save_wave_file('01.wav', my_buf) - stream.close() - - -chunk = 2014 - - -def play(): - wf = wave.open(r"01.wav", 'rb') - p = PyAudio() - stream = p.open(format=p.get_format_from_width(wf.getsampwidth()), - channels=wf.getnchannels(), rate=wf.getframerate(), output=True) - while True: - data = wf.readframes(chunk) - if data == "": - break - stream.write(data) - stream.close() - p.terminate() - - -if __name__ == '__main__': - my_record() - print('Over!') - play() -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -""" -@author: nl8590687 -用于测试整个一套语音识别系统的程序 -语音模型 + 语言模型 -""" -import platform as plat - -from SpeechModel251 import ModelSpeech -from LanguageModel2 import ModelLanguage -from keras import backend as K - -datapath = '' -modelpath = 'model_speech' - -system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 -if(system_type == 'Windows'): - datapath = 'D:\\语音数据集' - modelpath = modelpath + '\\' -elif(system_type == 'Linux'): - datapath = 'dataset' - modelpath = modelpath + '/' -else: - print('*[Message] Unknown System\n') - datapath = 'dataset' - modelpath = modelpath + '/' - -ms = ModelSpeech(datapath) - 
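# A corrected playback sketch for the play() helper in the recording script
# above (an editorial example; the 1024-frame chunk size is an assumption,
# and the original's chunk = 2014 looks like a typo for 1024). Under
# Python 3, wave.Wave_read.readframes() returns bytes, so the original
# loop's `if data == "":` test never fires and playback never terminates;
# testing for the empty bytes object b'' fixes that.
import wave
from pyaudio import PyAudio

def play_fixed(path='01.wav', chunk=1024):
    wf = wave.open(path, 'rb')
    p = PyAudio()
    stream = p.open(format=p.get_format_from_width(wf.getsampwidth()),
                    channels=wf.getnchannels(), rate=wf.getframerate(),
                    output=True)
    data = wf.readframes(chunk)
    while data:  # b'' at end of file is falsy, so the loop terminates
        stream.write(data)
        data = wf.readframes(chunk)
    stream.stop_stream()
    stream.close()
    p.terminate()
    wf.close()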
-#ms.LoadModel(modelpath + 'm22_2\\0\\speech_model22_e_0_step_257000.model')
-ms.LoadModel(modelpath + 'm251\\speech_model251_e_0_step_12000.model')
-
-#ms.TestModel(datapath, str_dataset='test', data_count = 64, out_report = True)
-r = ms.RecognizeSpeech_FromFile(
-    'D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0052.wav')
-#r = ms.RecognizeSpeech_FromFile('D:\语音数据集\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav')
-#r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav')
-#r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\data_thchs30\\data\\A11_167.WAV')
-#r = ms.RecognizeSpeech_FromFile('D:\\语音数据集\\data_thchs30\\data\\D4_750.wav')
-
-K.clear_session()
-
-print('*[Info] Speech recognition result:\n', r)
-
-
-ml = ModelLanguage('model_language')
-ml.LoadModel()
-
-#str_pinyin = ['zhe4','zhen1','shi4','ji2', 'hao3','de5']
-#str_pinyin = ['jin1', 'tian1', 'shi4', 'xing1', 'qi1', 'san1']
-#str_pinyin = ['ni3', 'hao3','a1']
-str_pinyin = r
-#str_pinyin = ['su1', 'bei3', 'jun1', 'de5', 'yi4','xie1', 'ai4', 'guo2', 'jiang4', 'shi4', 'ma3', 'zhan4', 'shan1', 'ming2', 'yi1', 'dong4', 'ta1', 'ju4', 'su1', 'bi3', 'ai4', 'dan4', 'tian2','mei2', 'bai3', 'ye3', 'fei1', 'qi3', 'kan4', 'zhan4']
-r = ml.SpeechToText(str_pinyin)
-print('Speech-to-text result:\n', r)
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-'''
-@author: nl8590687
-Test-only client for asrserver
-
-'''
-
-import requests
-from general_function.file_wav import *
-
-url = 'http://127.0.0.1:20000/'
-
-token = 'qwertasd'
-
-wavsignal, fs = read_wav_data(
-    'E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0052.wav')
-
-# print(wavsignal,fs)
-
-datas = {'token': token, 'fs': fs, 'wavs': wavsignal}
-
-r = requests.post(url, datas)
-
-r.encoding = 'utf-8'
-
-print(r.text)
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: nl8590687
-Program for testing the acoustic (speech) model of the speech recognition system
-
-"""
-import platform as plat
-import os
-
-import tensorflow as tf
-from keras.backend.tensorflow_backend import set_session
-
-
-from SpeechModel251 import ModelSpeech
-
-
-os.environ["CUDA_VISIBLE_DEVICES"] = "0"
-# Configure TensorFlow to use 90% of the GPU memory
-config = tf.ConfigProto()
-config.gpu_options.per_process_gpu_memory_fraction = 0.9
-# config.gpu_options.allow_growth=True  # allocate GPU memory on demand instead of reserving it all
-set_session(tf.Session(config=config))
-
-
-datapath = ''
-modelpath = 'model_speech'
-
-
-if(not os.path.exists(modelpath)):  # check whether the directory for saved models exists
-    os.makedirs(modelpath)  # create it if missing, so saving the model later does not fail
-
-system_type = plat.system()  # file path conventions differ across systems, so check the platform
-if(system_type == 'Windows'):
-    datapath = 'E:\\语音数据集'
-    modelpath = modelpath + '\\'
-elif(system_type == 'Linux'):
-    datapath = 'dataset'
-    modelpath = modelpath + '/'
-else:
-    print('*[Message] Unknown System\n')
-    datapath = 'dataset'
-    modelpath = modelpath + '/'
-
-ms = ModelSpeech(datapath)
-
-ms.LoadModel(modelpath + 'm251/speech_model251_e_0_step_42500.model')
-
-ms.TestModel(datapath, str_dataset='test', data_count=128, out_report=True)
-
-#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00241I0053.wav')
-#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\ST-CMDS-20170001_1-OS\\20170001P00020I0087.wav')
-#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\train\\A11\\A11_167.WAV')
-#r = ms.RecognizeSpeech_FromFile('E:\\语音数据集\\wav\\test\\D4\\D4_750.wav')
-#print('*[Info] Speech recognition result:\n',r)
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-@author: nl8590687
-Program for training the acoustic (speech) model of the speech recognition system
-
-"""
-import platform as plat
-import os
-
-import tensorflow as tf
-from keras.backend.tensorflow_backend import set_session
-
-
-from SpeechModel251 import ModelSpeech
-
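# A hedged client sketch matching the form parsing in the API server above:
# the handler splits the raw POST body on '&' and '=' and calls int() on every
# repeated 'wavs=<value>' pair, so the payload must be flat integers. requests
# encodes a list value as exactly such repeated pairs. The URL and token come
# from the test client above; the helper name and defaults are assumptions.
import requests

def post_wav_samples(samples, fs=16000, url='http://127.0.0.1:20000/'):
    # samples: iterable of int PCM amplitudes, sent as
    # token=...&fs=...&wavs=<s0>&wavs=<s1>&...
    form = {'token': 'qwertasd', 'fs': fs, 'wavs': [int(s) for s in samples]}
    r = requests.post(url, data=form)
    r.encoding = 'utf-8'
    return r.text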
-os.environ["CUDA_VISIBLE_DEVICES"] = "0" -# 进行配置,使用95%的GPU -config = tf.ConfigProto() -config.gpu_options.per_process_gpu_memory_fraction = 0.95 -# config.gpu_options.allow_growth=True #不全部占满显存, 按需分配 -set_session(tf.Session(config=config)) - - -datapath = '' -modelpath = 'model_speech' - - -if(not os.path.exists(modelpath)): # 判断保存模型的目录是否存在 - os.makedirs(modelpath) # 如果不存在,就新建一个,避免之后保存模型的时候炸掉 - -system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 -if(system_type == 'Windows'): - datapath = 'E:\\语音数据集' - modelpath = modelpath + '\\' -elif(system_type == 'Linux'): - datapath = 'dataset' - modelpath = modelpath + '/' -else: - print('*[Message] Unknown System\n') - datapath = 'dataset' - modelpath = modelpath + '/' - -ms = ModelSpeech(datapath) - -#ms.LoadModel(modelpath + 'speech_model251_e_0_step_327500.model') -ms.TrainModel(datapath, epoch=50, batch_size=16, save_step=500) -'''@package processing -This package contains all the functionality for data processing: -- feature computation -- feature storing and loading -- file interpretation -''' -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -''' -获取符号字典列表的程序 -''' -import platform as plat - - -def GetSymbolList_trash(datapath): - ''' - 加载拼音符号列表,用于标记符号 - 返回一个列表list类型变量 - ''' - if(datapath != ''): - if(datapath[-1] != '/' or datapath[-1] != '\\'): - datapath = datapath + '/' - - txt_obj = open(datapath + 'dict.txt', 'r', encoding='UTF-8') # 打开文件并读入 - txt_text = txt_obj.read() - txt_lines = txt_text.split('\n') # 文本分割 - list_symbol = [] # 初始化符号列表 - for i in txt_lines: - if(i != ''): - txt_l = i.split('\t') - list_symbol.append(txt_l[0]) - txt_obj.close() - list_symbol.append('_') - #SymbolNum = len(list_symbol) - return list_symbol - - -def GetSymbolList(datapath): - ''' - 加载拼音符号列表,用于标记符号 - 返回一个列表list类型变量 - ''' - - datapath_ = datapath.strip('dataset\\') - - system_type = plat.system() # 由于不同的系统的文件路径表示不一样,需要进行判断 - if (system_type == 'Windows'): - datapath_ += '\\' - elif (system_type == 'Linux'): - datapath_ += '/' - else: - print('*[Message] Unknown System\n') - datapath_ += '/' - - txt_obj = open(datapath_ + 'dict.txt', 'r', encoding='UTF-8') # 打开文件并读入 - txt_text = txt_obj.read() - txt_lines = txt_text.split('\n') # 文本分割 - list_symbol = [] # 初始化符号列表 - for i in txt_lines: - if(i != ''): - txt_l = i.split('\t') - list_symbol.append(txt_l[0]) - txt_obj.close() - list_symbol.append('_') - #SymbolNum = len(list_symbol) - return list_symbol - - -if(__name__ == '__main__'): - GetSymbolList('E:\\abc\\') -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -import os -import wave -import numpy as np -import matplotlib.pyplot as plt -import math -import time - -from python_speech_features import mfcc -from python_speech_features import delta -from python_speech_features import logfbank - -from scipy.fftpack import fft - - -def read_wav_data(filename): - ''' - 读取一个wav文件,返回声音信号的时域谱矩阵和播放时间 - ''' - wav = wave.open(filename, "rb") # 打开一个wav格式的声音文件流 - num_frame = wav.getnframes() # 获取帧数 - num_channel = wav.getnchannels() # 获取声道数 - framerate = wav.getframerate() # 获取帧速率 - num_sample_width = wav.getsampwidth() # 获取实例的比特宽度,即每一帧的字节数 - str_data = wav.readframes(num_frame) # 读取全部的帧 - wav.close() # 关闭流 - wave_data = np.fromstring(str_data, dtype=np.short) # 将声音文件数据转换为数组矩阵形式 - wave_data.shape = -1, num_channel # 按照声道数将数组整形,单声道时候是一列数组,双声道时候是两列的矩阵 - wave_data = wave_data.T # 将矩阵转置 - #wave_data = wave_data - return wave_data, framerate - - -def GetMfccFeature(wavsignal, fs): - # 获取输入特征 - feat_mfcc = mfcc(wavsignal[0], fs) - feat_mfcc_d = delta(feat_mfcc, 2) - 
feat_mfcc_dd = delta(feat_mfcc_d, 2) - # 返回值分别是mfcc特征向量的矩阵及其一阶差分和二阶差分矩阵 - wav_feature = np.column_stack((feat_mfcc, feat_mfcc_d, feat_mfcc_dd)) - return wav_feature - - -def GetFrequencyFeature(wavsignal, fs): - if(16000 != fs): - raise ValueError( - '[Error] ASRT currently only supports wav audio files with a sampling rate of 16000 Hz, but this audio is ' + str(fs) + ' Hz. ') - - # wav波形 加时间窗以及时移10ms - time_window = 25 # 单位ms - data_input = [] - - #print(int(len(wavsignal[0])/fs*1000 - time_window) // 10) - wav_length = len(wavsignal[0]) # 计算一条语音信号的原始长度 - # 计算循环终止的位置,也就是最终生成的窗数 - range0_end = int(len(wavsignal[0])/fs*1000 - time_window) // 10 - for i in range(0, range0_end): - p_start = i * 160 - p_end = p_start + 400 - data_line = [] - - for j in range(p_start, p_end): - data_line.append(wavsignal[0][j]) - # print('wavsignal[0][j]:\n',wavsignal[0][j]) - #data_line = abs(fft(data_line)) / len(wavsignal[0]) - data_line = fft(data_line) / wav_length - data_line2 = [] - for fre_sig in data_line: - # 分别取出频率信号的实部和虚部作为语音信号的频率特征 - # 直接使用复数的话,之后会被numpy将虚部丢弃,造成信息丢失 - # print('fre_sig:\n',fre_sig) - data_line2.append(fre_sig.real) - data_line2.append(fre_sig.imag) - - data_input.append(data_line2[0:len(data_line2)//2]) # 除以2是取一半数据,因为是对称的 - # print('data_input:\n',data_input) - # print('data_line:\n',data_line) - # print(len(data_input),len(data_input[0])) - return data_input - - -def GetFrequencyFeature2(wavsignal, fs): - if(16000 != fs): - raise ValueError( - '[Error] ASRT currently only supports wav audio files with a sampling rate of 16000 Hz, but this audio is ' + str(fs) + ' Hz. ') - - # wav波形 加时间窗以及时移10ms - time_window = 25 # 单位ms - window_length = fs / 1000 * time_window # 计算窗长度的公式,目前全部为400固定值 - - wav_arr = np.array(wavsignal) - #wav_length = len(wavsignal[0]) - wav_length = wav_arr.shape[1] - - # 计算循环终止的位置,也就是最终生成的窗数 - range0_end = int(len(wavsignal[0])/fs*1000 - time_window) // 10 - data_input = np.zeros((range0_end, 200), dtype=np.float) # 用于存放最终的频率特征数据 - data_line = np.zeros((1, 400), dtype=np.float) - for i in range(0, range0_end): - p_start = i * 160 - p_end = p_start + 400 - - data_line = wav_arr[0, p_start:p_end] - ''' - x=np.linspace(0, 400 - 1, 400, dtype = np.int64) - w = 0.54 - 0.46 * np.cos(2 * np.pi * (x) / (400 - 1) ) # 汉明窗 - data_line = data_line * w # 加窗 - ''' - data_line = np.abs(fft(data_line)) / wav_length - - data_input[i] = data_line[0:200] # 设置为400除以2的值(即200)是取一半数据,因为是对称的 - - # print(data_input.shape) - return data_input - - -x = np.linspace(0, 400 - 1, 400, dtype=np.int64) -w = 0.54 - 0.46 * np.cos(2 * np.pi * (x) / (400 - 1)) # 汉明窗 - - -def GetFrequencyFeature3(wavsignal, fs): - if(16000 != fs): - raise ValueError( - '[Error] ASRT currently only supports wav audio files with a sampling rate of 16000 Hz, but this audio is ' + str(fs) + ' Hz. 
') - - # wav波形 加时间窗以及时移10ms - time_window = 25 # 单位ms - window_length = fs / 1000 * time_window # 计算窗长度的公式,目前全部为400固定值 - - wav_arr = np.array(wavsignal) - #wav_length = len(wavsignal[0]) - wav_length = wav_arr.shape[1] - - # 计算循环终止的位置,也就是最终生成的窗数 - range0_end = int(len(wavsignal[0])/fs*1000 - time_window) // 10 - data_input = np.zeros((range0_end, 200), dtype=np.float) # 用于存放最终的频率特征数据 - data_line = np.zeros((1, 400), dtype=np.float) - - for i in range(0, range0_end): - p_start = i * 160 - p_end = p_start + 400 - - data_line = wav_arr[0, p_start:p_end] - - data_line = data_line * w # 加窗 - - data_line = np.abs(fft(data_line)) / wav_length - - data_input[i] = data_line[0:200] # 设置为400除以2的值(即200)是取一半数据,因为是对称的 - - # print(data_input.shape) - data_input = np.log(data_input + 1) - return data_input - - -def GetFrequencyFeature4(wavsignal, fs): - ''' - 主要是用来修正3版的bug - ''' - if(16000 != fs): - raise ValueError( - '[Error] ASRT currently only supports wav audio files with a sampling rate of 16000 Hz, but this audio is ' + str(fs) + ' Hz. ') - - # wav波形 加时间窗以及时移10ms - time_window = 25 # 单位ms - window_length = fs / 1000 * time_window # 计算窗长度的公式,目前全部为400固定值 - - wav_arr = np.array(wavsignal) - #wav_length = len(wavsignal[0]) - wav_length = wav_arr.shape[1] - - # 计算循环终止的位置,也就是最终生成的窗数 - range0_end = int(len(wavsignal[0])/fs*1000 - time_window) // 10 + 1 - data_input = np.zeros((range0_end, window_length // 2), - dtype=np.float) # 用于存放最终的频率特征数据 - data_line = np.zeros((1, window_length), dtype=np.float) - - for i in range(0, range0_end): - p_start = i * 160 - p_end = p_start + 400 - - data_line = wav_arr[0, p_start:p_end] - - data_line = data_line * w # 加窗 - - data_line = np.abs(fft(data_line)) / wav_length - - # 设置为400除以2的值(即200)是取一半数据,因为是对称的 - data_input[i] = data_line[0: window_length // 2] - - # print(data_input.shape) - data_input = np.log(data_input + 1) - return data_input - - -def wav_scale(energy): - ''' - 语音信号能量归一化 - ''' - means = energy.mean() # 均值 - var = energy.var() # 方差 - e = (energy-means)/math.sqrt(var) # 归一化能量 - return e - - -def wav_scale2(energy): - ''' - 语音信号能量归一化 - ''' - maxnum = max(energy) - e = energy / maxnum - return e - - -def wav_scale3(energy): - ''' - 语音信号能量归一化 - ''' - for i in range(len(energy)): - # if i == 1: - # #print('wavsignal[0]:\n {:.4f}'.format(energy[1]),energy[1] is int) - energy[i] = float(energy[i]) / 100.0 - # if i == 1: - # #print('wavsignal[0]:\n {:.4f}'.format(energy[1]),energy[1] is int) - return energy - - -def wav_show(wave_data, fs): # 显示出来声音波形 - time = np.arange(0, len(wave_data)) * (1.0/fs) # 计算声音的播放时间,单位为秒 - # 画声音波形 - # plt.subplot(211) - plt.plot(time, wave_data) - # plt.subplot(212) - #plt.plot(time, wave_data[1], c = "g") - plt.show() - - -def get_wav_list(filename): - ''' - 读取一个wav文件列表,返回一个存储该列表的字典类型值 - ps:在数据中专门有几个文件用于存放用于训练、验证和测试的wav文件列表 - ''' - txt_obj = open(filename, 'r') # 打开文件并读入 - txt_text = txt_obj.read() - txt_lines = txt_text.split('\n') # 文本分割 - dic_filelist = {} # 初始化字典 - list_wavmark = [] # 初始化wav列表 - for i in txt_lines: - if(i != ''): - txt_l = i.split(' ') - dic_filelist[txt_l[0]] = txt_l[1] - list_wavmark.append(txt_l[0]) - txt_obj.close() - return dic_filelist, list_wavmark - - -def get_wav_symbol(filename): - ''' - 读取指定数据集中,所有wav文件对应的语音符号 - 返回一个存储符号集的字典类型值 - ''' - txt_obj = open(filename, 'r') # 打开文件并读入 - txt_text = txt_obj.read() - txt_lines = txt_text.split('\n') # 文本分割 - dic_symbol_list = {} # 初始化字典 - list_symbolmark = [] # 初始化symbol列表 - for i in txt_lines: - if(i != ''): - txt_l = i.split(' ') - dic_symbol_list[txt_l[0]] = 
txt_l[1:] - list_symbolmark.append(txt_l[0]) - txt_obj.close() - return dic_symbol_list, list_symbolmark - - -if(__name__ == '__main__'): - - wave_data, fs = read_wav_data("A2_0.wav") - - wav_show(wave_data[0], fs) - t0 = time.time() - freimg = GetFrequencyFeature3(wave_data, fs) - t1 = time.time() - print('time cost:', t1-t0) - - freimg = freimg.T - plt.subplot(111) - - plt.imshow(freimg) - plt.colorbar(cax=None, ax=None, shrink=0.5) - - plt.show() -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- - -''' -一些通用函数 -''' - -import difflib - - -def GetEditDistance(str1, str2): - leven_cost = 0 - s = difflib.SequenceMatcher(None, str1, str2) - for tag, i1, i2, j1, j2 in s.get_opcodes(): - #print('{:7} a[{}: {}] --> b[{}: {}] {} --> {}'.format(tag, i1, i2, j1, j2, str1[i1: i2], str2[j1: j2])) - if tag == 'replace': - leven_cost += max(i2-i1, j2-j1) - elif tag == 'insert': - leven_cost += (j2-j1) - elif tag == 'delete': - leven_cost += (i2-i1) - return leven_cost -#!/usr/bin/env python3 -# -*- coding: utf-8 -*- -''' -感谢原作者的无私奉献 -来自: -https://www.jianshu.com/p/db0ba022936f -''' - -import tensorflow as tf -import keras -import keras.backend as K -import keras.layers as KL - - -class ParallelModel(keras.models.Model): - """Subclasses the standard Keras Model and adds multi-GPU support. - It works by creating a copy of the model on each GPU. Then it slices - the inputs and sends a slice to each copy of the model, and then - merges the outputs together and applies the loss on the combined - outputs. - """ - - def __init__(self, keras_model, gpu_count): - """Class constructor. - keras_model: The Keras model to parallelize - gpu_count: Number of GPUs. Must be > 1 - """ - super(ParallelModel, self).__init__( - ) # Thanks to @greatken999 for fixing bugs - self.inner_model = keras_model - self.gpu_count = gpu_count - merged_outputs = self.make_parallel() - super(ParallelModel, self).__init__(inputs=self.inner_model.inputs, - outputs=merged_outputs) - - def __getattribute__(self, attrname): - """Redirect loading and saving methods to the inner model. That's where - the weights are stored.""" - if 'load' in attrname or 'save' in attrname: - return getattr(self.inner_model, attrname) - return super(ParallelModel, self).__getattribute__(attrname) - - def summary(self, *args, **kwargs): - """Override summary() to display summaries of both, the wrapper - and inner models.""" - super(ParallelModel, self).summary(*args, **kwargs) - self.inner_model.summary(*args, **kwargs) - - def make_parallel(self): - """Creates a new wrapper model that consists of multiple replicas of - the original model placed on different GPUs. - """ - # Slice inputs. Slice inputs on the CPU to avoid sending a copy - # of the full inputs to all GPUs. Saves on bandwidth and memory. 
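        # tf.split slices each input along the batch axis into gpu_count
        # equal parts, so the global batch size must be divisible by the
        # number of GPUs; tower i below consumes slice i of every input.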
- input_slices = {name: tf.split(x, self.gpu_count) - for name, x in zip(self.inner_model.input_names, - self.inner_model.inputs)} - - output_names = self.inner_model.output_names - outputs_all = [] - for i in range(len(self.inner_model.outputs)): - outputs_all.append([]) - - # Run the model call() on each GPU to place the ops there - for i in range(self.gpu_count): - with tf.device('/gpu:%d' % i): - with tf.name_scope('tower_%d' % i): - # Run a slice of inputs through this replica - zipped_inputs = zip(self.inner_model.input_names, - self.inner_model.inputs) - inputs = [ - KL.Lambda(lambda s: input_slices[name][i], - output_shape=lambda s: (None,) + s[1:])(tensor) - for name, tensor in zipped_inputs] - # Create the model replica and get the outputs - outputs = self.inner_model(inputs) - if not isinstance(outputs, list): - outputs = [outputs] - # Save the outputs for merging back together later - for l, o in enumerate(outputs): - outputs_all[l].append(o) - - # Merge outputs on CPU - with tf.device('/cpu:0'): - merged = [] - for outputs, name in zip(outputs_all, output_names): - # If outputs are numbers without dimensions, add a batch dim. - def add_dim(tensor): - """Add a dimension to tensors that don't have any.""" - if K.int_shape(tensor) == (): - return KL.Lambda(lambda t: K.reshape(t, [1, 1]))(tensor) - return tensor - outputs = list(map(add_dim, outputs)) - - # Concatenate - merged.append(KL.Concatenate(axis=0, name=name)(outputs)) - return merged -#!/usr/bin/env python -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: setup.py -@time: 2019-01-24 16:42 - -""" -import pathlib - -from setuptools import find_packages, setup - -from version import __version__ - -# Package meta-data. -NAME = 'kashgari' -DESCRIPTION = 'Simple and powerful NLP framework, ' \ - 'build your state-of-art model in 5 minutes for ' \ - 'named entity recognition (NER), part-of-speech ' \ - 'tagging (PoS) and text classification tasks.' 
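# A minimal usage sketch for the ParallelModel wrapper above. Everything here
# is an illustrative assumption (toy architecture, a machine with two visible
# GPUs, batch size); it is not taken from the original sources.
import numpy as np
import keras
import keras.layers as KL

inp = KL.Input(shape=(32,))
hidden = KL.Dense(64, activation='relu')(inp)
out = KL.Dense(10, activation='softmax')(hidden)
inner = keras.models.Model(inp, out)

model = ParallelModel(inner, gpu_count=2)  # the wrapper requires gpu_count > 1
model.compile(optimizer='adam', loss='categorical_crossentropy')

# The batch axis is split across towers with tf.split, so batch_size must be
# divisible by gpu_count.
x = np.random.rand(64, 32)
y = keras.utils.to_categorical(np.random.randint(10, size=64), 10)
model.fit(x, y, batch_size=64, epochs=1)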
-URL = 'https://github.com/BrikerMan/Kashgari' -EMAIL = 'eliyar917@gmail.com' -AUTHOR = 'BrikerMan' -LICENSE = 'Apache License 2.0' - -HERE = pathlib.Path(__file__).parent -README = (HERE / "README.md").read_text() - -required = [ - 'Keras>=2.2.0', - 'h5py>=2.7.1', - 'keras-bert==0.41.0', - 'scikit-learn>=0.19.1', - 'numpy>=1.14.3', - 'download>=0.3.3', - 'seqeval >=0.0.3', - 'colorlog>=4.0.0', - 'gensim>=3.5.0', - # 'bz2file>=0.98', - 'sklearn', - 'pandas>=0.23.0', - 'keras-gpt-2==0.7.0' -] - -# long_description = "" - -setup( - name=NAME, - version=__version__, - description=DESCRIPTION, - long_description=README, - long_description_content_type="text/markdown", - author=AUTHOR, - author_email=EMAIL, - url=URL, - packages=find_packages(exclude=('tests',)), - install_requires=required, - include_package_data=True, - license=LICENSE, - classifiers=[ - 'License :: OSI Approved :: Apache Software License', - # 'Programming Language :: Python', - 'Programming Language :: Python :: 3.6', - 'Programming Language :: Python :: Implementation :: CPython', - 'Programming Language :: Python :: Implementation :: PyPy' - ], -) - - -if __name__ == "__main__": - print("Hello world") -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: __version__.py -@time: 2019-02-21 15:22 - -""" - -__version__ = '0.2.3' -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: run_flask_api -@time: 2019-02-24 - -""" -import random -from flask import Flask, jsonify -from kashgari.tasks.classification import KMaxCNNModel -from kashgari.corpus import SMP2017ECDTClassificationCorpus - -train_x, train_y = SMP2017ECDTClassificationCorpus.get_classification_data() - -model = KMaxCNNModel() -model.fit(train_x, train_y) - - -app = Flask(__name__) - - -@app.route('/predict', methods=['GET']) -def get_tasks(): - x = random.choice(train_x) - y = model.predict(x, output_dict=True) - return jsonify({'x': x, 'y': y}) - - -if __name__ == '__main__': - # must run predict once before `app.run` to prevent predict error - model.predict(train_x[10]) - app.run(debug=True, port=8080) -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: __init__.py.py -@time: 2019-01-19 13:42 - -""" -import kashgari.embeddings -import kashgari.corpus -import kashgari.tasks - -from kashgari.tasks import classification -from kashgari.tasks import seq_labeling - -from kashgari.macros import config - - -if __name__ == "__main__": - print("Hello world") -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: corpus -@time: 2019-01-20 - -""" -import logging -import os -import re -from typing import Tuple, List - -import pandas as pd - -from kashgari.utils import helper - -DATA_TRAIN = 'train' -DATA_VALIDATE = 'validate' -DATA_TEST = 'test' - - -class Corpus(object): - __corpus_name__ = '' - __zip_file__name = '' - - __desc__ = '' - - @classmethod - def get_classification_data(cls, - is_test: bool = False, - shuffle: bool = True, - max_count: int = 0) -> Tuple[List[str], List[str]]: - pass - - # @classmethod - # def get_info(cls): - # raise NotImplementedError() - - -class TencentDingdangSLUCorpus(Corpus): - - __corpus_name__ = 'corpus/task-slu-tencent.dingdang-v1.1' - 
__zip_file__name = 'corpus/task-slu-tencent.dingdang-v1.1.tar.gz'
-
-    __desc__ = """ Download from NLPCC 2018 Task4 dataset
-    details: http://tcci.ccf.org.cn/conference/2018/taskdata.php
-    The dataset adopted by this task is a sample of the real query log from a commercial
-    task-oriented dialog system. The data is all in Chinese. The evaluation includes three
-    domains, namely music, navigation and phone call. Within the dataset, an additional
-    domain label ‘OTHERS’ is used to annotate the data not covered by the three domains. To
-    simplify the task, we keep only the intents and the slots of high-frequency while ignoring
-    others although they appear in the original data. The entire data can be seen as a stream
-    of user queries ordered by time stamp. The stream is further split into a series of segments
-    according to the gaps of time stamps between queries and each segment is denoted as a
-    ‘session’. The contexts within a session are taken into consideration when a query within
-    the session was annotated. Below are two example sessions with annotations.
-
-    sample
-    ```
-    1 打电话 phone_call.make_a_phone_call 打电话
-    1 我想听美观 music.play 我想听美观
-    1 我想听什话 music.play 我想听什话||神话
-    1 神话 music.play 神话
-
-    2 播放调频广播 OTHERS 播放调频广播
-    2 给我唱一首一晃就老了 music.play 给我唱一首一晃就老了
-    ```
-    """
-
-    @classmethod
-    def get_info(cls):
-        folder_path = helper.cached_path(
-            cls.__corpus_name__, cls.__zip_file__name, )
-        logging.info("""{} info\n dataset path: {}\n{}""".format(cls.__corpus_name__,
-                                                                 folder_path,
-                                                                 cls.__desc__))
-
-    @classmethod
-    def get_classification_data(cls,
-                                data_type: str = DATA_TRAIN,
-                                shuffle: bool = True,
-                                cutter: str = 'char',
-                                max_count: int = 0) -> Tuple[List[List[str]], List[str]]:
-        """
-
-        :param data_type: {train, validate, test}
-        :param shuffle: shuffle or not
-        :param cutter: tokenization strategy, one of {'char', 'jieba', 'none'}
-        :param max_count: keep at most this many samples; 0 keeps everything
-        :return: tokenized texts and their domain labels
-        """
-        folder_path = helper.cached_path(cls.__corpus_name__,
-                                         cls.__zip_file__name)
-        if data_type not in [DATA_TRAIN, DATA_VALIDATE, DATA_TEST]:
-            raise ValueError('data_type error, please use one of {}'.format([DATA_TRAIN,
-                                                                             DATA_VALIDATE,
-                                                                             DATA_TEST]))
-        if cutter not in ['char', 'jieba', 'none']:
-            raise ValueError('cutter error, please use one of {}'.format(['char',
-                                                                          'jieba',
-                                                                          'none']))
-        file_path = os.path.join(folder_path, '{}.csv'.format(data_type))
-        df = pd.read_csv(file_path)
-        x_data = df['text'].values
-        y_data = df['domain'].values
-        if shuffle:
-            x_data, y_data = helper.unison_shuffled_copies(x_data, y_data)
-
-        if max_count != 0:
-            x_data = x_data[:max_count]
-            y_data = y_data[:max_count]
-
-        if cutter == 'jieba':
-            try:
-                import jieba
-            except ModuleNotFoundError:
-                raise ModuleNotFoundError(
-                    "please install jieba, `$ pip install jieba`")
-            x_data = [list(jieba.cut(item)) for item in x_data]
-        elif cutter == 'char':
-            x_data = [list(item) for item in x_data]
-        return x_data, y_data
-
-    @staticmethod
-    def parse_ner_str(text: str) -> Tuple[str, str]:
-        pattern = '<(?P<entity>\\w*)>(?P<value>[^<>]*)<\\/\\w*>'
-        x_list = []
-        tag_list = []
-        last_index = 0
-        for m in re.finditer(pattern, text):
-            x_list += text[last_index:m.start()]
-            tag_list += ['O'] * (m.start() - last_index)
-            last_index = m.end()
-            dic = m.groupdict()
-            value = dic['value'].split('||')[0]
-            entity = dic['entity']
-            x_list += list(value)
-            tag_list += ['P-' + entity] + ['I-' + entity] * (len(value) - 1)
-        if last_index < len(text):
-            x_list += list(text[last_index:])
-            tag_list += len(text[last_index:]) * ['O']
-        return ' '.join(x_list), ' '.join(tag_list)
-
-    @classmethod
-    def 
get_sequence_tagging_data(cls, - is_test: bool = False, - shuffle: bool = True, - max_count: int = 0) -> Tuple[List[str], List[str]]: - folder_path = helper.cached_path(cls.__corpus_name__, - cls.__zip_file__name) - - if is_test: - file_path = os.path.join(folder_path, 'test.csv') - else: - file_path = os.path.join(folder_path, 'train.csv') - - df = pd.read_csv(file_path) - x_data = [] - y_data = [] - - for tagging_text in df['tagging']: - x_item, y_item = cls.parse_ner_str(tagging_text) - x_data.append(x_item) - y_data.append(y_item) - if shuffle: - x_data, y_data = helper.unison_shuffled_copies(x_data, y_data) - if max_count != 0: - x_data = x_data[:max_count] - y_data = y_data[:max_count] - return x_data, y_data - - -class ChinaPeoplesDailyNerCorpus(object): - __corpus_name__ = 'corpus/china-people-daily-ner-corpus' - __zip_file__name = 'corpus/china-people-daily-ner-corpus.tar.gz' - - __desc__ = """ - https://github.com/zjy-ucas/ChineseNER/ - """ - - @classmethod - def get_sequence_tagging_data(cls, - data_type: str = DATA_TRAIN, - shuffle: bool = True, - max_count: int = 0) -> Tuple[List[List[str]], List[List[str]]]: - folder_path = helper.cached_path(cls.__corpus_name__, - cls.__zip_file__name) - - if data_type == DATA_TRAIN: - file_path = os.path.join(folder_path, 'example.train') - elif data_type == DATA_TEST: - file_path = os.path.join(folder_path, 'example.test') - else: - file_path = os.path.join(folder_path, 'example.dev') - - data_x, data_y = [], [] - - with open(file_path, 'r', encoding='utf-8') as f: - lines = f.read().splitlines() - x, y = [], [] - for line in lines: - rows = line.split(' ') - if len(rows) == 1: - data_x.append(x) - data_y.append(y) - x = [] - y = [] - else: - x.append(rows[0]) - y.append(rows[1]) - return data_x, data_y - - -class CoNLL2003Corpus(Corpus): - __corpus_name__ = 'corpus/conll2003' - __zip_file__name = 'corpus/conll2003.tar.gz' - - @classmethod - def get_sequence_tagging_data(cls, - data_type: str = DATA_TRAIN, - task_name: str = 'ner', - shuffle: bool = True, - max_count: int = 0) -> Tuple[List[List[str]], List[List[str]]]: - folder_path = helper.cached_path(cls.__corpus_name__, - cls.__zip_file__name) - - if data_type not in [DATA_TRAIN, DATA_VALIDATE, DATA_TEST]: - raise ValueError('data_type error, please use one onf the {}'.format([DATA_TRAIN, - DATA_VALIDATE, - DATA_TEST])) - if task_name not in ['ner', 'pos', 'chunking']: - raise ValueError('data_type error, please use one onf the {}'.format( - ['ner', 'pos', 'chunking'])) - folder_path = os.path.join(folder_path, task_name) - if data_type == DATA_TRAIN: - file_path = os.path.join(folder_path, 'train.txt') - elif data_type == DATA_TEST: - file_path = os.path.join(folder_path, 'test.txt') - else: - file_path = os.path.join(folder_path, 'valid.txt') - x_list, y_list = _load_data_and_labels(file_path) - if shuffle: - x_list, y_list = helper.unison_shuffled_copies(x_list, y_list) - if max_count: - x_list = x_list[:max_count] - y_list = y_list[:max_count] - return x_list, y_list - - __desc__ = """ - http://ir.hit.edu.cn/smp2017ecdt-data - """ - - -class SMP2017ECDTClassificationCorpus(Corpus): - __corpus_name__ = 'corpus/smp2017ecdt-data-task1' - __zip_file__name = 'corpus/smp2017ecdt-data-task1.tar.gz' - - __desc__ = """ - http://ir.hit.edu.cn/smp2017ecdt-data - """ - - @classmethod - def get_classification_data(cls, - data_type: str = DATA_TRAIN, - shuffle: bool = True, - cutter: str = 'char', - max_count: int = 0) -> Tuple[List[List[str]], List[str]]: - """ - - :param data_type: {train, 
validate, test}
-        :param shuffle: shuffle or not
-        :param cutter: tokenization strategy, one of {'char', 'jieba', 'none'}
-        :param max_count: keep at most this many samples; 0 keeps everything
-        :return: tokenized texts and their domain labels
-        """
-        folder_path = helper.cached_path(cls.__corpus_name__,
-                                         cls.__zip_file__name)
-        if data_type not in [DATA_TRAIN, DATA_VALIDATE, DATA_TEST]:
-            raise ValueError('data_type error, please use one of {}'.format([DATA_TRAIN,
-                                                                             DATA_VALIDATE,
-                                                                             DATA_TEST]))
-        if cutter not in ['char', 'jieba', 'none']:
-            raise ValueError('cutter error, please use one of {}'.format(['char',
-                                                                          'jieba',
-                                                                          'none']))
-
-        file_path = os.path.join(folder_path, '{}.csv'.format(data_type))
-        df = pd.read_csv(file_path)
-        x_data = df['text'].values
-        y_data = df['domain'].values
-        if shuffle:
-            x_data, y_data = helper.unison_shuffled_copies(x_data, y_data)
-
-        if max_count != 0:
-            x_data = x_data[:max_count]
-            y_data = y_data[:max_count]
-
-        if cutter == 'jieba':
-            try:
-                import jieba
-            except ModuleNotFoundError:
-                raise ModuleNotFoundError(
-                    "please install jieba, `$ pip install jieba`")
-            x_data = [list(jieba.cut(item)) for item in x_data]
-        elif cutter == 'char':
-            x_data = [list(item) for item in x_data]
-        return x_data, y_data
-
-
-def _load_data_and_labels(filename, encoding='utf-8'):
-    """Loads data and label from a file.
-    Args:
-        filename (str): path to the file.
-        encoding (str): file encoding format.
-    The file format is tab-separated values.
-    A blank line is required at the end of a sentence.
-    For example:
-    ```
-    EU	B-ORG
-    rejects	O
-    German	B-MISC
-    call	O
-    to	O
-    boycott	O
-    British	B-MISC
-    lamb	O
-    .	O
-    Peter	B-PER
-    Blackburn	I-PER
-    ...
-    ```
-    Returns:
-        tuple(numpy array, numpy array): data and labels.
-    Example:
-        >>> filename = 'conll2003/en/ner/train.txt'
-        >>> data, labels = _load_data_and_labels(filename)
-    """
-    sents, labels = [], []
-    words, tags = [], []
-    with open(filename, encoding=encoding) as f:
-        for line in f:
-            line = line.rstrip()
-            if line:
-                word, tag = line.split('\t')
-                words.append(word)
-                tags.append(tag)
-            else:
-                sents.append(words)
-                labels.append(tags)
-                words, tags = [], []
-
-    return sents, labels
-
-
-if __name__ == '__main__':
-
-    # init_logger()
-    x, y = CoNLL2003Corpus.get_sequence_tagging_data()
-    for i in range(5):
-        print('{} -> {}'.format(x[i], y[i]))
-# encoding: utf-8
-"""
-@author: BrikerMan
-@contact: eliyar917@gmail.com
-@blog: https://eliyar.biz
-
-@version: 1.0
-@license: Apache Licence
-@file: layers
-@time: 2019-02-23
-
-"""
-from __future__ import absolute_import, division
-import logging
-
-import tensorflow as tf
-from keras.layers import Flatten
-from keras.layers import GRU, LSTM
-from keras.layers import CuDNNGRU, CuDNNLSTM
-from keras import initializers
-from keras.engine import InputSpec, Layer
-from keras import backend as K
-
-from kashgari.macros import config
-
-if config.use_CuDNN_cell:
-    GRULayer = CuDNNGRU
-    LSTMLayer = CuDNNLSTM
-else:
-    GRULayer = GRU
-    LSTMLayer = LSTM
-
-
-class AttentionWeightedAverage(Layer):
-    '''
-    Computes a weighted average of the different channels across timesteps.
-    Uses 1 parameter per channel to compute the attention value for a single timestep.
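    Concretely (matching call() below): per-timestep logits are x_t . W,
    normalized with a numerically stable softmax over timesteps; masked
    timesteps receive zero weight before renormalization, and the layer
    returns the attention-weighted sum over timesteps (plus the weights
    themselves when return_attention=True).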
- ''' - - def __init__(self, return_attention=False, **kwargs): - self.init = initializers.get('uniform') - self.supports_masking = True - self.return_attention = return_attention - super(AttentionWeightedAverage, self).__init__(**kwargs) - - def build(self, input_shape): - self.input_spec = [InputSpec(ndim=3)] - assert len(input_shape) == 3 - - self.W = self.add_weight(shape=(input_shape[2], 1), - name='{}_w'.format(self.name), - initializer=self.init) - self.trainable_weights = [self.W] - super(AttentionWeightedAverage, self).build(input_shape) - - def call(self, x, mask=None): - # computes a probability distribution over the timesteps - # uses 'max trick' for numerical stability - # reshape is done to avoid issue with Tensorflow - # and 1-dimensional weights - logits = K.dot(x, self.W) - x_shape = K.shape(x) - logits = K.reshape(logits, (x_shape[0], x_shape[1])) - ai = K.exp(logits - K.max(logits, axis=-1, keepdims=True)) - - # masked timesteps have zero weight - if mask is not None: - mask = K.cast(mask, K.floatx()) - ai = ai * mask - att_weights = ai / (K.sum(ai, axis=1, keepdims=True) + K.epsilon()) - weighted_input = x * K.expand_dims(att_weights) - result = K.sum(weighted_input, axis=1) - if self.return_attention: - return [result, att_weights] - return result - - def get_output_shape_for(self, input_shape): - return self.compute_output_shape(input_shape) - - def compute_output_shape(self, input_shape): - output_len = input_shape[2] - if self.return_attention: - return [(input_shape[0], output_len), (input_shape[0], input_shape[1])] - return (input_shape[0], output_len) - - def compute_mask(self, input, input_mask=None): - if isinstance(input_mask, list): - return [None] * len(input_mask) - else: - return None - - -class KMaxPooling(Layer): - ''' - K-max pooling layer that extracts the k-highest activation from a sequence (2nd dimension). - TensorFlow backend. - - # Arguments - k: An int scale, - indicate k max steps of features to pool. - sorted: A bool, - if output is sorted (default) or not. - data_format: A string, - one of `channels_last` (default) or `channels_first`. - The ordering of the dimensions in the inputs. - `channels_last` corresponds to inputs with shape - `(batch, steps, features)` while `channels_first` - corresponds to inputs with shape - `(batch, features, steps)`. 
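    # Example
        A minimal usage sketch (an editorial illustration, not from the
        original docstring): keep the three strongest timestep activations
        per channel of a Conv1D feature map:
            x = KMaxPooling(k=3)(conv_out)   # -> (batch, 3, channels)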
- # Input shape - - If `data_format='channels_last'`: - 3D tensor with shape: - `(batch_size, steps, features)` - - If `data_format='channels_first'`: - 3D tensor with shape: - `(batch_size, features, steps)` - # Output shape - 3D tensor with shape: - `(batch_size, top-k-steps, features)` - ''' - - def __init__(self, k=1, sorted=True, data_format='channels_last', **kwargs): - super(KMaxPooling, self).__init__(**kwargs) - self.input_spec = InputSpec(ndim=3) - self.k = k - self.sorted = sorted - self.data_format = K.normalize_data_format(data_format) - - # def build(self, input_shape): - # assert len(input_shape) == 3 - # super(KMaxPooling, self).build(input_shape) - - def compute_output_shape(self, input_shape): - if self.data_format == 'channels_first': - return (input_shape[0], self.k, input_shape[1]) - else: - return (input_shape[0], self.k, input_shape[2]) - - def call(self, inputs): - if self.data_format == 'channels_last': - # swap last two dimensions since top_k will be applied along the last dimension - shifted_input = tf.transpose(inputs, [0, 2, 1]) - - # extract top_k, returns two tensors [values, indices] - top_k = tf.nn.top_k(shifted_input, k=self.k, sorted=self.sorted)[0] - else: - top_k = tf.nn.top_k(inputs, k=self.k, sorted=self.sorted)[0] - # return flattened output - return tf.transpose(top_k, [0, 2, 1]) - - def get_config(self): - config = {'k': self.k, - 'sorted': self.sorted, - 'data_format': self.data_format} - base_config = super(KMaxPooling, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - -class NonMaskingLayer(Layer): - """ - fix convolutional 1D can't receive masked input, detail: https://github.com/keras-team/keras/issues/4978 - thanks for https://github.com/jacoxu - """ - - def __init__(self, **kwargs): - self.supports_masking = True - super(NonMaskingLayer, self).__init__(**kwargs) - - def build(self, input_shape): - pass - - def compute_mask(self, input, input_mask=None): - # do not pass the mask to the next layers - return None - - def call(self, x, mask=None): - return x - - def get_output_shape_for(self, input_shape): - return input_shape - - -if __name__ == '__main__': - print("hello, world") -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: macros.py -@time: 2019-01-19 09:58 - -""" -import bz2 -import os -import pathlib -from enum import Enum -from pathlib import Path - -import download - -PAD = "[PAD]" -BOS = "[BOS]" -EOS = "[EOS]" -UNK = "[UNK]" - -MARKED_KEYS = [PAD, BOS, EOS, UNK] - -NO_TAG = 'O' - -home = str(Path.home()) - -DATA_PATH = os.path.join(home, '.kashgari') -STORAGE_HOST = 'http://storage.eliyar.biz/' -PROCESSED_CORPUS_PATH = os.path.join(DATA_PATH, 'pre_processed') - -pathlib.Path(PROCESSED_CORPUS_PATH).mkdir(parents=True, exist_ok=True) - - -class _Config(object): - def __init__(self): - self.use_CuDNN_cell = False - self.sequence_labeling_tokenize_add_bos_eos = False - - -config = _Config() - - -class CustomEmbedding(object): - def __init__(self, embedding_size=100): - self.embedding_size = embedding_size - - -class TaskType(Enum): - classification = 'classification' - tagging = 'tagging' - - -class DataSetType(Enum): - train = 'train' - test = 'test' - validate = 'validate' - - -class SegmenterType(Enum): - space = 'space' - jieba = 'jieba' - char = 'char' - - -URL_MAP = { - 'w2v.sgns.weibo.bigram': 'embedding/word2vev/sgns.weibo.bigram.bz2' -} - - -def download_file(file: str): - url = 
STORAGE_HOST + file - target_path = os.path.join(DATA_PATH, file) - download.download(url, target_path) - - -def download_if_not_existed(file_path: str) -> str: - target_path = os.path.join(DATA_PATH, file_path) - if not os.path.exists(target_path[:-4]): - download_file(file_path) - with open(target_path, 'rb') as source, open(target_path[:-4], 'wb') as dest: - dest.write(bz2.decompress(source.read())) - return target_path[:-4] - - -def get_model_path(file: str) -> str: - file_path = URL_MAP.get(file, file) - return download_if_not_existed(file_path) - - -if __name__ == "__main__": - from kashgari.utils.logger import init_logger - init_logger() -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: type_hints.py -@time: 2019-01-21 13:55 - -""" -from typing import Union, List - -# ClassificationXType = Union[List[List[str]], List[str]] -# ClassificationYType = List[str] - -TextSeqType = List[str] -TokenSeqType = List[int] - -TextSeqInputType = Union[List[TextSeqType], TextSeqType] -TokenSeqInputType = Union[List[TokenSeqType], TokenSeqType] - - -if __name__ == "__main__": - print("Hello world") -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: test_classifier_models.py -@time: 2019-01-27 13:28 - -""" -import time -import os -import random -import logging -import tempfile -import unittest - -from kashgari.embeddings import WordEmbeddings, BERTEmbedding - -from kashgari.tasks.classification import BLSTMModel, CNNLSTMModel, CNNModel -from kashgari.tasks.classification import AVCNNModel, KMaxCNNModel, RCNNModel, AVRNNModel -from kashgari.tasks.classification import DropoutBGRUModel, DropoutAVRNNModel - - -from kashgari.utils.logger import init_logger -init_logger() - - -SEQUENCE_LENGTH = 30 - -train_x = [ - list('语言学(英语:linguistics)是一门关于人类语言的科学研究'), - list('语言学(英语:linguistics)是一门关于人类语言的科学研究'), - list('语言学(英语:linguistics)是一门关于人类语言的科学研究'), - list('语言学包含了几种分支领域。'), - list('在语言结构(语法)研究与意义(语义与语用)研究之间存在一个重要的主题划分'), -] -train_y = ['a', 'a', 'a', 'b', 'c'] -train_multi_y = [['b', 'c'], ['a'], ['a', 'c'], ['a', 'b'], ['c']] - -eval_x = [ - list('语言学是一门关于人类语言的科学研究。'), - list('语言学包含了几种分支领域。'), - list('在语言结构研究与意义研究之间存在一个重要的主题划分。'), - list('语法中包含了词法,句法以及语音。'), - list('语音学是语言学的一个相关分支,它涉及到语音与非语音声音的实际属性,以及它们是如何发出与被接收到的。'), - list('与学习语言不同,语言学是研究所有人类语文发展有关的一门学术科目。'), - list('在语言结构(语法)研究与意义(语义与语用)研究之间存在一个重要的主题划分'), -] - -eval_y = ['a', 'a', 'a', 'b', 'c', 'a', 'c'] -eval_multi_y = [['b', 'c'], ['a'], ['a', 'c'], ['a', 'b'], ['c'], ['b'], ['a']] - - -class EmbeddingManager(object): - word2vec_embedding = None - bert_embedding = None - - @classmethod - def get_bert(cls): - if cls.bert_embedding is None: - cls.bert_embedding = BERTEmbedding( - 'bert-base-chinese', sequence_length=15) - logging.info('bert_embedding seq len: {}'.format( - cls.bert_embedding.sequence_length)) - return cls.bert_embedding - - @classmethod - def get_w2v(cls): - if cls.word2vec_embedding is None: - cls.word2vec_embedding = WordEmbeddings( - 'sgns.weibo.bigram', sequence_length=SEQUENCE_LENGTH, limit=5000) - return cls.word2vec_embedding - - -class TestBLSTMModelModelBasic(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.epochs = 2 - cls.model_class = BLSTMModel - cls.model = cls.model_class() - - def test_fit(self): - self.model.fit(train_x, train_y, eval_x, eval_y, epochs=self.epochs) - - def test_save_and_load(self): - 
self.test_fit() - model_path = os.path.join( - tempfile.gettempdir(), 'kashgari_model', str(time.time())) - self.model.save(model_path) - new_model = BLSTMModel.load_model(model_path) - assert new_model is not None - sentence = list('语言学包含了几种分支领域。') - result = new_model.predict(sentence) - assert isinstance(result, str) - - def test_w2v_embedding(self): - embedding = EmbeddingManager.get_w2v() - w2v_model = self.model_class(embedding) - w2v_model.fit(train_x, train_y, epochs=1) - assert len(w2v_model.label2idx) == 4 - assert len(w2v_model.token2idx) > 4 - - sentence = list('语言学包含了几种分支领域。') - assert isinstance(w2v_model.predict(sentence), str) - assert isinstance(w2v_model.predict([sentence]), list) - logging.info('test predict: {} -> {}'.format(sentence, - self.model.predict(sentence))) - w2v_model.predict(sentence, output_dict=True) - w2v_model.predict(sentence, output_dict=False) - - def test_build_multi_gpu_model(self): - self.model.build_model(train_x, train_y, eval_x, eval_y) - # self.model.build_multi_gpu_model(2) - logging.info(self.model) - - def test_multi_label_model(self): - multi_label_model = self.model_class(multi_label=True) - multi_label_model.fit(train_x, train_multi_y, - eval_x, eval_multi_y, epochs=2) - assert isinstance(multi_label_model.predict(train_x[0]), tuple) - - model_path = os.path.join( - tempfile.gettempdir(), 'kashgari_model', str(time.time())) - multi_label_model.save(model_path) - new_model = BLSTMModel.load_model(model_path) - assert new_model is not None - sentence = list('语言学包含了几种分支领域。') - result = new_model.predict(sentence) - assert isinstance(result, tuple) - - @classmethod - def tearDownClass(cls): - del cls.model - logging.info('tearDownClass {}'.format(cls)) - - -class TestAllCNNModelModel(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.epochs = 2 - cls.model_class = CNNModel - cls.model = cls.model_class() - - def test_build(self): - self.model.build_model(train_x, train_y) - self.model.fit(train_x, train_y, epochs=1) - assert len(self.model.label2idx) == 4 - assert len(self.model.token2idx) > 4 - - def test_fit(self): - self.model.fit(train_x, train_y, eval_x, eval_y, epochs=self.epochs) - - def test_fit_class_weight(self): - self.model.fit(train_x, train_y, eval_x, eval_y, - class_weight=True, batch_size=128, epochs=2) - - def test_label_token_convert(self): - self.test_fit() - assert isinstance(self.model.convert_label_to_idx('a'), int) - assert isinstance(self.model.convert_idx_to_label(1), str) - assert all(isinstance(i, int) - for i in self.model.convert_label_to_idx(['a'])) - assert all(isinstance(i, str) - for i in self.model.convert_idx_to_label([1, 2])) - - sentence = random.choice(eval_x) - tokens = self.model.embedding.tokenize(sentence) - assert min(30, len(sentence)+2) == min(len(tokens), SEQUENCE_LENGTH) - - def test_predict(self): - self.test_fit() - sentence = list('语言学包含了几种分支领域。') - assert isinstance(self.model.predict(sentence), str) - assert isinstance(self.model.predict([sentence]), list) - logging.info('test predict: {} -> {}'.format(sentence, - self.model.predict(sentence))) - self.model.predict(sentence, output_dict=True) - - def test_eval(self): - self.test_fit() - self.model.evaluate(eval_x, eval_y) - - def test_save_and_load(self): - self.test_fit() - model_path = os.path.join( - tempfile.gettempdir(), 'kashgari_model', str(time.time())) - self.model.save(model_path) - new_model = BLSTMModel.load_model(model_path) - assert new_model is not None - sentence = list('语言学包含了几种分支领域。') - result = 
new_model.predict(sentence) - assert isinstance(result, str) - - # def test_bert_embedding(self): - # embedding = EmbeddingManager.get_bert() - # bert_model = self.model_class(embedding) - # bert_model.fit(train_x, train_y, epochs=1) - # assert len(bert_model.label2idx) == 4 - # assert len(bert_model.token2idx) > 4 - # - # sentence = list('语言学包含了几种分支领域。') - # assert isinstance(bert_model.predict(sentence), str) - # assert isinstance(bert_model.predict([sentence]), list) - # logging.info('test predict: {} -> {}'.format(sentence, self.model.predict(sentence))) - # bert_model.predict(sentence, output_dict=True) - # bert_model.predict(sentence, output_dict=False) - - def test_w2v_embedding(self): - embedding = EmbeddingManager.get_w2v() - w2v_model = self.model_class(embedding) - w2v_model.fit(train_x, train_y, epochs=1) - assert len(w2v_model.label2idx) == 4 - assert len(w2v_model.token2idx) > 4 - - sentence = list('语言学包含了几种分支领域。') - assert isinstance(w2v_model.predict(sentence), str) - assert isinstance(w2v_model.predict([sentence]), list) - logging.info('test predict: {} -> {}'.format(sentence, - self.model.predict(sentence))) - w2v_model.predict(sentence, output_dict=True) - w2v_model.predict(sentence, output_dict=False) - - def test_multi_label_model(self): - multi_label_model = self.model_class(multi_label=True) - multi_label_model.fit(train_x, train_multi_y, - eval_x, eval_multi_y, epochs=2) - assert isinstance(multi_label_model.predict(train_x[0]), tuple) - - model_path = os.path.join( - tempfile.gettempdir(), 'kashgari_model', str(time.time())) - multi_label_model.save(model_path) - new_model = BLSTMModel.load_model(model_path) - assert new_model is not None - sentence = list('语言学包含了几种分支领域。') - result = new_model.predict(sentence) - assert isinstance(result, tuple) - - @classmethod - def tearDownClass(cls): - del cls.model - logging.info('tearDownClass {}'.format(cls)) - - -class TestCNNLSTMModelBasic(TestBLSTMModelModelBasic): - - @classmethod - def setUpClass(cls): - cls.epochs = 2 - cls.model_class = CNNLSTMModel - cls.model = cls.model_class() - - -class TestCNNModelBasic(TestBLSTMModelModelBasic): - - @classmethod - def setUpClass(cls): - cls.epochs = 2 - cls.model_class = CNNModel - cls.model = cls.model_class() - - -class TestAVCNNModelBasic(TestBLSTMModelModelBasic): - - @classmethod - def setUpClass(cls): - cls.epochs = 2 - cls.model_class = AVCNNModel - cls.model = cls.model_class() - - -class TestKMaxCNNModelBasic(TestBLSTMModelModelBasic): - - @classmethod - def setUpClass(cls): - cls.epochs = 2 - cls .model_class = KMaxCNNModel - cls.model = cls.model_class() - - -class TestRCNNModelBasic(TestBLSTMModelModelBasic): - - @classmethod - def setUpClass(cls): - cls.epochs = 2 - cls.model_class = RCNNModel - cls.model = cls.model_class() - - -class TestAVRNNModelBasic(TestBLSTMModelModelBasic): - - @classmethod - def setUpClass(cls): - cls.epochs = 2 - cls.model_class = AVRNNModel - cls.model = cls.model_class() - - -class TestDropoutBGRUModelBasic(TestBLSTMModelModelBasic): - - @classmethod - def setUpClass(cls): - cls.epochs = 2 - cls.model_class = DropoutBGRUModel - cls.model = cls.model_class() - - -class TestDropoutAVRNNModelBasic(TestBLSTMModelModelBasic): - - @classmethod - def setUpClass(cls): - cls.epochs = 2 - cls.model_class = DropoutAVRNNModel - cls.model = cls.model_class() -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: test_corpus.py -@time: 2019-01-31 13:56 - 
-""" -import unittest - -from kashgari.corpus import TencentDingdangSLUCorpus -from kashgari.corpus import ChinaPeoplesDailyNerCorpus -from kashgari.corpus import CoNLL2003Corpus -from kashgari.corpus import SMP2017ECDTClassificationCorpus - - -class TestTencentDingdangSLUCorpus(unittest.TestCase): - def test_get_classification_data(self): - train_x, train_y = TencentDingdangSLUCorpus.get_classification_data( - 'train') - assert len(train_x) == len(train_y) - assert len(train_x) > 0 - - test_x, test_y = TencentDingdangSLUCorpus.get_classification_data( - 'test') - assert len(test_x) == len(test_y) - - def test_get_sequence_tagging_data(self): - train_x, train_y = TencentDingdangSLUCorpus.get_sequence_tagging_data( - is_test=False) - assert len(train_x) == len(train_y) - assert len(train_x) > 0 - - -class TestChinaPeoplesDailyNerCorpus(unittest.TestCase): - def test_ner_data(self): - train_x, train_y = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data( - 'train') - assert len(train_x) == len(train_y) - assert len(train_x) > 0 - - test_x, test_y = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data( - 'test') - assert len(test_x) == len(test_y) - assert len(test_x) > 0 - - -class TestCoNLL2003Corpus(unittest.TestCase): - def test_ner_data(self): - train_x, train_y = CoNLL2003Corpus.get_sequence_tagging_data('train') - assert len(train_x) == len(train_y) - assert len(train_x) > 0 - - test_x, test_y = CoNLL2003Corpus.get_sequence_tagging_data('test') - assert len(test_x) == len(test_y) - assert len(test_x) > 0 - - -class TestSMP2017ECDTClassificationCorpus(unittest.TestCase): - def test_ner_data(self): - train_x, train_y = SMP2017ECDTClassificationCorpus.get_classification_data( - 'train') - assert len(train_x) == len(train_y) - assert len(train_x) > 0 - - test_x, test_y = SMP2017ECDTClassificationCorpus.get_classification_data( - 'train') - assert len(test_x) == len(test_y) - assert len(test_x) > 0 - - -if __name__ == "__main__": - unittest.main() -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: test_embeddings.py -@time: 2019-01-27 13:05 - -""" -import os -import unittest -import logging -import kashgari.macros as k -from kashgari.embeddings import WordEmbeddings, BERTEmbedding, CustomEmbedding, BaseEmbedding, TwoHeadEmbedding -from kashgari.utils.logger import init_logger -init_logger() - -SEQUENCE_LENGTH = 30 -TEST_DIR = os.path.dirname(os.path.realpath(__file__)) - - -class TestWordEmbeddings(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.embedding = WordEmbeddings('sgns.weibo.bigram-char', - sequence_length=SEQUENCE_LENGTH, - limit=1000) - - def test_build(self): - # self.setup() - assert self.embedding.idx2token[0] == k.PAD - assert self.embedding.idx2token[1] == k.BOS - assert self.embedding.idx2token[2] == k.EOS - assert self.embedding.idx2token[3] == k.UNK - - def test_tokenize(self): - sentence = ['我', '想', '看', '电影', '%%##!$#%'] - tokens = self.embedding.tokenize(sentence) - - logging.info('tokenize test: {} -> {}'.format(sentence, tokens)) - assert len(tokens) == len(tokens) - assert tokens[-2] == self.embedding.token2idx[k.UNK] - - token_list = self.embedding.tokenize([sentence]) - assert len(token_list[0]) == len(sentence) + 2 - - def test_embed(self): - sentence = ['我', '想', '看', '电影', '%%##!$#%'] - embedded_sentence = self.embedding.embed(sentence) - embedded_sentences = self.embedding.embed([sentence]) - logging.info( - 'embed test: {} -> 
{}'.format(sentence, embedded_sentence)) - assert embedded_sentence.shape == ( - SEQUENCE_LENGTH, self.embedding.embedding_size) - assert embedded_sentences.shape == ( - 1, SEQUENCE_LENGTH, self.embedding.embedding_size) - - -class TestBERTEmbedding(TestWordEmbeddings): - @classmethod - def setUpClass(cls): - bert_path = 'chinese_L-12_H-768_A-12' - cls.embedding = BERTEmbedding(bert_path, - sequence_length=SEQUENCE_LENGTH) - - def test_build(self): - assert self.embedding.embedding_size > 0 - assert self.embedding.token2idx[k.PAD] == 0 - assert self.embedding.token2idx[k.BOS] > 0 - assert self.embedding.token2idx[k.EOS] > 0 - assert self.embedding.token2idx[k.UNK] > 0 - - -class TestCustomEmbedding(TestWordEmbeddings): - @classmethod - def setUpClass(cls): - cls.embedding = CustomEmbedding('empty_embedding', - sequence_length=SEQUENCE_LENGTH, - embedding_size=100) - - corpus = [['我', '们', '变', '而', '以', '书', '会', '友', ',', '以', '书', '结', '缘', ',', - '把', '欧', '美', '、', '港', '台', '流', '行', '的', - '食', '品', '类', '图', '谱', '、', '画', '册', '、', - '工', '具', '书', '汇', '集', '一', '堂', '。'], - ['为', '了', '跟', '踪', '国', '际', '最', '新', '食', '品', - '工', '艺', '、', '流', '行', '趋', '势', ',', '大', '量', - '搜', '集', '海', '外', '专', '业', '书', '刊', '资', '料', - '是', '提', '高', '技', '艺', '的', '捷', '径', '。'], - ['其', '中', '线', '装', '古', '籍', '逾', '千', '册', - ';', '民', '国', '出', '版', '物', '几', '百', '种', - ';', '珍', '本', '四', '册', '、', '稀', '见', '本', - '四', '百', '余', '册', ',', '出', '版', '时', '间', - '跨', '越', '三', '百', '余', '年', '。'], - ['有', '的', '古', '木', '交', '柯', ',', - '春', '机', '荣', '欣', ',', '从', '诗', - '人', '句', '中', '得', '之', ',', '而', - '入', '画', '中', ',', '观', '之', '令', '人', '心', '驰', '。', '我']] - cls.embedding.build_token2idx_dict(x_data=corpus, min_count=2) - - def test_build(self): - assert self.embedding.token_count == 33 - super(TestCustomEmbedding, self).test_build() - - -class TestTwoHeadEmbedding(TestWordEmbeddings): - @classmethod - def setUpClass(cls): - cls.embedding = TwoHeadEmbedding('empty_embedding', - sequence_length=[ - SEQUENCE_LENGTH, SEQUENCE_LENGTH], - embedding_size=100) - corpus1 = [['我', '们', '变', '而', '以', '书', '会', '友', ',', '以', '书', '结', '缘', ',', - '把', '欧', '美', '、', '港', '台', '流', '行', '的', - '食', '品', '类', '图', '谱', '、', '画', '册', '、', - '工', '具', '书', '汇', '集', '一', '堂', '。'], - ['为', '了', '跟', '踪', '国', '际', '最', '新', '食', '品', - '工', '艺', '、', '流', '行', '趋', '势', ',', '大', '量', - '搜', '集', '海', '外', '专', '业', '书', '刊', '资', '料', - '是', '提', '高', '技', '艺', '的', '捷', '径', '。']] - corpus2 = [['其', '中', '线', '装', '古', '籍', '逾', '千', '册', - ';', '民', '国', '出', '版', '物', '几', '百', '种', - ';', '珍', '本', '四', '册', '、', '稀', '见', '本', - '四', '百', '余', '册', ',', '出', '版', '时', '间', - '跨', '越', '三', '百', '余', '年', '。'], - ['有', '的', '古', '木', '交', '柯', ',', - '春', '机', '荣', '欣', ',', '从', '诗', - '人', '句', '中', '得', '之', ',', '而', - '入', '画', '中', ',', '观', '之', '令', '人', '心', '驰', '。', '我']] - cls.embedding.build_token2idx_dict( - x_data=[corpus1, corpus2], min_count=2) - - def test_build(self): - assert self.embedding.token_count == 33 - super(TestTwoHeadEmbedding, self).test_build() - - def test_embed(self): - sentence1 = ['我', '想', '看', '电影', '%%##!$#%'] - sentence2 = ['我', '不', '看', '电影', '%%##!$#%'] - sentences = [[sentence1], [sentence2]] - embedded_sentences = self.embedding.embed(sentences) - logging.info( - 'embed test: {} -> {}'.format(sentences, embedded_sentences)) - # assert embedded_sentence.shape == (SEQUENCE_LENGTH, self.embedding.embedding_size) - assert 
embedded_sentences.shape == ( - 1, SEQUENCE_LENGTH*2, self.embedding.embedding_size) - - -if __name__ == "__main__": - unittest.main() -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: test_seq_labeling_models.py -@time: 2019-01-27 13:55 - -""" -import os -import time -import logging -import tempfile -import unittest - -from kashgari.embeddings import WordEmbeddings, BERTEmbedding -from kashgari.tasks.seq_labeling import CNNLSTMModel, BLSTMModel, BLSTMCRFModel -from kashgari.utils.logger import init_logger - -init_logger() - - -train_x = [ - ['我', '们', '变', '而', '以', '书', '会', '友', ',', '以', '书', '结', '缘', ',', '把', '欧', '美', - '、', '港', '台', '流', '行', '的', '食', '品', '类', '图', '谱', '、', '画', '册', '、', '工', '具', - '书', '汇', '集', '一', '堂', '。'], - ['鲁', '宾', '明', '确', '指', '出', ',', '对', '政', '府', '的', '这', '种', '指', '控', '完', '全', '没', - '有', '事', '实', '根', '据', ',', '美', '国', '政', '府', '不', '想', '也', '没', '有', '向', '中', '国', - '转', '让', '敏', '感', '技', '术', ',', '事', '实', '真', '相', '总', '有', '一', '天', '会', '大', '白', - '于', '天', '下', ';', '众', '议', '院', '的', '这', '种', '做', '法', '令', '人', '“', '非', '常', '失', - '望', '”', ',', '将', '使', '美', '国', '的', '商', '业', '卫', '星', '产', '业', '受', '到', '威', '胁', - ',', '使', '美', '国', '的', '竞', '争', '力', '受', '到', '损', '害', '。'], - ['今', '年', '年', '初', ',', '党', '中', '央', '、', '国', '务', '院', '根', '据', '国', '内', '外', '经', - '济', '形', '势', '的', '变', '化', ',', '及', '时', '作', '出', '扩', '大', '内', '需', '、', '保', '持', - '经', '济', '持', '续', '快', '速', '增', '长', '的', '重', '大', '决', '策', '。'], - ['我', '们', '变', '而', '以', '书', '会', '友', ',', '以', '书', '结', '缘', ',', '把', '欧', '美', - '、', '港', '台', '流', '行', '的', '食', '品', '类', '图', '谱', '、', '画', '册', '、', '工', '具', - '书', '汇', '集', '一', '堂', '。'], - ['我', '们', '变', '而', '以', '书', '会', '友', ',', '以', '书', '结', '缘', ',', '把', '欧', '美', - '、', '港', '台', '流', '行', '的', '食', '品', '类', '图', '谱', '、', '画', '册', '、', '工', '具', - '书', '汇', '集', '一', '堂', '。'], - ['为', '了', '跟', '踪', '国', '际', '最', '新', '食', '品', '工', '艺', '、', '流', '行', '趋', '势', - ',', '大', '量', '搜', '集', '海', '外', '专', '业', '书', '刊', '资', '料', '是', '提', '高', '技', - '艺', '的', '捷', '径', '。'], - ['其', '中', '线', '装', '古', '籍', '逾', '千', '册', ';', '民', '国', '出', '版', '物', '几', '百', - '种', ';', '珍', '本', '四', '册', '、', '稀', '见', '本', '四', '百', '余', '册', ',', '出', '版', - '时', '间', '跨', '越', '三', '百', '余', '年', '。'] -] - -train_y = [ - ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'B-LOC', - 'O', 'B-LOC', 'B-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', - 'O', 'O', 'O', 'O', 'O', 'O'], - ['B-PER', 'I-PER', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', - 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'I-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', - 'B-LOC', 'I-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', - 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', 'O', 'O', 'O', - 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'I-LOC', 'O', 'O', 'O', 'O', 'O', - 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'I-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], - ['O', 'O', 'O', 'O', 'O', 'B-ORG', 'I-ORG', 'I-ORG', 'O', 'B-ORG', 'I-ORG', 'I-ORG', 'O', 'O', 'O', - 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', - 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 
'O', 'O', 'O', 'O', 'O', 'O', 'O'], - ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'B-LOC', - 'O', 'B-LOC', 'B-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', - 'O', 'O', 'O', 'O', 'O', 'O'], - - ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-LOC', 'B-LOC', - 'O', 'B-LOC', 'B-LOC', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', - 'O', 'O', 'O', 'O', 'O', 'O'], - ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', - 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'], - ['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', - 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', - 'O', 'O', 'O'] -] - -eval_x = train_x -eval_y = train_y - -SEQUENCE_LENGTH = 15 - - -class EmbeddingManager(object): - word2vec_embedding = None - bert_embedding = None - - @classmethod - def get_bert(cls): - if cls.bert_embedding is None: - dir_path = os.path.dirname(os.path.realpath(__file__)) - bert_path = os.path.join(dir_path, 'data', 'test_bert_checkpoint') - cls.bert_embedding = BERTEmbedding( - bert_path, sequence_length=SEQUENCE_LENGTH) - return cls.bert_embedding - - @classmethod - def get_w2v(cls): - if cls.word2vec_embedding is None: - cls.word2vec_embedding = WordEmbeddings( - 'sgns.weibo.bigram', sequence_length=SEQUENCE_LENGTH, limit=5000) - return cls.word2vec_embedding - - -class TestCNNLSTMModel(unittest.TestCase): - @classmethod - def setUpClass(cls): - cls.epochs = 3 - cls.model = CNNLSTMModel() - - def test_build(self): - self.model.fit(train_x, train_y, epochs=1) - self.assertEqual(len(self.model.label2idx), 10) - self.assertGreater(len(self.model.token2idx), 4) - - def test_fit(self): - self.model.fit(train_x, train_y, x_validate=eval_x, - y_validate=eval_y, epochs=self.epochs) - - def test_label_token_convert(self): - self.test_fit() - sentence = list('在语言结构(语法)研究与意义(语义与语用)研究之间存在一个重要的主题划分') - idxs = self.model.embedding.tokenize(sentence) - self.assertEqual(min(len(sentence), self.model.embedding.sequence_length), - min(len(idxs)-2, self.model.embedding.sequence_length)) - tokens = self.model.embedding.tokenize(sentence) - self.assertEqual(len(sentence)+2, len(tokens)) - - def test_predict(self): - self.test_fit() - sentence = list('语言学包含了几种分支领域。') - result = self.model.predict(sentence) - logging.info('test predict: {} -> {}'.format(sentence, result)) - self.assertTrue(isinstance(self.model.predict(sentence)[0], str)) - self.assertTrue(isinstance(self.model.predict([sentence])[0], list)) - self.assertEqual(len(self.model.predict(sentence)), len(sentence)) - self.model.predict(sentence, output_dict=True) - - def test_eval(self): - self.test_fit() - self.model.evaluate(eval_x, eval_y, debug_info=True) - - def test_save_and_load(self): - self.test_fit() - model_path = os.path.join( - tempfile.gettempdir(), 'kashgari_model', str(time.time())) - self.model.save(model_path) - new_model = BLSTMModel.load_model(model_path) - self.assertIsNotNone(new_model) - sentence = list('语言学包含了几种分支领域。') - result = new_model.predict(sentence) - self.assertTrue(isinstance(result[0], str)) - self.assertEqual(len(sentence), len(result)) - - @classmethod - def tearDownClass(cls): - del cls.model - logging.info('tearDownClass {}'.format(cls)) - - -class TestCNNLSTMModelWithWord2Vec(TestCNNLSTMModel): - - 
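The tagging corpus above pairs each character with a BIO label, so the basic invariants are that every `train_x` row matches its `train_y` row in length and that every `I-` tag continues an entity of the same type. A small sanity-check sketch (the function is mine, not part of the scraped file):

def check_bio(tokens, tags):
    # exactly one tag per token
    assert len(tokens) == len(tags)
    previous = 'O'
    for tag in tags:
        if tag.startswith('I-'):
            entity = tag[2:]
            assert previous in ('B-' + entity, 'I-' + entity), 'dangling ' + tag
        previous = tag

check_bio(['鲁', '宾', '明'], ['B-PER', 'I-PER', 'O'])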
@classmethod - def setUpClass(cls): - cls.epochs = 3 - embedding = EmbeddingManager.get_w2v() - cls.model = CNNLSTMModel(embedding) - - -class TestLSTMCNNModelWithBERT(TestCNNLSTMModel): - - @classmethod - def setUpClass(cls): - cls.epochs = 1 - embedding = EmbeddingManager.get_bert() - cls.model = CNNLSTMModel(embedding) - - -class TestBLSTMModel(TestCNNLSTMModel): - @classmethod - def setUpClass(cls): - cls.epochs = 3 - cls.model = BLSTMModel() - - -class TestBLSTMModelWithWord2Vec(TestCNNLSTMModel): - @classmethod - def setUpClass(cls): - cls.epochs = 3 - embedding = EmbeddingManager.get_w2v() - cls.model = BLSTMModel(embedding) - - -class TestBLSTMModelWithBERT(TestCNNLSTMModel): - @classmethod - def setUpClass(cls): - cls.epochs = 1 - embedding = EmbeddingManager.get_bert() - cls.model = BLSTMModel(embedding) - - -class TestBLSTMCRFModel(TestCNNLSTMModel): - @classmethod - def setUpClass(cls): - cls.epochs = 5 - cls.model = BLSTMCRFModel() - - -class TestBLSTMCRFModelWithWord2Vec(TestCNNLSTMModel): - @classmethod - def setUpClass(cls): - cls.epochs = 5 - embedding = EmbeddingManager.get_w2v() - cls.model = BLSTMCRFModel(embedding) - - -class TestBLSTMCRFModelWithBERT(TestCNNLSTMModel): - @classmethod - def setUpClass(cls): - cls.epochs = 5 - embedding = EmbeddingManager.get_bert() - cls.model = BLSTMCRFModel(embedding) - - -if __name__ == "__main__": - unittest.main() -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: __init__.py.py -@time: 2019-01-19 09:57 - -""" -from .embeddings import BERTEmbedding -from .embeddings import BaseEmbedding -from .embeddings import CustomEmbedding -from .embeddings import WordEmbeddings -from .embeddings import TwoHeadEmbedding -from .embeddings import GPT2Embedding -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: embedding -@time: 2019-01-20 - -""" -import json -import logging -import os -from typing import Dict, Any -from itertools import chain -from collections import Counter - -import keras_bert -import numpy as np -from gensim.models import KeyedVectors -from keras.layers import Input, Embedding, concatenate -from keras.models import Model -from keras.preprocessing import sequence -from keras_gpt_2 import load_trained_model_from_checkpoint, get_bpe_from_files, BytePairEncoding - -import kashgari.macros as k -from kashgari.type_hints import * -from kashgari.utils import helper -from kashgari.layers import NonMaskingLayer - - -EMBEDDINGS_PATH = os.path.join(k.DATA_PATH, 'embedding') - - -class BaseEmbedding(object): - base_dict = { - k.PAD: 0, - k.BOS: 1, - k.EOS: 2, - k.UNK: 3 - } - - special_tokens = { - k.PAD: k.PAD, - k.UNK: k.UNK, - k.BOS: k.BOS, - k.EOS: k.EOS - } - - def __init__(self, - name_or_path: str, - sequence_length: int = None, - embedding_size: int = None, - **kwargs): - """ - init a WordEmbedding - :param name_or_path: model name as `sgns.weibo.bigram` or model path like '/home/brikerman/w2v.model - :param sequence_length: length of max sequence, all embedding is shaped as (sequence_length, embedding_size) - :param embedding_size: embedding vector size, only need to set when using a CustomEmbedding - :param kwargs: kwargs to pass to the method, func: `BaseEmbedding.build` - """ - self.embedding_type = 'base' - self.name = name_or_path - self.embedding_size = embedding_size - self._sequence_length = sequence_length - 
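`BaseEmbedding.base_dict` above reserves ids 0-3 for the PAD/BOS/EOS/UNK markers, and the `tokenize` method further down maps out-of-vocabulary tokens to UNK while bracketing every sequence with BOS/EOS, which is why the tests earlier expect `len(sentence) + 2` tokens. The convention as a self-contained sketch:

PAD, BOS, EOS, UNK = 0, 1, 2, 3

def tokenize(sentence, token2idx):
    # OOV tokens fall back to UNK; BOS/EOS bracket the sequence
    tokens = [token2idx.get(token, UNK) for token in sentence]
    return [BOS] + tokens + [EOS]

vocab = {'语': 4, '言': 5}
print(tokenize(['语', '言', '学'], vocab))  # [1, 4, 5, 3, 2]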
self.model_path = '' - self._token2idx: Dict[str, int] = None - self._idx2token: Dict[int, str] = None - self._model: Model = None - self._kwargs = kwargs - self.build(**kwargs) - - def update(self, info: Dict[str, Any]): - self.name = info['name'] - self.embedding_type = info['embedding_type'] - self.embedding_size = info['embedding_size'] - self._sequence_length = info['sequence_length'] - self.model_path = info['model_path'] - self._kwargs = info['kwargs'] - - def info(self): - return { - 'embedding_type': self.embedding_type, - 'name': self.name, - 'embedding_size': self.embedding_size, - 'sequence_length': self._sequence_length, - 'model_path': self.model_path, - 'kwargs': self._kwargs - } - - @property - def token_count(self): - return len(self._token2idx) - - @property - def sequence_length(self): - return self._sequence_length - - @sequence_length.setter - def sequence_length(self, val): - self._sequence_length = val - self.build(**self._kwargs) - - @property - def model(self) -> Model: - return self._model - - @property - def token2idx(self): - return self._token2idx - - @property - def is_bert(self): - return self.embedding_type == 'bert' - - @token2idx.setter - def token2idx(self, value): - self._token2idx = value - self._idx2token = dict([(value, key) - for (key, value) in value.items()]) - - @property - def idx2token(self): - return self._idx2token - - def build(self, **kwargs): - raise NotImplementedError() - - def build_token2idx_dict(self, x_data: List[TextSeqType], min_count: int = 5): - raise NotImplementedError() - - def tokenize(self, - sentence: TextSeqInputType, - add_bos_eos: bool = True) -> TokenSeqInputType: - is_list = isinstance(sentence[0], list) - - def tokenize_sentence(text: TextSeqType) -> TokenSeqType: - tokens = [self.token2idx.get( - token, self.token2idx[k.UNK]) for token in text] - if add_bos_eos: - tokens = [self.token2idx[k.BOS]] + \ - tokens + [self.token2idx[k.EOS]] - return tokens - - if is_list: - return [tokenize_sentence(sen) for sen in sentence] - else: - return tokenize_sentence(sentence) - - def embed(self, sentence: TextSeqInputType, seq_idx: int = 0) -> np.array: - is_list = isinstance(sentence[0], list) - tokens = self.tokenize(sentence) - - if not is_list: - tokens = [tokens] - if isinstance(self.sequence_length, int): - embed_input = sequence.pad_sequences( - tokens, self.sequence_length, padding='post') - elif isinstance(self.sequence_length, list): - embed_input = sequence.pad_sequences( - tokens, self.sequence_length[seq_idx], padding='post') - - embed_input = self.prepare_model_input(embed_input) - print(embed_input) - embed_pred = self.model.predict(embed_input) - if is_list: - return embed_pred - else: - return embed_pred[0] - - def prepare_model_input(self, input_x: np.array, **kwargs) -> np.array: - return input_x - - -class WordEmbeddings(BaseEmbedding): - base_dict = { - k.PAD: 0, - k.BOS: 1, - k.EOS: 2, - k.UNK: 3 - } - - URL_MAP = { - 'sgns.renmin.bigram': 'embedding/word2vec/sgns.renmin.bigram.bz2', - 'sgns.renmin.bigram-char': 'embedding/word2vec/sgns.renmin.bigram-char.bz2', - 'sgns.weibo.bigram': 'embedding/word2vec/sgns.weibo.bigram.bz2', - 'sgns.weibo.bigram-char': 'embedding/word2vec/sgns.weibo.bigram-char.bz2', - } - - def get_embedding_matrix(self) -> np.array: - base_matrix = [] - - file = os.path.join( - k.DATA_PATH, 'w2v_embedding_{}.json'.format(self.embedding_size)) - if os.path.exists(file): - base_matrix = json.load(open(file, 'r', encoding='utf-8')) - base_matrix = [np.array(matrix) for matrix in base_matrix] 
- else: - for index, key in enumerate(k.MARKED_KEYS): - if index != 0: - vector = np.random.uniform(-0.5, 0.5, self.embedding_size) - else: - vector = np.zeros(self.embedding_size) - base_matrix.append(vector) - with open(file, 'w', encoding='utf-8') as f: - f.write(json.dumps([list(item) for item in base_matrix])) - - matrix_list = base_matrix + list(self.keyed_vector.vectors) - return np.array(matrix_list) - - def build(self, **kwargs): - self.embedding_type = 'word2vec' - if self.name in WordEmbeddings.URL_MAP: - url = self.URL_MAP.get(self.name) - self.name = self.name + '.bz2' - else: - url = None - - self.model_path = helper.cached_path(self.name, - url, - sub_folders=['embedding', 'word2vec']) - - self.keyed_vector: KeyedVectors = KeyedVectors.load_word2vec_format( - self.model_path, **kwargs) - self.embedding_size = self.keyed_vector.vector_size - - word2idx = self.base_dict.copy() - for word in self.keyed_vector.index2entity: - word2idx[word] = len(word2idx) - self.token2idx = word2idx - - input_layer = Input(shape=(self.sequence_length,), dtype='int32') - embedding_matrix = self.get_embedding_matrix() - - current = Embedding(self.token_count, - self.embedding_size, - input_length=self.sequence_length, - weights=[embedding_matrix], - trainable=False)(input_layer) - self._model = Model(input_layer, current) - logging.debug('------------------------------------------------') - logging.debug('Loaded gensim word2vec model') - logging.debug('model : {}'.format(self.model_path)) - logging.debug('word count : {}'.format( - len(self.keyed_vector.index2entity))) - logging.debug('Top 50 word : {}'.format( - self.keyed_vector.index2entity[:50])) - logging.debug('------------------------------------------------') - - def build_token2idx_dict(self, x_data: List[TextSeqType], min_count: int = 5): - logging.debug( - "word2vec embedding no need to build token2idx with corpus") - - -class BERTEmbedding(BaseEmbedding): - base_dict = {} - special_tokens = { - k.PAD: '[PAD]', - k.UNK: '[UNK]', - k.BOS: '[CLS]', - k.EOS: '[SEP]', - } - - model_key_map = { - 'bert-base-uncased': 'uncased_L-12_H-768_A-12', - 'bert-large-uncased': 'uncased_L-24_H-1024_A-16', - 'bert-base-cased': 'cased_L-12_H-768_A-12', - 'bert-large-cased': 'cased_L-24_H-1024_A-16', - 'bert-base-multilingual-cased': 'multi_cased_L-12_H-768_A-12', - 'bert-base-chinese': 'chinese_L-12_H-768_A-12' - } - - pre_trained_models = { - # BERT-Base, Uncased: 12-layer, 768-hidden, 12-heads, 110M parameters - 'uncased_L-12_H-768_A-12': 'https://storage.googleapis.com/bert_models/2018_10_18/' - 'uncased_L-12_H-768_A-12.zip', - # BERT-Large, Uncased - # 24-layer, 1024-hidden, 16-heads, 340M parameters - 'uncased_L-24_H-1024_A-16': 'https://storage.googleapis.com/bert_models/2018_10_18/' - 'uncased_L-24_H-1024_A-16.zip', - # BERT-Base, Cased - # 12-layer, 768-hidden, 12-heads , 110M parameters - 'cased_L-12_H-768_A-12': 'https://storage.googleapis.com/bert_models/2018_10_18/' - 'cased_L-12_H-768_A-12.zip', - # BERT-Large, Cased - # 24-layer, 1024-hidden, 16-heads, 340M parameters - 'cased_L-24_H-1024_A-16': 'https://storage.googleapis.com/bert_models/2018_10_18/' - 'cased_L-24_H-1024_A-16.zip', - # BERT-Base, Multilingual Cased (New, recommended) - # 104 languages, 12-layer, 768-hidden, 12-heads, 110M parameters - 'multi_cased_L-12_H-768_A-12': 'https://storage.googleapis.com/bert_models/2018_11_23/' - 'multi_cased_L-12_H-768_A-12.zip', - # BERT-Base, Multilingual Uncased (Orig, not recommended) - # 12-layer, 768-hidden, 12-heads, 110M parameters - 
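`WordEmbeddings.build` above hands the gensim vectors to Keras as a frozen lookup, so the pretrained matrix is used but never updated during training. A minimal sketch of that construction with a random stand-in matrix (shapes are illustrative; Keras 2.x API as pinned in the Pipfile):

import numpy as np
from keras.layers import Input, Embedding
from keras.models import Model

vocab_size, embedding_size, sequence_length = 100, 16, 30
matrix = np.random.uniform(-0.5, 0.5, (vocab_size, embedding_size))

input_layer = Input(shape=(sequence_length,), dtype='int32')
embedded = Embedding(vocab_size,
                     embedding_size,
                     input_length=sequence_length,
                     weights=[matrix],              # pretrained vectors injected here
                     trainable=False)(input_layer)  # and kept frozen
model = Model(input_layer, embedded)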
'multilingual_L-12_H-768_A-12': 'https://storage.googleapis.com/bert_models/2018_11_03/' - 'multilingual_L-12_H-768_A-12.zip', - # BERT-Base, Chinese - # Chinese Simplified and Traditional, 12-layer, 768-hidden, 12-heads, 110M - 'chinese_L-12_H-768_A-12': 'https://storage.googleapis.com/bert_models/2018_11_03/' - 'chinese_L-12_H-768_A-12.zip', - } - - def build(self): - self.embedding_type = 'bert' - url = self.pre_trained_models.get( - self.model_key_map.get(self.name, self.name)) - self.model_path = helper.cached_path(self.model_key_map.get(self.name, self.name), - url, - ['embedding', 'bert']) - - config_path = os.path.join(self.model_path, 'bert_config.json') - check_point_path = os.path.join(self.model_path, 'bert_model.ckpt') - logging.info('loading bert model from {}\n'.format(self.model_path)) - model = keras_bert.load_trained_model_from_checkpoint(config_path, - check_point_path, - seq_len=self.sequence_length) - num_layers = len(model.layers) - features_layers = [model.get_layer(index=num_layers-1+idx*8).output - for idx in range(-3, 1)] - embedding_layer = concatenate(features_layers) - output_layer = NonMaskingLayer()(embedding_layer) - #output_layer = NonMaskingLayer()(model.output) - self._model = Model(model.inputs, output_layer) - - self.embedding_size = self.model.output_shape[-1] - dict_path = os.path.join(self.model_path, 'vocab.txt') - word2idx = {} - with open(dict_path, 'r', encoding='utf-8') as f: - words = f.read().splitlines() - for idx, word in enumerate(words): - word2idx[word] = idx - #word2idx[word] = len(word2idx) - for key, value in self.special_tokens.items(): - word2idx[key] = word2idx[value] - - self.token2idx = word2idx - - def build_token2idx_dict(self, x_data: List[TextSeqType], min_count: int = 5): - logging.debug("bert embedding no need to build token2idx with corpus") - - def prepare_model_input(self, input_x: np.array, **kwargs) -> np.array: - input_seg = np.zeros(input_x.shape) - return [input_x, input_seg] - - -class CustomEmbedding(BaseEmbedding): - def __init__(self, - name_or_path: str = 'custom-embedding', - sequence_length: int = None, - embedding_size: int = None, - **kwargs): - """ - :param name_or_path: just a name for custom embedding - :param sequence_length: length of max sequence, all embedding is shaped as (sequence_length, embedding_size) - :param embedding_size: embedding vector size, only need to set when using a CustomEmbedding - :param kwargs: kwargs to pass to the method, func: `BaseEmbedding.build` - """ - if sequence_length is None or embedding_size is None: - raise ValueError( - 'Must set sequence_length and sequence_length when using the CustomEmbedding layer') - super(CustomEmbedding, self).__init__( - name_or_path, sequence_length, embedding_size, **kwargs) - - def build(self, **kwargs): - if self._token2idx is None: - logging.debug('need to build after build_word2idx') - else: - input_x = Input(shape=(self.sequence_length,), dtype='int32') - current = Embedding(self.token_count, - self.embedding_size)(input_x) - self._model = Model(input_x, current) - - def build_token2idx_dict(self, x_data: List[TextSeqType], min_count: int = 5): - if self.token2idx is None: - #word_set: Dict[str, int] = {} - # for x_item in x_data: - # for word in x_item: - # word_set[word] = word_set.get(word, 0) + 1 - data_depth = helper.depth_count(x_data) - if data_depth > 1: - x_items = x_data - for _ in range(data_depth-1): - x_items = list(chain(*x_items)) - word_freq = Counter(x_items) - # word_set = {word: freq for word, freq in 
word_freq.items() if freq >= min_count} - # word2idx_list = sorted(word_set.items(), key=lambda kv: -kv[1]) - word2idx_list = sorted(word_freq.items(), key=lambda kv: -kv[1]) - - word2idx = self.base_dict.copy() - offset = len(word2idx) - # for word, count in word2idx_list: - # if count >= min_count: - # word2idx[word] = len(word2idx) - for idx, (word, freq) in enumerate(word2idx_list): - if freq >= min_count: - word2idx[word] = idx + offset - - self.token2idx = word2idx - self.build() - - -class TwoHeadEmbedding(CustomEmbedding): - def __init__(self, - name_or_path: str = 'twohead-embedding', - sequence_length: List[int] = None, - embedding_size: int = None, - **kwargs): - """ - Inheritated from CustomEmbedding class. - :param name_or_path: just a name for two head embedding - :param sequence_length: max length list of sequences, all embedding is shaped as (sequence_length[idx], embedding_size) - :param embedding_size: embedding vector size, only need to set when using a CustomEmbedding or its subclass - :param kwargs: kwargs to pass to the method, func: `BaseEmbedding.build` - """ - if sequence_length is None or embedding_size is None: - raise ValueError( - 'Must set all sequence_length and embedding_size when using the TwoheadEmbedding layer') - super(TwoHeadEmbedding, self).__init__( - name_or_path, sequence_length, embedding_size, **kwargs) - - def build(self, **kwargs): - self.embedding_type = 'twohead' - if self._token2idx is None: - logging.debug('need to build after build_word2idx') - else: - input_x1 = Input( - shape=(self.sequence_length[0],), dtype='int32', name='master_input') - current1 = Embedding(self.token_count, - self.embedding_size)(input_x1) - input_x2 = Input( - shape=(self.sequence_length[1],), dtype='int32', name='assist_input') - current2 = Embedding(self.token_count, - self.embedding_size)(input_x2) - current = concatenate([current1, current2], axis=1) - self._model = Model(inputs=[input_x1, input_x2], outputs=current) - - def build_token2idx_dict(self, x_data: List[TextSeqType], min_count: int = 5): - super(TwoHeadEmbedding, self).build_token2idx_dict(x_data, min_count) - - def embed(self, sentences_pair: List[List[TextSeqInputType]]) -> np.array: - embed_inputs = [] - for idx, sentences in enumerate(sentences_pair): - is_list = isinstance(sentences[0], list) - tokens = self.tokenize(sentences) - if not is_list: - tokens = [tokens] - if isinstance(self.sequence_length, list): - embed_input = sequence.pad_sequences( - tokens, self.sequence_length[idx], padding='post') - elif isinstance(self.sequence_length, int): - embed_input = sequence.pad_sequences( - tokens, self.sequence_length, padding='post') - embed_inputs.append(embed_input) - embed_inputs = self.prepare_model_input(embed_inputs) - print(embed_inputs) - embed_pred = self.model.predict(embed_inputs) - return embed_pred - - -class GPT2Embedding(BaseEmbedding): - - def build(self, **kwargs): - self.embedding_type = 'gpt2' - - config_path = os.path.join(self.name, 'hparams.json') - checkpoint_path = os.path.join(self.name, 'model.ckpt') - encoder_path = os.path.join(self.name, 'encoder.json') - vocab_path = os.path.join(self.name, 'vocab.bpe') - - self._model: Model = load_trained_model_from_checkpoint( - config_path, checkpoint_path) - for layer in self._model.layers: - layer.trainable = False - - self.bpe: BytePairEncoding = get_bpe_from_files( - encoder_path, vocab_path) - - word2idx = self.bpe.token_dict.copy() - word2idx[k.PAD] = word2idx['pad'] - word2idx[k.UNK] = word2idx['unk'] - word2idx[k.BOS] = 
word2idx['pad'] - word2idx[k.EOS] = word2idx['pad'] - self.token2idx = word2idx - - def build_token2idx_dict(self, x_data: List[TextSeqType], min_count: int = 5): - logging.debug( - "word2vec embedding no need to build token2idx with corpus") - - -if __name__ == '__main__': - train_x = [ - list('语言学(英语:linguistics)是一门关于人类语言的科学研究'), - list('语言学(英语:linguistics)是一门关于人类语言的科学研究'), - list('语言学(英语:linguistics)是一门关于人类语言的科学研究'), - list('语言学包含了几种分支领域。'), - list('在语言结构(语法)研究与意义(语义与语用)研究之间存在一个重要的主题划分'), - ] - train_y = ['a', 'a', 'a', 'b', 'c'] - - from kashgari.utils.logger import init_logger - from kashgari.tasks.classification import CNNModel - init_logger() - embedding = GPT2Embedding( - '/Users/brikerman/Desktop/python/gpt-2/models/117M', 10) - r = embedding.embed(['hello', 'world']) - model = CNNModel(embedding) - model.fit(train_x, train_y, epochs=20) - print(r.shape) -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: __init__.py.py -@time: 2019-01-19 09:57 - -""" -from . import classification -from . import seq_labeling - -if __name__ == "__main__": - print("Hello world") -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: __init__.py.py -@time: 2019-01-19 09:57 - -""" - - -if __name__ == "__main__": - print("Hello world") -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: helper.py -@time: 2019-01-19 16:25 - -""" -import logging -import os -import random -from typing import List, Optional - -import download -import h5py -import numpy as np -from keras import backend as K -from keras.layers import Layer -from keras.preprocessing import sequence -from keras.utils import to_categorical - -from kashgari.macros import DATA_PATH -from kashgari.macros import STORAGE_HOST - - -# def h5f_generator(h5path: str, -# # indices: List[int], -# num_classes: int, -# batch_size: int = 128): -# """ -# fit generator for h5 file -# :param h5path: target f5file -# :param num_classes: label counts to covert y label to one hot array -# :param batch_size: -# :return: -# """ -# -# db = h5py.File(h5path, "r") -# while True: -# page_list = list(range(len(db['x']) // batch_size + 1)) -# random.shuffle(page_list) -# for page in page_list: -# x = db["x"][page: (page + 1) * batch_size] -# y = to_categorical(db["y"][page: (page + 1) * batch_size], -# num_classes=num_classes, -# dtype=np.int) -# yield (x, y) - - -def unison_shuffled_copies(a, b): - assert len(a) == len(b) - c = list(zip(a, b)) - random.shuffle(c) - a, b = zip(*c) - return list(a), list(b) - - -def weighted_categorical_crossentropy(weights): - """ - A weighted version of keras.objectives.categorical_crossentropy - - Variables: - weights: numpy array of shape (C,) where C is the number of classes - - Usage: - weights = np.array([0.5,2,10]) # Class one at 0.5, class 2 twice the normal weights, class 3 10x. 
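The docstring above describes `weighted_categorical_crossentropy` and the Keras body follows; the same arithmetic in plain NumPy makes the per-class weighting concrete (values are illustrative):

import numpy as np

def weighted_cce(y_true, y_pred, weights, eps=1e-7):
    # normalise, clip away exact 0/1, then take -sum(y_true * log(y_pred) * w)
    y_pred = y_pred / y_pred.sum(axis=-1, keepdims=True)
    y_pred = np.clip(y_pred, eps, 1 - eps)
    return -(y_true * np.log(y_pred) * weights).sum(axis=-1)

weights = np.array([0.5, 2.0, 10.0])
y_true = np.array([[0.0, 0.0, 1.0]])
y_pred = np.array([[0.1, 0.1, 0.8]])
print(weighted_cce(y_true, y_pred, weights))  # ~[2.23], i.e. -log(0.8) * 10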
- loss = weighted_categorical_crossentropy(weights) - model.compile(loss=loss,optimizer='adam') - """ - - weights = K.variable(weights) - - def loss(y_true, y_pred): - # scale predictions so that the class probas of each sample sum to 1 - y_pred /= K.sum(y_pred, axis=-1, keepdims=True) - # clip to prevent NaN's and Inf's - y_pred = K.clip(y_pred, K.epsilon(), 1 - K.epsilon()) - # calc - loss = y_true * K.log(y_pred) * weights - loss = -K.sum(loss, -1) - return loss - - return loss - - -def cached_path(file_path: str, download_url: Optional[str], sub_folders: List[str] = []): - if os.path.exists(file_path): - return file_path - - file_name_list = [DATA_PATH] + sub_folders + [file_path] - file_path = os.path.join(*file_name_list) - if os.path.exists(file_path): - return file_path - - if download_url.startswith('http'): - url = download_url - else: - url = STORAGE_HOST + download_url - - final_path = file_path - if url.endswith('zip'): - kind = 'zip' - download_path = os.path.dirname(file_path) - elif url.endswith('tar.gz'): - kind = 'tar.gz' - download_path = os.path.dirname(file_path) - else: - kind = 'file' - download_path = file_path - # url = url.replace('https://', 'http://') - logging.info('start downloading file, if it takes too long, you could download with other downloader\n' - 'url : {}\n' - 'path : {}'.format(url, file_path)) - e_path = download.download(url, download_path, kind=kind, replace=True) - logging.info('downloader file_path {}, {} '.format(e_path, file_path)) - return final_path - # if file_path.endswith('.bz2'): - # archive_path = e_path - # outfile_path = e_path[:-4] - # with open(archive_path, 'rb') as source, open(outfile_path, 'wb') as dest: - # dest.write(bz2.decompress(source.read())) - # return outfile_path - # else: - # return final_path - - -# def check_should_download(file: str, -# download_url: Optional[str], -# sub_folders: List[str] = None): -# """ -# check should download the file, if exist return file url, if not download and unzip -# :param file: -# :param sub_folders: -# :param download_url: -# :return: -# """ -# logging.debug('check_should_download: file {}\ndownload_url {}\nsub_folders {}'.format(file, -# download_url, -# sub_folders)) -# if sub_folders is None: -# sub_folders = [] -# -# if os.path.exists(file): -# return file -# -# folders = [DATA_PATH] + sub_folders + [file] -# target_path = os.path.join(*folders) -# original_file_path = target_path -# -# if os.path.exists(target_path): -# return target_path -# -# if not download_url: -# raise ValueError("need to provide valid model name or path") -# -# if download_url.startswith('http'): -# url = download_url -# else: -# url = STORAGE_HOST + download_url -# -# if url.endswith('zip'): -# kind = 'zip' -# elif url.endswith('tar.gz'): -# kind = 'tar.gz' -# else: -# kind = 'file' -# target_path = os.path.join(target_path, url.split('/')[-1]) -# -# logging.info('start downloading file, if it takes too long, you could download with other downloader\n' -# 'url : {}\n' -# 'path : {}'.format(url, -# os.path.dirname(target_path))) -# -# file_path = download.download(url, target_path, kind=kind, replace=True) -# logging.debug('file downloaded to {}'.format(file_path)) -# if file_path.endswith('.bz2'): -# archive_path = file_path -# outfile_path = file_path[:-4] -# with open(archive_path, 'rb') as source, open(outfile_path, 'wb') as dest: -# dest.write(bz2.decompress(source.read())) -# return original_file_path -# else: -# return target_path - - -def depth_count(lst: List[List]) -> int: - return 1 + 
max(map(depth_count, lst)) if lst and isinstance(lst, list) else 0 - - -if __name__ == "__main__": - from kashgari.utils.logger import init_logger - init_logger() - file = 'embedding/word2vec/sgns.weibo.bigram-char' - url = 'embedding/word2vec/sgns.weibo.bigram-char.bz2' - print(cached_path(file, url)) -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: logger.py -@time: 2019-01-19 09:58 - -""" -import os -import sys -import logging -from colorlog import ColoredFormatter - - -def init_logger(): - level = os.getenv('LOG_LEVEL', 'DEBUG') - change_log_level(level) - - -def change_log_level(log_level): - print('----------------------') - # color_format = "%(log_color)s[%(asctime)s] %(levelname)-7s " \ - # "%(name)s:%(filename)s:%(lineno)d - %(message)s" - color_format = "%(log_color)s[%(asctime)s] %(levelname)-5s " \ - "- %(message)s" - - color_formatter = ColoredFormatter(color_format, - datefmt=None, - reset=True, - log_colors={ - 'DEBUG': 'white', - 'INFO': 'green', - 'WARNING': 'purple', - 'ERROR': 'red', - 'CRITICAL': 'red,bg_white', - }, - secondary_log_colors={}, - style='%' - ) - - print_handler = logging.StreamHandler(sys.stdout) - print_handler.setFormatter(color_formatter) - print_handler.setLevel(log_level) - - logging.basicConfig(level=logging.DEBUG, - handlers=[ - # handler, - print_handler - ]) - - logging.info('logging init finished') - - -if __name__ == "__main__": - init_logger() - - logging.info('info') - logging.error('error') - logging.warning('warning') -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: __init__.py -@time: 2019-01-27 14:52 - -""" -from .base_model import BaseModel - - -if __name__ == "__main__": - print("Hello world") -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: base_model.py -@time: 2019-01-27 14:53 - -""" -from kashgari.layers import AttentionWeightedAverage, KMaxPooling, NonMaskingLayer -from keras_bert.bert import get_custom_objects as get_bert_custom_objects -from kashgari.utils.crf import CRF, crf_loss, crf_accuracy -from kashgari.embeddings import CustomEmbedding, BaseEmbedding -from kashgari.utils import helper -from keras.utils import multi_gpu_model -from keras import backend as K -from keras.models import Model -import keras -from typing import Dict -import numpy as np -import os -import json -import pickle -import pathlib -import traceback -import logging -logger = logging.getLogger(__name__) - - -class BaseModel(object): - __base_hyper_parameters__ = {} - __architect_name__ = '' - - @property - def hyper_parameters(self): - return self._hyper_parameters_ - - def __init__(self, embedding: BaseEmbedding = None, hyper_parameters: Dict = None, **kwargs): - if embedding is None: - self.embedding = CustomEmbedding( - 'custom', sequence_length=0, embedding_size=100) - else: - self.embedding = embedding - self.model: Model = None - self._hyper_parameters_ = self.__base_hyper_parameters__.copy() - self._label2idx = {} - self._idx2label = {} - self.model_info = {} - - self.task = 'classification' - - if hyper_parameters: - self._hyper_parameters_.update(hyper_parameters) - - def info(self): - return { - 'architect_name': self.__architect_name__, - 'task': self.task, - 'embedding': self.embedding.info(), - 'hyper_parameters': self.hyper_parameters, 
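The one-line `depth_count` above is cryptic but load-bearing: downstream, it decides whether an input is a single sentence, a batch, or a multi-input batch. Restated without type hints, with worked examples:

def depth_count(lst):
    # 0 for non-lists and empty lists, else 1 + the deepest child
    return 1 + max(map(depth_count, lst)) if lst and isinstance(lst, list) else 0

print(depth_count(['a', 'b']))           # 1: a single tokenised sentence
print(depth_count([['a'], ['b', 'c']]))  # 2: a batch of sentences
print(depth_count([[['a']], [['b']]]))   # 3: a two-head batch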
- 'model_info': self.model_info - } - - def _compile_model(self): - """ - compile model function - :return: - """ - raise NotImplementedError() - - def _prepare_model(self): - """ - prepare model function - :return: - """ - raise NotImplementedError() - - def build_multi_gpu_model(self, gpus: int): - """ - build multi-gpu model function - :return: - """ - if not self.model: - raise RuntimeError("Model not built yet, Please call build_model function with" - "your corpus to build model") - - # If gpus < 2, this will fall back to normal build_model() on CPU or GPU - if gpus >= 2: - self.model = multi_gpu_model(self.model, gpus=gpus) - self._compile_model() - self.model.summary() - - def save(self, model_path: str): - pathlib.Path(model_path).mkdir(exist_ok=True, parents=True) - - model_info = self.info() - - with open(os.path.join(model_path, 'labels.json'), 'w', encoding='utf-8') as f: - f.write(json.dumps(self._label2idx, indent=2, ensure_ascii=False)) - - with open(os.path.join(model_path, 'words.json'), 'w', encoding='utf-8') as f: - f.write(json.dumps(self.embedding.token2idx, - indent=2, ensure_ascii=False)) - - with open(os.path.join(model_path, 'model.json'), 'w', encoding='utf-8') as f: - f.write(json.dumps(model_info, indent=2, ensure_ascii=False)) - - with open(os.path.join(model_path, 'struct.json'), 'w', encoding='utf-8') as f: - f.write(self.model.to_json()) - - #self.model.save_weights(os.path.join(model_path, 'weights.h5')) - optimizer_weight_values = None - try: - symbolic_weights = getattr(self.model.optimizer, 'weights') - optimizer_weight_values = K.batch_get_value(symbolic_weights) - except Exception as e: - logger.warn('error occur: {}'.format(e)) - traceback.print_tb(e.__traceback__) - logger.warn('No optimizer weights found.') - if optimizer_weight_values is not None: - with open(os.path.join(model_path, 'optimizer.pkl'), 'wb') as f: - pickle.dump(optimizer_weight_values, f) - - self.model.save(os.path.join(model_path, 'model.model')) - logger.info('model saved to {}'.format(os.path.abspath(model_path))) - - @staticmethod - def create_custom_objects(model_info): - custom_objects = {} - loss = model_info.get('model_info', {}).get('loss') - if loss and loss['name'] == 'weighted_categorical_crossentropy': - loss_f = helper.weighted_categorical_crossentropy( - np.array(loss['weights'])) - custom_objects['loss'] = loss_f - - architect_name = model_info.get('architect_name') - if architect_name and 'CRF' in architect_name: - custom_objects['CRF'] = CRF - custom_objects['crf_loss'] = crf_loss - custom_objects['crf_accuracy'] = crf_accuracy - - embedding = model_info.get('embedding') - - if embedding and embedding['embedding_type'] == 'bert': - custom_objects['NonMaskingLayer'] = NonMaskingLayer - custom_objects.update(get_bert_custom_objects()) - custom_objects['AttentionWeightedAverage'] = AttentionWeightedAverage - custom_objects['KMaxPooling'] = KMaxPooling - return custom_objects - - @classmethod - def load_model(cls, model_path: str): - with open(os.path.join(model_path, 'labels.json'), 'r', encoding='utf-8') as f: - label2idx = json.load(f) - - with open(os.path.join(model_path, 'words.json'), 'r', encoding='utf-8') as f: - token2idx = json.load(f) - - with open(os.path.join(model_path, 'model.json'), 'r', encoding='utf-8') as f: - model_info = json.load(f) - agent = cls() - custom_objects = cls.create_custom_objects(model_info) - agent.model_info = model_info['model_info'] - if custom_objects: - logger.debug('prepared custom objects: {}'.format(custom_objects)) - - 
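Keras can only deserialise layers, losses and metrics it knows by name, which is why `create_custom_objects` above assembles a name-to-class map before the `keras.models.load_model` call that follows. The call it feeds, in isolation (path as written by `save()`; only one custom layer shown):

import keras
from kashgari.layers import NonMaskingLayer

model = keras.models.load_model(
    'model.model',  # the file save() writes into the model directory
    custom_objects={'NonMaskingLayer': NonMaskingLayer})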
try: - agent.model = keras.models.load_model(os.path.join(model_path, 'model.model'), - custom_objects=custom_objects) - except Exception as e: - logger.warning( - 'Error `{}` occured trying directly model loading. Try to rebuild.'.format(e)) - logger.debug('Load model structure from json.') - with open(os.path.join(model_path, 'struct.json'), 'r', encoding='utf-8') as f: - model_struct = f.read() - agent.model = keras.models.model_from_json(model_struct, - custom_objects=custom_objects) - logger.debug('Build optimizer with model info.') - optimizer_conf = model_info['hyper_parameters'].get( - 'optimizer', None) - optimizer = 'adam' # default - if optimizer_conf is not None and isinstance(optimizer_conf, dict): - module_str = optimizer_conf.get('module', 'None') - name_str = optimizer_conf.get('name', 'None') - params = optimizer_conf.get('params', None) - invalid_set = [None, 'None', '', {}] - if not any([module_str.strip() in invalid_set, - name_str.strip() in invalid_set, - params in invalid_set]): - try: - optimizer = getattr( - eval(module_str), name_str)(**params) - except: - logger.warn( - 'Invalid optimizer configuration in model info. Use `adam` as default.') - else: - logger.warn( - 'No optimizer configuration found in model info. Use `adam` as default.') - - default_compile_params = { - 'loss': 'categorical_crossentropy', 'metrics': ['accuracy']} - compile_params = model_info['hyper_parameters'].get( - 'compile_params', default_compile_params) - logger.debug('Compile model from scratch.') - try: - agent.model.compile(optimizer=optimizer, **compile_params) - except: - logger.warn( - 'Failed to compile model. Compile params seems incorrect.') - logger.warn('Use default options `{}` to compile.'.format( - default_compile_params)) - agent.model.compile(optimizer=optimizer, ** - default_compile_params) - logger.debug('Load model weights.') - agent.model.summary() - agent.model.load_weights(os.path.join(model_path, 'model.model')) - agent.model._make_train_function() - optimizer_weight_values = None - logger.debug('Load optimizer weights.') - try: - with open(os.path.join(model_path, 'optimizer.pkl'), 'rb') as f: - optimizer_weight_values = pickle.load(f) - except Exception as e: - logger.warn( - 'Try to load optimizer weights but no optimizer weights file found.') - if optimizer_weight_values is not None: - agent.model.optimizer.set_weights(optimizer_weight_values) - else: - logger.warn( - 'Rebuild model but optimizer weights missed. 
Retrain needed.') - logger.info('Model rebuild finished.') - agent.embedding.update(model_info.get('embedding', {})) - agent.model.summary() - agent.label2idx = label2idx - agent.embedding.token2idx = token2idx - logger.info('loaded model from {}'.format(os.path.abspath(model_path))) - return agent - - -if __name__ == "__main__": - print("Hello world") -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: __init__.py.py -@time: 2019-01-19 11:49 - -""" -from .base_model import ClassificationModel -from kashgari.tasks.classification.models import BLSTMModel, CNNLSTMModel, CNNModel -from kashgari.tasks.classification.models import AVCNNModel, KMaxCNNModel, RCNNModel, AVRNNModel -from kashgari.tasks.classification.models import DropoutBGRUModel, DropoutAVRNNModel -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: base_model.py -@time: 2019-01-19 11:50 - -""" -import logging -import random -from itertools import chain -from typing import Tuple, Dict - -import numpy as np -from keras.preprocessing import sequence -from keras.utils import to_categorical, multi_gpu_model -from sklearn import metrics -from sklearn.utils import class_weight as class_weight_calculte -from sklearn.preprocessing import MultiLabelBinarizer - -import kashgari.macros as k -from kashgari.tasks.base import BaseModel -from kashgari.embeddings import BaseEmbedding -from kashgari.type_hints import * -from kashgari.utils.helper import depth_count - - -class ClassificationModel(BaseModel): - - def __init__(self, - embedding: BaseEmbedding = None, - hyper_parameters: Dict = None, - multi_label: bool = False, - **kwargs): - """ - - :param embedding: - :param hyper_parameters: - :param multi_label: - :param kwargs: - """ - super(ClassificationModel, self).__init__( - embedding, hyper_parameters, **kwargs) - self.multi_label = multi_label - self.multi_label_binarizer: MultiLabelBinarizer = None - - if self.multi_label: - if not hyper_parameters or \ - hyper_parameters.get('compile_params', {}).get('loss') is None: - self.hyper_parameters['compile_params']['loss'] = 'binary_crossentropy' - else: - logging.warning( - 'recommend to use binary_crossentropy loss for multi_label task') - - if not hyper_parameters or \ - hyper_parameters.get('compile_params', {}).get('metrics') is None: - self.hyper_parameters['compile_params']['metrics'] = [ - 'categorical_accuracy'] - else: - logging.warning( - 'recommend to use categorical_accuracy metrivs for multi_label task') - - if not hyper_parameters or \ - hyper_parameters.get('activation_layer', {}).get('sigmoid') is None: - self.hyper_parameters['activation_layer']['activation'] = 'sigmoid' - else: - logging.warning( - 'recommend to use sigmoid activation for multi_label task') - - def info(self): - info = super(ClassificationModel, self).info() - info['model_info']['multi_label'] = self.multi_label - return info - - @property - def label2idx(self) -> Dict[str, int]: - return self._label2idx - - @property - def token2idx(self) -> Dict[str, int]: - return self.embedding.token2idx - - @label2idx.setter - def label2idx(self, value): - self._label2idx = value - self._idx2label = dict([(val, key) for (key, val) in value.items()]) - - def build_model(self, - x_train: List[List[str]], - y_train: Union[List[str], List[List[str]], List[Tuple[str]]], - x_validate: List[List[str]] = None, - y_validate: 
Union[List[str], List[List[str]], List[Tuple[str]]] = None): - """ - build model function - :return: - """ - assert len(x_train) == len(y_train) - self.build_token2id_label2id_dict( - x_train, y_train, x_validate, y_validate) - - if not self.model: - if self.embedding.sequence_length == 0: - self.embedding.sequence_length = sorted([len(x) for x in x_train])[ - int(0.95 * len(x_train))] - logging.info('sequence length set to {}'.format( - self.embedding.sequence_length)) - self._prepare_model() - self._compile_model() - self.model.summary() - - @classmethod - def load_model(cls, model_path: str): - agent: ClassificationModel = super( - ClassificationModel, cls).load_model(model_path) - agent.multi_label = agent.model_info.get('multi_label', False) - if agent.multi_label: - keys = list(agent.label2idx.keys()) - agent.multi_label_binarizer = MultiLabelBinarizer(classes=keys) - agent.multi_label_binarizer.fit(keys[0]) - return agent - - def build_token2id_label2id_dict(self, - x_train: List[List[str]], - y_train: List[str], - x_validate: List[List[str]] = None, - y_validate: List[str] = None): - if x_validate: - x_data = [*x_train, *x_validate] - y_data = [*y_train, *y_validate] - else: - x_data = x_train - y_data = y_train - x_data_level = depth_count(x_data) - if x_data_level > 2: - for _ in range(x_data_level-2): - x_data = list(chain(*x_data)) - - self.embedding.build_token2idx_dict(x_data, 3) - - if self.multi_label: - label_set = set() - for i in y_data: - label_set = label_set.union(list(i)) - else: - label_set = set(y_data) - - if not len(self.label2idx): - label2idx = { - k.PAD: 0, - } - for idx, label in enumerate(label_set): - label2idx[label] = idx + 1 - self._label2idx = label2idx - self._idx2label = dict([(val, key) - for (key, val) in label2idx.items()]) - self.multi_label_binarizer = MultiLabelBinarizer( - classes=list(self.label2idx.keys())) - - def convert_label_to_idx(self, label: Union[List[str], str]) -> Union[List[int], int]: - if isinstance(label, str): - return self.label2idx[label] - else: - return [self.label2idx[l] for l in label] - - def convert_idx_to_label(self, token: Union[List[int], int]) -> Union[List[str], str]: - if isinstance(token, int): - return self._idx2label[token] - else: - return [self._idx2label[l] for l in token] - - def get_data_generator(self, - x_data: Union[List[List[str]], List[List[List[str]]]], - y_data: List[str], - batch_size: int = 64, - is_bert: bool = False): - x_data_level = depth_count(x_data) - if x_data_level == 2: - x_data = [x_data] - data_len = len(y_data) - for x in x_data: - assert len(x) == data_len - while True: - page_list = list(range((data_len // batch_size) + 1)) - random.shuffle(page_list) - for page in page_list: - start_index = page * batch_size - end_index = start_index + batch_size - target_x = [] - for x in x_data: - target_x.append(x[start_index: end_index]) - target_y = y_data[start_index: end_index] - if len(target_x[0]) == 0: - for x in x_data: - target_x.append(x[0: batch_size]) - target_y = y_data[0: batch_size] - - padded_x = [] - for i, x in enumerate(target_x): - tokenized_x = self.embedding.tokenize(x) - - if isinstance(self.embedding.sequence_length, int): - padded_x.append(sequence.pad_sequences(tokenized_x, - maxlen=self.embedding.sequence_length, - padding='post') - ) - elif isinstance(self.embedding.sequence_length, list): - padded_x.append(sequence.pad_sequences(tokenized_x, - maxlen=self.embedding.sequence_length[i], - padding='post') - ) - - if self.multi_label: - padded_y = 
self.multi_label_binarizer.fit_transform( - target_y) - else: - tokenized_y = self.convert_label_to_idx(target_y) - padded_y = to_categorical(tokenized_y, - num_classes=len(self.label2idx), - dtype=np.int) - if is_bert: - if isinstance(self.embedding.sequence_length, int): - padded_x_seg = [np.zeros(shape=(len(padded_x_i), - self.embedding.sequence_length)) - for padded_x_i in padded_x] - elif isinstance(self.embedding.sequence_length, list): - padded_x_seg = [np.zeros(shape=(len(padded_x_i), - self.embedding.sequence_length[i])) - for i, padded_x_i in enumerate(padded_x)] - x_input_data = list(chain(*[(x, x_seg) - for x, x_seg in zip(padded_x, padded_x_seg)])) - else: - x_input_data = padded_x[0] if x_data_level == 2 else padded_x - yield (x_input_data, padded_y) - - def fit(self, - x_train: Union[List[List[str]], List[List[List[str]]]], - y_train: Union[List[str], List[List[str]], List[Tuple[str]]], - x_validate: Union[List[List[str]], List[List[List[str]]]] = None, - y_validate: Union[List[str], List[List[str]], List[Tuple[str]]] = None, - batch_size: int = 64, - epochs: int = 5, - class_weight: bool = False, - fit_kwargs: Dict = None, - **kwargs): - """ - - :param x_train: list of training data. - :param y_train: list of training target label data. - :param x_validate: list of validation data. - :param y_validate: list of validation target label data. - :param batch_size: batch size for trainer model - :param epochs: Number of epochs to train the model. - :param class_weight: set class weights for imbalanced classes - :param fit_kwargs: additional kwargs to be passed to - :func:`~keras.models.Model.fit` - :param kwargs: - :return: - """ - x_train_level = depth_count(x_train) - if x_train_level == 2: - assert len(x_train) == len(y_train) - elif x_train_level > 2: - for x_part in x_train: - assert len(x_part) == len(y_train) - else: - raise Exception('x_train type error') - - if len(y_train) < batch_size: - batch_size = len(y_train) // 2 - - if not self.model: - if isinstance(self.embedding.sequence_length, int): - if self.embedding.sequence_length == 0: - self.embedding.sequence_length = sorted([len(x) for x in x_train])[ - int(0.95 * len(x_train))] - logging.info('sequence length set to {}'.format( - self.embedding.sequence_length)) - elif isinstance(self.embedding.sequence_length, list): - seq_len = [] - for i, x_part in enumerate(x_train): - if self.embedding.sequence_length[i] == 0: - seq_len.append(max(sorted([len(x) for x in x_part])[ - int(0.95 * len(x_part))], 1)) - logging.info( - f'sequence_{i} length set to {self.embedding.sequence_length[i]}') - else: - seq_len.append(self.embedding.sequence_length[i]) - self.embedding.sequence_length = seq_len - self.build_model(x_train, y_train, x_validate, y_validate) - - train_generator = self.get_data_generator(x_train, - y_train, - batch_size, - is_bert=self.embedding.is_bert) - - if fit_kwargs is None: - fit_kwargs = {} - - if x_validate: - validation_generator = self.get_data_generator(x_validate, - y_validate, - batch_size, - is_bert=self.embedding.is_bert) - fit_kwargs['validation_data'] = validation_generator - fit_kwargs['validation_steps'] = max( - len(y_validate) // batch_size, 1) - - if class_weight: - if self.multi_label: - y_list = [self.convert_label_to_idx(y) for y in y_train] - y_list = [y for ys in y_list for y in ys] - else: - y_list = self.convert_label_to_idx(y_train) - class_weights = class_weight_calculte.compute_class_weight('balanced', - np.unique( - y_list), - y_list) - else: - class_weights = None - - 
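The `class_weight=True` branch above delegates to scikit-learn's balanced weighting, which assigns each class `n_samples / (n_classes * class_count)`. A minimal standalone sketch of that call (the toy labels are invented for illustration, not taken from the scraped dataset):

```python
import numpy as np
from sklearn.utils.class_weight import compute_class_weight

# A deliberately imbalanced toy label list: 6 x "pos", 2 x "neg", 1 x "neu".
y_list = ["pos"] * 6 + ["neg"] * 2 + ["neu"]

# 'balanced' gives each class n_samples / (n_classes * class_count),
# so rarer classes are weighted up in the loss.
weights = compute_class_weight(class_weight="balanced",
                               classes=np.unique(y_list),
                               y=y_list)

print(dict(zip(np.unique(y_list), weights)))
# {'neg': 1.5, 'neu': 3.0, 'pos': 0.5}
```

These are the values that end up in `class_weights` and are then handed to `fit_generator` below.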
self.model.fit_generator(train_generator, - steps_per_epoch=len(y_train) // batch_size, - epochs=epochs, - class_weight=class_weights, - **fit_kwargs) - - def _format_output_dic(self, words: List[str], res: np.ndarray): - results = sorted(list(enumerate(res)), key=lambda x: -x[1]) - candidates = [] - for result in results: - candidates.append({ - 'name': self.convert_idx_to_label([result[0]])[0], - 'confidence': float(result[1]), - }) - - data = { - 'words': words, - 'class': candidates[0], - 'class_candidates': candidates - } - return data - - def predict(self, - sentence: Union[List[str], List[List[str]], List[List[List[str]]]], - batch_size=None, - output_dict=False, - multi_label_threshold=0.6, - debug_info=False) -> Union[List[str], str, List[Dict], Dict]: - """ - predict with model - :param sentence: single sentence as List[str] or list of sentence as List[List[str]] - :param batch_size: predict batch_size - :param output_dict: return dict with result with confidence - :param multi_label_threshold: - :param debug_info: print debug info using logging.debug when True - :return: - """ - sentence_level = depth_count(sentence) - if sentence_level == 2: - sentence = [sentence] - elif sentence_level == 1: - sentence = [[sentence]] - padded_tokens = [] - for i, sent_part in enumerate(sentence): - tokens = self.embedding.tokenize(sent_part) - if isinstance(self.embedding.sequence_length, int): - padded_tokens_part = sequence.pad_sequences(tokens, - maxlen=self.embedding.sequence_length, - padding='post') - padded_tokens.append(padded_tokens_part) - if self.embedding.is_bert: - padded_tokens.append(np.zeros(shape=(len(padded_tokens_part), - self.embedding.sequence_length))) - elif isinstance(self.embedding.sequence_length, list): - padded_tokens_part = sequence.pad_sequences(tokens, - maxlen=self.embedding.sequence_length[i], - padding='post') - padded_tokens.append(padded_tokens_part) - if self.embedding.is_bert: - padded_tokens.append(np.zeros(shape=(len(padded_tokens_part), - self.embedding.sequence_length[i]))) - - x = padded_tokens - res = self.model.predict(x, batch_size=batch_size) - - if self.multi_label: - if debug_info: - logging.info('raw output: {}'.format(res)) - res[res >= multi_label_threshold] = 1 - res[res < multi_label_threshold] = 0 - predict_result = res - else: - predict_result = res.argmax(-1) - - if debug_info: - logging.info('input: {}'.format(x)) - logging.info('output: {}'.format(res)) - logging.info('output argmax: {}'.format(predict_result)) - - if output_dict: - words_list: List[List[str]] = sentence[0] - results = [] - for index in range(len(words_list)): - results.append(self._format_output_dic( - words_list[index], res[index])) - if sentence_level >= 2: - return results - elif sentence_level == 1: - return results[0] - else: - if self.multi_label: - results = self.multi_label_binarizer.inverse_transform( - predict_result) - else: - results = self.convert_idx_to_label(predict_result) - if sentence_level >= 2: - return results - elif sentence_level == 1: - return results[0] - - def evaluate(self, x_data, y_data, batch_size=None, digits=4, debug_info=False) -> Tuple[float, float, Dict]: - y_pred = self.predict(x_data, batch_size=batch_size) - report = metrics.classification_report( - y_data, y_pred, output_dict=True, digits=digits) - print(metrics.classification_report(y_data, y_pred, digits=digits)) - if debug_info: - for index in random.sample(list(range(len(x_data))), 5): - logging.debug('------ sample {} ------'.format(index)) - logging.debug('x : 
{}'.format(x_data[index])) - logging.debug('y : {}'.format(y_data[index])) - logging.debug('y_pred : {}'.format(y_pred[index])) - return report -# encoding: utf-8 -""" -@author: Alex -@contact: ialexwwang@gmail.com - -@version: 0.1 -@license: Apache Licence -@file: deep_models.py -@time: 2019-02-21 17:54 - -@Reference: https://github.com/zake7749/DeepToxic/blob/master/sotoxic/models/keras/model_zoo.py -""" -from __future__ import absolute_import, division - -import keras -from keras.layers import Bidirectional, Conv1D -from keras.layers import Dense, Lambda, Flatten -from keras.layers import Dropout, SpatialDropout1D -from keras.layers import GlobalAveragePooling1D, GlobalMaxPooling1D, MaxPooling1D -from keras.layers import concatenate -from keras.models import Model - -from kashgari.layers import AttentionWeightedAverage, KMaxPooling, LSTMLayer, GRULayer -from kashgari.tasks.classification.base_model import ClassificationModel - - -class CNNModel(ClassificationModel): - __architect_name__ = 'CNNModel' - __base_hyper_parameters__ = { - 'conv1d_layer': { - 'filters': 128, - 'kernel_size': 5, - 'activation': 'relu' - }, - 'max_pool_layer': {}, - 'dense_1_layer': { - 'units': 64, - 'activation': 'relu' - }, - 'activation_layer': { - 'activation': 'softmax' - }, - 'optimizer': { - 'module': 'keras.optimizers', - 'name': 'Adam', - 'params': { - 'lr': 1e-3, - 'decay': 0.0 - } - }, - 'compile_params': { - 'loss': 'categorical_crossentropy', - # 'optimizer': 'adam', - 'metrics': ['accuracy'] - } - } - - def _prepare_model(self): - base_model = self.embedding.model - conv1d_layer = Conv1D( - **self.hyper_parameters['conv1d_layer'])(base_model.output) - max_pool_layer = GlobalMaxPooling1D( - **self.hyper_parameters['max_pool_layer'])(conv1d_layer) - dense_1_layer = Dense( - **self.hyper_parameters['dense_1_layer'])(max_pool_layer) - dense_2_layer = Dense( - len(self.label2idx), **self.hyper_parameters['activation_layer'])(dense_1_layer) - - self.model = Model(base_model.inputs, dense_2_layer) - - def _compile_model(self): - optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']), - self.hyper_parameters['optimizer']['name'])( - **self.hyper_parameters['optimizer']['params']) - self.model.compile(optimizer=optimizer, ** - self.hyper_parameters['compile_params']) - - -class BLSTMModel(ClassificationModel): - __architect_name__ = 'BLSTMModel' - __base_hyper_parameters__ = { - 'lstm_layer': { - 'units': 256, - 'return_sequences': False - }, - 'activation_layer': { - 'activation': 'softmax' - }, - 'optimizer': { - 'module': 'keras.optimizers', - 'name': 'Adam', - 'params': { - 'lr': 1e-3, - 'decay': 0.0 - } - }, - 'compile_params': { - 'loss': 'categorical_crossentropy', - # 'optimizer': 'adam', - 'metrics': ['accuracy'] - } - } - - def _prepare_model(self): - base_model = self.embedding.model - blstm_layer = Bidirectional( - LSTMLayer(**self.hyper_parameters['lstm_layer']))(base_model.output) - dense_layer = Dense( - len(self.label2idx), **self.hyper_parameters['activation_layer'])(blstm_layer) - output_layers = [dense_layer] - - self.model = Model(base_model.inputs, output_layers) - - def _compile_model(self): - optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']), - self.hyper_parameters['optimizer']['name'])( - **self.hyper_parameters['optimizer']['params']) - self.model.compile(optimizer=optimizer, ** - self.hyper_parameters['compile_params']) - - -class CNNLSTMModel(ClassificationModel): - __architect_name__ = 'CNNLSTMModel' - __base_hyper_parameters__ = { - 
'conv_layer': { - 'filters': 32, - 'kernel_size': 3, - 'padding': 'same', - 'activation': 'relu' - }, - 'max_pool_layer': { - 'pool_size': 2 - }, - 'lstm_layer': { - 'units': 100 - }, - 'activation_layer': { - 'activation': 'softmax' - }, - 'optimizer': { - 'module': 'keras.optimizers', - 'name': 'Adam', - 'params': { - 'lr': 1e-3, - 'decay': 0.0 - } - }, - 'compile_params': { - 'loss': 'categorical_crossentropy', - # 'optimizer': 'adam', - 'metrics': ['accuracy'] - } - } - - def _prepare_model(self): - base_model = self.embedding.model - conv_layer = Conv1D( - **self.hyper_parameters['conv_layer'])(base_model.output) - max_pool_layer = MaxPooling1D( - **self.hyper_parameters['max_pool_layer'])(conv_layer) - lstm_layer = LSTMLayer( - **self.hyper_parameters['lstm_layer'])(max_pool_layer) - dense_layer = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(lstm_layer) - output_layers = [dense_layer] - - self.model = Model(base_model.inputs, output_layers) - - def _compile_model(self): - optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']), - self.hyper_parameters['optimizer']['name'])( - **self.hyper_parameters['optimizer']['params']) - self.model.compile(optimizer=optimizer, ** - self.hyper_parameters['compile_params']) - - -class AVCNNModel(ClassificationModel): - __architect_name__ = 'AVCNNModel' - __base_hyper_parameters__ = { - 'spatial_dropout': { - 'rate': 0.25 - }, - 'conv_0': { - 'filters': 300, - 'kernel_size': 1, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'conv_1': { - 'filters': 300, - 'kernel_size': 2, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'conv_2': { - 'filters': 300, - 'kernel_size': 3, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'conv_3': { - 'filters': 300, - 'kernel_size': 4, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - # --- - 'attn_0': {}, - 'avg_0': {}, - 'maxpool_0': {}, - # --- - 'maxpool_1': {}, - 'attn_1': {}, - 'avg_1': {}, - # --- - 'maxpool_2': {}, - 'attn_2': {}, - 'avg_2': {}, - # --- - 'maxpool_3': {}, - 'attn_3': {}, - 'avg_3': {}, - # --- - 'v0_col': { - # 'mode': 'concat', - 'axis': 1 - }, - 'v1_col': { - # 'mode': 'concat', - 'axis': 1 - }, - 'v2_col': { - # 'mode': 'concat', - 'axis': 1 - }, - 'merged_tensor': { - # 'mode': 'concat', - 'axis': 1 - }, - 'dropout': { - 'rate': 0.7 - }, - 'dense': { - 'units': 144, - 'activation': 'relu' - }, - 'activation_layer': { - 'activation': 'softmax' - }, - 'optimizer': { - 'module': 'keras.optimizers', - 'name': 'Adam', - 'params': { - 'lr': 1e-3, - 'decay': 1e-7 - } - }, - 'compile_params': { - 'loss': 'categorical_crossentropy', - # 'optimizer': 'adam', - 'metrics': ['accuracy'] - } - } - - def _prepare_model(self): - base_model = self.embedding.model - embedded_seq = SpatialDropout1D( - **self.hyper_parameters['spatial_dropout'])(base_model.output) - conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(embedded_seq) - conv_1 = Conv1D(**self.hyper_parameters['conv_1'])(embedded_seq) - conv_2 = Conv1D(**self.hyper_parameters['conv_2'])(embedded_seq) - conv_3 = Conv1D(**self.hyper_parameters['conv_3'])(embedded_seq) - - maxpool_0 = GlobalMaxPooling1D()(conv_0) - attn_0 = AttentionWeightedAverage()(conv_0) - avg_0 = GlobalAveragePooling1D()(conv_0) - - maxpool_1 = GlobalMaxPooling1D()(conv_1) - attn_1 = AttentionWeightedAverage()(conv_1) - avg_1 = GlobalAveragePooling1D()(conv_1) - - maxpool_2 = 
GlobalMaxPooling1D()(conv_2) - attn_2 = AttentionWeightedAverage()(conv_2) - avg_2 = GlobalAveragePooling1D()(conv_2) - - maxpool_3 = GlobalMaxPooling1D()(conv_3) - attn_3 = AttentionWeightedAverage()(conv_3) - avg_3 = GlobalAveragePooling1D()(conv_3) - - v0_col = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3], - **self.hyper_parameters['v0_col']) - v1_col = concatenate([attn_0, attn_1, attn_2, attn_3], - **self.hyper_parameters['v1_col']) - v2_col = concatenate([avg_1, avg_2, avg_0, avg_3], - **self.hyper_parameters['v2_col']) - merged_tensor = concatenate([v0_col, v1_col, v2_col], - **self.hyper_parameters['merged_tensor']) - output = Dropout(**self.hyper_parameters['dropout'])(merged_tensor) - output = Dense(**self.hyper_parameters['dense'])(output) - output = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(output) - - self.model = Model(base_model.inputs, output) - - def _compile_model(self): - optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']), - self.hyper_parameters['optimizer']['name'])( - **self.hyper_parameters['optimizer']['params']) - self.model.compile(optimizer=optimizer, ** - self.hyper_parameters['compile_params']) - - -class KMaxCNNModel(ClassificationModel): - __architect_name__ = 'KMaxCNNModel' - __base_hyper_parameters__ = { - 'spatial_dropout': { - 'rate': 0.2 - }, - 'conv_0': { - 'filters': 180, - 'kernel_size': 1, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'conv_1': { - 'filters': 180, - 'kernel_size': 2, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'conv_2': { - 'filters': 180, - 'kernel_size': 3, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'conv_3': { - 'filters': 180, - 'kernel_size': 4, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu' - }, - 'maxpool_0': { - 'k': 3 - }, - 'maxpool_1': { - 'k': 3 - }, - 'maxpool_2': { - 'k': 3 - }, - 'maxpool_3': { - 'k': 3 - }, - 'merged_tensor': { - # 'mode': 'concat', - 'axis': 1 - }, - 'dropout': { - 'rate': 0.6 - }, - 'dense': { - 'units': 144, - 'activation': 'relu' - }, - 'activation_layer': { - 'activation': 'softmax' - }, - 'optimizer': { - 'module': 'keras.optimizers', - 'name': 'Adam', - 'params': { - 'lr': 1e-3, - 'decay': 1e-7 - } - }, - 'compile_params': { - 'loss': 'categorical_crossentropy', - # 'optimizer': 'adam', - 'metrics': ['accuracy'] - } - } - - def _prepare_model(self): - base_model = self.embedding.model - embedded_seq = SpatialDropout1D( - **self.hyper_parameters['spatial_dropout'])(base_model.output) - conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(embedded_seq) - conv_1 = Conv1D(**self.hyper_parameters['conv_1'])(embedded_seq) - conv_2 = Conv1D(**self.hyper_parameters['conv_2'])(embedded_seq) - conv_3 = Conv1D(**self.hyper_parameters['conv_3'])(embedded_seq) - - maxpool_0 = KMaxPooling(**self.hyper_parameters['maxpool_0'])(conv_0) - # maxpool_0f = Reshape((-1,))(maxpool_0) - maxpool_0f = Flatten()(maxpool_0) - maxpool_1 = KMaxPooling(**self.hyper_parameters['maxpool_1'])(conv_1) - # maxpool_1f = Reshape((-1,))(maxpool_1) - maxpool_1f = Flatten()(maxpool_1) - maxpool_2 = KMaxPooling(**self.hyper_parameters['maxpool_2'])(conv_2) - # maxpool_2f = Reshape((-1,))(maxpool_2) - maxpool_2f = Flatten()(maxpool_2) - maxpool_3 = KMaxPooling(**self.hyper_parameters['maxpool_3'])(conv_3) - # maxpool_3f = Reshape((-1,))(maxpool_3) - maxpool_3f = Flatten()(maxpool_3) - # maxpool_0 = 
GlobalMaxPooling1D()(conv_0) - # maxpool_1 = GlobalMaxPooling1D()(conv_1) - # maxpool_2 = GlobalMaxPooling1D()(conv_2) - # maxpool_3 = GlobalMaxPooling1D()(conv_3) - - # merged_tensor = concatenate([maxpool_0, maxpool_1, maxpool_2, maxpool_3], - # **self.hyper_parameters['merged_tensor']) - merged_tensor = concatenate([maxpool_0f, maxpool_1f, maxpool_2f, maxpool_3f], - **self.hyper_parameters['merged_tensor']) - # flatten = Reshape((-1,))(merged_tensor) - # output = Dropout(**self.hyper_parameters['dropout'])(flatten) - output = Dropout(**self.hyper_parameters['dropout'])(merged_tensor) - output = Dense(**self.hyper_parameters['dense'])(output) - output = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(output) - - self.model = Model(base_model.inputs, output) - - def _compile_model(self): - optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']), - self.hyper_parameters['optimizer']['name'])( - **self.hyper_parameters['optimizer']['params']) - self.model.compile(optimizer=optimizer, ** - self.hyper_parameters['compile_params']) - - -class RCNNModel(ClassificationModel): - __architect_name__ = 'RCNNModel' - __base_hyper_parameters__ = { - 'spatial_dropout': { - 'rate': 0.2 - }, - 'rnn_0': { - 'units': 64, - 'return_sequences': True - }, - 'conv_0': { - 'filters': 128, - 'kernel_size': 2, - 'kernel_initializer': 'normal', - 'padding': 'valid', - 'activation': 'relu', - 'strides': 1 - }, - 'maxpool': {}, - 'attn': {}, - 'average': {}, - 'concat': { - 'axis': 1 - }, - 'dropout': { - 'rate': 0.5 - }, - 'dense': { - 'units': 120, - 'activation': 'relu' - }, - 'activation_layer': { - 'activation': 'softmax' - }, - 'optimizer': { - 'module': 'keras.optimizers', - 'name': 'Adam', - 'params': { - 'lr': 1e-3, - 'clipvalue': 5, - 'decay': 1e-5 - } - }, - 'compile_params': { - 'loss': 'categorical_crossentropy', - # 'optimizer': 'adam', - 'metrics': ['accuracy'] - } - } - - def _prepare_model(self): - base_model = self.embedding.model - embedded_seq = SpatialDropout1D( - **self.hyper_parameters['spatial_dropout'])(base_model.output) - rnn_0 = Bidirectional( - GRULayer(**self.hyper_parameters['rnn_0']))(embedded_seq) - conv_0 = Conv1D(**self.hyper_parameters['conv_0'])(rnn_0) - maxpool = GlobalMaxPooling1D()(conv_0) - attn = AttentionWeightedAverage()(conv_0) - average = GlobalAveragePooling1D()(conv_0) - - concatenated = concatenate([maxpool, attn, average], - **self.hyper_parameters['concat']) - output = Dropout(**self.hyper_parameters['dropout'])(concatenated) - output = Dense(**self.hyper_parameters['dense'])(output) - output = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(output) - - self.model = Model(base_model.inputs, output) - - def _compile_model(self): - optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']), - self.hyper_parameters['optimizer']['name'])( - **self.hyper_parameters['optimizer']['params']) - self.model.compile(optimizer=optimizer, ** - self.hyper_parameters['compile_params']) - - -class AVRNNModel(ClassificationModel): - __architect_name__ = 'AVRNNModel' - __base_hyper_parameters__ = { - 'spatial_dropout': { - 'rate': 0.25 - }, - 'rnn_0': { - 'units': 60, - 'return_sequences': True - }, - 'rnn_1': { - 'units': 60, - 'return_sequences': True - }, - 'concat_rnn': { - 'axis': 2 - }, - 'last': {}, - 'maxpool': {}, - 'attn': {}, - 'average': {}, - 'all_views': { - 'axis': 1 - }, - 'dropout': { - 'rate': 0.5 - }, - 'dense': { - 'units': 144, - 'activation': 'relu' - }, - 'activation_layer': { - 
'activation': 'softmax' - }, - 'optimizer': { - 'module': 'keras.optimizers', - 'name': 'Adam', - 'params': { - 'lr': 1e-3, - 'clipvalue': 5, - 'decay': 1e-6 - } - }, - 'compile_params': { - 'loss': 'categorical_crossentropy', - # 'optimizer': 'adam', - 'metrics': ['accuracy'] - } - } - - def _prepare_model(self): - base_model = self.embedding.model - embedded_seq = SpatialDropout1D( - **self.hyper_parameters['spatial_dropout'])(base_model.output) - rnn_0 = Bidirectional( - GRULayer(**self.hyper_parameters['rnn_0']))(embedded_seq) - rnn_1 = Bidirectional( - GRULayer(**self.hyper_parameters['rnn_1']))(rnn_0) - concat_rnn = concatenate([rnn_0, rnn_1], - **self.hyper_parameters['concat_rnn']) - - last = Lambda(lambda t: t[:, -1], name='last')(concat_rnn) - maxpool = GlobalMaxPooling1D()(concat_rnn) - attn = AttentionWeightedAverage()(concat_rnn) - average = GlobalAveragePooling1D()(concat_rnn) - - all_views = concatenate([last, maxpool, attn, average], - **self.hyper_parameters['all_views']) - output = Dropout(**self.hyper_parameters['dropout'])(all_views) - output = Dense(**self.hyper_parameters['dense'])(output) - output = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(output) - - self.model = Model(base_model.inputs, output) - - def _compile_model(self): - optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']), - self.hyper_parameters['optimizer']['name'])( - **self.hyper_parameters['optimizer']['params']) - self.model.compile(optimizer=optimizer, ** - self.hyper_parameters['compile_params']) - - -class DropoutBGRUModel(ClassificationModel): - __architect_name__ = 'DropoutBGRUModel' - __base_hyper_parameters__ = { - 'spatial_dropout': { - 'rate': 0.15 - }, - 'rnn_0': { - 'units': 64, - 'return_sequences': True - }, - 'dropout_rnn': { - 'rate': 0.35 - }, - 'rnn_1': { - 'units': 64, - 'return_sequences': True - }, - 'last': {}, - 'maxpool': {}, - 'average': {}, - 'all_views': { - 'axis': 1 - }, - 'dropout': { - 'rate': 0.5 - }, - 'dense': { - 'units': 72, - 'activation': 'relu' - }, - 'activation_layer': { - 'activation': 'softmax' - }, - 'optimizer': { - 'module': 'keras.optimizers', - 'name': 'Adam', - 'params': { - 'lr': 1e-3, - 'decay': 0.0 - } - }, - 'compile_params': { - 'loss': 'categorical_crossentropy', - # 'optimizer': 'adam', - 'metrics': ['accuracy'] - } - } - - def _prepare_model(self): - base_model = self.embedding.model - embedded_seq = SpatialDropout1D( - **self.hyper_parameters['spatial_dropout'])(base_model.output) - rnn_0 = Bidirectional( - GRULayer(**self.hyper_parameters['rnn_0']))(embedded_seq) - dropout_rnn = Dropout(**self.hyper_parameters['dropout_rnn'])(rnn_0) - rnn_1 = Bidirectional( - GRULayer(**self.hyper_parameters['rnn_1']))(dropout_rnn) - last = Lambda(lambda t: t[:, -1], name='last')(rnn_1) - maxpool = GlobalMaxPooling1D()(rnn_1) - # attn = AttentionWeightedAverage()(rnn_1) - average = GlobalAveragePooling1D()(rnn_1) - - all_views = concatenate([last, maxpool, average], - **self.hyper_parameters['all_views']) - output = Dropout(**self.hyper_parameters['dropout'])(all_views) - output = Dense(**self.hyper_parameters['dense'])(output) - output = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(output) - - self.model = Model(base_model.inputs, output) - - def _compile_model(self): - optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']), - self.hyper_parameters['optimizer']['name'])( - **self.hyper_parameters['optimizer']['params']) - self.model.compile(optimizer=optimizer, ** - 
self.hyper_parameters['compile_params']) - - -class DropoutAVRNNModel(ClassificationModel): - __architect_name__ = 'DropoutAVRNNModel' - __base_hyper_parameters__ = { - 'spatial_dropout': { - 'rate': 0.25 - }, - 'rnn_0': { - 'units': 56, - 'return_sequences': True - }, - 'rnn_dropout': { - 'rate': 0.3 - }, - 'rnn_1': { - 'units': 56, - 'return_sequences': True - }, - 'last': {}, - 'maxpool': {}, - 'attn': {}, - 'average': {}, - 'all_views': { - 'axis': 1 - }, - 'dropout_0': { - 'rate': 0.5 - }, - 'dense': { - 'units': 128, - 'activation': 'relu' - }, - 'dropout_1': { - 'rate': 0.25 - }, - 'activation_layer': { - 'activation': 'softmax' - }, - 'optimizer': { - 'module': 'keras.optimizers', - 'name': 'Adam', - 'params': { - 'lr': 1e-3, - 'clipvalue': 5, - 'decay': 1e-7 - } - }, - 'compile_params': { - 'loss': 'categorical_crossentropy', - # 'optimizer': 'adam', - 'metrics': ['accuracy'] - } - } - - def _prepare_model(self): - base_model = self.embedding.model - embedded_seq = SpatialDropout1D( - **self.hyper_parameters['spatial_dropout'])(base_model.output) - rnn_0 = Bidirectional( - GRULayer(**self.hyper_parameters['rnn_0']))(embedded_seq) - rnn_dropout = SpatialDropout1D( - **self.hyper_parameters['rnn_dropout'])(rnn_0) - rnn_1 = Bidirectional( - GRULayer(**self.hyper_parameters['rnn_1']))(rnn_dropout) - - last = Lambda(lambda t: t[:, -1], name='last')(rnn_1) - maxpool = GlobalMaxPooling1D()(rnn_1) - attn = AttentionWeightedAverage()(rnn_1) - average = GlobalAveragePooling1D()(rnn_1) - - all_views = concatenate([last, maxpool, attn, average], - **self.hyper_parameters['all_views']) - output = Dropout(**self.hyper_parameters['dropout_0'])(all_views) - output = Dense(**self.hyper_parameters['dense'])(output) - output = Dropout(**self.hyper_parameters['dropout_1'])(output) - output = Dense(len(self.label2idx), - **self.hyper_parameters['activation_layer'])(output) - - self.model = Model(base_model.inputs, output) - - def _compile_model(self): - optimizer = getattr(eval(self.hyper_parameters['optimizer']['module']), - self.hyper_parameters['optimizer']['name'])( - **self.hyper_parameters['optimizer']['params']) - self.model.compile(optimizer=optimizer, ** - self.hyper_parameters['compile_params']) - - -if __name__ == '__main__': - from kashgari.corpus import TencentDingdangSLUCorpus - from kashgari.embeddings import WordEmbeddings, BERTEmbedding - - train_x, train_y = TencentDingdangSLUCorpus.get_classification_data() - - w2v = WordEmbeddings('sgns.weibo.bigram', - sequence_length=15, - limit=5000) - bert = BERTEmbedding('bert-base-chinese', sequence_length=15) - t_model = CNNModel(bert) - t_model.fit(train_x, train_y, epochs=1) -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: __init__.py -@time: 2019-01-21 - -""" - -from .blstm_model import BLSTMModel -from .blstm_crf_model import BLSTMCRFModel -from .cnn_lstm_model import CNNLSTMModel -from .base_model import SequenceLabelingModel - - -if __name__ == '__main__': - print("hello, world") -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: base_model -@time: 2019-01-21 - -""" -import random -import logging -from typing import Tuple, Dict - -import numpy as np -from keras.preprocessing import sequence -from keras.utils import to_categorical, multi_gpu_model -from seqeval.metrics import classification_report -from 
seqeval.metrics.sequence_labeling import get_entities - -import kashgari.macros as k -from kashgari.utils import helper -from kashgari.type_hints import * - -from kashgari.tasks.base import BaseModel -from kashgari.embeddings import BaseEmbedding - - -class SequenceLabelingModel(BaseModel): - - def __init__(self, embedding: BaseEmbedding = None, hyper_parameters: Dict = None, **kwargs): - super(SequenceLabelingModel, self).__init__( - embedding, hyper_parameters, **kwargs) - self.task = 'sequence_labeling' - - @property - def label2idx(self) -> Dict[str, int]: - return self._label2idx - - @property - def token2idx(self) -> Dict[str, int]: - return self.embedding.token2idx - - @label2idx.setter - def label2idx(self, value): - self._label2idx = value - self._idx2label = dict([(val, key) for (key, val) in value.items()]) - - def _prepare_model(self): - """ - prepare model function - :return: - """ - raise NotImplementedError() - - def _compile_model(self): - """ - compile model function - :return: - """ - raise NotImplementedError() - - def build_model(self, - x_train: List[List[str]], - y_train: List[List[str]], - x_validate: List[List[str]] = None, - y_validate: List[List[str]] = None, - labels_weight: bool = None, - default_labels_weight: float = 50.0, - ): - assert len(x_train) == len(y_train) - self.build_token2id_label2id_dict( - x_train, y_train, x_validate, y_validate) - - if not self.model: - if self.embedding.sequence_length == 0: - self.embedding.sequence_length = sorted([len(x) for x in x_train])[ - int(0.95 * len(x_train))] - logging.info('sequence length set to {}'.format( - self.embedding.sequence_length)) - - if labels_weight: - weights = [] - initial_weights = { - k.PAD: 1, - k.BOS: 1, - k.EOS: 1, - 'O': 1 - } - for label in self.label2idx.keys(): - weights.append(initial_weights.get( - label, default_labels_weight)) - loss_f = helper.weighted_categorical_crossentropy( - np.array(weights)) - self.model_info['loss'] = { - 'func': 'weighted_categorical_crossentropy', - 'weights': weights - } - - self._prepare_model() - self._compile_model() - self.model.summary() - - def build_token2id_label2id_dict(self, - x_train: List[List[str]], - y_train: List[List[str]], - x_validate: List[List[str]] = None, - y_validate: List[List[str]] = None): - for index in range(len(x_train)): - assert len(x_train[index]) == len(y_train[index]) - x_data = x_train - y_data = y_train - if x_validate: - x_data = x_train + x_validate - y_data = y_data + y_validate - self.embedding.build_token2idx_dict(x_data, 3) - - label_set = [] - for seq in y_data: - for y in seq: - if y not in label_set: - label_set.append(y) - - label2idx = { - k.PAD: 0, - k.BOS: 1, - k.EOS: 2 - } - label_set = [i for i in label_set if i not in label2idx] - for label in label_set: - label2idx[label] = len(label2idx) - - self.label2idx = label2idx - - def convert_labels_to_idx(self, - label: Union[List[List[str]], List[str]], - add_eos_bos: bool = True) -> Union[List[List[int]], List[int]]: - - def tokenize_tokens(seq: List[str]): - tokens = [self._label2idx[i] for i in seq] - if add_eos_bos: - if k.config.sequence_labeling_tokenize_add_bos_eos: - tokens = [self._label2idx[k.BOS]] + \ - tokens + [self._label2idx[k.EOS]] - else: - tokens = [self._label2idx[k.NO_TAG]] + \ - tokens + [self._label2idx[k.NO_TAG]] - return tokens - - if isinstance(label[0], str): - return tokenize_tokens(label) - else: - return [tokenize_tokens(l) for l in label] - - def convert_idx_to_labels(self, - idx: Union[List[List[int]], List[int]], - tokens_length: 
Union[List[int], int],
-                              remove_eos_bos: bool = True) -> Union[List[str], str]:
-
-        def reverse_tokenize_tokens(idx_item, seq_length):
-            if remove_eos_bos:
-                seq = idx_item[1: 1 + seq_length]
-            else:
-                seq = idx_item
-            tokens = [self._idx2label[i] for i in seq]
-            return tokens
-
-        if isinstance(idx[0], int):
-            return reverse_tokenize_tokens(idx, tokens_length)
-        else:
-            labels = []
-            for index in range(len(idx)):
-                idx_item = idx[index]
-                seq_length = tokens_length[index]
-                labels.append(reverse_tokenize_tokens(idx_item, seq_length))
-            return labels
-
-    def get_data_generator(self,
-                           x_data: List[List[str]],
-                           y_data: List[List[str]],
-                           batch_size: int = 64):
-        is_bert = self.embedding.embedding_type == 'bert'
-        while True:
-            page_list = list(range((len(x_data) // batch_size) + 1))
-            random.shuffle(page_list)
-            for page in page_list:
-                start_index = page * batch_size
-                end_index = start_index + batch_size
-                target_x = x_data[start_index: end_index]
-                target_y = y_data[start_index: end_index]
-                if len(target_x) == 0:
-                    target_x = x_data[0: batch_size]
-                    target_y = y_data[0: batch_size]
-
-                tokenized_x = self.embedding.tokenize(target_x)
-                tokenized_y = self.convert_labels_to_idx(target_y)
-
-                padded_x = sequence.pad_sequences(tokenized_x,
-                                                  maxlen=self.embedding.sequence_length,
-                                                  padding='post', truncating='post')
-                padded_y = sequence.pad_sequences(tokenized_y,
-                                                  maxlen=self.embedding.sequence_length,
-                                                  padding='post', truncating='post')
-
-                one_hot_y = to_categorical(
-                    padded_y, num_classes=len(self.label2idx))
-
-                if is_bert:
-                    padded_x_seg = np.zeros(
-                        shape=(len(padded_x), self.embedding.sequence_length))
-                    x_input_data = [padded_x, padded_x_seg]
-                else:
-                    x_input_data = padded_x
-                yield (x_input_data, one_hot_y)
-
-    def fit(self,
-            x_train: List[List[str]],
-            y_train: List[List[str]],
-            x_validate: List[List[str]] = None,
-            y_validate: List[List[str]] = None,
-            batch_size: int = 64,
-            epochs: int = 5,
-            labels_weight: bool = None,
-            default_labels_weight: float = 50.0,
-            fit_kwargs: Dict = None,
-            **kwargs):
-        """
-
-        :param x_train: list of training data.
-        :param y_train: list of training target label data.
-        :param batch_size: batch size for trainer model
-        :param epochs: Number of epochs to train the model.
-        :param x_validate: list of validation data.
-        :param y_validate: list of validation target label data.
- :param labels_weight: set class weights for imbalanced classes - :param default_labels_weight: default weight for labels not in labels_weight dict - :param fit_kwargs: additional kwargs to be passed to - :func:`~keras.models.Model.fit` - :return: - """ - if not self.model: - self.build_model(x_train, y_train, x_validate, - y_validate, labels_weight, default_labels_weight) - if len(x_train) < batch_size: - batch_size = len(x_train) // 2 - - train_generator = self.get_data_generator(x_train, - y_train, - batch_size) - - if fit_kwargs is None: - fit_kwargs = {} - - if x_validate: - validation_generator = self.get_data_generator(x_validate, - y_validate, - batch_size) - - fit_kwargs['validation_data'] = validation_generator - fit_kwargs['validation_steps'] = len(x_validate) // batch_size - - self.model.fit_generator(train_generator, - steps_per_epoch=len(x_train) // batch_size, - epochs=epochs, - **fit_kwargs) - - def _format_output_dic(self, words: List[str], tags: List[str], chunk_joiner: str): - chunks = get_entities(tags) - res = { - 'words': words, - 'entities': [] - } - for chunk_type, chunk_start, chunk_end in chunks: - chunk_end += 1 - entity = { - 'text': chunk_joiner.join(words[chunk_start: chunk_end]), - 'type': chunk_type, - # 'score': float(np.average(prob[chunk_start: chunk_end])), - 'beginOffset': chunk_start, - 'endOffset': chunk_end - } - res['entities'].append(entity) - return res - - def predict(self, - sentence: Union[List[str], List[List[str]]], - batch_size=None, - output_dict=False, - chunk_joiner=' ', - debug_info=False): - """ - predict with model - :param sentence: input for predict, accept a single sentence as type List[str] or - list of sentence as List[List[str]] - :param batch_size: predict batch_size - :param output_dict: return dict with result with confidence - :param chunk_joiner: the char to join the chunks when output dict - :param debug_info: print debug info using logging.debug when True - :return: - """ - tokens = self.embedding.tokenize(sentence) - is_list = not isinstance(sentence[0], str) - if is_list: - seq_length = [len(item) for item in sentence] - padded_tokens = sequence.pad_sequences(tokens, - maxlen=self.embedding.sequence_length, - padding='post', truncating='post') - else: - seq_length = [len(sentence)] - padded_tokens = sequence.pad_sequences([tokens], - maxlen=self.embedding.sequence_length, - padding='post', truncating='post') - if self.embedding.is_bert: - x = [padded_tokens, np.zeros( - shape=(len(padded_tokens), self.embedding.sequence_length))] - else: - x = padded_tokens - - predict_result_prob = self.model.predict(x, batch_size=batch_size) - predict_result = predict_result_prob.argmax(-1) - if debug_info: - logging.info('input: {}'.format(x)) - logging.info('output: {}'.format(predict_result_prob)) - logging.info('output argmax: {}'.format(predict_result)) - - result: List[List[str]] = self.convert_idx_to_labels( - predict_result, seq_length) - if output_dict: - dict_list = [] - if is_list: - sentence_list: List[List[str]] = sentence - else: - sentence_list: List[List[str]] = [sentence] - for index in range(len(sentence_list)): - dict_list.append(self._format_output_dic(sentence_list[index], - result[index], - chunk_joiner)) - if is_list: - return dict_list - else: - return dict_list[0] - else: - if is_list: - return result - else: - return result[0] - - def evaluate(self, x_data, y_data, batch_size=None, digits=4, debug_info=False) -> Tuple[float, float, Dict]: - seq_length = [len(x) for x in x_data] - tokenized_y = 
self.convert_labels_to_idx(y_data)
-        padded_y = sequence.pad_sequences(tokenized_y,
-                                          maxlen=self.embedding.sequence_length,
-                                          padding='post', truncating='post')
-        y_true = self.convert_idx_to_labels(padded_y, seq_length)
-        y_pred = self.predict(x_data, batch_size=batch_size)
-        if debug_info:
-            for index in random.sample(list(range(len(x_data))), 5):
-                logging.debug('------ sample {} ------'.format(index))
-                logging.debug('x      : {}'.format(x_data[index]))
-                logging.debug('y_true : {}'.format(y_true[index]))
-                logging.debug('y_pred : {}'.format(y_pred[index]))
-        report = classification_report(y_true, y_pred, digits=digits)
-        print(classification_report(y_true, y_pred, digits=digits))
-        return report
-# encoding: utf-8
-"""
-@author: BrikerMan
-@contact: eliyar917@gmail.com
-@blog: https://eliyar.biz
-
-@version: 1.0
-@license: Apache Licence
-@file: blstm_crf_model.py
-@time: 2019-01-23 17:02
-
-"""
-from keras.layers import Dense, Bidirectional
-from keras.layers.recurrent import LSTM
-from keras.models import Model
-
-from kashgari.utils.crf import CRF, crf_loss, crf_accuracy
-
-from kashgari.tasks.seq_labeling.base_model import SequenceLabelingModel
-
-
-class BLSTMCRFModel(SequenceLabelingModel):
-    __architect_name__ = 'BLSTMCRFModel'
-    __base_hyper_parameters__ = {
-        'lstm_layer': {
-            'units': 256,
-            'return_sequences': True
-        },
-        'dense_layer': {
-            'units': 64,
-            'activation': 'tanh'
-        }
-    }
-
-    def _prepare_model(self):
-        base_model = self.embedding.model
-        blstm_layer = Bidirectional(
-            LSTM(**self.hyper_parameters['lstm_layer']))(base_model.output)
-        dense_layer = Dense(
-            **self.hyper_parameters['dense_layer'])(blstm_layer)
-        crf = CRF(len(self.label2idx), sparse_target=False)
-        crf_layer = crf(dense_layer)
-        self.model = Model(base_model.inputs, crf_layer)
-
-    # TODO: Allow custom loss and optimizer
-    def _compile_model(self):
-        self.model.compile(loss=crf_loss,
-                           optimizer='adam',
-                           metrics=[crf_accuracy])
-
-
-if __name__ == "__main__":
-    print("Hello world")
-    from kashgari.utils.logger import init_logger
-
-    init_logger()
-    from kashgari.corpus import ChinaPeoplesDailyNerCorpus
-
-    x_train, y_train = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data()
-    x_validate, y_validate = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data(
-        data_type='validate')
-    x_test, y_test = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data(
-        data_type='test')
-
-    tagger = BLSTMCRFModel()
-    tagger.fit(x_train, y_train, epochs=2)
-    tagger.evaluate(x_validate, y_validate)
-    tagger.evaluate(x_test, y_test, debug_info=True)
-
-    model = BLSTMCRFModel.load_model(
-        '/Users/brikerman/Downloads/KashgariNER.output/model')
-    model.evaluate(x_test, y_test, debug_info=True)
-# encoding: utf-8
-"""
-@author: BrikerMan
-@contact: eliyar917@gmail.com
-@blog: https://eliyar.biz
-
-@version: 1.0
-@license: Apache Licence
-@file: blstm_model
-@time: 2019-01-21
-
-"""
-
-import logging
-
-from keras.layers import Bidirectional, LSTM
-from keras.layers import Dense, Dropout, TimeDistributed, Activation
-from keras.models import Model
-
-from kashgari.tasks.seq_labeling.base_model import SequenceLabelingModel
-
-
-class BLSTMModel(SequenceLabelingModel):
-    __architect_name__ = 'BLSTMModel'
-    __base_hyper_parameters__ = {
-        'lstm_layer': {
-            'units': 256,
-            'return_sequences': True
-        }, 'dropout_layer': {
-            'rate': 0.4
-        }
-    }
-
-    def _prepare_model(self):
-        embed_model = self.embedding.model
-
-        blstm_layer = Bidirectional(
-            LSTM(**self.hyper_parameters['lstm_layer']))(embed_model.output)
-        dropout_layer =
Dropout( - **self.hyper_parameters['dropout_layer'])(blstm_layer) - time_distributed_layer = TimeDistributed( - Dense(len(self.label2idx)))(dropout_layer) - activation = Activation('softmax')(time_distributed_layer) - - self.model = Model(embed_model.inputs, activation) - - # TODO: Allow custom loss and optimizer - def _compile_model(self): - loss_f = 'categorical_crossentropy' - optimizer = 'adam' - metrics = ['accuracy'] - - self.model.compile(loss=loss_f, - optimizer=optimizer, - metrics=metrics) - - -if __name__ == '__main__': - import random - from keras.callbacks import ModelCheckpoint - from kashgari.corpus import ChinaPeoplesDailyNerCorpus - - x_train, y_train = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data() - x_validate, y_validate = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data( - data_type='validate') - x_test, y_test = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data( - data_type='test') - - # embedding = WordEmbeddings('sgns.weibo.bigram', sequence_length=100) - m = BLSTMModel() - - check = ModelCheckpoint('./model.model', - monitor='acc', - verbose=1, - save_best_only=False, - save_weights_only=False, - mode='auto', - period=1) - m.fit(x_train, - y_train, - class_weight=True, - epochs=1, y_validate=y_validate, x_validate=x_validate, labels_weight=True) - - sample_queries = random.sample(list(range(len(x_train))), 10) - for i in sample_queries: - text = x_train[i] - logging.info('-------- sample {} --------'.format(i)) - logging.info('x: {}'.format(text)) - logging.info('y_true: {}'.format(y_train[i])) - logging.info('y_pred: {}'.format(m.predict(text))) - - m.evaluate(x_test, y_test) -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence -@file: cnn_lstm_model.py -@time: 2019-01-24 15:27 - -""" -from keras.layers import Dense, Conv1D, TimeDistributed, Activation -from keras.layers.recurrent import LSTM -from keras.models import Model - -from kashgari.tasks.seq_labeling.base_model import SequenceLabelingModel - - -class CNNLSTMModel(SequenceLabelingModel): - __architect_name__ = 'CNNLSTMModel' - __base_hyper_parameters__ = { - 'conv_layer': { - 'filters': 32, - 'kernel_size': 3, - 'padding': 'same', - 'activation': 'relu' - }, - 'max_pool_layer': { - 'pool_size': 2 - }, - 'lstm_layer': { - 'units': 100, - 'return_sequences': True - } - } - - def _prepare_model(self): - base_model = self.embedding.model - conv_layer = Conv1D( - **self.hyper_parameters['conv_layer'])(base_model.output) - # max_pool_layer = MaxPooling1D(**self.hyper_parameters['max_pool_layer'])(conv_layer) - lstm_layer = LSTM(**self.hyper_parameters['lstm_layer'])(conv_layer) - time_distributed_layer = TimeDistributed( - Dense(len(self.label2idx)))(lstm_layer) - activation = Activation('softmax')(time_distributed_layer) - output_layers = [activation] - - self.model = Model(base_model.inputs, output_layers) - - # TODO: Allow custom loss and optimizer - def _compile_model(self): - self.model.compile(loss='categorical_crossentropy', - optimizer='adam', - metrics=['accuracy']) - - -if __name__ == "__main__": - print("Hello world") - from kashgari.utils.logger import init_logger - from kashgari.corpus import ChinaPeoplesDailyNerCorpus - - init_logger() - - x_train, y_train = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data() - x_validate, y_validate = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data( - data_type='validate') - x_test, y_test = ChinaPeoplesDailyNerCorpus.get_sequence_tagging_data( - 
data_type='test')
-
-    classifier = CNNLSTMModel()
-    classifier.fit(x_train, y_train, epochs=2)
-    classifier.evaluate(x_validate, y_validate)
-    classifier.evaluate(x_test, y_test)
-# encoding: utf-8
-"""
-@author: BrikerMan
-@contact: eliyar917@gmail.com
-@blog: https://eliyar.biz
-
-@version: 1.0
-@license: Apache Licence
-@file: __init__.py.py
-@time: 2019-01-23 17:08
-
-"""
-from .crf import CRF
-from .crf_losses import crf_loss
-from .crf_accuracies import crf_accuracy
-
-if __name__ == "__main__":
-    print("Hello world")
-# encoding: utf-8
-"""
-@author: BrikerMan
-@contact: eliyar917@gmail.com
-@blog: https://eliyar.biz
-
-@version: 1.0
-@license: Apache Licence
-@file: crf_layer.py
-@time: 2019-01-23 17:07
-
-"""
-from __future__ import absolute_import
-from __future__ import division
-
-import warnings
-
-from keras import backend as K
-from keras import activations
-from keras import initializers
-from keras import regularizers
-from keras import constraints
-from keras.layers import Layer
-from keras.layers import InputSpec
-
-from kashgari.utils.crf import crf_losses
-from kashgari.utils.crf.crf_accuracies import crf_marginal_accuracy
-from kashgari.utils.crf.crf_accuracies import crf_viterbi_accuracy
-
-"""
-from https://github.com/keras-team/keras-contrib
-"""
-
-
-class CRF(Layer):
-    """An implementation of linear chain conditional random field (CRF).
-
-    A linear chain CRF is defined to maximize the following likelihood function:
-
-    $$ L(W, U, b; y_1, ..., y_n) := \frac{1}{Z}
-    \sum_{y_1, ..., y_n} \exp(-a_1' y_1 - a_n' y_n
-        - \sum_{k=1}^{n}((f(x_k' W + b) y_k) + y_1' U y_2)), $$
-
-    where:
-        $Z$: normalization constant
-        $x_k, y_k$: inputs and outputs
-
-    This implementation has two modes for optimization:
-    1. (`join mode`) optimized by maximizing join likelihood,
-       which is optimal in theory of statistics.
-       Note that in this case, CRF must be the output/last layer.
-    2. (`marginal mode`) return marginal probabilities on each time
-       step and optimized via composition
-       likelihood (product of marginal likelihood), i.e.,
-       using `categorical_crossentropy` loss.
-       Note that in this case, CRF can be either the last layer or an
-       intermediate layer (though not explored).
-
-    For prediction (test phase), one can choose either Viterbi
-    best path (class indices) or marginal
-    probabilities if probabilities are needed.
-    However, if one chooses *join mode* for training,
-    Viterbi output is typically better than marginal output,
-    but the marginal output will still perform
-    reasonably close, while if *marginal mode* is used for training,
-    marginal output usually performs
-    much better. The default behavior and `metrics.crf_accuracy`
-    is set according to this observation.
-
-    In addition, this implementation supports masking and accepts either
-    onehot or sparse target.
-
-    If you open an issue or a pull request about CRF, please
-    add 'cc @lzfelix' to notify Luiz Felix.
-
-
-    # Examples
-
-    ```python
-    from keras_contrib.layers import CRF
-    from keras_contrib.losses import crf_loss
-    from keras_contrib.metrics import crf_viterbi_accuracy
-
-    model = Sequential()
-    model.add(Embedding(3001, 300, mask_zero=True))
-
-    # use learn_mode = 'join', test_mode = 'viterbi',
-    # sparse_target = True (label indices output)
-    crf = CRF(10, sparse_target=True)
-    model.add(crf)
-
-    # crf_accuracy defaults to Viterbi acc if using join-mode (default).
- # One can add crf.marginal_acc if interested, but may slow down learning - model.compile('adam', loss=crf_loss, metrics=[crf_viterbi_accuracy]) - - # y must be label indices (with shape 1 at dim 3) here, - # since `sparse_target=True` - model.fit(x, y) - - # prediction give onehot representation of Viterbi best path - y_hat = model.predict(x_test) - ``` - - The following snippet shows how to load a persisted - model that uses the CRF layer: - - ```python - from keras.models import load_model - from keras_contrib.losses import import crf_loss - from keras_contrib.metrics import crf_viterbi_accuracy - - custom_objects={'CRF': CRF, - 'crf_loss': crf_loss, - 'crf_viterbi_accuracy': crf_viterbi_accuracy} - - loaded_model = load_model('', - custom_objects=custom_objects) - ``` - - # Arguments - units: Positive integer, dimensionality of the output space. - learn_mode: Either 'join' or 'marginal'. - The former train the model by maximizing join likelihood while the latter - maximize the product of marginal likelihood over all time steps. - One should use `losses.crf_nll` for 'join' mode - and `losses.categorical_crossentropy` or - `losses.sparse_categorical_crossentropy` for - `marginal` mode. For convenience, simply - use `losses.crf_loss`, which will decide the proper loss as described. - test_mode: Either 'viterbi' or 'marginal'. - The former is recommended and as default when `learn_mode = 'join'` and - gives one-hot representation of the best path at test (prediction) time, - while the latter is recommended and chosen as default - when `learn_mode = 'marginal'`, - which produces marginal probabilities for each time step. - For evaluating metrics, one should - use `metrics.crf_viterbi_accuracy` for 'viterbi' mode and - 'metrics.crf_marginal_accuracy' for 'marginal' mode, or - simply use `metrics.crf_accuracy` for - both which automatically decides it as described. - One can also use both for evaluation at training. - sparse_target: Boolean (default False) indicating - if provided labels are one-hot or - indices (with shape 1 at dim 3). - use_boundary: Boolean (default True) indicating if trainable - start-end chain energies - should be added to model. - use_bias: Boolean, whether the layer uses a bias vector. - kernel_initializer: Initializer for the `kernel` weights matrix, - used for the linear transformation of the inputs. - (see [initializers](../initializers.md)). - chain_initializer: Initializer for the `chain_kernel` weights matrix, - used for the CRF chain energy. - (see [initializers](../initializers.md)). - boundary_initializer: Initializer for the `left_boundary`, - 'right_boundary' weights vectors, - used for the start/left and end/right boundary energy. - (see [initializers](../initializers.md)). - bias_initializer: Initializer for the bias vector - (see [initializers](../initializers.md)). - activation: Activation function to use - (see [activations](../activations.md)). - If you pass None, no activation is applied - (ie. "linear" activation: `a(x) = x`). - kernel_regularizer: Regularizer function applied to - the `kernel` weights matrix - (see [regularizer](../regularizers.md)). - chain_regularizer: Regularizer function applied to - the `chain_kernel` weights matrix - (see [regularizer](../regularizers.md)). - boundary_regularizer: Regularizer function applied to - the 'left_boundary', 'right_boundary' weight vectors - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). 
- kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - chain_constraint: Constraint function applied to - the `chain_kernel` weights matrix - (see [constraints](../constraints.md)). - boundary_constraint: Constraint function applied to - the `left_boundary`, `right_boundary` weights vectors - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - input_dim: dimensionality of the input (integer). - This argument (or alternatively, the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. - unroll: Boolean (default False). If True, the network will be - unrolled, else a symbolic loop will be used. - Unrolling can speed-up a RNN, although it tends - to be more memory-intensive. - Unrolling is only suitable for short sequences. - - # Input shape - 3D tensor with shape `(nb_samples, timesteps, input_dim)`. - - # Output shape - 3D tensor with shape `(nb_samples, timesteps, units)`. - - # Masking - This layer supports masking for input data with a variable number - of timesteps. To introduce masks to your data, - use an [Embedding](embeddings.md) layer with the `mask_zero` parameter - set to `True`. - - """ - - def __init__(self, units, - learn_mode='join', - test_mode=None, - sparse_target=False, - use_boundary=True, - use_bias=True, - activation='linear', - kernel_initializer='glorot_uniform', - chain_initializer='orthogonal', - bias_initializer='zeros', - boundary_initializer='zeros', - kernel_regularizer=None, - chain_regularizer=None, - boundary_regularizer=None, - bias_regularizer=None, - kernel_constraint=None, - chain_constraint=None, - boundary_constraint=None, - bias_constraint=None, - input_dim=None, - unroll=False, - **kwargs): - super(CRF, self).__init__(**kwargs) - self.supports_masking = True - self.units = units - self.learn_mode = learn_mode - assert self.learn_mode in ['join', 'marginal'] - self.test_mode = test_mode - if self.test_mode is None: - self.test_mode = 'viterbi' if self.learn_mode == 'join' else 'marginal' - else: - assert self.test_mode in ['viterbi', 'marginal'] - self.sparse_target = sparse_target - self.use_boundary = use_boundary - self.use_bias = use_bias - - self.activation = activations.get(activation) - - self.kernel_initializer = initializers.get(kernel_initializer) - self.chain_initializer = initializers.get(chain_initializer) - self.boundary_initializer = initializers.get(boundary_initializer) - self.bias_initializer = initializers.get(bias_initializer) - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.chain_regularizer = regularizers.get(chain_regularizer) - self.boundary_regularizer = regularizers.get(boundary_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.chain_constraint = constraints.get(chain_constraint) - self.boundary_constraint = constraints.get(boundary_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - self.unroll = unroll - - def build(self, input_shape): - self.input_spec = [InputSpec(shape=input_shape)] - self.input_dim = input_shape[-1] - - self.kernel = self.add_weight(shape=(self.input_dim, self.units), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - self.chain_kernel = self.add_weight(shape=(self.units, 
self.units), - name='chain_kernel', - initializer=self.chain_initializer, - regularizer=self.chain_regularizer, - constraint=self.chain_constraint) - if self.use_bias: - self.bias = self.add_weight(shape=(self.units,), - name='bias', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = 0 - - if self.use_boundary: - self.left_boundary = self.add_weight(shape=(self.units,), - name='left_boundary', - initializer=self.boundary_initializer, - regularizer=self.boundary_regularizer, - constraint=self.boundary_constraint) - self.right_boundary = self.add_weight(shape=(self.units,), - name='right_boundary', - initializer=self.boundary_initializer, - regularizer=self.boundary_regularizer, - constraint=self.boundary_constraint) - self.built = True - - def call(self, X, mask=None): - if mask is not None: - assert K.ndim( - mask) == 2, 'Input mask to CRF must have dim 2 if not None' - - if self.test_mode == 'viterbi': - test_output = self.viterbi_decoding(X, mask) - else: - test_output = self.get_marginal_prob(X, mask) - - self.uses_learning_phase = True - if self.learn_mode == 'join': - train_output = K.zeros_like(K.dot(X, self.kernel)) - out = K.in_train_phase(train_output, test_output) - else: - if self.test_mode == 'viterbi': - train_output = self.get_marginal_prob(X, mask) - out = K.in_train_phase(train_output, test_output) - else: - out = test_output - return out - - def compute_output_shape(self, input_shape): - return input_shape[:2] + (self.units,) - - def compute_mask(self, input, mask=None): - if mask is not None and self.learn_mode == 'join': - return K.any(mask, axis=1) - return mask - - def get_config(self): - config = { - 'units': self.units, - 'learn_mode': self.learn_mode, - 'test_mode': self.test_mode, - 'use_boundary': self.use_boundary, - 'use_bias': self.use_bias, - 'sparse_target': self.sparse_target, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'chain_initializer': initializers.serialize(self.chain_initializer), - 'boundary_initializer': initializers.serialize( - self.boundary_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'activation': activations.serialize(self.activation), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'chain_regularizer': regularizers.serialize(self.chain_regularizer), - 'boundary_regularizer': regularizers.serialize( - self.boundary_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'chain_constraint': constraints.serialize(self.chain_constraint), - 'boundary_constraint': constraints.serialize(self.boundary_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'input_dim': self.input_dim, - 'unroll': self.unroll} - base_config = super(CRF, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - # @property - # def loss_function(self): - # warnings.warn('CRF.loss_function is deprecated ' - # 'and it might be removed in the future. Please ' - # 'use losses.crf_loss instead.') - # return crf_losses - # - # @property - # def accuracy(self): - # warnings.warn('CRF.accuracy is deprecated and it ' - # 'might be removed in the future. 
Please ' - # 'use metrics.crf_accuracy') - # if self.test_mode == 'viterbi': - # return crf_viterbi_accuracy - # else: - # return crf_marginal_accuracy - # - # @property - # def viterbi_acc(self): - # warnings.warn('CRF.viterbi_acc is deprecated and it might ' - # 'be removed in the future. Please ' - # 'use metrics.viterbi_acc instead.') - # return crf_viterbi_accuracy - # - # @property - # def marginal_acc(self): - # warnings.warn('CRF.moarginal_acc is deprecated and it ' - # 'might be removed in the future. Please ' - # 'use metrics.marginal_acc instead.') - # return crf_marginal_accuracy - - @staticmethod - def softmaxNd(x, axis=-1): - m = K.max(x, axis=axis, keepdims=True) - exp_x = K.exp(x - m) - prob_x = exp_x / K.sum(exp_x, axis=axis, keepdims=True) - return prob_x - - @staticmethod - def shift_left(x, offset=1): - assert offset > 0 - return K.concatenate([x[:, offset:], K.zeros_like(x[:, :offset])], axis=1) - - @staticmethod - def shift_right(x, offset=1): - assert offset > 0 - return K.concatenate([K.zeros_like(x[:, :offset]), x[:, :-offset]], axis=1) - - def add_boundary_energy(self, energy, mask, start, end): - start = K.expand_dims(K.expand_dims(start, 0), 0) - end = K.expand_dims(K.expand_dims(end, 0), 0) - if mask is None: - energy = K.concatenate([energy[:, :1, :] + start, energy[:, 1:, :]], - axis=1) - energy = K.concatenate([energy[:, :-1, :], energy[:, -1:, :] + end], - axis=1) - else: - mask = K.expand_dims(K.cast(mask, K.floatx())) - start_mask = K.cast( - K.greater(mask, self.shift_right(mask)), K.floatx()) - end_mask = K.cast( - K.greater(self.shift_left(mask), mask), K.floatx()) - energy = energy + start_mask * start - energy = energy + end_mask * end - return energy - - def get_log_normalization_constant(self, input_energy, mask, **kwargs): - """Compute logarithm of the normalization constant Z, where - Z = sum exp(-E) -> logZ = log sum exp(-E) =: -nlogZ - """ - # should have logZ[:, i] == logZ[:, j] for any i, j - logZ = self.recursion(input_energy, mask, - return_sequences=False, **kwargs) - return logZ[:, 0] - - def get_energy(self, y_true, input_energy, mask): - """Energy = a1' y1 + u1' y1 + y1' U y2 + u2' y2 + y2' U y3 + u3' y3 + an' y3 - """ - input_energy = K.sum(input_energy * y_true, 2) # (B, T) - # (B, T-1) - chain_energy = K.sum(K.dot(y_true[:, :-1, :], - self.chain_kernel) * y_true[:, 1:, :], 2) - - if mask is not None: - mask = K.cast(mask, K.floatx()) - # (B, T-1), mask[:,:-1]*mask[:,1:] makes it work with any padding - chain_mask = mask[:, :-1] * mask[:, 1:] - input_energy = input_energy * mask - chain_energy = chain_energy * chain_mask - total_energy = K.sum(input_energy, -1) + \ - K.sum(chain_energy, -1) # (B, ) - - return total_energy - - def get_negative_log_likelihood(self, y_true, X, mask): - """Compute the loss, i.e., negative log likelihood (normalize by number of time steps) - likelihood = 1/Z * exp(-E) -> neg_log_like = - log(1/Z * exp(-E)) = logZ + E - """ - input_energy = self.activation(K.dot(X, self.kernel) + self.bias) - if self.use_boundary: - input_energy = self.add_boundary_energy(input_energy, mask, - self.left_boundary, - self.right_boundary) - energy = self.get_energy(y_true, input_energy, mask) - logZ = self.get_log_normalization_constant(input_energy, mask, - input_length=K.int_shape(X)[1]) - nloglik = logZ + energy - if mask is not None: - nloglik = nloglik / K.sum(K.cast(mask, K.floatx()), 1) - else: - nloglik = nloglik / K.cast(K.shape(X)[1], K.floatx()) - return nloglik - - def step(self, input_energy_t, states, 
return_logZ=True):
-        # note: in the following, `prev_target_val` has shape = (B, F)
-        # where B = batch_size, F = output feature dim
-        # Note: `i` is of float32, due to the behavior of `K.rnn`
-        prev_target_val, i, chain_energy = states[:3]
-        t = K.cast(i[0, 0], dtype='int32')
-        if len(states) > 3:
-            if K.backend() == 'theano':
-                m = states[3][:, t:(t + 2)]
-            else:
-                m = K.tf.slice(states[3], [0, t], [-1, 2])
-            input_energy_t = input_energy_t * K.expand_dims(m[:, 0])
-            # (1, F, F)*(B, 1, 1) -> (B, F, F)
-            chain_energy = chain_energy * K.expand_dims(
-                K.expand_dims(m[:, 0] * m[:, 1]))
-        if return_logZ:
-            # shapes: (1, B, F) + (B, F, 1) -> (B, F, F)
-            energy = chain_energy + \
-                K.expand_dims(input_energy_t - prev_target_val, 2)
-            new_target_val = K.logsumexp(-energy, 1)  # shapes: (B, F)
-            return new_target_val, [new_target_val, i + 1]
-        else:
-            energy = chain_energy + \
-                K.expand_dims(input_energy_t + prev_target_val, 2)
-            min_energy = K.min(energy, 1)
-            # cast for tf-version `K.rnn`
-            argmin_table = K.cast(K.argmin(energy, 1), K.floatx())
-            return argmin_table, [min_energy, i + 1]
-
-    def recursion(self, input_energy, mask=None, go_backwards=False,
-                  return_sequences=True, return_logZ=True, input_length=None):
-        """Forward (alpha) or backward (beta) recursion
-
-        If `return_logZ = True`, compute the logZ, the normalization constant:
-
-        \[ Z = \sum_{y1, y2, y3} exp(-E)  # energy
-            = \sum_{y1, y2, y3} exp(-(u1' y1 + y1' W y2 + u2' y2 + y2' W y3 + u3' y3))
-            = sum_{y2, y3} (exp(-(u2' y2 + y2' W y3 + u3' y3))
-            sum_{y1} exp(-(u1' y1 + y1' W y2))) \]
-
-        Denote:
-            \[ S(y2) := sum_{y1} exp(-(u1' y1 + y1' W y2)), \]
-            \[ Z = sum_{y2, y3} exp(log S(y2) - (u2' y2 + y2' W y3 + u3' y3)) \]
-            \[ logS(y2) = log S(y2) = log_sum_exp(-(u1' y1 + y1' W y2)) \]
-        Note that:
-            yi's are one-hot vectors
-            u1, u3: boundary energies have been merged
-
-        If `return_logZ = False`, compute the Viterbi's best path lookup table.
-        """
-        chain_energy = self.chain_kernel
-        # shape=(1, F, F): F=num of output features.
1st F is for t-1, 2nd F for t - chain_energy = K.expand_dims(chain_energy, 0) - # shape=(B, F), dtype=float32 - prev_target_val = K.zeros_like(input_energy[:, 0, :]) - - if go_backwards: - input_energy = K.reverse(input_energy, 1) - if mask is not None: - mask = K.reverse(mask, 1) - - initial_states = [prev_target_val, - K.zeros_like(prev_target_val[:, :1])] - constants = [chain_energy] - - if mask is not None: - mask2 = K.cast(K.concatenate([mask, K.zeros_like(mask[:, :1])], axis=1), - K.floatx()) - constants.append(mask2) - - def _step(input_energy_i, states): - return self.step(input_energy_i, states, return_logZ) - - target_val_last, target_val_seq, _ = K.rnn(_step, input_energy, - initial_states, - constants=constants, - input_length=input_length, - unroll=self.unroll) - - if return_sequences: - if go_backwards: - target_val_seq = K.reverse(target_val_seq, 1) - return target_val_seq - else: - return target_val_last - - def forward_recursion(self, input_energy, **kwargs): - return self.recursion(input_energy, **kwargs) - - def backward_recursion(self, input_energy, **kwargs): - return self.recursion(input_energy, go_backwards=True, **kwargs) - - def get_marginal_prob(self, X, mask=None): - input_energy = self.activation(K.dot(X, self.kernel) + self.bias) - if self.use_boundary: - input_energy = self.add_boundary_energy(input_energy, mask, - self.left_boundary, - self.right_boundary) - input_length = K.int_shape(X)[1] - alpha = self.forward_recursion(input_energy, mask=mask, - input_length=input_length) - beta = self.backward_recursion(input_energy, mask=mask, - input_length=input_length) - if mask is not None: - input_energy = input_energy * \ - K.expand_dims(K.cast(mask, K.floatx())) - margin = -(self.shift_right(alpha) + - input_energy + self.shift_left(beta)) - return self.softmaxNd(margin) - - def viterbi_decoding(self, X, mask=None): - input_energy = self.activation(K.dot(X, self.kernel) + self.bias) - if self.use_boundary: - input_energy = self.add_boundary_energy( - input_energy, mask, self.left_boundary, self.right_boundary) - - argmin_tables = self.recursion(input_energy, mask, return_logZ=False) - argmin_tables = K.cast(argmin_tables, 'int32') - - # backward to find best path, `initial_best_idx` can be any, - # as all elements in the last argmin_table are the same - argmin_tables = K.reverse(argmin_tables, 1) - # matrix instead of vector is required by tf `K.rnn` - initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])] - if K.backend() == 'theano': - initial_best_idx = [K.T.unbroadcast(initial_best_idx[0], 1)] - - def gather_each_row(params, indices): - n = K.shape(indices)[0] - if K.backend() == 'theano': - return params[K.T.arange(n), indices] - else: - indices = K.transpose(K.stack([K.tf.range(n), indices])) - return K.tf.gather_nd(params, indices) - - def find_path(argmin_table, best_idx): - next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0]) - next_best_idx = K.expand_dims(next_best_idx) - if K.backend() == 'theano': - next_best_idx = K.T.unbroadcast(next_best_idx, 1) - return next_best_idx, [next_best_idx] - - _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx, - input_length=K.int_shape(X)[1], unroll=self.unroll) - best_paths = K.reverse(best_paths, 1) - best_paths = K.squeeze(best_paths, 2) - - return K.one_hot(best_paths, self.units) - - -if __name__ == "__main__": - print("Hello world") -# encoding: utf-8 -""" -@author: BrikerMan -@contact: eliyar917@gmail.com -@blog: https://eliyar.biz - -@version: 1.0 -@license: Apache Licence 
-@file: crf_accuracies.py
-@time: 2019-01-23 17:10
-
-"""
-from keras import backend as K
-
-"""
-from https://github.com/keras-team/keras-contrib
-"""
-
-
-def _get_accuracy(y_true, y_pred, mask, sparse_target=False):
-    y_pred = K.argmax(y_pred, -1)
-    if sparse_target:
-        y_true = K.cast(y_true[:, :, 0], K.dtype(y_pred))
-    else:
-        y_true = K.argmax(y_true, -1)
-    judge = K.cast(K.equal(y_pred, y_true), K.floatx())
-    if mask is None:
-        return K.mean(judge)
-    else:
-        mask = K.cast(mask, K.floatx())
-        return K.sum(judge * mask) / K.sum(mask)
-
-
-def crf_viterbi_accuracy(y_true, y_pred):
-    '''Use Viterbi algorithm to get best path, and compute its accuracy.
-    `y_pred` must be an output from CRF.'''
-    crf, idx = y_pred._keras_history[:2]
-    X = crf._inbound_nodes[idx].input_tensors[0]
-    mask = crf._inbound_nodes[idx].input_masks[0]
-    y_pred = crf.viterbi_decoding(X, mask)
-    return _get_accuracy(y_true, y_pred, mask, crf.sparse_target)
-
-
-def crf_marginal_accuracy(y_true, y_pred):
-    '''Use time-wise marginal argmax as prediction.
-    `y_pred` must be an output from CRF with `learn_mode="marginal"`.'''
-    crf, idx = y_pred._keras_history[:2]
-    X = crf._inbound_nodes[idx].input_tensors[0]
-    mask = crf._inbound_nodes[idx].input_masks[0]
-    y_pred = crf.get_marginal_prob(X, mask)
-    return _get_accuracy(y_true, y_pred, mask, crf.sparse_target)
-
-
-def crf_accuracy(y_true, y_pred):
-    '''Get the default accuracy based on CRF `test_mode`.'''
-    crf, idx = y_pred._keras_history[:2]
-    if crf.test_mode == 'viterbi':
-        return crf_viterbi_accuracy(y_true, y_pred)
-    else:
-        return crf_marginal_accuracy(y_true, y_pred)
-
-
-if __name__ == "__main__":
-    print("Hello world")
-# encoding: utf-8
-"""
-@author: BrikerMan
-@contact: eliyar917@gmail.com
-@blog: https://eliyar.biz
-
-@version: 1.0
-@license: Apache Licence
-@file: crf_losses.py
-@time: 2019-01-23 17:08
-
-"""
-from keras import backend as K
-from keras.losses import categorical_crossentropy
-from keras.losses import sparse_categorical_crossentropy
-
-"""
-from https://github.com/keras-team/keras-contrib
-"""
-
-
-def crf_nll(y_true, y_pred):
-    """The negative log-likelihood for linear chain Conditional Random Field (CRF).
-
-    This loss function is only used when the `layers.CRF` layer
-    is trained in the "join" mode.
-
-    # Arguments
-        y_true: tensor with true targets.
-        y_pred: tensor with predicted targets.
-
-    # Returns
-        A scalar corresponding to the negative log-likelihood.
-
-    # Raises
-        TypeError: If CRF is not the last layer.
-
-    # About GitHub
-        If you open an issue or a pull request about CRF, please
-        add `cc @lzfelix` to notify Luiz Felix.
-    """
-
-    crf, idx = y_pred._keras_history[:2]
-    if crf._outbound_nodes:
-        raise TypeError('When learn_mode="join", CRF must be the last layer.')
-    if crf.sparse_target:
-        y_true = K.one_hot(K.cast(y_true[:, :, 0], 'int32'), crf.units)
-    X = crf._inbound_nodes[idx].input_tensors[0]
-    mask = crf._inbound_nodes[idx].input_masks[0]
-    nloglik = crf.get_negative_log_likelihood(y_true, X, mask)
-    return nloglik
-
-
-def crf_loss(y_true, y_pred):
-    """General CRF loss function depending on the learning mode.
-
-    # Arguments
-        y_true: tensor with true targets.
-        y_pred: tensor with predicted targets.
-
-    # Returns
-        If the CRF layer is being trained in the join mode, returns the negative
-        log-likelihood. Otherwise returns the categorical crossentropy implemented
-        by the underlying Keras backend.
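-
-    # Examples
-        A minimal usage sketch (`vocab_size` and `num_labels` are placeholder
-        values; `CRF` and `crf_accuracy` are the implementations collected above):
-        ```python
-        model = Sequential()
-        model.add(Embedding(vocab_size, 128, mask_zero=True))
-        crf = CRF(num_labels, sparse_target=True)
-        model.add(crf)
-        model.compile('adam', loss=crf_loss, metrics=[crf_accuracy])
-        ```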
- - # About GitHub - If you open an issue or a pull request about CRF, please - add `cc @lzfelix` to notify Luiz Felix. - """ - crf, idx = y_pred._keras_history[:2] - if crf.learn_mode == 'join': - return crf_nll(y_true, y_pred) - else: - if crf.sparse_target: - return sparse_categorical_crossentropy(y_true, y_pred) - else: - return categorical_crossentropy(y_true, y_pred) -#!/usr/bin/env python -# -*- coding: utf-8 -*- - -from keras.models import model_from_json -import numpy as np -import pandas as pd -from PIL import Image -import pickle -import os - - -# parameters -model_path = "model/model.json" # or "model/model_light.json" -weight_path = "model/weight.hdf5" # or "model/weight_light.json" -# put the path of the image that you convert. -image_path = 'sample images/original images/21 original.png' -# adjust the width of the image. the original width is used if new_width = 0. -new_width = 0 -input_shape = [64, 64, 1] - - -def add_mergin(img, mergin): - if mergin != 0: - img_new = np.ones([img.shape[0] + 2 * mergin, - img.shape[1] + 2 * mergin], dtype=np.uint8) * 255 - img_new[mergin:-mergin, mergin:-mergin] = img - else: - img_new = img - return img_new - - -def pickleload(path): - with open(path, mode='rb') as f: - data = pickle.load(f) - return data - - -# load model -json_string = open(model_path).read() -model = model_from_json(json_string) -model.load_weights(weight_path) -print("model load done") - -char_list_path = "data/char_list.csv" -char_list = pd.read_csv(char_list_path, encoding="cp932") -print("len(char_list)", len(char_list)) -# print(char_list.head()) -char_list = char_list[char_list['frequency'] >= 10] -char_list = char_list['char'].as_matrix() - -for k, v in enumerate(char_list): - if v == " ": - space = k - break -print("class index of 1B space:", space) - - -mergin = (input_shape[0] - 18) // 2 -img = Image.open(image_path) -orig_width, orig_height = img.size -if new_width == 0: - new_width = orig_width -new_height = int(img.size[1] * new_width / img.size[0]) -img = img.resize((new_width, new_height), Image.LANCZOS) -img = np.array(img) -if len(img.shape) == 3: - img = img[:, :, 0] - -img_new = np.ones([img.shape[0]+2*mergin+18, img.shape[1]+2*mergin+18], - dtype=np.uint8) * 255 -img_new[mergin:mergin+new_height, mergin:mergin+new_width] = img -img = (img_new.astype(np.float32)) / 255 - -char_dict_path = "data/char_dict.pkl" -char_dict = pickleload(char_dict_path) - -print("len(char_dict)", len(char_dict)) - -output_dir = "output/" -if not os.path.isdir(output_dir): - os.makedirs(output_dir) - -for slide in range(18): - print("converting:", slide) - num_line = (img.shape[0] - input_shape[0]) // 18 - img_width = img.shape[1] - new_line = np.ones([1, img_width]) - img = np.concatenate([new_line, img], axis=0) - predicts = [] - text = [] - for h in range(num_line): - w = 0 - penalty = 1 - predict_line = [] - text_line = "" - while w <= img_width - input_shape[1]: - input_img = img[h*18:h*18 + input_shape[0], w:w+input_shape[1]] - input_img = input_img.reshape( - [1, input_shape[0], input_shape[1], 1]) - predict = model.predict(input_img) - if penalty: - predict[0, space] = 0 - predict = np.argmax(predict[0]) - penalty = (predict == space) - char = char_list[predict] - predict_line.append(char) - char_width = char_dict[char].shape[1] - w += char_width - text_line += char - predicts.append(predict_line) - text.append(text_line+'\r\n') - # print(text) - - img_aa = np.ones_like(img, dtype=np.uint8) * 255 - - for h in range(num_line): - w = 0 - for char in predicts[h]: - # 
print("w", w) - char_width = char_dict[char].shape[1] - char_img = 255 - char_dict[char].astype(np.uint8) * 255 - img_aa[h*18:h*18+16, w:w+char_width] = char_img - w += char_width - - img_aa = Image.fromarray(img_aa) - img_aa = img_aa.crop([0, slide, new_width, new_height+slide]) - save_path = output_dir + os.path.basename(image_path)[:-4] + '_'\ - + 'w' + str(new_width) \ - + '_slide' + str(slide) + '.png' - img_aa.save(save_path) - - f = open(save_path[:-4] + '.txt', 'w') - f.writelines(text) - f.close() -from multiprocessing import Pool -import math -from sklearn.model_selection import train_test_split -from PIL import Image -from os import path -import pandas as pd -from keras.models import Model -from keras.layers import Dense, Activation, Reshape, Dropout, Embedding, Input, BatchNormalization -from keras.layers import Concatenate, Multiply, Conv2D, MaxPooling2D, Add, Flatten, GaussianNoise -from keras.models import model_from_json -from keras.callbacks import LearningRateScheduler, ModelCheckpoint, \ - EarlyStopping, CSVLogger, ReduceLROnPlateau - -import time -import numpy as np - -np.random.seed(42) - - -def CBRD(inputs, filters=64, kernel_size=(3, 3), droprate=0.5): - x = Conv2D(filters, kernel_size, padding='same', - kernel_initializer='random_normal')(inputs) - x = BatchNormalization()(x) - x = Activation('relu')(x) - # x = Dropout(droprate)(x) - return x - - -def DBRD(inputs, units=4096, droprate=0.5): - x = Dense(units)(inputs) - x = BatchNormalization()(x) - x = Activation('relu')(x) - x = Dropout(droprate)(x) - return x - - -def CNN(input_shape=None, classes=1000): - inputs = Input(shape=input_shape) - - # Block 1 - x = GaussianNoise(0.3)(inputs) - x = CBRD(x, 64) - x = CBRD(x, 64) - x = MaxPooling2D()(x) - - # Block 2 - x = CBRD(x, 128) - x = CBRD(x, 128) - x = MaxPooling2D()(x) - - # Block 3 - x = CBRD(x, 256) - x = CBRD(x, 256) - x = CBRD(x, 256) - x = MaxPooling2D()(x) - - # Classification block - x = Flatten(name='flatten')(x) - x = DBRD(x, 4096) - x = DBRD(x, 4096) - x = Dense(classes, activation='softmax', name='predictions')(x) - - model = Model(inputs=inputs, outputs=x) - - return model - - -def add_mergin(img, mergin): - if mergin != 0: - img_new = np.ones([img.shape[0] + 2 * mergin, - img.shape[1] + 2 * mergin], dtype=np.uint8) * 255 - img_new[mergin:-mergin, mergin:-mergin] = img - else: - img_new = img - return img_new - - -def load_img(args): - img_path, x, y, input_size, mergin, slide = args - img = np.array(Image.open(img_path)) - if len(img.shape) == 3: - img = img[:, :, 0] - img = add_mergin(img, mergin) - x += np.random.randint(-slide, slide+1) - y += np.random.randint(-slide, slide+1) - img = img[y:y + input_size, x:x + input_size] - img = img.reshape([1, input_size, input_size, 1]) - # print(img_path, x, y, input_size, mergin ) - # print(input_size, img.shape) - return img - - -def batch_generator(df, img_dir, input_size, batch_size, num_label, slide, - tail='line', shuffle=True): - df = df.reset_index() - batch_index = 0 - mergin = (input_size - 18) // 2 + 30 - n = df.shape[0] - pool = Pool() - while 1: - if batch_index == 0: - index_array = np.arange(n) - if shuffle: - index_array = np.random.permutation(n) - - current_index = (batch_index * batch_size) % n - if n >= current_index + batch_size: - current_batch_size = batch_size - batch_index += 1 - else: - current_batch_size = n - current_index - batch_index = 0 - - index_array_batch = index_array[current_index: - current_index + current_batch_size] - batch_img_path = 
df['file_name'][index_array_batch].apply( - lambda x: img_dir + x + tail + '.png').as_matrix() - # print(batch_img_path) - batch_coord_x = (df['x'][index_array_batch] + 30).as_matrix() - batch_coord_y = (df['y'][index_array_batch] + 30).as_matrix() - # print(batch_img_path[0], batch_coord_x[0], batch_coord_y[0], mergin) - batch_x = pool.map(load_img, - [(batch_img_path[i], - batch_coord_x[i], - batch_coord_y[i], - input_size, - mergin, - slide) - for i in range(current_batch_size)]) - # print(batch_x[0].shape) - batch_x = np.concatenate(batch_x, axis=0) - batch_x = batch_x.astype(np.float32) / 255 - # print(batch_x.shape) - - batch_y = df['label'][index_array[current_index: current_index + - current_batch_size]].as_matrix() - batch_y = np.eye(num_label)[batch_y] - - yield batch_x, batch_y - - -def train_generator(df, img_dir, input_size, batch_size, num_label, slide, - tail='line', shuffle=True): - gen_line = batch_generator(df, img_dir, input_size, - batch_size // 2, num_label, slide, tail="line_resize") - gen_orig = batch_generator(df, img_dir, input_size, - batch_size // 2, num_label, slide, tail="orig") - while True: - batch1 = next(gen_line) - batch2 = next(gen_orig) - batch_x = np.concatenate([batch1[0], batch2[0]]) - batch_y = np.concatenate([batch1[1], batch2[1]]) - yield batch_x, batch_y - - -def train(): - # parameter - num_epoch = 256 - batch_size = 64 - input_shape = [64, 64, 1] - learning_rate = 0.001 - df_path = "data/data_500.csv" - char_list_path = "data/char_list_500.csv" - img_dir = "data/image_500/" - - # load text - df = pd.read_csv(df_path, encoding="cp932") - char_list = pd.read_csv(char_list_path, encoding="cp932") - num_label = char_list[char_list['frequency'] >= 10].shape[0] - # print(num_label) - df = df[df['label'] < num_label] - df = df.reset_index() - input_size = input_shape[0] - slide = 1 - df_train, df_val = train_test_split(df, test_size=0.1, random_state=42) - gen = train_generator(df_train, img_dir, - input_size, batch_size, num_label, slide) - gen_val = batch_generator(df_val, img_dir, input_size, - batch_size, num_label, 0, - tail="line_resize", shuffle=False) - - # build model - model = CNN(input_shape=input_shape, classes=num_label) - model.compile(loss='categorical_crossentropy', - optimizer='sgd', metrics=['accuracy']) - - # train - nb_train = df_train.shape[0] - nb_val = df_val.shape[0] - nb_step = math.ceil(nb_train / batch_size) - nb_val_step = math.ceil(nb_val / batch_size) - - format = "%H%M" - ts = time.strftime(format) - save_path = "model/" + path.splitext(__file__)[0] + "_" + ts - - json_string = model.to_json() - with open(save_path + '_model.json', "w") as f: - f.write(json_string) - - csv_logger = CSVLogger(save_path + '_log.csv', append=True) - check_path = save_path + '_e{epoch:02d}_vl{val_loss:.5f}.hdf5' - save_checkpoint = ModelCheckpoint( - filepath=check_path, monitor='val_loss', save_best_only=True) - lerning_rate_schedular = ReduceLROnPlateau( - patience=8, min_lr=learning_rate * 0.00001) - early_stopping = EarlyStopping(monitor='val_loss', - patience=16, - verbose=1, - min_delta=1e-4, - mode='min') - Callbacks = [csv_logger, - save_checkpoint, - lerning_rate_schedular, early_stopping] - model.fit_generator(gen, - steps_per_epoch=nb_step, - epochs=num_epoch, - validation_data=gen_val, - validation_steps=nb_val_step, - callbacks=Callbacks - ) - - -if __name__ == "__main__": - train() -#!/usr/bin/env python -# -*- coding: utf-8 -*- -import os -import sys - -from setuptools import find_packages, setup -# from m2r import 
parse_from_file
-
-# Package meta-data.
-NAME = 'anago'
-DESCRIPTION = 'Sequence labeling library using Keras.'
-URL = 'https://github.com/Hironsan/anago'
-EMAIL = 'hiroki.nakayama.py@gmail.com'
-AUTHOR = 'Hironsan'
-LICENSE = 'MIT'
-
-here = os.path.abspath(os.path.dirname(__file__))
-
-# long_description = parse_from_file(os.path.join(here, 'README.md'))
-long_description = open(os.path.join(
-    here, 'README.md'), encoding='utf-8').read()
-
-if sys.argv[-1] == 'publish':
-    os.system('python setup.py sdist bdist_wheel upload')
-    sys.exit()
-
-required = [
-    'Keras>=2.2.0', 'h5py>=2.7.1', 'scikit-learn>=0.19.1',
-    'numpy>=1.14.3', 'tensorflow>=1.8.0', 'requests>=2.18.4',
-    'seqeval>=0.0.3'
-]
-
-setup(
-    name=NAME,
-    version='1.0.8',
-    description=DESCRIPTION,
-    long_description=long_description,
-    author=AUTHOR,
-    author_email=EMAIL,
-    url=URL,
-    packages=find_packages(exclude=('tests',)),
-    install_requires=required,
-    include_package_data=True,
-    license=LICENSE,
-    classifiers=[
-        'License :: OSI Approved :: MIT License',
-        'Programming Language :: Python',
-        'Programming Language :: Python :: 2.6',
-        'Programming Language :: Python :: 2.7',
-        'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.3',
-        'Programming Language :: Python :: 3.4',
-        'Programming Language :: Python :: 3.5',
-        'Programming Language :: Python :: 3.6',
-        'Programming Language :: Python :: Implementation :: CPython',
-        'Programming Language :: Python :: Implementation :: PyPy'
-    ],
-)
-from anago.tagger import Tagger
-from anago.trainer import Trainer
-from anago.wrapper import Sequence
-"""
-Custom callbacks.
-"""
-import numpy as np
-from keras.callbacks import Callback
-from seqeval.metrics import f1_score, classification_report
-
-
-class F1score(Callback):
-
-    def __init__(self, seq, preprocessor=None):
-        super(F1score, self).__init__()
-        self.seq = seq
-        self.p = preprocessor
-
-    def get_lengths(self, y_true):
-        lengths = []
-        for y in np.argmax(y_true, -1):
-            try:
-                i = list(y).index(0)
-            except ValueError:
-                i = len(y)
-            lengths.append(i)
-
-        return lengths
-
-    def on_epoch_end(self, epoch, logs={}):
-        label_true = []
-        label_pred = []
-        for i in range(len(self.seq)):
-            x_true, y_true = self.seq[i]
-            lengths = self.get_lengths(y_true)
-            y_pred = self.model.predict_on_batch(x_true)
-
-            y_true = self.p.inverse_transform(y_true, lengths)
-            y_pred = self.p.inverse_transform(y_pred, lengths)
-
-            label_true.extend(y_true)
-            label_pred.extend(y_pred)
-
-        score = f1_score(label_true, label_pred)
-        print(' - f1: {:04.2f}'.format(score * 100))
-        print(classification_report(label_true, label_pred))
-        logs['f1'] = score
-from __future__ import absolute_import
-from __future__ import division
-
-import keras.backend as K
-from keras import activations
-from keras import initializers
-from keras import regularizers
-from keras import constraints
-from keras.engine import Layer
-from keras.engine import InputSpec
-from keras.objectives import categorical_crossentropy
-from keras.objectives import sparse_categorical_crossentropy
-
-
-class CRF(Layer):
-    """An implementation of linear chain conditional random field (CRF).
-
-    A linear chain CRF is defined to maximize the following likelihood function:
-
-    $$ L(W, U, b; y_1, ..., y_n) := \frac{1}{Z}
-        \exp(-a_1' y_1 - a_n' y_n
-             - \sum_{k=1}^{n} (f(x_k' W + b))' y_k
-             - \sum_{k=1}^{n-1} y_k' U y_{k+1}), $$
-
-    where:
-        $Z$: normalization constant
-        $x_k, y_k$: inputs and outputs
-
-    This implementation has two modes for optimization:
-    1. (`join mode`) optimized by maximizing the joint likelihood, which is
-        statistically optimal.
-        Note that in this case, CRF must be the output/last layer.
-    2. (`marginal mode`) returns marginal probabilities on each time step and is
-        optimized via composite likelihood (product of marginal likelihoods),
-        i.e., using `categorical_crossentropy` loss.
-        Note that in this case, CRF can be either the last layer or an
-        intermediate layer (though not explored).
-    For prediction (test phase), one can choose either the Viterbi best path
-    (class indices) or marginal probabilities if probabilities are needed.
-    However, if one chooses *join mode* for training, Viterbi output is
-    typically better than marginal output, but the marginal output will still
-    perform reasonably close, while if *marginal mode* is used for training,
-    marginal output usually performs much better. The default behavior is set
-    according to this observation.
-    In addition, this implementation supports masking and accepts either
-    one-hot or sparse targets.
-    # Examples
-    ```python
-        model = Sequential()
-        model.add(Embedding(3001, 300, mask_zero=True))
-        # use learn_mode = 'join', test_mode = 'viterbi',
-        # sparse_target = True (label indices output)
-        crf = CRF(10, sparse_target=True)
-        model.add(crf)
-        # crf.accuracy defaults to Viterbi accuracy if using join mode (default).
-        # One can add crf.marginal_acc if interested, but it may slow down learning.
-        model.compile('adam', loss=crf.loss_function, metrics=[crf.accuracy])
-        # y must be label indices (with shape 1 at dim 3) here, since `sparse_target=True`
-        model.fit(x, y)
-        # prediction gives the one-hot representation of the Viterbi best path
-        y_hat = model.predict(x_test)
-    ```
-    # Arguments
-        units: Positive integer, dimensionality of the output space.
-        learn_mode: Either 'join' or 'marginal'.
-            The former trains the model by maximizing the joint likelihood, while
-            the latter maximizes the product of marginal likelihoods over all
-            time steps.
-        test_mode: Either 'viterbi' or 'marginal'.
-            The former is recommended and is the default when `learn_mode = 'join'`;
-            it gives a one-hot representation of the best path at test
-            (prediction) time, while the latter is recommended and chosen as the
-            default when `learn_mode = 'marginal'`, which produces marginal
-            probabilities for each time step.
-        sparse_target: Boolean (default False) indicating if provided labels are
-            one-hot or indices (with shape 1 at dim 3).
-        use_boundary: Boolean (default True) indicating if trainable start-end
-            chain energies should be added to the model.
-        use_bias: Boolean, whether the layer uses a bias vector.
-        kernel_initializer: Initializer for the `kernel` weights matrix,
-            used for the linear transformation of the inputs.
-            (see [initializers](../initializers.md)).
-        chain_initializer: Initializer for the `chain_kernel` weights matrix,
-            used for the CRF chain energy.
-            (see [initializers](../initializers.md)).
-        boundary_initializer: Initializer for the `left_boundary`, `right_boundary`
-            weights vectors, used for the start/left and end/right boundary energy.
-            (see [initializers](../initializers.md)).
-        bias_initializer: Initializer for the bias vector
-            (see [initializers](../initializers.md)).
-        activation: Activation function to use
-            (see [activations](../activations.md)).
-            If you pass None, no activation is applied
-            (i.e. "linear" activation: `a(x) = x`).
-        kernel_regularizer: Regularizer function applied to
-            the `kernel` weights matrix
-            (see [regularizer](../regularizers.md)).
- chain_regularizer: Regularizer function applied to - the `chain_kernel` weights matrix - (see [regularizer](../regularizers.md)). - boundary_regularizer: Regularizer function applied to - the 'left_boundary', 'right_boundary' weight vectors - (see [regularizer](../regularizers.md)). - bias_regularizer: Regularizer function applied to the bias vector - (see [regularizer](../regularizers.md)). - kernel_constraint: Constraint function applied to - the `kernel` weights matrix - (see [constraints](../constraints.md)). - chain_constraint: Constraint function applied to - the `chain_kernel` weights matrix - (see [constraints](../constraints.md)). - boundary_constraint: Constraint function applied to - the `left_boundary`, `right_boundary` weights vectors - (see [constraints](../constraints.md)). - bias_constraint: Constraint function applied to the bias vector - (see [constraints](../constraints.md)). - input_dim: dimensionality of the input (integer). - This argument (or alternatively, the keyword argument `input_shape`) - is required when using this layer as the first layer in a model. - unroll: Boolean (default False). If True, the network will be unrolled, else a symbolic loop will be used. - Unrolling can speed-up a RNN, although it tends to be more memory-intensive. - Unrolling is only suitable for short sequences. - # Input shape - 3D tensor with shape `(nb_samples, timesteps, input_dim)`. - # Output shape - 3D tensor with shape `(nb_samples, timesteps, units)`. - # Masking - This layer supports masking for input data with a variable number - of timesteps. To introduce masks to your data, - use an [Embedding](embeddings.md) layer with the `mask_zero` parameter - set to `True`. - """ - - def __init__(self, units, - learn_mode='join', - test_mode=None, - sparse_target=False, - use_boundary=True, - use_bias=True, - activation='linear', - kernel_initializer='glorot_uniform', - chain_initializer='orthogonal', - bias_initializer='zeros', - boundary_initializer='zeros', - kernel_regularizer=None, - chain_regularizer=None, - boundary_regularizer=None, - bias_regularizer=None, - kernel_constraint=None, - chain_constraint=None, - boundary_constraint=None, - bias_constraint=None, - input_dim=None, - unroll=False, - **kwargs): - super(CRF, self).__init__(**kwargs) - self.supports_masking = True - self.units = units - self.learn_mode = learn_mode - assert self.learn_mode in ['join', 'marginal'] - self.test_mode = test_mode - if self.test_mode is None: - self.test_mode = 'viterbi' if self.learn_mode == 'join' else 'marginal' - else: - assert self.test_mode in ['viterbi', 'marginal'] - self.sparse_target = sparse_target - self.use_boundary = use_boundary - self.use_bias = use_bias - - self.activation = activations.get(activation) - - self.kernel_initializer = initializers.get(kernel_initializer) - self.chain_initializer = initializers.get(chain_initializer) - self.boundary_initializer = initializers.get(boundary_initializer) - self.bias_initializer = initializers.get(bias_initializer) - - self.kernel_regularizer = regularizers.get(kernel_regularizer) - self.chain_regularizer = regularizers.get(chain_regularizer) - self.boundary_regularizer = regularizers.get(boundary_regularizer) - self.bias_regularizer = regularizers.get(bias_regularizer) - - self.kernel_constraint = constraints.get(kernel_constraint) - self.chain_constraint = constraints.get(chain_constraint) - self.boundary_constraint = constraints.get(boundary_constraint) - self.bias_constraint = constraints.get(bias_constraint) - - self.unroll = 
unroll - - def build(self, input_shape): - self.input_spec = [InputSpec(shape=input_shape)] - self.input_dim = input_shape[-1] - - self.kernel = self.add_weight((self.input_dim, self.units), - name='kernel', - initializer=self.kernel_initializer, - regularizer=self.kernel_regularizer, - constraint=self.kernel_constraint) - self.chain_kernel = self.add_weight((self.units, self.units), - name='chain_kernel', - initializer=self.chain_initializer, - regularizer=self.chain_regularizer, - constraint=self.chain_constraint) - if self.use_bias: - self.bias = self.add_weight((self.units,), - name='bias', - initializer=self.bias_initializer, - regularizer=self.bias_regularizer, - constraint=self.bias_constraint) - else: - self.bias = None - - if self.use_boundary: - self.left_boundary = self.add_weight((self.units,), - name='left_boundary', - initializer=self.boundary_initializer, - regularizer=self.boundary_regularizer, - constraint=self.boundary_constraint) - self.right_boundary = self.add_weight((self.units,), - name='right_boundary', - initializer=self.boundary_initializer, - regularizer=self.boundary_regularizer, - constraint=self.boundary_constraint) - self.built = True - - def call(self, X, mask=None): - if mask is not None: - assert K.ndim( - mask) == 2, 'Input mask to CRF must have dim 2 if not None' - - if self.test_mode == 'viterbi': - test_output = self.viterbi_decoding(X, mask) - else: - test_output = self.get_marginal_prob(X, mask) - - self.uses_learning_phase = True - if self.learn_mode == 'join': - train_output = K.zeros_like(K.dot(X, self.kernel)) - out = K.in_train_phase(train_output, test_output) - else: - if self.test_mode == 'viterbi': - train_output = self.get_marginal_prob(X, mask) - out = K.in_train_phase(train_output, test_output) - else: - out = test_output - return out - - def compute_output_shape(self, input_shape): - return input_shape[:2] + (self.units,) - - def compute_mask(self, input, mask=None): - if mask is not None and self.learn_mode == 'join': - return K.any(mask, axis=1) - return mask - - def get_config(self): - config = {'units': self.units, - 'learn_mode': self.learn_mode, - 'test_mode': self.test_mode, - 'use_boundary': self.use_boundary, - 'use_bias': self.use_bias, - 'sparse_target': self.sparse_target, - 'kernel_initializer': initializers.serialize(self.kernel_initializer), - 'chain_initializer': initializers.serialize(self.chain_initializer), - 'boundary_initializer': initializers.serialize(self.boundary_initializer), - 'bias_initializer': initializers.serialize(self.bias_initializer), - 'activation': activations.serialize(self.activation), - 'kernel_regularizer': regularizers.serialize(self.kernel_regularizer), - 'chain_regularizer': regularizers.serialize(self.chain_regularizer), - 'boundary_regularizer': regularizers.serialize(self.boundary_regularizer), - 'bias_regularizer': regularizers.serialize(self.bias_regularizer), - 'kernel_constraint': constraints.serialize(self.kernel_constraint), - 'chain_constraint': constraints.serialize(self.chain_constraint), - 'boundary_constraint': constraints.serialize(self.boundary_constraint), - 'bias_constraint': constraints.serialize(self.bias_constraint), - 'input_dim': self.input_dim, - 'unroll': self.unroll} - base_config = super(CRF, self).get_config() - return dict(list(base_config.items()) + list(config.items())) - - @property - def loss_function(self): - if self.learn_mode == 'join': - def loss(y_true, y_pred): - assert self._inbound_nodes, 'CRF has not connected to any layer.' 
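-            # In join mode the CRF computes its own negative log-likelihood
-            # directly from its input tensor, so it must be the final layer.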
- assert not self._outbound_nodes, 'When learn_model="join", CRF must be the last layer.' - if self.sparse_target: - y_true = K.one_hot( - K.cast(y_true[:, :, 0], 'int32'), self.units) - X = self._inbound_nodes[0].input_tensors[0] - mask = self._inbound_nodes[0].input_masks[0] - nloglik = self.get_negative_log_likelihood(y_true, X, mask) - return nloglik - return loss - else: - if self.sparse_target: - return sparse_categorical_crossentropy - else: - return categorical_crossentropy - - @property - def accuracy(self): - if self.test_mode == 'viterbi': - return self.viterbi_acc - else: - return self.marginal_acc - - @staticmethod - def _get_accuracy(y_true, y_pred, mask, sparse_target=False): - y_pred = K.argmax(y_pred, -1) - if sparse_target: - y_true = K.cast(y_true[:, :, 0], K.dtype(y_pred)) - else: - y_true = K.argmax(y_true, -1) - judge = K.cast(K.equal(y_pred, y_true), K.floatx()) - if mask is None: - return K.mean(judge) - else: - mask = K.cast(mask, K.floatx()) - return K.sum(judge * mask) / K.sum(mask) - - @property - def viterbi_acc(self): - def acc(y_true, y_pred): - X = self._inbound_nodes[0].input_tensors[0] - mask = self._inbound_nodes[0].input_masks[0] - y_pred = self.viterbi_decoding(X, mask) - return self._get_accuracy(y_true, y_pred, mask, self.sparse_target) - acc.func_name = 'viterbi_acc' - return acc - - @property - def marginal_acc(self): - def acc(y_true, y_pred): - X = self._inbound_nodes[0].input_tensors[0] - mask = self._inbound_nodes[0].input_masks[0] - y_pred = self.get_marginal_prob(X, mask) - return self._get_accuracy(y_true, y_pred, mask, self.sparse_target) - acc.func_name = 'marginal_acc' - return acc - - @staticmethod - def softmaxNd(x, axis=-1): - m = K.max(x, axis=axis, keepdims=True) - exp_x = K.exp(x - m) - prob_x = exp_x / K.sum(exp_x, axis=axis, keepdims=True) - return prob_x - - @staticmethod - def shift_left(x, offset=1): - assert offset > 0 - return K.concatenate([x[:, offset:], K.zeros_like(x[:, :offset])], axis=1) - - @staticmethod - def shift_right(x, offset=1): - assert offset > 0 - return K.concatenate([K.zeros_like(x[:, :offset]), x[:, :-offset]], axis=1) - - def add_boundary_energy(self, energy, mask, start, end): - start = K.expand_dims(K.expand_dims(start, 0), 0) - end = K.expand_dims(K.expand_dims(end, 0), 0) - if mask is None: - energy = K.concatenate( - [energy[:, :1, :] + start, energy[:, 1:, :]], axis=1) - energy = K.concatenate( - [energy[:, :-1, :], energy[:, -1:, :] + end], axis=1) - else: - mask = K.expand_dims(K.cast(mask, K.floatx())) - start_mask = K.cast( - K.greater(mask, self.shift_right(mask)), K.floatx()) - end_mask = K.cast( - K.greater(self.shift_left(mask), mask), K.floatx()) - energy = energy + start_mask * start - energy = energy + end_mask * end - return energy - - def get_log_normalization_constant(self, input_energy, mask, **kwargs): - """Compute logarithm of the normalization constant Z, where - Z = sum exp(-E) -> logZ = log sum exp(-E) =: -nlogZ - """ - # should have logZ[:, i] == logZ[:, j] for any i, j - logZ = self.recursion(input_energy, mask, - return_sequences=False, **kwargs) - return logZ[:, 0] - - def get_energy(self, y_true, input_energy, mask): - """Energy = a1' y1 + u1' y1 + y1' U y2 + u2' y2 + y2' U y3 + u3' y3 + an' y3 - """ - input_energy = K.sum(input_energy * y_true, 2) # (B, T) - chain_energy = K.sum( - K.dot(y_true[:, :-1, :], self.chain_kernel) * y_true[:, 1:, :], 2) # (B, T-1) - - if mask is not None: - mask = K.cast(mask, K.floatx()) - # (B, T-1), mask[:,:-1]*mask[:,1:] makes it work with any 
padding
-            chain_mask = mask[:, :-1] * mask[:, 1:]
-            input_energy = input_energy * mask
-            chain_energy = chain_energy * chain_mask
-        total_energy = K.sum(input_energy, -1) + \
-            K.sum(chain_energy, -1)  # (B, )
-
-        return total_energy
-
-    def get_negative_log_likelihood(self, y_true, X, mask):
-        """Compute the loss, i.e., negative log likelihood (normalized by number of time steps)
-        likelihood = 1/Z * exp(-E) ->  neg_log_like = - log(1/Z * exp(-E)) = logZ + E
-        """
-        input_energy = self.activation(K.dot(X, self.kernel) + self.bias)
-        if self.use_boundary:
-            input_energy = self.add_boundary_energy(
-                input_energy, mask, self.left_boundary, self.right_boundary)
-        energy = self.get_energy(y_true, input_energy, mask)
-        logZ = self.get_log_normalization_constant(
-            input_energy, mask, input_length=K.int_shape(X)[1])
-        nloglik = logZ + energy
-        if mask is not None:
-            nloglik = nloglik / K.sum(K.cast(mask, K.floatx()), 1)
-        else:
-            nloglik = nloglik / K.cast(K.shape(X)[1], K.floatx())
-        return nloglik
-
-    def step(self, input_energy_t, states, return_logZ=True):
-        # note: in the following, `prev_target_val` has shape = (B, F)
-        # where B = batch_size, F = output feature dim
-        # Note: `i` is of float32, due to the behavior of `K.rnn`
-        prev_target_val, i, chain_energy = states[:3]
-        t = K.cast(i[0, 0], dtype='int32')
-        if len(states) > 3:
-            if K.backend() == 'theano':
-                m = states[3][:, t:(t + 2)]
-            else:
-                m = K.tf.slice(states[3], [0, t], [-1, 2])
-            input_energy_t = input_energy_t * K.expand_dims(m[:, 0])
-            # (1, F, F)*(B, 1, 1) -> (B, F, F)
-            chain_energy = chain_energy * \
-                K.expand_dims(K.expand_dims(m[:, 0] * m[:, 1]))
-        if return_logZ:
-            # shapes: (1, B, F) + (B, F, 1) -> (B, F, F)
-            energy = chain_energy + \
-                K.expand_dims(input_energy_t - prev_target_val, 2)
-            new_target_val = K.logsumexp(-energy, 1)  # shapes: (B, F)
-            return new_target_val, [new_target_val, i + 1]
-        else:
-            energy = chain_energy + \
-                K.expand_dims(input_energy_t + prev_target_val, 2)
-            min_energy = K.min(energy, 1)
-            # cast for tf-version `K.rnn`
-            argmin_table = K.cast(K.argmin(energy, 1), K.floatx())
-            return argmin_table, [min_energy, i + 1]
-
-    def recursion(self, input_energy, mask=None, go_backwards=False, return_sequences=True, return_logZ=True, input_length=None):
-        """Forward (alpha) or backward (beta) recursion
-        If `return_logZ = True`, compute the logZ, the normalization constant:
-        \[ Z = \sum_{y1, y2, y3} exp(-E)  # energy
-            = \sum_{y1, y2, y3} exp(-(u1' y1 + y1' W y2 + u2' y2 + y2' W y3 + u3' y3))
-            = sum_{y2, y3} (exp(-(u2' y2 + y2' W y3 + u3' y3)) sum_{y1} exp(-(u1' y1 + y1' W y2))) \]
-        Denote:
-            \[ S(y2) := sum_{y1} exp(-(u1' y1 + y1' W y2)), \]
-            \[ Z = sum_{y2, y3} exp(log S(y2) - (u2' y2 + y2' W y3 + u3' y3)) \]
-            \[ logS(y2) = log S(y2) = log_sum_exp(-(u1' y1 + y1' W y2)) \]
-        Note that:
-            yi's are one-hot vectors
-            u1, u3: boundary energies have been merged
-        If `return_logZ = False`, compute the Viterbi's best path lookup table.
-        """
-        chain_energy = self.chain_kernel
-        # shape=(1, F, F): F=num of output features.
1st F is for t-1, 2nd F for t - chain_energy = K.expand_dims(chain_energy, 0) - # shape=(B, F), dtype=float32 - prev_target_val = K.zeros_like(input_energy[:, 0, :]) - - if go_backwards: - input_energy = K.reverse(input_energy, 1) - if mask is not None: - mask = K.reverse(mask, 1) - - initial_states = [prev_target_val, - K.zeros_like(prev_target_val[:, :1])] - constants = [chain_energy] - - if mask is not None: - mask2 = K.cast(K.concatenate( - [mask, K.zeros_like(mask[:, :1])], axis=1), K.floatx()) - constants.append(mask2) - - def _step(input_energy_i, states): - return self.step(input_energy_i, states, return_logZ) - - target_val_last, target_val_seq, _ = K.rnn(_step, input_energy, initial_states, constants=constants, - input_length=input_length, unroll=self.unroll) - - if return_sequences: - if go_backwards: - target_val_seq = K.reverse(target_val_seq, 1) - return target_val_seq - else: - return target_val_last - - def forward_recursion(self, input_energy, **kwargs): - return self.recursion(input_energy, **kwargs) - - def backward_recursion(self, input_energy, **kwargs): - return self.recursion(input_energy, go_backwards=True, **kwargs) - - def get_marginal_prob(self, X, mask=None): - input_energy = self.activation(K.dot(X, self.kernel) + self.bias) - if self.use_boundary: - input_energy = self.add_boundary_energy( - input_energy, mask, self.left_boundary, self.right_boundary) - input_length = K.int_shape(X)[1] - alpha = self.forward_recursion( - input_energy, mask=mask, input_length=input_length) - beta = self.backward_recursion( - input_energy, mask=mask, input_length=input_length) - if mask is not None: - input_energy = input_energy * \ - K.expand_dims(K.cast(mask, K.floatx())) - margin = -(self.shift_right(alpha) + - input_energy + self.shift_left(beta)) - return self.softmaxNd(margin) - - def viterbi_decoding(self, X, mask=None): - input_energy = self.activation(K.dot(X, self.kernel) + self.bias) - if self.use_boundary: - input_energy = self.add_boundary_energy( - input_energy, mask, self.left_boundary, self.right_boundary) - - argmin_tables = self.recursion(input_energy, mask, return_logZ=False) - argmin_tables = K.cast(argmin_tables, 'int32') - - # backward to find best path, `initial_best_idx` can be any, as all elements in the last argmin_table are the same - argmin_tables = K.reverse(argmin_tables, 1) - # matrix instead of vector is required by tf `K.rnn` - initial_best_idx = [K.expand_dims(argmin_tables[:, 0, 0])] - if K.backend() == 'theano': - initial_best_idx = [K.T.unbroadcast(initial_best_idx[0], 1)] - - def gather_each_row(params, indices): - n = K.shape(indices)[0] - if K.backend() == 'theano': - return params[K.T.arange(n), indices] - else: - indices = K.transpose(K.stack([K.tf.range(n), indices])) - return K.tf.gather_nd(params, indices) - - def find_path(argmin_table, best_idx): - next_best_idx = gather_each_row(argmin_table, best_idx[0][:, 0]) - next_best_idx = K.expand_dims(next_best_idx) - if K.backend() == 'theano': - next_best_idx = K.T.unbroadcast(next_best_idx, 1) - return next_best_idx, [next_best_idx] - - _, best_paths, _ = K.rnn(find_path, argmin_tables, initial_best_idx, - input_length=K.int_shape(X)[1], unroll=self.unroll) - best_paths = K.reverse(best_paths, 1) - best_paths = K.squeeze(best_paths, 2) - - return K.one_hot(best_paths, self.units) -""" -Model definition. 
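-
-Defines save/load helpers for Keras models together with the BiLSTMCRF and
-ELModel architectures.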
-""" -import json - -from keras.layers import Dense, LSTM, Bidirectional, Embedding, Input, Dropout, TimeDistributed -from keras.layers.merge import Concatenate -from keras.models import Model, model_from_json - -from anago.layers import CRF - - -def save_model(model, weights_file, params_file): - with open(params_file, 'w') as f: - params = model.to_json() - json.dump(json.loads(params), f, sort_keys=True, indent=4) - model.save_weights(weights_file) - - -def load_model(weights_file, params_file): - with open(params_file) as f: - model = model_from_json(f.read(), custom_objects={'CRF': CRF}) - model.load_weights(weights_file) - - return model - - -class BiLSTMCRF(object): - """A Keras implementation of BiLSTM-CRF for sequence labeling. - - References - -- - Guillaume Lample, Miguel Ballesteros, Sandeep Subramanian, Kazuya Kawakami, Chris Dyer. - "Neural Architectures for Named Entity Recognition". Proceedings of NAACL 2016. - https://arxiv.org/abs/1603.01360 - """ - - def __init__(self, - num_labels, - word_vocab_size, - char_vocab_size=None, - word_embedding_dim=100, - char_embedding_dim=25, - word_lstm_size=100, - char_lstm_size=25, - fc_dim=100, - dropout=0.5, - embeddings=None, - use_char=True, - use_crf=True): - """Build a Bi-LSTM CRF model. - - Args: - word_vocab_size (int): word vocabulary size. - char_vocab_size (int): character vocabulary size. - num_labels (int): number of entity labels. - word_embedding_dim (int): word embedding dimensions. - char_embedding_dim (int): character embedding dimensions. - word_lstm_size (int): character LSTM feature extractor output dimensions. - char_lstm_size (int): word tagger LSTM output dimensions. - fc_dim (int): output fully-connected layer size. - dropout (float): dropout rate. - embeddings (numpy array): word embedding matrix. - use_char (boolean): add char feature. - use_crf (boolean): use crf as last layer. 
- """ - super(BiLSTMCRF).__init__() - self._char_embedding_dim = char_embedding_dim - self._word_embedding_dim = word_embedding_dim - self._char_lstm_size = char_lstm_size - self._word_lstm_size = word_lstm_size - self._char_vocab_size = char_vocab_size - self._word_vocab_size = word_vocab_size - self._fc_dim = fc_dim - self._dropout = dropout - self._use_char = use_char - self._use_crf = use_crf - self._embeddings = embeddings - self._num_labels = num_labels - - def build(self): - # build word embedding - word_ids = Input(batch_shape=(None, None), - dtype='int32', name='word_input') - inputs = [word_ids] - if self._embeddings is None: - word_embeddings = Embedding(input_dim=self._word_vocab_size, - output_dim=self._word_embedding_dim, - mask_zero=True, - name='word_embedding')(word_ids) - else: - word_embeddings = Embedding(input_dim=self._embeddings.shape[0], - output_dim=self._embeddings.shape[1], - mask_zero=True, - weights=[self._embeddings], - name='word_embedding')(word_ids) - - # build character based word embedding - if self._use_char: - char_ids = Input(batch_shape=(None, None, None), - dtype='int32', name='char_input') - inputs.append(char_ids) - char_embeddings = Embedding(input_dim=self._char_vocab_size, - output_dim=self._char_embedding_dim, - mask_zero=True, - name='char_embedding')(char_ids) - char_embeddings = TimeDistributed(Bidirectional( - LSTM(self._char_lstm_size)))(char_embeddings) - word_embeddings = Concatenate()([word_embeddings, char_embeddings]) - - word_embeddings = Dropout(self._dropout)(word_embeddings) - z = Bidirectional(LSTM(units=self._word_lstm_size, - return_sequences=True))(word_embeddings) - z = Dense(self._fc_dim, activation='tanh')(z) - - if self._use_crf: - crf = CRF(self._num_labels, sparse_target=False) - loss = crf.loss_function - pred = crf(z) - else: - loss = 'categorical_crossentropy' - pred = Dense(self._num_labels, activation='softmax')(z) - - model = Model(inputs=inputs, outputs=pred) - - return model, loss - - -class ELModel(object): - """ - A Keras implementation of ELMo BiLSTM-CRF for sequence labeling. - """ - - def __init__(self, - num_labels, - word_vocab_size, - char_vocab_size=None, - word_embedding_dim=100, - char_embedding_dim=25, - word_lstm_size=100, - char_lstm_size=25, - fc_dim=100, - dropout=0.5, - embeddings=None): - """Build a Bi-LSTM CRF model. - - Args: - word_vocab_size (int): word vocabulary size. - char_vocab_size (int): character vocabulary size. - num_labels (int): number of entity labels. - word_embedding_dim (int): word embedding dimensions. - char_embedding_dim (int): character embedding dimensions. - word_lstm_size (int): character LSTM feature extractor output dimensions. - char_lstm_size (int): word tagger LSTM output dimensions. - fc_dim (int): output fully-connected layer size. - dropout (float): dropout rate. - embeddings (numpy array): word embedding matrix. 
- """ - self._char_embedding_dim = char_embedding_dim - self._word_embedding_dim = word_embedding_dim - self._char_lstm_size = char_lstm_size - self._word_lstm_size = word_lstm_size - self._char_vocab_size = char_vocab_size - self._word_vocab_size = word_vocab_size - self._fc_dim = fc_dim - self._dropout = dropout - self._embeddings = embeddings - self._num_labels = num_labels - - def build(self): - # build word embedding - word_ids = Input(batch_shape=(None, None), - dtype='int32', name='word_input') - if self._embeddings is None: - word_embeddings = Embedding(input_dim=self._word_vocab_size, - output_dim=self._word_embedding_dim, - mask_zero=True, - name='word_embedding')(word_ids) - else: - word_embeddings = Embedding(input_dim=self._embeddings.shape[0], - output_dim=self._embeddings.shape[1], - mask_zero=True, - weights=[self._embeddings], - name='word_embedding')(word_ids) - - # build character based word embedding - char_ids = Input(batch_shape=(None, None, None), - dtype='int32', name='char_input') - char_embeddings = Embedding(input_dim=self._char_vocab_size, - output_dim=self._char_embedding_dim, - mask_zero=True, - name='char_embedding')(char_ids) - char_embeddings = TimeDistributed(Bidirectional( - LSTM(self._char_lstm_size)))(char_embeddings) - - elmo_embeddings = Input(shape=(None, 1024), dtype='float32') - - word_embeddings = Concatenate()( - [word_embeddings, char_embeddings, elmo_embeddings]) - - word_embeddings = Dropout(self._dropout)(word_embeddings) - z = Bidirectional(LSTM(units=self._word_lstm_size, - return_sequences=True))(word_embeddings) - z = Dense(self._fc_dim, activation='tanh')(z) - - crf = CRF(self._num_labels, sparse_target=False) - loss = crf.loss_function - pred = crf(z) - - model = Model(inputs=[word_ids, char_ids, - elmo_embeddings], outputs=pred) - - return model, loss -# -*- coding: utf-8 -*- -""" -Preprocessors. -""" -import re - -import numpy as np -from allennlp.modules.elmo import Elmo, batch_to_ids -from sklearn.base import BaseEstimator, TransformerMixin -from sklearn.externals import joblib -from keras.utils.np_utils import to_categorical -from keras.preprocessing.sequence import pad_sequences - -from anago.utils import Vocabulary - -options_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_options.json' -weight_file = 'https://s3-us-west-2.amazonaws.com/allennlp/models/elmo/2x4096_512_2048cnn_2xhighway/elmo_2x4096_512_2048cnn_2xhighway_weights.hdf5' - - -def normalize_number(text): - return re.sub(r'[0-90123456789]', r'0', text) - - -class IndexTransformer(BaseEstimator, TransformerMixin): - """Convert a collection of raw documents to a document id matrix. - - Attributes: - _use_char: boolean. Whether to use char feature. - _num_norm: boolean. Whether to normalize text. - _word_vocab: dict. A mapping of words to feature indices. - _char_vocab: dict. A mapping of chars to feature indices. - _label_vocab: dict. A mapping of labels to feature indices. - """ - - def __init__(self, lower=True, num_norm=True, - use_char=True, initial_vocab=None): - """Create a preprocessor object. - - Args: - lower: boolean. Whether to convert the texts to lowercase. - use_char: boolean. Whether to use char feature. - num_norm: boolean. Whether to normalize text. - initial_vocab: Iterable. Initial vocabulary for expanding word_vocab. 
- """ - self._num_norm = num_norm - self._use_char = use_char - self._word_vocab = Vocabulary(lower=lower) - self._char_vocab = Vocabulary(lower=False) - self._label_vocab = Vocabulary(lower=False, unk_token=False) - - if initial_vocab: - self._word_vocab.add_documents([initial_vocab]) - self._char_vocab.add_documents(initial_vocab) - - def fit(self, X, y): - """Learn vocabulary from training set. - - Args: - X : iterable. An iterable which yields either str, unicode or file objects. - - Returns: - self : IndexTransformer. - """ - self._word_vocab.add_documents(X) - self._label_vocab.add_documents(y) - if self._use_char: - for doc in X: - self._char_vocab.add_documents(doc) - - self._word_vocab.build() - self._char_vocab.build() - self._label_vocab.build() - - return self - - def transform(self, X, y=None): - """Transform documents to document ids. - - Uses the vocabulary learned by fit. - - Args: - X : iterable - an iterable which yields either str, unicode or file objects. - y : iterabl, label strings. - - Returns: - features: document id matrix. - y: label id matrix. - """ - word_ids = [self._word_vocab.doc2id(doc) for doc in X] - word_ids = pad_sequences(word_ids, padding='post') - - if self._use_char: - char_ids = [[self._char_vocab.doc2id(w) for w in doc] for doc in X] - char_ids = pad_nested_sequences(char_ids) - features = [word_ids, char_ids] - else: - features = word_ids - - if y is not None: - y = [self._label_vocab.doc2id(doc) for doc in y] - y = pad_sequences(y, padding='post') - y = to_categorical(y, self.label_size).astype(int) - # In 2018/06/01, to_categorical is a bit strange. - # >>> to_categorical([[1,3]], num_classes=4).shape - # (1, 2, 4) - # >>> to_categorical([[1]], num_classes=4).shape - # (1, 4) - # So, I expand dimensions when len(y.shape) == 2. - y = y if len(y.shape) == 3 else np.expand_dims(y, axis=0) - return features, y - else: - return features - - def fit_transform(self, X, y=None, **params): - """Learn vocabulary and return document id matrix. - - This is equivalent to fit followed by transform. - - Args: - X : iterable - an iterable which yields either str, unicode or file objects. - - Returns: - list : document id matrix. - list: label id matrix. - """ - return self.fit(X, y).transform(X, y) - - def inverse_transform(self, y, lengths=None): - """Return label strings. - - Args: - y: label id matrix. - lengths: sentences length. - - Returns: - list: list of list of strings. - """ - y = np.argmax(y, -1) - inverse_y = [self._label_vocab.id2doc(ids) for ids in y] - if lengths is not None: - inverse_y = [iy[:l] for iy, l in zip(inverse_y, lengths)] - - return inverse_y - - @property - def word_vocab_size(self): - return len(self._word_vocab) - - @property - def char_vocab_size(self): - return len(self._char_vocab) - - @property - def label_size(self): - return len(self._label_vocab) - - def save(self, file_path): - joblib.dump(self, file_path) - - @classmethod - def load(cls, file_path): - p = joblib.load(file_path) - - return p - - -def pad_nested_sequences(sequences, dtype='int32'): - """Pads nested sequences to the same length. - - This function transforms a list of list sequences - into a 3D Numpy array of shape `(num_samples, max_sent_len, max_word_len)`. - - Args: - sequences: List of lists of lists. - dtype: Type of the output sequences. - - # Returns - x: Numpy array. 
- """ - max_sent_len = 0 - max_word_len = 0 - for sent in sequences: - max_sent_len = max(len(sent), max_sent_len) - for word in sent: - max_word_len = max(len(word), max_word_len) - - x = np.zeros((len(sequences), max_sent_len, max_word_len)).astype(dtype) - for i, sent in enumerate(sequences): - for j, word in enumerate(sent): - x[i, j, :len(word)] = word - - return x - - -class ELMoTransformer(IndexTransformer): - - def __init__(self, lower=True, num_norm=True, - use_char=True, initial_vocab=None): - super(ELMoTransformer, self).__init__( - lower, num_norm, use_char, initial_vocab) - self._elmo = Elmo(options_file, weight_file, 2, dropout=0) - - def transform(self, X, y=None): - """Transform documents to document ids. - - Uses the vocabulary learned by fit. - - Args: - X : iterable - an iterable which yields either str, unicode or file objects. - y : iterabl, label strings. - - Returns: - features: document id matrix. - y: label id matrix. - """ - word_ids = [self._word_vocab.doc2id(doc) for doc in X] - word_ids = pad_sequences(word_ids, padding='post') - - char_ids = [[self._char_vocab.doc2id(w) for w in doc] for doc in X] - char_ids = pad_nested_sequences(char_ids) - - character_ids = batch_to_ids(X) - elmo_embeddings = self._elmo(character_ids)['elmo_representations'][1] - elmo_embeddings = elmo_embeddings.detach().numpy() - - features = [word_ids, char_ids, elmo_embeddings] - - if y is not None: - y = [self._label_vocab.doc2id(doc) for doc in y] - y = pad_sequences(y, padding='post') - y = to_categorical(y, self.label_size).astype(int) - # In 2018/06/01, to_categorical is a bit strange. - # >>> to_categorical([[1,3]], num_classes=4).shape - # (1, 2, 4) - # >>> to_categorical([[1]], num_classes=4).shape - # (1, 4) - # So, I expand dimensions when len(y.shape) == 2. - y = y if len(y.shape) == 3 else np.expand_dims(y, axis=0) - return features, y - else: - return features -""" -Model API. -""" -import numpy as np -from seqeval.metrics.sequence_labeling import get_entities - - -class Tagger(object): - """A model API that tags input sentence. - - Attributes: - model: Model. - preprocessor: Transformer. Preprocessing data for feature extraction. - tokenizer: Tokenize input sentence. Default tokenizer is `str.split`. - """ - - def __init__(self, model, preprocessor, tokenizer=str.split): - self.model = model - self.preprocessor = preprocessor - self.tokenizer = tokenizer - - def predict_proba(self, text): - """Probability estimates. - - The returned estimates for all classes are ordered by the - label of classes. - - Args: - text : string, the input text. - - Returns: - y : array-like, shape = [num_words, num_classes] - Returns the probability of the word for each class in the model, - """ - assert isinstance(text, str) - - words = self.tokenizer(text) - X = self.preprocessor.transform([words]) - y = self.model.predict(X) - y = y[0] # reduce batch dimension. 
-
- return y
-
- def _get_prob(self, pred):
- prob = np.max(pred, -1)
-
- return prob
-
- def _get_tags(self, pred):
- tags = self.preprocessor.inverse_transform([pred])
- tags = tags[0] # reduce batch dimension
-
- return tags
-
- def _build_response(self, sent, tags, prob):
- words = self.tokenizer(sent)
- res = {
- 'words': words,
- 'entities': []
- }
- chunks = get_entities(tags)
-
- for chunk_type, chunk_start, chunk_end in chunks:
- chunk_end += 1
- entity = {
- 'text': ' '.join(words[chunk_start: chunk_end]),
- 'type': chunk_type,
- 'score': float(np.average(prob[chunk_start: chunk_end])),
- 'beginOffset': chunk_start,
- 'endOffset': chunk_end
- }
- res['entities'].append(entity)
-
- return res
-
- def analyze(self, text):
- """Analyze text and return pretty format.
-
- Args:
- text: string, the input text.
-
- Returns:
- res: dict.
-
- Examples:
- >>> text = 'President Obama is speaking at the White House.'
- >>> model.analyze(text)
- {
- "words": [
- "President",
- "Obama",
- "is",
- "speaking",
- "at",
- "the",
- "White",
- "House."
- ],
- "entities": [
- {
- "beginOffset": 1,
- "endOffset": 2,
- "score": 1,
- "text": "Obama",
- "type": "PER"
- },
- {
- "beginOffset": 6,
- "endOffset": 8,
- "score": 1,
- "text": "White House.",
- "type": "ORG"
- }
- ]
- }
- """
- pred = self.predict_proba(text)
- tags = self._get_tags(pred)
- prob = self._get_prob(pred)
- res = self._build_response(text, tags, prob)
-
- return res
-
- def predict(self, text):
- """Predict using the model.
-
- Args:
- text: string, the input text.
-
- Returns:
- tags: list, shape = (num_words,)
- Returns predicted values.
- """
- pred = self.predict_proba(text)
- tags = self._get_tags(pred)
-
- return tags
-"""Training-related module.
-"""
-from anago.callbacks import F1score
-from anago.utils import NERSequence
-
-
-class Trainer(object):
- """A trainer that trains the model.
-
- Attributes:
- _model: Model.
- _preprocessor: Transformer. Preprocessing data for feature extraction.
- """
-
- def __init__(self, model, preprocessor=None):
- self._model = model
- self._preprocessor = preprocessor
-
- def train(self, x_train, y_train, x_valid=None, y_valid=None,
- epochs=1, batch_size=32, verbose=1, callbacks=None, shuffle=True):
- """Trains the model for a fixed number of epochs (iterations on a dataset).
-
- Args:
- x_train: list of training data.
- y_train: list of training target (label) data.
- x_valid: list of validation data.
- y_valid: list of validation target (label) data.
- batch_size: Integer.
- Number of samples per gradient update.
- If unspecified, `batch_size` will default to 32.
- epochs: Integer. Number of epochs to train the model.
- verbose: Integer. 0, 1, or 2. Verbosity mode.
- 0 = silent, 1 = progress bar, 2 = one line per epoch.
- callbacks: List of `keras.callbacks.Callback` instances.
- List of callbacks to apply during training.
- shuffle: Boolean (whether to shuffle the training data
- before each epoch). `shuffle` will default to True.
- """
-
- train_seq = NERSequence(
- x_train, y_train, batch_size, self._preprocessor.transform)
-
- if x_valid and y_valid:
- valid_seq = NERSequence(
- x_valid, y_valid, batch_size, self._preprocessor.transform)
- f1 = F1score(valid_seq, preprocessor=self._preprocessor)
- callbacks = [f1] + callbacks if callbacks else [f1]
-
- self._model.fit_generator(generator=train_seq,
- epochs=epochs,
- callbacks=callbacks,
- verbose=verbose,
- shuffle=shuffle)
-"""
-Utility functions.
-""" -import math -import os -from collections import Counter - -import numpy as np -from keras.utils import Sequence, get_file - - -def download(url): - """Download a trained weights, config and preprocessor. - - Args: - url (str): target url. - """ - filepath = get_file(fname='tmp.zip', origin=url, extract=True) - base_dir = os.path.dirname(filepath) - weights_file = os.path.join(base_dir, 'weights.h5') - params_file = os.path.join(base_dir, 'params.json') - preprocessor_file = os.path.join(base_dir, 'preprocessor.pickle') - - return weights_file, params_file, preprocessor_file - - -def load_data_and_labels(filename, encoding='utf-8'): - """Loads data and label from a file. - - Args: - filename (str): path to the file. - encoding (str): file encoding format. - - The file format is tab-separated values. - A blank line is required at the end of a sentence. - - For example: - ``` - EU B-ORG - rejects O - German B-MISC - call O - to O - boycott O - British B-MISC - lamb O - . O - - Peter B-PER - Blackburn I-PER - ... - ``` - - Returns: - tuple(numpy array, numpy array): data and labels. - - Example: - >>> filename = 'conll2003/en/ner/train.txt' - >>> data, labels = load_data_and_labels(filename) - """ - sents, labels = [], [] - words, tags = [], [] - with open(filename, encoding=encoding) as f: - for line in f: - line = line.rstrip() - if line: - word, tag = line.split('\t') - words.append(word) - tags.append(tag) - else: - sents.append(words) - labels.append(tags) - words, tags = [], [] - - return sents, labels - - -class NERSequence(Sequence): - - def __init__(self, x, y, batch_size=1, preprocess=None): - self.x = x - self.y = y - self.batch_size = batch_size - self.preprocess = preprocess - - def __getitem__(self, idx): - batch_x = self.x[idx * self.batch_size: (idx + 1) * self.batch_size] - batch_y = self.y[idx * self.batch_size: (idx + 1) * self.batch_size] - - return self.preprocess(batch_x, batch_y) - - def __len__(self): - return math.ceil(len(self.x) / self.batch_size) - - -class Vocabulary(object): - """A vocabulary that maps tokens to ints (storing a vocabulary). - - Attributes: - _token_count: A collections.Counter object holding the frequencies of tokens - in the data used to build the Vocabulary. - _token2id: A collections.defaultdict instance mapping token strings to - numerical identifiers. - _id2token: A list of token strings indexed by their numerical identifiers. - """ - - def __init__(self, max_size=None, lower=True, unk_token=True, specials=('',)): - """Create a Vocabulary object. - - Args: - max_size: The maximum size of the vocabulary, or None for no - maximum. Default: None. - lower: boolean. Whether to convert the texts to lowercase. - unk_token: boolean. Whether to add unknown token. - specials: The list of special tokens (e.g., padding or eos) that - will be prepended to the vocabulary. Default: ('',) - """ - self._max_size = max_size - self._lower = lower - self._unk = unk_token - self._token2id = {token: i for i, token in enumerate(specials)} - self._id2token = list(specials) - self._token_count = Counter() - - def __len__(self): - return len(self._token2id) - - def add_token(self, token): - """Add token to vocabulary. - - Args: - token (str): token to add. - """ - token = self.process_token(token) - self._token_count.update([token]) - - def add_documents(self, docs): - """Update dictionary from a collection of documents. Each document is a list - of tokens. - - Args: - docs (list): documents to add. 
- """ - for sent in docs: - sent = map(self.process_token, sent) - self._token_count.update(sent) - - def doc2id(self, doc): - """Get the list of token_id given doc. - - Args: - doc (list): document. - - Returns: - list: int id of doc. - """ - doc = map(self.process_token, doc) - return [self.token_to_id(token) for token in doc] - - def id2doc(self, ids): - """Get the token list. - - Args: - ids (list): token ids. - - Returns: - list: token list. - """ - return [self.id_to_token(idx) for idx in ids] - - def build(self): - """ - Build vocabulary. - """ - token_freq = self._token_count.most_common(self._max_size) - idx = len(self.vocab) - for token, _ in token_freq: - self._token2id[token] = idx - self._id2token.append(token) - idx += 1 - if self._unk: - unk = '' - self._token2id[unk] = idx - self._id2token.append(unk) - - def process_token(self, token): - """Process token before following methods: - * add_token - * add_documents - * doc2id - * token_to_id - - Args: - token (str): token to process. - - Returns: - str: processed token string. - """ - if self._lower: - token = token.lower() - - return token - - def token_to_id(self, token): - """Get the token_id of given token. - - Args: - token (str): token from vocabulary. - - Returns: - int: int id of token. - """ - token = self.process_token(token) - return self._token2id.get(token, len(self._token2id) - 1) - - def id_to_token(self, idx): - """token-id to token (string). - - Args: - idx (int): token id. - - Returns: - str: string of given token id. - """ - return self._id2token[idx] - - @property - def vocab(self): - """Return the vocabulary. - - Returns: - dict: get the dict object of the vocabulary. - """ - return self._token2id - - @property - def reverse_vocab(self): - """Return the vocabulary as a reversed dict object. - - Returns: - dict: reversed vocabulary object. - """ - return self._id2token - - -def filter_embeddings(embeddings, vocab, dim): - """Loads word vectors in numpy array. - - Args: - embeddings (dict): a dictionary of numpy array. - vocab (dict): word_index lookup table. - - Returns: - numpy array: an array of word embeddings. - """ - if not isinstance(embeddings, dict): - return - _embeddings = np.zeros([len(vocab), dim]) - for word in vocab: - if word in embeddings: - word_idx = vocab[word] - _embeddings[word_idx] = embeddings[word] - - return _embeddings - - -def load_glove(file): - """Loads GloVe vectors in numpy array. - - Args: - file (str): a path to a glove file. - - Return: - dict: a dict of numpy arrays. - """ - model = {} - with open(file, encoding="utf8", errors='ignore') as f: - for line in f: - line = line.split(' ') - word = line[0] - vector = np.array([float(val) for val in line[1:]]) - model[word] = vector - - return model -""" -Wrapper class. 
-""" -from seqeval.metrics import f1_score - -from anago.models import BiLSTMCRF, save_model, load_model -from anago.preprocessing import IndexTransformer -from anago.tagger import Tagger -from anago.trainer import Trainer -from anago.utils import filter_embeddings - - -class Sequence(object): - - def __init__(self, - word_embedding_dim=100, - char_embedding_dim=25, - word_lstm_size=100, - char_lstm_size=25, - fc_dim=100, - dropout=0.5, - embeddings=None, - use_char=True, - use_crf=True, - initial_vocab=None, - optimizer='adam'): - - self.model = None - self.p = None - self.tagger = None - - self.word_embedding_dim = word_embedding_dim - self.char_embedding_dim = char_embedding_dim - self.word_lstm_size = word_lstm_size - self.char_lstm_size = char_lstm_size - self.fc_dim = fc_dim - self.dropout = dropout - self.embeddings = embeddings - self.use_char = use_char - self.use_crf = use_crf - self.initial_vocab = initial_vocab - self.optimizer = optimizer - - def fit(self, x_train, y_train, x_valid=None, y_valid=None, - epochs=1, batch_size=32, verbose=1, callbacks=None, shuffle=True): - """Fit the model for a fixed number of epochs. - - Args: - x_train: list of training data. - y_train: list of training target (label) data. - x_valid: list of validation data. - y_valid: list of validation target (label) data. - batch_size: Integer. - Number of samples per gradient update. - If unspecified, `batch_size` will default to 32. - epochs: Integer. Number of epochs to train the model. - verbose: Integer. 0, 1, or 2. Verbosity mode. - 0 = silent, 1 = progress bar, 2 = one line per epoch. - callbacks: List of `keras.callbacks.Callback` instances. - List of callbacks to apply during training. - shuffle: Boolean (whether to shuffle the training data - before each epoch). `shuffle` will default to True. - """ - p = IndexTransformer( - initial_vocab=self.initial_vocab, use_char=self.use_char) - p.fit(x_train, y_train) - embeddings = filter_embeddings( - self.embeddings, p._word_vocab.vocab, self.word_embedding_dim) - - model = BiLSTMCRF(char_vocab_size=p.char_vocab_size, - word_vocab_size=p.word_vocab_size, - num_labels=p.label_size, - word_embedding_dim=self.word_embedding_dim, - char_embedding_dim=self.char_embedding_dim, - word_lstm_size=self.word_lstm_size, - char_lstm_size=self.char_lstm_size, - fc_dim=self.fc_dim, - dropout=self.dropout, - embeddings=embeddings, - use_char=self.use_char, - use_crf=self.use_crf) - model, loss = model.build() - model.compile(loss=loss, optimizer=self.optimizer) - - trainer = Trainer(model, preprocessor=p) - trainer.train(x_train, y_train, x_valid, y_valid, - epochs=epochs, batch_size=batch_size, - verbose=verbose, callbacks=callbacks, - shuffle=shuffle) - - self.p = p - self.model = model - - def predict(self, x_test): - """Returns the prediction of the model on the given test data. - - Args: - x_test : array-like, shape = (n_samples, sent_length) - Test samples. - - Returns: - y_pred : array-like, shape = (n_smaples, sent_length) - Prediction labels for x. - """ - if self.model: - lengths = map(len, x_test) - x_test = self.p.transform(x_test) - y_pred = self.model.predict(x_test) - y_pred = self.p.inverse_transform(y_pred, lengths) - return y_pred - else: - raise OSError('Could not find a model. Call load(dir_path).') - - def score(self, x_test, y_test): - """Returns the f1-micro score on the given test data and labels. - - Args: - x_test : array-like, shape = (n_samples, sent_length) - Test samples. 
-
- y_test : array-like, shape = (n_samples, sent_length)
- True labels for x.
-
- Returns:
- score : float, f1-micro score.
- """
- if self.model:
- x_test = self.p.transform(x_test)
- lengths = map(len, y_test)
- y_pred = self.model.predict(x_test)
- y_pred = self.p.inverse_transform(y_pred, lengths)
- score = f1_score(y_test, y_pred)
- return score
- else:
- raise OSError('Could not find a model. Call load(dir_path).')
-
- def analyze(self, text, tokenizer=str.split):
- """Analyze text and return pretty format.
-
- Args:
- text: string, the input text.
- tokenizer: Tokenize input sentence. Default tokenizer is `str.split`.
-
- Returns:
- res: dict.
- """
- if not self.tagger:
- self.tagger = Tagger(self.model,
- preprocessor=self.p,
- tokenizer=tokenizer)
-
- return self.tagger.analyze(text)
-
- def save(self, weights_file, params_file, preprocessor_file):
- self.p.save(preprocessor_file)
- save_model(self.model, weights_file, params_file)
-
- @classmethod
- def load(cls, weights_file, params_file, preprocessor_file):
- self = cls()
- self.p = IndexTransformer.load(preprocessor_file)
- self.model = load_model(weights_file, params_file)
-
- return self
-import os
-
-import anago
-from anago.utils import download, load_data_and_labels
-
-
-if __name__ == '__main__':
- url = 'https://s3-ap-northeast-1.amazonaws.com/dev.tech-sketch.jp/chakki/public/conll2003_en.zip'
- DATA_ROOT = os.path.join(os.path.dirname(
- __file__), '../data/conll2003/en/ner')
-
- test_path = os.path.join(DATA_ROOT, 'test.txt')
- x_test, y_test = load_data_and_labels(test_path)
-
- # download() takes only the url and returns the paths to the
- # extracted weights, params and preprocessor files.
- weights_file, params_file, preprocessor_file = download(url)
-
- model = anago.Sequence.load(weights_file, params_file, preprocessor_file)
- model.score(x_test, y_test)
-"""
-Example from training to saving.
-""" -import argparse -import os - -import numpy as np - -from anago.utils import load_data_and_labels, load_glove, filter_embeddings -from anago.models import ELModel -from anago.preprocessing import ELMoTransformer -from anago.trainer import Trainer - - -def main(args): - print('Loading dataset...') - x_train, y_train = load_data_and_labels(args.train_data) - x_valid, y_valid = load_data_and_labels(args.valid_data) - x_test, y_test = load_data_and_labels(args.test_data) - x_train = np.r_[x_train, x_valid] - y_train = np.r_[y_train, y_valid] - - print('Transforming datasets...') - p = ELMoTransformer() - p.fit(x_train, y_train) - - print('Loading word embeddings...') - embeddings = load_glove(EMBEDDING_PATH) - embeddings = filter_embeddings(embeddings, p._word_vocab.vocab, 100) - - print('Building a model.') - model = ELModel(char_embedding_dim=args.char_emb_size, - word_embedding_dim=args.word_emb_size, - char_lstm_size=args.char_lstm_units, - word_lstm_size=args.word_lstm_units, - char_vocab_size=p.char_vocab_size, - word_vocab_size=p.word_vocab_size, - num_labels=p.label_size, - embeddings=embeddings, - dropout=args.dropout) - model, loss = model.build() - model.compile(loss=loss, optimizer='adam') - - print('Training the model...') - trainer = Trainer(model, preprocessor=p) - trainer.train(x_train, y_train, x_test, y_test) - - print('Saving the model...') - model.save(args.weights_file, args.params_file) - # p.save(args.preprocessor_file) - - -if __name__ == '__main__': - DATA_DIR = os.path.join(os.path.dirname(__file__), - '../data/conll2003/en/ner') - EMBEDDING_PATH = os.path.join(os.path.dirname( - __file__), '../data/glove.6B/glove.6B.100d.txt') - parser = argparse.ArgumentParser(description='Training a model') - parser.add_argument( - '--train_data', default=os.path.join(DATA_DIR, 'train.txt'), help='training data') - parser.add_argument( - '--valid_data', default=os.path.join(DATA_DIR, 'valid.txt'), help='validation data') - parser.add_argument( - '--test_data', default=os.path.join(DATA_DIR, 'test.txt'), help='test data') - parser.add_argument( - '--weights_file', default='weights.h5', help='weights file') - parser.add_argument( - '--params_file', default='params.json', help='parameter file') - parser.add_argument('--preprocessor_file', default='preprocessor.json') - # Training parameters - parser.add_argument('--optimizer', default='adam', help='optimizer') - parser.add_argument('--max_epoch', type=int, default=15, help='max epoch') - parser.add_argument('--batch_size', type=int, - default=32, help='batch size') - parser.add_argument('--checkpoint_path', default=None, - help='checkpoint path') - parser.add_argument('--log_dir', default=None, help='log directory') - parser.add_argument('--early_stopping', - action='store_true', help='early stopping') - # Model parameters - parser.add_argument('--char_emb_size', type=int, - default=25, help='character embedding size') - parser.add_argument('--word_emb_size', type=int, - default=100, help='word embedding size') - parser.add_argument('--char_lstm_units', type=int, - default=25, help='num of character lstm units') - parser.add_argument('--word_lstm_units', type=int, - default=100, help='num of word lstm units') - parser.add_argument('--dropout', type=float, - default=0.5, help='dropout rate') - - args = parser.parse_args() - main(args) -import os - -import anago -from anago.utils import load_data_and_labels, load_glove - - -if __name__ == '__main__': - DATA_ROOT = os.path.join(os.path.dirname( - __file__), 
'../data/conll2003/en/ner') - EMBEDDING_PATH = os.path.join(os.path.dirname( - __file__), '../data/glove.6B/glove.6B.100d.txt') - - train_path = os.path.join(DATA_ROOT, 'train.txt') - valid_path = os.path.join(DATA_ROOT, 'valid.txt') - - print('Loading data...') - x_train, y_train = load_data_and_labels(train_path) - x_valid, y_valid = load_data_and_labels(valid_path) - print(len(x_train), 'train sequences') - print(len(x_valid), 'valid sequences') - - embeddings = load_glove(EMBEDDING_PATH) - - # Use pre-trained word embeddings - model = anago.Sequence(embeddings=embeddings) - model.fit(x_train, y_train, x_valid, y_valid) -import os - -from gensim.models.keyedvectors import KeyedVectors - -import anago -from anago.utils import load_data_and_labels - - -if __name__ == '__main__': - DATA_ROOT = os.path.join(os.path.dirname( - __file__), '../data/conll2003/en/ner') - EMBEDDING_PATH = 'model.txt' - - train_path = os.path.join(DATA_ROOT, 'train.txt') - valid_path = os.path.join(DATA_ROOT, 'valid.txt') - - print('Loading data...') - x_train, y_train = load_data_and_labels(train_path) - x_valid, y_valid = load_data_and_labels(valid_path) - print(len(x_train), 'train sequences') - print(len(x_valid), 'valid sequences') - - embeddings = KeyedVectors.load_word2vec_format(EMBEDDING_PATH).wv - - # Use pre-trained word embeddings - model = anago.Sequence(embeddings=embeddings) - model.fit(x_train, y_train, x_valid, y_valid) -""" -Tagging example. -""" -import argparse -import os -from pprint import pprint - -from anago.tagger import Tagger -from anago.models import BiLSTMCRF -from anago.preprocessing import IndexTransformer - - -def main(args): - print('Loading objects...') - model = BiLSTMCRF.load(args.weights_file, args.params_file) - it = IndexTransformer.load(args.preprocessor_file) - tagger = Tagger(model, preprocessor=it) - - print('Tagging a sentence...') - res = tagger.analyze(args.sent) - pprint(res) - - -if __name__ == '__main__': - SAVE_DIR = os.path.join(os.path.dirname(__file__), 'models') - parser = argparse.ArgumentParser(description='Tagging a sentence.') - parser.add_argument( - '--sent', default='President Obama is speaking at the White House.') - parser.add_argument('--save_dir', default=SAVE_DIR) - parser.add_argument( - '--weights_file', default=os.path.join(SAVE_DIR, 'model_weights.h5')) - parser.add_argument( - '--params_file', default=os.path.join(SAVE_DIR, 'params.json')) - parser.add_argument('--preprocessor_file', - default=os.path.join(SAVE_DIR, 'preprocessor.json')) - args = parser.parse_args() - main(args) -""" -Example from training to saving. 
-""" -import argparse -import os - -from anago.utils import load_data_and_labels -from anago.models import BiLSTMCRF -from anago.preprocessing import IndexTransformer -from anago.trainer import Trainer - - -def main(args): - print('Loading dataset...') - x_train, y_train = load_data_and_labels(args.train_data) - x_valid, y_valid = load_data_and_labels(args.valid_data) - - print('Transforming datasets...') - p = IndexTransformer(use_char=args.no_char_feature) - p.fit(x_train, y_train) - - print('Building a model.') - model = BiLSTMCRF(char_embedding_dim=args.char_emb_size, - word_embedding_dim=args.word_emb_size, - char_lstm_size=args.char_lstm_units, - word_lstm_size=args.word_lstm_units, - char_vocab_size=p.char_vocab_size, - word_vocab_size=p.word_vocab_size, - num_labels=p.label_size, - dropout=args.dropout, - use_char=args.no_char_feature, - use_crf=args.no_use_crf) - model, loss = model.build() - model.compile(loss=loss, optimizer='adam') - - print('Training the model...') - trainer = Trainer(model, preprocessor=p) - trainer.train(x_train, y_train, x_valid, y_valid) - - print('Saving the model...') - model.save(args.weights_file, args.params_file) - p.save(args.preprocessor_file) - - -if __name__ == '__main__': - DATA_DIR = os.path.join(os.path.dirname(__file__), - '../data/conll2003/en/ner') - parser = argparse.ArgumentParser(description='Training a model') - parser.add_argument( - '--train_data', default=os.path.join(DATA_DIR, 'train.txt'), help='training data') - parser.add_argument( - '--valid_data', default=os.path.join(DATA_DIR, 'valid.txt'), help='validation data') - parser.add_argument( - '--weights_file', default='weights.h5', help='weights file') - parser.add_argument( - '--params_file', default='params.json', help='parameter file') - # Training parameters - parser.add_argument( - '--loss', default='categorical_crossentropy', help='loss') - parser.add_argument('--optimizer', default='adam', help='optimizer') - parser.add_argument('--max_epoch', type=int, default=15, help='max epoch') - parser.add_argument('--batch_size', type=int, - default=32, help='batch size') - parser.add_argument('--checkpoint_path', default=None, - help='checkpoint path') - parser.add_argument('--log_dir', default=None, help='log directory') - parser.add_argument('--early_stopping', - action='store_true', help='early stopping') - # Model parameters - parser.add_argument('--char_emb_size', type=int, - default=25, help='character embedding size') - parser.add_argument('--word_emb_size', type=int, - default=100, help='word embedding size') - parser.add_argument('--char_lstm_units', type=int, - default=25, help='num of character lstm units') - parser.add_argument('--word_lstm_units', type=int, - default=100, help='num of word lstm units') - parser.add_argument('--dropout', type=float, - default=0.5, help='dropout rate') - parser.add_argument('--no_char_feature', - action='store_false', help='use char feature') - parser.add_argument( - '--no_use_crf', action='store_false', help='use crf layer') - - args = parser.parse_args() - main(args) -import os -import shutil -import unittest - -from anago.models import BiLSTMCRF, load_model, save_model - - -class TestModel(unittest.TestCase): - - @classmethod - def setUpClass(cls): - cls.save_root = os.path.join(os.path.dirname(__file__), 'models') - cls.weights_file = os.path.join(cls.save_root, 'weights.h5') - cls.params_file = os.path.join(cls.save_root, 'params.json') - if not os.path.exists(cls.save_root): - os.mkdir(cls.save_root) - if 
os.path.exists(cls.weights_file):
- os.remove(cls.weights_file)
- if os.path.exists(cls.params_file):
- os.remove(cls.params_file)
-
- @classmethod
- def tearDownClass(cls):
- shutil.rmtree(cls.save_root)
-
- def test_build_model(self):
- char_vocab_size = 100
- word_vocab_size = 10000
- num_labels = 10
-
- # Normal.
- model = BiLSTMCRF(char_vocab_size=char_vocab_size,
- word_vocab_size=word_vocab_size,
- num_labels=num_labels)
- model.build()
-
- # No CRF.
- model = BiLSTMCRF(char_vocab_size=char_vocab_size,
- word_vocab_size=word_vocab_size,
- num_labels=num_labels,
- use_crf=False)
- model.build()
-
- # No character feature.
- model = BiLSTMCRF(char_vocab_size=char_vocab_size,
- word_vocab_size=word_vocab_size,
- num_labels=num_labels,
- use_char=False)
- model.build()
-
- def test_save_and_load(self):
- char_vocab_size = 100
- word_vocab_size = 10000
- num_labels = 10
-
- model = BiLSTMCRF(char_vocab_size=char_vocab_size,
- word_vocab_size=word_vocab_size,
- num_labels=num_labels)
- model, loss = model.build()
-
- self.assertFalse(os.path.exists(self.weights_file))
- self.assertFalse(os.path.exists(self.params_file))
-
- save_model(model, self.weights_file, self.params_file)
-
- self.assertTrue(os.path.exists(self.weights_file))
- self.assertTrue(os.path.exists(self.params_file))
-
- model = load_model(self.weights_file, self.params_file)
-import os
-import shutil
-import unittest
-
-import numpy as np
-
-from anago.preprocessing import IndexTransformer, pad_nested_sequences
-
-
-class TestIndexTransformer(unittest.TestCase):
-
- def setUp(self):
- self.x = [['a'], ['aa', 'ab'], ['AA', 'ab', 'ac']]
- self.y = [['O'], ['B-A', 'I-A'], ['O', 'O', 'B-A']]
-
- @classmethod
- def setUpClass(cls):
- cls.save_root = os.path.join(os.path.dirname(__file__), 'data')
- cls.preprocessor_file = os.path.join(cls.save_root, 'preprocessor.pkl')
- if not os.path.exists(cls.save_root):
- os.mkdir(cls.save_root)
- if os.path.exists(cls.preprocessor_file):
- os.remove(cls.preprocessor_file)
-
- @classmethod
- def tearDownClass(cls):
- shutil.rmtree(cls.save_root)
-
- def test_vocab_size_lower_on(self):
- word_vocab_size = 4
- char_vocab_size = 4
- label_size = 3
-
- # lower is effective.
- it = IndexTransformer(lower=True)
- it.fit(self.x, self.y)
- self.assertEqual(it.word_vocab_size, word_vocab_size + 2) # pad, unk
- self.assertEqual(it.char_vocab_size, char_vocab_size + 2) # pad, unk
- self.assertEqual(it.label_size, label_size + 1) # pad
-
- def test_vocab_size_lower_off(self):
- word_vocab_size = 5
- char_vocab_size = 4
- label_size = 3
-
- # lower is not effective.
- it = IndexTransformer(lower=False)
- it.fit(self.x, self.y)
- self.assertEqual(it.word_vocab_size, word_vocab_size + 2) # pad, unk
- self.assertEqual(it.char_vocab_size, char_vocab_size + 2) # pad, unk
- self.assertEqual(it.label_size, label_size + 1) # pad
-
- def test_vocab_size_with_initial_vocab(self):
- vocab = {'aaa', 'aab', 'aac'}
- word_vocab_size = 4 + len(vocab)
- char_vocab_size = 4
- label_size = 3
-
- # Add initial vocab.
- it = IndexTransformer(lower=True, initial_vocab=vocab)
- it.fit(self.x, self.y)
- self.assertEqual(it.word_vocab_size, word_vocab_size + 2) # pad, unk
- self.assertEqual(it.char_vocab_size, char_vocab_size + 2) # pad, unk
- self.assertEqual(it.label_size, label_size + 1) # pad
-
- def test_transform_without_character(self):
- # No character feature.
- it = IndexTransformer(use_char=False)
- x, y = it.fit_transform(self.x, self.y)
-
- # Check sequence length.
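- # transform() pads within a batch, so only the number of rows is
- # guaranteed to match the number of input sentences.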
- self.assertEqual(len(x), len(self.x)) - self.assertEqual(len(y), len(self.y)) - - # Check sequence type. - self.assertIsInstance(x, np.ndarray) - self.assertIsInstance(y, np.ndarray) - - def test_transform_with_character(self): - # With character feature. - it = IndexTransformer(use_char=True) - X, y = it.fit_transform(self.x, self.y) - words, chars = X - - # Check sequence length. - self.assertEqual(len(words), len(self.x)) - self.assertEqual(len(chars), len(self.x)) - self.assertEqual(len(y), len(self.y)) - - # Check sequence type. - self.assertIsInstance(words, np.ndarray) - self.assertIsInstance(chars, np.ndarray) - self.assertIsInstance(y, np.ndarray) - - def test_transform_unknown_token(self): - it = IndexTransformer() - it.fit(self.x, self.y) - - x_train, y_train = [['aaa']], [['X']] - X, y = it.transform(x_train, y_train) - words, chars = X - - # Check sequence length. - self.assertEqual(len(words), len(x_train)) - self.assertEqual(len(chars), len(x_train)) - self.assertEqual(len(y), len(y_train)) - - # Check sequence type. - self.assertIsInstance(words, np.ndarray) - self.assertIsInstance(chars, np.ndarray) - self.assertIsInstance(y, np.ndarray) - - def test_inverse_transform(self): - it = IndexTransformer() - x, y = it.fit_transform(self.x, self.y) - lengths = map(len, self.y) - inv_y = it.inverse_transform(y, lengths) - self.assertEqual(inv_y, self.y) - - def test_inverse_transform_unknown_token(self): - x_train, y_train = [['a', 'b']], [['X', 'O']] - it = IndexTransformer() - it.fit(self.x, self.y) - _, y = it.transform(x_train, y_train) - inv_y = it.inverse_transform(y) - self.assertNotEqual(inv_y, self.y) - - def test_inverse_transform_one_cat(self): - x_train, y_train = [['a']], [['O']] - it = IndexTransformer() - it.fit(self.x, self.y) - _, y = it.transform(x_train, y_train) - inv_y = it.inverse_transform(y) - self.assertNotEqual(inv_y, self.y) - - def test_save_and_load(self): - it = IndexTransformer(lower=False) - x1, y1 = it.fit_transform(self.x, self.y) - x1_word, x1_char = x1 - - self.assertFalse(os.path.exists(self.preprocessor_file)) - it.save(self.preprocessor_file) - self.assertTrue(os.path.exists(self.preprocessor_file)) - - it = IndexTransformer.load(self.preprocessor_file) - x2, y2 = it.transform(self.x, self.y) - x2_word, x2_char = x2 - - np.testing.assert_array_equal(x1_word, x2_word) - np.testing.assert_array_equal(x1_char, x2_char) - np.testing.assert_array_equal(y1, y2) - - -class TestPadding(unittest.TestCase): - - def test_pad_nested_sequences(self): - sequences = [[[1, 2, 3, 4], [1, 2], [1], [1, 2, 3]], - [[1, 2, 3, 4, 5], [1, 2], [1, 2, 3, 4]]] - expected_seq = [[[1, 2, 3, 4, 0], [1, 2, 0, 0, 0], [1, 0, 0, 0, 0], [1, 2, 3, 0, 0]], - [[1, 2, 3, 4, 5], [1, 2, 0, 0, 0], [1, 2, 3, 4, 0], [0, 0, 0, 0, 0]]] - padded_seq = pad_nested_sequences(sequences) - np.testing.assert_equal(padded_seq, expected_seq) - - sequences = [[[1, 2], [1]]] - expected_seq = [[[1, 2], [1, 0]]] - padded_seq = pad_nested_sequences(sequences) - np.testing.assert_equal(padded_seq, expected_seq) - - sequences = [[[1], []]] - expected_seq = [[[1], [0]]] - padded_seq = pad_nested_sequences(sequences) - np.testing.assert_equal(padded_seq, expected_seq) - - sequences = [[[1]]] - expected_seq = [[[1]]] - padded_seq = pad_nested_sequences(sequences) - np.testing.assert_equal(padded_seq, expected_seq) - - sequences = [[[]]] - expected_seq = [[[]]] - padded_seq = pad_nested_sequences(sequences) - np.testing.assert_equal(padded_seq, expected_seq) -import os -import unittest - -import numpy as 
np -import tensorflow as tf - -import anago -from anago.models import load_model -from anago.preprocessing import IndexTransformer - -DATA_ROOT = os.path.join(os.path.dirname(__file__), '../data/conll2003/en/ner') -SAVE_ROOT = os.path.join(os.path.dirname(__file__), 'models') - - -class TestTagger(unittest.TestCase): - - @classmethod - def setUpClass(cls): - weights_file = os.path.join(SAVE_ROOT, 'weights.h5') - params_file = os.path.join(SAVE_ROOT, 'params.json') - preprocessor_file = os.path.join(SAVE_ROOT, 'preprocessor.pickle') - - # Load preprocessor - p = IndexTransformer.load(preprocessor_file) - - # Load the model. - model = load_model(weights_file, params_file) - - # Build a tagger - cls.tagger = anago.Tagger(model, preprocessor=p) - - cls.sent = 'President Obama is speaking at the White House.' - - def test_predict_proba(self): - res = self.tagger.predict_proba(self.sent) - self.assertIsInstance(res, np.ndarray) - self.assertEqual(len(res), len(self.sent.split())) - - res = self.tagger.predict_proba('Obama') - self.assertIsInstance(res, np.ndarray) - self.assertEqual(len(res), len('Obama'.split())) - - with self.assertRaises(tf.errors.InvalidArgumentError): - res = self.tagger.predict_proba('') - - def test_analyze(self): - res = self.tagger.analyze(self.sent) - self.assertIsInstance(res, dict) - self.assertIn('words', res) - self.assertIn('entities', res) - self.assertIsInstance(res['words'], list) - self.assertIsInstance(res['entities'], list) - for w in res['words']: - self.assertIsInstance(w, str) - for e in res['entities']: - self.assertIsInstance(e, dict) - self.assertIn('beginOffset', e) - self.assertIn('endOffset', e) - self.assertIn('score', e) - self.assertIn('text', e) - self.assertIn('type', e) - - def test_predict_labels(self): - res = self.tagger.predict(self.sent) - self.assertEqual(len(res), len(self.sent.split())) - self.assertIsInstance(res, list) - for tag in res: - self.assertIsInstance(tag, str) -import os -import unittest - -from anago.utils import load_data_and_labels -from anago.models import BiLSTMCRF, save_model -from anago.preprocessing import IndexTransformer -from anago.trainer import Trainer - - -def get_path(path): return os.path.join(os.path.dirname(__file__), path) - - -DATA_ROOT = get_path('../data/conll2003/en/ner') -SAVE_ROOT = get_path('models') # trained model -LOG_ROOT = get_path('logs') # checkpoint, tensorboard -EMBEDDING_PATH = get_path('../data/glove.6B/glove.6B.100d.txt') - - -class TestTrainer(unittest.TestCase): - - @classmethod - def setUpClass(cls): - if not os.path.exists(LOG_ROOT): - os.mkdir(LOG_ROOT) - - if not os.path.exists(SAVE_ROOT): - os.mkdir(SAVE_ROOT) - - cls.weights_file = os.path.join(SAVE_ROOT, 'weights.h5') - cls.params_file = os.path.join(SAVE_ROOT, 'params.json') - cls.preprocessor_file = os.path.join(SAVE_ROOT, 'preprocessor.pickle') - - def setUp(self): - # Load datasets. - train_path = os.path.join(DATA_ROOT, 'train.txt') - valid_path = os.path.join(DATA_ROOT, 'valid.txt') - self.x_train, self.y_train = load_data_and_labels(train_path) - self.x_valid, self.y_valid = load_data_and_labels(valid_path) - - # Fit transformer. - self.p = IndexTransformer() - self.p.fit(self.x_train, self.y_train) - - # Build a model. 
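- # build() returns both the Keras model and the loss to compile with
- # (the CRF loss function when the CRF output layer is enabled).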
- self.model = BiLSTMCRF(char_vocab_size=self.p.char_vocab_size, - word_vocab_size=self.p.word_vocab_size, - num_labels=self.p.label_size) - self.model, loss = self.model.build() - self.model.compile(loss=loss, optimizer='adam') - - def test_train(self): - trainer = Trainer(self.model, preprocessor=self.p) - trainer.train(self.x_train, self.y_train, - x_valid=self.x_valid, y_valid=self.y_valid) - - def test_train_no_valid(self): - trainer = Trainer(self.model, preprocessor=self.p) - trainer.train(self.x_train, self.y_train) - - def test_train_no_crf(self): - model = BiLSTMCRF(char_vocab_size=self.p.char_vocab_size, - word_vocab_size=self.p.word_vocab_size, - num_labels=self.p.label_size, - use_crf=False) - model, loss = model.build() - model.compile(loss=loss, optimizer='adam') - trainer = Trainer(model, preprocessor=self.p) - trainer.train(self.x_train, self.y_train, - x_valid=self.x_valid, y_valid=self.y_valid) - - def test_train_no_character(self): - p = IndexTransformer(use_char=False) - p.fit(self.x_train, self.y_train) - model = BiLSTMCRF(word_vocab_size=p.word_vocab_size, - num_labels=p.label_size, - use_crf=False, - use_char=False) - model, loss = model.build() - model.compile(loss=loss, optimizer='adam') - trainer = Trainer(model, preprocessor=p) - trainer.train(self.x_train, self.y_train, - x_valid=self.x_valid, y_valid=self.y_valid) - - def test_save(self): - # Train the model. - trainer = Trainer(self.model, preprocessor=self.p) - trainer.train(self.x_train, self.y_train) - - # Save the model. - save_model(self.model, self.weights_file, self.params_file) - self.p.save(self.preprocessor_file) -import os -import unittest - -from anago.utils import load_data_and_labels, Vocabulary, download, NERSequence -from anago.preprocessing import IndexTransformer - - -class TestUtils(unittest.TestCase): - - def setUp(self): - self.filename = os.path.join(os.path.dirname( - __file__), '../data/conll2003/en/ner/test.txt') - - def test_extract(self): - X, y = load_data_and_labels(self.filename) - self.assertTrue(len(X) == len(y)) - - def test_batch_iter(self): - X, y = load_data_and_labels(self.filename) - batch_size = 32 - p = IndexTransformer() - p.fit(X, y) - gen = NERSequence(X, y, batch_size, preprocess=p.transform) - - y_gen = [] - for i in range(len(gen)): - x1, y1 = gen[i] - y_gen.extend(y1) - self.assertEqual(len(y_gen), len(y)) - - def test_download(self): - url = 'https://s3-ap-northeast-1.amazonaws.com/dev.tech-sketch.jp/chakki/public/conll2003_en.zip' - weights_file, params_file, preprocessor_file = download(url) - - self.assertTrue(os.path.exists(weights_file)) - self.assertTrue(os.path.exists(params_file)) - self.assertTrue(os.path.exists(preprocessor_file)) - - -class TestVocabulary(unittest.TestCase): - - def test_add_documents(self): - # word vocabulary. 
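- # build() assigns ids in frequency order, after any special tokens.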
- docs = [['a'], ['a', 'b'], ['a', 'b', 'c']]
- token2id = {'<pad>': 0, 'a': 1, 'b': 2, 'c': 3, '<unk>': 4}
- vocab = Vocabulary()
- vocab.add_documents(docs)
- vocab.build()
- self.assertEqual(vocab._token2id, token2id)
-
- token2id = {'<pad>': 0, 'a': 1, 'b': 2, 'c': 3}
- vocab = Vocabulary(unk_token=False)
- vocab.add_documents(docs)
- vocab.build()
- self.assertEqual(vocab._token2id, token2id)
-
- token2id = {'<pad>': 0, '<unk>': 1, 'a': 2, 'b': 3, 'c': 4}
- vocab = Vocabulary(unk_token=False, specials=('<pad>', '<unk>'))
- vocab.add_documents(docs)
- vocab.build()
- self.assertEqual(vocab._token2id, token2id)
-
- token2id = {'a': 0, 'b': 1, 'c': 2}
- vocab = Vocabulary(unk_token=False, specials=())
- vocab.add_documents(docs)
- vocab.build()
- self.assertEqual(vocab._token2id, token2id)
-
- # char vocabulary.
- docs = ['hoge', 'fuga', 'bar']
- vocab = Vocabulary()
- vocab.add_documents(docs)
- vocab.build()
- num_chars = len(set(''.join(docs))) + 2
- self.assertEqual(len(vocab._token2id), num_chars)
-
- def test_doc2id(self):
- # word ids.
- docs = [['a'], ['a', 'b'], ['a', 'b', 'c']]
- vocab = Vocabulary()
- vocab.add_documents(docs)
- vocab.build()
- another_doc = ['a', 'b', 'c', 'd']
- doc_ids = vocab.doc2id(another_doc)
- self.assertEqual(doc_ids, [1, 2, 3, 4])
-
- # char_ids.
- docs = ['hoge', 'fuga', 'bar']
- vocab = Vocabulary()
- vocab.add_documents(docs)
- vocab.build()
- doc_ids = vocab.doc2id(docs[0])
- correct = [vocab.token_to_id(c) for c in docs[0]]
- self.assertEqual(doc_ids, correct)
-
- def test_id2doc(self):
- # word ids.
- docs = [['B-PSN'], ['B-ORG', 'I-ORG'], ['B-LOC', 'I-LOC', 'O']]
- vocab = Vocabulary(unk_token=False, lower=False)
- vocab.add_documents(docs)
- vocab.build()
- true_doc = ['O', 'B-LOC', 'O', 'O']
- doc_ids = vocab.doc2id(true_doc)
- pred_doc = vocab.id2doc(doc_ids)
- self.assertEqual(pred_doc, true_doc)
-import os
-import unittest
-from pprint import pprint
-
-import numpy as np
-from keras.callbacks import ModelCheckpoint
-
-import anago
-from anago.utils import load_data_and_labels, load_glove
-
-
-def get_path(path): return os.path.join(os.path.dirname(__file__), path)
-
-
-DATA_ROOT = get_path('../data/conll2003/en/ner')
-SAVE_ROOT = get_path('models') # trained model
-LOG_ROOT = get_path('logs') # checkpoint, tensorboard
-EMBEDDING_PATH = get_path('../data/glove.6B/glove.6B.100d.txt')
-
-
-class TestWrapper(unittest.TestCase):
-
- @classmethod
- def setUpClass(cls):
- if not os.path.exists(LOG_ROOT):
- os.mkdir(LOG_ROOT)
-
- if not os.path.exists(SAVE_ROOT):
- os.mkdir(SAVE_ROOT)
-
- train_path = os.path.join(DATA_ROOT, 'train.txt')
- valid_path = os.path.join(DATA_ROOT, 'valid.txt')
- test_path = os.path.join(DATA_ROOT, 'test.txt')
-
- x_train, y_train = load_data_and_labels(train_path)
- x_valid, y_valid = load_data_and_labels(valid_path)
- cls.x_test, cls.y_test = load_data_and_labels(test_path)
- cls.x_train = np.r_[x_train, x_valid]
- cls.y_train = np.r_[y_train, y_valid]
-
- cls.embeddings = load_glove(EMBEDDING_PATH)
- cls.text = 'President Obama is speaking at the White House.'
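- # The merged train+valid split above is used for fitting in the tests
- # below, while the test split doubles as validation/scoring data.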
- cls.dir_path = 'models'
-
- def test_train_without_pretrained_embedding(self):
- model = anago.Sequence()
- model.fit(self.x_train, self.y_train, self.x_test, self.y_test)
-
- def test_train_with_pretrained_embedding(self):
- model = anago.Sequence(embeddings=self.embeddings)
- model.fit(self.x_train, self.y_train, self.x_test, self.y_test)
-
- def test_score(self):
- model = anago.Sequence()
- model.fit(self.x_train, self.y_train)
- score = model.score(self.x_test, self.y_test)
- self.assertIsInstance(score, float)
-
- def test_analyze(self):
- model = anago.Sequence()
- model.fit(self.x_train, self.y_train)
- res = model.analyze(self.text)
- pprint(res)
-
- self.assertIn('words', res)
- self.assertIn('entities', res)
-
- def test_save_and_load(self):
- weights_file = os.path.join(SAVE_ROOT, 'weights.h5')
- params_file = os.path.join(SAVE_ROOT, 'params.json')
- preprocessor_file = os.path.join(SAVE_ROOT, 'preprocessor.pickle')
-
- model = anago.Sequence()
- model.fit(self.x_train, self.y_train)
- model.save(weights_file, params_file, preprocessor_file)
- score1 = model.score(self.x_test, self.y_test)
-
- self.assertTrue(os.path.exists(weights_file))
- self.assertTrue(os.path.exists(params_file))
- self.assertTrue(os.path.exists(preprocessor_file))
-
- model = anago.Sequence.load(
- weights_file, params_file, preprocessor_file)
- score2 = model.score(self.x_test, self.y_test)
-
- self.assertEqual(score1, score2)
-
- def test_train_vocab_init(self):
- vocab = set()
- for words in np.r_[self.x_train, self.x_test, self.x_test]:
- for word in words:
- vocab.add(word)
- model = anago.Sequence(initial_vocab=vocab, embeddings=self.embeddings)
- model.fit(self.x_train, self.y_train, self.x_test, self.y_test)
-
- def test_load(self):
- weights_file = os.path.join(SAVE_ROOT, 'weights.h5')
- params_file = os.path.join(SAVE_ROOT, 'params.json')
- preprocessor_file = os.path.join(SAVE_ROOT, 'preprocessor.pickle')
- model = anago.Sequence.load(
- weights_file, params_file, preprocessor_file)
- score = model.score(self.x_test, self.y_test)
- print(score)
-
- def test_train_callbacks(self):
- weights_file = os.path.join(SAVE_ROOT, 'weights.h5')
- params_file = os.path.join(SAVE_ROOT, 'params.json')
- preprocessor_file = os.path.join(SAVE_ROOT, 'preprocessor.pickle')
-
- log_dir = os.path.join(os.path.dirname(__file__), 'logs')
- file_name = '_'.join(['weights', '{epoch:02d}', '{f1:2.4f}']) + '.h5'
- callback = ModelCheckpoint(os.path.join(log_dir, file_name),
- monitor='f1',
- save_weights_only=True)
- vocab = set()
- for words in np.r_[self.x_train, self.x_test, self.x_test]:
- for word in words:
- vocab.add(word)
- model = anago.Sequence(initial_vocab=vocab, embeddings=self.embeddings)
- model.fit(self.x_train, self.y_train, self.x_test, self.y_test,
- epochs=100, callbacks=[callback])
- model.save(weights_file, params_file, preprocessor_file)
-#!/usr/bin/env python
-import os
-import sys
-
-if __name__ == "__main__":
- os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings")
-
- from django.core.management import execute_from_command_line
-
- execute_from_command_line(sys.argv)
-# Register your models here.
-from django.contrib import admin # noqa
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-
-from django.apps import AppConfig
-
-
-class BackendapiConfig(AppConfig):
- name = 'backendAPI'
-from django.conf.urls import url
-from views import check_login
-
-urlpatterns = [
- url(r'^checkLogin$', check_login)
-]
-# -*- coding: utf-8 -*-
-from __future__ import unicode_literals
-from django.http import JsonResponse
-from django.contrib.auth.models import User
-
-
-def check_login(request):
- try:
- if request.GET.get('isOAuth') == 'false':
- username = request.GET['username']
- password = request.GET['password']
- user = User.objects.get(username=username)
- user_id = user.id
-
- if not user.check_password(password):
- return JsonResponse({
- 'result': False,
- 'error': 'Please enter valid credentials'
- })
-
- is_authenticated = user.is_authenticated()
- if (is_authenticated):
- username = user.username
-
- return JsonResponse({
- 'result': is_authenticated,
- 'user_id': user_id,
- 'username': username,
- })
- else:
- user = User.objects.get(username=request.user.username)
- user_id = user.id
- username = 'Anonymous'
-
- is_authenticated = user.is_authenticated()
- if (is_authenticated):
- username = user.username
-
- return JsonResponse({
- 'result': is_authenticated,
- 'user_id': user_id,
- 'username': username
- })
- except Exception as e:
- return JsonResponse({
- 'result': False,
- 'error': str(e)
- })
-# Register your models here.
-from django.contrib import admin
-from .models import SharedWith, Network
-
-admin.site.register(SharedWith)
-admin.site.register(Network)
-from __future__ import unicode_literals
-
-from django.apps import AppConfig
-
-
-class CaffeAppConfig(AppConfig):
- name = 'caffe_app'
-import json
-import yaml
-import urlparse
-from channels import Group
-from channels.auth import channel_session_user, channel_session_user_from_http
-from caffe_app.models import Network, NetworkVersion, NetworkUpdates
-from ide.views import get_network_version
-from ide.tasks import export_caffe_prototxt, export_keras_json
-
-
-def create_network_version(network, netObj):
- # creating a unique version of the network to allow revert and view history
- network_version = NetworkVersion(network=netObj)
- network_version.network_def = network
- network_version.save()
- return network_version
-
-
-def create_network_update(network_version, updated_data, tag):
- network_update = NetworkUpdates(network_version=network_version,
- updated_data=updated_data,
- tag=tag)
- return network_update
-
-
-def fetch_network_version(netObj):
- network_version = NetworkVersion.objects.filter(
- network=netObj).order_by('-created_on')[0]
- updates_batch = NetworkUpdates.objects.filter(
- network_version=network_version)
-
- # Batching updates
- # Note - size of batch is 20 for now, optimization can be done
- if len(updates_batch) == 2:
- data = get_network_version(netObj)
- network_version = NetworkVersion(
- network=netObj, network_def=json.dumps(data['network']))
- network_version.save()
-
- network_update = NetworkUpdates(network_version=network_version,
- updated_data=json.dumps(
- {'nextLayerId': data['next_layer_id']}),
- tag='CheckpointCreated')
- network_update.save()
- return network_version
-
-
-def update_data(data, required_data, version_id=0):
- '''
- Parses data to include only required keys and returns the required object
- '''
-
- updated_data = {key: data[key] for key in required_data}
- group_data = updated_data.copy()
- group_data['action'] = data['action']
-
- if ('randomId'
in data): - group_data['randomId'] = data['randomId'] - group_data['version_id'] = version_id - - group_data = {"text": json.dumps(group_data)} - - return updated_data, group_data - - -@channel_session_user_from_http -def ws_connect(message): - print('Connection being established...') - message.reply_channel.send({ - 'accept': True - }) - # extracting id of network from url params - params = urlparse.parse_qs(message.content['query_string']) - networkId = params.get('id', ('Not Supplied',))[0] - message.channel_session['networkId'] = networkId - # adding socket to a group based on networkId to send updates of network - Group('model-{0}'.format(networkId)).add(message.reply_channel) - - -@channel_session_user -def ws_disconnect(message): - networkId = message.channel_session['networkId'] - Group('model-{0}'.format(networkId)).discard(message.reply_channel) - print('Disconnected...') - - -@channel_session_user -def ws_receive(message): - print('Message received...') - # param initialization - data = yaml.safe_load(message['text']) - action = data['action'] - - update_params = { - 'UpdateHighlight': ['addHighlightTo', 'removeHighlightFrom', 'userId', 'highlightColor', 'username'], - 'UpdateParam': ['layerId', 'param', 'value', 'isProp'], - 'DeleteLayer': ['layerId'], - 'AddLayer': ['layer', 'layerId', 'prevLayerId', 'nextLayerId'], - 'AddComment': ['layerId', 'comment'] - } - - if ('networkId' in message.channel_session): - networkId = message.channel_session['networkId'] - - if (action == 'ExportNet'): - # async export call - framework = data['framework'] - net = data['net'] - net_name = data['net_name'] - - reply_channel = message.reply_channel.name - - if (framework == 'caffe'): - export_caffe_prototxt.delay(net, net_name, reply_channel) - elif (framework == 'keras'): - export_keras_json.delay(net, net_name, False, reply_channel) - elif (framework == 'tensorflow'): - export_keras_json.delay(net, net_name, True, reply_channel) - - elif (action == 'UpdateHighlight'): - group_data = update_data(data, update_params['UpdateHighlight'])[1] - - Group('model-{0}'.format(networkId)).send(group_data) - elif (action in update_params): - # get the net object on which update is made - netObj = Network.objects.get(id=int(networkId)) - network_version = fetch_network_version(netObj) - - updated_data, group_data = update_data(data, update_params[action]) - - network_update = create_network_update( - network_version, json.dumps(updated_data), data['action']) - network_update.save() - - Group('model-{0}'.format(networkId)).send(group_data) -from __future__ import unicode_literals -from django.contrib.auth.models import User - -from django.db import models -from django.contrib.postgres.fields import JSONField - - -class Network(models.Model): - name = models.CharField(max_length=100) - author = models.ForeignKey(User, blank=True, null=True) - public_sharing = models.BooleanField(default=False) - - def __unicode__(self): - return self.id - - -class NetworkVersion(models.Model): - network = models.ForeignKey(Network) - network_def = JSONField() - created_on = models.DateTimeField(auto_now_add=True) - - def __unicode__(self): - return self.id - - -class NetworkUpdates(models.Model): - network_version = models.ForeignKey(NetworkVersion) - updated_data = JSONField() - tag = models.CharField(max_length=100) - created_on = models.DateTimeField(auto_now_add=True) - - def __unicode__(self): - return self.tag - - -class SharedWith(models.Model): - ACCESS_PRIVILEGE = ( - ('E', 'Can Edit'), - ('V', 'Can View'), - ('C', 
'Can Comment') - ) - network = models.ForeignKey(Network) - user = models.ForeignKey(User) - access_privilege = models.CharField(max_length=1, choices=ACCESS_PRIVILEGE) - created_on = models.DateField(auto_now_add=True) - updated_on = models.DateField(auto_now_add=True) - - def __unicode__(self): - return self.user.username -from django.conf.urls import url -from views.import_prototxt import import_prototxt -from views.export_prototxt import export_to_caffe - -urlpatterns = [ - url(r'^export$', export_to_caffe, name='caffe-export'), - url(r'^import$', import_prototxt, name='caffe-import'), -] -from __future__ import absolute_import - -# This will make sure the app is always imported when -# Django starts so that shared_task will use this app. -from .celery_app import app as celery_app # noqa -""" -ASGI entrypoint. Configures Django and then runs the application -defined in the ASGI_APPLICATION setting. -""" - -import os - -from channels.asgi import get_channel_layer - -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings") - -channel_layer = get_channel_layer() -from __future__ import absolute_import -import os -from celery import Celery -from django.conf import settings - -# set the default Django settings module for the 'celery' program. -os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'settings') -app = Celery('app', broker='redis://redis:6379/0', - backend='redis://redis:6379/0', include=['ide.tasks']) - -# Using a string here means the worker will not have to -# pickle the object when using Windows. -app.config_from_object('settings') -app.autodiscover_tasks(lambda: settings.INSTALLED_APPS) - - -@app.task(bind=True) -def debug_task(self): - print('Request: {0!r}'.format(self.request)) -from channels.routing import route, include -from channels.staticfiles import StaticFilesConsumer # noqa: ignore=F405 -from caffe_app.consumers import ws_connect, ws_disconnect, ws_receive - -# routes defined for channel calls -# this is similar to the Django urls, but specifically for Channels -ws_routing = [ - route('websocket.connect', ws_connect), - route('websocket.receive', ws_receive), - route('websocket.disconnect', ws_disconnect) -] - -channel_routing = [ - include(ws_routing, path=r"^/ws/connect"), -] -import json -import os -import sys -import imp -import yaml -import random -import string -from datetime import datetime -from channels import Channel -from utils.jsonToPrototxt import json_to_prototxt -from celery.decorators import task -from keras.models import Model -from keras_app.views.layers_export import data, convolution, deconvolution, pooling, dense, dropout, embed,\ - recurrent, batch_norm, activation, flatten, reshape, eltwise, concat, upsample, locally_connected,\ - permute, repeat_vector, regularization, masking, gaussian_noise, gaussian_dropout, alpha_dropout, \ - bidirectional, time_distributed, lrn, depthwiseConv -from keras_app.custom_layers import config as custom_layers_config -from keras.models import model_from_json -import tensorflow as tf -from keras import backend as K - -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - - -def randomword(length): - return ''.join(random.choice(string.lowercase) for i in range(length)) - - -@task(name="export_to_caffe", bind=True) -def export_caffe_prototxt(self, net, net_name, reply_channel): - net = yaml.safe_load(net) - if net_name == '': - net_name = 'Net' - try: - prototxt, input_dim = json_to_prototxt(net, net_name) - randomId = datetime.now().strftime('%Y%m%d%H%M%S')+randomword(5) - - with open(BASE_DIR 
+ '/media/' + randomId + '.prototxt', 'w+') as f: - f.write(prototxt) - - Channel(reply_channel).send({ - 'text': json.dumps({ - 'result': 'success', - 'action': 'ExportNet', - 'name': randomId + '.prototxt', - 'url': '/media/' + randomId + '.prototxt' - }) - }) - except: - Channel(reply_channel).send({ - 'text': json.dumps({ - 'result': 'error', - 'action': 'ExportNet', - 'error': str(sys.exc_info()[1]) - }) - }) - - -@task(name="export_to_keras") -def export_keras_json(net, net_name, is_tf, reply_channel): - net = yaml.safe_load(net) - if net_name == '': - net_name = 'Net' - - layer_map = { - 'ImageData': data, - 'Data': data, - 'Input': data, - 'WindowData': data, - 'MemoryData': data, - 'DummyData': data, - 'InnerProduct': dense, - 'Softmax': activation, - 'SELU': activation, - 'Softplus': activation, - 'Softsign': activation, - 'ReLU': activation, - 'TanH': activation, - 'Sigmoid': activation, - 'HardSigmoid': activation, - 'Linear': activation, - 'Dropout': dropout, - 'Flatten': flatten, - 'Reshape': reshape, - 'Permute': permute, - 'RepeatVector': repeat_vector, - 'Regularization': regularization, - 'Masking': masking, - 'Convolution': convolution, - 'Deconvolution': deconvolution, - 'DepthwiseConv': depthwiseConv, - 'Upsample': upsample, - 'Pooling': pooling, - 'LocallyConnected': locally_connected, - 'RNN': recurrent, - 'GRU': recurrent, - 'LSTM': recurrent, - 'Embed': embed, - 'Concat': concat, - 'Eltwise': eltwise, - 'PReLU': activation, - 'ELU': activation, - 'ThresholdedReLU': activation, - 'BatchNorm': batch_norm, - 'GaussianNoise': gaussian_noise, - 'GaussianDropout': gaussian_dropout, - 'AlphaDropout': alpha_dropout, - 'Scale': '', - 'TimeDistributed': time_distributed, - 'Bidirectional': bidirectional - } - - custom_layers_map = { - 'LRN': lrn - } - - # Remove any duplicate activation layers (timedistributed and bidirectional layers) - redundant_layers = [] - for layerId in net: - if (net[layerId]['connection']['input'] - and net[net[layerId]['connection']['input'][0]]['info']['type'] in - ['TimeDistributed', 'Bidirectional']): - if len(net[layerId]['connection']['output']) > 0: - target = net[layerId]['connection']['output'][0] - outputs = net[target]['connection']['output'] - if len(outputs) > 0: - net[layerId]['connection']['output'] = outputs - for j in outputs: - net[j]['connection']['input'] = [ - x if (x != target) else layerId for x in net[j]['connection']['input']] - redundant_layers.append(target) - elif (net[layerId]['info']['type'] == 'Input' - and net[net[layerId]['connection']['output'][0]]['info']['type'] in - ['TimeDistributed', 'Bidirectional']): - connected_layer = net[layerId]['connection']['output'][0] - net[connected_layer]['params']['batch_input_shape'] = net[layerId]['params']['dim'] - for i in redundant_layers: - del net[i] - - # Check if conversion is possible - # Note : Error handling can be improved further - error = [] - custom_layers = [] - for key, value in custom_layers_map.iteritems(): - layer_map[key] = value - for layerId in net: - layerType = net[layerId]['info']['type'] - if (layerType in custom_layers_map): - custom_layers.append(layerType) - if ('Loss' in layerType or layerType == - 'Accuracy' or layerType in layer_map): - pass - else: - error.append(layerId + '(' + layerType + ')') - break - if len(error): - Channel(reply_channel).send({ - 'text': json.dumps({ - 'result': 'error', - 'action': 'ExportNet', - 'error': 'Cannot convert ' + ', '.join(error) + ' to Keras' - }) - }) - return - - stack = [] - net_out = {} - dataLayers = 
['ImageData', 'Data', 'HDF5Data', 'Input', 'WindowData', - 'MemoryData', 'DummyData', 'Bidirectional', - 'TimeDistributed'] - processedLayer = {} - inputLayerId = [] - outputLayerId = [] - - def isProcessPossible(layerId): - inputs = net[layerId]['connection']['input'] - for layerId in inputs: - if processedLayer[layerId] is False: - return False - return True - - # Finding the data layer - for layerId in net: - processedLayer[layerId] = False - if (net[layerId]['info']['type'] == 'Python'): - error.append(layerId + '(Python)') - continue - if(net[layerId]['info']['type'] in dataLayers): - stack.append(layerId) - if (not net[layerId]['connection']['input']): - inputLayerId.append(layerId) - if (not net[layerId]['connection']['output']): - outputLayerId.append(layerId) - if len(error): - Channel(reply_channel).send({ - 'text': json.dumps({ - 'result': 'error', - 'action': 'ExportNet', - 'error': 'Cannot convert ' + ', '.join(error) + ' to Keras' - }) - }) - return - - while(len(stack)): - if ('Loss' in net[layerId]['info']['type'] or - net[layerId]['info']['type'] == 'Accuracy'): - pass - elif (net[layerId]['info']['type'] in layer_map): - i = len(stack) - 1 - while isProcessPossible(stack[i]) is False: - i = i - 1 - layerId = stack[i] - stack.remove(layerId) - if (net[layerId]['info']['type'] != 'Scale'): - layer_in = [net_out[inputId] - for inputId in net[layerId]['connection']['input']] - # Need to check if next layer is Scale - if (net[layerId]['info']['type'] == 'BatchNorm'): - idNext = net[layerId]['connection']['output'][0] - nextLayer = net[idNext] - # If the BN layer is followed by Scale, then we need to pass both layers - # as in Keras parameters from both go into one single layer - net_out.update(layer_map[net[layerId]['info']['type']]( - net[layerId], layer_in, layerId, idNext, nextLayer)) - elif (net[layerId]['info']['type'] == 'Scale'): - type = net[net[layerId]['connection'] - ['input'][0]]['info']['type'] - if (type != 'BatchNorm'): - Channel(reply_channel).send({ - 'text': json.dumps({ - 'result': 'error', - 'action': 'ExportNet', - 'error': 'Cannot convert ' + - net[layerId]['info']['type'] + - ' to Keras' - }) - }) - - elif (net[layerId]['info']['type'] in ['TimeDistributed', 'Bidirectional']): - idNext = net[layerId]['connection']['output'][0] - net_out.update( - layer_map[net[layerId]['info']['type']](layerId, idNext, net, layer_in, layer_map)) - if len(net[idNext]['connection']['output']) > 0: - net[net[idNext]['connection']['output'][0] - ]['connection']['input'] = [layerId] - processedLayer[idNext] = True - processedLayer[layerId] = True - else: - if (net[layerId]['info']['type'] in layer_map): - net_out.update(layer_map[net[layerId]['info']['type']]( - net[layerId], layer_in, layerId)) - else: - error.append( - layerId + '(' + net[layerId]['info']['type'] + ')') - break - for outputId in net[layerId]['connection']['output']: - if outputId not in stack: - stack.append(outputId) - processedLayer[layerId] = True - else: - error.append( - layerId + '(' + net[layerId]['info']['type'] + ')') - break - - if len(error) > 0: - Channel(reply_channel).send({ - 'text': json.dumps({ - 'result': 'error', - 'action': 'ExportNet', - 'error': 'Cannot convert ' + ', '.join(error) + ' to Keras' - }) - }) - return - - final_input = [] - final_output = [] - for i in inputLayerId: - final_input.append(net_out[i]) - - for j in outputLayerId: - if (net[net[j]['connection']['input'][0]]['info']['type'] in - ['TimeDistributed', 'Bidirectional']): - 
final_output.append(net_out[net[j]['connection']['input'][0]]) - else: - final_output.append(net_out[j]) - - model = Model(inputs=final_input, outputs=final_output, name=net_name) - json_string = model.to_json() - - randomId = datetime.now().strftime('%Y%m%d%H%M%S') + randomword(5) - with open(BASE_DIR + '/media/' + randomId + '.json', 'w') as f: - json.dump(json.loads(json_string), f, indent=4) - - custom_layers_response = [] - for layer in set(custom_layers): - layer_data = {'name': layer} - layer_data.update(custom_layers_config.config[layer]) - custom_layers_response.append(layer_data) - - if(is_tf): - # export the Keras model as a TensorFlow graph - input_file = randomId + '.json' - output_file = randomId - - K.set_learning_phase(0) - - output_fld = BASE_DIR + '/media/' - - with open(output_fld + input_file, 'r') as f: - json_str = f.read() - - json_str = json_str.strip("'<>() ").replace('\'', '\"') - lrnLayer = imp.load_source( - 'LRN', BASE_DIR + '/keras_app/custom_layers/lrn.py') - - model = model_from_json(json_str, {'LRN': lrnLayer.LRN}) - - sess = K.get_session() - tf.train.write_graph(sess.graph.as_graph_def(add_shapes=True), output_fld, - output_file + '.pbtxt', as_text=True) - - Channel(reply_channel).send({ - 'text': json.dumps({ - 'result': 'success', - 'action': 'ExportNet', - 'id': randomId, - 'name': randomId + '.pbtxt', - 'url': '/media/' + randomId + '.pbtxt', - 'customLayers': custom_layers_response - }) - }) - else: - Channel(reply_channel).send({ - 'text': json.dumps({ - 'result': 'success', - 'action': 'ExportNet', - 'id': randomId, - 'name': randomId + '.json', - 'url': '/media/' + randomId + '.json', - 'customLayers': custom_layers_response - }) - }) -from django.conf.urls import url, include -from django.contrib import admin -from django.conf.urls.static import static -from django.conf import settings -from views import index, calculate_parameter, fetch_layer_shape -from views import load_from_db, save_to_db, fetch_model_history - -urlpatterns = [ - url(r'^$', index), - url(r'^admin/', admin.site.urls), - url(r'^accounts/', include('allauth.urls')), - url(r'^backendAPI/', include('backendAPI.urls')), - url(r'^caffe/', include('caffe_app.urls')), - url(r'^keras/', include('keras_app.urls')), - url(r'^tensorflow/', include('tensorflow_app.urls')), - url(r'^save$', save_to_db, name='saveDB'), - url(r'^load*', load_from_db, name='loadDB'), - url(r'^model_history', fetch_model_history, name='model-history'), - url(r'^model_parameter/', calculate_parameter, name='calculate-parameter'), - url(r'^layer_parameter/', fetch_layer_shape, name='fetch-layer-shape') -] + static(settings.MEDIA_URL, document_root=settings.MEDIA_ROOT) + \ - static(settings.STATIC_URL, document_root=settings.STATIC_ROOT) -import copy -import sys -import yaml -import json - -from caffe_app.models import Network, NetworkVersion, NetworkUpdates -from django.shortcuts import render -from django.http import JsonResponse -from django.views.decorators.csrf import csrf_exempt -from django.contrib.auth.models import User -from utils.shapes import get_shapes, get_layer_shape, handle_concat_layer - - -def index(request): - return render(request, 'index.html') - - -@csrf_exempt -def fetch_layer_shape(request): - if request.method == 'POST': - net = yaml.safe_load(request.POST.get('net')) - layerId = request.POST.get('layerId') - try: - net[layerId]['shape'] = {} - net[layerId]['shape']['input'] = None - net[layerId]['shape']['output'] = None - dataLayers = ['ImageData', 'Data', 'HDF5Data', - 'Input',
'WindowData', 'MemoryData', 'DummyData'] - - # Obtain input shape of new layer - if (net[layerId]['info']['type'] == "Concat"): - for parentLayerId in net[layerId]['connection']['input']: - # Check if the parent layer has shapes - if (net[parentLayerId]['shape']['output']): - net[layerId]['shape']['input'] = handle_concat_layer( - net[layerId], net[parentLayerId]) - elif (not (net[layerId]['info']['type'] in dataLayers)): - if (len(net[layerId]['connection']['input']) > 0): - parentLayerId = net[layerId]['connection']['input'][0] - # Check if the parent layer has shapes - if (net[parentLayerId]['shape']['output']): - net[layerId]['shape']['input'] = net[parentLayerId]['shape']['output'][:] - - # Obtain output shape of new layer - if (net[layerId]['info']['type'] in dataLayers): - # handling Data Layers separately - if ('dim' in net[layerId]['params'] and len(net[layerId]['params']['dim'])): - # layers with empty dim parameter can't be passed - net[layerId]['shape']['input'], net[layerId]['shape']['output'] =\ - get_layer_shape(net[layerId]) - elif ('dim' not in net[layerId]['params']): - # shape calculation for layers with no dim param - net[layerId]['shape']['input'], net[layerId]['shape']['output'] =\ - get_layer_shape(net[layerId]) - else: - if (net[layerId]['shape']['input']): - net[layerId]['shape']['output'] = get_layer_shape( - net[layerId]) - except BaseException: - return JsonResponse({ - 'result': 'error', 'error': str(sys.exc_info()[1])}) - return JsonResponse({'result': 'success', 'net': net}) - - -@csrf_exempt -def calculate_parameter(request): - if request.method == 'POST': - net = yaml.safe_load(request.POST.get('net')) - try: - # While calling get_shapes we need to remove the flag - # added in frontend to show the parameter on pane - netObj = copy.deepcopy(net) - for layerId in netObj: - for param in netObj[layerId]['params']: - netObj[layerId]['params'][param] = netObj[layerId]['params'][param][0] - # use get_shapes method to obtain shapes of each layer - netObj = get_shapes(netObj) - for layerId in net: - net[layerId]['shape'] = {} - net[layerId]['shape']['input'] = netObj[layerId]['shape']['input'] - net[layerId]['shape']['output'] = netObj[layerId]['shape']['output'] - except BaseException: - return JsonResponse({ - 'result': 'error', 'error': str(sys.exc_info()[1])}) - return JsonResponse({'result': 'success', 'net': net}) - - -@csrf_exempt -def save_to_db(request): - if request.method == 'POST': - net = request.POST.get('net') - net_name = request.POST.get('net_name') - user_id = request.POST.get('user_id') - next_layer_id = request.POST.get('nextLayerId') - public_sharing = True - user = None - if net_name == '': - net_name = 'Net' - try: - # making model sharing public by default for now - # TODO: Privilege on Sharing - if user_id: - user_id = int(user_id) - user = User.objects.get(id=user_id) - - # create a new model on share event - model = Network( - name=net_name, public_sharing=public_sharing, author=user) - model.save() - # create first version of model - model_version = NetworkVersion(network=model, network_def=net) - model_version.save() - # create initial update for nextLayerId - model_update = NetworkUpdates(network_version=model_version, - updated_data=json.dumps( - {'nextLayerId': next_layer_id}), - tag='ModelShared') - model_update.save() - - return JsonResponse({'result': 'success', 'id': model.id}) - except Exception: - return JsonResponse({'result': 'error', 'error': str(sys.exc_info()[1])}) - - -def create_network_version(network_def, updates_batch): - network_def =
yaml.safe_load(network_def) - next_layer_id = 0 - - for network_update in updates_batch: - updated_data = json.loads(network_update.updated_data) - tag = network_update.tag - - if 'nextLayerId' in updated_data: - next_layer_id = updated_data['nextLayerId'] - - if tag == 'UpdateParam': - # Update Param UI event handling - param = updated_data['param'] - layer_id = updated_data['layerId'] - value = updated_data['value'] - - if updated_data['isProp']: - network_def[layer_id]['props'][param] = value - else: - network_def[layer_id]['params'][param][0] = value - - elif tag == 'DeleteLayer': - # Delete layer UI event handling - layer_id = updated_data['layerId'] - input_layer_ids = network_def[layer_id]['connection']['input'] - output_layer_ids = network_def[layer_id]['connection']['output'] - - for input_layer_id in input_layer_ids: - network_def[input_layer_id]['connection']['output'].remove( - layer_id) - - for output_layer_id in output_layer_ids: - network_def[output_layer_id]['connection']['input'].remove( - layer_id) - - del network_def[layer_id] - - elif tag == 'AddLayer': - # Add layer UI event handling - prev_layer_id = updated_data['prevLayerId'] - new_layer_id = updated_data['layerId'] - - if isinstance(prev_layer_id, list): - for layer_id in prev_layer_id: - network_def[layer_id]['connection']['output'].append( - new_layer_id) - else: - network_def[prev_layer_id]['connection']['output'].append( - new_layer_id) - network_def[new_layer_id] = updated_data['layer'] - - elif tag == 'AddComment': - layer_id = updated_data['layerId'] - comment = updated_data['comment'] - - if ('comments' not in network_def[layer_id]): - network_def[layer_id]['comments'] = [] - network_def[layer_id]['comments'].append(comment) - - return { - 'network': network_def, - 'next_layer_id': next_layer_id - } - - -def get_network_version(netObj): - network_version = NetworkVersion.objects.filter( - network=netObj).order_by('-created_on')[0] - updates_batch = NetworkUpdates.objects.filter( - network_version=network_version).order_by('created_on') - - return create_network_version(network_version.network_def, updates_batch) - - -def get_checkpoint_version(netObj, checkpoint_id): - network_update = NetworkUpdates.objects.get(id=checkpoint_id) - network_version = network_update.network_version - - updates_batch = NetworkUpdates.objects.filter(network_version=network_version)\ - .filter(created_on__lte=network_update.created_on)\ - .order_by('created_on') - return create_network_version(network_version.network_def, updates_batch) - - -@csrf_exempt -def load_from_db(request): - if request.method == 'POST': - if 'proto_id' in request.POST: - try: - model = Network.objects.get(id=int(request.POST['proto_id'])) - version_id = None - data = {} - - if 'version_id' in request.POST and request.POST['version_id'] != '': - # added for loading any previous version of model - version_id = int(request.POST['version_id']) - data = get_checkpoint_version(model, version_id) - else: - # fetch the required version of model - data = get_network_version(model) - - net = data['network'] - next_layer_id = data['next_layer_id'] - - # authorizing the user for access to model - if not model.public_sharing: - return JsonResponse({'result': 'error', - 'error': 'Permission denied for access to model'}) - except Exception: - return JsonResponse({'result': 'error', - 'error': 'No network file found'}) - return JsonResponse({'result': 'success', 'net': net, 'net_name': model.name, - 'next_layer_id': next_layer_id}) - - if request.method == 'GET': - 
return index(request) - - -@csrf_exempt -def fetch_model_history(request): - if request.method == 'POST': - try: - network_id = int(request.POST['net_id']) - network = Network.objects.get(id=network_id) - network_versions = NetworkVersion.objects.filter( - network=network).order_by('created_on') - - modelHistory = {} - for version in network_versions: - network_updates = NetworkUpdates.objects.filter(network_version=version)\ - .order_by('created_on') - for update in network_updates: - modelHistory[update.id] = update.tag - - return JsonResponse({ - 'result': 'success', - 'data': modelHistory - }) - except Exception: - return JsonResponse({ - 'result': 'error', - 'error': 'Unable to load model history' - }) -""" -WSGI config for ide project. - -It exposes the WSGI callable as a module-level variable named ``application``. - -For more information on this file, see -https://docs.djangoproject.com/en/1.9/howto/deployment/wsgi/ -""" - -import os - -from django.core.wsgi import get_wsgi_application - -os.environ.setdefault("DJANGO_SETTINGS_MODULE", "settings") - -application = get_wsgi_application() -# Register your models here. -from __future__ import unicode_literals - -from django.apps import AppConfig - -import startup - - -class KerasAppConfig(AppConfig): - name = 'keras_app' - - -startup.run() -from __future__ import unicode_literals - -# Create your models here. -import os -from custom_layers import config - - -BASE_DIR = os.path.dirname( - os.path.dirname( - os.path.abspath(__file__))) - - -def run(): - for key, layer in config.config.iteritems(): - os.system('cp ' + BASE_DIR + '/keras_app/custom_layers/' + layer['filename'] + ' ' - + BASE_DIR + '/media') -# Create your tests here. -from django.conf.urls import url -from views.import_json import import_json -from views.export_json import export_json - -urlpatterns = [ - url(r'^import$', import_json, name='keras-import'), - url(r'^export$', export_json, name='keras-export') -] -# -*- coding: utf-8 -*- -from __future__ import absolute_import - -# TODO: Add support for production environment settings - -import sys - -TEST = [arg for arg in sys.argv if 'test' in arg] -if TEST: - print("Using Test settings") - from .test import * # noqa -else: - try: - from .dev import * # noqa - print("Using Dev settings") - except ImportError: - pass -import os - -# Build paths inside the project like this: os.path.join(BASE_DIR, ...) -BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) - -STATICFILES_DIRS = ( - os.path.join(BASE_DIR, 'ide/static'), -) - -# SECURITY WARNING: keep the secret key used in production secret! -SECRET_KEY = 's9&vp1jq1yzr!1c_temg#v_)j-a)i5+@vbsekmi6pbjl4l1&u@' - -# SECURITY WARNING: don't run with debug turned on in production! 
-DEBUG = True - -ALLOWED_HOSTS = ['*'] - - -# Application definition - -INSTALLED_APPS = [ - 'channels', - 'caffe_app.apps.CaffeAppConfig', - 'keras_app.apps.KerasAppConfig', - 'tensorflow_app.apps.TensorflowAppConfig', - 'backendAPI.apps.BackendapiConfig', - 'django.contrib.admin', - 'django.contrib.auth', - 'django.contrib.contenttypes', - 'django.contrib.sessions', - 'django.contrib.messages', - 'django.contrib.staticfiles', - 'django.contrib.sites', - 'allauth', - 'allauth.account', - 'allauth.socialaccount', - 'allauth.socialaccount.providers.github', - 'allauth.socialaccount.providers.google' -] - -MIDDLEWARE_CLASSES = [ - 'django.middleware.security.SecurityMiddleware', - 'django.contrib.sessions.middleware.SessionMiddleware', - 'django.middleware.common.CommonMiddleware', - 'django.middleware.csrf.CsrfViewMiddleware', - 'django.contrib.auth.middleware.AuthenticationMiddleware', - 'django.contrib.auth.middleware.SessionAuthenticationMiddleware', - 'django.contrib.messages.middleware.MessageMiddleware', - 'django.middleware.clickjacking.XFrameOptionsMiddleware', -] - -ROOT_URLCONF = 'ide.urls' - -TEMPLATES = [ - { - 'BACKEND': 'django.template.backends.django.DjangoTemplates', - 'DIRS': [os.path.join(BASE_DIR, 'ide/templates')], - 'APP_DIRS': True, - 'OPTIONS': { - 'context_processors': [ - 'django.template.context_processors.debug', - 'django.template.context_processors.request', - 'django.contrib.auth.context_processors.auth', - 'django.contrib.messages.context_processors.messages', - ], - }, - }, -] - -WSGI_APPLICATION = 'ide.wsgi.application' - -SITE_ID = 1 - -# Internationalization -# https://docs.djangoproject.com/en/1.9/topics/i18n/ - -LANGUAGE_CODE = 'en-us' - -TIME_ZONE = 'UTC' - -USE_I18N = True - -USE_L10N = True - -USE_TZ = True - - -# Static files (CSS, JavaScript, Images) -# https://docs.djangoproject.com/en/1.9/howto/static-files/ - -STATIC_URL = '/static/' -STATIC_ROOT = os.path.join(BASE_DIR, 'static') - -MEDIA_ROOT = os.path.join(BASE_DIR, 'media') -MEDIA_URL = '/media/' - - -AUTHENTICATION_BACKENDS = ( - "django.contrib.auth.backends.ModelBackend", - "allauth.account.auth_backends.AuthenticationBackend", -) - -LOGIN_REDIRECT_URL = '/' -ACCOUNT_LOGOUT_ON_GET = True - - -EMAIL_BACKEND = 'django.core.mail.backends.console.EmailBackend' - - -CHANNEL_LAYERS = { - "default": { - "BACKEND": "asgi_redis.RedisChannelLayer", - "CONFIG": { - # replace redis hostname to localhost if running on local system - "hosts": [("redis", 6379)], - "prefix": u'fabrik:', - }, - "ROUTING": "ide.routing.channel_routing", - }, -} - -CELERY_RESULT_BACKEND = 'redis://redis:6379/0' -from .common import * # noqa: ignore=F405 -import os - -# Database -# https://docs.djangoproject.com/en/1.9/ref/settings/#databases - -DEBUG = True - -DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.postgresql_psycopg2', - 'NAME': os.environ.get("POSTGRES_NAME", 'postgres'), - 'USER': os.environ.get("POSTGRES_USER", 'postgres'), - 'PASSWORD': os.environ.get("POSTGRES_PASSWORD", 'postgres'), - 'HOST': os.environ.get("POSTGRES_HOST", 'db'), - 'PORT': os.environ.get("POSTGRES_PORT", 5432), - } -} -from .common import * # noqa: ignore=F405 - -# Database -# https://docs.djangoproject.com/en/1.9/ref/settings/#databases - -DEBUG = False - -DATABASES = { - 'default': { - 'ENGINE': 'django.db.backends.postgresql_psycopg2', - 'NAME': 'fabrik', - 'USER': 'admin', - 'PASSWORD': 'fabrik', - 'HOST': 'localhost', - 'PORT': 5432, - } -} - -TEST = True -# Register your models here. 
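# Illustrative sketch, not from the scraped repos: the settings package
# above picks its configuration by scanning sys.argv for 'test'. A minimal
# standalone version of that dispatch, assuming sibling modules dev.py and
# test.py that each define DATABASES and DEBUG, might look like:
import sys

if any('test' in arg for arg in sys.argv):
    from .test import *  # noqa: F401,F403  (fixed local test DB, DEBUG = False)
else:
    from .dev import *  # noqa: F401,F403  (env-driven dev DB, DEBUG = True)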
-from __future__ import unicode_literals - -from django.apps import AppConfig - - -class TensorflowAppConfig(AppConfig): - name = 'tensorflow_app' -from __future__ import unicode_literals - -# Create your models here. -# Create your tests here. -from django.conf.urls import url -from views.import_graphdef import import_graph_def -from views.export_graphdef import export_to_tensorflow - -urlpatterns = [ - url(r'^export$', export_to_tensorflow, name='tf-export'), - url(r'^import$', import_graph_def, name='tf-import'), -] -from django.shortcuts import render - - -def index(request): - return render(request, 'index.html') -from django.views.decorators.csrf import csrf_exempt -from django.http import JsonResponse -import yaml -from datetime import datetime -import random -import string -import sys -import os -from ide.utils.jsonToPrototxt import json_to_prototxt -BASE_DIR = os.path.dirname(os.path.dirname( - os.path.dirname(os.path.abspath(__file__)))) - - -def randomword(length): - return ''.join(random.choice(string.lowercase) for i in range(length)) - - -@csrf_exempt -def export_to_caffe(request): - # Note : Remove the views for export by adding unittest for celery tasks - if request.method == 'POST': - net = yaml.safe_load(request.POST.get('net')) - net_name = request.POST.get('net_name') - if net_name == '': - net_name = 'Net' - try: - prototxt, input_dim = json_to_prototxt(net, net_name) - randomId = datetime.now().strftime('%Y%m%d%H%M%S')+randomword(5) - with open(BASE_DIR+'/media/'+randomId+'.prototxt', 'w') as f: - f.write(prototxt) - return JsonResponse({'result': 'success', 'id': randomId, - 'name': randomId+'.prototxt', 'url': '/media/'+randomId+'.prototxt'}) - except: - return JsonResponse({'result': 'error', 'error': str(sys.exc_info()[1])}) -from django.views.decorators.csrf import csrf_exempt -from django.http import JsonResponse -from django.conf import settings -import os -from caffe.proto import caffe_pb2 -from google.protobuf import text_format -import tempfile -import subprocess -import urllib2 -from urlparse import urlparse - -# ******Data Layers****** - - -def ImageData(layer): - params = {} - params['source'] = layer.image_data_param.source - params['batch_size'] = layer.image_data_param.batch_size - params['rand_skip'] = layer.image_data_param.rand_skip - params['shuffle'] = layer.image_data_param.shuffle - params['new_height'] = layer.image_data_param.new_height - params['new_width'] = layer.image_data_param.new_width - params['is_color'] = layer.image_data_param.is_color - params['root_folder'] = layer.image_data_param.root_folder - return params - - -def Data(layer): - params = {} - params['source'] = layer.data_param.source - params['batch_size'] = layer.data_param.batch_size - params['backend'] = layer.data_param.backend - if (params['backend'] == 0): - params['backend'] = 'LEVELDB' - else: - params['backend'] = 'LMDB' - params['rand_skip'] = layer.data_param.rand_skip - params['prefetch'] = layer.data_param.prefetch - return params - - -def HDF5Data(layer): - params = {} - params['source'] = layer.hdf5_data_param.source - params['batch_size'] = layer.hdf5_data_param.batch_size - params['shuffle'] = layer.hdf5_data_param.shuffle - return params - - -def HDF5Output(layer): - params = {} - params['file_name'] = layer.hdf5_output_param.file_name - return params - - -def Input(layer): - params = {} - params['dim'] = str(map(int, layer.input_param.shape[0].dim))[1:-1] - return params - - -def WindowData(layer): - params = {} - params['source'] = 
layer.window_data_param.source - params['batch_size'] = layer.window_data_param.batch_size - params['fg_threshold'] = layer.window_data_param.fg_threshold - params['bg_threshold'] = layer.window_data_param.bg_threshold - params['fg_fraction'] = layer.window_data_param.fg_fraction - params['context_pad'] = layer.window_data_param.context_pad - params['crop_mode'] = layer.window_data_param.crop_mode - params['cache_images'] = layer.window_data_param.cache_images - params['root_folder'] = layer.window_data_param.root_folder - return params - - -def MemoryData(layer): - params = {} - params['batch_size'] = layer.memory_data_param.batch_size - params['channels'] = layer.memory_data_param.channels - params['height'] = layer.memory_data_param.height - params['width'] = layer.memory_data_param.width - return params - - -def DummyData(layer): - params = {} - params['dim'] = str(map(int, layer.dummy_data_param.shape[0].dim))[1:-1] - params['type'] = str(layer.dummy_data_param.data_filler[0].type) - return params - - -# ********** Vision Layers ********** -def Convolution(layer): - params = {} - if len(layer.convolution_param.kernel_size): - params['kernel_h'] = layer.convolution_param.kernel_size[0] - params['kernel_w'] = layer.convolution_param.kernel_size[0] - if layer.convolution_param.kernel_w: - params['kernel_w'] = layer.convolution_param.kernel_w - if layer.convolution_param.kernel_h: - params['kernel_h'] = layer.convolution_param.kernel_h - if len(layer.convolution_param.pad): - params['pad_h'] = layer.convolution_param.pad[0] - params['pad_w'] = layer.convolution_param.pad[0] - if layer.convolution_param.pad_w: - params['pad_w'] = layer.convolution_param.pad_w - if layer.convolution_param.pad_h: - params['pad_h'] = layer.convolution_param.pad_h - if len(layer.convolution_param.stride): - params['stride_h'] = layer.convolution_param.stride_h \ - or layer.convolution_param.stride[0] - params['stride_w'] = layer.convolution_param.stride_w \ - or layer.convolution_param.stride[0] - if len(layer.convolution_param.dilation): - params['dilation_h'] = layer.convolution_param.dilation[0] - params['dilation_w'] = layer.convolution_param.dilation[0] - params['weight_filler'] = layer.convolution_param.weight_filler.type - params['bias_filler'] = layer.convolution_param.bias_filler.type - params['num_output'] = layer.convolution_param.num_output - params['use_bias'] = layer.convolution_param.bias_term - params['layer_type'] = '2D' - return params - - -def Pooling(layer): - params = {} - params['pad_h'] = layer.pooling_param.pad_h or layer.pooling_param.pad - params['pad_w'] = layer.pooling_param.pad_w or layer.pooling_param.pad - params['stride_h'] = layer.pooling_param.stride_h or layer.pooling_param.stride - params['stride_w'] = layer.pooling_param.stride_w or layer.pooling_param.stride - params['kernel_h'] = layer.pooling_param.kernel_h or layer.pooling_param.kernel_size - params['kernel_w'] = layer.pooling_param.kernel_w or layer.pooling_param.kernel_size - params['pool'] = layer.pooling_param.pool - if (params['pool'] == 0): - params['pool'] = 'MAX' - elif (params['pool'] == 1): - params['pool'] = 'AVE' - else: - params['pool'] = 'STOCHASTIC' - params['layer_type'] = '2D' - return params - - -def SPP(layer): - params = {} - params['pool'] = layer.spp_param.pool - params['pyramid_height'] = layer.spp_param.pyramid_height - return params - - -def Crop(layer): - params = {} - if layer.crop_param.axis: - params['axis'] = layer.crop_param.axis - if len(layer.crop_param.offset): - params['offset'] = 
layer.crop_param.offset[0] - return params - - -def Deconvolution(layer): - params = {} - if len(layer.convolution_param.kernel_size): - params['kernel_h'] = layer.convolution_param.kernel_size[0] - params['kernel_w'] = layer.convolution_param.kernel_size[0] - if layer.convolution_param.kernel_w: - params['kernel_w'] = layer.convolution_param.kernel_w - if layer.convolution_param.kernel_h: - params['kernel_h'] = layer.convolution_param.kernel_h - if len(layer.convolution_param.pad): - params['pad_h'] = layer.convolution_param.pad[0] - params['pad_w'] = layer.convolution_param.pad[0] - if layer.convolution_param.pad_w: - params['pad_w'] = layer.convolution_param.pad_w - if layer.convolution_param.pad_h: - params['pad_h'] = layer.convolution_param.pad_h - if len(layer.convolution_param.stride): - params['stride_h'] = layer.convolution_param.stride_h \ - or layer.convolution_param.stride[0] - params['stride_w'] = layer.convolution_param.stride_w \ - or layer.convolution_param.stride[0] - if len(layer.convolution_param.dilation): - params['dilation_h'] = layer.convolution_param.dilation[0] - params['dilation_w'] = layer.convolution_param.dilation[0] - params['weight_filler'] = layer.convolution_param.weight_filler.type - params['bias_filler'] = layer.convolution_param.bias_filler.type - params['num_output'] = layer.convolution_param.num_output - params['use_bias'] = layer.convolution_param.bias_term - return params - - -# ********** Recurrent Layers ********** -def Recurrent(layer): - params = {} - params['num_output'] = layer.recurrent_param.num_output - params['weight_filler'] = layer.recurrent_param.weight_filler.type - params['bias_filler'] = layer.recurrent_param.bias_filler.type - params['debug_info'] = layer.recurrent_param.debug_info - params['expose_hidden'] = layer.recurrent_param.expose_hidden - return params - - -# ********** Common Layers ********** -def InnerProduct(layer): - params = {} - params['num_output'] = layer.inner_product_param.num_output - params['weight_filler'] = layer.inner_product_param.weight_filler.type - params['bias_filler'] = layer.inner_product_param.bias_filler.type - params['use_bias'] = layer.inner_product_param.bias_term - return params - - -def Dropout(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - return params - - -def Embed(layer): - params = {} - params['bias_term'] = layer.embed_param.bias_term - params['input_dim'] = layer.embed_param.input_dim - params['num_output'] = layer.embed_param.num_output - params['weight_filler'] = layer.embed_param.weight_filler.type - params['bias_filler'] = layer.embed_param.bias_filler.type - return params - - -# ********** Normalisation Layers ********** -def LRN(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - params['local_size'] = layer.lrn_param.local_size - params['alpha'] = layer.lrn_param.alpha - params['beta'] = layer.lrn_param.beta - params['k'] = layer.lrn_param.k - if layer.lrn_param.norm_region: - params['norm_region'] = layer.lrn_param.norm_region - else: - params['norm_region'] = 'ACROSS_CHANNELS' - return params - - -def MVN(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - params['normalize_variance'] = layer.mvn_param.normalize_variance - params['across_channels'] = layer.mvn_param.across_channels - params['eps'] = layer.mvn_param.eps - return params - - -def BatchNorm(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - params['use_global_stats'] = 
layer.batch_norm_param.use_global_stats - params['moving_average_fraction'] = layer.batch_norm_param.moving_average_fraction - params['eps'] = layer.batch_norm_param.eps - return params - - - # ********** Activation/Neuron Layers ********** -def ReLU(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - params['negative_slope'] = layer.relu_param.negative_slope - return params - - -def PReLU(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - params['channel_shared'] = layer.prelu_param.channel_shared - return params - - -def ELU(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - params['alpha'] = layer.elu_param.alpha - return params - - -def Sigmoid(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - return params - - -def TanH(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - return params - - -def AbsVal(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - return params - - -def Power(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - params['power'] = layer.power_param.power - params['scale'] = layer.power_param.scale - params['shift'] = layer.power_param.shift - return params - - -def Exp(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - params['base'] = layer.exp_param.base - params['scale'] = layer.exp_param.scale - params['shift'] = layer.exp_param.shift - return params - - -def Log(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - params['base'] = layer.log_param.base - params['scale'] = layer.log_param.scale - params['shift'] = layer.log_param.shift - return params - - -def BNLL(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - return params - - -def Threshold(layer): - params = {} - if(layer.top == layer.bottom): - params['inplace'] = True - params['threshold'] = layer.threshold_param.threshold - return params - - -def Bias(layer): - params = {} - params['axis'] = layer.bias_param.axis - params['num_axes'] = layer.bias_param.num_axes - params['filler'] = layer.bias_param.filler.type - return params - - -def Scale(layer): - params = {} - params['scale'] = True - params['axis'] = layer.scale_param.axis - params['num_axes'] = layer.scale_param.num_axes - params['filler'] = layer.scale_param.filler.type - params['bias_term'] = layer.scale_param.bias_term - params['bias_filler'] = layer.scale_param.bias_filler.type - return params - - -# ********** Utility Layers ********** -def Flatten(layer): - params = {} - params['axis'] = layer.flatten_param.axis - params['end_axis'] = layer.flatten_param.end_axis - return params - - -def Reshape(layer): - params = {} - params['dim'] = str(map(int, layer.reshape_param.shape.dim))[1:-1] - return params - - -def Slice(layer): - params = {} - params['slice_point'] = str(map(int, layer.slice_param.slice_point))[1:-1] - params['axis'] = layer.slice_param.axis - params['slice_dim'] = layer.slice_param.slice_dim - return params - - -def Eltwise(layer): - params = {} - opMap = { - 0: 'Product', - 1: 'Sum', - 2: 'Maximum' - } - # Caffe's EltwiseOp enum is PROD=0, SUM=1 (the default), MAX=2; 0 is - # falsy, so a truthiness check would silently turn Product into Sum - params['layer_type'] = opMap[layer.eltwise_param.operation] - return params - - -def Reduction(layer): - params = {} - if layer.reduction_param.operation: - params['operation'] = layer.reduction_param.operation - if (params['operation']
== 1): - params['operation'] = 'SUM' - elif (params['operation'] == 2): - params['operation'] = 'ASUM' - elif (params['operation'] == 3): - params['operation'] = 'SUMSQ' - else: - params['operation'] = 'MEAN' - else: - params['operation'] = 'SUM' - params['axis'] = layer.reduction_param.axis - params['coeff'] = layer.reduction_param.coeff - return params - - -def ArgMax(layer): - params = {} - params['out_max_val'] = layer.argmax_param.out_max_val - params['top_k'] = layer.argmax_param.top_k - params['axis'] = layer.argmax_param.axis - return params - - -# ********** Loss Layers ********** -def InfogainLoss(layer): - params = {} - params['source'] = layer.infogain_loss_param.source - params['axis'] = layer.infogain_loss_param.axis - return params - - -def SoftmaxWithLoss(layer): - params = {} - params['axis'] = layer.softmax_param.axis - return params - - -def HingeLoss(layer): - params = {} - params['norm'] = layer.hinge_loss_param.norm - return params - - -def Accuracy(layer): - params = {} - params['top_k'] = layer.accuracy_param.top_k - params['axis'] = layer.accuracy_param.axis - return params - - -def ContrastiveLoss(layer): - params = {} - params['margin'] = layer.contrastive_loss_param.margin - params['legacy_version'] = layer.contrastive_loss_param.legacy_version - return params - - -def Concat(layer): - params = {} - if (layer.concat_param.axis is not None): - params['axis'] = layer.concat_param.axis - else: - # default value for axis of concat in caffe - params['axis'] = 1 - return params - - -# ********** Python Layer ********** -def Python(layer): - params = {} - if (layer.python_param.module): - params['module'] = layer.python_param.module - if (layer.python_param.layer): - params['layer'] = layer.python_param.layer - if (layer.python_param.param_str): - params.update(eval(layer.python_param.param_str)) - if (layer.loss_weight): - params['loss_weight'] = layer.loss_weight[0] - '''If it's a loss layer ('1,0'), there will be no source endpoint; if - it's a data layer ('0,1'), there will be no target endpoint; otherwise - there will be both endpoints ('1,1').''' - if (not layer.bottom): - params['endPoint'] = '1, 0' - elif ('loss' in layer.name.lower()): - params['endPoint'] = '0, 1' - else: - params['endPoint'] = '1, 1' - for param in params: - if isinstance(params[param], list): - params[param] = str(params[param])[1:-1] - return params - - -layer_dict = {'Accuracy': Accuracy, - 'WindowData': WindowData, - 'Convolution': Convolution, - 'SoftmaxWithLoss': SoftmaxWithLoss, - 'InnerProduct': InnerProduct, - 'HDF5Data': HDF5Data, - 'Threshold': Threshold, - 'Deconvolution': Deconvolution, - 'Embed': Embed, - 'Log': Log, - 'Reduction': Reduction, - 'Slice': Slice, - 'Eltwise': Eltwise, - 'Dropout': Dropout, - 'PReLU': PReLU, - 'BatchNorm': BatchNorm, - 'MVN': MVN, - 'Recurrent': Recurrent, - 'Bias': Bias, - 'ContrastiveLoss': ContrastiveLoss, - 'Input': Input, - 'Exp': Exp, - 'ImageData': ImageData, - 'ReLU': ReLU, - 'MemoryData': MemoryData, - 'Crop': Crop, - 'SPP': SPP, - 'Pooling': Pooling, - 'Scale': Scale, - 'HingeLoss': HingeLoss, - 'Flatten': Flatten, - 'ArgMax': ArgMax, - 'BNLL': BNLL, - 'Data': Data, - 'HDF5Output': HDF5Output, - 'ELU': ELU, - 'DummyData': DummyData, - 'InfogainLoss': InfogainLoss, - 'TanH': TanH, - 'AbsVal': AbsVal, - 'Reshape': Reshape, - 'Power': Power, - 'Sigmoid': Sigmoid, - 'Python': Python, - 'LRN': LRN, - 'LSTM': Recurrent, - 'RNN': Recurrent, - 'Concat': Concat - } - - -@csrf_exempt -def import_prototxt(request): - prototxtIsText = False - if
request.method == 'POST': - if ('file' in request.FILES) and \ - (request.FILES['file'].content_type == 'application/octet-stream' or - request.FILES['file'].content_type == 'text/plain'): - try: - prototxt = request.FILES['file'] - except Exception: - return JsonResponse({'result': 'error', - 'error': 'No Prototxt model file found'}) - elif 'sample_id' in request.POST: - try: - prototxt = open(os.path.join(settings.BASE_DIR, - 'example', 'caffe', - request.POST['sample_id'] + '.prototxt'), 'r') - except Exception: - return JsonResponse({'result': 'error', - 'error': 'No Prototxt model file found'}) - elif 'config' in request.POST: - prototxt = request.POST['config'] - prototxtIsText = True - elif 'url' in request.POST: - try: - url = urlparse(request.POST['url']) - if url.netloc == 'github.com': - url = url._replace(netloc='raw.githubusercontent.com') - url = url._replace(path=url.path.replace('blob/', '')) - prototxt = urllib2.urlopen(url.geturl()) - except Exception as ex: - return JsonResponse({'result': 'error', 'error': 'Invalid URL\n'+str(ex)}) - caffe_net = caffe_pb2.NetParameter() - - # try to convert to new prototxt - try: - if prototxtIsText is True: - content = prototxt - else: - content = prototxt.read() - tempFile = tempfile.NamedTemporaryFile() - tempFile.write(content) - tempFile.seek(0) - subprocess.call("~/caffe/caffe/build/tools/upgrade_net_proto_text " - + tempFile.name + " " + tempFile.name, shell=True) - tempFile.seek(0) - content = tempFile.read() - tempFile.close() - except Exception as ex: - return JsonResponse({'result': 'error', 'error': 'Invalid Prototxt\n'+str(ex)}) - - try: - text_format.Merge(content, caffe_net) - except Exception as ex: - return JsonResponse({'result': 'error', 'error': 'Invalid Prototxt\n'+str(ex)}) - - net = {} - i = 0 - blobMap = {} - net_name = caffe_net.name - hasTransformParam = ['ImageData', 'Data', 'WindowData'] - for layer in caffe_net.layer: - id = "l" + str(i) - input = [] - - # this logic for phase has to be improved - if len(layer.include): - if (layer.include[0].HasField('phase')): - phase = layer.include[0].phase - else: - phase = None - else: - phase = None - - params = {} - if (layer.type in hasTransformParam): - params['scale'] = layer.transform_param.scale - params['mirror'] = layer.transform_param.mirror - params['crop_size'] = layer.transform_param.crop_size - if (layer.transform_param.mean_file != ''): - params['mean_file'] = layer.transform_param.mean_file - elif (layer.transform_param.mean_value): - params['mean_value'] = str( - map(int, layer.transform_param.mean_value))[1:-1] - params['force_color'] = layer.transform_param.force_color - params['force_gray'] = layer.transform_param.force_gray - - if layer.type in layer_dict: - layer_params = layer_dict[layer.type](layer) - params.update(layer_params) - - jsonLayer = { - 'info': { - 'type': layer.type, - 'phase': phase - }, - 'connection': { - 'input': [], - 'output': [] - }, - 'params': params - } - # this logic was written for a scenario where train and test layers are mixed up - # But as we know, the only differences between the train and test phase are: - # 1) input layer with different source in test - # 2) some accuracy layers in test - # If we consider these constraint, the below logic can be vastly reduced - for bottom_blob in layer.bottom: - if (bottom_blob != 'label'): - # if the current layer has a phase - # then only connect with layers of same phase - # if it has no phase then connect with all layers - if jsonLayer['info']['phase'] is not None: - phase = 
jsonLayer['info']['phase'] - for bottomLayerId in blobMap[bottom_blob]: - if (net[bottomLayerId]['info']['phase'] == phase) or\ - (net[bottomLayerId]['info']['phase'] is None): - input.append(bottomLayerId) - net[bottomLayerId]['connection']['output'].append( - id) - else: - for bottomLayerId in blobMap[bottom_blob]: - input.append(bottomLayerId) - net[bottomLayerId]['connection']['output'].append( - id) - for top_blob in layer.top: - if (top_blob != 'label'): - if top_blob in blobMap: - if top_blob in layer.bottom: - # check for in-place operations: if the layer has no phase, - # replace the blob's history with this layer alone; if it has - # a phase, remove only layers with the same phase and append this one - if jsonLayer['info']['phase'] is not None: - phase = jsonLayer['info']['phase'] - for layerId in blobMap[top_blob]: - if net[layerId]['info']['phase'] == phase: - blobMap[top_blob].remove(layerId) - blobMap[top_blob].append(id) - else: - blobMap[top_blob] = [id] - else: - blobMap[top_blob].append(id) - else: - blobMap[top_blob] = [id] - jsonLayer['connection']['input'] = input - net[id] = jsonLayer - i = i + 1 - - return JsonResponse({'result': 'success', 'net': net, 'net_name': net_name}) -import networkx as nx -import json - -with open('state_net.json', 'r') as f: - network = json.loads(f.read()) - -network_map = {} -for node, params in network.items(): - new_name = (node + ' ' + params['info']['type'] + "\n" + - str(tuple(params["shape"]["output"]))) - network_map[node] = new_name - -graph = nx.DiGraph() -for node, params in network.items(): - output_nodes = params['connection']['output'] - for o_node in output_nodes: - graph.add_edge(network_map[node], network_map[o_node]) - -dotgraph = nx.nx_pydot.to_pydot(graph) -dotgraph.set('rankdir', 'LR') -dotgraph.set('dpi', 300) -dotgraph.write('PureVis.png', format='png') -from keras.models import model_from_json -from keras.utils import plot_model -import sys - -try: - json_file = sys.argv[1] - output_file = sys.argv[2] -except IndexError: - # missing sys.argv entries raise IndexError, not KeyError - print("Usage: python print_keras_model.py <json_file> <output_file>") - sys.exit(1) - -with open(json_file, 'r') as f: - loaded_model = model_from_json(f.read()) - -plot_model(loaded_model, - to_file=output_file + '.png', - rankdir='LR', - show_shapes=True, - show_layer_names=False) -# -*- coding: utf-8 -*- -# -# Fabrik documentation build configuration file, created by -# sphinx-quickstart on Sat Nov 4 01:38:47 2017. -# -# This file is execfile()d with the current directory set to its -# containing dir. -# -# Note that not all possible configuration values are present in this -# autogenerated file. -# -# All configuration values have a default; values that are commented out -# serve to show the default. - -# If extensions (or modules to document with autodoc) are in another directory, -# add these directories to sys.path here. If the directory is relative to the -# documentation root, use os.path.abspath to make it absolute, like shown here. -# -# import os -# import sys -# sys.path.insert(0, os.path.abspath('.')) - -# -- General configuration ------------------------------------------------ - -# If your documentation needs a minimal Sphinx version, state it here. -# -# needs_sphinx = '1.0' - -# Add any Sphinx extension module names here, as strings. They can be -# extensions coming with Sphinx (named 'sphinx.ext.*') or your custom -# ones.
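# Illustrative usage note, not from the original sources: the two
# standalone scripts above are run from a shell; the file names here are
# assumptions, since this dump does not preserve them:
#
#   python net_vis.py                        # reads state_net.json, writes PureVis.png
#   python print_keras_model.py net.json out # renders the Keras model to out.png
#
# Everything from here on is the Sphinx conf.py for the project docs.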
- -import sphinx_rtd_theme - -from recommonmark.parser import CommonMarkParser - -source_parsers = {'.md': CommonMarkParser, } - -extensions = [ - 'sphinx.ext.githubpages', -] - -# Add any paths that contain templates here, relative to this directory. -templates_path = ['_templates'] - -# The suffix(es) of source filenames. -# You can specify multiple suffix as a list of string: -# -# source_suffix = ['.rst', '.md'] -source_suffix = ['.rst', '.md'] - -# The encoding of source files. -# -# source_encoding = 'utf-8-sig' - -# The master toctree document. -master_doc = 'index' - -# General information about the project. -project = u'Fabrik' -copyright = u'2017, CloudCV Team' -author = u'CloudCV Team' - -# The version info for the project you're documenting, acts as replacement for -# |version| and |release|, also used in various other places throughout the -# built documents. -# -# The short X.Y version. -version = u'0.2' -# The full version, including alpha/beta/rc tags. -release = u'0.2' - -# The language for content autogenerated by Sphinx. Refer to documentation -# for a list of supported languages. -# -# This is also used if you do content translation via gettext catalogs. -# Usually you set "language" from the command line for these cases. -language = None - -# There are two options for replacing |today|: either, you set today to some -# non-false value, then it is used: -# -# today = '' -# -# Else, today_fmt is used as the format for a strftime call. -# -# today_fmt = '%B %d, %Y' - -# List of patterns, relative to source directory, that match files and -# directories to ignore when looking for source files. -# This patterns also effect to html_static_path and html_extra_path -exclude_patterns = [] - -# The reST default role (used for this markup: `text`) to use for all -# documents. -# -# default_role = None - -# If true, '()' will be appended to :func: etc. cross-reference text. -# -# add_function_parentheses = True - -# If true, the current module name will be prepended to all description -# unit titles (such as .. function::). -# -# add_module_names = True - -# If true, sectionauthor and moduleauthor directives will be shown in the -# output. They are ignored by default. -# -# show_authors = False - -# The name of the Pygments (syntax highlighting) style to use. -pygments_style = 'sphinx' - -# A list of ignored prefixes for module index sorting. -# modindex_common_prefix = [] - -# If true, keep warnings as "system message" paragraphs in the built documents. -# keep_warnings = False - -# If true, `todo` and `todoList` produce output, else they produce nothing. -todo_include_todos = False - - -# -- Options for HTML output ---------------------------------------------- - -# The theme to use for HTML and HTML Help pages. See the documentation for -# a list of builtin themes. -# -html_theme = 'sphinx_rtd_theme' - -# Theme options are theme-specific and customize the look and feel of a theme -# further. For a list of options available for each theme, see the -# documentation. -# -# html_theme_options = {} - -# Add any paths that contain custom themes here, relative to this directory. -# html_theme_path = [] - -# The name for this set of Sphinx documents. -# " v documentation" by default. -# -# html_title = u'Fabrik v0.2' - -# A shorter title for the navigation bar. Default is the same as html_title. -# -# html_short_title = None - -# The name of an image file (relative to this directory) to place at the top -# of the sidebar. 
-# -# html_logo = None - -# The name of an image file (relative to this directory) to use as a favicon of -# the docs. This file should be a Windows icon file (.ico) being 16x16 or 32x32 -# pixels large. -# -# html_favicon = None - -# Add any paths that contain custom static files (such as style sheets) here, -# relative to this directory. They are copied after the builtin static files, -# so a file named "default.css" will overwrite the builtin "default.css". -html_static_path = ['_static'] - -# Add any extra paths that contain custom files (such as robots.txt or -# .htaccess) here, relative to this directory. These files are copied -# directly to the root of the documentation. -# -# html_extra_path = [] - -# If not None, a 'Last updated on:' timestamp is inserted at every page -# bottom, using the given strftime format. -# The empty string is equivalent to '%b %d, %Y'. -# -# html_last_updated_fmt = None - -# If true, SmartyPants will be used to convert quotes and dashes to -# typographically correct entities. -# -# html_use_smartypants = True - -# Custom sidebar templates, maps document names to template names. -# -# html_sidebars = {} - -# Additional templates that should be rendered to pages, maps page names to -# template names. -# -# html_additional_pages = {} - -# If false, no module index is generated. -# -# html_domain_indices = True - -# If false, no index is generated. -# -# html_use_index = True - -# If true, the index is split into individual pages for each letter. -# -# html_split_index = False - -# If true, links to the reST sources are added to the pages. -# -# html_show_sourcelink = True - -# If true, "Created using Sphinx" is shown in the HTML footer. Default is True. -# -# html_show_sphinx = True - -# If true, "(C) Copyright ..." is shown in the HTML footer. Default is True. -# -# html_show_copyright = True - -# If true, an OpenSearch description file will be output, and all pages will -# contain a <link> tag referring to it. The value of this option must be the -# base URL from which the finished HTML is served. -# -# html_use_opensearch = '' - -# This is the file name suffix for HTML files (e.g. ".xhtml"). -# html_file_suffix = None - -# Language to be used for generating the HTML full-text search index. -# Sphinx supports the following languages: -# 'da', 'de', 'en', 'es', 'fi', 'fr', 'hu', 'it', 'ja' -# 'nl', 'no', 'pt', 'ro', 'ru', 'sv', 'tr', 'zh' -# -# html_search_language = 'en' - -# A dictionary with options for the search language support, empty by default. -# 'ja' uses this config value. -# 'zh' user can custom change `jieba` dictionary path. -# -# html_search_options = {'type': 'default'} - -# The name of a javascript file (relative to the configuration directory) that -# implements a search results scorer. If empty, the default will be used. -# -# html_search_scorer = 'scorer.js' - -# Output file base name for HTML help builder. -htmlhelp_basename = 'Fabrikdoc' - -# -- Options for LaTeX output --------------------------------------------- - -latex_elements = { - # The paper size ('letterpaper' or 'a4paper'). - # - # 'papersize': 'letterpaper', - - # The font size ('10pt', '11pt' or '12pt'). - # - # 'pointsize': '10pt', - - # Additional stuff for the LaTeX preamble. - # - # 'preamble': '', - - # Latex figure (float) alignment - # - # 'figure_align': 'htbp', -} - -# Grouping the document tree into LaTeX files. List of tuples -# (source start file, target name, title, -# author, documentclass [howto, manual, or own class]).
-latex_documents = [ - (master_doc, 'Fabrik.tex', u'Fabrik Documentation', - u'CloudCV Team', 'manual'), -] - -# The name of an image file (relative to this directory) to place at the top of -# the title page. -# -# latex_logo = None - -# For "manual" documents, if this is true, then toplevel headings are parts, -# not chapters. -# -# latex_use_parts = False - -# If true, show page references after internal links. -# -# latex_show_pagerefs = False - -# If true, show URL addresses after external links. -# -# latex_show_urls = False - -# Documents to append as an appendix to all manuals. -# -# latex_appendices = [] - -# If false, will not define \strong, \code, \titleref, \crossref ... but only -# \sphinxstrong, ..., \sphinxtitleref, ... To help avoid clash with user added -# packages. -# -# latex_keep_old_macro_names = True - -# If false, no module index is generated. -# -# latex_domain_indices = True - - -# -- Options for manual page output --------------------------------------- - -# One entry per manual page. List of tuples -# (source start file, name, description, authors, manual section). -man_pages = [ - (master_doc, 'fabrik', u'Fabrik Documentation', - [author], 1) -] - -# If true, show URL addresses after external links. -# -# man_show_urls = False - - -# -- Options for Texinfo output ------------------------------------------- - -# Grouping the document tree into Texinfo files. List of tuples -# (source start file, target name, title, author, -# dir menu entry, description, category) -texinfo_documents = [ - (master_doc, 'Fabrik', u'Fabrik Documentation', - author, 'Fabrik', 'One line description of project.', - 'Miscellaneous'), -] - -# Documents to append as an appendix to all manuals. -# -# texinfo_appendices = [] - -# If false, no module index is generated. -# -# texinfo_domain_indices = True - -# How to display URL addresses: 'footnote', 'no', or 'inline'. -# -# texinfo_show_urls = 'footnote' - -# If true, do not generate a @detailmenu in the "Top" node's menu.
-#
-# texinfo_no_detailmenu = False
-import caffe
-from caffe import layers as L
-import re
-
-
-def get_iterable(x):
-    return (x,)
-
-
-# Weight/bias filler mapping from Keras to Caffe; fillers with no direct
-# Caffe equivalent are approximated (most of them by xavier or gaussian)
-fillerMap = {
-    'Zeros': 'constant',
-    'Ones': 'constant',
-    'Constant': 'constant',
-    'RandomNormal': 'gaussian',
-    'RandomUniform': 'uniform',
-    'TruncatedNormal': 'gaussian',
-    'VarianceScaling': 'gaussian',
-    'Orthogonal': 'xavier',
-    'Identity': 'constant',
-    'lecun_uniform': 'uniform',
-    'glorot_normal': 'xavier',
-    'glorot_uniform': 'xavier',
-    'he_normal': 'msra',
-    'he_uniform': 'msra'
-}
-
-
-def export_ImageData(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames):
-    transform_param = {}
-    transform_param['scale'] = layerParams['scale']
-    transform_param['mirror'] = layerParams['mirror']
-    transform_param['crop_size'] = layerParams['crop_size']
-    transform_param['force_color'] = layerParams['force_color']
-    transform_param['force_gray'] = layerParams['force_gray']
-    if (layerParams['mean_value'] != ''):
-        transform_param['mean_value'] = map(
-            int, layerParams['mean_value'].split(','))
-    elif (layerParams['mean_file'] != ''):
-        transform_param['mean_file'] = layerParams['mean_file']
-
-    image_data_param = {}
-    image_data_param['source'] = layerParams['source']
-    image_data_param['batch_size'] = layerParams['batch_size']
-    image_data_param['rand_skip'] = layerParams['rand_skip']
-    image_data_param['shuffle'] = layerParams['shuffle']
-    image_data_param['new_height'] = layerParams['new_height']
-    image_data_param['new_width'] = layerParams['new_width']
-    image_data_param['is_color'] = layerParams['is_color']
-    image_data_param['root_folder'] = layerParams['root_folder']
-    if layerPhase is not None:
-        caffeLayer = get_iterable(L.ImageData(
-            transform_param=transform_param,
-            image_data_param=image_data_param,
-            include={
-                'phase': int(layerPhase)
-            }))
-        if int(layerPhase) == 0:
-            for key, value in zip(blobNames[layerId]['top'], caffeLayer):
-                ns_train[key] = value
-        elif int(layerPhase) == 1:
-            for key, value in zip(blobNames[layerId]['top'], caffeLayer):
-                ns_test[key] = value
-    else:
-        for ns in (ns_train, ns_test):
-            caffeLayer = get_iterable(L.ImageData(
-                transform_param=transform_param,
-                image_data_param=image_data_param))
-            for key, value in zip(blobNames[layerId]['top'], caffeLayer):
-                ns[key] = value
-    return ns_train, ns_test
-
-
-def export_Data(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames):
-    transform_param = {}
-    transform_param['scale'] = layerParams['scale']
-    transform_param['mirror'] = layerParams['mirror']
-    transform_param['crop_size'] = layerParams['crop_size']
-    transform_param['force_color'] = layerParams['force_color']
-    transform_param['force_gray'] = layerParams['force_gray']
-    if (layerParams['mean_value'] != ''):
-        transform_param['mean_value'] = map(
-            int, layerParams['mean_value'].split(','))
-    elif (layerParams['mean_file'] != ''):
-        transform_param['mean_file'] = layerParams['mean_file']
-
-    data_param = {}
-    data_param['source'] = layerParams['source']
-    data_param['batch_size'] = layerParams['batch_size']
-    data_param['backend'] = layerParams['backend']
-    if (data_param['backend'] == 'LEVELDB'):
-        data_param['backend'] = 0
-    elif (data_param['backend'] == 'LMDB'):
-        data_param['backend'] = 1
-    data_param['rand_skip'] = layerParams['rand_skip']
-    data_param['prefetch'] = layerParams['prefetch']
-    if layerPhase is not None:
-        caffeLayer = get_iterable(L.Data(
-
transform_param=transform_param, - data_param=data_param, - include={ - 'phase': int(layerPhase) - })) - if int(layerPhase) == 0: - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_train[key] = value - elif int(layerPhase) == 1: - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_test[key] = value - else: - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Data( - transform_param=transform_param, - data_param=data_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_HDF5Data(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - hdf5_data_param = {} - hdf5_data_param['source'] = layerParams['source'] - hdf5_data_param['batch_size'] = layerParams['batch_size'] - hdf5_data_param['shuffle'] = layerParams['shuffle'] - if layerPhase is not None: - caffeLayer = get_iterable(L.HDF5Data( - hdf5_data_param=hdf5_data_param, - include={ - 'phase': int(layerPhase) - })) - if int(layerPhase) == 0: - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_train[key] = value - elif int(layerPhase) == 1: - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_test[key] = value - else: - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.HDF5Data( - hdf5_data_param=hdf5_data_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_HDF5Output(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - hdf5_output_param = {'file_name': layerParams['file_name']} - if layerPhase is not None: - if int(layerPhase) == 0: - caffeLayer = get_iterable(L.HDF5Output( - *[ns_train[x] for x in blobNames[layerId]['bottom']], - hdf5_output_param=hdf5_output_param, - include={ - 'phase': int(layerPhase) - })) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_train[key] = value - elif int(layerPhase) == 1: - caffeLayer = get_iterable(L.HDF5Output( - *[ns_test[x] for x in blobNames[layerId]['bottom']], - hdf5_output_param=hdf5_output_param, - include={ - 'phase': int(layerPhase) - })) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_test[key] = value - else: - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.HDF5Output( - *[ns[x] for x in blobNames[layerId]['bottom']], - hdf5_output_param=hdf5_output_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Input(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - input_param = {'shape': {'dim': map(int, layerParams['dim'].split(','))}} - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Input( - input_param=input_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_WindowData(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - transform_param = {} - transform_param['scale'] = layerParams['scale'] - transform_param['mirror'] = layerParams['mirror'] - transform_param['crop_size'] = layerParams['crop_size'] - transform_param['force_color'] = layerParams['force_color'] - transform_param['force_gray'] = layerParams['force_gray'] - if (layerParams['mean_value'] != ''): - transform_param['mean_value'] = map( - int, layerParams['mean_value'].split(',')) - elif (layerParams['mean_file'] != ''): - transform_param['mean_file'] = layerParams['mean_file'] - - window_data_param = {} - 
window_data_param['source'] = layerParams['source'] - window_data_param['batch_size'] = layerParams['batch_size'] - window_data_param['fg_threshold'] = layerParams['fg_threshold'] - window_data_param['bg_threshold'] = layerParams['bg_threshold'] - window_data_param['fg_fraction'] = layerParams['fg_fraction'] - window_data_param['context_pad'] = layerParams['context_pad'] - window_data_param['crop_mode'] = layerParams['crop_mode'] - window_data_param['cache_images'] = layerParams['cache_images'] - window_data_param['root_folder'] = layerParams['root_folder'] - if layerPhase is not None: - caffeLayer = get_iterable(L.WindowData( - transform_param=transform_param, - window_data_param=window_data_param, - include={ - 'phase': int(layerPhase) - })) - if int(layerPhase) == 0: - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_train[key] = value - elif int(layerPhase) == 1: - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_test[key] = value - else: - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.WindowData( - transform_param=transform_param, - window_data_param=window_data_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_MemoryData(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - memory_data_param = {} - memory_data_param['batch_size'] = layerParams['batch_size'] - memory_data_param['channels'] = layerParams['channels'] - memory_data_param['height'] = layerParams['height'] - memory_data_param['width'] = layerParams['width'] - if layerPhase is not None: - caffeLayer = get_iterable(L.MemoryData( - memory_data_param=memory_data_param, - include={ - 'phase': int(layerPhase) - })) - if int(layerPhase) == 0: - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_train[key] = value - elif int(layerPhase) == 1: - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_test[key] = value - else: - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.MemoryData( - memory_data_param=memory_data_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_DummyData(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - # Adding a default size - dummy_data_param = {} - dummy_data_param['shape'] = {'dim': map( - int, layerParams['dim'].split(','))} - dummy_data_param['data_filler'] = {'type': layerParams['type']} - if layerPhase is not None: - caffeLayer = get_iterable(L.DummyData( - dummy_data_param=dummy_data_param, - include={ - 'phase': int(layerPhase) - })) - if int(layerPhase) == 0: - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_train[key] = value - elif int(layerPhase) == 1: - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_test[key] = value - else: - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.DummyData( - dummy_data_param=dummy_data_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Convolution(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - convolution_param = {} - if layerParams['kernel_h'] != '': - convolution_param['kernel_h'] = int(float(layerParams['kernel_h'])) - if layerParams['kernel_w'] != '': - convolution_param['kernel_w'] = int(float(layerParams['kernel_w'])) - if layerParams['stride_h'] != '': - convolution_param['stride_h'] = int(float(layerParams['stride_h'])) - if 
layerParams['stride_w'] != '': - convolution_param['stride_w'] = int(float(layerParams['stride_w'])) - if layerParams['num_output'] != '': - convolution_param['num_output'] = int(float(layerParams['num_output'])) - if layerParams['pad_h'] != '': - convolution_param['pad_h'] = int(float(layerParams['pad_h'])) - if layerParams['pad_w'] != '': - convolution_param['pad_w'] = int(float(layerParams['pad_w'])) - if layerParams['weight_filler'] != '': - convolution_param['weight_filler'] = {} - try: - convolution_param['weight_filler']['type'] = \ - fillerMap[layerParams['weight_filler']] - except: - convolution_param['weight_filler']['type'] = layerParams['weight_filler'] - if layerParams['bias_filler'] != '': - convolution_param['bias_filler'] = {} - try: - convolution_param['bias_filler']['type'] = \ - fillerMap[layerParams['bias_filler']] - except: - convolution_param['bias_filler']['type'] = layerParams['bias_filler'] - convolution_param['dilation'] = layerParams['dilation_h'] - convolution_param['bias_term'] = layerParams['use_bias'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Convolution( - *[ns[x] for x in blobNames[layerId]['bottom']], - convolution_param=convolution_param, - param=[ - { - 'lr_mult': 1 - }, - { - 'lr_mult': 2 - } - ])) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Pooling(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - pooling_param = {} - if layerParams['kernel_h'] != '': - pooling_param['kernel_h'] = int(float(layerParams['kernel_h'])) - if layerParams['kernel_w'] != '': - pooling_param['kernel_w'] = int(float(layerParams['kernel_w'])) - if layerParams['stride_h'] != '': - pooling_param['stride_h'] = int(float(layerParams['stride_h'])) - if layerParams['stride_w'] != '': - pooling_param['stride_w'] = int(float(layerParams['stride_w'])) - if layerParams['pad_h'] != '': - pooling_param['pad_h'] = int(float(layerParams['pad_h'])) - if layerParams['pad_w'] != '': - pooling_param['pad_w'] = int(float(layerParams['pad_w'])) - if layerParams['pool'] != '': - pool = layerParams['pool'] - if(pool == 'MAX'): - pool = 0 - elif(pool == 'AVE'): - pool = 1 - elif(pool == 'STOCHASTIC'): - pool = 2 - pooling_param['pool'] = pool - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Pooling( - *[ns[x] for x in blobNames[layerId]['bottom']], - pooling_param=pooling_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Crop(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - crop_param = {} - if layerParams['axis'] != '': - crop_param['axis'] = int(float(layerParams['axis'])) - if layerParams['offset'] != '': - crop_param['offset'] = int(float(layerParams['offset'])) - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Crop( - *[ns[x] for x in blobNames[layerId]['bottom']], - crop_param=crop_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_SPP(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - spp_param = {} - spp_param['pool'] = layerParams['pool'] - spp_param['pyramid_height'] = layerParams['pyramid_height'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.SPP( - *[ns[x] for x in blobNames[layerId]['bottom']], - spp_param=spp_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def 
export_Deconvolution(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - convolution_param = {} - if layerParams['kernel_h'] != '': - convolution_param['kernel_h'] = int(float(layerParams['kernel_h'])) - if layerParams['kernel_w'] != '': - convolution_param['kernel_w'] = int(float(layerParams['kernel_w'])) - if layerParams['stride_h'] != '': - convolution_param['stride_h'] = int(float(layerParams['stride_h'])) - if layerParams['stride_w'] != '': - convolution_param['stride_w'] = int(float(layerParams['stride_w'])) - if layerParams['num_output'] != '': - convolution_param['num_output'] = int(float(layerParams['num_output'])) - if layerParams['pad_h'] != '': - convolution_param['pad_h'] = int(float(layerParams['pad_h'])) - if layerParams['pad_w'] != '': - convolution_param['pad_w'] = int(float(layerParams['pad_w'])) - if layerParams['weight_filler'] != '': - convolution_param['weight_filler'] = {} - try: - convolution_param['weight_filler']['type'] = \ - fillerMap[layerParams['weight_filler']] - except: - convolution_param['weight_filler']['type'] = layerParams['weight_filler'] - if layerParams['bias_filler'] != '': - convolution_param['bias_filler'] = {} - try: - convolution_param['bias_filler']['type'] = \ - fillerMap[layerParams['bias_filler']] - except: - convolution_param['bias_filler']['type'] = layerParams['bias_filler'] - convolution_param['dilation'] = layerParams['dilation_h'] - convolution_param['bias_term'] = layerParams['use_bias'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Deconvolution( - *[ns[x] for x in blobNames[layerId]['bottom']], - convolution_param=convolution_param, - param=[ - { - 'lr_mult': 1 - }, - { - 'lr_mult': 2 - } - ])) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Recurrent(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - recurrent_param = {} - recurrent_param['num_output'] = int(layerParams['num_output']) - if layerParams['weight_filler'] != '': - recurrent_param['weight_filler'] = {} - try: - recurrent_param['weight_filler']['type'] = \ - fillerMap[layerParams['weight_filler']] - except: - recurrent_param['weight_filler']['type'] = layerParams['weight_filler'] - if layerParams['bias_filler'] != '': - recurrent_param['bias_filler'] = {} - try: - recurrent_param['bias_filler']['type'] = \ - fillerMap[layerParams['bias_filler']] - except: - recurrent_param['bias_filler']['type'] = layerParams['bias_filler'] - recurrent_param['debug_info'] = layerParams['debug_info'] - recurrent_param['expose_hidden'] = layerParams['expose_hidden'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Recurrent( - *[ns[x] for x in blobNames[layerId]['bottom']], - recurrent_param=recurrent_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_RNN(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - recurrent_param = {} - recurrent_param['num_output'] = int(layerParams['num_output']) - if layerParams['weight_filler'] != '': - recurrent_param['weight_filler'] = {} - try: - recurrent_param['weight_filler']['type'] = \ - fillerMap[layerParams['weight_filler']] - except: - recurrent_param['weight_filler']['type'] = layerParams['weight_filler'] - if layerParams['bias_filler'] != '': - recurrent_param['bias_filler'] = {} - try: - recurrent_param['bias_filler']['type'] = \ - fillerMap[layerParams['bias_filler']] - except: - 
recurrent_param['bias_filler']['type'] = layerParams['bias_filler'] - recurrent_param['debug_info'] = layerParams['debug_info'] - recurrent_param['expose_hidden'] = layerParams['expose_hidden'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.RNN( - *[ns[x] for x in blobNames[layerId]['bottom']], - recurrent_param=recurrent_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_LSTM(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - recurrent_param = {} - recurrent_param['num_output'] = int(layerParams['num_output']) - if layerParams['weight_filler'] != '': - recurrent_param['weight_filler'] = {} - try: - recurrent_param['weight_filler']['type'] = \ - fillerMap[layerParams['weight_filler']] - except: - recurrent_param['weight_filler']['type'] = layerParams['weight_filler'] - if layerParams['bias_filler'] != '': - recurrent_param['bias_filler'] = {} - try: - recurrent_param['bias_filler']['type'] = \ - fillerMap[layerParams['bias_filler']] - except: - recurrent_param['bias_filler']['type'] = layerParams['bias_filler'] - recurrent_param['debug_info'] = layerParams['debug_info'] - recurrent_param['expose_hidden'] = layerParams['expose_hidden'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.LSTM( - *[ns[x] for x in blobNames[layerId]['bottom']], - recurrent_param=recurrent_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_InnerProduct(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - inner_product_param = {} - if layerParams['num_output'] != '': - inner_product_param['num_output'] = int( - float(layerParams['num_output'])) - if layerParams['weight_filler'] != '': - inner_product_param['weight_filler'] = {} - try: - inner_product_param['weight_filler']['type'] = \ - fillerMap[layerParams['weight_filler']] - except: - inner_product_param['weight_filler']['type'] = layerParams['weight_filler'] - if layerParams['bias_filler'] != '': - inner_product_param['bias_filler'] = {} - try: - inner_product_param['bias_filler']['type'] = \ - fillerMap[layerParams['bias_filler']] - except: - inner_product_param['bias_filler']['type'] = layerParams['bias_filler'] - inner_product_param['bias_term'] = layerParams['use_bias'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.InnerProduct( - *[ns[x] for x in blobNames[layerId]['bottom']], - inner_product_param=inner_product_param, - param=[ - { - 'lr_mult': 1 - }, - { - 'lr_mult': 2 - } - ])) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Dropout(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - # inplace dropout? 
caffe-tensorflow do not work - inplace = layerParams['inplace'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Dropout( - *[ns[x] for x in blobNames[layerId]['bottom']], - in_place=inplace)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Embed(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - embed_param = {} - if layerParams['num_output'] != '': - embed_param['num_output'] = int(float(layerParams['num_output'])) - if layerParams['input_dim'] != '': - embed_param['input_dim'] = int(float(layerParams['input_dim'])) - if layerParams['weight_filler'] != '': - embed_param['weight_filler'] = {} - try: - embed_param['weight_filler']['type'] = \ - fillerMap[layerParams['weight_filler']] - except: - embed_param['weight_filler']['type'] = layerParams['weight_filler'] - if layerParams['bias_filler'] != '': - embed_param['bias_filler'] = {} - try: - embed_param['bias_filler']['type'] = \ - fillerMap[layerParams['bias_filler']] - except: - embed_param['bias_filler']['type'] = layerParams['bias_filler'] - embed_param['bias_term'] = layerParams['bias_term'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Embed( - *[ns[x] for x in blobNames[layerId]['bottom']], - embed_param=embed_param, - param=[ - { - 'lr_mult': 1, - 'decay_mult': 1 - }, - { - 'lr_mult': 2, - 'decay_mult': 0 - } - ])) - # *([ns[x] for x in blobNames[layerId]['bottom']] + [ns.label]))) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_LRN(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - inplace = layerParams['inplace'] - lrn_param = {} - lrn_param['local_size'] = layerParams['local_size'] - lrn_param['alpha'] = layerParams['alpha'] - lrn_param['beta'] = layerParams['beta'] - lrn_param['k'] = layerParams['k'] - if(layerParams['norm_region'] == 'ACROSS_CHANNELS'): - lrn_param['norm_region'] = 0 - else: - lrn_param['norm_region'] = 1 - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.LRN( - *[ns[x] for x in blobNames[layerId]['bottom']], - lrn_param=lrn_param, in_place=inplace)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_MVN(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - inplace = layerParams['inplace'] - mvn_param = {} - mvn_param['normalize_variance'] = layerParams['normalize_variance'] - mvn_param['across_channels'] = layerParams['across_channels'] - # JS converts 1e-9 to string - mvn_param['eps'] = float(layerParams['eps']) - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.MVN( - *[ns[x] for x in blobNames[layerId]['bottom']], - mvn_param=mvn_param, in_place=inplace)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_BatchNorm(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - inplace = layerParams['inplace'] - batch_norm_param = {} - batch_norm_param['use_global_stats'] = layerParams['use_global_stats'] - batch_norm_param['moving_average_fraction'] = layerParams['moving_average_fraction'] - batch_norm_param['eps'] = float(layerParams['eps']) - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.BatchNorm( - *[ns[x] for x in blobNames[layerId]['bottom']], - batch_norm_param=batch_norm_param, in_place=inplace)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - 
return ns_train, ns_test - - -def export_ReLU(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - inplace = layerParams['inplace'] - relu_param = {} - relu_param['negative_slope'] = layerParams['negative_slope'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.ReLU( - *[ns[x] for x in blobNames[layerId]['bottom']], - in_place=inplace, relu_param=relu_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_PReLU(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - inplace = layerParams['inplace'] - prelu_param = {} - prelu_param['channel_shared'] = layerParams['channel_shared'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.PReLU( - *[ns[x] for x in blobNames[layerId]['bottom']], - in_place=inplace, prelu_param=prelu_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_ELU(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - inplace = layerParams['inplace'] - elu_param = {} - elu_param['alpha'] = layerParams['alpha'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.ELU( - *[ns[x] for x in blobNames[layerId]['bottom']], - in_place=inplace, elu_param=elu_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Sigmoid(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - inplace = layerParams['inplace'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Sigmoid( - *[ns[x] for x in blobNames[layerId]['bottom']], - in_place=inplace)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_TanH(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - inplace = layerParams['inplace'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.TanH( - *[ns[x] for x in blobNames[layerId]['bottom']], - in_place=inplace)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_AbsVal(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - inplace = layerParams['inplace'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.AbsVal( - *[ns[x] for x in blobNames[layerId]['bottom']], - in_place=inplace)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Power(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - power_param = {} - inplace = layerParams['inplace'] - power_param['power'] = layerParams['power'] - power_param['scale'] = layerParams['scale'] - power_param['shift'] = layerParams['shift'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Power( - *[ns[x] for x in blobNames[layerId]['bottom']], - in_place=inplace, power_param=power_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Exp(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - exp_param = {} - inplace = layerParams['inplace'] - exp_param['base'] = layerParams['base'] - exp_param['scale'] = layerParams['scale'] - exp_param['shift'] = layerParams['shift'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Exp( - *[ns[x] for x in blobNames[layerId]['bottom']], - in_place=inplace, exp_param=exp_param)) - for key, value in 
zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Log(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - log_param = {} - inplace = layerParams['inplace'] - log_param['base'] = layerParams['base'] - log_param['scale'] = layerParams['scale'] - log_param['shift'] = layerParams['shift'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Log( - *[ns[x] for x in blobNames[layerId]['bottom']], - in_place=inplace, log_param=log_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_BNLL(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - inplace = layerParams['inplace'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.BNLL( - *[ns[x] for x in blobNames[layerId]['bottom']], - in_place=inplace)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Threshold(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - inplace = layerParams['inplace'] - threshold_param = {} - threshold_param['threshold'] = layerParams['threshold'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Threshold( - *[ns[x] for x in blobNames[layerId]['bottom']], - in_place=inplace, threshold_param=threshold_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Bias(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - bias_param = {} - bias_param['axis'] = layerParams['axis'] - bias_param['num_axes'] = layerParams['num_axes'] - if layerParams['filler'] != '': - bias_param['filler'] = {} - try: - bias_param['filler']['type'] = \ - fillerMap[layerParams['filler']] - except: - bias_param['filler']['type'] = layerParams['filler'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Bias( - *[ns[x] for x in blobNames[layerId]['bottom']], - bias_param=bias_param - )) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Scale(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - scale_param = {} - scale_param['axis'] = layerParams['axis'] - scale_param['num_axes'] = layerParams['num_axes'] - if layerParams['filler'] != '': - scale_param['filler'] = {} - try: - scale_param['filler']['type'] = \ - fillerMap[layerParams['filler']] - except: - scale_param['filler']['type'] = layerParams['filler'] - scale_param['bias_term'] = layerParams['bias_term'] - if layerParams['bias_filler'] != '': - scale_param['bias_filler'] = {} - try: - scale_param['bias_filler']['type'] = \ - fillerMap[layerParams['bias_filler']] - except: - scale_param['bias_filler']['type'] = layerParams['bias_filler'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Scale( - *[ns[x] for x in blobNames[layerId]['bottom']], - scale_param=scale_param - )) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Flatten(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - flatten_param = {} - flatten_param['axis'] = layerParams['axis'] - flatten_param['end_axis'] = layerParams['end_axis'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Flatten( - *[ns[x] for x in blobNames[layerId]['bottom']], - flatten_param=flatten_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value 
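-    # note on the pattern shared by all export_* helpers above: both NetSpecs
-    # are filled in lockstep because json_to_prototxt() below serialises a
-    # train and a test prototxt separately and then merges them into a single
-    # train_test prototxt, so each helper mirrors its top blobs into both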
- return ns_train, ns_test - - -def export_Reshape(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - reshape_param = {'shape': {'dim': map(int, layerParams['dim'].split(','))}} - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Reshape( - *[ns[x] for x in blobNames[layerId]['bottom']], - reshape_param=reshape_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_BatchReindex(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.BatchReindex( - *[ns[x] for x in blobNames[layerId]['bottom']])) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Split(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Split( - *[ns[x] for x in blobNames[layerId]['bottom']])) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Concat(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Concat( - *[ns[x] for x in blobNames[layerId]['bottom']], - ntop=len(blobNames[layerId]['top']))) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Slice(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - slice_param = {} - slice_param['slice_point'] = map( - int, layerParams['slice_point'].split(',')) - slice_param['axis'] = layerParams['axis'] - slice_param['slice_dim'] = layerParams['slice_dim'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Slice( - *[ns[x] for x in blobNames[layerId]['bottom']], - slice_param=slice_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Eltwise(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - eltwise_param = {} - if layerParams['layer_type'] != '': - elt = layerParams['layer_type'] - if(elt == 'Product'): - elt = 0 - elif(elt == 'Sum'): - elt = 1 - elif(elt == 'Maximum'): - elt = 2 - else: - elt = 1 # Default is sum - eltwise_param['operation'] = elt - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Eltwise( - *[ns[x] for x in blobNames[layerId]['bottom']], - eltwise_param=eltwise_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Filter(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Filter( - *[ns[x] for x in blobNames[layerId]['bottom']], - ntop=len(blobNames[layerId]['top']))) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -# This layer is currently not supported as there is no bottom blob -# def export_Parameter(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): -# parameter_param = {} -# parameter_param['shape'] = map(int, layerParams['shape'].split(',')) -# for ns in (ns_train, ns_test): -# caffeLayer = get_iterable(L.Parameter( -# parameter_param=parameter_param)) -# for key, value in zip(blobNames[layerId]['top'], caffeLayer): -# ns[key] = value -# return ns_train, ns_test - - -def export_Reduction(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): 
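-    # the numeric codes below follow the ReductionParameter.ReductionOp enum
-    # in caffe.proto: SUM = 1, ASUM = 2, SUMSQ = 3, MEAN = 4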
-    reduction_param = {}
-    if(layerParams['operation'] == 'SUM'):
-        reduction_param['operation'] = 1
-    elif(layerParams['operation'] == 'ASUM'):
-        reduction_param['operation'] = 2
-    elif(layerParams['operation'] == 'SUMSQ'):
-        reduction_param['operation'] = 3
-    elif(layerParams['operation'] == 'MEAN'):
-        reduction_param['operation'] = 4
-    reduction_param['axis'] = layerParams['axis']
-    reduction_param['coeff'] = layerParams['coeff']
-    for ns in (ns_train, ns_test):
-        caffeLayer = get_iterable(L.Reduction(
-            *[ns[x] for x in blobNames[layerId]['bottom']],
-            reduction_param=reduction_param))
-        for key, value in zip(blobNames[layerId]['top'], caffeLayer):
-            ns[key] = value
-    return ns_train, ns_test
-
-
-def export_Silence(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames):
-    for ns in (ns_train, ns_test):
-        caffeLayer = get_iterable(L.Silence(
-            *[ns[x] for x in blobNames[layerId]['bottom']],
-            ntop=len(blobNames[layerId]['top'])))
-        for key, value in zip(blobNames[layerId]['top'], caffeLayer):
-            ns[key] = value
-    return ns_train, ns_test
-
-
-def export_ArgMax(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames):
-    argmax_param = {}
-    argmax_param['out_max_val'] = layerParams['out_max_val']
-    argmax_param['top_k'] = layerParams['top_k']
-    argmax_param['axis'] = layerParams['axis']
-    for ns in (ns_train, ns_test):
-        caffeLayer = get_iterable(L.ArgMax(
-            *[ns[x] for x in blobNames[layerId]['bottom']],
-            argmax_param=argmax_param))
-        for key, value in zip(blobNames[layerId]['top'], caffeLayer):
-            ns[key] = value
-    return ns_train, ns_test
-
-
-def export_Softmax(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames):
-    for ns in (ns_train, ns_test):
-        caffeLayer = get_iterable(L.Softmax(
-            *([ns[x] for x in blobNames[layerId]['bottom']])))
-        # *([ns[x] for x in blobNames[layerId]['bottom']] + [ns.label])))
-        for key, value in zip(blobNames[layerId]['top'], caffeLayer):
-            ns[key] = value
-    return ns_train, ns_test
-
-
-def export_MultinomialLogisticLoss(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames):
-    for ns in (ns_train, ns_test):
-        caffeLayer = get_iterable(L.MultinomialLogisticLoss(
-            *[ns[x] for x in blobNames[layerId]['bottom']]))
-        for key, value in zip(blobNames[layerId]['top'], caffeLayer):
-            ns[key] = value
-    return ns_train, ns_test
-
-
-def export_InfogainLoss(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames):
-    infogain_loss_param = {}
-    infogain_loss_param['source'] = layerParams['source']
-    infogain_loss_param['axis'] = layerParams['axis']
-    for ns in (ns_train, ns_test):
-        caffeLayer = get_iterable(L.InfogainLoss(
-            *[ns[x] for x in blobNames[layerId]['bottom']],
-            infogain_loss_param=infogain_loss_param))
-        for key, value in zip(blobNames[layerId]['top'], caffeLayer):
-            ns[key] = value
-    return ns_train, ns_test
-
-
-def export_SoftmaxWithLoss(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames):
-    softmax_param = {'axis': layerParams['axis']}
-    for ns in (ns_train, ns_test):
-        caffeLayer = get_iterable(L.SoftmaxWithLoss(  # try L['SoftmaxWithLoss']
-            *([ns[x] for x in blobNames[layerId]['bottom']]),
-            softmax_param=softmax_param))
-        # *([ns[x] for x in blobNames[layerId]['bottom']] + [ns.label])))
-        for key, value in zip(blobNames[layerId]['top'], caffeLayer):
-            ns[key] = value
-    return ns_train, ns_test
-
-
-def export_EuclideanLoss(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames):
-    for ns in (ns_train, ns_test):
-        caffeLayer = get_iterable(L.EuclideanLoss(
-            *[ns[x] for x in
blobNames[layerId]['bottom']])) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_HingeLoss(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - hinge_loss_param = {'norm': layerParams['norm']} - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.HingeLoss( - *[ns[x] for x in blobNames[layerId]['bottom']], - hinge_loss_param=hinge_loss_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_SigmoidCrossEntropyLoss(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.SigmoidCrossEntropyLoss( - *[ns[x] for x in blobNames[layerId]['bottom']])) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Accuracy(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - accuracy_param = {} - accuracy_param['top_k'] = layerParams['top_k'] - accuracy_param['axis'] = layerParams['axis'] - if layerPhase is not None: - if int(layerPhase) == 0: - caffeLayer = get_iterable(L.Accuracy( - *([ns_train[x] for x in blobNames[layerId]['bottom']]), - accuracy_param=accuracy_param, - # *([ns[x] for x in blobNames[layerId]['bottom']] + [ns.label]), - include={ - 'phase': int(layerPhase) - })) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_train[key] = value - elif int(layerPhase) == 1: - caffeLayer = get_iterable(L.Accuracy( - *([ns_test[x] for x in blobNames[layerId]['bottom']]), - accuracy_param=accuracy_param, - # *([ns[x] for x in blobNames[layerId]['bottom']] + [ns.label]), - include={ - 'phase': int(layerPhase) - })) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns_test[key] = value - else: - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Accuracy( - *([ns[x] for x in blobNames[layerId]['bottom']]), - accuracy_param=accuracy_param)) - # *([ns[x] for x in blobNames[layerId]['bottom']] + [ns.label]))) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_ContrastiveLoss(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - contrastive_loss_param = {} - contrastive_loss_param['margin'] = layerParams['margin'] - contrastive_loss_param['legacy_version'] = layerParams['legacy_version'] - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.ContrastiveLoss( - *[ns[x] for x in blobNames[layerId]['bottom']], - contrastive_loss_param=contrastive_loss_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -def export_Python(layerId, layerParams, layerPhase, ns_train, ns_test, blobNames): - # Parameters not to be included in param_str - notParamStr = ['module', 'layer', 'endPoint', - 'loss_weight', 'dragDrop', 'param_str'] - hasParamStr = False - python_param = {} - python_param['module'] = layerParams['module'] - python_param['layer'] = layerParams['layer'] - for param in layerParams: - if (param not in notParamStr): - hasParamStr = True - if 'param_str' not in python_param.keys(): - python_param['param_str'] = {} - if isinstance(layerParams[param], str): - try: - python_param['param_str'][param] = map(int, - layerParams[param].split(',')) - except: - python_param['param_str'][param] = layerParams[param] - else: - python_param['param_str'][param] = layerParams[param] - if 'dragDrop' in 
layerParams.keys(): - python_param['param_str'] = layerParams['param_str'] - if (hasParamStr): - python_param['param_str'] = str(python_param['param_str']) - if 'loss_weight' in layerParams: - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Python( - *[ns[x] for x in blobNames[layerId]['bottom']], - python_param=python_param, loss_weight=layerParams['loss_weight'])) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - else: - for ns in (ns_train, ns_test): - caffeLayer = get_iterable(L.Python( - *[ns[x] for x in blobNames[layerId]['bottom']], - python_param=python_param)) - for key, value in zip(blobNames[layerId]['top'], caffeLayer): - ns[key] = value - return ns_train, ns_test - - -layer_map = { - 'ImageData': export_ImageData, - 'Data': export_Data, - 'HDF5Data': export_HDF5Data, - 'HDF5Output': export_HDF5Output, - 'Input': export_Input, - 'WindowData': export_WindowData, - 'MemoryData': export_MemoryData, - 'DummyData': export_DummyData, - 'Convolution': export_Convolution, - 'Pooling': export_Pooling, - 'Crop': export_Crop, - 'SPP': export_SPP, - 'Deconvolution': export_Deconvolution, - 'Recurrent': export_Recurrent, - 'RNN': export_RNN, - 'LSTM': export_LSTM, - 'InnerProduct': export_InnerProduct, - 'Dropout': export_Dropout, - 'Embed': export_Embed, - 'LRN': export_LRN, - 'MVN': export_MVN, - 'BatchNorm': export_BatchNorm, - 'ReLU': export_ReLU, - 'PReLU': export_PReLU, - 'ELU': export_ELU, - 'Sigmoid': export_Sigmoid, - 'TanH': export_TanH, - 'AbsVal': export_AbsVal, - 'Power': export_Power, - 'Exp': export_Exp, - 'Log': export_Log, - 'BNLL': export_BNLL, - 'Threshold': export_Threshold, - 'Bias': export_Bias, - 'Scale': export_Scale, - 'Flatten': export_Flatten, - 'Reshape': export_Reshape, - 'BatchReindex': export_BatchReindex, - 'Split': export_Split, - 'Concat': export_Concat, - 'Slice': export_Slice, - 'Eltwise': export_Eltwise, - 'Filter': export_Filter, - # 'Parameter': export_Parameter, - 'Reduction': export_Reduction, - 'Silence': export_Silence, - 'ArgMax': export_ArgMax, - 'Softmax': export_Softmax, - 'MultinomialLogisticLoss': export_MultinomialLogisticLoss, - 'InfogainLoss': export_InfogainLoss, - 'SoftmaxWithLoss': export_SoftmaxWithLoss, - 'EuclideanLoss': export_EuclideanLoss, - 'HingeLoss': export_HingeLoss, - 'SigmoidCrossEntropyLoss': export_SigmoidCrossEntropyLoss, - 'Accuracy': export_Accuracy, - 'ContrastiveLoss': export_ContrastiveLoss, - 'Python': export_Python -} - - -def json_to_prototxt(net, net_name): - # assumption: a layer can accept only one input blob - # the data layer produces two blobs: data and label - # the loss layer requires two blobs: and label - # the label blob is hardcoded. 
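-    # minimal usage sketch (hypothetical single-layer net dict; the field
-    # names follow what the traversal below reads):
-    #   net = {'l0': {'info': {'type': 'Input', 'phase': None},
-    #                 'params': {'dim': '10,3,224,224', 'caffe': True},
-    #                 'connection': {'input': [], 'output': []}}}
-    #   prototxt, input_dim = json_to_prototxt(net, 'MyNet')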
- # layers name have to be unique - - # custom DFS of the network - input_dim = None - stack = [] - layersProcessed = {} - processOrder = [] - blobNames = {} - for layerId in net: - layersProcessed[layerId] = False - blobNames[layerId] = { - 'bottom': [], - 'top': [], - } - blobId = 0 - - def isProcessPossible(layerId): - inputs = net[layerId]['connection']['input'] - for layerId in inputs: - if layersProcessed[layerId] is False: - return False - return True - - # finding the data layer - dataLayers = ['ImageData', 'Data', 'HDF5Data', - 'Input', 'WindowData', 'MemoryData', 'DummyData'] - for layerId in net: - if (net[layerId]['info']['type'] == 'Python'): - if ('endPoint' not in net[layerId]['params'].keys()): - net[layerId]['params']['dragDrop'] = True - if (not net[layerId]['connection']['input']): - stack.append(layerId) - else: - if (net[layerId]['params']['endPoint'] == "1, 0"): - stack.append(layerId) - if(net[layerId]['info']['type'] in dataLayers): - stack.append(layerId) - - def changeTopBlobName(layerId, newName): - blobNames[layerId]['top'] = newName - - while len(stack): - - i = len(stack) - 1 - - while isProcessPossible(stack[i]) is False: - i = i - 1 - - layerId = stack[i] - stack.remove(stack[i]) - - inputs = net[layerId]['connection']['input'] - if len(inputs) > 0: - if len(inputs) == 2 and (net[inputs[0]]['info']['phase'] is not None) \ - and (net[inputs[1]]['info']['phase']): - commonBlobName = blobNames[inputs[0]]['top'] - changeTopBlobName(inputs[1], commonBlobName) - blobNames[layerId]['bottom'] = commonBlobName - else: - inputBlobNames = [] - for inputId in inputs: - inputBlobNames.extend(blobNames[inputId]['top']) - blobNames[layerId]['bottom'] = inputBlobNames - - blobNames[layerId]['top'] = ['blob' + str(blobId)] - blobId = blobId + 1 - - for outputId in net[layerId]['connection']['output']: - if outputId not in stack: - stack.append(outputId) - - layersProcessed[layerId] = True - processOrder.append(layerId) - - ns_train = caffe.NetSpec() - ns_test = caffe.NetSpec() - - for layerId in processOrder: - - layer = net[layerId] - layerParams = layer['params'] - layerType = layer['info']['type'] - layerPhase = layer['info']['phase'] - - if (str(layerType) == "Input"): - input_dim = layerParams['dim'] - - if (not layerParams['caffe']): - if ('layer_type' in layerParams): - raise Exception('Cannot export layer of type ' + layerType + ' ' + layerParams['layer_type'] - + ' to Caffe.') - else: - raise Exception('Cannot export layer of type ' + - layerType + ' to Caffe.') - ns_train, ns_test = layer_map[layerType](layerId, layerParams, layerPhase, - ns_train, ns_test, blobNames) - - train = 'name: "' + net_name + '"\n' + str(ns_train.to_proto()) - test = str(ns_test.to_proto()) - - # merge the train and test prototxt to get a single train_test prototxt - testIndex = [m.start() for m in re.finditer('layer', test)] - - previousIndex = -1 - for i in range(len(testIndex)): - if i < len(testIndex) - 1: - layer = test[testIndex[i]:testIndex[i + 1]] - else: - layer = test[testIndex[i]:] - a = train.find(layer) - if a != -1: - l = test[testIndex[previousIndex + 1]:testIndex[i]] - train = train[0:a] + l + train[a:] - previousIndex = i - if previousIndex < len(testIndex) - 1: - l = test[testIndex[previousIndex + 1]:] - train = train + l - - prototxt = train - return prototxt, input_dim -import numpy as np -from collections import deque - - -def data(layer): - Input = [] - if (layer['info']['type'] in ['ImageData', 'Data', 'WindowData']): - if (('crop_size' in layer['params']) and 
(layer['params']['crop_size'] != 0)): - Output = [3] + [layer['params']['crop_size']]*2 - elif (('new_height' in layer['params']) and ('new_width' in layer['params'])): - Output = [3, layer['params']['new_height'], - layer['params']['new_width']] - else: - # When a new layer is created with default parameters - Output = [] - elif (layer['info']['type'] in ['Input', 'DummyData']): - Output = map(int, layer['params']['dim'].split(','))[1:] - elif (layer['info']['type'] == 'MemoryData'): - Output = [3, layer['params']['height'], layer['params']['width']] - else: - raise Exception('Cannot determine shape of ' + - layer['info']['type'] + ' layer.') - return Input, Output - - -def identity(layer): - return layer['shape']['input'] - - -def filter(layer): - if (layer['info']['type'] == 'Pooling'): - num_out = layer['shape']['input'][0] - else: - num_out = layer['params']['num_output'] - if (layer['info']['type'] == 'Deconvolution'): - _, i_h, i_w = layer['shape']['input'] - k_h, k_w = layer['params']['kernel_h'], layer['params']['kernel_w'] - s_h, s_w = layer['params']['stride_h'], layer['params']['stride_w'] - p_h, p_w = layer['params']['pad_h'], layer['params']['pad_w'] - - o_h = i_h * s_h - o_w = i_w * s_w - if ('padding' in layer['params'] and layer['params']['padding'] == 'VALID'): - # handling tensorflow deconv layer separately - o_h += max(k_h - s_h, 0) - o_w += max(k_w - s_w, 0) - - return [num_out, o_h, o_w] - elif (layer['info']['type'] == 'DepthwiseConv'): - _, i_h, i_w = layer['shape']['input'] - k_h, k_w = layer['params']['kernel_h'], layer['params']['kernel_w'] - s_h, s_w = layer['params']['stride_h'], layer['params']['stride_w'] - p_h, p_w = layer['params']['pad_h'], layer['params']['pad_w'] - o_h = int((i_h - 1)*s_h + k_h - 2*p_h) - o_w = int((i_w - 1)*s_w + k_w - 2*p_w) - - return [num_out, o_h, o_w] - else: - if (layer['params']['layer_type'] == '1D'): - try: - _, i_w = layer['shape']['input'] - k_w = layer['params']['kernel_w'] - s_w = layer['params']['stride_w'] - p_w = layer['params']['pad_w'] - o_w = int((i_w + 2 * p_w - k_w) / float(s_w) + 1) - except: - return [num_out, 0] - return [num_out, o_w] - elif (layer['params']['layer_type'] == '2D'): - try: - _, i_h, i_w = layer['shape']['input'] - k_h, k_w = layer['params']['kernel_h'], layer['params']['kernel_w'] - s_h, s_w = layer['params']['stride_h'], layer['params']['stride_w'] - p_h, p_w = layer['params']['pad_h'], layer['params']['pad_w'] - o_h = int((i_h + 2 * p_h - k_h) / float(s_h) + 1) - o_w = int((i_w + 2 * p_w - k_w) / float(s_w) + 1) - except: - return [num_out, 0, 0] - return [num_out, o_h, o_w] - else: - try: - _, i_d, i_h, i_w = layer['shape']['input'] - k_h, k_w, k_d = layer['params']['kernel_h'], layer['params']['kernel_w'],\ - layer['params']['kernel_d'] - s_h, s_w, s_d = layer['params']['stride_h'], layer['params']['stride_w'],\ - layer['params']['stride_d'] - p_h, p_w, p_d = layer['params']['pad_h'], layer['params']['pad_w'],\ - layer['params']['pad_d'] - o_h = int((i_h + 2 * p_h - k_h) / float(s_h) + 1) - o_w = int((i_w + 2 * p_w - k_w) / float(s_w) + 1) - o_d = int((i_d + 2 * p_d - k_d) / float(s_d) + 1) - except: - return [num_out, 0, 0, 0] - return [num_out, o_d, o_h, o_w] - - -def upsample(layer): - if (layer['params']['layer_type'] == '1D'): - num_out, i_w = layer['shape']['input'] - s_w = layer['params']['size_w'] - o_w = int(i_w*s_w) - return [num_out, o_w] - elif (layer['params']['layer_type'] == '2D'): - num_out, i_h, i_w = layer['shape']['input'] - s_h, s_w = layer['params']['size_h'], 
layer['params']['size_w'] - o_w = int(i_w*s_w) - o_h = int(i_h*s_h) - return [num_out, o_h, o_w] - else: - num_out, i_h, i_w, i_d = layer['shape']['input'] - s_h, s_w, s_d = layer['params']['size_h'], layer['params']['size_w'],\ - layer['params']['size_d'] - o_w = int(i_w*s_w) - o_h = int(i_h*s_h) - o_d = int(i_d*s_d) - return [num_out, o_h, o_w, o_d] - - -def output(layer): - return [layer['params']['num_output']] - - -def flatten(layer): - out = 1 - for i in layer['shape']['input']: - if (i > 0): - out *= i - return [out] - - -def reshape(layer): - temp = np.zeros(layer['shape']['input']) - shape = map(int, layer['params']['dim'].split(','))[1:] - temp = np.reshape(temp, shape) - return list(temp.shape[::-1]) - - -def repeat(layer): - shape = layer['shape']['input'] - shape = shape + [layer['params']['n']] - return shape - - -def handle_concat_layer(outputLayer, inputLayer): - if('input' not in outputLayer['shape']): - shape = inputLayer['shape']['output'][:] - else: - old_num_output = outputLayer['shape']['input'][0] - shape = inputLayer['shape']['output'][:] - shape[0] += old_num_output - return shape - - -def get_layer_shape(layer): - # separating checking the type of layer inorder to make it modular - # which can be reused in case we only want to get shapes of a single - # layer, for example: if a new layer is added to already drawn model - dataLayers = ['ImageData', 'Data', 'HDF5Data', - 'Input', 'WindowData', 'MemoryData', 'DummyData'] - - if(layer['info']['type'] in dataLayers): - return data(layer) - - elif(layer['info']['type'] in ['Convolution', 'Pooling', 'Deconvolution', 'DepthwiseConv']): - return filter(layer) - - elif(layer['info']['type'] in ['InnerProduct', 'Recurrent', 'RNN', 'LSTM', 'Embed']): - return output(layer) - - elif(layer['info']['type'] == 'Flatten'): - return flatten(layer) - - elif(layer['info']['type'] == 'Reshape'): - return reshape(layer) - - elif(layer['info']['type'] == 'Upsample'): - return upsample(layer) - - elif(layer['info']['type'] == 'RepeatVector'): - return repeat(layer) - - elif(layer['info']['type'] in ['SPP', 'Crop']): - raise Exception('Cannot determine shape of ' + - layer['info']['type'] + 'layer.') - - else: - return identity(layer) - - -def get_shapes(net): - queue = deque([]) - dataLayers = ['ImageData', 'Data', 'HDF5Data', - 'Input', 'WindowData', 'MemoryData', 'DummyData'] - processedLayer = {} - layer_indegree = {} - - # Finding the data layer - for layerId in net: - processedLayer[layerId] = False - # store indegree of every layer for Topological sort - layer_indegree[layerId] = len(net[layerId]['connection']['input']) - net[layerId]['shape'] = {} - if (net[layerId]['info']['type'] == 'Python'): - if ('endPoint' not in net[layerId]['params'].keys()): - if (not net[layerId]['connection']['input']): - raise Exception('Cannot determine shape of Python layer.') - else: - if (net[layerId]['params']['endPoint'] == "1, 0"): - raise Exception('Cannot determine shape of Python layer.') - if(net[layerId]['info']['type'] in dataLayers): - queue.append(layerId) - - while(len(queue)): - # using deque as stack - layerId = queue.pop() - - if(net[layerId]['info']['type'] in dataLayers): - net[layerId]['shape']['input'], net[layerId]['shape']['output'] = get_layer_shape( - net[layerId]) - else: - net[layerId]['shape']['output'] = get_layer_shape(net[layerId]) - - for outputId in net[layerId]['connection']['output']: - if (not processedLayer[outputId]): - # Handling Concat layer separately - if (net[outputId]['info']['type'] == "Concat"): - 
net[outputId]['shape']['input'] = handle_concat_layer( - net[outputId], net[layerId]) - else: - net[outputId]['shape']['input'] = net[layerId]['shape']['output'][:] - - # Decrement indegree of every output node of current layer - layer_indegree[outputId] -= 1 - - if layer_indegree[outputId] == 0: - queue.append(outputId) - else: - if (net[outputId]['info']['type'] == "Concat"): - net[outputId]['shape']['input'] = handle_concat_layer( - net[outputId], net[layerId]) - - processedLayer[layerId] = True - - return net -# Make sure to fill in all data! - -config = { - 'LRN': { - 'filename': 'lrn.py', - 'url': '/media/lrn.py' - } -} -# Implementation for Custom LRN layer used from -# https://github.com/keras-team/keras/issues/1549 -from keras.layers.core import Layer -from keras import backend as K - - -class LRN(Layer): - - def __init__(self, alpha=1e-4, k=2, beta=0.75, n=5, **kwargs): - if n % 2 == 0: - raise NotImplementedError( - "LRN only works with odd n. n provided: " + str(n)) - super(LRN, self).__init__(**kwargs) - self.alpha = alpha - self.k = k - self.beta = beta - self.n = n - - def get_output(self, train): - X = self.get_input(train) - b, ch, r, c = K.shape(X) - half_n = self.n // 2 - input_sqr = K.square(X) - extra_channels = K.zeros((b, ch + 2 * half_n, r, c)) - input_sqr = K.concatenate([extra_channels[:, :half_n, :, :], - input_sqr, - extra_channels[:, half_n + ch:, :, :]], - axis=1) - scale = self.k - for i in range(self.n): - scale += self.alpha * input_sqr[:, i:i + ch, :, :] - scale = scale ** self.beta - return X / scale - - def get_config(self): - config = {"name": self.__class__.__name__, - "alpha": self.alpha, - "k": self.k, - "beta": self.beta, - "n": self.n, - "name": self.name} - base_config = super(LRN, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -''' -For Keras import or export, the following layers will require TF version >1.0 -* Concatenate -* Embedding -* SimpleRNN -* LSTM -''' -import json -import os -import random -import string -import yaml -from datetime import datetime -from django.views.decorators.csrf import csrf_exempt -from django.http import JsonResponse -from keras.models import Model -from layers_export import data, convolution, deconvolution, pooling, dense, dropout, embed,\ - recurrent, batch_norm, activation, flatten, reshape, eltwise, concat, upsample, locally_connected,\ - permute, repeat_vector, regularization, masking, gaussian_noise, gaussian_dropout, alpha_dropout, \ - bidirectional, time_distributed, lrn, depthwiseConv -from ..custom_layers import config as custom_layers_config - - -BASE_DIR = os.path.dirname( - os.path.dirname( - os.path.dirname( - os.path.abspath(__file__)))) - - -def randomword(length): - return ''.join(random.choice(string.lowercase) for i in range(length)) - - -@csrf_exempt -def export_json(request, is_tf=False): - # Note : Remove the views for export by adding unittest for celery tasks - if request.method == 'POST': - net = yaml.safe_load(request.POST.get('net')) - net_name = request.POST.get('net_name') - if net_name == '': - net_name = 'Net' - - layer_map = { - 'ImageData': data, - 'Data': data, - 'Input': data, - 'WindowData': data, - 'MemoryData': data, - 'DummyData': data, - 'InnerProduct': dense, - 'Softmax': activation, - 'SELU': activation, - 'Softplus': activation, - 'Softsign': activation, - 'ReLU': activation, - 'TanH': activation, - 'Sigmoid': activation, - 'HardSigmoid': activation, - 'Linear': activation, - 'Dropout': dropout, - 'Flatten': flatten, - 'Reshape': 
-'''
-For Keras import or export, the following layers will require TF version >1.0
-* Concatenate
-* Embedding
-* SimpleRNN
-* LSTM
-'''
-import json
-import os
-import random
-import string
-import yaml
-from datetime import datetime
-from django.views.decorators.csrf import csrf_exempt
-from django.http import JsonResponse
-from keras.models import Model
-from layers_export import data, convolution, deconvolution, pooling, dense, dropout, embed,\
-    recurrent, batch_norm, activation, flatten, reshape, eltwise, concat, upsample, locally_connected,\
-    permute, repeat_vector, regularization, masking, gaussian_noise, gaussian_dropout, alpha_dropout, \
-    bidirectional, time_distributed, lrn, depthwiseConv
-from ..custom_layers import config as custom_layers_config
-
-
-BASE_DIR = os.path.dirname(
-    os.path.dirname(
-        os.path.dirname(
-            os.path.abspath(__file__))))
-
-
-def randomword(length):
-    return ''.join(random.choice(string.lowercase) for i in range(length))
-
-
-@csrf_exempt
-def export_json(request, is_tf=False):
-    # Note : Remove the views for export by adding unittest for celery tasks
-    if request.method == 'POST':
-        net = yaml.safe_load(request.POST.get('net'))
-        net_name = request.POST.get('net_name')
-        if net_name == '':
-            net_name = 'Net'
-
-        layer_map = {
-            'ImageData': data,
-            'Data': data,
-            'Input': data,
-            'WindowData': data,
-            'MemoryData': data,
-            'DummyData': data,
-            'InnerProduct': dense,
-            'Softmax': activation,
-            'SELU': activation,
-            'Softplus': activation,
-            'Softsign': activation,
-            'ReLU': activation,
-            'TanH': activation,
-            'Sigmoid': activation,
-            'HardSigmoid': activation,
-            'Linear': activation,
-            'Dropout': dropout,
-            'Flatten': flatten,
-            'Reshape': reshape,
-            'Permute': permute,
-            'RepeatVector': repeat_vector,
-            'Regularization': regularization,
-            'Masking': masking,
-            'Convolution': convolution,
-            'Deconvolution': deconvolution,
-            'DepthwiseConv': depthwiseConv,
-            'Upsample': upsample,
-            'Pooling': pooling,
-            'LocallyConnected': locally_connected,
-            'RNN': recurrent,
-            'GRU': recurrent,
-            'LSTM': recurrent,
-            'Embed': embed,
-            'Concat': concat,
-            'Eltwise': eltwise,
-            'PReLU': activation,
-            'ELU': activation,
-            'ThresholdedReLU': activation,
-            'BatchNorm': batch_norm,
-            'GaussianNoise': gaussian_noise,
-            'GaussianDropout': gaussian_dropout,
-            'AlphaDropout': alpha_dropout,
-            'Scale': '',
-            'TimeDistributed': time_distributed,
-            'Bidirectional': bidirectional
-        }
-
-        custom_layers_map = {
-            'LRN': lrn
-        }
-
-        # Remove any duplicate activation layers (timedistributed and bidirectional layers)
-        redundant_layers = []
-        for layerId in net:
-            if (net[layerId]['connection']['input']
-                    and net[net[layerId]['connection']['input'][0]]['info']['type'] in
-                    ['TimeDistributed', 'Bidirectional']):
-                if len(net[layerId]['connection']['output']) > 0:
-                    target = net[layerId]['connection']['output'][0]
-                    outputs = net[target]['connection']['output']
-                    if len(outputs) > 0:
-                        net[layerId]['connection']['output'] = outputs
-                        for j in outputs:
-                            net[j]['connection']['input'] = [
-                                x if (x != target) else layerId for x in net[j]['connection']['input']]
-                    redundant_layers.append(target)
-            elif (net[layerId]['info']['type'] == 'Input'
-                    and net[net[layerId]['connection']['output'][0]]['info']['type'] in
-                    ['TimeDistributed', 'Bidirectional']):
-                connected_layer = net[layerId]['connection']['output'][0]
-                net[connected_layer]['params']['batch_input_shape'] = net[layerId]['params']['dim']
-        for i in redundant_layers:
-            del net[i]
-
-        # Check if conversion is possible
-        error = []
-        custom_layers = []
-        for key, value in custom_layers_map.iteritems():
-            layer_map[key] = value
-        for layerId in net:
-            layerType = net[layerId]['info']['type']
-            if (layerType in custom_layers_map):
-                custom_layers.append(layerType)
-            if ('Loss' in layerType or layerType ==
-                    'Accuracy' or layerType in layer_map):
-                pass
-            else:
-                error.append(layerId + '(' + layerType + ')')
-        if len(error):
-            return JsonResponse(
-                {'result': 'error', 'error': 'Cannot convert ' + ', '.join(error) + ' to Keras'})
-
-        stack = []
-        net_out = {}
-        dataLayers = ['ImageData', 'Data', 'HDF5Data', 'Input', 'WindowData',
-                      'MemoryData', 'DummyData', 'Bidirectional',
-                      'TimeDistributed']
-        processedLayer = {}
-        inputLayerId = []
-        outputLayerId = []
-
-        def isProcessPossible(layerId):
-            inputs = net[layerId]['connection']['input']
-            for layerId in inputs:
-                if processedLayer[layerId] is False:
-                    return False
-            return True
-
-        # Finding the data layer
-        for layerId in net:
-            processedLayer[layerId] = False
-            if (net[layerId]['info']['type'] == 'Python'):
-                error.append(layerId + '(Python)')
-                continue
-            if(net[layerId]['info']['type'] in dataLayers):
-                stack.append(layerId)
-            if (not net[layerId]['connection']['input']):
-                inputLayerId.append(layerId)
-            if (not net[layerId]['connection']['output']):
-                outputLayerId.append(layerId)
-        if len(error):
-            return JsonResponse(
-                {'result': 'error', 'error': 'Cannot convert ' + ', '.join(error) + ' to Keras'})
-
-        while(len(stack)):
-            if ('Loss' in net[layerId]['info']['type'] or
-                    net[layerId]['info']['type'] == 'Accuracy'):
-                pass
-            elif (net[layerId]['info']['type'] in layer_map):
-                i = len(stack) - 1
-                while isProcessPossible(stack[i]) is False:
-                    i = i - 1
-                layerId = stack[i]
-                stack.remove(layerId)
-                if (net[layerId]['info']['type'] != 'Scale'):
-                    layer_in = [net_out[inputId]
-                                for inputId in net[layerId]['connection']['input']]
-                # Need to check if next layer is Scale
-                if (net[layerId]['info']['type'] == 'BatchNorm'):
-                    idNext = net[layerId]['connection']['output'][0]
-                    nextLayer = net[idNext]
-                    # If the BN layer is followed by Scale, then we need to pass both layers
-                    # as in Keras parameters from both go into one single layer
-                    net_out.update(layer_map[net[layerId]['info']['type']](
-                        net[layerId], layer_in, layerId, idNext, nextLayer))
-                elif (net[layerId]['info']['type'] == 'Scale'):
-                    type = net[net[layerId]['connection']
-                               ['input'][0]]['info']['type']
-                    if (type != 'BatchNorm'):
-                        return JsonResponse({'result': 'error', 'error': 'Cannot convert ' +
-                                             net[layerId]['info']['type'] + ' to Keras'})
-                elif (net[layerId]['info']['type'] in ['TimeDistributed', 'Bidirectional']):
-                    idNext = net[layerId]['connection']['output'][0]
-                    net_out.update(
-                        layer_map[net[layerId]['info']['type']](layerId, idNext, net, layer_in, layer_map))
-                    if len(net[idNext]['connection']['output']) > 0:
-                        net[net[idNext]['connection']['output'][0]
-                            ]['connection']['input'] = [layerId]
-                    processedLayer[idNext] = True
-                    processedLayer[layerId] = True
-                else:
-                    net_out.update(layer_map[net[layerId]['info']['type']](
-                        net[layerId], layer_in, layerId))
-                for outputId in net[layerId]['connection']['output']:
-                    if outputId not in stack:
-                        stack.append(outputId)
-                processedLayer[layerId] = True
-            else:
-                error.append(
-                    layerId + '(' + net[layerId]['info']['type'] + ')')
-        if len(error):
-            return JsonResponse(
-                {'result': 'error', 'error': 'Cannot convert ' + ', '.join(error) + ' to Keras'})
-
-        final_input = []
-        final_output = []
-        for i in inputLayerId:
-            final_input.append(net_out[i])
-
-        for j in outputLayerId:
-            if (net[net[j]['connection']['input'][0]]['info']['type'] in
-                    ['TimeDistributed', 'Bidirectional']):
-                final_output.append(net_out[net[j]['connection']['input'][0]])
-            else:
-                final_output.append(net_out[j])
-
-        model = Model(inputs=final_input, outputs=final_output, name=net_name)
-        json_string = Model.to_json(model)
-
-        randomId = datetime.now().strftime('%Y%m%d%H%M%S') + randomword(5)
-        with open(BASE_DIR + '/media/' + randomId + '.json', 'w') as f:
-            json.dump(json.loads(json_string), f, indent=4)
-
-        custom_layers_response = []
-        for layer in set(custom_layers):
-            layer_data = {'name': layer}
-            layer_data.update(custom_layers_config.config[layer])
-            custom_layers_response.append(layer_data)
-
-        if not is_tf:
-            return JsonResponse({'result': 'success',
-                                 'id': randomId,
-                                 'name': randomId + '.json',
-                                 'url': '/media/' + randomId + '.json',
-                                 'customLayers': custom_layers_response
-                                 })
-        else:
-            return {'randomId': randomId, 'customLayers': custom_layers_response}
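# Illustrative aside (assumed, not from the scraped file): randomword() above
# relies on Python 2's string.lowercase; the Python 3 spelling of the same
# helper would be:
import random
import string

def randomword_py3(length):
    return ''.join(random.choice(string.ascii_lowercase) for _ in range(length))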
-import json
-import os
-import urllib2
-from urlparse import urlparse
-
-from django.conf import settings
-from django.http import JsonResponse
-from django.views.decorators.csrf import csrf_exempt
-from layers_import import Input, Convolution, Deconvolution, Pooling, Dense, Dropout, Embed,\
-    Recurrent, BatchNorm, Activation, LeakyReLU, PReLU, ELU, Scale, Flatten, Reshape, Concat, \
-    Eltwise, Padding, Upsample, LocallyConnected, ThresholdedReLU, Permute, RepeatVector,\
-    ActivityRegularization, Masking, GaussianNoise, GaussianDropout, AlphaDropout, \
-    TimeDistributed, Bidirectional, DepthwiseConv, lrn
-from keras.models import model_from_json, Sequential
-from keras.layers import deserialize
-from ..custom_layers.lrn import LRN
-
-
-@csrf_exempt
-def import_json(request):
-    loadFromText = False
-    if request.method == 'POST':
-        if ('file' in request.FILES):
-            f = request.FILES['file']
-        elif 'sample_id' in request.POST:
-            try:
-                f = open(os.path.join(settings.BASE_DIR,
-                                      'example', 'keras',
-                                      request.POST['sample_id'] + '.json'), 'r')
-            except Exception:
-                return JsonResponse({'result': 'error',
-                                     'error': 'No JSON model file found'})
-        elif 'config' in request.POST:
-            loadFromText = True
-        elif 'url' in request.POST:
-            try:
-                url = urlparse(request.POST['url'])
-                if url.netloc == 'github.com':
-                    url = url._replace(netloc='raw.githubusercontent.com')
-                    url = url._replace(path=url.path.replace('blob/', ''))
-                f = urllib2.urlopen(url.geturl())
-            except Exception as ex:
-                return JsonResponse({'result': 'error', 'error': 'Invalid URL\n' + str(ex)})
-        try:
-            if loadFromText is True:
-                model = json.loads(request.POST['config'])
-            else:
-                model = json.load(f)
-        except Exception:
-            return JsonResponse({'result': 'error', 'error': 'Invalid JSON'})
-
-        model = model_from_json(json.dumps(model), custom_objects={'LRN': LRN})
-        layer_map = {
-            'InputLayer': Input,
-            'Dense': Dense,
-            'Activation': Activation,
-            'softmax': Activation,
-            'selu': Activation,
-            'softplus': Activation,
-            'softsign': Activation,
-            'relu': Activation,
-            'tanh': Activation,
-            'sigmoid': Activation,
-            'hard_sigmoid': Activation,
-            'linear': Activation,
-            'Dropout': Dropout,
-            'Flatten': Flatten,
-            'Reshape': Reshape,
-            'Permute': Permute,
-            'RepeatVector': RepeatVector,
-            'ActivityRegularization': ActivityRegularization,
-            'Masking': Masking,
-            'Conv1D': Convolution,
-            'Conv2D': Convolution,
-            'Conv2DTranspose': Deconvolution,
-            'Conv3D': Convolution,
-            'SeparableConv2D': DepthwiseConv,
-            'UpSampling1D': Upsample,
-            'UpSampling2D': Upsample,
-            'UpSampling3D': Upsample,
-            'ZeroPadding1D': Padding,
-            'ZeroPadding2D': Padding,
-            'ZeroPadding3D': Padding,
-            'MaxPooling1D': Pooling,
-            'MaxPooling2D': Pooling,
-            'MaxPooling3D': Pooling,
-            'AveragePooling1D': Pooling,
-            'AveragePooling2D': Pooling,
-            'AveragePooling3D': Pooling,
-            'GlobalMaxPooling1D': Pooling,
-            'GlobalAveragePooling1D': Pooling,
-            'GlobalMaxPooling2D': Pooling,
-            'GlobalAveragePooling2D': Pooling,
-            'LocallyConnected1D': LocallyConnected,
-            'LocallyConnected2D': LocallyConnected,
-            'SimpleRNN': Recurrent,
-            'GRU': Recurrent,
-            'LSTM': Recurrent,
-            'Embedding': Embed,
-            'Add': Eltwise,
-            'Multiply': Eltwise,
-            'Average': Eltwise,
-            'Maximum': Eltwise,
-            'Concatenate': Concat,
-            'Dot': Eltwise,
-            'LeakyReLU': LeakyReLU,
-            'PReLU': PReLU,
-            'elu': ELU,
-            'ELU': ELU,
-            'ThresholdedReLU': ThresholdedReLU,
-            'BatchNormalization': BatchNorm,
-            'GaussianNoise': GaussianNoise,
-            'GaussianDropout': GaussianDropout,
-            'AlphaDropout': AlphaDropout,
-            'TimeDistributed': TimeDistributed,
-            'Bidirectional': Bidirectional,
-            'LRN': lrn
-        }
-
-        hasActivation = ['Conv1D', 'Conv2D', 'Conv3D', 'Conv2DTranspose', 'Dense', 'LocallyConnected1D',
-                         'LocallyConnected2D', 'SeparableConv2D', 'LSTM', 'SimpleRNN', 'GRU']
-
-        net = {}
-        # Add dummy input layer if sequential model
-        if (isinstance(model, Sequential)):
-            input_layer = model.layers[0].inbound_nodes[0].inbound_layers[0]
-            # If embedding is the first layer, the input has shape (None, None)
-            if (model.layers[0].__class__.__name__ == 'Embedding'):
-                input_layer.batch_input_shape = (None, model.layers[0].input_dim)
-            net[input_layer.name] = Input(input_layer)
-            net[input_layer.name]['connection']['output'] = [model.layers[0].name]
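# Illustrative example (assumed, not from the scraped file) of the GitHub URL
# rewrite performed in the 'url' branch above, shown with Python 3's
# urllib.parse in place of the Python 2 urlparse module:
from urllib.parse import urlparse
u = urlparse('https://github.com/user/repo/blob/master/model.json')
u = u._replace(netloc='raw.githubusercontent.com')
u = u._replace(path=u.path.replace('blob/', ''))
print(u.geturl())  # https://raw.githubusercontent.com/user/repo/master/model.json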
-        for idx, layer in enumerate(model.layers):
-            name = ''
-            class_name = layer.__class__.__name__
-            wrapped = False
-            if (class_name in layer_map):
-                # This is to handle wrappers and the wrapped layers.
-                if class_name == 'InputLayer':
-                    found = 0
-                    for find_layer in model.layers:
-                        if len(find_layer.inbound_nodes[0].inbound_layers):
-                            if find_layer.inbound_nodes[0].inbound_layers[0].__class__.__name__ == 'InputLayer':
-                                net[layer.name] = Input(layer)
-                                if find_layer.__class__.__name__ in ['Bidirectional', 'TimeDistributed']:
-                                    net[layer.name]['connection']['output'] = [
-                                        find_layer.name]
-                                found = 1
-                                break
-                    if not found:
-                        net[layer.name] = Input(layer)
-
-                elif class_name in ['Bidirectional', 'TimeDistributed']:
-                    net[layer.name] = layer_map[class_name](layer)
-                    wrapped_layer = layer.get_config()['layer']
-                    name = wrapped_layer['config']['name']
-                    new_layer = deserialize({
-                        'class_name': wrapped_layer['class_name'],
-                        'config': wrapped_layer['config']
-                    })
-                    new_layer.wrapped = True
-                    new_layer.wrapper = [layer.name]
-                    if new_layer.activation.func_name != 'linear':
-                        net[name + wrapped_layer['class_name']
-                            ] = layer_map[wrapped_layer['class_name']](new_layer)
-                        net[name] = layer_map[new_layer.activation.func_name](
-                            new_layer)
-                        net[name + wrapped_layer['class_name']
-                            ]['connection']['output'].append(name)
-                        net[name]['connection']['input'] = [
-                            name + wrapped_layer['class_name']]
-                        net[layer.name]['connection']['output'] = [
-                            name + wrapped_layer['class_name']]
-                    else:
-                        net[name] = layer_map[wrapped_layer['class_name']](
-                            new_layer)
-                        net[name]['connection']['input'] = [layer.name]
-                        net[layer.name]['connection']['output'] = [name]
-                    if len(model.layers) >= idx + 2:
-                        net[name]['connection']['output'] = [
-                            model.layers[idx + 1].name]
-                        model.layers[idx +
-                                     1].inbound_nodes[0].inbound_layers = [new_layer]
-                    else:
-                        net[name]['connection']['output'] = []
-                    wrapped = True
-                # This extra logic is to handle connections if the layer has an Activation
-                elif (class_name in hasActivation and layer.activation.func_name != 'linear'):
-                    net[layer.name + class_name] = layer_map[class_name](layer)
-                    net[layer.name] = layer_map[layer.activation.func_name](layer)
-                    net[layer.name +
-                        class_name]['connection']['output'].append(layer.name)
-                    name = layer.name + class_name
-                # To check if a Scale layer is required
-                elif (class_name == 'BatchNormalization' and (
-                        layer.center or layer.scale)):
-                    net[layer.name + class_name] = layer_map[class_name](layer)
-                    net[layer.name] = Scale(layer)
-                    net[layer.name +
-                        class_name]['connection']['output'].append(layer.name)
-                    name = layer.name + class_name
-                else:
-                    net[layer.name] = layer_map[class_name](layer)
-                    name = layer.name
-                if (layer.inbound_nodes[0].inbound_layers) and not wrapped:
-                    for node in layer.inbound_nodes[0].inbound_layers:
-                        net[node.name]['connection']['output'].append(name)
-            else:
-                return JsonResponse({'result': 'error',
-                                     'error': 'Cannot import layer of ' + layer.__class__.__name__ + ' type'})
-                raise Exception('Cannot import layer of ' +
-                                layer.__class__.__name__ + ' type')
-        # collect names of all zeroPad layers
-        zeroPad = []
-        # Transfer parameters and connections from zero pad
-        # The 'pad' param is a list with up to 3 elements
-        for node in net:
-            if (net[node]['info']['type'] == 'Pad'):
-                net[net[node]['connection']['output'][0]]['connection']['input'] = \
-                    net[node]['connection']['input']
-                net[net[node]['connection']['input'][0]]['connection']['output'] = \
-                    net[node]['connection']['output']
-                net[net[node]['connection']['output'][0]]['params']['pad_w'] += \
-                    net[node]['params']['pad'][0]
-                if (net[net[node]['connection']['output'][0]]['params']['layer_type'] == '2D'):
-                    net[net[node]['connection']['output'][0]]['params']['pad_h'] += \
-                        net[node]['params']['pad'][1]
-                elif (net[net[node]['connection']['output'][0]]['params']['layer_type'] == '3D'):
-                    net[net[node]['connection']['output'][0]]['params']['pad_h'] += \
-                        net[node]['params']['pad'][1]
-                    net[net[node]['connection']['output'][0]]['params']['pad_d'] += \
-                        net[node]['params']['pad'][2]
-                zeroPad.append(node)
-            # Switching connection order to handle visualization
-            elif (net[node]['info']['type'] == 'Eltwise'):
-                net[node]['connection']['input'] = net[node]['connection']['input'][::-1]
-        for node in zeroPad:
-            net.pop(node, None)
-        return JsonResponse({'result': 'success', 'net': net, 'net_name': model.name})
-import numpy as np
-
-from keras.layers import Dense, Activation, Dropout, Flatten, Reshape, Permute, RepeatVector
-from keras.layers import ActivityRegularization, Masking
-from keras.layers import Conv1D, Conv2D, Conv3D, Conv2DTranspose, SeparableConv2D
-from keras.layers import UpSampling1D, UpSampling2D, UpSampling3D
-from keras.layers import MaxPooling1D, MaxPooling2D, MaxPooling3D
-from keras.layers import AveragePooling1D, AveragePooling2D, AveragePooling3D
-from keras.layers import ZeroPadding1D, ZeroPadding2D, ZeroPadding3D
-from keras.layers import LocallyConnected1D, LocallyConnected2D
-from keras.layers import SimpleRNN, LSTM, GRU
-from keras.layers import Embedding
-from keras.layers import add, multiply, maximum, concatenate, average, dot
-from keras.layers.advanced_activations import LeakyReLU, PReLU, ELU, ThresholdedReLU
-from keras.layers import BatchNormalization
-from keras.layers import GaussianNoise, GaussianDropout, AlphaDropout
-from keras.layers import Input
-from keras.layers import TimeDistributed, Bidirectional
-from keras import regularizers
-from ..custom_layers.lrn import LRN
-
-fillerMap = {
-    'constant': 'Constant',
-    'uniform': 'RandomUniform',
-    'gaussian': 'RandomNormal',
-    'xavier': 'glorot_normal',
-    'msra': 'he_normal'
-}
-
-regularizerMap = {
-    'l1': regularizers.l1(),
-    'l2': regularizers.l2(),
-    'l1_l2': regularizers.l1_l2(),
-    'L1L2': regularizers.l1_l2(),
-    'None': None
-}
-
-constraintMap = {
-    'max_norm': 'max_norm',
-    'non_neg': 'non_neg',
-    'unit_norm': 'unit_norm',
-    'MaxNorm': 'max_norm',
-    'NonNeg': 'non_neg',
-    'UnitNorm': 'unit_norm',
-    'None': None
-}
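# Illustrative lookup (assumed, not from the scraped file): translating a
# Caffe-style filler name to its Keras initializer name via fillerMap above.
weight_filler = 'xavier'                            # hypothetical Caffe filler
kernel_initializer = fillerMap.get(weight_filler, weight_filler)
print(kernel_initializer)                           # 'glorot_normal'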
-
-
-# ********** Data Layers **********
-def data(layer, layer_in, layerId):
-    out = {layerId: Input(layer['shape']['output']
-                          [1:] + layer['shape']['output'][:1])}
-    return out
-
-
-# ********** Core Layers **********
-def dense(layer, layer_in, layerId, tensor=True):
-    out = {}
-    if (len(layer['shape']['input']) > 1):
-        out[layerId + 'Flatten'] = Flatten()(*layer_in)
-        layer_in = [out[layerId + 'Flatten']]
-    units = layer['params']['num_output']
-    if (layer['params']['weight_filler'] in fillerMap):
-        kernel_initializer = fillerMap[layer['params']['weight_filler']]
-    else:
-        kernel_initializer = layer['params']['weight_filler']
-    if (layer['params']['bias_filler'] in fillerMap):
-        bias_initializer = fillerMap[layer['params']['bias_filler']]
-    else:
-        bias_initializer = layer['params']['bias_filler']
-    # safety checks to avoid runtime errors
-    kernel_regularizer = None
-    bias_regularizer = None
-    activity_regularizer = None
-    kernel_constraint = None
-    bias_constraint = None
-    if 'kernel_regularizer' in layer['params']:
-        kernel_regularizer = regularizerMap[layer['params']
-                                            ['kernel_regularizer']]
-    if 'bias_regularizer' in layer['params']:
-        bias_regularizer = regularizerMap[layer['params']['bias_regularizer']]
-    if 'activity_regularizer' in layer['params']:
-        activity_regularizer = regularizerMap[layer['params']
-                                              ['activity_regularizer']]
-    if 'kernel_constraint' in layer['params']:
-        kernel_constraint = constraintMap[layer['params']['kernel_constraint']]
-    if 'bias_constraint' in layer['params']:
-        bias_constraint = constraintMap[layer['params']['bias_constraint']]
-    use_bias = layer['params']['use_bias']
-    out[layerId] = Dense(units=units, kernel_initializer=kernel_initializer,
-                         kernel_regularizer=kernel_regularizer, bias_regularizer=bias_regularizer,
-                         activity_regularizer=activity_regularizer, bias_constraint=bias_constraint,
-                         kernel_constraint=kernel_constraint, use_bias=use_bias,
-                         bias_initializer=bias_initializer)
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-def activation(layer, layer_in, layerId, tensor=True):
-    out = {}
-    if (layer['info']['type'] == 'ReLU'):
-        if ('negative_slope' in layer['params'] and layer['params']['negative_slope'] != 0):
-            out[layerId] = LeakyReLU(alpha=layer['params']['negative_slope'])
-        else:
-            out[layerId] = Activation('relu')
-    elif (layer['info']['type'] == 'PReLU'):
-        out[layerId] = PReLU()
-    elif (layer['info']['type'] == 'ELU'):
-        out[layerId] = ELU(alpha=layer['params']['alpha'])
-    elif (layer['info']['type'] == 'ThresholdedReLU'):
-        out[layerId] = ThresholdedReLU(theta=layer['params']['theta'])
-    elif (layer['info']['type'] == 'Sigmoid'):
-        out[layerId] = Activation('sigmoid')
-    elif (layer['info']['type'] == 'TanH'):
-        out[layerId] = Activation('tanh')
-    elif (layer['info']['type'] == 'Softmax'):
-        out[layerId] = Activation('softmax')
-    elif (layer['info']['type'] == 'SELU'):
-        out[layerId] = Activation('selu')
-    elif (layer['info']['type'] == 'Softplus'):
-        out[layerId] = Activation('softplus')
-    elif (layer['info']['type'] == 'Softsign'):
-        out[layerId] = Activation('softsign')
-    elif (layer['info']['type'] == 'HardSigmoid'):
-        out[layerId] = Activation('hard_sigmoid')
-    elif (layer['info']['type'] == 'Linear'):
-        out[layerId] = Activation('linear')
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-def dropout(layer, layer_in, layerId, tensor=True):
-    out = {layerId: Dropout(0.5)}
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-def flatten(layer, layer_in, layerId, tensor=True):
-    out = {layerId: Flatten()}
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-def reshape(layer, layer_in, layerId, tensor=True):
-    shape = map(int, layer['params']['dim'].split(','))
-    out = {layerId: Reshape(shape[2:] + shape[1:2])}
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-def permute(layer, layer_in, layerId, tensor=True):
-    out = {layerId: Permute(map(int, layer['params']['dim'].split(',')))}
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-def repeat_vector(layer, layer_in, layerId, tensor=True):
-    out = {layerId: RepeatVector(layer['params']['n'])}
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-def regularization(layer, layer_in, layerId, tensor=True):
-    l1 = layer['params']['l1']
-    l2 = layer['params']['l2']
-    out = {layerId: ActivityRegularization(l1=l1, l2=l2)}
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-def masking(layer, layer_in, layerId, tensor=True):
-    out = {layerId: Masking(mask_value=layer['params']['mask_value'])}
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
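# Illustrative usage (assumed, not from the scraped file) of the builder
# pattern above: with tensor=True the Keras layer is applied to its input
# tensor, with tensor=False only the un-applied layer object is returned
# (used when a wrapper such as TimeDistributed needs the bare layer).
from keras.layers import Input as KInput
x = KInput(shape=(100,))
hypothetical_layer = {'info': {'type': 'RepeatVector'},
                      'params': {'n': 5},
                      'shape': {'input': [100]}}
applied = repeat_vector(hypothetical_layer, [x], 'rv1')        # {'rv1': tensor}
bare = repeat_vector(hypothetical_layer, [x], 'rv1', False)    # {'rv1': layer}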
-
-
-# ********** Convolution Layers **********
-def convolution(layer, layer_in, layerId, tensor=True):
-    convMap = {
-        '1D': Conv1D,
-        '2D': Conv2D,
-        '3D': Conv3D
-    }
-    out = {}
-    padding = get_padding(layer)
-    if (layer['params']['weight_filler'] in fillerMap):
-        kernel_initializer = fillerMap[layer['params']['weight_filler']]
-    else:
-        kernel_initializer = layer['params']['weight_filler']
-    if (layer['params']['bias_filler'] in fillerMap):
-        bias_initializer = fillerMap[layer['params']['bias_filler']]
-    else:
-        bias_initializer = layer['params']['bias_filler']
-    # safety checks to avoid runtime errors
-    filters = layer['params']['num_output']
-    kernel_regularizer = None
-    bias_regularizer = None
-    activity_regularizer = None
-    kernel_constraint = None
-    bias_constraint = None
-    if 'kernel_regularizer' in layer['params']:
-        kernel_regularizer = regularizerMap[layer['params']
-                                            ['kernel_regularizer']]
-    if 'bias_regularizer' in layer['params']:
-        bias_regularizer = regularizerMap[layer['params']['bias_regularizer']]
-    if 'activity_regularizer' in layer['params']:
-        activity_regularizer = regularizerMap[layer['params']
-                                              ['activity_regularizer']]
-    if 'kernel_constraint' in layer['params']:
-        kernel_constraint = constraintMap[layer['params']['kernel_constraint']]
-    if 'bias_constraint' in layer['params']:
-        bias_constraint = constraintMap[layer['params']['bias_constraint']]
-    use_bias = layer['params']['use_bias']
-    layer_type = layer['params']['layer_type']
-    if (layer_type == '1D'):
-        strides = layer['params']['stride_w']
-        kernel = layer['params']['kernel_w']
-        dilation_rate = layer['params']['dilation_w']
-        if (padding == 'custom'):
-            p_w = layer['params']['pad_w']
-            out[layerId + 'Pad'] = ZeroPadding1D(padding=p_w)(*layer_in)
-            padding = 'valid'
-            layer_in = [out[layerId + 'Pad']]
-    elif (layer_type == '2D'):
-        strides = (layer['params']['stride_h'], layer['params']['stride_w'])
-        kernel = (layer['params']['kernel_h'], layer['params']['kernel_w'])
-        dilation_rate = (layer['params']['dilation_h'],
-                         layer['params']['dilation_w'])
-        if (padding == 'custom'):
-            p_h, p_w = layer['params']['pad_h'], layer['params']['pad_w']
-            out[layerId + 'Pad'] = ZeroPadding2D(padding=(p_h, p_w))(*layer_in)
-            padding = 'valid'
-            layer_in = [out[layerId + 'Pad']]
-    else:
-        strides = (layer['params']['stride_h'], layer['params']['stride_w'],
-                   layer['params']['stride_d'])
-        kernel = (layer['params']['kernel_h'], layer['params']['kernel_w'],
-                  layer['params']['kernel_d'])
-        dilation_rate = (layer['params']['dilation_h'], layer['params']['dilation_w'],
-                         layer['params']['dilation_d'])
-        if (padding == 'custom'):
-            p_h, p_w, p_d = layer['params']['pad_h'], layer['params']['pad_w'],\
-                layer['params']['pad_d']
-            out[layerId +
-                'Pad'] = ZeroPadding3D(padding=(p_h, p_w, p_d))(*layer_in)
-            padding = 'valid'
-            layer_in = [out[layerId + 'Pad']]
-    out[layerId] = convMap[layer_type](filters, kernel, strides=strides, padding=padding,
-                                       dilation_rate=dilation_rate,
-                                       kernel_initializer=kernel_initializer,
-                                       bias_initializer=bias_initializer,
-                                       kernel_regularizer=kernel_regularizer,
-                                       bias_regularizer=bias_regularizer,
-                                       activity_regularizer=activity_regularizer, use_bias=use_bias,
-                                       bias_constraint=bias_constraint,
-                                       kernel_constraint=kernel_constraint)
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
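# Illustrative equivalence (assumed, not from the scraped file): Caffe-style
# arbitrary padding has no direct Keras keyword, so the 'custom' branch above
# prepends an explicit ZeroPadding layer and then convolves with 'valid'.
from keras.layers import Input as KInput, ZeroPadding2D, Conv2D
x = KInput(shape=(32, 32, 3))
padded = ZeroPadding2D(padding=(2, 2))(x)          # explicit pad_h=2, pad_w=2
y = Conv2D(16, (5, 5), padding='valid')(padded)    # equivalent to a pad=2 conv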
-
-
-# Separable Convolution is currently not supported with Theano backend
-
-def depthwiseConv(layer, layer_in, layerId, tensor=True):
-    out = {}
-    padding = get_padding(layer)
-    filters = layer['params']['num_output']
-    k_h, k_w = layer['params']['kernel_h'], layer['params']['kernel_w']
-    s_h, s_w = layer['params']['stride_h'], layer['params']['stride_w']
-    depth_multiplier = layer['params']['depth_multiplier']
-    use_bias = layer['params']['use_bias']
-    depthwise_initializer = layer['params']['depthwise_initializer']
-    pointwise_initializer = layer['params']['pointwise_initializer']
-    bias_initializer = layer['params']['bias_initializer']
-    if (padding == 'custom'):
-        p_h, p_w = layer['params']['pad_h'], layer['params']['pad_w']
-        out[layerId + 'Pad'] = ZeroPadding2D(padding=(p_h, p_w))(*layer_in)
-        padding = 'valid'
-        layer_in = [out[layerId + 'Pad']]
-    depthwise_regularizer = regularizerMap[layer['params']
-                                           ['depthwise_regularizer']]
-    pointwise_regularizer = regularizerMap[layer['params']
-                                           ['pointwise_regularizer']]
-    bias_regularizer = regularizerMap[layer['params']['bias_regularizer']]
-    activity_regularizer = regularizerMap[layer['params']
-                                          ['activity_regularizer']]
-    depthwise_constraint = constraintMap[layer['params']
-                                         ['depthwise_constraint']]
-    pointwise_constraint = constraintMap[layer['params']
-                                         ['pointwise_constraint']]
-    bias_constraint = constraintMap[layer['params']['bias_constraint']]
-    out[layerId] = SeparableConv2D(filters, [k_h, k_w], strides=(s_h, s_w), padding=padding,
-                                   depth_multiplier=depth_multiplier, use_bias=use_bias,
-                                   depthwise_initializer=depthwise_initializer,
-                                   pointwise_initializer=pointwise_initializer,
-                                   bias_initializer=bias_initializer,
-                                   depthwise_regularizer=depthwise_regularizer,
-                                   pointwise_regularizer=pointwise_regularizer,
-                                   bias_regularizer=bias_regularizer,
-                                   activity_regularizer=activity_regularizer,
-                                   depthwise_constraint=depthwise_constraint,
-                                   pointwise_constraint=pointwise_constraint,
-                                   bias_constraint=bias_constraint,)(*layer_in)
-    return out
-
-
-def deconvolution(layer, layer_in, layerId, tensor=True):
-    out = {}
-    padding = get_padding(layer)
-    k_h, k_w = layer['params']['kernel_h'], layer['params']['kernel_w']
-    s_h, s_w = layer['params']['stride_h'], layer['params']['stride_w']
-    d_h, d_w = layer['params']['dilation_h'], layer['params']['dilation_w']
-    if (layer['params']['weight_filler'] in fillerMap):
-        kernel_initializer = fillerMap[layer['params']['weight_filler']]
-    else:
-        kernel_initializer = layer['params']['weight_filler']
-    if (layer['params']['bias_filler'] in fillerMap):
-        bias_initializer = fillerMap[layer['params']['bias_filler']]
-    else:
-        bias_initializer = layer['params']['bias_filler']
-    filters = layer['params']['num_output']
-    if (padding == 'custom'):
-        p_h, p_w = layer['params']['pad_h'], layer['params']['pad_w']
-        out[layerId + 'Pad'] = ZeroPadding2D(padding=(p_h, p_w))(*layer_in)
-        padding = 'valid'
-        layer_in = [out[layerId + 'Pad']]
-    kernel_regularizer = regularizerMap[layer['params']['kernel_regularizer']]
-    bias_regularizer = regularizerMap[layer['params']['bias_regularizer']]
-    activity_regularizer = regularizerMap[layer['params']
-                                          ['activity_regularizer']]
-    kernel_constraint = constraintMap[layer['params']['kernel_constraint']]
-    bias_constraint = constraintMap[layer['params']['bias_constraint']]
-    use_bias = layer['params']['use_bias']
-    out[layerId] = Conv2DTranspose(filters, [k_h, k_w], strides=(s_h, s_w), padding=padding,
-                                   dilation_rate=(
-                                       d_h, d_w), kernel_initializer=kernel_initializer,
-                                   bias_initializer=bias_initializer,
-                                   kernel_regularizer=kernel_regularizer,
-                                   bias_regularizer=bias_regularizer,
-                                   activity_regularizer=activity_regularizer, use_bias=use_bias,
-                                   bias_constraint=bias_constraint,
-                                   kernel_constraint=kernel_constraint)
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-def upsample(layer, layer_in, layerId, tensor=True):
-    upsampleMap = {
-        '1D': UpSampling1D,
-        '2D': UpSampling2D,
-        '3D': UpSampling3D
-    }
-    out = {}
-    layer_type = layer['params']['layer_type']
-    if (layer_type == '1D'):
-        size = layer['params']['size_w']
-    elif (layer_type == '2D'):
-        size = (layer['params']['size_h'], layer['params']['size_w'])
-    else:
-        size = (layer['params']['size_h'], layer['params']['size_w'],
-                layer['params']['size_d'])
-    out[layerId] = upsampleMap[layer_type](size=size)
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-# ********** Pooling Layers **********
-def pooling(layer, layer_in, layerId, tensor=True):
-    poolMap = {
-        ('1D', 'MAX'): MaxPooling1D,
-        ('2D', 'MAX'): MaxPooling2D,
-        ('3D', 'MAX'): MaxPooling3D,
-        ('1D', 'AVE'): AveragePooling1D,
-        ('2D', 'AVE'): AveragePooling2D,
-        ('3D', 'AVE'): AveragePooling3D,
-    }
-    out = {}
-    layer_type = layer['params']['layer_type']
-    pool_type = layer['params']['pool']
-    padding = get_padding(layer)
-    if (layer_type == '1D'):
-        strides = layer['params']['stride_w']
-        kernel = layer['params']['kernel_w']
-        if (padding == 'custom'):
-            p_w = layer['params']['pad_w']
-            out[layerId + 'Pad'] = ZeroPadding1D(padding=p_w)(*layer_in)
-            padding = 'valid'
-            layer_in = [out[layerId + 'Pad']]
-    elif (layer_type == '2D'):
-        strides = (layer['params']['stride_h'], layer['params']['stride_w'])
-        kernel = (layer['params']['kernel_h'], layer['params']['kernel_w'])
-        if (padding == 'custom'):
-            p_h, p_w = layer['params']['pad_h'], layer['params']['pad_w']
-            out[layerId + 'Pad'] = ZeroPadding2D(padding=(p_h, p_w))(*layer_in)
-            padding = 'valid'
-            layer_in = [out[layerId + 'Pad']]
-    else:
-        strides = (layer['params']['stride_h'], layer['params']['stride_w'],
-                   layer['params']['stride_d'])
-        kernel = (layer['params']['kernel_h'], layer['params']['kernel_w'],
-                  layer['params']['kernel_d'])
-        if (padding == 'custom'):
-            p_h, p_w, p_d = layer['params']['pad_h'], layer['params']['pad_w'],\
-                layer['params']['pad_d']
-            out[layerId +
-                'Pad'] = ZeroPadding3D(padding=(p_h, p_w, p_d))(*layer_in)
-            padding = 'valid'
-            layer_in = [out[layerId + 'Pad']]
-    # Note - figure out a permanent fix for padding calculation of layers
-    # in case padding is given in layer attributes
-    # if ('padding' in layer['params']):
-    #     padding = layer['params']['padding']
-    out[layerId] = poolMap[(layer_type, pool_type)](
-        pool_size=kernel, strides=strides, padding=padding)
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-# ********** Locally-connected Layers **********
-def locally_connected(layer, layer_in, layerId, tensor=True):
-    localMap = {
-        '1D': LocallyConnected1D,
-        '2D': LocallyConnected2D,
-    }
-    out = {}
-    kernel_initializer = layer['params']['kernel_initializer']
-    bias_initializer = layer['params']['bias_initializer']
-    filters = layer['params']['filters']
-    kernel_regularizer = regularizerMap[layer['params']['kernel_regularizer']]
-    bias_regularizer = regularizerMap[layer['params']['bias_regularizer']]
-    activity_regularizer = regularizerMap[layer['params']
-                                          ['activity_regularizer']]
-    kernel_constraint = constraintMap[layer['params']['kernel_constraint']]
-    bias_constraint = constraintMap[layer['params']['bias_constraint']]
-    use_bias = layer['params']['use_bias']
-    layer_type = layer['params']['layer_type']
-    if (layer_type == '1D'):
-        strides = layer['params']['stride_w']
-        kernel = layer['params']['kernel_w']
-    else:
-        strides = (layer['params']['stride_h'], layer['params']['stride_w'])
-        kernel = (layer['params']['kernel_h'], layer['params']['kernel_w'])
-    out[layerId] = localMap[layer_type](filters, kernel, strides=strides, padding='valid',
-                                        kernel_initializer=kernel_initializer,
-                                        bias_initializer=bias_initializer,
-                                        kernel_regularizer=kernel_regularizer,
-                                        bias_regularizer=bias_regularizer,
-                                        activity_regularizer=activity_regularizer, use_bias=use_bias,
-                                        bias_constraint=bias_constraint,
-                                        kernel_constraint=kernel_constraint)
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-# ********** Recurrent Layers **********
-def recurrent(layer, layer_in, layerId, tensor=True):
-    out = {}
-    units = layer['params']['num_output']
-    if (layer['params']['weight_filler'] in fillerMap):
-        kernel_initializer = fillerMap[layer['params']['weight_filler']]
-    else:
-        kernel_initializer = layer['params']['weight_filler']
-    if (layer['params']['bias_filler'] in fillerMap):
-        bias_initializer = fillerMap[layer['params']['bias_filler']]
-    else:
-        bias_initializer = layer['params']['bias_filler']
-    recurrent_initializer = layer['params']['recurrent_initializer']
-    kernel_regularizer = regularizerMap[layer['params']['kernel_regularizer']]
-    recurrent_regularizer = regularizerMap[layer['params']
-                                           ['recurrent_regularizer']]
-    bias_regularizer = regularizerMap[layer['params']['bias_regularizer']]
-    activity_regularizer = regularizerMap[layer['params']
-                                          ['activity_regularizer']]
-    kernel_constraint = constraintMap[layer['params']['kernel_constraint']]
-    recurrent_constraint = constraintMap[layer['params']
-                                         ['recurrent_constraint']]
-    bias_constraint = constraintMap[layer['params']['bias_constraint']]
-    use_bias = layer['params']['use_bias']
-    dropout = layer['params']['dropout']
-    recurrent_dropout = layer['params']['recurrent_dropout']
-    if ('return_sequences' in layer['params']):
-        return_sequences = layer['params']['return_sequences']
-    else:
-        return_sequences = False
-    if (layer['info']['type'] == 'GRU'):
-        recurrent_activation = layer['params']['recurrent_activation']
-        out[layerId] = GRU(units, kernel_initializer=kernel_initializer,
-                           bias_initializer=bias_initializer,
-                           recurrent_activation=recurrent_activation,
-                           recurrent_initializer=recurrent_initializer,
-                           kernel_regularizer=kernel_regularizer,
-                           recurrent_regularizer=recurrent_regularizer,
-                           bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer,
-                           kernel_constraint=kernel_constraint, recurrent_constraint=recurrent_constraint,
-                           bias_constraint=bias_constraint, use_bias=use_bias, dropout=dropout,
-                           recurrent_dropout=recurrent_dropout)
-    elif (layer['info']['type'] == 'LSTM'):
-        recurrent_activation = layer['params']['recurrent_activation']
-        unit_forget_bias = layer['params']['unit_forget_bias']
-        out[layerId] = LSTM(units, kernel_initializer=kernel_initializer,
-                            bias_initializer=bias_initializer,
-                            recurrent_activation=recurrent_activation, unit_forget_bias=unit_forget_bias,
-                            recurrent_initializer=recurrent_initializer,
-                            kernel_regularizer=kernel_regularizer,
-                            recurrent_regularizer=recurrent_regularizer,
-                            bias_regularizer=bias_regularizer, activity_regularizer=activity_regularizer,
-                            kernel_constraint=kernel_constraint, recurrent_constraint=recurrent_constraint,
-                            bias_constraint=bias_constraint, use_bias=use_bias, dropout=dropout,
-                            recurrent_dropout=recurrent_dropout, return_sequences=return_sequences)
-    else:
-        out[layerId] = SimpleRNN(units, kernel_initializer=kernel_initializer,
-                                 bias_initializer=bias_initializer,
-                                 recurrent_initializer=recurrent_initializer,
-                                 kernel_regularizer=kernel_regularizer,
-                                 recurrent_regularizer=recurrent_regularizer,
-                                 bias_regularizer=bias_regularizer,
-                                 activity_regularizer=activity_regularizer,
-                                 kernel_constraint=kernel_constraint,
-                                 recurrent_constraint=recurrent_constraint,
-                                 bias_constraint=bias_constraint,
-                                 use_bias=use_bias, dropout=dropout,
-                                 recurrent_dropout=recurrent_dropout)
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-# ********** Embedding Layers **********
-def embed(layer, layer_in, layerId, tensor=True):
-    out = {}
-    if (layer['params']['weight_filler'] in fillerMap):
-        embeddings_initializer = fillerMap[layer['params']['weight_filler']]
-    else:
-        embeddings_initializer = layer['params']['weight_filler']
-    embeddings_regularizer = regularizerMap[layer['params']
-                                            ['embeddings_regularizer']]
-    embeddings_constraint = constraintMap[layer['params']
-                                          ['embeddings_constraint']]
-    mask_zero = layer['params']['mask_zero']
-    if (layer['params']['input_length']):
-        input_length = layer['params']['input_length']
-    else:
-        input_length = None
-    out[layerId] = Embedding(layer['params']['input_dim'], layer['params']['num_output'],
-                             embeddings_initializer=embeddings_initializer,
-                             embeddings_regularizer=embeddings_regularizer,
-                             embeddings_constraint=embeddings_constraint,
-                             mask_zero=mask_zero, input_length=input_length)
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-# ********** Merge Layers **********
-def eltwise(layer, layer_in, layerId):
-    out = {}
-    if (layer['params']['layer_type'] == 'Multiply'):
-        # This input reverse is to handle visualization
-        out[layerId] = multiply(layer_in[::-1])
-    elif (layer['params']['layer_type'] == 'Sum'):
-        out[layerId] = add(layer_in[::-1])
-    elif (layer['params']['layer_type'] == 'Average'):
-        out[layerId] = average(layer_in[::-1])
-    elif (layer['params']['layer_type'] == 'Dot'):
-        out[layerId] = dot(layer_in[::-1], -1)
-    else:
-        out[layerId] = maximum(layer_in[::-1])
-    return out
-
-
-def concat(layer, layer_in, layerId):
-    out = {layerId: concatenate(layer_in)}
-    return out
-
-
-# ********** Noise Layers **********
-def gaussian_noise(layer, layer_in, layerId, tensor=True):
-    stddev = layer['params']['stddev']
-    out = {layerId: GaussianNoise(stddev=stddev)}
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-def gaussian_dropout(layer, layer_in, layerId, tensor=True):
-    rate = layer['params']['rate']
-    out = {layerId: GaussianDropout(rate=rate)}
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-def alpha_dropout(layer, layer_in, layerId, tensor=True):
-    rate = layer['params']['rate']
-    seed = layer['params']['seed']
-    out = {layerId: AlphaDropout(rate=rate, seed=seed)}
-    if tensor:
-        out[layerId] = out[layerId](*layer_in)
-    return out
-
-
-# ********** Normalisation Layers **********
-def batch_norm(layer, layer_in, layerId, idNext, nextLayer):
-    out = {}
-    momentum = layer['params']['moving_average_fraction']
-    eps = float(layer['params']['eps'])
-    if (eps <= 1e-5):
-        eps = 1e-4  # In Keras the smallest epsilon allowed is 1e-5
-    moving_mean_initializer = layer['params']['moving_mean_initializer']
-    moving_variance_initializer = layer['params']['moving_variance_initializer']
-    if (nextLayer['info']['type'] == 'Scale'):
-        axis = nextLayer['params']['axis']
-        # In Caffe the first dimension has number of filters/outputs but in Keras it is the last
-        # dimension
-        if (axis == 1):
-            axis = -1
-        center = nextLayer['params']['bias_term']
-        scale = nextLayer['params']['scale']
-        if (nextLayer['params']['filler'] in fillerMap):
-            gamma_initializer = fillerMap[nextLayer['params']['filler']]
-        else:
-            gamma_initializer = nextLayer['params']['filler']
-        if (nextLayer['params']['bias_filler'] in fillerMap):
-            beta_initializer = fillerMap[nextLayer['params']['bias_filler']]
-        else:
-            beta_initializer = nextLayer['params']['bias_filler']
-        gamma_regularizer = regularizerMap[nextLayer['params']
-                                           ['gamma_regularizer']]
-        beta_regularizer = regularizerMap[nextLayer['params']
-                                          ['beta_regularizer']]
-        gamma_constraint = constraintMap[nextLayer['params']
-                                         ['gamma_constraint']]
-        beta_constraint = constraintMap[nextLayer['params']['beta_constraint']]
-        out[idNext] = BatchNormalization(axis=axis, momentum=momentum, epsilon=eps,
-                                         moving_mean_initializer=moving_mean_initializer,
-                                         moving_variance_initializer=moving_variance_initializer,
-                                         center=center, scale=scale,
-                                         gamma_initializer=gamma_initializer,
-                                         beta_initializer=beta_initializer,
-                                         gamma_regularizer=gamma_regularizer,
-                                         beta_regularizer=beta_regularizer,
-                                         gamma_constraint=gamma_constraint,
-                                         beta_constraint=beta_constraint)(*layer_in)
-    else:
-        out[layerId] = BatchNormalization(momentum=momentum, epsilon=eps,
-                                          moving_mean_initializer=moving_mean_initializer,
-                                          moving_variance_initializer=moving_variance_initializer,
-                                          scale=False, center=False)(*layer_in)
-    return out
-
-
-def bidirectional(layerId, idNext, net, layer_in, layer_map):
-    out = {}
-    if net[layerId]['params']['merge_mode'] == '':
-        net[layerId]['params']['merge_mode'] = None
-    mode = net[layerId]['params']['merge_mode']
-    out[layerId] = Bidirectional(
-        layer_map[net[idNext]['info']['type']](
-            net[idNext], layer_in, idNext, False)[idNext],
-        merge_mode=mode)(*layer_in)
-    return out
-
-
-def time_distributed(layerId, idNext, net, layer_in, layer_map):
-    out = {}
-    out[layerId] = TimeDistributed(
-        layer_map[net[idNext]['info']['type']](net[idNext], layer_in, idNext, False)[idNext])(*layer_in)
-    return out
-
-
-# Custom LRN for Tensorflow export and Keras export
-def lrn(layer, layer_in, layerId):
-    alpha = layer['params']['alpha']
-    beta = layer['params']['beta']
-    k = layer['params']['k']
-    n = layer['params']['local_size']
-    out = {}
-    out[layerId] = LRN(alpha=alpha, beta=beta, k=k, n=n)(*layer_in)
-    return out
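# Illustrative mapping (assumed, not from the scraped file): Caffe models
# batch normalisation as a BatchNorm layer (normalise) followed by a Scale
# layer (learned gamma/beta), while Keras folds both into a single
# BatchNormalization; that is why batch_norm() above consumes the following
# Scale layer when one is present.
from keras.layers import BatchNormalization
bn_with_scale = BatchNormalization(center=True, scale=True)   # BatchNorm + Scale
bn_plain = BatchNormalization(center=False, scale=False)      # BatchNorm alone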
-
-
-# logic as used in caffe-tensorflow
-# https://github.com/ethereon/caffe-tensorflow/blob/master/kaffe/tensorflow/transformer.py
-def get_padding(layer):
-    if (layer['info']['type'] in ['Deconvolution', 'DepthwiseConv']):
-        _, i_h, i_w = layer['shape']['output']
-        _, o_h, o_w = layer['shape']['input']
-        k_h, k_w = layer['params']['kernel_h'], layer['params']['kernel_w']
-        s_h, s_w = layer['params']['stride_h'], layer['params']['stride_w']
-        layer['params']['layer_type'] = '2D'
-    else:
-        if (layer['params']['layer_type'] == '1D'):
-            i_w = layer['shape']['input'][0]
-            o_w = layer['shape']['output'][1]
-            k_w = layer['params']['kernel_w']
-            s_w = layer['params']['stride_w']
-
-        elif (layer['params']['layer_type'] == '2D'):
-            _, i_h, i_w = layer['shape']['input']
-            _, o_h, o_w = layer['shape']['output']
-            k_h, k_w = layer['params']['kernel_h'], layer['params']['kernel_w']
-            s_h, s_w = layer['params']['stride_h'], layer['params']['stride_w']
-        else:
-            _, i_h, i_w, i_d = layer['shape']['input']
-            _, o_h, o_w, o_d = layer['shape']['output']
-            k_h, k_w, k_d = layer['params']['kernel_h'], layer['params']['kernel_w'],\
-                layer['params']['kernel_d']
-            s_h, s_w, s_d = layer['params']['stride_h'], layer['params']['stride_w'],\
-                layer['params']['stride_d']
-    if (layer['params']['layer_type'] == '1D'):
-        s_o_w = np.ceil(i_w / float(s_w))
-        if (o_w == s_o_w):
-            return 'same'
-        v_o_w = np.ceil((i_w - k_w + 1.0) / float(s_w))
-        if (o_w == v_o_w):
-            return 'valid'
-        return 'custom'
-    elif (layer['params']['layer_type'] == '2D'):
-        s_o_h = np.ceil(i_h / float(s_h))
-        s_o_w = np.ceil(i_w / float(s_w))
-        if (o_h == s_o_h) and (o_w == s_o_w):
-            return 'same'
-        v_o_h = np.ceil((i_h - k_h + 1.0) / float(s_h))
-        v_o_w = np.ceil((i_w - k_w + 1.0) / float(s_w))
-        if (o_h == v_o_h) and (o_w == v_o_w):
-            return 'valid'
-        return 'custom'
-    else:
-        s_o_h = np.ceil(i_h / float(s_h))
-        s_o_w = np.ceil(i_w / float(s_w))
-        s_o_d = np.ceil(i_d / float(s_d))
-        if (o_h == s_o_h) and (o_w == s_o_w) and (o_d == s_o_d):
-            return 'same'
-        v_o_h = np.ceil((i_h - k_h + 1.0) / float(s_h))
-        v_o_w = np.ceil((i_w - k_w + 1.0) / float(s_w))
-        v_o_d = np.ceil((i_d - k_d + 1.0) / float(s_d))
-        if (o_h == v_o_h) and (o_w == v_o_w) and (o_d == v_o_d):
-            return 'valid'
-        return 'custom'
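# Worked example (assumed numbers, not from the scraped file) of the
# same/valid classification above: input width 28, kernel 3, stride 1.
import numpy as np
i_w, k_w, s_w = 28, 3, 1
same_w = np.ceil(i_w / float(s_w))                  # 28.0 -> padding='same'
valid_w = np.ceil((i_w - k_w + 1.0) / float(s_w))   # 26.0 -> padding='valid'
# An observed output width of 28 classifies as 'same', 26 as 'valid', and
# anything else as 'custom' (emulated with an explicit ZeroPadding layer).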
-import numpy as np
-
-
-# ********** Data Layers **********
-def Input(layer):
-    params = {}
-    shape = layer.batch_input_shape
-    if (len(shape) == 2):
-        params['dim'] = str([1, shape[1]])[1:-1]
-    else:
-        params['dim'] = str([1, shape[-1]] + list(shape[1:-1]))[1:-1]
-    return jsonLayer('Input', params, layer)
-
-
-# ********** Core Layers **********
-def Dense(layer):
-    params = {}
-    params['weight_filler'] = layer.kernel_initializer.__class__.__name__
-    params['bias_filler'] = layer.bias_initializer.__class__.__name__
-    params['num_output'] = layer.units
-    if (layer.kernel_regularizer):
-        params['kernel_regularizer'] = layer.kernel_regularizer.__class__.__name__
-    if (layer.bias_regularizer):
-        params['bias_regularizer'] = layer.bias_regularizer.__class__.__name__
-    if (layer.activity_regularizer):
-        params['activity_regularizer'] = layer.activity_regularizer.__class__.__name__
-    if (layer.kernel_constraint):
-        params['kernel_constraint'] = layer.kernel_constraint.__class__.__name__
-    if (layer.bias_constraint):
-        params['bias_constraint'] = layer.bias_constraint.__class__.__name__
-    params['use_bias'] = layer.use_bias
-    return jsonLayer('InnerProduct', params, layer)
-
-
-def Activation(layer):
-    activationMap = {
-        'softmax': 'Softmax',
-        'relu': 'ReLU',
-        'tanh': 'TanH',
-        'sigmoid': 'Sigmoid',
-        'selu': 'SELU',
-        'softplus': 'Softplus',
-        'softsign': 'Softsign',
-        'hard_sigmoid': 'HardSigmoid',
-        'linear': 'Linear'
-    }
-    if (layer.__class__.__name__ == 'Activation'):
-        return jsonLayer(activationMap[layer.activation.func_name], {}, layer)
-    else:
-        tempLayer = {}
-        tempLayer['inbound_nodes'] = [
-            [[layer.name + layer.__class__.__name__]]]
-        return jsonLayer(activationMap[layer.activation.func_name], {}, tempLayer)
-
-
-def Dropout(layer):
-    params = {}
-    if (layer.rate is not None):
-        params['rate'] = layer.rate
-    if (layer.seed is not None):
-        params['seed'] = layer.seed
-    if (layer.trainable is not None):
-        params['trainable'] = layer.trainable
-    return jsonLayer('Dropout', params, layer)
-
-
-def Flatten(layer):
-    return jsonLayer('Flatten', {}, layer)
-
-
-def Reshape(layer):
-    params = {}
-    shape = layer.target_shape
-    params['dim'] = str([1] + list(shape))[1:-1]
-    return jsonLayer('Reshape', params, layer)
-
-
-def Permute(layer):
-    params = {}
-    params['dim'] = str(layer.dims)[1:-1]
-    return jsonLayer('Permute', params, layer)
-
-
-def RepeatVector(layer):
-    params = {}
-    params['n'] = layer.n
-    return jsonLayer('RepeatVector', params, layer)
-
-
-def ActivityRegularization(layer):
-    params = {}
-    params['l1'] = layer.l1
-    params['l2'] = layer.l2
-    return jsonLayer('Regularization', params, layer)
-
-
-def Masking(layer):
-    params = {}
-    params['mask_value'] = layer.mask_value
-    return jsonLayer('Masking', params, layer)
-
-
-# ********** Convolutional Layers **********
-def Convolution(layer):
-    params = {}
-    if (layer.__class__.__name__ == 'Conv1D'):
-        params['layer_type'] = '1D'
-        params['kernel_w'] = layer.kernel_size[0]
-        params['stride_w'] = layer.strides[0]
-        params['dilation_w'] = layer.dilation_rate[0]
-        params['pad_w'] = get_padding([params['kernel_w'], -1, -1,
-                                       params['stride_w'], -1, -1],
-                                      layer.input_shape, layer.output_shape,
-                                      layer.padding.lower(), '1D')
-    elif (layer.__class__.__name__ == 'Conv2D'):
-        params['layer_type'] = '2D'
-        params['kernel_h'], params['kernel_w'] = layer.kernel_size
-        params['stride_h'], params['stride_w'] = layer.strides
-        params['dilation_h'], params['dilation_w'] = layer.dilation_rate
-        params['pad_h'], params['pad_w'] = get_padding([params['kernel_w'], params['kernel_h'], -1,
-                                                        params['stride_w'], params['stride_h'], -1],
-                                                       layer.input_shape, layer.output_shape,
-                                                       layer.padding.lower(), '2D')
-    else:
-        params['layer_type'] = '3D'
-        params['kernel_h'], params['kernel_w'], params['kernel_d'] = layer.kernel_size
-        params['stride_h'], params['stride_w'], params['stride_d'] = layer.strides
-        params['dilation_h'], params['dilation_w'], params['dilation_d'] = layer.dilation_rate
-        params['pad_h'], params['pad_w'], params['pad_d'] = get_padding([params['kernel_w'],
-                                                                         params['kernel_h'],
-                                                                         params['kernel_d'],
-                                                                         params['stride_w'],
-                                                                         params['stride_h'],
-                                                                         params['stride_d']],
-                                                                        layer.input_shape,
-                                                                        layer.output_shape,
-                                                                        layer.padding.lower(), '3D')
-    params['weight_filler'] = layer.kernel_initializer.__class__.__name__
-    params['bias_filler'] = layer.bias_initializer.__class__.__name__
-    params['num_output'] = layer.filters
-    if (layer.kernel_regularizer):
-        params['kernel_regularizer'] = layer.kernel_regularizer.__class__.__name__
-    if (layer.bias_regularizer):
-        params['bias_regularizer'] = layer.bias_regularizer.__class__.__name__
-    if (layer.activity_regularizer):
-        params['activity_regularizer'] = layer.activity_regularizer.__class__.__name__
-    if (layer.kernel_constraint):
-        params['kernel_constraint'] = layer.kernel_constraint.__class__.__name__
-    if (layer.bias_constraint):
-        params['bias_constraint'] = layer.bias_constraint.__class__.__name__
-    params['use_bias'] = layer.use_bias
-    return jsonLayer('Convolution', params, layer)
-
-
-# Separable Convolution is currently not supported with Theano backend
-
-def DepthwiseConv(layer):
-    params = {}
-    params['num_output'] = layer.filters
-    params['kernel_h'], params['kernel_w'] = layer.kernel_size
-    params['stride_h'], params['stride_w'] = layer.strides
-    params['pad_h'], params['pad_w'] = get_padding([params['kernel_w'], params['kernel_h'], -1,
-                                                    params['stride_w'], params['stride_h'], -1],
-                                                   layer.input_shape, layer.output_shape,
-                                                   layer.padding.lower(), '2D')
-    params['depth_multiplier'] = layer.depth_multiplier
-    params['use_bias'] = layer.use_bias
-    params['depthwise_initializer'] = layer.depthwise_initializer.__class__.__name__
-    params['pointwise_initializer'] = layer.pointwise_initializer.__class__.__name__
-    params['bias_initializer'] = layer.bias_initializer.__class__.__name__
-    if (layer.depthwise_regularizer):
-        params['depthwise_regularizer'] = layer.depthwise_regularizer.__class__.__name__
-    if (layer.pointwise_regularizer):
-        params['pointwise_regularizer'] = layer.pointwise_regularizer.__class__.__name__
-    if (layer.bias_regularizer):
-        params['bias_regularizer'] = layer.bias_regularizer.__class__.__name__
-    if (layer.activity_regularizer):
-        params['activity_regularizer'] = layer.activity_regularizer.__class__.__name__
-    if (layer.depthwise_constraint):
-        params['depthwise_constraint'] = layer.depthwise_constraint.__class__.__name__
-    if (layer.pointwise_constraint):
-        params['pointwise_constraint'] = layer.pointwise_constraint.__class__.__name__
-    if (layer.bias_constraint):
-        params['bias_constraint'] = layer.bias_constraint.__class__.__name__
-    return jsonLayer('DepthwiseConv', params, layer)
-
-
-def Deconvolution(layer):
-    params = {}
-    params['kernel_h'], params['kernel_w'] = layer.kernel_size
-    params['stride_h'], params['stride_w'] = layer.strides
-    params['dilation_h'], params['dilation_w'] = layer.dilation_rate
-    params['pad_h'], params['pad_w'] = get_padding([params['kernel_w'], params['kernel_h'], -1,
-                                                    params['stride_w'], params['stride_h'], -1],
-                                                   layer.input_shape, layer.output_shape,
-                                                   layer.padding.lower(), '2D')
-    params['padding'] = layer.padding.upper()
-    params['weight_filler'] = layer.kernel_initializer.__class__.__name__
-    params['bias_filler'] = layer.bias_initializer.__class__.__name__
-    params['num_output'] = layer.filters
-    if (layer.kernel_regularizer):
-        params['kernel_regularizer'] = layer.kernel_regularizer.__class__.__name__
-    if (layer.bias_regularizer):
-        params['bias_regularizer'] = layer.bias_regularizer.__class__.__name__
-    if (layer.activity_regularizer):
-        params['activity_regularizer'] = layer.activity_regularizer.__class__.__name__
-    if (layer.kernel_constraint):
-        params['kernel_constraint'] = layer.kernel_constraint.__class__.__name__
-    if (layer.bias_constraint):
-        params['bias_constraint'] = layer.bias_constraint.__class__.__name__
-    params['use_bias'] = layer.use_bias
-    return jsonLayer('Deconvolution', params, layer)
-
-
-def Upsample(layer):
-    params = {}
-    if (layer.__class__.__name__ == 'UpSampling1D'):
-        params['size_w'] = layer.size
-        params['layer_type'] = '1D'
-    elif (layer.__class__.__name__ == 'UpSampling2D'):
-        params['size_w'], params['size_h'] = layer.size
-        params['layer_type'] = '2D'
-    else:
-        params['size_w'], params['size_h'], params['size_d'] = layer.size
-        params['layer_type'] = '3D'
-    return jsonLayer('Upsample', params, layer)
-
-
-# ********** Pooling Layers **********
-def Pooling(layer):
-    params = {}
-    poolMap = {
-        'MaxPooling1D': 'MAX',
-        'MaxPooling2D': 'MAX',
-        'MaxPooling3D': 'MAX',
-        'AveragePooling1D': 'AVE',
-        'AveragePooling2D': 'AVE',
-        'AveragePooling3D': 'AVE',
-        'GlobalMaxPooling1D': 'MAX',
-        'GlobalMaxPooling2D': 'MAX',
-        'GlobalAveragePooling1D': 'AVE',
-        'GlobalAveragePooling2D': 'AVE'
-    }
-    if (layer.__class__.__name__ in ['GlobalAveragePooling1D', 'GlobalMaxPooling1D']):
-        input_shape = layer.input_shape
-        params['kernel_w'] = params['stride_w'] = input_shape[1]
-        padding = 'valid'
-        params['layer_type'] = '1D'
-        params['pad_w'] = get_padding([params['kernel_w'], -1, -1,
-                                       params['stride_w'], -1, -1],
-                                      layer.input_shape, layer.output_shape,
-                                      padding, '1D')
-    elif (layer.__class__.__name__ in ['GlobalAveragePooling2D', 'GlobalMaxPooling2D']):
-        input_shape = layer.input_shape
-        params['kernel_h'] = params['stride_h'] = input_shape[2]
-        params['kernel_w'] = params['stride_w'] = input_shape[1]
-        padding = 'valid'
-        params['layer_type'] = '2D'
-        params['pad_h'], params['pad_w'] = get_padding([params['kernel_w'], params['kernel_h'], -1,
-                                                        params['stride_w'], params['stride_h'], -1],
-                                                       layer.input_shape, layer.output_shape,
-                                                       padding, '2D')
-    else:
-        padding = layer.padding.lower()
-        if (layer.__class__.__name__ in ['MaxPooling1D', 'AveragePooling1D']):
-            params['kernel_w'] = layer.pool_size[0]
-            params['stride_w'] = layer.strides[0]
-            params['layer_type'] = '1D'
-            params['pad_w'] = get_padding([params['kernel_w'], -1, -1,
-                                           params['stride_w'], -1, -1],
-                                          layer.input_shape, layer.output_shape,
-                                          padding, '1D')
-        elif (layer.__class__.__name__ in ['MaxPooling2D', 'AveragePooling2D']):
-            params['kernel_w'], params['kernel_h'] = layer.pool_size
-            params['stride_w'], params['stride_h'] = layer.strides
-            params['layer_type'] = '2D'
-            params['pad_h'], params['pad_w'] = get_padding([params['kernel_w'], params['kernel_h'], -1,
-                                                            params['stride_w'], params['stride_h'], -1],
-                                                           layer.input_shape, layer.output_shape,
-                                                           padding, '2D')
-        else:
-            params['kernel_h'], params['kernel_w'], params['kernel_d'] = layer.pool_size
-            params['stride_h'], params['stride_w'], params['stride_d'] = layer.strides
-            params['layer_type'] = '3D'
-            params['pad_h'], params['pad_w'], params['pad_d'] = get_padding([params['kernel_w'],
-                                                                             params['kernel_h'],
-                                                                             params['kernel_d'],
-                                                                             params['stride_w'],
-                                                                             params['stride_h'],
-                                                                             params['stride_d']],
-                                                                            layer.input_shape,
-                                                                            layer.output_shape,
-                                                                            padding, '3D')
-    params['pool'] = poolMap[layer.__class__.__name__]
-    return jsonLayer('Pooling', params, layer)
-
-
-# ********** Locally-connected Layers **********
-def LocallyConnected(layer):
-    params = {}
-    if (layer.__class__.__name__ == 'LocallyConnected1D'):
-        params['layer_type'] = '1D'
-        params['kernel_w'] = layer.kernel_size[0]
-        params['stride_w'] = layer.strides[0]
-    else:
-        params['layer_type'] = '2D'
-        params['kernel_h'], params['kernel_w'] = layer.kernel_size
-        params['stride_h'], params['stride_w'] = layer.strides
-    params['kernel_initializer'] = layer.kernel_initializer.__class__.__name__
-    params['bias_initializer'] = layer.bias_initializer.__class__.__name__
-    params['filters'] = layer.filters
-    if (layer.kernel_regularizer):
-        params['kernel_regularizer'] = layer.kernel_regularizer.__class__.__name__
-    if (layer.bias_regularizer):
-        params['bias_regularizer'] = layer.bias_regularizer.__class__.__name__
-    if (layer.activity_regularizer):
-        params['activity_regularizer'] = layer.activity_regularizer.__class__.__name__
-    if (layer.kernel_constraint):
-        params['kernel_constraint'] = layer.kernel_constraint.__class__.__name__
-    if (layer.bias_constraint):
-        params['bias_constraint'] = layer.bias_constraint.__class__.__name__
-    params['use_bias'] = layer.use_bias
-    return jsonLayer('LocallyConnected', params, layer)
-
-
-# ********** Recurrent Layers **********
-def Recurrent(layer):
-    recurrentMap = {
-        'SimpleRNN': 'RNN',
-        'GRU': 'GRU',
-        'LSTM': 'LSTM'
-    }
-    params = {}
-    params['num_output'] = layer.units
-    params['weight_filler'] = layer.kernel_initializer.__class__.__name__
-    params['recurrent_initializer'] = layer.recurrent_initializer.__class__.__name__
-    params['bias_filler'] = layer.bias_initializer.__class__.__name__
-    if (layer.kernel_regularizer):
-        params['kernel_regularizer'] = layer.kernel_regularizer.__class__.__name__
-    if (layer.recurrent_regularizer):
-        params['recurrent_regularizer'] = layer.recurrent_regularizer.__class__.__name__
-    if (layer.bias_regularizer):
-        params['bias_regularizer'] = layer.bias_regularizer.__class__.__name__
-    if (layer.activity_regularizer):
-        params['activity_regularizer'] = layer.activity_regularizer.__class__.__name__
-    if (layer.kernel_constraint):
-        params['kernel_constraint'] = layer.kernel_constraint.__class__.__name__
-    if (layer.recurrent_constraint):
-        params['recurrent_constraint'] = layer.recurrent_constraint.__class__.__name__
-    if (layer.bias_constraint):
-        params['bias_constraint'] = layer.bias_constraint.__class__.__name__
-    params['use_bias'] = layer.use_bias
-    params['dropout'] = layer.dropout
-    params['recurrent_dropout'] = layer.recurrent_dropout
-    if (layer.__class__.__name__ == 'GRU'):
-        params['recurrent_activation'] = layer.recurrent_activation.func_name
-    elif (layer.__class__.__name__ == 'LSTM'):
-        params['recurrent_activation'] = layer.recurrent_activation.func_name
-        params['unit_forget_bias'] = layer.unit_forget_bias
-        params['return_sequences'] = layer.return_sequences
-    return jsonLayer(recurrentMap[layer.__class__.__name__], params, layer)
-
-
-# ********** Embedding Layers **********
-def Embed(layer):
-    params = {}
-    params['input_dim'] = layer.input_dim
-    params['num_output'] = layer.output_dim
-    params['weight_filler'] = layer.embeddings_initializer.__class__.__name__
-    if (layer.embeddings_regularizer):
-        params['embeddings_regularizer'] = layer.embeddings_regularizer.__class__.__name__
-    if (layer.embeddings_constraint):
-        params['embeddings_constraint'] = layer.embeddings_constraint.__class__.__name__
-    if (layer.input_length):
-        params['input_length'] = layer.input_length
-    params['mask_zero'] = layer.mask_zero
-    return jsonLayer('Embed', params, layer)
-
-
-# ********** Merge Layers **********
-def Concat(layer):
-    params = {}
-    params['axis'] = layer.axis
-    return jsonLayer('Concat', params, layer)
-
-
-def Eltwise(layer):
-    eltwiseMap = {
-        'Add': 'Sum',
-        'Multiply': 'Product',
-        'Maximum': 'Maximum',
-        'Dot': 'Dot',
-        'Average': 'Average'
-    }
-    params = {'layer_type': eltwiseMap[layer.__class__.__name__]}
-    return jsonLayer('Eltwise', params, layer)
-
-
-# ********** Advanced Activations Layers **********
-def LeakyReLU(layer):
-    params = {'negative_slope': layer.alpha.tolist()}
-    return jsonLayer('ReLU', params, layer)
-
-
-def PReLU(layer):
-    return jsonLayer('PReLU', {}, layer)
-
-
-def ELU(layer):
-    params = {'alpha': layer.alpha.tolist()}
-    return jsonLayer('ELU', params, layer)
-
-
-def ThresholdedReLU(layer):
-    params = {'theta': layer.theta.tolist()}
-    return jsonLayer('ThresholdedReLU', params, layer)
-
-
-# ********** Normalisation Layers **********
-def BatchNorm(layer):
-    params = {}
-    params['eps'] = layer.epsilon
-    params['moving_average_fraction'] = layer.momentum
-    params['moving_mean_initializer'] = layer.moving_mean_initializer.__class__.__name__
-    params['moving_variance_initializer'] = layer.moving_variance_initializer.__class__.__name__
-    return jsonLayer('BatchNorm', params, layer)
-
-
-# ********** Noise Layers **********
-def GaussianNoise(layer):
-    params = {}
-    params['stddev'] = layer.stddev
-    return jsonLayer('GaussianNoise', params, layer)
-
-
-def GaussianDropout(layer):
-    params = {}
-    params['rate'] = layer.rate
-    return jsonLayer('GaussianDropout', params, layer)
-
-
-def AlphaDropout(layer):
-    params = {}
-    params['rate'] = layer.rate
-    if (layer.seed):
-        params['seed'] = layer.seed
-    return jsonLayer('AlphaDropout', params, layer)
********** -def Scale(layer): - tempLayer = {} - params = {} - params['axis'] = layer.axis - params['bias_term'] = layer.center - params['scale'] = layer.scale - params['filler'] = layer.gamma_initializer.__class__.__name__ - params['bias_filler'] = layer.beta_initializer.__class__.__name__ - if (layer.beta_regularizer): - params['beta_regularizer'] = layer.beta_regularizer.__class__.__name__ - if (layer.gamma_regularizer): - params['gamma_regularizer'] = layer.gamma_regularizer.__class__.__name__ - if (layer.beta_constraint): - params['beta_constraint'] = layer.beta_constraint.__class__.__name__ - if (layer.gamma_constraint): - params['gamma_constraint'] = layer.gamma_constraint.__class__.__name__ - tempLayer['inbound_nodes'] = [[[layer.name + layer.__class__.__name__]]] - return jsonLayer('Scale', params, tempLayer) - - -def Padding(layer): - pad = np.asarray(layer.padding) - if (len(pad.shape) == 1): - pad = [pad[0]] - else: - pad = pad[:, 0].tolist() - params = {'pad': pad} - return jsonLayer('Pad', params, layer) - - -def TimeDistributed(layer): - return jsonLayer('TimeDistributed', {}, layer) - - -def Bidirectional(layer): - params = {} - params['merge_mode'] = layer.merge_mode - return jsonLayer('Bidirectional', params, layer) - - -def lrn(layer): - params = {} - params['k'] = layer.k - params['beta'] = layer.beta - params['alpha'] = layer.alpha - params['local_size'] = layer.n - return jsonLayer('LRN', params, layer) - - -# ********** Helper functions ********** - -# padding logic following -# https://github.com/Yangqing/caffe2/blob/master/caffe2/proto/caffe2_legacy.proto -def get_padding(params, input_shape, output_shape, pad_type, type): - k_w, k_h, k_d, s_w, s_h, s_d = params - if (type == '1D'): - if (pad_type == 'valid'): - return 0 - else: - pad_w = ((output_shape[1] - 1) * s_w + k_w - input_shape[1]) / 2 - return pad_w - elif (type == '2D'): - if (pad_type == 'valid'): - return [0, 0] - else: - pad_h = ((output_shape[2] - 1) * s_h + k_h - input_shape[2]) / 2 - pad_w = ((output_shape[1] - 1) * s_w + k_w - input_shape[1]) / 2 - return (pad_h, pad_w) - else: - if (pad_type == 'valid'): - return [0, 0, 0] - else: - pad_h = ((output_shape[2] - 1) * s_h + k_h - input_shape[2]) / 2 - pad_w = ((output_shape[1] - 1) * s_w + k_w - input_shape[1]) / 2 - pad_d = ((output_shape[3] - 1) * s_d + k_d - input_shape[3]) / 2 - return (pad_h, pad_w, pad_d) - - -def jsonLayer(type, params, layer): - input = [] - if hasattr(layer, 'wrapped'): - input.append(layer.wrapper[0]) - else: - if isinstance(layer, dict): - for node in layer['inbound_nodes'][0]: - input.append(node[0]) - elif (len(layer.inbound_nodes[0].inbound_layers)): - for node in layer.inbound_nodes[0].inbound_layers: - input.append(node.name) - layer = { - 'info': { - 'type': type, - 'phase': None - }, - 'connection': { - 'input': input, - 'output': [] - }, - 'params': params - } - return layer -import os -import string -import random -from django.views.decorators.csrf import csrf_exempt -from django.http import JsonResponse -from keras_app.views.export_json import export_json - -BASE_DIR = os.path.dirname( - os.path.dirname( - os.path.dirname( - os.path.abspath(__file__)))) - - -def randomword(length): - return ''.join(random.choice(string.lowercase) for i in range(length)) - - -@csrf_exempt -def export_to_tensorflow(request): - # Note : Remove the views for export by adding unittest for celery tasks - response = export_json(request, is_tf=True) - if isinstance(response, JsonResponse): - return response - randomId = 
response['randomId'] - customLayers = response['customLayers'] - os.chdir(BASE_DIR + '/tensorflow_app/views/') - os.system('KERAS_BACKEND=tensorflow python json2pbtxt.py -input_file ' + - randomId + '.json -output_file ' + randomId) - return JsonResponse({'result': 'success', - 'id': randomId, - 'name': randomId + '.pbtxt', - 'url': '/media/' + randomId + '.pbtxt', - 'customLayers': customLayers}) -import tensorflow as tf -from google.protobuf import text_format -from tensorflow.core.framework import graph_pb2 -from django.views.decorators.csrf import csrf_exempt -from django.http import JsonResponse -import urllib2 -from urlparse import urlparse - -from layers_import import import_placeholder, import_conv2d, import_conv3d, import_deconvolution, \ - import_depthwise_convolution, import_pooling2d, import_pooling3d, \ - import_inner_product, import_batchnorm, import_eltwise, import_activation, \ - import_dropout, import_flatten, import_concat, import_lrn - -from layers_import import get_layer_name, get_layer_type, jsonLayer, activation_layers - -layer_map = { - 'Placeholder': import_placeholder, - 'Conv2D': import_conv2d, - 'Conv3D': import_conv3d, - 'MaxPool': import_pooling2d, - 'MaxPool3D': import_pooling3d, - 'AvgPool3D': import_pooling3d, - 'DepthwiseConv2dNative': import_depthwise_convolution, - 'FusedBatchNorm': import_batchnorm, - 'Conv2DBackpropInput': import_deconvolution, - 'LRN': import_lrn, - 'MatMul': import_inner_product, - 'Prod': import_inner_product, - 'Concat': import_concat, - 'AvgPool': import_pooling2d, - 'Reshape': import_flatten -} - -name_map = { - 'flatten': import_flatten, - 'dropout': import_dropout, - 'lrn': import_lrn, - 'concatenate': import_concat, - 'batch': import_batchnorm, - 'BatchNorm': import_batchnorm, - 'add': import_eltwise, - 'mul': import_eltwise -} - - -def get_all_ops_in_layer(layer_name, all_ops): - ops_from_same_layer = [] - for op in all_ops: - if get_layer_name(op.name) == layer_name: - ops_from_same_layer.append(op) - return ops_from_same_layer - - -@csrf_exempt -def import_graph_def(request): - if request.method == 'POST': - if ('file' in request.FILES) and \ - (request.FILES['file'].content_type == 'application/octet-stream' or - request.FILES['file'].content_type == 'text/plain'): - try: - f = request.FILES['file'] - config = f.read() - f.close() - except Exception: - return JsonResponse({'result': 'error', 'error': 'No GraphDef model file found'}) - elif 'config' in request.POST: - config = request.POST['config'] - elif 'url' in request.POST: - try: - url = urlparse(request.POST['url']) - if url.netloc == 'github.com': - url = url._replace(netloc='raw.githubusercontent.com') - url = url._replace(path=url.path.replace('blob/', '')) - config = urllib2.urlopen(url.geturl()).read() - except Exception as ex: - return JsonResponse({'result': 'error', 'error': 'Invalid URL\n'+str(ex)}) - else: - return JsonResponse({'result': 'error', 'error': 'No GraphDef model found'}) - - tf.reset_default_graph() - graph_def = graph_pb2.GraphDef() - - try: - text_format.Merge(config, graph_def) - except Exception: - return JsonResponse({'result': 'error', 'error': 'Invalid GraphDef'}) - - tf.import_graph_def(graph_def, name='') - graph = tf.get_default_graph() - session = tf.Session(graph=graph) - all_ops = graph.get_operations() - - net = {} - processed_layers = [] - layers_with_inplace_relu = {} - - for node in all_ops: - - layer_name = get_layer_name(node.name) - layer_type = get_layer_type(node.name) - - if layer_name in processed_layers: - continue - - 
if node.type == 'NoOp': - init_op = session.graph.get_operation_by_name(node.name) - session.run(init_op) - continue - - all_ops_in_layer = get_all_ops_in_layer(layer_name, all_ops) - for op in all_ops_in_layer: - if op.type == 'FusedBatchNorm': - net[layer_name] = import_batchnorm(all_ops_in_layer) - processed_layers.append(layer_name) - - if node.type in layer_map: - for i, op in enumerate(all_ops_in_layer): - # if the layer has an inplace relu operation, separate the relu op - # this prevents net[layer_name] from being overwritten by an inplace - # relu layer when the layer might actually contain another important - # layer like a dense layer for example - if op.type == 'Relu': - del(all_ops_in_layer[i]) - relu_layer = jsonLayer('ReLU', {}, [layer_name]) - relu_layer_name = layer_name + '_relu' - net[relu_layer_name] = relu_layer - layers_with_inplace_relu[layer_name] = relu_layer_name - json_layer = layer_map[node.type](all_ops_in_layer) - net[layer_name] = json_layer - processed_layers.append(layer_name) - - elif node.type in activation_layers: - json_layer = import_activation(all_ops_in_layer) - net[layer_name] = json_layer - processed_layers.append(layer_name) - - elif layer_type in name_map: - json_layer = name_map[layer_type](all_ops_in_layer) - net[layer_name] = json_layer - processed_layers.append(layer_name) - - # connect layers with the previous layer's inplace relu ops, if any - for layer_name in net: - for i, input_layer in enumerate(net[layer_name]['connection']['input']): - if (input_layer in layers_with_inplace_relu.keys()) and \ - layers_with_inplace_relu[input_layer] != layer_name: - net[layer_name]['connection']['input'][i] = layers_with_inplace_relu[input_layer] - - # fill in outputs of every layer in net using inputs of consumer layers - outputs = {} - for layer_name in net.keys(): - for input_layer_name in net[layer_name]['connection']['input']: - if input_layer_name not in outputs: - outputs[input_layer_name] = [] - if layer_name not in outputs[input_layer_name]: - outputs[input_layer_name].append(layer_name) - for layer in outputs: - net[layer]['connection']['output'] = outputs[layer] - - # add a scale layer next to batch normalization layers - scale_layers = {} - for layer_name in net: - if net[layer_name]['info']['type'] == 'BatchNorm': - batch_norm_outputs = net[layer_name]['connection']['output'][:] - scale_layer_name = layer_name + '_scale' - scale_layer = jsonLayer( - 'Scale', {}, [layer_name], batch_norm_outputs) - net[layer_name]['connection']['output'] = [scale_layer_name] - scale_layers[scale_layer_name] = scale_layer - for scale_layer_name in scale_layers: - net[scale_layer_name] = scale_layers[scale_layer_name] - - session.close() - - return JsonResponse({'result': 'success', 'net': net, 'net_name': ''}) -from keras.models import model_from_json -import tensorflow as tf -from keras import backend as K -import argparse -import os -import imp - -parser = argparse.ArgumentParser(description='set input arguments') -parser.add_argument('-input_file', action="store", - dest='input_file', type=str, default='model.json') -parser.add_argument('-output_file', action="store", - dest='output_file', type=str, default='model.pbtxt') -args = parser.parse_args() -input_file = args.input_file -output_file = args.output_file - -K.set_learning_phase(0) - -BASE_DIR = os.path.dirname( - os.path.dirname( - os.path.dirname( - os.path.abspath(__file__)))) - -output_fld = BASE_DIR + '/media/' - -with open(output_fld + input_file, 'r') as f: - json_str = f.read() - -json_str = 
json_str.strip("'<>() ").replace('\'', '\"') -lrn = imp.load_source('LRN', BASE_DIR + '/keras_app/custom_layers/lrn.py') -model = model_from_json(json_str, {'LRN': lrn.LRN}) - -sess = K.get_session() -tf.train.write_graph(sess.graph.as_graph_def(add_shapes=True), output_fld, - output_file + '.pbtxt', as_text=True) -import math -import re - - -initializer_map = {'random_uniform': 'RandomUniform', 'random_normal': 'RandomNormal', - 'Const': 'Constant', 'zeros': 'Zeros', 'ones': 'Ones', - 'eye': 'Identity', 'truncated_normal': 'TruncatedNormal'} - -activation_layers = [ - 'Sigmoid', - 'Softplus', - 'Softsign', - 'Elu', - 'LeakyRelu', - 'Softmax', - 'Relu', - 'Tanh', - 'SELU' -] - - -def get_layer_name(node_name): - i = node_name.find('/') - if i == -1: - name = str(node_name) - elif str(node_name[:i]) in ['Repeat', 'Stack']: - name = str(node_name.split('/')[1]) - else: - name = str(node_name[:i]) - return name - - -def get_layer_type(node_name): - return node_name.split('_')[0] - - -def get_padding(node, kernel_shape, strides): - if node.type in ["Conv3D", "MaxPool3D", "AvgPool3D"]: - input_tensor = node.inputs[0] - output_tensor = node.outputs[0] - input_shape = [1 if i.value is None else int( - i) for i in input_tensor.shape] - output_shape = [1 if i.value is None else int( - i) for i in output_tensor.shape] - - kernel_d = kernel_shape[0] - kernel_h = kernel_shape[1] - kernel_w = kernel_shape[2] - stride_d = strides[1] - stride_h = strides[2] - stride_w = strides[3] - - pad_d = ((int(output_shape[1]) - 1) * stride_d + - kernel_d - int(input_shape[1])) / float(2) - pad_h = ((int(output_shape[2]) - 1) * stride_h + - kernel_h - int(input_shape[2])) / float(2) - pad_w = ((int(output_shape[3]) - 1) * stride_w + - kernel_w - int(input_shape[3])) / float(2) - - if node.type == "Conv3D": - pad_d = math.ceil(pad_d) - pad_h = math.ceil(pad_h) - pad_w = math.ceil(pad_w) - elif node.type in ["MaxPool3D", "AvgPool3D"]: - pad_d = math.floor(pad_d) - pad_h = math.floor(pad_h) - pad_w = math.floor(pad_w) - - return int(pad_d), int(pad_h), int(pad_w) - - elif node.type == "Conv2DBackpropInput": - input_tensor = node.inputs[2] - output_tensor = node.outputs[0] - input_shape = [1 if i.value is None else int( - i) for i in input_tensor.shape] - output_shape = [1 if i.value is None else int( - i) for i in output_tensor.shape] - - # if deconvolution layer padding calculation logic changes - if ('padding' in node.node_def.attr): - kernel_h = kernel_shape[0] - kernel_w = kernel_shape[1] - stride_h = strides[1] - stride_w = strides[2] - pad_h = ((int(input_shape[1]) - 1) * stride_h + - kernel_h - int(output_shape[1])) / float(2) - pad_w = ((int(input_shape[2]) - 1) * stride_w + - kernel_w - int(output_shape[2])) / float(2) - - return int(math.floor(pad_h)), int(math.floor(pad_w)) - - else: - input_tensor = node.inputs[0] - output_tensor = node.outputs[0] - input_shape = [1 if i.value is None else int( - i) for i in input_tensor.shape] - output_shape = [1 if i.value is None else int( - i) for i in output_tensor.shape] - kernel_h = kernel_shape[0] - kernel_w = kernel_shape[1] - stride_h = strides[1] - stride_w = strides[2] - - pad_h = ((int(output_shape[1]) - 1) * stride_h + - kernel_h - int(input_shape[1])) / float(2) - pad_w = ((int(output_shape[2]) - 1) * stride_w + - kernel_w - int(input_shape[2])) / float(2) - - # check this logic (see caffe-tensorflow/caffe/shapes.py) - if node.type == "Conv2D": - pad_h = math.ceil(pad_h) - pad_w = math.ceil(pad_w) - elif node.type in ["MaxPool", "AvgPool"]: - pad_h = 
math.floor(pad_h) - pad_w = math.floor(pad_w) - - return int(pad_h), int(pad_w) - - -def get_initializer_type(layer_ops): - """Returns a dict mapping variables (weight, bias etc) to initializer types. - The returned dict may be empty if no initializers are found. - """ - weight_name_patterns = [r'.*/weight/*', r'.*/kernel/*'] - bias_name_patterns = [r'.*/bias/*'] - pointwise_weight_name_patterns = [r'.*/pointwise_weights/*'] - depthwise_weight_name_patterns = [r'.*/depthwise_weights/*'] - - initializers = {} - for op in layer_ops: - # extracting weights initializer - for weight_name_pattern in weight_name_patterns: - if re.match(weight_name_pattern, str(op.name)) and op.type in initializer_map.keys(): - initializers['weight'] = initializer_map[op.type] - # extracting bias initializer - for bias_name_pattern in bias_name_patterns: - if re.match(bias_name_pattern, str(op.name)) and op.type in initializer_map.keys(): - initializers['bias'] = initializer_map[op.type] - # extracting pointwise weights initializer - for pointwise_weight_name_pattern in pointwise_weight_name_patterns: - if re.match(pointwise_weight_name_pattern, str(op.name)) and op.type in initializer_map.keys(): - initializers['pointwise_weight'] = initializer_map[op.type] - for depthwise_weight_name_pattern in depthwise_weight_name_patterns: - if re.match(depthwise_weight_name_pattern, str(op.name)) and op.type in initializer_map.keys(): - initializers['depthwise_weight'] = initializer_map[op.type] - - return initializers - - -def get_input_layers(layer_ops): - ''' - Return the names of the layers directly preceding the layer of layer_ops. - layer_ops is a list of all ops of the layer we want the inputs of. - ''' - input_layer_names = [] - name = get_layer_name(layer_ops[0].name) - for node in layer_ops: - for input_tensor in node.inputs: - input_layer_name = get_layer_name(input_tensor.op.name) - if input_layer_name != name: - input_layer_names.append(input_layer_name) - return input_layer_names - - -def import_activation(layer_ops): - layer_type = '' - layer_params = {} - - activation_op = next( - (x for x in layer_ops if x.type in activation_layers), None) - - if activation_op.type == 'Relu': - layer_type = 'ReLU' - - elif activation_op.type == 'LeakyRelu': - if 'alpha' in activation_op.node_def.attr: - layer_params['negative_slope'] = activation_op.get_attr('alpha') - layer_type = 'ReLU' - - elif activation_op.type == 'Elu': - layer_params['alpha'] = 1 - layer_type = 'ELU' - - elif activation_op.type == 'Tanh': - layer_type = 'TanH' - - else: - # rest of the activations have the same name in TF and Fabrik - layer_type = activation_op.type - - return jsonLayer(layer_type, layer_params, get_input_layers(layer_ops), []) - - -def import_placeholder(layer_ops): - placeholder_op = layer_ops[0] - layer_params = {} - layer_dim = [int(dim.size) for dim in placeholder_op.get_attr('shape').dim] - - # make batch size 1 if it is unknown (serialised as 0 or -1) - if layer_dim[0] <= 0: - layer_dim[0] = 1 - - # change tensor format from tensorflow default (NHWC/NDHWC) - # to (NCHW/NCDHW) - temp = layer_dim[1] - layer_dim[1] = layer_dim[-1] - layer_dim[-1] = temp - layer_params['dim'] = str(layer_dim)[1:-1] - - return jsonLayer('Input', layer_params, get_input_layers(layer_ops), []) - - -def import_conv2d(layer_ops): - conv2d_op = next((x for x in layer_ops if x.type == 'Conv2D'), None) - layer_params = {} - layer_params['layer_type'] = '2D' - - strides = [int(i) for i in conv2d_op.get_attr('strides')] - kernel_shape = [int(i) for i in conv2d_op.inputs[1].shape] - 
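# TF Conv2D defaults to NHWC layout: 'strides' arrives as
# [1, stride_h, stride_w, 1] and the kernel tensor is shaped
# [kernel_h, kernel_w, in_channels, out_channels], which is why the
# code below reads elements 1/2 of strides and 0/1/3 of kernel_shape.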
layer_params['stride_h'] = strides[1] - layer_params['stride_w'] = strides[2] - layer_params['kernel_h'] = kernel_shape[0] - layer_params['kernel_w'] = kernel_shape[1] - layer_params['num_output'] = kernel_shape[3] - layer_params['pad_h'], layer_params['pad_w'] = get_padding( - conv2d_op, kernel_shape, strides) - - initializers = get_initializer_type(layer_ops) - try: - layer_params['weight_filler'] = initializers['weight'] - layer_params['bias_filler'] = initializers['bias'] - except KeyError: - # no initializers found, continue - pass - - return jsonLayer('Convolution', layer_params, get_input_layers(layer_ops), []) - - -def import_conv3d(layer_ops): - conv3d_op = next((x for x in layer_ops if x.type == 'Conv3D'), None) - layer_params = {} - layer_params['layer_type'] = '3D' - - kernel_shape = [int(i) for i in conv3d_op.inputs[1].shape] - layer_params['kernel_d'] = kernel_shape[0] - layer_params['kernel_h'] = kernel_shape[1] - layer_params['kernel_w'] = kernel_shape[2] - layer_params['num_output'] = kernel_shape[4] - - strides = [int(i) for i in conv3d_op.get_attr('strides')] - layer_params['stride_d'] = strides[1] - layer_params['stride_h'] = strides[2] - layer_params['stride_w'] = strides[3] - - pad_d, pad_h, pad_w = get_padding(conv3d_op, kernel_shape, strides) - layer_params['pad_d'] = pad_d - layer_params['pad_h'] = pad_h - layer_params['pad_w'] = pad_w - - initializers = get_initializer_type(layer_ops) - try: - layer_params['weight_filler'] = initializers['weight'] - layer_params['bias_filler'] = initializers['bias'] - except KeyError: - # no initializers found, continue - pass - - return jsonLayer('Convolution', layer_params, get_input_layers(layer_ops), []) - - -def import_deconvolution(layer_ops): - deconv_op = next((x for x in layer_ops if x.type == - 'Conv2DBackpropInput'), None) - layer_params = {} - layer_params['layer_type'] = '2D' - - kernel_shape = [int(i) for i in deconv_op.inputs[1].shape] - strides = [int(i) for i in deconv_op.get_attr('strides')] - layer_params['padding'] = str(deconv_op.get_attr('padding')) - layer_params['kernel_h'] = kernel_shape[0] - layer_params['kernel_w'] = kernel_shape[1] - layer_params['num_output'] = kernel_shape[3] - layer_params['pad_h'], layer_params['pad_w'] = get_padding( - deconv_op, kernel_shape, strides) - - initializers = get_initializer_type(layer_ops) - try: - layer_params['weight_filler'] = initializers['weight'] - layer_params['bias_filler'] = initializers['bias'] - except KeyError: - # no initializers found, continue - pass - - return jsonLayer('Deconvolution', layer_params, get_input_layers(layer_ops), []) - - -def import_depthwise_convolution(layer_ops): - depthwise_conv_op = next( - (x for x in layer_ops if x.type == 'DepthwiseConv2dNative'), None) - layer_params = {} - if '3D' in depthwise_conv_op.type: - raise ValueError('3D depthwise convolution cannot be imported.') - - kernel_shape = [int(i) for i in depthwise_conv_op.inputs[1].shape] - layer_params['kernel_h'] = kernel_shape[0] - layer_params['kernel_w'] = kernel_shape[1] - layer_params['num_output'] = kernel_shape[2] - layer_params['depth_multiplier'] = kernel_shape[3] - - if 'padding' in depthwise_conv_op.node_def.attr: - layer_params['padding'] = str(depthwise_conv_op.get_attr('padding')) - strides = [int(i) for i in depthwise_conv_op.get_attr('strides')] - layer_params['stride_h'] = strides[1] - layer_params['stride_w'] = strides[2] - layer_params['pad_h'], layer_params['pad_w'] = get_padding( - depthwise_conv_op, kernel_shape, strides) - - initializers = 
get_initializer_type(layer_ops) - try: - layer_params['pointwise_weight'] = initializers['pointwise_weight'] - layer_params['depthwise_weight'] = initializers['depthwise_weight'] - except KeyError: - # no initializers found, continue - pass - - return jsonLayer('DepthwiseConv', layer_params, get_input_layers(layer_ops), []) - - -def import_pooling2d(layer_ops): - pooling2d_op = next( - (x for x in layer_ops if x.type in ['MaxPool', 'AvgPool'])) - layer_params = {} - layer_params['layer_type'] = '2D' - - # checking type of pooling layer - if pooling2d_op.type == 'MaxPool': - layer_params['pool'] = 'MAX' - elif pooling2d_op.type == 'AvgPool': - layer_params['pool'] = 'AVE' - - kernel_shape = [int(i) for i in pooling2d_op.get_attr('ksize')] - strides = [int(i) for i in pooling2d_op.get_attr('strides')] - layer_params['kernel_h'] = kernel_shape[1] - layer_params['kernel_w'] = kernel_shape[2] - layer_params['stride_h'] = strides[1] - layer_params['stride_w'] = strides[2] - layer_params['padding'] = str(pooling2d_op.get_attr('padding')) - layer_params['pad_h'], layer_params['pad_w'] = get_padding( - pooling2d_op, kernel_shape, strides) - - return jsonLayer('Pooling', layer_params, get_input_layers(layer_ops), []) - - -def import_pooling3d(layer_ops): - pooling3d_op = next( - (x for x in layer_ops if x.type in ['MaxPool3D', 'AvgPool3D'])) - layer_params = {} - layer_params['layer_type'] = '3D' - layer_params['padding'] = str(pooling3d_op.get_attr('padding')) - - # checking type of pooling layer - if pooling3d_op.type == 'MaxPool3D': - layer_params['pool'] = 'MAX' - elif pooling3d_op.type == 'AvgPool3D': - layer_params['pool'] = 'AVE' - - kernel_shape = [int(i) for i in pooling3d_op.get_attr('ksize')] - strides = [int(i) for i in pooling3d_op.get_attr('strides')] - layer_params['kernel_d'] = kernel_shape[1] - layer_params['kernel_h'] = kernel_shape[2] - layer_params['kernel_w'] = kernel_shape[3] - layer_params['stride_d'] = strides[1] - layer_params['stride_h'] = strides[2] - layer_params['stride_w'] = strides[3] - - pad_d, pad_h, pad_w = get_padding(pooling3d_op, kernel_shape, strides) - layer_params['pad_d'] = pad_d - layer_params['pad_h'] = pad_h - layer_params['pad_w'] = pad_w - - return jsonLayer('Pooling', layer_params, get_input_layers(layer_ops), []) - - -def import_inner_product(layer_ops): - inner_product_op = next( - (x for x in layer_ops if x.type in ['Prod', 'MatMul'])) - layer_params = {} - if inner_product_op.type == 'MatMul': - layer_params['num_output'] = int(inner_product_op.inputs[1].shape[1]) - - return jsonLayer('InnerProduct', layer_params, get_input_layers(layer_ops), []) - - -def import_batchnorm(layer_ops): - layer_params = {} - name = get_layer_name(layer_ops[0].name) - - for node in layer_ops: - if re.match(r'.*/batchnorm[_]?[0-9]?/add.*', str(node.name)): - try: - layer_params['eps'] = node.get_attr('value').float_val[0] - except Exception: - pass - if (node.type == 'FusedBatchNorm'): - layer_params['eps'] = float(node.get_attr('epsilon')) - # searching for moving_mean/Initializer ops to extract moving - # mean initializer of batchnorm layer - if name + '/moving_mean/Initializer' in str(node.name): - layer_params['moving_mean_initializer'] = \ - initializer_map[str(node.name).split('/')[3]] - # searching for AssignMovingAvg/decay ops to extract moving - # average fraction of batchnorm layer also considering repeat & stack layer - # as prefixes - if str(node.name) in [name + '/AssignMovingAvg/decay', - 'Repeat/' + name + '/AssignMovingAvg/decay', - 'Stack/' + name + 
'/AssignMovingAvg/decay']: - layer_params['moving_average_fraction'] = node.get_attr( - 'value').float_val[0] - - return jsonLayer('BatchNorm', layer_params, get_input_layers(layer_ops), []) - - -def import_eltwise(layer_ops): - eltwise_op = next( - (x for x in layer_ops if x.type in ['add', 'mul', 'dot'])) - layer_params = {} - if eltwise_op.type == 'add': - layer_params['layer_type'] = 'Sum' - if eltwise_op.type == 'mul': - layer_params['layer_type'] = 'Product' - if eltwise_op.type == 'dot': - layer_params['layer_type'] = 'Dot' - - return jsonLayer('Eltwise', layer_params, get_input_layers(layer_ops), []) - - -def import_dropout(layer_ops): - layer_params = {} - for node in layer_ops: - if ('rate' in node.node_def.attr): - layer_params['rate'] = node.get_attr('rate') - if ('seed' in node.node_def.attr): - layer_params['seed'] = node.get_attr('seed') - if ('training' in node.node_def.attr): - layer_params['trainable'] = node.get_attr('training') - - return jsonLayer('Dropout', layer_params, get_input_layers(layer_ops), []) - - -def import_flatten(layer_ops): - return jsonLayer('Flatten', {}, get_input_layers(layer_ops), []) - - -def import_concat(layer_ops): - layer_params = {} - for node in layer_ops: - if 'axis' in node.node_def.attr: - layer_params['axis'] = node.get_attr('axis') - - return jsonLayer('Concat', layer_params, get_input_layers(layer_ops), []) - - -def import_lrn(layer_ops): - layer_params = {} - for node in layer_ops: - if ('alpha' in node.node_def.attr): - layer_params['alpha'] = node.get_attr('alpha') - if ('beta' in node.node_def.attr): - layer_params['beta'] = node.get_attr('beta') - if ('depth_radius' in node.node_def.attr): - layer_params['local_size'] = node.get_attr('depth_radius') - if ('bias' in node.node_def.attr): - layer_params['k'] = node.get_attr('bias') - - return jsonLayer('LRN', layer_params, get_input_layers(layer_ops), []) - - -def jsonLayer(layer_type, layer_params={}, inputs=[], outputs=[]): - layer = { - 'info': { - 'type': layer_type, - 'phase': None - }, - 'connection': { - 'input': inputs, - 'output': outputs - }, - 'params': layer_params - } - return layer -import subprocess -import sys - - -# Get the command line arguments -model_file = '' -try: - model_file = sys.argv[1] -except IndexError: - print('Usage: python caffe_sample.py PATH_TO_MODEL') - exit() - -solver = [ - 'net: "{}"'.format(model_file), - 'test_iter: 200', - 'test_interval: 500', - 'base_lr: 1e-5', - 'lr_policy: "step"', - 'gamma: 0.1', - 'stepsize: 5000', - 'display: 20', - 'max_iter: 450000', - 'momentum: 0.9', - 'weight_decay: 0.0005', - 'snapshot: 2000', - 'snapshot_prefix: "model/caffe_sample"', - 'solver_mode: GPU', -] - -# Create solver.prototxt -with open('solver.prototxt', 'w') as file: - for line in solver: - file.write(line + '\n') - -# Train the model -subprocess.call(['caffe', 'train', '-gpu', '0', '-solver', 'solver.prototxt']) -from keras.datasets import cifar10 -from keras.models import model_from_json -import sys - -# Get the command line arguments -model_file_name = '' -try: - model_file_name = sys.argv[1] -except IndexError: - print('Usage: python train.py model_json_file') - exit() - -# Load the dataset (keras.datasets.cifar10) -# To use other datasets from keras.datasets, replace cifar10 in line 1 with your preferred dataset. 
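# Note: cifar10 labels load as integer class ids, while the model below is
# compiled with categorical_crossentropy, which expects one-hot targets.
# A minimal sketch of the extra step (assuming the usual 10 classes),
# placed right after load_data(), would be:
#
#     from keras.utils import to_categorical
#     y_train = to_categorical(y_train, num_classes=10)
#     y_test = to_categorical(y_test, num_classes=10)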
-(x_train, y_train), (x_test, y_test) = cifar10.load_data() - -# Load the model from JSON file -json_file = open(model_file_name, 'r') -loaded_model_json = json_file.read() -json_file.close() -loaded_model = model_from_json(loaded_model_json) - -# Print the model summary -loaded_model.summary() - -# Configure model for training and testing with accuracy evaluation -loaded_model.compile(loss='categorical_crossentropy', - optimizer='adam', metrics=['accuracy']) - -# Train the model -loaded_model.fit(x_train, y_train, epochs=150, batch_size=10, verbose=0) - -# Evaluate the model -scores = loaded_model.evaluate(x_test, y_test, verbose=0) - -# Print final accuracy -print("%s: %.2f%%" % (loaded_model.metrics_names[1], scores[1] * 100)) -import tensorflow as tf -from google.protobuf import text_format -import sys - -# Get the model file name -try: - model_file_name = sys.argv[1] -except IndexError: - print('Usage: python tensorflow_sample.py ') - -# Read the protobuf text and build a tf.GraphDef -with open(model_file_name, 'r') as model_file: - model_protobuf = text_format.Parse(model_file.read(), - tf.GraphDef()) - -# Import the GraphDef built above into the default graph -tf.import_graph_def(model_protobuf) - -# You can now add operations on top of the imported graph -import json -import os -import unittest -from django.conf import settings -from django.core.urlresolvers import reverse -from django.test import Client -from django.contrib.auth.models import User -from caffe_app.models import Network, NetworkVersion - - -class SaveToDBTest(unittest.TestCase): - - def setUp(self): - self.client = Client() - - def test_save_json(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide', - 'caffe_export_test.json'), 'r') - net = json.load(tests)['net'] - response = self.client.post( - reverse('saveDB'), - {'net': net, 'net_name': 'netname'}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - - def test_load(self): - u_1 = User(id=1, username='user_1') - u_1.save() - u_2 = User(id=2, username='user_2') - u_2.save() - model = Network(name='net') - model.save() - model_version = NetworkVersion(network=model, network_def={}) - model_version.save() - - response = self.client.post( - reverse('saveDB'), - {'net': '{"net": "testnet"}', 'net_name': 'name'}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - self.assertTrue('id' in response) - proto_id = response['id'] - response = self.client.post(reverse('loadDB'), {'proto_id': proto_id}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - self.assertEqual(response['net_name'], 'name') - - def test_load_nofile(self): - response = self.client.post(reverse('loadDB'), - {'proto_id': 'inexistent'}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'error') - self.assertEqual(response['error'], 'No network file found') -import caffe -import json -import os -import unittest - -from caffe import layers as L, params as P, to_proto -from django.conf import settings -from django.core.urlresolvers import reverse -from django.test import Client - - -class ImportPrototxtTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - sample_file = open(os.path.join(settings.BASE_DIR, - 'example/caffe', - 'GoogleNet.prototxt'), 'r') - # Test 1 - response = self.client.post(reverse('caffe-import'), - {'file': sample_file}) - response = 
json.loads(response.content) - self.assertEqual(response['result'], 'success') - # Test 2 - sample_file = open(os.path.join(settings.BASE_DIR, - 'example/keras', - 'vgg16.json'), 'r') - response = self.client.post(reverse('caffe-import'), - {'file': sample_file}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'error') - self.assertEqual(response['error'], 'Invalid Prototxt\n' - 'local variable \'prototxt\' referenced before assignment') - - def test_caffe_import_by_input(self): - sample_file = open(os.path.join(settings.BASE_DIR, - 'example/caffe', - 'GoogleNet.prototxt'), 'r') - # Test 1 - response = self.client.post(reverse('caffe-import'), - {'config': sample_file.read()}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - # Test 2 - sample_file = open(os.path.join(settings.BASE_DIR, - 'example/keras', - 'vgg16.json'), 'r') - response = self.client.post(reverse('caffe-import'), - {'config': sample_file.read()}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'error') - self.assertEqual(response['error'], 'Invalid Prototxt\n' - '1:1 : Expected identifier or number, got {.') - - def test_caffe_import_by_url(self): - url = 'https://github.com/Cloud-CV/Fabrik/blob/master/example/caffe/All_CNN.prototxt' - # Test 1 - response = self.client.post(reverse('caffe-import'), - {'url': url}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - # Test 2 - url = 'https://github.com/Cloud-CV/Fabrik/blob/master/some_typo_here' - response = self.client.post(reverse('caffe-import'), - {'url': url}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'error') - self.assertEqual(response['error'], - 'Invalid URL\nHTTP Error 404: Not Found') - - def test_caffe_import_by_sample_id(self): - response = self.client.post(reverse('caffe-import'), - {'sample_id': 'GoogleNet'}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - response = self.client.post(reverse('caffe-import'), - {'sample_id': 'vgg15'}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'error') - self.assertEqual(response['error'], 'No Prototxt model file found') - - -class ExportPrototxtTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_export(self): - data = L.Input(shape={'dim': [10, 3, 224, 224]}) - top = L.Convolution(data, kernel_size=3, pad=1, stride=1, num_output=128, dilation=1, - weight_filler={'type': 'xavier'}, bias_filler={'type': 'constant'}) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - response['net']['l0']['params']['caffe'] = True - response['net']['l1']['params']['caffe'] = True - response = self.client.post(reverse('caffe-export'), {'net': json.dumps(response['net']), - 'net_name': ''}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - - -class ExportPrototxtFailTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_export(self): - data = L.Input(shape={'dim': [10, 3, 16, 224, 224]}) - top = L.Convolution(data, kernel_size=3, pad=1, stride=1, num_output=128, dilation=1, - weight_filler={'type': 
'xavier'}, bias_filler={'type': 'constant'}) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - response['net']['l0']['params']['caffe'] = True - response['net']['l1']['params']['layer_type'] = '3D' - response['net']['l1']['params']['caffe'] = False - response = self.client.post(reverse('caffe-export'), {'net': json.dumps(response['net']), - 'net_name': ''}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'error') - - -# ********** Data Layers Test ********** -class ImageDataLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - # Test 1 - data, label = L.ImageData(source='/dummy/source/', batch_size=32, ntop=2, rand_skip=0, - shuffle=False, new_height=256, new_width=256, is_color=False, - root_folder='/dummy/folder/', - transform_param=dict(crop_size=227, mean_value=[104, 117, 123], - mirror=True, force_color=False, - force_gray=False)) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(data, label))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 13) - self.assertEqual(response['result'], 'success') - # Test 2 - data, label = L.ImageData(source='/dummy/source/', batch_size=32, ntop=2, rand_skip=0, - shuffle=False, new_height=256, new_width=256, is_color=False, - root_folder='/dummy/folder/', include=dict(phase=caffe.TRAIN), - transform_param=dict(crop_size=227, mean_file='/path/to/file', - mirror=True, force_color=False, - force_gray=False)) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(data, label))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 13) - self.assertEqual(response['result'], 'success') - - -class DataLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - # Test 1 - data, label = L.Data(source='/dummy/source/', backend=P.Data.LMDB, batch_size=32, ntop=2, - rand_skip=0, prefetch=10, - transform_param=dict(crop_size=227, mean_value=[104, 117, 123], - mirror=True, force_color=False, - force_gray=False)) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(data, label))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 10) - self.assertEqual(response['result'], 'success') - # Test 2 - data, label = L.Data(source='/dummy/source/', 
backend=P.Data.LEVELDB, batch_size=32, ntop=2, - rand_skip=0, prefetch=10, - transform_param=dict(crop_size=227, mean_value=[104, 117, 123], - mirror=True, force_color=False, - force_gray=False)) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(data, label))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 10) - self.assertEqual(response['result'], 'success') - - -class HDF5DataLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - data, label = L.HDF5Data( - source='/dummy/source/', batch_size=32, ntop=2, shuffle=False) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(data, label))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 3) - self.assertEqual(response['result'], 'success') - - -class HDF5OutputLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.HDF5Output(file_name='/dummy/filename') - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 1) - self.assertEqual(response['result'], 'success') - - -class InputLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - data = L.Input(shape={'dim': [10, 3, 224, 224]}) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(data))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 1) - self.assertEqual(response['result'], 'success') - - -class WindowDataLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - data, label = L.WindowData(source='/dummy/source/', batch_size=32, ntop=2, - fg_threshold=0.5, bg_threshold=0.5, fg_fraction=0.25, - context_pad=0, crop_mode='warp', cache_images=False, - root_folder='/dummy/folder/', - transform_param=dict(crop_size=227, mean_value=[104, 117, 123], - mirror=True, force_color=False, - force_gray=False)) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(data, label))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - 
reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 14) - self.assertEqual(response['result'], 'success') - - -class MemoryDataLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - data, label = L.MemoryData( - batch_size=32, ntop=2, channels=3, height=224, width=224) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(data, label))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 4) - self.assertEqual(response['result'], 'success') - - -class DummyDataLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - data = L.DummyData(shape={'dim': [10, 3, 224, 224]}, - data_filler={'type': 'constant'}) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(data))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 1) - self.assertEqual(response['result'], 'success') - - -# ********** Vision Layers Test ********** -class ConvolutionLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - # Test 1 - top = L.Convolution(kernel_size=3, pad=1, stride=1, num_output=128, - weight_filler={'type': 'xavier'}, bias_filler={'type': 'constant'}) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 6) - self.assertEqual(response['result'], 'success') - # Test 2 - top = L.Convolution(kernel_w=3, kernel_h=3, pad_w=1, pad_h=1, stride=1, num_output=128, - weight_filler={'type': 'xavier'}, bias_filler={'type': 'constant'}) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 6) - self.assertEqual(response['result'], 'success') - - -class PoolingLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - # Test 1 - top = L.Pooling(kernel_size=2, pad=0, stride=2, pool=1) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - 
f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 4) - self.assertEqual(response['result'], 'success') - # Test 2 - top = L.Pooling(kernel_size=2, pad=0, stride=2, pool=2) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 4) - self.assertEqual(response['result'], 'success') - - -class SPPLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.SPP(pyramid_height=2, pool=1) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 2) - self.assertEqual(response['result'], 'success') - - -class CropLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.Crop(axis=2, offset=2) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 2) - self.assertEqual(response['result'], 'success') - - -class DeconvolutionLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - # Test 1 - top = L.Deconvolution(convolution_param=dict(kernel_size=3, pad=1, stride=1, num_output=128, - weight_filler={'type': 'xavier'}, bias_filler={'type': 'constant'})) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 6) - self.assertEqual(response['result'], 'success') - # Test 2 - top = L.Deconvolution(convolution_param=dict(kernel_w=3, kernel_h=3, pad_w=1, pad_h=1, stride=1, - num_output=128, dilation=1, weight_filler={'type': 'xavier'}, - bias_filler={'type': 'constant'})) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 
'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 6) - self.assertEqual(response['result'], 'success') - - -# ********** Recurrent Layers Test ********** -class RecurrentLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.Recurrent(recurrent_param=dict(num_output=128, debug_info=False, - expose_hidden=False, weight_filler={'type': 'xavier'}, - bias_filler={'type': 'constant'})) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 5) - self.assertEqual(response['result'], 'success') - - -class RNNLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.RNN(recurrent_param=dict(num_output=128, debug_info=False, - expose_hidden=False, weight_filler={'type': 'xavier'}, - bias_filler={'type': 'constant'})) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 5) - self.assertEqual(response['result'], 'success') - - -class LSTMLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.LSTM(recurrent_param=dict(num_output=128, debug_info=False, - expose_hidden=False, weight_filler={'type': 'xavier'}, - bias_filler={'type': 'constant'})) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 5) - self.assertEqual(response['result'], 'success') - - -# ********** Common Layers Test ********** -class InnerProductLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.InnerProduct(num_output=128, weight_filler={'type': 'xavier'}, - bias_filler={'type': 'constant'}) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 3) - 
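# assertGreaterEqual (rather than assertEqual) is used throughout these
# tests because the importer may attach extra default params beyond the
# ones set explicitly in the prototxt above.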
self.assertEqual(response['result'], 'success') - - -class DropoutLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.Dropout() - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertEqual(response['result'], 'success') - - -class EmbedLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.Embed(num_output=128, input_dim=2, bias_term=False, - weight_filler={'type': 'xavier'}) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 4) - self.assertEqual(response['result'], 'success') - - -# ********** Normalisation Layers Test ********** -class LRNLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.LRN(local_size=5, alpha=1, beta=0.75, - k=1, norm_region=1, in_place=True) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 5) - self.assertEqual(response['result'], 'success') - - -class MVNLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.MVN(normalize_variance=True, eps=1e-9, - across_channels=False, in_place=True) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 3) - self.assertEqual(response['result'], 'success') - - -class BatchNormLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.BatchNorm(use_global_stats=True, - moving_average_fraction=0.999, eps=1e-5, in_place=True) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 3) - 
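# The three params asserted on above correspond to the use_global_stats,
# moving_average_fraction and eps arguments passed to L.BatchNorm.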
self.assertEqual(response['result'], 'success') - - -# ********** Activation / Neuron Layers Test ********** -class ReLULayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.ReLU(negative_slope=0, in_place=True) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 1) - self.assertEqual(response['result'], 'success') - - -class PReLULayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.PReLU(channel_shared=False, in_place=True) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 1) - self.assertEqual(response['result'], 'success') - - -class ELULayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.ELU(alpha=1, in_place=True) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertGreaterEqual(len(response['net']['l0']['params']), 1) - self.assertEqual(response['result'], 'success') - - -class SigmoidLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.Sigmoid(in_place=True) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertEqual(response['result'], 'success') - - -class TanHLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.TanH(in_place=True) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - sample_file = open(os.path.join(settings.BASE_DIR, - 'media', 'test.prototxt'), 'r') - response = self.client.post( - reverse('caffe-import'), {'file': sample_file}) - response = json.loads(response.content) - os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt')) - self.assertEqual(response['result'], 'success') - - -class AbsValLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_caffe_import(self): - top = L.AbsVal(in_place=True) - with open(os.path.join(settings.BASE_DIR, 'media', 
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertEqual(response['result'], 'success')
-
-
-class PowerLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Power(power=1.0, scale=1.0, shift=0.0, in_place=True)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 3)
-        self.assertEqual(response['result'], 'success')
-
-
-class ExpLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Exp(base=-1.0, scale=1.0, shift=0.0, in_place=True)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 3)
-        self.assertEqual(response['result'], 'success')
-
-
-class LogLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Log(base=-1.0, scale=1.0, shift=0.0, in_place=True)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 3)
-        self.assertEqual(response['result'], 'success')
-
-
-class BNLLLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.BNLL(in_place=True)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertEqual(response['result'], 'success')
-
-
-class ThresholdLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Threshold(threshold=1.0, in_place=True)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 1)
-        self.assertEqual(response['result'], 'success')
-
-
-class BiasLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Bias(axis=1, num_axes=1, filler={'type': 'constant'})
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 3)
-        self.assertEqual(response['result'], 'success')
-
-
-class ScaleLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Scale(bias_term=False)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 1)
-        self.assertEqual(response['result'], 'success')
-
-
-# ********** Utility Layers Test **********
-class FlattenLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Flatten(axis=1, end_axis=-1)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 2)
-        self.assertEqual(response['result'], 'success')
-
-
-class ReshapeLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Reshape(shape={'dim': [2, -1]})
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 1)
-        self.assertEqual(response['result'], 'success')
-
-
-class BatchReindexLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.BatchReindex()
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertEqual(response['result'], 'success')
-
-
-class SplitLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Split()
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertEqual(response['result'], 'success')
-
-
-class ConcatLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Concat()
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertEqual(response['result'], 'success')
-
-
-class SliceLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Slice(axis=1, slice_dim=1, slice_point=[1, 2])
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 3)
-        self.assertEqual(response['result'], 'success')
-
-
-class EltwiseLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        # Test 1
-        top = L.Eltwise(operation=2)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 1)
-        self.assertEqual(response['result'], 'success')
-
-
-class FilterLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Filter()
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertEqual(response['result'], 'success')
-
-
-# This layer is currently not supported as there is no bottom blob
-'''class ParameterLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Parameter(shape={'dim': [10, 3, 224, 224]})
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'r')
-        response = self.client.post(reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 1)
-        self.assertEqual(response['result'], 'success')
-'''
-
-
-class ReductionLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        # Test 1
-        top = L.Reduction(operation=1, axis=0, coeff=1.0)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 3)
-        self.assertEqual(response['result'], 'success')
-        # Test 2
-        top = L.Reduction(operation=2, axis=0, coeff=1.0)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 3)
-        self.assertEqual(response['result'], 'success')
-        # Test 3
-        top = L.Reduction(operation=3, axis=0, coeff=1.0)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 3)
-        self.assertEqual(response['result'], 'success')
-        # Test 4
-        top = L.Reduction(operation=4, axis=0, coeff=1.0)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 3)
-        self.assertEqual(response['result'], 'success')
-        # Test 5
-        top = L.Reduction(axis=0, coeff=1.0)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 3)
-        self.assertEqual(response['net']['l0']['params']['operation'], 'SUM')
-        self.assertEqual(response['result'], 'success')
-
-
-class SilenceLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Silence()
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertEqual(response['result'], 'success')
-
-
-class ArgMaxLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.ArgMax(out_max_val=False, top_k=1, axis=0)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 3)
-        self.assertEqual(response['result'], 'success')
-
-
-class SoftmaxLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.Softmax()
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertEqual(response['result'], 'success')
-
-
-# ********** Loss Layers Test **********
-class MultinomialLogisticLossLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.MultinomialLogisticLoss()
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertEqual(response['result'], 'success')
-
-
-class InfogainLossLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.InfogainLoss(source='/dummy/source/', axis=1)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 2)
-        self.assertEqual(response['result'], 'success')
-
-
-class SoftmaxWithLossLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.SoftmaxWithLoss(softmax_param=dict(axis=1))
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 1)
-        self.assertEqual(response['result'], 'success')
-
-
-class EuclideanLossLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.EuclideanLoss()
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertEqual(response['result'], 'success')
-
-
-class HingeLossLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.HingeLoss(norm=2)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 1)
-        self.assertEqual(response['result'], 'success')
-
-
-class SigmoidCrossEntropyLossLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.SigmoidCrossEntropyLoss()
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertEqual(response['result'], 'success')
-
-
-class AccuracyLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        data = L.Input(shape={'dim': [10, 100]})
-        top = L.Accuracy(data, axis=1, top_k=1, include=dict(phase=caffe.TEST))
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l1']['params']), 2)
-        self.assertEqual(response['result'], 'success')
-
-
-class ContrastiveLossLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        top = L.ContrastiveLoss(margin=1.0, legacy_version=False)
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 2)
-        self.assertEqual(response['result'], 'success')
-
-
-# ********** Python Layer Test **********
-class PythonLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_caffe_import(self):
-        # Test 1
-        data = L.Input(shape={'dim': [10, 3, 224, 224]})
-        top = L.Python(data, module='pyloss',
-                       layer='EuclideanLossLayer', loss_weight=1, name='eucLoss')
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l1']['params']), 4)
-        self.assertEqual(response['result'], 'success')
-        # Test 2
-        top = L.Python(module='pascal_multilabel_datalayers', layer='PascalMultilabelDataLayerSync',
-                       param_str="{\'pascal_root\': \'../data/pascal/VOC2007\', \'im_shape\': [227, 227], \
-                           \'split\': \'train\', \'batch_size\': 128}")
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR,
-                                        'media', 'test.prototxt'), 'r')
-        response = self.client.post(
-            reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        os.remove(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'))
-        self.assertGreaterEqual(len(response['net']['l0']['params']), 6)
-        self.assertEqual(response['result'], 'success')
-import caffe
-import json
-import os
-import sys
-import unittest
-import yaml
-
-from caffe import layers as L, to_proto
-from django.conf import settings
-from django.core.urlresolvers import reverse
-from django.test import Client
-from ide.utils.jsonToPrototxt import json_to_prototxt
-from ide.utils.shapes import get_shapes
-from keras.models import model_from_json
-
-
-# ********** Data Layers Test **********
-class ImageDataLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['ImageData']}
-        # Test 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'ImageData')
-        # Test 2
-        net['l0']['info']['phase'] = 0
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'ImageData')
-        # Test 3
-        net['l0']['info']['phase'] = 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'ImageData')
-
-
-class DataLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Data']}
-        # Test 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'Data')
-        # Test 2
-        net['l0']['info']['phase'] = 0
-        net['l0']['params']['mean_value'] = ''
-        net['l0']['params']['mean_file'] = '/path/to/mean/file'
-        net['l0']['params']['backend'] = "LEVELDB"
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'Data')
-        # Test 3
-        net['l0']['info']['phase'] = 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'Data')
-
-
-class HDF5DataLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['HDF5Data']}
-        # Test 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'HDF5Data')
-        # Test 2
-        net['l0']['info']['phase'] = 0
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'HDF5Data')
-        # Test 3
-        net['l0']['info']['phase'] = 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'HDF5Data')
-
-
-class HDF5OutputLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['HDF5Output']}
-        net['l0']['connection']['output'].append('l1')
-        # Test 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'HDF5Output')
-        # Test 2
-        net['l1']['info']['phase'] = 0
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'HDF5Output')
-        # Test 3
-        net['l1']['info']['phase'] = 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'HDF5Output')
-
-
-class InputLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input']}
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertEqual(net['l0']['info']['type'], 'Input')
-
-
-class WindowDataLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['WindowData']}
-        # Test 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'WindowData')
-        # Test 2
-        net['l0']['info']['phase'] = 0
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'WindowData')
-        # Test 3
-        net['l0']['info']['phase'] = 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'WindowData')
-
-
-class MemoryDataLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['MemoryData']}
-        # Test 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'MemoryData')
-        # Test 2
-        net['l0']['info']['phase'] = 0
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'MemoryData')
-        # Test 3
-        net['l0']['info']['phase'] = 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'MemoryData')
-
-
-class DummyDataLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['DummyData']}
-        # Test 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'DummyData')
-        # Test 2
-        net['l0']['info']['phase'] = 0
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'DummyData')
-        # Test 3
-        net['l0']['info']['phase'] = 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l0']['info']['type'], 'DummyData')
-
-
-# ********** Vision Layers Test **********
-class ConvolutionLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Convolution']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Convolution')
-
-
-class PoolingLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Pooling']}
-        net['l0']['connection']['output'].append('l1')
-        # Test 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Pooling')
-        # Test 2
-        net['l1']['params']['pool'] = 'AVE'
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Pooling')
-        # Test 3
-        net['l1']['params']['pool'] = 'STOCHASTIC'
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Pooling')
-
-
-class SPPLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['SPP']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'SPP')
-
-
-class CropLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Crop']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Crop')
-
-
-class DeconvolutionLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Deconvolution']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Deconvolution')
-
-
-# ********** Recurrent Layers Test **********
-class RecurrentLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Recurrent']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Recurrent')
-
-
-class RNNLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['RNN']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'RNN')
-
-
-class LSTMLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['LSTM']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'LSTM')
-
-
-# ********** Common Layers Test **********
-class InnerProductLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['InnerProduct']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'InnerProduct')
-
-
-class DropoutLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Dropout']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Dropout')
-
-
-class EmbedLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Embed']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Embed')
-
-
-# ********** Normalisation Layers Test **********
-class LRNLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['LRN']}
-        net['l0']['connection']['output'].append('l1')
-        # Test 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'LRN')
-        # Test 2
-        net['l1']['params']['norm_region'] = 'ACROSS_CHANNELS'
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'LRN')
-
-
-class MVNLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['MVN']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'MVN')
-
-
-class BatchNormLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['BatchNorm']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'BatchNorm')
-
-
-# ********** Activation / Neuron Layers Test **********
-class ReLULayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['ReLU']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'ReLU')
-
-
-class PReLULayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['PReLU']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'PReLU')
-
-
-class ELULayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['ELU']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'ELU')
-
-
-class SigmoidLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Sigmoid']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Sigmoid')
-
-
-class TanHLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['TanH']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'TanH')
-
-
-class AbsValLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['AbsVal']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'AbsVal')
-
-
-class PowerLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Power']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Power')
-
-
-class ExpLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Exp']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Exp')
-
-
-class LogLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Log']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Log')
-
-
-class BNLLLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['BNLL']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'BNLL')
-
-
-class ThresholdLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Threshold']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Threshold')
-
-
-class BiasLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Bias']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Bias')
-
-
-class ScaleLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Scale']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Scale')
-
-
-# ********** Utility Layers Test **********
-class FlattenLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Flatten']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Flatten')
-
-
-class ReshapeLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Reshape']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Reshape')
-
-
-class BatchReindexLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['BatchReindex']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'BatchReindex')
-
-
-class SplitLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Split']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Split')
-
-
-class ConcatLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Concat']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Concat')
-
-
-class SliceLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Slice']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Slice')
-
-
-class EltwiseLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Eltwise']}
-        net['l0']['connection']['output'].append('l1')
-        # Test 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Eltwise')
-        # Test 2
-        net['l1']['params']['layer_type'] = 'Sum'
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Eltwise')
-        # Test 3
-        net['l1']['params']['layer_type'] = 'Maximum'
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Eltwise')
-        # Test 4
-        net['l1']['params']['layer_type'] = ''
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Eltwise')
-
-
-class FilterLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Filter']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Filter')
-
-
-# This layer is currently not supported as there is no bottom blob
-'''class ParameterLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        data = L.Input(shape={'dim': [10, 3, 224, 224]})
-        top = L.Parameter(data, shape={'dim': [10, 3, 224, 224]})
-        with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f:
-            f.write(str(to_proto(top)))
-        sample_file = open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'r')
-        response = self.client.post(reverse('caffe-import'), {'file': sample_file})
-        response = json.loads(response.content)
-        with open('/home/utsav/Fabrik_Tests/ImageData.json', 'w') as outfile:
-            json.dump(response, outfile)
-        net = yaml.safe_load(json.dumps(response['net']))
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'ImageData')'''
-
-
-class ReductionLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Reduction']}
-        net['l0']['connection']['output'].append('l1')
-        # Test 1
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Reduction')
-        # Test 2
-        net['l1']['params']['operation'] = 'SUM'
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Reduction')
-        # Test 3
-        net['l1']['params']['operation'] = 'ASUM'
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Reduction')
-        # Test 4
-        net['l1']['params']['operation'] = 'SUMSQ'
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Reduction')
-
-
-class SilenceLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Silence']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Silence')
-
-
-class ArgMaxLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['ArgMax']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'ArgMax')
-
-
-class SoftmaxLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['Softmax']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'Softmax')
-
-
-# ********** Loss Layers Test **********
-class MultinomialLogisticLossLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['MultinomialLogisticLoss']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'MultinomialLogisticLoss')
-
-
-class InfogainLossLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['InfogainLoss']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'InfogainLoss')
-
-
-class SoftmaxWithLossLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['SoftmaxWithLoss']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'SoftmaxWithLoss')
-
-
-class EuclideanLossLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['EuclideanLoss']}
-        net['l0']['connection']['output'].append('l1')
-        prototxt, input_dim = json_to_prototxt(net, response['net_name'])
-        self.assertGreater(len(prototxt), 9)
-        self.assertEqual(net['l1']['info']['type'], 'EuclideanLoss')
-
-
-class HingeLossLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_json_to_prototxt(self):
-        tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide',
-                                  'caffe_export_test.json'), 'r')
-        response = json.load(tests)
-        tests.close()
-        net = yaml.safe_load(json.dumps(response['net']))
-        net = {'l0': net['Input'], 'l1': net['HingeLoss']}
net['l0']['connection']['output'].append('l1') - prototxt, input_dim = json_to_prototxt(net, response['net_name']) - self.assertGreater(len(prototxt), 9) - self.assertEqual(net['l1']['info']['type'], 'HingeLoss') - - -class SigmoidCrossEntropyLossLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_json_to_prototxt(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide', - 'caffe_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['SigmoidCrossEntropyLoss']} - net['l0']['connection']['output'].append('l1') - prototxt, input_dim = json_to_prototxt(net, response['net_name']) - self.assertGreater(len(prototxt), 9) - self.assertEqual(net['l1']['info']['type'], 'SigmoidCrossEntropyLoss') - - -class AccuracyLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_json_to_prototxt(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide', - 'caffe_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Accuracy']} - net['l0']['connection']['output'].append('l1') - # Test 1 - prototxt, input_dim = json_to_prototxt(net, response['net_name']) - self.assertGreater(len(prototxt), 9) - self.assertEqual(net['l1']['info']['type'], 'Accuracy') - # Test 2 - net['l1']['info']['phase'] = 0 - prototxt, input_dim = json_to_prototxt(net, response['net_name']) - self.assertGreater(len(prototxt), 9) - self.assertEqual(net['l1']['info']['type'], 'Accuracy') - # Test 3 - net['l1']['info']['phase'] = 1 - prototxt, input_dim = json_to_prototxt(net, response['net_name']) - self.assertGreater(len(prototxt), 9) - self.assertEqual(net['l1']['info']['type'], 'Accuracy') - - -class ContrastiveLossLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_json_to_prototxt(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide', - 'caffe_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['ContrastiveLoss']} - net['l0']['connection']['output'].append('l1') - prototxt, input_dim = json_to_prototxt(net, response['net_name']) - self.assertGreater(len(prototxt), 9) - self.assertEqual(net['l1']['info']['type'], 'ContrastiveLoss') - - -# ********** Python Layer Test ********** -class PythonDataLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_json_to_prototxt(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide', - 'caffe_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['PythonData']} - # Test 1 - prototxt, input_dim = json_to_prototxt(net, response['net_name']) - self.assertGreater(len(prototxt), 9) - self.assertEqual(net['l0']['info']['type'], 'Python') - # Test 2 - net['l0']['params']['endPoint'] = "1, 0" - prototxt, input_dim = json_to_prototxt(net, response['net_name']) - self.assertGreater(len(prototxt), 9) - self.assertEqual(net['l0']['info']['type'], 'Python') - - -class PythonLossLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_json_to_prototxt(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide', - 'caffe_export_test.json'), 'r') - 
response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['PythonLoss']} - net['l0']['connection']['output'].append('l1') - prototxt, input_dim = json_to_prototxt(net, response['net_name']) - self.assertGreater(len(prototxt), 9) - self.assertEqual(net['l1']['info']['type'], 'Python') - - -# ********** Shape Calculation Test ********** -class ShapeCalculationTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def caffe_test(self, path, key, success, layer=None): - with open(path, 'r') as f: - response = self.client.post(reverse('caffe-import'), {'file': f}) - response = json.loads(response.content) - if success: - net = get_shapes(response['net']) - caffe_net = caffe.Net(path, caffe.TEST) - self.assertEqual( - list(caffe_net.blobs[key].data.shape[1:]), net['l0']['shape']['output']) - else: - try: - net = get_shapes(response['net']) - except Exception: - message = 'Cannot determine shape of ' + layer + ' layer.' - self.assertEqual(str(sys.exc_info()[1]), message) - - def keras_test(self, filename): - with open(filename, 'r') as f: - response = self.client.post(reverse('keras-import'), {'file': f}) - response = json.loads(response.content) - net = get_shapes(response['net']) - with open(filename, 'r') as f: - model = model_from_json(json.dumps(json.load(f))) - for layer in model.layers: - self.assertEqual( - list(layer.output_shape[::-1][:-1]), net[layer.name]['shape']['output']) - - def test_shapes(self): - # Test 1 - image_path = os.path.join(settings.BASE_DIR, 'media', 'image_list.txt') - data, _ = L.ImageData(source=image_path, batch_size=32, ntop=2, rand_skip=0, - shuffle=False, new_height=256, new_width=256, is_color=True, - root_folder=os.path.join( - settings.BASE_DIR, 'ide/static/img/'), - transform_param=dict(crop_size=227), name='l0') - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(data))) - self.caffe_test(os.path.join(settings.BASE_DIR, 'media', - 'test.prototxt'), 'ImageData1', True) - # Test 2 - image_path = os.path.join(settings.BASE_DIR, 'media', 'image_list.txt') - data, _ = L.ImageData(source=image_path, batch_size=32, ntop=2, rand_skip=0, - shuffle=False, new_height=256, new_width=256, is_color=True, - root_folder=os.path.join(settings.BASE_DIR, 'ide/static/img/'), name='l0') - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(data))) - self.caffe_test(os.path.join(settings.BASE_DIR, 'media', - 'test.prototxt'), 'ImageData1', True) - # Test 3 - data, _ = L.MemoryData(batch_size=32, ntop=2, - channels=3, height=224, width=224) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(data))) - self.caffe_test(os.path.join(settings.BASE_DIR, 'media', - 'test.prototxt'), 'MemoryData1', True) - # Test 4 - data, _ = L.HDF5Data(source='/dummy/source/', - batch_size=32, ntop=2, shuffle=False) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(data))) - self.caffe_test(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'HDF5Data1', False, - 'HDF5Data') - # Test 5 - top = L.Python(module='pascal_multilabel_datalayers', layer='PascalMultilabelDataLayerSync', - param_str="{\'pascal_root\': \'../data/pascal/VOC2007\', \'im_shape\': [227, 227], \ - \'split\': \'train\', 
\'batch_size\': 128}") - with open(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'w') as f: - f.write(str(to_proto(top))) - self.caffe_test(os.path.join(settings.BASE_DIR, 'media', 'test.prototxt'), 'HDF5Data1', False, - 'Python') - # Test 6 - self.keras_test(os.path.join(settings.BASE_DIR, - 'example/keras', 'shapeCheck1D.json')) - # Test 7 - self.keras_test(os.path.join(settings.BASE_DIR, - 'example/keras', 'shapeCheck2D.json')) - # Test 8 - self.keras_test(os.path.join(settings.BASE_DIR, - 'example/keras', 'shapeCheck3D.json')) -import json -import os -import unittest -import yaml - -from django.conf import settings -from django.core.urlresolvers import reverse -from django.test import Client -from keras.layers import Dense, Activation, Dropout, Flatten -from keras.layers import Reshape, Permute, RepeatVector -from keras.layers import ActivityRegularization, Masking -from keras.layers import Conv1D, Conv2D, Conv3D, Conv2DTranspose, \ - SeparableConv2D -from keras.layers import UpSampling1D, UpSampling2D, UpSampling3D -from keras.layers import GlobalMaxPooling1D, GlobalMaxPooling2D -from keras.layers import MaxPooling1D, MaxPooling2D, MaxPooling3D -from keras.layers import ZeroPadding1D, ZeroPadding2D, ZeroPadding3D -from keras.layers import LocallyConnected1D, LocallyConnected2D -from keras.layers import SimpleRNN, LSTM, GRU -from keras.layers import Embedding -from keras.layers import add, concatenate -from keras.layers.advanced_activations import LeakyReLU, PReLU, \ - ELU, ThresholdedReLU -from keras.layers import BatchNormalization -from keras.layers import GaussianNoise, GaussianDropout, AlphaDropout -from keras.layers import Input -from keras import regularizers -from keras.models import Model, Sequential -from keras import backend as K -from keras_app.views.layers_export import data, convolution, deconvolution, \ - pooling, dense, dropout, embed, recurrent, batch_norm, activation, \ - flatten, reshape, eltwise, concat, upsample, locally_connected, permute, \ - repeat_vector, regularization, masking, gaussian_noise, \ - gaussian_dropout, alpha_dropout -from ide.utils.shapes import get_shapes - - -class ImportJsonTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - # Test 1 - sample_file = open(os.path.join(settings.BASE_DIR, - 'example/keras', - 'vgg16.json'), 'r') - response = self.client.post(reverse('keras-import'), - {'file': sample_file}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - # Test 2 - sample_file = open(os.path.join(settings.BASE_DIR, - 'example/caffe', - 'GoogleNet.prototxt'), 'r') - response = self.client.post(reverse('keras-import'), - {'file': sample_file}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'error') - self.assertEqual(response['error'], 'Invalid JSON') - - def test_keras_import_input(self): - # Test 1 - sample_file = open(os.path.join(settings.BASE_DIR, - 'example/keras', - 'vgg16.json'), 'r') - response = self.client.post(reverse('keras-import'), - {'config': sample_file.read()}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - # Test 2 - sample_file = open(os.path.join(settings.BASE_DIR, - 'example/caffe', - 'GoogleNet.prototxt'), 'r') - response = self.client.post(reverse('keras-import'), - {'config': sample_file.read()}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'error') - self.assertEqual(response['error'], 'Invalid 
JSON') - - def test_keras_import_by_url(self): - url = 'https://github.com/Cloud-CV/Fabrik/blob/master/example/keras/resnet50.json' - # Test 1 - response = self.client.post(reverse('keras-import'), - {'url': url}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - # Test 2 - url = 'https://github.com/Cloud-CV/Fabrik/blob/master/some_typo_here' - response = self.client.post(reverse('keras-import'), - {'url': url}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'error') - self.assertEqual(response['error'], - 'Invalid URL\nHTTP Error 404: Not Found') - - def test_keras_import_sample_id(self): - # Test 1 - response = self.client.post( - reverse('keras-import'), - {'sample_id': 'vgg16'}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - self.assertEqual(response['net_name'], 'vgg16') - self.assertTrue('net' in response) - # Test 2 - response = self.client.post( - reverse('keras-import'), - {'sample_id': 'shapeCheck4D'}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'error') - self.assertEqual(response['error'], 'No JSON model file found') - - -class ExportJsonTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - # Test 1 - img_input = Input((224, 224, 3)) - model = Conv2D(64, (3, 3), padding='same', dilation_rate=1, use_bias=True, - kernel_regularizer=regularizers.l1(), bias_regularizer='l1', - activity_regularizer='l1', kernel_constraint='max_norm', - bias_constraint='max_norm')(img_input) - model = BatchNormalization(center=True, scale=True, beta_regularizer=regularizers.l2(0.01), - gamma_regularizer=regularizers.l2(0.01), - beta_constraint='max_norm', gamma_constraint='max_norm',)(model) - model = Model(img_input, model) - json_string = Model.to_json(model) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.json'), 'w') as out: - json.dump(json.loads(json_string), out, indent=4) - sample_file = open(os.path.join( - settings.BASE_DIR, 'media', 'test.json'), 'r') - response = self.client.post( - reverse('keras-import'), {'file': sample_file}) - response = json.loads(response.content) - net = get_shapes(response['net']) - response = self.client.post(reverse('keras-export'), {'net': json.dumps(net), - 'net_name': ''}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - # Test 2 - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'ide', - 'caffe_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['HDF5Data']} - # Currently we can't determine shape of HDF5Data Layer - response = self.client.post(reverse('keras-export'), {'net': json.dumps(net), - 'net_name': ''}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'error') - - -# *********** Keras Backend Test ********** -class KerasBackendTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_backend(self): - dim_order = K.image_dim_ordering() - backend = K.backend() - if(backend == 'tensorflow'): - self.assertEqual(dim_order, 'tf') - elif(backend == 'theano'): - self.assertNotEqual(dim_order, 'th') - self.assertEqual(dim_order, 'tf') - else: - self.fail('%s backend not supported' % backend) - - -# ********** Import json tests ********** -class HelperFunctions(): - def setUp(self): - self.client = Client() - - def 
keras_type_test(self, model, id, type): - json_string = Model.to_json(model) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.json'), 'w') as out: - json.dump(json.loads(json_string), out, indent=4) - sample_file = open(os.path.join( - settings.BASE_DIR, 'media', 'test.json'), 'r') - response = self.client.post( - reverse('keras-import'), {'file': sample_file}) - response = json.loads(response.content) - layerId = sorted(response['net'].keys()) - self.assertEqual(response['result'], 'success') - self.assertEqual(response['net'][layerId[id]]['info']['type'], type) - - def keras_param_test(self, model, id, params): - json_string = Model.to_json(model) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.json'), 'w') as out: - json.dump(json.loads(json_string), out, indent=4) - sample_file = open(os.path.join( - settings.BASE_DIR, 'media', 'test.json'), 'r') - response = self.client.post( - reverse('keras-import'), {'file': sample_file}) - response = json.loads(response.content) - layerId = sorted(response['net'].keys()) - self.assertEqual(response['result'], 'success') - self.assertGreaterEqual( - len(response['net'][layerId[id]]['params']), params) - - -# ********** Data Layers ********** -class InputImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Input((224, 224, 3)) - model = Model(model, model) - self.keras_param_test(model, 0, 1) - - -# ********** Core Layers ********** -class DenseImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(Dense(100, kernel_regularizer=regularizers.l2(0.01), bias_regularizer=regularizers.l2(0.01), - activity_regularizer=regularizers.l2(0.01), kernel_constraint='max_norm', - bias_constraint='max_norm', activation='relu', input_shape=(16,))) - model.build() - self.keras_param_test(model, 1, 3) - - -class ActivationImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - # softmax - model = Sequential() - model.add(Activation('softmax', input_shape=(15,))) - model.build() - self.keras_type_test(model, 0, 'Softmax') - # relu - model = Sequential() - model.add(Activation('relu', input_shape=(15,))) - model.build() - self.keras_type_test(model, 0, 'ReLU') - # tanh - model = Sequential() - model.add(Activation('tanh', input_shape=(15,))) - model.build() - self.keras_type_test(model, 0, 'TanH') - # sigmoid - model = Sequential() - model.add(Activation('sigmoid', input_shape=(15,))) - model.build() - self.keras_type_test(model, 0, 'Sigmoid') - # selu - model = Sequential() - model.add(Activation('selu', input_shape=(15,))) - model.build() - self.keras_type_test(model, 0, 'SELU') - # softplus - model = Sequential() - model.add(Activation('softplus', input_shape=(15,))) - model.build() - self.keras_type_test(model, 0, 'Softplus') - # softsign - model = Sequential() - model.add(Activation('softsign', input_shape=(15,))) - model.build() - self.keras_type_test(model, 0, 'Softsign') - # hard_sigmoid - model = Sequential() - model.add(Activation('hard_sigmoid', input_shape=(15,))) - model.build() - self.keras_type_test(model, 0, 'HardSigmoid') - # LeakyReLU - model = Sequential() - model.add(LeakyReLU(alpha=1, input_shape=(15,))) - model.build() - self.keras_type_test(model, 0, 'ReLU') - # PReLU - model = Sequential() - model.add(PReLU(input_shape=(15,))) - model.build() - self.keras_type_test(model, 
0, 'PReLU') - # ELU - model = Sequential() - model.add(ELU(alpha=1, input_shape=(15,))) - model.build() - self.keras_type_test(model, 0, 'ELU') - # ThresholdedReLU - model = Sequential() - model.add(ThresholdedReLU(theta=1, input_shape=(15,))) - model.build() - self.keras_type_test(model, 0, 'ThresholdedReLU') - # Linear - model = Sequential() - model.add(Activation('linear', input_shape=(15,))) - model.build() - self.keras_type_test(model, 0, 'Linear') - - -class DropoutImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(Dropout(0.5, input_shape=(64, 10))) - model.build() - self.keras_type_test(model, 0, 'Dropout') - - -class FlattenImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(Flatten(input_shape=(64, 10))) - model.build() - self.keras_type_test(model, 0, 'Flatten') - - -class ReshapeImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(Reshape((5, 2), input_shape=(10,))) - model.build() - self.keras_type_test(model, 0, 'Reshape') - - -class PermuteImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(Permute((2, 1), input_shape=(64, 10))) - model.build() - self.keras_type_test(model, 0, 'Permute') - - -class RepeatVectorImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(RepeatVector(3, input_shape=(10,))) - model.build() - self.keras_type_test(model, 0, 'RepeatVector') - - -class ActivityRegularizationImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(ActivityRegularization(l1=2, input_shape=(10,))) - model.build() - self.keras_type_test(model, 0, 'Regularization') - - -class MaskingImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(Masking(mask_value=0., input_shape=(100, 5))) - model.build() - self.keras_type_test(model, 0, 'Masking') - - -# ********** Convolutional Layers ********** -class ConvolutionImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - # Conv 1D - model = Sequential() - model.add(Conv1D(32, 10, kernel_regularizer=regularizers.l2(0.01), - bias_regularizer=regularizers.l2(0.01), - activity_regularizer=regularizers.l2(0.01), kernel_constraint='max_norm', - bias_constraint='max_norm', activation='relu', input_shape=(10, 1))) - model.build() - self.keras_param_test(model, 1, 9) - # Conv 2D - model = Sequential() - model.add(Conv2D(32, (3, 3), kernel_regularizer=regularizers.l2(0.01), - bias_regularizer=regularizers.l2(0.01), - activity_regularizer=regularizers.l2(0.01), kernel_constraint='max_norm', - bias_constraint='max_norm', activation='relu', input_shape=(16, 16, 1))) - model.build() - self.keras_param_test(model, 1, 13) - # Conv 3D - model = Sequential() - model.add(Conv3D(32, (3, 3, 3), kernel_regularizer=regularizers.l2(0.01), - bias_regularizer=regularizers.l2(0.01), - activity_regularizer=regularizers.l2(0.01), kernel_constraint='max_norm', - 
bias_constraint='max_norm', activation='relu', input_shape=(16, 16, 16, 1))) - model.build() - self.keras_param_test(model, 1, 17) - - -class DepthwiseConvolutionImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(SeparableConv2D(32, 3, depthwise_regularizer=regularizers.l2(0.01), - pointwise_regularizer=regularizers.l2(0.01), - bias_regularizer=regularizers.l2(0.01), - activity_regularizer=regularizers.l2(0.01), depthwise_constraint='max_norm', - bias_constraint='max_norm', pointwise_constraint='max_norm', - activation='relu', input_shape=(16, 16, 1))) - self.keras_param_test(model, 1, 12) - - def test_keras_export(self): - model_file = open(os.path.join(settings.BASE_DIR, 'example/keras', - 'SeparableConvKerasTest.json'), 'r') - response = self.client.post( - reverse('keras-import'), {'file': model_file}) - response = json.loads(response.content) - net = get_shapes(response['net']) - response = self.client.post(reverse('keras-export'), {'net': json.dumps(net), - 'net_name': ''}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - - -class LRNImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import_export(self): - model_file = open(os.path.join(settings.BASE_DIR, 'example/keras', - 'AlexNet.json'), 'r') - response = self.client.post( - reverse('keras-import'), {'file': model_file}) - response = json.loads(response.content) - net = get_shapes(response['net']) - response = self.client.post(reverse('keras-export'), {'net': json.dumps(net), - 'net_name': ''}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - - -class DeconvolutionImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(Conv2DTranspose(32, (3, 3), kernel_regularizer=regularizers.l2(0.01), - bias_regularizer=regularizers.l2(0.01), - activity_regularizer=regularizers.l2(0.01), kernel_constraint='max_norm', - bias_constraint='max_norm', activation='relu', input_shape=(16, 16, 1))) - model.build() - self.keras_param_test(model, 1, 13) - - -class UpsampleImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - # Upsample 1D - model = Sequential() - model.add(UpSampling1D(size=2, input_shape=(16, 1))) - model.build() - self.keras_param_test(model, 0, 2) - # Upsample 2D - model = Sequential() - model.add(UpSampling2D(size=(2, 2), input_shape=(16, 16, 1))) - model.build() - self.keras_param_test(model, 0, 3) - # Upsample 3D - model = Sequential() - model.add(UpSampling3D(size=(2, 2, 2), input_shape=(16, 16, 16, 1))) - model.build() - self.keras_param_test(model, 0, 4) - - -# ********** Pooling Layers ********** -class PoolingImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - # Global Pooling 1D - model = Sequential() - model.add(GlobalMaxPooling1D(input_shape=(16, 1))) - model.build() - self.keras_param_test(model, 0, 5) - # Global Pooling 2D - model = Sequential() - model.add(GlobalMaxPooling2D(input_shape=(16, 16, 1))) - model.build() - self.keras_param_test(model, 0, 8) - # Pooling 1D - model = Sequential() - model.add(MaxPooling1D(pool_size=2, strides=2, - padding='same', input_shape=(16, 1))) - model.build() - 
self.keras_param_test(model, 0, 5) - # Pooling 2D - model = Sequential() - model.add(MaxPooling2D(pool_size=(2, 2), strides=( - 2, 2), padding='same', input_shape=(16, 16, 1))) - model.build() - self.keras_param_test(model, 0, 8) - # Pooling 3D - model = Sequential() - model.add(MaxPooling3D(pool_size=(2, 2, 2), strides=(2, 2, 2), padding='same', - input_shape=(16, 16, 16, 1))) - model.build() - self.keras_param_test(model, 0, 11) - - -# ********** Locally-connected Layers ********** -class LocallyConnectedImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - # Conv 1D - model = Sequential() - model.add(LocallyConnected1D(32, 3, kernel_regularizer=regularizers.l2(0.01), - bias_regularizer=regularizers.l2(0.01), - activity_regularizer=regularizers.l2(0.01), kernel_constraint='max_norm', - bias_constraint='max_norm', activation='relu', input_shape=(16, 10))) - model.build() - self.keras_param_test(model, 1, 12) - # Conv 2D - model = Sequential() - model.add(LocallyConnected2D(32, (3, 3), kernel_regularizer=regularizers.l2(0.01), - bias_regularizer=regularizers.l2(0.01), - activity_regularizer=regularizers.l2(0.01), kernel_constraint='max_norm', - bias_constraint='max_norm', activation='relu', input_shape=(16, 16, 10))) - model.build() - self.keras_param_test(model, 1, 14) - - -# ********** Recurrent Layers ********** -class RecurrentImportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(LSTM(64, return_sequences=True, input_shape=(10, 64))) - model.add(SimpleRNN(32, return_sequences=True)) - model.add(GRU(10, kernel_regularizer=regularizers.l2(0.01), - bias_regularizer=regularizers.l2(0.01), recurrent_regularizer=regularizers.l2(0.01), - activity_regularizer=regularizers.l2(0.01), kernel_constraint='max_norm', - bias_constraint='max_norm', recurrent_constraint='max_norm')) - model.build() - json_string = Model.to_json(model) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.json'), 'w') as out: - json.dump(json.loads(json_string), out, indent=4) - sample_file = open(os.path.join( - settings.BASE_DIR, 'media', 'test.json'), 'r') - response = self.client.post( - reverse('keras-import'), {'file': sample_file}) - response = json.loads(response.content) - layerId = sorted(response['net'].keys()) - self.assertEqual(response['result'], 'success') - self.assertGreaterEqual(len(response['net'][layerId[1]]['params']), 7) - self.assertGreaterEqual(len(response['net'][layerId[3]]['params']), 7) - self.assertGreaterEqual(len(response['net'][layerId[6]]['params']), 7) - - -# ********** Embedding Layers ********** -class EmbeddingImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(Embedding(1000, 64, input_length=10, embeddings_regularizer=regularizers.l2(0.01), - embeddings_constraint='max_norm')) - model.build() - self.keras_param_test(model, 0, 7) - - -# ********** Merge Layers ********** -class ConcatImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - img_input = Input((224, 224, 3)) - model = Conv2D(64, (3, 3), padding='same')(img_input) - model = concatenate([img_input, model]) - model = Model(img_input, model) - self.keras_type_test(model, 0, 'Concat') - - -class EltwiseImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = 
Client() - - def test_keras_import(self): - img_input = Input((224, 224, 64)) - model = Conv2D(64, (3, 3), padding='same')(img_input) - model = add([img_input, model]) - model = Model(img_input, model) - self.keras_type_test(model, 0, 'Eltwise') - - -# ********** Normalisation Layers ********** -class BatchNormImportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(BatchNormalization(center=True, scale=True, beta_regularizer=regularizers.l2(0.01), - gamma_regularizer=regularizers.l2(0.01), - beta_constraint='max_norm', gamma_constraint='max_norm', - input_shape=(16, 10))) - model.build() - json_string = Model.to_json(model) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.json'), 'w') as out: - json.dump(json.loads(json_string), out, indent=4) - sample_file = open(os.path.join( - settings.BASE_DIR, 'media', 'test.json'), 'r') - response = self.client.post( - reverse('keras-import'), {'file': sample_file}) - response = json.loads(response.content) - layerId = sorted(response['net'].keys()) - self.assertEqual(response['result'], 'success') - self.assertEqual(response['net'][layerId[0]]['info']['type'], 'Scale') - self.assertEqual(response['net'][layerId[1]] - ['info']['type'], 'BatchNorm') - - -# ********** Noise Layers ********** -class GaussianNoiseImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(GaussianNoise(stddev=0.1, input_shape=(16, 1))) - model.build() - self.keras_param_test(model, 0, 1) - - -class GaussianDropoutImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(GaussianDropout(rate=0.5, input_shape=(16, 1))) - model.build() - self.keras_param_test(model, 0, 1) - - -class AlphaDropoutImportTest(unittest.TestCase, HelperFunctions): - def setUp(self): - self.client = Client() - - def test_keras_import(self): - model = Sequential() - model.add(AlphaDropout(rate=0.5, seed=5, input_shape=(16, 1))) - model.build() - self.keras_param_test(model, 0, 1) - - -# ********** Utility Layers ********** -class PaddingImportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def pad_test(self, model, field, value): - json_string = Model.to_json(model) - with open(os.path.join(settings.BASE_DIR, 'media', 'test.json'), 'w') as out: - json.dump(json.loads(json_string), out, indent=4) - sample_file = open(os.path.join( - settings.BASE_DIR, 'media', 'test.json'), 'r') - response = self.client.post( - reverse('keras-import'), {'file': sample_file}) - response = json.loads(response.content) - layerId = sorted(response['net'].keys()) - self.assertEqual(response['result'], 'success') - self.assertEqual(response['net'][layerId[0]]['params'][field], value) - - def test_keras_import(self): - # Pad 1D - model = Sequential() - model.add(ZeroPadding1D(2, input_shape=(224, 3))) - model.add(Conv1D(32, 7, strides=2)) - model.build() - self.pad_test(model, 'pad_w', 2) - # Pad 2D - model = Sequential() - model.add(ZeroPadding2D(2, input_shape=(224, 224, 3))) - model.add(Conv2D(32, 7, strides=2)) - model.build() - self.pad_test(model, 'pad_w', 2) - # Pad 3D - model = Sequential() - model.add(ZeroPadding3D(2, input_shape=(224, 224, 224, 3))) - model.add(Conv3D(32, 7, strides=2)) - model.build() - self.pad_test(model, 'pad_w', 2) - - -# ********** Export json tests ********** - -# 
********** Data Layers Test ********** -class InputExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input']} - net = data(net['l0'], '', 'l0') - model = Model(net['l0'], net['l0']) - self.assertEqual(model.layers[0].__class__.__name__, 'InputLayer') - - -# ********** Core Layers ********** -class DenseExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input2'], 'l1': net['InnerProduct']} - net['l0']['connection']['output'].append('l1') - # Test 1 - inp = data(net['l0'], '', 'l0')['l0'] - temp = dense(net['l1'], [inp], 'l1') - model = Model(inp, temp['l1']) - self.assertEqual(model.layers[2].__class__.__name__, 'Dense') - # Test 2 - net['l1']['params']['weight_filler'] = 'glorot_normal' - net['l1']['params']['bias_filler'] = 'glorot_normal' - inp = data(net['l0'], '', 'l0')['l0'] - temp = dense(net['l1'], [inp], 'l1') - model = Model(inp, temp['l1']) - self.assertEqual(model.layers[2].__class__.__name__, 'Dense') - - -class ReLUExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['ReLU']} - # Test 1 - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - temp = activation(net['l1'], [inp], 'l1') - model = Model(inp, temp['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Activation') - # Test 2 - net['l1']['params']['negative_slope'] = 1 - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - temp = activation(net['l1'], [inp], 'l1') - model = Model(inp, temp['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'LeakyReLU') - - -class PReLUExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['PReLU']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = activation(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'PReLU') - - -class ELUExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['ELU']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = activation(net['l1'], [inp], 'l1') - model = 
Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'ELU') - - -class ThresholdedReLUExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['ThresholdedReLU']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = activation(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'ThresholdedReLU') - - -class SigmoidExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Sigmoid']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = activation(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Activation') - - -class TanHExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['TanH']} - inp = data(net['l0'], '', 'l0')['l0'] - net = activation(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Activation') - - -class SoftmaxExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Softmax']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = activation(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Activation') - - -class SELUExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['SELU']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = activation(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Activation') - - -class SoftplusExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Softplus']} - 
net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = activation(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Activation') - - -class SoftsignExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Softsign']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = activation(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Activation') - - -class HardSigmoidExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['HardSigmoid']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = activation(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Activation') - - -class LinearActivationExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Linear']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = activation(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Activation') - - -class DropoutExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input3'], 'l1': net['Dropout']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = dropout(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Dropout') - - -class FlattenExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Flatten']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = flatten(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Flatten') - - -class ReshapeExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 
'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Reshape']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = reshape(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Reshape') - - -class PermuteExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input2'], 'l1': net['Permute']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = permute(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Permute') - - -class RepeatVectorExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input3'], 'l1': net['RepeatVector']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = repeat_vector(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'RepeatVector') - - -class RegularizationExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input3'], 'l1': net['Regularization']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = regularization(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual( - model.layers[1].__class__.__name__, 'ActivityRegularization') - - -class MaskingExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input2'], 'l1': net['Masking']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = masking(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Masking') - - -# ********** Vision Layers Test ********** -class ConvolutionExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Input2'], - 'l2': net['Input4'], 'l3': net['Convolution']} - # Conv 1D - net['l1']['connection']['output'].append('l3') - net['l3']['connection']['input'] = ['l1'] - net['l3']['params']['layer_type'] = '1D' 
- net['l3']['shape']['input'] = net['l1']['shape']['output'] - net['l3']['shape']['output'] = [128, 12] - inp = data(net['l1'], '', 'l1')['l1'] - temp = convolution(net['l3'], [inp], 'l3') - model = Model(inp, temp['l3']) - self.assertEqual(model.layers[2].__class__.__name__, 'Conv1D') - # Conv 2D - net['l0']['connection']['output'].append('l3') - net['l3']['connection']['input'] = ['l0'] - net['l3']['params']['layer_type'] = '2D' - net['l3']['shape']['input'] = net['l0']['shape']['output'] - net['l3']['shape']['output'] = [128, 226, 226] - inp = data(net['l0'], '', 'l0')['l0'] - temp = convolution(net['l3'], [inp], 'l3') - model = Model(inp, temp['l3']) - self.assertEqual(model.layers[2].__class__.__name__, 'Conv2D') - # Conv 3D - net['l2']['connection']['output'].append('l3') - net['l3']['connection']['input'] = ['l2'] - net['l3']['params']['layer_type'] = '3D' - net['l3']['shape']['input'] = net['l2']['shape']['output'] - net['l3']['shape']['output'] = [128, 226, 226, 18] - inp = data(net['l2'], '', 'l2')['l2'] - temp = convolution(net['l3'], [inp], 'l3') - model = Model(inp, temp['l3']) - self.assertEqual(model.layers[2].__class__.__name__, 'Conv3D') - - -class DeconvolutionExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Deconvolution']} - net['l0']['connection']['output'].append('l1') - # Test 1 - inp = data(net['l0'], '', 'l0')['l0'] - temp = deconvolution(net['l1'], [inp], 'l1') - model = Model(inp, temp['l1']) - self.assertEqual(model.layers[2].__class__.__name__, 'Conv2DTranspose') - # Test 2 - net['l1']['params']['weight_filler'] = 'xavier' - net['l1']['params']['bias_filler'] = 'xavier' - inp = data(net['l0'], '', 'l0')['l0'] - temp = deconvolution(net['l1'], [inp], 'l1') - model = Model(inp, temp['l1']) - self.assertEqual(model.layers[2].__class__.__name__, 'Conv2DTranspose') - - -class UpsampleExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Input2'], - 'l2': net['Input4'], 'l3': net['Upsample']} - # Upsample 1D - net['l1']['connection']['output'].append('l3') - net['l3']['connection']['input'] = ['l1'] - net['l3']['params']['layer_type'] = '1D' - inp = data(net['l1'], '', 'l1')['l1'] - temp = upsample(net['l3'], [inp], 'l3') - model = Model(inp, temp['l3']) - self.assertEqual(model.layers[1].__class__.__name__, 'UpSampling1D') - # Upsample 2D - net['l0']['connection']['output'].append('l3') - net['l3']['connection']['input'] = ['l0'] - net['l3']['params']['layer_type'] = '2D' - inp = data(net['l0'], '', 'l0')['l0'] - temp = upsample(net['l3'], [inp], 'l3') - model = Model(inp, temp['l3']) - self.assertEqual(model.layers[1].__class__.__name__, 'UpSampling2D') - # Upsample 3D - net['l2']['connection']['output'].append('l3') - net['l3']['connection']['input'] = ['l2'] - net['l3']['params']['layer_type'] = '3D' - inp = data(net['l2'], '', 'l2')['l2'] - temp = upsample(net['l3'], [inp], 'l3') - model = Model(inp, temp['l3']) - self.assertEqual(model.layers[1].__class__.__name__, 
'UpSampling3D') - - -class PoolingExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Input2'], - 'l2': net['Input4'], 'l3': net['Pooling']} - # Pool 1D - net['l1']['connection']['output'].append('l3') - net['l3']['connection']['input'] = ['l1'] - net['l3']['params']['layer_type'] = '1D' - net['l3']['shape']['input'] = net['l1']['shape']['output'] - net['l3']['shape']['output'] = [12, 12] - inp = data(net['l1'], '', 'l1')['l1'] - temp = pooling(net['l3'], [inp], 'l3') - model = Model(inp, temp['l3']) - self.assertEqual(model.layers[2].__class__.__name__, 'MaxPooling1D') - # Pool 2D - net['l0']['connection']['output'].append('l3') - net['l3']['connection']['input'] = ['l0'] - net['l3']['params']['layer_type'] = '2D' - net['l3']['shape']['input'] = net['l0']['shape']['output'] - net['l3']['shape']['output'] = [3, 226, 226] - inp = data(net['l0'], '', 'l0')['l0'] - temp = pooling(net['l3'], [inp], 'l3') - model = Model(inp, temp['l3']) - self.assertEqual(model.layers[2].__class__.__name__, 'MaxPooling2D') - # Pool 3D - net['l2']['connection']['output'].append('l3') - net['l3']['connection']['input'] = ['l2'] - net['l3']['params']['layer_type'] = '3D' - net['l3']['shape']['input'] = net['l2']['shape']['output'] - net['l3']['shape']['output'] = [3, 226, 226, 18] - inp = data(net['l2'], '', 'l2')['l2'] - temp = pooling(net['l3'], [inp], 'l3') - model = Model(inp, temp['l3']) - self.assertEqual(model.layers[2].__class__.__name__, 'MaxPooling3D') - - -# ********** Locally-connected Layers ********** -class LocallyConnectedExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Input2'], - 'l3': net['LocallyConnected']} - # LocallyConnected 1D - net['l1']['connection']['output'].append('l3') - net['l3']['connection']['input'] = ['l1'] - net['l3']['params']['layer_type'] = '1D' - inp = data(net['l1'], '', 'l1')['l1'] - temp = locally_connected(net['l3'], [inp], 'l3') - model = Model(inp, temp['l3']) - self.assertEqual( - model.layers[1].__class__.__name__, 'LocallyConnected1D') - # LocallyConnected 2D - net['l0']['connection']['output'].append('l3') - net['l0']['shape']['output'] = [3, 10, 10] - net['l3']['connection']['input'] = ['l0'] - net['l3']['params']['layer_type'] = '2D' - inp = data(net['l0'], '', 'l0')['l0'] - temp = locally_connected(net['l3'], [inp], 'l3') - model = Model(inp, temp['l3']) - self.assertEqual( - model.layers[1].__class__.__name__, 'LocallyConnected2D') - - -# ********** Recurrent Layers Test ********** -class RNNExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input2'], 'l1': net['RNN']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 
'l0')['l0'] - net = recurrent(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'SimpleRNN') - - -class LSTMExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input2'], 'l1': net['LSTM']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = recurrent(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'LSTM') - - -class GRUExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input2'], 'l1': net['GRU']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = recurrent(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'GRU') - - -# ********** Embed Layer Test ********* -class EmbedExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input3'], 'l1': net['Embed']} - net['l0']['connection']['output'].append('l1') - # Test 1 - inp = data(net['l0'], '', 'l0')['l0'] - temp = embed(net['l1'], [inp], 'l1') - model = Model(inp, temp['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Embedding') - # Test 2 - net['l1']['params']['input_length'] = None - net['l1']['params']['weight_filler'] = 'VarianceScaling' - inp = data(net['l0'], '', 'l0')['l0'] - temp = embed(net['l1'], [inp], 'l1') - model = Model(inp, temp['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Embedding') - - -# ********** Merge Layers Test ********** -class EltwiseExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Eltwise']} - net['l0']['connection']['output'].append('l1') - # Test 1 - inp = data(net['l0'], '', 'l0')['l0'] - temp = eltwise(net['l1'], [inp, inp], 'l1') - model = Model(inp, temp['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Multiply') - # Test 2 - net['l1']['params']['layer_type'] = 'Sum' - inp = data(net['l0'], '', 'l0')['l0'] - temp = eltwise(net['l1'], [inp, inp], 'l1') - model = Model(inp, temp['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Add') - # Test 3 - net['l1']['params']['layer_type'] = 'Average' - inp = data(net['l0'], '', 'l0')['l0'] - temp = eltwise(net['l1'], [inp, inp], 'l1') - model = Model(inp, temp['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Average') - # Test 4 - net['l1']['params']['layer_type'] = 'Dot' - inp = 
data(net['l0'], '', 'l0')['l0'] - temp = eltwise(net['l1'], [inp, inp], 'l1') - model = Model(inp, temp['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Dot') - # Test 5 - net['l1']['params']['layer_type'] = 'Maximum' - inp = data(net['l0'], '', 'l0')['l0'] - temp = eltwise(net['l1'], [inp, inp], 'l1') - model = Model(inp, temp['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Maximum') - - -class ConcatExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['Concat']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = concat(net['l1'], [inp, inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'Concatenate') - - -# ********** Noise Layers Test ********** -class GaussianNoiseExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['GaussianNoise']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = gaussian_noise(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'GaussianNoise') - - -class GaussianDropoutExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['GaussianDropout']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = gaussian_dropout(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'GaussianDropout') - - -class AlphaDropoutExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['AlphaDropout']} - net['l0']['connection']['output'].append('l1') - inp = data(net['l0'], '', 'l0')['l0'] - net = alpha_dropout(net['l1'], [inp], 'l1') - model = Model(inp, net['l1']) - self.assertEqual(model.layers[1].__class__.__name__, 'AlphaDropout') - - -# ********** Normalisation Layers Test ********** -class BatchNormExportTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_keras_export(self): - tests = open(os.path.join(settings.BASE_DIR, 'tests', 'unit', 'keras_app', - 'keras_export_test.json'), 'r') - response = json.load(tests) - tests.close() - net = yaml.safe_load(json.dumps(response['net'])) - net = {'l0': net['Input'], 'l1': net['BatchNorm'], 'l2': net['Scale']} - net['l0']['connection']['output'].append('l1') - # Test 1 - inp = data(net['l0'], '', 
'l0')['l0'] - temp = batch_norm(net['l1'], [inp], 'l1', 'l2', net['l2']) - model = Model(inp, temp['l2']) - self.assertEqual( - model.layers[1].__class__.__name__, 'BatchNormalization') - # Test 2 - net['l2']['params']['filler'] = 'VarianceScaling' - net['l2']['params']['bias_filler'] = 'VarianceScaling' - inp = data(net['l0'], '', 'l0')['l0'] - temp = batch_norm(net['l1'], [inp], 'l1', 'l2', net['l2']) - model = Model(inp, temp['l2']) - self.assertEqual( - model.layers[1].__class__.__name__, 'BatchNormalization') - # Test 3 - inp = data(net['l0'], '', 'l0')['l0'] - temp = batch_norm(net['l1'], [inp], 'l1', 'l0', net['l0']) - model = Model(inp, temp['l1']) - self.assertEqual( - model.layers[1].__class__.__name__, 'BatchNormalization') -import json -import os -import unittest - -from django.conf import settings -from django.core.urlresolvers import reverse -from django.test import Client -from ide.utils.shapes import get_shapes - - -class UploadTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_tf_import(self): - sample_file = open(os.path.join(settings.BASE_DIR, 'example/tensorflow', 'GoogleNet.pbtxt'), - 'r') - response = self.client.post( - reverse('tf-import'), {'file': sample_file}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - - -class ConvLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_tf_import(self): - model_file = open(os.path.join(settings.BASE_DIR, 'example/tensorflow', 'Conv3DCheck.pbtxt'), - 'r') - response = self.client.post(reverse('tf-import'), {'file': model_file}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - - -class DeconvLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_tf_import(self): - model_file = open(os.path.join(settings.BASE_DIR, 'example/tensorflow', 'denoiseAutoEncoder.pbtxt'), - 'r') - response = self.client.post(reverse('tf-import'), {'file': model_file}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - - -class PoolLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_tf_import(self): - model_file = open(os.path.join(settings.BASE_DIR, 'example/tensorflow', 'Pool3DCheck.pbtxt'), - 'r') - response = self.client.post(reverse('tf-import'), {'file': model_file}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - - -class RepeatLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_tf_import(self): - model_file = open(os.path.join(settings.BASE_DIR, 'example/tensorflow', 'Conv2DRepeat.pbtxt'), - 'r') - response = self.client.post(reverse('tf-import'), {'file': model_file}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - - -class StackLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_tf_import(self): - model_file = open(os.path.join(settings.BASE_DIR, 'example/tensorflow', 'FCStack.pbtxt'), - 'r') - response = self.client.post(reverse('tf-import'), {'file': model_file}) - response = json.loads(response.content) - self.assertEqual(response['result'], 'success') - - -class DepthwiseConvLayerTest(unittest.TestCase): - def setUp(self): - self.client = Client() - - def test_tf_import(self): - model_file = open(os.path.join(settings.BASE_DIR, 'example/tensorflow', 'DepthwiseConv.pbtxt'), - 'r') - response = 
-            self.client.post(reverse('tf-import'), {'file': model_file})
-        response = json.loads(response.content)
-        self.assertEqual(response['result'], 'success')
-
-
-class BatchNormLayerTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_tf_import(self):
-        model_file = open(os.path.join(settings.BASE_DIR, 'example/tensorflow', 'BatchNorm.pbtxt'),
-                          'r')
-        response = self.client.post(reverse('tf-import'), {'file': model_file})
-        response = json.loads(response.content)
-        self.assertEqual(response['result'], 'success')
-
-
-class LRNImportTest(unittest.TestCase):
-    def setUp(self):
-        self.client = Client()
-
-    def test_tf_export(self):
-        model_file = open(os.path.join(settings.BASE_DIR, 'example/keras',
-                                       'AlexNet.json'), 'r')
-        response = self.client.post(
-            reverse('keras-import'), {'file': model_file})
-        response = json.loads(response.content)
-        net = get_shapes(response['net'])
-        response = self.client.post(reverse('tf-export'), {'net': json.dumps(net),
-                                                           'net_name': ''})
-        response = json.loads(response.content)
-        self.assertEqual(response['result'], 'success')
-
-    def test_custom_lrn_tf_import(self):
-        model_file = open(os.path.join(settings.BASE_DIR, 'example/tensorflow', 'LRN.pbtxt'),
-                          'r')
-        response = self.client.post(reverse('tf-import'), {'file': model_file})
-        response = json.loads(response.content)
-        self.assertEqual(response['result'], 'success')
-from setuptools import setup, find_packages
-
-long_description = '''
-Give an input CSV file and a target field you want to predict to automl-gs, and get a trained high-performing machine learning or deep learning model plus native code pipelines allowing you to integrate that model into any prediction workflow. No black box: you can see *exactly* how the data is processed, how the model is constructed, and you can make tweaks as necessary.
-
-automl-gs is an AutoML tool which, unlike Microsoft's [NNI](https://github.com/Microsoft/nni), Uber's [Ludwig](https://github.com/uber/ludwig), and [TPOT](https://github.com/EpistasisLab/tpot), offers a *zero code/model definition interface* to getting an optimized model and data transformation pipeline in multiple popular ML/DL frameworks, with minimal Python dependencies (pandas + scikit-learn + your framework of choice). automl-gs is designed for citizen data scientists and engineers without a deep statistical background, under the philosophy that you don't need to know any modern data preprocessing and machine learning engineering techniques to create a powerful prediction workflow.
-
-Nowadays, the cost of computing many different models and hyperparameters is much lower than the opportunity cost of a data scientist's time. automl-gs is a Python 3 module designed to abstract away the common approaches to transforming tabular data, architecting machine learning/deep learning models, and performing random hyperparameter searches to identify the best-performing model. This allows data scientists and researchers to better utilize their time on model performance optimization.
-
-* Generates native Python code; no platform lock-in, and no need to use automl-gs after the model script is created.
-* Train model configurations super-fast *for free* using a **TPU** in Google Colaboratory.
-* Handles messy datasets that normally require manual intervention, such as datetime/categorical encoding and spaced/parenthesized column names.
-* Each part of the generated model pipeline is its own function w/ docstrings, making it much easier to integrate into production workflows.
-* Extremely detailed metrics reporting for every trial stored in a tidy CSV, allowing you to identify and visualize model strengths and weaknesses. -* Correct serialization of data pipeline encoders on disk (i.e. no pickled Python objects!) -* Retrain the generated model on new data without making any code/pipeline changes. -* Quit the hyperparameter search at any time, as the results are saved after each trial. - -The models generated by automl-gs are intended to give a very strong *baseline* for solving a given problem; they're not the end-all-be-all that often accompanies the AutoML hype, but the resulting code is easily tweakable to improve from the baseline. -''' - - -setup( - name='automl_gs', - packages=['automl_gs'], # this must be the same as the name above - version='0.2.1', - description='Provide an input CSV and a target field to predict, ' \ - 'generate a model + code to run it.', - long_description=long_description, - long_description_content_type='text/markdown', - author='Max Woolf', - author_email='max@minimaxir.com', - url='https://github.com/minimaxir/automl-gs', - keywords=['deep learning', 'tensorflow', 'keras', 'automl', 'xgboost'], - classifiers=[], - license='MIT', - entry_points={ - 'console_scripts': ['automl_gs=automl_gs.automl_gs:cmd'], - }, - python_requires='>=3.5', - include_package_data=True, - install_requires=['pandas', 'scikit-learn', - 'autopep8', 'tqdm', 'jinja2>=2.8', 'pyyaml'] -) -from .automl_gs import automl_grid_search -from .automl_gs import cmd -import os -import pandas as pd -from jinja2 import Environment, PackageLoader -from tqdm import tqdm, tqdm_notebook -from datetime import datetime -import shutil -import uuid -import argparse -from .utils_automl import * - - -def automl_grid_search(csv_path, target_field, - target_metric=None, - framework='tensorflow', - model_name='automl', - context='standalone', - num_trials=100, - split=0.7, - num_epochs=20, - col_types={}, - gpu=False, - tpu_address=None): - """Parent function which performs the hyperparameter search. 
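-
-    Example (illustrative; assumes a local `titanic.csv` with a
-    `Survived` target column):
-
-        >>> from automl_gs import automl_grid_search
-        >>> automl_grid_search('titanic.csv', 'Survived',
-        ...                    framework='tensorflow', num_trials=10)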
- - See the package README for parameter descriptions: - https://github.com/minimaxir/automl-gs - """ - - # Prepare environment and source data - env = Environment( - loader=PackageLoader('automl_gs', 'templates'), - trim_blocks=True, - lstrip_blocks=True - ) - - df = pd.read_csv(csv_path, nrows=100) - object_cols = [col for col, col_type in df.dtypes.iteritems() - if col_type == 'object'] - df[object_cols] = df[object_cols].apply(pd.to_datetime, errors='ignore') - - problem_type, target_metric, direction = get_problem_config( - df[target_field], framework, target_metric) - input_types = get_input_types(df, col_types, target_field) - hp_grid = build_hp_grid(framework, set( - input_types.values()), num_trials, problem_type) - fields = normalize_col_names(input_types) - - metrics_csv = open("automl_results.csv", 'w') - best_result = None - timeformat_utc = "{:%Y%m%d_%H%M%S}".format(datetime.utcnow()) - best_folder = "{}_{}_{}".format(model_name, framework, timeformat_utc) - train_folder = "{}_train".format(model_name) - cmd = build_subprocess_cmd(csv_path, train_folder) - - # https://stackoverflow.com/a/39662359 - try: - is_notebook = get_ipython().__class__.__name__ in ['ZMQInteractiveShell', - 'Shell'] - except: - is_notebook = False - - pbar_func = tqdm_notebook if is_notebook else tqdm - pbar = pbar_func(hp_grid, smoothing=0, unit='trial') - pbar_sub = pbar_func(total=num_epochs, leave=False, - smoothing=0, unit='epoch') - - for params in pbar: - - # Create destination folders for the model scripts + metadata - if not os.path.exists(train_folder): - os.mkdir(train_folder) - os.mkdir(os.path.join(train_folder, 'metadata')) - os.mkdir(os.path.join(train_folder, 'encoders')) - - # Generate model files according to the given hyperparameters. - render_model(params, model_name, - framework, env, problem_type, - target_metric, target_field, - train_folder, fields, split, num_epochs, gpu, tpu_address) - - # Execute model training using the generated files. - train_generated_model(cmd, num_epochs, train_folder, pbar_sub) - - # Load the training results from the generated CSV, - # and append to the metrics CSV. - results = pd.read_csv(os.path.join(train_folder, - "metadata", "results.csv")) - results = results.assign(**params) - results.insert(0, 'trial_id', uuid.uuid4()) - - results.to_csv("automl_results.csv", mode="a", index=False, - header=(best_result is None)) - - train_results = results.tail(1).to_dict('records')[0] - - # If the target metric improves, save the new hps/files, - # update the hyperparameters in console, - # and delete the previous best files. - - top_result = train_results[target_metric] - - if top_result is not None: - if best_result is None: # if first iteration - best_result = top_result - shutil.copytree(train_folder, best_folder) - print_progress_tqdm(params, train_results, - pbar, is_notebook, False) - else: - is_imp = top_result > best_result - is_imp = not is_imp if direction == 'min' else is_imp - if is_imp: - best_result = top_result - shutil.rmtree(best_folder) - shutil.copytree(train_folder, best_folder) - print_progress_tqdm(params, train_results, - pbar, is_notebook) - - # Clean up the generated file folder for the next trial. - shutil.rmtree(train_folder) - - metrics_csv.close() - pbar.close() - pbar_sub.close() - - -def cmd(): - """Function called when invoking from the terminal.""" - - parser = argparse.ArgumentParser( - description="Provide an input CSV and a target field to predict, generate a model + code to run it. 
-(https://github.com/minimaxir/automl-gs)"
-    )
-
-    # Explicit arguments
-    parser.add_argument(
-        '--csv_path', help='Path to the CSV file (must be in the current directory) [Required]', nargs='?')
-    parser.add_argument(
-        '--target_field', help="Target field to predict [Required]",
-        nargs='?')
-    parser.add_argument(
-        '--target_metric', help='Target metric to optimize [Default: Automatically determined depending on problem type]', nargs='?', default=None)
-    parser.add_argument(
-        '--framework', help='Machine learning framework to use [Default: tensorflow]', nargs='?', default='tensorflow')
-    parser.add_argument(
-        '--model_name', help="Name of the model (if you want to train models with different names) [Default: 'automl']",
-        nargs='?', default='automl')
-    parser.add_argument(
-        '--num_trials', help='Number of trials / different hyperparameter combos to test. [Default: 100]', nargs='?', type=int, default=100)
-    parser.add_argument(
-        '--split', help="Train-val split when training the models [Default: 0.7]",
-        nargs='?', type=float, default=0.7)
-    parser.add_argument(
-        '--num_epochs', help='Number of epochs / passes through the data when training the models. [Default: 20]', type=int, default=20)
-    parser.add_argument(
-        '--gpu', help="For non-TensorFlow frameworks and Pascal-or-later GPUs, boolean to determine whether to use GPU-optimized training methods (TensorFlow can detect it automatically) [Default: False]",
-        nargs='?', type=bool, default=False)
-    parser.add_argument(
-        '--tpu_address', help="For TensorFlow, hardware address of the TPU on the system. [Default: None]",
-        nargs='?', default=None)
-
-    # Positional arguments
-    parser.add_argument('csv_path', nargs='?')
-    parser.add_argument('target_field', nargs='?')
-
-    args = parser.parse_args()
-    automl_grid_search(csv_path=args.csv_path,
-                       target_field=args.target_field,
-                       target_metric=args.target_metric,
-                       framework=args.framework,
-                       model_name=args.model_name,
-                       num_trials=args.num_trials,
-                       split=args.split,
-                       num_epochs=args.num_epochs,
-                       gpu=args.gpu,
-                       tpu_address=args.tpu_address)
-import re
-import pandas as pd
-import random
-import yaml
-import os
-import shutil
-from time import time
-from pkg_resources import resource_filename
-from tqdm import tqdm, tqdm_notebook
-from tqdm._utils import _term_move_up
-from subprocess import Popen, PIPE, DEVNULL, CalledProcessError
-from autopep8 import fix_code
-from collections import OrderedDict
-
-
-def get_input_types(df, col_types, target_field):
-    """Get the input types for each field in the DataFrame that corresponds
-    to an input type to be fed into the model.
-
-    Valid values are ['text', 'categorical', 'numeric', 'datetime', 'ignore']
-
-    # Arguments:
-        df: A pandas DataFrame.
-        col_types: A dict of explicitly defined {field_name: type} mappings.
-        target_field: string indicating the target field
-
-    # Returns:
-        A dict of {field_name: type} mappings.
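-
-    Example (illustrative): for a DataFrame with a float64 `price` column,
-    an object `description` column averaging >= 2 spaces per value, and a
-    low-cardinality `bedrooms` column, the heuristics below would yield:
-
-        {'price': 'numeric', 'description': 'text', 'bedrooms': 'categorical'}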
- """ - - fields = df.columns - nrows = df.shape[0] - avg_spaces = -1 - - field_types = OrderedDict() - - for field in fields: - if field in col_types: - field_types[field] = col_types[field] - continue - field_type = df[field].dtype - num_unique_values = df[field].nunique() - if field_type == 'object': - avg_spaces = df[field].str.count(' ').mean() - - # Automatically ignore `id`-related fields - if field.lower() in ['id', 'uuid', 'guid', 'pk', 'name']: - field_types[field] = 'ignore' - - # Foreign key fields are always categorical - # else if "_id" in field or "_uuid" in field: - # field_types[field] = 'categorical' - - # Datetime is a straightforward data type. - elif field_type == 'datetime64[ns]': - field_types[field] = 'datetime' - - # Assume a float is always numeric. - elif field_type == 'float64': - field_types[field] = 'numeric' - - # If it's an object where the contents has - # many spaces on average, it's text - elif field_type == 'object' and avg_spaces >= 2.0: - field_types[field] = 'text' - - # If the field has very few distinct values, it's categorical - elif num_unique_values <= 10: - field_types[field] = 'categorical' - - # If the field has many distinct integers, assume numeric. - elif field_type == 'int64': - field_types[field] = 'numeric' - - # If the field has many distinct nonintegers, it's not helpful. - elif num_unique_values > 0.9 * nrows: - field_types[field] = 'ignore' - - # The rest (e.g. bool) is categorical - else: - field_types[field] = 'categorical' - - # Print to console for user-level debugging - print("Modeling with field specifications:") - print("\n".join(["{}: {}".format(k, v) - for k, v in field_types.items() if k != target_field])) - - field_types = {k: v for k, v in field_types.items() if v != 'ignore'} - - return field_types - - -def normalize_col_names(input_types): - """Fixes unusual column names (e.g. Caps, Spaces) - to make them suitable printing into code templates. - - # Arguments: - input_types: dict of col names: input types - - # Returns: - A dict of col names: input types with normalized keys - """ - - pattern = re.compile('\W+') - fields = [(re.sub(pattern, '_', field.lower()), field, field_type) - for field, field_type in input_types.items()] - - return fields - - -def build_hp_grid(framework, types, num_trials, - problem_type, - hp_path=resource_filename(__name__, "hyperparameters.yml")): - """Builds the hyperparameter grid for model grid search. - - # Arguments: - framework: string indicating the framework (e.g. `tensorflow`) - types: list of hyperparameter types to consider; exclude rest - num_trials: number of distinct trials to keep - problem_type: type of problem to solve - hp_path: filepath of hyperparameters - - # Returns - A list of dicts of hyperparameter specifications - """ - - with open(hp_path) as f: - hps = yaml.safe_load(f) - - # Refine hyperparameters by only using ones relevant to - # the data and framework of choice - hps = dict(hps['base'], **hps[framework]) - keys = [key for key in hps.keys() if (hps[key]['type'] in types - or hps[key]['type'] == 'base' - or hps[key]['type'] == problem_type)] - values = [hps[key]['hyperparams'] for key in keys] - - grid = set() - while len(grid) < num_trials: - grid.add(tuple([random.choice(x) for x in values])) - - grid_params = [dict(zip(keys, grid_hps)) for grid_hps in grid] - return grid_params - - -def print_progress_tqdm(hps, metrics, pbar, is_notebook, clear=True): - """Custom writer for tqdm which prints winning metrics - to console after each iteration. 
-
-    Uses a hack for tqdm.write(): https://github.com/tqdm/tqdm/issues/520
-
-    # Arguments:
-        hps: dict of hyperparameters
-        metrics: dict of hyperparameters+metrics
-        pbar: a tqdm progressbar
-        is_notebook: boolean if automl-gs is running in a Notebook.
-        clear: if writing should clear existing output
-    """
-
-    # hp_str = '\n'.join(['{}: {}'.format(k, v) for k, v in hps.items()])
-    metrics_str = '\n'.join(['{}: {}'.format(k, v) for k, v in metrics.items()
-                             if k not in hps.keys()])
-
-    # console_str = ("\nHyperparameters:\n" + hp_str + "\n" +
-    #                "\nMetrics:\n" + metrics_str)
-
-    console_str = "\nMetrics:\n" + metrics_str
-
-    # Print to console, removing appropriate number of lines
-    move_up_char = '' if is_notebook else _term_move_up()
-    if clear:
-        pbar.write("".join([move_up_char] * (console_str.count('\n') + 2)))
-
-    pbar.write(console_str)
-
-
-def render_model(params, model_name, framework, env, problem_type,
-                 target_metric, target_field, train_folder, fields,
-                 split, num_epochs, gpu, tpu_address,
-                 metrics_path=resource_filename(__name__, "metrics.yml")):
-    """Renders and saves the files (model.py, pipeline.py, requirements.txt)
-    for the given hyperparameters.
-    """
-
-    files = ['model.py', 'pipeline.py', 'requirements.txt']
-
-    type_map = {
-        'numeric': 'float64',
-        'categorical': 'str',
-        'datetime': 'str',
-        'text': 'str'
-    }
-
-    load_fields = {field[1]: type_map[field[2]] for field in fields}
-    text_fields = [field for field in fields if field[2] == 'text']
-    nontarget_fields = [field for field in fields if field[1] != target_field]
-    target_field, target_field_raw = [
-        (field[0], field[1]) for field in fields if field[1] == target_field][0]
-    has_text_input = 'text' in [field[2] for field in fields]
-    text_framework = 'tensorflow' if framework == 'tensorflow' else 'sklearn'
-
-    with open(metrics_path) as f:
-        metrics = yaml.safe_load(f)[problem_type]
-
-    for file in files:
-        script = env.get_template('scripts/' + file.replace('.py', '')).render(
-            params=params,
-            model_name=model_name,
-            framework=framework,
-            problem_type=problem_type,
-            target_metric=target_metric,
-            target_field=target_field,
-            fields=fields,
-            split=split,
-            num_epochs=num_epochs,
-            load_fields=load_fields,
-            text_fields=text_fields,
-            nontarget_fields=nontarget_fields,
-            target_field_raw=target_field_raw,
-            has_text_input=has_text_input,
-            metrics=metrics,
-            text_framework=text_framework,
-            gpu=gpu,
-            tpu_address=tpu_address)
-
-        script = fix_code(script)
-
-        with open(train_folder + "/" + file, 'w', encoding='utf8') as outfile:
-            outfile.write(script)
-
-
-def get_problem_config(target_data,
-                       framework,
-                       target_metric,
-                       metrics_path=resource_filename(__name__, "metrics.yml")):
-    """Gets the problem type, target metric, and metric direction, or infers
-    them from the data if not explicitly specified.
-
-    # Arguments:
-        target_data: Data column to infer problem spec on.
-        framework: problem framework
-        target_metric: Target metric to optimize (overrides automatic selection)
-        metrics_path: location of the metrics file
-
-    # Returns:
-        problem_type: One of 'regression', 'binary_classification' or
-            'classification'.
-        target_metric: Target metric to optimize.
-        direction: Direction of the metric to optimize (either 'max' or 'min')
-    """
-
-    nrows = target_data.size
-    num_unique_values = target_data.nunique()
-    field_type = target_data.dtype
-
-    # Problem Type
-    if num_unique_values == 2:
-        problem_type = 'binary_classification'
-    elif field_type == 'float64':
-        problem_type = 'regression'
-    else:
-        problem_type = 'classification'
-
-    # Target Metric
-    if target_metric is not None:
-        pass
-    elif problem_type == 'regression':
-        target_metric = 'mse'
-    else:
-        target_metric = 'accuracy'
-
-    # Direction
-    with open(metrics_path) as f:
-        metrics = yaml.safe_load(f)
-
-    direction = metrics[target_metric]['objective']
-    direction_text = 'minimizing' if direction == 'min' else 'maximizing'
-
-    # Print config to console for user-level debugging.
-    print("Solving a {} problem, {} {} using {}.\n".format(
-        problem_type, direction_text, target_metric, framework))
-
-    return problem_type, target_metric, direction
-
-
-def build_subprocess_cmd(csv_path, train_folder):
-    """Builds the command used to call a subprocess for model training.
-
-    Other parameters like split and num_epochs are not passed
-    since they are already the defaults in the generated code.
-    """
-
-    csv_path_join = os.path.join('..', csv_path)
-
-    # Find the python executable
-    if shutil.which('python3') is not None:
-        pycmd = shutil.which('python3')
-    elif shutil.which('python'):
-        # fall back to regular python, which may be py3
-        pycmd = shutil.which('python')
-    else:
-        # a more specific exception type might be better here
-        raise Exception(
-            "error: unable to locate the python binary for the subprocess call")
-
-    return [pycmd, "model.py",
-            "-d", csv_path_join,
-            "-m", "train",
-            "-c", "automl-gs"]
-
-
-def train_generated_model(cmd, num_epochs, train_folder, pbar_sub):
-    """Trains a generated model script in a Python subprocess,
-    and maintains a progress bar of the subprocess training.
-
-    Each subprocess must flush stdout and print an
-    "EPOCH_END" line after each epoch.
-
-    # Arguments:
-        cmd: A generated subprocess command
-        num_epochs: number of epochs
-        train_folder: subfolder where the training occurs.
- pbar_sub: tqdm progress bar for the subprocess - """ - - p = Popen(cmd, cwd=train_folder, stdout=PIPE, bufsize=1, - universal_newlines=True) - - for line in iter(p.stdout.readline, ""): - if line == "EPOCH_END\n": - pbar_sub.update(1) - - if p.returncode is not None: - raise CalledProcessError(p.returncode, p.args) - - p.stdout.close() - - # Reset the subprogress bar without destroying it - # https://github.com/tqdm/tqdm/issues/545#issuecomment-471090550 - pbar_sub.n = 0 - pbar_sub.last_print_n = 0 - pbar_sub.start_t = time() - pbar_sub.last_print_t = time() - pbar_sub.refresh() -import os - -from moke_config import ConfigBase - - -def _project_dir(): - d = os.path.dirname - return d(d(d(os.path.abspath(__file__)))) - - -def _data_dir(): - return os.path.join(_project_dir(), "data") - - -class Config(ConfigBase): - def __init__(self): - self.type = "default" - self.opts = Options() - self.resource = ResourceConfig() - self.gui = GuiConfig() - self.nboard = NBoardConfig() - self.model = ModelConfig() - self.play = PlayConfig() - self.play_data = PlayDataConfig() - self.trainer = TrainerConfig() - self.eval = EvaluateConfig() - self.play_with_human = PlayWithHumanConfig() - - -class Options(ConfigBase): - new = False - - -class ResourceConfig(ConfigBase): - def __init__(self): - self.project_dir = os.environ.get("PROJECT_DIR", _project_dir()) - self.data_dir = os.environ.get("DATA_DIR", _data_dir()) - self.model_dir = os.environ.get( - "MODEL_DIR", os.path.join(self.data_dir, "model")) - self.model_best_config_path = os.path.join( - self.model_dir, "model_best_config.json") - self.model_best_weight_path = os.path.join( - self.model_dir, "model_best_weight.h5") - - self.next_generation_model_dir = os.path.join( - self.model_dir, "next_generation") - self.next_generation_model_dirname_tmpl = "model_%s" - self.next_generation_model_config_filename = "model_config.json" - self.next_generation_model_weight_filename = "model_weight.h5" - - self.play_data_dir = os.path.join(self.data_dir, "play_data") - self.play_data_filename_tmpl = "play_%s.json" - self.self_play_ggf_data_dir = os.path.join( - self.data_dir, "self_play-ggf") - self.ggf_filename_tmpl = "self_play-%s.ggf" - - self.log_dir = os.path.join(self.project_dir, "logs") - self.main_log_path = os.path.join(self.log_dir, "main.log") - self.tensorboard_log_dir = os.path.join(self.log_dir, 'tensorboard') - self.self_play_log_dir = os.path.join( - self.tensorboard_log_dir, "self_play") - self.force_learing_rate_file = os.path.join(self.data_dir, ".force-lr") - self.force_simulation_num_file = os.path.join( - self.data_dir, ".force-sim") - self.self_play_game_idx_file = os.path.join( - self.data_dir, ".self-play-game-idx") - - def create_directories(self): - dirs = [self.project_dir, self.data_dir, self.model_dir, self.play_data_dir, self.log_dir, - self.next_generation_model_dir, self.self_play_log_dir, self.self_play_ggf_data_dir] - for d in dirs: - if not os.path.exists(d): - os.makedirs(d) - - -class GuiConfig(ConfigBase): - def __init__(self): - self.window_size = (400, 440) - self.window_title = "reversi-alpha-zero" - - -class PlayWithHumanConfig(ConfigBase): - def __init__(self): - self.parallel_search_num = 8 - self.noise_eps = 0 - self.change_tau_turn = 0 - self.resign_threshold = None - self.use_newest_next_generation_model = True - - def update_play_config(self, pc): - """ - - :param PlayConfig pc: - :return: - """ - pc.noise_eps = self.noise_eps - pc.change_tau_turn = self.change_tau_turn - pc.parallel_search_num = 
self.parallel_search_num - pc.resign_threshold = self.resign_threshold - pc.use_newest_next_generation_model = self.use_newest_next_generation_model - - -class NBoardConfig(ConfigBase): - def __init__(self): - self.my_name = "RAZ" - self.read_stdin_timeout = 0.1 - self.simulation_num_per_depth_about = 20 - self.hint_callback_per_sim = 10 - - -class EvaluateConfig(ConfigBase): - def __init__(self): - self.game_num = 200 # 400 - self.replace_rate = 0.55 - self.play_config = PlayConfig() - self.play_config.simulation_num_per_move = 400 - self.play_config.thinking_loop = 1 - self.play_config.change_tau_turn = 0 - self.play_config.noise_eps = 0 - self.play_config.disable_resignation_rate = 0 - self.evaluate_latest_first = True - - -class PlayDataConfig(ConfigBase): - def __init__(self): - # Max Training Data Size = nb_game_in_file * max_file_num * 8 - self.multi_process_num = 16 - self.nb_game_in_file = 2 - self.max_file_num = 800 - self.save_policy_of_tau_1 = True - self.enable_ggf_data = True - self.nb_game_in_ggf_file = 100 - self.drop_draw_game_rate = 0 - - -class PlayConfig(ConfigBase): - def __init__(self): - self.simulation_num_per_move = 200 - self.share_mtcs_info_in_self_play = True - self.reset_mtcs_info_per_game = 1 - self.thinking_loop = 10 - self.required_visit_to_decide_action = 400 - self.start_rethinking_turn = 8 - self.c_puct = 1 - self.noise_eps = 0.25 - self.dirichlet_alpha = 0.5 - self.change_tau_turn = 4 - self.virtual_loss = 3 - self.prediction_queue_size = 16 - self.parallel_search_num = 8 - self.prediction_worker_sleep_sec = 0.0001 - self.wait_for_expanding_sleep_sec = 0.00001 - self.resign_threshold = -0.9 - self.allowed_resign_turn = 20 - self.disable_resignation_rate = 0.1 - self.false_positive_threshold = 0.05 - self.resign_threshold_delta = 0.01 - self.policy_decay_turn = 60 # not used - self.policy_decay_power = 3 - - # Using a solver is a kind of cheating! - self.use_solver_turn = 50 - self.use_solver_turn_in_simulation = 50 - - # - self.schedule_of_simulation_num_per_move = [ - (0, 8), - (300, 50), - (2000, 200), - ] - - # True means evaluating 'AlphaZero' method (disable 'eval' worker). - # Please change to False if you want to evaluate 'AlphaGo Zero' method. - self.use_newest_next_generation_model = True - - -class TrainerConfig(ConfigBase): - def __init__(self): - self.wait_after_save_model_ratio = 1 # wait after saving model - self.batch_size = 256 # 2048 - self.min_data_size_to_learn = 100000 - self.epoch_to_checkpoint = 1 - self.start_total_steps = 0 - self.save_model_steps = 200 - self.use_tensorboard = True - self.logging_per_steps = 100 - # control ratio of train:self data. 
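-        # A minimal sketch (not original code) of how a step-indexed
-        # (threshold, lr) schedule like `lr_schedules` below is resolved,
-        # consistent with this repo's OptimizeWorker.decide_learning_rate
-        # test: the entry with the highest threshold not exceeding the
-        # current total steps wins.
-        #
-        #     def decide_learning_rate(total_steps, lr_schedules):
-        #         lr = lr_schedules[0][1]
-        #         for threshold, rate in lr_schedules:
-        #             if total_steps >= threshold:
-        #                 lr = rate  # highest threshold reached wins
-        #         return lr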
- self.delete_self_play_after_number_of_training = 0 - self.lr_schedules = [ - (0, 0.01), - (150000, 0.001), - (300000, 0.0001), - ] - - -class ModelConfig(ConfigBase): - def __init__(self): - self.cnn_filter_num = 256 - self.cnn_filter_size = 3 - self.res_layer_num = 10 - self.l2_reg = 1e-4 - self.value_fc_size = 256 -import argparse - -from logging import getLogger - -import yaml -from moke_config import create_config - -from .lib.logger import setup_logger -from .config import Config - -logger = getLogger(__name__) - -CMD_LIST = ['self', 'opt', 'eval', 'play_gui', 'nboard'] - - -def create_parser(): - parser = argparse.ArgumentParser() - parser.add_argument("cmd", help="what to do", choices=CMD_LIST) - parser.add_argument("-c", help="specify config yaml", dest="config_file") - parser.add_argument( - "--new", help="run from new best model", action="store_true") - parser.add_argument("--type", help="deprecated. Please use -c instead") - parser.add_argument( - "--total-step", help="set TrainerConfig.start_total_steps", type=int) - return parser - - -def setup(config: Config, args): - config.opts.new = args.new - if args.total_step is not None: - config.trainer.start_total_steps = args.total_step - config.resource.create_directories() - setup_logger(config.resource.main_log_path) - - -def start(): - parser = create_parser() - args = parser.parse_args() - if args.type: - print("I'm very sorry. --type option was deprecated. Please use -c option instead!") - return 1 - - if args.config_file: - with open(args.config_file, "rt") as f: - config = create_config(Config, yaml.load(f)) - else: - config = create_config(Config) - setup(config, args) - - if args.cmd != "nboard": - logger.info(f"config type: {config.type}") - - if args.cmd == "self": - from .worker import self_play - return self_play.start(config) - elif args.cmd == 'opt': - from .worker import optimize - return optimize.start(config) - elif args.cmd == 'eval': - from .worker import evaluate - return evaluate.start(config) - elif args.cmd == 'play_gui': - from .play_game import gui - return gui.start(config) - elif args.cmd == 'nboard': - from .play_game import nboard - return nboard.start(config) - -import os -import sys -from dotenv import load_dotenv, find_dotenv - -if find_dotenv(): - load_dotenv(find_dotenv()) - -_PATH_ = os.path.dirname(os.path.dirname(__file__)) - -if _PATH_ not in sys.path: - sys.path.append(_PATH_) - - -if __name__ == "__main__": - from reversi_zero import manager - manager.start() -import timeit -from time import time - -import pyximport - -from reversi_zero.env.reversi_env import Player -from reversi_zero.lib.util import parse_to_bitboards - -pyximport.install() - - -def examples(): - ret = [ - ''' - ########## - #OO # - #XOO # - #OXOOO # - # XOX # - # XXX # - # X # - # X # - # # - ########## - ''', - ''' - ########## - #OOOOOXO # - #OOOOOXOO# - #OOOOOXOO# - #OXOXOXOO# - #OOXOXOXO# - #OOOOOOOO# - #XXXO O# - # # - ########## - ''', - ''' - ########## - #OOXXXXX # - #XOXXXXXX# - #XXXXXXXX# - #XOOXXXXX# - #OXXXOOOX# - #OXXOOOOX# - #OXXXOOOX# - # OOOOOOO# - ########## - '''] - return ret - - -def test_find_correct_move(): - import spike.bitboard_cython as f - import reversi_zero.lib.bitboard as b - - for ex in examples(): - black, white = parse_to_bitboards(ex) - assert f.find_correct_moves( - black, white) == b.find_correct_moves(black, white) - cy = timeit.timeit("f.find_correct_moves(black, white)", - globals=locals(), number=10000) - py = timeit.timeit("b.find_correct_moves(black, white)", - globals=locals(), 
number=10000) - print(f"Cython={cy} : cPython={py}") - - -def test_calc_flip(): - import spike.bitboard_cython as f - import reversi_zero.lib.bitboard as b - - for ex in examples(): - black, white = parse_to_bitboards(ex) - assert f.find_correct_moves( - black, white) == b.find_correct_moves(black, white) - legal_moves = f.find_correct_moves(black, white) - action_list = [idx for idx in range(64) if legal_moves & (1 << idx)] - - for action in action_list: - assert f.calc_flip(action, black, white) == b.calc_flip( - action, black, white) - cy = timeit.timeit( - "f.calc_flip(action, black, white)", globals=locals(), number=10000) - py = timeit.timeit( - "b.calc_flip(action, black, white)", globals=locals(), number=10000) - print(f"Cython={cy} : cPython={py}") - - -def test_solve(): - def q1(): - import reversi_zero.lib.reversi_solver as p - import spike.reversi_solver_cython as c - board = ''' - ########## - #XXXX # - #XOXX # - #XOXXOOOO# - #XOXOXOOO# - #XOXXOXOO# - #OOOOXOXO# - # OOOOOOO# - # XXXXXO# - ##########''' - b, w = parse_to_bitboards(board) - print("correct is (57, +2)") - - start_time = time() - ret = p.ReversiSolver().solve(b, w, next_player=Player.white, exactly=False) - print(f"{time()-start_time} sec: ret={ret}") - - start_time = time() - ret = c.ReversiSolver().solve(b, w, next_player=2, exactly=False) - print(f"{time()-start_time} sec: ret={ret}") - - # rr = p.ReversiSolver() - # print(rr.solve(b, w, Player.white, exactly=False)) - # print(len(rr.cache)) - - def q2(): - import reversi_zero.lib.reversi_solver as p - import spike.reversi_solver_cython as c - board = ''' - ########## - #XXXX # - #XXXX X # - #XXXXXXOO# - #XXXXXXOO# - #XXXXOXOO# - #OXOOXOXO# - # OOOOOOO# - #OOOOOOOO# - ##########''' - b, w = parse_to_bitboards(board) - - start_time = time() - ret = p.ReversiSolver().solve(b, w, next_player=Player.black, exactly=True) - print(f"{time()-start_time} sec: ret={ret}") - - start_time = time() - ret = c.ReversiSolver().solve(b, w, next_player=1, exactly=True) - print(f"{time()-start_time} sec: ret={ret}") - - def q3(): - import reversi_zero.lib.reversi_solver as p - import spike.reversi_solver_cython as c - board = ''' - ########## - #XXXXOOOX# - #XXXX XOX# - #XXXXXXOO# - #XXXXXOO # - #XXXXOXOO# - #OXOOXOXO# - # OOOOOOO# - #OOOOOOOO# - ##########''' - b, w = parse_to_bitboards(board) - start_time = time() - print(p.ReversiSolver().solve( - b, w, next_player=Player.black, exactly=True)) - ret = c.ReversiSolver().solve(b, w, next_player=1, exactly=True) - print(f"{time()-start_time} sec: ret={ret}") - - def q4(): - import reversi_zero.lib.reversi_solver as p - import spike.reversi_solver_cython as c - board = ''' - ########## - # X XXXO# - #O XXXXXX# - #OOXOOOXX# - #OOOOOOXO# - #OOOOXOOO# - #OOOOXOOX# - # XXXOO # - # OOO # - ##########''' - b, w = parse_to_bitboards(board) - start_time = time() - ret = p.ReversiSolver().solve(b, w, next_player=Player.black, exactly=True) - print(f"{time()-start_time} sec: ret={ret}") - - start_time = time() - ret = c.ReversiSolver().solve(b, w, next_player=1, exactly=True) - print(f"{time()-start_time} sec: ret={ret}") - - q4() - - -def test_bitcount(): - import spike.bitboard_cython as c - import reversi_zero.lib.bitboard as p - - x = 4242342758 - assert p.bit_count(x) == c.bc_timeit(x) - print(timeit.timeit("p.bit_count(x)", number=100000, globals=locals())) - print(timeit.timeit("c.bc_timeit(x)", number=1, globals=locals())) - - -if __name__ == '__main__': - # print("find_correct_moves") - # test_find_correct_move() - # 
print("calc_flip") - # test_calc_flip() - test_solve() - # test_bitcount() -from nose.tools.trivial import eq_, ok_ - -import numpy as np - - -from reversi_zero.config import Config -from reversi_zero.agent.player import ReversiPlayer -from reversi_zero.lib.bitboard import bit_count - - -def test_add_data_to_move_buffer_with_8_symmetries(): - config = Config() - player = ReversiPlayer(config, None) - - """ - board: p=0.2, q=0.8, O=own, X=enemy - 01234567 - x - 0O q - 1 O - 2 - 3 - 4 - 5 - 6 X - 7p X - | - y - """ - - own = stone_bit(0, 0) | stone_bit(1, 1) - enemy = stone_bit(7, 6) | stone_bit(7, 7) - policy = np.zeros((64, )) - policy[idx(7, 0)] = 0.8 - policy[idx(0, 7)] = 0.2 - player.add_data_to_move_buffer_with_8_symmetries(own, enemy, policy) - - # no transform - (o, e), p = player.moves[0] # own, enemy, policy - eq_((bit_count(o), bit_count(e)), (2, 2)) - ok_(check_bit(o, 0, 0)) - ok_(check_bit(o, 1, 1)) - ok_(check_bit(e, 7, 6)) - ok_(check_bit(e, 7, 7)) - eq_(p[idx(7, 0)], 0.8) - eq_(p[idx(0, 7)], 0.2) - - # rotate right - (o, e), p = player.moves[1] # own, enemy, policy - eq_((bit_count(o), bit_count(e)), (2, 2)) - ok_(check_bit(o, 7, 0)) - ok_(check_bit(o, 6, 1)) - ok_(check_bit(e, 0, 7)) - ok_(check_bit(e, 1, 7)) - eq_(p[idx(7, 7)], 0.8) - eq_(p[idx(0, 0)], 0.2) - - # rotate right twice - (o, e), p = player.moves[2] # own, enemy, policy - eq_((bit_count(o), bit_count(e)), (2, 2)) - ok_(check_bit(o, 7, 7)) - ok_(check_bit(o, 6, 6)) - ok_(check_bit(e, 0, 0)) - ok_(check_bit(e, 0, 1)) - eq_(p[idx(0, 7)], 0.8) - eq_(p[idx(7, 0)], 0.2) - - # flip vertical -> rotate right - (o, e), p = player.moves[5] # own, enemy, policy - eq_((bit_count(o), bit_count(e)), (2, 2)) - ok_(check_bit(o, 0, 0)) - ok_(check_bit(o, 1, 1)) - ok_(check_bit(e, 6, 7)) - ok_(check_bit(e, 7, 7)) - eq_(p[idx(0, 7)], 0.8) - eq_(p[idx(7, 0)], 0.2) - - -def idx(x, y): - return y*8 + x - - -def stone_bit(x, y): - return 1 << idx(x, y) - - -def check_bit(bb, x, y): - return bb & stone_bit(x, y) != 0 -import numpy as np - -from nose.tools import assert_almost_equal -from nose.tools.trivial import ok_, eq_ - -from reversi_zero.lib.bitboard import find_correct_moves, board_to_string, bit_count, dirichlet_noise_of_mask, \ - bit_to_array -from reversi_zero.lib.util import parse_to_bitboards - - -def test_find_correct_moves_1(): - ex = ''' -########## -#OO # -#XOO # -#OXOOO # -# XOX # -# XXX # -# X # -# X # -# # -##########''' - - expect = ''' -########## -#OO # -#XOO # -#OXOOO # -#**XOX* # -# **XXX # -# X**** # -# X # -# # -########## -''' - _flip_test(ex, expect) - - -def _flip_test(ex, expect, player_black=True): - b, w = parse_to_bitboards(ex) - moves = find_correct_moves( - b, w) if player_black else find_correct_moves(w, b) - res = board_to_string(b, w, extra=moves) - eq_(res.strip(), expect.strip(), f"\n{res}----{expect}") - - -def test_find_correct_moves_2(): - ex = ''' -########## -#OOOOOXO # -#OOOOOXOO# -#OOOOOXOO# -#OXOXOXOO# -#OOXOXOXO# -#OOOOOOOO# -#XXXO O# -# # -##########''' - - expect = ''' -########## -#OOOOOXO*# -#OOOOOXOO# -#OOOOOXOO# -#OXOXOXOO# -#OOXOXOXO# -#OOOOOOOO# -#XXXO***O# -# * # -##########''' - - _flip_test(ex, expect, player_black=False) - - -def test_find_correct_moves_3(): - ex = ''' -########## -#OOXXXXX # -#XOXXXXXX# -#XXXXXXXX# -#XOOXXXXX# -#OXXXOOOX# -#OXXOOOOX# -#OXXXOOOX# -# OOOOOOO# -##########''' - - expect1 = ''' -########## -#OOXXXXX # -#XOXXXXXX# -#XXXXXXXX# -#XOOXXXXX# -#OXXXOOOX# -#OXXOOOOX# -#OXXXOOOX# -#*OOOOOOO# -##########''' - - expect2 = ''' -########## -#OOXXXXX*# 
-#XOXXXXXX# -#XXXXXXXX# -#XOOXXXXX# -#OXXXOOOX# -#OXXOOOOX# -#OXXXOOOX# -# OOOOOOO# -##########''' - - _flip_test(ex, expect1, player_black=False) - _flip_test(ex, expect2, player_black=True) - - -def test_dirichlet_noise_of_mask(): - legal_moves = 47289423 - bc = bit_count(legal_moves) - noise = dirichlet_noise_of_mask(legal_moves, 0.5) - assert_almost_equal(1, np.sum(noise)) - eq_(bc, np.sum(noise > 0)) - ary = bit_to_array(legal_moves, 64) - eq_(list(noise), list(noise * ary)) -import numpy as np - -from nose.tools.trivial import eq_ - -from reversi_zero.lib.bitboard import board_to_string -from reversi_zero.lib.ggf import parse_ggf, convert_move_to_action, convert_action_to_move -from reversi_zero.lib.util import parse_ggf_board_to_bitboard - -GGF_STR = '(;GM[Othello]PC[NBoard]DT[2014-02-21 20:52:27 GMT]PB[./mEdax]PW[chris]RE[?]TI[15:00]TY[8]' \ - 'BO[8 --*O-----------------------O*------*O--------------------------- *]' \ - 'B[F5]W[F6]B[D3]W[C5]B[E6]W[F7]B[E7]W[F4];)' - - -def test_parse_ggf(): - ggf = parse_ggf(GGF_STR) - eq_("8", ggf.BO.board_type) - eq_(64, len(ggf.BO.square_cont)) - eq_("*", ggf.BO.color) - eq_(8, len(ggf.MOVES)) - eq_("B", ggf.MOVES[0].color) - eq_("F5", ggf.MOVES[0].pos) - eq_("W", ggf.MOVES[1].color) - eq_("F6", ggf.MOVES[1].pos) - - -def test_parse_ggf_board_to_bitboard(): - ggf = parse_ggf(GGF_STR) - black, white = parse_ggf_board_to_bitboard(ggf.BO.square_cont) - eq_(EXPECTED1.strip(), board_to_string(black, white).strip()) - - -def test_convert_move_to_action(): - eq_(0, convert_move_to_action("A1")) - eq_(63, convert_move_to_action("H8")) - eq_(44, convert_move_to_action("F5")) - eq_(None, convert_move_to_action("PA")) - - -def test_convert_action_to_move(): - eq_("A1", convert_action_to_move(0)) - eq_("H8", convert_action_to_move(63)) - eq_("F5", convert_action_to_move(44)) - eq_("PA", convert_action_to_move(None)) - - -EXPECTED1 = ''' -########## -# OX # -# # -# # -# XO # -# OX # -# # -# # -# # -########## -''' -from nose.tools.trivial import eq_ - -from reversi_zero.lib import util -from reversi_zero.lib.bitboard import board_to_string - - -def test_parse_to_bitboards_init(): - ex = ''' - ########## - # # - # # - # # - # OX # - # XO # - # # - # # - # # - ########## - ''' - - black, white = util.parse_to_bitboards(ex) - eq_(black, 0b00001000 << 24 | 0b00010000 << 32, - f"{ex}\n-------\n{board_to_string(black, white)}") - eq_(white, 0b00010000 << 24 | 0b00001000 << 32, - f"{ex}\n-------\n{board_to_string(black, white)}") - - -def test_parse_to_bitboards(): - ex = ''' -########## -#OO # -#XOO # -#OXOOO # -# XOX # -# XXX # -# X # -# X # -# X# -##########''' - - black, white = util.parse_to_bitboards(ex) - eq_(ex.strip(), board_to_string(black, white).strip(), - f"{ex}\n-------\n{board_to_string(black, white)}") -from nose.tools import eq_ - -from reversi_zero.config import Config -from reversi_zero.worker.optimize import OptimizeWorker - - -def test_decide_learning_rate(): - config = Config() - optimizer = OptimizeWorker(config) - - config.trainer.lr_schedules = [ - (0, 0.02), - (100000, 0.002), - (200000, 0.0002), - ] - - eq_(0.02, optimizer.decide_learning_rate(100)) - eq_(0.02, optimizer.decide_learning_rate(99999)) - eq_(0.002, optimizer.decide_learning_rate(100001)) - eq_(0.002, optimizer.decide_learning_rate(199999)) - eq_(0.0002, optimizer.decide_learning_rate(200001)) -import numpy as np - -from multiprocessing import Pipe, connection -from threading import Thread -from time import time - -from logging import getLogger - -from 
reversi_zero.agent.model import ReversiModel -from reversi_zero.config import Config - -from reversi_zero.lib.model_helpler import reload_newest_next_generation_model_if_changed, load_best_model_weight, \ - save_as_best_model, reload_best_model_weight_if_changed -import tensorflow as tf - - -logger = getLogger(__name__) - - -class ReversiModelAPI: - def __init__(self, config: Config, agent_model): - """ - - :param config: - :param reversi_zero.agent.model.ReversiModel agent_model: - """ - self.config = config - self.agent_model = agent_model - - def predict(self, x): - assert x.ndim in (3, 4) - assert x.shape == (2, 8, 8) or x.shape[1:] == (2, 8, 8) - orig_x = x - if x.ndim == 3: - x = x.reshape(1, 2, 8, 8) - - policy, value = self._do_predict(x) - - if orig_x.ndim == 3: - return policy[0], value[0] - else: - return policy, value - - def _do_predict(self, x): - return self.agent_model.model.predict_on_batch(x) - - -class MultiProcessReversiModelAPIServer: - # https://github.com/Akababa/Chess-Zero/blob/nohistory/src/chess_zero/agent/api_chess.py - - def __init__(self, config: Config): - """ - - :param config: - """ - self.config = config - self.model = None # type: ReversiModel - self.connections = [] - - def get_api_client(self): - me, you = Pipe() - self.connections.append(me) - return MultiProcessReversiModelAPIClient(self.config, None, you) - - def start_serve(self): - self.model = self.load_model() - # threading workaround: https://github.com/keras-team/keras/issues/5640 - self.model.model._make_predict_function() - self.graph = tf.get_default_graph() - - prediction_worker = Thread( - target=self.prediction_worker, name="prediction_worker") - prediction_worker.daemon = True - prediction_worker.start() - - def prediction_worker(self): - logger.debug("prediction_worker started") - average_prediction_size = [] - last_model_check_time = time() - while True: - if last_model_check_time+60 < time(): - self.try_reload_model() - last_model_check_time = time() - logger.debug( - f"average_prediction_size={np.average(average_prediction_size)}") - average_prediction_size = [] - ready_conns = connection.wait( - self.connections, timeout=0.001) # type: list[Connection] - if not ready_conns: - continue - data = [] - size_list = [] - for conn in ready_conns: - x = conn.recv() - data.append(x) # shape: (k, 2, 8, 8) - size_list.append(x.shape[0]) # save k - average_prediction_size.append(np.sum(size_list)) - array = np.concatenate(data, axis=0) - policy_ary, value_ary = self.model.model.predict_on_batch(array) - idx = 0 - for conn, s in zip(ready_conns, size_list): - conn.send((policy_ary[idx:idx+s], value_ary[idx:idx+s])) - idx += s - - def load_model(self): - from reversi_zero.agent.model import ReversiModel - model = ReversiModel(self.config) - loaded = False - if not self.config.opts.new: - if self.config.play.use_newest_next_generation_model: - loaded = reload_newest_next_generation_model_if_changed( - model) or load_best_model_weight(model) - else: - loaded = load_best_model_weight( - model) or reload_newest_next_generation_model_if_changed(model) - - if not loaded: - model.build() - save_as_best_model(model) - return model - - def try_reload_model(self): - try: - logger.debug("check model") - if self.config.play.use_newest_next_generation_model: - reload_newest_next_generation_model_if_changed( - self.model, clear_session=True) - else: - reload_best_model_weight_if_changed( - self.model, clear_session=True) - except Exception as e: - logger.error(e) - - -class 
MultiProcessReversiModelAPIClient(ReversiModelAPI): - def __init__(self, config: Config, agent_model, conn): - """ - - :param config: - :param reversi_zero.agent.model.ReversiModel agent_model: - :param Connection conn: - """ - super().__init__(config, agent_model) - self.connection = conn - - def _do_predict(self, x): - self.connection.send(x) - return self.connection.recv() -import hashlib -import json -import os -from logging import getLogger -# noinspection PyPep8Naming -import keras.backend as K - -from keras.engine.topology import Input -from keras.engine.training import Model -from keras.layers.convolutional import Conv2D -from keras.layers.core import Activation, Dense, Flatten -from keras.layers.merge import Add -from keras.layers.normalization import BatchNormalization -from keras.losses import mean_squared_error -from keras.regularizers import l2 - -from reversi_zero.config import Config - -logger = getLogger(__name__) - - -class ReversiModel: - def __init__(self, config: Config): - self.config = config - self.model = None # type: Model - self.digest = None - - def build(self): - mc = self.config.model - in_x = x = Input((2, 8, 8)) # [own(8x8), enemy(8x8)] - - # (batch, channels, height, width) - x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size, padding="same", - data_format="channels_first", kernel_regularizer=l2(mc.l2_reg))(x) - x = BatchNormalization(axis=1)(x) - x = Activation("relu")(x) - - for _ in range(mc.res_layer_num): - x = self._build_residual_block(x) - - res_out = x - # for policy output - x = Conv2D(filters=2, kernel_size=1, data_format="channels_first", - kernel_regularizer=l2(mc.l2_reg))(res_out) - x = BatchNormalization(axis=1)(x) - x = Activation("relu")(x) - x = Flatten()(x) - # no output for 'pass' - policy_out = Dense(8*8, kernel_regularizer=l2(mc.l2_reg), - activation="softmax", name="policy_out")(x) - - # for value output - x = Conv2D(filters=1, kernel_size=1, data_format="channels_first", - kernel_regularizer=l2(mc.l2_reg))(res_out) - x = BatchNormalization(axis=1)(x) - x = Activation("relu")(x) - x = Flatten()(x) - x = Dense(mc.value_fc_size, kernel_regularizer=l2( - mc.l2_reg), activation="relu")(x) - value_out = Dense(1, kernel_regularizer=l2(mc.l2_reg), - activation="tanh", name="value_out")(x) - - self.model = Model(in_x, [policy_out, value_out], name="reversi_model") - - def _build_residual_block(self, x): - mc = self.config.model - in_x = x - x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size, padding="same", - data_format="channels_first", kernel_regularizer=l2(mc.l2_reg))(x) - x = BatchNormalization(axis=1)(x) - x = Activation("relu")(x) - x = Conv2D(filters=mc.cnn_filter_num, kernel_size=mc.cnn_filter_size, padding="same", - data_format="channels_first", kernel_regularizer=l2(mc.l2_reg))(x) - x = BatchNormalization(axis=1)(x) - x = Add()([in_x, x]) - x = Activation("relu")(x) - return x - - @staticmethod - def fetch_digest(weight_path): - if os.path.exists(weight_path): - m = hashlib.sha256() - with open(weight_path, "rb") as f: - m.update(f.read()) - return m.hexdigest() - - def load(self, config_path, weight_path): - if os.path.exists(config_path) and os.path.exists(weight_path): - logger.debug(f"loading model from {config_path}") - with open(config_path, "rt") as f: - self.model = Model.from_config(json.load(f)) - self.model.load_weights(weight_path) - self.digest = self.fetch_digest(weight_path) - logger.debug(f"loaded model digest = {self.digest}") - return True - else: - logger.debug( - f"model files 
does not exist at {config_path} and {weight_path}") - return False - - def save(self, config_path, weight_path): - logger.debug(f"save model to {config_path}") - with open(config_path, "wt") as f: - json.dump(self.model.get_config(), f) - self.model.save_weights(weight_path) - self.digest = self.fetch_digest(weight_path) - logger.debug(f"saved model digest {self.digest}") - - -def objective_function_for_policy(y_true, y_pred): - # can use categorical_crossentropy?? - return K.sum(-y_true * K.log(y_pred + K.epsilon()), axis=-1) - - -def objective_function_for_value(y_true, y_pred): - return mean_squared_error(y_true, y_pred) -from _asyncio import Future -from asyncio.queues import Queue -from collections import defaultdict, namedtuple -from logging import getLogger -import asyncio - -import numpy as np -from numpy.random import random - -from reversi_zero.agent.api import ReversiModelAPI -from reversi_zero.config import Config -from reversi_zero.env.reversi_env import ReversiEnv, Player, Winner, another_player -from reversi_zero.lib.bitboard import find_correct_moves, bit_to_array, flip_vertical, rotate90, dirichlet_noise_of_mask -# from reversi_zero.lib.reversi_solver import ReversiSolver -from reversi_zero.lib.alt.reversi_solver import ReversiSolver - - -CounterKey = namedtuple("CounterKey", "black white next_player") -QueueItem = namedtuple("QueueItem", "state future") -HistoryItem = namedtuple( - "HistoryItem", "action policy values visit enemy_values enemy_visit") -CallbackInMCTS = namedtuple("CallbackInMCTS", "per_sim callback") -MCTSInfo = namedtuple("MCTSInfo", "var_n var_w var_p") -ActionWithEvaluation = namedtuple("ActionWithEvaluation", "action n q") - -logger = getLogger(__name__) - - -class ReversiPlayer: - def __init__(self, config: Config, model, play_config=None, enable_resign=True, mtcs_info=None, api=None): - """ - - :param config: - :param reversi_zero.agent.model.ReversiModel|None model: - :param MCTSInfo mtcs_info: - :parameter ReversiModelAPI api: - """ - self.config = config - self.model = model - self.play_config = play_config or self.config.play - self.enable_resign = enable_resign - self.api = api or ReversiModelAPI(self.config, self.model) - - # key=(own, enemy, action) - mtcs_info = mtcs_info or self.create_mtcs_info() - self.var_n, self.var_w, self.var_p = mtcs_info - - self.expanded = set(self.var_p.keys()) - self.now_expanding = set() - self.prediction_queue = Queue(self.play_config.prediction_queue_size) - self.sem = asyncio.Semaphore(self.play_config.parallel_search_num) - - self.moves = [] - self.loop = asyncio.get_event_loop() - self.running_simulation_num = 0 - self.callback_in_mtcs = None - - self.thinking_history = {} # for fun - self.resigned = False - self.requested_stop_thinking = False - self.solver = self.create_solver() - - @staticmethod - def create_mtcs_info(): - return MCTSInfo(defaultdict(lambda: np.zeros((64,))), - defaultdict(lambda: np.zeros((64,))), - defaultdict(lambda: np.zeros((64,)))) - - def var_q(self, key): - return self.var_w[key] / (self.var_n[key] + 1e-5) - - def action(self, own, enemy, callback_in_mtcs=None): - """ - - :param own: BitBoard - :param enemy: BitBoard - :param CallbackInMCTS callback_in_mtcs: - :return action=move pos=0 ~ 63 (0=top left, 7 top right, 63 bottom right) - """ - action_with_eval = self.action_with_evaluation( - own, enemy, callback_in_mtcs=callback_in_mtcs) - return action_with_eval.action - - def action_with_evaluation(self, own, enemy, callback_in_mtcs=None): - """ - - :param own: BitBoard - :param 
enemy: BitBoard - :param CallbackInMCTS callback_in_mtcs: - :rtype: ActionWithEvaluation - :return ActionWithEvaluation( - action=move pos=0 ~ 63 (0=top left, 7 top right, 63 bottom right), - n=N of the action, - q=W/N of the action, - ) - """ - env = ReversiEnv().update(own, enemy, Player.black) - key = self.counter_key(env) - self.callback_in_mtcs = callback_in_mtcs - pc = self.play_config - - if pc.use_solver_turn and env.turn >= pc.use_solver_turn: - ret = self.action_by_searching(key) - if ret: # not save move as play data - return ret - - for tl in range(self.play_config.thinking_loop): - if env.turn > 0: - self.search_moves(own, enemy) - else: - self.bypass_first_move(key) - - policy = self.calc_policy(own, enemy) - action = int(np.random.choice(range(64), p=policy)) - action_by_value = int( - np.argmax(self.var_q(key) + (self.var_n[key] > 0)*100)) - value_diff = self.var_q(key)[action] - \ - self.var_q(key)[action_by_value] - - if env.turn <= pc.start_rethinking_turn or self.requested_stop_thinking or \ - (value_diff > -0.01 and self.var_n[key][action] >= pc.required_visit_to_decide_action): - break - - # this is for play_gui, not necessary when training. - self.update_thinking_history(own, enemy, action, policy) - - if self.play_config.resign_threshold is not None and\ - np.max(self.var_q(key) - (self.var_n[key] == 0)*10) <= self.play_config.resign_threshold: - self.resigned = True - if self.enable_resign: - if env.turn >= self.config.play.allowed_resign_turn: - return ActionWithEvaluation(None, 0, 0) # means resign - else: - logger.debug( - f"Want to resign but disallowed turn {env.turn} < {self.config.play.allowed_resign_turn}") - - saved_policy = self.calc_policy_by_tau_1( - key) if self.config.play_data.save_policy_of_tau_1 else policy - self.add_data_to_move_buffer_with_8_symmetries( - own, enemy, saved_policy) - return ActionWithEvaluation(action=action, n=self.var_n[key][action], q=self.var_q(key)[action]) - - def update_thinking_history(self, black, white, action, policy): - key = CounterKey(black, white, Player.black.value) - next_key = self.get_next_key(black, white, action) - self.thinking_history[(black, white)] = \ - HistoryItem(action, policy, list(self.var_q(key)), list(self.var_n[key]), - list(self.var_q(next_key)), list(self.var_n[next_key])) - - def bypass_first_move(self, key): - legal_array = bit_to_array( - find_correct_moves(key.black, key.white), 64) - action = np.argmax(legal_array) - self.var_n[key][action] = 1 - self.var_w[key][action] = 0 - self.var_p[key] = legal_array / np.sum(legal_array) - - def action_by_searching(self, key): - action, score = self.solver.solve( - key.black, key.white, Player(key.next_player), exactly=True) - if action is None: - return None - # logger.debug(f"action_by_searching: score={score}") - policy = np.zeros(64) - policy[action] = 1 - self.var_n[key][action] = 999 - self.var_w[key][action] = np.sign(score) * 999 - self.var_p[key] = policy - self.update_thinking_history(key.black, key.white, action, policy) - return ActionWithEvaluation(action=action, n=999, q=np.sign(score)) - - def stop_thinking(self): - self.requested_stop_thinking = True - - def add_data_to_move_buffer_with_8_symmetries(self, own, enemy, policy): - for flip in [False, True]: - for rot_right in range(4): - own_saved, enemy_saved, policy_saved = own, enemy, policy.reshape( - (8, 8)) - if flip: - own_saved = flip_vertical(own_saved) - enemy_saved = flip_vertical(enemy_saved) - policy_saved = np.flipud(policy_saved) - if rot_right: - for _ in 
range(rot_right): - own_saved = rotate90(own_saved) - enemy_saved = rotate90(enemy_saved) - policy_saved = np.rot90(policy_saved, k=-rot_right) - self.moves.append( - [(own_saved, enemy_saved), list(policy_saved.reshape((64, )))]) - - def get_next_key(self, own, enemy, action): - env = ReversiEnv().update(own, enemy, Player.black) - env.step(action) - return self.counter_key(env) - - def ask_thought_about(self, own, enemy) -> HistoryItem: - return self.thinking_history.get((own, enemy)) - - def search_moves(self, own, enemy): - loop = self.loop - self.running_simulation_num = 0 - self.requested_stop_thinking = False - - coroutine_list = [] - for it in range(self.play_config.simulation_num_per_move): - cor = self.start_search_my_move(own, enemy) - coroutine_list.append(cor) - - coroutine_list.append(self.prediction_worker()) - loop.run_until_complete(asyncio.gather(*coroutine_list)) - - async def start_search_my_move(self, own, enemy): - self.running_simulation_num += 1 - root_key = self.counter_key( - ReversiEnv().update(own, enemy, Player.black)) - with await self.sem: # reduce parallel search number - if self.requested_stop_thinking: - self.running_simulation_num -= 1 - return None - env = ReversiEnv().update(own, enemy, Player.black) - leaf_v = await self.search_my_move(env, is_root_node=True) - self.running_simulation_num -= 1 - if self.callback_in_mtcs and self.callback_in_mtcs.per_sim > 0 and \ - self.running_simulation_num % self.callback_in_mtcs.per_sim == 0: - self.callback_in_mtcs.callback( - list(self.var_q(root_key)), list(self.var_n[root_key])) - return leaf_v - - async def search_my_move(self, env: ReversiEnv, is_root_node=False): - """ - - Q, V is value for this Player(always black). - P is value for the player of next_player (black or white) - :param env: - :param is_root_node: - :return: - """ - if env.done: - if env.winner == Winner.black: - return 1 - elif env.winner == Winner.white: - return -1 - else: - return 0 - - key = self.counter_key(env) - another_side_key = self.another_side_counter_key(env) - - if self.config.play.use_solver_turn_in_simulation and \ - env.turn >= self.config.play.use_solver_turn_in_simulation: - action, score = self.solver.solve( - key.black, key.white, Player(key.next_player), exactly=False) - if action: - score = score if env.next_player == Player.black else -score - leaf_v = np.sign(score) - leaf_p = np.zeros(64) - leaf_p[action] = 1 - self.var_n[key][action] += 1 - self.var_w[key][action] += leaf_v - self.var_p[key] = leaf_p - self.var_n[another_side_key][action] += 1 - self.var_w[another_side_key][action] -= leaf_v - self.var_p[another_side_key] = leaf_p - return np.sign(score) - - while key in self.now_expanding: - await asyncio.sleep(self.config.play.wait_for_expanding_sleep_sec) - - # is leaf? 
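# A standalone illustration of the 8-fold augmentation performed by
# add_data_to_move_buffer_with_8_symmetries above, shown here only for the
# 64-way policy vector (the bitboards go through the matching flip_vertical
# and rotate90 helpers). Note np.rot90 rotates LEFT for positive k, hence
# k=-rot_right for a right rotation.
import numpy as np

policy = np.arange(64, dtype=float).reshape((8, 8))  # stand-in policy
symmetries = []
for flip in (False, True):
    base = np.flipud(policy) if flip else policy
    for rot_right in range(4):
        symmetries.append(np.rot90(base, k=-rot_right).reshape((64,)))
print(len(symmetries))   # 8 orientations of the same training example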
- if key not in self.expanded: # reach leaf node - leaf_v = await self.expand_and_evaluate(env) - if env.next_player == Player.black: - return leaf_v # Value for black - else: - return -leaf_v # Value for white == -Value for black - - virtual_loss = self.config.play.virtual_loss - virtual_loss_for_w = virtual_loss if env.next_player == Player.black else -virtual_loss - - action_t = self.select_action_q_and_u(env, is_root_node) - _, _ = env.step(action_t) - - self.var_n[key][action_t] += virtual_loss - self.var_w[key][action_t] -= virtual_loss_for_w - leaf_v = await self.search_my_move(env) # next move - - # on returning search path - # update: N, W - self.var_n[key][action_t] += - virtual_loss + 1 - self.var_w[key][action_t] += virtual_loss_for_w + leaf_v - # update another side info(flip color and player) - self.var_n[another_side_key][action_t] += 1 - self.var_w[another_side_key][action_t] -= leaf_v # must flip the sign. - return leaf_v - - async def expand_and_evaluate(self, env): - """expand new leaf - - update var_p, return leaf_v - - :param ReversiEnv env: - :return: leaf_v - """ - - key = self.counter_key(env) - another_side_key = self.another_side_counter_key(env) - self.now_expanding.add(key) - - black, white = env.board.black, env.board.white - - # (di(p), v) = fθ(di(sL)) - # rotation and flip. flip -> rot. - is_flip_vertical = random() < 0.5 - rotate_right_num = int(random() * 4) - if is_flip_vertical: - black, white = flip_vertical(black), flip_vertical(white) - for i in range(rotate_right_num): - # rotate90: rotate bitboard RIGHT 1 time - black, white = rotate90(black), rotate90(white) - - black_ary = bit_to_array(black, 64).reshape((8, 8)) - white_ary = bit_to_array(white, 64).reshape((8, 8)) - state = [black_ary, white_ary] if env.next_player == Player.black else [ - white_ary, black_ary] - future = await self.predict(np.array(state)) # type: Future - await future - leaf_p, leaf_v = future.result() - - # reverse rotate and flip about leaf_p - # reverse rotation and flip. rot -> flip. - if rotate_right_num > 0 or is_flip_vertical: - leaf_p = leaf_p.reshape((8, 8)) - if rotate_right_num > 0: - # rot90: rotate matrix LEFT k times - leaf_p = np.rot90(leaf_p, k=rotate_right_num) - if is_flip_vertical: - leaf_p = np.flipud(leaf_p) - leaf_p = leaf_p.reshape((64, )) - - self.var_p[key] = leaf_p # P is value for next_player (black or white) - self.var_p[another_side_key] = leaf_p - self.expanded.add(key) - self.now_expanding.remove(key) - return float(leaf_v) - - async def prediction_worker(self): - """For better performance, queueing prediction requests and predict together in this worker. - - speed up about 45sec -> 15sec for example. - :return: - """ - q = self.prediction_queue - margin = 10 # avoid finishing before other searches starting. 
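# The virtual-loss arithmetic above, in isolation: while a simulation is in
# flight, the chosen edge is penalized so that concurrent searches spread
# out over different paths; the backup then cancels the penalty exactly and
# records the real visit and value. All numbers here are illustrative.
import numpy as np

n, w = np.zeros(64), np.zeros(64)
virtual_loss, action, leaf_v = 3, 19, 0.7

n[action] += virtual_loss        # descend: pretend this path is losing
w[action] -= virtual_loss
n[action] += -virtual_loss + 1   # back up: undo penalty, add one visit
w[action] += virtual_loss + leaf_v
print(n[action], w[action])      # 1.0 0.7 -- penalty fully cancelled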
- while self.running_simulation_num > 0 or margin > 0: - if q.empty(): - if margin > 0: - margin -= 1 - await asyncio.sleep(self.config.play.prediction_worker_sleep_sec) - continue - item_list = [q.get_nowait() - for _ in range(q.qsize())] # type: list[QueueItem] - #logger.debug(f"predicting {len(item_list)} items") - data = np.array([x.state for x in item_list]) - policy_ary, value_ary = self.api.predict( - data) # shape=(N, 2, 8, 8) - #logger.debug(f"predicted {len(item_list)} items") - for p, v, item in zip(policy_ary, value_ary, item_list): - item.future.set_result((p, v)) - - async def predict(self, x): - future = self.loop.create_future() - item = QueueItem(x, future) - await self.prediction_queue.put(item) - return future - - def finish_game(self, z): - """ - - :param z: win=1, lose=-1, draw=0 - :return: - """ - for move in self.moves: # add this game winner result to all past moves. - move += [z] - - def calc_policy(self, own, enemy): - """calc π(a|s0) - - :param own: - :param enemy: - :return: - """ - pc = self.play_config - env = ReversiEnv().update(own, enemy, Player.black) - key = self.counter_key(env) - if env.turn < pc.change_tau_turn: - return self.calc_policy_by_tau_1(key) - else: - action = np.argmax(self.var_n[key]) # tau = 0 - ret = np.zeros(64) - ret[action] = 1 - return ret - - def calc_policy_by_tau_1(self, key): - return self.var_n[key] / np.sum(self.var_n[key]) # tau = 1 - - @staticmethod - def counter_key(env: ReversiEnv): - return CounterKey(env.board.black, env.board.white, env.next_player.value) - - @staticmethod - def another_side_counter_key(env: ReversiEnv): - return CounterKey(env.board.white, env.board.black, another_player(env.next_player).value) - - def select_action_q_and_u(self, env, is_root_node): - key = self.counter_key(env) - if env.next_player == Player.black: - legal_moves = find_correct_moves(key.black, key.white) - else: - legal_moves = find_correct_moves(key.white, key.black) - # noinspection PyUnresolvedReferences - # SQRT of sum(N(s, b); for all b) - xx_ = np.sqrt(np.sum(self.var_n[key])) - xx_ = max(xx_, 1) # avoid u_=0 if N is all 0 - p_ = self.var_p[key] - - # re-normalize in legal moves - p_ = p_ * bit_to_array(legal_moves, 64) - if np.sum(p_) > 0: - # decay policy gradually in the end phase - _pc = self.config.play - temperature = min( - np.exp(1-np.power(env.turn/_pc.policy_decay_turn, _pc.policy_decay_power)), 1) - # normalize and decay policy - p_ = self.normalize(p_, temperature) - - # Is it correct?? -> (1-e)p + e*Dir(alpha) - if is_root_node and self.play_config.noise_eps > 0: - noise = dirichlet_noise_of_mask( - legal_moves, self.play_config.dirichlet_alpha) - p_ = (1 - self.play_config.noise_eps) * \ - p_ + self.play_config.noise_eps * noise - - u_ = self.play_config.c_puct * p_ * xx_ / (1 + self.var_n[key]) - if env.next_player == Player.black: - v_ = (self.var_q(key) + u_ + 1000) * bit_to_array(legal_moves, 64) - else: - # When enemy's selecting action, flip Q-Value. 
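# The selection rule above in standalone form: PUCT picks
# argmax_a Q(s,a) + c_puct * P(s,a) * sqrt(sum_b N(s,b)) / (1 + N(s,a)),
# and at the root the prior is first blended with Dirichlet noise,
# (1-eps)*p + eps*Dir(alpha). All numbers here are illustrative.
import numpy as np

c_puct, eps, alpha = 1.5, 0.25, 0.5
n = np.array([10.0, 5.0, 0.0, 1.0])     # N(s, a)
w = np.array([6.0, -1.0, 0.0, 0.5])     # W(s, a)
p = np.array([0.4, 0.3, 0.2, 0.1])      # P(s, a)

p = (1 - eps) * p + eps * np.random.dirichlet([alpha] * len(p))  # root only
q = w / (n + 1e-5)                                   # exploitation term
u = c_puct * p * max(np.sqrt(n.sum()), 1) / (1 + n)  # exploration term
print(int(np.argmax(q + u)))    # the edge chosen for this simulation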
- v_ = (-self.var_q(key) + u_ + 1000) * bit_to_array(legal_moves, 64) - - # noinspection PyTypeChecker - action_t = int(np.argmax(v_)) - return action_t - - @staticmethod - def normalize(p, t=1): - pp = np.power(p, t) - return pp / np.sum(pp) - - def create_solver(self): - return ReversiSolver() -import enum - -from logging import getLogger - -from reversi_zero.lib.bitboard import board_to_string, calc_flip, bit_count, find_correct_moves - -logger = getLogger(__name__) -# noinspection PyArgumentList -Player = enum.Enum("Player", "black white") -# noinspection PyArgumentList -Winner = enum.Enum("Winner", "black white draw") - - -def another_player(player: Player): - return Player.white if player == Player.black else Player.black - - -class ReversiEnv: - def __init__(self): - self.board = None - self.next_player = None # type: Player - self.turn = 0 - self.done = False - self.winner = None # type: Winner - - def reset(self): - self.board = Board() - self.next_player = Player.black - self.turn = 0 - self.done = False - self.winner = None - return self - - def update(self, black, white, next_player): - self.board = Board(black, white) - self.next_player = next_player - self.turn = sum(self.board.number_of_black_and_white) - 4 - self.done = False - self.winner = None - return self - - def step(self, action): - """ - - :param int|None action: move pos=0 ~ 63 (0=top left, 7 top right, 63 bottom right), None is resign - :return: - """ - assert action is None or 0 <= action <= 63, f"Illegal action={action}" - - if action is None: - self._resigned() - return self.board, {} - - own, enemy = self.get_own_and_enemy() - - flipped = calc_flip(action, own, enemy) - if bit_count(flipped) == 0: - self.illegal_move_to_lose(action) - return self.board, {} - own ^= flipped - own |= 1 << action - enemy ^= flipped - - self.set_own_and_enemy(own, enemy) - self.turn += 1 - - # there are legal moves for enemy. - if bit_count(find_correct_moves(enemy, own)) > 0: - self.change_to_next_player() - # there are legal moves for me but enemy. - elif bit_count(find_correct_moves(own, enemy)) > 0: - pass - else: # there is no legal moves for me and enemy. 
- self._game_over() - - return self.board, {} - - def _game_over(self): - self.done = True - if self.winner is None: - black_num, white_num = self.board.number_of_black_and_white - if black_num > white_num: - self.winner = Winner.black - elif black_num < white_num: - self.winner = Winner.white - else: - self.winner = Winner.draw - - def change_to_next_player(self): - self.next_player = another_player(self.next_player) - - def illegal_move_to_lose(self, action): - logger.warning(f"Illegal action={action}, No Flipped!") - self._win_another_player() - self._game_over() - - def _resigned(self): - self._win_another_player() - self._game_over() - - def _win_another_player(self): - win_player = another_player(self.next_player) # type: Player - if win_player == Player.black: - self.winner = Winner.black - else: - self.winner = Winner.white - - def get_own_and_enemy(self): - if self.next_player == Player.black: - own, enemy = self.board.black, self.board.white - else: - own, enemy = self.board.white, self.board.black - return own, enemy - - def set_own_and_enemy(self, own, enemy): - if self.next_player == Player.black: - self.board.black, self.board.white = own, enemy - else: - self.board.white, self.board.black = own, enemy - - def render(self): - b, w = self.board.number_of_black_and_white - print(f"next={self.next_player.name} turn={self.turn} B={b} W={w}") - print(board_to_string(self.board.black, self.board.white, with_edge=True)) - - @property - def observation(self): - """ - - :rtype: Board - """ - return self.board - - -class Board: - def __init__(self, black=None, white=None, init_type=0): - self.black = black or (0b00010000 << 24 | 0b00001000 << 32) - self.white = white or (0b00001000 << 24 | 0b00010000 << 32) - - if init_type: - self.black, self.white = self.white, self.black - - @property - def number_of_black_and_white(self): - return bit_count(self.black), bit_count(self.white) -# http://primenumber.hatenadiary.jp/entry/2016/12/26/063226 -import numpy as np - -BLACK_CHR = "O" -WHITE_CHR = "X" -EXTRA_CHR = "*" - - -def board_to_string(black, white, with_edge=True, extra=None): - """ - 0 1 2 3 4 5 6 7 - 8 9 10 11 12 13 14 15 - .. 
- 56 57 58 59 60 61 62 63 - - 0: Top Left, LSB - 63: Bottom Right - - :param black: bitboard - :param white: bitboard - :param with_edge: - :param extra: bitboard - :return: - """ - array = [" "] * 64 - extra = extra or 0 - for i in range(64): - if black & 1: - array[i] = BLACK_CHR - elif white & 1: - array[i] = WHITE_CHR - elif extra & 1: - array[i] = EXTRA_CHR - black >>= 1 - white >>= 1 - extra >>= 1 - - ret = "" - if with_edge: - ret = "#" * 10 + "\n" - for y in range(8): - if with_edge: - ret += "#" - ret += "".join(array[y * 8:y * 8 + 8]) - if with_edge: - ret += "#" - ret += "\n" - if with_edge: - ret += "#" * 10 + "\n" - return ret - - -def find_correct_moves(own, enemy): - """return legal moves""" - left_right_mask = 0x7e7e7e7e7e7e7e7e # Both most left-right edge are 0, else 1 - top_bottom_mask = 0x00ffffffffffff00 # Both most top-bottom edge are 0, else 1 - mask = left_right_mask & top_bottom_mask - mobility = 0 - mobility |= search_offset_left(own, enemy, left_right_mask, 1) # Left - mobility |= search_offset_left(own, enemy, mask, 9) # Left Top - mobility |= search_offset_left(own, enemy, top_bottom_mask, 8) # Top - mobility |= search_offset_left(own, enemy, mask, 7) # Top Right - mobility |= search_offset_right(own, enemy, left_right_mask, 1) # Right - mobility |= search_offset_right(own, enemy, mask, 9) # Bottom Right - mobility |= search_offset_right(own, enemy, top_bottom_mask, 8) # Bottom - mobility |= search_offset_right(own, enemy, mask, 7) # Left bottom - return mobility - - -def calc_flip(pos, own, enemy): - """return flip stones of enemy by bitboard when I place stone at pos. - - :param pos: 0~63 - :param own: bitboard (0=top left, 63=bottom right) - :param enemy: bitboard - :return: flip stones of enemy when I place stone at pos. 
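# A quick sanity check for the mobility mask above (this assumes
# find_correct_moves and bit_count from this module are in scope): from the
# standard opening position, the side to move has exactly four legal moves.
black = 0b00010000 << 24 | 0b00001000 << 32   # e4 and d5 (0 = top left)
white = 0b00001000 << 24 | 0b00010000 << 32   # d4 and e5
print(bit_count(find_correct_moves(black, white)))   # 4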
- """ - assert 0 <= pos <= 63, f"pos={pos}" - f1 = _calc_flip_half(pos, own, enemy) - f2 = _calc_flip_half(63 - pos, rotate180(own), rotate180(enemy)) - return f1 | rotate180(f2) - - -def _calc_flip_half(pos, own, enemy): - el = [enemy, enemy & 0x7e7e7e7e7e7e7e7e, enemy & - 0x7e7e7e7e7e7e7e7e, enemy & 0x7e7e7e7e7e7e7e7e] - masks = [0x0101010101010100, 0x00000000000000fe, - 0x0002040810204080, 0x8040201008040200] - masks = [b64(m << pos) for m in masks] - flipped = 0 - for e, mask in zip(el, masks): - outflank = mask & ((e | ~mask) + 1) & own - flipped |= (outflank - (outflank != 0)) & mask - return flipped - - -def search_offset_left(own, enemy, mask, offset): - e = enemy & mask - blank = ~(own | enemy) - t = e & (own >> offset) - t |= e & (t >> offset) - t |= e & (t >> offset) - t |= e & (t >> offset) - t |= e & (t >> offset) - t |= e & (t >> offset) # Up to six stones can be turned at once - return blank & (t >> offset) # Only the blank squares can be started - - -def search_offset_right(own, enemy, mask, offset): - e = enemy & mask - blank = ~(own | enemy) - t = e & (own << offset) - t |= e & (t << offset) - t |= e & (t << offset) - t |= e & (t << offset) - t |= e & (t << offset) - t |= e & (t << offset) # Up to six stones can be turned at once - return blank & (t << offset) # Only the blank squares can be started - - -def flip_vertical(x): - k1 = 0x00FF00FF00FF00FF - k2 = 0x0000FFFF0000FFFF - x = ((x >> 8) & k1) | ((x & k1) << 8) - x = ((x >> 16) & k2) | ((x & k2) << 16) - x = (x >> 32) | b64(x << 32) - return x - - -def b64(x): - return x & 0xFFFFFFFFFFFFFFFF - - -def bit_count(x): - return bin(x).count('1') - - -def bit_to_array(x, size): - """bit_to_array(0b0010, 4) -> array([0, 1, 0, 0])""" - return np.array(list(reversed((("0" * size) + bin(x)[2:])[-size:])), dtype=np.uint8) - - -def flip_diag_a1h8(x): - k1 = 0x5500550055005500 - k2 = 0x3333000033330000 - k4 = 0x0f0f0f0f00000000 - t = k4 & (x ^ b64(x << 28)) - x ^= t ^ (t >> 28) - t = k2 & (x ^ b64(x << 14)) - x ^= t ^ (t >> 14) - t = k1 & (x ^ b64(x << 7)) - x ^= t ^ (t >> 7) - return x - - -def rotate90(x): - return flip_diag_a1h8(flip_vertical(x)) - - -def rotate180(x): - return rotate90(rotate90(x)) - - -def dirichlet_noise_of_mask(mask, alpha): - num_1 = bit_count(mask) - noise = list(np.random.dirichlet([alpha] * num_1)) - ret_list = [] - for i in range(64): - if (1 << i) & mask: - ret_list.append(noise.pop(0)) - else: - ret_list.append(0) - return np.array(ret_list) -import json -import os -from glob import glob -from logging import getLogger - -from reversi_zero.config import ResourceConfig - -logger = getLogger(__name__) - - -def get_game_data_filenames(rc: ResourceConfig): - pattern = os.path.join(rc.play_data_dir, rc.play_data_filename_tmpl % "*") - files = list(sorted(glob(pattern))) - return files - - -def get_next_generation_model_dirs(rc: ResourceConfig): - dir_pattern = os.path.join( - rc.next_generation_model_dir, rc.next_generation_model_dirname_tmpl % "*") - dirs = list(sorted(glob(dir_pattern))) - return dirs - - -def write_game_data_to_file(path, data): - with open(path, "wt") as f: - json.dump(data, f) - - -def read_game_data_from_file(path): - with open(path, "rt") as f: - return json.load(f) -import os - - -def read_as_int(filename): - if os.path.exists(filename): - try: - with open(filename, "rt") as f: - ret = int(str(f.read()).strip()) - if ret: - return ret - except ValueError: - pass -import re -from collections import namedtuple - -from datetime import datetime - -from reversi_zero.lib.util import 
parse_ggf_board_to_bitboard - -GGF = namedtuple("GGF", "BO MOVES") -# color: {O, *} (O is white, * is black) -BO = namedtuple("BO", "board_type, square_cont, color") -MOVE = namedtuple("MOVE", "color pos") # color={B, W} pos: like 'F5' - - -def parse_ggf(ggf): - """https://skatgame.net/mburo/ggsa/ggf - - :param ggf: - :rtype: GGF - """ - tokens = re.split(r'([a-zA-Z]+\[[^\]]+\])', ggf) - moves = [] - bo = None - for token in tokens: - match = re.search(r'([a-zA-Z]+)\[([^\]]+)\]', token) - if not match: - continue - key, value = re.search(r'([a-zA-Z]+)\[([^\]]+)\]', token).groups() - key = key.upper() - if key == "BO": - bo = BO(*value.split(" ")) - elif key in ("B", "W"): - moves.append(MOVE(key, value)) - return GGF(bo, moves) - - -def convert_move_to_action(move_str: str): - """ - - :param move_str: A1 -> 0, H8 -> 63 - :return: - """ - if move_str[:2].lower() == "pa": - return None - pos = move_str.lower() - y = ord(pos[0]) - ord("a") - x = int(pos[1]) - 1 - return y * 8 + x - - -def convert_action_to_move(action): - """ - - :param int|None action: - :return: - """ - if action is None: - return "PA" - y = action // 8 - x = action % 8 - return chr(ord("A") + y) + str(x + 1) - - -def convert_to_bitboard_and_actions(ggf: GGF): - black, white = parse_ggf_board_to_bitboard(ggf.BO.square_cont) - actions = [] - for move in ggf.MOVES: # type: MOVE - actions.append(convert_move_to_action(move.pos)) - return black, white, actions - - -def make_ggf_string(black_name=None, white_name=None, dt=None, moves=None, result=None, think_time_sec=60): - """ - - :param str black_name: - :param str white_name: - :param datetime|None dt: - :param str|None result: - :param list[str] moves: - :param int think_time_sec: - :return: - """ - ggf = '(;GM[Othello]PC[RAZSelf]DT[%(datetime)s]PB[%(black_name)s]PW[%(white_name)s]RE[%(result)s]TI[%(time)s]' \ - 'TY[8]BO[8 ---------------------------O*------*O--------------------------- *]%(move_list)s;)' - dt = dt or datetime.utcnow() - - move_list = [] - for i, move in enumerate(moves or []): - if i % 2 == 0: - move_list.append(f"B[{move}]") - else: - move_list.append(f"W[{move}]") - - params = dict( - black_name=black_name or "black", - white_name=white_name or "white", - result=result or '?', - datetime=dt.strftime("%Y.%m.%d_%H:%M:%S.%Z"), - time=f"{think_time_sec // 60}:{think_time_sec % 60}", - move_list="".join(move_list), - ) - return ggf % params -from logging import StreamHandler, basicConfig, DEBUG, getLogger, Formatter - - -def setup_logger(log_filename): - format_str = '%(asctime)s@%(name)s %(levelname)s # %(message)s' - basicConfig(filename=log_filename, level=DEBUG, format=format_str) - stream_handler = StreamHandler() - stream_handler.setFormatter(Formatter(format_str)) - getLogger().addHandler(stream_handler) - - -if __name__ == '__main__': - setup_logger("aa.log") - logger = getLogger("test") - logger.info("OK") -import os -from logging import getLogger -from time import sleep - -import keras.backend as K - - -logger = getLogger(__name__) - - -def load_best_model_weight(model, clear_session=False): - """ - - :param reversi_zero.agent.model.ReversiModel model: - :param bool clear_session: - :return: - """ - if clear_session: - K.clear_session() - return model.load(model.config.resource.model_best_config_path, model.config.resource.model_best_weight_path) - - -def save_as_best_model(model): - """ - - :param reversi_zero.agent.model.ReversiModel model: - :return: - """ - return model.save(model.config.resource.model_best_config_path, 
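# Worked examples of the GGF coordinate mapping above (assumes
# convert_move_to_action / convert_action_to_move are in scope): the letter
# selects y, the digit selects x, and action = y * 8 + x, so A1 -> 0 and
# H8 -> 63; a move starting with "pa" encodes a pass.
print(convert_move_to_action("F5"))    # 44
print(convert_action_to_move(44))      # F5
print(convert_move_to_action("pass"))  # None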
model.config.resource.model_best_weight_path) - - -def reload_best_model_weight_if_changed(model, clear_session=False): - """ - - :param reversi_zero.agent.model.ReversiModel model: - :param bool clear_session: - :return: - """ - logger.debug(f"start reload the best model if changed") - digest = model.fetch_digest(model.config.resource.model_best_weight_path) - if digest != model.digest: - return load_best_model_weight(model, clear_session=clear_session) - - logger.debug(f"the best model is not changed") - return False - - -def reload_newest_next_generation_model_if_changed(model, clear_session=False): - """ - - :param reversi_zero.agent.model.ReversiModel model: - :param bool clear_session: - :return: - """ - from reversi_zero.lib.data_helper import get_next_generation_model_dirs - - rc = model.config.resource - dirs = get_next_generation_model_dirs(rc) - if not dirs: - logger.debug("No next generation model exists.") - return False - model_dir = dirs[-1] - config_path = os.path.join( - model_dir, rc.next_generation_model_config_filename) - weight_path = os.path.join( - model_dir, rc.next_generation_model_weight_filename) - digest = model.fetch_digest(weight_path) - if digest and digest != model.digest: - logger.debug(f"Loading weight from {model_dir}") - if clear_session: - K.clear_session() - for _ in range(5): - try: - return model.load(config_path, weight_path) - except Exception as e: - logger.warning(f"error in load model: #{e}") - sleep(3) - raise RuntimeError("Cannot Load Model!") - - else: - logger.debug(f"The newest model is not changed: digest={digest}") - return False -# idea from http://eyalarubas.com/python-subproc-nonblock.html -from queue import Queue, Empty -from threading import Thread - -from logging import getLogger -logger = getLogger(__name__) - - -class NonBlockingStreamReader: - def __init__(self, stream): - self._stream = stream - self._queue = Queue() - self._thread = None - self.closed = True - - def start(self, push_callback=None): - def _worker(): - while True: - line = self._stream.readline() - if line: - if push_callback: - push_callback(line) - self._queue.put(line) - else: - logger.debug("the stream may be closed") - break - self.closed = True - - self._thread = Thread(target=_worker) - self._thread.setDaemon(True) - self._thread.setName("NonBlockingStreamReader of %s" % - repr(self._stream)) - self.closed = False - self._thread.start() - - def readline(self, timeout=None): - try: - return self._queue.get(block=timeout is not None, timeout=timeout) - except Empty: - return None -from time import time - -from logging import getLogger - -from reversi_zero.env.reversi_env import ReversiEnv, Player -from reversi_zero.lib.bitboard import find_correct_moves -import numpy as np - - -logger = getLogger(__name__) - - -class Timeout(Exception): - pass - - -class ReversiSolver: - """calculate which is winner. Not estimation by NN! - - this implementation runs very slow. 
(^^; - """ - - def __init__(self): - self.cache = {} - self.start_time = None - self.timeout = None - self.last_is_exactly = False - - def solve(self, black, white, next_player, timeout=30, exactly=False): - self.timeout = timeout - self.start_time = time() - if not self.last_is_exactly and exactly: - self.cache = {} - self.last_is_exactly = exactly - - try: - # logger.debug("start resolving") - move, score = self.find_winning_move_and_score(ReversiEnv().update(black, white, next_player), - exactly=exactly) - if next_player == Player.white: - score = -score - # logger.debug(f"solve answer=({move},{score})({time()-self.start_time:.3f} seconds)") - return move, score - except Timeout: - return None, None - - def find_winning_move_and_score(self, env: ReversiEnv, exactly=True): - if env.done: - b, w = env.board.number_of_black_and_white - return None, b - w - if time() - self.start_time > self.timeout: - logger.debug("timeout!") - raise Timeout() - - turn = env.turn - key = black, white, next_player = env.board.black, env.board.white, env.next_player - if key in self.cache: - return self.cache[key] - - if next_player == Player.black: - legal_moves = find_correct_moves(black, white) - else: - legal_moves = find_correct_moves(white, black) - - action_list = [idx for idx in range(64) if legal_moves & (1 << idx)] - score_list = np.zeros(len(action_list), dtype=int) - for i, action in enumerate(action_list): - # env.update(black, white, next_player) - env.board.black = black - env.board.white = white - env.next_player = next_player - env.turn = turn - env.done = False - env.winner = None - # - env.step(action) - _, score = self.find_winning_move_and_score(env, exactly=exactly) - score_list[i] = score - - if not exactly: - # do not need to find the best score move - if next_player == Player.black and score > 0: - break - elif next_player == Player.white and score < 0: - break - - # print(list(zip(action_list, score_list))) - - if next_player == Player.black: - best_action = action_list[int(np.argmax(score_list))] - best_score = np.max(score_list) - else: - best_action = action_list[int(np.argmin(score_list))] - best_score = np.min(score_list) - - self.cache[key] = (best_action, best_score) - return best_action, best_score - - -if __name__ == '__main__': - from reversi_zero.lib.util import parse_to_bitboards - - def q1(): - board = ''' - ########## - #XXXX # - #XOXX # - #XOXXOOOO# - #XOXOXOOO# - #XOXXOXOO# - #OOOOXOXO# - # OOOOOOO# - # XXXXXO# - ##########''' - b, w = parse_to_bitboards(board) - rr = ReversiSolver() - print("correct is (57, +2)") - print(rr.solve(b, w, Player.white, exactly=False)) - print(len(rr.cache)) - - def q2(): - board = ''' - ########## - #XXXX # - #XXXX X # - #XXXXXXOO# - #XXXXXXOO# - #XXXXOXOO# - #OXOOXOXO# - # OOOOOOO# - #OOOOOOOO# - ##########''' - b, w = parse_to_bitboards(board) - rr = ReversiSolver() - print("correct is (4 or 14, -2)") - print(rr.solve(b, w, Player.black, exactly=False)) - print(len(rr.cache)) - - def q3(): # O: black, X: white - board = ''' - ########## - # X OOO # - #X XOXO O# - #XXXXOXOO# - #XOXOOXXO# - #XOOOOXXO# - #XOOOXXXO# - # OOOOXX # - # OOOOX # - ##########''' - b, w = parse_to_bitboards(board) - rr = ReversiSolver() - print("correct is (3, +2)") - print(rr.solve(b, w, Player.white, exactly=True)) - print(len(rr.cache)) - - q3() -import tensorflow as tf - - -class TensorBoardLogger: - def __init__(self, log_dir, filename_suffix=None): - self.writer = tf.summary.FileWriter( - log_dir, filename_suffix=filename_suffix) - - def log_scaler(self, 
info: dict, step): - """ - - :param dict info: dict of {: } - :param int step: - :return: - """ - for tag, value in info.items(): - summary = tf.Summary( - value=[tf.Summary.Value(tag=tag, simple_value=value)]) - self.writer.add_summary(summary, step) - self.writer.flush() -from keras.callbacks import Callback -import tensorflow as tf - - -class TensorBoardStepCallback(Callback): - """Tensorboard basic visualizations by step. - - """ - - def __init__(self, log_dir, logging_per_steps=100, step=0): - super().__init__() - self.step = step - self.logging_per_steps = logging_per_steps - self.writer = tf.summary.FileWriter(log_dir) - - def on_batch_end(self, batch, logs=None): - self.step += 1 - - if self.step % self.logging_per_steps > 0: - return - - for name, value in logs.items(): - if name in ['batch', 'size']: - continue - summary = tf.Summary() - summary_value = summary.value.add() - summary_value.simple_value = value.item() - summary_value.tag = name - self.writer.add_summary(summary, self.step) - self.writer.flush() - - def close(self): - self.writer.close() -def set_session_config(per_process_gpu_memory_fraction=None, allow_growth=None): - """ - - :param allow_growth: When necessary, reserve memory - :param float per_process_gpu_memory_fraction: specify GPU memory usage as 0 to 1 - - :return: - """ - import tensorflow as tf - import keras.backend as K - - config = tf.ConfigProto( - gpu_options=tf.GPUOptions( - per_process_gpu_memory_fraction=per_process_gpu_memory_fraction, - allow_growth=allow_growth, - ) - ) - sess = tf.Session(config=config) - K.set_session(sess) -def parse_to_bitboards(string: str): - lines = string.strip().split("\n") - black = 0 - white = 0 - y = 0 - - for line in [l.strip() for l in lines]: - if line[:2] == '##': - continue - for i, ch in enumerate(line[1:9]): - if ch == 'O': - black |= 1 << (y*8+i) - elif ch == 'X': - white |= 1 << (y*8+i) - y += 1 - - return black, white - - -def parse_ggf_board_to_bitboard(string: str): - white = black = 0 - for i, ch in enumerate(string): - if ch == "*": - black |= 1 << i - elif ch == "O": - white |= 1 << i - return black, white -from reversi_zero.config import Config -from reversi_zero.lib.model_helpler import reload_newest_next_generation_model_if_changed, load_best_model_weight - - -def load_model(config: Config): - from reversi_zero.agent.model import ReversiModel - model = ReversiModel(config) - if config.play.use_newest_next_generation_model: - loaded = reload_newest_next_generation_model_if_changed( - model) or load_best_model_weight(model) - else: - loaded = load_best_model_weight( - model) or reload_newest_next_generation_model_if_changed(model) - if not loaded: - raise RuntimeError("No models found!") - return model -import enum -from logging import getLogger - -from reversi_zero.agent.player import HistoryItem -from reversi_zero.agent.player import ReversiPlayer -from reversi_zero.config import Config -from reversi_zero.env.reversi_env import Player, ReversiEnv -from reversi_zero.lib.bitboard import find_correct_moves -from reversi_zero.lib.model_helpler import load_best_model_weight, reload_newest_next_generation_model_if_changed -from reversi_zero.play_game.common import load_model - -logger = getLogger(__name__) - -GameEvent = enum.Enum("GameEvent", "update ai_move over pass") - - -class PlayWithHuman: - def __init__(self, config: Config): - self.config = config - self.human_color = None - self.observers = [] - self.env = ReversiEnv().reset() - self.model = self._load_model() - self.ai = None # type: 
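# Usage sketch for set_session_config above. tf.ConfigProto / tf.Session
# are TensorFlow 1.x APIs, matching the rest of this codebase; the memory
# fraction below is illustrative.
import tensorflow as tf
import keras.backend as K

config = tf.ConfigProto(gpu_options=tf.GPUOptions(
    per_process_gpu_memory_fraction=0.3,  # cap GPU memory for this process
    allow_growth=True,                    # allocate lazily up to the cap
))
K.set_session(tf.Session(config=config))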
ReversiPlayer - self.last_evaluation = None - self.last_history = None # type: HistoryItem - - def add_observer(self, observer_func): - self.observers.append(observer_func) - - def notify_all(self, event): - for ob_func in self.observers: - ob_func(event) - - def start_game(self, human_is_black): - self.human_color = Player.black if human_is_black else Player.white - self.env = ReversiEnv().reset() - self.ai = ReversiPlayer(self.config, self.model) - - def play_next_turn(self): - self.notify_all(GameEvent.update) - - if self.over: - self.notify_all(GameEvent.over) - return - - if self.next_player != self.human_color: - self.notify_all(GameEvent.ai_move) - - @property - def over(self): - return self.env.done - - @property - def next_player(self): - return self.env.next_player - - def stone(self, px, py): - """left top=(0, 0), right bottom=(7,7)""" - pos = int(py * 8 + px) - assert 0 <= pos < 64 - bit = 1 << pos - if self.env.board.black & bit: - return Player.black - elif self.env.board.white & bit: - return Player.white - return None - - @property - def number_of_black_and_white(self): - return self.env.observation.number_of_black_and_white - - def available(self, px, py): - pos = int(py * 8 + px) - if pos < 0 or 64 <= pos: - return False - own, enemy = self.env.board.black, self.env.board.white - if self.human_color == Player.white: - own, enemy = enemy, own - legal_moves = find_correct_moves(own, enemy) - return legal_moves & (1 << pos) - - def move(self, px, py): - pos = int(py * 8 + px) - assert 0 <= pos < 64 - - if self.next_player != self.human_color: - return False - - self.env.step(pos) - - def _load_model(self): - return load_model(self.config) - - def move_by_ai(self): - if self.next_player == self.human_color: - return False - - own, enemy = self.get_state_of_next_player() - action = self.ai.action(own, enemy) - self.env.step(action) - - self.last_history = self.ai.ask_thought_about(own, enemy) - self.last_evaluation = self.last_history.values[self.last_history.action] - logger.debug(f"evaluation by ai={self.last_evaluation}") - - def get_state_of_next_player(self): - if self.next_player == Player.black: - own, enemy = self.env.board.black, self.env.board.white - else: - own, enemy = self.env.board.white, self.env.board.black - return own, enemy -# many code from http://d.hatena.ne.jp/yatt/20100129/1264791420 - -from logging import getLogger - -import wx -from wx.core import CommandEvent - -from reversi_zero.config import Config, GuiConfig, PlayWithHumanConfig -from reversi_zero.env.reversi_env import Player -from reversi_zero.play_game.game_model import PlayWithHuman, GameEvent - -logger = getLogger(__name__) - - -def start(config: Config): - config.play_with_human.update_play_config(config.play) - reversi_model = PlayWithHuman(config) - app = wx.App() - Frame(reversi_model, config.gui).Show() - app.MainLoop() - - -def notify(caption, message): - dialog = wx.MessageDialog(None, message=message, - caption=caption, style=wx.OK) - dialog.ShowModal() - dialog.Destroy() - - -class Frame(wx.Frame): - def __init__(self, model: PlayWithHuman, gui_config: GuiConfig): - self.model = model - self.gui_config = gui_config - self.is_flip_vertical = False - self.show_player_evaluation = True - wx.Frame.__init__( - self, None, -1, self.gui_config.window_title, size=self.gui_config.window_size) - # panel - self.panel = wx.Panel(self) - self.panel.Bind(wx.EVT_LEFT_DOWN, self.try_move) - self.panel.Bind(wx.EVT_PAINT, self.refresh) - - self.new_game(human_is_black=True) - # menu bar - menu = 
wx.Menu() - menu.Append(1, u"New Game(Black)") - menu.Append(2, u"New Game(White)") - menu.AppendSeparator() - menu.Append(5, u"Flip Vertical") - menu.Append(6, u"Show/Hide Player evaluation") - menu.AppendSeparator() - menu.Append(9, u"quit") - menu_bar = wx.MenuBar() - menu_bar.Append(menu, u"menu") - self.SetMenuBar(menu_bar) - self.Bind(wx.EVT_MENU, self.handle_new_game, id=1) - self.Bind(wx.EVT_MENU, self.handle_new_game, id=2) - self.Bind(wx.EVT_MENU, self.handle_flip_vertical, id=5) - self.Bind(wx.EVT_MENU, self.handle_show_hide_player_evaluation, id=6) - self.Bind(wx.EVT_MENU, self.handle_quit, id=9) - - # status bar - self.CreateStatusBar() - - self.model.add_observer(self.handle_game_event) - - def handle_game_event(self, event): - if event == GameEvent.update: - self.panel.Refresh() - self.update_status_bar() - wx.Yield() - elif event == GameEvent.over: - self.game_over() - elif event == GameEvent.ai_move: - self.ai_move() - - def handle_quit(self, event: CommandEvent): - self.Close() - - def handle_new_game(self, event: CommandEvent): - self.new_game(human_is_black=event.GetId() == 1) - - def handle_flip_vertical(self, event): - self.is_flip_vertical = not self.is_flip_vertical - self.panel.Refresh() - - def handle_show_hide_player_evaluation(self, event): - self.show_player_evaluation = not self.show_player_evaluation - self.panel.Refresh() - - def new_game(self, human_is_black): - self.model.start_game(human_is_black=human_is_black) - self.model.play_next_turn() - - def ai_move(self): - self.panel.Refresh() - self.update_status_bar() - wx.Yield() - self.model.move_by_ai() - self.model.play_next_turn() - - def try_move(self, event): - if self.model.over: - return - # calculate coordinate from window coordinate - event_x, event_y = event.GetX(), event.GetY() - w, h = self.panel.GetSize() - x = int(event_x / (w / 8)) - y = int(event_y / (h / 8)) - - if self.is_flip_vertical: - y = 7-y - - if not self.model.available(x, y): - return - - self.model.move(x, y) - self.model.play_next_turn() - - def game_over(self): - # if game is over then display dialog - - black, white = self.model.number_of_black_and_white - mes = "black: %d\nwhite: %d\n" % (black, white) - if black == white: - mes += "** draw **" - else: - mes += "winner: %s" % ["black", "white"][black < white] - notify("game is over", mes) - # elif self.reversi.passed != None: - # notify("passing turn", "pass") - - def update_status_bar(self): - msg = "current player is " + \ - ["White", "Black"][self.model.next_player == Player.black] - if self.model.last_evaluation: - msg += f"|AI Confidence={self.model.last_evaluation:.4f}" - self.SetStatusText(msg) - - def refresh(self, event): - dc = wx.PaintDC(self.panel) - self.update_status_bar() - - w, h = self.panel.GetSize() - # background - dc.SetBrush(wx.Brush("#228b22")) - dc.DrawRectangle(0, 0, w, h) - # grid - dc.SetBrush(wx.Brush("black")) - px, py = w / 8, h / 8 - for y in range(8): - dc.DrawLine(y * px, 0, y * px, h) - dc.DrawLine(0, y * py, w, y * py) - dc.DrawLine(w - 1, 0, w - 1, h - 1) - dc.DrawLine(0, h - 1, w - 1, h - 1) - - # stones - brushes = {Player.white: wx.Brush( - "white"), Player.black: wx.Brush("black")} - for y in range(8): - vy = 7-y if self.is_flip_vertical else y - for x in range(8): - c = self.model.stone(x, y) - if c is not None: - dc.SetBrush(brushes[c]) - dc.DrawEllipse(x * px, vy * py, px, py) - if self.model.last_history: - q_value = self.model.last_history.values[y*8+x] - n_value = self.model.last_history.visit[y*8+x] - enemy_q_value = - \ - 
self.model.last_history.enemy_values[y*8+x] - enemy_n_value = self.model.last_history.enemy_visit[y*8+x] - - dc.SetTextForeground(wx.Colour("blue")) - if n_value: - dc.DrawText(f"{int(n_value):d}", x*px+2, vy*py+2) - if q_value: - if q_value < 0: - dc.SetTextForeground(wx.Colour("red")) - dc.DrawText(f"{int(q_value*100):d}", - x*px+2, (vy+1)*py-16) - - if self.show_player_evaluation: - dc.SetTextForeground(wx.Colour("purple")) - if enemy_n_value: - dc.DrawText(f"{int(enemy_n_value):2d}", - (x+1)*px-20, vy*py+2) - if enemy_q_value: - if enemy_q_value < 0: - dc.SetTextForeground(wx.Colour("orange")) - dc.DrawText( - f"{int(enemy_q_value*100):2d}", (x+1)*px-24, (vy+1)*py-16) -import re -import sys -from collections import namedtuple - -from logging import getLogger, StreamHandler, FileHandler -from time import time - -from reversi_zero.agent.player import ReversiPlayer, CallbackInMCTS -from reversi_zero.config import Config, PlayWithHumanConfig -from reversi_zero.env.reversi_env import ReversiEnv, Player -from reversi_zero.lib.ggf import parse_ggf, convert_to_bitboard_and_actions, convert_move_to_action, \ - convert_action_to_move -from reversi_zero.lib.nonblocking_stream_reader import NonBlockingStreamReader -from reversi_zero.play_game.common import load_model - -logger = getLogger(__name__) - -GameState = namedtuple("GameState", "black white actions player") -GoResponse = namedtuple("GoResponse", "action eval time") -HintResponse = namedtuple("HintResponse", "action value visit") - - -def start(config: Config): - config.play_with_human.update_play_config(config.play) - root_logger = getLogger() - for h in root_logger.handlers: - if isinstance(h, StreamHandler) and not isinstance(h, FileHandler): - root_logger.removeHandler(h) - logger.info(f"config type={config.type}") - NBoardEngine(config).start() - logger.info("finish nboard") - - -class NBoardEngine: - def __init__(self, config: Config): - self.config = config - self.reader = NonBlockingStreamReader(sys.stdin) - self.handler = NBoardProtocolVersion2(config, self) - self.running = False - self.nc = self.config.nboard # shorcut - # - self.env = ReversiEnv().reset() - self.model = load_model(self.config) - self.play_config = self.config.play - self.player = self.create_player() - self.turn_of_nboard = None - - def create_player(self): - logger.debug("create new ReversiPlayer()") - return ReversiPlayer(self.config, self.model, self.play_config, enable_resign=False) - - def start(self): - self.running = True - self.reader.start(push_callback=self.push_callback) - while self.running and not self.reader.closed: - message = self.reader.readline(self.nc.read_stdin_timeout) - if message is None: - continue - message = message.strip() - logger.debug(f"> {message}") - self.handler.handle_message(message) - - def push_callback(self, message: str): - # note: called in another thread - if message.startswith("ping"): # interupt - self.stop_thinkng() - - def stop(self): - self.running = False - - def reply(self, message): - logger.debug(f"< {message}") - sys.stdout.write(message + "\n") - sys.stdout.flush() - - def stop_thinkng(self): - self.player.stop_thinking() - - def set_depth(self, n): - try: - n = int(n) - # self.play_config.simulation_num_per_move = n * self.nc.simulation_num_per_depth_about - self.play_config.required_visit_to_decide_action = n * \ - self.nc.simulation_num_per_depth_about - self.play_config.thinking_loop = min( - 30, - int(self.play_config.required_visit_to_decide_action * - 5 / self.play_config.simulation_num_per_move) - ) - 
- logger.info( - f"set required_visit_to_decide_action to {self.play_config.required_visit_to_decide_action}") - except ValueError: - pass - - def reset_state(self): - self.player = self.create_player() - - def set_game(self, game_state: GameState): - self.env.reset() - self.env.update(game_state.black, game_state.white, game_state.player) - self.turn_of_nboard = game_state.player - for action in game_state.actions: - self._change_turn() - if action is not None: - self.env.step(action) - - def _change_turn(self): - if self.turn_of_nboard: - self.turn_of_nboard = Player.black if self.turn_of_nboard == Player.white else Player.white - - def move(self, action): - self._change_turn() - if action is not None: - self.env.step(action) - - def go(self) -> GoResponse: - if self.env.next_player != self.turn_of_nboard: - return GoResponse(None, 0, 0) - - board = self.env.board - if self.env.next_player == Player.black: - states = (board.black, board.white) - else: - states = (board.white, board.black) - start_time = time() - action = self.player.action(*states) - item = self.player.ask_thought_about(*states) - evaluation = item.values[action] - time_took = time() - start_time - return GoResponse(action, evaluation, time_took) - - def hint(self, n_hint): - """ - - :param n_hint: - """ - board = self.env.board - if self.env.next_player == Player.black: - states = (board.black, board.white) - else: - states = (board.white, board.black) - - def hint_report_callback(values, visits): - hint_list = [] - for action, visit in list(sorted(enumerate(visits), key=lambda x: -x[1]))[:n_hint]: - if visit > 0: - hint_list.append(HintResponse( - action, values[action], visit)) - self.handler.report_hint(hint_list) - - callback_info = CallbackInMCTS( - self.config.nboard.hint_callback_per_sim, hint_report_callback) - self.player.action(*states, callback_in_mtcs=callback_info) - item = self.player.ask_thought_about(*states) - hint_report_callback(item.values, item.visit) - - -class NBoardProtocolVersion2: - def __init__(self, config: Config, engine: NBoardEngine): - self.config = config - self.engine = engine - self.handlers = [ - (re.compile(r'nboard ([0-9]+)'), self.nboard), - (re.compile(r'set depth ([0-9]+)'), self.set_depth), - (re.compile(r'set game (.+)'), self.set_game), - (re.compile(r'move ([^/]+)(/[^/]*)?(/[^/]*)?'), self.move), - (re.compile(r'hint ([0-9]+)'), self.hint), - (re.compile(r'go'), self.go), - (re.compile(r'ping ([0-9]+)'), self.ping), - (re.compile(r'learn'), self.learn), - (re.compile(r'analyze'), self.analyze), - ] - - def handle_message(self, message): - for regexp, func in self.handlers: - if self.scan(message, regexp, func): - return - logger.debug(f"ignore message: {message}") - - def scan(self, message, regexp, func): - match = regexp.match(message) - if match: - func(*match.groups()) - return True - return False - - def nboard(self, version): - if version != "2": - logger.warning(f"UNKNOWN NBoard Version {version}!!!") - self.engine.reply( - f"set myname {self.config.nboard.my_name}({self.config.type})") - self.tell_status("waiting") - - def set_depth(self, depth): - """Set engine midgame search depth. - - Optional: Set midgame depth to {maxDepth}. Endgame depths are at the engine author's discretion. - :param depth: - """ - self.engine.set_depth(depth) - - def set_game(self, ggf_str): - """Tell the engine that all further commands relate to the position at the end of the given game, in GGF format. - - Required:The engine must update its stored game state. 
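# The dispatch pattern used by NBoardProtocolVersion2 above, reduced to a
# standalone sketch: the first regex that matches a message calls its
# handler with the captured groups. Patterns and handlers are illustrative.
import re

handlers = [
    (re.compile(r"ping ([0-9]+)"), lambda n: print(f"pong {n}")),
    (re.compile(r"set depth ([0-9]+)"), lambda d: print(f"depth {d}")),
]

def handle_message(message):
    for regexp, func in handlers:
        match = regexp.match(message)
        if match:
            func(*match.groups())
            return True
    return False

handle_message("ping 7")   # prints "pong 7", as the protocol requires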
- :param ggf_str: see https://skatgame.net/mburo/ggsa/ggf . important info are BO, B+, W+ - """ - ggf = parse_ggf(ggf_str) - black, white, actions = convert_to_bitboard_and_actions(ggf) - player = Player.black if ggf.BO.color == "*" else Player.white - self.engine.set_game(GameState(black, white, actions, player)) - - # if set_game at turn=1~2 is sent, reset engine state. - if len(actions) <= 1: - self.engine.reset_state() # clear MCTS cache - - def move(self, move, evaluation, time_sec): - """Tell the engine that all further commands relate to the position after the given move. - The move is 2 characters e.g. "F5". Eval is normally in centi-disks. Time is in seconds. - Eval and time may be omitted. If eval is omitted it is assumed to be "unknown"; - if time is omitted it is assumed to be 0. - - Required:Update the game state by making the move. No response required. - """ - # logger.debug(f"[{move}] [{evaluation}] [{time_sec}]") - - action = convert_move_to_action(move) - self.engine.move(action) - - def hint(self, n): - """Tell the engine to give evaluations for the given position. n tells how many moves to evaluate, - e.g. 2 means give evaluations for the top 2 positions. This is used when the user is analyzing a game. - With the "hint" command the engine is not CONSTRained by the time remaining in the game. - - Required: The engine sends back an evaluation for at its top move - - Best: The engine sends back an evaluation for approximately the top n moves. - If the engine searches using iterative deepening it should also send back evaluations during search, - which makes the GUI feel more responsive to the user. - - Depending on whether the evalation came from book or a search, the engine sends back - - search {pv: PV} {eval:Eval} 0 {depth:Depth} {freeform text} - or - book {pv: PV} {eval:Eval} {# games:long} {depth:Depth} {freeform text:string} - - PV: The pv must begin with two characters representing the move considered (e.g. "F5" or "PA") and - must not contain any whitespace. "F5d6C3" and "F5-D6-C3" are valid PVs but "F5 D6 C3" will - consider D6 to be the eval. - - Eval: The eval is from the point-of-view of the player to move and is a double. - At the engine's option it can also be an ordered pair of doubles separated by a comma: - {draw-to-black value}, {draw-to-white value}. - - Depth: depth is the search depth. It must start with an integer but can end with other characters; - for instance "100%W" is a valid depth. The depth cannot contain spaces. - - Two depth codes have special meaning to NBoard: "100%W" tells NBoard that the engine has solved - for a win/loss/draw and the sign of the eval matches the sign of the returned eval. - "100%" tells NBoard that the engine has done an exact solve. - The freeform text can be any other information that the engine wants to convey. - NBoard 1.1 and 2.0 do not display this information but later versions or other GUIs may. - - :param n: - """ - self.tell_status("thinkng hint...") - self.engine.hint(int(n)) - self.tell_status("waiting") - - def report_hint(self, hint_list): - for hint in reversed(hint_list): # there is a rule that the last is best? - move = convert_action_to_move(hint.action) - self.engine.reply( - f"search {move} {hint.value} 0 {int(hint.visit)}") - - def go(self): - """Tell the engine to decide what move it would play. - - This is used when the engine is playing in a game. - With the "go" command the computer is limited by both the maximum search depth and - the time remaining in the game. 
- - Required: The engine responds with "=== {move}" where move is e.g. "F5" - - Best: The engine responds with "=== {move:String}/{eval:float}/{time:float}". - Eval may be omitted if the move is forced. The engine also sends back thinking output - as in the "hint" command. - - Important: The engine does not update the board with this move, - instead it waits for a "move" command from NBoard. - This is because the user may have modified the board while the engine was thinking. - - Note: To make it easier for the engine author, - The NBoard gui sets the engine's status to "" when it receives the response. - The engine can override this behaviour by sending a "status" command immediately after the response. - """ - self.tell_status("thinking...") - gr = self.engine.go() - move = convert_action_to_move(gr.action) - self.engine.reply(f"=== {move}/{gr.eval * 10}/{gr.time}") - self.tell_status("waiting") - - def ping(self, n): - """Ensure synchronization when the board position is about to change. - - Required: Stop thinking and respond with "pong n". - If the engine is analyzing a position it must stop analyzing before sending "pong n" - otherwise NBoard will think the analysis relates to the current position. - :param n: - :return: - """ - # self.engine.stop_thinkng() # not implemented - self.engine.reply(f"pong {n}") - - def learn(self): - """Learn the current game. - Required: Respond "learned". - - Best: Add the current game to book. - - Note: To make it easier for the engine author, - The NBoard gui sets the engine's status to "" when it receives the "learned" response. - The engine can override this behaviour by sending a "status" command immediately after the response. - """ - self.engine.reply("learned") - - def analyze(self): - """Perform a retrograde analysis of the current game. - - Optional: Perform a retrograde analysis of the current game. - For each board position occurring in the game, - the engine sends back a line of the form analysis {movesMade:int} {eval:double}. - movesMade = 0 corresponds to the start position. Passes count towards movesMade, - so movesMade can go above 60. 
- """ - pass - - def tell_status(self, status): - self.engine.reply(f"status {status}") -import os -from logging import getLogger -from random import random -from time import sleep - -from reversi_zero.agent.model import ReversiModel -from reversi_zero.agent.player import ReversiPlayer -from reversi_zero.config import Config -from reversi_zero.env.reversi_env import ReversiEnv, Player, Winner -from reversi_zero.lib import tf_util -from reversi_zero.lib.data_helper import get_next_generation_model_dirs -from reversi_zero.lib.model_helpler import save_as_best_model, load_best_model_weight - -logger = getLogger(__name__) - - -def start(config: Config): - tf_util.set_session_config(per_process_gpu_memory_fraction=0.2) - return EvaluateWorker(config).start() - - -class EvaluateWorker: - def __init__(self, config: Config): - """ - - :param config: - """ - self.config = config - self.best_model = None - - def start(self): - self.best_model = self.load_best_model() - - while True: - ng_model, model_dir = self.load_next_generation_model() - logger.debug(f"start evaluate model {model_dir}") - ng_is_great = self.evaluate_model(ng_model) - if ng_is_great: - logger.debug(f"New Model become best model: {model_dir}") - save_as_best_model(ng_model) - self.best_model = ng_model - self.remove_model(model_dir) - - def evaluate_model(self, ng_model): - results = [] - winning_rate = 0 - for game_idx in range(self.config.eval.game_num): - # ng_win := if ng_model win -> 1, lose -> 0, draw -> None - ng_win, black_is_best, black_white = self.play_game( - self.best_model, ng_model) - if ng_win is not None: - results.append(ng_win) - winning_rate = sum(results) / len(results) - logger.debug(f"game {game_idx}: ng_win={ng_win} black_is_best_model={black_is_best} score={black_white} " - f"winning rate {winning_rate*100:.1f}%") - if results.count(0) >= self.config.eval.game_num * (1-self.config.eval.replace_rate): - logger.debug( - f"lose count reach {results.count(0)} so give up challenge") - break - if results.count(1) >= self.config.eval.game_num * self.config.eval.replace_rate: - logger.debug( - f"win count reach {results.count(1)} so change best model") - break - - winning_rate = sum(results) / len(results) - logger.debug(f"winning rate {winning_rate*100:.1f}%") - return winning_rate >= self.config.eval.replace_rate - - def play_game(self, best_model, ng_model): - env = ReversiEnv().reset() - - best_player = ReversiPlayer( - self.config, best_model, play_config=self.config.eval.play_config) - ng_player = ReversiPlayer( - self.config, ng_model, play_config=self.config.eval.play_config) - best_is_black = random() < 0.5 - if best_is_black: - black, white = best_player, ng_player - else: - black, white = ng_player, best_player - - observation = env.observation - while not env.done: - if env.next_player == Player.black: - action = black.action(observation.black, observation.white) - else: - action = white.action(observation.white, observation.black) - observation, info = env.step(action) - - ng_win = None - if env.winner == Winner.black: - if best_is_black: - ng_win = 0 - else: - ng_win = 1 - elif env.winner == Winner.white: - if best_is_black: - ng_win = 1 - else: - ng_win = 0 - return ng_win, best_is_black, observation.number_of_black_and_white - - def load_best_model(self): - model = ReversiModel(self.config) - load_best_model_weight(model) - return model - - def load_next_generation_model(self): - rc = self.config.resource - while True: - dirs = get_next_generation_model_dirs(self.config.resource) - if dirs: - break 
- logger.info(f"There is no next generation model to evaluate") - sleep(60) - model_dir = dirs[-1] if self.config.eval.evaluate_latest_first else dirs[0] - config_path = os.path.join( - model_dir, rc.next_generation_model_config_filename) - weight_path = os.path.join( - model_dir, rc.next_generation_model_weight_filename) - model = ReversiModel(self.config) - model.load(config_path, weight_path) - return model, model_dir - - def remove_model(self, model_dir): - rc = self.config.resource - config_path = os.path.join( - model_dir, rc.next_generation_model_config_filename) - weight_path = os.path.join( - model_dir, rc.next_generation_model_weight_filename) - os.remove(config_path) - os.remove(weight_path) - os.rmdir(model_dir) -import os -from collections import Counter -from datetime import datetime -from logging import getLogger -from time import sleep, time - -import keras.backend as K -import numpy as np -from keras.callbacks import Callback -from keras.optimizers import SGD - -from reversi_zero.agent.model import ReversiModel, objective_function_for_policy, \ - objective_function_for_value -from reversi_zero.config import Config -from reversi_zero.lib import tf_util -from reversi_zero.lib.bitboard import bit_to_array -from reversi_zero.lib.data_helper import get_game_data_filenames, read_game_data_from_file, \ - get_next_generation_model_dirs -from reversi_zero.lib.model_helpler import load_best_model_weight -from reversi_zero.lib.tensorboard_step_callback import TensorBoardStepCallback - -logger = getLogger(__name__) - - -def start(config: Config): - tf_util.set_session_config(per_process_gpu_memory_fraction=0.65) - return OptimizeWorker(config).start() - - -class OptimizeWorker: - def __init__(self, config: Config): - self.config = config - self.model = None # type: ReversiModel - self.loaded_filenames = set() - self.loaded_data = {} - self.training_count_of_files = Counter() - self.dataset = None - self.optimizer = None - - def start(self): - self.model = self.load_model() - self.training() - - def training(self): - self.compile_model() - total_steps = self.config.trainer.start_total_steps - save_model_callback = PerStepCallback(self.config.trainer.save_model_steps, self.save_current_model, - self.config.trainer.wait_after_save_model_ratio) - callbacks = [save_model_callback] # type: list[Callback] - tb_callback = None # type: TensorBoardStepCallback - - if self.config.trainer.use_tensorboard: - tb_callback = TensorBoardStepCallback( - log_dir=self.config.resource.tensorboard_log_dir, - logging_per_steps=self.config.trainer.logging_per_steps, - step=total_steps, - ) - callbacks.append(tb_callback) - - while True: - self.load_play_data() - if self.dataset_size < self.config.trainer.min_data_size_to_learn: - logger.info( - f"dataset_size={self.dataset_size} is less than {self.config.trainer.min_data_size_to_learn}") - sleep(10) - continue - self.update_learning_rate(total_steps) - total_steps += self.train_epoch( - self.config.trainer.epoch_to_checkpoint, callbacks) - self.count_up_training_count_and_delete_self_play_data_files() - - if tb_callback: # This code is never reached. But potentially this is required. 
- tb_callback.close() - - def train_epoch(self, epochs, callbacks): - tc = self.config.trainer - state_ary, policy_ary, z_ary = self.dataset - self.model.model.fit(state_ary, [policy_ary, z_ary], - batch_size=tc.batch_size, - callbacks=callbacks, - epochs=epochs) - steps = (state_ary.shape[0] // tc.batch_size) * epochs - return steps - - def compile_model(self): - self.optimizer = SGD(lr=1e-2, momentum=0.9) - losses = [objective_function_for_policy, objective_function_for_value] - self.model.model.compile(optimizer=self.optimizer, loss=losses) - - def update_learning_rate(self, total_steps): - # The deepmind paper says - # ~400k: 1e-2 - # 400k~600k: 1e-3 - # 600k~: 1e-4 - - lr = self.decide_learning_rate(total_steps) - if lr: - K.set_value(self.optimizer.lr, lr) - logger.debug( - f"total step={total_steps}, set learning rate to {lr}") - - def decide_learning_rate(self, total_steps): - ret = None - - if os.path.exists(self.config.resource.force_learing_rate_file): - try: - with open(self.config.resource.force_learing_rate_file, "rt") as f: - ret = float(str(f.read()).strip()) - if ret: - logger.debug( - f"loaded lr from force learning rate file: {ret}") - return ret - except ValueError: - pass - - for step, lr in self.config.trainer.lr_schedules: - if total_steps >= step: - ret = lr - return ret - - def save_current_model(self): - rc = self.config.resource - model_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f") - model_dir = os.path.join( - rc.next_generation_model_dir, rc.next_generation_model_dirname_tmpl % model_id) - os.makedirs(model_dir, exist_ok=True) - config_path = os.path.join( - model_dir, rc.next_generation_model_config_filename) - weight_path = os.path.join( - model_dir, rc.next_generation_model_weight_filename) - self.model.save(config_path, weight_path) - - def collect_all_loaded_data(self): - state_ary_list, policy_ary_list, z_ary_list = [], [], [] - for s_ary, p_ary, z_ary_ in self.loaded_data.values(): - state_ary_list.append(s_ary) - policy_ary_list.append(p_ary) - z_ary_list.append(z_ary_) - - if state_ary_list: - state_ary = np.concatenate(state_ary_list) - policy_ary = np.concatenate(policy_ary_list) - z_ary = np.concatenate(z_ary_list) - return state_ary, policy_ary, z_ary - else: - return None - - @property - def dataset_size(self): - if self.dataset is None: - return 0 - return len(self.dataset[0]) - - def load_model(self): - from reversi_zero.agent.model import ReversiModel - model = ReversiModel(self.config) - rc = self.config.resource - - dirs = get_next_generation_model_dirs(rc) - if not dirs: - logger.debug(f"loading best model") - if not load_best_model_weight(model): - raise RuntimeError(f"Best model can not loaded!") - else: - latest_dir = dirs[-1] - logger.debug(f"loading latest model") - config_path = os.path.join( - latest_dir, rc.next_generation_model_config_filename) - weight_path = os.path.join( - latest_dir, rc.next_generation_model_weight_filename) - model.load(config_path, weight_path) - return model - - def load_play_data(self): - filenames = get_game_data_filenames(self.config.resource) - updated = False - for filename in filenames: - if filename in self.loaded_filenames: - continue - self.load_data_from_file(filename) - updated = True - - for filename in (self.loaded_filenames - set(filenames)): - self.unload_data_of_file(filename) - updated = True - - if updated: - logger.debug("updating training dataset") - self.dataset = self.collect_all_loaded_data() - - def load_data_from_file(self, filename): - try: - logger.debug(f"loading data from 
{filename}") - data = read_game_data_from_file(filename) - self.loaded_data[filename] = self.convert_to_training_data(data) - self.loaded_filenames.add(filename) - except Exception as e: - logger.warning(str(e)) - - def unload_data_of_file(self, filename): - logger.debug(f"removing data about {filename} from training set") - self.loaded_filenames.remove(filename) - if filename in self.loaded_data: - del self.loaded_data[filename] - if filename in self.training_count_of_files: - del self.training_count_of_files[filename] - - def count_up_training_count_and_delete_self_play_data_files(self): - limit = self.config.trainer.delete_self_play_after_number_of_training - if not limit: - return - - for filename in self.loaded_filenames: - self.training_count_of_files[filename] += 1 - if self.training_count_of_files[filename] >= limit: - if os.path.exists(filename): - try: - logger.debug(f"remove {filename}") - os.remove(filename) - except Exception as e: - logger.warning(e) - - @staticmethod - def convert_to_training_data(data): - """ - - :param data: format is SelfPlayWorker.buffer - list of [(own: bitboard, enemy: bitboard), [policy: float 64 items], z: number] - :return: - """ - state_list = [] - policy_list = [] - z_list = [] - for state, policy, z in data: - own, enemy = bit_to_array(state[0], 64).reshape( - (8, 8)), bit_to_array(state[1], 64).reshape((8, 8)) - state_list.append([own, enemy]) - policy_list.append(policy) - z_list.append(z) - - return np.array(state_list), np.array(policy_list), np.array(z_list) - - -class PerStepCallback(Callback): - def __init__(self, per_step, callback, wait_after_save_model_ratio=None): - super().__init__() - self.per_step = per_step - self.step = 0 - self.callback = callback - self.wait_after_save_model_ratio = wait_after_save_model_ratio - self.last_wait_time = time() - - def on_batch_end(self, batch, logs=None): - self.step += 1 - if self.step % self.per_step == 0: - self.callback() - self.wait() - - def wait(self): - if self.wait_after_save_model_ratio: - time_spent = time() - self.last_wait_time - logger.debug(f"start sleeping {time_spent} seconds") - sleep(time_spent * self.wait_after_save_model_ratio) - logger.debug(f"finish sleeping") - self.last_wait_time = time() -import cProfile -import os -from concurrent.futures import ProcessPoolExecutor -from datetime import datetime -from logging import getLogger -from random import random -from time import time -from traceback import print_stack - -import numpy as np -from multiprocessing import Manager, Lock - - -from reversi_zero.agent.api import MultiProcessReversiModelAPIServer -from reversi_zero.agent.player import ReversiPlayer -from reversi_zero.config import Config -from reversi_zero.env.reversi_env import Board, Winner -from reversi_zero.env.reversi_env import ReversiEnv, Player -from reversi_zero.lib import tf_util -from reversi_zero.lib.data_helper import get_game_data_filenames, write_game_data_to_file -from reversi_zero.lib.file_util import read_as_int -from reversi_zero.lib.ggf import convert_action_to_move, make_ggf_string -from reversi_zero.lib.tensorboard_logger import TensorBoardLogger - -logger = getLogger(__name__) - - -def start(config: Config): - tf_util.set_session_config(per_process_gpu_memory_fraction=0.3) - api_server = MultiProcessReversiModelAPIServer(config) - process_num = config.play_data.multi_process_num - api_server.start_serve() - - with Manager() as manager: - shared_var = SharedVar(manager, game_idx=read_as_int( - config.resource.self_play_game_idx_file) or 0) - with 
ProcessPoolExecutor(max_workers=process_num) as executor: - futures = [] - for i in range(process_num): - play_worker = SelfPlayWorker(config, env=ReversiEnv(), api=api_server.get_api_client(), - shared_var=shared_var, worker_index=i) - futures.append(executor.submit(play_worker.start)) - - -class SharedVar: - def __init__(self, manager, game_idx: int): - """ - - :param Manager manager: - :param int game_idx: - """ - self._lock = manager.Lock() - # type: multiprocessing.managers.ValueProxy - self._game_idx = manager.Value('i', game_idx) - - @property - def game_idx(self): - return self._game_idx.value - - def incr_game_idx(self, n=1): - with self._lock: - self._game_idx.value += n - return self._game_idx.value - - -class SelfPlayWorker: - def __init__(self, config: Config, env, api, shared_var, worker_index=0): - """ - - :param config: - :param ReversiEnv|None env: - :param ReversiModelAPI|None api: - :param SharedVar shared_var: - :param int worker_index: - """ - self.config = config - self.env = env - self.api = api - self.shared_var = shared_var - self.black = None # type: ReversiPlayer - self.white = None # type: ReversiPlayer - self.buffer = [] - self.false_positive_count_of_resign = 0 - self.resign_test_game_count = 0 - self.worker_index = worker_index - self.tensor_board = None # type: TensorBoardLogger - self.move_history = None # type: MoveHistory - self.move_history_buffer = [] # type: list[MoveHistory] - - def start(self): - try: - self._start() - except Exception as e: - print(repr(e)) - print_stack() - - def _start(self): - logger.debug("SelfPlayWorker#start()") - np.random.seed(None) - worker_name = f"worker{self.worker_index:03d}" - self.tensor_board = TensorBoardLogger(os.path.join( - self.config.resource.self_play_log_dir, worker_name)) - - self.buffer = [] - mtcs_info = None - local_idx = 0 - - while True: - np.random.seed(None) - local_idx += 1 - game_idx = self.shared_var.game_idx - - start_time = time() - if mtcs_info is None and self.config.play.share_mtcs_info_in_self_play: - mtcs_info = ReversiPlayer.create_mtcs_info() - - # play game - env = self.start_game(local_idx, game_idx, mtcs_info) - - game_idx = self.shared_var.incr_game_idx() - # just log - end_time = time() - time_spent = end_time - start_time - logger.debug(f"play game {game_idx} time={time_spent} sec, " - f"turn={env.turn}:{env.board.number_of_black_and_white}:{env.winner}") - - # log play info to tensor board - prefix = "self" - log_info = {f"{prefix}/time": time_spent, - f"{prefix}/turn": env.turn} - if mtcs_info: - log_info[f"{prefix}/mcts_buffer_size"] = len(mtcs_info.var_p) - self.tensor_board.log_scaler(log_info, game_idx) - - # reset MCTS info per X games - if self.config.play.reset_mtcs_info_per_game and local_idx % self.config.play.reset_mtcs_info_per_game == 0: - logger.debug("reset MCTS info") - mtcs_info = None - - with open(self.config.resource.self_play_game_idx_file, "wt") as f: - f.write(str(game_idx)) - - def start_game(self, local_idx, last_game_idx, mtcs_info): - # profiler = cProfile.Profile() - # profiler.enable() - - self.env.reset() - enable_resign = self.config.play.disable_resignation_rate <= random() - self.config.play.simulation_num_per_move = self.decide_simulation_num_per_move( - last_game_idx) - logger.debug( - f"simulation_num_per_move = {self.config.play.simulation_num_per_move}") - self.black = self.create_reversi_player( - enable_resign=enable_resign, mtcs_info=mtcs_info) - self.white = self.create_reversi_player( - enable_resign=enable_resign, mtcs_info=mtcs_info) - if 
not enable_resign: - logger.debug("Resignation is disabled in the next game.") - observation = self.env.observation # type: Board - self.move_history = MoveHistory() - - # game loop - while not self.env.done: - # logger.debug(f"turn={self.env.turn}") - if self.env.next_player == Player.black: - action = self.black.action_with_evaluation( - observation.black, observation.white) - else: - action = self.white.action_with_evaluation( - observation.white, observation.black) - self.move_history.move(self.env, action) - observation, info = self.env.step(action.action) - - self.finish_game(resign_enabled=enable_resign) - self.save_play_data(write=local_idx % - self.config.play_data.nb_game_in_file == 0) - self.remove_play_data() - - if self.config.play_data.enable_ggf_data: - is_write = local_idx % self.config.play_data.nb_game_in_ggf_file == 0 - is_write |= local_idx <= 5 - self.save_ggf_data(write=is_write) - - # profiler.disable() - # profiler.dump_stats(f"profile-worker-{self.worker_index}-{local_idx}") - return self.env - - def create_reversi_player(self, enable_resign=None, mtcs_info=None): - return ReversiPlayer(self.config, None, enable_resign=enable_resign, mtcs_info=mtcs_info, api=self.api) - - def save_play_data(self, write=True): - # drop draw game by drop_draw_game_rate - if self.black.moves[0][-1] != 0 or self.config.play_data.drop_draw_game_rate <= np.random.random(): - data = self.black.moves + self.white.moves - self.buffer += data - - if not write or not self.buffer: - return - - rc = self.config.resource - game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f") - path = os.path.join( - rc.play_data_dir, rc.play_data_filename_tmpl % game_id) - logger.info(f"save play data to {path}") - write_game_data_to_file(path, self.buffer) - self.buffer = [] - - def save_ggf_data(self, write=True): - self.move_history_buffer.append(self.move_history) - if not write: - return - - rc = self.config.resource - game_id = datetime.now().strftime("%Y%m%d-%H%M%S.%f") - path = os.path.join(rc.self_play_ggf_data_dir, - rc.ggf_filename_tmpl % game_id) - with open(path, "wt") as f: - for mh in self.move_history_buffer: - f.write(mh.make_ggf_string("RAZ", "RAZ") + "\n") - self.move_history_buffer = [] - - def remove_play_data(self): - files = get_game_data_filenames(self.config.resource) - if len(files) < self.config.play_data.max_file_num: - return - try: - for i in range(len(files) - self.config.play_data.max_file_num): - os.remove(files[i]) - except: - pass - - def finish_game(self, resign_enabled=True): - if self.env.winner == Winner.black: - black_win = 1 - false_positive_of_resign = self.black.resigned - elif self.env.winner == Winner.white: - black_win = -1 - false_positive_of_resign = self.white.resigned - else: - black_win = 0 - false_positive_of_resign = self.black.resigned or self.white.resigned - - self.black.finish_game(black_win) - self.white.finish_game(-black_win) - - if not resign_enabled: - self.resign_test_game_count += 1 - if false_positive_of_resign: - self.false_positive_count_of_resign += 1 - logger.debug("false positive of resignation happened") - self.check_and_update_resignation_threshold() - - def reset_false_positive_count(self): - self.false_positive_count_of_resign = 0 - self.resign_test_game_count = 0 - - @property - def false_positive_rate(self): - if self.resign_test_game_count == 0: - return 0 - return self.false_positive_count_of_resign / self.resign_test_game_count - - def check_and_update_resignation_threshold(self): - if self.resign_test_game_count < 100 or 
self.config.play.resign_threshold is None: - return - - old_threshold = self.config.play.resign_threshold - if self.false_positive_rate >= self.config.play.false_positive_threshold: - self.config.play.resign_threshold -= self.config.play.resign_threshold_delta - else: - self.config.play.resign_threshold += self.config.play.resign_threshold_delta - logger.debug( - f"update resign_threshold: {old_threshold} -> {self.config.play.resign_threshold}") - self.reset_false_positive_count() - - def decide_simulation_num_per_move(self, idx): - ret = read_as_int(self.config.resource.force_simulation_num_file) - - if ret: - logger.debug(f"loaded simulation num from file: {ret}") - return ret - - for min_idx, num in self.config.play.schedule_of_simulation_num_per_move: - if idx >= min_idx: - ret = num - return ret - - -class MoveHistory: - def __init__(self): - self.moves = [] - - def move(self, env, action): - """ - - :param ReversiEnv env: - :param ActionWithEvaluation action: - :return: - """ - if action.action is None: - return # resigned - - if len(self.moves) % 2 == 0: - if env.next_player == Player.white: - self.moves.append(convert_action_to_move(None)) - else: - if env.next_player == Player.black: - self.moves.append(convert_action_to_move(None)) - move = f"{convert_action_to_move(action.action)}/{action.q*10}/{action.n}" - self.moves.append(move) - - def make_ggf_string(self, black_name=None, white_name=None): - return make_ggf_string(black_name=black_name, white_name=white_name, moves=self.moves) -from .reversi_solver_cython import * -import pyximport -pyximport.install() -# System Modules -import numpy as np -import tensorflow as tf -from models.core_generator import CoreGenerator -from models.discriminator_medium import DiscriminatorMedium -from models.discriminator_low import DiscriminatorLow -from models.discriminator_full import DiscriminatorFull -import os -import time - -# Extra -from keras.engine.topology import Input -from keras.engine.training import Model -from keras.layers import LeakyReLU, Concatenate, Dropout -from keras.layers.convolutional import Conv2D, UpSampling2D, Conv2DTranspose -from keras.layers.core import Activation, SpatialDropout2D -from keras.layers.merge import concatenate -from keras.layers.normalization import BatchNormalization -from keras.layers.pooling import MaxPooling2D -from models.utils.instance_normalization import InstanceNormalization -from models.utils.sn import ConvSN2D -from models.utils.calc_output_and_feature_size import calc_output_and_feature_size -from models.utils.attention import Attention -from keras.layers import Conv2D, Lambda, add, AvgPool2D, Activation, UpSampling2D, Input, concatenate, Reshape, LeakyReLU, Reshape, Flatten, concatenate - -# Custom Libs -from models.utils.calc_output_and_feature_size import calc_output_and_feature_size -from lib.data_utils import save_sample_images, write_log, generate_training_images -from lib.data_utils import generator, generate_label_data - -# Keras Modules -import keras -from keras.utils import multi_gpu_model -from keras.layers import Lambda, UpSampling2D, Input, concatenate -from keras.utils.data_utils import GeneratorEnqueuer -from keras.utils import multi_gpu_model -from keras.callbacks import TensorBoard -from keras.optimizers import Adam -from keras.models import Model, save_model, load_model -from keras import backend as K -K.clear_session() - -# Import models - -# Other Modules - -# ---------- -# Settings -# ---------- - -height = 128 -width = 128 -channels = 1 -epochs = 10 -gpus = 1 
-batch_size = 5
-cpus = 2
-use_multiprocessing = True
-save_weights_every_n_epochs = 0.01
-max_queue_size = batch_size * 1
-img_dir = "./Train/"
-test_dir = "./Test/"
-resource_dir = "./resources/"
-dataset_len = len(os.listdir(img_dir))
-testset_len = len(os.listdir(test_dir))
-learning_rate = 0.0002
-experiment_name = time.strftime("%Y-%m-%d-%H-%M")
-decay_rate = learning_rate / ((dataset_len / batch_size) * epochs)
-
-
-# ----------------------------------
-# Load filenames
-# ----------------------------------
-
-X = []
-for filename in os.listdir(img_dir):
-    X.append(filename)
-
-Test = []
-for filename in os.listdir(test_dir):
-    Test.append(filename)
-
-# ----------------------------------
-# Create directory for sample data
-# ----------------------------------
-
-main_dir = './output/256/' + experiment_name
-save_sample_images_dir = main_dir + '/sample_images/'
-save_validation_images_dir = main_dir + '/validation_images/'
-weights_dir = main_dir + '/weights/'
-log_path = main_dir + '/logs/'
-model_path = main_dir + '/models/'
-
-if not os.path.exists(main_dir):
-    os.makedirs(main_dir)
-    os.makedirs(save_sample_images_dir)
-    os.makedirs(save_validation_images_dir)
-    os.makedirs(log_path)
-    os.makedirs(weights_dir)
-    os.makedirs(model_path)
-
-# ---------------
-# Import Models
-# ---------------
-
-core_generator = CoreGenerator(gpus=gpus, width=width, height=height)
-discriminator_full = DiscriminatorFull(
-    gpus=gpus, decay_rate=decay_rate, width=width, height=height)
-discriminator_medium = DiscriminatorMedium(
-    gpus=gpus, decay_rate=decay_rate, width=width, height=height)
-discriminator_low = DiscriminatorLow(
-    gpus=gpus, decay_rate=decay_rate, width=width, height=height)
-
-if os.path.isdir("./resources/"):
-    core_generator.model.load_weights('./resources/core_generator.h5')
-    discriminator_full.model.load_weights('./resources/discriminator_full.h5')
-    discriminator_medium.model.load_weights(
-        './resources/discriminator_medium.h5')
-    discriminator_low.model.load_weights('./resources/discriminator_low.h5')
-
-# Create a directory to save weights
-if not os.path.exists(resource_dir):
-    os.makedirs(resource_dir)
-
-# Freeze all three discriminators (on the underlying Keras models) before the
-# combined GAN below is compiled, so generator updates do not touch them.
-discriminator_full.model.trainable = False
-discriminator_medium.model.trainable = False
-discriminator_low.model.trainable = False
-
-
-# --------------------------------
-# Create GAN with core generator
-# --------------------------------
-
-# Generate image with core generator
-gan_x = Input(shape=(height, width, channels,))
-gan_y = Input(shape=(height, width, 2,))
-
-# Extract style features and add them to image
-gan_output = core_generator.model(gan_x)
-
-# Extract features and predictions from discriminators
-disc_input = concatenate([gan_x, gan_output], axis=-1)
-pred_full, features_full = discriminator_full.model(disc_input)
-pred_medium, features_medium = discriminator_medium.model(disc_input)
-pred_low, features_low = discriminator_low.model(disc_input)
-
-# Compile GAN
-gan_core = Model(inputs=gan_x, outputs=[
-                 gan_output, features_full, features_medium, features_low, pred_full, pred_medium, pred_low])
-
-gan_core.name = "gan_core"
-optimizer = Adam(learning_rate, 0.5, decay=decay_rate)
-loss_gan = ['mae', 'mae', 'mae', 'mae', 'mse', 'mse', 'mse']
-loss_weights_gan = [1, 3.33, 3.33, 3.33, 0.33, 0.33, 0.33]
-
-# gan_core = multi_gpu_model(gan_core_org)
-gan_core.compile(optimizer=optimizer,
-                 loss_weights=loss_weights_gan, loss=loss_gan)
-
-
-# --------------------------------
-# Compile Discriminator
-# --------------------------------
-
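-# A minimal sketch (illustrative, not part of the original script) of the
-# Keras freeze-before-compile pattern this file relies on: `trainable` is
-# read when compile() is called, so a discriminator frozen inside the
-# combined model can still learn through its own compiled instance. The
-# names `g`, `d` and `_freeze_pattern_sketch` are hypothetical.
-def _freeze_pattern_sketch(g, d, input_shape=(128, 128, 1)):
-    from keras.layers import Input
-    from keras.models import Model
-    from keras.optimizers import Adam
-    d.trainable = False                     # frozen inside the combined model
-    z = Input(shape=input_shape)
-    combined = Model(z, d(g(z)))
-    combined.compile(optimizer=Adam(2e-4), loss='mse')
-    d.trainable = True                      # trains when compiled standalone
-    d.compile(optimizer=Adam(2e-4), loss='mse')
-    return combined
-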
-discriminator_full.model.trainable = True
-discriminator_medium.model.trainable = True
-discriminator_low.model.trainable = True
-
-
-def zero_loss(y_true, y_pred):
-    return K.zeros_like(y_true)
-
-
-loss_d = ['mse', zero_loss]
-loss_weights_d = [1, 0]
-optimizer_dis = Adam(learning_rate, 0.5, decay=decay_rate)
-
-discriminator_full_multi = discriminator_full.model
-discriminator_medium_multi = discriminator_medium.model
-discriminator_low_multi = discriminator_low.model
-
-discriminator_full_multi.compile(
-    optimizer=optimizer_dis, loss_weights=loss_weights_d, loss=loss_d)
-discriminator_medium_multi.compile(
-    optimizer=optimizer_dis, loss_weights=loss_weights_d, loss=loss_d)
-discriminator_low_multi.compile(
-    optimizer=optimizer_dis, loss_weights=loss_weights_d, loss=loss_d)
-
-
-# --------------------------------------------------
-# Initiate Generator Queue
-# --------------------------------------------------
-
-enqueuer = GeneratorEnqueuer(generator(X, img_dir, batch_size, dataset_len,
-                                       width, height), use_multiprocessing=use_multiprocessing, wait_time=0.01)
-
-enqueuer.start(workers=cpus, max_queue_size=max_queue_size)
-output_generator = enqueuer.get()
-
-# ---------------------------------
-# Initiate values for Tensorboard
-# ---------------------------------
-
-callback_Full = TensorBoard(log_path)
-callback_Medium = TensorBoard(log_path)
-callback_Low = TensorBoard(log_path)
-callback_gan = TensorBoard(log_path)
-
-callback_Full.set_model(discriminator_full.model)
-callback_Medium.set_model(discriminator_medium.model)
-callback_Low.set_model(discriminator_low.model)
-callback_gan.set_model(gan_core)
-
-# Metric names follow the order the values are logged in below: each
-# discriminator is trained on the fake batch first, then the real batch.
-callback_Full_names = ['weighted_loss_fake_full', 'disc_loss_fake_full',
-                       'zero_1', 'weighted_loss_real_full', 'disc_loss_real_full', 'zero_2']
-callback_Medium_names = ['weighted_loss_fake_medium', 'disc_loss_fake_medium',
-                         'zero_3', 'weighted_loss_real_medium', 'disc_loss_real_medium', 'zero_4']
-callback_Low_names = ['weighted_loss_fake_low', 'disc_loss_fake_low',
-                      'zero_5', 'weighted_loss_real_low', 'disc_loss_real_low', 'zero_6']
-callback_gan_names = ['total_gan_loss', 'image_diff', 'feature_diff_disc_full',
-                      'feature_diff_disc_medium', 'feature_diff_disc_low',
-                      'predictions_full', 'predictions_medium', 'predictions_low']
-
-# Decide how often to create sample images, save log data, and weights.
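-# (Note: dataset_len / batch_size is the number of batches per epoch, so both
-# save intervals below fire roughly once per epoch, and `cycles` is simply
-# batches-per-epoch times the number of epochs.)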
-cycles = int(epochs * (dataset_len / batch_size)) -save_images_cycle = int((dataset_len / batch_size)) -save_weights_cycle = int((dataset_len / batch_size)) - -# Calculate the discriminator output size for features and image predictions -pred_size_f, feat_size_f = calc_output_and_feature_size(width, height) -pred_size_m, feat_size_m = calc_output_and_feature_size(width/2, height/2) -pred_size_l, feat_size_l = calc_output_and_feature_size(width/4, height/4) - -# Create benchmark to see progress -start = time.time() - - -def concatenateNumba(x, y): - return np.concatenate([x, y], axis=-1) - - -for i in range(0, cycles): - start_c = time.time() - # ------------------------ - # Generate Training Data - # ------------------------ - - # Discriminator data - x_full, y_full, x_and_y_full = next(output_generator) - x_medium, y_medium, x_and_y_medium = next(output_generator) - x_low, y_low, x_and_y_low = next(output_generator) - - # Fixed data - fake_labels_f, true_labels_f, dummy_f = generate_label_data( - batch_size, pred_size_f, feat_size_f) - fake_labels_m, true_labels_m, dummy_m = generate_label_data( - batch_size, pred_size_m, feat_size_m) - fake_labels_l, true_labels_l, dummy_l = generate_label_data( - batch_size, pred_size_l, feat_size_l) - - # GAN data - x_gan, y_gan, x_and_y_gan = next(output_generator) - - # ---------------------- - # Train Discriminators - # ---------------------- - - y_gen_full, _, _, _, _, _, _ = gan_core.predict(x_full) - x_and_y_gen_full = concatenateNumba(x_full, y_gen_full) - - # Prepare data for Medium Resolution Discriminator - y_gen_medium, _, _, _, _, _, _ = gan_core.predict(x_medium) - x_and_y_gen_medium = concatenateNumba(x_medium, y_gen_medium) - - # Prepare data for Low Resolution Discriminator - y_gen_low, _, _, _, _, _, _ = gan_core.predict(x_low) - x_and_y_gen_low = concatenateNumba(x_low, y_gen_low) - - # Train Discriminators - d_loss_fake_full = discriminator_full_multi.train_on_batch( - x_and_y_gen_full, [fake_labels_f, dummy_f]) - d_loss_real_full = discriminator_full_multi.train_on_batch( - x_and_y_full, [true_labels_f, dummy_f]) - - d_loss_fake_medium = discriminator_medium_multi.train_on_batch( - x_and_y_gen_medium, [fake_labels_m, dummy_m]) - d_loss_real_medium = discriminator_medium_multi.train_on_batch( - x_and_y_medium, [true_labels_m, dummy_m]) - - d_loss_fake_low = discriminator_low_multi.train_on_batch( - x_and_y_gen_low, [fake_labels_l, dummy_l]) - d_loss_real_low = discriminator_low_multi.train_on_batch( - x_and_y_low, [true_labels_l, dummy_l]) - - # ----------- - # Train GAN - # ----------- - - # Extract featuers from discriminators - _, real_features_full = discriminator_full_multi.predict(x_and_y_gan) - _, real_features_medium = discriminator_medium_multi.predict(x_and_y_gan) - _, real_features_low = discriminator_low_multi.predict(x_and_y_gan) - - # Train GAN on one batch - gan_core_loss = gan_core.train_on_batch(x_gan, [y_gan, - real_features_full, - real_features_medium, - real_features_low, - true_labels_f, - true_labels_m, - true_labels_l]) - - # ------------------------------------------- - # Save image samples, weights, and log data - # ------------------------------------------- - - # Print log data to tensorboard - write_log(callback_Full, callback_Full_names, - d_loss_fake_full + d_loss_real_full, i) - write_log(callback_Medium, callback_Medium_names, - d_loss_fake_medium + d_loss_real_medium, i) - write_log(callback_Low, callback_Low_names, - d_loss_fake_low + d_loss_real_low, i) - write_log(callback_gan, 
callback_gan_names, gan_core_loss, i) - - end_c = time.time() - print("\n\nCycle:", i) - print("Time:", end_c - start_c) - print("Total images:", batch_size * i) - - # Save sample images - if i % save_images_cycle == 0: - print('Print those bad boys:', i) - end = time.time() - hours, rem = divmod(end-start, 3600) - minutes, seconds = divmod(rem, 60) - print("{:0>2}:{:0>2}:{:05.2f}".format( - int(hours), int(minutes), seconds)) - x_val, y_val, x_y_val = generate_training_images( - Test, 5, testset_len, width, height, test_dir) - output_benchmark, _, _, _, _, _, _ = gan_core.predict(x_val) - save_sample_images(output_benchmark, x_val, 'b-' + - str(i), save_validation_images_dir) - save_sample_images(y_gen_full, x_full, str(i), save_sample_images_dir) - start = time.time() - - # Save weights - if i % save_weights_cycle == 0: - discriminator_full.model.save_weights( - weights_dir + str(i) + "-discriminator_full.h5") - discriminator_medium.model.save_weights( - weights_dir + str(i) + "-discriminator_medium.h5") - discriminator_low.model.save_weights( - weights_dir + str(i) + "-discriminator_low.h5") - core_generator.model.save_weights( - weights_dir + str(i) + "-core_generator.h5") - - discriminator_full.model.save_weights( - resource_dir + "discriminator_full.h5") - discriminator_medium.model.save_weights( - resource_dir + "discriminator_medium.h5") - discriminator_low.model.save_weights( - resource_dir + "discriminator_low.h5") - core_generator.model.save_weights(resource_dir + "core_generator.h5") -import os -from PIL import Image -from multiprocessing import Pool - - -def crop_image(path_and_file): - with Image.open(path_and_file[0]) as im: - x, y = im.size - im.crop((0, 18, x - 18, y)).save( - '/home/userai/jobs/drawing2logo/data/screenshots_382/' + path_and_file[1], "PNG") - - -if __name__ == "__main__": - img_dir = r"./screenshots/" - images = [] - - for filename in os.listdir(img_dir): - filepath = os.path.join(img_dir, filename) - images.append([filepath, filename]) - - pool = Pool(processes=120) - pool.map(crop_image, images) - - print("Done!") -import os -from PIL import Image -from multiprocessing import Pool - -size = 224, 224 - - -def resize_image(path_and_file): - with Image.open(path_and_file[0]) as im: - im.thumbnail(size) - im.save('/home/userai/jobs/drawing2logo/data/screenshots_224/' + - path_and_file[1], "PNG") - - -if __name__ == "__main__": - img_dir = r"./screenshots_382/" - images = [] - - for filename in os.listdir(img_dir): - filepath = os.path.join(img_dir, filename) - images.append([filepath, filename]) - - pool = Pool(processes=125) - pool.map(resize_image, images) - - print("Done!") -import numpy as np -from keras.preprocessing.image import ImageDataGenerator, array_to_img, img_to_array, load_img -from keras.preprocessing import image -import tensorflow as tf -from keras.callbacks import TensorBoard -from keras.layers import Input, Dense -from keras.models import Model -import os -from skimage.transform import resize, rotate, rescale -from skimage.color import rgb2lab, lab2rgb, rgb2gray, gray2rgb -from skimage.io import imsave -import random - - -def turn_filename_into_image(filenames, batch_size, width, height, img_dir): - - empty_x = [] - rot_value = random.randint(-20, 20) - flip_lr = bool(random.getrandbits(1)) - flip_ud = bool(random.getrandbits(1)) - - for name in filenames: - image_x = img_to_array( - load_img(img_dir + name, target_size=(width, height))) - image_x = np.array(image_x, dtype='float32') - image_x = (1.0/(255./2))*image_x - 1 - - image_x = 
rotate(image_x, rot_value, mode='reflect') - - if flip_lr: - image_x = np.fliplr(image_x) - - empty_x.append(image_x) - - empty_x = np.array(empty_x, dtype='float32') - lab_batch = rgb2lab(empty_x) - X_batch = lab_batch[:, :, :, 0] / 100 - X_batch = X_batch.reshape(X_batch.shape+(1,)) - Y_batch = lab_batch[:, :, :, 1:] / 128 - - return np.array(X_batch, dtype='float32'), np.array(Y_batch, dtype='float32') - - -def random_image_index(dataset_len, batchsize): - start = random.randint(0, (dataset_len - (batchsize + 1))) - end = start + batchsize - return start, end - - -def generate_training_images(filenames, batch_size, dataset_len, width, height, img_dir): - - start, end = random_image_index(dataset_len, batch_size) - names = filenames[start:end] - x, y = turn_filename_into_image(names, batch_size, width, height, img_dir) - x_and_y = np.concatenate([x, y], axis=-1) - - return x, y, x_and_y - - -def generator(X, img_dir, batch_size, dataset_len, width, height): - while True: - x, y, x_and_y = generate_training_images( - X, batch_size, dataset_len, width, height, img_dir) - yield x, y, x_and_y - - -def generate_label_data(batch_size, output_size_pred, output_size_features): - - fake_labels = np.zeros((batch_size, output_size_pred, 1)) - true_labels = np.ones((batch_size, output_size_pred, 1)) - placeholder_input = np.zeros((batch_size, output_size_features, 1)) - - return fake_labels, true_labels, placeholder_input - - -def save_each_image(colored_layers, BW_layer, cycle, nr, path, ending): - - cur = np.zeros((128, 128, 3)) - cur[:, :, 0] = BW_layer[:, :, 0] * 100 - cur[:, :, 1:] = colored_layers * 128 - imsave(os.path.join(path, cycle + nr + ending), lab2rgb(cur)) - - -def save_sample_images(colored_layers, BW_layer, cycle, path): - for i in range(len(colored_layers)): - save_each_image(colored_layers[i], BW_layer[i], - cycle, str(i), path, '-gen.png') - - -def write_log(callback, names, logs, batch_no): - for name, value in zip(names, logs): - summary = tf.Summary() - summary_value = summary.value.add() - summary_value.simple_value = value - summary_value.tag = name - callback.writer.add_summary(summary, batch_no) - callback.writer.flush() -from keras.engine.topology import Input -from keras.engine.training import Model -from keras.layers import LeakyReLU, Concatenate, Dropout -from keras.layers.convolutional import Conv2D, UpSampling2D, Conv2DTranspose -from keras.layers.core import Activation, SpatialDropout2D -from keras.layers.merge import concatenate -from keras.layers.normalization import BatchNormalization -from keras.layers.pooling import MaxPooling2D -from .utils.instance_normalization import InstanceNormalization -from .utils.sn import ConvSN2D -from .utils.attention import Attention -import keras - - -class CoreGenerator(): - """Core Generator. - - # Arguments - width: Width of image in pixels - height: Height of image in pixels - channels: Channels for the input image - gpus: The number of gpus you will be using. - """ - - def __init__(self, - width=256, - height=256, - channels=1, - gpus=0): - - self.width = width - self.height = height - self.channels = channels - self.gpus = gpus - self.gf = 64 - - # ------------------------------------------------------------------------------------- - # Core Generator - # The U-net structure is from Erik Linder-Noren's brilliant pix2pix model - # Source: https://github.com/eriklindernoren/Keras-GAN/blob/master/pix2pix/pix2pix.py - # Modifications: Thinner to enable 128x128 images, Spectral Normalization and - # an Attention layer. 
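-        # Below, conv2d halves the spatial size with a stride-2, spectrally
-        # normalized convolution; each deconv2d upsamples and concatenates the
-        # matching encoder map (d6..d2), i.e. the U-Net skip connections.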
- # ------------------------------------------------------------------------------------- - - def conv2d(layer_input, filters, f_size=4): - """Layers used during downsampling""" - d = ConvSN2D(filters, kernel_size=f_size, - strides=2, padding='same')(layer_input) - d = LeakyReLU(alpha=0.2)(d) - - return d - - def deconv2d(layer_input, skip_input, filters, f_size=4, dropout_rate=0): - """Layers used during upsampling""" - u = UpSampling2D(size=2)(layer_input) - u = ConvSN2D(filters, kernel_size=f_size, strides=1, - padding='same', activation='relu')(u) - if dropout_rate: - u = Dropout(dropout_rate)(u) - u = Concatenate()([u, skip_input]) - return u - - # Image input - d1 = Input(shape=(width, height, channels)) - - # Downsampling - d2 = conv2d(d1, self.gf*2) - d3 = conv2d(d2, self.gf*4) - d4 = conv2d(d3, self.gf*8) - d5 = conv2d(d4, self.gf*8) - d6 = conv2d(d5, self.gf*8) - d7 = conv2d(d6, self.gf*8) - - # Upsampling - u1 = deconv2d(d7, d6, self.gf*8) - u2 = deconv2d(u1, d5, self.gf*8) - u3 = deconv2d(u2, d4, self.gf*8) - u4 = deconv2d(u3, d3, self.gf*4) - u4_att = Attention(512)(u4) - u5 = deconv2d(u4_att, d2, self.gf*2) - - u6 = UpSampling2D(size=2)(u5) - output = ConvSN2D(2, kernel_size=(7, 7), strides=1, - padding='same', activation='tanh')(u6) - - core_generator = Model(d1, output) - core_generator.name = "core_generator" - - # -------------- - # Compile Model - # -------------- - - if self.gpus < 2: - self.model = core_generator - self.save_model = self.model - else: - self.save_model = core_generator - self.model = multi_gpu_model(self.save_model, gpus=gpus) -from .utils.calc_output_and_feature_size import calc_output_and_feature_size -from .utils.instance_normalization import InstanceNormalization -from keras.models import model_from_json, Model -from .utils.sn import ConvSN2D -from keras.optimizers import Adam -from keras import backend as K -from .utils.attention import Attention -from keras.utils import multi_gpu_model -from keras.layers import Conv2D, Lambda, add, AvgPool2D, Activation, UpSampling2D, Input, concatenate, Reshape, LeakyReLU, Reshape, Flatten, concatenate - - -class DiscriminatorFull(): - """Full Resolution Discriminator. - - # Arguments - width: Width of image in pixels - height: Height of image in pixels - channels: Channels for the input image and the generated image - gpus: The number of gpus you will be using. 
- learning_rate: Learning rate - decay_rate: The amount of learning decay for each training update - """ - - def __init__(self, - width=256, - height=256, - channels=3, - learning_rate=0.0002, - decay_rate=2e-6, - gpus=0): - - self.width = width - self.height = height - self.channels = channels - self.gpus = gpus - self.learning_rate = learning_rate - self.decay_rate = decay_rate - - # ---------------------- - # Discriminator Fullres - # ---------------------- - - output_size_full_picture, output_size_full_features = calc_output_and_feature_size( - self.width, self.height) - - discriminator_input = Input( - shape=(self.height, self.width, self.channels,)) - - x_1 = ConvSN2D(64, 4, padding='same', strides=2)(discriminator_input) - x = LeakyReLU(alpha=0.2)(x_1) - - x_2 = ConvSN2D(128, 4, padding='same', strides=2)(x) - x = LeakyReLU(alpha=0.2)(x_2) - - x_2_att = Attention(128)(x) - - x_3 = ConvSN2D(256, 4, padding='same', strides=2)(x_2_att) - x = LeakyReLU(alpha=0.2)(x_3) - - x_4 = ConvSN2D(512, 4, padding='same', strides=1)(x) - x = LeakyReLU(alpha=0.2)(x_4) - - x = ConvSN2D(1, 4, padding='same', strides=1)(x) - x = Reshape([output_size_full_picture, 1])(x) - - discriminator_features = concatenate( - [Flatten()(x_1), Flatten()(x_2), Flatten()(x_3), Flatten()(x_4)], axis=1) - discriminator_features = Reshape( - [output_size_full_features, 1])(discriminator_features) - - def zero_loss(y_true, y_pred): - return K.zeros_like(y_true) - - loss_d = ['mse', zero_loss] - - if self.gpus < 2: - self.model = Model(discriminator_input, [ - x, discriminator_features]) - self.save_model = self.model - else: - self.save_model = Model(discriminator_input, [ - x, discriminator_features]) - self.model = multi_gpu_model(self.save_model, gpus=gpus) - - loss_weights_d = [1, 0] - optimizer = Adam(self.learning_rate, 0.5, decay=self.decay_rate) - self.model.compile(optimizer=optimizer, - loss_weights=loss_weights_d, loss=loss_d) -from .utils.calc_output_and_feature_size import calc_output_and_feature_size -from .utils.sn import ConvSN2D -from .utils.instance_normalization import InstanceNormalization -from keras.models import model_from_json, Model -from keras.optimizers import Adam -from keras import backend as K -from .utils.attention import Attention -from keras.utils import multi_gpu_model -from keras.layers import Conv2D, Lambda, add, AvgPool2D, Activation, UpSampling2D, Input, concatenate, Reshape, LeakyReLU, Reshape, Flatten, concatenate - - -class DiscriminatorLow(): - """1/4 Resolution Discriminator. - - # Arguments - width: Width of image in pixels - height: Height of image in pixels - channels: Channels for the input image - gpus: The number of gpus you will be using. 
- learning_rate: Learning rate - decay_rate: The amount of learning decay for each training update - """ - - def __init__(self, - width=256, - height=256, - channels=3, - learning_rate=0.0002, - decay_rate=2e-6, - gpus=0): - - self.width = width - self.height = height - self.channels = channels - self.gpus = gpus - self.learning_rate = learning_rate - self.decay_rate = decay_rate - - # ----------------------------- - # Discriminator Low resolution - # ----------------------------- - - output_size_low_picture, output_size_low_features = calc_output_and_feature_size( - self.height/4, self.width/4) - - discriminator_low_res_input = Input( - shape=(self.height, self.width, self.channels,)) - discriminator_low_res_input_downsample = AvgPool2D( - 2, padding='same')(discriminator_low_res_input) - discriminator_low_res_input_downsample = AvgPool2D( - 2, padding='same')(discriminator_low_res_input_downsample) - - x_1 = ConvSN2D(64, 4, padding='same', strides=2)( - discriminator_low_res_input_downsample) - x = LeakyReLU(alpha=0.2)(x_1) - - x_1_att = Attention(64)(x) - - x_2 = ConvSN2D(128, 4, padding='same', strides=2)(x_1_att) - x = LeakyReLU(alpha=0.2)(x_2) - - x_3 = ConvSN2D(256, 4, padding='same', strides=2)(x) - x = LeakyReLU(alpha=0.2)(x_3) - - x_4 = ConvSN2D(512, 4, padding='same', strides=1)(x) - x = LeakyReLU(alpha=0.2)(x_4) - - x = ConvSN2D(1, 4, padding='same', strides=1)(x) - x = Reshape([output_size_low_picture, 1])(x) - - discriminator_low_features = concatenate( - [Flatten()(x_1), Flatten()(x_2), Flatten()(x_3), Flatten()(x_4)], axis=1) - discriminator_low_features = Reshape( - [output_size_low_features, 1])(discriminator_low_features) - - def zero_loss(y_true, y_pred): - return K.zeros_like(y_true) - - loss_d = ['mse', zero_loss] - loss_weights_d = [1, 0] - optimizer = Adam(self.learning_rate, 0.5, decay=self.decay_rate) - - if self.gpus < 2: - self.model = Model(discriminator_low_res_input, [ - x, discriminator_low_features]) - self.save_model = self.model - else: - self.save_model = Model(discriminator_low_res_input, [ - x, discriminator_low_features]) - self.model = multi_gpu_model(self.save_model, gpus=self.gpus) - - self.model.compile(optimizer=optimizer, - loss_weights=loss_weights_d, loss=loss_d) -from .utils.calc_output_and_feature_size import calc_output_and_feature_size -from .utils.sn import ConvSN2D -from .utils.instance_normalization import InstanceNormalization -from keras.models import model_from_json, Model -from keras.optimizers import Adam -from keras import backend as K -from .utils.attention import Attention -from keras.utils import multi_gpu_model -from keras.layers import Conv2D, Lambda, add, AvgPool2D, Activation, UpSampling2D, Input, concatenate, Reshape, LeakyReLU, Reshape, Flatten, concatenate - - -class DiscriminatorMedium(): - """1/2 Resolution Discriminator. - - # Arguments - width: Width of image in pixels - height: Height of image in pixels - channels: Channels for the input image - gpus: The number of gpus you will be using. 
-        learning_rate: Learning rate
-        decay_rate: The amount of learning rate decay applied at each training update
-    """
-
-    def __init__(self,
-                 width=256,
-                 height=256,
-                 channels=3,
-                 learning_rate=0.0002,
-                 decay_rate=2e-6,
-                 gpus=0):
-
-        self.width = width
-        self.height = height
-        self.channels = channels
-        self.gpus = gpus
-        self.learning_rate = learning_rate
-        self.decay_rate = decay_rate
-
-        # --------------------------------
-        # Discriminator Medium resolution
-        # --------------------------------
-
-        output_size_low_picture, output_size_low_features = calc_output_and_feature_size(
-            self.height/2, self.width/2)
-
-        discriminator_low_res_input = Input(
-            shape=(self.height, self.width, self.channels,))
-        discriminator_low_res_input_downsample = AvgPool2D(
-            2, padding='same')(discriminator_low_res_input)
-
-        x_1 = ConvSN2D(64, 4, padding='same', strides=2)(
-            discriminator_low_res_input_downsample)
-        x = LeakyReLU(alpha=0.2)(x_1)
-
-        x_1_att = Attention(64)(x)
-
-        x_2 = ConvSN2D(128, 4, padding='same', strides=2)(x_1_att)
-        x = LeakyReLU(alpha=0.2)(x_2)
-
-        x_3 = ConvSN2D(256, 4, padding='same', strides=2)(x)
-        x = LeakyReLU(alpha=0.2)(x_3)
-
-        x_4 = ConvSN2D(512, 4, padding='same', strides=1)(x)
-        x = LeakyReLU(alpha=0.2)(x_4)
-
-        x = ConvSN2D(1, 4, padding='same', strides=1)(x)
-        x = Reshape([output_size_low_picture, 1])(x)
-
-        discriminator_low_features = concatenate(
-            [Flatten()(x_1), Flatten()(x_2), Flatten()(x_3), Flatten()(x_4)], axis=1)
-        discriminator_low_features = Reshape(
-            [output_size_low_features, 1])(discriminator_low_features)
-
-        def zero_loss(y_true, y_pred):
-            return K.zeros_like(y_true)
-
-        loss_d = ['mse', zero_loss]
-        loss_weights_d = [1, 0]
-        optimizer = Adam(self.learning_rate, 0.5, decay=self.decay_rate)
-
-        if self.gpus < 2:
-            self.model = Model(discriminator_low_res_input, [
-                               x, discriminator_low_features])
-            self.save_model = self.model
-        else:
-            self.save_model = Model(discriminator_low_res_input, [
-                                    x, discriminator_low_features])
-            self.model = multi_gpu_model(self.save_model, gpus=self.gpus)
-
-        self.model.compile(optimizer=optimizer,
-                           loss_weights=loss_weights_d, loss=loss_d)
-# -*- coding: utf-8 -*-
-# @Time : 2018/8/16 10:59
-# @Author : 陈子昂
-import os
-import requests
-from bs4 import BeautifulSoup
-from tqdm import tqdm
-import sys
-from utils import save_img, path_processor, img_name_processor
-
-
-def pexels(keyword):
-    if not keyword:
-        sys.exit('Exiting: no keyword was given!')
-    for page in tqdm(range(1, 50)):
-        print(f'\n-----[{keyword}] crawling page {page}-----')
-        pexels_url = "https://www.pexels.com/search/%s/?page=%s" % (
-            keyword, page)
-
-        headers = {
-            'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
-        res = requests.get(pexels_url, headers=headers, verify=False)
-
-        # print(res.text)
-        if 'Sorry, no pictures found!' 
in res.text:
-            print('-*--*--*- Crawling finished -*--*--*-')
-            sys.exit(0)
-
-        soup = BeautifulSoup(res.text, 'lxml')
-        # print(soup)
-        articles = soup.find_all('article')
-        # print(len(articles))
-        for article in articles:
-            src = article.img.attrs['src']
-            print(src)
-            path = rf'D://人脸相关的图片//pexels//{keyword}'
-            if not os.path.exists(path):
-                os.makedirs(path)
-            filename = img_name_processor(src)
-            file = os.path.join(path, filename)
-            save_img(file=file, src=src)
-
-
-if __name__ == "__main__":
-
-    categories = ['male', 'old', 'vintage', 'dog', 'cat', 'building', 'nature', 'castle', 'water', 'ocean', 'cities', 'body', 'hands', 'people', 'culture', 'religion', 'color', 'patterns', 'houses', 'vintage', 'river', 'landscape', 'lights', 'animals', 'wallpaper', 'texture', 'current events',
-                  'architecture', 'business', 'work', 'travel', 'fashion', 'food', 'drink', 'spirituality', 'experimental', 'health', 'arts', 'culture', 'children', 'people', 'events', 'trees', 'green', 'yellow', 'pink', 'blue', 'red', 'minimal', 'hands', 'head', 'eyes', 'mouth', 'eating', 'playing', 'sports']
-    for i in categories:
-        pexels(i)
-# -*- coding: utf-8 -*-
-import os
-import requests
-import hashlib
-import time
-from random import random
-from datetime import datetime
-import logging
-
-today = datetime.today().date()
-
-logging.basicConfig(level=logging.INFO,
-                    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
-                    datefmt='%Y-%m-%d %H:%M:%S',
-                    filename='log/%s.log' % today,
-                    filemode='a')
-
-
-def save_img(file, src):
-    '''
-    Save one picture: issue an HTTP request to the image URL, read the
-    binary body, and write it to the local file.
-    :param file: destination file path
-    :param src: image url
-    :return: True on success, False on a failed request
-    '''
-    if os.path.exists(file):
-        print(f'-{file} already exists, skipping.-')
-    else:  # a simple way to deduplicate downloads
-        try:
-            headers = {
-                'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'}
-            res = requests.get(src, timeout=3, verify=False, headers=headers)
-            # print(res.content)
-        except Exception as e:
-            print(f'--{e}--')
-            logging.warning(f'{os.path.split(__file__)[1]} - {src} - {e}')
-            return False
-        else:
-            if res.status_code == 200:
-                img = res.content
-                open(file, 'wb').write(img)
-                time.sleep(random())
-                return True
-
-
-def path_processor(site, folder):
-    '''
-    :param site: site name (pexels, pixabay, google)
-    :param folder: category name
-    :return path: the directory the category maps to
-    '''
-    categories = [['乐器', '笛子', '鼓', '长号', '钢琴', '小提琴', '女脸'],
-                  ['交通工具', '面包车', '摩托车', '轿车', 'SUV', '电瓶车',
-                   '三轮车', '自行车', '船', '大客车', '微型车'],
-                  ['办公产品', '显示屏', '鼠标', '垃圾篓', '路由器', '折叠床', '办公桌', '电话', '打印机', '键盘', '书本', '电脑椅', '投影仪', '绿植盆栽', '本子',
-                   '笔类', '接线板', '笔记本电脑', '文件收纳', '多肉盆栽', '文件柜', '碎纸机', '平板电脑', '订书机', '保险柜', '计算器'],
-                  ['场景', '商场内景', '酒吧夜店', '卧室', '湖泊', '山', '地铁内景', '厢式电梯外景', '沙滩', '轿车内景', '篮球场', '图书馆内景', '跑道', '广场',
-                   '客厅',
-                   '田野', '公路', '卫生间', '超市内景', '大门口', '街道', '电影院内景', '草坪', '厨房', '厢式电梯内景', '写字楼外景', '瀑布', '足球场', '鲜花',
-                   '天空',
-                   '办公室', '树木', '手扶电梯', '餐厅内景', '健身房内景'],
-                  ['家用电器', '洗衣机', '壁挂空调', '电磁炉', '超薄电视', '微波炉', '吸尘器', '电饭煲', '加湿器', '电热片', '燃气灶', '电风扇', '柜式空调', '咖啡机',
-                   '榨汁机', '剃须刀', '扫地机器人', '面包机', '电水壶', '电吹风', '冰箱', '饮水机', '熨斗', '油烟机'],
-                  ['数码产品', '手机', '音箱', '相机', 'VR眼镜',
-                   '三脚架', '体感车', '手表', '无人机', '耳机耳麦'],
-                  ['服饰', '短裤', '连衣裙', '休闲裤', '衬衫', '运动鞋',
-                   '外套', 'T恤', '凉鞋', '皮鞋', '牛仔裤', '拖鞋'],
-                  ['活动', '运动会', '婚礼', '聚餐'],
-                  ['生活用品', '玻璃杯', '碗', '运动水壶', '保鲜盒', '锅具', 
'瓜果刨', '菜刀', '剪刀', '筷子', '叉', '椅子', '梯子', '沙发', '马克杯', '衣架', - '盘子', '伞', '勺子', '餐桌'], - ['箱包装饰', '双肩包', '化妆品', '珠宝', '女式挎包', - '眼镜', '拉杆箱', '手提包', '钱包', '腰带'], - ['食品', '车厘子 樱桃', '三文鱼', '火锅', '矿泉水', '休闲零食', '火龙果', '香蕉', '椰子', '鱿鱼 章鱼', '面包', '饼干', '烧烤', - '糖果 巧克力', - '海参', '坚果炒货', '贝类', '海产干货', '鸡翅', '牛奶', '芒果', '食用油', '猕猴桃', '牛排', '虾类', '蛋糕', '橙子', '西餐', '饮料', - '方便面', - '鱼类', '膨化食品', '牛油果', '小龙虾', '米面', '蓝莓', '菠萝', '红酒', '咖啡粉', '咖啡豆', '榴莲', '白酒', '苹果', '肉', '蟹类']] - for cat in categories: - if folder in cat: - path = f'{site}/{cat[0]}/{folder}/' - break - else: - raise NameError("Please input correct category name!") - if not os.path.exists(path): - os.makedirs(path) - return path - - -def img_name_processor(src): - """ - This function is used to handle the file name of the saved picture. - Hash the URL of the picture as its filename. - :param src: image url - :return: image filename - """ - h5 = hashlib.md5() - h5.update(src.encode('utf-8')) - img = h5.hexdigest() + '.jpg' - return img - - -if __name__ == "__main__": - save_img('test.jpg', 'https://images.pexels.com/photos/458766/pexels-photo-458766.jpeg?auto=compress&cs=tinysrgb&h=350') -""" -parser.py: A basic parser for the YFCC100M dataset. - -author: Frank Liu - frank.zijie@gmail.com -last modified: 05/30/2015 - -Copyright (c) 2015, Frank Liu -All rights reserved. - -Redistribution and use in source and binary forms, with or without -modification, are permitted provided that the following conditions are met: - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - * Neither the name of the Frank Liu (fzliu) nor the - names of its contributors may be used to endorse or promote products - derived from this software without specific prior written permission. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND -ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED -WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE -DISCLAIMED. IN NO EVENT SHALL Frank Liu (fzliu) BE LIABLE FOR ANY -DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES -(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; -LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND -ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS -SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-""" - -from io import BytesIO -import random -import os -import sys -import time - - -import re -from random import randint -import uuid -from multiprocessing import Pool - - -# library imports (install with pip) -import numpy as np -from PIL import Image -import requests - -# directory which contains the tab-separated YFCC100M data -# more info @ download at http://labs.yahoo.com/news/yfcc100m/ -YFCC100M_DIR = "yfcc100m_dataset" - -# keys for the YFCC100M data -YFCC100M_KEYS = [ - "photo_id", - "identifier", - "hash", - "user_id", - "username", - "date_taken", - "upload_time", - "camera_type", - "title", - "description", - "user_tags", - "machine_tags", - "longitude", - "latitude", - "accuracy", - "page_url", - "download_url", - "license", - "license_url", - "server", - "farm", - "secret", - "original", - "extension", - "image_or_video" -] - - -def image_from_url(url): - """ - Downloads an image in numpy array format, given a URL. - """ - - # loop until the image is successfully downloaded - status = None - while status != 200: - response = requests.get(url) - status = response.status_code - pimg = Image.open(BytesIO(response.content)) - pimg = pimg.convert("RGB") - - pimg.save('/home/ubuntu/storage/yahoo/yfcc100m-tools/peoplenet/' + - str(uuid.uuid4()) + '.jpg') - - return True - - -def download_images(line): - try: - - # fit the data into a dictionary - values = [item.strip() for item in line.split("\t")] - data = dict(zip(YFCC100M_KEYS, values)) - - people = False - if bool(re.search("people", data["machine_tags"])) or bool(re.search("people", data["user_tags"])): - people = True - - if data["image_or_video"] == "0" and people: - image_from_url(data["download_url"]) - - except IOError: - print('Error!') - - -if __name__ == "__main__": - YFCC100M_DIR = '/home/ubuntu/storage/yahoo/yfcc100m-tools/parts/' - - for part in os.listdir(YFCC100M_DIR): - fh = open(os.path.join(YFCC100M_DIR, part), "r").readlines() - - print(part) - pool = Pool(processes=16) - pool.map(download_images, fh) - - print("Done!") -from keras.models import model_from_json, Model -from keras.layers import Conv2D, Lambda, add, AvgPool2D, Activation, UpSampling2D, Input, concatenate, Reshape, Flatten, Dense -from .utils.conv2d_r import Conv2D_r -from keras.utils import multi_gpu_model -from .utils.instance_normalization import InstanceNormalization - - -class CoreGenerator(): - """Core Generator. - - # Arguments - width: Width of image in pixels - height: Height of image in pixels - channels: Channels for the input image and the generated image - gpus: The number of gpus you will be using. 
- """ - - def __init__(self, - width=384, - height=384, - channels=1, - gpus=0): - - self.width = width - self.height = height - self.input_channels = channels - self.channels = channels - self.gpus = gpus - - # ----------------------- - # Core Generator Encoder - # ----------------------- - - core_generator_idea = Input( - shape=(self.width, self.height, self.input_channels,)) - core_generator_idea_downsample = AvgPool2D( - 2, padding='same')(core_generator_idea) - - core_generator_style = Input( - shape=(self.width/(2**7), self.height/(2**7), self.input_channels,)) - - # ----------------------- - # Idea Head - # ----------------------- - - encoder = Conv2D_r(64, 7, 1, core_generator_idea_downsample) - encoder = InstanceNormalization(axis=-1)(encoder) - encoder = Activation('relu')(encoder) - - encoder = Conv2D_r(128, 3, 2, encoder) - encoder = InstanceNormalization(axis=-1)(encoder) - encoder = Activation('relu')(encoder) - - encoder = Conv2D_r(256, 3, 2, encoder) - encoder = InstanceNormalization(axis=-1)(encoder) - encoder = Activation('relu')(encoder) - - encoder = Conv2D_r(512, 3, 2, encoder) - encoder = InstanceNormalization(axis=-1)(encoder) - encoder = Activation('relu')(encoder) - - encoder = Conv2D_r(512, 3, 2, encoder) - encoder = InstanceNormalization(axis=-1)(encoder) - encoder = Activation('relu')(encoder) - - # ----------------------- - # Style Head - # ----------------------- - - style = Conv2D_r(128, 3, 1, core_generator_style) - style = InstanceNormalization(axis=-1)(style) - style = Activation('relu')(style) - - style = UpSampling2D(2)(style) - style = Conv2D_r(256, 3, 1, style) - style = InstanceNormalization(axis=-1)(style) - style = Activation('relu')(style) - - style = UpSampling2D(2)(style) - style = Conv2D_r(512, 3, 1, style) - style = InstanceNormalization(axis=-1)(style) - style = Activation('relu')(style) - - # ----------------------- - # Merge Style and Idea - # ----------------------- - - style_and_idea = concatenate([encoder, style], axis=-1) - style_and_idea = Conv2D_r(1024, 3, 1, style_and_idea) - style_and_idea = InstanceNormalization(axis=-1)(style_and_idea) - style_and_idea = Activation('relu')(style_and_idea) - - style_and_idea = Conv2D_r(512, 3, 1, style_and_idea) - style_and_idea = InstanceNormalization(axis=-1)(style_and_idea) - style_and_idea = Activation('relu')(style_and_idea) - - # ------------------------------- - # Core Generator Residual Block - # ------------------------------- - - def ResidualUnit(input_features): - output_features = Conv2D_r(512, 3, 1, input_features) - output_features = InstanceNormalization(axis=-1)(output_features) - output_features = Activation('relu')(output_features) - output_features = Conv2D_r(512, 3, 1, output_features) - output_features = InstanceNormalization(axis=-1)(output_features) - output_features = add([input_features, output_features]) - output_features = Activation('relu')(output_features) - return output_features - - resnet = ResidualUnit(style_and_idea) - resnet = ResidualUnit(resnet) - resnet = ResidualUnit(resnet) - resnet = ResidualUnit(resnet) - resnet = ResidualUnit(resnet) - resnet = ResidualUnit(resnet) - resnet = ResidualUnit(resnet) - resnet = ResidualUnit(resnet) - resnet = ResidualUnit(resnet) - - # ------------- - # Core Decoder - # ------------- - - decoder = UpSampling2D(2)(resnet) - decoder = Conv2D_r(512, 3, 1, decoder) - decoder = InstanceNormalization(axis=-1)(decoder) - decoder = Activation('relu')(decoder) - - decoder = UpSampling2D(2)(decoder) - decoder = Conv2D_r(256, 3, 1, 
decoder) - decoder = InstanceNormalization(axis=-1)(decoder) - decoder = Activation('relu')(decoder) - - decoder = UpSampling2D(2)(decoder) - decoder = Conv2D_r(128, 3, 1, decoder) - decoder = InstanceNormalization(axis=-1)(decoder) - decoder = Activation('relu')(decoder) - - decoder = UpSampling2D(2)(decoder) - decoder = Conv2D_r(64, 3, 1, decoder) - features = Lambda(lambda x: x, name='core_features_org')(decoder) - decoder = InstanceNormalization(axis=-1)(decoder) - decoder = Activation('relu')(decoder) - - decoder = Conv2D_r(channels, 7, 1, decoder) - picture_lowres = Activation('tanh')(decoder) - - core_generator = Model([core_generator_idea, core_generator_style], [ - picture_lowres, features]) - core_generator.name = "core_generator" - - # -------------- - # Compile Model - # -------------- - - if self.gpus < 2: - self.model = core_generator - self.save_model = self.model - else: - self.save_model = core_generator - self.model = multi_gpu_model(self.save_model, gpus=gpus) -from keras.models import model_from_json, Model -from keras.layers import Conv2D, Lambda, add, AvgPool2D, Activation, UpSampling2D, Input, concatenate, Reshape -from .utils.conv2d_r import Conv2D_r -from .utils.instance_normalization import InstanceNormalization -from keras.utils import multi_gpu_model - - -class Enhancer(): - """Enhancer. - - # Arguments - width: Width of image in pixels - height: Height of image in pixels - channels: Channels for the input image and the generated image - gpus: The number of gpus you will be using. - """ - - def __init__(self, - width=256, - height=256, - channels=1, - gpus=0): - - self.width = width - self.height = height - self.channels = channels - self.gpus = gpus - - # --------------------------- - # Enhancer Generator Encoder - # --------------------------- - - enhancer_generator_input = Input( - shape=(self.width, self.height, channels,)) - enhancer_core_features = Input( - shape=(self.width/2, self.height/2, 64,)) - - encoder = Conv2D_r(32, 7, 1, enhancer_generator_input) - encoder = InstanceNormalization(axis=-1)(encoder) - encoder = Activation('relu')(encoder) - - encoder = Conv2D_r(64, 3, 2, encoder) - enhancer_and_core = concatenate( - [encoder, enhancer_core_features], axis=-1) - enhancer_and_core = InstanceNormalization(axis=-1)(enhancer_and_core) - enhancer_and_core = Activation('relu')(enhancer_and_core) - - enhancer_and_core = Conv2D_r(64, 3, 1, enhancer_and_core) - enhancer_and_core = InstanceNormalization(axis=-1)(enhancer_and_core) - enhancer_and_core = Activation('relu')(enhancer_and_core) - - # ---------------------------------- - # Enhancer Generator Residual Block - # ---------------------------------- - - def ResidualUnitLocal(input_features): - output_features = Conv2D_r(64, 3, 1, input_features) - output_features = InstanceNormalization(axis=-1)(output_features) - output_features = Activation('relu')(output_features) - output_features = Conv2D_r(64, 3, 1, output_features) - output_features = InstanceNormalization(axis=-1)(output_features) - output_features = add([input_features, output_features]) - output_features = Activation('relu')(output_features) - return output_features - - resnet = ResidualUnitLocal(enhancer_and_core) - resnet = ResidualUnitLocal(resnet) - resnet = ResidualUnitLocal(resnet) - - # --------------------------- - # Enhancer Generator Decoder - # --------------------------- - - decoder = UpSampling2D(2)(resnet) - decoder = Conv2D_r(64, 3, 1, decoder) - decoder = InstanceNormalization(axis=-1)(decoder) - decoder = 
Activation('relu')(decoder)
-
-        decoder = Conv2D_r(channels, 7, 2, decoder)
-        enhanced_picture = Activation('tanh')(decoder)
-
-        # -----------------
-        # Save model
-        # -----------------
-
-        if self.gpus < 2:
-            self.model = Model([enhancer_generator_input, enhancer_core_features], enhanced_picture)
-            self.save_model = self.model
-        else:
-            self.save_model = Model([enhancer_generator_input, enhancer_core_features], enhanced_picture)
-            self.model = multi_gpu_model(self.save_model, gpus=gpus)
-from keras.models import Model, save_model, load_model
-from keras.optimizers import Adam
-from .utils.conv2d_r import Conv2D_r
-from keras.utils import multi_gpu_model
-from .utils.instance_normalization import InstanceNormalization
-import tensorflow as tf
-from keras import backend as K
-from .utils.sn import ConvSN2D, DenseSN
-
-
-def zero_loss(y_true, y_pred):
-    return K.zeros_like(y_true)
-
-
-class CoreGeneratorEnhancer():
-    """Loads the trained core generator and re-wires it so that it exposes
-    both the low-res picture and the intermediate feature maps consumed by
-    the enhancer.
-
-    # Arguments
-        resource_path: Directory containing the saved `core_generator.h5`.
-        gpus: The number of gpus you will be using.
-    """
-
-    def __init__(self, resource_path='./resources/', gpus=0):
-
-        self.gpus = gpus
-
-        core_generator_original = load_model(resource_path + 'core_generator.h5', custom_objects={'Conv2D_r': Conv2D_r, 'InstanceNormalization': InstanceNormalization, 'tf': tf, 'ConvSN2D': ConvSN2D, 'DenseSN': DenseSN})
-        core_generator = Model(inputs=core_generator_original.input, outputs=[core_generator_original.output, core_generator_original.get_layer('core_features_org').output])
-        core_generator.name = "core_generator"
-        core_generator.trainable = True
-
-        self.model = core_generator
-        self.save_model = core_generator
-
-
-class CoreGenerator():
-    """Loads the trained core generator from disk.
-
-    # Arguments
-        resource_path: Directory containing the saved `core_generator.h5`.
-        gpus: The number of gpus you will be using.
-    """
-
-    def __init__(self, resource_path='./resources/', gpus=0):
-
-        self.gpus = gpus
-
-        core_generator = load_model(resource_path + 'core_generator.h5', custom_objects={'Conv2D_r': Conv2D_r, 'InstanceNormalization': InstanceNormalization, 'tf': tf, 'ConvSN2D': ConvSN2D, 'DenseSN': DenseSN})
-        # core_generator = Model(inputs=core_generator_original.input,
-        #                        outputs=[core_generator_original.get_layer('core_features_org').output,
-        #                                 core_generator_original.get_layer('core_features_true').output])
-        core_generator.name = "core_generator"
-        core_generator.trainable = True
-
-        self.model = core_generator
-        self.save_model = core_generator
-
-
-class Enhancer():
-    """Loads the trained enhancer from disk.
-
-    # Arguments
-        resource_path: Directory containing the saved `enhancer.h5`.
-        gpus: The number of gpus you will be using.
-    """
-
-    def __init__(self, resource_path='./resources/', gpus=0):
-
-        self.gpus = gpus
-
-        enhancer = load_model(resource_path + 'enhancer.h5', custom_objects={'Conv2D_r': Conv2D_r, 'InstanceNormalization': InstanceNormalization, 'tf': tf, 'ConvSN2D': ConvSN2D, 'DenseSN': DenseSN})
-        enhancer.name = 'enhancer'
-        enhancer.trainable = True
-
-        self.model = enhancer
-        self.save_model = enhancer
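A minimal usage sketch for these loader wrappers, assuming the .h5 files produced during training exist under ./resources/; the input shapes below are inferred from the builder classes above (384x384 idea input, 384/2**7 = 3 style input) and are illustrative rather than taken from the original:

import numpy as np

# The wrappers only load and re-expose Keras models, so inference is a
# plain predict() call on the wrapped .model attribute.
core = CoreGenerator(resource_path='./resources/', gpus=0)
idea = np.zeros((1, 384, 384, 1))   # low-res idea input (assumed shape)
style = np.zeros((1, 3, 3, 1))      # style input (assumed shape)
picture_lowres, features = core.model.predict([idea, style])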
-class DiscriminatorFull():
-    """Full-resolution discriminator.
-
-    # Arguments
-        resource_path: Directory containing the saved `discriminator_full.h5`.
-        learning_rate: Learning rate for the optimizer.
-        decay_rate: Learning-rate decay per update.
-        gpus: The number of gpus you will be using.
-    """
-
-    def __init__(self, resource_path='./resources/', learning_rate=0.0002, decay_rate=2e-6, gpus=1):
-
-        self.gpus = gpus
-        self.learning_rate = learning_rate
-        self.decay_rate = decay_rate
-
-        def zero_loss(y_true, y_pred):
-            return K.zeros_like(y_true)
-
-        discriminator_full = load_model(resource_path + 'discriminator_full.h5', custom_objects={'Conv2D_r': Conv2D_r, 'InstanceNormalization': InstanceNormalization, 'tf': tf, 'zero_loss': zero_loss, 'ConvSN2D': ConvSN2D, 'DenseSN': DenseSN})
-
-        discriminator_full.trainable = True
-        discriminator_full.name = "discriminator_full"
-
-        self.model = discriminator_full
-        self.save_model = discriminator_full
-
-
-class DiscriminatorLow():
-    """Low-resolution discriminator.
-
-    # Arguments
-        resource_path: Directory containing the saved `discriminator_low.h5`.
-        learning_rate: Learning rate for the optimizer.
-        decay_rate: Learning-rate decay per update.
-        gpus: The number of gpus you will be using.
-    """
-
-    def __init__(self, resource_path='./resources/', learning_rate=0.0002, decay_rate=2e-6, gpus=0):
-
-        self.gpus = gpus
-        self.learning_rate = learning_rate
-        self.decay_rate = decay_rate
-
-        def zero_loss(y_true, y_pred):
-            return K.zeros_like(y_true)
-
-        discriminator_low = load_model(resource_path + 'discriminator_low.h5', custom_objects={'Conv2D_r': Conv2D_r, 'InstanceNormalization': InstanceNormalization, 'tf': tf, 'zero_loss': zero_loss, 'ConvSN2D': ConvSN2D, 'DenseSN': DenseSN})
-
-        discriminator_low.trainable = True
-        discriminator_low.name = "discriminator_low"
-
-        self.model = discriminator_low
-        self.save_model = discriminator_low
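Both discriminator wrappers re-register `zero_loss` via `custom_objects` because Keras serializes only the name of a custom loss; at load time the callable itself must be supplied again. A short sketch of the failure mode, with a hypothetical path:

from keras.models import load_model

# Without custom_objects, Keras raises
# ValueError: Unknown loss function:zero_loss
# model = load_model('discriminator_full.h5')
model = load_model('discriminator_full.h5',  # hypothetical path
                   custom_objects={'zero_loss': zero_loss})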
-class StyleFeatures():
-    """Style feature extractor.
-
-    # Arguments
-        resource_path: Directory containing the saved `style_features.h5`.
-        gpus: The number of gpus you will be using.
-    """
-
-    def __init__(self, resource_path='./resources/', gpus=0):
-
-        self.gpus = gpus
-
-        style_features = load_model(resource_path + 'style_features.h5', custom_objects={'Conv2D_r': Conv2D_r, 'InstanceNormalization': InstanceNormalization, 'tf': tf, 'ConvSN2D': ConvSN2D, 'DenseSN': DenseSN})
-
-        style_features.trainable = True
-        style_features.name = "style_features"
-
-        self.model = style_features
-        self.save_model = style_features
-import keras
-from keras.models import Model, save_model, load_model
-from core_generator_load import CoreGenerator
-from discriminator_full import DiscriminatorFull
-from discriminator_low import DiscriminatorLow
-from style_features import StyleFeatures
-from enhancer import Enhancer
-
-from keras.optimizers import Adam
-from keras.models import model_from_json
-from utils.conv2d_r import Conv2D_r
-from keras.utils import multi_gpu_model
-from utils.instance_normalization import InstanceNormalization
-import tensorflow as tf
-from keras import backend as K
-from utils.sn import ConvSN2D, DenseSN
-
-
-def zero_loss(y_true, y_pred):
-    return K.zeros_like(y_true)
-
-
-style_features = StyleFeatures(gpus=1)
-core_generator = CoreGenerator(gpus=1)
-discriminator_full = DiscriminatorFull(gpus=1, decay_rate=0)
-discriminator_low = DiscriminatorLow(gpus=1, decay_rate=0)
-enhancer = Enhancer(gpus=1)
-
-resource_path = './weights/'
-save_path = './resources/'
-learning_rate = 0.0002  # the original had a trailing comma here, which silently made this a one-element tuple
-decay_rate = 0
-
-core_generator.model.load_weights(resource_path + "core_generator.h5")
-enhancer.model.load_weights(resource_path + 'enhancer.h5')
-discriminator_full.model.load_weights(resource_path + 'discriminator_full.h5')
-discriminator_low.model.load_weights(resource_path + 'discriminator_low.h5')
-style_features.model.load_weights(resource_path + 'style_features.h5')
-
-
-save_model(discriminator_full.model, save_path + "discriminator_full.h5")
-save_model(discriminator_low.model, save_path + "discriminator_low.h5")
-save_model(enhancer.model, save_path + "enhancer.h5")
-save_model(core_generator.model, save_path + "core_generator.h5")
-save_model(style_features.model, save_path + "style_features.h5")
-import keras.backend as K
-from keras.legacy import interfaces
-from keras.optimizers import Optimizer
-
-
-class AdamAccumulate(Optimizer):
-
-    def __init__(self, lr=0.001, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0., amsgrad=False, accum_iters=1, **kwargs):
-        if accum_iters < 1:
-            raise ValueError('accum_iters must be >= 1')
-        super(AdamAccumulate, self).__init__(**kwargs)
-        with K.name_scope(self.__class__.__name__):
-            self.iterations = K.variable(0, dtype='int64', name='iterations')
-            self.lr = K.variable(lr, name='lr')
-            self.beta_1 = K.variable(beta_1, name='beta_1')
-            self.beta_2 = K.variable(beta_2, name='beta_2')
-            self.decay = K.variable(decay, name='decay')
-        if epsilon is None:
-            epsilon = K.epsilon()
-        self.epsilon = epsilon
-        self.initial_decay = decay
-        self.amsgrad = amsgrad
-        self.accum_iters = K.variable(accum_iters, K.dtype(self.iterations))
-        self.accum_iters_float = K.cast(self.accum_iters, K.floatx())
-
-    @interfaces.legacy_get_updates_support
-    def get_updates(self, loss, params):
-        grads = self.get_gradients(loss, params)
-        self.updates = [K.update_add(self.iterations, 1)]
-
-        lr = self.lr
-
-        completed_updates = K.cast(K.tf.floor(self.iterations / self.accum_iters), K.floatx())
-
-        if self.initial_decay > 0:
-            lr = lr * (1. / (1.
+ self.decay * completed_updates)) - - t = completed_updates + 1 - - lr_t = lr * (K.sqrt(1. - K.pow(self.beta_2, t)) / - (1. - K.pow(self.beta_1, t))) - - # self.iterations incremented after processing a batch - # batch: 1 2 3 4 5 6 7 8 9 - # self.iterations: 0 1 2 3 4 5 6 7 8 - # update_switch = 1: x x (if accum_iters=4) - update_switch = K.equal((self.iterations + 1) % self.accum_iters, 0) - update_switch = K.cast(update_switch, K.floatx()) - - ms = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - vs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - gs = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - - if self.amsgrad: - vhats = [K.zeros(K.int_shape(p), dtype=K.dtype(p)) for p in params] - else: - vhats = [K.zeros(1) for _ in params] - - self.weights = [self.iterations] + ms + vs + vhats - - for p, g, m, v, vhat, tg in zip(params, grads, ms, vs, vhats, gs): - - sum_grad = tg + g - avg_grad = sum_grad / self.accum_iters_float - - m_t = (self.beta_1 * m) + (1. - self.beta_1) * avg_grad - v_t = (self.beta_2 * v) + (1. - self.beta_2) * K.square(avg_grad) - - if self.amsgrad: - vhat_t = K.maximum(vhat, v_t) - p_t = p - lr_t * m_t / (K.sqrt(vhat_t) + self.epsilon) - self.updates.append( - K.update(vhat, (1 - update_switch) * vhat + update_switch * vhat_t)) - else: - p_t = p - lr_t * m_t / (K.sqrt(v_t) + self.epsilon) - - self.updates.append( - K.update(m, (1 - update_switch) * m + update_switch * m_t)) - self.updates.append( - K.update(v, (1 - update_switch) * v + update_switch * v_t)) - self.updates.append(K.update(tg, (1 - update_switch) * sum_grad)) - new_p = p_t - - # Apply constraints. - if getattr(p, 'constraint', None) is not None: - new_p = p.constraint(new_p) - - self.updates.append( - K.update(p, (1 - update_switch) * p + update_switch * new_p)) - return self.updates - - def get_config(self): - config = {'lr': float(K.get_value(self.lr)), - 'beta_1': float(K.get_value(self.beta_1)), - 'beta_2': float(K.get_value(self.beta_2)), - 'decay': float(K.get_value(self.decay)), - 'epsilon': self.epsilon, - 'amsgrad': self.amsgrad} - base_config = super(AdamAccumulate, self).get_config() - return dict(list(base_config.items()) + list(config.items())) -from keras import backend as K -from keras.layers import InputSpec -import tensorflow as tf -from keras.engine.topology import Layer - -# ------------------------------------------------------------------------------------- -# Attention Layer from Self-Attention Generative Adversarial Networks -# Paper: https://arxiv.org/abs/1805.08318 -# Author of the layer: Hao Chen -# Source: https://stackoverflow.com/questions/50819931/self-attention-gan-in-keras -# ------------------------------------------------------------------------------------- - - -class Attention(Layer): - def __init__(self, ch, **kwargs): - super(Attention, self).__init__(**kwargs) - self.channels = ch - self.filters_f_g = self.channels // 8 - self.filters_h = self.channels - - def build(self, input_shape): - kernel_shape_f_g = (1, 1) + (self.channels, self.filters_f_g) - kernel_shape_h = (1, 1) + (self.channels, self.filters_h) - - # Create a trainable weight variable for this layer: - self.gamma = self.add_weight( - name='gamma', shape=[1], initializer='zeros', trainable=True) - self.kernel_f = self.add_weight(shape=kernel_shape_f_g, - initializer='glorot_uniform', - name='kernel_f') - self.kernel_g = self.add_weight(shape=kernel_shape_f_g, - initializer='glorot_uniform', - name='kernel_g') - self.kernel_h = 
self.add_weight(shape=kernel_shape_h, - initializer='glorot_uniform', - name='kernel_h') - self.bias_f = self.add_weight(shape=(self.filters_f_g,), - initializer='zeros', - name='bias_F') - self.bias_g = self.add_weight(shape=(self.filters_f_g,), - initializer='zeros', - name='bias_g') - self.bias_h = self.add_weight(shape=(self.filters_h,), - initializer='zeros', - name='bias_h') - super(Attention, self).build(input_shape) - # Set input spec. - self.input_spec = InputSpec(ndim=4, - axes={3: input_shape[-1]}) - self.built = True - - def call(self, x): - def hw_flatten(x): - return K.reshape(x, shape=[K.shape(x)[0], K.shape(x)[1]*K.shape(x)[2], K.shape(x)[-1]]) - - f = K.conv2d(x, - kernel=self.kernel_f, - strides=(1, 1), padding='same') # [bs, h, w, c'] - f = K.bias_add(f, self.bias_f) - g = K.conv2d(x, - kernel=self.kernel_g, - strides=(1, 1), padding='same') # [bs, h, w, c'] - g = K.bias_add(g, self.bias_g) - h = K.conv2d(x, - kernel=self.kernel_h, - strides=(1, 1), padding='same') # [bs, h, w, c] - h = K.bias_add(h, self.bias_h) - - s = tf.matmul(hw_flatten(g), hw_flatten( - f), transpose_b=True) # # [bs, N, N] - - beta = K.softmax(s, axis=-1) # attention map - - o = K.batch_dot(beta, hw_flatten(h)) # [bs, N, C] - - o = K.reshape(o, shape=K.shape(x)) # [bs, h, w, C] - x = self.gamma * o + x - - return x - - def compute_output_shape(self, input_shape): - return input_shape -def calc_output_and_feature_size(height, width): - output = (height/(2**3))*(width/(2**3)) - features = ((height/2**1)*(width/2**1)) * 64 + \ - ((height/2**2)*(width/2**2)) * 128 + \ - ((height/2**3)*(width/2**3)) * 256 + \ - ((height/2**3)*(width/2**3)) * 512 - return int(output), int(features) -import tensorflow as tf -from keras.layers import Conv2D, Lambda -from .sn import ConvSN2D - - -def Conv2D_r(channels, filter_size, strides, features): - padding = [[0, 0], [filter_size // 2, filter_size // 2], - [filter_size // 2, filter_size // 2], [0, 0]] - - out = Lambda(lambda net: tf.pad(net, padding, 'REFLECT'))(features) - out = ConvSN2D(channels, filter_size, - strides=strides, padding='valid')(out) - return out -# Source: https://github.com/keras-team/keras-contrib/ - -from keras.engine import Layer, InputSpec -from keras import initializers -from keras import regularizers -from keras import constraints -from keras import backend as K -from keras.utils.generic_utils import get_custom_objects - -import numpy as np - - -class InstanceNormalization(Layer): - """Instance normalization layer (Lei Ba et al, 2016, Ulyanov et al., 2016). - Normalize the activations of the previous layer at each step, - i.e. applies a transformation that maintains the mean activation - close to 0 and the activation standard deviation close to 1. - # Arguments - axis: Integer, the axis that should be normalized - (typically the features axis). - For instance, after a `Conv2D` layer with - `data_format="channels_first"`, - set `axis=1` in `InstanceNormalization`. - Setting `axis=None` will normalize all values in each instance of the batch. - Axis 0 is the batch dimension. `axis` cannot be set to 0 to avoid errors. - epsilon: Small float added to variance to avoid dividing by zero. - center: If True, add offset of `beta` to normalized tensor. - If False, `beta` is ignored. - scale: If True, multiply by `gamma`. - If False, `gamma` is not used. - When the next layer is linear (also e.g. `nn.relu`), - this can be disabled since the scaling - will be done by the next layer. - beta_initializer: Initializer for the beta weight. 
- gamma_initializer: Initializer for the gamma weight. - beta_regularizer: Optional regularizer for the beta weight. - gamma_regularizer: Optional regularizer for the gamma weight. - beta_constraint: Optional constraint for the beta weight. - gamma_constraint: Optional constraint for the gamma weight. - # Input shape - Arbitrary. Use the keyword argument `input_shape` - (tuple of integers, does not include the samples axis) - when using this layer as the first layer in a model. - # Output shape - Same shape as input. - # References - - [Layer Normalization](https://arxiv.org/abs/1607.06450) - - [Instance Normalization: The Missing Ingredient for Fast Stylization](https://arxiv.org/abs/1607.08022) - """ - - def __init__(self, - axis=None, - epsilon=1e-3, - center=True, - scale=True, - beta_initializer='zeros', - gamma_initializer='ones', - beta_regularizer=None, - gamma_regularizer=None, - beta_constraint=None, - gamma_constraint=None, - **kwargs): - super(InstanceNormalization, self).__init__(**kwargs) - self.supports_masking = True - self.axis = axis - self.epsilon = epsilon - self.center = center - self.scale = scale - self.beta_initializer = initializers.get(beta_initializer) - self.gamma_initializer = initializers.get(gamma_initializer) - self.beta_regularizer = regularizers.get(beta_regularizer) - self.gamma_regularizer = regularizers.get(gamma_regularizer) - self.beta_constraint = constraints.get(beta_constraint) - self.gamma_constraint = constraints.get(gamma_constraint) - - def build(self, input_shape): - ndim = len(input_shape) - if self.axis == 0: - raise ValueError('Axis cannot be zero') - - if (self.axis is not None) and (ndim == 2): - raise ValueError('Cannot specify axis for rank 1 tensor') - - self.input_spec = InputSpec(ndim=ndim) - - if self.axis is None: - shape = (1,) - else: - shape = (input_shape[self.axis],) - - if self.scale: - self.gamma = self.add_weight(shape=shape, - name='gamma', - initializer=self.gamma_initializer, - regularizer=self.gamma_regularizer, - constraint=self.gamma_constraint) - else: - self.gamma = None - if self.center: - self.beta = self.add_weight(shape=shape, - name='beta', - initializer=self.beta_initializer, - regularizer=self.beta_regularizer, - constraint=self.beta_constraint) - else: - self.beta = None - self.built = True - - def call(self, inputs, training=None): - input_shape = K.int_shape(inputs) - reduction_axes = list(range(0, len(input_shape))) - - if (self.axis is not None): - del reduction_axes[self.axis] - - del reduction_axes[0] - - mean = K.mean(inputs, reduction_axes, keepdims=True) - stddev = K.std(inputs, reduction_axes, keepdims=True) + self.epsilon - normed = (inputs - mean) / stddev - - broadcast_shape = [1] * len(input_shape) - if self.axis is not None: - broadcast_shape[self.axis] = input_shape[self.axis] - - if self.scale: - broadcast_gamma = K.reshape(self.gamma, broadcast_shape) - normed = normed * broadcast_gamma - if self.center: - broadcast_beta = K.reshape(self.beta, broadcast_shape) - normed = normed + broadcast_beta - return normed - - def get_config(self): - config = { - 'axis': self.axis, - 'epsilon': self.epsilon, - 'center': self.center, - 'scale': self.scale, - 'beta_initializer': initializers.serialize(self.beta_initializer), - 'gamma_initializer': initializers.serialize(self.gamma_initializer), - 'beta_regularizer': regularizers.serialize(self.beta_regularizer), - 'gamma_regularizer': regularizers.serialize(self.gamma_regularizer), - 'beta_constraint': constraints.serialize(self.beta_constraint), - 
'gamma_constraint': constraints.serialize(self.gamma_constraint)
-        }
-        base_config = super(InstanceNormalization, self).get_config()
-        return dict(list(base_config.items()) + list(config.items()))
-
-
-get_custom_objects().update({'InstanceNormalization': InstanceNormalization})
-from PIL import Image, ImageDraw, ImageFont
-from random import randint
-import csv
-import numpy as np
-FONTPATH = ["./data/font/times-bold.ttf", "./data/font/courier-bold.ttf"]
-ENGSTR = "ABCDEFGHJKLMNPQRSTUVWXYZ"  # no O or I, which are too easily confused with 0 and 1
-LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
-
-
-class rect:
-    def __init__(self):
-        self.size = (randint(5, 21), randint(5, 21))
-        self.location = (randint(1, 199), randint(1, 59))
-        self.luoverlay = True if randint(1, 10) > 6 else False
-        self.rdoverlay = False if self.luoverlay else True if randint(1, 10) > 8 else False
-        self.lucolor = 0 if randint(0, 1) else 255
-        self.rdcolor = 0 if self.lucolor == 255 else 255
-        self.ludrawn = False
-        self.rddrawn = False
-        self.pattern = randint(0, 1)
-
-    def draw(self, image, overlay):
-        if((overlay or not self.luoverlay) and not self.ludrawn):
-            self.ludrawn = True
-            stp = self.location
-            transparent = int(255 * 0.45 if self.lucolor == 0 else 255 * 0.8)
-            color = (self.lucolor, self.lucolor, self.lucolor, transparent)
-            uline = Image.new("RGBA", (self.size[0], 1), color)
-            lline = Image.new("RGBA", (1, self.size[1]), color)
-            image.paste(uline, stp, uline)
-            image.paste(lline, stp, lline)
-        if((overlay or not self.rdoverlay) and not self.rddrawn):
-            self.rddrawn = True
-            dstp = (self.location[0], self.location[1] + self.size[1])
-            rstp = (self.location[0] + self.size[0], self.location[1])
-            transparent = int(255 * 0.45 if self.rdcolor == 0 else 255 * 0.8)
-            color = (self.rdcolor, self.rdcolor, self.rdcolor, transparent)
-            dline = Image.new("RGBA", (self.size[0], 1), color)
-            rline = Image.new("RGBA", (1, self.size[1]), color)
-            image.paste(dline, dstp, dline)
-            image.paste(rline, rstp, rline)
-
-
-class captchatext:
-    def __init__(self, priority, offset, captchalen, engletter, ENGNOLIMIT):
-        self.engletter = engletter
-        if ENGNOLIMIT:
-            engletter = True if randint(1, 34) <= 24 else False
-        if engletter:
-            self.letter = ENGSTR[randint(0, len(ENGSTR) - 1)]
-        else:
-            self.letter = str(randint(0, 9))
-        self.color = [randint(10, 140) for _ in range(3)]
-        self.angle = randint(-55, 55)
-        self.priority = priority
-        self.offset = offset
-        self.next_offset = 0
-        self.captchalen = captchalen
-
-    def draw(self, image):
-        color = (self.color[0], self.color[1], self.color[2], 255)
-        font = ImageFont.truetype(FONTPATH[randint(0, 1)], randint(25, 27) * 10)
-        text = Image.new("RGBA", (font.getsize(self.letter)[0], 300), (0, 0, 0, 0))
-        textdraw = ImageDraw.Draw(text)
-        textdraw.text((0, 0), self.letter, font=font, fill=color)
-        text = text.rotate(self.angle, expand=True)
-        text = text.resize((int(text.size[0] / 10), int(text.size[1] / 10)))
-        base = int(self.priority * (200 / self.captchalen))
-        rand_min = (self.offset - base - 4) if (self.offset - base - 4) >= -15 else -15
-        rand_min = 0 if self.priority == 0 else rand_min
-        avg_dp = int(200 / self.captchalen)
-        rand_max = (avg_dp - text.size[0]) if self.priority == self.captchalen - 1 else (avg_dp - text.size[0] + 10)
-        try:
-            displace = randint(rand_min, rand_max)
-        except ValueError:  # randint raises ValueError when rand_min > rand_max
-            displace = rand_max
-        location = (base + displace, randint(3, 23))
-        self.next_offset = location[0] + text.size[0]
-        image.paste(text, location, text)
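A minimal sketch of how these two primitives compose into a single captcha, assuming the TrueType fonts referenced by FONTPATH are present; it mirrors the generate() function that follows, so all names except the output filename come from the code above:

from PIL import Image

captcha = Image.new('RGBA', (200, 60), (230, 230, 230, 255))
rects = [rect() for _ in range(32)]
for r in rects:
    r.draw(image=captcha, overlay=False)   # background noise lines
offset, answer = 0, ""
for i in range(6):
    t = captchatext(i, offset, 6, False, True)
    t.draw(image=captcha)                  # paste one rotated glyph
    offset = t.next_offset
    answer += str(t.letter)
for r in rects:
    r.draw(image=captcha, overlay=True)    # overlay noise on top of the text
captcha.convert("RGB").save("sample.jpg", "JPEG")  # hypothetical output path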
filename="train"): - captchacsv = open( - SAVEPATH + "captcha_{:s}.csv".format(filename), 'w', encoding='utf8', newline='') - lencsv = open( - SAVEPATH + "len_{:s}.csv".format(filename), 'w', encoding='utf8', newline='') - letterlist = [] - lenlist = [] - for index in range(1, GENNUM + 1, 1): - captchastr = "" - captchalen = 5 if randint(1, 100) <= FIVEP else 6 - engat = randint(0, captchalen - 1) if randint(1, 100) <= ENGP else -1 - bgcolor = [randint(180, 250) for _ in range(3)] - captcha = Image.new('RGBA', (200, 60), - (bgcolor[0], bgcolor[1], bgcolor[2], 255)) - rectlist = [rect() for _ in range(32)] - for obj in rectlist: - obj.draw(image=captcha, overlay=False) - offset = 0 - for i in range(captchalen): - newtext = captchatext(i, offset, captchalen, - (True if engat == i else False), ENGNOLIMIT) - newtext.draw(image=captcha) - offset = newtext.next_offset - captchastr += str(newtext.letter) - letterlist.append([str(index).zfill(len(str(GENNUM))), captchastr]) - lenlist.append([str(index).zfill(len(str(GENNUM))), captchalen]) - for obj in rectlist: - obj.draw(image=captcha, overlay=True) - captcha.convert("RGB").save( - SAVEPATH + str(index).zfill(len(str(GENNUM))) + ".jpg", "JPEG") - writer = csv.writer(captchacsv) - writer.writerows(letterlist) - writer = csv.writer(lencsv) - writer.writerows(lenlist) - captchacsv.close() - lencsv.close() - - -if __name__ == "__main__": - generate(50000, "./data/56_imitate_train_set/", ENGP=100, - FIVEP=50, ENGNOLIMIT=True, filename="train") - generate(10240, "./data/56_imitate_vali_set/", ENGP=100, - FIVEP=50, ENGNOLIMIT=True, filename="vali") - generate(50000, "./data/5_imitate_train_set/", ENGP=100, - FIVEP=100, ENGNOLIMIT=True, filename="train") - generate(10240, "./data/5_imitate_vali_set/", ENGP=100, - FIVEP=100, ENGNOLIMIT=True, filename="vali") - generate(50000, "./data/6_imitate_train_set/", ENGP=100, - FIVEP=0, ENGNOLIMIT=True, filename="train") - generate(10240, "./data/6_imitate_vali_set/", ENGP=100, - FIVEP=0, ENGNOLIMIT=True, filename="vali") -import shutil -import requests -import time -SAVEPATH = "./data/manual_label/" -url = 'http://railway1.hinet.net/ImageOut.jsp' -for i in range(1, 3000): - response = requests.get(url, stream=True) - with open(SAVEPATH + str(i) + '.jpg', 'wb') as out_file: - shutil.copyfileobj(response.raw, out_file) - del response - time.sleep(0.5) -from keras.preprocessing.image import ImageDataGenerator -from PIL import Image -import numpy as np -import csv - -outputcsv = open('./data/6_real_train_set/captcha_train.csv', - 'w', encoding='utf8', newline='') # 輸出csv -inputcsv = open('./data/manual_label/captcha_vali.csv', 'r', encoding='utf8') -data = [np.array(Image.open('./data/manual_label/' + row[0] + ".jpg")) - for row in csv.reader(inputcsv) if len(row[1]) == 6] # 只讀答案是6位的 -inputcsv = open('./data/manual_label/captcha_vali.csv', 'r', encoding='utf8') -oldanswer = [row[1] - for row in csv.reader(inputcsv) if len(row[1]) == 6] # 只讀答案是6位的 -answer = [] -datagen = ImageDataGenerator( - rotation_range=5, shear_range=0.2, zoom_range=0.05, fill_mode='nearest') -index, augmentindex, oldanswerindex = 0, 0, 0 -for img in data: - for batch in datagen.flow(np.asarray([img]), batch_size=1): - index += 1 - augmentindex += 1 - batch = batch.reshape((60, 200, 3)) - Image.fromarray(np.uint8(batch)).convert("RGB").save( - "./data/6_real_train_set/" + str(index) + ".jpg", "JPEG") - answer.append((str(index), oldanswer[oldanswerindex])) - if augmentindex >= 50: # 每張產生50個 - oldanswerindex += 1 - augmentindex = 0 - break 
-from keras.preprocessing.image import ImageDataGenerator
-from PIL import Image
-import numpy as np
-import csv
-
-outputcsv = open('./data/6_real_train_set/captcha_train.csv', 'w', encoding='utf8', newline='')  # output CSV
-inputcsv = open('./data/manual_label/captcha_vali.csv', 'r', encoding='utf8')
-data = [np.array(Image.open('./data/manual_label/' + row[0] + ".jpg")) for row in csv.reader(inputcsv) if len(row[1]) == 6]  # keep only samples whose answer has 6 characters
-inputcsv = open('./data/manual_label/captcha_vali.csv', 'r', encoding='utf8')
-oldanswer = [row[1] for row in csv.reader(inputcsv) if len(row[1]) == 6]  # keep only samples whose answer has 6 characters
-answer = []
-datagen = ImageDataGenerator(rotation_range=5, shear_range=0.2, zoom_range=0.05, fill_mode='nearest')
-index, augmentindex, oldanswerindex = 0, 0, 0
-for img in data:
-    for batch in datagen.flow(np.asarray([img]), batch_size=1):
-        index += 1
-        augmentindex += 1
-        batch = batch.reshape((60, 200, 3))
-        Image.fromarray(np.uint8(batch)).convert("RGB").save("./data/6_real_train_set/" + str(index) + ".jpg", "JPEG")
-        answer.append((str(index), oldanswer[oldanswerindex]))
-        if augmentindex >= 50:  # produce 50 augmented copies per source image
-            oldanswerindex += 1
-            augmentindex = 0
-            break
-csv.writer(outputcsv).writerows(answer)
-from keras.models import load_model
-from keras.models import Model
-from keras import backend as K
-from PIL import Image
-import numpy as np
-import os
-import csv
-LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
-
-
-def toonehot(text):
-    labellist = []
-    for letter in text:
-        onehot = [0 for _ in range(34)]
-        num = LETTERSTR.find(letter)
-        onehot[num] = 1
-        labellist.append(onehot)
-    return labellist
-
-
-print("Loading test data...")
-testcsv = open('./data/manual_label/captcha_test.csv', 'r', encoding='utf8')
-test_data = np.stack([np.array(Image.open("./data/manual_label/" + row[0] + ".jpg"))/255.0 for row in csv.reader(testcsv)])
-testcsv = open('./data/manual_label/captcha_test.csv', 'r', encoding='utf8')
-test_label = [row[1] for row in csv.reader(testcsv)]
-print("Loading model...")
-K.clear_session()
-model = None
-model5 = load_model("./data/model/imitate_5_model.h5")
-model6 = load_model("./data/model/imitate_6_model.h5")
-model56 = load_model("./data/model/real_56_model.h5")
-print("Predicting...")
-prediction56 = [6 if arr[0] > 0.5 else 5 for arr in model56.predict(test_data)]  # classify each captcha as 5- or 6-digit
-prediction5 = model5.predict(test_data)  # 5-digit model
-prediction6 = model6.predict(test_data)  # 6-digit model
-
-# The block below tallies per-model and per-character accuracy; the original
-# author notes it is a bit messy and due for a cleanup.
-total, total5, total6 = len(prediction56), 0, 0
-correct5, correct6, correct56, correct = 0, 0, 0, 0
-correct5digit, correct6digit = [0 for _ in range(5)], [0 for _ in range(6)]
-totalalpha, correctalpha = len([1 for ans in test_label for char in ans if char.isalpha()]), 0
-for i in range(total):
-    checkcorrect = True
-    if prediction56[i] == len(test_label[i]):
-        correct56 += 1
-    else:
-        checkcorrect = False
-    if prediction56[i] == 5:
-        total5 += 1
-        allequal = True
-        for char in range(5):
-            if LETTERSTR[np.argmax(prediction5[char][i])] == test_label[i][char]:
-                correct5digit[char] += 1
-                correctalpha += 1 if LETTERSTR[np.argmax(prediction5[char][i])].isalpha() else 0
-            else:
-                allequal = False
-        if allequal:
-            correct5 += 1
-        else:
-            checkcorrect = False
-    else:
-        total6 += 1
-        allequal = True
-        for char in range(6):
-            if LETTERSTR[np.argmax(prediction6[char][i])] == test_label[i][char]:
-                correct6digit[char] += 1
-                correctalpha += 1 if LETTERSTR[np.argmax(prediction6[char][i])].isalpha() else 0
-            else:
-                allequal = False
-        if allequal:
-            correct6 += 1
-        else:
-            checkcorrect = False
-    if checkcorrect:
-        correct += 1
-
-print("5 or 6 model acc:{:.4f}%".format(correct56/total*100))  # accuracy of the 5-vs-6 length classifier
-print("---------------------------")
-print("5digits model acc:{:.4f}%".format(correct5/total5*100))  # accuracy of the 5-digit model
-for i in range(5):
-    print("digit{:d} acc:{:.4f}%".format(i+1, correct5digit[i]/total5*100))  # per-character accuracy, 5-digit model
-print("---------------------------")
-print("6digits model acc:{:.4f}%".format(correct6/total6*100))  # accuracy of the 6-digit model
-for i in range(6):
-    print("digit{:d} acc:{:.4f}%".format(i+1, correct6digit[i]/total6*100))  # per-character accuracy, 6-digit model
-print("---------------------------")
-print("alpha acc:{:.4f}%".format(correctalpha/totalalpha*100))  # overall accuracy on alphabetic characters
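Since the tallying above is admittedly messy, here is a hedged sketch of the same exact-match accuracy computed more compactly with numpy; variable names match the script, but this is an illustrative rewrite, not the author's code, and it gates on label length rather than on the length classifier:

import numpy as np

# prediction5 is a list of 5 arrays, each (n_samples, 34); decode column-wise
decoded5 = ["".join(LETTERSTR[np.argmax(prediction5[c][i])] for c in range(5))
            for i in range(len(test_label))]
exact5 = np.mean([decoded5[i] == test_label[i]
                  for i in range(len(test_label)) if len(test_label[i]) == 5])
print("5-digit exact-match acc: {:.4f}%".format(exact5 * 100))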
-from selenium import webdriver
-from selenium.common.exceptions import TimeoutException
-from selenium.webdriver.common.by import By
-from selenium.webdriver.support.ui import WebDriverWait
-from selenium.webdriver.support import expected_conditions as EC
-import numpy as np
-from PIL import Image
-from keras.models import load_model, Model
-import time
-import random
-IDNumber = "X123456789"  # fill in your national ID number here
-model = None
-model5 = load_model("./data/model/imitate_5_model.h5")  # model that reads 5-digit captchas
-model6 = load_model("./data/model/imitate_6_model.h5")  # model that reads 6-digit captchas
-model56 = load_model("./data/model/real_56_model.h5")  # model that decides between 5 and 6 digits
-LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
-driver = webdriver.Chrome("./data/chromedriver.exe")  # path to chromedriver
-correct, wrong = 0, 0
-
-for _ in range(1000):  # run 1000 attempts
-    driver.get('http://railway1.hinet.net/Foreign/TW/ecsearch.html')
-    id_textbox = driver.find_element_by_id('person_id')
-    id_textbox.send_keys(IDNumber)
-    button = driver.find_element_by_css_selector('body > div.container > div.row.contents > div > form > div > div.col-xs-12 > button')
-    button.click()
-    driver.save_screenshot('tmp.png')
-    location = driver.find_element_by_id('idRandomPic').location
-    x, y = location['x'] + 5, location['y'] + 5
-    img = Image.open('tmp.png')
-    captcha = img.crop((x, y, x+200, y+60))
-    captcha.convert("RGB").save('captcha.jpg', 'JPEG')
-    # check whether it is 5 or 6 digits
-    p56 = model56.predict(np.stack([np.array(Image.open('captcha.jpg'))/255.0]))[0][0]
-    if p56 > 0.5:
-        model = model6
-    else:
-        model = model5
-    prediction = model.predict(np.stack([np.array(Image.open('captcha.jpg'))/255.0]))
-    answer = ""
-    for predict in prediction:
-        answer += LETTERSTR[np.argmax(predict[0])]
-    captcha_textbox = driver.find_element_by_id('randInput')
-    captcha_textbox.send_keys(answer)
-    driver.find_element_by_id('sbutton').click()
-    if "亂數號碼錯誤" in driver.page_source:  # the site's "captcha code incorrect" message
-        wrong += 1
-    else:
-        correct += 1
-    print("{:.4f}% (Correct{:d}-Wrong{:d})".format(correct/(correct+wrong)*100, correct, wrong))
-    time.sleep(3)
-from keras.models import Model
-from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
-from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
-from PIL import Image
-import numpy as np
-import csv
-
-LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
-
-
-def toonehot(text):
-    labellist = []
-    for letter in text:
-        onehot = [0 for _ in range(34)]
-        num = LETTERSTR.find(letter)
-        onehot[num] = 1
-        labellist.append(onehot)
-    return labellist
-
-
-# Create CNN Model
-print("Creating CNN model...")
-inputs = Input((60, 200, 3))  # renamed from `in`, which is a reserved word and a syntax error
-out = inputs
-out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
-out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Dropout(0.3)(out)
-out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
-out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Dropout(0.3)(out)
-out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
-out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Dropout(0.3)(out)
-out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Flatten()(out)
-out = Dropout(0.3)(out)
-out = [Dense(34, name='digit1', activation='softmax')(out),
-       Dense(34, name='digit2', activation='softmax')(out),
-       Dense(34, name='digit3', activation='softmax')(out),
-       Dense(34, name='digit4', activation='softmax')(out),
-       Dense(34, name='digit5', activation='softmax')(out)]
-model = Model(inputs=inputs, outputs=out)
-model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
-model.summary()
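Each of the five softmax heads expects its own (n_samples, 34) one-hot target, so the loading code that follows transposes the labels from per-sample lists into per-digit arrays. A tiny illustration of that shape juggling, with hypothetical answers:

import numpy as np

labels = ["AB3D7", "9K2QX"]                          # hypothetical 5-char answers
onehot = np.asarray([toonehot(s) for s in labels])   # shape (2, 5, 34)
per_digit = [onehot[:, d, :] for d in range(5)]      # 5 arrays of shape (2, 34)
# model.fit(x, per_digit) then matches the five named outputs digit1..digit5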
-
-print("Reading training data...")
-traincsv = open('./data/5_imitate_train_set/captcha_train.csv', 'r', encoding='utf8')
-train_data = np.stack([np.array(Image.open("./data/5_imitate_train_set/" + row[0] + ".jpg"))/255.0 for row in csv.reader(traincsv)])
-traincsv = open('./data/5_imitate_train_set/captcha_train.csv', 'r', encoding='utf8')
-read_label = [toonehot(row[1]) for row in csv.reader(traincsv)]
-train_label = [[] for _ in range(5)]
-for arr in read_label:
-    for index in range(5):
-        train_label[index].append(arr[index])
-train_label = [arr for arr in np.asarray(train_label)]
-print("Shape of train data:", train_data.shape)
-
-print("Reading validation data...")
-valicsv = open('./data/5_imitate_vali_set/captcha_vali.csv', 'r', encoding='utf8')
-vali_data = np.stack([np.array(Image.open("./data/5_imitate_vali_set/" + row[0] + ".jpg"))/255.0 for row in csv.reader(valicsv)])
-valicsv = open('./data/5_imitate_vali_set/captcha_vali.csv', 'r', encoding='utf8')
-read_label = [toonehot(row[1]) for row in csv.reader(valicsv)]
-vali_label = [[] for _ in range(5)]
-for arr in read_label:
-    for index in range(5):
-        vali_label[index].append(arr[index])
-vali_label = [arr for arr in np.asarray(vali_label)]
-print("Shape of validation data:", vali_data.shape)
-
-filepath = "./data/model/imitate_5_model.h5"
-checkpoint = ModelCheckpoint(filepath, monitor='val_digit5_acc', verbose=1, save_best_only=True, mode='max')
-earlystop = EarlyStopping(monitor='val_digit5_acc', patience=5, verbose=1, mode='auto')
-tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
-callbacks_list = [checkpoint, earlystop, tensorBoard]
-model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2, validation_data=(vali_data, vali_label), callbacks=callbacks_list)
-from keras.models import Model
-from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
-from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
-from PIL import Image
-import numpy as np
-import csv
-
-
-# Create CNN Model
-print("Creating CNN model...")
-inputs = Input((60, 200, 3))  # renamed from `in`, which is a reserved word and a syntax error
-out = inputs
-out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
-out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Dropout(0.5)(out)
-out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
-out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Dropout(0.5)(out)
-out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
-out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Dropout(0.5)(out)
-out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Flatten()(out)
-out = Dropout(0.5)(out)
-out = Dense(1, name='6digit', activation='sigmoid')(out)
-model = Model(inputs=inputs, outputs=out)
-model.compile(loss='binary_crossentropy', optimizer='adam', metrics=['accuracy'])
-model.summary()
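This length classifier ends in a single sigmoid unit, so its output is the probability that a captcha has 6 characters; downstream code thresholds it at 0.5, as the solver script earlier does. A hedged sketch with a dummy input:

import numpy as np

p6 = model.predict(np.zeros((1, 60, 200, 3)))[0][0]  # dummy image, illustrative only
n_digits = 6 if p6 > 0.5 else 5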
"./data/56_imitate_train_set/" + row[0] + ".jpg"))/255.0 for row in csv.reader(traincsv)]) -traincsv = open('./data/56_imitate_train_set/len_train.csv', - 'r', encoding='utf8') -train_label = np.asarray( - [1 if row[1] == '6' else 0 for row in csv.reader(traincsv)]) -print("Shape of train data:", train_data.shape) - -print("Reading validation data...") -valicsv = open('./data/56_imitate_vali_set/len_vali.csv', 'r', encoding='utf8') -vali_data = np.stack([np.array(Image.open( - './data/56_imitate_vali_set/' + row[0] + ".jpg"))/255.0 for row in csv.reader(valicsv)]) -valicsv = open('./data/56_imitate_vali_set/len_vali.csv', 'r', encoding='utf8') -vali_label = np.asarray( - [1 if row[1] == '6' else 0 for row in csv.reader(valicsv)]) -print("Shape of validation data:", vali_data.shape) - -filepath = "./data/model/imitate_56_model.h5" -checkpoint = ModelCheckpoint( - filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max') -earlystop = EarlyStopping( - monitor='val_acc', patience=10, verbose=1, mode='auto') -tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1) -callbacks_list = [checkpoint, earlystop, tensorBoard] -model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=1, - validation_data=(vali_data, vali_label), callbacks=callbacks_list) -from keras.models import Model -from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization -from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard -from PIL import Image -import numpy as np -import csv - -LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ" - - -def toonehot(text): - labellist = [] - for letter in text: - onehot = [0 for _ in range(34)] - num = LETTERSTR.find(letter) - onehot[num] = 1 - labellist.append(onehot) - return labellist - - -# Create CNN Model -print("Creating CNN model...") -in = Input((60, 200, 3)) -out = in -out = Conv2D(filters=32, kernel_size=(3, 3), - padding='same', activation='relu')(out) -out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out) -out = BatchNormalization()(out) -out = MaxPooling2D(pool_size=(2, 2))(out) -out = Dropout(0.3)(out) -out = Conv2D(filters=64, kernel_size=(3, 3), - padding='same', activation='relu')(out) -out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out) -out = BatchNormalization()(out) -out = MaxPooling2D(pool_size=(2, 2))(out) -out = Dropout(0.3)(out) -out = Conv2D(filters=128, kernel_size=(3, 3), - padding='same', activation='relu')(out) -out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out) -out = BatchNormalization()(out) -out = MaxPooling2D(pool_size=(2, 2))(out) -out = Dropout(0.3)(out) -out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out) -out = BatchNormalization()(out) -out = MaxPooling2D(pool_size=(2, 2))(out) -out = Flatten()(out) -out = Dropout(0.3)(out) -out = [Dense(34, name='digit1', activation='softmax')(out), - Dense(34, name='digit2', activation='softmax')(out), - Dense(34, name='digit3', activation='softmax')(out), - Dense(34, name='digit4', activation='softmax')(out), - Dense(34, name='digit5', activation='softmax')(out), - Dense(34, name='digit6', activation='softmax')(out)] -model = Model(inputs=in, outputs=out) -model.compile(loss='categorical_crossentropy', - optimizer='adam', metrics=['accuracy']) -model.summary() - -print("Reading training data...") -traincsv = open('./data/6_imitate_train_set/captcha_train.csv', - 'r', encoding='utf8') -train_data = np.stack([np.array(Image.open( - "./data/6_imitate_train_set/" + 
row[0] + ".jpg"))/255.0 for row in csv.reader(traincsv)]) -traincsv = open('./data/6_imitate_train_set/captcha_train.csv', - 'r', encoding='utf8') -read_label = [toonehot(row[1]) for row in csv.reader(traincsv)] -train_label = [[] for _ in range(6)] -for arr in read_label: - for index in range(6): - train_label[index].append(arr[index]) -train_label = [arr for arr in np.asarray(train_label)] -print("Shape of train data:", train_data.shape) - -print("Reading validation data...") -valicsv = open('./data/6_imitate_vali_set/captcha_vali.csv', - 'r', encoding='utf8') -vali_data = np.stack([np.array(Image.open( - "./data/6_imitate_vali_set/" + row[0] + ".jpg"))/255.0 for row in csv.reader(valicsv)]) -valicsv = open('./data/6_imitate_vali_set/captcha_vali.csv', - 'r', encoding='utf8') -read_label = [toonehot(row[1]) for row in csv.reader(valicsv)] -vali_label = [[] for _ in range(6)] -for arr in read_label: - for index in range(6): - vali_label[index].append(arr[index]) -vali_label = [arr for arr in np.asarray(vali_label)] -print("Shape of validation data:", vali_data.shape) - -filepath = "./data/model/imitate_6_model.h5" -checkpoint = ModelCheckpoint( - filepath, monitor='val_digit6_acc', verbose=1, save_best_only=True, mode='max') -earlystop = EarlyStopping(monitor='val_digit6_acc', - patience=5, verbose=1, mode='auto') -tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1) -callbacks_list = [checkpoint, earlystop, tensorBoard] -model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2, - validation_data=(vali_data, vali_label), callbacks=callbacks_list) -from keras.models import Model -from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization -from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard -from PIL import Image -import numpy as np -import csv -import os - -LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ" - - -def toonehot(text): - labellist = [] - for letter in text: - onehot = [0 for _ in range(34)] - num = LETTERSTR.find(letter) - onehot[num] = 1 - labellist.append(onehot) - return labellist - - -# Create CNN Model -print("Creating CNN model...") -in = Input((60, 200, 3)) -out = in -out = Conv2D(filters=32, kernel_size=(3, 3), - padding='same', activation='relu')(out) -out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out) -out = BatchNormalization()(out) -out = MaxPooling2D(pool_size=(2, 2))(out) -out = Dropout(0.5)(out) -out = Conv2D(filters=64, kernel_size=(3, 3), - padding='same', activation='relu')(out) -out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out) -out = BatchNormalization()(out) -out = MaxPooling2D(pool_size=(2, 2))(out) -out = Dropout(0.5)(out) -out = Conv2D(filters=128, kernel_size=(3, 3), - padding='same', activation='relu')(out) -out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out) -out = BatchNormalization()(out) -out = MaxPooling2D(pool_size=(2, 2))(out) -out = Dropout(0.5)(out) -out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out) -out = BatchNormalization()(out) -out = MaxPooling2D(pool_size=(2, 2))(out) -out = Flatten()(out) -out = Dropout(0.5)(out) -out = [Dense(34, name='digit1', activation='softmax')(out), - Dense(34, name='digit2', activation='softmax')(out), - Dense(34, name='digit3', activation='softmax')(out), - Dense(34, name='digit4', activation='softmax')(out), - Dense(34, name='digit5', activation='softmax')(out)] -model = Model(inputs=in, outputs=out) -model.compile(loss='categorical_crossentropy', - 
-
-print("Reading training data...")
-traincsv = open('./data/5_real_train_set/captcha_train.csv', 'r', encoding='utf8')
-train_data = np.stack([np.array(Image.open('./data/5_real_train_set/' + str(i) + ".jpg"))/255.0 for i in range(1, 60001)])
-traincsv = open('./data/5_real_train_set/captcha_train.csv', 'r', encoding='utf8')
-read_label = [toonehot(row[1]) for row in csv.reader(traincsv)][:60000]
-train_label = [[] for _ in range(5)]
-for arr in read_label:
-    for index in range(5):
-        train_label[index].append(arr[index])
-train_label = [arr for arr in np.asarray(train_label)]
-print("Shape of train data:", train_data.shape)
-
-print("Reading validation data...")
-valicsv = open('./data/5_real_train_set/captcha_train.csv', 'r', encoding='utf8')
-vali_data = np.stack([np.array(Image.open('./data/5_real_train_set/' + str(i) + ".jpg"))/255.0 for i in range(60001, 75001)])
-valicsv = open('./data/5_real_train_set/captcha_train.csv', 'r', encoding='utf8')
-read_label = [toonehot(row[1]) for row in csv.reader(valicsv)][60000:]
-vali_label = [[] for _ in range(5)]
-for arr in read_label:
-    for index in range(5):
-        vali_label[index].append(arr[index])
-vali_label = [arr for arr in np.asarray(vali_label)]
-print("Shape of validation data:", vali_data.shape)
-
-filepath = "./data/model/real_5_model.h5"
-checkpoint = ModelCheckpoint(filepath, monitor='val_digit5_acc', verbose=1, save_best_only=True, mode='max')
-earlystop = EarlyStopping(monitor='val_digit5_acc', patience=5, verbose=1, mode='auto')
-tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
-callbacks_list = [checkpoint, earlystop, tensorBoard]
-model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2, validation_data=(vali_data, vali_label), callbacks=callbacks_list)
-from keras.models import Model
-from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
-from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
-from PIL import Image
-import numpy as np
-import csv
-
-
-# Create CNN Model
-print("Creating CNN model...")
-inputs = Input((60, 200, 3))  # renamed from `in`, a reserved word
-out = inputs
-out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
-out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Dropout(0.5)(out)
-out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
-out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Dropout(0.5)(out)
-out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
-out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Dropout(0.5)(out)
-out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Flatten()(out)
-out = Dropout(0.5)(out)
-out = Dense(1, name='6digit', activation='sigmoid')(out)
-model = Model(inputs=inputs, outputs=out)
-model.compile(loss='binary_crossentropy', optimizer='Adam', metrics=['accuracy'])
-model.summary()
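The length classifier is retrained on a balanced mix of 20,000 real 5-digit and 20,000 real 6-digit captchas, labeled 0 and 1 respectively, exactly as the loading code below does. A hedged sketch of the balancing idea:

import numpy as np

idx5 = np.random.choice(range(1, 60001), size=20000, replace=False)  # 5-digit image ids
idx6 = np.random.choice(range(1, 60001), size=20000, replace=False)  # 6-digit image ids
labels = np.concatenate([np.zeros(20000), np.ones(20000)])           # 0 = five, 1 = six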
-
-print("Reading training data...")
-train_label = np.asarray([0 for _ in range(40000)])
-train_data = [np.array(Image.open("./data/5_real_train_set/" + str(i) + ".jpg"))/255.0 for i in np.random.choice(range(1, 60001), size=20000, replace=False)]
-train_data = np.concatenate((train_data, [np.array(Image.open("./data/6_real_train_set/" + str(i) + ".jpg"))/255.0 for i in np.random.choice(range(1, 60001), size=20000, replace=False)]))
-train_data = np.stack(train_data)
-train_label[:20000] = 0
-train_label[20000:] = 1
-print("Shape of train data:", train_data.shape)
-
-print("Reading validation data...")
-vali_label = np.asarray([0 for _ in range(10000)])
-vali_data = [np.array(Image.open("./data/5_real_train_set/" + str(i) + ".jpg"))/255.0 for i in np.random.choice(range(60001, 75001), size=5000, replace=False)]
-vali_data = np.concatenate((vali_data, [np.array(Image.open("./data/6_real_train_set/" + str(i) + ".jpg"))/255.0 for i in np.random.choice(range(60001, 75001), size=5000, replace=False)]))
-vali_data = np.stack(vali_data)
-vali_label[:5000] = 0
-vali_label[5000:] = 1
-print("Shape of validation data:", vali_data.shape)
-
-filepath = "./data/model/real_56_model.h5"
-checkpoint = ModelCheckpoint(filepath, monitor='val_acc', verbose=1, save_best_only=True, mode='max')
-earlystop = EarlyStopping(monitor='val_loss', patience=5, verbose=1, mode='auto')
-tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1)
-callbacks_list = [checkpoint, earlystop, tensorBoard]
-model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2, validation_data=(vali_data, vali_label), callbacks=callbacks_list)
-from keras.models import Model
-from keras.layers import Input, Dense, Dropout, Flatten, Conv2D, MaxPooling2D, BatchNormalization
-from keras.callbacks import ModelCheckpoint, EarlyStopping, TensorBoard
-from PIL import Image
-import numpy as np
-import csv
-import os
-
-LETTERSTR = "0123456789ABCDEFGHJKLMNPQRSTUVWXYZ"
-
-
-def toonehot(text):
-    labellist = []
-    for letter in text:
-        onehot = [0 for _ in range(34)]
-        num = LETTERSTR.find(letter)
-        onehot[num] = 1
-        labellist.append(onehot)
-    return labellist
-
-
-# Create CNN Model
-print("Creating CNN model...")
-inputs = Input((60, 200, 3))  # renamed from `in`, a reserved word
-out = inputs
-out = Conv2D(filters=32, kernel_size=(3, 3), padding='same', activation='relu')(out)
-out = Conv2D(filters=32, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Dropout(0.5)(out)
-out = Conv2D(filters=64, kernel_size=(3, 3), padding='same', activation='relu')(out)
-out = Conv2D(filters=64, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Dropout(0.5)(out)
-out = Conv2D(filters=128, kernel_size=(3, 3), padding='same', activation='relu')(out)
-out = Conv2D(filters=128, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Dropout(0.5)(out)
-out = Conv2D(filters=256, kernel_size=(3, 3), activation='relu')(out)
-out = BatchNormalization()(out)
-out = MaxPooling2D(pool_size=(2, 2))(out)
-out = Flatten()(out)
-out = Dropout(0.5)(out)
-out = [Dense(34, name='digit1', activation='softmax')(out),
-       Dense(34, name='digit2', activation='softmax')(out),
-       Dense(34, name='digit3', activation='softmax')(out),
-       Dense(34, name='digit4', activation='softmax')(out),
-       Dense(34, name='digit5', activation='softmax')(out),
-       Dense(34, name='digit6', activation='softmax')(out)]
-model = Model(inputs=inputs, outputs=out)
-model.compile(loss='categorical_crossentropy', optimizer='Adam', metrics=['accuracy'])
-model.summary()
-
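Once this final model is trained and checkpointed below, inference mirrors the solver script seen earlier: argmax over each of the six softmax heads. A hedged sketch, with an illustrative image path:

import numpy as np
from PIL import Image
from keras.models import load_model

model6 = load_model("./data/model/real_6_model.h5")
x = np.stack([np.array(Image.open("captcha.jpg")) / 255.0])   # hypothetical image
answer = "".join(LETTERSTR[np.argmax(head[0])] for head in model6.predict(x))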
-print("Reading training data...") -traincsv = open('./data/6_real_train_set/captcha_train.csv', - 'r', encoding='utf8') -train_data = np.stack([np.array(Image.open( - './data/6_real_train_set/' + str(i) + ".jpg"))/255.0 for i in range(1, 60001)]) -traincsv = open('./data/6_real_train_set/captcha_train.csv', - 'r', encoding='utf8') -read_label = [toonehot(row[1]) for row in csv.reader(traincsv)][:60000] -train_label = [[] for _ in range(6)] -for arr in read_label: - for index in range(6): - train_label[index].append(arr[index]) -train_label = [arr for arr in np.asarray(train_label)] -print("Shape of train data:", train_data.shape) - -print("Reading validation data...") -valicsv = open('./data/6_real_train_set/captcha_train.csv', - 'r', encoding='utf8') -vali_data = np.stack([np.array(Image.open( - './data/6_real_train_set/' + str(i) + ".jpg"))/255.0 for i in range(60001, 75001)]) -valicsv = open('./data/6_real_train_set/captcha_train.csv', - 'r', encoding='utf8') -read_label = [toonehot(row[1]) for row in csv.reader(valicsv)][60000:] -vali_label = [[] for _ in range(6)] -for arr in read_label: - for index in range(6): - vali_label[index].append(arr[index]) -vali_label = [arr for arr in np.asarray(vali_label)] -print("Shape of validation data:", vali_data.shape) - -filepath = "./data/model/real_6_model.h5" -checkpoint = ModelCheckpoint( - filepath, monitor='val_digit6_acc', verbose=1, save_best_only=True, mode='max') -earlystop = EarlyStopping(monitor='val_digit6_acc', - patience=5, verbose=1, mode='auto') -tensorBoard = TensorBoard(log_dir="./logs", histogram_freq=1) -callbacks_list = [checkpoint, earlystop, tensorBoard] -model.fit(train_data, train_label, batch_size=400, epochs=100, verbose=2, - validation_data=(vali_data, vali_label), callbacks=callbacks_list) -#!/usr/bin/python - -""" -A simple discretionary locking system for /dev/nvidia devices. - -Iain Murray, November 2009, January 2010, January 2011. -""" - -import os -import os.path - -_dev_prefix = '/dev/nvidia' -#URL = 'http://www.cs.toronto.edu/~murray/code/gpu_monitoring/' -URL = 'http://homepages.inf.ed.ac.uk/imurray2/code/gpu_monitoring/' - - -# Get ID's of NVIDIA boards. Should do this through a CUDA call, but this is -# a quick and dirty way that works for now: -def board_ids(): - """Returns integer board ids available on this machine.""" - from glob import glob - board_devs = glob(_dev_prefix + '[0-9]*') - return list(range(len(board_devs))) - - -def _lock_file(id): - """lock file from integer id""" - # /tmp is cleared on reboot on many systems, but it doesn't have to be - if os.path.exists('/dev/shm'): - # /dev/shm on linux machines is a RAM disk, so is definitely cleared - return '/dev/shm/gpu_lock_%d' % id - else: - return '/tmp/gpu_lock_%d' % id - - -def owner_of_lock(id): - """Username that has locked the device id. 
(Empty string if no lock)."""
-    import pwd
-    try:
-        statinfo = os.lstat(_lock_file(id))
-        return pwd.getpwuid(statinfo.st_uid).pw_name
-    except:
-        return ""
-
-
-def _obtain_lock(id):
-    """lock file from integer id; returns success as True/False."""
-# print id
-    try:
-        # On POSIX systems symlink creation is atomic, so this should be a
-        # robust locking operation:
-        os.symlink('/dev/null', _lock_file(id))
-        return True
-    except:
-        return False
-
-
-def _launch_reaper(id, pid):
-    """Start a process that will free a lock when process pid terminates"""
-    from subprocess import Popen, PIPE
-    me = __file__
-    if me.endswith('.pyc'):
-        me = me[:-1]
-    myloc = os.path.dirname(me)
-    if not myloc:
-        myloc = os.getcwd()
-    reaper_cmd = os.path.join(myloc, 'run_on_me_or_pid_quit')
-    Popen([reaper_cmd, str(pid), me, '--free', str(id)],
-          stdout=open('/dev/null', 'w'))
-
-
-def obtain_lock_id(pid=None):
-    """
-    Finds a free id, locks it and returns integer id, or -1 if none free.
-
-    A process is spawned that will free the lock automatically when the
-    process pid (by default the current python process) terminates.
-    """
-    id = -1
-    id = obtain_lock_id_to_hog()
-    try:
-        if id >= 0:
-            if pid is None:
-                pid = os.getpid()
-            _launch_reaper(id, pid)
-    except:
-        free_lock(id)
-        id = -1
-    return id
-
-
-def obtain_lock_id_to_hog():
-    """
-    Finds a free id, locks it and returns integer id, or -1 if none free.
-
-    * Lock must be freed manually *
-    """
-    for id in board_ids():
-        if _obtain_lock(id):
-            return id
-    return -1
-
-
-def free_lock(id):
-    """Attempts to free lock id, returning success as True/False."""
-    try:
-        filename = _lock_file(id)
-        # On POSIX systems os.rename is an atomic operation, so this is the safe
-        # way to delete a lock:
-        os.rename(filename, filename + '.redundant')
-        os.remove(filename + '.redundant')
-        return True
-    except:
-        return False
-
-
-# If run as a program:
-if __name__ == "__main__":
-    import sys
-    me = sys.argv[0]
-    # Report
-    if '--id' in sys.argv:
-        if len(sys.argv) > 2:
-            try:
-                pid = int(sys.argv[2])
-                print(pid, sys.argv[2])
-                assert(os.path.exists('/proc/%d' % pid))
-            except:
-                print('Usage: %s --id [pid_to_wait_on]' % me)
-                print('The optional process id must exist if specified.')
-                print('Otherwise the id of the parent process is used.')
-                sys.exit(1)
-        else:
-            pid = os.getppid()
-            print(pid)
-        print(obtain_lock_id(pid))
-    elif '--id-to-hog' in sys.argv:
-        print(obtain_lock_id_to_hog())
-    elif '--free' in sys.argv:
-        try:
-            id = int(sys.argv[2])
-        except:
-            print('Usage: %s --free <id>' % me)
-            sys.exit(1)
-        if free_lock(id):
-            print("Lock freed")
-        else:
-            owner = owner_of_lock(id)
-            if owner:
-                print("Failed to free lock id=%d owned by %s" % (id, owner))
-            else:
-                print("Failed to free lock, but it wasn't actually set?")
-    else:
-        print('\n Usage instructions:\n')
-        print(' To obtain and lock an id: %s --id' % me)
-        print(' The lock is automatically freed when the parent terminates')
-        print()
-        print(" To get an id that won't be freed: %s --id-to-hog" % me)
-        print(" You *must* manually free these ids: %s --free <id>\n" % me)
-        print(' More info: %s\n' % URL)
-        div = ' ' + "-"*60
-        print('\n' + div)
-        print(" NVIDIA board users:")
-        print(div)
-        for id in board_ids():
-            print(" Board %d: %s" % (id, owner_of_lock(id)))
-        print(div + '\n')
-################################################################################
-# The Neural Network (NN) based Speech Synthesis System
-# https://github.com/CSTR-Edinburgh/merlin
-#
-# Centre for Speech Technology Research
-# University of 
Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -import os -import sys -import time - -from keras_lib import configuration -from keras_lib import data_utils -from keras_lib.train import TrainKerasModels - - -class KerasClass(object): - - def __init__(self, cfg): - - ################################################### - ########## User configurable variables ############ - ################################################### - - inp_feat_dir = cfg.inp_feat_dir - out_feat_dir = cfg.out_feat_dir - pred_feat_dir = cfg.pred_feat_dir - - inp_file_ext = cfg.inp_file_ext - out_file_ext = cfg.out_file_ext - - ### Input-Output ### - - self.inp_dim = cfg.inp_dim - self.out_dim = cfg.out_dim - - self.inp_norm = cfg.inp_norm - self.out_norm = cfg.out_norm - - self.inp_stats_file = cfg.inp_stats_file - self.out_stats_file = cfg.out_stats_file - - self.inp_scaler = None - self.out_scaler = None - - #### define model params #### - - self.hidden_layer_type = cfg.hidden_layer_type - self.hidden_layer_size = cfg.hidden_layer_size - - self.sequential_training = cfg.sequential_training - - self.stateful = cfg.stateful - self.batch_size = cfg.batch_size - self.seq_length = cfg.seq_length - - self.training_algo = cfg.training_algo - self.shuffle_data = cfg.shuffle_data - - self.output_layer_type = cfg.output_layer_type - self.loss_function = cfg.loss_function - self.optimizer = cfg.optimizer - - self.rnn_params = cfg.rnn_params - self.dropout_rate = cfg.dropout_rate - self.num_of_epochs = cfg.num_of_epochs - - self.json_model_file = cfg.json_model_file - self.h5_model_file = cfg.h5_model_file - - ### define train, valid, test ### - - train_file_number = cfg.train_file_number - valid_file_number = cfg.valid_file_number - test_file_number = cfg.test_file_number - - file_id_scp = cfg.file_id_scp - test_id_scp = cfg.test_id_scp - - #### main processess #### - - self.NORMDATA = cfg.NORMDATA - 
self.TRAINMODEL = cfg.TRAINMODEL
-        self.TESTMODEL = cfg.TESTMODEL
-
-        #### Generate only test list ####
-        self.GenTestList = cfg.GenTestList
-
-        ###################################################
-        ####### End of user-defined conf variables ########
-        ###################################################
-
-        #### Create train, valid and test file lists ####
-        file_id_list = data_utils.read_file_list(file_id_scp)
-
-        train_id_list = file_id_list[0: train_file_number]
-        valid_id_list = file_id_list[train_file_number:
-                                     train_file_number + valid_file_number]
-        test_id_list = file_id_list[train_file_number +
-                                    valid_file_number: train_file_number + valid_file_number + test_file_number]
-
-        valid_test_id_list = file_id_list[train_file_number: train_file_number +
-                                          valid_file_number + test_file_number]
-
-        self.inp_train_file_list = data_utils.prepare_file_path_list(
-            train_id_list, inp_feat_dir, inp_file_ext)
-        self.out_train_file_list = data_utils.prepare_file_path_list(
-            train_id_list, out_feat_dir, out_file_ext)
-
-        self.inp_valid_file_list = data_utils.prepare_file_path_list(
-            valid_id_list, inp_feat_dir, inp_file_ext)
-        self.out_valid_file_list = data_utils.prepare_file_path_list(
-            valid_id_list, out_feat_dir, out_file_ext)
-
-        self.inp_test_file_list = data_utils.prepare_file_path_list(
-            valid_test_id_list, inp_feat_dir, inp_file_ext)
-        self.out_test_file_list = data_utils.prepare_file_path_list(
-            valid_test_id_list, out_feat_dir, out_file_ext)
-
-        self.gen_test_file_list = data_utils.prepare_file_path_list(
-            valid_test_id_list, pred_feat_dir, out_file_ext)
-
-        if self.GenTestList:
-            test_id_list = data_utils.read_file_list(test_id_scp)
-            self.inp_test_file_list = data_utils.prepare_file_path_list(
-                test_id_list, inp_feat_dir, inp_file_ext)
-            self.gen_test_file_list = data_utils.prepare_file_path_list(
-                test_id_list, pred_feat_dir, out_file_ext)
-
-        #### Define keras models class ####
-        self.keras_models = TrainKerasModels(self.inp_dim, self.hidden_layer_size, self.out_dim, self.hidden_layer_type,
-                                             output_type=self.output_layer_type, dropout_rate=self.dropout_rate,
-                                             loss_function=self.loss_function, optimizer=self.optimizer,
-                                             rnn_params=self.rnn_params)
-
-    def normlize_data(self):
-        ### normalize train data ###
-        if os.path.isfile(self.inp_stats_file) and os.path.isfile(self.out_stats_file):
-            self.inp_scaler = data_utils.load_norm_stats(
-                self.inp_stats_file, self.inp_dim, method=self.inp_norm)
-            self.out_scaler = data_utils.load_norm_stats(
-                self.out_stats_file, self.out_dim, method=self.out_norm)
-        else:
-            print('preparing train_x, train_y from input and output feature files...')
-            train_x, train_y, train_flen = data_utils.read_data_from_file_list(self.inp_train_file_list, self.out_train_file_list,
-                                                                               self.inp_dim, self.out_dim, sequential_training=self.sequential_training)
-
-            # store the scalers on self so the train/test steps actually use them
-            print('computing norm stats for train_x...')
-            self.inp_scaler = data_utils.compute_norm_stats(
-                train_x, self.inp_stats_file, method=self.inp_norm)
-
-            print('computing norm stats for train_y...')
-            self.out_scaler = data_utils.compute_norm_stats(
-                train_y, self.out_stats_file, method=self.out_norm)
-
-    def train_keras_model(self):
-        #### define the model ####
-        if not self.sequential_training:
-            self.keras_models.define_feedforward_model()
-        elif self.stateful:
-            self.keras_models.define_stateful_model(
-                batch_size=self.batch_size, seq_length=self.seq_length)
-        else:
-            self.keras_models.define_sequence_model()
-
-        #### load the data ####
-        print('preparing train_x, train_y from input and output feature 
files...') - train_x, train_y, train_flen = data_utils.read_data_from_file_list(self.inp_train_file_list, self.out_train_file_list, - self.inp_dim, self.out_dim, sequential_training=self.sequential_training) - print('preparing valid_x, valid_y from input and output feature files...') - valid_x, valid_y, valid_flen = data_utils.read_data_from_file_list(self.inp_valid_file_list, self.out_valid_file_list, - self.inp_dim, self.out_dim, sequential_training=self.sequential_training) - - #### normalize the data #### - data_utils.norm_data(train_x, self.inp_scaler, - sequential_training=self.sequential_training) - data_utils.norm_data(train_y, self.out_scaler, - sequential_training=self.sequential_training) - data_utils.norm_data(valid_x, self.inp_scaler, - sequential_training=self.sequential_training) - data_utils.norm_data(valid_y, self.out_scaler, - sequential_training=self.sequential_training) - - #### train the model #### - print('training...') - if not self.sequential_training: - ### Train feedforward model ### - self.keras_models.train_feedforward_model( - train_x, train_y, valid_x, valid_y, batch_size=self.batch_size, num_of_epochs=self.num_of_epochs, shuffle_data=self.shuffle_data) - else: - ### Train recurrent model ### - print(('training algorithm: %d' % (self.training_algo))) - self.keras_models.train_sequence_model(train_x, train_y, valid_x, valid_y, train_flen, batch_size=self.batch_size, num_of_epochs=self.num_of_epochs, - shuffle_data=self.shuffle_data, training_algo=self.training_algo) - - #### store the model #### - self.keras_models.save_model(self.json_model_file, self.h5_model_file) - - def test_keras_model(self): - #### load the model #### - self.keras_models.load_model(self.json_model_file, self.h5_model_file) - - #### load the data #### - print('preparing test_x from input feature files...') - test_x, test_flen = data_utils.read_test_data_from_file_list( - self.inp_test_file_list, self.inp_dim) - - #### normalize the data #### - data_utils.norm_data(test_x, self.inp_scaler) - - #### compute predictions #### - self.keras_models.predict( - test_x, self.out_scaler, self.gen_test_file_list, self.sequential_training) - - def main_function(self): - ### Implement each module ### - if self.NORMDATA: - self.normlize_data() - - if self.TRAINMODEL: - self.train_keras_model() - - if self.TESTMODEL: - self.test_keras_model() - - -if __name__ == "__main__": - - if len(sys.argv) != 2: - print('usage: python run_keras_with_merlin_io.py [config file name]') - sys.exit(1) - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.configuration() - - config_file = sys.argv[1] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - - print("--- Job started ---") - start_time = time.time() - - # main function - keras_instance = KerasClass(cfg) - keras_instance.main_function() - - (m, s) = divmod(int(time.time() - start_time), 60) - print(("--- Job completion time: %d min. %d sec ---" % (m, s))) - - sys.exit(0) -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://github.com/CSTR-Edinburgh/merlin -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. 
-# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -import pickle -import gzip -import os -import sys -import errno -import time -import math - -import subprocess -import socket # only for socket.getfqdn() -import multiprocessing - -# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why) -import numpy -#import gnumpy as gnp -# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself -import numpy.distutils.__config__ -# and only after that can we import theano -import theano - -from utils.providers import ListDataProvider - -from frontend.label_normalisation import HTSLabelNormalisation -from frontend.silence_remover import SilenceRemover -from frontend.silence_remover import trim_silence -from frontend.min_max_norm import MinMaxNormalisation -from frontend.acoustic_composition import AcousticComposition -from frontend.parameter_generation import ParameterGeneration -from frontend.mean_variance_norm import MeanVarianceNorm - -# the new class for label composition and normalisation -from frontend.label_composer import LabelComposer -from frontend.label_modifier import HTSLabelModification -from frontend.merge_features import MergeFeat - -import configuration -from models.deep_rnn import DeepRecurrentNetwork - -from utils.compute_distortion import DistortionComputation, IndividualDistortionComp -from utils.generate import generate_wav -from utils.acous_feat_extraction import acous_feat_extraction -from utils.learn_rates import ExpDecreaseLearningRate - -from io_funcs.binary_io import BinaryIOCollection - -# our custom logging class that can also plot -from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot -import logging # as logging -import logging.config -import io -from utils.file_paths import FilePaths -from utils.utils import read_file_list, prepare_file_path_list - - -def 
extract_file_id_list(file_list): - file_id_list = [] - for file_name in file_list: - file_id = os.path.basename(os.path.splitext(file_name)[0]) - file_id_list.append(file_id) - - return file_id_list - - -def make_output_file_list(out_dir, in_file_lists): - out_file_lists = [] - - for in_file_name in in_file_lists: - file_id = os.path.basename(in_file_name) - out_file_name = out_dir + '/' + file_id - out_file_lists.append(out_file_name) - - return out_file_lists - - -def visualize_dnn(dnn): - - plotlogger = logging.getLogger("plotting") - - # reference activation weights in layers - W = list() - layer_name = list() - for i in range(len(dnn.params)): - aa = dnn.params[i].get_value(borrow=True).T - print(aa.shape, aa.size) - if aa.size > aa.shape[0]: - W.append(aa) - layer_name.append(dnn.params[i].name) - - # plot activation weights including input and output - layer_num = len(W) - for i_layer in range(layer_num): - fig_name = 'Activation weights W' + \ - str(i_layer) + '_' + layer_name[i_layer] - fig_title = 'Activation weights of W' + str(i_layer) - xlabel = 'Neuron index of hidden layer ' + str(i_layer) - ylabel = 'Neuron index of hidden layer ' + str(i_layer+1) - if i_layer == 0: - xlabel = 'Input feature index' - if i_layer == layer_num-1: - ylabel = 'Output feature index' - logger.create_plot(fig_name, SingleWeightMatrixPlot) - plotlogger.add_plot_point(fig_name, fig_name, W[i_layer]) - plotlogger.save_plot(fig_name, title=fig_name, - xlabel=xlabel, ylabel=ylabel) - - -def load_covariance(var_file_dict, out_dimension_dict): - var = {} - io_funcs = BinaryIOCollection() - for feature_name in list(var_file_dict.keys()): - var_values, dimension = io_funcs.load_binary_file_frame( - var_file_dict[feature_name], 1) - - var_values = numpy.reshape( - var_values, (out_dimension_dict[feature_name], 1)) - - var[feature_name] = var_values - - return var - - -def train_DNN(train_xy_file_list, valid_xy_file_list, - nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False, var_dict=None, - cmp_mean_vector=None, cmp_std_vector=None, init_dnn_model_file=None): - - # get loggers for this function - # this one writes to both console and file - logger = logging.getLogger("main.train_DNN") - logger.debug('Starting train_DNN') - - if plot: - # this one takes care of plotting duties - plotlogger = logging.getLogger("plotting") - # create an (empty) plot of training convergence, ready to receive data points - logger.create_plot('training convergence', MultipleSeriesPlot) - - try: - assert numpy.sum(ms_outs) == n_outs - except AssertionError: - logger.critical( - 'the summation of multi-stream outputs does not equal to %d' % (n_outs)) - raise - - ####parameters##### - finetune_lr = float(hyper_params['learning_rate']) - training_epochs = int(hyper_params['training_epochs']) - batch_size = int(hyper_params['batch_size']) - l1_reg = float(hyper_params['l1_reg']) - l2_reg = float(hyper_params['l2_reg']) - warmup_epoch = int(hyper_params['warmup_epoch']) - momentum = float(hyper_params['momentum']) - warmup_momentum = float(hyper_params['warmup_momentum']) - - hidden_layer_size = hyper_params['hidden_layer_size'] - - buffer_utt_size = buffer_size - early_stop_epoch = int(hyper_params['early_stop_epochs']) - - hidden_activation = hyper_params['hidden_activation'] - output_activation = hyper_params['output_activation'] - - model_type = hyper_params['model_type'] - hidden_layer_type = hyper_params['hidden_layer_type'] - - # use a switch to turn on pretraining - # pretraining may not help too 
much, if this case, we turn it off to save time - do_pretraining = hyper_params['do_pretraining'] - pretraining_epochs = int(hyper_params['pretraining_epochs']) - pretraining_lr = float(hyper_params['pretraining_lr']) - - sequential_training = hyper_params['sequential_training'] - dropout_rate = hyper_params['dropout_rate'] - - buffer_size = int(buffer_size / batch_size) * batch_size - - ################### - (train_x_file_list, train_y_file_list) = train_xy_file_list - (valid_x_file_list, valid_y_file_list) = valid_xy_file_list - - logger.debug('Creating training data provider') - train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, - sequential=sequential_training, shuffle=True) - - logger.debug('Creating validation data provider') - valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, - sequential=sequential_training, shuffle=False) - - if cfg.rnn_batch_training: - train_data_reader.set_rnn_params(training_algo=cfg.training_algo, batch_size=cfg.batch_size, - seq_length=cfg.seq_length, merge_size=cfg.merge_size, bucket_range=cfg.bucket_range) - valid_data_reader.reshape_input_output() - - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition() - train_set_x, train_set_y = shared_train_set_xy - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_one_partition() - valid_set_x, valid_set_y = shared_valid_set_xy - train_data_reader.reset() - valid_data_reader.reset() - - # temporally we use the training set as pretrain_set_x. - # we need to support any data for pretraining - - # numpy random generator - numpy_rng = numpy.random.RandomState(123) - logger.info('building the model') - - dnn_model = None - pretrain_fn = None # not all the model support pretraining right now - train_fn = None - valid_fn = None - # valid_fn and valid_model are the same. reserve to computer multi-stream distortion - valid_model = None - if model_type == 'DNN': - dnn_model = DeepRecurrentNetwork(n_in=n_ins, hidden_layer_size=hidden_layer_size, n_out=n_outs, - L1_reg=l1_reg, L2_reg=l2_reg, hidden_layer_type=hidden_layer_type, output_type=cfg.output_layer_type, - dropout_rate=dropout_rate, optimizer=cfg.optimizer, rnn_batch_training=cfg.rnn_batch_training) - - else: - logger.critical('%s type NN model is not supported!' % (model_type)) - raise - - # Model adaptation -- fine tuning the existing model - # We can't just unpickle the old model and use that because fine-tune functions - # depend on opt_l2e option used in construction of initial model. One way around this - # would be to unpickle, manually set unpickled_dnn_model.opt_l2e=True and then call - # unpickled_dnn_model.build_finetne_function() again. 
This is another way, construct - # new model from scratch with opt_l2e=True, then copy existing weights over: - use_lhuc = cfg.use_lhuc - if init_dnn_model_file != "_": - logger.info('load parameters from existing model: %s' % - (init_dnn_model_file)) - if not os.path.isfile(init_dnn_model_file): - sys.exit('Model file %s does not exist' % (init_dnn_model_file)) - existing_dnn_model = pickle.load(open(init_dnn_model_file, 'rb')) - if not use_lhuc and not len(existing_dnn_model.params) == len(dnn_model.params): - sys.exit('Old and new models have different numbers of weight matrices') - elif use_lhuc and len(dnn_model.params) < len(existing_dnn_model.params): - sys.exit( - 'In LHUC adaptation new model must have more parameters than old model.') - # assign the existing dnn model parameters to the new dnn model - k = 0 - for i in range(len(dnn_model.params)): - ## Added for LHUC ## - # In LHUC, we keep all the old parameters intact and learn only a small set of new - # parameters - if dnn_model.params[i].name == 'c': - continue - else: - old_val = existing_dnn_model.params[k].get_value() - new_val = dnn_model.params[i].get_value() - if numpy.shape(old_val) == numpy.shape(new_val): - dnn_model.params[i].set_value(old_val) - else: - sys.exit('old and new weight matrices have different shapes') - k = k + 1 - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), use_lhuc, layer_index=cfg.freeze_layers) # , batch_size=batch_size - logger.info('fine-tuning the %s model' % (model_type)) - - start_time = time.time() - - best_dnn_model = dnn_model - best_validation_loss = sys.float_info.max - previous_loss = sys.float_info.max - - lr_decay = cfg.lr_decay - if lr_decay > 0: - early_stop_epoch *= lr_decay - - early_stop = 0 - val_loss_counter = 0 - - previous_finetune_lr = finetune_lr - - epoch = 0 - while (epoch < training_epochs): - epoch = epoch + 1 - - if lr_decay == 0: - # fixed learning rate - reduce_lr = False - elif lr_decay < 0: - # exponential decay - reduce_lr = False if epoch <= warmup_epoch else True - elif val_loss_counter > 0: - # linear decay - reduce_lr = False - if val_loss_counter % lr_decay == 0: - reduce_lr = True - val_loss_counter = 0 - else: - # no decay - reduce_lr = False - - if reduce_lr: - current_finetune_lr = previous_finetune_lr * 0.5 - current_momentum = momentum - else: - current_finetune_lr = previous_finetune_lr - current_momentum = warmup_momentum - - previous_finetune_lr = current_finetune_lr - - train_error = [] - sub_start_time = time.time() - - logger.debug("training params -- learning rate: %f, early_stop: %d/%d" % - (current_finetune_lr, early_stop, early_stop_epoch)) - while (not train_data_reader.is_finish()): - - _, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition() - - # if sequential training, the batch size will be the number of frames in an utterance - # batch_size for sequential training is considered only when rnn_batch_training is set to True - if sequential_training == True: - batch_size = temp_train_set_x.shape[0] - - n_train_batches = temp_train_set_x.shape[0] // batch_size - for index in range(n_train_batches): - # send a batch to the shared variable, rather than pass the batch size and batch index to the finetune function - train_set_x.set_value(numpy.asarray(temp_train_set_x[index*batch_size:( - index + 1)*batch_size], dtype=theano.config.floatX), borrow=True) - train_set_y.set_value(numpy.asarray(temp_train_set_y[index*batch_size:( - index + 1)*batch_size], 
dtype=theano.config.floatX), borrow=True) - - this_train_error = train_fn( - current_finetune_lr, current_momentum) - - train_error.append(this_train_error) - - train_data_reader.reset() - - logger.debug('calculating validation loss') - validation_losses = [] - while (not valid_data_reader.is_finish()): - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_one_partition() - valid_set_x.set_value(numpy.asarray( - temp_valid_set_x, dtype=theano.config.floatX), borrow=True) - valid_set_y.set_value(numpy.asarray( - temp_valid_set_y, dtype=theano.config.floatX), borrow=True) - - this_valid_loss = valid_fn() - - validation_losses.append(this_valid_loss) - valid_data_reader.reset() - - this_validation_loss = numpy.mean(validation_losses) - - this_train_valid_loss = numpy.mean(numpy.asarray(train_error)) - - sub_end_time = time.time() - - loss_difference = this_validation_loss - previous_loss - - logger.info('epoch %i, validation error %f, train error %f time spent %.2f' % ( - epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time))) - if plot: - plotlogger.add_plot_point( - 'training convergence', 'validation set', (epoch, this_validation_loss)) - plotlogger.add_plot_point( - 'training convergence', 'training set', (epoch, this_train_valid_loss)) - plotlogger.save_plot( - 'training convergence', title='Progress of training and validation error', xlabel='epochs', ylabel='error') - - if this_validation_loss < best_validation_loss: - pickle.dump(best_dnn_model, open(nnets_file_name, 'wb')) - - best_dnn_model = dnn_model - best_validation_loss = this_validation_loss - - if this_validation_loss >= previous_loss: - logger.debug('validation loss increased') - val_loss_counter += 1 - early_stop += 1 - - if epoch > 15 and early_stop > early_stop_epoch: - logger.debug('stopping early') - break - - if math.isnan(this_validation_loss): - break - - previous_loss = this_validation_loss - - end_time = time.time() - - logger.info('overall training time: %.2fm validation error %f' % - ((end_time - start_time) / 60., best_validation_loss)) - - if plot: - plotlogger.save_plot( - 'training convergence', title='Final training and validation error', xlabel='epochs', ylabel='error') - - return best_validation_loss - - -def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list, reshape_io=False): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - - file_number = len(valid_file_list) - - for i in range(file_number): # file_number - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size // n_ins))] - test_set_x = features.reshape((-1, n_ins)) - n_rows = test_set_x.shape[0] - - if reshape_io: - test_set_x = numpy.reshape( - test_set_x, (1, test_set_x.shape[0], n_ins)) - test_set_x = numpy.array(test_set_x, 'float32') - - predicted_parameter = dnn_model.parameter_prediction(test_set_x) - predicted_parameter = predicted_parameter.reshape(-1, n_outs) - predicted_parameter = predicted_parameter[0:n_rows] - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - 
logger.debug('saved to %s' % out_file_list[i]) - fid.close() - -# generate bottleneck layer as features - - -def dnn_hidden_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list, bottleneck_index): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - - file_number = len(valid_file_list) - - for i in range(file_number): - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size // n_ins))] - features = features.reshape((-1, n_ins)) - temp_set_x = features.tolist() - test_set_x = theano.shared(numpy.asarray( - temp_set_x, dtype=theano.config.floatX)) - - predicted_parameter = dnn_model.generate_hidden_layer( - test_set_x, bottleneck_index) - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - logger.debug('saved to %s' % out_file_list[i]) - fid.close() - - -def perform_acoustic_composition_on_split(args): - """ Performs acoustic composition on one chunk of data. - This is used as input for Pool.map to allow parallel acoustic composition. - """ - (delta_win, acc_win, in_file_list_dict, nn_cmp_file_list, - in_dimension_dict, out_dimension_dict) = args - acoustic_worker = AcousticComposition(delta_win=delta_win, acc_win=acc_win) - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, in_dimension_dict, out_dimension_dict) - - -def perform_acoustic_composition(delta_win, acc_win, in_file_list_dict, nn_cmp_file_list, cfg, parallel=True): - """ Runs acoustic composition from in_file_list_dict to nn_cmp_file_list. - If parallel is true, splits the data into multiple chunks and calls - perform_acoustic_composition_on_split for each chunk. 
- """ - if parallel: - num_splits = multiprocessing.cpu_count() - pool = multiprocessing.Pool(num_splits) - - # split data into a list of num_splits tuples with each tuple representing - # the parameters for perform_acoustic_compositon_on_split - splits_full = [ - (delta_win, - acc_win, - {stream: in_file_list_dict[stream][i::num_splits] - for stream in in_file_list_dict}, - nn_cmp_file_list[i::num_splits], - cfg.in_dimension_dict, - cfg.out_dimension_dict - ) for i in range(num_splits)] - - pool.map(perform_acoustic_composition_on_split, splits_full) - pool.close() - pool.join() - else: - acoustic_worker = AcousticComposition( - delta_win=delta_win, acc_win=acc_win) - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, cfg.in_dimension_dict, cfg.out_dimension_dict) - - -def main_function(cfg): - file_paths = FilePaths(cfg) - - # get a logger for this main function - logger = logging.getLogger("main") - - # get another logger to handle plotting duties - plotlogger = logging.getLogger("plotting") - - # later, we might do this via a handler that is created, attached and configured - # using the standard config mechanism of the logging module - # but for now we need to do it manually - plotlogger.set_plot_path(cfg.plot_dir) - - # create plot dir if set to True - if not os.path.exists(cfg.plot_dir) and cfg.plot: - os.makedirs(cfg.plot_dir) - - #### parameter setting######## - hidden_layer_size = cfg.hyper_params['hidden_layer_size'] - - # prepare environment - try: - file_id_list = read_file_list(cfg.file_id_scp) - logger.debug('Loaded file id list from %s' % cfg.file_id_scp) - except IOError: - # this means that open(...) threw an error - logger.critical('Could not load file id list from %s' % - cfg.file_id_scp) - raise - - # total file number including training, development, and testing - total_file_number = len(file_id_list) - - assert cfg.train_file_number+cfg.valid_file_number + \ - cfg.test_file_number == total_file_number, 'check train, valid, test file number' - - data_dir = cfg.data_dir - - inter_data_dir = cfg.inter_data_dir - nn_cmp_dir = file_paths.nn_cmp_dir - nn_cmp_norm_dir = file_paths.nn_cmp_norm_dir - model_dir = file_paths.model_dir - gen_dir = file_paths.gen_dir - - in_file_list_dict = {} - - for feature_name in list(cfg.in_dir_dict.keys()): - in_file_list_dict[feature_name] = prepare_file_path_list( - file_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False) - - nn_cmp_file_list = file_paths.get_nn_cmp_file_list() - nn_cmp_norm_file_list = file_paths.get_nn_cmp_norm_file_list() - - # normalisation information - norm_info_file = file_paths.norm_info_file - - # normalise input full context label - # currently supporting two different forms of lingustic features - # later, we should generalise this - - assert cfg.label_style == 'HTS', 'Only HTS-style labels are now supported as input to Merlin' - - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name, add_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - add_feat_dim = sum(cfg.additional_features.values()) - lab_dim = label_normaliser.dimension + add_feat_dim + cfg.appended_input_dim - if cfg.VoiceConversion: - lab_dim = cfg.cmp_dim - logger.info('Input label dimension is %d' % lab_dim) - suffix = str(lab_dim) - - if cfg.process_labels_in_work_dir: - inter_data_dir = cfg.work_dir - - # the number can be removed - file_paths.set_label_dir(label_normaliser.dimension, suffix, lab_dim) - file_paths.set_label_file_list() 
- - binary_label_dir = file_paths.binary_label_dir - nn_label_dir = file_paths.nn_label_dir - nn_label_norm_dir = file_paths.nn_label_norm_dir - - in_label_align_file_list = file_paths.in_label_align_file_list - binary_label_file_list = file_paths.binary_label_file_list - nn_label_file_list = file_paths.nn_label_file_list - nn_label_norm_file_list = file_paths.nn_label_norm_file_list - - min_max_normaliser = None - - label_norm_file = file_paths.label_norm_file - - test_id_list = file_paths.test_id_list - - # Debug:---------------------------------- - if cfg.ACFTEXTR: - logger.info('acoustic feature extraction') - acous_feat_extraction(cfg.nat_wav_dir, file_id_list, cfg) - # generate_wav(gen_dir, file_id_list, cfg) # generated speech - - # ----------------------------------------- - - if cfg.NORMLAB: - # simple HTS labels - logger.info( - 'preparing label data (input) using standard HTS style labels') - label_normaliser.perform_normalisation( - in_label_align_file_list, binary_label_file_list, label_type=cfg.label_type) - - if cfg.additional_features: - out_feat_file_list = file_paths.out_feat_file_list - in_dim = label_normaliser.dimension - - for new_feature, new_feature_dim in cfg.additional_features.items(): - new_feat_dir = os.path.join(data_dir, new_feature) - new_feat_file_list = prepare_file_path_list( - file_id_list, new_feat_dir, '.'+new_feature) - - merger = MergeFeat(lab_dim=in_dim, feat_dim=new_feature_dim) - merger.merge_data(binary_label_file_list, - new_feat_file_list, out_feat_file_list) - in_dim += new_feature_dim - - binary_label_file_list = out_feat_file_list - - remover = SilenceRemover(n_cmp=lab_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type, - remove_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - remover.remove_silence(binary_label_file_list, - in_label_align_file_list, nn_label_file_list) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - - # use only training data to find min-max information, then apply on the whole dataset - if cfg.GenTestList: - min_max_normaliser.load_min_max_values(label_norm_file) - else: - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - - # enforce silence such that the normalization runs without removing silence: only for final synthesis - if cfg.GenTestList and cfg.enforce_silence: - min_max_normaliser.normalise_data( - binary_label_file_list, nn_label_norm_file_list) - else: - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if min_max_normaliser != None and not cfg.GenTestList: - # save label normalisation information for unseen testing labels - label_min_vector = min_max_normaliser.min_vector - label_max_vector = min_max_normaliser.max_vector - label_norm_info = numpy.concatenate( - (label_min_vector, label_max_vector), axis=0) - - label_norm_info = numpy.array(label_norm_info, 'float32') - fid = open(label_norm_file, 'wb') - label_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (label_min_vector.size, label_norm_file)) - - # make output duration data - if cfg.MAKEDUR: - logger.info('creating duration (output) features') - label_normaliser.prepare_dur_data( - in_label_align_file_list, file_paths.dur_file_list, cfg.label_type, cfg.dur_feature_type) - - # make output acoustic data - if cfg.MAKECMP: - logger.info('creating acoustic (output) features') - delta_win = cfg.delta_win # [-0.5, 0.0, 0.5] - acc_win = cfg.acc_win # 
[1.0, -2.0, 1.0] - - if cfg.GenTestList: - for feature_name in list(cfg.in_dir_dict.keys()): - in_file_list_dict[feature_name] = prepare_file_path_list( - test_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False) - nn_cmp_file_list = prepare_file_path_list( - test_id_list, nn_cmp_dir, cfg.cmp_ext) - nn_cmp_norm_file_list = prepare_file_path_list( - test_id_list, nn_cmp_norm_dir, cfg.cmp_ext) - - if 'dur' in list(cfg.in_dir_dict.keys()) and cfg.AcousticModel: - lf0_file_list = file_paths.get_lf0_file_list() - acoustic_worker = AcousticComposition( - delta_win=delta_win, acc_win=acc_win) - acoustic_worker.make_equal_frames( - dur_file_list, lf0_file_list, cfg.in_dimension_dict) - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, cfg.in_dimension_dict, cfg.out_dimension_dict) - else: - perform_acoustic_composition( - delta_win, acc_win, in_file_list_dict, nn_cmp_file_list, cfg, parallel=True) - - if cfg.remove_silence_using_binary_labels: - # do this to get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from CMP using binary label file') - - # overwrite the untrimmed audio with the trimmed version: - trim_silence(nn_cmp_file_list, nn_cmp_file_list, cfg.cmp_dim, - binary_label_file_list, lab_dim, silence_feature) - - elif cfg.remove_silence_using_hts_labels: - # back off to previous method using HTS labels: - remover = SilenceRemover(n_cmp=cfg.cmp_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type, - remove_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - remover.remove_silence( - nn_cmp_file_list, in_label_align_file_list, nn_cmp_file_list) # save to itself - - # save acoustic normalisation information for normalising the features back - var_dir = file_paths.var_dir - var_file_dict = file_paths.get_var_dic() - - # normalise output acoustic data - if cfg.NORMCMP: - logger.info('normalising acoustic (output) features using method %s' % - cfg.output_feature_normalisation) - cmp_norm_info = None - if cfg.output_feature_normalisation == 'MVN': - normaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - if cfg.GenTestList: - # load mean std values - global_mean_vector, global_std_vector = normaliser.load_mean_std_values( - norm_info_file) - else: - # calculate mean and std vectors on the training data, and apply on the whole dataset - global_mean_vector = normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number], 0, cfg.cmp_dim) - global_std_vector = normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector, 0, cfg.cmp_dim) - # for hmpd vocoder we don't need to normalize the - # pdd values - if cfg.vocoder_type == 'hmpd': - stream_start_index = {} - dimension_index = 0 - recorded_vuv = False - vuv_dimension = None - for feature_name in cfg.out_dimension_dict.keys(): - if feature_name != 'vuv': - stream_start_index[feature_name] = dimension_index - else: - vuv_dimension = dimension_index - recorded_vuv = True - - dimension_index += cfg.out_dimension_dict[feature_name] - logger.info( - 'hmpd pdd values are not normalized since they are in 0 to 1') - global_mean_vector[:, stream_start_index['pdd'] : stream_start_index['pdd'] + cfg.out_dimension_dict['pdd']] = 0 - global_std_vector[:, stream_start_index['pdd'] : stream_start_index['pdd'] + 
cfg.out_dimension_dict['pdd']] = 1 - normaliser.feature_normalisation( - nn_cmp_file_list, nn_cmp_norm_file_list) - cmp_norm_info = numpy.concatenate( - (global_mean_vector, global_std_vector), axis=0) - - elif cfg.output_feature_normalisation == 'MINMAX': - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim, min_value=0.01, max_value=0.99) - if cfg.GenTestList: - min_max_normaliser.load_min_max_values(norm_info_file) - else: - min_max_normaliser.find_min_max_values( - nn_cmp_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_cmp_file_list, nn_cmp_norm_file_list) - - cmp_min_vector = min_max_normaliser.min_vector - cmp_max_vector = min_max_normaliser.max_vector - cmp_norm_info = numpy.concatenate( - (cmp_min_vector, cmp_max_vector), axis=0) - - else: - logger.critical('Normalisation type %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - if not cfg.GenTestList: - cmp_norm_info = numpy.array(cmp_norm_info, 'float32') - fid = open(norm_info_file, 'wb') - cmp_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (cfg.output_feature_normalisation, norm_info_file)) - - feature_index = 0 - for feature_name in list(cfg.out_dimension_dict.keys()): - feature_std_vector = numpy.array( - global_std_vector[:, feature_index:feature_index+cfg.out_dimension_dict[feature_name]], 'float32') - - fid = open(var_file_dict[feature_name], 'w') - feature_var_vector = feature_std_vector**2 - feature_var_vector.tofile(fid) - fid.close() - - logger.info('saved %s variance vector to %s' % - (feature_name, var_file_dict[feature_name])) - - feature_index += cfg.out_dimension_dict[feature_name] - - train_x_file_list, train_y_file_list = file_paths.get_train_list_x_y() - valid_x_file_list, valid_y_file_list = file_paths.get_valid_list_x_y() - test_x_file_list, test_y_file_list = file_paths.get_test_list_x_y() - - # we need to know the label dimension before training the DNN - # computing that requires us to look at the labels - # - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name, add_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - add_feat_dim = sum(cfg.additional_features.values()) - lab_dim = label_normaliser.dimension + add_feat_dim + cfg.appended_input_dim - if cfg.VoiceConversion: - lab_dim = cfg.cmp_dim - - logger.info('label dimension is %d' % lab_dim) - - combined_model_arch = str(len(hidden_layer_size)) - for hid_size in hidden_layer_size: - combined_model_arch += '_' + str(hid_size) - - nnets_file_name = file_paths.get_nnets_file_name() - temp_dir_name = file_paths.get_temp_nn_dir_name() - - gen_dir = os.path.join(gen_dir, temp_dir_name) - - if cfg.switch_to_keras or cfg.switch_to_tensorflow: - ### set configuration variables ### - cfg.inp_dim = lab_dim - cfg.out_dim = cfg.cmp_dim - - cfg.inp_feat_dir = nn_label_norm_dir - cfg.out_feat_dir = nn_cmp_norm_dir - cfg.pred_feat_dir = gen_dir - - if cfg.GenTestList and cfg.test_synth_dir != "None": - cfg.inp_feat_dir = cfg.test_synth_dir - cfg.pred_feat_dir = cfg.test_synth_dir - - if cfg.switch_to_keras: - ### call kerasclass and use an instance ### - from run_keras_with_merlin_io import KerasClass - keras_instance = KerasClass(cfg) - - elif cfg.switch_to_tensorflow: - ### call Tensorflowclass and use an instance ### - from run_tensorflow_with_merlin_io import TensorflowClass - tf_instance = TensorflowClass(cfg) - - # DNN model training - if cfg.TRAINDNN: - - var_dict = load_covariance(var_file_dict, 
cfg.out_dimension_dict) - - logger.info('training DNN') - - fid = open(norm_info_file, 'rb') - cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32) - fid.close() - cmp_min_max = cmp_min_max.reshape((2, -1)) - cmp_mean_vector = cmp_min_max[0, ] - cmp_std_vector = cmp_min_max[1, ] - - try: - os.makedirs(model_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create model directory %s' % model_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - try: - if cfg.switch_to_keras: - keras_instance.train_keras_model() - elif cfg.switch_to_tensorflow: - tf_instance.train_tensorflow_model() - else: - train_DNN(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot, var_dict=var_dict, - cmp_mean_vector=cmp_mean_vector, cmp_std_vector=cmp_std_vector, init_dnn_model_file=cfg.start_from_trained_model) - except KeyboardInterrupt: - logger.critical('train_DNN interrupted via keyboard') - # Could 'raise' the exception further, but that causes a deep traceback to be printed - # which we don't care about for a keyboard interrupt. So, just bail out immediately - sys.exit(1) - except: - logger.critical('train_DNN threw an exception') - raise - - if cfg.GENBNFEA: - # Please only tune on this step when you want to generate bottleneck features from DNN - gen_dir = file_paths.bottleneck_features - - bottleneck_size = min(hidden_layer_size) - bottleneck_index = 0 - for i in range(len(hidden_layer_size)): - if hidden_layer_size[i] == bottleneck_size: - bottleneck_index = i - - logger.info('generating bottleneck features from DNN') - - try: - os.makedirs(gen_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create generation directory %s' % gen_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - gen_file_id_list = file_id_list[0:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - test_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - - gen_file_list = prepare_file_path_list( - gen_file_id_list, gen_dir, cfg.cmp_ext) - - dnn_hidden_generation(test_x_file_list, nnets_file_name, - lab_dim, cfg.cmp_dim, gen_file_list, bottleneck_index) - - # generate parameters from DNN - gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.GenTestList: - gen_file_id_list = test_id_list - test_x_file_list = nn_label_norm_file_list - if cfg.test_synth_dir != "None": - gen_dir = cfg.test_synth_dir - - if cfg.DNNGEN: - logger.info('generating from DNN') - - try: - os.makedirs(gen_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create generation directory %s' % gen_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - gen_file_list = prepare_file_path_list( - gen_file_id_list, gen_dir, cfg.cmp_ext) - - if cfg.switch_to_keras: - 
keras_instance.test_keras_model() - elif cfg.switch_to_tensorflow: - tf_instance.test_tensorflow_model() - else: - reshape_io = True if cfg.rnn_batch_training else False - dnn_generation(test_x_file_list, nnets_file_name, - lab_dim, cfg.cmp_dim, gen_file_list, reshape_io) - - logger.debug('denormalising generated output using method %s' % - cfg.output_feature_normalisation) - - fid = open(norm_info_file, 'rb') - cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32) - fid.close() - cmp_min_max = cmp_min_max.reshape((2, -1)) - cmp_min_vector = cmp_min_max[0, ] - cmp_max_vector = cmp_min_max[1, ] - - if cfg.output_feature_normalisation == 'MVN': - denormaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - denormaliser.feature_denormalisation( - gen_file_list, gen_file_list, cmp_min_vector, cmp_max_vector) - - elif cfg.output_feature_normalisation == 'MINMAX': - denormaliser = MinMaxNormalisation( - cfg.cmp_dim, min_value=0.01, max_value=0.99, min_vector=cmp_min_vector, max_vector=cmp_max_vector) - denormaliser.denormalise_data(gen_file_list, gen_file_list) - else: - logger.critical('denormalising method %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - if cfg.AcousticModel: - # perform MLPG to smooth parameter trajectory - # lf0 is included, the output features much have vuv. - generator = ParameterGeneration( - gen_wav_features=cfg.gen_wav_features, enforce_silence=cfg.enforce_silence) - generator.acoustic_decomposition(gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, - cfg.file_extension_dict, var_file_dict, do_MLPG=cfg.do_MLPG, cfg=cfg) - - if cfg.DurationModel: - ### Perform duration normalization(min. state dur set to 1) ### - gen_dur_list = prepare_file_path_list( - gen_file_id_list, gen_dir, cfg.dur_ext) - gen_label_list = prepare_file_path_list( - gen_file_id_list, gen_dir, cfg.lab_ext) - in_gen_label_align_file_list = prepare_file_path_list( - gen_file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - - generator = ParameterGeneration( - gen_wav_features=cfg.gen_wav_features) - generator.duration_decomposition( - gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict) - - label_modifier = HTSLabelModification( - silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - label_modifier.modify_duration_labels( - in_gen_label_align_file_list, gen_dur_list, gen_label_list) - - # generate wav - if cfg.GENWAV: - logger.info('reconstructing waveform(s)') - generate_wav(gen_dir, gen_file_id_list, cfg) # generated speech -# generate_wav(nn_cmp_dir, gen_file_id_list, cfg) # reference copy synthesis speech - - ### setting back to original conditions before calculating objective scores ### - if cfg.GenTestList: - in_label_align_file_list = prepare_file_path_list( - file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - binary_label_file_list = prepare_file_path_list( - file_id_list, binary_label_dir, cfg.lab_ext) - gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - - # evaluation: RMSE and CORR for duration - if cfg.CALMCD and cfg.DurationModel: - logger.info('calculating MCD') - - ref_data_dir = os.path.join(inter_data_dir, 'ref_data') - - ref_dur_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.dur_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - calculator = IndividualDistortionComp() - - valid_file_id_list = 
file_id_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_file_id_list = file_id_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['dur'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_dur_list, cfg.dur_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover(n_cmp=cfg.dur_dim, silence_pattern=cfg.silence_pattern, - label_type=cfg.label_type, remove_frame_features=cfg.add_frame_features) - remover.remove_silence(in_file_list_dict['dur'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_dur_list) - - valid_dur_rmse, valid_dur_corr = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.dur_ext, cfg.dur_dim) - test_dur_rmse, test_dur_corr = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.dur_ext, cfg.dur_dim) - - logger.info('Develop: DNN -- RMSE: %.3f frames/phoneme; CORR: %.3f; ' - % (valid_dur_rmse, valid_dur_corr)) - logger.info('Test: DNN -- RMSE: %.3f frames/phoneme; CORR: %.3f; ' - % (test_dur_rmse, test_dur_corr)) - - # evaluation: calculate distortion - if cfg.CALMCD and cfg.AcousticModel: - logger.info('calculating MCD') - - ref_data_dir = os.path.join(inter_data_dir, 'ref_data') - ref_lf0_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.lf0_ext) - # for straight or world vocoders - ref_mgc_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.mgc_ext) - ref_bap_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.bap_ext) - # for magphase vocoder - ref_mag_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.mag_ext) - ref_real_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.real_ext) - ref_imag_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.imag_ext) - # for GlottDNN vocoder - ref_lsf_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.lsf_ext) - ref_slsf_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.slsf_ext) - ref_gain_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.gain_ext) - ref_hnr_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.hnr_ext) - # for pulsemodel vocoder - ref_pdd_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.pdd_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - calculator = IndividualDistortionComp() - - spectral_distortion = 0.0 - bap_mse = 0.0 - f0_mse = 0.0 - vuv_error = 0.0 - - valid_file_id_list = file_id_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_file_id_list = file_id_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - # get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - # use first feature in label -- hardcoded for now - silence_feature = 0 - - # Use these to trim silence: - untrimmed_test_labels = 
binary_label_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if 'mgc' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['mgc'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_mgc_list, cfg.mgc_dim, - untrimmed_test_labels, lab_dim, silence_feature) - elif cfg.remove_silence_using_hts_labels: - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['mgc'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_mgc_list) - else: - ref_data_dir = os.path.join(data_dir, 'mgc') - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - # MCD - valid_spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - test_spectral_distortion *= (10 / - numpy.log(10)) * numpy.sqrt(2.0) # MCD - - if 'bap' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['bap'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_bap_list, cfg.bap_dim, - untrimmed_test_labels, lab_dim, silence_feature) - elif cfg.remove_silence_using_hts_labels: - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['bap'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_bap_list) - else: - ref_data_dir = os.path.join(data_dir, 'bap') - valid_bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - test_bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - valid_bap_mse = valid_bap_mse / 10.0 - # Cassia's bap is computed from 10*log|S(w)|. 
if use HTS/SPTK style, do the same as MGC - test_bap_mse = test_bap_mse / 10.0 - - if 'lf0' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['lf0'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_lf0_list, cfg.lf0_dim, - untrimmed_test_labels, lab_dim, silence_feature) - elif cfg.remove_silence_using_hts_labels: - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['lf0'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_lf0_list) - else: - if cfg.vocoder_type == 'MAGPHASE': - ref_data_dir = os.path.join(data_dir, 'feats') - else: - ref_data_dir = os.path.join(data_dir, 'lf0') - valid_f0_mse, valid_f0_corr, valid_vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - test_f0_mse, test_f0_corr, test_vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - - if 'mag' in cfg.in_dimension_dict: - if cfg.remove_silence_using_hts_labels: - remover = SilenceRemover( - n_cmp=cfg.mag_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['mag'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_mag_list) - else: - ref_data_dir = os.path.join(data_dir, 'feats') - valid_mag_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.mag_ext, cfg.mag_dim) - test_mag_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.mag_ext, cfg.mag_dim) - valid_mag_mse = 10.0*numpy.log10(valid_mag_mse) - test_mag_mse = 10.0*numpy.log10(test_mag_mse) - - if 'real' in cfg.in_dimension_dict: - if cfg.remove_silence_using_hts_labels: - remover = SilenceRemover( - n_cmp=cfg.real_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['real'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_real_list) - else: - ref_data_dir = os.path.join(data_dir, 'feats') - valid_real_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.real_ext, cfg.real_dim) - test_real_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.real_ext, cfg.real_dim) - valid_real_mse = 10.0*numpy.log10(valid_real_mse) - test_real_mse = 10.0*numpy.log10(test_real_mse) - - if 'imag' in cfg.in_dimension_dict: - if cfg.remove_silence_using_hts_labels: - remover = SilenceRemover( - n_cmp=cfg.imag_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['imag'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_imag_list) - else: - ref_data_dir = os.path.join(data_dir, 'feats') - valid_imag_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.imag_ext, cfg.imag_dim) - test_imag_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.imag_ext, cfg.imag_dim) - valid_imag_mse = 10.0*numpy.log10(valid_imag_mse) - test_imag_mse = 
10.0*numpy.log10(test_imag_mse) - - if 'lsf' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['lsf'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_lsf_list, cfg.lsf_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.lsf_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['lsf'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_lsf_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.lsf_ext, cfg.lsf_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.lsf_ext, cfg.lsf_dim) - - if 'slsf' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['slsf'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_slsf_list, cfg.slsf_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.slsf_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['slsf'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_slsf_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.slsf_ext, cfg.slsf_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.slsf_ext, cfg.slsf_dim) - - if 'hnr' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['hnr'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_hnr_list, cfg.hnr_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.hnr_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['hnr'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_hnr_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.hnr_ext, cfg.hnr_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.hnr_ext, cfg.hnr_dim) - - if 'gain' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['gain'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_gain_list, cfg.gain_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.gain_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['gain'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_gain_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, 
ref_data_dir, gen_dir, cfg.gain_ext, cfg.gain_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.gain_ext, cfg.gain_dim) - - if 'pdd' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['pdd'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_pdd_list, cfg.pdd_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.pdd_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['pdd'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_pdd_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.pdd_ext, cfg.pdd_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.pdd_ext, cfg.pdd_dim) - - if cfg.vocoder_type == 'MAGPHASE': - logger.info('Develop: DNN -- MAG: %.3f dB; REAL: %.3f dB; IMAG: %.3f dB; F0:- RMSE: %.3f Hz; CORR: %.3f; VUV: %.3f%%' - % (valid_mag_mse, valid_real_mse, valid_imag_mse, valid_f0_mse, valid_f0_corr, valid_vuv_error*100.)) - logger.info('Test : DNN -- MAG: %.3f dB; REAL: %.3f dB; IMAG: %.3f dB; F0:- RMSE: %.3f Hz; CORR: %.3f; VUV: %.3f%%' - % (test_mag_mse, test_real_mse, test_imag_mse, test_f0_mse, test_f0_corr, test_vuv_error*100.)) - else: - logger.info('Develop: DNN -- MCD: %.3f dB; BAP: %.3f dB; F0:- RMSE: %.3f Hz; CORR: %.3f; VUV: %.3f%%' - % (valid_spectral_distortion, valid_bap_mse, valid_f0_mse, valid_f0_corr, valid_vuv_error*100.)) - logger.info('Test : DNN -- MCD: %.3f dB; BAP: %.3f dB; F0:- RMSE: %.3f Hz; CORR: %.3f; VUV: %.3f%%' - % (test_spectral_distortion, test_bap_mse, test_f0_mse, test_f0_corr, test_vuv_error*100.)) - - -if __name__ == '__main__': - - # these things should be done even before trying to parse the command line - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.cfg - - # set up logging to use our custom class - logging.setLoggerClass(LoggerPlotter) - - # get a logger for this main function - logger = logging.getLogger("main") - - if len(sys.argv) != 2: - logger.critical('usage: run_merlin.sh [config file name]') - sys.exit(1) - - config_file = sys.argv[1] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - - logger.info('Installation information:') - logger.info(' Merlin directory: '+os.path.abspath( - os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))) - logger.info(' PATH:') - env_PATHs = os.getenv('PATH') - if env_PATHs: - env_PATHs = env_PATHs.split(':') - for p in env_PATHs: - if len(p) > 0: - logger.info(' '+p) - logger.info(' LD_LIBRARY_PATH:') - env_LD_LIBRARY_PATHs = os.getenv('LD_LIBRARY_PATH') - if env_LD_LIBRARY_PATHs: - env_LD_LIBRARY_PATHs = env_LD_LIBRARY_PATHs.split(':') - for p in env_LD_LIBRARY_PATHs: - if len(p) > 0: - logger.info(' '+p) - logger.info(' Python version: '+sys.version.replace('\n', '')) - logger.info(' PYTHONPATH:') - env_PYTHONPATHs = os.getenv('PYTHONPATH') - if env_PYTHONPATHs: - env_PYTHONPATHs = env_PYTHONPATHs.split(':') - for p in env_PYTHONPATHs: - if len(p) > 0: - logger.info(' '+p) - logger.info(' Numpy version: '+numpy.version.version) - logger.info(' Theano version: '+theano.version.version) - 
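# A minimal defensive sketch for the environment logging just below: os.getenv()
# returns None for unset variables, and str + None raises TypeError, so the
# THEANO_FLAGS line fails on machines where that flag is not exported.
# Guarding with `or ''` keeps the log line safe (sketch only; the original
# script concatenates the raw getenv() result):
import os

theano_flags = os.getenv('THEANO_FLAGS') or ''  # '' when THEANO_FLAGS is unset
print(' THEANO_FLAGS: ' + theano_flags)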
logger.info(' THEANO_FLAGS: '+os.getenv('THEANO_FLAGS')) - logger.info(' device: '+theano.config.device) - - # Check for the presence of git - ret = os.system('git status > /dev/null') - if ret == 0: - logger.info(' Git is available in the working directory:') - git_describe = subprocess.Popen( - ['git', 'describe', '--tags', '--always'], stdout=subprocess.PIPE).communicate()[0][:-1] - logger.info(' Merlin version: {}'.format(git_describe)) - git_branch = subprocess.Popen( - ['git', 'rev-parse', '--abbrev-ref', 'HEAD'], stdout=subprocess.PIPE).communicate()[0][:-1] - logger.info(' branch: {}'.format(git_branch)) - git_diff = subprocess.Popen( - ['git', 'diff', '--name-status'], stdout=subprocess.PIPE).communicate()[0] - if sys.version_info.major >= 3: - git_diff = git_diff.decode('utf-8') - git_diff = git_diff.replace('\t', ' ').split('\n') - logger.info(' diff to Merlin version:') - for filediff in git_diff: - if len(filediff) > 0: - logger.info(' '+filediff) - logger.info(' (all diffs logged in ' + - os.path.basename(cfg.log_file)+'.gitdiff'+')') - os.system('git diff > '+cfg.log_file+'.gitdiff') - - logger.info('Execution information:') - logger.info(' HOSTNAME: '+socket.getfqdn()) - logger.info(' USER: '+os.getenv('USER')) - logger.info(' PID: '+str(os.getpid())) - PBS_JOBID = os.getenv('PBS_JOBID') - if PBS_JOBID: - logger.info(' PBS_JOBID: '+PBS_JOBID) - - if cfg.profile: - logger.info('profiling is activated') - import cProfile - import pstats - cProfile.run('main_function(cfg)', 'mainstats') - - # create a stream for the profiler to write to - profiling_output = io.StringIO() - p = pstats.Stats('mainstats', stream=profiling_output) - - # print stats to that stream - # here we just report the top 10 functions, sorted by total amount of time spent in each - p.strip_dirs().sort_stats('tottime').print_stats(10) - - # print the result to the log - logger.info('---Profiling result follows---\n%s' % - profiling_output.getvalue()) - profiling_output.close() - logger.info('---End of profiling result---') - - else: - main_function(cfg) - - sys.exit(0) -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://github.com/CSTR-Edinburgh/merlin -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -import pickle -import gzip -import os -import sys -import errno -import time -import math - -import subprocess -import socket # only for socket.getfqdn() - -# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why) -import numpy -#import gnumpy as gnp -# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself -import numpy.distutils.__config__ -# and only after that can we import theano -import theano - -from utils.providers import ListDataProvider - -from frontend.label_normalisation import HTSLabelNormalisation -from frontend.silence_remover import SilenceRemover -from frontend.silence_remover import trim_silence -from frontend.min_max_norm import MinMaxNormalisation -from frontend.acoustic_composition import AcousticComposition -from frontend.parameter_generation import ParameterGeneration -from frontend.mean_variance_norm import MeanVarianceNorm - -# the new class for label composition and normalisation -from frontend.label_composer import LabelComposer -from frontend.label_modifier import HTSLabelModification -from frontend.merge_features import MergeFeat - -import configuration -from models.deep_rnn import DeepRecurrentNetwork -from models.hed_rnn import DeepEncoderDecoderNetwork - -from utils.compute_distortion import DistortionComputation, IndividualDistortionComp -from utils.generate import generate_wav -from utils.learn_rates import ExpDecreaseLearningRate - -from io_funcs.binary_io import BinaryIOCollection - -# our custom logging class that can also plot -from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot -import logging # as logging -import logging.config -import io -from utils.file_paths import FilePaths -from utils.utils import read_file_list, prepare_file_path_list - - -def extract_file_id_list(file_list): - file_id_list = [] - for file_name in file_list: - file_id = os.path.basename(os.path.splitext(file_name)[0]) - file_id_list.append(file_id) - - return file_id_list - - -def make_output_file_list(out_dir, in_file_lists): - out_file_lists = [] - - for in_file_name in in_file_lists: - file_id = os.path.basename(in_file_name) - out_file_name = out_dir + '/' + file_id - out_file_lists.append(out_file_name) - - return out_file_lists - - -def visualize_dnn(dnn): - - plotlogger = logging.getLogger("plotting") - - # reference activation weights in layers - W = list() - layer_name = list() - for i in range(len(dnn.params)): - aa = dnn.params[i].get_value(borrow=True).T - print(aa.shape, aa.size) - if aa.size > aa.shape[0]: - W.append(aa) - layer_name.append(dnn.params[i].name) - - # plot activation weights including input and output - layer_num = len(W) - for i_layer in range(layer_num): - fig_name = 'Activation weights W' + \ - str(i_layer) + '_' + layer_name[i_layer] - fig_title = 'Activation weights of W' + 
str(i_layer) - xlabel = 'Neuron index of hidden layer ' + str(i_layer) - ylabel = 'Neuron index of hidden layer ' + str(i_layer+1) - if i_layer == 0: - xlabel = 'Input feature index' - if i_layer == layer_num-1: - ylabel = 'Output feature index' - logger.create_plot(fig_name, SingleWeightMatrixPlot) - plotlogger.add_plot_point(fig_name, fig_name, W[i_layer]) - plotlogger.save_plot(fig_name, title=fig_name, - xlabel=xlabel, ylabel=ylabel) - - -def load_covariance(var_file_dict, out_dimension_dict): - var = {} - io_funcs = BinaryIOCollection() - for feature_name in list(var_file_dict.keys()): - var_values, dimension = io_funcs.load_binary_file_frame( - var_file_dict[feature_name], 1) - - var_values = numpy.reshape( - var_values, (out_dimension_dict[feature_name], 1)) - - var[feature_name] = var_values - - return var - - -def train_DNN(train_xy_file_list, valid_xy_file_list, - nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False, var_dict=None, - cmp_mean_vector=None, cmp_std_vector=None, seq_dur_file_list=None, init_dnn_model_file=None): - - # get loggers for this function - # this one writes to both console and file - logger = logging.getLogger("main.train_DNN") - logger.debug('Starting train_DNN') - - if plot: - # this one takes care of plotting duties - plotlogger = logging.getLogger("plotting") - # create an (empty) plot of training convergence, ready to receive data points - logger.create_plot('training convergence', MultipleSeriesPlot) - - try: - assert numpy.sum(ms_outs) == n_outs - except AssertionError: - logger.critical( - 'the summation of multi-stream outputs does not equal to %d' % (n_outs)) - raise - - ####parameters##### - finetune_lr = float(hyper_params['learning_rate']) - training_epochs = int(hyper_params['training_epochs']) - batch_size = int(hyper_params['batch_size']) - l1_reg = float(hyper_params['l1_reg']) - l2_reg = float(hyper_params['l2_reg']) - warmup_epoch = int(hyper_params['warmup_epoch']) - momentum = float(hyper_params['momentum']) - warmup_momentum = float(hyper_params['warmup_momentum']) - - hidden_layer_size = hyper_params['hidden_layer_size'] - - buffer_utt_size = buffer_size - early_stop_epoch = int(hyper_params['early_stop_epochs']) - - hidden_activation = hyper_params['hidden_activation'] - output_activation = hyper_params['output_activation'] - - model_type = hyper_params['model_type'] - hidden_layer_type = hyper_params['hidden_layer_type'] - - # use a switch to turn on pretraining - # pretraining may not help too much, if this case, we turn it off to save time - do_pretraining = hyper_params['do_pretraining'] - pretraining_epochs = int(hyper_params['pretraining_epochs']) - pretraining_lr = float(hyper_params['pretraining_lr']) - - sequential_training = hyper_params['sequential_training'] - dropout_rate = hyper_params['dropout_rate'] - - buffer_size = int(buffer_size / batch_size) * batch_size - - ################### - (train_x_file_list, train_y_file_list) = train_xy_file_list - (valid_x_file_list, valid_y_file_list) = valid_xy_file_list - - if cfg.network_type != 'S2S': - seq_dur_file_list = None - - if not seq_dur_file_list: - train_dur_file_list = None - valid_dur_file_list = None - else: - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name, subphone_feats="coarse_coding") - train_dur_file_list = seq_dur_file_list[0:cfg.train_file_number] - valid_dur_file_list = seq_dur_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - - logger.debug('Creating training 
data provider') - train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list, dur_file_list=train_dur_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, - sequential=sequential_training, network_type=cfg.network_type, shuffle=True) - - logger.debug('Creating validation data provider') - valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, dur_file_list=valid_dur_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, - sequential=sequential_training, network_type=cfg.network_type, shuffle=False) - - if cfg.rnn_batch_training: - train_data_reader.set_rnn_params(training_algo=cfg.training_algo, batch_size=cfg.batch_size, - seq_length=cfg.seq_length, merge_size=cfg.merge_size, bucket_range=cfg.bucket_range) - valid_data_reader.reshape_input_output() - - if cfg.network_type == 'S2S': - MLU_div = train_data_reader.set_s2s_division(cfg.linguistic_file_name) - MLU_div = valid_data_reader.set_s2s_division(cfg.linguistic_file_name) - - if cfg.network_type == 'S2SD': - shared_train_set_xyd, temp_train_set_x, temp_train_set_y, temp_train_set_d = train_data_reader.load_one_partition() - shared_valid_set_xyd, temp_valid_set_x, temp_valid_set_y, temp_valid_set_d = valid_data_reader.load_one_partition() - train_set_x, train_set_y, train_set_d = shared_train_set_xyd - valid_set_x, valid_set_y, valid_set_d = shared_valid_set_xyd - - temp_train_set_f = label_normaliser.extract_durational_features( - dur_data=temp_train_set_d) - temp_valid_set_f = label_normaliser.extract_durational_features( - dur_data=temp_valid_set_d) - train_set_f = theano.shared(numpy.asarray( - temp_train_set_f, dtype=theano.config.floatX), name='f', borrow=True) - valid_set_f = theano.shared(numpy.asarray( - temp_valid_set_f, dtype=theano.config.floatX), name='f', borrow=True) - elif cfg.network_type == 'S2S': - shared_train_set_xyd, temp_train_set_x, temp_train_set_y, temp_train_set_d, temp_train_set_af = train_data_reader.load_one_partition() - shared_valid_set_xyd, temp_valid_set_x, temp_valid_set_y, temp_valid_set_d, temp_valid_set_af = valid_data_reader.load_one_partition() - train_set_x, train_set_y, train_set_d = shared_train_set_xyd - valid_set_x, valid_set_y, valid_set_d = shared_valid_set_xyd - - ### extract phone duration array for frame features ### - [num_train_words, n_ins] = temp_train_set_x.shape - num_train_syl = sum(temp_train_set_d[0: num_train_words]) - - [num_valid_words, n_ins] = temp_valid_set_x.shape - num_valid_syl = sum(temp_valid_set_d[0: num_valid_words]) - - temp_train_ph_dur_data = temp_train_set_d[num_train_words+num_train_syl:] - temp_valid_ph_dur_data = temp_valid_set_d[num_valid_words+num_valid_syl:] - - temp_train_set_f = label_normaliser.extract_durational_features( - dur_data=temp_train_ph_dur_data) - temp_valid_set_f = label_normaliser.extract_durational_features( - dur_data=temp_valid_ph_dur_data) - temp_train_set_af[:, -4:] = temp_train_set_f - temp_valid_set_af[:, -4:] = temp_valid_set_f - train_set_f = theano.shared(numpy.asarray( - temp_train_set_af, dtype=theano.config.floatX), name='f', borrow=True) - valid_set_f = theano.shared(numpy.asarray( - temp_valid_set_af, dtype=theano.config.floatX), name='f', borrow=True) - else: - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition() - train_set_x, train_set_y = shared_train_set_xy - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_one_partition() - valid_set_x, 
valid_set_y = shared_valid_set_xy
- train_data_reader.reset()
- valid_data_reader.reset()
-
- # temporarily we use the training set as pretrain_set_x.
- # we need to support any data for pretraining
-
- # numpy random generator
- numpy_rng = numpy.random.RandomState(123)
- logger.info('building the model')
-
- dnn_model = None
- pretrain_fn = None  # not all models support pretraining right now
- train_fn = None
- valid_fn = None
- # valid_fn and valid_model are the same; reserved to compute multi-stream distortion
- valid_model = None
- if model_type == 'DNN':
- if cfg.network_type == 'S2S':
- dnn_model = DeepEncoderDecoderNetwork(n_in=n_ins, hidden_layer_size=hidden_layer_size, n_out=n_outs,
- L1_reg=l1_reg, L2_reg=l2_reg, hidden_layer_type=hidden_layer_type, output_type=cfg.output_layer_type,
- network_type=cfg.network_type, ed_type='HED', MLU_div_lengths=MLU_div['length'],
- dropout_rate=dropout_rate, optimizer=cfg.optimizer, rnn_batch_training=cfg.rnn_batch_training)
- else:
- dnn_model = DeepRecurrentNetwork(n_in=n_ins, hidden_layer_size=hidden_layer_size, n_out=n_outs,
- L1_reg=l1_reg, L2_reg=l2_reg, hidden_layer_type=hidden_layer_type, output_type=cfg.output_layer_type,
- dropout_rate=dropout_rate, optimizer=cfg.optimizer, rnn_batch_training=cfg.rnn_batch_training)
-
- else:
- logger.critical('%s type NN model is not supported!' % (model_type))
- raise
-
- # Model adaptation -- fine-tuning the existing model.
- # We can't just unpickle the old model and use that, because the fine-tune functions
- # depend on the opt_l2e option used in construction of the initial model. One way around this
- # would be to unpickle, manually set unpickled_dnn_model.opt_l2e=True and then call
- # unpickled_dnn_model.build_finetune_functions() again. This is another way: construct a
- # new model from scratch with opt_l2e=True, then copy the existing weights over:
- use_lhuc = cfg.use_lhuc
- if init_dnn_model_file != "_":
- logger.info('load parameters from existing model: %s' %
- (init_dnn_model_file))
- if not os.path.isfile(init_dnn_model_file):
- sys.exit('Model file %s does not exist' % (init_dnn_model_file))
- existing_dnn_model = pickle.load(open(init_dnn_model_file, 'rb'))
- if not use_lhuc and not len(existing_dnn_model.params) == len(dnn_model.params):
- sys.exit('Old and new models have different numbers of weight matrices')
- elif use_lhuc and len(dnn_model.params) < len(existing_dnn_model.params):
- sys.exit(
- 'In LHUC adaptation the new model must have more parameters than the old model.')
- # assign the existing dnn model parameters to the new dnn model
- k = 0
- for i in range(len(dnn_model.params)):
- ## Added for LHUC ##
- # In LHUC, we keep all the old parameters intact and learn only a small set of new
- # parameters
- if dnn_model.params[i].name == 'c':
- continue
- else:
- old_val = existing_dnn_model.params[k].get_value()
- new_val = dnn_model.params[i].get_value()
- if numpy.shape(old_val) == numpy.shape(new_val):
- dnn_model.params[i].set_value(old_val)
- else:
- sys.exit('old and new weight matrices have different shapes')
- k = k + 1
-
- if cfg.network_type == 'S2S':
- train_fn, valid_fn = dnn_model.build_finetune_functions_S2SPF(
- (train_set_x, train_set_y, train_set_d, train_set_f), (valid_set_x, valid_set_y, valid_set_d, valid_set_f))
- else:
- train_fn, valid_fn = dnn_model.build_finetune_functions(
- (train_set_x, train_set_y), (valid_set_x, valid_set_y), use_lhuc)  # , batch_size=batch_size
- logger.info('fine-tuning the %s model' % (model_type))
-
- start_time = time.time()
-
- best_dnn_model = 
dnn_model - best_validation_loss = sys.float_info.max - previous_loss = sys.float_info.max - - lr_decay = cfg.lr_decay - if lr_decay > 0: - early_stop_epoch *= lr_decay - - early_stop = 0 - val_loss_counter = 0 - - previous_finetune_lr = finetune_lr - - epoch = 0 - while (epoch < training_epochs): - epoch = epoch + 1 - - if lr_decay == 0: - # fixed learning rate - reduce_lr = False - elif lr_decay < 0: - # exponential decay - reduce_lr = False if epoch <= warmup_epoch else True - elif val_loss_counter > 0: - # linear decay - reduce_lr = False - if val_loss_counter % lr_decay == 0: - reduce_lr = True - val_loss_counter = 0 - else: - # no decay - reduce_lr = False - - if reduce_lr: - current_finetune_lr = previous_finetune_lr * 0.5 - current_momentum = momentum - else: - current_finetune_lr = previous_finetune_lr - current_momentum = warmup_momentum - - previous_finetune_lr = current_finetune_lr - - train_error = [] - sub_start_time = time.time() - - logger.debug("training params -- learning rate: %f, early_stop: %d/%d" % - (current_finetune_lr, early_stop, early_stop_epoch)) - while (not train_data_reader.is_finish()): - - if cfg.network_type == 'S2SD': - shared_train_set_xyd, temp_train_set_x, temp_train_set_y, temp_train_set_d = train_data_reader.load_one_partition() - temp_train_set_f = label_normaliser.extract_durational_features( - dur_data=temp_train_set_d) - train_set_d.set_value(numpy.asarray( - temp_train_set_d, dtype='int32'), borrow=True) - train_set_f.set_value(numpy.asarray( - temp_train_set_f, dtype=theano.config.floatX), borrow=True) - elif cfg.network_type == 'S2S': - shared_train_set_xyd, temp_train_set_x, temp_train_set_y, temp_train_set_d, temp_train_set_af = train_data_reader.load_one_partition() - [num_train_words, n_ins] = temp_train_set_x.shape - num_train_syl = sum(temp_train_set_d[0: num_train_words]) - temp_train_ph_dur_data = temp_train_set_d[num_train_words+num_train_syl:] - temp_train_set_f = label_normaliser.extract_durational_features( - dur_data=temp_train_ph_dur_data) - temp_train_set_af[:, -4:] = temp_train_set_f - train_set_d.set_value(numpy.asarray( - temp_train_set_d, dtype='int32'), borrow=True) - train_set_f.set_value(numpy.asarray( - temp_train_set_af, dtype=theano.config.floatX), borrow=True) - else: - _, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition() - - # if sequential training, the batch size will be the number of frames in an utterance - # batch_size for sequential training is considered only when rnn_batch_training is set to True - if sequential_training == True: - batch_size = temp_train_set_x.shape[0] - - n_train_batches = temp_train_set_x.shape[0] // batch_size - for index in range(n_train_batches): - # send a batch to the shared variable, rather than pass the batch size and batch index to the finetune function - train_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - train_set_y.set_value(numpy.asarray( - temp_train_set_y, dtype=theano.config.floatX), borrow=True) - - this_train_error = train_fn( - current_finetune_lr, current_momentum) - - train_error.append(this_train_error) - - train_data_reader.reset() - - logger.debug('calculating validation loss') - validation_losses = [] - while (not valid_data_reader.is_finish()): - - if cfg.network_type == 'S2SD': - shared_valid_set_xyd, temp_valid_set_x, temp_valid_set_y, temp_valid_set_d = valid_data_reader.load_one_partition() - temp_valid_set_f = label_normaliser.extract_durational_features( - dur_data=temp_valid_set_d) - 
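# A compact restatement of the learning-rate schedule driving the epoch loop
# above (sketch only; next_lr is a hypothetical helper and the argument names
# mirror the surrounding code): lr_decay == 0 keeps the rate fixed, lr_decay < 0
# halves it every epoch once past warmup_epoch, and lr_decay > 0 halves it after
# every lr_decay consecutive epochs whose validation loss increased (the loop
# also resets val_loss_counter whenever it halves).
def next_lr(prev_lr, epoch, warmup_epoch, lr_decay, val_loss_counter):
    if lr_decay < 0:
        reduce_lr = epoch > warmup_epoch                   # exponential decay after warm-up
    elif lr_decay > 0 and val_loss_counter > 0:
        reduce_lr = val_loss_counter % lr_decay == 0       # stepwise decay on stalled validation loss
    else:
        reduce_lr = False                                  # fixed learning rate
    return prev_lr * 0.5 if reduce_lr else prev_lr

assert next_lr(0.002, epoch=5, warmup_epoch=10, lr_decay=-1, val_loss_counter=0) == 0.002
assert next_lr(0.002, epoch=11, warmup_epoch=10, lr_decay=-1, val_loss_counter=0) == 0.001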
valid_set_d.set_value(numpy.asarray( - temp_valid_set_d, dtype='int32'), borrow=True) - valid_set_f.set_value(numpy.asarray( - temp_valid_set_f, dtype=theano.config.floatX), borrow=True) - elif cfg.network_type == 'S2S': - shared_valid_set_xyd, temp_valid_set_x, temp_valid_set_y, temp_valid_set_d, temp_valid_set_af = valid_data_reader.load_one_partition() - [num_valid_words, n_ins] = temp_valid_set_x.shape - num_valid_syl = sum(temp_valid_set_d[0: num_valid_words]) - temp_valid_ph_dur_data = temp_valid_set_d[num_valid_words+num_valid_syl:] - temp_valid_set_f = label_normaliser.extract_durational_features( - dur_data=temp_valid_ph_dur_data) - temp_valid_set_af[:, -4:] = temp_valid_set_f - valid_set_d.set_value(numpy.asarray( - temp_valid_set_d, dtype='int32'), borrow=True) - valid_set_f.set_value(numpy.asarray( - temp_valid_set_af, dtype=theano.config.floatX), borrow=True) - else: - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_one_partition() - valid_set_x.set_value(numpy.asarray( - temp_valid_set_x, dtype=theano.config.floatX), borrow=True) - valid_set_y.set_value(numpy.asarray( - temp_valid_set_y, dtype=theano.config.floatX), borrow=True) - - this_valid_loss = valid_fn() - - validation_losses.append(this_valid_loss) - valid_data_reader.reset() - - this_validation_loss = numpy.mean(validation_losses) - - this_train_valid_loss = numpy.mean(numpy.asarray(train_error)) - - sub_end_time = time.time() - - loss_difference = this_validation_loss - previous_loss - - logger.info('epoch %i, validation error %f, train error %f time spent %.2f' % ( - epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time))) - if plot: - plotlogger.add_plot_point( - 'training convergence', 'validation set', (epoch, this_validation_loss)) - plotlogger.add_plot_point( - 'training convergence', 'training set', (epoch, this_train_valid_loss)) - plotlogger.save_plot( - 'training convergence', title='Progress of training and validation error', xlabel='epochs', ylabel='error') - - if this_validation_loss < best_validation_loss: - pickle.dump(best_dnn_model, open(nnets_file_name, 'wb')) - - best_dnn_model = dnn_model - best_validation_loss = this_validation_loss - - if this_validation_loss >= previous_loss: - logger.debug('validation loss increased') - val_loss_counter += 1 - early_stop += 1 - - if epoch > 15 and early_stop > early_stop_epoch: - logger.debug('stopping early') - break - - if math.isnan(this_validation_loss): - break - - previous_loss = this_validation_loss - - end_time = time.time() - - logger.info('overall training time: %.2fm validation error %f' % - ((end_time - start_time) / 60., best_validation_loss)) - - if plot: - plotlogger.save_plot( - 'training convergence', title='Final training and validation error', xlabel='epochs', ylabel='error') - - return best_validation_loss - - -def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list, reshape_io=False): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - - file_number = len(valid_file_list) - - for i in range(file_number): # file_number - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size // n_ins))] - test_set_x = 
features.reshape((-1, n_ins))
- n_rows = test_set_x.shape[0]
-
- if reshape_io:
- test_set_x = numpy.reshape(
- test_set_x, (1, test_set_x.shape[0], n_ins))
- test_set_x = numpy.array(test_set_x, 'float32')
-
- predicted_parameter = dnn_model.parameter_prediction(test_set_x)
- predicted_parameter = predicted_parameter.reshape(-1, n_outs)
- predicted_parameter = predicted_parameter[0:n_rows]
-
- # write to cmp file
- predicted_parameter = numpy.array(predicted_parameter, 'float32')
- temp_parameter = predicted_parameter
- fid = open(out_file_list[i], 'wb')
- predicted_parameter.tofile(fid)
- logger.debug('saved to %s' % out_file_list[i])
- fid.close()
-
-
- def dnn_generation_S2S(valid_file_list, valid_dur_file_list, nnets_file_name, n_ins, n_outs, out_file_list):
- logger = logging.getLogger("dnn_generation")
- logger.debug('Starting dnn_generation')
-
- plotlogger = logging.getLogger("plotting")
-
- dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
- file_number = len(valid_file_list)
-
- label_normaliser = HTSLabelNormalisation(
- question_file_name=cfg.question_file_name, subphone_feats="coarse_coding")
- for i in range(file_number):  # file_number
- logger.info('generating %4d of %4d: %s' %
- (i+1, file_number, valid_file_list[i]))
- fid_lab = open(valid_file_list[i], 'rb')
- features = numpy.fromfile(fid_lab, dtype=numpy.float32)
- fid_lab.close()
- features = features[:(n_ins * (features.size // n_ins))]
- test_set_x = features.reshape((-1, n_ins))
-
- fid_lab = open(valid_dur_file_list[i], 'rb')
- features = numpy.fromfile(fid_lab, dtype=numpy.float32)
- fid_lab.close()
- test_set_d = features.astype(numpy.int32)
-
- dur_features = label_normaliser.extract_durational_features(
- dur_data=test_set_d)
- test_set_f = dur_features.astype(numpy.float32)
-
- predicted_parameter = dnn_model.parameter_prediction_S2SPF(
- test_set_x, test_set_d, test_set_f)
-
- # print b_indices
-
- # write to cmp file
- predicted_parameter = numpy.array(predicted_parameter, 'float32')
- temp_parameter = predicted_parameter
- fid = open(out_file_list[i], 'wb')
- predicted_parameter.tofile(fid)
- logger.debug('saved to %s' % out_file_list[i])
- fid.close()
-
-
- def dnn_generation_S2SML(valid_file_list, valid_dur_file_list, nnets_file_name, n_ins, n_outs, MLU_div, out_file_list):
- logger = logging.getLogger("dnn_generation")
- logger.debug('Starting dnn_generation')
-
- plotlogger = logging.getLogger("plotting")
-
- dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
- file_number = len(valid_file_list)
-
- label_normaliser = HTSLabelNormalisation(
- question_file_name=cfg.question_file_name, subphone_feats="coarse_coding")
- for i in range(file_number):  # file_number
- logger.info('generating %4d of %4d: %s' %
- (i+1, file_number, valid_file_list[i]))
- fid_lab = open(valid_file_list[i], 'rb')
- features = numpy.fromfile(fid_lab, dtype=numpy.float32)
- fid_lab.close()
- features = features[:(n_ins * (features.size // n_ins))]
- test_set_MLU = features.reshape((-1, n_ins))
-
- fid_lab = open(valid_dur_file_list[i], 'rb')
- features = numpy.fromfile(fid_lab, dtype=numpy.float32)
- fid_lab.close()
- test_set_d = features.astype(numpy.int32)
-
- ### MLU features sub-division ###
- test_set_phone = numpy.concatenate([test_set_MLU[:, MLU_div['phone'][0]: MLU_div['phone'][1]],
- test_set_MLU[:, MLU_div['phone'][2]: MLU_div['phone'][3]]], axis=1)
- test_set_syl = numpy.concatenate(
- [test_set_MLU[:, MLU_div['syl'][0]: MLU_div['syl'][1]], test_set_MLU[:, MLU_div['syl'][2]: MLU_div['syl'][3]]], axis=1)
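# How the MLU sub-division above works (illustrative sketch; the boundary
# values in this toy MLU_div are invented, only the slicing pattern matches
# the code): each linguistic level owns two column ranges of the input
# matrix, and the level's feature block is the concatenation of both slices.
import numpy

MLU_div = {'phone': [0, 4, 10, 12]}                        # hypothetical column bounds
test_set_MLU = numpy.zeros((5, 16), dtype=numpy.float32)   # 5 units x 16 features
test_set_phone = numpy.concatenate(
    [test_set_MLU[:, MLU_div['phone'][0]: MLU_div['phone'][1]],   # columns 0..3
     test_set_MLU[:, MLU_div['phone'][2]: MLU_div['phone'][3]]],  # columns 10..11
    axis=1)
assert test_set_phone.shape == (5, 6)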
- test_set_word = numpy.concatenate([test_set_MLU[:, MLU_div['word'][0]: MLU_div['word'][1]],
- test_set_MLU[:, MLU_div['word'][2]: MLU_div['word'][3]]], axis=1)
-
- ### duration array sub-division ###
- num_ph = len(test_set_MLU)
-
- dur_word_syl = test_set_d[0: -num_ph]
-
- num_syl = (numpy.where(numpy.cumsum(
- dur_word_syl[::-1]) == num_ph)[0][0] + 1)
- num_words = len(dur_word_syl) - num_syl
-
- test_set_dur_phone = test_set_d[-num_ph:]
- test_set_dur_word = dur_word_syl[0: num_words]
- test_set_dur_syl = dur_word_syl[num_words:]
-
- ### additional feature matrix (syllable+phone+frame) ###
- num_frames = sum(test_set_dur_phone)
- test_set_af = numpy.empty((num_frames, MLU_div['length'][-1]))
-
- test_set_af[0: num_syl, MLU_div['length'][0]: MLU_div['length']
- [1]] = test_set_syl[numpy.cumsum(test_set_dur_syl)-1]
- test_set_af[0: num_ph, MLU_div['length'][1]
- : MLU_div['length'][2]] = test_set_phone
-
- ### input word feature matrix ###
- test_set_dur_word_segments = numpy.zeros(num_words, dtype='int32')
- syl_bound = numpy.cumsum(test_set_dur_word)
- for indx in range(num_words):
- test_set_dur_word_segments[indx] = int(
- sum(test_set_dur_syl[0: syl_bound[indx]]))
- test_set_x = test_set_word[test_set_dur_word_segments-1]
-
- dur_features = label_normaliser.extract_durational_features(
- dur_data=test_set_dur_phone)
- test_set_af[:, -4:] = dur_features
- test_set_f = test_set_af.astype(numpy.float32)
-
- predicted_parameter = dnn_model.parameter_prediction_S2SPF(
- test_set_x, test_set_d, test_set_f)
-
- # print b_indices
-
- # write to cmp file
- predicted_parameter = numpy.array(predicted_parameter, 'float32')
- fid = open(out_file_list[i], 'wb')
- predicted_parameter.tofile(fid)
- logger.debug('saved to %s' % out_file_list[i])
- fid.close()
-
-# generate bottleneck layer as features
-
-
- def dnn_hidden_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list, bottleneck_index):
- logger = logging.getLogger("dnn_generation")
- logger.debug('Starting dnn_generation')
-
- plotlogger = logging.getLogger("plotting")
-
- dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
- file_number = len(valid_file_list)
-
- for i in range(file_number):
- logger.info('generating %4d of %4d: %s' %
- (i+1, file_number, valid_file_list[i]))
- fid_lab = open(valid_file_list[i], 'rb')
- features = numpy.fromfile(fid_lab, dtype=numpy.float32)
- fid_lab.close()
- features = features[:(n_ins * (features.size // n_ins))]
- features = features.reshape((-1, n_ins))
- temp_set_x = features.tolist()
- test_set_x = theano.shared(numpy.asarray(
- temp_set_x, dtype=theano.config.floatX))
-
- predicted_parameter = dnn_model.generate_hidden_layer(
- test_set_x, bottleneck_index)
-
- # write to cmp file
- predicted_parameter = numpy.array(predicted_parameter, 'float32')
- temp_parameter = predicted_parameter
- fid = open(out_file_list[i], 'wb')
- predicted_parameter.tofile(fid)
- logger.debug('saved to %s' % out_file_list[i])
- fid.close()
-
-
- def main_function(cfg):
- file_paths = FilePaths(cfg)
-
- # get a logger for this main function
- logger = logging.getLogger("main")
-
- # get another logger to handle plotting duties
- plotlogger = logging.getLogger("plotting")
-
- # later, we might do this via a handler that is created, attached and configured
- # using the standard config mechanism of the logging module
- # but for now we need to do it manually
- plotlogger.set_plot_path(cfg.plot_dir)
-
- # create plot dir if set to True
- if not os.path.exists(cfg.plot_dir) and cfg.plot:
-
os.makedirs(cfg.plot_dir) - - #### parameter setting######## - hidden_layer_size = cfg.hyper_params['hidden_layer_size'] - - # prepare environment - try: - file_id_list = read_file_list(cfg.file_id_scp) - logger.debug('Loaded file id list from %s' % cfg.file_id_scp) - except IOError: - # this means that open(...) threw an error - logger.critical('Could not load file id list from %s' % - cfg.file_id_scp) - raise - - # total file number including training, development, and testing - total_file_number = len(file_id_list) - assert cfg.train_file_number+cfg.valid_file_number + \ - cfg.test_file_number == total_file_number, 'check train, valid, test file number' - - data_dir = cfg.data_dir - - inter_data_dir = cfg.inter_data_dir - nn_cmp_dir = file_paths.nn_cmp_dir - nn_cmp_norm_dir = file_paths.nn_cmp_norm_dir - model_dir = file_paths.model_dir - gen_dir = file_paths.gen_dir - - in_file_list_dict = {} - - for feature_name in list(cfg.in_dir_dict.keys()): - in_file_list_dict[feature_name] = prepare_file_path_list( - file_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False) - - nn_cmp_file_list = file_paths.get_nn_cmp_file_list() - nn_cmp_norm_file_list = file_paths.get_nn_cmp_norm_file_list() - - # normalisation information - norm_info_file = file_paths.norm_info_file - - # normalise input full context label - # currently supporting two different forms of lingustic features - # later, we should generalise this - - assert cfg.label_style == 'HTS', 'Only HTS-style labels are now supported as input to Merlin' - - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name, add_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - add_feat_dim = sum(cfg.additional_features.values()) - lab_dim = label_normaliser.dimension + add_feat_dim + cfg.appended_input_dim - if cfg.VoiceConversion: - lab_dim = cfg.cmp_dim - logger.info('Input label dimension is %d' % lab_dim) - suffix = str(lab_dim) - - if cfg.process_labels_in_work_dir: - inter_data_dir = cfg.work_dir - - # the number can be removed - file_paths.set_label_dir(label_normaliser.dimension, suffix, lab_dim) - file_paths.set_label_file_list() - - binary_label_dir = file_paths.binary_label_dir - nn_label_dir = file_paths.nn_label_dir - nn_label_norm_dir = file_paths.nn_label_norm_dir - - in_label_align_file_list = file_paths.in_label_align_file_list - binary_label_file_list = file_paths.binary_label_file_list - nn_label_file_list = file_paths.nn_label_file_list - nn_label_norm_file_list = file_paths.nn_label_norm_file_list - - min_max_normaliser = None - - label_norm_file = file_paths.label_norm_file - - test_id_list = file_paths.test_id_list - - if cfg.NORMLAB: - # simple HTS labels - logger.info( - 'preparing label data (input) using standard HTS style labels') - label_normaliser.perform_normalisation( - in_label_align_file_list, binary_label_file_list, label_type=cfg.label_type) - - if cfg.additional_features: - out_feat_file_list = file_paths.out_feat_file_list - in_dim = label_normaliser.dimension - - for new_feature, new_feature_dim in cfg.additional_features.items(): - new_feat_dir = os.path.join(data_dir, new_feature) - new_feat_file_list = prepare_file_path_list( - file_id_list, new_feat_dir, '.'+new_feature) - - merger = MergeFeat(lab_dim=in_dim, feat_dim=new_feature_dim) - merger.merge_data(binary_label_file_list, - new_feat_file_list, out_feat_file_list) - in_dim += new_feature_dim - - binary_label_file_list = out_feat_file_list - - remover = 
SilenceRemover(n_cmp=lab_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type, - remove_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - remover.remove_silence(binary_label_file_list, - in_label_align_file_list, nn_label_file_list) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - - # use only training data to find min-max information, then apply on the whole dataset - if cfg.GenTestList: - min_max_normaliser.load_min_max_values(label_norm_file) - else: - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - - # enforce silence such that the normalization runs without removing silence: only for final synthesis - if cfg.GenTestList and cfg.enforce_silence: - min_max_normaliser.normalise_data( - binary_label_file_list, nn_label_norm_file_list) - else: - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - ### make duration data for S2S network ### - if cfg.network_type == "S2S": - logger.info('creating duration (input) features for S2S network') - label_normaliser.prepare_dur_data(in_label_align_file_list, file_paths.seq_dur_file_list, cfg.label_type, - feature_type=cfg.dur_feature_type, unit_size=cfg.dur_unit_size, feat_size=cfg.dur_feat_size) - - if cfg.remove_silence_from_dur: - remover = SilenceRemover(n_cmp=cfg.seq_dur_dim, silence_pattern=cfg.silence_pattern, - label_type=cfg.label_type, remove_frame_features=False) - remover.remove_silence( - file_paths.seq_dur_file_list, in_label_align_file_list, file_paths.seq_dur_file_list) - - if min_max_normaliser != None and not cfg.GenTestList: - # save label normalisation information for unseen testing labels - label_min_vector = min_max_normaliser.min_vector - label_max_vector = min_max_normaliser.max_vector - label_norm_info = numpy.concatenate( - (label_min_vector, label_max_vector), axis=0) - - label_norm_info = numpy.array(label_norm_info, 'float32') - fid = open(label_norm_file, 'wb') - label_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (label_min_vector.size, label_norm_file)) - - # make output duration data - if cfg.MAKEDUR: - logger.info('creating duration (output) features') - label_normaliser.prepare_dur_data( - in_label_align_file_list, file_paths.dur_file_list, cfg.label_type, cfg.dur_feature_type) - - # make output acoustic data - if cfg.MAKECMP: - logger.info('creating acoustic (output) features') - delta_win = cfg.delta_win # [-0.5, 0.0, 0.5] - acc_win = cfg.acc_win # [1.0, -2.0, 1.0] - - if cfg.GenTestList: - for feature_name in list(cfg.in_dir_dict.keys()): - in_file_list_dict[feature_name] = prepare_file_path_list( - test_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False) - nn_cmp_file_list = prepare_file_path_list( - test_id_list, nn_cmp_dir, cfg.cmp_ext) - nn_cmp_norm_file_list = prepare_file_path_list( - test_id_list, nn_cmp_norm_dir, cfg.cmp_ext) - - acoustic_worker = AcousticComposition( - delta_win=delta_win, acc_win=acc_win) - - if 'dur' in list(cfg.in_dir_dict.keys()) and cfg.AcousticModel: - lf0_file_list = file_paths.get_lf0_file_list() - acoustic_worker.make_equal_frames( - dur_file_list, lf0_file_list, cfg.in_dimension_dict) - - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, cfg.in_dimension_dict, cfg.out_dimension_dict) - - if cfg.remove_silence_using_binary_labels: - # do this to get lab_dim: - label_composer = LabelComposer() - 
label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from CMP using binary label file') - - # overwrite the untrimmed audio with the trimmed version: - trim_silence(nn_cmp_file_list, nn_cmp_file_list, cfg.cmp_dim, - binary_label_file_list, lab_dim, silence_feature) - - elif cfg.remove_silence_using_hts_labels: - # back off to previous method using HTS labels: - remover = SilenceRemover(n_cmp=cfg.cmp_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type, - remove_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - remover.remove_silence( - nn_cmp_file_list, in_label_align_file_list, nn_cmp_file_list) # save to itself - - # save acoustic normalisation information for normalising the features back - var_dir = file_paths.var_dir - var_file_dict = file_paths.get_var_dic() - - # normalise output acoustic data - if cfg.NORMCMP: - logger.info('normalising acoustic (output) features using method %s' % - cfg.output_feature_normalisation) - cmp_norm_info = None - if cfg.output_feature_normalisation == 'MVN': - normaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - if cfg.GenTestList: - # load mean std values - global_mean_vector, global_std_vector = normaliser.load_mean_std_values( - norm_info_file) - else: - # calculate mean and std vectors on the training data, and apply on the whole dataset - global_mean_vector = normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number], 0, cfg.cmp_dim) - global_std_vector = normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector, 0, cfg.cmp_dim) - # for hmpd vocoder we don't need to normalize the - # pdd values - if cfg.vocoder_type == 'hmpd': - stream_start_index = {} - dimension_index = 0 - recorded_vuv = False - vuv_dimension = None - for feature_name in cfg.out_dimension_dict.keys(): - if feature_name != 'vuv': - stream_start_index[feature_name] = dimension_index - else: - vuv_dimension = dimension_index - recorded_vuv = True - - dimension_index += cfg.out_dimension_dict[feature_name] - logger.info( - 'hmpd pdd values are not normalized since they are in 0 to 1') - global_mean_vector[:, stream_start_index['pdd']: stream_start_index['pdd'] + cfg.out_dimension_dict['pdd']] = 0 - global_std_vector[:, stream_start_index['pdd']: stream_start_index['pdd'] + cfg.out_dimension_dict['pdd']] = 1 - normaliser.feature_normalisation( - nn_cmp_file_list, nn_cmp_norm_file_list) - cmp_norm_info = numpy.concatenate( - (global_mean_vector, global_std_vector), axis=0) - - elif cfg.output_feature_normalisation == 'MINMAX': - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim, min_value=0.01, max_value=0.99) - if cfg.GenTestList: - min_max_normaliser.load_min_max_values(norm_info_file) - else: - min_max_normaliser.find_min_max_values( - nn_cmp_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_cmp_file_list, nn_cmp_norm_file_list) - - cmp_min_vector = min_max_normaliser.min_vector - cmp_max_vector = min_max_normaliser.max_vector - cmp_norm_info = numpy.concatenate( - (cmp_min_vector, cmp_max_vector), axis=0) - - else: - logger.critical('Normalisation type %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - if not cfg.GenTestList: - cmp_norm_info = numpy.array(cmp_norm_info, 'float32') - fid = open(norm_info_file, 'wb') - cmp_norm_info.tofile(fid) - 
fid.close() - logger.info('saved %s vectors to %s' % - (cfg.output_feature_normalisation, norm_info_file)) - - feature_index = 0 - for feature_name in list(cfg.out_dimension_dict.keys()): - feature_std_vector = numpy.array( - global_std_vector[:, feature_index:feature_index+cfg.out_dimension_dict[feature_name]], 'float32') - - fid = open(var_file_dict[feature_name], 'w') - feature_var_vector = feature_std_vector**2 - feature_var_vector.tofile(fid) - fid.close() - - logger.info('saved %s variance vector to %s' % - (feature_name, var_file_dict[feature_name])) - - feature_index += cfg.out_dimension_dict[feature_name] - - train_x_file_list, train_y_file_list = file_paths.get_train_list_x_y() - valid_x_file_list, valid_y_file_list = file_paths.get_valid_list_x_y() - test_x_file_list, test_y_file_list = file_paths.get_test_list_x_y() - - # we need to know the label dimension before training the DNN - # computing that requires us to look at the labels - # - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name, add_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - add_feat_dim = sum(cfg.additional_features.values()) - lab_dim = label_normaliser.dimension + add_feat_dim + cfg.appended_input_dim - - if cfg.VoiceConversion: - lab_dim = cfg.cmp_dim - - logger.info('label dimension is %d' % lab_dim) - - combined_model_arch = str(len(hidden_layer_size)) - for hid_size in hidden_layer_size: - combined_model_arch += '_' + str(hid_size) - - nnets_file_name = file_paths.get_nnets_file_name() - temp_dir_name = file_paths.get_temp_nn_dir_name() - - gen_dir = os.path.join(gen_dir, temp_dir_name) - - if cfg.switch_to_keras or cfg.switch_to_tensorflow: - ### set configuration variables ### - cfg.inp_dim = lab_dim - cfg.out_dim = cfg.cmp_dim - - cfg.inp_feat_dir = nn_label_norm_dir - cfg.out_feat_dir = nn_cmp_norm_dir - cfg.pred_feat_dir = gen_dir - - if cfg.GenTestList and cfg.test_synth_dir != "None": - cfg.inp_feat_dir = cfg.test_synth_dir - cfg.pred_feat_dir = cfg.test_synth_dir - - if cfg.switch_to_keras: - ### call kerasclass and use an instance ### - from run_keras_with_merlin_io import KerasClass - keras_instance = KerasClass(cfg) - - elif cfg.switch_to_tensorflow: - ### call Tensorflowclass and use an instance ### - from run_tensorflow_with_merlin_io import TensorflowClass - tf_instance = TensorflowClass(cfg) - - # DNN model training - if cfg.TRAINDNN: - - var_dict = load_covariance(var_file_dict, cfg.out_dimension_dict) - - logger.info('training DNN') - - fid = open(norm_info_file, 'rb') - cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32) - fid.close() - cmp_min_max = cmp_min_max.reshape((2, -1)) - cmp_mean_vector = cmp_min_max[0, ] - cmp_std_vector = cmp_min_max[1, ] - - try: - os.makedirs(model_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create model directory %s' % model_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - try: - if cfg.switch_to_keras: - keras_instance.train_keras_model() - elif cfg.switch_to_tensorflow: - tf_instance.train_tensorflow_model() - else: - train_DNN(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot, var_dict=var_dict, - 
cmp_mean_vector=cmp_mean_vector, cmp_std_vector=cmp_std_vector, seq_dur_file_list=file_paths.seq_dur_file_list, init_dnn_model_file=cfg.start_from_trained_model) - except KeyboardInterrupt: - logger.critical('train_DNN interrupted via keyboard') - # Could 'raise' the exception further, but that causes a deep traceback to be printed - # which we don't care about for a keyboard interrupt. So, just bail out immediately - sys.exit(1) - except: - logger.critical('train_DNN threw an exception') - raise - - if cfg.GENBNFEA: - # Please only tune on this step when you want to generate bottleneck features from DNN - gen_dir = file_paths.bottleneck_features - - bottleneck_size = min(hidden_layer_size) - bottleneck_index = 0 - for i in range(len(hidden_layer_size)): - if hidden_layer_size[i] == bottleneck_size: - bottleneck_index = i - - logger.info('generating bottleneck features from DNN') - - try: - os.makedirs(gen_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create generation directory %s' % gen_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - gen_file_id_list = file_id_list[0:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - test_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - - gen_file_list = prepare_file_path_list( - gen_file_id_list, gen_dir, cfg.cmp_ext) - - dnn_hidden_generation(test_x_file_list, nnets_file_name, - lab_dim, cfg.cmp_dim, gen_file_list, bottleneck_index) - - # generate parameters from DNN - gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - test_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - test_d_file_list = file_paths.seq_dur_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.GenTestList: - gen_file_id_list = test_id_list - test_x_file_list = nn_label_norm_file_list - if cfg.test_synth_dir != "None": - gen_dir = cfg.test_synth_dir - - if cfg.DNNGEN: - logger.info('generating from DNN') - - try: - os.makedirs(gen_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create generation directory %s' % gen_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - gen_file_list = prepare_file_path_list( - gen_file_id_list, gen_dir, cfg.cmp_ext) - - if cfg.switch_to_keras: - keras_instance.test_keras_model() - elif cfg.switch_to_tensorflow: - tf_instance.test_tensorflow_model() - else: - reshape_io = True if cfg.rnn_batch_training else False - if cfg.network_type == "S2SD": - dnn_generation_S2S(test_x_file_list, test_d_file_list, - nnets_file_name, lab_dim, cfg.cmp_dim, gen_file_list) - elif cfg.network_type == "S2S": - test_data_reader = ListDataProvider( - x_file_list=test_x_file_list, y_file_list=test_y_file_list, dur_file_list=test_d_file_list) - MLU_div = test_data_reader.set_s2s_division( - cfg.linguistic_file_name) - dnn_generation_S2SML(test_x_file_list, test_d_file_list, - nnets_file_name, lab_dim, cfg.cmp_dim, MLU_div, gen_file_list) - else: - dnn_generation(test_x_file_list, 
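# The slice arithmetic above always partitions file_id_list in
# train/valid/test order. An illustrative helper (not part of Merlin) that
# makes the convention explicit:
def split_file_list(file_id_list, n_train, n_valid, n_test):
    train = file_id_list[:n_train]
    valid = file_id_list[n_train:n_train + n_valid]
    test = file_id_list[n_train + n_valid:n_train + n_valid + n_test]
    return train, valid, test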
nnets_file_name,
-                                   lab_dim, cfg.cmp_dim, gen_file_list, reshape_io)
-
-        logger.debug('denormalising generated output using method %s' %
-                     cfg.output_feature_normalisation)
-
-        fid = open(norm_info_file, 'rb')
-        cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32)
-        fid.close()
-        cmp_min_max = cmp_min_max.reshape((2, -1))
-        cmp_min_vector = cmp_min_max[0, ]
-        cmp_max_vector = cmp_min_max[1, ]
-
-        if cfg.output_feature_normalisation == 'MVN':
-            denormaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim)
-            denormaliser.feature_denormalisation(
-                gen_file_list, gen_file_list, cmp_min_vector, cmp_max_vector)
-
-        elif cfg.output_feature_normalisation == 'MINMAX':
-            denormaliser = MinMaxNormalisation(
-                cfg.cmp_dim, min_value=0.01, max_value=0.99, min_vector=cmp_min_vector, max_vector=cmp_max_vector)
-            denormaliser.denormalise_data(gen_file_list, gen_file_list)
-        else:
-            logger.critical('denormalising method %s is not supported!\n' % (
-                cfg.output_feature_normalisation))
-            raise Exception('denormalising method %s is not supported' %
-                            cfg.output_feature_normalisation)
-
-        if cfg.AcousticModel:
-            # perform MLPG to smooth parameter trajectory
-            # lf0 is included, the output features must have vuv.
-            generator = ParameterGeneration(
-                gen_wav_features=cfg.gen_wav_features, enforce_silence=cfg.enforce_silence)
-            generator.acoustic_decomposition(gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict,
-                                             cfg.file_extension_dict, var_file_dict, do_MLPG=cfg.do_MLPG, cfg=cfg)
-
-        if cfg.DurationModel:
-            ### Perform duration normalization (min. state dur set to 1) ###
-            gen_dur_list = prepare_file_path_list(
-                gen_file_id_list, gen_dir, cfg.dur_ext)
-            gen_label_list = prepare_file_path_list(
-                gen_file_id_list, gen_dir, cfg.lab_ext)
-            in_gen_label_align_file_list = prepare_file_path_list(
-                gen_file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False)
-
-            generator = ParameterGeneration(
-                gen_wav_features=cfg.gen_wav_features)
-            generator.duration_decomposition(
-                gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict)
-
-            label_modifier = HTSLabelModification(
-                silence_pattern=cfg.silence_pattern, label_type=cfg.label_type)
-            label_modifier.modify_duration_labels(
-                in_gen_label_align_file_list, gen_dur_list, gen_label_list)
-
-    # generate wav
-    if cfg.GENWAV:
-        logger.info('reconstructing waveform(s)')
-        generate_wav(gen_dir, gen_file_id_list, cfg)  # generated speech
-#        generate_wav(nn_cmp_dir, gen_file_id_list, cfg)  # reference copy synthesis speech
-
-    ### setting back to original conditions before calculating objective scores ###
-    if cfg.GenTestList:
-        in_label_align_file_list = prepare_file_path_list(
-            file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False)
-        binary_label_file_list = prepare_file_path_list(
-            file_id_list, binary_label_dir, cfg.lab_ext)
-        gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number +
-                                        cfg.valid_file_number+cfg.test_file_number]
-
-    # evaluation: RMSE and CORR for duration
-    if cfg.CALMCD and cfg.DurationModel:
-        logger.info('calculating MCD')
-
-        ref_data_dir = os.path.join(inter_data_dir, 'ref_data')
-
-        ref_dur_list = prepare_file_path_list(
-            gen_file_id_list, ref_data_dir, cfg.dur_ext)
-
-        in_gen_label_align_file_list = in_label_align_file_list[
-            cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number]
-        calculator = IndividualDistortionComp()
-
-        valid_file_id_list = file_id_list[cfg.train_file_number:
-                                          cfg.train_file_number+cfg.valid_file_number]
-        test_file_id_list = file_id_list[cfg.train_file_number +
-
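# The denormalisation step above reads norm_info_file back as a flat float32
# stream and reshapes it to (2, dim): row 0 is the mean (or min) vector,
# row 1 the std (or max) vector. A self-contained sketch of that round trip,
# with a toy dimension and file name standing in for cfg.cmp_dim and
# norm_info_file:
import numpy as np

dim = 4
mean = np.zeros((1, dim), dtype=np.float32)
std = np.ones((1, dim), dtype=np.float32)
np.concatenate((mean, std), axis=0).tofile('norm_info.dat')

stats = np.fromfile('norm_info.dat', dtype=np.float32).reshape((2, -1))
features = np.random.randn(10, dim).astype(np.float32)
denormalised = features * stats[1] + stats[0]  # MVN: y * std + mean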
cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['dur'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_dur_list, cfg.dur_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover(n_cmp=cfg.dur_dim, silence_pattern=cfg.silence_pattern, - label_type=cfg.label_type, remove_frame_features=cfg.add_frame_features) - remover.remove_silence(in_file_list_dict['dur'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_dur_list) - - valid_dur_rmse, valid_dur_corr = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.dur_ext, cfg.dur_dim) - test_dur_rmse, test_dur_corr = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.dur_ext, cfg.dur_dim) - - logger.info('Develop: DNN -- RMSE: %.3f frames/phoneme; CORR: %.3f; ' - % (valid_dur_rmse, valid_dur_corr)) - logger.info('Test: DNN -- RMSE: %.3f frames/phoneme; CORR: %.3f; ' - % (test_dur_rmse, test_dur_corr)) - - # evaluation: calculate distortion - if cfg.CALMCD and cfg.AcousticModel: - logger.info('calculating MCD') - - ref_data_dir = os.path.join(inter_data_dir, 'ref_data') - ref_lf0_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.lf0_ext) - # for straight or world vocoders - ref_mgc_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.mgc_ext) - ref_bap_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.bap_ext) - # for GlottDNN vocoder - ref_lsf_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.lsf_ext) - ref_slsf_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.slsf_ext) - ref_gain_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.gain_ext) - ref_hnr_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.hnr_ext) - # for pulsemodel vocoder - ref_pdd_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.pdd_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - calculator = IndividualDistortionComp() - - spectral_distortion = 0.0 - bap_mse = 0.0 - f0_mse = 0.0 - vuv_error = 0.0 - - valid_file_id_list = file_id_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_file_id_list = file_id_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - # get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - # use first feature in label -- hardcoded for now - silence_feature = 0 - - # Use these to trim silence: - untrimmed_test_labels = binary_label_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if 'mgc' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['mgc'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_mgc_list, cfg.mgc_dim, - untrimmed_test_labels, lab_dim, silence_feature) - 
elif cfg.remove_silence_using_hts_labels: - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['mgc'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_mgc_list) - else: - ref_data_dir = os.path.join(data_dir, 'mgc') - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - # MCD - valid_spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - test_spectral_distortion *= (10 / - numpy.log(10)) * numpy.sqrt(2.0) # MCD - - if 'bap' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['bap'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_bap_list, cfg.bap_dim, - untrimmed_test_labels, lab_dim, silence_feature) - elif cfg.remove_silence_using_hts_labels: - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['bap'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_bap_list) - else: - ref_data_dir = os.path.join(data_dir, 'bap') - valid_bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - test_bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - valid_bap_mse = valid_bap_mse / 10.0 - # Cassia's bap is computed from 10*log|S(w)|. 
if use HTS/SPTK style, do the same as MGC - test_bap_mse = test_bap_mse / 10.0 - - if 'lf0' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['lf0'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_lf0_list, cfg.lf0_dim, - untrimmed_test_labels, lab_dim, silence_feature) - elif cfg.remove_silence_using_hts_labels: - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['lf0'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_lf0_list) - else: - ref_data_dir = os.path.join(data_dir, 'lf0') - valid_f0_mse, valid_f0_corr, valid_vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - test_f0_mse, test_f0_corr, test_vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - - if 'lsf' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['lsf'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_lsf_list, cfg.lsf_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.lsf_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['lsf'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_lsf_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.lsf_ext, cfg.lsf_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.lsf_ext, cfg.lsf_dim) - - if 'slsf' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['slsf'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_slsf_list, cfg.slsf_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.slsf_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['slsf'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_slsf_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.slsf_ext, cfg.slsf_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.slsf_ext, cfg.slsf_dim) - - if 'hnr' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['hnr'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_hnr_list, cfg.hnr_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.hnr_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['hnr'][cfg.train_file_number:cfg.train_file_number + - 
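# The factor (10 / numpy.log(10)) * numpy.sqrt(2.0) applied to the spectral
# distortion above is the standard mel-cepstral distortion (MCD) scaling,
#
#     MCD [dB] = (10 / ln 10) * sqrt(2 * sum_d (c_d - c_hat_d)^2),
#
# assuming compute_distortion returns the root of the summed squared
# cepstral differences:
import numpy as np

mcd_scale = (10.0 / np.log(10.0)) * np.sqrt(2.0)
assert abs(mcd_scale - 6.1418) < 1e-3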
cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_hnr_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.hnr_ext, cfg.hnr_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.hnr_ext, cfg.hnr_dim) - - if 'gain' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['gain'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_gain_list, cfg.gain_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.gain_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['gain'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_gain_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.gain_ext, cfg.gain_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.gain_ext, cfg.gain_dim) - - if 'pdd' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['pdd'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_pdd_list, cfg.pdd_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.pdd_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['pdd'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_pdd_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.pdd_ext, cfg.pdd_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.pdd_ext, cfg.pdd_dim) - - logger.info('Develop: DNN -- MCD: %.3f dB; BAP: %.3f dB; F0:- RMSE: %.3f Hz; CORR: %.3f; VUV: %.3f%%' - % (valid_spectral_distortion, valid_bap_mse, valid_f0_mse, valid_f0_corr, valid_vuv_error*100.)) - logger.info('Test : DNN -- MCD: %.3f dB; BAP: %.3f dB; F0:- RMSE: %.3f Hz; CORR: %.3f; VUV: %.3f%%' - % (test_spectral_distortion, test_bap_mse, test_f0_mse, test_f0_corr, test_vuv_error*100.)) - - -if __name__ == '__main__': - - # these things should be done even before trying to parse the command line - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.cfg - - # set up logging to use our custom class - logging.setLoggerClass(LoggerPlotter) - - # get a logger for this main function - logger = logging.getLogger("main") - - if len(sys.argv) != 2: - logger.critical('usage: run_merlin.sh [config file name]') - sys.exit(1) - - config_file = sys.argv[1] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - - logger.info('Installation information:') - logger.info(' Merlin directory: '+os.path.abspath( - os.path.join(os.path.dirname(os.path.realpath(__file__)), os.pardir))) - logger.info(' PATH:') - env_PATHs = os.getenv('PATH') - if env_PATHs: - env_PATHs = env_PATHs.split(':') - for p in env_PATHs: - if len(p) > 0: - logger.info(' '+p) - logger.info(' 
LD_LIBRARY_PATH:') - env_LD_LIBRARY_PATHs = os.getenv('LD_LIBRARY_PATH') - if env_LD_LIBRARY_PATHs: - env_LD_LIBRARY_PATHs = env_LD_LIBRARY_PATHs.split(':') - for p in env_LD_LIBRARY_PATHs: - if len(p) > 0: - logger.info(' '+p) - logger.info(' Python version: '+sys.version.replace('\n', '')) - logger.info(' PYTHONPATH:') - env_PYTHONPATHs = os.getenv('PYTHONPATH') - if env_PYTHONPATHs: - env_PYTHONPATHs = env_PYTHONPATHs.split(':') - for p in env_PYTHONPATHs: - if len(p) > 0: - logger.info(' '+p) - logger.info(' Numpy version: '+numpy.version.version) - logger.info(' Theano version: '+theano.version.version) - logger.info(' THEANO_FLAGS: '+os.getenv('THEANO_FLAGS')) - logger.info(' device: '+theano.config.device) - - # Check for the presence of git - ret = os.system('git status > /dev/null') - if ret == 0: - logger.info(' Git is available in the working directory:') - git_describe = subprocess.Popen( - ['git', 'describe', '--tags', '--always'], stdout=subprocess.PIPE).communicate()[0][:-1] - logger.info(' Merlin version: {}'.format(git_describe)) - git_branch = subprocess.Popen( - ['git', 'rev-parse', '--abbrev-ref', 'HEAD'], stdout=subprocess.PIPE).communicate()[0][:-1] - logger.info(' branch: {}'.format(git_branch)) - git_diff = subprocess.Popen( - ['git', 'diff', '--name-status'], stdout=subprocess.PIPE).communicate()[0] - if sys.version_info.major >= 3: - git_diff = git_diff.decode('utf-8') - git_diff = git_diff.replace('\t', ' ').split('\n') - logger.info(' diff to Merlin version:') - for filediff in git_diff: - if len(filediff) > 0: - logger.info(' '+filediff) - logger.info(' (all diffs logged in ' + - os.path.basename(cfg.log_file)+'.gitdiff'+')') - os.system('git diff > '+cfg.log_file+'.gitdiff') - - logger.info('Execution information:') - logger.info(' HOSTNAME: '+socket.getfqdn()) - logger.info(' USER: '+os.getenv('USER')) - logger.info(' PID: '+str(os.getpid())) - PBS_JOBID = os.getenv('PBS_JOBID') - if PBS_JOBID: - logger.info(' PBS_JOBID: '+PBS_JOBID) - - if cfg.profile: - logger.info('profiling is activated') - import cProfile - import pstats - cProfile.run('main_function(cfg)', 'mainstats') - - # create a stream for the profiler to write to - profiling_output = io.StringIO() - p = pstats.Stats('mainstats', stream=profiling_output) - - # print stats to that stream - # here we just report the top 10 functions, sorted by total amount of time spent in each - p.strip_dirs().sort_stats('tottime').print_stats(10) - - # print the result to the log - logger.info('---Profiling result follows---\n%s' % - profiling_output.getvalue()) - profiling_output.close() - logger.info('---End of profiling result---') - - else: - main_function(cfg) - - sys.exit(0) -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://github.com/CSTR-Edinburgh/merlin -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. 
-# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -import os -import sys -import time -import tensorflow as tf -from tensorflow_lib import configuration -from tensorflow_lib import data_utils -from tensorflow_lib.train import TrainTensorflowModels, Train_Encoder_Decoder_Models - - -class TensorflowClass(object): - - def __init__(self, cfg): - - ################################################### - ########## User configurable variables ############ - ################################################### - - inp_feat_dir = cfg.inp_feat_dir - out_feat_dir = cfg.out_feat_dir - pred_feat_dir = cfg.pred_feat_dir - - inp_file_ext = cfg.inp_file_ext - out_file_ext = cfg.out_file_ext - - ### Input-Output ### - - self.inp_dim = cfg.inp_dim - self.out_dim = cfg.out_dim - - self.inp_norm = cfg.inp_norm - self.out_norm = cfg.out_norm - - self.inp_stats_file = cfg.inp_stats_file - self.out_stats_file = cfg.out_stats_file - - self.inp_scaler = None - self.out_scaler = None - - #### define model params #### - - self.hidden_layer_type = cfg.hidden_layer_type - self.hidden_layer_size = cfg.hidden_layer_size - - self.sequential_training = cfg.sequential_training - self.encoder_decoder = cfg.encoder_decoder - - self.attention = cfg.attention - self.cbhg = cfg.cbhg - self.batch_size = cfg.batch_size - self.shuffle_data = cfg.shuffle_data - - self.output_layer_type = cfg.output_layer_type - self.loss_function = cfg.loss_function - self.optimizer = cfg.optimizer - - self.rnn_params = cfg.rnn_params - self.dropout_rate = cfg.dropout_rate - self.num_of_epochs = cfg.num_of_epochs - - ### Define the work directory### - self.model_dir = cfg.model_dir - - ### define train, valid, test ### - - train_file_number = cfg.train_file_number - valid_file_number = cfg.valid_file_number - test_file_number = cfg.test_file_number - - file_id_scp = cfg.file_id_scp - test_id_scp = cfg.test_id_scp - - #### main processess #### - - self.NORMDATA = cfg.NORMDATA - 
self.TRAINMODEL = cfg.TRAINMODEL - self.TESTMODEL = cfg.TESTMODEL - - #### Generate only test list #### - self.GenTestList = cfg.GenTestList - - ################################################### - ####### End of user-defined conf variables ######## - ################################################### - - #### Create train, valid and test file lists #### - file_id_list = data_utils.read_file_list(file_id_scp) - - train_id_list = file_id_list[0: train_file_number] - valid_id_list = file_id_list[train_file_number: - train_file_number + valid_file_number] - test_id_list = file_id_list[train_file_number + - valid_file_number: train_file_number + valid_file_number + test_file_number] - - valid_test_id_list = file_id_list[train_file_number: train_file_number + - valid_file_number + test_file_number] - - self.inp_train_file_list = data_utils.prepare_file_path_list( - train_id_list, inp_feat_dir, inp_file_ext) - self.out_train_file_list = data_utils.prepare_file_path_list( - train_id_list, out_feat_dir, out_file_ext) - - self.inp_valid_file_list = data_utils.prepare_file_path_list( - valid_id_list, inp_feat_dir, inp_file_ext) - self.out_valid_file_list = data_utils.prepare_file_path_list( - valid_id_list, out_feat_dir, out_file_ext) - - self.inp_test_file_list = data_utils.prepare_file_path_list( - valid_test_id_list, inp_feat_dir, inp_file_ext) - self.out_test_file_list = data_utils.prepare_file_path_list( - valid_test_id_list, out_feat_dir, out_file_ext) - - self.gen_test_file_list = data_utils.prepare_file_path_list( - valid_test_id_list, pred_feat_dir, out_file_ext) - - if self.GenTestList: - test_id_list = data_utils.read_file_list(test_id_scp) - self.inp_test_file_list = data_utils.prepare_file_path_list( - test_id_list, inp_feat_dir, inp_file_ext) - self.gen_test_file_list = data_utils.prepare_file_path_list( - test_id_list, pred_feat_dir, out_file_ext) - - if not self.encoder_decoder: - self.tensorflow_models = TrainTensorflowModels(self.inp_dim, self.hidden_layer_size, self.out_dim, self.hidden_layer_type, self.model_dir, - output_type=self.output_layer_type, dropout_rate=self.dropout_rate, - loss_function=self.loss_function, optimizer=self.optimizer) - else: - self.encoder_decoder_models = Train_Encoder_Decoder_Models(self.inp_dim, self.hidden_layer_size, self.out_dim, self.hidden_layer_type, output_type=self.output_layer_type, - dropout_rate=self.dropout_rate, loss_function=self.loss_function, optimizer=self.optimizer, - attention=self.attention, cbhg=self.cbhg) - - def normlize_data(self): - ### normalize train data ### - if os.path.isfile(self.inp_stats_file) and os.path.isfile(self.out_stats_file): - self.inp_scaler = data_utils.load_norm_stats( - self.inp_stats_file, self.inp_dim, method=self.inp_norm) - self.out_scaler = data_utils.load_norm_stats( - self.out_stats_file, self.out_dim, method=self.out_norm) - else: - print('preparing train_x, train_y from input and output feature files...') - train_x, train_y, train_flen = data_utils.read_data_from_file_list(self.inp_train_file_list, self.out_train_file_list, - self.inp_dim, self.out_dim, sequential_training=True if self.sequential_training or self.encoder_decoder else False) - - print('computing norm stats for train_x...') - inp_scaler = data_utils.compute_norm_stats( - train_x, self.inp_stats_file, method=self.inp_norm) - - print('computing norm stats for train_y...') - out_scaler = data_utils.compute_norm_stats( - train_y, self.out_stats_file, method=self.out_norm) - - def train_tensorflow_model(self): - print('preparing 
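# normlize_data() above follows a cache-or-compute pattern: reuse stored
# normalisation statistics when present, otherwise derive them once from the
# training set and save them. The same idea as a generic numpy sketch (the
# .npz layout is illustrative, not the Merlin stats format; stats_file
# should carry a .npz extension so np.savez does not rename it):
import os
import numpy as np

def load_or_compute_stats(stats_file, train_x):
    if os.path.isfile(stats_file):
        stats = np.load(stats_file)
        return stats['mean'], stats['std']
    mean, std = train_x.mean(axis=0), train_x.std(axis=0)
    np.savez(stats_file, mean=mean, std=std)
    return mean, std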
train_x, train_y from input and output feature files...') - #### load the data #### - - train_x, train_y, train_flen = data_utils.read_data_from_file_list(self.inp_train_file_list, self.out_train_file_list, - self.inp_dim, self.out_dim, sequential_training=True if self.sequential_training or self.encoder_decoder else False) - #### normalize the data #### - data_utils.norm_data(train_x, self.inp_scaler, - sequential_training=True if self.sequential_training or self.encoder_decoder else False) - data_utils.norm_data(train_y, self.out_scaler, - sequential_training=True if self.sequential_training or self.encoder_decoder else False) - - #### define the model #### - if self.sequential_training: - utt_length = train_flen["utt2framenum"].values() - self.tensorflow_models.get_max_step(max(utt_length)) - self.tensorflow_models.define_sequence_model() - - elif self.encoder_decoder: - utt_length = train_flen["utt2framenum"].values() - super(Train_Encoder_Decoder_Models, self.encoder_decoder_models).__setattr__( - "max_step", max(utt_length)) - self.encoder_decoder_models.define_encoder_decoder() - else: - self.tensorflow_models.define_feedforward_model() - - #### train the model #### - print('training...') - if self.sequential_training: - ### Train feedforward model ### - self.tensorflow_models.train_sequence_model( - train_x, train_y, batch_size=self.batch_size, num_of_epochs=self.num_of_epochs, shuffle_data=self.shuffle_data, utt_length=utt_length) - - elif self.encoder_decoder: - self.encoder_decoder_models.train_encoder_decoder_model( - train_x, train_y, batch_size=self.batch_size, num_of_epochs=self.num_of_epochs, shuffle_data=True, utt_length=utt_length) - else: - self.tensorflow_models.train_feedforward_model( - train_x, train_y, batch_size=self.batch_size, num_of_epochs=self.num_of_epochs, shuffle_data=self.shuffle_data) - - def test_tensorflow_model(self): - #### load the data #### - print('preparing test_x from input feature files...') - test_x, test_flen = data_utils.read_test_data_from_file_list( - self.inp_test_file_list, self.inp_dim) - - #### normalize the data #### - data_utils.norm_data(test_x, self.inp_scaler) - #### compute predictions #### - if self.encoder_decoder: - self.encoder_decoder_models.predict( - test_x, self.out_scaler, self.gen_test_file_list) - else: - self.tensorflow_models.predict( - test_x, self.out_scaler, self.gen_test_file_list, self.sequential_training) - - def main_function(self): - ### Implement each module ### - if self.NORMDATA: - self.normlize_data() - - if self.TRAINMODEL: - self.train_tensorflow_model() - - if self.TESTMODEL: - self.test_tensorflow_model() - - -if __name__ == "__main__": - - if len(sys.argv) != 2: - print( - 'usage: python run_tensorflow_with_merlin_io.py [config file name]') - sys.exit(1) - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.configuration() - - config_file = sys.argv[1] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - - print("--- Job started ---") - start_time = time.time() - - # main function - tensorflow_instance = TensorflowClass(cfg) - # except: - # print "inp stats file is %s"%cfg.inp_stats_file - # sys.exit(0) - tensorflow_instance.main_function() - - (m, s) = divmod(int(time.time() - start_time), 60) - print("--- Job completion time: %d min. %d sec ---" % (m, s)) - - sys.exit(0) -#! /usr/bin/python2 -u -# -*- coding: utf-8 -*- -# -# Copyright 2016 Google Inc. All Rights Reserved. 
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Script to validate Merlin setup.
-"""
-
-__author__ = 'pasindu@google.com (Pasindu De Silva)'
-
-import logging
-import logging.config
-import os
-import sys
-import configuration
-from utils.utils import read_file_list
-
-logger = logging.getLogger('validation')
-
-
-class Validation(object):
-    """Runs Merlin validations.
-    """
-    _is_valid = True
-
-    def __init__(self, cfg):
-        self.cfg = cfg
-
-    def is_valid(self):
-        """Returns whether the given configuration file is valid."""
-
-        self.validate_label_settings()
-        self.validate_acoustic_files()
-        return self._is_valid
-
-    def validate_label_settings(self):
-        if self.cfg.label_style != 'HTS':
-            self._is_valid = False
-            logging.error(
-                'Only HTS-style labels are now supported as input to Merlin')
-
-    def validate_acoustic_files(self):
-        """Validates that the acoustic features exist at the given paths.
-
-        Args:
-          cfg: Merlin configuration.
-        """
-        file_types_to_check = [
-            {
-                'name': 'mgc',
-                'dir': self.cfg.in_mgc_dir,
-                'ext': self.cfg.mgc_ext
-            },
-            {
-                'name': 'bap',
-                'dir': self.cfg.in_bap_dir,
-                'ext': self.cfg.bap_ext
-            },
-            {
-                'name': 'lf0',
-                'dir': self.cfg.in_lf0_dir,
-                'ext': self.cfg.lf0_ext
-            },
-            {
-                'name': 'label_align',
-                'dir': self.cfg.in_label_align_dir,
-                'ext': self.cfg.lab_ext
-            },
-        ]
-
-        file_ids = read_file_list(self.cfg.file_id_scp)
-        actual_total = len(file_ids)
-
-        expected_total = self.cfg.train_file_number + \
-            self.cfg.valid_file_number + self.cfg.test_file_number
-
-        if expected_total > actual_total:
-            logger.error('Expected %d files but found %d files', expected_total,
-                         actual_total)
-
-        for file_id in file_ids:
-            for path_info in file_types_to_check:
-                path = '%s/%s%s' % (path_info['dir'],
-                                    file_id, path_info['ext'])
-                if not os.path.exists(path):
-                    self._is_valid = False
-                    logger.error('File id %s missing feature %s at %s', file_id,
-                                 path_info['name'], path)
-
-
-def main(args):
-    if len(args) <= 1:
-        sys.stderr.write(
-            'Usage - python src/validation path_to_conf1 path_to_conf2 ...\n')
-        exit(1)
-
-    for config_file in args[1:]:
-
-        logging.info('Validating %s configuration.', config_file)
-
-        cfg = configuration.cfg
-        cfg.configure(config_file)
-        validation = Validation(cfg)
-
-        if validation.is_valid():
-            logging.info(
-                'Configuration file %s passed validation checks.', config_file)
-        else:
-            logging.error(
-                'Configuration file %s contains errors.', config_file)
-
-
-if __name__ == '__main__':
-    main(sys.argv)
-# Test the classes used in Merlin pipeline
-# TODO: run some very simple training on random data
-
-import logging
-try:
-    import cPickle
-except ImportError:
-    import pickle as cPickle  # Python 3 fallback
-import numpy as np
-import errno
-import sys
-import os
-sys.path.append('../src')
-
-
-def makedir(path):
-    try:
-        os.makedirs(path)
-    except OSError as exception:
-        if exception.errno != errno.EEXIST:
-            raise
-
-
-def build_model(hidden_layer_type):
-    logger.info('    DeepRecurrentNetwork '+str(hidden_layer_type))
-    nnmodel = DeepRecurrentNetwork(8, 16*np.ones(len(hidden_layer_type)),
-                                   4, L1_reg=0.0, L2_reg=0.00001,
-                                   hidden_layer_type=hidden_layer_type)
-
-    # Always try to save it and reload it
-    modelfile = 'log/model.pkl'
-    makedir('log')
-    cPickle.dump(nnmodel, open(modelfile, 'wb'))
-    nnmodel = cPickle.load(open(modelfile, 'rb'))
-
-    logger.info('    OK')
-
-    return nnmodel
-
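# build_model() above deliberately pickles the freshly constructed network
# and reloads it, so models that build and train but cannot be serialised
# fail early. The same round trip in plain Python 3, as a standalone sketch:
import pickle

def save_and_reload(obj, path):
    with open(path, 'wb') as f:
        pickle.dump(obj, f)
    with open(path, 'rb') as f:
        return pickle.load(f)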
-if __name__ == '__main__':
-
-    # Get a logger for these tests
-    logging.basicConfig(
-        format='%(asctime)s %(levelname)8s%(name)15s: %(message)s')
-    logger = logging.getLogger("test")
-    logger.setLevel(logging.DEBUG)
-
-    logger.info('Testing Merlin classes')
-
-    # Build various models
-    logger.info('Build models without training')
-    from models.deep_rnn import DeepRecurrentNetwork
-    nnmodel = build_model(['TANH'])
-    del nnmodel
-    nnmodel = build_model(['TANH', 'TANH'])
-    del nnmodel
-    nnmodel = build_model(['LSTM', 'LSTM'])
-    del nnmodel
-    nnmodel = build_model(['SLSTM', 'SLSTM'])
-    del nnmodel
-import sys
-import argparse
-
-import numpy as np
-
-
-def similar_reals(ref, test, tol, colnames=None):
-    '''
-    Compare vector test against vector ref with a tolerance of tol (common scalar or vector)
-    '''
-
-    ref = np.array(ref)
-    test = np.array(test)
-    tol = np.atleast_1d(tol)
-
-    if len(ref) != len(test):
-        raise ValueError('Cannot compare arrays of different size')
-
-    if len(tol) == 1:
-        tol = tol*np.ones(len(ref))
-
-    row_format = "{:>10}" * len(ref)
-
-    if colnames:
-        print('           '+row_format.format(*colnames))
-    print('Reference: '+row_format.format(*ref))
-    print('Test:      '+row_format.format(*test))
-    print('Diff:      '+row_format.format(*(ref-test)))
-    print('Tolerance: '+row_format.format(*tol))
-    if any(abs(ref-test) > tol):
-        print('FAILED')
-        return False
-
-    return True
-
-
-if __name__ == '__main__':
-    argpar = argparse.ArgumentParser()
-    argpar.add_argument("--ref", nargs='+', type=float,
-                        default=None, help="Reference values.")
-    argpar.add_argument("--test", nargs='+', type=float,
-                        help="Values to test against the references.")
-    argpar.add_argument("--tol", nargs='+', type=float, default=0.1,
-                        help="Accepted tolerance (if a single value is provided, it is used for all compared pairs).")
-    argpar.add_argument("--colnames", nargs='+',
-                        default=None, help="Names for each column.")
-    args = argpar.parse_args()
-
-    if not similar_reals(args.ref, args.test, args.tol, args.colnames):
-        sys.exit(1)
-# -*- coding: utf-8 -*-
-#
-# Copyright 2016 Google Inc. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the 'License');
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#      http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an 'AS IS' BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-"""Tests FilePaths class.
-"""
-
-__author__ = 'pasindu@google.com (Pasindu De Silva)'
-
-from utils.file_paths import FilePaths
-import configuration
-import logging.config  # pylint: disable=unused-import
-import sys
-# pylint: disable=g-import-not-at-top
-sys.path.append('../src')
-
-
-def test_file_paths(cfg):
-    """Tests FilePaths constructor.
- - Args: - cfg: Merlin configuration - """ - cfg.GenTestList = True - file_paths = FilePaths(cfg) - - assert len( - file_paths.file_id_list) == 100, 'Number of files in file list incorrect' - assert len( - file_paths.test_id_list) == 100, 'Number of test in file list incorrect' - - -def test_nn_out_in_data_sets(cfg): - """Tests Train, Valid and Test filelists. - - Args: - cfg: Merlin configuration - """ - file_paths = FilePaths(cfg) - file_paths.set_label_dir(0, 'ext', 0) - file_paths.set_label_file_list() - - train_x_file_list, train_y_file_list = file_paths.get_train_list_x_y() - valid_x_file_list, valid_y_file_list = file_paths.get_valid_list_x_y() - test_x_file_list, test_y_file_list = file_paths.get_test_list_x_y() - - assert len(train_x_file_list - ) == cfg.train_file_number, 'train set x axis dimension incorrect' - assert len(valid_x_file_list - ) == cfg.valid_file_number, 'valid set x axis dimension incorrect' - assert len(test_x_file_list - ) == cfg.test_file_number, 'test set x axis dimension incorrect' - - assert len(train_y_file_list - ) == cfg.train_file_number, 'train set y axis dimension incorrect' - assert len(valid_y_file_list - ) == cfg.valid_file_number, 'valid set y axis dimension incorrect' - assert len(test_y_file_list - ) == cfg.test_file_number, 'test set y axis dimension incorrect' - - -def test_label_file_lists(cfg): - """Tests label filelists. - - Args: - cfg: Merlin configuration - """ - file_paths = FilePaths(cfg) - file_paths.set_label_dir(0, 'ext', 0) - file_paths.set_label_file_list() - - # Case 1: GenTestList = False and test_synth_dir = None - assert file_paths.in_label_align_file_list[ - 0] == '/tmp/label_state_align/file1.lab' - assert file_paths.binary_label_file_list[ - 0] == '/tmp/inter_module/binary_label_0/file1.lab' - assert file_paths.nn_label_file_list[ - 0] == '/tmp/inter_module/nn_no_silence_lab_ext/file1.lab' - assert file_paths.nn_label_norm_file_list[ - 0] == '/tmp/inter_module/nn_no_silence_lab_norm_ext/file1.lab' - - # Case 2: GenTestList = True and test_synth_dir = None - cfg.GenTestList = True - file_paths = FilePaths(cfg) - file_paths.set_label_dir(0, 'ext', 0) - file_paths.set_label_file_list() - assert file_paths.in_label_align_file_list[ - 0] == '/tmp/label_state_align/test1.lab' - assert file_paths.binary_label_file_list[ - 0] == '/tmp/inter_module/binary_label_0/test1.lab' - assert file_paths.nn_label_file_list[ - 0] == '/tmp/inter_module/nn_no_silence_lab_ext/test1.lab' - assert file_paths.nn_label_norm_file_list[ - 0] == '/tmp/inter_module/nn_no_silence_lab_norm_ext/test1.lab' - - # Case 3: GenTestList = True and test_synth_dir = test_synth - cfg.GenTestList = True - cfg.test_synth_dir = 'test_synth' - file_paths = FilePaths(cfg) - file_paths.set_label_dir(0, 'ext', 0) - file_paths.set_label_file_list() - assert file_paths.in_label_align_file_list[ - 0] == '/tmp/label_state_align/test1.lab' - assert file_paths.binary_label_file_list[0] == 'test_synth/test1.lab' - assert file_paths.nn_label_file_list[0] == 'test_synth/test1.lab' - assert file_paths.nn_label_norm_file_list[0] == 'test_synth/test1.lab' - - -def _get_config_file(): - cfg = configuration.cfg - cfg.configure('test_data/test.conf') - return cfg - - -def main(): - test_file_paths(_get_config_file()) - test_nn_out_in_data_sets(_get_config_file()) - test_label_file_lists(_get_config_file()) - - -if __name__ == '__main__': - main() -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# 
https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -from . import configuration - -# instantiate one object of this class -cfg = configuration.configuration() -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - - -import datetime -import textwrap -import io -import logging -import os -import math -import sys -if sys.version_info.major >= 3: - import configparser -else: - import ConfigParser as configparser - - -class configuration(object): - - """Configuration settings. Any user-specific values are read from an external file - and parsed by an instance of the built-in ConfigParser class""" - - def __init__(self): - # doesn't do anything - pass - - def configure(self, configFile=None, use_logging=True): - - # get a logger - logger = logging.getLogger("configuration") - # this (and only this) logger needs to be configured immediately, otherwise it won't work - # we can't use the full user-supplied configuration mechanism in this particular case, - # because we haven't loaded it yet! - # - # so, just use simple console-only logging - # this level is hardwired here - should change it to INFO - logger.setLevel(logging.INFO) - # add a handler & its formatter - will write only to console - ch = logging.StreamHandler() - logger.addHandler(ch) - formatter = logging.Formatter( - '%(asctime)s %(levelname)8s%(name)15s: %(message)s') - ch.setFormatter(formatter) - - # first, set up some default configuration values - self.initial_configuration() - - # next, load in any user-supplied configuration values - # that might over-ride the default values - self.user_configuration(configFile) - - # now that we have loaded the user's configuration, we can load the - # separate config file for logging (the name of that file will be specified in the config file) - if use_logging: - self.logging_configuration() - - # finally, set up all remaining configuration values - # that depend upon either default or user-supplied values - self.complete_configuration() - - logger.debug('configuration completed') - - def initial_configuration(self): - - # to be called before loading any user specific values - - # things to put here are - # 1. variables that the user cannot change - # 2. 
variables that need to be set before loading the user's config file - - UTTID_REGEX = '(.*)\..*' - - def user_configuration(self, configFile=None): - - # get a logger - logger = logging.getLogger("configuration") - - # load and parse the provided configFile, if provided - if not configFile: - logger.warn( - 'no user configuration file provided; using only built-in default settings') - return - - # load the config file - try: - cfgparser = configparser.ConfigParser() - cfgparser.readfp(open(configFile)) - logger.debug( - 'successfully read and parsed user configuration file %s' % configFile) - except: - logger.fatal('error reading user configuration file %s' % - configFile) - raise - - # work_dir must be provided before initialising other directories - self.work_dir = None - - if self.work_dir == None: - try: - self.work_dir = cfgparser.get('Paths', 'work') - - except (configparser.NoSectionError, configparser.NoOptionError): - if self.work_dir == None: - logger.critical('Paths:work has no value!') - raise Exception - - # look for those items that are user-configurable, and get their values - # sptk_bindir= .... - - # default place for some data - self.data_dir = os.path.join(self.work_dir, 'data') - self.inter_data_dir = os.path.join(self.work_dir, 'inter_module') - - self.gen_dir = os.path.join(self.work_dir, 'gen') - self.model_dir = os.path.join(self.work_dir, 'nnets_model') - self.stats_dir = os.path.join(self.work_dir, 'stats') - - self.def_inp_dir = os.path.join( - self.inter_data_dir, 'nn_no_silence_lab_norm_425') - self.def_out_dir = os.path.join( - self.inter_data_dir, 'nn_norm_mgc_lf0_vuv_bap_187') - - # a list instead of a dict because OrderedDict is not available until 2.7 - # and I don't want to import theano here just for that one class - # each entry is a tuple of (variable name, default value, section in config file, option name in config file) - # - # the type of the default value is important and controls the type that the corresponding - # variable will have - # - # to set a default value of 'undefined' use an empty string - # or the special value 'impossible', as appropriate - # - impossible_int = int(-99999) - impossible_float = float(-99999.0) - - user_options = [ - - ('work_dir', self.work_dir, 'Paths', 'work'), - ('data_dir', self.data_dir, 'Paths', 'data'), - ('inter_data_dir', self.inter_data_dir, 'Paths', 'inter_data'), - ('plot_dir', '', 'Paths', 'plot'), - - ('inp_feat_dir', self.def_inp_dir, 'Paths', 'inp_feat'), - ('out_feat_dir', self.def_out_dir, 'Paths', 'out_feat'), - - ('model_dir', self.model_dir, 'Paths', 'models'), - ('stats_dir', self.stats_dir, 'Paths', 'stats'), - ('gen_dir', self.gen_dir, 'Paths', 'gen'), - ('pred_feat_dir', self.gen_dir, 'Paths', 'pred_feat'), - - ('plot', False, 'Utility', 'plot'), - ('profile', False, 'Utility', 'profile'), - - ('file_id_scp', os.path.join(self.work_dir, - 'data/file_id_list.scp'), 'Paths', 'file_id_list'), - ('test_id_scp', os.path.join(self.work_dir, - 'data/test_id_list.scp'), 'Paths', 'test_id_list'), - - ('GV_dir', os.path.join(self.work_dir, 'data/GV'), 'Paths', 'GV_dir'), - - ('in_stepw_dir', os.path.join(self.work_dir, - 'data/stepw'), 'Paths', 'in_stepw_dir'), - ('in_mgc_dir', os.path.join(self.work_dir, - 'data/mgc'), 'Paths', 'in_mgc_dir'), - ('in_lf0_dir', os.path.join(self.work_dir, - 'data/lf0'), 'Paths', 'in_lf0_dir'), - ('in_bap_dir', os.path.join(self.work_dir, - 'data/bap'), 'Paths', 'in_bap_dir'), - ('in_sp_dir', os.path.join(self.work_dir, 'data/sp'), 'Paths', 'in_sp_dir'), - 
('in_seglf0_dir', os.path.join(self.work_dir, - 'data/lf03'), 'Paths', 'in_seglf0_dir'), - - # for glottHMM: - ('in_F0_dir', os.path.join(self.work_dir, 'data/F0'), 'Paths', 'in_F0_dir'), - ('in_Gain_dir', os.path.join(self.work_dir, - 'data/Gain'), 'Paths', 'in_Gain_dir'), - ('in_HNR_dir', os.path.join(self.work_dir, - 'data/HNR'), 'Paths', 'in_HNR_dir'), - ('in_LSF_dir', os.path.join(self.work_dir, - 'data/LSF'), 'Paths', 'in_LSF_dir'), - ('in_LSFsource_dir', os.path.join(self.work_dir, - 'data/LSFsource'), 'Paths', 'in_LSFsource_dir'), - - # for glottDNN: - ('in_f0_dir', os.path.join(self.work_dir, 'data/f0'), 'Paths', 'in_f0_dir'), - ('in_gain_dir', os.path.join(self.work_dir, - 'data/gain'), 'Paths', 'in_gain_dir'), - ('in_hnr_dir', os.path.join(self.work_dir, - 'data/hnr'), 'Paths', 'in_hnr_dir'), - ('in_lsf_dir', os.path.join(self.work_dir, - 'data/lsf'), 'Paths', 'in_lsf_dir'), - ('in_slsf_dir', os.path.join(self.work_dir, - 'data/slsf'), 'Paths', 'in_slsf_dir'), - - # for sinusoidal: - ('in_pdd_dir', os.path.join(self.work_dir, - 'data/pdd'), 'Paths', 'in_pdd_dir'), - - # For MagPhase Vocoder: - ('in_acous_feats_dir', os.path.join(self.work_dir, - 'data/in_acoustic_feats'), 'Paths', 'in_acous_feats_dir'), - # Containg natural speech waveforms (for acous feat extraction). - ('nat_wav_dir', os.path.join(self.work_dir, - 'data/nat_wavs'), 'Paths', 'nat_wav_dir'), - - # Input-Output - ('inp_dim', 425, 'Input-Output', 'inp_dim'), - ('out_dim', 187, 'Input-Output', 'out_dim'), - - ('inp_file_ext', '.lab', 'Input-Output', 'inp_file_ext'), - ('out_file_ext', '.cmp', 'Input-Output', 'out_file_ext'), - - ('inp_norm', 'MINMAX', 'Input-Output', 'inp_norm'), - ('out_norm', 'MINMAX', 'Input-Output', 'out_norm'), - - # for joint duration - ('in_seq_dur_dir', os.path.join(self.work_dir, - 'data/S2S_dur'), 'Paths', 'in_seq_dur_dir'), - ('in_dur_dir', os.path.join(self.work_dir, - 'data/dur'), 'Paths', 'in_dur_dir'), - - - ('nn_norm_temp_dir', os.path.join(self.work_dir, - 'data/step_hidden9'), 'Paths', 'nn_norm_temp_dir'), - - ('process_labels_in_work_dir', False, - 'Labels', 'process_labels_in_work_dir'), - - - - ('label_style', 'HTS', 'Labels', 'label_style'), - ('label_type', 'state_align', 'Labels', 'label_type'), - ('in_label_align_dir', os.path.join(self.work_dir, - 'data/label_state_align'), 'Labels', 'label_align'), - ('question_file_name', os.path.join(self.work_dir, - 'data/questions.hed'), 'Labels', 'question_file_name'), - ('linguistic_file_name', os.path.join(self.work_dir, - 'data/hed_feats.txt'), 'Labels', 'linguistic_file_name'), - ('silence_pattern', ['*-#+*'], 'Labels', 'silence_pattern'), - ('subphone_feats', 'full', 'Labels', 'subphone_feats'), - ('additional_features', {}, 'Labels', 'additional_features'), - - # For MagPhase Vocoder: - #('label_align_orig_const_rate_dir', os.path.join(self.work_dir, 'data/label_state_align'), 'Labels', 'label_align_orig_const_rate'), - - ('xpath_file_name', os.path.join(self.work_dir, - 'data/xml_labels/xpaths.txt'), 'Labels', 'xpath_file_name'), - - ('label_config_file', 'configuration/examplelabelconfigfile.py', - 'Labels', 'label_config'), - ('add_frame_features', True, - 'Labels', 'add_frame_features'), - ('fill_missing_values', False, - 'Labels', 'fill_missing_values'), - ('xpath_label_align_dir', os.path.join(self.work_dir, - 'data/label_state_align'), 'Labels', 'xpath_label_align'), - - ('enforce_silence', False, 'Labels', 'enforce_silence'), - ('remove_silence_using_binary_labels', False, - 'Labels', 
'remove_silence_using_binary_labels'), - ('remove_silence_using_hts_labels', True, - 'Labels', 'remove_silence_using_hts_labels'), - - ('precompile_xpaths', True, 'Labels', 'precompile_xpaths'), - ('iterate_over_frames', True, 'Labels', 'iterate_over_frames'), - - ('appended_input_dim', 0, 'Labels', 'appended_input_dim'), - - ('buffer_size', 200000, 'Data', 'buffer_size'), - - ('train_file_number', impossible_int, 'Data', 'train_file_number'), - ('valid_file_number', impossible_int, 'Data', 'valid_file_number'), - ('test_file_number', impossible_int, 'Data', 'test_file_number'), - - ('log_path', os.path.join(self.work_dir, 'log'), 'Paths', 'log_path'), - ('log_file', '', 'Paths', 'log_file'), - ('log_config_file', 'configuration/exampleloggingconfigfile.conf', - 'Paths', 'log_config_file'), - - ('sptk_bindir', 'tools/bin/SPTK-3.9', 'Paths', 'sptk'), - ('straight_bindir', 'tools/bin/straight', 'Paths', 'straight'), - ('world_bindir', 'tools/bin/WORLD', 'Paths', 'world'), - ('glotthmm_bindir', 'tools/bin/glotthmm', 'Paths', 'glotthmm'), - ('glottdnn_bindir', 'tools/bin/glottdnn', 'Paths', 'glottdnn'), - ('hmpd_bindir', 'tools/bin/hmpd', 'Paths', 'hmpd'), - ('magphase_bindir', 'tools/bin/magphase/src', 'Paths', 'magphase'), - - ('network_type', 'RNN', 'Architecture', 'network_type'), - ('model_type', 'DNN', 'Architecture', 'model_type'), - ('hidden_layer_type', ['TANH', 'TANH', 'TANH', 'TANH', - 'TANH', 'TANH'], 'Architecture', 'hidden_layer_type'), - ('output_layer_type', 'LINEAR', 'Architecture', 'output_layer_type'), - ('sequential_training', False, 'Architecture', 'sequential_training'), - ('rnn_batch_training', False, 'Architecture', 'rnn_batch_training'), - ('dropout_rate', 0.0, 'Architecture', 'dropout_rate'), - ('switch_to_keras', False, 'Architecture', 'switch_to_keras'), - ('switch_to_tensorflow', False, 'Architecture', 'switch_to_tensorflow'), - - # some config variables for token projection DNN - ('scheme', 'stagewise', 'Architecture', 'scheme'), - ('index_to_project', 0, 'Architecture', 'index_to_project'), - ('projection_insize', 10000, 'Architecture', 'projection_insize'), - ('projection_outsize', 10, 'Architecture', 'projection_outsize'), - ('initial_projection_distrib', 'gaussian', - 'Architecture', 'initial_projection_distrib'), - ('projection_weights_output_dir', 'some_path', - 'Architecture', 'projection_weights_output_dir'), - ('layers_with_projection_input', [ - 0], 'Architecture', 'layers_with_projection_input'), - ('projection_learning_rate_scaling', 1.0, - 'Architecture', 'projection_learning_rate_scaling'), - - ('num_of_epochs', 1, 'Architecture', 'training_epochs'), - - ('optimizer', 'sgd', 'Architecture', 'optimizer'), - ('loss_function', 'mse', 'Architecture', 'loss_function'), - - # RNN - ('model_file_name', 'feed_forward_6_tanh', - 'Architecture', 'model_file_name'), - ('stateful', False, 'Architecture', 'stateful'), - ('use_high_batch_size', False, 'Architecture', 'use_high_batch_size'), - - ('training_algo', 1, 'Architecture', 'training_algo'), - ('merge_size', 1, 'Architecture', 'merge_size'), - ('seq_length', 200, 'Architecture', 'seq_length'), - ('bucket_range', 100, 'Architecture', 'bucket_range'), - - ('encoder_decoder', False, 'Architecture', 'encoder_decoder'), - ('attention', False, 'Architecture', 'attention'), - ("cbhg", False, "Architecture", "cbhg"), - - # Data - ('shuffle_data', True, 'Data', 'shuffle_data'), - - # Keras Processes - ('NORMDATA', False, 'Processes', 'NORMDATA'), - ('TRAINMODEL', False, 'Processes', 'TRAINMODEL'), - ('TESTMODEL', 
False, 'Processes', 'TESTMODEL'), - - - ('learning_rate', 0.0002, 'Architecture', 'learning_rate'), - ('lr_decay', -1, 'Architecture', 'lr_decay'), - ('l2_reg', 0.00001, 'Architecture', 'L2_regularization'), - ('l1_reg', 0.0, 'Architecture', 'L1_regularization'), - ('batch_size', 16, 'Architecture', 'batch_size'), - ('training_epochs', 25, 'Architecture', 'training_epochs'), - ('hidden_activation', 'tanh', 'Architecture', 'hidden_activation'), - ('output_activation', 'linear', 'Architecture', 'output_activation'), - ('hidden_layer_size', [1024, 1024, 1024, 1024, - 1024, 1024], 'Architecture', 'hidden_layer_size'), - ('private_hidden_sizes', [1024], - 'Architecture', 'private_hidden_sizes'), - ('stream_weights', [1.0], 'Architecture', 'stream_weights'), - ('private_l2_reg', 0.00001, 'Architecture', 'private_l2_reg'), - ('warmup_epoch', 5, 'Architecture', 'warmup_epoch'), - - ('warmup_momentum', 0.3, 'Architecture', 'warmup_momentum'), - ('momentum', 0.9, 'Architecture', 'momentum'), - ('warmup_epoch', 5, 'Architecture', 'warmup_epoch'), - ('mdn_component', 1, 'Architecture', 'mdn_component'), - ('var_floor', 0.01, 'Architecture', 'var_floor'), - ('beta_opt', False, 'Architecture', 'beta_opt'), - ('eff_sample_size', 0.8, 'Architecture', 'eff_sample_size'), - ('mean_log_det', -100.0, 'Architecture', 'mean_log_det'), - ('start_from_trained_model', '_', - 'Architecture', 'start_from_trained_model'), - ('use_rprop', 0, 'Architecture', 'use_rprop'), - ('use_lhuc', False, 'Architecture', 'use_lhuc'), - ('freeze_layers', 0, 'Architecture', 'freeze_layers'), - - ('mgc_dim', 60, 'Outputs', 'mgc'), - ('dmgc_dim', 60 * 3, 'Outputs', 'dmgc'), - ('vuv_dim', 1, 'Outputs', 'vuv'), - ('lf0_dim', 1, 'Outputs', 'lf0'), - ('dlf0_dim', 1 * 3, 'Outputs', 'dlf0'), - ('bap_dim', 25, 'Outputs', 'bap'), - ('dbap_dim', 25 * 3, 'Outputs', 'dbap'), - ('cmp_dim', (60 * 3) + 1 + (1 * 3) + (25 * 3), 'Outputs', 'cmp'), - ('stepw_dim', 55, 'Outputs', 'stepw_dim'), - ('temp_sp_dim', 1025, 'Outputs', 'temp_sp_dim'), - ('seglf0_dim', 7, 'Outputs', 'seglf0_dim'), - ('delta_win', [-0.5, 0.0, 0.5], 'Outputs', 'delta_win'), - ('acc_win', [1.0, -2.0, 1.0], 'Outputs', 'acc_win'), - ('do_MLPG', True, 'Outputs', 'do_MLPG'), - - # for GlottHMM: - ('F0_dim', 1, 'Outputs', 'F0'), - ('dF0_dim', 1 * 3, 'Outputs', 'dF0'), - ('Gain_dim', 1, 'Outputs', 'Gain'), - ('dGain_dim', 1 * 3, 'Outputs', 'dGain'), - ('HNR_dim', 5, 'Outputs', 'HNR'), - ('dHNR_dim', 5 * 3, 'Outputs', 'dHNR'), - ('LSF_dim', 30, 'Outputs', 'LSF'), - ('dLSF_dim', 30 * 3, 'Outputs', 'dLSF'), - ('LSFsource_dim', 10, 'Outputs', 'LSFsource'), - ('dLSFsource_dim', 10 * 3, 'Outputs', 'dLSFsource'), - - # for GlottDNN: - ('f0_dim', 1, 'Outputs', 'f0'), - ('df0_dim', 1 * 3, 'Outputs', 'df0'), - ('gain_dim', 1, 'Outputs', 'gain'), - ('dgain_dim', 1 * 3, 'Outputs', 'dgain'), - ('hnr_dim', 5, 'Outputs', 'hnr'), - ('dhnr_dim', 5 * 3, 'Outputs', 'dhnr'), - ('lsf_dim', 30, 'Outputs', 'lsf'), - ('dlsf_dim', 30 * 3, 'Outputs', 'dlsf'), - ('slsf_dim', 10, 'Outputs', 'slsf'), - ('dslsf_dim', 10 * 3, 'Outputs', 'dslsf'), - - # for sinusoidal: - ('pdd_dim', 25, 'Outputs', 'pdd'), - ('dpdd_dim', 25 * 3, 'Outputs', 'dpdd'), - - # For MagPhase Vocoder: - ('mag_dim', 60, 'Outputs', 'mag'), - ('dmag_dim', 60 * 3, 'Outputs', 'dmag'), - ('real_dim', 45, 'Outputs', 'real'), - ('dreal_dim', 45 * 3, 'Outputs', 'dreal'), - ('imag_dim', 45, 'Outputs', 'imag'), - ('dimag_dim', 45 * 3, 'Outputs', 'dimag'), - - # for joint dur:- - ('seq_dur_dim', 1, 'Outputs', 'seq_dur'), - ('remove_silence_from_dur', True, 
'Outputs', 'remove_silence_from_dur'), - ('dur_dim', 5, 'Outputs', 'dur'), - ('dur_feature_type', 'numerical', 'Outputs', 'dur_feature_type'), - ('dur_unit_size', 'phoneme', 'Outputs', 'dur_unit_size'), - ('dur_feat_size', 'phoneme', 'Outputs', 'dur_feat_size'), - - ('output_feature_normalisation', 'MVN', - 'Outputs', 'output_feature_normalisation'), - - ('multistream_switch', False, 'Streams', 'multistream_switch'), - # ('use_private_hidden' , False, 'Streams', 'use_private_hidden'), - - ('output_features', ['mgc', 'lf0', 'vuv', - 'bap'], 'Streams', 'output_features'), - ('gen_wav_features', ['mgc', 'bap', 'lf0'], - 'Streams', 'gen_wav_features'), - - ('vocoder_type', 'STRAIGHT', 'Waveform', 'vocoder_type'), - ('sr', 48000, 'Waveform', 'samplerate'), - ('fl', 4096, 'Waveform', 'framelength'), - ('shift', 1000 * 240 / 48000, 'Waveform', 'frameshift'), - ('sp_dim', (4096 / 2) + 1, 'Waveform', 'sp_dim'), - # fw_alpha: 'Bark' or 'ERB' allowing deduction of alpha, or an explicit float value (e.g. 0.77) - ('fw_alpha', 0.77, 'Waveform', 'fw_alpha'), - ('pf_coef', 1.4, 'Waveform', 'postfilter_coef'), - ('co_coef', 2047, 'Waveform', 'minimum_phase_order'), - ('use_cep_ap', True, 'Waveform', 'use_cep_ap'), - ('do_post_filtering', True, 'Waveform', 'do_post_filtering'), - ('apply_GV', False, 'Waveform', 'apply_GV'), - ('test_synth_dir', 'test_synthesis/wav', 'Waveform', 'test_synth_dir'), - - # For MagPhase Vocoder: - # ('use_magphase_pf' ,True ,'Waveform' , 'use_magphase_pf'), # Use MagPhase own Post-Filter (experimental) - ('magphase_pf_type', ['magphase', 'no', - 'merlin'], 'Waveform', 'magphase_pf_type'), - ('magphase_const_rate', False, 'Waveform', 'magphase_const_rate'), - - - ('DurationModel', False, 'Processes', 'DurationModel'), - ('AcousticModel', False, 'Processes', 'AcousticModel'), - ('VoiceConversion', False, 'Processes', 'VoiceConversion'), - ('GenTestList', False, 'Processes', 'GenTestList'), - - # Acoustic feature extraction - ('ACFTEXTR', False, 'Processes', 'ACFTEXTR'), - ('NORMLAB', False, 'Processes', 'NORMLAB'), - ('MAKEDUR', False, 'Processes', 'MAKEDUR'), - ('MAKECMP', False, 'Processes', 'MAKECMP'), - ('NORMCMP', False, 'Processes', 'NORMCMP'), - ('TRAINDNN', False, 'Processes', 'TRAINDNN'), - ('DNNGEN', False, 'Processes', 'DNNGEN'), - ('GENWAV', False, 'Processes', 'GENWAV'), - ('CALMCD', False, 'Processes', 'CALMCD'), - ('NORMSTEP', False, 'Processes', 'NORMSTEP'), - ('GENBNFEA', False, 'Processes', 'GENBNFEA'), - - ('mgc_ext', '.mgc', 'Extensions', 'mgc_ext'), - ('bap_ext', '.bap', 'Extensions', 'bap_ext'), - ('lf0_ext', '.lf0', 'Extensions', 'lf0_ext'), - ('cmp_ext', '.cmp', 'Extensions', 'cmp_ext'), - ('lab_ext', '.lab', 'Extensions', 'lab_ext'), - ('utt_ext', '.utt', 'Extensions', 'utt_ext'), - ('stepw_ext', '.stepw', 'Extensions', 'stepw_ext'), - ('sp_ext', '.sp', 'Extensions', 'sp_ext'), - - - # GlottHMM - ('F0_ext', '.F0', 'Extensions', 'F0_ext'), - ('Gain_ext', '.Gain', 'Extensions', 'Gain_ext'), - ('HNR_ext', '.HNR', 'Extensions', 'HNR_ext'), - ('LSF_ext', '.LSF', 'Extensions', 'LSF_ext'), - ('LSFsource_ext', '.LSFsource', 'Extensions', 'LSFsource_ext'), - - # GlottDNN - ('f0_ext', '.f0', 'Extensions', 'f0_ext'), - ('gain_ext', '.gain', 'Extensions', 'gain_ext'), - ('hnr_ext', '.hnr', 'Extensions', 'hnr_ext'), - ('lsf_ext', '.lsf', 'Extensions', 'lsf_ext'), - ('slsf_ext', '.slsf', 'Extensions', 'slsf_ext'), - - # sinusoidal - ('pdd_ext', '.pdd', 'Extensions', 'pdd_ext'), - - # For MagPhase Vocoder: - ('mag_ext', '.mag', 'Extensions', 'mag_ext'), - ('real_ext',
'.real', 'Extensions', 'real_ext'), - ('imag_ext', '.imag', 'Extensions', 'imag_ext'), - - # joint dur - ('dur_ext', '.dur', 'Extensions', 'dur_ext'), - - ] - - # this uses exec(...) which is potentially dangerous since arbitrary code could be executed - for (variable, default, section, option) in user_options: - value = None - - try: - # first, look for a user-set value for this variable in the config file - value = cfgparser.get(section, option) - user_or_default = 'user' - - except (configparser.NoSectionError, configparser.NoOptionError): - # use default value, if there is one - if (default == None) or \ - (default == '') or \ - ((type(default) == int) and (default == impossible_int)) or \ - ((type(default) == float) and (default == impossible_float)): - logger.critical('%20s has no value!' % - (section+":"+option)) - raise Exception - else: - value = default - user_or_default = 'default' - - if type(default) == str: - exec('self.%s = "%s"' % (variable, value)) - elif type(default) == int: - exec('self.%s = int(%s)' % (variable, value)) - elif type(default) == float: - exec('self.%s = float(%s)' % (variable, value)) - elif type(default) == bool: - exec('self.%s = bool(%s)' % (variable, value)) - elif type(default) == list: - exec('self.%s = list(%s)' % (variable, value)) - elif type(default) == dict: - exec('self.%s = dict(%s)' % (variable, value)) - else: - logger.critical( - 'Variable %s has default value of unsupported type %s', variable, type(default)) - raise Exception( - 'Internal error in configuration settings: unsupported default type') - - logger.info('%20s has %7s value %s' % - (section+":"+option, user_or_default, value)) - - self.combined_feature_name = '' - for feature_name in self.output_features: - self.combined_feature_name += '_' - self.combined_feature_name += feature_name - - self.combined_model_name = self.model_type - for hidden_type in self.hidden_layer_type: - self.combined_model_name += '_' + hidden_type - - self.combined_model_name += '_' + self.output_layer_type - - def complete_configuration(self): - # to be called after reading any user-specific settings - # because the values set here depend on those user-specific settings - - # get a logger - logger = logging.getLogger("configuration") - - # tools - self.SPTK = { - 'X2X': os.path.join(self.sptk_bindir, 'x2x'), - 'MERGE': os.path.join(self.sptk_bindir, 'merge'), - 'BCP': os.path.join(self.sptk_bindir, 'bcp'), - 'MLPG': os.path.join(self.sptk_bindir, 'mlpg'), - 'MGC2SP': os.path.join(self.sptk_bindir, 'mgc2sp'), - 'VSUM': os.path.join(self.sptk_bindir, 'vsum'), - 'VSTAT': os.path.join(self.sptk_bindir, 'vstat'), - 'SOPR': os.path.join(self.sptk_bindir, 'sopr'), - 'VOPR': os.path.join(self.sptk_bindir, 'vopr'), - 'FREQT': os.path.join(self.sptk_bindir, 'freqt'), - 'C2ACR': os.path.join(self.sptk_bindir, 'c2acr'), - 'MC2B': os.path.join(self.sptk_bindir, 'mc2b'), - 'B2MC': os.path.join(self.sptk_bindir, 'b2mc') - } - - self.STRAIGHT = { - 'SYNTHESIS_FFT': os.path.join(self.straight_bindir, 'synthesis_fft'), - 'BNDAP2AP': os.path.join(self.straight_bindir, 'bndap2ap'), - } - - self.WORLD = { - 'SYNTHESIS': os.path.join(self.world_bindir, 'synth'), - 'ANALYSIS': os.path.join(self.world_bindir, 'analysis'), - } - - self.GLOTTHMM = { - 'SYNTHESIS': os.path.join(self.glotthmm_bindir, 'Synthesis'), - 'config_file': os.path.join(self.glotthmm_bindir, 'config_default_48'), - 'config_file_16': os.path.join(self.glotthmm_bindir, 'config_default_16'), - } - - self.GLOTTDNN = { - 'SYNTHESIS': 
os.path.join(self.glottdnn_bindir, 'Synthesis'), - 'config_file': os.path.join(self.glottdnn_bindir, 'config_default_48'), - 'config_file_16': os.path.join(self.glottdnn_bindir, 'config_default_16'), - } - - self.HMPD = { - 'SYNTHESIS': os.path.join(self.hmpd_bindir, 'synthesis.py'), - } - - # set input extension same as output for voice conversion - if self.VoiceConversion: - self.remove_silence_using_hts_labels = False - self.lab_ext = self.cmp_ext - - # check if any hidden layer is recurrent layer - list_of_RNNs = ['RNN', 'LSTM', 'GRU', - 'BLSTM', 'SLSTM', 'SGRU', 'BSLSTM'] - for hidden_type in self.hidden_layer_type: - if hidden_type in list_of_RNNs: - self.sequential_training = True - break - - # switch to tensorflow - if self.switch_to_tensorflow: - # create directories if not exists - self.model_dir = os.path.join(self.model_dir, "tensorflow") - self.model_dir = os.path.join(self.model_dir, self.model_file_name) - if not os.path.exists(self.model_dir): - os.makedirs(self.model_dir) - - # switch to keras - if self.switch_to_keras: - # create directories if not exists - self.model_dir = os.path.join(self.model_dir, "keras") - if not os.path.exists(self.model_dir): - os.makedirs(self.model_dir) - - # model files - self.json_model_file = os.path.join( - self.model_dir, self.model_file_name+'.json') - self.h5_model_file = os.path.join( - self.model_dir, self.model_file_name+'.h5') - - if self.switch_to_keras and self.switch_to_tensorflow: - logger.critical( - "Please switch to either tensorflow or keras, but not both!!") - sys.exit(1) - - if self.switch_to_keras or self.switch_to_tensorflow: - if not os.path.exists(self.gen_dir): - os.makedirs(self.gen_dir) - - # input-output normalization stat files - self.inp_stats_file = os.path.join(self.stats_dir, "input_%d_%s_%d.norm" % ( - int(self.train_file_number), self.inp_norm, self.inp_dim)) - self.out_stats_file = os.path.join(self.stats_dir, "output_%d_%s_%d.norm" % ( - int(self.train_file_number), self.out_norm, self.out_dim)) - - # define model file name - logger.info('model file: %s' % (self.model_file_name)) - - # predicted features directory - self.pred_feat_dir = os.path.join( - self.gen_dir, self.model_file_name) - if not os.path.exists(self.pred_feat_dir): - os.makedirs(self.pred_feat_dir) - - # string.lower for some architecture values - self.output_layer_type = self.output_layer_type.lower() - self.optimizer = self.optimizer.lower() - self.loss_function = self.loss_function.lower() - for i in range(len(self.hidden_layer_type)): - self.hidden_layer_type[i] = self.hidden_layer_type[i].lower() - - # force optimizer to adam if set to sgd - if self.optimizer == "sgd": - self.optimizer = 'adam' - - # set sequential training True if using LSTMs - if 'lstm' in self.hidden_layer_type: - self.sequential_training = True - - # set default seq length for duration model - if self.DurationModel and self.training_algo == 3 and self.seq_length > 50: - self.seq_length = 20 - - # rnn params - self.rnn_params = {} - self.rnn_params['merge_size'] = self.merge_size - self.rnn_params['seq_length'] = self.seq_length - self.rnn_params['bucket_range'] = self.bucket_range - self.rnn_params['stateful'] = self.stateful - - # RNN params - if self.sequential_training: - # batch training for RNNs - if self.batch_size > 1: - self.rnn_batch_training = True - - # set/limit batch size to 25 - if self.batch_size > 50: - if not self.use_high_batch_size: - logger.info('reducing the batch size from %s to 25' % - (self.batch_size)) - self.batch_size = 25 # num. 
of sentences in this case - - # dimensions for the output features - # key name must follow the self.in_dimension_dict. - # If you do not want to include dynamic features, just use the same dimension as that in self.in_dimension_dict - # if lf0 is one of the acoustic features, the out_dimension_dict must have an additional 'vuv' key - # a bit confusing - - # need to control the order of the key? - # dimensions for each raw acoustic (output of NN) feature - self.in_dir_dict = {} - self.out_dimension_dict = {} - self.in_dimension_dict = {} - - self.private_hidden_sizes = [] - self.stream_weights = [] - - logger.debug('setting up output features') - self.cmp_dim = 0 - for feature_name in self.output_features: - logger.debug(' %s' % feature_name) - - in_dimension = 0 - out_dimension = 0 - in_directory = '' -# current_stream_hidden_size = 0 -# current_stream_weight = 0.0 -# stream_lr_ratio = 0.0 - if feature_name == 'mgc': - in_dimension = self.mgc_dim - out_dimension = self.dmgc_dim - in_directory = self.in_mgc_dir - -# current_stream_hidden_size = self.stream_mgc_hidden_size -# current_stream_weight = self.stream_weight_mgc - elif feature_name == 'bap': - in_dimension = self.bap_dim - out_dimension = self.dbap_dim - in_directory = self.in_bap_dir - -# current_stream_hidden_size = self.stream_bap_hidden_size -# current_stream_weight = self.stream_weight_bap - elif feature_name == 'lf0': - in_dimension = self.lf0_dim - out_dimension = self.dlf0_dim - in_directory = self.in_lf0_dir - if self.vocoder_type == 'MAGPHASE': - in_directory = self.in_acous_feats_dir - -# current_stream_hidden_size = self.stream_lf0_hidden_size -# current_stream_weight = self.stream_weight_lf0 - elif feature_name == 'vuv': - out_dimension = 1 - -# current_stream_hidden_size = self.stream_vuv_hidden_size -# current_stream_weight = self.stream_weight_vuv - elif feature_name == 'stepw': - in_dimension = self.stepw_dim - out_dimension = self.stepw_dim - in_directory = self.in_stepw_dir - -# current_stream_hidden_size = self.stream_stepw_hidden_size -# current_stream_weight = self.stream_weight_stepw - elif feature_name == 'sp': - in_dimension = self.sp_dim - out_dimension = self.sp_dim - in_directory = self.in_sp_dir - -# current_stream_hidden_size = self.stream_sp_hidden_size -# current_stream_weight = self.stream_weight_sp - - elif feature_name == 'seglf0': - in_dimension = self.seglf0_dim - out_dimension = self.seglf0_dim - in_directory = self.in_seglf0_dir - -# current_stream_hidden_size = self.stream_seglf0_hidden_size -# current_stream_weight = self.stream_weight_seglf0 - - # for GlottHMM (start) - elif feature_name == 'F0': - in_dimension = self.F0_dim - out_dimension = self.dF0_dim - in_directory = self.in_F0_dir - -# current_stream_hidden_size = self.stream_F0_hidden_size -# current_stream_weight = self.stream_weight_F0 - - elif feature_name == 'Gain': - in_dimension = self.Gain_dim - out_dimension = self.dGain_dim - in_directory = self.in_Gain_dir - -# current_stream_hidden_size = self.stream_Gain_hidden_size -# current_stream_weight = self.stream_weight_Gain - - elif feature_name == 'HNR': - in_dimension = self.HNR_dim - out_dimension = self.dHNR_dim - in_directory = self.in_HNR_dir - -# current_stream_hidden_size = self.stream_HNR_hidden_size -# current_stream_weight = self.stream_weight_HNR - - elif feature_name == 'LSF': - in_dimension = self.LSF_dim - out_dimension = self.dLSF_dim - in_directory = self.in_LSF_dir - -# current_stream_hidden_size = self.stream_LSF_hidden_size -# current_stream_weight =
self.stream_weight_LSF - - elif feature_name == 'LSFsource': - in_dimension = self.LSFsource_dim - out_dimension = self.dLSFsource_dim - in_directory = self.in_LSFsource_dir - -# current_stream_hidden_size = self.stream_LSFsource_hidden_size -# current_stream_weight = self.stream_weight_LSFsource - # for GlottHMM (end) - - # for GlottDNN (start) - elif feature_name == 'f0': - in_dimension = self.f0_dim - out_dimension = self.df0_dim - in_directory = self.in_f0_dir - - elif feature_name == 'gain': - in_dimension = self.gain_dim - out_dimension = self.dgain_dim - in_directory = self.in_gain_dir - - elif feature_name == 'hnr': - in_dimension = self.hnr_dim - out_dimension = self.dhnr_dim - in_directory = self.in_hnr_dir - - elif feature_name == 'lsf': - in_dimension = self.lsf_dim - out_dimension = self.dlsf_dim - in_directory = self.in_lsf_dir - - elif feature_name == 'slsf': - in_dimension = self.slsf_dim - out_dimension = self.dslsf_dim - in_directory = self.in_slsf_dir - # for GlottDNN (end) - - # for HMPD (start) - elif feature_name == 'pdd': - in_dimension = self.pdd_dim - out_dimension = self.dpdd_dim - in_directory = self.in_pdd_dir - # for HMPD (end) - - # For MagPhase Vocoder (start): - # Note: 'lf0' is set before. See above. - elif feature_name == 'mag': - in_dimension = self.mag_dim - out_dimension = self.dmag_dim - in_directory = self.in_acous_feats_dir - - elif feature_name == 'real': - in_dimension = self.real_dim - out_dimension = self.dreal_dim - in_directory = self.in_acous_feats_dir - - elif feature_name == 'imag': - in_dimension = self.imag_dim - out_dimension = self.dimag_dim - in_directory = self.in_acous_feats_dir - # For MagPhase Vocoder (end) - - # for joint dur (start) - elif feature_name == 'dur': - in_dimension = self.dur_dim - out_dimension = self.dur_dim - in_directory = self.in_dur_dir - -# current_stream_hidden_size = self.stream_dur_hidden_size -# current_stream_weight = self.stream_weight_dur - # for joint dur (end) - - else: - logger.critical( - '%s feature is not supported right now. Please change the configuration.py to support it' % (feature_name)) - raise - - logger.info(' in_dimension: %d' % in_dimension) - logger.info(' out_dimension : %d' % out_dimension) - logger.info(' in_directory : %s' % in_directory) -# logger.info(' current_stream_hidden_size: %d' % current_stream_hidden_size) -# logger.info(' current_stream_weight: %d' % current_stream_weight) - - if in_dimension > 0: - self.in_dimension_dict[feature_name] = in_dimension - if in_directory == '': - logger.critical( - 'please provide the path for %s feature' % (feature_name)) - raise - if out_dimension < in_dimension: - logger.critical( - 'the dimensionality setting for %s feature is not correct!' 
% (feature_name)) - raise - - self.in_dir_dict[feature_name] = in_directory - - if out_dimension > 0: - self.out_dimension_dict[feature_name] = out_dimension - -# if (current_stream_hidden_size <= 0 or current_stream_weight <= 0.0) and self.multistream_switch: -# logger.critical('the hidden layer size or stream weight is not corrected setted for %s feature' %(feature_name)) -# raise - -# if self.multistream_switch: -# self.private_hidden_sizes.append(current_stream_hidden_size) -# self.stream_weights.append(current_stream_weight) - - self.cmp_dim += out_dimension - - -# if not self.multistream_switch: -# self.private_hidden_sizes = [] -# if self.stream_cmp_hidden_size > 0: -# self.private_hidden_sizes.append(self.stream_cmp_hidden_size) -# else: -# self.private_hidden_sizes.append(self.hidden_layer_size[-1]) ## use the same number of hidden layers if multi-stream is not supported -# self.stream_weights = [] -# self.stream_weights.append(1.0) - - self.stream_lr_weights = [] - - self.multistream_outs = [] - if self.multistream_switch: - for feature_name in list(self.out_dimension_dict.keys()): - self.multistream_outs.append( - self.out_dimension_dict[feature_name]) - -# stream_lr_ratio = 0.5 -# if feature_name == 'lf0': -# stream_lr_ratio = self.stream_lf0_lr -# if feature_name == 'vuv': -# stream_lr_ratio = self.stream_vuv_lr -# self.stream_lr_weights.append(stream_lr_ratio) - else: - # the new cmp is not the one for HTS, it includes all the features, such as that for main tasks and that for additional tasks - self.multistream_outs.append(self.cmp_dim) -# self.stream_lr_weights.append(0.5) - - logger.info('multistream dimensions: %s' % (self.multistream_outs)) - - # to check whether all the input and output features' file extensions are here - self.file_extension_dict = {} - self.file_extension_dict['mgc'] = self.mgc_ext - self.file_extension_dict['lf0'] = self.lf0_ext - self.file_extension_dict['bap'] = self.bap_ext - self.file_extension_dict['stepw'] = self.stepw_ext - self.file_extension_dict['cmp'] = self.cmp_ext - self.file_extension_dict['seglf0'] = self.lf0_ext - - # gHMM: - self.file_extension_dict['F0'] = self.F0_ext - self.file_extension_dict['Gain'] = self.Gain_ext - self.file_extension_dict['HNR'] = self.HNR_ext - self.file_extension_dict['LSF'] = self.LSF_ext - self.file_extension_dict['LSFsource'] = self.LSFsource_ext - - # gDNN - self.file_extension_dict['f0'] = self.f0_ext - self.file_extension_dict['gain'] = self.gain_ext - self.file_extension_dict['hnr'] = self.hnr_ext - self.file_extension_dict['lsf'] = self.lsf_ext - self.file_extension_dict['slsf'] = self.slsf_ext - - # HMPD - self.file_extension_dict['pdd'] = self.pdd_ext - - # For MagPhase Vocoder: - # Note: 'lf0' is set before. See above. - self.file_extension_dict['mag'] = self.mag_ext - self.file_extension_dict['real'] = self.real_ext - self.file_extension_dict['imag'] = self.imag_ext - - # joint dur - self.file_extension_dict['dur'] = self.dur_ext - - # hyper parameters for DNN. 
need to be set by the user, as they depend on the architecture - self.hyper_params = {'learning_rate': '0.0002', - 'l2_reg': '0.00001', - 'l1_reg': '0.0', - 'batch_size': '16', - 'training_epochs': '25', - 'early_stop_epochs': '5', - 'hidden_activation': 'tanh', - 'output_activation': 'linear', - 'do_pretraining': False, - 'pretraining_epochs': '10', - 'pretraining_lr': '0.0001'} - - self.hyper_params['warmup_momentum'] = self.warmup_momentum - self.hyper_params['momentum'] = self.momentum - self.hyper_params['warmup_epoch'] = self.warmup_epoch - - self.hyper_params['learning_rate'] = self.learning_rate - self.hyper_params['l2_reg'] = self.l2_reg - self.hyper_params['l1_reg'] = self.l1_reg - self.hyper_params['batch_size'] = self.batch_size - self.hyper_params['training_epochs'] = self.training_epochs - self.hyper_params['hidden_activation'] = self.hidden_activation - self.hyper_params['output_activation'] = self.output_activation - self.hyper_params['hidden_layer_size'] = self.hidden_layer_size - self.hyper_params['warmup_epoch'] = self.warmup_epoch - self.hyper_params['use_rprop'] = self.use_rprop - - self.hyper_params['model_type'] = self.model_type - self.hyper_params['hidden_layer_type'] = self.hidden_layer_type - - self.hyper_params['index_to_project'] = self.index_to_project - self.hyper_params['projection_insize'] = self.projection_insize - self.hyper_params['projection_outsize'] = self.projection_outsize - self.hyper_params['initial_projection_distrib'] = self.initial_projection_distrib - self.hyper_params['layers_with_projection_input'] = self.layers_with_projection_input - self.hyper_params['projection_learning_rate_scaling'] = self.projection_learning_rate_scaling - - self.hyper_params['sequential_training'] = self.sequential_training - self.hyper_params['dropout_rate'] = self.dropout_rate - - # To be recorded in the logging file for reference - for param_name in list(self.hyper_params.keys()): - logger.info('%s : %s' % - (param_name, str(self.hyper_params[param_name]))) - - # input files - - # set up the label processing - # currently must be one of the supported styles - if self.label_style == 'HTS': - # xpath_file_name is now obsolete - to remove - self.xpath_file_name = None - elif self.label_style == 'HTS_duration': - self.xpath_file_name = None - - elif self.label_style == 'composed': - self.question_file_name = None - - else: - logger.critical( - 'unsupported label style requested: %s' % self.label_style) - raise Exception - - def logging_configuration(self): - - # get a logger - logger = logging.getLogger("configuration") - - # logging configuration, see here for format description - # https://docs.python.org/2/library/logging.config.html#logging-config-fileformat - - # what we really want to do is this dictionary-based configuration, but it's only available from Python 2.7 onwards - # logging.config.dictConfig(cfg.logging_configuration) - # so we will settle for this file-based configuration procedure instead - - try: - # open the logging configuration file - fp = open(self.log_config_file, 'r') - logger.debug("loading logging configuration from %s" % - self.log_config_file) - # load the logging configuration file into a string - config_string = fp.read() - fp.close() - - except ValueError: - # this means that cfg.log_config_file does not exist and that no default was provided - # NOTE: currently this will never run - logging.warn( - 'no logging configuration file provided - using default (console only, DEBUG level)') - - # set up a default level and default handlers - # first, get the root logger - all other loggers will inherit its configuration - rootlogger = logging.getLogger("") - # default logging level is DEBUG (a highly-verbose level) - rootlogger.setLevel(logging.DEBUG) - # add a handler to write to console - ch = logging.StreamHandler() - rootlogger.addHandler(ch) - # and a formatter - formatter = logging.Formatter( - '%(asctime)s %(levelname)8s%(name)15s: %(message)s') - ch.setFormatter(formatter) - - except IOError: - # this means that open(...) threw an error - logger.critical( - 'could not load logging configuration file %s' % self.log_config_file) - raise - - else: - - # inject the config lines for the file handler, now that we know the name of the file it will write to - - if not os.path.exists(self.log_path): - os.makedirs(self.log_path, 0o755) - log_file_name = '%s_%s.log' % ( - self.model_file_name, datetime.datetime.now().strftime("%I_%M%p_%B_%d_%Y")) - - self.log_file = os.path.join(self.log_path, log_file_name) - - to_inject = """ - [handler_file] - class=FileHandler - formatter=file - args=('"""+self.log_file+"""', 'w') - """ - - # config file format doesn't allow leading white space on lines, so remove it with dedent - config_string = config_string + textwrap.dedent(to_inject) - - try: - # pass that string as a filehandle - if sys.version_info.major < 3: - config_string = unicode(config_string, "utf-8") - fh = io.StringIO(config_string) - logging.config.fileConfig(fh) - fh.close() - logger.info("logging is now fully configured") - - except IOError: - logger.critical( - 'could not configure logging: perhaps log file path is wrong?') - sys.exit(1) -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE.
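# Illustrative sketch (not from the original file): the configuration loader
# above assigns every option with exec('self.%s = ...'), which its own comment
# flags as potentially dangerous. The same type-dispatched assignment can be
# done safely with setattr() and ast.literal_eval(); the helper name
# apply_option and its signature are assumptions for illustration only.
import ast
import configparser

def apply_option(obj, cfgparser, variable, default, section, option):
    # read the raw string from the config file, falling back to the default
    try:
        raw = cfgparser.get(section, option)
    except (configparser.NoSectionError, configparser.NoOptionError):
        setattr(obj, variable, default)
        return
    # coerce to the type of the default; check bool before int, because
    # bool is a subclass of int in Python
    if isinstance(default, bool):
        value = bool(ast.literal_eval(raw))
    elif isinstance(default, int):
        value = int(raw)
    elif isinstance(default, float):
        value = float(raw)
    elif isinstance(default, (list, dict)):
        value = ast.literal_eval(raw)  # parses literals only, never runs code
    else:
        value = raw  # strings are kept verbatim
    setattr(obj, variable, value)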
-################################################################################ - -# configuration for the input labels (features) for the DNN -# -# this currently supports -# * input labels can be any combination of HTS and XML style input labels -# * output features are numerical *only* (all strings are fully expanded into 1-of-n encodings, etc) -# -# -# -# this is all executable python code -# so we need to define things before using them -# that means the description is bottom-up - -import imp -import logging -logger = logging.getLogger("labels") - -# we need to specify how any non-numerical (e.g., unicode string) features will be converted (mapped) into numerical feature vectors -# (just some examples for now) -maps = { - - 'cplace_to_binary': { - '_UNSEEN_': [0, 0, 0, 0, 0, 0, 0], - 'NA': [0, 0, 0, 0, 0, 0, 0], - '_NA_': [0, 0, 0, 0, 0, 0, 0], - 'alveolar': [1, 0, 0, 0, 0, 0, 0], - 'dental': [0, 1, 0, 0, 0, 0, 0], - 'glottal': [0, 0, 1, 0, 0, 0, 0], - 'labial': [0, 0, 0, 1, 0, 0, 0], - 'labiodental': [0, 0, 0, 0, 1, 0, 0], - 'palatal': [0, 0, 0, 0, 0, 1, 0], - 'velar': [0, 0, 0, 0, 0, 0, 1] - }, - - 'cmanner_to_binary': { - '_UNSEEN_': [0, 0, 0, 0, 0, 0], - 'NA': [0, 0, 0, 0, 0, 0], - '_NA_': [0, 0, 0, 0, 0, 0], - 'affricate': [1, 0, 0, 0, 0, 0], - 'approximant': [0, 1, 0, 0, 0, 0], - 'fricative': [0, 0, 1, 0, 0, 0], - 'liquid': [0, 0, 0, 1, 0, 0], - 'nasal': [0, 0, 0, 0, 1, 0], - 'stop': [0, 0, 0, 0, 0, 1] - }, - - 'cvoiced_to_binary': { - '_UNSEEN_': [0, 0], - 'NA': [0, 0], - '_NA_': [0, 0], - 'yes': [1, 0], - 'no': [0, 1] - }, - - 'vfront_to_binary': { - '_UNSEEN_': [0, 0, 0], - 'NA': [0, 0, 0], - '_NA_': [0, 0, 0], - 'back': [1, 0, 0], - 'mid': [0, 1, 0], - 'front': [0, 0, 1] - }, - - 'vheight_to_binary': { - '_UNSEEN_': [0, 0, 0], - 'NA': [0, 0, 0], - '_NA_': [0, 0, 0], - 'high': [1, 0, 0], - 'mid': [0, 1, 0], - 'low': [0, 0, 1] - }, - - 'vlength_to_binary': { - '_UNSEEN_': [0, 0, 0, 0], - 'NA': [0, 0, 0, 0], - '_NA_': [0, 0, 0, 0], - 'diphthong': [1, 0, 0, 0], - 'long': [0, 1, 0, 0], - 'schwa': [0, 0, 1, 0], - 'short': [0, 0, 0, 1] - }, - - 'vround_to_binary': { - '_UNSEEN_': [0, 0], - 'NA': [0, 0], - '_NA_': [0, 0], - 'yes': [1, 0], - 'no': [0, 1] - }, - - 'vowel_cons_to_binary': { - '_UNSEEN_': [0, 0], - 'NA': [0, 0], - '_NA_': [0, 0], - 'vowel': [1, 0], - 'cons': [0, 1] - } -} - -# read additional maps from external files and add them to the 'maps' dictionary -# each such file must define a dictionary of dictionaries called maps, in the same format as above -# TO DO - avoid full paths here - import them from the main config file -external_map_files = [ - '/Users/simonk/data/dnn_tts/data/ossian/maps/segment_map.py'] - -for fname in external_map_files: - # not sure this will work second time around - may not be able to import under the same module name ?? 
- external_maps = imp.load_source('external_maps', fname) - for k, v in external_maps.maps.items(): - if k in maps: - logger.warning( - 'Redefined map %s and over-wrote the previous map with the same name' % k) - maps[k] = v - -# how to extract features -# (just a few examples for now) -# -# each feature is a dictionary with various possible entries: -# xpath: an XPATH that will extract the required feature from a segment target node of an Ossian XML utterance tree -# hts: a (list of) HTS pseudo regular expression(s) that match(es) part of an HTS label, resulting in a single boolean feature -# mapper: an optional function or dictionary which converts the feature value (e.g., a string) to a (vector of) numerical value(s) -# -# the dictionary describes how to compute that feature -# first, either xpath or hts describes how to extract the feature from a tree or label name -# then, an optional mapping converts the feature via a lookup table (also a dictionary) into a numerical value or vector -# -# if no mapper is provided, then the feature must already be a single numerical or boolean value -# -# some XPATH-based features - -# in a future version, we could be more flexible and allow more than one target_node type at once, -# with a set of XPATHs for each target_node - it would not be very hard to modify the code to do this - -# the target nodes within the XML trees that the XPATH expressions apply to -target_nodes = "//segment" -# target_nodes = "//state" ??? - -# - - -# and the XPATH expressions to apply - -ll_segment = { - 'xpath': 'preceding::segment[2]/attribute::pronunciation', 'mapper': maps['segment_to_binary']} -l_segment = { - 'xpath': 'preceding::segment[1]/attribute::pronunciation', 'mapper': maps['segment_to_binary']} -c_segment = {'xpath': './attribute::pronunciation', - 'mapper': maps['segment_to_binary']} -r_segment = { - 'xpath': 'following::segment[1]/attribute::pronunciation', 'mapper': maps['segment_to_binary']} -rr_segment = { - 'xpath': 'following::segment[2]/attribute::pronunciation', 'mapper': maps['segment_to_binary']} - -cmanner = {'xpath': './attribute::cmanner', - 'mapper': maps['cmanner_to_binary']} -cplace = {'xpath': './attribute::cplace', - 'mapper': maps['cplace_to_binary']} -cvoiced = {'xpath': './attribute::cvoiced', - 'mapper': maps['cvoiced_to_binary']} - -vfront = {'xpath': './attribute::vfront', - 'mapper': maps['vfront_to_binary']} -vheight = {'xpath': './attribute::vheight', - 'mapper': maps['vheight_to_binary']} -vlength = {'xpath': './attribute::vlength', - 'mapper': maps['vlength_to_binary']} -vround = {'xpath': './attribute::vround', - 'mapper': maps['vround_to_binary']} - -vowel_cons = {'xpath': './@vowel_cons', - 'mapper': maps['vowel_cons_to_binary']} - - -# a composite "vector" of XPATH features -# this is just an ordered list of features, each of which is a dictionary describing how to compute this feature -# each feature may be a single numerical value or a vector of numerical values -xpath_labels = [ - - ll_segment, - l_segment, - c_segment, - r_segment, - rr_segment, - - cmanner, - cplace, - cvoiced, - - vfront, - vheight, - vlength, - vround, - - vowel_cons -] - - -# some HTS pseudo regular expression-based features -# all of these evaluate to a single boolean value, which will be eventually represented numerically -# note: names of features will need modifying to valid Python variable names (cannot contain "-", for example) -C_Dental_Fricative = {'hts': '{*-T+*,*-D+*}'} -C_Rounded_End = { - 'hts':
'{*-9^+*,*-aU+*,*-o^+*,*-Or+*,*-QO+*,*-Q+*,*-@Ur+*,*-@U+*,*-O+*,*-u+*,*-U+*}'} -C_OI = {'hts': '{*-OI+*}'} - -# a composite "vector" of HTS features -hts_labels = [C_Dental_Fricative, C_Rounded_End, C_OI] - - -# the full feature vector -labels = xpath_labels # + hts_labels -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. 
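# Illustrative sketch (not from the original file): each label feature above
# pairs an XPATH with a 'mapper' lookup table that turns a string attribute
# into a fixed-length binary vector, with '_UNSEEN_' as the all-zero fallback
# row. The helper name map_feature is a hypothetical illustration of applying
# one such table, assuming the maps dict defined above.
def map_feature(value, mapper):
    # unknown values fall back to the all-zero '_UNSEEN_' row
    return mapper.get(value, mapper['_UNSEEN_'])

# e.g. map_feature('front', maps['vfront_to_binary']) -> [0, 0, 1]
#      map_feature('???',   maps['vfront_to_binary']) -> [0, 0, 0]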
-################################################################################ - -# configuration for the input labels (features) for the DNN -# -# this currently supports -# * input labels can be any combination of HTS and XML style input labels -# * output features are numerical *only* (all strings are fully expanded into 1-of-n encodings, etc) -# -# -# -# this is all executable python code -# so we need to define things before using them -# that means the description is bottom-up - -import imp -import numpy - -import logging -logger = logging.getLogger("labels") - -# we need to specify how any non-numerical (e.g., unicode string) features will be converted (mapped) into numerical feature vectors -# (just some examples for now) - - -maps = { - - 'cplace_to_binary': { - '_UNSEEN_': [0, 0, 0, 0, 0, 0, 0], - 'NA': [0, 0, 0, 0, 0, 0, 0], - '_NA_': [0, 0, 0, 0, 0, 0, 0], - 'alveolar': [1, 0, 0, 0, 0, 0, 0], - 'dental': [0, 1, 0, 0, 0, 0, 0], - 'glottal': [0, 0, 1, 0, 0, 0, 0], - 'labial': [0, 0, 0, 1, 0, 0, 0], - 'labiodental': [0, 0, 0, 0, 1, 0, 0], - 'palatal': [0, 0, 0, 0, 0, 1, 0], - 'velar': [0, 0, 0, 0, 0, 0, 1] - }, - - 'cmanner_to_binary': { - '_UNSEEN_': [0, 0, 0, 0, 0, 0], - 'NA': [0, 0, 0, 0, 0, 0], - '_NA_': [0, 0, 0, 0, 0, 0], - 'affricate': [1, 0, 0, 0, 0, 0], - 'approximant': [0, 1, 0, 0, 0, 0], - 'fricative': [0, 0, 1, 0, 0, 0], - 'liquid': [0, 0, 0, 1, 0, 0], - 'nasal': [0, 0, 0, 0, 1, 0], - 'stop': [0, 0, 0, 0, 0, 1] - }, - - 'cvoiced_to_binary': { - '_UNSEEN_': [0, 0], - 'NA': [0, 0], - '_NA_': [0, 0], - 'yes': [1, 0], - 'no': [0, 1] - }, - - - 'vfront_to_binary': { - '_UNSEEN_': [0, 0, 0], - 'NA': [0, 0, 0], - '_NA_': [0, 0, 0], - 'back': [1, 0, 0], - 'mid': [0, 1, 0], - 'front': [0, 0, 1] - }, - - 'vheight_to_binary': { - '_UNSEEN_': [0, 0, 0], - 'NA': [0, 0, 0], - '_NA_': [0, 0, 0], - 'high': [1, 0, 0], - 'mid': [0, 1, 0], - 'low': [0, 0, 1] - }, - - 'vlength_to_binary': { - '_UNSEEN_': [0, 0, 0, 0], - 'NA': [0, 0, 0, 0], - '_NA_': [0, 0, 0, 0], - 'diphthong': [1, 0, 0, 0], - 'long': [0, 1, 0, 0], - 'schwa': [0, 0, 1, 0], - 'short': [0, 0, 0, 1] - }, - - 'vround_to_binary': { - '_UNSEEN_': [0, 0], - 'NA': [0, 0], - '_NA_': [0, 0], - 'yes': [1, 0], - 'no': [0, 1] - }, - - 'vowel_cons_to_binary': { - '_UNSEEN_': [0, 0], - 'NA': [0, 0], - '_NA_': [0, 0], - 'vowel': [1, 0], - 'cons': [0, 1] - } -} - - -# osw -- also make some maps automatically, only specifying list of values for brevity: - -def make_1_of_k_map(values): - # strip special null values: - nulls = ['_UNSEEN_', 'NA', '_NA_'] - values = [val for val in values if val not in nulls] - map = {} - for (i, value) in enumerate(values): - vector = numpy.zeros(len(values)) - vector[i] = 1 - map[value] = vector.tolist() - for value in nulls: - map[value] = numpy.zeros(len(values)).tolist() - return map - - -phone_names = ['@', '@@', '@U', 'A', 'D', 'E', 'E@', 'I', 'I@', 'N', 'O', 'OI', 'Q', 'S', 'T', 'U', 'U@', 'V', 'Z', 'a', 'aI', 'aU', 'b', - 'd', 'dZ', 'eI', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'l!', 'lw', 'm', 'm!', 'n', 'n!', 'p', 'r', 's', 'sil', 't', 'tS', 'u', 'v', 'w', 'z'] - -fine_POS_inventory = ['_COMMA_', '_FULLSTOP_', '_SPACE_', 'cc', 'cd', 'dt', 'dt_VERTICALLINE_vbz', 'ex', 'ex_VERTICALLINE_vbz', 'in', 'jj', 'jjr', 'jjs', 'md', 'md_VERTICALLINE_rb', 'nn', 'nn_VERTICALLINE_pos', 'nnp', 'nnp_VERTICALLINE_pos', 'nnps', 'nns', 'pdt', 'prp', 'prp_DOLLARSIGN_', - 'prp_VERTICALLINE_md', 'prp_VERTICALLINE_vbp', 'prp_VERTICALLINE_vbz', 'rb', 'rbr', 'rbs', 'rp', 'to', 'vb', 'vb_VERTICALLINE_pos', 
'vb_VERTICALLINE_prp', 'vbd', 'vbd_VERTICALLINE_rb', 'vbg', 'vbn', 'vbp', 'vbp_VERTICALLINE_rb', 'vbz', 'vbz_VERTICALLINE_rb', 'wdt', 'wp', 'wp_VERTICALLINE_vbz', 'wrb'] - -coarse_POS_inventory = ['adj', 'adv', - 'function', 'noun', 'punc', 'space', 'verb'] - -stress_inventory = ['stress_0', 'stress_1', 'stress_2'] - -maps['phone_to_binary'] = make_1_of_k_map(phone_names) -maps['fine_POS_to_binary'] = make_1_of_k_map(fine_POS_inventory) -maps['coarse_POS_to_binary'] = make_1_of_k_map(coarse_POS_inventory) -maps['stress_to_binary'] = make_1_of_k_map(stress_inventory) - - -# read additional maps from external files and add them to the 'maps' dictionary -# each such file must define a dictionary of dictionaries called maps, in the same format as above -# TO DO - avoid full paths here - import them from the main config file -external_map_files = [] - -for fname in external_map_files: - # not sure this will work second time around - may not be able to import under the same module name ?? - external_maps = imp.load_source('external_maps', fname) - for k, v in external_maps.maps.items(): - if k in maps: - logger.warning( - 'Redefined map %s and over-wrote the previous map with the same name' % k) - maps[k] = v - -# how to extract features -# (just a few examples for now) -# -# each feature is a dictionary with various possible entries: -# xpath: an XPATH that will extract the required feature from a segment target node of an Ossian XML utterance tree -# hts: a (list of) HTS pseudo regular expression(s) that match(es) part of an HTS label, resulting in a single boolean feature -# mapper: an optional function or dictionary which converts the feature value (e.g., a string) to a (vector of) numerical value(s) -# -# the dictionary describes how to compute that feature -# first, either xpath or hts describes how to extract the feature from a tree or label name -# then, an optional mapping converts the feature via a lookup table (also a dictionary) into a numerical value or vector -# -# if no mapper is provided, then the feature must already be a single numerical or boolean value -# -# some XPATH-based features - -# in a future version, we could be more flexible and allow more than one target_node type at once, -# with a set of XPATHs for each target_node - it would not be very hard to modify the code to do this - -# the target nodes within the XML trees that the XPATH expressions apply to -target_nodes = "//state" - - -# and the XPATH expressions to apply - -xpath_labels = [] - -# NB: first feature is for silence trimming only: -xpath_labels.append( - {'xpath': "./ancestor::segment/attribute::pronunciation = 'sil'"}) - -for xpath in [ - - "./ancestor::segment/preceding::segment[2]/attribute::pronunciation", - "./ancestor::segment/preceding::segment[1]/attribute::pronunciation", - "./ancestor::segment/attribute::pronunciation", - "./ancestor::segment/following::segment[1]/attribute::pronunciation", - "./ancestor::segment/following::segment[2]/attribute::pronunciation"]: - - xpath_labels.append({'xpath': xpath, 'mapper': maps['phone_to_binary']}) - - -for xpath in [ - - "./ancestor::segment/preceding::segment[2]/attribute::vowel_cons", - "./ancestor::segment/preceding::segment[2]/attribute::vfront", - "./ancestor::segment/preceding::segment[2]/attribute::vheight", - "./ancestor::segment/preceding::segment[2]/attribute::vlength", - "./ancestor::segment/preceding::segment[2]/attribute::vround", - "./ancestor::segment/preceding::segment[2]/attribute::cmanner", -
"./ancestor::segment/preceding::segment[2]/attribute::cplace", - "./ancestor::segment/preceding::segment[2]/attribute::cvoiced", - - "./ancestor::segment/preceding::segment[1]/attribute::vowel_cons", - "./ancestor::segment/preceding::segment[1]/attribute::vfront", - "./ancestor::segment/preceding::segment[1]/attribute::vheight", - "./ancestor::segment/preceding::segment[1]/attribute::vlength", - "./ancestor::segment/preceding::segment[1]/attribute::vround", - "./ancestor::segment/preceding::segment[1]/attribute::cmanner", - "./ancestor::segment/preceding::segment[1]/attribute::cplace", - "./ancestor::segment/preceding::segment[1]/attribute::cvoiced", - - "./ancestor::segment/attribute::vowel_cons", - "./ancestor::segment/attribute::vfront", - "./ancestor::segment/attribute::vheight", - "./ancestor::segment/attribute::vlength", - "./ancestor::segment/attribute::vround", - "./ancestor::segment/attribute::cmanner", - "./ancestor::segment/attribute::cplace", - "./ancestor::segment/attribute::cvoiced", - - "./ancestor::segment/following::segment[1]/attribute::vowel_cons", - "./ancestor::segment/following::segment[1]/attribute::vfront", - "./ancestor::segment/following::segment[1]/attribute::vheight", - "./ancestor::segment/following::segment[1]/attribute::vlength", - "./ancestor::segment/following::segment[1]/attribute::vround", - "./ancestor::segment/following::segment[1]/attribute::cmanner", - "./ancestor::segment/following::segment[1]/attribute::cplace", - "./ancestor::segment/following::segment[1]/attribute::cvoiced", - - "./ancestor::segment/following::segment[2]/attribute::vowel_cons", - "./ancestor::segment/following::segment[2]/attribute::vfront", - "./ancestor::segment/following::segment[2]/attribute::vheight", - "./ancestor::segment/following::segment[2]/attribute::vlength", - "./ancestor::segment/following::segment[2]/attribute::vround", - "./ancestor::segment/following::segment[2]/attribute::cmanner", - "./ancestor::segment/following::segment[2]/attribute::cplace", - "./ancestor::segment/following::segment[2]/attribute::cvoiced"]: - - feature = xpath.split(':')[-1] - xpath_labels.append( - {'xpath': xpath, 'mapper': maps[feature + '_to_binary']}) - - -# syll stress -for xpath in [ - "ancestor::syllable/preceding::syllable[1]/attribute::stress", - "ancestor::syllable/attribute::stress", - "ancestor::syllable/following::syllable[1]/attribute::stress"]: - xpath_labels.append({'xpath': xpath, 'mapper': maps['stress_to_binary']}) - - -# fine & coarse POS -- 3 word window -for xpath in [ - "ancestor::token/preceding::token[@token_class='word'][1]/attribute::safe_pos", - "ancestor::token/attribute::safe_pos", - "ancestor::token/following::token[@token_class='word'][1]/attribute::safe_pos"]: - xpath_labels.append({'xpath': xpath, 'mapper': maps['fine_POS_to_binary']}) - -for xpath in [ - "ancestor::token/preceding::token[@token_class='word'][1]/attribute::coarse_pos", - "ancestor::token/attribute::coarse_pos", - "ancestor::token/following::token[@token_class='word'][1]/attribute::coarse_pos"]: - xpath_labels.append( - {'xpath': xpath, 'mapper': maps['coarse_POS_to_binary']}) - - -# === SIZES and DISTANCES till start/end -- these are numeric and not mapped: - -for xpath in [ - - # state in segment -- number states is fixed, so exclude size and only count in 1 direction - "count(./preceding-sibling::state)", - - ## segments in syll - "count(ancestor::syllable/preceding::syllable[1]/descendant::segment)", - "count(ancestor::syllable/descendant::segment)", - 
"count(ancestor::syllable/following::syllable[1]/descendant::segment)", - "count(./ancestor::segment/preceding-sibling::segment)", - "count(./ancestor::segment/following-sibling::segment)", - - ## segments in word - "count(ancestor::token/preceding::token[@token_class='word'][1]/descendant::segment)", - "count(ancestor::token/descendant::segment)", - "count(ancestor::token/following::token[@token_class='word'][1]/descendant::segment)", - "count(./ancestor::syllable/preceding-sibling::syllable/descendant::segment)", - "count(./ancestor::syllable/following-sibling::syllable/descendant::segment)", - - ## syll in word - "count(ancestor::token/preceding::token[@token_class='word'][1]/descendant::syllable)", - "count(ancestor::token/descendant::syllable)", - "count(ancestor::token/following::token[@token_class='word'][1]/descendant::syllable)", - "count(./ancestor::syllable/preceding-sibling::syllable)", - "count(./ancestor::syllable/following-sibling::syllable)", - - ## word in phrase - "count(ancestor::phrase/preceding::phrase[1]/descendant::token[@token_class='word'])", - "count(ancestor::phrase/descendant::token[@token_class='word'])", - "count(ancestor::phrase/following::phrase[1]/descendant::token[@token_class='word'])", - "count(ancestor::token/preceding-sibling::token[@token_class='word'])", - "count(ancestor::token/following-sibling::token[@token_class='word'])", - - ## syll in phrase - "count(ancestor::phrase/preceding::phrase[1]/descendant::syllable)", - "count(ancestor::phrase/descendant::syllable)", - "count(ancestor::phrase/following::phrase[1]/descendant::syllable)", - "count(ancestor::token/preceding-sibling::token/descendant::syllable)", - "count(ancestor::token/following-sibling::token/descendant::syllable)", - - ## segment in phrase - "count(ancestor::phrase/preceding::phrase[1]/descendant::segment)", - "count(ancestor::phrase/descendant::segment)", - "count(ancestor::phrase/following::phrase[1]/descendant::segment)", - "count(ancestor::token/preceding-sibling::token/descendant::segment)", - "count(ancestor::token/following-sibling::token/descendant::segment)", - - ## X in utterance - "count(preceding::segment)", - "count(preceding::syllable)", - "count(preceding::token[@token_class='word'])", - "count(preceding::phrase)", - - "count(following::segment)", - "count(following::syllable)", - "count(following::token[@token_class='word'])", - "count(following::phrase)", - - "count(ancestor::utt/descendant::segment)", - "count(ancestor::utt/descendant::syllable)", - "count(ancestor::utt/descendant::token[@token_class='word'])", - "count(ancestor::utt/descendant::phrase)" -]: - xpath_labels.append({'xpath': xpath}) - - -# -# # a composite "vector" of XPATH features -# # this is just an ordered list of features, each of which is a dictionary describing how to compute this feature -# # each feature may be a single numerical value or a vector of numerical values -# xpath_labels =[ -# -# # ll_segment, -# # l_segment, -# # c_segment, -# # r_segment, -# # rr_segment, -# -# cmanner, -# cplace, -# cvoiced, -# -# vfront, -# vheight, -# vlength, -# vround, -# -# vowel_cons -# ] -# - -# some HTS pseudo regular expression-based features -# all of these evaluate to a single boolean value, which will be eventually represented numerically -# note: names of features will need modifying to valid Python variable names (cannot contain "-", for example) -C_Dental_Fricative = {'hts': '{*-T+*,*-D+*}'} -C_Rounded_End = { - 'hts': 
'{*-9^+*,*-aU+*,*-o^+*,*-Or+*,*-QO+*,*-Q+*,*-@Ur+*,*-@U+*,*-O+*,*-u+*,*-U+*}'} -C_OI = {'hts': '{*-OI+*}'} - -# a composite "vector" of HTS features -hts_labels = [C_Dental_Fricative, C_Rounded_End, C_OI] - - -# the full feature vector -labels = xpath_labels # + hts_labels -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. 
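# Illustrative usage check (not from the original file): make_1_of_k_map above
# strips the null symbols, gives each remaining value a one-hot row, and maps
# the nulls themselves to all-zero vectors. With the stress inventory defined
# above (rows come back as floats because they are built with numpy.zeros and
# converted via tolist()):
m = make_1_of_k_map(['_UNSEEN_', 'stress_0', 'stress_1', 'stress_2'])
assert m['stress_0'] == [1.0, 0.0, 0.0]
assert m['stress_2'] == [0.0, 0.0, 1.0]
assert m['_UNSEEN_'] == [0.0, 0.0, 0.0]  # null symbols -> all-zero vector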
-################################################################################ - - -import numpy -import logging - - -class AcousticBase(object): - def __init__(self, delta_win=[-0.5, 0.0, 0.5], acc_win=[1.0, -2.0, 1.0]): - - # whether dynamic features are needed for each data stream - self.compute_dynamic = {} - self.file_number = 0 - self.data_stream_number = 0 - self.data_stream_list = [] - - self.out_dimension = 0 - self.record_vuv = False - - self.delta_win = delta_win - self.acc_win = acc_win - - self.logger = logging.getLogger("acoustic_data") - - ''' - in_file_list_dict: if there are multiple acoustic features, - each feature has a key in the dict() and corresponds to a list of file paths - out_file_list_dict: merge all the input files - - three types of data: - CMP : the one used for HTS training - DIY : raw data without header, such as the data used to compose CMP files - CMP_DIY : mix of CMP and DIY data - ''' - - def prepare_nn_data(self, in_file_list_dict, out_file_list, in_dimension_dict, out_dimension_dict): - - self.file_number = len(out_file_list) - - for data_stream_name in list(in_file_list_dict.keys()): - - try: - assert len( - in_file_list_dict[data_stream_name]) == self.file_number - except AssertionError: - self.logger.critical('file number of stream %s is different from others: %d %d' - % (data_stream_name, len(in_file_list_dict[data_stream_name]), self.file_number)) - raise - - try: - assert data_stream_name in in_dimension_dict - except AssertionError: - self.logger.critical( - 'data stream %s is missing in the input dimension dict!' % (data_stream_name)) - raise - - try: - assert data_stream_name in out_dimension_dict - except AssertionError: - self.logger.critical( - 'data stream %s is missing in the output dimension dict!' % (data_stream_name)) - raise - - # we assume static+delta+delta-delta - if out_dimension_dict[data_stream_name] == 3 * in_dimension_dict[data_stream_name]: - self.compute_dynamic[data_stream_name] = True - elif out_dimension_dict[data_stream_name] == in_dimension_dict[data_stream_name]: - self.compute_dynamic[data_stream_name] = False - else: - self.logger.critical('output dimension of stream %s should be equal to or three times of input dimension: %d %d' - % (data_stream_name, out_dimension_dict[data_stream_name], in_dimension_dict[data_stream_name])) - raise - - self.data_stream_list.append(data_stream_name) - - self.data_stream_number = len(self.data_stream_list) - - if 'vuv' in out_dimension_dict: - self.record_vuv = True - - if not ('lf0' in in_dimension_dict or 'F0' in in_dimension_dict): - self.logger.critical( - "if voiced and unvoiced information are to be recorded, the 'lf0' information must be provided") - raise - - for data_stream_name in list(out_dimension_dict.keys()): - self.out_dimension += out_dimension_dict[data_stream_name] - - # merge the data: like the cmp file - self.prepare_data(in_file_list_dict, out_file_list, - in_dimension_dict, out_dimension_dict) - - # the real function to do the work - # need to be implemented for a specific format - def prepare_data(self, in_file_list_dict, out_file_list, in_dimension_dict, out_dimension_dict): - pass - - # interpolate F0; if F0 has already been interpolated, nothing will be changed after passing this function - def interpolate_f0(self, data): - - data = numpy.reshape(data, (data.size, 1)) - - vuv_vector = numpy.zeros((data.size, 1)) - vuv_vector[data > 0.0] = 1.0 - vuv_vector[data <= 0.0] = 0.0 - - ip_data = data - - frame_number = data.size - last_value = 0.0 - for i in
range(frame_number): - if data[i] <= 0.0: - j = i+1 - for j in range(i+1, frame_number): - if data[j] > 0.0: - break - if j < frame_number-1: - if last_value > 0.0: - step = (data[j] - data[i-1]) / float(j - i + 1) - for k in range(i, j): - ip_data[k] = data[i-1] + step * (k - i + 1) - else: - for k in range(i, j): - ip_data[k] = data[j] - else: - for k in range(i, frame_number): - ip_data[k] = last_value - else: - ip_data[i] = data[i] - last_value = data[i] - - return ip_data, vuv_vector - -# delta_win = [-0.5, 0.0, 0.5] -# acc_win = [1.0, -2.0, 1.0] - def compute_dynamic_vector(self, vector, dynamic_win, frame_number): - - vector = numpy.reshape(vector, (frame_number, 1)) - - win_length = len(dynamic_win) - win_width = int(win_length/2) - temp_vector = numpy.zeros((frame_number + 2 * win_width, 1)) - delta_vector = numpy.zeros((frame_number, 1)) - - temp_vector[win_width:frame_number+win_width] = vector - for w in range(win_width): - temp_vector[w, 0] = vector[0, 0] - temp_vector[frame_number+win_width + - w, 0] = vector[frame_number-1, 0] - - for i in range(frame_number): - for w in range(win_length): - delta_vector[i] += temp_vector[i+w, 0] * dynamic_win[w] - - return delta_vector - - # compute dynamic features for a data matrix - def compute_dynamic_matrix(self, data_matrix, dynamic_win, frame_number, dimension): - dynamic_matrix = numpy.zeros((frame_number, dimension)) - - # compute dynamic feature dimension by dimension - for dim in range(dimension): - dynamic_matrix[:, dim:dim+1] = self.compute_dynamic_vector( - data_matrix[:, dim], dynamic_win, frame_number) - - return dynamic_matrix -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. 
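
# NOTE: illustrative aside, not part of the scraped corpus. A toy run of the
# AcousticBase methods defined above (the class is assumed from this file;
# the numbers are made up). With delta_win = [-0.5, 0.0, 0.5] each output
# frame is 0.5 * (x[t+1] - x[t-1]), edge frames padded by repetition.
import numpy

base = AcousticBase()
f0 = numpy.array([[100.0], [110.0], [120.0], [130.0]])
delta = base.compute_dynamic_vector(f0, base.delta_win, frame_number=4)
# delta -> [[5.], [10.], [10.], [5.]]
acc = base.compute_dynamic_vector(f0, base.acc_win, frame_number=4)
# acc -> [[10.], [0.], [0.], [-10.]]

# interpolate_f0 fills unvoiced gaps (zeros) linearly and returns a
# voiced/unvoiced mask; both come back as (N, 1) arrays:
lf0 = numpy.array([4.6, 0.0, 0.0, 4.8, 5.0])  # zeros mark unvoiced frames
ip, vuv = base.interpolate_f0(lf0)
# ip ~ [4.6, 4.67, 4.73, 4.8, 5.0]; vuv -> [1, 0, 0, 1, 1]
# (note: ip_data aliases the input inside the method, so the input is mutated)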
-################################################################################ - -from io_funcs.binary_io import BinaryIOCollection -import numpy -import logging -from .acoustic_base import AcousticBase -import os -# io_funcs. - - -class AcousticComposition(AcousticBase): - - # prepare_nn_data(self, in_file_list_dict, out_file_list, in_dimension_dict, out_dimension_dict): - - ''' - variables inheritate from AcousticBase: - self.compute_dynamic = {} - self.file_number = 0 - self.data_stream_number = 0 - self.data_stream_list = [] - - self.out_dimension = 0 - self.record_vuv = False - ''' - - def make_equal_frames(self, in_file_list, ref_file_list, in_dimension_dict): - logger = logging.getLogger("acoustic_comp") - - logger.info('making equal number of lines...') - - io_funcs = BinaryIOCollection() - - utt_number = len(in_file_list) - - for i in range(utt_number): - in_file_name = in_file_list[i] - in_data_stream_name = in_file_name.split('.')[-1] - in_feature_dim = in_dimension_dict[in_data_stream_name] - in_features, in_frame_number = io_funcs.load_binary_file_frame( - in_file_name, in_feature_dim) - - ref_file_name = ref_file_list[i] - ref_data_stream_name = ref_file_name.split('.')[-1] - ref_feature_dim = in_dimension_dict[ref_data_stream_name] - ref_features, ref_frame_number = io_funcs.load_binary_file_frame( - ref_file_name, ref_feature_dim) - - target_features = numpy.zeros((ref_frame_number, in_feature_dim)) - if in_frame_number == ref_frame_number: - continue - elif in_frame_number > ref_frame_number: - target_features[0:ref_frame_number, - ] = in_features[0:ref_frame_number, ] - elif in_frame_number < ref_frame_number: - target_features[0:in_frame_number, - ] = in_features[0:in_frame_number, ] - io_funcs.array_to_binary_file(target_features, in_file_name) - - logger.info('Finished: made equal rows in data stream %s with reference to data stream %s ' % ( - in_data_stream_name, ref_data_stream_name)) - - def prepare_data(self, in_file_list_dict, out_file_list, in_dimension_dict, out_dimension_dict): - - logger = logging.getLogger("acoustic_comp") - - stream_start_index = {} - stream_dim_index = 0 - for stream_name in list(out_dimension_dict.keys()): - if stream_name not in stream_start_index: - stream_start_index[stream_name] = stream_dim_index - - stream_dim_index += out_dimension_dict[stream_name] - - io_funcs = BinaryIOCollection() - - for i in range(self.file_number): - out_file_name = out_file_list[i] - - # if os.path.isfile(out_file_name): - # logger.info('processing file %4d of %4d : %s exists' % (i+1, self.file_number, out_file_name)) - # continue - - logger.info('processing file %4d of %4d : %s' % - (i+1, self.file_number, out_file_name)) - - out_data_matrix = None - out_frame_number = 0 - - for k in range(self.data_stream_number): - data_stream_name = self.data_stream_list[k] - in_file_name = in_file_list_dict[data_stream_name][i] - in_feature_dim = in_dimension_dict[data_stream_name] - features, frame_number = io_funcs.load_binary_file_frame( - in_file_name, in_feature_dim) - - if k == 0: - out_frame_number = frame_number - out_data_matrix = numpy.zeros( - (out_frame_number, self.out_dimension)) - - if frame_number > out_frame_number: - features = features[0:out_frame_number, ] - frame_number = out_frame_number - - try: - assert out_frame_number == frame_number - except AssertionError: - logger.critical('the frame number of data stream %s is not consistent with others: current %d others %d' - % (data_stream_name, out_frame_number, frame_number)) - raise - - dim_index = 
stream_start_index[data_stream_name]
-
-            if data_stream_name in ['lf0', 'F0']:  # F0 added for GlottHMM
-                features, vuv_vector = self.interpolate_f0(features)
-
-                # if vuv information is to be recorded, store it in the corresponding column
-                if self.record_vuv:
-                    out_data_matrix[0:out_frame_number, stream_start_index['vuv']:stream_start_index['vuv']+1] = vuv_vector
-
-            out_data_matrix[0:out_frame_number, dim_index:dim_index+in_feature_dim] = features
-            dim_index = dim_index+in_feature_dim
-
-            if self.compute_dynamic[data_stream_name]:
-
-                delta_features = self.compute_dynamic_matrix(features, self.delta_win, frame_number, in_feature_dim)
-                acc_features = self.compute_dynamic_matrix(features, self.acc_win, frame_number, in_feature_dim)
-
-                out_data_matrix[0:out_frame_number, dim_index:dim_index+in_feature_dim] = delta_features
-                dim_index = dim_index+in_feature_dim
-
-                out_data_matrix[0:out_frame_number, dim_index:dim_index+in_feature_dim] = acc_features
-
-            # write data to file
-            io_funcs.array_to_binary_file(out_data_matrix, out_file_name)
-            logger.debug(' wrote %d frames of features', out_frame_number)
-
-    def acoustic_decomposition(self, in_file_list, out_dimension_dict, file_extension_dict):
-
-        stream_start_index = {}
-        dimension_index = 0
-        recorded_vuv = False
-        vuv_dimension = None
-        for feature_name in list(out_dimension_dict.keys()):
-            if feature_name != 'vuv':
-                stream_start_index[feature_name] = dimension_index
-            else:
-                vuv_dimension = dimension_index
-                recorded_vuv = True
-
-            dimension_index += out_dimension_dict[feature_name]
-
-        for file_name in in_file_list:
-            dir_name = os.path.dirname(file_name)
-            file_id = os.path.splitext(os.path.basename(file_name))[0]
-
-
-if __name__ == '__main__':
-
-    acoustic_cmper = AcousticComposition()
-
-    in_dimension_dict = {'mgc': 50,
-                         'lf0': 1,
-                         'bap': 25}
-    out_dimension_dict = {'mgc': 150,
-                          'lf0': 3,
-                          'vuv': 1,
-                          'bap': 75}
-
-    in_file_list_dict = {}
-    in_file_list_dict['mgc'] = ['/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/mgc/herald_001.mgc',
-                                '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/mgc/herald_002.mgc']
-    in_file_list_dict['lf0'] = ['/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/lf0/herald_001.lf0',
-                                '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/lf0/herald_002.lf0']
-    in_file_list_dict['bap'] = ['/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/bap/herald_001.bap',
-                                '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/bap/herald_002.bap']
-
-    out_file_list = ['/afs/inf.ed.ac.uk/group/project/dnn_tts/herald_001.cmp',
-                     '/afs/inf.ed.ac.uk/group/project/dnn_tts/herald_002.cmp']
-
-    acoustic_cmper.prepare_nn_data(in_file_list_dict, out_file_list, in_dimension_dict, out_dimension_dict)
-################################################################################
-# The Neural Network (NN) based Speech Synthesis System
-# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/
-#
-# Centre for Speech Technology Research
-# University of Edinburgh, UK
-# Copyright (c) 2014-2015
-# All Rights Reserved.
-# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - - -from io_funcs.htk_io import HTK_Parm_IO -from io_funcs.binary_io import BinaryIOCollection -import numpy -import logging - - -class CMPNormalisation(object): - def __init__(self, mgc_dim=0, bap_dim=0, lf0_dim=0): - self.mgc_dim = mgc_dim * 3 - self.bap_dim = bap_dim * 3 - self.lf0_dim = lf0_dim * 3 - - def load_cmp_file(self, file_name): - - logger = logging.getLogger("acoustic_norm") - - htk_reader = HTK_Parm_IO() - htk_reader.read_htk(file_name) - - cmp_data = htk_reader.data - - mgc_data = cmp_data[:, 0:self.mgc_dim] - - # this only extracts the static lf0 because we need to interpolate it, then add deltas ourselves later - lf0_data = cmp_data[:, self.mgc_dim] - - bap_data = cmp_data[:, self.mgc_dim + - self.lf0_dim:self.mgc_dim+self.lf0_dim+self.bap_dim] - - logger.debug('loaded %s of shape %s' % (file_name, cmp_data.shape)) - logger.debug(' with: %d mgc + %d lf0 + %d bap = %d' % (self.mgc_dim, - self.lf0_dim, self.bap_dim, self.mgc_dim+self.lf0_dim+self.bap_dim)) - - assert((self.mgc_dim+self.lf0_dim+self.bap_dim) == cmp_data.shape[1]) - - return mgc_data, bap_data, lf0_data - - def interpolate_f0(self, data): - - data = numpy.reshape(data, (data.size, 1)) - - vuv_vector = numpy.zeros((data.size, 1)) - vuv_vector[data > 0.0] = 1.0 - vuv_vector[data <= 0.0] = 0.0 - - ip_data = data - - frame_number = data.size - last_value = 0.0 - for i in range(frame_number): - if data[i] <= 0.0: - j = i+1 - for j in range(i+1, frame_number): - if data[j] > 0.0: - break - if j < frame_number-1: - if last_value > 0.0: - step = (data[j] - data[i-1]) / float(j - i) - for k in range(i, j): - ip_data[k] = data[i-1] + step * (k - i + 1) - else: - for k in range(i, j): - ip_data[k] = data[j] - else: - for k in range(i, frame_number): - ip_data[k] = last_value - else: - ip_data[i] = data[i] - last_value = data[i] - - return ip_data, vuv_vector - - def compute_delta(self, vector, delta_win): - # 
delta_win = [-0.5, 0.0, 0.5] - # acc_win = [1.0, -2.0, 1.0] - - frame_number = vector.size - win_length = len(delta_win) - win_width = int(win_length/2) - temp_vector = numpy.zeros((frame_number + 2 * win_width, 1)) - delta_vector = numpy.zeros((frame_number, 1)) - - temp_vector[win_width:frame_number+win_width, ] = vector - for w in range(win_width): - temp_vector[w, 0] = vector[0, 0] - temp_vector[frame_number+win_width + - w, 0] = vector[frame_number-1, 0] - - for i in range(frame_number): - for w in range(win_length): - delta_vector[i] += temp_vector[i+w, 0] * delta_win[w] - - return delta_vector - - def produce_nn_cmp(self, in_file_list, out_file_list): - - logger = logging.getLogger("acoustic_norm") - - delta_win = [-0.5, 0.0, 0.5] - acc_win = [1.0, -2.0, 1.0] - - file_number = len(in_file_list) - logger.info('starting creation of %d files' % file_number) - - for i in range(file_number): - - mgc_data, bap_data, lf0_data = self.load_cmp_file(in_file_list[i]) - ip_lf0, vuv_vector = self.interpolate_f0(lf0_data) - - delta_lf0 = self.compute_delta(ip_lf0, delta_win) - acc_lf0 = self.compute_delta(ip_lf0, acc_win) - - frame_number = ip_lf0.size - - cmp_data = numpy.concatenate( - (mgc_data, ip_lf0, delta_lf0, acc_lf0, vuv_vector, bap_data), axis=1) - - io_funcs = BinaryIOCollection() - io_funcs.array_to_binary_file(cmp_data, out_file_list[i]) - - logger.info('finished creation of %d binary files' % file_number) - - -if __name__ == '__main__': - in_file_list = ['/group/project/dnn_tts/data/nick/cmp/herald_001.cmp'] - out_file_list = ['/group/project/dnn_tts/herald_001.out.cmp'] - - cmp_norm = CMPNormalisation(mgc_dim=50, bap_dim=25, lf0_dim=1) - - cmp_norm.produce_nn_cmp(in_file_list, out_file_list) -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - - -import numpy -from io_funcs.binary_io import BinaryIOCollection - -import logging - - -class FeatureNormBase(object): - ''' - to normalise feature into specific range - to de-normalise feature back - support min-max norm, MVN, - this is a genetic class - ''' - - def __init__(self): - self.logger = logging.getLogger('feature_normalisation') - - self.dimension_dict = {} - self.start_index_dict = {} - self.feature_dimension = 0 - - def feature_normalisation(self): - pass - - def feature_denormalisation(self): - pass - - def normal_standardization(self, in_file_list, out_file_list, feature_dimension): - - # self.dimension_dict = dimension_dict - self.feature_dimension = feature_dimension - - mean_vector = self.compute_mean(in_file_list, 0, feature_dimension) - std_vector = self.compute_std( - in_file_list, mean_vector, 0, feature_dimension) - - io_funcs = BinaryIOCollection() - file_number = len(in_file_list) - - for i in range(file_number): - - features, current_frame_number = io_funcs.load_binary_file_frame( - in_file_list[i], self.feature_dimension) - - mean_matrix = numpy.tile(mean_vector, (current_frame_number, 1)) - std_matrix = numpy.tile(std_vector, (current_frame_number, 1)) - - norm_features = (features - mean_matrix) / std_matrix - - io_funcs.array_to_binary_file(norm_features, out_file_list[i]) - - return mean_vector, std_vector - - def find_min_max_values(self, in_file_list, start_index, end_index): - - local_feature_dimension = end_index - start_index - - file_number = len(in_file_list) - min_value_matrix = numpy.zeros((file_number, local_feature_dimension)) - max_value_matrix = numpy.zeros((file_number, local_feature_dimension)) - io_funcs = BinaryIOCollection() - for i in range(file_number): - features = io_funcs.load_binary_file( - in_file_list[i], self.feature_dimension) - - temp_min = numpy.amin(features[:, start_index:end_index], axis=0) - temp_max = numpy.amax(features[:, start_index:end_index], axis=0) - - min_value_matrix[i, ] = temp_min - max_value_matrix[i, ] = temp_max - - self.min_vector = numpy.amin(min_value_matrix, axis=0) - self.max_vector = numpy.amax(max_value_matrix, axis=0) - self.min_vector = numpy.reshape( - self.min_vector, (1, local_feature_dimension)) - self.max_vector = numpy.reshape( - self.max_vector, (1, local_feature_dimension)) - - # po=numpy.get_printoptions() - # numpy.set_printoptions(precision=2, threshold=20, linewidth=1000, edgeitems=4) - self.logger.info('found min/max values of length %d:' % - local_feature_dimension) - self.logger.info(' min: %s' % self.min_vector) - self.logger.info(' max: %s' % self.max_vector) - # restore the print options - # numpy.set_printoptions(po) - - def compute_mean(self, file_list, start_index, end_index): - - local_feature_dimension = end_index - start_index - - mean_vector = numpy.zeros((1, local_feature_dimension)) - all_frame_number = 0 - - io_funcs = BinaryIOCollection() - 
for file_name in file_list: - features, current_frame_number = io_funcs.load_binary_file_frame( - file_name, self.feature_dimension) - - mean_vector += numpy.reshape(numpy.sum( - features[:, start_index:end_index], axis=0), (1, local_feature_dimension)) - all_frame_number += current_frame_number - - mean_vector /= float(all_frame_number) - - # po=numpy.get_printoptions() - # numpy.set_printoptions(precision=2, threshold=20, linewidth=1000, edgeitems=4) - self.logger.info('computed mean vector of length %d :' % - mean_vector.shape[1]) - self.logger.info(' mean: %s' % mean_vector) - # restore the print options - # numpy.set_printoptions(po) - - return mean_vector - - def compute_std(self, file_list, mean_vector, start_index, end_index): - local_feature_dimension = end_index - start_index - - std_vector = numpy.zeros((1, self.feature_dimension)) - all_frame_number = 0 - - io_funcs = BinaryIOCollection() - for file_name in file_list: - features, current_frame_number = io_funcs.load_binary_file_frame( - file_name, self.feature_dimension) - - mean_matrix = numpy.tile(mean_vector, (current_frame_number, 1)) - - std_vector += numpy.reshape(numpy.sum( - (features[:, start_index:end_index] - mean_matrix) ** 2, axis=0), (1, local_feature_dimension)) - all_frame_number += current_frame_number - - std_vector /= float(all_frame_number) - - std_vector = std_vector ** 0.5 - - # po=numpy.get_printoptions() - # numpy.set_printoptions(precision=2, threshold=20, linewidth=1000, edgeitems=4) - self.logger.info('computed std vector of length %d' % - std_vector.shape[1]) - self.logger.info(' std: %s' % std_vector) - # restore the print options - # numpy.set_printoptions(po) - - return std_vector -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. 
-#
-# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK
-# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
-# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
-# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE
-# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
-# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
-# THIS SOFTWARE.
-################################################################################
-
-
-import logging
-import imp
-import numpy
-from io_funcs.binary_io import BinaryIOCollection
-
-from frontend.label_normalisation import HTSLabelNormalisation
-
-
-# context-dependent printing format for Numpy - should move this out to a utility file somewhere
-import contextlib
-@contextlib.contextmanager
-def printoptions(*args, **kwargs):
-    original = numpy.get_printoptions()
-    numpy.set_printoptions(*args, **kwargs)
-    yield
-    numpy.set_printoptions(**original)
-
-
-class LabelComposer(object):
-
-    # a class that can compose input labels according to the user's specification, and convert them to numerical vectors
-
-    def __init__(self):
-
-        self.logger = logging.getLogger("labels")
-        self.configuration = None
-        self.label_dimension = None
-
-        # what label styles we find in the feature specification
-        # e.g., 'xpath' , 'hts'
-        self.label_styles = {}
-
-        self.use_precompiled_xpaths = False  # will be set True if xpaths are compiled
-
-    def load_label_configuration(self, filename):
-
-        # load in a label specification, provided by the user
-        try:
-            self.configuration = imp.load_source('label_config', filename)
-        except IOError:
-            self.logger.critical('failed to open label configuration file %s' % filename)
-            raise
-        except:
-            self.logger.critical('error loading label configuration from %s' % filename)
-            raise
-
-        # perform some sanity checks on it
-        #
-        # make sure 'labels' is defined
-        try:
-            assert self.configuration.labels
-        except AssertionError:
-            self.logger.critical('loaded label configuration file %s, but it did not define "labels" !' % filename)
-
-    def compute_label_dimension(self):
-
-        self.label_dimension = 0
-
-        try:
-            assert self.configuration
-        except AssertionError:
-            self.logger.critical('no label configuration loaded, so cannot compute dimension')
-            raise
-
-        for feature_specification in self.configuration.labels:
-            # osw# self.logger.debug('looking at feature %s' % feature_specification )
-            # feature is a dictionary specifying how to construct this part of the input feature vector
-            if 'xpath' in feature_specification:
-                # xpath and hts are mutually exclusive label styles
-                assert 'hts' not in feature_specification
-
-                # if there is a mapper, then we will use that to convert the features to numbers
-                # we need to look at the mapper to deduce the dimensionality of vectors that it will produce
-                if 'mapper' in feature_specification:
-
-                    # get an arbitrary item as the reference and measure its dimensionality
-                    try:
-                        l = len(next(iter(feature_specification['mapper'].values())))
-                    except:
-                        self.logger.critical('Empty mapper for feature %s' % feature_specification)
-
-                    for k, v in feature_specification['mapper'].items():
-                        # make sure all other entries have the same dimension
-                        try:
-                            assert len(v) == l
-                        except AssertionError:
-                            self.logger.critical('Inconsistent dimensionality in mapper for feature %s' % feature_specification)
-                    self.label_dimension = self.label_dimension + l
-                    # print ' add %s cum: %s'%( str(l), self.label_dimension)
-
-                else:
-                    # without a mapper, features will be single numerical values
-                    self.label_dimension = self.label_dimension + 1
-                    # print ' add 1 cum: %s'%( self.label_dimension)
-
-                # we have seen at least one feature that will require xpath label files to be loaded
-                self.label_styles['xpath'] = True
-
-            if 'hts' in feature_specification:
-                assert 'xpath' not in feature_specification
-                # will become True once implemented
-                self.label_styles['hts'] = False
-                # not yet implemented !
-                self.logger.warning('HTS features not implemented - ignoring them!')
-
-        # for frame features -- TODO: decide how to handle this properly
-        self.label_dimension += 1
-        # print ' add 3 cum: %s'%( self.label_dimension)
-
-        return self.label_dimension
-
-
-if __name__ == '__main__':
-
-    logger = logging.getLogger("labels")
-    logger.setLevel(logging.DEBUG)
-    # a console handler
-    ch = logging.StreamHandler()
-    ch.setLevel(logging.DEBUG)
-    logger.addHandler(ch)
-
-    label_composer = LabelComposer()
-    label_composer.load_label_configuration('configuration/labelconfigfile.conf')
-
-    print('Loaded configuration, which is:')
-    print(label_composer.configuration.labels)
-
-    d = label_composer.compute_label_dimension()
-    print("label dimension will be", d)
-
-    # no test code written for actual label processing - too complex and relies on config files
-
-import os
-import numpy
-import re
-import sys
-from io_funcs.binary_io import BinaryIOCollection
-
-import logging
-# from logplot.logging_plotting import LoggerPlotter #, MultipleTimeSeriesPlot, SingleWeightMatrixPlot
-
-
-class HTSLabelModification(object):
-    """This class is to modify HTS format labels with predicted duration.
-
-    Time alignments are expected in the HTS labels.
Here is an example of the HTS labels: - - 3050000 3100000 xx~#-p+l=i:1_4/A/0_0_0/B/1-1-4:1-1&1-4#1-3$1-4>0-1<0-1|i/C/1+1+3/D/0_0/E/content+1:1+3&1+2#0+1/F/content_1/G/0_0/H/4=3:1=1&L-L%/I/0_0/J/4+3-1[2] - - 3100000 3150000 xx~#-p+l=i:1_4/A/0_0_0/B/1-1-4:1-1&1-4#1-3$1-4>0-1<0-1|i/C/1+1+3/D/0_0/E/content+1:1+3&1+2#0+1/F/content_1/G/0_0/H/4=3:1=1&L-L%/I/0_0/J/4+3-1[3] - - 3150000 3250000 xx~#-p+l=i:1_4/A/0_0_0/B/1-1-4:1-1&1-4#1-3$1-4>0-1<0-1|i/C/1+1+3/D/0_0/E/content+1:1+3&1+2#0+1/F/content_1/G/0_0/H/4=3:1=1&L-L%/I/0_0/J/4+3-1[4] - - 3250000 3350000 xx~#-p+l=i:1_4/A/0_0_0/B/1-1-4:1-1&1-4#1-3$1-4>0-1<0-1|i/C/1+1+3/D/0_0/E/content+1:1+3&1+2#0+1/F/content_1/G/0_0/H/4=3:1=1&L-L%/I/0_0/J/4+3-1[5] - - 3350000 3900000 xx~#-p+l=i:1_4/A/0_0_0/B/1-1-4:1-1&1-4#1-3$1-4>0-1<0-1|i/C/1+1+3/D/0_0/E/content+1:1+3&1+2#0+1/F/content_1/G/0_0/H/4=3:1=1&L-L%/I/0_0/J/4+3-1[6] - - 305000 310000 are the starting and ending time. - [2], [3], [4], [5], [6] mean the HMM state index. - - """ - - def __init__(self, silence_pattern=['*-#+*'], label_type="state_align"): - - logger = logging.getLogger("labels") - - self.silence_pattern = silence_pattern - self.silence_pattern_size = len(silence_pattern) - self.label_type = label_type - self.state_number = 5 - - def check_silence_pattern(self, label): - for current_pattern in self.silence_pattern: - current_pattern = current_pattern.strip('*') - if current_pattern in label: - return 1 - return 0 - - def modify_duration_labels(self, in_gen_label_align_file_list, gen_dur_list, gen_label_list): - ''' - modifying duration from label alignments with predicted duration. - ''' - utt_number = len(gen_dur_list) - if utt_number != len(in_gen_label_align_file_list): - print("the number of input and output files should be the same!\n") - sys.exit(1) - - for i in range(utt_number): - if (self.label_type == "state_align"): - self.modify_dur_from_state_alignment_labels( - in_gen_label_align_file_list[i], gen_dur_list[i], gen_label_list[i]) - elif (self.label_type == "phone_align"): - self.modify_dur_from_phone_alignment_labels( - in_gen_label_align_file_list[i], gen_dur_list[i], gen_label_list[i]) - else: - logger.critical( - "we don't support %s labels as of now!!" 
% (self.label_type)) - sys.exit(1) - - def modify_dur_from_state_alignment_labels(self, label_file_name, gen_dur_file_name, gen_lab_file_name): - logger = logging.getLogger("dur") - - state_number = self.state_number - dur_dim = state_number - - io_funcs = BinaryIOCollection() - dur_features, frame_number = io_funcs.load_binary_file_frame( - gen_dur_file_name, dur_dim) - - fid = open(label_file_name) - utt_labels = fid.readlines() - fid.close() - - label_number = len(utt_labels) - logger.info('loaded %s, %3d labels' % (label_file_name, label_number)) - - out_fid = open(gen_lab_file_name, 'w') - - current_index = 0 - prev_end_time = 0 - for line in utt_labels: - line = line.strip() - - if len(line) < 1: - continue - temp_list = re.split('\s+', line) - - if len(temp_list) == 1: - start_time = 0 - end_time = 600000 # hard-coded silence duration - full_label = temp_list[0] - else: - start_time = int(temp_list[0]) - end_time = int(temp_list[1]) - full_label = temp_list[2] - - # remove state information [k] - full_label_length = len(full_label) - 3 - state_index = full_label[full_label_length + 1] - state_index = int(state_index) - 1 - - label_binary_flag = self.check_silence_pattern(full_label) - - if len(temp_list) == 1: - for state_index in range(1, state_number+1): - if label_binary_flag == 1: - current_state_dur = end_time - start_time - else: - pred_state_dur = dur_features[current_index, - state_index-1] - current_state_dur = int(pred_state_dur)*5*10000 - out_fid.write(str(prev_end_time)+' '+str(prev_end_time + - current_state_dur)+' '+full_label+'['+str(state_index+1)+']\n') - prev_end_time = prev_end_time + current_state_dur - else: - if label_binary_flag == 1: - current_state_dur = end_time - start_time - else: - pred_state_dur = dur_features[current_index, state_index-1] - current_state_dur = int(pred_state_dur)*5*10000 - out_fid.write( - str(prev_end_time)+' '+str(prev_end_time+current_state_dur)+' '+full_label+'\n') - prev_end_time = prev_end_time + current_state_dur - - if state_index == state_number and label_binary_flag != 1: - current_index += 1 - - logger.debug( - 'modifed label with predicted duration of %d frames x %d features' % dur_features.shape) - - def modify_dur_from_phone_alignment_labels(self, label_file_name, gen_dur_file_name, gen_lab_file_name): - logger = logging.getLogger("dur") - - dur_dim = 1 - - io_funcs = BinaryIOCollection() - dur_features, frame_number = io_funcs.load_binary_file_frame( - gen_dur_file_name, dur_dim) - - fid = open(label_file_name) - utt_labels = fid.readlines() - fid.close() - - label_number = len(utt_labels) - logger.info('loaded %s, %3d labels' % (label_file_name, label_number)) - - out_fid = open(gen_lab_file_name, 'w') - - current_index = 0 - prev_end_time = 0 - for line in utt_labels: - line = line.strip() - - if len(line) < 1: - continue - temp_list = re.split('\s+', line) - - if len(temp_list) == 1: - start_time = 0 - end_time = 3000000 # hard-coded silence duration - full_label = temp_list[0] - else: - start_time = int(temp_list[0]) - end_time = int(temp_list[1]) - full_label = temp_list[2] - - label_binary_flag = self.check_silence_pattern(full_label) - - if label_binary_flag == 1: - current_phone_dur = end_time - start_time - out_fid.write( - str(prev_end_time)+' '+str(prev_end_time+current_phone_dur)+' '+full_label+'\n') - prev_end_time = prev_end_time+current_phone_dur - continue - else: - phone_dur = dur_features[current_index] - phone_dur = int(phone_dur)*5*10000 - out_fid.write(str(prev_end_time)+' ' + - 
str(prev_end_time+phone_dur)+' '+full_label+'\n') - prev_end_time = prev_end_time+phone_dur - - current_index += 1 - - logger.debug( - 'modifed label with predicted duration of %d frames x %d features' % dur_features.shape) - -import os -import numpy -import re -import sys -from multiprocessing import Pool -from io_funcs.binary_io import BinaryIOCollection -from .linguistic_base import LinguisticBase - -import matplotlib.mlab as mlab -import math - -import logging -# from logplot.logging_plotting import LoggerPlotter #, MultipleTimeSeriesPlot, SingleWeightMatrixPlot - - -class LabelNormalisation(LinguisticBase): - - # this class only knows how to deal with a single style of labels (XML or HTS) - # (to deal with composite labels, use LabelComposer instead) - - def __init__(self, question_file_name=None, xpath_file_name=None): - pass - - def extract_linguistic_features(self, in_file_name, out_file_name=None, label_type="state_align", dur_file_name=None): - if label_type == "phone_align": - A = self.load_labels_with_phone_alignment( - in_file_name, dur_file_name) - elif label_type == "state_align": - A = self.load_labels_with_state_alignment(in_file_name) - else: - logger.critical( - "we don't support %s labels as of now!!" % (label_type)) - - if out_file_name: - io_funcs = BinaryIOCollection() - io_funcs.array_to_binary_file(A, out_file_name) - else: - return A - -# ----------------------------- - - -class HTSLabelNormalisation(LabelNormalisation): - """This class is to convert HTS format labels into continous or binary values, and store as binary format with float32 precision. - - The class supports two kinds of questions: QS and CQS. - **QS**: is the same as that used in HTS - - **CQS**: is the new defined question in the system. Here is an example of the question: CQS C-Syl-Tone {_(\d+)+}. regular expression is used for continous values. - - Time alignments are expected in the HTS labels. Here is an example of the HTS labels: - - 3050000 3100000 xx~#-p+l=i:1_4/A/0_0_0/B/1-1-4:1-1&1-4#1-3$1-4>0-1<0-1|i/C/1+1+3/D/0_0/E/content+1:1+3&1+2#0+1/F/content_1/G/0_0/H/4=3:1=1&L-L%/I/0_0/J/4+3-1[2] - - 3100000 3150000 xx~#-p+l=i:1_4/A/0_0_0/B/1-1-4:1-1&1-4#1-3$1-4>0-1<0-1|i/C/1+1+3/D/0_0/E/content+1:1+3&1+2#0+1/F/content_1/G/0_0/H/4=3:1=1&L-L%/I/0_0/J/4+3-1[3] - - 3150000 3250000 xx~#-p+l=i:1_4/A/0_0_0/B/1-1-4:1-1&1-4#1-3$1-4>0-1<0-1|i/C/1+1+3/D/0_0/E/content+1:1+3&1+2#0+1/F/content_1/G/0_0/H/4=3:1=1&L-L%/I/0_0/J/4+3-1[4] - - 3250000 3350000 xx~#-p+l=i:1_4/A/0_0_0/B/1-1-4:1-1&1-4#1-3$1-4>0-1<0-1|i/C/1+1+3/D/0_0/E/content+1:1+3&1+2#0+1/F/content_1/G/0_0/H/4=3:1=1&L-L%/I/0_0/J/4+3-1[5] - - 3350000 3900000 xx~#-p+l=i:1_4/A/0_0_0/B/1-1-4:1-1&1-4#1-3$1-4>0-1<0-1|i/C/1+1+3/D/0_0/E/content+1:1+3&1+2#0+1/F/content_1/G/0_0/H/4=3:1=1&L-L%/I/0_0/J/4+3-1[6] - - 305000 310000 are the starting and ending time. - [2], [3], [4], [5], [6] mean the HMM state index. 
- - """ - - # this subclass support HTS labels, which include time alignments - - def __init__(self, question_file_name=None, add_frame_features=True, subphone_feats='full', continuous_flag=True): - - logger = logging.getLogger("labels") - - self.question_dict = {} - self.ori_question_dict = {} - self.dict_size = 0 - self.continuous_flag = continuous_flag - try: - # self.question_dict, self.ori_question_dict = self.load_question_set(question_file_name) - self.discrete_dict, self.continuous_dict = self.load_question_set_continous( - question_file_name) - except: - logger.critical('error whilst loading HTS question set') - raise - - ###self.dict_size = len(self.question_dict) - - self.dict_size = len(self.discrete_dict) + len(self.continuous_dict) - self.add_frame_features = add_frame_features - self.subphone_feats = subphone_feats - - if self.subphone_feats == 'full': - # zhizheng's original 5 state features + 4 phoneme features - self.frame_feature_size = 9 - elif self.subphone_feats == 'minimal_frame': - # the minimal features necessary to go from a state-level to frame-level model - self.frame_feature_size = 2 - elif self.subphone_feats == 'state_only': - self.frame_feature_size = 1 # this is equivalent to a state-based system - elif self.subphone_feats == 'none': - self.frame_feature_size = 0 # the phoneme level features only - elif self.subphone_feats == 'frame_only': - # this is equivalent to a frame-based system without relying on state-features - self.frame_feature_size = 1 - elif self.subphone_feats == 'uniform_state': - # this is equivalent to a frame-based system with uniform state-features - self.frame_feature_size = 2 - elif self.subphone_feats == 'minimal_phoneme': - # this is equivalent to a frame-based system with minimal features - self.frame_feature_size = 3 - elif self.subphone_feats == 'coarse_coding': - # this is equivalent to a frame-based positioning system reported in Heiga Zen's work - self.frame_feature_size = 4 - self.cc_features = self.compute_coarse_coding_features(3) - else: - sys.exit('Unknown value for subphone_feats: %s' % (subphone_feats)) - - self.dimension = self.dict_size + self.frame_feature_size - - # if user wants to define their own input, simply set the question set to empty. - if self.dict_size == 0: - self.dimension = 0 - - logger.debug('HTS-derived input feature dimension is %d + %d = %d' % - (self.dict_size, self.frame_feature_size, self.dimension)) - - def prepare_dur_data(self, ori_file_list, output_file_list, label_type="state_align", feature_type=None, unit_size=None, feat_size=None): - ''' - extracting duration binary features or numerical features. 
- ''' - logger = logging.getLogger("dur") - utt_number = len(ori_file_list) - if utt_number != len(output_file_list): - print("the number of input and output files should be the same!\n") - sys.exit(1) - - ### set default feature type to numerical, if not assigned ### - if not feature_type: - feature_type = "numerical" - - ### set default unit size to state, if not assigned ### - if not unit_size: - unit_size = "state" - if label_type == "phone_align": - unit_size = "phoneme" - - ### set default feat size to frame or phoneme, if not assigned ### - if feature_type == "binary": - if not feat_size: - feat_size = "frame" - elif feature_type == "numerical": - if not feat_size: - feat_size = "phoneme" - else: - logger.critical( - "Unknown feature type: %s \n Please use one of the following: binary, numerical\n" % (feature_type)) - sys.exit(1) - - for i in range(utt_number): - self.extract_dur_features( - ori_file_list[i], output_file_list[i], label_type, feature_type, unit_size, feat_size) - - def extract_dur_features(self, in_file_name, out_file_name=None, label_type="state_align", feature_type=None, unit_size=None, feat_size=None): - logger = logging.getLogger("dur") - if label_type == "phone_align": - A = self.extract_dur_from_phone_alignment_labels( - in_file_name, feature_type, unit_size, feat_size) - elif label_type == "state_align": - A = self.extract_dur_from_state_alignment_labels( - in_file_name, feature_type, unit_size, feat_size) - else: - logger.critical( - "we don't support %s labels as of now!!" % (label_type)) - sys.exit(1) - - if out_file_name: - io_funcs = BinaryIOCollection() - io_funcs.array_to_binary_file(A, out_file_name) - else: - return A - - def extract_dur_from_state_alignment_labels(self, file_name, feature_type, unit_size, feat_size): - logger = logging.getLogger("dur") - - state_number = 5 - dur_dim = state_number - - if feature_type == "binary": - dur_feature_matrix = numpy.empty((100000, 1)) - elif feature_type == "numerical": - if unit_size == "state": - dur_feature_matrix = numpy.empty((100000, dur_dim)) - current_dur_array = numpy.zeros((dur_dim, 1)) - else: # phoneme/syllable/word - dur_feature_matrix = numpy.empty((100000, 1)) - - fid = open(file_name) - utt_labels = fid.readlines() - fid.close() - - label_number = len(utt_labels) - logger.info('loaded %s, %3d labels' % (file_name, label_number)) - - MLU_dur = [[], [], []] - list_of_silences = ['#', 'sil', 'pau', 'SIL'] - current_index = 0 - dur_feature_index = 0 - syllable_duration = 0 - word_duration = 0 - for line in utt_labels: - line = line.strip() - - if len(line) < 1: - continue - temp_list = re.split('\s+', line) - start_time = int(temp_list[0]) - end_time = int(temp_list[1]) - - full_label = temp_list[2] - # remove state information [k] - full_label_length = len(full_label) - 3 - state_index = full_label[full_label_length + 1] - state_index = int(state_index) - 1 - current_phone = full_label[full_label.index( - '-') + 1:full_label.index('+')] - - frame_number = int(end_time/50000) - int(start_time/50000) - - if state_index == 1: - phone_duration = frame_number - - for i in range(state_number - 1): - line = utt_labels[current_index + i + 1].strip() - temp_list = re.split('\s+', line) - phone_duration += int( - (int(temp_list[1]) - int(temp_list[0]))/50000) - - syllable_duration += phone_duration - word_duration += phone_duration - - ### for syllable and word positional information ### - label_binary_vector = self.pattern_matching_binary(full_label) - label_continuous_vector = 
self.pattern_matching_continous_position( - full_label) - - ### syllable ending information ### - syl_end = 0 - # pos-bw and c-silences - if(label_continuous_vector[0, 1] == 1 or current_phone in list_of_silences): - syl_end = 1 - - ### word ending information ### - word_end = 0 - if(syl_end and label_continuous_vector[0, 9] == 1 or current_phone in list_of_silences): - word_end = 1 - - if feature_type == "binary": - current_block_array = numpy.zeros((frame_number, 1)) - if unit_size == "state": - current_block_array[-1] = 1 - elif unit_size == "phoneme": - if state_index == state_number: - current_block_array[-1] = 1 - else: - logger.critical( - "Unknown unit size: %s \n Please use one of the following: state, phoneme\n" % (unit_size)) - sys.exit(1) - elif feature_type == "numerical": - if unit_size == "state": - current_dur_array[current_index % 5] = frame_number - if feat_size == "phoneme" and state_index == state_number: - current_block_array = current_dur_array.transpose() - if feat_size == "frame": - current_block_array = numpy.tile( - current_dur_array.transpose(), (frame_number, 1)) - elif state_index == state_number: - if unit_size == "phoneme": - current_block_array = numpy.array([phone_duration]) - elif unit_size == "syllable": - current_block_array = numpy.array([syllable_duration]) - elif unit_size == "word": - current_block_array = numpy.array([word_duration]) - if syl_end: - syllable_duration = 0 - if word_end: - word_duration = 0 - - ### writing into dur_feature_matrix ### - if feat_size == "frame": - dur_feature_matrix[dur_feature_index:dur_feature_index + - frame_number, ] = current_block_array - dur_feature_index = dur_feature_index + frame_number - elif state_index == state_number: - if feat_size == "phoneme": - dur_feature_matrix[dur_feature_index:dur_feature_index + - 1, ] = current_block_array - dur_feature_index = dur_feature_index + 1 - elif current_phone != '#': # removing silence here - if feat_size == "syllable" and syl_end: - dur_feature_matrix[dur_feature_index:dur_feature_index + - 1, ] = current_block_array - dur_feature_index = dur_feature_index + 1 - elif feat_size == "word" and word_end: - dur_feature_matrix[dur_feature_index:dur_feature_index + - 1, ] = current_block_array - dur_feature_index = dur_feature_index + 1 - elif feat_size == "MLU": - if word_end: - if current_phone == 'pau': - MLU_dur[0].append(1) - else: - MLU_dur[0].append( - int(label_continuous_vector[0, 24])) - if syl_end: - if current_phone == 'pau': - MLU_dur[1].append(1) - else: - MLU_dur[1].append( - int(label_continuous_vector[0, 7])) - MLU_dur[2].append(int(phone_duration)) - - current_index += 1 - - if feat_size == "MLU": - for seg_indx in xrange(len(MLU_dur)): - seg_len = len(MLU_dur[seg_indx]) - current_block_array = numpy.reshape( - numpy.array(MLU_dur[seg_indx]), (-1, 1)) - dur_feature_matrix[dur_feature_index:dur_feature_index + - seg_len, ] = current_block_array - dur_feature_index = dur_feature_index + seg_len - - dur_feature_matrix = dur_feature_matrix[0:dur_feature_index, ] - logger.debug('made duration matrix of %d frames x %d features' % - dur_feature_matrix.shape) - return dur_feature_matrix - - def extract_dur_from_phone_alignment_labels(self, file_name, feature_type, unit_size, feat_size): - logger = logging.getLogger("dur") - - dur_dim = 1 # hard coded here - - if feature_type == "binary": - dur_feature_matrix = numpy.empty((100000, dur_dim)) - elif feature_type == "numerical": - if unit_size == "phoneme": - dur_feature_matrix = numpy.empty((100000, dur_dim)) - - fid 
= open(file_name) - utt_labels = fid.readlines() - fid.close() - - label_number = len(utt_labels) - logger.info('loaded %s, %3d labels' % (file_name, label_number)) - - current_index = 0 - dur_feature_index = 0 - for line in utt_labels: - line = line.strip() - - if len(line) < 1: - continue - temp_list = re.split('\s+', line) - start_time = int(temp_list[0]) - end_time = int(temp_list[1]) - - full_label = temp_list[2] - - frame_number = int(end_time/50000) - int(start_time/50000) - - phone_duration = frame_number - - if feature_type == "binary": - current_block_array = numpy.zeros((frame_number, 1)) - if unit_size == "phoneme": - current_block_array[-1] = 1 - else: - logger.critical( - "Unknown unit size: %s \n Please use one of the following: phoneme\n" % (unit_size)) - sys.exit(1) - elif feature_type == "numerical": - if unit_size == "phoneme": - current_block_array = numpy.array([phone_duration]) - - ### writing into dur_feature_matrix ### - if feat_size == "frame": - dur_feature_matrix[dur_feature_index:dur_feature_index + - frame_number, ] = current_block_array - dur_feature_index = dur_feature_index + frame_number - elif feat_size == "phoneme": - dur_feature_matrix[dur_feature_index:dur_feature_index + - 1, ] = current_block_array - dur_feature_index = dur_feature_index + 1 - - current_index += 1 - - dur_feature_matrix = dur_feature_matrix[0:dur_feature_index, ] - logger.debug('made duration matrix of %d frames x %d features' % - dur_feature_matrix.shape) - return dur_feature_matrix - - def load_labels_with_phone_alignment(self, file_name, dur_file_name): - - # this is not currently used ??? -- it works now :D - logger = logging.getLogger("labels") - #logger.critical('unused function ???') - #raise Exception - - if dur_file_name: - io_funcs = BinaryIOCollection() - dur_dim = 1 # hard coded for now - manual_dur_data = io_funcs.load_binary_file(dur_file_name, dur_dim) - - if self.add_frame_features: - assert self.dimension == self.dict_size+self.frame_feature_size - elif self.subphone_feats != 'none': - assert self.dimension == self.dict_size+self.frame_feature_size - else: - assert self.dimension == self.dict_size - - label_feature_matrix = numpy.empty((100000, self.dimension)) - - ph_count = 0 - label_feature_index = 0 - with open(file_name) as fid: - all_data = fid.readlines() - for line in all_data: - line = line.strip() - if len(line) < 1: - continue - temp_list = re.split('\s+', line) - - if len(temp_list) == 1: - frame_number = 0 - full_label = temp_list[0] - else: - start_time = int(temp_list[0]) - end_time = int(temp_list[1]) - full_label = temp_list[2] - - # to do - support different frame shift - currently hardwired to 5msec - # currently under beta testing: support different frame shift - if dur_file_name: - frame_number = manual_dur_data[ph_count] - else: - frame_number = int(end_time/50000) - int(start_time/50000) - - if self.subphone_feats == "coarse_coding": - cc_feat_matrix = self.extract_coarse_coding_features_relative( - frame_number) - - ph_count = ph_count+1 - #label_binary_vector = self.pattern_matching(full_label) - label_binary_vector = self.pattern_matching_binary(full_label) - - # if there is no CQS question, the label_continuous_vector will become to empty - label_continuous_vector = self.pattern_matching_continous_position( - full_label) - label_vector = numpy.concatenate( - [label_binary_vector, label_continuous_vector], axis=1) - - if self.add_frame_features: - current_block_binary_array = numpy.zeros( - (frame_number, 
self.dict_size+self.frame_feature_size)) - for i in range(frame_number): - current_block_binary_array[i, - 0:self.dict_size] = label_vector - - if self.subphone_feats == 'minimal_phoneme': - # features which distinguish frame position in phoneme - current_block_binary_array[i, self.dict_size] = float( - i+1)/float(frame_number) # fraction through phone forwards - # fraction through phone backwards - current_block_binary_array[i, self.dict_size + - 1] = float(frame_number - i)/float(frame_number) - # phone duration - current_block_binary_array[i, - self.dict_size+2] = float(frame_number) - - elif self.subphone_feats == 'coarse_coding': - # features which distinguish frame position in phoneme using three continous numerical features - current_block_binary_array[i, - self.dict_size+0] = cc_feat_matrix[i, 0] - current_block_binary_array[i, - self.dict_size+1] = cc_feat_matrix[i, 1] - current_block_binary_array[i, - self.dict_size+2] = cc_feat_matrix[i, 2] - current_block_binary_array[i, - self.dict_size+3] = float(frame_number) - - elif self.subphone_feats == 'none': - pass - - else: - sys.exit('unknown subphone_feats type') - - label_feature_matrix[label_feature_index:label_feature_index + - frame_number, ] = current_block_binary_array - label_feature_index = label_feature_index + frame_number - - elif self.subphone_feats == 'none': - current_block_binary_array = label_vector - label_feature_matrix[label_feature_index:label_feature_index + - 1, ] = current_block_binary_array - label_feature_index = label_feature_index + 1 - - label_feature_matrix = label_feature_matrix[0:label_feature_index, ] - - logger.info('loaded %s, %3d labels' % (file_name, ph_count)) - logger.debug('made label matrix of %d frames x %d labels' % - label_feature_matrix.shape) - return label_feature_matrix - - def load_labels_with_state_alignment(self, file_name): - # setting add_frame_features to False performs either state/phoneme level normalisation - - logger = logging.getLogger("labels") - - if self.add_frame_features: - assert self.dimension == self.dict_size+self.frame_feature_size - elif self.subphone_feats != 'none': - assert self.dimension == self.dict_size+self.frame_feature_size - else: - assert self.dimension == self.dict_size - - # label_feature_matrix = numpy.empty((100000, self.dict_size+self.frame_feature_size)) - label_feature_matrix = numpy.empty((100000, self.dimension)) - - label_feature_index = 0 - - state_number = 5 - - lab_binary_vector = numpy.zeros((1, self.dict_size)) - fid = open(file_name) - utt_labels = fid.readlines() - fid.close() - current_index = 0 - label_number = len(utt_labels) - logger.info('loaded %s, %3d labels' % (file_name, label_number)) - - phone_duration = 0 - state_duration_base = 0 - for line in utt_labels: - line = line.strip() - - if len(line) < 1: - continue - temp_list = re.split('\s+', line) - - if len(temp_list) == 1: - frame_number = 0 - state_index = 1 - full_label = temp_list[0] - else: - start_time = int(temp_list[0]) - end_time = int(temp_list[1]) - frame_number = int(end_time/50000) - int(start_time/50000) - full_label = temp_list[2] - - # remove state information [k] - full_label_length = len(full_label) - 3 - state_index = full_label[full_label_length + 1] - - state_index = int(state_index) - 1 - state_index_backward = 6 - state_index - full_label = full_label[0:full_label_length] - - if state_index == 1: - current_frame_number = 0 - phone_duration = frame_number - state_duration_base = 0 - -# label_binary_vector = self.pattern_matching(full_label) - 
label_binary_vector = self.pattern_matching_binary(full_label) - - # if there is no CQS question, the label_continuous_vector will become to empty - label_continuous_vector = self.pattern_matching_continous_position( - full_label) - label_vector = numpy.concatenate( - [label_binary_vector, label_continuous_vector], axis=1) - - if len(temp_list) == 1: - state_index = state_number - else: - for i in range(state_number - 1): - line = utt_labels[current_index + i + 1].strip() - temp_list = re.split('\s+', line) - phone_duration += int( - (int(temp_list[1]) - int(temp_list[0]))/50000) - - if self.subphone_feats == "coarse_coding": - cc_feat_matrix = self.extract_coarse_coding_features_relative( - phone_duration) - - if self.add_frame_features: - current_block_binary_array = numpy.zeros( - (frame_number, self.dict_size+self.frame_feature_size)) - for i in range(frame_number): - current_block_binary_array[i, - 0:self.dict_size] = label_vector - - if self.subphone_feats == 'full': - # Zhizheng's original 9 subphone features: - current_block_binary_array[i, self.dict_size] = float( - i+1) / float(frame_number) # fraction through state (forwards) - # fraction through state (backwards) - current_block_binary_array[i, self.dict_size+1] = float( - frame_number - i) / float(frame_number) - # length of state in frames - current_block_binary_array[i, - self.dict_size+2] = float(frame_number) - # state index (counting forwards) - current_block_binary_array[i, - self.dict_size+3] = float(state_index) - # state index (counting backwards) - current_block_binary_array[i, self.dict_size + - 4] = float(state_index_backward) - - # length of phone in frames - current_block_binary_array[i, - self.dict_size+5] = float(phone_duration) - # fraction of the phone made up by current state - current_block_binary_array[i, self.dict_size + - 6] = float(frame_number) / float(phone_duration) - current_block_binary_array[i, self.dict_size+7] = float( - phone_duration - i - state_duration_base) / float(phone_duration) # fraction through phone (backwards) - current_block_binary_array[i, self.dict_size+8] = float( - state_duration_base + i + 1) / float(phone_duration) # fraction through phone (forwards) - - elif self.subphone_feats == 'state_only': - # features which only distinguish state: - current_block_binary_array[i, self.dict_size] = float( - state_index) # state index (counting forwards) - - elif self.subphone_feats == 'frame_only': - # features which distinguish frame position in phoneme: - current_frame_number += 1 - current_block_binary_array[i, self.dict_size] = float( - current_frame_number) / float(phone_duration) # fraction through phone (counting forwards) - - elif self.subphone_feats == 'uniform_state': - # features which distinguish frame position in phoneme: - current_frame_number += 1 - current_block_binary_array[i, self.dict_size] = float( - current_frame_number) / float(phone_duration) # fraction through phone (counting forwards) - new_state_index = max( - 1, round(float(current_frame_number)/float(phone_duration)*5)) - # state index (counting forwards) - current_block_binary_array[i, - self.dict_size+1] = float(new_state_index) - - elif self.subphone_feats == "coarse_coding": - # features which distinguish frame position in phoneme using three continous numerical features - current_block_binary_array[i, self.dict_size + - 0] = cc_feat_matrix[current_frame_number, 0] - current_block_binary_array[i, self.dict_size + - 1] = cc_feat_matrix[current_frame_number, 1] - current_block_binary_array[i, self.dict_size + 
-                                          2] = cc_feat_matrix[current_frame_number, 2]
-                        current_block_binary_array[i, self.dict_size+3] = float(phone_duration)
-                        current_frame_number += 1
-
-                    elif self.subphone_feats == 'minimal_frame':
-                        # features which distinguish state and minimally frame position in state:
-                        current_block_binary_array[i, self.dict_size] = float(i+1) / float(frame_number)  # fraction through state (forwards)
-                        current_block_binary_array[i, self.dict_size+1] = float(state_index)  # state index (counting forwards)
-                    elif self.subphone_feats == 'none':
-                        pass
-                    else:
-                        sys.exit('unknown subphone_feats type')
-
-                label_feature_matrix[label_feature_index:label_feature_index+frame_number, ] = current_block_binary_array
-                label_feature_index = label_feature_index + frame_number
-            elif self.subphone_feats == 'state_only' and state_index == state_number:
-                current_block_binary_array = numpy.zeros((state_number, self.dict_size+self.frame_feature_size))
-                for i in range(state_number):
-                    current_block_binary_array[i, 0:self.dict_size] = label_vector
-                    current_block_binary_array[i, self.dict_size] = float(i+1)  # state index (counting forwards)
-                label_feature_matrix[label_feature_index:label_feature_index+state_number, ] = current_block_binary_array
-                label_feature_index = label_feature_index + state_number
-            elif self.subphone_feats == 'none' and state_index == state_number:
-                current_block_binary_array = label_vector
-                label_feature_matrix[label_feature_index:label_feature_index+1, ] = current_block_binary_array
-                label_feature_index = label_feature_index + 1
-
-            state_duration_base += frame_number
-
-            current_index += 1
-
-        label_feature_matrix = label_feature_matrix[0:label_feature_index, ]
-        logger.debug('made label matrix of %d frames x %d labels' % label_feature_matrix.shape)
-        return label_feature_matrix
-
-    def extract_durational_features(self, dur_file_name=None, dur_data=None):
-
-        if dur_file_name:
-            io_funcs = BinaryIOCollection()
-            dur_dim = 1  # hard coded for now
-            dur_data = io_funcs.load_binary_file(dur_file_name, dur_dim)
-
-        ph_count = len(dur_data)
-        total_num_of_frames = int(sum(dur_data))
-
-        duration_feature_array = numpy.zeros((total_num_of_frames, self.frame_feature_size))
-
-        frame_index = 0
-        for i in range(ph_count):
-            frame_number = int(dur_data[i])
-            if self.subphone_feats == "coarse_coding":
-                cc_feat_matrix = self.extract_coarse_coding_features_relative(frame_number)
-
-                for j in range(frame_number):
-                    duration_feature_array[frame_index, 0] = cc_feat_matrix[j, 0]
-                    duration_feature_array[frame_index, 1] = cc_feat_matrix[j, 1]
-                    duration_feature_array[frame_index, 2] = cc_feat_matrix[j, 2]
-                    duration_feature_array[frame_index, 3] = float(frame_number)
-                    frame_index += 1
-
-            elif self.subphone_feats == 'full':
-                state_number = 5  # hard coded here
-                phone_duration = sum(dur_data[i, :])
-                state_duration_base = 0
-                for state_index in range(1, state_number+1):
-                    state_index_backward = (state_number - state_index) + 1
-                    frame_number = int(dur_data[i][state_index-1])
-                    for j in range(frame_number):
-                        duration_feature_array[frame_index, 0] = float(j+1) / float(frame_number)  # fraction through state (forwards)
-                        duration_feature_array[frame_index, 1] = float(frame_number - j) / float(frame_number)  # fraction through state (backwards)
-                        duration_feature_array[frame_index, 2] = float(frame_number)  # length of state in frames
-                        duration_feature_array[frame_index, 3] = float(state_index)  # state index (counting forwards)
-                        duration_feature_array[frame_index, 4] = float(state_index_backward)  # state index (counting backwards)
-
-                        duration_feature_array[frame_index, 5] = float(phone_duration)  # length of phone in frames
-                        duration_feature_array[frame_index, 6] = float(frame_number) / float(phone_duration)  # fraction of the phone made up by current state
-                        duration_feature_array[frame_index, 7] = float(phone_duration - j - state_duration_base) / float(phone_duration)  # fraction through phone (backwards)
-                        duration_feature_array[frame_index, 8] = float(state_duration_base + j + 1) / float(phone_duration)  # fraction through phone (forwards)
-                        frame_index += 1
-
-                state_duration_base += frame_number
-
-        return duration_feature_array
-
-    def compute_coarse_coding_features(self, num_states):
-        assert num_states == 3
-
-        npoints = 600
-        cc_features = numpy.zeros((num_states, npoints))
-
-        x1 = numpy.linspace(-1.5, 1.5, npoints)
-        x2 = numpy.linspace(-1.0, 2.0, npoints)
-        x3 = numpy.linspace(-0.5, 2.5, npoints)
-
-        mu1 = 0.0
-        mu2 = 0.5
-        mu3 = 1.0
-
-        sigma = 0.4
-
-        cc_features[0, :] = mlab.normpdf(x1, mu1, sigma)
-        cc_features[1, :] = mlab.normpdf(x2, mu2, sigma)
-        cc_features[2, :] = mlab.normpdf(x3, mu3, sigma)
-
-        return cc_features
-
-    def extract_coarse_coding_features_relative(self, phone_duration):
-        dur = int(phone_duration)
-
-        cc_feat_matrix = numpy.zeros((dur, 3))
-
-        for i in range(dur):
-            rel_indx = int((200/float(dur))*i)
-            cc_feat_matrix[i, 0] = self.cc_features[0, 300+rel_indx]
-            cc_feat_matrix[i, 1] = self.cc_features[1, 200+rel_indx]
-            cc_feat_matrix[i, 2] = self.cc_features[2, 100+rel_indx]
-
-        return cc_feat_matrix
-
-    # this function is not used now
-    def extract_coarse_coding_features_absolute(self, phone_duration):
-        dur = int(phone_duration)
-
-        cc_feat_matrix = numpy.zeros((dur, 3))
-
-        npoints1 = (dur*2)*10+1
-        npoints2 = (dur-1)*10+1
-        npoints3 = (2*dur-1)*10+1
-
-        x1 = numpy.linspace(-dur, dur, npoints1)
-        x2 = numpy.linspace(1, dur, npoints2)
-        x3 = numpy.linspace(1, 2*dur-1, npoints3)
-
-        mu1 = 0
-        mu2 = (1+dur)/2
-        mu3 = dur
-        variance = 1
-        sigma = variance*((dur/10)+2)
-        sigma1 = sigma
-        sigma2 = sigma-1
-        sigma3 = sigma
-
-        y1 = mlab.normpdf(x1, mu1, sigma1)
-        y2 = mlab.normpdf(x2, mu2, sigma2)
-        y3 = mlab.normpdf(x3, mu3, sigma3)
-
-        for i in range(dur):
-            cc_feat_matrix[i, 0] = y1[(dur+1+i)*10]
-            cc_feat_matrix[i, 1] = y2[i*10]
-            cc_feat_matrix[i, 2] = y3[i*10]
-
-        for i in range(3):
-            cc_feat_matrix[:, i] = cc_feat_matrix[:, i] / max(cc_feat_matrix[:, i])
-
-        return cc_feat_matrix
-
-    # this function is not used now
-
-    def pattern_matching(self, label):
-        # this function is where most time is spent during label preparation
-        #
-        # it might be possible to speed it up by using pre-compiled regular expressions?
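The coarse-coding features above drop three overlapping Gaussians over the phone and sample each at a position proportional to the frame index, giving a smooth three-dimensional encoding of relative position. A minimal standalone sketch of that idea (numpy only, not part of the scraped file; `mlab.normpdf` was removed from recent matplotlib, so an explicit Gaussian stands in):

```python
import numpy

def gaussian_pdf(x, mu, sigma):
    # normal density, equivalent to the old matplotlib.mlab.normpdf
    return numpy.exp(-0.5 * ((x - mu) / sigma) ** 2) / (sigma * numpy.sqrt(2 * numpy.pi))

def compute_coarse_coding_features(npoints=600, sigma=0.4):
    # three Gaussians centred at 0.0, 0.5 and 1.0 on staggered grids
    cc = numpy.zeros((3, npoints))
    cc[0, :] = gaussian_pdf(numpy.linspace(-1.5, 1.5, npoints), 0.0, sigma)
    cc[1, :] = gaussian_pdf(numpy.linspace(-1.0, 2.0, npoints), 0.5, sigma)
    cc[2, :] = gaussian_pdf(numpy.linspace(-0.5, 2.5, npoints), 1.0, sigma)
    return cc

def coarse_coding_relative(cc, phone_duration):
    # sample each track proportionally to the frame index, mirroring
    # extract_coarse_coding_features_relative above
    dur = int(phone_duration)
    feats = numpy.zeros((dur, 3))
    for i in range(dur):
        rel_indx = int((200 / float(dur)) * i)
        feats[i, 0] = cc[0, 300 + rel_indx]
        feats[i, 1] = cc[1, 200 + rel_indx]
        feats[i, 2] = cc[2, 100 + rel_indx]
    return feats

print(coarse_coding_relative(compute_coarse_coding_features(), 10).shape)  # (10, 3)
```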
- # (not trying this now, since we may change to to XML tree format for input instead of HTS labels) - # - label_size = len(label) - - lab_binary_vector = numpy.zeros((1, self.dict_size)) - - for i in range(self.dict_size): - current_question_list = self.question_dict[str(i)] - binary_flag = 0 - for iq in range(len(current_question_list)): - current_question = current_question_list[iq] - current_size = len(current_question) - if current_question[0] == '*' and current_question[current_size-1] == '*': - temp_question = current_question[1:current_size-1] - for il in range(1, label_size-current_size+2): - if temp_question == label[il:il+current_size-2]: - binary_flag = 1 - elif current_question[current_size-1] != '*': - temp_question = current_question[1:current_size] - if temp_question == label[label_size-current_size+1:label_size]: - binary_flag = 1 - elif current_question[0] != '*': - temp_question = current_question[0:current_size-1] - if temp_question == label[0:current_size-1]: - binary_flag = 1 - if binary_flag == 1: - break - lab_binary_vector[0, i] = binary_flag - - return lab_binary_vector - - def pattern_matching_binary(self, label): - - dict_size = len(self.discrete_dict) - lab_binary_vector = numpy.zeros((1, dict_size)) - - for i in range(dict_size): - current_question_list = self.discrete_dict[str(i)] - binary_flag = 0 - for iq in range(len(current_question_list)): - current_compiled = current_question_list[iq] - - ms = current_compiled.search(label) - if ms is not None: - binary_flag = 1 - break - lab_binary_vector[0, i] = binary_flag - - return lab_binary_vector - - def pattern_matching_continous_position(self, label): - - dict_size = len(self.continuous_dict) - - lab_continuous_vector = numpy.zeros((1, dict_size)) - - for i in range(dict_size): - continuous_value = -1.0 - - current_compiled = self.continuous_dict[str(i)] - - ms = current_compiled.search(label) - if ms is not None: - # assert len(ms.group()) == 1 - continuous_value = ms.group(1) - - lab_continuous_vector[0, i] = continuous_value - - return lab_continuous_vector - - def load_question_set(self, qs_file_name): - fid = open(qs_file_name) - question_index = 0 - question_dict = {} - ori_question_dict = {} - for line in fid.readlines(): - line = line.replace('\n', '') - if len(line) > 5: - temp_list = line.split('{') - temp_line = temp_list[1] - temp_list = temp_line.split('}') - temp_line = temp_list[0] - question_list = temp_line.split(',') - question_dict[str(question_index)] = question_list - ori_question_dict[str(question_index)] = line - question_index += 1 - fid.close() - - logger = logging.getLogger("labels") - logger.debug('loaded question set with %d questions' % - len(question_dict)) - - return question_dict, ori_question_dict - - def load_question_set_continous(self, qs_file_name): - - logger = logging.getLogger("labels") - - fid = open(qs_file_name) - binary_qs_index = 0 - continuous_qs_index = 0 - binary_dict = {} - continuous_dict = {} - LL = re.compile(re.escape('LL-')) - # regex for last question - LAST_QUESTION = re.compile(re.escape('(\d+)') + '$') - - for line in fid.readlines(): - line = line.replace('\n', '').replace('\t', ' ') - - if len(line) > 5: - temp_list = line.split('{') - temp_line = temp_list[1] - temp_list = temp_line.split('}') - temp_line = temp_list[0] - temp_line = temp_line.strip() - question_list = temp_line.split(',') - - temp_list = line.split(' ') - question_key = temp_list[1] -# print line - if temp_list[0] == 'CQS': - assert len(question_list) == 1 - processed_question = 
self.wildcards2regex( - question_list[0], convert_number_pattern=True) - if LAST_QUESTION.search(question_list[0]): - # last question must only match at end of HTS label string - processed_question = processed_question + '$' - continuous_dict[str(continuous_qs_index)] = re.compile( - processed_question) # save pre-compiled regular expression - continuous_qs_index = continuous_qs_index + 1 - elif temp_list[0] == 'QS': - re_list = [] - for temp_question in question_list: - processed_question = self.wildcards2regex( - temp_question) - if LL.search(question_key): - processed_question = '^'+processed_question - re_list.append(re.compile(processed_question)) - - binary_dict[str(binary_qs_index)] = re_list - binary_qs_index = binary_qs_index + 1 - else: - logger.critical( - 'The question set is not defined correctly: %s' % (line)) - raise Exception - -# question_index = question_index + 1 - return binary_dict, continuous_dict - - def wildcards2regex(self, question, convert_number_pattern=False): - """ - Convert HTK-style question into regular expression for searching labels. - If convert_number_pattern, keep the following sequences unescaped for - extracting continuous values): - (\d+) -- handles digit without decimal point - ([\d\.]+) -- handles digits with and without decimal point - """ - - # handle HTK wildcards (and lack of them) at ends of label: - prefix = "" - postfix = "" - if '*' in question: - if not question.startswith('*'): - prefix = "\A" - if not question.endswith('*'): - postfix = "\Z" - question = question.strip('*') - question = re.escape(question) - # convert remaining HTK wildcards * and ? to equivalent regex: - question = question.replace('\\*', '.*') - question = question.replace('\\?', '.') - question = prefix + question + postfix - - if convert_number_pattern: - question = question.replace('\\(\\\\d\\+\\)', '(\d+)') - question = question.replace( - '\\(\\[\\\\d\\\\\\.\\]\\+\\)', '([\d\.]+)') - return question - - -class HTSDurationLabelNormalisation(HTSLabelNormalisation): - """ - Unlike HTSLabelNormalisation, HTSDurationLabelNormalisation does not accept timings. - One line of labels is converted into 1 datapoint, that is, the label is not 'unpacked' - into frames. HTK state index [\d] is not handled in any special way. 
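`wildcards2regex` above converts an HTK-style question into a searchable regex: ends without `*` get anchored, the body is escaped, the wildcards `*`/`?` become `.*`/`.`, and for CQS questions the `(\d+)` capture group is re-instated after escaping so a numeric value can be pulled out of the label. A standalone sketch of the same conversion (uses `re.escape` on the capture pattern itself, so it is robust to the Python version):

```python
import re

def wildcards2regex(question, convert_number_pattern=False):
    # anchor ends that carry no '*', escape the rest, then map HTK wildcards
    prefix = postfix = ""
    if '*' in question:
        if not question.startswith('*'):
            prefix = '\\A'
        if not question.endswith('*'):
            postfix = '\\Z'
    body = re.escape(question.strip('*'))
    body = body.replace('\\*', '.*').replace('\\?', '.')
    if convert_number_pattern:
        # let CQS questions capture a numeric value from the label
        body = body.replace(re.escape('(\\d+)'), '(\\d+)')
        body = body.replace(re.escape('([\\d\\.]+)'), '([\\d\\.]+)')
    return prefix + body + postfix

# a QS question: matched anywhere in the HTS label
print(wildcards2regex('*-#+*'))
# a CQS question: the (\d+) group survives escaping and captures '3'
m = re.search(wildcards2regex('*@(\\d+)_*', True), 'x^x-a+b@3_2/A:0_0_0')
print(m.group(1))  # '3'
```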
- """ - - def __init__(self, question_file_name=None, subphone_feats='full', continuous_flag=True): - super(HTSDurationLabelNormalisation, self).__init__(question_file_name=question_file_name, - subphone_feats=subphone_feats, continuous_flag=continuous_flag) - # don't use extra features beyond those in questions for duration labels: - self.dimension = self.dict_size - - def load_labels_with_state_alignment(self, file_name, add_frame_features=False): - # add_frame_features not used in HTSLabelNormalisation -- only in XML version - - logger = logging.getLogger("labels") - - assert self.dimension == self.dict_size - - label_feature_matrix = numpy.empty((100000, self.dimension)) - - label_feature_index = 0 - - lab_binary_vector = numpy.zeros((1, self.dict_size)) - fid = open(file_name) - utt_labels = fid.readlines() - fid.close() - current_index = 0 - label_number = len(utt_labels) - logger.info('loaded %s, %3d labels' % (file_name, label_number)) - - # remove empty lines - utt_labels = [line for line in utt_labels if line != ''] - - for (line_number, line) in enumerate(utt_labels): - temp_list = re.split('\s+', line.strip()) - # take last entry -- ignore timings if present - full_label = temp_list[-1] - - label_binary_vector = self.pattern_matching_binary(full_label) - - # if there is no CQS question, the label_continuous_vector will become to empty - label_continuous_vector = self.pattern_matching_continous_position( - full_label) - label_vector = numpy.concatenate( - [label_binary_vector, label_continuous_vector], axis=1) - - label_feature_matrix[line_number, :] = label_vector[:] - - label_feature_matrix = label_feature_matrix[:line_number+1, :] - logger.debug('made label matrix of %d frames x %d labels' % - label_feature_matrix.shape) - return label_feature_matrix - - -# ----------------------------- - - -if __name__ == '__main__': - - qs_file_name = '/afs/inf.ed.ac.uk/group/cstr/projects/blizzard_entries/blizzard2016/straight_voice/Hybrid_duration_experiments/dnn_tts_release/lstm_rnn/data/questions.hed' - - print(qs_file_name) - - ori_file_list = ['/afs/inf.ed.ac.uk/group/cstr/projects/blizzard_entries/blizzard2016/straight_voice/Hybrid_duration_experiments/dnn_tts_release/lstm_rnn/data/label_state_align/AMidsummerNightsDream_000_000.lab'] - output_file_list = ['/afs/inf.ed.ac.uk/group/cstr/projects/blizzard_entries/blizzard2016/straight_voice/Hybrid_duration_experiments/dnn_tts_release/lstm_rnn/data/binary_label_601/AMidsummerNightsDream_000_000.lab'] - #output_file_list = ['/afs/inf.ed.ac.uk/group/cstr/projects/blizzard_entries/blizzard2016/straight_voice/Hybrid_duration_experiments/dnn_tts_release/lstm_rnn/data/dur/AMidsummerNightsDream_000_000.dur'] - - label_operater = HTSLabelNormalisation(qs_file_name) - label_operater.perform_normalisation(ori_file_list, output_file_list) - # feature_type="binary" - #unit_size = "phoneme" - #feat_size = "phoneme" - #label_operater.prepare_dur_data(ori_file_list, output_file_list, feature_type, unit_size, feat_size) - #label_operater.prepare_dur_data(ori_file_list, output_file_list, feature_type) - print(label_operater.dimension) -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. 
-# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - - -import logging -import sys -from multiprocessing.pool import ThreadPool as Pool - - -# a generic class of linguistic feature extraction -## -class LinguisticBase(object): - def __init__(self, dimension=0): - # the feature dimensionality of output (should that read 'input' ?) 
- self.dimension = dimension - - # the number of utterances to be normalised - self.utterance_num = 0 - - # the ori_file_list contains the file paths of the raw linguistic data - # the output_file_list contains the file paths of the normalised linguistic data - ## - def perform_normalisation(self, ori_file_list, output_file_list, label_type="state_align", dur_file_list=None): - - logger = logging.getLogger("perform_normalisation") - logger.info('perform linguistic feature extraction') - self.utterance_num = len(ori_file_list) - if self.utterance_num != len(output_file_list): - logger.error( - 'the number of input and output linguistic files should be the same!\n') - sys.exit(1) - - def _perform_normalisation(i): - if not dur_file_list: - self.extract_linguistic_features( - ori_file_list[i], output_file_list[i], label_type) - else: - self.extract_linguistic_features( - ori_file_list[i], output_file_list[i], label_type, dur_file_list[i]) - - pool = Pool() - pool.map(_perform_normalisation, range(self.utterance_num)) - pool.close() - pool.join() - - # the exact function to do the work - # need to be implemented in the specific class - # the function will write the linguistic features directly to the output file - def extract_linguistic_features(self, in_file_name, out_file_name, label_type, dur_file_name=None): - pass -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. 
-################################################################################ - - -from io_funcs.binary_io import BinaryIOCollection -import logging -import numpy - -from .feature_normalisation_base import FeatureNormBase - - -class MeanVarianceNorm(FeatureNormBase): - ''' - plan: 1: support normal MVN and denormalisation for both input and output - 2: support stream-based operation: for example, some streams can use min-max, other streams use MVN, may need one more class - ''' -# def __init__(self, feature_dimension): - - def __init__(self, feature_dimension): - - self.mean_vector = None - self.std_vector = None - self.feature_dimension = feature_dimension - - def feature_normalisation(self, in_file_list, out_file_list): - logger = logging.getLogger('feature_normalisation') - -# self.feature_dimension = feature_dimension - try: - assert len(in_file_list) == len(out_file_list) - except AssertionError: - logger.critical('The input and output file numbers are not the same! %d vs %d' % ( - len(in_file_list), len(out_file_list))) - raise - - if self.mean_vector is None: - self.mean_vector = self.compute_mean( - in_file_list, 0, self.feature_dimension) - if self.std_vector is None: - self.std_vector = self.compute_std( - in_file_list, self.mean_vector, 0, self.feature_dimension) - - io_funcs = BinaryIOCollection() - file_number = len(in_file_list) - for i in range(file_number): - features, current_frame_number = io_funcs.load_binary_file_frame( - in_file_list[i], self.feature_dimension) - - mean_matrix = numpy.tile( - self.mean_vector, (current_frame_number, 1)) - std_matrix = numpy.tile(self.std_vector, (current_frame_number, 1)) - - norm_features = (features - mean_matrix) / std_matrix - - io_funcs.array_to_binary_file(norm_features, out_file_list[i]) - - return self.mean_vector, self.std_vector - - def feature_denormalisation(self, in_file_list, out_file_list, mean_vector, std_vector): - io_funcs = BinaryIOCollection() - file_number = len(in_file_list) - try: - assert len(in_file_list) == len(out_file_list) - except AssertionError: - logger.critical('The input and output file numbers are not the same! 
%d vs %d' % ( - len(in_file_list), len(out_file_list))) - raise - - try: - assert mean_vector.size == self.feature_dimension and std_vector.size == self.feature_dimension - except AssertionError: - logger.critical( - 'the dimensionalities of the mean and standard derivation vectors are not the same as the dimensionality of the feature') - raise - - for i in range(file_number): - features, current_frame_number = io_funcs.load_binary_file_frame( - in_file_list[i], self.feature_dimension) - - mean_matrix = numpy.tile(mean_vector, (current_frame_number, 1)) - std_matrix = numpy.tile(std_vector, (current_frame_number, 1)) - - norm_features = features * std_matrix + mean_matrix - - io_funcs.array_to_binary_file(norm_features, out_file_list[i]) - - def load_mean_std_values(self, acoustic_norm_file): - - logger = logging.getLogger('feature_normalisation') - - io_funcs = BinaryIOCollection() - mean_std_vector, frame_number = io_funcs.load_binary_file_frame( - acoustic_norm_file, 1) - mean_std_vector = numpy.reshape(mean_std_vector, (-1, )) - self.mean_vector = mean_std_vector[0:frame_number//2] - self.std_vector = mean_std_vector[frame_number//2:] - - logger.info('Loaded mean std values from the trained data for feature dimension of %d' % - self.feature_dimension) - return self.mean_vector, self.std_vector - - def compute_mean(self, file_list, start_index, end_index): - - logger = logging.getLogger('feature_normalisation') - - local_feature_dimension = end_index - start_index - - mean_vector = numpy.zeros((1, local_feature_dimension)) - all_frame_number = 0 - - io_funcs = BinaryIOCollection() - for file_name in file_list: - features, current_frame_number = io_funcs.load_binary_file_frame( - file_name, self.feature_dimension) - - mean_vector += numpy.reshape(numpy.sum( - features[:, start_index:end_index], axis=0), (1, local_feature_dimension)) - all_frame_number += current_frame_number - - mean_vector /= float(all_frame_number) - - # setting the print options in this way seems to break subsequent printing of numpy float32 types - # no idea what is going on - removed until this can be solved - # po=numpy.get_printoptions() - # numpy.set_printoptions(precision=2, threshold=20, linewidth=1000, edgeitems=4) - logger.info('computed mean vector of length %d :' % - mean_vector.shape[1]) - logger.info(' mean: %s' % mean_vector) - # restore the print options - # numpy.set_printoptions(po) - - self.mean_vector = mean_vector - - return mean_vector - - def compute_std(self, file_list, mean_vector, start_index, end_index): - - logger = logging.getLogger('feature_normalisation') - - local_feature_dimension = end_index - start_index - - std_vector = numpy.zeros((1, self.feature_dimension)) - all_frame_number = 0 - - io_funcs = BinaryIOCollection() - for file_name in file_list: - features, current_frame_number = io_funcs.load_binary_file_frame( - file_name, self.feature_dimension) - - mean_matrix = numpy.tile(mean_vector, (current_frame_number, 1)) - - std_vector += numpy.reshape(numpy.sum( - (features[:, start_index:end_index] - mean_matrix) ** 2, axis=0), (1, local_feature_dimension)) - all_frame_number += current_frame_number - - std_vector /= float(all_frame_number) - - std_vector = std_vector ** 0.5 - - # setting the print options in this way seems to break subsequent printing of numpy float32 types - # no idea what is going on - removed until this can be solved - # po=numpy.get_printoptions() - # numpy.set_printoptions(precision=2, threshold=20, linewidth=1000, edgeitems=4) - logger.info('computed std vector 
of length %d' % std_vector.shape[1]) - logger.info(' std: %s' % std_vector) - # restore the print options - # numpy.set_printoptions(po) - - self.std_vector = std_vector - - return std_vector - -import numpy -import sys -from io_funcs.binary_io import BinaryIOCollection - -import logging - - -class MergeFeat(object): - - def __init__(self, lab_dim=481, feat_dim=1): - - self.logger = logging.getLogger("labels") - - self.lab_dim = lab_dim - self.feat_dim = feat_dim - - def merge_data(self, binary_label_file_list, new_feat_file_list, out_feat_file_list): - ''' - merging new features with normalised label features - ''' - utt_number = len(new_feat_file_list) - if utt_number != len(binary_label_file_list): - print( - "the number of new feature input files and label files should be the same!\n") - sys.exit(1) - - new_feat_ext = new_feat_file_list[0].split('/')[-1].split('.')[1] - - io_funcs = BinaryIOCollection() - for i in range(utt_number): - lab_file_name = binary_label_file_list[i] - new_feat_file_name = new_feat_file_list[i] - out_feat_file_name = out_feat_file_list[i] - - lab_features, lab_frame_number = io_funcs.load_binary_file_frame( - lab_file_name, self.lab_dim) - new_features, feat_frame_number = io_funcs.load_binary_file_frame( - new_feat_file_name, self.feat_dim) - - if (lab_frame_number - feat_frame_number) > 5: - base_file_name = new_feat_file_list[i].split( - '/')[-1].split('.')[0] - self.logger.critical("the number of frames in label and new features are different: %d vs %d (%s)" % ( - lab_frame_number, feat_frame_number, base_file_name)) - raise - - merged_features = numpy.zeros( - (lab_frame_number, self.lab_dim+self.feat_dim)) - - merged_features[0:lab_frame_number, 0:self.lab_dim] = lab_features - merged_features[0:feat_frame_number, self.lab_dim:self.lab_dim + - self.feat_dim] = new_features[0:lab_frame_number, ] - - io_funcs.array_to_binary_file(merged_features, out_feat_file_name) - self.logger.debug('merged new feature %s of %d frames with %d label features' % ( - new_feat_ext, feat_frame_number, lab_frame_number)) -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - - -import numpy -from io_funcs.binary_io import BinaryIOCollection -import logging - - -class MinMaxNormalisation(object): - def __init__(self, feature_dimension, min_value=0.01, max_value=0.99, min_vector=0.0, max_vector=0.0, exclude_columns=[]): - - # this is the wrong name for this logger because we can also normalise labels here too - logger = logging.getLogger("acoustic_norm") - - self.target_min_value = min_value - self.target_max_value = max_value - - self.feature_dimension = feature_dimension - - self.min_vector = min_vector - self.max_vector = max_vector - - self.exclude_columns = exclude_columns - - if type(min_vector) != float: - try: - assert(len(self.min_vector) == self.feature_dimension) - except AssertionError: - logger.critical('inconsistent feature_dimension (%d) and length of min_vector (%d)' % ( - self.feature_dimension, len(self.min_vector))) - raise - - if type(max_vector) != float: - try: - assert(len(self.max_vector) == self.feature_dimension) - except AssertionError: - logger.critical('inconsistent feature_dimension (%d) and length of max_vector (%d)' % ( - self.feature_dimension, len(self.max_vector))) - raise - - logger.debug( - 'MinMaxNormalisation created for feature dimension of %d' % self.feature_dimension) - - def load_min_max_values(self, label_norm_file): - - logger = logging.getLogger("acoustic_norm") - - io_funcs = BinaryIOCollection() - min_max_vector, frame_number = io_funcs.load_binary_file_frame( - label_norm_file, 1) - min_max_vector = numpy.reshape(min_max_vector, (-1, )) - self.min_vector = min_max_vector[0:frame_number//2] - self.max_vector = min_max_vector[frame_number//2:] - - logger.info('Loaded min max values from the trained data for feature dimension of %d' % - self.feature_dimension) - - def find_min_max_values(self, in_file_list): - - logger = logging.getLogger("acoustic_norm") - - file_number = len(in_file_list) - min_value_matrix = numpy.zeros((file_number, self.feature_dimension)) - max_value_matrix = numpy.zeros((file_number, self.feature_dimension)) - io_funcs = BinaryIOCollection() - for i in range(file_number): - features = io_funcs.load_binary_file( - in_file_list[i], self.feature_dimension) - - temp_min = numpy.amin(features, axis=0) - temp_max = numpy.amax(features, axis=0) - - min_value_matrix[i, ] = temp_min - max_value_matrix[i, ] = temp_max - - self.min_vector = numpy.amin(min_value_matrix, axis=0) - self.max_vector = numpy.amax(max_value_matrix, axis=0) - self.min_vector = numpy.reshape( - self.min_vector, (1, self.feature_dimension)) - self.max_vector = numpy.reshape( - self.max_vector, (1, self.feature_dimension)) - - # po=numpy.get_printoptions() - # numpy.set_printoptions(precision=2, threshold=20, linewidth=1000, edgeitems=4) - logger.info('across %d files found min/max values of length %d:' % - (file_number, self.feature_dimension)) - logger.info(' min: %s' % self.min_vector) - 
logger.info(' max: %s' % self.max_vector) - # restore the print options - # numpy.set_printoptions(po) - - def normalise_data(self, in_file_list, out_file_list): - file_number = len(in_file_list) - - fea_max_min_diff = self.max_vector - self.min_vector - diff_value = self.target_max_value - self.target_min_value - fea_max_min_diff = numpy.reshape( - fea_max_min_diff, (1, self.feature_dimension)) - - target_max_min_diff = numpy.zeros((1, self.feature_dimension)) - target_max_min_diff.fill(diff_value) - - target_max_min_diff[fea_max_min_diff <= 0.0] = 1.0 - fea_max_min_diff[fea_max_min_diff <= 0.0] = 1.0 - - io_funcs = BinaryIOCollection() - for i in range(file_number): - features = io_funcs.load_binary_file( - in_file_list[i], self.feature_dimension) - - frame_number = features.size // self.feature_dimension - fea_min_matrix = numpy.tile(self.min_vector, (frame_number, 1)) - target_min_matrix = numpy.tile( - self.target_min_value, (frame_number, self.feature_dimension)) - - fea_diff_matrix = numpy.tile(fea_max_min_diff, (frame_number, 1)) - diff_norm_matrix = numpy.tile( - target_max_min_diff, (frame_number, 1)) / fea_diff_matrix - - norm_features = diff_norm_matrix * \ - (features - fea_min_matrix) + target_min_matrix - - # If we are to keep some columns unnormalised, use advanced indexing to - # reinstate original values: - m, n = numpy.shape(features) - for col in self.exclude_columns: - norm_features[list(range(m)), [col] * - m] = features[list(range(m)), [col]*m] - - io_funcs.array_to_binary_file(norm_features, out_file_list[i]) - -# norm_features = numpy.array(norm_features, 'float32') -# fid = open(out_file_list[i], 'wb') -# norm_features.tofile(fid) -# fid.close() - - def denormalise_data(self, in_file_list, out_file_list): - - logger = logging.getLogger("acoustic_norm") - - file_number = len(in_file_list) - logger.info( - 'MinMaxNormalisation.denormalise_data for %d files' % file_number) - - # print self.max_vector, self.min_vector - fea_max_min_diff = self.max_vector - self.min_vector - diff_value = self.target_max_value - self.target_min_value - # logger.debug('reshaping fea_max_min_diff from shape %s to (1,%d)' % (fea_max_min_diff.shape, self.feature_dimension) ) - - fea_max_min_diff = numpy.reshape( - fea_max_min_diff, (1, self.feature_dimension)) - - target_max_min_diff = numpy.zeros((1, self.feature_dimension)) - target_max_min_diff.fill(diff_value) - - target_max_min_diff[fea_max_min_diff <= 0.0] = 1.0 - fea_max_min_diff[fea_max_min_diff <= 0.0] = 1.0 - - io_funcs = BinaryIOCollection() - for i in range(file_number): - features = io_funcs.load_binary_file( - in_file_list[i], self.feature_dimension) - - frame_number = features.size // self.feature_dimension - fea_min_matrix = numpy.tile(self.min_vector, (frame_number, 1)) - target_min_matrix = numpy.tile( - self.target_min_value, (frame_number, self.feature_dimension)) - - fea_diff_matrix = numpy.tile(fea_max_min_diff, (frame_number, 1)) - diff_norm_matrix = fea_diff_matrix / \ - numpy.tile(target_max_min_diff, (frame_number, 1)) - norm_features = diff_norm_matrix * \ - (features - target_min_matrix) + fea_min_matrix - io_funcs.array_to_binary_file(norm_features, out_file_list[i]) - - def normal_standardization(self, in_file_list, out_file_list): - mean_vector = self.compute_mean(in_file_list) - std_vector = self.compute_std(in_file_list, mean_vector) - - io_funcs = BinaryIOCollection() - file_number = len(in_file_list) - for i in range(file_number): - features = io_funcs.load_binary_file( - in_file_list[i], 
self.feature_dimension) - current_frame_number = features.size // self.feature_dimension - - mean_matrix = numpy.tile(mean_vector, (current_frame_number, 1)) - std_matrix = numpy.tile(std_vector, (current_frame_number, 1)) - - norm_features = (features - mean_matrix) / std_matrix - - io_funcs.array_to_binary_file(norm_features, out_file_list[i]) - - def compute_mean(self, file_list): - - logger = logging.getLogger("acoustic_norm") - - mean_vector = numpy.zeros((1, self.feature_dimension)) - all_frame_number = 0 - - io_funcs = BinaryIOCollection() - for file_name in file_list: - features = io_funcs.load_binary_file( - file_name, self.feature_dimension) - current_frame_number = features.size // self.feature_dimension - mean_vector += numpy.reshape(numpy.sum(features, axis=0), - (1, self.feature_dimension)) - all_frame_number += current_frame_number - - mean_vector /= float(all_frame_number) - - # po=numpy.get_printoptions() - # numpy.set_printoptions(precision=2, threshold=20, linewidth=1000, edgeitems=4) - logger.info('computed mean vector of length %d :' % - mean_vector.shape[1]) - logger.info(' mean: %s' % mean_vector) - # restore the print options - # numpy.set_printoptions(po) - - return mean_vector - - def compute_std(self, file_list, mean_vector): - - logger = logging.getLogger("acoustic_norm") - - std_vector = numpy.zeros((1, self.feature_dimension)) - all_frame_number = 0 - - io_funcs = BinaryIOCollection() - for file_name in file_list: - features = io_funcs.load_binary_file( - file_name, self.feature_dimension) - current_frame_number = features.size // self.feature_dimension - mean_matrix = numpy.tile(mean_vector, (current_frame_number, 1)) - - std_vector += numpy.reshape(numpy.sum((features - mean_matrix) - ** 2, axis=0), (1, self.feature_dimension)) - all_frame_number += current_frame_number - - std_vector /= float(all_frame_number) - - std_vector = std_vector ** 0.5 - - # po=numpy.get_printoptions() - # numpy.set_printoptions(precision=2, threshold=20, linewidth=1000, edgeitems=4) - logger.info('computed std vector of length %d' % std_vector.shape[1]) - logger.info(' std: %s' % std_vector) - # restore the print options - # numpy.set_printoptions(po) - - return std_vector - - -if __name__ == '__main__': - - in_file_list = ['/group/project/dnn_tts/data/nick/sp/nick/herald_001.sp'] - out_file_list = ['/group/project/dnn_tts/herald_001.sp'] - out_file_list1 = ['/group/project/dnn_tts/herald_001.test.sp'] - - feature_dimension = 1025 - - normaliser = MinMaxNormalisation( - feature_dimension, min_value=0.01, max_value=0.99) - normaliser.find_min_max_values(in_file_list) - tmp_min_vector = normaliser.min_vector - tmp_max_vector = normaliser.max_vector - normaliser.normalise_data(in_file_list, out_file_list) - - denormaliser = MinMaxNormalisation(feature_dimension, min_value=0.01, - max_value=0.99, min_vector=tmp_min_vector, max_vector=tmp_max_vector) - denormaliser.denormalise_data(out_file_list, out_file_list1) -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. 
-# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - - -# use theano to benefit from GPU computation -from theano import tensor as T -import theano - -import numpy -from numpy import dot -import logging - - -class MLParameterGeneration(object): - def __init__(self, delta_win=[-0.5, 0.0, 0.5], acc_win=[1.0, -2.0, 1.0]): - self.delta_win = delta_win - self.acc_win = acc_win - # assume the delta and acc windows have the same length - self.win_length = int(len(delta_win)/2) - - def build_theano_function_wdw(self): - - W_static = T.matrix('W_static') - W_delta = T.matrix('W_delta') - W_acc = T.matrix('W_acc') - D_static = T.matrix('D_static') - D_delta = T.matrix('D_delta') - D_acc = T.matrix('D_acc') - - WDW = T.dot(T.dot(W_static.T, D_static), W_static) + T.dot(T.dot(W_delta.T, - D_delta), W_delta) + T.dot(T.dot(W_acc.T, D_acc), W_acc) - - fn = theano.function( - inputs=[W_static, W_delta, W_acc, D_static, D_delta, D_acc], outputs=WDW) - - return fn - - def build_theano_function_wdu(self): - - W_static = T.matrix('W_static') - W_delta = T.matrix('W_delta') - W_acc = T.matrix('W_acc') - D_static = T.matrix('D_static') - D_delta = T.matrix('D_delta') - D_acc = T.matrix('D_acc') - U_static = T.matrix('U_static') - U_delta = T.matrix('U_delta') - U_acc = T.matrix('U_acc') - - WDU = T.dot(T.dot(W_static.T, D_static), U_static) + T.dot(T.dot(W_delta.T, - D_delta), U_delta) + T.dot(T.dot(W_acc.T, D_acc), U_acc) - - fn = theano.function(inputs=[W_static, W_delta, W_acc, D_static, - D_delta, D_acc, U_static, U_delta, U_acc], outputs=WDU) - - return fn - - def generation(self, features, covariance, static_dimension): - ''' - plan: use theano to do the parameter generation to benefit from GPU - ''' - - logger = logging.getLogger('param_generation') - logger.debug('starting MLParameterGeneration.generation') - - frame_number = features.shape[0] - - gen_parameter = numpy.zeros((frame_number, static_dimension)) - - W_static, W_delta, W_acc = 
self.prepare_window(frame_number) - - WT_static = numpy.transpose(W_static) - WT_delta = numpy.transpose(W_delta) - WT_acc = numpy.transpose(W_acc) - - fn_wdw = self.build_theano_function_wdw() - fn_wdu = self.build_theano_function_wdu() - - for d in range(static_dimension): - logger.debug('static dimension %3d of %3d' % - (d+1, static_dimension)) - - D_static = self.prepare_D(frame_number, covariance[d, 0]) - D_delta = self.prepare_D( - frame_number, covariance[static_dimension + d, 0]) - D_acc = self.prepare_D( - frame_number, covariance[2*static_dimension + d, 0]) - - U_static = self.prepare_U(frame_number, features[:, d:d+1]) - U_delta = self.prepare_U( - frame_number, features[:, static_dimension + d:static_dimension + d + 1]) - U_acc = self.prepare_U( - frame_number, features[:, 2*static_dimension + d:2*static_dimension + d + 1]) - -# WDW = dot(dot(WT_static, D_static), W_static) + dot(dot(WT_delta, D_delta), W_delta) + dot(dot(WT_acc, D_acc), W_acc) -# WDU = dot(dot(WT_static, D_static), U_static) + dot(dot(WT_delta, D_delta), U_delta) + dot(dot(WT_acc, D_acc), U_acc) -# temp_obs = dot(numpy.linalg.inv(WDW), WDU) - - WDW = fn_wdw(W_static, W_delta, W_acc, D_static, D_delta, D_acc) - WDU = fn_wdu(W_static, W_delta, W_acc, D_static, - D_delta, D_acc, U_static, U_delta, U_acc) - # only theano-dev version support matrix inversion - temp_obs = dot(numpy.linalg.inv(WDW), WDU) - - gen_parameter[0:frame_number, - d] = temp_obs[self.win_length:frame_number+self.win_length, 0] - - return gen_parameter - - def prepare_window(self, frame_number): - win_length = self.win_length - - w_static = numpy.zeros( - (frame_number+win_length*2, frame_number+win_length*2), dtype=theano.config.floatX) - w_delta = numpy.zeros( - (frame_number+win_length*2, frame_number+win_length*2), dtype=theano.config.floatX) - w_acc = numpy.zeros( - (frame_number+win_length*2, frame_number+win_length*2), dtype=theano.config.floatX) - - for i in range(frame_number+win_length*2): - w_static[i, i] = 1.0 - w_delta[i, i] = self.delta_win[win_length] - w_acc[i, i] = self.acc_win[win_length] - - for j in range(win_length): - if i - j > 0: - w_delta[i, i-j-1] = self.delta_win[win_length-j-1] - w_acc[i, i-j-1] = self.acc_win[win_length-j-1] - - if i + j + 1 < frame_number+win_length*2: - w_delta[i, i+j+1] = self.delta_win[win_length+j+1] - w_acc[i, i+j+1] = self.acc_win[win_length+j+1] - - return w_static, w_delta, w_acc - - def prepare_D(self, frame_number, D_value): - win_length = self.win_length - D_matrix = numpy.zeros( - (frame_number+win_length*2, frame_number+win_length*2), dtype=theano.config.floatX) - - for i in range(win_length): - D_matrix[i, i] = 1.0 - D_matrix[frame_number+win_length+i, - frame_number+win_length+i] = 1.0 - - for i in range(frame_number): - D_matrix[win_length+i, win_length+i] = 1.0 / D_value - - return D_matrix - - def prepare_U(self, frame_number, U_vector): - - win_length = self.win_length - - U_expanded = numpy.zeros( - (frame_number+win_length*2, 1), dtype=theano.config.floatX) - - U_expanded[win_length:frame_number+win_length, :] = U_vector - - return U_expanded -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. 
-# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - - -import numpy as np -from numpy import dot -import logging -from numpy import float64 - - -# Adding this before the bandmat import lets us import .pyx files without running bandmat's setup.py: -#import pyximport; pyximport.install() - - -import bandmat as bm -import bandmat.linalg as bla - - -class MLParameterGenerationFast(object): - def __init__(self, delta_win=[-0.5, 0.0, 0.5], acc_win=[1.0, -2.0, 1.0]): - self.delta_win = delta_win - self.acc_win = acc_win - # assume the delta and acc windows have the same length - self.win_length = int(len(delta_win)/2) - - def build_win_mats(self, windows, frames): - win_mats = [] - for l, u, win_coeff in windows: - assert l >= 0 and u >= 0 - assert len(win_coeff) == l + u + 1 - win_coeffs = np.tile(np.reshape(win_coeff, (l + u + 1, 1)), frames) - win_mat = bm.band_c_bm(u, l, win_coeffs).T - win_mats.append(win_mat) - - return win_mats - - def build_poe(self, b_frames, tau_frames, win_mats, sdw=None): - # tau_frames.astype('float64') - - if sdw is None: - sdw = max([win_mat.l + win_mat.u for win_mat in win_mats]) - num_windows = len(win_mats) - frames = len(b_frames) - assert np.shape(b_frames) == (frames, num_windows) - assert np.shape(tau_frames) == (frames, num_windows) - assert all([win_mat.l + win_mat.u <= sdw for win_mat in win_mats]) - - b = np.zeros((frames,)) - prec = bm.zeros(sdw, sdw, frames) - - for win_index, win_mat in enumerate(win_mats): - bm.dot_mv_plus_equals(win_mat.T, b_frames[:, win_index], target=b) - bm.dot_mm_plus_equals(win_mat.T, win_mat, target_bm=prec, - diag=float64(tau_frames[:, win_index])) - - return b, prec - - def generation(self, features, covariance, static_dimension): - - windows = [ - (0, 0, np.array([1.0])), - (1, 1, np.array([-0.5, 0.0, 0.5])), - (1, 1, np.array([1.0, -2.0, 1.0])), - ] - num_windows = len(windows) - - frame_number = features.shape[0] - - logger = logging.getLogger('param_generation') 
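`build_poe` above assembles the banded precision matrix and weighted-mean vector of a product of Gaussians over the static, delta and acc windows; the smoothed trajectory is then the solution of one linear system per dimension. A dense-numpy sketch of the same maths (fine only for short inputs; the bandmat version above is the one to use at scale):

```python
import numpy as np

def window_matrix(coeffs, frames):
    # band matrix applying a symmetric window (e.g. [-0.5, 0, 0.5]) per frame
    half = len(coeffs) // 2
    W = np.zeros((frames, frames))
    for t in range(frames):
        for k, c in enumerate(coeffs):
            j = t + k - half
            if 0 <= j < frames:
                W[t, j] = c
    return W

def mlpg(mu, var, frames):
    # mu, var: (frames, 3) means/variances of static, delta, acc for one dim;
    # solve (sum_k W_k^T D_k^-1 W_k) c = sum_k W_k^T D_k^-1 mu_k
    windows = [np.array([1.0]), np.array([-0.5, 0.0, 0.5]), np.array([1.0, -2.0, 1.0])]
    P = np.zeros((frames, frames))
    b = np.zeros(frames)
    for k, win in enumerate(windows):
        W = window_matrix(win, frames)
        tau = 1.0 / var[:, k]                # precisions
        P += W.T @ (tau[:, None] * W)
        b += W.T @ (tau * mu[:, k])
    return np.linalg.solve(P, b)

T = 5
print(mlpg(np.random.randn(T, 3), np.ones((T, 3)), T).shape)  # (5,)
```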
- logger.debug('starting MLParameterGeneration.generation') - - gen_parameter = np.zeros((frame_number, static_dimension)) - - win_mats = self.build_win_mats(windows, frame_number) - mu_frames = np.zeros((frame_number, 3)) - var_frames = np.zeros((frame_number, 3)) - - for d in range(static_dimension): - var_frames[:, 0] = covariance[:, d] - var_frames[:, 1] = covariance[:, static_dimension+d] - var_frames[:, 2] = covariance[:, static_dimension*2+d] - mu_frames[:, 0] = features[:, d] - mu_frames[:, 1] = features[:, static_dimension+d] - mu_frames[:, 2] = features[:, static_dimension*2+d] - var_frames[0, 1] = 100000000000 - var_frames[0, 2] = 100000000000 - var_frames[frame_number-1, 1] = 100000000000 - var_frames[frame_number-1, 2] = 100000000000 - - b_frames = mu_frames / var_frames - tau_frames = 1.0 / var_frames - - b, prec = self.build_poe(b_frames, tau_frames, win_mats) - mean_traj = bla.solveh(prec, b) - - gen_parameter[0:frame_number, d] = mean_traj - - return gen_parameter -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -# Added FAST_MLPG as a variable here, in case someone wants to use the slow version, but perhaps we -# should always use the bandmat version? -import numpy -import re -import os -import logging -from io_funcs.binary_io import BinaryIOCollection -FAST_MLPG = True -# io_funcs. 
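`acoustic_decomposition` below relies on `stream_start_index` to cut the network's concatenated output back into its mgc/lf0/vuv/bap streams before MLPG. A standalone sketch of that bookkeeping (the dimensions are the ones used in the `__main__` block further down):

```python
import numpy as np

out_dimension_dict = {'mgc': 150, 'lf0': 3, 'vuv': 1, 'bap': 75}

def stream_slices(dim_dict):
    # walk the dict in insertion order, which defines the feature layout
    slices, start = {}, 0
    for name, dim in dim_dict.items():
        slices[name] = slice(start, start + dim)
        start += dim
    return slices, start

slices, total = stream_slices(out_dimension_dict)
features = np.zeros((10, total))       # 10 frames of concatenated output
lf0 = features[:, slices['lf0']]       # (10, 3): static + delta + acc of lf0
print(total, lf0.shape)                # 229 (10, 3)
```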
- - -if FAST_MLPG: - from .mlpg_fast import MLParameterGenerationFast as MLParameterGeneration -# pass -else: - from .mlpg import MLParameterGeneration - - -class ParameterGeneration(object): - - def __init__(self, gen_wav_features=['mgc', 'lf0', 'bap'], enforce_silence=False): - self.gen_wav_features = gen_wav_features - self.enforce_silence = enforce_silence - - # Debug: - self.inf_float = -1.0e+10 - #self.inf_float = -50000 - - # not really necessary to have the logger rembered in the class - can easily obtain it by name instead - # self.logger = logging.getLogger('param_generation') - - self.var = {} - - def duration_decomposition(self, in_file_list, dimension, out_dimension_dict, file_extension_dict): - - logger = logging.getLogger('param_generation') - - logger.debug('duration_decomposition for %d files' % len(in_file_list)) - - state_number = 5 # hard coding, try removing in future? - - if len(list(out_dimension_dict.keys())) > 1: - logger.critical( - "we don't support any additional features along with duration as of now.") - sys.exit(1) - else: - feature_name = list(out_dimension_dict.keys())[0] - - io_funcs = BinaryIOCollection() - - findex = 0 - flen = len(in_file_list) - for file_name in in_file_list: - - findex = findex+1 - - dir_name = os.path.dirname(file_name) - file_id = os.path.splitext(os.path.basename(file_name))[0] - - features, frame_number = io_funcs.load_binary_file_frame( - file_name, dimension) - gen_features = numpy.int32(numpy.round(features)) - gen_features[gen_features < 1] = 1 - - if dimension > state_number: - gen_features = gen_features[:, state_number] - - logger.info('processing %4d of %4d: %s' % - (findex, flen, file_name)) - - new_file_name = os.path.join( - dir_name, file_id + file_extension_dict[feature_name]) - io_funcs.array_to_binary_file(gen_features, new_file_name) - - logger.debug('wrote to file %s' % new_file_name) - - def acoustic_decomposition(self, in_file_list, dimension, out_dimension_dict, file_extension_dict, var_file_dict, do_MLPG=True, cfg=None): - - logger = logging.getLogger('param_generation') - - logger.debug('acoustic_decomposition for %d files' % len(in_file_list)) - - self.load_covariance(var_file_dict, out_dimension_dict) - - stream_start_index = {} - dimension_index = 0 - recorded_vuv = False - vuv_dimension = None - - for feature_name in list(out_dimension_dict.keys()): - # if feature_name != 'vuv': - stream_start_index[feature_name] = dimension_index -# else: -# vuv_dimension = dimension_index -# recorded_vuv = True - - dimension_index += out_dimension_dict[feature_name] - - io_funcs = BinaryIOCollection() - - mlpg_algo = MLParameterGeneration() - - findex = 0 - flen = len(in_file_list) - for file_name in in_file_list: - - findex = findex+1 - - dir_name = os.path.dirname(file_name) - file_id = os.path.splitext(os.path.basename(file_name))[0] - - features, frame_number = io_funcs.load_binary_file_frame( - file_name, dimension) - - logger.info('processing %4d of %4d: %s' % - (findex, flen, file_name)) - - for feature_name in self.gen_wav_features: - - logger.debug(' feature: %s' % feature_name) - - current_features = features[:, stream_start_index[feature_name] :stream_start_index[feature_name]+out_dimension_dict[feature_name]] - if FAST_MLPG: - # fast version wants variance per frame, not single global one: - var = self.var[feature_name] - var = numpy.transpose(numpy.tile(var, frame_number)) - else: - var = self.var[feature_name] - -# print var.shape[1] - if do_MLPG == False: - gen_features = current_features - else: - 
gen_features = mlpg_algo.generation( - current_features, var, out_dimension_dict[feature_name]//3) -# else: -# self.logger.critical("the dimensions do not match for MLPG: %d vs %d" %(var.shape[1], out_dimension_dict[feature_name])) -# raise - - logger.debug(' feature dimensions: %d by %d' % - (gen_features.shape[0], gen_features.shape[1])) - - if feature_name in ['lf0', 'F0']: - if 'vuv' in stream_start_index: - vuv_feature = features[:, stream_start_index['vuv'] :stream_start_index['vuv']+1] - - for i in range(frame_number): - if vuv_feature[i, 0] < 0.5 or gen_features[i, 0] < numpy.log(20): - gen_features[i, 0] = self.inf_float - - new_file_name = os.path.join( - dir_name, file_id + file_extension_dict[feature_name]) - - if self.enforce_silence: - silence_pattern = cfg.silence_pattern - label_align_dir = cfg.in_label_align_dir - in_f = open(label_align_dir+'/'+file_id+'.lab', 'r') - for line in in_f.readlines(): - line = line.strip() - - if len(line) < 1: - continue - temp_list = re.split('\s+', line) - start_time = int(int(temp_list[0])*(10**-4)/5) - end_time = int(int(temp_list[1])*(10**-4)/5) - - full_label = temp_list[2] - - label_binary_flag = self.check_silence_pattern( - full_label, silence_pattern) - - if label_binary_flag: - if feature_name in ['lf0', 'F0', 'mag']: - gen_features[start_time:end_time, - :] = self.inf_float - else: - gen_features[start_time:end_time, :] = 0.0 - - io_funcs.array_to_binary_file(gen_features, new_file_name) - logger.debug(' wrote to file %s' % new_file_name) - - def load_covariance(self, var_file_dict, out_dimension_dict): - - io_funcs = BinaryIOCollection() - for feature_name in list(var_file_dict.keys()): - var_values, dimension = io_funcs.load_binary_file_frame( - var_file_dict[feature_name], 1) - - var_values = numpy.reshape( - var_values, (out_dimension_dict[feature_name], 1)) - - self.var[feature_name] = var_values - - def check_silence_pattern(self, label, silence_pattern): - for current_pattern in silence_pattern: - current_pattern = current_pattern.strip('*') - if current_pattern in label: - return 1 - return 0 - - -if __name__ == '__main__': - - in_file_list = [ - '/afs/inf.ed.ac.uk/group/project/dnn_tts/mtl_dnn/gen/dnn_2500_601_229/hvd_678.cmp'] - - out_dimension_dict = {'mgc': 150, - 'lf0': 3, - 'vuv': 1, - 'bap': 75} - - file_extension_dict = {'mgc': '.mgc', - 'lf0': '.lf0', - 'vuv': '.vuv', - 'bap': '.bap'} - - var_file_dict = {'mgc': '/afs/inf.ed.ac.uk/group/project/dnn_tts/mtl_dnn/data/var/mgc', - 'lf0': '/afs/inf.ed.ac.uk/group/project/dnn_tts/mtl_dnn/data/var/lf0', - 'bap': '/afs/inf.ed.ac.uk/group/project/dnn_tts/mtl_dnn/data/var/bap'} - - generator = ParameterGeneration() - - generator.acoustic_decomposition( - in_file_list, 229, out_dimension_dict, file_extension_dict, var_file_dict) -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. 
-#
-# The system as a whole and most of the files in it are distributed
-# under the following copyright and conditions
-#
-#  Permission is hereby granted, free of charge, to use and distribute
-#  this software and its documentation without restriction, including
-#  without limitation the rights to use, copy, modify, merge, publish,
-#  distribute, sublicense, and/or sell copies of this work, and to
-#  permit persons to whom this work is furnished to do so, subject to
-#  the following conditions:
-#
-#   - Redistributions of source code must retain the above copyright
-#     notice, this list of conditions and the following disclaimer.
-#   - Redistributions in binary form must reproduce the above
-#     copyright notice, this list of conditions and the following
-#     disclaimer in the documentation and/or other materials provided
-#     with the distribution.
-#   - The authors' names may not be used to endorse or promote products derived
-#     from this software without specific prior written permission.
-#
-# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK
-# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
-# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
-# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE
-# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
-# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
-# THIS SOFTWARE.
-################################################################################
-
-
-import sys
-import numpy
-import re
-import math
-from io_funcs.binary_io import BinaryIOCollection
-from multiprocessing.dummy import Pool as ThreadPool
-
-
-class SilenceRemover(object):
-    def __init__(self, n_cmp, silence_pattern=['*-#+*'], label_type="state_align", remove_frame_features=True,
-                 subphone_feats="none"):
-        self.silence_pattern = silence_pattern
-        self.silence_pattern_size = len(silence_pattern)
-        self.label_type = label_type
-        self.remove_frame_features = remove_frame_features
-        self.subphone_feats = subphone_feats
-        self.n_cmp = n_cmp
-
-    def remove_silence(self, in_data_list, in_align_list, out_data_list, dur_file_list=None):
-        file_number = len(in_data_list)
-        align_file_number = len(in_align_list)
-
-        if file_number != align_file_number:
-            print("The numbers of input and alignment files are not equal!\n")
-            sys.exit(1)
-        if file_number != len(out_data_list):
-            print("The numbers of input and output files are not equal!\n")
-            sys.exit(1)
-
-        io_funcs = BinaryIOCollection()
-
-        def _remove_silence(i):
-            if self.label_type == "phone_align":
-                if dur_file_list:
-                    dur_file_name = dur_file_list[i]
-                else:
-                    dur_file_name = None
-                nonsilence_indices = self.load_phone_alignment(
-                    in_align_list[i], dur_file_name)
-            else:
-                nonsilence_indices = self.load_alignment(in_align_list[i])
-
-            ori_cmp_data = io_funcs.load_binary_file(
-                in_data_list[i], self.n_cmp)
-
-            frame_number = ori_cmp_data.size // self.n_cmp
-
-            if len(nonsilence_indices) == frame_number:
-                print('WARNING: no silence found!')
-                # previously: continue -- in fact we should keep non-silent data!
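-                # NB: equality means the alignment marked no frame as silent;
-                # we fall through and still write the (unchanged) data below,
-                # rather than skipping the file.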
- - # if labels have a few extra frames than audio, this can break the indexing, remove them: - nonsilence_indices = [ - ix for ix in nonsilence_indices if ix < frame_number] - - new_cmp_data = ori_cmp_data[nonsilence_indices, ] - - io_funcs.array_to_binary_file(new_cmp_data, out_data_list[i]) - - pool = ThreadPool() - pool.map(_remove_silence, range(file_number)) - pool.close() - pool.join() - - # OSW: rewrote above more succintly - def check_silence_pattern(self, label): - for current_pattern in self.silence_pattern: - current_pattern = current_pattern.strip('*') - if current_pattern in label: - return 1 - return 0 - - def load_phone_alignment(self, alignment_file_name, dur_file_name=None): - - if dur_file_name: - io_funcs = BinaryIOCollection() - dur_dim = 1 # hard coded for now - manual_dur_data = io_funcs.load_binary_file(dur_file_name, dur_dim) - - ph_count = 0 - base_frame_index = 0 - nonsilence_frame_index_list = [] - fid = open(alignment_file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - temp_list = re.split('\s+', line) - - if len(temp_list) == 1: - full_label = temp_list[0] - else: - start_time = int(temp_list[0]) - end_time = int(temp_list[1]) - full_label = temp_list[2] - - # to do - support different frame shift - currently hardwired to 5msec - # currently under beta testing: supports different frame shift - if dur_file_name: - frame_number = manual_dur_data[ph_count] - ph_count = ph_count + 1 - else: - frame_number = int((end_time - start_time) / 50000) - - label_binary_flag = self.check_silence_pattern(full_label) - - if self.remove_frame_features: - if label_binary_flag == 0: - for frame_index in range(frame_number): - nonsilence_frame_index_list.append( - base_frame_index + frame_index) - base_frame_index = base_frame_index + frame_number - elif self.subphone_feats == 'none': - if label_binary_flag == 0: - nonsilence_frame_index_list.append(base_frame_index) - base_frame_index = base_frame_index + 1 - - fid.close() - - return nonsilence_frame_index_list - - def load_alignment(self, alignment_file_name, dur_file_name=None): - - state_number = 5 - base_frame_index = 0 - nonsilence_frame_index_list = [] - fid = open(alignment_file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - temp_list = re.split('\s+', line) - if len(temp_list) == 1: - state_index = state_number - full_label = temp_list[0] - else: - start_time = int(temp_list[0]) - end_time = int(temp_list[1]) - full_label = temp_list[2] - # remove state information [k] - full_label_length = len(full_label) - 3 - state_index = full_label[full_label_length + 1] - state_index = int(state_index) - 1 - frame_number = int((end_time - start_time) / 50000) - - label_binary_flag = self.check_silence_pattern(full_label) - - if self.remove_frame_features: - if label_binary_flag == 0: - for frame_index in range(frame_number): - nonsilence_frame_index_list.append( - base_frame_index + frame_index) - base_frame_index = base_frame_index + frame_number - elif self.subphone_feats == 'state_only': - if label_binary_flag == 0: - nonsilence_frame_index_list.append(base_frame_index) - base_frame_index = base_frame_index + 1 - elif self.subphone_feats == 'none' and state_index == state_number: - if label_binary_flag == 0: - nonsilence_frame_index_list.append(base_frame_index) - base_frame_index = base_frame_index + 1 - - fid.close() - - return nonsilence_frame_index_list - - -# def load_binary_file(self, file_name, dimension): - -# fid_lab = 
open(file_name, 'rb')
-#        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-#        fid_lab.close()
-#        features = features[:(dimension * (features.size / dimension))]
-#        features = features.reshape((-1, dimension))
-
-#        return features
-
-
-def trim_silence(in_list, out_list, in_dimension, label_list, label_dimension,
-                 silence_feature_index, percent_to_keep=0):
-    '''
-    Function to trim silence from binary label/speech files based on binary labels.
-    in_list: list of binary label/speech files to trim
-    out_list: trimmed files
-    in_dimension: dimension of data to trim
-    label_list: list of binary labels which contain trimming criterion
-    label_dimension: dimension of the label files
-    silence_feature_index: index of feature in labels which is silence: 1 means silence (trim), 0 means leave.
-    '''
-    assert len(in_list) == len(out_list) == len(label_list)
-    io_funcs = BinaryIOCollection()
-    for (infile, outfile, label_file) in zip(in_list, out_list, label_list):
-
-        data = io_funcs.load_binary_file(infile, in_dimension)
-        label = io_funcs.load_binary_file(label_file, label_dimension)
-
-        audio_label_difference = data.shape[0] - label.shape[0]
-        assert math.fabs(audio_label_difference) < 3, '%s and %s contain different numbers of frames: %s %s' % (
-            infile, label_file, data.shape[0], label.shape[0])
-
-        # In case they are different, resize -- keep label fixed as we assume this has
-        # already been processed. (This problem only arose with STRAIGHT features.)
-        # label is longer -- pad audio to match by repeating last frame:
-        if audio_label_difference < 0:
-            print('audio too short -- pad')
-            padding = numpy.vstack(
-                [data[-1, :]] * int(math.fabs(audio_label_difference)))
-            data = numpy.vstack([data, padding])
-        elif audio_label_difference > 0:  # audio is longer -- cut it
-            print('audio too long -- trim')
-            new_length = label.shape[0]
-            data = data[:new_length, :]
-        # else: -- expected case -- lengths match, so do nothing
-
-        silence_flag = label[:, silence_feature_index]
-        if not (numpy.unique(silence_flag) == numpy.array([0, 1])).all():
-            # if it's all 0s or all 1s, that's ok:
-            assert (numpy.unique(silence_flag) == numpy.array([0])).all() or \
-                (numpy.unique(silence_flag) == numpy.array([1])).all(), \
-                'dimension %s of %s contains values other than 0 and 1' % (
-                    silence_feature_index, infile)
-        print('Remove %d%% of frames (%s frames) as silence... ' % (
-            100 * numpy.sum(silence_flag / float(len(silence_flag))), int(numpy.sum(silence_flag))))
-        non_silence_indices = numpy.nonzero(
-            silence_flag == 0)  # indices where silence_flag == 0 is True (i.e. != 0)
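-        # percent_to_keep lets the caller retain a small, evenly spaced sample
-        # of the silent frames (e.g. percent_to_keep=5 keeps every 20th silent
-        # frame), so the model still sees some silence during training.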
-        if percent_to_keep != 0:
-            assert type(percent_to_keep) == int and percent_to_keep > 0
-            silence_indices = numpy.nonzero(silence_flag == 1)
-            # nonzero returns a tuple of arrays, one for each dimension of input array
-            silence_indices = silence_indices[0]
-            every_nth = 100 // percent_to_keep
-            # every_nth used as integer step value in the slice below
-            silence_indices_to_keep = silence_indices[::every_nth]
-            # -1 due to weird error with STRAIGHT features at line 144:
-            # IndexError: index 445 is out of bounds for axis 0 with size 445
-            if len(silence_indices_to_keep) == 0:
-                # avoid errors in case there is no silence
-                silence_indices_to_keep = numpy.array([1])
-            print('   Restore %s%% (every %sth frame: %s frames) of silent frames' % (
-                percent_to_keep, every_nth, len(silence_indices_to_keep)))
-
-            # Append to end of utt -- same function used for labels and audio
-            # means that violation of temporal order doesn't matter -- will be consistent.
-            # Later, frame shuffling will disperse silent frames evenly across minibatches:
-            non_silence_indices = (numpy.hstack(
-                [non_silence_indices[0], silence_indices_to_keep]))
-            # ^---- from tuple and back (see nonzero note above)
-
-        # advanced integer indexing (works for both the tuple returned by
-        # nonzero and the flat array produced by hstack above):
-        trimmed_data = data[non_silence_indices]
-        io_funcs.array_to_binary_file(trimmed_data, outfile)
-
-
-if __name__ == '__main__':
-    cmp_file_list_name = ''
-    lab_file_list_name = ''
-    align_file_list_name = ''
-
-    n_cmp = 229
-    n_lab = 898
-
-    in_cmp_list = [
-        '/group/project/dnn_tts/data/nick/nn_cmp/nick/herald_001.cmp']
-    in_lab_list = [
-        '/group/project/dnn_tts/data/nick/nn_new_lab/herald_001.lab']
-    in_align_list = [
-        '/group/project/dnn_tts/data/cassia/nick_lab/herald_001.lab']
-
-    out_cmp_list = [
-        '/group/project/dnn_tts/data/nick/nn_new_lab/herald_001.tmp.cmp']
-    out_lab_list = [
-        '/group/project/dnn_tts/data/nick/nn_new_lab/herald_001.tmp.no.lab']
-
-    remover = SilenceRemover(n_cmp)
-    remover.remove_silence(in_cmp_list, in_align_list, out_cmp_list)
-################################################################################
-# The Neural Network (NN) based Speech Synthesis System
-# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/
-#
-# Centre for Speech Technology Research
-# University of Edinburgh, UK
-# Copyright (c) 2014-2015
-# All Rights Reserved.
-#
-# The system as a whole and most of the files in it are distributed
-# under the following copyright and conditions
-#
-#  Permission is hereby granted, free of charge, to use and distribute
-#  this software and its documentation without restriction, including
-#  without limitation the rights to use, copy, modify, merge, publish,
-#  distribute, sublicense, and/or sell copies of this work, and to
-#  permit persons to whom this work is furnished to do so, subject to
-#  the following conditions:
-#
-#   - Redistributions of source code must retain the above copyright
-#     notice, this list of conditions and the following disclaimer.
-#   - Redistributions in binary form must reproduce the above
-#     copyright notice, this list of conditions and the following
-#     disclaimer in the documentation and/or other materials provided
-#     with the distribution.
-#   - The authors' names may not be used to endorse or promote products derived
-#     from this software without specific prior written permission.
-# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - - -import numpy - - -class BinaryIOCollection(object): - - def load_binary_file(self, file_name, dimension): - fid_lab = open(file_name, 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - assert features.size % float( - dimension) == 0.0, 'specified dimension %s not compatible with data' % (dimension) - features = features[:(dimension * (features.size // dimension))] - features = features.reshape((-1, dimension)) - - return features - - def array_to_binary_file(self, data, output_file_name): - data = numpy.array(data, 'float32') - - fid = open(output_file_name, 'wb') - data.tofile(fid) - fid.close() - - def load_binary_file_frame(self, file_name, dimension): - fid_lab = open(file_name, 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - assert features.size % float( - dimension) == 0.0, 'specified dimension %s not compatible with data' % (dimension) - frame_number = features.size // dimension - features = features[:(dimension * frame_number)] - features = features.reshape((-1, dimension)) - - return features, frame_number -''' -Copyright 2011-2013 Pawel Swietojanski - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -MERCHANTABLITY OR NON-INFRINGEMENT. -See the Apache 2 License for the specific language governing permissions and -limitations under the License. - -Not fully implemented [28 OCT 2011] -TODO: support for options: _C, H_IREFC - -''' - -import io -import os -import sys -import numpy -import struct -import logging - - -class HTK_Parm_IO(object): - ''' - For details look at the HTK book, Chapter 5.10 Storage of Parameter Files - ''' - - # HTK datatybes - H_WAVEFORM = 0 - H_LPC = 1 - H_LPREFC = 2 - H_LPCEPSTRA = 3 - H_LPDELCEP = 4 - H_IREFC = 5 - H_MFCC = 6 - H_FBANK = 7 - H_MELSPEC = 8 - H_USER = 9 - H_DISCRETE = 10 - H_PLP = 11 - H_ANON = 12 - - # Additional 'param kind' options - _E = 0x0001 # has energy - _N = 0x0002 # absolute energy suppressed - _D = 0x0004 # has delta coefficients - _A = 0x0008 # has acceleration coefficients - _C = 0x0010 # is compressed - _Z = 0x0020 # has zero mean static coef. - _K = 0x0040 # has CRC checksum - _O = 0x0080 # has 0th cepstral coef. - _V = 0x0100 # has VQ data - _T = 0x0200 # has third differential coef. 
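-    # The low six bits of param_kind hold the datatype; the option flags are
-    # stored shifted left by six bits, matching HTK's own encoding, e.g.
-    #     MFCC_E_D_A == H_MFCC | ((_E | _D | _A) << 6) == 0x346 == 838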
-
-    MASK_H_DATATYPE = 0x003f  # the first 6 bits contain datatype
-
-    def __init__(self, n_samples=0, samp_period=0, samp_size=0, param_kind=0, data=None):
-        '''
-        '''
-
-        # HTK header
-        # number of samples in file (4-byte integer)
-        self.n_samples = n_samples
-        # sample period in 100ns units (4-byte integer)
-        self.samp_period = samp_period
-        # number of bytes per sample (2-byte integer)
-        self.samp_size = samp_size
-        # a code indicating the sample kind (2-byte integer)
-        self.param_kind = param_kind
-
-        self.data = data
-
-        return None
-
-    def htk_datatype(self):
-        return (self.param_kind & self.MASK_H_DATATYPE)
-
-    def set_htk_datatype(self, value):
-        # keep the option bits, overwrite only the low 6 datatype bits
-        self.param_kind = value | (self.param_kind & ~self.MASK_H_DATATYPE)
-
-    def htk_datatype_has_option(self, option):
-        """Return True/False if the given options are set
-
-        :type option: int
-        :param option: one of the _E _N _D etc. flags
-
-        """
-        return (((self.param_kind >> 6) & option) > 0)
-
-    def set_htk_datatype_option(self, value):
-        self.param_kind = (value << 6) | self.param_kind
-
-    def read_htk(self, filename, reshape_to_matrix=True):
-        '''
-        '''
-        try:
-
-            f = open(filename, 'rb')
-
-            # header fields are big-endian, per the HTK book Ch. 5.10
-            self.n_samples = struct.unpack('>I', f.read(4))[0]
-            self.samp_period = struct.unpack('>I', f.read(4))[0]
-            self.samp_size = struct.unpack('>H', f.read(2))[0]
-            self.param_kind = struct.unpack('>H', f.read(2))[0]
-
-            if self.htk_datatype_has_option(self._C):
-                # compressed files carry extra A/B scaling vectors here, e.g.
-                # self.A = struct.unpack('>H', f.read(2))[0]
-                # self.B = struct.unpack('>H', f.read(2))[0]
-                raise Exception("Compressed files not supported yet!")
-
-            if (self.htk_datatype() == self.H_WAVEFORM):
-                self.data = numpy.fromfile(f, numpy.int16)
-            else:
-                self.data = numpy.fromfile(f, numpy.float32)
-
-            if reshape_to_matrix:
-                self.data = self.data.reshape((self.n_samples, -1))
-
-#            if(sys.byteorder=='little'):
-#                self.data.byteswap(True)  # forces big-endian byte ordering
-
-            f.close()
-        except IOError as e:
-            logging.error(e)
-            raise Exception(e)
-
-        return None
-
-    def write_htk(self, filename):
-        '''
-        '''
-        try:
-
-            file = open(filename, 'wb')
-
-            # mirror read_htk: big-endian header followed by the raw samples
-            file.write(struct.pack('>I', self.n_samples))
-            file.write(struct.pack('>I', self.samp_period))
-            file.write(struct.pack('>H', self.samp_size))
-            file.write(struct.pack('>H', self.param_kind))
-
-            self.data.tofile(file)
-
-            file.close()
-        except IOError as e:
-            logging.error(e)
-            raise Exception(e)
-
-        return None
-
-
-import logging
-import os
-import sys
-
-if sys.version_info[0] >= 3:
-    import configparser
-else:
-    import ConfigParser as configparser
-
-
-class configuration(object):
-
-    def __init__(self):
-        pass
-
-    def configure(self, configFile=None):
-
-        # get a logger
-        logger = logging.getLogger("configuration")
-        # this (and only this) logger needs to be configured immediately, otherwise it won't work
-        # we can't use the full user-supplied configuration mechanism in this particular case,
-        # because we haven't loaded it yet!
-        #
-        # so, just use simple console-only logging
-        # this level is hardwired here - should change it to INFO
-        logger.setLevel(logging.DEBUG)
-        # add a handler & its formatter - will write only to console
-        ch = logging.StreamHandler()
-        logger.addHandler(ch)
-        formatter = logging.Formatter(
-            '%(asctime)s %(levelname)8s%(name)15s: %(message)s')
-        ch.setFormatter(formatter)
-
-        # first, set up some default configuration values
-        self.initial_configuration()
-
-        # next, load in any user-supplied configuration values
-        # that might over-ride the default values
-        self.user_configuration(configFile)
-
-        # finally, set up all remaining configuration values
-        # that depend upon either default or user-supplied values
-        self.complete_configuration()
-
-        logger.debug('configuration completed')
-
-    def initial_configuration(self):
-
-        # to be called before loading any user specific values
-
-        # things to put here are
-        # 1. variables that the user cannot change
-        # 2. 
variables that need to be set before loading the user's config file - - UTTID_REGEX = '(.*)\..*' - - def user_configuration(self, configFile=None): - - # get a logger - logger = logging.getLogger("configuration") - - # load and parse the provided configFile, if provided - if not configFile: - logger.warn( - 'no user configuration file provided; using only built-in default settings') - return - - # load the config file - try: - cfgparser = configparser.ConfigParser() - cfgparser.readfp(open(configFile)) - logger.debug( - 'successfully read and parsed user configuration file %s' % configFile) - except: - logger.fatal('error reading user configuration file %s' % - configFile) - raise - - # work_dir must be provided before initialising other directories - self.work_dir = None - - if self.work_dir == None: - try: - self.work_dir = cfgparser.get('Paths', 'work') - - except (configparser.NoSectionError, configparser.NoOptionError): - if self.work_dir == None: - logger.critical('Paths:work has no value!') - raise Exception - - # default place for some data - self.data_dir = os.path.join(self.work_dir, 'data') - self.keras_dir = os.path.join(self.work_dir, 'keras') - - self.gen_dir = os.path.join(self.keras_dir, 'gen') - self.model_dir = os.path.join(self.keras_dir, 'models') - self.stats_dir = os.path.join(self.keras_dir, 'stats') - - self.inter_data_dir = os.path.join(self.work_dir, 'inter_module') - self.def_inp_dir = os.path.join( - self.inter_data_dir, 'nn_no_silence_lab_norm_425') - self.def_out_dir = os.path.join( - self.inter_data_dir, 'nn_norm_mgc_lf0_vuv_bap_187') - - impossible_int = int(-99999) - impossible_float = float(-99999.0) - - user_options = [ - - # Paths - ('work_dir', self.work_dir, 'Paths', 'work'), - ('data_dir', self.data_dir, 'Paths', 'data'), - - ('inp_feat_dir', self.def_inp_dir, 'Paths', 'inp_feat'), - ('out_feat_dir', self.def_out_dir, 'Paths', 'out_feat'), - - ('model_dir', self.model_dir, 'Paths', 'models'), - ('stats_dir', self.stats_dir, 'Paths', 'stats'), - ('gen_dir', self.gen_dir, 'Paths', 'gen'), - - ('file_id_scp', os.path.join(self.data_dir, - 'file_id_list.scp'), 'Paths', 'file_id_list'), - ('test_id_scp', os.path.join(self.data_dir, - 'test_id_list.scp'), 'Paths', 'test_id_list'), - - # Input-Output - ('inp_dim', 425, 'Input-Output', 'inp_dim'), - ('out_dim', 187, 'Input-Output', 'out_dim'), - - ('inp_file_ext', '.lab', 'Input-Output', 'inp_file_ext'), - ('out_file_ext', '.cmp', 'Input-Output', 'out_file_ext'), - - ('inp_norm', 'MINMAX', 'Input-Output', 'inp_norm'), - ('out_norm', 'MINMAX', 'Input-Output', 'out_norm'), - - # Architecture - ('hidden_layer_type', ['TANH', 'TANH', 'TANH', 'TANH', - 'TANH', 'TANH'], 'Architecture', 'hidden_layer_type'), - ('hidden_layer_size', [1024, 1024, 1024, 1024, - 1024, 1024], 'Architecture', 'hidden_layer_size'), - - ('batch_size', 256, 'Architecture', 'batch_size'), - ('num_of_epochs', 1, 'Architecture', 'training_epochs'), - ('dropout_rate', 0.0, 'Architecture', 'dropout_rate'), - - ('output_layer_type', 'linear', 'Architecture', 'output_layer_type'), - ('optimizer', 'adam', 'Architecture', 'optimizer'), - ('loss_function', 'mse', 'Architecture', 'loss_function'), - - # RNN - ('sequential_training', False, 'Architecture', 'sequential_training'), - ('stateful', False, 'Architecture', 'stateful'), - ('use_high_batch_size', False, 'Architecture', 'use_high_batch_size'), - - ('training_algo', 1, 'Architecture', 'training_algo'), - ('merge_size', 1, 'Architecture', 'merge_size'), - ('seq_length', 200, 'Architecture', 
'seq_length'), - ('bucket_range', 100, 'Architecture', 'bucket_range'), - - # Data - ('shuffle_data', True, 'Data', 'shuffle_data'), - - ('train_file_number', impossible_int, 'Data', 'train_file_number'), - ('valid_file_number', impossible_int, 'Data', 'valid_file_number'), - ('test_file_number', impossible_int, 'Data', 'test_file_number'), - - # Processes - ('GenTestList', False, 'Processes', 'GenTestList'), - - ('NORMDATA', False, 'Processes', 'NORMDATA'), - ('TRAINMODEL', False, 'Processes', 'TRAINMODEL'), - ('TESTMODEL', False, 'Processes', 'TESTMODEL') - - ] - - # this uses exec(...) which is potentially dangerous since arbitrary code could be executed - for (variable, default, section, option) in user_options: - # default value - value = None - - try: - # first, look for a user-set value for this variable in the config file - value = cfgparser.get(section, option) - user_or_default = 'user' - - except (configparser.NoSectionError, configparser.NoOptionError): - # use default value, if there is one - if (default == None) or \ - (default == '') or \ - ((type(default) == int) and (default == impossible_int)) or \ - ((type(default) == float) and (default == impossible_float)): - logger.critical('%20s has no value!' % - (section+":"+option)) - raise Exception - else: - value = default - user_or_default = 'default' - - if type(default) == str: - exec('self.%s = "%s"' % (variable, value)) - elif type(default) == int: - exec('self.%s = int(%s)' % (variable, value)) - elif type(default) == float: - exec('self.%s = float(%s)' % (variable, value)) - elif type(default) == bool: - exec('self.%s = bool(%s)' % (variable, value)) - elif type(default) == list: - exec('self.%s = list(%s)' % (variable, value)) - elif type(default) == dict: - exec('self.%s = dict(%s)' % (variable, value)) - else: - logger.critical( - 'Variable %s has default value of unsupported type %s', variable, type(default)) - raise Exception( - 'Internal error in configuration settings: unsupported default type') - - logger.info('%20s has %7s value %s' % - (section+":"+option, user_or_default, value)) - - def complete_configuration(self): - # to be called after reading any user-specific settings - # because the values set here depend on those user-specific settings - - # get a logger - logger = logging.getLogger("configuration") - - # create directories if not exists - if not os.path.exists(self.model_dir): - os.makedirs(self.model_dir) - - if not os.path.exists(self.stats_dir): - os.makedirs(self.stats_dir) - - if not os.path.exists(self.gen_dir): - os.makedirs(self.gen_dir) - - # input-output normalization stat files - self.inp_stats_file = os.path.join(self.stats_dir, "input_%d_%s_%d.norm" % ( - int(self.train_file_number), self.inp_norm, self.inp_dim)) - self.out_stats_file = os.path.join(self.stats_dir, "output_%d_%s_%d.norm" % ( - int(self.train_file_number), self.out_norm, self.out_dim)) - - # define model file name - if self.sequential_training: - self.combined_model_arch = 'RNN'+str(self.training_algo) - else: - self.combined_model_arch = 'DNN' - - self.combined_model_arch += '_'+str(len(self.hidden_layer_size)) - self.combined_model_arch += '_' + \ - '_'.join(map(str, self.hidden_layer_size)) - self.combined_model_arch += '_' + \ - '_'.join(map(str, self.hidden_layer_type)) - - self.nnets_file_name = '%s_%d_train_%d_%d_%d_%d_%d_model' \ - % (self.combined_model_arch, int(self.shuffle_data), - self.inp_dim, self.out_dim, self.train_file_number, self.batch_size, self.num_of_epochs) - - logger.info('model file: %s' % 
(self.nnets_file_name)) - - # model files - self.json_model_file = os.path.join( - self.model_dir, self.nnets_file_name+'.json') - self.h5_model_file = os.path.join( - self.model_dir, self.nnets_file_name+'.h5') - - # predicted features directory - self.pred_feat_dir = os.path.join(self.gen_dir, self.nnets_file_name) - if not os.path.exists(self.pred_feat_dir): - os.makedirs(self.pred_feat_dir) - - # string.lower for some architecture values - self.output_layer_type = self.output_layer_type.lower() - self.optimizer = self.optimizer.lower() - self.loss_function = self.loss_function.lower() - for i in range(len(self.hidden_layer_type)): - self.hidden_layer_type[i] = self.hidden_layer_type[i].lower() - - # set sequential training True if using LSTMs - if 'lstm' in self.hidden_layer_type: - self.sequential_training = True - - # set/limit batch size to 25 - if self.sequential_training and self.batch_size > 50: - if not self.use_high_batch_size: - logger.info('reducing the batch size from %s to 25' % - (self.batch_size)) - self.batch_size = 25 # num. of sentences in this case - - # rnn params - self.rnn_params = {} - self.rnn_params['merge_size'] = self.merge_size - self.rnn_params['seq_length'] = self.seq_length - self.rnn_params['bucket_range'] = self.bucket_range - self.rnn_params['stateful'] = self.stateful -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://github.com/CSTR-Edinburgh/merlin -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. 
-################################################################################ - -import os -import sys -import time -import random -import numpy as np - -from sklearn import preprocessing - -from io_funcs.binary_io import BinaryIOCollection - -############################ -##### Memory variables ##### -############################ - -UTT_BUFFER_SIZE = 10000 -FRAME_BUFFER_SIZE = 3000000 - - -def read_data_from_file_list(inp_file_list, out_file_list, inp_dim, out_dim, sequential_training=True): - io_funcs = BinaryIOCollection() - - num_of_utt = len(inp_file_list) - - file_length_dict = {'framenum2utt': {}, 'utt2framenum': {}} - - if sequential_training: - temp_set_x = {} - temp_set_y = {} - else: - temp_set_x = np.empty((FRAME_BUFFER_SIZE, inp_dim)) - temp_set_y = np.empty((FRAME_BUFFER_SIZE, out_dim)) - - ### read file by file ### - current_index = 0 - for i in range(num_of_utt): - inp_file_name = inp_file_list[i] - out_file_name = out_file_list[i] - inp_features, inp_frame_number = io_funcs.load_binary_file_frame( - inp_file_name, inp_dim) - out_features, out_frame_number = io_funcs.load_binary_file_frame( - out_file_name, out_dim) - - base_file_name = os.path.basename(inp_file_name).split(".")[0] - - if abs(inp_frame_number-out_frame_number) > 5: - print('the number of frames in input and output features are different: %d vs %d (%s)' % ( - inp_frame_number, out_frame_number, base_file_name)) - sys.exit(0) - else: - frame_number = min(inp_frame_number, out_frame_number) - - if sequential_training: - temp_set_x[base_file_name] = inp_features[0:frame_number] - temp_set_y[base_file_name] = out_features[0:frame_number] - else: - temp_set_x[current_index:current_index + - frame_number, ] = inp_features[0:frame_number] - temp_set_y[current_index:current_index + - frame_number, ] = out_features[0:frame_number] - current_index += frame_number - - if frame_number not in file_length_dict['framenum2utt']: - file_length_dict['framenum2utt'][frame_number] = [base_file_name] - else: - file_length_dict['framenum2utt'][frame_number].append( - base_file_name) - - file_length_dict['utt2framenum'][base_file_name] = frame_number - - drawProgressBar(i+1, num_of_utt) - - sys.stdout.write("\n") - - if not sequential_training: - temp_set_x = temp_set_x[0:current_index, ] - temp_set_y = temp_set_y[0:current_index, ] - - return temp_set_x, temp_set_y, file_length_dict - - -def read_test_data_from_file_list(inp_file_list, inp_dim, sequential_training=True): - io_funcs = BinaryIOCollection() - - num_of_utt = len(inp_file_list) - - file_length_dict = {'framenum2utt': {}, 'utt2framenum': {}} - - if sequential_training: - temp_set_x = {} - else: - temp_set_x = np.empty((FRAME_BUFFER_SIZE, inp_dim)) - - ### read file by file ### - current_index = 0 - for i in range(num_of_utt): - inp_file_name = inp_file_list[i] - inp_features, frame_number = io_funcs.load_binary_file_frame( - inp_file_name, inp_dim) - - base_file_name = os.path.basename(inp_file_name).split(".")[0] - - if sequential_training: - temp_set_x[base_file_name] = inp_features - else: - temp_set_x[current_index:current_index + - frame_number, ] = inp_features[0:frame_number] - current_index += frame_number - - if frame_number not in file_length_dict['framenum2utt']: - file_length_dict['framenum2utt'][frame_number] = [base_file_name] - else: - file_length_dict['framenum2utt'][frame_number].append( - base_file_name) - - file_length_dict['utt2framenum'][base_file_name] = frame_number - - drawProgressBar(i+1, num_of_utt) - - sys.stdout.write("\n") - - if not 
sequential_training: - temp_set_x = temp_set_x[0:current_index, ] - - return temp_set_x, file_length_dict - - -def transform_data_to_3d_matrix(data, seq_length=200, max_length=0, merge_size=1, shuffle_data=True, shuffle_type=1, padding="right"): - num_of_utt = len(data) - feat_dim = data[list(data.keys())[0]].shape[1] - - if max_length > 0: - temp_set = np.zeros((num_of_utt, max_length, feat_dim)) - - ### read file by file ### - current_index = 0 - for base_file_name, in_features in data.items(): - frame_number = min(in_features.shape[0], max_length) - if padding == "right": - temp_set[current_index, 0:frame_number, ] = in_features - else: - temp_set[current_index, -frame_number:, ] = in_features - current_index += 1 - - else: - temp_set = np.zeros((FRAME_BUFFER_SIZE, feat_dim)) - - train_idx_list = list(data.keys()) - train_idx_list.sort() - - if shuffle_data: - if shuffle_type == 1: - train_idx_list = shuffle_file_list(train_idx_list) - elif shuffle_type == 2: - train_idx_list = shuffle_file_list( - train_idx_list, shuffle_type=2, merge_size=merge_size) - - ### read file by file ### - current_index = 0 - for file_number in range(num_of_utt): - base_file_name = train_idx_list[file_number] - in_features = data[base_file_name] - frame_number = in_features.shape[0] - - temp_set[current_index:current_index+frame_number, ] = in_features - current_index += frame_number - - if (file_number+1) % merge_size == 0: - current_index = seq_length * \ - (int(np.ceil(float(current_index)/float(seq_length)))) - - num_of_samples = int(np.ceil(float(current_index)/float(seq_length))) - - temp_set = temp_set[0: num_of_samples*seq_length, ] - temp_set = temp_set.reshape(-1, seq_length, feat_dim) - - return temp_set - - -def read_and_transform_data_from_file_list(in_file_list, dim, seq_length=200, merge_size=1): - io_funcs = BinaryIOCollection() - - num_of_utt = len(in_file_list) - - temp_set = np.zeros((FRAME_BUFFER_SIZE, dim)) - - ### read file by file ### - current_index = 0 - for i in range(num_of_utt): - in_file_name = in_file_list[i] - in_features, frame_number = io_funcs.load_binary_file_frame( - in_file_name, dim) - base_file_name = os.path.basename(in_file_name).split(".")[0] - - temp_set[current_index:current_index+frame_number, ] = in_features - current_index += frame_number - - if (i+1) % merge_size == 0: - current_index = seq_length * \ - (int(np.ceil(float(current_index)/float(seq_length)))) - - drawProgressBar(i+1, num_of_utt) - - sys.stdout.write("\n") - - num_of_samples = int(np.ceil(float(current_index)/float(seq_length))) - - temp_set = temp_set[0: num_of_samples*seq_length, ] - temp_set = temp_set.reshape(num_of_samples, seq_length) - - return temp_set - - -def merge_data(train_x, train_y, merge_size): - temp_train_x = {} - temp_train_y = {} - - train_id_list = list(train_x.keys()) - train_file_number = len(train_id_list) - train_id_list.sort() - - inp_dim = train_x[train_id_list[0]].shape[1] - out_dim = train_y[train_id_list[0]].shape[1] - - merged_features_x = np.zeros((0, inp_dim)) - merged_features_y = np.zeros((0, out_dim)) - new_file_count = 0 - for file_index in range(1, train_file_number+1): - inp_features = train_x[train_id_list[file_index-1]] - out_features = train_y[train_id_list[file_index-1]] - merged_features_x = np.vstack((merged_features_x, inp_features)) - merged_features_y = np.vstack((merged_features_y, out_features)) - - if file_index % merge_size == 0 or file_index == train_file_number: - base_file_name = "new_utterance_%04d" % (new_file_count) - 
temp_train_x[base_file_name] = merged_features_x - temp_train_y[base_file_name] = merged_features_y - new_file_count += 1 - merged_features_x = np.zeros((0, inp_dim)) - merged_features_y = np.zeros((0, out_dim)) - - return temp_train_x, temp_train_y - - -def shuffle_file_list(train_idx_list, shuffle_type=1, merge_size=5): - ### shuffle train id list ### - random.seed(271638) - train_file_number = len(train_idx_list) - - if shuffle_type == 1: # shuffle by sentence - random.shuffle(train_idx_list) - return train_idx_list - - elif shuffle_type == 2: # shuffle by a group of sentences - id_numbers = list(range(0, train_file_number, merge_size)) - random.shuffle(id_numbers) - new_train_idx_list = [] - for i in range(len(id_numbers)): - new_train_idx_list += train_idx_list[id_numbers[i]:id_numbers[i]+merge_size] - return new_train_idx_list - - -def get_stateful_data(train_x, train_y, batch_size): - num_of_batches = int(train_x.shape[0]/batch_size) - train_x = train_x[0: num_of_batches*batch_size, ] - train_y = train_y[0: num_of_batches*batch_size, ] - - stateful_seq = np.zeros(num_of_batches*batch_size, dtype="int32") - for i in range(num_of_batches): - stateful_seq[i*batch_size:(i+1)*batch_size] = np.array( - list(range(batch_size)))*num_of_batches+i - - temp_train_x = train_x[stateful_seq] - temp_train_y = train_y[stateful_seq] - - return temp_train_x, temp_train_y - - -def get_stateful_input(test_x, seq_length, batch_size=1): - [n_frames, n_dim] = test_x.shape - - num_of_samples = batch_size*seq_length - num_of_batches = int(n_frames/num_of_samples) + 1 - new_data_size = num_of_batches*num_of_samples - - temp_test_x = np.zeros((new_data_size, n_dim)) - temp_test_x[0: n_frames, ] = test_x - - temp_test_x = temp_test_x.reshape(-1, seq_length, n_dim) - - return temp_test_x - - -def compute_norm_stats(data, stats_file, method="MVN"): - #### normalize training data #### - io_funcs = BinaryIOCollection() - - if method == "MVN": - scaler = preprocessing.StandardScaler().fit(data) - norm_matrix = np.vstack((scaler.mean_, scaler.scale_)) - elif method == "MINMAX": - scaler = preprocessing.MinMaxScaler( - feature_range=(0.01, 0.99)).fit(data) - norm_matrix = np.vstack((scaler.min_, scaler.scale_)) - - print(norm_matrix.shape) - io_funcs.array_to_binary_file(norm_matrix, stats_file) - - return scaler - - -def load_norm_stats(stats_file, dim, method="MVN"): - #### load norm stats #### - io_funcs = BinaryIOCollection() - - norm_matrix, frame_number = io_funcs.load_binary_file_frame( - stats_file, dim) - assert frame_number == 2 - - if method == "MVN": - scaler = preprocessing.StandardScaler() - scaler.mean_ = norm_matrix[0, :] - scaler.scale_ = norm_matrix[1, :] - elif method == "MINMAX": - scaler = preprocessing.MinMaxScaler(feature_range=(0.01, 0.99)) - scaler.min_ = norm_matrix[0, :] - scaler.scale_ = norm_matrix[1, :] - - return scaler - - -def norm_data(data, scaler, sequential_training=True): - if scaler is None: - return - - #### normalize data #### - if not sequential_training: - data = scaler.transform(data) - else: - for filename, features in data.items(): - data[filename] = scaler.transform(features) - - -def denorm_data(data, scaler): - if scaler is None: - return - - #### de-normalize data #### - data = scaler.inverse_transform(data) - - -def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True): - if not os.path.exists(file_dir) and new_dir_switch: - os.makedirs(file_dir) - file_name_list = [] - for file_id in file_id_list: - file_name = file_dir + '/' + file_id 
+ file_extension - file_name_list.append(file_name) - - return file_name_list - - -def read_file_list(file_name): - file_lists = [] - fid = open(file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - file_lists.append(line) - fid.close() - - return file_lists - - -def print_status(i, length): - pr = int(float(i)/float(length)*100) - st = int(float(pr)/7) - sys.stdout.write(("\r%d/%d ") % (i, length) + - ("[ %d" % pr+"% ] <<< ")+('='*st)+(''*(100-st))) - sys.stdout.flush() - - -def drawProgressBar(indx, length, barLen=20): - percent = float(indx)/length - sys.stdout.write("\r") - progress = "" - for i in range(barLen): - if i < int(barLen * percent): - progress += "=" - else: - progress += " " - sys.stdout.write("[%s] <<< %d/%d (%d%%)" % - (progress, indx, length, percent * 100)) - sys.stdout.flush() -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://github.com/CSTR-Edinburgh/merlin -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. 
-################################################################################ - -import random -import numpy as np - -import keras -from keras.models import Sequential -from keras.models import model_from_json -from keras.layers import Dense, SimpleRNN, GRU, LSTM -from keras.layers import Dropout - - -class kerasModels(object): - - def __init__(self, n_in, hidden_layer_size, n_out, hidden_layer_type, output_type='linear', dropout_rate=0.0, loss_function='mse', optimizer='adam'): - """ This function initialises a neural network - - :param n_in: Dimensionality of input features - :param hidden_layer_size: The layer size for each hidden layer - :param n_out: Dimensionality of output features - :param hidden_layer_type: the activation types of each hidden layers, e.g., TANH, LSTM, GRU, BLSTM - :param output_type: the activation type of the output layer, by default is 'LINEAR', linear regression. - :param dropout_rate: probability of dropout, a float number between 0 and 1. - :type n_in: Integer - :type hidden_layer_size: A list of integers - :type n_out: Integrer - """ - - self.n_in = int(n_in) - self.n_out = int(n_out) - - self.n_layers = len(hidden_layer_size) - - self.hidden_layer_size = hidden_layer_size - self.hidden_layer_type = hidden_layer_type - - assert len(self.hidden_layer_size) == len(self.hidden_layer_type) - - self.output_type = output_type - self.dropout_rate = dropout_rate - self.loss_function = loss_function - self.optimizer = optimizer - - # create model - self.model = Sequential() - - def define_feedforward_model(self): - seed = 12345 - np.random.seed(seed) - - # add hidden layers - for i in range(self.n_layers): - if i == 0: - input_size = self.n_in - else: - input_size = self.hidden_layer_size[i - 1] - - self.model.add(Dense( - units=self.hidden_layer_size[i], - activation=self.hidden_layer_type[i], - kernel_initializer="normal", - input_dim=input_size)) - self.model.add(Dropout(self.dropout_rate)) - - # add output layer - self.final_layer = self.model.add(Dense( - units=self.n_out, - activation=self.output_type.lower(), - kernel_initializer="normal", - input_dim=self.hidden_layer_size[-1])) - - # Compile the model - self.compile_model() - - def define_sequence_model(self): - seed = 12345 - np.random.seed(seed) - - # add hidden layers - for i in range(self.n_layers): - if i == 0: - input_size = self.n_in - else: - input_size = self.hidden_layer_size[i - 1] - - if self.hidden_layer_type[i] == 'rnn': - self.model.add(SimpleRNN( - units=self.hidden_layer_size[i], - input_shape=(None, input_size), - return_sequences=True)) - elif self.hidden_layer_type[i] == 'gru': - self.model.add(GRU( - units=self.hidden_layer_size[i], - input_shape=(None, input_size), - return_sequences=True)) - elif self.hidden_layer_type[i] == 'lstm': - self.model.add(LSTM( - units=self.hidden_layer_size[i], - input_shape=(None, input_size), - return_sequences=True)) - elif self.hidden_layer_type[i] == 'blstm': - self.model.add(LSTM( - units=self.hidden_layer_size[i], - input_shape=(None, input_size), - return_sequences=True, - go_backwards=True)) - else: - self.model.add(Dense( - units=self.hidden_layer_size[i], - activation=self.hidden_layer_type[i], - kernel_initializer="normal", - input_shape=(None, input_size))) - - # add output layer - self.final_layer = self.model.add(Dense( - units=self.n_out, - input_dim=self.hidden_layer_size[-1], - kernel_initializer='normal', - activation=self.output_type.lower())) - - # Compile the model - self.compile_model() - - def define_stateful_model(self, 
batch_size=25, seq_length=200):
-        seed = 12345
-        np.random.seed(seed)
-
-        # params
-        timesteps = seq_length
-
-        # add hidden layers
-        for i in range(self.n_layers):
-            if i == 0:
-                input_size = self.n_in
-            else:
-                input_size = self.hidden_layer_size[i - 1]
-
-            if self.hidden_layer_type[i] == 'lstm':
-                self.model.add(LSTM(
-                    units=self.hidden_layer_size[i],
-                    batch_input_shape=(batch_size, timesteps, input_size),
-                    return_sequences=True,
-                    stateful=True))  # go_backwards=True))
-            elif self.hidden_layer_type[i] == 'blstm':
-                self.model.add(LSTM(
-                    units=self.hidden_layer_size[i],
-                    batch_input_shape=(batch_size, timesteps, input_size),
-                    return_sequences=True,
-                    stateful=True,
-                    go_backwards=True))
-            else:
-                self.model.add(Dense(
-                    units=self.hidden_layer_size[i],
-                    activation=self.hidden_layer_type[i],
-                    kernel_initializer="normal",
-                    batch_input_shape=(batch_size, timesteps, input_size)))
-
-        # add output layer
-        self.final_layer = self.model.add(Dense(
-            units=self.n_out,
-            input_dim=self.hidden_layer_size[-1],
-            kernel_initializer='normal',
-            activation=self.output_type.lower()))
-
-        # Compile the model
-        self.compile_model()
-
-    def compile_model(self):
-        self.model.compile(loss=self.loss_function,
-                           optimizer=self.optimizer, metrics=['accuracy'])
-
-    def save_model(self, json_model_file, h5_model_file):
-        # serialize model to JSON
-        model_json = self.model.to_json()
-        with open(json_model_file, "w") as json_file:
-            json_file.write(model_json)
-        # serialize weights to HDF5
-        self.model.save_weights(h5_model_file)
-        print("Saved model to disk")
-
-    def load_model(self, json_model_file, h5_model_file):
-        #### load the model ####
-        json_file = open(json_model_file, 'r')
-        loaded_model_json = json_file.read()
-        json_file.close()
-        loaded_model = model_from_json(loaded_model_json)
-        loaded_model.load_weights(h5_model_file)
-        print("Loaded model from disk")
-
-        #### compile the model ####
-        self.model = loaded_model
-        self.compile_model()
-################################################################################
-# The Neural Network (NN) based Speech Synthesis System
-# https://github.com/CSTR-Edinburgh/merlin
-#
-# Centre for Speech Technology Research
-# University of Edinburgh, UK
-# Copyright (c) 2014-2015
-# All Rights Reserved.
-#
-# The system as a whole and most of the files in it are distributed
-# under the following copyright and conditions
-#
-#  Permission is hereby granted, free of charge, to use and distribute
-#  this software and its documentation without restriction, including
-#  without limitation the rights to use, copy, modify, merge, publish,
-#  distribute, sublicense, and/or sell copies of this work, and to
-#  permit persons to whom this work is furnished to do so, subject to
-#  the following conditions:
-#
-#   - Redistributions of source code must retain the above copyright
-#     notice, this list of conditions and the following disclaimer.
-#   - Redistributions in binary form must reproduce the above
-#     copyright notice, this list of conditions and the following
-#     disclaimer in the documentation and/or other materials provided
-#     with the distribution.
-#   - The authors' names may not be used to endorse or promote products derived
-#     from this software without specific prior written permission.
-# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -import os -import sys -import random -import numpy as np - -from io_funcs.binary_io import BinaryIOCollection - -from keras_lib.model import kerasModels -from keras_lib import data_utils - - -class TrainKerasModels(kerasModels): - - def __init__(self, n_in, hidden_layer_size, n_out, hidden_layer_type, output_type='linear', dropout_rate=0.0, loss_function='mse', optimizer='adam', rnn_params=None): - - kerasModels.__init__(self, n_in, hidden_layer_size, n_out, hidden_layer_type, - output_type, dropout_rate, loss_function, optimizer) - - #### TODO: Find a good way to pass below params #### - self.merge_size = rnn_params['merge_size'] - self.seq_length = rnn_params['seq_length'] - self.bucket_range = rnn_params['bucket_range'] - - self.stateful = rnn_params['stateful'] - - pass - - def train_feedforward_model(self, train_x, train_y, valid_x, valid_y, batch_size=256, num_of_epochs=10, shuffle_data=True): - self.model.fit(train_x, train_y, batch_size=batch_size, - epochs=num_of_epochs, shuffle=shuffle_data) - - def train_sequence_model(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size=1, num_of_epochs=10, shuffle_data=True, training_algo=1): - if batch_size == 1: - self.train_recurrent_model_batchsize_one( - train_x, train_y, valid_x, valid_y, num_of_epochs, shuffle_data) - else: - self.train_recurrent_model(train_x, train_y, valid_x, valid_y, - train_flen, batch_size, num_of_epochs, shuffle_data, training_algo) - - def train_recurrent_model_batchsize_one(self, train_x, train_y, valid_x, valid_y, num_of_epochs, shuffle_data): - ### if batch size is equal to 1 ### - train_idx_list = list(train_x.keys()) - if shuffle_data: - random.seed(271638) - random.shuffle(train_idx_list) - - train_file_number = len(train_idx_list) - for epoch_num in range(num_of_epochs): - print(('Epoch: %d/%d ' % (epoch_num+1, num_of_epochs))) - file_num = 0 - for file_name in train_idx_list: - temp_train_x = train_x[file_name] - temp_train_y = train_y[file_name] - temp_train_x = np.reshape( - temp_train_x, (1, temp_train_x.shape[0], self.n_in)) - temp_train_y = np.reshape( - temp_train_y, (1, temp_train_y.shape[0], self.n_out)) - self.model.train_on_batch(temp_train_x, temp_train_y) - #self.model.fit(temp_train_x, temp_train_y, epochs=1, shuffle=False, verbose=0) - file_num += 1 - data_utils.drawProgressBar(file_num, train_file_number) - - sys.stdout.write("\n") - - def train_recurrent_model(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data, training_algo): - ### if batch size more than 1 ### - if training_algo == 1: - self.train_padding_model( - train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data) - elif training_algo == 2: - self.train_bucket_model(train_x, train_y, valid_x, valid_y, - train_flen, batch_size, num_of_epochs, shuffle_data) - elif training_algo == 3: - 
self.train_split_model(train_x, train_y, valid_x, valid_y, - train_flen, batch_size, num_of_epochs, shuffle_data) - else: - print("Choose training algorithm for batch training with RNNs:") - print( - "1. Padding model -- pad utterances with zeros to maximum sequence length") - print( - "2. Bucket model -- form buckets with minimum and maximum sequence length") - print("3. Split model -- split utterances to a fixed sequence length") - sys.exit(1) - - def train_padding_model(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data): - ### Method 1 ### - train_id_list = list(train_flen['utt2framenum'].keys()) - if shuffle_data: - random.seed(271638) - random.shuffle(train_id_list) - - train_file_number = len(train_id_list) - for epoch_num in range(num_of_epochs): - print(('Epoch: %d/%d ' % (epoch_num+1, num_of_epochs))) - file_num = 0 - while file_num < train_file_number: - train_idx_list = train_id_list[file_num: file_num + batch_size] - seq_len_arr = [train_flen['utt2framenum'][filename] - for filename in train_idx_list] - max_seq_length = max(seq_len_arr) - sub_train_x = dict( - (filename, train_x[filename]) for filename in train_idx_list) - sub_train_y = dict( - (filename, train_y[filename]) for filename in train_idx_list) - temp_train_x = data_utils.transform_data_to_3d_matrix( - sub_train_x, max_length=max_seq_length) - temp_train_y = data_utils.transform_data_to_3d_matrix( - sub_train_y, max_length=max_seq_length) - self.model.train_on_batch(temp_train_x, temp_train_y) - file_num += len(train_idx_list) - data_utils.drawProgressBar(file_num, train_file_number) - - print(" Validation error: %.3f" % - (self.get_validation_error(valid_x, valid_y))) - - def train_bucket_model(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data): - ### Method 2 ### - train_fnum_list = np.array(list(train_flen['framenum2utt'].keys())) - train_range_list = list( - range(min(train_fnum_list), max(train_fnum_list)+1, self.bucket_range)) - if shuffle_data: - random.seed(271638) - random.shuffle(train_range_list) - - train_file_number = len(train_x) - for epoch_num in range(num_of_epochs): - print(('Epoch: %d/%d ' % (epoch_num+1, num_of_epochs))) - file_num = 0 - for frame_num in train_range_list: - min_seq_length = frame_num - max_seq_length = frame_num+self.bucket_range - sub_train_list = train_fnum_list[(train_fnum_list >= min_seq_length) & ( - train_fnum_list < max_seq_length)] - if len(sub_train_list) == 0: - continue - train_idx_list = sum( - [train_flen['framenum2utt'][framenum] for framenum in sub_train_list], []) - sub_train_x = dict( - (filename, train_x[filename]) for filename in train_idx_list) - sub_train_y = dict( - (filename, train_y[filename]) for filename in train_idx_list) - temp_train_x = data_utils.transform_data_to_3d_matrix( - sub_train_x, max_length=max_seq_length) - temp_train_y = data_utils.transform_data_to_3d_matrix( - sub_train_y, max_length=max_seq_length) - self.model.fit(temp_train_x, temp_train_y, - batch_size=batch_size, shuffle=False, epochs=1, verbose=0) - - file_num += len(train_idx_list) - data_utils.drawProgressBar(file_num, train_file_number) - - print(" Validation error: %.3f" % - (self.get_validation_error(valid_x, valid_y))) - - def train_split_model(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data): - ### Method 3 ### - train_id_list = list(train_flen['utt2framenum'].keys()) - if shuffle_data: - random.seed(271638) - random.shuffle(train_id_list) - - 
train_file_number = len(train_id_list) - for epoch_num in range(num_of_epochs): - print(('Epoch: %d/%d ' % (epoch_num+1, num_of_epochs))) - file_num = 0 - while file_num < train_file_number: - train_idx_list = train_id_list[file_num: file_num + batch_size] - sub_train_x = dict( - (filename, train_x[filename]) for filename in train_idx_list) - sub_train_y = dict( - (filename, train_y[filename]) for filename in train_idx_list) - temp_train_x = data_utils.transform_data_to_3d_matrix( - sub_train_x, seq_length=self.seq_length, merge_size=self.merge_size) - temp_train_y = data_utils.transform_data_to_3d_matrix( - sub_train_y, seq_length=self.seq_length, merge_size=self.merge_size) - - self.model.train_on_batch(temp_train_x, temp_train_y) - - file_num += len(train_idx_list) - data_utils.drawProgressBar(file_num, train_file_number) - - print(" Validation error: %.3f" % - (self.get_validation_error(valid_x, valid_y))) - - def train_split_model_keras_version(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data): - """This function is not used as of now - """ - ### Method 3 ### - temp_train_x = data_utils.transform_data_to_3d_matrix( - train_x, seq_length=self.seq_length, merge_size=self.merge_size, shuffle_data=shuffle_data) - print(("Input shape: "+str(temp_train_x.shape))) - - temp_train_y = data_utils.transform_data_to_3d_matrix( - train_y, seq_length=self.seq_length, merge_size=self.merge_size, shuffle_data=shuffle_data) - print(("Output shape: "+str(temp_train_y.shape))) - - if self.stateful: - temp_train_x, temp_train_y = data_utils.get_stateful_data( - temp_train_x, temp_train_y, batch_size) - - self.model.fit(temp_train_x, temp_train_y, - batch_size=batch_size, epochs=num_of_epochs) - - def train_bucket_model_without_padding(self, train_x, train_y, valid_x, valid_y, train_flen, batch_size, num_of_epochs, shuffle_data): - """This function is not used as of now - """ - ### Method 4 ### - train_count_list = list(train_flen['framenum2utt'].keys()) - if shuffle_data: - random.seed(271638) - random.shuffle(train_count_list) - - train_file_number = len(train_x) - for epoch_num in range(num_of_epochs): - print(('Epoch: %d/%d ' % (epoch_num+1, num_of_epochs))) - file_num = 0 - for sequence_length in train_count_list: - train_idx_list = train_flen['framenum2utt'][sequence_length] - sub_train_x = dict( - (filename, train_x[filename]) for filename in train_idx_list) - sub_train_y = dict( - (filename, train_y[filename]) for filename in train_idx_list) - temp_train_x = data_utils.transform_data_to_3d_matrix( - sub_train_x, max_length=sequence_length) - temp_train_y = data_utils.transform_data_to_3d_matrix( - sub_train_y, max_length=sequence_length) - self.model.fit(temp_train_x, temp_train_y, - batch_size=batch_size, epochs=1, verbose=0) - - file_num += len(train_idx_list) - data_utils.drawProgressBar(file_num, train_file_number) - - sys.stdout.write("\n") - - def get_validation_error(self, valid_x, valid_y, sequential_training=True, stateful=False): - valid_id_list = list(valid_x.keys()) - valid_id_list.sort() - - valid_error = 0.0 - valid_file_number = len(valid_id_list) - for utt_index in range(valid_file_number): - temp_valid_x = valid_x[valid_id_list[utt_index]] - temp_valid_y = valid_y[valid_id_list[utt_index]] - num_of_rows = temp_valid_x.shape[0] - - if stateful: - temp_valid_x = data_utils.get_stateful_input( - temp_valid_x, self.seq_length, self.batch_size) - elif sequential_training: - temp_valid_x = np.reshape( - temp_valid_x, (1, num_of_rows, 
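# [editor's sketch, not part of the scraped diff] the metric computed by
# get_validation_error below is the mean, over held-out utterances, of the
# per-frame sum of squared errors. Stand-alone NumPy equivalent for one
# utterance:
import numpy as np

def utterance_error(predictions, targets):
    # predictions, targets: (num_frames, n_out)
    return np.mean(np.sum((predictions - targets) ** 2, axis=1))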
self.n_in)) - - predictions = self.model.predict(temp_valid_x) - if sequential_training: - predictions = np.reshape( - predictions, (num_of_rows, self.n_out)) - - valid_error += np.mean(np.sum((predictions - - temp_valid_y) ** 2, axis=1)) - - valid_error = valid_error/valid_file_number - - return valid_error - - def predict(self, test_x, out_scaler, gen_test_file_list, sequential_training=False, stateful=False): - #### compute predictions #### - io_funcs = BinaryIOCollection() - - test_file_number = len(gen_test_file_list) - print("generating features on held-out test data...") - for utt_index in range(test_file_number): - gen_test_file_name = gen_test_file_list[utt_index] - test_id = os.path.splitext(os.path.basename(gen_test_file_name))[0] - temp_test_x = test_x[test_id] - num_of_rows = temp_test_x.shape[0] - - if stateful: - temp_test_x = data_utils.get_stateful_input( - temp_test_x, self.seq_length, self.batch_size) - elif sequential_training: - temp_test_x = np.reshape( - temp_test_x, (1, num_of_rows, self.n_in)) - - predictions = self.model.predict(temp_test_x) - if sequential_training: - predictions = np.reshape( - predictions, (num_of_rows, self.n_out)) - - data_utils.denorm_data(predictions, out_scaler) - - io_funcs.array_to_binary_file(predictions, gen_test_file_name) - data_utils.drawProgressBar(utt_index+1, test_file_number) - - sys.stdout.write("\n") - -# refer Zhizheng and Simon's ICASSP'16 paper for more details -# http://www.zhizheng.org/papers/icassp2016_lstm.pdf - -import numpy as np -import theano -import theano.tensor as T -from theano import config -from theano.tensor.shared_randomstreams import RandomStreams - - -class VanillaRNN(object): - """ This class implements a standard recurrent neural network: h_{t} = f(W^{hx}x_{t} + W^{hh}h_{t-1}+b_{h}) - - """ - - def __init__(self, rng, x, n_in, n_h, p, training, rnn_batch_training=False): - """ This is to initialise a standard RNN hidden unit - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input data to current layer - :param n_in: dimension of input data - :param n_h: number of hidden units/blocks - :param p: the probability of dropout - :param training: a binary value to indicate training or testing (for dropout training) - """ - self.input = x - - if p > 0.0: - if training == 1: - srng = RandomStreams(seed=123456) - self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0) - else: - self.input = (1-p) * x # (1-p) * - - self.n_in = int(n_in) - self.n_h = int(n_h) - - self.rnn_batch_training = rnn_batch_training - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - - # Input gate weights - self.W_xi = theano.shared(value=Wx_value, name='W_xi') - self.W_hi = theano.shared(value=Wh_value, name='W_hi') - - # bias - self.b_i = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_i') - - # initial value of hidden and cell state - if self.rnn_batch_training: - self.h0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='c0') - - self.h0 = T.repeat(self.h0, x.shape[1], 0) - self.c0 = T.repeat(self.c0, x.shape[1], 0) - else: - self.h0 = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (n_h, ), 
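# [editor's sketch, not part of the scraped diff] the recurrence this class
# assembles with theano.scan, h_t = tanh(W_xh x_t + W_hh h_{t-1} + b_h),
# written as a plain NumPy loop over time:
import numpy as np

def vanilla_rnn_forward(x, W_xh, W_hh, b_h):
    # x: (time, n_in); W_xh: (n_in, n_h); W_hh: (n_h, n_h); b_h: (n_h,)
    h = np.zeros(W_hh.shape[0])
    states = []
    for x_t in x:
        h = np.tanh(x_t @ W_xh + h @ W_hh + b_h)
        states.append(h)
    return np.stack(states)  # (time, n_h)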
dtype=config.floatX), name='c0') - - self.Wix = T.dot(self.input, self.W_xi) - - [self.h, self.c], _ = theano.scan(self.recurrent_as_activation_function, sequences=[self.Wix], - outputs_info=[self.h0, self.c0]) - - self.output = self.h - - self.params = [self.W_xi, self.W_hi, self.b_i] - - self.L2_cost = (self.W_xi ** 2).sum() + (self.W_hi ** 2).sum() - - def recurrent_as_activation_function(self, Wix, h_tm1, c_tm1): - """ Implement the recurrent unit as an activation function. This function is called by self.__init__(). - - :param Wix: it equals to W^{hx}x_{t}, as it does not relate with recurrent, pre-calculate the value for fast computation - :type Wix: matrix - :param h_tm1: contains the hidden activation from previous time step - :type h_tm1: matrix, each row means a hidden activation vector of a time step - :param c_tm1: this parameter is not used, just to keep the interface consistent with LSTM - :returns: h_t is the hidden activation of current time step - """ - - h_t = T.tanh(Wix + T.dot(h_tm1, self.W_hi) + self.b_i) # - - c_t = h_t - - return h_t, c_t - - -class VanillaRNNDecoder(object): - """ This class implements a standard recurrent neural network decoder: - h_{t} = f(W^{hx}x_{t} + W^{hh}h_{t-1}+ W^{yh}y_{t-1} + b_{h}) - y_{t} = g(h_{t}W^{hy} + b_{y}) - - """ - - def __init__(self, rng, x, n_in, n_h, n_out, p, training, rnn_batch_training=False): - """ This is to initialise a standard RNN hidden unit - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input data to current layer - :param n_in: dimension of input data - :param n_h: number of hidden units/blocks - :param n_out: dimension of output data - :param p: the probability of dropout - :param training: a binary value to indicate training or testing (for dropout training) - """ - self.input = x - - if p > 0.0: - if training == 1: - srng = RandomStreams(seed=123456) - self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0) - else: - self.input = (1-p) * x # (1-p) * - - self.n_in = int(n_in) - self.n_h = int(n_h) - self.n_out = int(n_out) - - self.rnn_batch_training = rnn_batch_training - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wy_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_out), size=(n_out, n_h)), dtype=config.floatX) - Ux_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_out)), dtype=config.floatX) - Uh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_out)), dtype=config.floatX) - Uy_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_out), size=(n_out, n_out)), dtype=config.floatX) - - # Input gate weights - self.W_xi = theano.shared(value=Wx_value, name='W_xi') - self.W_hi = theano.shared(value=Wh_value, name='W_hi') - self.W_yi = theano.shared(value=Wy_value, name='W_yi') - - # Output gate weights - self.U_xi = theano.shared(value=Ux_value, name='U_xi') - self.U_hi = theano.shared(value=Uh_value, name='U_hi') - self.U_yi = theano.shared(value=Uy_value, name='U_yi') - - # bias - self.b_i = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_i') - self.b = theano.shared(value=np.zeros( - (n_out, ), dtype=config.floatX), name='b') - - # initial value of hidden and cell state and output - if self.rnn_batch_training: - self.h0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='h0') - 
self.c0 = theano.shared(value=np.zeros(
-                (1, n_h), dtype=config.floatX), name='c0')
-            self.y0 = theano.shared(value=np.zeros(
-                (1, n_out), dtype=config.floatX), name='y0')
-
-            self.h0 = T.repeat(self.h0, x.shape[1], 0)
-            self.c0 = T.repeat(self.c0, x.shape[1], 0)
-            self.y0 = T.repeat(self.y0, x.shape[1], 0)
-        else:
-            self.h0 = theano.shared(value=np.zeros(
-                (n_h, ), dtype=config.floatX), name='h0')
-            self.c0 = theano.shared(value=np.zeros(
-                (n_h, ), dtype=config.floatX), name='c0')
-            self.y0 = theano.shared(value=np.zeros(
-                (n_out, ), dtype=config.floatX), name='y0')
-
-        self.Wix = T.dot(self.input, self.W_xi)
-
-        [self.h, self.c, self.y], _ = theano.scan(self.recurrent_as_activation_function, sequences=[self.Wix],
-                                                  outputs_info=[self.h0, self.c0, self.y0])
-
-        self.output = self.y
-
-        self.params = [self.W_xi, self.W_hi,
-                       self.W_yi, self.U_hi, self.b_i, self.b]
-
-        self.L2_cost = (self.W_xi ** 2).sum() + (self.W_hi ** 2).sum() + \
-            (self.W_yi ** 2).sum() + (self.U_hi ** 2).sum()
-
-    def recurrent_as_activation_function(self, Wix, h_tm1, c_tm1, y_tm1):
-        """ Implement the recurrent unit as an activation function. This function is called by self.__init__().
-
-        :param Wix: equals W^{hx}x_{t}; it does not depend on the recurrence, so it is pre-computed for speed
-        :type Wix: matrix
-        :param h_tm1: the hidden activation from the previous time step
-        :type h_tm1: matrix, each row is a hidden activation vector for one time step
-        :param c_tm1: not used; kept only to match the LSTM interface
-        :param y_tm1: the output of the previous time step, fed back into the hidden state
-        :returns: h_t, c_t and y_t, the hidden activation, cell state and output of the current time step
-        """
-
-        h_t = T.tanh(Wix + T.dot(h_tm1, self.W_hi) +
-                     T.dot(y_tm1, self.W_yi) + self.b_i)
-
-        y_t = T.dot(h_t, self.U_hi) + self.b
-
-        c_t = h_t
-
-        return h_t, c_t, y_t
-
-
-class LstmBase(object):
-    """ This class provides a base for all long short-term memory (LSTM) related classes.
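# [editor's sketch, not part of the scraped diff] one step of the
# VanillaRNNDecoder defined above: the previous output y_{t-1} is fed back into
# the hidden state, and the output is a linear read-out of the new hidden state:
import numpy as np

def rnn_decoder_step(x_t, h_prev, y_prev, W_xi, W_hi, W_yi, U_hi, b_i, b):
    h_t = np.tanh(x_t @ W_xi + h_prev @ W_hi + y_prev @ W_yi + b_i)
    y_t = h_t @ U_hi + b
    return h_t, y_t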
- Several variants of LSTM were investigated in (Wu & King, ICASSP 2016): Zhizheng Wu, Simon King, "Investigating gated recurrent neural networks for speech synthesis", ICASSP 2016 - - """ - - def __init__(self, rng, x, n_in, n_h, p=0.0, training=0, rnn_batch_training=False): - """ Initialise all the components in a LSTM block, including input gate, output gate, forget gate, peephole connections - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - :param p: the probability of dropout - :param training: a binary value to indicate training or testing (for dropout training) - """ - - n_in = int(n_in) # ensure sizes have integer type - n_h = int(n_h) # ensure sizes have integer type - - self.input = x - - if p > 0.0: - if training == 1: - srng = RandomStreams(seed=123456) - self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0) - else: - self.input = (1-p) * x - - self.n_in = int(n_in) - self.n_h = int(n_h) - - self.rnn_batch_training = rnn_batch_training - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Input gate weights - self.W_xi = theano.shared(value=Wx_value, name='W_xi') - self.W_hi = theano.shared(value=Wh_value, name='W_hi') - self.w_ci = theano.shared(value=Wc_value, name='w_ci') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Forget gate weights - self.W_xf = theano.shared(value=Wx_value, name='W_xf') - self.W_hf = theano.shared(value=Wh_value, name='W_hf') - self.w_cf = theano.shared(value=Wc_value, name='w_cf') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Output gate weights - self.W_xo = theano.shared(value=Wx_value, name='W_xo') - self.W_ho = theano.shared(value=Wh_value, name='W_ho') - self.w_co = theano.shared(value=Wc_value, name='w_co') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Cell weights - self.W_xc = theano.shared(value=Wx_value, name='W_xc') - self.W_hc = theano.shared(value=Wh_value, name='W_hc') - - # bias - self.b_i = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_i') - self.b_f = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_f') - self.b_o = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_o') - self.b_c = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), 
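# [editor's sketch, not part of the scraped diff] every gate matrix above is
# drawn from N(0, 1/sqrt(fan_in)), which keeps the variance of the
# pre-activations roughly independent of the layer width; `rng` is assumed to
# be a numpy.random.Generator:
import numpy as np

def init_weight(rng, n_in, n_out):
    return rng.normal(0.0, 1.0 / np.sqrt(n_in), size=(n_in, n_out)).astype(np.float32)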
name='b_c') - - # make a layer - - # initial value of hidden and cell state - if self.rnn_batch_training: - self.h0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='c0') - - self.h0 = T.repeat(self.h0, x.shape[1], 0) - self.c0 = T.repeat(self.c0, x.shape[1], 0) - else: - self.h0 = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='c0') - - self.Wix = T.dot(self.input, self.W_xi) - self.Wfx = T.dot(self.input, self.W_xf) - self.Wcx = T.dot(self.input, self.W_xc) - self.Wox = T.dot(self.input, self.W_xo) - - [self.h, self.c], _ = theano.scan(self.recurrent_fn, sequences=[self.Wix, self.Wfx, self.Wcx, self.Wox], - outputs_info=[self.h0, self.c0]) - - self.output = self.h - - def recurrent_fn(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1=None): - """ This implements a genetic recurrent function, called by self.__init__(). - - :param Wix: pre-computed matrix applying the weight matrix W on the input units, for input gate - :param Wfx: Similar to Wix, but for forget gate - :param Wcx: Similar to Wix, but for cell memory - :param Wox: Similar to Wox, but for output gate - :param h_tm1: hidden activation from previous time step - :param c_tm1: activation from cell memory from previous time step - :returns: h_t is the hidden activation of current time step, and c_t is the activation for cell memory of current time step - """ - - h_t, c_t = self.lstm_as_activation_function( - Wix, Wfx, Wcx, Wox, h_tm1, c_tm1) - - return h_t, c_t - - def lstm_as_activation_function(self): - """ A genetic recurrent activation function for variants of LSTM architectures. - The function is called by self.recurrent_fn(). - - """ - pass - - -class LstmDecoderBase(object): - """ This class provides as a base for all long short-term memory (LSTM) related classes. 
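# [editor's sketch, not part of the scraped diff] LstmBase pre-computes the four
# input projections (Wix, Wfx, Wcx, Wox) for the whole sequence in one matmul
# each, because they do not depend on the recurrent state; only the terms in
# h_{t-1} and c_{t-1} have to be evaluated step by step inside the scan:
import numpy as np

rng = np.random.default_rng(0)
x = rng.standard_normal((100, 40))      # (time, n_in)
W_xi = rng.standard_normal((40, 64))    # (n_in, n_h)

Wix_all = x @ W_xi                      # one projection outside the time loop
for t in range(x.shape[0]):
    Wix_t = Wix_all[t]                  # the loop body only indexes into it
    # ... gate computations using Wix_t plus the previous h and c ...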
- Several variants of LSTM were investigated in (Wu & King, ICASSP 2016): Zhizheng Wu, Simon King, "Investigating gated recurrent neural networks for speech synthesis", ICASSP 2016 - - """ - - def __init__(self, rng, x, n_in, n_h, n_out, p=0.0, training=0, rnn_batch_training=False): - """ Initialise all the components in a LSTM block, including input gate, output gate, forget gate, peephole connections - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - :param p: the probability of dropout - :param training: a binary value to indicate training or testing (for dropout training) - """ - - self.input = x - - if p > 0.0: - if training == 1: - srng = RandomStreams(seed=123456) - self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0) - else: - self.input = (1-p) * x - - self.n_in = int(n_in) - self.n_h = int(n_h) - - self.rnn_batch_training = rnn_batch_training - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - Wy_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_out), size=(n_out, n_h)), dtype=config.floatX) - - # Input gate weights - self.W_xi = theano.shared(value=Wx_value, name='W_xi') - self.W_hi = theano.shared(value=Wh_value, name='W_hi') - self.w_ci = theano.shared(value=Wc_value, name='w_ci') - self.W_yi = theano.shared(value=Wy_value, name='W_yi') - - # random initialisation - Uh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_out)), dtype=config.floatX) - - # Output gate weights - self.U_ho = theano.shared(value=Uh_value, name='U_ho') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Forget gate weights - self.W_xf = theano.shared(value=Wx_value, name='W_xf') - self.W_hf = theano.shared(value=Wh_value, name='W_hf') - self.w_cf = theano.shared(value=Wc_value, name='w_cf') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Output gate weights - self.W_xo = theano.shared(value=Wx_value, name='W_xo') - self.W_ho = theano.shared(value=Wh_value, name='W_ho') - self.w_co = theano.shared(value=Wc_value, name='w_co') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Cell weights - self.W_xc = theano.shared(value=Wx_value, name='W_xc') - self.W_hc = theano.shared(value=Wh_value, name='W_hc') - - # bias - self.b_i = theano.shared(value=np.zeros( - (n_h, ), 
dtype=config.floatX), name='b_i')
-        self.b_f = theano.shared(value=np.zeros(
-            (n_h, ), dtype=config.floatX), name='b_f')
-        self.b_o = theano.shared(value=np.zeros(
-            (n_h, ), dtype=config.floatX), name='b_o')
-        self.b_c = theano.shared(value=np.zeros(
-            (n_h, ), dtype=config.floatX), name='b_c')
-        self.b = theano.shared(value=np.zeros(
-            (n_out, ), dtype=config.floatX), name='b')
-
-        # make a layer
-
-        # initial value of hidden and cell state
-        if self.rnn_batch_training:
-            self.h0 = theano.shared(value=np.zeros(
-                (1, n_h), dtype=config.floatX), name='h0')
-            self.c0 = theano.shared(value=np.zeros(
-                (1, n_h), dtype=config.floatX), name='c0')
-            self.y0 = theano.shared(value=np.zeros(
-                (1, n_out), dtype=config.floatX), name='y0')
-
-            self.h0 = T.repeat(self.h0, x.shape[1], 0)
-            self.c0 = T.repeat(self.c0, x.shape[1], 0)
-            self.y0 = T.repeat(self.y0, x.shape[1], 0)
-        else:
-            self.h0 = theano.shared(value=np.zeros(
-                (n_h, ), dtype=config.floatX), name='h0')
-            self.c0 = theano.shared(value=np.zeros(
-                (n_h, ), dtype=config.floatX), name='c0')
-            self.y0 = theano.shared(value=np.zeros(
-                (n_out, ), dtype=config.floatX), name='y0')
-
-        self.Wix = T.dot(self.input, self.W_xi)
-        self.Wfx = T.dot(self.input, self.W_xf)
-        self.Wcx = T.dot(self.input, self.W_xc)
-        self.Wox = T.dot(self.input, self.W_xo)
-
-        [self.h, self.c, self.y], _ = theano.scan(self.recurrent_fn, sequences=[self.Wix, self.Wfx, self.Wcx, self.Wox],
-                                                  outputs_info=[self.h0, self.c0, self.y0])
-
-        self.output = self.y
-
-    def recurrent_fn(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1=None, y_tm1=None):
-        """ This implements a generic recurrent function, called by self.__init__().
-
-        :param Wix: pre-computed product of the input-gate weight matrix and the input units
-        :param Wfx: similar to Wix, but for the forget gate
-        :param Wcx: similar to Wix, but for the cell memory
-        :param Wox: similar to Wix, but for the output gate
-        :param h_tm1: hidden activation from the previous time step
-        :param c_tm1: cell memory activation from the previous time step
-        :param y_tm1: output from the previous time step
-        :returns: h_t, c_t and y_t, the hidden, cell and output activations of the current time step
-        """
-
-        h_t, c_t, y_t = self.lstm_as_activation_function(
-            Wix, Wfx, Wcx, Wox, h_tm1, c_tm1, y_tm1)
-
-        return h_t, c_t, y_t
-
-    def lstm_as_activation_function(self):
-        """ A generic recurrent activation function for variants of LSTM architectures.
-        The function is called by self.recurrent_fn().
-
-        """
-        pass
-
-
-class VanillaLstm(LstmBase):
-    """ This class implements the standard LSTM block, inheriting the generic class :class:`layers.gating.LstmBase`.
-
-    """
-
-    def __init__(self, rng, x, n_in, n_h, p=0.0, training=0, rnn_batch_training=False):
-        """ Initialise a vanilla LSTM block
-
-        :param rng: random state, fixed for reproducible objective results
-        :param x: input to a network
-        :param n_in: number of input features
-        :type n_in: integer
-        :param n_h: number of hidden units
-        :type n_h: integer
-        """
-
-        LstmBase.__init__(self, rng, x, n_in, n_h, p,
-                          training, rnn_batch_training)
-
-        self.params = [self.W_xi, self.W_hi, self.w_ci,
-                       self.W_xf, self.W_hf, self.w_cf,
-                       self.W_xo, self.W_ho, self.w_co,
-                       self.W_xc, self.W_hc,
-                       self.b_i, self.b_f, self.b_o, self.b_c]
-
-    def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1):
-        """ This function treats the LSTM block as an activation function, and implements the standard LSTM activation function.
- The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn` - - """ - - i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + - self.w_ci * c_tm1 + self.b_i) # - f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + - self.w_cf * c_tm1 + self.b_f) # - - c_t = f_t * c_tm1 + i_t * \ - T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c) - - o_t = T.nnet.sigmoid( - Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o) - - h_t = o_t * T.tanh(c_t) - - return h_t, c_t # , i_t, f_t, o_t - - -class VanillaLstmDecoder(LstmDecoderBase): - """ This class implements the standard LSTM block, inheriting the genetic class :class:`layers.gating.LstmBase`. - - """ - - def __init__(self, rng, x, n_in, n_h, n_out, p=0.0, training=0, rnn_batch_training=False): - """ Initialise a vanilla LSTM block - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - """ - - self.n_out = int(n_out) - - LstmDecoderBase.__init__( - self, rng, x, n_in, n_h, n_out, p, training, rnn_batch_training) - - self.params = [self.W_xi, self.W_hi, self.w_ci, self.W_yi, - self.W_xf, self.W_hf, self.w_cf, - self.W_xo, self.W_ho, self.w_co, - self.W_xc, self.W_hc, - self.U_ho, - self.b_i, self.b_f, self.b_o, self.b_c, self.b] - - def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1, y_tm1): - """ This function treats the LSTM block as an activation function, and implements the standard LSTM activation function. - The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn` - - """ - - i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + - self.w_ci * c_tm1 + self.b_i) # - f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + - self.w_cf * c_tm1 + self.b_f) # - - c_t = f_t * c_tm1 + i_t * \ - T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + - T.dot(y_tm1, self.W_yi) + self.b_c) - - o_t = T.nnet.sigmoid( - Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o) - - h_t = o_t * T.tanh(c_t) - - y_t = T.dot(h_t, self.U_ho) + self.b - - return h_t, c_t, y_t # , i_t, f_t, o_t - - -class SimplifiedLstmDecoder(LstmDecoderBase): - """ This class implements a simplified LSTM block which only keeps the forget gate, inheriting the genetic class :class:`layers.gating.LstmBase`. - - """ - - def __init__(self, rng, x, n_in, n_h, n_out, p=0.0, training=0, rnn_batch_training=False): - """ Initialise a LSTM with only the forget gate - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - """ - - self.n_out = int(n_out) - - LstmDecoderBase.__init__( - self, rng, x, n_in, n_h, n_out, p, training, rnn_batch_training) - - self.params = [self.W_yi, - self.W_xf, self.W_hf, - self.W_xc, self.W_hc, - self.U_ho, - self.b_f, self.b_c, self.b] - - def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1, y_tm1): - """ This function treats the LSTM block as an activation function, and implements the LSTM (simplified LSTM) activation function. 
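# [editor's sketch, not part of the scraped diff] the standard peephole LSTM
# step implemented by VanillaLstm above, as a single NumPy function; `p` is a
# dict holding the recurrent and peephole weights used by the class (W_hi,
# W_hf, W_hc, W_ho of shape (n_h, n_h); w_ci, w_cf, w_co and the biases of
# shape (n_h,)):
import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def vanilla_lstm_step(Wix, Wfx, Wcx, Wox, h_prev, c_prev, p):
    i_t = sigmoid(Wix + h_prev @ p['W_hi'] + p['w_ci'] * c_prev + p['b_i'])
    f_t = sigmoid(Wfx + h_prev @ p['W_hf'] + p['w_cf'] * c_prev + p['b_f'])
    c_t = f_t * c_prev + i_t * np.tanh(Wcx + h_prev @ p['W_hc'] + p['b_c'])
    o_t = sigmoid(Wox + h_prev @ p['W_ho'] + p['w_co'] * c_t + p['b_o'])
    h_t = o_t * np.tanh(c_t)
    return h_t, c_t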
- The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn` - - """ - - f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + - self.b_f) # self.w_cf * c_tm1 - - c_t = f_t * c_tm1 + (1 - f_t) * T.tanh(Wcx + T.dot(h_tm1, - self.W_hc) + T.dot(y_tm1, self.W_yi) + self.b_c) - - h_t = T.tanh(c_t) - - y_t = T.dot(h_t, self.U_ho) + self.b - - return h_t, c_t, y_t - - -class LstmNFG(LstmBase): - """ This class implements a LSTM block without the forget gate, inheriting the genetic class :class:`layers.gating.LstmBase`. - - """ - - def __init__(self, rng, x, n_in, n_h, p=0.0, training=0, rnn_batch_training=False): - """ Initialise a LSTM with the forget gate - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - """ - - LstmBase.__init__(self, rng, x, n_in, n_h, p, - training, rnn_batch_training) - - self.params = [self.W_xi, self.W_hi, self.w_ci, - self.W_xo, self.W_ho, self.w_co, - self.W_xc, self.W_hc, - self.b_i, self.b_o, self.b_c] - - def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1): - """ This function treats the LSTM block as an activation function, and implements the LSTM (without the forget gate) activation function. - The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn` - - """ - - i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + - self.w_ci * c_tm1 + self.b_i) # - - c_t = c_tm1 + i_t * \ - T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c) # f_t * - - o_t = T.nnet.sigmoid( - Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o) - - h_t = o_t * T.tanh(c_t) - - return h_t, c_t - - -class LstmNIG(LstmBase): - """ This class implements a LSTM block without the input gate, inheriting the genetic class :class:`layers.gating.LstmBase`. - - """ - - def __init__(self, rng, x, n_in, n_h, p=0.0, training=0, rnn_batch_training=False): - """ Initialise a LSTM with the input gate - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - """ - - LstmBase.__init__(self, rng, x, n_in, n_h, p, - training, rnn_batch_training) - - self.params = [self.W_xf, self.W_hf, self.w_cf, - self.W_xo, self.W_ho, self.w_co, - self.W_xc, self.W_hc, - self.b_f, self.b_o, self.b_c] - - def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1): - """ This function treats the LSTM block as an activation function, and implements the LSTM (without the input gate) activation function. - The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn` - - """ - - f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + - self.w_cf * c_tm1 + self.b_f) # - - c_t = f_t * c_tm1 + \ - T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c) # i_t * - - o_t = T.nnet.sigmoid( - Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o) - - h_t = o_t * T.tanh(c_t) - - return h_t, c_t - - -class LstmNOG(LstmBase): - """ This class implements a LSTM block without the output gate, inheriting the genetic class :class:`layers.gating.LstmBase`. 
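# [editor's sketch, not part of the scraped diff] the ablations defined here and
# just below each clamp one gate of the vanilla step to 1: LstmNFG drops the
# forget gate (c_t = c_{t-1} + i_t * candidate), LstmNIG drops the input gate
# (c_t = f_t * c_{t-1} + candidate) and LstmNOG drops the output gate
# (h_t = tanh(c_t)). For example, the LstmNFG cell update in NumPy:
import numpy as np

def lstm_nfg_cell(Wcx, i_t, h_prev, c_prev, W_hc, b_c):
    return c_prev + i_t * np.tanh(Wcx + h_prev @ W_hc + b_c)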
- - """ - - def __init__(self, rng, x, n_in, n_h, p=0.0, training=0, rnn_batch_training=False): - """ Initialise a LSTM with the output gate - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - """ - - LstmBase.__init__(self, rng, x, n_in, n_h, p, - training, rnn_batch_training) - - self.params = [self.W_xi, self.W_hi, self.w_ci, - self.W_xf, self.W_hf, self.w_cf, - self.W_xc, self.W_hc, - self.b_i, self.b_f, - self.b_c] - - def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1): - """ This function treats the LSTM block as an activation function, and implements the LSTM (without the output gate) activation function. - The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn` - - """ - - i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + - self.w_ci * c_tm1 + self.b_i) # - f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + - self.w_cf * c_tm1 + self.b_f) # - - c_t = f_t * c_tm1 + i_t * \ - T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c) # i_t * - - h_t = T.tanh(c_t) - - return h_t, c_t - - -class LstmNoPeepholes(LstmBase): - """ This class implements a LSTM block without the peephole connections, inheriting the genetic class :class:`layers.gating.LstmBase`. - - """ - - def __init__(self, rng, x, n_in, n_h, p=0.0, training=0, rnn_batch_training=False): - """ Initialise a LSTM with the peephole connections - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - """ - - LstmBase.__init__(self, rng, x, n_in, n_h, p, - training, rnn_batch_training) - - self.params = [self.W_xi, self.W_hi, # self.W_ci, - self.W_xf, self.W_hf, # self.W_cf, - self.W_xo, self.W_ho, # self.W_co, - self.W_xc, self.W_hc, - self.b_i, self.b_f, - self.b_o, self.b_c] - - def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1): - """ This function treats the LSTM block as an activation function, and implements the LSTM (without the output gate) activation function. - The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn` - - """ - - i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + self.b_i) - f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + self.b_f) - - c_t = f_t * c_tm1 + i_t * \ - T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c) - - o_t = T.nnet.sigmoid(Wox + T.dot(h_tm1, self.W_ho) + self.b_o) - - h_t = o_t * T.tanh(c_t) - - return h_t, c_t - - -class SimplifiedLstm(LstmBase): - """ This class implements a simplified LSTM block which only keeps the forget gate, inheriting the genetic class :class:`layers.gating.LstmBase`. 
- - """ - - def __init__(self, rng, x, n_in, n_h, p=0.0, training=0, rnn_batch_training=False): - """ Initialise a LSTM with only the forget gate - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - """ - - LstmBase.__init__(self, rng, x, n_in, n_h, p, - training, rnn_batch_training) - - self.params = [self.W_xf, self.W_hf, - self.W_xc, self.W_hc, - self.b_f, self.b_c] - - self.L2_cost = (self.W_xf ** 2).sum() + (self.W_hf ** 2).sum() + \ - (self.W_xc ** 2).sum() + (self.W_hc ** 2).sum() - - def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1): - """ This function treats the LSTM block as an activation function, and implements the LSTM (simplified LSTM) activation function. - The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn` - - """ - - f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + - self.b_f) # self.w_cf * c_tm1 - - c_t = f_t * c_tm1 + (1 - f_t) * T.tanh(Wcx + - T.dot(h_tm1, self.W_hc) + self.b_c) - - h_t = T.tanh(c_t) - - return h_t, c_t - - -class SimplifiedGRU(LstmBase): - """ This class implements a simplified GRU block which only keeps the forget gate, inheriting the genetic class :class:`layers.gating.LstmBase`. - - """ - - def __init__(self, rng, x, n_in, n_h, p=0.0, training=0, rnn_batch_training=False): - """ Initialise a LSTM with the the forget gate - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - """ - - LstmBase.__init__(self, rng, x, n_in, n_h, p, - training, rnn_batch_training) - - self.params = [self.W_xf, self.W_hf, self.w_cf, - self.W_xc, self.W_hc, - self.b_f, self.b_c] - - self.L2_cost = (self.W_xf ** 2).sum() + (self.W_hf ** 2).sum() + \ - (self.W_xc ** 2).sum() + (self.W_hc ** 2).sum() - - def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1): - """ This function treats the LSTM block as an activation function, and implements the LSTM (simplified LSTM) activation function. 
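# [editor's sketch, not part of the scraped diff] SimplifiedLstm keeps only the
# forget gate and couples the input gate to it as (1 - f_t), roughly halving
# the gate parameters relative to the vanilla block:
import numpy as np

def simplified_lstm_step(Wfx, Wcx, h_prev, c_prev, W_hf, W_hc, b_f, b_c):
    f_t = 1.0 / (1.0 + np.exp(-(Wfx + h_prev @ W_hf + b_f)))
    c_t = f_t * c_prev + (1.0 - f_t) * np.tanh(Wcx + h_prev @ W_hc + b_c)
    h_t = np.tanh(c_t)  # no output gate in this variant
    return h_t, c_t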
- The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn` - - """ - ##can_h_t = T.tanh(Whx + r_t * T.dot(h_tm1, self.W_hh) + self.b_h) - - f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + - self.b_f) # self.w_cf * c_tm1 - - can_h_t = T.tanh(Wcx + f_t * T.dot(h_tm1, self.W_hc) + self.b_c) - - h_t = self.w_cf * (1.0 - f_t) * h_tm1 + f_t * can_h_t - c_t = h_t - -# c_t = f_t * c_tm1 + (1 - f_t) * T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c) - -# h_t = T.tanh(c_t) - - return h_t, c_t - - -class BidirectionSLstm(SimplifiedLstm): - - def __init__(self, rng, x, n_in, n_h, n_out, p=0.0, training=0, rnn_batch_training=False): - - fwd = SimplifiedLstm(rng, x, n_in, n_h, p, - training, rnn_batch_training) - bwd = SimplifiedLstm(rng, x[::-1], n_in, n_h, - p, training, rnn_batch_training) - - self.params = fwd.params + bwd.params - - self.output = T.concatenate([fwd.output, bwd.output[::-1]], axis=-1) - - -class BidirectionLstm(VanillaLstm): - - def __init__(self, rng, x, n_in, n_h, n_out, p=0.0, training=0, rnn_batch_training=False): - - fwd = VanillaLstm(rng, x, n_in, n_h, p, training, rnn_batch_training) - bwd = VanillaLstm(rng, x[::-1], n_in, n_h, p, - training, rnn_batch_training) - - self.params = fwd.params + bwd.params - - self.output = T.concatenate([fwd.output, bwd.output[::-1]], axis=-1) - - -class RecurrentOutput(object): - def __init__(self, rng, x, n_in, n_out, p=0.0, training=0, rnn_batch_training=False): - - self.W_h = theano.shared(value=np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_out), size=(n_in, n_out)), dtype=config.floatX), name='W_h') - self.W_y = theano.shared(value=np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_out), size=(n_out, n_out)), dtype=config.floatX), name='W_y') - - self.b_y = theano.shared(value=np.zeros( - (n_out, ), dtype=config.floatX), name='b_y') - - -# Gated Recurrent Unit -class GatedRecurrentUnit(object): - """ This class implements a gated recurrent unit (GRU), as proposed in Cho et al 2014 (http://arxiv.org/pdf/1406.1078.pdf). 
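# [editor's sketch, not part of the scraped diff] the bidirectional wrappers
# above run one block over the sequence and a second block over its reversal,
# then re-reverse the backward outputs before concatenating, so both halves are
# time-aligned; `forward_fn` and `backward_fn` stand in for the two blocks:
import numpy as np

def bidirectional(forward_fn, backward_fn, x):
    fwd = forward_fn(x)               # (time, n_h)
    bwd = backward_fn(x[::-1])[::-1]  # reverse in, reverse out
    return np.concatenate([fwd, bwd], axis=-1)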
- - """ - - def __init__(self, rng, x, n_in, n_h, p=0.0, training=0, rnn_batch_training=False): - """ Initialise a gated recurrent unit - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - :param p: the probability of dropout - :param training: a binary value to indicate training or testing (for dropout training) - """ - - self.n_in = int(n_in) - self.n_h = int(n_h) - - self.rnn_batch_training = rnn_batch_training - - self.input = x - - if p > 0.0: - if training == 1: - srng = RandomStreams(seed=123456) - self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0) - else: - self.input = (1-p) * x - - self.W_xz = theano.shared(value=np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_in), - size=(n_in, n_h)), dtype=config.floatX), name='W_xz') - self.W_hz = theano.shared(value=np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_h), - size=(n_h, n_h)), dtype=config.floatX), name='W_hz') - - self.W_xr = theano.shared(value=np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_in), - size=(n_in, n_h)), dtype=config.floatX), name='W_xr') - self.W_hr = theano.shared(value=np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_h), - size=(n_h, n_h)), dtype=config.floatX), name='W_hr') - - self.W_xh = theano.shared(value=np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_in), - size=(n_in, n_h)), dtype=config.floatX), name='W_xh') - self.W_hh = theano.shared(value=np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_h), - size=(n_h, n_h)), dtype=config.floatX), name='W_hh') - - self.b_z = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_z') - - self.b_r = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_r') - - self.b_h = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_h') - - if self.rnn_batch_training: - self.h0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='c0') - - self.h0 = T.repeat(self.h0, x.shape[1], 0) - self.c0 = T.repeat(self.c0, x.shape[1], 0) - else: - self.h0 = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='c0') - - # pre-compute these for fast computation - self.Wzx = T.dot(self.input, self.W_xz) - self.Wrx = T.dot(self.input, self.W_xr) - self.Whx = T.dot(self.input, self.W_xh) - - [self.h, self.c], _ = theano.scan(self.gru_as_activation_function, - sequences=[ - self.Wzx, self.Wrx, self.Whx], - outputs_info=[self.h0, self.c0]) # - - self.output = self.h - - self.params = [self.W_xz, self.W_hz, self.W_xr, self.W_hr, self.W_xh, self.W_hh, - self.b_z, self.b_r, self.b_h] - - self.L2_cost = (self.W_xz ** 2).sum() + (self.W_hz ** 2).sum() + (self.W_xr ** 2).sum() + \ - (self.W_hr ** 2).sum() + (self.W_xh ** 2).sum() + (self.W_hh ** 2).sum() - - def gru_as_activation_function(self, Wzx, Wrx, Whx, h_tm1, c_tm1=None): - """ This function treats the GRU block as an activation function, and implements the GRU activation function. - This function is called by :func:`layers.gating.GatedRecurrentUnit.__init__`. - Wzx, Wrx, Whx have been pre-computed before passing to this function. - - To make the same interface as LSTM, we keep a c_tm1 (means the cell state of previous time step, but GRU does not maintain a cell state). 
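# [editor's sketch, not part of the scraped diff] the GRU update evaluated just
# below (Cho et al. 2014): an update gate z_t, a reset gate r_t and a candidate
# state, with no separate cell state. The mixing convention mirrors the class
# exactly, h_t = (1 - z_t) * h_prev + z_t * candidate:
import numpy as np

def sigmoid(a):
    return 1.0 / (1.0 + np.exp(-a))

def gru_step(Wzx, Wrx, Whx, h_prev, W_hz, W_hr, W_hh, b_z, b_r, b_h):
    z_t = sigmoid(Wzx + h_prev @ W_hz + b_z)
    r_t = sigmoid(Wrx + h_prev @ W_hr + b_r)
    cand = np.tanh(Whx + r_t * (h_prev @ W_hh) + b_h)
    return (1.0 - z_t) * h_prev + z_t * cand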
- """ - - z_t = T.nnet.sigmoid(Wzx + T.dot(h_tm1, self.W_hz) + self.b_z) - r_t = T.nnet.sigmoid(Wrx + T.dot(h_tm1, self.W_hr) + self.b_r) - can_h_t = T.tanh(Whx + r_t * T.dot(h_tm1, self.W_hh) + self.b_h) - - h_t = (1 - z_t) * h_tm1 + z_t * can_h_t - - c_t = h_t # in order to have the same interface as LSTM - - return h_t, c_t -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - - -import numpy -import time -import pickle -import gzip -import sys -import os -import copy - -import theano -import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams -from theano.ifelse import ifelse - -import logging - - -class MixtureDensityOutputLayer(object): - def __init__(self, rng, input, n_in, n_out, n_component, var_floor): - self.input = input - - W_value = rng.normal(0.0, 1.0/numpy.sqrt(n_in), - size=(n_in, n_out*n_component)) - self.W_mu = theano.shared(value=numpy.asarray( - W_value, dtype=theano.config.floatX), name='W_mu', borrow=True) - - self.W_sigma = theano.shared(value=numpy.asarray( - W_value.copy(), dtype=theano.config.floatX), name='W_sigma', borrow=True) - - W_mix_value = rng.normal( - 0.0, 1.0/numpy.sqrt(n_in), size=(n_in, n_component)) - self.W_mix = theano.shared(value=numpy.asarray( - W_mix_value, dtype=theano.config.floatX), name='W_mix', borrow=True) - - # assume linear output for mean vectors - self.mu = T.dot(self.input, self.W_mu) - - # self.sigma = T.nnet.softplus(T.dot(self.input, self.W_sigma)) # + 0.0001 - self.sigma = T.exp(T.dot(self.input, self.W_sigma)) # Zen et al. 
2014 - self.sigma = T.maximum(var_floor, self.sigma) # hard variance flooring - # note: sigma contains variances, so var_floor=0.01 means that - # the lowest possible standard deviation is 0.1 - - self.mix = T.nnet.softmax(T.dot(self.input, self.W_mix)) - - self.delta_W_mu = theano.shared(value=numpy.zeros((n_in, n_out*n_component), - dtype=theano.config.floatX), name='delta_W_mu') - self.delta_W_sigma = theano.shared(value=numpy.zeros((n_in, n_out*n_component), - dtype=theano.config.floatX), name='delta_W_sigma') - self.delta_W_mix = theano.shared(value=numpy.zeros((n_in, n_component), - dtype=theano.config.floatX), name='delta_W_mix') - - self.params = [self.W_mu, self.W_sigma, self.W_mix] - self.delta_params = [self.delta_W_mu, - self.delta_W_sigma, self.delta_W_mix] - - -class LinearLayer(object): - def __init__(self, rng, input, n_in, n_out, W=None, b=None): - n_in = int(n_in) # ensure sizes have integer type - n_out = int(n_out) # ensure sizes have integer type - - self.input = input - - # initialize with 0 the weights W as a matrix of shape (n_in, n_out) - if W is None: - W_value = rng.normal(0.0, 1.0/numpy.sqrt(n_in), size=(n_in, n_out)) - W = theano.shared(value=numpy.asarray( - W_value, dtype=theano.config.floatX), name='W', borrow=True) - - if b is None: - b = theano.shared(value=numpy.zeros((n_out,), - dtype=theano.config.floatX), - name='b', borrow=True) - - self.W = W - self.b = b - - self.delta_W = theano.shared(value=numpy.zeros((n_in, n_out), - dtype=theano.config.floatX), name='delta_W') - - self.delta_b = theano.shared(value=numpy.zeros_like(self.b.get_value(borrow=True), - dtype=theano.config.floatX), name='delta_b') - - self.output = T.dot(self.input, self.W) + self.b - - self.params = [self.W, self.b] - self.delta_params = [self.delta_W, self.delta_b] - - def errors(self, y): - L = T.sum((self.output-y)*(self.output-y), axis=1) - errors = T.mean(L) - return (errors) - - def init_params(self, iparams): - updates = {} - for param, iparam in zip(self.params, iparams): - updates[param] = iparam - return updates - - -class SigmoidLayer(object): - def __init__(self, rng, x, n_in, n_out, W=None, b=None, activation=T.tanh, p=0.0, training=0): - n_in = int(n_in) # ensure sizes have integer type - n_out = int(n_out) # ensure sizes have integer type - - self.x = x - - srng = RandomStreams(seed=123456) - - def _drop(srng, x, p): - mask = srng.binomial(n=1, p=1.0-p, size=x.shape) - return x * T.cast(mask, theano.config.floatX) - - if p > 0.0: - self.x = ifelse(T.eq(training, numpy.cast['int32'](1)), _drop( - srng, x, p), numpy.cast[theano.config.floatX](1.0-p) * x) - - # initialize with 0 the weights W as a matrix of shape (n_in, n_out) - if W is None: - W_value = numpy.asarray(rng.normal(0.0, 1.0/numpy.sqrt(n_in), - size=(n_in, n_out)), dtype=theano.config.floatX) - W = theano.shared(value=W_value, - name='W', borrow=True) - if b is None: - b = theano.shared(value=numpy.zeros((n_out,), - dtype=theano.config.floatX), - name='b', borrow=True) - - self.W = W - self.b = b - - self.delta_W = theano.shared(value=numpy.zeros((n_in, n_out), - dtype=theano.config.floatX), name='delta_W') - - self.delta_b = theano.shared(value=numpy.zeros_like(self.b.get_value(borrow=True), - dtype=theano.config.floatX), name='delta_b') - - self.output = T.dot(self.x, self.W) + self.b - self.output = activation(self.output) - - self.params = [self.W, self.b] - self.delta_params = [self.delta_W, self.delta_b] - - def errors(self, y): - L = T.sum((self.output-y)*(self.output-y), axis=1) - errors = T.mean(L) 
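# [editor's sketch, not part of the scraped diff] MixtureDensityOutputLayer
# above predicts, per frame, the parameters of a Gaussian mixture: means
# (linear), variances (exp, then hard-floored) and mixture weights (softmax,
# written here in the numerically stable form equivalent to T.nnet.softmax):
import numpy as np

def mdn_heads(h, W_mu, W_sigma, W_mix, var_floor=0.01):
    mu = h @ W_mu                                       # (batch, n_out * n_component)
    sigma = np.maximum(var_floor, np.exp(h @ W_sigma))  # hard variance floor
    logits = h @ W_mix
    logits -= logits.max(axis=1, keepdims=True)
    mix = np.exp(logits)
    mix /= mix.sum(axis=1, keepdims=True)               # (batch, n_component)
    return mu, sigma, mix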
- return (errors) - - def init_params(self, iparams): - updates = {} - for param, iparam in zip(self.params, iparams): - updates[param] = iparam - return updates - - -class GeneralLayer(object): - - def __init__(self, rng, x, n_in, n_out, W=None, b=None, activation='linear', p=0.0, training=0): - ''' - General feed-forward layer with any activation - ''' - logger = logging.getLogger('general_layer') - - n_in = int(n_in) # ensure sizes have integer type - n_out = int(n_out) # ensure sizes have integer type - - self.x = x - - srng = RandomStreams(seed=123456) - - def _drop(srng, x, p): - mask = srng.binomial(n=1, p=1.0-p, size=x.shape) - return x * T.cast(mask, theano.config.floatX) - - if p > 0.0: - self.x = ifelse(T.eq(training, numpy.cast['int32'](1)), _drop( - srng, x, p), numpy.cast[theano.config.floatX](1.0-p) * x) - - # initialize with 0 the weights W as a matrix of shape (n_in, n_out) - if W is None: - W_value = numpy.asarray(rng.normal(0.0, 1.0/numpy.sqrt(n_in), - size=(n_in, n_out)), dtype=theano.config.floatX) - W = theano.shared(value=W_value, - name='W', borrow=True) - if b is None: - b = theano.shared(value=numpy.zeros((n_out,), - dtype=theano.config.floatX), - name='b', borrow=True) - - self.W = W - self.b = b - - self.delta_W = theano.shared(value=numpy.zeros((n_in, n_out), - dtype=theano.config.floatX), name='delta_W') - - self.delta_b = theano.shared(value=numpy.zeros_like(self.b.get_value(borrow=True), - dtype=theano.config.floatX), name='delta_b') - - self.output = T.dot(self.x, self.W) + self.b - - if activation == 'sigmoid': - self.output = T.nnet.sigmoid(self.output) - - elif activation == 'softmax': - self.output = T.nnet.softmax(self.output) - - elif activation == 'tanh': - self.output = T.tanh(self.output) - - elif activation == 'relu': # rectifier linear unit - self.output = T.maximum(0.0, self.output) - - elif activation == 'resu': # rectifier smooth unit - self.output = numpy.log(1.0 + numpy.exp(self.output)) - - elif activation == 'linear': - pass - - else: - logger.critical( - 'the input activation function: %s is not supported right now. 
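# [editor's sketch, not part of the scraped diff] the dropout scheme shared by
# SigmoidLayer and GeneralLayer: during training each input unit is kept with
# probability 1-p, and at test time the activations are rescaled by 1-p instead
# (classic, non-inverted dropout); `rng` is assumed to be a
# numpy.random.Generator:
import numpy as np

def drop(x, p, training, rng):
    if training:
        return x * rng.binomial(1, 1.0 - p, size=x.shape)
    return (1.0 - p) * x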
Please modify layers.py to support' % (activation)) - raise - - # parameters of the model - self.params = [self.W, self.b] - self.delta_params = [self.delta_W, self.delta_b] - - def errors(self, y): - errors = T.mean(T.sum((self.output-y)**2, axis=1)) - - return errors - - def init_params(self, iparams): - updates = {} - for param, iparam in zip(self.params, iparams): - updates[param] = iparam - return updates - - -class HiddenLayer(object): - def __init__(self, rng, input, n_in, n_out, W=None, b=None, - activation=T.tanh, do_maxout=False, pool_size=1, - do_pnorm=False, pnorm_order=1): - """ Class for hidden layer """ - self.input = input - self.n_in = n_in - self.n_out = n_out - - if W is None: - - W_values = numpy.asarray(rng.normal(0.0, 1.0/numpy.sqrt(n_in), - size=(n_in, n_out)), dtype=theano.config.floatX) - - W = theano.shared(value=W_values, name='W', borrow=True) - - if b is None: - b_values = numpy.zeros((n_out,), dtype=theano.config.floatX) - b = theano.shared(value=b_values, name='b', borrow=True) - - self.W = W - self.b = b - - self.delta_W = theano.shared(value=numpy.zeros((n_in, n_out), - dtype=theano.config.floatX), name='delta_W') - - self.delta_b = theano.shared(value=numpy.zeros_like(self.b.get_value(borrow=True), - dtype=theano.config.floatX), name='delta_b') - - lin_output = T.dot(input, self.W) + self.b - if do_maxout == True: - self.last_start = n_out - pool_size - self.tmp_output = lin_output[:, 0:self.last_start+1:pool_size] - for i in range(1, pool_size): - cur = lin_output[:, i:self.last_start+i+1:pool_size] - self.tmp_output = T.maximum(cur, self.tmp_output) - self.output = activation(self.tmp_output) - elif do_pnorm == True: - self.last_start = n_out - pool_size - self.tmp_output = abs( - lin_output[:, 0:self.last_start+1:pool_size]) ** pnorm_order - for i in range(1, pool_size): - cur = abs(lin_output[:, i:self.last_start + - i+1:pool_size]) ** pnorm_order - self.tmp_output = self.tmp_output + cur - self.tmp_output = self.tmp_output ** (1.0 / pnorm_order) - self.output = activation(self.tmp_output) - else: - self.output = (lin_output if activation is None - else activation(lin_output)) - -# self.output = self.rectifier_linear(lin_output) - - # parameters of the model - self.params = [self.W, self.b] - self.delta_params = [self.delta_W, self.delta_b] - - def rectifier_linear(self, x): - x = T.maximum(0.0, x) - - return x - - def rectifier_smooth(self, x): - x = numpy.log(1.0 + numpy.exp(x)) - - return x - - -class SplitHiddenLayer(object): - ''' - The nin x nout matrix is vertically split into 2 portions which can be updated - independently. - - n_in1 -- by convention, use this part for subword contexts - n_in2 -- by convention, use this part for word projections - - Bias is not split in any way. 
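# [editor's sketch, not part of the scraped diff] the maxout pooling used by
# HiddenLayer above: the linear outputs are grouped into strided pools of
# pool_size units and only the maximum in each pool is kept, giving
# n_out // pool_size units when n_out is divisible by pool_size:
import numpy as np

def maxout(lin_output, pool_size):
    last_start = lin_output.shape[1] - pool_size
    pooled = lin_output[:, 0:last_start + 1:pool_size]
    for i in range(1, pool_size):
        pooled = np.maximum(pooled, lin_output[:, i:last_start + i + 1:pool_size])
    return pooled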
-    '''
-
-    def __init__(self, rng, input, n_in1, n_in2, n_out, W1=None, W2=None, b=None,
-                 activation=T.tanh, do_maxout=False, pool_size=1,
-                 do_pnorm=False, pnorm_order=1):
-        """ Class for hidden layer """
-        self.input = input
-        #self.n_in = n_in
-        self.n_out = n_out
-
-        if W1 is None:
-
-            W1_values = numpy.asarray(rng.normal(0.0, 1.0/numpy.sqrt(n_in1),
-                                                 size=(n_in1, n_out)), dtype=theano.config.floatX)
-
-            W1 = theano.shared(value=W1_values, name='W1', borrow=True)
-
-        if W2 is None:
-
-            W2_values = numpy.asarray(rng.normal(0.0, 1.0/numpy.sqrt(n_in2),
-                                                 size=(n_in2, n_out)), dtype=theano.config.floatX)
-
-            W2 = theano.shared(value=W2_values, name='W2', borrow=True)
-
-        if b is None:
-            b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
-            b = theano.shared(value=b_values, name='b', borrow=True)
-
-        self.W1 = W1
-        self.W2 = W2
-        self.b = b
-
-        self.delta_W1 = theano.shared(value=numpy.zeros((n_in1, n_out),
-                                                        dtype=theano.config.floatX), name='delta_W1')
-
-        self.delta_W2 = theano.shared(value=numpy.zeros((n_in2, n_out),
-                                                        dtype=theano.config.floatX), name='delta_W2')
-
-        self.delta_b = theano.shared(value=numpy.zeros_like(self.b.get_value(borrow=True),
-                                                            dtype=theano.config.floatX), name='delta_b')
-
-        lin_output = T.dot(input, T.concatenate([self.W1, self.W2])) + self.b
-        if do_maxout:
-            self.last_start = n_out - pool_size
-            self.tmp_output = lin_output[:, 0:self.last_start+1:pool_size]
-            for i in range(1, pool_size):
-                cur = lin_output[:, i:self.last_start+i+1:pool_size]
-                self.tmp_output = T.maximum(cur, self.tmp_output)
-            self.output = activation(self.tmp_output)
-        elif do_pnorm:
-            self.last_start = n_out - pool_size
-            self.tmp_output = abs(
-                lin_output[:, 0:self.last_start+1:pool_size]) ** pnorm_order
-            for i in range(1, pool_size):
-                cur = abs(lin_output[:, i:self.last_start +
-                                     i+1:pool_size]) ** pnorm_order
-                self.tmp_output = self.tmp_output + cur
-            self.tmp_output = self.tmp_output ** (1.0 / pnorm_order)
-            self.output = activation(self.tmp_output)
-        else:
-            self.output = (lin_output if activation is None
-                           else activation(lin_output))
-
-#        self.output = self.rectifier_linear(lin_output)
-
-        # parameters of the model
-        self.params = [self.W1, self.W2, self.b]
-        self.delta_params = [self.delta_W1, self.delta_W2, self.delta_b]
-
-    def rectifier_linear(self, x):
-        x = T.maximum(0.0, x)
-
-        return x
-
-    def rectifier_smooth(self, x):
-        x = T.log(1.0 + T.exp(x))
-
-        return x
-
-
-class TokenProjectionLayer(object):
-    '''
-    A single projection, not shared. Merging of outputs with non-projected inputs is handled elsewhere.
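# [editor's sketch, not part of the scraped diff] SplitHiddenLayer stacks its
# two independently-updated weight blocks back together at forward time, so the
# layer behaves like a single (n_in1 + n_in2) x n_out matrix:
import numpy as np

n_in1, n_in2, n_out = 30, 10, 64
rng = np.random.default_rng(0)
W1 = rng.standard_normal((n_in1, n_out))
W2 = rng.standard_normal((n_in2, n_out))
b = np.zeros(n_out)
x = rng.standard_normal((5, n_in1 + n_in2))

# same as T.dot(input, T.concatenate([W1, W2])) + b in the class above
out = x @ np.concatenate([W1, W2], axis=0) + b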
- ''' - - def __init__(self, rng, input, projection_insize, projection_outsize, initial_projection_distrib='gaussian'): - - self.input = input - self.params = [] - self.delta_params = [] - #self.n_in = n_in - self.projection_insize = projection_insize - self.projection_outsize = projection_outsize - - if initial_projection_distrib == 'gaussian': - W_values = numpy.asarray(rng.normal(0.0, 0.1, - size=(projection_insize, projection_outsize)), - dtype=theano.config.floatX) - elif initial_projection_distrib == 'uniform': - - # W_values = numpy.asarray(rng.uniform(low=-0.02, high=0.02, - W_values = numpy.asarray(rng.uniform(low=0.0, high=1.0, - size=(projection_insize, projection_outsize)), - dtype=theano.config.floatX) - - elif initial_projection_distrib == 'zeros': - - W_values = numpy.zeros((projection_insize, projection_outsize), - dtype=theano.config.floatX) - - elif initial_projection_distrib == '4mix': - - # TODO -- generalise to other n_modes and higher deimneionsal CVs - means = [(-0.5, -0.5), (0.5, 0.5), (0.5, -0.5), (-0.5, 0.5)] - var = (0.1, 0.1) - W_prelim = [] - for mean in means: - W_prelim.append( - numpy.asarray(rng.normal(mean, var, - size=(projection_insize / len(means), projection_outsize)), - dtype=theano.config.floatX) - ) - W_values = numpy.vstack(W_prelim) - rng.shuffle(W_values) - - else: - - sys.exit('initial_projection_distrib must be one of: gaussian, uniform') - - W = theano.shared(value=W_values, name='W', borrow=True) - - delta_W = theano.shared(value=numpy.zeros((projection_insize, projection_outsize), - dtype=theano.config.floatX), name='delta_W') - - self.params.append(W) - self.delta_params.append(delta_W) - - self.output = T.dot(self.input, W) - - -class dA(object): - def __init__(self, theano_rng=None, input=None, - n_visible=None, n_hidden=None, W=None, bhid=None, - bvis=None, activation=None, firstlayer=1, variance=None): - - self.n_visible = n_visible - self.n_hidden = n_hidden - - if not W: - initial_W = numpy.asarray(theano_rng.normal(0.0, 1.0/numpy.sqrt(n_in), - size=(n_visible, n_hidden)), dtype=theano.config.floatX) - W = theano.shared(value=initial_W, name='W') - # initial_W = numpy.asarray( numpy_rng.uniform( - # low = -4*numpy.sqrt(6./(n_hidden+n_visible)), - # high = 4*numpy.sqrt(6./(n_hidden+n_visible)), - # size = (n_visible, n_hidden)), - # dtype = theano.config.floatX) - - if not bvis: - bvis = theano.shared(value=numpy.zeros(n_visible, - dtype=theano.config.floatX)) - - if not bhid: - bhid = theano.shared(value=numpy.zeros(n_hidden, - dtype=theano.config.floatX), name='b') - - self.W = W - self.b = bhid - self.b_prime = bvis - self.W_prime = self.W.T - self.theano_rng = theano_rng - self.activation = activation - - if input == None: - self.x = T.dmatrix(name='input') - else: - self.x = input - - self.params = [self.W, self.b, self.b_prime] - - # first layer, use Gaussian noise - self.firstlayer = firstlayer - - if self.firstlayer == 1: - if variance == None: - self.var = T.vector(name='input') - else: - self.var = variance - else: - self.var = None - - def apply_activation(self, lin_output, activation): - if activation == 'SIGMOID': - final_output = T.nnet.sigmoid(lin_output) - - elif activation == 'TANH': - final_output = T.tanh(lin_output) - - elif activation == 'LINEAR': - final_output = lin_output - - elif activation == 'ReLU': # rectifier linear unit - final_output = T.maximum(0.0, lin_output) - - elif activation == 'ReSU': # rectifier smooth unit - final_output = numpy.log(1.0 + numpy.exp(lin_output)) - - else: - self.logger.critical( 
- 'the input activation function: %s is not supported right now. Please modify layers.py to support' % (activation)) - raise - - return final_output - - def get_corrupted_input(self, input, corruption_level): - if self.firstlayer == 0: - return self.theano_rng.binomial( - size=input.shape, - n=1, - p=1 - corruption_level, - dtype=theano.config.floatX) * input - else: - noise = self.theano_rng.normal(size=input.shape, - dtype=theano.config.floatX) - denoises = noise * self.var * corruption_level - return input+denoises - - def get_hidden_values(self, input): - return self.apply_activation((T.dot(input, self.W) + self.b), self.activation) - - def get_reconstructed_input(self, hidden): - if self.firstlayer == 1: - return T.dot(hidden, self.W_prime) + self.b_prime - else: - return self.apply_activation((T.dot(hidden, self.W_prime) + self.b_prime), self.activation) - - def get_cost_updates(self, corruption_level, learning_rate): - # if corruption_level == 0: - # tilde_x = self.x - # else: - # tilde_x = self.get_corrupted_input(self.x, corruption_level) - tilde_x = self.x - - y = self.get_hidden_values(tilde_x) - z = self.get_reconstructed_input(y) - - L = T.sum((self.x-z) * (self.x-z), axis=1) - cost = T.mean(L) / 2 - - gparams = T.grad(cost, self.params) - updates = {} - for param, gparam in zip(self.params, gparams): - updates[param] = param - learning_rate*gparam - - return (cost, updates) - - def init_params(self, iparams): - updates = {} - for param, iparam in zip(self.params, iparams): - updates[param] = iparam - return updates - - def get_test_cost(self, corruption_level): - """ This function computes the cost and the updates for one trainng - step of the dA """ - - # tilde_x = self.get_corrupted_input(self.x, corruption_level, 0.5) - y = self.get_hidden_values(self.x) - z = self.get_reconstructed_input(y) - L = T.sum((self.x-z) * (self.x-z), axis=1) - cost = T.mean(L) - - return cost - -import numpy -import time -import pickle -import gzip -import sys -import os -import copy - -import theano -import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams - -import logging - - -class SigmoidLayer_LHUC(object): - def __init__(self, rng, x, n_in, n_out, W=None, b=None, c=None, activation=T.tanh, p=0.0, training=0): - - self.x = x - - if p > 0.0: - if training == 1: - srng = RandomStreams(seed=123456) - self.x = T.switch(srng.binomial(size=x.shape, p=p), x, 0) - else: - self.x = (1-p) * x - - # initialize with 0 the weights W as a matrix of shape (n_in, n_out) - if W is None: - W_value = numpy.asarray(rng.normal(0.0, 1.0/numpy.sqrt(n_in), - size=(n_in, n_out)), dtype=theano.config.floatX) - W = theano.shared(value=W_value, - name='W', borrow=True) - if b is None: - b = theano.shared(value=numpy.zeros((n_out,), - dtype=theano.config.floatX), - name='b', borrow=True) - if c is None: - c_value = numpy.asarray(rng.normal(0.0, 1.0/numpy.sqrt(n_out), - size=(n_out,)), dtype=theano.config.floatX) - c = theano.shared(value=c_value, name='c', borrow=True) - - self.W = W - self.b = b - self.c = c - - self.delta_W = theano.shared(value=numpy.zeros((n_in, n_out), - dtype=theano.config.floatX), name='delta_W') - - self.delta_b = theano.shared(value=numpy.zeros_like(self.b.get_value(borrow=True), - dtype=theano.config.floatX), name='delta_b') - - self.delta_c = theano.shared(value=numpy.zeros((n_out), - dtype=theano.config.floatX), name='delta_c') - - self.output = T.dot(self.x, self.W) + self.b - self.output = activation(self.output) - self.output = 2. 
* T.nnet.sigmoid(self.c) * self.output - - self.params = [self.W, self.b, self.c] - self.delta_params = [self.delta_W, self.delta_b, self.delta_c] - - def errors(self, y): - L = T.sum((self.output-y)*(self.output-y), axis=1) - errors = T.mean(L) - return (errors) - - def init_params(self, iparams): - updates = {} - for param, iparam in zip(self.params, iparams): - updates[param] = iparam - return updates - - -class LstmBase_LHUC(object): - """ - Very similar to the LSTM layer in the gating file - Extra parameters are 'C' for scaling the hidden value - """ - - def __init__(self, rng, x, n_in, n_h, p, training): - """ Initialise all the components in a LSTM block, including input gate, output gate, forget gate, peephole connections - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - :param p: the probability of dropout - :param training: a binary value to indicate training or testing (for dropout training) - """ - - self.input = x - - if p > 0.0: - if training == 1: - srng = RandomStreams(seed=123456) - self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0) - else: - self.input = (1-p) * x - - self.n_in = int(n_in) - self.n_h = int(n_h) - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=theano.config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=theano.config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=theano.config.floatX) - - # Input gate weights - self.W_xi = theano.shared(value=Wx_value, name='W_xi') - self.W_hi = theano.shared(value=Wh_value, name='W_hi') - self.w_ci = theano.shared(value=Wc_value, name='w_ci') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=theano.config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=theano.config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=theano.config.floatX) - - # Forget gate weights - self.W_xf = theano.shared(value=Wx_value, name='W_xf') - self.W_hf = theano.shared(value=Wh_value, name='W_hf') - self.w_cf = theano.shared(value=Wc_value, name='w_cf') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=theano.config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=theano.config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=theano.config.floatX) - - # Output gate weights - self.W_xo = theano.shared(value=Wx_value, name='W_xo') - self.W_ho = theano.shared(value=Wh_value, name='W_ho') - self.w_co = theano.shared(value=Wc_value, name='w_co') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=theano.config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=theano.config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=theano.config.floatX) - - # Cell weights - self.W_xc = theano.shared(value=Wx_value, name='W_xc') - self.W_hc = theano.shared(value=Wh_value, name='W_hc') - - # bias - self.b_i = theano.shared(value=np.zeros( - (n_h, ), 
dtype=theano.config.floatX), name='b_i') - self.b_f = theano.shared(value=np.zeros( - (n_h, ), dtype=theano.config.floatX), name='b_f') - self.b_o = theano.shared(value=np.zeros( - (n_h, ), dtype=theano.config.floatX), name='b_o') - self.b_c = theano.shared(value=np.zeros( - (n_h, ), dtype=theano.config.floatX), name='b_c') - - # scaling factor - c_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h)), dtype=theano.config.floatX) - self.C = theano.shared(value=c_value, name='c') - # make a layer - - # initial value of hidden and cell state - self.h0 = theano.shared(value=np.zeros( - (n_h, ), dtype=theano.config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (n_h, ), dtype=theano.config.floatX), name='c0') - - self.Wix = T.dot(self.input, self.W_xi) - self.Wfx = T.dot(self.input, self.W_xf) - self.Wcx = T.dot(self.input, self.W_xc) - self.Wox = T.dot(self.input, self.W_xo) - - [self.h, self.c], _ = theano.scan(self.recurrent_fn, sequences=[self.Wix, self.Wfx, self.Wcx, self.Wox], - outputs_info=[self.h0, self.c0]) - - self.output = 2. * T.nnet.sigmoid(self.C) * self.h - - def recurrent_fn(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1=None): - """ This implements a genetic recurrent function, called by self.__init__(). - - :param Wix: pre-computed matrix applying the weight matrix W on the input units, for input gate - :param Wfx: Similar to Wix, but for forget gate - :param Wcx: Similar to Wix, but for cell memory - :param Wox: Similar to Wox, but for output gate - :param h_tm1: hidden activation from previous time step - :param c_tm1: activation from cell memory from previous time step - :returns: h_t is the hidden activation of current time step, and c_t is the activation for cell memory of current time step - """ - h_t, c_t = self.lstm_as_activation_function( - Wix, Wfx, Wcx, Wox, h_tm1, c_tm1) - - return h_t, c_t - - def lstm_as_activation_function(self): - """ A genetic recurrent activation function for variants of LSTM architectures. - The function is called by self.recurrent_fn(). - - """ - pass - - -class VanillaLstm_LHUC(LstmBase_LHUC): - """ This class implements the standard LSTM block, inheriting the genetic class :class:`layers.gating.LstmBase`. - - """ - - def __init__(self, rng, x, n_in, n_h, p=0.0, training=0): - """ Initialise a vanilla LSTM block - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - """ - - LstmBase_LHUC.__init__(self, rng, x, n_in, n_h, p, training) - - self.params = [self.W_xi, self.W_hi, self.w_ci, - self.W_xf, self.W_hf, self.w_cf, - self.W_xo, self.W_ho, self.w_co, - self.W_xc, self.W_hc, - self.b_i, self.b_f, self.b_o, self.b_c, self.C] - - def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1): - """ This function treats the LSTM block as an activation function, and implements the standard LSTM activation function. 
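The `2. * T.nnet.sigmoid(self.C)` factor applied to the hidden activations above is the LHUC (Learning Hidden Unit Contributions) re-scaling: each hidden unit gets a learned amplitude in (0, 2), and a scaling parameter of 0 leaves the unit untouched. A small numpy sketch of just that re-scaling:

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def lhuc_scale(h, c):
    """Rescale hidden activations h (T, n_h) by per-unit amplitudes 2*sigmoid(c), c (n_h,)."""
    return 2.0 * sigmoid(c) * h

h = np.random.randn(5, 3)
c = np.zeros(3)    # sigmoid(0) = 0.5, so the scale is exactly 1
assert np.allclose(lhuc_scale(h, c), h)
```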
- The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn` - - """ - - i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + - self.w_ci * c_tm1 + self.b_i) # - f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + - self.w_cf * c_tm1 + self.b_f) # - - c_t = f_t * c_tm1 + i_t * \ - T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c) - - o_t = T.nnet.sigmoid( - Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o) - - h_t = o_t * T.tanh(c_t) - - return h_t, c_t # , i_t, f_t, o_t -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. 
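For reference, the gate arithmetic implemented above, condensed into one plain-numpy step; this is only a sketch with the peephole weights (w_ci, w_cf, w_co) and biases named as in the class, not the Theano graph itself. Note that the input and forget gates peek at the previous cell state while the output gate peeks at the freshly updated one:

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def vanilla_lstm_step(x, h_tm1, c_tm1, p):
    """One vanilla LSTM step with peephole connections; p maps names to arrays."""
    i = sigmoid(x @ p['W_xi'] + h_tm1 @ p['W_hi'] + p['w_ci'] * c_tm1 + p['b_i'])
    f = sigmoid(x @ p['W_xf'] + h_tm1 @ p['W_hf'] + p['w_cf'] * c_tm1 + p['b_f'])
    c = f * c_tm1 + i * np.tanh(x @ p['W_xc'] + h_tm1 @ p['W_hc'] + p['b_c'])
    o = sigmoid(x @ p['W_xo'] + h_tm1 @ p['W_ho'] + p['w_co'] * c + p['b_o'])
    h = o * np.tanh(c)
    return h, c
```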
-################################################################################ - - -import numpy -import time -import pickle -import gzip -import sys -import os -import copy - -import theano -import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams - -import logging - - -class MixtureDensityOutputLayer(object): - def __init__(self, rng, input, n_in, n_out, n_component): - self.input = input - - W_value = rng.normal(0.0, 1.0/numpy.sqrt(n_in), - size=(n_in, n_out*n_component)) - self.W_mu = theano.shared(value=numpy.asarray( - W_value, dtype=theano.config.floatX), name='W_mu', borrow=True) - - self.W_sigma = theano.shared(value=numpy.asarray( - W_value.copy(), dtype=theano.config.floatX), name='W_sigma', borrow=True) - - W_mix_value = rng.normal( - 0.0, 1.0/numpy.sqrt(n_in), size=(n_in, n_component)) - self.W_mix = theano.shared(value=numpy.asarray( - W_mix_value, dtype=theano.config.floatX), name='W_mix', borrow=True) - - # assume linear output for mean vectors - self.mu = T.dot(self.input, self.W_mu) - self.sigma = T.nnet.softplus( - T.dot(self.input, self.W_sigma)) # + 0.0001 - # self.sigma = T.exp(T.dot(self.input, self.W_sigma)) # + 0.0001 - - self.mix = T.nnet.softmax(T.dot(self.input, self.W_mix)) - - self.delta_W_mu = theano.shared(value=numpy.zeros((n_in, n_out*n_component), - dtype=theano.config.floatX), name='delta_W_mu') - self.delta_W_sigma = theano.shared(value=numpy.zeros((n_in, n_out*n_component), - dtype=theano.config.floatX), name='delta_W_sigma') - self.delta_W_mix = theano.shared(value=numpy.zeros((n_in, n_component), - dtype=theano.config.floatX), name='delta_W_mix') - - self.params = [self.W_mu, self.W_sigma, self.W_mix] - self.delta_params = [self.delta_W_mu, - self.delta_W_sigma, self.delta_W_mix] - - -class LinearLayer(object): - def __init__(self, rng, input, n_in, n_out, W=None, b=None): - - self.input = input - - # initialize with 0 the weights W as a matrix of shape (n_in, n_out) - if W is None: - W_value = rng.normal(0.0, 1.0/numpy.sqrt(n_in), size=(n_in, n_out)) - W = theano.shared(value=numpy.asarray( - W_value, dtype=theano.config.floatX), name='W', borrow=True) - - if b is None: - b = theano.shared(value=numpy.zeros((n_out,), - dtype=theano.config.floatX), - name='b', borrow=True) - - self.W = W - self.b = b - - self.delta_W = theano.shared(value=numpy.zeros((n_in, n_out), - dtype=theano.config.floatX), name='delta_W') - - self.delta_b = theano.shared(value=numpy.zeros_like(self.b.get_value(borrow=True), - dtype=theano.config.floatX), name='delta_b') - - self.output = T.dot(self.input, self.W) + self.b - - self.params = [self.W, self.b] - self.delta_params = [self.delta_W, self.delta_b] - - def errors(self, y): - L = T.sum((self.output-y)*(self.output-y), axis=1) - errors = T.mean(L) - return (errors) - - def init_params(self, iparams): - updates = {} - for param, iparam in zip(self.params, iparams): - updates[param] = iparam - return updates - - -class SigmoidLayer(object): - def __init__(self, rng, input, n_in, n_out, W=None, b=None, activation=T.tanh): - - self.input = input - - # initialize with 0 the weights W as a matrix of shape (n_in, n_out) - if W is None: - W_value = numpy.asarray(rng.normal(0.0, 1.0/numpy.sqrt(n_in), - size=(n_in, n_out)), dtype=theano.config.floatX) - W = theano.shared(value=W_value, - name='W', borrow=True) - if b is None: - b = theano.shared(value=numpy.zeros((n_out,), - dtype=theano.config.floatX), - name='b', borrow=True) - - self.W = W - self.b = b - - self.delta_W = 
theano.shared(value=numpy.zeros((n_in, n_out), - dtype=theano.config.floatX), name='delta_W') - - self.delta_b = theano.shared(value=numpy.zeros_like(self.b.get_value(borrow=True), - dtype=theano.config.floatX), name='delta_b') - - self.output = T.dot(self.input, self.W) + self.b - self.output = activation(self.output) - - self.params = [self.W, self.b] - self.delta_params = [self.delta_W, self.delta_b] - - def errors(self, y): - L = T.sum((self.output-y)*(self.output-y), axis=1) - errors = T.mean(L) - return (errors) - - def init_params(self, iparams): - updates = {} - for param, iparam in zip(self.params, iparams): - updates[param] = iparam - return updates - - -class GeneralLayer(object): - - def __init__(self, rng, input, n_in, n_out, W=None, b=None, activation='linear'): - - self.input = input - self.n_in = n_in - self.n_out = n_out - - self.logger = logging.getLogger('general_layer') - - # randomly initialise the activation weights based on the input size, as advised by the 'tricks of neural network book' - if W is None: - W_values = numpy.asarray(rng.normal(0.0, 1.0/numpy.sqrt(n_in), - size=(n_in, n_out)), dtype=theano.config.floatX) - W = theano.shared(value=W_values, name='W', borrow=True) - - if b is None: - b_values = numpy.zeros((n_out,), dtype=theano.config.floatX) - b = theano.shared(value=b_values, name='b', borrow=True) - - self.W = W - self.b = b - - self.delta_W = theano.shared(value=numpy.zeros((n_in, n_out), - dtype=theano.config.floatX), name='delta_W') - - self.delta_b = theano.shared(value=numpy.zeros_like(self.b.get_value(borrow=True), - dtype=theano.config.floatX), name='delta_b') - - lin_output = T.dot(input, self.W) + self.b - if activation == 'sigmoid': - self.output = T.nnet.sigmoid(lin_output) - - elif activation == 'tanh': - self.output = T.tanh(lin_output) - - elif activation == 'linear': - self.output = lin_output - - elif activation == 'ReLU': # rectifier linear unit - self.output = T.maximum(0.0, lin_output) - - elif activation == 'ReSU': # rectifier smooth unit - self.output = numpy.log(1.0 + numpy.exp(lin_output)) - - else: - self.logger.critical( - 'the input activation function: %s is not supported right now. 
Please modify layers.py to support' % (activation)) - raise - - # parameters of the model - - self.params = [self.W, self.b] - self.delta_params = [self.delta_W, self.delta_b] - - def errors(self, y): - errors = T.mean(T.sum((self.output-y)**2, axis=1)) - - return errors - - def init_params(self, iparams): - updates = {} - for param, iparam in zip(self.params, iparams): - updates[param] = iparam - return updates - - -class HiddenLayer(object): - def __init__(self, rng, input, n_in, n_out, W=None, b=None, - activation=T.tanh, do_maxout=False, pool_size=1, - do_pnorm=False, pnorm_order=1): - """ Class for hidden layer """ - self.input = input - self.n_in = n_in - self.n_out = n_out - - if W is None: - - W_values = numpy.asarray(rng.normal(0.0, 1.0/numpy.sqrt(n_in), - size=(n_in, n_out)), dtype=theano.config.floatX) - - W = theano.shared(value=W_values, name='W', borrow=True) - - if b is None: - b_values = numpy.zeros((n_out,), dtype=theano.config.floatX) - b = theano.shared(value=b_values, name='b', borrow=True) - - self.W = W - self.b = b - - self.delta_W = theano.shared(value=numpy.zeros((n_in, n_out), - dtype=theano.config.floatX), name='delta_W') - - self.delta_b = theano.shared(value=numpy.zeros_like(self.b.get_value(borrow=True), - dtype=theano.config.floatX), name='delta_b') - - lin_output = T.dot(input, self.W) + self.b - if do_maxout == True: - self.last_start = n_out - pool_size - self.tmp_output = lin_output[:, 0:self.last_start+1:pool_size] - for i in range(1, pool_size): - cur = lin_output[:, i:self.last_start+i+1:pool_size] - self.tmp_output = T.maximum(cur, self.tmp_output) - self.output = activation(self.tmp_output) - elif do_pnorm == True: - self.last_start = n_out - pool_size - self.tmp_output = abs( - lin_output[:, 0:self.last_start+1:pool_size]) ** pnorm_order - for i in range(1, pool_size): - cur = abs(lin_output[:, i:self.last_start + - i+1:pool_size]) ** pnorm_order - self.tmp_output = self.tmp_output + cur - self.tmp_output = self.tmp_output ** (1.0 / pnorm_order) - self.output = activation(self.tmp_output) - else: - self.output = (lin_output if activation is None - else activation(lin_output)) - -# self.output = self.rectifier_linear(lin_output) - - # parameters of the model - self.params = [self.W, self.b] - self.delta_params = [self.delta_W, self.delta_b] - - def rectifier_linear(self, x): - x = T.maximum(0.0, x) - - return x - - def rectifier_smooth(self, x): - x = numpy.log(1.0 + numpy.exp(x)) - - return x - - -class dA(object): - def __init__(self, numpy_rng, theano_rng=None, input=None, - n_visible=None, n_hidden=None, W=None, bhid=None, - bvis=None, firstlayer=0, variance=None): - - self.n_visible = n_visible - self.n_hidden = n_hidden - - # create a Theano random generator that gives symbolic random values - if not theano_rng: - theano_rng = RandomStreams(numpy_rng.randint(2**30)) - - if not W: - initial_W = numpy.asarray(numpy_rng.uniform( - low=-4*numpy.sqrt(6./(n_hidden+n_visible)), - high=4*numpy.sqrt(6./(n_hidden+n_visible)), - size=(n_visible, n_hidden)), - dtype=theano.config.floatX) - W = theano.shared(value=initial_W, name='W') - - if not bvis: - bvis = theano.shared(value=numpy.zeros(n_visible, - dtype=theano.config.floatX)) - - if not bhid: - bhid = theano.shared(value=numpy.zeros(n_hidden, - dtype=theano.config.floatX), name='b') - - self.W = W - self.b = bhid - self.b_prime = bvis - self.W_prime = self.W.T - self.theano_rng = theano_rng - - if input == None: - self.x = T.dmatrix(name='input') - else: - self.x = input - - self.params = [self.W, 
self.b, self.b_prime] - - # first layer, use Gaussian noise - self.firstlayer = firstlayer - - if self.firstlayer == 1: - if variance == None: - self.var = T.vector(name='input') - else: - self.var = variance - else: - self.var = None - - def get_corrupted_input(self, input, corruption_level): - if self.firstlayer == 0: - return self.theano_rng.binomial( - size=input.shape, - n=1, - p=1 - corruption_level, - dtype=theano.config.floatX) * input - else: - noise = self.theano_rng.normal(size=input.shape, - dtype=theano.config.floatX) - denoises = noise * self.var * corruption_level - return input+denoises - - def get_hidden_values(self, input): - return T.nnet.sigmoid(T.dot(input, self.W) + self.b) - - def get_reconstructed_input(self, hidden): - if self.firstlayer == 1: - return T.dot(hidden, self.W_prime) + self.b_prime - else: - return T.nnet.sigmoid(T.dot(hidden, self.W_prime) + self.b_prime) - - def get_cost_updates(self, corruption_level, learning_rate): - tilde_x = self.get_corrupted_input(self.x, corruption_level) - y = self.get_hidden_values(tilde_x) - z = self.get_reconstructed_input(y) - - L = T.sum((self.x-z) * (self.x-z), axis=1) - cost = T.mean(L) / 2 - - gparams = T.grad(cost, self.params) - updates = {} - for param, gparam in zip(self.params, gparams): - updates[param] = param - learning_rate*gparam - - return (cost, updates) - - def init_params(self, iparams): - updates = {} - for param, iparam in zip(self.params, iparams): - updates[param] = iparam - return updates - - def get_test_cost(self, corruption_level): - """ This function computes the cost and the updates for one trainng - step of the dA """ - - # tilde_x = self.get_corrupted_input(self.x, corruption_level, 0.5) - y = self.get_hidden_values(self.x) - z = self.get_reconstructed_input(y) - L = T.sum((self.x-z) * (self.x-z), axis=1) - cost = T.mean(L) - - return cost - -import numpy as np -import theano -import theano.tensor as T -from theano import config -from theano.tensor.shared_randomstreams import RandomStreams - - -class VanillaRNNDecoder(object): - """ This class implements a standard recurrent neural network decoder: - h_{t} = f(W^{hx}x_{t} + W^{hh}h_{t-1}+ W^{yh}y_{t-1} + b_{h}) - y_{t} = g(h_{t}W^{hy} + b_{y}) - - """ - - def __init__(self, rng, x, n_in, n_h, n_out, p, training, rnn_batch_training=False): - """ This is to initialise a standard RNN hidden unit - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input data to current layer - :param n_in: dimension of input data - :param n_h: number of hidden units/blocks - :param n_out: dimension of output data - :param p: the probability of dropout - :param training: a binary value to indicate training or testing (for dropout training) - """ - self.input = x - - if p > 0.0: - if training == 1: - srng = RandomStreams(seed=123456) - self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0) - else: - self.input = (1-p) * x # (1-p) * - - self.n_in = int(n_in) - self.n_h = int(n_h) - self.n_out = int(n_out) - - self.rnn_batch_training = rnn_batch_training - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - #Wy_value = np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_out), size=(n_out, n_h)), dtype=config.floatX) - Ux_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_out)), dtype=config.floatX) - Uh_value = 
np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_out)), dtype=config.floatX) - #Uy_value = np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_out), size=(n_out, n_out)), dtype=config.floatX) - - # identity matrix initialisation - #Wh_value = np.asarray(np.eye(n_h, n_h), dtype=config.floatX) - Wy_value = np.asarray(np.eye(n_out, n_h), dtype=config.floatX) - #Uh_value = np.asarray(np.eye(n_in, n_out), dtype=config.floatX) - Uy_value = np.asarray(np.zeros((n_out, n_out)), dtype=config.floatX) - - # Input gate weights - self.W_xi = theano.shared(value=Wx_value, name='W_xi') - self.W_hi = theano.shared(value=Wh_value, name='W_hi') - self.W_yi = theano.shared(value=Wy_value, name='W_yi') - - # Output gate weights - self.U_xi = theano.shared(value=Ux_value, name='U_xi') - self.U_hi = theano.shared(value=Uh_value, name='U_hi') - self.U_yi = theano.shared(value=Uy_value, name='U_yi') - - # bias - self.b_i = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_i') - self.b = theano.shared(value=np.zeros( - (n_out, ), dtype=config.floatX), name='b') - - # initial value of hidden and cell state and output - if self.rnn_batch_training: - self.h0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='c0') - self.y0 = theano.shared(value=np.zeros( - (1, n_out), dtype=config.floatX), name='y0') - - self.h0 = T.repeat(self.h0, x.shape[1], 0) - self.c0 = T.repeat(self.c0, x.shape[1], 0) - self.y0 = T.repeat(self.y0, x.shape[1], 0) - else: - self.h0 = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='c0') - self.y0 = theano.shared(value=np.zeros( - (n_out, ), dtype=config.floatX), name='y0') - - self.Wix = T.dot(self.input, self.W_xi) - self.Uix = T.dot(self.input, self.U_xi) - - [self.h, self.c, self.y], _ = theano.scan(self.recurrent_as_activation_function, sequences=[self.Wix, self.Uix], - outputs_info=[self.h0, self.c0, self.y0]) - - self.output = self.y - - # simple recurrent decoder params - #self.params = [self.W_xi, self.W_hi, self.W_yi, self.U_hi, self.b_i, self.b] - - # recurrent output params and additional input params - self.params = [self.W_xi, self.W_hi, self.W_yi, - self.U_xi, self.U_hi, self.U_yi, self.b_i, self.b] - - self.L2_cost = (self.W_xi ** 2).sum() + (self.W_hi ** 2).sum() + \ - (self.W_yi ** 2).sum() + (self.U_hi ** 2).sum() - - def recurrent_as_activation_function(self, Wix, Uix, h_tm1, c_tm1, y_tm1): - """ Implement the recurrent unit as an activation function. This function is called by self.__init__().
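The scan above threads `(h, c, y)` through time so that the previous output `y_{t-1}` feeds back into both the hidden update and the next output. One step of that recurrence in plain numpy (a sketch; `p` is a dict holding the parameters named above, and the cell value is omitted because the class only carries it to keep the LSTM interface):

```python
import numpy as np

def rnn_decoder_step(x_t, h_tm1, y_tm1, p):
    """One step of the recurrent decoder with output feedback."""
    h_t = np.tanh(x_t @ p['W_xi'] + h_tm1 @ p['W_hi'] + y_tm1 @ p['W_yi'] + p['b_i'])
    # recurrent output plus a direct input-to-output connection
    y_t = x_t @ p['U_xi'] + h_t @ p['U_hi'] + y_tm1 @ p['U_yi'] + p['b']
    return h_t, y_t
```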
- - :param Wix: it equals to W^{hx}x_{t}, as it does not relate with recurrent, pre-calculate the value for fast computation - :type Wix: matrix - :param h_tm1: contains the hidden activation from previous time step - :type h_tm1: matrix, each row means a hidden activation vector of a time step - :param c_tm1: this parameter is not used, just to keep the interface consistent with LSTM - :returns: h_t is the hidden activation of current time step - """ - - h_t = T.tanh(Wix + T.dot(h_tm1, self.W_hi) + - T.dot(y_tm1, self.W_yi) + self.b_i) # - - # simple recurrent decoder - #y_t = T.dot(h_t, self.U_hi) + self.b - - # recurrent output and additional input - y_t = Uix + T.dot(h_t, self.U_hi) + T.dot(y_tm1, self.U_yi) + self.b - - c_t = h_t - - return h_t, c_t, y_t - - -class ContextRNNDecoder(object): - """ This class implements a standard recurrent neural network decoder: - h_{t} = f(W^{hx}x_{t} + W^{hh}h_{t-1}+ W^{yh}y_{t-1} + b_{h}) - y_{t} = g(h_{t}W^{hy} + b_{y}) - - """ - - def __init__(self, rng, x, n_in, n_h, n_out, p, training, y=None, rnn_batch_training=False): - """ This is to initialise a standard RNN hidden unit - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input data to current layer - :param n_in: dimension of input data - :param n_h: number of hidden units/blocks - :param n_out: dimension of output data - :param p: the probability of dropout - :param training: a binary value to indicate training or testing (for dropout training) - """ - self.input = x - if y is not None: - self.groundtruth = y - - if p > 0.0: - if training == 1: - srng = RandomStreams(seed=123456) - self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0) - else: - self.input = (1-p) * x # (1-p) * - - self.n_in = int(n_in) - self.n_h = int(n_h) - self.n_out = int(n_out) - - self.rnn_batch_training = rnn_batch_training - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - #Wh_value = np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - #Wy_value = np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_out), size=(n_out, n_h)), dtype=config.floatX) - Ux_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_out)), dtype=config.floatX) - #Uh_value = np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_h), size=(n_h, n_out)), dtype=config.floatX) - #Uy_value = np.asarray(rng.normal(0.0, 1.0/np.sqrt(n_out), size=(n_out, n_out)), dtype=config.floatX) - - # identity matrix initialisation - Wh_value = np.asarray(np.eye(n_h, n_h), dtype=config.floatX) - Wy_value = np.asarray(np.eye(n_out, n_h), dtype=config.floatX) - Uh_value = np.asarray(np.eye(n_in, n_out), dtype=config.floatX) - Uy_value = np.asarray(np.zeros(n_out, n_out), dtype=config.floatX) - - # Input gate weights - self.W_xi = theano.shared(value=Wx_value, name='W_xi') - self.W_hi = theano.shared(value=Wh_value, name='W_hi') - self.W_yi = theano.shared(value=Wy_value, name='W_yi') - - # Output gate weights - self.U_xi = theano.shared(value=Ux_value, name='U_xi') - self.U_hi = theano.shared(value=Uh_value, name='U_hi') - self.U_yi = theano.shared(value=Uy_value, name='U_yi') - - # bias - self.b_i = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_i') - self.b = theano.shared(value=np.zeros( - (n_out, ), dtype=config.floatX), name='b') - - # initial value of hidden and cell state and output - if self.rnn_batch_training: - self.h0 = theano.shared(value=np.zeros( - (1, n_h), 
dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='c0') - self.y0 = theano.shared(value=np.zeros( - (1, n_out), dtype=config.floatX), name='y0') - - self.h0 = T.repeat(self.h0, x.shape[1], 0) - self.c0 = T.repeat(self.c0, x.shape[1], 0) - self.y0 = T.repeat(self.y0, x.shape[1], 0) - else: - self.h0 = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='c0') - self.y0 = theano.shared(value=np.zeros( - (n_out, ), dtype=config.floatX), name='y0') - - # hard coded to remove coarse coding features - self.h0 = self.input[-1, 0:-4] - - self.outytm1 = T.roll(self.groundtruth, 1, 0) - - self.Wix = T.dot(self.input, self.W_xi) - self.Uix = T.dot(self.input, self.U_xi) - # Wiy and Uiy are consumed below but were never defined; projecting the shifted ground-truth outputs is the assumed intent - self.Wiy = T.dot(self.outytm1, self.W_yi) - self.Uiy = T.dot(self.outytm1, self.U_yi) - - [self.h, self.c], _ = theano.scan(self.recurrent_as_activation_function, sequences=[self.Wix, self.Wiy], - outputs_info=[self.h0, self.c0]) - - self.y = self.Uix + self.Uiy + T.dot(self.h, self.U_hi) + self.b - self.output = T.nnet.softmax(self.y) - - # recurrent output params and additional input params - self.params = [self.W_xi, self.W_hi, self.W_yi, - self.U_xi, self.U_hi, self.U_yi, self.b_i, self.b] - - self.L2_cost = (self.W_xi ** 2).sum() + (self.W_hi ** 2).sum() + \ - (self.W_yi ** 2).sum() + (self.U_hi ** 2).sum() - - def recurrent_as_activation_function(self, Wix, Wiy, h_tm1, c_tm1): - """ Implement the recurrent unit as an activation function. This function is called by self.__init__(). - - :param Wix: equal to W^{hx}x_{t}; it does not depend on the recurrence, so it is pre-computed for speed - :type Wix: matrix - :param h_tm1: the hidden activation from the previous time step - :type h_tm1: matrix, each row is a hidden activation vector for one time step - :param c_tm1: this parameter is not used, just to keep the interface consistent with LSTM - :returns: h_t, the hidden activation of the current time step - """ - - h_t = T.tanh(Wix + T.dot(h_tm1, self.W_hi) + Wiy + self.b_i) # - - c_t = h_t - - return h_t, c_t - - -class LstmDecoderBase(object): - """ This class serves as the base for all long short-term memory (LSTM) related classes.
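All of these recurrent classes share one `theano.scan` idiom: projections of the whole input sequence, such as `self.Wix = T.dot(self.input, self.W_xi)`, are hoisted out of the loop because they do not depend on the recurrence, and only the truly sequential update runs inside `scan`. A self-contained toy version of the idiom, assuming Theano is installed (a cumulative sum stands in for the gated update):

```python
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')            # (time, features), dtype floatX
h0 = T.zeros_like(x[0])      # initial hidden state

def step(x_t, h_tm1):
    return x_t + h_tm1       # stand-in for the real recurrent update

h, _ = theano.scan(step, sequences=x, outputs_info=h0)
f = theano.function([x], h)
print(f(np.ones((3, 2), dtype=theano.config.floatX)))  # rows of 1s, 2s, 3s
```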
- Several variants of LSTM were investigated in (Wu & King, ICASSP 2016): Zhizheng Wu, Simon King, "Investigating gated recurrent neural networks for speech synthesis", ICASSP 2016 - - """ - - def __init__(self, rng, x, n_in, n_h, n_out, p=0.0, training=0, rnn_batch_training=False): - """ Initialise all the components in a LSTM block, including input gate, output gate, forget gate, peephole connections - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - :param p: the probability of dropout - :param training: a binary value to indicate training or testing (for dropout training) - """ - - self.input = x - - if p > 0.0: - if training == 1: - srng = RandomStreams(seed=123456) - self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0) - else: - self.input = (1-p) * x - - self.n_in = int(n_in) - self.n_h = int(n_h) - - self.rnn_batch_training = rnn_batch_training - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - Wy_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_out), size=(n_out, n_h)), dtype=config.floatX) - - # Input gate weights - self.W_xi = theano.shared(value=Wx_value, name='W_xi') - self.W_hi = theano.shared(value=Wh_value, name='W_hi') - self.w_ci = theano.shared(value=Wc_value, name='w_ci') - self.W_yi = theano.shared(value=Wy_value, name='W_yi') - - # random initialisation - Uh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_out)), dtype=config.floatX) - - # Output gate weights - self.U_ho = theano.shared(value=Uh_value, name='U_ho') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Forget gate weights - self.W_xf = theano.shared(value=Wx_value, name='W_xf') - self.W_hf = theano.shared(value=Wh_value, name='W_hf') - self.w_cf = theano.shared(value=Wc_value, name='w_cf') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Output gate weights - self.W_xo = theano.shared(value=Wx_value, name='W_xo') - self.W_ho = theano.shared(value=Wh_value, name='W_ho') - self.w_co = theano.shared(value=Wc_value, name='w_co') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Cell weights - self.W_xc = theano.shared(value=Wx_value, name='W_xc') - self.W_hc = theano.shared(value=Wh_value, name='W_hc') - - # bias - self.b_i = theano.shared(value=np.zeros( - (n_h, ), 
dtype=config.floatX), name='b_i') - self.b_f = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_f') - self.b_o = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_o') - self.b_c = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_c') - self.b = theano.shared(value=np.zeros( - (n_out, ), dtype=config.floatX), name='b') - - # make a layer - - # initial value of hidden and cell state - if self.rnn_batch_training: - self.h0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='c0') - self.y0 = theano.shared(value=np.zeros( - (1, n_out), dtype=config.floatX), name='y0') - - self.h0 = T.repeat(self.h0, x.shape[1], 0) - self.c0 = T.repeat(self.c0, x.shape[1], 0) - self.y0 = T.repeat(self.c0, x.shape[1], 0) - else: - self.h0 = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='c0') - self.y0 = theano.shared(value=np.zeros( - (n_out, ), dtype=config.floatX), name='y0') - - self.Wix = T.dot(self.input, self.W_xi) - self.Wfx = T.dot(self.input, self.W_xf) - self.Wcx = T.dot(self.input, self.W_xc) - self.Wox = T.dot(self.input, self.W_xo) - - [self.h, self.c, self.y], _ = theano.scan(self.recurrent_fn, sequences=[self.Wix, self.Wfx, self.Wcx, self.Wox], - outputs_info=[self.h0, self.c0, self.y0]) - - self.output = self.y - - def recurrent_fn(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1=None, y_tm1=None): - """ This implements a genetic recurrent function, called by self.__init__(). - - :param Wix: pre-computed matrix applying the weight matrix W on the input units, for input gate - :param Wfx: Similar to Wix, but for forget gate - :param Wcx: Similar to Wix, but for cell memory - :param Wox: Similar to Wox, but for output gate - :param h_tm1: hidden activation from previous time step - :param c_tm1: activation from cell memory from previous time step - :returns: h_t is the hidden activation of current time step, and c_t is the activation for cell memory of current time step - """ - - h_t, c_t, y_t = self.lstm_as_activation_function( - Wix, Wfx, Wcx, Wox, h_tm1, c_tm1, y_tm1) - - return h_t, c_t, y_t - - def lstm_as_activation_function(self): - """ A genetic recurrent activation function for variants of LSTM architectures. - The function is called by self.recurrent_fn(). - - """ - pass - - -class VanillaLstmDecoder(LstmDecoderBase): - """ This class implements the standard LSTM block, inheriting the genetic class :class:`layers.gating.LstmBase`. 
- - """ - - def __init__(self, rng, x, n_in, n_h, n_out, p=0.0, training=0, rnn_batch_training=False): - """ Initialise a vanilla LSTM block - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - """ - - self.n_out = int(n_out) - - LstmDecoderBase.__init__( - self, rng, x, n_in, n_h, n_out, p, training, rnn_batch_training) - - self.params = [self.W_xi, self.W_hi, self.w_ci, self.W_yi, - self.W_xf, self.W_hf, self.w_cf, - self.W_xo, self.W_ho, self.w_co, - self.W_xc, self.W_hc, - self.U_ho, - self.b_i, self.b_f, self.b_o, self.b_c, self.b] - - def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1, y_tm1): - """ This function treats the LSTM block as an activation function, and implements the standard LSTM activation function. - The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn` - - """ - - i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + - self.w_ci * c_tm1 + self.b_i) # - f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + - self.w_cf * c_tm1 + self.b_f) # - - c_t = f_t * c_tm1 + i_t * \ - T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + - T.dot(y_tm1, self.W_yi) + self.b_c) - - o_t = T.nnet.sigmoid( - Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o) - - h_t = o_t * T.tanh(c_t) - - y_t = T.dot(h_t, self.U_ho) + self.b - - return h_t, c_t, y_t # , i_t, f_t, o_t - - -class SimplifiedLstmDecoder(LstmDecoderBase): - """ This class implements a simplified LSTM block which only keeps the forget gate, inheriting the genetic class :class:`layers.gating.LstmBase`. - - """ - - def __init__(self, rng, x, n_in, n_h, n_out, p=0.0, training=0, rnn_batch_training=False): - """ Initialise a LSTM with only the forget gate - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - """ - - self.n_out = int(n_out) - - LstmDecoderBase.__init__( - self, rng, x, n_in, n_h, n_out, p, training, rnn_batch_training) - - self.params = [self.W_yi, - self.W_xf, self.W_hf, - self.W_xc, self.W_hc, - self.U_ho, - self.b_f, self.b_c, self.b] - - def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1, y_tm1): - """ This function treats the LSTM block as an activation function, and implements the LSTM (simplified LSTM) activation function. - The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn` - - """ - - f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + - self.b_f) # self.w_cf * c_tm1 - - c_t = f_t * c_tm1 + (1 - f_t) * T.tanh(Wcx + T.dot(h_tm1, - self.W_hc) + T.dot(y_tm1, self.W_yi) + self.b_c) - - h_t = T.tanh(c_t) - - y_t = T.dot(h_t, self.U_ho) + self.b - - return h_t, c_t, y_t - - -class LstmBase(object): - """ This class provides as a base for all long short-term memory (LSTM) related classes. 
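SimplifiedLstmDecoder above keeps only the forget gate and ties the input gate to `(1 - f_t)`, so the cell state becomes a convex combination of its previous value and the candidate update (with the previous output mixed into the candidate). One step as a plain-numpy sketch, parameter names as in the class:

```python
import numpy as np

def sigmoid(z):
    return 1.0 / (1.0 + np.exp(-z))

def simplified_lstm_decoder_step(x, y_tm1, h_tm1, c_tm1, p):
    """Forget-gate-only LSTM decoder step with output feedback."""
    f = sigmoid(x @ p['W_xf'] + h_tm1 @ p['W_hf'] + p['b_f'])
    cand = np.tanh(x @ p['W_xc'] + h_tm1 @ p['W_hc'] + y_tm1 @ p['W_yi'] + p['b_c'])
    c = f * c_tm1 + (1.0 - f) * cand
    h = np.tanh(c)
    y = h @ p['U_ho'] + p['b']
    return h, c, y
```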
- Several variants of LSTM were investigated in (Wu & King, ICASSP 2016): Zhizheng Wu, Simon King, "Investigating gated recurrent neural networks for speech synthesis", ICASSP 2016 - - """ - - def __init__(self, rng, x, n_in, n_h, p=0.0, training=0, rnn_batch_training=False): - """ Initialise all the components in a LSTM block, including input gate, output gate, forget gate, peephole connections - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - :param p: the probability of dropout - :param training: a binary value to indicate training or testing (for dropout training) - """ - - n_in = int(n_in) # ensure sizes have integer type - n_h = int(n_h) # ensure sizes have integer type - - self.input = x - - if p > 0.0: - if training == 1: - srng = RandomStreams(seed=123456) - self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0) - else: - self.input = (1-p) * x - - self.n_in = int(n_in) - self.n_h = int(n_h) - - self.rnn_batch_training = rnn_batch_training - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Input gate weights - self.W_xi = theano.shared(value=Wx_value, name='W_xi') - self.W_hi = theano.shared(value=Wh_value, name='W_hi') - self.w_ci = theano.shared(value=Wc_value, name='w_ci') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Forget gate weights - self.W_xf = theano.shared(value=Wx_value, name='W_xf') - self.W_hf = theano.shared(value=Wh_value, name='W_hf') - self.w_cf = theano.shared(value=Wc_value, name='w_cf') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Output gate weights - self.W_xo = theano.shared(value=Wx_value, name='W_xo') - self.W_ho = theano.shared(value=Wh_value, name='W_ho') - self.w_co = theano.shared(value=Wc_value, name='w_co') - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_h)), dtype=config.floatX) - Wh_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, n_h)), dtype=config.floatX) - Wc_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_h), size=(n_h, )), dtype=config.floatX) - - # Cell weights - self.W_xc = theano.shared(value=Wx_value, name='W_xc') - self.W_hc = theano.shared(value=Wh_value, name='W_hc') - - # bias - self.b_i = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_i') - self.b_f = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_f') - self.b_o = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='b_o') - self.b_c = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), 
name='b_c') - - # make a layer - - # initial value of hidden and cell state - if self.rnn_batch_training: - self.h0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (1, n_h), dtype=config.floatX), name='c0') - - self.h0 = T.repeat(self.h0, x.shape[1], 0) - self.c0 = T.repeat(self.c0, x.shape[1], 0) - else: - self.h0 = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='h0') - self.c0 = theano.shared(value=np.zeros( - (n_h, ), dtype=config.floatX), name='c0') - - # hard coded to remove coarse coding features - self.h0 = self.input[-1, 0:-4] - - self.Wix = T.dot(self.input, self.W_xi) - self.Wfx = T.dot(self.input, self.W_xf) - self.Wcx = T.dot(self.input, self.W_xc) - self.Wox = T.dot(self.input, self.W_xo) - - [self.h, self.c], _ = theano.scan(self.recurrent_fn, sequences=[self.Wix, self.Wfx, self.Wcx, self.Wox], - outputs_info=[self.h0, self.c0]) - - self.output = self.h - - def recurrent_fn(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1=None): - """ This implements a genetic recurrent function, called by self.__init__(). - - :param Wix: pre-computed matrix applying the weight matrix W on the input units, for input gate - :param Wfx: Similar to Wix, but for forget gate - :param Wcx: Similar to Wix, but for cell memory - :param Wox: Similar to Wox, but for output gate - :param h_tm1: hidden activation from previous time step - :param c_tm1: activation from cell memory from previous time step - :returns: h_t is the hidden activation of current time step, and c_t is the activation for cell memory of current time step - """ - - h_t, c_t = self.lstm_as_activation_function( - Wix, Wfx, Wcx, Wox, h_tm1, c_tm1) - - return h_t, c_t - - def lstm_as_activation_function(self): - """ A genetic recurrent activation function for variants of LSTM architectures. - The function is called by self.recurrent_fn(). - - """ - pass - - -class ContextLstm(LstmBase): - """ This class implements the standard LSTM block, inheriting the genetic class :class:`layers.gating.LstmBase`. - - """ - - def __init__(self, rng, x, n_in, n_h, p=0.0, training=0, rnn_batch_training=False): - """ Initialise a vanilla LSTM block - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input to a network - :param n_in: number of input features - :type n_in: integer - :param n_h: number of hidden units - :type n_h: integer - """ - - LstmBase.__init__(self, rng, x, n_in, n_h, p, - training, rnn_batch_training) - - self.params = [self.W_xi, self.W_hi, self.w_ci, - self.W_xf, self.W_hf, self.w_cf, - self.W_xo, self.W_ho, self.w_co, - self.W_xc, self.W_hc, - self.b_i, self.b_f, self.b_o, self.b_c] - - def lstm_as_activation_function(self, Wix, Wfx, Wcx, Wox, h_tm1, c_tm1): - """ This function treats the LSTM block as an activation function, and implements the standard LSTM activation function. 
- The meaning of each input and output parameters can be found in :func:`layers.gating.LstmBase.recurrent_fn` - - """ - - i_t = T.nnet.sigmoid(Wix + T.dot(h_tm1, self.W_hi) + - self.w_ci * c_tm1 + self.b_i) # - f_t = T.nnet.sigmoid(Wfx + T.dot(h_tm1, self.W_hf) + - self.w_cf * c_tm1 + self.b_f) # - - c_t = f_t * c_tm1 + i_t * \ - T.tanh(Wcx + T.dot(h_tm1, self.W_hc) + self.b_c) - - o_t = T.nnet.sigmoid( - Wox + T.dot(h_tm1, self.W_ho) + self.w_co * c_t + self.b_o) - - h_t = o_t * T.tanh(c_t) - - return h_t, c_t # , i_t, f_t, o_t -import numpy as np -import theano -import theano.tensor as T -from theano import config -from theano.tensor.shared_randomstreams import RandomStreams - - -class RecurrentOutputLayer(object): - """ This class implements a standard recurrent output layer: - y_{t} = g(h_{t}W^{hy} + y_{t}W^{yy} + b_{y}) - - """ - - def __init__(self, rng, x, n_in, n_out, p=0.0, training=1, rnn_batch_training=False): - """ This is to initialise a standard RNN hidden unit - - :param rng: random state, fixed value for randome state for reproducible objective results - :param x: input data to current layer - :param n_in: dimension of input data - :param n_out: dimension of output data - :param p: the probability of dropout - :param training: a binary value to indicate training or testing (for dropout training) - """ - self.input = x - - if p > 0.0: - if training == 1: - srng = RandomStreams(seed=123456) - self.input = T.switch(srng.binomial(size=x.shape, p=p), x, 0) - else: - self.input = (1-p) * x # (1-p) * - - self.n_in = int(n_in) - self.n_out = int(n_out) - - self.rnn_batch_training = rnn_batch_training - - # random initialisation - Wx_value = np.asarray(rng.normal( - 0.0, 1.0/np.sqrt(n_in), size=(n_in, n_out)), dtype=config.floatX) - Wy_value = np.asarray(np.zeros((n_out, n_out)), dtype=config.floatX) - - # Input gate weights - self.W_xi = theano.shared(value=Wx_value, name='W_xi') - self.W_yi = theano.shared(value=Wy_value, name='W_yi') - - # bias - self.b_y = theano.shared(value=np.zeros( - (n_out, ), dtype=config.floatX), name='b_y') - - # initial value of output - if self.rnn_batch_training: - self.y0 = theano.shared(value=np.zeros( - (1, n_out), dtype=config.floatX), name='y0') - self.y0 = T.repeat(self.y0, x.shape[1], 0) - else: - self.y0 = theano.shared(value=np.zeros( - (n_out, ), dtype=config.floatX), name='y0') - - self.Wix = T.dot(self.input, self.W_xi) - - self.y, _ = theano.scan(self.recurrent_as_activation_function, sequences=self.Wix, - outputs_info=self.y0) - - self.output = self.y - - self.params = [self.W_xi, self.W_yi, self.b_y] - - def recurrent_as_activation_function(self, Wix, y_tm1): - """ Implement the recurrent unit as an activation function. This function is called by self.__init__(). - - :param Wix: it equals to W^{hx}x_{t}, as it does not relate with recurrent, pre-calculate the value for fast computation - :type Wix: matrix - :param y_tm1: contains the output from previous time step - :type y_tm1: matrix, each row means an output vector of a time step - """ - - y_t = Wix + T.dot(y_tm1, self.W_yi) + self.b_y # - - return y_t -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. 
-# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -# NOTES -# still to consider: pygal, for HTML5 SVG plotting - -from operator import itemgetter, attrgetter -import numpy -from matplotlib.ticker import MultipleLocator, FormatStrFormatter -import pylab -import matplotlib.pyplot as plt -import math -import string -import os - -# this module provides the base classes that we specialise here -import logging # as logging - -# for plotting -import matplotlib - -# should make this user-configurable - TO DO later -# this line has to come before the import of matplotlib.pyplot -matplotlib.use('PDF') - - -# matplotlib needs to be passed numpy arrays - -# for sorting tuples - - -# TO DO - this needs to be attached to the logging module so that it's available via config options -# class PlotHandler(logging.FileHandler): -# """A handler for saving plots to disk""" -# def __init__(self,filename): -# logging.FileHandler.__init__(self,filename, mode='a', encoding=None, delay=False) - - -class PlotWithData(object): - # a generic plot object that contains both the underlying data and the plot itself - # this class needs to be subclassed for each specialised type of plot that we want - - # the underlying data for the plot - a dictionary of data series - # each series is a list of data points of arbitrary type (e.g., tuples, arrays, ..) 
-    data = None
-    # the plot generated from these data
-    plot = None
-
-    def __init__(self, name):
-        # clear the data series
-        self.data = {}
-
-    def add_data_point(self, series_name, data_point):
-        # if there is no data series with this name yet, create an empty one
-        if series_name not in self.data:
-            self.data[series_name] = []
-        # append this data point (e.g., it might be a tuple (x,y) )
-        # don't worry about data type or sorting - that is not our concern here
-        self.data[series_name].append(data_point)
-
-    def sort_and_validate(self):
-        # only applicable if the data points are tuples, such as (x,y) values
-
-        # TO DO: first check that each series is a list of tuples, and that they have the same number of elements
-
-        # this method checks that all data series
-        # 1. have the same length
-        # 2. are sorted in ascending order of x
-        # 3. have identical values in their x series
-
-        logger = logging.getLogger("plotting")
-
-        # there has to be at least one data series
-        try:
-            assert len(self.data) > 0
-        except AssertionError:
-            logger.critical('No data series found in plot')
-            raise
-
-        # check lengths are consistent, sort, then check x values are identical
-        series_length = -1
-        reference_x = None
-        # print("starting with self.data=", self.data)
-        for series_name, data_points in self.data.items():
-            if series_length > 0:
-                assert series_length == len(data_points)
-            else:
-                series_length = len(data_points)
-            # sort by ascending x value
-            data_points.sort(key=itemgetter(0))
-
-            if reference_x:
-                assert reference_x == [seq[0] for seq in data_points]
-            else:
-                # extract a list of just the x values
-                reference_x = [seq[0] for seq in data_points]
-
-        # print("ending with self.data=", self.data)
-
-    def generate_plot(self, **kwargs):
-        logger = logging.getLogger("plotting")
-        logger.error(
-            'Cannot generate a plot from abstract class: PlotWithData')
-        # raise an exception here?
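-
-# A minimal usage sketch of the PlotWithData contract (added for illustration;
-# it is not part of the original source and assumes only the class above):
-# (x, y) points may be added to a series in any order, and sort_and_validate()
-# then sorts every series by x and asserts that all series have the same length
-# and identical x values.
-def _example_plot_with_data():
-    pwd = PlotWithData('example')
-    pwd.add_data_point('training', (2, 0.5))
-    pwd.add_data_point('training', (1, 0.9))
-    pwd.add_data_point('validation', (1, 1.1))
-    pwd.add_data_point('validation', (2, 0.7))
-    pwd.sort_and_validate()
-    # both series are now [(1, ...), (2, ...)], sorted by x
-    return pwd.data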
-
-
-class MultipleSeriesPlot(PlotWithData):
-
-    def generate_plot(self, filename, title='', xlabel='', ylabel='', xlim=None, ylim=None):
-
-        logger = logging.getLogger("plotting")
-        logger.debug('MultipleSeriesPlot.generate_plot')
-
-        # a plot with one or more time series sharing a common x axis:
-        # e.g., the training error and the validation error plotted against epochs
-
-        # sort the data series and make sure they are consistent
-        self.sort_and_validate()
-
-        # if there is a plot already in existence, we will clear it and re-use it;
-        # this avoids creating extraneous figures which will stay in memory
-        # (even if we are no longer referencing them)
-        if self.plot:
-            self.plot.clf()
-        else:
-            # create a plot
-            self.plot = plt.figure()
-
-        splt = self.plot.add_subplot(1, 1, 1)
-        splt.set_title(title)
-        splt.set_xlabel(xlabel)
-        splt.set_ylabel(ylabel)
-
-        if xlim:
-            pylab.xlim(xlim)
-        if ylim:
-            pylab.ylim(ylim)
-
-        for series_name, data_points in self.data.items():
-            xpoints = numpy.asarray([seq[0] for seq in data_points])
-            ypoints = numpy.asarray([seq[1] for seq in data_points])
-            line, = splt.plot(xpoints, ypoints, '-', linewidth=2)
-            logger.debug('set_label for %s' % series_name)
-            line.set_label(series_name)
-
-        splt.legend()
-
-        # TO DO - better filename configuration for plots
-        self.plot.savefig(filename)
-
-
-class SingleWeightMatrixPlot(PlotWithData):
-
-    def generate_plot(self, filename, title='', xlabel='', ylabel=''):
-
-        logger = logging.getLogger("plotting")
-
-        data_keys = list(self.data.keys())
-        key_num = len(data_keys)
-
-        self.plot = plt.figure()
-        if key_num == 1:
-            splt = self.plot.add_subplot(1, 1, 1)
-            im_data = splt.imshow(numpy.flipud(
-                self.data[data_keys[0]][0]), origin='lower')
-            splt.set_xlabel(xlabel)
-            splt.set_ylabel(ylabel)
-            splt.set_title(title)
-        else:  # plotting multiple images in one figure is still problematic; the visualization is not good
-            logger.error('not supported yet')
-            return
-
-        self.plot.colorbar(im_data)
-        self.plot.savefig(filename)  # , bbox_inches='tight'
-
-# class MultipleLinesPlot(PlotWithData):
-#     def generate_plot(self, filename, title='', xlabel='', ylabel=''):
-
-
-class LoggerPlotter(logging.getLoggerClass()):
-    """Based on the built-in logging class, with added capabilities including plotting"""
-
-    # a dictionary to store all generated plots
-    # keys are plot names
-    # values are PlotWithData objects (or subclasses)
-    plots = {}
-    # where the plots will be saved - a directory
-    plot_path = '/tmp'  # default location
-
-    def __init__(self, name):
-        # initialise the logging parent class
-        # (should really use 'super' here I think, but that fails - perhaps because the built in logger class is not derived from 'object' ?)
-        logging.Logger.__init__(self, name)
-
-    def set_plot_path(self, path):
-        self.plot_path = path
-
-    def remove_all_plots(self):
-        self.plots = {}
-
-    def create_plot(self, plot_name, plot_object):
-        self.plots[plot_name] = plot_object(plot_name)
-
-    def add_plot_point(self, plot_name, series_name, data_point):
-        # add a data point to a named plot
-        if plot_name not in self.plots:
-            self.plots[plot_name] = PlotWithData(plot_name)
-        self.plots[plot_name].add_data_point(series_name, data_point)
-
-    def save_plot(self, plot_name, **kwargs):
-        logger = logging.getLogger("plotting")
-        if plot_name not in self.plots:
-            logger.warning(
-                'Tried to generate a plot called %s that does not exist' % plot_name)
-            # raise an exception here?
-        else:
-            # # the filename to save to is known by the handler, which needs to be assigned to this logger
-            # # look at the handlers attached to this logger instance
-            # ph=None
-            # for h in self.handlers:
-            #     # we want an instance of a PlotHandler - we'll take the first one we find
-            #     # (behaviour will be unpredictable if there is more than one handler of this type)
-            #     if isinstance(h,PlotHandler):
-            #         ph=h
-            #         break
-            # if ph:
-            # TO DO - need to be sure of safe file names
-            if not os.path.isdir(self.plot_path):
-                os.makedirs(self.plot_path)
-            filename = self.plot_path + "/" + \
-                plot_name.replace(" ", "_") + ".pdf"
-            logger.info('Generating a plot in file %s' % filename)
-            self.plots[plot_name].generate_plot(filename, **kwargs)
-            # else:
-            #     logger.warning('No handler of type PlotHandler is attached to this logger - cannot save plots')
-
-
-class ColouredFormatter(logging.Formatter):
-
-    # colourising formatter adapted from an answer to this question on Stack Overflow
-    # http://stackoverflow.com/questions/384076/how-can-i-color-python-logging-output
-
-    BLACK, RED, GREEN, YELLOW, BLUE, MAGENTA, CYAN, WHITE = list(range(8))
-
-    COLOURS = {
-        'DEBUG': BLUE,
-        'INFO': GREEN,
-        'WARNING': YELLOW,
-        'ERROR': RED,
-        'CRITICAL': MAGENTA
-    }
-
-    max_level_name_width = '8'
-
-    # terminal escape sequences
-    RESET_SEQ = "\033[0m"
-    COLOUR_SEQ = "\033[1;%dm"
-    BOLD_SEQ = "\033[1m"
-
-    def format(self, record):
-        if record.levelname in self.COLOURS:
-            # pad to fixed width - currently hardwired, should make this dynamic
-            # maximum width of level names, which is the 8 characters of "CRITICAL"
-            fixed_width_levelname = '{0:8s}'.format(record.levelname)
-            record.name = '{0:8s}'.format(record.name)
-            # The background is set with 40 plus the number of the colour, and the foreground with 30
-            record.levelname = self.COLOUR_SEQ % (
-                30 + self.COLOURS[record.levelname]) + fixed_width_levelname + self.RESET_SEQ
-        return logging.Formatter.format(self, record)
-
-    @staticmethod
-    def factory(fmt, datefmt):
-        return ColouredFormatter(fmt, datefmt)
-
-
-if __name__ == '__main__':
-    # some simple tests
-
-    # tell the built-in logging module to use our custom class when instantiating any new logger
-    logging.setLoggerClass(LoggerPlotter)
-
-    logger = logging.getLogger("test_logger")
-    logger.setLevel(logging.DEBUG)
-
-    # a console handler
-    ch = logging.StreamHandler()
-    ch.setLevel(logging.DEBUG)
-    formatter = ColouredFormatter(
-        '%(asctime)s - %(name)s - %(levelname)s - %(message)s')
-    ch.setFormatter(formatter)
-    logger.addHandler(ch)
-
-    print("testing the logging code")
-    logger.debug('A DEBUG message')
-    logger.info('An INFO message')
-    logger.warning('A WARNING message')
-    logger.error('An ERROR message')
-    logger.critical('A CRITICAL message')
-
-    plotlogger = logging.getLogger("plotting")
-    plotlogger.setLevel(logging.DEBUG)
-    # handler for plotting logger - will write only to console
-    plotlogger.addHandler(ch)
-
-    # # need a handler which will control where to save plots
-    # ph = PlotHandler("/tmp/plot_test/testing.pdf")
-    # logger.addHandler(ph)
-
-    print("testing the plotting code")
-
-    # the first argument is just a key for referring to this plot within the code
-    # the second argument says what kind of plot we will be making
-
-    plotlogger.set_plot_path("./tmp")
-
-    logger.create_plot('test plot', MultipleSeriesPlot)
-
-    plotlogger.add_plot_point('test plot', 'validation', (1, 4))
-    plotlogger.add_plot_point('test plot', 'validation', (3, 2))
-
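-    # note (comment added): the points are added out of x order here;
-    # PlotWithData.sort_and_validate(), called inside generate_plot(), sorts
-    # each series by x before anything is drawn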
-    plotlogger.add_plot_point('test plot', 'validation', (2, 3))
-    plotlogger.add_plot_point('test plot', 'validation', (4, 3))
-
-    plotlogger.add_plot_point('test plot', 'training', (1, 3))
-    plotlogger.add_plot_point('test plot', 'training', (3, 1))
-    plotlogger.add_plot_point('test plot', 'training', (2, 2))
-    plotlogger.add_plot_point('test plot', 'training', (4, 4))
-
-    plotlogger.save_plot(
-        'test plot', title='Training and validation error', xlabel='epochs', ylabel='error')
-
-    weights = [[1, 2, 3, 3], [1, 1, 2, 1], [2, 1, 2, 2]]
-    logger.create_plot('activation weight', SingleWeightMatrixPlot)
-    plotlogger.add_plot_point('activation weight', 'weight1', weights)
-    plotlogger.add_plot_point('activation weight', 'weight2', weights)
-    plotlogger.add_plot_point('activation weight', 'weight3', weights)
-
-    plotlogger.save_plot('activation weight', title='weight',
-                         xlabel='dimension', ylabel='dimension')
-
-import sys
-
-import numpy as np
-from collections import OrderedDict
-
-import theano
-import theano.tensor as T
-from theano.tensor.shared_randomstreams import RandomStreams
-
-from layers.gating import SimplifiedLstm, BidirectionSLstm, VanillaLstm, BidirectionLstm, VanillaRNN, SimplifiedGRU, GatedRecurrentUnit, LstmNoPeepholes, LstmNOG, LstmNIG, LstmNFG
-from layers.layers import GeneralLayer, LinearLayer, SigmoidLayer
-from layers.recurrent_output_layer import RecurrentOutputLayer
-from layers.lhuc_layer import SigmoidLayer_LHUC, VanillaLstm_LHUC
-
-from training_schemes.rprop import compile_RPROP_train_function
-from training_schemes.adam_v2 import compile_ADAM_train_function
-
-import logging
-
-
-class DeepRecurrentNetwork(object):
-    """
-    This class assembles various neural network architectures, from basic feedforward networks to bidirectional gated recurrent networks and hybrid architectures. **Hybrid** means a combination of feedforward and recurrent architecture.
-
-    """
-
-    def __init__(self, n_in, hidden_layer_size, n_out, L1_reg, L2_reg, hidden_layer_type, output_type='LINEAR', dropout_rate=0.0, optimizer='sgd', loss_function='MMSE', rnn_batch_training=False):
-        """ This function initialises a neural network
-
-        :param n_in: Dimensionality of input features
-        :type n_in: Integer
-        :param hidden_layer_size: The layer size for each hidden layer
-        :type hidden_layer_size: A list of integers
-        :param n_out: Dimensionality of output features
-        :type n_out: Integer
-        :param hidden_layer_type: the activation type of each hidden layer, e.g., TANH, LSTM, GRU, BLSTM
-        :param L1_reg: the L1 regularisation weight
-        :param L2_reg: the L2 regularisation weight
-        :param output_type: the activation type of the output layer; 'LINEAR' (linear regression) by default.
-        :param dropout_rate: probability of dropout, a float number between 0 and 1.
- """ - - logger = logging.getLogger("DNN initialization") - - self.n_in = int(n_in) - self.n_out = int(n_out) - - self.n_layers = len(hidden_layer_size) - - self.dropout_rate = dropout_rate - self.optimizer = optimizer - self.loss_function = loss_function - self.is_train = T.iscalar('is_train') - self.rnn_batch_training = rnn_batch_training - - assert len(hidden_layer_size) == len(hidden_layer_type) - - self.list_of_activations = [ - 'TANH', 'SIGMOID', 'SOFTMAX', 'RELU', 'RESU'] - - if self.rnn_batch_training: - self.x = T.tensor3('x') - self.y = T.tensor3('y') - else: - self.x = T.matrix('x') - self.y = T.matrix('y') - - self.L1_reg = L1_reg - self.L2_reg = L2_reg - - self.rnn_layers = [] - self.params = [] - self.delta_params = [] - - rng = np.random.RandomState(123) - - for i in range(self.n_layers): - if i == 0: - input_size = n_in - else: - input_size = hidden_layer_size[i-1] - - if i == 0: - layer_input = self.x - else: - layer_input = self.rnn_layers[i-1].output - if hidden_layer_type[i-1] == 'BSLSTM' or hidden_layer_type[i-1] == 'BLSTM': - input_size = hidden_layer_size[i-1]*2 - - if hidden_layer_type[i] in self.list_of_activations: - hidden_activation = hidden_layer_type[i].lower() - hidden_layer = GeneralLayer( - rng, layer_input, input_size, hidden_layer_size[i], activation=hidden_activation, p=self.dropout_rate, training=self.is_train) - elif hidden_layer_type[i] == 'TANH_LHUC': - hidden_layer = SigmoidLayer_LHUC( - rng, layer_input, input_size, hidden_layer_size[i], activation=T.tanh, p=self.dropout_rate, training=self.is_train) - elif hidden_layer_type[i] == 'SLSTM': - hidden_layer = SimplifiedLstm( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'SGRU': - hidden_layer = SimplifiedGRU( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'GRU': - hidden_layer = GatedRecurrentUnit( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'LSTM_NFG': - hidden_layer = LstmNFG( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'LSTM_NOG': - hidden_layer = LstmNOG( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'LSTM_NIG': - hidden_layer = LstmNIG( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'LSTM_NPH': - hidden_layer = LstmNoPeepholes( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'LSTM': - hidden_layer = VanillaLstm( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'BSLSTM': - hidden_layer = BidirectionSLstm(rng, layer_input, input_size, hidden_layer_size[i], hidden_layer_size[ - i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] 
== 'BLSTM': - hidden_layer = BidirectionLstm(rng, layer_input, input_size, hidden_layer_size[i], hidden_layer_size[ - i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'RNN': - hidden_layer = VanillaRNN( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'LSTM_LHUC': - hidden_layer = VanillaLstm_LHUC( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - else: - logger.critical("This hidden layer type: %s is not supported right now! \n Please use one of the following: SLSTM, BSLSTM, TANH, SIGMOID\n" % ( - hidden_layer_type[i])) - sys.exit(1) - - self.rnn_layers.append(hidden_layer) - self.params.extend(hidden_layer.params) - - input_size = hidden_layer_size[-1] - if hidden_layer_type[-1] == 'BSLSTM' or hidden_layer_type[-1] == 'BLSTM': - input_size = hidden_layer_size[-1]*2 - - output_activation = output_type.lower() - if output_activation == 'linear': - self.final_layer = LinearLayer( - rng, self.rnn_layers[-1].output, input_size, self.n_out) - elif output_activation == 'recurrent': - self.final_layer = RecurrentOutputLayer( - rng, self.rnn_layers[-1].output, input_size, self.n_out, rnn_batch_training=self.rnn_batch_training) - elif output_type.upper() in self.list_of_activations: - self.final_layer = GeneralLayer( - rng, self.rnn_layers[-1].output, input_size, self.n_out, activation=output_activation) - else: - logger.critical( - "This output layer type: %s is not supported right now! \n Please use one of the following: LINEAR, BSLSTM\n" % (output_type)) - sys.exit(1) - - self.params.extend(self.final_layer.params) - - self.updates = {} - for param in self.params: - self.updates[param] = theano.shared(value=np.zeros(param.get_value(borrow=True).shape, - dtype=theano.config.floatX), name='updates') - - if self.loss_function == 'CCE': - self.finetune_cost = self.categorical_crossentropy_loss( - self.final_layer.output, self.y) - self.errors = self.categorical_crossentropy_loss( - self.final_layer.output, self.y) - elif self.loss_function == 'Hinge': - self.finetune_cost = self.multiclass_hinge_loss( - self.final_layer.output, self.y) - self.errors = self.multiclass_hinge_loss( - self.final_layer.output, self.y) - elif self.loss_function == 'MMSE': - if self.rnn_batch_training: - self.y_mod = T.reshape(self.y, (-1, n_out)) - self.final_layer_output = T.reshape( - self.final_layer.output, (-1, n_out)) - - nonzero_rows = T.any(self.y_mod, 1).nonzero() - - self.y_mod = self.y_mod[nonzero_rows] - self.final_layer_output = self.final_layer_output[nonzero_rows] - - self.finetune_cost = T.mean( - T.sum((self.final_layer_output - self.y_mod) ** 2, axis=1)) - self.errors = T.mean( - T.sum((self.final_layer_output - self.y_mod) ** 2, axis=1)) - else: - self.finetune_cost = T.mean( - T.sum((self.final_layer.output - self.y) ** 2, axis=1)) - self.errors = T.mean( - T.sum((self.final_layer.output - self.y) ** 2, axis=1)) - - def categorical_crossentropy_loss(self, predictions, targets): - return T.nnet.categorical_crossentropy(predictions, targets).mean() - - def multiclass_hinge_loss(self, predictions, targets, delta=1): - num_cls = predictions.shape[1] - if targets.ndim == predictions.ndim - 1: - targets = T.extra_ops.to_one_hot(targets, num_cls) - elif targets.ndim != predictions.ndim: - raise TypeError('rank mismatch 
between targets and predictions') - corrects = predictions[targets.nonzero()] - rest = T.reshape(predictions[(1-targets).nonzero()], - (-1, num_cls-1)) - rest = T.max(rest, axis=1) - return T.nnet.relu(rest - corrects + delta).mean() - - def build_finetune_functions(self, train_shared_xy, valid_shared_xy, use_lhuc=False, layer_index=0): - """ This function is to build finetune functions and to update gradients - - :param train_shared_xy: theano shared variable for input and output training data - :type train_shared_xy: tuple of shared variable - :param valid_shared_xy: theano shared variable for input and output development data - :type valid_shared_xy: tuple of shared variable - :returns: finetune functions for training and development - - """ - - logger = logging.getLogger("DNN initialization") - - (train_set_x, train_set_y) = train_shared_xy - (valid_set_x, valid_set_y) = valid_shared_xy - - lr = T.scalar('lr', dtype=theano.config.floatX) - mom = T.scalar('mom', dtype=theano.config.floatX) # momentum - - cost = self.finetune_cost # + self.L2_reg * self.L2_sqr - - # added for LHUC - if use_lhuc: - # In lhuc the parameters are only scaling parameters which have the name 'c' - self.lhuc_params = [] - for p in self.params: - if p.name == 'c': - self.lhuc_params.append(p) - params = self.lhuc_params - gparams = T.grad(cost, params) - else: - params = self.params - gparams = T.grad(cost, params) - - freeze_params = 0 - for layer in range(layer_index): - freeze_params += len(self.rnn_layers[layer].params) - - # use optimizer - if self.optimizer == 'sgd': - # zip just concatenate two lists - updates = OrderedDict() - - for i, (param, gparam) in enumerate(zip(params, gparams)): - weight_update = self.updates[param] - upd = mom * weight_update - lr * gparam - updates[weight_update] = upd - - # freeze layers and update weights - if i >= freeze_params: - updates[param] = param + upd - - elif self.optimizer == 'adam': - updates = compile_ADAM_train_function( - self, gparams, learning_rate=lr) - elif self.optimizer == 'rprop': - updates = compile_RPROP_train_function(self, gparams) - else: - logger.critical( - "This optimizer: %s is not supported right now! 
\n Please use one of the following: sgd, adam, rprop\n" % (self.optimizer)) - sys.exit(1) - - train_model = theano.function(inputs=[lr, mom], # index, batch_size - outputs=self.errors, - updates=updates, - givens={self.x: train_set_x, # [index*batch_size:(index + 1)*batch_size] - self.y: train_set_y, - self.is_train: np.cast['int32'](1)}, on_unused_input='ignore') - - valid_model = theano.function(inputs=[], - outputs=self.errors, - givens={self.x: valid_set_x, - self.y: valid_set_y, - self.is_train: np.cast['int32'](0)}, on_unused_input='ignore') - - return train_model, valid_model - - def parameter_prediction(self, test_set_x): # , batch_size - """ This function is to predict the output of NN - - :param test_set_x: input features for a testing sentence - :type test_set_x: python array variable - :returns: predicted features - - """ - - n_test_set_x = test_set_x.shape[0] - - test_out = theano.function([], self.final_layer.output, - givens={self.x: test_set_x, self.is_train: np.cast['int32'](0)}, on_unused_input='ignore') - - predict_parameter = test_out() - - return predict_parameter - - # the function to output activations at a hidden layer - def generate_hidden_layer(self, test_set_x, bn_layer_index): - """ This function is to predict the bottleneck features of NN - - :param test_set_x: input features for a testing sentence - :type test_set_x: python array variable - :returns: predicted bottleneck features - - """ - - n_test_set_x = test_set_x.shape[0] - - test_out = theano.function([], self.rnn_layers[bn_layer_index].output, - givens={self.x: test_set_x, self.is_train: np.cast['int32'](0)}, on_unused_input='ignore') - - predict_parameter = test_out() - - return predict_parameter -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -# THEANO_FLAGS='cuda.root=/opt/cuda-5.0.35,mode=FAST_RUN,device=gpu0,floatX=float32,exception_verbosity=high' python dnn.py -""" -""" -import pickle -import os -import sys -import time - -import numpy -from collections import OrderedDict - -import theano -import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams - -from layers.layers import LinearLayer, SigmoidLayer, HiddenLayer -from utils.providers import ListDataProvider - -from training_schemes.rprop import compile_RPROP_train_function - -import logging - - -class DNN(object): - - def __init__(self, numpy_rng, theano_rng=None, n_ins=784, - n_outs=10, l1_reg=None, l2_reg=None, - hidden_layers_sizes=[500, 500], - hidden_activation='tanh', output_activation='linear', - use_rprop=0, rprop_init_update=0.001): - - logger = logging.getLogger("DNN initialization") - - self.sigmoid_layers = [] - self.params = [] - self.delta_params = [] - self.n_layers = len(hidden_layers_sizes) - - self.output_activation = output_activation - - self.use_rprop = use_rprop - self.rprop_init_update = rprop_init_update - - self.l1_reg = l1_reg - self.l2_reg = l2_reg - - assert self.n_layers > 0 - - if not theano_rng: - theano_rng = RandomStreams(numpy_rng.randint(2 ** 30)) - - # allocate symbolic variables for the data - self.x = T.matrix('x') - self.y = T.matrix('y') - - for i in range(self.n_layers): - if i == 0: - input_size = n_ins - else: - input_size = hidden_layers_sizes[i - 1] - - if i == 0: - layer_input = self.x - else: - layer_input = self.sigmoid_layers[-1].output - - sigmoid_layer = HiddenLayer(rng=numpy_rng, - input=layer_input, - n_in=input_size, - n_out=hidden_layers_sizes[i], - activation=T.tanh) # T.nnet.sigmoid) # - self.sigmoid_layers.append(sigmoid_layer) - self.params.extend(sigmoid_layer.params) - self.delta_params.extend(sigmoid_layer.delta_params) - - # add final layer - if self.output_activation == 'linear': - self.final_layer = LinearLayer(rng=numpy_rng, - input=self.sigmoid_layers[-1].output, - n_in=hidden_layers_sizes[-1], - n_out=n_outs) - elif self.output_activation == 'sigmoid': - self.final_layer = SigmoidLayer( - rng=numpy_rng, - input=self.sigmoid_layers[-1].output, - n_in=hidden_layers_sizes[-1], - n_out=n_outs, activation=T.nnet.sigmoid) - else: - logger.critical("This output activation function: %s is not supported right now!" 
-                        % (self.output_activation))
-            sys.exit(1)
-
-        self.params.extend(self.final_layer.params)
-        self.delta_params.extend(self.final_layer.delta_params)
-
-        # MSE
-        self.finetune_cost = T.mean(
-            T.sum((self.final_layer.output-self.y)*(self.final_layer.output-self.y), axis=1))
-
-        self.errors = T.mean(
-            T.sum((self.final_layer.output-self.y)*(self.final_layer.output-self.y), axis=1))
-
-        # L1-norm
-        if self.l1_reg is not None:
-            for i in range(self.n_layers):
-                W = self.params[i * 2]
-                self.finetune_cost += self.l1_reg * (abs(W).sum())
-
-        # L2-norm
-        if self.l2_reg is not None:
-            for i in range(self.n_layers):
-                W = self.params[i * 2]
-                self.finetune_cost += self.l2_reg * T.sqr(W).sum()
-
-    def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size,
-                                 return_valid_score_i=False):
-
-        (train_set_x, train_set_y) = train_shared_xy
-        (valid_set_x, valid_set_y) = valid_shared_xy
-
-        # compute number of minibatches for training, validation and testing
-        n_valid_batches = valid_set_x.get_value(borrow=True).shape[0]
-        # integer division: a fractional batch count would break range() in valid_score below
-        n_valid_batches //= batch_size
-
-        index = T.lscalar('index')  # index to a [mini]batch
-        learning_rate = T.fscalar('learning_rate')
-        momentum = T.fscalar('momentum')
-
-        layer_size = len(self.params)
-        lr_list = []
-        for i in range(layer_size):
-            lr_list.append(learning_rate)
-
-        # the top two layers use a smaller learning rate
-        # hard-coded for now, change it later
-        if layer_size > 4:
-            for i in range(layer_size-4, layer_size):
-                lr_list[i] = learning_rate * 0.5
-
-        # compute list of fine-tuning updates
-        # compute the gradients with respect to the model parameters
-        gparams = T.grad(self.finetune_cost, self.params)
-
-        if self.use_rprop == 0:
-
-            updates = OrderedDict()
-            layer_index = 0
-            for dparam, gparam in zip(self.delta_params, gparams):
-                updates[dparam] = momentum * dparam - \
-                    gparam * lr_list[layer_index]
-                layer_index += 1
-
-            for dparam, param in zip(self.delta_params, self.params):
-                updates[param] = param + updates[dparam]
-
-            on_unused_input_value = 'raise'  # Theano's default
-
-        elif self.use_rprop:
-            updates = compile_RPROP_train_function(self, gparams)
-            on_unused_input_value = 'warn'
-
-        # Learning rate and momentum are retained to keep the interface backwards
-        # compatible, even with RPROP where we don't use them; this means we have
-        # to use on_unused_input='warn'.
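-
-        # For reference (comment added): per parameter, the compiled
-        # SGD-with-momentum update above is
-        #     delta := momentum * delta - learning_rate * gradient
-        #     param := param + delta
-        # with the last four parameter tensors (the top two layers' W and b)
-        # taking the halved learning rate from lr_list.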
- - train_fn = theano.function(inputs=[index, theano.Param(learning_rate, default=0.125), - theano.Param(momentum, default=0.5)], - outputs=self.errors, - updates=updates, - on_unused_input=on_unused_input_value, - givens={self.x: train_set_x[index * batch_size: - (index + 1) * batch_size], - self.y: train_set_y[index * batch_size: - (index + 1) * batch_size]}) - - valid_fn = theano.function([], - outputs=self.errors, - givens={self.x: valid_set_x, - self.y: valid_set_y}) - - valid_score_i = theano.function([index], - outputs=self.errors, - givens={self.x: valid_set_x[index * batch_size: - (index + 1) * batch_size], - self.y: valid_set_y[index * batch_size: - (index + 1) * batch_size]}) - - # Create a function that scans the entire validation set - def valid_score(): - return [valid_score_i(i) for i in range(n_valid_batches)] - - if return_valid_score_i: - return train_fn, valid_fn, valid_score_i - else: - return train_fn, valid_fn - - def parameter_prediction(self, test_set_x): # , batch_size - - n_test_set_x = test_set_x.get_value(borrow=True).shape[0] - - test_out = theano.function([], self.final_layer.output, - givens={self.x: test_set_x[0:n_test_set_x]}) - - predict_parameter = test_out() - - return predict_parameter - - # the function to output activations at a hidden layer - def generate_top_hidden_layer(self, test_set_x, bn_layer_index): - - n_test_set_x = test_set_x.get_value(borrow=True).shape[0] - - test_out = theano.function([], self.sigmoid_layers[bn_layer_index].output, - givens={self.x: test_set_x[0:n_test_set_x]}) - - predict_parameter = test_out() - - return predict_parameter - - -if __name__ == '__main__': - - train_scp = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nn_scp/train.scp' - valid_scp = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nn_scp/gen.scp' - - model_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/practice/nnets_model' - - log_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/practice/log' - - finetune_lr = 0.01 - pretraining_epochs = 100 - pretrain_lr = 0.01 - training_epochs = 100 - batch_size = 32 - - n_ins = 898 - n_outs = 229 - - hidden_layers_sizes = [512, 512, 512] - -# test_DBN(train_scp, valid_scp, log_dir, model_dir, n_ins, n_outs, hidden_layers_sizes, -# finetune_lr, pretraining_epochs, pretrain_lr, training_epochs, batch_size) - - dnn_generation() -# THEANO_FLAGS='cuda.root=/opt/cuda-5.0.35,mode=FAST_RUN,device=gpu0,floatX=float32,exception_verbosity=high' python dnn.py -""" -""" -import pickle -import os -import sys -import time - -import numpy # as np -import gnumpy as gnp - -# cudamat - -#import theano -#import theano.tensor as T - -import logging - - -class DNN(object): - - def __init__(self, numpy_rng, n_ins=100, - n_outs=100, l1_reg=None, l2_reg=None, - hidden_layer_sizes=[500, 500], - hidden_activation='tanh', output_activation='linear'): - - logger = logging.getLogger("DNN initialization") - - self.n_layers = len(hidden_layer_sizes) - self.l1_reg = l1_reg - self.l2_reg = l2_reg - - assert self.n_layers > 0 - - self.W_params = [] - self.b_params = [] - self.mW_params = [] - self.mb_params = [] - - for i in range(self.n_layers): - if i == 0: - input_size = n_ins - else: - input_size = hidden_layer_sizes[i-1] - W_value = gnp.garray(numpy_rng.normal( - 0.0, 1.0/numpy.sqrt(input_size), size=(input_size, hidden_layer_sizes[i]))) - b_value = gnp.zeros(hidden_layer_sizes[i]) - mW_value = gnp.zeros((input_size, hidden_layer_sizes[i])) - mb_value = gnp.zeros(hidden_layer_sizes[i]) - self.W_params.append(W_value) - 
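-            # comment added: each layer keeps four gnumpy arrays -- weights W,
-            # biases b, and momentum buffers mW / mb that accumulate the
-            # momentum-SGD updates applied in gradient_update() below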
self.b_params.append(b_value) - self.mW_params.append(mW_value) - self.mb_params.append(mb_value) - - # output layer - input_size = hidden_layer_sizes[self.n_layers-1] - W_value = gnp.garray(numpy_rng.normal( - 0.0, 1.0/numpy.sqrt(input_size), size=(input_size, n_outs))) - b_value = gnp.zeros(n_outs) - mW_value = gnp.zeros((input_size, n_outs)) - mb_value = gnp.zeros(n_outs) - self.W_params.append(W_value) - self.b_params.append(b_value) - self.mW_params.append(mW_value) - self.mb_params.append(mb_value) - - def backpropagation(self, train_set_y): - # (train_set_x, train_set_y) = train_xy - - # assuming linear output and square error cost function - observation_error = self.final_layer_output - train_set_y - - self.W_grads = [] - self.b_grads = [] - current_error = observation_error - current_activation = self.activations[-1] - current_W_grad = gnp.dot(current_activation.T, observation_error) - current_b_grad = gnp.dot( - gnp.ones((1, observation_error.shape[0])), observation_error) - self.W_grads.append(current_W_grad) - self.b_grads.append(current_b_grad) - - # final layer is linear output, gradient is one - propagate_error = gnp.dot( - observation_error, self.W_params[self.n_layers].T) - for i in reversed(list(range(self.n_layers))): - current_activation = self.activations[i] - current_gradient = 1.0 - current_activation ** 2 - current_W_grad = gnp.dot(current_activation.T, propagate_error) - current_b_grad = gnp.dot( - gnp.ones((1, propagate_error.shape[0])), propagate_error) - propagate_error = gnp.dot( - propagate_error, self.W_params[i].T) * current_gradient - - self.W_grads.insert(0, current_W_grad) - self.b_grads.insert(0, current_b_grad) - - def feedforward(self, train_set_x): - self.activations = [] - - self.activations.append(train_set_x) - - for i in range(self.n_layers): - current_activations = gnp.tanh( - gnp.dot(self.activations[i], self.W_params[i]) + self.b_params[i]) - self.activations.append(current_activations) - - # output layers - self.final_layer_output = gnp.dot( - self.activations[self.n_layers], self.W_params[self.n_layers]) + self.b_params[self.n_layers] - - def gradient_update(self, batch_size, learning_rate, momentum): - - multiplier = learning_rate / batch_size - for i in range(len(self.W_grads)): - - if i >= len(self.W_grads) - 2: - local_multiplier = multiplier * 0.5 - else: - local_multiplier = multiplier - - self.W_grads[i] = (self.W_grads[i] + self.W_params[i] - * self.l2_reg) * local_multiplier - # + self.b_params[i] * self.l2_reg - self.b_grads[i] = self.b_grads[i] * local_multiplier - - # update weights and record momentum weights - self.mW_params[i] = (self.mW_params[i] * - momentum) - self.W_grads[i] - self.mb_params[i] = (self.mb_params[i] * - momentum) - self.b_grads[i] - self.W_params[i] += self.mW_params[i] - self.b_params[i] += self.mb_params[i] -# print self.W_params[0].shape, self.W_params[len(self.W_params)-1].shape - - def finetune(self, train_xy, batch_size, learning_rate, momentum): - (train_set_x, train_set_y) = train_xy - - train_set_x = gnp.as_garray(train_set_x) - train_set_y = gnp.as_garray(train_set_y) - - self.feedforward(train_set_x) - self.backpropagation(train_set_y) - self.gradient_update(batch_size, learning_rate, momentum) - - self.errors = gnp.sum( - (self.final_layer_output - train_set_y) ** 2, axis=1) - - return self.errors.as_numpy_array() - - def parameter_prediction(self, test_set_x): - test_set_x = gnp.as_garray(test_set_x) - - current_activations = test_set_x - - for i in range(self.n_layers): - current_activations = 
gnp.tanh( - gnp.dot(current_activations, self.W_params[i]) + self.b_params[i]) - - final_layer_output = gnp.dot( - current_activations, self.W_params[self.n_layers]) + self.b_params[self.n_layers] - - return final_layer_output.as_numpy_array() - -# def parameter_prediction(self, test_set_x): #, batch_size - -# n_test_set_x = test_set_x.get_value(borrow=True).shape[0] - -# test_out = theano.function([], self.final_layer.output, -# givens={self.x: test_set_x[0:n_test_set_x]}) -# predict_parameter = test_out() -# return predict_parameter - - -if __name__ == '__main__': - - train_scp = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nn_scp/train.scp' - valid_scp = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nn_scp/gen.scp' - - model_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/practice/nnets_model' - - log_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/practice/log' - - finetune_lr = 0.01 - pretraining_epochs = 100 - pretrain_lr = 0.01 - training_epochs = 100 - batch_size = 32 - - n_ins = 898 - n_outs = 229 - - hidden_layer_sizes = [512, 512, 512] - -# test_DBN(train_scp, valid_scp, log_dir, model_dir, n_ins, n_outs, hidden_layer_sizes, -# finetune_lr, pretraining_epochs, pretrain_lr, training_epochs, batch_size) - - dnn_generation() - -import sys - -import numpy as np -from collections import OrderedDict - -import theano -import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams - -from layers.gating import SimplifiedLstm, SimplifiedLstmDecoder, BidirectionSLstm, VanillaLstm, VanillaLstmDecoder, BidirectionLstm, VanillaRNN, VanillaRNNDecoder, SimplifiedGRU, GatedRecurrentUnit, LstmNoPeepholes, LstmNOG, LstmNIG, LstmNFG -from layers.layers import GeneralLayer, LinearLayer, SigmoidLayer -from layers.recurrent_output_layer import RecurrentOutputLayer -from layers.lhuc_layer import SigmoidLayer_LHUC, VanillaLstm_LHUC - -from training_schemes.rprop import compile_RPROP_train_function -from training_schemes.adam_v2 import compile_ADAM_train_function - -from models.seq2seq import VanillaSequenceEncoder, DistributedSequenceEncoder - -import logging - - -class DeepEncoderDecoderNetwork(object): - """ - This class is to assemble various neural network architectures. From basic feedforward neural network to bidirectional gated recurrent neural networks and hybrid architecture. **Hybrid** means a combination of feedforward and recurrent architecture. - - """ - - def __init__(self, n_in, hidden_layer_size, n_out, L1_reg, L2_reg, hidden_layer_type, output_type='LINEAR', network_type='S2S', ed_type='HED', dropout_rate=0.0, optimizer='sgd', MLU_div_lengths=[], loss_function='MMSE', rnn_batch_training=False): - """ This function initialises a neural network - - :param n_in: Dimensionality of input features - :type in: Integer - :param hidden_layer_size: The layer size for each hidden layer - :type hidden_layer_size: A list of integers - :param n_out: Dimensionality of output features - :type n_out: Integrer - :param hidden_layer_type: the activation types of each hidden layers, e.g., TANH, LSTM, GRU, BLSTM - :param L1_reg: the L1 regulasation weight - :param L2_reg: the L2 regulasation weight - :param output_type: the activation type of the output layer, by default is 'LINEAR', linear regression. - :param dropout_rate: probability of dropout, a float number between 0 and 1. 
- """ - - logger = logging.getLogger("DNN initialization") - - self.n_in = int(n_in) - self.n_out = int(n_out) - - self.n_layers = len(hidden_layer_size) - - self.dropout_rate = dropout_rate - self.optimizer = optimizer - self.loss_function = loss_function - self.is_train = T.iscalar('is_train') - self.rnn_batch_training = rnn_batch_training - - assert len(hidden_layer_size) == len(hidden_layer_type) - - self.list_of_activations = [ - 'TANH', 'SIGMOID', 'SOFTMAX', 'RELU', 'RESU'] - - BLSTM_variants = ['BLSTM', 'BSLSTM', 'BLSTME', 'BSLSTME'] - Encoder_variants = ['RNNE', 'LSTME', 'BLSTME', 'SLSTME', 'TANHE'] - Decoder_variants = ['RNND', 'LSTMD', 'SLSTMD'] - - if self.rnn_batch_training: - self.x = T.tensor3('x') - self.y = T.tensor3('y') - else: - self.x = T.matrix('x') - self.y = T.matrix('y') - - if network_type == "S2S": - self.d = T.ivector('d') - self.f = T.matrix('f') - - self.L1_reg = L1_reg - self.L2_reg = L2_reg - - self.rnn_layers = [] - self.params = [] - self.delta_params = [] - - rng = np.random.RandomState(123) - - prev_seg_end = 0 - encoder_count = 0 - MLU_div = MLU_div_lengths - for i in range(self.n_layers): - if i == 0: - input_size = n_in - else: - input_size = hidden_layer_size[i-1] - if hidden_layer_type[i-1] in BLSTM_variants: - input_size = hidden_layer_size[i-1]*2 - - if i == 0: - layer_input = self.x - else: - layer_input = self.rnn_layers[i-1].output - - ### sequence-to-sequence mapping ### - if hidden_layer_type[i-1] in Encoder_variants: - dur_input = self.d - frame_feat_input = self.f - - # vanilla encoder-decoder (phone-level features) - if ed_type == "VED": - seq2seq_model = DistributedSequenceEncoder( - rng, layer_input, dur_input) - layer_input = T.concatenate( - (seq2seq_model.encoded_output, frame_feat_input), axis=1) - input_size = input_size+4 - # hierarchical encoder-decoder - elif ed_type == "HED": - seg_len = layer_input.size//input_size - seg_dur_input = dur_input[prev_seg_end: prev_seg_end+seg_len] - num_of_segs = T.sum(seg_dur_input) - seq2seq_model = DistributedSequenceEncoder( - rng, layer_input, seg_dur_input) - addfeat_input = frame_feat_input[0:num_of_segs, - MLU_div[encoder_count]:MLU_div[encoder_count+1]] - layer_input = T.concatenate( - (seq2seq_model.encoded_output, addfeat_input), axis=1) - input_size = input_size + \ - (MLU_div[encoder_count+1]-MLU_div[encoder_count]) - prev_seg_end = prev_seg_end + seg_len - encoder_count = encoder_count + 1 - - # hidden layer activation - if hidden_layer_type[i] in self.list_of_activations: - hidden_activation = hidden_layer_type[i].lower() - hidden_layer = GeneralLayer( - rng, layer_input, input_size, hidden_layer_size[i], activation=hidden_activation, p=self.dropout_rate, training=self.is_train) - elif hidden_layer_type[i] == 'TANHE' or hidden_layer_type[i] == 'SIGMOIDE': - hidden_activation = hidden_layer_type[i][0:-1].lower() - hidden_layer = GeneralLayer( - rng, layer_input, input_size, hidden_layer_size[i], activation=hidden_activation, p=self.dropout_rate, training=self.is_train) - elif hidden_layer_type[i] == 'TANH_LHUC': - hidden_layer = SigmoidLayer_LHUC( - rng, layer_input, input_size, hidden_layer_size[i], activation=T.tanh, p=self.dropout_rate, training=self.is_train) - elif hidden_layer_type[i] == 'SLSTM' or hidden_layer_type[i] == 'SLSTME': - hidden_layer = SimplifiedLstm( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'SLSTMD': - hidden_layer = 
SimplifiedLstmDecoder( - rng, layer_input, input_size, hidden_layer_size[i], self.n_out, p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'SGRU': - hidden_layer = SimplifiedGRU( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'GRU': - hidden_layer = GatedRecurrentUnit( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'LSTM' or hidden_layer_type[i] == 'LSTME': - hidden_layer = VanillaLstm( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'LSTMD': - hidden_layer = VanillaLstmDecoder( - rng, layer_input, input_size, hidden_layer_size[i], self.n_out, p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'BSLSTM' or hidden_layer_type[i] == 'BSLSTME': - hidden_layer = BidirectionSLstm(rng, layer_input, input_size, hidden_layer_size[i], hidden_layer_size[ - i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'BLSTM' or hidden_layer_type[i] == 'BLSTME': - hidden_layer = BidirectionLstm(rng, layer_input, input_size, hidden_layer_size[i], hidden_layer_size[ - i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'RNN' or hidden_layer_type[i] == 'RNNE': - hidden_layer = VanillaRNN( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'RNND': - hidden_layer = VanillaRNNDecoder( - rng, layer_input, input_size, hidden_layer_size[i], self.n_out, p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - elif hidden_layer_type[i] == 'LSTM_LHUC': - hidden_layer = VanillaLstm_LHUC( - rng, layer_input, input_size, hidden_layer_size[i], p=self.dropout_rate, training=self.is_train, rnn_batch_training=self.rnn_batch_training) - else: - logger.critical("This hidden layer type: %s is not supported right now! \n Please use one of the following: SLSTM, BSLSTM, TANH, SIGMOID\n" % ( - hidden_layer_type[i])) - sys.exit(1) - - self.rnn_layers.append(hidden_layer) - self.params.extend(hidden_layer.params) - - input_size = hidden_layer_size[-1] - if hidden_layer_type[-1] in BLSTM_variants: - input_size = hidden_layer_size[-1]*2 - - if hidden_layer_type[-1] in Decoder_variants: - self.final_layer = self.rnn_layers[-1] - else: - output_activation = output_type.lower() - if output_activation == 'linear': - self.final_layer = LinearLayer( - rng, self.rnn_layers[-1].output, input_size, self.n_out) - elif output_activation == 'recurrent': - self.final_layer = RecurrentOutputLayer( - rng, self.rnn_layers[-1].output, input_size, self.n_out, rnn_batch_training=self.rnn_batch_training) - elif output_type.upper() in self.list_of_activations: - self.final_layer = GeneralLayer( - rng, self.rnn_layers[-1].output, input_size, self.n_out, activation=output_activation) - else: - logger.critical( - "This output layer type: %s is not supported right now! 
\n Please use one of the following: LINEAR, BSLSTM\n" % (output_type)) - sys.exit(1) - - self.params.extend(self.final_layer.params) - - self.updates = {} - for param in self.params: - self.updates[param] = theano.shared(value=np.zeros(param.get_value(borrow=True).shape, - dtype=theano.config.floatX), name='updates') - - if self.loss_function == 'CCE': - self.finetune_cost = self.categorical_crossentropy_loss( - self.final_layer.output, self.y) - self.errors = self.categorical_crossentropy_loss( - self.final_layer.output, self.y) - elif self.loss_function == 'Hinge': - self.finetune_cost = self.multiclass_hinge_loss( - self.final_layer.output, self.y) - self.errors = self.multiclass_hinge_loss( - self.final_layer.output, self.y) - elif self.loss_function == 'MMSE': - if self.rnn_batch_training: - self.y_mod = T.reshape(self.y, (-1, n_out)) - self.final_layer_output = T.reshape( - self.final_layer.output, (-1, n_out)) - - nonzero_rows = T.any(self.y_mod, 1).nonzero() - - self.y_mod = self.y_mod[nonzero_rows] - self.final_layer_output = self.final_layer_output[nonzero_rows] - - self.finetune_cost = T.mean( - T.sum((self.final_layer_output - self.y_mod) ** 2, axis=1)) - self.errors = T.mean( - T.sum((self.final_layer_output - self.y_mod) ** 2, axis=1)) - else: - self.finetune_cost = T.mean( - T.sum((self.final_layer.output - self.y) ** 2, axis=1)) - self.errors = T.mean( - T.sum((self.final_layer.output - self.y) ** 2, axis=1)) - - def categorical_crossentropy_loss(self, predictions, targets): - return T.nnet.categorical_crossentropy(predictions, targets).mean() - - def multiclass_hinge_loss(self, predictions, targets, delta=1): - num_cls = predictions.shape[1] - if targets.ndim == predictions.ndim - 1: - targets = T.extra_ops.to_one_hot(targets, num_cls) - elif targets.ndim != predictions.ndim: - raise TypeError('rank mismatch between targets and predictions') - corrects = predictions[targets.nonzero()] - rest = T.reshape(predictions[(1-targets).nonzero()], - (-1, num_cls-1)) - rest = T.max(rest, axis=1) - return T.nnet.relu(rest - corrects + delta).mean() - - def build_finetune_functions(self, train_shared_xy, valid_shared_xy, use_lhuc=False, layer_index=0): - """ This function is to build finetune functions and to update gradients - - :param train_shared_xy: theano shared variable for input and output training data - :type train_shared_xy: tuple of shared variable - :param valid_shared_xy: theano shared variable for input and output development data - :type valid_shared_xy: tuple of shared variable - :returns: finetune functions for training and development - - """ - - logger = logging.getLogger("DNN initialization") - - (train_set_x, train_set_y) = train_shared_xy - (valid_set_x, valid_set_y) = valid_shared_xy - - lr = T.scalar('lr', dtype=theano.config.floatX) - mom = T.scalar('mom', dtype=theano.config.floatX) # momentum - - cost = self.finetune_cost # + self.L2_reg * self.L2_sqr - - # added for LHUC - if use_lhuc: - # In lhuc the parameters are only scaling parameters which have the name 'c' - self.lhuc_params = [] - for p in self.params: - if p.name == 'c': - self.lhuc_params.append(p) - params = self.lhuc_params - gparams = T.grad(cost, params) - else: - params = self.params - gparams = T.grad(cost, params) - - freeze_params = 0 - for layer in range(layer_index): - freeze_params += len(self.rnn_layers[layer].params) - - # use optimizer - if self.optimizer == 'sgd': - # zip just concatenate two lists - updates = OrderedDict() - - for i, (param, gparam) in enumerate(zip(params, 
gparams)): - weight_update = self.updates[param] - upd = mom * weight_update - lr * gparam - updates[weight_update] = upd - - # freeze layers and update weights - if i >= freeze_params: - updates[param] = param + upd - - elif self.optimizer == 'adam': - updates = compile_ADAM_train_function( - self, gparams, learning_rate=lr) - elif self.optimizer == 'rprop': - updates = compile_RPROP_train_function(self, gparams) - else: - logger.critical( - "This optimizer: %s is not supported right now! \n Please use one of the following: sgd, adam, rprop\n" % (self.optimizer)) - sys.exit(1) - - train_model = theano.function(inputs=[lr, mom], # index, batch_size - outputs=self.errors, - updates=updates, - givens={self.x: train_set_x, # [index*batch_size:(index + 1)*batch_size] - self.y: train_set_y, - self.is_train: np.cast['int32'](1)}, on_unused_input='ignore') - - valid_model = theano.function(inputs=[], - outputs=self.errors, - givens={self.x: valid_set_x, - self.y: valid_set_y, - self.is_train: np.cast['int32'](0)}, on_unused_input='ignore') - - return train_model, valid_model - - def build_finetune_functions_S2S(self, train_shared_xyd, valid_shared_xyd): - """ This function is to build finetune functions and to update gradients - - :param train_shared_xy: theano shared variable for input and output training data - :type train_shared_xy: tuple of shared variable - :param valid_shared_xy: theano shared variable for input and output development data - :type valid_shared_xy: tuple of shared variable - :returns: finetune functions for training and development - - """ - - (train_set_x, train_set_y, train_set_d) = train_shared_xyd - (valid_set_x, valid_set_y, valid_set_d) = valid_shared_xyd - - lr = T.scalar('lr', dtype=theano.config.floatX) - mom = T.scalar('mom', dtype=theano.config.floatX) # momentum - - cost = self.finetune_cost # + self.L2_reg * self.L2_sqr - - gparams = T.grad(cost, self.params) - - # zip just concatenate two lists - updates = OrderedDict() - - for param, gparam in zip(self.params, gparams): - weight_update = self.updates[param] - upd = mom * weight_update - lr * gparam - updates[weight_update] = upd - updates[param] = param + upd - - train_model = theano.function(inputs=[lr, mom], - outputs=self.errors, - updates=updates, - givens={self.x: train_set_x, - self.y: train_set_y, - self.d: train_set_d, - self.is_train: np.cast['int32'](1)}, on_unused_input='ignore') - - valid_model = theano.function(inputs=[], - outputs=self.errors, - givens={self.x: valid_set_x, - self.y: valid_set_y, - self.d: valid_set_d, - self.is_train: np.cast['int32'](0)}, on_unused_input='ignore') - - return train_model, valid_model - - def build_finetune_functions_S2SPF(self, train_shared_xydf, valid_shared_xydf, layer_index=6): - """ This function is to build finetune functions and to update gradients - - :param train_shared_xy: theano shared variable for input and output training data - :type train_shared_xy: tuple of shared variable - :param valid_shared_xy: theano shared variable for input and output development data - :type valid_shared_xy: tuple of shared variable - :returns: finetune functions for training and development - - """ - - (train_set_x, train_set_y, train_set_d, train_set_f) = train_shared_xydf - (valid_set_x, valid_set_y, valid_set_d, valid_set_f) = valid_shared_xydf - - lr = T.scalar('lr', dtype=theano.config.floatX) - mom = T.scalar('mom', dtype=theano.config.floatX) # momentum - - cost = self.finetune_cost # + self.L2_reg * self.L2_sqr - - params = self.params - gparams = T.grad(cost, 
params) - - encoder_params = 0 - for layer in range(layer_index): - encoder_params += len(self.rnn_layers[layer].params) - - # use optimizer - if self.optimizer == 'sgd': - # zip just concatenate two lists - updates = OrderedDict() - - for i, (param, gparam) in enumerate(zip(params, gparams)): - weight_update = self.updates[param] - if i >= encoder_params: - upd = mom * weight_update - lr * gparam - else: - upd = mom * weight_update - (lr*2) * gparam - updates[weight_update] = upd - updates[param] = param + upd - - elif self.optimizer == 'adam': - updates = compile_ADAM_train_function( - self, gparams, learning_rate=lr) - elif self.optimizer == 'rprop': - updates = compile_RPROP_train_function(self, gparams) - else: - logger.critical( - "This optimizer: %s is not supported right now! \n Please use one of the following: sgd, adam, rprop\n" % (self.optimizer)) - sys.exit(1) - - train_model = theano.function(inputs=[lr, mom], - outputs=self.errors, - updates=updates, - givens={self.x: train_set_x, - self.y: train_set_y, - self.d: train_set_d, - self.f: train_set_f, - self.is_train: np.cast['int32'](1)}, on_unused_input='ignore') - - valid_model = theano.function(inputs=[], - outputs=self.errors, - givens={self.x: valid_set_x, - self.y: valid_set_y, - self.d: valid_set_d, - self.f: valid_set_f, - self.is_train: np.cast['int32'](0)}, on_unused_input='ignore') - - return train_model, valid_model - - def parameter_prediction(self, test_set_x): # , batch_size - """ This function is to predict the output of NN - - :param test_set_x: input features for a testing sentence - :type test_set_x: python array variable - :returns: predicted features - - """ - - n_test_set_x = test_set_x.shape[0] - - test_out = theano.function([], self.final_layer.output, - givens={self.x: test_set_x[0:n_test_set_x], self.is_train: np.cast['int32'](0)}, on_unused_input='ignore') - - predict_parameter = test_out() - - return predict_parameter - - def parameter_prediction_S2S(self, test_set_x, test_set_d): - """ This function is to predict the output of NN - - :param test_set_x: input features for a testing sentence - :param test_set_d: phone durations for a testing sentence - :type test_set_x: python array variable - :type test_set_d: python array variable - :returns: predicted features - - """ - - n_test_set_x = test_set_x.shape[0] - - test_out = theano.function([], self.final_layer.output, - givens={self.x: test_set_x[0:n_test_set_x], self.d: test_set_d[0:n_test_set_x], self.is_train: np.cast['int32'](0)}, on_unused_input='ignore') - - predict_parameter = test_out() - - return predict_parameter - - def parameter_prediction_S2SPF(self, test_set_x, test_set_d, test_set_f): - """ This function is to predict the output of NN - - :param test_set_x: input features for a testing sentence - :param test_set_d: phone durations for a testing sentence - :type test_set_x: python array variable - :type test_set_d: python array variable - :returns: predicted features - - """ - - n_test_set_x = test_set_x.shape[0] - num_of_frames = sum(test_set_d) - - test_out = theano.function([], self.final_layer.output, - givens={self.x: test_set_x[0:n_test_set_x], self.d: test_set_d, self.f: test_set_f, self.is_train: np.cast['int32'](0)}, on_unused_input='ignore') - - predict_parameter = test_out() - - return predict_parameter - - def parameter_prediction_CTC(self, test_set_x): # , batch_size - - n_test_set_x = test_set_x.shape[0] - - test_out = theano.function([], self.rnn_layers[-1].output, - givens={self.x: test_set_x[0:n_test_set_x]}) - - 
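-        # comment added: givens substitutes the shared test data for the symbolic
-        # input self.x when the compiled function is called, so test_out() takes no
-        # arguments and returns the last recurrent layer's output for the CTC case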
predict_parameter = test_out() - - return predict_parameter - - def parameter_prediction_MDN(self, test_set_x): # , batch_size - - n_test_set_x = test_set_x.get_value(borrow=True).shape[0] - - test_out = theano.function([], self.final_layer.mu, - givens={self.x: test_set_x[0:n_test_set_x]}) - - predict_parameter = test_out() - - return predict_parameter - - def parameter_prediction_mix(self, test_set_x): # , batch_size - - n_test_set_x = test_set_x.get_value(borrow=True).shape[0] - - test_out = theano.function([], self.final_layer.mix, - givens={self.x: test_set_x[0:n_test_set_x]}) - - predict_parameter = test_out() - - return predict_parameter - - def parameter_prediction_sigma(self, test_set_x): # , batch_size - - n_test_set_x = test_set_x.get_value(borrow=True).shape[0] - - test_out = theano.function([], self.final_layer.sigma, - givens={self.x: test_set_x[0:n_test_set_x]}) - - predict_parameter = test_out() - - return predict_parameter - - # the function to output activations at a hidden layer - def generate_hidden_layer(self, test_set_x, bn_layer_index): - """ This function is to predict the bottleneck features of NN - - :param test_set_x: input features for a testing sentence - :type test_set_x: python array variable - :returns: predicted bottleneck features - - """ - - n_test_set_x = test_set_x.shape[0] - - test_out = theano.function([], self.rnn_layers[bn_layer_index].output, - givens={self.x: test_set_x, self.is_train: np.cast['int32'](0)}, on_unused_input='ignore') - - predict_parameter = test_out() - - return predict_parameter -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. 
-################################################################################ - -# THEANO_FLAGS='cuda.root=/opt/cuda-5.0.35,mode=FAST_RUN,device=gpu0,floatX=float32,exception_verbosity=high' python dnn.py -""" -""" -import pickle -import os -import sys -import time -import math - -import numpy -from collections import OrderedDict - -import theano -import theano.tensor as T -from theano.tensor.shared_randomstreams import RandomStreams - -from layers.layers import LinearLayer, SigmoidLayer, HiddenLayer, GeneralLayer, MixtureDensityOutputLayer -from utils.providers import ListDataProvider - -from training_schemes.rprop import compile_RPROP_train_function - -import logging - - -class MixtureDensityNetwork(object): - def __init__(self, numpy_rng, n_ins=784, n_outs=24, l1_reg=None, l2_reg=None, - hidden_layers_sizes=[500, 500], - hidden_activation='tanh', output_activation='linear', var_floor=0.01, - n_component=1, beta_opt=False, use_rprop=0, rprop_init_update=0.001, - eff_sample_size=0.8, mean_log_det=-100.0): - - logger = logging.getLogger("Multi-stream DNN initialization") - - self.sigmoid_layers = [] - self.params = [] - self.delta_params = [] - - self.final_layers = [] - - self.n_outs = n_outs - - self.n_layers = len(hidden_layers_sizes) - - self.output_activation = output_activation - self.var_floor = var_floor - - self.use_rprop = use_rprop - self.rprop_init_update = rprop_init_update - - self.l1_reg = l1_reg - self.l2_reg = l2_reg - - self.beta_opt = beta_opt - self.eff_sample_size = eff_sample_size - self.mean_log_det = mean_log_det - - assert self.n_layers > 0 - - # allocate symbolic variables for the data - self.x = T.matrix('x') - self.y = T.matrix('y') - - for i in range(self.n_layers): - if i == 0: - input_size = n_ins - else: - input_size = hidden_layers_sizes[i - 1] - - if i == 0: - layer_input = self.x - else: - layer_input = self.sigmoid_layers[-1].output - - sigmoid_layer = HiddenLayer(rng=numpy_rng, - input=layer_input, - n_in=input_size, - n_out=hidden_layers_sizes[i], - activation=T.tanh) # T.nnet.sigmoid) # - self.sigmoid_layers.append(sigmoid_layer) - self.params.extend(sigmoid_layer.params) - self.delta_params.extend(sigmoid_layer.delta_params) - - hidden_output_size = hidden_layers_sizes[-1] - - self.final_layer = MixtureDensityOutputLayer(rng=numpy_rng, - input=sigmoid_layer.output, - n_in=hidden_output_size, - n_out=self.n_outs, - n_component=n_component, - var_floor=self.var_floor) - self.params.extend(self.final_layer.params) - self.delta_params.extend(self.final_layer.delta_params) - - # Maximum likelihood - self.finetune_cost = 0.0 - - self.errors = 0.0 - - epsd = self.eff_sample_size**(-2.0/(n_outs + 2.0)) - beta = (epsd - 1.0) + math.sqrt(epsd*(epsd - 1.0)) - - if self.beta_opt: - assert n_component == 1, "beta optimisation only implemented for single-component MDNs" - for i in range(n_component): # n_component - sigma = self.final_layer.sigma[:, i*n_outs:(i+1)*n_outs] - mu = self.final_layer.mu[:, i*n_outs:(i+1)*n_outs] - mix_weight = self.final_layer.mix[:, i] - - xEx = -0.5 * beta * \ - T.sum(((self.y - mu)**2) * T.inv(sigma), axis=1) - exponent = (0.5 * (n_outs + 2.0) * T.log(1 + beta)) + xEx - point_fit = T.exp(exponent) - beta - - log_det_mult = -0.5 * beta * T.sum(T.log(sigma), axis=1) - - # normalise by mean_log_det - log_det_mult += (0.5 * beta * self.mean_log_det) - - beta_obj = (mix_weight**2) * point_fit * T.exp(log_det_mult) - - self.finetune_cost += -T.mean(beta_obj) - - # lines to compute debugging information for later printing - 
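
The `beta_opt` branch above derives `beta` from `eff_sample_size` and the output dimensionality alone. A quick numeric check with the constructor defaults (`eff_sample_size=0.8`, `n_outs=24`):

```python
import math

# Reproducing the beta computation above for the class defaults.
eff_sample_size, n_outs = 0.8, 24
epsd = eff_sample_size ** (-2.0 / (n_outs + 2.0))      # ~1.0173
beta = (epsd - 1.0) + math.sqrt(epsd * (epsd - 1.0))   # ~0.1500
print(round(beta, 3))                                  # 0.15
```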
#self.errors = T.min(T.min(T.log(sigma), axis=1)) - # self.errors = T.mean(T.sum(T.log(sigma), axis=1)) # computes mean_log_det - # self.errors = -xEx # (vector quantity) should be about 0.5 * beta * n_outs - # self.errors = point_fit # (vector quantity) should be about one - # self.errors = T.mean(T.exp(exponent)) / T.exp(T.max(exponent)) # fraction of the data used, should be about efficiency - # self.errors = T.mean(point_fit) # should be about one - # self.errors = log_det_mult # (vector quantity) about zero, or always less if using Rprop - # self.errors = beta_obj # (vector quantity) objective function terms - # self.errors = self.finetune_cost # disable this line below when debugging - else: - - all_mix_prob = [] - - print(n_component) - for i in range(n_component): # n_component - sigma = self.final_layer.sigma[:, i*n_outs:(i+1)*n_outs] - mu = self.final_layer.mu[:, i*n_outs:(i+1)*n_outs] - mix_weight = self.final_layer.mix[:, i] - - xEx = -0.5 * T.sum(((self.y - mu)**2) * T.inv(sigma), axis=1) - normaliser = 0.5 * \ - (n_outs * T.log(2 * numpy.pi) + T.sum(T.log(sigma), axis=1)) - exponent = xEx + T.log(mix_weight) - normaliser - all_mix_prob.append(exponent) - - max_exponent = T.max(all_mix_prob, axis=0, keepdims=True) - mod_exponent = T.as_tensor_variable(all_mix_prob) - max_exponent - - self.finetune_cost = - \ - T.mean(max_exponent + T.log(T.sum(T.exp(mod_exponent), axis=0))) - - #self.errors = self.finetune_cost - - if self.l2_reg is not None: - for i in range(self.n_layers-1): - W = self.params[i * 2] - self.finetune_cost += self.l2_reg * T.sqr(W).sum() - self.finetune_cost += self.l2_reg * \ - T.sqr(self.final_layer.W_mu).sum() - self.finetune_cost += self.l2_reg * \ - T.sqr(self.final_layer.W_sigma).sum() - self.finetune_cost += self.l2_reg * \ - T.sqr(self.final_layer.W_mix).sum() - - self.errors = self.finetune_cost # disable this line if debugging beta_opt - - def build_finetune_functions(self, train_shared_xy, valid_shared_xy, batch_size): - - (train_set_x, train_set_y) = train_shared_xy - (valid_set_x, valid_set_y) = valid_shared_xy - - # compute number of minibatches for training, validation and testing - n_valid_batches = valid_set_x.get_value(borrow=True).shape[0] - n_valid_batches /= batch_size - - index = T.lscalar('index') # index to a [mini]batch - learning_rate = T.fscalar('learning_rate') - momentum = T.fscalar('momentum') - - layer_size = len(self.params) - lr_list = [] - for i in range(layer_size): - lr_list.append(learning_rate) - - # top 2 layers use a smaller learning rate - if layer_size > 4: - for i in range(layer_size-4, layer_size): - lr_list[i] = learning_rate * 0.5 - - # compute list of fine-tuning updates - # compute the gradients with respect to the model parameters - gparams = T.grad(self.finetune_cost, self.params) - - if self.use_rprop == 0: - - updates = OrderedDict() - layer_index = 0 - for dparam, gparam in zip(self.delta_params, gparams): - updates[dparam] = momentum * dparam - \ - gparam * lr_list[layer_index] - layer_index += 1 - - for dparam, param in zip(self.delta_params, self.params): - updates[param] = param + updates[dparam] - - train_fn = theano.function(inputs=[index, theano.Param(learning_rate, default=0.0001), - theano.Param(momentum, default=0.5)], - outputs=self.errors, - updates=updates, - on_unused_input='ignore', - givens={self.x: train_set_x[index * batch_size: - (index + 1) * batch_size], - self.y: train_set_y[index * batch_size: - (index + 1) * batch_size]}) - - elif self.use_rprop: - updates = 
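
The multi-component branch above is the standard log-sum-exp trick: subtracting the per-frame maximum exponent before exponentiating keeps the mixture likelihood from underflowing. The same computation in NumPy:

```python
import numpy as np

# log sum_i exp(e_i) computed stably as m + log sum_i exp(e_i - m),
# with m = max_i e_i, exactly as in the Theano graph above.
def mixture_nll(exponents):
    # exponents: (n_component, n_frames) per-component log-probabilities
    m = exponents.max(axis=0, keepdims=True)
    log_lik = m + np.log(np.exp(exponents - m).sum(axis=0, keepdims=True))
    return -log_lik.mean()

e = np.array([[-100.0, -2.0], [-101.0, -1.0]])  # would underflow naively
print(mixture_nll(e))
```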
compile_RPROP_train_function(self, gparams) - - # retain learning rate and momentum to make interface backwards compatible, - # but we won't use them, means we have to use on_unused_input='warn'. - # Otherwise same function for RPROP or otherwise -- can move this block outside if clause. - train_fn = theano.function(inputs=[index, theano.Param(learning_rate, default=0.0001), - theano.Param(momentum, default=0.5)], - outputs=self.errors, - updates=updates, - on_unused_input='warn', - givens={self.x: train_set_x[index * batch_size: - (index + 1) * batch_size], - self.y: train_set_y[index * batch_size: - (index + 1) * batch_size]}) - - valid_fn = theano.function([], - outputs=self.errors, - on_unused_input='ignore', - givens={self.x: valid_set_x, - self.y: valid_set_y}) - - valid_score_i = theano.function([index], - outputs=self.errors, - on_unused_input='ignore', - givens={self.x: valid_set_x[index * batch_size: - (index + 1) * batch_size], - self.y: valid_set_y[index * batch_size: - (index + 1) * batch_size]}) - # Create a function that scans the entire validation set - - def valid_score(): - return [valid_score_i(i) for i in range(n_valid_batches)] - - return train_fn, valid_fn - - def parameter_prediction(self, test_set_x): # , batch_size - - n_test_set_x = test_set_x.get_value(borrow=True).shape[0] - - test_out = theano.function([], self.final_layer.mu, - givens={self.x: test_set_x[0:n_test_set_x]}) - - predict_parameter = test_out() - - return predict_parameter - - def parameter_prediction_mix(self, test_set_x): # , batch_size - - n_test_set_x = test_set_x.get_value(borrow=True).shape[0] - - test_out = theano.function([], self.final_layer.mix, - givens={self.x: test_set_x[0:n_test_set_x]}) - - predict_parameter = test_out() - - return predict_parameter - - def parameter_prediction_sigma(self, test_set_x): # , batch_size - - n_test_set_x = test_set_x.get_value(borrow=True).shape[0] - - test_out = theano.function([], self.final_layer.sigma, - givens={self.x: test_set_x[0:n_test_set_x]}) - - predict_parameter = test_out() - - return predict_parameter - - -if __name__ == '__main__': - - train_scp = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nn_scp/train.scp' - valid_scp = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nn_scp/gen.scp' - - model_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/practice/nnets_model' - - log_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/practice/log' - - finetune_lr = 0.01 - pretraining_epochs = 100 - pretrain_lr = 0.01 - training_epochs = 100 - batch_size = 32 - - n_ins = 898 - n_outs = 229 - - hidden_layers_sizes = [512, 512, 512] - -# test_DBN(train_scp, valid_scp, log_dir, model_dir, n_ins, n_outs, hidden_layers_sizes, -# finetune_lr, pretraining_epochs, pretrain_lr, training_epochs, batch_size) - - dnn_generation() -import numpy as np -import theano -import theano.tensor as T -from theano import function - - -class VanillaSequenceEncoder(object): - - def __init__(self, rng, x, d): - - self.input = x - self.out_len = d - self.encoded_output = self.encode_final_state() - - ### default seq-to-seq model: tile C as input to all frames ### - def encode_final_state(self): - context_vector = self.input[-1, ] - tiled_context_vector = T.tile(context_vector, (self.out_len, 1)) - - return tiled_context_vector - - -class VanillaSequenceEncoderWithDur(object): - - def __init__(self, rng, x, d): - - self.input = x - self.dur_input = d - self.encoded_output = self.encode_final_state() - - ### default seq-to-seq model: tile C as input to all frames ### - def 
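
`compile_RPROP_train_function` is imported from `training_schemes.rprop` and not shown in this diff; for reference, one common Rprop- formulation looks like the following sketch (the actual implementation may differ):

```python
import numpy as np

# Generic Rprop- step: per-weight step sizes grow by `inc` while the
# gradient keeps its sign and shrink by `dec` when it flips; only the
# sign of the gradient is used, never its magnitude.
def rprop_step(param, grad, prev_grad, step, inc=1.2, dec=0.5,
               step_min=1e-6, step_max=50.0):
    sign_change = grad * prev_grad
    step[sign_change > 0] = np.minimum(step[sign_change > 0] * inc, step_max)
    step[sign_change < 0] = np.maximum(step[sign_change < 0] * dec, step_min)
    param -= np.sign(grad) * step
    return param, step, grad.copy()   # current grad becomes prev_grad
```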
encode_final_state(self): - context_vector = self.input[-1, ] - tiled_context_vector = T.tile( - context_vector, (T.sum(self.dur_input), 1)) - - return tiled_context_vector - - -class DistributedSequenceEncoder(object): - - def __init__(self, rng, x, d): - - self.input = x - self.dur_input = d - self.encoded_output = self.encode_all_states() - - ### Distributed seq-to-seq model: tile C_1-C_n as input to corresponding decoder frames ### - def encode_all_states(self): - reps = T.repeat(T.arange(self.dur_input.size), self.dur_input) - dist_context_vector = self.input[reps] - - return dist_context_vector - -import pickle -import os -import sys -import time - -import numpy as np -import gnumpy as gnp - -from numpy import float64 - -import bandmat as bm -import bandmat.linalg as bla - -from guppy import hpy - -import logging - - -class SequentialDNN(object): - - def __init__(self, numpy_rng, n_ins=100, - n_outs=100, l1_reg=None, l2_reg=None, - hidden_layer_sizes=[500, 500], - hidden_activation='tanh', output_activation='linear'): - - logger = logging.getLogger("DNN initialization") - - self.n_layers = len(hidden_layer_sizes) - self.l1_reg = l1_reg - self.l2_reg = l2_reg - - assert self.n_layers > 0 - - self.W_params = [] - self.b_params = [] - self.mW_params = [] - self.mb_params = [] - - for i in range(self.n_layers): - if i == 0: - input_size = n_ins - else: - input_size = hidden_layer_sizes[i-1] - W_value = gnp.garray(numpy_rng.normal( - 0.0, 1.0/np.sqrt(input_size), size=(input_size, hidden_layer_sizes[i]))) - b_value = gnp.zeros(hidden_layer_sizes[i]) - mW_value = gnp.zeros((input_size, hidden_layer_sizes[i])) - mb_value = gnp.zeros(hidden_layer_sizes[i]) - self.W_params.append(W_value) - self.b_params.append(b_value) - self.mW_params.append(mW_value) - self.mb_params.append(mb_value) - - # output layer - input_size = hidden_layer_sizes[self.n_layers-1] - W_value = gnp.garray(numpy_rng.normal( - 0.0, 1.0/np.sqrt(input_size), size=(input_size, n_outs))) - b_value = gnp.zeros(n_outs) - mW_value = gnp.zeros((input_size, n_outs)) - mb_value = gnp.zeros(n_outs) - self.W_params.append(W_value) - self.b_params.append(b_value) - self.mW_params.append(mW_value) - self.mb_params.append(mb_value) - - def backpropagation(self, train_set_y, mean_matrix, std_matrix): - - final_layer_output = self.final_layer_output - - final_layer_output = final_layer_output * \ - gnp.garray(std_matrix) + gnp.garray(mean_matrix) - frame_number = final_layer_output.shape[0] - - final_layer_output = final_layer_output.T - obs_mat = gnp.zeros((61, frame_number*3)) - traj_err_mat = gnp.zeros((61, frame_number)) - observation_error = gnp.zeros((frame_number, 259)) - - var_base = np.zeros((61, 3)) - static_indice = [] - delta_indice = [] - acc_indice = [] - - for i in range(60): - static_indice.append(i) - delta_indice.append(i+60) - acc_indice.append(i+120) - static_indice.append(181) - delta_indice.append(182) - acc_indice.append(183) -# for i in xrange(25): -# static_indice.append(i+184) -# delta_indice.append(i+184+25) -# acc_indice.append(i+184+50) - - obs_mat[:, 0:frame_number] = final_layer_output[static_indice, :] - obs_mat[:, frame_number:frame_number * - 2] = final_layer_output[delta_indice, :] - obs_mat[:, frame_number*2:frame_number * - 3] = final_layer_output[acc_indice, :] - - var_base[:, 0] = std_matrix[0, static_indice].T - var_base[:, 1] = std_matrix[0, delta_indice].T - var_base[:, 2] = std_matrix[0, acc_indice].T - var_base = np.reshape(var_base, (61*3, 1)) - var_base = var_base ** 2 - - sub_dim_list = [] - 
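
The encoders above differ only in how encoder states are upsampled to frame rate: `VanillaSequenceEncoderWithDur` tiles the final context vector across all output frames, while `DistributedSequenceEncoder` repeats each phone's state for its own duration. NumPy equivalents of both:

```python
import numpy as np

states = np.array([[1.0, 1.0], [2.0, 2.0], [3.0, 3.0]])  # 3 phones, dim 2
durs = np.array([2, 1, 3])                               # frames per phone

# VanillaSequenceEncoderWithDur: tile only the final state
tiled = np.tile(states[-1], (durs.sum(), 1))             # shape (6, 2)

# DistributedSequenceEncoder: repeat each phone's state for its duration
reps = np.repeat(np.arange(durs.size), durs)             # [0 0 1 2 2 2]
distributed = states[reps]                               # shape (6, 2)
```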
for i in range(61): - sub_dim_list.append(1) - - sub_dim_start = 0 - for sub_dim in sub_dim_list: - wuw_mat, wu_mat = self.pre_wuw_wu( - frame_number, sub_dim, var_base[sub_dim_start*3:sub_dim_start*3+sub_dim*3]) - - obs_mu = obs_mat[sub_dim_start:sub_dim_start + - sub_dim, :].reshape((frame_number*3*sub_dim, 1)) - wuwwu = gnp.dot(wuw_mat, wu_mat) - - mlpg_traj = gnp.dot(wuwwu, obs_mu) - - sub_std_mat = std_matrix[:, static_indice].T - sub_mu_mat = mean_matrix[:, static_indice].T - sub_std_mat = sub_std_mat[sub_dim_start:sub_dim_start+sub_dim, :] - -# print sub_std_mat - sub_std_mat = sub_std_mat.reshape((frame_number*sub_dim, 1)) - sub_mu_mat = sub_mu_mat[sub_dim_start:sub_dim_start + - sub_dim, :].reshape((frame_number*sub_dim, 1)) - - sub_o_std_vec = var_base[sub_dim_start*3:sub_dim_start*3+sub_dim*3] - sub_o_std_mat = np.tile(sub_o_std_vec.T, (frame_number, 1)) - sub_o_std_mat = (sub_o_std_mat.T) ** 0.5 - sub_o_std_vec = sub_o_std_mat.reshape((frame_number*sub_dim*3, 1)) -# print sub_o_std_vec, var_base[sub_dim_start*3:sub_dim_start*3+sub_dim*3] ** 0.5 - - ref_y = train_set_y[:, static_indice].T - ref_y = ref_y[sub_dim_start:sub_dim_start + - sub_dim, :].reshape((frame_number*sub_dim, 1)) - - ref_y = ref_y * sub_std_mat + sub_mu_mat - traj_err = (mlpg_traj - ref_y) - - traj_err_mat[sub_dim_start:sub_dim_start + - sub_dim] = traj_err.reshape((sub_dim, frame_number)) - - traj_err = traj_err / sub_std_mat - - obs_err_vec = gnp.dot(wuwwu.T, traj_err) -# temp_obs_err_vec = gnp.dot(traj_err.T, wuwwu) -# print obs_err_vec, temp_obs_err_vec -# print obs_err_vec.shape, temp_obs_err_vec.shape - obs_err_vec = obs_err_vec * sub_o_std_vec -# print obs_mu, mlpg_traj, ref_y -# print obs_err_vec.shape, sub_o_std_vec.shape, frame_number, wuwwu.shape, traj_err.shape - obs_mat[sub_dim_start:sub_dim_start+sub_dim, - :] = obs_err_vec.reshape((sub_dim, frame_number*3)) - - sub_dim_start = sub_dim_start + sub_dim - - self.errors = gnp.sum(traj_err_mat[0:60, :].T ** 2, axis=1) - - observation_error[:, 0:60] = obs_mat[0:60, 0:frame_number].T - observation_error[:, 60:120] = obs_mat[0:60, - frame_number:frame_number*2].T - observation_error[:, 120:180] = obs_mat[0:60, - frame_number*2:frame_number*3].T - observation_error[:, 181] = obs_mat[60, 0:frame_number].T - observation_error[:, 182] = obs_mat[60, frame_number:frame_number*2].T - observation_error[:, 183] = obs_mat[60, - frame_number*2:frame_number*3].T - - self.W_grads = [] - self.b_grads = [] - current_error = observation_error - current_activation = self.activations[-1] - current_W_grad = gnp.dot(current_activation.T, observation_error) - current_b_grad = gnp.dot( - gnp.ones((1, observation_error.shape[0])), observation_error) - # final layer is linear output, gradient is one - propagate_error = gnp.dot( - observation_error, self.W_params[self.n_layers].T) - self.W_grads.append(current_W_grad) - self.b_grads.append(current_b_grad) - for i in reversed(list(range(self.n_layers))): - current_activation = self.activations[i] - current_gradient = 1.0 - current_activation ** 2 - current_W_grad = gnp.dot(current_activation.T, propagate_error) - current_b_grad = gnp.dot( - gnp.ones((1, propagate_error.shape[0])), propagate_error) - propagate_error = gnp.dot( - propagate_error, self.W_params[i].T) * current_gradient - - self.W_grads.insert(0, current_W_grad) - self.b_grads.insert(0, current_b_grad) - - def feedforward(self, train_set_x): - self.activations = [] - - self.activations.append(train_set_x) - - for i in range(self.n_layers): - input_data = 
self.activations[i] - current_activations = gnp.tanh( - gnp.dot(input_data, self.W_params[i]) + self.b_params[i]) - self.activations.append(current_activations) - - # output layers - self.final_layer_output = gnp.dot( - self.activations[self.n_layers], self.W_params[self.n_layers]) + self.b_params[self.n_layers] - - def gradient_update(self, batch_size, learning_rate, momentum): - - multiplier = learning_rate / batch_size - for i in range(len(self.W_grads)): - - if i >= len(self.W_grads) - 2: - local_multiplier = multiplier * 0.5 - else: - local_multiplier = multiplier - - self.W_grads[i] = (self.W_grads[i] + self.W_params[i] - * self.l2_reg) * local_multiplier - # + self.b_params[i] * self.l2_reg - self.b_grads[i] = self.b_grads[i] * local_multiplier - - # update weights and record momentum weights - self.mW_params[i] = (self.mW_params[i] * - momentum) - self.W_grads[i] - self.mb_params[i] = (self.mb_params[i] * - momentum) - self.b_grads[i] - self.W_params[i] += self.mW_params[i] - self.b_params[i] += self.mb_params[i] - - def finetune(self, train_xy, batch_size, learning_rate, momentum, mean_matrix, std_matrix): - (train_set_x, train_set_y) = train_xy - - train_set_x = gnp.as_garray(train_set_x) - train_set_y = gnp.as_garray(train_set_y) - - self.feedforward(train_set_x) - self.backpropagation(train_set_y, mean_matrix, std_matrix) - self.gradient_update(batch_size, learning_rate, momentum) - -# self.errors = gnp.sum((self.final_layer_output - train_set_y) ** 2, axis=1) - - return self.errors.as_numpy_array() - - def parameter_prediction(self, test_set_x): - test_set_x = gnp.garray(test_set_x) - - current_activations = test_set_x - - for i in range(self.n_layers): - input_data = current_activations - current_activations = gnp.tanh( - gnp.dot(input_data, self.W_params[i]) + self.b_params[i]) - - final_layer_output = gnp.dot( - current_activations, self.W_params[self.n_layers]) + self.b_params[self.n_layers] - - return final_layer_output.as_numpy_array() - - def parameter_prediction_trajectory(self, test_set_x, test_set_y, mean_matrix, std_matrix): - test_set_x = gnp.garray(test_set_x) - - current_activations = test_set_x - - for i in range(self.n_layers): - input_data = current_activations - current_activations = gnp.tanh( - gnp.dot(input_data, self.W_params[i]) + self.b_params[i]) - - final_layer_output = gnp.dot( - current_activations, self.W_params[self.n_layers]) + self.b_params[self.n_layers] - - final_layer_output = final_layer_output * \ - gnp.garray(std_matrix) + gnp.garray(mean_matrix) - frame_number = final_layer_output.shape[0] - - final_layer_output = final_layer_output.T - obs_mat = gnp.zeros((60, frame_number*3)) - traj_err_mat = gnp.zeros((60, frame_number)) - - var_base = np.zeros((60, 3)) - static_indice = [] - delta_indice = [] - acc_indice = [] - - for i in range(60): - static_indice.append(i) - delta_indice.append(i+60) - acc_indice.append(i+120) - - obs_mat[:, 0:frame_number] = final_layer_output[static_indice, :] - obs_mat[:, frame_number:frame_number * - 2] = final_layer_output[delta_indice, :] - obs_mat[:, frame_number*2:frame_number * - 3] = final_layer_output[acc_indice, :] - - var_base[:, 0] = std_matrix[0, static_indice].T - var_base[:, 1] = std_matrix[0, delta_indice].T - var_base[:, 2] = std_matrix[0, acc_indice].T - - var_base = np.reshape(var_base, (60*3, 1)) - var_base = var_base ** 2 - - sub_dim_list = [] - for i in range(60): - sub_dim_list.append(1) - - sub_dim_start = 0 - for sub_dim in sub_dim_list: - wuw_mat, wu_mat = self.pre_wuw_wu( - frame_number, 
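
The hand-rolled backward pass above relies on tanh's derivative being recoverable from the stored activation, d tanh(x)/dx = 1 - tanh(x)^2, which is why `self.activations` is kept from the forward pass. A compact NumPy sketch of the same forward/backward pair (tanh hidden layers, linear output):

```python
import numpy as np

def forward(x, Ws, bs):
    acts = [x]
    for W, b in zip(Ws[:-1], bs[:-1]):
        acts.append(np.tanh(acts[-1] @ W + b))
    out = acts[-1] @ Ws[-1] + bs[-1]          # linear output layer
    return acts, out

def backward(acts, Ws, out_err):
    grads, err = [], out_err                   # error starts at the output
    for i in reversed(range(len(Ws))):
        grads.insert(0, (acts[i].T @ err, err.sum(axis=0)))
        if i > 0:                              # chain rule through tanh
            err = (err @ Ws[i].T) * (1.0 - acts[i] ** 2)
    return grads
```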
sub_dim, var_base[sub_dim_start*3:sub_dim_start*3+sub_dim*3]) - - obs_mu = obs_mat[sub_dim_start:sub_dim_start + - sub_dim, :].reshape((frame_number*3*sub_dim, 1)) - wuwwu = gnp.dot(wuw_mat, wu_mat) - mlpg_traj = gnp.dot(wuwwu, obs_mu) - - sub_std_mat = std_matrix[:, static_indice].T - sub_mu_mat = mean_matrix[:, static_indice].T - sub_std_mat = sub_std_mat[sub_dim_start:sub_dim_start + - sub_dim, :].reshape((frame_number*sub_dim, 1)) - sub_mu_mat = sub_mu_mat[sub_dim_start:sub_dim_start + - sub_dim, :].reshape((frame_number*sub_dim, 1)) - - ref_y = test_set_y[:, static_indice].T - ref_y = ref_y[sub_dim_start:sub_dim_start + - sub_dim, :].reshape((frame_number*sub_dim, 1)) - - ref_y = ref_y * sub_std_mat + sub_mu_mat - traj_err = (mlpg_traj - ref_y) # mlpg_traj ref_y - - traj_err_mat[sub_dim_start:sub_dim_start+sub_dim, - :] = traj_err.reshape((sub_dim, frame_number)) - - sub_dim_start = sub_dim_start + sub_dim - - validation_losses = gnp.sum(traj_err_mat[1:60, :].T ** 2, axis=1) - validation_losses = validation_losses ** 0.5 - - return validation_losses.as_numpy_array() - - def set_parameters(self, W_params, b_params): - - assert len(self.W_params) == len(W_params) - -# for i in xrange(len(self.W_params)): - for i in range(len(self.W_params)): - self.W_params[i] = W_params[i] - self.b_params[i] = b_params[i] - - def set_delta_params(self, mW_params, mb_params): - assert len(self.mW_params) == len(mW_params) - - for i in range(len(self.mW_params)): - self.mW_params[i] = mW_params[i] - self.mb_params[i] = mb_params[i] - - ''' - #############following function for MLPG################## - ''' - - def pre_wuw_wu(self, frame_number, static_dimension, var_base): - - wuw_mat = gnp.zeros((frame_number*static_dimension, - frame_number*static_dimension)) - wu_mat = gnp.zeros((frame_number*static_dimension, - 3*frame_number*static_dimension)) - - for i in range(static_dimension): - temp_var_base = [var_base[i*3], var_base[i*3+1], var_base[i*3+2]] - temp_wuw, temp_wu = self.pre_compute_wuw( - frame_number, temp_var_base) - wuw_mat[frame_number*i:frame_number * - (i+1), frame_number*i:frame_number*(i+1)] = gnp.garray(temp_wuw[:]) - wu_mat[frame_number*i:frame_number * - (i+1), frame_number*i:frame_number*(i+3)] = gnp.garray(temp_wu[:]) - - return wuw_mat, wu_mat - - def pre_compute_wuw(self, frame_number, var_base): - windows = [ - (0, 0, np.array([1.0])), - (1, 1, np.array([-0.5, 0.0, 0.5])), - (1, 1, np.array([1.0, -2.0, 1.0])), - ] - num_windows = len(windows) - - win_mats = self.build_win_mats(windows, frame_number) - - var_base = np.array(var_base) - var_base = np.reshape(var_base, (1, 3)) - - var_frames = np.tile(var_base, (frame_number, 1)) - var_frames[0, 1] = 100000000000 - var_frames[0, 2] = 100000000000 - var_frames[frame_number-1, 1] = 100000000000 - var_frames[frame_number-1, 2] = 100000000000 - - tau_frames = 1.0 / var_frames - - prec = self.build_wuw(frame_number, tau_frames, win_mats) - inv_prec_full = bla.solveh(prec, np.eye(frame_number)) - - wu_list = self.build_wu(frame_number, tau_frames, win_mats) - - wu_mat = np.zeros((frame_number, frame_number * 3)) - wu_mat[:, 0:frame_number] = wu_list[0] - wu_mat[:, frame_number:frame_number*2] = wu_list[1] - wu_mat[:, frame_number*2:frame_number*3] = wu_list[2] - - return inv_prec_full, wu_mat - - def build_wuw(self, frame_number, tau_frames, win_mats, sdw=None): - if sdw is None: - sdw = max([win_mat.l + win_mat.u for win_mat in win_mats]) - - prec = bm.zeros(sdw, sdw, frame_number) - - for win_index, win_mat in enumerate(win_mats): - 
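
`pre_compute_wuw` assembles the banded pieces of the standard MLPG solve: with a window matrix W stacking static, delta, and acceleration coefficients and U the observation covariances, the smoothed static trajectory is mu = (W' U^-1 W)^-1 W' U^-1 o. A dense NumPy sketch (the huge edge variances in the code are emulated here by simply omitting the delta/acc rows at boundary frames):

```python
import numpy as np

T_frames = 5
W = np.zeros((3 * T_frames, T_frames))
for t in range(T_frames):
    W[t, t] = 1.0                                          # static: [1]
    if 0 < t < T_frames - 1:
        W[T_frames + t, [t - 1, t + 1]] = [-0.5, 0.5]      # delta window
        W[2 * T_frames + t, t - 1:t + 2] = [1.0, -2.0, 1.0]  # acc window
U_inv = np.eye(3 * T_frames)       # unit variances, for the sketch only
o = np.random.randn(3 * T_frames)  # observed static+delta+acc means
mu = np.linalg.solve(W.T @ U_inv @ W, W.T @ U_inv @ o)     # smoothed trajectory
```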
bm.dot_mm_plus_equals(win_mat.T, win_mat, target_bm=prec, - diag=float64(tau_frames[:, win_index])) - - return prec - - def build_wu(self, frame_number, tau_frames, win_mats, sdw=None): - if sdw is None: - sdw = max([win_mat.l + win_mat.u for win_mat in win_mats]) - - wu_list = [] - - for win_index, win_mat in enumerate(win_mats): - temp_wu = bm.zeros(sdw, sdw, frame_number) - bm.dot_mm_plus_equals(win_mat.T, win_mats[0], target_bm=temp_wu, - diag=float64(tau_frames[:, win_index])) - wu_list.append(temp_wu.full()) - - return wu_list - - def build_win_mats(self, windows, frames): - win_mats = [] - for l, u, win_coeff in windows: - assert l >= 0 and u >= 0 - assert len(win_coeff) == l + u + 1 - win_coeffs = np.tile(np.reshape(win_coeff, (l + u + 1, 1)), frames) - win_mat = bm.band_c_bm(u, l, win_coeffs).T - win_mats.append(win_mat) - - return win_mats -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://github.com/CSTR-Edinburgh/merlin -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -import ConfigParser -import logging -import os -import sys - - -class configuration(object): - - def __init__(self): - pass - - def configure(self, configFile=None): - - # get a logger - logger = logging.getLogger("configuration") - # this (and only this) logger needs to be configured immediately, otherwise it won't work - # we can't use the full user-supplied configuration mechanism in this particular case, - # because we haven't loaded it yet! 
- # - # so, just use simple console-only logging - # this level is hardwired here - should change it to INFO - logger.setLevel(logging.DEBUG) - # add a handler & its formatter - will write only to console - ch = logging.StreamHandler() - logger.addHandler(ch) - formatter = logging.Formatter( - '%(asctime)s %(levelname)8s%(name)15s: %(message)s') - ch.setFormatter(formatter) - - # first, set up some default configuration values - self.initial_configuration() - - # next, load in any user-supplied configuration values - # that might over-ride the default values - self.user_configuration(configFile) - - # finally, set up all remaining configuration values - # that depend upon either default or user-supplied values - self.complete_configuration() - logger.debug('configuration completed') - - def initial_configuration(self): - - # to be called before loading any user specific values - - # things to put here are - # 1. variables that the user cannot change - # 2. variables that need to be set before loading the user's config file - - UTTID_REGEX = '(.*)\..*' - - def user_configuration(self, configFile=None): - - # get a logger - logger = logging.getLogger("configuration") - - # load and parse the provided configFile, if provided - if not configFile: - logger.warn( - 'no user configuration file provided; using only built-in default settings') - return - - # load the config file - try: - configparser = ConfigParser.ConfigParser() - configparser.readfp(open(configFile)) - logger.debug( - 'successfully read and parsed user configuration file %s' % configFile) - except: - logger.fatal('error reading user configuration file %s' % - configFile) - raise - - # work_dir must be provided before initialising other directories - self.work_dir = None - - if self.work_dir == None: - try: - self.work_dir = configparser.get('Paths', 'work') - - except (ConfigParser.NoSectionError, ConfigParser.NoOptionError): - if self.work_dir == None: - logger.critical('Paths:work has no value!') - raise Exception - - # default place for some data - self.data_dir = os.path.join(self.work_dir, 'data') - self.tensorflow_dir = os.path.join(self.work_dir, 'tensorflow') - - self.gen_dir = os.path.join(self.tensorflow_dir, 'gen') - self.model_dir = os.path.join(self.tensorflow_dir, 'models') - self.stats_dir = os.path.join(self.tensorflow_dir, 'stats') - - self.inter_data_dir = os.path.join(self.work_dir, 'inter_module') - self.def_inp_dir = os.path.join( - self.inter_data_dir, 'nn_no_silence_lab_norm_425') - self.def_out_dir = os.path.join( - self.inter_data_dir, 'nn_norm_mgc_lf0_vuv_bap_187') - - impossible_int = int(-99999) - impossible_float = float(-99999.0) - - user_options = [ - - # Paths - ('work_dir', self.work_dir, 'Paths', 'work'), - ('data_dir', self.data_dir, 'Paths', 'data'), - - ('inp_feat_dir', self.def_inp_dir, 'Paths', 'inp_feat'), - ('out_feat_dir', self.def_out_dir, 'Paths', 'out_feat'), - - ('model_dir', self.model_dir, 'Paths', 'models'), - ('stats_dir', self.stats_dir, 'Paths', 'stats'), - ('gen_dir', self.gen_dir, 'Paths', 'gen'), - - ('file_id_scp', os.path.join(self.data_dir, - 'file_id_list.scp'), 'Paths', 'file_id_list'), - ('test_id_scp', os.path.join(self.data_dir, - 'test_id_list.scp'), 'Paths', 'test_id_list'), - - # Input-Output - ('inp_dim', 425, 'Input-Output', 'inp_dim'), - ('out_dim', 187, 'Input-Output', 'out_dim'), - - ('inp_file_ext', '.lab', 'Input-Output', 'inp_file_ext'), - ('out_file_ext', '.cmp', 'Input-Output', 'out_file_ext'), - - ('inp_norm', 'MINMAX', 'Input-Output', 'inp_norm'), - 
('out_norm', 'MINMAX', 'Input-Output', 'out_norm'), - - # Architecture - ('hidden_layer_type', ['TANH', 'TANH', 'TANH', 'TANH', - 'TANH', 'TANH'], 'Architecture', 'hidden_layer_type'), - ('hidden_layer_size', [1024, 1024, 1024, 1024, - 1024, 1024], 'Architecture', 'hidden_layer_size'), - - ('batch_size', 256, 'Architecture', 'batch_size'), - ('num_of_epochs', 1, 'Architecture', 'training_epochs'), - ('dropout_rate', 0.0, 'Architecture', 'dropout_rate'), - - ('output_layer_type', 'linear', 'Architecture', 'output_layer_type'), - ('optimizer', 'adam', 'Architecture', 'optimizer'), - ('loss_function', 'mse', 'Architecture', 'loss_function'), - - # RNN - ('sequential_training', False, 'Architecture', 'sequential_training'), - ('stateful', False, 'Architecture', 'stateful'), - ('use_high_batch_size', False, 'Architecture', 'use_high_batch_size'), - - ('training_algo', 1, 'Architecture', 'training_algo'), - ('merge_size', 1, 'Architecture', 'merge_size'), - ('seq_length', 200, 'Architecture', 'seq_length'), - ('bucket_range', 100, 'Architecture', 'bucket_range'), - # encoder_decoder - ('encoder_decoder', False, 'Architecture', 'encoder_decoder'), - ('attention', False, 'Architecture', 'attention'), - ("cbhg", False, "Architecture", "cbhg"), - # Data - ('shuffle_data', False, 'Data', 'shuffle_data'), - - ('train_file_number', impossible_int, 'Data', 'train_file_number'), - ('valid_file_number', impossible_int, 'Data', 'valid_file_number'), - ('test_file_number', impossible_int, 'Data', 'test_file_number'), - - # Processes - ('NORMDATA', False, 'Processes', 'NORMDATA'), - ('TRAINMODEL', False, 'Processes', 'TRAINMODEL'), - ('TESTMODEL', False, 'Processes', 'TESTMODEL') - - ] - - # this uses exec(...) which is potentially dangerous since arbitrary code could be executed - for (variable, default, section, option) in user_options: - # default value - value = None - - try: - # first, look for a user-set value for this variable in the config file - value = configparser.get(section, option) - user_or_default = 'user' - - except (ConfigParser.NoSectionError, ConfigParser.NoOptionError): - # use default value, if there is one - if (default == None) or \ - (default == '') or \ - ((type(default) == int) and (default == impossible_int)) or \ - ((type(default) == float) and (default == impossible_float)): - logger.critical('%20s has no value!' 
% - (section+":"+option)) - raise Exception - else: - value = default - user_or_default = 'default' - - if type(default) == str: - exec('self.%s = "%s"' % (variable, value)) - elif type(default) == int: - exec('self.%s = int(%s)' % (variable, value)) - elif type(default) == float: - exec('self.%s = float(%s)' % (variable, value)) - elif type(default) == bool: - exec('self.%s = bool(%s)' % (variable, value)) - elif type(default) == list: - exec('self.%s = list(%s)' % (variable, value)) - elif type(default) == dict: - exec('self.%s = dict(%s)' % (variable, value)) - else: - logger.critical( - 'Variable %s has default value of unsupported type %s', variable, type(default)) - raise Exception( - 'Internal error in configuration settings: unsupported default type') - - logger.info('%20s has %7s value %s' % - (section+":"+option, user_or_default, value)) - - def complete_configuration(self): - # to be called after reading any user-specific settings - # because the values set here depend on those user-specific settings - - # get a logger - logger = logging.getLogger("configuration") - - # create directories if not exists - if not os.path.exists(self.model_dir): - os.makedirs(self.model_dir) - - if not os.path.exists(self.stats_dir): - os.makedirs(self.stats_dir) - - if not os.path.exists(self.gen_dir): - os.makedirs(self.gen_dir) - - # input-output normalization stat files - self.inp_stats_file = os.path.join(self.stats_dir, "input_%d_%s_%d.norm" % ( - int(self.train_file_number), self.inp_norm, self.inp_dim)) - self.out_stats_file = os.path.join(self.stats_dir, "output_%d_%s_%d.norm" % ( - int(self.train_file_number), self.out_norm, self.out_dim)) - - # define model file name - if self.sequential_training: - self.combined_model_arch = 'RNN'+str(self.training_algo) - else: - self.combined_model_arch = 'DNN' - - self.combined_model_arch += '_'+str(len(self.hidden_layer_size)) - self.combined_model_arch += '_' + \ - '_'.join(map(str, self.hidden_layer_size)) - self.combined_model_arch += '_' + \ - '_'.join(map(str, self.hidden_layer_type)) - - self.nnets_file_name = '%s_%d_train_%d_%d_%d_%d_%d_model' \ - % (self.combined_model_arch, int(self.shuffle_data), - self.inp_dim, self.out_dim, self.train_file_number, self.batch_size, self.num_of_epochs) - - logger.info('model file: %s' % (self.nnets_file_name)) - - # model files - self.json_model_file = os.path.join( - self.model_dir, self.nnets_file_name+'.json') - self.h5_model_file = os.path.join( - self.model_dir, self.nnets_file_name+'.h5') - - # predicted features directory - self.pred_feat_dir = os.path.join(self.gen_dir, self.nnets_file_name) - if not os.path.exists(self.pred_feat_dir): - os.makedirs(self.pred_feat_dir) - - # string.lower for some architecture values - self.output_layer_type = self.output_layer_type.lower() - self.optimizer = self.optimizer.lower() - self.loss_function = self.loss_function.lower() - for i in range(len(self.hidden_layer_type)): - self.hidden_layer_type[i] = self.hidden_layer_type[i].lower() - - # set sequential training True if using LSTMs - if 'lstm' in self.hidden_layer_type: - self.sequential_training = True - - # set/limit batch size to 25 - if self.sequential_training and self.batch_size > 50: - if not self.use_high_batch_size: - logger.info('reducing the batch size from %s to 25' % - (self.batch_size)) - self.batch_size = 25 # num. 
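
The `exec`-based assignment above is flagged as dangerous in the code's own comment. A sketch of the same defaults-plus-overrides pattern without `exec`, using the Python 3 `configparser` module and its typed getters (the original targets Python 2's `ConfigParser`; section and option names follow the `user_options` table above):

```python
import configparser

# Typed defaults, overridden by the user's config file when present.
defaults = {"inp_dim": 425, "out_dim": 187, "batch_size": 256}
cfg = configparser.ConfigParser()
cfg.read("config.ini")   # missing file simply leaves the defaults in force

def get_int(section, option):
    try:
        return cfg.getint(section, option)   # typed getter, no exec()
    except (configparser.NoSectionError, configparser.NoOptionError):
        return defaults[option]

inp_dim = get_int("Input-Output", "inp_dim")
```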
of sentences in this case - - # rnn params - self.rnn_params = {} - self.rnn_params['merge_size'] = self.merge_size - self.rnn_params['seq_length'] = self.seq_length - self.rnn_params['bucket_range'] = self.bucket_range - self.rnn_params['stateful'] = self.stateful -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://github.com/CSTR-Edinburgh/merlin -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. 
-################################################################################ - -import os -import sys -import time -import random -import numpy as np - -from sklearn import preprocessing - -from io_funcs.binary_io import BinaryIOCollection - -############################ -##### Memory variables ##### -############################ - -UTT_BUFFER_SIZE = 10000 -FRAME_BUFFER_SIZE = 3000000 - - -def read_data_from_file_list(inp_file_list, out_file_list, inp_dim, out_dim, sequential_training=True): - io_funcs = BinaryIOCollection() - - num_of_utt = len(inp_file_list) - - file_length_dict = {'framenum2utt': {}, 'utt2framenum': {}} - - if sequential_training: - temp_set_x = {} - temp_set_y = {} - else: - temp_set_x = np.empty((FRAME_BUFFER_SIZE, inp_dim)) - temp_set_y = np.empty((FRAME_BUFFER_SIZE, out_dim)) - - ### read file by file ### - current_index = 0 - for i in xrange(num_of_utt): - inp_file_name = inp_file_list[i] - out_file_name = out_file_list[i] - inp_features, inp_frame_number = io_funcs.load_binary_file_frame( - inp_file_name, inp_dim) - out_features, out_frame_number = io_funcs.load_binary_file_frame( - out_file_name, out_dim) - - base_file_name = os.path.basename(inp_file_name).split(".")[0] - - if abs(inp_frame_number-out_frame_number) > 5: - print 'the number of frames in input and output features are different: %d vs %d (%s)' % (inp_frame_number, out_frame_number, base_file_name) - sys.exit(0) - else: - frame_number = min(inp_frame_number, out_frame_number) - - if sequential_training: - temp_set_x[base_file_name] = inp_features[0:frame_number] - temp_set_y[base_file_name] = out_features[0:frame_number] - else: - temp_set_x[current_index:current_index + - frame_number, ] = inp_features[0:frame_number] - temp_set_y[current_index:current_index + - frame_number, ] = out_features[0:frame_number] - current_index += frame_number - - if frame_number not in file_length_dict['framenum2utt']: - file_length_dict['framenum2utt'][frame_number] = [base_file_name] - else: - file_length_dict['framenum2utt'][frame_number].append( - base_file_name) - - file_length_dict['utt2framenum'][base_file_name] = frame_number - - drawProgressBar(i+1, num_of_utt) - - sys.stdout.write("\n") - - if not sequential_training: - temp_set_x = temp_set_x[0:current_index, ] - temp_set_y = temp_set_y[0:current_index, ] - - return temp_set_x, temp_set_y, file_length_dict - - -def read_test_data_from_file_list(inp_file_list, inp_dim, sequential_training=True): - io_funcs = BinaryIOCollection() - - num_of_utt = len(inp_file_list) - - file_length_dict = {'framenum2utt': {}, 'utt2framenum': {}} - - if sequential_training: - temp_set_x = {} - else: - temp_set_x = np.empty((FRAME_BUFFER_SIZE, inp_dim)) - - ### read file by file ### - current_index = 0 - for i in xrange(num_of_utt): - inp_file_name = inp_file_list[i] - inp_features, frame_number = io_funcs.load_binary_file_frame( - inp_file_name, inp_dim) - - base_file_name = os.path.basename(inp_file_name).split(".")[0] - - if sequential_training: - temp_set_x[base_file_name] = inp_features - else: - temp_set_x[current_index:current_index + - frame_number, ] = inp_features[0:frame_number] - current_index += frame_number - - if frame_number not in file_length_dict['framenum2utt']: - file_length_dict['framenum2utt'][frame_number] = [base_file_name] - else: - file_length_dict['framenum2utt'][frame_number].append( - base_file_name) - - file_length_dict['utt2framenum'][base_file_name] = frame_number - - drawProgressBar(i+1, num_of_utt) - - sys.stdout.write("\n") - - if not 
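
`BinaryIOCollection.load_binary_file_frame` lives in `io_funcs.binary_io` and is not part of this diff. Assuming the usual convention of raw float32 frames with `dim` values each, it plausibly reduces to the following (an assumption, not the actual implementation):

```python
import numpy as np

# Assumption: features are stored as raw little-endian float32, row-major,
# `dim` values per frame; the real BinaryIOCollection may differ.
def load_binary_file_frame(file_name, dim):
    features = np.fromfile(file_name, dtype=np.float32).reshape(-1, dim)
    return features, features.shape[0]
```

The read loop above then trims both streams to the shorter frame count when they disagree by five frames or fewer, and aborts otherwise.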
sequential_training: - temp_set_x = temp_set_x[0:current_index, ] - - return temp_set_x, file_length_dict - - -def transform_data_to_3d_matrix(data, seq_length=200, max_length=0, merge_size=1, shuffle_data=True, shuffle_type=1, padding="right"): - num_of_utt = len(data) - feat_dim = data[data.keys()[0]].shape[1] - - if max_length > 0: - temp_set = np.zeros((num_of_utt, max_length, feat_dim)) - - ### read file by file ### - current_index = 0 - for base_file_name, in_features in data.iteritems(): - frame_number = min(in_features.shape[0], max_length) - if padding == "right": - temp_set[current_index, 0:frame_number, ] = in_features - else: - temp_set[current_index, -frame_number:, ] = in_features - current_index += 1 - - else: - temp_set = np.zeros((FRAME_BUFFER_SIZE, feat_dim)) - - train_idx_list = data.keys() - train_idx_list.sort() - - if shuffle_data: - if shuffle_type == 1: - train_idx_list = shuffle_file_list(train_idx_list) - elif shuffle_type == 2: - train_idx_list = shuffle_file_list( - train_idx_list, shuffle_type=2, merge_size=merge_size) - - ### read file by file ### - current_index = 0 - for file_number in xrange(num_of_utt): - base_file_name = train_idx_list[file_number] - in_features = data[base_file_name] - frame_number = in_features.shape[0] - - temp_set[current_index:current_index+frame_number, ] = in_features - current_index += frame_number - - if (file_number+1) % merge_size == 0: - current_index = seq_length * \ - (int(np.ceil(float(current_index)/float(seq_length)))) - - num_of_samples = int(np.ceil(float(current_index)/float(seq_length))) - - temp_set = temp_set[0: num_of_samples*seq_length, ] - temp_set = temp_set.reshape(-1, seq_length, feat_dim) - - return temp_set - - -def read_and_transform_data_from_file_list(in_file_list, dim, seq_length=200, merge_size=1): - io_funcs = BinaryIOCollection() - - num_of_utt = len(in_file_list) - - temp_set = np.zeros((FRAME_BUFFER_SIZE, dim)) - - ### read file by file ### - current_index = 0 - for i in range(num_of_utt): - in_file_name = in_file_list[i] - in_features, frame_number = io_funcs.load_binary_file_frame( - in_file_name, dim) - base_file_name = os.path.basename(in_file_name).split(".")[0] - - temp_set[current_index:current_index+frame_number, ] = in_features - current_index += frame_number - - if (i+1) % merge_size == 0: - current_index = seq_length * \ - (int(np.ceil(float(current_index)/float(seq_length)))) - - drawProgressBar(i+1, num_of_utt) - - sys.stdout.write("\n") - - num_of_samples = int(np.ceil(float(current_index)/float(seq_length))) - - temp_set = temp_set[0: num_of_samples*seq_length, ] - temp_set = temp_set.reshape(num_of_samples, seq_length) - - return temp_set - - -def merge_data(train_x, train_y, merge_size): - temp_train_x = {} - temp_train_y = {} - - train_id_list = train_x.keys() - train_file_number = len(train_id_list) - train_id_list.sort() - - inp_dim = train_x[train_id_list[0]].shape[1] - out_dim = train_y[train_id_list[0]].shape[1] - - merged_features_x = np.zeros((0, inp_dim)) - merged_features_y = np.zeros((0, out_dim)) - new_file_count = 0 - for file_index in xrange(1, train_file_number+1): - inp_features = train_x[train_id_list[file_index-1]] - out_features = train_y[train_id_list[file_index-1]] - merged_features_x = np.vstack((merged_features_x, inp_features)) - merged_features_y = np.vstack((merged_features_y, out_features)) - - if file_index % merge_size == 0 or file_index == train_file_number: - base_file_name = "new_utterance_%04d" % (new_file_count) - temp_train_x[base_file_name] = 
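
`transform_data_to_3d_matrix` above packs utterances into one flat buffer, rounds the write index up to the next multiple of `seq_length` after each merge group (leaving zero padding), and reshapes to `(samples, seq_length, dim)`. The core of that packing in NumPy:

```python
import numpy as np

seq_length, dim = 4, 2
utts = [np.ones((3, dim)), 2 * np.ones((5, dim))]   # toy utterances
buf = np.zeros((100, dim))
idx = 0
for u in utts:
    buf[idx:idx + len(u)] = u
    idx += len(u)
    idx = seq_length * int(np.ceil(idx / seq_length))  # pad to boundary
batch = buf[:idx].reshape(-1, seq_length, dim)          # shape (3, 4, 2)
```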
merged_features_x - temp_train_y[base_file_name] = merged_features_y - new_file_count += 1 - merged_features_x = np.zeros((0, inp_dim)) - merged_features_y = np.zeros((0, out_dim)) - - return temp_train_x, temp_train_y - - -def shuffle_file_list(train_idx_list, shuffle_type=1, merge_size=5): - ### shuffle train id list ### - random.seed(271638) - train_file_number = len(train_idx_list) - - if shuffle_type == 1: # shuffle by sentence - random.shuffle(train_idx_list) - return train_idx_list - - elif shuffle_type == 2: # shuffle by a group of sentences - id_numbers = range(0, train_file_number, merge_size) - random.shuffle(id_numbers) - new_train_idx_list = [] - for i in xrange(len(id_numbers)): - new_train_idx_list += train_idx_list[id_numbers[i]:id_numbers[i]+merge_size] - return new_train_idx_list - - -def get_stateful_data(train_x, train_y, batch_size): - num_of_batches = int(train_x.shape[0]/batch_size) - train_x = train_x[0: num_of_batches*batch_size, ] - train_y = train_y[0: num_of_batches*batch_size, ] - - stateful_seq = np.zeros(num_of_batches*batch_size, dtype="int32") - for i in xrange(num_of_batches): - stateful_seq[i*batch_size:(i+1)*batch_size] = np.array( - range(batch_size))*num_of_batches+i - - temp_train_x = train_x[stateful_seq] - temp_train_y = train_y[stateful_seq] - - return temp_train_x, temp_train_y - - -def get_stateful_input(test_x, seq_length, batch_size=1): - [n_frames, n_dim] = test_x.shape - - num_of_samples = batch_size*seq_length - num_of_batches = int(n_frames/num_of_samples) + 1 - new_data_size = num_of_batches*num_of_samples - - temp_test_x = np.zeros((new_data_size, n_dim)) - temp_test_x[0: n_frames, ] = test_x - - temp_test_x = temp_test_x.reshape(-1, seq_length, n_dim) - - return temp_test_x - - -def compute_norm_stats(data, stats_file, method="MVN"): - #### normalize training data #### - io_funcs = BinaryIOCollection() - - if method == "MVN": - scaler = preprocessing.StandardScaler().fit(data) - norm_matrix = np.vstack((scaler.mean_, scaler.scale_)) - elif method == "MINMAX": - scaler = preprocessing.MinMaxScaler( - feature_range=(0.01, 0.99)).fit(data) - norm_matrix = np.vstack((scaler.min_, scaler.scale_)) - - print norm_matrix.shape - io_funcs.array_to_binary_file(norm_matrix, stats_file) - - return scaler - - -def load_norm_stats(stats_file, dim, method="MVN"): - #### load norm stats #### - io_funcs = BinaryIOCollection() - - norm_matrix, frame_number = io_funcs.load_binary_file_frame( - stats_file, dim) - assert frame_number == 2 - - if method == "MVN": - scaler = preprocessing.StandardScaler() - scaler.mean_ = norm_matrix[0, :] - scaler.scale_ = norm_matrix[1, :] - elif method == "MINMAX": - scaler = preprocessing.MinMaxScaler(feature_range=(0.01, 0.99)) - scaler.min_ = norm_matrix[0, :] - scaler.scale_ = norm_matrix[1, :] - - return scaler - - -def norm_data(data, scaler, sequential_training=True): - if scaler is None: - return - - #### normalize data #### - if not sequential_training: - data = scaler.transform(data) - else: - for filename, features in data.iteritems(): - data[filename] = scaler.transform(features) - - -def denorm_data(data, scaler): - if scaler is None: - return - - #### de-normalize data #### - data = scaler.inverse_transform(data) - - -def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True): - if not os.path.exists(file_dir) and new_dir_switch: - os.makedirs(file_dir) - file_name_list = [] - for file_id in file_id_list: - file_name = file_dir + '/' + file_id + file_extension - 
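
`get_stateful_data` reorders rows so that sample i of each batch is the continuation of sample i of the previous batch, which is what a stateful Keras RNN expects. A small NumPy check of the permutation:

```python
import numpy as np

batch_size, num_batches = 2, 3
x = np.arange(batch_size * num_batches)        # [0 1 2 3 4 5]
seq = np.zeros(batch_size * num_batches, dtype="int32")
for i in range(num_batches):
    seq[i * batch_size:(i + 1) * batch_size] = (
        np.arange(batch_size) * num_batches + i)
print(x[seq])   # [0 3 1 4 2 5]: sample 0 sees 0,1,2; sample 1 sees 3,4,5
```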
file_name_list.append(file_name) - - return file_name_list - - -def read_file_list(file_name): - file_lists = [] - fid = open(file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - file_lists.append(line) - fid.close() - - return file_lists - - -def print_status(i, length): - pr = int(float(i)/float(length)*100) - st = int(float(pr)/7) - sys.stdout.write(("\r%d/%d ") % (i, length) + - ("[ %d" % pr+"% ] <<< ")+('='*st)+(''*(100-st))) - sys.stdout.flush() - - -def drawProgressBar(indx, length, barLen=20): - percent = float(indx)/length - sys.stdout.write("\r") - progress = "" - for i in range(barLen): - if i < int(barLen * percent): - progress += "=" - else: - progress += " " - sys.stdout.write("[%s] <<< %d/%d (%d%%)" % - (progress, indx, length, percent * 100)) - sys.stdout.flush() -#!/usr/bin/env python -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://github.com/CSTR-Edinburgh/merlin -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. 
-################################################################################ - -import numpy as np -import tensorflow as tf -from tensorflow.contrib.layers import fully_connected, batch_norm -from tensorflow.contrib.layers import dropout -from tensorflow.contrib.rnn import MultiRNNCell, RNNCell, BasicRNNCell, BasicLSTMCell, GRUCell, LayerNormBasicLSTMCell, DropoutWrapper,\ - ResidualWrapper -from tensorflow.python.ops import rnn_cell_impl -from tensorflow.python.ops import array_ops -from tensorflow.python.ops import variable_scope as vs -from tensorflow.python.framework import dtypes -from tensorflow.python.ops import init_ops, math_ops - - -class TensorflowModels(object): - - def __init__(self, n_in, hidden_layer_size, n_out, hidden_layer_type, output_type="linear", dropout_rate=0, loss_function="mse", optimizer="adam"): - - # self.session=tf.InteractiveSession() - self.n_in = int(n_in) - self.n_out = int(n_out) - - self.n_layers = len(hidden_layer_size) - - self.hidden_layer_size = hidden_layer_size - self.hidden_layer_type = hidden_layer_type - - assert len(self.hidden_layer_size) == len(self.hidden_layer_type) - - self.output_type = output_type - self.dropout_rate = dropout_rate - self.loss_function = loss_function - self.optimizer = optimizer - #self.activation ={"tanh":tf.nn.tanh,"sigmoid":tf.nn.sigmoid} - self.graph = tf.Graph() - # self.saver=tf.train.Saver() - - def define_feedforward_model(self): - layer_list = [] - with self.graph.as_default() as g: - is_training_batch = tf.placeholder( - tf.bool, shape=(), name="is_training_batch") - bn_params = {"is_training": is_training_batch, - "decay": 0.99, "updates_collections": None} - g.add_to_collection("is_training_batch", is_training_batch) - with tf.name_scope("input"): - input_layer = tf.placeholder(dtype=tf.float32, shape=( - None, self.n_in), name="input_layer") - if self.dropout_rate != 0.0: - print "Using dropout to avoid overfitting and the dropout rate is", self.dropout_rate - is_training_drop = tf.placeholder( - dtype=tf.bool, shape=(), name="is_training_drop") - input_layer_drop = dropout( - input_layer, self.dropout_rate, is_training=is_training_drop) - layer_list.append(input_layer_drop) - g.add_to_collection( - name="is_training_drop", value=is_training_drop) - else: - layer_list.append(input_layer) - g.add_to_collection("input_layer", layer_list[0]) - for i in xrange(len(self.hidden_layer_size)): - with tf.name_scope("hidden_layer_"+str(i+1)): - if self.dropout_rate != 0.0: - last_layer = layer_list[-1] - if self.hidden_layer_type[i] == "tanh": - new_layer = fully_connected(last_layer, self.hidden_layer_size[i], activation_fn=tf.nn.tanh, normalizer_fn=batch_norm, - normalizer_params=bn_params) - if self.hidden_layer_type[i] == "sigmoid": - new_layer = fully_connected(last_layer, self.hidden_layer_size[i], activation_fn=tf.nn.sigmoid, normalizer_fn=batch_norm, - normalizer_params=bn_params) - new_layer_drop = dropout( - new_layer, self.dropout_rate, is_training=is_training_drop) - layer_list.append(new_layer_drop) - else: - last_layer = layer_list[-1] - if self.hidden_layer_type[i] == "tanh": - new_layer = fully_connected(last_layer, self.hidden_layer_size[i], activation_fn=tf.nn.tanh, normalizer_fn=batch_norm, - normalizer_params=bn_params) - if self.hidden_layer_type[i] == "sigmoid": - new_layer = fully_connected(last_layer, self.hidden_layer_size[i], activation_fn=tf.nn.sigmoid, normalizer_fn=batch_norm, - normalizer_params=bn_params) - layer_list.append(new_layer) - with tf.name_scope("output_layer"): - if 
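
`define_feedforward_model` above is a plain batch-normalised MLP built from `tf.contrib` primitives. A hedged Keras equivalent using the config defaults seen earlier in this diff (six 1024-unit tanh layers, 425 inputs, 187 linear outputs); note that `tf.contrib`'s `normalizer_fn` applies batch norm between the affine map and the activation, which this sketch simplifies to BN after the activation:

```python
from keras.layers import Input, Dense, BatchNormalization, Dropout
from keras.models import Model

def feedforward_model(n_in=425, n_out=187, sizes=6 * [1024],
                      activation="tanh", dropout_rate=0.0):
    inp = x = Input(shape=(n_in,))
    for size in sizes:
        x = Dense(size, activation=activation)(x)
        x = BatchNormalization()(x)
        if dropout_rate:                      # optional, as in the TF code
            x = Dropout(dropout_rate)(x)
    out = Dense(n_out, activation=None)(x)    # linear output layer
    return Model(inp, out)

model = feedforward_model()
model.compile(optimizer="adam", loss="mse")
```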
self.output_type == "linear": - output_layer = fully_connected( - layer_list[-1], self.n_out, activation_fn=None) - if self.output_type == "tanh": - output_layer = fully_connected( - layer_list[-1], self.n_out, activation_fn=tf.nn.tanh) - g.add_to_collection(name="output_layer", value=output_layer) - with tf.name_scope("training_op"): - if self.optimizer == "adam": - self.training_op = tf.train.AdamOptimizer() - - def define_sequence_model(self): - seed = 12345 - np.random.seed(12345) - layer_list = [] - with self.graph.as_default() as g: - utt_length = tf.placeholder(tf.int32, shape=(None)) - g.add_to_collection(name="utt_length", value=utt_length) - with tf.name_scope("input"): - input_layer = tf.placeholder(dtype=tf.float32, shape=( - None, None, self.n_in), name="input_layer") - if self.dropout_rate != 0.0: - print "Using dropout to avoid overfitting and the dropout rate is", self.dropout_rate - is_training_drop = tf.placeholder( - dtype=tf.bool, shape=(), name="is_training_drop") - input_layer_drop = dropout( - input_layer, self.dropout_rate, is_training=is_training_drop) - layer_list.append(input_layer_drop) - g.add_to_collection( - name="is_training_drop", value=is_training_drop) - else: - layer_list.append(input_layer) - g.add_to_collection("input_layer", layer_list[0]) - with tf.name_scope("hidden_layer"): - basic_cell = [] - if "tanh" in self.hidden_layer_type: - is_training_batch = tf.placeholder( - dtype=tf.bool, shape=(), name="is_training_batch") - bn_params = {"is_training": is_training_batch, - "decay": 0.99, "updates_collections": None} - g.add_to_collection("is_training_batch", is_training_batch) - for i in xrange(len(self.hidden_layer_type)): - if self.dropout_rate != 0.0: - if self.hidden_layer_type[i] == "tanh": - new_layer = fully_connected( - layer_list[-1], self.hidden_layer_size[i], activation_fn=tf.nn.tanh, normalizer_fn=batch_norm, normalizer_params=bn_params) - new_layer_drop = dropout( - new_layer, self.dropout_rate, is_training=is_training_drop) - layer_list.append(new_layer_drop) - if self.hidden_layer_type[i] == "lstm": - basic_cell.append(MyDropoutWrapper(BasicLSTMCell( - num_units=self.hidden_layer_size[i]), self.dropout_rate, self.dropout_rate, is_training=is_training_drop)) - if self.hidden_layer_type[i] == "gru": - basic_cell.append(MyDropoutWrapper(GRUCell( - num_units=self.hidden_layer_size[i]), self.dropout_rate, self.dropout_rate, is_training=is_training_drop)) - else: - if self.hidden_layer_type[i] == "tanh": - new_layer = fully_connected( - layer_list[-1], self.hidden_layer_size[i], activation_fn=tf.nn.tanh, normalizer_fn=batch_norm, normalizer_params=bn_params) - layer_list.append(new_layer) - if self.hidden_layer_type[i] == "lstm": - basic_cell.append(LayerNormBasicLSTMCell( - num_units=self.hidden_layer_size[i])) - if self.hidden_layer_type[i] == "gru": - basic_cell.append(LayerNormGRUCell( - num_units=self.hidden_layer_size[i])) - multi_cell = MultiRNNCell(basic_cell) - rnn_outputs, rnn_states = tf.nn.dynamic_rnn( - multi_cell, layer_list[-1], dtype=tf.float32, sequence_length=utt_length) - layer_list.append(rnn_outputs) - with tf.name_scope("output_layer"): - if self.output_type == "linear": - output_layer = tf.layers.dense(rnn_outputs, self.n_out) - # stacked_rnn_outputs=tf.reshape(rnn_outputs,[-1,self.n_out]) - # stacked_outputs=tf.layers.dense(stacked_rnn_outputs,self.n_out) - # output_layer=tf.reshape(stacked_outputs,[-1,utt_length,self.n_out]) - g.add_to_collection(name="output_layer", value=output_layer) - with 
tf.name_scope("training_op"): - if self.optimizer == "adam": - self.training_op = tf.train.AdamOptimizer() - - def get_max_step(self, max_step): - # This method is only used when a sequence model is TrainTensorflowModels - self.max_step = max_step - - -class MyDropoutWrapper(DropoutWrapper): - - def __init__(self, cell, is_training, input_keep_prob=1.0, output_keep_prob=1.0, - state_keep_prob=1.0, variational_recurrent=False, - input_size=None, dtype=None, seed=None): - DropoutWrapper.__init__(self, cell, input_keep_prob=1.0, output_keep_prob=1.0, - state_keep_prob=1.0, variational_recurrent=False, - input_size=None, dtype=None, seed=None) - self.is_training = is_training - - def __call__(self, inputs, state, scope=None): - - return tf.cond(self.is_training, - lambda: DropoutWrapper(self._cell, self._input_keep_prob, self._output_keep_prob).__call__( - inputs, state, scope=None), - lambda: DropoutWrapper(self._cell, 1.0, 1.0).__call__(inputs, state, scope=None)) - # return self._cell(dropout(inputs,self._input_keep_prob,is_training=self.is_training,scope=None),state,scope=None) - - -class Encoder_Decoder_Models(TensorflowModels): - - def __init__(self, n_in, encoder_layer_size, n_out, encoder_layer_type, output_type="linear", dropout_rate=0, loss_function="mse", optimizer="adam", attention=False, cbhg=False): - TensorflowModels.__init__(self, n_in, encoder_layer_size, n_out, encoder_layer_type, - output_type="linear", dropout_rate=0, loss_function="mse", optimizer="adam") - self.encoder_layer_size = self.hidden_layer_size - self.encoder_layer_type = self.hidden_layer_type - self.attention = attention - self.cbhg = cbhg - - def convbank(self, inputs, conv_bank_size, scope="convbank"): - with tf.variable_scope(scope, reuse=None): - outputs = tf.layers.conv1d(inputs, self.n_in//2, 1) - for k in range(2, conv_bank_size+1): - with tf.variable_scope("num_{0}".format(k)): - k_output = tf.layers.conv1d( - inputs, self.n_in//2, k, padding="same", activation=tf.nn.relu) - outputs = tf.concat((outputs, k_output), -1) - return outputs - - def pooling(self, conv_outputs, pooling_window, stride, scope="pooling"): - with tf.variable_scope(scope, reuse=None): - pooling_outputs = tf.layers.max_pooling1d( - conv_outputs, pooling_window, stride) - # print pooling_outputs.shape - return pooling_outputs - - def convproject(self, inputs, filters, width, scope="convproject"): - with tf.variable_scope(scope, reuse=None): - projection_layer = tf.layers.conv1d( - inputs, filters, width, padding="same", activation=tf.nn.relu) - # print projection_layer.shape - return projection_layer - - def deep_feedfoward(self, project_outputs, num_units, layers=4, scope="feedforward"): - with tf.variable_scope(scope, reuse=None): - layer_list = [project_outputs] - for l in range(layers): - layer_list.append(fully_connected( - layer_list[-1], num_units, activation_fn=tf.nn.relu)) - # print layer_list[-1].shape - return layer_list[-1] - - def encoder(self, inputs, inputs_sequence_length): - with tf.variable_scope("encoder"): - basic_cell = [] - for i in xrange(len(self.hidden_layer_size)): - if self.hidden_layer_type[i] == "tanh": - basic_cell.append(tf.contrib.rnn.BasicRNNCell( - num_units=self.encoder_layer_size[i])) - if self.hidden_layer_type[i] == "lstm": - basic_cell.append(tf.contrib.rnn.BasicLSTMCell( - num_units=self.encoder_layer_size[i])) - if self.hidden_layer_type[i] == "gru": - basic_cell.append( - GRUCell(num_units=self.encoder_layer_size[i])) - multicell = MultiRNNCell(basic_cell) - enc_output, enc_state = 
tf.nn.bidirectional_dynamic_rnn(cell_fw=multicell, cell_bw=multicell, inputs=inputs, - sequence_length=inputs_sequence_length, dtype=tf.float32) - enc_output = tf.concat(enc_output, 2) - # enc_state=(tf.concat(enc_state[0]) - return enc_output, enc_state - - def process_decoder_input(self, target_sequence): - decode_input = tf.concat( - (tf.zeros_like(target_sequence[:, :1, :]), target_sequence[:, :-1, :]), 1) - - return decode_input - - def decoder(self, decoder_inputs, enc_output, enc_states, target_sequence_length): - """Memory is a tuple containing the forward and backward final states (output_states_fw,output_states_bw)""" - with tf.variable_scope("decoder"): - basic_cell = [] - for i in xrange(len(self.hidden_layer_size)): - if self.hidden_layer_type[i] == "tanh": - basic_cell.append(tf.contrib.rnn.BasicRNNCell( - num_units=self.encoder_layer_size[i])) - if self.hidden_layer_type[i] == "lstm": - basic_cell.append(tf.contrib.rnn.BasicLSTMCell( - num_units=self.encoder_layer_size[i])) - if self.hidden_layer_type[i] == "gru": - basic_cell.append( - GRUCell(num_units=self.encoder_layer_size[i])) - multicell = MultiRNNCell(basic_cell) - if not self.attention: - dec_output, _ = tf.nn.bidirectional_dynamic_rnn(cell_fw=multicell, cell_bw=multicell, inputs=decoder_inputs, initial_state_fw=enc_states[0], - sequence_length=target_sequence_length, initial_state_bw=enc_states[1]) - else: - attention_size = decoder_inputs.get_shape().as_list()[-1] - attention_mechanism = tf.contrib.seq2seq.BahdanauAttention( - attention_size, enc_output, target_sequence_length, normalize=True, probability_fn=tf.nn.softmax) - cell_with_attention = tf.contrib.seq2seq.AttentionWrapper( - multicell, attention_mechanism, attention_size) - dec_output, _ = tf.nn.bidirectional_dynamic_rnn( - cell_fw=cell_with_attention, cell_bw=cell_with_attention, inputs=decoder_inputs, dtype=tf.float32) - return dec_output - - def define_encoder_decoder(self): - with self.graph.as_default() as g: - with tf.name_scope("encoder_input"): - inputs_data = tf.placeholder(dtype=tf.float32, shape=[ - None, None, self.n_in], name="inputs_data") - if self.cbhg: - conv_bank = self.convbank(inputs_data, 16) - max_pooled_ = self.pooling(conv_bank, 2, 1) - conv_project = self.convproject( - max_pooled_, self.n_in//2, 3) - encoder_inputs = self.deep_feedfoward( - conv_project, self.n_in//2, 4) - else: - inputs_sequence_length = tf.placeholder( - tf.int32, shape=[None], name="inputs_sequence_length") - g.add_to_collection( - "inputs_sequence_length", inputs_sequence_length) - g.add_to_collection("inputs_data", inputs_data) - with tf.name_scope("target_sequence"): - targets = tf.placeholder(dtype=tf.float32, shape=[ - None, None, self.n_out], name="targets") - target_sequence_length = tf.placeholder( - tf.int32, [None], name="target_sequence_length") - g.add_to_collection("targets", targets) - g.add_to_collection("target_sequence_length", - target_sequence_length) - - with tf.name_scope("encoder_output"): - if self.cbhg: - enc_out, enc_states = self.encoder(encoder_inputs, None) - else: - enc_out, enc_states = self.encoder( - inputs_data, inputs_sequence_length) - with tf.name_scope("decoder_inputs"): - dec_inputs = self.process_decoder_input(targets) - with tf.name_scope("decoder_outputs"): - dec_output = self.decoder( - dec_inputs, enc_out, enc_states, target_sequence_length) - dec_output = tf.concat(dec_output, 2) - with tf.name_scope("outputs"): - if self.output_type == "linear": - outputs = tf.layers.dense(dec_output, self.n_out) - 
g.add_to_collection(name="decoder_outputs", value=outputs) - with tf.name_scope("training_op"): - if self.optimizer == "adam": - self.training_op = tf.train.AdamOptimizer(0.002) - - -def layer_normalization(inputs, epsilon=1e-5, scope=None): - mean, var = tf.nn.moments(inputs, [1], keep_dims=True) - with tf.variable_scope(scope+"LN", reuse=None): - scale = tf.get_variable(name="scale", shape=[inputs.get_shape()[ - 1]], initializer=tf.constant_initializer(1)) - shift = tf.get_variable(name="shift", shape=[inputs.get_shape()[ - 1]], initializer=tf.constant_initializer(0)) - LN_output = scale*(inputs-mean)/tf.sqrt(var + epsilon) + shift - return LN_output - - -class LayerNormGRUCell(RNNCell): - """Gated Recurrent Unit cell (cf. http://arxiv.org/abs/1406.1078).""" - - def __init__(self, - num_units, - activation=None, - reuse=None, - kernel_initializer=None, - bias_initializer=None): - super(LayerNormGRUCell, self).__init__(_reuse=reuse) - self._num_units = num_units - self._activation = activation or math_ops.tanh - self._kernel_initializer = kernel_initializer - self._bias_initializer = bias_initializer - - @property - def state_size(self): - return self._num_units - - @property - def output_size(self): - return self._num_units - - def __call__(self, inputs, state): - """Gated recurrent unit (GRU) with nunits cells.""" - with vs.variable_scope("gates"): # Reset gate and update gate. - # We start with bias of 1.0 to not reset and not update. - bias_ones = self._bias_initializer - if self._bias_initializer is None: - dtype = [a.dtype for a in [inputs, state]][0] - bias_ones = init_ops.constant_initializer(1.0, dtype=dtype) - value = rnn_cell_impl._linear([inputs, state], 2 * self._num_units, True, bias_ones, - self._kernel_initializer) - r, u = array_ops.split(value=value, num_or_size_splits=2, axis=1) - r, u = layer_normalization( - r, scope="r/"), layer_normalization(u, scope="u/") - r, u = math_ops.sigmoid(r), math_ops.sigmoid(u) - with vs.variable_scope("candidate"): - c = self._activation(rnn_cell_impl._linear( - [inputs, r * state], self._num_units, True, self._bias_initializer, self._kernel_initializer)) - new_h = u * state + (1 - u) * c - return new_h, new_h -#!/usr/bin/env python -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://github.com/CSTR-Edinburgh/merlin -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. 
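# The layer_normalization() helper above normalises each feature vector to
# zero mean and unit variance along axis 1, then applies a learned scale and
# shift. A minimal NumPy sketch of the same arithmetic, with fixed scale and
# shift where the TensorFlow code uses trained variables:
import numpy as np

def layer_norm_np(x, scale=1.0, shift=0.0, epsilon=1e-5):
    # x: (batch, units); normalise across the units axis, as above
    mean = x.mean(axis=1, keepdims=True)
    var = x.var(axis=1, keepdims=True)
    return scale * (x - mean) / np.sqrt(var + epsilon) + shift

x = np.random.randn(4, 8)
assert np.allclose(layer_norm_np(x).mean(axis=1), 0.0, atol=1e-6)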
-# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -import tensorflow as tf -import numpy as np -import random -import os -import sys -from io_funcs.binary_io import BinaryIOCollection -from tensorflow_lib.model import TensorflowModels, Encoder_Decoder_Models -from tensorflow_lib import data_utils - - -class TrainTensorflowModels(TensorflowModels): - - def __init__(self, n_in, hidden_layer_size, n_out, hidden_layer_type, model_dir, output_type='linear', dropout_rate=0.0, loss_function='mse', optimizer='adam', rnn_params=None): - - TensorflowModels.__init__(self, n_in, hidden_layer_size, n_out, - hidden_layer_type, output_type, dropout_rate, loss_function, optimizer) - - #### TODO: Find a good way to pass below params #### - self.ckpt_dir = model_dir - - def train_feedforward_model(self, train_x, train_y, batch_size=256, num_of_epochs=10, shuffle_data=True): - seed = 12345 - np.random.seed(seed) - print train_x.shape - with self.graph.as_default() as g: - output_data = tf.placeholder(dtype=tf.float32, shape=( - None, self.n_out), name="output_data") - input_layer = g.get_collection(name="input_layer")[0] - is_training_batch = g.get_collection(name="is_training_batch")[0] - if self.dropout_rate != 0.0: - is_training_drop = g.get_collection(name="is_training_drop")[0] - with tf.name_scope("loss"): - output_layer = g.get_collection(name="output_layer")[0] - loss = tf.reduce_mean( - tf.square(output_layer-output_data), name="loss") - with tf.name_scope("train"): - self.training_op = self.training_op.minimize(loss) - init = tf.global_variables_initializer() - self.saver = tf.train.Saver() - with tf.Session() as sess: - init.run() - summary_writer = tf.summary.FileWriter( - os.path.join(self.ckpt_dir, "losslog"), sess.graph) - for epoch in xrange(num_of_epochs): - L = 1 - overall_loss = 0 - for iteration in range(int(train_x.shape[0]/batch_size)+1): - if (iteration+1)*batch_size > train_x.shape[0]: - x_batch, y_batch = train_x[iteration * - batch_size:], train_y[iteration*batch_size:] - if x_batch != []: - L += 1 - else: - continue - else: - x_batch, y_batch = train_x[iteration*batch_size:( - iteration+1)*batch_size, ], train_y[iteration*batch_size:(iteration+1)*batch_size] - L += 1 - if self.dropout_rate != 0.0: - _, batch_loss = sess.run([self.training_op, loss], feed_dict={ - input_layer: x_batch, output_data: y_batch, is_training_drop: True, is_training_batch: True}) - # rs=sess.run(merged,feed_dict={input_layer:x_batch,output_data:y_batch,is_training_drop:True,is_training_batch:True}) - else: - _, batch_loss = sess.run([self.training_op, loss], feed_dict={ - input_layer: x_batch, output_data: y_batch, is_training_batch: True}) - # rs=sess.run(merged,feed_dict={input_layer:x_batch,output_data:y_batch,is_training_batch:True}) - overall_loss += batch_loss - # if self.dropout_rate!=0.0: - # training_loss=loss.eval(feed_dict={input_layer:train_x,output_data:train_y,is_training_drop:False,is_training_batch:False}) - 
# else: - # training_loss=loss.eval(feed_dict={input_layer:train_x,output_data:train_y,is_training_batch:False}) - print "Epoch ", epoch+1, "Finishes", "Training loss:", overall_loss/L - self.saver.save(sess, os.path.join( - self.ckpt_dir, "mymodel.ckpt")) - print "The model parameters are saved" - - def get_batch(self, train_x, train_y, start, batch_size=50): - utt_keys = train_x.keys() - if (start+1)*batch_size > len(utt_keys): - batch_keys = utt_keys[start*batch_size:] - else: - batch_keys = utt_keys[start*batch_size:(start+1)*batch_size] - batch_x_dict = dict([(k, train_x[k]) for k in batch_keys]) - batch_y_dict = dict([(k, train_y[k]) for k in batch_keys]) - utt_len_batch = [len(batch_x_dict[k])for k in batch_x_dict.keys()] - return batch_x_dict, batch_y_dict, utt_len_batch - - def train_sequence_model(self, train_x, train_y, utt_length, batch_size=256, num_of_epochs=10, shuffle_data=False): - seed = 12345 - np.random.seed(seed) - # Data Preparation - temp_train_x = data_utils.transform_data_to_3d_matrix( - train_x, max_length=self.max_step, shuffle_data=False) - print("Input shape: "+str(temp_train_x.shape)) - temp_train_y = data_utils.transform_data_to_3d_matrix( - train_y, max_length=self.max_step, shuffle_data=False) - print("Output shape: "+str(temp_train_y.shape)) - # Shuffle the data - - with self.graph.as_default() as g: - output_layer = g.get_collection(name="output_layer")[0] - input_layer = g.get_collection(name="input_layer")[0] - utt_length_placeholder = g.get_collection(name="utt_length")[0] - hybrid = 0 - if "tanh" in self.hidden_layer_type: - hybrid = 1 - is_training_batch = g.get_collection( - name="is_training_batch")[0] - if self.dropout_rate != 0.0: - is_training_drop = g.get_collection(name="is_training_drop")[0] - with tf.name_scope("output_data"): - output_data = tf.placeholder( - tf.float32, shape=(None, None, self.n_out)) - with tf.name_scope("loss"): - error = output_data-output_layer - loss = tf.reduce_mean(tf.square(error), name="loss") - with tf.name_scope("train"): - self.training_op = self.training_op.minimize(loss) - init = tf.global_variables_initializer() - self.saver = tf.train.Saver() - #overall_loss=tf.summary.scalar("training loss",overall_loss) - with tf.Session() as sess: - init.run() - summary_writer = tf.summary.FileWriter( - os.path.join(self.ckpt_dir, "losslog"), sess.graph) - for epoch in xrange(num_of_epochs): - L = 1 - overall_loss = 0 - for iteration in range(int(len(train_x.keys())/batch_size)+1): - x_batch, y_batch, utt_length_batch = self.get_batch( - train_x, train_y, iteration, batch_size) - if utt_length_batch == []: - continue - else: - L += 1 - max_length_batch = max(utt_length_batch) - x_batch = data_utils.transform_data_to_3d_matrix( - x_batch, max_length=max_length_batch, shuffle_data=False) - y_batch = data_utils.transform_data_to_3d_matrix( - y_batch, max_length=max_length_batch, shuffle_data=False) - if self.dropout_rate != 0.0: - if hybrid: - _, batch_loss = sess.run([self.training_op, loss], feed_dict={input_layer: x_batch, output_data: y_batch, utt_length_placeholder: utt_length_batch, - is_training_drop: True, is_training_batch: True}) - else: - _, batch_loss = sess.run([self.training_op, loss], feed_dict={input_layer: x_batch, output_data: y_batch, utt_length_placeholder: utt_length_batch, - is_training_drop: True}) - elif hybrid: - _, batch_loss = sess.run([self.training_op, loss], feed_dict={ - input_layer: x_batch, output_data: y_batch, utt_length_placeholder: utt_length_batch, is_training_batch: True}) - else: - _, 
batch_loss = sess.run([self.training_op, loss], feed_dict={ - input_layer: x_batch, output_data: y_batch, utt_length_placeholder: utt_length_batch}) - overall_loss += batch_loss - # summary_writer.add_summary(overall_loss,epoch) - # if self.dropout_rate!=0.0: - # if hybrid: - # training_loss=loss.eval(feed_dict={input_layer:temp_train_x,output_data:temp_train_y,utt_length_placeholder:utt_length,\ - # is_training_drop:False,is_training_batch:False}) - # else: - # training_loss=loss.eval(feed_dict={input_layer:temp_train_x,output_data:temp_train_y,utt_length_placeholder:utt_length,\ - # is_training_drop:False}) - # elif hybrid: - # training_loss=loss.eval(feed_dict={input_layer:temp_train_x,output_data:temp_train_y,utt_length_placeholder:utt_length,is_training_batch:False}) - # else: - # training_loss=loss.eval(feed_dict={input_layer:temp_train_x,output_data:temp_train_y,utt_length_placeholder:utt_length}) - print "Epoch ", epoch+1, "Training loss:", overall_loss/L - #model_name="sequence_model"+" hybrid.ckpt" if hybrid==1 else "sequence_model.ckpt" - self.saver.save(sess, os.path.join( - self.ckpt_dir, "mymodel.ckpt")) - print "The model parameters are saved" - - def predict(self, test_x, out_scaler, gen_test_file_list, sequential_training=False, stateful=False): - #### compute predictions #### - - io_funcs = BinaryIOCollection() - - test_id_list = test_x.keys() - test_id_list.sort() - - test_file_number = len(test_id_list) - - print("generating features on held-out test data...") - with tf.Session() as sess: - new_saver = tf.train.import_meta_graph( - os.path.join(self.ckpt_dir, "mymodel.ckpt.meta")) - print "loading the model parameters..." - output_layer = tf.get_collection("output_layer")[0] - input_layer = tf.get_collection("input_layer")[0] - new_saver.restore(sess, os.path.join( - self.ckpt_dir, "mymodel.ckpt")) - print "The model parameters are successfully restored" - for utt_index in xrange(test_file_number): - gen_test_file_name = gen_test_file_list[utt_index] - temp_test_x = test_x[test_id_list[utt_index]] - num_of_rows = temp_test_x.shape[0] - if not sequential_training: - is_training_batch = tf.get_collection( - "is_training_batch")[0] - if self.dropout_rate != 0.0: - is_training_drop = tf.get_collection( - "is_training_drop")[0] - y_predict = sess.run(output_layer, feed_dict={ - input_layer: temp_test_x, is_training_drop: False, is_training_batch: False}) - else: - y_predict = sess.run(output_layer, feed_dict={ - input_layer: temp_test_x, is_training_batch: False}) - else: - temp_test_x = np.reshape( - temp_test_x, [1, num_of_rows, self.n_in]) - hybrid = 0 - utt_length_placeholder = tf.get_collection("utt_length")[0] - if "tanh" in self.hidden_layer_type: - hybrid = 1 - is_training_batch = tf.get_collection( - "is_training_batch")[0] - if self.dropout_rate != 0.0: - is_training_drop = tf.get_collection( - "is_training_drop")[0] - if hybrid: - y_predict = sess.run(output_layer, feed_dict={input_layer: temp_test_x, utt_length_placeholder: [ - num_of_rows], is_training_drop: False, is_training_batch: False}) - else: - y_predict = sess.run(output_layer, feed_dict={ - input_layer: temp_test_x, utt_length_placeholder: [num_of_rows], is_training_drop: False}) - elif hybrid: - y_predict = sess.run(output_layer, feed_dict={ - input_layer: temp_test_x, utt_length_placeholder: [num_of_rows], is_training_batch: False}) - else: - y_predict = sess.run(output_layer, feed_dict={ - input_layer: temp_test_x, utt_length_placeholder: [num_of_rows]}) - data_utils.denorm_data(y_predict, 
out_scaler) - io_funcs.array_to_binary_file(y_predict, gen_test_file_name) - data_utils.drawProgressBar(utt_index+1, test_file_number) - sys.stdout.write("\n") - - -class Train_Encoder_Decoder_Models(Encoder_Decoder_Models): - - def __init__(self, n_in, hidden_layer_size, n_out, hidden_layer_type, model_dir, output_type='linear', dropout_rate=0.0, loss_function='mse', optimizer='adam', attention=False, cbhg=False): - Encoder_Decoder_Models.__init__(self, n_in, hidden_layer_size, n_out, hidden_layer_type, output_type='linear', dropout_rate=0.0, loss_function='mse', - optimizer='adam', attention=attention, cbhg=cbhg) - self.ckpt_dir = os.path.join(model_dir, "temp_checkpoint_file") - - def get_batch(self, train_x, train_y, start, batch_size): - - utt_keys = train_x.keys() - if (start+1)*batch_size > len(utt_keys): - batch_keys = utt_keys[start*batch_size:] - else: - batch_keys = utt_keys[start*batch_size:(start+1)*batch_size] - batch_x_dict = dict([(k, train_x[k]) for k in batch_keys]) - batch_y_dict = dict([(k, train_y[k]) for k in batch_keys]) - utt_len_batch = [len(batch_x_dict[k])for k in batch_x_dict.keys()] - return batch_x_dict, batch_y_dict, utt_len_batch - - def train_encoder_decoder_model(self, train_x, train_y, utt_length, batch_size=1, num_of_epochs=10, shuffle_data=False): - temp_train_x = data_utils.transform_data_to_3d_matrix( - train_x, max_length=self.max_step, shuffle_data=False) - print("Input shape: "+str(temp_train_x.shape)) - temp_train_y = data_utils.transform_data_to_3d_matrix( - train_y, max_length=self.max_step, shuffle_data=False) - print("Output shape: "+str(temp_train_y.shape)) - - with self.graph.as_default() as g: - outputs = g.get_collection(name="decoder_outputs")[0] - var = g.get_collection(name="trainable_variables") - targets = g.get_tensor_by_name("target_sequence/targets:0") - inputs_data = g.get_tensor_by_name("encoder_input/inputs_data:0") - if not self.cbhg: - inputs_sequence_length = g.get_tensor_by_name( - "encoder_input/inputs_sequence_length:0") - target_sequence_length = g.get_tensor_by_name( - "target_sequence/target_sequence_length:0") - with tf.name_scope("loss"): - error = targets-outputs - loss = tf.reduce_mean(tf.square(error)) - gradients = self.training_op.compute_gradients(loss) - capped_gradients = [(tf.clip_by_value(grad, -5., 5.), var) - for grad, var in gradients if grad is not None] - self.training_op = self.training_op.apply_gradients( - capped_gradients) - init = tf.global_variables_initializer() - self.saver = tf.train.Saver() - overall_loss = 0 - tf.summary.scalar("training_loss", overall_loss) - with tf.Session() as sess: - init.run() - tf.summary_writer = tf.summary.FileWriter( - os.path.join(self.ckpt_dir, "losslog"), sess.graph) - for epoch in xrange(num_of_epochs): - L = 1 - for iteration in range(int(temp_train_x.shape[0]/batch_size)+1): - x_batch_dict, y_batch_dict, utt_length_batch = self.get_batch( - train_x, train_y, iteration, batch_size) - if utt_length_batch == []: - continue - else: - L += 1 - assert [len(v) for v in x_batch_dict.values()] == [ - len(v) for v in y_batch_dict.values()] - assert x_batch_dict.keys() == y_batch_dict.keys() - max_length_batch = max(utt_length_batch) - x_batch = data_utils.transform_data_to_3d_matrix( - x_batch_dict, max_length=max_length_batch, shuffle_data=False) - y_batch = data_utils.transform_data_to_3d_matrix( - y_batch_dict, max_length=max_length_batch, shuffle_data=False) - if self.cbhg: - _, batch_loss = sess.run([self.training_op, loss], { - inputs_data: x_batch, targets: 
y_batch, target_sequence_length: utt_length_batch}) - else: - _, batch_loss = sess.run([self.training_op, loss], { - inputs_data: x_batch, targets: y_batch, inputs_sequence_length: utt_length_batch, target_sequence_length: utt_length_batch}) - overall_loss += batch_loss - # if self.cbhg: - # training_loss=loss.eval(feed_dict={inputs_data:temp_train_x,targets:temp_train_y,target_sequence_length:utt_length}) - # else: - # training_loss=loss.eval(feed_dict={inputs_data:temp_train_x,targets:temp_train_y,inputs_sequence_length:utt_length,target_sequence_length:utt_length}) - print "Epoch:", epoch+1, "Training loss:", overall_loss/L - summary_writer.add_summary(str(overall_loss), epoch) - self.saver.save(sess, os.path.join( - self.ckpt_dir, "mymodel.ckpt")) - print "The model parameters are saved" - - def predict(self, test_x, out_scaler, gen_test_file_list): - #### compute predictions #### - - io_funcs = BinaryIOCollection() - - test_id_list = test_x.keys() - test_id_list.sort() - inference_batch_size = len(test_id_list) - test_file_number = len(test_id_list) - with tf.Session(graph=self.graph) as sess: - new_saver = tf.train.import_meta_graph( - self.ckpt_dir, "mymodel.ckpt.meta") - """Notice change targets=tf.get_collection("targets")[0]""" - inputs_data = self.graph.get_collection("inputs_data")[0] - """Notice Change decoder_outputs=tf.get_collection("decoder_outputs")[0]""" - inputs_sequence_length = self.graph.get_collection( - "inputs_sequence_length")[0] - target_sequence_length = self.graph.get_collection( - "target_sequence_length")[0] - print "loading the model parameters..." - new_saver.restore(sess, os.path.join( - self.ckpt_dir, "mymodel.ckpt")) - print "Model parameters are successfully restored" - print("generating features on held-out test data...") - for utt_index in xrange(test_file_number): - gen_test_file_name = gen_test_file_list[utt_index] - temp_test_x = test_x[test_id_list[utt_index]] - num_of_rows = temp_test_x.shape[0] - - #utt_length=[len(utt) for utt in test_x.values()] - # max_step=max(utt_length) - temp_test_x = tf.reshape( - temp_test_x, [1, num_of_rows, self.n_in]) - - outputs = np.zeros( - shape=[len(test_x), max_step, self.n_out], dtype=np.float32) - # dec_cell=self.graph.get_collection("decoder_cell")[0] - print "Generating speech parameters ..." - for t in range(num_of_rows): - # outputs=sess.run(inference_output,{inputs_data:temp_test_x,inputs_sequence_length:utt_length,\ - # target_sequence_length:utt_length}) - _outputs = sess.run(decoder_outputs, feed_dict={inputs_data: temp_test_x, targets: outputs, inputs_sequence_length: [num_of_rows], - target_sequence_length: [num_of_rows]}) - # #print _outputs[:,t,:] - outputs[:, t, :] = _outputs[:, t, :] - - data_utils.denorm_data(outputs, out_scaler) - io_funcs.array_to_binary_file(outputs, gen_test_file_name) - data_utils.drawProgressBar(utt_index+1, test_file_number) - sys.stdout.write("\n") -import theano -import theano.tensor as T - -import numpy as np -from collections import OrderedDict - - -def compile_ADAM_train_function(model, gparams, learning_rate=0.001, b1=0.9, b2=0.999, e=1e-8, - gamma=1-1e-8): - """ - ADAM update rules - Default values are taken from [Kingma2014] - - References: - [Kingma2014] Kingma, Diederik, and Jimmy Ba. - "Adam: A Method for Stochastic Optimization." - arXiv preprint arXiv:1412.6980 (2014). 
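# A minimal NumPy sketch of the Adam update rules the docstring above cites
# (one step for a single parameter; the extra gamma decay applied to b1 in
# the Theano code below is omitted here):
import numpy as np

def adam_step(theta, g, m, v, t, alpha=0.001, b1=0.9, b2=0.999, e=1e-8):
    m = b1 * m + (1 - b1) * g          # update biased first moment estimate
    v = b2 * v + (1 - b2) * g ** 2     # update biased second raw moment estimate
    m_hat = m / (1 - b1 ** t)          # bias-corrected first moment
    v_hat = v / (1 - b2 ** t)          # bias-corrected second moment
    return theta - alpha * m_hat / (np.sqrt(v_hat) + e), m, v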
- http://arxiv.org/pdf/1412.6980v4.pdf - - """ - updates = OrderedDict() - all_params = model.params - all_grads = gparams - alpha = learning_rate - t = theano.shared(np.float32(1)) - # (Decay the first moment running average coefficient) - b1_t = b1*gamma**(t-1) - - for theta_previous, g in zip(all_params, all_grads): - m_previous = theano.shared(np.zeros(theta_previous.get_value().shape, - dtype=theano.config.floatX)) - v_previous = theano.shared(np.zeros(theta_previous.get_value().shape, - dtype=theano.config.floatX)) - - # (Update biased first moment estimate) - m = b1_t*m_previous + (1 - b1_t)*g - # (Update biased second raw moment estimate) - v = b2*v_previous + (1 - b2)*g**2 - # (Compute bias-corrected first moment estimate) - m_hat = m / (1-b1**t) - # (Compute bias-corrected second raw moment estimate) - v_hat = v / (1-b2**t) - theta = theta_previous - (alpha * m_hat) / \ - (T.sqrt(v_hat) + e) # (Update parameters) - - #updates.append((m_previous, m)) - #updates.append((v_previous, v)) - #updates.append((theta_previous, theta) ) - updates[m_previous] = m - updates[v_previous] = v - updates[theta_previous] = theta - - updates[t] = t + 1. - - return updates -""" -The MIT License (MIT) - -Copyright (c) 2015 Alec Radford - -Permission is hereby granted, free of charge, to any person obtaining a copy -of this software and associated documentation files (the "Software"), to deal -in the Software without restriction, including without limitation the rights -to use, copy, modify, merge, publish, distribute, sublicense, and/or sell -copies of the Software, and to permit persons to whom the Software is -furnished to do so, subject to the following conditions: - -The above copyright notice and this permission notice shall be included in all -copies or substantial portions of the Software. - -THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -SOFTWARE. -""" - -import theano -import theano.tensor as T - -import numpy as np -from collections import OrderedDict - - -def compile_ADAM_train_function(model, gparams, learning_rate=0.0002, b1=0.1, b2=0.001, e=1e-8): - updates = OrderedDict() - params = model.params - grads = gparams - lr = learning_rate - i = theano.shared(np.float32(0.)) - i_t = i + 1. - fix1 = 1. - (1. - b1)**i_t - fix2 = 1. - (1. - b2)**i_t - lr_t = lr * (T.sqrt(fix2) / fix1) - for p, g in zip(params, grads): - m = theano.shared(np.zeros(p.get_value().shape).astype( - dtype=theano.config.floatX)) - v = theano.shared(np.zeros(p.get_value().shape).astype( - dtype=theano.config.floatX)) - m_t = (b1 * g) + ((1. - b1) * m) - v_t = (b2 * T.sqr(g)) + ((1. 
- b2) * v) - g_t = m_t / (T.sqrt(v_t) + e) - p_t = p - (lr_t * g_t) - #updates.append((m, m_t)) - #updates.append((v, v_t)) - #updates.append((p, p_t)) - updates[m] = m_t - updates[v] = v_t - updates[p] = p_t - - updates[i] = i_t - - return updates -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -from collections import OrderedDict -import matplotlib.pyplot as plt -import numpy -import theano -import theano.tensor as T - -import matplotlib -# Force matplotlib to not use any Xwindows backend. -matplotlib.use('Agg') - - -def compile_RPROP_train_function(model, gparams, learning_rate=0.001, rprop_algo=2, params_to_update=[]): - - if params_to_update == []: # then update all by default - params_to_update = list(range(len(gparams))) - - # 1, 2, 3, 4: Rprop+ Rprop- iRprop+ iRprop- - # in Igel 2003 'Empirical evaluation of the improved Rprop learning algorithms' - - # It would be easier to follow if these things were defined in __init__, but - # they are here to keep all RPROP-specific stuff in one place. 
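# A minimal NumPy sketch of the sign-based Rprop- rule (variant 2 in the
# code below): each weight keeps its own step size, which grows while the
# gradient sign repeats, shrinks when it flips, and the weight then moves
# against the current gradient sign.
import numpy as np

def rprop_minus_step(theta, g, prev_g, step, eta_plus=1.2, eta_minus=0.5,
                     max_step=50.0, min_step=1e-7):
    sign_change = prev_g * g
    step = np.where(sign_change > 0.0, step * eta_plus,
                    np.where(sign_change < 0.0, step * eta_minus, step))
    step = np.clip(step, min_step, max_step)
    return theta - np.sign(g) * step, step, g   # g becomes prev_g next time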
- # Also, make some or all - # rprop_init_update is configured during __init__, all of the others are hardcoded here - # for now:- - - model.eta_plus = 1.2 - model.eta_minus = 0.5 - model.max_update = 50.0 - model.min_update = 0.0000001 - - model.use_rprop = rprop_algo - model.rprop_init_update = learning_rate - - model.previous_gparams = [] - model.update_values = [] - - model.update_change_DEBUG = [] - - for (i, weights) in enumerate(model.params): - model.previous_gparams.append(theano.shared(value=numpy.zeros((numpy.shape(weights.get_value())), - dtype=theano.config.floatX), name='pg_%s' % (i))) - model.update_values.append(theano.shared(value=(numpy.ones(numpy.shape(weights.get_value()), - dtype=theano.config.floatX) * model.rprop_init_update), name='uv_%s' % (i))) - - model.update_change_DEBUG.append(theano.shared(value=numpy.zeros((numpy.shape(weights.get_value())), - dtype=theano.config.floatX), name='pcd_%s' % (i))) - - if model.use_rprop in [2, 4]: - - updates = OrderedDict() - - for (i, (prev_gparam, gparam, update_step, param)) in enumerate(zip(model.previous_gparams, gparams, - model.update_values, model.params)): - if i in params_to_update: - # first update update_values: - sign_change_test = prev_gparam * gparam - increase_update_size = T.gt( - sign_change_test, 0.0) * model.eta_plus - decrease_update_size = T.lt( - sign_change_test, 0.0) * model.eta_minus - retain_update_size = T.eq(sign_change_test, 0.0) - update_changes = increase_update_size + \ - decrease_update_size + retain_update_size - new_update_step = update_step * update_changes - # apply floor/ceiling to updates: - new_update_step = T.minimum(model.max_update, T.maximum( - model.min_update, new_update_step)) - updates[update_step] = new_update_step - - if model.use_rprop == 4: - # zero gradients where sign changed: reduce step size but don't change weight - gparam = gparam * \ - (T.gt(sign_change_test, 0.0) + T.eq(sign_change_test, 0.0)) - - # then update params: - updates[param] = param - T.sgn(gparam) * new_update_step - - # store previous iteration gradient to check for sign change in next iteration: - updates[prev_gparam] = gparam - - # gparam # sign_change_test # update_changes # - updates[model.update_change_DEBUG[i]] = param - - else: - sys.exit('RPROP version %s not implemented' % (model.use_rprop)) - - return updates - - -def check_rprop_values(model): - print('=== Update steps: ===') - for (i, update_step) in enumerate(model.update_values): - print(' param no. 
%s' % (i)) - print(get_stats(update_step)) - v = update_step.get_value() - if len(v.shape) == 2: - print(v[:4, :4]) - else: - print(v[:4]) - print(' Update changes:--') - u = model.update_change_DEBUG[i].get_value() - if len(u.shape) == 2: - print(u[:4, :4]) - else: - print(u[:4]) - - -def get_stats(theano_shared_params): - vals = theano_shared_params.get_value() - #m,n = numpy.shape(vals) - print(' shape, minm max, mean, 5th and 95th percentile') - print(' %s %s %s %s %s %s' % (numpy.shape(vals), vals.min(), vals.max(), - vals.mean(), numpy.percentile(vals, 5), numpy.percentile(vals, 95))) - -# This is generic, and not specific to RPROP: - - -def plot_weight_histogram(model, outfile, lower=-0.25, upper=0.25): - n = len(model.params) - plt.clf() - for (i, theano_shared_params) in enumerate(model.params): - weights = theano_shared_params.get_value() - values = weights.flatten() - plt.subplot(n, 1, i+1) - frame = plt.gca() - frame.axes.get_yaxis().set_ticks([]) - if i != n-1: # only keep bottom one - frame.axes.get_xaxis().set_ticks([]) - plt.hist(values, 100) - plt.xlim(lower, upper) - print(' param no. %s' % (i)) - print(get_stats(theano_shared_params)) - plt.savefig(outfile) - print('Made plot %s' % (outfile)) -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -# /usr/bin/python -u - -''' -This script assumes c-version STRAIGHT which is not available to public. Please use your -own vocoder to replace this script. 
-''' -import sys -import os -#from utils import GlobalCfg -import logging - - -def feat_extraction_magphase(in_wav_dir, file_id_list, cfg, logger, b_multiproc=False): - sys.path.append(cfg.magphase_bindir) - import libutils as lu - import magphase as mp - - def feat_extraction_magphase_one_file(in_wav_dir, file_name_token, acous_feats_dir, cfg, logger): - - # Logging: - logger.info('Analysing waveform: %s.wav' % (file_name_token)) - - # File setup: - wav_file = os.path.join(in_wav_dir, file_name_token + '.wav') - - # Feat extraction: - mp.analysis_for_acoustic_modelling(wav_file, out_dir=acous_feats_dir, mag_dim=cfg.mag_dim, - phase_dim=cfg.real_dim, b_const_rate=cfg.magphase_const_rate) - - return - - if b_multiproc: - lu.run_multithreaded(feat_extraction_magphase_one_file, - in_wav_dir, file_id_list, cfg.acous_feats_dir, cfg, logger) - else: - for file_name_token in file_id_list: - feat_extraction_magphase_one_file( - in_wav_dir, file_name_token, cfg.acous_feats_dir, cfg, logger) - - return - - -def acous_feat_extraction(in_wav_dir, file_id_list, cfg): - - logger = logging.getLogger("acous_feat_extraction") - - # MagPhase Vocoder: - if cfg.vocoder_type == 'MAGPHASE': - feat_extraction_magphase(in_wav_dir, file_id_list, cfg, logger) - - # TODO: Add WORLD and STRAIGHT - - # If vocoder is not supported: - else: - logger.critical( - 'The vocoder %s is not supported for feature extraction yet!\n' % cfg.vocoder_type) - raise - - return -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. 
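# The feature-extraction entry point above wraps a per-file worker and either
# loops over the file ids or hands the worker to MagPhase's
# lu.run_multithreaded(). A generic stand-in for that dispatch pattern using
# only the standard library (all names here are hypothetical):
from multiprocessing import Pool

def extract_one(file_name_token):
    print('analysing %s.wav' % file_name_token)   # per-file work goes here

def extract_all(file_id_list, b_multiproc=False):
    if b_multiproc:
        pool = Pool()
        pool.map(extract_one, file_id_list)
        pool.close()
        pool.join()
    else:
        for token in file_id_list:
            extract_one(token)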
-################################################################################ - - -import sys -import numpy -from io_funcs.binary_io import BinaryIOCollection -import logging -from scipy.stats.stats import pearsonr - - -class DistortionComputation(object): - def __init__(self, cmp_dim, mgc_dim, bap_dim, lf0_dim): - self.total_frame_number = 0 - self.distortion = 0.0 - self.bap_distortion = 0.0 - self.f0_distortion = 0.0 - self.vuv_error = 0.0 - - self.cmp_dim = cmp_dim - self.mgc_dim = mgc_dim - self.bap_dim = bap_dim - self.lf0_dim = lf0_dim - - def compute_distortion(self, file_id_list, reference_dir, generation_dir, cmp_ext, mgc_ext, bap_ext, lf0_ext): - - total_voiced_frame_number = 0 - for file_id in file_id_list: - reference_file_name = reference_dir + '/' + file_id + cmp_ext - mgc_file_name = generation_dir + '/' + file_id + mgc_ext - bap_file_name = generation_dir + '/' + file_id + bap_ext - lf0_file_name = generation_dir + '/' + file_id + lf0_ext - - reference_cmp, ref_frame_number = self.load_binary_file( - reference_file_name, self.cmp_dim) - generation_mgc, mgc_frame_number = self.load_binary_file( - mgc_file_name, self.mgc_dim) - generation_bap, bap_frame_number = self.load_binary_file( - bap_file_name, self.bap_dim) - generation_lf0, lf0_frame_number = self.load_binary_file( - lf0_file_name, self.lf0_dim) - - if ref_frame_number != mgc_frame_number: - print("The number of frames is not the same: %d vs %d (%s). Error in compute_distortion.py\n." % ( - ref_frame_number, mgc_frame_number, file_id)) - sys.exit(1) - - reference_mgc = reference_cmp[:, 0:self.mgc_dim] - reference_lf0 = reference_cmp[:, - self.mgc_dim*3:self.mgc_dim*3+self.lf0_dim] - reference_vuv = reference_cmp[:, self.mgc_dim * - 3+self.lf0_dim*3:self.mgc_dim*3+self.lf0_dim*3+1] - reference_bap = reference_cmp[:, self.mgc_dim*3+self.lf0_dim * - 3+1:self.mgc_dim*3+self.lf0_dim*3+1+self.bap_dim] - - reference_lf0[reference_vuv < 0.5] = 0.0 -# print reference_vuv - temp_distortion = self.compute_mse( - reference_mgc[:, 1:self.mgc_dim], generation_mgc[:, 1:self.mgc_dim]) - self.distortion += temp_distortion * \ - (10 / numpy.log(10)) * numpy.sqrt(2.0) - - temp_bap_distortion = self.compute_mse( - reference_bap, generation_bap) - self.bap_distortion += temp_bap_distortion * \ - (10 / numpy.log(10)) * numpy.sqrt(2.0) - - temp_f0_distortion, temp_vuv_error, voiced_frame_number = self.compute_f0_mse( - reference_lf0, generation_lf0) - self.f0_distortion += temp_f0_distortion - self.vuv_error += temp_vuv_error - - self.total_frame_number += ref_frame_number - total_voiced_frame_number += voiced_frame_number - - self.distortion /= float(self.total_frame_number) - self.bap_distortion /= float(self.total_frame_number) - - self.f0_distortion /= total_voiced_frame_number - self.f0_distortion = numpy.sqrt(self.f0_distortion) - - self.vuv_error /= float(self.total_frame_number) - - return self.distortion, self.bap_distortion, self.f0_distortion, self.vuv_error - - def compute_f0_mse(self, ref_data, gen_data): - ref_vuv_vector = numpy.zeros((ref_data.size, 1)) - gen_vuv_vector = numpy.zeros((ref_data.size, 1)) - - ref_vuv_vector[ref_data > 0.0] = 1.0 - gen_vuv_vector[gen_data > 0.0] = 1.0 - - sum_ref_gen_vector = ref_vuv_vector + gen_vuv_vector - voiced_ref_data = ref_data[sum_ref_gen_vector == 2.0] - voiced_gen_data = gen_data[sum_ref_gen_vector == 2.0] - voiced_frame_number = voiced_gen_data.size - - f0_mse = numpy.sum( - ((numpy.exp(voiced_ref_data) - numpy.exp(voiced_gen_data)) ** 2)) -# f0_mse = 
numpy.sum((((voiced_ref_data) - (voiced_gen_data)) ** 2)) - - vuv_error_vector = sum_ref_gen_vector[sum_ref_gen_vector == 0.0] - vuv_error = numpy.sum(sum_ref_gen_vector[sum_ref_gen_vector == 1.0]) - - return f0_mse, vuv_error, voiced_frame_number - - def compute_mse(self, ref_data, gen_data): - diff = (ref_data - gen_data) ** 2 - sum_diff = numpy.sum(diff, axis=1) - sum_diff = numpy.sqrt(sum_diff) # ** 0.5 - sum_diff = numpy.sum(sum_diff, axis=0) - - return sum_diff - - def load_binary_file(self, file_name, dimension): - fid_lab = open(file_name, 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - frame_number = features.size / dimension - features = features[:(dimension * frame_number)] - features = features.reshape((-1, dimension)) - - return features, frame_number - - -''' -to be refined. genertic class for various features -''' - - -class IndividualDistortionComp(object): - - def __init__(self): - self.logger = logging.getLogger('computer_distortion') - - def compute_distortion(self, file_id_list, reference_dir, generation_dir, file_ext, feature_dim): - total_voiced_frame_number = 0 - - distortion = 0.0 - vuv_error = 0 - total_frame_number = 0 - - io_funcs = BinaryIOCollection() - - ref_all_files_data = numpy.reshape(numpy.array([]), (-1, 1)) - gen_all_files_data = numpy.reshape(numpy.array([]), (-1, 1)) - for file_id in file_id_list: - ref_file_name = reference_dir + '/' + file_id + file_ext - gen_file_name = generation_dir + '/' + file_id + file_ext - - ref_data, ref_frame_number = io_funcs.load_binary_file_frame( - ref_file_name, feature_dim) - gen_data, gen_frame_number = io_funcs.load_binary_file_frame( - gen_file_name, feature_dim) - - # accept the difference upto two frames - if abs(ref_frame_number - gen_frame_number) <= 2: - ref_frame_number = min(ref_frame_number, gen_frame_number) - gen_frame_number = min(ref_frame_number, gen_frame_number) - ref_data = ref_data[0:ref_frame_number, ] - gen_data = gen_data[0:gen_frame_number, ] - - if ref_frame_number != gen_frame_number: - self.logger.critical("The number of frames is not the same: %d vs %d (%s). Error in compute_distortion.py\n." 
% ( - ref_frame_number, gen_frame_number, file_id)) - raise - - if file_ext == '.lf0': - ref_all_files_data = numpy.concatenate( - (ref_all_files_data, ref_data), axis=0) - gen_all_files_data = numpy.concatenate( - (gen_all_files_data, gen_data), axis=0) - temp_distortion, temp_vuv_error, voiced_frame_number = self.compute_f0_mse( - ref_data, gen_data) - vuv_error += temp_vuv_error - total_voiced_frame_number += voiced_frame_number - elif file_ext == '.dur': - ref_data = numpy.reshape(numpy.sum(ref_data, axis=1), (-1, 1)) - gen_data = numpy.reshape(numpy.sum(gen_data, axis=1), (-1, 1)) - ref_all_files_data = numpy.concatenate( - (ref_all_files_data, ref_data), axis=0) - gen_all_files_data = numpy.concatenate( - (gen_all_files_data, gen_data), axis=0) - continue - elif file_ext == '.mgc': - temp_distortion = self.compute_mse( - ref_data[:, 1:feature_dim], gen_data[:, 1:feature_dim]) - else: - temp_distortion = self.compute_mse(ref_data, gen_data) - - distortion += temp_distortion - - total_frame_number += ref_frame_number - - if file_ext == '.dur': - dur_rmse = self.compute_rmse( - ref_all_files_data, gen_all_files_data) - dur_corr = self.compute_corr( - ref_all_files_data, gen_all_files_data) - - return dur_rmse, dur_corr - elif file_ext == '.lf0': - distortion /= float(total_voiced_frame_number) - vuv_error /= float(total_frame_number) - - distortion = numpy.sqrt(distortion) - f0_corr = self.compute_f0_corr( - ref_all_files_data, gen_all_files_data) - - return distortion, f0_corr, vuv_error - else: - distortion /= float(total_frame_number) - - return distortion - - def compute_f0_mse(self, ref_data, gen_data): - ref_vuv_vector = numpy.zeros((ref_data.size, 1)) - gen_vuv_vector = numpy.zeros((ref_data.size, 1)) - - ref_vuv_vector[ref_data > 0.0] = 1.0 - gen_vuv_vector[gen_data > 0.0] = 1.0 - - sum_ref_gen_vector = ref_vuv_vector + gen_vuv_vector - voiced_ref_data = ref_data[sum_ref_gen_vector == 2.0] - voiced_gen_data = gen_data[sum_ref_gen_vector == 2.0] - voiced_frame_number = voiced_gen_data.size - - f0_mse = (numpy.exp(voiced_ref_data) - numpy.exp(voiced_gen_data)) ** 2 - f0_mse = numpy.sum((f0_mse)) - - vuv_error_vector = sum_ref_gen_vector[sum_ref_gen_vector == 0.0] - vuv_error = numpy.sum(sum_ref_gen_vector[sum_ref_gen_vector == 1.0]) - - return f0_mse, vuv_error, voiced_frame_number - - def compute_f0_corr(self, ref_data, gen_data): - ref_vuv_vector = numpy.zeros((ref_data.size, 1)) - gen_vuv_vector = numpy.zeros((ref_data.size, 1)) - - ref_vuv_vector[ref_data > 0.0] = 1.0 - gen_vuv_vector[gen_data > 0.0] = 1.0 - - sum_ref_gen_vector = ref_vuv_vector + gen_vuv_vector - voiced_ref_data = ref_data[sum_ref_gen_vector == 2.0] - voiced_gen_data = gen_data[sum_ref_gen_vector == 2.0] - f0_corr = self.compute_corr( - numpy.exp(voiced_ref_data), numpy.exp(voiced_gen_data)) - - return f0_corr - - def compute_corr(self, ref_data, gen_data): - corr_coef = pearsonr(ref_data, gen_data) - - return corr_coef[0] - - def compute_rmse(self, ref_data, gen_data): - diff = (ref_data - gen_data) ** 2 - total_frame_number = ref_data.size - sum_diff = numpy.sum(diff) - rmse = numpy.sqrt(sum_diff/total_frame_number) - - return rmse - - def compute_mse(self, ref_data, gen_data): - diff = (ref_data - gen_data) ** 2 - sum_diff = numpy.sum(diff, axis=1) - sum_diff = numpy.sqrt(sum_diff) # ** 0.5 - sum_diff = numpy.sum(sum_diff, axis=0) - - return sum_diff -# -*- coding: utf-8 -*- -# -# Copyright 2016 Google Inc. All Rights Reserved. 
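# The F0 metrics above operate on log-F0 tracks: frames voiced in both the
# reference and the generated track contribute squared error in Hz (via
# exp), while frames voiced in exactly one of the two count as V/UV errors.
# A minimal NumPy sketch of that computation:
import numpy as np

def f0_metrics(ref_lf0, gen_lf0):
    ref_voiced = ref_lf0 > 0.0
    gen_voiced = gen_lf0 > 0.0
    both = ref_voiced & gen_voiced
    f0_se = np.sum((np.exp(ref_lf0[both]) - np.exp(gen_lf0[both])) ** 2)
    vuv_errors = np.sum(ref_voiced != gen_voiced)
    return f0_se, vuv_errors, int(np.sum(both))   # sum / count -> MSE later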
-# -# Licensed under the Apache License, Version 2.0 (the 'License'); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an 'AS IS' BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -"""Class to handle file paths. -""" -__author__ = 'pasindu@google.com (Pasindu De Silva)' - -import os -from .utils import prepare_file_path_list -from .utils import read_file_list - - -class FilePaths(object): - _NORM_INFO_FILE_NAME = 'norm_info_%s_%d_%s.dat' - nn_cmp_dir = '' - nn_cmp_norm_dir = '' - model_dir = '' - gen_dir = '' - inter_data_dir = '' - norm_info_file = '' - var_dir = '' - file_id_list = [] - test_id_list = [] - binary_label_dir = '' - nn_label_dir = '' - nn_label_norm_dir = '' - bottleneck_features = '' - binary_label_file_list = [] - nn_label_file_list = [] - nn_label_norm_file_list = [] - in_label_align_file_list = [] - dur_file_list = [] - seq_dur_file_list = [] - nn_cmp_norm_file_list = [] - - def __init__(self, cfg): - self.cfg = cfg - - self.inter_data_dir = cfg.inter_data_dir - if not os.path.exists(self.inter_data_dir): - os.makedirs(self.inter_data_dir) - - self.nn_cmp_dir = os.path.join( - self.inter_data_dir, - 'nn' + self.cfg.combined_feature_name + '_' + str(self.cfg.cmp_dim)) - self.nn_cmp_norm_dir = os.path.join( - self.inter_data_dir, 'nn_norm' + self.cfg.combined_feature_name + '_' + - str(self.cfg.cmp_dim)) - self.model_dir = os.path.join(self.cfg.work_dir, 'nnets_model') - self.gen_dir = os.path.join(self.cfg.work_dir, 'gen') - self.file_id_list = read_file_list(self.cfg.file_id_scp) - self.bottleneck_features = os.path.join( - self.gen_dir, 'bottleneck_features') - - if self.cfg.GenTestList: - self.test_id_list = read_file_list(cfg.test_id_scp) - - self.norm_info_file = os.path.join(self.inter_data_dir, - self._NORM_INFO_FILE_NAME % - (cfg.combined_feature_name, cfg.cmp_dim, - cfg.output_feature_normalisation)) - - # save acoustic normalisation information for normalising the features back - self.var_dir = os.path.join(self.inter_data_dir, 'var') - if not os.path.exists(self.var_dir): - os.makedirs(self.var_dir) - - if self.cfg.MAKEDUR: - self.dur_file_list = prepare_file_path_list( - self.file_id_list, self.cfg.in_dur_dir, self.cfg.dur_ext) - - if self.cfg.network_type == "S2S": - self.seq_dur_file_list = prepare_file_path_list( - self.file_id_list, self.cfg.in_seq_dur_dir, self.cfg.dur_ext) - - self.nn_cmp_norm_file_list = prepare_file_path_list( - self.file_id_list, self.nn_cmp_norm_dir, self.cfg.cmp_ext) - - def get_nnets_file_name(self): - return '%s/%s.model' % (self.model_dir, self.cfg.model_file_name) - - def get_temp_nn_dir_name(self): - return self.cfg.model_file_name - - def get_var_dic(self): - var_file_dict = {} - for feature_name in list(self.cfg.out_dimension_dict.keys()): - var_file_dict[feature_name] = self._get_var_file_name(feature_name) - return var_file_dict - - def get_train_list_x_y(self): - start = 0 - end = self.cfg.train_file_number - return self.nn_label_norm_file_list[start:end], self.nn_cmp_norm_file_list[ - start:end] - - def get_valid_list_x_y(self): - start = self.cfg.train_file_number - end = self.cfg.train_file_number + self.cfg.valid_file_number - return 
self.nn_label_norm_file_list[start:end], self.nn_cmp_norm_file_list[ - start:end] - - def get_test_list_x_y(self): - start = self.cfg.train_file_number + self.cfg.valid_file_number - end = self.cfg.train_file_number + \ - self.cfg.valid_file_number + self.cfg.test_file_number - return self.nn_label_norm_file_list[start:end], self.nn_cmp_norm_file_list[ - start:end] - - def _get_var_file_name(self, feature_name): - return os.path.join( - self.var_dir, - feature_name + '_' + str(self.cfg.out_dimension_dict[feature_name])) - - def set_label_dir(self, dimension, suffix, lab_dim): - self.binary_label_dir = os.path.join(self.inter_data_dir, - 'binary_label_' + str(dimension)) - self.nn_label_dir = os.path.join(self.inter_data_dir, - 'nn_no_silence_lab_' + suffix) - self.nn_label_norm_dir = os.path.join(self.inter_data_dir, - 'nn_no_silence_lab_norm_' + suffix) - - label_norm_file = 'label_norm_%s_%d.dat' % ( - self.cfg.label_style, lab_dim) - self.label_norm_file = os.path.join( - self.inter_data_dir, label_norm_file) - - out_feat_dir = os.path.join( - self.inter_data_dir, 'binary_label_' + suffix) - self.out_feat_file_list = prepare_file_path_list( - self.file_id_list, out_feat_dir, self.cfg.lab_ext) - - def get_nn_cmp_file_list(self): - return prepare_file_path_list(self.file_id_list, self.nn_cmp_dir, - self.cfg.cmp_ext) - - def get_nn_cmp_norm_file_list(self): - return self.nn_cmp_norm_file_list - - def get_lf0_file_list(self): - return prepare_file_path_list(self.file_id_list, self.cfg.in_lf0_dir, - self.cfg.lf0_ext) - - def set_label_file_list(self): - if self.cfg.GenTestList: - self.in_label_align_file_list = prepare_file_path_list( - self.test_id_list, self.cfg.in_label_align_dir, self.cfg.lab_ext, - False) - else: - self.in_label_align_file_list = prepare_file_path_list( - self.file_id_list, self.cfg.in_label_align_dir, self.cfg.lab_ext, - False) - - if self.cfg.GenTestList and self.cfg.test_synth_dir != 'None' and not self.cfg.VoiceConversion: - test_binary_file_list = self._prepare_test_binary_label_file_path_list( - self.cfg.test_synth_dir) - test_file_list = self._prepare_test_label_file_path_list( - self.cfg.test_synth_dir) - self.binary_label_file_list = test_binary_file_list - self.nn_label_file_list = test_file_list - self.nn_label_norm_file_list = test_file_list - elif self.cfg.GenTestList: - self.binary_label_file_list = self._prepare_test_label_file_path_list( - self.binary_label_dir) - self.nn_label_file_list = self._prepare_test_label_file_path_list( - self.nn_label_dir) - self.nn_label_norm_file_list = self._prepare_test_label_file_path_list( - self.nn_label_norm_dir) - else: - self.binary_label_file_list = self._prepare_file_label_file_path_list( - self.binary_label_dir) - self.nn_label_file_list = self._prepare_file_label_file_path_list( - self.nn_label_dir) - self.nn_label_norm_file_list = self._prepare_file_label_file_path_list( - self.nn_label_norm_dir) - - def _prepare_file_label_file_path_list(self, list_dir): - return prepare_file_path_list(self.file_id_list, list_dir, self.cfg.lab_ext) - - def _prepare_test_label_file_path_list(self, list_dir): - return prepare_file_path_list(self.test_id_list, list_dir, self.cfg.lab_ext) - - def _prepare_test_binary_label_file_path_list(self, list_dir): - return prepare_file_path_list(self.test_id_list, list_dir, self.cfg.lab_ext+'bin') -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# 
Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -# /usr/bin/python -u - -''' -This script assumes c-version STRAIGHT which is not available to public. Please use your -own vocoder to replace this script. 
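To plug in a replacement, follow the dispatch pattern of generate_wav()
further down: branch on cfg.vocoder_type and delegate to a wavgen_* function
with the same signature. A minimal sketch (the 'MYVOC' name and wavgen_myvoc
are hypothetical, not part of this script):

    elif cfg.vocoder_type == 'MYVOC':
        # call your own waveform generator with the same signature
        wavgen_myvoc(gen_dir, file_id_list, cfg, logger)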
-'''
-import sys
-import os
-import subprocess
-import glob
-#from utils import GlobalCfg
-
-from io_funcs.binary_io import BinaryIOCollection
-import numpy as np
-
-import logging
-
-#import configuration
-
-# cannot have these outside a function - if you do that, they get executed as soon
-# as this file is imported, but that can happen before the configuration is set up properly
-# SPTK = cfg.SPTK
-# NND = cfg.NND
-# STRAIGHT = cfg.STRAIGHT
-
-
-def run_process(args, log=True):
-
-    logger = logging.getLogger("subprocess")
-
-    # a convenience function instead of calling subprocess directly
-    # this is so that we can do some logging and catch exceptions
-
-    # we don't always want debug logging, even when logging level is DEBUG
-    # especially if calling a lot of external functions
-    # so we can disable it by force, where necessary
-    if log:
-        logger.debug('%s' % args)
-
-    try:
-        # the following is only available in later versions of Python
-        # rval = subprocess.check_output(args)
-
-        # bufsize=-1 enables buffering and may improve performance compared to the unbuffered case
-        p = subprocess.Popen(args, bufsize=-1, shell=True,
-                             stdin=subprocess.PIPE, stdout=subprocess.PIPE, stderr=subprocess.PIPE,
-                             close_fds=True, env=os.environ)
-        # better to use communicate() than read() and write() - this avoids deadlocks
-        (stdoutdata, stderrdata) = p.communicate()
-
-        if p.returncode != 0:
-            # for critical things, we always log, even if log==False
-            logger.critical('exit status %d' % p.returncode)
-            logger.critical(' for command: %s' % args)
-            logger.critical(' stderr: %s' % stderrdata)
-            logger.critical(' stdout: %s' % stdoutdata)
-            raise OSError
-
-        return (stdoutdata, stderrdata)
-
-    except subprocess.CalledProcessError as e:
-        # not sure under what circumstances this exception would be raised in Python 2.6
-        logger.critical('exit status %d' % e.returncode)
-        logger.critical(' for command: %s' % args)
-        # not sure if there is an 'output' attribute under 2.6 ? still need to test this...
- logger.critical(' output: %s' % e.output) - raise - - except ValueError: - logger.critical('ValueError for %s' % args) - raise - - except OSError: - logger.critical('OSError for %s' % args) - raise - - except KeyboardInterrupt: - logger.critical('KeyboardInterrupt during %s' % args) - try: - # try to kill the subprocess, if it exists - p.kill() - except UnboundLocalError: - # this means that p was undefined at the moment of the keyboard interrupt - # (and we do nothing) - pass - - raise KeyboardInterrupt - - -def bark_alpha(sr): - return 0.8517*np.sqrt(np.arctan(0.06583*sr/1000.0))-0.1916 - - -def erb_alpha(sr): - return 0.5941*np.sqrt(np.arctan(0.1418*sr/1000.0))+0.03237 - - -def post_filter(mgc_file_in, mgc_file_out, mgc_dim, pf_coef, fw_coef, co_coef, fl_coef, gen_dir, cfg): - - SPTK = cfg.SPTK - - line = "echo 1 1 " - for i in range(2, mgc_dim): - line = line + str(pf_coef) + " " - - run_process('{line} | {x2x} +af > {weight}' - .format(line=line, x2x=SPTK['X2X'], weight=os.path.join(gen_dir, 'weight'))) - - run_process('{freqt} -m {order} -a {fw} -M {co} -A 0 < {mgc} | {c2acr} -m {co} -M 0 -l {fl} > {base_r0}' - .format(freqt=SPTK['FREQT'], order=mgc_dim-1, fw=fw_coef, co=co_coef, mgc=mgc_file_in, c2acr=SPTK['C2ACR'], fl=fl_coef, base_r0=mgc_file_in+'_r0')) - - run_process('{vopr} -m -n {order} < {mgc} {weight} | {freqt} -m {order} -a {fw} -M {co} -A 0 | {c2acr} -m {co} -M 0 -l {fl} > {base_p_r0}' - .format(vopr=SPTK['VOPR'], order=mgc_dim-1, mgc=mgc_file_in, weight=os.path.join(gen_dir, 'weight'), - freqt=SPTK['FREQT'], fw=fw_coef, co=co_coef, - c2acr=SPTK['C2ACR'], fl=fl_coef, base_p_r0=mgc_file_in+'_p_r0')) - - run_process('{vopr} -m -n {order} < {mgc} {weight} | {mc2b} -m {order} -a {fw} | {bcp} -n {order} -s 0 -e 0 > {base_b0}' - .format(vopr=SPTK['VOPR'], order=mgc_dim-1, mgc=mgc_file_in, weight=os.path.join(gen_dir, 'weight'), - mc2b=SPTK['MC2B'], fw=fw_coef, - bcp=SPTK['BCP'], base_b0=mgc_file_in+'_b0')) - - run_process('{vopr} -d < {base_r0} {base_p_r0} | {sopr} -LN -d 2 | {vopr} -a {base_b0} > {base_p_b0}' - .format(vopr=SPTK['VOPR'], base_r0=mgc_file_in+'_r0', base_p_r0=mgc_file_in+'_p_r0', - sopr=SPTK['SOPR'], - base_b0=mgc_file_in+'_b0', base_p_b0=mgc_file_in+'_p_b0')) - - run_process('{vopr} -m -n {order} < {mgc} {weight} | {mc2b} -m {order} -a {fw} | {bcp} -n {order} -s 1 -e {order} | {merge} -n {order2} -s 0 -N 0 {base_p_b0} | {b2mc} -m {order} -a {fw} > {base_p_mgc}' - .format(vopr=SPTK['VOPR'], order=mgc_dim-1, mgc=mgc_file_in, weight=os.path.join(gen_dir, 'weight'), - mc2b=SPTK['MC2B'], fw=fw_coef, - bcp=SPTK['BCP'], - merge=SPTK['MERGE'], order2=mgc_dim-2, base_p_b0=mgc_file_in+'_p_b0', - b2mc=SPTK['B2MC'], base_p_mgc=mgc_file_out)) - - return - - -def wavgen_straight_type_vocoder(gen_dir, file_id_list, cfg, logger): - ''' - Waveform generation with STRAIGHT or WORLD vocoders. 
-    (whose acoustic parameters are: mgc, bap, and lf0)
-    '''
-
-    SPTK = cfg.SPTK
-#    NND = cfg.NND
-    STRAIGHT = cfg.STRAIGHT
-    WORLD = cfg.WORLD
-
-    # to be moved
-    pf_coef = cfg.pf_coef
-    if isinstance(cfg.fw_alpha, str):
-        if cfg.fw_alpha == 'Bark':
-            fw_coef = bark_alpha(cfg.sr)
-        elif cfg.fw_alpha == 'ERB':
-            fw_coef = erb_alpha(cfg.sr)
-        else:
-            raise ValueError('cfg.fw_alpha='+cfg.fw_alpha +
-                             ' not implemented, the frequency warping coefficient "fw_coef" cannot be deduced.')
-    else:
-        fw_coef = cfg.fw_alpha
-    co_coef = cfg.co_coef
-    fl_coef = cfg.fl
-
-    if cfg.apply_GV:
-        io_funcs = BinaryIOCollection()
-
-        logger.info('loading global variance stats from %s' % (cfg.GV_dir))
-
-        ref_gv_mean_file = os.path.join(cfg.GV_dir, 'ref_gv.mean')
-        gen_gv_mean_file = os.path.join(cfg.GV_dir, 'gen_gv.mean')
-        ref_gv_std_file = os.path.join(cfg.GV_dir, 'ref_gv.std')
-        gen_gv_std_file = os.path.join(cfg.GV_dir, 'gen_gv.std')
-
-        ref_gv_mean, frame_number = io_funcs.load_binary_file_frame(
-            ref_gv_mean_file, 1)
-        gen_gv_mean, frame_number = io_funcs.load_binary_file_frame(
-            gen_gv_mean_file, 1)
-        ref_gv_std, frame_number = io_funcs.load_binary_file_frame(
-            ref_gv_std_file, 1)
-        gen_gv_std, frame_number = io_funcs.load_binary_file_frame(
-            gen_gv_std_file, 1)
-
-    counter = 1
-    max_counter = len(file_id_list)
-
-    for filename in file_id_list:
-
-        logger.info('creating waveform for %4d of %4d: %s' %
-                    (counter, max_counter, filename))
-        counter = counter+1
-        base = filename
-        files = {'sp': base + cfg.sp_ext,
-                 'mgc': base + cfg.mgc_ext,
-                 'f0': base + '.f0',
-                 'lf0': base + cfg.lf0_ext,
-                 'ap': base + '.ap',
-                 'bap': base + cfg.bap_ext,
-                 'wav': base + '.wav'}
-
-        mgc_file_name = files['mgc']
-        bap_file_name = files['bap']
-
-        cur_dir = os.getcwd()
-        os.chdir(gen_dir)
-
-        # post-filtering
-        if cfg.do_post_filtering:
-
-            mgc_file_name = files['mgc']+'_p_mgc'
-            post_filter(files['mgc'], mgc_file_name, cfg.mgc_dim,
-                        pf_coef, fw_coef, co_coef, fl_coef, gen_dir, cfg)
-
-        if cfg.vocoder_type == "STRAIGHT" and cfg.apply_GV:
-            gen_mgc, frame_number = io_funcs.load_binary_file_frame(
-                mgc_file_name, cfg.mgc_dim)
-
-            gen_mu = np.reshape(np.mean(gen_mgc, axis=0), (-1, 1))
-            gen_std = np.reshape(np.std(gen_mgc, axis=0), (-1, 1))
-
-            local_gv = (ref_gv_std/gen_gv_std) * \
-                (gen_std - gen_gv_mean) + ref_gv_mean
-
-            enhanced_mgc = np.repeat(local_gv, frame_number, 1).T / np.repeat(gen_std, frame_number, 1).T * (
-                gen_mgc - np.repeat(gen_mu, frame_number, 1).T) + np.repeat(gen_mu, frame_number, 1).T
-
-            new_mgc_file_name = files['mgc']+'_p_mgc'
-            io_funcs.array_to_binary_file(enhanced_mgc, new_mgc_file_name)
-
-            mgc_file_name = files['mgc']+'_p_mgc'
-
-        if cfg.do_post_filtering and cfg.apply_GV:
-            logger.critical(
-                'Both smoothing techniques together can\'t be applied!!\n')
-            raise
-
-        # mgc to sp to wav
-        if cfg.vocoder_type == 'STRAIGHT':
-            run_process('{mgc2sp} -a {alpha} -g 0 -m {order} -l {fl} -o 2 {mgc} > {sp}'
-                        .format(mgc2sp=SPTK['MGC2SP'], alpha=cfg.fw_alpha, order=cfg.mgc_dim-1, fl=cfg.fl, mgc=mgc_file_name, sp=files['sp']))
-            run_process('{sopr} -magic -1.0E+10 -EXP -MAGIC 0.0 {lf0} > {f0}'.format(
-                sopr=SPTK['SOPR'], lf0=files['lf0'], f0=files['f0']))
-            run_process(
-                '{x2x} +fa {f0} > {f0a}'.format(x2x=SPTK['X2X'], f0=files['f0'], f0a=files['f0'] + '.a'))
-
-            if cfg.use_cep_ap:
-                run_process('{mgc2sp} -a {alpha} -g 0 -m {order} -l {fl} -o 0 {bap} > {ap}'
-                            .format(mgc2sp=SPTK['MGC2SP'], alpha=cfg.fw_alpha, order=cfg.bap_dim-1, fl=cfg.fl, bap=files['bap'], ap=files['ap']))
-            else:
-                run_process('{bndap2ap} {bap} > {ap}'
-                            .format(bndap2ap=STRAIGHT['BNDAP2AP'], bap=files['bap'], ap=files['ap']))
-
-            run_process('{synfft} -f {sr} -spec -fftl {fl} -shift {shift} -sigp 1.2 -cornf 4000 -float -apfile {ap} {f0a} {sp} {wav}'
-                        .format(synfft=STRAIGHT['SYNTHESIS_FFT'], sr=cfg.sr, fl=cfg.fl, shift=cfg.shift, ap=files['ap'], f0a=files['f0']+'.a', sp=files['sp'], wav=files['wav']))
-
-            run_process('rm -f {sp} {f0} {f0a} {ap}'
-                        .format(sp=files['sp'], f0=files['f0'], f0a=files['f0']+'.a', ap=files['ap']))
-        elif cfg.vocoder_type == 'WORLD':
-
-            run_process('{sopr} -magic -1.0E+10 -EXP -MAGIC 0.0 {lf0} | {x2x} +fd > {f0}'.format(
-                sopr=SPTK['SOPR'], lf0=files['lf0'], x2x=SPTK['X2X'], f0=files['f0']))
-
-            run_process('{sopr} -c 0 {bap} | {x2x} +fd > {ap}'.format(
-                sopr=SPTK['SOPR'], bap=files['bap'], x2x=SPTK['X2X'], ap=files['ap']))
-
-            # If using world v2, please comment above line and uncomment this
-            # run_process('{mgc2sp} -a {alpha} -g 0 -m {order} -l {fl} -o 0 {bap} | {sopr} -d 32768.0 -P | {x2x} +fd > {ap}'
-            #             .format(mgc2sp=SPTK['MGC2SP'], alpha=cfg.fw_alpha, order=cfg.bap_dim, fl=cfg.fl, bap=bap_file_name, sopr=SPTK['SOPR'], x2x=SPTK['X2X'], ap=files['ap']))
-
-            run_process('{mgc2sp} -a {alpha} -g 0 -m {order} -l {fl} -o 2 {mgc} | {sopr} -d 32768.0 -P | {x2x} +fd > {sp}'
-                        .format(mgc2sp=SPTK['MGC2SP'], alpha=cfg.fw_alpha, order=cfg.mgc_dim-1, fl=cfg.fl, mgc=mgc_file_name, sopr=SPTK['SOPR'], x2x=SPTK['X2X'], sp=files['sp']))
-
-            run_process('{synworld} {fl} {sr} {f0} {sp} {ap} {wav}'
-                        .format(synworld=WORLD['SYNTHESIS'], fl=cfg.fl, sr=cfg.sr, f0=files['f0'], sp=files['sp'], ap=files['ap'], wav=files['wav']))
-
-            run_process(
-                'rm -f {ap} {sp} {f0}'.format(ap=files['ap'], sp=files['sp'], f0=files['f0']))
-
-        os.chdir(cur_dir)
-
-
-def wavgen_magphase(gen_dir, file_id_list, cfg, logger):
-
-    # Import MagPhase and libraries:
-    sys.path.append(cfg.magphase_bindir)
-    import libutils as lu
-    import libaudio as la
-    import magphase as mp
-
-    nfiles = len(file_id_list)
-    for nxf in range(nfiles):
-        filename_token = file_id_list[nxf]
-        logger.info('Creating waveform for %4d of %4d: %s' %
-                    (nxf+1, nfiles, filename_token))
-
-        for pf_type in cfg.magphase_pf_type:
-            gen_wav_dir = os.path.join(gen_dir + '_wav_pf_' + pf_type)
-            lu.mkdir(gen_wav_dir)
-            mp.synthesis_from_acoustic_modelling(gen_dir, filename_token, gen_wav_dir, cfg.mag_dim, cfg.real_dim,
-                                                 cfg.sr, pf_type=pf_type, b_const_rate=cfg.magphase_const_rate)
-
-    return
-
-
-def generate_wav(gen_dir, file_id_list, cfg):
-
-    logger = logging.getLogger("wav_generation")
-
-    # STRAIGHT or WORLD vocoders:
-    if (cfg.vocoder_type == 'STRAIGHT') or (cfg.vocoder_type == 'WORLD'):
-        wavgen_straight_type_vocoder(gen_dir, file_id_list, cfg, logger)
-
-    # MagPhase Vocoder:
-    elif cfg.vocoder_type == 'MAGPHASE':
-        wavgen_magphase(gen_dir, file_id_list, cfg, logger)
-
-    # Add your favorite vocoder here.
-
-    # If vocoder is not supported:
-    else:
-        logger.critical('The vocoder %s is not supported yet!\n' %
-                        cfg.vocoder_type)
-        raise
-
-    return
-################################################################################
-# The Neural Network (NN) based Speech Synthesis System
-# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/
-#
-# Centre for Speech Technology Research
-# University of Edinburgh, UK
-# Copyright (c) 2014-2015
-# All Rights Reserved.
-# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - - -import numpy - - -class LearningRate(object): - - def __init__(self): - '''constructor''' - - def get_rate(self): - pass - - def get_next_rate(self, current_error): - pass - - -class LearningRateConstant(LearningRate): - - def __init__(self, learning_rate=0.08, epoch_num=20): - - self.learning_rate = learning_rate - self.epoch = 1 - self.epoch_num = epoch_num - self.rate = learning_rate - - def get_rate(self): - return self.rate - - def get_next_rate(self, current_error): - - if (self.epoch >= self.epoch_num): - self.rate = 0.0 - else: - self.rate = self.learning_rate - self.epoch += 1 - - return self.rate - - -class LearningRateExpDecay(LearningRate): - - def __init__(self, start_rate=0.08, scale_by=0.5, - min_derror_decay_start=0.05, min_derror_stop=0.05, init_error=100, - decay=False, min_epoch_decay_start=15, zero_rate=0.0): - - self.start_rate = start_rate - self.init_error = init_error - - self.rate = start_rate - self.scale_by = scale_by - self.min_derror_decay_start = min_derror_decay_start - self.min_derror_stop = min_derror_stop - self.lowest_error = init_error - - self.epoch = 1 - self.decay = decay - self.zero_rate = zero_rate - - self.min_epoch_decay_start = min_epoch_decay_start - - def get_rate(self): - return self.rate - - def get_next_rate(self, current_error): - diff_error = 0.0 - diff_error = self.lowest_error - current_error - - if (current_error < self.lowest_error): - self.lowest_error = current_error - - if (self.decay): - if (diff_error < self.min_derror_stop): - self.rate = 0.0 - else: - self.rate *= self.scale_by - else: - if ((diff_error < self.min_derror_decay_start) and (self.epoch > self.min_epoch_decay_start)): - self.decay = True - self.rate *= self.scale_by - - self.epoch += 1 - return self.rate - - -class LearningMinLrate(LearningRate): - - def __init__(self, start_rate=0.08, scale_by=0.5, - min_lrate_stop=0.0002, init_error=100, - decay=False, 
-                 min_epoch_decay_start=15):
-
-        self.start_rate = start_rate
-        self.init_error = init_error
-
-        self.rate = start_rate
-        self.scale_by = scale_by
-        self.min_lrate_stop = min_lrate_stop
-        self.lowest_error = init_error
-
-        self.epoch = 1
-        self.decay = decay
-        self.min_epoch_decay_start = min_epoch_decay_start
-
-    def get_rate(self):
-        return self.rate
-
-    def get_next_rate(self, current_error):
-        diff_error = self.lowest_error - current_error
-
-        if (current_error < self.lowest_error):
-            self.lowest_error = current_error
-
-        if (self.decay):
-            if (self.rate < self.min_lrate_stop):
-                self.rate = 0.0
-            else:
-                self.rate *= self.scale_by
-        else:
-            if (self.epoch >= self.min_epoch_decay_start):
-                self.decay = True
-                self.rate *= self.scale_by
-
-        self.epoch += 1
-        return self.rate
-
-
-class ExpDecreaseLearningRate(object):
-    def __init__(self, start_rate=0.02, end_rate=0.001, maximum_epoch=5):
-        self.start_rate = start_rate
-        self.end_rate = end_rate
-        self.maximum_epoch = maximum_epoch
-
-        self.rate_diff = self.start_rate - self.end_rate
-
-        self.decrease_ratio = numpy.zeros((1, maximum_epoch+1))
-        for i in range(maximum_epoch):
-            self.decrease_ratio[0, i+1] = maximum_epoch - i
-
-        self.decrease_ratio = numpy.exp(self.decrease_ratio)
-        self.decrease_ratio /= numpy.sum(self.decrease_ratio)
-
-        self.decrease_ratio[0, 0] = 1.0
-
-    def get_rate(self, epoch):
-
-        if epoch < 0:
-            epoch = 0
-
-        current_rate = self.end_rate
-        if epoch <= self.maximum_epoch:
-            current_rate = self.end_rate + \
-                self.decrease_ratio[0, epoch] * self.rate_diff
-
-        return float(current_rate)
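
# A sketch of how these schedulers are meant to be driven from a training
# loop; the validation errors below are illustrative values only:
#
#     lr_schedule = LearningMinLrate(start_rate=0.08, scale_by=0.5,
#                                    min_lrate_stop=0.0002)
#     for valid_error in [10.0, 8.5, 8.4, 8.4]:
#         rate = lr_schedule.get_next_rate(valid_error)
#         if rate == 0.0:
#             break  # the rate has decayed below min_lrate_stop: stop training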
-################################################################################
-# The Neural Network (NN) based Speech Synthesis System
-# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/
-#
-# Centre for Speech Technology Research
-# University of Edinburgh, UK
-# Copyright (c) 2014-2015
-# All Rights Reserved.
-#
-# The system as a whole and most of the files in it are distributed
-# under the following copyright and conditions
-#
-# Permission is hereby granted, free of charge, to use and distribute
-# this software and its documentation without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of this work, and to
-# permit persons to whom this work is furnished to do so, subject to
-# the following conditions:
-#
-#  - Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  - Redistributions in binary form must reproduce the above
-#    copyright notice, this list of conditions and the following
-#    disclaimer in the documentation and/or other materials provided
-#    with the distribution.
-#  - The authors' names may not be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK
-# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
-# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
-# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE
-# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
-# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
-# THIS SOFTWARE.
-################################################################################
-
-import os
-import sys
-import numpy
-import theano
-import random
-from io_funcs.binary_io import BinaryIOCollection
-import logging
-from frontend.label_normalisation import HTSLabelNormalisation
-
-
-class ListDataProvider(object):
-    """This class provides an interface to load data into CPU/GPU memory utterance by utterance or block by block.
-
-    In speech synthesis we are usually not able to load all of the training/evaluation data into RAM, so we proceed in three steps:
-
-    - Step 1: the data provider loads part of the data into a buffer
-
-    - Step 2: the DNN is trained on the data from the buffer
-
-    - Step 3: steps 1 and 2 are iterated until all the data has been used; at that point one epoch of DNN training is finished.
-
-    Utterance-by-utterance loading is useful when sequential training is used, while block-by-block loading is used when the order of frames is not important.
-
-    This provider assumes binary format with float32 precision and without any header (e.g. HTK header).
-
-    """
-
-    def __init__(self, x_file_list, y_file_list, dur_file_list=None, n_ins=0, n_outs=0, buffer_size=500000, sequential=False, network_type=None, shuffle=False):
-        """Initialise a data provider
-
-        :param x_file_list: list of file names for the input files to the DNN
-        :type x_file_list: python list
-        :param y_file_list: list of file names for the output files of the DNN
-        :param n_ins: the dimensionality of the input features
-        :param n_outs: the dimensionality of the output features
-        :param buffer_size: the size of the buffer as a number of frames; the value depends on the available RAM/GPU memory.
-        :param shuffle: True/False, indicating whether the file list is shuffled. When loading data block by block, the data in the buffer is shuffled regardless of this value.
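        A minimal usage sketch of the block-by-block interface (the file
        lists, dimensionalities and buffer size here are hypothetical)::

            provider = ListDataProvider(x_list, y_list, n_ins=425, n_outs=187,
                                        buffer_size=200000, shuffle=True)
            while not provider.is_finish():
                shared_xy, set_x, set_y = provider.load_next_partition()
                # ... train on this block of frames ...
            provider.reset()  # rewind the provider before the next epoch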
- """ - - self.logger = logging.getLogger("ListDataProvider") - - self.n_ins = n_ins - self.n_outs = n_outs - - self.buffer_size = buffer_size - - self.sequential = sequential - self.network_type = network_type - - self.rnn_batch_training = False - self.reshape_io = False - - # remove potential empty lines and end of line signs - - try: - assert len(x_file_list) > 0 - except AssertionError: - self.logger.critical('first list is empty') - raise - - try: - assert len(y_file_list) > 0 - except AssertionError: - self.logger.critical('second list is empty') - raise - - try: - assert len(x_file_list) == len(y_file_list) - except AssertionError: - self.logger.critical('two lists are of differing lengths: %d versus %d', len( - x_file_list), len(y_file_list)) - raise - - if dur_file_list: - try: - assert len(x_file_list) == len(dur_file_list) - except AssertionError: - self.logger.critical('two lists are of differing lengths: %d versus %d', len( - x_file_list), len(y_file_list)) - raise - - self.x_files_list = x_file_list - self.y_files_list = y_file_list - self.dur_files_list = dur_file_list - - self.logger.debug('first list of items from ...%s to ...%s' % ( - self.x_files_list[0].rjust(20)[-20:], self.x_files_list[-1].rjust(20)[-20:])) - self.logger.debug('second list of items from ...%s to ...%s' % ( - self.y_files_list[0].rjust(20)[-20:], self.y_files_list[-1].rjust(20)[-20:])) - - if shuffle: - random.seed(271638) - random.shuffle(self.x_files_list) - random.seed(271638) - random.shuffle(self.y_files_list) - if self.dur_files_list: - random.seed(271638) - random.shuffle(self.dur_files_list) - - self.file_index = 0 - self.list_size = len(self.x_files_list) - - self.remain_data_x = numpy.empty((0, self.n_ins)) - self.remain_data_y = numpy.empty((0, self.n_outs)) - self.remain_frame_number = 0 - - self.end_reading = False - - self.logger.debug('initialised') - - def __iter__(self): - return self - - def reset(self): - """When all the files in the file list have been used for DNN training, reset the data provider to start a new epoch. - - """ - self.file_index = 0 - self.end_reading = False - - self.remain_frame_number = 0 - - self.bucket_index = 0 - self.bucket_file_index = 0 - self.current_bucket_size = 0 - - self.logger.debug('reset') - - def make_shared(self, data_set, data_name): - """To make data shared for theano implementation. 
If you want to know why we make it shared, please refer the theano documentation: http://deeplearning.net/software/theano/library/compile/shared.html - - :param data_set: normal data in CPU memory - :param data_name: indicate the name of the data (e.g., 'x', 'y', etc) - :returns: shared dataset -- data_set - """ - data_set = theano.shared(numpy.asarray( - data_set, dtype=theano.config.floatX), name=data_name, borrow=True) - - return data_set - - def set_rnn_params(self, training_algo=1, batch_size=25, seq_length=200, merge_size=1, bucket_range=100): - # get file lengths - self.get_file_lengths() - - # set training algo - self.training_algo = training_algo - - # set batch size - self.batch_size = batch_size - - # set RNN batch training True - self.rnn_batch_training = True - - # set params for each training algo - if(self.training_algo == 1): - self.merge_size = 1 - elif(self.training_algo == 2): - self.merge_size = 1 - self.bucket_index = 0 - self.bucket_file_index = 0 - self.current_bucket_size = 0 - self.bucket_range = bucket_range - self.x_frame_list = numpy.array( - list(self.file_length_dict['framenum2utt'].keys())) - self.list_of_buckets = list( - range(min(self.x_frame_list), max(self.x_frame_list)+1, self.bucket_range)) - elif(self.training_algo == 3): - self.seq_length = seq_length - self.merge_size = merge_size - else: - self.logger.critical( - "Choose training algorithm for batch training with RNNs:") - self.logger.critical( - "1. Padding model -- pad utterances with zeros to maximum sequence length") - self.logger.critical( - "2. Bucket model -- form buckets with minimum and maximum sequence length") - self.logger.critical( - "3. Split model -- split utterances to a fixed sequence length") - sys.exit(1) - - def reshape_input_output(self): - self.reshape_io = True - - def get_file_lengths(self): - io_funcs = BinaryIOCollection() - - self.file_length_dict = {'framenum2utt': {}, - 'utt2framenum': {}, 'utt2index': {}} - - ### read file by file ### - while True: - if self.file_index >= self.list_size: - self.end_reading = True - self.file_index = 0 - break - - in_features, lab_frame_number = io_funcs.load_binary_file_frame( - self.x_files_list[self.file_index], self.n_ins) - out_features, out_frame_number = io_funcs.load_binary_file_frame( - self.y_files_list[self.file_index], self.n_outs) - - base_file_name = os.path.basename( - self.x_files_list[self.file_index]).split('.')[0] - # we allow small difference here. 
may not be correct, but sometimes, there is one/two frames difference - if abs(lab_frame_number - out_frame_number) < 5: - frame_number = min(lab_frame_number, out_frame_number) - else: - self.logger.critical("the number of frames in label and acoustic features are different: %d vs %d (%s)" % ( - lab_frame_number, out_frame_number, base_file_name)) - raise - - if frame_number not in self.file_length_dict['framenum2utt']: - self.file_length_dict['framenum2utt'][frame_number] = [ - base_file_name] - else: - self.file_length_dict['framenum2utt'][frame_number].append( - base_file_name) - - self.file_length_dict['utt2framenum'][base_file_name] = frame_number - self.file_length_dict['utt2index'][base_file_name] = self.file_index - self.file_index += 1 - - self.reset() - - def set_seq_length_from_current_batch(self): - temp_list = [] - for indx in range(self.batch_size): - if self.file_index+indx >= self.list_size: - break - base_file_name = os.path.basename( - self.x_files_list[self.file_index+indx]).split('.')[0] - temp_list.append( - self.file_length_dict['utt2framenum'][base_file_name]) - - self.seq_length = max(temp_list) - - def get_next_bucket(self): - min_seq_length = self.list_of_buckets[self.bucket_index] - max_seq_length = self.list_of_buckets[self.bucket_index] + \ - self.bucket_range - - current_bucket = self.x_frame_list[(self.x_frame_list >= min_seq_length) & ( - self.x_frame_list < max_seq_length)] - self.current_bucket_list = sum( - [self.file_length_dict['framenum2utt'][framenum] for framenum in current_bucket], []) - - self.bucket_file_index = 0 - self.current_bucket_size = len(self.current_bucket_list) - - self.seq_length = max_seq_length - self.bucket_index = self.bucket_index + 1 - - def set_s2s_division(self, linguistic_feats_file=None, frame_length=4): - self.MLU_div = {} - in_f = open(linguistic_feats_file, 'r') - for newline in in_f.readlines(): - temp_list = newline.strip().split() - unit = temp_list[0] - feat1 = temp_list[1][1:-1].split('-') - feat2 = temp_list[2][1:-1].split('-') - - self.MLU_div[unit] = [int(feat1[0]), int( - feat1[1]), int(feat2[0]), int(feat2[1])] - - syl_length = (self.MLU_div['syl'][1] - self.MLU_div['syl'] - [0]) + (self.MLU_div['syl'][3] - self.MLU_div['syl'][2]) - phone_length = (self.MLU_div['phone'][1] - self.MLU_div['phone'][0]) + ( - self.MLU_div['phone'][3] - self.MLU_div['phone'][2]) - self.MLU_div['length'] = [0, syl_length, syl_length + - phone_length, syl_length+phone_length+frame_length] - - return self.MLU_div - - def load_one_partition(self): - if self.sequential == True: - if not self.network_type or self.network_type == "RNN": - if self.rnn_batch_training: - shared_set_xy, temp_set_x, temp_set_y = self.load_next_batch() - else: - shared_set_xy, temp_set_x, temp_set_y = self.load_next_utterance() - elif self.network_type == "CTC": - shared_set_xy, temp_set_x, temp_set_y = self.load_next_utterance_CTC() - elif self.network_type == "S2S": - shared_set_xyd, temp_set_x, temp_set_y, temp_set_d, temp_set_af = self.load_next_utterance_S2SML() - return shared_set_xyd, temp_set_x, temp_set_y, temp_set_d, temp_set_af - else: - logger.critical("Unknown network type: %s \n Please use one of the following: DNN, RNN, S2S, CTC\n" % ( - self.network_type)) - sys.exit(1) - else: - shared_set_xy, temp_set_x, temp_set_y = self.load_next_partition() - - return shared_set_xy, temp_set_x, temp_set_y - - def load_next_batch(self): - io_funcs = BinaryIOCollection() - - # set sequence length for batch training - if(self.training_algo == 1): - # set seq 
length to maximum seq length from current batch - self.set_seq_length_from_current_batch() - elif(self.training_algo == 2): - # set seq length to maximum seq length from current bucket - while not self.current_bucket_size: - self.get_next_bucket() - elif(self.training_algo == 3): - # seq length is set based on default/user configuration - pass - - temp_set_x = numpy.zeros((self.buffer_size, self.n_ins)) - temp_set_y = numpy.zeros((self.buffer_size, self.n_outs)) - - ### read file by file ### - current_index = 0 - while True: - if current_index >= self.buffer_size: - print('buffer size reached by file index %d' % - (self.file_index)) - break - - if self.training_algo == 2: - # choose utterance from current bucket list - base_file_name = self.current_bucket_list[self.bucket_file_index] - self.utt_index = self.file_length_dict['utt2index'][base_file_name] - else: - # choose utterance randomly from current file list - #self.utt_index = numpy.random.randint(self.list_size) - # choose utterance in serial order - self.utt_index = self.file_index - base_file_name = os.path.basename( - self.x_files_list[self.utt_index]).split('.')[0] - - in_features, lab_frame_number = io_funcs.load_binary_file_frame( - self.x_files_list[self.utt_index], self.n_ins) - out_features, out_frame_number = io_funcs.load_binary_file_frame( - self.y_files_list[self.utt_index], self.n_outs) - - frame_number = self.file_length_dict['utt2framenum'][base_file_name] - - temp_set_x[current_index:current_index + - frame_number, ] = in_features - temp_set_y[current_index:current_index + - frame_number, ] = out_features - current_index += frame_number - - if((self.file_index+1) % self.merge_size == 0): - num_of_samples = int(numpy.ceil( - float(current_index)/float(self.seq_length))) - current_index = self.seq_length * num_of_samples - - self.file_index += 1 - - # break for any of the below conditions - if self.training_algo == 2: - self.bucket_file_index += 1 - if(self.bucket_file_index >= self.current_bucket_size): - self.current_bucket_size = 0 - break - if(self.bucket_file_index % self.batch_size == 0): - break - else: - if(self.file_index % self.batch_size == 0) or (self.file_index >= self.list_size): - break - - if self.file_index >= self.list_size: - self.end_reading = True - self.file_index = 0 - - num_of_samples = int(numpy.ceil( - float(current_index)/float(self.seq_length))) - - temp_set_x = temp_set_x[0: num_of_samples*self.seq_length, ] - temp_set_y = temp_set_y[0: num_of_samples*self.seq_length, ] - - temp_set_x = temp_set_x.reshape( - num_of_samples, self.seq_length, self.n_ins) - temp_set_y = temp_set_y.reshape( - num_of_samples, self.seq_length, self.n_outs) - - shared_set_x = self.make_shared(temp_set_x, 'x') - shared_set_y = self.make_shared(temp_set_y, 'y') - - shared_set_xy = (shared_set_x, shared_set_y) - - return shared_set_xy, temp_set_x, temp_set_y - - def load_next_utterance(self): - """Load the data for one utterance. This function will be called when utterance-by-utterance loading is required (e.g., sequential training). - - """ - - temp_set_x = numpy.empty((self.buffer_size, self.n_ins)) - temp_set_y = numpy.empty((self.buffer_size, self.n_outs)) - - io_fun = BinaryIOCollection() - - in_features, lab_frame_number = io_fun.load_binary_file_frame( - self.x_files_list[self.file_index], self.n_ins) - out_features, out_frame_number = io_fun.load_binary_file_frame( - self.y_files_list[self.file_index], self.n_outs) - - frame_number = lab_frame_number - # we allow small difference here. 
may not be correct, but sometimes, there is one/two frames difference - if abs(lab_frame_number - out_frame_number) < 5: - if lab_frame_number > out_frame_number: - frame_number = out_frame_number - else: - base_file_name = os.path.basename( - self.x_files_list[self.file_index]).split('.')[0] - self.logger.critical("the number of frames in label and acoustic features are different: %d vs %d (%s)" % ( - lab_frame_number, out_frame_number, base_file_name)) - raise - - temp_set_y = out_features[0:frame_number, ] - temp_set_x = in_features[0:frame_number, ] - - self.file_index += 1 - - if self.file_index >= self.list_size: - self.end_reading = True - self.file_index = 0 - - # reshape input-output - if self.reshape_io: - temp_set_x = numpy.reshape( - temp_set_x, (1, temp_set_x.shape[0], self.n_ins)) - temp_set_y = numpy.reshape( - temp_set_y, (1, temp_set_y.shape[0], self.n_outs)) - - temp_set_x = numpy.array(temp_set_x, 'float32') - temp_set_y = numpy.array(temp_set_y, 'float32') - - shared_set_x = self.make_shared(temp_set_x, 'x') - shared_set_y = self.make_shared(temp_set_y, 'y') - - shared_set_xy = (shared_set_x, shared_set_y) - - return shared_set_xy, temp_set_x, temp_set_y - - def load_next_utterance_S2S(self): - """Load the data for one utterance. This function will be called when utterance-by-utterance loading is required (e.g., sequential training). - - """ - - temp_set_x = numpy.empty((self.buffer_size, self.n_ins)) - temp_set_y = numpy.empty((self.buffer_size, self.n_outs)) - - io_fun = BinaryIOCollection() - - in_features, lab_frame_number = io_fun.load_binary_file_frame( - self.x_files_list[self.file_index], self.n_ins) - out_features, out_frame_number = io_fun.load_binary_file_frame( - self.y_files_list[self.file_index], self.n_outs) - - temp_set_x = in_features[0:lab_frame_number, ] - temp_set_y = out_features[0:out_frame_number, ] - - if not self.dur_files_list: - dur_frame_number = out_frame_number - dur_features = numpy.array([dur_frame_number]) - else: - dur_features, dur_frame_number = io_fun.load_binary_file_frame( - self.dur_files_list[self.file_index], 1) - assert sum(dur_features) == out_frame_number - - dur_features = numpy.reshape(dur_features, (-1, )) - temp_set_d = dur_features.astype(int) - - self.file_index += 1 - - if self.file_index >= self.list_size: - self.end_reading = True - self.file_index = 0 - - shared_set_x = self.make_shared(temp_set_x, 'x') - shared_set_y = self.make_shared(temp_set_y, 'y') - shared_set_d = theano.shared(numpy.asarray( - temp_set_d, dtype='int32'), name='d', borrow=True) - - shared_set_xyd = (shared_set_x, shared_set_y, shared_set_d) - - return shared_set_xyd, temp_set_x, temp_set_y, temp_set_d - - def load_next_utterance_S2SML(self): - """Load the data for one utterance. This function will be called when utterance-by-utterance loading is required (e.g., sequential training). 
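        The packed duration vector read below holds word durations (counted
        in syllables), then syllable durations (counted in phones), then one
        duration per phone (counted in frames). A worked toy example of the
        split: with 5 phone labels and dur_word_syl = [2, 1, 2, 1, 2]
        (two words of 2 and 1 syllables, three syllables of 2, 1 and 2
        phones), the cumulative sum of the reversed vector first reaches 5
        after three entries, so num_syl = 3 and the remaining leading entries
        give num_words = 2.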
- - """ - - io_fun = BinaryIOCollection() - - in_features, lab_frame_number = io_fun.load_binary_file_frame( - self.x_files_list[self.file_index], self.n_ins) - out_features, out_frame_number = io_fun.load_binary_file_frame( - self.y_files_list[self.file_index], self.n_outs) - dur_features, dur_frame_number = io_fun.load_binary_file_frame( - self.dur_files_list[self.file_index], 1) - - ### MLU features sub-division ### - temp_set_MLU = in_features[0:lab_frame_number, ] - temp_set_y = out_features[0:out_frame_number, ] - - temp_set_phone = numpy.concatenate([temp_set_MLU[:, self.MLU_div['phone'][0]: self.MLU_div['phone'][1]], - temp_set_MLU[:, self.MLU_div['phone'][2]: self.MLU_div['phone'][3]]], axis=1) - temp_set_syl = numpy.concatenate([temp_set_MLU[:, self.MLU_div['syl'][0]: self.MLU_div['syl'][1]], - temp_set_MLU[:, self.MLU_div['syl'][2]: self.MLU_div['syl'][3]]], axis=1) - temp_set_word = numpy.concatenate([temp_set_MLU[:, self.MLU_div['word'][0]: self.MLU_div['word'][1]], - temp_set_MLU[:, self.MLU_div['word'][2]: self.MLU_div['word'][3]]], axis=1) - - ### duration array sub-division ### - dur_features = numpy.reshape(dur_features, (-1, )) - temp_set_d = dur_features.astype(int) - dur_word_syl = temp_set_d[0: -lab_frame_number] - - num_ph = lab_frame_number - num_syl = (numpy.where(numpy.cumsum( - dur_word_syl[::-1]) == lab_frame_number)[0][0] + 1) - num_words = len(dur_word_syl) - num_syl - - temp_set_dur_phone = temp_set_d[-num_ph:] - temp_set_dur_word = dur_word_syl[0: num_words] - temp_set_dur_syl = dur_word_syl[num_words:] - - ### additional feature matrix (syllable+phone+frame=432) ### - num_frames = sum(temp_set_dur_phone) - temp_set_af = numpy.empty((num_frames, self.MLU_div['length'][-1])) - - temp_set_af[0: num_syl, self.MLU_div['length'][0]: self.MLU_div['length'] - [1]] = temp_set_syl[numpy.cumsum(temp_set_dur_syl)-1] - temp_set_af[0: num_ph, self.MLU_div['length'][1]: self.MLU_div['length'][2]] = temp_set_phone - - ### input word feature matrix ### - temp_set_dur_word_segments = numpy.zeros(num_words, dtype='int32') - syl_bound = numpy.cumsum(temp_set_dur_word) - for indx in xrange(num_words): - temp_set_dur_word_segments[indx] = int( - sum(temp_set_dur_syl[0: syl_bound[indx]])) - temp_set_x = temp_set_word[temp_set_dur_word_segments-1] - - ### rest of the code similar to S2S ### - self.file_index += 1 - - if self.file_index >= self.list_size: - self.end_reading = True - self.file_index = 0 - - shared_set_x = self.make_shared(temp_set_x, 'x') - shared_set_y = self.make_shared(temp_set_y, 'y') - shared_set_d = theano.shared(numpy.asarray( - temp_set_d, dtype='int32'), name='d', borrow=True) - - shared_set_xyd = (shared_set_x, shared_set_y, shared_set_d) - - return shared_set_xyd, temp_set_x, temp_set_y, temp_set_d, temp_set_af - - def load_next_batch_S2S(self): - """Load the data for one utterance. This function will be called when utterance-by-utterance loading is required (e.g., sequential training). 
- - """ - - temp_set_x = numpy.empty((self.buffer_size, self.n_ins)) - temp_set_y = numpy.empty((self.buffer_size, self.n_outs)) - temp_set_d = numpy.empty((self.buffer_size, 1)) - - io_fun = BinaryIOCollection() - - lab_start_frame_number = 0 - lab_end_frame_number = 0 - - out_start_frame_number = 0 - out_end_frame_number = 0 - - new_x_files_list = self.x_files_list[self.file_index].split(',') - new_y_files_list = self.y_files_list[self.file_index].split(',') - new_dur_files_list = self.dur_files_list[self.file_index].split(',') - - for new_file_index in xrange(len(new_x_files_list)): - in_features, lab_frame_number = io_fun.load_binary_file_frame( - new_x_files_list[new_file_index], self.n_ins) - out_features, out_frame_number = io_fun.load_binary_file_frame( - new_y_files_list[new_file_index], self.n_outs) - - lab_end_frame_number += lab_frame_number - out_end_frame_number += out_frame_number - - temp_set_x[lab_start_frame_number: lab_end_frame_number, - ] = in_features[0:lab_frame_number, ] - temp_set_y[out_start_frame_number: out_end_frame_number, - ] = out_features[0:out_frame_number, ] - if not self.dur_files_list: - dur_frame_number = out_end_frame_number - temp_set_d = numpy.array([dur_frame_number]) - else: - dur_features, dur_frame_number = io_fun.load_binary_file_frame( - new_dur_files_list[new_file_index], 1) - assert sum(dur_features) == out_frame_number - temp_set_d[lab_start_frame_number: lab_end_frame_number, - ] = dur_features[0:lab_frame_number, ] - - lab_start_frame_number = lab_end_frame_number - out_start_frame_number = out_end_frame_number - - temp_set_x = temp_set_x[0:lab_end_frame_number, ] - temp_set_y = temp_set_y[0:out_end_frame_number, ] - - temp_set_d = temp_set_d[0:lab_end_frame_number, ] - temp_set_d = numpy.reshape(temp_set_d, (-1, )) - temp_set_d = temp_set_d.astype(int) - - self.file_index += 1 - - if self.file_index >= self.list_size: - self.end_reading = True - self.file_index = 0 - - shared_set_x = self.make_shared(temp_set_x, 'x') - shared_set_y = self.make_shared(temp_set_y, 'y') - shared_set_d = theano.shared(numpy.asarray( - temp_set_d, dtype='int32'), name='d', borrow=True) - - shared_set_xyd = (shared_set_x, shared_set_y, shared_set_d) - - return shared_set_xyd, temp_set_x, temp_set_y, temp_set_d - - def load_next_batch_S2SML(self): - """Load the data for one utterance. This function will be called when utterance-by-utterance loading is required (e.g., sequential training). 
- - """ - - inp_length = (self.MLU_div['word'][1] - self.MLU_div['word'] - [0]) + (self.MLU_div['word'][3] - self.MLU_div['word'][2]) - af_length = self.MLU_div['length'][-1] - - new_temp_set_x = numpy.empty((self.buffer_size, inp_length)) - new_temp_set_y = numpy.empty((self.buffer_size, self.n_outs)) - new_temp_set_af = numpy.empty((self.buffer_size, af_length)) - new_temp_set_d = [numpy.array([], 'int32'), numpy.array( - [], 'int32'), numpy.array([], 'int32')] - - io_fun = BinaryIOCollection() - - lab_start_frame_number = 0 - lab_end_frame_number = 0 - - out_start_frame_number = 0 - out_end_frame_number = 0 - - new_x_files_list = self.x_files_list[self.file_index].split(',') - new_y_files_list = self.y_files_list[self.file_index].split(',') - new_dur_files_list = self.dur_files_list[self.file_index].split(',') - - for new_file_index in xrange(len(new_x_files_list)): - in_features, lab_frame_number = io_fun.load_binary_file_frame( - new_x_files_list[new_file_index], self.n_ins) - out_features, out_frame_number = io_fun.load_binary_file_frame( - new_y_files_list[new_file_index], self.n_outs) - dur_features, dur_frame_number = io_fun.load_binary_file_frame( - new_dur_files_list[new_file_index], 1) - - ### MLU features sub-division ### - temp_set_MLU = in_features[0:lab_frame_number, ] - temp_set_y = out_features[0:out_frame_number, ] - - temp_set_phone = numpy.concatenate([temp_set_MLU[:, self.MLU_div['phone'][0]: self.MLU_div['phone'][1]], - temp_set_MLU[:, self.MLU_div['phone'][2]: self.MLU_div['phone'][3]]], axis=1) - temp_set_syl = numpy.concatenate([temp_set_MLU[:, self.MLU_div['syl'][0]: self.MLU_div['syl'][1]], - temp_set_MLU[:, self.MLU_div['syl'][2]: self.MLU_div['syl'][3]]], axis=1) - temp_set_word = numpy.concatenate([temp_set_MLU[:, self.MLU_div['word'][0]: self.MLU_div['word'][1]], - temp_set_MLU[:, self.MLU_div['word'][2]: self.MLU_div['word'][3]]], axis=1) - - ### duration array sub-division ### - dur_features = numpy.reshape(dur_features, (-1, )) - temp_set_d = dur_features.astype(int) - dur_word_syl = temp_set_d[0: -lab_frame_number] - - num_ph = lab_frame_number - num_syl = (numpy.where(numpy.cumsum( - dur_word_syl[::-1]) == lab_frame_number)[0][0] + 1) - num_words = len(dur_word_syl) - num_syl - - temp_set_dur_phone = temp_set_d[-num_ph:] - temp_set_dur_word = dur_word_syl[0: num_words] - temp_set_dur_syl = dur_word_syl[num_words:] - - ### additional feature matrix (syllable+phone+frame=432) ### - num_frames = sum(temp_set_dur_phone) - temp_set_af = numpy.empty((num_frames, self.MLU_div['length'][-1])) - - temp_set_af[0: num_syl, self.MLU_div['length'][0]: self.MLU_div['length'] - [1]] = temp_set_syl[numpy.cumsum(temp_set_dur_syl)-1] - temp_set_af[0: num_ph, self.MLU_div['length'][1]: self.MLU_div['length'][2]] = temp_set_phone - - ### input word feature matrix ### - temp_set_dur_word_segments = numpy.zeros(num_words, dtype='int32') - syl_bound = numpy.cumsum(temp_set_dur_word) - for indx in xrange(num_words): - temp_set_dur_word_segments[indx] = int( - sum(temp_set_dur_syl[0: syl_bound[indx]])) - temp_set_x = temp_set_word[temp_set_dur_word_segments-1] - - ### for batch processing ### - lab_end_frame_number += num_words - out_end_frame_number += out_frame_number - - new_temp_set_x[lab_start_frame_number: lab_end_frame_number, - ] = temp_set_x[0:num_words, ] - new_temp_set_y[out_start_frame_number: out_end_frame_number, - ] = temp_set_y[0:out_frame_number, ] - new_temp_set_af[out_start_frame_number: out_end_frame_number, - ] = temp_set_af[0:out_frame_number, ] - - 
new_temp_set_d[0] = numpy.append( - new_temp_set_d[0], temp_set_dur_word) - new_temp_set_d[1] = numpy.append( - new_temp_set_d[1], temp_set_dur_syl) - new_temp_set_d[2] = numpy.append( - new_temp_set_d[2], temp_set_dur_phone) - - lab_start_frame_number = lab_end_frame_number - out_start_frame_number = out_end_frame_number - - new_temp_set_x = new_temp_set_x[0:lab_end_frame_number, ] - new_temp_set_y = new_temp_set_y[0:out_end_frame_number, ] - new_temp_set_af = new_temp_set_af[0:out_end_frame_number, ] - - new_temp_set_d = numpy.concatenate( - (new_temp_set_d[0], new_temp_set_d[1], new_temp_set_d[2])) - - ### rest of the code similar to S2S ### - self.file_index += 1 - - if self.file_index >= self.list_size: - self.end_reading = True - self.file_index = 0 - - shared_set_x = self.make_shared(new_temp_set_x, 'x') - shared_set_y = self.make_shared(new_temp_set_y, 'y') - shared_set_d = theano.shared(numpy.asarray( - new_temp_set_d, dtype='int32'), name='d', borrow=True) - - shared_set_xyd = (shared_set_x, shared_set_y, shared_set_d) - - return shared_set_xyd, new_temp_set_x, new_temp_set_y, new_temp_set_d, new_temp_set_af - - def load_next_utterance_CTC(self): - - temp_set_x = numpy.empty((self.buffer_size, self.n_ins)) - temp_set_y = numpy.empty(self.buffer_size) - - io_fun = BinaryIOCollection() - - in_features, lab_frame_number = io_fun.load_binary_file_frame( - self.x_files_list[self.file_index], self.n_ins) - out_features, out_frame_number = io_fun.load_binary_file_frame( - self.y_files_list[self.file_index], self.n_outs) - - frame_number = lab_frame_number - temp_set_x = in_features[0:frame_number, ] - - temp_set_y = numpy.array([self.n_outs]) - for il in numpy.argmax(out_features, axis=1): - temp_set_y = numpy.concatenate( - (temp_set_y, [il, self.n_outs]), axis=0) - - self.file_index += 1 - - if self.file_index >= self.list_size: - self.end_reading = True - self.file_index = 0 - - shared_set_x = self.make_shared(temp_set_x, 'x') - shared_set_y = theano.shared(numpy.asarray( - temp_set_y, dtype='int32'), name='y', borrow=True) - - shared_set_xy = (shared_set_x, shared_set_y) - - return shared_set_xy, temp_set_x, temp_set_y - - def load_next_partition(self): - """Load one block data. The number of frames will be the buffer size set during intialisation. - - """ - - self.logger.debug('loading next partition') - - temp_set_x = numpy.empty((self.buffer_size, self.n_ins)) - temp_set_y = numpy.empty((self.buffer_size, self.n_outs)) - current_index = 0 - - # first check whether there are remaining data from previous utterance - if self.remain_frame_number > 0: - temp_set_x[current_index:self.remain_frame_number, - ] = self.remain_data_x - temp_set_y[current_index:self.remain_frame_number, - ] = self.remain_data_y - current_index += self.remain_frame_number - - self.remain_frame_number = 0 - - io_fun = BinaryIOCollection() - while True: - if current_index >= self.buffer_size: - break - if self.file_index >= self.list_size: - self.end_reading = True - self.file_index = 0 - break - - in_features, lab_frame_number = io_fun.load_binary_file_frame( - self.x_files_list[self.file_index], self.n_ins) - out_features, out_frame_number = io_fun.load_binary_file_frame( - self.y_files_list[self.file_index], self.n_outs) - - frame_number = lab_frame_number - # we allow small difference here. 
may not be correct, but sometimes, there is one/two frames difference - if abs(lab_frame_number - out_frame_number) < 5: - if lab_frame_number > out_frame_number: - frame_number = out_frame_number - else: - base_file_name = os.path.basename( - self.x_files_list[self.file_index]).split('.')[0] - self.logger.critical("the number of frames in label and acoustic features are different: %d vs %d (%s)" % ( - lab_frame_number, out_frame_number, base_file_name)) - raise - - out_features = out_features[0:frame_number, ] - in_features = in_features[0:frame_number, ] - - if current_index + frame_number <= self.buffer_size: - temp_set_x[current_index:current_index + - frame_number, ] = in_features - temp_set_y[current_index:current_index + - frame_number, ] = out_features - - current_index = current_index + frame_number - else: # if current utterance cannot be stored in the block, then leave the remaining part for the next block - used_frame_number = self.buffer_size - current_index - temp_set_x[current_index:self.buffer_size, - ] = in_features[0:used_frame_number, ] - temp_set_y[current_index:self.buffer_size, - ] = out_features[0:used_frame_number, ] - current_index = self.buffer_size - - self.remain_data_x = in_features[used_frame_number:frame_number, ] - self.remain_data_y = out_features[used_frame_number:frame_number, ] - self.remain_frame_number = frame_number - used_frame_number - - self.file_index += 1 - - temp_set_x = temp_set_x[0:current_index, ] - temp_set_y = temp_set_y[0:current_index, ] - - numpy.random.seed(271639) - numpy.random.shuffle(temp_set_x) - numpy.random.seed(271639) - numpy.random.shuffle(temp_set_y) - - shared_set_x = self.make_shared(temp_set_x, 'x') - shared_set_y = self.make_shared(temp_set_y, 'y') - - shared_set_xy = (shared_set_x, shared_set_y) -# temp_set_x = self.make_shared(temp_set_x, 'x') -# temp_set_y = self.make_shared(temp_set_y, 'y') - - return shared_set_xy, temp_set_x, temp_set_y - - def is_finish(self): - return self.end_reading - - -class ListDataProviderWithProjectionIndex(ListDataProvider): - ''' - Added kwarg index_to_project to __init__ - ''' - - def __init__(self, x_file_list, y_file_list, n_ins=0, n_outs=0, - buffer_size=500000, shuffle=False, index_to_project=1, projection_insize=10000, indexes_only=False): - # ListDataProvider.__init__(x_file_list, \ - # y_file_list, n_ins=0, n_outs=0, buffer_size = 500000, shuffle=False) - super(ListDataProviderWithProjectionIndex, self).__init__(x_file_list, - y_file_list, n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, shuffle=shuffle) - self.index_to_project = index_to_project - self.projection_insize = projection_insize - self.indexes_only = indexes_only - - def load_next_partition_with_projection(self): - - shared_set_xy, temp_set_x, temp_set_y = self.load_next_partition() - - if self.indexes_only: - temp_set_x, p_indexes = get_unexpanded_projection_inputs(temp_set_x, self.index_to_project, - self.projection_insize) - shared_set_x_proj = theano.shared( - p_indexes, name='x_proj', borrow=True) - else: - temp_set_x, one_hot = expand_projection_inputs(temp_set_x, self.index_to_project, - self.projection_insize) - shared_set_x_proj = self.make_shared(one_hot, 'x_proj') - - shared_set_x = self.make_shared(temp_set_x, 'x') - shared_set_y = self.make_shared(temp_set_y, 'y') - - shared_set_xy = (shared_set_x, shared_set_x_proj, shared_set_y) - - if self.indexes_only: - return shared_set_xy, temp_set_x, p_indexes, temp_set_y - else: - return shared_set_xy, temp_set_x, one_hot, temp_set_y - -# Put this function at 
-# Put this function at global level so it can be imported for use in dnn_generation
-
-
-def expand_projection_inputs(temp_set_x, index_to_project, projection_insize):
-    # Turn indexes (to words, syllables etc.) into one-hot data:
-    m, n = numpy.shape(temp_set_x)
-    projection_indices = temp_set_x[:, index_to_project]
-    assert projection_indices.max() < projection_insize, 'projection_insize is %s but there is an index %s in the data' % (
-        projection_insize, projection_indices.max())
-    one_hot = numpy.zeros((m, projection_insize))
-
-    # the indices arrive as floats; cast to int so they can be used for
-    # advanced indexing
-    projection_indices = projection_indices.astype(int)
-    # use advanced indexing to turn the relevant features on:
-    one_hot[list(range(m)), projection_indices] = 1.0
-    # effectively remove the index from the original data by setting it to 0:
-    temp_set_x[:, index_to_project] = 0.0
-    return temp_set_x, one_hot
-
-
-def get_unexpanded_projection_inputs(temp_set_x, index_to_project, projection_insize):
-    # Return the raw projection indices rather than a one-hot expansion:
-    m, n = numpy.shape(temp_set_x)
-    projection_indices = temp_set_x[:, index_to_project]
-    assert projection_indices.max() < projection_insize, 'projection_insize is %s but there is an index %s in the data' % (
-        projection_insize, projection_indices.max())
-
-    projection_indices = projection_indices.astype('int32')
-
-    temp_set_x[:, index_to_project] = 0.0
-    return temp_set_x, projection_indices
-################################################################################
-# The Neural Network (NN) based Speech Synthesis System
-# https://github.com/CSTR-Edinburgh/merlin
-#
-# Centre for Speech Technology Research
-# University of Edinburgh, UK
-# Copyright (c) 2014-2015
-# All Rights Reserved.
-#
-# The system as a whole and most of the files in it are distributed
-# under the following copyright and conditions
-#
-# Permission is hereby granted, free of charge, to use and distribute
-# this software and its documentation without restriction, including
-# without limitation the rights to use, copy, modify, merge, publish,
-# distribute, sublicense, and/or sell copies of this work, and to
-# permit persons to whom this work is furnished to do so, subject to
-# the following conditions:
-#
-#  - Redistributions of source code must retain the above copyright
-#    notice, this list of conditions and the following disclaimer.
-#  - Redistributions in binary form must reproduce the above
-#    copyright notice, this list of conditions and the following
-#    disclaimer in the documentation and/or other materials provided
-#    with the distribution.
-#  - The authors' names may not be used to endorse or promote products derived
-#    from this software without specific prior written permission.
-#
-# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK
-# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING
-# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT
-# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE
-# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
-# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN
-# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
-# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
-# THIS SOFTWARE.
-################################################################################ - -import logging -import os - - -def read_file_list(file_name): - logger = logging.getLogger('read_file_list') - - file_lists = [] - fid = open(file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - file_lists.append(line) - fid.close() - - logger.info('Read file list from %s', file_name) - return file_lists - - -def prepare_file_path_list(file_id_list, - file_dir, - file_extension, - new_dir_switch=True): - logger = logging.getLogger('prepare_file_path_list') - - if not os.path.exists(file_dir) and new_dir_switch: - os.makedirs(file_dir) - - logger.info('Preparing file_list for %s in dir \n%s', file_extension, - file_dir) - - return [ - os.path.join(file_dir, file_id + file_extension) - for file_id in file_id_list - ] -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - -# quick and dirty utility to print out binary files, for debugging - -import sys -# import numpy -from io_funcs.binary_io import BinaryIOCollection - -if __name__ == '__main__': - - # shall we read the logging config file from command line? 
- if len(sys.argv) < 3: - print('usage: python view.py dimension filename(s)') - sys.exit(1) - - dimension = int(sys.argv[1]) - fnames = sys.argv[2:] - - print(fnames) - - io_funcs = BinaryIOCollection() - for f in fnames: - features = io_funcs.load_binary_file(f, dimension) - - print(features.shape) - # print features -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. 
-################################################################################
-
-
-import pickle
-import gzip
-import os
-import sys
-import errno
-import time
-import math
-
-# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why)
-import numpy
-# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself
-import numpy.distutils.__config__
-# and only after that can we import theano
-import theano
-
-from utils.providers import ListDataProvider
-
-from frontend.label_normalisation import HTSLabelNormalisation, HTSDurationLabelNormalisation, XMLLabelNormalisation
-from frontend.silence_remover import SilenceRemover
-from frontend.silence_remover import trim_silence
-from frontend.min_max_norm import MinMaxNormalisation
-#from frontend.acoustic_normalisation import CMPNormalisation
-from frontend.acoustic_composition import AcousticComposition
-from frontend.parameter_generation import ParameterGeneration
-#from frontend.feature_normalisation_base import FeatureNormBase
-from frontend.mean_variance_norm import MeanVarianceNorm
-
-# the new class for label composition and normalisation
-from frontend.label_composer import LabelComposer
-
-import configuration
-
-from models.dnn import DNN
-#from models.ms_dnn import MultiStreamDNN
-#from models.ms_dnn_gv import MultiStreamDNNGv
-#from models.sdae import StackedDenoiseAutoEncoder
-
-from utils.compute_distortion import DistortionComputation, IndividualDistortionComp
-from utils.generate import generate_wav
-from utils.learn_rates import ExpDecreaseLearningRate
-
-
-#import matplotlib.pyplot as plt
-# our custom logging class that can also plot
-#from logplot.logging_plotting import LoggerPlotter, MultipleTimeSeriesPlot, SingleWeightMatrixPlot
-from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot
-import logging  # as logging
-import logging.config
-import io
-
-
-def extract_file_id_list(file_list):
-    file_id_list = []
-    for file_name in file_list:
-        file_id = os.path.basename(os.path.splitext(file_name)[0])
-        file_id_list.append(file_id)
-
-    return file_id_list
-
-
-def read_file_list(file_name):
-
-    logger = logging.getLogger("read_file_list")
-
-    file_lists = []
-    fid = open(file_name)
-    for line in fid.readlines():
-        line = line.strip()
-        if len(line) < 1:
-            continue
-        file_lists.append(line)
-    fid.close()
-
-    logger.debug('Read file list from %s' % file_name)
-    return file_lists
-
-
-def make_output_file_list(out_dir, in_file_lists):
-    out_file_lists = []
-
-    for in_file_name in in_file_lists:
-        file_id = os.path.basename(in_file_name)
-        out_file_name = out_dir + '/' + file_id
-        out_file_lists.append(out_file_name)
-
-    return out_file_lists
-
-
-def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True):
-    if not os.path.exists(file_dir) and new_dir_switch:
-        os.makedirs(file_dir)
-    file_name_list = []
-    for file_id in file_id_list:
-        file_name = file_dir + '/' + file_id + file_extension
-        file_name_list.append(file_name)
-
-    return file_name_list
-
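-
-# --- Illustrative sketch (added for clarity; not part of the scraped source) --
-# How the helpers above are meant to be combined: a one-id-per-line scp file
-# yields parallel path lists, so index i refers to the same utterance in every
-# list. The file names and directories below are illustrative only.
-def example_build_file_lists():
-    file_ids = read_file_list('file_id_list.scp')    # e.g. ['utt0001', ...]
-    lab_files = prepare_file_path_list(file_ids, 'data/lab', '.lab')
-    cmp_files = prepare_file_path_list(file_ids, 'data/cmp', '.cmp')
-    return list(zip(lab_files, cmp_files))
-# ------------------------------------------------------------------------------
-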
-
-def visualize_dnn(dnn):
-
-    layer_num = len(dnn.params) // 2  # including input and output
-
-    plotlogger = logging.getLogger("plotting")
-
-    for i in range(layer_num):
-        fig_name = 'Activation weights W' + str(i)
-        fig_title = 'Activation weights of W' + str(i)
-        xlabel = 'Neuron index of hidden layer ' + str(i)
-        ylabel = 'Neuron index of hidden layer ' + str(i + 1)
-        if i == 0:
-            xlabel = 'Input feature index'
-        if i == layer_num - 1:
-            ylabel = 'Output feature index'
-
-        plotlogger.create_plot(fig_name, SingleWeightMatrixPlot)
-        plotlogger.add_plot_point(
-            fig_name, fig_name, dnn.params[i * 2].get_value(borrow=True).T)
-        plotlogger.save_plot(fig_name, title=fig_name,
-                             xlabel=xlabel, ylabel=ylabel)
-
-
-def train_DNN(train_xy_file_list, valid_xy_file_list,
-              nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False):
-
-    # get loggers for this function
-    # this one writes to both console and file
-    logger = logging.getLogger("main.train_DNN")
-    logger.debug('Starting train_DNN')
-
-    if plot:
-        # this one takes care of plotting duties
-        plotlogger = logging.getLogger("plotting")
-        # create an (empty) plot of training convergence, ready to receive data points
-        plotlogger.create_plot('training convergence', MultipleSeriesPlot)
-
-    try:
-        assert numpy.sum(ms_outs) == n_outs
-    except AssertionError:
-        logger.critical(
-            'the sum of the multi-stream outputs does not equal %d' % (n_outs))
-        raise
-
-    #### parameters ####
-    finetune_lr = numpy.asarray(
-        hyper_params['learning_rate'], dtype='float32')
-    training_epochs = int(hyper_params['training_epochs'])
-    batch_size = int(hyper_params['batch_size'])
-    l1_reg = float(hyper_params['l1_reg'])
-    l2_reg = float(hyper_params['l2_reg'])
-#    private_l2_reg = float(hyper_params['private_l2_reg'])
-    warmup_epoch = int(hyper_params['warmup_epoch'])
-    momentum = float(hyper_params['momentum'])
-    warmup_momentum = float(hyper_params['warmup_momentum'])
-
-    use_rprop = int(hyper_params['use_rprop'])
-
-    hidden_layers_sizes = hyper_params['hidden_layer_size']
-
-#    stream_weights = hyper_params['stream_weights']
-#    private_hidden_sizes = hyper_params['private_hidden_sizes']
-
-    buffer_utt_size = buffer_size
-    early_stop_epoch = int(hyper_params['early_stop_epochs'])
-
-    hidden_activation = hyper_params['hidden_activation']
-    output_activation = hyper_params['output_activation']
-
-#    stream_lr_weights = hyper_params['stream_lr_weights']
-#    use_private_hidden = hyper_params['use_private_hidden']
-
-    model_type = hyper_params['model_type']
-
-    # use a switch to turn on pretraining
-    # pretraining may not help too much; in that case, we turn it off to save time
-    do_pretraining = hyper_params['do_pretraining']
-    pretraining_epochs = int(hyper_params['pretraining_epochs'])
-    pretraining_lr = float(hyper_params['pretraining_lr'])
-
-    # round the buffer size down to a whole number of minibatches
-    buffer_size = int(buffer_size / batch_size) * batch_size
-
-    ###################
-    (train_x_file_list, train_y_file_list) = train_xy_file_list
-    (valid_x_file_list, valid_y_file_list) = valid_xy_file_list
-
-    logger.debug('Creating training data provider')
-    train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list,
-                                         n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, shuffle=True)
-
-    logger.debug('Creating validation data provider')
-    valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list,
-                                         n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, shuffle=False)
-
-    shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition()
-    train_set_x, train_set_y = shared_train_set_xy
-    shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_next_partition()
-    valid_set_x, valid_set_y = shared_valid_set_xy
-    train_data_reader.reset()
-    valid_data_reader.reset()
-
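-
-    # --- Note (added for clarity; not part of the scraped source) -------------
-    # The provider protocol driving the training loop below is:
-    #
-    #     while not reader.is_finish():
-    #         shared_xy, x, y = reader.load_next_partition()
-    #         ...train on this partition...
-    #     reader.reset()    # rewind the file list for the next epoch
-    #
-    # load_next_partition() was already called once above only to create the
-    # shared variables, hence the reset() calls before training begins.
-    # ---------------------------------------------------------------------------
-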
-    # temporarily we use the training set as pretrain_set_x;
-    # we need to support any data for pretraining
-    pretrain_set_x = train_set_x
-
-    # numpy random generator
-    numpy_rng = numpy.random.RandomState(123)
-    logger.info('building the model')
-
-    dnn_model = None
-    pretrain_fn = None  # not all models support pretraining right now
-    train_fn = None
-    valid_fn = None
-    # valid_fn and valid_model are the same; reserved to compute multi-stream distortion
-    valid_model = None
-    if model_type == 'DNN':
-        dnn_model = DNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs,
-                        l1_reg=l1_reg, l2_reg=l2_reg,
-                        hidden_layers_sizes=hidden_layers_sizes,
-                        hidden_activation=hidden_activation,
-                        output_activation=output_activation,
-                        use_rprop=use_rprop, rprop_init_update=finetune_lr)
-        train_fn, valid_fn = dnn_model.build_finetune_functions(
-            (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size)
-
-    else:
-        logger.critical('%s type NN model is not supported!' % (model_type))
-        raise Exception('%s type NN model is not supported!' % (model_type))
-
-    logger.info('fine-tuning the %s model' % (model_type))
-
-    start_time = time.clock()
-
-    best_dnn_model = dnn_model
-    best_validation_loss = sys.float_info.max
-    previous_loss = sys.float_info.max
-
-    early_stop = 0
-    epoch = 0
-    previous_finetune_lr = finetune_lr
-
-    while (epoch < training_epochs):
-        epoch = epoch + 1
-
-        # hold the learning rate fixed during warmup, then halve it every epoch
-        current_momentum = momentum
-        current_finetune_lr = finetune_lr
-        if epoch <= warmup_epoch:
-            current_finetune_lr = finetune_lr
-            current_momentum = warmup_momentum
-        else:
-            current_finetune_lr = previous_finetune_lr * 0.5
-
-        previous_finetune_lr = current_finetune_lr
-
-        train_error = []
-        sub_start_time = time.clock()
-
-        while (not train_data_reader.is_finish()):
-            shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition()
-            train_set_x.set_value(numpy.asarray(
-                temp_train_set_x, dtype=theano.config.floatX), borrow=True)
-            train_set_y.set_value(numpy.asarray(
-                temp_train_set_y, dtype=theano.config.floatX), borrow=True)
-
-            # integer division: drop any final incomplete minibatch
-            n_train_batches = train_set_x.get_value().shape[0] // batch_size
-
-            logger.debug('this partition: %d frames (divided into %d batches of size %d)' % (
-                train_set_x.get_value(borrow=True).shape[0], n_train_batches, batch_size))
-
-            for minibatch_index in range(n_train_batches):
-                this_train_error = train_fn(
-                    minibatch_index, current_finetune_lr, current_momentum)
-                train_error.append(this_train_error)
-
-                if numpy.isnan(this_train_error):
-                    logger.warning('training error over minibatch %d of %d was %s' % (
-                        minibatch_index + 1, n_train_batches, this_train_error))
-
-        train_data_reader.reset()
-
-        logger.debug('calculating validation loss')
-        validation_losses = valid_fn()
-        this_validation_loss = numpy.mean(validation_losses)
-
-        # this has a possible bias if the minibatches were not all of identical size,
-        # but it should not be significant if minibatches are small
-        this_train_valid_loss = numpy.mean(train_error)
-
-        sub_end_time = time.clock()
-
-        loss_difference = this_validation_loss - previous_loss
-
-        logger.info('epoch %i, validation error %f, train error %f, time spent %.2f' % (
-            epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time)))
-        if plot:
-            plotlogger.add_plot_point(
-                'training convergence', 'validation set', (epoch, this_validation_loss))
-            plotlogger.add_plot_point(
-                'training convergence', 'training set', (epoch, this_train_valid_loss))
-            plotlogger.save_plot(
-                'training convergence', title='Progress of training and validation error', xlabel='epochs', ylabel='error')
-
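-
-        # --- Note (added for clarity; not part of the scraped source) ---------
-        # Worked example of the schedule in this loop: with learning_rate
-        # 0.002 and warmup_epoch 10, epochs 1-10 train at 0.002, epoch 11 at
-        # 0.001, epoch 12 at 0.0005, and so on. Early stopping (below) ends
-        # training after early_stop_epoch consecutive epochs without a new
-        # best validation loss.
-        # -----------------------------------------------------------------------
-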
-        if this_validation_loss < best_validation_loss:
-            best_dnn_model = dnn_model
-            best_validation_loss = this_validation_loss
-            logger.debug('validation loss decreased, so saving model')
-            early_stop = 0
-        else:
-            logger.debug('validation loss did not improve')
-            dbn = best_dnn_model
-            early_stop += 1
-
-        if early_stop >= early_stop_epoch:
-            # too many consecutive epochs without surpassing the best model
-            logger.debug('stopping early')
-            break
-
-        if math.isnan(this_validation_loss):
-            break
-
-        previous_loss = this_validation_loss
-
-    end_time = time.clock()
-    pickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))
-
-    logger.info('overall training time: %.2fm, validation error %f' %
-                ((end_time - start_time) / 60., best_validation_loss))
-
-    if plot:
-        plotlogger.save_plot(
-            'training convergence', title='Final training and validation error', xlabel='epochs', ylabel='error')
-
-    return best_validation_loss
-
-
-def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list):
-    logger = logging.getLogger("dnn_generation")
-    logger.debug('Starting dnn_generation')
-
-    plotlogger = logging.getLogger("plotting")
-
-    dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
-#    visualize_dnn(dnn_model)
-
-    file_number = len(valid_file_list)
-
-    for i in range(file_number):
-        logger.info('generating %4d of %4d: %s' %
-                    (i + 1, file_number, valid_file_list[i]))
-        fid_lab = open(valid_file_list[i], 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        # truncate to a whole number of frames (integer division)
-        features = features[:(n_ins * (features.size // n_ins))]
-        features = features.reshape((-1, n_ins))
-        temp_set_x = features.tolist()
-        test_set_x = theano.shared(numpy.asarray(
-            temp_set_x, dtype=theano.config.floatX))
-
-        predicted_parameter = dnn_model.parameter_prediction(
-            test_set_x=test_set_x)
-#        predicted_parameter = test_out()
-
-        # write to cmp file
-        predicted_parameter = numpy.array(predicted_parameter, 'float32')
-        temp_parameter = predicted_parameter
-        fid = open(out_file_list[i], 'wb')
-        predicted_parameter.tofile(fid)
-        logger.debug('saved to %s' % out_file_list[i])
-        fid.close()
-
-
-# generate the bottleneck layer as features
-def dnn_hidden_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list):
-    logger = logging.getLogger("dnn_generation")
-    logger.debug('Starting dnn_generation')
-
-    plotlogger = logging.getLogger("plotting")
-
-    dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
-    file_number = len(valid_file_list)
-
-    for i in range(file_number):
-        logger.info('generating %4d of %4d: %s' %
-                    (i + 1, file_number, valid_file_list[i]))
-        fid_lab = open(valid_file_list[i], 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        features = features[:(n_ins * (features.size // n_ins))]
-        features = features.reshape((-1, n_ins))
-        temp_set_x = features.tolist()
-        test_set_x = theano.shared(numpy.asarray(
-            temp_set_x, dtype=theano.config.floatX))
-
-        predicted_parameter = dnn_model.generate_top_hidden_layer(
-            test_set_x=test_set_x)
-
-        # write to cmp file
-        predicted_parameter = numpy.array(predicted_parameter, 'float32')
-        temp_parameter = predicted_parameter
-        fid = open(out_file_list[i], 'wb')
-        predicted_parameter.tofile(fid)
-        logger.debug('saved to %s' % out_file_list[i])
-        fid.close()
-
-
-def main_function(cfg):
-
-    # get a logger for this main function
-    logger = logging.getLogger("main")
-
-    # get another logger to handle plotting duties
-    plotlogger = logging.getLogger("plotting")
-
-    # later, we might do this via a handler that is created, attached and 
configured - # using the standard config mechanism of the logging module - # but for now we need to do it manually - plotlogger.set_plot_path(cfg.plot_dir) - - #### parameter setting######## - hidden_layers_sizes = cfg.hyper_params['hidden_layer_size'] - - # prepare environment - - try: - file_id_list = read_file_list(cfg.file_id_scp) - logger.debug('Loaded file id list from %s' % cfg.file_id_scp) - except IOError: - # this means that open(...) threw an error - logger.critical('Could not load file id list from %s' % - cfg.file_id_scp) - raise - - # total file number including training, development, and testing - total_file_number = len(file_id_list) - - data_dir = cfg.data_dir - - nn_cmp_dir = os.path.join( - data_dir, 'nn' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - nn_cmp_nosil_dir = os.path.join( - data_dir, 'nn_nosil' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - nn_cmp_norm_dir = os.path.join( - data_dir, 'nn_norm' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - - model_dir = os.path.join(cfg.work_dir, 'nnets_model') - gen_dir = os.path.join(cfg.work_dir, 'gen') - - in_file_list_dict = {} - - for feature_name in list(cfg.in_dir_dict.keys()): - in_file_list_dict[feature_name] = prepare_file_path_list( - file_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False) - - nn_cmp_file_list = prepare_file_path_list( - file_id_list, nn_cmp_dir, cfg.cmp_ext) - nn_cmp_nosil_file_list = prepare_file_path_list( - file_id_list, nn_cmp_nosil_dir, cfg.cmp_ext) - nn_cmp_norm_file_list = prepare_file_path_list( - file_id_list, nn_cmp_norm_dir, cfg.cmp_ext) - - # normalisation information - norm_info_file = os.path.join(data_dir, 'norm_info' + cfg.combined_feature_name + - '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat') - - # normalise input full context label - - # currently supporting two different forms of lingustic features - # later, we should generalise this - - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension + cfg.appended_input_dim - logger.info('Input label dimension is %d' % lab_dim) - suffix = str(lab_dim) - elif cfg.label_style == 'HTS_duration': - label_normaliser = HTSDurationLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension # + cfg.appended_input_dim - logger.info('Input label dimension is %d' % lab_dim) - suffix = str(lab_dim) - # no longer supported - use new "composed" style labels instead - elif cfg.label_style == 'composed': - # label_normaliser = XMLLabelNormalisation(xpath_file_name=cfg.xpath_file_name) - suffix = 'composed' - - if cfg.process_labels_in_work_dir: - label_data_dir = cfg.work_dir - else: - label_data_dir = data_dir - - # the number can be removed - binary_label_dir = os.path.join(label_data_dir, 'binary_label_'+suffix) - nn_label_dir = os.path.join(label_data_dir, 'nn_no_silence_lab_'+suffix) - nn_label_norm_dir = os.path.join( - label_data_dir, 'nn_no_silence_lab_norm_'+suffix) -# nn_label_norm_mvn_dir = os.path.join(data_dir, 'nn_no_silence_lab_norm_'+suffix) - - in_label_align_file_list = prepare_file_path_list( - file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - binary_label_file_list = prepare_file_path_list( - file_id_list, binary_label_dir, cfg.lab_ext) - nn_label_file_list = prepare_file_path_list( - file_id_list, nn_label_dir, cfg.lab_ext) - nn_label_norm_file_list = prepare_file_path_list( - 
file_id_list, nn_label_norm_dir, cfg.lab_ext) - - # to do - sanity check the label dimension here? - - min_max_normaliser = None - label_norm_file = 'label_norm_%s.dat' % (cfg.label_style) - label_norm_file = os.path.join(label_data_dir, label_norm_file) - - if cfg.NORMLAB and (cfg.label_style in ['HTS', 'HTS_duration']): - # simple HTS labels - logger.info( - 'preparing label data (input) using standard HTS style labels') - label_normaliser.perform_normalisation( - in_label_align_file_list, binary_label_file_list) - - if cfg.label_style == 'HTS': - remover = SilenceRemover( - n_cmp=lab_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence( - binary_label_file_list, in_label_align_file_list, nn_label_file_list) - elif cfg.label_style == 'HTS_duration': - # don't remove silences for duration - nn_label_file_list = binary_label_file_list - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if cfg.NORMLAB and (cfg.label_style == 'composed'): - # new flexible label preprocessor - - logger.info( - 'preparing label data (input) using "composed" style labels') - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - - logger.info('Loaded label configuration') - # logger.info('%s' % label_composer.configuration.labels ) - - lab_dim = label_composer.compute_label_dimension() - logger.info('label dimension will be %d' % lab_dim) - - if cfg.precompile_xpaths: - label_composer.precompile_xpaths() - - # there are now a set of parallel input label files (e.g, one set of HTS and another set of Ossian trees) - # create all the lists of these, ready to pass to the label composer - - in_label_align_file_list = {} - for label_style, label_style_required in label_composer.label_styles.items(): - if label_style_required: - logger.info( - 'labels of style %s are required - constructing file paths for them' % label_style) - if label_style == 'xpath': - in_label_align_file_list['xpath'] = prepare_file_path_list( - file_id_list, cfg.xpath_label_align_dir, cfg.utt_ext, False) - elif label_style == 'hts': - in_label_align_file_list['hts'] = prepare_file_path_list( - file_id_list, cfg.hts_label_align_dir, cfg.lab_ext, False) - else: - logger.critical( - 'unsupported label style %s specified in label configuration' % label_style) - raise Exception - - # now iterate through the files, one at a time, constructing the labels for them - num_files = len(file_id_list) - logger.info('the label styles required are %s' % - label_composer.label_styles) - - for i in range(num_files): - logger.info( - 'making input label features for %4d of %4d' % (i+1, num_files)) - - # iterate through the required label styles and open each corresponding label file - - # a dictionary of file descriptors, pointing at the required files - required_labels = {} - - for label_style, label_style_required in label_composer.label_styles.items(): - - # the files will be a parallel set of files for a single utterance - # e.g., the XML tree and an HTS label file - if label_style_required: - required_labels[label_style] = open( - in_label_align_file_list[label_style][i], 'r') - logger.debug(' opening label file %s' % - in_label_align_file_list[label_style][i]) - - logger.debug('label 
styles with open files: %s' % - required_labels) - label_composer.make_labels( - required_labels, out_file_name=binary_label_file_list[i], fill_missing_values=cfg.fill_missing_values, iterate_over_frames=cfg.iterate_over_frames) - - # now close all opened files - for fd in required_labels.values(): - fd.close() - - # silence removal - if cfg.remove_silence_using_binary_labels: - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from label using silence feature: %s' % ( - label_composer.configuration.labels[silence_feature])) - logger.info('Silence will be removed from CMP files in same way') - # Binary labels have 2 roles: both the thing trimmed and the instructions for trimming: - trim_silence(binary_label_file_list, nn_label_file_list, lab_dim, - binary_label_file_list, lab_dim, silence_feature, percent_to_keep=5) - else: - logger.info('No silence removal done') - # start from the labels we have just produced, not trimmed versions - nn_label_file_list = binary_label_file_list - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if min_max_normaliser != None: - # save label normalisation information for unseen testing labels - label_min_vector = min_max_normaliser.min_vector - label_max_vector = min_max_normaliser.max_vector - label_norm_info = numpy.concatenate( - (label_min_vector, label_max_vector), axis=0) - - label_norm_info = numpy.array(label_norm_info, 'float32') - fid = open(label_norm_file, 'wb') - label_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (label_min_vector.size, label_norm_file)) - - # make output acoustic data - if cfg.MAKECMP: - logger.info('creating acoustic (output) features') - delta_win = cfg.delta_win # [-0.5, 0.0, 0.5] - acc_win = cfg.acc_win # [1.0, -2.0, 1.0] - - acoustic_worker = AcousticComposition( - delta_win=delta_win, acc_win=acc_win) - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, cfg.in_dimension_dict, cfg.out_dimension_dict) - - if cfg.label_style == 'HTS': - - if cfg.remove_silence_using_binary_labels: - # do this to get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from CMP using binary label file') - - # overwrite the untrimmed audio with the trimmed version: - trim_silence(nn_cmp_file_list, nn_cmp_nosil_file_list, cfg.cmp_dim, - binary_label_file_list, lab_dim, silence_feature) - - else: # back off to previous method using HTS labels: - remover = SilenceRemover( - n_cmp=cfg.cmp_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(nn_cmp_file_list[0:cfg.train_file_number+cfg.valid_file_number], - in_label_align_file_list[0:cfg.train_file_number + - cfg.valid_file_number], - nn_cmp_nosil_file_list[0:cfg.train_file_number+cfg.valid_file_number]) # save to itself - - elif cfg.label_style == 'HTS_duration': - # don't remove silences for duration - nn_cmp_nosil_file_list = nn_cmp_file_list - pass - - # save acoustic normalisation information for normalising the features back - var_dir = 
os.path.join(data_dir, 'var') - if not os.path.exists(var_dir): - os.makedirs(var_dir) - - var_file_dict = {} - for feature_name in list(cfg.out_dimension_dict.keys()): - var_file_dict[feature_name] = os.path.join( - var_dir, feature_name + '_' + str(cfg.out_dimension_dict[feature_name])) - - # normalise output acoustic data - if cfg.NORMCMP: - logger.info('normalising acoustic (output) features using method %s' % - cfg.output_feature_normalisation) - cmp_norm_info = None - if cfg.output_feature_normalisation == 'MVN': - normaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - # calculate mean and std vectors on the training data, and apply on the whole dataset - global_mean_vector = normaliser.compute_mean( - nn_cmp_nosil_file_list[0:cfg.train_file_number], 0, cfg.cmp_dim) - global_std_vector = normaliser.compute_std( - nn_cmp_nosil_file_list[0:cfg.train_file_number], global_mean_vector, 0, cfg.cmp_dim) - - normaliser.feature_normalisation(nn_cmp_nosil_file_list[0:cfg.train_file_number+cfg.valid_file_number], - nn_cmp_norm_file_list[0:cfg.train_file_number+cfg.valid_file_number]) - cmp_norm_info = numpy.concatenate( - (global_mean_vector, global_std_vector), axis=0) - - elif cfg.output_feature_normalisation == 'MINMAX': - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim) - global_mean_vector = min_max_normaliser.compute_mean( - nn_cmp_nosil_file_list[0:cfg.train_file_number]) - global_std_vector = min_max_normaliser.compute_std( - nn_cmp_nosil_file_list[0:cfg.train_file_number], global_mean_vector) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim, min_value=0.01, max_value=0.99) - min_max_normaliser.find_min_max_values( - nn_cmp_nosil_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_cmp_nosil_file_list, nn_cmp_norm_file_list) - - cmp_min_vector = min_max_normaliser.min_vector - cmp_max_vector = min_max_normaliser.max_vector - cmp_norm_info = numpy.concatenate( - (cmp_min_vector, cmp_max_vector), axis=0) - - else: - logger.critical('Normalisation type %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - cmp_norm_info = numpy.array(cmp_norm_info, 'float32') - fid = open(norm_info_file, 'wb') - cmp_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (cfg.output_feature_normalisation, norm_info_file)) - # logger.debug(' value was\n%s' % cmp_norm_info) - - feature_index = 0 - for feature_name in list(cfg.out_dimension_dict.keys()): - feature_std_vector = numpy.array( - global_std_vector[:, feature_index:feature_index+cfg.out_dimension_dict[feature_name]], 'float32') - - fid = open(var_file_dict[feature_name], 'w') - feature_std_vector.tofile(fid) - fid.close() - - logger.info('saved %s variance vector to %s' % - (feature_name, var_file_dict[feature_name])) - # logger.debug(' value was\n%s' % feature_std_vector) - - feature_index += cfg.out_dimension_dict[feature_name] - - train_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number] - train_y_file_list = nn_cmp_norm_file_list[0:cfg.train_file_number] - valid_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - valid_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - test_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number + - 
cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - # we need to know the label dimension before training the DNN - # computing that requires us to look at the labels - # - # currently, there are two ways to do this - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension + cfg.appended_input_dim - - elif cfg.label_style == 'composed': - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - logger.info('label dimension is %d' % lab_dim) - - combined_model_arch = str(len(hidden_layers_sizes)) - for hid_size in hidden_layers_sizes: - combined_model_arch += '_' + str(hid_size) - - nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.model' \ - % (model_dir, cfg.model_type, cfg.combined_feature_name, int(cfg.multistream_switch), - combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number) - - # DNN model training - if cfg.TRAINDNN: - - logger.info('training DNN') - - try: - os.makedirs(model_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create model directory %s' % model_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - try: - # print 'start DNN' - train_DNN(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot) - except KeyboardInterrupt: - logger.critical('train_DNN interrupted via keyboard') - # Could 'raise' the exception further, but that causes a deep traceback to be printed - # which we don't care about for a keyboard interrupt. 
So, just bail out immediately - sys.exit(1) - except: - logger.critical('train_DNN threw an exception') - raise - - # generate parameters from DNN - temp_dir_name = '%s_%s_%d_%d_%d_%d_%d_%d' \ - % (cfg.model_type, cfg.combined_feature_name, int(cfg.do_post_filtering), - cfg.train_file_number, lab_dim, cfg.cmp_dim, - len(hidden_layers_sizes), hidden_layers_sizes[0]) - gen_dir = os.path.join(gen_dir, temp_dir_name) - - gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.DNNGEN: - logger.info('generating from DNN') - - try: - os.makedirs(gen_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create generation directory %s' % gen_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - gen_file_list = prepare_file_path_list( - gen_file_id_list, gen_dir, cfg.cmp_ext) - -# dnn_generation(valid_x_file_list, nnets_file_name, lab_dim, cfg.cmp_dim, gen_file_list) - dnn_generation(test_x_file_list, nnets_file_name, - lab_dim, cfg.cmp_dim, gen_file_list) - - logger.debug('denormalising generated output using method %s' % - cfg.output_feature_normalisation) - - fid = open(norm_info_file, 'rb') - cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32) - fid.close() - cmp_min_max = cmp_min_max.reshape((2, -1)) - cmp_min_vector = cmp_min_max[0, ] - cmp_max_vector = cmp_min_max[1, ] - - if cfg.output_feature_normalisation == 'MVN': - denormaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - denormaliser.feature_denormalisation( - gen_file_list, gen_file_list, cmp_min_vector, cmp_max_vector) - - elif cfg.output_feature_normalisation == 'MINMAX': - denormaliser = MinMaxNormalisation( - cfg.cmp_dim, min_value=0.01, max_value=0.99, min_vector=cmp_min_vector, max_vector=cmp_max_vector) - denormaliser.denormalise_data(gen_file_list, gen_file_list) - else: - logger.critical('denormalising method %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - # perform MLPG to smooth parameter trajectory - # lf0 is included, the output features much have vuv. 
- generator = ParameterGeneration(gen_wav_features=cfg.gen_wav_features) - generator.acoustic_decomposition( - gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict, var_file_dict) - - # generate wav - if cfg.GENWAV: - logger.info('reconstructing waveform(s)') - generate_wav(gen_dir, gen_file_id_list, cfg) # generated speech -# generate_wav(nn_cmp_dir, gen_file_id_list) # reference copy synthesis speech - - # evaluation: calculate distortion - if cfg.CALMCD: - logger.info('calculating MCD') - - ref_data_dir = os.path.join(data_dir, 'ref_data') - - ref_mgc_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.mgc_ext) - ref_bap_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.bap_ext) - ref_lf0_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.lf0_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - calculator = IndividualDistortionComp() - - spectral_distortion = 0.0 - bap_mse = 0.0 - f0_mse = 0.0 - vuv_error = 0.0 - - valid_file_id_list = file_id_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_file_id_list = file_id_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - # get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - # use first feature in label -- hardcoded for now - silence_feature = 0 - - # Use these to trim silence: - untrimmed_test_labels = binary_label_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if 'mgc' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['mgc'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_mgc_list, cfg.mgc_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(in_file_list_dict['mgc'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_mgc_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - # MCD - valid_spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - test_spectral_distortion *= (10 / - numpy.log(10)) * numpy.sqrt(2.0) # MCD - - if 'bap' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['bap'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_bap_list, cfg.bap_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(in_file_list_dict['bap'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_bap_list) - valid_bap_mse = 
calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - test_bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - valid_bap_mse = valid_bap_mse / 10.0 - test_bap_mse = test_bap_mse / 10.0 - - if 'lf0' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['lf0'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_lf0_list, cfg.lf0_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=['*-#+*']) - remover.remove_silence(in_file_list_dict['lf0'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_lf0_list) - valid_f0_mse, valid_f0_corr, valid_vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - test_f0_mse, test_f0_corr, test_vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - - logger.info('Develop: DNN -- MCD: %.3f dB; BAP: %.3f dB; F0:- RMSE: %.3f Hz; CORR: %.3f; VUV: %.3f%%' - % (valid_spectral_distortion, valid_bap_mse, valid_f0_mse, valid_f0_corr, valid_vuv_error*100.)) - logger.info('Test : DNN -- MCD: %.3f dB; BAP: %.3f dB; F0:- RMSE: %.3f Hz; CORR: %.3f; VUV: %.3f%%' - % (test_spectral_distortion, test_bap_mse, test_f0_mse, test_f0_corr, test_vuv_error*100.)) - - -if __name__ == '__main__': - - # these things should be done even before trying to parse the command line - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.cfg - - # set up logging to use our custom class - logging.setLoggerClass(LoggerPlotter) - - # get a logger for this main function - logger = logging.getLogger("main") - - if len(sys.argv) != 2: - logger.critical('usage: run_dnn.sh [config file name]') - sys.exit(1) - - config_file = sys.argv[1] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - - if cfg.profile: - logger.info('profiling is activated') - import cProfile - import pstats - cProfile.run('main_function(cfg)', 'mainstats') - - # create a stream for the profiler to write to - profiling_output = io.StringIO() - p = pstats.Stats('mainstats', stream=profiling_output) - - # print stats to that stream - # here we just report the top 10 functions, sorted by total amount of time spent in each - p.strip_dirs().sort_stats('tottime').print_stats(10) - - # print the result to the log - logger.info('---Profiling result follows---\n%s' % - profiling_output.getvalue()) - profiling_output.close() - logger.info('---End of profiling result---') - - else: - main_function(cfg) - - sys.exit(0) -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. 
-# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - - -import pickle -import gzip -import os -import sys -import errno -import time -import math - -# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why) -import numpy -# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself -import numpy.distutils.__config__ -# and only after that can we import theano -import theano - -from utils.providers import ListDataProvider - -from frontend.label_normalisation import HTSLabelNormalisation, XMLLabelNormalisation -from frontend.silence_remover import SilenceRemover -from frontend.silence_remover import trim_silence -from frontend.min_max_norm import MinMaxNormalisation -#from frontend.acoustic_normalisation import CMPNormalisation -from frontend.acoustic_composition import AcousticComposition -from frontend.parameter_generation import ParameterGeneration -#from frontend.feature_normalisation_base import FeatureNormBase -from frontend.mean_variance_norm import MeanVarianceNorm - -# the new class for label composition and normalisation -from frontend.label_composer import LabelComposer - -import configuration - -from models.dnn import DNN -from models.ms_dnn import MultiStreamDNN -from models.ms_dnn_gv import MultiStreamDNNGv -from models.sdae import StackedDenoiseAutoEncoder - -from utils.compute_distortion import DistortionComputation, IndividualDistortionComp -from utils.generate import generate_wav -from utils.learn_rates import ExpDecreaseLearningRate - - -#import matplotlib.pyplot as plt -# our custom logging class that can also plot -#from logplot.logging_plotting import LoggerPlotter, MultipleTimeSeriesPlot, SingleWeightMatrixPlot -from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot -import logging # as logging -import logging.config -import io - - -def extract_file_id_list(file_list): - 
file_id_list = []
-    for file_name in file_list:
-        file_id = os.path.basename(os.path.splitext(file_name)[0])
-        file_id_list.append(file_id)
-
-    return file_id_list
-
-
-def read_file_list(file_name):
-
-    logger = logging.getLogger("read_file_list")
-
-    file_lists = []
-    fid = open(file_name)
-    for line in fid.readlines():
-        line = line.strip()
-        if len(line) < 1:
-            continue
-        file_lists.append(line)
-    fid.close()
-
-    logger.debug('Read file list from %s' % file_name)
-    return file_lists
-
-
-def make_output_file_list(out_dir, in_file_lists):
-    out_file_lists = []
-
-    for in_file_name in in_file_lists:
-        file_id = os.path.basename(in_file_name)
-        out_file_name = out_dir + '/' + file_id
-        out_file_lists.append(out_file_name)
-
-    return out_file_lists
-
-
-def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True):
-    if not os.path.exists(file_dir) and new_dir_switch:
-        os.makedirs(file_dir)
-    file_name_list = []
-    for file_id in file_id_list:
-        file_name = file_dir + '/' + file_id + file_extension
-        file_name_list.append(file_name)
-
-    return file_name_list
-
-
-def visualize_dnn(dnn):
-
-    layer_num = len(dnn.params) // 2  # including input and output
-
-    plotlogger = logging.getLogger("plotting")
-
-    for i in range(layer_num):
-        fig_name = 'Activation weights W' + str(i)
-        fig_title = 'Activation weights of W' + str(i)
-        xlabel = 'Neuron index of hidden layer ' + str(i)
-        ylabel = 'Neuron index of hidden layer ' + str(i + 1)
-        if i == 0:
-            xlabel = 'Input feature index'
-        if i == layer_num - 1:
-            ylabel = 'Output feature index'
-
-        plotlogger.create_plot(fig_name, SingleWeightMatrixPlot)
-        plotlogger.add_plot_point(
-            fig_name, fig_name, dnn.params[i * 2].get_value(borrow=True).T)
-        plotlogger.save_plot(fig_name, title=fig_name,
-                             xlabel=xlabel, ylabel=ylabel)
-
-
-def train_DNN(train_xy_file_list, valid_xy_file_list,
-              nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False):
-
-    # get loggers for this function
-    # this one writes to both console and file
-    logger = logging.getLogger("main.train_DNN")
-    logger.debug('Starting train_DNN')
-
-    if plot:
-        # this one takes care of plotting duties
-        plotlogger = logging.getLogger("plotting")
-        # create an (empty) plot of training convergence, ready to receive data points
-        plotlogger.create_plot('training convergence', MultipleSeriesPlot)
-
-    try:
-        assert numpy.sum(ms_outs) == n_outs
-    except AssertionError:
-        logger.critical(
-            'the sum of the multi-stream outputs does not equal %d' % (n_outs))
-        raise
-
-    #### parameters ####
-    finetune_lr = float(hyper_params['learning_rate'])
-    training_epochs = int(hyper_params['training_epochs'])
-    batch_size = int(hyper_params['batch_size'])
-    l1_reg = float(hyper_params['l1_reg'])
-    l2_reg = float(hyper_params['l2_reg'])
-    private_l2_reg = float(hyper_params['private_l2_reg'])
-    warmup_epoch = int(hyper_params['warmup_epoch'])
-    momentum = float(hyper_params['momentum'])
-    warmup_momentum = float(hyper_params['warmup_momentum'])
-
-    hidden_layers_sizes = hyper_params['hidden_layers_sizes']
-
-    stream_weights = hyper_params['stream_weights']
-    private_hidden_sizes = hyper_params['private_hidden_sizes']
-
-    buffer_utt_size = buffer_size
-    early_stop_epoch = int(hyper_params['early_stop_epochs'])
-
-    hidden_activation = hyper_params['hidden_activation']
-    output_activation = hyper_params['output_activation']
-
-    stream_lr_weights = hyper_params['stream_lr_weights']
-    use_private_hidden = hyper_params['use_private_hidden']
-
-    model_type = hyper_params['model_type']
-
-    # use a switch to turn on pretraining
-    # 
pretraining may not help too much, if this case, we turn it off to save time - do_pretraining = hyper_params['do_pretraining'] - pretraining_epochs = int(hyper_params['pretraining_epochs']) - pretraining_lr = float(hyper_params['pretraining_lr']) - - buffer_size = int(buffer_size / batch_size) * batch_size - - ################### - (train_x_file_list, train_y_file_list) = train_xy_file_list - (valid_x_file_list, valid_y_file_list) = valid_xy_file_list - - logger.debug('Creating training data provider') - train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, shuffle=True) - - logger.debug('Creating validation data provider') - valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, shuffle=False) - - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition() - train_set_x, train_set_y = shared_train_set_xy - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_next_partition() - valid_set_x, valid_set_y = shared_valid_set_xy - train_data_reader.reset() - valid_data_reader.reset() - - # temporally we use the training set as pretrain_set_x. - # we need to support any data for pretraining - pretrain_set_x = train_set_x - - # numpy random generator - numpy_rng = numpy.random.RandomState(123) - logger.info('building the model') - - dnn_model = None - pretrain_fn = None # not all the model support pretraining right now - train_fn = None - valid_fn = None - # valid_fn and valid_model are the same. reserve to computer multi-stream distortion - valid_model = None - if model_type == 'DNN': - dnn_model = DNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - hidden_activation=hidden_activation, - output_activation=output_activation) - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size) - - elif model_type == 'SDAE': - # basic model is ready. - # if corruption levels is set to zero. it becomes normal autoencoder - dnn_model = StackedDenoiseAutoEncoder(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes) - - if do_pretraining: - pretraining_fn = dnn_model.pretraining_functions( - pretrain_set_x, batch_size) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size) - - # model is ready, but the hyper-parameters are not optimised. 
- elif model_type == 'MSDNN': - dnn_model = MultiStreamDNN(numpy_rng=numpy_rng, n_ins=n_ins, ms_outs=ms_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - stream_weights=stream_weights, - hidden_activation=hidden_activation, - output_activation=output_activation) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), - batch_size=batch_size, lr_weights=stream_lr_weights) - elif model_type == 'MSDNN_GV': # not fully ready - dnn_model = MultiStreamDNNGv(numpy_rng=numpy_rng, n_ins=n_ins, ms_outs=ms_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - stream_weights=stream_weights, - hidden_activation=hidden_activation, - output_activation=output_activation) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), - batch_size=batch_size, lr_weights=stream_lr_weights) - else: - logger.critical('%s type NN model is not supported!' % (model_type)) - raise - - # if pretraining is supported in one model, add the switch here - # be careful to use autoencoder for pretraining here: - # for SDAE, currently only sigmoid function is supported in the hidden layers, as our input is scaled to [0, 1] - # however, tanh works better and converge fast in finetuning - ## - # Will extend this soon... - if do_pretraining and model_type == 'SDAE': - logger.info('pretraining the %s model' % (model_type)) - - corruption_level = 0.0 - # in SDAE we do layer-wise pretraining using autoencoders - for i in range(dnn_model.n_layers): - for epoch in range(pretraining_epochs): - sub_start_time = time.clock() - - pretrain_loss = [] - while (not train_data_reader.is_finish()): - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition() - pretrain_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - - n_train_batches = pretrain_set_x.get_value( - ).shape[0] / batch_size - - for batch_index in range(n_train_batches): - pretrain_loss.append(pretraining_fn[i](index=batch_index, - corruption=corruption_level, - learning_rate=pretraining_lr)) - - sub_end_time = time.clock() - logger.info('Pre-training layer %i, epoch %d, cost %s, time spent%.2f' % - (i+1, epoch+1, numpy.mean(pretrain_loss), (sub_end_time - sub_start_time))) - train_data_reader.reset() - - logger.info('fine-tuning the %s model' % (model_type)) - - start_time = time.clock() - - best_dnn_model = dnn_model - best_validation_loss = sys.float_info.max - previous_loss = sys.float_info.max - - early_stop = 0 - epoch = 0 - previous_finetune_lr = finetune_lr - while (epoch < training_epochs): - epoch = epoch + 1 - - current_momentum = momentum - current_finetune_lr = finetune_lr - if epoch <= warmup_epoch: - current_finetune_lr = finetune_lr - current_momentum = warmup_momentum - else: - current_finetune_lr = previous_finetune_lr * 0.5 - - previous_finetune_lr = current_finetune_lr - - train_error = [] - sub_start_time = time.clock() - - while (not train_data_reader.is_finish()): - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition() - train_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - train_set_y.set_value(numpy.asarray( - temp_train_set_y, dtype=theano.config.floatX), borrow=True) - - n_train_batches = train_set_x.get_value().shape[0] / batch_size - - logger.debug('this partition: %d frames (divided into %d batches 
of size %d)' % ( - train_set_x.get_value(borrow=True).shape[0], n_train_batches, batch_size)) - - for minibatch_index in range(n_train_batches): - this_train_error = train_fn( - minibatch_index, current_finetune_lr, current_momentum) - train_error.append(this_train_error) - - if numpy.isnan(this_train_error): - logger.warning('training error over minibatch %d of %d was %s' % ( - minibatch_index+1, n_train_batches, this_train_error)) - - train_data_reader.reset() - - logger.debug('calculating validation loss') - validation_losses = valid_fn() - this_validation_loss = numpy.mean(validation_losses) - - # this has a possible bias if the minibatches were not all of identical size - # but it should not be siginficant if minibatches are small - this_train_valid_loss = numpy.mean(train_error) - - sub_end_time = time.clock() - - loss_difference = this_validation_loss - previous_loss - - logger.info('epoch %i, validation error %f, train error %f time spent %.2f' % ( - epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time))) - if plot: - plotlogger.add_plot_point( - 'training convergence', 'validation set', (epoch, this_validation_loss)) - plotlogger.add_plot_point( - 'training convergence', 'training set', (epoch, this_train_valid_loss)) - plotlogger.save_plot( - 'training convergence', title='Progress of training and validation error', xlabel='epochs', ylabel='error') - - if this_validation_loss < best_validation_loss: - best_dnn_model = dnn_model - best_validation_loss = this_validation_loss - logger.debug('validation loss decreased, so saving model') - early_stop = 0 - else: - logger.debug('validation loss did not improve') - dbn = best_dnn_model - early_stop += 1 - - if early_stop >= early_stop_epoch: - # too many consecutive epochs without surpassing the best model - logger.debug('stopping early') - break - - if math.isnan(this_validation_loss): - break - - previous_loss = this_validation_loss - - end_time = time.clock() - pickle.dump(best_dnn_model, open(nnets_file_name, 'wb')) - - logger.info('overall training time: %.2fm validation error %f' % - ((end_time - start_time) / 60., best_validation_loss)) - - if plot: - plotlogger.save_plot( - 'training convergence', title='Final training and validation error', xlabel='epochs', ylabel='error') - - return best_validation_loss - - -def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - -# visualize_dnn(dbn) - - file_number = len(valid_file_list) - - for i in range(file_number): - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size / n_ins))] - features = features.reshape((-1, n_ins)) - temp_set_x = features.tolist() - test_set_x = theano.shared(numpy.asarray( - temp_set_x, dtype=theano.config.floatX)) - - predicted_parameter = dnn_model.parameter_prediction( - test_set_x=test_set_x) -# predicted_parameter = test_out() - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - logger.debug('saved to %s' % out_file_list[i]) - fid.close() - -# generate bottleneck 
layer as festures - - -def dnn_hidden_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list, bottleneck_index): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - - file_number = len(valid_file_list) - - for i in range(file_number): - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size / n_ins))] - features = features.reshape((-1, n_ins)) - temp_set_x = features.tolist() - test_set_x = theano.shared(numpy.asarray( - temp_set_x, dtype=theano.config.floatX)) - - predicted_parameter = dnn_model.generate_top_hidden_layer( - test_set_x=test_set_x, bn_layer_index=bottleneck_index) - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - logger.debug('saved to %s' % out_file_list[i]) - fid.close() - - -def main_function(cfg): - - # get a logger for this main function - logger = logging.getLogger("main") - - # get another logger to handle plotting duties - plotlogger = logging.getLogger("plotting") - - # later, we might do this via a handler that is created, attached and configured - # using the standard config mechanism of the logging module - # but for now we need to do it manually - plotlogger.set_plot_path(cfg.plot_dir) - - #### parameter setting######## - hidden_layers_sizes = cfg.hyper_params['hidden_layers_sizes'] - - # prepare environment - - try: - file_id_list = read_file_list(cfg.file_id_scp) - logger.debug('Loaded file id list from %s' % cfg.file_id_scp) - except IOError: - # this means that open(...) 
threw an error - logger.critical('Could not load file id list from %s' % - cfg.file_id_scp) - raise - - # total file number including training, development, and testing - total_file_number = len(file_id_list) - - data_dir = cfg.data_dir - - nn_cmp_dir = os.path.join( - data_dir, 'nn' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - nn_cmp_norm_dir = os.path.join( - data_dir, 'nn_norm' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - - model_dir = os.path.join(cfg.work_dir, 'nnets_model') - gen_dir = os.path.join(cfg.work_dir, 'gen') - - in_file_list_dict = {} - - for feature_name in list(cfg.in_dir_dict.keys()): - in_file_list_dict[feature_name] = prepare_file_path_list( - file_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False) - - nn_cmp_file_list = prepare_file_path_list( - file_id_list, nn_cmp_dir, cfg.cmp_ext) - nn_cmp_norm_file_list = prepare_file_path_list( - file_id_list, nn_cmp_norm_dir, cfg.cmp_ext) - - # normalisation information - norm_info_file = os.path.join(data_dir, 'norm_info' + cfg.combined_feature_name + - '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat') - - # normalise input full context label - - # currently supporting two different forms of lingustic features - # later, we should generalise this - - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension - logger.info('Input label dimension is %d' % lab_dim) - suffix = str(lab_dim) - # no longer supported - use new "composed" style labels instead - elif cfg.label_style == 'composed': - # label_normaliser = XMLLabelNormalisation(xpath_file_name=cfg.xpath_file_name) - suffix = 'composed' - - if cfg.process_labels_in_work_dir: - label_data_dir = cfg.work_dir - else: - label_data_dir = data_dir - - # the number can be removed - binary_label_dir = os.path.join(label_data_dir, 'binary_label_'+suffix) - nn_label_dir = os.path.join(label_data_dir, 'nn_no_silence_lab_'+suffix) - nn_label_norm_dir = os.path.join( - label_data_dir, 'nn_no_silence_lab_norm_'+suffix) -# nn_label_norm_mvn_dir = os.path.join(data_dir, 'nn_no_silence_lab_norm_'+suffix) - - in_label_align_file_list = prepare_file_path_list( - file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - binary_label_file_list = prepare_file_path_list( - file_id_list, binary_label_dir, cfg.lab_ext) - nn_label_file_list = prepare_file_path_list( - file_id_list, nn_label_dir, cfg.lab_ext) - nn_label_norm_file_list = prepare_file_path_list( - file_id_list, nn_label_norm_dir, cfg.lab_ext) - - # to do - sanity check the label dimension here? 
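# --- editor's note: the to-do above asks for a label-dimension sanity check;
# a minimal sketch (assumed, not part of the original) would verify that each
# binary label file holds a whole number of lab_dim-wide float32 frames:
def _check_label_dim(file_name, lab_dim):
    # uses the module-level numpy import of this file
    data = numpy.fromfile(file_name, dtype=numpy.float32)
    if data.size % lab_dim != 0:
        raise ValueError('%s: %d floats is not a multiple of lab_dim %d'
                         % (file_name, data.size, lab_dim))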
- - min_max_normaliser = None - label_norm_file = 'label_norm_%s.dat' % (cfg.label_style) - label_norm_file = os.path.join(label_data_dir, label_norm_file) - - if cfg.NORMLAB and (cfg.label_style == 'HTS'): - # simple HTS labels - logger.info( - 'preparing label data (input) using standard HTS style labels') -# label_normaliser.perform_normalisation(in_label_align_file_list, binary_label_file_list) - -# remover = SilenceRemover(n_cmp = lab_dim, silence_pattern = ['*-#+*']) -# remover.remove_silence(binary_label_file_list, in_label_align_file_list, nn_label_file_list) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if cfg.NORMLAB and (cfg.label_style == 'composed'): - # new flexible label preprocessor - - logger.info( - 'preparing label data (input) using "composed" style labels') - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - - logger.info('Loaded label configuration') - # logger.info('%s' % label_composer.configuration.labels ) - - lab_dim = label_composer.compute_label_dimension() - logger.info('label dimension will be %d' % lab_dim) - - if cfg.precompile_xpaths: - label_composer.precompile_xpaths() - - # there are now a set of parallel input label files (e.g, one set of HTS and another set of Ossian trees) - # create all the lists of these, ready to pass to the label composer - - in_label_align_file_list = {} - for label_style, label_style_required in label_composer.label_styles.items(): - if label_style_required: - logger.info( - 'labels of style %s are required - constructing file paths for them' % label_style) - if label_style == 'xpath': - in_label_align_file_list['xpath'] = prepare_file_path_list( - file_id_list, cfg.xpath_label_align_dir, cfg.utt_ext, False) - elif label_style == 'hts': - in_label_align_file_list['hts'] = prepare_file_path_list( - file_id_list, cfg.hts_label_align_dir, cfg.lab_ext, False) - else: - logger.critical( - 'unsupported label style %s specified in label configuration' % label_style) - raise Exception - - # now iterate through the files, one at a time, constructing the labels for them - num_files = len(file_id_list) - logger.info('the label styles required are %s' % - label_composer.label_styles) - - for i in range(num_files): - logger.info( - 'making input label features for %4d of %4d' % (i+1, num_files)) - - # iterate through the required label styles and open each corresponding label file - - # a dictionary of file descriptors, pointing at the required files - required_labels = {} - - for label_style, label_style_required in label_composer.label_styles.items(): - - # the files will be a parallel set of files for a single utterance - # e.g., the XML tree and an HTS label file - if label_style_required: - required_labels[label_style] = open( - in_label_align_file_list[label_style][i], 'r') - logger.debug(' opening label file %s' % - in_label_align_file_list[label_style][i]) - - logger.debug('label styles with open files: %s' % - required_labels) - label_composer.make_labels( - required_labels, out_file_name=binary_label_file_list[i], fill_missing_values=cfg.fill_missing_values, iterate_over_frames=cfg.iterate_over_frames) - - # now close all opened files - for fd in 
required_labels.values(): - fd.close() - - # silence removal - if cfg.remove_silence_using_binary_labels: - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from label using silence feature: %s' % ( - label_composer.configuration.labels[silence_feature])) - logger.info('Silence will be removed from CMP files in same way') - # Binary labels have 2 roles: both the thing trimmed and the instructions for trimming: - trim_silence(binary_label_file_list, nn_label_file_list, lab_dim, - binary_label_file_list, lab_dim, silence_feature) - else: - logger.info('No silence removal done') - # start from the labels we have just produced, not trimmed versions - nn_label_file_list = binary_label_file_list - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if min_max_normaliser != None: - # save label normalisation information for unseen testing labels - label_min_vector = min_max_normaliser.min_vector - label_max_vector = min_max_normaliser.max_vector - label_norm_info = numpy.concatenate( - (label_min_vector, label_max_vector), axis=0) - - label_norm_info = numpy.array(label_norm_info, 'float32') - fid = open(label_norm_file, 'wb') - label_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (label_min_vector.size, label_norm_file)) - - # make output acoustic data - if cfg.MAKECMP: - logger.info('creating acoustic (output) features') - delta_win = [-0.5, 0.0, 0.5] - acc_win = [1.0, -2.0, 1.0] - - acoustic_worker = AcousticComposition( - delta_win=delta_win, acc_win=acc_win) - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, cfg.in_dimension_dict, cfg.out_dimension_dict) - - if cfg.remove_silence_using_binary_labels: - # do this to get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from CMP using binary label file') - - # overwrite the untrimmed audio with the trimmed version: - trim_silence(nn_cmp_file_list, nn_cmp_file_list, cfg.cmp_dim, - binary_label_file_list, lab_dim, silence_feature) - - else: # back off to previous method using HTS labels: - remover = SilenceRemover( - n_cmp=cfg.cmp_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - nn_cmp_file_list, in_label_align_file_list, nn_cmp_file_list) # save to itself - - # save acoustic normalisation information for normalising the features back - var_dir = os.path.join(data_dir, 'var') - if not os.path.exists(var_dir): - os.makedirs(var_dir) - - var_file_dict = {} - for feature_name in list(cfg.out_dimension_dict.keys()): - var_file_dict[feature_name] = os.path.join( - var_dir, feature_name + '_' + str(cfg.out_dimension_dict[feature_name])) - - # normalise output acoustic data - if cfg.NORMCMP: - logger.info('normalising acoustic (output) features using method %s' % - cfg.output_feature_normalisation) - cmp_norm_info = None - if cfg.output_feature_normalisation == 'MVN': - normaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - # calculate mean and std vectors on the training data, and apply on the whole 
dataset - global_mean_vector = normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number], 0, cfg.cmp_dim) - global_std_vector = normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector, 0, cfg.cmp_dim) - - normaliser.feature_normalisation( - nn_cmp_file_list, nn_cmp_norm_file_list) - cmp_norm_info = numpy.concatenate( - (global_mean_vector, global_std_vector), axis=0) - - elif cfg.output_feature_normalisation == 'MINMAX': - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim) - global_mean_vector = min_max_normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number]) - global_std_vector = min_max_normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim, min_value=0.01, max_value=0.99) - min_max_normaliser.find_min_max_values( - nn_cmp_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_cmp_file_list, nn_cmp_norm_file_list) - - cmp_min_vector = min_max_normaliser.min_vector - cmp_max_vector = min_max_normaliser.max_vector - cmp_norm_info = numpy.concatenate( - (cmp_min_vector, cmp_max_vector), axis=0) - - else: - logger.critical('Normalisation type %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - cmp_norm_info = numpy.array(cmp_norm_info, 'float32') - fid = open(norm_info_file, 'wb') - cmp_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (cfg.output_feature_normalisation, norm_info_file)) - # logger.debug(' value was\n%s' % cmp_norm_info) - - feature_index = 0 - for feature_name in list(cfg.out_dimension_dict.keys()): - feature_std_vector = numpy.array( - global_std_vector[:, feature_index:feature_index+cfg.out_dimension_dict[feature_name]], 'float32') - - fid = open(var_file_dict[feature_name], 'w') - feature_std_vector.tofile(fid) - fid.close() - - logger.info('saved %s variance vector to %s' % - (feature_name, var_file_dict[feature_name])) - # logger.debug(' value was\n%s' % feature_std_vector) - - feature_index += cfg.out_dimension_dict[feature_name] - - train_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number] - train_y_file_list = nn_cmp_norm_file_list[0:cfg.train_file_number] - valid_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - valid_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - test_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - # we need to know the label dimension before training the DNN - # computing that requires us to look at the labels - # - # currently, there are two ways to do this - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension - - elif cfg.label_style == 'composed': - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - logger.info('label dimension is %d' % lab_dim) - - combined_model_arch = str(len(hidden_layers_sizes)) - for hid_size in hidden_layers_sizes: - combined_model_arch += '_' + str(hid_size) - 
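# --- editor's note: a worked example (with assumed values, not project
# defaults) of the architecture tag built just above:
#     hidden_layers_sizes = [512, 512, 512]  ->  combined_model_arch == '3_512_512_512'
# so with model_type='DNN', multistream_switch=0, lab_dim=601, cmp_dim=187 and
# 1000 training files, the model file name assembled below comes out as
#     <model_dir>/DNN_<combined_feature_name>_0_3_512_512_512_601.187.train.1000.model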
-# nnets_file_name = '%s/%s_%s_%d.%d.%d.%d.%d.train.%d.model' \ -# %(model_dir, cfg.model_type, cfg.combined_feature_name, int(cfg.multistream_switch), -# len(hidden_layers_sizes), hidden_layers_sizes[0], -# lab_dim, cfg.cmp_dim, cfg.train_file_number) - - nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.model' \ - % (model_dir, cfg.model_type, cfg.combined_feature_name, int(cfg.multistream_switch), - combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number) - - # DNN model training - if cfg.TRAINDNN: - - logger.info('training DNN') - - try: - os.makedirs(model_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create model directory %s' % model_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - try: - # print 'start DNN' - train_DNN(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot) - except KeyboardInterrupt: - logger.critical('train_DNN interrupted via keyboard') - # Could 'raise' the exception further, but that causes a deep traceback to be printed - # which we don't care about for a keyboard interrupt. So, just bail out immediately - sys.exit(1) - except: - logger.critical('train_DNN threw an exception') - raise - - # generate parameters from DNN - - if cfg.GENBNFEA: - - temp_dir_name = '%s_%s_%d_%d_%d_%d_%d_%s_hidden' \ - % (cfg.model_type, cfg.combined_feature_name, int(cfg.do_post_filtering), - cfg.train_file_number, lab_dim, cfg.cmp_dim, - len(hidden_layers_sizes), combined_model_arch) - gen_dir = os.path.join(gen_dir, temp_dir_name) - - bottleneck_size = min(hidden_layers_sizes) - bottleneck_index = 0 - for i in range(len(hidden_layers_sizes)): - if hidden_layers_sizes[i] == bottleneck_size: - bottleneck_index = i - - logger.info('generating from DNN') - - try: - os.makedirs(gen_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create generation directory %s' % gen_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - gen_file_id_list = file_id_list[0:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - test_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - - gen_file_list = prepare_file_path_list( - gen_file_id_list, gen_dir, cfg.cmp_ext) - - dnn_hidden_generation(test_x_file_list, nnets_file_name, - lab_dim, cfg.cmp_dim, gen_file_list, bottleneck_index) - - gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - temp_dir_name = '%s_%s_%d_%d_%d_%d_%d_%d_%d' \ - % (cfg.model_type, cfg.combined_feature_name, int(cfg.do_post_filtering), - cfg.train_file_number, lab_dim, cfg.cmp_dim, - len(hidden_layers_sizes), max(hidden_layers_sizes), min(hidden_layers_sizes)) - gen_dir = os.path.join(gen_dir, temp_dir_name) - - if cfg.DNNGEN: - logger.info('generating from DNN') - - try: - os.makedirs(gen_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already 
exists - pass - else: - logger.critical( - 'Failed to create generation directory %s' % gen_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - gen_file_list = prepare_file_path_list( - gen_file_id_list, gen_dir, cfg.cmp_ext) - -# dnn_generation(valid_x_file_list, nnets_file_name, lab_dim, cfg.cmp_dim, gen_file_list) - dnn_generation(test_x_file_list, nnets_file_name, - lab_dim, cfg.cmp_dim, gen_file_list) - - logger.debug('denormalising generated output using method %s' % - cfg.output_feature_normalisation) - - fid = open(norm_info_file, 'rb') - cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32) - fid.close() - cmp_min_max = cmp_min_max.reshape((2, -1)) - cmp_min_vector = cmp_min_max[0, ] - cmp_max_vector = cmp_min_max[1, ] - - if cfg.output_feature_normalisation == 'MVN': - denormaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - denormaliser.feature_denormalisation( - gen_file_list, gen_file_list, cmp_min_vector, cmp_max_vector) - - elif cfg.output_feature_normalisation == 'MINMAX': - denormaliser = MinMaxNormalisation( - cfg.cmp_dim, min_value=0.01, max_value=0.99, min_vector=cmp_min_vector, max_vector=cmp_max_vector) - denormaliser.denormalise_data(gen_file_list, gen_file_list) - else: - logger.critical('denormalising method %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - # perform MLPG to smooth parameter trajectory - # lf0 is included, the output features much have vuv. - generator = ParameterGeneration(gen_wav_features=cfg.gen_wav_features) - generator.acoustic_decomposition( - gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict, var_file_dict) - - # generate wav - if cfg.GENWAV: - logger.info('reconstructing waveform(s)') - generate_wav(gen_dir, gen_file_id_list, cfg) # generated speech -# generate_wav(nn_cmp_dir, gen_file_id_list) # reference copy synthesis speech - - # evaluation: calculate distortion - if cfg.CALMCD: - logger.info('calculating MCD') - - ref_data_dir = os.path.join(data_dir, 'ref_data') - - ref_mgc_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.mgc_ext) - ref_bap_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.bap_ext) - ref_lf0_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.lf0_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - calculator = IndividualDistortionComp() - - spectral_distortion = 0.0 - bap_mse = 0.0 - f0_mse = 0.0 - vuv_error = 0.0 - - valid_file_id_list = file_id_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_file_id_list = file_id_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - # get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - # use first feature in label -- hardcoded for now - silence_feature = 0 - - # Use these to trim silence: - untrimmed_test_labels = binary_label_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if 'mgc' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['mgc'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - 
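# --- editor's note: the '(10 / numpy.log(10)) * numpy.sqrt(2.0)' factor
# applied a few lines below converts the Euclidean distance between
# mel-cepstra returned by the calculator into mel-cepstral distortion in dB:
#     MCD = (10 / ln 10) * sqrt(2 * sum_d (c_d - c_hat_d)^2)
# i.e. distance * ~6.1418.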
trim_silence(untrimmed_reference_data, ref_mgc_list, cfg.mgc_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=['*-#+*']) - remover.remove_silence(in_file_list_dict['mgc'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_mgc_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - # MCD - valid_spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - test_spectral_distortion *= (10 / - numpy.log(10)) * numpy.sqrt(2.0) # MCD - - if 'bap' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['bap'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_bap_list, cfg.bap_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=['*-#+*']) - remover.remove_silence(in_file_list_dict['bap'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_bap_list) - valid_bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - test_bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - valid_bap_mse = valid_bap_mse / 10.0 - # Cassia's bap is computed from 10*log|S(w)|. 
if use HTS/SPTK style, do the same as MGC - test_bap_mse = test_bap_mse / 10.0 - - if 'lf0' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['lf0'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_lf0_list, cfg.lf0_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=['*-#+*']) - remover.remove_silence(in_file_list_dict['lf0'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_lf0_list) - valid_f0_mse, valid_vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - test_f0_mse, test_vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - - logger.info('Develop: DNN -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' - % (valid_spectral_distortion, valid_bap_mse, valid_f0_mse, valid_vuv_error*100.)) - logger.info('Test : DNN -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' - % (test_spectral_distortion, test_bap_mse, test_f0_mse, test_vuv_error*100.)) - - # this can be removed - # - if 0: # to calculate distortion of HMM baseline - hmm_gen_no_silence_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nick_hmm_pf_2400_no_silence' - hmm_gen_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nick_hmm_pf_2400' - - if 1: - hmm_mgc_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_dir, cfg.mgc_ext) - hmm_bap_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_dir, cfg.bap_ext) - hmm_lf0_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_dir, cfg.lf0_ext) - - hmm_mgc_no_silence_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_no_silence_dir, cfg.mgc_ext) - hmm_bap_no_silence_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_no_silence_dir, cfg.bap_ext) - hmm_lf0_no_silence_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_no_silence_dir, cfg.lf0_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - hmm_mgc_list, in_gen_label_align_file_list, hmm_mgc_no_silence_list) - - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - hmm_bap_list, in_gen_label_align_file_list, hmm_bap_no_silence_list) - - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - hmm_lf0_list, in_gen_label_align_file_list, hmm_lf0_no_silence_list) - - calculator = IndividualDistortionComp() - - spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.mgc_ext, cfg.mgc_dim) - bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.bap_ext, cfg.bap_dim) - f0_mse, vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.lf0_ext, cfg.lf0_dim) - - spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - bap_mse = bap_mse / 10.0 - - logger.info('Develop: HMM -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' % - (spectral_distortion, bap_mse, f0_mse, vuv_error*100.)) - - 
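# --- editor's note (assumed reading, not in the original): this whole
# 'if 0:' branch is dead code kept around to score an HMM baseline against
# hard-coded /afs/inf.ed.ac.uk paths; flip the 0 to 1 only if those baseline
# directories actually exist on your system.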
spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.mgc_ext, cfg.mgc_dim) - bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.bap_ext, cfg.bap_dim) - f0_mse, vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.lf0_ext, cfg.lf0_dim) - - spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - bap_mse = bap_mse / 10.0 - - logger.info('Test : HMM -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' % - (spectral_distortion, bap_mse, f0_mse, vuv_error*100.)) - - -if __name__ == '__main__': - - # these things should be done even before trying to parse the command line - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.cfg - - # set up logging to use our custom class - logging.setLoggerClass(LoggerPlotter) - - # get a logger for this main function - logger = logging.getLogger("main") - - if len(sys.argv) != 2: - logger.critical('usage: run_dnn.sh [config file name]') - sys.exit(1) - - config_file = sys.argv[1] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - - if cfg.profile: - logger.info('profiling is activated') - import cProfile - import pstats - cProfile.run('main_function(cfg)', 'mainstats') - - # create a stream for the profiler to write to - profiling_output = io.StringIO() - p = pstats.Stats('mainstats', stream=profiling_output) - - # print stats to that stream - # here we just report the top 10 functions, sorted by total amount of time spent in each - p.strip_dirs().sort_stats('tottime').print_stats(10) - - # print the result to the log - logger.info('---Profiling result follows---\n%s' % - profiling_output.getvalue()) - profiling_output.close() - logger.info('---End of profiling result---') - - else: - main_function(cfg) - - sys.exit(0) -################################################################################ -# The Neural Network (NN) based Speech Synthesis System -# https://svn.ecdf.ed.ac.uk/repo/inf/dnn_tts/ -# -# Centre for Speech Technology Research -# University of Edinburgh, UK -# Copyright (c) 2014-2015 -# All Rights Reserved. -# -# The system as a whole and most of the files in it are distributed -# under the following copyright and conditions -# -# Permission is hereby granted, free of charge, to use and distribute -# this software and its documentation without restriction, including -# without limitation the rights to use, copy, modify, merge, publish, -# distribute, sublicense, and/or sell copies of this work, and to -# permit persons to whom this work is furnished to do so, subject to -# the following conditions: -# -# - Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# - Redistributions in binary form must reproduce the above -# copyright notice, this list of conditions and the following -# disclaimer in the documentation and/or other materials provided -# with the distribution. -# - The authors' names may not be used to endorse or promote products derived -# from this software without specific prior written permission. 
-# -# THE UNIVERSITY OF EDINBURGH AND THE CONTRIBUTORS TO THIS WORK -# DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING -# ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT -# SHALL THE UNIVERSITY OF EDINBURGH NOR THE CONTRIBUTORS BE LIABLE -# FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES -# WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN -# AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, -# ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF -# THIS SOFTWARE. -################################################################################ - - -import pickle -import gzip -import os -import sys -import errno -import time -import math - -# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why) -import numpy -# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself -import numpy.distutils.__config__ -# and only after that can we import theano -import theano - -from utils.providers import ListDataProvider - -from frontend.label_normalisation import HTSLabelNormalisation, XMLLabelNormalisation -from frontend.silence_remover import SilenceRemover -from frontend.silence_remover import trim_silence -from frontend.min_max_norm import MinMaxNormalisation -#from frontend.acoustic_normalisation import CMPNormalisation -from frontend.acoustic_composition import AcousticComposition -from frontend.parameter_generation import ParameterGeneration -#from frontend.feature_normalisation_base import FeatureNormBase -from frontend.mean_variance_norm import MeanVarianceNorm -from frontend.mlpg_fast import MLParameterGenerationFast - -# the new class for label composition and normalisation -from frontend.label_composer import LabelComposer - -import configuration - -from models.dnn import DNN -#from models.ms_dnn import MultiStreamDNN -#from models.ms_dnn_gv import MultiStreamDNNGv -#from models.sdae import StackedDenoiseAutoEncoder -from models.mdn import MixtureDensityNetwork - -from utils.compute_distortion import DistortionComputation, IndividualDistortionComp -from utils.generate import generate_wav -from utils.learn_rates import ExpDecreaseLearningRate -from io_funcs.binary_io import BinaryIOCollection - -#import matplotlib.pyplot as plt -# our custom logging class that can also plot -#from logplot.logging_plotting import LoggerPlotter, MultipleTimeSeriesPlot, SingleWeightMatrixPlot -from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot -import logging # as logging -import logging.config -import io - - -def extract_file_id_list(file_list): - file_id_list = [] - for file_name in file_list: - file_id = os.path.basename(os.path.splitext(file_name)[0]) - file_id_list.append(file_id) - - return file_id_list - - -def read_file_list(file_name): - - logger = logging.getLogger("read_file_list") - - file_lists = [] - fid = open(file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - file_lists.append(line) - fid.close() - - logger.debug('Read file list from %s' % file_name) - return file_lists - - -def make_output_file_list(out_dir, in_file_lists): - out_file_lists = [] - - for in_file_name in in_file_lists: - file_id = os.path.basename(in_file_name) - out_file_name = out_dir + '/' + file_id - out_file_lists.append(out_file_name) - - return out_file_lists - - -def prepare_file_path_list(file_id_list, file_dir, file_extension, 
new_dir_switch=True): - if not os.path.exists(file_dir) and new_dir_switch: - os.makedirs(file_dir) - file_name_list = [] - for file_id in file_id_list: - file_name = file_dir + '/' + file_id + file_extension - file_name_list.append(file_name) - - return file_name_list - - -def visualize_dnn(dnn): - - layer_num = len(dnn.params) / 2 # including input and output - - for i in range(layer_num): - fig_name = 'Activation weights W' + str(i) - fig_title = 'Activation weights of W' + str(i) - xlabel = 'Neuron index of hidden layer ' + str(i) - ylabel = 'Neuron index of hidden layer ' + str(i+1) - if i == 0: - xlabel = 'Input feature index' - if i == layer_num-1: - ylabel = 'Output feature index' - - logger.create_plot(fig_name, SingleWeightMatrixPlot) - plotlogger.add_plot_point( - fig_name, fig_name, dnn.params[i*2].get_value(borrow=True).T) - plotlogger.save_plot(fig_name, title=fig_name, - xlabel=xlabel, ylabel=ylabel) - - -def train_DNN(train_xy_file_list, valid_xy_file_list, - nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, - mdn_component, var_floor=0.01, beta_opt=False, eff_sample_size=0.8, mean_log_det=-100.0, - plot=False, start_from_trained_model='_'): - - # get loggers for this function - # this one writes to both console and file - logger = logging.getLogger("main.train_DNN") - logger.debug('Starting train_DNN') - - if plot: - # this one takes care of plotting duties - plotlogger = logging.getLogger("plotting") - # create an (empty) plot of training convergence, ready to receive data points - logger.create_plot('training convergence', MultipleSeriesPlot) - - try: - assert numpy.sum(ms_outs) == n_outs - except AssertionError: - logger.critical( - 'the summation of multi-stream outputs does not equal to %d' % (n_outs)) - raise - - ####parameters##### - finetune_lr = float(hyper_params['learning_rate']) - training_epochs = int(hyper_params['training_epochs']) - batch_size = int(hyper_params['batch_size']) - l1_reg = float(hyper_params['l1_reg']) - l2_reg = float(hyper_params['l2_reg']) - private_l2_reg = float(hyper_params['private_l2_reg']) - warmup_epoch = int(hyper_params['warmup_epoch']) - momentum = float(hyper_params['momentum']) - warmup_momentum = float(hyper_params['warmup_momentum']) - - use_rprop = int(hyper_params['use_rprop']) - - hidden_layers_sizes = hyper_params['hidden_layer_size'] - - stream_weights = hyper_params['stream_weights'] - private_hidden_sizes = hyper_params['private_hidden_sizes'] - - buffer_utt_size = buffer_size - early_stop_epoch = int(hyper_params['early_stop_epochs']) - - hidden_activation = hyper_params['hidden_activation'] - output_activation = hyper_params['output_activation'] - - stream_lr_weights = hyper_params['stream_lr_weights'] - use_private_hidden = hyper_params['use_private_hidden'] - - model_type = hyper_params['model_type'] - - # use a switch to turn on pretraining - # pretraining may not help too much, if this case, we turn it off to save time - do_pretraining = hyper_params['do_pretraining'] - pretraining_epochs = int(hyper_params['pretraining_epochs']) - pretraining_lr = float(hyper_params['pretraining_lr']) - - buffer_size = int(buffer_size / batch_size) * batch_size - - ################### - (train_x_file_list, train_y_file_list) = train_xy_file_list - (valid_x_file_list, valid_y_file_list) = valid_xy_file_list - - logger.debug('Creating training data provider') - train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, 
shuffle=True) - - logger.debug('Creating validation data provider') - valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, shuffle=False) - - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition() - train_set_x, train_set_y = shared_train_set_xy - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_next_partition() - valid_set_x, valid_set_y = shared_valid_set_xy - train_data_reader.reset() - valid_data_reader.reset() - - # temporally we use the training set as pretrain_set_x. - # we need to support any data for pretraining - pretrain_set_x = train_set_x - - # numpy random generator - numpy_rng = numpy.random.RandomState(123) - logger.info('building the model') - - dnn_model = None - pretrain_fn = None # not all the model support pretraining right now - train_fn = None - valid_fn = None - # valid_fn and valid_model are the same. reserve to computer multi-stream distortion - valid_model = None - - if model_type == 'DNN': - - dnn_model = MixtureDensityNetwork(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - hidden_activation=hidden_activation, - output_activation=output_activation, var_floor=var_floor, - n_component=mdn_component, - use_rprop=use_rprop, rprop_init_update=finetune_lr, - beta_opt=beta_opt, eff_sample_size=eff_sample_size, mean_log_det=mean_log_det) -# dnn_model = DNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs = n_outs, -# l1_reg = l1_reg, l2_reg = l2_reg, -# hidden_layers_sizes = hidden_layers_sizes, -# hidden_activation = hidden_activation, -# output_activation = output_activation) - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size) - - else: - logger.critical('%s type NN model is not supported!' % (model_type)) - raise - - # We can't just unpickle the old model and use that because fine-tune functions - # depend on opt_l2e option used in construction of initial model. One way around this - # would be to unpickle, manually set unpickled_dnn_model.opt_l2e=True and then call - # unpickled_dnn_model.build_finetne_function() again. 
This is another way, construct - # new model from scratch with opt_l2e=True, then copy existing weights over: - - if start_from_trained_model != '_': - logger.info('load parameters from existing model: %s' % - (start_from_trained_model)) - if not os.path.isfile(start_from_trained_model): - sys.exit('Model file %s does not exist' % - (start_from_trained_model)) - existing_dnn_model = pickle.load(open(start_from_trained_model, 'rb')) - if not len(existing_dnn_model.params) == len(dnn_model.params): - sys.exit('Old and new models have different numbers of weight matrices') - for (old_weight, new_weight) in zip(existing_dnn_model.params, dnn_model.params): - old_val = old_weight.get_value() - new_val = new_weight.get_value() - if numpy.shape(old_val) == numpy.shape(new_val): - new_weight.set_value(old_val) - else: - sys.exit('old and new weight matrices have different shapes') - - logger.info('fine-tuning the %s model' % (model_type)) - - start_time = time.clock() - - best_dnn_model = dnn_model - best_validation_loss = sys.float_info.max - previous_loss = sys.float_info.max - - early_stop = 0 - epoch = 0 - previous_finetune_lr = finetune_lr - while (epoch < training_epochs): # training_epochs - epoch = epoch + 1 - - current_momentum = momentum - current_finetune_lr = finetune_lr - if epoch <= warmup_epoch: - current_finetune_lr = finetune_lr - current_momentum = warmup_momentum - else: - current_finetune_lr = previous_finetune_lr * 0.5 - - previous_finetune_lr = current_finetune_lr - - train_error = [] - sub_start_time = time.clock() - - while (not train_data_reader.is_finish()): - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition() - train_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - train_set_y.set_value(numpy.asarray( - temp_train_set_y, dtype=theano.config.floatX), borrow=True) - - n_train_batches = train_set_x.get_value().shape[0] / batch_size - - logger.debug('this partition: %d frames (divided into %d batches of size %d)' % ( - train_set_x.get_value(borrow=True).shape[0], n_train_batches, batch_size)) - - for minibatch_index in range(n_train_batches): - this_train_error = train_fn( - minibatch_index, current_finetune_lr, current_momentum) - train_error.append(this_train_error) - - if numpy.isnan(this_train_error): - logger.warning('training error over minibatch %d of %d was %s' % ( - minibatch_index+1, n_train_batches, this_train_error)) - - train_data_reader.reset() - - logger.debug('calculating validation loss') - validation_losses = valid_fn() - this_validation_loss = numpy.mean(validation_losses) - - # this has a possible bias if the minibatches were not all of identical size - # but it should not be siginficant if minibatches are small - this_train_valid_loss = numpy.mean(train_error) - - sub_end_time = time.clock() - - loss_difference = this_validation_loss - previous_loss - - logger.info('epoch %i, validation error %f, train error %f time spent %.2f' % ( - epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time))) - if plot: - plotlogger.add_plot_point( - 'training convergence', 'validation set', (epoch, this_validation_loss)) - plotlogger.add_plot_point( - 'training convergence', 'training set', (epoch, this_train_valid_loss)) - plotlogger.save_plot('training convergence', title='Optimisation progress', - xlabel='training epochs', ylabel='objective function') - - if this_validation_loss < best_validation_loss: - best_dnn_model = dnn_model - 
best_validation_loss = this_validation_loss - logger.debug('validation loss decreased, so saving model') - early_stop = 0 - else: - logger.debug('validation loss did not improve') - dbn = best_dnn_model - early_stop += 1 - - if early_stop >= early_stop_epoch: - # too many consecutive epochs without surpassing the best model - logger.debug('stopping early') - break - - if math.isnan(this_validation_loss): - break - - previous_loss = this_validation_loss - - end_time = time.clock() - pickle.dump(best_dnn_model, open(nnets_file_name, 'wb')) - - logger.info('overall training time: %.2fm validation error %f' % - ((end_time - start_time) / 60., best_validation_loss)) - - if plot: - plotlogger.save_plot( - 'training convergence', title='Final training and validation error', xlabel='epochs', ylabel='error') - - return best_validation_loss - - -def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - -# visualize_dnn(dbn) - - file_number = len(valid_file_list) - - for i in range(file_number): - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size / n_ins))] - features = features.reshape((-1, n_ins)) - temp_set_x = features.tolist() - test_set_x = theano.shared(numpy.asarray( - temp_set_x, dtype=theano.config.floatX)) - - predicted_parameter = dnn_model.parameter_prediction( - test_set_x=test_set_x) -# predicted_parameter = test_out() - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - logger.debug('saved to %s' % out_file_list[i]) - fid.close() - -# multiple Gaussian components - - -def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list, target_mean_vector, target_std_vector, out_dimension_dict, file_extension_dict): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - inf_float = -1.0e+10 - - plotlogger = logging.getLogger("plotting") - - gen_wav_features = ['mgc', 'lf0', 'bap'] - stream_start_index = {} - dimension_index = 0 - for feature_name in list(out_dimension_dict.keys()): - stream_start_index[feature_name] = dimension_index - dimension_index += out_dimension_dict[feature_name] - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - - file_number = len(valid_file_list) - io_funcs = BinaryIOCollection() - - mlpg = MLParameterGenerationFast() - - for i in range(file_number): - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size / n_ins))] - features = features.reshape((-1, n_ins)) - - frame_number = features.shape[0] - - test_set_x = theano.shared(numpy.asarray( - features, dtype=theano.config.floatX)) - - mean_matrix = numpy.tile(target_mean_vector, (features.shape[0], 1)) - std_matrix = numpy.tile(target_std_vector, (features.shape[0], 1)) - - predicted_mix = dnn_model.parameter_prediction_mix( - test_set_x=test_set_x) - max_index = numpy.argmax(predicted_mix, axis=1) 
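# --- editor's note: a vectorised sketch (assumed equivalent, not part of the
# original) of the per-frame loop below -- each frame keeps the mean/variance
# of its most probable mixture component:
#     n_component = temp_predicted_mu.shape[1] // n_outs
#     rows = numpy.arange(temp_predicted_mu.shape[0])
#     predicted_mu = temp_predicted_mu.reshape(-1, n_component, n_outs)[rows, max_index]
#     predicted_sigma = temp_predicted_sigma.reshape(-1, n_component, n_outs)[rows, max_index]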
- - temp_predicted_mu = dnn_model.parameter_prediction( - test_set_x=test_set_x) - temp_predicted_sigma = dnn_model.parameter_prediction_sigma( - test_set_x=test_set_x) - predicted_mu = numpy.zeros((temp_predicted_mu.shape[0], n_outs)) - predicted_sigma = numpy.zeros((temp_predicted_sigma.shape[0], n_outs)) - for kk in range(temp_predicted_mu.shape[0]): - predicted_mu[kk, :] = temp_predicted_mu[kk, - max_index[kk]*n_outs:(max_index[kk]+1)*n_outs] - predicted_sigma[kk, :] = temp_predicted_sigma[kk, - max_index[kk]*n_outs:(max_index[kk]+1)*n_outs] -# print predicted_mu.shape -# predicted_mu = predicted_mu[aa*n_outs:(aa+1)*n_outs] - predicted_mu = predicted_mu * std_matrix + mean_matrix - predicted_sigma = ((predicted_sigma ** 0.5) * std_matrix) ** 2 - - dir_name = os.path.dirname(out_file_list[i]) - file_id = os.path.splitext(os.path.basename(out_file_list[i]))[0] - - mlpg = MLParameterGenerationFast() - for feature_name in gen_wav_features: - current_features = predicted_mu[:, stream_start_index[feature_name] - :stream_start_index[feature_name]+out_dimension_dict[feature_name]] - current_sigma = predicted_sigma[:, stream_start_index[feature_name]:stream_start_index[feature_name]+out_dimension_dict[feature_name]] - - gen_features = mlpg.generation( - current_features, current_sigma, out_dimension_dict[feature_name]/3) - - if feature_name == 'lf0': - if 'vuv' in stream_start_index: - vuv_feature = predicted_mu[:, stream_start_index['vuv'] - :stream_start_index['vuv']+1] - for i in range(frame_number): - if vuv_feature[i, 0] < 0.5: - gen_features[i, 0] = inf_float -# print gen_features - new_file_name = os.path.join( - dir_name, file_id + file_extension_dict[feature_name]) - - io_funcs.array_to_binary_file(gen_features, new_file_name) - - -# generate bottleneck layer as festures -def dnn_hidden_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - - file_number = len(valid_file_list) - - for i in range(file_number): - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size / n_ins))] - features = features.reshape((-1, n_ins)) - temp_set_x = features.tolist() - test_set_x = theano.shared(numpy.asarray( - temp_set_x, dtype=theano.config.floatX)) - - predicted_parameter = dnn_model.generate_top_hidden_layer( - test_set_x=test_set_x) - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - logger.debug('saved to %s' % out_file_list[i]) - fid.close() - - -def main_function(cfg): - - # get a logger for this main function - logger = logging.getLogger("main") - - # get another logger to handle plotting duties - plotlogger = logging.getLogger("plotting") - - # later, we might do this via a handler that is created, attached and configured - # using the standard config mechanism of the logging module - # but for now we need to do it manually - plotlogger.set_plot_path(cfg.plot_dir) - - #### parameter setting######## - hidden_layers_sizes = cfg.hyper_params['hidden_layer_size'] - - # prepare environment - - try: - file_id_list = 
read_file_list(cfg.file_id_scp) - logger.debug('Loaded file id list from %s' % cfg.file_id_scp) - except IOError: - # this means that open(...) threw an error - logger.critical('Could not load file id list from %s' % - cfg.file_id_scp) - raise - - # total file number including training, development, and testing - total_file_number = len(file_id_list) - - data_dir = cfg.data_dir - - nn_cmp_dir = os.path.join( - data_dir, 'nn' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - nn_cmp_norm_dir = os.path.join( - data_dir, 'nn_norm' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - - model_dir = os.path.join(cfg.work_dir, 'nnets_model') - gen_dir = os.path.join(cfg.work_dir, 'gen') - - in_file_list_dict = {} - - for feature_name in list(cfg.in_dir_dict.keys()): - in_file_list_dict[feature_name] = prepare_file_path_list( - file_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False) - - nn_cmp_file_list = prepare_file_path_list( - file_id_list, nn_cmp_dir, cfg.cmp_ext) - nn_cmp_norm_file_list = prepare_file_path_list( - file_id_list, nn_cmp_norm_dir, cfg.cmp_ext) - - # normalisation information - norm_info_file = os.path.join(data_dir, 'norm_info' + cfg.combined_feature_name + - '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat') - - # normalise input full context label - - # currently supporting two different forms of lingustic features - # later, we should generalise this - - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension - logger.info('Input label dimension is %d' % lab_dim) - suffix = str(lab_dim) - # no longer supported - use new "composed" style labels instead - elif cfg.label_style == 'composed': - # label_normaliser = XMLLabelNormalisation(xpath_file_name=cfg.xpath_file_name) - suffix = 'composed' - - if cfg.process_labels_in_work_dir: - label_data_dir = cfg.work_dir - else: - label_data_dir = data_dir - - # the number can be removed - binary_label_dir = os.path.join(label_data_dir, 'binary_label_'+suffix) - nn_label_dir = os.path.join(label_data_dir, 'nn_no_silence_lab_'+suffix) - nn_label_norm_dir = os.path.join( - label_data_dir, 'nn_no_silence_lab_norm_'+suffix) -# nn_label_norm_mvn_dir = os.path.join(data_dir, 'nn_no_silence_lab_norm_'+suffix) - - in_label_align_file_list = prepare_file_path_list( - file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - binary_label_file_list = prepare_file_path_list( - file_id_list, binary_label_dir, cfg.lab_ext) - nn_label_file_list = prepare_file_path_list( - file_id_list, nn_label_dir, cfg.lab_ext) - nn_label_norm_file_list = prepare_file_path_list( - file_id_list, nn_label_norm_dir, cfg.lab_ext) - - # to do - sanity check the label dimension here? 
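The MinMaxNormalisation step used below maps every label dimension into [0.01, 0.99], with the min and max computed on the training files only and then applied to the whole dataset. A minimal standalone sketch of that scaling (a hypothetical helper; the toolkit's own class does the real work, including file I/O):

import numpy as np

def min_max_scale(train_frames, all_frames, lo=0.01, hi=0.99):
    # statistics come from the training portion only
    fmin = train_frames.min(axis=0)
    frange = train_frames.max(axis=0) - fmin
    frange[frange == 0.0] = 1.0  # guard constant dimensions against divide-by-zero
    return lo + (all_frames - fmin) / frange * (hi - lo)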
- - min_max_normaliser = None - label_norm_file = 'label_norm_%s.dat' % (cfg.label_style) - label_norm_file = os.path.join(label_data_dir, label_norm_file) - - if cfg.NORMLAB and (cfg.label_style == 'HTS'): - # simple HTS labels - logger.info( - 'preparing label data (input) using standard HTS style labels') - label_normaliser.perform_normalisation( - in_label_align_file_list, binary_label_file_list) - - remover = SilenceRemover(n_cmp=lab_dim, silence_pattern=['*-#+*']) - remover.remove_silence(binary_label_file_list, - in_label_align_file_list, nn_label_file_list) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if cfg.NORMLAB and (cfg.label_style == 'composed'): - # new flexible label preprocessor - - logger.info( - 'preparing label data (input) using "composed" style labels') - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - - logger.info('Loaded label configuration') - # logger.info('%s' % label_composer.configuration.labels ) - - lab_dim = label_composer.compute_label_dimension() - logger.info('label dimension will be %d' % lab_dim) - - if cfg.precompile_xpaths: - label_composer.precompile_xpaths() - - # there are now a set of parallel input label files (e.g, one set of HTS and another set of Ossian trees) - # create all the lists of these, ready to pass to the label composer - - in_label_align_file_list = {} - for label_style, label_style_required in label_composer.label_styles.items(): - if label_style_required: - logger.info( - 'labels of style %s are required - constructing file paths for them' % label_style) - if label_style == 'xpath': - in_label_align_file_list['xpath'] = prepare_file_path_list( - file_id_list, cfg.xpath_label_align_dir, cfg.utt_ext, False) - elif label_style == 'hts': - in_label_align_file_list['hts'] = prepare_file_path_list( - file_id_list, cfg.hts_label_align_dir, cfg.lab_ext, False) - else: - logger.critical( - 'unsupported label style %s specified in label configuration' % label_style) - raise Exception - - # now iterate through the files, one at a time, constructing the labels for them - num_files = len(file_id_list) - logger.info('the label styles required are %s' % - label_composer.label_styles) - - for i in range(num_files): - logger.info( - 'making input label features for %4d of %4d' % (i+1, num_files)) - - # iterate through the required label styles and open each corresponding label file - - # a dictionary of file descriptors, pointing at the required files - required_labels = {} - - for label_style, label_style_required in label_composer.label_styles.items(): - - # the files will be a parallel set of files for a single utterance - # e.g., the XML tree and an HTS label file - if label_style_required: - required_labels[label_style] = open( - in_label_align_file_list[label_style][i], 'r') - logger.debug(' opening label file %s' % - in_label_align_file_list[label_style][i]) - - logger.debug('label styles with open files: %s' % - required_labels) - label_composer.make_labels( - required_labels, out_file_name=binary_label_file_list[i], fill_missing_values=cfg.fill_missing_values, iterate_over_frames=cfg.iterate_over_frames) - - # now close all opened files - for fd in 
required_labels.values(): - fd.close() - - # silence removal - if cfg.remove_silence_using_binary_labels: - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from label using silence feature: %s' % ( - label_composer.configuration.labels[silence_feature])) - logger.info('Silence will be removed from CMP files in same way') - # Binary labels have 2 roles: both the thing trimmed and the instructions for trimming: - trim_silence(binary_label_file_list, nn_label_file_list, lab_dim, - binary_label_file_list, lab_dim, silence_feature, percent_to_keep=5) - else: - logger.info('No silence removal done') - # start from the labels we have just produced, not trimmed versions - nn_label_file_list = binary_label_file_list - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if min_max_normaliser != None: - # save label normalisation information for unseen testing labels - label_min_vector = min_max_normaliser.min_vector - label_max_vector = min_max_normaliser.max_vector - label_norm_info = numpy.concatenate( - (label_min_vector, label_max_vector), axis=0) - - label_norm_info = numpy.array(label_norm_info, 'float32') - fid = open(label_norm_file, 'wb') - label_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (label_min_vector.size, label_norm_file)) - - # make output acoustic data - if cfg.MAKECMP: - logger.info('creating acoustic (output) features') - delta_win = [-0.5, 0.0, 0.5] - acc_win = [1.0, -2.0, 1.0] - - acoustic_worker = AcousticComposition( - delta_win=delta_win, acc_win=acc_win) - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, cfg.in_dimension_dict, cfg.out_dimension_dict) - - if cfg.remove_silence_using_binary_labels: - # do this to get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from CMP using binary label file') - - # overwrite the untrimmed audio with the trimmed version: - trim_silence(nn_cmp_file_list, nn_cmp_file_list, cfg.cmp_dim, - binary_label_file_list, lab_dim, silence_feature, percent_to_keep=5) - - else: # back off to previous method using HTS labels: - remover = SilenceRemover( - n_cmp=cfg.cmp_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - nn_cmp_file_list, in_label_align_file_list, nn_cmp_file_list) # save to itself - - # save acoustic normalisation information for normalising the features back - var_dir = os.path.join(data_dir, 'var') - if not os.path.exists(var_dir): - os.makedirs(var_dir) - - var_file_dict = {} - for feature_name in list(cfg.out_dimension_dict.keys()): - var_file_dict[feature_name] = os.path.join( - var_dir, feature_name + '_' + str(cfg.out_dimension_dict[feature_name])) - - # normalise output acoustic data - if cfg.NORMCMP: - logger.info('normalising acoustic (output) features using method %s' % - cfg.output_feature_normalisation) - cmp_norm_info = None - if cfg.output_feature_normalisation == 'MVN': - normaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - # calculate mean and std vectors on the 
training data, and apply on the whole dataset - global_mean_vector = normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number], 0, cfg.cmp_dim) - global_std_vector = normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector, 0, cfg.cmp_dim) - - normaliser.feature_normalisation( - nn_cmp_file_list, nn_cmp_norm_file_list) - cmp_norm_info = numpy.concatenate( - (global_mean_vector, global_std_vector), axis=0) - - elif cfg.output_feature_normalisation == 'MINMAX': - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim) - global_mean_vector = min_max_normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number]) - global_std_vector = min_max_normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim, min_value=0.01, max_value=0.99) - min_max_normaliser.find_min_max_values( - nn_cmp_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_cmp_file_list, nn_cmp_norm_file_list) - - cmp_min_vector = min_max_normaliser.min_vector - cmp_max_vector = min_max_normaliser.max_vector - cmp_norm_info = numpy.concatenate( - (cmp_min_vector, cmp_max_vector), axis=0) - - else: - logger.critical('Normalisation type %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - cmp_norm_info = numpy.array(cmp_norm_info, 'float32') - fid = open(norm_info_file, 'wb') - cmp_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (cfg.output_feature_normalisation, norm_info_file)) - # logger.debug(' value was\n%s' % cmp_norm_info) - - feature_index = 0 - for feature_name in list(cfg.out_dimension_dict.keys()): - feature_std_vector = numpy.array( - global_std_vector[:, feature_index:feature_index+cfg.out_dimension_dict[feature_name]], 'float32') - - fid = open(var_file_dict[feature_name], 'w') - feature_std_vector.tofile(fid) - fid.close() - - logger.info('saved %s variance vector to %s' % - (feature_name, var_file_dict[feature_name])) - # logger.debug(' value was\n%s' % feature_std_vector) - - feature_index += cfg.out_dimension_dict[feature_name] - - train_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number] - train_y_file_list = nn_cmp_norm_file_list[0:cfg.train_file_number] - valid_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - valid_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - test_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - # we need to know the label dimension before training the DNN - # computing that requires us to look at the labels - # - # currently, there are two ways to do this - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension - - elif cfg.label_style == 'composed': - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - logger.info('label dimension is %d' % lab_dim) - - combined_model_arch = str(len(hidden_layers_sizes)) - for hid_size in hidden_layers_sizes: - 
combined_model_arch += '_' + str(hid_size) - -# nnets_file_name = '%s/%s_%s_%d.%d.%d.%d.%d.train.%d.model' \ -# %(model_dir, cfg.model_type, cfg.combined_feature_name, int(cfg.multistream_switch), -# len(hidden_layers_sizes), hidden_layers_sizes[0], -# lab_dim, cfg.cmp_dim, cfg.train_file_number) - - nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.mdn.model' \ - % (model_dir, cfg.model_type, cfg.combined_feature_name, int(cfg.multistream_switch), - combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number) - - # DNN model training - if cfg.TRAINDNN: - - logger.info('training DNN') - - try: - os.makedirs(model_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create model directory %s' % model_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - try: - # print 'start DNN' - train_DNN(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, - mdn_component=cfg.mdn_component, var_floor=cfg.var_floor, - plot=cfg.plot, beta_opt=cfg.beta_opt, - eff_sample_size=cfg.eff_sample_size, mean_log_det=cfg.mean_log_det, - start_from_trained_model=cfg.start_from_trained_model) - except KeyboardInterrupt: - logger.critical('train_DNN interrupted via keyboard') - # Could 'raise' the exception further, but that causes a deep traceback to be printed - # which we don't care about for a keyboard interrupt. So, just bail out immediately - sys.exit(1) - except: - logger.critical('train_DNN threw an exception') - raise - - # generate parameters from DNN - temp_dir_name = '%s_%s_%d_%d_%d_%d_%d_%d' \ - % (cfg.model_type, cfg.combined_feature_name, int(cfg.do_post_filtering), - cfg.train_file_number, lab_dim, cfg.cmp_dim, - len(hidden_layers_sizes), hidden_layers_sizes[0]) - gen_dir = os.path.join(gen_dir, temp_dir_name) - - gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.DNNGEN: - logger.info('generating from DNN') - - try: - os.makedirs(gen_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create generation directory %s' % gen_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - gen_file_list = prepare_file_path_list( - gen_file_id_list, gen_dir, cfg.cmp_ext) - - fid = open(norm_info_file, 'rb') - cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32) - fid.close() - cmp_min_max = cmp_min_max.reshape((2, -1)) - target_mean_vector = cmp_min_max[0, ] - target_std_vector = cmp_min_max[1, ] - -# dnn_generation(valid_x_file_list, nnets_file_name, lab_dim, cfg.cmp_dim, gen_file_list) -# dnn_generation(test_x_file_list, nnets_file_name, lab_dim, cfg.cmp_dim, gen_file_list) - dnn_generation(test_x_file_list, nnets_file_name, lab_dim, cfg.cmp_dim, gen_file_list, - target_mean_vector, target_std_vector, cfg.out_dimension_dict, cfg.file_extension_dict) - - # generate wav - if cfg.GENWAV: - logger.info('reconstructing waveform(s)') - generate_wav(gen_dir, gen_file_id_list, cfg) # generated speech -# generate_wav(nn_cmp_dir, 
gen_file_id_list) # reference copy synthesis speech - - # evaluation: calculate distortion - if cfg.CALMCD: - logger.info('calculating MCD') - - ref_data_dir = os.path.join(data_dir, 'ref_data') - - ref_mgc_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.mgc_ext) - ref_bap_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.bap_ext) - ref_lf0_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.lf0_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - calculator = IndividualDistortionComp() - - spectral_distortion = 0.0 - bap_mse = 0.0 - f0_mse = 0.0 - vuv_error = 0.0 - - valid_file_id_list = file_id_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_file_id_list = file_id_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - # get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - # use first feature in label -- hardcoded for now - silence_feature = 0 - - # Use these to trim silence: - untrimmed_test_labels = binary_label_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if 'mgc' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['mgc'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_mgc_list, cfg.mgc_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=['*-#+*']) - remover.remove_silence(in_file_list_dict['mgc'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_mgc_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - # MCD - valid_spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - test_spectral_distortion *= (10 / - numpy.log(10)) * numpy.sqrt(2.0) # MCD - - if 'bap' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['bap'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_bap_list, cfg.bap_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=['*-#+*']) - remover.remove_silence(in_file_list_dict['bap'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_bap_list) - valid_bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - test_bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - valid_bap_mse = valid_bap_mse / 10.0 - # Cassia's bap is computed from 10*log|S(w)|. 
if use HTS/SPTK style, do the same as MGC - test_bap_mse = test_bap_mse / 10.0 - - if 'lf0' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['lf0'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_lf0_list, cfg.lf0_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=['*-#+*']) - remover.remove_silence(in_file_list_dict['lf0'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_lf0_list) - valid_f0_mse, valid_vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - test_f0_mse, test_vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - - logger.info('Develop: DNN -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' - % (valid_spectral_distortion, valid_bap_mse, valid_f0_mse, valid_vuv_error*100.)) - logger.info('Test : DNN -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' - % (test_spectral_distortion, test_bap_mse, test_f0_mse, test_vuv_error*100.)) - - # this can be removed - # - if 0: # to calculate distortion of HMM baseline - hmm_gen_no_silence_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nick_hmm_pf_2400_no_silence' - hmm_gen_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nick_hmm_pf_2400' - - if 1: - hmm_mgc_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_dir, cfg.mgc_ext) - hmm_bap_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_dir, cfg.bap_ext) - hmm_lf0_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_dir, cfg.lf0_ext) - - hmm_mgc_no_silence_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_no_silence_dir, cfg.mgc_ext) - hmm_bap_no_silence_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_no_silence_dir, cfg.bap_ext) - hmm_lf0_no_silence_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_no_silence_dir, cfg.lf0_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - hmm_mgc_list, in_gen_label_align_file_list, hmm_mgc_no_silence_list) - - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - hmm_bap_list, in_gen_label_align_file_list, hmm_bap_no_silence_list) - - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - hmm_lf0_list, in_gen_label_align_file_list, hmm_lf0_no_silence_list) - - calculator = IndividualDistortionComp() - - spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.mgc_ext, cfg.mgc_dim) - bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.bap_ext, cfg.bap_dim) - f0_mse, vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.lf0_ext, cfg.lf0_dim) - - spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - bap_mse = bap_mse / 10.0 - - logger.info('Develop: HMM -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' % - (spectral_distortion, bap_mse, f0_mse, vuv_error*100.)) - - 
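The (10 / ln 10) * sqrt(2) factor applied in this section converts a root-mean-square cepstral error into mel-cepstral distortion (MCD) in decibels. A minimal sketch of the full computation under one common convention (frame-wise Euclidean distance over mel-cepstra with the energy coefficient already dropped; illustrative only):

import numpy as np

def mcd_db(ref_mgc, gen_mgc):
    # ref_mgc, gen_mgc: (frames, dim) arrays of mel-cepstral coefficients
    diff = ref_mgc - gen_mgc
    per_frame = np.sqrt(2.0 * (diff ** 2).sum(axis=1))
    return (10.0 / np.log(10.0)) * per_frame.mean()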
spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.mgc_ext, cfg.mgc_dim) - bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.bap_ext, cfg.bap_dim) - f0_mse, vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.lf0_ext, cfg.lf0_dim) - - spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - bap_mse = bap_mse / 10.0 - - logger.info('Test : HMM -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' % - (spectral_distortion, bap_mse, f0_mse, vuv_error*100.)) - - -if __name__ == '__main__': - - # these things should be done even before trying to parse the command line - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.cfg - - # set up logging to use our custom class - logging.setLoggerClass(LoggerPlotter) - - # get a logger for this main function - logger = logging.getLogger("main") - - if len(sys.argv) != 2: - logger.critical('usage: run_dnn.sh [config file name]') - sys.exit(1) - - config_file = sys.argv[1] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - - if cfg.profile: - logger.info('profiling is activated') - import cProfile - import pstats - cProfile.run('main_function(cfg)', 'mainstats') - - # create a stream for the profiler to write to - profiling_output = io.StringIO() - p = pstats.Stats('mainstats', stream=profiling_output) - - # print stats to that stream - # here we just report the top 10 functions, sorted by total amount of time spent in each - p.strip_dirs().sort_stats('tottime').print_stats(10) - - # print the result to the log - logger.info('---Profiling result follows---\n%s' % - profiling_output.getvalue()) - profiling_output.close() - logger.info('---End of profiling result---') - - else: - main_function(cfg) - - sys.exit(0) - -import pickle -import gzip -import os -import sys -import errno -import time -import math - -# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why) -import numpy -#import gnumpy as gnp -# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself -import numpy.distutils.__config__ -# and only after that can we import theano -import theano - -from utils.providers import ListDataProvider - -from frontend.label_normalisation import HTSLabelNormalisation, XMLLabelNormalisation -from frontend.silence_remover import SilenceRemover -from frontend.silence_remover import trim_silence -from frontend.min_max_norm import MinMaxNormalisation -from frontend.acoustic_composition import AcousticComposition -from frontend.parameter_generation import ParameterGeneration -from frontend.mean_variance_norm import MeanVarianceNorm - -# the new class for label composition and normalisation -from frontend.label_composer import LabelComposer -from frontend.label_modifier import HTSLabelModification -#from frontend.mlpg_fast import MLParameterGenerationFast - -#from frontend.mlpg_fast_layer import MLParameterGenerationFastLayer - - -import configuration -from models.deep_rnn import DeepRecurrentNetwork - -from utils.compute_distortion import DistortionComputation, IndividualDistortionComp -from utils.generate import generate_wav -from utils.learn_rates import ExpDecreaseLearningRate - -from io_funcs.binary_io import BinaryIOCollection - -#import matplotlib.pyplot as plt -# our custom logging class that can also plot 
-#from logplot.logging_plotting import LoggerPlotter, MultipleTimeSeriesPlot, SingleWeightMatrixPlot -from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot -import logging # as logging -import logging.config -import io - - -def extract_file_id_list(file_list): - file_id_list = [] - for file_name in file_list: - file_id = os.path.basename(os.path.splitext(file_name)[0]) - file_id_list.append(file_id) - - return file_id_list - - -def read_file_list(file_name): - - logger = logging.getLogger("read_file_list") - - file_lists = [] - fid = open(file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - file_lists.append(line) - fid.close() - - logger.debug('Read file list from %s' % file_name) - return file_lists - - -def make_output_file_list(out_dir, in_file_lists): - out_file_lists = [] - - for in_file_name in in_file_lists: - file_id = os.path.basename(in_file_name) - out_file_name = out_dir + '/' + file_id - out_file_lists.append(out_file_name) - - return out_file_lists - - -def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True): - if not os.path.exists(file_dir) and new_dir_switch: - os.makedirs(file_dir) - file_name_list = [] - for file_id in file_id_list: - file_name = file_dir + '/' + file_id + file_extension - file_name_list.append(file_name) - - return file_name_list - - -def visualize_dnn(dnn): - - layer_num = len(dnn.params) # including input and output - plotlogger = logging.getLogger("plotting") - - for i in range(layer_num): - fig_name = 'Activation weights W' + str(i) + '_' + dnn.params[i].name - fig_title = 'Activation weights of W' + str(i) - xlabel = 'Neuron index of hidden layer ' + str(i) - ylabel = 'Neuron index of hidden layer ' + str(i+1) - if i == 0: - xlabel = 'Input feature index' - if i == layer_num-1: - ylabel = 'Output feature index' - - aa = dnn.params[i].get_value(borrow=True).T - print(aa.shape, aa.size) - if aa.size > aa.shape[0]: - logger.create_plot(fig_name, SingleWeightMatrixPlot) - plotlogger.add_plot_point( - fig_name, fig_name, dnn.params[i].get_value(borrow=True).T) - plotlogger.save_plot(fig_name, title=fig_name, - xlabel=xlabel, ylabel=ylabel) - - -def load_covariance(var_file_dict, out_dimension_dict): - var = {} - io_funcs = BinaryIOCollection() - for feature_name in list(var_file_dict.keys()): - var_values, dimension = io_funcs.load_binary_file_frame( - var_file_dict[feature_name], 1) - - var_values = numpy.reshape( - var_values, (out_dimension_dict[feature_name], 1)) - - var[feature_name] = var_values - - return var - - -def train_DNN(train_xy_file_list, valid_xy_file_list, - nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False, var_dict=None, - cmp_mean_vector=None, cmp_std_vector=None, init_dnn_model_file=None, seq_dur_file_list=None): - - # get loggers for this function - # this one writes to both console and file - logger = logging.getLogger("main.train_DNN") - logger.debug('Starting train_DNN') - - if plot: - # this one takes care of plotting duties - plotlogger = logging.getLogger("plotting") - # create an (empty) plot of training convergence, ready to receive data points - logger.create_plot('training convergence', MultipleSeriesPlot) - - try: - assert numpy.sum(ms_outs) == n_outs - except AssertionError: - logger.critical( - 'the summation of multi-stream outputs does not equal to %d' % (n_outs)) - raise - - ####parameters##### - finetune_lr = float(hyper_params['learning_rate']) - training_epochs = 
int(hyper_params['training_epochs']) - batch_size = int(hyper_params['batch_size']) - l1_reg = float(hyper_params['l1_reg']) - l2_reg = float(hyper_params['l2_reg']) - warmup_epoch = int(hyper_params['warmup_epoch']) - momentum = float(hyper_params['momentum']) - warmup_momentum = float(hyper_params['warmup_momentum']) - - hidden_layer_size = hyper_params['hidden_layer_size'] - - buffer_utt_size = buffer_size - early_stop_epoch = int(hyper_params['early_stop_epochs']) - - hidden_activation = hyper_params['hidden_activation'] - output_activation = hyper_params['output_activation'] - - model_type = hyper_params['model_type'] - hidden_layer_type = hyper_params['hidden_layer_type'] - - # use a switch to turn on pretraining - # pretraining may not help too much, if this case, we turn it off to save time - do_pretraining = hyper_params['do_pretraining'] - pretraining_epochs = int(hyper_params['pretraining_epochs']) - pretraining_lr = float(hyper_params['pretraining_lr']) - - sequential_training = hyper_params['sequential_training'] - dropout_rate = hyper_params['dropout_rate'] - -# sequential_training = True - - buffer_size = int(buffer_size / batch_size) * batch_size - - ################### - (train_x_file_list, train_y_file_list) = train_xy_file_list - (valid_x_file_list, valid_y_file_list) = valid_xy_file_list - - if cfg.network_type != 'S2S': - seq_dur_file_list = None - - if not seq_dur_file_list: - train_dur_file_list = None - valid_dur_file_list = None - else: - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name, subphone_feats="coarse_coding") - train_dur_file_list = seq_dur_file_list[0:cfg.train_file_number] - valid_dur_file_list = seq_dur_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - - logger.debug('Creating training data provider') - train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list, dur_file_list=train_dur_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, sequential=sequential_training, network_type=cfg.network_type, shuffle=True) - - logger.debug('Creating validation data provider') - valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, dur_file_list=valid_dur_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, sequential=sequential_training, network_type=cfg.network_type, shuffle=False) - - if cfg.network_type == 'S2S': - shared_train_set_xyd, temp_train_set_x, temp_train_set_y, temp_train_set_d = train_data_reader.load_one_partition() - shared_valid_set_xyd, temp_valid_set_x, temp_valid_set_y, temp_valid_set_d = valid_data_reader.load_one_partition() - train_set_x, train_set_y, train_set_d = shared_train_set_xyd - valid_set_x, valid_set_y, valid_set_d = shared_valid_set_xyd - - temp_train_set_f = label_normaliser.extract_durational_features( - dur_data=temp_train_set_d) - temp_valid_set_f = label_normaliser.extract_durational_features( - dur_data=temp_valid_set_d) - train_set_f = theano.shared(numpy.asarray( - temp_train_set_f, dtype=theano.config.floatX), name='f', borrow=True) - valid_set_f = theano.shared(numpy.asarray( - temp_valid_set_f, dtype=theano.config.floatX), name='f', borrow=True) - else: - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition() - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_one_partition() - train_set_x, train_set_y = shared_train_set_xy - valid_set_x, valid_set_y = 
shared_valid_set_xy - train_data_reader.reset() - valid_data_reader.reset() - - # temporally we use the training set as pretrain_set_x. - # we need to support any data for pretraining -# pretrain_set_x = train_set_x - - # numpy random generator - numpy_rng = numpy.random.RandomState(123) - logger.info('building the model') - - dnn_model = None - pretrain_fn = None # not all the model support pretraining right now - train_fn = None - valid_fn = None - # valid_fn and valid_model are the same. reserve to computer multi-stream distortion - valid_model = None - if model_type == 'DNN': - dnn_model = DeepRecurrentNetwork(n_in=n_ins, hidden_layer_size=hidden_layer_size, n_out=n_outs, - L1_reg=l1_reg, L2_reg=l2_reg, hidden_layer_type=hidden_layer_type, output_type=cfg.output_layer_type, network_type=cfg.network_type, dropout_rate=dropout_rate) - if cfg.network_type == 'S2S': - train_fn, valid_fn = dnn_model.build_finetune_functions_S2SPF( - (train_set_x, train_set_y, train_set_d, train_set_f), (valid_set_x, valid_set_y, valid_set_d, valid_set_f)) - else: - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y)) # , batch_size=batch_size - - else: - logger.critical('%s type NN model is not supported!' % (model_type)) - raise - - logger.info('fine-tuning the %s model' % (model_type)) - - start_time = time.time() - - best_dnn_model = dnn_model - best_validation_loss = sys.float_info.max - previous_loss = sys.float_info.max - - early_stop = 0 - epoch = 0 - -# finetune_lr = 0.000125 - previous_finetune_lr = finetune_lr - - print(finetune_lr) - - while (epoch < training_epochs): - epoch = epoch + 1 - - current_momentum = momentum - current_finetune_lr = finetune_lr - if epoch <= warmup_epoch: - current_finetune_lr = finetune_lr - current_momentum = warmup_momentum - else: - current_finetune_lr = previous_finetune_lr * 0.5 - - previous_finetune_lr = current_finetune_lr - - train_error = [] - sub_start_time = time.time() - - while (not train_data_reader.is_finish()): - - if cfg.network_type == 'S2S': - shared_train_set_xyd, temp_train_set_x, temp_train_set_y, temp_train_set_d = train_data_reader.load_one_partition() - temp_train_set_f = label_normaliser.extract_durational_features( - dur_data=temp_train_set_d) - train_set_d.set_value(numpy.asarray( - temp_train_set_d, dtype='int32'), borrow=True) - train_set_f.set_value(numpy.asarray( - temp_train_set_f, dtype=theano.config.floatX), borrow=True) - else: - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition() - - # if sequential training, the batch size will be the number of frames in an utterance - - if sequential_training == True: - #batch_size = temp_train_set_x.shape[0] - - train_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - train_set_y.set_value(numpy.asarray( - temp_train_set_y, dtype=theano.config.floatX), borrow=True) - - this_train_error = train_fn( - current_finetune_lr, current_momentum) - train_error.append(this_train_error) - # print train_set_x.eval().shape, train_set_y.eval().shape, this_train_error - - else: - n_train_batches = temp_train_set_x.shape[0] / batch_size - for index in range(n_train_batches): - # send a batch to the shared variable, rather than pass the batch size and batch index to the finetune function - train_set_x.set_value(numpy.asarray(temp_train_set_x[index*batch_size:( - index + 1)*batch_size], dtype=theano.config.floatX), borrow=True) - 
train_set_y.set_value(numpy.asarray(temp_train_set_y[index*batch_size:( - index + 1)*batch_size], dtype=theano.config.floatX), borrow=True) - - this_train_error = train_fn( - current_finetune_lr, current_momentum) - train_error.append(this_train_error) - - train_data_reader.reset() - - logger.debug('calculating validation loss') - validation_losses = [] - while (not valid_data_reader.is_finish()): - - if cfg.network_type == 'S2S': - shared_valid_set_xyd, temp_valid_set_x, temp_valid_set_y, temp_valid_set_d = valid_data_reader.load_one_partition() - temp_valid_set_f = label_normaliser.extract_durational_features( - dur_data=temp_valid_set_d) - valid_set_d.set_value(numpy.asarray( - temp_valid_set_d, dtype='int32'), borrow=True) - valid_set_f.set_value(numpy.asarray( - temp_valid_set_f, dtype=theano.config.floatX), borrow=True) - else: - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_one_partition() - - valid_set_x.set_value(numpy.asarray( - temp_valid_set_x, dtype=theano.config.floatX), borrow=True) - valid_set_y.set_value(numpy.asarray( - temp_valid_set_y, dtype=theano.config.floatX), borrow=True) - - this_valid_loss = valid_fn() - - validation_losses.append(this_valid_loss) - valid_data_reader.reset() - - this_validation_loss = numpy.mean(validation_losses) - - this_train_valid_loss = numpy.mean(numpy.asarray(train_error)) - - sub_end_time = time.time() - - loss_difference = this_validation_loss - previous_loss - - logger.info('epoch %i, validation error %f, train error %f time spent %.2f' % ( - epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time))) - if plot: - plotlogger.add_plot_point( - 'training convergence', 'validation set', (epoch, this_validation_loss)) - plotlogger.add_plot_point( - 'training convergence', 'training set', (epoch, this_train_valid_loss)) - plotlogger.save_plot( - 'training convergence', title='Progress of training and validation error', xlabel='epochs', ylabel='error') - - if this_validation_loss < best_validation_loss: - if epoch > 5: - pickle.dump(best_dnn_model, open(nnets_file_name, 'wb')) - - best_dnn_model = dnn_model - best_validation_loss = this_validation_loss -# logger.debug('validation loss decreased, so saving model') - - if this_validation_loss >= previous_loss: - logger.debug('validation loss increased') - -# dbn = best_dnn_model - early_stop += 1 - - if epoch > 15 and early_stop > early_stop_epoch: - logger.debug('stopping early') - break - - if math.isnan(this_validation_loss): - break - - previous_loss = this_validation_loss - - end_time = time.time() -# cPickle.dump(best_dnn_model, open(nnets_file_name, 'wb')) - - logger.info('overall training time: %.2fm validation error %f' % - ((end_time - start_time) / 60., best_validation_loss)) - - if plot: - plotlogger.save_plot( - 'training convergence', title='Final training and validation error', xlabel='epochs', ylabel='error') - - return best_validation_loss - - -def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - - file_number = len(valid_file_list) - - for i in range(file_number): # file_number - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = 
features[:(n_ins * (features.size / n_ins))] - test_set_x = features.reshape((-1, n_ins)) - - predicted_parameter = dnn_model.parameter_prediction(test_set_x) - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - logger.debug('saved to %s' % out_file_list[i]) - fid.close() - - -def dnn_generation_S2S(valid_file_list, valid_dur_file_list, nnets_file_name, n_ins, n_outs, out_file_list): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - - file_number = len(valid_file_list) - - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name, subphone_feats="coarse_coding") - for i in range(file_number): # file_number - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size / n_ins))] - test_set_x = features.reshape((-1, n_ins)) - - fid_lab = open(valid_dur_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - test_set_d = features.astype(numpy.int32) - - dur_features = label_normaliser.extract_durational_features( - dur_data=test_set_d) - test_set_f = dur_features.astype(numpy.float32) - - predicted_parameter = dnn_model.parameter_prediction_S2SPF( - test_set_x, test_set_d, test_set_f) - - # print b_indices - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - logger.debug('saved to %s' % out_file_list[i]) - fid.close() - - -def dnn_generation_lstm(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - - visualize_dnn(dnn_model) - - file_number = len(valid_file_list) - - for i in range(file_number): # file_number - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size / n_ins))] - test_set_x = features.reshape((-1, n_ins)) - - predicted_parameter = dnn_model.parameter_prediction_lstm(test_set_x) - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - logger.debug('saved to %s' % out_file_list[i]) - fid.close() - -# generate bottleneck layer as festures - - -def dnn_hidden_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - - file_number = len(valid_file_list) - - for i in range(file_number): - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = 
numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size / n_ins))] - features = features.reshape((-1, n_ins)) - temp_set_x = features.tolist() - test_set_x = theano.shared(numpy.asarray( - temp_set_x, dtype=theano.config.floatX)) - - predicted_parameter = dnn_model.generate_top_hidden_layer( - test_set_x=test_set_x) - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - logger.debug('saved to %s' % out_file_list[i]) - fid.close() - - -def main_function(cfg): - - # get a logger for this main function - logger = logging.getLogger("main") - - # get another logger to handle plotting duties - plotlogger = logging.getLogger("plotting") - - # later, we might do this via a handler that is created, attached and configured - # using the standard config mechanism of the logging module - # but for now we need to do it manually - plotlogger.set_plot_path(cfg.plot_dir) - - #### parameter setting######## - hidden_layer_size = cfg.hyper_params['hidden_layer_size'] - - # prepare environment - - try: - file_id_list = read_file_list(cfg.file_id_scp) - logger.debug('Loaded file id list from %s' % cfg.file_id_scp) - except IOError: - # this means that open(...) threw an error - logger.critical('Could not load file id list from %s' % - cfg.file_id_scp) - raise - - # total file number including training, development, and testing - total_file_number = len(file_id_list) - - data_dir = cfg.data_dir - - nn_cmp_dir = os.path.join( - data_dir, 'nn' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - nn_cmp_norm_dir = os.path.join( - data_dir, 'nn_norm' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - - model_dir = os.path.join(cfg.work_dir, 'nnets_model') - gen_dir = os.path.join(cfg.work_dir, 'gen') - - in_file_list_dict = {} - - for feature_name in list(cfg.in_dir_dict.keys()): - in_file_list_dict[feature_name] = prepare_file_path_list( - file_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False) - - nn_cmp_file_list = prepare_file_path_list( - file_id_list, nn_cmp_dir, cfg.cmp_ext) - nn_cmp_norm_file_list = prepare_file_path_list( - file_id_list, nn_cmp_norm_dir, cfg.cmp_ext) - - # normalisation information - norm_info_file = os.path.join(data_dir, 'norm_info' + cfg.combined_feature_name + - '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat') - - # normalise input full context label - # currently supporting two different forms of lingustic features - # later, we should generalise this - - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name, add_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - lab_dim = label_normaliser.dimension + cfg.appended_input_dim - logger.info('Input label dimension is %d' % lab_dim) - suffix = str(lab_dim) - # no longer supported - use new "composed" style labels instead - elif cfg.label_style == 'composed': - # label_normaliser = XMLLabelNormalisation(xpath_file_name=cfg.xpath_file_name) - suffix = 'composed' - - if cfg.process_labels_in_work_dir: - label_data_dir = cfg.work_dir - else: - label_data_dir = data_dir - - # the number can be removed - binary_label_dir = os.path.join(label_data_dir, 'binary_label_'+suffix) - nn_label_dir = os.path.join(label_data_dir, 'nn_no_silence_lab_'+suffix) - nn_label_norm_dir = os.path.join( - 
label_data_dir, 'nn_no_silence_lab_norm_'+suffix) - - in_label_align_file_list = prepare_file_path_list( - file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - binary_label_file_list = prepare_file_path_list( - file_id_list, binary_label_dir, cfg.lab_ext) - nn_label_file_list = prepare_file_path_list( - file_id_list, nn_label_dir, cfg.lab_ext) - nn_label_norm_file_list = prepare_file_path_list( - file_id_list, nn_label_norm_dir, cfg.lab_ext) - dur_file_list = prepare_file_path_list( - file_id_list, cfg.in_dur_dir, cfg.dur_ext) - seq_dur_file_list = prepare_file_path_list( - file_id_list, cfg.in_seq_dur_dir, cfg.dur_ext) - lf0_file_list = prepare_file_path_list( - file_id_list, cfg.in_lf0_dir, cfg.lf0_ext) - - # to do - sanity check the label dimension here? - - min_max_normaliser = None - label_norm_file = 'label_norm_%s_%d.dat' % (cfg.label_style, lab_dim) - label_norm_file = os.path.join(label_data_dir, label_norm_file) - - if cfg.GenTestList: - try: - test_id_list = read_file_list(cfg.test_id_scp) - logger.debug('Loaded file id list from %s' % cfg.test_id_scp) - except IOError: - # this means that open(...) threw an error - logger.critical('Could not load file id list from %s' % - cfg.test_id_scp) - raise - - in_label_align_file_list = prepare_file_path_list( - test_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - binary_label_file_list = prepare_file_path_list( - test_id_list, binary_label_dir, cfg.lab_ext) - nn_label_file_list = prepare_file_path_list( - test_id_list, nn_label_dir, cfg.lab_ext) - nn_label_norm_file_list = prepare_file_path_list( - test_id_list, nn_label_norm_dir, cfg.lab_ext) - - if cfg.NORMLAB and (cfg.label_style == 'HTS'): - # simple HTS labels - logger.info( - 'preparing label data (input) using standard HTS style labels') - label_normaliser.perform_normalisation( - in_label_align_file_list, binary_label_file_list, label_type=cfg.label_type) - - remover = SilenceRemover(n_cmp=lab_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type, - remove_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - remover.remove_silence(binary_label_file_list, - in_label_align_file_list, nn_label_file_list) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - if cfg.GenTestList: - min_max_normaliser.load_min_max_values(label_norm_file) - else: - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - ### make duration data for S2S network ### - if cfg.network_type == "S2S": - logger.info('creating duration (input) features for S2S network') - label_normaliser.prepare_dur_data( - in_label_align_file_list, seq_dur_file_list, feature_type="numerical", unit_size="phoneme") - - if cfg.remove_silence_from_dur: - remover = SilenceRemover( - n_cmp=cfg.seq_dur_dim, silence_pattern=cfg.silence_pattern, remove_frame_features=cfg.add_frame_features) - remover.remove_silence( - seq_dur_file_list, in_label_align_file_list, seq_dur_file_list) - - if cfg.NORMLAB and (cfg.label_style == 'composed'): - # new flexible label preprocessor - - logger.info( - 'preparing label data (input) using "composed" style labels') - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - - logger.info('Loaded label configuration') - # logger.info('%s' % 
label_composer.configuration.labels ) - - lab_dim = label_composer.compute_label_dimension() - logger.info('label dimension will be %d' % lab_dim) - - if cfg.precompile_xpaths: - label_composer.precompile_xpaths() - - # there are now a set of parallel input label files (e.g, one set of HTS and another set of Ossian trees) - # create all the lists of these, ready to pass to the label composer - - in_label_align_file_list = {} - for label_style, label_style_required in label_composer.label_styles.items(): - if label_style_required: - logger.info( - 'labels of style %s are required - constructing file paths for them' % label_style) - if label_style == 'xpath': - in_label_align_file_list['xpath'] = prepare_file_path_list( - file_id_list, cfg.xpath_label_align_dir, cfg.utt_ext, False) - elif label_style == 'hts': - in_label_align_file_list['hts'] = prepare_file_path_list( - file_id_list, cfg.hts_label_align_dir, cfg.lab_ext, False) - else: - logger.critical( - 'unsupported label style %s specified in label configuration' % label_style) - raise Exception - - # now iterate through the files, one at a time, constructing the labels for them - num_files = len(file_id_list) - logger.info('the label styles required are %s' % - label_composer.label_styles) - - for i in range(num_files): - logger.info( - 'making input label features for %4d of %4d' % (i+1, num_files)) - - # iterate through the required label styles and open each corresponding label file - - # a dictionary of file descriptors, pointing at the required files - required_labels = {} - - for label_style, label_style_required in label_composer.label_styles.items(): - - # the files will be a parallel set of files for a single utterance - # e.g., the XML tree and an HTS label file - if label_style_required: - required_labels[label_style] = open( - in_label_align_file_list[label_style][i], 'r') - logger.debug(' opening label file %s' % - in_label_align_file_list[label_style][i]) - - logger.debug('label styles with open files: %s' % - required_labels) - label_composer.make_labels( - required_labels, out_file_name=binary_label_file_list[i], fill_missing_values=cfg.fill_missing_values, iterate_over_frames=cfg.iterate_over_frames) - - # now close all opened files - for fd in required_labels.values(): - fd.close() - - # silence removal - if cfg.remove_silence_using_binary_labels: - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from label using silence feature: %s' % ( - label_composer.configuration.labels[silence_feature])) - logger.info('Silence will be removed from CMP files in same way') - # Binary labels have 2 roles: both the thing trimmed and the instructions for trimming: - trim_silence(binary_label_file_list, nn_label_file_list, lab_dim, - binary_label_file_list, lab_dim, silence_feature) - else: - logger.info('No silence removal done') - # start from the labels we have just produced, not trimmed versions - nn_label_file_list = binary_label_file_list - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if min_max_normaliser != None and not cfg.GenTestList: - # save label normalisation information for unseen testing labels - label_min_vector = min_max_normaliser.min_vector - 
label_max_vector = min_max_normaliser.max_vector - label_norm_info = numpy.concatenate( - (label_min_vector, label_max_vector), axis=0) - - label_norm_info = numpy.array(label_norm_info, 'float32') - fid = open(label_norm_file, 'wb') - label_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (label_min_vector.size, label_norm_file)) - - # make output duration data - if cfg.MAKEDUR: - logger.info('creating duration (output) features') - feature_type = cfg.dur_feature_type - label_normaliser.prepare_dur_data( - in_label_align_file_list, dur_file_list, feature_type) - - # make output acoustic data - if cfg.MAKECMP: - logger.info('creating acoustic (output) features') - delta_win = cfg.delta_win # [-0.5, 0.0, 0.5] - acc_win = cfg.acc_win # [1.0, -2.0, 1.0] - - acoustic_worker = AcousticComposition( - delta_win=delta_win, acc_win=acc_win) - if 'dur' in list(cfg.in_dir_dict.keys()) and cfg.AcousticModel: - acoustic_worker.make_equal_frames( - dur_file_list, lf0_file_list, cfg.in_dimension_dict) - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, cfg.in_dimension_dict, cfg.out_dimension_dict) - - if cfg.remove_silence_using_binary_labels: - # do this to get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from CMP using binary label file') - - # overwrite the untrimmed audio with the trimmed version: - trim_silence(nn_cmp_file_list, nn_cmp_file_list, cfg.cmp_dim, - binary_label_file_list, lab_dim, silence_feature) - - else: # back off to previous method using HTS labels: - remover = SilenceRemover(n_cmp=cfg.cmp_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type, - remove_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - remover.remove_silence(nn_cmp_file_list[0:cfg.train_file_number+cfg.valid_file_number], - in_label_align_file_list[0:cfg.train_file_number + - cfg.valid_file_number], - nn_cmp_file_list[0:cfg.train_file_number+cfg.valid_file_number]) # save to itself - - # save acoustic normalisation information for normalising the features back - var_dir = os.path.join(data_dir, 'var') - if not os.path.exists(var_dir): - os.makedirs(var_dir) - - var_file_dict = {} - for feature_name in list(cfg.out_dimension_dict.keys()): - var_file_dict[feature_name] = os.path.join( - var_dir, feature_name + '_' + str(cfg.out_dimension_dict[feature_name])) - - # normalise output acoustic data - if cfg.NORMCMP: - logger.info('normalising acoustic (output) features using method %s' % - cfg.output_feature_normalisation) - cmp_norm_info = None - if cfg.output_feature_normalisation == 'MVN': - normaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - # calculate mean and std vectors on the training data, and apply on the whole dataset - global_mean_vector = normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number], 0, cfg.cmp_dim) - global_std_vector = normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector, 0, cfg.cmp_dim) - - normaliser.feature_normalisation(nn_cmp_file_list[0:cfg.train_file_number+cfg.valid_file_number], - nn_cmp_norm_file_list[0:cfg.train_file_number+cfg.valid_file_number]) - cmp_norm_info = numpy.concatenate( - (global_mean_vector, global_std_vector), axis=0) - - elif cfg.output_feature_normalisation == 'MINMAX': - min_max_normaliser = 
MinMaxNormalisation(
- feature_dimension=cfg.cmp_dim)
- global_mean_vector = min_max_normaliser.compute_mean(
- nn_cmp_file_list[0:cfg.train_file_number])
- global_std_vector = min_max_normaliser.compute_std(
- nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector)
-
- min_max_normaliser = MinMaxNormalisation(
- feature_dimension=cfg.cmp_dim, min_value=0.01, max_value=0.99)
- min_max_normaliser.find_min_max_values(
- nn_cmp_file_list[0:cfg.train_file_number])
- min_max_normaliser.normalise_data(
- nn_cmp_file_list, nn_cmp_norm_file_list)
-
- cmp_min_vector = min_max_normaliser.min_vector
- cmp_max_vector = min_max_normaliser.max_vector
- cmp_norm_info = numpy.concatenate(
- (cmp_min_vector, cmp_max_vector), axis=0)
-
- else:
- logger.critical('Normalisation type %s is not supported!\n' % (
- cfg.output_feature_normalisation))
- raise ValueError('normalisation type %s is not supported' % cfg.output_feature_normalisation)
-
- cmp_norm_info = numpy.array(cmp_norm_info, 'float32')
- fid = open(norm_info_file, 'wb')
- cmp_norm_info.tofile(fid)
- fid.close()
- logger.info('saved %s vectors to %s' %
- (cfg.output_feature_normalisation, norm_info_file))
-
- feature_index = 0
- for feature_name in list(cfg.out_dimension_dict.keys()):
- feature_std_vector = numpy.array(
- global_std_vector[:, feature_index:feature_index+cfg.out_dimension_dict[feature_name]], 'float32')
-
- # open in binary mode: numpy.ndarray.tofile writes raw bytes
- fid = open(var_file_dict[feature_name], 'wb')
- feature_var_vector = feature_std_vector**2
- feature_var_vector.tofile(fid)
- fid.close()
-
- logger.info('saved %s variance vector to %s' %
- (feature_name, var_file_dict[feature_name]))
-
- feature_index += cfg.out_dimension_dict[feature_name]
-
- train_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number]
- train_y_file_list = nn_cmp_norm_file_list[0:cfg.train_file_number]
- valid_x_file_list = nn_label_norm_file_list[cfg.train_file_number:
- cfg.train_file_number+cfg.valid_file_number]
- valid_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number:
- cfg.train_file_number+cfg.valid_file_number]
- test_x_file_list = nn_label_norm_file_list[cfg.train_file_number +
- cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number]
- test_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number +
- cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number]
-
- # we need to know the label dimension before training the DNN
- # computing that requires us to look at the labels
- #
- # currently, there are two ways to do this
- if cfg.label_style == 'HTS':
- label_normaliser = HTSLabelNormalisation(
- question_file_name=cfg.question_file_name, add_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats)
- lab_dim = label_normaliser.dimension + cfg.appended_input_dim
-
- elif cfg.label_style == 'composed':
- label_composer = LabelComposer()
- label_composer.load_label_configuration(cfg.label_config_file)
- lab_dim = label_composer.compute_label_dimension()
-
- logger.info('label dimension is %d' % lab_dim)
-
- combined_model_arch = str(len(hidden_layer_size))
- for hid_size in hidden_layer_size:
- combined_model_arch += '_' + str(hid_size)
-
- nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.%f.rnn.model' \
- % (model_dir, cfg.combined_model_name, cfg.combined_feature_name, int(cfg.multistream_switch),
- combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number, cfg.hyper_params['learning_rate'])
-
- # DNN model training
- if cfg.TRAINDNN:
-
- var_dict = load_covariance(var_file_dict, cfg.out_dimension_dict)
-
- logger.info('training DNN')
-
- fid = open(norm_info_file, 'rb')
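# [editor's sketch, not in the original file] Layout of the norm_info file
# being read back here: two float32 vectors of length cmp_dim written back to
# back (mean then std for MVN, min then max for MINMAX), so reshaping the raw
# buffer to (2, -1) recovers one vector per row:
#     info = numpy.fromfile(path, dtype=numpy.float32).reshape((2, -1))
#     first_vec, second_vec = info[0], info[1]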
- cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32)
- fid.close()
- cmp_min_max = cmp_min_max.reshape((2, -1))
- cmp_mean_vector = cmp_min_max[0, ]
- cmp_std_vector = cmp_min_max[1, ]
-
- try:
- os.makedirs(model_dir)
- except OSError as e:
- if e.errno == errno.EEXIST:
- # not an error - just means directory already exists
- pass
- else:
- logger.critical(
- 'Failed to create model directory %s' % model_dir)
- logger.critical(' OS error was: %s' % e.strerror)
- raise
-
- try:
- train_DNN(train_xy_file_list=(train_x_file_list, train_y_file_list),
- valid_xy_file_list=(
- valid_x_file_list, valid_y_file_list),
- nnets_file_name=nnets_file_name,
- n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs,
- hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot, var_dict=var_dict,
- cmp_mean_vector=cmp_mean_vector, cmp_std_vector=cmp_std_vector, seq_dur_file_list=seq_dur_file_list)
- except KeyboardInterrupt:
- logger.critical('train_DNN interrupted via keyboard')
- # Could 'raise' the exception further, but that causes a deep traceback to be printed
- # which we don't care about for a keyboard interrupt. So, just bail out immediately
- sys.exit(1)
- except:
- logger.critical('train_DNN threw an exception')
- raise
-
- if cfg.GENBNFEA:
- '''
- Only run this step when you want to generate bottleneck features from the DNN
- '''
- temp_dir_name = '%s_%s_%d_%d_%d_%d_%s_hidden' \
- % (cfg.model_type, cfg.combined_feature_name,
- cfg.train_file_number, lab_dim, cfg.cmp_dim,
- len(hidden_layer_size), combined_model_arch)
- gen_dir = os.path.join(gen_dir, temp_dir_name)
-
- bottleneck_size = min(hidden_layer_size)
- bottleneck_index = 0
- # index of the narrowest hidden layer (the last one if there are ties)
- for i in range(len(hidden_layer_size)):
- if hidden_layer_size[i] == bottleneck_size:
- bottleneck_index = i
-
- logger.info('generating bottleneck features from DNN')
-
- try:
- os.makedirs(gen_dir)
- except OSError as e:
- if e.errno == errno.EEXIST:
- # not an error - just means directory already exists
- pass
- else:
- logger.critical(
- 'Failed to create generation directory %s' % gen_dir)
- logger.critical(' OS error was: %s' % e.strerror)
- raise
-
- gen_file_id_list = file_id_list[0:cfg.train_file_number +
- cfg.valid_file_number+cfg.test_file_number]
- test_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number +
- cfg.valid_file_number+cfg.test_file_number]
- test_d_file_list = seq_dur_file_list[cfg.train_file_number:cfg.train_file_number +
- cfg.valid_file_number+cfg.test_file_number]
-
- gen_file_list = prepare_file_path_list(
- gen_file_id_list, gen_dir, cfg.cmp_ext)
-
- dnn_hidden_generation(test_x_file_list, nnets_file_name,
- lab_dim, cfg.cmp_dim, gen_file_list, bottleneck_index)
-
- # generate parameters from DNN
- temp_dir_name = '%s_%s_%d_%d_%d_%d_%d_%d_%d' \
- % (cfg.combined_model_name, cfg.combined_feature_name, int(cfg.do_post_filtering),
- cfg.train_file_number, lab_dim, cfg.cmp_dim,
- len(hidden_layer_size), hidden_layer_size[0], hidden_layer_size[-1])
- gen_dir = os.path.join(gen_dir, temp_dir_name)
-
- gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number +
- cfg.valid_file_number+cfg.test_file_number]
- test_x_file_list = nn_label_norm_file_list[cfg.train_file_number:
- cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number]
- test_d_file_list = seq_dur_file_list[cfg.train_file_number:cfg.train_file_number +
- cfg.valid_file_number+cfg.test_file_number]
-
- if cfg.GenTestList:
- gen_file_id_list = test_id_list
- test_x_file_list = nn_label_norm_file_list
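# [editor's sketch, not in the original file] When GenTestList is set, the
# positional train/valid/test split above is bypassed: generation runs over
# the separate test_id_scp list, and the normalised label file lists prepared
# earlier already contain only those unseen test utterances.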
- test_d_file_list = seq_dur_file_list[cfg.train_file_number +
- cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number]
-
- if cfg.DNNGEN:
- logger.info('generating from DNN')
-
- try:
- os.makedirs(gen_dir)
- except OSError as e:
- if e.errno == errno.EEXIST:
- # not an error - just means directory already exists
- pass
- else:
- logger.critical(
- 'Failed to create generation directory %s' % gen_dir)
- logger.critical(' OS error was: %s' % e.strerror)
- raise
-
- gen_file_list = prepare_file_path_list(
- gen_file_id_list, gen_dir, cfg.cmp_ext)
- if cfg.network_type == "S2S":
- dnn_generation_S2S(test_x_file_list, test_d_file_list,
- nnets_file_name, lab_dim, cfg.cmp_dim, gen_file_list)
- else:
- dnn_generation(test_x_file_list, nnets_file_name,
- lab_dim, cfg.cmp_dim, gen_file_list)
-
- logger.debug('denormalising generated output using method %s' %
- cfg.output_feature_normalisation)
-
- fid = open(norm_info_file, 'rb')
- cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32)
- fid.close()
- cmp_min_max = cmp_min_max.reshape((2, -1))
- cmp_min_vector = cmp_min_max[0, ]
- cmp_max_vector = cmp_min_max[1, ]
-
- if cfg.output_feature_normalisation == 'MVN':
- denormaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim)
- denormaliser.feature_denormalisation(
- gen_file_list, gen_file_list, cmp_min_vector, cmp_max_vector)
-
- elif cfg.output_feature_normalisation == 'MINMAX':
- denormaliser = MinMaxNormalisation(
- cfg.cmp_dim, min_value=0.01, max_value=0.99, min_vector=cmp_min_vector, max_vector=cmp_max_vector)
- denormaliser.denormalise_data(gen_file_list, gen_file_list)
- else:
- logger.critical('denormalising method %s is not supported!\n' % (
- cfg.output_feature_normalisation))
- raise ValueError('denormalising method %s is not supported' % cfg.output_feature_normalisation)
-
- if cfg.AcousticModel:
- # perform MLPG to smooth parameter trajectory
- # lf0 is included, so the output features must have vuv.
- generator = ParameterGeneration(
- gen_wav_features=cfg.gen_wav_features)
- generator.acoustic_decomposition(
- gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict, var_file_dict, do_MLPG=cfg.do_MLPG)
-
- if cfg.DurationModel:
- ### Perform duration normalization (min.
state dur set to 1) ### - gen_dur_list = prepare_file_path_list( - gen_file_id_list, gen_dir, cfg.dur_ext) - gen_label_list = prepare_file_path_list( - gen_file_id_list, gen_dir, cfg.lab_ext) - in_gen_label_align_file_list = prepare_file_path_list( - gen_file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - - generator = ParameterGeneration( - gen_wav_features=cfg.gen_wav_features) - generator.duration_decomposition( - gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict) - - label_modifier = HTSLabelModification( - silence_pattern=cfg.silence_pattern) - label_modifier.modify_duration_labels( - in_gen_label_align_file_list, gen_dur_list, gen_label_list) - - # generate wav - if cfg.GENWAV: - logger.info('reconstructing waveform(s)') - print(len(gen_file_id_list)) - generate_wav(gen_dir, gen_file_id_list, cfg) # generated speech -# generate_wav(nn_cmp_dir, gen_file_id_list, cfg) # reference copy synthesis speech - - ### setting back to original conditions before calculating objective scores ### - if cfg.GenTestList: - in_label_align_file_list = prepare_file_path_list( - file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - binary_label_file_list = prepare_file_path_list( - file_id_list, binary_label_dir, cfg.lab_ext) - gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - - # evaluation: RMSE and CORR for duration - if cfg.CALMCD and cfg.DurationModel: - logger.info('calculating MCD') - - ref_data_dir = os.path.join(data_dir, 'ref_data') - - ref_dur_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.dur_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - calculator = IndividualDistortionComp() - - valid_file_id_list = file_id_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_file_id_list = file_id_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['dur'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_dur_list, cfg.dur_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover(n_cmp=cfg.dur_dim, silence_pattern=cfg.silence_pattern, - label_type=cfg.label_type, remove_frame_features=cfg.add_frame_features) - remover.remove_silence(in_file_list_dict['dur'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_dur_list) - - valid_dur_rmse, valid_dur_corr = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.dur_ext, cfg.dur_dim) - test_dur_rmse, test_dur_corr = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.dur_ext, cfg.dur_dim) - - logger.info('Develop: DNN -- RMSE: %.3f frames/phoneme; CORR: %.3f; ' - % (valid_dur_rmse, valid_dur_corr)) - logger.info('Test: DNN -- RMSE: %.3f frames/phoneme; CORR: %.3f; ' - % (test_dur_rmse, test_dur_corr)) - - # evaluation: calculate distortion - if cfg.CALMCD and cfg.AcousticModel: - logger.info('calculating MCD') - - ref_data_dir = os.path.join(data_dir, 'ref_data') - - ref_mgc_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.mgc_ext) - ref_bap_list = 
prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.bap_ext) - ref_lf0_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.lf0_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - calculator = IndividualDistortionComp() - - spectral_distortion = 0.0 - bap_mse = 0.0 - f0_mse = 0.0 - vuv_error = 0.0 - - valid_file_id_list = file_id_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_file_id_list = file_id_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - # get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - # use first feature in label -- hardcoded for now - silence_feature = 0 - - # Use these to trim silence: - untrimmed_test_labels = binary_label_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if 'mgc' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['mgc'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_mgc_list, cfg.mgc_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['mgc'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_mgc_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - # MCD - valid_spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - test_spectral_distortion *= (10 / - numpy.log(10)) * numpy.sqrt(2.0) # MCD - - if 'bap' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['bap'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_bap_list, cfg.bap_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['bap'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_bap_list) - valid_bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - test_bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - valid_bap_mse = valid_bap_mse / 10.0 - # Cassia's bap is computed from 10*log|S(w)|. 
if use HTS/SPTK style, do the same as MGC - test_bap_mse = test_bap_mse / 10.0 - - if 'lf0' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['lf0'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_lf0_list, cfg.lf0_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=cfg.silence_pattern, label_type=cfg.label_type) - remover.remove_silence(in_file_list_dict['lf0'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_lf0_list) - valid_f0_mse, valid_f0_corr, valid_vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - test_f0_mse, test_f0_corr, test_vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - - logger.info('Develop: DNN -- MCD: %.3f dB; BAP: %.3f dB; F0:- RMSE: %.3f Hz; CORR: %.3f; VUV: %.3f%%' - % (valid_spectral_distortion, valid_bap_mse, valid_f0_mse, valid_f0_corr, valid_vuv_error*100.)) - logger.info('Test : DNN -- MCD: %.3f dB; BAP: %.3f dB; F0:- RMSE: %.3f Hz; CORR: %.3f; VUV: %.3f%%' - % (test_spectral_distortion, test_bap_mse, test_f0_mse, test_f0_corr, test_vuv_error*100.)) - - -if __name__ == '__main__': - - # these things should be done even before trying to parse the command line - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.cfg - - # set up logging to use our custom class - logging.setLoggerClass(LoggerPlotter) - - # get a logger for this main function - logger = logging.getLogger("main") - - if len(sys.argv) != 2: - logger.critical('usage: run_dnn.sh [config file name]') - sys.exit(1) - - config_file = sys.argv[1] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - - if cfg.profile: - logger.info('profiling is activated') - import cProfile - import pstats - cProfile.run('main_function(cfg)', 'mainstats') - - # create a stream for the profiler to write to - profiling_output = io.StringIO() - p = pstats.Stats('mainstats', stream=profiling_output) - - # print stats to that stream - # here we just report the top 10 functions, sorted by total amount of time spent in each - p.strip_dirs().sort_stats('tottime').print_stats(10) - - # print the result to the log - logger.info('---Profiling result follows---\n%s' % - profiling_output.getvalue()) - profiling_output.close() - logger.info('---End of profiling result---') - - else: - main_function(cfg) - -# if gnp._boardId is not None: -# import gpu_lock -# gpu_lock.free_lock(gnp._boardId) - - sys.exit(0) - -import pickle -import gzip -import os -import sys -import errno -import time -import math - -# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why) -import numpy -#import gnumpy as gnp -# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself -import numpy.distutils.__config__ -# and only after that can we import theano -import theano - -from utils.providers import ListDataProvider - -from frontend.label_normalisation import HTSLabelNormalisation, XMLLabelNormalisation -from frontend.silence_remover import SilenceRemover -from frontend.silence_remover import trim_silence -from frontend.min_max_norm import MinMaxNormalisation 
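# [editor's sketch, not in the original file] Every train/valid/test selection
# below is positional over a single file id list, driven by
# cfg.train_file_number / cfg.valid_file_number / cfg.test_file_number; a
# minimal helper capturing that convention (illustrative only):
def _split_file_list(file_list, n_train, n_valid, n_test):
    # first n_train entries train, the next n_valid validate, the next n_test test
    train = file_list[0:n_train]
    valid = file_list[n_train:n_train + n_valid]
    test = file_list[n_train + n_valid:n_train + n_valid + n_test]
    return train, valid, test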
-from frontend.acoustic_composition import AcousticComposition
-from frontend.parameter_generation import ParameterGeneration
-from frontend.mean_variance_norm import MeanVarianceNorm
-
-# the new class for label composition and normalisation
-from frontend.label_composer import LabelComposer
-from frontend.label_modifier import HTSLabelModification
-#from frontend.mlpg_fast import MLParameterGenerationFast
-
-#from frontend.mlpg_fast_layer import MLParameterGenerationFastLayer
-
-
-import configuration
-from models.deep_rnn import DeepRecurrentNetwork
-from models.sdae import StackedDenoiseAutoEncoder
-
-from utils.compute_distortion import DistortionComputation, IndividualDistortionComp
-from utils.generate import generate_wav
-from utils.learn_rates import ExpDecreaseLearningRate
-
-from io_funcs.binary_io import BinaryIOCollection
-
-#import matplotlib.pyplot as plt
-# our custom logging class that can also plot
-#from logplot.logging_plotting import LoggerPlotter, MultipleTimeSeriesPlot, SingleWeightMatrixPlot
-from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot
-import logging
-import logging.config
-import io
-
-
-def extract_file_id_list(file_list):
- file_id_list = []
- for file_name in file_list:
- file_id = os.path.basename(os.path.splitext(file_name)[0])
- file_id_list.append(file_id)
-
- return file_id_list
-
-
-def read_file_list(file_name):
-
- logger = logging.getLogger("read_file_list")
-
- file_lists = []
- fid = open(file_name)
- for line in fid.readlines():
- line = line.strip()
- if len(line) < 1:
- continue
- file_lists.append(line)
- fid.close()
-
- logger.debug('Read file list from %s' % file_name)
- return file_lists
-
-
-def make_output_file_list(out_dir, in_file_lists):
- out_file_lists = []
-
- for in_file_name in in_file_lists:
- file_id = os.path.basename(in_file_name)
- out_file_name = out_dir + '/' + file_id
- out_file_lists.append(out_file_name)
-
- return out_file_lists
-
-
-def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True):
- if not os.path.exists(file_dir) and new_dir_switch:
- os.makedirs(file_dir)
- file_name_list = []
- for file_id in file_id_list:
- file_name = file_dir + '/' + file_id + file_extension
- file_name_list.append(file_name)
-
- return file_name_list
-
-
-def visualize_dnn(dnn):
-
- layer_num = len(dnn.params) # including input and output
- plotlogger = logging.getLogger("plotting")
-
- for i in range(layer_num):
- fig_name = 'Activation weights W' + str(i) + '_' + dnn.params[i].name
- fig_title = 'Activation weights of W' + str(i)
- xlabel = 'Neuron index of hidden layer ' + str(i)
- ylabel = 'Neuron index of hidden layer ' + str(i+1)
- if i == 0:
- xlabel = 'Input feature index'
- if i == layer_num-1:
- ylabel = 'Output feature index'
-
- aa = dnn.params[i].get_value(borrow=True).T
- print(aa.shape, aa.size)
- if aa.size > aa.shape[0]:
- # use the plotting logger here; 'logger' is not defined in this function
- plotlogger.create_plot(fig_name, SingleWeightMatrixPlot)
- plotlogger.add_plot_point(
- fig_name, fig_name, dnn.params[i].get_value(borrow=True).T)
- plotlogger.save_plot(fig_name, title=fig_name,
- xlabel=xlabel, ylabel=ylabel)
-
-
-def load_covariance(var_file_dict, out_dimension_dict):
- var = {}
- io_funcs = BinaryIOCollection()
- for feature_name in list(var_file_dict.keys()):
- var_values, dimension = io_funcs.load_binary_file_frame(
- var_file_dict[feature_name], 1)
-
- var_values = numpy.reshape(
- var_values, (out_dimension_dict[feature_name], 1))
-
- var[feature_name] = var_values
-
- return var
-
-
-def train_DNN(train_xy_file_list, valid_xy_file_list,
- nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False, var_dict=None,
- cmp_mean_vector=None, cmp_std_vector=None, init_dnn_model_file=None):
-
- # get loggers for this function
- # this one writes to both console and file
- logger = logging.getLogger("main.train_DNN")
- logger.debug('Starting train_DNN')
-
- if plot:
- # this one takes care of plotting duties
- plotlogger = logging.getLogger("plotting")
- # create an (empty) plot of training convergence, ready to receive data points
- plotlogger.create_plot('training convergence', MultipleSeriesPlot)
-
- try:
- assert numpy.sum(ms_outs) == n_outs
- except AssertionError:
- logger.critical(
- 'the summation of multi-stream outputs does not equal %d' % (n_outs))
- raise
-
- ####parameters#####
- finetune_lr = float(hyper_params['learning_rate'])
- training_epochs = int(hyper_params['training_epochs'])
- batch_size = int(hyper_params['batch_size'])
- l1_reg = float(hyper_params['l1_reg'])
- l2_reg = float(hyper_params['l2_reg'])
- warmup_epoch = int(hyper_params['warmup_epoch'])
- momentum = float(hyper_params['momentum'])
- warmup_momentum = float(hyper_params['warmup_momentum'])
-
- hidden_layer_size = hyper_params['hidden_layer_size']
-
- buffer_utt_size = buffer_size
- early_stop_epoch = int(hyper_params['early_stop_epochs'])
-
- hidden_activation = hyper_params['hidden_activation']
- output_activation = hyper_params['output_activation']
-
- model_type = hyper_params['model_type']
- hidden_layer_type = hyper_params['hidden_layer_type']
-
- # use a switch to turn on pretraining
- # pretraining may not help much; if so, we turn it off to save time
- do_pretraining = hyper_params['do_pretraining']
- pretraining_epochs = int(hyper_params['pretraining_epochs'])
- pretraining_lr = float(hyper_params['pretraining_lr'])
-
- sequential_training = hyper_params['sequential_training']
- dropout_rate = hyper_params['dropout_rate']
-
-# sequential_training = True
-
- buffer_size = int(buffer_size / batch_size) * batch_size
-
- ###################
- (train_x_file_list, train_y_file_list) = train_xy_file_list
- (valid_x_file_list, valid_y_file_list) = valid_xy_file_list
-
- logger.debug('Creating training data provider')
- train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list,
- n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, sequential=sequential_training, shuffle=True)
-
- logger.debug('Creating validation data provider')
- valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list,
- n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, sequential=sequential_training, shuffle=False)
-
- shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition()
- train_set_x, train_set_y = shared_train_set_xy
- # validation data is still read block by block
- shared_valid_set_xy, valid_set_x, valid_set_y = valid_data_reader.load_one_partition()
- valid_set_x, valid_set_y = shared_valid_set_xy
- train_data_reader.reset()
- valid_data_reader.reset()
-
- # temporarily we use the training set as pretrain_set_x.
- # we need to support any data for pretraining
-# pretrain_set_x = train_set_x
-
- # numpy random generator
- numpy_rng = numpy.random.RandomState(123)
- logger.info('building the model')
-
- dnn_model = None
- pretrain_fn = None # not all models support pretraining right now
- train_fn = None
- valid_fn = None
- # valid_fn and valid_model are the same; reserved to compute multi-stream distortion
- valid_model = None
- if model_type == 'DNN':
- dnn_model = DeepRecurrentNetwork(n_in=n_ins, hidden_layer_size=hidden_layer_size, n_out=n_outs,
- L1_reg=l1_reg, L2_reg=l2_reg, hidden_layer_type=hidden_layer_type, dropout_rate=dropout_rate)
- train_fn, valid_fn = dnn_model.build_finetune_functions(
- (train_set_x, train_set_y), (valid_set_x, valid_set_y)) # , batch_size=batch_size
- elif model_type == 'SDAE':
- dnn_model = StackedDenoiseAutoEncoder(n_in=n_ins, hidden_layer_size=hidden_layer_size, n_out=n_outs,
- L1_reg=l1_reg, L2_reg=l2_reg, hidden_layer_type=hidden_layer_type, dropout_rate=dropout_rate)
-
- if do_pretraining:
- # temporarily we use the training set as pretrain_set_x.
- # we need to support any data for pretraining
- pretrain_set_x = train_set_x
- pretraining_fn = dnn_model.pretraining_functions(pretrain_set_x)
-
- train_fn, valid_fn = dnn_model.build_finetune_functions(
- (train_set_x, train_set_y), (valid_set_x, valid_set_y)) # , batch_size=batch_size
-
- else:
- logger.critical('%s type NN model is not supported!' % (model_type))
- raise ValueError('%s type NN model is not supported' % model_type)
-
- # if pretraining is supported by more than one model, add the switch here
- # be careful to use autoencoder for pretraining here:
- if do_pretraining and model_type == 'SDAE':
- logger.info('pretraining the %s model' % (model_type))
-
- corruption_level = 0.0
- # in SDAE we do layer-wise pretraining using autoencoders
- for i in range(dnn_model.n_layers):
- for epoch in range(pretraining_epochs):
- sub_start_time = time.time() # wall-clock timing, matching the fine-tuning loop below
-
- pretrain_loss = []
- while (not train_data_reader.is_finish()):
- shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition()
- # if sequential training, the batch size will be the number of frames in an utterance
- if sequential_training:
- batch_size = temp_train_set_x.shape[0]
-
- n_train_batches = temp_train_set_x.shape[0] // batch_size # integer division: range() needs an int
- for index in range(n_train_batches):
- # send a batch to the shared variable, rather than pass the batch size and batch index to the finetune function
- pretrain_set_x.set_value(numpy.asarray(temp_train_set_x[index*batch_size:(
- index + 1)*batch_size], dtype=theano.config.floatX), borrow=True)
-
- pretrain_loss.append(pretraining_fn[i](corruption=corruption_level,
- learning_rate=pretraining_lr))
-
- sub_end_time = time.time()
- logger.info('Pre-training layer %i, epoch %d, cost %s, time spent %.2f' %
- (i+1, epoch+1, numpy.mean(pretrain_loss), (sub_end_time - sub_start_time)))
- train_data_reader.reset()
-
- logger.info('fine-tuning the %s model' % (model_type))
-
- start_time = time.time()
-
- best_dnn_model = dnn_model
- best_validation_loss = sys.float_info.max
- previous_loss = sys.float_info.max
-
- early_stop = 0
- epoch = 0
-
-# finetune_lr = 0.000125
- previous_finetune_lr = finetune_lr
-
- print(finetune_lr)
-
- while (epoch < training_epochs):
- epoch = epoch + 1
-
- current_momentum = momentum
- current_finetune_lr = finetune_lr
- if epoch <= warmup_epoch:
- current_finetune_lr = finetune_lr
- current_momentum = warmup_momentum
- else:
- current_finetune_lr = previous_finetune_lr * 0.5
-
- previous_finetune_lr = current_finetune_lr
-
- train_error = []
- sub_start_time = time.time()
-
- while (not train_data_reader.is_finish()):
-
- shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition()
-# train_set_x.set_value(numpy.asarray(temp_train_set_x, dtype=theano.config.floatX), borrow=True)
-# train_set_y.set_value(numpy.asarray(temp_train_set_y, dtype=theano.config.floatX), borrow=True)
-
- # if sequential training, the batch size will be the number of frames in an utterance
- if sequential_training:
- batch_size = temp_train_set_x.shape[0]
-
- n_train_batches = temp_train_set_x.shape[0] // batch_size # integer division: range() needs an int
- for index in range(n_train_batches):
- # send a batch to the shared variable, rather than pass the batch size and batch index to the finetune function
- train_set_x.set_value(numpy.asarray(temp_train_set_x[index*batch_size:(
- index + 1)*batch_size], dtype=theano.config.floatX), borrow=True)
- train_set_y.set_value(numpy.asarray(temp_train_set_y[index*batch_size:(
- index + 1)*batch_size], dtype=theano.config.floatX), borrow=True)
-
- this_train_error = train_fn(
- current_finetune_lr, current_momentum)
-
- train_error.append(this_train_error)
-
- train_data_reader.reset()
-
- logger.debug('calculating validation loss')
- validation_losses = []
- while (not valid_data_reader.is_finish()):
- shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_one_partition()
- valid_set_x.set_value(numpy.asarray(
- temp_valid_set_x, dtype=theano.config.floatX), borrow=True)
- valid_set_y.set_value(numpy.asarray(
- temp_valid_set_y, dtype=theano.config.floatX), borrow=True)
-
- this_valid_loss = valid_fn()
-
- validation_losses.append(this_valid_loss)
- valid_data_reader.reset()
-
- this_validation_loss = numpy.mean(validation_losses)
-
- this_train_valid_loss = numpy.mean(numpy.asarray(train_error))
-
- sub_end_time = time.time()
-
- loss_difference = this_validation_loss - previous_loss
-
- logger.info('epoch %i, validation error %f, train error %f time spent %.2f' % (
- epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time)))
- if plot:
- plotlogger.add_plot_point(
- 'training convergence', 'validation set', (epoch, this_validation_loss))
- plotlogger.add_plot_point(
- 'training convergence', 'training set', (epoch, this_train_valid_loss))
- plotlogger.save_plot(
- 'training convergence', title='Progress of training and validation error', xlabel='epochs', ylabel='error')
-
- if this_validation_loss < best_validation_loss:
- if epoch > 5:
- pickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))
-
- best_dnn_model = dnn_model
- best_validation_loss = this_validation_loss
-# logger.debug('validation loss decreased, so saving model')
-
- if this_validation_loss >= previous_loss:
- logger.debug('validation loss increased')
-
-# dbn = best_dnn_model
- early_stop += 1
-
- if epoch > 15 and early_stop > early_stop_epoch:
- logger.debug('stopping early')
- break
-
- if math.isnan(this_validation_loss):
- break
-
- previous_loss = this_validation_loss
-
- end_time = time.time()
-# cPickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))
-
- logger.info('overall training time: %.2fm validation error %f' %
- ((end_time - start_time) / 60., best_validation_loss))
-
- if plot:
- plotlogger.save_plot(
- 'training convergence', title='Final training and validation error', xlabel='epochs', ylabel='error')
-
- return best_validation_loss
-
-
-def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list):
- logger = logging.getLogger("dnn_generation")
- logger.debug('Starting dnn_generation')
-
- plotlogger = logging.getLogger("plotting")
-
- dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
- file_number = len(valid_file_list)
-
- for i in range(file_number):
- logger.info('generating %4d of %4d: %s' %
- (i+1, file_number, valid_file_list[i]))
- fid_lab = open(valid_file_list[i], 'rb')
- features = numpy.fromfile(fid_lab, dtype=numpy.float32)
- fid_lab.close()
- # integer division: truncate to a whole number of frames
- features = features[:(n_ins * (features.size // n_ins))]
- test_set_x = features.reshape((-1, n_ins))
-
- predicted_parameter = dnn_model.parameter_prediction(test_set_x)
-
- # write to cmp file
- predicted_parameter = numpy.array(predicted_parameter, 'float32')
- temp_parameter = predicted_parameter
- fid = open(out_file_list[i], 'wb')
- predicted_parameter.tofile(fid)
- logger.debug('saved to %s' % out_file_list[i])
- fid.close()
-
-
-def dnn_generation_lstm(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list):
- logger = logging.getLogger("dnn_generation")
- logger.debug('Starting dnn_generation')
-
- plotlogger = logging.getLogger("plotting")
-
- dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
- visualize_dnn(dnn_model)
-
- file_number = len(valid_file_list)
-
- for i in range(file_number):
- logger.info('generating %4d of %4d: %s' %
- (i+1, file_number, valid_file_list[i]))
- fid_lab = open(valid_file_list[i], 'rb')
- features = numpy.fromfile(fid_lab, dtype=numpy.float32)
- fid_lab.close()
- features = features[:(n_ins * (features.size // n_ins))]
- test_set_x = features.reshape((-1, n_ins))
-
- predicted_parameter = dnn_model.parameter_prediction_lstm(test_set_x)
-
- # write to cmp file
- predicted_parameter = numpy.array(predicted_parameter, 'float32')
- temp_parameter = predicted_parameter
- fid = open(out_file_list[i], 'wb')
- predicted_parameter.tofile(fid)
- logger.debug('saved to %s' % out_file_list[i])
- fid.close()
-
-# generate bottleneck layer as features
-
-
-def dnn_hidden_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list, bottleneck_index=0):
- # bottleneck_index is accepted because the call sites pass it; this
- # implementation always extracts the top hidden layer from the model
- logger = logging.getLogger("dnn_generation")
- logger.debug('Starting dnn_generation')
-
- plotlogger = logging.getLogger("plotting")
-
- dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
- file_number = len(valid_file_list)
-
- for i in range(file_number):
- logger.info('generating %4d of %4d: %s' %
- (i+1, file_number, valid_file_list[i]))
- fid_lab = open(valid_file_list[i], 'rb')
- features = numpy.fromfile(fid_lab, dtype=numpy.float32)
- fid_lab.close()
- features = features[:(n_ins * (features.size // n_ins))]
- features = features.reshape((-1, n_ins))
- temp_set_x = features.tolist()
- test_set_x = theano.shared(numpy.asarray(
- temp_set_x, dtype=theano.config.floatX))
-
- predicted_parameter = dnn_model.generate_top_hidden_layer(
- test_set_x=test_set_x)
-
- # write to cmp file
- predicted_parameter = numpy.array(predicted_parameter, 'float32')
- temp_parameter = predicted_parameter
- fid = open(out_file_list[i], 'wb')
- predicted_parameter.tofile(fid)
- logger.debug('saved to %s' % out_file_list[i])
- fid.close()
-
-
-def main_function(cfg):
-
- # get a logger for this main function
- logger = logging.getLogger("main")
-
- # get another logger to handle plotting duties
- plotlogger = logging.getLogger("plotting")
-
- # later, we might do this via a handler that is created, attached and configured
- # using the standard config mechanism of the logging module
- # but for now we need to do it manually
- plotlogger.set_plot_path(cfg.plot_dir)
-
- #### parameter setting########
- hidden_layer_size = cfg.hyper_params['hidden_layer_size']
-
- # prepare environment
-
- try:
- file_id_list = read_file_list(cfg.file_id_scp)
- logger.debug('Loaded file id list from %s' % cfg.file_id_scp)
- except IOError:
- # this means that open(...)
threw an error - logger.critical('Could not load file id list from %s' % - cfg.file_id_scp) - raise - - # total file number including training, development, and testing - total_file_number = len(file_id_list) - - data_dir = cfg.data_dir - - nn_cmp_dir = os.path.join( - data_dir, 'nn' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - nn_cmp_norm_dir = os.path.join( - data_dir, 'nn_norm' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - - model_dir = os.path.join(cfg.work_dir, 'nnets_model') - gen_dir = os.path.join(cfg.work_dir, 'gen') - - in_file_list_dict = {} - - for feature_name in list(cfg.in_dir_dict.keys()): - in_file_list_dict[feature_name] = prepare_file_path_list( - file_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False) - - nn_cmp_file_list = prepare_file_path_list( - file_id_list, nn_cmp_dir, cfg.cmp_ext) - nn_cmp_norm_file_list = prepare_file_path_list( - file_id_list, nn_cmp_norm_dir, cfg.cmp_ext) - - # normalisation information - norm_info_file = os.path.join(data_dir, 'norm_info' + cfg.combined_feature_name + - '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat') - - # normalise input full context label - # currently supporting two different forms of lingustic features - # later, we should generalise this - - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name, add_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - lab_dim = label_normaliser.dimension + cfg.appended_input_dim - logger.info('Input label dimension is %d' % lab_dim) - suffix = str(lab_dim) - # no longer supported - use new "composed" style labels instead - elif cfg.label_style == 'composed': - # label_normaliser = XMLLabelNormalisation(xpath_file_name=cfg.xpath_file_name) - suffix = 'composed' - - if cfg.process_labels_in_work_dir: - label_data_dir = cfg.work_dir - else: - label_data_dir = data_dir - - # the number can be removed - binary_label_dir = os.path.join(label_data_dir, 'binary_label_'+suffix) - nn_label_dir = os.path.join(label_data_dir, 'nn_no_silence_lab_'+suffix) - nn_label_norm_dir = os.path.join( - label_data_dir, 'nn_no_silence_lab_norm_'+suffix) - - in_label_align_file_list = prepare_file_path_list( - file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - binary_label_file_list = prepare_file_path_list( - file_id_list, binary_label_dir, cfg.lab_ext) - nn_label_file_list = prepare_file_path_list( - file_id_list, nn_label_dir, cfg.lab_ext) - nn_label_norm_file_list = prepare_file_path_list( - file_id_list, nn_label_norm_dir, cfg.lab_ext) - dur_file_list = prepare_file_path_list( - file_id_list, cfg.in_dur_dir, cfg.dur_ext) - lf0_file_list = prepare_file_path_list( - file_id_list, cfg.in_lf0_dir, cfg.lf0_ext) - - # to do - sanity check the label dimension here? - - min_max_normaliser = None - label_norm_file = 'label_norm_%s_%d.dat' % (cfg.label_style, lab_dim) - label_norm_file = os.path.join(label_data_dir, label_norm_file) - - if cfg.GenTestList: - try: - test_id_list = read_file_list(cfg.test_id_scp) - logger.debug('Loaded file id list from %s' % cfg.test_id_scp) - except IOError: - # this means that open(...) 
threw an error - logger.critical('Could not load file id list from %s' % - cfg.test_id_scp) - raise - - in_label_align_file_list = prepare_file_path_list( - test_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - binary_label_file_list = prepare_file_path_list( - test_id_list, binary_label_dir, cfg.lab_ext) - nn_label_file_list = prepare_file_path_list( - test_id_list, nn_label_dir, cfg.lab_ext) - nn_label_norm_file_list = prepare_file_path_list( - test_id_list, nn_label_norm_dir, cfg.lab_ext) - - if cfg.NORMLAB and (cfg.label_style == 'HTS'): - # simple HTS labels - logger.info( - 'preparing label data (input) using standard HTS style labels') - label_normaliser.perform_normalisation( - in_label_align_file_list, binary_label_file_list) - - remover = SilenceRemover(n_cmp=lab_dim, silence_pattern=cfg.silence_pattern, - remove_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - remover.remove_silence(binary_label_file_list, - in_label_align_file_list, nn_label_file_list) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - if cfg.GenTestList: - min_max_normaliser.load_min_max_values(label_norm_file) - else: - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if cfg.NORMLAB and (cfg.label_style == 'composed'): - # new flexible label preprocessor - - logger.info( - 'preparing label data (input) using "composed" style labels') - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - - logger.info('Loaded label configuration') - # logger.info('%s' % label_composer.configuration.labels ) - - lab_dim = label_composer.compute_label_dimension() - logger.info('label dimension will be %d' % lab_dim) - - if cfg.precompile_xpaths: - label_composer.precompile_xpaths() - - # there are now a set of parallel input label files (e.g, one set of HTS and another set of Ossian trees) - # create all the lists of these, ready to pass to the label composer - - in_label_align_file_list = {} - for label_style, label_style_required in label_composer.label_styles.items(): - if label_style_required: - logger.info( - 'labels of style %s are required - constructing file paths for them' % label_style) - if label_style == 'xpath': - in_label_align_file_list['xpath'] = prepare_file_path_list( - file_id_list, cfg.xpath_label_align_dir, cfg.utt_ext, False) - elif label_style == 'hts': - in_label_align_file_list['hts'] = prepare_file_path_list( - file_id_list, cfg.hts_label_align_dir, cfg.lab_ext, False) - else: - logger.critical( - 'unsupported label style %s specified in label configuration' % label_style) - raise Exception - - # now iterate through the files, one at a time, constructing the labels for them - num_files = len(file_id_list) - logger.info('the label styles required are %s' % - label_composer.label_styles) - - for i in range(num_files): - logger.info( - 'making input label features for %4d of %4d' % (i+1, num_files)) - - # iterate through the required label styles and open each corresponding label file - - # a dictionary of file descriptors, pointing at the required files - required_labels = {} - - for label_style, label_style_required in label_composer.label_styles.items(): - - # the files will be a parallel set of files for a single utterance - # e.g., the XML tree and an HTS 
label file - if label_style_required: - required_labels[label_style] = open( - in_label_align_file_list[label_style][i], 'r') - logger.debug(' opening label file %s' % - in_label_align_file_list[label_style][i]) - - logger.debug('label styles with open files: %s' % - required_labels) - label_composer.make_labels( - required_labels, out_file_name=binary_label_file_list[i], fill_missing_values=cfg.fill_missing_values, iterate_over_frames=cfg.iterate_over_frames) - - # now close all opened files - for fd in required_labels.values(): - fd.close() - - # silence removal - if cfg.remove_silence_using_binary_labels: - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from label using silence feature: %s' % ( - label_composer.configuration.labels[silence_feature])) - logger.info('Silence will be removed from CMP files in same way') - # Binary labels have 2 roles: both the thing trimmed and the instructions for trimming: - trim_silence(binary_label_file_list, nn_label_file_list, lab_dim, - binary_label_file_list, lab_dim, silence_feature) - else: - logger.info('No silence removal done') - # start from the labels we have just produced, not trimmed versions - nn_label_file_list = binary_label_file_list - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if min_max_normaliser != None and not cfg.GenTestList: - # save label normalisation information for unseen testing labels - label_min_vector = min_max_normaliser.min_vector - label_max_vector = min_max_normaliser.max_vector - label_norm_info = numpy.concatenate( - (label_min_vector, label_max_vector), axis=0) - - label_norm_info = numpy.array(label_norm_info, 'float32') - fid = open(label_norm_file, 'wb') - label_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (label_min_vector.size, label_norm_file)) - - # make output duration data - if cfg.MAKEDUR: - logger.info('creating duration (output) features') - feature_type = cfg.dur_feature_type - label_normaliser.prepare_dur_data( - in_label_align_file_list, dur_file_list, feature_type) - - # make output acoustic data - if cfg.MAKECMP: - logger.info('creating acoustic (output) features') - delta_win = cfg.delta_win # [-0.5, 0.0, 0.5] - acc_win = cfg.acc_win # [1.0, -2.0, 1.0] - - acoustic_worker = AcousticComposition( - delta_win=delta_win, acc_win=acc_win) - if 'dur' in list(cfg.in_dir_dict.keys()) and cfg.AcousticModel: - acoustic_worker.make_equal_frames( - dur_file_list, lf0_file_list, cfg.in_dimension_dict) - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, cfg.in_dimension_dict, cfg.out_dimension_dict) - - if cfg.remove_silence_using_binary_labels: - # do this to get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from CMP using binary label file') - - # overwrite the untrimmed audio with the trimmed version: - trim_silence(nn_cmp_file_list, nn_cmp_file_list, cfg.cmp_dim, - binary_label_file_list, lab_dim, silence_feature) - - else: # back off to previous method using 
HTS labels:
- remover = SilenceRemover(n_cmp=cfg.cmp_dim, silence_pattern=cfg.silence_pattern,
- remove_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats)
- remover.remove_silence(nn_cmp_file_list[0:cfg.train_file_number+cfg.valid_file_number],
- in_label_align_file_list[0:cfg.train_file_number +
- cfg.valid_file_number],
- nn_cmp_file_list[0:cfg.train_file_number+cfg.valid_file_number]) # save to itself
-
- # save acoustic normalisation information for normalising the features back
- var_dir = os.path.join(data_dir, 'var')
- if not os.path.exists(var_dir):
- os.makedirs(var_dir)
-
- var_file_dict = {}
- for feature_name in list(cfg.out_dimension_dict.keys()):
- var_file_dict[feature_name] = os.path.join(
- var_dir, feature_name + '_' + str(cfg.out_dimension_dict[feature_name]))
-
- # normalise output acoustic data
- if cfg.NORMCMP:
- logger.info('normalising acoustic (output) features using method %s' %
- cfg.output_feature_normalisation)
- cmp_norm_info = None
- if cfg.output_feature_normalisation == 'MVN':
- normaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim)
- # calculate mean and std vectors on the training data, and apply on the whole dataset
- global_mean_vector = normaliser.compute_mean(
- nn_cmp_file_list[0:cfg.train_file_number], 0, cfg.cmp_dim)
- global_std_vector = normaliser.compute_std(
- nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector, 0, cfg.cmp_dim)
-
- normaliser.feature_normalisation(nn_cmp_file_list[0:cfg.train_file_number+cfg.valid_file_number],
- nn_cmp_norm_file_list[0:cfg.train_file_number+cfg.valid_file_number])
- cmp_norm_info = numpy.concatenate(
- (global_mean_vector, global_std_vector), axis=0)
-
- elif cfg.output_feature_normalisation == 'MINMAX':
- min_max_normaliser = MinMaxNormalisation(
- feature_dimension=cfg.cmp_dim)
- global_mean_vector = min_max_normaliser.compute_mean(
- nn_cmp_file_list[0:cfg.train_file_number])
- global_std_vector = min_max_normaliser.compute_std(
- nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector)
-
- min_max_normaliser = MinMaxNormalisation(
- feature_dimension=cfg.cmp_dim, min_value=0.01, max_value=0.99)
- min_max_normaliser.find_min_max_values(
- nn_cmp_file_list[0:cfg.train_file_number])
- min_max_normaliser.normalise_data(
- nn_cmp_file_list, nn_cmp_norm_file_list)
-
- cmp_min_vector = min_max_normaliser.min_vector
- cmp_max_vector = min_max_normaliser.max_vector
- cmp_norm_info = numpy.concatenate(
- (cmp_min_vector, cmp_max_vector), axis=0)
-
- else:
- logger.critical('Normalisation type %s is not supported!\n' % (
- cfg.output_feature_normalisation))
- raise ValueError('normalisation type %s is not supported' % cfg.output_feature_normalisation)
-
- cmp_norm_info = numpy.array(cmp_norm_info, 'float32')
- fid = open(norm_info_file, 'wb')
- cmp_norm_info.tofile(fid)
- fid.close()
- logger.info('saved %s vectors to %s' %
- (cfg.output_feature_normalisation, norm_info_file))
-
- feature_index = 0
- for feature_name in list(cfg.out_dimension_dict.keys()):
- feature_std_vector = numpy.array(
- global_std_vector[:, feature_index:feature_index+cfg.out_dimension_dict[feature_name]], 'float32')
-
- # open in binary mode: numpy.ndarray.tofile writes raw bytes; store the
- # variance (std squared), which is what load_covariance expects
- fid = open(var_file_dict[feature_name], 'wb')
- feature_var_vector = feature_std_vector**2
- feature_var_vector.tofile(fid)
- fid.close()
-
- logger.info('saved %s variance vector to %s' %
- (feature_name, var_file_dict[feature_name]))
-
- feature_index += cfg.out_dimension_dict[feature_name]
-
- train_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number]
- train_y_file_list = nn_cmp_norm_file_list[0:cfg.train_file_number]
- valid_x_file_list = nn_label_norm_file_list[cfg.train_file_number:
cfg.train_file_number+cfg.valid_file_number] - valid_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - test_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - # we need to know the label dimension before training the DNN - # computing that requires us to look at the labels - # - # currently, there are two ways to do this - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name, add_frame_features=cfg.add_frame_features, subphone_feats=cfg.subphone_feats) - lab_dim = label_normaliser.dimension + cfg.appended_input_dim - - elif cfg.label_style == 'composed': - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - logger.info('label dimension is %d' % lab_dim) - - combined_model_arch = str(len(hidden_layer_size)) - for hid_size in hidden_layer_size: - combined_model_arch += '_' + str(hid_size) - - nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.%f.rnn.model' \ - % (model_dir, cfg.combined_model_name, cfg.combined_feature_name, int(cfg.multistream_switch), - combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number, cfg.hyper_params['learning_rate']) - - # DNN model training - if cfg.TRAINDNN: - - var_dict = load_covariance(var_file_dict, cfg.out_dimension_dict) - - logger.info('training DNN') - - fid = open(norm_info_file, 'rb') - cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32) - fid.close() - cmp_min_max = cmp_min_max.reshape((2, -1)) - cmp_mean_vector = cmp_min_max[0, ] - cmp_std_vector = cmp_min_max[1, ] - - try: - os.makedirs(model_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create model directory %s' % model_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - try: - train_DNN(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot, var_dict=var_dict, - cmp_mean_vector=cmp_mean_vector, cmp_std_vector=cmp_std_vector) - except KeyboardInterrupt: - logger.critical('train_DNN interrupted via keyboard') - # Could 'raise' the exception further, but that causes a deep traceback to be printed - # which we don't care about for a keyboard interrupt. 
So, just bail out immediately
- sys.exit(1)
- except:
- logger.critical('train_DNN threw an exception')
- raise
-
- if cfg.GENBNFEA:
- '''
- Only run this step when you want to generate bottleneck features from the DNN
- '''
- temp_dir_name = '%s_%s_%d_%d_%d_%d_%s_hidden' \
- % (cfg.model_type, cfg.combined_feature_name,
- cfg.train_file_number, lab_dim, cfg.cmp_dim,
- len(hidden_layer_size), combined_model_arch)
- gen_dir = os.path.join(gen_dir, temp_dir_name)
-
- bottleneck_size = min(hidden_layer_size)
- bottleneck_index = 0
- # index of the narrowest hidden layer (the last one if there are ties)
- for i in range(len(hidden_layer_size)):
- if hidden_layer_size[i] == bottleneck_size:
- bottleneck_index = i
-
- logger.info('generating bottleneck features from DNN')
-
- try:
- os.makedirs(gen_dir)
- except OSError as e:
- if e.errno == errno.EEXIST:
- # not an error - just means directory already exists
- pass
- else:
- logger.critical(
- 'Failed to create generation directory %s' % gen_dir)
- logger.critical(' OS error was: %s' % e.strerror)
- raise
-
- gen_file_id_list = file_id_list[0:cfg.train_file_number +
- cfg.valid_file_number+cfg.test_file_number]
- test_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number +
- cfg.valid_file_number+cfg.test_file_number]
-
- gen_file_list = prepare_file_path_list(
- gen_file_id_list, gen_dir, cfg.cmp_ext)
-
- dnn_hidden_generation(test_x_file_list, nnets_file_name,
- lab_dim, cfg.cmp_dim, gen_file_list, bottleneck_index)
-
- # generate parameters from DNN
- temp_dir_name = '%s_%s_%d_%d_%d_%d_%d_%d_%d' \
- % (cfg.combined_model_name, cfg.combined_feature_name, int(cfg.do_post_filtering),
- cfg.train_file_number, lab_dim, cfg.cmp_dim,
- len(hidden_layer_size), hidden_layer_size[0], hidden_layer_size[-1])
- gen_dir = os.path.join(gen_dir, temp_dir_name)
-
- gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number +
- cfg.valid_file_number+cfg.test_file_number]
- test_x_file_list = nn_label_norm_file_list[cfg.train_file_number:
- cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number]
-
- if cfg.GenTestList:
- gen_file_id_list = test_id_list
- test_x_file_list = nn_label_norm_file_list
-
- if cfg.DNNGEN:
- logger.info('generating from DNN')
-
- try:
- os.makedirs(gen_dir)
- except OSError as e:
- if e.errno == errno.EEXIST:
- # not an error - just means directory already exists
- pass
- else:
- logger.critical(
- 'Failed to create generation directory %s' % gen_dir)
- logger.critical(' OS error was: %s' % e.strerror)
- raise
-
- gen_file_list = prepare_file_path_list(
- gen_file_id_list, gen_dir, cfg.cmp_ext)
- dnn_generation(test_x_file_list, nnets_file_name,
- lab_dim, cfg.cmp_dim, gen_file_list)
-
- logger.debug('denormalising generated output using method %s' %
- cfg.output_feature_normalisation)
-
- fid = open(norm_info_file, 'rb')
- cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32)
- fid.close()
- cmp_min_max = cmp_min_max.reshape((2, -1))
- cmp_min_vector = cmp_min_max[0, ]
- cmp_max_vector = cmp_min_max[1, ]
-
- if cfg.output_feature_normalisation == 'MVN':
- denormaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim)
- denormaliser.feature_denormalisation(
- gen_file_list, gen_file_list, cmp_min_vector, cmp_max_vector)
-
- elif cfg.output_feature_normalisation == 'MINMAX':
- denormaliser = MinMaxNormalisation(
- cfg.cmp_dim, min_value=0.01, max_value=0.99, min_vector=cmp_min_vector, max_vector=cmp_max_vector)
- denormaliser.denormalise_data(gen_file_list, gen_file_list)
- else:
- logger.critical('denormalising method %s is not supported!\n' % (
- cfg.output_feature_normalisation))
- raise ValueError('denormalising method %s is not supported' % cfg.output_feature_normalisation)
-
- if cfg.AcousticModel:
- # perform MLPG to smooth parameter trajectory
- # lf0 is included, so the output features must have vuv.
- generator = ParameterGeneration(
- gen_wav_features=cfg.gen_wav_features)
- generator.acoustic_decomposition(
- gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict, var_file_dict, do_MLPG=cfg.do_MLPG)
-
- if cfg.DurationModel:
- ### Perform duration normalization (min. state dur set to 1) ###
- gen_dur_list = prepare_file_path_list(
- gen_file_id_list, gen_dir, cfg.dur_ext)
- gen_label_list = prepare_file_path_list(
- gen_file_id_list, gen_dir, cfg.lab_ext)
- in_gen_label_align_file_list = prepare_file_path_list(
- gen_file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False)
-
- generator = ParameterGeneration(
- gen_wav_features=cfg.gen_wav_features)
- generator.duration_decomposition(
- gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict)
-
- label_modifier = HTSLabelModification(
- silence_pattern=cfg.silence_pattern)
- label_modifier.modify_duration_labels(
- in_gen_label_align_file_list, gen_dur_list, gen_label_list)
-
- # generate wav
- if cfg.GENWAV:
- logger.info('reconstructing waveform(s)')
- print(len(gen_file_id_list))
- generate_wav(gen_dir, gen_file_id_list, cfg) # generated speech
-# generate_wav(nn_cmp_dir, gen_file_id_list, cfg) # reference copy synthesis speech
-
- ### setting back to original conditions before calculating objective scores ###
- if cfg.GenTestList:
- in_label_align_file_list = prepare_file_path_list(
- file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False)
- binary_label_file_list = prepare_file_path_list(
- file_id_list, binary_label_dir, cfg.lab_ext)
- gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number +
- cfg.valid_file_number+cfg.test_file_number]
-
- # evaluation: RMSE and CORR for duration
- if cfg.CALMCD and cfg.DurationModel:
- logger.info('calculating MCD')
-
- ref_data_dir = os.path.join(data_dir, 'ref_data')
-
- ref_dur_list = prepare_file_path_list(
- gen_file_id_list, ref_data_dir, cfg.dur_ext)
-
- in_gen_label_align_file_list = in_label_align_file_list[
- cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number]
- calculator = IndividualDistortionComp()
-
- valid_file_id_list = file_id_list[cfg.train_file_number:
- cfg.train_file_number+cfg.valid_file_number]
- test_file_id_list = file_id_list[cfg.train_file_number +
- cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number]
-
- if cfg.remove_silence_using_binary_labels:
- untrimmed_reference_data = in_file_list_dict['dur'][cfg.train_file_number:
- cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number]
- trim_silence(untrimmed_reference_data, ref_dur_list, cfg.dur_dim,
- untrimmed_test_labels, lab_dim, silence_feature)
- else:
- remover = SilenceRemover(
- n_cmp=cfg.dur_dim, silence_pattern=cfg.silence_pattern, remove_frame_features=cfg.add_frame_features)
- remover.remove_silence(in_file_list_dict['dur'][cfg.train_file_number:cfg.train_file_number +
- cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_dur_list)
-
- valid_dur_rmse, valid_dur_corr = calculator.compute_distortion(
- valid_file_id_list, ref_data_dir, gen_dir, cfg.dur_ext, cfg.dur_dim)
- test_dur_rmse, test_dur_corr = calculator.compute_distortion(
- test_file_id_list, ref_data_dir, gen_dir, cfg.dur_ext, cfg.dur_dim)
-
- logger.info('Develop: DNN -- RMSE: %.3f frames/phoneme; CORR: 
%.3f; ' - % (valid_dur_rmse, valid_dur_corr)) - logger.info('Test: DNN -- RMSE: %.3f frames/phoneme; CORR: %.3f; ' - % (test_dur_rmse, test_dur_corr)) - - # evaluation: calculate distortion - if cfg.CALMCD and cfg.AcousticModel: - logger.info('calculating MCD') - - ref_data_dir = os.path.join(data_dir, 'ref_data') - - ref_mgc_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.mgc_ext) - ref_bap_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.bap_ext) - ref_lf0_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.lf0_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - calculator = IndividualDistortionComp() - - spectral_distortion = 0.0 - bap_mse = 0.0 - f0_mse = 0.0 - vuv_error = 0.0 - - valid_file_id_list = file_id_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_file_id_list = file_id_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - # get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - # use first feature in label -- hardcoded for now - silence_feature = 0 - - # Use these to trim silence: - untrimmed_test_labels = binary_label_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if 'mgc' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['mgc'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_mgc_list, cfg.mgc_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(in_file_list_dict['mgc'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_mgc_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - # MCD - valid_spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - test_spectral_distortion *= (10 / - numpy.log(10)) * numpy.sqrt(2.0) # MCD - - if 'bap' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['bap'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_bap_list, cfg.bap_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(in_file_list_dict['bap'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_bap_list) - valid_bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - test_bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - # Cassia's bap is computed from 10*log|S(w)|. 
if use HTS/SPTK style, do the same as MGC - valid_bap_mse = valid_bap_mse / 10.0 - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - test_bap_mse = test_bap_mse / 10.0 - - if 'lf0' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['lf0'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_lf0_list, cfg.lf0_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(in_file_list_dict['lf0'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_lf0_list) - valid_f0_mse, valid_f0_corr, valid_vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - test_f0_mse, test_f0_corr, test_vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - - logger.info('Develop: DNN -- MCD: %.3f dB; BAP: %.3f dB; F0:- RMSE: %.3f Hz; CORR: %.3f; VUV: %.3f%%' - % (valid_spectral_distortion, valid_bap_mse, valid_f0_mse, valid_f0_corr, valid_vuv_error*100.)) - logger.info('Test : DNN -- MCD: %.3f dB; BAP: %.3f dB; F0:- RMSE: %.3f Hz; CORR: %.3f; VUV: %.3f%%' - % (test_spectral_distortion, test_bap_mse, test_f0_mse, test_f0_corr, test_vuv_error*100.)) - - -if __name__ == '__main__': - - # these things should be done even before trying to parse the command line - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.cfg - - # set up logging to use our custom class - logging.setLoggerClass(LoggerPlotter) - - # get a logger for this main function - logger = logging.getLogger("main") - - if len(sys.argv) != 2: - logger.critical('usage: run_dnn.sh [config file name]') - sys.exit(1) - - config_file = sys.argv[1] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - - if cfg.profile: - logger.info('profiling is activated') - import cProfile - import pstats - cProfile.run('main_function(cfg)', 'mainstats') - - # create a stream for the profiler to write to - profiling_output = io.StringIO() - p = pstats.Stats('mainstats', stream=profiling_output) - - # print stats to that stream - # here we just report the top 10 functions, sorted by total amount of time spent in each - p.strip_dirs().sort_stats('tottime').print_stats(10) - - # print the result to the log - logger.info('---Profiling result follows---\n%s' % - profiling_output.getvalue()) - profiling_output.close() - logger.info('---End of profiling result---') - - else: - main_function(cfg) - -# if gnp._boardId is not None: -# import gpu_lock -# gpu_lock.free_lock(gnp._boardId) - - sys.exit(0) - -import pickle -import gzip -import os -import sys -import errno -import time -import math - -# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why) -import numpy -#import gnumpy as gnp -# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself -import numpy.distutils.__config__ -# and only after that can we import theano -import theano -#import theano.tensor as T - - -from utils.providers import ListDataProvider - -from frontend.label_normalisation import HTSLabelNormalisation, HTSDurationLabelNormalisation, 
XMLLabelNormalisation
-from frontend.silence_remover import SilenceRemover
-from frontend.silence_remover import trim_silence
-from frontend.min_max_norm import MinMaxNormalisation
-from frontend.acoustic_composition import AcousticComposition
-from frontend.parameter_generation import ParameterGeneration
-from frontend.mean_variance_norm import MeanVarianceNorm
-
-# the new class for label composition and normalisation
-from frontend.label_composer import LabelComposer
-#from frontend.mlpg_fast import MLParameterGenerationFast
-
-#from frontend.mlpg_fast_layer import MLParameterGenerationFastLayer
-
-
-import configuration
-from models.deep_rnn import DeepRecurrentNetwork
-
-from utils.compute_distortion import DistortionComputation, IndividualDistortionComp
-from utils.generate import generate_wav
-from utils.learn_rates import ExpDecreaseLearningRate
-
-from io_funcs.binary_io import BinaryIOCollection
-
-#import matplotlib.pyplot as plt
-# our custom logging class that can also plot
-#from logplot.logging_plotting import LoggerPlotter, MultipleTimeSeriesPlot, SingleWeightMatrixPlot
-from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot
-import logging  # as logging
-import logging.config
-import io
-
-
-def store_network(nnets_file_name, outdir):
-    print('store network')
-
-    if not os.path.isdir(outdir):
-        os.makedirs(outdir)
-
-    dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
-    names = [p.name for p in dnn_model.params]
-    param_vals = [p.get_value(borrow=True) for p in dnn_model.params]
-    shapes = [numpy.shape(p) for p in param_vals]
-    print(cfg.hidden_layer_size)
-    layer_types = list(cfg.hidden_layer_type)  # copy, so appending doesn't mutate cfg
-    if cfg.output_activation == 'linear':
-        layer_types.append('LINEAR')
-    else:
-        sys.exit('unsupported output activation')
-    assert len(param_vals) == len(layer_types) * 2  # W and b for each layer
-    print(names)
-
-    p_ix = 0
-    for (l_ix, layer_type) in enumerate(layer_types):
-        layer_name = 'LAYER_' + str(l_ix+1).zfill(3) + '_' + layer_type + '_'
-        # print layer_name
-        for part in ['W', 'b']:
-            assert names[p_ix] == part
-            fname = layer_name + part
-            print(fname)
-            #numpy.savetxt(os.path.join(outdir, fname + '.txt'), param_vals[p_ix])
-            numpy.save(os.path.join(outdir, fname + '.npy'), param_vals[p_ix])
-
-            p_ix += 1
-
-    # Input normalisation:-
-    if cfg.process_labels_in_work_dir:
-        label_data_dir = cfg.work_dir
-    else:
-        label_data_dir = cfg.data_dir
-    label_norm_file = 'label_norm_%s.dat' % (cfg.label_style)
-    label_norm_file = os.path.join(label_data_dir, label_norm_file)
-
-    lab_norm_data = numpy.fromfile(label_norm_file, 'float32')
-    labsize = numpy.shape(lab_norm_data)[0]
-
-    # integer division: the file holds two equal-length halves (min then max)
-    min_vect = lab_norm_data[:(labsize//2)]
-    max_vect = lab_norm_data[(labsize//2):]
-
-    print(min_vect)
-    print(max_vect)
-
-    fname = 'NORM_INPUT_MIN'
-    numpy.save(os.path.join(outdir, fname + '.npy'), min_vect)
-    fname = 'NORM_INPUT_MAX'
-    numpy.save(os.path.join(outdir, fname + '.npy'), max_vect)
-
-    # output norm
-    assert cfg.output_feature_normalisation == 'MVN'
-    norm_info_file = os.path.join(cfg.data_dir,
-                                  'norm_info' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat')
-
-    out_norm_data = numpy.fromfile(norm_info_file, 'float32')
-    outsize = numpy.shape(out_norm_data)[0]
-
-    # integer division again: mean vector first, then std vector
-    mean_vect = out_norm_data[:(outsize//2)]
-    std_vect = out_norm_data[(outsize//2):]
-
-    print(mean_vect)
-    print(std_vect)
-
-    fname = 'NORM_OUTPUT_MEAN'
-    numpy.save(os.path.join(outdir, fname + '.npy'), mean_vect)
-    fname = 'NORM_OUTPUT_STD'
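
# The loop above wrote each layer out as LAYER_<nnn>_<TYPE>_W.npy and
# ..._b.npy. A minimal sketch of reading such a pair back, assuming only
# the naming scheme used above (the directory and layer type are
# illustrative):
import os
import numpy

def load_exported_layer(outdir, index, layer_type):
    """Load the W/b pair that store_network saved for one layer."""
    base = 'LAYER_' + str(index).zfill(3) + '_' + layer_type + '_'
    W = numpy.load(os.path.join(outdir, base + 'W.npy'))
    b = numpy.load(os.path.join(outdir, base + 'b.npy'))
    return W, b

# e.g. W1, b1 = load_exported_layer('exported_model', 1, 'TANH')
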
-    numpy.save(os.path.join(outdir, fname + '.npy'), std_vect)
-
-    in_streams = list(cfg.in_dimension_dict.keys())
-    indims = [str(cfg.in_dimension_dict[s]) for s in in_streams]
-    out_streams = list(cfg.out_dimension_dict.keys())
-    outdims = [str(cfg.out_dimension_dict[s]) for s in out_streams]
-
-    f = open(os.path.join(outdir, 'stream_info.txt'), 'w')
-    f.write(' '.join(in_streams) + '\n')
-    f.write(' '.join(indims) + '\n')
-    f.write(' '.join(out_streams) + '\n')
-    f.write(' '.join(outdims) + '\n')
-    f.close()
-
-
-def main_function(cfg, outdir, model_pickle_file=None):
-
-    hidden_layer_size = cfg.hyper_params['hidden_layer_size']
-    data_dir = cfg.data_dir
-    model_dir = os.path.join(cfg.work_dir, 'nnets_model')
-#    norm_info_file = os.path.join(data_dir, 'norm_info' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat')
-
-    # normalise input full context label
-    if cfg.label_style == 'HTS':
-        label_normaliser = HTSLabelNormalisation(
-            question_file_name=cfg.question_file_name)
-        lab_dim = label_normaliser.dimension + cfg.appended_input_dim
-        print(('Input label dimension is %d' % lab_dim))
-        suffix = str(lab_dim)
-    elif cfg.label_style == 'HTS_duration':
-        label_normaliser = HTSDurationLabelNormalisation(
-            question_file_name=cfg.question_file_name)
-        lab_dim = label_normaliser.dimension  # + cfg.appended_input_dim
-        print(('Input label dimension is %d' % lab_dim))
-        suffix = str(lab_dim)
-    # no longer supported - use new "composed" style labels instead
-    elif cfg.label_style == 'composed':
-        #    label_normaliser = XMLLabelNormalisation(xpath_file_name=cfg.xpath_file_name)
-        suffix = 'composed'
-
-    combined_model_arch = str(len(hidden_layer_size))
-    for hid_size in hidden_layer_size:
-        combined_model_arch += '_' + str(hid_size)
-
-    # if made with run_lstm:--
-    '''
-    nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.%f.rnn.model' \
-        %(model_dir, cfg.combined_model_name, cfg.combined_feature_name, int(cfg.multistream_switch),
-          combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number, cfg.hyper_params['learning_rate'])
-    '''
-
-    # if made with run_dnn:--
-    nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.model' \
-        % (model_dir, cfg.model_type, cfg.combined_feature_name, int(cfg.multistream_switch),
-           combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number)
-
-    # override the name computed from config variables if model_pickle_file specified:
-    if model_pickle_file is not None:
-        nnets_file_name = model_pickle_file
-
-    print('store DNN')
-
-    store_network(nnets_file_name, outdir)
-
-
-if __name__ == '__main__':
-    cfg = configuration.cfg
-    if len(sys.argv) not in [3, 4]:
-        print('usage: %s [config file] [model pickle file (optional)] [output directory]' % sys.argv[0])
-        sys.exit(1)
-
-    if len(sys.argv) == 3:
-        config_file = sys.argv[1]
-        outdir = sys.argv[2]
-
-        model_pickle_file = None
-
-    elif len(sys.argv) == 4:
-        config_file = sys.argv[1]
-        model_pickle_file = sys.argv[2]
-        outdir = sys.argv[3]
-
-    config_file = os.path.abspath(config_file)
-    cfg.configure(config_file)
-
-    main_function(cfg, outdir, model_pickle_file=model_pickle_file)
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-"""
-
-@author: Felipe Espic
-"""
-from subprocess import call
-# configparser: install with pip if missing (not the same module as the old 'ConfigParser')
-import configparser
-import magphase as mp
-import libutils as lu
-from shutil import copytree, copy2
-import scripts.label_st_align_to_var_rate as ltvr
-from os.path import join, dirname, realpath, isdir
-import sys
-this_dir = dirname(realpath(__file__))
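
# The demo script that follows drives Merlin by rewriting .conf files with
# configparser. A minimal round-trip sketch under assumed paths:
# optionxform = str keeps option names case-sensitive (Merlin's configs
# are), values like '%(TOPLEVEL)s/...' use configparser's own string
# interpolation, and write() needs a text-mode file handle.
import configparser

parser = configparser.ConfigParser()
parser.optionxform = str
parser['DEFAULT']['TOPLEVEL'] = '/tmp/exper'          # assumed path
parser.add_section('Paths')
parser['Paths']['file_id_list'] = '%(TOPLEVEL)s/file_id_list.scp'
with open('/tmp/example.conf', 'w') as f:             # 'w', not 'wb': write() emits str
    parser.write(f)
# reading the value back interpolates against DEFAULT:
# parser['Paths']['file_id_list'] == '/tmp/exper/file_id_list.scp'
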
-sys.path.append(realpath(this_dir + '/../../../tools/magphase/src'))
-
-
-def feat_extraction(in_wav_dir, file_name_token, out_feats_dir, d_opts):
-
-    # Display:
-    print("\nAnalysing file: " + file_name_token +
-          '.wav............................')
-
-    # File setup:
-    wav_file = join(in_wav_dir, file_name_token + '.wav')
-
-    mp.analysis_for_acoustic_modelling(wav_file, out_feats_dir,
-                                       mag_dim=d_opts['mag_dim'],
-                                       phase_dim=d_opts['phase_dim'],
-                                       b_const_rate=d_opts['b_const_rate'])
-    return
-
-
-def open_config_file(configfile_path):
-    parser = configparser.ConfigParser()
-    parser.optionxform = str
-    parser.read([configfile_path])
-    return parser
-
-
-def save_config(parser, file_path):
-    # configparser writes str, so the file must be opened in text mode ('w', not 'wb')
-    with open(file_path, 'w') as f:
-        parser.write(f)
-    return
-
-
-def mod_acoustic_config(parser, merlin_path, exper_path, exper_type, d_mp_opts):
-    parser['DEFAULT']['Merlin'] = merlin_path
-    parser['DEFAULT']['TOPLEVEL'] = exper_path
-
-    parser['Outputs']['mag'] = '%d' % d_mp_opts['mag_dim']
-    parser['Outputs']['dmag'] = '%d' % (d_mp_opts['mag_dim']*3)
-
-    parser['Outputs']['real'] = '%d' % d_mp_opts['phase_dim']
-    parser['Outputs']['imag'] = '%d' % d_mp_opts['phase_dim']
-    parser['Outputs']['dreal'] = '%d' % (d_mp_opts['phase_dim']*3)
-    parser['Outputs']['dimag'] = '%d' % (d_mp_opts['phase_dim']*3)
-
-    if exper_type == 'full':
-        parser['Architecture']['hidden_layer_size'] = "[1024, 1024, 1024, 1024, 1024, 1024]"
-        parser['Architecture']['hidden_layer_type'] = "['TANH', 'TANH', 'TANH', 'TANH', 'TANH', 'TANH']"
-        parser['Architecture']['model_file_name'] = "feed_forward_6_tanh"
-
-    if d_mp_opts['b_const_rate']:
-        parser['Labels']['label_align'] = '%(TOPLEVEL)s/acoustic_model/data/label_state_align'
-
-    parser = mod_number_of_utts(parser, exper_type)
-
-    return parser
-
-
-def mod_duration_config(parser, merlin_path, exper_path, exper_type, d_mp_opts):
-    parser['DEFAULT']['Merlin'] = merlin_path
-    parser['DEFAULT']['TOPLEVEL'] = exper_path
-
-    if exper_type == 'full':
-        parser['Architecture']['hidden_layer_size'] = "[1024, 1024, 1024, 1024, 1024, 1024]"
-        parser['Architecture']['hidden_layer_type'] = "['TANH', 'TANH', 'TANH', 'TANH', 'TANH', 'TANH']"
-        parser['Architecture']['model_file_name'] = "feed_forward_6_tanh"
-
-    if d_mp_opts['b_const_rate']:
-        parser['Labels']['label_align'] = '%(TOPLEVEL)s/acoustic_model/data/label_state_align'
-
-    parser = mod_number_of_utts(parser, exper_type)
-
-    return parser
-
-
-def mod_number_of_utts(parser, exper_type):
-
-    if exper_type == 'full':
-        parser['Paths']['file_id_list'] = '%(data)s/file_id_list_full.scp'
-        parser['Data']['train_file_number'] = '%d' % 1000
-        parser['Data']['valid_file_number'] = '%d' % 66
-        parser['Data']['test_file_number'] = '%d' % 65
-
-    elif exper_type == 'demo':
-        pass
-
-    return parser
-
-
-if __name__ == '__main__':
-
-    # INPUT:===================================================================================================
-
-    # Experiment type:-----------------------------------------------------------------------
-    # 'demo' (50 training utts) or 'full' (1k training utts)
-    exper_type = 'demo'
-
-    # Steps:---------------------------------------------------------------------------------
-    b_download_data = 1  # Downloads wavs and label data.
-    # Copies downloaded data into the experiment directory. Plus, makes a backup copy of this script.
-    b_setup_data = 1
-    b_config_merlin = 1  # Saves new configuration files for Merlin.
- b_feat_extr = 1 # Performs acoustic feature extraction using the MagPhase vocoder - # Converts the state aligned labels to variable rate if running in variable frame rate mode (d_mp_opts['b_const_rate'] = False) - b_conv_labs_rate = 1 - b_dur_train = 1 # Merlin: Training of duration model. - b_acous_train = 1 # Merlin: Training of acoustic model. - # Merlin: Generation of state durations using the duration model. - b_dur_syn = 1 - b_acous_syn = 1 # Merlin: Waveform generation for the utterances provided in ./test_synthesis/prompt-lab - - # MagPhase Vocoder:----------------------------------------------------------------------- - # Dictionary containing internal options for the MagPhase vocoder (mp). - d_mp_opts = {} - # Number of coefficients (bins) for magnitude feature M. - d_mp_opts['mag_dim'] = 100 - # Number of coefficients (bins) for phase features R and I. - d_mp_opts['phase_dim'] = 10 - d_mp_opts['b_const_rate'] = False # To work in constant frame rate mode. - # List containing the postfilters to apply during waveform generation. - d_mp_opts['l_pf_type'] = ['no', 'magphase', 'merlin'] - # You need to choose at least one: 'magphase' (magphase-tailored postfilter), 'merlin' (Merlin's style postfilter), 'no' (no postfilter) - - # Acoustic feature extraction done in multiprocessing mode (faster). - b_feat_ext_multiproc = 1 - - # PROCESS:=================================================================================================== - # Pre setup:------------------------------------------------------------------------------- - exper_name = 'slt_arctic_magphase_%s_mag_dim_%s_phase_dim_%d_const_rate_%d' % ( - exper_type, d_mp_opts['mag_dim'], d_mp_opts['phase_dim'], d_mp_opts['b_const_rate']) - exper_path = join(this_dir, 'experiments', exper_name) - merlin_path = realpath(this_dir + '/../../..') - submit_path = join(this_dir, 'scripts', 'submit.sh') - run_merlin_path = join(merlin_path, 'src', 'run_merlin.py') - dur_model_conf_path = join(exper_path, 'duration_model', 'conf') - acous_model_conf_path = join(exper_path, 'acoustic_model', 'conf') - - # Build config parsers:------------------------------------------------------------------- - - # Duration training config file: - pars_dur_train = open_config_file( - join(this_dir, 'conf_base', 'dur_train_base.conf')) - pars_dur_train = mod_duration_config( - pars_dur_train, merlin_path, exper_path, exper_type, d_mp_opts) - - # Duration synthesis: - pars_dur_synth = open_config_file( - join(this_dir, 'conf_base', 'dur_synth_base.conf')) - pars_dur_synth = mod_duration_config( - pars_dur_synth, merlin_path, exper_path, exper_type, d_mp_opts) - - # Acoustic training: - pars_acous_train = open_config_file( - join(this_dir, 'conf_base', 'acous_train_base.conf')) - pars_acous_train = mod_acoustic_config( - pars_acous_train, merlin_path, exper_path, exper_type, d_mp_opts) - - # Acoustic synth: - pars_acous_synth = open_config_file( - join(this_dir, 'conf_base', 'acous_synth_base.conf')) - pars_acous_synth = mod_acoustic_config( - pars_acous_synth, merlin_path, exper_path, exper_type, d_mp_opts) - - # Download Data:-------------------------------------------------------------------------- - if b_download_data: - data_zip_file = join(this_dir, 'slt_arctic_%s_data.zip' % exper_type) - call(['wget', 'http://felipeespic.com/depot/databases/merlin_demos/slt_arctic_%s_data.zip' % - exper_type, '-O', data_zip_file]) - call(['unzip', '-o', '-q', data_zip_file, '-d', this_dir]) - - # Setup 
Data:----------------------------------------------------------------------------- - if b_setup_data: - copytree(join(this_dir, 'slt_arctic_' + - exper_type + '_data', 'exper'), exper_path) - copy2(__file__, join(exper_path, 'run_demo_backup.py')) - - # Configure Merlin:----------------------------------------------------------------------- - if b_config_merlin: - save_config(pars_dur_train, join( - dur_model_conf_path, 'dur_train.conf')) - save_config(pars_dur_synth, join( - dur_model_conf_path, 'dur_synth.conf')) - save_config(pars_acous_train, join( - acous_model_conf_path, 'acous_train.conf')) - save_config(pars_acous_synth, join( - acous_model_conf_path, 'acous_synth.conf')) - - copy2(join(this_dir, 'conf_base', 'logging_config.conf'), join( - exper_path, 'acoustic_model', 'conf', 'logging_config.conf')) - - # Read file list: - file_id_list = pars_acous_train['Paths']['file_id_list'] - l_file_tokns = lu.read_text_file2( - file_id_list, dtype='string', comments='#').tolist() - acoustic_feats_path = pars_acous_train['Paths']['in_acous_feats_dir'] - - # Acoustic Feature Extraction:------------------------------------------------------------- - if b_feat_extr: - # Extract features: - lu.mkdir(acoustic_feats_path) - - if b_feat_ext_multiproc: - lu.run_multithreaded(feat_extraction, join( - exper_path, 'acoustic_model', 'data', 'wav'), l_file_tokns, acoustic_feats_path, d_mp_opts) - else: - for file_name_token in l_file_tokns: - feat_extraction(join(exper_path, 'acoustic_model', 'data', - 'wav'), file_name_token, acoustic_feats_path, d_mp_opts) - - # Labels Conversion to Variable Frame Rate:------------------------------------------------ - # NOTE: The script ./script/label_st_align_to_var_rate.py can be also called from comand line directly. - if b_conv_labs_rate and not d_mp_opts['b_const_rate']: - label_state_align = join( - exper_path, 'acoustic_model', 'data', 'label_state_align') - label_state_align_var_rate = pars_acous_train['Labels']['label_align'] - fs = int(pars_acous_train['Waveform']['samplerate']) - ltvr.convert(file_id_list, label_state_align, - acoustic_feats_path, fs, label_state_align_var_rate) - - # Run duration training:------------------------------------------------------------------- - if b_dur_train: - call([submit_path, run_merlin_path, join( - dur_model_conf_path, 'dur_train.conf')]) - - # Run acoustic train:---------------------------------------------------------------------- - if b_acous_train: - call([submit_path, run_merlin_path, join( - acous_model_conf_path, 'acous_train.conf')]) - - # Run duration syn:------------------------------------------------------------------------ - if b_dur_syn: - call([submit_path, run_merlin_path, join( - dur_model_conf_path, 'dur_synth.conf')]) - - # Run acoustic synth:---------------------------------------------------------------------- - if b_acous_syn: - call([submit_path, run_merlin_path, join( - acous_model_conf_path, 'acous_synth.conf')]) - - print("Done!") - -import pickle -import gzip -import os -import sys -import errno -import time -import math - -# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why) -import numpy -#import gnumpy as gnp -# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself -import numpy.distutils.__config__ -# and only after that can we import theano -#import theano - -from utils.providers import ListDataProvider - -from frontend.label_normalisation import HTSLabelNormalisation, 
XMLLabelNormalisation -from frontend.silence_remover import SilenceRemover -from frontend.silence_remover import trim_silence -from frontend.min_max_norm import MinMaxNormalisation -from frontend.acoustic_composition import AcousticComposition -from frontend.parameter_generation import ParameterGeneration -from frontend.mean_variance_norm import MeanVarianceNorm - -# the new class for label composition and normalisation -from frontend.label_composer import LabelComposer -#from frontend.mlpg_fast import MLParameterGenerationFast - -#from frontend.mlpg_fast_layer import MLParameterGenerationFastLayer - - -import configuration -from models.dnn_cm import DNN - -from utils.compute_distortion import DistortionComputation, IndividualDistortionComp -from utils.generate import generate_wav -from utils.learn_rates import ExpDecreaseLearningRate - -from io_funcs.binary_io import BinaryIOCollection - -#import matplotlib.pyplot as plt -# our custom logging class that can also plot -#from logplot.logging_plotting import LoggerPlotter, MultipleTimeSeriesPlot, SingleWeightMatrixPlot -from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot -import logging # as logging -import logging.config -import io - - -def extract_file_id_list(file_list): - file_id_list = [] - for file_name in file_list: - file_id = os.path.basename(os.path.splitext(file_name)[0]) - file_id_list.append(file_id) - - return file_id_list - - -def read_file_list(file_name): - - logger = logging.getLogger("read_file_list") - - file_lists = [] - fid = open(file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - file_lists.append(line) - fid.close() - - logger.debug('Read file list from %s' % file_name) - return file_lists - - -def make_output_file_list(out_dir, in_file_lists): - out_file_lists = [] - - for in_file_name in in_file_lists: - file_id = os.path.basename(in_file_name) - out_file_name = out_dir + '/' + file_id - out_file_lists.append(out_file_name) - - return out_file_lists - - -def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True): - if not os.path.exists(file_dir) and new_dir_switch: - os.makedirs(file_dir) - file_name_list = [] - for file_id in file_id_list: - file_name = file_dir + '/' + file_id + file_extension - file_name_list.append(file_name) - - return file_name_list - - -def visualize_dnn(dnn): - - layer_num = len(dnn.params) # including input and output - plotlogger = logging.getLogger("plotting") - - for i in range(layer_num): - fig_name = 'Activation weights W' + str(i) + '_' + dnn.params[i].name - fig_title = 'Activation weights of W' + str(i) - xlabel = 'Neuron index of hidden layer ' + str(i) - ylabel = 'Neuron index of hidden layer ' + str(i+1) - if i == 0: - xlabel = 'Input feature index' - if i == layer_num-1: - ylabel = 'Output feature index' - - aa = dnn.params[i].get_value(borrow=True).T - print(aa.shape, aa.size) - if aa.size > aa.shape[0]: - logger.create_plot(fig_name, SingleWeightMatrixPlot) - plotlogger.add_plot_point( - fig_name, fig_name, dnn.params[i].get_value(borrow=True).T) - plotlogger.save_plot(fig_name, title=fig_name, - xlabel=xlabel, ylabel=ylabel) - - -def load_covariance(var_file_dict, out_dimension_dict): - var = {} - io_funcs = BinaryIOCollection() - for feature_name in list(var_file_dict.keys()): - var_values, dimension = io_funcs.load_binary_file_frame( - var_file_dict[feature_name], 1) - - var_values = numpy.reshape( - var_values, (out_dimension_dict[feature_name], 1)) - - 
var[feature_name] = var_values - - return var - - -def train_DNN(train_xy_file_list, valid_xy_file_list, - nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False, var_dict=None, - cmp_mean_vector=None, cmp_std_vector=None, init_dnn_model_file=None): - - # get loggers for this function - # this one writes to both console and file - logger = logging.getLogger("main.train_DNN") - logger.debug('Starting train_DNN') - - if plot: - # this one takes care of plotting duties - plotlogger = logging.getLogger("plotting") - # create an (empty) plot of training convergence, ready to receive data points - logger.create_plot('training convergence', MultipleSeriesPlot) - - try: - assert numpy.sum(ms_outs) == n_outs - except AssertionError: - logger.critical( - 'the summation of multi-stream outputs does not equal to %d' % (n_outs)) - raise - - ####parameters##### - finetune_lr = float(hyper_params['learning_rate']) - training_epochs = int(hyper_params['training_epochs']) - batch_size = int(hyper_params['batch_size']) - l1_reg = float(hyper_params['l1_reg']) - l2_reg = float(hyper_params['l2_reg']) - warmup_epoch = int(hyper_params['warmup_epoch']) - momentum = float(hyper_params['momentum']) - warmup_momentum = float(hyper_params['warmup_momentum']) - - hidden_layer_size = hyper_params['hidden_layer_size'] - - buffer_utt_size = buffer_size - early_stop_epoch = int(hyper_params['early_stop_epochs']) - - hidden_activation = hyper_params['hidden_activation'] - output_activation = hyper_params['output_activation'] - - model_type = hyper_params['model_type'] - hidden_layer_type = hyper_params['hidden_layer_type'] - - # use a switch to turn on pretraining - # pretraining may not help too much, if this case, we turn it off to save time - do_pretraining = hyper_params['do_pretraining'] - pretraining_epochs = int(hyper_params['pretraining_epochs']) - pretraining_lr = float(hyper_params['pretraining_lr']) - - sequential_training = hyper_params['sequential_training'] - -# sequential_training = True - - buffer_size = int(buffer_size / batch_size) * batch_size - - ################### - (train_x_file_list, train_y_file_list) = train_xy_file_list - (valid_x_file_list, valid_y_file_list) = valid_xy_file_list - - logger.debug('Creating training data provider') - train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, sequential=sequential_training, shuffle=True) - - logger.debug('Creating validation data provider') - valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, sequential=sequential_training, shuffle=False) - - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition() - train_set_x, train_set_y = shared_train_set_xy - # validation data is still read block by block - shared_valid_set_xy, valid_set_x, valid_set_y = valid_data_reader.load_one_partition() - valid_set_x, valid_set_y = shared_valid_set_xy - train_data_reader.reset() - valid_data_reader.reset() - - # temporally we use the training set as pretrain_set_x. 
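
# The finetuning schedule implemented below keeps the learning rate fixed
# (with a separate warm-up momentum) for warmup_epoch epochs and then
# halves it every epoch. The decay rule in isolation, with illustrative
# hyper-parameter values rather than values from any config:
def scheduled_lr(epoch, base_lr=0.002, warmup_epoch=10):
    """Learning rate for a 1-indexed epoch under the halving schedule."""
    if epoch <= warmup_epoch:
        return base_lr
    return base_lr * (0.5 ** (epoch - warmup_epoch))

# scheduled_lr(10) == 0.002; scheduled_lr(11) == 0.001; scheduled_lr(13) == 0.00025
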
-    # we need to support any data for pretraining
-#    pretrain_set_x = train_set_x
-
-    # numpy random generator
-    numpy_rng = numpy.random.RandomState(123)
-    logger.info('building the model')
-
-    dnn_model = None
-    pretrain_fn = None  # not all models support pretraining right now
-    train_fn = None
-    valid_fn = None
-    # valid_fn and valid_model are the same; reserved to compute multi-stream distortion
-    valid_model = None
-    if model_type == 'DNN':
-        #    dnn_model = DeepRecurrentNetwork(n_in= n_ins, hidden_layer_size = hidden_layer_size, n_out = n_outs, L1_reg = l1_reg, L2_reg = l2_reg, hidden_layer_type = hidden_layer_type)
-
-        #    dnn_model = SequentialDNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs = n_outs,
-        #                              l1_reg = l1_reg, l2_reg = l2_reg,
-        #                              hidden_layer_sizes = hidden_layer_size)
-        dnn_model = DNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs,
-                        l1_reg=l1_reg, l2_reg=l2_reg,
-                        hidden_layer_sizes=hidden_layer_size)
-
-    else:
-        logger.critical('%s type NN model is not supported!' % (model_type))
-        raise
-
-    logger.info('fine-tuning the %s model' % (model_type))
-
-    start_time = time.time()
-
-    best_dnn_model = dnn_model
-    best_validation_loss = sys.float_info.max
-    previous_loss = sys.float_info.max
-
-    early_stop = 0
-    epoch = 0
-    all_batches = 0  # running count of minibatches seen, across all partitions
-
-#    finetune_lr = 0.000125
-    previous_finetune_lr = finetune_lr
-
-    print(finetune_lr)
-
-    while (epoch < training_epochs):
-        epoch = epoch + 1
-
-        current_momentum = momentum
-        current_finetune_lr = finetune_lr
-        if epoch <= warmup_epoch:
-            current_finetune_lr = finetune_lr
-            current_momentum = warmup_momentum
-        else:
-            current_finetune_lr = previous_finetune_lr * 0.5
-
-        previous_finetune_lr = current_finetune_lr
-
-        train_error = []
-        sub_start_time = time.time()
-
-        while (not train_data_reader.is_finish()):
-            shared_train_set_xy, train_set_x, train_set_y = train_data_reader.load_one_partition()
-
-            # integer division: train on whole minibatches only
-            n_train_batches = train_set_x.shape[0] // batch_size
-
-            logger.debug('this partition: %d frames (divided into %d batches of size %d)' % (
-                train_set_x.shape[0], n_train_batches, batch_size))
-
-            all_batches = all_batches + n_train_batches
-
-            for minibatch_index in range(n_train_batches):
-                this_train_error = dnn_model.finetune((train_set_x[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :],
-                                                       train_set_y[minibatch_index*batch_size:(minibatch_index+1)*batch_size, :]), batch_size, current_finetune_lr, current_momentum)
-                train_error.extend(this_train_error)
-
-        train_data_reader.reset()
-
-        logger.debug('calculating validation loss')
-        predicted_parameter = dnn_model.parameter_prediction(
-            valid_set_x)  # , valid_set_y
-        validation_losses = numpy.sum(
-            (predicted_parameter - valid_set_y) ** 2, axis=1)
-        this_validation_loss = numpy.mean(validation_losses)
-
-        this_train_valid_loss = numpy.mean(numpy.asarray(train_error))
-
-        sub_end_time = time.time()
-
-        loss_difference = this_validation_loss - previous_loss
-
-        logger.info('epoch %i, validation error %f, train error %f time spent %.2f' % (
-            epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time)))
-        if plot:
-            plotlogger.add_plot_point(
-                'training convergence', 'validation set', (epoch, this_validation_loss))
-            plotlogger.add_plot_point(
-                'training convergence', 'training set', (epoch, this_train_valid_loss))
-            plotlogger.save_plot(
-                'training convergence', title='Progress of training and validation error', xlabel='epochs', ylabel='error')
-
-        if this_validation_loss < best_validation_loss:
-            if epoch > 10:
-                pickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))
-
-                best_dnn_model = dnn_model
-                best_validation_loss = this_validation_loss
-#                logger.debug('validation loss decreased, so saving model')
-
-        if this_validation_loss >= previous_loss:
-            logger.debug('validation loss increased')
-
-#            dbn = best_dnn_model
-            early_stop += 1
-
-#        if early_stop > early_stop_epoch:
-#            logger.debug('stopping early')
-#            break
-
-        if math.isnan(this_validation_loss):
-            break
-
-        previous_loss = this_validation_loss
-
-    end_time = time.time()
-#    cPickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))
-
-    logger.info('overall training time: %.2fm validation error %f' %
-                ((end_time - start_time) / 60., best_validation_loss))
-
-    if plot:
-        plotlogger.save_plot(
-            'training convergence', title='Final training and validation error', xlabel='epochs', ylabel='error')
-
-    return best_validation_loss
-
-
-def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list):
-    logger = logging.getLogger("dnn_generation")
-    logger.debug('Starting dnn_generation')
-
-    plotlogger = logging.getLogger("plotting")
-
-    dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
-    file_number = len(valid_file_list)
-
-    for i in range(file_number):  # file_number
-        logger.info('generating %4d of %4d: %s' %
-                    (i+1, file_number, valid_file_list[i]))
-        fid_lab = open(valid_file_list[i], 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        # integer division: drop any trailing partial frame
-        features = features[:(n_ins * (features.size // n_ins))]
-        test_set_x = features.reshape((-1, n_ins))
-
-        predicted_parameter = dnn_model.parameter_prediction(test_set_x)
-
-        # write to cmp file
-        predicted_parameter = numpy.array(predicted_parameter, 'float32')
-        temp_parameter = predicted_parameter
-        fid = open(out_file_list[i], 'wb')
-        predicted_parameter.tofile(fid)
-        logger.debug('saved to %s' % out_file_list[i])
-        fid.close()
-
-
-def dnn_generation_lstm(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list):
-    logger = logging.getLogger("dnn_generation")
-    logger.debug('Starting dnn_generation')
-
-    plotlogger = logging.getLogger("plotting")
-
-    dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
-    visualize_dnn(dnn_model)
-
-    file_number = len(valid_file_list)
-
-    for i in range(file_number):  # file_number
-        logger.info('generating %4d of %4d: %s' %
-                    (i+1, file_number, valid_file_list[i]))
-        fid_lab = open(valid_file_list[i], 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        # integer division: drop any trailing partial frame
-        features = features[:(n_ins * (features.size // n_ins))]
-        test_set_x = features.reshape((-1, n_ins))
-
-        predicted_parameter = dnn_model.parameter_prediction_lstm(test_set_x)
-
-        # write to cmp file
-        predicted_parameter = numpy.array(predicted_parameter, 'float32')
-        temp_parameter = predicted_parameter
-        fid = open(out_file_list[i], 'wb')
-        predicted_parameter.tofile(fid)
-        logger.debug('saved to %s' % out_file_list[i])
-        fid.close()
-
-
-def main_function(cfg):
-
-    # get a logger for this main function
-    logger = logging.getLogger("main")
-
-    # get another logger to handle plotting duties
-    plotlogger = logging.getLogger("plotting")
-
-    # later, we might do this via a handler that is created, attached and configured
-    # using the standard config mechanism of the logging module
-    # but for now we need to do it manually
-    plotlogger.set_plot_path(cfg.plot_dir)
-
-    #### parameter setting########
-    hidden_layer_size = cfg.hyper_params['hidden_layer_size']
-
-    # prepare environment
-
-    try:
-        file_id_list = read_file_list(cfg.file_id_scp)
-        logger.debug('Loaded file id list from %s' %
cfg.file_id_scp) - except IOError: - # this means that open(...) threw an error - logger.critical('Could not load file id list from %s' % - cfg.file_id_scp) - raise - - # total file number including training, development, and testing - total_file_number = len(file_id_list) - - data_dir = cfg.data_dir - - nn_cmp_dir = os.path.join( - data_dir, 'nn' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - nn_cmp_norm_dir = os.path.join( - data_dir, 'nn_norm' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - - model_dir = os.path.join(cfg.work_dir, 'nnets_model') - gen_dir = os.path.join(cfg.work_dir, 'gen') - - in_file_list_dict = {} - - for feature_name in list(cfg.in_dir_dict.keys()): - in_file_list_dict[feature_name] = prepare_file_path_list( - file_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False) - - nn_cmp_file_list = prepare_file_path_list( - file_id_list, nn_cmp_dir, cfg.cmp_ext) - nn_cmp_norm_file_list = prepare_file_path_list( - file_id_list, nn_cmp_norm_dir, cfg.cmp_ext) - - # normalisation information - norm_info_file = os.path.join(data_dir, 'norm_info' + cfg.combined_feature_name + - '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat') - - # normalise input full context label - # currently supporting two different forms of lingustic features - # later, we should generalise this - - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension + cfg.appended_input_dim - logger.info('Input label dimension is %d' % lab_dim) - suffix = str(lab_dim) - # no longer supported - use new "composed" style labels instead - elif cfg.label_style == 'composed': - # label_normaliser = XMLLabelNormalisation(xpath_file_name=cfg.xpath_file_name) - suffix = 'composed' - - if cfg.process_labels_in_work_dir: - label_data_dir = cfg.work_dir - else: - label_data_dir = data_dir - - # the number can be removed - binary_label_dir = os.path.join(label_data_dir, 'binary_label_'+suffix) - nn_label_dir = os.path.join(label_data_dir, 'nn_no_silence_lab_'+suffix) - nn_label_norm_dir = os.path.join( - label_data_dir, 'nn_no_silence_lab_norm_'+suffix) - - in_label_align_file_list = prepare_file_path_list( - file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - binary_label_file_list = prepare_file_path_list( - file_id_list, binary_label_dir, cfg.lab_ext) - nn_label_file_list = prepare_file_path_list( - file_id_list, nn_label_dir, cfg.lab_ext) - nn_label_norm_file_list = prepare_file_path_list( - file_id_list, nn_label_norm_dir, cfg.lab_ext) - - # to do - sanity check the label dimension here? 
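
# norm_info_file written and consumed in this script is a flat float32 dump
# holding two equal-length vectors back to back: min and max for MINMAX
# normalisation, mean and std for MVN. A minimal sketch of splitting one
# open (the file name is assumed, following the construction above):
import numpy

def read_norm_info(norm_info_file):
    """Return a (2, dim) array: row 0 is min/mean, row 1 is max/std."""
    data = numpy.fromfile(norm_info_file, dtype=numpy.float32)
    return data.reshape((2, -1))

# lo, hi = read_norm_info('norm_info_mgc_lf0_vuv_bap_187_MVN.dat')
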
- - min_max_normaliser = None - label_norm_file = 'label_norm_%s.dat' % (cfg.label_style) - label_norm_file = os.path.join(label_data_dir, label_norm_file) - - if cfg.NORMLAB and (cfg.label_style == 'HTS'): - # simple HTS labels - logger.info( - 'preparing label data (input) using standard HTS style labels') - label_normaliser.perform_normalisation( - in_label_align_file_list, binary_label_file_list) - - remover = SilenceRemover( - n_cmp=lab_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(binary_label_file_list, - in_label_align_file_list, nn_label_file_list) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if cfg.NORMLAB and (cfg.label_style == 'composed'): - # new flexible label preprocessor - - logger.info( - 'preparing label data (input) using "composed" style labels') - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - - logger.info('Loaded label configuration') - # logger.info('%s' % label_composer.configuration.labels ) - - lab_dim = label_composer.compute_label_dimension() - logger.info('label dimension will be %d' % lab_dim) - - if cfg.precompile_xpaths: - label_composer.precompile_xpaths() - - # there are now a set of parallel input label files (e.g, one set of HTS and another set of Ossian trees) - # create all the lists of these, ready to pass to the label composer - - in_label_align_file_list = {} - for label_style, label_style_required in label_composer.label_styles.items(): - if label_style_required: - logger.info( - 'labels of style %s are required - constructing file paths for them' % label_style) - if label_style == 'xpath': - in_label_align_file_list['xpath'] = prepare_file_path_list( - file_id_list, cfg.xpath_label_align_dir, cfg.utt_ext, False) - elif label_style == 'hts': - in_label_align_file_list['hts'] = prepare_file_path_list( - file_id_list, cfg.hts_label_align_dir, cfg.lab_ext, False) - else: - logger.critical( - 'unsupported label style %s specified in label configuration' % label_style) - raise Exception - - # now iterate through the files, one at a time, constructing the labels for them - num_files = len(file_id_list) - logger.info('the label styles required are %s' % - label_composer.label_styles) - - for i in range(num_files): - logger.info( - 'making input label features for %4d of %4d' % (i+1, num_files)) - - # iterate through the required label styles and open each corresponding label file - - # a dictionary of file descriptors, pointing at the required files - required_labels = {} - - for label_style, label_style_required in label_composer.label_styles.items(): - - # the files will be a parallel set of files for a single utterance - # e.g., the XML tree and an HTS label file - if label_style_required: - required_labels[label_style] = open( - in_label_align_file_list[label_style][i], 'r') - logger.debug(' opening label file %s' % - in_label_align_file_list[label_style][i]) - - logger.debug('label styles with open files: %s' % - required_labels) - label_composer.make_labels( - required_labels, out_file_name=binary_label_file_list[i], fill_missing_values=cfg.fill_missing_values, iterate_over_frames=cfg.iterate_over_frames) - - # now close all opened files - for fd in 
required_labels.values(): - fd.close() - - # silence removal - if cfg.remove_silence_using_binary_labels: - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from label using silence feature: %s' % ( - label_composer.configuration.labels[silence_feature])) - logger.info('Silence will be removed from CMP files in same way') - # Binary labels have 2 roles: both the thing trimmed and the instructions for trimming: - trim_silence(binary_label_file_list, nn_label_file_list, lab_dim, - binary_label_file_list, lab_dim, silence_feature) - else: - logger.info('No silence removal done') - # start from the labels we have just produced, not trimmed versions - nn_label_file_list = binary_label_file_list - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if min_max_normaliser != None: - # save label normalisation information for unseen testing labels - label_min_vector = min_max_normaliser.min_vector - label_max_vector = min_max_normaliser.max_vector - label_norm_info = numpy.concatenate( - (label_min_vector, label_max_vector), axis=0) - - label_norm_info = numpy.array(label_norm_info, 'float32') - fid = open(label_norm_file, 'wb') - label_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (label_min_vector.size, label_norm_file)) - - # make output acoustic data - if cfg.MAKECMP: - logger.info('creating acoustic (output) features') - delta_win = cfg.delta_win # [-0.5, 0.0, 0.5] - acc_win = cfg.acc_win # [1.0, -2.0, 1.0] - - acoustic_worker = AcousticComposition( - delta_win=delta_win, acc_win=acc_win) - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, cfg.in_dimension_dict, cfg.out_dimension_dict) - - if cfg.remove_silence_using_binary_labels: - # do this to get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from CMP using binary label file') - - # overwrite the untrimmed audio with the trimmed version: - trim_silence(nn_cmp_file_list, nn_cmp_file_list, cfg.cmp_dim, - binary_label_file_list, lab_dim, silence_feature) - - else: # back off to previous method using HTS labels: - remover = SilenceRemover( - n_cmp=cfg.cmp_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(nn_cmp_file_list[0:cfg.train_file_number+cfg.valid_file_number], - in_label_align_file_list[0:cfg.train_file_number + - cfg.valid_file_number], - nn_cmp_file_list[0:cfg.train_file_number+cfg.valid_file_number]) # save to itself - - # save acoustic normalisation information for normalising the features back - var_dir = os.path.join(data_dir, 'var') - if not os.path.exists(var_dir): - os.makedirs(var_dir) - - var_file_dict = {} - for feature_name in list(cfg.out_dimension_dict.keys()): - var_file_dict[feature_name] = os.path.join( - var_dir, feature_name + '_' + str(cfg.out_dimension_dict[feature_name])) - - # normalise output acoustic data - if cfg.NORMCMP: - logger.info('normalising acoustic (output) features using method %s' % - cfg.output_feature_normalisation) - cmp_norm_info = None - if 
cfg.output_feature_normalisation == 'MVN': - normaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - # calculate mean and std vectors on the training data, and apply on the whole dataset - global_mean_vector = normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number], 0, cfg.cmp_dim) - global_std_vector = normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector, 0, cfg.cmp_dim) - - normaliser.feature_normalisation(nn_cmp_file_list[0:cfg.train_file_number+cfg.valid_file_number], - nn_cmp_norm_file_list[0:cfg.train_file_number+cfg.valid_file_number]) - cmp_norm_info = numpy.concatenate( - (global_mean_vector, global_std_vector), axis=0) - - elif cfg.output_feature_normalisation == 'MINMAX': - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim) - global_mean_vector = min_max_normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number]) - global_std_vector = min_max_normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim, min_value=0.01, max_value=0.99) - min_max_normaliser.find_min_max_values( - nn_cmp_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_cmp_file_list, nn_cmp_norm_file_list) - - cmp_min_vector = min_max_normaliser.min_vector - cmp_max_vector = min_max_normaliser.max_vector - cmp_norm_info = numpy.concatenate( - (cmp_min_vector, cmp_max_vector), axis=0) - - else: - logger.critical('Normalisation type %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - cmp_norm_info = numpy.array(cmp_norm_info, 'float32') - fid = open(norm_info_file, 'wb') - cmp_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (cfg.output_feature_normalisation, norm_info_file)) - - feature_index = 0 - for feature_name in list(cfg.out_dimension_dict.keys()): - feature_std_vector = numpy.array( - global_std_vector[:, feature_index:feature_index+cfg.out_dimension_dict[feature_name]], 'float32') - - fid = open(var_file_dict[feature_name], 'w') - feature_std_vector.tofile(fid) - fid.close() - - logger.info('saved %s variance vector to %s' % - (feature_name, var_file_dict[feature_name])) - - feature_index += cfg.out_dimension_dict[feature_name] - - train_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number] - train_y_file_list = nn_cmp_norm_file_list[0:cfg.train_file_number] - valid_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - valid_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - test_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - # we need to know the label dimension before training the DNN - # computing that requires us to look at the labels - # - # currently, there are two ways to do this - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension + cfg.appended_input_dim - - elif cfg.label_style == 'composed': - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = 
label_composer.compute_label_dimension() - - logger.info('label dimension is %d' % lab_dim) - - combined_model_arch = str(len(hidden_layer_size)) - for hid_size in hidden_layer_size: - combined_model_arch += '_' + str(hid_size) - - nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.%f.nn.model' \ - % (model_dir, cfg.combined_model_name, cfg.combined_feature_name, int(cfg.multistream_switch), - combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number, cfg.hyper_params['learning_rate']) - - # DNN model training - if cfg.TRAINDNN: - - var_dict = load_covariance(var_file_dict, cfg.out_dimension_dict) - - logger.info('training DNN') - - fid = open(norm_info_file, 'rb') - cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32) - fid.close() - cmp_min_max = cmp_min_max.reshape((2, -1)) - cmp_mean_vector = cmp_min_max[0, ] - cmp_std_vector = cmp_min_max[1, ] - - try: - os.makedirs(model_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create model directory %s' % model_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - try: - train_DNN(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot, var_dict=var_dict, - cmp_mean_vector=cmp_mean_vector, cmp_std_vector=cmp_std_vector) - except KeyboardInterrupt: - logger.critical('train_DNN interrupted via keyboard') - # Could 'raise' the exception further, but that causes a deep traceback to be printed - # which we don't care about for a keyboard interrupt. 
So, just bail out immediately
-            sys.exit(1)
-        except:
-            logger.critical('train_DNN threw an exception')
-            raise
-
-    # generate parameters from DNN
-    temp_dir_name = '%s_%s_%d_%d_%d_%d_%d_%d' \
-        % (cfg.combined_model_name, cfg.combined_feature_name, int(cfg.do_post_filtering),
-           cfg.train_file_number, lab_dim, cfg.cmp_dim,
-           len(hidden_layer_size), hidden_layer_size[0])
-    gen_dir = os.path.join(gen_dir, temp_dir_name)
-
-    gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number +
-                                    cfg.valid_file_number+cfg.test_file_number]
-    test_x_file_list = nn_label_norm_file_list[cfg.train_file_number:
-                                               cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number]
-
-    if cfg.DNNGEN:
-        logger.info('generating from DNN')
-
-        try:
-            os.makedirs(gen_dir)
-        except OSError as e:
-            if e.errno == errno.EEXIST:
-                # not an error - just means directory already exists
-                pass
-            else:
-                logger.critical(
-                    'Failed to create generation directory %s' % gen_dir)
-                logger.critical(' OS error was: %s' % e.strerror)
-                raise
-
-        gen_file_list = prepare_file_path_list(
-            gen_file_id_list, gen_dir, cfg.cmp_ext)
-        dnn_generation(test_x_file_list, nnets_file_name,
-                       lab_dim, cfg.cmp_dim, gen_file_list)
-
-        logger.debug('denormalising generated output using method %s' %
-                     cfg.output_feature_normalisation)
-
-        fid = open(norm_info_file, 'rb')
-        cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32)
-        fid.close()
-        cmp_min_max = cmp_min_max.reshape((2, -1))
-        # note: under MVN these two rows actually hold the mean and std vectors
-        cmp_min_vector = cmp_min_max[0, ]
-        cmp_max_vector = cmp_min_max[1, ]
-
-        if cfg.output_feature_normalisation == 'MVN':
-            denormaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim)
-            denormaliser.feature_denormalisation(
-                gen_file_list, gen_file_list, cmp_min_vector, cmp_max_vector)
-
-        elif cfg.output_feature_normalisation == 'MINMAX':
-            denormaliser = MinMaxNormalisation(
-                cfg.cmp_dim, min_value=0.01, max_value=0.99, min_vector=cmp_min_vector, max_vector=cmp_max_vector)
-            denormaliser.denormalise_data(gen_file_list, gen_file_list)
-        else:
-            logger.critical('denormalising method %s is not supported!\n' % (
-                cfg.output_feature_normalisation))
-            raise
-
-        # perform MLPG to smooth parameter trajectory
-        # lf0 is included, so the output features must have vuv.
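The comment above refers to Merlin-style MLPG (maximum-likelihood parameter generation): the network predicts static plus delta features per frame, and MLPG solves for the static trajectory that best explains both, smoothing frame-wise jitter. Below is a minimal single-dimension sketch of the idea, assuming the `[-0.5, 0.0, 0.5]` delta window used elsewhere in this file; `mlpg_1d` is a hypothetical helper, not Merlin's `ParameterGeneration` API.

```python
# Hedged sketch: solve (W' D W) c = W' D mu for the static trajectory c,
# where the observation stacks [static; delta] means and D is the
# diagonal precision built from the per-frame variances.
import numpy as np

def mlpg_1d(mu_static, mu_delta, var_static, var_delta):
    T = len(mu_static)
    W = np.zeros((2 * T, T))
    for t in range(T):
        W[t, t] = 1.0                  # static coefficient rows
        if t > 0:
            W[T + t, t - 1] = -0.5     # delta_t = 0.5 * (c[t+1] - c[t-1])
        if t < T - 1:
            W[T + t, t + 1] = 0.5
    mu = np.concatenate([mu_static, mu_delta])
    precision = np.diag(1.0 / np.concatenate([var_static, var_delta]))
    A = W.T @ precision @ W
    b = W.T @ precision @ mu
    return np.linalg.solve(A, b)

# toy check: small delta variances pull a zig-zag static mean
# towards a smooth trajectory
smooth = mlpg_1d(np.array([0.0, 1.0, 0.0, 1.0, 0.0]), np.zeros(5),
                 np.ones(5), np.full(5, 0.1))
```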
- generator = ParameterGeneration(gen_wav_features=cfg.gen_wav_features) - generator.acoustic_decomposition( - gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict, var_file_dict) - - # generate wav - if cfg.GENWAV: - logger.info('reconstructing waveform(s)') - print(len(gen_file_id_list)) - # generated speech - generate_wav( - gen_dir, gen_file_id_list[cfg.valid_file_number:cfg.valid_file_number+cfg.test_file_number], cfg) -# generate_wav(nn_cmp_dir, gen_file_id_list) # reference copy synthesis speech - - # evaluation: calculate distortion - if cfg.CALMCD: - logger.info('calculating MCD') - - ref_data_dir = os.path.join(data_dir, 'ref_data') - - ref_mgc_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.mgc_ext) - ref_bap_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.bap_ext) - ref_lf0_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.lf0_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - calculator = IndividualDistortionComp() - - spectral_distortion = 0.0 - bap_mse = 0.0 - f0_mse = 0.0 - vuv_error = 0.0 - - valid_file_id_list = file_id_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_file_id_list = file_id_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - # get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - # use first feature in label -- hardcoded for now - silence_feature = 0 - - # Use these to trim silence: - untrimmed_test_labels = binary_label_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if 'mgc' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['mgc'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_mgc_list, cfg.mgc_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(in_file_list_dict['mgc'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_mgc_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - # MCD - valid_spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - test_spectral_distortion *= (10 / - numpy.log(10)) * numpy.sqrt(2.0) # MCD - - if 'bap' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['bap'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_bap_list, cfg.bap_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(in_file_list_dict['bap'][cfg.train_file_number:cfg.train_file_number + - 
cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_bap_list) - valid_bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - test_bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - valid_bap_mse = valid_bap_mse / 10.0 - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - test_bap_mse = test_bap_mse / 10.0 - - if 'lf0' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['lf0'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_lf0_list, cfg.lf0_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(in_file_list_dict['lf0'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_lf0_list) - valid_f0_mse, valid_vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - test_f0_mse, test_vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - - logger.info('Develop: DNN -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' - % (valid_spectral_distortion, valid_bap_mse, valid_f0_mse, valid_vuv_error*100.)) - logger.info('Test : DNN -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' - % (test_spectral_distortion, test_bap_mse, test_f0_mse, test_vuv_error*100.)) - - -if __name__ == '__main__': - - # these things should be done even before trying to parse the command line - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.cfg - - # set up logging to use our custom class - logging.setLoggerClass(LoggerPlotter) - - # get a logger for this main function - logger = logging.getLogger("main") - - if len(sys.argv) != 2: - logger.critical('usage: run_dnn.sh [config file name]') - sys.exit(1) - - config_file = sys.argv[1] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - - if cfg.profile: - logger.info('profiling is activated') - import cProfile - import pstats - cProfile.run('main_function(cfg)', 'mainstats') - - # create a stream for the profiler to write to - profiling_output = io.StringIO() - p = pstats.Stats('mainstats', stream=profiling_output) - - # print stats to that stream - # here we just report the top 10 functions, sorted by total amount of time spent in each - p.strip_dirs().sort_stats('tottime').print_stats(10) - - # print the result to the log - logger.info('---Profiling result follows---\n%s' % - profiling_output.getvalue()) - profiling_output.close() - logger.info('---End of profiling result---') - - else: - main_function(cfg) - -# if gnp._boardId is not None: -# import gpu_lock -# gpu_lock.free_lock(gnp._boardId) - - sys.exit(0) - -import pickle -import gzip -import os -import sys -import errno -import time -import math - -# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why) -import numpy -#import gnumpy as gnp -# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself -import 
numpy.distutils.__config__ -# and only after that can we import theano -#import theano - -from utils.providers import ListDataProvider - -from frontend.label_normalisation import HTSLabelNormalisation, XMLLabelNormalisation -from frontend.silence_remover import SilenceRemover -from frontend.silence_remover import trim_silence -from frontend.min_max_norm import MinMaxNormalisation -from frontend.acoustic_composition import AcousticComposition -from frontend.parameter_generation import ParameterGeneration -from frontend.mean_variance_norm import MeanVarianceNorm - -# the new class for label composition and normalisation -from frontend.label_composer import LabelComposer -#from frontend.mlpg_fast import MLParameterGenerationFast - -#from frontend.mlpg_fast_layer import MLParameterGenerationFastLayer - - -import configuration -from models.st_dnn_cm import SequentialDNN - -from utils.compute_distortion import DistortionComputation, IndividualDistortionComp -from utils.generate import generate_wav -from utils.learn_rates import ExpDecreaseLearningRate - -from io_funcs.binary_io import BinaryIOCollection - -#import matplotlib.pyplot as plt -# our custom logging class that can also plot -#from logplot.logging_plotting import LoggerPlotter, MultipleTimeSeriesPlot, SingleWeightMatrixPlot -from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot -import logging # as logging -import logging.config -import io - - -def extract_file_id_list(file_list): - file_id_list = [] - for file_name in file_list: - file_id = os.path.basename(os.path.splitext(file_name)[0]) - file_id_list.append(file_id) - - return file_id_list - - -def read_file_list(file_name): - - logger = logging.getLogger("read_file_list") - - file_lists = [] - fid = open(file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - file_lists.append(line) - fid.close() - - logger.debug('Read file list from %s' % file_name) - return file_lists - - -def make_output_file_list(out_dir, in_file_lists): - out_file_lists = [] - - for in_file_name in in_file_lists: - file_id = os.path.basename(in_file_name) - out_file_name = out_dir + '/' + file_id - out_file_lists.append(out_file_name) - - return out_file_lists - - -def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True): - if not os.path.exists(file_dir) and new_dir_switch: - os.makedirs(file_dir) - file_name_list = [] - for file_id in file_id_list: - file_name = file_dir + '/' + file_id + file_extension - file_name_list.append(file_name) - - return file_name_list - - -def visualize_dnn(dnn): - - layer_num = len(dnn.params) # including input and output - plotlogger = logging.getLogger("plotting") - - for i in range(layer_num): - fig_name = 'Activation weights W' + str(i) + '_' + dnn.params[i].name - fig_title = 'Activation weights of W' + str(i) - xlabel = 'Neuron index of hidden layer ' + str(i) - ylabel = 'Neuron index of hidden layer ' + str(i+1) - if i == 0: - xlabel = 'Input feature index' - if i == layer_num-1: - ylabel = 'Output feature index' - - aa = dnn.params[i].get_value(borrow=True).T - print(aa.shape, aa.size) - if aa.size > aa.shape[0]: - logger.create_plot(fig_name, SingleWeightMatrixPlot) - plotlogger.add_plot_point( - fig_name, fig_name, dnn.params[i].get_value(borrow=True).T) - plotlogger.save_plot(fig_name, title=fig_name, - xlabel=xlabel, ylabel=ylabel) - - -def load_covariance(var_file_dict, out_dimension_dict): - var = {} - io_funcs = BinaryIOCollection() - for feature_name 
in list(var_file_dict.keys()):
-        var_values, dimension = io_funcs.load_binary_file_frame(
-            var_file_dict[feature_name], 1)
-
-        var_values = numpy.reshape(
-            var_values, (out_dimension_dict[feature_name], 1))
-
-        var[feature_name] = var_values
-
-    return var
-
-
-def train_DNN(train_xy_file_list, valid_xy_file_list,
-              nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False, var_dict=None,
-              cmp_mean_vector=None, cmp_std_vector=None, init_dnn_model_file=None):
-
-    # get loggers for this function
-    # this one writes to both console and file
-    logger = logging.getLogger("main.train_DNN")
-    logger.debug('Starting train_DNN')
-
-    if plot:
-        # this one takes care of plotting duties
-        plotlogger = logging.getLogger("plotting")
-        # create an (empty) plot of training convergence, ready to receive data points
-        logger.create_plot('training convergence', MultipleSeriesPlot)
-
-    try:
-        assert numpy.sum(ms_outs) == n_outs
-    except AssertionError:
-        logger.critical(
-            'the summation of multi-stream outputs does not equal %d' % (n_outs))
-        raise
-
-    #### parameters ####
-    finetune_lr = float(hyper_params['learning_rate'])
-    training_epochs = int(hyper_params['training_epochs'])
-    batch_size = int(hyper_params['batch_size'])
-    l1_reg = float(hyper_params['l1_reg'])
-    l2_reg = float(hyper_params['l2_reg'])
-    warmup_epoch = int(hyper_params['warmup_epoch'])
-    momentum = float(hyper_params['momentum'])
-    warmup_momentum = float(hyper_params['warmup_momentum'])
-
-    hidden_layer_size = hyper_params['hidden_layer_size']
-
-    buffer_utt_size = buffer_size
-    early_stop_epoch = int(hyper_params['early_stop_epochs'])
-
-    hidden_activation = hyper_params['hidden_activation']
-    output_activation = hyper_params['output_activation']
-
-    model_type = hyper_params['model_type']
-    hidden_layer_type = hyper_params['hidden_layer_type']
-
-    # use a switch to turn on pretraining
-    # pretraining may not help much; in that case, we turn it off to save time
-    do_pretraining = hyper_params['do_pretraining']
-    pretraining_epochs = int(hyper_params['pretraining_epochs'])
-    pretraining_lr = float(hyper_params['pretraining_lr'])
-
-    sequential_training = hyper_params['sequential_training']
-
-#    sequential_training = True
-
-    buffer_size = int(buffer_size / batch_size) * batch_size
-
-    ###################
-    (train_x_file_list, train_y_file_list) = train_xy_file_list
-    (valid_x_file_list, valid_y_file_list) = valid_xy_file_list
-
-    logger.debug('Creating training data provider')
-    train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list,
-                                         n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, sequential=sequential_training, shuffle=True)
-
-    logger.debug('Creating validation data provider')
-    valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list,
-                                         n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, sequential=sequential_training, shuffle=False)
-
-    shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_one_partition()
-    train_set_x, train_set_y = shared_train_set_xy
-    # validation data is still read block by block
-    shared_valid_set_xy, valid_set_x, valid_set_y = valid_data_reader.load_one_partition()
-    valid_set_x, valid_set_y = shared_valid_set_xy
-    train_data_reader.reset()
-    valid_data_reader.reset()
-
-    # temporarily we use the training set as pretrain_set_x.
-    # we need to support any data for pretraining
-#    pretrain_set_x = train_set_x
-
-    # numpy random generator
-    numpy_rng = numpy.random.RandomState(123)
-    logger.info('building the model')
-
-    dnn_model = None
-    pretrain_fn = None  # not all models support pretraining right now
-    train_fn = None
-    valid_fn = None
-    # valid_fn and valid_model are the same; reserved to compute multi-stream distortion
-    valid_model = None
-    if model_type == 'DNN':
-        # dnn_model = DeepRecurrentNetwork(n_in= n_ins, hidden_layer_size = hidden_layer_size, n_out = n_outs, L1_reg = l1_reg, L2_reg = l2_reg, hidden_layer_type = hidden_layer_type)
-
-        dnn_model = SequentialDNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs,
-                                  l1_reg=l1_reg, l2_reg=l2_reg,
-                                  hidden_layer_sizes=hidden_layer_size)
-
-    else:
-        logger.critical('%s type NN model is not supported!' % (model_type))
-        raise
-
-    logger.info('fine-tuning the %s model' % (model_type))
-
-    init_dnn_model = pickle.load(open(init_dnn_model_file, 'rb'))
-
-    dnn_model.set_parameters(init_dnn_model.W_params, init_dnn_model.b_params)
-
-    start_time = time.time()
-
-    best_dnn_model = dnn_model
-    best_validation_loss = sys.float_info.max
-    previous_loss = sys.float_info.max
-
-    early_stop = 0
-    epoch = 0
-
-#    finetune_lr = 0.000125
-    previous_finetune_lr = finetune_lr
-
-    print(finetune_lr)
-
-    while (epoch < training_epochs):
-        epoch = epoch + 1
-
-        current_momentum = momentum
-        current_finetune_lr = finetune_lr
-        if epoch <= warmup_epoch:
-            current_finetune_lr = finetune_lr
-            current_momentum = warmup_momentum
-        else:
-            current_finetune_lr = previous_finetune_lr * 0.5
-
-        previous_finetune_lr = current_finetune_lr
-
-        train_error = []
-        sub_start_time = time.time()
-
-        while (not train_data_reader.is_finish()):
-            shared_train_set_xy, train_set_x, train_set_y = train_data_reader.load_one_partition()
-
-            n_train_batches = train_set_x.shape[0]
-            current_frame_number = train_set_x.shape[0]
-
-            mean_matrix = numpy.tile(
-                cmp_mean_vector, (current_frame_number, 1))
-            std_matrix = numpy.tile(cmp_std_vector, (current_frame_number, 1))
-
-            logger.debug('this partition: %d frames (divided into %d batches)' % (
-                train_set_x.shape[0], n_train_batches))
-
-            this_train_error = dnn_model.finetune(
-                (train_set_x, train_set_y), current_frame_number, current_finetune_lr, current_momentum, mean_matrix, std_matrix)
-            train_error.extend(this_train_error.tolist())
-
-        train_data_reader.reset()
-
-        logger.debug('calculating validation loss')
-        validation_losses = []
-        validation_losses2 = []
-        while (not valid_data_reader.is_finish()):
-            shared_valid_set_xy, valid_set_x, valid_set_y = valid_data_reader.load_one_partition()
-
-            current_frame_number = valid_set_x.shape[0]
-            mean_matrix = numpy.tile(
-                cmp_mean_vector, (current_frame_number, 1))
-            std_matrix = numpy.tile(cmp_std_vector, (current_frame_number, 1))
-
-            this_valid_loss = dnn_model.parameter_prediction_trajectory(
-                valid_set_x, valid_set_y, mean_matrix, std_matrix)
-            validation_losses.extend(this_valid_loss.tolist())
-
-            predicted_para = dnn_model.parameter_prediction(valid_set_x)
-            temp_loss = numpy.sum(
-                ((predicted_para[:, 0:60] - valid_set_y[:, 0:60]) * std_matrix[:, 0:60]) ** 2, axis=1)
-            temp_loss = temp_loss ** 0.5
-            validation_losses2.extend(temp_loss.tolist())
-        valid_data_reader.reset()
-
-        this_validation_loss = numpy.mean(validation_losses)
-
-        this_train_valid_loss = numpy.mean(numpy.asarray(train_error))
-
-        sub_end_time = time.time()
-
-        loss_difference = this_validation_loss - previous_loss
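For reference, the epoch loop above implements a warmup-then-halve learning-rate schedule: the base rate (paired with the warmup momentum) is held for `warmup_epoch` epochs, then halved on every epoch after. A closed-form sketch of that schedule; `finetune_schedule` is a hypothetical name, not part of this code.

```python
# Hedged sketch of the schedule implemented by the loop above.
def finetune_schedule(base_lr, warmup_epoch, epoch):
    if epoch <= warmup_epoch:
        return base_lr                                   # held flat during warmup
    return base_lr * 0.5 ** (epoch - warmup_epoch)       # halved per epoch after

assert finetune_schedule(0.002, 10, 10) == 0.002
assert finetune_schedule(0.002, 10, 12) == 0.002 * 0.25
```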
-        logger.info('epoch %i, validation error %f, train error %f, time spent %.2f' % (
-            epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time)))
-        if plot:
-            plotlogger.add_plot_point(
-                'training convergence', 'validation set', (epoch, this_validation_loss))
-            plotlogger.add_plot_point(
-                'training convergence', 'training set', (epoch, this_train_valid_loss))
-            plotlogger.save_plot(
-                'training convergence', title='Progress of training and validation error', xlabel='epochs', ylabel='error')
-
-        if this_validation_loss < best_validation_loss:
-            if epoch > 10:
-                pickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))
-
-            best_dnn_model = dnn_model
-            best_validation_loss = this_validation_loss
-#            logger.debug('validation loss decreased, so saving model')
-
-        if this_validation_loss >= previous_loss:
-            logger.debug('validation loss increased')
-
-#            dbn = best_dnn_model
-            early_stop += 1
-
-#        if early_stop > early_stop_epoch:
-#            logger.debug('stopping early')
-#            break
-
-        if math.isnan(this_validation_loss):
-            break
-
-        previous_loss = this_validation_loss
-
-    end_time = time.time()
-#    cPickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))
-
-    logger.info('overall training time: %.2fm, validation error %f' %
-                ((end_time - start_time) / 60., best_validation_loss))
-
-    if plot:
-        plotlogger.save_plot(
-            'training convergence', title='Final training and validation error', xlabel='epochs', ylabel='error')
-
-    return best_validation_loss
-
-
-def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list):
-    logger = logging.getLogger("dnn_generation")
-    logger.debug('Starting dnn_generation')
-
-    plotlogger = logging.getLogger("plotting")
-
-    dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
-    file_number = len(valid_file_list)
-
-    for i in range(file_number):
-        logger.info('generating %4d of %4d: %s' %
-                    (i+1, file_number, valid_file_list[i]))
-        fid_lab = open(valid_file_list[i], 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        # integer division here: a float slice index raises TypeError in Python 3
-        features = features[:(n_ins * (features.size // n_ins))]
-        test_set_x = features.reshape((-1, n_ins))
-
-        predicted_parameter = dnn_model.parameter_prediction(test_set_x)
-
-        # write to cmp file
-        predicted_parameter = numpy.array(predicted_parameter, 'float32')
-        temp_parameter = predicted_parameter
-        fid = open(out_file_list[i], 'wb')
-        predicted_parameter.tofile(fid)
-        logger.debug('saved to %s' % out_file_list[i])
-        fid.close()
-
-
-def dnn_generation_lstm(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list):
-    logger = logging.getLogger("dnn_generation")
-    logger.debug('Starting dnn_generation')
-
-    plotlogger = logging.getLogger("plotting")
-
-    dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
-    visualize_dnn(dnn_model)
-
-    file_number = len(valid_file_list)
-
-    for i in range(file_number):
-        logger.info('generating %4d of %4d: %s' %
-                    (i+1, file_number, valid_file_list[i]))
-        fid_lab = open(valid_file_list[i], 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        # integer division here as well, for the same reason
-        features = features[:(n_ins * (features.size // n_ins))]
-        test_set_x = features.reshape((-1, n_ins))
-
-        predicted_parameter = dnn_model.parameter_prediction_lstm(test_set_x)
-
-        # write to cmp file
-        predicted_parameter = numpy.array(predicted_parameter, 'float32')
-        temp_parameter = predicted_parameter
-        fid = open(out_file_list[i], 'wb')
-        predicted_parameter.tofile(fid)
-        logger.debug('saved to %s' % out_file_list[i])
-        fid.close()
-
-
-def
main_function(cfg): - - # get a logger for this main function - logger = logging.getLogger("main") - - # get another logger to handle plotting duties - plotlogger = logging.getLogger("plotting") - - # later, we might do this via a handler that is created, attached and configured - # using the standard config mechanism of the logging module - # but for now we need to do it manually - plotlogger.set_plot_path(cfg.plot_dir) - - #### parameter setting######## - hidden_layer_size = cfg.hyper_params['hidden_layer_size'] - - # prepare environment - - try: - file_id_list = read_file_list(cfg.file_id_scp) - logger.debug('Loaded file id list from %s' % cfg.file_id_scp) - except IOError: - # this means that open(...) threw an error - logger.critical('Could not load file id list from %s' % - cfg.file_id_scp) - raise - - # total file number including training, development, and testing - total_file_number = len(file_id_list) - - data_dir = cfg.data_dir - - nn_cmp_dir = os.path.join( - data_dir, 'nn' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - nn_cmp_norm_dir = os.path.join( - data_dir, 'nn_norm' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - - model_dir = os.path.join(cfg.work_dir, 'nnets_model') - gen_dir = os.path.join(cfg.work_dir, 'gen') - - in_file_list_dict = {} - - for feature_name in list(cfg.in_dir_dict.keys()): - in_file_list_dict[feature_name] = prepare_file_path_list( - file_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False) - - nn_cmp_file_list = prepare_file_path_list( - file_id_list, nn_cmp_dir, cfg.cmp_ext) - nn_cmp_norm_file_list = prepare_file_path_list( - file_id_list, nn_cmp_norm_dir, cfg.cmp_ext) - - # normalisation information - norm_info_file = os.path.join(data_dir, 'norm_info' + cfg.combined_feature_name + - '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat') - - # normalise input full context label - # currently supporting two different forms of lingustic features - # later, we should generalise this - - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension + cfg.appended_input_dim - logger.info('Input label dimension is %d' % lab_dim) - suffix = str(lab_dim) - # no longer supported - use new "composed" style labels instead - elif cfg.label_style == 'composed': - # label_normaliser = XMLLabelNormalisation(xpath_file_name=cfg.xpath_file_name) - suffix = 'composed' - - if cfg.process_labels_in_work_dir: - label_data_dir = cfg.work_dir - else: - label_data_dir = data_dir - - # the number can be removed - binary_label_dir = os.path.join(label_data_dir, 'binary_label_'+suffix) - nn_label_dir = os.path.join(label_data_dir, 'nn_no_silence_lab_'+suffix) - nn_label_norm_dir = os.path.join( - label_data_dir, 'nn_no_silence_lab_norm_'+suffix) - - in_label_align_file_list = prepare_file_path_list( - file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - binary_label_file_list = prepare_file_path_list( - file_id_list, binary_label_dir, cfg.lab_ext) - nn_label_file_list = prepare_file_path_list( - file_id_list, nn_label_dir, cfg.lab_ext) - nn_label_norm_file_list = prepare_file_path_list( - file_id_list, nn_label_norm_dir, cfg.lab_ext) - - # to do - sanity check the label dimension here? 
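The NORMLAB block that follows scales the binary label features into [0.01, 0.99] using min/max statistics estimated on the training files only, then applied to the whole set. A self-contained sketch of that scaling; `fit_min_max` and `apply_min_max` are hypothetical names, not Merlin's `MinMaxNormalisation` API.

```python
# Hedged sketch of training-set min-max scaling into [0.01, 0.99].
import numpy as np

def fit_min_max(train_frames):
    return train_frames.min(axis=0), train_frames.max(axis=0)

def apply_min_max(frames, lo, hi, tgt_min=0.01, tgt_max=0.99):
    span = np.where(hi > lo, hi - lo, 1.0)   # guard constant dimensions
    return (frames - lo) / span * (tgt_max - tgt_min) + tgt_min

train = np.array([[0.0, 5.0], [2.0, 5.0]])   # statistics come from training data only
lo, hi = fit_min_max(train)
scaled = apply_min_max(np.array([[1.0, 5.0]]), lo, hi)  # -> [[0.5, 0.01]]
```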
- - min_max_normaliser = None - label_norm_file = 'label_norm_%s.dat' % (cfg.label_style) - label_norm_file = os.path.join(label_data_dir, label_norm_file) - - if cfg.NORMLAB and (cfg.label_style == 'HTS'): - # simple HTS labels - logger.info( - 'preparing label data (input) using standard HTS style labels') - label_normaliser.perform_normalisation( - in_label_align_file_list, binary_label_file_list) - - remover = SilenceRemover( - n_cmp=lab_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(binary_label_file_list, - in_label_align_file_list, nn_label_file_list) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if cfg.NORMLAB and (cfg.label_style == 'composed'): - # new flexible label preprocessor - - logger.info( - 'preparing label data (input) using "composed" style labels') - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - - logger.info('Loaded label configuration') - # logger.info('%s' % label_composer.configuration.labels ) - - lab_dim = label_composer.compute_label_dimension() - logger.info('label dimension will be %d' % lab_dim) - - if cfg.precompile_xpaths: - label_composer.precompile_xpaths() - - # there are now a set of parallel input label files (e.g, one set of HTS and another set of Ossian trees) - # create all the lists of these, ready to pass to the label composer - - in_label_align_file_list = {} - for label_style, label_style_required in label_composer.label_styles.items(): - if label_style_required: - logger.info( - 'labels of style %s are required - constructing file paths for them' % label_style) - if label_style == 'xpath': - in_label_align_file_list['xpath'] = prepare_file_path_list( - file_id_list, cfg.xpath_label_align_dir, cfg.utt_ext, False) - elif label_style == 'hts': - in_label_align_file_list['hts'] = prepare_file_path_list( - file_id_list, cfg.hts_label_align_dir, cfg.lab_ext, False) - else: - logger.critical( - 'unsupported label style %s specified in label configuration' % label_style) - raise Exception - - # now iterate through the files, one at a time, constructing the labels for them - num_files = len(file_id_list) - logger.info('the label styles required are %s' % - label_composer.label_styles) - - for i in range(num_files): - logger.info( - 'making input label features for %4d of %4d' % (i+1, num_files)) - - # iterate through the required label styles and open each corresponding label file - - # a dictionary of file descriptors, pointing at the required files - required_labels = {} - - for label_style, label_style_required in label_composer.label_styles.items(): - - # the files will be a parallel set of files for a single utterance - # e.g., the XML tree and an HTS label file - if label_style_required: - required_labels[label_style] = open( - in_label_align_file_list[label_style][i], 'r') - logger.debug(' opening label file %s' % - in_label_align_file_list[label_style][i]) - - logger.debug('label styles with open files: %s' % - required_labels) - label_composer.make_labels( - required_labels, out_file_name=binary_label_file_list[i], fill_missing_values=cfg.fill_missing_values, iterate_over_frames=cfg.iterate_over_frames) - - # now close all opened files - for fd in 
required_labels.values(): - fd.close() - - # silence removal - if cfg.remove_silence_using_binary_labels: - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from label using silence feature: %s' % ( - label_composer.configuration.labels[silence_feature])) - logger.info('Silence will be removed from CMP files in same way') - # Binary labels have 2 roles: both the thing trimmed and the instructions for trimming: - trim_silence(binary_label_file_list, nn_label_file_list, lab_dim, - binary_label_file_list, lab_dim, silence_feature) - else: - logger.info('No silence removal done') - # start from the labels we have just produced, not trimmed versions - nn_label_file_list = binary_label_file_list - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if min_max_normaliser != None: - # save label normalisation information for unseen testing labels - label_min_vector = min_max_normaliser.min_vector - label_max_vector = min_max_normaliser.max_vector - label_norm_info = numpy.concatenate( - (label_min_vector, label_max_vector), axis=0) - - label_norm_info = numpy.array(label_norm_info, 'float32') - fid = open(label_norm_file, 'wb') - label_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (label_min_vector.size, label_norm_file)) - - # make output acoustic data - if cfg.MAKECMP: - logger.info('creating acoustic (output) features') - delta_win = cfg.delta_win # [-0.5, 0.0, 0.5] - acc_win = cfg.acc_win # [1.0, -2.0, 1.0] - - acoustic_worker = AcousticComposition( - delta_win=delta_win, acc_win=acc_win) - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, cfg.in_dimension_dict, cfg.out_dimension_dict) - - if cfg.remove_silence_using_binary_labels: - # do this to get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from CMP using binary label file') - - # overwrite the untrimmed audio with the trimmed version: - trim_silence(nn_cmp_file_list, nn_cmp_file_list, cfg.cmp_dim, - binary_label_file_list, lab_dim, silence_feature) - - else: # back off to previous method using HTS labels: - remover = SilenceRemover( - n_cmp=cfg.cmp_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(nn_cmp_file_list[0:cfg.train_file_number+cfg.valid_file_number], - in_label_align_file_list[0:cfg.train_file_number + - cfg.valid_file_number], - nn_cmp_file_list[0:cfg.train_file_number+cfg.valid_file_number]) # save to itself - - # save acoustic normalisation information for normalising the features back - var_dir = os.path.join(data_dir, 'var') - if not os.path.exists(var_dir): - os.makedirs(var_dir) - - var_file_dict = {} - for feature_name in list(cfg.out_dimension_dict.keys()): - var_file_dict[feature_name] = os.path.join( - var_dir, feature_name + '_' + str(cfg.out_dimension_dict[feature_name])) - - # normalise output acoustic data - if cfg.NORMCMP: - logger.info('normalising acoustic (output) features using method %s' % - cfg.output_feature_normalisation) - cmp_norm_info = None - if 
cfg.output_feature_normalisation == 'MVN': - normaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - # calculate mean and std vectors on the training data, and apply on the whole dataset - global_mean_vector = normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number], 0, cfg.cmp_dim) - global_std_vector = normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector, 0, cfg.cmp_dim) - - normaliser.feature_normalisation(nn_cmp_file_list[0:cfg.train_file_number+cfg.valid_file_number], - nn_cmp_norm_file_list[0:cfg.train_file_number+cfg.valid_file_number]) - cmp_norm_info = numpy.concatenate( - (global_mean_vector, global_std_vector), axis=0) - - elif cfg.output_feature_normalisation == 'MINMAX': - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim) - global_mean_vector = min_max_normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number]) - global_std_vector = min_max_normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim, min_value=0.01, max_value=0.99) - min_max_normaliser.find_min_max_values( - nn_cmp_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_cmp_file_list, nn_cmp_norm_file_list) - - cmp_min_vector = min_max_normaliser.min_vector - cmp_max_vector = min_max_normaliser.max_vector - cmp_norm_info = numpy.concatenate( - (cmp_min_vector, cmp_max_vector), axis=0) - - else: - logger.critical('Normalisation type %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - cmp_norm_info = numpy.array(cmp_norm_info, 'float32') - fid = open(norm_info_file, 'wb') - cmp_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (cfg.output_feature_normalisation, norm_info_file)) - - feature_index = 0 - for feature_name in list(cfg.out_dimension_dict.keys()): - feature_std_vector = numpy.array( - global_std_vector[:, feature_index:feature_index+cfg.out_dimension_dict[feature_name]], 'float32') - - fid = open(var_file_dict[feature_name], 'w') - feature_std_vector.tofile(fid) - fid.close() - - logger.info('saved %s variance vector to %s' % - (feature_name, var_file_dict[feature_name])) - - feature_index += cfg.out_dimension_dict[feature_name] - - train_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number] - train_y_file_list = nn_cmp_norm_file_list[0:cfg.train_file_number] - valid_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - valid_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - test_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - # we need to know the label dimension before training the DNN - # computing that requires us to look at the labels - # - # currently, there are two ways to do this - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension + cfg.appended_input_dim - - elif cfg.label_style == 'composed': - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = 
label_composer.compute_label_dimension() - - logger.info('label dimension is %d' % lab_dim) - - combined_model_arch = str(len(hidden_layer_size)) - for hid_size in hidden_layer_size: - combined_model_arch += '_' + str(hid_size) - - nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.%f.nn.model' \ - % (model_dir, cfg.combined_model_name, cfg.combined_feature_name, int(cfg.multistream_switch), - combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number, cfg.hyper_params['learning_rate']) - - # DNN model training - if cfg.TRAINDNN: - - var_dict = load_covariance(var_file_dict, cfg.out_dimension_dict) - - logger.info('training DNN') - - fid = open(norm_info_file, 'rb') - cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32) - fid.close() - cmp_min_max = cmp_min_max.reshape((2, -1)) - cmp_mean_vector = cmp_min_max[0, ] - cmp_std_vector = cmp_min_max[1, ] - - try: - os.makedirs(model_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create model directory %s' % model_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - try: - train_DNN(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot, var_dict=var_dict, - cmp_mean_vector=cmp_mean_vector, cmp_std_vector=cmp_std_vector, init_dnn_model_file=cfg.start_from_trained_model) - except KeyboardInterrupt: - logger.critical('train_DNN interrupted via keyboard') - # Could 'raise' the exception further, but that causes a deep traceback to be printed - # which we don't care about for a keyboard interrupt. 
So, just bail out immediately
-            sys.exit(1)
-        except:
-            logger.critical('train_DNN threw an exception')
-            raise
-
-    # generate parameters from DNN
-    temp_dir_name = '%s_%s_%d_%d_%d_%d_%d_%d' \
-        % (cfg.combined_model_name, cfg.combined_feature_name, int(cfg.do_post_filtering),
-           cfg.train_file_number, lab_dim, cfg.cmp_dim,
-           len(hidden_layer_size), hidden_layer_size[0])
-    gen_dir = os.path.join(gen_dir, temp_dir_name)
-
-    gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number +
-                                    cfg.valid_file_number+cfg.test_file_number]
-    test_x_file_list = nn_label_norm_file_list[cfg.train_file_number:
-                                               cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number]
-
-    if cfg.DNNGEN:
-        logger.info('generating from DNN')
-
-        try:
-            os.makedirs(gen_dir)
-        except OSError as e:
-            if e.errno == errno.EEXIST:
-                # not an error - just means directory already exists
-                pass
-            else:
-                logger.critical(
-                    'Failed to create generation directory %s' % gen_dir)
-                logger.critical(' OS error was: %s' % e.strerror)
-                raise
-
-        gen_file_list = prepare_file_path_list(
-            gen_file_id_list, gen_dir, cfg.cmp_ext)
-        dnn_generation(test_x_file_list, nnets_file_name,
-                       lab_dim, cfg.cmp_dim, gen_file_list)
-
-        logger.debug('denormalising generated output using method %s' %
-                     cfg.output_feature_normalisation)
-
-        fid = open(norm_info_file, 'rb')
-        cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32)
-        fid.close()
-        cmp_min_max = cmp_min_max.reshape((2, -1))
-        # note: under MVN these two rows actually hold the mean and std vectors
-        cmp_min_vector = cmp_min_max[0, ]
-        cmp_max_vector = cmp_min_max[1, ]
-
-        if cfg.output_feature_normalisation == 'MVN':
-            denormaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim)
-            denormaliser.feature_denormalisation(
-                gen_file_list, gen_file_list, cmp_min_vector, cmp_max_vector)
-
-        elif cfg.output_feature_normalisation == 'MINMAX':
-            denormaliser = MinMaxNormalisation(
-                cfg.cmp_dim, min_value=0.01, max_value=0.99, min_vector=cmp_min_vector, max_vector=cmp_max_vector)
-            denormaliser.denormalise_data(gen_file_list, gen_file_list)
-        else:
-            logger.critical('denormalising method %s is not supported!\n' % (
-                cfg.output_feature_normalisation))
-            raise
-
-        # perform MLPG to smooth parameter trajectory
-        # lf0 is included, so the output features must have vuv.
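On the vuv point in the comment above: lf0 only exists for voiced frames, so the acoustic feature vector carries a voiced/unvoiced flag alongside log-F0, and synthesis gates the exponentiated lf0 with it. A tiny sketch; `lf0_to_f0` is a hypothetical helper, not part of Merlin.

```python
# Hedged sketch of why vuv must accompany lf0.
import numpy as np

def lf0_to_f0(lf0, vuv, threshold=0.5):
    f0 = np.exp(lf0)            # lf0 is log(F0)
    f0[vuv < threshold] = 0.0   # unvoiced frames carry no pitch
    return f0

f0 = lf0_to_f0(np.array([5.3, 5.2, 5.1]), np.array([0.9, 0.4, 0.8]))
# frame 1 is judged unvoiced, so its F0 is zeroed
```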
- generator = ParameterGeneration(gen_wav_features=cfg.gen_wav_features) - generator.acoustic_decomposition( - gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict, var_file_dict) - - # generate wav - if cfg.GENWAV: - logger.info('reconstructing waveform(s)') - print(len(gen_file_id_list)) - # generated speech - generate_wav( - gen_dir, gen_file_id_list[cfg.valid_file_number:cfg.valid_file_number+cfg.test_file_number], cfg) -# generate_wav(nn_cmp_dir, gen_file_id_list) # reference copy synthesis speech - - # evaluation: calculate distortion - if cfg.CALMCD: - logger.info('calculating MCD') - - ref_data_dir = os.path.join(data_dir, 'ref_data') - - ref_mgc_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.mgc_ext) - ref_bap_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.bap_ext) - ref_lf0_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.lf0_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - calculator = IndividualDistortionComp() - - spectral_distortion = 0.0 - bap_mse = 0.0 - f0_mse = 0.0 - vuv_error = 0.0 - - valid_file_id_list = file_id_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_file_id_list = file_id_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - # get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - # use first feature in label -- hardcoded for now - silence_feature = 0 - - # Use these to trim silence: - untrimmed_test_labels = binary_label_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if 'mgc' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['mgc'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_mgc_list, cfg.mgc_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(in_file_list_dict['mgc'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_mgc_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - # MCD - valid_spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - test_spectral_distortion *= (10 / - numpy.log(10)) * numpy.sqrt(2.0) # MCD - - if 'bap' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['bap'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_bap_list, cfg.bap_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(in_file_list_dict['bap'][cfg.train_file_number:cfg.train_file_number + - 
cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_bap_list) - valid_bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - test_bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - valid_bap_mse = valid_bap_mse / 10.0 - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - test_bap_mse = test_bap_mse / 10.0 - - if 'lf0' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['lf0'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_lf0_list, cfg.lf0_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=cfg.silence_pattern) - remover.remove_silence(in_file_list_dict['lf0'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_lf0_list) - valid_f0_mse, valid_vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - test_f0_mse, test_vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - - logger.info('Develop: DNN -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' - % (valid_spectral_distortion, valid_bap_mse, valid_f0_mse, valid_vuv_error*100.)) - logger.info('Test : DNN -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' - % (test_spectral_distortion, test_bap_mse, test_f0_mse, test_vuv_error*100.)) - - -if __name__ == '__main__': - - # these things should be done even before trying to parse the command line - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.cfg - - # set up logging to use our custom class - logging.setLoggerClass(LoggerPlotter) - - # get a logger for this main function - logger = logging.getLogger("main") - - if len(sys.argv) != 2: - logger.critical('usage: run_dnn.sh [config file name]') - sys.exit(1) - - config_file = sys.argv[1] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - - if cfg.profile: - logger.info('profiling is activated') - import cProfile - import pstats - cProfile.run('main_function(cfg)', 'mainstats') - - # create a stream for the profiler to write to - profiling_output = io.StringIO() - p = pstats.Stats('mainstats', stream=profiling_output) - - # print stats to that stream - # here we just report the top 10 functions, sorted by total amount of time spent in each - p.strip_dirs().sort_stats('tottime').print_stats(10) - - # print the result to the log - logger.info('---Profiling result follows---\n%s' % - profiling_output.getvalue()) - profiling_output.close() - logger.info('---End of profiling result---') - - else: - main_function(cfg) - -# if gnp._boardId is not None: -# import gpu_lock -# gpu_lock.free_lock(gnp._boardId) - - sys.exit(0) -#!/usr/bin/env python - -import os -import sys -import re -import numpy - -import processHybridInfo - -mstohtk = 10000 -sectoms = 1000 -frameshift = 5 -numHybridSec = 4 - - -def findHybridParamRichContexts(file_id_list, feat_dict, vfloor, data_dir, tcoef_dir, lab_dir, sil_identifier, ignoreSilence=True): - ### create tcoef dir if not exists ### - if not 
os.path.isdir(tcoef_dir): - os.makedirs(tcoef_dir) - - # print "vfloor: {0}".format(vfloor) - for file_index in range(len(file_id_list)): - file_name = file_id_list[file_index] - label_file = os.path.join(lab_dir, file_name+'.lab') - tcoef_file = os.path.join(tcoef_dir, file_name+'.tcoef') - - label_info = processHybridInfo.readHybridLabelFile( - label_file, file_index, sil_identifier, ignoreSilence) - hybridInfo = processHybridInfo.convertToHybridLabel( - label_info, numHybridSec) - - feat_index = 0 - tempFeats = [[] for x in range(len(feat_dict))] - for feat_ext, feat_dim in feat_dict.items(): - in_feat_dir = os.path.join(data_dir, feat_ext) - feat_file = os.path.join(in_feat_dir, file_name+'.'+feat_ext) - - tempFeats[feat_index] = processHybridInfo.readBottleneckFeatures( - feat_file, feat_dim) - if feat_ext == 'lf0': - tempFeats[feat_index] = numpy.exp(tempFeats[feat_index]) - feat_index = feat_index + 1 - - features = numpy.hstack(tempFeats) - - outf = open(tcoef_file, 'w') - outf.write('EST_File Track\nDataType ascii\nNumFrames {0}\nNumChannels {1}\nNumAuxChannels 0\nfile_type 14\nEST_Header_End\n'.format( - len(hybridInfo[1]), len(features[0])*2)) - - temp = [[] for x in range(len(hybridInfo[1]))] - silMeans = numpy.zeros(len(features[0])) - silVars = numpy.ones(len(features[0])) - - for x in range(len(hybridInfo[1])): - outf.write('{0}'.format(float(hybridInfo[4][x])/sectoms)) - if sil_identifier in hybridInfo[3][x]: - tempMeans = silMeans - tempVars = silVars - else: - if int(hybridInfo[1][x]) == int(hybridInfo[2][x]): - # set to the frame value if there is no range! - temp[x] = features[hybridInfo[1][x]:hybridInfo[2][x]+1] - else: - temp[x] = features[hybridInfo[1][x]:hybridInfo[2][x]] - - tempContext = processHybridInfo.ContextInfo(float( - hybridInfo[0][x]), hybridInfo[1][x], hybridInfo[2][x], hybridInfo[3][x], temp[x]) - tempDist = tempContext.getFeatsDistribution() - - tempDist.enforceVFloor(vfloor) - tempMeans = tempDist.getArrayMeans() - tempVars = tempDist.getArrayVariances() - - for y in tempMeans: - outf.write('\t{0}'.format(y)) - for y in tempVars: - outf.write('\t{0}'.format(y)) - outf.write('\n') - - outf.close() - print_status(file_index, len(file_id_list)) - - sys.stdout.write("\n") - - return tempFeats - - -def print_status(i, length): - pr = int(float(i+1)/float(length)*100) - st = int(float(pr)/7) - sys.stdout.write(("\r%d/%d ") % (i+1, length) + - ("[ %d" % pr+"% ] <<< ")+('='*st)+(''*(100-st))) - sys.stdout.flush() - - -def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True): - if not os.path.exists(file_dir) and new_dir_switch: - os.makedirs(file_dir) - file_name_list = [] - for file_id in file_id_list: - file_name = file_dir + '/' + file_id + '.' 
+ file_extension - file_name_list.append(file_name) - - return file_name_list - - -def read_file_list(file_name): - file_lists = [] - fid = open(file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - file_lists.append(line) - fid.close() - - return file_lists - - -if __name__ == '__main__': - - #### User configurable variables #### - - merlin_dir = "/work/smg/v-srikanth/merlin" - data_dir = os.path.join( - merlin_dir, "egs/slt_arctic/s1/experiments/slt_arctic_demo/acoustic_model/data") - - feat_dict = {'mgc': 60, 'lf0': 1, 'bap': 1} - - sil_identifier = 'sil' - in_lab_dir = os.path.join(data_dir, 'lab') - - out_dir = os.path.join(data_dir, 'hybrid_voice_data') - vfloor_dir = os.path.join(out_dir, 'vfloor') - tcoef_dir = os.path.join(out_dir, 'tcoef') - - if not os.path.isdir(vfloor_dir): - os.makedirs(vfloor_dir) - - if not os.path.isdir(tcoef_dir): - os.makedirs(tcoef_dir) - - tcoef_train_dir = os.path.join(tcoef_dir, 'train') - tcoef_test_dir = os.path.join(tcoef_dir, 'test') - - #### Train and test file lists #### - - train_id_scp = os.path.join(data_dir, 'train_id_list.scp') - train_id_list = read_file_list(train_id_scp) - - test_id_scp = os.path.join(data_dir, 'test_id_list.scp') - test_id_list = read_file_list(test_id_scp) - - #### calculate variance flooring for each feature (from only training files) #### - - feat_index = 0 - vf = [[] for x in range(len(feat_dict))] - - for feat_ext, feat_dim in feat_dict.items(): - filename = feat_ext+'_'+str(feat_dim)+'_vfloor' - var_file = os.path.join(vfloor_dir, filename) - - if not os.path.isfile(var_file): - print('Calculating variance flooring for '+feat_ext+'...') - in_feat_dir = os.path.join(data_dir, feat_ext) - feat_file_list = prepare_file_path_list( - train_id_list, in_feat_dir, feat_ext) - - vf[feat_index] = processHybridInfo.calculateParamGV( - feat_file_list, feat_dim) - vf[feat_index] = vf[feat_index]*0.01 - - numpy.savetxt(var_file, vf[feat_index]) - else: - vf[feat_index] = numpy.loadtxt(var_file) - - feat_index = feat_index + 1 - - vfloor = numpy.hstack(vf) - - #### calculate tcoef features #### - - print('computing tcoef features for training data...') - tempFeats = findHybridParamRichContexts( - train_id_list, feat_dict, vfloor, data_dir, tcoef_train_dir, in_lab_dir, sil_identifier) - - print('computing tcoef features for test data...') - tempFeats = findHybridParamRichContexts( - test_id_list, feat_dict, vfloor, data_dir, tcoef_test_dir, in_lab_dir, sil_identifier) -import os -import sys -import re - - -def change_label_format(inp_label_file_list, out_label_file_list, label_style="state_align"): - - utt_len = len(inp_label_file_list) - - ### read file by file ### - for i in range(utt_len): - inp_label_file_name = inp_label_file_list[i] - out_label_file_name = out_label_file_list[i] - - label_info = convert_hts_lab_to_festival_lab( - inp_label_file_name, out_label_file_name, label_style) - - print_status(i, utt_len) - - sys.stdout.write("\n") - - -def convert_hts_lab_to_festival_lab(inp_label_file_name, out_label_file_name, label_style): - ### read label file ### - fid = open(inp_label_file_name) - utt_labels = fid.readlines() - fid.close() - - dur = 0.0 - lab_info = [[], []] - - ### process label file ### - for line in utt_labels: - line = line.strip() - - if len(line) < 1: - continue - temp_list = re.split('\s+', line) - full_label = temp_list[2] - - if label_style == "state_align": - # remove state information [k] - full_label_length = len(full_label) - 3 - state_index = 
full_label[full_label_length + 1]
-
-                state_index = int(state_index) - 1
-                if state_index == 1:
-                    ph_start_time = temp_list[0]
-                if state_index == 5:
-                    ph_end_time = temp_list[1]
-                    full_label = full_label[0:full_label_length]
-                    current_phone = full_label[full_label.index(
-                        '-') + 1:full_label.index('+')]
-                    dur = dur + \
-                        ((float(ph_end_time)-float(ph_start_time))*(10**-7))
-                    lab_info[0].append(dur)
-                    lab_info[1].append(current_phone)
-            elif label_style == "phone_align":
-                ph_start_time = temp_list[0]
-                ph_end_time = temp_list[1]
-                current_phone = full_label[full_label.index(
-                    '-') + 1:full_label.index('+')]
-                dur = dur + ((float(ph_end_time)-float(ph_start_time))*(10**-7))
-                lab_info[0].append(dur)
-                lab_info[1].append(current_phone)
-
-    out_f = open(out_label_file_name, 'w')
-    out_f.write('#\n')
-    for j in range(len(lab_info[0])):
-        dur = lab_info[0][j]
-        ph = lab_info[1][j]
-        out_f.write(str(dur)+' 125 '+ph+'\n')
-    out_f.close()
-
-    return lab_info
-
-
-def print_status(i, length):
-    pr = int(float(i+1)/float(length)*100)
-    st = int(float(pr)/7)
-    # pad with spaces so the progress bar overwrites cleanly
-    sys.stdout.write(("\r%d/%d ") % (i+1, length) +
-                     ("[ %d" % pr+"% ] <<< ")+('='*st)+(' '*(100-st)))
-    sys.stdout.flush()
-
-
-def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True):
-    if not os.path.exists(file_dir) and new_dir_switch:
-        os.makedirs(file_dir)
-    file_name_list = []
-    for file_id in file_id_list:
-        file_name = file_dir + '/' + file_id + file_extension
-        file_name_list.append(file_name)
-
-    return file_name_list
-
-
-def read_file_list(file_name):
-    file_lists = []
-    fid = open(file_name)
-    for line in fid.readlines():
-        line = line.strip()
-        if len(line) < 1:
-            continue
-        file_lists.append(line)
-    fid.close()
-
-    return file_lists
-
-
-if __name__ == '__main__':
-
-    if len(sys.argv) != 5:
-        print('Usage: python convert_hts_label_format_to_festival.py <inp_lab_dir> <out_lab_dir> <file_id_scp> <label_style>\n')
-        sys.exit(1)
-
-    inp_lab_dir = sys.argv[1]
-    out_lab_dir = sys.argv[2]
-
-    file_id_scp = sys.argv[3]
-    file_id_list = read_file_list(file_id_scp)
-
-    label_style = sys.argv[4]
-
-    inp_label_file_list = prepare_file_path_list(
-        file_id_list, inp_lab_dir, '.lab')
-    out_label_file_list = prepare_file_path_list(
-        file_id_list, out_lab_dir, '.lab')
-
-    print('changing HTS label format to festival...')
-    change_label_format(inp_label_file_list, out_label_file_list, label_style)
-#!/usr/bin/env python
-
-import os
-import numpy
-import re
-
-mstohtk = 10000
-sectoms = 1000
-frameshift = 5
-
-
-class ContextInfo:
-    def __init__(self, fileID, frameStart, frameEnd, context, btlnkFeats=None):
-        self.fid = numpy.array([int(fileID)])
-        self.sframe = numpy.array([int(frameStart)])
-        self.eframe = numpy.array([int(frameEnd)])
-        self.context = context
-        self.feats = numpy.array(btlnkFeats)
-        self.featsDist = DistributionInfo(1)  # one stream
-
-        if btlnkFeats.any():
-            self.featsDist.setMean(numpy.mean(self.feats, 0))
-            self.featsDist.setVariance(numpy.var(self.feats, 0))
-
-    def getContext(self):
-        return self.context
-
-    def getFeats(self):
-        return self.feats
-
-    def getFeatsDistribution(self):
-        return self.featsDist
-
-    def getId(self):
-        return self.fid
-
-    def getStartFrame(self):
-        return self.sframe
-
-    def getEndFrame(self):
-        return self.eframe
-
-    def setStartFrame(self, frameStart):
-        self.sframe = frameStart
-
-    def setEndFrame(self, frameEnd):
-        self.eframe = frameEnd
-
-    def sameContext(self, altContext):
-        if self.context == altContext:
-            print('match found: {0}'.format(altContext))
-        return self.context == altContext
-
-    def
-        self.fid = numpy.hstack([self.fid, int(fileID)])
-        self.sframe = numpy.hstack([self.sframe, int(frameStart)])
-        self.eframe = numpy.hstack([self.eframe, int(frameEnd)])
-        self.feats = numpy.vstack([self.feats, btlnkFeats])
-
-        self.featsDist.setMean(numpy.mean(self.feats, 0))
-        self.featsDist.setVariance(numpy.var(self.feats, 0))
-
-    def contextMatch(self, expr):
-        res = expr.search(self.context)
-        return res.group(1)
-
-
-class DistributionInfo:
-    def __init__(self, mixNum=1):
-        self.mean = [None for x in range(mixNum)]
-        self.var = [None for x in range(mixNum)]
-        self.mixWeight = [None for x in range(mixNum)]
-
-    def setVariance(self, variance, index=0):
-        self.var[index] = numpy.array(variance, dtype=numpy.float)
-
-    def setMean(self, mean, index=0):
-        self.mean[index] = numpy.array(mean, dtype=numpy.float)
-
-    def setMixWeight(self, weight, index=0):
-        self.mixWeight[index] = weight
-
-    def getCovariance(self, index=0):
-        covariance = numpy.zeros((len(self.var[index]), len(self.var[index])))
-        for i in range(len(self.var[index])):
-            covariance[i, i] = self.var[index][i]
-        return covariance
-
-    def getInverseCovariance(self, index=0):
-        covariance = numpy.zeros((len(self.var[index]), len(self.var[index])))
-        for i in range(len(self.var[index])):
-            covariance[i, i] = 1.0/self.var[index][i]
-        return covariance
-
-    def getDimensionality(self, index=0):
-        return len(self.var[index])
-
-    def getMeans(self, index=0):
-        meanMatrix = numpy.transpose(numpy.matrix(self.mean[index]))
-        return meanMatrix
-
-    def getArrayVariances(self, index=0):
-        return self.var[index]
-
-    def getArrayMeans(self, index=0):
-        return self.mean[index]
-
-    def getMixWeight(self, index=0):
-        return self.mixWeight[index]
-
-    def enforceVFloor(self, varFloor, index=0):
-        count = 0
-        for x in range(len(self.var[index])):
-            if self.var[index][x] < varFloor[x]:
-                self.var[index][x] = varFloor[x]
-                count = count+1
-        return count
-
-
-def readBottleneckFeatures(fname, featNum=32):
-    data = numpy.fromfile(fname, 'float32')
-    data = data.reshape(-1, featNum)
-    return data
-
-
-def calculateParamGV(feat_file_list, feat_dim=32):
-    data = numpy.empty((1, feat_dim))
-    for file_index in range(len(feat_file_list)):
-        file_name = feat_file_list[file_index]
-        (junk, ext) = feat_file_list[file_index].split('.')
-
-        features = readBottleneckFeatures(file_name, feat_dim)
-
-        if ext == 'lf0':  # remove unvoiced values
-            features = features[numpy.where(features != -1.*(10**(10)))[0]]
-            features = numpy.exp(features)  # convert to linear scale
-
-        if file_index == 0:
-            data = features
-        else:
-            data = numpy.concatenate((data, features), 0)
-
-    gv = numpy.var(data, 0)
-    return gv
-
-
-def readHybridLabelFile(fname, idnum, sil_identifier='#', ignoreSilence=True):
-    fid = open(fname, 'r')
-    data = fid.readlines()
-    fid.close()
-
-    lines = [[data[x].split()[0], data[x].split()[2]]
-             for x in range(1, len(data))]  # exclude first line!
-
-    columns = [[] for x in range(len(lines[0]))]
-    for line in lines:
-        for i, item in enumerate(line):
-            columns[i].append(item)
-
-    idarr = numpy.ones(len(columns[0]))*idnum
-    stime = numpy.hstack(
-        (0, numpy.array(columns[0][:-1], dtype=numpy.float64)))
-    columns = numpy.vstack((idarr, stime, columns))
-
-    if ignoreSilence:
-        keep = [not(bool(re.search(sil_identifier, x))) for x in columns[3]]
-    else:
-        keep = [bool(1) for x in range(len(columns[3]))]
-
-    toInc = numpy.where(keep)[0]
-    gap = numpy.array(columns[2][toInc], dtype=numpy.float64) - \
-        numpy.array(columns[1][toInc], dtype=numpy.float64)
-    frames = (gap*sectoms)/frameshift
-
-    frameEnd = numpy.cumsum(frames)
-    frameEnd = numpy.round(frameEnd, 0)
-    frameStart = numpy.append(0, frameEnd[:-1])
-
-    allFrameStart = numpy.ones(len(columns[2]))*-1
-    allFrameEnd = numpy.ones(len(columns[2]))*-1
-
-    for point in range(len(toInc)):
-        allFrameEnd[toInc[point]] = frameEnd[point]
-        allFrameStart[toInc[point]] = frameStart[point]
-
-    data = [columns[0], allFrameStart, allFrameEnd, columns[3], numpy.array(
-        columns[1], dtype=numpy.float64)*sectoms, numpy.array(columns[2], dtype=numpy.float64)*sectoms]
-
-    return data
-
-
-def convertToHybridLabel(labData, numHybridSec):
-    hybridData = [[] for x in range(len(labData))]
-    labDurations = labData[2]-labData[1]
-
-    tDur = labData[5]-labData[4]
-    for i in range(len(labData[0])):
-        # keep as frames or convert to time?! Currently kept in frames
-        sectionLen = float(labDurations[i])/numHybridSec
-
-        tLen = float(tDur[i])/numHybridSec
-        for j in range(numHybridSec):
-            hybridData[0].append(labData[0][0])
-            hybridData[1].append(
-                int(labData[1][i]+numpy.floor((j)*sectionLen)))
-            hybridData[2].append(
-                int(labData[1][i]+numpy.floor((j+1)*sectionLen)))
-            hybridData[3].append(labData[3][i]+'[{0}]'.format(j))
-            hybridData[5].append(int(labData[4][i]+numpy.floor((j+1)*tLen)))
-
-    hybridData[1] = numpy.array(hybridData[1])
-    hybridData[2] = numpy.array(hybridData[2])
-    hybridData[3] = numpy.array(hybridData[3])
-    hybridData[4] = numpy.append(
-        labData[4][0], hybridData[5][0:len(hybridData[3])-1])
-    hybridData[5] = numpy.array(hybridData[5])
-
-    return hybridData
-
-import numpy
-
-from binary_io import BinaryIOCollection
-
-
-class AlignFeats(object):
-    def __init__(self):
-        self.io_funcs = BinaryIOCollection()
-
-    def align_src_feats(self, src_feat_file, src_aligned_feat_file, feat_dim, dtw_path_dict):
-        '''
-        align source feats as per the dtw path (matching target length)
-        '''
-        src_features, frame_number = self.io_funcs.load_binary_file_frame(
-            src_feat_file, feat_dim)
-
-        tgt_length = len(dtw_path_dict)
-        src_aligned_features = numpy.zeros((tgt_length, feat_dim))
-
-        for i in range(tgt_length):
-            src_aligned_features[i, ] = src_features[dtw_path_dict[i]]
-
-        self.io_funcs.array_to_binary_file(
-            src_aligned_features, src_aligned_feat_file)
-
-
-import numpy
-
-
-class BinaryIOCollection(object):
-
-    def load_binary_file(self, file_name, dimension):
-        fid_lab = open(file_name, 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        assert features.size % float(
-            dimension) == 0.0, 'specified dimension not compatible with data'
-        features = features[:(dimension * (features.size // dimension))]
-        features = features.reshape((-1, dimension))
-
-        return features
-
-    def array_to_binary_file(self, data, output_file_name):
-        data = numpy.array(data, 'float32')
-
-        fid = open(output_file_name, 'wb')
-        data.tofile(fid)
-        fid.close()
-
-    def load_binary_file_frame(self, file_name, dimension):
-        fid_lab = open(file_name, 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        assert features.size % float(
-            dimension) == 0.0, 'specified dimension not compatible with data'
-        frame_number = features.size // dimension
-        features = features[:(dimension * frame_number)]
-        features = features.reshape((-1, dimension))
-
-        return features, frame_number
-
-    def load_binary_dtw_file(self, file_name, dimension=2):
-        fid_lab = open(file_name, 'rb')
-        features = numpy.fromfile(fid_lab, dtype="int32")
-        fid_lab.close()
-        assert features.size % float(
-            dimension) == 0.0, 'specified dimension not compatible with data'
-        frame_number = features.size // dimension
-        features = features[:(dimension * frame_number)]
-        features = features.reshape((-1, dimension))
-
-        feat_path_dict = {}
-        for i in range(frame_number):
-            feat_path_dict[features[i][1]] = features[i][0]
-
-        return feat_path_dict
-
-    def load_ascii_dtw_file(self, file_name):
-        fid_lab = open(file_name, 'r')
-        data = fid_lab.readlines()
-        fid_lab.close()
-
-        feat_path_dict = {}
-        for newline in data[0:-1]:
-            temp_list = newline.strip().split()
-            feat_path_dict[int(temp_list[0])] = int(temp_list[1])
-
-        return feat_path_dict
-import os
-import sys
-import numpy
-import itertools
-
-from binary_io import BinaryIOCollection
-
-io_funcs = BinaryIOCollection()
-
-
-def compute_mean_and_std(lf0_file_list):
-    all_files_lf0_arr = numpy.zeros(200000)
-
-    current_index = 0
-    for lf0_file in lf0_file_list:
-        lf0_arr, frame_number = io_funcs.load_binary_file_frame(lf0_file, 1)
-        for lf0_value in lf0_arr:
-            all_files_lf0_arr[current_index] = numpy.exp(lf0_value)
-            current_index += 1
-
-    all_files_lf0_arr = all_files_lf0_arr[all_files_lf0_arr > 0]
-    all_files_lf0_arr = numpy.log(all_files_lf0_arr)
-
-    mean_f0 = numpy.mean(all_files_lf0_arr)
-    std_f0 = numpy.std(all_files_lf0_arr)
-
-    return mean_f0, std_f0
-
-
-def get_lf0_filelist(lf0_dir):
-    lf0_files = []
-    for file in os.listdir(lf0_dir):
-        whole_filepath = os.path.join(lf0_dir, file)
-        if os.path.isfile(whole_filepath) and str(whole_filepath).endswith(".lf0"):
-            lf0_files.append(whole_filepath)
-        elif os.path.isdir(whole_filepath):
-            lf0_files += get_lf0_filelist(whole_filepath)
-
-    lf0_files.sort()
-
-    return lf0_files
-
-
-if __name__ == "__main__":
-    # parse the arguments
-    lf0_dir = sys.argv[1]
-    lf0_stats_file = sys.argv[2]
-
-    lf0_file_list = get_lf0_filelist(lf0_dir)
-    mean_f0, std_f0 = compute_mean_and_std(lf0_file_list)
-
-    out_f = open(lf0_stats_file, 'w')
-    out_f.write('%f %f\n' % (mean_f0, std_f0))
-    out_f.close()
-#!/usr/bin/env python
-import os
-import sys
-import time
-import shutil
-import multiprocessing as mp
-
-import fastdtw
-
-from binary_io import BinaryIOCollection
-from align_feats import AlignFeats
-
-if len(sys.argv) != 6:
-    print("Usage: python dtw_aligner.py <tools_dir> <src_feat_dir> <tgt_feat_dir> <src_aligned_feat_dir> <bap_dim>")
-    sys.exit(1)
-
-# Arguments
-
-# tools directory
-tools_dir = sys.argv[1]
-
-# Source features directory
-src_feat_dir = sys.argv[2]
-
-# Target features directory
-tgt_feat_dir = sys.argv[3]
-
-# Source-aligned features directory
-src_aligned_feat_dir = sys.argv[4]
-
-# bap dimension
-bap_dim = int(sys.argv[5])
-
-if not os.path.exists(src_aligned_feat_dir):
-    os.makedirs(src_aligned_feat_dir)
-
-# Define variables
-mgc_dim = 60
-lf0_dim = 1
-
-src_mgc_dir = os.path.join(src_feat_dir, "mgc")
-tgt_mgc_dir = os.path.join(tgt_feat_dir, "mgc")
-
-src_bap_dir = os.path.join(src_feat_dir, "bap")
-tgt_bap_dir = os.path.join(tgt_feat_dir, "bap")
-
-src_lf0_dir = os.path.join(src_feat_dir, "lf0")
-tgt_lf0_dir = os.path.join(tgt_feat_dir, "lf0")
-
-# create output directories
-src_aligned_mgc_dir = os.path.join(src_aligned_feat_dir, "mgc")
-src_aligned_bap_dir = os.path.join(src_aligned_feat_dir, "bap")
-src_aligned_lf0_dir = os.path.join(src_aligned_feat_dir, "lf0")
-
-if not os.path.exists(src_aligned_mgc_dir):
-    os.mkdir(src_aligned_mgc_dir)
-
-if not os.path.exists(src_aligned_bap_dir):
-    os.mkdir(src_aligned_bap_dir)
-
-if not os.path.exists(src_aligned_lf0_dir):
-    os.mkdir(src_aligned_lf0_dir)
-
-#################################################################
-######## align source feats with target feats using dtw ########
-#################################################################
-
-io_funcs = BinaryIOCollection()
-aligner = AlignFeats()
-
-
-def get_mgc_filelist(mgc_dir):
-    mgc_files = []
-    for file in os.listdir(mgc_dir):
-        whole_filepath = os.path.join(mgc_dir, file)
-        if os.path.isfile(whole_filepath) and str(whole_filepath).endswith(".mgc"):
-            mgc_files.append(whole_filepath)
-        elif os.path.isdir(whole_filepath):
-            mgc_files += get_mgc_filelist(whole_filepath)
-
-    mgc_files.sort()
-
-    return mgc_files
-
-
-def load_dtw_path(dtw_path):
-    dtw_path_dict = {}
-    nframes = len(dtw_path)
-
-    for item, i in zip(dtw_path, range(nframes)):
-        if item[1] not in dtw_path_dict:
-            dtw_path_dict[item[1]] = item[0]
-
-    return dtw_path_dict
-
-
-def process(filename):
-    '''
-    The function derives dtw alignment path given source mgc and target mgc
-    :param filename: path to src mgc file
-    '''
-    file_id = os.path.basename(filename).split(".")[0]
-    print(file_id)
-
-    ### DTW alignment -- align source with target parameters ###
-    src_mgc_file = os.path.join(src_mgc_dir, file_id + ".mgc")
-    tgt_mgc_file = os.path.join(tgt_mgc_dir, file_id + ".mgc")
-
-    src_features, src_frame_number = io_funcs.load_binary_file_frame(
-        src_mgc_file, mgc_dim)
-    tgt_features, tgt_frame_number = io_funcs.load_binary_file_frame(
-        tgt_mgc_file, mgc_dim)
-
-    ### dtw align src with tgt ###
-    distance, dtw_path = fastdtw.fastdtw(src_features, tgt_features)
-
-    # load dtw path
-    dtw_path_dict = load_dtw_path(dtw_path)
-    assert len(dtw_path_dict) == tgt_frame_number  # dtw length not matched
-
-    # align features
-    aligner.align_src_feats(os.path.join(src_mgc_dir, file_id + ".mgc"), os.path.join(
-        src_aligned_mgc_dir, file_id + ".mgc"), mgc_dim, dtw_path_dict)
-    aligner.align_src_feats(os.path.join(src_bap_dir, file_id + ".bap"), os.path.join(
-        src_aligned_bap_dir, file_id + ".bap"), bap_dim, dtw_path_dict)
-    aligner.align_src_feats(os.path.join(src_lf0_dir, file_id + ".lf0"), os.path.join(
-        src_aligned_lf0_dir, file_id + ".lf0"), lf0_dim, dtw_path_dict)
-
-
-print("--- DTW alignment started ---")
-start_time = time.time()
-
-# get mgc files list
-mgc_files = get_mgc_filelist(src_mgc_dir)
-
-# do multi-processing
-pool = mp.Pool(mp.cpu_count())
-pool.map(process, mgc_files)
-
-(m, s) = divmod(int(time.time() - start_time), 60)
-print(("--- DTW alignment completion time: %d min. %d sec ---" % (m, s)))
-
-if not os.path.exists(src_aligned_mgc_dir):
-    print("DTW alignment unsuccessful!!")
-else:
-    print("You should have your src feats (aligned with target) ready in: %s" %
-          (src_aligned_feat_dir))
-#!/usr/bin/env python
-import os
-import sys
-import time
-import shutil
-import multiprocessing as mp
-
-from binary_io import BinaryIOCollection
-from align_feats import AlignFeats
-
-if len(sys.argv) != 6:
-    print("Usage: python dtw_aligner_festvox.py <tools_dir> <src_feat_dir> <tgt_feat_dir> <src_aligned_feat_dir> <bap_dim>")
-    sys.exit(1)
-
-# Arguments
-
-# tools directory
-tools_dir = sys.argv[1]
-
-# Source features directory
-src_feat_dir = sys.argv[2]
-
-# Target features directory
-tgt_feat_dir = sys.argv[3]
-
-# Source-aligned features directory
-src_aligned_feat_dir = sys.argv[4]
-
-# bap dimension
-bap_dim = int(sys.argv[5])
-
-if not os.path.exists(src_aligned_feat_dir):
-    os.makedirs(src_aligned_feat_dir)
-
-# path to tools
-sptk = os.path.join(tools_dir, "bin/SPTK-3.9")
-speech_tools = os.path.join(tools_dir, "speech_tools/bin")
-festvox = os.path.join(tools_dir, "festvox")
-
-# Define variables
-mgc_dim = 60
-lf0_dim = 1
-
-src_mgc_dir = os.path.join(src_feat_dir, "mgc")
-tgt_mgc_dir = os.path.join(tgt_feat_dir, "mgc")
-
-src_bap_dir = os.path.join(src_feat_dir, "bap")
-tgt_bap_dir = os.path.join(tgt_feat_dir, "bap")
-
-src_lf0_dir = os.path.join(src_feat_dir, "lf0")
-tgt_lf0_dir = os.path.join(tgt_feat_dir, "lf0")
-
-# create output directories
-alignments_dir = os.path.join(src_aligned_feat_dir, "../dtw_alignments")
-temp_dir = os.path.join(src_aligned_feat_dir, "../temp")
-
-src_aligned_mgc_dir = os.path.join(src_aligned_feat_dir, "mgc")
-src_aligned_bap_dir = os.path.join(src_aligned_feat_dir, "bap")
-src_aligned_lf0_dir = os.path.join(src_aligned_feat_dir, "lf0")
-
-if not os.path.exists(alignments_dir):
-    os.mkdir(alignments_dir)
-
-if not os.path.exists(temp_dir):
-    os.mkdir(temp_dir)
-
-if not os.path.exists(src_aligned_mgc_dir):
-    os.mkdir(src_aligned_mgc_dir)
-
-if not os.path.exists(src_aligned_bap_dir):
-    os.mkdir(src_aligned_bap_dir)
-
-if not os.path.exists(src_aligned_lf0_dir):
-    os.mkdir(src_aligned_lf0_dir)
-
-#################################################################
-######## align source feats with target feats using dtw ########
-#################################################################
-
-io_funcs = BinaryIOCollection()
-aligner = AlignFeats()
-
-# create dummy lab files
-os.system("touch %s/i.lab" % (temp_dir))
-os.system("touch %s/o.lab" % (temp_dir))
-
-
-def get_mgc_filelist(mgc_dir):
-    mgc_files = []
-    for file in os.listdir(mgc_dir):
-        whole_filepath = os.path.join(mgc_dir, file)
-        if os.path.isfile(whole_filepath) and str(whole_filepath).endswith(".mgc"):
-            mgc_files.append(whole_filepath)
-        elif os.path.isdir(whole_filepath):
-            mgc_files += get_mgc_filelist(whole_filepath)
-
-    mgc_files.sort()
-
-    return mgc_files
-
-
-def process(filename):
-    '''
-    The function derives dtw alignment path given source mgc and target mgc
-    :param filename: path to src mgc file
-    :return: .dtw files
-    '''
-    file_id = os.path.basename(filename).split(".")[0]
-    print(file_id)
-
-    ### DTW alignment -- align source with target parameters ###
-    src_mgc_file = os.path.join(src_mgc_dir, file_id + ".mgc")
-    tgt_mgc_file = os.path.join(tgt_mgc_dir, file_id + ".mgc")
-
-    src_features, src_frame_number = io_funcs.load_binary_file_frame(
-        src_mgc_file, mgc_dim)
-    tgt_features, tgt_frame_number = io_funcs.load_binary_file_frame(
-        tgt_mgc_file, mgc_dim)
-
-    ### dtw align src with tgt ###
-    dtw_alignment_file = os.path.join(alignments_dir, file_id + ".dtw")
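-    # sketch of the pipeline below: SPTK's x2x converts the binary mgc to
-    # ASCII, speech_tools' ch_track wraps it as an EST binary track, and
-    # festvox's phonealign performs the actual DTW, writing the alignment
-    # path to the .dtw file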
-
-    x2x_cmd1 = "%s +fa %s | xargs -n%d > %s" % (os.path.join(
-        sptk, "x2x"), src_mgc_file, mgc_dim, os.path.join(temp_dir, file_id + "_src_ascii.mgc"))
-    x2x_cmd2 = "%s +fa %s | xargs -n%d > %s" % (os.path.join(
-        sptk, "x2x"), tgt_mgc_file, mgc_dim, os.path.join(temp_dir, file_id + "_tgt_ascii.mgc"))
-
-    os.system(x2x_cmd1)
-    os.system(x2x_cmd2)
-
-    chtrack_cmd1 = "%s -s 0.005 -otype est_binary %s -o %s" % (os.path.join(speech_tools, "ch_track"),
-                                                               os.path.join(
-                                                                   temp_dir, file_id + "_src_ascii.mgc"),
-                                                               os.path.join(temp_dir, file_id + "_src_binary.mgc"))
-
-    os.system(chtrack_cmd1)
-
-    chtrack_cmd2 = "%s -s 0.005 -otype est_binary %s -o %s" % (os.path.join(speech_tools, "ch_track"),
-                                                               os.path.join(
-                                                                   temp_dir, file_id + "_tgt_ascii.mgc"),
-                                                               os.path.join(temp_dir, file_id + "_tgt_binary.mgc"))
-    os.system(chtrack_cmd2)
-
-    phone_align_cmd = "%s -itrack %s -otrack %s -ilabel %s -olabel %s -verbose -withcosts > %s" % (os.path.join(festvox, "src/general/phonealign"),
-                                                                                                   os.path.join(temp_dir, file_id + "_tgt_binary.mgc"),
-                                                                                                   os.path.join(temp_dir, file_id + "_src_binary.mgc"),
-                                                                                                   os.path.join(temp_dir, "i.lab"), os.path.join(temp_dir, "o.lab"), dtw_alignment_file)
-    os.system(phone_align_cmd)
-
-    # load dtw path
-    dtw_path_dict = io_funcs.load_ascii_dtw_file(dtw_alignment_file)
-    assert len(dtw_path_dict) == tgt_frame_number  # dtw length not matched
-
-    # align features
-    aligner.align_src_feats(os.path.join(src_mgc_dir, file_id + ".mgc"), os.path.join(
-        src_aligned_mgc_dir, file_id + ".mgc"), mgc_dim, dtw_path_dict)
-    aligner.align_src_feats(os.path.join(src_bap_dir, file_id + ".bap"), os.path.join(
-        src_aligned_bap_dir, file_id + ".bap"), bap_dim, dtw_path_dict)
-    aligner.align_src_feats(os.path.join(src_lf0_dir, file_id + ".lf0"), os.path.join(
-        src_aligned_lf0_dir, file_id + ".lf0"), lf0_dim, dtw_path_dict)
-
-
-print("--- DTW alignment started ---")
-start_time = time.time()
-
-# get mgc files list
-mgc_files = get_mgc_filelist(src_mgc_dir)
-
-# do multi-processing
-pool = mp.Pool(mp.cpu_count())
-pool.map(process, mgc_files)
-
-# clean temporary files
-shutil.rmtree(alignments_dir, ignore_errors=True)
-shutil.rmtree(temp_dir, ignore_errors=True)
-
-(m, s) = divmod(int(time.time() - start_time), 60)
-print(("--- DTW alignment completion time: %d min. %d sec ---" % (m, s)))
-
-if not os.path.exists(src_aligned_mgc_dir):
-    print("DTW alignment unsuccessful!!")
-else:
-    print("You should have your src feats (aligned with target) ready in: %s" %
-          (src_aligned_feat_dir))
-#!/usr/bin/env python
-import os
-import sys
-import time
-import shutil
-import multiprocessing as mp
-
-from binary_io import BinaryIOCollection
-from align_feats import AlignFeats
-
-if len(sys.argv) != 5:
-    print("Usage: python dtw_aligner_festvox_magphase.py <tools_dir> <src_feat_dir> <tgt_feat_dir> <src_aligned_feat_dir>")
-    sys.exit(1)
-
-# Arguments
-
-# tools directory
-tools_dir = sys.argv[1]
-
-# Source features directory
-src_feat_dir = sys.argv[2]
-
-# Target features directory
-tgt_feat_dir = sys.argv[3]
-
-# Source-aligned features directory
-src_aligned_feat_dir = sys.argv[4]
-
-# bap dimension
-#bap_dim = int(sys.argv[5])
-
-if not os.path.exists(src_aligned_feat_dir):
-    os.makedirs(src_aligned_feat_dir)
-
-# path to tools
-sptk = os.path.join(tools_dir, "bin/SPTK-3.9")
-speech_tools = os.path.join(tools_dir, "speech_tools/bin")
-festvox = os.path.join(tools_dir, "festvox")
-
-# Define variables. TODO: read from config file (avoid hardcoding)
-mag_dim = 60
-real_dim = 45
-imag_dim = 45
-lf0_dim = 1
-
-#src_mag_dir = os.path.join(src_feat_dir, "mag")
-#tgt_mag_dir = os.path.join(tgt_feat_dir, "mag")
-
-#src_bap_dir = os.path.join(src_feat_dir, "bap")
-#tgt_bap_dir = os.path.join(tgt_feat_dir, "bap")
-
-#src_lf0_dir = os.path.join(src_feat_dir, "lf0")
-#tgt_lf0_dir = os.path.join(tgt_feat_dir, "lf0")
-
-# create output directories
-alignments_dir = os.path.join(src_aligned_feat_dir, "../dtw_alignments")
-temp_dir = os.path.join(src_aligned_feat_dir, "../temp")
-
-#src_aligned_mag_dir = os.path.join(src_aligned_feat_dir, "mag")
-#src_aligned_bap_dir = os.path.join(src_aligned_feat_dir, "bap")
-#src_aligned_lf0_dir = os.path.join(src_aligned_feat_dir, "lf0")
-
-if not os.path.exists(alignments_dir):
-    os.mkdir(alignments_dir)
-
-if not os.path.exists(temp_dir):
-    os.mkdir(temp_dir)
-
-# if not os.path.exists(src_aligned_mag_dir):
-#     os.mkdir(src_aligned_mag_dir)
-
-# if not os.path.exists(src_aligned_bap_dir):
-#     os.mkdir(src_aligned_bap_dir)
-
-# if not os.path.exists(src_aligned_lf0_dir):
-#     os.mkdir(src_aligned_lf0_dir)
-
-#################################################################
-######## align source feats with target feats using dtw ########
-#################################################################
-
-io_funcs = BinaryIOCollection()
-aligner = AlignFeats()
-
-# create dummy lab files
-os.system("touch %s/i.lab" % (temp_dir))
-os.system("touch %s/o.lab" % (temp_dir))
-
-
-def get_mag_filelist(mag_dir):
-    mag_files = []
-    for file in os.listdir(mag_dir):
-        whole_filepath = os.path.join(mag_dir, file)
-        if os.path.isfile(whole_filepath) and str(whole_filepath).endswith(".mag"):
-            mag_files.append(whole_filepath)
-        elif os.path.isdir(whole_filepath):
-            mag_files += get_mag_filelist(whole_filepath)
-
-    mag_files.sort()
-
-    return mag_files
-
-
-def process(filename):
-    '''
-    The function derives dtw alignment path given source mag and target mag
-    :param filename: path to src mag file
-    :return: .dtw files
-    '''
-    file_id = os.path.basename(filename).split(".")[0]
-    print(file_id)
-
-    ### DTW alignment -- align source with target parameters ###
-    src_mag_file = os.path.join(src_feat_dir, file_id + ".mag")
-    tgt_mag_file = os.path.join(tgt_feat_dir, file_id + ".mag")
-
-    src_features, src_frame_number = io_funcs.load_binary_file_frame(
-        src_mag_file, mag_dim)
-    tgt_features, tgt_frame_number = io_funcs.load_binary_file_frame(
-        tgt_mag_file, mag_dim)
-
-    ### dtw align src with tgt ###
-    dtw_alignment_file = os.path.join(alignments_dir, file_id + ".dtw")
-
-    x2x_cmd1 = "%s +fa %s | xargs -n%d > %s" % (os.path.join(
-        sptk, "x2x"), src_mag_file, mag_dim, os.path.join(temp_dir, file_id + "_src_ascii.mag"))
-    x2x_cmd2 = "%s +fa %s | xargs -n%d > %s" % (os.path.join(
-        sptk, "x2x"), tgt_mag_file, mag_dim, os.path.join(temp_dir, file_id + "_tgt_ascii.mag"))
-
-    os.system(x2x_cmd1)
-    os.system(x2x_cmd2)
-
-    chtrack_cmd1 = "%s -s 0.005 -otype est_binary %s -o %s" % (os.path.join(speech_tools, "ch_track"),
-                                                               os.path.join(
-                                                                   temp_dir, file_id + "_src_ascii.mag"),
-                                                               os.path.join(temp_dir, file_id + "_src_binary.mag"))
-
-    os.system(chtrack_cmd1)
-
-    chtrack_cmd2 = "%s -s 0.005 -otype est_binary %s -o %s" % (os.path.join(speech_tools, "ch_track"),
-                                                               os.path.join(
-                                                                   temp_dir, file_id + "_tgt_ascii.mag"),
-                                                               os.path.join(temp_dir, file_id + "_tgt_binary.mag"))
-    os.system(chtrack_cmd2)
-
-    phone_align_cmd = "%s -itrack %s -otrack %s -ilabel %s -olabel %s -verbose -withcosts > %s" % (os.path.join(festvox, "src/general/phonealign"),
-                                                                                                   os.path.join(temp_dir, file_id + "_tgt_binary.mag"),
-                                                                                                   os.path.join(temp_dir, file_id + "_src_binary.mag"),
-                                                                                                   os.path.join(temp_dir, "i.lab"), os.path.join(temp_dir, "o.lab"), dtw_alignment_file)
-    os.system(phone_align_cmd)
-
-    # load dtw path
-    dtw_path_dict = io_funcs.load_ascii_dtw_file(dtw_alignment_file)
-    assert len(dtw_path_dict) == tgt_frame_number  # dtw length not matched
-
-    # align features
-    aligner.align_src_feats(os.path.join(src_feat_dir, file_id + ".mag"), os.path.join(
-        src_aligned_feat_dir, file_id + ".mag"), mag_dim, dtw_path_dict)
-    aligner.align_src_feats(os.path.join(src_feat_dir, file_id + ".real"), os.path.join(
-        src_aligned_feat_dir, file_id + ".real"), real_dim, dtw_path_dict)
-    aligner.align_src_feats(os.path.join(src_feat_dir, file_id + ".imag"), os.path.join(
-        src_aligned_feat_dir, file_id + ".imag"), imag_dim, dtw_path_dict)
-    aligner.align_src_feats(os.path.join(src_feat_dir, file_id + ".lf0"), os.path.join(
-        src_aligned_feat_dir, file_id + ".lf0"), lf0_dim, dtw_path_dict)
-
-
-print("--- DTW alignment started ---")
-start_time = time.time()
-
-# get mag files list
-mag_files = get_mag_filelist(src_feat_dir)
-
-# do multi-processing
-pool = mp.Pool(mp.cpu_count())
-pool.map(process, mag_files)
-
-# clean temporary files
-shutil.rmtree(alignments_dir, ignore_errors=True)
-shutil.rmtree(temp_dir, ignore_errors=True)
-
-(m, s) = divmod(int(time.time() - start_time), 60)
-print(("--- DTW alignment completion time: %d min. %d sec ---" % (m, s)))
-
-if not os.path.exists(src_aligned_feat_dir):
-    print("DTW alignment unsuccessful!!")
-else:
-    print("You should have your src feats (aligned with target) ready in: %s" %
-          (src_aligned_feat_dir))
-#!/usr/bin/env python
-import os
-import sys
-import time
-#import shutil
-import multiprocessing as mp
-
-import fastdtw
-
-from binary_io import BinaryIOCollection
-from align_feats import AlignFeats
-
-if len(sys.argv) != 5:
-    print("Usage: python dtw_aligner_magphase.py <tools_dir> <src_feat_dir> <tgt_feat_dir> <src_aligned_feat_dir>")
-    sys.exit(1)
-
-# Arguments
-
-# tools directory
-tools_dir = sys.argv[1]
-
-# Source features directory
-src_feat_dir = sys.argv[2]
-
-# Target features directory
-tgt_feat_dir = sys.argv[3]
-
-# Source-aligned features directory
-src_aligned_feat_dir = sys.argv[4]
-
-if not os.path.exists(src_aligned_feat_dir):
-    os.makedirs(src_aligned_feat_dir)
-
-# Define variables
-mag_dim = 60  # TODO: Change this (avoid hardcoding)
-real_dim = 10
-imag_dim = 10
-lf0_dim = 1
-
-#src_mag_dir = src_feat_dir
-#tgt_mag_dir = tgt_feat_dir
-
-#src_lf0_dir = os.path.join(src_feat_dir, "lf0")
-#tgt_lf0_dir = os.path.join(tgt_feat_dir, "lf0")
-
-# create output directories
-#src_aligned_mag_dir = os.path.join(src_aligned_feat_dir, "mag")
-#src_aligned_bap_dir = os.path.join(src_aligned_feat_dir, "bap")
-#src_aligned_lf0_dir = os.path.join(src_aligned_feat_dir, "lf0")
-
-# if not os.path.exists(src_aligned_mag_dir):
-#     os.mkdir(src_aligned_mag_dir)
-
-# if not os.path.exists(src_aligned_bap_dir):
-#     os.mkdir(src_aligned_bap_dir)
-
-# if not os.path.exists(src_aligned_lf0_dir):
-#     os.mkdir(src_aligned_lf0_dir)
-
-#################################################################
-######## align source feats with target feats using dtw ########
-#################################################################
-
-io_funcs = BinaryIOCollection()
-aligner = AlignFeats()
-
-
-def get_mag_filelist(mag_dir):
-    mag_files = []
-    for file in os.listdir(mag_dir):
-        whole_filepath = os.path.join(mag_dir, file)
-        if os.path.isfile(whole_filepath) and str(whole_filepath).endswith(".mag"):
-            mag_files.append(whole_filepath)
-        elif os.path.isdir(whole_filepath):
-            mag_files += get_mag_filelist(whole_filepath)
-
-    mag_files.sort()
-
-    return mag_files
-
-
-def load_dtw_path(dtw_path):
-    dtw_path_dict = {}
-    nframes = len(dtw_path)
-
-    for item, i in zip(dtw_path, range(nframes)):
-        if item[1] not in dtw_path_dict:
-            dtw_path_dict[item[1]] = item[0]
-
-    return dtw_path_dict
-
-
-def process(filename):
-    '''
-    The function derives dtw alignment path given source mag and target mag
-    :param filename: path to src mag file
-    '''
-    file_id = os.path.basename(filename).split(".")[0]
-    print(file_id)
-
-    ### DTW alignment -- align source with target parameters ###
-    src_mag_file = os.path.join(src_feat_dir, file_id + ".mag")
-    tgt_mag_file = os.path.join(tgt_feat_dir, file_id + ".mag")
-
-    src_features, src_frame_number = io_funcs.load_binary_file_frame(
-        src_mag_file, mag_dim)
-    tgt_features, tgt_frame_number = io_funcs.load_binary_file_frame(
-        tgt_mag_file, mag_dim)
-
-    ### dtw align src with tgt ###
-    distance, dtw_path = fastdtw.fastdtw(src_features, tgt_features)
-
-    # load dtw path
-    dtw_path_dict = load_dtw_path(dtw_path)
-    assert len(dtw_path_dict) == tgt_frame_number  # dtw length not matched
-
-    # align features
-    aligner.align_src_feats(os.path.join(src_feat_dir, file_id + ".mag"), os.path.join(
-        src_aligned_feat_dir, file_id + ".mag"), mag_dim, dtw_path_dict)
-    aligner.align_src_feats(os.path.join(src_feat_dir, file_id + ".real"), os.path.join(
-        src_aligned_feat_dir, file_id + ".real"), real_dim, dtw_path_dict)
-    aligner.align_src_feats(os.path.join(src_feat_dir, file_id + ".imag"), os.path.join(
-        src_aligned_feat_dir, file_id + ".imag"), imag_dim, dtw_path_dict)
-    aligner.align_src_feats(os.path.join(src_feat_dir, file_id + ".lf0"), os.path.join(
-        src_aligned_feat_dir, file_id + ".lf0"), lf0_dim, dtw_path_dict)
-
-
-print("--- DTW alignment started ---")
-start_time = time.time()
-
-# get mag files list
-mag_files = get_mag_filelist(src_feat_dir)
-
-# do multi-processing
-pool = mp.Pool(mp.cpu_count())
-pool.map(process, mag_files)
-
-(m, s) = divmod(int(time.time() - start_time), 60)
-print(("--- DTW alignment completion time: %d min. %d sec ---" % (m, s)))
-
-if not os.path.exists(src_aligned_feat_dir):
-    print("DTW alignment unsuccessful!!")
-else:
-    print("You should have your src feats (aligned with target) ready in: %s" %
-          (src_aligned_feat_dir))
-import os
-import sys
-import numpy
-import argparse
-
-from binary_io import BinaryIOCollection
-
-io_funcs = BinaryIOCollection()
-
-
-def read_file_list(file_name):
-    file_lists = []
-    fid = open(file_name)
-    for line in fid.readlines():
-        line = line.strip()
-        if len(line) < 1:
-            continue
-        file_lists.append(line)
-    fid.close()
-
-    return file_lists
-
-
-def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True):
-    if not os.path.exists(file_dir) and new_dir_switch:
-        os.makedirs(file_dir)
-    file_name_list = []
-    for file_id in file_id_list:
-        file_name = file_dir + '/' + file_id + file_extension
-        file_name_list.append(file_name)
-
-    return file_name_list
-
-
-def transform_f0(src_lf0_arr, stats_dict):
-    mu_src = stats_dict['mu_src']
-    mu_tgt = stats_dict['mu_tgt']
-
-    std_src = stats_dict['std_src']
-    std_tgt = stats_dict['std_tgt']
-
-    tgt_lf0_arr = numpy.zeros(len(src_lf0_arr))
-    for i in range(len(src_lf0_arr)):
-        lf0_src = src_lf0_arr[i]
-        f0_src = numpy.exp(lf0_src)
-        if f0_src <= 0:
-            tgt_lf0_arr[i] = lf0_src
-        else:
-            tgt_lf0_arr[i] = (mu_tgt + (std_tgt/std_src)*(lf0_src - mu_src))
-
-    return tgt_lf0_arr
-
-
-def transform_lf0_dir(src_lf0_file_list, tgt_lf0_file_list, stats_dict):
-    for i in range(len(src_lf0_file_list)):
-        src_lf0_file = src_lf0_file_list[i]
-        tgt_lf0_file = tgt_lf0_file_list[i]
-        transform_lf0_file(src_lf0_file, tgt_lf0_file, stats_dict)
-
-
-def transform_lf0_file(src_lf0_file, tgt_lf0_file, stats_dict):
-    src_lf0_arr, frame_number = io_funcs.load_binary_file_frame(
-        src_lf0_file, 1)
-    tgt_lf0_arr = transform_f0(src_lf0_arr, stats_dict)
-    io_funcs.array_to_binary_file(tgt_lf0_arr, tgt_lf0_file)
-
-
-def get_lf0_filelist(lf0_dir):
-    lf0_files = []
-    for file in os.listdir(lf0_dir):
-        whole_filepath = os.path.join(lf0_dir, file)
-        if os.path.isfile(whole_filepath) and str(whole_filepath).endswith(".lf0"):
-            lf0_files.append(whole_filepath)
-        elif os.path.isdir(whole_filepath):
-            lf0_files += get_lf0_filelist(whole_filepath)
-
-    lf0_files.sort()
-
-    return lf0_files
-
-
-if __name__ == "__main__":
-    # parse the arguments
-    parser = argparse.ArgumentParser()
-    parser.add_argument('--srcstatsfile', required=True,
-                        help='path to source lf0 stats file')
-    parser.add_argument('--tgtstatsfile', required=True,
-                        help='path to target lf0 stats file')
-    parser.add_argument('--srcdir', type=str,
-                        help='path to source lf0 data directory')
-    parser.add_argument('--tgtdir', type=str,
-                        help='path to target lf0 data directory')
-    parser.add_argument('--filelist', type=str, help='path to file ID list')
-    parser.add_argument('--srcfile', type=str,
-                        help='path to source lf0 data file')
-    parser.add_argument('--tgtfile', type=str,
-                        help='path to target lf0 data file')
-    opt = parser.parse_args()
-
-    if opt.srcdir is None and opt.srcfile is None:
-        print("at least one of --srcdir and --srcfile is required")
-        sys.exit(1)
-
-    if opt.tgtdir is None and opt.tgtfile is None:
-        print("at least one of --tgtdir and --tgtfile is required")
-        sys.exit(1)
-
-    if opt.srcdir is not None and opt.filelist is None:
-        print("file ID list is required")
-        sys.exit(1)
-
-    src_lf0_stats_file = opt.srcstatsfile
-    tgt_lf0_stats_file = opt.tgtstatsfile
-
-    if os.path.isfile(src_lf0_stats_file):
-        in_f = open(src_lf0_stats_file, 'r')
-        data = in_f.readlines()
-        in_f.close()
-
-        [src_mean_f0, src_std_f0] = map(float, data[0].strip().split())
-    else:
-        print("File doesn't exist!! Please check path: %s" %
-              (src_lf0_stats_file))
-
-    if os.path.isfile(tgt_lf0_stats_file):
-        in_f = open(tgt_lf0_stats_file, 'r')
-        data = in_f.readlines()
-        in_f.close()
-
-        [tgt_mean_f0, tgt_std_f0] = map(float, data[0].strip().split())
-    else:
-        print("File doesn't exist!! Please check path: %s" %
-              (tgt_lf0_stats_file))
-
-    #print(src_mean_f0, src_std_f0)
-    #print(tgt_mean_f0, tgt_std_f0)
-
-    stats_dict = {}
-
-    stats_dict['mu_src'] = src_mean_f0
-    stats_dict['mu_tgt'] = tgt_mean_f0
-
-    stats_dict['std_src'] = src_std_f0
-    stats_dict['std_tgt'] = tgt_std_f0
-
-    if opt.srcdir is not None and opt.tgtdir is not None:
-        file_id_list = read_file_list(opt.filelist)
-        src_lf0_file_list = prepare_file_path_list(
-            file_id_list, opt.srcdir, '.lf0')
-        tgt_lf0_file_list = prepare_file_path_list(
-            file_id_list, opt.tgtdir, '.lf0')
-
-        transform_lf0_dir(src_lf0_file_list, tgt_lf0_file_list, stats_dict)
-
-    elif opt.srcfile is not None and opt.tgtfile is not None:
-
-        transform_lf0_file(opt.srcfile, opt.tgtfile, stats_dict)
-
-import io
-import logging.config
-import logging  # as logging
-from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot
-from utils.learn_rates import ExpDecreaseLearningRate
-from utils.generate import generate_wav
-from utils.compute_distortion import DistortionComputation, IndividualDistortionComp
-from models.dnn import DNN
-import configuration
-from frontend.label_composer import LabelComposer
-from io_funcs.binary_io import BinaryIOCollection
-from frontend.mean_variance_norm import MeanVarianceNorm
-from frontend.parameter_generation import ParameterGeneration
-from frontend.acoustic_composition import AcousticComposition
-from frontend.min_max_norm import MinMaxNormalisation
-from frontend.silence_remover import trim_silence
-from frontend.silence_remover import SilenceRemover
-from frontend.label_normalisation import HTSLabelNormalisation, XMLLabelNormalisation
-from utils.providers import ListDataProvider
-import theano
-import numpy.distutils.__config__
-import numpy
-import pickle
-import gzip
-import os
-import sys
-import errno
-import time
-import math
-import glob
-import struct
-
-file_location = os.path.split(os.path.realpath(
-    os.path.abspath(os.path.dirname(__file__))))[0]+'/'
-sys.path.append(file_location + '/../')
-
-
-# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why)
-# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself
-# and only after that can we import theano
-
-
-#from frontend.acoustic_normalisation import CMPNormalisation
-#from frontend.feature_normalisation_base import FeatureNormBase
-
-
-# the new class for label composition and normalisation
-
-
-#from models.ms_dnn import MultiStreamDNN
-#from models.ms_dnn_gv import MultiStreamDNNGv
-#from models.sdae import StackedDenoiseAutoEncoder
-
-
-#import matplotlib.pyplot as plt
-# our custom logging class that can also plot
-#from logplot.logging_plotting import LoggerPlotter, MultipleTimeSeriesPlot, SingleWeightMatrixPlot
-
-
-def extract_file_id_list(file_list):
-    file_id_list = []
-    for file_name in file_list:
-        file_id = os.path.basename(os.path.splitext(file_name)[0])
-        file_id_list.append(file_id)
-
-    return file_id_list
-
-
-def read_file_list(file_name):
-
-    logger = logging.getLogger("read_file_list")
-
-    file_lists = []
-    fid = open(file_name)
-    for line in fid.readlines():
-        line = line.strip()
-        if len(line) < 1:
-            continue
-        file_lists.append(line)
-    fid.close()
-
-    logger.debug('Read file list from %s' % file_name)
-    return file_lists
-
-
-def make_output_file_list(out_dir, in_file_lists):
-    out_file_lists = []
-
-    for in_file_name in in_file_lists:
-        file_id = os.path.basename(in_file_name)
-        out_file_name = out_dir + '/' + file_id
-        out_file_lists.append(out_file_name)
-
-    return out_file_lists
-
-
-def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True):
-    if not os.path.exists(file_dir) and new_dir_switch:
-        os.makedirs(file_dir)
-    file_name_list = []
-    for file_id in file_id_list:
-        file_name = file_dir + '/' + file_id + file_extension
-        file_name_list.append(file_name)
-
-    return file_name_list
-
-
-def visualize_dnn(dnn):
-
-    layer_num = len(dnn.params) // 2  # including input and output
-
-    for i in range(layer_num):
-        fig_name = 'Activation weights W' + str(i)
-        fig_title = 'Activation weights of W' + str(i)
-        xlabel = 'Neuron index of hidden layer ' + str(i)
-        ylabel = 'Neuron index of hidden layer ' + str(i+1)
-        if i == 0:
-            xlabel = 'Input feature index'
-        if i == layer_num-1:
-            ylabel = 'Output feature index'
-
-        logger.create_plot(fig_name, SingleWeightMatrixPlot)
-        plotlogger.add_plot_point(
-            fig_name, fig_name, dnn.params[i*2].get_value(borrow=True).T)
-        plotlogger.save_plot(fig_name, title=fig_name,
-                             xlabel=xlabel, ylabel=ylabel)
-
-
-def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list):
-    logger = logging.getLogger("dnn_generation")
-    logger.debug('Starting dnn_generation')
-
-    plotlogger = logging.getLogger("plotting")
-
-    dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
-# visualize_dnn(dbn)
-
-    file_number = len(valid_file_list)
-
-    for i in range(file_number):
-        logger.info('generating %4d of %4d: %s' %
-                    (i+1, file_number, valid_file_list[i]))
-        fid_lab = open(valid_file_list[i], 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        features = features[:(n_ins * (features.size // n_ins))]
-        features = features.reshape((-1, n_ins))
-        temp_set_x = features.tolist()
-        test_set_x = theano.shared(numpy.asarray(
-            temp_set_x, dtype=theano.config.floatX))
-
-        predicted_parameter = dnn_model.parameter_prediction(
-            test_set_x=test_set_x)
-# predicted_parameter = test_out()
-
-        # write to cmp file
-        predicted_parameter = numpy.array(predicted_parameter, 'float32')
-        temp_parameter = predicted_parameter
-        fid = open(out_file_list[i], 'wb')
-        predicted_parameter.tofile(fid)
-        logger.debug('saved to %s' % out_file_list[i])
-        fid.close()
-
-
-# generate bottleneck layer as features
-def dnn_hidden_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list):
-    logger = logging.getLogger("dnn_generation")
-    logger.debug('Starting dnn_generation')
-
-    plotlogger = logging.getLogger("plotting")
-
-    dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
-    file_number = len(valid_file_list)
-
-    for i in range(file_number):
-        logger.info('generating %4d of %4d: %s' %
-                    (i+1, file_number, valid_file_list[i]))
-        fid_lab = open(valid_file_list[i], 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        features = features[:(n_ins * (features.size // n_ins))]
-        features = features.reshape((-1, n_ins))
-        temp_set_x = features.tolist()
-        test_set_x = theano.shared(numpy.asarray(
-            temp_set_x, dtype=theano.config.floatX))
-
-        predicted_parameter = dnn_model.generate_top_hidden_layer(
-            test_set_x=test_set_x)
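-        # presumably the top hidden layer activations serve as bottleneck
-        # features; they are written out below in the same float32 format
-        # as the acoustic parameters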
-
-        # write to cmp file
-        predicted_parameter = numpy.array(predicted_parameter, 'float32')
-        temp_parameter = predicted_parameter
-        fid = open(out_file_list[i], 'wb')
-        predicted_parameter.tofile(fid)
-        logger.debug('saved to %s' % out_file_list[i])
-        fid.close()
-
-
-def main_function(cfg, in_dir, out_dir):
-
-    # get a logger for this main function
-    logger = logging.getLogger("main")
-
-    # get another logger to handle plotting duties
-    plotlogger = logging.getLogger("plotting")
-
-    #### parameter setting ####
-    hidden_layers_sizes = cfg.hyper_params['hidden_layer_size']
-
-    file_id_list = []
-
-    if cfg.label_style == 'HTS':
-        ext = '.lab'
-    else:
-        ext = '.utt'
-
-    synth_utts = glob.glob(in_dir + '/*' + ext)
-    for fname in synth_utts:
-        junk, name = os.path.split(fname)
-        file_id_list.append(name.replace(ext, ''))
-
-    if not os.path.isdir(out_dir):
-        os.mkdir(out_dir)
-
-    # total file number including training, development, and testing
-    #total_file_number = len(file_id_list)
-
-    data_dir = cfg.data_dir
-
-    #nn_cmp_dir = os.path.join(data_dir, 'nn' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim))
-    #nn_cmp_norm_dir = os.path.join(data_dir, 'nn_norm' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim))
-
-    model_dir = os.path.join(cfg.work_dir, 'nnets_model')
-    gen_dir = os.path.join(out_dir, 'gen')
-
-    #in_file_list_dict = {}
-
-    # for feature_name in cfg.in_dir_dict.keys():
-    #     in_file_list_dict[feature_name] = prepare_file_path_list(file_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False)
-
-    #nn_cmp_file_list = prepare_file_path_list(file_id_list, nn_cmp_dir, cfg.cmp_ext)
-    #nn_cmp_norm_file_list = prepare_file_path_list(file_id_list, nn_cmp_norm_dir, cfg.cmp_ext)
-
-    # normalisation information
-    norm_info_file = os.path.join(data_dir, 'norm_info' + cfg.combined_feature_name +
-                                  '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat')
-
-    # normalise input full context label
-
-    # currently supporting two different forms of linguistic features
-    # later, we should generalise this
-
-    if cfg.label_style == 'HTS':
-        label_normaliser = HTSLabelNormalisation(
-            question_file_name=cfg.question_file_name)
-        lab_dim = label_normaliser.dimension
-        logger.info('Input label dimension is %d' % lab_dim)
-        suffix = str(lab_dim)
-    # no longer supported - use new "composed" style labels instead
-    elif cfg.label_style == 'composed':
-        # label_normaliser = XMLLabelNormalisation(xpath_file_name=cfg.xpath_file_name)
-        suffix = 'composed'
-
-    # the number can be removed
-    binary_label_dir = os.path.join(out_dir, 'lab_bin')
-    nn_label_norm_dir = os.path.join(out_dir, 'lab_bin_norm')
-
-    in_label_align_file_list = prepare_file_path_list(
-        file_id_list, in_dir, cfg.lab_ext)
-    binary_label_file_list = prepare_file_path_list(
-        file_id_list, binary_label_dir, cfg.lab_ext)
-    nn_label_norm_file_list = prepare_file_path_list(
-        file_id_list, nn_label_norm_dir, cfg.lab_ext)
-
-    # need this to find normalisation info:
-    if cfg.process_labels_in_work_dir:
-        label_data_dir = cfg.work_dir
-    else:
-        label_data_dir = data_dir
-
-    min_max_normaliser = None
-    label_norm_file = 'label_norm_%s.dat' % (cfg.label_style)
-    label_norm_file = os.path.join(label_data_dir, label_norm_file)
-
-    if cfg.label_style == 'HTS':
-        # simple HTS labels
-        logger.info(
-            'preparing label data (input) using standard HTS style labels')
-        label_normaliser.perform_normalisation(
-            in_label_align_file_list, binary_label_file_list)
-
-    else:
-
-        logger.info(
-            'preparing label data (input) using "composed" style labels')
-        label_composer = LabelComposer()
-        label_composer.load_label_configuration(cfg.label_config_file)
-
-        logger.info('Loaded label configuration')
-        # logger.info('%s' % label_composer.configuration.labels )
-
-        lab_dim = label_composer.compute_label_dimension()
-        logger.info('label dimension will be %d' % lab_dim)
-
-        if cfg.precompile_xpaths:
-            label_composer.precompile_xpaths()
-
-        # there are now a set of parallel input label files (e.g., one set of HTS and another set of Ossian trees)
-        # create all the lists of these, ready to pass to the label composer
-
-        in_label_align_file_list = {}
-        for label_style, label_style_required in label_composer.label_styles.items():
-            if label_style_required:
-                logger.info(
-                    'labels of style %s are required - constructing file paths for them' % label_style)
-                if label_style == 'xpath':
-                    in_label_align_file_list['xpath'] = prepare_file_path_list(
-                        file_id_list, in_dir, cfg.utt_ext, False)
-                elif label_style == 'hts':
-                    logger.critical('script not tested with HTS labels')
-                else:
-                    logger.critical(
-                        'unsupported label style %s specified in label configuration' % label_style)
-                    raise Exception
-
-        # now iterate through the files, one at a time, constructing the labels for them
-        num_files = len(file_id_list)
-        logger.info('the label styles required are %s' %
-                    label_composer.label_styles)
-
-        for i in range(num_files):
-            logger.info(
-                'making input label features for %4d of %4d' % (i+1, num_files))
-
-            # iterate through the required label styles and open each corresponding label file
-
-            # a dictionary of file descriptors, pointing at the required files
-            required_labels = {}
-
-            for label_style, label_style_required in label_composer.label_styles.items():
-
-                # the files will be a parallel set of files for a single utterance
-                # e.g., the XML tree and an HTS label file
-                if label_style_required:
-                    required_labels[label_style] = open(
-                        in_label_align_file_list[label_style][i], 'r')
-                    logger.debug(' opening label file %s' %
-                                 in_label_align_file_list[label_style][i])
-
-            logger.debug('label styles with open files: %s' %
-                         required_labels)
-            label_composer.make_labels(
-                required_labels, out_file_name=binary_label_file_list[i], fill_missing_values=cfg.fill_missing_values, iterate_over_frames=cfg.iterate_over_frames)
-
-            # now close all opened files
-            for fd in required_labels.values():
-                fd.close()
-
-    # no silence removal for synthesis ...
-
-    # minmax norm:
-    min_max_normaliser = MinMaxNormalisation(
-        feature_dimension=lab_dim, min_value=0.01, max_value=0.99)
-
-    # reload stored minmax values: (TODO -- move reading and writing into MinMaxNormalisation class)
-    fid = open(label_norm_file, 'rb')
-
-    # This doesn't work -- precision is lost -- reads in as float64
-    # label_norm_info = numpy.fromfile(fid)  ## label_norm_info = numpy.array(label_norm_info, 'float32')
-
-    # use struct to enforce float32:
-    nbytes = os.stat(label_norm_file)[6]  # length in bytes
-    data = fid.read(nbytes)  # = read until bytes run out
-    fid.close()
-    m = nbytes // 4  # number of 32 bit floats
-    format = str(m)+"f"
-    label_norm_info = struct.unpack(format, data)
-    label_norm_info = numpy.array(label_norm_info)
-
-    min_max_normaliser.min_vector = label_norm_info[:m//2]
-    min_max_normaliser.max_vector = label_norm_info[m//2:]
-
-    # apply precomputed min-max to the whole dataset
-    min_max_normaliser.normalise_data(
-        binary_label_file_list, nn_label_norm_file_list)
-
-    # make output acoustic data
-# if cfg.MAKECMP:
-
-    # retrieve acoustic normalisation information for normalising the features back
-    var_dir = os.path.join(data_dir, 'var')
-    var_file_dict = {}
-    for feature_name in list(cfg.out_dimension_dict.keys()):
-        var_file_dict[feature_name] = os.path.join(
-            var_dir, feature_name + '_' + str(cfg.out_dimension_dict[feature_name]))
-
-    # normalise output acoustic data
-# if cfg.NORMCMP:
-
-    combined_model_arch = str(len(hidden_layers_sizes))
-    for hid_size in hidden_layers_sizes:
-        combined_model_arch += '_' + str(hid_size)
-    nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.model' \
-        % (model_dir, cfg.model_type, cfg.combined_feature_name, int(cfg.multistream_switch),
-           combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number)
-
-    # DNN model training
-# if cfg.TRAINDNN:
-
-    # if cfg.DNNGEN:
-    logger.info('generating from DNN')
-
-    try:
-        os.makedirs(gen_dir)
-    except OSError as e:
-        if e.errno == errno.EEXIST:
-            # not an error - just means directory already exists
-            pass
-        else:
-            logger.critical(
-                'Failed to create generation directory %s' % gen_dir)
-            logger.critical(' OS error was: %s' % e.strerror)
-            raise
-
-    gen_file_list = prepare_file_path_list(file_id_list, gen_dir, cfg.cmp_ext)
-
-    dnn_generation(nn_label_norm_file_list, nnets_file_name,
-                   lab_dim, cfg.cmp_dim, gen_file_list)
-
-    logger.debug('denormalising generated output using method %s' %
-                 cfg.output_feature_normalisation)
-
-    fid = open(norm_info_file, 'rb')
-    cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32)
-    fid.close()
-    cmp_min_max = cmp_min_max.reshape((2, -1))
-    cmp_min_vector = cmp_min_max[0, ]
-    cmp_max_vector = cmp_min_max[1, ]
-
-    if cfg.output_feature_normalisation == 'MVN':
-        denormaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim)
-        denormaliser.feature_denormalisation(
-            gen_file_list, gen_file_list, cmp_min_vector, cmp_max_vector)
-
-    elif cfg.output_feature_normalisation == 'MINMAX':
-        denormaliser = MinMaxNormalisation(
-            cfg.cmp_dim, min_value=0.01, max_value=0.99, min_vector=cmp_min_vector, max_vector=cmp_max_vector)
-        denormaliser.denormalise_data(gen_file_list, gen_file_list)
-    else:
-        logger.critical('denormalising method %s is not supported!\n' %
-                        (cfg.output_feature_normalisation))
-        raise
-
-    # perform MLPG to smooth parameter trajectory
-    # lf0 is included, the output features must have vuv.
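-    # (MLPG here presumably combines the predicted static and delta features
-    # with the stored variances in var_file_dict to produce smooth parameter
-    # trajectories)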
-    generator = ParameterGeneration(gen_wav_features=cfg.gen_wav_features)
-    generator.acoustic_decomposition(
-        gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict, var_file_dict)
-
-    logger.info('Simple variance expansion')
-    test_var_scaling = False
-    scaled_dir = gen_dir + '_scaled'
-    if test_var_scaling:
-        file_id_list = simple_scale_variance_CONTINUUM(
-            gen_dir, scaled_dir, var_file_dict, cfg.out_dimension_dict, file_id_list)
-    else:
-        simple_scale_variance(gen_dir, scaled_dir, var_file_dict, cfg.out_dimension_dict,
-                              file_id_list, gv_weight=1.0)  # gv_weight hard coded here!
-
-    # generate wav ----
-    # if cfg.GENWAV:
-    logger.info('reconstructing waveform(s)')
-    #generate_wav_glottHMM(scaled_dir, file_id_list)
-    generate_wav(scaled_dir, file_id_list, cfg)
-
-
-def simple_scale_variance(indir, outdir, var_file_dict, out_dimension_dict, file_id_list, gv_weight=1.0):
-    # simple variance scaling (silen et al. 2012, paragraph 3.1)
-    # TODO: Lots of things like stream names hardcoded here; 3 for delta + delta-delta; ...
-    # all_streams = ['cmp','HNR','F0','LSF','Gain','LSFsource']
-    # streams_to_scale = ['LSF']
-    all_streams = ['cmp', 'mgc', 'lf0', 'bap']
-    streams_to_scale = ['mgc']
-
-    static_variances = {}
-
-    static_dimension_dict = {}
-    for (feature_name, size) in list(out_dimension_dict.items()):
-        static_dimension_dict[feature_name] = size//3
-
-    io_funcs = BinaryIOCollection()
-    for feature_name in list(var_file_dict.keys()):
-        var_values, dimension = io_funcs.load_binary_file_frame(
-            var_file_dict[feature_name], 1)
-        static_var_values = var_values[:static_dimension_dict[feature_name], :]
-        static_variances[feature_name] = static_var_values
-
-    if not os.path.isdir(outdir):
-        os.makedirs(outdir)
-
-    assert gv_weight <= 1.0 and gv_weight >= 0.0
-    local_weight = 1.0 - gv_weight
-
-    for uttname in file_id_list:
-        for stream in all_streams:
-            infile = os.path.join(indir, uttname + '.' + stream)
-            outfile = os.path.join(outdir, uttname + '.' + stream)
-            if not os.path.isfile(infile):
-                sys.exit(infile + ' does not exist')
-            if stream in streams_to_scale:
-                speech, dimension = io_funcs.load_binary_file_frame(
-                    infile, static_dimension_dict[stream])
-                utt_mean = numpy.mean(speech, axis=0)
-                utt_std = numpy.std(speech, axis=0)
-
-                global_std = numpy.transpose((static_variances[stream]))
-                weighted_global_std = (
-                    gv_weight * global_std) + (local_weight * utt_std)
-                std_ratio = weighted_global_std / utt_std
-
-                nframes, ndim = numpy.shape(speech)
-                utt_mean_matrix = numpy.tile(utt_mean, (nframes, 1))
-                std_ratio_matrix = numpy.tile(std_ratio, (nframes, 1))
-
-                scaled_speech = ((speech - utt_mean_matrix) *
-                                 std_ratio_matrix) + utt_mean_matrix
-                io_funcs.array_to_binary_file(scaled_speech, outfile)
-
-            else:
-                os.system('cp %s %s' % (infile, outfile))
-
-
-def simple_scale_variance_CONTINUUM(indir, outdir, var_file_dict, out_dimension_dict, file_id_list):
-    # Try a range of interpolation weights for combining global & local variance
-    all_streams = ['cmp', 'HNR', 'F0', 'LSF', 'Gain', 'LSFsource']
-    streams_to_scale = ['LSF']
-
-    static_variances = {}
-
-    static_dimension_dict = {}
-    for (feature_name, size) in list(out_dimension_dict.items()):
-        static_dimension_dict[feature_name] = size//3
-
-    io_funcs = BinaryIOCollection()
-    for feature_name in list(var_file_dict.keys()):
-        var_values, dimension = io_funcs.load_binary_file_frame(
-            var_file_dict[feature_name], 1)
-        static_var_values = var_values[:static_dimension_dict[feature_name], :]
-        static_variances[feature_name] = static_var_values
-
-    if not os.path.isdir(outdir):
-        os.makedirs(outdir)
-
-    file_id_list_out = []
-    for uttname in file_id_list:
-        for gv_weight in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
-            local_weight = 1.0 - gv_weight
-            for stream in all_streams:
-                infile = os.path.join(indir, uttname + '.' + stream)
-                extended_uttname = uttname + '_gv' + str(gv_weight)
-                print(extended_uttname)
-                outfile = os.path.join(outdir, extended_uttname + '.' + stream)
-                if not os.path.isfile(infile):
-                    sys.exit(infile + ' does not exist')
-                if stream in streams_to_scale:
-                    speech, dimension = io_funcs.load_binary_file_frame(
-                        infile, static_dimension_dict[stream])
-                    utt_mean = numpy.mean(speech, axis=0)
-                    utt_std = numpy.std(speech, axis=0)
-
-                    global_std = numpy.transpose((static_variances[stream]))
-
-                    weighted_global_std = (
-                        gv_weight * global_std) + (local_weight * utt_std)
-
-                    std_ratio = weighted_global_std / utt_std
-
-                    nframes, ndim = numpy.shape(speech)
-                    utt_mean_matrix = numpy.tile(utt_mean, (nframes, 1))
-                    std_ratio_matrix = numpy.tile(std_ratio, (nframes, 1))
-
-                    scaled_speech = ((speech - utt_mean_matrix)
-                                     * std_ratio_matrix) + utt_mean_matrix
-                    io_funcs.array_to_binary_file(scaled_speech, outfile)
-
-                else:
-                    os.system('cp %s %s' % (infile, outfile))
-                file_id_list_out.append(extended_uttname)
-    return file_id_list_out
-
-
-def log_to_hertz(infile, outfile):
-    f = open(infile, 'r')
-    log_values = [float(val) for val in f.readlines()]
-    f.close()
-
-    def m2h(l):
-        h = math.exp(l)
-        return h
-
-    hertz = [m2h(l) for l in log_values]
-    f = open(outfile, 'w')
-    for val in hertz:
-        if val > 0:
-            f.write(str(val) + '\n')
-        else:
-            f.write('0.0\n')
-    f.close()
-
-
-def generate_wav_glottHMM(gen_dir, gen_file_id_list):
-
-    x2x = '~/repos/simple4all/CSTRVoiceClone/trunk/bin/x2x'
-    synthesis = '~/sim2/oliver/nst_repos/OSSIAN/ossian-v.1.3/tools/GlottHMM/Synthesis'
-    general_glott_conf = '~/sim2/oliver/nst_repos/OSSIAN/ossian-v.1.3/voices/en/ky_02_toy/english_gold_basic_glott_KY/processors/speech_feature_extractor/main_config.cfg'
-    user_glott_conf = '~/sim2/oliver/nst_repos/OSSIAN/ossian-v.1.3/voices/en/ky_02_toy/english_gold_basic_glott_KY/processors/speech_feature_extractor/user_config.cfg'
-
-    exports = 'export LIBCONFIG_INSTALL_DIR=/afs/inf.ed.ac.uk/user/o/owatts/sim2/oliver/nst_repos/OSSIAN/ossian-v.1.3/tools/GlottHMM//libconfig-1.4.9 ; export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LIBCONFIG_INSTALL_DIR/lib/.libs ; export LIBRARY_PATH=$LIBRARY_PATH:$LIBCONFIG_INSTALL_DIR/lib/.libs ; export CPATH=$CPATH:$LIBCONFIG_INSTALL_DIR/lib ;'
-
-    streams = ['cmp', 'HNR', 'F0', 'LSF', 'Gain', 'LSFsource']
-    for uttname in gen_file_id_list:
-        all_present = True
-        for stream in streams:
-            if not os.path.isfile(os.path.join(gen_dir, uttname + '.' + stream)):
-                all_present = False
-        if all_present:
-            for stream in streams:
-                extra = ''
-                if stream == 'F0':
-                    extra = '.NEGVALS'
-                fname = os.path.join(gen_dir, uttname + '.' + stream)
-                fname_txt = os.path.join(
-                    gen_dir, uttname + '.txt.' + stream + extra)
-                comm = '%s +fa %s > %s' % (x2x, fname, fname_txt)
-                os.system(comm)
-            log_to_hertz(os.path.join(gen_dir, uttname + '.txt.F0.NEGVALS'),
-                         os.path.join(gen_dir, uttname + '.txt.F0'))
-
-            stem_name = os.path.join(gen_dir, uttname + '.txt')
-            comm = '%s %s %s %s %s' % (
-                exports, synthesis, stem_name, general_glott_conf, user_glott_conf)
-            print(comm)
-            os.system(comm)
-
-        else:
-            print('missing stream(s) for utterance ' + uttname)
-
-
-if __name__ == '__main__':
-
-    # these things should be done even before trying to parse the command line
-
-    # create a configuration instance
-    # and get a short name for this instance
-    cfg = configuration.cfg
-#
-#   # set up logging to use our custom class
-#   logging.setLoggerClass(LoggerPlotter)
-#
-#   # get a logger for this main function
-#   logger = logging.getLogger("main")
-
-    if len(sys.argv) != 4:
-        print('usage: run_dnn.sh config_file_name in_dir out_dir')
-        #logger.critical('usage: run_dnn.sh config_file_name utt_dir')
-        sys.exit(1)
-
-    config_file = sys.argv[1]
-    in_dir = sys.argv[2]
-    out_dir = sys.argv[3]
-
-    config_file = os.path.abspath(config_file)
-    cfg.configure(config_file)
-
-    main_function(cfg, in_dir, out_dir)
-
-    sys.exit(0)
-
-import pickle
-import gzip
-import os
-import sys
-import errno
-import time
-import math
-import glob
-import struct
-
-import copy
-
-from lxml import etree
-
-# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why)
-import numpy
-# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself
-import numpy.distutils.__config__
-# and only after that can we import theano
-import theano
-
-from utils.providers import ListDataProviderWithProjectionIndex, expand_projection_inputs, get_unexpanded_projection_inputs  # ListDataProvider
-
-from frontend.label_normalisation import HTSLabelNormalisation, XMLLabelNormalisation
-from frontend.silence_remover import SilenceRemover
-from frontend.silence_remover import trim_silence
-from frontend.min_max_norm import MinMaxNormalisation
-#from frontend.acoustic_normalisation import CMPNormalisation
-from frontend.acoustic_composition import AcousticComposition
-from frontend.parameter_generation import ParameterGeneration
-#from frontend.feature_normalisation_base import FeatureNormBase
-from frontend.mean_variance_norm import MeanVarianceNorm
-
-from io_funcs.binary_io import BinaryIOCollection
-
-# the new class for label composition and normalisation
-from frontend.label_composer import LabelComposer
-
-import configuration
-
-from models.dnn import DNN
-from models.tpdnn import TokenProjectionDNN
-from models.ms_dnn import MultiStreamDNN
-from models.ms_dnn_gv import MultiStreamDNNGv
-from models.sdae import StackedDenoiseAutoEncoder
-
-from utils.compute_distortion import DistortionComputation, IndividualDistortionComp
-from utils.generate import generate_wav
-from utils.learn_rates import ExpDecreaseLearningRate
-
-
-#import matplotlib.pyplot as plt
-# our custom logging class that can also plot
-#from logplot.logging_plotting import LoggerPlotter, MultipleTimeSeriesPlot, SingleWeightMatrixPlot
-from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot
-import logging  # as logging
-import logging.config
-import io
-
-
-# This should always be True -- tidy up later
-expand_by_minibatch = True
-
-if expand_by_minibatch:
-    proj_type = 'int32'
-else:
-    proj_type = theano.config.floatX
-
-
-def extract_file_id_list(file_list):
- file_id_list = [] - for file_name in file_list: - file_id = os.path.basename(os.path.splitext(file_name)[0]) - file_id_list.append(file_id) - - return file_id_list - - -def read_file_list(file_name): - - logger = logging.getLogger("read_file_list") - - file_lists = [] - fid = open(file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - file_lists.append(line) - fid.close() - - logger.debug('Read file list from %s' % file_name) - return file_lists - - -def make_output_file_list(out_dir, in_file_lists): - out_file_lists = [] - - for in_file_name in in_file_lists: - file_id = os.path.basename(in_file_name) - out_file_name = out_dir + '/' + file_id - out_file_lists.append(out_file_name) - - return out_file_lists - - -def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True): - if not os.path.exists(file_dir) and new_dir_switch: - os.makedirs(file_dir) - file_name_list = [] - for file_id in file_id_list: - file_name = file_dir + '/' + file_id + file_extension - file_name_list.append(file_name) - - return file_name_list - - -def visualize_dnn(dnn): - - layer_num = len(dnn.params) / 2 # including input and output - - for i in range(layer_num): - fig_name = 'Activation weights W' + str(i) - fig_title = 'Activation weights of W' + str(i) - xlabel = 'Neuron index of hidden layer ' + str(i) - ylabel = 'Neuron index of hidden layer ' + str(i+1) - if i == 0: - xlabel = 'Input feature index' - if i == layer_num-1: - ylabel = 'Output feature index' - - logger.create_plot(fig_name, SingleWeightMatrixPlot) - plotlogger.add_plot_point( - fig_name, fig_name, dnn.params[i*2].get_value(borrow=True).T) - plotlogger.save_plot(fig_name, title=fig_name, - xlabel=xlabel, ylabel=ylabel) - - -def infer_projections(train_xy_file_list, valid_xy_file_list, - nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False): - ''' - Unlike the same function in run_tpdnn.py this *DOESN'T* save model at the - end -- just returns array of the learned projection weights - ''' - - ####parameters##### - finetune_lr = float(hyper_params['learning_rate']) - training_epochs = int(hyper_params['training_epochs']) - batch_size = int(hyper_params['batch_size']) - l1_reg = float(hyper_params['l1_reg']) - l2_reg = float(hyper_params['l2_reg']) - private_l2_reg = float(hyper_params['private_l2_reg']) - warmup_epoch = int(hyper_params['warmup_epoch']) - momentum = float(hyper_params['momentum']) - warmup_momentum = float(hyper_params['warmup_momentum']) - - hidden_layers_sizes = hyper_params['hidden_layers_sizes'] - - stream_weights = hyper_params['stream_weights'] - private_hidden_sizes = hyper_params['private_hidden_sizes'] - - buffer_utt_size = buffer_size - early_stop_epoch = int(hyper_params['early_stop_epochs']) - - hidden_activation = hyper_params['hidden_activation'] - output_activation = hyper_params['output_activation'] - - stream_lr_weights = hyper_params['stream_lr_weights'] - use_private_hidden = hyper_params['use_private_hidden'] - - model_type = hyper_params['model_type'] - - index_to_project = hyper_params['index_to_project'] - projection_insize = hyper_params['projection_insize'] - projection_outsize = hyper_params['projection_outsize'] - - ######### data providers ########## - (train_x_file_list, train_y_file_list) = train_xy_file_list - (valid_x_file_list, valid_y_file_list) = valid_xy_file_list - - logger.debug('Creating training data provider') - train_data_reader = 
ListDataProviderWithProjectionIndex(x_file_list=train_x_file_list, y_file_list=train_y_file_list, n_ins=n_ins, n_outs=n_outs, - buffer_size=buffer_size, shuffle=True, index_to_project=index_to_project, projection_insize=projection_insize, indexes_only=expand_by_minibatch) - - logger.debug('Creating validation data provider') - valid_data_reader = ListDataProviderWithProjectionIndex(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, n_ins=n_ins, n_outs=n_outs, - buffer_size=buffer_size, shuffle=False, index_to_project=index_to_project, projection_insize=projection_insize, indexes_only=expand_by_minibatch) - - shared_train_set_xy, temp_train_set_x, temp_train_set_x_proj, temp_train_set_y = train_data_reader.load_next_partition_with_projection() - train_set_x, train_set_x_proj, train_set_y = shared_train_set_xy - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_x_proj, temp_valid_set_y = valid_data_reader.load_next_partition_with_projection() - valid_set_x, valid_set_x_proj, valid_set_y = shared_valid_set_xy - train_data_reader.reset() - valid_data_reader.reset() - #################################### - - # numpy random generator - numpy_rng = numpy.random.RandomState(123) - logger.info('building the model') - - ############## load existing dnn ##### - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - train_all_fn, train_subword_fn, train_word_fn, infer_projections_fn, valid_fn, valid_score_i = \ - dnn_model.build_finetune_functions( - (train_set_x, train_set_x_proj, train_set_y), - (valid_set_x, valid_set_x_proj, valid_set_y), batch_size=batch_size) - #################################### - - logger.info('fine-tuning the %s model' % (model_type)) - - start_time = time.clock() - - best_dnn_model = dnn_model - best_validation_loss = sys.float_info.max - previous_loss = sys.float_info.max - - early_stop = 0 - epoch = 0 - previous_finetune_lr = finetune_lr - - logger.info('fine-tuning the %s model' % (model_type)) - - dnn_model.initialise_projection_weights() - - inference_epochs = 20 # <-------- hard coded !!!!!!!!!! - - current_finetune_lr = previous_finetune_lr = finetune_lr - warmup_epoch_3 = 10 # 10 ## <-------- hard coded !!!!!!!!!! 
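The loop that follows keeps the fine-tuning rate constant for the hard-coded ten warm-up epochs, then halves it after every further epoch. A minimal sketch of that schedule for reference (the function name and the values are illustrative stand-ins, not part of the script):

```python
def scheduled_lr(finetune_lr, epoch, warmup=10):
    """Learning rate for a 1-indexed epoch: constant during warm-up,
    then halved once per subsequent epoch."""
    if epoch <= warmup:
        return finetune_lr
    return finetune_lr * (0.5 ** (epoch - warmup))

assert scheduled_lr(0.002, 10) == 0.002    # last warm-up epoch: unchanged
assert scheduled_lr(0.002, 12) == 0.0005   # two epochs later: halved twice
```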
-
-    #warmup_epoch_3 = epoch + warmup_epoch_3
-    #inference_epochs += epoch
-    while (epoch < inference_epochs):
-
-        epoch = epoch + 1
-
-        current_momentum = momentum
-
-        if epoch > warmup_epoch_3:
-            previous_finetune_lr = current_finetune_lr
-            current_finetune_lr = previous_finetune_lr * 0.5
-
-        dev_error = []
-        sub_start_time = time.clock()
-
-        # osw -- inferring word reps on validation set in a forward pass in a single batch
-        # exhausts memory when using 20k projected vocab -- also use minibatches
-        logger.debug('infer word representations for validation set')
-        valid_error = []
-        n_valid_batches = valid_set_x.get_value().shape[0] // batch_size
-        for minibatch_index in range(n_valid_batches):
-            v_loss = infer_projections_fn(
-                minibatch_index, current_finetune_lr, current_momentum)
-            valid_error.append(v_loss)
-
-        this_validation_loss = numpy.mean(valid_error)
-
-        #valid_error = infer_projections_fn(current_finetune_lr, current_momentum)
-        #this_validation_loss = numpy.mean(valid_error)
-
-#        if plot:
-#            ## add dummy validation loss so that plot works:
-#            plotlogger.add_plot_point('training convergence','validation set',(epoch,this_validation_loss))
-#            plotlogger.add_plot_point('training convergence','training set',(epoch,this_train_valid_loss))
-#
-
-        sub_end_time = time.clock()
-
-        logger.info('INFERENCE epoch %i, validation error %f, time spent %.2f' % (
-            epoch, this_validation_loss, (sub_end_time - sub_start_time)))
-
-
-#        if cfg.hyper_params['model_type'] == 'TPDNN':
-#            if not os.path.isdir(cfg.projection_weights_output_dir):
-#                os.mkdir(cfg.projection_weights_output_dir)
-#            weights = dnn_model.get_projection_weights()
-#            fname = os.path.join(cfg.projection_weights_output_dir, 'proj_INFERENCE_epoch_%s'%(epoch))
-#            numpy.savetxt(fname, weights)
-#
-
-    best_dnn_model = dnn_model  # always update
-
-    end_time = time.clock()
-    ##cPickle.dump(best_dnn_model, open(nnets_file_name, 'wb'))
-    final_weights = dnn_model.get_projection_weights()
-
-    logger.info('overall training time: %.2fm validation error %f' %
-                ((end_time - start_time) / 60., best_validation_loss))
-
-#    if plot:
-#        plotlogger.save_plot('training convergence',title='Final training and validation error',xlabel='epochs',ylabel='error')
-#
-
-    # ========================================================
-
-
-#    if cfg.hyper_params['model_type'] == 'TPDNN':
-#        os.system('python %s %s'%('/afs/inf.ed.ac.uk/user/o/owatts/scripts_NEW/plot_weights_multiple_phases.py', cfg.projection_weights_output_dir))
-
-    return final_weights
-
-
-def dnn_generation_PROJECTION(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list, cfg=None, synth_mode='constant', projection_end=0, projection_weights_to_use=None, save_weights_to_file=None):
-    '''
-    Use the (training/dev/test) projections learned in training, but shuffled, for test tokens.
-
-    -- projection_end is *real* value for last projection index (or some lower value)
-       -- this is so the samples / means are of real values learned on training data
-    '''
-
-    logger = logging.getLogger("dnn_generation")
-    logger.debug('Starting dnn_generation_PROJECTION')
-
-    plotlogger = logging.getLogger("plotting")
-
-    dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
-    # 'remove' word representations by randomising them. As model is unpickled and
-    # not re-saved, this does not throw trained parameters away.
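The comment above relies on pickle semantics: `pickle.load` constructs a fresh object in memory, so overwriting that copy's parameters cannot alter the model file on disk. A small self-contained illustration; `TinyModel` and the file name are hypothetical stand-ins for the toolkit's DNN class, not part of the script:

```python
import pickle
import numpy

class TinyModel:
    def __init__(self):
        self.proj = numpy.arange(6, dtype=numpy.float32).reshape(2, 3)

with open('tiny.model', 'wb') as f:
    pickle.dump(TinyModel(), f)

model = pickle.load(open('tiny.model', 'rb'))
model.proj[:] = 0.0                     # 'remove' the learned values in memory only
reloaded = pickle.load(open('tiny.model', 'rb'))
assert reloaded.proj.sum() == 15.0      # the file still holds 0+1+...+5
```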
-
-    if synth_mode == 'sampled_training':
-        # use randomly chosen training projection -- shuffle in-place = same as sampling without replacement
-        P = dnn_model.get_projection_weights()
-        # shuffle in place along 1st dim (reorder rows)
-        numpy.random.shuffle(P[:, :projection_end])
-        dnn_model.params[0].set_value(P, borrow=True)
-    elif synth_mode == 'uniform':
-        # generate utt embeddings uniformly at random within the min-max of the training set (i.e. from a (hyper)-rectangle)
-        P = dnn_model.get_projection_weights()
-
-        # vector like a row of P with min of its columns
-        column_min = numpy.min(P[:, :projection_end], axis=0)
-        column_max = numpy.max(P[:, :projection_end], axis=0)
-
-        random_proj = numpy.random.uniform(
-            low=column_min, high=column_max, size=numpy.shape(P))
-        random_proj = random_proj.astype(numpy.float32)
-
-        dnn_model.params[0].set_value(random_proj, borrow=True)
-
-    elif synth_mode == 'constant':
-        # use mean projection
-        P = dnn_model.get_projection_weights()
-        mean_row = P[:, :projection_end].mean(axis=0)
-        print('mean row used for projection:')
-        print(mean_row)
-        P = numpy.ones(numpy.shape(P), dtype=numpy.float32) * \
-            mean_row  # stack mean rows
-        dnn_model.params[0].set_value(P, borrow=True)
-    elif synth_mode == 'inferred':
-        # DEBUG
-        assert projection_weights_to_use is not None
-        old_weights = dnn_model.get_projection_weights()
-        # DEBUG:=========
-        # projection_weights_to_use = old_weights # numpy.array(numpy.random.uniform(low=-0.3, high=0.3, size=numpy.shape(old_weights)), dtype=numpy.float32)
-        # =============
-        assert numpy.shape(old_weights) == numpy.shape(projection_weights_to_use), [
-            numpy.shape(old_weights), numpy.shape(projection_weights_to_use)]
-        dnn_model.params[0].set_value(projection_weights_to_use, borrow=True)
-
-    elif synth_mode == 'single_sentence_demo':
-        # generate utt embeddings from a uniform 10 x 10 grid within the min-max of the training set (i.e. from a rectangle)
-        P = dnn_model.get_projection_weights()
-
-        # vector like a row of P with min of its columns
-        column_min = numpy.min(P[:, :projection_end], axis=0)
-        column_max = numpy.max(P[:, :projection_end], axis=0)
-        assert len(
-            column_min) == 2, 'Only 2D projections supported in mode single_sentence_demo'
-
-        ranges = column_max - column_min
-        nstep = 10
-        steps = ranges / (nstep-1)
-
-        # padding to handle 0 index (reserved for defaults)
-        grid_params = [numpy.array([1.0, 1.0])]
-        for x in range(nstep):
-            for y in range(nstep):
-                grid_params.append(column_min + (numpy.array([x, y]) * steps))
-        stacked_params = numpy.vstack(grid_params)
-        print(stacked_params)
-        print(numpy.shape(stacked_params))
-        print()
-        print()
-
-        proj = numpy.ones(numpy.shape(P))
-        proj[:101, :] = stacked_params
-
-        proj = proj.astype(numpy.float32)
-
-        dnn_model.params[0].set_value(proj, borrow=True)
-
-    elif synth_mode == 'uniform_sampled_within_std_1':
-        # points uniformly sampled from between the 1.8 - 2.0 stds of a diagonal covariance gaussian fitted to the data
-        P = dnn_model.get_projection_weights()
-
-        # vector like a row of P with min of its columns
-        column_min = numpy.min(P[:, :projection_end], axis=0)
-        column_max = numpy.max(P[:, :projection_end], axis=0)
-
-        std_val = numpy.std(P[:, :projection_end], axis=0)
-
-        dots = numpy.random.uniform(
-            low=column_min, high=column_max, size=(100000, 2))
-        dots = within_circle(dots, radius=std_val*2.0)
-        dots = outside_circle(dots, radius=std_val*1.8)
-
-        m, n = numpy.shape(P)
-        dots = dots[:m, :]
-
-        dots = dots.astype(numpy.float32)
-        dnn_model.params[0].set_value(dots, borrow=True)
-
-    elif synth_mode == 'uniform_sampled_within_std_2':
-        # points uniformly sampled from between the 2.8 - 3.0 stds of a diagonal covariance gaussian fitted to the data
-        P = dnn_model.get_projection_weights()
-
-        # vector like a row of P with min of its columns
-        column_min = numpy.min(P[:, :projection_end], axis=0)
-        column_max = numpy.max(P[:, :projection_end], axis=0)
-
-        std_val = numpy.std(P[:, :projection_end], axis=0)
-
-        dots = numpy.random.uniform(
-            low=column_min, high=column_max, size=(100000, 2))
-        dots = within_circle(dots, radius=std_val*3.0)
-        dots = outside_circle(dots, radius=std_val*2.8)
-
-        m, n = numpy.shape(P)
-        dots = dots[:m, :]
-
-        dots = dots.astype(numpy.float32)
-        dnn_model.params[0].set_value(dots, borrow=True)
-
-    elif synth_mode == 'uniform_sampled_within_std_3':
-        # points uniformly sampled from between the 3.8 - 4.0 stds of a diagonal covariance gaussian fitted to the data
-        P = dnn_model.get_projection_weights()
-
-        # vector like a row of P with min of its columns
-        column_min = numpy.min(P[:, :projection_end], axis=0)
-        column_max = numpy.max(P[:, :projection_end], axis=0)
-
-        std_val = numpy.std(P[:, :projection_end], axis=0)
-
-        dots = numpy.random.uniform(
-            low=column_min, high=column_max, size=(100000, 2))
-        dots = within_circle(dots, radius=std_val*4.0)
-        dots = outside_circle(dots, radius=std_val*3.8)
-
-        m, n = numpy.shape(P)
-        dots = dots[:m, :]
-
-        dots = dots.astype(numpy.float32)
-        dnn_model.params[0].set_value(dots, borrow=True)
-
-    else:
-        sys.exit('unknown mode: %s' % (synth_mode))
-
-    # save used weights for future reference:
-    if save_weights_to_file:
-        weights = dnn_model.get_projection_weights()
-        numpy.savetxt(save_weights_to_file, weights)
-
-    file_number = len(valid_file_list)
-
-    for i in range(file_number):
-        logger.info('generating %4d of %4d: %s' %
-                    (i+1, file_number, valid_file_list[i]))
-        fid_lab = open(valid_file_list[i], 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        features = features[:(n_ins * (features.size // n_ins))]
-        features = features.reshape((-1, n_ins))
-
-        # features, features_proj = expand_projection_inputs(features, cfg.index_to_project, \
-        #                cfg.projection_insize)
-        features, features_proj = get_unexpanded_projection_inputs(features, cfg.index_to_project,
-                                                                   cfg.projection_insize)
-        # temp_set_x = features.tolist()  ## osw - why list conversion necessary?
-        test_set_x = theano.shared(numpy.asarray(
-            features, dtype=theano.config.floatX))
-        test_set_x_proj = theano.shared(
-            numpy.asarray(features_proj, dtype='int32'))
-
-        predicted_parameter = dnn_model.parameter_prediction(
-            test_set_x=test_set_x, test_set_x_proj=test_set_x_proj)
-#        predicted_parameter = test_out()
-
-        # write to cmp file
-        predicted_parameter = numpy.array(predicted_parameter, 'float32')
-        temp_parameter = predicted_parameter
-        fid = open(out_file_list[i], 'wb')
-        predicted_parameter.tofile(fid)
-        logger.debug('saved to %s' % out_file_list[i])
-        fid.close()
-
-
-# define a couple of functions for circular rejection sampling:
-def within_circle(dots, radius=1.0):
-    standardised_dots = (dots - numpy.mean(dots)) / radius
-    # if x^2 + y^2 <= 1, point is within unit circle
-    within_circle = (standardised_dots[:, 0]*standardised_dots[:, 0]) + (
-        standardised_dots[:, 1]*standardised_dots[:, 1]) <= 1.0
-    return dots[within_circle]
-##
-
-
-def outside_circle(dots, radius=1.0):
-    standardised_dots = (dots - numpy.mean(dots)) / radius
-    # if x^2 + y^2 <= 1, point is within unit circle
-    within_circle = (standardised_dots[:, 0]*standardised_dots[:, 0]) + (
-        standardised_dots[:, 1]*standardised_dots[:, 1]) > 1.0
-    return dots[within_circle]
-
-
-# generate bottleneck layer as features
-def dnn_hidden_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list):
-    logger = logging.getLogger("dnn_generation")
-    logger.debug('Starting dnn_generation')
-
-    plotlogger = logging.getLogger("plotting")
-
-    dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
-    file_number = len(valid_file_list)
-
-    for i in range(file_number):
-        logger.info('generating %4d of %4d: %s' %
-                    (i+1, file_number, valid_file_list[i]))
-        fid_lab = open(valid_file_list[i], 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        features = features[:(n_ins * (features.size // n_ins))]
-        features = features.reshape((-1, n_ins))
-        temp_set_x = features.tolist()
-        test_set_x = theano.shared(numpy.asarray(
-            temp_set_x, dtype=theano.config.floatX))
-
-        predicted_parameter = dnn_model.generate_top_hidden_layer(
-            test_set_x=test_set_x)
-
-        # write to cmp file
-        predicted_parameter = numpy.array(predicted_parameter, 'float32')
-        temp_parameter = predicted_parameter
-        fid = open(out_file_list[i], 'wb')
-        predicted_parameter.tofile(fid)
-        logger.debug('saved to %s' % out_file_list[i])
-        fid.close()
-
-
-def add_projection_indices(uttlist, token_xpath, attrib_name, outdir):
-    # Taken from: ~/proj/dnn_tts/script/add_token_index.py
-    '''
-    For utts in uttlist, add an attribute called attrib_name to all nodes
-    matching token_xpath, with a corpus-unique integer value > 0. Add a default
-    0-valued attrib at the root node.
-    '''
-    i = 1
-    for uttfile in uttlist:
-        utt = etree.parse(uttfile)
-        # clear target attribute name from all nodes to be safe:
-        for node in utt.xpath('//*'):  # all nodes
-            if attrib_name in node.attrib:
-                del node.attrib[attrib_name]
-        root_node = utt.getroot()
-        # 0 is the default 'n/a' value -- *some* ancestor of all nodes will have the relevant attribute to fall back on
-        root_node.attrib[attrib_name] = '0'
-        for node in utt.xpath(token_xpath):
-            node.attrib[attrib_name] = str(i)
-            i += 1
-        junk, fname = os.path.split(uttfile)
-        outfile = os.path.join(outdir, fname)
-        utt.write(outfile, encoding='utf-8', pretty_print=True)
-
-
-def add_projection_indices_with_replicates(uttlist, token_xpath, attrib_name, outdir, nreplicates):
-    # Taken from: ~/proj/dnn_tts/script/add_token_index.py
-    '''
-    For utts in uttlist, add an attribute called attrib_name to all nodes
-    matching token_xpath, with a corpus-unique integer value > 0. Add a default
-    0-valued attrib at the root node.
-    '''
-    assert len(uttlist) == 1
-    uttfile = uttlist[0]
-
-    i = 1
-
-    master_utt = etree.parse(uttfile)
-
-    new_utt_names = []
-
-    while i < nreplicates + 1:
-
-        utt = copy.copy(master_utt)
-
-        # clear target attribute name from all nodes to be safe:
-        for node in utt.xpath('//*'):  # all nodes
-            if attrib_name in node.attrib:
-                del node.attrib[attrib_name]
-        root_node = utt.getroot()
-        # 0 is the default 'n/a' value -- *some* ancestor of all nodes will have the relevant attribute to fall back on
-        root_node.attrib[attrib_name] = '0'
-        assert len(utt.xpath(token_xpath)) == 1
-        for node in utt.xpath(token_xpath):
-            node.attrib[attrib_name] = str(i)
-        junk, fname = os.path.split(uttfile)
-        new_utt_name = fname.replace('.utt', '_rep_%s.utt' % (i))
-        new_utt_names.append(new_utt_name)
-        outfile = os.path.join(outdir, new_utt_name)
-        utt.write(outfile, encoding='utf-8', pretty_print=True)
-        i += 1
-
-    return new_utt_names
-
-
-def retrieve_normalisation_values(norm_file):
-    # TODO -- move reading and writing into MinMaxNormalisation class
-
-    if not os.path.isfile(norm_file):
-        sys.exit('Normalisation file %s does not exist ' % (norm_file))
-
-    # reload stored minmax values:
-    fid = open(norm_file, 'rb')
-
-    # This doesn't work -- precision is lost -- reads in as float64
-    # label_norm_info = numpy.fromfile(fid)  ## label_norm_info = numpy.array(label_norm_info, 'float32')
-
-    # use struct to enforce float32:
-    nbytes = os.stat(norm_file)[6]  # length in bytes
-    data = fid.read(nbytes)  # = read until bytes run out
-    fid.close()
-    m = nbytes // 4  # number of 32 bit floats
-    format = str(m)+"f"
-    label_norm_info = struct.unpack(format, data)
-    label_norm_info = numpy.array(label_norm_info)
-
-    # values can be min + max or mean + std, hence non-descript variable names:
-    first_vector = label_norm_info[:m//2]
-    second_vector = label_norm_info[m//2:]
-
-    return (first_vector, second_vector)
-
-
-def main_function(cfg, in_dir, out_dir, token_xpath, index_attrib_name, synth_mode, cmp_dir, projection_end):
-    # TODO: token_xpath & index_attrib_name should be in config
-
-    # get a logger for this main function
-    logger = logging.getLogger("main")
-
-    # get another logger to handle plotting duties
-    plotlogger = logging.getLogger("plotting")
-
-    # later, we might do this via a handler that is created, attached and configured
-    # but for now we need to do it manually
-    plotlogger.set_plot_path(cfg.plot_dir)
-
-    #### parameter setting########
-    hidden_layers_sizes = cfg.hyper_params['hidden_layers_sizes']
-
-    # prepare environment
-    synth_utts_input = glob.glob(in_dir +
'/*.utt') - # synth_utts_input = synth_utts_input[:10] ### temp!!!!! - - if synth_mode == 'single_sentence_demo': - synth_utts_input = synth_utts_input[:1] - print() - print('mode: single_sentence_demo') - print(synth_utts_input) - print() - - # place to put test utts with tokens labelled with projection indices - indexed_utt_dir = os.path.join(out_dir, 'utt') - direcs = [out_dir, indexed_utt_dir] - for direc in direcs: - if not os.path.isdir(direc): - os.mkdir(direc) - - # was below -- see comment - if synth_mode == 'single_sentence_demo': - synth_utts_input = add_projection_indices_with_replicates( - synth_utts_input, token_xpath, index_attrib_name, indexed_utt_dir, 100) - else: - add_projection_indices(synth_utts_input, token_xpath, - index_attrib_name, indexed_utt_dir) - - file_id_list = [] - for fname in synth_utts_input: - junk, name = os.path.split(fname) - file_id_list.append(name.replace('.utt', '')) - - data_dir = cfg.data_dir - - model_dir = os.path.join(cfg.work_dir, 'nnets_model') - gen_dir = os.path.join(out_dir, 'gen') - - # normalisation information - norm_info_file = os.path.join(data_dir, 'norm_info' + cfg.combined_feature_name + - '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat') - - # normalise input full context label - if cfg.label_style == 'HTS': - sys.exit('only ossian utts supported') - elif cfg.label_style == 'composed': - suffix = 'composed' - - # the number can be removed - binary_label_dir = os.path.join(out_dir, 'lab_bin') - nn_label_norm_dir = os.path.join(out_dir, 'lab_bin_norm') - - binary_label_file_list = prepare_file_path_list( - file_id_list, binary_label_dir, cfg.lab_ext) - nn_label_norm_file_list = prepare_file_path_list( - file_id_list, nn_label_norm_dir, cfg.lab_ext) - - # need this to find normalisation info: - if cfg.process_labels_in_work_dir: - label_data_dir = cfg.work_dir - else: - label_data_dir = data_dir - - min_max_normaliser = None - label_norm_file = 'label_norm_%s.dat' % (cfg.label_style) - label_norm_file = os.path.join(label_data_dir, label_norm_file) - - if cfg.label_style == 'HTS': - sys.exit('script not tested with HTS labels') - - # always do this in synth: - # if cfg.NORMLAB and (cfg.label_style == 'composed'): - logger.info('add projection indices to tokens in test utts') - - # add_projection_indices was here - - logger.info('preparing label data (input) using "composed" style labels') - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - - logger.info('Loaded label configuration') - - lab_dim = label_composer.compute_label_dimension() - logger.info('label dimension will be %d' % lab_dim) - - if cfg.precompile_xpaths: - label_composer.precompile_xpaths() - - # there are now a set of parallel input label files (e.g, one set of HTS and another set of Ossian trees) - # create all the lists of these, ready to pass to the label composer - - in_label_align_file_list = {} - for label_style, label_style_required in label_composer.label_styles.items(): - if label_style_required: - logger.info( - 'labels of style %s are required - constructing file paths for them' % label_style) - if label_style == 'xpath': - in_label_align_file_list['xpath'] = prepare_file_path_list( - file_id_list, indexed_utt_dir, cfg.utt_ext, False) - elif label_style == 'hts': - logger.critical('script not tested with HTS labels') - else: - logger.critical( - 'unsupported label style %s specified in label configuration' % label_style) - raise Exception - - # now iterate through the files, one at 
a time, constructing the labels for them - num_files = len(file_id_list) - logger.info('the label styles required are %s' % - label_composer.label_styles) - - for i in range(num_files): - logger.info('making input label features for %4d of %4d' % - (i+1, num_files)) - - # iterate through the required label styles and open each corresponding label file - - # a dictionary of file descriptors, pointing at the required files - required_labels = {} - - for label_style, label_style_required in label_composer.label_styles.items(): - - # the files will be a parallel set of files for a single utterance - # e.g., the XML tree and an HTS label file - if label_style_required: - required_labels[label_style] = open( - in_label_align_file_list[label_style][i], 'r') - logger.debug(' opening label file %s' % - in_label_align_file_list[label_style][i]) - - logger.debug('label styles with open files: %s' % required_labels) - label_composer.make_labels( - required_labels, out_file_name=binary_label_file_list[i], fill_missing_values=cfg.fill_missing_values, iterate_over_frames=cfg.iterate_over_frames) - - # now close all opened files - for fd in required_labels.values(): - fd.close() - - # no silence removal for synthesis ... - - # minmax norm: - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99, exclude_columns=[cfg.index_to_project]) - - (min_vector, max_vector) = retrieve_normalisation_values(label_norm_file) - min_max_normaliser.min_vector = min_vector - min_max_normaliser.max_vector = max_vector - - # apply precompuated and stored min-max to the whole dataset - min_max_normaliser.normalise_data( - binary_label_file_list, nn_label_norm_file_list) - - -# DEBUG - if synth_mode == 'inferred': - - # set up paths -- write CMP data to infer from in outdir: - nn_cmp_dir = os.path.join( - out_dir, 'nn' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - nn_cmp_norm_dir = os.path.join( - out_dir, 'nn_norm' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - - in_file_list_dict = {} - for feature_name in list(cfg.in_dir_dict.keys()): - in_direc = os.path.join(cmp_dir, feature_name) - assert os.path.isdir(in_direc), in_direc - in_file_list_dict[feature_name] = prepare_file_path_list( - file_id_list, in_direc, cfg.file_extension_dict[feature_name], False) - - nn_cmp_file_list = prepare_file_path_list( - file_id_list, nn_cmp_dir, cfg.cmp_ext) - nn_cmp_norm_file_list = prepare_file_path_list( - file_id_list, nn_cmp_norm_dir, cfg.cmp_ext) - - # make output acoustic data - # if cfg.MAKECMP: - logger.info('creating acoustic (output) features') - delta_win = [-0.5, 0.0, 0.5] - acc_win = [1.0, -2.0, 1.0] - - acoustic_worker = AcousticComposition( - delta_win=delta_win, acc_win=acc_win) - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, cfg.in_dimension_dict, cfg.out_dimension_dict) - - # skip silence removal for inference -- need to match labels, which are - # not silence removed either - - # retrieve acoustic normalisation information for normalising the features back - var_dir = os.path.join(data_dir, 'var') - var_file_dict = {} - for feature_name in list(cfg.out_dimension_dict.keys()): - var_file_dict[feature_name] = os.path.join( - var_dir, feature_name + '_' + str(cfg.out_dimension_dict[feature_name])) - - # normalise output acoustic data -# if cfg.NORMCMP: - - -# DEBUG - if synth_mode == 'inferred': - - logger.info('normalising acoustic (output) features using method %s' % - cfg.output_feature_normalisation) - cmp_norm_info = None - if 
cfg.output_feature_normalisation == 'MVN': - normaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - - (mean_vector, std_vector) = retrieve_normalisation_values(norm_info_file) - normaliser.mean_vector = mean_vector - normaliser.std_vector = std_vector - - # apply precompuated and stored mean and std to the whole dataset - normaliser.feature_normalisation( - nn_cmp_file_list, nn_cmp_norm_file_list) - - elif cfg.output_feature_normalisation == 'MINMAX': - sys.exit('not implemented') - # min_max_normaliser = MinMaxNormalisation(feature_dimension = cfg.cmp_dim) - # global_mean_vector = min_max_normaliser.compute_mean(nn_cmp_file_list[0:cfg.train_file_number]) - # global_std_vector = min_max_normaliser.compute_std(nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector) - - # min_max_normaliser = MinMaxNormalisation(feature_dimension = cfg.cmp_dim, min_value = 0.01, max_value = 0.99) - # min_max_normaliser.find_min_max_values(nn_cmp_file_list[0:cfg.train_file_number]) - # min_max_normaliser.normalise_data(nn_cmp_file_list, nn_cmp_norm_file_list) - - # cmp_min_vector = min_max_normaliser.min_vector - # cmp_max_vector = min_max_normaliser.max_vector - # cmp_norm_info = numpy.concatenate((cmp_min_vector, cmp_max_vector), axis=0) - - else: - logger.critical('Normalisation type %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - combined_model_arch = str(len(hidden_layers_sizes)) - for hid_size in hidden_layers_sizes: - combined_model_arch += '_' + str(hid_size) - nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.model' \ - % (model_dir, cfg.model_type, cfg.combined_feature_name, int(cfg.multistream_switch), - combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number) - - # DNN model training -# if cfg.TRAINDNN: always do this in synth - - -# DEBUG - inferred_weights = None # default, for non-inferring synth methods - if synth_mode == 'inferred': - - # infer control values from TESTING data - - # identical lists (our test data) for 'train' and 'valid' -- this is just to - # keep the infer_projections_fn theano function happy -- operates on - # validation set. 'Train' set shouldn't be used here. - train_x_file_list = copy.copy(nn_label_norm_file_list) - train_y_file_list = copy.copy(nn_cmp_norm_file_list) - valid_x_file_list = copy.copy(nn_label_norm_file_list) - valid_y_file_list = copy.copy(nn_cmp_norm_file_list) - - print('FILELIST for inferr:') - print(train_x_file_list) - print() - - try: - inferred_weights = infer_projections(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot) - - except KeyboardInterrupt: - logger.critical('train_DNN interrupted via keyboard') - # Could 'raise' the exception further, but that causes a deep traceback to be printed - # which we don't care about for a keyboard interrupt. 
So, just bail out immediately
-            sys.exit(1)
-        except:
-            logger.critical('train_DNN threw an exception')
-            raise
-
-    # if cfg.DNNGEN:
-    logger.info('generating from DNN')
-
-    try:
-        os.makedirs(gen_dir)
-    except OSError as e:
-        if e.errno == errno.EEXIST:
-            # not an error - just means directory already exists
-            pass
-        else:
-            logger.critical(
-                'Failed to create generation directory %s' % gen_dir)
-            logger.critical(' OS error was: %s' % e.strerror)
-            raise
-
-    gen_file_list = prepare_file_path_list(file_id_list, gen_dir, cfg.cmp_ext)
-
-    # print nn_label_norm_file_list  ## <-- this WAS mangled in inferred due to copying of file list to trainlist_x etc. which is then shuffled. Now use copy.copy
-    # print gen_file_list
-
-    weights_outfile = os.path.join(out_dir, 'projection_weights_for_synth.txt')
-    dnn_generation_PROJECTION(nn_label_norm_file_list, nnets_file_name, lab_dim, cfg.cmp_dim, gen_file_list, cfg=cfg, synth_mode=synth_mode,
-                              projection_end=projection_end, projection_weights_to_use=inferred_weights, save_weights_to_file=weights_outfile)
-
-    logger.debug('denormalising generated output using method %s' %
-                 cfg.output_feature_normalisation)
-    # DNNGEN
-
-    fid = open(norm_info_file, 'rb')
-    cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32)
-    fid.close()
-    cmp_min_max = cmp_min_max.reshape((2, -1))
-    cmp_min_vector = cmp_min_max[0, ]
-    cmp_max_vector = cmp_min_max[1, ]
-
-    if cfg.output_feature_normalisation == 'MVN':
-        denormaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim)
-        denormaliser.feature_denormalisation(
-            gen_file_list, gen_file_list, cmp_min_vector, cmp_max_vector)
-
-    elif cfg.output_feature_normalisation == 'MINMAX':
-        denormaliser = MinMaxNormalisation(
-            cfg.cmp_dim, min_value=0.01, max_value=0.99, min_vector=cmp_min_vector, max_vector=cmp_max_vector)
-        denormaliser.denormalise_data(gen_file_list, gen_file_list)
-    else:
-        logger.critical('denormalising method %s is not supported!\n' %
-                        (cfg.output_feature_normalisation))
-        raise
-
-    # perform MLPG to smooth parameter trajectory
-    # lf0 is included, the output features must have vuv.
-    generator = ParameterGeneration(gen_wav_features=cfg.gen_wav_features)
-    generator.acoustic_decomposition(
-        gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict, var_file_dict)
-
-    # osw: skip MLPG:
-#    split_cmp(gen_file_list, ['mgc', 'lf0', 'bap'], cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict)
-
-    # Variance scaling:
-    scaled_dir = gen_dir + '_scaled'
-    simple_scale_variance(gen_dir, scaled_dir, var_file_dict, cfg.out_dimension_dict,
-                          file_id_list, gv_weight=0.5)  # gv_weight hardcoded
-
-    # generate wav ---- glottHMM only!!!
-    # if cfg.GENWAV:
-    logger.info('reconstructing waveform(s)')
-    generate_wav_glottHMM(scaled_dir, file_id_list)  # generated speech
-
-
-def simple_scale_variance(indir, outdir, var_file_dict, out_dimension_dict, file_id_list, gv_weight=1.0):
-    # simple variance scaling (silen et al. 2012, paragraph 3.1)
-    # TODO: Lots of things like stream names hardcoded here; 3 for delta + delta-delta; ...
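Before the hard-coded details of `simple_scale_variance` below, here is a stripped-down numpy sketch of the scaling idea it implements (after Silén et al. 2012, §3.1): each dimension's deviation from the utterance mean is rescaled by the ratio of an interpolated global/utterance standard deviation to the utterance's own standard deviation. Shapes and names here are illustrative assumptions, not the toolkit's API:

```python
import numpy

def scale_variance(speech, global_std, gv_weight=0.5):
    # speech: (frames, dims); global_std: (dims,) from training data
    utt_mean = numpy.mean(speech, axis=0)
    utt_std = numpy.std(speech, axis=0)
    weighted_std = gv_weight * global_std + (1.0 - gv_weight) * utt_std
    ratio = weighted_std / utt_std
    return (speech - utt_mean) * ratio + utt_mean

# With gv_weight=1.0 the output matches the global deviation exactly:
speech = numpy.random.randn(200, 4) * 0.3           # over-smoothed parameters
scaled = scale_variance(speech, numpy.ones(4), gv_weight=1.0)
print(numpy.round(numpy.std(scaled, axis=0), 6))    # ~1.0 per dimension
```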
-    all_streams = ['cmp', 'HNR', 'F0', 'LSF', 'Gain', 'LSFsource']
-    streams_to_scale = ['LSF']
-
-    static_variances = {}
-
-    static_dimension_dict = {}
-    for (feature_name, size) in list(out_dimension_dict.items()):
-        static_dimension_dict[feature_name] = size // 3
-
-    io_funcs = BinaryIOCollection()
-    for feature_name in list(var_file_dict.keys()):
-        var_values, dimension = io_funcs.load_binary_file_frame(
-            var_file_dict[feature_name], 1)
-        static_var_values = var_values[:static_dimension_dict[feature_name], :]
-        static_variances[feature_name] = static_var_values
-
-    if not os.path.isdir(outdir):
-        os.makedirs(outdir)
-
-    assert gv_weight <= 1.0 and gv_weight >= 0.0
-    local_weight = 1.0 - gv_weight
-
-    for uttname in file_id_list:
-        for stream in all_streams:
-            infile = os.path.join(indir, uttname + '.' + stream)
-            outfile = os.path.join(outdir, uttname + '.' + stream)
-            if not os.path.isfile(infile):
-                sys.exit(infile + ' does not exist')
-            if stream in streams_to_scale:
-                speech, dimension = io_funcs.load_binary_file_frame(
-                    infile, static_dimension_dict[stream])
-                utt_mean = numpy.mean(speech, axis=0)
-                utt_std = numpy.std(speech, axis=0)
-
-                global_std = numpy.transpose((static_variances[stream]))
-                weighted_global_std = (
-                    gv_weight * global_std) + (local_weight * utt_std)
-                std_ratio = weighted_global_std / utt_std
-
-                nframes, ndim = numpy.shape(speech)
-                utt_mean_matrix = numpy.tile(utt_mean, (nframes, 1))
-                std_ratio_matrix = numpy.tile(std_ratio, (nframes, 1))
-
-                scaled_speech = ((speech - utt_mean_matrix) *
-                                 std_ratio_matrix) + utt_mean_matrix
-                io_funcs.array_to_binary_file(scaled_speech, outfile)
-
-            else:
-                os.system('cp %s %s' % (infile, outfile))
-
-
-def log_to_hertz(infile, outfile):
-    f = open(infile, 'r')
-    log_values = [float(val) for val in f.readlines()]
-    f.close()
-
-    def m2h(l):
-        h = math.exp(l)
-        return h
-
-    hertz = [m2h(l) for l in log_values]
-    f = open(outfile, 'w')
-    for val in hertz:
-        if val > 0:
-            f.write(str(val) + '\n')
-        else:
-            f.write('0.0\n')
-    f.close()
-
-
-def generate_wav_glottHMM(gen_dir, gen_file_id_list):
-
-    x2x = '~/repos/simple4all/CSTRVoiceClone/trunk/bin/x2x'
-    synthesis = '~/sim2/oliver/nst_repos/OSSIAN/ossian-v.1.3/tools/GlottHMM/Synthesis'
-    general_glott_conf = '~/sim2/oliver/nst_repos/OSSIAN/ossian-v.1.3/voices/en/ky_02_toy/english_gold_basic_glott_KY/processors/speech_feature_extractor/main_config.cfg'
-    user_glott_conf = '~/sim2/oliver/nst_repos/OSSIAN/ossian-v.1.3/voices/en/ky_02_toy/english_gold_basic_glott_KY/processors/speech_feature_extractor/user_config.cfg'
-
-    exports = 'export LIBCONFIG_INSTALL_DIR=/afs/inf.ed.ac.uk/user/o/owatts/sim2/oliver/nst_repos/OSSIAN/ossian-v.1.3/tools/GlottHMM//libconfig-1.4.9 ; export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LIBCONFIG_INSTALL_DIR/lib/.libs ; export LIBRARY_PATH=$LIBRARY_PATH:$LIBCONFIG_INSTALL_DIR/lib/.libs ; export CPATH=$CPATH:$LIBCONFIG_INSTALL_DIR/lib ;'
-
-    streams = ['cmp', 'HNR', 'F0', 'LSF', 'Gain', 'LSFsource']
-    for uttname in gen_file_id_list:
-        all_present = True
-        for stream in streams:
-            if not os.path.isfile(os.path.join(gen_dir, uttname + '.' + stream)):
-                all_present = False
-        if all_present:
-            for stream in streams:
-                extra = ''
-                if stream == 'F0':
-                    extra = '.NEGVALS'
-                fname = os.path.join(gen_dir, uttname + '.' + stream)
-                fname_txt = os.path.join(
-                    gen_dir, uttname + '.txt.'
+ stream + extra) - comm = '%s +fa %s > %s' % (x2x, fname, fname_txt) - os.system(comm) - log_to_hertz(os.path.join(gen_dir, uttname + '.txt.F0.NEGVALS'), - os.path.join(gen_dir, uttname + '.txt.F0')) - - stem_name = os.path.join(gen_dir, uttname + '.txt') - comm = '%s %s %s %s %s' % ( - exports, synthesis, stem_name, general_glott_conf, user_glott_conf) - print(comm) - os.system(comm) - - else: - print('missing stream(s) for utterance ' + uttname) - - -if __name__ == '__main__': - - # these things should be done even before trying to parse the command line - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.cfg - - # set up logging to use our custom class - logging.setLoggerClass(LoggerPlotter) - - # get a logger for this main function - logger = logging.getLogger("main") - - if len(sys.argv) not in [8, 9]: - print(sys.argv) - sys.exit('usage: run_dnn.sh config_file_name utt_dir') - - config_file = sys.argv[1] - in_dir = sys.argv[2] - out_dir = sys.argv[3] - token_xpath = sys.argv[4] - index_attrib_name = sys.argv[5] - synth_mode = sys.argv[6] - projection_end = int(sys.argv[7]) - - assert synth_mode in ['constant', 'sampled_training', 'inferred', 'uniform', 'single_sentence_demo', - 'uniform_sampled_within_std_1', 'uniform_sampled_within_std_2', 'uniform_sampled_within_std_3'] - - cmp_dir = None - if synth_mode == 'inferred': - cmp_dir = sys.argv[8] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - -# if cfg.profile: -# logger.info('profiling is activated') -# import cProfile, pstats -# cProfile.run('main_function(cfg)', 'mainstats') - -# # create a stream for the profiler to write to -# profiling_output = StringIO.StringIO() -# p = pstats.Stats('mainstats', stream=profiling_output) - -# # print stats to that stream -# # here we just report the top 10 functions, sorted by total amount of time spent in each -# p.strip_dirs().sort_stats('tottime').print_stats(10) - -# # print the result to the log -# logger.info('---Profiling result follows---\n%s' % profiling_output.getvalue() ) -# profiling_output.close() -# logger.info('---End of profiling result---') -# -# else: - main_function(cfg, in_dir, out_dir, token_xpath, - index_attrib_name, synth_mode, cmp_dir, projection_end) - - sys.exit(0) - -import pickle -import gzip -import os -import sys -import errno -import time -import math -import glob -import struct - -# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why) -import numpy -# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself -import numpy.distutils.__config__ -# and only after that can we import theano -import theano - -from utils.providers import ListDataProvider - -from frontend.label_normalisation import HTSLabelNormalisation, XMLLabelNormalisation -from frontend.silence_remover import SilenceRemover -from frontend.silence_remover import trim_silence -from frontend.min_max_norm import MinMaxNormalisation -#from frontend.acoustic_normalisation import CMPNormalisation -from frontend.acoustic_composition import AcousticComposition -from frontend.parameter_generation import ParameterGeneration -#from frontend.feature_normalisation_base import FeatureNormBase -from frontend.mean_variance_norm import MeanVarianceNorm -##from frontend.mlpg_fast import MLParameterGenerationFast -from frontend.mlpg import MLParameterGeneration as MLParameterGenerationFast # osw temp - -from io_funcs.binary_io import 
BinaryIOCollection - -# the new class for label composition and normalisation -from frontend.label_composer import LabelComposer - -import configuration - -from models.dnn import DNN -from models.ms_dnn import MultiStreamDNN -from models.ms_dnn_gv import MultiStreamDNNGv -from models.sdae import StackedDenoiseAutoEncoder -from models.mdn import MixtureDensityNetwork - -from utils.compute_distortion import DistortionComputation, IndividualDistortionComp -from utils.generate import generate_wav -from utils.learn_rates import ExpDecreaseLearningRate - - -#import matplotlib.pyplot as plt -# our custom logging class that can also plot -#from logplot.logging_plotting import LoggerPlotter, MultipleTimeSeriesPlot, SingleWeightMatrixPlot -from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot -import logging # as logging -import logging.config -import io - - -def extract_file_id_list(file_list): - file_id_list = [] - for file_name in file_list: - file_id = os.path.basename(os.path.splitext(file_name)[0]) - file_id_list.append(file_id) - - return file_id_list - - -def read_file_list(file_name): - - logger = logging.getLogger("read_file_list") - - file_lists = [] - fid = open(file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - file_lists.append(line) - fid.close() - - logger.debug('Read file list from %s' % file_name) - return file_lists - - -def make_output_file_list(out_dir, in_file_lists): - out_file_lists = [] - - for in_file_name in in_file_lists: - file_id = os.path.basename(in_file_name) - out_file_name = out_dir + '/' + file_id - out_file_lists.append(out_file_name) - - return out_file_lists - - -def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True): - if not os.path.exists(file_dir) and new_dir_switch: - os.makedirs(file_dir) - file_name_list = [] - for file_id in file_id_list: - file_name = file_dir + '/' + file_id + file_extension - file_name_list.append(file_name) - - return file_name_list - - -def visualize_dnn(dnn): - - layer_num = len(dnn.params) / 2 # including input and output - - for i in range(layer_num): - fig_name = 'Activation weights W' + str(i) - fig_title = 'Activation weights of W' + str(i) - xlabel = 'Neuron index of hidden layer ' + str(i) - ylabel = 'Neuron index of hidden layer ' + str(i+1) - if i == 0: - xlabel = 'Input feature index' - if i == layer_num-1: - ylabel = 'Output feature index' - - logger.create_plot(fig_name, SingleWeightMatrixPlot) - plotlogger.add_plot_point( - fig_name, fig_name, dnn.params[i*2].get_value(borrow=True).T) - plotlogger.save_plot(fig_name, title=fig_name, - xlabel=xlabel, ylabel=ylabel) - - -# plain DNN case -# def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list): -# logger = logging.getLogger("dnn_generation") -# logger.debug('Starting dnn_generation') -# -# plotlogger = logging.getLogger("plotting") -# -# dnn_model = cPickle.load(open(nnets_file_name, 'rb')) -# -# # visualize_dnn(dbn) -# -# file_number = len(valid_file_list) -# -# for i in xrange(file_number): -# logger.info('generating %4d of %4d: %s' % (i+1,file_number,valid_file_list[i]) ) -# fid_lab = open(valid_file_list[i], 'rb') -# features = numpy.fromfile(fid_lab, dtype=numpy.float32) -# fid_lab.close() -# features = features[:(n_ins * (features.size / n_ins))] -# features = features.reshape((-1, n_ins)) -# temp_set_x = features.tolist() -# test_set_x = theano.shared(numpy.asarray(temp_set_x, dtype=theano.config.floatX)) -# -# 
predicted_parameter = dnn_model.parameter_prediction(test_set_x=test_set_x)
-# #        predicted_parameter = test_out()
-#
-#        ### write to cmp file
-#        predicted_parameter = numpy.array(predicted_parameter, 'float32')
-#        temp_parameter = predicted_parameter
-#        fid = open(out_file_list[i], 'wb')
-#        predicted_parameter.tofile(fid)
-#        logger.debug('saved to %s' % out_file_list[i])
-#        fid.close()
-#
-
-# multiple Gaussian components
-def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list, target_mean_vector, target_std_vector, out_dimension_dict, file_extension_dict, vocoder='straight'):
-    logger = logging.getLogger("dnn_generation")
-    logger.debug('Starting dnn_generation')
-
-    inf_float = -1.0e+10
-
-    plotlogger = logging.getLogger("plotting")
-
-    if vocoder == 'straight':
-        gen_wav_features = ['mgc', 'lf0', 'bap']
-    elif vocoder == 'glotthmm':
-        gen_wav_features = ['F0', 'Gain', 'HNR', 'LSF',
-                            'LSFsource']  # TODO: take this from config
-    else:
-        sys.exit('unsupported vocoder %s !' % (vocoder))
-
-    stream_start_index = {}
-    dimension_index = 0
-    for feature_name in list(out_dimension_dict.keys()):
-        stream_start_index[feature_name] = dimension_index
-        dimension_index += out_dimension_dict[feature_name]
-
-    dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
-    file_number = len(valid_file_list)
-    io_funcs = BinaryIOCollection()
-
-    mlpg = MLParameterGenerationFast()
-
-    for i in range(file_number):
-        logger.info('generating %4d of %4d: %s' %
-                    (i+1, file_number, valid_file_list[i]))
-        fid_lab = open(valid_file_list[i], 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        features = features[:(n_ins * (features.size // n_ins))]
-        features = features.reshape((-1, n_ins))
-
-        frame_number = features.shape[0]
-
-        test_set_x = theano.shared(numpy.asarray(
-            features, dtype=theano.config.floatX))
-
-        mean_matrix = numpy.tile(target_mean_vector, (features.shape[0], 1))
-        std_matrix = numpy.tile(target_std_vector, (features.shape[0], 1))
-
-        predicted_mix = dnn_model.parameter_prediction_mix(
-            test_set_x=test_set_x)
-        max_index = numpy.argmax(predicted_mix, axis=1)
-
-        temp_predicted_mu = dnn_model.parameter_prediction(
-            test_set_x=test_set_x)
-        temp_predicted_sigma = dnn_model.parameter_prediction_sigma(
-            test_set_x=test_set_x)
-        predicted_mu = numpy.zeros((temp_predicted_mu.shape[0], n_outs))
-        predicted_sigma = numpy.zeros((temp_predicted_sigma.shape[0], n_outs))
-        for kk in range(temp_predicted_mu.shape[0]):
-            predicted_mu[kk, :] = temp_predicted_mu[kk,
-                                                    max_index[kk]*n_outs:(max_index[kk]+1)*n_outs]
-            predicted_sigma[kk, :] = temp_predicted_sigma[kk,
-                                                          max_index[kk]*n_outs:(max_index[kk]+1)*n_outs]
-#        print predicted_mu.shape
-#        predicted_mu = predicted_mu[aa*n_outs:(aa+1)*n_outs]
-        predicted_mu = predicted_mu * std_matrix + mean_matrix
-        predicted_sigma = ((predicted_sigma ** 0.5) * std_matrix) ** 2
-
-        dir_name = os.path.dirname(out_file_list[i])
-        file_id = os.path.splitext(os.path.basename(out_file_list[i]))[0]
-
-        mlpg = MLParameterGenerationFast()
-        for feature_name in gen_wav_features:
-            current_features = predicted_mu[:, stream_start_index[feature_name]
-                                            :stream_start_index[feature_name]+out_dimension_dict[feature_name]]
-            current_sigma = predicted_sigma[:, stream_start_index[feature_name]:stream_start_index[feature_name]+out_dimension_dict[feature_name]]
-
-            gen_features = mlpg.generation(
-                current_features, current_sigma, out_dimension_dict[feature_name] // 3)
-
-            if feature_name in ['lf0', 'F0']:
-                if 'vuv' in stream_start_index:
-                    vuv_feature = predicted_mu[:, stream_start_index['vuv']
-                                               :stream_start_index['vuv']+1]
-                    for t in range(frame_number):
-                        if vuv_feature[t, 0] < 0.5:
-                            gen_features[t, 0] = inf_float
-#            print gen_features
-            new_file_name = os.path.join(
-                dir_name, file_id + file_extension_dict[feature_name])
-
-            io_funcs.array_to_binary_file(gen_features, new_file_name)
-
-
-# generate bottleneck layer as features
-def dnn_hidden_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list):
-    logger = logging.getLogger("dnn_generation")
-    logger.debug('Starting dnn_generation')
-
-    plotlogger = logging.getLogger("plotting")
-
-    dnn_model = pickle.load(open(nnets_file_name, 'rb'))
-
-    file_number = len(valid_file_list)
-
-    for i in range(file_number):
-        logger.info('generating %4d of %4d: %s' %
-                    (i+1, file_number, valid_file_list[i]))
-        fid_lab = open(valid_file_list[i], 'rb')
-        features = numpy.fromfile(fid_lab, dtype=numpy.float32)
-        fid_lab.close()
-        features = features[:(n_ins * (features.size // n_ins))]
-        features = features.reshape((-1, n_ins))
-        temp_set_x = features.tolist()
-        test_set_x = theano.shared(numpy.asarray(
-            temp_set_x, dtype=theano.config.floatX))
-
-        predicted_parameter = dnn_model.generate_top_hidden_layer(
-            test_set_x=test_set_x)
-
-        # write to cmp file
-        predicted_parameter = numpy.array(predicted_parameter, 'float32')
-        temp_parameter = predicted_parameter
-        fid = open(out_file_list[i], 'wb')
-        predicted_parameter.tofile(fid)
-        logger.debug('saved to %s' % out_file_list[i])
-        fid.close()
-
-
-def main_function(cfg, in_dir, out_dir):
-
-    # get a logger for this main function
-    logger = logging.getLogger("main")
-
-    # get another logger to handle plotting duties
-    plotlogger = logging.getLogger("plotting")
-
-    # later, we might do this via a handler that is created, attached and configured
-    # using the standard config mechanism of the logging module
-    # but for now we need to do it manually
-    plotlogger.set_plot_path(cfg.plot_dir)
-
-    #### parameter setting########
-    hidden_layers_sizes = cfg.hyper_params['hidden_layers_sizes']
-
-    synth_utts = glob.glob(in_dir + '/*.utt')
-
-    file_id_list = []
-    for fname in synth_utts:
-        junk, name = os.path.split(fname)
-        file_id_list.append(name.replace('.utt', ''))
-
-    if not os.path.isdir(out_dir):
-        os.mkdir(out_dir)
-
-    # total file number including training, development, and testing
-    #total_file_number = len(file_id_list)
-
-    data_dir = cfg.data_dir
-
-    #nn_cmp_dir = os.path.join(data_dir, 'nn' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim))
-    #nn_cmp_norm_dir = os.path.join(data_dir, 'nn_norm' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim))
-
-    model_dir = os.path.join(cfg.work_dir, 'nnets_model')
-    gen_dir = os.path.join(out_dir, 'gen')
-
-    #in_file_list_dict = {}
-
-    # for feature_name in cfg.in_dir_dict.keys():
-    #     in_file_list_dict[feature_name] = prepare_file_path_list(file_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False)
-
-    #nn_cmp_file_list = prepare_file_path_list(file_id_list, nn_cmp_dir, cfg.cmp_ext)
-    #nn_cmp_norm_file_list = prepare_file_path_list(file_id_list, nn_cmp_norm_dir, cfg.cmp_ext)
-
-    # normalisation information
-    norm_info_file = os.path.join(data_dir, 'norm_info' + cfg.combined_feature_name +
-                                  '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat')
-
-    # normalise input full context label
-
-    # currently supporting two different forms of linguistic features
-    # later, we should generalise this
-
-    if
cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension - logger.info('Input label dimension is %d' % lab_dim) - suffix = str(lab_dim) - # no longer supported - use new "composed" style labels instead - elif cfg.label_style == 'composed': - # label_normaliser = XMLLabelNormalisation(xpath_file_name=cfg.xpath_file_name) - suffix = 'composed' - - # the number can be removed - binary_label_dir = os.path.join(out_dir, 'lab_bin') - nn_label_norm_dir = os.path.join(out_dir, 'lab_bin_norm') - - binary_label_file_list = prepare_file_path_list( - file_id_list, binary_label_dir, cfg.lab_ext) - nn_label_norm_file_list = prepare_file_path_list( - file_id_list, nn_label_norm_dir, cfg.lab_ext) - - # need this to find normalisation info: - if cfg.process_labels_in_work_dir: - label_data_dir = cfg.work_dir - else: - label_data_dir = data_dir - - min_max_normaliser = None - label_norm_file = 'label_norm_%s.dat' % (cfg.label_style) - label_norm_file = os.path.join(label_data_dir, label_norm_file) - - if cfg.label_style == 'HTS': - sys.exit('script not tested with HTS labels') - # simple HTS labels - # logger.info('preparing label data (input) using standard HTS style labels') - # label_normaliser.perform_normalisation(in_label_align_file_list, binary_label_file_list) - - # remover = SilenceRemover(n_cmp = lab_dim, silence_pattern = ['*-#+*']) - # remover.remove_silence(binary_label_file_list, in_label_align_file_list, nn_label_file_list) - - # min_max_normaliser = MinMaxNormalisation(feature_dimension = lab_dim, min_value = 0.01, max_value = 0.99) - # ###use only training data to find min-max information, then apply on the whole dataset - # min_max_normaliser.find_min_max_values(nn_label_file_list[0:cfg.train_file_number]) - # min_max_normaliser.normalise_data(nn_label_file_list, nn_label_norm_file_list) - - logger.info('preparing label data (input) using "composed" style labels') - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - - logger.info('Loaded label configuration') - # logger.info('%s' % label_composer.configuration.labels ) - - lab_dim = label_composer.compute_label_dimension() - logger.info('label dimension will be %d' % lab_dim) - - if cfg.precompile_xpaths: - label_composer.precompile_xpaths() - - # there are now a set of parallel input label files (e.g, one set of HTS and another set of Ossian trees) - # create all the lists of these, ready to pass to the label composer - - in_label_align_file_list = {} - for label_style, label_style_required in label_composer.label_styles.items(): - if label_style_required: - logger.info( - 'labels of style %s are required - constructing file paths for them' % label_style) - if label_style == 'xpath': - in_label_align_file_list['xpath'] = prepare_file_path_list( - file_id_list, in_dir, cfg.utt_ext, False) - elif label_style == 'hts': - logger.critical('script not tested with HTS labels') - else: - logger.critical( - 'unsupported label style %s specified in label configuration' % label_style) - raise Exception - - # now iterate through the files, one at a time, constructing the labels for them - num_files = len(file_id_list) - logger.info('the label styles required are %s' % - label_composer.label_styles) - - for i in range(num_files): - logger.info('making input label features for %4d of %4d' % - (i+1, num_files)) - - # iterate through the required label styles and open each corresponding label file - - # a 
dictionary of file descriptors, pointing at the required files - required_labels = {} - - for label_style, label_style_required in label_composer.label_styles.items(): - - # the files will be a parallel set of files for a single utterance - # e.g., the XML tree and an HTS label file - if label_style_required: - required_labels[label_style] = open( - in_label_align_file_list[label_style][i], 'r') - logger.debug(' opening label file %s' % - in_label_align_file_list[label_style][i]) - - logger.debug('label styles with open files: %s' % required_labels) - label_composer.make_labels( - required_labels, out_file_name=binary_label_file_list[i], fill_missing_values=cfg.fill_missing_values, iterate_over_frames=cfg.iterate_over_frames) - - # now close all opened files - for fd in required_labels.values(): - fd.close() - - # no silence removal for synthesis ... - - # minmax norm: - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - - # reload stored minmax values: (TODO -- move reading and writing into MinMaxNormalisation class) - fid = open(label_norm_file, 'rb') - - # This doesn't work -- precision is lost -- reads in as float64 - # label_norm_info = numpy.fromfile(fid) ## label_norm_info = numpy.array(label_norm_info, 'float32') - - # use struct to enforce float32: - nbytes = os.stat(label_norm_file)[6] # length in bytes - data = fid.read(nbytes) # = read until bytes run out - fid.close() - m = nbytes / 4 # number 32 bit floats - format = str(m)+"f" - label_norm_info = struct.unpack(format, data) - label_norm_info = numpy.array(label_norm_info) - - min_max_normaliser.min_vector = label_norm_info[:m/2] - min_max_normaliser.max_vector = label_norm_info[m/2:] - - # apply precompuated min-max to the whole dataset - min_max_normaliser.normalise_data( - binary_label_file_list, nn_label_norm_file_list) - - # make output acoustic data -# if cfg.MAKECMP: - - # retrieve acoustic normalisation information for normalising the features back - var_dir = os.path.join(data_dir, 'var') - var_file_dict = {} - for feature_name in list(cfg.out_dimension_dict.keys()): - var_file_dict[feature_name] = os.path.join( - var_dir, feature_name + '_' + str(cfg.out_dimension_dict[feature_name])) - - # normalise output acoustic data -# if cfg.NORMCMP: - - combined_model_arch = str(len(hidden_layers_sizes)) - for hid_size in hidden_layers_sizes: - combined_model_arch += '_' + str(hid_size) - nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.mdn.model' \ - % (model_dir, cfg.model_type, cfg.combined_feature_name, int(cfg.multistream_switch), - combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number) - - # DNN model training -# if cfg.TRAINDNN: - - # if cfg.DNNGEN: - logger.info('generating from DNN') - - try: - os.makedirs(gen_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create generation directory %s' % gen_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - gen_file_list = prepare_file_path_list(file_id_list, gen_dir, cfg.cmp_ext) - - assert cfg.output_feature_normalisation == 'MVN' - - #gen_file_list = prepare_file_path_list(gen_file_id_list, gen_dir, cfg.cmp_ext) - - fid = open(norm_info_file, 'rb') - cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32) - fid.close() - cmp_min_max = cmp_min_max.reshape((2, -1)) - target_mean_vector = cmp_min_max[0, ] - target_std_vector = cmp_min_max[1, ] - -# dnn_generation(valid_x_file_list, 
nnets_file_name, lab_dim, cfg.cmp_dim, gen_file_list) -# dnn_generation(test_x_file_list, nnets_file_name, lab_dim, cfg.cmp_dim, gen_file_list) - dnn_generation(nn_label_norm_file_list, nnets_file_name, lab_dim, cfg.cmp_dim, gen_file_list, - target_mean_vector, target_std_vector, cfg.out_dimension_dict, cfg.file_extension_dict, vocoder='glotthmm') - - # Variance scaling: - test_var_scaling = False - scaled_dir = gen_dir + '_scaled' - if test_var_scaling: - file_id_list = simple_scale_variance_CONTINUUM( - gen_dir, scaled_dir, var_file_dict, cfg.out_dimension_dict, file_id_list) - else: - simple_scale_variance(gen_dir, scaled_dir, var_file_dict, cfg.out_dimension_dict, - file_id_list, gv_weight=0.5) # gv_weight hard coded here! - - # generate wav ---- glottHMM only!!! - # if cfg.GENWAV: - logger.info('reconstructing waveform(s)') - generate_wav_glottHMM(scaled_dir, file_id_list) # generated speech - - -def simple_scale_variance(indir, outdir, var_file_dict, out_dimension_dict, file_id_list, gv_weight=1.0): - # simple variance scaling (Silén et al. 2012, paragraph 3.1) - # TODO: Lots of things like stream names hardcoded here; 3 for delta + delta-delta; ... - all_streams = ['HNR', 'F0', 'LSF', 'Gain', 'LSFsource'] - streams_to_scale = ['LSF'] - - static_variances = {} - - static_dimension_dict = {} - for (feature_name, size) in list(out_dimension_dict.items()): - static_dimension_dict[feature_name] = size // 3 # integer division: used as a slice bound below - - io_funcs = BinaryIOCollection() - for feature_name in list(var_file_dict.keys()): - var_values, dimension = io_funcs.load_binary_file_frame( - var_file_dict[feature_name], 1) - static_var_values = var_values[:static_dimension_dict[feature_name], :] - static_variances[feature_name] = static_var_values - - if not os.path.isdir(outdir): - os.makedirs(outdir) - - assert gv_weight <= 1.0 and gv_weight >= 0.0 - local_weight = 1.0 - gv_weight - - for uttname in file_id_list: - for stream in all_streams: - infile = os.path.join(indir, uttname + '.' + stream) - outfile = os.path.join(outdir, uttname + '.' + stream) - if not os.path.isfile(infile): - sys.exit(infile + ' does not exist') - if stream in streams_to_scale: - speech, dimension = io_funcs.load_binary_file_frame( - infile, static_dimension_dict[stream]) - utt_mean = numpy.mean(speech, axis=0) - utt_std = numpy.std(speech, axis=0) - - global_std = numpy.transpose((static_variances[stream])) - weighted_global_std = ( - gv_weight * global_std) + (local_weight * utt_std) - std_ratio = weighted_global_std / utt_std - - nframes, ndim = numpy.shape(speech) - utt_mean_matrix = numpy.tile(utt_mean, (nframes, 1)) - std_ratio_matrix = numpy.tile(std_ratio, (nframes, 1)) - - scaled_speech = ((speech - utt_mean_matrix) * - std_ratio_matrix) + utt_mean_matrix - io_funcs.array_to_binary_file(scaled_speech, outfile) - - else: - os.system('cp %s %s' % (infile, outfile)) - - -def simple_scale_variance_CONTINUUM(indir, outdir, var_file_dict, out_dimension_dict, file_id_list): - # Try a range of interpolation weights for combining global & local variance - all_streams = ['cmp', 'HNR', 'F0', 'LSF', 'Gain', 'LSFsource'] - streams_to_scale = ['LSF'] - - static_variances = {} - - static_dimension_dict = {} - for (feature_name, size) in list(out_dimension_dict.items()): - static_dimension_dict[feature_name] = size // 3 - - io_funcs = BinaryIOCollection() - for feature_name in list(var_file_dict.keys()): - var_values, dimension = io_funcs.load_binary_file_frame( - var_file_dict[feature_name], 1) - static_var_values = var_values[:static_dimension_dict[feature_name], :] - static_variances[feature_name] = static_var_values - - if not os.path.isdir(outdir): - os.makedirs(outdir) - - file_id_list_out = [] - for uttname in file_id_list: - for gv_weight in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]: - local_weight = 1.0 - gv_weight - for stream in all_streams: - infile = os.path.join(indir, uttname + '.' + stream) - extended_uttname = uttname + '_gv' + str(gv_weight) - print(extended_uttname) - outfile = os.path.join(outdir, extended_uttname + '.'
+ stream) - if not os.path.isfile(infile): - sys.exit(infile + ' does not exist') - if stream in streams_to_scale: - speech, dimension = io_funcs.load_binary_file_frame( - infile, static_dimension_dict[stream]) - utt_mean = numpy.mean(speech, axis=0) - utt_std = numpy.std(speech, axis=0) - - global_std = numpy.transpose((static_variances[stream])) - - weighted_global_std = ( - gv_weight * global_std) + (local_weight * utt_std) - - std_ratio = weighted_global_std / utt_std - - nframes, ndim = numpy.shape(speech) - utt_mean_matrix = numpy.tile(utt_mean, (nframes, 1)) - std_ratio_matrix = numpy.tile(std_ratio, (nframes, 1)) - - scaled_speech = ((speech - utt_mean_matrix) - * std_ratio_matrix) + utt_mean_matrix - io_funcs.array_to_binary_file(scaled_speech, outfile) - - else: - os.system('cp %s %s' % (infile, outfile)) - file_id_list_out.append(extended_uttname) - return file_id_list_out - - -def log_to_hertz(infile, outfile): - f = open(infile, 'r') - log_values = [float(val) for val in f.readlines()] - f.close() - - def m2h(l): - h = math.exp(l) - return h - - hertz = [m2h(l) for l in log_values] - f = open(outfile, 'w') - for val in hertz: - if val > 0: - f.write(str(val) + '\n') - else: - f.write('0.0\n') - f.close() - - -def generate_wav_glottHMM(gen_dir, gen_file_id_list): - - x2x = '~/repos/simple4all/CSTRVoiceClone/trunk/bin/x2x' - synthesis = '~/sim2/oliver/nst_repos/OSSIAN/ossian-v.1.3/tools/GlottHMM/Synthesis' - general_glott_conf = '~/sim2/oliver/nst_repos/OSSIAN/ossian-v.1.3/voices/en/ky_02_toy/english_gold_basic_glott_KY/processors/speech_feature_extractor/main_config.cfg' - user_glott_conf = '~/sim2/oliver/nst_repos/OSSIAN/ossian-v.1.3/voices/en/ky_02_toy/english_gold_basic_glott_KY/processors/speech_feature_extractor/user_config.cfg' - - exports = 'export LIBCONFIG_INSTALL_DIR=/afs/inf.ed.ac.uk/user/o/owatts/sim2/oliver/nst_repos/OSSIAN/ossian-v.1.3/tools/GlottHMM//libconfig-1.4.9 ; export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:$LIBCONFIG_INSTALL_DIR/lib/.libs ; export LIBRARY_PATH=$LIBRARY_PATH:$LIBCONFIG_INSTALL_DIR/lib/.libs ; export CPATH=$CPATH:$LIBCONFIG_INSTALL_DIR/lib ;' - - streams = ['HNR', 'F0', 'LSF', 'Gain', 'LSFsource'] - for uttname in gen_file_id_list: - all_present = True - for stream in streams: - if not os.path.isfile(os.path.join(gen_dir, uttname + '.' + stream)): - all_present = False - if all_present: - for stream in streams: - extra = '' - if stream == 'F0': - extra = '.NEGVALS' - fname = os.path.join(gen_dir, uttname + '.' + stream) - fname_txt = os.path.join( - gen_dir, uttname + '.txt.' 
+ stream + extra) - comm = '%s +fa %s > %s' % (x2x, fname, fname_txt) - os.system(comm) - log_to_hertz(os.path.join(gen_dir, uttname + '.txt.F0.NEGVALS'), - os.path.join(gen_dir, uttname + '.txt.F0')) - - stem_name = os.path.join(gen_dir, uttname + '.txt') - comm = '%s %s %s %s %s' % ( - exports, synthesis, stem_name, general_glott_conf, user_glott_conf) - print(comm) - os.system(comm) - - else: - print('missing stream(s) for utterance ' + uttname) - - -if __name__ == '__main__': - - # these things should be done even before trying to parse the command line - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.cfg - - # set up logging to use our custom class - logging.setLoggerClass(LoggerPlotter) - - # get a logger for this main function - logger = logging.getLogger("main") - - if len(sys.argv) != 4: - logger.critical('usage: run_dnn.sh config_file_name utt_dir') - sys.exit(1) - - config_file = sys.argv[1] - in_dir = sys.argv[2] - out_dir = sys.argv[3] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - -# if cfg.profile: -# logger.info('profiling is activated') -# import cProfile, pstats -# cProfile.run('main_function(cfg)', 'mainstats') - -# # create a stream for the profiler to write to -# profiling_output = StringIO.StringIO() -# p = pstats.Stats('mainstats', stream=profiling_output) - -# # print stats to that stream -# # here we just report the top 10 functions, sorted by total amount of time spent in each -# p.strip_dirs().sort_stats('tottime').print_stats(10) - -# # print the result to the log -# logger.info('---Profiling result follows---\n%s' % profiling_output.getvalue() ) -# profiling_output.close() -# logger.info('---End of profiling result---') -# -# else: - main_function(cfg, in_dir, out_dir) - - sys.exit(0) - -import pickle -import gzip -import os -import sys -import errno -import time -import math - -# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why) -import numpy -# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself -import numpy.distutils.__config__ -# and only after that can we import theano -import theano - -from utils.providers import ListDataProvider - -from frontend.label_normalisation import HTSLabelNormalisation, XMLLabelNormalisation -from frontend.silence_remover import SilenceRemover -from frontend.silence_remover import trim_silence -from frontend.min_max_norm import MinMaxNormalisation -#from frontend.acoustic_normalisation import CMPNormalisation -from frontend.acoustic_composition import AcousticComposition -from frontend.parameter_generation import ParameterGeneration -#from frontend.feature_normalisation_base import FeatureNormBase -from frontend.mean_variance_norm import MeanVarianceNorm - -# the new class for label composition and normalisation -from frontend.label_composer import LabelComposer - -import configuration - -from models.dnn import DNN -from models.ms_dnn import MultiStreamDNN -from models.ms_dnn_gv import MultiStreamDNNGv -from models.sdae import StackedDenoiseAutoEncoder - -from utils.compute_distortion import DistortionComputation, IndividualDistortionComp -from utils.generate import generate_wav -from utils.learn_rates import ExpDecreaseLearningRate - - -#import matplotlib.pyplot as plt -# our custom logging class that can also plot -#from logplot.logging_plotting import LoggerPlotter, MultipleTimeSeriesPlot, SingleWeightMatrixPlot -from 
logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot -import logging # as logging -import logging.config -import io - - -def extract_file_id_list(file_list): - file_id_list = [] - for file_name in file_list: - file_id = os.path.basename(os.path.splitext(file_name)[0]) - file_id_list.append(file_id) - - return file_id_list - - -def read_file_list(file_name): - - logger = logging.getLogger("read_file_list") - - file_lists = [] - fid = open(file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - file_lists.append(line) - fid.close() - - logger.debug('Read file list from %s' % file_name) - return file_lists - - -def make_output_file_list(out_dir, in_file_lists): - out_file_lists = [] - - for in_file_name in in_file_lists: - file_id = os.path.basename(in_file_name) - out_file_name = out_dir + '/' + file_id - out_file_lists.append(out_file_name) - - return out_file_lists - - -def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True): - if not os.path.exists(file_dir) and new_dir_switch: - os.makedirs(file_dir) - file_name_list = [] - for file_id in file_id_list: - file_name = file_dir + '/' + file_id + file_extension - file_name_list.append(file_name) - - return file_name_list - - -def visualize_dnn(dnn): - - layer_num = len(dnn.params) / 2 # including input and output - - for i in range(layer_num): - fig_name = 'Activation weights W' + str(i) - fig_title = 'Activation weights of W' + str(i) - xlabel = 'Neuron index of hidden layer ' + str(i) - ylabel = 'Neuron index of hidden layer ' + str(i+1) - if i == 0: - xlabel = 'Input feature index' - if i == layer_num-1: - ylabel = 'Output feature index' - - logger.create_plot(fig_name, SingleWeightMatrixPlot) - plotlogger.add_plot_point( - fig_name, fig_name, dnn.params[i*2].get_value(borrow=True).T) - plotlogger.save_plot(fig_name, title=fig_name, - xlabel=xlabel, ylabel=ylabel) - - -def train_DNN(train_xy_file_list, valid_xy_file_list, - nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False): - - # get loggers for this function - # this one writes to both console and file - logger = logging.getLogger("main.train_DNN") - logger.debug('Starting train_DNN') - - if plot: - # this one takes care of plotting duties - plotlogger = logging.getLogger("plotting") - # create an (empty) plot of training convergence, ready to receive data points - logger.create_plot('training convergence', MultipleSeriesPlot) - - try: - assert numpy.sum(ms_outs) == n_outs - except AssertionError: - logger.critical( - 'the summation of multi-stream outputs does not equal to %d' % (n_outs)) - raise - - ####parameters##### - finetune_lr = float(hyper_params['learning_rate']) - training_epochs = int(hyper_params['training_epochs']) - batch_size = int(hyper_params['batch_size']) - l1_reg = float(hyper_params['l1_reg']) - l2_reg = float(hyper_params['l2_reg']) - private_l2_reg = float(hyper_params['private_l2_reg']) - warmup_epoch = int(hyper_params['warmup_epoch']) - momentum = float(hyper_params['momentum']) - warmup_momentum = float(hyper_params['warmup_momentum']) - - use_rprop = int(hyper_params['use_rprop']) - - use_rprop = int(hyper_params['use_rprop']) - - hidden_layers_sizes = hyper_params['hidden_layers_sizes'] - - stream_weights = hyper_params['stream_weights'] - private_hidden_sizes = hyper_params['private_hidden_sizes'] - - buffer_utt_size = buffer_size - early_stop_epoch = int(hyper_params['early_stop_epochs']) - - hidden_activation = 
hyper_params['hidden_activation'] - output_activation = hyper_params['output_activation'] - - stream_lr_weights = hyper_params['stream_lr_weights'] - use_private_hidden = hyper_params['use_private_hidden'] - - model_type = hyper_params['model_type'] - - # use a switch to turn on pretraining - # pretraining may not help too much, if this case, we turn it off to save time - do_pretraining = hyper_params['do_pretraining'] - pretraining_epochs = int(hyper_params['pretraining_epochs']) - pretraining_lr = float(hyper_params['pretraining_lr']) - - buffer_size = int(buffer_size / batch_size) * batch_size - - ################### - (train_x_file_list, train_y_file_list) = train_xy_file_list - (valid_x_file_list, valid_y_file_list) = valid_xy_file_list - - logger.debug('Creating training data provider') - train_data_reader = ListDataProvider(x_file_list=train_x_file_list, y_file_list=train_y_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, shuffle=True) - - logger.debug('Creating validation data provider') - valid_data_reader = ListDataProvider(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, - n_ins=n_ins, n_outs=n_outs, buffer_size=buffer_size, shuffle=False) - - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition() - train_set_x, train_set_y = shared_train_set_xy - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_next_partition() - valid_set_x, valid_set_y = shared_valid_set_xy - train_data_reader.reset() - valid_data_reader.reset() - -# frames_per_hour = 720000.0 -# tframes = train_set_x.get_value().shape[0] -# vframes = valid_set_x.get_value().shape[0] -# print 'Training frames: %s (%s hours)'%(tframes, tframes / frames_per_hour) -# print 'Validation frames: %s (%s hours)'%(tframes, tframes / frames_per_hour) -# sys.exit('999') - - # temporally we use the training set as pretrain_set_x. - # we need to support any data for pretraining - pretrain_set_x = train_set_x - - # numpy random generator - numpy_rng = numpy.random.RandomState(123) - logger.info('building the model') - - dnn_model = None - pretrain_fn = None # not all the model support pretraining right now - train_fn = None - valid_fn = None - # valid_fn and valid_model are the same. reserve to computer multi-stream distortion - valid_model = None - if model_type == 'DNN': - dnn_model = DNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - hidden_activation=hidden_activation, - output_activation=output_activation, - use_rprop=use_rprop, rprop_init_update=finetune_lr) - - train_fn, valid_fn, valid_score_i = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size, return_valid_score_i=True) - - elif model_type == 'SDAE': - # basic model is ready. - # if corruption levels is set to zero. it becomes normal autoencoder - dnn_model = StackedDenoiseAutoEncoder(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes) - - if do_pretraining: - pretraining_fn = dnn_model.pretraining_functions( - pretrain_set_x, batch_size) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size) - - # model is ready, but the hyper-parameters are not optimised. 
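# ---- Editorial aside (illustrative, not part of the original file) ----
# A minimal NumPy sketch of the input "corruption" that the denoising
# autoencoder (SDAE) branch above relies on: each input element is zeroed
# independently with probability corruption_level. As the comment above
# notes, corruption_level == 0.0 reduces this to a plain autoencoder.
# All names below are illustrative.
import numpy

def corrupt_input(x, corruption_level, rng):
    # keep each element with probability (1 - corruption_level), zero the rest
    keep_mask = rng.binomial(n=1, p=1.0 - corruption_level, size=x.shape)
    return x * keep_mask

_rng = numpy.random.RandomState(123)
_batch = _rng.rand(4, 6).astype('float32')
print(corrupt_input(_batch, 0.3, _rng))
# ------------------------------------------------------------------------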
- elif model_type == 'MSDNN': - dnn_model = MultiStreamDNN(numpy_rng=numpy_rng, n_ins=n_ins, ms_outs=ms_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - stream_weights=stream_weights, - hidden_activation=hidden_activation, - output_activation=output_activation) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), - batch_size=batch_size, lr_weights=stream_lr_weights) - elif model_type == 'MSDNN_GV': # not fully ready - dnn_model = MultiStreamDNNGv(numpy_rng=numpy_rng, n_ins=n_ins, ms_outs=ms_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - stream_weights=stream_weights, - hidden_activation=hidden_activation, - output_activation=output_activation) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), - batch_size=batch_size, lr_weights=stream_lr_weights) - else: - logger.critical('%s type NN model is not supported!' % (model_type)) - raise - - # if pretraining is supported in one model, add the switch here - # be careful to use autoencoder for pretraining here: - # for SDAE, currently only sigmoid function is supported in the hidden layers, as our input is scaled to [0, 1] - # however, tanh works better and converge fast in finetuning - ## - # Will extend this soon... - if do_pretraining and model_type == 'SDAE': - logger.info('pretraining the %s model' % (model_type)) - - corruption_level = 0.0 - # in SDAE we do layer-wise pretraining using autoencoders - for i in range(dnn_model.n_layers): - for epoch in range(pretraining_epochs): - sub_start_time = time.clock() - - pretrain_loss = [] - while (not train_data_reader.is_finish()): - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition() - pretrain_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - - n_train_batches = pretrain_set_x.get_value( - ).shape[0] / batch_size - - for batch_index in range(n_train_batches): - pretrain_loss.append(pretraining_fn[i](index=batch_index, - corruption=corruption_level, - learning_rate=pretraining_lr)) - - sub_end_time = time.clock() - logger.info('Pre-training layer %i, epoch %d, cost %s, time spent%.2f' % - (i+1, epoch+1, numpy.mean(pretrain_loss), (sub_end_time - sub_start_time))) - train_data_reader.reset() - - logger.info('fine-tuning the %s model' % (model_type)) - - start_time = time.clock() - - best_dnn_model = dnn_model - best_validation_loss = sys.float_info.max - previous_loss = sys.float_info.max - - early_stop = 0 - epoch = 0 - previous_finetune_lr = finetune_lr - - hours_seen = 0 - seen_frames = 0 - train_error = [] - sub_start_time = time.clock() - - # ============================================================================= - # The original script (run_dnn.py) has a training routine that looks like this: - # - # foreach epoch: - # foreach partition: - # foreach minibatch: - # train_model - # validate_performance_and_stop_if_converged - # - # The current script's rountine looks like this: - # - # foreach epoch: - # foreach partition: - # foreach minibatch: - # train_model - # if we've seen another hour of data: - # validate_performance_and_stop_if_converged - # - # In order to jump out of these multiple loops when converged, we'll use this variable: - # - - break_main_loop = False - - while (epoch < training_epochs): - epoch = epoch + 1 - - current_momentum = momentum - current_finetune_lr = finetune_lr - if 
epoch <= warmup_epoch: - current_finetune_lr = finetune_lr - current_momentum = warmup_momentum - else: - current_finetune_lr = previous_finetune_lr * 0.5 - - previous_finetune_lr = current_finetune_lr - - while (not train_data_reader.is_finish()): - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition() - train_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - train_set_y.set_value(numpy.asarray( - temp_train_set_y, dtype=theano.config.floatX), borrow=True) - - n_train_batches = train_set_x.get_value().shape[0] / batch_size - - logger.debug('this partition: %d frames (divided into %d batches of size %d)' % ( - train_set_x.get_value(borrow=True).shape[0], n_train_batches, batch_size)) - - for minibatch_index in range(n_train_batches): - this_train_error = train_fn( - minibatch_index, current_finetune_lr, current_momentum) - train_error.append(this_train_error) - - if numpy.isnan(this_train_error): - logger.warning('training error over minibatch %d of %d was %s' % ( - minibatch_index+1, n_train_batches, this_train_error)) - - seen_frames += batch_size - - if seen_frames >= 720000: # Hardcoded checking intervals and framerate: 720000 frames per hour at 5ms frame rate - - hours_seen += 1 - logger.debug( - 'seen %s hour(s) of data -- calculating validation loss' % (hours_seen)) - - # calculation validation error in 1 big batch can fail for big data -- - # use minibatches - - #validation_losses = valid_fn() - #this_validation_loss = numpy.mean(validation_losses) - - valid_error = [] - valid_data_reader.reset() - while (not valid_data_reader.is_finish()): - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_y = valid_data_reader.load_next_partition() - valid_set_x.set_value(numpy.asarray( - temp_valid_set_x, dtype=theano.config.floatX), borrow=True) - valid_set_y.set_value(numpy.asarray( - temp_valid_set_y, dtype=theano.config.floatX), borrow=True) - n_valid_batches = valid_set_x.get_value( - ).shape[0] / batch_size - for minibatch_index in range(n_valid_batches): - v_loss = valid_score_i(minibatch_index) - valid_error.append(v_loss) - # print ' validation for batch %s (%s frames): %s'%(minibatch_index, batch_size, v_loss) - this_validation_loss = numpy.mean(valid_error) - this_validation_loss_std = numpy.std(valid_error) - print('Mean validation loss: %s, std over minibatches: %s' % ( - this_validation_loss, this_validation_loss_std)) - - # this has a possible bias if the minibatches were not all of identical size - # but it should not be siginficant if minibatches are small - this_train_valid_loss = numpy.mean(train_error) - - # It might also be interesting to look at how consistent performance is across minibatches: - this_train_valid_loss_std = numpy.std(train_error) - - sub_end_time = time.clock() - - loss_difference = this_validation_loss - previous_loss - - logger.info('epoch %i, validation error %f (std: %f), train error %f (std: %f) time spent %.2f' % ( - epoch, this_validation_loss, this_validation_loss_std, this_train_valid_loss, this_train_valid_loss_std, (sub_end_time - sub_start_time))) - if plot: - plotlogger.add_plot_point( - 'training convergence', 'validation set', (hours_seen, this_validation_loss)) - plotlogger.add_plot_point( - 'training convergence', 'training set', (hours_seen, this_train_valid_loss)) - plotlogger.save_plot( - 'training convergence', title='Progress of training and validation error', xlabel='hours of data seen', ylabel='error') - - if this_validation_loss < 
best_validation_loss: - best_dnn_model = dnn_model - best_validation_loss = this_validation_loss - logger.debug( - 'validation loss decreased, so saving model') - early_stop = 0 - else: - logger.debug('validation loss did not improve') - dbn = best_dnn_model - early_stop += 1 - - if early_stop > early_stop_epoch: - # too many consecutive checks without surpassing the best model - logger.debug('stopping early') - break_main_loop = True - break - - if math.isnan(this_validation_loss): - break_main_loop = True - break - - previous_loss = this_validation_loss - - sub_start_time = time.clock() - seen_frames = 0 - - train_error = [] - - if break_main_loop: - break - - if break_main_loop: - break - - train_data_reader.reset() - - end_time = time.clock() - pickle.dump(best_dnn_model, open(nnets_file_name, 'wb')) - - logger.info('overall training time: %.2fm validation error %f' % - ((end_time - start_time) / 60., best_validation_loss)) - - if plot: - plotlogger.save_plot( - 'training convergence', title='Final training and validation error', xlabel='epochs', ylabel='error') - - return best_validation_loss - - -def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - -# visualize_dnn(dbn) - - file_number = len(valid_file_list) - - for i in range(file_number): - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size / n_ins))] - features = features.reshape((-1, n_ins)) - temp_set_x = features.tolist() - test_set_x = theano.shared(numpy.asarray( - temp_set_x, dtype=theano.config.floatX)) - - predicted_parameter = dnn_model.parameter_prediction( - test_set_x=test_set_x) -# predicted_parameter = test_out() - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - logger.debug('saved to %s' % out_file_list[i]) - fid.close() - -# generate bottleneck layer as festures - - -def dnn_hidden_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - - file_number = len(valid_file_list) - - for i in range(file_number): - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size / n_ins))] - features = features.reshape((-1, n_ins)) - temp_set_x = features.tolist() - test_set_x = theano.shared(numpy.asarray( - temp_set_x, dtype=theano.config.floatX)) - - predicted_parameter = dnn_model.generate_top_hidden_layer( - test_set_x=test_set_x) - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - logger.debug('saved to %s' % out_file_list[i]) - fid.close() - - -def main_function(cfg): - - # get a logger for this main 
function - logger = logging.getLogger("main") - - # get another logger to handle plotting duties - plotlogger = logging.getLogger("plotting") - - # later, we might do this via a handler that is created, attached and configured - # using the standard config mechanism of the logging module - # but for now we need to do it manually - plotlogger.set_plot_path(cfg.plot_dir) - - #### parameter setting######## - hidden_layers_sizes = cfg.hyper_params['hidden_layers_sizes'] - - # prepare environment - - try: - file_id_list = read_file_list(cfg.file_id_scp) - logger.debug('Loaded file id list from %s' % cfg.file_id_scp) - except IOError: - # this means that open(...) threw an error - logger.critical('Could not load file id list from %s' % - cfg.file_id_scp) - raise - - # total file number including training, development, and testing - total_file_number = len(file_id_list) - - data_dir = cfg.data_dir - - nn_cmp_dir = os.path.join( - data_dir, 'nn' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - nn_cmp_norm_dir = os.path.join( - data_dir, 'nn_norm' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - - model_dir = os.path.join(cfg.work_dir, 'nnets_model') - gen_dir = os.path.join(cfg.work_dir, 'gen') - - in_file_list_dict = {} - - for feature_name in list(cfg.in_dir_dict.keys()): - in_file_list_dict[feature_name] = prepare_file_path_list( - file_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False) - - nn_cmp_file_list = prepare_file_path_list( - file_id_list, nn_cmp_dir, cfg.cmp_ext) - nn_cmp_norm_file_list = prepare_file_path_list( - file_id_list, nn_cmp_norm_dir, cfg.cmp_ext) - - # normalisation information - norm_info_file = os.path.join(data_dir, 'norm_info' + cfg.combined_feature_name + - '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat') - - # normalise input full context label - - # currently supporting two different forms of lingustic features - # later, we should generalise this - - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension - logger.info('Input label dimension is %d' % lab_dim) - suffix = str(lab_dim) - # no longer supported - use new "composed" style labels instead - elif cfg.label_style == 'composed': - # label_normaliser = XMLLabelNormalisation(xpath_file_name=cfg.xpath_file_name) - suffix = 'composed' - - if cfg.process_labels_in_work_dir: - label_data_dir = cfg.work_dir - else: - label_data_dir = data_dir - - # the number can be removed - binary_label_dir = os.path.join(label_data_dir, 'binary_label_'+suffix) - nn_label_dir = os.path.join(label_data_dir, 'nn_no_silence_lab_'+suffix) - nn_label_norm_dir = os.path.join( - label_data_dir, 'nn_no_silence_lab_norm_'+suffix) -# nn_label_norm_mvn_dir = os.path.join(data_dir, 'nn_no_silence_lab_norm_'+suffix) - - in_label_align_file_list = prepare_file_path_list( - file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - binary_label_file_list = prepare_file_path_list( - file_id_list, binary_label_dir, cfg.lab_ext) - nn_label_file_list = prepare_file_path_list( - file_id_list, nn_label_dir, cfg.lab_ext) - nn_label_norm_file_list = prepare_file_path_list( - file_id_list, nn_label_norm_dir, cfg.lab_ext) - - # to do - sanity check the label dimension here? 
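# ---- Editorial aside (illustrative, not part of the original file) ----
# A sketch of the scaling that MinMaxNormalisation appears to perform in the
# steps that follow: per-dimension [min, max] statistics are found on the
# training files only, and every file is then mapped onto [0.01, 0.99] with
# those same vectors. The helper below is a simplification; the real class
# also handles binary file I/O.
import numpy

def min_max_normalise(data, min_vec, max_vec, target_min=0.01, target_max=0.99):
    span = numpy.maximum(max_vec - min_vec, 1e-8)  # guard constant dimensions
    return (data - min_vec) / span * (target_max - target_min) + target_min

_train = numpy.random.rand(100, 3).astype('float32')
_min_vec, _max_vec = _train.min(axis=0), _train.max(axis=0)
_normed = min_max_normalise(_train, _min_vec, _max_vec)
print(_normed.min(axis=0), _normed.max(axis=0))  # approx. 0.01 and 0.99 per dim
# ------------------------------------------------------------------------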
- - min_max_normaliser = None - label_norm_file = 'label_norm_%s.dat' % (cfg.label_style) - label_norm_file = os.path.join(label_data_dir, label_norm_file) - - if cfg.NORMLAB and (cfg.label_style == 'HTS'): - # simple HTS labels - logger.info( - 'preparing label data (input) using standard HTS style labels') - label_normaliser.perform_normalisation( - in_label_align_file_list, binary_label_file_list) - - remover = SilenceRemover(n_cmp=lab_dim, silence_pattern=['*-#+*']) - remover.remove_silence(binary_label_file_list, - in_label_align_file_list, nn_label_file_list) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if cfg.NORMLAB and (cfg.label_style == 'composed'): - # new flexible label preprocessor - - logger.info( - 'preparing label data (input) using "composed" style labels') - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - - logger.info('Loaded label configuration') - # logger.info('%s' % label_composer.configuration.labels ) - - lab_dim = label_composer.compute_label_dimension() - logger.info('label dimension will be %d' % lab_dim) - - if cfg.precompile_xpaths: - label_composer.precompile_xpaths() - - # there are now a set of parallel input label files (e.g, one set of HTS and another set of Ossian trees) - # create all the lists of these, ready to pass to the label composer - - in_label_align_file_list = {} - for label_style, label_style_required in label_composer.label_styles.items(): - if label_style_required: - logger.info( - 'labels of style %s are required - constructing file paths for them' % label_style) - if label_style == 'xpath': - in_label_align_file_list['xpath'] = prepare_file_path_list( - file_id_list, cfg.xpath_label_align_dir, cfg.utt_ext, False) - elif label_style == 'hts': - in_label_align_file_list['hts'] = prepare_file_path_list( - file_id_list, cfg.hts_label_align_dir, cfg.lab_ext, False) - else: - logger.critical( - 'unsupported label style %s specified in label configuration' % label_style) - raise Exception - - # now iterate through the files, one at a time, constructing the labels for them - num_files = len(file_id_list) - logger.info('the label styles required are %s' % - label_composer.label_styles) - - for i in range(num_files): - logger.info( - 'making input label features for %4d of %4d' % (i+1, num_files)) - - # iterate through the required label styles and open each corresponding label file - - # a dictionary of file descriptors, pointing at the required files - required_labels = {} - - for label_style, label_style_required in label_composer.label_styles.items(): - - # the files will be a parallel set of files for a single utterance - # e.g., the XML tree and an HTS label file - if label_style_required: - required_labels[label_style] = open( - in_label_align_file_list[label_style][i], 'r') - logger.debug(' opening label file %s' % - in_label_align_file_list[label_style][i]) - - logger.debug('label styles with open files: %s' % - required_labels) - label_composer.make_labels( - required_labels, out_file_name=binary_label_file_list[i], fill_missing_values=cfg.fill_missing_values, iterate_over_frames=cfg.iterate_over_frames) - - # now close all opened files - for fd in 
required_labels.values(): - fd.close() - - # silence removal - if cfg.remove_silence_using_binary_labels: - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from label using silence feature: %s' % ( - label_composer.configuration.labels[silence_feature])) - logger.info('Silence will be removed from CMP files in same way') - # Binary labels have 2 roles: both the thing trimmed and the instructions for trimming: - trim_silence(binary_label_file_list, nn_label_file_list, lab_dim, - binary_label_file_list, lab_dim, silence_feature, percent_to_keep=5) - else: - logger.info('No silence removal done') - # start from the labels we have just produced, not trimmed versions - nn_label_file_list = binary_label_file_list - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if min_max_normaliser != None: - # save label normalisation information for unseen testing labels - label_min_vector = min_max_normaliser.min_vector - label_max_vector = min_max_normaliser.max_vector - label_norm_info = numpy.concatenate( - (label_min_vector, label_max_vector), axis=0) - - label_norm_info = numpy.array(label_norm_info, 'float32') - fid = open(label_norm_file, 'wb') - label_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (label_min_vector.size, label_norm_file)) - - # make output acoustic data - if cfg.MAKECMP: - logger.info('creating acoustic (output) features') - delta_win = [-0.5, 0.0, 0.5] - acc_win = [1.0, -2.0, 1.0] - - acoustic_worker = AcousticComposition( - delta_win=delta_win, acc_win=acc_win) - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, cfg.in_dimension_dict, cfg.out_dimension_dict) - - if cfg.remove_silence_using_binary_labels: - # do this to get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from CMP using binary label file') - - # overwrite the untrimmed audio with the trimmed version: - trim_silence(nn_cmp_file_list, nn_cmp_file_list, cfg.cmp_dim, - binary_label_file_list, lab_dim, silence_feature, percent_to_keep=5) - - else: # back off to previous method using HTS labels: - remover = SilenceRemover( - n_cmp=cfg.cmp_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - nn_cmp_file_list, in_label_align_file_list, nn_cmp_file_list) # save to itself - - # save acoustic normalisation information for normalising the features back - var_dir = os.path.join(data_dir, 'var') - if not os.path.exists(var_dir): - os.makedirs(var_dir) - - var_file_dict = {} - for feature_name in list(cfg.out_dimension_dict.keys()): - var_file_dict[feature_name] = os.path.join( - var_dir, feature_name + '_' + str(cfg.out_dimension_dict[feature_name])) - - # normalise output acoustic data - if cfg.NORMCMP: - logger.info('normalising acoustic (output) features using method %s' % - cfg.output_feature_normalisation) - cmp_norm_info = None - if cfg.output_feature_normalisation == 'MVN': - normaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - # calculate mean and std vectors on the 
training data, and apply on the whole dataset - global_mean_vector = normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number], 0, cfg.cmp_dim) - global_std_vector = normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector, 0, cfg.cmp_dim) - - normaliser.feature_normalisation( - nn_cmp_file_list, nn_cmp_norm_file_list) - cmp_norm_info = numpy.concatenate( - (global_mean_vector, global_std_vector), axis=0) - - elif cfg.output_feature_normalisation == 'MINMAX': - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim) - global_mean_vector = min_max_normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number]) - global_std_vector = min_max_normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim, min_value=0.01, max_value=0.99) - min_max_normaliser.find_min_max_values( - nn_cmp_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_cmp_file_list, nn_cmp_norm_file_list) - - cmp_min_vector = min_max_normaliser.min_vector - cmp_max_vector = min_max_normaliser.max_vector - cmp_norm_info = numpy.concatenate( - (cmp_min_vector, cmp_max_vector), axis=0) - - else: - logger.critical('Normalisation type %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - cmp_norm_info = numpy.array(cmp_norm_info, 'float32') - fid = open(norm_info_file, 'wb') - cmp_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (cfg.output_feature_normalisation, norm_info_file)) - # logger.debug(' value was\n%s' % cmp_norm_info) - - feature_index = 0 - for feature_name in list(cfg.out_dimension_dict.keys()): - feature_std_vector = numpy.array( - global_std_vector[:, feature_index:feature_index+cfg.out_dimension_dict[feature_name]], 'float32') - - fid = open(var_file_dict[feature_name], 'w') - feature_std_vector.tofile(fid) - fid.close() - - logger.info('saved %s variance vector to %s' % - (feature_name, var_file_dict[feature_name])) - # logger.debug(' value was\n%s' % feature_std_vector) - - feature_index += cfg.out_dimension_dict[feature_name] - - train_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number] - train_y_file_list = nn_cmp_norm_file_list[0:cfg.train_file_number] - valid_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - valid_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - test_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - # we need to know the label dimension before training the DNN - # computing that requires us to look at the labels - # - # currently, there are two ways to do this - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension - - elif cfg.label_style == 'composed': - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - logger.info('label dimension is %d' % lab_dim) - - combined_model_arch = str(len(hidden_layers_sizes)) - for hid_size in hidden_layers_sizes: - 
combined_model_arch += '_' + str(hid_size) - -# nnets_file_name = '%s/%s_%s_%d.%d.%d.%d.%d.train.%d.model' \ -# %(model_dir, cfg.model_type, cfg.combined_feature_name, int(cfg.multistream_switch), -# len(hidden_layers_sizes), hidden_layers_sizes[0], -# lab_dim, cfg.cmp_dim, cfg.train_file_number) - - nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.model' \ - % (model_dir, cfg.model_type, cfg.combined_feature_name, int(cfg.multistream_switch), - combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number) - - # DNN model training - if cfg.TRAINDNN: - - logger.info('training DNN') - - try: - os.makedirs(model_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create model directory %s' % model_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - try: - # print 'start DNN' - train_DNN(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot) - except KeyboardInterrupt: - logger.critical('train_DNN interrupted via keyboard') - # Could 'raise' the exception further, but that causes a deep traceback to be printed - # which we don't care about for a keyboard interrupt. So, just bail out immediately - sys.exit(1) - except: - logger.critical('train_DNN threw an exception') - raise - - # generate parameters from DNN - temp_dir_name = '%s_%s_%d_%d_%d_%d_%d_%d' \ - % (cfg.model_type, cfg.combined_feature_name, int(cfg.do_post_filtering), - cfg.train_file_number, lab_dim, cfg.cmp_dim, - len(hidden_layers_sizes), hidden_layers_sizes[0]) - gen_dir = os.path.join(gen_dir, temp_dir_name) - - gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.DNNGEN: - logger.info('generating from DNN') - - try: - os.makedirs(gen_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create generation directory %s' % gen_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - gen_file_list = prepare_file_path_list( - gen_file_id_list, gen_dir, cfg.cmp_ext) - -# dnn_generation(valid_x_file_list, nnets_file_name, lab_dim, cfg.cmp_dim, gen_file_list) - dnn_generation(test_x_file_list, nnets_file_name, - lab_dim, cfg.cmp_dim, gen_file_list) - - logger.debug('denormalising generated output using method %s' % - cfg.output_feature_normalisation) - - fid = open(norm_info_file, 'rb') - cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32) - fid.close() - cmp_min_max = cmp_min_max.reshape((2, -1)) - cmp_min_vector = cmp_min_max[0, ] - cmp_max_vector = cmp_min_max[1, ] - - if cfg.output_feature_normalisation == 'MVN': - denormaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - denormaliser.feature_denormalisation( - gen_file_list, gen_file_list, cmp_min_vector, cmp_max_vector) - - elif cfg.output_feature_normalisation == 'MINMAX': - denormaliser = MinMaxNormalisation( - cfg.cmp_dim, min_value=0.01, max_value=0.99, min_vector=cmp_min_vector, max_vector=cmp_max_vector) - denormaliser.denormalise_data(gen_file_list, gen_file_list) 
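# ---- Editorial aside (illustrative, not part of the original file) ----
# For the 'MVN' branch above: norm_info_file holds the global mean vector
# (row 0) and standard-deviation vector (row 1), so denormalising generated
# features is just x * std + mean. A minimal sketch with illustrative names:
import numpy

def mvn_denormalise(features, mean_vector, std_vector):
    # invert x_norm = (x - mean) / std
    return features * std_vector + mean_vector

_mean = numpy.full(5, 2.0, dtype='float32')
_std = numpy.full(5, 0.5, dtype='float32')
print(mvn_denormalise(numpy.ones((3, 5), dtype='float32'), _mean, _std))
# ------------------------------------------------------------------------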
- else: - logger.critical('denormalising method %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - # perform MLPG to smooth parameter trajectory - # lf0 is included, the output features much have vuv. - generator = ParameterGeneration(gen_wav_features=cfg.gen_wav_features) - generator.acoustic_decomposition( - gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict, var_file_dict) - - # generate wav - if cfg.GENWAV: - logger.info('reconstructing waveform(s)') - generate_wav(gen_dir, gen_file_id_list, cfg) # generated speech -# generate_wav(nn_cmp_dir, gen_file_id_list) # reference copy synthesis speech - - # evaluation: calculate distortion - if cfg.CALMCD: - logger.info('calculating MCD') - - ref_data_dir = os.path.join(data_dir, 'ref_data') - - ref_mgc_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.mgc_ext) - ref_bap_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.bap_ext) - ref_lf0_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.lf0_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - calculator = IndividualDistortionComp() - - spectral_distortion = 0.0 - bap_mse = 0.0 - f0_mse = 0.0 - vuv_error = 0.0 - - valid_file_id_list = file_id_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_file_id_list = file_id_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - # get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - # use first feature in label -- hardcoded for now - silence_feature = 0 - - # Use these to trim silence: - untrimmed_test_labels = binary_label_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if 'mgc' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['mgc'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_mgc_list, cfg.mgc_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=['*-#+*']) - remover.remove_silence(in_file_list_dict['mgc'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_mgc_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - # MCD - valid_spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - test_spectral_distortion *= (10 / - numpy.log(10)) * numpy.sqrt(2.0) # MCD - - if 'bap' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['bap'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_bap_list, cfg.bap_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=['*-#+*']) - 
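# ---- Editorial aside (illustrative, not part of the original file) ----
# The silence_pattern ['*-#+*'] passed to SilenceRemover above looks like an
# HTS-style glob: in a full-context label the current phone sits between '-'
# and '+', so the pattern matches frames whose current phone is the silence
# symbol '#'. Assuming fnmatch-style matching, a quick check:
import fnmatch

print(fnmatch.fnmatch('ae^t-#+ax=m', '*-#+*'))   # True: current phone is silence
print(fnmatch.fnmatch('ae^t-iy+ax=m', '*-#+*'))  # False: current phone is 'iy'
# ------------------------------------------------------------------------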
remover.remove_silence(in_file_list_dict['bap'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_bap_list) - valid_bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - test_bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - valid_bap_mse = valid_bap_mse / 10.0 - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - test_bap_mse = test_bap_mse / 10.0 - - if 'lf0' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['lf0'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_lf0_list, cfg.lf0_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=['*-#+*']) - remover.remove_silence(in_file_list_dict['lf0'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_lf0_list) - valid_f0_mse, valid_vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - test_f0_mse, test_vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - - logger.info('Develop: DNN -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' - % (valid_spectral_distortion, valid_bap_mse, valid_f0_mse, valid_vuv_error*100.)) - logger.info('Test : DNN -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' - % (test_spectral_distortion, test_bap_mse, test_f0_mse, test_vuv_error*100.)) - - # this can be removed - # - if 0: # to calculate distortion of HMM baseline - hmm_gen_no_silence_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nick_hmm_pf_2400_no_silence' - hmm_gen_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nick_hmm_pf_2400' - - if 1: - hmm_mgc_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_dir, cfg.mgc_ext) - hmm_bap_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_dir, cfg.bap_ext) - hmm_lf0_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_dir, cfg.lf0_ext) - - hmm_mgc_no_silence_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_no_silence_dir, cfg.mgc_ext) - hmm_bap_no_silence_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_no_silence_dir, cfg.bap_ext) - hmm_lf0_no_silence_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_no_silence_dir, cfg.lf0_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - hmm_mgc_list, in_gen_label_align_file_list, hmm_mgc_no_silence_list) - - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - hmm_bap_list, in_gen_label_align_file_list, hmm_bap_no_silence_list) - - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - hmm_lf0_list, in_gen_label_align_file_list, hmm_lf0_no_silence_list) - - calculator = IndividualDistortionComp() - - spectral_distortion = calculator.compute_distortion( - 
valid_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.mgc_ext, cfg.mgc_dim) - bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.bap_ext, cfg.bap_dim) - f0_mse, vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.lf0_ext, cfg.lf0_dim) - - spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - bap_mse = bap_mse / 10.0 - - logger.info('Develop: HMM -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' % - (spectral_distortion, bap_mse, f0_mse, vuv_error*100.)) - - spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.mgc_ext, cfg.mgc_dim) - bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.bap_ext, cfg.bap_dim) - f0_mse, vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.lf0_ext, cfg.lf0_dim) - - spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - bap_mse = bap_mse / 10.0 - - logger.info('Test : HMM -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' % - (spectral_distortion, bap_mse, f0_mse, vuv_error*100.)) - - -if __name__ == '__main__': - - # these things should be done even before trying to parse the command line - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.cfg - - # set up logging to use our custom class - logging.setLoggerClass(LoggerPlotter) - - # get a logger for this main function - logger = logging.getLogger("main") - - if len(sys.argv) != 2: - logger.critical('usage: run_dnn.sh [config file name]') - sys.exit(1) - - config_file = sys.argv[1] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - - if cfg.profile: - logger.info('profiling is activated') - import cProfile - import pstats - cProfile.run('main_function(cfg)', 'mainstats') - - # create a stream for the profiler to write to - profiling_output = io.StringIO() - p = pstats.Stats('mainstats', stream=profiling_output) - - # print stats to that stream - # here we just report the top 10 functions, sorted by total amount of time spent in each - p.strip_dirs().sort_stats('tottime').print_stats(10) - - # print the result to the log - logger.info('---Profiling result follows---\n%s' % - profiling_output.getvalue()) - profiling_output.close() - logger.info('---End of profiling result---') - - else: - main_function(cfg) - - sys.exit(0) - -import pickle -import gzip -import os -import sys -import errno -import time -import math - -# numpy & theano imports need to be done in this order (only for some numpy installations, not sure why) -import numpy -# we need to explicitly import this in some cases, not sure why this doesn't get imported with numpy itself -import numpy.distutils.__config__ -# and only after that can we import theano -import theano - -from utils.providers import ListDataProviderWithProjectionIndex, expand_projection_inputs, get_unexpanded_projection_inputs # ListDataProvider - -from frontend.label_normalisation import HTSLabelNormalisation, XMLLabelNormalisation -from frontend.silence_remover import SilenceRemover -from frontend.silence_remover import trim_silence -from frontend.min_max_norm import MinMaxNormalisation -#from frontend.acoustic_normalisation import CMPNormalisation -from frontend.acoustic_composition import AcousticComposition -from frontend.parameter_generation import ParameterGeneration -#from 
frontend.feature_normalisation_base import FeatureNormBase -from frontend.mean_variance_norm import MeanVarianceNorm - -# the new class for label composition and normalisation -from frontend.label_composer import LabelComposer - -import configuration - -from models.dnn import DNN -from models.tpdnn import TokenProjectionDNN -from models.ms_dnn import MultiStreamDNN -from models.ms_dnn_gv import MultiStreamDNNGv -from models.sdae import StackedDenoiseAutoEncoder - -from utils.compute_distortion import DistortionComputation, IndividualDistortionComp -from utils.generate import generate_wav -from utils.learn_rates import ExpDecreaseLearningRate - - -#import matplotlib.pyplot as plt -# our custom logging class that can also plot -#from logplot.logging_plotting import LoggerPlotter, MultipleTimeSeriesPlot, SingleWeightMatrixPlot -from logplot.logging_plotting import LoggerPlotter, MultipleSeriesPlot, SingleWeightMatrixPlot -import logging # as logging -import logging.config -import io - - -# This should always be True -- tidy up later -expand_by_minibatch = True - -if expand_by_minibatch: - proj_type = 'int32' -else: - proj_type = theano.config.floatX - - -def extract_file_id_list(file_list): - file_id_list = [] - for file_name in file_list: - file_id = os.path.basename(os.path.splitext(file_name)[0]) - file_id_list.append(file_id) - - return file_id_list - - -def read_file_list(file_name): - - logger = logging.getLogger("read_file_list") - - file_lists = [] - fid = open(file_name) - for line in fid.readlines(): - line = line.strip() - if len(line) < 1: - continue - file_lists.append(line) - fid.close() - - logger.debug('Read file list from %s' % file_name) - return file_lists - - -def make_output_file_list(out_dir, in_file_lists): - out_file_lists = [] - - for in_file_name in in_file_lists: - file_id = os.path.basename(in_file_name) - out_file_name = out_dir + '/' + file_id - out_file_lists.append(out_file_name) - - return out_file_lists - - -def prepare_file_path_list(file_id_list, file_dir, file_extension, new_dir_switch=True): - if not os.path.exists(file_dir) and new_dir_switch: - os.makedirs(file_dir) - file_name_list = [] - for file_id in file_id_list: - file_name = file_dir + '/' + file_id + file_extension - file_name_list.append(file_name) - - return file_name_list - - -def visualize_dnn(dnn): - - layer_num = len(dnn.params) / 2 # including input and output - - for i in range(layer_num): - fig_name = 'Activation weights W' + str(i) - fig_title = 'Activation weights of W' + str(i) - xlabel = 'Neuron index of hidden layer ' + str(i) - ylabel = 'Neuron index of hidden layer ' + str(i+1) - if i == 0: - xlabel = 'Input feature index' - if i == layer_num-1: - ylabel = 'Output feature index' - - logger.create_plot(fig_name, SingleWeightMatrixPlot) - plotlogger.add_plot_point( - fig_name, fig_name, dnn.params[i*2].get_value(borrow=True).T) - plotlogger.save_plot(fig_name, title=fig_name, - xlabel=xlabel, ylabel=ylabel) - - -# Function for training projection and non-projection parts at same time -def train_DNN(train_xy_file_list, valid_xy_file_list, - nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False): - - # get loggers for this function - # this one writes to both console and file - logger = logging.getLogger("main.train_DNN") - logger.debug('Starting train_DNN') - - if plot: - # this one takes care of plotting duties - plotlogger = logging.getLogger("plotting") - # create an (empty) plot of training convergence, ready to receive data points - 
logger.create_plot('training convergence', MultipleSeriesPlot) - - try: - assert numpy.sum(ms_outs) == n_outs - except AssertionError: - logger.critical( - 'the sum of the multi-stream outputs does not equal %d' % (n_outs)) - raise - - ####parameters##### - finetune_lr = float(hyper_params['learning_rate']) - training_epochs = int(hyper_params['training_epochs']) - batch_size = int(hyper_params['batch_size']) - l1_reg = float(hyper_params['l1_reg']) - l2_reg = float(hyper_params['l2_reg']) - private_l2_reg = float(hyper_params['private_l2_reg']) - warmup_epoch = int(hyper_params['warmup_epoch']) - momentum = float(hyper_params['momentum']) - warmup_momentum = float(hyper_params['warmup_momentum']) - - hidden_layers_sizes = hyper_params['hidden_layers_sizes'] - - stream_weights = hyper_params['stream_weights'] - private_hidden_sizes = hyper_params['private_hidden_sizes'] - - buffer_utt_size = buffer_size - early_stop_epoch = int(hyper_params['early_stop_epochs']) - - hidden_activation = hyper_params['hidden_activation'] - output_activation = hyper_params['output_activation'] - - stream_lr_weights = hyper_params['stream_lr_weights'] - use_private_hidden = hyper_params['use_private_hidden'] - - model_type = hyper_params['model_type'] - - index_to_project = hyper_params['index_to_project'] - projection_insize = hyper_params['projection_insize'] - projection_outsize = hyper_params['projection_outsize'] - - # use a switch to turn on pretraining - # pretraining may not help much; in that case, we turn it off to save time - do_pretraining = hyper_params['do_pretraining'] - pretraining_epochs = int(hyper_params['pretraining_epochs']) - pretraining_lr = float(hyper_params['pretraining_lr']) - initial_projection_distrib = hyper_params['initial_projection_distrib'] - - buffer_size = int(buffer_size / batch_size) * batch_size - - ################### - (train_x_file_list, train_y_file_list) = train_xy_file_list - (valid_x_file_list, valid_y_file_list) = valid_xy_file_list - - logger.debug('Creating training data provider') - train_data_reader = ListDataProviderWithProjectionIndex(x_file_list=train_x_file_list, y_file_list=train_y_file_list, n_ins=n_ins, n_outs=n_outs, - buffer_size=buffer_size, shuffle=True, index_to_project=index_to_project, projection_insize=projection_insize, indexes_only=expand_by_minibatch) - - logger.debug('Creating validation data provider') - valid_data_reader = ListDataProviderWithProjectionIndex(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, n_ins=n_ins, n_outs=n_outs, - buffer_size=buffer_size, shuffle=False, index_to_project=index_to_project, projection_insize=projection_insize, indexes_only=expand_by_minibatch) - - shared_train_set_xy, temp_train_set_x, temp_train_set_x_proj, temp_train_set_y = train_data_reader.load_next_partition_with_projection() - train_set_x, train_set_x_proj, train_set_y = shared_train_set_xy - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_x_proj, temp_valid_set_y = valid_data_reader.load_next_partition_with_projection() - valid_set_x, valid_set_x_proj, valid_set_y = shared_valid_set_xy - train_data_reader.reset() - valid_data_reader.reset() - - # temporarily we use the training set as pretrain_set_x.
- # we need to support any data for pretraining - pretrain_set_x = train_set_x - - # numpy random generator - numpy_rng = numpy.random.RandomState(123) - logger.info('building the model') - - dnn_model = None - pretrain_fn = None # not all models support pretraining right now - train_fn = None - valid_fn = None - # valid_fn and valid_model are the same. reserved to compute multi-stream distortion - valid_model = None - if model_type == 'DNN': - dnn_model = DNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - hidden_activation=hidden_activation, - output_activation=output_activation) - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size) - - elif model_type == 'TPDNN': - dnn_model = TokenProjectionDNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - hidden_activation=hidden_activation, - output_activation=output_activation, - projection_insize=projection_insize, projection_outsize=projection_outsize, - expand_by_minibatch=expand_by_minibatch, initial_projection_distrib=initial_projection_distrib) - train_all_fn, train_subword_fn, train_word_fn, infer_projections_fn, valid_fn, valid_score_i = \ - dnn_model.build_finetune_functions( - (train_set_x, train_set_x_proj, train_set_y), - (valid_set_x, valid_set_x_proj, valid_set_y), batch_size=batch_size) - - elif model_type == 'SDAE': - # basic model is ready. - # if the corruption level is set to zero, it becomes a normal autoencoder - dnn_model = StackedDenoiseAutoEncoder(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes) - - if do_pretraining: - pretraining_fn = dnn_model.pretraining_functions( - pretrain_set_x, batch_size) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size) - - # model is ready, but the hyper-parameters are not optimised. - elif model_type == 'MSDNN': - dnn_model = MultiStreamDNN(numpy_rng=numpy_rng, n_ins=n_ins, ms_outs=ms_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - stream_weights=stream_weights, - hidden_activation=hidden_activation, - output_activation=output_activation) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), - batch_size=batch_size, lr_weights=stream_lr_weights) - elif model_type == 'MSDNN_GV': # not fully ready - dnn_model = MultiStreamDNNGv(numpy_rng=numpy_rng, n_ins=n_ins, ms_outs=ms_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - stream_weights=stream_weights, - hidden_activation=hidden_activation, - output_activation=output_activation) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), - batch_size=batch_size, lr_weights=stream_lr_weights) - else: - logger.critical('%s type NN model is not supported!' % (model_type)) - raise ValueError('%s type NN model is not supported!' % (model_type)) - - # if pretraining is supported in one model, add the switch here - # be careful to use autoencoder for pretraining here: - # for SDAE, currently only the sigmoid function is supported in the hidden layers, as our input is scaled to [0, 1] - # however, tanh works better and converges faster in finetuning - ## - # Will extend this soon...
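# A minimal illustrative sketch (not part of the original toolkit) of the
# masking corruption a denoising autoencoder applies to its input. With
# corruption_level == 0.0 the mask is all ones, so the SDAE handled below
# reduces to a plain autoencoder, as the comment above notes. `numpy` is
# already imported at the top of this file.
def corrupt_input(x, corruption_level, rng=None):
    # zero out a random fraction of the input features (masking noise)
    rng = rng or numpy.random.RandomState(123)
    mask = rng.binomial(n=1, p=1.0 - corruption_level, size=x.shape)
    return x * mask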
- if do_pretraining and model_type == 'SDAE': - logger.info('pretraining the %s model' % (model_type)) - - corruption_level = 0.0 - # in SDAE we do layer-wise pretraining using autoencoders - for i in range(dnn_model.n_layers): - for epoch in range(pretraining_epochs): - sub_start_time = time.clock() - - pretrain_loss = [] - while (not train_data_reader.is_finish()): - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition() - pretrain_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - - n_train_batches = pretrain_set_x.get_value( - ).shape[0] // batch_size - - for batch_index in range(n_train_batches): - pretrain_loss.append(pretraining_fn[i](index=batch_index, - corruption=corruption_level, - learning_rate=pretraining_lr)) - - sub_end_time = time.clock() - logger.info('Pre-training layer %i, epoch %d, cost %s, time spent %.2f' % - (i+1, epoch+1, numpy.mean(pretrain_loss), (sub_end_time - sub_start_time))) - train_data_reader.reset() - - logger.info('fine-tuning the %s model' % (model_type)) - - start_time = time.clock() - - best_dnn_model = dnn_model - best_validation_loss = sys.float_info.max - previous_loss = sys.float_info.max - - early_stop = 0 - epoch = 0 - previous_finetune_lr = finetune_lr - while (epoch < training_epochs): - epoch = epoch + 1 - - current_momentum = momentum - current_finetune_lr = finetune_lr - if epoch <= warmup_epoch: - current_finetune_lr = finetune_lr - current_momentum = warmup_momentum - else: - current_finetune_lr = previous_finetune_lr * 0.5 - - previous_finetune_lr = current_finetune_lr - - train_error = [] - sub_start_time = time.clock() - - while (not train_data_reader.is_finish()): - shared_train_set_xy, temp_train_set_x, temp_train_set_x_proj, temp_train_set_y = train_data_reader.load_next_partition_with_projection() - train_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - train_set_x_proj.set_value(numpy.asarray( - temp_train_set_x_proj, dtype=proj_type), borrow=True) - train_set_y.set_value(numpy.asarray( - temp_train_set_y, dtype=theano.config.floatX), borrow=True) - - n_train_batches = train_set_x.get_value().shape[0] // batch_size - - logger.debug('this partition: %d frames (divided into %d batches of size %d)' % ( - train_set_x.get_value(borrow=True).shape[0], n_train_batches, batch_size)) - - for minibatch_index in range(n_train_batches): - this_train_error = train_all_fn( - minibatch_index, current_finetune_lr, current_momentum) - train_error.append(this_train_error) - - if numpy.isnan(this_train_error): - logger.warning('training error over minibatch %d of %d was %s' % ( - minibatch_index+1, n_train_batches, this_train_error)) - - train_data_reader.reset() - - # osw -- getting validation error from a forward pass in a single batch - # exhausts memory when using 20k projected vocab -- also use minibatches - logger.debug('calculating validation loss') - valid_error = [] - n_valid_batches = valid_set_x.get_value().shape[0] // batch_size - for minibatch_index in range(n_valid_batches): - v_loss = valid_score_i(minibatch_index) - valid_error.append(v_loss) - - this_validation_loss = numpy.mean(valid_error) - - # this has a possible bias if the minibatches were not all of identical size - # but it should not be significant if minibatches are small - this_train_valid_loss = numpy.mean(train_error) - - sub_end_time = time.clock() - - loss_difference = this_validation_loss - previous_loss - - logger.info('BASIC epoch %i, 
validation error %f, train error %f time spent %.2f' % ( - epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time))) - if plot: - plotlogger.add_plot_point( - 'training convergence', 'validation set', (epoch, this_validation_loss)) - plotlogger.add_plot_point( - 'training convergence', 'training set', (epoch, this_train_valid_loss)) - plotlogger.save_plot( - 'training convergence', title='Progress of training and validation error', xlabel='epochs', ylabel='error') - - if this_validation_loss < best_validation_loss: - best_dnn_model = dnn_model - best_validation_loss = this_validation_loss - logger.debug('validation loss decreased, so saving model') - early_stop = 0 - else: - logger.debug('validation loss did not improve') - dbn = best_dnn_model - early_stop += 1 - - if early_stop > early_stop_epoch: - # too many consecutive epochs without surpassing the best model - logger.debug('stopping early') - break - - if math.isnan(this_validation_loss): - break - - previous_loss = this_validation_loss - - # Save projection values: - if cfg.hyper_params['model_type'] == 'TPDNN': - if not os.path.isdir(cfg.projection_weights_output_dir): - os.mkdir(cfg.projection_weights_output_dir) - weights = dnn_model.get_projection_weights() - fname = os.path.join( - cfg.projection_weights_output_dir, 'proj_BASIC_epoch_%s' % (epoch)) - numpy.savetxt(fname, weights) - - end_time = time.clock() - pickle.dump(best_dnn_model, open(nnets_file_name, 'wb')) - - logger.info('overall training time: %.2fm validation error %f' % - ((end_time - start_time) / 60., best_validation_loss)) - - if plot: - plotlogger.save_plot( - 'training convergence', title='Final training and validation error', xlabel='epochs', ylabel='error') - - -# Function for training all model on train data as well as simultaneously -# inferring proj weights on dev data. 
-# in each epoch do: -# train_all_fn() -# infer_projections_fn() ## <-- updates proj for devset and gives validation loss -def train_DNN_and_traindev_projections(train_xy_file_list, valid_xy_file_list, - nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False): - - # get loggers for this function - # this one writes to both console and file - logger = logging.getLogger("main.train_DNN") - logger.debug('Starting train_DNN') - - if plot: - # this one takes care of plotting duties - plotlogger = logging.getLogger("plotting") - # create an (empty) plot of training convergence, ready to receive data points - logger.create_plot('training convergence', MultipleSeriesPlot) - - try: - assert numpy.sum(ms_outs) == n_outs - except AssertionError: - logger.critical( - 'the summation of multi-stream outputs does not equal to %d' % (n_outs)) - raise - - ####parameters##### - finetune_lr = float(hyper_params['learning_rate']) - training_epochs = int(hyper_params['training_epochs']) - batch_size = int(hyper_params['batch_size']) - l1_reg = float(hyper_params['l1_reg']) - l2_reg = float(hyper_params['l2_reg']) - private_l2_reg = float(hyper_params['private_l2_reg']) - warmup_epoch = int(hyper_params['warmup_epoch']) - momentum = float(hyper_params['momentum']) - warmup_momentum = float(hyper_params['warmup_momentum']) - - hidden_layers_sizes = hyper_params['hidden_layers_sizes'] - - stream_weights = hyper_params['stream_weights'] - private_hidden_sizes = hyper_params['private_hidden_sizes'] - - buffer_utt_size = buffer_size - early_stop_epoch = int(hyper_params['early_stop_epochs']) - - hidden_activation = hyper_params['hidden_activation'] - output_activation = hyper_params['output_activation'] - - stream_lr_weights = hyper_params['stream_lr_weights'] - use_private_hidden = hyper_params['use_private_hidden'] - - model_type = hyper_params['model_type'] - - index_to_project = hyper_params['index_to_project'] - projection_insize = hyper_params['projection_insize'] - projection_outsize = hyper_params['projection_outsize'] - - # use a switch to turn on pretraining - # pretraining may not help too much, if this case, we turn it off to save time - do_pretraining = hyper_params['do_pretraining'] - pretraining_epochs = int(hyper_params['pretraining_epochs']) - pretraining_lr = float(hyper_params['pretraining_lr']) - initial_projection_distrib = hyper_params['initial_projection_distrib'] - - buffer_size = int(buffer_size / batch_size) * batch_size - - ################### - (train_x_file_list, train_y_file_list) = train_xy_file_list - (valid_x_file_list, valid_y_file_list) = valid_xy_file_list - - logger.debug('Creating training data provider') - train_data_reader = ListDataProviderWithProjectionIndex(x_file_list=train_x_file_list, y_file_list=train_y_file_list, n_ins=n_ins, n_outs=n_outs, - buffer_size=buffer_size, shuffle=True, index_to_project=index_to_project, projection_insize=projection_insize, indexes_only=expand_by_minibatch) - - logger.debug('Creating validation data provider') - valid_data_reader = ListDataProviderWithProjectionIndex(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, n_ins=n_ins, n_outs=n_outs, - buffer_size=buffer_size, shuffle=False, index_to_project=index_to_project, projection_insize=projection_insize, indexes_only=expand_by_minibatch) - - shared_train_set_xy, temp_train_set_x, temp_train_set_x_proj, temp_train_set_y = train_data_reader.load_next_partition_with_projection() - train_set_x, train_set_x_proj, train_set_y = shared_train_set_xy - 
shared_valid_set_xy, temp_valid_set_x, temp_valid_set_x_proj, temp_valid_set_y = valid_data_reader.load_next_partition_with_projection() - valid_set_x, valid_set_x_proj, valid_set_y = shared_valid_set_xy - train_data_reader.reset() - valid_data_reader.reset() - - # temporally we use the training set as pretrain_set_x. - # we need to support any data for pretraining - pretrain_set_x = train_set_x - - # numpy random generator - numpy_rng = numpy.random.RandomState(123) - logger.info('building the model') - - dnn_model = None - pretrain_fn = None # not all the model support pretraining right now - train_fn = None - valid_fn = None - # valid_fn and valid_model are the same. reserve to computer multi-stream distortion - valid_model = None - if model_type == 'DNN': - dnn_model = DNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - hidden_activation=hidden_activation, - output_activation=output_activation) - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size) - - elif model_type == 'TPDNN': - dnn_model = TokenProjectionDNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - hidden_activation=hidden_activation, - output_activation=output_activation, - projection_insize=projection_insize, projection_outsize=projection_outsize, - expand_by_minibatch=expand_by_minibatch, initial_projection_distrib=initial_projection_distrib) - train_all_fn, train_subword_fn, train_word_fn, infer_projections_fn, valid_fn, valid_score_i = \ - dnn_model.build_finetune_functions( - (train_set_x, train_set_x_proj, train_set_y), - (valid_set_x, valid_set_x_proj, valid_set_y), batch_size=batch_size) - - elif model_type == 'SDAE': - # basic model is ready. - # if corruption levels is set to zero. it becomes normal autoencoder - dnn_model = StackedDenoiseAutoEncoder(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes) - - if do_pretraining: - pretraining_fn = dnn_model.pretraining_functions( - pretrain_set_x, batch_size) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size) - - # model is ready, but the hyper-parameters are not optimised. - elif model_type == 'MSDNN': - dnn_model = MultiStreamDNN(numpy_rng=numpy_rng, n_ins=n_ins, ms_outs=ms_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - stream_weights=stream_weights, - hidden_activation=hidden_activation, - output_activation=output_activation) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), - batch_size=batch_size, lr_weights=stream_lr_weights) - elif model_type == 'MSDNN_GV': # not fully ready - dnn_model = MultiStreamDNNGv(numpy_rng=numpy_rng, n_ins=n_ins, ms_outs=ms_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - stream_weights=stream_weights, - hidden_activation=hidden_activation, - output_activation=output_activation) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), - batch_size=batch_size, lr_weights=stream_lr_weights) - else: - logger.critical('%s type NN model is not supported!' 
% (model_type)) - raise - - # if pretraining is supported in one model, add the switch here - # be careful to use autoencoder for pretraining here: - # for SDAE, currently only sigmoid function is supported in the hidden layers, as our input is scaled to [0, 1] - # however, tanh works better and converge fast in finetuning - ## - # Will extend this soon... - if do_pretraining and model_type == 'SDAE': - logger.info('pretraining the %s model' % (model_type)) - - corruption_level = 0.0 - # in SDAE we do layer-wise pretraining using autoencoders - for i in range(dnn_model.n_layers): - for epoch in range(pretraining_epochs): - sub_start_time = time.clock() - - pretrain_loss = [] - while (not train_data_reader.is_finish()): - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition() - pretrain_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - - n_train_batches = pretrain_set_x.get_value( - ).shape[0] / batch_size - - for batch_index in range(n_train_batches): - pretrain_loss.append(pretraining_fn[i](index=batch_index, - corruption=corruption_level, - learning_rate=pretraining_lr)) - - sub_end_time = time.clock() - logger.info('Pre-training layer %i, epoch %d, cost %s, time spent%.2f' % - (i+1, epoch+1, numpy.mean(pretrain_loss), (sub_end_time - sub_start_time))) - train_data_reader.reset() - - logger.info('fine-tuning the %s model' % (model_type)) - - start_time = time.clock() - - best_dnn_model = dnn_model - best_validation_loss = sys.float_info.max - previous_loss = sys.float_info.max - - early_stop = 0 - epoch = 0 - previous_finetune_lr = finetune_lr - - # dnn_model.zero_projection_weights() - - while (epoch < training_epochs): - epoch = epoch + 1 - - current_momentum = momentum - current_finetune_lr = finetune_lr - if epoch <= warmup_epoch: - current_finetune_lr = finetune_lr - current_momentum = warmup_momentum - else: - current_finetune_lr = previous_finetune_lr * 0.5 - - previous_finetune_lr = current_finetune_lr - - train_error = [] - sub_start_time = time.clock() - - while (not train_data_reader.is_finish()): - shared_train_set_xy, temp_train_set_x, temp_train_set_x_proj, temp_train_set_y = train_data_reader.load_next_partition_with_projection() - train_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - train_set_x_proj.set_value(numpy.asarray( - temp_train_set_x_proj, dtype=proj_type), borrow=True) - train_set_y.set_value(numpy.asarray( - temp_train_set_y, dtype=theano.config.floatX), borrow=True) - - n_train_batches = train_set_x.get_value().shape[0] / batch_size - - logger.debug('this partition: %d frames (divided into %d batches of size %d)' % ( - train_set_x.get_value(borrow=True).shape[0], n_train_batches, batch_size)) - - for minibatch_index in range(n_train_batches): - this_train_error = train_all_fn( - minibatch_index, current_finetune_lr, current_momentum) - train_error.append(this_train_error) - - if numpy.isnan(this_train_error): - logger.warning('training error over minibatch %d of %d was %s' % ( - minibatch_index+1, n_train_batches, this_train_error)) - - train_data_reader.reset() - - # infer validation weights before getting validation error: - # osw -- inferring word reps on validation set in a forward pass in a single batch - # exausts memory when using 20k projected vocab -- also use minibatches - logger.debug('infer word representations for validation set') - valid_error = [] - n_valid_batches = valid_set_x.get_value().shape[0] / batch_size 
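# Under Python 3 the "/" in the line above is true division and yields a
# float, which range() in the loop below rejects with a TypeError. A minimal
# fix-up sketch (assuming only whole minibatches are wanted) is to re-derive
# the count with floor division; the same "/" vs "//" issue recurs in the
# other batch-count computations throughout this file.
n_valid_batches = valid_set_x.get_value().shape[0] // batch_size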
- for minibatch_index in range(n_valid_batches): - v_loss = infer_projections_fn( - minibatch_index, current_finetune_lr, current_momentum) - valid_error.append(v_loss) - - # this function also give us validation loss: - this_validation_loss = numpy.mean(valid_error) - - ''' - ## osw -- getting validation error from a forward pass in a single batch - ## exausts memory when using 20k projected vocab -- also use minibatches - logger.debug('calculating validation loss') - valid_error = [] - n_valid_batches = valid_set_x.get_value().shape[0] / batch_size - for minibatch_index in xrange(n_valid_batches): - v_loss = valid_score_i(minibatch_index) - valid_error.append(v_loss) - - this_validation_loss = numpy.mean(valid_error) - ''' - - # this has a possible bias if the minibatches were not all of identical size - # but it should not be siginficant if minibatches are small - this_train_valid_loss = numpy.mean(train_error) - - sub_end_time = time.clock() - - loss_difference = this_validation_loss - previous_loss - - logger.info('BASIC epoch %i, validation error %f, train error %f time spent %.2f' % ( - epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time))) - if plot: - plotlogger.add_plot_point( - 'training convergence', 'validation set', (epoch, this_validation_loss)) - plotlogger.add_plot_point( - 'training convergence', 'training set', (epoch, this_train_valid_loss)) - plotlogger.save_plot( - 'training convergence', title='Progress of training and validation error', xlabel='epochs', ylabel='error') - - if this_validation_loss < best_validation_loss: - best_dnn_model = dnn_model - best_validation_loss = this_validation_loss - logger.debug('validation loss decreased, so saving model') - early_stop = 0 - else: - logger.debug('validation loss did not improve') - dbn = best_dnn_model - early_stop += 1 - - if early_stop > early_stop_epoch: - # too many consecutive epochs without surpassing the best model - logger.debug('stopping early') - break - - if math.isnan(this_validation_loss): - break - - previous_loss = this_validation_loss - - # Save projection values: - if cfg.hyper_params['model_type'] == 'TPDNN': - if not os.path.isdir(cfg.projection_weights_output_dir): - os.mkdir(cfg.projection_weights_output_dir) - weights = dnn_model.get_projection_weights() - fname = os.path.join( - cfg.projection_weights_output_dir, 'proj_BASIC_epoch_%s' % (epoch)) - numpy.savetxt(fname, weights) - - end_time = time.clock() - pickle.dump(best_dnn_model, open(nnets_file_name, 'wb')) - - logger.info('overall training time: %.2fm validation error %f' % - ((end_time - start_time) / 60., best_validation_loss)) - - if plot: - plotlogger.save_plot( - 'training convergence', title='Final training and validation error', xlabel='epochs', ylabel='error') - - -# Function for training the non-projection part only -def train_basic_DNN(train_xy_file_list, valid_xy_file_list, - nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False): - - # get loggers for this function - # this one writes to both console and file - logger = logging.getLogger("main.train_DNN") - logger.debug('Starting train_DNN') - - if plot: - # this one takes care of plotting duties - plotlogger = logging.getLogger("plotting") - # create an (empty) plot of training convergence, ready to receive data points - logger.create_plot('training convergence', MultipleSeriesPlot) - - try: - assert numpy.sum(ms_outs) == n_outs - except AssertionError: - logger.critical( - 'the summation of multi-stream outputs does not 
equal to %d' % (n_outs)) - raise - - ####parameters##### - finetune_lr = float(hyper_params['learning_rate']) - training_epochs = int(hyper_params['training_epochs']) - batch_size = int(hyper_params['batch_size']) - l1_reg = float(hyper_params['l1_reg']) - l2_reg = float(hyper_params['l2_reg']) - private_l2_reg = float(hyper_params['private_l2_reg']) - warmup_epoch = int(hyper_params['warmup_epoch']) - momentum = float(hyper_params['momentum']) - warmup_momentum = float(hyper_params['warmup_momentum']) - - hidden_layers_sizes = hyper_params['hidden_layers_sizes'] - - stream_weights = hyper_params['stream_weights'] - private_hidden_sizes = hyper_params['private_hidden_sizes'] - - buffer_utt_size = buffer_size - early_stop_epoch = int(hyper_params['early_stop_epochs']) - - hidden_activation = hyper_params['hidden_activation'] - output_activation = hyper_params['output_activation'] - - stream_lr_weights = hyper_params['stream_lr_weights'] - use_private_hidden = hyper_params['use_private_hidden'] - - model_type = hyper_params['model_type'] - - index_to_project = hyper_params['index_to_project'] - projection_insize = hyper_params['projection_insize'] - projection_outsize = hyper_params['projection_outsize'] - - # use a switch to turn on pretraining - # pretraining may not help too much, if this case, we turn it off to save time - do_pretraining = hyper_params['do_pretraining'] - pretraining_epochs = int(hyper_params['pretraining_epochs']) - pretraining_lr = float(hyper_params['pretraining_lr']) - initial_projection_distrib = hyper_params['initial_projection_distrib'] - - buffer_size = int(buffer_size / batch_size) * batch_size - - ################### - (train_x_file_list, train_y_file_list) = train_xy_file_list - (valid_x_file_list, valid_y_file_list) = valid_xy_file_list - - logger.debug('Creating training data provider') - train_data_reader = ListDataProviderWithProjectionIndex(x_file_list=train_x_file_list, y_file_list=train_y_file_list, n_ins=n_ins, n_outs=n_outs, - buffer_size=buffer_size, shuffle=True, index_to_project=index_to_project, projection_insize=projection_insize, indexes_only=expand_by_minibatch) - - logger.debug('Creating validation data provider') - valid_data_reader = ListDataProviderWithProjectionIndex(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, n_ins=n_ins, n_outs=n_outs, - buffer_size=buffer_size, shuffle=False, index_to_project=index_to_project, projection_insize=projection_insize, indexes_only=expand_by_minibatch) - - shared_train_set_xy, temp_train_set_x, temp_train_set_x_proj, temp_train_set_y = train_data_reader.load_next_partition_with_projection() - train_set_x, train_set_x_proj, train_set_y = shared_train_set_xy - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_x_proj, temp_valid_set_y = valid_data_reader.load_next_partition_with_projection() - valid_set_x, valid_set_x_proj, valid_set_y = shared_valid_set_xy - train_data_reader.reset() - valid_data_reader.reset() - - # temporally we use the training set as pretrain_set_x. - # we need to support any data for pretraining - pretrain_set_x = train_set_x - - # numpy random generator - numpy_rng = numpy.random.RandomState(123) - logger.info('building the model') - - dnn_model = None - pretrain_fn = None # not all the model support pretraining right now - train_fn = None - valid_fn = None - # valid_fn and valid_model are the same. 
reserve to computer multi-stream distortion - valid_model = None - if model_type == 'DNN': - dnn_model = DNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - hidden_activation=hidden_activation, - output_activation=output_activation) - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size) - - elif model_type == 'TPDNN': - - dnn_model = TokenProjectionDNN(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - hidden_activation=hidden_activation, - output_activation=output_activation, - projection_insize=projection_insize, projection_outsize=projection_outsize, - expand_by_minibatch=expand_by_minibatch, initial_projection_distrib=initial_projection_distrib) - train_all_fn, train_subword_fn, train_word_fn, infer_projections_fn, valid_fn, valid_score_i = \ - dnn_model.build_finetune_functions( - (train_set_x, train_set_x_proj, train_set_y), - (valid_set_x, valid_set_x_proj, valid_set_y), batch_size=batch_size) - - elif model_type == 'SDAE': - # basic model is ready. - # if corruption levels is set to zero. it becomes normal autoencoder - dnn_model = StackedDenoiseAutoEncoder(numpy_rng=numpy_rng, n_ins=n_ins, n_outs=n_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes) - - if do_pretraining: - pretraining_fn = dnn_model.pretraining_functions( - pretrain_set_x, batch_size) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), batch_size=batch_size) - - # model is ready, but the hyper-parameters are not optimised. - elif model_type == 'MSDNN': - dnn_model = MultiStreamDNN(numpy_rng=numpy_rng, n_ins=n_ins, ms_outs=ms_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - stream_weights=stream_weights, - hidden_activation=hidden_activation, - output_activation=output_activation) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), - batch_size=batch_size, lr_weights=stream_lr_weights) - elif model_type == 'MSDNN_GV': # not fully ready - dnn_model = MultiStreamDNNGv(numpy_rng=numpy_rng, n_ins=n_ins, ms_outs=ms_outs, - l1_reg=l1_reg, l2_reg=l2_reg, - hidden_layers_sizes=hidden_layers_sizes, - stream_weights=stream_weights, - hidden_activation=hidden_activation, - output_activation=output_activation) - - train_fn, valid_fn = dnn_model.build_finetune_functions( - (train_set_x, train_set_y), (valid_set_x, valid_set_y), - batch_size=batch_size, lr_weights=stream_lr_weights) - else: - logger.critical('%s type NN model is not supported!' % (model_type)) - raise - - # if pretraining is supported in one model, add the switch here - # be careful to use autoencoder for pretraining here: - # for SDAE, currently only sigmoid function is supported in the hidden layers, as our input is scaled to [0, 1] - # however, tanh works better and converge fast in finetuning - ## - # Will extend this soon... 
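# An illustrative sketch (local names, not the toolkit's API) of the two
# projection-input encodings that `indexes_only=expand_by_minibatch` toggles
# between in the data providers above: integer token ids that index rows of
# the projection matrix, or the equivalent pre-expanded one-hot rows.
# `numpy` is imported at the top of this file; sizes here are illustrative.
_insize, _outsize = 5, 2
_projection = numpy.random.RandomState(123).normal(size=(_insize, _outsize))
_token_ids = numpy.array([0, 1, 1], dtype='int32')   # indexes_only form
_one_hot = numpy.zeros((3, _insize))
_one_hot[numpy.arange(3), _token_ids] = 1.0          # expanded one-hot form
# both encodings produce the same projected features:
assert numpy.allclose(_projection[_token_ids], _one_hot.dot(_projection))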
- if do_pretraining and model_type == 'SDAE': - logger.info('pretraining the %s model' % (model_type)) - - corruption_level = 0.0 - # in SDAE we do layer-wise pretraining using autoencoders - for i in range(dnn_model.n_layers): - for epoch in range(pretraining_epochs): - sub_start_time = time.clock() - - pretrain_loss = [] - while (not train_data_reader.is_finish()): - shared_train_set_xy, temp_train_set_x, temp_train_set_y = train_data_reader.load_next_partition() - pretrain_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - - n_train_batches = pretrain_set_x.get_value( - ).shape[0] / batch_size - - for batch_index in range(n_train_batches): - pretrain_loss.append(pretraining_fn[i](index=batch_index, - corruption=corruption_level, - learning_rate=pretraining_lr)) - - sub_end_time = time.clock() - logger.info('Pre-training layer %i, epoch %d, cost %s, time spent%.2f' % - (i+1, epoch+1, numpy.mean(pretrain_loss), (sub_end_time - sub_start_time))) - train_data_reader.reset() - - logger.info('fine-tuning the %s model' % (model_type)) - - start_time = time.clock() - - best_dnn_model = dnn_model - best_validation_loss = sys.float_info.max - previous_loss = sys.float_info.max - - early_stop = 0 - epoch = 0 - previous_finetune_lr = finetune_lr - - dnn_model.zero_projection_weights() - - while (epoch < training_epochs): - epoch = epoch + 1 - - current_momentum = momentum - current_finetune_lr = finetune_lr - if epoch <= warmup_epoch: - current_finetune_lr = finetune_lr - current_momentum = warmup_momentum - else: - current_finetune_lr = previous_finetune_lr * 0.5 - - previous_finetune_lr = current_finetune_lr - - train_error = [] - sub_start_time = time.clock() - - while (not train_data_reader.is_finish()): - shared_train_set_xy, temp_train_set_x, temp_train_set_x_proj, temp_train_set_y = train_data_reader.load_next_partition_with_projection() - train_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - train_set_x_proj.set_value(numpy.asarray( - temp_train_set_x_proj, dtype=proj_type), borrow=True) - train_set_y.set_value(numpy.asarray( - temp_train_set_y, dtype=theano.config.floatX), borrow=True) - - n_train_batches = train_set_x.get_value().shape[0] / batch_size - - logger.debug('this partition: %d frames (divided into %d batches of size %d)' % ( - train_set_x.get_value(borrow=True).shape[0], n_train_batches, batch_size)) - - for minibatch_index in range(n_train_batches): - this_train_error = train_subword_fn( - minibatch_index, current_finetune_lr, current_momentum) - train_error.append(this_train_error) - - if numpy.isnan(this_train_error): - logger.warning('training error over minibatch %d of %d was %s' % ( - minibatch_index+1, n_train_batches, this_train_error)) - - train_data_reader.reset() - - # osw -- getting validation error from a forward pass in a single batch - # exausts memory when using 20k projected vocab -- also use minibatches - logger.debug('calculating validation loss') - valid_error = [] - n_valid_batches = valid_set_x.get_value().shape[0] / batch_size - for minibatch_index in range(n_valid_batches): - v_loss = valid_score_i(minibatch_index) - valid_error.append(v_loss) - - this_validation_loss = numpy.mean(valid_error) - - # this has a possible bias if the minibatches were not all of identical size - # but it should not be siginficant if minibatches are small - this_train_valid_loss = numpy.mean(train_error) - - sub_end_time = time.clock() - - loss_difference = this_validation_loss - 
previous_loss - - logger.info('BASIC epoch %i, validation error %f, train error %f time spent %.2f' % ( - epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time))) - if plot: - plotlogger.add_plot_point( - 'training convergence', 'validation set', (epoch, this_validation_loss)) - plotlogger.add_plot_point( - 'training convergence', 'training set', (epoch, this_train_valid_loss)) - plotlogger.save_plot( - 'training convergence', title='Progress of training and validation error', xlabel='epochs', ylabel='error') - - if this_validation_loss < best_validation_loss: - best_dnn_model = dnn_model - best_validation_loss = this_validation_loss - logger.debug('validation loss decreased, so saving model') - early_stop = 0 - else: - logger.debug('validation loss did not improve') - dbn = best_dnn_model - early_stop += 1 - - if early_stop > early_stop_epoch: - # too many consecutive epochs without surpassing the best model - logger.debug('stopping early') - break - - if math.isnan(this_validation_loss): - break - - previous_loss = this_validation_loss - - # Save projection values: - if cfg.hyper_params['model_type'] == 'TPDNN': - if not os.path.isdir(cfg.projection_weights_output_dir): - os.mkdir(cfg.projection_weights_output_dir) - weights = dnn_model.get_projection_weights() - fname = os.path.join( - cfg.projection_weights_output_dir, 'proj_BASIC_epoch_%s' % (epoch)) - numpy.savetxt(fname, weights) - - end_time = time.clock() - pickle.dump(best_dnn_model, open(nnets_file_name, 'wb')) - - logger.info('overall training time: %.2fm validation error %f' % - ((end_time - start_time) / 60., best_validation_loss)) - - if plot: - plotlogger.save_plot( - 'training convergence', title='Final training and validation error', xlabel='epochs', ylabel='error') - - -# ========== now train the word residual ============ -def train_DNN_with_projections(train_xy_file_list, valid_xy_file_list, - nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False): - - ####parameters##### - finetune_lr = float(hyper_params['learning_rate']) - training_epochs = int(hyper_params['training_epochs']) - batch_size = int(hyper_params['batch_size']) - l1_reg = float(hyper_params['l1_reg']) - l2_reg = float(hyper_params['l2_reg']) - private_l2_reg = float(hyper_params['private_l2_reg']) - warmup_epoch = int(hyper_params['warmup_epoch']) - momentum = float(hyper_params['momentum']) - warmup_momentum = float(hyper_params['warmup_momentum']) - - hidden_layers_sizes = hyper_params['hidden_layers_sizes'] - - stream_weights = hyper_params['stream_weights'] - private_hidden_sizes = hyper_params['private_hidden_sizes'] - - buffer_utt_size = buffer_size - early_stop_epoch = int(hyper_params['early_stop_epochs']) - - hidden_activation = hyper_params['hidden_activation'] - output_activation = hyper_params['output_activation'] - - stream_lr_weights = hyper_params['stream_lr_weights'] - use_private_hidden = hyper_params['use_private_hidden'] - - model_type = hyper_params['model_type'] - - index_to_project = hyper_params['index_to_project'] - projection_insize = hyper_params['projection_insize'] - projection_outsize = hyper_params['projection_outsize'] - - ######### data providers ########## - (train_x_file_list, train_y_file_list) = train_xy_file_list - (valid_x_file_list, valid_y_file_list) = valid_xy_file_list - - logger.debug('Creating training data provider') - train_data_reader = ListDataProviderWithProjectionIndex(x_file_list=train_x_file_list, y_file_list=train_y_file_list, n_ins=n_ins, 
n_outs=n_outs, - buffer_size=buffer_size, shuffle=True, index_to_project=index_to_project, projection_insize=projection_insize, indexes_only=expand_by_minibatch) - - logger.debug('Creating validation data provider') - valid_data_reader = ListDataProviderWithProjectionIndex(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, n_ins=n_ins, n_outs=n_outs, - buffer_size=buffer_size, shuffle=False, index_to_project=index_to_project, projection_insize=projection_insize, indexes_only=expand_by_minibatch) - - shared_train_set_xy, temp_train_set_x, temp_train_set_x_proj, temp_train_set_y = train_data_reader.load_next_partition_with_projection() - train_set_x, train_set_x_proj, train_set_y = shared_train_set_xy - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_x_proj, temp_valid_set_y = valid_data_reader.load_next_partition_with_projection() - valid_set_x, valid_set_x_proj, valid_set_y = shared_valid_set_xy - train_data_reader.reset() - valid_data_reader.reset() - #################################### - - # numpy random generator - numpy_rng = numpy.random.RandomState(123) - logger.info('building the model') - - ############## load existing dnn ##### - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - train_all_fn, train_subword_fn, train_word_fn, infer_projections_fn, valid_fn, valid_score_i = \ - dnn_model.build_finetune_functions( - (train_set_x, train_set_x_proj, train_set_y), - (valid_set_x, valid_set_x_proj, valid_set_y), batch_size=batch_size) - #################################### - - logger.info('fine-tuning the %s model' % (model_type)) - - start_time = time.clock() - - best_dnn_model = dnn_model - best_validation_loss = sys.float_info.max - previous_loss = sys.float_info.max - - early_stop = 0 - epoch = 0 - previous_finetune_lr = finetune_lr - - dnn_model.initialise_projection_weights() - - all_epochs = 20 # 100 ## <-------- hard coded !!!!!!!!!! - - current_finetune_lr = previous_finetune_lr = finetune_lr - warmup_epoch_2 = 10 # 10 ## <-------- hard coded !!!!!!!!!! 
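# A sketch of the learning-rate schedule the loop below implements (given
# the hard-coded all_epochs / warmup_epoch_2 values above): the rate is held
# at finetune_lr for warmup_epoch_2 epochs, then halved once per epoch,
# i.e. lr(e) = finetune_lr * 0.5 ** max(0, e - warmup_epoch_2).
def scheduled_lr(epoch, base_lr, warmup_epochs):
    # constant during warmup, then halved every subsequent epoch
    return base_lr * 0.5 ** max(0, epoch - warmup_epochs)
# e.g. scheduled_lr(12, 0.002, 10) == 0.0005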
- - while (epoch < all_epochs): - epoch = epoch + 1 - - current_momentum = momentum - - if epoch > warmup_epoch_2: - previous_finetune_lr = current_finetune_lr - current_finetune_lr = previous_finetune_lr * 0.5 - - train_error = [] - sub_start_time = time.clock() - - while (not train_data_reader.is_finish()): - shared_train_set_xy, temp_train_set_x, temp_train_set_x_proj, temp_train_set_y = train_data_reader.load_next_partition_with_projection() - train_set_x.set_value(numpy.asarray( - temp_train_set_x, dtype=theano.config.floatX), borrow=True) - train_set_x_proj.set_value(numpy.asarray( - temp_train_set_x_proj, dtype=proj_type), borrow=True) - train_set_y.set_value(numpy.asarray( - temp_train_set_y, dtype=theano.config.floatX), borrow=True) - - n_train_batches = train_set_x.get_value().shape[0] / batch_size - - logger.debug('this partition: %d frames (divided into %d batches of size %d)' % ( - train_set_x.get_value(borrow=True).shape[0], n_train_batches, batch_size)) - - for minibatch_index in range(n_train_batches): - this_train_error = train_word_fn( - minibatch_index, current_finetune_lr, current_momentum) - train_error.append(this_train_error) - - if numpy.isnan(this_train_error): - logger.warning('training error over minibatch %d of %d was %s' % ( - minibatch_index+1, n_train_batches, this_train_error)) - - train_data_reader.reset() - - # COULD REMOVE THIS LATER - # osw -- getting validation error from a forward pass in a single batch - # exausts memory when using 20k projected vocab -- also use minibatches - logger.debug('calculating validation loss') - valid_error = [] - n_valid_batches = valid_set_x.get_value().shape[0] / batch_size - for minibatch_index in range(n_valid_batches): - v_loss = valid_score_i(minibatch_index) - valid_error.append(v_loss) - this_validation_loss = numpy.mean(valid_error) - - # this has a possible bias if the minibatches were not all of identical size - # but it should not be siginficant if minibatches are small - this_train_valid_loss = numpy.mean(train_error) - -# if plot: -# ## add dummy validation loss so that plot works: -# plotlogger.add_plot_point('training convergence','validation set',(epoch,this_validation_loss)) -# plotlogger.add_plot_point('training convergence','training set',(epoch,this_train_valid_loss)) -# - - sub_end_time = time.clock() - - logger.info('TOKEN epoch %i, validation error %f, train error %f time spent %.2f' % ( - epoch, this_validation_loss, this_train_valid_loss, (sub_end_time - sub_start_time))) - - if cfg.hyper_params['model_type'] == 'TPDNN': - if not os.path.isdir(cfg.projection_weights_output_dir): - os.mkdir(cfg.projection_weights_output_dir) - weights = dnn_model.get_projection_weights() - fname = os.path.join( - cfg.projection_weights_output_dir, 'proj_TOKEN_epoch_%s' % (epoch)) - numpy.savetxt(fname, weights) - - best_dnn_model = dnn_model # always update - - end_time = time.clock() - pickle.dump(best_dnn_model, open(nnets_file_name, 'wb')) - - logger.info('overall training time: %.2fm validation error %f' % - ((end_time - start_time) / 60., best_validation_loss)) - -# if plot: -# plotlogger.save_plot('training convergence',title='Final training and validation error',xlabel='epochs',ylabel='error') -# - - # ======================================================== - - # ========== now infer word represntations for out-of-training (dev) data ============ -# -# ### TEMP-- restarted!!! 
### ~~~~~~~ -# epoch = 50 -# dnn_model = cPickle.load(open(nnets_file_name, 'rb')) -# train_all_fn, train_subword_fn, train_word_fn, infer_projections_fn, valid_fn, valid_score_i = \ -# dnn_model.build_finetune_functions( -# (train_set_x, train_set_x_proj, train_set_y), -# (valid_set_x, valid_set_x_proj, valid_set_y), batch_size=batch_size) -# this_train_valid_loss = 198.0 ## approx value -# ### ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ - - -def infer_projections(train_xy_file_list, valid_xy_file_list, - nnets_file_name, n_ins, n_outs, ms_outs, hyper_params, buffer_size, plot=False): - - ####parameters##### - finetune_lr = float(hyper_params['learning_rate']) - training_epochs = int(hyper_params['training_epochs']) - batch_size = int(hyper_params['batch_size']) - l1_reg = float(hyper_params['l1_reg']) - l2_reg = float(hyper_params['l2_reg']) - private_l2_reg = float(hyper_params['private_l2_reg']) - warmup_epoch = int(hyper_params['warmup_epoch']) - momentum = float(hyper_params['momentum']) - warmup_momentum = float(hyper_params['warmup_momentum']) - - hidden_layers_sizes = hyper_params['hidden_layers_sizes'] - - stream_weights = hyper_params['stream_weights'] - private_hidden_sizes = hyper_params['private_hidden_sizes'] - - buffer_utt_size = buffer_size - early_stop_epoch = int(hyper_params['early_stop_epochs']) - - hidden_activation = hyper_params['hidden_activation'] - output_activation = hyper_params['output_activation'] - - stream_lr_weights = hyper_params['stream_lr_weights'] - use_private_hidden = hyper_params['use_private_hidden'] - - model_type = hyper_params['model_type'] - - index_to_project = hyper_params['index_to_project'] - projection_insize = hyper_params['projection_insize'] - projection_outsize = hyper_params['projection_outsize'] - - ######### data providers ########## - (train_x_file_list, train_y_file_list) = train_xy_file_list - (valid_x_file_list, valid_y_file_list) = valid_xy_file_list - - logger.debug('Creating training data provider') - train_data_reader = ListDataProviderWithProjectionIndex(x_file_list=train_x_file_list, y_file_list=train_y_file_list, n_ins=n_ins, n_outs=n_outs, - buffer_size=buffer_size, shuffle=True, index_to_project=index_to_project, projection_insize=projection_insize, indexes_only=expand_by_minibatch) - - logger.debug('Creating validation data provider') - valid_data_reader = ListDataProviderWithProjectionIndex(x_file_list=valid_x_file_list, y_file_list=valid_y_file_list, n_ins=n_ins, n_outs=n_outs, - buffer_size=buffer_size, shuffle=False, index_to_project=index_to_project, projection_insize=projection_insize, indexes_only=expand_by_minibatch) - - shared_train_set_xy, temp_train_set_x, temp_train_set_x_proj, temp_train_set_y = train_data_reader.load_next_partition_with_projection() - train_set_x, train_set_x_proj, train_set_y = shared_train_set_xy - shared_valid_set_xy, temp_valid_set_x, temp_valid_set_x_proj, temp_valid_set_y = valid_data_reader.load_next_partition_with_projection() - valid_set_x, valid_set_x_proj, valid_set_y = shared_valid_set_xy - train_data_reader.reset() - valid_data_reader.reset() - #################################### - - # numpy random generator - numpy_rng = numpy.random.RandomState(123) - logger.info('building the model') - - ############## load existing dnn ##### - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - train_all_fn, train_subword_fn, train_word_fn, infer_projections_fn, valid_fn, valid_score_i = \ - dnn_model.build_finetune_functions( - (train_set_x, train_set_x_proj, train_set_y), - (valid_set_x, 
valid_set_x_proj, valid_set_y), batch_size=batch_size) - #################################### - - logger.info('fine-tuning the %s model' % (model_type)) - - start_time = time.clock() - - best_dnn_model = dnn_model - best_validation_loss = sys.float_info.max - previous_loss = sys.float_info.max - - early_stop = 0 - epoch = 0 - previous_finetune_lr = finetune_lr - - logger.info('fine-tuning the %s model' % (model_type)) - - # dnn_model.initialise_projection_weights() - - inference_epochs = 20 # <-------- hard coded !!!!!!!!!! - - current_finetune_lr = previous_finetune_lr = finetune_lr - warmup_epoch_3 = 10 # 10 ## <-------- hard coded !!!!!!!!!! - - #warmup_epoch_3 = epoch + warmup_epoch_3 - #inference_epochs += epoch - while (epoch < inference_epochs): - - epoch = epoch + 1 - - current_momentum = momentum - - if epoch > warmup_epoch_3: - previous_finetune_lr = current_finetune_lr - current_finetune_lr = previous_finetune_lr * 0.5 - - dev_error = [] - sub_start_time = time.clock() - - # osw -- inferring word reps on validation set in a forward pass in a single batch - # exausts memory when using 20k projected vocab -- also use minibatches - logger.debug('infer word representations for validation set') - valid_error = [] - n_valid_batches = valid_set_x.get_value().shape[0] / batch_size - for minibatch_index in range(n_valid_batches): - v_loss = infer_projections_fn( - minibatch_index, current_finetune_lr, current_momentum) - valid_error.append(v_loss) - - this_validation_loss = numpy.mean(valid_error) - - #valid_error = infer_projections_fn(current_finetune_lr, current_momentum) - #this_validation_loss = numpy.mean(valid_error) - -# if plot: -# ## add dummy validation loss so that plot works: -# plotlogger.add_plot_point('training convergence','validation set',(epoch,this_validation_loss)) -# plotlogger.add_plot_point('training convergence','training set',(epoch,this_train_valid_loss)) -# - - sub_end_time = time.clock() - - logger.info('INFERENCE epoch %i, validation error %f, time spent %.2f' % ( - epoch, this_validation_loss, (sub_end_time - sub_start_time))) - - if cfg.hyper_params['model_type'] == 'TPDNN': - if not os.path.isdir(cfg.projection_weights_output_dir): - os.mkdir(cfg.projection_weights_output_dir) - weights = dnn_model.get_projection_weights() - fname = os.path.join( - cfg.projection_weights_output_dir, 'proj_INFERENCE_epoch_%s' % (epoch)) - numpy.savetxt(fname, weights) - - best_dnn_model = dnn_model # always update - - end_time = time.clock() - pickle.dump(best_dnn_model, open(nnets_file_name, 'wb')) - - logger.info('overall training time: %.2fm validation error %f' % - ((end_time - start_time) / 60., best_validation_loss)) - -# if plot: -# plotlogger.save_plot('training convergence',title='Final training and validation error',xlabel='epochs',ylabel='error') -# - - # ======================================================== - - if cfg.hyper_params['model_type'] == 'TPDNN': - os.system('python %s %s' % ( - '/afs/inf.ed.ac.uk/user/o/owatts/scripts_NEW/plot_weights_multiple_phases.py', cfg.projection_weights_output_dir)) - - return best_validation_loss - - -def dnn_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list, cfg=None, use_word_projections=True): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - - # 'remove' word representations by randomising them. 
As model is unpickled and - # no re-saved, this does not throw trained parameters away. - if not use_word_projections: - dnn_model.initialise_projection_weights() - -# visualize_dnn(dbn) - - file_number = len(valid_file_list) - - for i in range(file_number): - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size / n_ins))] - features = features.reshape((-1, n_ins)) - - # features, features_proj = expand_projection_inputs(features, cfg.index_to_project, \ - # cfg.projection_insize) - features, features_proj = get_unexpanded_projection_inputs(features, cfg.index_to_project, - cfg.projection_insize) - # temp_set_x = features.tolist() ## osw - why list conversion necessary? - # print temp_set_x - test_set_x = theano.shared(numpy.asarray( - features, dtype=theano.config.floatX)) - test_set_x_proj = theano.shared( - numpy.asarray(features_proj, dtype='int32')) - - predicted_parameter = dnn_model.parameter_prediction( - test_set_x=test_set_x, test_set_x_proj=test_set_x_proj) -# predicted_parameter = test_out() - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - logger.debug('saved to %s' % out_file_list[i]) - fid.close() - -# generate bottleneck layer as festures - - -def dnn_hidden_generation(valid_file_list, nnets_file_name, n_ins, n_outs, out_file_list): - logger = logging.getLogger("dnn_generation") - logger.debug('Starting dnn_generation') - - plotlogger = logging.getLogger("plotting") - - dnn_model = pickle.load(open(nnets_file_name, 'rb')) - - file_number = len(valid_file_list) - - for i in range(file_number): - logger.info('generating %4d of %4d: %s' % - (i+1, file_number, valid_file_list[i])) - fid_lab = open(valid_file_list[i], 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - features = features[:(n_ins * (features.size / n_ins))] - features = features.reshape((-1, n_ins)) - temp_set_x = features.tolist() - test_set_x = theano.shared(numpy.asarray( - temp_set_x, dtype=theano.config.floatX)) - - predicted_parameter = dnn_model.generate_top_hidden_layer( - test_set_x=test_set_x) - - # write to cmp file - predicted_parameter = numpy.array(predicted_parameter, 'float32') - temp_parameter = predicted_parameter - fid = open(out_file_list[i], 'wb') - predicted_parameter.tofile(fid) - logger.debug('saved to %s' % out_file_list[i]) - fid.close() - - -def main_function(cfg): - - # get a logger for this main function - logger = logging.getLogger("main") - - # get another logger to handle plotting duties - plotlogger = logging.getLogger("plotting") - - # later, we might do this via a handler that is created, attached and configured - # using the standard config mechanism of the logging module - # but for now we need to do it manually - plotlogger.set_plot_path(cfg.plot_dir) - - #### parameter setting######## - hidden_layers_sizes = cfg.hyper_params['hidden_layers_sizes'] - - # prepare environment - - try: - file_id_list = read_file_list(cfg.file_id_scp) - logger.debug('Loaded file id list from %s' % cfg.file_id_scp) - except IOError: - # this means that open(...) 
threw an error - logger.critical('Could not load file id list from %s' % - cfg.file_id_scp) - raise - - # total file number including training, development, and testing - total_file_number = len(file_id_list) - - data_dir = cfg.data_dir - - nn_cmp_dir = os.path.join( - data_dir, 'nn' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - nn_cmp_norm_dir = os.path.join( - data_dir, 'nn_norm' + cfg.combined_feature_name + '_' + str(cfg.cmp_dim)) - - model_dir = os.path.join(cfg.work_dir, 'nnets_model') - gen_dir = os.path.join(cfg.work_dir, 'gen') - - in_file_list_dict = {} - - for feature_name in list(cfg.in_dir_dict.keys()): - in_file_list_dict[feature_name] = prepare_file_path_list( - file_id_list, cfg.in_dir_dict[feature_name], cfg.file_extension_dict[feature_name], False) - - nn_cmp_file_list = prepare_file_path_list( - file_id_list, nn_cmp_dir, cfg.cmp_ext) - nn_cmp_norm_file_list = prepare_file_path_list( - file_id_list, nn_cmp_norm_dir, cfg.cmp_ext) - - # normalisation information - norm_info_file = os.path.join(data_dir, 'norm_info' + cfg.combined_feature_name + - '_' + str(cfg.cmp_dim) + '_' + cfg.output_feature_normalisation + '.dat') - - # normalise input full context label - - # currently supporting two different forms of lingustic features - # later, we should generalise this - - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension - logger.info('Input label dimension is %d' % lab_dim) - suffix = str(lab_dim) - # no longer supported - use new "composed" style labels instead - elif cfg.label_style == 'composed': - # label_normaliser = XMLLabelNormalisation(xpath_file_name=cfg.xpath_file_name) - suffix = 'composed' - - if cfg.process_labels_in_work_dir: - label_data_dir = cfg.work_dir - else: - label_data_dir = data_dir - - # the number can be removed - binary_label_dir = os.path.join(label_data_dir, 'binary_label_'+suffix) - nn_label_dir = os.path.join(label_data_dir, 'nn_no_silence_lab_'+suffix) - nn_label_norm_dir = os.path.join( - label_data_dir, 'nn_no_silence_lab_norm_'+suffix) -# nn_label_norm_mvn_dir = os.path.join(data_dir, 'nn_no_silence_lab_norm_'+suffix) - - in_label_align_file_list = prepare_file_path_list( - file_id_list, cfg.in_label_align_dir, cfg.lab_ext, False) - binary_label_file_list = prepare_file_path_list( - file_id_list, binary_label_dir, cfg.lab_ext) - nn_label_file_list = prepare_file_path_list( - file_id_list, nn_label_dir, cfg.lab_ext) - nn_label_norm_file_list = prepare_file_path_list( - file_id_list, nn_label_norm_dir, cfg.lab_ext) - - # to do - sanity check the label dimension here? 
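# A minimal sketch of the [0.01, 0.99] min-max scaling that the
# MinMaxNormalisation worker below applies (an illustrative helper, not the
# toolkit's implementation); note the min/max statistics are found on the
# training files only and then applied to the whole dataset:
def min_max_scale(x, data_min, data_max, lo=0.01, hi=0.99):
    span = numpy.maximum(data_max - data_min, 1e-8)  # guard against zero range
    return lo + (x - data_min) * (hi - lo) / span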
- - min_max_normaliser = None - label_norm_file = 'label_norm_%s.dat' % (cfg.label_style) - label_norm_file = os.path.join(label_data_dir, label_norm_file) - - if cfg.NORMLAB and (cfg.label_style == 'HTS'): - # simple HTS labels - logger.info( - 'preparing label data (input) using standard HTS style labels') - label_normaliser.perform_normalisation( - in_label_align_file_list, binary_label_file_list) - - remover = SilenceRemover(n_cmp=lab_dim, silence_pattern=['*-#+*']) - remover.remove_silence(binary_label_file_list, - in_label_align_file_list, nn_label_file_list) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if cfg.NORMLAB and (cfg.label_style == 'composed'): - # new flexible label preprocessor - - logger.info( - 'preparing label data (input) using "composed" style labels') - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - - logger.info('Loaded label configuration') - # logger.info('%s' % label_composer.configuration.labels ) - - lab_dim = label_composer.compute_label_dimension() - logger.info('label dimension will be %d' % lab_dim) - - if cfg.precompile_xpaths: - label_composer.precompile_xpaths() - - # there are now a set of parallel input label files (e.g, one set of HTS and another set of Ossian trees) - # create all the lists of these, ready to pass to the label composer - - in_label_align_file_list = {} - for label_style, label_style_required in label_composer.label_styles.items(): - if label_style_required: - logger.info( - 'labels of style %s are required - constructing file paths for them' % label_style) - if label_style == 'xpath': - in_label_align_file_list['xpath'] = prepare_file_path_list( - file_id_list, cfg.xpath_label_align_dir, cfg.utt_ext, False) - elif label_style == 'hts': - in_label_align_file_list['hts'] = prepare_file_path_list( - file_id_list, cfg.hts_label_align_dir, cfg.lab_ext, False) - else: - logger.critical( - 'unsupported label style %s specified in label configuration' % label_style) - raise Exception - - # now iterate through the files, one at a time, constructing the labels for them - num_files = len(file_id_list) - logger.info('the label styles required are %s' % - label_composer.label_styles) - - for i in range(num_files): - logger.info( - 'making input label features for %4d of %4d' % (i+1, num_files)) - - # iterate through the required label styles and open each corresponding label file - - # a dictionary of file descriptors, pointing at the required files - required_labels = {} - - for label_style, label_style_required in label_composer.label_styles.items(): - - # the files will be a parallel set of files for a single utterance - # e.g., the XML tree and an HTS label file - if label_style_required: - required_labels[label_style] = open( - in_label_align_file_list[label_style][i], 'r') - logger.debug(' opening label file %s' % - in_label_align_file_list[label_style][i]) - - logger.debug('label styles with open files: %s' % - required_labels) - label_composer.make_labels( - required_labels, out_file_name=binary_label_file_list[i], fill_missing_values=cfg.fill_missing_values, iterate_over_frames=cfg.iterate_over_frames) - - # now close all opened files - for fd in 
required_labels.values(): - fd.close() - - # silence removal - if cfg.remove_silence_using_binary_labels: - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from label using silence feature: %s' % ( - label_composer.configuration.labels[silence_feature])) - logger.info('Silence will be removed from CMP files in same way') - # Binary labels have 2 roles: both the thing trimmed and the instructions for trimming: - trim_silence(binary_label_file_list, nn_label_file_list, lab_dim, - binary_label_file_list, lab_dim, silence_feature, percent_to_keep=5) - else: - logger.info('No silence removal done') - # start from the labels we have just produced, not trimmed versions - nn_label_file_list = binary_label_file_list - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=lab_dim, min_value=0.01, max_value=0.99, exclude_columns=[cfg.index_to_project]) - # use only training data to find min-max information, then apply on the whole dataset - min_max_normaliser.find_min_max_values( - nn_label_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_label_file_list, nn_label_norm_file_list) - - if min_max_normaliser != None: - # save label normalisation information for unseen testing labels - label_min_vector = min_max_normaliser.min_vector - label_max_vector = min_max_normaliser.max_vector - label_norm_info = numpy.concatenate( - (label_min_vector, label_max_vector), axis=0) - - label_norm_info = numpy.array(label_norm_info, 'float32') - fid = open(label_norm_file, 'wb') - label_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (label_min_vector.size, label_norm_file)) - - # make output acoustic data - if cfg.MAKECMP: - logger.info('creating acoustic (output) features') - delta_win = [-0.5, 0.0, 0.5] - acc_win = [1.0, -2.0, 1.0] - - acoustic_worker = AcousticComposition( - delta_win=delta_win, acc_win=acc_win) - acoustic_worker.prepare_nn_data( - in_file_list_dict, nn_cmp_file_list, cfg.in_dimension_dict, cfg.out_dimension_dict) - - if cfg.remove_silence_using_binary_labels: - # do this to get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - silence_feature = 0 # use first feature in label -- hardcoded for now - logger.info('Silence removal from CMP using binary label file') - - # overwrite the untrimmed audio with the trimmed version: - trim_silence(nn_cmp_file_list, nn_cmp_file_list, cfg.cmp_dim, - binary_label_file_list, lab_dim, silence_feature, percent_to_keep=5) - - else: # back off to previous method using HTS labels: - remover = SilenceRemover( - n_cmp=cfg.cmp_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - nn_cmp_file_list, in_label_align_file_list, nn_cmp_file_list) # save to itself - - # save acoustic normalisation information for normalising the features back - var_dir = os.path.join(data_dir, 'var') - if not os.path.exists(var_dir): - os.makedirs(var_dir) - - var_file_dict = {} - for feature_name in list(cfg.out_dimension_dict.keys()): - var_file_dict[feature_name] = os.path.join( - var_dir, feature_name + '_' + str(cfg.out_dimension_dict[feature_name])) - - # normalise output acoustic data - if cfg.NORMCMP: - logger.info('normalising acoustic (output) features using method %s' % - cfg.output_feature_normalisation) - cmp_norm_info = None - if cfg.output_feature_normalisation == 'MVN': - normaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - # 
calculate mean and std vectors on the training data, and apply on the whole dataset - global_mean_vector = normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number], 0, cfg.cmp_dim) - global_std_vector = normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector, 0, cfg.cmp_dim) - - normaliser.feature_normalisation( - nn_cmp_file_list, nn_cmp_norm_file_list) - cmp_norm_info = numpy.concatenate( - (global_mean_vector, global_std_vector), axis=0) - - elif cfg.output_feature_normalisation == 'MINMAX': - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim) - global_mean_vector = min_max_normaliser.compute_mean( - nn_cmp_file_list[0:cfg.train_file_number]) - global_std_vector = min_max_normaliser.compute_std( - nn_cmp_file_list[0:cfg.train_file_number], global_mean_vector) - - min_max_normaliser = MinMaxNormalisation( - feature_dimension=cfg.cmp_dim, min_value=0.01, max_value=0.99) - min_max_normaliser.find_min_max_values( - nn_cmp_file_list[0:cfg.train_file_number]) - min_max_normaliser.normalise_data( - nn_cmp_file_list, nn_cmp_norm_file_list) - - cmp_min_vector = min_max_normaliser.min_vector - cmp_max_vector = min_max_normaliser.max_vector - cmp_norm_info = numpy.concatenate( - (cmp_min_vector, cmp_max_vector), axis=0) - - else: - logger.critical('Normalisation type %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - cmp_norm_info = numpy.array(cmp_norm_info, 'float32') - fid = open(norm_info_file, 'wb') - cmp_norm_info.tofile(fid) - fid.close() - logger.info('saved %s vectors to %s' % - (cfg.output_feature_normalisation, norm_info_file)) - # logger.debug(' value was\n%s' % cmp_norm_info) - - feature_index = 0 - for feature_name in list(cfg.out_dimension_dict.keys()): - feature_std_vector = numpy.array( - global_std_vector[:, feature_index:feature_index+cfg.out_dimension_dict[feature_name]], 'float32') - - fid = open(var_file_dict[feature_name], 'w') - feature_std_vector.tofile(fid) - fid.close() - - logger.info('saved %s variance vector to %s' % - (feature_name, var_file_dict[feature_name])) - # logger.debug(' value was\n%s' % feature_std_vector) - - feature_index += cfg.out_dimension_dict[feature_name] - - train_x_file_list = nn_label_norm_file_list[0:cfg.train_file_number] - train_y_file_list = nn_cmp_norm_file_list[0:cfg.train_file_number] - valid_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - valid_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - test_y_file_list = nn_cmp_norm_file_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - # we need to know the label dimension before training the DNN - # computing that requires us to look at the labels - # - # currently, there are two ways to do this - if cfg.label_style == 'HTS': - label_normaliser = HTSLabelNormalisation( - question_file_name=cfg.question_file_name) - lab_dim = label_normaliser.dimension - - elif cfg.label_style == 'composed': - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - logger.info('label dimension is %d' % lab_dim) - - combined_model_arch = str(len(hidden_layers_sizes)) - for 
hid_size in hidden_layers_sizes: - combined_model_arch += '_' + str(hid_size) - -# nnets_file_name = '%s/%s_%s_%d.%d.%d.%d.%d.train.%d.model' \ -# %(model_dir, cfg.model_type, cfg.combined_feature_name, int(cfg.multistream_switch), -# len(hidden_layers_sizes), hidden_layers_sizes[0], -# lab_dim, cfg.cmp_dim, cfg.train_file_number) - - nnets_file_name = '%s/%s_%s_%d_%s_%d.%d.train.%d.model' \ - % (model_dir, cfg.model_type, cfg.combined_feature_name, int(cfg.multistream_switch), - combined_model_arch, lab_dim, cfg.cmp_dim, cfg.train_file_number) - - # DNN model training - if cfg.TRAINDNN: - - logger.info('training DNN') - - try: - os.makedirs(model_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create model directory %s' % model_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - try: - if cfg.scheme == 'stagwise': - train_basic_DNN(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot) - train_DNN_with_projections(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot) - infer_projections(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot) - elif cfg.scheme == 'simultaneous': - train_DNN_and_traindev_projections(train_xy_file_list=(train_x_file_list, train_y_file_list), - valid_xy_file_list=( - valid_x_file_list, valid_y_file_list), - nnets_file_name=nnets_file_name, - n_ins=lab_dim, n_outs=cfg.cmp_dim, ms_outs=cfg.multistream_outs, - hyper_params=cfg.hyper_params, buffer_size=cfg.buffer_size, plot=cfg.plot) - else: - sys.exit('unknown scheme!') -# train_DNN(train_xy_file_list = (train_x_file_list, train_y_file_list), \ -# valid_xy_file_list = (valid_x_file_list, valid_y_file_list), \ -# nnets_file_name = nnets_file_name, \ -# n_ins = lab_dim, n_outs = cfg.cmp_dim, ms_outs = cfg.multistream_outs, \ -# hyper_params = cfg.hyper_params, buffer_size = cfg.buffer_size, plot = cfg.plot) -# infer_projections(train_xy_file_list = (train_x_file_list, train_y_file_list), \ -# valid_xy_file_list = (valid_x_file_list, valid_y_file_list), \ -# nnets_file_name = nnets_file_name, \ -# n_ins = lab_dim, n_outs = cfg.cmp_dim, ms_outs = cfg.multistream_outs, \ -# hyper_params = cfg.hyper_params, buffer_size = cfg.buffer_size, plot = cfg.plot) - - except KeyboardInterrupt: - logger.critical('train_DNN interrupted via keyboard') - # Could 'raise' the exception further, but that causes a deep traceback to be printed - # which we don't care about for a keyboard interrupt. 
So, just bail out immediately - sys.exit(1) - except: - logger.critical('train_DNN threw an exception') - raise - - # generate parameters from DNN (with random token reps and inferred ones -- NOTOKENS & TOKENS) - temp_dir_name_NOTOKENS = '%s_%s_%d_%d_%d_%d_%d_%d_NOTOKENS' \ - % (cfg.model_type, cfg.combined_feature_name, int(cfg.do_post_filtering), - cfg.train_file_number, lab_dim, cfg.cmp_dim, - len(hidden_layers_sizes), hidden_layers_sizes[0]) - gen_dir_NOTOKENS = os.path.join(gen_dir, temp_dir_name_NOTOKENS) - - temp_dir_name_TOKENS = '%s_%s_%d_%d_%d_%d_%d_%d_TOKENS' \ - % (cfg.model_type, cfg.combined_feature_name, int(cfg.do_post_filtering), - cfg.train_file_number, lab_dim, cfg.cmp_dim, - len(hidden_layers_sizes), hidden_layers_sizes[0]) - gen_dir_TOKENS = os.path.join(gen_dir, temp_dir_name_TOKENS) - - gen_file_id_list = file_id_list[cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number] - test_x_file_list = nn_label_norm_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.DNNGEN: - logger.info('generating from DNN') - - try: - os.makedirs(gen_dir) - except OSError as e: - if e.errno == errno.EEXIST: - # not an error - just means directory already exists - pass - else: - logger.critical( - 'Failed to create generation directory %s' % gen_dir) - logger.critical(' OS error was: %s' % e.strerror) - raise - - # Without words embeddings: - gen_file_list_NOTOKENS = prepare_file_path_list( - gen_file_id_list, gen_dir_NOTOKENS, cfg.cmp_ext) - dnn_generation(test_x_file_list, nnets_file_name, lab_dim, cfg.cmp_dim, - gen_file_list_NOTOKENS, cfg=cfg, use_word_projections=False) - - # With word embeddings: - gen_file_list_TOKENS = prepare_file_path_list( - gen_file_id_list, gen_dir_TOKENS, cfg.cmp_ext) - dnn_generation(test_x_file_list, nnets_file_name, lab_dim, cfg.cmp_dim, - gen_file_list_TOKENS, cfg=cfg, use_word_projections=True) - - logger.debug('denormalising generated output using method %s' % - cfg.output_feature_normalisation) - - for gen_file_list in [gen_file_list_NOTOKENS, gen_file_list_TOKENS]: - - fid = open(norm_info_file, 'rb') - cmp_min_max = numpy.fromfile(fid, dtype=numpy.float32) - fid.close() - cmp_min_max = cmp_min_max.reshape((2, -1)) - cmp_min_vector = cmp_min_max[0, ] - cmp_max_vector = cmp_min_max[1, ] - - if cfg.output_feature_normalisation == 'MVN': - denormaliser = MeanVarianceNorm(feature_dimension=cfg.cmp_dim) - denormaliser.feature_denormalisation( - gen_file_list, gen_file_list, cmp_min_vector, cmp_max_vector) - - elif cfg.output_feature_normalisation == 'MINMAX': - denormaliser = MinMaxNormalisation( - cfg.cmp_dim, min_value=0.01, max_value=0.99, min_vector=cmp_min_vector, max_vector=cmp_max_vector) - denormaliser.denormalise_data(gen_file_list, gen_file_list) - else: - logger.critical('denormalising method %s is not supported!\n' % ( - cfg.output_feature_normalisation)) - raise - - # perform MLPG to smooth parameter trajectory - # lf0 is included, the output features much have vuv. 
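-            # A minimal sketch of the MLPG step assumed to happen inside
-            # ParameterGeneration below: the cmp vector stacks static, delta and
-            # delta-delta streams (delta_win = [-0.5, 0.0, 0.5] and
-            # acc_win = [1.0, -2.0, 1.0], as used by AcousticComposition above),
-            # and the smoothed static trajectory c is the weighted least-squares
-            # solution of
-            #     (W.T V^-1 W) c = W.T V^-1 mu
-            # where W applies the delta windows, mu is the denormalised network
-            # output and V holds the variances read from var_file_dict.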
- generator = ParameterGeneration( - gen_wav_features=cfg.gen_wav_features) - generator.acoustic_decomposition( - gen_file_list, cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict, var_file_dict) - - # osw: skip MLPG: -# split_cmp(gen_file_list, ['mgc', 'lf0', 'bap'], cfg.cmp_dim, cfg.out_dimension_dict, cfg.file_extension_dict) - - # generate wav - if cfg.GENWAV: - logger.info('reconstructing waveform(s)') - for gen_dir in [gen_dir_NOTOKENS, gen_dir_TOKENS]: - generate_wav(gen_dir, gen_file_id_list, cfg) # generated speech - # generate_wav(nn_cmp_dir, gen_file_id_list) # reference copy synthesis speech - - # evaluation: calculate distortion - if cfg.CALMCD: - logger.info('calculating MCD') - - ref_data_dir = os.path.join(data_dir, 'ref_data') - - ref_mgc_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.mgc_ext) - ref_bap_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.bap_ext) - ref_lf0_list = prepare_file_path_list( - gen_file_id_list, ref_data_dir, cfg.lf0_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - calculator = IndividualDistortionComp() - - spectral_distortion = 0.0 - bap_mse = 0.0 - f0_mse = 0.0 - vuv_error = 0.0 - - valid_file_id_list = file_id_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number] - test_file_id_list = file_id_list[cfg.train_file_number + - cfg.valid_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if cfg.remove_silence_using_binary_labels: - # get lab_dim: - label_composer = LabelComposer() - label_composer.load_label_configuration(cfg.label_config_file) - lab_dim = label_composer.compute_label_dimension() - - # use first feature in label -- hardcoded for now - silence_feature = 0 - - # Use these to trim silence: - untrimmed_test_labels = binary_label_file_list[cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - - if 'mgc' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['mgc'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_mgc_list, cfg.mgc_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=['*-#+*']) - remover.remove_silence(in_file_list_dict['mgc'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_mgc_list) - valid_spectral_distortion = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - test_spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.mgc_ext, cfg.mgc_dim) - # MCD - valid_spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - test_spectral_distortion *= (10 / - numpy.log(10)) * numpy.sqrt(2.0) # MCD - - if 'bap' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['bap'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_bap_list, cfg.bap_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=['*-#+*']) - 
remover.remove_silence(in_file_list_dict['bap'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_bap_list) - valid_bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - test_bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.bap_ext, cfg.bap_dim) - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - valid_bap_mse = valid_bap_mse / 10.0 - # Cassia's bap is computed from 10*log|S(w)|. if use HTS/SPTK style, do the same as MGC - test_bap_mse = test_bap_mse / 10.0 - - if 'lf0' in cfg.in_dimension_dict: - if cfg.remove_silence_using_binary_labels: - untrimmed_reference_data = in_file_list_dict['lf0'][cfg.train_file_number: - cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - trim_silence(untrimmed_reference_data, ref_lf0_list, cfg.lf0_dim, - untrimmed_test_labels, lab_dim, silence_feature) - else: - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=['*-#+*']) - remover.remove_silence(in_file_list_dict['lf0'][cfg.train_file_number:cfg.train_file_number + - cfg.valid_file_number+cfg.test_file_number], in_gen_label_align_file_list, ref_lf0_list) - valid_f0_mse, valid_vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - test_f0_mse, test_vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, gen_dir, cfg.lf0_ext, cfg.lf0_dim) - - logger.info('Develop: DNN -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' - % (valid_spectral_distortion, valid_bap_mse, valid_f0_mse, valid_vuv_error*100.)) - logger.info('Test : DNN -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' - % (test_spectral_distortion, test_bap_mse, test_f0_mse, test_vuv_error*100.)) - - # this can be removed - # - if 0: # to calculate distortion of HMM baseline - hmm_gen_no_silence_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nick_hmm_pf_2400_no_silence' - hmm_gen_dir = '/afs/inf.ed.ac.uk/group/project/dnn_tts/data/nick/nick_hmm_pf_2400' - - if 1: - hmm_mgc_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_dir, cfg.mgc_ext) - hmm_bap_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_dir, cfg.bap_ext) - hmm_lf0_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_dir, cfg.lf0_ext) - - hmm_mgc_no_silence_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_no_silence_dir, cfg.mgc_ext) - hmm_bap_no_silence_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_no_silence_dir, cfg.bap_ext) - hmm_lf0_no_silence_list = prepare_file_path_list( - gen_file_id_list, hmm_gen_no_silence_dir, cfg.lf0_ext) - - in_gen_label_align_file_list = in_label_align_file_list[ - cfg.train_file_number:cfg.train_file_number+cfg.valid_file_number+cfg.test_file_number] - remover = SilenceRemover( - n_cmp=cfg.mgc_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - hmm_mgc_list, in_gen_label_align_file_list, hmm_mgc_no_silence_list) - - remover = SilenceRemover( - n_cmp=cfg.bap_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - hmm_bap_list, in_gen_label_align_file_list, hmm_bap_no_silence_list) - - remover = SilenceRemover( - n_cmp=cfg.lf0_dim, silence_pattern=['*-#+*']) - remover.remove_silence( - hmm_lf0_list, in_gen_label_align_file_list, hmm_lf0_no_silence_list) - - calculator = IndividualDistortionComp() - - spectral_distortion = calculator.compute_distortion( - 
valid_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.mgc_ext, cfg.mgc_dim) - bap_mse = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.bap_ext, cfg.bap_dim) - f0_mse, vuv_error = calculator.compute_distortion( - valid_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.lf0_ext, cfg.lf0_dim) - - spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - bap_mse = bap_mse / 10.0 - - logger.info('Develop: HMM -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' % - (spectral_distortion, bap_mse, f0_mse, vuv_error*100.)) - - spectral_distortion = calculator.compute_distortion( - test_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.mgc_ext, cfg.mgc_dim) - bap_mse = calculator.compute_distortion( - test_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.bap_ext, cfg.bap_dim) - f0_mse, vuv_error = calculator.compute_distortion( - test_file_id_list, ref_data_dir, hmm_gen_no_silence_dir, cfg.lf0_ext, cfg.lf0_dim) - - spectral_distortion *= (10 / numpy.log(10)) * numpy.sqrt(2.0) - bap_mse = bap_mse / 10.0 - - logger.info('Test : HMM -- MCD: %.3f dB; BAP: %.3f dB; F0: %.3f Hz; VUV: %.3f%%' % - (spectral_distortion, bap_mse, f0_mse, vuv_error*100.)) - - -if __name__ == '__main__': - - # these things should be done even before trying to parse the command line - - # create a configuration instance - # and get a short name for this instance - cfg = configuration.cfg - - # set up logging to use our custom class - logging.setLoggerClass(LoggerPlotter) - - # get a logger for this main function - logger = logging.getLogger("main") - - if len(sys.argv) != 2: - logger.critical('usage: run_dnn.sh [config file name]') - sys.exit(1) - - config_file = sys.argv[1] - - config_file = os.path.abspath(config_file) - cfg.configure(config_file) - - if cfg.profile: - logger.info('profiling is activated') - import cProfile - import pstats - cProfile.run('main_function(cfg)', 'mainstats') - - # create a stream for the profiler to write to - profiling_output = io.StringIO() - p = pstats.Stats('mainstats', stream=profiling_output) - - # print stats to that stream - # here we just report the top 10 functions, sorted by total amount of time spent in each - p.strip_dirs().sort_stats('tottime').print_stats(10) - - # print the result to the log - logger.info('---Profiling result follows---\n%s' % - profiling_output.getvalue()) - profiling_output.close() - logger.info('---End of profiling result---') - - else: - main_function(cfg) - - sys.exit(0) -#!/usr/bin/python - -""" -A simple discretionary locking system for /dev/nvidia devices. - -Iain Murray, November 2009, January 2010, January 2011. -""" - -import os -import os.path - -_dev_prefix = '/dev/nvidia' -#URL = 'http://www.cs.toronto.edu/~murray/code/gpu_monitoring/' -URL = 'http://homepages.inf.ed.ac.uk/imurray2/code/gpu_monitoring/' - - -# Get ID's of NVIDIA boards. 
Should do this through a CUDA call, but this is -# a quick and dirty way that works for now: -def board_ids(): - """Returns integer board ids available on this machine.""" - from glob import glob - board_devs = glob(_dev_prefix + '[0-9]*') - return list(range(len(board_devs))) - - -def _lock_file(id): - """lock file from integer id""" - # /tmp is cleared on reboot on many systems, but it doesn't have to be - if os.path.exists('/dev/shm'): - # /dev/shm on linux machines is a RAM disk, so is definitely cleared - return '/dev/shm/gpu_lock_%d' % id - else: - return '/tmp/gpu_lock_%d' % id - - -def owner_of_lock(id): - """Username that has locked the device id. (Empty string if no lock).""" - import pwd - try: - statinfo = os.lstat(_lock_file(id)) - return pwd.getpwuid(statinfo.st_uid).pw_name - except: - return "" - - -def _obtain_lock(id): - """Attempts to lock id, returning success as True/False.""" -# print id - try: - # On POSIX systems symlink creation is atomic, so this should be a - # robust locking operation: - os.symlink('/dev/null', _lock_file(id)) - return True - except: - return False - - -def _launch_reaper(id, pid): - """Start a process that will free a lock when process pid terminates""" - from subprocess import Popen, PIPE - me = __file__ - if me.endswith('.pyc'): - me = me[:-1] - myloc = os.path.dirname(me) - if not myloc: - myloc = os.getcwd() - reaper_cmd = os.path.join(myloc, 'run_on_me_or_pid_quit') - Popen([reaper_cmd, str(pid), me, '--free', str(id)], - stdout=open('/dev/null', 'w')) - - -def obtain_lock_id(pid=None): - """ - Finds a free id, locks it and returns integer id, or -1 if none free. - - A process is spawned that will free the lock automatically when the - process pid (by default the current python process) terminates. - """ - id = -1 - id = obtain_lock_id_to_hog() - try: - if id >= 0: - if pid is None: - pid = os.getpid() - _launch_reaper(id, pid) - except: - free_lock(id) - id = -1 - return id - - -def obtain_lock_id_to_hog(): - """ - Finds a free id, locks it and returns integer id, or -1 if none free. 
-
-    * Lock must be freed manually *
-    """
-    for id in board_ids():
-        if _obtain_lock(id):
-            return id
-    return -1
-
-
-def free_lock(id):
-    """Attempts to free lock id, returning success as True/False."""
-    try:
-        filename = _lock_file(id)
-        # On POSIX systems os.rename is an atomic operation, so this is the safe
-        # way to delete a lock:
-        os.rename(filename, filename + '.redundant')
-        os.remove(filename + '.redundant')
-        return True
-    except:
-        return False
-
-
-# If run as a program:
-if __name__ == "__main__":
-    import sys
-    me = sys.argv[0]
-    # Report
-    if '--id' in sys.argv:
-        if len(sys.argv) > 2:
-            try:
-                pid = int(sys.argv[2])
-                print(pid, sys.argv[2])
-                assert(os.path.exists('/proc/%d' % pid))
-            except:
-                print('Usage: %s --id [pid_to_wait_on]' % me)
-                print('The optional process id must exist if specified.')
-                print('Otherwise the id of the parent process is used.')
-                sys.exit(1)
-        else:
-            pid = os.getppid()
-            print(pid)
-        print(obtain_lock_id(pid))
-    elif '--id-to-hog' in sys.argv:
-        print(obtain_lock_id_to_hog())
-    elif '--free' in sys.argv:
-        try:
-            id = int(sys.argv[2])
-        except:
-            print('Usage: %s --free <id>' % me)
-            sys.exit(1)
-        if free_lock(id):
-            print("Lock freed")
-        else:
-            owner = owner_of_lock(id)
-            if owner:
-                print("Failed to free lock id=%d owned by %s" % (id, owner))
-            else:
-                print("Failed to free lock, but it wasn't actually set?")
-    else:
-        print('\n Usage instructions:\n')
-        print(' To obtain and lock an id: %s --id' % me)
-        print(' The lock is automatically freed when the parent terminates')
-        print()
-        print(" To get an id that won't be freed: %s --id-to-hog" % me)
-        print(" You *must* manually free these ids: %s --free <id>\n" % me)
-        print(' More info: %s\n' % URL)
-        div = ' ' + "-"*60
-        print('\n' + div)
-        print(" NVIDIA board users:")
-        print(div)
-        for id in board_ids():
-            print(" Board %d: %s" % (id, owner_of_lock(id)))
-        print(div + '\n')
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
-"""
-@author: Felipe Espic
-
-DESCRIPTION:
-As Merlin works at a constant frame rate, but MagPhase runs at a variable frame
-rate, we need to trick Merlin by warping the time durations in the label files.
-This script converts the original constant-frame-rate state-aligned labels to
-variable-frame-rate labels, thus compensating for the frame rate mismatch.
-This script acts as a workaround, so it should be removed when Merlin natively
-supports variable frame rates.
-
-USE:
-python <this_script> <file_id_list> <in_lab_dir> <in_feats_dir> <fs> <out_lab_dir>
-
-NOTES:
-1.- This script needs ".shift" files extracted by MagPhase, even though they are not
-    used for acoustic modelling (only .mag, .real, .imag, and .lf0 files are used in training/synthesis).
-
-2.- The file crashlist_file stores the list of utterances that were not possible
-    to convert. This could happen if for example some phonemes had no frames assigned.
-    It rarely occurs.
-"""
-
-import sys
-import os
-
-this_dir = os.path.dirname(__file__)
-sys.path.append(os.path.realpath(this_dir + '/../../../../tools/magphase/src'))
-
-import libaudio as la
-import magphase as mp
-import libutils as lu
-
-
-def convert(file_id_list, in_lab_dir, in_feats_dir, fs, out_lab_dir, b_prevent_zeros=False):
-    '''
-    b_prevent_zeros: True if you want to ensure that all the phonemes have one frame at least.
- (not recommended, only useful when there are too many utterances crashed) - ''' - - # Conversion: - lu.mkdir(out_lab_dir) - v_filenames = lu.read_text_file2( - file_id_list, dtype='string', comments='#') - - crashlist_file = lu.ins_pid('crash_file_list.scp') - for filename in v_filenames: - - # Display: - print('\nConverting lab file: ' + filename + - '................................') - - # Current i/o files: - in_lab_file = os.path.join(in_lab_dir, filename + '.lab') - out_lab_file = os.path.join(out_lab_dir, filename + '.lab') - - in_shift_file = os.path.join(in_feats_dir, filename + '.shift') - - # Debug: - ''' - v_shift = lu.read_binfile(in_shift_file, dim=1) - v_n_frms = mp.get_num_of_frms_per_state(v_shift, in_lab_file, fs, b_prevent_zeros=b_prevent_zeros) - la.convert_label_state_align_to_var_frame_rate(in_lab_file, v_n_frms, out_lab_file) - #''' - - try: - v_shift = lu.read_binfile(in_shift_file, dim=1) - v_n_frms = mp.get_num_of_frms_per_state( - v_shift, in_lab_file, fs, b_prevent_zeros=b_prevent_zeros) - - la.convert_label_state_align_to_var_frame_rate( - in_lab_file, v_n_frms, out_lab_file) - - except (KeyboardInterrupt, SystemExit): - raise - - except: - with open(crashlist_file, "a") as crashlistlog: - crashlistlog.write(filename + '\n') - - print('Done!') - - -if __name__ == '__main__': - - # Parsing input arg: - file_id_list = sys.argv[1] - in_lab_dir = sys.argv[2] - in_feats_dir = sys.argv[3] - fs = int(sys.argv[4]) - out_lab_dir = sys.argv[5] - - convert(file_id_list, in_lab_dir, in_feats_dir, - fs, out_lab_dir, b_prevent_zeros=False) - - -import numpy - - -class BinaryIOCollection(object): - - def load_binary_file(self, file_name, dimension): - fid_lab = open(file_name, 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - assert features.size % float( - dimension) == 0.0, 'specified dimension not compatible with data' - features = features[:(dimension * (features.size // dimension))] - features = features.reshape((-1, dimension)) - - return features - - def array_to_binary_file(self, data, output_file_name): - data = numpy.array(data, 'float32') - - fid = open(output_file_name, 'wb') - data.tofile(fid) - fid.close() - - def load_binary_file_frame(self, file_name, dimension): - fid_lab = open(file_name, 'rb') - features = numpy.fromfile(fid_lab, dtype=numpy.float32) - fid_lab.close() - assert features.size % float( - dimension) == 0.0, 'specified dimension not compatible with data' - frame_number = features.size // dimension - features = features[:(dimension * frame_number)] - features = features.reshape((-1, dimension)) - - return features, frame_number -import os -import sys -import time - -from sys import argv, stderr -from subprocess import check_call, Popen, CalledProcessError, PIPE -from mean_variance_norm import MeanVarianceNorm - -# string constants for various shell calls -STATE_NUM = 5 -F = str(0.01) -SFAC = str(5.0) -PRUNING = [str(i) for i in (250., 150., 2000.)] - -MACROS = 'macros' -HMMDEFS = 'hmmdefs' -VFLOORS = 'vFloors' - -## -HTKDIR = path/to/tools/htk -HCompV = os.path.join(HTKDIR, 'HCompV') -HCopy = os.path.join(HTKDIR, 'HCopy') -HERest = os.path.join(HTKDIR, 'HERest') -HHEd = os.path.join(HTKDIR, 'HHEd') -HVite = os.path.join(HTKDIR, 'HVite') - - -class ForcedAlignment(object): - - def __init__(self): - self.proto = None - self.phoneme_mlf = None - - def _make_proto(self): - # make proto - fid = open(self.proto, 'w') - means = ' '.join(['0.0' for _ in range(39)]) - varg = ' '.join(['1.0' for _ in range(39)]) - 
fid.write("""~o <VecSize> 39 <MFCC_D_A_0>
-~h "proto"
-<BeginHMM>
-<NumStates> 7
-""")
-        for i in range(2, STATE_NUM+2):
-            fid.write('<State> {0}\n<Mean> 39\n{1}\n'.format(i, means))
-            fid.write('<Variance> 39\n{0}\n'.format(varg))
-        fid.write("""<TransP> 7
- 0.0 1.0 0.0 0.0 0.0 0.0 0.0
- 0.0 0.6 0.4 0.0 0.0 0.0 0.0
- 0.0 0.0 0.6 0.4 0.0 0.0 0.0
- 0.0 0.0 0.0 0.6 0.4 0.0 0.0
- 0.0 0.0 0.0 0.0 0.6 0.4 0.0
- 0.0 0.0 0.0 0.0 0.0 0.7 0.3
- 0.0 0.0 0.0 0.0 0.0 0.0 0.0
-<EndHMM>
-""")
-        fid.close()
-
-        # make vFloors
-        check_call([HCompV, '-f', F, '-C', self.cfg,
-                    '-S', self.train_scp,
-                    '-M', self.cur_dir, self.proto])
-        # make local macro
-        # get first three lines from local proto
-        fid = open(os.path.join(self.cur_dir, MACROS), 'w')
-        source = open(os.path.join(self.cur_dir,
-                                   os.path.split(self.proto)[1]), 'r')
-        for _ in range(3):
-            fid.write(source.readline())
-        source.close()
-        # get remaining lines from vFloors
-        fid.writelines(open(os.path.join(self.cur_dir,
-                                         VFLOORS), 'r').readlines())
-        fid.close()
-        # make hmmdefs
-        fid = open(os.path.join(self.cur_dir, HMMDEFS), 'w')
-        for phone in open(self.phonemes, 'r'):
-            source = open(self.proto, 'r')
-            # ignore
-            source.readline()
-            source.readline()
-            # the header
-            fid.write('~h "{0}"\n'.format(phone.rstrip()))
-            # the rest
-            fid.writelines(source.readlines())
-            source.close()
-        fid.close()
-
-    def _read_file_list(self, file_name):
-
-        file_lists = []
-        fid = open(file_name)
-        for line in fid.readlines():
-            line = line.strip()
-            if len(line) < 1:
-                continue
-            file_lists.append(line)
-        fid.close()
-
-        return file_lists
-
-    def _full_to_mono(self, full_file_name, mono_file_name, phoneme_dict):
-        fre = open(full_file_name, 'r')
-        fwe = open(mono_file_name, 'w')
-        for line in fre.readlines():
-            line = line.strip()
-            if len(line) < 1:
-                continue
-            tmp_list = line.split('-')
-            tmp_list = tmp_list[1].split('+')
-            mono_phone = tmp_list[0]
-            fwe.write('{0}\n'.format(mono_phone))
-            if mono_phone not in phoneme_dict:
-                phoneme_dict[mono_phone] = 1
-            phoneme_dict[mono_phone] += 1
-        fwe.close()
-        fre.close()
-
-    def _check_data(self, file_id_list, multiple_speaker):
-
-        copy_scp = open(self.copy_scp, 'w')
-        check_scp = open(self.train_scp, 'w')
-        i = 0
-
-        phoneme_dict = {}
-        speaker_utt_dict = {}
-
-        for file_id in file_id_list:
-            wav_file = os.path.join(self.wav_dir, file_id + '.wav')
-            lab_file = os.path.join(self.lab_dir, file_id + '.lab')
-            mfc_file = os.path.join(self.mfc_dir, file_id + '.mfc')
-            mono_lab_file = os.path.join(self.mono_lab_dir, file_id + '.lab')
-
-            mfc_sub_dir = os.path.dirname(mfc_file)
-            if os.path.exists(wav_file) and os.path.exists(lab_file):
-                if not os.path.exists(mfc_sub_dir):
-                    os.makedirs(mfc_sub_dir)
-
-                copy_scp.write('{0} {1}\n'.format(wav_file, mfc_file))
-                check_scp.write('{0}\n'.format(mfc_file))
-
-                if multiple_speaker:
-                    tmp_list = file_id.split('/')
-                    speaker_name = tmp_list[0]
-                    if speaker_name not in speaker_utt_dict:
-                        speaker_utt_dict[speaker_name] = []
-                    speaker_utt_dict[speaker_name].append(mfc_file)
-                else:
-                    if 'only_one' not in speaker_utt_dict:
-                        speaker_utt_dict['only_one'] = []
-                    speaker_utt_dict['only_one'].append(mfc_file)
-
-                self._full_to_mono(lab_file, mono_lab_file, phoneme_dict)
-        copy_scp.close()
-        check_scp.close()
-
-        fid = open(self.phonemes, 'w')
-        fmap = open(self.phoneme_map, 'w')
-        for phoneme in list(phoneme_dict.keys()):
-            fid.write('{0}\n'.format(phoneme))
-            fmap.write('{0} {0}\n'.format(phoneme))
-        fmap.close()
-        fid.close()
-
-        self.phoneme_mlf = os.path.join(self.cfg_dir, 'mono_phone.mlf')
-        fid = open(self.phoneme_mlf, 'w')
-        fid.write('#!MLF!#\n')
-        
fid.write('"*/*.lab" -> "' + self.mono_lab_dir + '"\n') - fid.close() - - return speaker_utt_dict - - def _HCopy(self): - """ - Compute MFCCs - """ - # write a CFG for extracting MFCCs - open(self.cfg, 'w').write("""SOURCEKIND = WAVEFORM -SOURCEFORMAT = WAVE -TARGETRATE = 50000.0 -TARGETKIND = MFCC_D_A_0 -WINDOWSIZE = 250000.0 -PREEMCOEF = 0.97 -USEHAMMING = T -ENORMALIZE = T -CEPLIFTER = 22 -NUMCHANS = 20 -NUMCEPS = 12 -""") - check_call([HCopy, '-C', self.cfg, '-S', self.copy_scp]) - # write a CFG for what we just built - open(self.cfg, 'w').write("""TARGETRATE = 50000.0 -TARGETKIND = USER -WINDOWSIZE = 250000.0 -PREEMCOEF = 0.97 -USEHAMMING = T -ENORMALIZE = T -CEPLIFTER = 22 -NUMCHANS = 20 -NUMCEPS = 12 -""") - - def _nxt_dir(self): - """ - Get the next HMM directory - """ - # pass on the previously new one to the old one - self.cur_dir = self.nxt_dir - # increment - self.n += 1 - # compute the path for the new one - self.nxt_dir = os.path.join(self.hmm_dir, str(self.n).zfill(3)) - # make the new directory - os.mkdir(self.nxt_dir) - - def prepare_training(self, file_id_list_name, wav_dir, lab_dir, work_dir, multiple_speaker): - - print('---preparing enverionment') - self.cfg_dir = os.path.join(work_dir, 'config') - self.model_dir = os.path.join(work_dir, 'model') - self.cur_dir = os.path.join(self.model_dir, 'hmm0') - if not os.path.exists(self.cfg_dir): - os.makedirs(self.cfg_dir) - if not os.path.exists(self.cur_dir): - os.makedirs(self.cur_dir) - - self.phonemes = os.path.join(work_dir, 'mono_phone.list') - self.phoneme_map = os.path.join(work_dir, 'phoneme_map.dict') - # HMMs - self.proto = os.path.join(self.cfg_dir, 'proto') - # SCP files - self.copy_scp = os.path.join(self.cfg_dir, 'copy.scp') - self.test_scp = os.path.join(self.cfg_dir, 'test.scp') - self.train_scp = os.path.join(self.cfg_dir, 'train.scp') - # CFG - self.cfg = os.path.join(self.cfg_dir, 'cfg') - - self.wav_dir = wav_dir - self.lab_dir = lab_dir - self.mfc_dir = os.path.join(work_dir, 'mfc') - if not os.path.exists(self.mfc_dir): - os.makedirs(self.mfc_dir) - - self.mono_lab_dir = os.path.join(work_dir, 'mono_no_align') - if not os.path.exists(self.mono_lab_dir): - os.makedirs(self.mono_lab_dir) - - file_id_list = self._read_file_list(file_id_list_name) - print('---checking data') - speaker_utt_dict = self._check_data(file_id_list, multiple_speaker) - - print('---extracting features') - self._HCopy() - print(time.strftime("%c")) - - print('---feature_normalisation') - normaliser = MeanVarianceNorm(39) - for key_name in list(speaker_utt_dict.keys()): - normaliser.feature_normalisation( - speaker_utt_dict[key_name], speaker_utt_dict[key_name]) # save to itself - print(time.strftime("%c")) - - print('---making proto') - self._make_proto() - - def train_hmm(self, niter, num_mix): - """ - Perform one or more rounds of estimation - """ - - print(time.strftime("%c")) - print('---training HMM models') - done = 0 - mix = 1 - while mix <= num_mix and done == 0: - for i in range(niter): - next_dir = os.path.join( - self.model_dir, 'hmm_mix_' + str(mix) + '_iter_' + str(i+1)) - if not os.path.exists(next_dir): - os.makedirs(next_dir) - check_call([HERest, '-C', self.cfg, '-S', self.train_scp, - '-I', self.phoneme_mlf, - '-M', next_dir, - '-H', os.path.join(self.cur_dir, MACROS), - '-H', os.path.join(self.cur_dir, HMMDEFS), - '-t'] + PRUNING + [self.phonemes], - stdout=PIPE) - self.cur_dir = next_dir - - if mix * 2 <= num_mix: - # increase mixture number - hed_file = os.path.join( - self.cfg_dir, 'mix_' + str(mix * 2) + 
'.hed') - fid = open(hed_file, 'w') - fid.write('MU ' + str(mix * 2) + - ' {*.state[2-'+str(STATE_NUM+2)+'].mix}\n') - fid.close() - - next_dir = os.path.join( - self.model_dir, 'hmm_mix_' + str(mix * 2) + '_iter_0') - if not os.path.exists(next_dir): - os.makedirs(next_dir) - - check_call([HHEd, '-A', - '-H', os.path.join(self.cur_dir, MACROS), - '-H', os.path.join(self.cur_dir, HMMDEFS), - '-M', next_dir] + [hed_file] + [self.phonemes]) - - self.cur_dir = next_dir - mix = mix * 2 - else: - done = 1 - - def align(self, work_dir, lab_align_dir): - """ - Align using the models in self.cur_dir and MLF to path - """ - print('---aligning data') - print(time.strftime("%c")) - self.align_mlf = os.path.join(work_dir, 'mono_align.mlf') - - check_call([HVite, '-a', '-f', '-m', '-y', 'lab', '-o', 'SM', - '-i', self.align_mlf, '-L', self.mono_lab_dir, - '-C', self.cfg, '-S', self.train_scp, - '-H', os.path.join(self.cur_dir, MACROS), - '-H', os.path.join(self.cur_dir, HMMDEFS), - '-I', self.phoneme_mlf, '-t'] + PRUNING + - ['-s', SFAC, self.phoneme_map, self.phonemes]) - - self._postprocess(self.align_mlf, lab_align_dir) - - def _postprocess(self, mlf, lab_align_dir): - if not os.path.exists(lab_align_dir): - os.makedirs(lab_align_dir) - - state_num = STATE_NUM - fid = open(mlf, 'r') - line = fid.readline() - while True: - line = fid.readline() - line = line.strip() - if len(line) < 1: - break - line = line.replace('"', '') - file_base = os.path.basename(line) - flab = open(os.path.join(self.lab_dir, file_base), 'r') - fw = open(os.path.join(lab_align_dir, file_base), 'w') - for full_lab in flab.readlines(): - full_lab = full_lab.strip() - for i in range(state_num): - line = fid.readline() - line = line.strip() - tmp_list = line.split() - fw.write('{0} {1} {2}[{3}]\n'.format( - tmp_list[0], tmp_list[1], full_lab, i+2)) - - fw.close() - flab.close() - line = fid.readline() - line = line.strip() - if line != '.': - print('The two files are not matched!\n') - sys.exit(1) - fid.close() - - -if __name__ == '__main__': - - work_dir = os.getcwd() - - wav_dir = os.path.join(work_dir, 'slt_wav') - lab_dir = os.path.join(work_dir, 'label_no_align') - lab_align_dir = os.path.join(work_dir, 'label_state_align') - - file_id_list_name = os.path.join(work_dir, 'file_id_list.scp') - - # if multiple_speaker is tuned on. the file_id_list.scp has to reflact this - # for example - # speaker_1/0001 - # speaker_2/0001 - # This is to do speaker-dependent normalisation - multiple_speaker = False - - aligner = ForcedAlignment() - aligner.prepare_training(file_id_list_name, wav_dir, - lab_dir, work_dir, multiple_speaker) - - aligner.train_hmm(7, 32) - aligner.align(work_dir, lab_align_dir) - print('---done!') -''' -Copyright 2011-2013 Pawel Swietojanski - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -THIS CODE IS PROVIDED *AS IS* BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -KIND, EITHER EXPRESS OR IMPLIED, INCLUDING WITHOUT LIMITATION ANY IMPLIED -WARRANTIES OR CONDITIONS OF TITLE, FITNESS FOR A PARTICULAR PURPOSE, -MERCHANTABLITY OR NON-INFRINGEMENT. -See the Apache 2 License for the specific language governing permissions and -limitations under the License. 
- -Not fully implemented [28 OCT 2011] -TODO: support for options: _C, H_IREFC - -''' - -import io -import os -import sys -import numpy -import struct -import logging - - -class HTK_Parm_IO(object): - ''' - For details look at the HTK book, Chapter 5.10 Storage of Parameter Files - ''' - - # HTK datatybes - H_WAVEFORM = 0 - H_LPC = 1 - H_LPREFC = 2 - H_LPCEPSTRA = 3 - H_LPDELCEP = 4 - H_IREFC = 5 - H_MFCC = 6 - H_FBANK = 7 - H_MELSPEC = 8 - H_USER = 9 - H_DISCRETE = 10 - H_PLP = 11 - H_ANON = 12 - - # Additional 'param kind' options - _E = 0x0001 # has energy - _N = 0x0002 # absolute energy suppressed - _D = 0x0004 # has delta coefficients - _A = 0x0008 # has acceleration coefficients - _C = 0x0010 # is compressed - _Z = 0x0020 # has zero mean static coef. - _K = 0x0040 # has CRC checksum - _O = 0x0080 # has 0th cepstral coef. - _V = 0x0100 # has VQ data - _T = 0x0200 # has third differential coef. - - MASK_H_DATATYPE = 0x003f # the first 6 bits contain datatype - - def __init__(self, n_samples=0, samp_period=0, samp_size=0, param_kind=0, data=None): - ''' - ''' - - # HTK header - # number of samples in file (4-byte integer) - self.n_samples = n_samples - # sample period in 100ns units (4-byte integer) - self.samp_period = samp_period - # number of bytes per sample (2-byte integer) - self.samp_size = samp_size - # a code indicating the sample kind (2-byte integer) - self.param_kind = param_kind - - self.data = data - - return None - - def htk_datatype(self): - return (self.param_kind & self.MASK_H_DATATYPE) - - def set_htk_datatype(self, value): - self.param_kind = value | ~self.MASK_H_DATATYPE - - def htk_datatype_has_option(self, option): - """Return True/False if the given options are set - - :type option: int - :param option: one of the _E _N _D etc. 
flags
-
-        """
-        return (((self.param_kind >> 6) & option) > 0)
-
-    def set_htk_datatype_option(self, value):
-        self.param_kind = (value << 6) | self.param_kind
-
-    def read_htk(self, filename, reshape_to_matrix=True):
-        '''
-        '''
-        try:
-
-            f = open(filename, 'rb')
-
-            self.n_samples = struct.unpack('>I', f.read(4))[0]
-            self.samp_period = struct.unpack('>I', f.read(4))[0]
-            self.samp_size = struct.unpack('>H', f.read(2))[0]
-            self.param_kind = struct.unpack('>H', f.read(2))[0]
-
-            if self.htk_datatype_has_option(self._C):
-                #self.A = struct.unpack('>H', f.read(2))[0]
-                #self.B = struct.unpack('>H', f.read(2))[0]
-                raise Exception("Compressed files not supported yet!")
-
-            if (self.htk_datatype() == self.H_WAVEFORM):
-                self.data = numpy.fromfile(f, numpy.int16)
-            else:
-                self.data = numpy.fromfile(f, numpy.float32)
-#            print "world"
-            if reshape_to_matrix:
-                self.data = self.data.reshape((self.n_samples, -1))
-
-            if(sys.byteorder == 'little'):
-                # print "hello"
-                self.data.byteswap(True)  # forces big-endian byte ordering
-
-            f.close()
-        except IOError as e:
-            logging.error(e)
-            raise Exception(e)
-
-        return None
-
-    def write_htk(self, filename):
-        '''
-        '''
-        try:
-
-            file = open(filename, 'wb')
-
-            file.write(struct.pack('>I', self.n_samples))
-            file.write(struct.pack('>I', self.samp_period))
-            file.write(struct.pack('>H', self.samp_size))
-            file.write(struct.pack('>H', self.param_kind))
-
-            if(sys.byteorder == 'little'):
-                self.data.byteswap(True)  # force big-endian byte ordering
-
-            self.data.tofile(file)
-
-        except IOError as e:
-            raise Exception(e)
-
-        return None
-
-    def print_info(self):
-
-        print("Samples number: ", self.n_samples)
-        print("Sample period: [100ns]", self.samp_period)
-        print("Bytes/sample:", self.samp_size)
-        print("ParamKind - datatype: ", self.htk_datatype())
-        print("ParamKind - options: _E(%i), _D(%i), _A(%i)" % (self.htk_datatype_has_option(self._E),
-                                                               self.htk_datatype_has_option(self._D),
-                                                               self.htk_datatype_has_option(self._A)))
-        print("Features matrix shape", self.data.shape)
-        print("Features", self.data)
-
-        return None
-
-    def get_data_size(self):
-        return self.data.size*self.data.itemsize
-
-
-def test_HTK_Parm_IO():
-
-    #filename_src = "../data/GE001_1.feat"
-    filename_src = "../data/tr1.mfc"
-    filename_dst = "../data/tr1_dst.mfc"
-
-    htk = HTK_Parm_IO()
-
-    try:
-        print('SOURCE FILE : ')
-        htk.read_htk(filename_src)
-        htk.print_info()
-        # print "t", htk.dupa, sys.byteorder
-
-        htk.write_htk(filename_dst)
-
-        print('TARGET FILE : ')
-        htk2 = HTK_Parm_IO()
-        htk2.read_htk(filename_dst)
-        htk2.print_info()
-
-    except Exception as e:
-        print(e)
-
-    return None
-
-
-if __name__ == "__main__":
-    test_HTK_Parm_IO()
-# Copyright (c) 2007 Carnegie Mellon University
-#
-# You may copy and modify this freely under the same terms as
-# Sphinx-III
-
-"""Read and write HTK feature files.
-
-This module reads and writes the acoustic feature files used by HTK
-"""
-
-__author__ = "David Huggins-Daines"
-__version__ = "$Revision $"
-
-from struct import unpack, pack
-import numpy
-
-LPC = 1
-LPCREFC = 2
-LPCEPSTRA = 3
-LPCDELCEP = 4
-IREFC = 5
-MFCC = 6
-FBANK = 7
-MELSPEC = 8
-USER = 9
-DISCRETE = 10
-PLP = 11
-
-_E = 0o000100  # has energy
-_N = 0o000200  # absolute energy suppressed
-_D = 0o000400  # has delta coefficients
-_A = 0o001000  # has acceleration (delta-delta) coefficients
-_C = 0o002000  # is compressed
-_Z = 0o004000  # has zero mean static coefficients
-_K = 0o010000  # has CRC checksum
-_O = 0o020000  # has 0th cepstral coefficient
-_V = 0o040000  # has VQ data
-_T = 0o100000  # has third differential coefficients
-
-
-def open_htk_file(f, mode=None, veclen=13):
-    """Open an HTK format feature file for reading or writing.
- The mode parameter is 'rb' (reading) or 'wb' (writing).""" - if mode is None: - if hasattr(f, 'mode'): - mode = f.mode - else: - mode = 'rb' - if mode in ('r', 'rb'): - return HTKFeat_read(f) # veclen is ignored since it's in the file - elif mode in ('w', 'wb'): - return HTKFeat_write(f, veclen) - else: - raise Exception("mode must be 'r', 'rb', 'w', or 'wb'") - - -class HTKFeat_read(object): - "Read HTK format feature files" - - def __init__(self, filename=None): - self.swap = (unpack('=i', pack('>i', 42))[0] != 42) - if (filename != None): - self.open_file(filename) - - def __iter__(self): - self.fh.seek(12, 0) - return self - - def open_file(self, filename): - self.filename = filename - self.fh = open(filename, "rb") - self.readheader() - - def readheader(self): - self.fh.seek(0, 0) - spam = self.fh.read(12) - self.nSamples, self.sampPeriod, self.sampSize, self.parmKind = \ - unpack(">IIHH", spam) - # Get coefficients for compressed data - if self.parmKind & _C: - self.dtype = 'h' - self.veclen = self.sampSize / 2 - if self.parmKind & 0x3f == IREFC: - self.A = 32767 - self.B = 0 - else: - self.A = numpy.fromfile(self.fh, 'f', self.veclen) - self.B = numpy.fromfile(self.fh, 'f', self.veclen) - if self.swap: - self.A = self.A.byteswap() - self.B = self.B.byteswap() - else: - self.dtype = 'f' - self.veclen = self.sampSize / 4 - self.hdrlen = self.fh.tell() - self.veclen = int(self.veclen) - - def seek(self, idx): - self.fh.seek(self.hdrlen + idx * self.sampSize, 0) - - def __next__(self): - vec = numpy.fromfile(self.fh, self.dtype, self.veclen) - if len(vec) == 0: - raise StopIteration - if self.swap: - vec = vec.byteswap() - # Uncompress data to floats if required - if self.parmKind & _C: - vec = (vec.astype('f') + self.B) / self.A - return vec - - def readvec(self): - return next(self) - - def getall(self, filename): - self.open_file(filename) - self.readheader() - -# print self.nSamples, self.veclen - -# print self.parmKind, self.sampPeriod - - self.seek(0) - data = numpy.fromfile(self.fh, self.dtype) -# print len(data), data.shape -# if self.parmKind & _K: # Remove and ignore checksum -# data = data[:-1] -# print data.shape - data = data.reshape((-1, self.veclen)) -# data = tmp_data.reshape((len(tmp_data)/self.veclen, self.veclen)) - if self.swap: - data = data.byteswap() - # Uncompress data to floats if required - if self.parmKind & _C: - data = (data.astype('f') + self.B) / self.A - return data, self.nSamples - - -class HTKFeat_write(object): - "Write Sphinx-II format feature files" - - def __init__(self, filename=None, - veclen=13, sampPeriod=100000, - paramKind=(MFCC | _O)): - self.veclen = veclen - self.sampPeriod = sampPeriod - self.sampSize = veclen * 4 - self.paramKind = paramKind - self.dtype = 'f' - self.filesize = 0 - self.swap = (unpack('=i', pack('>i', 42))[0] != 42) - if (filename != None): - self.open_file(filename) - - def __del__(self): - self.close() - - def open_file(self, filename): - self.filename = filename - self.fh = open(filename, "wb") - self.writeheader() - - def close(self): - self.writeheader() - - def writeheader(self): - self.fh.seek(0, 0) - self.fh.write(pack(">IIHH", self.filesize, - self.sampPeriod, - self.sampSize, - self.paramKind)) - - def writevec(self, vec): - if len(vec) != self.veclen: - raise Exception("Vector length must be %d" % self.veclen) - if self.swap: - numpy.array(vec, self.dtype).byteswap().tofile(self.fh) - else: - numpy.array(vec, self.dtype).tofile(self.fh) - self.filesize = self.filesize + self.veclen - - def writeall(self, arr, 
filename): - self.open_file(filename) - for row in arr: - self.writevec(row) - - self.close() - -from htk_io import HTK_Parm_IO -from htkmfc import HTKFeat_read, HTKFeat_write -import logging -import numpy - - -class MeanVarianceNorm(): - ''' - plan: 1: support normal MVN and denormalisation for both input and output - 2: support stream-based operation: for example, some streams can use min-max, other streams use MVN, may need one more class - ''' - - def __init__(self, feature_dimension): - - self.mean_vector = None - self.std_vector = None - self.feature_dimension = feature_dimension - - def feature_normalisation(self, in_file_list, out_file_list): - logger = logging.getLogger('feature_normalisation') - -# self.feature_dimension = feature_dimension - try: - assert len(in_file_list) == len(out_file_list) - except AssertionError: - logger.critical('The input and output file numbers are not the same! %d vs %d' % ( - len(in_file_list), len(out_file_list))) - raise - - if self.mean_vector == None: - self.mean_vector = self.compute_mean( - in_file_list, 0, self.feature_dimension) - if self.std_vector == None: - self.std_vector = self.compute_std( - in_file_list, self.mean_vector, 0, self.feature_dimension) - - io_funcs = HTKFeat_read() - file_number = len(in_file_list) - for i in range(file_number): - features, current_frame_number = io_funcs.getall(in_file_list[i]) -# print current_frame_number -# features = io_funcs.data -# current_frame_number = io_funcs.n_samples - - mean_matrix = numpy.tile( - self.mean_vector, (current_frame_number, 1)) - std_matrix = numpy.tile(self.std_vector, (current_frame_number, 1)) - - norm_features = (features - mean_matrix) / std_matrix - - htk_writer = HTKFeat_write( - veclen=io_funcs.veclen, sampPeriod=io_funcs.sampPeriod, paramKind=9) - htk_writer.writeall(norm_features, out_file_list[i]) - -# htk_writter = HTK_Parm_IO(n_samples=io_funcs.n_samples, samp_period=io_funcs.samp_period, samp_size=io_funcs.samp_size, param_kind=io_funcs.param_kind, data=norm_features) -# htk_writter.write_htk(out_file_list[i]) - - return self.mean_vector, self.std_vector - - def feature_denormalisation(self, in_file_list, out_file_list, mean_vector, std_vector): - io_funcs = BinaryIOCollection() - file_number = len(in_file_list) - try: - assert len(in_file_list) == len(out_file_list) - except AssertionError: - logger.critical('The input and output file numbers are not the same! 
-
-    def feature_denormalisation(self, in_file_list, out_file_list, mean_vector, std_vector):
-        logger = logging.getLogger('feature_normalisation')
-        io_funcs = BinaryIOCollection()
-        file_number = len(in_file_list)
-        try:
-            assert len(in_file_list) == len(out_file_list)
-        except AssertionError:
-            logger.critical('The input and output file numbers are not the same! %d vs %d' % (
-                len(in_file_list), len(out_file_list)))
-            raise
-
-        try:
-            assert mean_vector.size == self.feature_dimension and std_vector.size == self.feature_dimension
-        except AssertionError:
-            logger.critical(
-                'the dimensionalities of the mean and standard deviation vectors do not match the feature dimensionality')
-            raise
-
-        for i in range(file_number):
-            features, current_frame_number = io_funcs.load_binary_file_frame(
-                in_file_list[i], self.feature_dimension)
-
-            mean_matrix = numpy.tile(mean_vector, (current_frame_number, 1))
-            std_matrix = numpy.tile(std_vector, (current_frame_number, 1))
-
-            norm_features = features * std_matrix + mean_matrix
-
-            io_funcs.array_to_binary_file(norm_features, out_file_list[i])
-
-    def compute_mean(self, file_list, start_index, end_index):
-        logger = logging.getLogger('feature_normalisation')
-
-        local_feature_dimension = end_index - start_index
-
-        mean_vector = numpy.zeros((1, local_feature_dimension))
-        all_frame_number = 0
-
-        io_funcs = HTKFeat_read()
-        for file_name in file_list:
-            features, current_frame_number = io_funcs.getall(file_name)
-
-            mean_vector += numpy.reshape(numpy.sum(
-                features[:, start_index:end_index], axis=0), (1, local_feature_dimension))
-            all_frame_number += current_frame_number
-
-        mean_vector /= float(all_frame_number)
-
-        # Setting print options here seems to break subsequent printing of
-        # numpy float32 types (no idea why), so the temporary
-        # numpy.set_printoptions() calls were removed until that is solved.
-        logger.info('computed mean vector of length %d :' %
-                    mean_vector.shape[1])
-        logger.info(' mean: %s' % mean_vector)
-
-        self.mean_vector = mean_vector
-
-        return mean_vector
-
-    def compute_std(self, file_list, mean_vector, start_index, end_index):
-        logger = logging.getLogger('feature_normalisation')
-
-        local_feature_dimension = end_index - start_index
-
-        std_vector = numpy.zeros((1, local_feature_dimension))
-        all_frame_number = 0
-
-        io_funcs = HTKFeat_read()
-        for file_name in file_list:
-            features, current_frame_number = io_funcs.getall(file_name)
-
-            mean_matrix = numpy.tile(mean_vector, (current_frame_number, 1))
-
-            std_vector += numpy.reshape(numpy.sum(
-                (features[:, start_index:end_index] - mean_matrix) ** 2, axis=0), (1, local_feature_dimension))
-            all_frame_number += current_frame_number
-
-        std_vector /= float(all_frame_number)
-        std_vector = std_vector ** 0.5
-
-        # See the note in compute_mean about numpy print options.
-        logger.info('computed std vector of length %d' % std_vector.shape[1])
-        logger.info(' std: %s' % std_vector)
-
-        self.std_vector = std_vector
-
-        return std_vector
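-
-
-# Editor's note: a minimal, hypothetical driver for the class above; the
-# 61-dim feature width and the file lists are illustrative only.
-if __name__ == '__main__':
-    normaliser = MeanVarianceNorm(feature_dimension=61)
-    in_files = ['feats/utt1.cmp', 'feats/utt2.cmp']
-    out_files = ['feats_norm/utt1.cmp', 'feats_norm/utt2.cmp']
-    mean, std = normaliser.feature_normalisation(in_files, out_files)
-    print('mean shape:', mean.shape, 'std shape:', std.shape)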
-
-
-import os
-import sys
-import glob
-import collections
-
-
-def readtext(fname):
-    with open(fname, 'r') as f:
-        data = f.read().strip(' \n')
-    return data
-
-
-def create_dictionary_from_txt_dir(txt_dir):
-    utt_text = {}
-    textfiles = glob.glob(txt_dir + '/*.txt')
-
-    for textfile in textfiles:
-        junk, filename = os.path.split(textfile)
-        filename = filename.split('.')[0]
-
-        text = readtext(textfile)
-        utt_text[filename] = text
-
-    return utt_text
-
-
-def create_dictionary_from_txt_file(txt_file):
-    utt_text = {}
-    in_f = open(txt_file, 'r')
-    for newline in in_f.readlines():
-        newline = newline.strip()
-        newline = newline.replace('(', '')
-        newline = newline.replace(')', '')
-
-        text_parts = newline.split()
-        filename = text_parts[0]
-
-        text = ' '.join(text_parts[1:])
-        text = text[1:-1]  # remove beginning and end double quotes
-
-        utt_text[filename] = text
-    in_f.close()
-
-    return utt_text
-
-
-if __name__ == "__main__":
-
-    if len(sys.argv) != 5:
-        print('Usage: python genScmFile.py <in_txt_dir|in_txt_file> <out_utt_dir> <out_scm_file> <out_id_file>')
-        sys.exit(1)
-
-    out_utt_dir = sys.argv[2]
-    out_scm_file = sys.argv[3]
-    out_id_file = sys.argv[4]
-
-    if not os.path.exists(out_utt_dir):
-        os.makedirs(out_utt_dir)
-
-    if os.path.isdir(sys.argv[1]):
-        print("creating a scheme file from text directory")
-        in_txt_dir = sys.argv[1]
-        utt_text = create_dictionary_from_txt_dir(in_txt_dir)
-    elif os.path.isfile(sys.argv[1]):
-        print("creating a scheme file from text file")
-        in_txt_file = sys.argv[1]
-        utt_text = create_dictionary_from_txt_file(in_txt_file)
-    else:
-        print("%s is neither a directory nor a file" % sys.argv[1])
-        sys.exit(1)
-
-    sorted_utt_text = collections.OrderedDict(sorted(utt_text.items()))
-
-    out_f1 = open(out_scm_file, 'w')
-    out_f2 = open(out_id_file, 'w')
-
-    # if you want to use a particular voice:
-    # out_f1.write("(voice_cstr_edi_fls_multisyn)\n")
-
-    for utt_name, sentence in sorted_utt_text.items():
-        out_file_name = os.path.join(out_utt_dir, utt_name + '.utt')
-        sentence = sentence.replace('"', '\\"')
-        out_f1.write("(utt.save (utt.synth (Utterance Text \"" +
-                     sentence + "\" )) \"" + out_file_name + "\")\n")
-        out_f2.write(utt_name + "\n")
-
-    out_f1.close()
-    out_f2.close()
-
-
-import sys
-import os
-import numpy as np
-
-
-def divide_into_states(st_dur, fn_dur, num_states):
-    # Durations are in HTK 100 ns units, so one 5 ms frame is 50000 units.
-    state_dur = np.zeros((2, num_states), np.int64)
-
-    state_dur[0][0] = st_dur
-    state_dur[1][num_states - 1] = fn_dur
-
-    num_of_frames = (fn_dur - st_dur) // 50000
-    nof_each_state = num_of_frames // num_states
-
-    # Note: if nof_each_state < 1, some states end up with zero duration.
-
-    for k in range(num_states - 1):
-        state_dur[1][k] = state_dur[0][k] + (nof_each_state * 50000)
-        state_dur[0][k + 1] = state_dur[1][k]
-
-    return state_dur
-
-
-def normalize_dur(dur):
-    # Round the duration to the nearest multiple of 50000 (one frame).
-    rem_t = dur % 50000
-
-    if rem_t <= 25000:
-        dur = dur - rem_t
-    else:
-        dur = dur + (50000 - rem_t)
-
-    return dur
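-
-
-# Editor's note: a quick worked example of the two helpers above (values
-# chosen purely for illustration). With st_dur=0, fn_dur=500000 (10 frames)
-# and num_states=5, each state gets two frames:
-#     divide_into_states(0, 500000, 5)
-#     -> [[     0, 100000, 200000, 300000, 400000],
-#         [100000, 200000, 300000, 400000, 500000]]
-# and normalize_dur(73000) rounds down to the nearest frame boundary, 50000.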
-
-
-def normalize_label_files(in_lab_file, out_lab_file, label_style, write_time_stamps):
-    out_f = open(out_lab_file, 'w')
-
-    in_f = open(in_lab_file, 'r')
-    data = in_f.readlines()
-    in_f.close()
-
-    # Collect the non-pause phone sequence.
-    ph_arr = []
-    for i in data:
-        fstr = i.strip().split()
-        ftag = fstr[2]
-        ph = ftag[ftag.index('-') + 1:ftag.index('+')]
-        if ph == 'pau':
-            continue
-        ph_arr.append(ph)
-
-    # Merge consecutive pauses and relabel utterance-boundary pauses as 'sil'.
-    count = 0
-    prev_ph = ''
-    merged_data = [[], [], []]
-    for i in data:
-        fstr = i.strip().split()
-        start_time = fstr[0]
-        end_time = fstr[1]
-        ftag = fstr[2]
-        mid_indx = ftag.index(':')
-        p1 = ftag[0:mid_indx]
-        p2 = ftag[mid_indx:]
-        ph = ftag[ftag.index('-') + 1:ftag.index('+')]
-        if ph != 'pau':
-            count = count + 1
-        if prev_ph == 'pau' and ph == 'pau':
-            continue
-        if (count <= 2 and 'pau' in p1) or (count > len(ph_arr) - 2 and 'pau' in p1):
-            p1 = p1.replace('pau', 'sil')
-            ftag = p1 + p2
-        if count >= 1 and count < len(ph_arr):
-            if '-sil+' in ftag:
-                ftag = ftag.replace('-sil+', '-pau+')
-        merged_data[0].append(start_time)
-        merged_data[1].append(end_time)
-        merged_data[2].append(ftag)
-        prev_ph = ph
-
-    num_states = 5
-    tot_num_ph = len(merged_data[0])
-    for j in range(tot_num_ph):
-        if j < tot_num_ph - 1:
-            ph_end = normalize_dur(int(merged_data[0][j + 1]))
-            merged_data[0][j + 1] = str(ph_end)
-            merged_data[1][j] = merged_data[0][j + 1]
-        else:
-            end_time = normalize_dur(int(end_time))
-            merged_data[1][j] = str(end_time)
-
-        if (int(merged_data[1][j]) - int(merged_data[0][j])) == 0:
-            raise ValueError('zero duration for phone: %s' % merged_data[2][j])
-
-        if label_style == "phone_align":
-            if write_time_stamps:
-                out_f.write(
-                    merged_data[0][j] + ' ' + merged_data[1][j] + ' ' + merged_data[2][j] + '\n')
-            else:
-                out_f.write(merged_data[2][j] + '\n')
-        elif label_style == "state_align":
-            if write_time_stamps:
-                state_dur = divide_into_states(
-                    int(merged_data[0][j]), int(merged_data[1][j]), num_states)
-                for k in range(num_states):
-                    out_f.write(str(state_dur[0][k]) + ' ' + str(state_dur[1][k]) +
-                                ' ' + merged_data[2][j] + '[' + str(k + 2) + ']\n')
-            else:
-                out_f.write(merged_data[2][j] + '\n')
-
-    out_f.close()
-
-
-if __name__ == "__main__":
-
-    if len(sys.argv) < 5:
-        print('Usage: python normalize_lab_for_merlin.py <in_lab_dir> <out_lab_dir> <phone_align|state_align> <file_id_list> [write_time_stamps (1|0)]\n')
-        sys.exit(1)
-
-    in_lab_dir = sys.argv[1]
-    out_lab_dir = sys.argv[2]
-    label_style = sys.argv[3]
-    file_id_list = sys.argv[4]
-
-    write_time_stamps = True
-    if len(sys.argv) == 6:
-        if int(sys.argv[5]) == 0:
-            write_time_stamps = False
-
-    if label_style not in ("phone_align", "state_align"):
-        print("Label style '%s' is not supported as of now; please use state_align or phone_align." % (
-            label_style))
-        sys.exit(1)
-
-    if not os.path.exists(out_lab_dir):
-        os.makedirs(out_lab_dir)
-
-    in_f = open(file_id_list, 'r')
-
-    for i in in_f.readlines():
-        filename = i.strip() + '.lab'
-        print(filename)
-        in_lab_file = os.path.join(in_lab_dir, filename)
-        out_lab_file = os.path.join(out_lab_dir, filename)
-        normalize_label_files(in_lab_file, out_lab_file,
-                              label_style, write_time_stamps)
-
-    in_f.close()
-
-
-import os
-import sys
-
-if __name__ == "__main__":
-
-    if len(sys.argv) != 3:
-        print('Usage: python src/prepare_txt_done_data_file.py <txt_dir> <out_file>\n')
-        sys.exit(1)
-
-    txt_dir = sys.argv[1]
-    out_file = sys.argv[2]
-
-    out_f = open(out_file, 'w')
-
-    for txtfile in os.listdir(txt_dir):
-        file_id = os.path.basename(txtfile).split(".")[0]
-        txtfile = os.path.join(txt_dir, txtfile)
-        with open(txtfile, 'r') as myfile:
-            data = myfile.read().replace('\n', '')
-            data = data.replace('"', '\\"')
-        out_f.write("( " + file_id + " \" " + data + " \")\n")
-
-    out_f.close()
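-
-# Editor's note: each line written above follows the festival-style
-# txt.done.data convention, e.g. (file id and sentence hypothetical):
-#     ( utt_0001 " hello world ")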
-
-
-#!/usr/bin/env python2
-# -*- coding: utf-8 -*-
-"""
-@author: Felipe Espic
-
-DESCRIPTION:
-This script extracts low-dimensional acoustic features from a batch of wav files, intended for use with the Merlin toolkit.
-It runs the extraction in parallel, using all of the cores available in the system.
-
-The acoustic features extracted and used by Merlin are:
-- '.mag'  : Mel-scaled log magnitude (dim=nbins_mel, usually 60).
-- '.real' : Mel-scaled real part (dim=nbins_phase, usually 45).
-- '.imag' : Mel-scaled imaginary part (dim=nbins_phase, usually 45).
-- '.lf0'  : Log-F0 (dim=1).
-
-This script also extracts the additional files:
-- '.est'  : File generated by REAPER containing epoch locations and voiced/unvoiced decisions (remove them if not needed).
-- '.shift': File containing the shifts (hop sizes) for each extracted frame (variable frame rate).
-            It is used to modify the label files in Merlin. See .... for more information.
-
-INSTRUCTIONS:
-This demo should work out of the box. Just run it by typing: python